{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9793, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 24.37600491006036, "learning_rate": 3.4013605442176873e-08, "loss": 2.0743, "step": 1 }, { "epoch": 0.0, "grad_norm": 26.833030527657595, "learning_rate": 6.802721088435375e-08, "loss": 2.2916, "step": 2 }, { "epoch": 0.0, "grad_norm": 39.88834984250017, "learning_rate": 1.0204081632653061e-07, "loss": 2.3772, "step": 3 }, { "epoch": 0.0, "grad_norm": 29.207542521370527, "learning_rate": 1.360544217687075e-07, "loss": 2.3653, "step": 4 }, { "epoch": 0.0, "grad_norm": 21.65482985939375, "learning_rate": 1.700680272108844e-07, "loss": 1.9552, "step": 5 }, { "epoch": 0.0, "grad_norm": 22.591810408608218, "learning_rate": 2.0408163265306121e-07, "loss": 1.9914, "step": 6 }, { "epoch": 0.0, "grad_norm": 24.260494885556234, "learning_rate": 2.3809523809523811e-07, "loss": 2.1797, "step": 7 }, { "epoch": 0.0, "grad_norm": 29.736886445718735, "learning_rate": 2.72108843537415e-07, "loss": 2.3747, "step": 8 }, { "epoch": 0.0, "grad_norm": 26.00120855143027, "learning_rate": 3.0612244897959183e-07, "loss": 2.2367, "step": 9 }, { "epoch": 0.0, "grad_norm": 29.498999591570936, "learning_rate": 3.401360544217688e-07, "loss": 2.423, "step": 10 }, { "epoch": 0.0, "grad_norm": 31.328948547416154, "learning_rate": 3.7414965986394563e-07, "loss": 2.4417, "step": 11 }, { "epoch": 0.0, "grad_norm": 26.20672876260872, "learning_rate": 4.0816326530612243e-07, "loss": 2.0552, "step": 12 }, { "epoch": 0.0, "grad_norm": 20.63067023545699, "learning_rate": 4.421768707482994e-07, "loss": 1.9437, "step": 13 }, { "epoch": 0.0, "grad_norm": 23.89154889027501, "learning_rate": 4.7619047619047623e-07, "loss": 2.1113, "step": 14 }, { "epoch": 0.0, "grad_norm": 24.041290883893975, "learning_rate": 5.102040816326531e-07, "loss": 2.2619, "step": 15 }, { "epoch": 0.0, "grad_norm": 26.186653281260032, "learning_rate": 5.4421768707483e-07, "loss": 2.2418, "step": 16 }, { "epoch": 0.0, "grad_norm": 28.031896811693304, "learning_rate": 5.782312925170068e-07, "loss": 2.4184, "step": 17 }, { "epoch": 0.0, "grad_norm": 22.946602850272914, "learning_rate": 6.122448979591837e-07, "loss": 2.0323, "step": 18 }, { "epoch": 0.0, "grad_norm": 24.33565681437707, "learning_rate": 6.462585034013606e-07, "loss": 2.0499, "step": 19 }, { "epoch": 0.0, "grad_norm": 17.73868814104414, "learning_rate": 6.802721088435376e-07, "loss": 1.7899, "step": 20 }, { "epoch": 0.0, "grad_norm": 23.937911919304142, "learning_rate": 7.142857142857143e-07, "loss": 2.075, "step": 21 }, { "epoch": 0.0, "grad_norm": 28.1874224166998, "learning_rate": 7.482993197278913e-07, "loss": 2.1711, "step": 22 }, { "epoch": 0.0, "grad_norm": 50.91502072696533, "learning_rate": 7.823129251700681e-07, "loss": 1.7261, "step": 23 }, { "epoch": 0.0, "grad_norm": 23.279490435593765, "learning_rate": 8.163265306122449e-07, "loss": 1.8366, "step": 24 }, { "epoch": 0.0, "grad_norm": 16.677940878155336, "learning_rate": 8.503401360544218e-07, "loss": 1.5824, "step": 25 }, { "epoch": 0.0, "grad_norm": 19.027254847799774, "learning_rate": 8.843537414965988e-07, "loss": 1.5952, "step": 26 }, { "epoch": 0.0, "grad_norm": 16.279227277244416, "learning_rate": 9.183673469387756e-07, "loss": 1.4854, "step": 27 }, { "epoch": 0.0, "grad_norm": 17.9496115202291, "learning_rate": 9.523809523809525e-07, "loss": 1.5111, "step": 28 }, { "epoch": 0.0, "grad_norm": 16.69742326514714, "learning_rate": 9.863945578231293e-07, "loss": 1.4919, "step": 29 }, { "epoch": 0.0, "grad_norm": 21.427445086853666, "learning_rate": 1.0204081632653063e-06, "loss": 1.3762, "step": 30 }, { "epoch": 0.0, "grad_norm": 11.39847539056294, "learning_rate": 1.0544217687074832e-06, "loss": 1.2993, "step": 31 }, { "epoch": 0.0, "grad_norm": 10.544738912788258, "learning_rate": 1.08843537414966e-06, "loss": 1.2876, "step": 32 }, { "epoch": 0.0, "grad_norm": 10.081785456776537, "learning_rate": 1.122448979591837e-06, "loss": 1.293, "step": 33 }, { "epoch": 0.0, "grad_norm": 7.502772363151497, "learning_rate": 1.1564625850340136e-06, "loss": 1.2483, "step": 34 }, { "epoch": 0.0, "grad_norm": 12.534910864179452, "learning_rate": 1.1904761904761906e-06, "loss": 1.1106, "step": 35 }, { "epoch": 0.0, "grad_norm": 11.449108492965744, "learning_rate": 1.2244897959183673e-06, "loss": 1.1696, "step": 36 }, { "epoch": 0.0, "grad_norm": 10.209073221448715, "learning_rate": 1.2585034013605443e-06, "loss": 1.1738, "step": 37 }, { "epoch": 0.0, "grad_norm": 10.621540219416996, "learning_rate": 1.2925170068027212e-06, "loss": 1.1055, "step": 38 }, { "epoch": 0.0, "grad_norm": 9.746916680551596, "learning_rate": 1.3265306122448982e-06, "loss": 1.2064, "step": 39 }, { "epoch": 0.0, "grad_norm": 9.596810148377433, "learning_rate": 1.3605442176870751e-06, "loss": 1.0852, "step": 40 }, { "epoch": 0.0, "grad_norm": 10.40195266347631, "learning_rate": 1.3945578231292517e-06, "loss": 1.0018, "step": 41 }, { "epoch": 0.0, "grad_norm": 10.571380586504354, "learning_rate": 1.4285714285714286e-06, "loss": 0.953, "step": 42 }, { "epoch": 0.0, "grad_norm": 7.585533548102302, "learning_rate": 1.4625850340136056e-06, "loss": 0.8821, "step": 43 }, { "epoch": 0.0, "grad_norm": 8.404713441536495, "learning_rate": 1.4965986394557825e-06, "loss": 0.7695, "step": 44 }, { "epoch": 0.0, "grad_norm": 4.5867288041776115, "learning_rate": 1.5306122448979593e-06, "loss": 0.8648, "step": 45 }, { "epoch": 0.0, "grad_norm": 3.6721548785708147, "learning_rate": 1.5646258503401362e-06, "loss": 0.884, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.9264402406580023, "learning_rate": 1.5986394557823132e-06, "loss": 0.8249, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.7930408464111736, "learning_rate": 1.6326530612244897e-06, "loss": 0.792, "step": 48 }, { "epoch": 0.01, "grad_norm": 2.215723099085952, "learning_rate": 1.6666666666666667e-06, "loss": 0.8562, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.9110389985898923, "learning_rate": 1.7006802721088436e-06, "loss": 0.7334, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.7751749149458163, "learning_rate": 1.7346938775510206e-06, "loss": 0.8077, "step": 51 }, { "epoch": 0.01, "grad_norm": 1.8377844508849992, "learning_rate": 1.7687074829931975e-06, "loss": 0.6702, "step": 52 }, { "epoch": 0.01, "grad_norm": 2.086538165010561, "learning_rate": 1.8027210884353743e-06, "loss": 0.8929, "step": 53 }, { "epoch": 0.01, "grad_norm": 2.0196230733195977, "learning_rate": 1.8367346938775512e-06, "loss": 0.8344, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.9114575198902781, "learning_rate": 1.8707482993197282e-06, "loss": 0.8461, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.8386564137345003, "learning_rate": 1.904761904761905e-06, "loss": 0.6944, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.7669677526086902, "learning_rate": 1.938775510204082e-06, "loss": 0.7036, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.9443175535118087, "learning_rate": 1.9727891156462586e-06, "loss": 0.8055, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.982251283245862, "learning_rate": 2.0068027210884353e-06, "loss": 0.9017, "step": 59 }, { "epoch": 0.01, "grad_norm": 1.8199052931941335, "learning_rate": 2.0408163265306125e-06, "loss": 0.7122, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.878052718699954, "learning_rate": 2.0748299319727892e-06, "loss": 0.8013, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.870597860514317, "learning_rate": 2.1088435374149664e-06, "loss": 0.7924, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.8140266668575502, "learning_rate": 2.1428571428571427e-06, "loss": 0.762, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.8512636158186084, "learning_rate": 2.17687074829932e-06, "loss": 0.7776, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.7782401794331282, "learning_rate": 2.2108843537414966e-06, "loss": 0.7948, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.615784656212362, "learning_rate": 2.244897959183674e-06, "loss": 0.6374, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.6969498860283947, "learning_rate": 2.2789115646258505e-06, "loss": 0.8809, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.8060882082928729, "learning_rate": 2.3129251700680273e-06, "loss": 0.7395, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.546166806022019, "learning_rate": 2.3469387755102044e-06, "loss": 0.709, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.7747747409487362, "learning_rate": 2.380952380952381e-06, "loss": 0.7696, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.7423198098105417, "learning_rate": 2.414965986394558e-06, "loss": 0.7306, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.6591794334548446, "learning_rate": 2.4489795918367347e-06, "loss": 0.7088, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.7273554866335024, "learning_rate": 2.482993197278912e-06, "loss": 0.6595, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.498835420617115, "learning_rate": 2.5170068027210886e-06, "loss": 0.7601, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.023250684153149, "learning_rate": 2.5510204081632657e-06, "loss": 0.7256, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.5478968829077804, "learning_rate": 2.5850340136054425e-06, "loss": 0.8096, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.5796132249663166, "learning_rate": 2.6190476190476192e-06, "loss": 0.7599, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.815673980902825, "learning_rate": 2.6530612244897964e-06, "loss": 0.6603, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.7715881520831036, "learning_rate": 2.687074829931973e-06, "loss": 0.6769, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.7299283528700988, "learning_rate": 2.7210884353741503e-06, "loss": 0.7392, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.8763211240670208, "learning_rate": 2.7551020408163266e-06, "loss": 0.7027, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.8686641855315056, "learning_rate": 2.7891156462585034e-06, "loss": 0.7101, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.6590831450372834, "learning_rate": 2.8231292517006805e-06, "loss": 0.6334, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.7996401131391662, "learning_rate": 2.8571428571428573e-06, "loss": 0.6968, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.7924888792598253, "learning_rate": 2.891156462585034e-06, "loss": 0.8295, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.78037398791936, "learning_rate": 2.925170068027211e-06, "loss": 0.8654, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.6982050089652276, "learning_rate": 2.959183673469388e-06, "loss": 0.7033, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.3175985590031105, "learning_rate": 2.993197278911565e-06, "loss": 0.6211, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.5667400178759465, "learning_rate": 3.027210884353742e-06, "loss": 0.6603, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.5447734339078725, "learning_rate": 3.0612244897959185e-06, "loss": 0.593, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.7032861071932233, "learning_rate": 3.0952380952380957e-06, "loss": 0.6921, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.7927700457708917, "learning_rate": 3.1292517006802725e-06, "loss": 0.8349, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.4925265611080762, "learning_rate": 3.1632653061224496e-06, "loss": 0.5846, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.7117070631244786, "learning_rate": 3.1972789115646264e-06, "loss": 0.647, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.697511535926943, "learning_rate": 3.231292517006803e-06, "loss": 0.6722, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.7254382684213527, "learning_rate": 3.2653061224489794e-06, "loss": 0.7303, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.5951206732941405, "learning_rate": 3.2993197278911566e-06, "loss": 0.6651, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.513794665767939, "learning_rate": 3.3333333333333333e-06, "loss": 0.6315, "step": 98 }, { "epoch": 0.01, "grad_norm": 1.5406773122462292, "learning_rate": 3.3673469387755105e-06, "loss": 0.6029, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.718731893724658, "learning_rate": 3.4013605442176872e-06, "loss": 0.6344, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.6456803238841704, "learning_rate": 3.435374149659864e-06, "loss": 0.7193, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.7577242071574983, "learning_rate": 3.469387755102041e-06, "loss": 0.72, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.7714953522747514, "learning_rate": 3.503401360544218e-06, "loss": 0.6679, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.4480708411488254, "learning_rate": 3.537414965986395e-06, "loss": 0.6531, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.8365206929893136, "learning_rate": 3.5714285714285718e-06, "loss": 0.7044, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.486860676912911, "learning_rate": 3.6054421768707485e-06, "loss": 0.6841, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.7835492388918792, "learning_rate": 3.6394557823129257e-06, "loss": 0.8068, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.615931527000937, "learning_rate": 3.6734693877551024e-06, "loss": 0.6417, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.6534131925371214, "learning_rate": 3.7074829931972796e-06, "loss": 0.7121, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.7452346765076943, "learning_rate": 3.7414965986394563e-06, "loss": 0.7656, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.526698037613455, "learning_rate": 3.7755102040816327e-06, "loss": 0.6468, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.626993741117334, "learning_rate": 3.80952380952381e-06, "loss": 0.7149, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.6455964103104863, "learning_rate": 3.843537414965986e-06, "loss": 0.6237, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.9670441182773475, "learning_rate": 3.877551020408164e-06, "loss": 0.671, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.6796708307838442, "learning_rate": 3.9115646258503405e-06, "loss": 0.7076, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.7009685464056012, "learning_rate": 3.945578231292517e-06, "loss": 0.7223, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.9120639167262867, "learning_rate": 3.979591836734694e-06, "loss": 0.787, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.777949509784645, "learning_rate": 4.013605442176871e-06, "loss": 0.6989, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.6695237176759665, "learning_rate": 4.047619047619048e-06, "loss": 0.6026, "step": 119 }, { "epoch": 0.01, "grad_norm": 1.691856525739425, "learning_rate": 4.081632653061225e-06, "loss": 0.6694, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.936074972007308, "learning_rate": 4.115646258503402e-06, "loss": 0.6586, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.5318563602943813, "learning_rate": 4.1496598639455785e-06, "loss": 0.7188, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.7090167741081472, "learning_rate": 4.183673469387755e-06, "loss": 0.6876, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.6004107393124056, "learning_rate": 4.217687074829933e-06, "loss": 0.6473, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.49202666703522, "learning_rate": 4.251700680272109e-06, "loss": 0.6339, "step": 125 }, { "epoch": 0.01, "grad_norm": 2.0495089038501577, "learning_rate": 4.2857142857142855e-06, "loss": 0.7167, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.9023979558535915, "learning_rate": 4.319727891156463e-06, "loss": 0.7305, "step": 127 }, { "epoch": 0.01, "grad_norm": 1.6630273111408913, "learning_rate": 4.35374149659864e-06, "loss": 0.7452, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.5732605781574536, "learning_rate": 4.3877551020408165e-06, "loss": 0.6768, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.577499692776106, "learning_rate": 4.421768707482993e-06, "loss": 0.6329, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.6235826168929235, "learning_rate": 4.45578231292517e-06, "loss": 0.6762, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.5428248461039875, "learning_rate": 4.489795918367348e-06, "loss": 0.7562, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.9164925856686985, "learning_rate": 4.523809523809524e-06, "loss": 0.7119, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.848979264012177, "learning_rate": 4.557823129251701e-06, "loss": 0.7637, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.7455309836610662, "learning_rate": 4.591836734693878e-06, "loss": 0.5604, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.982203693765271, "learning_rate": 4.6258503401360546e-06, "loss": 0.7572, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.7676801756158114, "learning_rate": 4.659863945578232e-06, "loss": 0.7314, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.729013206901177, "learning_rate": 4.693877551020409e-06, "loss": 0.6294, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.492398202583556, "learning_rate": 4.727891156462586e-06, "loss": 0.6717, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.690617402068083, "learning_rate": 4.761904761904762e-06, "loss": 0.7673, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.574257935610297, "learning_rate": 4.795918367346939e-06, "loss": 0.6553, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.409526437467423, "learning_rate": 4.829931972789116e-06, "loss": 0.5825, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.7407821477674807, "learning_rate": 4.863945578231293e-06, "loss": 0.7204, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.5900512122792572, "learning_rate": 4.897959183673469e-06, "loss": 0.6604, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.5847846049873673, "learning_rate": 4.931972789115647e-06, "loss": 0.6514, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.5852684118811338, "learning_rate": 4.965986394557824e-06, "loss": 0.7289, "step": 146 }, { "epoch": 0.02, "grad_norm": 1.6256376221782494, "learning_rate": 5e-06, "loss": 0.728, "step": 147 }, { "epoch": 0.02, "grad_norm": 1.8966560928887934, "learning_rate": 5.034013605442177e-06, "loss": 0.7663, "step": 148 }, { "epoch": 0.02, "grad_norm": 1.751550488089904, "learning_rate": 5.068027210884354e-06, "loss": 0.6407, "step": 149 }, { "epoch": 0.02, "grad_norm": 1.7479045405763847, "learning_rate": 5.1020408163265315e-06, "loss": 0.6899, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.9618681495537234, "learning_rate": 5.136054421768708e-06, "loss": 0.7526, "step": 151 }, { "epoch": 0.02, "grad_norm": 1.5733610872317492, "learning_rate": 5.170068027210885e-06, "loss": 0.7931, "step": 152 }, { "epoch": 0.02, "grad_norm": 1.8748072584536348, "learning_rate": 5.204081632653062e-06, "loss": 0.7314, "step": 153 }, { "epoch": 0.02, "grad_norm": 1.8234924533734982, "learning_rate": 5.2380952380952384e-06, "loss": 0.7427, "step": 154 }, { "epoch": 0.02, "grad_norm": 1.5731421411012545, "learning_rate": 5.272108843537416e-06, "loss": 0.5526, "step": 155 }, { "epoch": 0.02, "grad_norm": 1.7075369108005138, "learning_rate": 5.306122448979593e-06, "loss": 0.6797, "step": 156 }, { "epoch": 0.02, "grad_norm": 2.0357739005013644, "learning_rate": 5.3401360544217695e-06, "loss": 0.8152, "step": 157 }, { "epoch": 0.02, "grad_norm": 1.7076817300130989, "learning_rate": 5.374149659863946e-06, "loss": 0.6771, "step": 158 }, { "epoch": 0.02, "grad_norm": 1.8725695744854283, "learning_rate": 5.408163265306123e-06, "loss": 0.7164, "step": 159 }, { "epoch": 0.02, "grad_norm": 1.5620211617083959, "learning_rate": 5.442176870748301e-06, "loss": 0.6818, "step": 160 }, { "epoch": 0.02, "grad_norm": 1.7354422521737343, "learning_rate": 5.476190476190477e-06, "loss": 0.692, "step": 161 }, { "epoch": 0.02, "grad_norm": 1.9244326149144657, "learning_rate": 5.510204081632653e-06, "loss": 0.6534, "step": 162 }, { "epoch": 0.02, "grad_norm": 1.8923513909698555, "learning_rate": 5.54421768707483e-06, "loss": 0.7655, "step": 163 }, { "epoch": 0.02, "grad_norm": 1.5514753077745618, "learning_rate": 5.578231292517007e-06, "loss": 0.6224, "step": 164 }, { "epoch": 0.02, "grad_norm": 1.6207023956004094, "learning_rate": 5.6122448979591834e-06, "loss": 0.7219, "step": 165 }, { "epoch": 0.02, "grad_norm": 1.7037729108688764, "learning_rate": 5.646258503401361e-06, "loss": 0.7033, "step": 166 }, { "epoch": 0.02, "grad_norm": 1.9003632231431964, "learning_rate": 5.680272108843538e-06, "loss": 0.7299, "step": 167 }, { "epoch": 0.02, "grad_norm": 1.5310345878298803, "learning_rate": 5.7142857142857145e-06, "loss": 0.6551, "step": 168 }, { "epoch": 0.02, "grad_norm": 1.8564773701655501, "learning_rate": 5.748299319727891e-06, "loss": 0.7196, "step": 169 }, { "epoch": 0.02, "grad_norm": 1.8188628177708248, "learning_rate": 5.782312925170068e-06, "loss": 0.5661, "step": 170 }, { "epoch": 0.02, "grad_norm": 1.6699767667087497, "learning_rate": 5.816326530612246e-06, "loss": 0.6518, "step": 171 }, { "epoch": 0.02, "grad_norm": 1.7206189531206606, "learning_rate": 5.850340136054422e-06, "loss": 0.7742, "step": 172 }, { "epoch": 0.02, "grad_norm": 1.8033774086922054, "learning_rate": 5.884353741496599e-06, "loss": 0.6411, "step": 173 }, { "epoch": 0.02, "grad_norm": 1.713982349135499, "learning_rate": 5.918367346938776e-06, "loss": 0.6396, "step": 174 }, { "epoch": 0.02, "grad_norm": 1.7246869304478905, "learning_rate": 5.9523809523809525e-06, "loss": 0.6396, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.8034513347448429, "learning_rate": 5.98639455782313e-06, "loss": 0.6783, "step": 176 }, { "epoch": 0.02, "grad_norm": 1.643393825504336, "learning_rate": 6.020408163265307e-06, "loss": 0.7187, "step": 177 }, { "epoch": 0.02, "grad_norm": 1.5194823760447236, "learning_rate": 6.054421768707484e-06, "loss": 0.7227, "step": 178 }, { "epoch": 0.02, "grad_norm": 1.6859058852522955, "learning_rate": 6.08843537414966e-06, "loss": 0.7347, "step": 179 }, { "epoch": 0.02, "grad_norm": 1.9486375688174924, "learning_rate": 6.122448979591837e-06, "loss": 0.7446, "step": 180 }, { "epoch": 0.02, "grad_norm": 1.6763759487748717, "learning_rate": 6.156462585034015e-06, "loss": 0.6381, "step": 181 }, { "epoch": 0.02, "grad_norm": 1.8564234182142143, "learning_rate": 6.1904761904761914e-06, "loss": 0.6583, "step": 182 }, { "epoch": 0.02, "grad_norm": 1.8403007537092617, "learning_rate": 6.224489795918368e-06, "loss": 0.7823, "step": 183 }, { "epoch": 0.02, "grad_norm": 1.789212280535635, "learning_rate": 6.258503401360545e-06, "loss": 0.6117, "step": 184 }, { "epoch": 0.02, "grad_norm": 1.7657040447912309, "learning_rate": 6.292517006802722e-06, "loss": 0.5913, "step": 185 }, { "epoch": 0.02, "grad_norm": 1.7023335777039261, "learning_rate": 6.326530612244899e-06, "loss": 0.6352, "step": 186 }, { "epoch": 0.02, "grad_norm": 1.7093598643828203, "learning_rate": 6.360544217687076e-06, "loss": 0.7674, "step": 187 }, { "epoch": 0.02, "grad_norm": 1.9711429225819475, "learning_rate": 6.394557823129253e-06, "loss": 0.7496, "step": 188 }, { "epoch": 0.02, "grad_norm": 1.894817277752049, "learning_rate": 6.4285714285714295e-06, "loss": 0.6425, "step": 189 }, { "epoch": 0.02, "grad_norm": 1.7978083853039564, "learning_rate": 6.462585034013606e-06, "loss": 0.8063, "step": 190 }, { "epoch": 0.02, "grad_norm": 1.7078735099836249, "learning_rate": 6.496598639455784e-06, "loss": 0.7289, "step": 191 }, { "epoch": 0.02, "grad_norm": 1.5358377887324042, "learning_rate": 6.530612244897959e-06, "loss": 0.6782, "step": 192 }, { "epoch": 0.02, "grad_norm": 1.771217831130146, "learning_rate": 6.5646258503401364e-06, "loss": 0.7417, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.707479636164301, "learning_rate": 6.598639455782313e-06, "loss": 0.6823, "step": 194 }, { "epoch": 0.02, "grad_norm": 1.6059713975610566, "learning_rate": 6.63265306122449e-06, "loss": 0.6423, "step": 195 }, { "epoch": 0.02, "grad_norm": 1.662850459013901, "learning_rate": 6.666666666666667e-06, "loss": 0.5996, "step": 196 }, { "epoch": 0.02, "grad_norm": 1.814379826522703, "learning_rate": 6.700680272108843e-06, "loss": 0.6818, "step": 197 }, { "epoch": 0.02, "grad_norm": 1.9705264765932713, "learning_rate": 6.734693877551021e-06, "loss": 0.7526, "step": 198 }, { "epoch": 0.02, "grad_norm": 1.9222029712087092, "learning_rate": 6.768707482993198e-06, "loss": 0.7334, "step": 199 }, { "epoch": 0.02, "grad_norm": 1.4868086378442777, "learning_rate": 6.8027210884353745e-06, "loss": 0.6678, "step": 200 }, { "epoch": 0.02, "grad_norm": 1.6904148367334393, "learning_rate": 6.836734693877551e-06, "loss": 0.711, "step": 201 }, { "epoch": 0.02, "grad_norm": 1.7054865387721603, "learning_rate": 6.870748299319728e-06, "loss": 0.6971, "step": 202 }, { "epoch": 0.02, "grad_norm": 1.4808626704516115, "learning_rate": 6.9047619047619055e-06, "loss": 0.6826, "step": 203 }, { "epoch": 0.02, "grad_norm": 1.8690482533019264, "learning_rate": 6.938775510204082e-06, "loss": 0.6808, "step": 204 }, { "epoch": 0.02, "grad_norm": 1.711029145635382, "learning_rate": 6.972789115646259e-06, "loss": 0.7081, "step": 205 }, { "epoch": 0.02, "grad_norm": 3.0440937592685136, "learning_rate": 7.006802721088436e-06, "loss": 0.6996, "step": 206 }, { "epoch": 0.02, "grad_norm": 1.7724505160576767, "learning_rate": 7.0408163265306125e-06, "loss": 0.6248, "step": 207 }, { "epoch": 0.02, "grad_norm": 1.8129683012489666, "learning_rate": 7.07482993197279e-06, "loss": 0.7575, "step": 208 }, { "epoch": 0.02, "grad_norm": 1.702825929632407, "learning_rate": 7.108843537414967e-06, "loss": 0.6695, "step": 209 }, { "epoch": 0.02, "grad_norm": 1.673458227103071, "learning_rate": 7.1428571428571436e-06, "loss": 0.7505, "step": 210 }, { "epoch": 0.02, "grad_norm": 1.7633218550687653, "learning_rate": 7.17687074829932e-06, "loss": 0.7367, "step": 211 }, { "epoch": 0.02, "grad_norm": 2.1762667467628325, "learning_rate": 7.210884353741497e-06, "loss": 0.7545, "step": 212 }, { "epoch": 0.02, "grad_norm": 1.840644329551339, "learning_rate": 7.244897959183675e-06, "loss": 0.7109, "step": 213 }, { "epoch": 0.02, "grad_norm": 1.6929102756112142, "learning_rate": 7.278911564625851e-06, "loss": 0.7755, "step": 214 }, { "epoch": 0.02, "grad_norm": 1.391025296277614, "learning_rate": 7.312925170068028e-06, "loss": 0.6075, "step": 215 }, { "epoch": 0.02, "grad_norm": 1.8083483409122258, "learning_rate": 7.346938775510205e-06, "loss": 0.6886, "step": 216 }, { "epoch": 0.02, "grad_norm": 2.029611278793335, "learning_rate": 7.380952380952382e-06, "loss": 0.7421, "step": 217 }, { "epoch": 0.02, "grad_norm": 1.605238625039483, "learning_rate": 7.414965986394559e-06, "loss": 0.6755, "step": 218 }, { "epoch": 0.02, "grad_norm": 1.915954205787534, "learning_rate": 7.448979591836736e-06, "loss": 0.6946, "step": 219 }, { "epoch": 0.02, "grad_norm": 1.7331297380694044, "learning_rate": 7.482993197278913e-06, "loss": 0.6961, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.493482305986828, "learning_rate": 7.5170068027210886e-06, "loss": 0.627, "step": 221 }, { "epoch": 0.02, "grad_norm": 1.662452465583868, "learning_rate": 7.551020408163265e-06, "loss": 0.7223, "step": 222 }, { "epoch": 0.02, "grad_norm": 1.6917204744172722, "learning_rate": 7.585034013605442e-06, "loss": 0.6982, "step": 223 }, { "epoch": 0.02, "grad_norm": 1.522113284582916, "learning_rate": 7.61904761904762e-06, "loss": 0.712, "step": 224 }, { "epoch": 0.02, "grad_norm": 1.7791369359126876, "learning_rate": 7.653061224489796e-06, "loss": 0.6898, "step": 225 }, { "epoch": 0.02, "grad_norm": 1.6291975647096288, "learning_rate": 7.687074829931972e-06, "loss": 0.7722, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.5859355891222724, "learning_rate": 7.72108843537415e-06, "loss": 0.6929, "step": 227 }, { "epoch": 0.02, "grad_norm": 1.7554126637804617, "learning_rate": 7.755102040816327e-06, "loss": 0.7707, "step": 228 }, { "epoch": 0.02, "grad_norm": 1.5554115216030258, "learning_rate": 7.789115646258504e-06, "loss": 0.6783, "step": 229 }, { "epoch": 0.02, "grad_norm": 1.857646813790035, "learning_rate": 7.823129251700681e-06, "loss": 0.6721, "step": 230 }, { "epoch": 0.02, "grad_norm": 1.681841736803624, "learning_rate": 7.857142857142858e-06, "loss": 0.6104, "step": 231 }, { "epoch": 0.02, "grad_norm": 1.6997027191468794, "learning_rate": 7.891156462585034e-06, "loss": 0.7461, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.8666196638128256, "learning_rate": 7.925170068027211e-06, "loss": 0.6964, "step": 233 }, { "epoch": 0.02, "grad_norm": 1.7910578341864032, "learning_rate": 7.959183673469388e-06, "loss": 0.7568, "step": 234 }, { "epoch": 0.02, "grad_norm": 1.5875099746705743, "learning_rate": 7.993197278911565e-06, "loss": 0.6356, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.5736688271574586, "learning_rate": 8.027210884353741e-06, "loss": 0.6181, "step": 236 }, { "epoch": 0.02, "grad_norm": 1.8054557360323615, "learning_rate": 8.06122448979592e-06, "loss": 0.62, "step": 237 }, { "epoch": 0.02, "grad_norm": 1.6323469776825377, "learning_rate": 8.095238095238097e-06, "loss": 0.5427, "step": 238 }, { "epoch": 0.02, "grad_norm": 1.4954541275463547, "learning_rate": 8.129251700680273e-06, "loss": 0.6242, "step": 239 }, { "epoch": 0.02, "grad_norm": 1.774973810269845, "learning_rate": 8.16326530612245e-06, "loss": 0.6487, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.7284190673054922, "learning_rate": 8.197278911564627e-06, "loss": 0.627, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.6273031001169656, "learning_rate": 8.231292517006804e-06, "loss": 0.5584, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.5953339794079997, "learning_rate": 8.26530612244898e-06, "loss": 0.6208, "step": 243 }, { "epoch": 0.02, "grad_norm": 1.5444142249213404, "learning_rate": 8.299319727891157e-06, "loss": 0.6777, "step": 244 }, { "epoch": 0.03, "grad_norm": 1.7657198104025196, "learning_rate": 8.333333333333334e-06, "loss": 0.7233, "step": 245 }, { "epoch": 0.03, "grad_norm": 1.8120455891326517, "learning_rate": 8.36734693877551e-06, "loss": 0.6377, "step": 246 }, { "epoch": 0.03, "grad_norm": 1.556272482829569, "learning_rate": 8.401360544217689e-06, "loss": 0.6382, "step": 247 }, { "epoch": 0.03, "grad_norm": 1.8470053215578899, "learning_rate": 8.435374149659866e-06, "loss": 0.7668, "step": 248 }, { "epoch": 0.03, "grad_norm": 1.5844735302918436, "learning_rate": 8.469387755102042e-06, "loss": 0.7055, "step": 249 }, { "epoch": 0.03, "grad_norm": 1.625837253915243, "learning_rate": 8.503401360544217e-06, "loss": 0.6383, "step": 250 }, { "epoch": 0.03, "grad_norm": 1.629091625687662, "learning_rate": 8.537414965986394e-06, "loss": 0.675, "step": 251 }, { "epoch": 0.03, "grad_norm": 1.6162413644647362, "learning_rate": 8.571428571428571e-06, "loss": 0.6413, "step": 252 }, { "epoch": 0.03, "grad_norm": 1.4726043683184689, "learning_rate": 8.60544217687075e-06, "loss": 0.6919, "step": 253 }, { "epoch": 0.03, "grad_norm": 1.8063050667125247, "learning_rate": 8.639455782312926e-06, "loss": 0.7365, "step": 254 }, { "epoch": 0.03, "grad_norm": 1.5936306269935512, "learning_rate": 8.673469387755103e-06, "loss": 0.6275, "step": 255 }, { "epoch": 0.03, "grad_norm": 1.954508471281085, "learning_rate": 8.70748299319728e-06, "loss": 0.6877, "step": 256 }, { "epoch": 0.03, "grad_norm": 1.8399361827787828, "learning_rate": 8.741496598639456e-06, "loss": 0.6723, "step": 257 }, { "epoch": 0.03, "grad_norm": 1.7366783096391685, "learning_rate": 8.775510204081633e-06, "loss": 0.5553, "step": 258 }, { "epoch": 0.03, "grad_norm": 1.7384484849657749, "learning_rate": 8.80952380952381e-06, "loss": 0.7079, "step": 259 }, { "epoch": 0.03, "grad_norm": 1.5867696195598588, "learning_rate": 8.843537414965987e-06, "loss": 0.7277, "step": 260 }, { "epoch": 0.03, "grad_norm": 1.8873619587701858, "learning_rate": 8.877551020408163e-06, "loss": 0.6686, "step": 261 }, { "epoch": 0.03, "grad_norm": 1.866961755049858, "learning_rate": 8.91156462585034e-06, "loss": 0.7212, "step": 262 }, { "epoch": 0.03, "grad_norm": 1.5288024814997492, "learning_rate": 8.945578231292518e-06, "loss": 0.5727, "step": 263 }, { "epoch": 0.03, "grad_norm": 1.8046992400509005, "learning_rate": 8.979591836734695e-06, "loss": 0.6972, "step": 264 }, { "epoch": 0.03, "grad_norm": 1.7460279927264508, "learning_rate": 9.013605442176872e-06, "loss": 0.7408, "step": 265 }, { "epoch": 0.03, "grad_norm": 1.8589228331519838, "learning_rate": 9.047619047619049e-06, "loss": 0.7276, "step": 266 }, { "epoch": 0.03, "grad_norm": 1.7760656557692274, "learning_rate": 9.081632653061225e-06, "loss": 0.7765, "step": 267 }, { "epoch": 0.03, "grad_norm": 1.6791017783947724, "learning_rate": 9.115646258503402e-06, "loss": 0.6635, "step": 268 }, { "epoch": 0.03, "grad_norm": 1.9090164050180325, "learning_rate": 9.149659863945579e-06, "loss": 0.6706, "step": 269 }, { "epoch": 0.03, "grad_norm": 1.562425232072614, "learning_rate": 9.183673469387756e-06, "loss": 0.6411, "step": 270 }, { "epoch": 0.03, "grad_norm": 1.7263666544303566, "learning_rate": 9.217687074829932e-06, "loss": 0.7089, "step": 271 }, { "epoch": 0.03, "grad_norm": 1.6993249972038187, "learning_rate": 9.251700680272109e-06, "loss": 0.6738, "step": 272 }, { "epoch": 0.03, "grad_norm": 1.5903547724566363, "learning_rate": 9.285714285714288e-06, "loss": 0.6562, "step": 273 }, { "epoch": 0.03, "grad_norm": 1.828612171833203, "learning_rate": 9.319727891156464e-06, "loss": 0.6687, "step": 274 }, { "epoch": 0.03, "grad_norm": 1.5797828500543456, "learning_rate": 9.353741496598641e-06, "loss": 0.7041, "step": 275 }, { "epoch": 0.03, "grad_norm": 1.5969578220449532, "learning_rate": 9.387755102040818e-06, "loss": 0.6659, "step": 276 }, { "epoch": 0.03, "grad_norm": 1.717296987631691, "learning_rate": 9.421768707482995e-06, "loss": 0.6346, "step": 277 }, { "epoch": 0.03, "grad_norm": 1.5980749248927124, "learning_rate": 9.455782312925171e-06, "loss": 0.5849, "step": 278 }, { "epoch": 0.03, "grad_norm": 1.6816584359074274, "learning_rate": 9.489795918367348e-06, "loss": 0.7534, "step": 279 }, { "epoch": 0.03, "grad_norm": 1.9433480787808408, "learning_rate": 9.523809523809525e-06, "loss": 0.7462, "step": 280 }, { "epoch": 0.03, "grad_norm": 1.7284931859320865, "learning_rate": 9.557823129251701e-06, "loss": 0.7111, "step": 281 }, { "epoch": 0.03, "grad_norm": 1.7960853320813222, "learning_rate": 9.591836734693878e-06, "loss": 0.7121, "step": 282 }, { "epoch": 0.03, "grad_norm": 1.696278155156235, "learning_rate": 9.625850340136055e-06, "loss": 0.7395, "step": 283 }, { "epoch": 0.03, "grad_norm": 1.6511935591459885, "learning_rate": 9.659863945578232e-06, "loss": 0.6015, "step": 284 }, { "epoch": 0.03, "grad_norm": 1.8887478316781658, "learning_rate": 9.693877551020408e-06, "loss": 0.7543, "step": 285 }, { "epoch": 0.03, "grad_norm": 1.7203196523879802, "learning_rate": 9.727891156462585e-06, "loss": 0.6412, "step": 286 }, { "epoch": 0.03, "grad_norm": 1.5671510228033791, "learning_rate": 9.761904761904762e-06, "loss": 0.669, "step": 287 }, { "epoch": 0.03, "grad_norm": 1.5045166236295249, "learning_rate": 9.795918367346939e-06, "loss": 0.6636, "step": 288 }, { "epoch": 0.03, "grad_norm": 1.6504726574643758, "learning_rate": 9.829931972789115e-06, "loss": 0.5797, "step": 289 }, { "epoch": 0.03, "grad_norm": 1.5912906639346713, "learning_rate": 9.863945578231294e-06, "loss": 0.6031, "step": 290 }, { "epoch": 0.03, "grad_norm": 1.7686113039496512, "learning_rate": 9.89795918367347e-06, "loss": 0.7857, "step": 291 }, { "epoch": 0.03, "grad_norm": 1.8684862320704891, "learning_rate": 9.931972789115647e-06, "loss": 0.6741, "step": 292 }, { "epoch": 0.03, "grad_norm": 1.5024619067243543, "learning_rate": 9.965986394557824e-06, "loss": 0.6339, "step": 293 }, { "epoch": 0.03, "grad_norm": 2.140626671922462, "learning_rate": 1e-05, "loss": 0.5383, "step": 294 }, { "epoch": 0.03, "grad_norm": 1.84610374198352, "learning_rate": 9.999999726546193e-06, "loss": 0.7058, "step": 295 }, { "epoch": 0.03, "grad_norm": 1.7580418096433525, "learning_rate": 9.9999989061848e-06, "loss": 0.6724, "step": 296 }, { "epoch": 0.03, "grad_norm": 1.528991517191071, "learning_rate": 9.999997538915913e-06, "loss": 0.613, "step": 297 }, { "epoch": 0.03, "grad_norm": 1.4873285053250405, "learning_rate": 9.99999562473968e-06, "loss": 0.6774, "step": 298 }, { "epoch": 0.03, "grad_norm": 1.676381267927691, "learning_rate": 9.99999316365631e-06, "loss": 0.6932, "step": 299 }, { "epoch": 0.03, "grad_norm": 1.7971108491728833, "learning_rate": 9.999990155666072e-06, "loss": 0.6831, "step": 300 }, { "epoch": 0.03, "grad_norm": 1.802668181006372, "learning_rate": 9.999986600769296e-06, "loss": 0.6491, "step": 301 }, { "epoch": 0.03, "grad_norm": 1.6125812184358819, "learning_rate": 9.99998249896637e-06, "loss": 0.7199, "step": 302 }, { "epoch": 0.03, "grad_norm": 1.6516542836357435, "learning_rate": 9.999977850257745e-06, "loss": 0.6227, "step": 303 }, { "epoch": 0.03, "grad_norm": 1.7772359548265464, "learning_rate": 9.999972654643927e-06, "loss": 0.6472, "step": 304 }, { "epoch": 0.03, "grad_norm": 1.7995702522839545, "learning_rate": 9.999966912125486e-06, "loss": 0.713, "step": 305 }, { "epoch": 0.03, "grad_norm": 1.9104549191009221, "learning_rate": 9.999960622703049e-06, "loss": 0.7386, "step": 306 }, { "epoch": 0.03, "grad_norm": 1.6095517696465063, "learning_rate": 9.999953786377303e-06, "loss": 0.6846, "step": 307 }, { "epoch": 0.03, "grad_norm": 1.7129235638501261, "learning_rate": 9.999946403148997e-06, "loss": 0.6521, "step": 308 }, { "epoch": 0.03, "grad_norm": 1.645060795578927, "learning_rate": 9.99993847301894e-06, "loss": 0.5647, "step": 309 }, { "epoch": 0.03, "grad_norm": 1.8022738130137739, "learning_rate": 9.999929995987996e-06, "loss": 0.6969, "step": 310 }, { "epoch": 0.03, "grad_norm": 1.5081083400694233, "learning_rate": 9.999920972057096e-06, "loss": 0.7086, "step": 311 }, { "epoch": 0.03, "grad_norm": 1.6514991304147824, "learning_rate": 9.999911401227222e-06, "loss": 0.7613, "step": 312 }, { "epoch": 0.03, "grad_norm": 1.6466494644830805, "learning_rate": 9.999901283499427e-06, "loss": 0.6938, "step": 313 }, { "epoch": 0.03, "grad_norm": 1.754666323150444, "learning_rate": 9.999890618874814e-06, "loss": 0.7497, "step": 314 }, { "epoch": 0.03, "grad_norm": 2.0015570509267904, "learning_rate": 9.999879407354551e-06, "loss": 0.7746, "step": 315 }, { "epoch": 0.03, "grad_norm": 1.7973646930478318, "learning_rate": 9.999867648939863e-06, "loss": 0.6379, "step": 316 }, { "epoch": 0.03, "grad_norm": 1.537862720835183, "learning_rate": 9.999855343632037e-06, "loss": 0.6258, "step": 317 }, { "epoch": 0.03, "grad_norm": 2.0250361038685423, "learning_rate": 9.99984249143242e-06, "loss": 0.7052, "step": 318 }, { "epoch": 0.03, "grad_norm": 1.5696156193634585, "learning_rate": 9.999829092342412e-06, "loss": 0.6554, "step": 319 }, { "epoch": 0.03, "grad_norm": 1.56017788458782, "learning_rate": 9.999815146363488e-06, "loss": 0.628, "step": 320 }, { "epoch": 0.03, "grad_norm": 1.7129708204267096, "learning_rate": 9.999800653497166e-06, "loss": 0.642, "step": 321 }, { "epoch": 0.03, "grad_norm": 1.655487796752772, "learning_rate": 9.999785613745035e-06, "loss": 0.6903, "step": 322 }, { "epoch": 0.03, "grad_norm": 1.7367841491388956, "learning_rate": 9.999770027108737e-06, "loss": 0.7374, "step": 323 }, { "epoch": 0.03, "grad_norm": 1.6655831582312017, "learning_rate": 9.999753893589981e-06, "loss": 0.6669, "step": 324 }, { "epoch": 0.03, "grad_norm": 2.2309504564697176, "learning_rate": 9.999737213190529e-06, "loss": 0.7111, "step": 325 }, { "epoch": 0.03, "grad_norm": 1.6234964734450033, "learning_rate": 9.999719985912205e-06, "loss": 0.6197, "step": 326 }, { "epoch": 0.03, "grad_norm": 1.7738899021011265, "learning_rate": 9.999702211756896e-06, "loss": 0.7545, "step": 327 }, { "epoch": 0.03, "grad_norm": 1.650626478635823, "learning_rate": 9.999683890726544e-06, "loss": 0.655, "step": 328 }, { "epoch": 0.03, "grad_norm": 1.736533757422792, "learning_rate": 9.999665022823153e-06, "loss": 0.5757, "step": 329 }, { "epoch": 0.03, "grad_norm": 1.6827461937447343, "learning_rate": 9.999645608048788e-06, "loss": 0.645, "step": 330 }, { "epoch": 0.03, "grad_norm": 1.5294909794964617, "learning_rate": 9.99962564640557e-06, "loss": 0.6501, "step": 331 }, { "epoch": 0.03, "grad_norm": 1.3766208081466154, "learning_rate": 9.999605137895687e-06, "loss": 0.6092, "step": 332 }, { "epoch": 0.03, "grad_norm": 1.6341502276949094, "learning_rate": 9.999584082521378e-06, "loss": 0.7326, "step": 333 }, { "epoch": 0.03, "grad_norm": 1.4992907854525923, "learning_rate": 9.999562480284948e-06, "loss": 0.6273, "step": 334 }, { "epoch": 0.03, "grad_norm": 1.6030394939379573, "learning_rate": 9.99954033118876e-06, "loss": 0.6341, "step": 335 }, { "epoch": 0.03, "grad_norm": 1.6179378916660214, "learning_rate": 9.999517635235237e-06, "loss": 0.6948, "step": 336 }, { "epoch": 0.03, "grad_norm": 1.592600111619067, "learning_rate": 9.999494392426858e-06, "loss": 0.7526, "step": 337 }, { "epoch": 0.03, "grad_norm": 1.6963273232930673, "learning_rate": 9.99947060276617e-06, "loss": 0.7329, "step": 338 }, { "epoch": 0.03, "grad_norm": 1.6221727235816963, "learning_rate": 9.999446266255773e-06, "loss": 0.6847, "step": 339 }, { "epoch": 0.03, "grad_norm": 1.6060871459692083, "learning_rate": 9.99942138289833e-06, "loss": 0.7769, "step": 340 }, { "epoch": 0.03, "grad_norm": 1.52983625454078, "learning_rate": 9.999395952696561e-06, "loss": 0.7275, "step": 341 }, { "epoch": 0.03, "grad_norm": 1.6447826638303187, "learning_rate": 9.99936997565325e-06, "loss": 0.7322, "step": 342 }, { "epoch": 0.04, "grad_norm": 1.8176677854425842, "learning_rate": 9.999343451771235e-06, "loss": 0.7515, "step": 343 }, { "epoch": 0.04, "grad_norm": 1.9437219178406995, "learning_rate": 9.99931638105342e-06, "loss": 0.7721, "step": 344 }, { "epoch": 0.04, "grad_norm": 1.6771473152664433, "learning_rate": 9.999288763502766e-06, "loss": 0.7918, "step": 345 }, { "epoch": 0.04, "grad_norm": 1.634450586945855, "learning_rate": 9.999260599122294e-06, "loss": 0.7096, "step": 346 }, { "epoch": 0.04, "grad_norm": 1.5794527310696247, "learning_rate": 9.999231887915081e-06, "loss": 0.7076, "step": 347 }, { "epoch": 0.04, "grad_norm": 1.6903165958654152, "learning_rate": 9.999202629884272e-06, "loss": 0.5892, "step": 348 }, { "epoch": 0.04, "grad_norm": 1.661872423535565, "learning_rate": 9.999172825033065e-06, "loss": 0.6807, "step": 349 }, { "epoch": 0.04, "grad_norm": 1.8169513655170109, "learning_rate": 9.999142473364722e-06, "loss": 0.6684, "step": 350 }, { "epoch": 0.04, "grad_norm": 1.4179570309899656, "learning_rate": 9.999111574882561e-06, "loss": 0.5212, "step": 351 }, { "epoch": 0.04, "grad_norm": 1.5709673247863372, "learning_rate": 9.999080129589962e-06, "loss": 0.7175, "step": 352 }, { "epoch": 0.04, "grad_norm": 1.7580929735342585, "learning_rate": 9.999048137490364e-06, "loss": 0.5493, "step": 353 }, { "epoch": 0.04, "grad_norm": 1.607032516083019, "learning_rate": 9.999015598587269e-06, "loss": 0.8199, "step": 354 }, { "epoch": 0.04, "grad_norm": 1.5395257215149827, "learning_rate": 9.998982512884233e-06, "loss": 0.6235, "step": 355 }, { "epoch": 0.04, "grad_norm": 1.6265690899890533, "learning_rate": 9.998948880384876e-06, "loss": 0.6672, "step": 356 }, { "epoch": 0.04, "grad_norm": 1.7205110582250012, "learning_rate": 9.998914701092877e-06, "loss": 0.6951, "step": 357 }, { "epoch": 0.04, "grad_norm": 1.702629156958473, "learning_rate": 9.998879975011976e-06, "loss": 0.7128, "step": 358 }, { "epoch": 0.04, "grad_norm": 1.6643499990535289, "learning_rate": 9.99884470214597e-06, "loss": 0.6929, "step": 359 }, { "epoch": 0.04, "grad_norm": 1.7980669051900724, "learning_rate": 9.998808882498717e-06, "loss": 0.6677, "step": 360 }, { "epoch": 0.04, "grad_norm": 1.6256884747426377, "learning_rate": 9.998772516074134e-06, "loss": 0.6444, "step": 361 }, { "epoch": 0.04, "grad_norm": 1.9028793663520989, "learning_rate": 9.9987356028762e-06, "loss": 0.715, "step": 362 }, { "epoch": 0.04, "grad_norm": 1.6572368512796947, "learning_rate": 9.998698142908954e-06, "loss": 0.6624, "step": 363 }, { "epoch": 0.04, "grad_norm": 1.6048567014072381, "learning_rate": 9.998660136176493e-06, "loss": 0.6217, "step": 364 }, { "epoch": 0.04, "grad_norm": 1.679923583278858, "learning_rate": 9.99862158268297e-06, "loss": 0.6989, "step": 365 }, { "epoch": 0.04, "grad_norm": 1.5440123258426055, "learning_rate": 9.998582482432608e-06, "loss": 0.633, "step": 366 }, { "epoch": 0.04, "grad_norm": 1.549205003483857, "learning_rate": 9.998542835429681e-06, "loss": 0.678, "step": 367 }, { "epoch": 0.04, "grad_norm": 1.5878322170196446, "learning_rate": 9.998502641678525e-06, "loss": 0.7212, "step": 368 }, { "epoch": 0.04, "grad_norm": 1.6783232194641986, "learning_rate": 9.998461901183537e-06, "loss": 0.6678, "step": 369 }, { "epoch": 0.04, "grad_norm": 1.6316060836910617, "learning_rate": 9.998420613949176e-06, "loss": 0.6773, "step": 370 }, { "epoch": 0.04, "grad_norm": 1.3995291220316652, "learning_rate": 9.998378779979954e-06, "loss": 0.6325, "step": 371 }, { "epoch": 0.04, "grad_norm": 1.8907242673785583, "learning_rate": 9.998336399280448e-06, "loss": 0.705, "step": 372 }, { "epoch": 0.04, "grad_norm": 1.4708764256327442, "learning_rate": 9.998293471855297e-06, "loss": 0.5808, "step": 373 }, { "epoch": 0.04, "grad_norm": 1.9638067794491618, "learning_rate": 9.998249997709191e-06, "loss": 0.717, "step": 374 }, { "epoch": 0.04, "grad_norm": 1.7846350755306453, "learning_rate": 9.998205976846888e-06, "loss": 0.6905, "step": 375 }, { "epoch": 0.04, "grad_norm": 1.5488051470852124, "learning_rate": 9.998161409273203e-06, "loss": 0.5818, "step": 376 }, { "epoch": 0.04, "grad_norm": 1.8473018030931272, "learning_rate": 9.99811629499301e-06, "loss": 0.7489, "step": 377 }, { "epoch": 0.04, "grad_norm": 1.560360346176351, "learning_rate": 9.998070634011247e-06, "loss": 0.6002, "step": 378 }, { "epoch": 0.04, "grad_norm": 1.5280423488815236, "learning_rate": 9.998024426332904e-06, "loss": 0.6258, "step": 379 }, { "epoch": 0.04, "grad_norm": 1.9453762492435107, "learning_rate": 9.997977671963038e-06, "loss": 0.8001, "step": 380 }, { "epoch": 0.04, "grad_norm": 1.7010005835540527, "learning_rate": 9.997930370906763e-06, "loss": 0.7038, "step": 381 }, { "epoch": 0.04, "grad_norm": 1.6476142291288893, "learning_rate": 9.997882523169251e-06, "loss": 0.7714, "step": 382 }, { "epoch": 0.04, "grad_norm": 1.755630617057486, "learning_rate": 9.997834128755738e-06, "loss": 0.7055, "step": 383 }, { "epoch": 0.04, "grad_norm": 1.6267695886719453, "learning_rate": 9.997785187671517e-06, "loss": 0.6993, "step": 384 }, { "epoch": 0.04, "grad_norm": 1.7780687623299787, "learning_rate": 9.997735699921937e-06, "loss": 0.7541, "step": 385 }, { "epoch": 0.04, "grad_norm": 1.5915846416910782, "learning_rate": 9.997685665512418e-06, "loss": 0.625, "step": 386 }, { "epoch": 0.04, "grad_norm": 1.785436101251001, "learning_rate": 9.997635084448427e-06, "loss": 0.7341, "step": 387 }, { "epoch": 0.04, "grad_norm": 1.5382395997524894, "learning_rate": 9.997583956735502e-06, "loss": 0.7149, "step": 388 }, { "epoch": 0.04, "grad_norm": 1.4503720028105898, "learning_rate": 9.99753228237923e-06, "loss": 0.5882, "step": 389 }, { "epoch": 0.04, "grad_norm": 1.7306280651256394, "learning_rate": 9.997480061385266e-06, "loss": 0.6247, "step": 390 }, { "epoch": 0.04, "grad_norm": 1.5815025719752098, "learning_rate": 9.997427293759323e-06, "loss": 0.5849, "step": 391 }, { "epoch": 0.04, "grad_norm": 1.7546433792448335, "learning_rate": 9.99737397950717e-06, "loss": 0.7412, "step": 392 }, { "epoch": 0.04, "grad_norm": 1.5284632920314802, "learning_rate": 9.997320118634641e-06, "loss": 0.6658, "step": 393 }, { "epoch": 0.04, "grad_norm": 1.7098333451918801, "learning_rate": 9.997265711147627e-06, "loss": 0.8079, "step": 394 }, { "epoch": 0.04, "grad_norm": 1.7057797280024627, "learning_rate": 9.997210757052079e-06, "loss": 0.6556, "step": 395 }, { "epoch": 0.04, "grad_norm": 1.708397633814117, "learning_rate": 9.997155256354005e-06, "loss": 0.7991, "step": 396 }, { "epoch": 0.04, "grad_norm": 1.827890767908938, "learning_rate": 9.997099209059482e-06, "loss": 0.6933, "step": 397 }, { "epoch": 0.04, "grad_norm": 1.6145896613024784, "learning_rate": 9.997042615174635e-06, "loss": 0.6451, "step": 398 }, { "epoch": 0.04, "grad_norm": 1.591775785420633, "learning_rate": 9.996985474705654e-06, "loss": 0.7044, "step": 399 }, { "epoch": 0.04, "grad_norm": 1.665556780527005, "learning_rate": 9.996927787658795e-06, "loss": 0.6584, "step": 400 }, { "epoch": 0.04, "grad_norm": 1.6290931677994356, "learning_rate": 9.996869554040362e-06, "loss": 0.6498, "step": 401 }, { "epoch": 0.04, "grad_norm": 1.5809216941170372, "learning_rate": 9.996810773856729e-06, "loss": 0.6305, "step": 402 }, { "epoch": 0.04, "grad_norm": 1.686773042164428, "learning_rate": 9.996751447114322e-06, "loss": 0.7137, "step": 403 }, { "epoch": 0.04, "grad_norm": 1.4422562163575243, "learning_rate": 9.996691573819632e-06, "loss": 0.6768, "step": 404 }, { "epoch": 0.04, "grad_norm": 1.7003502533161388, "learning_rate": 9.996631153979209e-06, "loss": 0.7548, "step": 405 }, { "epoch": 0.04, "grad_norm": 1.5887772475966933, "learning_rate": 9.996570187599657e-06, "loss": 0.6778, "step": 406 }, { "epoch": 0.04, "grad_norm": 2.155730414170845, "learning_rate": 9.996508674687652e-06, "loss": 0.7923, "step": 407 }, { "epoch": 0.04, "grad_norm": 1.471160445533601, "learning_rate": 9.996446615249917e-06, "loss": 0.7711, "step": 408 }, { "epoch": 0.04, "grad_norm": 1.6763196731389678, "learning_rate": 9.99638400929324e-06, "loss": 0.6092, "step": 409 }, { "epoch": 0.04, "grad_norm": 1.435952843813544, "learning_rate": 9.996320856824473e-06, "loss": 0.6627, "step": 410 }, { "epoch": 0.04, "grad_norm": 1.4792720263313655, "learning_rate": 9.996257157850519e-06, "loss": 0.7005, "step": 411 }, { "epoch": 0.04, "grad_norm": 1.7611777891365465, "learning_rate": 9.99619291237835e-06, "loss": 0.7117, "step": 412 }, { "epoch": 0.04, "grad_norm": 1.6016812979384114, "learning_rate": 9.996128120414989e-06, "loss": 0.7087, "step": 413 }, { "epoch": 0.04, "grad_norm": 1.4786609313116865, "learning_rate": 9.996062781967529e-06, "loss": 0.6386, "step": 414 }, { "epoch": 0.04, "grad_norm": 1.7585561593157046, "learning_rate": 9.995996897043109e-06, "loss": 0.716, "step": 415 }, { "epoch": 0.04, "grad_norm": 1.749975726499476, "learning_rate": 9.99593046564894e-06, "loss": 0.6753, "step": 416 }, { "epoch": 0.04, "grad_norm": 1.6182760833576801, "learning_rate": 9.995863487792289e-06, "loss": 0.7216, "step": 417 }, { "epoch": 0.04, "grad_norm": 1.573042490970415, "learning_rate": 9.99579596348048e-06, "loss": 0.7134, "step": 418 }, { "epoch": 0.04, "grad_norm": 1.5862376923884296, "learning_rate": 9.995727892720902e-06, "loss": 0.6771, "step": 419 }, { "epoch": 0.04, "grad_norm": 1.760960310832639, "learning_rate": 9.995659275520996e-06, "loss": 0.6469, "step": 420 }, { "epoch": 0.04, "grad_norm": 1.5177966756046415, "learning_rate": 9.99559011188827e-06, "loss": 0.6309, "step": 421 }, { "epoch": 0.04, "grad_norm": 1.6662647897125524, "learning_rate": 9.995520401830291e-06, "loss": 0.7176, "step": 422 }, { "epoch": 0.04, "grad_norm": 1.5658371724284577, "learning_rate": 9.995450145354682e-06, "loss": 0.7523, "step": 423 }, { "epoch": 0.04, "grad_norm": 1.454489304170467, "learning_rate": 9.995379342469127e-06, "loss": 0.6572, "step": 424 }, { "epoch": 0.04, "grad_norm": 1.7609138312852388, "learning_rate": 9.995307993181374e-06, "loss": 0.7127, "step": 425 }, { "epoch": 0.04, "grad_norm": 1.5258757171660189, "learning_rate": 9.995236097499222e-06, "loss": 0.742, "step": 426 }, { "epoch": 0.04, "grad_norm": 1.673971563727336, "learning_rate": 9.99516365543054e-06, "loss": 0.7603, "step": 427 }, { "epoch": 0.04, "grad_norm": 1.5431588831806191, "learning_rate": 9.995090666983248e-06, "loss": 0.595, "step": 428 }, { "epoch": 0.04, "grad_norm": 1.5415074275212355, "learning_rate": 9.995017132165332e-06, "loss": 0.6826, "step": 429 }, { "epoch": 0.04, "grad_norm": 1.7385736707723702, "learning_rate": 9.994943050984835e-06, "loss": 0.7487, "step": 430 }, { "epoch": 0.04, "grad_norm": 1.5386634051189538, "learning_rate": 9.99486842344986e-06, "loss": 0.6182, "step": 431 }, { "epoch": 0.04, "grad_norm": 1.5913661610749916, "learning_rate": 9.994793249568568e-06, "loss": 0.6604, "step": 432 }, { "epoch": 0.04, "grad_norm": 1.7884640509896725, "learning_rate": 9.994717529349186e-06, "loss": 0.7561, "step": 433 }, { "epoch": 0.04, "grad_norm": 1.4719264452929186, "learning_rate": 9.994641262799991e-06, "loss": 0.7683, "step": 434 }, { "epoch": 0.04, "grad_norm": 1.5350678647320022, "learning_rate": 9.99456444992933e-06, "loss": 0.5652, "step": 435 }, { "epoch": 0.04, "grad_norm": 1.4758686156576362, "learning_rate": 9.994487090745601e-06, "loss": 0.655, "step": 436 }, { "epoch": 0.04, "grad_norm": 1.7665322059127975, "learning_rate": 9.994409185257268e-06, "loss": 0.7114, "step": 437 }, { "epoch": 0.04, "grad_norm": 1.5711899527627375, "learning_rate": 9.994330733472852e-06, "loss": 0.713, "step": 438 }, { "epoch": 0.04, "grad_norm": 1.4491459339069, "learning_rate": 9.994251735400935e-06, "loss": 0.5868, "step": 439 }, { "epoch": 0.04, "grad_norm": 1.765890641516599, "learning_rate": 9.994172191050155e-06, "loss": 0.7696, "step": 440 }, { "epoch": 0.05, "grad_norm": 1.3621720055702158, "learning_rate": 9.994092100429216e-06, "loss": 0.5921, "step": 441 }, { "epoch": 0.05, "grad_norm": 1.691973504512258, "learning_rate": 9.994011463546876e-06, "loss": 0.6994, "step": 442 }, { "epoch": 0.05, "grad_norm": 1.648346461700514, "learning_rate": 9.993930280411958e-06, "loss": 0.617, "step": 443 }, { "epoch": 0.05, "grad_norm": 1.683115575215652, "learning_rate": 9.993848551033339e-06, "loss": 0.7175, "step": 444 }, { "epoch": 0.05, "grad_norm": 1.4888770348199771, "learning_rate": 9.99376627541996e-06, "loss": 0.7437, "step": 445 }, { "epoch": 0.05, "grad_norm": 1.4803020926068573, "learning_rate": 9.99368345358082e-06, "loss": 0.5644, "step": 446 }, { "epoch": 0.05, "grad_norm": 1.6019932977556914, "learning_rate": 9.99360008552498e-06, "loss": 0.7058, "step": 447 }, { "epoch": 0.05, "grad_norm": 1.420506168896845, "learning_rate": 9.993516171261555e-06, "loss": 0.6262, "step": 448 }, { "epoch": 0.05, "grad_norm": 1.638464800086788, "learning_rate": 9.993431710799727e-06, "loss": 0.7253, "step": 449 }, { "epoch": 0.05, "grad_norm": 1.6598107515230407, "learning_rate": 9.993346704148734e-06, "loss": 0.7058, "step": 450 }, { "epoch": 0.05, "grad_norm": 1.508953962213073, "learning_rate": 9.993261151317872e-06, "loss": 0.5897, "step": 451 }, { "epoch": 0.05, "grad_norm": 1.6337693465042007, "learning_rate": 9.993175052316503e-06, "loss": 0.6275, "step": 452 }, { "epoch": 0.05, "grad_norm": 1.5758135481899527, "learning_rate": 9.993088407154041e-06, "loss": 0.6737, "step": 453 }, { "epoch": 0.05, "grad_norm": 1.7331042972361952, "learning_rate": 9.993001215839963e-06, "loss": 0.6469, "step": 454 }, { "epoch": 0.05, "grad_norm": 1.6735668226839144, "learning_rate": 9.99291347838381e-06, "loss": 0.6747, "step": 455 }, { "epoch": 0.05, "grad_norm": 1.515940846838253, "learning_rate": 9.992825194795178e-06, "loss": 0.7466, "step": 456 }, { "epoch": 0.05, "grad_norm": 1.469179102834175, "learning_rate": 9.992736365083718e-06, "loss": 0.6438, "step": 457 }, { "epoch": 0.05, "grad_norm": 1.554476908587931, "learning_rate": 9.992646989259153e-06, "loss": 0.6411, "step": 458 }, { "epoch": 0.05, "grad_norm": 1.622628595768616, "learning_rate": 9.992557067331256e-06, "loss": 0.6537, "step": 459 }, { "epoch": 0.05, "grad_norm": 1.7429560021220534, "learning_rate": 9.992466599309863e-06, "loss": 0.7065, "step": 460 }, { "epoch": 0.05, "grad_norm": 1.5466660324427381, "learning_rate": 9.99237558520487e-06, "loss": 0.6491, "step": 461 }, { "epoch": 0.05, "grad_norm": 2.1054325327843593, "learning_rate": 9.992284025026231e-06, "loss": 0.6246, "step": 462 }, { "epoch": 0.05, "grad_norm": 1.569510712931431, "learning_rate": 9.992191918783964e-06, "loss": 0.6623, "step": 463 }, { "epoch": 0.05, "grad_norm": 1.7124929417313908, "learning_rate": 9.992099266488142e-06, "loss": 0.6953, "step": 464 }, { "epoch": 0.05, "grad_norm": 1.57778385376264, "learning_rate": 9.992006068148898e-06, "loss": 0.6475, "step": 465 }, { "epoch": 0.05, "grad_norm": 1.409779353746265, "learning_rate": 9.991912323776428e-06, "loss": 0.6332, "step": 466 }, { "epoch": 0.05, "grad_norm": 1.6387409008447908, "learning_rate": 9.991818033380985e-06, "loss": 0.7173, "step": 467 }, { "epoch": 0.05, "grad_norm": 1.665016244645226, "learning_rate": 9.991723196972883e-06, "loss": 0.6852, "step": 468 }, { "epoch": 0.05, "grad_norm": 1.5042410883469257, "learning_rate": 9.991627814562495e-06, "loss": 0.5788, "step": 469 }, { "epoch": 0.05, "grad_norm": 1.6842467368281626, "learning_rate": 9.991531886160254e-06, "loss": 0.6145, "step": 470 }, { "epoch": 0.05, "grad_norm": 1.670955531834775, "learning_rate": 9.991435411776654e-06, "loss": 0.7435, "step": 471 }, { "epoch": 0.05, "grad_norm": 1.494600387985506, "learning_rate": 9.991338391422247e-06, "loss": 0.7163, "step": 472 }, { "epoch": 0.05, "grad_norm": 1.4409002785067817, "learning_rate": 9.991240825107645e-06, "loss": 0.6875, "step": 473 }, { "epoch": 0.05, "grad_norm": 1.5107234971957306, "learning_rate": 9.99114271284352e-06, "loss": 0.6557, "step": 474 }, { "epoch": 0.05, "grad_norm": 1.4656200556540082, "learning_rate": 9.991044054640601e-06, "loss": 0.6999, "step": 475 }, { "epoch": 0.05, "grad_norm": 1.594455898846147, "learning_rate": 9.990944850509685e-06, "loss": 0.6617, "step": 476 }, { "epoch": 0.05, "grad_norm": 1.561961938894592, "learning_rate": 9.990845100461618e-06, "loss": 0.6902, "step": 477 }, { "epoch": 0.05, "grad_norm": 1.6788946782998317, "learning_rate": 9.990744804507315e-06, "loss": 0.7163, "step": 478 }, { "epoch": 0.05, "grad_norm": 1.4761150403436254, "learning_rate": 9.990643962657744e-06, "loss": 0.6651, "step": 479 }, { "epoch": 0.05, "grad_norm": 1.3946456821489803, "learning_rate": 9.990542574923935e-06, "loss": 0.7245, "step": 480 }, { "epoch": 0.05, "grad_norm": 1.7905484506263207, "learning_rate": 9.990440641316979e-06, "loss": 0.7078, "step": 481 }, { "epoch": 0.05, "grad_norm": 1.5118520199992873, "learning_rate": 9.990338161848024e-06, "loss": 0.7461, "step": 482 }, { "epoch": 0.05, "grad_norm": 1.6747201465967994, "learning_rate": 9.990235136528281e-06, "loss": 0.7172, "step": 483 }, { "epoch": 0.05, "grad_norm": 1.416511909984224, "learning_rate": 9.99013156536902e-06, "loss": 0.6234, "step": 484 }, { "epoch": 0.05, "grad_norm": 1.728548359758747, "learning_rate": 9.990027448381568e-06, "loss": 0.8126, "step": 485 }, { "epoch": 0.05, "grad_norm": 1.4294049918410565, "learning_rate": 9.989922785577311e-06, "loss": 0.7111, "step": 486 }, { "epoch": 0.05, "grad_norm": 1.6735141726127771, "learning_rate": 9.989817576967704e-06, "loss": 0.7209, "step": 487 }, { "epoch": 0.05, "grad_norm": 1.4029247906660804, "learning_rate": 9.98971182256425e-06, "loss": 0.6393, "step": 488 }, { "epoch": 0.05, "grad_norm": 1.4903183324794946, "learning_rate": 9.989605522378516e-06, "loss": 0.6788, "step": 489 }, { "epoch": 0.05, "grad_norm": 1.6711799557576887, "learning_rate": 9.989498676422132e-06, "loss": 0.6601, "step": 490 }, { "epoch": 0.05, "grad_norm": 1.6582852442169198, "learning_rate": 9.989391284706783e-06, "loss": 0.6908, "step": 491 }, { "epoch": 0.05, "grad_norm": 1.623681054596805, "learning_rate": 9.989283347244216e-06, "loss": 0.7883, "step": 492 }, { "epoch": 0.05, "grad_norm": 1.3727322653160583, "learning_rate": 9.989174864046239e-06, "loss": 0.6017, "step": 493 }, { "epoch": 0.05, "grad_norm": 1.4589474203213995, "learning_rate": 9.989065835124716e-06, "loss": 0.7297, "step": 494 }, { "epoch": 0.05, "grad_norm": 1.7696143263693611, "learning_rate": 9.988956260491573e-06, "loss": 0.8105, "step": 495 }, { "epoch": 0.05, "grad_norm": 2.033106731476567, "learning_rate": 9.988846140158798e-06, "loss": 0.7772, "step": 496 }, { "epoch": 0.05, "grad_norm": 1.5588437676601945, "learning_rate": 9.988735474138434e-06, "loss": 0.7016, "step": 497 }, { "epoch": 0.05, "grad_norm": 1.5657083429020284, "learning_rate": 9.988624262442585e-06, "loss": 0.7846, "step": 498 }, { "epoch": 0.05, "grad_norm": 1.3896560662576913, "learning_rate": 9.988512505083416e-06, "loss": 0.6073, "step": 499 }, { "epoch": 0.05, "grad_norm": 1.5107887468523504, "learning_rate": 9.988400202073153e-06, "loss": 0.6334, "step": 500 }, { "epoch": 0.05, "grad_norm": 1.506902593544748, "learning_rate": 9.988287353424077e-06, "loss": 0.7283, "step": 501 }, { "epoch": 0.05, "grad_norm": 1.6139460820418774, "learning_rate": 9.988173959148535e-06, "loss": 0.7268, "step": 502 }, { "epoch": 0.05, "grad_norm": 1.3359696756291453, "learning_rate": 9.988060019258928e-06, "loss": 0.6187, "step": 503 }, { "epoch": 0.05, "grad_norm": 1.5664604476135433, "learning_rate": 9.987945533767718e-06, "loss": 0.6565, "step": 504 }, { "epoch": 0.05, "grad_norm": 1.7265550298682768, "learning_rate": 9.98783050268743e-06, "loss": 0.7188, "step": 505 }, { "epoch": 0.05, "grad_norm": 1.515489931870135, "learning_rate": 9.987714926030644e-06, "loss": 0.6133, "step": 506 }, { "epoch": 0.05, "grad_norm": 1.4416757625674665, "learning_rate": 9.987598803810006e-06, "loss": 0.6681, "step": 507 }, { "epoch": 0.05, "grad_norm": 1.51344453074681, "learning_rate": 9.98748213603821e-06, "loss": 0.6453, "step": 508 }, { "epoch": 0.05, "grad_norm": 1.6242321186387574, "learning_rate": 9.987364922728026e-06, "loss": 0.7998, "step": 509 }, { "epoch": 0.05, "grad_norm": 1.5885905648904102, "learning_rate": 9.987247163892271e-06, "loss": 0.6309, "step": 510 }, { "epoch": 0.05, "grad_norm": 1.9603133696010449, "learning_rate": 9.987128859543825e-06, "loss": 0.6341, "step": 511 }, { "epoch": 0.05, "grad_norm": 1.5406004847268495, "learning_rate": 9.987010009695627e-06, "loss": 0.6762, "step": 512 }, { "epoch": 0.05, "grad_norm": 1.6387471157027147, "learning_rate": 9.986890614360681e-06, "loss": 0.634, "step": 513 }, { "epoch": 0.05, "grad_norm": 1.6069973882117803, "learning_rate": 9.986770673552042e-06, "loss": 0.6951, "step": 514 }, { "epoch": 0.05, "grad_norm": 1.3752665334315188, "learning_rate": 9.986650187282836e-06, "loss": 0.6347, "step": 515 }, { "epoch": 0.05, "grad_norm": 1.6425148993081269, "learning_rate": 9.986529155566233e-06, "loss": 0.7698, "step": 516 }, { "epoch": 0.05, "grad_norm": 1.467061353126147, "learning_rate": 9.98640757841548e-06, "loss": 0.6651, "step": 517 }, { "epoch": 0.05, "grad_norm": 1.5461795582568134, "learning_rate": 9.98628545584387e-06, "loss": 0.5251, "step": 518 }, { "epoch": 0.05, "grad_norm": 1.7628762300048824, "learning_rate": 9.986162787864764e-06, "loss": 0.6601, "step": 519 }, { "epoch": 0.05, "grad_norm": 1.480295261734004, "learning_rate": 9.986039574491578e-06, "loss": 0.7628, "step": 520 }, { "epoch": 0.05, "grad_norm": 1.820730698766, "learning_rate": 9.98591581573779e-06, "loss": 0.6943, "step": 521 }, { "epoch": 0.05, "grad_norm": 1.2487304563219783, "learning_rate": 9.985791511616935e-06, "loss": 0.5827, "step": 522 }, { "epoch": 0.05, "grad_norm": 1.5584498544080858, "learning_rate": 9.985666662142613e-06, "loss": 0.693, "step": 523 }, { "epoch": 0.05, "grad_norm": 1.5953389193291934, "learning_rate": 9.985541267328479e-06, "loss": 0.7212, "step": 524 }, { "epoch": 0.05, "grad_norm": 1.7610545623296376, "learning_rate": 9.985415327188245e-06, "loss": 0.6589, "step": 525 }, { "epoch": 0.05, "grad_norm": 1.5312279735296874, "learning_rate": 9.985288841735693e-06, "loss": 0.7354, "step": 526 }, { "epoch": 0.05, "grad_norm": 1.7871952626468732, "learning_rate": 9.985161810984652e-06, "loss": 0.7463, "step": 527 }, { "epoch": 0.05, "grad_norm": 1.5755233160705768, "learning_rate": 9.98503423494902e-06, "loss": 0.614, "step": 528 }, { "epoch": 0.05, "grad_norm": 1.6530511714274079, "learning_rate": 9.984906113642753e-06, "loss": 0.7155, "step": 529 }, { "epoch": 0.05, "grad_norm": 1.4657524751007716, "learning_rate": 9.984777447079861e-06, "loss": 0.6925, "step": 530 }, { "epoch": 0.05, "grad_norm": 1.5171390952022263, "learning_rate": 9.984648235274422e-06, "loss": 0.7037, "step": 531 }, { "epoch": 0.05, "grad_norm": 1.7349274353633164, "learning_rate": 9.984518478240569e-06, "loss": 0.7211, "step": 532 }, { "epoch": 0.05, "grad_norm": 1.5433538695898017, "learning_rate": 9.98438817599249e-06, "loss": 0.7634, "step": 533 }, { "epoch": 0.05, "grad_norm": 1.8670086078470336, "learning_rate": 9.984257328544442e-06, "loss": 0.7798, "step": 534 }, { "epoch": 0.05, "grad_norm": 1.5350557167495118, "learning_rate": 9.984125935910737e-06, "loss": 0.6526, "step": 535 }, { "epoch": 0.05, "grad_norm": 1.6208305183111038, "learning_rate": 9.983993998105746e-06, "loss": 0.7075, "step": 536 }, { "epoch": 0.05, "grad_norm": 1.7488184749584004, "learning_rate": 9.983861515143901e-06, "loss": 0.6663, "step": 537 }, { "epoch": 0.05, "grad_norm": 2.042714271798969, "learning_rate": 9.983728487039691e-06, "loss": 0.7814, "step": 538 }, { "epoch": 0.06, "grad_norm": 1.4200814136779272, "learning_rate": 9.983594913807672e-06, "loss": 0.5452, "step": 539 }, { "epoch": 0.06, "grad_norm": 3.187041438417489, "learning_rate": 9.983460795462452e-06, "loss": 0.6795, "step": 540 }, { "epoch": 0.06, "grad_norm": 1.9606628290627766, "learning_rate": 9.983326132018698e-06, "loss": 0.7356, "step": 541 }, { "epoch": 0.06, "grad_norm": 1.5868239167679457, "learning_rate": 9.983190923491143e-06, "loss": 0.6101, "step": 542 }, { "epoch": 0.06, "grad_norm": 1.5847538100885545, "learning_rate": 9.983055169894575e-06, "loss": 0.6375, "step": 543 }, { "epoch": 0.06, "grad_norm": 1.584217607978949, "learning_rate": 9.982918871243842e-06, "loss": 0.7571, "step": 544 }, { "epoch": 0.06, "grad_norm": 1.6635621168541042, "learning_rate": 9.982782027553858e-06, "loss": 0.6847, "step": 545 }, { "epoch": 0.06, "grad_norm": 1.6382022346321148, "learning_rate": 9.982644638839583e-06, "loss": 0.6803, "step": 546 }, { "epoch": 0.06, "grad_norm": 1.434956988685626, "learning_rate": 9.98250670511605e-06, "loss": 0.6432, "step": 547 }, { "epoch": 0.06, "grad_norm": 1.6378717878135038, "learning_rate": 9.982368226398344e-06, "loss": 0.7163, "step": 548 }, { "epoch": 0.06, "grad_norm": 1.6730682855233037, "learning_rate": 9.982229202701615e-06, "loss": 0.6889, "step": 549 }, { "epoch": 0.06, "grad_norm": 1.55734545305828, "learning_rate": 9.982089634041068e-06, "loss": 0.5671, "step": 550 }, { "epoch": 0.06, "grad_norm": 1.529144514897054, "learning_rate": 9.981949520431968e-06, "loss": 0.5986, "step": 551 }, { "epoch": 0.06, "grad_norm": 1.644768400889591, "learning_rate": 9.981808861889644e-06, "loss": 0.6502, "step": 552 }, { "epoch": 0.06, "grad_norm": 1.6446303415730554, "learning_rate": 9.981667658429477e-06, "loss": 0.665, "step": 553 }, { "epoch": 0.06, "grad_norm": 1.5483269561758477, "learning_rate": 9.981525910066916e-06, "loss": 0.7482, "step": 554 }, { "epoch": 0.06, "grad_norm": 1.6286959973094726, "learning_rate": 9.981383616817464e-06, "loss": 0.7302, "step": 555 }, { "epoch": 0.06, "grad_norm": 1.573871289654491, "learning_rate": 9.981240778696683e-06, "loss": 0.6557, "step": 556 }, { "epoch": 0.06, "grad_norm": 1.6743148406426847, "learning_rate": 9.981097395720203e-06, "loss": 0.7423, "step": 557 }, { "epoch": 0.06, "grad_norm": 1.5252506194486974, "learning_rate": 9.980953467903702e-06, "loss": 0.6597, "step": 558 }, { "epoch": 0.06, "grad_norm": 1.6301269624290742, "learning_rate": 9.980808995262923e-06, "loss": 0.651, "step": 559 }, { "epoch": 0.06, "grad_norm": 1.3980441924733011, "learning_rate": 9.980663977813672e-06, "loss": 0.6278, "step": 560 }, { "epoch": 0.06, "grad_norm": 1.5067032425059903, "learning_rate": 9.980518415571809e-06, "loss": 0.682, "step": 561 }, { "epoch": 0.06, "grad_norm": 1.7139029500493148, "learning_rate": 9.980372308553257e-06, "loss": 0.6932, "step": 562 }, { "epoch": 0.06, "grad_norm": 1.501536412019019, "learning_rate": 9.980225656773997e-06, "loss": 0.6772, "step": 563 }, { "epoch": 0.06, "grad_norm": 1.7112562890881953, "learning_rate": 9.98007846025007e-06, "loss": 0.7907, "step": 564 }, { "epoch": 0.06, "grad_norm": 1.6286064111849599, "learning_rate": 9.979930718997574e-06, "loss": 0.7245, "step": 565 }, { "epoch": 0.06, "grad_norm": 1.6321380314573433, "learning_rate": 9.979782433032675e-06, "loss": 0.8327, "step": 566 }, { "epoch": 0.06, "grad_norm": 1.4638571432086223, "learning_rate": 9.979633602371586e-06, "loss": 0.6483, "step": 567 }, { "epoch": 0.06, "grad_norm": 1.66644146092201, "learning_rate": 9.979484227030591e-06, "loss": 0.6322, "step": 568 }, { "epoch": 0.06, "grad_norm": 1.615766769920733, "learning_rate": 9.979334307026027e-06, "loss": 0.6686, "step": 569 }, { "epoch": 0.06, "grad_norm": 1.6475558295737622, "learning_rate": 9.979183842374294e-06, "loss": 0.7293, "step": 570 }, { "epoch": 0.06, "grad_norm": 1.697185904007092, "learning_rate": 9.979032833091849e-06, "loss": 0.7073, "step": 571 }, { "epoch": 0.06, "grad_norm": 1.701861663086938, "learning_rate": 9.978881279195209e-06, "loss": 0.6515, "step": 572 }, { "epoch": 0.06, "grad_norm": 1.5223887336344482, "learning_rate": 9.978729180700953e-06, "loss": 0.6643, "step": 573 }, { "epoch": 0.06, "grad_norm": 1.4005498763357658, "learning_rate": 9.978576537625715e-06, "loss": 0.5844, "step": 574 }, { "epoch": 0.06, "grad_norm": 1.4650169538025493, "learning_rate": 9.978423349986193e-06, "loss": 0.6397, "step": 575 }, { "epoch": 0.06, "grad_norm": 1.7786164129359814, "learning_rate": 9.978269617799143e-06, "loss": 0.6921, "step": 576 }, { "epoch": 0.06, "grad_norm": 1.6181570653441621, "learning_rate": 9.97811534108138e-06, "loss": 0.6493, "step": 577 }, { "epoch": 0.06, "grad_norm": 1.553675145600646, "learning_rate": 9.97796051984978e-06, "loss": 0.8055, "step": 578 }, { "epoch": 0.06, "grad_norm": 1.5911304586160968, "learning_rate": 9.977805154121278e-06, "loss": 0.6939, "step": 579 }, { "epoch": 0.06, "grad_norm": 1.4704513122306515, "learning_rate": 9.977649243912866e-06, "loss": 0.6474, "step": 580 }, { "epoch": 0.06, "grad_norm": 1.6040925912659265, "learning_rate": 9.977492789241599e-06, "loss": 0.6275, "step": 581 }, { "epoch": 0.06, "grad_norm": 1.6786299902662194, "learning_rate": 9.977335790124589e-06, "loss": 0.593, "step": 582 }, { "epoch": 0.06, "grad_norm": 1.40682385807263, "learning_rate": 9.97717824657901e-06, "loss": 0.612, "step": 583 }, { "epoch": 0.06, "grad_norm": 1.4581226868218453, "learning_rate": 9.977020158622096e-06, "loss": 0.6443, "step": 584 }, { "epoch": 0.06, "grad_norm": 1.5066479192364155, "learning_rate": 9.976861526271136e-06, "loss": 0.7965, "step": 585 }, { "epoch": 0.06, "grad_norm": 1.4741816995078096, "learning_rate": 9.976702349543481e-06, "loss": 0.568, "step": 586 }, { "epoch": 0.06, "grad_norm": 1.6514824161086927, "learning_rate": 9.976542628456544e-06, "loss": 0.6881, "step": 587 }, { "epoch": 0.06, "grad_norm": 1.4891627820104336, "learning_rate": 9.976382363027797e-06, "loss": 0.7003, "step": 588 }, { "epoch": 0.06, "grad_norm": 1.7104583130139306, "learning_rate": 9.976221553274767e-06, "loss": 0.7689, "step": 589 }, { "epoch": 0.06, "grad_norm": 1.6468907041690632, "learning_rate": 9.976060199215046e-06, "loss": 0.7306, "step": 590 }, { "epoch": 0.06, "grad_norm": 1.5999926378188378, "learning_rate": 9.975898300866279e-06, "loss": 0.7782, "step": 591 }, { "epoch": 0.06, "grad_norm": 1.5447954643712054, "learning_rate": 9.975735858246179e-06, "loss": 0.6822, "step": 592 }, { "epoch": 0.06, "grad_norm": 1.5087164767078907, "learning_rate": 9.975572871372513e-06, "loss": 0.676, "step": 593 }, { "epoch": 0.06, "grad_norm": 1.4522416351435052, "learning_rate": 9.975409340263109e-06, "loss": 0.7069, "step": 594 }, { "epoch": 0.06, "grad_norm": 1.475019591192796, "learning_rate": 9.975245264935853e-06, "loss": 0.5998, "step": 595 }, { "epoch": 0.06, "grad_norm": 1.4360122576651775, "learning_rate": 9.975080645408693e-06, "loss": 0.7315, "step": 596 }, { "epoch": 0.06, "grad_norm": 1.3539123095318346, "learning_rate": 9.974915481699634e-06, "loss": 0.674, "step": 597 }, { "epoch": 0.06, "grad_norm": 1.7306710462296289, "learning_rate": 9.974749773826744e-06, "loss": 0.6698, "step": 598 }, { "epoch": 0.06, "grad_norm": 1.4507460069072786, "learning_rate": 9.974583521808147e-06, "loss": 0.6136, "step": 599 }, { "epoch": 0.06, "grad_norm": 1.4028071358389023, "learning_rate": 9.974416725662028e-06, "loss": 0.6842, "step": 600 }, { "epoch": 0.06, "grad_norm": 1.5213516654325054, "learning_rate": 9.974249385406631e-06, "loss": 0.6742, "step": 601 }, { "epoch": 0.06, "grad_norm": 1.6276445147315939, "learning_rate": 9.97408150106026e-06, "loss": 0.7605, "step": 602 }, { "epoch": 0.06, "grad_norm": 1.4669498841815967, "learning_rate": 9.97391307264128e-06, "loss": 0.6462, "step": 603 }, { "epoch": 0.06, "grad_norm": 1.564466266052309, "learning_rate": 9.973744100168112e-06, "loss": 0.6503, "step": 604 }, { "epoch": 0.06, "grad_norm": 1.5802174950627303, "learning_rate": 9.97357458365924e-06, "loss": 0.76, "step": 605 }, { "epoch": 0.06, "grad_norm": 1.579790444320063, "learning_rate": 9.973404523133206e-06, "loss": 0.5834, "step": 606 }, { "epoch": 0.06, "grad_norm": 1.7406822496086212, "learning_rate": 9.973233918608609e-06, "loss": 0.6632, "step": 607 }, { "epoch": 0.06, "grad_norm": 1.6100796455385775, "learning_rate": 9.973062770104114e-06, "loss": 0.6494, "step": 608 }, { "epoch": 0.06, "grad_norm": 1.830932133574916, "learning_rate": 9.972891077638438e-06, "loss": 0.7405, "step": 609 }, { "epoch": 0.06, "grad_norm": 1.468468526370486, "learning_rate": 9.972718841230363e-06, "loss": 0.6115, "step": 610 }, { "epoch": 0.06, "grad_norm": 1.7762323191939906, "learning_rate": 9.972546060898725e-06, "loss": 0.731, "step": 611 }, { "epoch": 0.06, "grad_norm": 1.7512281292041953, "learning_rate": 9.972372736662428e-06, "loss": 0.7265, "step": 612 }, { "epoch": 0.06, "grad_norm": 1.4396195534515968, "learning_rate": 9.972198868540429e-06, "loss": 0.6302, "step": 613 }, { "epoch": 0.06, "grad_norm": 1.5424278602717028, "learning_rate": 9.972024456551742e-06, "loss": 0.6823, "step": 614 }, { "epoch": 0.06, "grad_norm": 1.610790625591573, "learning_rate": 9.971849500715448e-06, "loss": 0.6997, "step": 615 }, { "epoch": 0.06, "grad_norm": 1.4055547243555115, "learning_rate": 9.971674001050687e-06, "loss": 0.6235, "step": 616 }, { "epoch": 0.06, "grad_norm": 1.4427172367842984, "learning_rate": 9.971497957576648e-06, "loss": 0.6289, "step": 617 }, { "epoch": 0.06, "grad_norm": 1.5087236104747683, "learning_rate": 9.971321370312594e-06, "loss": 0.6534, "step": 618 }, { "epoch": 0.06, "grad_norm": 1.646882428986127, "learning_rate": 9.971144239277836e-06, "loss": 0.7634, "step": 619 }, { "epoch": 0.06, "grad_norm": 1.584229639475891, "learning_rate": 9.970966564491749e-06, "loss": 0.6862, "step": 620 }, { "epoch": 0.06, "grad_norm": 1.587513261215053, "learning_rate": 9.97078834597377e-06, "loss": 0.6991, "step": 621 }, { "epoch": 0.06, "grad_norm": 1.5113056035004537, "learning_rate": 9.97060958374339e-06, "loss": 0.6086, "step": 622 }, { "epoch": 0.06, "grad_norm": 1.512768449486159, "learning_rate": 9.970430277820165e-06, "loss": 0.7285, "step": 623 }, { "epoch": 0.06, "grad_norm": 1.4622336277606856, "learning_rate": 9.970250428223705e-06, "loss": 0.6197, "step": 624 }, { "epoch": 0.06, "grad_norm": 1.4432909550145328, "learning_rate": 9.970070034973683e-06, "loss": 0.7576, "step": 625 }, { "epoch": 0.06, "grad_norm": 1.7423386291336609, "learning_rate": 9.969889098089831e-06, "loss": 0.6955, "step": 626 }, { "epoch": 0.06, "grad_norm": 1.4806930566378802, "learning_rate": 9.969707617591942e-06, "loss": 0.6763, "step": 627 }, { "epoch": 0.06, "grad_norm": 1.600798807652634, "learning_rate": 9.969525593499864e-06, "loss": 0.7703, "step": 628 }, { "epoch": 0.06, "grad_norm": 1.4584459094885056, "learning_rate": 9.969343025833506e-06, "loss": 0.5955, "step": 629 }, { "epoch": 0.06, "grad_norm": 1.398903307034492, "learning_rate": 9.969159914612843e-06, "loss": 0.6105, "step": 630 }, { "epoch": 0.06, "grad_norm": 1.4018409503905258, "learning_rate": 9.968976259857899e-06, "loss": 0.7054, "step": 631 }, { "epoch": 0.06, "grad_norm": 1.425076087728577, "learning_rate": 9.968792061588763e-06, "loss": 0.7389, "step": 632 }, { "epoch": 0.06, "grad_norm": 1.4798361969676121, "learning_rate": 9.968607319825586e-06, "loss": 0.6794, "step": 633 }, { "epoch": 0.06, "grad_norm": 1.5903827684575869, "learning_rate": 9.968422034588571e-06, "loss": 0.8359, "step": 634 }, { "epoch": 0.06, "grad_norm": 1.517976770658435, "learning_rate": 9.96823620589799e-06, "loss": 0.7325, "step": 635 }, { "epoch": 0.06, "grad_norm": 1.712594598837515, "learning_rate": 9.968049833774163e-06, "loss": 0.6705, "step": 636 }, { "epoch": 0.07, "grad_norm": 1.7513829212158254, "learning_rate": 9.96786291823748e-06, "loss": 0.6583, "step": 637 }, { "epoch": 0.07, "grad_norm": 1.604313570488668, "learning_rate": 9.967675459308384e-06, "loss": 0.7075, "step": 638 }, { "epoch": 0.07, "grad_norm": 2.1862272386386143, "learning_rate": 9.967487457007382e-06, "loss": 0.6474, "step": 639 }, { "epoch": 0.07, "grad_norm": 1.5975366694347963, "learning_rate": 9.967298911355037e-06, "loss": 0.7261, "step": 640 }, { "epoch": 0.07, "grad_norm": 1.395837171262982, "learning_rate": 9.96710982237197e-06, "loss": 0.6952, "step": 641 }, { "epoch": 0.07, "grad_norm": 1.628631721670521, "learning_rate": 9.966920190078866e-06, "loss": 0.5486, "step": 642 }, { "epoch": 0.07, "grad_norm": 1.5273908717691993, "learning_rate": 9.966730014496468e-06, "loss": 0.637, "step": 643 }, { "epoch": 0.07, "grad_norm": 1.3952910709101838, "learning_rate": 9.966539295645576e-06, "loss": 0.5717, "step": 644 }, { "epoch": 0.07, "grad_norm": 1.5423297718973281, "learning_rate": 9.966348033547053e-06, "loss": 0.6618, "step": 645 }, { "epoch": 0.07, "grad_norm": 1.5087069963500583, "learning_rate": 9.966156228221817e-06, "loss": 0.7431, "step": 646 }, { "epoch": 0.07, "grad_norm": 1.720021884444298, "learning_rate": 9.96596387969085e-06, "loss": 0.6955, "step": 647 }, { "epoch": 0.07, "grad_norm": 1.6824760535365206, "learning_rate": 9.96577098797519e-06, "loss": 0.7011, "step": 648 }, { "epoch": 0.07, "grad_norm": 1.4581762789461568, "learning_rate": 9.965577553095937e-06, "loss": 0.6429, "step": 649 }, { "epoch": 0.07, "grad_norm": 1.5039927103268536, "learning_rate": 9.965383575074247e-06, "loss": 0.6398, "step": 650 }, { "epoch": 0.07, "grad_norm": 1.7076193690331254, "learning_rate": 9.96518905393134e-06, "loss": 0.6421, "step": 651 }, { "epoch": 0.07, "grad_norm": 1.3855773192072325, "learning_rate": 9.964993989688494e-06, "loss": 0.6867, "step": 652 }, { "epoch": 0.07, "grad_norm": 1.3864229082058903, "learning_rate": 9.964798382367043e-06, "loss": 0.6765, "step": 653 }, { "epoch": 0.07, "grad_norm": 1.5711638271933577, "learning_rate": 9.964602231988384e-06, "loss": 0.7528, "step": 654 }, { "epoch": 0.07, "grad_norm": 1.4863608267714563, "learning_rate": 9.964405538573971e-06, "loss": 0.8208, "step": 655 }, { "epoch": 0.07, "grad_norm": 1.5312223686792994, "learning_rate": 9.96420830214532e-06, "loss": 0.6671, "step": 656 }, { "epoch": 0.07, "grad_norm": 1.3418770209576751, "learning_rate": 9.964010522724004e-06, "loss": 0.5739, "step": 657 }, { "epoch": 0.07, "grad_norm": 1.5227302784270833, "learning_rate": 9.963812200331656e-06, "loss": 0.5994, "step": 658 }, { "epoch": 0.07, "grad_norm": 1.396672467629274, "learning_rate": 9.96361333498997e-06, "loss": 0.6692, "step": 659 }, { "epoch": 0.07, "grad_norm": 1.3673036437410797, "learning_rate": 9.9634139267207e-06, "loss": 0.5465, "step": 660 }, { "epoch": 0.07, "grad_norm": 1.5405186711769745, "learning_rate": 9.963213975545652e-06, "loss": 0.6153, "step": 661 }, { "epoch": 0.07, "grad_norm": 1.4865433692946808, "learning_rate": 9.963013481486704e-06, "loss": 0.5728, "step": 662 }, { "epoch": 0.07, "grad_norm": 1.575429825217039, "learning_rate": 9.96281244456578e-06, "loss": 0.7027, "step": 663 }, { "epoch": 0.07, "grad_norm": 1.4908478449675833, "learning_rate": 9.962610864804873e-06, "loss": 0.6082, "step": 664 }, { "epoch": 0.07, "grad_norm": 3.8913457445933113, "learning_rate": 9.962408742226032e-06, "loss": 0.7169, "step": 665 }, { "epoch": 0.07, "grad_norm": 1.5296991525330568, "learning_rate": 9.962206076851364e-06, "loss": 0.7307, "step": 666 }, { "epoch": 0.07, "grad_norm": 1.2872614768788615, "learning_rate": 9.96200286870304e-06, "loss": 0.6434, "step": 667 }, { "epoch": 0.07, "grad_norm": 1.4136328802542142, "learning_rate": 9.961799117803283e-06, "loss": 0.6491, "step": 668 }, { "epoch": 0.07, "grad_norm": 1.8232334217285997, "learning_rate": 9.961594824174382e-06, "loss": 0.7153, "step": 669 }, { "epoch": 0.07, "grad_norm": 1.3560220531161902, "learning_rate": 9.961389987838684e-06, "loss": 0.5716, "step": 670 }, { "epoch": 0.07, "grad_norm": 1.4981524253856333, "learning_rate": 9.961184608818592e-06, "loss": 0.7717, "step": 671 }, { "epoch": 0.07, "grad_norm": 1.4719411779702094, "learning_rate": 9.96097868713657e-06, "loss": 0.6497, "step": 672 }, { "epoch": 0.07, "grad_norm": 1.3869851034801974, "learning_rate": 9.960772222815147e-06, "loss": 0.6492, "step": 673 }, { "epoch": 0.07, "grad_norm": 1.7088042433984718, "learning_rate": 9.9605652158769e-06, "loss": 0.7391, "step": 674 }, { "epoch": 0.07, "grad_norm": 1.3718183757126596, "learning_rate": 9.960357666344478e-06, "loss": 0.7376, "step": 675 }, { "epoch": 0.07, "grad_norm": 1.663449150095655, "learning_rate": 9.960149574240576e-06, "loss": 0.7329, "step": 676 }, { "epoch": 0.07, "grad_norm": 1.6734329148468892, "learning_rate": 9.959940939587962e-06, "loss": 0.8037, "step": 677 }, { "epoch": 0.07, "grad_norm": 1.6162121490445434, "learning_rate": 9.959731762409454e-06, "loss": 0.7482, "step": 678 }, { "epoch": 0.07, "grad_norm": 1.7700811316961436, "learning_rate": 9.959522042727932e-06, "loss": 0.7275, "step": 679 }, { "epoch": 0.07, "grad_norm": 1.602534700432714, "learning_rate": 9.959311780566334e-06, "loss": 0.6319, "step": 680 }, { "epoch": 0.07, "grad_norm": 1.4493316358030455, "learning_rate": 9.959100975947661e-06, "loss": 0.6428, "step": 681 }, { "epoch": 0.07, "grad_norm": 1.4382420651833725, "learning_rate": 9.95888962889497e-06, "loss": 0.6865, "step": 682 }, { "epoch": 0.07, "grad_norm": 1.6227553880435548, "learning_rate": 9.958677739431381e-06, "loss": 0.6658, "step": 683 }, { "epoch": 0.07, "grad_norm": 1.4037923469005473, "learning_rate": 9.958465307580068e-06, "loss": 0.6995, "step": 684 }, { "epoch": 0.07, "grad_norm": 1.638619758540511, "learning_rate": 9.958252333364266e-06, "loss": 0.7884, "step": 685 }, { "epoch": 0.07, "grad_norm": 1.3754025436742778, "learning_rate": 9.958038816807275e-06, "loss": 0.7095, "step": 686 }, { "epoch": 0.07, "grad_norm": 1.786461334508807, "learning_rate": 9.957824757932446e-06, "loss": 0.6831, "step": 687 }, { "epoch": 0.07, "grad_norm": 1.7462089627166315, "learning_rate": 9.957610156763196e-06, "loss": 0.7374, "step": 688 }, { "epoch": 0.07, "grad_norm": 1.4354368118066607, "learning_rate": 9.957395013322995e-06, "loss": 0.5641, "step": 689 }, { "epoch": 0.07, "grad_norm": 1.6128881754580708, "learning_rate": 9.957179327635376e-06, "loss": 0.6626, "step": 690 }, { "epoch": 0.07, "grad_norm": 1.5785915659039564, "learning_rate": 9.956963099723933e-06, "loss": 0.6741, "step": 691 }, { "epoch": 0.07, "grad_norm": 1.63297120862018, "learning_rate": 9.956746329612319e-06, "loss": 0.7355, "step": 692 }, { "epoch": 0.07, "grad_norm": 1.3111413284355167, "learning_rate": 9.95652901732424e-06, "loss": 0.7284, "step": 693 }, { "epoch": 0.07, "grad_norm": 1.5744898037204522, "learning_rate": 9.95631116288347e-06, "loss": 0.7349, "step": 694 }, { "epoch": 0.07, "grad_norm": 2.176371550404322, "learning_rate": 9.956092766313834e-06, "loss": 0.6762, "step": 695 }, { "epoch": 0.07, "grad_norm": 1.3659929738958114, "learning_rate": 9.955873827639225e-06, "loss": 0.7055, "step": 696 }, { "epoch": 0.07, "grad_norm": 1.6374243324877134, "learning_rate": 9.95565434688359e-06, "loss": 0.7035, "step": 697 }, { "epoch": 0.07, "grad_norm": 1.4248026000478744, "learning_rate": 9.955434324070932e-06, "loss": 0.672, "step": 698 }, { "epoch": 0.07, "grad_norm": 1.4912356742856683, "learning_rate": 9.955213759225322e-06, "loss": 0.6977, "step": 699 }, { "epoch": 0.07, "grad_norm": 1.4942913689941888, "learning_rate": 9.954992652370885e-06, "loss": 0.6579, "step": 700 }, { "epoch": 0.07, "grad_norm": 1.6148624531351736, "learning_rate": 9.954771003531804e-06, "loss": 0.7516, "step": 701 }, { "epoch": 0.07, "grad_norm": 1.7289459955722677, "learning_rate": 9.954548812732327e-06, "loss": 0.7111, "step": 702 }, { "epoch": 0.07, "grad_norm": 1.4838905713459316, "learning_rate": 9.954326079996753e-06, "loss": 0.6876, "step": 703 }, { "epoch": 0.07, "grad_norm": 1.5093433971746462, "learning_rate": 9.954102805349445e-06, "loss": 0.6993, "step": 704 }, { "epoch": 0.07, "grad_norm": 1.5968612887870748, "learning_rate": 9.95387898881483e-06, "loss": 0.6686, "step": 705 }, { "epoch": 0.07, "grad_norm": 1.4722252710366441, "learning_rate": 9.953654630417387e-06, "loss": 0.6994, "step": 706 }, { "epoch": 0.07, "grad_norm": 1.6175578319681294, "learning_rate": 9.953429730181653e-06, "loss": 0.7208, "step": 707 }, { "epoch": 0.07, "grad_norm": 1.5856835272852725, "learning_rate": 9.953204288132234e-06, "loss": 0.6388, "step": 708 }, { "epoch": 0.07, "grad_norm": 1.4094951806360376, "learning_rate": 9.952978304293785e-06, "loss": 0.616, "step": 709 }, { "epoch": 0.07, "grad_norm": 1.4117105068602782, "learning_rate": 9.952751778691027e-06, "loss": 0.6235, "step": 710 }, { "epoch": 0.07, "grad_norm": 1.5710694207856035, "learning_rate": 9.952524711348734e-06, "loss": 0.8144, "step": 711 }, { "epoch": 0.07, "grad_norm": 1.4792381776791648, "learning_rate": 9.952297102291747e-06, "loss": 0.6712, "step": 712 }, { "epoch": 0.07, "grad_norm": 1.43104794344488, "learning_rate": 9.95206895154496e-06, "loss": 0.5402, "step": 713 }, { "epoch": 0.07, "grad_norm": 1.4605040661701794, "learning_rate": 9.95184025913333e-06, "loss": 0.7121, "step": 714 }, { "epoch": 0.07, "grad_norm": 1.4024887097929768, "learning_rate": 9.95161102508187e-06, "loss": 0.6851, "step": 715 }, { "epoch": 0.07, "grad_norm": 1.3130962639400225, "learning_rate": 9.951381249415655e-06, "loss": 0.5611, "step": 716 }, { "epoch": 0.07, "grad_norm": 1.444725354340494, "learning_rate": 9.951150932159818e-06, "loss": 0.7384, "step": 717 }, { "epoch": 0.07, "grad_norm": 1.506099196991615, "learning_rate": 9.950920073339551e-06, "loss": 0.6858, "step": 718 }, { "epoch": 0.07, "grad_norm": 1.7407613915392024, "learning_rate": 9.950688672980106e-06, "loss": 0.672, "step": 719 }, { "epoch": 0.07, "grad_norm": 1.4355614715195884, "learning_rate": 9.950456731106795e-06, "loss": 0.6697, "step": 720 }, { "epoch": 0.07, "grad_norm": 1.4619100857908067, "learning_rate": 9.950224247744986e-06, "loss": 0.7027, "step": 721 }, { "epoch": 0.07, "grad_norm": 1.5097494136076948, "learning_rate": 9.94999122292011e-06, "loss": 0.5837, "step": 722 }, { "epoch": 0.07, "grad_norm": 1.3878649789177244, "learning_rate": 9.949757656657656e-06, "loss": 0.7354, "step": 723 }, { "epoch": 0.07, "grad_norm": 1.5762122755020032, "learning_rate": 9.949523548983171e-06, "loss": 0.7214, "step": 724 }, { "epoch": 0.07, "grad_norm": 1.620556827407485, "learning_rate": 9.94928889992226e-06, "loss": 0.6888, "step": 725 }, { "epoch": 0.07, "grad_norm": 1.428253393467334, "learning_rate": 9.949053709500595e-06, "loss": 0.6047, "step": 726 }, { "epoch": 0.07, "grad_norm": 1.4919374871077513, "learning_rate": 9.948817977743895e-06, "loss": 0.6365, "step": 727 }, { "epoch": 0.07, "grad_norm": 1.6074495223410719, "learning_rate": 9.948581704677949e-06, "loss": 0.6252, "step": 728 }, { "epoch": 0.07, "grad_norm": 1.6370620256032802, "learning_rate": 9.948344890328599e-06, "loss": 0.6657, "step": 729 }, { "epoch": 0.07, "grad_norm": 1.4109257669015676, "learning_rate": 9.948107534721747e-06, "loss": 0.7635, "step": 730 }, { "epoch": 0.07, "grad_norm": 1.666037814513328, "learning_rate": 9.947869637883359e-06, "loss": 0.7282, "step": 731 }, { "epoch": 0.07, "grad_norm": 1.391430341040286, "learning_rate": 9.947631199839454e-06, "loss": 0.6925, "step": 732 }, { "epoch": 0.07, "grad_norm": 1.5437629182195212, "learning_rate": 9.947392220616111e-06, "loss": 0.628, "step": 733 }, { "epoch": 0.07, "grad_norm": 1.604811408380839, "learning_rate": 9.947152700239475e-06, "loss": 0.7108, "step": 734 }, { "epoch": 0.08, "grad_norm": 1.3034823618945983, "learning_rate": 9.946912638735742e-06, "loss": 0.6034, "step": 735 }, { "epoch": 0.08, "grad_norm": 1.6912153782959676, "learning_rate": 9.946672036131169e-06, "loss": 0.6561, "step": 736 }, { "epoch": 0.08, "grad_norm": 1.4041562196199298, "learning_rate": 9.946430892452074e-06, "loss": 0.6841, "step": 737 }, { "epoch": 0.08, "grad_norm": 1.5805161006561947, "learning_rate": 9.946189207724837e-06, "loss": 0.6649, "step": 738 }, { "epoch": 0.08, "grad_norm": 1.4885786608175968, "learning_rate": 9.94594698197589e-06, "loss": 0.6277, "step": 739 }, { "epoch": 0.08, "grad_norm": 1.5249180866772214, "learning_rate": 9.94570421523173e-06, "loss": 0.7178, "step": 740 }, { "epoch": 0.08, "grad_norm": 1.4632464009891633, "learning_rate": 9.945460907518912e-06, "loss": 0.6345, "step": 741 }, { "epoch": 0.08, "grad_norm": 1.3908972335656686, "learning_rate": 9.945217058864046e-06, "loss": 0.6597, "step": 742 }, { "epoch": 0.08, "grad_norm": 1.7879858524801013, "learning_rate": 9.944972669293807e-06, "loss": 0.7801, "step": 743 }, { "epoch": 0.08, "grad_norm": 1.6262321518162768, "learning_rate": 9.944727738834926e-06, "loss": 0.6956, "step": 744 }, { "epoch": 0.08, "grad_norm": 1.3178614055502278, "learning_rate": 9.944482267514193e-06, "loss": 0.7071, "step": 745 }, { "epoch": 0.08, "grad_norm": 1.3759558842884225, "learning_rate": 9.944236255358462e-06, "loss": 0.6976, "step": 746 }, { "epoch": 0.08, "grad_norm": 1.5902116215291053, "learning_rate": 9.943989702394636e-06, "loss": 0.6786, "step": 747 }, { "epoch": 0.08, "grad_norm": 1.7006540807732031, "learning_rate": 9.94374260864969e-06, "loss": 0.7091, "step": 748 }, { "epoch": 0.08, "grad_norm": 1.543431831359023, "learning_rate": 9.943494974150645e-06, "loss": 0.5988, "step": 749 }, { "epoch": 0.08, "grad_norm": 1.5966830219617996, "learning_rate": 9.943246798924594e-06, "loss": 0.7294, "step": 750 }, { "epoch": 0.08, "grad_norm": 1.8202653696540907, "learning_rate": 9.942998082998676e-06, "loss": 0.6997, "step": 751 }, { "epoch": 0.08, "grad_norm": 1.776556981416034, "learning_rate": 9.942748826400102e-06, "loss": 0.6726, "step": 752 }, { "epoch": 0.08, "grad_norm": 1.5880330172843506, "learning_rate": 9.942499029156133e-06, "loss": 0.7356, "step": 753 }, { "epoch": 0.08, "grad_norm": 1.53381214094961, "learning_rate": 9.942248691294092e-06, "loss": 0.6816, "step": 754 }, { "epoch": 0.08, "grad_norm": 1.4190093658351381, "learning_rate": 9.941997812841364e-06, "loss": 0.6948, "step": 755 }, { "epoch": 0.08, "grad_norm": 1.549004164240338, "learning_rate": 9.941746393825386e-06, "loss": 0.5948, "step": 756 }, { "epoch": 0.08, "grad_norm": 1.6549832700361333, "learning_rate": 9.941494434273665e-06, "loss": 0.6855, "step": 757 }, { "epoch": 0.08, "grad_norm": 1.392873166947951, "learning_rate": 9.941241934213753e-06, "loss": 0.6606, "step": 758 }, { "epoch": 0.08, "grad_norm": 1.4967196341992595, "learning_rate": 9.940988893673274e-06, "loss": 0.6278, "step": 759 }, { "epoch": 0.08, "grad_norm": 1.4722600499808094, "learning_rate": 9.940735312679905e-06, "loss": 0.6529, "step": 760 }, { "epoch": 0.08, "grad_norm": 1.6222193346744558, "learning_rate": 9.940481191261383e-06, "loss": 0.7376, "step": 761 }, { "epoch": 0.08, "grad_norm": 1.3938566606469198, "learning_rate": 9.940226529445504e-06, "loss": 0.7741, "step": 762 }, { "epoch": 0.08, "grad_norm": 1.7843490124251227, "learning_rate": 9.939971327260123e-06, "loss": 0.72, "step": 763 }, { "epoch": 0.08, "grad_norm": 1.5919327345057552, "learning_rate": 9.939715584733154e-06, "loss": 0.6122, "step": 764 }, { "epoch": 0.08, "grad_norm": 1.4843055667503227, "learning_rate": 9.939459301892571e-06, "loss": 0.6452, "step": 765 }, { "epoch": 0.08, "grad_norm": 1.8852675705140476, "learning_rate": 9.939202478766408e-06, "loss": 0.831, "step": 766 }, { "epoch": 0.08, "grad_norm": 1.6013955107833253, "learning_rate": 9.938945115382756e-06, "loss": 0.7972, "step": 767 }, { "epoch": 0.08, "grad_norm": 1.5943648001591086, "learning_rate": 9.938687211769765e-06, "loss": 0.7482, "step": 768 }, { "epoch": 0.08, "grad_norm": 1.3728490596263394, "learning_rate": 9.938428767955643e-06, "loss": 0.6165, "step": 769 }, { "epoch": 0.08, "grad_norm": 1.8304940355100636, "learning_rate": 9.938169783968664e-06, "loss": 0.7573, "step": 770 }, { "epoch": 0.08, "grad_norm": 1.4753482061794105, "learning_rate": 9.93791025983715e-06, "loss": 0.6009, "step": 771 }, { "epoch": 0.08, "grad_norm": 1.6049286087116308, "learning_rate": 9.937650195589492e-06, "loss": 0.6613, "step": 772 }, { "epoch": 0.08, "grad_norm": 1.4535567857280312, "learning_rate": 9.937389591254138e-06, "loss": 0.6518, "step": 773 }, { "epoch": 0.08, "grad_norm": 1.5457379176075796, "learning_rate": 9.93712844685959e-06, "loss": 0.7853, "step": 774 }, { "epoch": 0.08, "grad_norm": 1.5882593112184904, "learning_rate": 9.936866762434412e-06, "loss": 0.6835, "step": 775 }, { "epoch": 0.08, "grad_norm": 1.5306270726353821, "learning_rate": 9.936604538007229e-06, "loss": 0.5581, "step": 776 }, { "epoch": 0.08, "grad_norm": 1.7078359620259624, "learning_rate": 9.936341773606723e-06, "loss": 0.7276, "step": 777 }, { "epoch": 0.08, "grad_norm": 1.4527735609677104, "learning_rate": 9.936078469261636e-06, "loss": 0.6641, "step": 778 }, { "epoch": 0.08, "grad_norm": 1.701673986840774, "learning_rate": 9.93581462500077e-06, "loss": 0.6797, "step": 779 }, { "epoch": 0.08, "grad_norm": 1.543380226190425, "learning_rate": 9.935550240852982e-06, "loss": 0.6253, "step": 780 }, { "epoch": 0.08, "grad_norm": 1.3569867006576266, "learning_rate": 9.935285316847192e-06, "loss": 0.6583, "step": 781 }, { "epoch": 0.08, "grad_norm": 1.4945030069199543, "learning_rate": 9.935019853012376e-06, "loss": 0.7429, "step": 782 }, { "epoch": 0.08, "grad_norm": 1.497448780618609, "learning_rate": 9.934753849377573e-06, "loss": 0.6483, "step": 783 }, { "epoch": 0.08, "grad_norm": 1.8048484861379455, "learning_rate": 9.934487305971881e-06, "loss": 0.8179, "step": 784 }, { "epoch": 0.08, "grad_norm": 1.5216645582901371, "learning_rate": 9.934220222824448e-06, "loss": 0.6696, "step": 785 }, { "epoch": 0.08, "grad_norm": 1.7094916372892157, "learning_rate": 9.933952599964496e-06, "loss": 0.6458, "step": 786 }, { "epoch": 0.08, "grad_norm": 2.1524866909110587, "learning_rate": 9.933684437421293e-06, "loss": 0.6694, "step": 787 }, { "epoch": 0.08, "grad_norm": 1.5411384783278956, "learning_rate": 9.933415735224171e-06, "loss": 0.6148, "step": 788 }, { "epoch": 0.08, "grad_norm": 1.5269361676939264, "learning_rate": 9.933146493402523e-06, "loss": 0.7245, "step": 789 }, { "epoch": 0.08, "grad_norm": 1.6492406072962904, "learning_rate": 9.932876711985799e-06, "loss": 0.7754, "step": 790 }, { "epoch": 0.08, "grad_norm": 1.7260519607476996, "learning_rate": 9.932606391003507e-06, "loss": 0.784, "step": 791 }, { "epoch": 0.08, "grad_norm": 1.533330964934782, "learning_rate": 9.932335530485216e-06, "loss": 0.6665, "step": 792 }, { "epoch": 0.08, "grad_norm": 1.3560907074405497, "learning_rate": 9.932064130460552e-06, "loss": 0.5334, "step": 793 }, { "epoch": 0.08, "grad_norm": 1.8386283878726157, "learning_rate": 9.931792190959202e-06, "loss": 0.6822, "step": 794 }, { "epoch": 0.08, "grad_norm": 1.672123907217798, "learning_rate": 9.931519712010912e-06, "loss": 0.7994, "step": 795 }, { "epoch": 0.08, "grad_norm": 1.281197056439436, "learning_rate": 9.931246693645484e-06, "loss": 0.6294, "step": 796 }, { "epoch": 0.08, "grad_norm": 1.7491197131208178, "learning_rate": 9.930973135892784e-06, "loss": 0.7344, "step": 797 }, { "epoch": 0.08, "grad_norm": 1.5752779686836618, "learning_rate": 9.93069903878273e-06, "loss": 0.6697, "step": 798 }, { "epoch": 0.08, "grad_norm": 1.2850989178147274, "learning_rate": 9.930424402345307e-06, "loss": 0.5788, "step": 799 }, { "epoch": 0.08, "grad_norm": 1.6696302031348687, "learning_rate": 9.930149226610555e-06, "loss": 0.5564, "step": 800 }, { "epoch": 0.08, "grad_norm": 1.6286139250020988, "learning_rate": 9.92987351160857e-06, "loss": 0.7336, "step": 801 }, { "epoch": 0.08, "grad_norm": 1.443379926147209, "learning_rate": 9.929597257369513e-06, "loss": 0.6997, "step": 802 }, { "epoch": 0.08, "grad_norm": 1.6438476340684987, "learning_rate": 9.929320463923599e-06, "loss": 0.6345, "step": 803 }, { "epoch": 0.08, "grad_norm": 1.489650737220151, "learning_rate": 9.929043131301108e-06, "loss": 0.7266, "step": 804 }, { "epoch": 0.08, "grad_norm": 1.4268271797022936, "learning_rate": 9.928765259532372e-06, "loss": 0.5731, "step": 805 }, { "epoch": 0.08, "grad_norm": 1.4596755738425629, "learning_rate": 9.928486848647783e-06, "loss": 0.6192, "step": 806 }, { "epoch": 0.08, "grad_norm": 1.455363398463971, "learning_rate": 9.928207898677798e-06, "loss": 0.6733, "step": 807 }, { "epoch": 0.08, "grad_norm": 1.6393593431700064, "learning_rate": 9.927928409652926e-06, "loss": 0.7746, "step": 808 }, { "epoch": 0.08, "grad_norm": 1.7588719106231918, "learning_rate": 9.927648381603741e-06, "loss": 0.7223, "step": 809 }, { "epoch": 0.08, "grad_norm": 1.4361729246034496, "learning_rate": 9.927367814560871e-06, "loss": 0.6604, "step": 810 }, { "epoch": 0.08, "grad_norm": 1.5593753579965104, "learning_rate": 9.927086708555005e-06, "loss": 0.7088, "step": 811 }, { "epoch": 0.08, "grad_norm": 1.354930879216606, "learning_rate": 9.92680506361689e-06, "loss": 0.6459, "step": 812 }, { "epoch": 0.08, "grad_norm": 1.5110377257232752, "learning_rate": 9.926522879777334e-06, "loss": 0.7599, "step": 813 }, { "epoch": 0.08, "grad_norm": 1.398105232119592, "learning_rate": 9.926240157067203e-06, "loss": 0.6017, "step": 814 }, { "epoch": 0.08, "grad_norm": 1.5562934393183991, "learning_rate": 9.92595689551742e-06, "loss": 0.6277, "step": 815 }, { "epoch": 0.08, "grad_norm": 1.4827524544911077, "learning_rate": 9.92567309515897e-06, "loss": 0.6562, "step": 816 }, { "epoch": 0.08, "grad_norm": 1.6320194215523285, "learning_rate": 9.925388756022896e-06, "loss": 0.6457, "step": 817 }, { "epoch": 0.08, "grad_norm": 1.4827438145644802, "learning_rate": 9.925103878140296e-06, "loss": 0.6497, "step": 818 }, { "epoch": 0.08, "grad_norm": 1.3672110068487282, "learning_rate": 9.924818461542337e-06, "loss": 0.6355, "step": 819 }, { "epoch": 0.08, "grad_norm": 1.7060408273648526, "learning_rate": 9.92453250626023e-06, "loss": 0.6727, "step": 820 }, { "epoch": 0.08, "grad_norm": 1.625087699583472, "learning_rate": 9.92424601232526e-06, "loss": 0.7468, "step": 821 }, { "epoch": 0.08, "grad_norm": 1.464915378565336, "learning_rate": 9.92395897976876e-06, "loss": 0.7679, "step": 822 }, { "epoch": 0.08, "grad_norm": 1.4575689516221026, "learning_rate": 9.923671408622128e-06, "loss": 0.6601, "step": 823 }, { "epoch": 0.08, "grad_norm": 1.5339626602095846, "learning_rate": 9.923383298916818e-06, "loss": 0.6595, "step": 824 }, { "epoch": 0.08, "grad_norm": 1.5208378652550087, "learning_rate": 9.923094650684346e-06, "loss": 0.6466, "step": 825 }, { "epoch": 0.08, "grad_norm": 1.51501072744254, "learning_rate": 9.922805463956283e-06, "loss": 0.7493, "step": 826 }, { "epoch": 0.08, "grad_norm": 1.9995022224330814, "learning_rate": 9.92251573876426e-06, "loss": 0.7247, "step": 827 }, { "epoch": 0.08, "grad_norm": 1.4026240500272156, "learning_rate": 9.922225475139969e-06, "loss": 0.6613, "step": 828 }, { "epoch": 0.08, "grad_norm": 1.621143756159044, "learning_rate": 9.921934673115159e-06, "loss": 0.6654, "step": 829 }, { "epoch": 0.08, "grad_norm": 1.5486585143379812, "learning_rate": 9.921643332721637e-06, "loss": 0.789, "step": 830 }, { "epoch": 0.08, "grad_norm": 1.7787087974226634, "learning_rate": 9.921351453991273e-06, "loss": 0.729, "step": 831 }, { "epoch": 0.08, "grad_norm": 1.5045357547007467, "learning_rate": 9.921059036955992e-06, "loss": 0.6701, "step": 832 }, { "epoch": 0.09, "grad_norm": 1.60252229906293, "learning_rate": 9.92076608164778e-06, "loss": 0.7128, "step": 833 }, { "epoch": 0.09, "grad_norm": 1.5374179629643359, "learning_rate": 9.920472588098676e-06, "loss": 0.7686, "step": 834 }, { "epoch": 0.09, "grad_norm": 1.6382088530837653, "learning_rate": 9.92017855634079e-06, "loss": 0.7425, "step": 835 }, { "epoch": 0.09, "grad_norm": 1.5961483264680727, "learning_rate": 9.919883986406277e-06, "loss": 0.719, "step": 836 }, { "epoch": 0.09, "grad_norm": 1.5137447609877472, "learning_rate": 9.919588878327362e-06, "loss": 0.651, "step": 837 }, { "epoch": 0.09, "grad_norm": 1.6457435645573324, "learning_rate": 9.919293232136324e-06, "loss": 0.6713, "step": 838 }, { "epoch": 0.09, "grad_norm": 1.5724378470925944, "learning_rate": 9.9189970478655e-06, "loss": 0.7441, "step": 839 }, { "epoch": 0.09, "grad_norm": 1.5996234872551869, "learning_rate": 9.918700325547286e-06, "loss": 0.7293, "step": 840 }, { "epoch": 0.09, "grad_norm": 1.4863847085609065, "learning_rate": 9.918403065214139e-06, "loss": 0.6575, "step": 841 }, { "epoch": 0.09, "grad_norm": 1.466119462901999, "learning_rate": 9.918105266898575e-06, "loss": 0.7314, "step": 842 }, { "epoch": 0.09, "grad_norm": 1.6740519425124583, "learning_rate": 9.917806930633166e-06, "loss": 0.7322, "step": 843 }, { "epoch": 0.09, "grad_norm": 1.5581635583304876, "learning_rate": 9.917508056450546e-06, "loss": 0.6611, "step": 844 }, { "epoch": 0.09, "grad_norm": 1.5837836077074425, "learning_rate": 9.917208644383405e-06, "loss": 0.6806, "step": 845 }, { "epoch": 0.09, "grad_norm": 1.7331642586893752, "learning_rate": 9.916908694464494e-06, "loss": 0.675, "step": 846 }, { "epoch": 0.09, "grad_norm": 1.8729056420084766, "learning_rate": 9.91660820672662e-06, "loss": 0.7448, "step": 847 }, { "epoch": 0.09, "grad_norm": 1.3225131452128933, "learning_rate": 9.916307181202654e-06, "loss": 0.6973, "step": 848 }, { "epoch": 0.09, "grad_norm": 1.5286354484742075, "learning_rate": 9.916005617925519e-06, "loss": 0.6684, "step": 849 }, { "epoch": 0.09, "grad_norm": 1.5645530910630978, "learning_rate": 9.915703516928204e-06, "loss": 0.5957, "step": 850 }, { "epoch": 0.09, "grad_norm": 1.3382795945824129, "learning_rate": 9.915400878243753e-06, "loss": 0.6445, "step": 851 }, { "epoch": 0.09, "grad_norm": 1.3658524036223645, "learning_rate": 9.915097701905266e-06, "loss": 0.6753, "step": 852 }, { "epoch": 0.09, "grad_norm": 1.9382747759385088, "learning_rate": 9.914793987945907e-06, "loss": 0.7031, "step": 853 }, { "epoch": 0.09, "grad_norm": 1.512682123698674, "learning_rate": 9.914489736398895e-06, "loss": 0.6591, "step": 854 }, { "epoch": 0.09, "grad_norm": 1.4936831238164565, "learning_rate": 9.914184947297513e-06, "loss": 0.6321, "step": 855 }, { "epoch": 0.09, "grad_norm": 1.6186344574408844, "learning_rate": 9.913879620675096e-06, "loss": 0.7913, "step": 856 }, { "epoch": 0.09, "grad_norm": 1.624512651531238, "learning_rate": 9.913573756565043e-06, "loss": 0.7508, "step": 857 }, { "epoch": 0.09, "grad_norm": 1.735297599093702, "learning_rate": 9.913267355000809e-06, "loss": 0.6769, "step": 858 }, { "epoch": 0.09, "grad_norm": 1.5075933485573085, "learning_rate": 9.91296041601591e-06, "loss": 0.7243, "step": 859 }, { "epoch": 0.09, "grad_norm": 1.4648991740953294, "learning_rate": 9.912652939643916e-06, "loss": 0.6811, "step": 860 }, { "epoch": 0.09, "grad_norm": 1.5347259585840594, "learning_rate": 9.912344925918463e-06, "loss": 0.7032, "step": 861 }, { "epoch": 0.09, "grad_norm": 1.5653757757718947, "learning_rate": 9.91203637487324e-06, "loss": 0.632, "step": 862 }, { "epoch": 0.09, "grad_norm": 1.4986807957822277, "learning_rate": 9.911727286541997e-06, "loss": 0.7478, "step": 863 }, { "epoch": 0.09, "grad_norm": 1.4244985082758035, "learning_rate": 9.911417660958543e-06, "loss": 0.6456, "step": 864 }, { "epoch": 0.09, "grad_norm": 1.561189110931194, "learning_rate": 9.911107498156744e-06, "loss": 0.7343, "step": 865 }, { "epoch": 0.09, "grad_norm": 1.4274518335582962, "learning_rate": 9.910796798170528e-06, "loss": 0.7006, "step": 866 }, { "epoch": 0.09, "grad_norm": 1.625928049491734, "learning_rate": 9.91048556103388e-06, "loss": 0.6688, "step": 867 }, { "epoch": 0.09, "grad_norm": 1.4355696669211713, "learning_rate": 9.910173786780842e-06, "loss": 0.6815, "step": 868 }, { "epoch": 0.09, "grad_norm": 1.518591479717503, "learning_rate": 9.909861475445517e-06, "loss": 0.687, "step": 869 }, { "epoch": 0.09, "grad_norm": 1.492520871047679, "learning_rate": 9.909548627062067e-06, "loss": 0.8311, "step": 870 }, { "epoch": 0.09, "grad_norm": 1.4700857344135274, "learning_rate": 9.90923524166471e-06, "loss": 0.6782, "step": 871 }, { "epoch": 0.09, "grad_norm": 1.482602812747363, "learning_rate": 9.908921319287726e-06, "loss": 0.6436, "step": 872 }, { "epoch": 0.09, "grad_norm": 1.3985651825119136, "learning_rate": 9.908606859965451e-06, "loss": 0.6889, "step": 873 }, { "epoch": 0.09, "grad_norm": 1.5576816640527404, "learning_rate": 9.908291863732281e-06, "loss": 0.8333, "step": 874 }, { "epoch": 0.09, "grad_norm": 1.4843420118042383, "learning_rate": 9.907976330622674e-06, "loss": 0.6532, "step": 875 }, { "epoch": 0.09, "grad_norm": 1.517133821215593, "learning_rate": 9.907660260671141e-06, "loss": 0.6044, "step": 876 }, { "epoch": 0.09, "grad_norm": 1.423160374280887, "learning_rate": 9.907343653912253e-06, "loss": 0.6355, "step": 877 }, { "epoch": 0.09, "grad_norm": 1.7687290446494184, "learning_rate": 9.907026510380642e-06, "loss": 0.6377, "step": 878 }, { "epoch": 0.09, "grad_norm": 1.5117307854019042, "learning_rate": 9.906708830110997e-06, "loss": 0.6651, "step": 879 }, { "epoch": 0.09, "grad_norm": 1.452224184993254, "learning_rate": 9.90639061313807e-06, "loss": 0.7329, "step": 880 }, { "epoch": 0.09, "grad_norm": 1.546259136970326, "learning_rate": 9.906071859496665e-06, "loss": 0.6669, "step": 881 }, { "epoch": 0.09, "grad_norm": 1.611055008093151, "learning_rate": 9.905752569221648e-06, "loss": 0.6837, "step": 882 }, { "epoch": 0.09, "grad_norm": 1.398696737401967, "learning_rate": 9.905432742347943e-06, "loss": 0.6456, "step": 883 }, { "epoch": 0.09, "grad_norm": 1.6638226962768066, "learning_rate": 9.905112378910532e-06, "loss": 0.572, "step": 884 }, { "epoch": 0.09, "grad_norm": 1.4934790164374492, "learning_rate": 9.90479147894446e-06, "loss": 0.5419, "step": 885 }, { "epoch": 0.09, "grad_norm": 1.6123212171283796, "learning_rate": 9.904470042484826e-06, "loss": 0.7401, "step": 886 }, { "epoch": 0.09, "grad_norm": 1.5101749766781867, "learning_rate": 9.904148069566789e-06, "loss": 0.7041, "step": 887 }, { "epoch": 0.09, "grad_norm": 1.4448286370464396, "learning_rate": 9.903825560225567e-06, "loss": 0.6393, "step": 888 }, { "epoch": 0.09, "grad_norm": 1.78021996956657, "learning_rate": 9.903502514496436e-06, "loss": 0.674, "step": 889 }, { "epoch": 0.09, "grad_norm": 1.4071637831526351, "learning_rate": 9.903178932414734e-06, "loss": 0.6433, "step": 890 }, { "epoch": 0.09, "grad_norm": 1.3675352382899275, "learning_rate": 9.902854814015853e-06, "loss": 0.5734, "step": 891 }, { "epoch": 0.09, "grad_norm": 1.3946952235328773, "learning_rate": 9.902530159335245e-06, "loss": 0.5952, "step": 892 }, { "epoch": 0.09, "grad_norm": 1.5813643275637552, "learning_rate": 9.902204968408419e-06, "loss": 0.6459, "step": 893 }, { "epoch": 0.09, "grad_norm": 1.587167104881681, "learning_rate": 9.90187924127095e-06, "loss": 0.7791, "step": 894 }, { "epoch": 0.09, "grad_norm": 1.4947398646588272, "learning_rate": 9.901552977958463e-06, "loss": 0.618, "step": 895 }, { "epoch": 0.09, "grad_norm": 1.5411438817974468, "learning_rate": 9.901226178506646e-06, "loss": 0.6545, "step": 896 }, { "epoch": 0.09, "grad_norm": 1.709850160572515, "learning_rate": 9.900898842951246e-06, "loss": 0.6373, "step": 897 }, { "epoch": 0.09, "grad_norm": 1.6911390866616147, "learning_rate": 9.900570971328066e-06, "loss": 0.717, "step": 898 }, { "epoch": 0.09, "grad_norm": 1.6463962347048415, "learning_rate": 9.90024256367297e-06, "loss": 0.6202, "step": 899 }, { "epoch": 0.09, "grad_norm": 1.4379849668646079, "learning_rate": 9.899913620021879e-06, "loss": 0.7255, "step": 900 }, { "epoch": 0.09, "grad_norm": 1.5882693349541972, "learning_rate": 9.899584140410773e-06, "loss": 0.7408, "step": 901 }, { "epoch": 0.09, "grad_norm": 1.5653857508741345, "learning_rate": 9.899254124875692e-06, "loss": 0.8156, "step": 902 }, { "epoch": 0.09, "grad_norm": 1.4276232863898428, "learning_rate": 9.898923573452734e-06, "loss": 0.6194, "step": 903 }, { "epoch": 0.09, "grad_norm": 1.4764759352742691, "learning_rate": 9.898592486178054e-06, "loss": 0.6967, "step": 904 }, { "epoch": 0.09, "grad_norm": 1.5658604257288686, "learning_rate": 9.898260863087868e-06, "loss": 0.7282, "step": 905 }, { "epoch": 0.09, "grad_norm": 1.6822439427372098, "learning_rate": 9.897928704218446e-06, "loss": 0.6398, "step": 906 }, { "epoch": 0.09, "grad_norm": 1.4573792256489224, "learning_rate": 9.897596009606127e-06, "loss": 0.6781, "step": 907 }, { "epoch": 0.09, "grad_norm": 1.6581097238766536, "learning_rate": 9.897262779287295e-06, "loss": 0.6803, "step": 908 }, { "epoch": 0.09, "grad_norm": 1.567222908110655, "learning_rate": 9.896929013298401e-06, "loss": 0.6717, "step": 909 }, { "epoch": 0.09, "grad_norm": 1.5688753459279126, "learning_rate": 9.896594711675955e-06, "loss": 0.7365, "step": 910 }, { "epoch": 0.09, "grad_norm": 1.7143258595879358, "learning_rate": 9.896259874456521e-06, "loss": 0.7181, "step": 911 }, { "epoch": 0.09, "grad_norm": 1.4310518560280532, "learning_rate": 9.895924501676725e-06, "loss": 0.5851, "step": 912 }, { "epoch": 0.09, "grad_norm": 4.398777125863036, "learning_rate": 9.895588593373252e-06, "loss": 0.7084, "step": 913 }, { "epoch": 0.09, "grad_norm": 1.6568688044066806, "learning_rate": 9.89525214958284e-06, "loss": 0.6979, "step": 914 }, { "epoch": 0.09, "grad_norm": 1.4773040201950822, "learning_rate": 9.894915170342297e-06, "loss": 0.6557, "step": 915 }, { "epoch": 0.09, "grad_norm": 1.5839835124016126, "learning_rate": 9.894577655688473e-06, "loss": 0.711, "step": 916 }, { "epoch": 0.09, "grad_norm": 1.3516522324037805, "learning_rate": 9.894239605658292e-06, "loss": 0.6639, "step": 917 }, { "epoch": 0.09, "grad_norm": 1.3394470385840083, "learning_rate": 9.893901020288729e-06, "loss": 0.6406, "step": 918 }, { "epoch": 0.09, "grad_norm": 1.4044417839904844, "learning_rate": 9.89356189961682e-06, "loss": 0.7465, "step": 919 }, { "epoch": 0.09, "grad_norm": 1.3531700411276173, "learning_rate": 9.893222243679657e-06, "loss": 0.6509, "step": 920 }, { "epoch": 0.09, "grad_norm": 1.376262725417472, "learning_rate": 9.892882052514392e-06, "loss": 0.7002, "step": 921 }, { "epoch": 0.09, "grad_norm": 1.331561380725538, "learning_rate": 9.892541326158237e-06, "loss": 0.6329, "step": 922 }, { "epoch": 0.09, "grad_norm": 1.4582206636738693, "learning_rate": 9.89220006464846e-06, "loss": 0.643, "step": 923 }, { "epoch": 0.09, "grad_norm": 1.5217686897917908, "learning_rate": 9.89185826802239e-06, "loss": 0.6634, "step": 924 }, { "epoch": 0.09, "grad_norm": 1.5044487458479345, "learning_rate": 9.89151593631741e-06, "loss": 0.7381, "step": 925 }, { "epoch": 0.09, "grad_norm": 1.4571332502691172, "learning_rate": 9.89117306957097e-06, "loss": 0.7187, "step": 926 }, { "epoch": 0.09, "grad_norm": 1.4273868900314082, "learning_rate": 9.890829667820569e-06, "loss": 0.6202, "step": 927 }, { "epoch": 0.09, "grad_norm": 1.457451099685629, "learning_rate": 9.89048573110377e-06, "loss": 0.5502, "step": 928 }, { "epoch": 0.09, "grad_norm": 1.4115756469582452, "learning_rate": 9.890141259458193e-06, "loss": 0.7446, "step": 929 }, { "epoch": 0.09, "grad_norm": 1.6668380534436822, "learning_rate": 9.889796252921518e-06, "loss": 0.7098, "step": 930 }, { "epoch": 0.1, "grad_norm": 1.540477417783889, "learning_rate": 9.889450711531483e-06, "loss": 0.7638, "step": 931 }, { "epoch": 0.1, "grad_norm": 1.4841172943049548, "learning_rate": 9.88910463532588e-06, "loss": 0.7024, "step": 932 }, { "epoch": 0.1, "grad_norm": 1.3449992572961462, "learning_rate": 9.88875802434257e-06, "loss": 0.6667, "step": 933 }, { "epoch": 0.1, "grad_norm": 1.5039862064607739, "learning_rate": 9.888410878619457e-06, "loss": 0.6261, "step": 934 }, { "epoch": 0.1, "grad_norm": 1.4421541902357462, "learning_rate": 9.88806319819452e-06, "loss": 0.6105, "step": 935 }, { "epoch": 0.1, "grad_norm": 1.6137257388894712, "learning_rate": 9.887714983105783e-06, "loss": 0.6863, "step": 936 }, { "epoch": 0.1, "grad_norm": 1.5090684067527236, "learning_rate": 9.88736623339134e-06, "loss": 0.6356, "step": 937 }, { "epoch": 0.1, "grad_norm": 2.0544129399722415, "learning_rate": 9.887016949089334e-06, "loss": 0.7211, "step": 938 }, { "epoch": 0.1, "grad_norm": 1.418591167565375, "learning_rate": 9.886667130237971e-06, "loss": 0.8457, "step": 939 }, { "epoch": 0.1, "grad_norm": 1.4952473100462902, "learning_rate": 9.886316776875515e-06, "loss": 0.7032, "step": 940 }, { "epoch": 0.1, "grad_norm": 1.5353829399792756, "learning_rate": 9.885965889040288e-06, "loss": 0.6918, "step": 941 }, { "epoch": 0.1, "grad_norm": 1.7614343814058409, "learning_rate": 9.885614466770673e-06, "loss": 0.7855, "step": 942 }, { "epoch": 0.1, "grad_norm": 1.791735321996246, "learning_rate": 9.885262510105102e-06, "loss": 0.7849, "step": 943 }, { "epoch": 0.1, "grad_norm": 1.5190264958895998, "learning_rate": 9.884910019082082e-06, "loss": 0.6109, "step": 944 }, { "epoch": 0.1, "grad_norm": 1.6638336702370642, "learning_rate": 9.884556993740161e-06, "loss": 0.6647, "step": 945 }, { "epoch": 0.1, "grad_norm": 1.375679432322263, "learning_rate": 9.88420343411796e-06, "loss": 0.549, "step": 946 }, { "epoch": 0.1, "grad_norm": 1.5578158032311549, "learning_rate": 9.883849340254145e-06, "loss": 0.7417, "step": 947 }, { "epoch": 0.1, "grad_norm": 1.5011849617945963, "learning_rate": 9.883494712187455e-06, "loss": 0.6781, "step": 948 }, { "epoch": 0.1, "grad_norm": 1.3594971927239257, "learning_rate": 9.883139549956673e-06, "loss": 0.7143, "step": 949 }, { "epoch": 0.1, "grad_norm": 1.4568866246167413, "learning_rate": 9.88278385360065e-06, "loss": 0.6765, "step": 950 }, { "epoch": 0.1, "grad_norm": 1.572006152153727, "learning_rate": 9.882427623158295e-06, "loss": 0.7421, "step": 951 }, { "epoch": 0.1, "grad_norm": 1.717573041989544, "learning_rate": 9.882070858668569e-06, "loss": 0.7717, "step": 952 }, { "epoch": 0.1, "grad_norm": 1.5196079918248417, "learning_rate": 9.881713560170499e-06, "loss": 0.713, "step": 953 }, { "epoch": 0.1, "grad_norm": 1.3856638748760455, "learning_rate": 9.881355727703161e-06, "loss": 0.5777, "step": 954 }, { "epoch": 0.1, "grad_norm": 1.7792144619406554, "learning_rate": 9.880997361305703e-06, "loss": 0.7825, "step": 955 }, { "epoch": 0.1, "grad_norm": 1.5433405491798908, "learning_rate": 9.880638461017318e-06, "loss": 0.6589, "step": 956 }, { "epoch": 0.1, "grad_norm": 1.4493584914344984, "learning_rate": 9.880279026877265e-06, "loss": 0.6992, "step": 957 }, { "epoch": 0.1, "grad_norm": 1.639607865745341, "learning_rate": 9.87991905892486e-06, "loss": 0.6986, "step": 958 }, { "epoch": 0.1, "grad_norm": 1.6215404661904023, "learning_rate": 9.879558557199475e-06, "loss": 0.6781, "step": 959 }, { "epoch": 0.1, "grad_norm": 1.4110074666286492, "learning_rate": 9.879197521740544e-06, "loss": 0.6233, "step": 960 }, { "epoch": 0.1, "grad_norm": 1.4340248683612347, "learning_rate": 9.878835952587559e-06, "loss": 0.6676, "step": 961 }, { "epoch": 0.1, "grad_norm": 1.7640118756206506, "learning_rate": 9.878473849780066e-06, "loss": 0.6809, "step": 962 }, { "epoch": 0.1, "grad_norm": 1.5928981514697396, "learning_rate": 9.878111213357672e-06, "loss": 0.8281, "step": 963 }, { "epoch": 0.1, "grad_norm": 1.2971512482998673, "learning_rate": 9.877748043360045e-06, "loss": 0.629, "step": 964 }, { "epoch": 0.1, "grad_norm": 1.4650100812322426, "learning_rate": 9.877384339826907e-06, "loss": 0.7001, "step": 965 }, { "epoch": 0.1, "grad_norm": 1.5548827747082223, "learning_rate": 9.877020102798045e-06, "loss": 0.7539, "step": 966 }, { "epoch": 0.1, "grad_norm": 1.7333262767405548, "learning_rate": 9.876655332313294e-06, "loss": 0.7799, "step": 967 }, { "epoch": 0.1, "grad_norm": 1.4332402142236065, "learning_rate": 9.876290028412555e-06, "loss": 0.7184, "step": 968 }, { "epoch": 0.1, "grad_norm": 1.6719373571682425, "learning_rate": 9.875924191135787e-06, "loss": 0.6814, "step": 969 }, { "epoch": 0.1, "grad_norm": 1.5579186885234866, "learning_rate": 9.875557820523005e-06, "loss": 0.7784, "step": 970 }, { "epoch": 0.1, "grad_norm": 1.5474014431401764, "learning_rate": 9.875190916614284e-06, "loss": 0.7514, "step": 971 }, { "epoch": 0.1, "grad_norm": 1.5142269025385882, "learning_rate": 9.874823479449753e-06, "loss": 0.704, "step": 972 }, { "epoch": 0.1, "grad_norm": 1.2676180347075894, "learning_rate": 9.874455509069609e-06, "loss": 0.5902, "step": 973 }, { "epoch": 0.1, "grad_norm": 1.397315935977229, "learning_rate": 9.874087005514095e-06, "loss": 0.6764, "step": 974 }, { "epoch": 0.1, "grad_norm": 1.3224542027349337, "learning_rate": 9.873717968823522e-06, "loss": 0.6475, "step": 975 }, { "epoch": 0.1, "grad_norm": 1.3877687132299679, "learning_rate": 9.873348399038254e-06, "loss": 0.7294, "step": 976 }, { "epoch": 0.1, "grad_norm": 1.574578122785035, "learning_rate": 9.872978296198718e-06, "loss": 0.6839, "step": 977 }, { "epoch": 0.1, "grad_norm": 1.5671143494667914, "learning_rate": 9.872607660345392e-06, "loss": 0.6846, "step": 978 }, { "epoch": 0.1, "grad_norm": 1.7303431725228133, "learning_rate": 9.87223649151882e-06, "loss": 0.7054, "step": 979 }, { "epoch": 0.1, "grad_norm": 1.425862545446861, "learning_rate": 9.871864789759603e-06, "loss": 0.7427, "step": 980 }, { "epoch": 0.1, "grad_norm": 1.5076832367047925, "learning_rate": 9.87149255510839e-06, "loss": 0.6606, "step": 981 }, { "epoch": 0.1, "grad_norm": 1.465950652857815, "learning_rate": 9.871119787605907e-06, "loss": 0.6049, "step": 982 }, { "epoch": 0.1, "grad_norm": 1.4561601772884898, "learning_rate": 9.870746487292921e-06, "loss": 0.673, "step": 983 }, { "epoch": 0.1, "grad_norm": 1.5210630892611634, "learning_rate": 9.870372654210265e-06, "loss": 0.5585, "step": 984 }, { "epoch": 0.1, "grad_norm": 1.3687159006561243, "learning_rate": 9.869998288398831e-06, "loss": 0.5785, "step": 985 }, { "epoch": 0.1, "grad_norm": 1.472890593065836, "learning_rate": 9.869623389899568e-06, "loss": 0.7475, "step": 986 }, { "epoch": 0.1, "grad_norm": 1.4505511214774987, "learning_rate": 9.869247958753483e-06, "loss": 0.7251, "step": 987 }, { "epoch": 0.1, "grad_norm": 1.518652509405699, "learning_rate": 9.868871995001638e-06, "loss": 0.6164, "step": 988 }, { "epoch": 0.1, "grad_norm": 1.380402864892874, "learning_rate": 9.86849549868516e-06, "loss": 0.6342, "step": 989 }, { "epoch": 0.1, "grad_norm": 1.5980784945418163, "learning_rate": 9.868118469845231e-06, "loss": 0.615, "step": 990 }, { "epoch": 0.1, "grad_norm": 1.4721532419180492, "learning_rate": 9.867740908523089e-06, "loss": 0.6078, "step": 991 }, { "epoch": 0.1, "grad_norm": 1.4846386357792967, "learning_rate": 9.867362814760032e-06, "loss": 0.7707, "step": 992 }, { "epoch": 0.1, "grad_norm": 1.2911474961071974, "learning_rate": 9.866984188597417e-06, "loss": 0.6211, "step": 993 }, { "epoch": 0.1, "grad_norm": 1.420790103038177, "learning_rate": 9.866605030076661e-06, "loss": 0.5656, "step": 994 }, { "epoch": 0.1, "grad_norm": 1.6318342835087492, "learning_rate": 9.866225339239232e-06, "loss": 0.692, "step": 995 }, { "epoch": 0.1, "grad_norm": 1.6005779715692519, "learning_rate": 9.865845116126666e-06, "loss": 0.6222, "step": 996 }, { "epoch": 0.1, "grad_norm": 1.8153148550915525, "learning_rate": 9.86546436078055e-06, "loss": 0.6748, "step": 997 }, { "epoch": 0.1, "grad_norm": 1.509768594053771, "learning_rate": 9.865083073242534e-06, "loss": 0.6665, "step": 998 }, { "epoch": 0.1, "grad_norm": 1.5562832649410305, "learning_rate": 9.86470125355432e-06, "loss": 0.6809, "step": 999 }, { "epoch": 0.1, "grad_norm": 1.589308110499111, "learning_rate": 9.864318901757675e-06, "loss": 0.7219, "step": 1000 }, { "epoch": 0.1, "grad_norm": 1.3855743024133322, "learning_rate": 9.863936017894419e-06, "loss": 0.6117, "step": 1001 }, { "epoch": 0.1, "grad_norm": 1.6803182930564677, "learning_rate": 9.863552602006435e-06, "loss": 0.7156, "step": 1002 }, { "epoch": 0.1, "grad_norm": 1.5602019120558035, "learning_rate": 9.863168654135657e-06, "loss": 0.6365, "step": 1003 }, { "epoch": 0.1, "grad_norm": 1.4489175373206655, "learning_rate": 9.862784174324088e-06, "loss": 0.7979, "step": 1004 }, { "epoch": 0.1, "grad_norm": 1.606305008140649, "learning_rate": 9.86239916261378e-06, "loss": 0.6472, "step": 1005 }, { "epoch": 0.1, "grad_norm": 1.3574120380462602, "learning_rate": 9.862013619046845e-06, "loss": 0.6758, "step": 1006 }, { "epoch": 0.1, "grad_norm": 1.4069313926147322, "learning_rate": 9.861627543665456e-06, "loss": 0.6676, "step": 1007 }, { "epoch": 0.1, "grad_norm": 1.7329859070040396, "learning_rate": 9.861240936511842e-06, "loss": 0.7111, "step": 1008 }, { "epoch": 0.1, "grad_norm": 1.5160346791283101, "learning_rate": 9.86085379762829e-06, "loss": 0.7007, "step": 1009 }, { "epoch": 0.1, "grad_norm": 1.4181907536563352, "learning_rate": 9.860466127057148e-06, "loss": 0.6102, "step": 1010 }, { "epoch": 0.1, "grad_norm": 1.4322295311153541, "learning_rate": 9.860077924840819e-06, "loss": 0.6539, "step": 1011 }, { "epoch": 0.1, "grad_norm": 1.6396652539507601, "learning_rate": 9.859689191021762e-06, "loss": 0.6321, "step": 1012 }, { "epoch": 0.1, "grad_norm": 1.4388681784844277, "learning_rate": 9.859299925642502e-06, "loss": 0.6432, "step": 1013 }, { "epoch": 0.1, "grad_norm": 1.4035868216114997, "learning_rate": 9.858910128745615e-06, "loss": 0.5903, "step": 1014 }, { "epoch": 0.1, "grad_norm": 1.497432366832456, "learning_rate": 9.858519800373738e-06, "loss": 0.6912, "step": 1015 }, { "epoch": 0.1, "grad_norm": 1.4259238381462906, "learning_rate": 9.858128940569565e-06, "loss": 0.6078, "step": 1016 }, { "epoch": 0.1, "grad_norm": 1.5251266796338787, "learning_rate": 9.857737549375849e-06, "loss": 0.5806, "step": 1017 }, { "epoch": 0.1, "grad_norm": 1.5668347198946653, "learning_rate": 9.857345626835404e-06, "loss": 0.7327, "step": 1018 }, { "epoch": 0.1, "grad_norm": 1.5734196210241849, "learning_rate": 9.856953172991094e-06, "loss": 0.7334, "step": 1019 }, { "epoch": 0.1, "grad_norm": 1.4092287489450896, "learning_rate": 9.85656018788585e-06, "loss": 0.6942, "step": 1020 }, { "epoch": 0.1, "grad_norm": 1.688637686064683, "learning_rate": 9.856166671562657e-06, "loss": 0.7873, "step": 1021 }, { "epoch": 0.1, "grad_norm": 1.5412351253177268, "learning_rate": 9.855772624064556e-06, "loss": 0.6293, "step": 1022 }, { "epoch": 0.1, "grad_norm": 1.353120540659839, "learning_rate": 9.855378045434651e-06, "loss": 0.7127, "step": 1023 }, { "epoch": 0.1, "grad_norm": 1.4308303574020225, "learning_rate": 9.854982935716101e-06, "loss": 0.7083, "step": 1024 }, { "epoch": 0.1, "grad_norm": 1.6952361564896856, "learning_rate": 9.854587294952122e-06, "loss": 0.6793, "step": 1025 }, { "epoch": 0.1, "grad_norm": 1.3395744498353297, "learning_rate": 9.854191123185992e-06, "loss": 0.6681, "step": 1026 }, { "epoch": 0.1, "grad_norm": 1.5314578061443727, "learning_rate": 9.853794420461045e-06, "loss": 0.706, "step": 1027 }, { "epoch": 0.1, "grad_norm": 1.6262355333548812, "learning_rate": 9.85339718682067e-06, "loss": 0.7798, "step": 1028 }, { "epoch": 0.11, "grad_norm": 1.3439855190676477, "learning_rate": 9.852999422308321e-06, "loss": 0.6083, "step": 1029 }, { "epoch": 0.11, "grad_norm": 1.5025267589363323, "learning_rate": 9.852601126967502e-06, "loss": 0.5989, "step": 1030 }, { "epoch": 0.11, "grad_norm": 1.609311019821413, "learning_rate": 9.852202300841782e-06, "loss": 0.7463, "step": 1031 }, { "epoch": 0.11, "grad_norm": 1.3587881947872167, "learning_rate": 9.851802943974784e-06, "loss": 0.6929, "step": 1032 }, { "epoch": 0.11, "grad_norm": 1.6289112754437205, "learning_rate": 9.851403056410192e-06, "loss": 0.6468, "step": 1033 }, { "epoch": 0.11, "grad_norm": 1.473178118046101, "learning_rate": 9.851002638191744e-06, "loss": 0.7868, "step": 1034 }, { "epoch": 0.11, "grad_norm": 1.549211356822489, "learning_rate": 9.850601689363239e-06, "loss": 0.643, "step": 1035 }, { "epoch": 0.11, "grad_norm": 1.4829849890558537, "learning_rate": 9.850200209968535e-06, "loss": 0.7434, "step": 1036 }, { "epoch": 0.11, "grad_norm": 1.5560008642671106, "learning_rate": 9.849798200051547e-06, "loss": 0.8216, "step": 1037 }, { "epoch": 0.11, "grad_norm": 1.5037433770345283, "learning_rate": 9.849395659656241e-06, "loss": 0.733, "step": 1038 }, { "epoch": 0.11, "grad_norm": 1.6248262395833244, "learning_rate": 9.848992588826654e-06, "loss": 0.6367, "step": 1039 }, { "epoch": 0.11, "grad_norm": 1.4666635758957327, "learning_rate": 9.848588987606874e-06, "loss": 0.662, "step": 1040 }, { "epoch": 0.11, "grad_norm": 1.421089524807471, "learning_rate": 9.848184856041046e-06, "loss": 0.6294, "step": 1041 }, { "epoch": 0.11, "grad_norm": 1.7603899956890166, "learning_rate": 9.847780194173373e-06, "loss": 0.8273, "step": 1042 }, { "epoch": 0.11, "grad_norm": 1.6259125231705311, "learning_rate": 9.847375002048119e-06, "loss": 0.6655, "step": 1043 }, { "epoch": 0.11, "grad_norm": 1.5273532741872742, "learning_rate": 9.846969279709606e-06, "loss": 0.7651, "step": 1044 }, { "epoch": 0.11, "grad_norm": 1.7285561725926528, "learning_rate": 9.846563027202211e-06, "loss": 0.6317, "step": 1045 }, { "epoch": 0.11, "grad_norm": 1.483272027914591, "learning_rate": 9.84615624457037e-06, "loss": 0.5938, "step": 1046 }, { "epoch": 0.11, "grad_norm": 1.365345662600114, "learning_rate": 9.845748931858582e-06, "loss": 0.6369, "step": 1047 }, { "epoch": 0.11, "grad_norm": 1.4915272735315455, "learning_rate": 9.84534108911139e-06, "loss": 0.6173, "step": 1048 }, { "epoch": 0.11, "grad_norm": 1.594621397577371, "learning_rate": 9.844932716373415e-06, "loss": 0.6759, "step": 1049 }, { "epoch": 0.11, "grad_norm": 1.3947861966149853, "learning_rate": 9.844523813689317e-06, "loss": 0.6202, "step": 1050 }, { "epoch": 0.11, "grad_norm": 1.4604520796779932, "learning_rate": 9.84411438110383e-06, "loss": 0.7299, "step": 1051 }, { "epoch": 0.11, "grad_norm": 1.66344845346613, "learning_rate": 9.843704418661731e-06, "loss": 0.7477, "step": 1052 }, { "epoch": 0.11, "grad_norm": 1.669146448160865, "learning_rate": 9.843293926407866e-06, "loss": 0.6798, "step": 1053 }, { "epoch": 0.11, "grad_norm": 1.4868867037448876, "learning_rate": 9.842882904387136e-06, "loss": 0.6288, "step": 1054 }, { "epoch": 0.11, "grad_norm": 1.4392425304483771, "learning_rate": 9.842471352644499e-06, "loss": 0.5954, "step": 1055 }, { "epoch": 0.11, "grad_norm": 1.5113899911666882, "learning_rate": 9.842059271224969e-06, "loss": 0.669, "step": 1056 }, { "epoch": 0.11, "grad_norm": 1.5750906789995522, "learning_rate": 9.84164666017362e-06, "loss": 0.7793, "step": 1057 }, { "epoch": 0.11, "grad_norm": 1.5820363835869191, "learning_rate": 9.841233519535588e-06, "loss": 0.7009, "step": 1058 }, { "epoch": 0.11, "grad_norm": 1.5152307593796257, "learning_rate": 9.84081984935606e-06, "loss": 0.6185, "step": 1059 }, { "epoch": 0.11, "grad_norm": 1.5266895989974079, "learning_rate": 9.840405649680285e-06, "loss": 0.6731, "step": 1060 }, { "epoch": 0.11, "grad_norm": 1.34109963975567, "learning_rate": 9.839990920553566e-06, "loss": 0.5872, "step": 1061 }, { "epoch": 0.11, "grad_norm": 1.6565581788115784, "learning_rate": 9.83957566202127e-06, "loss": 0.7935, "step": 1062 }, { "epoch": 0.11, "grad_norm": 2.5387414165191178, "learning_rate": 9.839159874128818e-06, "loss": 0.732, "step": 1063 }, { "epoch": 0.11, "grad_norm": 1.5144855092820184, "learning_rate": 9.838743556921688e-06, "loss": 0.7058, "step": 1064 }, { "epoch": 0.11, "grad_norm": 1.4643829099886851, "learning_rate": 9.838326710445418e-06, "loss": 0.696, "step": 1065 }, { "epoch": 0.11, "grad_norm": 1.7491385738310683, "learning_rate": 9.837909334745605e-06, "loss": 0.7318, "step": 1066 }, { "epoch": 0.11, "grad_norm": 1.6594326253192155, "learning_rate": 9.8374914298679e-06, "loss": 0.7083, "step": 1067 }, { "epoch": 0.11, "grad_norm": 1.6067730397757278, "learning_rate": 9.837072995858016e-06, "loss": 0.6795, "step": 1068 }, { "epoch": 0.11, "grad_norm": 1.7233577032523608, "learning_rate": 9.83665403276172e-06, "loss": 0.7303, "step": 1069 }, { "epoch": 0.11, "grad_norm": 1.4063859814799726, "learning_rate": 9.836234540624839e-06, "loss": 0.7249, "step": 1070 }, { "epoch": 0.11, "grad_norm": 1.7092526426643515, "learning_rate": 9.835814519493258e-06, "loss": 0.7485, "step": 1071 }, { "epoch": 0.11, "grad_norm": 1.4129557254959766, "learning_rate": 9.835393969412921e-06, "loss": 0.7029, "step": 1072 }, { "epoch": 0.11, "grad_norm": 1.6443822242598385, "learning_rate": 9.834972890429828e-06, "loss": 0.6553, "step": 1073 }, { "epoch": 0.11, "grad_norm": 1.6987605130657415, "learning_rate": 9.834551282590037e-06, "loss": 0.6619, "step": 1074 }, { "epoch": 0.11, "grad_norm": 1.4705654810453288, "learning_rate": 9.83412914593966e-06, "loss": 0.6468, "step": 1075 }, { "epoch": 0.11, "grad_norm": 1.3505643832473813, "learning_rate": 9.833706480524878e-06, "loss": 0.6024, "step": 1076 }, { "epoch": 0.11, "grad_norm": 1.3226147089799016, "learning_rate": 9.833283286391919e-06, "loss": 0.6248, "step": 1077 }, { "epoch": 0.11, "grad_norm": 1.4793102595386012, "learning_rate": 9.832859563587075e-06, "loss": 0.6182, "step": 1078 }, { "epoch": 0.11, "grad_norm": 1.354197200018565, "learning_rate": 9.832435312156688e-06, "loss": 0.6433, "step": 1079 }, { "epoch": 0.11, "grad_norm": 1.8173669352388444, "learning_rate": 9.832010532147167e-06, "loss": 0.6665, "step": 1080 }, { "epoch": 0.11, "grad_norm": 1.5920830028413762, "learning_rate": 9.831585223604975e-06, "loss": 0.8138, "step": 1081 }, { "epoch": 0.11, "grad_norm": 1.509716818274583, "learning_rate": 9.831159386576634e-06, "loss": 0.7704, "step": 1082 }, { "epoch": 0.11, "grad_norm": 1.5093714460493042, "learning_rate": 9.830733021108721e-06, "loss": 0.7531, "step": 1083 }, { "epoch": 0.11, "grad_norm": 1.6524246810310157, "learning_rate": 9.830306127247874e-06, "loss": 0.7169, "step": 1084 }, { "epoch": 0.11, "grad_norm": 1.5227279903696405, "learning_rate": 9.829878705040784e-06, "loss": 0.6847, "step": 1085 }, { "epoch": 0.11, "grad_norm": 1.3912766072455534, "learning_rate": 9.829450754534206e-06, "loss": 0.6452, "step": 1086 }, { "epoch": 0.11, "grad_norm": 1.371374460359586, "learning_rate": 9.829022275774951e-06, "loss": 0.6369, "step": 1087 }, { "epoch": 0.11, "grad_norm": 1.6605556147085654, "learning_rate": 9.828593268809882e-06, "loss": 0.751, "step": 1088 }, { "epoch": 0.11, "grad_norm": 1.333240260569073, "learning_rate": 9.82816373368593e-06, "loss": 0.6788, "step": 1089 }, { "epoch": 0.11, "grad_norm": 1.575818952645652, "learning_rate": 9.827733670450074e-06, "loss": 0.6588, "step": 1090 }, { "epoch": 0.11, "grad_norm": 1.422719670956732, "learning_rate": 9.827303079149357e-06, "loss": 0.6144, "step": 1091 }, { "epoch": 0.11, "grad_norm": 1.5903581437463894, "learning_rate": 9.826871959830878e-06, "loss": 0.782, "step": 1092 }, { "epoch": 0.11, "grad_norm": 1.4128619865248617, "learning_rate": 9.82644031254179e-06, "loss": 0.7264, "step": 1093 }, { "epoch": 0.11, "grad_norm": 1.445737124653864, "learning_rate": 9.826008137329312e-06, "loss": 0.7554, "step": 1094 }, { "epoch": 0.11, "grad_norm": 1.5943182682782255, "learning_rate": 9.825575434240713e-06, "loss": 0.6959, "step": 1095 }, { "epoch": 0.11, "grad_norm": 1.4716965887574178, "learning_rate": 9.825142203323325e-06, "loss": 0.6319, "step": 1096 }, { "epoch": 0.11, "grad_norm": 1.4595333346712651, "learning_rate": 9.824708444624532e-06, "loss": 0.7086, "step": 1097 }, { "epoch": 0.11, "grad_norm": 1.6713179467277117, "learning_rate": 9.824274158191784e-06, "loss": 0.6438, "step": 1098 }, { "epoch": 0.11, "grad_norm": 1.384285334561596, "learning_rate": 9.823839344072582e-06, "loss": 0.5959, "step": 1099 }, { "epoch": 0.11, "grad_norm": 1.489171836932964, "learning_rate": 9.823404002314483e-06, "loss": 0.7908, "step": 1100 }, { "epoch": 0.11, "grad_norm": 1.5013574362916549, "learning_rate": 9.82296813296511e-06, "loss": 0.6261, "step": 1101 }, { "epoch": 0.11, "grad_norm": 1.4471552788873185, "learning_rate": 9.822531736072134e-06, "loss": 0.659, "step": 1102 }, { "epoch": 0.11, "grad_norm": 1.6538141762699932, "learning_rate": 9.822094811683295e-06, "loss": 0.6572, "step": 1103 }, { "epoch": 0.11, "grad_norm": 1.5566527253045355, "learning_rate": 9.821657359846381e-06, "loss": 0.6946, "step": 1104 }, { "epoch": 0.11, "grad_norm": 1.8116520077383491, "learning_rate": 9.82121938060924e-06, "loss": 0.7439, "step": 1105 }, { "epoch": 0.11, "grad_norm": 1.5853708772875865, "learning_rate": 9.820780874019782e-06, "loss": 0.7558, "step": 1106 }, { "epoch": 0.11, "grad_norm": 1.3702998694261523, "learning_rate": 9.820341840125972e-06, "loss": 0.6136, "step": 1107 }, { "epoch": 0.11, "grad_norm": 1.6854524495672616, "learning_rate": 9.819902278975827e-06, "loss": 0.7375, "step": 1108 }, { "epoch": 0.11, "grad_norm": 1.3236454532279214, "learning_rate": 9.81946219061743e-06, "loss": 0.647, "step": 1109 }, { "epoch": 0.11, "grad_norm": 1.6125300883277198, "learning_rate": 9.81902157509892e-06, "loss": 0.6865, "step": 1110 }, { "epoch": 0.11, "grad_norm": 1.386348448109922, "learning_rate": 9.81858043246849e-06, "loss": 0.6918, "step": 1111 }, { "epoch": 0.11, "grad_norm": 1.4597548209025037, "learning_rate": 9.818138762774393e-06, "loss": 0.7026, "step": 1112 }, { "epoch": 0.11, "grad_norm": 1.979981509165618, "learning_rate": 9.81769656606494e-06, "loss": 0.6751, "step": 1113 }, { "epoch": 0.11, "grad_norm": 1.5535355664048294, "learning_rate": 9.817253842388501e-06, "loss": 0.6907, "step": 1114 }, { "epoch": 0.11, "grad_norm": 1.4359840656586014, "learning_rate": 9.8168105917935e-06, "loss": 0.623, "step": 1115 }, { "epoch": 0.11, "grad_norm": 1.2604919719784682, "learning_rate": 9.816366814328419e-06, "loss": 0.6465, "step": 1116 }, { "epoch": 0.11, "grad_norm": 1.3183635895859493, "learning_rate": 9.815922510041801e-06, "loss": 0.545, "step": 1117 }, { "epoch": 0.11, "grad_norm": 1.381579391262395, "learning_rate": 9.815477678982244e-06, "loss": 0.7611, "step": 1118 }, { "epoch": 0.11, "grad_norm": 1.396368074586291, "learning_rate": 9.815032321198406e-06, "loss": 0.6595, "step": 1119 }, { "epoch": 0.11, "grad_norm": 1.3595425443487417, "learning_rate": 9.814586436738998e-06, "loss": 0.6398, "step": 1120 }, { "epoch": 0.11, "grad_norm": 1.4634085832405757, "learning_rate": 9.814140025652792e-06, "loss": 0.6024, "step": 1121 }, { "epoch": 0.11, "grad_norm": 1.339155323682816, "learning_rate": 9.81369308798862e-06, "loss": 0.5805, "step": 1122 }, { "epoch": 0.11, "grad_norm": 1.5232327115398614, "learning_rate": 9.813245623795365e-06, "loss": 0.6941, "step": 1123 }, { "epoch": 0.11, "grad_norm": 1.5218527306496543, "learning_rate": 9.812797633121975e-06, "loss": 0.7381, "step": 1124 }, { "epoch": 0.11, "grad_norm": 1.3888667190946178, "learning_rate": 9.812349116017449e-06, "loss": 0.684, "step": 1125 }, { "epoch": 0.11, "grad_norm": 1.5230678258827741, "learning_rate": 9.811900072530849e-06, "loss": 0.6661, "step": 1126 }, { "epoch": 0.12, "grad_norm": 1.3549769798805533, "learning_rate": 9.811450502711289e-06, "loss": 0.6448, "step": 1127 }, { "epoch": 0.12, "grad_norm": 1.467643977885382, "learning_rate": 9.811000406607944e-06, "loss": 0.7191, "step": 1128 }, { "epoch": 0.12, "grad_norm": 1.4089932128254332, "learning_rate": 9.81054978427005e-06, "loss": 0.6705, "step": 1129 }, { "epoch": 0.12, "grad_norm": 2.128661152692617, "learning_rate": 9.810098635746891e-06, "loss": 0.7733, "step": 1130 }, { "epoch": 0.12, "grad_norm": 1.5313853002706113, "learning_rate": 9.80964696108782e-06, "loss": 0.6727, "step": 1131 }, { "epoch": 0.12, "grad_norm": 1.3365383978163252, "learning_rate": 9.809194760342238e-06, "loss": 0.6033, "step": 1132 }, { "epoch": 0.12, "grad_norm": 1.2971575589283677, "learning_rate": 9.808742033559611e-06, "loss": 0.6637, "step": 1133 }, { "epoch": 0.12, "grad_norm": 1.669784268554686, "learning_rate": 9.808288780789454e-06, "loss": 0.6526, "step": 1134 }, { "epoch": 0.12, "grad_norm": 1.4945452558967673, "learning_rate": 9.807835002081348e-06, "loss": 0.7179, "step": 1135 }, { "epoch": 0.12, "grad_norm": 1.3507964850308576, "learning_rate": 9.807380697484927e-06, "loss": 0.6387, "step": 1136 }, { "epoch": 0.12, "grad_norm": 1.5366663998319205, "learning_rate": 9.806925867049885e-06, "loss": 0.7325, "step": 1137 }, { "epoch": 0.12, "grad_norm": 1.37353314840065, "learning_rate": 9.80647051082597e-06, "loss": 0.6949, "step": 1138 }, { "epoch": 0.12, "grad_norm": 1.3542788650145683, "learning_rate": 9.80601462886299e-06, "loss": 0.6731, "step": 1139 }, { "epoch": 0.12, "grad_norm": 1.618932893467632, "learning_rate": 9.80555822121081e-06, "loss": 0.6821, "step": 1140 }, { "epoch": 0.12, "grad_norm": 1.3730186823436048, "learning_rate": 9.805101287919351e-06, "loss": 0.6375, "step": 1141 }, { "epoch": 0.12, "grad_norm": 1.5203913133176497, "learning_rate": 9.804643829038598e-06, "loss": 0.6814, "step": 1142 }, { "epoch": 0.12, "grad_norm": 1.663547240914694, "learning_rate": 9.804185844618586e-06, "loss": 0.6744, "step": 1143 }, { "epoch": 0.12, "grad_norm": 1.7467836590338102, "learning_rate": 9.803727334709407e-06, "loss": 0.7771, "step": 1144 }, { "epoch": 0.12, "grad_norm": 1.5431711801933867, "learning_rate": 9.803268299361217e-06, "loss": 0.6477, "step": 1145 }, { "epoch": 0.12, "grad_norm": 1.466742494366636, "learning_rate": 9.802808738624225e-06, "loss": 0.7132, "step": 1146 }, { "epoch": 0.12, "grad_norm": 1.7158084963625893, "learning_rate": 9.8023486525487e-06, "loss": 0.7717, "step": 1147 }, { "epoch": 0.12, "grad_norm": 1.3336021567324878, "learning_rate": 9.801888041184963e-06, "loss": 0.6883, "step": 1148 }, { "epoch": 0.12, "grad_norm": 1.2933227065365782, "learning_rate": 9.801426904583402e-06, "loss": 0.6181, "step": 1149 }, { "epoch": 0.12, "grad_norm": 1.423982806844315, "learning_rate": 9.800965242794451e-06, "loss": 0.6372, "step": 1150 }, { "epoch": 0.12, "grad_norm": 1.5856662053634163, "learning_rate": 9.800503055868611e-06, "loss": 0.6703, "step": 1151 }, { "epoch": 0.12, "grad_norm": 1.4313553601259776, "learning_rate": 9.800040343856434e-06, "loss": 0.7546, "step": 1152 }, { "epoch": 0.12, "grad_norm": 1.4120899171572059, "learning_rate": 9.799577106808536e-06, "loss": 0.7437, "step": 1153 }, { "epoch": 0.12, "grad_norm": 1.4227904845154533, "learning_rate": 9.79911334477558e-06, "loss": 0.6405, "step": 1154 }, { "epoch": 0.12, "grad_norm": 1.5846315455303928, "learning_rate": 9.798649057808303e-06, "loss": 0.6515, "step": 1155 }, { "epoch": 0.12, "grad_norm": 1.4602975419487805, "learning_rate": 9.79818424595748e-06, "loss": 0.5767, "step": 1156 }, { "epoch": 0.12, "grad_norm": 1.7647727406124758, "learning_rate": 9.797718909273959e-06, "loss": 0.8241, "step": 1157 }, { "epoch": 0.12, "grad_norm": 1.9364743502635913, "learning_rate": 9.797253047808635e-06, "loss": 0.6939, "step": 1158 }, { "epoch": 0.12, "grad_norm": 1.7457442485211985, "learning_rate": 9.796786661612467e-06, "loss": 0.7414, "step": 1159 }, { "epoch": 0.12, "grad_norm": 1.5916796729365772, "learning_rate": 9.796319750736467e-06, "loss": 0.6899, "step": 1160 }, { "epoch": 0.12, "grad_norm": 1.5257480633642322, "learning_rate": 9.79585231523171e-06, "loss": 0.6439, "step": 1161 }, { "epoch": 0.12, "grad_norm": 1.3912189773065546, "learning_rate": 9.795384355149323e-06, "loss": 0.6122, "step": 1162 }, { "epoch": 0.12, "grad_norm": 1.3156734704989774, "learning_rate": 9.79491587054049e-06, "loss": 0.6612, "step": 1163 }, { "epoch": 0.12, "grad_norm": 1.3982299879997948, "learning_rate": 9.794446861456456e-06, "loss": 0.6888, "step": 1164 }, { "epoch": 0.12, "grad_norm": 1.4352346621872085, "learning_rate": 9.793977327948525e-06, "loss": 0.7031, "step": 1165 }, { "epoch": 0.12, "grad_norm": 1.5084718003063122, "learning_rate": 9.79350727006805e-06, "loss": 0.7791, "step": 1166 }, { "epoch": 0.12, "grad_norm": 1.5291633198909362, "learning_rate": 9.79303668786645e-06, "loss": 0.652, "step": 1167 }, { "epoch": 0.12, "grad_norm": 1.462275714786321, "learning_rate": 9.7925655813952e-06, "loss": 0.7476, "step": 1168 }, { "epoch": 0.12, "grad_norm": 1.4867618151190236, "learning_rate": 9.792093950705822e-06, "loss": 0.6688, "step": 1169 }, { "epoch": 0.12, "grad_norm": 1.479630109742492, "learning_rate": 9.791621795849915e-06, "loss": 0.5495, "step": 1170 }, { "epoch": 0.12, "grad_norm": 1.4375246894728315, "learning_rate": 9.791149116879114e-06, "loss": 0.6626, "step": 1171 }, { "epoch": 0.12, "grad_norm": 1.4903521660072594, "learning_rate": 9.790675913845128e-06, "loss": 0.7199, "step": 1172 }, { "epoch": 0.12, "grad_norm": 1.6620964568394205, "learning_rate": 9.790202186799715e-06, "loss": 0.6951, "step": 1173 }, { "epoch": 0.12, "grad_norm": 1.650868958578703, "learning_rate": 9.789727935794691e-06, "loss": 0.6955, "step": 1174 }, { "epoch": 0.12, "grad_norm": 1.7074551167252883, "learning_rate": 9.789253160881929e-06, "loss": 0.7625, "step": 1175 }, { "epoch": 0.12, "grad_norm": 1.5634363730819094, "learning_rate": 9.788777862113363e-06, "loss": 0.7956, "step": 1176 }, { "epoch": 0.12, "grad_norm": 1.4880721506059629, "learning_rate": 9.788302039540982e-06, "loss": 0.721, "step": 1177 }, { "epoch": 0.12, "grad_norm": 1.5007127201012493, "learning_rate": 9.78782569321683e-06, "loss": 0.5636, "step": 1178 }, { "epoch": 0.12, "grad_norm": 1.3870451748973258, "learning_rate": 9.787348823193013e-06, "loss": 0.6343, "step": 1179 }, { "epoch": 0.12, "grad_norm": 1.4658060772614132, "learning_rate": 9.786871429521692e-06, "loss": 0.7397, "step": 1180 }, { "epoch": 0.12, "grad_norm": 1.3264782757356837, "learning_rate": 9.786393512255082e-06, "loss": 0.6926, "step": 1181 }, { "epoch": 0.12, "grad_norm": 1.4378546611024594, "learning_rate": 9.785915071445457e-06, "loss": 0.7643, "step": 1182 }, { "epoch": 0.12, "grad_norm": 1.3727627677196328, "learning_rate": 9.785436107145156e-06, "loss": 0.5457, "step": 1183 }, { "epoch": 0.12, "grad_norm": 1.5470670962233546, "learning_rate": 9.784956619406565e-06, "loss": 0.6895, "step": 1184 }, { "epoch": 0.12, "grad_norm": 1.4478282461092589, "learning_rate": 9.784476608282132e-06, "loss": 0.6739, "step": 1185 }, { "epoch": 0.12, "grad_norm": 1.5702510634321716, "learning_rate": 9.78399607382436e-06, "loss": 0.725, "step": 1186 }, { "epoch": 0.12, "grad_norm": 1.388603180500861, "learning_rate": 9.783515016085811e-06, "loss": 0.6691, "step": 1187 }, { "epoch": 0.12, "grad_norm": 1.320355933238751, "learning_rate": 9.783033435119105e-06, "loss": 0.7111, "step": 1188 }, { "epoch": 0.12, "grad_norm": 1.6679363237258735, "learning_rate": 9.782551330976917e-06, "loss": 0.6832, "step": 1189 }, { "epoch": 0.12, "grad_norm": 1.5957982962995547, "learning_rate": 9.78206870371198e-06, "loss": 0.7016, "step": 1190 }, { "epoch": 0.12, "grad_norm": 1.9772360804647189, "learning_rate": 9.781585553377086e-06, "loss": 0.6563, "step": 1191 }, { "epoch": 0.12, "grad_norm": 1.3942432756001686, "learning_rate": 9.781101880025082e-06, "loss": 0.6135, "step": 1192 }, { "epoch": 0.12, "grad_norm": 1.6532460121355683, "learning_rate": 9.78061768370887e-06, "loss": 0.6539, "step": 1193 }, { "epoch": 0.12, "grad_norm": 1.4846776450514065, "learning_rate": 9.780132964481418e-06, "loss": 0.7001, "step": 1194 }, { "epoch": 0.12, "grad_norm": 1.377196356436685, "learning_rate": 9.779647722395743e-06, "loss": 0.6996, "step": 1195 }, { "epoch": 0.12, "grad_norm": 1.7120270080809443, "learning_rate": 9.779161957504917e-06, "loss": 0.7539, "step": 1196 }, { "epoch": 0.12, "grad_norm": 1.4134005101694695, "learning_rate": 9.77867566986208e-06, "loss": 0.6199, "step": 1197 }, { "epoch": 0.12, "grad_norm": 1.5053288264089328, "learning_rate": 9.77818885952042e-06, "loss": 0.7694, "step": 1198 }, { "epoch": 0.12, "grad_norm": 1.3979771757547534, "learning_rate": 9.777701526533185e-06, "loss": 0.7529, "step": 1199 }, { "epoch": 0.12, "grad_norm": 1.505284090858098, "learning_rate": 9.77721367095368e-06, "loss": 0.6841, "step": 1200 }, { "epoch": 0.12, "grad_norm": 1.5314783758471238, "learning_rate": 9.776725292835269e-06, "loss": 0.7608, "step": 1201 }, { "epoch": 0.12, "grad_norm": 1.5310125598659894, "learning_rate": 9.77623639223137e-06, "loss": 0.5683, "step": 1202 }, { "epoch": 0.12, "grad_norm": 1.6902821094578555, "learning_rate": 9.77574696919546e-06, "loss": 0.712, "step": 1203 }, { "epoch": 0.12, "grad_norm": 1.4077821851360084, "learning_rate": 9.775257023781075e-06, "loss": 0.6198, "step": 1204 }, { "epoch": 0.12, "grad_norm": 1.5047899863493694, "learning_rate": 9.774766556041802e-06, "loss": 0.739, "step": 1205 }, { "epoch": 0.12, "grad_norm": 1.5110317247768426, "learning_rate": 9.774275566031292e-06, "loss": 0.6571, "step": 1206 }, { "epoch": 0.12, "grad_norm": 1.4745570925101263, "learning_rate": 9.773784053803247e-06, "loss": 0.7601, "step": 1207 }, { "epoch": 0.12, "grad_norm": 1.4260899306969692, "learning_rate": 9.773292019411435e-06, "loss": 0.6185, "step": 1208 }, { "epoch": 0.12, "grad_norm": 1.3429397373442717, "learning_rate": 9.772799462909672e-06, "loss": 0.5144, "step": 1209 }, { "epoch": 0.12, "grad_norm": 1.459962832789337, "learning_rate": 9.772306384351833e-06, "loss": 0.6837, "step": 1210 }, { "epoch": 0.12, "grad_norm": 1.4297233068667814, "learning_rate": 9.771812783791854e-06, "loss": 0.6505, "step": 1211 }, { "epoch": 0.12, "grad_norm": 1.490673684769898, "learning_rate": 9.771318661283726e-06, "loss": 0.6547, "step": 1212 }, { "epoch": 0.12, "grad_norm": 1.543359149785061, "learning_rate": 9.770824016881496e-06, "loss": 0.7379, "step": 1213 }, { "epoch": 0.12, "grad_norm": 1.4948089413382026, "learning_rate": 9.770328850639268e-06, "loss": 0.7396, "step": 1214 }, { "epoch": 0.12, "grad_norm": 1.4236897756649625, "learning_rate": 9.769833162611206e-06, "loss": 0.6439, "step": 1215 }, { "epoch": 0.12, "grad_norm": 1.5940505019586901, "learning_rate": 9.769336952851526e-06, "loss": 0.668, "step": 1216 }, { "epoch": 0.12, "grad_norm": 1.6663046944588593, "learning_rate": 9.768840221414509e-06, "loss": 0.7301, "step": 1217 }, { "epoch": 0.12, "grad_norm": 1.3699200208813878, "learning_rate": 9.768342968354485e-06, "loss": 0.6159, "step": 1218 }, { "epoch": 0.12, "grad_norm": 1.6332689417528823, "learning_rate": 9.767845193725845e-06, "loss": 0.7348, "step": 1219 }, { "epoch": 0.12, "grad_norm": 1.5222862558851558, "learning_rate": 9.767346897583036e-06, "loss": 0.6679, "step": 1220 }, { "epoch": 0.12, "grad_norm": 1.4049808664230026, "learning_rate": 9.766848079980565e-06, "loss": 0.6498, "step": 1221 }, { "epoch": 0.12, "grad_norm": 1.4810965122211, "learning_rate": 9.766348740972989e-06, "loss": 0.6052, "step": 1222 }, { "epoch": 0.12, "grad_norm": 1.4812148872676199, "learning_rate": 9.765848880614927e-06, "loss": 0.6399, "step": 1223 }, { "epoch": 0.12, "grad_norm": 1.4616552015756965, "learning_rate": 9.765348498961059e-06, "loss": 0.692, "step": 1224 }, { "epoch": 0.13, "grad_norm": 1.5456437597404753, "learning_rate": 9.764847596066111e-06, "loss": 0.6833, "step": 1225 }, { "epoch": 0.13, "grad_norm": 1.3329983440552295, "learning_rate": 9.76434617198488e-06, "loss": 0.6363, "step": 1226 }, { "epoch": 0.13, "grad_norm": 1.6081595725596427, "learning_rate": 9.763844226772206e-06, "loss": 0.5965, "step": 1227 }, { "epoch": 0.13, "grad_norm": 1.582059117465588, "learning_rate": 9.763341760482996e-06, "loss": 0.7118, "step": 1228 }, { "epoch": 0.13, "grad_norm": 1.425450353590425, "learning_rate": 9.76283877317221e-06, "loss": 0.6571, "step": 1229 }, { "epoch": 0.13, "grad_norm": 1.5103127515081234, "learning_rate": 9.762335264894864e-06, "loss": 0.6027, "step": 1230 }, { "epoch": 0.13, "grad_norm": 1.405389705273917, "learning_rate": 9.761831235706035e-06, "loss": 0.6494, "step": 1231 }, { "epoch": 0.13, "grad_norm": 1.7555163160910925, "learning_rate": 9.761326685660852e-06, "loss": 0.7839, "step": 1232 }, { "epoch": 0.13, "grad_norm": 1.455926607015357, "learning_rate": 9.760821614814507e-06, "loss": 0.7291, "step": 1233 }, { "epoch": 0.13, "grad_norm": 1.3810011147090377, "learning_rate": 9.760316023222241e-06, "loss": 0.6667, "step": 1234 }, { "epoch": 0.13, "grad_norm": 1.4501986994955618, "learning_rate": 9.759809910939358e-06, "loss": 0.6377, "step": 1235 }, { "epoch": 0.13, "grad_norm": 1.4436103335734154, "learning_rate": 9.75930327802122e-06, "loss": 0.7138, "step": 1236 }, { "epoch": 0.13, "grad_norm": 1.6310369655314167, "learning_rate": 9.758796124523238e-06, "loss": 0.7288, "step": 1237 }, { "epoch": 0.13, "grad_norm": 1.3620099647792927, "learning_rate": 9.75828845050089e-06, "loss": 0.6298, "step": 1238 }, { "epoch": 0.13, "grad_norm": 1.4099238238716447, "learning_rate": 9.757780256009705e-06, "loss": 0.6445, "step": 1239 }, { "epoch": 0.13, "grad_norm": 1.5248172458361533, "learning_rate": 9.757271541105268e-06, "loss": 0.7031, "step": 1240 }, { "epoch": 0.13, "grad_norm": 1.800179520289802, "learning_rate": 9.756762305843227e-06, "loss": 0.6682, "step": 1241 }, { "epoch": 0.13, "grad_norm": 1.5517999196422263, "learning_rate": 9.756252550279278e-06, "loss": 0.6245, "step": 1242 }, { "epoch": 0.13, "grad_norm": 1.5308300567462467, "learning_rate": 9.755742274469182e-06, "loss": 0.6028, "step": 1243 }, { "epoch": 0.13, "grad_norm": 1.583783132628247, "learning_rate": 9.755231478468753e-06, "loss": 0.7104, "step": 1244 }, { "epoch": 0.13, "grad_norm": 1.506368675419186, "learning_rate": 9.754720162333862e-06, "loss": 0.6329, "step": 1245 }, { "epoch": 0.13, "grad_norm": 1.4726222239768725, "learning_rate": 9.75420832612044e-06, "loss": 0.7305, "step": 1246 }, { "epoch": 0.13, "grad_norm": 1.3709305891753734, "learning_rate": 9.753695969884469e-06, "loss": 0.6429, "step": 1247 }, { "epoch": 0.13, "grad_norm": 1.6108668529743728, "learning_rate": 9.753183093681993e-06, "loss": 0.6842, "step": 1248 }, { "epoch": 0.13, "grad_norm": 1.4260024776686975, "learning_rate": 9.752669697569112e-06, "loss": 0.6299, "step": 1249 }, { "epoch": 0.13, "grad_norm": 1.4919030594939835, "learning_rate": 9.752155781601981e-06, "loss": 0.7047, "step": 1250 }, { "epoch": 0.13, "grad_norm": 1.4212377476055291, "learning_rate": 9.751641345836814e-06, "loss": 0.6867, "step": 1251 }, { "epoch": 0.13, "grad_norm": 1.4303400453665385, "learning_rate": 9.751126390329877e-06, "loss": 0.7273, "step": 1252 }, { "epoch": 0.13, "grad_norm": 1.603095225883887, "learning_rate": 9.750610915137502e-06, "loss": 0.7112, "step": 1253 }, { "epoch": 0.13, "grad_norm": 1.5862498878488998, "learning_rate": 9.750094920316068e-06, "loss": 0.6834, "step": 1254 }, { "epoch": 0.13, "grad_norm": 1.6370759065284024, "learning_rate": 9.74957840592202e-06, "loss": 0.7457, "step": 1255 }, { "epoch": 0.13, "grad_norm": 1.417925780250081, "learning_rate": 9.74906137201185e-06, "loss": 0.6757, "step": 1256 }, { "epoch": 0.13, "grad_norm": 1.448039305087841, "learning_rate": 9.748543818642115e-06, "loss": 0.6875, "step": 1257 }, { "epoch": 0.13, "grad_norm": 1.541480787819567, "learning_rate": 9.748025745869424e-06, "loss": 0.639, "step": 1258 }, { "epoch": 0.13, "grad_norm": 1.334365696448328, "learning_rate": 9.747507153750448e-06, "loss": 0.7152, "step": 1259 }, { "epoch": 0.13, "grad_norm": 1.5028569687969042, "learning_rate": 9.746988042341907e-06, "loss": 0.7741, "step": 1260 }, { "epoch": 0.13, "grad_norm": 1.4987866192864998, "learning_rate": 9.746468411700584e-06, "loss": 0.7867, "step": 1261 }, { "epoch": 0.13, "grad_norm": 1.4537428231976874, "learning_rate": 9.74594826188332e-06, "loss": 0.6113, "step": 1262 }, { "epoch": 0.13, "grad_norm": 1.483852023743828, "learning_rate": 9.745427592947006e-06, "loss": 0.7673, "step": 1263 }, { "epoch": 0.13, "grad_norm": 1.494260931981766, "learning_rate": 9.744906404948593e-06, "loss": 0.7405, "step": 1264 }, { "epoch": 0.13, "grad_norm": 1.5246511989935554, "learning_rate": 9.744384697945093e-06, "loss": 0.741, "step": 1265 }, { "epoch": 0.13, "grad_norm": 1.4166878598106485, "learning_rate": 9.743862471993568e-06, "loss": 0.629, "step": 1266 }, { "epoch": 0.13, "grad_norm": 1.511217359130723, "learning_rate": 9.74333972715114e-06, "loss": 0.661, "step": 1267 }, { "epoch": 0.13, "grad_norm": 1.622016547269435, "learning_rate": 9.742816463474988e-06, "loss": 0.7107, "step": 1268 }, { "epoch": 0.13, "grad_norm": 1.19059628190505, "learning_rate": 9.742292681022351e-06, "loss": 0.5912, "step": 1269 }, { "epoch": 0.13, "grad_norm": 1.6018461766582583, "learning_rate": 9.741768379850515e-06, "loss": 0.6766, "step": 1270 }, { "epoch": 0.13, "grad_norm": 1.4608596560912281, "learning_rate": 9.741243560016834e-06, "loss": 0.6028, "step": 1271 }, { "epoch": 0.13, "grad_norm": 1.4192008264146332, "learning_rate": 9.74071822157871e-06, "loss": 0.6227, "step": 1272 }, { "epoch": 0.13, "grad_norm": 1.4014178249203457, "learning_rate": 9.740192364593608e-06, "loss": 0.6159, "step": 1273 }, { "epoch": 0.13, "grad_norm": 1.5037501249475562, "learning_rate": 9.739665989119047e-06, "loss": 0.6757, "step": 1274 }, { "epoch": 0.13, "grad_norm": 1.5044960929504638, "learning_rate": 9.7391390952126e-06, "loss": 0.6528, "step": 1275 }, { "epoch": 0.13, "grad_norm": 1.4956202829106877, "learning_rate": 9.7386116829319e-06, "loss": 0.6739, "step": 1276 }, { "epoch": 0.13, "grad_norm": 1.4184580768812654, "learning_rate": 9.73808375233464e-06, "loss": 0.7609, "step": 1277 }, { "epoch": 0.13, "grad_norm": 1.3615975282735542, "learning_rate": 9.73755530347856e-06, "loss": 0.624, "step": 1278 }, { "epoch": 0.13, "grad_norm": 1.6838043231192736, "learning_rate": 9.737026336421467e-06, "loss": 0.7824, "step": 1279 }, { "epoch": 0.13, "grad_norm": 1.530173979168358, "learning_rate": 9.73649685122122e-06, "loss": 0.8389, "step": 1280 }, { "epoch": 0.13, "grad_norm": 1.3475548581053214, "learning_rate": 9.735966847935732e-06, "loss": 0.6429, "step": 1281 }, { "epoch": 0.13, "grad_norm": 1.486071880080569, "learning_rate": 9.735436326622977e-06, "loss": 0.7332, "step": 1282 }, { "epoch": 0.13, "grad_norm": 1.3518417710312667, "learning_rate": 9.734905287340985e-06, "loss": 0.526, "step": 1283 }, { "epoch": 0.13, "grad_norm": 1.6382087630298665, "learning_rate": 9.734373730147844e-06, "loss": 0.7144, "step": 1284 }, { "epoch": 0.13, "grad_norm": 1.4185644379527975, "learning_rate": 9.73384165510169e-06, "loss": 0.6175, "step": 1285 }, { "epoch": 0.13, "grad_norm": 1.8216900080903204, "learning_rate": 9.733309062260728e-06, "loss": 0.5962, "step": 1286 }, { "epoch": 0.13, "grad_norm": 1.4931668381670575, "learning_rate": 9.732775951683213e-06, "loss": 0.6524, "step": 1287 }, { "epoch": 0.13, "grad_norm": 1.5891711970286624, "learning_rate": 9.732242323427454e-06, "loss": 0.6649, "step": 1288 }, { "epoch": 0.13, "grad_norm": 1.5787840232105728, "learning_rate": 9.731708177551825e-06, "loss": 0.6211, "step": 1289 }, { "epoch": 0.13, "grad_norm": 1.445456793836552, "learning_rate": 9.731173514114747e-06, "loss": 0.6881, "step": 1290 }, { "epoch": 0.13, "grad_norm": 1.2372088792535996, "learning_rate": 9.730638333174706e-06, "loss": 0.7156, "step": 1291 }, { "epoch": 0.13, "grad_norm": 1.344403059792079, "learning_rate": 9.730102634790239e-06, "loss": 0.6791, "step": 1292 }, { "epoch": 0.13, "grad_norm": 1.698624885942366, "learning_rate": 9.72956641901994e-06, "loss": 0.7318, "step": 1293 }, { "epoch": 0.13, "grad_norm": 1.5837262743319473, "learning_rate": 9.729029685922465e-06, "loss": 0.6448, "step": 1294 }, { "epoch": 0.13, "grad_norm": 1.3283486980185981, "learning_rate": 9.72849243555652e-06, "loss": 0.5493, "step": 1295 }, { "epoch": 0.13, "grad_norm": 1.5521952582421972, "learning_rate": 9.727954667980872e-06, "loss": 0.694, "step": 1296 }, { "epoch": 0.13, "grad_norm": 1.4362908704003505, "learning_rate": 9.72741638325434e-06, "loss": 0.6575, "step": 1297 }, { "epoch": 0.13, "grad_norm": 1.640211353363939, "learning_rate": 9.726877581435806e-06, "loss": 0.5824, "step": 1298 }, { "epoch": 0.13, "grad_norm": 1.3873079100716705, "learning_rate": 9.7263382625842e-06, "loss": 0.6612, "step": 1299 }, { "epoch": 0.13, "grad_norm": 1.5766984239126418, "learning_rate": 9.72579842675852e-06, "loss": 0.7501, "step": 1300 }, { "epoch": 0.13, "grad_norm": 1.451436804877606, "learning_rate": 9.72525807401781e-06, "loss": 0.7182, "step": 1301 }, { "epoch": 0.13, "grad_norm": 1.4238044703333437, "learning_rate": 9.724717204421175e-06, "loss": 0.7128, "step": 1302 }, { "epoch": 0.13, "grad_norm": 1.2671815466538114, "learning_rate": 9.724175818027775e-06, "loss": 0.6123, "step": 1303 }, { "epoch": 0.13, "grad_norm": 1.4256362292645128, "learning_rate": 9.72363391489683e-06, "loss": 0.5978, "step": 1304 }, { "epoch": 0.13, "grad_norm": 1.60579709304853, "learning_rate": 9.723091495087614e-06, "loss": 0.6929, "step": 1305 }, { "epoch": 0.13, "grad_norm": 1.4901144002483935, "learning_rate": 9.722548558659457e-06, "loss": 0.6775, "step": 1306 }, { "epoch": 0.13, "grad_norm": 1.5507864156130453, "learning_rate": 9.722005105671746e-06, "loss": 0.7123, "step": 1307 }, { "epoch": 0.13, "grad_norm": 1.41397617028633, "learning_rate": 9.721461136183924e-06, "loss": 0.6543, "step": 1308 }, { "epoch": 0.13, "grad_norm": 1.4658678010808786, "learning_rate": 9.720916650255492e-06, "loss": 0.6202, "step": 1309 }, { "epoch": 0.13, "grad_norm": 1.4187595107199007, "learning_rate": 9.720371647946008e-06, "loss": 0.6414, "step": 1310 }, { "epoch": 0.13, "grad_norm": 1.6507130929177358, "learning_rate": 9.719826129315084e-06, "loss": 0.7105, "step": 1311 }, { "epoch": 0.13, "grad_norm": 1.6766926005507805, "learning_rate": 9.71928009442239e-06, "loss": 0.7945, "step": 1312 }, { "epoch": 0.13, "grad_norm": 1.3050754553041293, "learning_rate": 9.71873354332765e-06, "loss": 0.6764, "step": 1313 }, { "epoch": 0.13, "grad_norm": 1.3999590655872707, "learning_rate": 9.71818647609065e-06, "loss": 0.6582, "step": 1314 }, { "epoch": 0.13, "grad_norm": 1.3492864201565937, "learning_rate": 9.717638892771228e-06, "loss": 0.5884, "step": 1315 }, { "epoch": 0.13, "grad_norm": 1.4144625241818116, "learning_rate": 9.717090793429277e-06, "loss": 0.7528, "step": 1316 }, { "epoch": 0.13, "grad_norm": 1.40002622134308, "learning_rate": 9.716542178124753e-06, "loss": 0.6113, "step": 1317 }, { "epoch": 0.13, "grad_norm": 1.5637484828492791, "learning_rate": 9.71599304691766e-06, "loss": 0.7429, "step": 1318 }, { "epoch": 0.13, "grad_norm": 1.642948257357369, "learning_rate": 9.715443399868067e-06, "loss": 0.576, "step": 1319 }, { "epoch": 0.13, "grad_norm": 1.4396507603811133, "learning_rate": 9.714893237036092e-06, "loss": 0.7447, "step": 1320 }, { "epoch": 0.13, "grad_norm": 1.5272145051219963, "learning_rate": 9.714342558481913e-06, "loss": 0.7375, "step": 1321 }, { "epoch": 0.13, "grad_norm": 1.4940803817287416, "learning_rate": 9.713791364265767e-06, "loss": 0.701, "step": 1322 }, { "epoch": 0.14, "grad_norm": 1.643370756428968, "learning_rate": 9.713239654447944e-06, "loss": 0.7745, "step": 1323 }, { "epoch": 0.14, "grad_norm": 1.6672689025892937, "learning_rate": 9.712687429088787e-06, "loss": 0.7091, "step": 1324 }, { "epoch": 0.14, "grad_norm": 1.5086585005825313, "learning_rate": 9.712134688248701e-06, "loss": 0.7328, "step": 1325 }, { "epoch": 0.14, "grad_norm": 1.4636306962307533, "learning_rate": 9.711581431988147e-06, "loss": 0.7579, "step": 1326 }, { "epoch": 0.14, "grad_norm": 1.775824546878904, "learning_rate": 9.711027660367641e-06, "loss": 0.6552, "step": 1327 }, { "epoch": 0.14, "grad_norm": 1.551707325566836, "learning_rate": 9.710473373447754e-06, "loss": 0.6871, "step": 1328 }, { "epoch": 0.14, "grad_norm": 1.4769638564036849, "learning_rate": 9.709918571289114e-06, "loss": 0.6562, "step": 1329 }, { "epoch": 0.14, "grad_norm": 1.3557571310277599, "learning_rate": 9.70936325395241e-06, "loss": 0.7026, "step": 1330 }, { "epoch": 0.14, "grad_norm": 1.4759622389832916, "learning_rate": 9.708807421498379e-06, "loss": 0.7784, "step": 1331 }, { "epoch": 0.14, "grad_norm": 1.6541975144679364, "learning_rate": 9.708251073987822e-06, "loss": 0.7399, "step": 1332 }, { "epoch": 0.14, "grad_norm": 1.593262578875364, "learning_rate": 9.707694211481591e-06, "loss": 0.6691, "step": 1333 }, { "epoch": 0.14, "grad_norm": 1.439377331525682, "learning_rate": 9.707136834040598e-06, "loss": 0.6087, "step": 1334 }, { "epoch": 0.14, "grad_norm": 1.629949524829443, "learning_rate": 9.706578941725809e-06, "loss": 0.7515, "step": 1335 }, { "epoch": 0.14, "grad_norm": 1.4399382616630527, "learning_rate": 9.706020534598246e-06, "loss": 0.6899, "step": 1336 }, { "epoch": 0.14, "grad_norm": 1.3085376583771786, "learning_rate": 9.705461612718991e-06, "loss": 0.6182, "step": 1337 }, { "epoch": 0.14, "grad_norm": 1.3096161678328389, "learning_rate": 9.704902176149178e-06, "loss": 0.6994, "step": 1338 }, { "epoch": 0.14, "grad_norm": 1.6321938216976772, "learning_rate": 9.70434222495e-06, "loss": 0.6486, "step": 1339 }, { "epoch": 0.14, "grad_norm": 1.5151224075916845, "learning_rate": 9.703781759182705e-06, "loss": 0.6363, "step": 1340 }, { "epoch": 0.14, "grad_norm": 1.2086613080496014, "learning_rate": 9.703220778908595e-06, "loss": 0.6227, "step": 1341 }, { "epoch": 0.14, "grad_norm": 1.4060352941476453, "learning_rate": 9.702659284189036e-06, "loss": 0.7004, "step": 1342 }, { "epoch": 0.14, "grad_norm": 1.3994713641221734, "learning_rate": 9.70209727508544e-06, "loss": 0.5222, "step": 1343 }, { "epoch": 0.14, "grad_norm": 1.3563728456393644, "learning_rate": 9.701534751659284e-06, "loss": 0.5925, "step": 1344 }, { "epoch": 0.14, "grad_norm": 1.5521731519519548, "learning_rate": 9.700971713972096e-06, "loss": 0.6397, "step": 1345 }, { "epoch": 0.14, "grad_norm": 1.5991537591201566, "learning_rate": 9.700408162085463e-06, "loss": 0.6492, "step": 1346 }, { "epoch": 0.14, "grad_norm": 1.4287149340163472, "learning_rate": 9.699844096061026e-06, "loss": 0.5951, "step": 1347 }, { "epoch": 0.14, "grad_norm": 1.548888982349059, "learning_rate": 9.699279515960484e-06, "loss": 0.6772, "step": 1348 }, { "epoch": 0.14, "grad_norm": 1.4083348466794887, "learning_rate": 9.698714421845592e-06, "loss": 0.6539, "step": 1349 }, { "epoch": 0.14, "grad_norm": 1.4241415689179853, "learning_rate": 9.698148813778158e-06, "loss": 0.667, "step": 1350 }, { "epoch": 0.14, "grad_norm": 1.4042161748191444, "learning_rate": 9.697582691820054e-06, "loss": 0.6476, "step": 1351 }, { "epoch": 0.14, "grad_norm": 1.3850311961825321, "learning_rate": 9.697016056033202e-06, "loss": 0.649, "step": 1352 }, { "epoch": 0.14, "grad_norm": 1.8648500211854269, "learning_rate": 9.696448906479577e-06, "loss": 0.6459, "step": 1353 }, { "epoch": 0.14, "grad_norm": 1.5135154421090673, "learning_rate": 9.69588124322122e-06, "loss": 0.6369, "step": 1354 }, { "epoch": 0.14, "grad_norm": 1.5642465851693195, "learning_rate": 9.69531306632022e-06, "loss": 0.8716, "step": 1355 }, { "epoch": 0.14, "grad_norm": 1.5324680077793773, "learning_rate": 9.694744375838725e-06, "loss": 0.6683, "step": 1356 }, { "epoch": 0.14, "grad_norm": 1.457524435165209, "learning_rate": 9.694175171838942e-06, "loss": 0.6703, "step": 1357 }, { "epoch": 0.14, "grad_norm": 1.5510892914152692, "learning_rate": 9.693605454383129e-06, "loss": 0.6817, "step": 1358 }, { "epoch": 0.14, "grad_norm": 1.4939174626597145, "learning_rate": 9.693035223533602e-06, "loss": 0.7394, "step": 1359 }, { "epoch": 0.14, "grad_norm": 1.586835690790642, "learning_rate": 9.692464479352735e-06, "loss": 0.8118, "step": 1360 }, { "epoch": 0.14, "grad_norm": 1.6047790674343525, "learning_rate": 9.691893221902958e-06, "loss": 0.6951, "step": 1361 }, { "epoch": 0.14, "grad_norm": 1.4526818703043076, "learning_rate": 9.691321451246754e-06, "loss": 0.7231, "step": 1362 }, { "epoch": 0.14, "grad_norm": 1.3856969624739406, "learning_rate": 9.690749167446663e-06, "loss": 0.6582, "step": 1363 }, { "epoch": 0.14, "grad_norm": 1.369301411876941, "learning_rate": 9.690176370565285e-06, "loss": 0.7325, "step": 1364 }, { "epoch": 0.14, "grad_norm": 1.5173035389922735, "learning_rate": 9.689603060665273e-06, "loss": 0.773, "step": 1365 }, { "epoch": 0.14, "grad_norm": 1.5854017411757413, "learning_rate": 9.689029237809336e-06, "loss": 0.6852, "step": 1366 }, { "epoch": 0.14, "grad_norm": 1.488137104749296, "learning_rate": 9.68845490206024e-06, "loss": 0.6548, "step": 1367 }, { "epoch": 0.14, "grad_norm": 1.7526445082141207, "learning_rate": 9.687880053480806e-06, "loss": 0.7419, "step": 1368 }, { "epoch": 0.14, "grad_norm": 1.6251160225002745, "learning_rate": 9.687304692133912e-06, "loss": 0.775, "step": 1369 }, { "epoch": 0.14, "grad_norm": 1.3819971873548047, "learning_rate": 9.686728818082491e-06, "loss": 0.6688, "step": 1370 }, { "epoch": 0.14, "grad_norm": 1.63271607483268, "learning_rate": 9.686152431389536e-06, "loss": 0.7097, "step": 1371 }, { "epoch": 0.14, "grad_norm": 1.621346286036983, "learning_rate": 9.68557553211809e-06, "loss": 0.7239, "step": 1372 }, { "epoch": 0.14, "grad_norm": 1.212779511676642, "learning_rate": 9.684998120331255e-06, "loss": 0.7055, "step": 1373 }, { "epoch": 0.14, "grad_norm": 1.9486821241508703, "learning_rate": 9.684420196092193e-06, "loss": 0.7829, "step": 1374 }, { "epoch": 0.14, "grad_norm": 1.7498501699848743, "learning_rate": 9.683841759464114e-06, "loss": 0.7393, "step": 1375 }, { "epoch": 0.14, "grad_norm": 1.5300503614699759, "learning_rate": 9.68326281051029e-06, "loss": 0.8078, "step": 1376 }, { "epoch": 0.14, "grad_norm": 1.4652631660110336, "learning_rate": 9.682683349294046e-06, "loss": 0.68, "step": 1377 }, { "epoch": 0.14, "grad_norm": 1.3043415884731084, "learning_rate": 9.682103375878766e-06, "loss": 0.6022, "step": 1378 }, { "epoch": 0.14, "grad_norm": 1.946310175607534, "learning_rate": 9.681522890327888e-06, "loss": 0.6714, "step": 1379 }, { "epoch": 0.14, "grad_norm": 1.4711031271631003, "learning_rate": 9.680941892704907e-06, "loss": 0.6197, "step": 1380 }, { "epoch": 0.14, "grad_norm": 1.31627878096371, "learning_rate": 9.680360383073373e-06, "loss": 0.6322, "step": 1381 }, { "epoch": 0.14, "grad_norm": 1.5778038373669363, "learning_rate": 9.679778361496892e-06, "loss": 0.6692, "step": 1382 }, { "epoch": 0.14, "grad_norm": 1.4175096548711328, "learning_rate": 9.679195828039126e-06, "loss": 0.6936, "step": 1383 }, { "epoch": 0.14, "grad_norm": 1.3070630202319453, "learning_rate": 9.678612782763794e-06, "loss": 0.6481, "step": 1384 }, { "epoch": 0.14, "grad_norm": 2.2938720917709605, "learning_rate": 9.67802922573467e-06, "loss": 0.7177, "step": 1385 }, { "epoch": 0.14, "grad_norm": 1.518977971503795, "learning_rate": 9.677445157015586e-06, "loss": 0.7765, "step": 1386 }, { "epoch": 0.14, "grad_norm": 1.7249755844024632, "learning_rate": 9.676860576670426e-06, "loss": 0.7202, "step": 1387 }, { "epoch": 0.14, "grad_norm": 1.357081379435144, "learning_rate": 9.676275484763133e-06, "loss": 0.6397, "step": 1388 }, { "epoch": 0.14, "grad_norm": 1.4452899195642037, "learning_rate": 9.675689881357707e-06, "loss": 0.7421, "step": 1389 }, { "epoch": 0.14, "grad_norm": 1.9540063559319685, "learning_rate": 9.6751037665182e-06, "loss": 0.6736, "step": 1390 }, { "epoch": 0.14, "grad_norm": 1.5635924424997054, "learning_rate": 9.674517140308721e-06, "loss": 0.6865, "step": 1391 }, { "epoch": 0.14, "grad_norm": 1.310318902715064, "learning_rate": 9.67393000279344e-06, "loss": 0.5538, "step": 1392 }, { "epoch": 0.14, "grad_norm": 1.374155933391238, "learning_rate": 9.673342354036574e-06, "loss": 0.6397, "step": 1393 }, { "epoch": 0.14, "grad_norm": 1.4948196756075451, "learning_rate": 9.672754194102407e-06, "loss": 0.687, "step": 1394 }, { "epoch": 0.14, "grad_norm": 1.5061640058164052, "learning_rate": 9.672165523055269e-06, "loss": 0.6515, "step": 1395 }, { "epoch": 0.14, "grad_norm": 1.654545344851996, "learning_rate": 9.67157634095955e-06, "loss": 0.7829, "step": 1396 }, { "epoch": 0.14, "grad_norm": 1.387725170295849, "learning_rate": 9.670986647879695e-06, "loss": 0.6759, "step": 1397 }, { "epoch": 0.14, "grad_norm": 1.3561236737507358, "learning_rate": 9.670396443880208e-06, "loss": 0.7499, "step": 1398 }, { "epoch": 0.14, "grad_norm": 1.628773295425161, "learning_rate": 9.669805729025644e-06, "loss": 0.6281, "step": 1399 }, { "epoch": 0.14, "grad_norm": 1.453796284615248, "learning_rate": 9.669214503380619e-06, "loss": 0.6536, "step": 1400 }, { "epoch": 0.14, "grad_norm": 1.3538431573446357, "learning_rate": 9.668622767009799e-06, "loss": 0.6473, "step": 1401 }, { "epoch": 0.14, "grad_norm": 1.497787773576774, "learning_rate": 9.66803051997791e-06, "loss": 0.7792, "step": 1402 }, { "epoch": 0.14, "grad_norm": 1.268226178081315, "learning_rate": 9.667437762349733e-06, "loss": 0.6701, "step": 1403 }, { "epoch": 0.14, "grad_norm": 1.5663080968239382, "learning_rate": 9.666844494190107e-06, "loss": 0.7138, "step": 1404 }, { "epoch": 0.14, "grad_norm": 1.572687405789375, "learning_rate": 9.66625071556392e-06, "loss": 0.788, "step": 1405 }, { "epoch": 0.14, "grad_norm": 1.507265212365516, "learning_rate": 9.665656426536125e-06, "loss": 0.6509, "step": 1406 }, { "epoch": 0.14, "grad_norm": 1.490843341021994, "learning_rate": 9.665061627171726e-06, "loss": 0.7009, "step": 1407 }, { "epoch": 0.14, "grad_norm": 1.4693193570860923, "learning_rate": 9.66446631753578e-06, "loss": 0.7434, "step": 1408 }, { "epoch": 0.14, "grad_norm": 1.3064183295426406, "learning_rate": 9.663870497693402e-06, "loss": 0.739, "step": 1409 }, { "epoch": 0.14, "grad_norm": 1.8248259548185382, "learning_rate": 9.663274167709766e-06, "loss": 0.7538, "step": 1410 }, { "epoch": 0.14, "grad_norm": 1.6500934058444034, "learning_rate": 9.662677327650101e-06, "loss": 0.6462, "step": 1411 }, { "epoch": 0.14, "grad_norm": 1.3863306759516125, "learning_rate": 9.662079977579689e-06, "loss": 0.7271, "step": 1412 }, { "epoch": 0.14, "grad_norm": 1.5613541842793988, "learning_rate": 9.661482117563868e-06, "loss": 0.6889, "step": 1413 }, { "epoch": 0.14, "grad_norm": 1.5651104145755754, "learning_rate": 9.660883747668033e-06, "loss": 0.7374, "step": 1414 }, { "epoch": 0.14, "grad_norm": 1.3578025933974691, "learning_rate": 9.660284867957637e-06, "loss": 0.6668, "step": 1415 }, { "epoch": 0.14, "grad_norm": 1.4807999972675896, "learning_rate": 9.659685478498183e-06, "loss": 0.736, "step": 1416 }, { "epoch": 0.14, "grad_norm": 1.3390239865903202, "learning_rate": 9.659085579355234e-06, "loss": 0.6375, "step": 1417 }, { "epoch": 0.14, "grad_norm": 1.5331749027336072, "learning_rate": 9.65848517059441e-06, "loss": 0.6417, "step": 1418 }, { "epoch": 0.14, "grad_norm": 1.3528351776742433, "learning_rate": 9.657884252281383e-06, "loss": 0.6217, "step": 1419 }, { "epoch": 0.15, "grad_norm": 1.488510680707841, "learning_rate": 9.657282824481882e-06, "loss": 0.5818, "step": 1420 }, { "epoch": 0.15, "grad_norm": 1.5734721621049839, "learning_rate": 9.656680887261693e-06, "loss": 0.726, "step": 1421 }, { "epoch": 0.15, "grad_norm": 1.5624316611646047, "learning_rate": 9.656078440686655e-06, "loss": 0.6439, "step": 1422 }, { "epoch": 0.15, "grad_norm": 1.8689863016770512, "learning_rate": 9.655475484822668e-06, "loss": 0.7858, "step": 1423 }, { "epoch": 0.15, "grad_norm": 1.3460644708310185, "learning_rate": 9.654872019735681e-06, "loss": 0.6819, "step": 1424 }, { "epoch": 0.15, "grad_norm": 1.4277690116352753, "learning_rate": 9.654268045491704e-06, "loss": 0.6779, "step": 1425 }, { "epoch": 0.15, "grad_norm": 1.3488816378258686, "learning_rate": 9.6536635621568e-06, "loss": 0.6347, "step": 1426 }, { "epoch": 0.15, "grad_norm": 1.6431425446759351, "learning_rate": 9.653058569797088e-06, "loss": 0.7178, "step": 1427 }, { "epoch": 0.15, "grad_norm": 1.423748543265916, "learning_rate": 9.652453068478742e-06, "loss": 0.6274, "step": 1428 }, { "epoch": 0.15, "grad_norm": 1.2895738520012205, "learning_rate": 9.651847058267994e-06, "loss": 0.5693, "step": 1429 }, { "epoch": 0.15, "grad_norm": 1.4072338417259398, "learning_rate": 9.65124053923113e-06, "loss": 0.7424, "step": 1430 }, { "epoch": 0.15, "grad_norm": 1.4583798762520206, "learning_rate": 9.650633511434491e-06, "loss": 0.7796, "step": 1431 }, { "epoch": 0.15, "grad_norm": 1.3413426454906427, "learning_rate": 9.650025974944478e-06, "loss": 0.5465, "step": 1432 }, { "epoch": 0.15, "grad_norm": 1.510482808621102, "learning_rate": 9.649417929827539e-06, "loss": 0.7074, "step": 1433 }, { "epoch": 0.15, "grad_norm": 1.4957987745674741, "learning_rate": 9.648809376150187e-06, "loss": 0.8154, "step": 1434 }, { "epoch": 0.15, "grad_norm": 1.377584099552141, "learning_rate": 9.648200313978986e-06, "loss": 0.7328, "step": 1435 }, { "epoch": 0.15, "grad_norm": 1.4696650302181735, "learning_rate": 9.647590743380555e-06, "loss": 0.6104, "step": 1436 }, { "epoch": 0.15, "grad_norm": 1.3888973974896437, "learning_rate": 9.64698066442157e-06, "loss": 0.6658, "step": 1437 }, { "epoch": 0.15, "grad_norm": 1.30169040177265, "learning_rate": 9.646370077168763e-06, "loss": 0.6364, "step": 1438 }, { "epoch": 0.15, "grad_norm": 1.5033912111385213, "learning_rate": 9.645758981688921e-06, "loss": 0.6959, "step": 1439 }, { "epoch": 0.15, "grad_norm": 1.4359368938669128, "learning_rate": 9.645147378048884e-06, "loss": 0.6957, "step": 1440 }, { "epoch": 0.15, "grad_norm": 1.6495834592149663, "learning_rate": 9.644535266315556e-06, "loss": 0.7991, "step": 1441 }, { "epoch": 0.15, "grad_norm": 1.4019474336393933, "learning_rate": 9.643922646555883e-06, "loss": 0.7217, "step": 1442 }, { "epoch": 0.15, "grad_norm": 1.596710856295514, "learning_rate": 9.64330951883688e-06, "loss": 0.6855, "step": 1443 }, { "epoch": 0.15, "grad_norm": 1.9173984268388269, "learning_rate": 9.64269588322561e-06, "loss": 0.7028, "step": 1444 }, { "epoch": 0.15, "grad_norm": 1.5466994911976115, "learning_rate": 9.642081739789193e-06, "loss": 0.6796, "step": 1445 }, { "epoch": 0.15, "grad_norm": 1.2635128593655178, "learning_rate": 9.641467088594807e-06, "loss": 0.5286, "step": 1446 }, { "epoch": 0.15, "grad_norm": 1.3300235231008248, "learning_rate": 9.64085192970968e-06, "loss": 0.6067, "step": 1447 }, { "epoch": 0.15, "grad_norm": 1.391610163610659, "learning_rate": 9.640236263201102e-06, "loss": 0.7477, "step": 1448 }, { "epoch": 0.15, "grad_norm": 1.5747464090879215, "learning_rate": 9.639620089136414e-06, "loss": 0.6794, "step": 1449 }, { "epoch": 0.15, "grad_norm": 1.4646136558844376, "learning_rate": 9.639003407583013e-06, "loss": 0.7251, "step": 1450 }, { "epoch": 0.15, "grad_norm": 1.513502773346232, "learning_rate": 9.638386218608357e-06, "loss": 0.6834, "step": 1451 }, { "epoch": 0.15, "grad_norm": 1.5419686875318517, "learning_rate": 9.637768522279951e-06, "loss": 0.6258, "step": 1452 }, { "epoch": 0.15, "grad_norm": 2.2383699610375642, "learning_rate": 9.63715031866536e-06, "loss": 0.5923, "step": 1453 }, { "epoch": 0.15, "grad_norm": 1.6146737083888074, "learning_rate": 9.636531607832206e-06, "loss": 0.7182, "step": 1454 }, { "epoch": 0.15, "grad_norm": 1.4628445123142233, "learning_rate": 9.635912389848164e-06, "loss": 0.765, "step": 1455 }, { "epoch": 0.15, "grad_norm": 1.3185916250053948, "learning_rate": 9.635292664780961e-06, "loss": 0.6177, "step": 1456 }, { "epoch": 0.15, "grad_norm": 1.6332831996433905, "learning_rate": 9.634672432698388e-06, "loss": 0.7229, "step": 1457 }, { "epoch": 0.15, "grad_norm": 1.2696452091699464, "learning_rate": 9.634051693668287e-06, "loss": 0.543, "step": 1458 }, { "epoch": 0.15, "grad_norm": 1.4233671342739074, "learning_rate": 9.633430447758554e-06, "loss": 0.6697, "step": 1459 }, { "epoch": 0.15, "grad_norm": 1.3533997046955304, "learning_rate": 9.632808695037141e-06, "loss": 0.6507, "step": 1460 }, { "epoch": 0.15, "grad_norm": 1.4730952473266374, "learning_rate": 9.632186435572057e-06, "loss": 0.6328, "step": 1461 }, { "epoch": 0.15, "grad_norm": 1.591551282746815, "learning_rate": 9.631563669431368e-06, "loss": 0.7021, "step": 1462 }, { "epoch": 0.15, "grad_norm": 1.3648456210816882, "learning_rate": 9.630940396683189e-06, "loss": 0.6732, "step": 1463 }, { "epoch": 0.15, "grad_norm": 1.4371406453337914, "learning_rate": 9.630316617395697e-06, "loss": 0.7258, "step": 1464 }, { "epoch": 0.15, "grad_norm": 1.5825189562439028, "learning_rate": 9.629692331637121e-06, "loss": 0.7612, "step": 1465 }, { "epoch": 0.15, "grad_norm": 1.8138180431602255, "learning_rate": 9.629067539475747e-06, "loss": 0.6631, "step": 1466 }, { "epoch": 0.15, "grad_norm": 1.4698044728743482, "learning_rate": 9.628442240979915e-06, "loss": 0.6452, "step": 1467 }, { "epoch": 0.15, "grad_norm": 1.6020914033411033, "learning_rate": 9.627816436218022e-06, "loss": 0.6741, "step": 1468 }, { "epoch": 0.15, "grad_norm": 1.618799974620149, "learning_rate": 9.62719012525852e-06, "loss": 0.7169, "step": 1469 }, { "epoch": 0.15, "grad_norm": 1.380936751596611, "learning_rate": 9.626563308169915e-06, "loss": 0.6831, "step": 1470 }, { "epoch": 0.15, "grad_norm": 1.3191966970418756, "learning_rate": 9.625935985020767e-06, "loss": 0.6015, "step": 1471 }, { "epoch": 0.15, "grad_norm": 1.3695738375790267, "learning_rate": 9.625308155879696e-06, "loss": 0.5855, "step": 1472 }, { "epoch": 0.15, "grad_norm": 1.384522361210549, "learning_rate": 9.624679820815377e-06, "loss": 0.6447, "step": 1473 }, { "epoch": 0.15, "grad_norm": 1.3695319280437601, "learning_rate": 9.624050979896533e-06, "loss": 0.6129, "step": 1474 }, { "epoch": 0.15, "grad_norm": 1.5591016502879882, "learning_rate": 9.623421633191952e-06, "loss": 0.738, "step": 1475 }, { "epoch": 0.15, "grad_norm": 1.246912072477499, "learning_rate": 9.62279178077047e-06, "loss": 0.6312, "step": 1476 }, { "epoch": 0.15, "grad_norm": 1.3896212736909361, "learning_rate": 9.622161422700983e-06, "loss": 0.7211, "step": 1477 }, { "epoch": 0.15, "grad_norm": 1.484899538550859, "learning_rate": 9.621530559052441e-06, "loss": 0.6351, "step": 1478 }, { "epoch": 0.15, "grad_norm": 1.4406631223603847, "learning_rate": 9.620899189893848e-06, "loss": 0.7208, "step": 1479 }, { "epoch": 0.15, "grad_norm": 1.5867604307175804, "learning_rate": 9.620267315294262e-06, "loss": 0.685, "step": 1480 }, { "epoch": 0.15, "grad_norm": 1.6179792392078578, "learning_rate": 9.619634935322803e-06, "loss": 0.7421, "step": 1481 }, { "epoch": 0.15, "grad_norm": 1.428860671872939, "learning_rate": 9.619002050048638e-06, "loss": 0.7065, "step": 1482 }, { "epoch": 0.15, "grad_norm": 1.6721469792035584, "learning_rate": 9.618368659540993e-06, "loss": 0.6665, "step": 1483 }, { "epoch": 0.15, "grad_norm": 1.5504371332382072, "learning_rate": 9.617734763869151e-06, "loss": 0.7195, "step": 1484 }, { "epoch": 0.15, "grad_norm": 1.5473725621741468, "learning_rate": 9.617100363102448e-06, "loss": 0.7243, "step": 1485 }, { "epoch": 0.15, "grad_norm": 1.6343162130796804, "learning_rate": 9.616465457310275e-06, "loss": 0.6254, "step": 1486 }, { "epoch": 0.15, "grad_norm": 1.6368790793883046, "learning_rate": 9.61583004656208e-06, "loss": 0.6967, "step": 1487 }, { "epoch": 0.15, "grad_norm": 1.454357984995009, "learning_rate": 9.615194130927363e-06, "loss": 0.6285, "step": 1488 }, { "epoch": 0.15, "grad_norm": 1.5590248563310252, "learning_rate": 9.614557710475685e-06, "loss": 0.6979, "step": 1489 }, { "epoch": 0.15, "grad_norm": 1.4251837785678463, "learning_rate": 9.613920785276655e-06, "loss": 0.7278, "step": 1490 }, { "epoch": 0.15, "grad_norm": 1.7633262901889806, "learning_rate": 9.613283355399944e-06, "loss": 0.747, "step": 1491 }, { "epoch": 0.15, "grad_norm": 1.3098886433002235, "learning_rate": 9.612645420915273e-06, "loss": 0.6166, "step": 1492 }, { "epoch": 0.15, "grad_norm": 1.5430984524492324, "learning_rate": 9.61200698189242e-06, "loss": 0.7392, "step": 1493 }, { "epoch": 0.15, "grad_norm": 1.366260901562195, "learning_rate": 9.611368038401221e-06, "loss": 0.6534, "step": 1494 }, { "epoch": 0.15, "grad_norm": 1.644070999123335, "learning_rate": 9.610728590511563e-06, "loss": 0.7069, "step": 1495 }, { "epoch": 0.15, "grad_norm": 1.681596997774252, "learning_rate": 9.610088638293387e-06, "loss": 0.7245, "step": 1496 }, { "epoch": 0.15, "grad_norm": 1.6667215859660491, "learning_rate": 9.609448181816696e-06, "loss": 0.6478, "step": 1497 }, { "epoch": 0.15, "grad_norm": 1.589376662579165, "learning_rate": 9.608807221151544e-06, "loss": 0.7307, "step": 1498 }, { "epoch": 0.15, "grad_norm": 1.4191657084704166, "learning_rate": 9.608165756368036e-06, "loss": 0.6817, "step": 1499 }, { "epoch": 0.15, "grad_norm": 1.4103605077805894, "learning_rate": 9.607523787536342e-06, "loss": 0.6921, "step": 1500 }, { "epoch": 0.15, "grad_norm": 1.4075090135185602, "learning_rate": 9.606881314726676e-06, "loss": 0.6386, "step": 1501 }, { "epoch": 0.15, "grad_norm": 1.3465807987172482, "learning_rate": 9.60623833800932e-06, "loss": 0.7166, "step": 1502 }, { "epoch": 0.15, "grad_norm": 1.5408780088913776, "learning_rate": 9.605594857454595e-06, "loss": 0.6535, "step": 1503 }, { "epoch": 0.15, "grad_norm": 1.4806779554958098, "learning_rate": 9.604950873132892e-06, "loss": 0.679, "step": 1504 }, { "epoch": 0.15, "grad_norm": 1.4234026805513744, "learning_rate": 9.60430638511465e-06, "loss": 0.7764, "step": 1505 }, { "epoch": 0.15, "grad_norm": 1.4310941157954817, "learning_rate": 9.603661393470361e-06, "loss": 0.7272, "step": 1506 }, { "epoch": 0.15, "grad_norm": 1.5218625386795175, "learning_rate": 9.60301589827058e-06, "loss": 0.6804, "step": 1507 }, { "epoch": 0.15, "grad_norm": 1.5862797870710539, "learning_rate": 9.602369899585909e-06, "loss": 0.7274, "step": 1508 }, { "epoch": 0.15, "grad_norm": 1.2580726991311957, "learning_rate": 9.601723397487007e-06, "loss": 0.6497, "step": 1509 }, { "epoch": 0.15, "grad_norm": 1.528109345736152, "learning_rate": 9.601076392044593e-06, "loss": 0.6308, "step": 1510 }, { "epoch": 0.15, "grad_norm": 1.5363168628806365, "learning_rate": 9.600428883329436e-06, "loss": 0.7263, "step": 1511 }, { "epoch": 0.15, "grad_norm": 1.5266320687848185, "learning_rate": 9.599780871412359e-06, "loss": 0.7146, "step": 1512 }, { "epoch": 0.15, "grad_norm": 1.8033088170961553, "learning_rate": 9.599132356364247e-06, "loss": 0.6226, "step": 1513 }, { "epoch": 0.15, "grad_norm": 1.5326877434197945, "learning_rate": 9.598483338256033e-06, "loss": 0.6927, "step": 1514 }, { "epoch": 0.15, "grad_norm": 1.4513607613670128, "learning_rate": 9.597833817158709e-06, "loss": 0.632, "step": 1515 }, { "epoch": 0.15, "grad_norm": 1.3711716536897667, "learning_rate": 9.597183793143319e-06, "loss": 0.5665, "step": 1516 }, { "epoch": 0.15, "grad_norm": 1.473909160877686, "learning_rate": 9.596533266280962e-06, "loss": 0.6562, "step": 1517 }, { "epoch": 0.16, "grad_norm": 1.357632188870578, "learning_rate": 9.595882236642797e-06, "loss": 0.6708, "step": 1518 }, { "epoch": 0.16, "grad_norm": 1.438087734296727, "learning_rate": 9.595230704300035e-06, "loss": 0.6201, "step": 1519 }, { "epoch": 0.16, "grad_norm": 3.132390172071574, "learning_rate": 9.59457866932394e-06, "loss": 0.6747, "step": 1520 }, { "epoch": 0.16, "grad_norm": 1.5472163137080857, "learning_rate": 9.593926131785832e-06, "loss": 0.7064, "step": 1521 }, { "epoch": 0.16, "grad_norm": 1.340509224892641, "learning_rate": 9.593273091757085e-06, "loss": 0.6107, "step": 1522 }, { "epoch": 0.16, "grad_norm": 1.387685079839363, "learning_rate": 9.592619549309134e-06, "loss": 0.6988, "step": 1523 }, { "epoch": 0.16, "grad_norm": 1.5588511015348792, "learning_rate": 9.59196550451346e-06, "loss": 0.6902, "step": 1524 }, { "epoch": 0.16, "grad_norm": 1.6404787007754897, "learning_rate": 9.591310957441608e-06, "loss": 0.7658, "step": 1525 }, { "epoch": 0.16, "grad_norm": 1.3612412610474582, "learning_rate": 9.590655908165171e-06, "loss": 0.6114, "step": 1526 }, { "epoch": 0.16, "grad_norm": 1.4437441285401336, "learning_rate": 9.590000356755798e-06, "loss": 0.6812, "step": 1527 }, { "epoch": 0.16, "grad_norm": 1.335250516216802, "learning_rate": 9.589344303285195e-06, "loss": 0.6186, "step": 1528 }, { "epoch": 0.16, "grad_norm": 1.358435971740354, "learning_rate": 9.588687747825123e-06, "loss": 0.6249, "step": 1529 }, { "epoch": 0.16, "grad_norm": 1.5140445593858904, "learning_rate": 9.588030690447398e-06, "loss": 0.6048, "step": 1530 }, { "epoch": 0.16, "grad_norm": 1.542812325771578, "learning_rate": 9.587373131223888e-06, "loss": 0.7192, "step": 1531 }, { "epoch": 0.16, "grad_norm": 1.47886491063619, "learning_rate": 9.586715070226516e-06, "loss": 0.7338, "step": 1532 }, { "epoch": 0.16, "grad_norm": 1.4474796054572896, "learning_rate": 9.586056507527266e-06, "loss": 0.6419, "step": 1533 }, { "epoch": 0.16, "grad_norm": 1.4615528933771689, "learning_rate": 9.58539744319817e-06, "loss": 0.6826, "step": 1534 }, { "epoch": 0.16, "grad_norm": 1.391848406065365, "learning_rate": 9.584737877311318e-06, "loss": 0.7826, "step": 1535 }, { "epoch": 0.16, "grad_norm": 1.3808486309987018, "learning_rate": 9.584077809938856e-06, "loss": 0.67, "step": 1536 }, { "epoch": 0.16, "grad_norm": 1.451191553950156, "learning_rate": 9.58341724115298e-06, "loss": 0.7397, "step": 1537 }, { "epoch": 0.16, "grad_norm": 1.3006835952286737, "learning_rate": 9.582756171025946e-06, "loss": 0.6809, "step": 1538 }, { "epoch": 0.16, "grad_norm": 1.4417984531810426, "learning_rate": 9.582094599630063e-06, "loss": 0.7134, "step": 1539 }, { "epoch": 0.16, "grad_norm": 1.6346764723299112, "learning_rate": 9.581432527037693e-06, "loss": 0.7054, "step": 1540 }, { "epoch": 0.16, "grad_norm": 1.3093058352071723, "learning_rate": 9.580769953321255e-06, "loss": 0.6405, "step": 1541 }, { "epoch": 0.16, "grad_norm": 1.3152849357506609, "learning_rate": 9.580106878553227e-06, "loss": 0.657, "step": 1542 }, { "epoch": 0.16, "grad_norm": 1.4818471617140818, "learning_rate": 9.57944330280613e-06, "loss": 0.702, "step": 1543 }, { "epoch": 0.16, "grad_norm": 1.719938185075544, "learning_rate": 9.57877922615255e-06, "loss": 0.7715, "step": 1544 }, { "epoch": 0.16, "grad_norm": 1.571042408808525, "learning_rate": 9.578114648665126e-06, "loss": 0.6349, "step": 1545 }, { "epoch": 0.16, "grad_norm": 1.4982235236521535, "learning_rate": 9.577449570416549e-06, "loss": 0.6493, "step": 1546 }, { "epoch": 0.16, "grad_norm": 1.4557190800344633, "learning_rate": 9.576783991479566e-06, "loss": 0.7378, "step": 1547 }, { "epoch": 0.16, "grad_norm": 1.498835557477676, "learning_rate": 9.576117911926978e-06, "loss": 0.7332, "step": 1548 }, { "epoch": 0.16, "grad_norm": 1.4838667132919754, "learning_rate": 9.575451331831646e-06, "loss": 0.6501, "step": 1549 }, { "epoch": 0.16, "grad_norm": 1.408877958755017, "learning_rate": 9.574784251266477e-06, "loss": 0.6144, "step": 1550 }, { "epoch": 0.16, "grad_norm": 1.4052099242368024, "learning_rate": 9.574116670304441e-06, "loss": 0.6842, "step": 1551 }, { "epoch": 0.16, "grad_norm": 1.3812007104611597, "learning_rate": 9.573448589018556e-06, "loss": 0.5827, "step": 1552 }, { "epoch": 0.16, "grad_norm": 1.373959602687861, "learning_rate": 9.572780007481899e-06, "loss": 0.6656, "step": 1553 }, { "epoch": 0.16, "grad_norm": 1.5246478944771633, "learning_rate": 9.5721109257676e-06, "loss": 0.6938, "step": 1554 }, { "epoch": 0.16, "grad_norm": 1.506393283401899, "learning_rate": 9.571441343948848e-06, "loss": 0.7488, "step": 1555 }, { "epoch": 0.16, "grad_norm": 1.5103834808203875, "learning_rate": 9.570771262098878e-06, "loss": 0.7894, "step": 1556 }, { "epoch": 0.16, "grad_norm": 1.6352439622870119, "learning_rate": 9.570100680290983e-06, "loss": 0.7083, "step": 1557 }, { "epoch": 0.16, "grad_norm": 1.2725983292009073, "learning_rate": 9.569429598598517e-06, "loss": 0.7497, "step": 1558 }, { "epoch": 0.16, "grad_norm": 1.7951892606998798, "learning_rate": 9.568758017094884e-06, "loss": 0.8169, "step": 1559 }, { "epoch": 0.16, "grad_norm": 1.5428596003409887, "learning_rate": 9.56808593585354e-06, "loss": 0.664, "step": 1560 }, { "epoch": 0.16, "grad_norm": 1.4544473044947233, "learning_rate": 9.567413354947998e-06, "loss": 0.7119, "step": 1561 }, { "epoch": 0.16, "grad_norm": 1.5801178637337885, "learning_rate": 9.566740274451826e-06, "loss": 0.5645, "step": 1562 }, { "epoch": 0.16, "grad_norm": 1.4010297202076905, "learning_rate": 9.56606669443865e-06, "loss": 0.6563, "step": 1563 }, { "epoch": 0.16, "grad_norm": 1.2447315385248137, "learning_rate": 9.565392614982145e-06, "loss": 0.6323, "step": 1564 }, { "epoch": 0.16, "grad_norm": 1.493048995791234, "learning_rate": 9.564718036156043e-06, "loss": 0.7352, "step": 1565 }, { "epoch": 0.16, "grad_norm": 1.2963547631028147, "learning_rate": 9.564042958034128e-06, "loss": 0.5655, "step": 1566 }, { "epoch": 0.16, "grad_norm": 1.4820408227666333, "learning_rate": 9.563367380690244e-06, "loss": 0.7534, "step": 1567 }, { "epoch": 0.16, "grad_norm": 1.7020138749805864, "learning_rate": 9.562691304198286e-06, "loss": 0.6185, "step": 1568 }, { "epoch": 0.16, "grad_norm": 1.6068178043409806, "learning_rate": 9.562014728632204e-06, "loss": 0.6835, "step": 1569 }, { "epoch": 0.16, "grad_norm": 1.3617465019605763, "learning_rate": 9.561337654066003e-06, "loss": 0.6965, "step": 1570 }, { "epoch": 0.16, "grad_norm": 1.5671358380918343, "learning_rate": 9.560660080573742e-06, "loss": 0.6727, "step": 1571 }, { "epoch": 0.16, "grad_norm": 1.5851810186026196, "learning_rate": 9.559982008229537e-06, "loss": 0.6199, "step": 1572 }, { "epoch": 0.16, "grad_norm": 1.42929796750644, "learning_rate": 9.559303437107554e-06, "loss": 0.6557, "step": 1573 }, { "epoch": 0.16, "grad_norm": 1.4541945006863342, "learning_rate": 9.558624367282018e-06, "loss": 0.7126, "step": 1574 }, { "epoch": 0.16, "grad_norm": 1.3371458186066096, "learning_rate": 9.557944798827205e-06, "loss": 0.6033, "step": 1575 }, { "epoch": 0.16, "grad_norm": 1.5971634133505295, "learning_rate": 9.557264731817451e-06, "loss": 0.6972, "step": 1576 }, { "epoch": 0.16, "grad_norm": 1.4590599472939012, "learning_rate": 9.556584166327136e-06, "loss": 0.6618, "step": 1577 }, { "epoch": 0.16, "grad_norm": 1.3713428641895076, "learning_rate": 9.555903102430708e-06, "loss": 0.6258, "step": 1578 }, { "epoch": 0.16, "grad_norm": 1.5554745758985775, "learning_rate": 9.555221540202659e-06, "loss": 0.6263, "step": 1579 }, { "epoch": 0.16, "grad_norm": 1.3344530704346602, "learning_rate": 9.55453947971754e-06, "loss": 0.8261, "step": 1580 }, { "epoch": 0.16, "grad_norm": 1.648205029567865, "learning_rate": 9.553856921049955e-06, "loss": 0.6581, "step": 1581 }, { "epoch": 0.16, "grad_norm": 1.5304360857123083, "learning_rate": 9.553173864274567e-06, "loss": 0.6981, "step": 1582 }, { "epoch": 0.16, "grad_norm": 1.562372003559339, "learning_rate": 9.552490309466086e-06, "loss": 0.7604, "step": 1583 }, { "epoch": 0.16, "grad_norm": 1.472440110675299, "learning_rate": 9.551806256699281e-06, "loss": 0.7114, "step": 1584 }, { "epoch": 0.16, "grad_norm": 1.396797109698029, "learning_rate": 9.551121706048975e-06, "loss": 0.8255, "step": 1585 }, { "epoch": 0.16, "grad_norm": 1.6133874604545033, "learning_rate": 9.550436657590047e-06, "loss": 0.6086, "step": 1586 }, { "epoch": 0.16, "grad_norm": 1.4836507100506104, "learning_rate": 9.549751111397427e-06, "loss": 0.7496, "step": 1587 }, { "epoch": 0.16, "grad_norm": 1.5275757512063057, "learning_rate": 9.549065067546101e-06, "loss": 0.7458, "step": 1588 }, { "epoch": 0.16, "grad_norm": 1.4137008765621732, "learning_rate": 9.548378526111108e-06, "loss": 0.6048, "step": 1589 }, { "epoch": 0.16, "grad_norm": 1.2726381457847646, "learning_rate": 9.547691487167546e-06, "loss": 0.5708, "step": 1590 }, { "epoch": 0.16, "grad_norm": 1.6861524986184642, "learning_rate": 9.547003950790563e-06, "loss": 0.698, "step": 1591 }, { "epoch": 0.16, "grad_norm": 1.5068505503898013, "learning_rate": 9.546315917055362e-06, "loss": 0.6543, "step": 1592 }, { "epoch": 0.16, "grad_norm": 1.5016374998240887, "learning_rate": 9.545627386037203e-06, "loss": 0.7354, "step": 1593 }, { "epoch": 0.16, "grad_norm": 1.2891953346130607, "learning_rate": 9.544938357811397e-06, "loss": 0.7938, "step": 1594 }, { "epoch": 0.16, "grad_norm": 1.3165071556758978, "learning_rate": 9.544248832453312e-06, "loss": 0.5835, "step": 1595 }, { "epoch": 0.16, "grad_norm": 1.315289252963215, "learning_rate": 9.54355881003837e-06, "loss": 0.696, "step": 1596 }, { "epoch": 0.16, "grad_norm": 1.6784296831875216, "learning_rate": 9.542868290642043e-06, "loss": 0.6462, "step": 1597 }, { "epoch": 0.16, "grad_norm": 1.4519343487469687, "learning_rate": 9.542177274339864e-06, "loss": 0.6696, "step": 1598 }, { "epoch": 0.16, "grad_norm": 1.5683012008940351, "learning_rate": 9.541485761207418e-06, "loss": 0.6462, "step": 1599 }, { "epoch": 0.16, "grad_norm": 1.395201049462234, "learning_rate": 9.540793751320341e-06, "loss": 0.5788, "step": 1600 }, { "epoch": 0.16, "grad_norm": 1.4772815638969767, "learning_rate": 9.540101244754329e-06, "loss": 0.7355, "step": 1601 }, { "epoch": 0.16, "grad_norm": 1.5019052449179258, "learning_rate": 9.539408241585129e-06, "loss": 0.674, "step": 1602 }, { "epoch": 0.16, "grad_norm": 1.361952496888972, "learning_rate": 9.538714741888542e-06, "loss": 0.6465, "step": 1603 }, { "epoch": 0.16, "grad_norm": 1.708498549209986, "learning_rate": 9.538020745740425e-06, "loss": 0.7775, "step": 1604 }, { "epoch": 0.16, "grad_norm": 1.6305325609816383, "learning_rate": 9.537326253216685e-06, "loss": 0.7275, "step": 1605 }, { "epoch": 0.16, "grad_norm": 1.5482808949190132, "learning_rate": 9.536631264393289e-06, "loss": 0.7101, "step": 1606 }, { "epoch": 0.16, "grad_norm": 1.496886127960313, "learning_rate": 9.535935779346257e-06, "loss": 0.7418, "step": 1607 }, { "epoch": 0.16, "grad_norm": 1.333824956951945, "learning_rate": 9.535239798151662e-06, "loss": 0.6962, "step": 1608 }, { "epoch": 0.16, "grad_norm": 1.4636959267980236, "learning_rate": 9.53454332088563e-06, "loss": 0.7323, "step": 1609 }, { "epoch": 0.16, "grad_norm": 1.3971988683373153, "learning_rate": 9.533846347624342e-06, "loss": 0.7051, "step": 1610 }, { "epoch": 0.16, "grad_norm": 1.3929423520901576, "learning_rate": 9.533148878444037e-06, "loss": 0.5738, "step": 1611 }, { "epoch": 0.16, "grad_norm": 1.5501957020057526, "learning_rate": 9.532450913421005e-06, "loss": 0.704, "step": 1612 }, { "epoch": 0.16, "grad_norm": 1.45852758141409, "learning_rate": 9.531752452631586e-06, "loss": 0.7082, "step": 1613 }, { "epoch": 0.16, "grad_norm": 1.7093639658296966, "learning_rate": 9.531053496152186e-06, "loss": 0.7086, "step": 1614 }, { "epoch": 0.16, "grad_norm": 1.6414423861868286, "learning_rate": 9.53035404405925e-06, "loss": 0.8041, "step": 1615 }, { "epoch": 0.17, "grad_norm": 1.410611440273584, "learning_rate": 9.52965409642929e-06, "loss": 0.618, "step": 1616 }, { "epoch": 0.17, "grad_norm": 1.543111509373193, "learning_rate": 9.528953653338867e-06, "loss": 0.5755, "step": 1617 }, { "epoch": 0.17, "grad_norm": 1.5511918901828483, "learning_rate": 9.528252714864595e-06, "loss": 0.6249, "step": 1618 }, { "epoch": 0.17, "grad_norm": 1.6876251347805522, "learning_rate": 9.527551281083145e-06, "loss": 0.7767, "step": 1619 }, { "epoch": 0.17, "grad_norm": 1.5153511555897174, "learning_rate": 9.526849352071241e-06, "loss": 0.7052, "step": 1620 }, { "epoch": 0.17, "grad_norm": 1.5026706192325665, "learning_rate": 9.52614692790566e-06, "loss": 0.7145, "step": 1621 }, { "epoch": 0.17, "grad_norm": 1.2348152065296556, "learning_rate": 9.525444008663233e-06, "loss": 0.6721, "step": 1622 }, { "epoch": 0.17, "grad_norm": 1.5761225396202383, "learning_rate": 9.524740594420851e-06, "loss": 0.6572, "step": 1623 }, { "epoch": 0.17, "grad_norm": 1.43783279229503, "learning_rate": 9.52403668525545e-06, "loss": 0.7295, "step": 1624 }, { "epoch": 0.17, "grad_norm": 1.4126900077762568, "learning_rate": 9.523332281244027e-06, "loss": 0.5935, "step": 1625 }, { "epoch": 0.17, "grad_norm": 1.2938176453009855, "learning_rate": 9.522627382463629e-06, "loss": 0.6331, "step": 1626 }, { "epoch": 0.17, "grad_norm": 1.3207110363440822, "learning_rate": 9.52192198899136e-06, "loss": 0.646, "step": 1627 }, { "epoch": 0.17, "grad_norm": 1.5082567767428459, "learning_rate": 9.521216100904379e-06, "loss": 0.7104, "step": 1628 }, { "epoch": 0.17, "grad_norm": 1.6029530709834787, "learning_rate": 9.520509718279891e-06, "loss": 0.6876, "step": 1629 }, { "epoch": 0.17, "grad_norm": 1.5098569848846615, "learning_rate": 9.51980284119517e-06, "loss": 0.55, "step": 1630 }, { "epoch": 0.17, "grad_norm": 1.4637678709801538, "learning_rate": 9.519095469727528e-06, "loss": 0.7208, "step": 1631 }, { "epoch": 0.17, "grad_norm": 1.5875336183710087, "learning_rate": 9.51838760395434e-06, "loss": 0.6577, "step": 1632 }, { "epoch": 0.17, "grad_norm": 1.5102202801737592, "learning_rate": 9.517679243953037e-06, "loss": 0.6514, "step": 1633 }, { "epoch": 0.17, "grad_norm": 1.742699699559081, "learning_rate": 9.516970389801096e-06, "loss": 0.7217, "step": 1634 }, { "epoch": 0.17, "grad_norm": 1.4324585450840395, "learning_rate": 9.516261041576056e-06, "loss": 0.6642, "step": 1635 }, { "epoch": 0.17, "grad_norm": 1.5258876292336154, "learning_rate": 9.515551199355504e-06, "loss": 0.6243, "step": 1636 }, { "epoch": 0.17, "grad_norm": 1.6055781393082964, "learning_rate": 9.514840863217084e-06, "loss": 0.6708, "step": 1637 }, { "epoch": 0.17, "grad_norm": 1.5300484721737155, "learning_rate": 9.514130033238494e-06, "loss": 0.6059, "step": 1638 }, { "epoch": 0.17, "grad_norm": 1.430795800971349, "learning_rate": 9.513418709497489e-06, "loss": 0.5891, "step": 1639 }, { "epoch": 0.17, "grad_norm": 1.580921873042615, "learning_rate": 9.512706892071869e-06, "loss": 0.7967, "step": 1640 }, { "epoch": 0.17, "grad_norm": 1.3658838811651157, "learning_rate": 9.511994581039496e-06, "loss": 0.7197, "step": 1641 }, { "epoch": 0.17, "grad_norm": 1.5118580745778194, "learning_rate": 9.511281776478285e-06, "loss": 0.7111, "step": 1642 }, { "epoch": 0.17, "grad_norm": 1.368157778545191, "learning_rate": 9.510568478466202e-06, "loss": 0.5243, "step": 1643 }, { "epoch": 0.17, "grad_norm": 1.6555200727914618, "learning_rate": 9.509854687081271e-06, "loss": 0.7996, "step": 1644 }, { "epoch": 0.17, "grad_norm": 1.4551568090205904, "learning_rate": 9.509140402401563e-06, "loss": 0.6453, "step": 1645 }, { "epoch": 0.17, "grad_norm": 1.4836448889136906, "learning_rate": 9.508425624505212e-06, "loss": 0.5978, "step": 1646 }, { "epoch": 0.17, "grad_norm": 1.5925007250375587, "learning_rate": 9.507710353470399e-06, "loss": 0.7738, "step": 1647 }, { "epoch": 0.17, "grad_norm": 1.6322026738632658, "learning_rate": 9.506994589375363e-06, "loss": 0.5878, "step": 1648 }, { "epoch": 0.17, "grad_norm": 1.4785226533261897, "learning_rate": 9.506278332298395e-06, "loss": 0.711, "step": 1649 }, { "epoch": 0.17, "grad_norm": 1.435193395428221, "learning_rate": 9.505561582317839e-06, "loss": 0.7103, "step": 1650 }, { "epoch": 0.17, "grad_norm": 1.5014914545269806, "learning_rate": 9.504844339512096e-06, "loss": 0.7306, "step": 1651 }, { "epoch": 0.17, "grad_norm": 1.4151507265102012, "learning_rate": 9.504126603959618e-06, "loss": 0.6462, "step": 1652 }, { "epoch": 0.17, "grad_norm": 1.6141989301540927, "learning_rate": 9.503408375738913e-06, "loss": 0.6605, "step": 1653 }, { "epoch": 0.17, "grad_norm": 1.2853182232366571, "learning_rate": 9.502689654928541e-06, "loss": 0.7224, "step": 1654 }, { "epoch": 0.17, "grad_norm": 1.4686661219572088, "learning_rate": 9.501970441607116e-06, "loss": 0.583, "step": 1655 }, { "epoch": 0.17, "grad_norm": 1.4634934246856632, "learning_rate": 9.501250735853307e-06, "loss": 0.7708, "step": 1656 }, { "epoch": 0.17, "grad_norm": 1.3612490348602306, "learning_rate": 9.500530537745838e-06, "loss": 0.6657, "step": 1657 }, { "epoch": 0.17, "grad_norm": 1.517105544830488, "learning_rate": 9.499809847363486e-06, "loss": 0.6398, "step": 1658 }, { "epoch": 0.17, "grad_norm": 1.4026169605192231, "learning_rate": 9.499088664785078e-06, "loss": 0.6305, "step": 1659 }, { "epoch": 0.17, "grad_norm": 1.59277863388078, "learning_rate": 9.498366990089501e-06, "loss": 0.7233, "step": 1660 }, { "epoch": 0.17, "grad_norm": 1.4612262904823001, "learning_rate": 9.49764482335569e-06, "loss": 0.6191, "step": 1661 }, { "epoch": 0.17, "grad_norm": 1.4744547010084748, "learning_rate": 9.496922164662638e-06, "loss": 0.7614, "step": 1662 }, { "epoch": 0.17, "grad_norm": 1.618107160013442, "learning_rate": 9.496199014089391e-06, "loss": 0.7521, "step": 1663 }, { "epoch": 0.17, "grad_norm": 1.391227082141835, "learning_rate": 9.495475371715051e-06, "loss": 0.5692, "step": 1664 }, { "epoch": 0.17, "grad_norm": 1.3273451812290873, "learning_rate": 9.494751237618765e-06, "loss": 0.6552, "step": 1665 }, { "epoch": 0.17, "grad_norm": 1.564120893012405, "learning_rate": 9.494026611879744e-06, "loss": 0.6468, "step": 1666 }, { "epoch": 0.17, "grad_norm": 1.4305418541456534, "learning_rate": 9.493301494577248e-06, "loss": 0.6439, "step": 1667 }, { "epoch": 0.17, "grad_norm": 1.61309559328116, "learning_rate": 9.492575885790591e-06, "loss": 0.7335, "step": 1668 }, { "epoch": 0.17, "grad_norm": 1.416101643538599, "learning_rate": 9.491849785599142e-06, "loss": 0.6126, "step": 1669 }, { "epoch": 0.17, "grad_norm": 1.3304850000112909, "learning_rate": 9.491123194082322e-06, "loss": 0.6468, "step": 1670 }, { "epoch": 0.17, "grad_norm": 1.2825086849043295, "learning_rate": 9.490396111319608e-06, "loss": 0.6855, "step": 1671 }, { "epoch": 0.17, "grad_norm": 1.4010011636605755, "learning_rate": 9.489668537390527e-06, "loss": 0.6731, "step": 1672 }, { "epoch": 0.17, "grad_norm": 1.621760361140951, "learning_rate": 9.488940472374663e-06, "loss": 0.7062, "step": 1673 }, { "epoch": 0.17, "grad_norm": 1.4555525098211213, "learning_rate": 9.488211916351656e-06, "loss": 0.6928, "step": 1674 }, { "epoch": 0.17, "grad_norm": 1.4446947362022275, "learning_rate": 9.487482869401193e-06, "loss": 0.7434, "step": 1675 }, { "epoch": 0.17, "grad_norm": 1.5075750343948302, "learning_rate": 9.486753331603018e-06, "loss": 0.6858, "step": 1676 }, { "epoch": 0.17, "grad_norm": 1.4037802338331826, "learning_rate": 9.486023303036931e-06, "loss": 0.681, "step": 1677 }, { "epoch": 0.17, "grad_norm": 1.6449340617542183, "learning_rate": 9.485292783782785e-06, "loss": 0.6504, "step": 1678 }, { "epoch": 0.17, "grad_norm": 1.352509794096973, "learning_rate": 9.484561773920479e-06, "loss": 0.6019, "step": 1679 }, { "epoch": 0.17, "grad_norm": 1.4870709277648944, "learning_rate": 9.48383027352998e-06, "loss": 0.667, "step": 1680 }, { "epoch": 0.17, "grad_norm": 1.7188555048510545, "learning_rate": 9.483098282691297e-06, "loss": 0.6639, "step": 1681 }, { "epoch": 0.17, "grad_norm": 1.5244513565258115, "learning_rate": 9.482365801484493e-06, "loss": 0.674, "step": 1682 }, { "epoch": 0.17, "grad_norm": 1.568999967207152, "learning_rate": 9.481632829989692e-06, "loss": 0.7214, "step": 1683 }, { "epoch": 0.17, "grad_norm": 1.5586246838669555, "learning_rate": 9.480899368287067e-06, "loss": 0.706, "step": 1684 }, { "epoch": 0.17, "grad_norm": 1.3713739743748008, "learning_rate": 9.480165416456846e-06, "loss": 0.7169, "step": 1685 }, { "epoch": 0.17, "grad_norm": 1.5598774269719378, "learning_rate": 9.479430974579307e-06, "loss": 0.6824, "step": 1686 }, { "epoch": 0.17, "grad_norm": 1.4684049437380562, "learning_rate": 9.478696042734784e-06, "loss": 0.7285, "step": 1687 }, { "epoch": 0.17, "grad_norm": 1.8118797127608164, "learning_rate": 9.47796062100367e-06, "loss": 0.5826, "step": 1688 }, { "epoch": 0.17, "grad_norm": 1.4933180016085976, "learning_rate": 9.477224709466403e-06, "loss": 0.7188, "step": 1689 }, { "epoch": 0.17, "grad_norm": 1.3685340560402763, "learning_rate": 9.476488308203477e-06, "loss": 0.6569, "step": 1690 }, { "epoch": 0.17, "grad_norm": 1.2594914292480852, "learning_rate": 9.475751417295443e-06, "loss": 0.614, "step": 1691 }, { "epoch": 0.17, "grad_norm": 1.433443751347294, "learning_rate": 9.475014036822902e-06, "loss": 0.5764, "step": 1692 }, { "epoch": 0.17, "grad_norm": 1.484586317276886, "learning_rate": 9.47427616686651e-06, "loss": 0.7329, "step": 1693 }, { "epoch": 0.17, "grad_norm": 1.4875288111877898, "learning_rate": 9.473537807506978e-06, "loss": 0.717, "step": 1694 }, { "epoch": 0.17, "grad_norm": 1.6118479265446213, "learning_rate": 9.472798958825067e-06, "loss": 0.6301, "step": 1695 }, { "epoch": 0.17, "grad_norm": 1.5511453747221036, "learning_rate": 9.472059620901593e-06, "loss": 0.6686, "step": 1696 }, { "epoch": 0.17, "grad_norm": 1.5022356213572938, "learning_rate": 9.471319793817427e-06, "loss": 0.6745, "step": 1697 }, { "epoch": 0.17, "grad_norm": 1.531800036128653, "learning_rate": 9.470579477653491e-06, "loss": 0.6784, "step": 1698 }, { "epoch": 0.17, "grad_norm": 1.4624213814651454, "learning_rate": 9.469838672490764e-06, "loss": 0.6685, "step": 1699 }, { "epoch": 0.17, "grad_norm": 1.6396544832954385, "learning_rate": 9.469097378410276e-06, "loss": 0.6696, "step": 1700 }, { "epoch": 0.17, "grad_norm": 1.3334397493130765, "learning_rate": 9.46835559549311e-06, "loss": 0.6668, "step": 1701 }, { "epoch": 0.17, "grad_norm": 1.6260036862011205, "learning_rate": 9.467613323820404e-06, "loss": 0.715, "step": 1702 }, { "epoch": 0.17, "grad_norm": 1.5633795324154858, "learning_rate": 9.466870563473347e-06, "loss": 0.7874, "step": 1703 }, { "epoch": 0.17, "grad_norm": 1.5112243278513326, "learning_rate": 9.466127314533187e-06, "loss": 0.6481, "step": 1704 }, { "epoch": 0.17, "grad_norm": 1.469236406263723, "learning_rate": 9.465383577081217e-06, "loss": 0.6674, "step": 1705 }, { "epoch": 0.17, "grad_norm": 1.4463848808079671, "learning_rate": 9.464639351198792e-06, "loss": 0.6836, "step": 1706 }, { "epoch": 0.17, "grad_norm": 1.5253841731948372, "learning_rate": 9.463894636967314e-06, "loss": 0.7181, "step": 1707 }, { "epoch": 0.17, "grad_norm": 1.4750559509191434, "learning_rate": 9.463149434468244e-06, "loss": 0.7269, "step": 1708 }, { "epoch": 0.17, "grad_norm": 1.5658870068828061, "learning_rate": 9.46240374378309e-06, "loss": 0.6579, "step": 1709 }, { "epoch": 0.17, "grad_norm": 1.5422243414349912, "learning_rate": 9.461657564993419e-06, "loss": 0.6335, "step": 1710 }, { "epoch": 0.17, "grad_norm": 1.7655619829906615, "learning_rate": 9.460910898180849e-06, "loss": 0.6807, "step": 1711 }, { "epoch": 0.17, "grad_norm": 1.3863217598607407, "learning_rate": 9.460163743427049e-06, "loss": 0.5345, "step": 1712 }, { "epoch": 0.17, "grad_norm": 1.4382602134141447, "learning_rate": 9.459416100813746e-06, "loss": 0.6448, "step": 1713 }, { "epoch": 0.18, "grad_norm": 1.4399494978974132, "learning_rate": 9.45866797042272e-06, "loss": 0.7502, "step": 1714 }, { "epoch": 0.18, "grad_norm": 1.4757459046673875, "learning_rate": 9.4579193523358e-06, "loss": 0.6978, "step": 1715 }, { "epoch": 0.18, "grad_norm": 1.3832771703523532, "learning_rate": 9.457170246634872e-06, "loss": 0.6032, "step": 1716 }, { "epoch": 0.18, "grad_norm": 1.502777553846113, "learning_rate": 9.456420653401874e-06, "loss": 0.7592, "step": 1717 }, { "epoch": 0.18, "grad_norm": 1.3586603118183513, "learning_rate": 9.455670572718798e-06, "loss": 0.606, "step": 1718 }, { "epoch": 0.18, "grad_norm": 1.468801141519581, "learning_rate": 9.45492000466769e-06, "loss": 0.6461, "step": 1719 }, { "epoch": 0.18, "grad_norm": 1.454905761386861, "learning_rate": 9.454168949330644e-06, "loss": 0.6121, "step": 1720 }, { "epoch": 0.18, "grad_norm": 1.662934042181215, "learning_rate": 9.453417406789817e-06, "loss": 0.6864, "step": 1721 }, { "epoch": 0.18, "grad_norm": 1.5686921210502314, "learning_rate": 9.452665377127411e-06, "loss": 0.6569, "step": 1722 }, { "epoch": 0.18, "grad_norm": 1.8151319981583374, "learning_rate": 9.451912860425685e-06, "loss": 0.8101, "step": 1723 }, { "epoch": 0.18, "grad_norm": 1.430100768976858, "learning_rate": 9.451159856766948e-06, "loss": 0.7218, "step": 1724 }, { "epoch": 0.18, "grad_norm": 1.5624802445040433, "learning_rate": 9.450406366233568e-06, "loss": 0.6297, "step": 1725 }, { "epoch": 0.18, "grad_norm": 1.7029314027074733, "learning_rate": 9.449652388907963e-06, "loss": 0.6853, "step": 1726 }, { "epoch": 0.18, "grad_norm": 1.3745479257010567, "learning_rate": 9.448897924872602e-06, "loss": 0.6402, "step": 1727 }, { "epoch": 0.18, "grad_norm": 1.5503635486531262, "learning_rate": 9.44814297421001e-06, "loss": 0.6236, "step": 1728 }, { "epoch": 0.18, "grad_norm": 1.6226580502175567, "learning_rate": 9.447387537002764e-06, "loss": 0.7467, "step": 1729 }, { "epoch": 0.18, "grad_norm": 1.456750131617158, "learning_rate": 9.446631613333498e-06, "loss": 0.6728, "step": 1730 }, { "epoch": 0.18, "grad_norm": 2.1323595294755275, "learning_rate": 9.445875203284894e-06, "loss": 0.6382, "step": 1731 }, { "epoch": 0.18, "grad_norm": 1.3962761422340562, "learning_rate": 9.445118306939686e-06, "loss": 0.6583, "step": 1732 }, { "epoch": 0.18, "grad_norm": 1.9242967079540525, "learning_rate": 9.444360924380672e-06, "loss": 0.7408, "step": 1733 }, { "epoch": 0.18, "grad_norm": 1.5681311174218198, "learning_rate": 9.443603055690687e-06, "loss": 0.6428, "step": 1734 }, { "epoch": 0.18, "grad_norm": 1.3836642710011127, "learning_rate": 9.442844700952635e-06, "loss": 0.7529, "step": 1735 }, { "epoch": 0.18, "grad_norm": 1.4754528477914082, "learning_rate": 9.442085860249461e-06, "loss": 0.7129, "step": 1736 }, { "epoch": 0.18, "grad_norm": 1.5595519538521114, "learning_rate": 9.441326533664172e-06, "loss": 0.7418, "step": 1737 }, { "epoch": 0.18, "grad_norm": 1.3751014459782782, "learning_rate": 9.440566721279823e-06, "loss": 0.6203, "step": 1738 }, { "epoch": 0.18, "grad_norm": 1.253911500778563, "learning_rate": 9.439806423179523e-06, "loss": 0.5493, "step": 1739 }, { "epoch": 0.18, "grad_norm": 1.6462943428049708, "learning_rate": 9.439045639446433e-06, "loss": 0.6678, "step": 1740 }, { "epoch": 0.18, "grad_norm": 1.4319278284936126, "learning_rate": 9.438284370163771e-06, "loss": 0.7224, "step": 1741 }, { "epoch": 0.18, "grad_norm": 1.439927918154658, "learning_rate": 9.437522615414808e-06, "loss": 0.6455, "step": 1742 }, { "epoch": 0.18, "grad_norm": 1.3673310629011177, "learning_rate": 9.436760375282858e-06, "loss": 0.6251, "step": 1743 }, { "epoch": 0.18, "grad_norm": 1.543123751752078, "learning_rate": 9.435997649851305e-06, "loss": 0.6962, "step": 1744 }, { "epoch": 0.18, "grad_norm": 1.4491111543151791, "learning_rate": 9.435234439203572e-06, "loss": 0.6707, "step": 1745 }, { "epoch": 0.18, "grad_norm": 1.3254364907536875, "learning_rate": 9.434470743423139e-06, "loss": 0.6139, "step": 1746 }, { "epoch": 0.18, "grad_norm": 1.4455745174220287, "learning_rate": 9.433706562593543e-06, "loss": 0.6927, "step": 1747 }, { "epoch": 0.18, "grad_norm": 1.4733954718428435, "learning_rate": 9.432941896798374e-06, "loss": 0.7336, "step": 1748 }, { "epoch": 0.18, "grad_norm": 1.365346031442962, "learning_rate": 9.432176746121264e-06, "loss": 0.6442, "step": 1749 }, { "epoch": 0.18, "grad_norm": 1.3871961679324123, "learning_rate": 9.431411110645914e-06, "loss": 0.6871, "step": 1750 }, { "epoch": 0.18, "grad_norm": 1.6848374234868835, "learning_rate": 9.430644990456069e-06, "loss": 0.774, "step": 1751 }, { "epoch": 0.18, "grad_norm": 1.3783831746500368, "learning_rate": 9.429878385635525e-06, "loss": 0.7047, "step": 1752 }, { "epoch": 0.18, "grad_norm": 1.5224995253178768, "learning_rate": 9.429111296268138e-06, "loss": 0.625, "step": 1753 }, { "epoch": 0.18, "grad_norm": 1.5414063109549188, "learning_rate": 9.42834372243781e-06, "loss": 0.6702, "step": 1754 }, { "epoch": 0.18, "grad_norm": 1.4399759663579605, "learning_rate": 9.427575664228503e-06, "loss": 0.7206, "step": 1755 }, { "epoch": 0.18, "grad_norm": 1.6253119432668466, "learning_rate": 9.426807121724225e-06, "loss": 0.7591, "step": 1756 }, { "epoch": 0.18, "grad_norm": 1.4585751246126248, "learning_rate": 9.426038095009043e-06, "loss": 0.7149, "step": 1757 }, { "epoch": 0.18, "grad_norm": 1.3500617935117651, "learning_rate": 9.425268584167074e-06, "loss": 0.6295, "step": 1758 }, { "epoch": 0.18, "grad_norm": 1.4195487288041524, "learning_rate": 9.424498589282487e-06, "loss": 0.6423, "step": 1759 }, { "epoch": 0.18, "grad_norm": 1.5445241337834528, "learning_rate": 9.423728110439506e-06, "loss": 0.6541, "step": 1760 }, { "epoch": 0.18, "grad_norm": 1.4795573423528592, "learning_rate": 9.422957147722406e-06, "loss": 0.6878, "step": 1761 }, { "epoch": 0.18, "grad_norm": 1.4270077563282202, "learning_rate": 9.422185701215517e-06, "loss": 0.7463, "step": 1762 }, { "epoch": 0.18, "grad_norm": 1.3721286463283597, "learning_rate": 9.421413771003222e-06, "loss": 0.6692, "step": 1763 }, { "epoch": 0.18, "grad_norm": 1.3587220891769312, "learning_rate": 9.420641357169955e-06, "loss": 0.6556, "step": 1764 }, { "epoch": 0.18, "grad_norm": 1.5170328604382546, "learning_rate": 9.419868459800202e-06, "loss": 0.8112, "step": 1765 }, { "epoch": 0.18, "grad_norm": 1.4160921240661395, "learning_rate": 9.419095078978506e-06, "loss": 0.6401, "step": 1766 }, { "epoch": 0.18, "grad_norm": 1.6093340845359625, "learning_rate": 9.418321214789461e-06, "loss": 0.726, "step": 1767 }, { "epoch": 0.18, "grad_norm": 1.491282011871045, "learning_rate": 9.417546867317713e-06, "loss": 0.6586, "step": 1768 }, { "epoch": 0.18, "grad_norm": 1.2437842030865636, "learning_rate": 9.416772036647959e-06, "loss": 0.6922, "step": 1769 }, { "epoch": 0.18, "grad_norm": 1.5064021930983185, "learning_rate": 9.415996722864954e-06, "loss": 0.741, "step": 1770 }, { "epoch": 0.18, "grad_norm": 1.469491149747272, "learning_rate": 9.4152209260535e-06, "loss": 0.6742, "step": 1771 }, { "epoch": 0.18, "grad_norm": 1.4681544215213336, "learning_rate": 9.41444464629846e-06, "loss": 0.6636, "step": 1772 }, { "epoch": 0.18, "grad_norm": 1.4966793043031008, "learning_rate": 9.41366788368474e-06, "loss": 0.7207, "step": 1773 }, { "epoch": 0.18, "grad_norm": 1.399483789122562, "learning_rate": 9.412890638297304e-06, "loss": 0.6905, "step": 1774 }, { "epoch": 0.18, "grad_norm": 1.3789352675142579, "learning_rate": 9.41211291022117e-06, "loss": 0.6463, "step": 1775 }, { "epoch": 0.18, "grad_norm": 1.4518847676243607, "learning_rate": 9.411334699541406e-06, "loss": 0.5935, "step": 1776 }, { "epoch": 0.18, "grad_norm": 1.410580364920344, "learning_rate": 9.410556006343134e-06, "loss": 0.6331, "step": 1777 }, { "epoch": 0.18, "grad_norm": 1.4289704138166213, "learning_rate": 9.409776830711527e-06, "loss": 0.6682, "step": 1778 }, { "epoch": 0.18, "grad_norm": 1.4262154375515879, "learning_rate": 9.408997172731818e-06, "loss": 0.7199, "step": 1779 }, { "epoch": 0.18, "grad_norm": 1.5135834761019062, "learning_rate": 9.408217032489281e-06, "loss": 0.7664, "step": 1780 }, { "epoch": 0.18, "grad_norm": 1.4287018597861765, "learning_rate": 9.40743641006925e-06, "loss": 0.7207, "step": 1781 }, { "epoch": 0.18, "grad_norm": 1.636291280956083, "learning_rate": 9.406655305557115e-06, "loss": 0.7178, "step": 1782 }, { "epoch": 0.18, "grad_norm": 1.389708418201275, "learning_rate": 9.40587371903831e-06, "loss": 0.6511, "step": 1783 }, { "epoch": 0.18, "grad_norm": 1.7460658343708593, "learning_rate": 9.405091650598327e-06, "loss": 0.7324, "step": 1784 }, { "epoch": 0.18, "grad_norm": 1.4328091673280212, "learning_rate": 9.40430910032271e-06, "loss": 0.6709, "step": 1785 }, { "epoch": 0.18, "grad_norm": 1.5342252957493623, "learning_rate": 9.403526068297058e-06, "loss": 0.734, "step": 1786 }, { "epoch": 0.18, "grad_norm": 1.348204259915184, "learning_rate": 9.402742554607017e-06, "loss": 0.6806, "step": 1787 }, { "epoch": 0.18, "grad_norm": 1.6142597894376995, "learning_rate": 9.401958559338289e-06, "loss": 0.6626, "step": 1788 }, { "epoch": 0.18, "grad_norm": 1.271839274560957, "learning_rate": 9.40117408257663e-06, "loss": 0.6593, "step": 1789 }, { "epoch": 0.18, "grad_norm": 1.5868203025026077, "learning_rate": 9.400389124407848e-06, "loss": 0.5714, "step": 1790 }, { "epoch": 0.18, "grad_norm": 1.3379589128871927, "learning_rate": 9.399603684917799e-06, "loss": 0.632, "step": 1791 }, { "epoch": 0.18, "grad_norm": 1.5117795907280676, "learning_rate": 9.3988177641924e-06, "loss": 0.7243, "step": 1792 }, { "epoch": 0.18, "grad_norm": 1.6576008762478296, "learning_rate": 9.398031362317615e-06, "loss": 0.7457, "step": 1793 }, { "epoch": 0.18, "grad_norm": 1.4203398694659357, "learning_rate": 9.397244479379461e-06, "loss": 0.6216, "step": 1794 }, { "epoch": 0.18, "grad_norm": 1.5040078340587881, "learning_rate": 9.396457115464009e-06, "loss": 0.6661, "step": 1795 }, { "epoch": 0.18, "grad_norm": 1.4894229898601656, "learning_rate": 9.395669270657383e-06, "loss": 0.6043, "step": 1796 }, { "epoch": 0.18, "grad_norm": 1.508274613856428, "learning_rate": 9.394880945045756e-06, "loss": 0.738, "step": 1797 }, { "epoch": 0.18, "grad_norm": 1.4023870020811406, "learning_rate": 9.394092138715359e-06, "loss": 0.7044, "step": 1798 }, { "epoch": 0.18, "grad_norm": 1.4517645914821231, "learning_rate": 9.39330285175247e-06, "loss": 0.7083, "step": 1799 }, { "epoch": 0.18, "grad_norm": 1.609563678860988, "learning_rate": 9.392513084243426e-06, "loss": 0.7165, "step": 1800 }, { "epoch": 0.18, "grad_norm": 1.6046516747661435, "learning_rate": 9.39172283627461e-06, "loss": 0.7065, "step": 1801 }, { "epoch": 0.18, "grad_norm": 1.5429831707017163, "learning_rate": 9.390932107932461e-06, "loss": 0.6401, "step": 1802 }, { "epoch": 0.18, "grad_norm": 1.6784943859939663, "learning_rate": 9.39014089930347e-06, "loss": 0.6542, "step": 1803 }, { "epoch": 0.18, "grad_norm": 1.4708159582432019, "learning_rate": 9.389349210474183e-06, "loss": 0.6234, "step": 1804 }, { "epoch": 0.18, "grad_norm": 1.6543968849221198, "learning_rate": 9.388557041531195e-06, "loss": 0.8172, "step": 1805 }, { "epoch": 0.18, "grad_norm": 1.5446928386000371, "learning_rate": 9.387764392561153e-06, "loss": 0.6736, "step": 1806 }, { "epoch": 0.18, "grad_norm": 1.5039518875943214, "learning_rate": 9.386971263650759e-06, "loss": 0.6341, "step": 1807 }, { "epoch": 0.18, "grad_norm": 1.6915456825926751, "learning_rate": 9.386177654886766e-06, "loss": 0.7401, "step": 1808 }, { "epoch": 0.18, "grad_norm": 1.4191415008539772, "learning_rate": 9.385383566355983e-06, "loss": 0.6031, "step": 1809 }, { "epoch": 0.18, "grad_norm": 1.3935377855810909, "learning_rate": 9.384588998145265e-06, "loss": 0.6429, "step": 1810 }, { "epoch": 0.18, "grad_norm": 1.4348665030109977, "learning_rate": 9.383793950341526e-06, "loss": 0.6753, "step": 1811 }, { "epoch": 0.19, "grad_norm": 1.31801232490348, "learning_rate": 9.382998423031728e-06, "loss": 0.7331, "step": 1812 }, { "epoch": 0.19, "grad_norm": 1.4201029135454524, "learning_rate": 9.382202416302885e-06, "loss": 0.7112, "step": 1813 }, { "epoch": 0.19, "grad_norm": 1.467043067055318, "learning_rate": 9.381405930242069e-06, "loss": 0.7176, "step": 1814 }, { "epoch": 0.19, "grad_norm": 1.5855464235521306, "learning_rate": 9.3806089649364e-06, "loss": 0.5718, "step": 1815 }, { "epoch": 0.19, "grad_norm": 1.4796224459394232, "learning_rate": 9.37981152047305e-06, "loss": 0.6404, "step": 1816 }, { "epoch": 0.19, "grad_norm": 1.4225864487575755, "learning_rate": 9.379013596939246e-06, "loss": 0.6753, "step": 1817 }, { "epoch": 0.19, "grad_norm": 1.4923720706931733, "learning_rate": 9.378215194422266e-06, "loss": 0.6943, "step": 1818 }, { "epoch": 0.19, "grad_norm": 1.400839111161059, "learning_rate": 9.37741631300944e-06, "loss": 0.5562, "step": 1819 }, { "epoch": 0.19, "grad_norm": 1.7471980945600643, "learning_rate": 9.37661695278815e-06, "loss": 0.6717, "step": 1820 }, { "epoch": 0.19, "grad_norm": 1.5471093976408348, "learning_rate": 9.375817113845832e-06, "loss": 0.693, "step": 1821 }, { "epoch": 0.19, "grad_norm": 1.5472425560941139, "learning_rate": 9.375016796269973e-06, "loss": 0.7579, "step": 1822 }, { "epoch": 0.19, "grad_norm": 1.5661104704916418, "learning_rate": 9.374216000148115e-06, "loss": 0.7628, "step": 1823 }, { "epoch": 0.19, "grad_norm": 1.4636972994506454, "learning_rate": 9.373414725567849e-06, "loss": 0.698, "step": 1824 }, { "epoch": 0.19, "grad_norm": 1.5915011643539954, "learning_rate": 9.37261297261682e-06, "loss": 0.6588, "step": 1825 }, { "epoch": 0.19, "grad_norm": 1.3423367504848627, "learning_rate": 9.371810741382724e-06, "loss": 0.6991, "step": 1826 }, { "epoch": 0.19, "grad_norm": 1.476390374053697, "learning_rate": 9.37100803195331e-06, "loss": 0.7699, "step": 1827 }, { "epoch": 0.19, "grad_norm": 1.4419786046674272, "learning_rate": 9.370204844416381e-06, "loss": 0.5994, "step": 1828 }, { "epoch": 0.19, "grad_norm": 1.7484833786094256, "learning_rate": 9.369401178859792e-06, "loss": 0.6662, "step": 1829 }, { "epoch": 0.19, "grad_norm": 1.582842335830439, "learning_rate": 9.368597035371446e-06, "loss": 0.6864, "step": 1830 }, { "epoch": 0.19, "grad_norm": 1.3421765595190236, "learning_rate": 9.367792414039303e-06, "loss": 0.6978, "step": 1831 }, { "epoch": 0.19, "grad_norm": 1.4770453394050702, "learning_rate": 9.366987314951373e-06, "loss": 0.7316, "step": 1832 }, { "epoch": 0.19, "grad_norm": 1.4323708364146073, "learning_rate": 9.36618173819572e-06, "loss": 0.7347, "step": 1833 }, { "epoch": 0.19, "grad_norm": 1.2492293909634107, "learning_rate": 9.365375683860458e-06, "loss": 0.612, "step": 1834 }, { "epoch": 0.19, "grad_norm": 1.5734327461968083, "learning_rate": 9.364569152033756e-06, "loss": 0.8008, "step": 1835 }, { "epoch": 0.19, "grad_norm": 1.6852112794363636, "learning_rate": 9.363762142803834e-06, "loss": 0.7245, "step": 1836 }, { "epoch": 0.19, "grad_norm": 1.521573781456474, "learning_rate": 9.362954656258963e-06, "loss": 0.6477, "step": 1837 }, { "epoch": 0.19, "grad_norm": 1.506566860894992, "learning_rate": 9.362146692487463e-06, "loss": 0.6975, "step": 1838 }, { "epoch": 0.19, "grad_norm": 1.394763130012161, "learning_rate": 9.361338251577718e-06, "loss": 0.6496, "step": 1839 }, { "epoch": 0.19, "grad_norm": 1.6557183460064873, "learning_rate": 9.360529333618152e-06, "loss": 0.7465, "step": 1840 }, { "epoch": 0.19, "grad_norm": 1.447031676240176, "learning_rate": 9.359719938697247e-06, "loss": 0.7257, "step": 1841 }, { "epoch": 0.19, "grad_norm": 1.7284527216695846, "learning_rate": 9.358910066903533e-06, "loss": 0.8037, "step": 1842 }, { "epoch": 0.19, "grad_norm": 1.4503344341037188, "learning_rate": 9.3580997183256e-06, "loss": 0.6613, "step": 1843 }, { "epoch": 0.19, "grad_norm": 3.5041227648625286, "learning_rate": 9.35728889305208e-06, "loss": 0.6629, "step": 1844 }, { "epoch": 0.19, "grad_norm": 1.4317874402224129, "learning_rate": 9.356477591171666e-06, "loss": 0.6399, "step": 1845 }, { "epoch": 0.19, "grad_norm": 1.361499386101776, "learning_rate": 9.355665812773099e-06, "loss": 0.6039, "step": 1846 }, { "epoch": 0.19, "grad_norm": 1.6899098839779128, "learning_rate": 9.35485355794517e-06, "loss": 0.7801, "step": 1847 }, { "epoch": 0.19, "grad_norm": 1.5720660771187391, "learning_rate": 9.354040826776727e-06, "loss": 0.6638, "step": 1848 }, { "epoch": 0.19, "grad_norm": 1.5351824120396766, "learning_rate": 9.353227619356667e-06, "loss": 0.6843, "step": 1849 }, { "epoch": 0.19, "grad_norm": 1.4462578550373173, "learning_rate": 9.352413935773939e-06, "loss": 0.7429, "step": 1850 }, { "epoch": 0.19, "grad_norm": 1.5979603961319206, "learning_rate": 9.351599776117547e-06, "loss": 0.6686, "step": 1851 }, { "epoch": 0.19, "grad_norm": 1.3892718014678729, "learning_rate": 9.350785140476546e-06, "loss": 0.6554, "step": 1852 }, { "epoch": 0.19, "grad_norm": 1.632038133483299, "learning_rate": 9.34997002894004e-06, "loss": 0.7033, "step": 1853 }, { "epoch": 0.19, "grad_norm": 1.3989124552746677, "learning_rate": 9.349154441597184e-06, "loss": 0.8415, "step": 1854 }, { "epoch": 0.19, "grad_norm": 1.908002092409772, "learning_rate": 9.348338378537192e-06, "loss": 0.7128, "step": 1855 }, { "epoch": 0.19, "grad_norm": 1.9279853362815882, "learning_rate": 9.347521839849328e-06, "loss": 0.6256, "step": 1856 }, { "epoch": 0.19, "grad_norm": 1.45624626970452, "learning_rate": 9.346704825622901e-06, "loss": 0.6077, "step": 1857 }, { "epoch": 0.19, "grad_norm": 1.3061674442015532, "learning_rate": 9.345887335947281e-06, "loss": 0.5655, "step": 1858 }, { "epoch": 0.19, "grad_norm": 1.4975740818424867, "learning_rate": 9.345069370911887e-06, "loss": 0.6148, "step": 1859 }, { "epoch": 0.19, "grad_norm": 1.4925205011626606, "learning_rate": 9.344250930606186e-06, "loss": 0.6358, "step": 1860 }, { "epoch": 0.19, "grad_norm": 1.3631679341292717, "learning_rate": 9.343432015119703e-06, "loss": 0.4948, "step": 1861 }, { "epoch": 0.19, "grad_norm": 1.437203028231221, "learning_rate": 9.34261262454201e-06, "loss": 0.6675, "step": 1862 }, { "epoch": 0.19, "grad_norm": 1.5019238624448945, "learning_rate": 9.341792758962736e-06, "loss": 0.5725, "step": 1863 }, { "epoch": 0.19, "grad_norm": 1.7150807048470826, "learning_rate": 9.340972418471556e-06, "loss": 0.7019, "step": 1864 }, { "epoch": 0.19, "grad_norm": 1.4232979504239647, "learning_rate": 9.340151603158203e-06, "loss": 0.7128, "step": 1865 }, { "epoch": 0.19, "grad_norm": 6.338376827081418, "learning_rate": 9.339330313112457e-06, "loss": 0.6389, "step": 1866 }, { "epoch": 0.19, "grad_norm": 1.3135031772573909, "learning_rate": 9.338508548424152e-06, "loss": 0.5868, "step": 1867 }, { "epoch": 0.19, "grad_norm": 1.7057072070812187, "learning_rate": 9.337686309183176e-06, "loss": 0.6966, "step": 1868 }, { "epoch": 0.19, "grad_norm": 1.5696664243219844, "learning_rate": 9.336863595479464e-06, "loss": 0.7512, "step": 1869 }, { "epoch": 0.19, "grad_norm": 1.5004906924237313, "learning_rate": 9.336040407403006e-06, "loss": 0.7324, "step": 1870 }, { "epoch": 0.19, "grad_norm": 1.473745751007304, "learning_rate": 9.335216745043845e-06, "loss": 0.7608, "step": 1871 }, { "epoch": 0.19, "grad_norm": 1.5020886434973764, "learning_rate": 9.334392608492074e-06, "loss": 0.6724, "step": 1872 }, { "epoch": 0.19, "grad_norm": 1.3502817680581187, "learning_rate": 9.333567997837837e-06, "loss": 0.6705, "step": 1873 }, { "epoch": 0.19, "grad_norm": 1.592319456167198, "learning_rate": 9.332742913171336e-06, "loss": 0.5956, "step": 1874 }, { "epoch": 0.19, "grad_norm": 1.376445175030331, "learning_rate": 9.331917354582814e-06, "loss": 0.588, "step": 1875 }, { "epoch": 0.19, "grad_norm": 1.3875271980158612, "learning_rate": 9.331091322162573e-06, "loss": 0.6824, "step": 1876 }, { "epoch": 0.19, "grad_norm": 1.6892913183544165, "learning_rate": 9.33026481600097e-06, "loss": 0.6476, "step": 1877 }, { "epoch": 0.19, "grad_norm": 1.447263631145952, "learning_rate": 9.329437836188403e-06, "loss": 0.6921, "step": 1878 }, { "epoch": 0.19, "grad_norm": 1.484495677472395, "learning_rate": 9.328610382815336e-06, "loss": 0.7214, "step": 1879 }, { "epoch": 0.19, "grad_norm": 1.5504380215381603, "learning_rate": 9.32778245597227e-06, "loss": 0.7525, "step": 1880 }, { "epoch": 0.19, "grad_norm": 1.4702783501759027, "learning_rate": 9.326954055749767e-06, "loss": 0.7168, "step": 1881 }, { "epoch": 0.19, "grad_norm": 1.3728930786862834, "learning_rate": 9.326125182238441e-06, "loss": 0.5864, "step": 1882 }, { "epoch": 0.19, "grad_norm": 1.6609176375447465, "learning_rate": 9.325295835528954e-06, "loss": 0.7346, "step": 1883 }, { "epoch": 0.19, "grad_norm": 1.4613465755460793, "learning_rate": 9.32446601571202e-06, "loss": 0.7246, "step": 1884 }, { "epoch": 0.19, "grad_norm": 1.4077881575127251, "learning_rate": 9.323635722878408e-06, "loss": 0.6547, "step": 1885 }, { "epoch": 0.19, "grad_norm": 1.3967109492675671, "learning_rate": 9.322804957118935e-06, "loss": 0.6945, "step": 1886 }, { "epoch": 0.19, "grad_norm": 1.458715981519205, "learning_rate": 9.321973718524472e-06, "loss": 0.6857, "step": 1887 }, { "epoch": 0.19, "grad_norm": 1.864327767904818, "learning_rate": 9.32114200718594e-06, "loss": 0.7599, "step": 1888 }, { "epoch": 0.19, "grad_norm": 1.3270777348847624, "learning_rate": 9.320309823194317e-06, "loss": 0.6832, "step": 1889 }, { "epoch": 0.19, "grad_norm": 1.3765600572181642, "learning_rate": 9.319477166640625e-06, "loss": 0.5944, "step": 1890 }, { "epoch": 0.19, "grad_norm": 1.3104525827514122, "learning_rate": 9.318644037615943e-06, "loss": 0.6362, "step": 1891 }, { "epoch": 0.19, "grad_norm": 1.6510644825804095, "learning_rate": 9.317810436211397e-06, "loss": 0.7082, "step": 1892 }, { "epoch": 0.19, "grad_norm": 1.3308813572449611, "learning_rate": 9.31697636251817e-06, "loss": 0.6715, "step": 1893 }, { "epoch": 0.19, "grad_norm": 1.4901156305291516, "learning_rate": 9.316141816627493e-06, "loss": 0.6809, "step": 1894 }, { "epoch": 0.19, "grad_norm": 1.743606561912427, "learning_rate": 9.315306798630652e-06, "loss": 0.7521, "step": 1895 }, { "epoch": 0.19, "grad_norm": 1.63336916749725, "learning_rate": 9.31447130861898e-06, "loss": 0.6387, "step": 1896 }, { "epoch": 0.19, "grad_norm": 1.571136145423166, "learning_rate": 9.313635346683866e-06, "loss": 0.6872, "step": 1897 }, { "epoch": 0.19, "grad_norm": 1.560165768136455, "learning_rate": 9.312798912916747e-06, "loss": 0.66, "step": 1898 }, { "epoch": 0.19, "grad_norm": 1.35839065182495, "learning_rate": 9.311962007409117e-06, "loss": 0.6196, "step": 1899 }, { "epoch": 0.19, "grad_norm": 1.5368337273293597, "learning_rate": 9.311124630252514e-06, "loss": 0.7038, "step": 1900 }, { "epoch": 0.19, "grad_norm": 1.3109146265384404, "learning_rate": 9.310286781538532e-06, "loss": 0.6987, "step": 1901 }, { "epoch": 0.19, "grad_norm": 1.5440345801114934, "learning_rate": 9.309448461358818e-06, "loss": 0.6645, "step": 1902 }, { "epoch": 0.19, "grad_norm": 1.5144254702652913, "learning_rate": 9.308609669805068e-06, "loss": 0.7244, "step": 1903 }, { "epoch": 0.19, "grad_norm": 1.3368106841337033, "learning_rate": 9.307770406969032e-06, "loss": 0.6372, "step": 1904 }, { "epoch": 0.19, "grad_norm": 1.6026045097784372, "learning_rate": 9.306930672942507e-06, "loss": 0.6795, "step": 1905 }, { "epoch": 0.19, "grad_norm": 1.3178482702831769, "learning_rate": 9.306090467817345e-06, "loss": 0.6495, "step": 1906 }, { "epoch": 0.19, "grad_norm": 1.4100321425254911, "learning_rate": 9.305249791685449e-06, "loss": 0.6148, "step": 1907 }, { "epoch": 0.19, "grad_norm": 1.5736199712061711, "learning_rate": 9.304408644638775e-06, "loss": 0.6477, "step": 1908 }, { "epoch": 0.19, "grad_norm": 1.5011298302617835, "learning_rate": 9.303567026769328e-06, "loss": 0.7407, "step": 1909 }, { "epoch": 0.2, "grad_norm": 1.4477097118166187, "learning_rate": 9.302724938169165e-06, "loss": 0.6524, "step": 1910 }, { "epoch": 0.2, "grad_norm": 1.436377639133506, "learning_rate": 9.301882378930393e-06, "loss": 0.7199, "step": 1911 }, { "epoch": 0.2, "grad_norm": 1.3919059293069556, "learning_rate": 9.301039349145178e-06, "loss": 0.6792, "step": 1912 }, { "epoch": 0.2, "grad_norm": 1.340242646257213, "learning_rate": 9.300195848905726e-06, "loss": 0.5445, "step": 1913 }, { "epoch": 0.2, "grad_norm": 1.5675763688604687, "learning_rate": 9.299351878304306e-06, "loss": 0.7087, "step": 1914 }, { "epoch": 0.2, "grad_norm": 1.6825194509543926, "learning_rate": 9.298507437433227e-06, "loss": 0.634, "step": 1915 }, { "epoch": 0.2, "grad_norm": 1.5452199663711286, "learning_rate": 9.29766252638486e-06, "loss": 0.6672, "step": 1916 }, { "epoch": 0.2, "grad_norm": 1.5072144520212991, "learning_rate": 9.296817145251617e-06, "loss": 0.6292, "step": 1917 }, { "epoch": 0.2, "grad_norm": 1.3927637035234997, "learning_rate": 9.295971294125973e-06, "loss": 0.7517, "step": 1918 }, { "epoch": 0.2, "grad_norm": 1.300550132935694, "learning_rate": 9.295124973100446e-06, "loss": 0.6079, "step": 1919 }, { "epoch": 0.2, "grad_norm": 1.406251576610478, "learning_rate": 9.294278182267607e-06, "loss": 0.6366, "step": 1920 }, { "epoch": 0.2, "grad_norm": 1.2386696835046214, "learning_rate": 9.293430921720081e-06, "loss": 0.5969, "step": 1921 }, { "epoch": 0.2, "grad_norm": 1.5214297440688411, "learning_rate": 9.292583191550541e-06, "loss": 0.6674, "step": 1922 }, { "epoch": 0.2, "grad_norm": 1.352512622896264, "learning_rate": 9.291734991851716e-06, "loss": 0.696, "step": 1923 }, { "epoch": 0.2, "grad_norm": 1.6119546029771101, "learning_rate": 9.29088632271638e-06, "loss": 0.6449, "step": 1924 }, { "epoch": 0.2, "grad_norm": 1.3693130410181737, "learning_rate": 9.290037184237363e-06, "loss": 0.6374, "step": 1925 }, { "epoch": 0.2, "grad_norm": 1.3985160567966102, "learning_rate": 9.289187576507544e-06, "loss": 0.6043, "step": 1926 }, { "epoch": 0.2, "grad_norm": 1.505726156882047, "learning_rate": 9.288337499619856e-06, "loss": 0.7315, "step": 1927 }, { "epoch": 0.2, "grad_norm": 1.4150480696226788, "learning_rate": 9.287486953667282e-06, "loss": 0.7475, "step": 1928 }, { "epoch": 0.2, "grad_norm": 1.4011549459035544, "learning_rate": 9.286635938742855e-06, "loss": 0.5913, "step": 1929 }, { "epoch": 0.2, "grad_norm": 1.2693239754932424, "learning_rate": 9.285784454939661e-06, "loss": 0.6365, "step": 1930 }, { "epoch": 0.2, "grad_norm": 1.4238620753496247, "learning_rate": 9.284932502350835e-06, "loss": 0.685, "step": 1931 }, { "epoch": 0.2, "grad_norm": 1.5341516396156414, "learning_rate": 9.284080081069566e-06, "loss": 0.659, "step": 1932 }, { "epoch": 0.2, "grad_norm": 1.6671464312298545, "learning_rate": 9.283227191189095e-06, "loss": 0.6701, "step": 1933 }, { "epoch": 0.2, "grad_norm": 1.490039550546993, "learning_rate": 9.28237383280271e-06, "loss": 0.7167, "step": 1934 }, { "epoch": 0.2, "grad_norm": 1.3996854342761833, "learning_rate": 9.28152000600375e-06, "loss": 0.6602, "step": 1935 }, { "epoch": 0.2, "grad_norm": 1.5578173107399367, "learning_rate": 9.280665710885613e-06, "loss": 0.6981, "step": 1936 }, { "epoch": 0.2, "grad_norm": 1.527201910395064, "learning_rate": 9.27981094754174e-06, "loss": 0.67, "step": 1937 }, { "epoch": 0.2, "grad_norm": 1.399144963178917, "learning_rate": 9.27895571606563e-06, "loss": 0.54, "step": 1938 }, { "epoch": 0.2, "grad_norm": 1.4339012861825853, "learning_rate": 9.278100016550826e-06, "loss": 0.6846, "step": 1939 }, { "epoch": 0.2, "grad_norm": 1.4078017049301883, "learning_rate": 9.277243849090925e-06, "loss": 0.6347, "step": 1940 }, { "epoch": 0.2, "grad_norm": 1.6166732758186249, "learning_rate": 9.276387213779579e-06, "loss": 0.7452, "step": 1941 }, { "epoch": 0.2, "grad_norm": 1.4373322617403705, "learning_rate": 9.275530110710486e-06, "loss": 0.7218, "step": 1942 }, { "epoch": 0.2, "grad_norm": 1.5276209627109423, "learning_rate": 9.274672539977398e-06, "loss": 0.6426, "step": 1943 }, { "epoch": 0.2, "grad_norm": 1.4740655710705723, "learning_rate": 9.273814501674117e-06, "loss": 0.6888, "step": 1944 }, { "epoch": 0.2, "grad_norm": 1.4904043634513555, "learning_rate": 9.272955995894498e-06, "loss": 0.6775, "step": 1945 }, { "epoch": 0.2, "grad_norm": 1.7957913189033463, "learning_rate": 9.272097022732444e-06, "loss": 0.7057, "step": 1946 }, { "epoch": 0.2, "grad_norm": 1.4072094561729946, "learning_rate": 9.271237582281912e-06, "loss": 0.7125, "step": 1947 }, { "epoch": 0.2, "grad_norm": 1.4028583370304726, "learning_rate": 9.270377674636906e-06, "loss": 0.6617, "step": 1948 }, { "epoch": 0.2, "grad_norm": 2.1704792714332135, "learning_rate": 9.269517299891486e-06, "loss": 0.7177, "step": 1949 }, { "epoch": 0.2, "grad_norm": 1.5319623920842143, "learning_rate": 9.268656458139763e-06, "loss": 0.6891, "step": 1950 }, { "epoch": 0.2, "grad_norm": 1.6538247163160409, "learning_rate": 9.267795149475895e-06, "loss": 0.8043, "step": 1951 }, { "epoch": 0.2, "grad_norm": 1.3573000235533605, "learning_rate": 9.266933373994094e-06, "loss": 0.7277, "step": 1952 }, { "epoch": 0.2, "grad_norm": 1.3349480331844517, "learning_rate": 9.266071131788621e-06, "loss": 0.6826, "step": 1953 }, { "epoch": 0.2, "grad_norm": 1.5610858112881525, "learning_rate": 9.26520842295379e-06, "loss": 0.6667, "step": 1954 }, { "epoch": 0.2, "grad_norm": 1.4575654240869538, "learning_rate": 9.264345247583968e-06, "loss": 0.748, "step": 1955 }, { "epoch": 0.2, "grad_norm": 1.5494638797514229, "learning_rate": 9.263481605773568e-06, "loss": 0.7542, "step": 1956 }, { "epoch": 0.2, "grad_norm": 1.4525825698152588, "learning_rate": 9.262617497617055e-06, "loss": 0.7995, "step": 1957 }, { "epoch": 0.2, "grad_norm": 1.7468013551036177, "learning_rate": 9.261752923208948e-06, "loss": 0.694, "step": 1958 }, { "epoch": 0.2, "grad_norm": 1.374437075529906, "learning_rate": 9.260887882643816e-06, "loss": 0.6987, "step": 1959 }, { "epoch": 0.2, "grad_norm": 1.5412750778813207, "learning_rate": 9.26002237601628e-06, "loss": 0.7538, "step": 1960 }, { "epoch": 0.2, "grad_norm": 1.5719338375386078, "learning_rate": 9.259156403421007e-06, "loss": 0.7085, "step": 1961 }, { "epoch": 0.2, "grad_norm": 1.4765159488497526, "learning_rate": 9.258289964952721e-06, "loss": 0.6667, "step": 1962 }, { "epoch": 0.2, "grad_norm": 1.4335858188719686, "learning_rate": 9.25742306070619e-06, "loss": 0.6314, "step": 1963 }, { "epoch": 0.2, "grad_norm": 1.4157754613201239, "learning_rate": 9.256555690776243e-06, "loss": 0.7012, "step": 1964 }, { "epoch": 0.2, "grad_norm": 1.4767239526867268, "learning_rate": 9.255687855257752e-06, "loss": 0.6905, "step": 1965 }, { "epoch": 0.2, "grad_norm": 1.3739189465688757, "learning_rate": 9.254819554245642e-06, "loss": 0.6833, "step": 1966 }, { "epoch": 0.2, "grad_norm": 1.5465807259764133, "learning_rate": 9.253950787834888e-06, "loss": 0.6319, "step": 1967 }, { "epoch": 0.2, "grad_norm": 1.499432464698134, "learning_rate": 9.25308155612052e-06, "loss": 0.7329, "step": 1968 }, { "epoch": 0.2, "grad_norm": 1.394352847983067, "learning_rate": 9.252211859197614e-06, "loss": 0.5896, "step": 1969 }, { "epoch": 0.2, "grad_norm": 1.4010305099726252, "learning_rate": 9.251341697161295e-06, "loss": 0.657, "step": 1970 }, { "epoch": 0.2, "grad_norm": 1.4271002985100252, "learning_rate": 9.250471070106748e-06, "loss": 0.7453, "step": 1971 }, { "epoch": 0.2, "grad_norm": 1.447213711422621, "learning_rate": 9.249599978129203e-06, "loss": 0.6927, "step": 1972 }, { "epoch": 0.2, "grad_norm": 1.634505351716555, "learning_rate": 9.24872842132394e-06, "loss": 0.6816, "step": 1973 }, { "epoch": 0.2, "grad_norm": 1.338222781147939, "learning_rate": 9.247856399786293e-06, "loss": 0.6348, "step": 1974 }, { "epoch": 0.2, "grad_norm": 1.477394546893368, "learning_rate": 9.246983913611641e-06, "loss": 0.676, "step": 1975 }, { "epoch": 0.2, "grad_norm": 1.471599275252791, "learning_rate": 9.24611096289542e-06, "loss": 0.7308, "step": 1976 }, { "epoch": 0.2, "grad_norm": 1.4501061564642945, "learning_rate": 9.245237547733117e-06, "loss": 0.6463, "step": 1977 }, { "epoch": 0.2, "grad_norm": 1.6007247739201091, "learning_rate": 9.244363668220265e-06, "loss": 0.7475, "step": 1978 }, { "epoch": 0.2, "grad_norm": 1.3351696570414877, "learning_rate": 9.24348932445245e-06, "loss": 0.6193, "step": 1979 }, { "epoch": 0.2, "grad_norm": 1.3458495384062004, "learning_rate": 9.242614516525311e-06, "loss": 0.631, "step": 1980 }, { "epoch": 0.2, "grad_norm": 1.441369741783886, "learning_rate": 9.241739244534534e-06, "loss": 0.6776, "step": 1981 }, { "epoch": 0.2, "grad_norm": 1.4717780548707933, "learning_rate": 9.24086350857586e-06, "loss": 0.8322, "step": 1982 }, { "epoch": 0.2, "grad_norm": 1.306072389334237, "learning_rate": 9.239987308745074e-06, "loss": 0.5646, "step": 1983 }, { "epoch": 0.2, "grad_norm": 1.391972115679673, "learning_rate": 9.23911064513802e-06, "loss": 0.6858, "step": 1984 }, { "epoch": 0.2, "grad_norm": 1.3841689365497511, "learning_rate": 9.238233517850588e-06, "loss": 0.6005, "step": 1985 }, { "epoch": 0.2, "grad_norm": 1.5389056847882305, "learning_rate": 9.237355926978716e-06, "loss": 0.6006, "step": 1986 }, { "epoch": 0.2, "grad_norm": 1.3002183848853306, "learning_rate": 9.236477872618402e-06, "loss": 0.5826, "step": 1987 }, { "epoch": 0.2, "grad_norm": 1.3927863893793357, "learning_rate": 9.235599354865686e-06, "loss": 0.6556, "step": 1988 }, { "epoch": 0.2, "grad_norm": 1.4881395975234952, "learning_rate": 9.234720373816661e-06, "loss": 0.6037, "step": 1989 }, { "epoch": 0.2, "grad_norm": 1.3640828060070522, "learning_rate": 9.233840929567472e-06, "loss": 0.6834, "step": 1990 }, { "epoch": 0.2, "grad_norm": 1.2760791922464407, "learning_rate": 9.232961022214314e-06, "loss": 0.6915, "step": 1991 }, { "epoch": 0.2, "grad_norm": 1.3615205272787725, "learning_rate": 9.232080651853435e-06, "loss": 0.7592, "step": 1992 }, { "epoch": 0.2, "grad_norm": 1.4737439793930032, "learning_rate": 9.231199818581125e-06, "loss": 0.7181, "step": 1993 }, { "epoch": 0.2, "grad_norm": 1.5782580729310627, "learning_rate": 9.230318522493737e-06, "loss": 0.7157, "step": 1994 }, { "epoch": 0.2, "grad_norm": 1.6010484016069038, "learning_rate": 9.229436763687665e-06, "loss": 0.7973, "step": 1995 }, { "epoch": 0.2, "grad_norm": 1.405808863166628, "learning_rate": 9.22855454225936e-06, "loss": 0.6895, "step": 1996 }, { "epoch": 0.2, "grad_norm": 1.5777311540260401, "learning_rate": 9.227671858305318e-06, "loss": 0.6243, "step": 1997 }, { "epoch": 0.2, "grad_norm": 1.4590342855110905, "learning_rate": 9.22678871192209e-06, "loss": 0.6784, "step": 1998 }, { "epoch": 0.2, "grad_norm": 1.4159753786774727, "learning_rate": 9.225905103206273e-06, "loss": 0.6061, "step": 1999 }, { "epoch": 0.2, "grad_norm": 1.2318599320303514, "learning_rate": 9.225021032254523e-06, "loss": 0.633, "step": 2000 }, { "epoch": 0.2, "grad_norm": 1.5386748928526863, "learning_rate": 9.224136499163537e-06, "loss": 0.7066, "step": 2001 }, { "epoch": 0.2, "grad_norm": 1.7024041908116367, "learning_rate": 9.223251504030067e-06, "loss": 0.6594, "step": 2002 }, { "epoch": 0.2, "grad_norm": 1.4859837641836788, "learning_rate": 9.222366046950915e-06, "loss": 0.6698, "step": 2003 }, { "epoch": 0.2, "grad_norm": 1.543191761503095, "learning_rate": 9.221480128022935e-06, "loss": 0.6621, "step": 2004 }, { "epoch": 0.2, "grad_norm": 1.4334959432146743, "learning_rate": 9.220593747343028e-06, "loss": 0.5367, "step": 2005 }, { "epoch": 0.2, "grad_norm": 1.269512355790103, "learning_rate": 9.21970690500815e-06, "loss": 0.5679, "step": 2006 }, { "epoch": 0.2, "grad_norm": 1.176359807861402, "learning_rate": 9.218819601115302e-06, "loss": 0.6835, "step": 2007 }, { "epoch": 0.21, "grad_norm": 1.432533947343924, "learning_rate": 9.217931835761544e-06, "loss": 0.7174, "step": 2008 }, { "epoch": 0.21, "grad_norm": 1.6462307238666327, "learning_rate": 9.217043609043976e-06, "loss": 0.7561, "step": 2009 }, { "epoch": 0.21, "grad_norm": 1.386937637583084, "learning_rate": 9.216154921059755e-06, "loss": 0.6201, "step": 2010 }, { "epoch": 0.21, "grad_norm": 1.5961485078100612, "learning_rate": 9.21526577190609e-06, "loss": 0.6855, "step": 2011 }, { "epoch": 0.21, "grad_norm": 1.5184756444513476, "learning_rate": 9.214376161680231e-06, "loss": 0.6387, "step": 2012 }, { "epoch": 0.21, "grad_norm": 1.4934592103231306, "learning_rate": 9.213486090479493e-06, "loss": 0.6817, "step": 2013 }, { "epoch": 0.21, "grad_norm": 1.3213498946853164, "learning_rate": 9.212595558401228e-06, "loss": 0.6821, "step": 2014 }, { "epoch": 0.21, "grad_norm": 1.4448487629688604, "learning_rate": 9.211704565542845e-06, "loss": 0.7015, "step": 2015 }, { "epoch": 0.21, "grad_norm": 1.5444694425158676, "learning_rate": 9.210813112001802e-06, "loss": 0.6419, "step": 2016 }, { "epoch": 0.21, "grad_norm": 1.3205951390115038, "learning_rate": 9.209921197875606e-06, "loss": 0.6149, "step": 2017 }, { "epoch": 0.21, "grad_norm": 1.45157046376402, "learning_rate": 9.20902882326182e-06, "loss": 0.7354, "step": 2018 }, { "epoch": 0.21, "grad_norm": 1.292014878597021, "learning_rate": 9.20813598825805e-06, "loss": 0.6165, "step": 2019 }, { "epoch": 0.21, "grad_norm": 1.8922875517412157, "learning_rate": 9.207242692961958e-06, "loss": 0.781, "step": 2020 }, { "epoch": 0.21, "grad_norm": 1.4666326735455824, "learning_rate": 9.206348937471251e-06, "loss": 0.6642, "step": 2021 }, { "epoch": 0.21, "grad_norm": 1.3408871242937257, "learning_rate": 9.205454721883691e-06, "loss": 0.6095, "step": 2022 }, { "epoch": 0.21, "grad_norm": 1.5754526622792349, "learning_rate": 9.20456004629709e-06, "loss": 0.5648, "step": 2023 }, { "epoch": 0.21, "grad_norm": 1.2890663056585276, "learning_rate": 9.203664910809307e-06, "loss": 0.5811, "step": 2024 }, { "epoch": 0.21, "grad_norm": 1.566781162140852, "learning_rate": 9.202769315518253e-06, "loss": 0.6797, "step": 2025 }, { "epoch": 0.21, "grad_norm": 1.428109319509882, "learning_rate": 9.20187326052189e-06, "loss": 0.618, "step": 2026 }, { "epoch": 0.21, "grad_norm": 1.4730415413596913, "learning_rate": 9.200976745918233e-06, "loss": 0.7178, "step": 2027 }, { "epoch": 0.21, "grad_norm": 1.5405471123060914, "learning_rate": 9.200079771805338e-06, "loss": 0.6102, "step": 2028 }, { "epoch": 0.21, "grad_norm": 1.4920064598505114, "learning_rate": 9.199182338281324e-06, "loss": 0.749, "step": 2029 }, { "epoch": 0.21, "grad_norm": 1.7381350176788943, "learning_rate": 9.198284445444348e-06, "loss": 0.6706, "step": 2030 }, { "epoch": 0.21, "grad_norm": 1.4528770812057863, "learning_rate": 9.197386093392627e-06, "loss": 0.6676, "step": 2031 }, { "epoch": 0.21, "grad_norm": 2.2781326975945, "learning_rate": 9.196487282224422e-06, "loss": 0.614, "step": 2032 }, { "epoch": 0.21, "grad_norm": 1.450444610323633, "learning_rate": 9.195588012038047e-06, "loss": 0.683, "step": 2033 }, { "epoch": 0.21, "grad_norm": 1.4162102798266571, "learning_rate": 9.194688282931865e-06, "loss": 0.7087, "step": 2034 }, { "epoch": 0.21, "grad_norm": 1.388209227158477, "learning_rate": 9.19378809500429e-06, "loss": 0.746, "step": 2035 }, { "epoch": 0.21, "grad_norm": 1.4961650095893422, "learning_rate": 9.192887448353786e-06, "loss": 0.6977, "step": 2036 }, { "epoch": 0.21, "grad_norm": 1.7793391068125737, "learning_rate": 9.191986343078868e-06, "loss": 0.7442, "step": 2037 }, { "epoch": 0.21, "grad_norm": 1.476001172804284, "learning_rate": 9.1910847792781e-06, "loss": 0.7264, "step": 2038 }, { "epoch": 0.21, "grad_norm": 1.409628363799354, "learning_rate": 9.190182757050094e-06, "loss": 0.6962, "step": 2039 }, { "epoch": 0.21, "grad_norm": 1.4366015274262152, "learning_rate": 9.189280276493518e-06, "loss": 0.5943, "step": 2040 }, { "epoch": 0.21, "grad_norm": 1.3931907904400465, "learning_rate": 9.188377337707083e-06, "loss": 0.6804, "step": 2041 }, { "epoch": 0.21, "grad_norm": 1.4863477043444868, "learning_rate": 9.187473940789558e-06, "loss": 0.6526, "step": 2042 }, { "epoch": 0.21, "grad_norm": 1.6159316748423838, "learning_rate": 9.186570085839754e-06, "loss": 0.7667, "step": 2043 }, { "epoch": 0.21, "grad_norm": 1.511428688064586, "learning_rate": 9.18566577295654e-06, "loss": 0.6667, "step": 2044 }, { "epoch": 0.21, "grad_norm": 1.3519045744253744, "learning_rate": 9.184761002238827e-06, "loss": 0.6934, "step": 2045 }, { "epoch": 0.21, "grad_norm": 1.501876595161675, "learning_rate": 9.183855773785582e-06, "loss": 0.6881, "step": 2046 }, { "epoch": 0.21, "grad_norm": 1.4832458948229008, "learning_rate": 9.182950087695821e-06, "loss": 0.6843, "step": 2047 }, { "epoch": 0.21, "grad_norm": 1.4578912691867807, "learning_rate": 9.18204394406861e-06, "loss": 0.6102, "step": 2048 }, { "epoch": 0.21, "grad_norm": 1.4272899773314363, "learning_rate": 9.181137343003062e-06, "loss": 0.6633, "step": 2049 }, { "epoch": 0.21, "grad_norm": 1.4889026466467141, "learning_rate": 9.180230284598343e-06, "loss": 0.7638, "step": 2050 }, { "epoch": 0.21, "grad_norm": 1.2946430564180211, "learning_rate": 9.179322768953671e-06, "loss": 0.6182, "step": 2051 }, { "epoch": 0.21, "grad_norm": 1.7506834346046733, "learning_rate": 9.178414796168309e-06, "loss": 0.7046, "step": 2052 }, { "epoch": 0.21, "grad_norm": 1.563864984676347, "learning_rate": 9.177506366341571e-06, "loss": 0.7169, "step": 2053 }, { "epoch": 0.21, "grad_norm": 1.9733009785135294, "learning_rate": 9.176597479572826e-06, "loss": 0.6731, "step": 2054 }, { "epoch": 0.21, "grad_norm": 1.520852579662777, "learning_rate": 9.175688135961487e-06, "loss": 0.7691, "step": 2055 }, { "epoch": 0.21, "grad_norm": 1.3144564528151446, "learning_rate": 9.174778335607019e-06, "loss": 0.7088, "step": 2056 }, { "epoch": 0.21, "grad_norm": 1.53351610352697, "learning_rate": 9.173868078608941e-06, "loss": 0.7161, "step": 2057 }, { "epoch": 0.21, "grad_norm": 1.3818222467326897, "learning_rate": 9.172957365066815e-06, "loss": 0.6424, "step": 2058 }, { "epoch": 0.21, "grad_norm": 1.6458712089769427, "learning_rate": 9.172046195080255e-06, "loss": 0.6575, "step": 2059 }, { "epoch": 0.21, "grad_norm": 1.674258140694923, "learning_rate": 9.17113456874893e-06, "loss": 0.6552, "step": 2060 }, { "epoch": 0.21, "grad_norm": 1.4720337401377181, "learning_rate": 9.170222486172554e-06, "loss": 0.6615, "step": 2061 }, { "epoch": 0.21, "grad_norm": 1.574451120274815, "learning_rate": 9.16930994745089e-06, "loss": 0.686, "step": 2062 }, { "epoch": 0.21, "grad_norm": 1.4607619608239895, "learning_rate": 9.168396952683754e-06, "loss": 0.5663, "step": 2063 }, { "epoch": 0.21, "grad_norm": 1.5680653675768996, "learning_rate": 9.16748350197101e-06, "loss": 0.6825, "step": 2064 }, { "epoch": 0.21, "grad_norm": 1.6298826890292755, "learning_rate": 9.166569595412576e-06, "loss": 0.7267, "step": 2065 }, { "epoch": 0.21, "grad_norm": 1.565305942966309, "learning_rate": 9.165655233108413e-06, "loss": 0.5753, "step": 2066 }, { "epoch": 0.21, "grad_norm": 1.4767378645533373, "learning_rate": 9.164740415158535e-06, "loss": 0.7461, "step": 2067 }, { "epoch": 0.21, "grad_norm": 1.2787102708048121, "learning_rate": 9.163825141663008e-06, "loss": 0.6211, "step": 2068 }, { "epoch": 0.21, "grad_norm": 1.5338875765333417, "learning_rate": 9.162909412721946e-06, "loss": 0.7911, "step": 2069 }, { "epoch": 0.21, "grad_norm": 1.5876347235308899, "learning_rate": 9.161993228435513e-06, "loss": 0.6721, "step": 2070 }, { "epoch": 0.21, "grad_norm": 1.4603192191328558, "learning_rate": 9.161076588903921e-06, "loss": 0.6755, "step": 2071 }, { "epoch": 0.21, "grad_norm": 1.3967866441501164, "learning_rate": 9.160159494227435e-06, "loss": 0.6899, "step": 2072 }, { "epoch": 0.21, "grad_norm": 1.3598601158530204, "learning_rate": 9.159241944506366e-06, "loss": 0.7037, "step": 2073 }, { "epoch": 0.21, "grad_norm": 1.3951604773133826, "learning_rate": 9.15832393984108e-06, "loss": 0.7155, "step": 2074 }, { "epoch": 0.21, "grad_norm": 1.3498808056805922, "learning_rate": 9.157405480331988e-06, "loss": 0.7866, "step": 2075 }, { "epoch": 0.21, "grad_norm": 1.7324285158503163, "learning_rate": 9.156486566079552e-06, "loss": 0.7667, "step": 2076 }, { "epoch": 0.21, "grad_norm": 1.7422578657201346, "learning_rate": 9.155567197184287e-06, "loss": 0.7512, "step": 2077 }, { "epoch": 0.21, "grad_norm": 1.3618668676045873, "learning_rate": 9.154647373746753e-06, "loss": 0.6458, "step": 2078 }, { "epoch": 0.21, "grad_norm": 1.36409672485177, "learning_rate": 9.153727095867559e-06, "loss": 0.6787, "step": 2079 }, { "epoch": 0.21, "grad_norm": 1.4101146788048091, "learning_rate": 9.152806363647371e-06, "loss": 0.7276, "step": 2080 }, { "epoch": 0.21, "grad_norm": 1.478197075386133, "learning_rate": 9.151885177186899e-06, "loss": 0.7527, "step": 2081 }, { "epoch": 0.21, "grad_norm": 1.2195211805833372, "learning_rate": 9.150963536586902e-06, "loss": 0.5494, "step": 2082 }, { "epoch": 0.21, "grad_norm": 1.5783391863381344, "learning_rate": 9.150041441948193e-06, "loss": 0.6734, "step": 2083 }, { "epoch": 0.21, "grad_norm": 1.4243440707946062, "learning_rate": 9.149118893371627e-06, "loss": 0.5424, "step": 2084 }, { "epoch": 0.21, "grad_norm": 1.5492983345261044, "learning_rate": 9.14819589095812e-06, "loss": 0.7634, "step": 2085 }, { "epoch": 0.21, "grad_norm": 1.733283044631517, "learning_rate": 9.147272434808627e-06, "loss": 0.7478, "step": 2086 }, { "epoch": 0.21, "grad_norm": 1.224483057081978, "learning_rate": 9.14634852502416e-06, "loss": 0.6193, "step": 2087 }, { "epoch": 0.21, "grad_norm": 1.3117300688114049, "learning_rate": 9.145424161705777e-06, "loss": 0.5871, "step": 2088 }, { "epoch": 0.21, "grad_norm": 1.249316148785036, "learning_rate": 9.144499344954585e-06, "loss": 0.6509, "step": 2089 }, { "epoch": 0.21, "grad_norm": 1.4590738184892864, "learning_rate": 9.143574074871743e-06, "loss": 0.6837, "step": 2090 }, { "epoch": 0.21, "grad_norm": 1.5186659055504985, "learning_rate": 9.142648351558459e-06, "loss": 0.6821, "step": 2091 }, { "epoch": 0.21, "grad_norm": 1.4953912148930697, "learning_rate": 9.141722175115987e-06, "loss": 0.6993, "step": 2092 }, { "epoch": 0.21, "grad_norm": 1.302807926894604, "learning_rate": 9.140795545645637e-06, "loss": 0.7212, "step": 2093 }, { "epoch": 0.21, "grad_norm": 1.3478852909566545, "learning_rate": 9.139868463248763e-06, "loss": 0.6516, "step": 2094 }, { "epoch": 0.21, "grad_norm": 1.3479710072190145, "learning_rate": 9.138940928026772e-06, "loss": 0.7585, "step": 2095 }, { "epoch": 0.21, "grad_norm": 1.5955168252925254, "learning_rate": 9.138012940081118e-06, "loss": 0.673, "step": 2096 }, { "epoch": 0.21, "grad_norm": 1.4522996594242072, "learning_rate": 9.137084499513307e-06, "loss": 0.6867, "step": 2097 }, { "epoch": 0.21, "grad_norm": 1.4389671875171774, "learning_rate": 9.136155606424893e-06, "loss": 0.6738, "step": 2098 }, { "epoch": 0.21, "grad_norm": 1.9323951060756122, "learning_rate": 9.135226260917477e-06, "loss": 0.7285, "step": 2099 }, { "epoch": 0.21, "grad_norm": 1.3969122210391622, "learning_rate": 9.134296463092718e-06, "loss": 0.6887, "step": 2100 }, { "epoch": 0.21, "grad_norm": 1.5421193334975913, "learning_rate": 9.133366213052313e-06, "loss": 0.7232, "step": 2101 }, { "epoch": 0.21, "grad_norm": 1.4629385573849785, "learning_rate": 9.132435510898017e-06, "loss": 0.6632, "step": 2102 }, { "epoch": 0.21, "grad_norm": 1.473312357859702, "learning_rate": 9.131504356731632e-06, "loss": 0.7322, "step": 2103 }, { "epoch": 0.21, "grad_norm": 1.641659060743907, "learning_rate": 9.130572750655008e-06, "loss": 0.6309, "step": 2104 }, { "epoch": 0.21, "grad_norm": 1.6124432085749913, "learning_rate": 9.129640692770046e-06, "loss": 0.7325, "step": 2105 }, { "epoch": 0.22, "grad_norm": 1.701733015444287, "learning_rate": 9.128708183178694e-06, "loss": 0.8065, "step": 2106 }, { "epoch": 0.22, "grad_norm": 1.629076438583958, "learning_rate": 9.127775221982954e-06, "loss": 0.783, "step": 2107 }, { "epoch": 0.22, "grad_norm": 1.320948215717889, "learning_rate": 9.126841809284874e-06, "loss": 0.6306, "step": 2108 }, { "epoch": 0.22, "grad_norm": 1.3804410169116166, "learning_rate": 9.12590794518655e-06, "loss": 0.6521, "step": 2109 }, { "epoch": 0.22, "grad_norm": 1.5823984479342792, "learning_rate": 9.124973629790132e-06, "loss": 0.6614, "step": 2110 }, { "epoch": 0.22, "grad_norm": 1.5241292661132009, "learning_rate": 9.124038863197817e-06, "loss": 0.7147, "step": 2111 }, { "epoch": 0.22, "grad_norm": 1.6768797004312712, "learning_rate": 9.12310364551185e-06, "loss": 0.6753, "step": 2112 }, { "epoch": 0.22, "grad_norm": 1.5346030230285908, "learning_rate": 9.122167976834526e-06, "loss": 0.7073, "step": 2113 }, { "epoch": 0.22, "grad_norm": 1.3514137478515698, "learning_rate": 9.121231857268192e-06, "loss": 0.7169, "step": 2114 }, { "epoch": 0.22, "grad_norm": 1.2650918509273468, "learning_rate": 9.12029528691524e-06, "loss": 0.6152, "step": 2115 }, { "epoch": 0.22, "grad_norm": 1.4186780987609715, "learning_rate": 9.119358265878114e-06, "loss": 0.5607, "step": 2116 }, { "epoch": 0.22, "grad_norm": 1.4372811671733747, "learning_rate": 9.118420794259307e-06, "loss": 0.6507, "step": 2117 }, { "epoch": 0.22, "grad_norm": 1.629752063755341, "learning_rate": 9.117482872161362e-06, "loss": 0.7032, "step": 2118 }, { "epoch": 0.22, "grad_norm": 1.4812112202523964, "learning_rate": 9.116544499686867e-06, "loss": 0.644, "step": 2119 }, { "epoch": 0.22, "grad_norm": 1.4843297565794071, "learning_rate": 9.115605676938469e-06, "loss": 0.7661, "step": 2120 }, { "epoch": 0.22, "grad_norm": 1.327584067695179, "learning_rate": 9.114666404018852e-06, "loss": 0.6488, "step": 2121 }, { "epoch": 0.22, "grad_norm": 1.3279313844544147, "learning_rate": 9.113726681030756e-06, "loss": 0.6315, "step": 2122 }, { "epoch": 0.22, "grad_norm": 1.6257030531695784, "learning_rate": 9.112786508076972e-06, "loss": 0.7083, "step": 2123 }, { "epoch": 0.22, "grad_norm": 1.3504774223689011, "learning_rate": 9.111845885260336e-06, "loss": 0.5824, "step": 2124 }, { "epoch": 0.22, "grad_norm": 1.6596123705070098, "learning_rate": 9.110904812683736e-06, "loss": 0.7543, "step": 2125 }, { "epoch": 0.22, "grad_norm": 1.5746178926502405, "learning_rate": 9.109963290450103e-06, "loss": 0.6939, "step": 2126 }, { "epoch": 0.22, "grad_norm": 1.4774029373816278, "learning_rate": 9.109021318662429e-06, "loss": 0.7141, "step": 2127 }, { "epoch": 0.22, "grad_norm": 1.443058817215059, "learning_rate": 9.108078897423744e-06, "loss": 0.6961, "step": 2128 }, { "epoch": 0.22, "grad_norm": 1.5909217418003023, "learning_rate": 9.107136026837132e-06, "loss": 0.8233, "step": 2129 }, { "epoch": 0.22, "grad_norm": 1.4318294874523034, "learning_rate": 9.106192707005727e-06, "loss": 0.6871, "step": 2130 }, { "epoch": 0.22, "grad_norm": 1.4382127023227231, "learning_rate": 9.10524893803271e-06, "loss": 0.7203, "step": 2131 }, { "epoch": 0.22, "grad_norm": 1.5933990295754612, "learning_rate": 9.10430472002131e-06, "loss": 0.673, "step": 2132 }, { "epoch": 0.22, "grad_norm": 1.49490875647206, "learning_rate": 9.103360053074812e-06, "loss": 0.6352, "step": 2133 }, { "epoch": 0.22, "grad_norm": 3.297624747245061, "learning_rate": 9.102414937296542e-06, "loss": 0.6933, "step": 2134 }, { "epoch": 0.22, "grad_norm": 1.4539574159200659, "learning_rate": 9.101469372789875e-06, "loss": 0.684, "step": 2135 }, { "epoch": 0.22, "grad_norm": 1.2489659927457335, "learning_rate": 9.100523359658245e-06, "loss": 0.664, "step": 2136 }, { "epoch": 0.22, "grad_norm": 1.5321255458568594, "learning_rate": 9.099576898005121e-06, "loss": 0.7133, "step": 2137 }, { "epoch": 0.22, "grad_norm": 1.3713926136707535, "learning_rate": 9.098629987934036e-06, "loss": 0.6724, "step": 2138 }, { "epoch": 0.22, "grad_norm": 1.2577442949207858, "learning_rate": 9.097682629548558e-06, "loss": 0.6045, "step": 2139 }, { "epoch": 0.22, "grad_norm": 1.518888578602032, "learning_rate": 9.096734822952314e-06, "loss": 0.7444, "step": 2140 }, { "epoch": 0.22, "grad_norm": 1.734626946662168, "learning_rate": 9.095786568248975e-06, "loss": 0.6716, "step": 2141 }, { "epoch": 0.22, "grad_norm": 1.5219924601972314, "learning_rate": 9.094837865542265e-06, "loss": 0.7604, "step": 2142 }, { "epoch": 0.22, "grad_norm": 1.339358214744254, "learning_rate": 9.093888714935952e-06, "loss": 0.6774, "step": 2143 }, { "epoch": 0.22, "grad_norm": 1.4728873467755765, "learning_rate": 9.092939116533857e-06, "loss": 0.7182, "step": 2144 }, { "epoch": 0.22, "grad_norm": 1.4942240167377947, "learning_rate": 9.091989070439846e-06, "loss": 0.7282, "step": 2145 }, { "epoch": 0.22, "grad_norm": 1.3823828415264845, "learning_rate": 9.09103857675784e-06, "loss": 0.6392, "step": 2146 }, { "epoch": 0.22, "grad_norm": 1.4897077774816219, "learning_rate": 9.090087635591803e-06, "loss": 0.638, "step": 2147 }, { "epoch": 0.22, "grad_norm": 1.4732760741452013, "learning_rate": 9.089136247045752e-06, "loss": 0.6069, "step": 2148 }, { "epoch": 0.22, "grad_norm": 1.5710287192341994, "learning_rate": 9.08818441122375e-06, "loss": 0.7697, "step": 2149 }, { "epoch": 0.22, "grad_norm": 1.3544721531913606, "learning_rate": 9.087232128229912e-06, "loss": 0.6967, "step": 2150 }, { "epoch": 0.22, "grad_norm": 1.5420220755297798, "learning_rate": 9.086279398168397e-06, "loss": 0.6922, "step": 2151 }, { "epoch": 0.22, "grad_norm": 1.4221829090552567, "learning_rate": 9.08532622114342e-06, "loss": 0.6575, "step": 2152 }, { "epoch": 0.22, "grad_norm": 1.5437669834652719, "learning_rate": 9.084372597259237e-06, "loss": 0.698, "step": 2153 }, { "epoch": 0.22, "grad_norm": 1.2173881935329862, "learning_rate": 9.083418526620161e-06, "loss": 0.5875, "step": 2154 }, { "epoch": 0.22, "grad_norm": 1.4294249587597798, "learning_rate": 9.082464009330545e-06, "loss": 0.6308, "step": 2155 }, { "epoch": 0.22, "grad_norm": 1.489457043804161, "learning_rate": 9.081509045494801e-06, "loss": 0.8007, "step": 2156 }, { "epoch": 0.22, "grad_norm": 1.3276265738046291, "learning_rate": 9.080553635217379e-06, "loss": 0.5953, "step": 2157 }, { "epoch": 0.22, "grad_norm": 1.2846117754120188, "learning_rate": 9.079597778602787e-06, "loss": 0.677, "step": 2158 }, { "epoch": 0.22, "grad_norm": 1.5298828501423818, "learning_rate": 9.078641475755576e-06, "loss": 0.6261, "step": 2159 }, { "epoch": 0.22, "grad_norm": 1.5324192910785492, "learning_rate": 9.07768472678035e-06, "loss": 0.7361, "step": 2160 }, { "epoch": 0.22, "grad_norm": 1.459270002587596, "learning_rate": 9.076727531781756e-06, "loss": 0.8019, "step": 2161 }, { "epoch": 0.22, "grad_norm": 1.397825550435603, "learning_rate": 9.075769890864496e-06, "loss": 0.6958, "step": 2162 }, { "epoch": 0.22, "grad_norm": 1.533397255877406, "learning_rate": 9.074811804133319e-06, "loss": 0.7054, "step": 2163 }, { "epoch": 0.22, "grad_norm": 1.3705217260499938, "learning_rate": 9.073853271693022e-06, "loss": 0.6113, "step": 2164 }, { "epoch": 0.22, "grad_norm": 1.2851058206672177, "learning_rate": 9.072894293648448e-06, "loss": 0.6413, "step": 2165 }, { "epoch": 0.22, "grad_norm": 1.4456489808138708, "learning_rate": 9.071934870104493e-06, "loss": 0.7875, "step": 2166 }, { "epoch": 0.22, "grad_norm": 1.3276635984982605, "learning_rate": 9.070975001166102e-06, "loss": 0.7021, "step": 2167 }, { "epoch": 0.22, "grad_norm": 1.3036067444286323, "learning_rate": 9.070014686938265e-06, "loss": 0.5665, "step": 2168 }, { "epoch": 0.22, "grad_norm": 1.3237703111463675, "learning_rate": 9.069053927526021e-06, "loss": 0.6042, "step": 2169 }, { "epoch": 0.22, "grad_norm": 1.3423857624841409, "learning_rate": 9.068092723034463e-06, "loss": 0.5155, "step": 2170 }, { "epoch": 0.22, "grad_norm": 1.2570911666218558, "learning_rate": 9.067131073568726e-06, "loss": 0.6017, "step": 2171 }, { "epoch": 0.22, "grad_norm": 1.292421921466412, "learning_rate": 9.066168979233998e-06, "loss": 0.5854, "step": 2172 }, { "epoch": 0.22, "grad_norm": 1.5773666534794566, "learning_rate": 9.065206440135516e-06, "loss": 0.646, "step": 2173 }, { "epoch": 0.22, "grad_norm": 3.024232035999044, "learning_rate": 9.064243456378562e-06, "loss": 0.6779, "step": 2174 }, { "epoch": 0.22, "grad_norm": 1.3827773065562636, "learning_rate": 9.063280028068467e-06, "loss": 0.5711, "step": 2175 }, { "epoch": 0.22, "grad_norm": 1.4605349143836563, "learning_rate": 9.062316155310616e-06, "loss": 0.7491, "step": 2176 }, { "epoch": 0.22, "grad_norm": 1.4913889322100151, "learning_rate": 9.061351838210435e-06, "loss": 0.5961, "step": 2177 }, { "epoch": 0.22, "grad_norm": 1.56392655558746, "learning_rate": 9.060387076873405e-06, "loss": 0.6975, "step": 2178 }, { "epoch": 0.22, "grad_norm": 1.407559579422447, "learning_rate": 9.059421871405054e-06, "loss": 0.7054, "step": 2179 }, { "epoch": 0.22, "grad_norm": 1.537371049002153, "learning_rate": 9.058456221910956e-06, "loss": 0.6895, "step": 2180 }, { "epoch": 0.22, "grad_norm": 1.3237987545776888, "learning_rate": 9.057490128496735e-06, "loss": 0.6411, "step": 2181 }, { "epoch": 0.22, "grad_norm": 1.4494289748898879, "learning_rate": 9.056523591268064e-06, "loss": 0.7321, "step": 2182 }, { "epoch": 0.22, "grad_norm": 1.4006605264821603, "learning_rate": 9.055556610330664e-06, "loss": 0.6837, "step": 2183 }, { "epoch": 0.22, "grad_norm": 1.4378231240215518, "learning_rate": 9.054589185790305e-06, "loss": 0.6268, "step": 2184 }, { "epoch": 0.22, "grad_norm": 1.5742011669498925, "learning_rate": 9.053621317752807e-06, "loss": 0.6992, "step": 2185 }, { "epoch": 0.22, "grad_norm": 1.437022501663322, "learning_rate": 9.052653006324036e-06, "loss": 0.7527, "step": 2186 }, { "epoch": 0.22, "grad_norm": 1.4584222513775147, "learning_rate": 9.051684251609907e-06, "loss": 0.6217, "step": 2187 }, { "epoch": 0.22, "grad_norm": 1.4852499496764904, "learning_rate": 9.050715053716384e-06, "loss": 0.7102, "step": 2188 }, { "epoch": 0.22, "grad_norm": 1.710091694330564, "learning_rate": 9.049745412749478e-06, "loss": 0.681, "step": 2189 }, { "epoch": 0.22, "grad_norm": 1.4552123351134938, "learning_rate": 9.048775328815251e-06, "loss": 0.6889, "step": 2190 }, { "epoch": 0.22, "grad_norm": 1.4418119076991607, "learning_rate": 9.047804802019813e-06, "loss": 0.641, "step": 2191 }, { "epoch": 0.22, "grad_norm": 1.3972706949525113, "learning_rate": 9.04683383246932e-06, "loss": 0.7385, "step": 2192 }, { "epoch": 0.22, "grad_norm": 1.5363340683680469, "learning_rate": 9.04586242026998e-06, "loss": 0.7536, "step": 2193 }, { "epoch": 0.22, "grad_norm": 1.5445740999790811, "learning_rate": 9.044890565528045e-06, "loss": 0.7524, "step": 2194 }, { "epoch": 0.22, "grad_norm": 1.8266607268397554, "learning_rate": 9.043918268349822e-06, "loss": 0.665, "step": 2195 }, { "epoch": 0.22, "grad_norm": 1.3078999976718013, "learning_rate": 9.042945528841657e-06, "loss": 0.624, "step": 2196 }, { "epoch": 0.22, "grad_norm": 1.5765675086976638, "learning_rate": 9.041972347109956e-06, "loss": 0.9013, "step": 2197 }, { "epoch": 0.22, "grad_norm": 1.3627692574697408, "learning_rate": 9.04099872326116e-06, "loss": 0.7155, "step": 2198 }, { "epoch": 0.22, "grad_norm": 1.9108109483671678, "learning_rate": 9.040024657401773e-06, "loss": 0.6585, "step": 2199 }, { "epoch": 0.22, "grad_norm": 1.3391018971870627, "learning_rate": 9.039050149638334e-06, "loss": 0.7093, "step": 2200 }, { "epoch": 0.22, "grad_norm": 1.3912408423889362, "learning_rate": 9.038075200077438e-06, "loss": 0.7715, "step": 2201 }, { "epoch": 0.22, "grad_norm": 1.4226330804505771, "learning_rate": 9.037099808825728e-06, "loss": 0.7464, "step": 2202 }, { "epoch": 0.22, "grad_norm": 1.4968883007515308, "learning_rate": 9.036123975989893e-06, "loss": 0.6573, "step": 2203 }, { "epoch": 0.23, "grad_norm": 1.382468545290005, "learning_rate": 9.035147701676667e-06, "loss": 0.6308, "step": 2204 }, { "epoch": 0.23, "grad_norm": 1.5190628931738082, "learning_rate": 9.034170985992843e-06, "loss": 0.6729, "step": 2205 }, { "epoch": 0.23, "grad_norm": 1.8183359028078554, "learning_rate": 9.03319382904525e-06, "loss": 0.6753, "step": 2206 }, { "epoch": 0.23, "grad_norm": 1.3726709045806282, "learning_rate": 9.032216230940775e-06, "loss": 0.8453, "step": 2207 }, { "epoch": 0.23, "grad_norm": 1.5661571931545653, "learning_rate": 9.031238191786348e-06, "loss": 0.6153, "step": 2208 }, { "epoch": 0.23, "grad_norm": 1.2948674065300876, "learning_rate": 9.030259711688948e-06, "loss": 0.647, "step": 2209 }, { "epoch": 0.23, "grad_norm": 1.4266456740425206, "learning_rate": 9.029280790755601e-06, "loss": 0.5958, "step": 2210 }, { "epoch": 0.23, "grad_norm": 1.4183825108725046, "learning_rate": 9.028301429093387e-06, "loss": 0.583, "step": 2211 }, { "epoch": 0.23, "grad_norm": 1.3686087420945177, "learning_rate": 9.027321626809426e-06, "loss": 0.6435, "step": 2212 }, { "epoch": 0.23, "grad_norm": 1.4162097950005987, "learning_rate": 9.026341384010892e-06, "loss": 0.7331, "step": 2213 }, { "epoch": 0.23, "grad_norm": 7.306791315977959, "learning_rate": 9.025360700805005e-06, "loss": 0.649, "step": 2214 }, { "epoch": 0.23, "grad_norm": 1.3226061758679648, "learning_rate": 9.024379577299034e-06, "loss": 0.6191, "step": 2215 }, { "epoch": 0.23, "grad_norm": 1.3727791700288419, "learning_rate": 9.023398013600297e-06, "loss": 0.6155, "step": 2216 }, { "epoch": 0.23, "grad_norm": 1.3227863751205897, "learning_rate": 9.022416009816155e-06, "loss": 0.6732, "step": 2217 }, { "epoch": 0.23, "grad_norm": 1.633118321466661, "learning_rate": 9.021433566054026e-06, "loss": 0.7437, "step": 2218 }, { "epoch": 0.23, "grad_norm": 1.462814206109422, "learning_rate": 9.020450682421368e-06, "loss": 0.746, "step": 2219 }, { "epoch": 0.23, "grad_norm": 1.5201329812001343, "learning_rate": 9.019467359025693e-06, "loss": 0.7062, "step": 2220 }, { "epoch": 0.23, "grad_norm": 1.2814846468998216, "learning_rate": 9.018483595974554e-06, "loss": 0.6173, "step": 2221 }, { "epoch": 0.23, "grad_norm": 1.4034087924610312, "learning_rate": 9.01749939337556e-06, "loss": 0.7359, "step": 2222 }, { "epoch": 0.23, "grad_norm": 1.2994170896277413, "learning_rate": 9.016514751336365e-06, "loss": 0.6463, "step": 2223 }, { "epoch": 0.23, "grad_norm": 1.2800720536448826, "learning_rate": 9.015529669964668e-06, "loss": 0.6287, "step": 2224 }, { "epoch": 0.23, "grad_norm": 1.3775051792556774, "learning_rate": 9.014544149368221e-06, "loss": 0.7051, "step": 2225 }, { "epoch": 0.23, "grad_norm": 1.6707954796060565, "learning_rate": 9.013558189654819e-06, "loss": 0.6966, "step": 2226 }, { "epoch": 0.23, "grad_norm": 1.2871972809098993, "learning_rate": 9.01257179093231e-06, "loss": 0.6084, "step": 2227 }, { "epoch": 0.23, "grad_norm": 1.414934068969456, "learning_rate": 9.011584953308589e-06, "loss": 0.6712, "step": 2228 }, { "epoch": 0.23, "grad_norm": 1.3930025788400726, "learning_rate": 9.010597676891596e-06, "loss": 0.644, "step": 2229 }, { "epoch": 0.23, "grad_norm": 1.3576745513430835, "learning_rate": 9.009609961789319e-06, "loss": 0.6426, "step": 2230 }, { "epoch": 0.23, "grad_norm": 1.6168720345190468, "learning_rate": 9.008621808109799e-06, "loss": 0.627, "step": 2231 }, { "epoch": 0.23, "grad_norm": 1.484712003106606, "learning_rate": 9.00763321596112e-06, "loss": 0.694, "step": 2232 }, { "epoch": 0.23, "grad_norm": 1.5310590985828596, "learning_rate": 9.006644185451416e-06, "loss": 0.6798, "step": 2233 }, { "epoch": 0.23, "grad_norm": 1.609435824238784, "learning_rate": 9.005654716688871e-06, "loss": 0.5845, "step": 2234 }, { "epoch": 0.23, "grad_norm": 1.560474831352397, "learning_rate": 9.00466480978171e-06, "loss": 0.6931, "step": 2235 }, { "epoch": 0.23, "grad_norm": 1.6005134106174013, "learning_rate": 9.003674464838214e-06, "loss": 0.7638, "step": 2236 }, { "epoch": 0.23, "grad_norm": 1.2487121548873543, "learning_rate": 9.002683681966708e-06, "loss": 0.6731, "step": 2237 }, { "epoch": 0.23, "grad_norm": 1.5018950132302407, "learning_rate": 9.001692461275565e-06, "loss": 0.7792, "step": 2238 }, { "epoch": 0.23, "grad_norm": 1.5973080415994294, "learning_rate": 9.000700802873205e-06, "loss": 0.6439, "step": 2239 }, { "epoch": 0.23, "grad_norm": 1.5283702338143754, "learning_rate": 8.999708706868098e-06, "loss": 0.6626, "step": 2240 }, { "epoch": 0.23, "grad_norm": 1.5923617355874755, "learning_rate": 8.998716173368762e-06, "loss": 0.6927, "step": 2241 }, { "epoch": 0.23, "grad_norm": 1.418827875517266, "learning_rate": 8.99772320248376e-06, "loss": 0.7665, "step": 2242 }, { "epoch": 0.23, "grad_norm": 1.5499178401443388, "learning_rate": 8.996729794321707e-06, "loss": 0.7052, "step": 2243 }, { "epoch": 0.23, "grad_norm": 1.3909325536721704, "learning_rate": 8.99573594899126e-06, "loss": 0.6413, "step": 2244 }, { "epoch": 0.23, "grad_norm": 1.4017176339798347, "learning_rate": 8.99474166660113e-06, "loss": 0.6551, "step": 2245 }, { "epoch": 0.23, "grad_norm": 1.8094732975127292, "learning_rate": 8.993746947260073e-06, "loss": 0.5885, "step": 2246 }, { "epoch": 0.23, "grad_norm": 1.6498493397861724, "learning_rate": 8.992751791076893e-06, "loss": 0.7226, "step": 2247 }, { "epoch": 0.23, "grad_norm": 1.3815036955279771, "learning_rate": 8.99175619816044e-06, "loss": 0.7252, "step": 2248 }, { "epoch": 0.23, "grad_norm": 1.4243440603664363, "learning_rate": 8.990760168619616e-06, "loss": 0.7313, "step": 2249 }, { "epoch": 0.23, "grad_norm": 1.5257054668716086, "learning_rate": 8.989763702563366e-06, "loss": 0.6906, "step": 2250 }, { "epoch": 0.23, "grad_norm": 1.5558962208663738, "learning_rate": 8.988766800100686e-06, "loss": 0.7408, "step": 2251 }, { "epoch": 0.23, "grad_norm": 1.3032855190410089, "learning_rate": 8.987769461340617e-06, "loss": 0.6023, "step": 2252 }, { "epoch": 0.23, "grad_norm": 1.3302050768967777, "learning_rate": 8.986771686392253e-06, "loss": 0.6837, "step": 2253 }, { "epoch": 0.23, "grad_norm": 1.3931800160003067, "learning_rate": 8.98577347536473e-06, "loss": 0.6539, "step": 2254 }, { "epoch": 0.23, "grad_norm": 1.4714176105851318, "learning_rate": 8.984774828367233e-06, "loss": 0.6106, "step": 2255 }, { "epoch": 0.23, "grad_norm": 1.3856357279291314, "learning_rate": 8.983775745508996e-06, "loss": 0.7583, "step": 2256 }, { "epoch": 0.23, "grad_norm": 1.4699869807636028, "learning_rate": 8.9827762268993e-06, "loss": 0.7701, "step": 2257 }, { "epoch": 0.23, "grad_norm": 1.452096817810861, "learning_rate": 8.981776272647478e-06, "loss": 0.5466, "step": 2258 }, { "epoch": 0.23, "grad_norm": 1.4036749580880086, "learning_rate": 8.9807758828629e-06, "loss": 0.6596, "step": 2259 }, { "epoch": 0.23, "grad_norm": 1.550085788006423, "learning_rate": 8.979775057654993e-06, "loss": 0.7091, "step": 2260 }, { "epoch": 0.23, "grad_norm": 1.2360947145725532, "learning_rate": 8.97877379713323e-06, "loss": 0.6277, "step": 2261 }, { "epoch": 0.23, "grad_norm": 1.4973058521675637, "learning_rate": 8.977772101407132e-06, "loss": 0.6083, "step": 2262 }, { "epoch": 0.23, "grad_norm": 1.5917879908184143, "learning_rate": 8.97676997058626e-06, "loss": 0.759, "step": 2263 }, { "epoch": 0.23, "grad_norm": 1.2324801518493658, "learning_rate": 8.975767404780233e-06, "loss": 0.6152, "step": 2264 }, { "epoch": 0.23, "grad_norm": 1.4270919624091079, "learning_rate": 8.974764404098713e-06, "loss": 0.724, "step": 2265 }, { "epoch": 0.23, "grad_norm": 1.3627750984260154, "learning_rate": 8.973760968651408e-06, "loss": 0.675, "step": 2266 }, { "epoch": 0.23, "grad_norm": 1.347417390321762, "learning_rate": 8.972757098548074e-06, "loss": 0.6604, "step": 2267 }, { "epoch": 0.23, "grad_norm": 1.3957291428989675, "learning_rate": 8.971752793898523e-06, "loss": 0.872, "step": 2268 }, { "epoch": 0.23, "grad_norm": 1.6546507399960548, "learning_rate": 8.9707480548126e-06, "loss": 0.757, "step": 2269 }, { "epoch": 0.23, "grad_norm": 1.471940164474822, "learning_rate": 8.969742881400206e-06, "loss": 0.5481, "step": 2270 }, { "epoch": 0.23, "grad_norm": 1.4064486467037844, "learning_rate": 8.968737273771291e-06, "loss": 0.7268, "step": 2271 }, { "epoch": 0.23, "grad_norm": 1.4641946529319272, "learning_rate": 8.967731232035848e-06, "loss": 0.6643, "step": 2272 }, { "epoch": 0.23, "grad_norm": 1.4614142920597868, "learning_rate": 8.96672475630392e-06, "loss": 0.7427, "step": 2273 }, { "epoch": 0.23, "grad_norm": 1.3859321329546652, "learning_rate": 8.965717846685597e-06, "loss": 0.7327, "step": 2274 }, { "epoch": 0.23, "grad_norm": 1.426772798805651, "learning_rate": 8.964710503291017e-06, "loss": 0.6571, "step": 2275 }, { "epoch": 0.23, "grad_norm": 1.5042584943548765, "learning_rate": 8.963702726230363e-06, "loss": 0.6991, "step": 2276 }, { "epoch": 0.23, "grad_norm": 1.756021294997881, "learning_rate": 8.96269451561387e-06, "loss": 0.6026, "step": 2277 }, { "epoch": 0.23, "grad_norm": 1.4834358094654563, "learning_rate": 8.961685871551813e-06, "loss": 0.7563, "step": 2278 }, { "epoch": 0.23, "grad_norm": 1.5154157438731817, "learning_rate": 8.960676794154523e-06, "loss": 0.7499, "step": 2279 }, { "epoch": 0.23, "grad_norm": 1.4927658137391855, "learning_rate": 8.959667283532373e-06, "loss": 0.6419, "step": 2280 }, { "epoch": 0.23, "grad_norm": 1.5805299909286301, "learning_rate": 8.958657339795785e-06, "loss": 0.6327, "step": 2281 }, { "epoch": 0.23, "grad_norm": 1.413829247023115, "learning_rate": 8.957646963055227e-06, "loss": 0.6527, "step": 2282 }, { "epoch": 0.23, "grad_norm": 1.4664243408400321, "learning_rate": 8.95663615342122e-06, "loss": 0.7269, "step": 2283 }, { "epoch": 0.23, "grad_norm": 1.2811680957713578, "learning_rate": 8.955624911004321e-06, "loss": 0.5953, "step": 2284 }, { "epoch": 0.23, "grad_norm": 1.2064779109490213, "learning_rate": 8.954613235915147e-06, "loss": 0.6405, "step": 2285 }, { "epoch": 0.23, "grad_norm": 1.465753063782494, "learning_rate": 8.953601128264352e-06, "loss": 0.6702, "step": 2286 }, { "epoch": 0.23, "grad_norm": 1.4165271618750728, "learning_rate": 8.952588588162647e-06, "loss": 0.6944, "step": 2287 }, { "epoch": 0.23, "grad_norm": 1.4727068649254944, "learning_rate": 8.95157561572078e-06, "loss": 0.8091, "step": 2288 }, { "epoch": 0.23, "grad_norm": 1.5165285003686677, "learning_rate": 8.950562211049556e-06, "loss": 0.7166, "step": 2289 }, { "epoch": 0.23, "grad_norm": 1.5549789957915625, "learning_rate": 8.94954837425982e-06, "loss": 0.6609, "step": 2290 }, { "epoch": 0.23, "grad_norm": 1.3747765796895706, "learning_rate": 8.948534105462467e-06, "loss": 0.6571, "step": 2291 }, { "epoch": 0.23, "grad_norm": 1.4998746154341522, "learning_rate": 8.94751940476844e-06, "loss": 0.734, "step": 2292 }, { "epoch": 0.23, "grad_norm": 1.3657499226773728, "learning_rate": 8.94650427228873e-06, "loss": 0.6231, "step": 2293 }, { "epoch": 0.23, "grad_norm": 1.3201329804719313, "learning_rate": 8.94548870813437e-06, "loss": 0.6283, "step": 2294 }, { "epoch": 0.23, "grad_norm": 1.5277571951437219, "learning_rate": 8.944472712416448e-06, "loss": 0.7158, "step": 2295 }, { "epoch": 0.23, "grad_norm": 1.4548441750653525, "learning_rate": 8.943456285246092e-06, "loss": 0.6751, "step": 2296 }, { "epoch": 0.23, "grad_norm": 1.4803020087577405, "learning_rate": 8.942439426734482e-06, "loss": 0.5355, "step": 2297 }, { "epoch": 0.23, "grad_norm": 1.46395052820612, "learning_rate": 8.941422136992843e-06, "loss": 0.6353, "step": 2298 }, { "epoch": 0.23, "grad_norm": 1.5196499638151277, "learning_rate": 8.940404416132448e-06, "loss": 0.7007, "step": 2299 }, { "epoch": 0.23, "grad_norm": 1.388925214256492, "learning_rate": 8.939386264264616e-06, "loss": 0.6149, "step": 2300 }, { "epoch": 0.23, "grad_norm": 1.2686570514075013, "learning_rate": 8.938367681500717e-06, "loss": 0.6795, "step": 2301 }, { "epoch": 0.24, "grad_norm": 1.3774335149664247, "learning_rate": 8.93734866795216e-06, "loss": 0.6349, "step": 2302 }, { "epoch": 0.24, "grad_norm": 1.4553670704128248, "learning_rate": 8.936329223730411e-06, "loss": 0.6781, "step": 2303 }, { "epoch": 0.24, "grad_norm": 1.3238305563326274, "learning_rate": 8.935309348946976e-06, "loss": 0.6429, "step": 2304 }, { "epoch": 0.24, "grad_norm": 1.4874581704439058, "learning_rate": 8.93428904371341e-06, "loss": 0.6283, "step": 2305 }, { "epoch": 0.24, "grad_norm": 1.1978172205585431, "learning_rate": 8.933268308141318e-06, "loss": 0.5205, "step": 2306 }, { "epoch": 0.24, "grad_norm": 1.3027904298034698, "learning_rate": 8.932247142342348e-06, "loss": 0.6551, "step": 2307 }, { "epoch": 0.24, "grad_norm": 1.3559535863027286, "learning_rate": 8.931225546428197e-06, "loss": 0.6669, "step": 2308 }, { "epoch": 0.24, "grad_norm": 1.3698149818416252, "learning_rate": 8.930203520510607e-06, "loss": 0.6278, "step": 2309 }, { "epoch": 0.24, "grad_norm": 1.9704799721850903, "learning_rate": 8.929181064701371e-06, "loss": 0.7022, "step": 2310 }, { "epoch": 0.24, "grad_norm": 1.4812062157436732, "learning_rate": 8.928158179112325e-06, "loss": 0.6409, "step": 2311 }, { "epoch": 0.24, "grad_norm": 1.4965329573033155, "learning_rate": 8.927134863855356e-06, "loss": 0.6648, "step": 2312 }, { "epoch": 0.24, "grad_norm": 1.4308305010171904, "learning_rate": 8.926111119042396e-06, "loss": 0.6554, "step": 2313 }, { "epoch": 0.24, "grad_norm": 1.3861612997184092, "learning_rate": 8.92508694478542e-06, "loss": 0.6806, "step": 2314 }, { "epoch": 0.24, "grad_norm": 1.2778756167955485, "learning_rate": 8.924062341196456e-06, "loss": 0.6264, "step": 2315 }, { "epoch": 0.24, "grad_norm": 1.6523985220335664, "learning_rate": 8.92303730838758e-06, "loss": 0.6071, "step": 2316 }, { "epoch": 0.24, "grad_norm": 1.4578539391921046, "learning_rate": 8.922011846470904e-06, "loss": 0.7672, "step": 2317 }, { "epoch": 0.24, "grad_norm": 1.4877450711930127, "learning_rate": 8.9209859555586e-06, "loss": 0.6922, "step": 2318 }, { "epoch": 0.24, "grad_norm": 1.3745788974108688, "learning_rate": 8.919959635762883e-06, "loss": 0.582, "step": 2319 }, { "epoch": 0.24, "grad_norm": 1.5528139038474595, "learning_rate": 8.918932887196008e-06, "loss": 0.7795, "step": 2320 }, { "epoch": 0.24, "grad_norm": 1.4763762472349846, "learning_rate": 8.917905709970287e-06, "loss": 0.6399, "step": 2321 }, { "epoch": 0.24, "grad_norm": 1.5679510523397877, "learning_rate": 8.916878104198074e-06, "loss": 0.76, "step": 2322 }, { "epoch": 0.24, "grad_norm": 1.4707884252508268, "learning_rate": 8.915850069991766e-06, "loss": 0.6594, "step": 2323 }, { "epoch": 0.24, "grad_norm": 1.5185445914756213, "learning_rate": 8.914821607463816e-06, "loss": 0.716, "step": 2324 }, { "epoch": 0.24, "grad_norm": 1.448347751833592, "learning_rate": 8.913792716726712e-06, "loss": 0.6582, "step": 2325 }, { "epoch": 0.24, "grad_norm": 1.451324281201705, "learning_rate": 8.912763397893003e-06, "loss": 0.807, "step": 2326 }, { "epoch": 0.24, "grad_norm": 1.3786452719288484, "learning_rate": 8.911733651075273e-06, "loss": 0.6577, "step": 2327 }, { "epoch": 0.24, "grad_norm": 1.4431402859282458, "learning_rate": 8.91070347638616e-06, "loss": 0.7771, "step": 2328 }, { "epoch": 0.24, "grad_norm": 1.4197272141167505, "learning_rate": 8.909672873938345e-06, "loss": 0.6746, "step": 2329 }, { "epoch": 0.24, "grad_norm": 1.533240471094537, "learning_rate": 8.908641843844556e-06, "loss": 0.7001, "step": 2330 }, { "epoch": 0.24, "grad_norm": 1.4948181617272212, "learning_rate": 8.907610386217569e-06, "loss": 0.5624, "step": 2331 }, { "epoch": 0.24, "grad_norm": 1.4034541850154485, "learning_rate": 8.906578501170206e-06, "loss": 0.6929, "step": 2332 }, { "epoch": 0.24, "grad_norm": 1.343185335000264, "learning_rate": 8.905546188815339e-06, "loss": 0.5979, "step": 2333 }, { "epoch": 0.24, "grad_norm": 1.43605815924013, "learning_rate": 8.90451344926588e-06, "loss": 0.6186, "step": 2334 }, { "epoch": 0.24, "grad_norm": 1.4652999402593798, "learning_rate": 8.903480282634793e-06, "loss": 0.7082, "step": 2335 }, { "epoch": 0.24, "grad_norm": 1.3785428593169247, "learning_rate": 8.902446689035089e-06, "loss": 0.6899, "step": 2336 }, { "epoch": 0.24, "grad_norm": 1.4138011602002598, "learning_rate": 8.901412668579823e-06, "loss": 0.6842, "step": 2337 }, { "epoch": 0.24, "grad_norm": 1.5515779944685513, "learning_rate": 8.900378221382097e-06, "loss": 0.7956, "step": 2338 }, { "epoch": 0.24, "grad_norm": 1.5016778479515767, "learning_rate": 8.899343347555061e-06, "loss": 0.7605, "step": 2339 }, { "epoch": 0.24, "grad_norm": 1.3603221825863958, "learning_rate": 8.89830804721191e-06, "loss": 0.5859, "step": 2340 }, { "epoch": 0.24, "grad_norm": 1.7523798348333453, "learning_rate": 8.897272320465887e-06, "loss": 0.7538, "step": 2341 }, { "epoch": 0.24, "grad_norm": 1.4030824178726866, "learning_rate": 8.896236167430283e-06, "loss": 0.6912, "step": 2342 }, { "epoch": 0.24, "grad_norm": 1.3236881706048418, "learning_rate": 8.895199588218433e-06, "loss": 0.6522, "step": 2343 }, { "epoch": 0.24, "grad_norm": 1.3992039468286595, "learning_rate": 8.894162582943722e-06, "loss": 0.5804, "step": 2344 }, { "epoch": 0.24, "grad_norm": 1.515213378118412, "learning_rate": 8.893125151719574e-06, "loss": 0.701, "step": 2345 }, { "epoch": 0.24, "grad_norm": 1.4771568557191386, "learning_rate": 8.892087294659467e-06, "loss": 0.6318, "step": 2346 }, { "epoch": 0.24, "grad_norm": 1.490926193756801, "learning_rate": 8.891049011876927e-06, "loss": 0.6323, "step": 2347 }, { "epoch": 0.24, "grad_norm": 1.4161154061586145, "learning_rate": 8.890010303485519e-06, "loss": 0.6257, "step": 2348 }, { "epoch": 0.24, "grad_norm": 1.4019400928936672, "learning_rate": 8.888971169598857e-06, "loss": 0.6496, "step": 2349 }, { "epoch": 0.24, "grad_norm": 1.5276615374677183, "learning_rate": 8.88793161033061e-06, "loss": 0.6768, "step": 2350 }, { "epoch": 0.24, "grad_norm": 1.5227789028035426, "learning_rate": 8.886891625794479e-06, "loss": 0.662, "step": 2351 }, { "epoch": 0.24, "grad_norm": 1.6147136874241839, "learning_rate": 8.885851216104222e-06, "loss": 0.7571, "step": 2352 }, { "epoch": 0.24, "grad_norm": 1.390385498734165, "learning_rate": 8.884810381373643e-06, "loss": 0.6584, "step": 2353 }, { "epoch": 0.24, "grad_norm": 1.4788655925750886, "learning_rate": 8.883769121716586e-06, "loss": 0.7726, "step": 2354 }, { "epoch": 0.24, "grad_norm": 1.3251586912175637, "learning_rate": 8.882727437246948e-06, "loss": 0.6666, "step": 2355 }, { "epoch": 0.24, "grad_norm": 1.3718798168892878, "learning_rate": 8.88168532807867e-06, "loss": 0.7129, "step": 2356 }, { "epoch": 0.24, "grad_norm": 1.5342442529774047, "learning_rate": 8.880642794325739e-06, "loss": 0.7607, "step": 2357 }, { "epoch": 0.24, "grad_norm": 1.3168917819274333, "learning_rate": 8.879599836102188e-06, "loss": 0.6555, "step": 2358 }, { "epoch": 0.24, "grad_norm": 1.3202133432562697, "learning_rate": 8.8785564535221e-06, "loss": 0.6855, "step": 2359 }, { "epoch": 0.24, "grad_norm": 1.3507908040376526, "learning_rate": 8.8775126466996e-06, "loss": 0.6894, "step": 2360 }, { "epoch": 0.24, "grad_norm": 1.421780740964073, "learning_rate": 8.876468415748861e-06, "loss": 0.7119, "step": 2361 }, { "epoch": 0.24, "grad_norm": 1.3151734471424004, "learning_rate": 8.875423760784101e-06, "loss": 0.6205, "step": 2362 }, { "epoch": 0.24, "grad_norm": 1.369972953179782, "learning_rate": 8.874378681919589e-06, "loss": 0.5944, "step": 2363 }, { "epoch": 0.24, "grad_norm": 1.494461443372828, "learning_rate": 8.873333179269635e-06, "loss": 0.6747, "step": 2364 }, { "epoch": 0.24, "grad_norm": 1.395031954280436, "learning_rate": 8.872287252948601e-06, "loss": 0.7741, "step": 2365 }, { "epoch": 0.24, "grad_norm": 1.4495868360687836, "learning_rate": 8.871240903070889e-06, "loss": 0.6837, "step": 2366 }, { "epoch": 0.24, "grad_norm": 1.7030092994956902, "learning_rate": 8.870194129750948e-06, "loss": 0.6474, "step": 2367 }, { "epoch": 0.24, "grad_norm": 1.4161072785519715, "learning_rate": 8.869146933103282e-06, "loss": 0.6286, "step": 2368 }, { "epoch": 0.24, "grad_norm": 1.317215340105073, "learning_rate": 8.868099313242433e-06, "loss": 0.5903, "step": 2369 }, { "epoch": 0.24, "grad_norm": 1.4286400204670688, "learning_rate": 8.867051270282988e-06, "loss": 0.6375, "step": 2370 }, { "epoch": 0.24, "grad_norm": 1.3232058194846505, "learning_rate": 8.866002804339586e-06, "loss": 0.6336, "step": 2371 }, { "epoch": 0.24, "grad_norm": 1.375771502680238, "learning_rate": 8.864953915526908e-06, "loss": 0.6039, "step": 2372 }, { "epoch": 0.24, "grad_norm": 1.3910892630832539, "learning_rate": 8.863904603959686e-06, "loss": 0.6035, "step": 2373 }, { "epoch": 0.24, "grad_norm": 1.6143205996615608, "learning_rate": 8.862854869752692e-06, "loss": 0.6598, "step": 2374 }, { "epoch": 0.24, "grad_norm": 1.6860425870020501, "learning_rate": 8.861804713020752e-06, "loss": 0.6772, "step": 2375 }, { "epoch": 0.24, "grad_norm": 1.601589465933505, "learning_rate": 8.86075413387873e-06, "loss": 0.678, "step": 2376 }, { "epoch": 0.24, "grad_norm": 1.4848925053744604, "learning_rate": 8.859703132441539e-06, "loss": 0.8174, "step": 2377 }, { "epoch": 0.24, "grad_norm": 1.2885579072759061, "learning_rate": 8.858651708824143e-06, "loss": 0.6607, "step": 2378 }, { "epoch": 0.24, "grad_norm": 1.4649108907676789, "learning_rate": 8.857599863141546e-06, "loss": 0.6942, "step": 2379 }, { "epoch": 0.24, "grad_norm": 1.4290322150686647, "learning_rate": 8.8565475955088e-06, "loss": 0.6502, "step": 2380 }, { "epoch": 0.24, "grad_norm": 1.3636424600776145, "learning_rate": 8.855494906041005e-06, "loss": 0.6563, "step": 2381 }, { "epoch": 0.24, "grad_norm": 1.376512890460803, "learning_rate": 8.854441794853305e-06, "loss": 0.7019, "step": 2382 }, { "epoch": 0.24, "grad_norm": 1.3314127721234585, "learning_rate": 8.853388262060892e-06, "loss": 0.6968, "step": 2383 }, { "epoch": 0.24, "grad_norm": 1.487526043475763, "learning_rate": 8.852334307779003e-06, "loss": 0.6957, "step": 2384 }, { "epoch": 0.24, "grad_norm": 1.4929085842913936, "learning_rate": 8.851279932122918e-06, "loss": 0.6787, "step": 2385 }, { "epoch": 0.24, "grad_norm": 1.3955645179866796, "learning_rate": 8.85022513520797e-06, "loss": 0.7093, "step": 2386 }, { "epoch": 0.24, "grad_norm": 1.3330497673267465, "learning_rate": 8.849169917149532e-06, "loss": 0.6665, "step": 2387 }, { "epoch": 0.24, "grad_norm": 1.3813407528482713, "learning_rate": 8.848114278063026e-06, "loss": 0.6234, "step": 2388 }, { "epoch": 0.24, "grad_norm": 1.486170957167406, "learning_rate": 8.847058218063922e-06, "loss": 0.6864, "step": 2389 }, { "epoch": 0.24, "grad_norm": 5.179211469524297, "learning_rate": 8.846001737267727e-06, "loss": 0.6557, "step": 2390 }, { "epoch": 0.24, "grad_norm": 1.3669078439915896, "learning_rate": 8.844944835790009e-06, "loss": 0.7613, "step": 2391 }, { "epoch": 0.24, "grad_norm": 1.4435833990712952, "learning_rate": 8.843887513746365e-06, "loss": 0.6426, "step": 2392 }, { "epoch": 0.24, "grad_norm": 1.4521365694703223, "learning_rate": 8.842829771252452e-06, "loss": 0.6401, "step": 2393 }, { "epoch": 0.24, "grad_norm": 1.480090594736585, "learning_rate": 8.841771608423967e-06, "loss": 0.6409, "step": 2394 }, { "epoch": 0.24, "grad_norm": 1.545608279542602, "learning_rate": 8.84071302537665e-06, "loss": 0.6507, "step": 2395 }, { "epoch": 0.24, "grad_norm": 1.337594609633068, "learning_rate": 8.839654022226296e-06, "loss": 0.6346, "step": 2396 }, { "epoch": 0.24, "grad_norm": 1.397555612416364, "learning_rate": 8.838594599088735e-06, "loss": 0.6925, "step": 2397 }, { "epoch": 0.24, "grad_norm": 1.4133514682268786, "learning_rate": 8.837534756079853e-06, "loss": 0.6961, "step": 2398 }, { "epoch": 0.24, "grad_norm": 1.3862594738998841, "learning_rate": 8.836474493315573e-06, "loss": 0.6956, "step": 2399 }, { "epoch": 0.25, "grad_norm": 1.620771373413421, "learning_rate": 8.83541381091187e-06, "loss": 0.6981, "step": 2400 }, { "epoch": 0.25, "grad_norm": 1.5651713095456277, "learning_rate": 8.834352708984763e-06, "loss": 0.6418, "step": 2401 }, { "epoch": 0.25, "grad_norm": 1.526265393089714, "learning_rate": 8.833291187650316e-06, "loss": 0.6041, "step": 2402 }, { "epoch": 0.25, "grad_norm": 1.421788344174124, "learning_rate": 8.832229247024643e-06, "loss": 0.624, "step": 2403 }, { "epoch": 0.25, "grad_norm": 1.346979433575832, "learning_rate": 8.831166887223896e-06, "loss": 0.5769, "step": 2404 }, { "epoch": 0.25, "grad_norm": 1.3896437802321888, "learning_rate": 8.830104108364284e-06, "loss": 0.6402, "step": 2405 }, { "epoch": 0.25, "grad_norm": 1.2938480559894256, "learning_rate": 8.829040910562047e-06, "loss": 0.6645, "step": 2406 }, { "epoch": 0.25, "grad_norm": 1.1976085896785618, "learning_rate": 8.827977293933487e-06, "loss": 0.6824, "step": 2407 }, { "epoch": 0.25, "grad_norm": 1.4642454543714376, "learning_rate": 8.826913258594938e-06, "loss": 0.748, "step": 2408 }, { "epoch": 0.25, "grad_norm": 1.35846420036757, "learning_rate": 8.82584880466279e-06, "loss": 0.6239, "step": 2409 }, { "epoch": 0.25, "grad_norm": 1.8357393045255705, "learning_rate": 8.824783932253472e-06, "loss": 0.6302, "step": 2410 }, { "epoch": 0.25, "grad_norm": 1.5759332378703785, "learning_rate": 8.823718641483463e-06, "loss": 0.6068, "step": 2411 }, { "epoch": 0.25, "grad_norm": 1.2266603176109532, "learning_rate": 8.822652932469284e-06, "loss": 0.6485, "step": 2412 }, { "epoch": 0.25, "grad_norm": 1.309967802055883, "learning_rate": 8.821586805327507e-06, "loss": 0.689, "step": 2413 }, { "epoch": 0.25, "grad_norm": 1.3279123717755954, "learning_rate": 8.820520260174744e-06, "loss": 0.6434, "step": 2414 }, { "epoch": 0.25, "grad_norm": 1.4281351397171413, "learning_rate": 8.819453297127657e-06, "loss": 0.603, "step": 2415 }, { "epoch": 0.25, "grad_norm": 1.318351220520943, "learning_rate": 8.818385916302952e-06, "loss": 0.6377, "step": 2416 }, { "epoch": 0.25, "grad_norm": 1.4796250167421885, "learning_rate": 8.817318117817379e-06, "loss": 0.6997, "step": 2417 }, { "epoch": 0.25, "grad_norm": 1.312353823368486, "learning_rate": 8.816249901787736e-06, "loss": 0.628, "step": 2418 }, { "epoch": 0.25, "grad_norm": 1.3145620808287235, "learning_rate": 8.815181268330867e-06, "loss": 0.7255, "step": 2419 }, { "epoch": 0.25, "grad_norm": 1.4839027055192873, "learning_rate": 8.814112217563662e-06, "loss": 0.6742, "step": 2420 }, { "epoch": 0.25, "grad_norm": 1.5259762969693456, "learning_rate": 8.813042749603052e-06, "loss": 0.7119, "step": 2421 }, { "epoch": 0.25, "grad_norm": 1.3993357052228559, "learning_rate": 8.811972864566019e-06, "loss": 0.6137, "step": 2422 }, { "epoch": 0.25, "grad_norm": 1.47341472294293, "learning_rate": 8.81090256256959e-06, "loss": 0.6499, "step": 2423 }, { "epoch": 0.25, "grad_norm": 1.3654478522294657, "learning_rate": 8.809831843730831e-06, "loss": 0.6526, "step": 2424 }, { "epoch": 0.25, "grad_norm": 1.4818882376906928, "learning_rate": 8.808760708166866e-06, "loss": 0.6886, "step": 2425 }, { "epoch": 0.25, "grad_norm": 1.4178571439934007, "learning_rate": 8.807689155994853e-06, "loss": 0.751, "step": 2426 }, { "epoch": 0.25, "grad_norm": 1.5309545033161105, "learning_rate": 8.806617187332e-06, "loss": 0.6882, "step": 2427 }, { "epoch": 0.25, "grad_norm": 1.4859571803184934, "learning_rate": 8.805544802295563e-06, "loss": 0.628, "step": 2428 }, { "epoch": 0.25, "grad_norm": 1.3535166961186567, "learning_rate": 8.80447200100284e-06, "loss": 0.6226, "step": 2429 }, { "epoch": 0.25, "grad_norm": 1.2879455821644152, "learning_rate": 8.803398783571173e-06, "loss": 0.6452, "step": 2430 }, { "epoch": 0.25, "grad_norm": 1.4907609669872817, "learning_rate": 8.802325150117956e-06, "loss": 0.6711, "step": 2431 }, { "epoch": 0.25, "grad_norm": 1.4933104965331618, "learning_rate": 8.801251100760625e-06, "loss": 0.6162, "step": 2432 }, { "epoch": 0.25, "grad_norm": 1.3481225580700626, "learning_rate": 8.800176635616658e-06, "loss": 0.667, "step": 2433 }, { "epoch": 0.25, "grad_norm": 1.3034839947224366, "learning_rate": 8.799101754803581e-06, "loss": 0.6956, "step": 2434 }, { "epoch": 0.25, "grad_norm": 1.3395435319003386, "learning_rate": 8.798026458438968e-06, "loss": 0.7169, "step": 2435 }, { "epoch": 0.25, "grad_norm": 1.2447191711355623, "learning_rate": 8.79695074664044e-06, "loss": 0.6225, "step": 2436 }, { "epoch": 0.25, "grad_norm": 1.4287429239875518, "learning_rate": 8.795874619525653e-06, "loss": 0.7618, "step": 2437 }, { "epoch": 0.25, "grad_norm": 1.5956871929872467, "learning_rate": 8.79479807721232e-06, "loss": 0.7389, "step": 2438 }, { "epoch": 0.25, "grad_norm": 1.5175644968730992, "learning_rate": 8.793721119818195e-06, "loss": 0.5823, "step": 2439 }, { "epoch": 0.25, "grad_norm": 1.3358899635530712, "learning_rate": 8.792643747461075e-06, "loss": 0.6288, "step": 2440 }, { "epoch": 0.25, "grad_norm": 1.3641246858692613, "learning_rate": 8.791565960258808e-06, "loss": 0.6078, "step": 2441 }, { "epoch": 0.25, "grad_norm": 1.5108454221127374, "learning_rate": 8.79048775832928e-06, "loss": 0.6941, "step": 2442 }, { "epoch": 0.25, "grad_norm": 1.5098087833352456, "learning_rate": 8.789409141790428e-06, "loss": 0.7288, "step": 2443 }, { "epoch": 0.25, "grad_norm": 1.439788194910484, "learning_rate": 8.788330110760231e-06, "loss": 0.6991, "step": 2444 }, { "epoch": 0.25, "grad_norm": 1.4103181015899635, "learning_rate": 8.78725066535672e-06, "loss": 0.7172, "step": 2445 }, { "epoch": 0.25, "grad_norm": 1.368037319278611, "learning_rate": 8.786170805697962e-06, "loss": 0.6086, "step": 2446 }, { "epoch": 0.25, "grad_norm": 1.2164273665823906, "learning_rate": 8.785090531902074e-06, "loss": 0.5361, "step": 2447 }, { "epoch": 0.25, "grad_norm": 1.4213257904531784, "learning_rate": 8.78400984408722e-06, "loss": 0.7305, "step": 2448 }, { "epoch": 0.25, "grad_norm": 1.4072334991939628, "learning_rate": 8.782928742371607e-06, "loss": 0.6698, "step": 2449 }, { "epoch": 0.25, "grad_norm": 1.372571219494245, "learning_rate": 8.781847226873485e-06, "loss": 0.6375, "step": 2450 }, { "epoch": 0.25, "grad_norm": 1.453353369319716, "learning_rate": 8.780765297711154e-06, "loss": 0.6985, "step": 2451 }, { "epoch": 0.25, "grad_norm": 1.5580984316637414, "learning_rate": 8.779682955002957e-06, "loss": 0.6716, "step": 2452 }, { "epoch": 0.25, "grad_norm": 1.7351279077001747, "learning_rate": 8.778600198867282e-06, "loss": 0.6767, "step": 2453 }, { "epoch": 0.25, "grad_norm": 1.760020439947184, "learning_rate": 8.777517029422562e-06, "loss": 0.7047, "step": 2454 }, { "epoch": 0.25, "grad_norm": 1.4454415389581645, "learning_rate": 8.776433446787276e-06, "loss": 0.6707, "step": 2455 }, { "epoch": 0.25, "grad_norm": 1.6154193910557235, "learning_rate": 8.775349451079948e-06, "loss": 0.7417, "step": 2456 }, { "epoch": 0.25, "grad_norm": 1.332505196379351, "learning_rate": 8.774265042419148e-06, "loss": 0.5603, "step": 2457 }, { "epoch": 0.25, "grad_norm": 1.2955865052323252, "learning_rate": 8.77318022092349e-06, "loss": 0.6195, "step": 2458 }, { "epoch": 0.25, "grad_norm": 1.4195254796524046, "learning_rate": 8.772094986711632e-06, "loss": 0.6803, "step": 2459 }, { "epoch": 0.25, "grad_norm": 1.5360325078676558, "learning_rate": 8.77100933990228e-06, "loss": 0.6487, "step": 2460 }, { "epoch": 0.25, "grad_norm": 1.626942020647102, "learning_rate": 8.769923280614184e-06, "loss": 0.7512, "step": 2461 }, { "epoch": 0.25, "grad_norm": 1.4866709567724254, "learning_rate": 8.768836808966135e-06, "loss": 0.6881, "step": 2462 }, { "epoch": 0.25, "grad_norm": 1.5994922096587043, "learning_rate": 8.767749925076979e-06, "loss": 0.7865, "step": 2463 }, { "epoch": 0.25, "grad_norm": 1.299238769618501, "learning_rate": 8.766662629065594e-06, "loss": 0.6121, "step": 2464 }, { "epoch": 0.25, "grad_norm": 1.3727545322847994, "learning_rate": 8.765574921050916e-06, "loss": 0.6304, "step": 2465 }, { "epoch": 0.25, "grad_norm": 1.353367259784444, "learning_rate": 8.764486801151921e-06, "loss": 0.7075, "step": 2466 }, { "epoch": 0.25, "grad_norm": 1.4647962863216546, "learning_rate": 8.763398269487623e-06, "loss": 0.7004, "step": 2467 }, { "epoch": 0.25, "grad_norm": 1.3137539204937212, "learning_rate": 8.762309326177093e-06, "loss": 0.5707, "step": 2468 }, { "epoch": 0.25, "grad_norm": 1.2241974007945204, "learning_rate": 8.761219971339436e-06, "loss": 0.666, "step": 2469 }, { "epoch": 0.25, "grad_norm": 1.4942144235419117, "learning_rate": 8.760130205093812e-06, "loss": 0.6731, "step": 2470 }, { "epoch": 0.25, "grad_norm": 1.4954784598020627, "learning_rate": 8.75904002755942e-06, "loss": 0.6988, "step": 2471 }, { "epoch": 0.25, "grad_norm": 1.255903955623688, "learning_rate": 8.757949438855504e-06, "loss": 0.7133, "step": 2472 }, { "epoch": 0.25, "grad_norm": 1.5721796996104618, "learning_rate": 8.756858439101354e-06, "loss": 0.6451, "step": 2473 }, { "epoch": 0.25, "grad_norm": 1.3807051193648905, "learning_rate": 8.755767028416307e-06, "loss": 0.6298, "step": 2474 }, { "epoch": 0.25, "grad_norm": 1.3197888499135209, "learning_rate": 8.754675206919743e-06, "loss": 0.6212, "step": 2475 }, { "epoch": 0.25, "grad_norm": 1.4521833655938032, "learning_rate": 8.753582974731087e-06, "loss": 0.7245, "step": 2476 }, { "epoch": 0.25, "grad_norm": 1.2826765584457558, "learning_rate": 8.752490331969807e-06, "loss": 0.601, "step": 2477 }, { "epoch": 0.25, "grad_norm": 1.5401378176790173, "learning_rate": 8.75139727875542e-06, "loss": 0.6571, "step": 2478 }, { "epoch": 0.25, "grad_norm": 1.371676184895952, "learning_rate": 8.750303815207487e-06, "loss": 0.6112, "step": 2479 }, { "epoch": 0.25, "grad_norm": 1.5219455040050307, "learning_rate": 8.749209941445608e-06, "loss": 0.7671, "step": 2480 }, { "epoch": 0.25, "grad_norm": 1.2565327896672462, "learning_rate": 8.748115657589437e-06, "loss": 0.6055, "step": 2481 }, { "epoch": 0.25, "grad_norm": 1.3308504084450237, "learning_rate": 8.747020963758667e-06, "loss": 0.6381, "step": 2482 }, { "epoch": 0.25, "grad_norm": 1.3241509824961064, "learning_rate": 8.745925860073037e-06, "loss": 0.569, "step": 2483 }, { "epoch": 0.25, "grad_norm": 1.4412392967346503, "learning_rate": 8.744830346652332e-06, "loss": 0.6438, "step": 2484 }, { "epoch": 0.25, "grad_norm": 1.5230034762491635, "learning_rate": 8.74373442361638e-06, "loss": 0.6449, "step": 2485 }, { "epoch": 0.25, "grad_norm": 1.5256670706331819, "learning_rate": 8.742638091085055e-06, "loss": 0.6369, "step": 2486 }, { "epoch": 0.25, "grad_norm": 1.5697380214194587, "learning_rate": 8.741541349178275e-06, "loss": 0.7269, "step": 2487 }, { "epoch": 0.25, "grad_norm": 1.4159100980269892, "learning_rate": 8.740444198016005e-06, "loss": 0.6116, "step": 2488 }, { "epoch": 0.25, "grad_norm": 1.3146886995323968, "learning_rate": 8.739346637718252e-06, "loss": 0.607, "step": 2489 }, { "epoch": 0.25, "grad_norm": 1.4113992714647472, "learning_rate": 8.738248668405068e-06, "loss": 0.6643, "step": 2490 }, { "epoch": 0.25, "grad_norm": 1.6015770574562707, "learning_rate": 8.73715029019655e-06, "loss": 0.7159, "step": 2491 }, { "epoch": 0.25, "grad_norm": 1.565182576639966, "learning_rate": 8.736051503212843e-06, "loss": 0.7021, "step": 2492 }, { "epoch": 0.25, "grad_norm": 1.4817298134780432, "learning_rate": 8.734952307574133e-06, "loss": 0.6706, "step": 2493 }, { "epoch": 0.25, "grad_norm": 1.568544153877794, "learning_rate": 8.733852703400652e-06, "loss": 0.6218, "step": 2494 }, { "epoch": 0.25, "grad_norm": 1.4655097252044262, "learning_rate": 8.732752690812673e-06, "loss": 0.6963, "step": 2495 }, { "epoch": 0.25, "grad_norm": 1.5823767698781135, "learning_rate": 8.731652269930522e-06, "loss": 0.6315, "step": 2496 }, { "epoch": 0.25, "grad_norm": 1.5234059219086322, "learning_rate": 8.730551440874562e-06, "loss": 0.733, "step": 2497 }, { "epoch": 0.26, "grad_norm": 1.5652800598969827, "learning_rate": 8.729450203765204e-06, "loss": 0.6868, "step": 2498 }, { "epoch": 0.26, "grad_norm": 1.4787796240553364, "learning_rate": 8.7283485587229e-06, "loss": 0.6705, "step": 2499 }, { "epoch": 0.26, "grad_norm": 1.2116023737828028, "learning_rate": 8.727246505868155e-06, "loss": 0.5798, "step": 2500 }, { "epoch": 0.26, "grad_norm": 1.3978143160000496, "learning_rate": 8.726144045321509e-06, "loss": 0.662, "step": 2501 }, { "epoch": 0.26, "grad_norm": 1.5689707995348376, "learning_rate": 8.725041177203556e-06, "loss": 0.7456, "step": 2502 }, { "epoch": 0.26, "grad_norm": 1.3509937758315136, "learning_rate": 8.723937901634923e-06, "loss": 0.6774, "step": 2503 }, { "epoch": 0.26, "grad_norm": 1.6340916306957192, "learning_rate": 8.72283421873629e-06, "loss": 0.6863, "step": 2504 }, { "epoch": 0.26, "grad_norm": 1.4476620505266926, "learning_rate": 8.72173012862838e-06, "loss": 0.6678, "step": 2505 }, { "epoch": 0.26, "grad_norm": 1.5640495964557164, "learning_rate": 8.720625631431963e-06, "loss": 0.7068, "step": 2506 }, { "epoch": 0.26, "grad_norm": 1.5109905299787065, "learning_rate": 8.719520727267847e-06, "loss": 0.7129, "step": 2507 }, { "epoch": 0.26, "grad_norm": 1.6986193262686224, "learning_rate": 8.718415416256888e-06, "loss": 0.7322, "step": 2508 }, { "epoch": 0.26, "grad_norm": 1.3697235031427017, "learning_rate": 8.717309698519989e-06, "loss": 0.7247, "step": 2509 }, { "epoch": 0.26, "grad_norm": 1.2729354143967895, "learning_rate": 8.716203574178093e-06, "loss": 0.6646, "step": 2510 }, { "epoch": 0.26, "grad_norm": 1.3730388053780602, "learning_rate": 8.71509704335219e-06, "loss": 0.7215, "step": 2511 }, { "epoch": 0.26, "grad_norm": 1.735578277725833, "learning_rate": 8.713990106163314e-06, "loss": 0.713, "step": 2512 }, { "epoch": 0.26, "grad_norm": 1.3233052485468313, "learning_rate": 8.712882762732544e-06, "loss": 0.6084, "step": 2513 }, { "epoch": 0.26, "grad_norm": 1.204688293407673, "learning_rate": 8.711775013181003e-06, "loss": 0.6052, "step": 2514 }, { "epoch": 0.26, "grad_norm": 1.5101132172598437, "learning_rate": 8.71066685762986e-06, "loss": 0.798, "step": 2515 }, { "epoch": 0.26, "grad_norm": 1.3956787001892077, "learning_rate": 8.70955829620032e-06, "loss": 0.6441, "step": 2516 }, { "epoch": 0.26, "grad_norm": 1.4013855235973547, "learning_rate": 8.708449329013648e-06, "loss": 0.639, "step": 2517 }, { "epoch": 0.26, "grad_norm": 1.3931001623487753, "learning_rate": 8.707339956191139e-06, "loss": 0.698, "step": 2518 }, { "epoch": 0.26, "grad_norm": 1.199822911434836, "learning_rate": 8.70623017785414e-06, "loss": 0.4864, "step": 2519 }, { "epoch": 0.26, "grad_norm": 1.495665548501712, "learning_rate": 8.705119994124038e-06, "loss": 0.663, "step": 2520 }, { "epoch": 0.26, "grad_norm": 1.434178214264092, "learning_rate": 8.70400940512227e-06, "loss": 0.6096, "step": 2521 }, { "epoch": 0.26, "grad_norm": 1.580938937322999, "learning_rate": 8.70289841097031e-06, "loss": 0.7233, "step": 2522 }, { "epoch": 0.26, "grad_norm": 1.2361833996911524, "learning_rate": 8.701787011789684e-06, "loss": 0.6926, "step": 2523 }, { "epoch": 0.26, "grad_norm": 1.3524680568124456, "learning_rate": 8.700675207701956e-06, "loss": 0.5857, "step": 2524 }, { "epoch": 0.26, "grad_norm": 1.4965384674565056, "learning_rate": 8.699562998828739e-06, "loss": 0.6937, "step": 2525 }, { "epoch": 0.26, "grad_norm": 1.3761694676255152, "learning_rate": 8.698450385291686e-06, "loss": 0.6864, "step": 2526 }, { "epoch": 0.26, "grad_norm": 1.3233149937285034, "learning_rate": 8.697337367212498e-06, "loss": 0.6917, "step": 2527 }, { "epoch": 0.26, "grad_norm": 1.5085944716638797, "learning_rate": 8.696223944712917e-06, "loss": 0.6254, "step": 2528 }, { "epoch": 0.26, "grad_norm": 1.6224188915086568, "learning_rate": 8.69511011791473e-06, "loss": 0.6648, "step": 2529 }, { "epoch": 0.26, "grad_norm": 1.4160244205600145, "learning_rate": 8.693995886939771e-06, "loss": 0.6046, "step": 2530 }, { "epoch": 0.26, "grad_norm": 1.3504554412103036, "learning_rate": 8.692881251909917e-06, "loss": 0.6456, "step": 2531 }, { "epoch": 0.26, "grad_norm": 1.374180655819839, "learning_rate": 8.691766212947087e-06, "loss": 0.6795, "step": 2532 }, { "epoch": 0.26, "grad_norm": 1.3365804992656367, "learning_rate": 8.690650770173247e-06, "loss": 0.7018, "step": 2533 }, { "epoch": 0.26, "grad_norm": 1.3795446320036981, "learning_rate": 8.689534923710403e-06, "loss": 0.5522, "step": 2534 }, { "epoch": 0.26, "grad_norm": 1.357581134038579, "learning_rate": 8.688418673680611e-06, "loss": 0.636, "step": 2535 }, { "epoch": 0.26, "grad_norm": 1.4650555154968545, "learning_rate": 8.687302020205967e-06, "loss": 0.7239, "step": 2536 }, { "epoch": 0.26, "grad_norm": 1.6611922262088832, "learning_rate": 8.686184963408613e-06, "loss": 0.639, "step": 2537 }, { "epoch": 0.26, "grad_norm": 1.3488813930536523, "learning_rate": 8.685067503410733e-06, "loss": 0.6013, "step": 2538 }, { "epoch": 0.26, "grad_norm": 1.4780518881047389, "learning_rate": 8.683949640334557e-06, "loss": 0.7154, "step": 2539 }, { "epoch": 0.26, "grad_norm": 1.4285513803701437, "learning_rate": 8.68283137430236e-06, "loss": 0.5596, "step": 2540 }, { "epoch": 0.26, "grad_norm": 1.3884762800183021, "learning_rate": 8.681712705436457e-06, "loss": 0.6823, "step": 2541 }, { "epoch": 0.26, "grad_norm": 1.455411723160549, "learning_rate": 8.68059363385921e-06, "loss": 0.6418, "step": 2542 }, { "epoch": 0.26, "grad_norm": 1.5204302636828175, "learning_rate": 8.679474159693028e-06, "loss": 0.6796, "step": 2543 }, { "epoch": 0.26, "grad_norm": 1.4384672280852495, "learning_rate": 8.678354283060358e-06, "loss": 0.6174, "step": 2544 }, { "epoch": 0.26, "grad_norm": 1.687612863643961, "learning_rate": 8.677234004083692e-06, "loss": 0.6458, "step": 2545 }, { "epoch": 0.26, "grad_norm": 1.3266694298757025, "learning_rate": 8.676113322885573e-06, "loss": 0.6951, "step": 2546 }, { "epoch": 0.26, "grad_norm": 1.31430573132461, "learning_rate": 8.67499223958858e-06, "loss": 0.6766, "step": 2547 }, { "epoch": 0.26, "grad_norm": 1.4483085969010703, "learning_rate": 8.673870754315336e-06, "loss": 0.724, "step": 2548 }, { "epoch": 0.26, "grad_norm": 1.397828580934761, "learning_rate": 8.672748867188517e-06, "loss": 0.7328, "step": 2549 }, { "epoch": 0.26, "grad_norm": 1.4351914069005944, "learning_rate": 8.671626578330831e-06, "loss": 0.6523, "step": 2550 }, { "epoch": 0.26, "grad_norm": 1.4328418156687648, "learning_rate": 8.670503887865039e-06, "loss": 0.6061, "step": 2551 }, { "epoch": 0.26, "grad_norm": 1.5525582877143371, "learning_rate": 8.669380795913942e-06, "loss": 0.7368, "step": 2552 }, { "epoch": 0.26, "grad_norm": 1.6021986881445898, "learning_rate": 8.668257302600385e-06, "loss": 0.7225, "step": 2553 }, { "epoch": 0.26, "grad_norm": 1.8705715071762734, "learning_rate": 8.667133408047257e-06, "loss": 0.7057, "step": 2554 }, { "epoch": 0.26, "grad_norm": 1.3837619131332206, "learning_rate": 8.666009112377492e-06, "loss": 0.7014, "step": 2555 }, { "epoch": 0.26, "grad_norm": 1.4212308778720775, "learning_rate": 8.664884415714065e-06, "loss": 0.6285, "step": 2556 }, { "epoch": 0.26, "grad_norm": 1.5532822671890065, "learning_rate": 8.663759318180001e-06, "loss": 0.7047, "step": 2557 }, { "epoch": 0.26, "grad_norm": 1.4816317024844927, "learning_rate": 8.662633819898363e-06, "loss": 0.7882, "step": 2558 }, { "epoch": 0.26, "grad_norm": 1.6209359963971588, "learning_rate": 8.66150792099226e-06, "loss": 0.731, "step": 2559 }, { "epoch": 0.26, "grad_norm": 1.5203713944835349, "learning_rate": 8.660381621584843e-06, "loss": 0.6422, "step": 2560 }, { "epoch": 0.26, "grad_norm": 1.5134563349361199, "learning_rate": 8.65925492179931e-06, "loss": 0.6252, "step": 2561 }, { "epoch": 0.26, "grad_norm": 1.4969454058187848, "learning_rate": 8.658127821758899e-06, "loss": 0.6978, "step": 2562 }, { "epoch": 0.26, "grad_norm": 1.4470515343963004, "learning_rate": 8.657000321586897e-06, "loss": 0.6367, "step": 2563 }, { "epoch": 0.26, "grad_norm": 1.3947630593156326, "learning_rate": 8.655872421406631e-06, "loss": 0.7165, "step": 2564 }, { "epoch": 0.26, "grad_norm": 1.397202610450154, "learning_rate": 8.65474412134147e-06, "loss": 0.6387, "step": 2565 }, { "epoch": 0.26, "grad_norm": 1.5315733990063995, "learning_rate": 8.653615421514832e-06, "loss": 0.687, "step": 2566 }, { "epoch": 0.26, "grad_norm": 1.5557755364242838, "learning_rate": 8.652486322050176e-06, "loss": 0.5604, "step": 2567 }, { "epoch": 0.26, "grad_norm": 1.4820592830571815, "learning_rate": 8.651356823071001e-06, "loss": 0.7323, "step": 2568 }, { "epoch": 0.26, "grad_norm": 1.5158562984511805, "learning_rate": 8.650226924700856e-06, "loss": 0.6467, "step": 2569 }, { "epoch": 0.26, "grad_norm": 1.3629261564978474, "learning_rate": 8.649096627063331e-06, "loss": 0.6352, "step": 2570 }, { "epoch": 0.26, "grad_norm": 1.4818488922914392, "learning_rate": 8.64796593028206e-06, "loss": 0.6413, "step": 2571 }, { "epoch": 0.26, "grad_norm": 1.3622798037412018, "learning_rate": 8.64683483448072e-06, "loss": 0.6202, "step": 2572 }, { "epoch": 0.26, "grad_norm": 1.51634329768555, "learning_rate": 8.645703339783029e-06, "loss": 0.7172, "step": 2573 }, { "epoch": 0.26, "grad_norm": 1.4462216795238636, "learning_rate": 8.644571446312757e-06, "loss": 0.6643, "step": 2574 }, { "epoch": 0.26, "grad_norm": 3.9340353372809105, "learning_rate": 8.64343915419371e-06, "loss": 0.6297, "step": 2575 }, { "epoch": 0.26, "grad_norm": 1.3928257360272949, "learning_rate": 8.642306463549736e-06, "loss": 0.6979, "step": 2576 }, { "epoch": 0.26, "grad_norm": 1.4099940152523565, "learning_rate": 8.641173374504737e-06, "loss": 0.5402, "step": 2577 }, { "epoch": 0.26, "grad_norm": 1.2933041080613354, "learning_rate": 8.640039887182648e-06, "loss": 0.6305, "step": 2578 }, { "epoch": 0.26, "grad_norm": 1.4970132479415295, "learning_rate": 8.638906001707452e-06, "loss": 0.6876, "step": 2579 }, { "epoch": 0.26, "grad_norm": 1.3270964818036473, "learning_rate": 8.637771718203174e-06, "loss": 0.6618, "step": 2580 }, { "epoch": 0.26, "grad_norm": 3.46779631878976, "learning_rate": 8.636637036793887e-06, "loss": 0.5874, "step": 2581 }, { "epoch": 0.26, "grad_norm": 1.3222815381721964, "learning_rate": 8.635501957603703e-06, "loss": 0.6877, "step": 2582 }, { "epoch": 0.26, "grad_norm": 1.374254871832945, "learning_rate": 8.634366480756775e-06, "loss": 0.6263, "step": 2583 }, { "epoch": 0.26, "grad_norm": 1.5330335557937063, "learning_rate": 8.63323060637731e-06, "loss": 0.7248, "step": 2584 }, { "epoch": 0.26, "grad_norm": 1.4563717872807203, "learning_rate": 8.632094334589544e-06, "loss": 0.6578, "step": 2585 }, { "epoch": 0.26, "grad_norm": 1.7517015284669026, "learning_rate": 8.630957665517771e-06, "loss": 0.6843, "step": 2586 }, { "epoch": 0.26, "grad_norm": 1.5234940867482207, "learning_rate": 8.629820599286316e-06, "loss": 0.6784, "step": 2587 }, { "epoch": 0.26, "grad_norm": 1.5163726763914984, "learning_rate": 8.628683136019557e-06, "loss": 0.5601, "step": 2588 }, { "epoch": 0.26, "grad_norm": 1.321265892355816, "learning_rate": 8.627545275841912e-06, "loss": 0.6365, "step": 2589 }, { "epoch": 0.26, "grad_norm": 1.2364879007642746, "learning_rate": 8.626407018877839e-06, "loss": 0.7025, "step": 2590 }, { "epoch": 0.26, "grad_norm": 1.4800151926325122, "learning_rate": 8.625268365251841e-06, "loss": 0.7725, "step": 2591 }, { "epoch": 0.26, "grad_norm": 1.4890615747469311, "learning_rate": 8.624129315088469e-06, "loss": 0.6821, "step": 2592 }, { "epoch": 0.26, "grad_norm": 1.3248061510979081, "learning_rate": 8.622989868512316e-06, "loss": 0.637, "step": 2593 }, { "epoch": 0.26, "grad_norm": 1.4178113415874598, "learning_rate": 8.621850025648008e-06, "loss": 0.6696, "step": 2594 }, { "epoch": 0.26, "grad_norm": 1.307978044198609, "learning_rate": 8.620709786620231e-06, "loss": 0.5932, "step": 2595 }, { "epoch": 0.27, "grad_norm": 1.4934601667239567, "learning_rate": 8.619569151553705e-06, "loss": 0.6683, "step": 2596 }, { "epoch": 0.27, "grad_norm": 1.8283607061605893, "learning_rate": 8.61842812057319e-06, "loss": 0.6084, "step": 2597 }, { "epoch": 0.27, "grad_norm": 1.5220656540885067, "learning_rate": 8.617286693803498e-06, "loss": 0.6861, "step": 2598 }, { "epoch": 0.27, "grad_norm": 3.168974315319435, "learning_rate": 8.616144871369477e-06, "loss": 0.6637, "step": 2599 }, { "epoch": 0.27, "grad_norm": 1.3942810295120813, "learning_rate": 8.615002653396023e-06, "loss": 0.606, "step": 2600 }, { "epoch": 0.27, "grad_norm": 1.5811810362863525, "learning_rate": 8.613860040008073e-06, "loss": 0.7777, "step": 2601 }, { "epoch": 0.27, "grad_norm": 1.4539497620071407, "learning_rate": 8.612717031330608e-06, "loss": 0.7306, "step": 2602 }, { "epoch": 0.27, "grad_norm": 1.5692743974822088, "learning_rate": 8.611573627488652e-06, "loss": 0.6538, "step": 2603 }, { "epoch": 0.27, "grad_norm": 1.517984467034463, "learning_rate": 8.610429828607273e-06, "loss": 0.6293, "step": 2604 }, { "epoch": 0.27, "grad_norm": 1.4621492595499126, "learning_rate": 8.60928563481158e-06, "loss": 0.6145, "step": 2605 }, { "epoch": 0.27, "grad_norm": 1.3724112428754935, "learning_rate": 8.608141046226725e-06, "loss": 0.6842, "step": 2606 }, { "epoch": 0.27, "grad_norm": 1.281399053694709, "learning_rate": 8.60699606297791e-06, "loss": 0.6891, "step": 2607 }, { "epoch": 0.27, "grad_norm": 1.344939265108837, "learning_rate": 8.60585068519037e-06, "loss": 0.6321, "step": 2608 }, { "epoch": 0.27, "grad_norm": 1.4783713903675804, "learning_rate": 8.604704912989394e-06, "loss": 0.6585, "step": 2609 }, { "epoch": 0.27, "grad_norm": 1.2117335175815058, "learning_rate": 8.603558746500303e-06, "loss": 0.6407, "step": 2610 }, { "epoch": 0.27, "grad_norm": 1.383761362673763, "learning_rate": 8.602412185848467e-06, "loss": 0.6756, "step": 2611 }, { "epoch": 0.27, "grad_norm": 1.521633241965078, "learning_rate": 8.601265231159299e-06, "loss": 0.6419, "step": 2612 }, { "epoch": 0.27, "grad_norm": 1.4797723415159216, "learning_rate": 8.600117882558257e-06, "loss": 0.76, "step": 2613 }, { "epoch": 0.27, "grad_norm": 1.6410732100079983, "learning_rate": 8.598970140170837e-06, "loss": 0.721, "step": 2614 }, { "epoch": 0.27, "grad_norm": 1.492530316495302, "learning_rate": 8.597822004122582e-06, "loss": 0.5979, "step": 2615 }, { "epoch": 0.27, "grad_norm": 1.4095512381983204, "learning_rate": 8.596673474539078e-06, "loss": 0.5947, "step": 2616 }, { "epoch": 0.27, "grad_norm": 1.285485646901025, "learning_rate": 8.59552455154595e-06, "loss": 0.7079, "step": 2617 }, { "epoch": 0.27, "grad_norm": 1.3222669434571868, "learning_rate": 8.594375235268873e-06, "loss": 0.5933, "step": 2618 }, { "epoch": 0.27, "grad_norm": 1.4191614756738227, "learning_rate": 8.593225525833555e-06, "loss": 0.6896, "step": 2619 }, { "epoch": 0.27, "grad_norm": 1.3152790633709115, "learning_rate": 8.59207542336576e-06, "loss": 0.6649, "step": 2620 }, { "epoch": 0.27, "grad_norm": 1.4285253599420022, "learning_rate": 8.590924927991282e-06, "loss": 0.6401, "step": 2621 }, { "epoch": 0.27, "grad_norm": 1.376858722164842, "learning_rate": 8.589774039835967e-06, "loss": 0.6536, "step": 2622 }, { "epoch": 0.27, "grad_norm": 1.4555177571680493, "learning_rate": 8.5886227590257e-06, "loss": 0.7353, "step": 2623 }, { "epoch": 0.27, "grad_norm": 1.5509747589844092, "learning_rate": 8.587471085686411e-06, "loss": 0.7346, "step": 2624 }, { "epoch": 0.27, "grad_norm": 1.539618992928685, "learning_rate": 8.586319019944071e-06, "loss": 0.6222, "step": 2625 }, { "epoch": 0.27, "grad_norm": 1.534725003231731, "learning_rate": 8.585166561924694e-06, "loss": 0.6629, "step": 2626 }, { "epoch": 0.27, "grad_norm": 3.9198209571363734, "learning_rate": 8.584013711754337e-06, "loss": 0.7085, "step": 2627 }, { "epoch": 0.27, "grad_norm": 1.83157235018872, "learning_rate": 8.582860469559104e-06, "loss": 0.7618, "step": 2628 }, { "epoch": 0.27, "grad_norm": 1.450412314191225, "learning_rate": 8.581706835465136e-06, "loss": 0.6559, "step": 2629 }, { "epoch": 0.27, "grad_norm": 1.556976501576558, "learning_rate": 8.580552809598618e-06, "loss": 0.6555, "step": 2630 }, { "epoch": 0.27, "grad_norm": 1.4542740526947222, "learning_rate": 8.579398392085782e-06, "loss": 0.7656, "step": 2631 }, { "epoch": 0.27, "grad_norm": 1.2812416624349272, "learning_rate": 8.578243583052898e-06, "loss": 0.5947, "step": 2632 }, { "epoch": 0.27, "grad_norm": 1.3422946983363926, "learning_rate": 8.57708838262628e-06, "loss": 0.7026, "step": 2633 }, { "epoch": 0.27, "grad_norm": 1.4466220007403132, "learning_rate": 8.57593279093229e-06, "loss": 0.6131, "step": 2634 }, { "epoch": 0.27, "grad_norm": 1.3572295832534225, "learning_rate": 8.574776808097322e-06, "loss": 0.5745, "step": 2635 }, { "epoch": 0.27, "grad_norm": 1.5483642904548396, "learning_rate": 8.573620434247822e-06, "loss": 0.7191, "step": 2636 }, { "epoch": 0.27, "grad_norm": 1.4230124722907984, "learning_rate": 8.572463669510278e-06, "loss": 0.7085, "step": 2637 }, { "epoch": 0.27, "grad_norm": 1.5600365740525026, "learning_rate": 8.571306514011217e-06, "loss": 0.7016, "step": 2638 }, { "epoch": 0.27, "grad_norm": 1.2143029877799612, "learning_rate": 8.570148967877209e-06, "loss": 0.6363, "step": 2639 }, { "epoch": 0.27, "grad_norm": 1.3728182324267593, "learning_rate": 8.56899103123487e-06, "loss": 0.6582, "step": 2640 }, { "epoch": 0.27, "grad_norm": 1.3468930746341568, "learning_rate": 8.567832704210858e-06, "loss": 0.7085, "step": 2641 }, { "epoch": 0.27, "grad_norm": 1.5586223533191494, "learning_rate": 8.56667398693187e-06, "loss": 0.6408, "step": 2642 }, { "epoch": 0.27, "grad_norm": 1.3722335192452064, "learning_rate": 8.56551487952465e-06, "loss": 0.6661, "step": 2643 }, { "epoch": 0.27, "grad_norm": 1.665962339029075, "learning_rate": 8.56435538211598e-06, "loss": 0.6147, "step": 2644 }, { "epoch": 0.27, "grad_norm": 1.5311837775836865, "learning_rate": 8.563195494832693e-06, "loss": 0.7279, "step": 2645 }, { "epoch": 0.27, "grad_norm": 1.4642068554754621, "learning_rate": 8.562035217801654e-06, "loss": 0.7178, "step": 2646 }, { "epoch": 0.27, "grad_norm": 1.5598917131475656, "learning_rate": 8.560874551149777e-06, "loss": 0.6481, "step": 2647 }, { "epoch": 0.27, "grad_norm": 1.5482691510317828, "learning_rate": 8.559713495004019e-06, "loss": 0.724, "step": 2648 }, { "epoch": 0.27, "grad_norm": 1.4006937315044377, "learning_rate": 8.558552049491378e-06, "loss": 0.6229, "step": 2649 }, { "epoch": 0.27, "grad_norm": 1.5527505884230566, "learning_rate": 8.557390214738895e-06, "loss": 0.6891, "step": 2650 }, { "epoch": 0.27, "grad_norm": 1.3414287748065572, "learning_rate": 8.556227990873651e-06, "loss": 0.6608, "step": 2651 }, { "epoch": 0.27, "grad_norm": 1.3000489923816652, "learning_rate": 8.555065378022773e-06, "loss": 0.5507, "step": 2652 }, { "epoch": 0.27, "grad_norm": 1.46168518939286, "learning_rate": 8.553902376313432e-06, "loss": 0.623, "step": 2653 }, { "epoch": 0.27, "grad_norm": 1.4428280152455986, "learning_rate": 8.552738985872834e-06, "loss": 0.7585, "step": 2654 }, { "epoch": 0.27, "grad_norm": 1.4279724076299027, "learning_rate": 8.551575206828235e-06, "loss": 0.6954, "step": 2655 }, { "epoch": 0.27, "grad_norm": 1.4900775312220997, "learning_rate": 8.550411039306932e-06, "loss": 0.6495, "step": 2656 }, { "epoch": 0.27, "grad_norm": 1.442305822630593, "learning_rate": 8.549246483436262e-06, "loss": 0.5801, "step": 2657 }, { "epoch": 0.27, "grad_norm": 1.4191505198185437, "learning_rate": 8.548081539343606e-06, "loss": 0.6617, "step": 2658 }, { "epoch": 0.27, "grad_norm": 1.3956617595878653, "learning_rate": 8.546916207156388e-06, "loss": 0.6731, "step": 2659 }, { "epoch": 0.27, "grad_norm": 1.7045737168545152, "learning_rate": 8.545750487002073e-06, "loss": 0.7608, "step": 2660 }, { "epoch": 0.27, "grad_norm": 1.4786264715002222, "learning_rate": 8.544584379008168e-06, "loss": 0.6859, "step": 2661 }, { "epoch": 0.27, "grad_norm": 1.4492604256228145, "learning_rate": 8.543417883302227e-06, "loss": 0.7358, "step": 2662 }, { "epoch": 0.27, "grad_norm": 1.4041917339293806, "learning_rate": 8.54225100001184e-06, "loss": 0.6502, "step": 2663 }, { "epoch": 0.27, "grad_norm": 1.2392301590044188, "learning_rate": 8.541083729264646e-06, "loss": 0.6135, "step": 2664 }, { "epoch": 0.27, "grad_norm": 1.6557603863405053, "learning_rate": 8.53991607118832e-06, "loss": 0.6152, "step": 2665 }, { "epoch": 0.27, "grad_norm": 1.5367608312584045, "learning_rate": 8.538748025910582e-06, "loss": 0.675, "step": 2666 }, { "epoch": 0.27, "grad_norm": 1.4478046395261805, "learning_rate": 8.537579593559195e-06, "loss": 0.7102, "step": 2667 }, { "epoch": 0.27, "grad_norm": 1.3783960032155629, "learning_rate": 8.536410774261966e-06, "loss": 0.6273, "step": 2668 }, { "epoch": 0.27, "grad_norm": 1.4967759314689224, "learning_rate": 8.535241568146737e-06, "loss": 0.6891, "step": 2669 }, { "epoch": 0.27, "grad_norm": 1.2566767359396072, "learning_rate": 8.534071975341404e-06, "loss": 0.678, "step": 2670 }, { "epoch": 0.27, "grad_norm": 1.4904413185549792, "learning_rate": 8.532901995973896e-06, "loss": 0.769, "step": 2671 }, { "epoch": 0.27, "grad_norm": 1.8193262794574825, "learning_rate": 8.531731630172187e-06, "loss": 0.686, "step": 2672 }, { "epoch": 0.27, "grad_norm": 2.3030351699564733, "learning_rate": 8.530560878064292e-06, "loss": 0.6387, "step": 2673 }, { "epoch": 0.27, "grad_norm": 1.480833972809105, "learning_rate": 8.529389739778273e-06, "loss": 0.6496, "step": 2674 }, { "epoch": 0.27, "grad_norm": 1.4027312977384283, "learning_rate": 8.528218215442227e-06, "loss": 0.5554, "step": 2675 }, { "epoch": 0.27, "grad_norm": 1.5375964826963802, "learning_rate": 8.527046305184301e-06, "loss": 0.7079, "step": 2676 }, { "epoch": 0.27, "grad_norm": 1.3216810494669786, "learning_rate": 8.525874009132677e-06, "loss": 0.6335, "step": 2677 }, { "epoch": 0.27, "grad_norm": 1.3454434944274407, "learning_rate": 8.524701327415586e-06, "loss": 0.6315, "step": 2678 }, { "epoch": 0.27, "grad_norm": 1.4194902706306625, "learning_rate": 8.523528260161293e-06, "loss": 0.689, "step": 2679 }, { "epoch": 0.27, "grad_norm": 1.3730164626808823, "learning_rate": 8.522354807498114e-06, "loss": 0.6421, "step": 2680 }, { "epoch": 0.27, "grad_norm": 1.2638996406039025, "learning_rate": 8.5211809695544e-06, "loss": 0.6117, "step": 2681 }, { "epoch": 0.27, "grad_norm": 1.5041576654093516, "learning_rate": 8.520006746458552e-06, "loss": 0.7256, "step": 2682 }, { "epoch": 0.27, "grad_norm": 1.4991146727181628, "learning_rate": 8.518832138339005e-06, "loss": 0.697, "step": 2683 }, { "epoch": 0.27, "grad_norm": 1.4921338321993287, "learning_rate": 8.517657145324237e-06, "loss": 0.6895, "step": 2684 }, { "epoch": 0.27, "grad_norm": 1.7800269375029205, "learning_rate": 8.516481767542775e-06, "loss": 0.7563, "step": 2685 }, { "epoch": 0.27, "grad_norm": 1.5364954436202687, "learning_rate": 8.51530600512318e-06, "loss": 0.6827, "step": 2686 }, { "epoch": 0.27, "grad_norm": 1.4239834296362734, "learning_rate": 8.51412985819406e-06, "loss": 0.6665, "step": 2687 }, { "epoch": 0.27, "grad_norm": 1.5219868143922046, "learning_rate": 8.512953326884066e-06, "loss": 0.664, "step": 2688 }, { "epoch": 0.27, "grad_norm": 1.7870917526068675, "learning_rate": 8.511776411321886e-06, "loss": 0.7217, "step": 2689 }, { "epoch": 0.27, "grad_norm": 1.40799251148579, "learning_rate": 8.510599111636252e-06, "loss": 0.6999, "step": 2690 }, { "epoch": 0.27, "grad_norm": 1.4450281990031906, "learning_rate": 8.509421427955944e-06, "loss": 0.639, "step": 2691 }, { "epoch": 0.27, "grad_norm": 1.4517313324072503, "learning_rate": 8.508243360409773e-06, "loss": 0.7198, "step": 2692 }, { "epoch": 0.27, "grad_norm": 1.5329219985688576, "learning_rate": 8.507064909126602e-06, "loss": 0.672, "step": 2693 }, { "epoch": 0.28, "grad_norm": 1.3251436548903546, "learning_rate": 8.505886074235327e-06, "loss": 0.649, "step": 2694 }, { "epoch": 0.28, "grad_norm": 1.4589190780655508, "learning_rate": 8.504706855864897e-06, "loss": 0.7346, "step": 2695 }, { "epoch": 0.28, "grad_norm": 1.3983801903861444, "learning_rate": 8.50352725414429e-06, "loss": 0.6353, "step": 2696 }, { "epoch": 0.28, "grad_norm": 1.1263121125553297, "learning_rate": 8.50234726920254e-06, "loss": 0.6082, "step": 2697 }, { "epoch": 0.28, "grad_norm": 1.5264326456226864, "learning_rate": 8.50116690116871e-06, "loss": 0.6981, "step": 2698 }, { "epoch": 0.28, "grad_norm": 1.276829926729859, "learning_rate": 8.499986150171911e-06, "loss": 0.6492, "step": 2699 }, { "epoch": 0.28, "grad_norm": 1.2912875837719102, "learning_rate": 8.498805016341296e-06, "loss": 0.5695, "step": 2700 }, { "epoch": 0.28, "grad_norm": 1.319731094506015, "learning_rate": 8.497623499806062e-06, "loss": 0.6815, "step": 2701 }, { "epoch": 0.28, "grad_norm": 1.4203502995268487, "learning_rate": 8.49644160069544e-06, "loss": 0.7592, "step": 2702 }, { "epoch": 0.28, "grad_norm": 1.3585890997719152, "learning_rate": 8.495259319138714e-06, "loss": 0.6235, "step": 2703 }, { "epoch": 0.28, "grad_norm": 1.4146077181974108, "learning_rate": 8.494076655265198e-06, "loss": 0.6612, "step": 2704 }, { "epoch": 0.28, "grad_norm": 1.3960623023412135, "learning_rate": 8.492893609204257e-06, "loss": 0.69, "step": 2705 }, { "epoch": 0.28, "grad_norm": 1.6168874769746624, "learning_rate": 8.491710181085293e-06, "loss": 0.5691, "step": 2706 }, { "epoch": 0.28, "grad_norm": 1.2865305930224091, "learning_rate": 8.49052637103775e-06, "loss": 0.7482, "step": 2707 }, { "epoch": 0.28, "grad_norm": 1.4928932991524484, "learning_rate": 8.489342179191118e-06, "loss": 0.6866, "step": 2708 }, { "epoch": 0.28, "grad_norm": 1.3948760380088983, "learning_rate": 8.488157605674924e-06, "loss": 0.6246, "step": 2709 }, { "epoch": 0.28, "grad_norm": 1.3364338106151872, "learning_rate": 8.48697265061874e-06, "loss": 0.676, "step": 2710 }, { "epoch": 0.28, "grad_norm": 1.396487188725367, "learning_rate": 8.485787314152174e-06, "loss": 0.6542, "step": 2711 }, { "epoch": 0.28, "grad_norm": 1.5882642280895023, "learning_rate": 8.484601596404885e-06, "loss": 0.6762, "step": 2712 }, { "epoch": 0.28, "grad_norm": 1.4092962008742917, "learning_rate": 8.483415497506567e-06, "loss": 0.6709, "step": 2713 }, { "epoch": 0.28, "grad_norm": 1.4244031262770642, "learning_rate": 8.482229017586954e-06, "loss": 0.7317, "step": 2714 }, { "epoch": 0.28, "grad_norm": 1.6767709383035336, "learning_rate": 8.481042156775828e-06, "loss": 0.7257, "step": 2715 }, { "epoch": 0.28, "grad_norm": 1.4366783658890678, "learning_rate": 8.47985491520301e-06, "loss": 0.793, "step": 2716 }, { "epoch": 0.28, "grad_norm": 1.5260275930182268, "learning_rate": 8.478667292998361e-06, "loss": 0.7272, "step": 2717 }, { "epoch": 0.28, "grad_norm": 1.3415367751493654, "learning_rate": 8.477479290291787e-06, "loss": 0.6781, "step": 2718 }, { "epoch": 0.28, "grad_norm": 1.4813106495222752, "learning_rate": 8.476290907213232e-06, "loss": 0.7314, "step": 2719 }, { "epoch": 0.28, "grad_norm": 1.6298193787556436, "learning_rate": 8.475102143892681e-06, "loss": 0.6567, "step": 2720 }, { "epoch": 0.28, "grad_norm": 1.623983384552813, "learning_rate": 8.473913000460167e-06, "loss": 0.669, "step": 2721 }, { "epoch": 0.28, "grad_norm": 1.3627075172954952, "learning_rate": 8.472723477045756e-06, "loss": 0.6537, "step": 2722 }, { "epoch": 0.28, "grad_norm": 1.3562460804497474, "learning_rate": 8.471533573779565e-06, "loss": 0.6877, "step": 2723 }, { "epoch": 0.28, "grad_norm": 1.2429335677460949, "learning_rate": 8.470343290791742e-06, "loss": 0.5766, "step": 2724 }, { "epoch": 0.28, "grad_norm": 1.341435463928787, "learning_rate": 8.469152628212487e-06, "loss": 0.6564, "step": 2725 }, { "epoch": 0.28, "grad_norm": 1.5495978478231822, "learning_rate": 8.467961586172032e-06, "loss": 0.7192, "step": 2726 }, { "epoch": 0.28, "grad_norm": 1.7133083221732925, "learning_rate": 8.466770164800658e-06, "loss": 0.8249, "step": 2727 }, { "epoch": 0.28, "grad_norm": 1.3443896147243042, "learning_rate": 8.465578364228684e-06, "loss": 0.6064, "step": 2728 }, { "epoch": 0.28, "grad_norm": 1.4328280605214405, "learning_rate": 8.464386184586468e-06, "loss": 0.6644, "step": 2729 }, { "epoch": 0.28, "grad_norm": 1.208954487362703, "learning_rate": 8.463193626004417e-06, "loss": 0.5864, "step": 2730 }, { "epoch": 0.28, "grad_norm": 1.4119536150555205, "learning_rate": 8.462000688612973e-06, "loss": 0.7133, "step": 2731 }, { "epoch": 0.28, "grad_norm": 1.43657419762153, "learning_rate": 8.460807372542618e-06, "loss": 0.6542, "step": 2732 }, { "epoch": 0.28, "grad_norm": 1.2353324839197792, "learning_rate": 8.459613677923887e-06, "loss": 0.6739, "step": 2733 }, { "epoch": 0.28, "grad_norm": 1.3306032170895374, "learning_rate": 8.45841960488734e-06, "loss": 0.6977, "step": 2734 }, { "epoch": 0.28, "grad_norm": 1.410793710162558, "learning_rate": 8.457225153563588e-06, "loss": 0.6866, "step": 2735 }, { "epoch": 0.28, "grad_norm": 1.4178370781779697, "learning_rate": 8.456030324083285e-06, "loss": 0.7302, "step": 2736 }, { "epoch": 0.28, "grad_norm": 1.431058951293633, "learning_rate": 8.45483511657712e-06, "loss": 0.7296, "step": 2737 }, { "epoch": 0.28, "grad_norm": 1.340528178270691, "learning_rate": 8.453639531175832e-06, "loss": 0.6563, "step": 2738 }, { "epoch": 0.28, "grad_norm": 1.3901150901664385, "learning_rate": 8.45244356801019e-06, "loss": 0.6455, "step": 2739 }, { "epoch": 0.28, "grad_norm": 1.3362670538245975, "learning_rate": 8.451247227211011e-06, "loss": 0.7007, "step": 2740 }, { "epoch": 0.28, "grad_norm": 1.2472879723157528, "learning_rate": 8.450050508909155e-06, "loss": 0.671, "step": 2741 }, { "epoch": 0.28, "grad_norm": 1.8039923470133756, "learning_rate": 8.448853413235521e-06, "loss": 0.7572, "step": 2742 }, { "epoch": 0.28, "grad_norm": 1.5883545925410607, "learning_rate": 8.447655940321047e-06, "loss": 0.6596, "step": 2743 }, { "epoch": 0.28, "grad_norm": 1.456598610022876, "learning_rate": 8.446458090296717e-06, "loss": 0.6112, "step": 2744 }, { "epoch": 0.28, "grad_norm": 1.3676364412942463, "learning_rate": 8.445259863293552e-06, "loss": 0.6597, "step": 2745 }, { "epoch": 0.28, "grad_norm": 1.3419093719183455, "learning_rate": 8.444061259442613e-06, "loss": 0.7561, "step": 2746 }, { "epoch": 0.28, "grad_norm": 1.485206385879731, "learning_rate": 8.442862278875011e-06, "loss": 0.6748, "step": 2747 }, { "epoch": 0.28, "grad_norm": 1.2670736782997387, "learning_rate": 8.441662921721889e-06, "loss": 0.6956, "step": 2748 }, { "epoch": 0.28, "grad_norm": 1.6010481852157943, "learning_rate": 8.440463188114432e-06, "loss": 0.6143, "step": 2749 }, { "epoch": 0.28, "grad_norm": 1.4307704008094806, "learning_rate": 8.439263078183875e-06, "loss": 0.6978, "step": 2750 }, { "epoch": 0.28, "grad_norm": 2.2661308019411535, "learning_rate": 8.438062592061484e-06, "loss": 0.7452, "step": 2751 }, { "epoch": 0.28, "grad_norm": 1.3221672194119756, "learning_rate": 8.43686172987857e-06, "loss": 0.7432, "step": 2752 }, { "epoch": 0.28, "grad_norm": 1.5175627262196607, "learning_rate": 8.435660491766487e-06, "loss": 0.642, "step": 2753 }, { "epoch": 0.28, "grad_norm": 1.3492134826193576, "learning_rate": 8.434458877856626e-06, "loss": 0.6188, "step": 2754 }, { "epoch": 0.28, "grad_norm": 1.3068328750268665, "learning_rate": 8.433256888280422e-06, "loss": 0.7051, "step": 2755 }, { "epoch": 0.28, "grad_norm": 1.4484740747501152, "learning_rate": 8.43205452316935e-06, "loss": 0.6787, "step": 2756 }, { "epoch": 0.28, "grad_norm": 1.334539044205879, "learning_rate": 8.430851782654928e-06, "loss": 0.594, "step": 2757 }, { "epoch": 0.28, "grad_norm": 1.5691186055309303, "learning_rate": 8.429648666868713e-06, "loss": 0.6391, "step": 2758 }, { "epoch": 0.28, "grad_norm": 1.3365180392615514, "learning_rate": 8.428445175942304e-06, "loss": 0.7464, "step": 2759 }, { "epoch": 0.28, "grad_norm": 1.4883434687768666, "learning_rate": 8.427241310007338e-06, "loss": 0.7149, "step": 2760 }, { "epoch": 0.28, "grad_norm": 1.4290180381585182, "learning_rate": 8.4260370691955e-06, "loss": 0.7335, "step": 2761 }, { "epoch": 0.28, "grad_norm": 1.3968327548219026, "learning_rate": 8.424832453638507e-06, "loss": 0.5861, "step": 2762 }, { "epoch": 0.28, "grad_norm": 1.3447142068418696, "learning_rate": 8.423627463468127e-06, "loss": 0.6474, "step": 2763 }, { "epoch": 0.28, "grad_norm": 1.3930646602106036, "learning_rate": 8.422422098816159e-06, "loss": 0.6698, "step": 2764 }, { "epoch": 0.28, "grad_norm": 1.591767126454726, "learning_rate": 8.421216359814451e-06, "loss": 0.7499, "step": 2765 }, { "epoch": 0.28, "grad_norm": 1.3900962928989389, "learning_rate": 8.420010246594887e-06, "loss": 0.5931, "step": 2766 }, { "epoch": 0.28, "grad_norm": 1.5284510582571262, "learning_rate": 8.418803759289392e-06, "loss": 0.7289, "step": 2767 }, { "epoch": 0.28, "grad_norm": 1.520064230749176, "learning_rate": 8.417596898029936e-06, "loss": 0.6413, "step": 2768 }, { "epoch": 0.28, "grad_norm": 1.4967554304141877, "learning_rate": 8.416389662948527e-06, "loss": 0.5886, "step": 2769 }, { "epoch": 0.28, "grad_norm": 1.3574580608073146, "learning_rate": 8.415182054177213e-06, "loss": 0.601, "step": 2770 }, { "epoch": 0.28, "grad_norm": 1.275958594479038, "learning_rate": 8.413974071848084e-06, "loss": 0.5047, "step": 2771 }, { "epoch": 0.28, "grad_norm": 1.3569568407804673, "learning_rate": 8.412765716093273e-06, "loss": 0.5721, "step": 2772 }, { "epoch": 0.28, "grad_norm": 1.5099432741724619, "learning_rate": 8.411556987044948e-06, "loss": 0.7352, "step": 2773 }, { "epoch": 0.28, "grad_norm": 1.366584813730855, "learning_rate": 8.410347884835323e-06, "loss": 0.6777, "step": 2774 }, { "epoch": 0.28, "grad_norm": 1.2229239210731138, "learning_rate": 8.409138409596655e-06, "loss": 0.7147, "step": 2775 }, { "epoch": 0.28, "grad_norm": 1.7535139593014688, "learning_rate": 8.407928561461237e-06, "loss": 0.7118, "step": 2776 }, { "epoch": 0.28, "grad_norm": 1.224762227562558, "learning_rate": 8.4067183405614e-06, "loss": 0.661, "step": 2777 }, { "epoch": 0.28, "grad_norm": 1.2916907136527975, "learning_rate": 8.405507747029524e-06, "loss": 0.6165, "step": 2778 }, { "epoch": 0.28, "grad_norm": 1.5312121144806896, "learning_rate": 8.404296780998022e-06, "loss": 0.7345, "step": 2779 }, { "epoch": 0.28, "grad_norm": 1.312537485596847, "learning_rate": 8.403085442599355e-06, "loss": 0.6928, "step": 2780 }, { "epoch": 0.28, "grad_norm": 1.279664540511041, "learning_rate": 8.40187373196602e-06, "loss": 0.7225, "step": 2781 }, { "epoch": 0.28, "grad_norm": 1.4900228743466972, "learning_rate": 8.400661649230553e-06, "loss": 0.6993, "step": 2782 }, { "epoch": 0.28, "grad_norm": 1.4916171624542078, "learning_rate": 8.399449194525538e-06, "loss": 0.6632, "step": 2783 }, { "epoch": 0.28, "grad_norm": 1.4480551383091644, "learning_rate": 8.398236367983591e-06, "loss": 0.6168, "step": 2784 }, { "epoch": 0.28, "grad_norm": 1.373187785949036, "learning_rate": 8.397023169737375e-06, "loss": 0.6136, "step": 2785 }, { "epoch": 0.28, "grad_norm": 1.3903455992670142, "learning_rate": 8.39580959991959e-06, "loss": 0.6953, "step": 2786 }, { "epoch": 0.28, "grad_norm": 1.5226445260896642, "learning_rate": 8.394595658662983e-06, "loss": 0.606, "step": 2787 }, { "epoch": 0.28, "grad_norm": 1.4167448482699223, "learning_rate": 8.393381346100328e-06, "loss": 0.646, "step": 2788 }, { "epoch": 0.28, "grad_norm": 1.2977347301438293, "learning_rate": 8.392166662364457e-06, "loss": 0.6486, "step": 2789 }, { "epoch": 0.28, "grad_norm": 1.245383872389809, "learning_rate": 8.390951607588228e-06, "loss": 0.5857, "step": 2790 }, { "epoch": 0.28, "grad_norm": 1.3962872746790485, "learning_rate": 8.38973618190455e-06, "loss": 0.6071, "step": 2791 }, { "epoch": 0.29, "grad_norm": 1.4568089214985729, "learning_rate": 8.388520385446362e-06, "loss": 0.6363, "step": 2792 }, { "epoch": 0.29, "grad_norm": 1.320970877932928, "learning_rate": 8.387304218346657e-06, "loss": 0.7377, "step": 2793 }, { "epoch": 0.29, "grad_norm": 1.2165068522662525, "learning_rate": 8.386087680738457e-06, "loss": 0.572, "step": 2794 }, { "epoch": 0.29, "grad_norm": 1.2915770714418655, "learning_rate": 8.384870772754827e-06, "loss": 0.668, "step": 2795 }, { "epoch": 0.29, "grad_norm": 1.3310467569343134, "learning_rate": 8.38365349452888e-06, "loss": 0.6577, "step": 2796 }, { "epoch": 0.29, "grad_norm": 1.5122631790557919, "learning_rate": 8.38243584619376e-06, "loss": 0.7723, "step": 2797 }, { "epoch": 0.29, "grad_norm": 1.3225934399442456, "learning_rate": 8.381217827882654e-06, "loss": 0.6397, "step": 2798 }, { "epoch": 0.29, "grad_norm": 1.3240988499205095, "learning_rate": 8.379999439728793e-06, "loss": 0.7379, "step": 2799 }, { "epoch": 0.29, "grad_norm": 1.3852215315697503, "learning_rate": 8.378780681865446e-06, "loss": 0.658, "step": 2800 }, { "epoch": 0.29, "grad_norm": 1.525567546249349, "learning_rate": 8.377561554425923e-06, "loss": 0.7026, "step": 2801 }, { "epoch": 0.29, "grad_norm": 1.5473859324350696, "learning_rate": 8.37634205754357e-06, "loss": 0.6197, "step": 2802 }, { "epoch": 0.29, "grad_norm": 1.3332031222085179, "learning_rate": 8.375122191351784e-06, "loss": 0.594, "step": 2803 }, { "epoch": 0.29, "grad_norm": 1.3731595462871098, "learning_rate": 8.37390195598399e-06, "loss": 0.6206, "step": 2804 }, { "epoch": 0.29, "grad_norm": 1.4197703331863132, "learning_rate": 8.372681351573664e-06, "loss": 0.6757, "step": 2805 }, { "epoch": 0.29, "grad_norm": 1.349566609316016, "learning_rate": 8.371460378254314e-06, "loss": 0.5144, "step": 2806 }, { "epoch": 0.29, "grad_norm": 1.328355615822837, "learning_rate": 8.370239036159493e-06, "loss": 0.6733, "step": 2807 }, { "epoch": 0.29, "grad_norm": 1.4192855614683337, "learning_rate": 8.369017325422793e-06, "loss": 0.5995, "step": 2808 }, { "epoch": 0.29, "grad_norm": 1.367554374543165, "learning_rate": 8.367795246177847e-06, "loss": 0.6308, "step": 2809 }, { "epoch": 0.29, "grad_norm": 1.6714064010317267, "learning_rate": 8.366572798558328e-06, "loss": 0.7318, "step": 2810 }, { "epoch": 0.29, "grad_norm": 1.4222858026646716, "learning_rate": 8.36534998269795e-06, "loss": 0.6253, "step": 2811 }, { "epoch": 0.29, "grad_norm": 1.5200571059292078, "learning_rate": 8.364126798730463e-06, "loss": 0.7131, "step": 2812 }, { "epoch": 0.29, "grad_norm": 1.4950185397777729, "learning_rate": 8.362903246789665e-06, "loss": 0.6447, "step": 2813 }, { "epoch": 0.29, "grad_norm": 1.4676398195974125, "learning_rate": 8.36167932700939e-06, "loss": 0.7262, "step": 2814 }, { "epoch": 0.29, "grad_norm": 1.4693786526571644, "learning_rate": 8.360455039523507e-06, "loss": 0.7087, "step": 2815 }, { "epoch": 0.29, "grad_norm": 1.341241433724615, "learning_rate": 8.359230384465936e-06, "loss": 0.6641, "step": 2816 }, { "epoch": 0.29, "grad_norm": 1.370674502999527, "learning_rate": 8.358005361970627e-06, "loss": 0.7146, "step": 2817 }, { "epoch": 0.29, "grad_norm": 1.3929538404903217, "learning_rate": 8.35677997217158e-06, "loss": 0.7661, "step": 2818 }, { "epoch": 0.29, "grad_norm": 1.3557818718617505, "learning_rate": 8.355554215202824e-06, "loss": 0.6727, "step": 2819 }, { "epoch": 0.29, "grad_norm": 1.562873098122924, "learning_rate": 8.35432809119844e-06, "loss": 0.6648, "step": 2820 }, { "epoch": 0.29, "grad_norm": 1.3613311865575992, "learning_rate": 8.353101600292542e-06, "loss": 0.5908, "step": 2821 }, { "epoch": 0.29, "grad_norm": 1.3821712339513206, "learning_rate": 8.351874742619282e-06, "loss": 0.7518, "step": 2822 }, { "epoch": 0.29, "grad_norm": 1.393890571513378, "learning_rate": 8.350647518312858e-06, "loss": 0.5814, "step": 2823 }, { "epoch": 0.29, "grad_norm": 1.3727814725154608, "learning_rate": 8.349419927507505e-06, "loss": 0.6412, "step": 2824 }, { "epoch": 0.29, "grad_norm": 1.6076012675737212, "learning_rate": 8.3481919703375e-06, "loss": 0.7679, "step": 2825 }, { "epoch": 0.29, "grad_norm": 1.7233758364705165, "learning_rate": 8.346963646937158e-06, "loss": 0.5602, "step": 2826 }, { "epoch": 0.29, "grad_norm": 1.5566968551887537, "learning_rate": 8.345734957440834e-06, "loss": 0.7142, "step": 2827 }, { "epoch": 0.29, "grad_norm": 1.7995717693715387, "learning_rate": 8.344505901982927e-06, "loss": 0.7429, "step": 2828 }, { "epoch": 0.29, "grad_norm": 1.3528118129274895, "learning_rate": 8.343276480697868e-06, "loss": 0.5412, "step": 2829 }, { "epoch": 0.29, "grad_norm": 1.6099892830380622, "learning_rate": 8.342046693720136e-06, "loss": 0.7379, "step": 2830 }, { "epoch": 0.29, "grad_norm": 1.6543766081949272, "learning_rate": 8.34081654118425e-06, "loss": 0.774, "step": 2831 }, { "epoch": 0.29, "grad_norm": 1.2520676085967344, "learning_rate": 8.33958602322476e-06, "loss": 0.568, "step": 2832 }, { "epoch": 0.29, "grad_norm": 1.3574790055853103, "learning_rate": 8.338355139976264e-06, "loss": 0.6007, "step": 2833 }, { "epoch": 0.29, "grad_norm": 1.5570390573776889, "learning_rate": 8.3371238915734e-06, "loss": 0.5957, "step": 2834 }, { "epoch": 0.29, "grad_norm": 1.5467079089089526, "learning_rate": 8.335892278150842e-06, "loss": 0.6946, "step": 2835 }, { "epoch": 0.29, "grad_norm": 1.3352078079634042, "learning_rate": 8.334660299843304e-06, "loss": 0.7015, "step": 2836 }, { "epoch": 0.29, "grad_norm": 1.4006615020115065, "learning_rate": 8.333427956785545e-06, "loss": 0.5927, "step": 2837 }, { "epoch": 0.29, "grad_norm": 1.5875325354916794, "learning_rate": 8.33219524911236e-06, "loss": 0.7855, "step": 2838 }, { "epoch": 0.29, "grad_norm": 1.307397280349866, "learning_rate": 8.330962176958584e-06, "loss": 0.686, "step": 2839 }, { "epoch": 0.29, "grad_norm": 1.9171014406658287, "learning_rate": 8.329728740459092e-06, "loss": 0.7556, "step": 2840 }, { "epoch": 0.29, "grad_norm": 1.4706119711498407, "learning_rate": 8.3284949397488e-06, "loss": 0.6914, "step": 2841 }, { "epoch": 0.29, "grad_norm": 1.2774150676443297, "learning_rate": 8.32726077496266e-06, "loss": 0.6286, "step": 2842 }, { "epoch": 0.29, "grad_norm": 1.4364704222691076, "learning_rate": 8.32602624623567e-06, "loss": 0.6073, "step": 2843 }, { "epoch": 0.29, "grad_norm": 1.5591093474019464, "learning_rate": 8.324791353702862e-06, "loss": 0.7001, "step": 2844 }, { "epoch": 0.29, "grad_norm": 1.1860280211700684, "learning_rate": 8.323556097499314e-06, "loss": 0.5694, "step": 2845 }, { "epoch": 0.29, "grad_norm": 1.4871366656397094, "learning_rate": 8.322320477760138e-06, "loss": 0.6621, "step": 2846 }, { "epoch": 0.29, "grad_norm": 1.3914455071778713, "learning_rate": 8.321084494620489e-06, "loss": 0.7467, "step": 2847 }, { "epoch": 0.29, "grad_norm": 1.4438295872879618, "learning_rate": 8.319848148215559e-06, "loss": 0.7742, "step": 2848 }, { "epoch": 0.29, "grad_norm": 1.3589652839059865, "learning_rate": 8.318611438680581e-06, "loss": 0.6867, "step": 2849 }, { "epoch": 0.29, "grad_norm": 1.273148223084731, "learning_rate": 8.317374366150832e-06, "loss": 0.6389, "step": 2850 }, { "epoch": 0.29, "grad_norm": 1.3696893795737979, "learning_rate": 8.316136930761622e-06, "loss": 0.7204, "step": 2851 }, { "epoch": 0.29, "grad_norm": 1.3364777439890207, "learning_rate": 8.314899132648303e-06, "loss": 0.7234, "step": 2852 }, { "epoch": 0.29, "grad_norm": 1.2961549680002116, "learning_rate": 8.313660971946268e-06, "loss": 0.6807, "step": 2853 }, { "epoch": 0.29, "grad_norm": 1.2995635904802934, "learning_rate": 8.312422448790953e-06, "loss": 0.618, "step": 2854 }, { "epoch": 0.29, "grad_norm": 1.2776136792488788, "learning_rate": 8.311183563317821e-06, "loss": 0.6289, "step": 2855 }, { "epoch": 0.29, "grad_norm": 1.4096221071550443, "learning_rate": 8.30994431566239e-06, "loss": 0.6436, "step": 2856 }, { "epoch": 0.29, "grad_norm": 1.3479822861843598, "learning_rate": 8.30870470596021e-06, "loss": 0.7442, "step": 2857 }, { "epoch": 0.29, "grad_norm": 1.2717045747906253, "learning_rate": 8.307464734346867e-06, "loss": 0.6327, "step": 2858 }, { "epoch": 0.29, "grad_norm": 1.3776212161585846, "learning_rate": 8.306224400957997e-06, "loss": 0.7103, "step": 2859 }, { "epoch": 0.29, "grad_norm": 1.328449249806277, "learning_rate": 8.304983705929264e-06, "loss": 0.6108, "step": 2860 }, { "epoch": 0.29, "grad_norm": 1.346977698725083, "learning_rate": 8.303742649396382e-06, "loss": 0.6786, "step": 2861 }, { "epoch": 0.29, "grad_norm": 1.403903836569081, "learning_rate": 8.302501231495095e-06, "loss": 0.6458, "step": 2862 }, { "epoch": 0.29, "grad_norm": 1.7187150118669805, "learning_rate": 8.301259452361197e-06, "loss": 0.7231, "step": 2863 }, { "epoch": 0.29, "grad_norm": 1.534176808691166, "learning_rate": 8.30001731213051e-06, "loss": 0.6656, "step": 2864 }, { "epoch": 0.29, "grad_norm": 1.4032977771150132, "learning_rate": 8.298774810938903e-06, "loss": 0.5991, "step": 2865 }, { "epoch": 0.29, "grad_norm": 1.3334517361756628, "learning_rate": 8.297531948922284e-06, "loss": 0.5426, "step": 2866 }, { "epoch": 0.29, "grad_norm": 1.3986272389798304, "learning_rate": 8.296288726216599e-06, "loss": 0.6004, "step": 2867 }, { "epoch": 0.29, "grad_norm": 1.456699642869778, "learning_rate": 8.295045142957832e-06, "loss": 0.6982, "step": 2868 }, { "epoch": 0.29, "grad_norm": 1.2341457943795286, "learning_rate": 8.293801199282009e-06, "loss": 0.6105, "step": 2869 }, { "epoch": 0.29, "grad_norm": 1.4052520815587548, "learning_rate": 8.292556895325195e-06, "loss": 0.5974, "step": 2870 }, { "epoch": 0.29, "grad_norm": 1.477730929267848, "learning_rate": 8.291312231223492e-06, "loss": 0.6748, "step": 2871 }, { "epoch": 0.29, "grad_norm": 1.355544078463449, "learning_rate": 8.290067207113044e-06, "loss": 0.654, "step": 2872 }, { "epoch": 0.29, "grad_norm": 1.399951364631002, "learning_rate": 8.288821823130035e-06, "loss": 0.7981, "step": 2873 }, { "epoch": 0.29, "grad_norm": 1.2516222444627076, "learning_rate": 8.287576079410687e-06, "loss": 0.6573, "step": 2874 }, { "epoch": 0.29, "grad_norm": 1.3922215636857418, "learning_rate": 8.286329976091261e-06, "loss": 0.5685, "step": 2875 }, { "epoch": 0.29, "grad_norm": 1.5399221330343689, "learning_rate": 8.285083513308054e-06, "loss": 0.6444, "step": 2876 }, { "epoch": 0.29, "grad_norm": 1.6005113203580943, "learning_rate": 8.283836691197413e-06, "loss": 0.6102, "step": 2877 }, { "epoch": 0.29, "grad_norm": 3.0755016554826446, "learning_rate": 8.28258950989571e-06, "loss": 0.6746, "step": 2878 }, { "epoch": 0.29, "grad_norm": 1.597483736882933, "learning_rate": 8.28134196953937e-06, "loss": 0.7415, "step": 2879 }, { "epoch": 0.29, "grad_norm": 1.549062450479773, "learning_rate": 8.280094070264846e-06, "loss": 0.8153, "step": 2880 }, { "epoch": 0.29, "grad_norm": 1.443938386691563, "learning_rate": 8.278845812208639e-06, "loss": 0.6164, "step": 2881 }, { "epoch": 0.29, "grad_norm": 1.5182607038011915, "learning_rate": 8.277597195507282e-06, "loss": 0.7815, "step": 2882 }, { "epoch": 0.29, "grad_norm": 1.6349261990753656, "learning_rate": 8.27634822029735e-06, "loss": 0.6517, "step": 2883 }, { "epoch": 0.29, "grad_norm": 1.281190460278781, "learning_rate": 8.275098886715463e-06, "loss": 0.5597, "step": 2884 }, { "epoch": 0.29, "grad_norm": 1.5709878337070642, "learning_rate": 8.273849194898269e-06, "loss": 0.7339, "step": 2885 }, { "epoch": 0.29, "grad_norm": 1.3906412797359404, "learning_rate": 8.272599144982466e-06, "loss": 0.6641, "step": 2886 }, { "epoch": 0.29, "grad_norm": 1.6423141197019597, "learning_rate": 8.271348737104782e-06, "loss": 0.6181, "step": 2887 }, { "epoch": 0.29, "grad_norm": 1.4671378371247419, "learning_rate": 8.270097971401994e-06, "loss": 0.6899, "step": 2888 }, { "epoch": 0.3, "grad_norm": 1.3301876915338822, "learning_rate": 8.268846848010908e-06, "loss": 0.6497, "step": 2889 }, { "epoch": 0.3, "grad_norm": 1.4364659645819182, "learning_rate": 8.267595367068375e-06, "loss": 0.7115, "step": 2890 }, { "epoch": 0.3, "grad_norm": 1.3262046220065915, "learning_rate": 8.266343528711284e-06, "loss": 0.6206, "step": 2891 }, { "epoch": 0.3, "grad_norm": 1.29460120168121, "learning_rate": 8.265091333076566e-06, "loss": 0.5856, "step": 2892 }, { "epoch": 0.3, "grad_norm": 1.4586251784257225, "learning_rate": 8.263838780301182e-06, "loss": 0.6664, "step": 2893 }, { "epoch": 0.3, "grad_norm": 1.233340114866028, "learning_rate": 8.262585870522142e-06, "loss": 0.5956, "step": 2894 }, { "epoch": 0.3, "grad_norm": 1.3959626923610384, "learning_rate": 8.261332603876493e-06, "loss": 0.7492, "step": 2895 }, { "epoch": 0.3, "grad_norm": 1.4072511288718892, "learning_rate": 8.260078980501313e-06, "loss": 0.6771, "step": 2896 }, { "epoch": 0.3, "grad_norm": 1.4279074294182157, "learning_rate": 8.258825000533732e-06, "loss": 0.6173, "step": 2897 }, { "epoch": 0.3, "grad_norm": 1.4322875389965195, "learning_rate": 8.257570664110907e-06, "loss": 0.7213, "step": 2898 }, { "epoch": 0.3, "grad_norm": 1.2569891071177175, "learning_rate": 8.256315971370044e-06, "loss": 0.5904, "step": 2899 }, { "epoch": 0.3, "grad_norm": 1.3749317697728851, "learning_rate": 8.255060922448379e-06, "loss": 0.668, "step": 2900 }, { "epoch": 0.3, "grad_norm": 1.4614439336234868, "learning_rate": 8.253805517483193e-06, "loss": 0.6557, "step": 2901 }, { "epoch": 0.3, "grad_norm": 1.4992022328596144, "learning_rate": 8.252549756611804e-06, "loss": 0.7173, "step": 2902 }, { "epoch": 0.3, "grad_norm": 1.3995416470566462, "learning_rate": 8.25129363997157e-06, "loss": 0.6587, "step": 2903 }, { "epoch": 0.3, "grad_norm": 1.4490289376650525, "learning_rate": 8.250037167699884e-06, "loss": 0.7038, "step": 2904 }, { "epoch": 0.3, "grad_norm": 1.5561263456964813, "learning_rate": 8.248780339934183e-06, "loss": 0.6602, "step": 2905 }, { "epoch": 0.3, "grad_norm": 1.398422175424479, "learning_rate": 8.247523156811943e-06, "loss": 0.6087, "step": 2906 }, { "epoch": 0.3, "grad_norm": 1.3638179344328956, "learning_rate": 8.246265618470673e-06, "loss": 0.6379, "step": 2907 }, { "epoch": 0.3, "grad_norm": 1.392303153451699, "learning_rate": 8.245007725047925e-06, "loss": 0.7538, "step": 2908 }, { "epoch": 0.3, "grad_norm": 1.5409745638640473, "learning_rate": 8.243749476681289e-06, "loss": 0.6181, "step": 2909 }, { "epoch": 0.3, "grad_norm": 1.343063190340107, "learning_rate": 8.242490873508396e-06, "loss": 0.6172, "step": 2910 }, { "epoch": 0.3, "grad_norm": 1.7332820489969276, "learning_rate": 8.241231915666912e-06, "loss": 0.6345, "step": 2911 }, { "epoch": 0.3, "grad_norm": 1.3737962607703784, "learning_rate": 8.239972603294545e-06, "loss": 0.7365, "step": 2912 }, { "epoch": 0.3, "grad_norm": 1.4286752327089227, "learning_rate": 8.238712936529041e-06, "loss": 0.6286, "step": 2913 }, { "epoch": 0.3, "grad_norm": 1.561922185596383, "learning_rate": 8.237452915508184e-06, "loss": 0.6847, "step": 2914 }, { "epoch": 0.3, "grad_norm": 1.3838073653263139, "learning_rate": 8.236192540369796e-06, "loss": 0.5745, "step": 2915 }, { "epoch": 0.3, "grad_norm": 1.5711768143389904, "learning_rate": 8.234931811251739e-06, "loss": 0.7413, "step": 2916 }, { "epoch": 0.3, "grad_norm": 1.4329093574307614, "learning_rate": 8.233670728291913e-06, "loss": 0.5956, "step": 2917 }, { "epoch": 0.3, "grad_norm": 1.2754602232895567, "learning_rate": 8.232409291628259e-06, "loss": 0.7013, "step": 2918 }, { "epoch": 0.3, "grad_norm": 1.3384275486352817, "learning_rate": 8.231147501398753e-06, "loss": 0.5869, "step": 2919 }, { "epoch": 0.3, "grad_norm": 1.3088672195705575, "learning_rate": 8.229885357741412e-06, "loss": 0.7384, "step": 2920 }, { "epoch": 0.3, "grad_norm": 1.392926890785383, "learning_rate": 8.228622860794293e-06, "loss": 0.5751, "step": 2921 }, { "epoch": 0.3, "grad_norm": 1.4883786472523248, "learning_rate": 8.227360010695486e-06, "loss": 0.8778, "step": 2922 }, { "epoch": 0.3, "grad_norm": 1.3912184050575116, "learning_rate": 8.226096807583128e-06, "loss": 0.6644, "step": 2923 }, { "epoch": 0.3, "grad_norm": 1.5294655566579751, "learning_rate": 8.224833251595387e-06, "loss": 0.6969, "step": 2924 }, { "epoch": 0.3, "grad_norm": 1.4668804377973632, "learning_rate": 8.223569342870472e-06, "loss": 0.639, "step": 2925 }, { "epoch": 0.3, "grad_norm": 1.393798152466432, "learning_rate": 8.222305081546636e-06, "loss": 0.6387, "step": 2926 }, { "epoch": 0.3, "grad_norm": 1.3470990423406364, "learning_rate": 8.22104046776216e-06, "loss": 0.7929, "step": 2927 }, { "epoch": 0.3, "grad_norm": 1.4007323998285939, "learning_rate": 8.219775501655372e-06, "loss": 0.6327, "step": 2928 }, { "epoch": 0.3, "grad_norm": 2.3820070350243037, "learning_rate": 8.218510183364637e-06, "loss": 0.8213, "step": 2929 }, { "epoch": 0.3, "grad_norm": 1.398534450172922, "learning_rate": 8.217244513028354e-06, "loss": 0.6459, "step": 2930 }, { "epoch": 0.3, "grad_norm": 1.3908042631030517, "learning_rate": 8.215978490784971e-06, "loss": 0.6321, "step": 2931 }, { "epoch": 0.3, "grad_norm": 1.5377731129823502, "learning_rate": 8.214712116772959e-06, "loss": 0.7823, "step": 2932 }, { "epoch": 0.3, "grad_norm": 1.2067415097829621, "learning_rate": 8.213445391130841e-06, "loss": 0.5546, "step": 2933 }, { "epoch": 0.3, "grad_norm": 1.5057712602392899, "learning_rate": 8.212178313997172e-06, "loss": 0.5717, "step": 2934 }, { "epoch": 0.3, "grad_norm": 1.1716270128524586, "learning_rate": 8.210910885510546e-06, "loss": 0.5553, "step": 2935 }, { "epoch": 0.3, "grad_norm": 1.3840400357873988, "learning_rate": 8.209643105809597e-06, "loss": 0.6222, "step": 2936 }, { "epoch": 0.3, "grad_norm": 1.3927770874244059, "learning_rate": 8.208374975032998e-06, "loss": 0.6703, "step": 2937 }, { "epoch": 0.3, "grad_norm": 1.3749098316698507, "learning_rate": 8.207106493319459e-06, "loss": 0.7214, "step": 2938 }, { "epoch": 0.3, "grad_norm": 1.8566798676967748, "learning_rate": 8.205837660807726e-06, "loss": 0.7241, "step": 2939 }, { "epoch": 0.3, "grad_norm": 1.3632260302473227, "learning_rate": 8.204568477636585e-06, "loss": 0.6631, "step": 2940 }, { "epoch": 0.3, "grad_norm": 1.3360141484214143, "learning_rate": 8.203298943944866e-06, "loss": 0.7149, "step": 2941 }, { "epoch": 0.3, "grad_norm": 1.5486306716299951, "learning_rate": 8.202029059871428e-06, "loss": 0.7794, "step": 2942 }, { "epoch": 0.3, "grad_norm": 1.8143281776637556, "learning_rate": 8.200758825555178e-06, "loss": 0.6002, "step": 2943 }, { "epoch": 0.3, "grad_norm": 1.6031596944941937, "learning_rate": 8.19948824113505e-06, "loss": 0.7724, "step": 2944 }, { "epoch": 0.3, "grad_norm": 1.4625188524082335, "learning_rate": 8.198217306750028e-06, "loss": 0.7591, "step": 2945 }, { "epoch": 0.3, "grad_norm": 1.3959724461573013, "learning_rate": 8.196946022539123e-06, "loss": 0.6087, "step": 2946 }, { "epoch": 0.3, "grad_norm": 1.3256228651848116, "learning_rate": 8.195674388641394e-06, "loss": 0.6721, "step": 2947 }, { "epoch": 0.3, "grad_norm": 1.2758112373793835, "learning_rate": 8.194402405195933e-06, "loss": 0.7318, "step": 2948 }, { "epoch": 0.3, "grad_norm": 1.4196173669541972, "learning_rate": 8.193130072341872e-06, "loss": 0.6684, "step": 2949 }, { "epoch": 0.3, "grad_norm": 1.3236713382321958, "learning_rate": 8.191857390218381e-06, "loss": 0.6619, "step": 2950 }, { "epoch": 0.3, "grad_norm": 1.500899573108176, "learning_rate": 8.190584358964669e-06, "loss": 0.7089, "step": 2951 }, { "epoch": 0.3, "grad_norm": 1.5755645684695077, "learning_rate": 8.18931097871998e-06, "loss": 0.6873, "step": 2952 }, { "epoch": 0.3, "grad_norm": 1.4264333507905362, "learning_rate": 8.188037249623598e-06, "loss": 0.5915, "step": 2953 }, { "epoch": 0.3, "grad_norm": 1.5162537853810325, "learning_rate": 8.186763171814846e-06, "loss": 0.7586, "step": 2954 }, { "epoch": 0.3, "grad_norm": 1.32311107557953, "learning_rate": 8.185488745433084e-06, "loss": 0.5726, "step": 2955 }, { "epoch": 0.3, "grad_norm": 2.5072648399810564, "learning_rate": 8.184213970617713e-06, "loss": 0.6566, "step": 2956 }, { "epoch": 0.3, "grad_norm": 1.5537272968674976, "learning_rate": 8.182938847508168e-06, "loss": 0.5862, "step": 2957 }, { "epoch": 0.3, "grad_norm": 1.4966637053826997, "learning_rate": 8.181663376243924e-06, "loss": 0.7914, "step": 2958 }, { "epoch": 0.3, "grad_norm": 1.3803274424974536, "learning_rate": 8.180387556964495e-06, "loss": 0.6447, "step": 2959 }, { "epoch": 0.3, "grad_norm": 1.5011933113144602, "learning_rate": 8.17911138980943e-06, "loss": 0.6614, "step": 2960 }, { "epoch": 0.3, "grad_norm": 1.54080018455709, "learning_rate": 8.17783487491832e-06, "loss": 0.6648, "step": 2961 }, { "epoch": 0.3, "grad_norm": 1.4861979018037283, "learning_rate": 8.176558012430792e-06, "loss": 0.7002, "step": 2962 }, { "epoch": 0.3, "grad_norm": 1.4568143843072017, "learning_rate": 8.175280802486512e-06, "loss": 0.635, "step": 2963 }, { "epoch": 0.3, "grad_norm": 1.4624205942660653, "learning_rate": 8.174003245225178e-06, "loss": 0.6327, "step": 2964 }, { "epoch": 0.3, "grad_norm": 1.4195168486148195, "learning_rate": 8.172725340786538e-06, "loss": 0.699, "step": 2965 }, { "epoch": 0.3, "grad_norm": 1.4893686102239985, "learning_rate": 8.171447089310368e-06, "loss": 0.6883, "step": 2966 }, { "epoch": 0.3, "grad_norm": 1.5718816231910133, "learning_rate": 8.170168490936484e-06, "loss": 0.7933, "step": 2967 }, { "epoch": 0.3, "grad_norm": 1.4158394842687316, "learning_rate": 8.168889545804743e-06, "loss": 0.7067, "step": 2968 }, { "epoch": 0.3, "grad_norm": 1.5187046134574818, "learning_rate": 8.167610254055039e-06, "loss": 0.6978, "step": 2969 }, { "epoch": 0.3, "grad_norm": 1.404786077091221, "learning_rate": 8.166330615827299e-06, "loss": 0.5914, "step": 2970 }, { "epoch": 0.3, "grad_norm": 1.1407779721335054, "learning_rate": 8.165050631261493e-06, "loss": 0.6435, "step": 2971 }, { "epoch": 0.3, "grad_norm": 1.405634132666316, "learning_rate": 8.163770300497631e-06, "loss": 0.6707, "step": 2972 }, { "epoch": 0.3, "grad_norm": 1.3117772206364926, "learning_rate": 8.162489623675753e-06, "loss": 0.6109, "step": 2973 }, { "epoch": 0.3, "grad_norm": 1.4305539875960518, "learning_rate": 8.161208600935945e-06, "loss": 0.6408, "step": 2974 }, { "epoch": 0.3, "grad_norm": 1.4299933617579317, "learning_rate": 8.159927232418326e-06, "loss": 0.7479, "step": 2975 }, { "epoch": 0.3, "grad_norm": 1.5986684650917053, "learning_rate": 8.158645518263051e-06, "loss": 0.7342, "step": 2976 }, { "epoch": 0.3, "grad_norm": 1.369764453642538, "learning_rate": 8.15736345861032e-06, "loss": 0.6342, "step": 2977 }, { "epoch": 0.3, "grad_norm": 1.4418280424935588, "learning_rate": 8.156081053600366e-06, "loss": 0.6257, "step": 2978 }, { "epoch": 0.3, "grad_norm": 1.4223948909061845, "learning_rate": 8.15479830337346e-06, "loss": 0.6533, "step": 2979 }, { "epoch": 0.3, "grad_norm": 1.4445112338685686, "learning_rate": 8.153515208069912e-06, "loss": 0.691, "step": 2980 }, { "epoch": 0.3, "grad_norm": 1.6710602539784407, "learning_rate": 8.152231767830065e-06, "loss": 0.6962, "step": 2981 }, { "epoch": 0.3, "grad_norm": 1.5667765024258506, "learning_rate": 8.150947982794308e-06, "loss": 0.686, "step": 2982 }, { "epoch": 0.3, "grad_norm": 1.5666480486540522, "learning_rate": 8.14966385310306e-06, "loss": 0.5792, "step": 2983 }, { "epoch": 0.3, "grad_norm": 1.3709943514765144, "learning_rate": 8.148379378896784e-06, "loss": 0.6416, "step": 2984 }, { "epoch": 0.3, "grad_norm": 1.7237369773381892, "learning_rate": 8.147094560315978e-06, "loss": 0.6995, "step": 2985 }, { "epoch": 0.3, "grad_norm": 1.3941114948673567, "learning_rate": 8.145809397501175e-06, "loss": 0.6373, "step": 2986 }, { "epoch": 0.31, "grad_norm": 1.4821446923538655, "learning_rate": 8.144523890592947e-06, "loss": 0.66, "step": 2987 }, { "epoch": 0.31, "grad_norm": 1.3514634877641465, "learning_rate": 8.143238039731909e-06, "loss": 0.5839, "step": 2988 }, { "epoch": 0.31, "grad_norm": 1.413910800883606, "learning_rate": 8.141951845058707e-06, "loss": 0.7877, "step": 2989 }, { "epoch": 0.31, "grad_norm": 1.3009440262356684, "learning_rate": 8.140665306714026e-06, "loss": 0.6872, "step": 2990 }, { "epoch": 0.31, "grad_norm": 1.54192629943645, "learning_rate": 8.13937842483859e-06, "loss": 0.5842, "step": 2991 }, { "epoch": 0.31, "grad_norm": 1.5492673026625774, "learning_rate": 8.138091199573161e-06, "loss": 0.6934, "step": 2992 }, { "epoch": 0.31, "grad_norm": 1.4536831941722048, "learning_rate": 8.13680363105854e-06, "loss": 0.7189, "step": 2993 }, { "epoch": 0.31, "grad_norm": 1.6063927035904813, "learning_rate": 8.135515719435556e-06, "loss": 0.7354, "step": 2994 }, { "epoch": 0.31, "grad_norm": 1.4030388455425187, "learning_rate": 8.13422746484509e-06, "loss": 0.7542, "step": 2995 }, { "epoch": 0.31, "grad_norm": 1.4651769620013069, "learning_rate": 8.13293886742805e-06, "loss": 0.597, "step": 2996 }, { "epoch": 0.31, "grad_norm": 1.603097336117866, "learning_rate": 8.131649927325385e-06, "loss": 0.6429, "step": 2997 }, { "epoch": 0.31, "grad_norm": 1.7172390164108517, "learning_rate": 8.130360644678082e-06, "loss": 0.6245, "step": 2998 }, { "epoch": 0.31, "grad_norm": 1.39571994266053, "learning_rate": 8.129071019627163e-06, "loss": 0.6371, "step": 2999 }, { "epoch": 0.31, "grad_norm": 1.4209199903191196, "learning_rate": 8.127781052313692e-06, "loss": 0.7806, "step": 3000 }, { "epoch": 0.31, "grad_norm": 1.4786207179346689, "learning_rate": 8.126490742878766e-06, "loss": 0.7258, "step": 3001 }, { "epoch": 0.31, "grad_norm": 1.5889831212936718, "learning_rate": 8.125200091463519e-06, "loss": 0.6715, "step": 3002 }, { "epoch": 0.31, "grad_norm": 1.29535093417495, "learning_rate": 8.123909098209127e-06, "loss": 0.6679, "step": 3003 }, { "epoch": 0.31, "grad_norm": 1.2799186139291465, "learning_rate": 8.122617763256798e-06, "loss": 0.6244, "step": 3004 }, { "epoch": 0.31, "grad_norm": 1.362012834262055, "learning_rate": 8.121326086747787e-06, "loss": 0.5399, "step": 3005 }, { "epoch": 0.31, "grad_norm": 1.539845240942195, "learning_rate": 8.120034068823373e-06, "loss": 0.7537, "step": 3006 }, { "epoch": 0.31, "grad_norm": 1.383555926703856, "learning_rate": 8.11874170962488e-06, "loss": 0.6185, "step": 3007 }, { "epoch": 0.31, "grad_norm": 1.548017172735805, "learning_rate": 8.117449009293668e-06, "loss": 0.7028, "step": 3008 }, { "epoch": 0.31, "grad_norm": 1.4334809408451548, "learning_rate": 8.116155967971138e-06, "loss": 0.6627, "step": 3009 }, { "epoch": 0.31, "grad_norm": 1.3378564497361205, "learning_rate": 8.11486258579872e-06, "loss": 0.7639, "step": 3010 }, { "epoch": 0.31, "grad_norm": 1.386226271966759, "learning_rate": 8.113568862917889e-06, "loss": 0.6403, "step": 3011 }, { "epoch": 0.31, "grad_norm": 1.2167050168746156, "learning_rate": 8.112274799470154e-06, "loss": 0.6336, "step": 3012 }, { "epoch": 0.31, "grad_norm": 1.3054230378763894, "learning_rate": 8.110980395597063e-06, "loss": 0.5896, "step": 3013 }, { "epoch": 0.31, "grad_norm": 1.40543942571914, "learning_rate": 8.109685651440197e-06, "loss": 0.5938, "step": 3014 }, { "epoch": 0.31, "grad_norm": 1.4341282236799842, "learning_rate": 8.108390567141177e-06, "loss": 0.6406, "step": 3015 }, { "epoch": 0.31, "grad_norm": 1.5041756574411491, "learning_rate": 8.107095142841665e-06, "loss": 0.6498, "step": 3016 }, { "epoch": 0.31, "grad_norm": 1.7597525576829471, "learning_rate": 8.105799378683352e-06, "loss": 0.7245, "step": 3017 }, { "epoch": 0.31, "grad_norm": 1.391236370787527, "learning_rate": 8.104503274807976e-06, "loss": 0.6175, "step": 3018 }, { "epoch": 0.31, "grad_norm": 1.4940908116679847, "learning_rate": 8.1032068313573e-06, "loss": 0.76, "step": 3019 }, { "epoch": 0.31, "grad_norm": 1.2405809137835044, "learning_rate": 8.101910048473136e-06, "loss": 0.5583, "step": 3020 }, { "epoch": 0.31, "grad_norm": 1.4981603689591703, "learning_rate": 8.10061292629733e-06, "loss": 0.6744, "step": 3021 }, { "epoch": 0.31, "grad_norm": 1.6768542760999432, "learning_rate": 8.099315464971755e-06, "loss": 0.6566, "step": 3022 }, { "epoch": 0.31, "grad_norm": 1.3902595415835515, "learning_rate": 8.098017664638335e-06, "loss": 0.7338, "step": 3023 }, { "epoch": 0.31, "grad_norm": 1.490948548658886, "learning_rate": 8.096719525439027e-06, "loss": 0.6082, "step": 3024 }, { "epoch": 0.31, "grad_norm": 1.451305606302395, "learning_rate": 8.095421047515819e-06, "loss": 0.6083, "step": 3025 }, { "epoch": 0.31, "grad_norm": 1.1860765998572698, "learning_rate": 8.094122231010743e-06, "loss": 0.582, "step": 3026 }, { "epoch": 0.31, "grad_norm": 1.428297387147512, "learning_rate": 8.092823076065865e-06, "loss": 0.713, "step": 3027 }, { "epoch": 0.31, "grad_norm": 1.5050805603463895, "learning_rate": 8.09152358282329e-06, "loss": 0.6086, "step": 3028 }, { "epoch": 0.31, "grad_norm": 1.6409738127615772, "learning_rate": 8.090223751425154e-06, "loss": 0.7351, "step": 3029 }, { "epoch": 0.31, "grad_norm": 1.393305141725986, "learning_rate": 8.08892358201364e-06, "loss": 0.719, "step": 3030 }, { "epoch": 0.31, "grad_norm": 1.3916289242798225, "learning_rate": 8.08762307473096e-06, "loss": 0.6756, "step": 3031 }, { "epoch": 0.31, "grad_norm": 1.3734241164609022, "learning_rate": 8.086322229719365e-06, "loss": 0.6899, "step": 3032 }, { "epoch": 0.31, "grad_norm": 1.2996188013188308, "learning_rate": 8.085021047121145e-06, "loss": 0.6312, "step": 3033 }, { "epoch": 0.31, "grad_norm": 1.3893133923188548, "learning_rate": 8.083719527078624e-06, "loss": 0.6156, "step": 3034 }, { "epoch": 0.31, "grad_norm": 1.5271790576926192, "learning_rate": 8.082417669734164e-06, "loss": 0.6844, "step": 3035 }, { "epoch": 0.31, "grad_norm": 1.4420918073726026, "learning_rate": 8.081115475230165e-06, "loss": 0.5888, "step": 3036 }, { "epoch": 0.31, "grad_norm": 2.0054373087129966, "learning_rate": 8.079812943709064e-06, "loss": 0.6115, "step": 3037 }, { "epoch": 0.31, "grad_norm": 1.2673801303175465, "learning_rate": 8.078510075313331e-06, "loss": 0.6992, "step": 3038 }, { "epoch": 0.31, "grad_norm": 1.6341847049222582, "learning_rate": 8.077206870185477e-06, "loss": 0.7718, "step": 3039 }, { "epoch": 0.31, "grad_norm": 1.4188824947536836, "learning_rate": 8.07590332846805e-06, "loss": 0.6625, "step": 3040 }, { "epoch": 0.31, "grad_norm": 1.3961488822905324, "learning_rate": 8.074599450303633e-06, "loss": 0.7322, "step": 3041 }, { "epoch": 0.31, "grad_norm": 1.3927478335637717, "learning_rate": 8.073295235834844e-06, "loss": 0.6659, "step": 3042 }, { "epoch": 0.31, "grad_norm": 1.5441234805916146, "learning_rate": 8.071990685204342e-06, "loss": 0.7058, "step": 3043 }, { "epoch": 0.31, "grad_norm": 1.6175686655539057, "learning_rate": 8.070685798554822e-06, "loss": 0.6864, "step": 3044 }, { "epoch": 0.31, "grad_norm": 1.5195399502059903, "learning_rate": 8.069380576029011e-06, "loss": 0.7008, "step": 3045 }, { "epoch": 0.31, "grad_norm": 1.4946720993630787, "learning_rate": 8.068075017769679e-06, "loss": 0.6911, "step": 3046 }, { "epoch": 0.31, "grad_norm": 1.7793644687807313, "learning_rate": 8.066769123919627e-06, "loss": 0.7222, "step": 3047 }, { "epoch": 0.31, "grad_norm": 1.5065612904005579, "learning_rate": 8.065462894621698e-06, "loss": 0.7357, "step": 3048 }, { "epoch": 0.31, "grad_norm": 1.3629999192600115, "learning_rate": 8.06415633001877e-06, "loss": 0.5702, "step": 3049 }, { "epoch": 0.31, "grad_norm": 1.6119153137375022, "learning_rate": 8.062849430253756e-06, "loss": 0.6994, "step": 3050 }, { "epoch": 0.31, "grad_norm": 1.357694696414448, "learning_rate": 8.061542195469606e-06, "loss": 0.6841, "step": 3051 }, { "epoch": 0.31, "grad_norm": 1.3545106247067407, "learning_rate": 8.060234625809306e-06, "loss": 0.6426, "step": 3052 }, { "epoch": 0.31, "grad_norm": 1.3722402588907963, "learning_rate": 8.058926721415885e-06, "loss": 0.6336, "step": 3053 }, { "epoch": 0.31, "grad_norm": 1.3819176805038027, "learning_rate": 8.057618482432399e-06, "loss": 0.6875, "step": 3054 }, { "epoch": 0.31, "grad_norm": 1.4647024902022165, "learning_rate": 8.056309909001946e-06, "loss": 0.7549, "step": 3055 }, { "epoch": 0.31, "grad_norm": 1.4187737651220136, "learning_rate": 8.055001001267662e-06, "loss": 0.7202, "step": 3056 }, { "epoch": 0.31, "grad_norm": 1.3339331647664534, "learning_rate": 8.053691759372716e-06, "loss": 0.6917, "step": 3057 }, { "epoch": 0.31, "grad_norm": 1.429999803291352, "learning_rate": 8.052382183460313e-06, "loss": 0.7635, "step": 3058 }, { "epoch": 0.31, "grad_norm": 1.895666219744085, "learning_rate": 8.051072273673697e-06, "loss": 0.6515, "step": 3059 }, { "epoch": 0.31, "grad_norm": 1.6423918064951035, "learning_rate": 8.049762030156151e-06, "loss": 0.6624, "step": 3060 }, { "epoch": 0.31, "grad_norm": 1.398829490959151, "learning_rate": 8.04845145305099e-06, "loss": 0.6052, "step": 3061 }, { "epoch": 0.31, "grad_norm": 1.3688672127508348, "learning_rate": 8.047140542501564e-06, "loss": 0.6251, "step": 3062 }, { "epoch": 0.31, "grad_norm": 1.5134379374815479, "learning_rate": 8.045829298651267e-06, "loss": 0.7321, "step": 3063 }, { "epoch": 0.31, "grad_norm": 1.3834987484640298, "learning_rate": 8.044517721643521e-06, "loss": 0.6861, "step": 3064 }, { "epoch": 0.31, "grad_norm": 1.3337888711152797, "learning_rate": 8.043205811621791e-06, "loss": 0.6098, "step": 3065 }, { "epoch": 0.31, "grad_norm": 1.3680823490195626, "learning_rate": 8.041893568729574e-06, "loss": 0.6488, "step": 3066 }, { "epoch": 0.31, "grad_norm": 1.4460550364102112, "learning_rate": 8.040580993110404e-06, "loss": 0.578, "step": 3067 }, { "epoch": 0.31, "grad_norm": 1.3836301216358236, "learning_rate": 8.039268084907857e-06, "loss": 0.5997, "step": 3068 }, { "epoch": 0.31, "grad_norm": 1.5210352151034277, "learning_rate": 8.037954844265534e-06, "loss": 0.7404, "step": 3069 }, { "epoch": 0.31, "grad_norm": 1.483017834258222, "learning_rate": 8.036641271327087e-06, "loss": 0.6728, "step": 3070 }, { "epoch": 0.31, "grad_norm": 1.3732295893517588, "learning_rate": 8.035327366236192e-06, "loss": 0.6239, "step": 3071 }, { "epoch": 0.31, "grad_norm": 1.3567046934730889, "learning_rate": 8.034013129136566e-06, "loss": 0.6156, "step": 3072 }, { "epoch": 0.31, "grad_norm": 1.3396976935925997, "learning_rate": 8.032698560171965e-06, "loss": 0.7311, "step": 3073 }, { "epoch": 0.31, "grad_norm": 1.407143538358502, "learning_rate": 8.031383659486176e-06, "loss": 0.6502, "step": 3074 }, { "epoch": 0.31, "grad_norm": 1.6425541203928322, "learning_rate": 8.030068427223024e-06, "loss": 0.7437, "step": 3075 }, { "epoch": 0.31, "grad_norm": 1.560783549945538, "learning_rate": 8.028752863526373e-06, "loss": 0.703, "step": 3076 }, { "epoch": 0.31, "grad_norm": 1.4076931882682018, "learning_rate": 8.027436968540124e-06, "loss": 0.664, "step": 3077 }, { "epoch": 0.31, "grad_norm": 1.5169265381945536, "learning_rate": 8.026120742408204e-06, "loss": 0.6197, "step": 3078 }, { "epoch": 0.31, "grad_norm": 1.531613918243405, "learning_rate": 8.024804185274592e-06, "loss": 0.63, "step": 3079 }, { "epoch": 0.31, "grad_norm": 1.2589465587008009, "learning_rate": 8.023487297283289e-06, "loss": 0.6491, "step": 3080 }, { "epoch": 0.31, "grad_norm": 1.48932262395971, "learning_rate": 8.022170078578342e-06, "loss": 0.7044, "step": 3081 }, { "epoch": 0.31, "grad_norm": 1.402849529432927, "learning_rate": 8.020852529303828e-06, "loss": 0.6782, "step": 3082 }, { "epoch": 0.31, "grad_norm": 1.3946708428448757, "learning_rate": 8.019534649603865e-06, "loss": 0.6989, "step": 3083 }, { "epoch": 0.31, "grad_norm": 1.4496300112456197, "learning_rate": 8.018216439622603e-06, "loss": 0.6553, "step": 3084 }, { "epoch": 0.32, "grad_norm": 1.380831334334434, "learning_rate": 8.01689789950423e-06, "loss": 0.5705, "step": 3085 }, { "epoch": 0.32, "grad_norm": 1.454121332348811, "learning_rate": 8.01557902939297e-06, "loss": 0.6286, "step": 3086 }, { "epoch": 0.32, "grad_norm": 1.4754399056920602, "learning_rate": 8.014259829433082e-06, "loss": 0.7675, "step": 3087 }, { "epoch": 0.32, "grad_norm": 1.2718191798760137, "learning_rate": 8.012940299768865e-06, "loss": 0.6291, "step": 3088 }, { "epoch": 0.32, "grad_norm": 1.5788961421742707, "learning_rate": 8.011620440544649e-06, "loss": 0.7506, "step": 3089 }, { "epoch": 0.32, "grad_norm": 1.4332479185394298, "learning_rate": 8.010300251904802e-06, "loss": 0.7549, "step": 3090 }, { "epoch": 0.32, "grad_norm": 1.523130361347255, "learning_rate": 8.008979733993727e-06, "loss": 0.6242, "step": 3091 }, { "epoch": 0.32, "grad_norm": 1.2846440046848686, "learning_rate": 8.007658886955868e-06, "loss": 0.5776, "step": 3092 }, { "epoch": 0.32, "grad_norm": 1.4451045987612792, "learning_rate": 8.0063377109357e-06, "loss": 0.702, "step": 3093 }, { "epoch": 0.32, "grad_norm": 1.5008706637653004, "learning_rate": 8.005016206077732e-06, "loss": 0.8603, "step": 3094 }, { "epoch": 0.32, "grad_norm": 1.397370554267536, "learning_rate": 8.003694372526516e-06, "loss": 0.66, "step": 3095 }, { "epoch": 0.32, "grad_norm": 1.424376149701813, "learning_rate": 8.002372210426637e-06, "loss": 0.6668, "step": 3096 }, { "epoch": 0.32, "grad_norm": 1.3886867275470989, "learning_rate": 8.001049719922708e-06, "loss": 0.638, "step": 3097 }, { "epoch": 0.32, "grad_norm": 1.4649600248117194, "learning_rate": 7.999726901159395e-06, "loss": 0.7465, "step": 3098 }, { "epoch": 0.32, "grad_norm": 1.3221967098255003, "learning_rate": 7.998403754281384e-06, "loss": 0.7171, "step": 3099 }, { "epoch": 0.32, "grad_norm": 1.4319493450250314, "learning_rate": 7.997080279433402e-06, "loss": 0.6994, "step": 3100 }, { "epoch": 0.32, "grad_norm": 1.4272037340296548, "learning_rate": 7.995756476760214e-06, "loss": 0.6369, "step": 3101 }, { "epoch": 0.32, "grad_norm": 1.8396770835464453, "learning_rate": 7.994432346406623e-06, "loss": 0.6932, "step": 3102 }, { "epoch": 0.32, "grad_norm": 1.4935729743234543, "learning_rate": 7.993107888517459e-06, "loss": 0.6059, "step": 3103 }, { "epoch": 0.32, "grad_norm": 1.4212106875880808, "learning_rate": 7.991783103237596e-06, "loss": 0.6319, "step": 3104 }, { "epoch": 0.32, "grad_norm": 1.303801890972975, "learning_rate": 7.990457990711942e-06, "loss": 0.5743, "step": 3105 }, { "epoch": 0.32, "grad_norm": 1.2938787228629283, "learning_rate": 7.989132551085436e-06, "loss": 0.6191, "step": 3106 }, { "epoch": 0.32, "grad_norm": 1.4280547549455227, "learning_rate": 7.987806784503062e-06, "loss": 0.5982, "step": 3107 }, { "epoch": 0.32, "grad_norm": 1.5524385590323233, "learning_rate": 7.98648069110983e-06, "loss": 0.798, "step": 3108 }, { "epoch": 0.32, "grad_norm": 1.3809249571196034, "learning_rate": 7.985154271050792e-06, "loss": 0.6616, "step": 3109 }, { "epoch": 0.32, "grad_norm": 1.4572078368082986, "learning_rate": 7.983827524471034e-06, "loss": 0.7631, "step": 3110 }, { "epoch": 0.32, "grad_norm": 2.0747482708697262, "learning_rate": 7.982500451515676e-06, "loss": 0.6362, "step": 3111 }, { "epoch": 0.32, "grad_norm": 1.453764129179902, "learning_rate": 7.981173052329875e-06, "loss": 0.6703, "step": 3112 }, { "epoch": 0.32, "grad_norm": 1.4757677404526128, "learning_rate": 7.979845327058829e-06, "loss": 0.775, "step": 3113 }, { "epoch": 0.32, "grad_norm": 1.553690136124334, "learning_rate": 7.97851727584776e-06, "loss": 0.7038, "step": 3114 }, { "epoch": 0.32, "grad_norm": 1.5730586129970499, "learning_rate": 7.977188898841935e-06, "loss": 0.6127, "step": 3115 }, { "epoch": 0.32, "grad_norm": 1.3386881642734587, "learning_rate": 7.975860196186656e-06, "loss": 0.633, "step": 3116 }, { "epoch": 0.32, "grad_norm": 1.4140275192292495, "learning_rate": 7.974531168027255e-06, "loss": 0.6323, "step": 3117 }, { "epoch": 0.32, "grad_norm": 1.3757767498215, "learning_rate": 7.973201814509104e-06, "loss": 0.6334, "step": 3118 }, { "epoch": 0.32, "grad_norm": 1.5088590687106371, "learning_rate": 7.971872135777612e-06, "loss": 0.6732, "step": 3119 }, { "epoch": 0.32, "grad_norm": 1.40853084783773, "learning_rate": 7.970542131978219e-06, "loss": 0.7662, "step": 3120 }, { "epoch": 0.32, "grad_norm": 1.4793835220216163, "learning_rate": 7.969211803256402e-06, "loss": 0.6793, "step": 3121 }, { "epoch": 0.32, "grad_norm": 1.4580005598502626, "learning_rate": 7.967881149757678e-06, "loss": 0.6918, "step": 3122 }, { "epoch": 0.32, "grad_norm": 1.581229214018674, "learning_rate": 7.966550171627592e-06, "loss": 0.7189, "step": 3123 }, { "epoch": 0.32, "grad_norm": 1.331832817530907, "learning_rate": 7.965218869011733e-06, "loss": 0.6136, "step": 3124 }, { "epoch": 0.32, "grad_norm": 1.295106498851967, "learning_rate": 7.963887242055714e-06, "loss": 0.6524, "step": 3125 }, { "epoch": 0.32, "grad_norm": 1.5869673078359645, "learning_rate": 7.962555290905198e-06, "loss": 0.7818, "step": 3126 }, { "epoch": 0.32, "grad_norm": 1.408872293710248, "learning_rate": 7.96122301570587e-06, "loss": 0.7602, "step": 3127 }, { "epoch": 0.32, "grad_norm": 1.2933846439290628, "learning_rate": 7.95989041660346e-06, "loss": 0.6146, "step": 3128 }, { "epoch": 0.32, "grad_norm": 1.5227047715894926, "learning_rate": 7.958557493743729e-06, "loss": 0.6663, "step": 3129 }, { "epoch": 0.32, "grad_norm": 1.50047865893503, "learning_rate": 7.957224247272471e-06, "loss": 0.7232, "step": 3130 }, { "epoch": 0.32, "grad_norm": 1.3191976462090118, "learning_rate": 7.955890677335523e-06, "loss": 0.5642, "step": 3131 }, { "epoch": 0.32, "grad_norm": 1.3267528485845501, "learning_rate": 7.95455678407875e-06, "loss": 0.5841, "step": 3132 }, { "epoch": 0.32, "grad_norm": 1.5997705816037495, "learning_rate": 7.953222567648056e-06, "loss": 0.6897, "step": 3133 }, { "epoch": 0.32, "grad_norm": 1.4727062060776, "learning_rate": 7.95188802818938e-06, "loss": 0.6566, "step": 3134 }, { "epoch": 0.32, "grad_norm": 1.3105897493165883, "learning_rate": 7.950553165848697e-06, "loss": 0.7779, "step": 3135 }, { "epoch": 0.32, "grad_norm": 1.3769795710511878, "learning_rate": 7.949217980772013e-06, "loss": 0.6972, "step": 3136 }, { "epoch": 0.32, "grad_norm": 1.2963206112668784, "learning_rate": 7.947882473105377e-06, "loss": 0.6147, "step": 3137 }, { "epoch": 0.32, "grad_norm": 1.5541061040776543, "learning_rate": 7.946546642994864e-06, "loss": 0.6705, "step": 3138 }, { "epoch": 0.32, "grad_norm": 1.5062690511192216, "learning_rate": 7.945210490586593e-06, "loss": 0.753, "step": 3139 }, { "epoch": 0.32, "grad_norm": 1.5331226313675215, "learning_rate": 7.943874016026711e-06, "loss": 0.7923, "step": 3140 }, { "epoch": 0.32, "grad_norm": 1.5250261442611313, "learning_rate": 7.942537219461409e-06, "loss": 0.7144, "step": 3141 }, { "epoch": 0.32, "grad_norm": 1.4798901344832822, "learning_rate": 7.941200101036902e-06, "loss": 0.6028, "step": 3142 }, { "epoch": 0.32, "grad_norm": 1.3148676624714282, "learning_rate": 7.939862660899449e-06, "loss": 0.6921, "step": 3143 }, { "epoch": 0.32, "grad_norm": 1.3358149786415945, "learning_rate": 7.938524899195341e-06, "loss": 0.6436, "step": 3144 }, { "epoch": 0.32, "grad_norm": 1.6133051580104585, "learning_rate": 7.937186816070904e-06, "loss": 0.7312, "step": 3145 }, { "epoch": 0.32, "grad_norm": 1.217136924847, "learning_rate": 7.935848411672499e-06, "loss": 0.671, "step": 3146 }, { "epoch": 0.32, "grad_norm": 1.437107496982625, "learning_rate": 7.934509686146525e-06, "loss": 0.6402, "step": 3147 }, { "epoch": 0.32, "grad_norm": 1.5654721855894727, "learning_rate": 7.933170639639412e-06, "loss": 0.7164, "step": 3148 }, { "epoch": 0.32, "grad_norm": 1.2807404760505918, "learning_rate": 7.931831272297627e-06, "loss": 0.6538, "step": 3149 }, { "epoch": 0.32, "grad_norm": 1.580359029309084, "learning_rate": 7.93049158426767e-06, "loss": 0.7302, "step": 3150 }, { "epoch": 0.32, "grad_norm": 1.4257485210195535, "learning_rate": 7.929151575696083e-06, "loss": 0.6331, "step": 3151 }, { "epoch": 0.32, "grad_norm": 1.3494261455036587, "learning_rate": 7.927811246729436e-06, "loss": 0.6707, "step": 3152 }, { "epoch": 0.32, "grad_norm": 1.4332808116719993, "learning_rate": 7.926470597514335e-06, "loss": 0.6521, "step": 3153 }, { "epoch": 0.32, "grad_norm": 1.4069060992040063, "learning_rate": 7.92512962819742e-06, "loss": 0.8012, "step": 3154 }, { "epoch": 0.32, "grad_norm": 1.4939733802307034, "learning_rate": 7.923788338925376e-06, "loss": 0.6765, "step": 3155 }, { "epoch": 0.32, "grad_norm": 1.7175256280895115, "learning_rate": 7.922446729844909e-06, "loss": 0.7981, "step": 3156 }, { "epoch": 0.32, "grad_norm": 1.3495090544514958, "learning_rate": 7.921104801102767e-06, "loss": 0.7281, "step": 3157 }, { "epoch": 0.32, "grad_norm": 1.3542553639637813, "learning_rate": 7.919762552845734e-06, "loss": 0.585, "step": 3158 }, { "epoch": 0.32, "grad_norm": 1.263176474976202, "learning_rate": 7.918419985220625e-06, "loss": 0.5899, "step": 3159 }, { "epoch": 0.32, "grad_norm": 1.4799522572037762, "learning_rate": 7.917077098374293e-06, "loss": 0.6433, "step": 3160 }, { "epoch": 0.32, "grad_norm": 1.4893269011685064, "learning_rate": 7.915733892453627e-06, "loss": 0.5807, "step": 3161 }, { "epoch": 0.32, "grad_norm": 1.4967805265540695, "learning_rate": 7.914390367605546e-06, "loss": 0.7045, "step": 3162 }, { "epoch": 0.32, "grad_norm": 1.5273687966207974, "learning_rate": 7.913046523977008e-06, "loss": 0.684, "step": 3163 }, { "epoch": 0.32, "grad_norm": 1.332882783986617, "learning_rate": 7.911702361715006e-06, "loss": 0.6812, "step": 3164 }, { "epoch": 0.32, "grad_norm": 1.5382659561144592, "learning_rate": 7.910357880966563e-06, "loss": 0.6838, "step": 3165 }, { "epoch": 0.32, "grad_norm": 1.4100310608054216, "learning_rate": 7.909013081878744e-06, "loss": 0.6006, "step": 3166 }, { "epoch": 0.32, "grad_norm": 1.3347493076393968, "learning_rate": 7.907667964598642e-06, "loss": 0.6852, "step": 3167 }, { "epoch": 0.32, "grad_norm": 1.2287913783354707, "learning_rate": 7.906322529273392e-06, "loss": 0.6231, "step": 3168 }, { "epoch": 0.32, "grad_norm": 1.4549554793011412, "learning_rate": 7.904976776050156e-06, "loss": 0.6349, "step": 3169 }, { "epoch": 0.32, "grad_norm": 1.4540995258028047, "learning_rate": 7.903630705076138e-06, "loss": 0.5388, "step": 3170 }, { "epoch": 0.32, "grad_norm": 1.5189035515973734, "learning_rate": 7.902284316498567e-06, "loss": 0.685, "step": 3171 }, { "epoch": 0.32, "grad_norm": 1.4540828736671643, "learning_rate": 7.900937610464721e-06, "loss": 0.646, "step": 3172 }, { "epoch": 0.32, "grad_norm": 1.387856060661385, "learning_rate": 7.8995905871219e-06, "loss": 0.5903, "step": 3173 }, { "epoch": 0.32, "grad_norm": 1.3826637147894139, "learning_rate": 7.898243246617443e-06, "loss": 0.6787, "step": 3174 }, { "epoch": 0.32, "grad_norm": 1.3432168248661958, "learning_rate": 7.896895589098728e-06, "loss": 0.6682, "step": 3175 }, { "epoch": 0.32, "grad_norm": 1.3604272093860243, "learning_rate": 7.895547614713161e-06, "loss": 0.6386, "step": 3176 }, { "epoch": 0.32, "grad_norm": 1.718698060104393, "learning_rate": 7.894199323608186e-06, "loss": 0.6831, "step": 3177 }, { "epoch": 0.32, "grad_norm": 1.2135612517387975, "learning_rate": 7.89285071593128e-06, "loss": 0.6316, "step": 3178 }, { "epoch": 0.32, "grad_norm": 1.4113007550878072, "learning_rate": 7.891501791829957e-06, "loss": 0.6763, "step": 3179 }, { "epoch": 0.32, "grad_norm": 1.3400537083589363, "learning_rate": 7.890152551451766e-06, "loss": 0.7084, "step": 3180 }, { "epoch": 0.32, "grad_norm": 1.4430650479638678, "learning_rate": 7.888802994944285e-06, "loss": 0.7054, "step": 3181 }, { "epoch": 0.32, "grad_norm": 1.326892397457987, "learning_rate": 7.887453122455133e-06, "loss": 0.7078, "step": 3182 }, { "epoch": 0.33, "grad_norm": 1.2440438133592127, "learning_rate": 7.886102934131962e-06, "loss": 0.6402, "step": 3183 }, { "epoch": 0.33, "grad_norm": 1.2874189146862596, "learning_rate": 7.884752430122458e-06, "loss": 0.7236, "step": 3184 }, { "epoch": 0.33, "grad_norm": 1.4029002652731637, "learning_rate": 7.883401610574338e-06, "loss": 0.6369, "step": 3185 }, { "epoch": 0.33, "grad_norm": 13.26841691649975, "learning_rate": 7.882050475635356e-06, "loss": 0.6088, "step": 3186 }, { "epoch": 0.33, "grad_norm": 1.2626367867220134, "learning_rate": 7.880699025453307e-06, "loss": 0.5553, "step": 3187 }, { "epoch": 0.33, "grad_norm": 1.587531567319097, "learning_rate": 7.879347260176008e-06, "loss": 0.7615, "step": 3188 }, { "epoch": 0.33, "grad_norm": 1.4194614028342878, "learning_rate": 7.877995179951324e-06, "loss": 0.6899, "step": 3189 }, { "epoch": 0.33, "grad_norm": 1.377350960339119, "learning_rate": 7.876642784927143e-06, "loss": 0.7147, "step": 3190 }, { "epoch": 0.33, "grad_norm": 1.6818828438107427, "learning_rate": 7.875290075251395e-06, "loss": 0.7073, "step": 3191 }, { "epoch": 0.33, "grad_norm": 1.5456332482949076, "learning_rate": 7.873937051072037e-06, "loss": 0.6409, "step": 3192 }, { "epoch": 0.33, "grad_norm": 1.5171734464962485, "learning_rate": 7.872583712537065e-06, "loss": 0.7365, "step": 3193 }, { "epoch": 0.33, "grad_norm": 1.3812684980771488, "learning_rate": 7.871230059794514e-06, "loss": 0.6536, "step": 3194 }, { "epoch": 0.33, "grad_norm": 1.4629668393540938, "learning_rate": 7.869876092992447e-06, "loss": 0.6998, "step": 3195 }, { "epoch": 0.33, "grad_norm": 1.4632053780005896, "learning_rate": 7.868521812278962e-06, "loss": 0.7752, "step": 3196 }, { "epoch": 0.33, "grad_norm": 1.9322881446928515, "learning_rate": 7.867167217802193e-06, "loss": 0.6938, "step": 3197 }, { "epoch": 0.33, "grad_norm": 1.5165726026704385, "learning_rate": 7.865812309710306e-06, "loss": 0.6437, "step": 3198 }, { "epoch": 0.33, "grad_norm": 1.3051190256975302, "learning_rate": 7.864457088151503e-06, "loss": 0.7187, "step": 3199 }, { "epoch": 0.33, "grad_norm": 1.3782025249929002, "learning_rate": 7.863101553274022e-06, "loss": 0.6711, "step": 3200 }, { "epoch": 0.33, "grad_norm": 1.4255976887794728, "learning_rate": 7.861745705226131e-06, "loss": 0.6384, "step": 3201 }, { "epoch": 0.33, "grad_norm": 1.4813269730281275, "learning_rate": 7.860389544156139e-06, "loss": 0.6915, "step": 3202 }, { "epoch": 0.33, "grad_norm": 1.5783513705121115, "learning_rate": 7.859033070212381e-06, "loss": 0.6912, "step": 3203 }, { "epoch": 0.33, "grad_norm": 1.5358065431070005, "learning_rate": 7.857676283543233e-06, "loss": 0.707, "step": 3204 }, { "epoch": 0.33, "grad_norm": 1.620846355273038, "learning_rate": 7.856319184297098e-06, "loss": 0.6144, "step": 3205 }, { "epoch": 0.33, "grad_norm": 1.3802255637797984, "learning_rate": 7.854961772622423e-06, "loss": 0.6728, "step": 3206 }, { "epoch": 0.33, "grad_norm": 1.3730889574370835, "learning_rate": 7.85360404866768e-06, "loss": 0.6238, "step": 3207 }, { "epoch": 0.33, "grad_norm": 1.4999928614672111, "learning_rate": 7.85224601258138e-06, "loss": 0.6838, "step": 3208 }, { "epoch": 0.33, "grad_norm": 1.3560162352507998, "learning_rate": 7.850887664512068e-06, "loss": 0.6612, "step": 3209 }, { "epoch": 0.33, "grad_norm": 1.3521234078106261, "learning_rate": 7.849529004608321e-06, "loss": 0.5895, "step": 3210 }, { "epoch": 0.33, "grad_norm": 1.6354478149882212, "learning_rate": 7.848170033018752e-06, "loss": 0.6123, "step": 3211 }, { "epoch": 0.33, "grad_norm": 1.4924807118345953, "learning_rate": 7.846810749892005e-06, "loss": 0.6467, "step": 3212 }, { "epoch": 0.33, "grad_norm": 1.264887179248318, "learning_rate": 7.845451155376765e-06, "loss": 0.6022, "step": 3213 }, { "epoch": 0.33, "grad_norm": 1.2188987523964943, "learning_rate": 7.844091249621742e-06, "loss": 0.5725, "step": 3214 }, { "epoch": 0.33, "grad_norm": 1.3986894184033756, "learning_rate": 7.842731032775688e-06, "loss": 0.706, "step": 3215 }, { "epoch": 0.33, "grad_norm": 1.4272889876348334, "learning_rate": 7.841370504987384e-06, "loss": 0.6641, "step": 3216 }, { "epoch": 0.33, "grad_norm": 1.3547729138329256, "learning_rate": 7.840009666405645e-06, "loss": 0.5937, "step": 3217 }, { "epoch": 0.33, "grad_norm": 1.3838320359799778, "learning_rate": 7.838648517179327e-06, "loss": 0.566, "step": 3218 }, { "epoch": 0.33, "grad_norm": 1.6397041987118053, "learning_rate": 7.837287057457307e-06, "loss": 0.7627, "step": 3219 }, { "epoch": 0.33, "grad_norm": 1.4270339803945151, "learning_rate": 7.83592528738851e-06, "loss": 0.6235, "step": 3220 }, { "epoch": 0.33, "grad_norm": 1.4928127732362864, "learning_rate": 7.834563207121885e-06, "loss": 0.6931, "step": 3221 }, { "epoch": 0.33, "grad_norm": 1.52930934924975, "learning_rate": 7.83320081680642e-06, "loss": 0.7316, "step": 3222 }, { "epoch": 0.33, "grad_norm": 1.3588619373935977, "learning_rate": 7.831838116591135e-06, "loss": 0.6147, "step": 3223 }, { "epoch": 0.33, "grad_norm": 1.423173409575675, "learning_rate": 7.830475106625082e-06, "loss": 0.7572, "step": 3224 }, { "epoch": 0.33, "grad_norm": 1.2632833504649874, "learning_rate": 7.829111787057352e-06, "loss": 0.6246, "step": 3225 }, { "epoch": 0.33, "grad_norm": 1.3394111208725867, "learning_rate": 7.827748158037067e-06, "loss": 0.6462, "step": 3226 }, { "epoch": 0.33, "grad_norm": 1.3323987590106476, "learning_rate": 7.82638421971338e-06, "loss": 0.5392, "step": 3227 }, { "epoch": 0.33, "grad_norm": 1.4804510792658978, "learning_rate": 7.825019972235483e-06, "loss": 0.6329, "step": 3228 }, { "epoch": 0.33, "grad_norm": 1.5277294314090886, "learning_rate": 7.8236554157526e-06, "loss": 0.6724, "step": 3229 }, { "epoch": 0.33, "grad_norm": 1.5996864746534882, "learning_rate": 7.822290550413988e-06, "loss": 0.7656, "step": 3230 }, { "epoch": 0.33, "grad_norm": 1.4364699458493755, "learning_rate": 7.820925376368936e-06, "loss": 0.6332, "step": 3231 }, { "epoch": 0.33, "grad_norm": 1.4818066323492696, "learning_rate": 7.819559893766769e-06, "loss": 0.6972, "step": 3232 }, { "epoch": 0.33, "grad_norm": 1.8713000760946898, "learning_rate": 7.818194102756848e-06, "loss": 0.5779, "step": 3233 }, { "epoch": 0.33, "grad_norm": 1.2270705544695781, "learning_rate": 7.816828003488564e-06, "loss": 0.7518, "step": 3234 }, { "epoch": 0.33, "grad_norm": 1.5080780592231946, "learning_rate": 7.81546159611134e-06, "loss": 0.673, "step": 3235 }, { "epoch": 0.33, "grad_norm": 1.6664961716088302, "learning_rate": 7.814094880774642e-06, "loss": 0.6586, "step": 3236 }, { "epoch": 0.33, "grad_norm": 1.479900154517557, "learning_rate": 7.81272785762796e-06, "loss": 0.571, "step": 3237 }, { "epoch": 0.33, "grad_norm": 1.4732996826500948, "learning_rate": 7.81136052682082e-06, "loss": 0.6155, "step": 3238 }, { "epoch": 0.33, "grad_norm": 1.3062323210301068, "learning_rate": 7.809992888502784e-06, "loss": 0.661, "step": 3239 }, { "epoch": 0.33, "grad_norm": 1.3410433773680186, "learning_rate": 7.808624942823447e-06, "loss": 0.6853, "step": 3240 }, { "epoch": 0.33, "grad_norm": 1.3696698674921857, "learning_rate": 7.807256689932436e-06, "loss": 0.6117, "step": 3241 }, { "epoch": 0.33, "grad_norm": 2.1922354913218958, "learning_rate": 7.805888129979411e-06, "loss": 0.642, "step": 3242 }, { "epoch": 0.33, "grad_norm": 1.3728089663346603, "learning_rate": 7.804519263114071e-06, "loss": 0.6645, "step": 3243 }, { "epoch": 0.33, "grad_norm": 1.3927926825061339, "learning_rate": 7.803150089486144e-06, "loss": 0.6223, "step": 3244 }, { "epoch": 0.33, "grad_norm": 1.4632997405735222, "learning_rate": 7.801780609245389e-06, "loss": 0.6473, "step": 3245 }, { "epoch": 0.33, "grad_norm": 1.4020634901235742, "learning_rate": 7.800410822541603e-06, "loss": 0.7053, "step": 3246 }, { "epoch": 0.33, "grad_norm": 1.4219890180279857, "learning_rate": 7.799040729524618e-06, "loss": 0.6169, "step": 3247 }, { "epoch": 0.33, "grad_norm": 1.4331603628508254, "learning_rate": 7.797670330344294e-06, "loss": 0.6995, "step": 3248 }, { "epoch": 0.33, "grad_norm": 1.3283530515542348, "learning_rate": 7.796299625150528e-06, "loss": 0.6625, "step": 3249 }, { "epoch": 0.33, "grad_norm": 1.3459831057498783, "learning_rate": 7.794928614093251e-06, "loss": 0.6413, "step": 3250 }, { "epoch": 0.33, "grad_norm": 1.3763777756626692, "learning_rate": 7.793557297322427e-06, "loss": 0.7171, "step": 3251 }, { "epoch": 0.33, "grad_norm": 1.3548765958883429, "learning_rate": 7.792185674988049e-06, "loss": 0.6901, "step": 3252 }, { "epoch": 0.33, "grad_norm": 1.4060296961315317, "learning_rate": 7.79081374724015e-06, "loss": 0.6274, "step": 3253 }, { "epoch": 0.33, "grad_norm": 1.5158997377340495, "learning_rate": 7.789441514228792e-06, "loss": 0.6006, "step": 3254 }, { "epoch": 0.33, "grad_norm": 1.418256742765439, "learning_rate": 7.788068976104075e-06, "loss": 0.652, "step": 3255 }, { "epoch": 0.33, "grad_norm": 1.3261498664315672, "learning_rate": 7.786696133016125e-06, "loss": 0.6416, "step": 3256 }, { "epoch": 0.33, "grad_norm": 1.2487136120678635, "learning_rate": 7.78532298511511e-06, "loss": 0.6142, "step": 3257 }, { "epoch": 0.33, "grad_norm": 1.4246593180631766, "learning_rate": 7.783949532551224e-06, "loss": 0.7485, "step": 3258 }, { "epoch": 0.33, "grad_norm": 1.292558240856679, "learning_rate": 7.782575775474696e-06, "loss": 0.6276, "step": 3259 }, { "epoch": 0.33, "grad_norm": 1.3427883627604165, "learning_rate": 7.781201714035795e-06, "loss": 0.5946, "step": 3260 }, { "epoch": 0.33, "grad_norm": 2.2729771700949337, "learning_rate": 7.779827348384814e-06, "loss": 0.5403, "step": 3261 }, { "epoch": 0.33, "grad_norm": 1.4102762178147936, "learning_rate": 7.778452678672083e-06, "loss": 0.7088, "step": 3262 }, { "epoch": 0.33, "grad_norm": 1.2777428696298656, "learning_rate": 7.777077705047966e-06, "loss": 0.5875, "step": 3263 }, { "epoch": 0.33, "grad_norm": 1.4234504751080606, "learning_rate": 7.77570242766286e-06, "loss": 0.7415, "step": 3264 }, { "epoch": 0.33, "grad_norm": 1.4501474568913748, "learning_rate": 7.774326846667199e-06, "loss": 0.6025, "step": 3265 }, { "epoch": 0.33, "grad_norm": 1.4325355874000774, "learning_rate": 7.772950962211439e-06, "loss": 0.7304, "step": 3266 }, { "epoch": 0.33, "grad_norm": 1.4933047455447597, "learning_rate": 7.77157477444608e-06, "loss": 0.6534, "step": 3267 }, { "epoch": 0.33, "grad_norm": 1.2375766681760283, "learning_rate": 7.77019828352165e-06, "loss": 0.5556, "step": 3268 }, { "epoch": 0.33, "grad_norm": 1.3123618090857285, "learning_rate": 7.768821489588715e-06, "loss": 0.5737, "step": 3269 }, { "epoch": 0.33, "grad_norm": 1.4510336560159847, "learning_rate": 7.767444392797865e-06, "loss": 0.6763, "step": 3270 }, { "epoch": 0.33, "grad_norm": 1.456506453315939, "learning_rate": 7.766066993299735e-06, "loss": 0.6809, "step": 3271 }, { "epoch": 0.33, "grad_norm": 1.4696461944673778, "learning_rate": 7.764689291244985e-06, "loss": 0.6182, "step": 3272 }, { "epoch": 0.33, "grad_norm": 1.4893721230707146, "learning_rate": 7.763311286784308e-06, "loss": 0.7321, "step": 3273 }, { "epoch": 0.33, "grad_norm": 1.4085268834907188, "learning_rate": 7.761932980068437e-06, "loss": 0.7667, "step": 3274 }, { "epoch": 0.33, "grad_norm": 1.420512865725823, "learning_rate": 7.760554371248126e-06, "loss": 0.5952, "step": 3275 }, { "epoch": 0.33, "grad_norm": 1.5087477029389504, "learning_rate": 7.759175460474176e-06, "loss": 0.6689, "step": 3276 }, { "epoch": 0.33, "grad_norm": 1.5787076600570025, "learning_rate": 7.75779624789741e-06, "loss": 0.6556, "step": 3277 }, { "epoch": 0.33, "grad_norm": 1.6466741766636894, "learning_rate": 7.756416733668695e-06, "loss": 0.6982, "step": 3278 }, { "epoch": 0.33, "grad_norm": 1.2899824077134185, "learning_rate": 7.755036917938917e-06, "loss": 0.6042, "step": 3279 }, { "epoch": 0.33, "grad_norm": 1.3261208308196133, "learning_rate": 7.753656800859002e-06, "loss": 0.5855, "step": 3280 }, { "epoch": 0.34, "grad_norm": 1.308822192271398, "learning_rate": 7.752276382579915e-06, "loss": 0.6325, "step": 3281 }, { "epoch": 0.34, "grad_norm": 1.4377986155826712, "learning_rate": 7.750895663252646e-06, "loss": 0.5575, "step": 3282 }, { "epoch": 0.34, "grad_norm": 1.4553864783631851, "learning_rate": 7.749514643028219e-06, "loss": 0.5704, "step": 3283 }, { "epoch": 0.34, "grad_norm": 1.4669219708354735, "learning_rate": 7.748133322057693e-06, "loss": 0.663, "step": 3284 }, { "epoch": 0.34, "grad_norm": 1.4409119782225526, "learning_rate": 7.746751700492159e-06, "loss": 0.6986, "step": 3285 }, { "epoch": 0.34, "grad_norm": 1.5307301583092867, "learning_rate": 7.745369778482741e-06, "loss": 0.6971, "step": 3286 }, { "epoch": 0.34, "grad_norm": 1.3509985168902379, "learning_rate": 7.743987556180595e-06, "loss": 0.5667, "step": 3287 }, { "epoch": 0.34, "grad_norm": 2.3459709928976453, "learning_rate": 7.74260503373691e-06, "loss": 0.6699, "step": 3288 }, { "epoch": 0.34, "grad_norm": 1.326109981362341, "learning_rate": 7.74122221130291e-06, "loss": 0.8165, "step": 3289 }, { "epoch": 0.34, "grad_norm": 1.8027894405567246, "learning_rate": 7.739839089029851e-06, "loss": 0.5941, "step": 3290 }, { "epoch": 0.34, "grad_norm": 1.3600956558445894, "learning_rate": 7.738455667069018e-06, "loss": 0.6079, "step": 3291 }, { "epoch": 0.34, "grad_norm": 1.386904542401977, "learning_rate": 7.737071945571734e-06, "loss": 0.7274, "step": 3292 }, { "epoch": 0.34, "grad_norm": 1.714117371649306, "learning_rate": 7.735687924689352e-06, "loss": 0.7451, "step": 3293 }, { "epoch": 0.34, "grad_norm": 1.3861492517537672, "learning_rate": 7.734303604573259e-06, "loss": 0.6468, "step": 3294 }, { "epoch": 0.34, "grad_norm": 1.4277457426748392, "learning_rate": 7.732918985374874e-06, "loss": 0.6551, "step": 3295 }, { "epoch": 0.34, "grad_norm": 1.3632424170025226, "learning_rate": 7.731534067245646e-06, "loss": 0.7025, "step": 3296 }, { "epoch": 0.34, "grad_norm": 1.49891462819164, "learning_rate": 7.730148850337062e-06, "loss": 0.709, "step": 3297 }, { "epoch": 0.34, "grad_norm": 1.3529362923292907, "learning_rate": 7.72876333480064e-06, "loss": 0.677, "step": 3298 }, { "epoch": 0.34, "grad_norm": 1.434580809907063, "learning_rate": 7.727377520787928e-06, "loss": 0.7246, "step": 3299 }, { "epoch": 0.34, "grad_norm": 1.257618903594639, "learning_rate": 7.725991408450508e-06, "loss": 0.7227, "step": 3300 }, { "epoch": 0.34, "grad_norm": 1.3454499134324225, "learning_rate": 7.724604997939998e-06, "loss": 0.6966, "step": 3301 }, { "epoch": 0.34, "grad_norm": 1.4230775135693585, "learning_rate": 7.723218289408043e-06, "loss": 0.7203, "step": 3302 }, { "epoch": 0.34, "grad_norm": 1.4160307790469022, "learning_rate": 7.721831283006323e-06, "loss": 0.6565, "step": 3303 }, { "epoch": 0.34, "grad_norm": 1.4667719191925028, "learning_rate": 7.720443978886552e-06, "loss": 0.6624, "step": 3304 }, { "epoch": 0.34, "grad_norm": 1.4045968125305484, "learning_rate": 7.719056377200475e-06, "loss": 0.6254, "step": 3305 }, { "epoch": 0.34, "grad_norm": 1.377002814118092, "learning_rate": 7.717668478099873e-06, "loss": 0.637, "step": 3306 }, { "epoch": 0.34, "grad_norm": 1.309323258524344, "learning_rate": 7.716280281736553e-06, "loss": 0.5687, "step": 3307 }, { "epoch": 0.34, "grad_norm": 1.5849491499508603, "learning_rate": 7.714891788262357e-06, "loss": 0.7081, "step": 3308 }, { "epoch": 0.34, "grad_norm": 1.5123670560940516, "learning_rate": 7.713502997829164e-06, "loss": 0.6988, "step": 3309 }, { "epoch": 0.34, "grad_norm": 1.2659187162559398, "learning_rate": 7.71211391058888e-06, "loss": 0.6647, "step": 3310 }, { "epoch": 0.34, "grad_norm": 1.2935328427927166, "learning_rate": 7.710724526693445e-06, "loss": 0.6238, "step": 3311 }, { "epoch": 0.34, "grad_norm": 1.4112938154401256, "learning_rate": 7.709334846294833e-06, "loss": 0.5677, "step": 3312 }, { "epoch": 0.34, "grad_norm": 1.1491429502783406, "learning_rate": 7.707944869545052e-06, "loss": 0.6797, "step": 3313 }, { "epoch": 0.34, "grad_norm": 1.4804270696607227, "learning_rate": 7.706554596596134e-06, "loss": 0.695, "step": 3314 }, { "epoch": 0.34, "grad_norm": 2.027314332652494, "learning_rate": 7.705164027600154e-06, "loss": 0.7421, "step": 3315 }, { "epoch": 0.34, "grad_norm": 1.465618830978413, "learning_rate": 7.703773162709212e-06, "loss": 0.6838, "step": 3316 }, { "epoch": 0.34, "grad_norm": 1.3718005695125224, "learning_rate": 7.702382002075443e-06, "loss": 0.6301, "step": 3317 }, { "epoch": 0.34, "grad_norm": 1.4324994028473885, "learning_rate": 7.700990545851015e-06, "loss": 0.6809, "step": 3318 }, { "epoch": 0.34, "grad_norm": 1.24069388939026, "learning_rate": 7.699598794188128e-06, "loss": 0.5974, "step": 3319 }, { "epoch": 0.34, "grad_norm": 1.277535707897165, "learning_rate": 7.698206747239012e-06, "loss": 0.622, "step": 3320 }, { "epoch": 0.34, "grad_norm": 1.3749645991044448, "learning_rate": 7.696814405155934e-06, "loss": 0.6319, "step": 3321 }, { "epoch": 0.34, "grad_norm": 1.3956217273608391, "learning_rate": 7.69542176809119e-06, "loss": 0.6945, "step": 3322 }, { "epoch": 0.34, "grad_norm": 1.4777340687164238, "learning_rate": 7.694028836197107e-06, "loss": 0.7444, "step": 3323 }, { "epoch": 0.34, "grad_norm": 1.5808243526851922, "learning_rate": 7.692635609626045e-06, "loss": 0.6012, "step": 3324 }, { "epoch": 0.34, "grad_norm": 1.3793848696093594, "learning_rate": 7.691242088530401e-06, "loss": 0.7029, "step": 3325 }, { "epoch": 0.34, "grad_norm": 1.2790847517920518, "learning_rate": 7.689848273062599e-06, "loss": 0.5946, "step": 3326 }, { "epoch": 0.34, "grad_norm": 1.3435266122229552, "learning_rate": 7.688454163375095e-06, "loss": 0.6263, "step": 3327 }, { "epoch": 0.34, "grad_norm": 1.410328097499068, "learning_rate": 7.687059759620381e-06, "loss": 0.6145, "step": 3328 }, { "epoch": 0.34, "grad_norm": 1.2583861453834382, "learning_rate": 7.685665061950978e-06, "loss": 0.685, "step": 3329 }, { "epoch": 0.34, "grad_norm": 2.8516682071146846, "learning_rate": 7.68427007051944e-06, "loss": 0.6797, "step": 3330 }, { "epoch": 0.34, "grad_norm": 1.3434881500553144, "learning_rate": 7.682874785478354e-06, "loss": 0.6963, "step": 3331 }, { "epoch": 0.34, "grad_norm": 1.500556793914849, "learning_rate": 7.681479206980338e-06, "loss": 0.6252, "step": 3332 }, { "epoch": 0.34, "grad_norm": 1.3081808806987287, "learning_rate": 7.680083335178042e-06, "loss": 0.7003, "step": 3333 }, { "epoch": 0.34, "grad_norm": 1.484557585824953, "learning_rate": 7.678687170224148e-06, "loss": 0.6885, "step": 3334 }, { "epoch": 0.34, "grad_norm": 1.3610158746227314, "learning_rate": 7.677290712271374e-06, "loss": 0.6257, "step": 3335 }, { "epoch": 0.34, "grad_norm": 1.310672894180565, "learning_rate": 7.675893961472462e-06, "loss": 0.6714, "step": 3336 }, { "epoch": 0.34, "grad_norm": 1.4498870913130273, "learning_rate": 7.674496917980195e-06, "loss": 0.724, "step": 3337 }, { "epoch": 0.34, "grad_norm": 1.3843904930241027, "learning_rate": 7.673099581947381e-06, "loss": 0.5696, "step": 3338 }, { "epoch": 0.34, "grad_norm": 1.4615936682756567, "learning_rate": 7.671701953526863e-06, "loss": 0.6797, "step": 3339 }, { "epoch": 0.34, "grad_norm": 1.3132382355352776, "learning_rate": 7.670304032871517e-06, "loss": 0.5422, "step": 3340 }, { "epoch": 0.34, "grad_norm": 1.3782275930024863, "learning_rate": 7.66890582013425e-06, "loss": 0.6666, "step": 3341 }, { "epoch": 0.34, "grad_norm": 1.314677266656617, "learning_rate": 7.667507315467999e-06, "loss": 0.6677, "step": 3342 }, { "epoch": 0.34, "grad_norm": 1.5374294945537128, "learning_rate": 7.666108519025733e-06, "loss": 0.5709, "step": 3343 }, { "epoch": 0.34, "grad_norm": 1.3428799299033023, "learning_rate": 7.66470943096046e-06, "loss": 0.5408, "step": 3344 }, { "epoch": 0.34, "grad_norm": 1.4318838784453092, "learning_rate": 7.663310051425209e-06, "loss": 0.6311, "step": 3345 }, { "epoch": 0.34, "grad_norm": 1.369843791050783, "learning_rate": 7.66191038057305e-06, "loss": 0.6491, "step": 3346 }, { "epoch": 0.34, "grad_norm": 1.5579341050690871, "learning_rate": 7.660510418557078e-06, "loss": 0.6705, "step": 3347 }, { "epoch": 0.34, "grad_norm": 1.3309048034252833, "learning_rate": 7.659110165530424e-06, "loss": 0.7477, "step": 3348 }, { "epoch": 0.34, "grad_norm": 1.4345634825649118, "learning_rate": 7.657709621646253e-06, "loss": 0.7067, "step": 3349 }, { "epoch": 0.34, "grad_norm": 1.394338521759616, "learning_rate": 7.656308787057753e-06, "loss": 0.7091, "step": 3350 }, { "epoch": 0.34, "grad_norm": 1.3312940634874972, "learning_rate": 7.654907661918153e-06, "loss": 0.5683, "step": 3351 }, { "epoch": 0.34, "grad_norm": 1.3680148835759516, "learning_rate": 7.65350624638071e-06, "loss": 0.659, "step": 3352 }, { "epoch": 0.34, "grad_norm": 1.3689078766314853, "learning_rate": 7.652104540598712e-06, "loss": 0.631, "step": 3353 }, { "epoch": 0.34, "grad_norm": 1.4150950782517686, "learning_rate": 7.65070254472548e-06, "loss": 0.6688, "step": 3354 }, { "epoch": 0.34, "grad_norm": 1.389144194925525, "learning_rate": 7.64930025891437e-06, "loss": 0.6639, "step": 3355 }, { "epoch": 0.34, "grad_norm": 1.4899807814845, "learning_rate": 7.647897683318758e-06, "loss": 0.607, "step": 3356 }, { "epoch": 0.34, "grad_norm": 1.3906451716924177, "learning_rate": 7.646494818092067e-06, "loss": 0.6517, "step": 3357 }, { "epoch": 0.34, "grad_norm": 1.4217513364435779, "learning_rate": 7.645091663387742e-06, "loss": 0.5887, "step": 3358 }, { "epoch": 0.34, "grad_norm": 1.4914864958918796, "learning_rate": 7.643688219359265e-06, "loss": 0.7085, "step": 3359 }, { "epoch": 0.34, "grad_norm": 1.3207544131100266, "learning_rate": 7.64228448616014e-06, "loss": 0.6774, "step": 3360 }, { "epoch": 0.34, "grad_norm": 1.4092626146767888, "learning_rate": 7.640880463943915e-06, "loss": 0.5581, "step": 3361 }, { "epoch": 0.34, "grad_norm": 1.5003977015633532, "learning_rate": 7.639476152864163e-06, "loss": 0.7224, "step": 3362 }, { "epoch": 0.34, "grad_norm": 1.2881416132322743, "learning_rate": 7.638071553074491e-06, "loss": 0.6487, "step": 3363 }, { "epoch": 0.34, "grad_norm": 1.6203148723172949, "learning_rate": 7.63666666472853e-06, "loss": 0.7687, "step": 3364 }, { "epoch": 0.34, "grad_norm": 1.32680751338943, "learning_rate": 7.63526148797996e-06, "loss": 0.7304, "step": 3365 }, { "epoch": 0.34, "grad_norm": 1.7608666807606868, "learning_rate": 7.633856022982472e-06, "loss": 0.7213, "step": 3366 }, { "epoch": 0.34, "grad_norm": 1.2747297058461733, "learning_rate": 7.6324502698898e-06, "loss": 0.6299, "step": 3367 }, { "epoch": 0.34, "grad_norm": 1.4173417750353972, "learning_rate": 7.63104422885571e-06, "loss": 0.8039, "step": 3368 }, { "epoch": 0.34, "grad_norm": 1.4603122828973563, "learning_rate": 7.629637900033993e-06, "loss": 0.6582, "step": 3369 }, { "epoch": 0.34, "grad_norm": 1.2923729062092604, "learning_rate": 7.628231283578479e-06, "loss": 0.6399, "step": 3370 }, { "epoch": 0.34, "grad_norm": 1.3688643726551744, "learning_rate": 7.626824379643023e-06, "loss": 0.7586, "step": 3371 }, { "epoch": 0.34, "grad_norm": 1.6176561399942877, "learning_rate": 7.625417188381517e-06, "loss": 0.5997, "step": 3372 }, { "epoch": 0.34, "grad_norm": 1.324609250199181, "learning_rate": 7.62400970994788e-06, "loss": 0.5678, "step": 3373 }, { "epoch": 0.34, "grad_norm": 1.4455478799486678, "learning_rate": 7.622601944496064e-06, "loss": 0.714, "step": 3374 }, { "epoch": 0.34, "grad_norm": 1.3536277225467668, "learning_rate": 7.621193892180054e-06, "loss": 0.6728, "step": 3375 }, { "epoch": 0.34, "grad_norm": 1.4499961626190179, "learning_rate": 7.619785553153864e-06, "loss": 0.5976, "step": 3376 }, { "epoch": 0.34, "grad_norm": 1.3195227957768039, "learning_rate": 7.618376927571541e-06, "loss": 0.6036, "step": 3377 }, { "epoch": 0.34, "grad_norm": 1.4506419252841158, "learning_rate": 7.6169680155871605e-06, "loss": 0.6302, "step": 3378 }, { "epoch": 0.35, "grad_norm": 1.4570795947301733, "learning_rate": 7.6155588173548335e-06, "loss": 0.6915, "step": 3379 }, { "epoch": 0.35, "grad_norm": 1.3471234405702501, "learning_rate": 7.6141493330287e-06, "loss": 0.6483, "step": 3380 }, { "epoch": 0.35, "grad_norm": 1.558744389632982, "learning_rate": 7.6127395627629295e-06, "loss": 0.7007, "step": 3381 }, { "epoch": 0.35, "grad_norm": 1.5498823286790226, "learning_rate": 7.6113295067117274e-06, "loss": 0.7329, "step": 3382 }, { "epoch": 0.35, "grad_norm": 1.6506367346063004, "learning_rate": 7.6099191650293265e-06, "loss": 0.6678, "step": 3383 }, { "epoch": 0.35, "grad_norm": 1.4698989138104377, "learning_rate": 7.608508537869994e-06, "loss": 0.5909, "step": 3384 }, { "epoch": 0.35, "grad_norm": 1.3894785810291215, "learning_rate": 7.607097625388022e-06, "loss": 0.6375, "step": 3385 }, { "epoch": 0.35, "grad_norm": 1.3587622737318337, "learning_rate": 7.605686427737744e-06, "loss": 0.6979, "step": 3386 }, { "epoch": 0.35, "grad_norm": 1.5115653937034825, "learning_rate": 7.604274945073515e-06, "loss": 0.6804, "step": 3387 }, { "epoch": 0.35, "grad_norm": 1.4809201172846214, "learning_rate": 7.6028631775497255e-06, "loss": 0.6094, "step": 3388 }, { "epoch": 0.35, "grad_norm": 1.4279204049345358, "learning_rate": 7.601451125320798e-06, "loss": 0.6233, "step": 3389 }, { "epoch": 0.35, "grad_norm": 1.4414064359649468, "learning_rate": 7.600038788541184e-06, "loss": 0.7879, "step": 3390 }, { "epoch": 0.35, "grad_norm": 1.3987271929947063, "learning_rate": 7.598626167365369e-06, "loss": 0.6632, "step": 3391 }, { "epoch": 0.35, "grad_norm": 1.2707645558771041, "learning_rate": 7.597213261947863e-06, "loss": 0.637, "step": 3392 }, { "epoch": 0.35, "grad_norm": 1.3863984701903536, "learning_rate": 7.595800072443218e-06, "loss": 0.6345, "step": 3393 }, { "epoch": 0.35, "grad_norm": 1.4079600258546126, "learning_rate": 7.594386599006006e-06, "loss": 0.6801, "step": 3394 }, { "epoch": 0.35, "grad_norm": 1.4423481140557814, "learning_rate": 7.5929728417908374e-06, "loss": 0.6355, "step": 3395 }, { "epoch": 0.35, "grad_norm": 1.3532491029418765, "learning_rate": 7.59155880095235e-06, "loss": 0.6832, "step": 3396 }, { "epoch": 0.35, "grad_norm": 1.765100294106647, "learning_rate": 7.590144476645213e-06, "loss": 0.6439, "step": 3397 }, { "epoch": 0.35, "grad_norm": 1.4342122672024256, "learning_rate": 7.588729869024131e-06, "loss": 0.6824, "step": 3398 }, { "epoch": 0.35, "grad_norm": 1.3602245227911205, "learning_rate": 7.5873149782438295e-06, "loss": 0.6479, "step": 3399 }, { "epoch": 0.35, "grad_norm": 1.4132567080636873, "learning_rate": 7.5858998044590785e-06, "loss": 0.7198, "step": 3400 }, { "epoch": 0.35, "grad_norm": 1.5488472911251343, "learning_rate": 7.584484347824668e-06, "loss": 0.7291, "step": 3401 }, { "epoch": 0.35, "grad_norm": 1.4197534998981778, "learning_rate": 7.583068608495421e-06, "loss": 0.77, "step": 3402 }, { "epoch": 0.35, "grad_norm": 1.6914564923439845, "learning_rate": 7.581652586626198e-06, "loss": 0.6542, "step": 3403 }, { "epoch": 0.35, "grad_norm": 1.3775634810383168, "learning_rate": 7.580236282371882e-06, "loss": 0.6077, "step": 3404 }, { "epoch": 0.35, "grad_norm": 1.4664698348691612, "learning_rate": 7.578819695887394e-06, "loss": 0.6974, "step": 3405 }, { "epoch": 0.35, "grad_norm": 1.3663845775869188, "learning_rate": 7.577402827327675e-06, "loss": 0.6384, "step": 3406 }, { "epoch": 0.35, "grad_norm": 1.3990638008804215, "learning_rate": 7.575985676847715e-06, "loss": 0.629, "step": 3407 }, { "epoch": 0.35, "grad_norm": 1.440283370258478, "learning_rate": 7.574568244602516e-06, "loss": 0.6293, "step": 3408 }, { "epoch": 0.35, "grad_norm": 1.34541106160336, "learning_rate": 7.573150530747122e-06, "loss": 0.6423, "step": 3409 }, { "epoch": 0.35, "grad_norm": 1.2973562979259683, "learning_rate": 7.571732535436602e-06, "loss": 0.6744, "step": 3410 }, { "epoch": 0.35, "grad_norm": 1.6325278322643515, "learning_rate": 7.570314258826062e-06, "loss": 0.7636, "step": 3411 }, { "epoch": 0.35, "grad_norm": 1.4076554620912085, "learning_rate": 7.568895701070636e-06, "loss": 0.6393, "step": 3412 }, { "epoch": 0.35, "grad_norm": 1.254961636718017, "learning_rate": 7.567476862325483e-06, "loss": 0.5013, "step": 3413 }, { "epoch": 0.35, "grad_norm": 1.4699875212701397, "learning_rate": 7.5660577427458e-06, "loss": 0.6989, "step": 3414 }, { "epoch": 0.35, "grad_norm": 1.503541971077859, "learning_rate": 7.564638342486814e-06, "loss": 0.7159, "step": 3415 }, { "epoch": 0.35, "grad_norm": 1.3964112158449355, "learning_rate": 7.5632186617037816e-06, "loss": 0.6546, "step": 3416 }, { "epoch": 0.35, "grad_norm": 1.4074114808664298, "learning_rate": 7.561798700551985e-06, "loss": 0.6902, "step": 3417 }, { "epoch": 0.35, "grad_norm": 1.4787374014676782, "learning_rate": 7.560378459186748e-06, "loss": 0.6415, "step": 3418 }, { "epoch": 0.35, "grad_norm": 1.2513696552414748, "learning_rate": 7.558957937763416e-06, "loss": 0.5979, "step": 3419 }, { "epoch": 0.35, "grad_norm": 1.47512835542606, "learning_rate": 7.557537136437364e-06, "loss": 0.5345, "step": 3420 }, { "epoch": 0.35, "grad_norm": 1.6261037563118792, "learning_rate": 7.556116055364008e-06, "loss": 0.667, "step": 3421 }, { "epoch": 0.35, "grad_norm": 1.2635532460687546, "learning_rate": 7.554694694698783e-06, "loss": 0.6708, "step": 3422 }, { "epoch": 0.35, "grad_norm": 1.5712017552049276, "learning_rate": 7.553273054597163e-06, "loss": 0.6097, "step": 3423 }, { "epoch": 0.35, "grad_norm": 1.34980426722248, "learning_rate": 7.5518511352146466e-06, "loss": 0.6812, "step": 3424 }, { "epoch": 0.35, "grad_norm": 1.2984375123938492, "learning_rate": 7.5504289367067665e-06, "loss": 0.7147, "step": 3425 }, { "epoch": 0.35, "grad_norm": 1.4354045135429123, "learning_rate": 7.549006459229087e-06, "loss": 0.7464, "step": 3426 }, { "epoch": 0.35, "grad_norm": 1.609763836252723, "learning_rate": 7.547583702937195e-06, "loss": 0.6876, "step": 3427 }, { "epoch": 0.35, "grad_norm": 1.2877804119040854, "learning_rate": 7.546160667986722e-06, "loss": 0.675, "step": 3428 }, { "epoch": 0.35, "grad_norm": 1.4216371179683407, "learning_rate": 7.544737354533314e-06, "loss": 0.8281, "step": 3429 }, { "epoch": 0.35, "grad_norm": 1.5256308385645576, "learning_rate": 7.543313762732661e-06, "loss": 0.6645, "step": 3430 }, { "epoch": 0.35, "grad_norm": 1.4164774483076754, "learning_rate": 7.541889892740473e-06, "loss": 0.629, "step": 3431 }, { "epoch": 0.35, "grad_norm": 1.2654098909900626, "learning_rate": 7.540465744712497e-06, "loss": 0.5198, "step": 3432 }, { "epoch": 0.35, "grad_norm": 1.394488660433993, "learning_rate": 7.539041318804512e-06, "loss": 0.6313, "step": 3433 }, { "epoch": 0.35, "grad_norm": 1.498564267182782, "learning_rate": 7.5376166151723164e-06, "loss": 0.6962, "step": 3434 }, { "epoch": 0.35, "grad_norm": 1.282583125087625, "learning_rate": 7.536191633971753e-06, "loss": 0.6514, "step": 3435 }, { "epoch": 0.35, "grad_norm": 1.4212130920046557, "learning_rate": 7.534766375358686e-06, "loss": 0.6487, "step": 3436 }, { "epoch": 0.35, "grad_norm": 1.4365641232097035, "learning_rate": 7.5333408394890116e-06, "loss": 0.7033, "step": 3437 }, { "epoch": 0.35, "grad_norm": 1.3190231687699052, "learning_rate": 7.531915026518659e-06, "loss": 0.659, "step": 3438 }, { "epoch": 0.35, "grad_norm": 1.3621290862134743, "learning_rate": 7.530488936603584e-06, "loss": 0.5743, "step": 3439 }, { "epoch": 0.35, "grad_norm": 2.210291168268242, "learning_rate": 7.5290625698997765e-06, "loss": 0.7886, "step": 3440 }, { "epoch": 0.35, "grad_norm": 1.4466972638018745, "learning_rate": 7.527635926563251e-06, "loss": 0.6683, "step": 3441 }, { "epoch": 0.35, "grad_norm": 1.4006810861287664, "learning_rate": 7.52620900675006e-06, "loss": 0.7576, "step": 3442 }, { "epoch": 0.35, "grad_norm": 1.3534695879259324, "learning_rate": 7.524781810616279e-06, "loss": 0.6882, "step": 3443 }, { "epoch": 0.35, "grad_norm": 1.457329430532135, "learning_rate": 7.523354338318019e-06, "loss": 0.6204, "step": 3444 }, { "epoch": 0.35, "grad_norm": 1.3569250253069938, "learning_rate": 7.521926590011419e-06, "loss": 0.6209, "step": 3445 }, { "epoch": 0.35, "grad_norm": 1.3466391010287135, "learning_rate": 7.520498565852647e-06, "loss": 0.6554, "step": 3446 }, { "epoch": 0.35, "grad_norm": 1.3025290700359144, "learning_rate": 7.5190702659979045e-06, "loss": 0.6073, "step": 3447 }, { "epoch": 0.35, "grad_norm": 1.298272764140661, "learning_rate": 7.517641690603419e-06, "loss": 0.5994, "step": 3448 }, { "epoch": 0.35, "grad_norm": 1.4609170383459924, "learning_rate": 7.5162128398254495e-06, "loss": 0.6318, "step": 3449 }, { "epoch": 0.35, "grad_norm": 1.6300139676423235, "learning_rate": 7.514783713820288e-06, "loss": 0.6805, "step": 3450 }, { "epoch": 0.35, "grad_norm": 1.5039443118880969, "learning_rate": 7.513354312744255e-06, "loss": 0.6862, "step": 3451 }, { "epoch": 0.35, "grad_norm": 1.4463055449513653, "learning_rate": 7.511924636753699e-06, "loss": 0.6114, "step": 3452 }, { "epoch": 0.35, "grad_norm": 1.3702056816911217, "learning_rate": 7.510494686005001e-06, "loss": 0.7314, "step": 3453 }, { "epoch": 0.35, "grad_norm": 1.5610691632000582, "learning_rate": 7.50906446065457e-06, "loss": 0.664, "step": 3454 }, { "epoch": 0.35, "grad_norm": 1.2642045071687193, "learning_rate": 7.507633960858848e-06, "loss": 0.6184, "step": 3455 }, { "epoch": 0.35, "grad_norm": 1.4853104196957156, "learning_rate": 7.506203186774304e-06, "loss": 0.6939, "step": 3456 }, { "epoch": 0.35, "grad_norm": 1.6012546103373497, "learning_rate": 7.504772138557437e-06, "loss": 0.6358, "step": 3457 }, { "epoch": 0.35, "grad_norm": 1.5163808739307612, "learning_rate": 7.5033408163647795e-06, "loss": 0.6311, "step": 3458 }, { "epoch": 0.35, "grad_norm": 1.3524557850238472, "learning_rate": 7.501909220352891e-06, "loss": 0.6316, "step": 3459 }, { "epoch": 0.35, "grad_norm": 1.3240254964651192, "learning_rate": 7.500477350678361e-06, "loss": 0.6177, "step": 3460 }, { "epoch": 0.35, "grad_norm": 1.6447224647633278, "learning_rate": 7.499045207497811e-06, "loss": 0.7174, "step": 3461 }, { "epoch": 0.35, "grad_norm": 1.3830693779293548, "learning_rate": 7.497612790967891e-06, "loss": 0.7278, "step": 3462 }, { "epoch": 0.35, "grad_norm": 1.6313055039280242, "learning_rate": 7.496180101245279e-06, "loss": 0.5711, "step": 3463 }, { "epoch": 0.35, "grad_norm": 1.3822089777014497, "learning_rate": 7.494747138486686e-06, "loss": 0.7578, "step": 3464 }, { "epoch": 0.35, "grad_norm": 1.1389132345909083, "learning_rate": 7.493313902848851e-06, "loss": 0.616, "step": 3465 }, { "epoch": 0.35, "grad_norm": 1.5781759444376389, "learning_rate": 7.491880394488544e-06, "loss": 0.6857, "step": 3466 }, { "epoch": 0.35, "grad_norm": 5.904479659258149, "learning_rate": 7.4904466135625644e-06, "loss": 0.5778, "step": 3467 }, { "epoch": 0.35, "grad_norm": 1.563324583421775, "learning_rate": 7.489012560227742e-06, "loss": 0.6368, "step": 3468 }, { "epoch": 0.35, "grad_norm": 1.5136340218102573, "learning_rate": 7.487578234640935e-06, "loss": 0.7218, "step": 3469 }, { "epoch": 0.35, "grad_norm": 2.008780095989577, "learning_rate": 7.4861436369590316e-06, "loss": 0.7308, "step": 3470 }, { "epoch": 0.35, "grad_norm": 1.7973457586065213, "learning_rate": 7.484708767338951e-06, "loss": 0.7185, "step": 3471 }, { "epoch": 0.35, "grad_norm": 1.356121124110509, "learning_rate": 7.48327362593764e-06, "loss": 0.6977, "step": 3472 }, { "epoch": 0.35, "grad_norm": 3.6327039021427074, "learning_rate": 7.481838212912079e-06, "loss": 0.6635, "step": 3473 }, { "epoch": 0.35, "grad_norm": 1.3914802202760255, "learning_rate": 7.480402528419274e-06, "loss": 0.6178, "step": 3474 }, { "epoch": 0.35, "grad_norm": 1.6275560998481553, "learning_rate": 7.478966572616264e-06, "loss": 0.6781, "step": 3475 }, { "epoch": 0.35, "grad_norm": 1.5873051448295017, "learning_rate": 7.477530345660113e-06, "loss": 0.7426, "step": 3476 }, { "epoch": 0.36, "grad_norm": 1.2693961208318332, "learning_rate": 7.476093847707922e-06, "loss": 0.6444, "step": 3477 }, { "epoch": 0.36, "grad_norm": 1.4476184268646182, "learning_rate": 7.474657078916812e-06, "loss": 0.6751, "step": 3478 }, { "epoch": 0.36, "grad_norm": 1.2525722533723835, "learning_rate": 7.473220039443942e-06, "loss": 0.635, "step": 3479 }, { "epoch": 0.36, "grad_norm": 1.4822967673840954, "learning_rate": 7.4717827294464996e-06, "loss": 0.6101, "step": 3480 }, { "epoch": 0.36, "grad_norm": 1.5333053905300122, "learning_rate": 7.470345149081696e-06, "loss": 0.6537, "step": 3481 }, { "epoch": 0.36, "grad_norm": 1.3689181477498749, "learning_rate": 7.468907298506779e-06, "loss": 0.6681, "step": 3482 }, { "epoch": 0.36, "grad_norm": 1.3184141254050081, "learning_rate": 7.467469177879021e-06, "loss": 0.6782, "step": 3483 }, { "epoch": 0.36, "grad_norm": 1.5135029516275356, "learning_rate": 7.466030787355724e-06, "loss": 0.7767, "step": 3484 }, { "epoch": 0.36, "grad_norm": 1.3632875973119511, "learning_rate": 7.464592127094228e-06, "loss": 0.5579, "step": 3485 }, { "epoch": 0.36, "grad_norm": 1.1813510840438561, "learning_rate": 7.463153197251889e-06, "loss": 0.6019, "step": 3486 }, { "epoch": 0.36, "grad_norm": 1.445284979078064, "learning_rate": 7.461713997986102e-06, "loss": 0.6537, "step": 3487 }, { "epoch": 0.36, "grad_norm": 1.3065048069168854, "learning_rate": 7.460274529454289e-06, "loss": 0.6198, "step": 3488 }, { "epoch": 0.36, "grad_norm": 1.349633999006486, "learning_rate": 7.458834791813901e-06, "loss": 0.5489, "step": 3489 }, { "epoch": 0.36, "grad_norm": 4.593118547304828, "learning_rate": 7.457394785222419e-06, "loss": 0.6181, "step": 3490 }, { "epoch": 0.36, "grad_norm": 1.445835359001453, "learning_rate": 7.455954509837352e-06, "loss": 0.657, "step": 3491 }, { "epoch": 0.36, "grad_norm": 1.303462035653078, "learning_rate": 7.454513965816242e-06, "loss": 0.6141, "step": 3492 }, { "epoch": 0.36, "grad_norm": 1.4964175339560113, "learning_rate": 7.453073153316654e-06, "loss": 0.646, "step": 3493 }, { "epoch": 0.36, "grad_norm": 1.3852408300111525, "learning_rate": 7.451632072496189e-06, "loss": 0.703, "step": 3494 }, { "epoch": 0.36, "grad_norm": 1.4598863361309649, "learning_rate": 7.450190723512475e-06, "loss": 0.665, "step": 3495 }, { "epoch": 0.36, "grad_norm": 1.321648495433645, "learning_rate": 7.448749106523168e-06, "loss": 0.6264, "step": 3496 }, { "epoch": 0.36, "grad_norm": 1.6364931554255633, "learning_rate": 7.447307221685953e-06, "loss": 0.6538, "step": 3497 }, { "epoch": 0.36, "grad_norm": 1.3561876782341424, "learning_rate": 7.445865069158546e-06, "loss": 0.6967, "step": 3498 }, { "epoch": 0.36, "grad_norm": 1.4323907537424605, "learning_rate": 7.444422649098697e-06, "loss": 0.6623, "step": 3499 }, { "epoch": 0.36, "grad_norm": 1.5335384650678203, "learning_rate": 7.4429799616641714e-06, "loss": 0.7277, "step": 3500 }, { "epoch": 0.36, "grad_norm": 2.1727822005716595, "learning_rate": 7.44153700701278e-06, "loss": 0.6203, "step": 3501 }, { "epoch": 0.36, "grad_norm": 1.4056685149310055, "learning_rate": 7.44009378530235e-06, "loss": 0.5905, "step": 3502 }, { "epoch": 0.36, "grad_norm": 1.5659382206339607, "learning_rate": 7.438650296690748e-06, "loss": 0.6924, "step": 3503 }, { "epoch": 0.36, "grad_norm": 1.3706444762790364, "learning_rate": 7.437206541335861e-06, "loss": 0.6714, "step": 3504 }, { "epoch": 0.36, "grad_norm": 1.3077898674873016, "learning_rate": 7.4357625193956086e-06, "loss": 0.6572, "step": 3505 }, { "epoch": 0.36, "grad_norm": 1.657440383450956, "learning_rate": 7.434318231027945e-06, "loss": 0.5992, "step": 3506 }, { "epoch": 0.36, "grad_norm": 1.2883556808042316, "learning_rate": 7.432873676390845e-06, "loss": 0.6341, "step": 3507 }, { "epoch": 0.36, "grad_norm": 1.4171689227124382, "learning_rate": 7.431428855642318e-06, "loss": 0.5974, "step": 3508 }, { "epoch": 0.36, "grad_norm": 1.3352333374800345, "learning_rate": 7.429983768940398e-06, "loss": 0.5994, "step": 3509 }, { "epoch": 0.36, "grad_norm": 1.4684946634261338, "learning_rate": 7.428538416443154e-06, "loss": 0.634, "step": 3510 }, { "epoch": 0.36, "grad_norm": 1.3074116776919922, "learning_rate": 7.427092798308678e-06, "loss": 0.6161, "step": 3511 }, { "epoch": 0.36, "grad_norm": 1.4910520669162122, "learning_rate": 7.425646914695095e-06, "loss": 0.6702, "step": 3512 }, { "epoch": 0.36, "grad_norm": 1.3915592387458837, "learning_rate": 7.4242007657605595e-06, "loss": 0.664, "step": 3513 }, { "epoch": 0.36, "grad_norm": 1.394506848094982, "learning_rate": 7.422754351663252e-06, "loss": 0.6918, "step": 3514 }, { "epoch": 0.36, "grad_norm": 1.473368097248594, "learning_rate": 7.421307672561383e-06, "loss": 0.6319, "step": 3515 }, { "epoch": 0.36, "grad_norm": 1.3290308237507864, "learning_rate": 7.419860728613193e-06, "loss": 0.6136, "step": 3516 }, { "epoch": 0.36, "grad_norm": 1.3778244348303987, "learning_rate": 7.418413519976951e-06, "loss": 0.6989, "step": 3517 }, { "epoch": 0.36, "grad_norm": 1.4037400547365249, "learning_rate": 7.416966046810957e-06, "loss": 0.6041, "step": 3518 }, { "epoch": 0.36, "grad_norm": 1.632190653028426, "learning_rate": 7.415518309273533e-06, "loss": 0.667, "step": 3519 }, { "epoch": 0.36, "grad_norm": 1.3747879882305543, "learning_rate": 7.41407030752304e-06, "loss": 0.7708, "step": 3520 }, { "epoch": 0.36, "grad_norm": 1.482060558836773, "learning_rate": 7.4126220417178585e-06, "loss": 0.649, "step": 3521 }, { "epoch": 0.36, "grad_norm": 1.6312539997420312, "learning_rate": 7.411173512016404e-06, "loss": 0.6148, "step": 3522 }, { "epoch": 0.36, "grad_norm": 1.4824979490748613, "learning_rate": 7.409724718577118e-06, "loss": 0.7285, "step": 3523 }, { "epoch": 0.36, "grad_norm": 1.525326113798973, "learning_rate": 7.408275661558473e-06, "loss": 0.633, "step": 3524 }, { "epoch": 0.36, "grad_norm": 1.4076903529842277, "learning_rate": 7.406826341118969e-06, "loss": 0.63, "step": 3525 }, { "epoch": 0.36, "grad_norm": 1.5348699360367397, "learning_rate": 7.405376757417132e-06, "loss": 0.6355, "step": 3526 }, { "epoch": 0.36, "grad_norm": 1.3875129541024402, "learning_rate": 7.403926910611524e-06, "loss": 0.6139, "step": 3527 }, { "epoch": 0.36, "grad_norm": 1.429125356848783, "learning_rate": 7.40247680086073e-06, "loss": 0.6303, "step": 3528 }, { "epoch": 0.36, "grad_norm": 1.462981514388602, "learning_rate": 7.401026428323362e-06, "loss": 0.56, "step": 3529 }, { "epoch": 0.36, "grad_norm": 1.3850754851795235, "learning_rate": 7.3995757931580665e-06, "loss": 0.6664, "step": 3530 }, { "epoch": 0.36, "grad_norm": 1.445502154272, "learning_rate": 7.398124895523519e-06, "loss": 0.6831, "step": 3531 }, { "epoch": 0.36, "grad_norm": 1.418823961379243, "learning_rate": 7.396673735578417e-06, "loss": 0.6213, "step": 3532 }, { "epoch": 0.36, "grad_norm": 1.3521683054253206, "learning_rate": 7.395222313481489e-06, "loss": 0.6778, "step": 3533 }, { "epoch": 0.36, "grad_norm": 1.4565537780951219, "learning_rate": 7.3937706293915e-06, "loss": 0.7805, "step": 3534 }, { "epoch": 0.36, "grad_norm": 1.4602376341334555, "learning_rate": 7.392318683467232e-06, "loss": 0.6664, "step": 3535 }, { "epoch": 0.36, "grad_norm": 1.4821792107799565, "learning_rate": 7.390866475867503e-06, "loss": 0.571, "step": 3536 }, { "epoch": 0.36, "grad_norm": 1.6265099488777077, "learning_rate": 7.389414006751159e-06, "loss": 0.642, "step": 3537 }, { "epoch": 0.36, "grad_norm": 1.5233275856505006, "learning_rate": 7.3879612762770695e-06, "loss": 0.7257, "step": 3538 }, { "epoch": 0.36, "grad_norm": 1.5878233525915446, "learning_rate": 7.3865082846041415e-06, "loss": 0.8086, "step": 3539 }, { "epoch": 0.36, "grad_norm": 1.5328962968519388, "learning_rate": 7.3850550318913e-06, "loss": 0.7172, "step": 3540 }, { "epoch": 0.36, "grad_norm": 1.4054878781571851, "learning_rate": 7.383601518297508e-06, "loss": 0.6262, "step": 3541 }, { "epoch": 0.36, "grad_norm": 1.5575585560921426, "learning_rate": 7.382147743981751e-06, "loss": 0.6738, "step": 3542 }, { "epoch": 0.36, "grad_norm": 1.4318866594060848, "learning_rate": 7.380693709103047e-06, "loss": 0.7059, "step": 3543 }, { "epoch": 0.36, "grad_norm": 1.5011489554889434, "learning_rate": 7.379239413820438e-06, "loss": 0.7113, "step": 3544 }, { "epoch": 0.36, "grad_norm": 1.3730478636089536, "learning_rate": 7.377784858292999e-06, "loss": 0.6974, "step": 3545 }, { "epoch": 0.36, "grad_norm": 1.3901377426358383, "learning_rate": 7.376330042679832e-06, "loss": 0.6076, "step": 3546 }, { "epoch": 0.36, "grad_norm": 1.5375124530367839, "learning_rate": 7.3748749671400635e-06, "loss": 0.7125, "step": 3547 }, { "epoch": 0.36, "grad_norm": 1.404709818372694, "learning_rate": 7.373419631832856e-06, "loss": 0.6452, "step": 3548 }, { "epoch": 0.36, "grad_norm": 1.3731449766700663, "learning_rate": 7.371964036917394e-06, "loss": 0.5935, "step": 3549 }, { "epoch": 0.36, "grad_norm": 1.3908335993885423, "learning_rate": 7.3705081825528925e-06, "loss": 0.6702, "step": 3550 }, { "epoch": 0.36, "grad_norm": 2.0308212580315907, "learning_rate": 7.369052068898596e-06, "loss": 0.6279, "step": 3551 }, { "epoch": 0.36, "grad_norm": 1.2799213443631483, "learning_rate": 7.367595696113776e-06, "loss": 0.7174, "step": 3552 }, { "epoch": 0.36, "grad_norm": 1.4675583425702106, "learning_rate": 7.366139064357733e-06, "loss": 0.6558, "step": 3553 }, { "epoch": 0.36, "grad_norm": 1.4684915594836876, "learning_rate": 7.364682173789794e-06, "loss": 0.6692, "step": 3554 }, { "epoch": 0.36, "grad_norm": 1.3703183672143973, "learning_rate": 7.363225024569321e-06, "loss": 0.4864, "step": 3555 }, { "epoch": 0.36, "grad_norm": 1.4447033092497688, "learning_rate": 7.361767616855693e-06, "loss": 0.6992, "step": 3556 }, { "epoch": 0.36, "grad_norm": 1.524749628623359, "learning_rate": 7.3603099508083265e-06, "loss": 0.666, "step": 3557 }, { "epoch": 0.36, "grad_norm": 1.327219822874011, "learning_rate": 7.358852026586664e-06, "loss": 0.6648, "step": 3558 }, { "epoch": 0.36, "grad_norm": 1.514130972437804, "learning_rate": 7.357393844350172e-06, "loss": 0.728, "step": 3559 }, { "epoch": 0.36, "grad_norm": 1.3599212404563752, "learning_rate": 7.355935404258354e-06, "loss": 0.6203, "step": 3560 }, { "epoch": 0.36, "grad_norm": 1.3050507774433875, "learning_rate": 7.35447670647073e-06, "loss": 0.6158, "step": 3561 }, { "epoch": 0.36, "grad_norm": 1.3032117827705445, "learning_rate": 7.353017751146861e-06, "loss": 0.6229, "step": 3562 }, { "epoch": 0.36, "grad_norm": 1.5477628845263351, "learning_rate": 7.3515585384463264e-06, "loss": 0.642, "step": 3563 }, { "epoch": 0.36, "grad_norm": 1.5721948683306874, "learning_rate": 7.3500990685287355e-06, "loss": 0.7687, "step": 3564 }, { "epoch": 0.36, "grad_norm": 1.8113251931820622, "learning_rate": 7.3486393415537306e-06, "loss": 0.72, "step": 3565 }, { "epoch": 0.36, "grad_norm": 1.4693574700461045, "learning_rate": 7.3471793576809765e-06, "loss": 0.6243, "step": 3566 }, { "epoch": 0.36, "grad_norm": 1.5996097703058558, "learning_rate": 7.345719117070171e-06, "loss": 0.6783, "step": 3567 }, { "epoch": 0.36, "grad_norm": 1.8346396234062945, "learning_rate": 7.3442586198810355e-06, "loss": 0.6345, "step": 3568 }, { "epoch": 0.36, "grad_norm": 1.6291772080310356, "learning_rate": 7.342797866273321e-06, "loss": 0.7377, "step": 3569 }, { "epoch": 0.36, "grad_norm": 1.4085086461322958, "learning_rate": 7.341336856406809e-06, "loss": 0.7368, "step": 3570 }, { "epoch": 0.36, "grad_norm": 1.373007618249725, "learning_rate": 7.339875590441305e-06, "loss": 0.6426, "step": 3571 }, { "epoch": 0.36, "grad_norm": 1.5337745907562206, "learning_rate": 7.338414068536646e-06, "loss": 0.7227, "step": 3572 }, { "epoch": 0.36, "grad_norm": 1.3382942270081482, "learning_rate": 7.3369522908526926e-06, "loss": 0.6676, "step": 3573 }, { "epoch": 0.36, "grad_norm": 1.3545904738648846, "learning_rate": 7.3354902575493416e-06, "loss": 0.6473, "step": 3574 }, { "epoch": 0.37, "grad_norm": 1.4127955626858049, "learning_rate": 7.334027968786507e-06, "loss": 0.7034, "step": 3575 }, { "epoch": 0.37, "grad_norm": 1.549150312441737, "learning_rate": 7.332565424724139e-06, "loss": 0.6074, "step": 3576 }, { "epoch": 0.37, "grad_norm": 1.6996807789093027, "learning_rate": 7.331102625522213e-06, "loss": 0.6719, "step": 3577 }, { "epoch": 0.37, "grad_norm": 1.3254359512683513, "learning_rate": 7.329639571340731e-06, "loss": 0.6102, "step": 3578 }, { "epoch": 0.37, "grad_norm": 1.4555528177145842, "learning_rate": 7.328176262339725e-06, "loss": 0.6005, "step": 3579 }, { "epoch": 0.37, "grad_norm": 1.2851979920184766, "learning_rate": 7.3267126986792524e-06, "loss": 0.653, "step": 3580 }, { "epoch": 0.37, "grad_norm": 1.4922918602172286, "learning_rate": 7.325248880519403e-06, "loss": 0.7819, "step": 3581 }, { "epoch": 0.37, "grad_norm": 1.4944127306552224, "learning_rate": 7.323784808020288e-06, "loss": 0.7329, "step": 3582 }, { "epoch": 0.37, "grad_norm": 1.4726493229046975, "learning_rate": 7.322320481342053e-06, "loss": 0.661, "step": 3583 }, { "epoch": 0.37, "grad_norm": 1.6165061050239924, "learning_rate": 7.320855900644867e-06, "loss": 0.6556, "step": 3584 }, { "epoch": 0.37, "grad_norm": 1.444923770576875, "learning_rate": 7.319391066088927e-06, "loss": 0.6867, "step": 3585 }, { "epoch": 0.37, "grad_norm": 1.3529790573357985, "learning_rate": 7.317925977834459e-06, "loss": 0.7624, "step": 3586 }, { "epoch": 0.37, "grad_norm": 1.293196101545055, "learning_rate": 7.316460636041719e-06, "loss": 0.6627, "step": 3587 }, { "epoch": 0.37, "grad_norm": 1.4530178020624633, "learning_rate": 7.314995040870987e-06, "loss": 0.6218, "step": 3588 }, { "epoch": 0.37, "grad_norm": 1.7918943763101387, "learning_rate": 7.3135291924825695e-06, "loss": 0.821, "step": 3589 }, { "epoch": 0.37, "grad_norm": 1.3261589126514017, "learning_rate": 7.312063091036808e-06, "loss": 0.6531, "step": 3590 }, { "epoch": 0.37, "grad_norm": 1.392680470285592, "learning_rate": 7.310596736694062e-06, "loss": 0.6717, "step": 3591 }, { "epoch": 0.37, "grad_norm": 1.3374053243376747, "learning_rate": 7.309130129614728e-06, "loss": 0.693, "step": 3592 }, { "epoch": 0.37, "grad_norm": 1.5397986901015892, "learning_rate": 7.3076632699592225e-06, "loss": 0.6877, "step": 3593 }, { "epoch": 0.37, "grad_norm": 1.352671161672763, "learning_rate": 7.306196157887993e-06, "loss": 0.647, "step": 3594 }, { "epoch": 0.37, "grad_norm": 1.4759116868163624, "learning_rate": 7.304728793561517e-06, "loss": 0.6445, "step": 3595 }, { "epoch": 0.37, "grad_norm": 1.4496234828026224, "learning_rate": 7.303261177140294e-06, "loss": 0.6564, "step": 3596 }, { "epoch": 0.37, "grad_norm": 1.3692762637949945, "learning_rate": 7.301793308784857e-06, "loss": 0.6265, "step": 3597 }, { "epoch": 0.37, "grad_norm": 1.582895855666929, "learning_rate": 7.300325188655762e-06, "loss": 0.5938, "step": 3598 }, { "epoch": 0.37, "grad_norm": 1.4185094621431733, "learning_rate": 7.298856816913593e-06, "loss": 0.7082, "step": 3599 }, { "epoch": 0.37, "grad_norm": 1.4099343762466245, "learning_rate": 7.297388193718966e-06, "loss": 0.588, "step": 3600 }, { "epoch": 0.37, "grad_norm": 1.2775411491628292, "learning_rate": 7.295919319232518e-06, "loss": 0.6333, "step": 3601 }, { "epoch": 0.37, "grad_norm": 1.408064170462245, "learning_rate": 7.29445019361492e-06, "loss": 0.6429, "step": 3602 }, { "epoch": 0.37, "grad_norm": 1.4274009027170493, "learning_rate": 7.292980817026863e-06, "loss": 0.5906, "step": 3603 }, { "epoch": 0.37, "grad_norm": 1.4721298990376994, "learning_rate": 7.291511189629075e-06, "loss": 0.673, "step": 3604 }, { "epoch": 0.37, "grad_norm": 1.407513649348867, "learning_rate": 7.290041311582301e-06, "loss": 0.6789, "step": 3605 }, { "epoch": 0.37, "grad_norm": 1.4753752593839233, "learning_rate": 7.288571183047321e-06, "loss": 0.6879, "step": 3606 }, { "epoch": 0.37, "grad_norm": 1.3671795523475787, "learning_rate": 7.287100804184939e-06, "loss": 0.6291, "step": 3607 }, { "epoch": 0.37, "grad_norm": 1.476892590761029, "learning_rate": 7.285630175155989e-06, "loss": 0.6626, "step": 3608 }, { "epoch": 0.37, "grad_norm": 1.5060046456679888, "learning_rate": 7.284159296121329e-06, "loss": 0.7328, "step": 3609 }, { "epoch": 0.37, "grad_norm": 1.4471925049354126, "learning_rate": 7.282688167241846e-06, "loss": 0.6868, "step": 3610 }, { "epoch": 0.37, "grad_norm": 1.413632825067203, "learning_rate": 7.281216788678456e-06, "loss": 0.7003, "step": 3611 }, { "epoch": 0.37, "grad_norm": 1.4362074300026006, "learning_rate": 7.2797451605920975e-06, "loss": 0.5323, "step": 3612 }, { "epoch": 0.37, "grad_norm": 1.3482152565509082, "learning_rate": 7.278273283143742e-06, "loss": 0.6889, "step": 3613 }, { "epoch": 0.37, "grad_norm": 1.4106182060589711, "learning_rate": 7.276801156494385e-06, "loss": 0.6906, "step": 3614 }, { "epoch": 0.37, "grad_norm": 1.3986647584147827, "learning_rate": 7.27532878080505e-06, "loss": 0.6531, "step": 3615 }, { "epoch": 0.37, "grad_norm": 1.4916992632545887, "learning_rate": 7.273856156236789e-06, "loss": 0.7845, "step": 3616 }, { "epoch": 0.37, "grad_norm": 1.5567735304613544, "learning_rate": 7.272383282950676e-06, "loss": 0.6739, "step": 3617 }, { "epoch": 0.37, "grad_norm": 1.4330239753765412, "learning_rate": 7.2709101611078206e-06, "loss": 0.7461, "step": 3618 }, { "epoch": 0.37, "grad_norm": 1.2201252928931963, "learning_rate": 7.269436790869352e-06, "loss": 0.6089, "step": 3619 }, { "epoch": 0.37, "grad_norm": 1.4909426928453904, "learning_rate": 7.267963172396431e-06, "loss": 0.6439, "step": 3620 }, { "epoch": 0.37, "grad_norm": 1.4596395028387465, "learning_rate": 7.266489305850244e-06, "loss": 0.5675, "step": 3621 }, { "epoch": 0.37, "grad_norm": 1.4508606202085281, "learning_rate": 7.2650151913920065e-06, "loss": 0.6184, "step": 3622 }, { "epoch": 0.37, "grad_norm": 1.558101547717375, "learning_rate": 7.263540829182957e-06, "loss": 0.5959, "step": 3623 }, { "epoch": 0.37, "grad_norm": 1.37169426765717, "learning_rate": 7.262066219384363e-06, "loss": 0.6613, "step": 3624 }, { "epoch": 0.37, "grad_norm": 1.284077451933098, "learning_rate": 7.260591362157522e-06, "loss": 0.6127, "step": 3625 }, { "epoch": 0.37, "grad_norm": 1.4497295272596544, "learning_rate": 7.259116257663754e-06, "loss": 0.6362, "step": 3626 }, { "epoch": 0.37, "grad_norm": 1.436422868658878, "learning_rate": 7.257640906064409e-06, "loss": 0.6777, "step": 3627 }, { "epoch": 0.37, "grad_norm": 1.4010718422342148, "learning_rate": 7.256165307520864e-06, "loss": 0.692, "step": 3628 }, { "epoch": 0.37, "grad_norm": 1.5901101588882938, "learning_rate": 7.254689462194522e-06, "loss": 0.8117, "step": 3629 }, { "epoch": 0.37, "grad_norm": 1.3405387052972237, "learning_rate": 7.253213370246812e-06, "loss": 0.721, "step": 3630 }, { "epoch": 0.37, "grad_norm": 1.523725154373421, "learning_rate": 7.2517370318391925e-06, "loss": 0.727, "step": 3631 }, { "epoch": 0.37, "grad_norm": 1.2662136154977763, "learning_rate": 7.250260447133146e-06, "loss": 0.6028, "step": 3632 }, { "epoch": 0.37, "grad_norm": 1.4721764474186287, "learning_rate": 7.248783616290186e-06, "loss": 0.6839, "step": 3633 }, { "epoch": 0.37, "grad_norm": 1.330548325656677, "learning_rate": 7.2473065394718475e-06, "loss": 0.5689, "step": 3634 }, { "epoch": 0.37, "grad_norm": 1.3680323365608937, "learning_rate": 7.2458292168397e-06, "loss": 0.6567, "step": 3635 }, { "epoch": 0.37, "grad_norm": 1.3143590500741962, "learning_rate": 7.244351648555329e-06, "loss": 0.6718, "step": 3636 }, { "epoch": 0.37, "grad_norm": 1.425627510083582, "learning_rate": 7.242873834780358e-06, "loss": 0.6785, "step": 3637 }, { "epoch": 0.37, "grad_norm": 1.2669858069763509, "learning_rate": 7.241395775676431e-06, "loss": 0.6039, "step": 3638 }, { "epoch": 0.37, "grad_norm": 1.3594972040062032, "learning_rate": 7.239917471405221e-06, "loss": 0.6781, "step": 3639 }, { "epoch": 0.37, "grad_norm": 2.851380992823845, "learning_rate": 7.238438922128426e-06, "loss": 0.5931, "step": 3640 }, { "epoch": 0.37, "grad_norm": 1.4055997771773432, "learning_rate": 7.236960128007773e-06, "loss": 0.584, "step": 3641 }, { "epoch": 0.37, "grad_norm": 1.549541243845856, "learning_rate": 7.235481089205013e-06, "loss": 0.694, "step": 3642 }, { "epoch": 0.37, "grad_norm": 1.3290172533169757, "learning_rate": 7.2340018058819275e-06, "loss": 0.6304, "step": 3643 }, { "epoch": 0.37, "grad_norm": 1.267605249031264, "learning_rate": 7.232522278200324e-06, "loss": 0.6583, "step": 3644 }, { "epoch": 0.37, "grad_norm": 1.5219404348289394, "learning_rate": 7.231042506322031e-06, "loss": 0.6412, "step": 3645 }, { "epoch": 0.37, "grad_norm": 1.4024710518361987, "learning_rate": 7.229562490408911e-06, "loss": 0.6929, "step": 3646 }, { "epoch": 0.37, "grad_norm": 1.251510057220911, "learning_rate": 7.22808223062285e-06, "loss": 0.7589, "step": 3647 }, { "epoch": 0.37, "grad_norm": 1.4190624252562962, "learning_rate": 7.226601727125763e-06, "loss": 0.7428, "step": 3648 }, { "epoch": 0.37, "grad_norm": 1.3877263285271708, "learning_rate": 7.225120980079587e-06, "loss": 0.5862, "step": 3649 }, { "epoch": 0.37, "grad_norm": 1.3710954815242706, "learning_rate": 7.223639989646289e-06, "loss": 0.5953, "step": 3650 }, { "epoch": 0.37, "grad_norm": 2.0188315034285087, "learning_rate": 7.222158755987864e-06, "loss": 0.5779, "step": 3651 }, { "epoch": 0.37, "grad_norm": 1.3576968552799367, "learning_rate": 7.220677279266327e-06, "loss": 0.7038, "step": 3652 }, { "epoch": 0.37, "grad_norm": 1.3390102276775706, "learning_rate": 7.2191955596437306e-06, "loss": 0.5743, "step": 3653 }, { "epoch": 0.37, "grad_norm": 1.5117829179166302, "learning_rate": 7.217713597282141e-06, "loss": 0.7129, "step": 3654 }, { "epoch": 0.37, "grad_norm": 1.42429555776662, "learning_rate": 7.2162313923436624e-06, "loss": 0.6959, "step": 3655 }, { "epoch": 0.37, "grad_norm": 1.4830332794834982, "learning_rate": 7.214748944990417e-06, "loss": 0.6473, "step": 3656 }, { "epoch": 0.37, "grad_norm": 1.3671996170317315, "learning_rate": 7.21326625538456e-06, "loss": 0.7117, "step": 3657 }, { "epoch": 0.37, "grad_norm": 1.378536720201767, "learning_rate": 7.211783323688271e-06, "loss": 0.6793, "step": 3658 }, { "epoch": 0.37, "grad_norm": 1.5060771858104363, "learning_rate": 7.21030015006375e-06, "loss": 0.6915, "step": 3659 }, { "epoch": 0.37, "grad_norm": 1.4635588492466467, "learning_rate": 7.208816734673235e-06, "loss": 0.74, "step": 3660 }, { "epoch": 0.37, "grad_norm": 1.6048244699989302, "learning_rate": 7.207333077678981e-06, "loss": 0.6766, "step": 3661 }, { "epoch": 0.37, "grad_norm": 1.4476573409170677, "learning_rate": 7.2058491792432716e-06, "loss": 0.6477, "step": 3662 }, { "epoch": 0.37, "grad_norm": 1.284652656058416, "learning_rate": 7.2043650395284214e-06, "loss": 0.6612, "step": 3663 }, { "epoch": 0.37, "grad_norm": 1.3801825590880954, "learning_rate": 7.202880658696765e-06, "loss": 0.6547, "step": 3664 }, { "epoch": 0.37, "grad_norm": 1.4539927485225062, "learning_rate": 7.201396036910669e-06, "loss": 0.7425, "step": 3665 }, { "epoch": 0.37, "grad_norm": 1.444834723143224, "learning_rate": 7.1999111743325176e-06, "loss": 0.6294, "step": 3666 }, { "epoch": 0.37, "grad_norm": 1.3739011955402778, "learning_rate": 7.1984260711247356e-06, "loss": 0.5973, "step": 3667 }, { "epoch": 0.37, "grad_norm": 1.5136347309363596, "learning_rate": 7.196940727449759e-06, "loss": 0.6703, "step": 3668 }, { "epoch": 0.37, "grad_norm": 1.4936861910059325, "learning_rate": 7.195455143470061e-06, "loss": 0.6981, "step": 3669 }, { "epoch": 0.37, "grad_norm": 1.3780250215452408, "learning_rate": 7.193969319348135e-06, "loss": 0.6426, "step": 3670 }, { "epoch": 0.37, "grad_norm": 1.2108717462846557, "learning_rate": 7.192483255246505e-06, "loss": 0.7086, "step": 3671 }, { "epoch": 0.37, "grad_norm": 1.3977358379333051, "learning_rate": 7.190996951327717e-06, "loss": 0.6688, "step": 3672 }, { "epoch": 0.38, "grad_norm": 1.3786343305930295, "learning_rate": 7.1895104077543435e-06, "loss": 0.6564, "step": 3673 }, { "epoch": 0.38, "grad_norm": 1.4518784080581326, "learning_rate": 7.188023624688989e-06, "loss": 0.7333, "step": 3674 }, { "epoch": 0.38, "grad_norm": 1.437781924756814, "learning_rate": 7.186536602294278e-06, "loss": 0.6216, "step": 3675 }, { "epoch": 0.38, "grad_norm": 1.3642921211303904, "learning_rate": 7.185049340732863e-06, "loss": 0.7076, "step": 3676 }, { "epoch": 0.38, "grad_norm": 1.396930260631506, "learning_rate": 7.183561840167423e-06, "loss": 0.5231, "step": 3677 }, { "epoch": 0.38, "grad_norm": 4.890490393568402, "learning_rate": 7.182074100760663e-06, "loss": 0.6598, "step": 3678 }, { "epoch": 0.38, "grad_norm": 1.3524244880119878, "learning_rate": 7.180586122675316e-06, "loss": 0.6625, "step": 3679 }, { "epoch": 0.38, "grad_norm": 1.2527441436182982, "learning_rate": 7.179097906074136e-06, "loss": 0.6356, "step": 3680 }, { "epoch": 0.38, "grad_norm": 1.5192643839164306, "learning_rate": 7.17760945111991e-06, "loss": 0.6231, "step": 3681 }, { "epoch": 0.38, "grad_norm": 1.2308694807435991, "learning_rate": 7.1761207579754444e-06, "loss": 0.6627, "step": 3682 }, { "epoch": 0.38, "grad_norm": 1.3010548994576177, "learning_rate": 7.174631826803577e-06, "loss": 0.6965, "step": 3683 }, { "epoch": 0.38, "grad_norm": 1.6265092785812838, "learning_rate": 7.173142657767167e-06, "loss": 0.6445, "step": 3684 }, { "epoch": 0.38, "grad_norm": 1.335793283007943, "learning_rate": 7.171653251029105e-06, "loss": 0.6684, "step": 3685 }, { "epoch": 0.38, "grad_norm": 1.606570478548884, "learning_rate": 7.170163606752303e-06, "loss": 0.7558, "step": 3686 }, { "epoch": 0.38, "grad_norm": 2.020568216599083, "learning_rate": 7.1686737250997e-06, "loss": 0.5823, "step": 3687 }, { "epoch": 0.38, "grad_norm": 1.5442609857363219, "learning_rate": 7.167183606234261e-06, "loss": 0.7344, "step": 3688 }, { "epoch": 0.38, "grad_norm": 1.6790490381783192, "learning_rate": 7.1656932503189805e-06, "loss": 0.7099, "step": 3689 }, { "epoch": 0.38, "grad_norm": 1.4770328967039195, "learning_rate": 7.164202657516872e-06, "loss": 0.6624, "step": 3690 }, { "epoch": 0.38, "grad_norm": 1.471847750385534, "learning_rate": 7.162711827990983e-06, "loss": 0.7497, "step": 3691 }, { "epoch": 0.38, "grad_norm": 1.4907177046988205, "learning_rate": 7.161220761904378e-06, "loss": 0.7353, "step": 3692 }, { "epoch": 0.38, "grad_norm": 1.4666236558884056, "learning_rate": 7.159729459420157e-06, "loss": 0.6049, "step": 3693 }, { "epoch": 0.38, "grad_norm": 1.4414838978123647, "learning_rate": 7.158237920701436e-06, "loss": 0.5911, "step": 3694 }, { "epoch": 0.38, "grad_norm": 1.4911749608659453, "learning_rate": 7.156746145911366e-06, "loss": 0.6588, "step": 3695 }, { "epoch": 0.38, "grad_norm": 1.3648827697653696, "learning_rate": 7.155254135213117e-06, "loss": 0.5997, "step": 3696 }, { "epoch": 0.38, "grad_norm": 1.5965026174424095, "learning_rate": 7.1537618887698876e-06, "loss": 0.6645, "step": 3697 }, { "epoch": 0.38, "grad_norm": 1.513857915385254, "learning_rate": 7.152269406744904e-06, "loss": 0.6809, "step": 3698 }, { "epoch": 0.38, "grad_norm": 1.8166523925118174, "learning_rate": 7.150776689301414e-06, "loss": 0.755, "step": 3699 }, { "epoch": 0.38, "grad_norm": 1.5256864541397936, "learning_rate": 7.149283736602696e-06, "loss": 0.6455, "step": 3700 }, { "epoch": 0.38, "grad_norm": 1.490759555421278, "learning_rate": 7.147790548812047e-06, "loss": 0.7246, "step": 3701 }, { "epoch": 0.38, "grad_norm": 1.451473268024587, "learning_rate": 7.146297126092798e-06, "loss": 0.6948, "step": 3702 }, { "epoch": 0.38, "grad_norm": 1.4597817003731635, "learning_rate": 7.144803468608299e-06, "loss": 0.7287, "step": 3703 }, { "epoch": 0.38, "grad_norm": 1.4739529588437255, "learning_rate": 7.143309576521931e-06, "loss": 0.6588, "step": 3704 }, { "epoch": 0.38, "grad_norm": 1.4465878591815564, "learning_rate": 7.141815449997098e-06, "loss": 0.8178, "step": 3705 }, { "epoch": 0.38, "grad_norm": 1.403354595722698, "learning_rate": 7.140321089197229e-06, "loss": 0.6404, "step": 3706 }, { "epoch": 0.38, "grad_norm": 1.5884788505961915, "learning_rate": 7.1388264942857795e-06, "loss": 0.6827, "step": 3707 }, { "epoch": 0.38, "grad_norm": 1.6024872752742698, "learning_rate": 7.137331665426231e-06, "loss": 0.6971, "step": 3708 }, { "epoch": 0.38, "grad_norm": 1.4013439492332622, "learning_rate": 7.135836602782088e-06, "loss": 0.6454, "step": 3709 }, { "epoch": 0.38, "grad_norm": 1.389845558369966, "learning_rate": 7.134341306516885e-06, "loss": 0.7218, "step": 3710 }, { "epoch": 0.38, "grad_norm": 1.4658714675729316, "learning_rate": 7.1328457767941795e-06, "loss": 0.6498, "step": 3711 }, { "epoch": 0.38, "grad_norm": 1.3591640251709267, "learning_rate": 7.131350013777555e-06, "loss": 0.6113, "step": 3712 }, { "epoch": 0.38, "grad_norm": 1.3844184893359648, "learning_rate": 7.12985401763062e-06, "loss": 0.6527, "step": 3713 }, { "epoch": 0.38, "grad_norm": 1.501247154071339, "learning_rate": 7.128357788517009e-06, "loss": 0.6306, "step": 3714 }, { "epoch": 0.38, "grad_norm": 1.387549159994551, "learning_rate": 7.126861326600382e-06, "loss": 0.5406, "step": 3715 }, { "epoch": 0.38, "grad_norm": 1.4974232805011596, "learning_rate": 7.125364632044423e-06, "loss": 0.6478, "step": 3716 }, { "epoch": 0.38, "grad_norm": 1.7811036713007375, "learning_rate": 7.123867705012843e-06, "loss": 0.6804, "step": 3717 }, { "epoch": 0.38, "grad_norm": 1.4031041234868724, "learning_rate": 7.12237054566938e-06, "loss": 0.6795, "step": 3718 }, { "epoch": 0.38, "grad_norm": 1.2535661809805136, "learning_rate": 7.120873154177793e-06, "loss": 0.5526, "step": 3719 }, { "epoch": 0.38, "grad_norm": 1.5391901531333352, "learning_rate": 7.119375530701872e-06, "loss": 0.6115, "step": 3720 }, { "epoch": 0.38, "grad_norm": 1.396023149945599, "learning_rate": 7.117877675405427e-06, "loss": 0.6092, "step": 3721 }, { "epoch": 0.38, "grad_norm": 1.5406424736333968, "learning_rate": 7.1163795884522955e-06, "loss": 0.7535, "step": 3722 }, { "epoch": 0.38, "grad_norm": 1.44795824897894, "learning_rate": 7.114881270006342e-06, "loss": 0.6123, "step": 3723 }, { "epoch": 0.38, "grad_norm": 1.3883041236382387, "learning_rate": 7.113382720231455e-06, "loss": 0.7501, "step": 3724 }, { "epoch": 0.38, "grad_norm": 1.5369657652866215, "learning_rate": 7.111883939291546e-06, "loss": 0.638, "step": 3725 }, { "epoch": 0.38, "grad_norm": 1.664965245038734, "learning_rate": 7.110384927350556e-06, "loss": 0.6131, "step": 3726 }, { "epoch": 0.38, "grad_norm": 1.4666660318729654, "learning_rate": 7.1088856845724486e-06, "loss": 0.6959, "step": 3727 }, { "epoch": 0.38, "grad_norm": 1.505461909829233, "learning_rate": 7.107386211121215e-06, "loss": 0.6225, "step": 3728 }, { "epoch": 0.38, "grad_norm": 1.4116214827781135, "learning_rate": 7.105886507160865e-06, "loss": 0.6195, "step": 3729 }, { "epoch": 0.38, "grad_norm": 1.3636432465596195, "learning_rate": 7.1043865728554416e-06, "loss": 0.6354, "step": 3730 }, { "epoch": 0.38, "grad_norm": 1.3355892048402145, "learning_rate": 7.102886408369012e-06, "loss": 0.6166, "step": 3731 }, { "epoch": 0.38, "grad_norm": 2.57910577544432, "learning_rate": 7.101386013865663e-06, "loss": 0.6426, "step": 3732 }, { "epoch": 0.38, "grad_norm": 2.0247385049090467, "learning_rate": 7.099885389509512e-06, "loss": 0.6501, "step": 3733 }, { "epoch": 0.38, "grad_norm": 1.4465220926498947, "learning_rate": 7.098384535464698e-06, "loss": 0.6322, "step": 3734 }, { "epoch": 0.38, "grad_norm": 1.4224373592106987, "learning_rate": 7.09688345189539e-06, "loss": 0.6326, "step": 3735 }, { "epoch": 0.38, "grad_norm": 1.314117410385963, "learning_rate": 7.095382138965775e-06, "loss": 0.676, "step": 3736 }, { "epoch": 0.38, "grad_norm": 1.433395039559413, "learning_rate": 7.093880596840069e-06, "loss": 0.6105, "step": 3737 }, { "epoch": 0.38, "grad_norm": 2.638105867222974, "learning_rate": 7.092378825682517e-06, "loss": 0.6652, "step": 3738 }, { "epoch": 0.38, "grad_norm": 1.4387588294962808, "learning_rate": 7.09087682565738e-06, "loss": 0.6588, "step": 3739 }, { "epoch": 0.38, "grad_norm": 1.5269287192747016, "learning_rate": 7.089374596928953e-06, "loss": 0.6943, "step": 3740 }, { "epoch": 0.38, "grad_norm": 1.5448072889984767, "learning_rate": 7.087872139661549e-06, "loss": 0.684, "step": 3741 }, { "epoch": 0.38, "grad_norm": 1.4650812889649476, "learning_rate": 7.086369454019513e-06, "loss": 0.5613, "step": 3742 }, { "epoch": 0.38, "grad_norm": 1.443180097446927, "learning_rate": 7.084866540167207e-06, "loss": 0.6818, "step": 3743 }, { "epoch": 0.38, "grad_norm": 3.095541392503257, "learning_rate": 7.083363398269022e-06, "loss": 0.6685, "step": 3744 }, { "epoch": 0.38, "grad_norm": 1.4042583960731863, "learning_rate": 7.081860028489378e-06, "loss": 0.7022, "step": 3745 }, { "epoch": 0.38, "grad_norm": 3.8697796108819347, "learning_rate": 7.080356430992712e-06, "loss": 0.7316, "step": 3746 }, { "epoch": 0.38, "grad_norm": 1.2601867565560425, "learning_rate": 7.078852605943491e-06, "loss": 0.5998, "step": 3747 }, { "epoch": 0.38, "grad_norm": 1.3636135872221709, "learning_rate": 7.077348553506207e-06, "loss": 0.7571, "step": 3748 }, { "epoch": 0.38, "grad_norm": 1.3944830434016617, "learning_rate": 7.0758442738453745e-06, "loss": 0.7049, "step": 3749 }, { "epoch": 0.38, "grad_norm": 1.3694622269601093, "learning_rate": 7.074339767125533e-06, "loss": 0.6927, "step": 3750 }, { "epoch": 0.38, "grad_norm": 1.2958074797417933, "learning_rate": 7.0728350335112485e-06, "loss": 0.5956, "step": 3751 }, { "epoch": 0.38, "grad_norm": 1.337813019002157, "learning_rate": 7.071330073167112e-06, "loss": 0.5683, "step": 3752 }, { "epoch": 0.38, "grad_norm": 1.3957351313490824, "learning_rate": 7.069824886257736e-06, "loss": 0.695, "step": 3753 }, { "epoch": 0.38, "grad_norm": 1.526968853132866, "learning_rate": 7.068319472947763e-06, "loss": 0.625, "step": 3754 }, { "epoch": 0.38, "grad_norm": 1.4137389274005474, "learning_rate": 7.066813833401856e-06, "loss": 0.7543, "step": 3755 }, { "epoch": 0.38, "grad_norm": 1.4023369864808706, "learning_rate": 7.065307967784704e-06, "loss": 0.7221, "step": 3756 }, { "epoch": 0.38, "grad_norm": 1.437385209190125, "learning_rate": 7.0638018762610205e-06, "loss": 0.5441, "step": 3757 }, { "epoch": 0.38, "grad_norm": 1.5432099658411356, "learning_rate": 7.062295558995544e-06, "loss": 0.5913, "step": 3758 }, { "epoch": 0.38, "grad_norm": 1.6206848476832139, "learning_rate": 7.06078901615304e-06, "loss": 0.6, "step": 3759 }, { "epoch": 0.38, "grad_norm": 1.782252166560553, "learning_rate": 7.059282247898294e-06, "loss": 0.7814, "step": 3760 }, { "epoch": 0.38, "grad_norm": 1.4758139854495256, "learning_rate": 7.05777525439612e-06, "loss": 0.6728, "step": 3761 }, { "epoch": 0.38, "grad_norm": 1.5373180611925403, "learning_rate": 7.056268035811353e-06, "loss": 0.6291, "step": 3762 }, { "epoch": 0.38, "grad_norm": 1.4302135073271853, "learning_rate": 7.054760592308858e-06, "loss": 0.6451, "step": 3763 }, { "epoch": 0.38, "grad_norm": 1.3030123368856228, "learning_rate": 7.05325292405352e-06, "loss": 0.7096, "step": 3764 }, { "epoch": 0.38, "grad_norm": 1.2923791958892392, "learning_rate": 7.05174503121025e-06, "loss": 0.7465, "step": 3765 }, { "epoch": 0.38, "grad_norm": 1.363992802242298, "learning_rate": 7.050236913943984e-06, "loss": 0.5966, "step": 3766 }, { "epoch": 0.38, "grad_norm": 1.450901030099206, "learning_rate": 7.048728572419681e-06, "loss": 0.6904, "step": 3767 }, { "epoch": 0.38, "grad_norm": 1.3353992556954144, "learning_rate": 7.0472200068023265e-06, "loss": 0.7185, "step": 3768 }, { "epoch": 0.38, "grad_norm": 1.4190142278587778, "learning_rate": 7.04571121725693e-06, "loss": 0.7461, "step": 3769 }, { "epoch": 0.38, "grad_norm": 1.5307717158201934, "learning_rate": 7.044202203948524e-06, "loss": 0.7611, "step": 3770 }, { "epoch": 0.39, "grad_norm": 1.6149715794789838, "learning_rate": 7.04269296704217e-06, "loss": 0.6683, "step": 3771 }, { "epoch": 0.39, "grad_norm": 1.5159627616240967, "learning_rate": 7.041183506702946e-06, "loss": 0.6631, "step": 3772 }, { "epoch": 0.39, "grad_norm": 1.344717286921889, "learning_rate": 7.039673823095964e-06, "loss": 0.7002, "step": 3773 }, { "epoch": 0.39, "grad_norm": 1.3326290190888355, "learning_rate": 7.03816391638635e-06, "loss": 0.6236, "step": 3774 }, { "epoch": 0.39, "grad_norm": 1.4614855395842437, "learning_rate": 7.036653786739264e-06, "loss": 0.5985, "step": 3775 }, { "epoch": 0.39, "grad_norm": 1.2359549300557116, "learning_rate": 7.035143434319884e-06, "loss": 0.6559, "step": 3776 }, { "epoch": 0.39, "grad_norm": 1.3247642974663765, "learning_rate": 7.033632859293417e-06, "loss": 0.571, "step": 3777 }, { "epoch": 0.39, "grad_norm": 1.4896607577730097, "learning_rate": 7.032122061825091e-06, "loss": 0.6896, "step": 3778 }, { "epoch": 0.39, "grad_norm": 1.3523471733539085, "learning_rate": 7.0306110420801565e-06, "loss": 0.6515, "step": 3779 }, { "epoch": 0.39, "grad_norm": 1.4590261935564581, "learning_rate": 7.029099800223896e-06, "loss": 0.6924, "step": 3780 }, { "epoch": 0.39, "grad_norm": 1.7281943410756881, "learning_rate": 7.0275883364216076e-06, "loss": 0.6911, "step": 3781 }, { "epoch": 0.39, "grad_norm": 1.2854078134451732, "learning_rate": 7.0260766508386195e-06, "loss": 0.59, "step": 3782 }, { "epoch": 0.39, "grad_norm": 1.4698799106196772, "learning_rate": 7.024564743640281e-06, "loss": 0.8097, "step": 3783 }, { "epoch": 0.39, "grad_norm": 1.487941520768955, "learning_rate": 7.023052614991968e-06, "loss": 0.6662, "step": 3784 }, { "epoch": 0.39, "grad_norm": 1.589781626452256, "learning_rate": 7.021540265059079e-06, "loss": 0.7258, "step": 3785 }, { "epoch": 0.39, "grad_norm": 1.5235829986416205, "learning_rate": 7.020027694007037e-06, "loss": 0.7872, "step": 3786 }, { "epoch": 0.39, "grad_norm": 1.2565464203555732, "learning_rate": 7.01851490200129e-06, "loss": 0.6571, "step": 3787 }, { "epoch": 0.39, "grad_norm": 1.3292061845924086, "learning_rate": 7.0170018892073065e-06, "loss": 0.6929, "step": 3788 }, { "epoch": 0.39, "grad_norm": 1.3088212194012179, "learning_rate": 7.015488655790587e-06, "loss": 0.632, "step": 3789 }, { "epoch": 0.39, "grad_norm": 1.662842762793231, "learning_rate": 7.0139752019166474e-06, "loss": 0.5897, "step": 3790 }, { "epoch": 0.39, "grad_norm": 4.1701588940953105, "learning_rate": 7.012461527751034e-06, "loss": 0.6134, "step": 3791 }, { "epoch": 0.39, "grad_norm": 1.606677294318204, "learning_rate": 7.010947633459313e-06, "loss": 0.7534, "step": 3792 }, { "epoch": 0.39, "grad_norm": 1.5835366691957729, "learning_rate": 7.009433519207075e-06, "loss": 0.6892, "step": 3793 }, { "epoch": 0.39, "grad_norm": 1.4468831269408555, "learning_rate": 7.007919185159942e-06, "loss": 0.7332, "step": 3794 }, { "epoch": 0.39, "grad_norm": 1.5525150538133625, "learning_rate": 7.006404631483549e-06, "loss": 0.7236, "step": 3795 }, { "epoch": 0.39, "grad_norm": 1.4134628895477397, "learning_rate": 7.004889858343561e-06, "loss": 0.6794, "step": 3796 }, { "epoch": 0.39, "grad_norm": 1.4606241373902293, "learning_rate": 7.0033748659056675e-06, "loss": 0.5609, "step": 3797 }, { "epoch": 0.39, "grad_norm": 1.8893878000597173, "learning_rate": 7.001859654335579e-06, "loss": 0.6933, "step": 3798 }, { "epoch": 0.39, "grad_norm": 1.545844240139801, "learning_rate": 7.000344223799035e-06, "loss": 0.7423, "step": 3799 }, { "epoch": 0.39, "grad_norm": 1.4541533136318734, "learning_rate": 6.998828574461791e-06, "loss": 0.6812, "step": 3800 }, { "epoch": 0.39, "grad_norm": 1.6674203209807366, "learning_rate": 6.997312706489634e-06, "loss": 0.7303, "step": 3801 }, { "epoch": 0.39, "grad_norm": 1.402478311892567, "learning_rate": 6.9957966200483715e-06, "loss": 0.5823, "step": 3802 }, { "epoch": 0.39, "grad_norm": 1.5995295236152118, "learning_rate": 6.994280315303835e-06, "loss": 0.7496, "step": 3803 }, { "epoch": 0.39, "grad_norm": 1.5573975467733145, "learning_rate": 6.99276379242188e-06, "loss": 0.7362, "step": 3804 }, { "epoch": 0.39, "grad_norm": 1.4636501663598556, "learning_rate": 6.991247051568387e-06, "loss": 0.6654, "step": 3805 }, { "epoch": 0.39, "grad_norm": 1.6791520330654899, "learning_rate": 6.989730092909259e-06, "loss": 0.7574, "step": 3806 }, { "epoch": 0.39, "grad_norm": 1.402250187271637, "learning_rate": 6.988212916610422e-06, "loss": 0.7036, "step": 3807 }, { "epoch": 0.39, "grad_norm": 1.4709976382170957, "learning_rate": 6.98669552283783e-06, "loss": 0.6983, "step": 3808 }, { "epoch": 0.39, "grad_norm": 1.3431808639891414, "learning_rate": 6.985177911757455e-06, "loss": 0.5793, "step": 3809 }, { "epoch": 0.39, "grad_norm": 1.5771916706923474, "learning_rate": 6.983660083535295e-06, "loss": 0.6524, "step": 3810 }, { "epoch": 0.39, "grad_norm": 1.501278406529444, "learning_rate": 6.9821420383373765e-06, "loss": 0.6432, "step": 3811 }, { "epoch": 0.39, "grad_norm": 1.3960680608490117, "learning_rate": 6.980623776329741e-06, "loss": 0.6913, "step": 3812 }, { "epoch": 0.39, "grad_norm": 1.3739868030370157, "learning_rate": 6.979105297678462e-06, "loss": 0.6216, "step": 3813 }, { "epoch": 0.39, "grad_norm": 1.2563813043578194, "learning_rate": 6.97758660254963e-06, "loss": 0.6195, "step": 3814 }, { "epoch": 0.39, "grad_norm": 1.5147268146377073, "learning_rate": 6.976067691109365e-06, "loss": 0.653, "step": 3815 }, { "epoch": 0.39, "grad_norm": 1.4245880556084167, "learning_rate": 6.974548563523805e-06, "loss": 0.5535, "step": 3816 }, { "epoch": 0.39, "grad_norm": 1.2872484222690503, "learning_rate": 6.973029219959117e-06, "loss": 0.6453, "step": 3817 }, { "epoch": 0.39, "grad_norm": 1.4508353375339555, "learning_rate": 6.971509660581487e-06, "loss": 0.6278, "step": 3818 }, { "epoch": 0.39, "grad_norm": 1.491671420684238, "learning_rate": 6.969989885557128e-06, "loss": 0.7072, "step": 3819 }, { "epoch": 0.39, "grad_norm": 1.438460802371683, "learning_rate": 6.9684698950522755e-06, "loss": 0.6155, "step": 3820 }, { "epoch": 0.39, "grad_norm": 1.4849464966556476, "learning_rate": 6.966949689233187e-06, "loss": 0.6475, "step": 3821 }, { "epoch": 0.39, "grad_norm": 1.2360936403459912, "learning_rate": 6.965429268266147e-06, "loss": 0.5193, "step": 3822 }, { "epoch": 0.39, "grad_norm": 1.4925215824558362, "learning_rate": 6.96390863231746e-06, "loss": 0.6662, "step": 3823 }, { "epoch": 0.39, "grad_norm": 1.5866236408524368, "learning_rate": 6.962387781553455e-06, "loss": 0.7179, "step": 3824 }, { "epoch": 0.39, "grad_norm": 1.3265528333646555, "learning_rate": 6.960866716140486e-06, "loss": 0.7134, "step": 3825 }, { "epoch": 0.39, "grad_norm": 1.266065520914308, "learning_rate": 6.9593454362449286e-06, "loss": 0.5923, "step": 3826 }, { "epoch": 0.39, "grad_norm": 1.5896792817928254, "learning_rate": 6.957823942033186e-06, "loss": 0.6211, "step": 3827 }, { "epoch": 0.39, "grad_norm": 1.4597127837181685, "learning_rate": 6.956302233671676e-06, "loss": 0.7402, "step": 3828 }, { "epoch": 0.39, "grad_norm": 1.5202663489021802, "learning_rate": 6.95478031132685e-06, "loss": 0.6387, "step": 3829 }, { "epoch": 0.39, "grad_norm": 1.3415302247691538, "learning_rate": 6.953258175165175e-06, "loss": 0.678, "step": 3830 }, { "epoch": 0.39, "grad_norm": 1.5756054734882978, "learning_rate": 6.951735825353146e-06, "loss": 0.6876, "step": 3831 }, { "epoch": 0.39, "grad_norm": 1.4221810088844171, "learning_rate": 6.950213262057281e-06, "loss": 0.6369, "step": 3832 }, { "epoch": 0.39, "grad_norm": 1.4829432744164817, "learning_rate": 6.948690485444118e-06, "loss": 0.7317, "step": 3833 }, { "epoch": 0.39, "grad_norm": 1.340296082041533, "learning_rate": 6.947167495680224e-06, "loss": 0.6892, "step": 3834 }, { "epoch": 0.39, "grad_norm": 1.4001101608431132, "learning_rate": 6.945644292932181e-06, "loss": 0.5469, "step": 3835 }, { "epoch": 0.39, "grad_norm": 1.3436821415069793, "learning_rate": 6.944120877366605e-06, "loss": 0.6222, "step": 3836 }, { "epoch": 0.39, "grad_norm": 1.337716778730659, "learning_rate": 6.942597249150124e-06, "loss": 0.6197, "step": 3837 }, { "epoch": 0.39, "grad_norm": 1.6076103518551965, "learning_rate": 6.941073408449398e-06, "loss": 0.75, "step": 3838 }, { "epoch": 0.39, "grad_norm": 1.2692027119917835, "learning_rate": 6.9395493554311065e-06, "loss": 0.5827, "step": 3839 }, { "epoch": 0.39, "grad_norm": 1.4397952196428032, "learning_rate": 6.938025090261953e-06, "loss": 0.7218, "step": 3840 }, { "epoch": 0.39, "grad_norm": 1.4957553196583935, "learning_rate": 6.936500613108663e-06, "loss": 0.736, "step": 3841 }, { "epoch": 0.39, "grad_norm": 1.4533412860540427, "learning_rate": 6.934975924137987e-06, "loss": 0.6729, "step": 3842 }, { "epoch": 0.39, "grad_norm": 1.3189237982667423, "learning_rate": 6.933451023516697e-06, "loss": 0.6949, "step": 3843 }, { "epoch": 0.39, "grad_norm": 1.2847943694465749, "learning_rate": 6.93192591141159e-06, "loss": 0.521, "step": 3844 }, { "epoch": 0.39, "grad_norm": 1.8745321519029279, "learning_rate": 6.930400587989484e-06, "loss": 0.6482, "step": 3845 }, { "epoch": 0.39, "grad_norm": 1.551311371302876, "learning_rate": 6.928875053417221e-06, "loss": 0.6521, "step": 3846 }, { "epoch": 0.39, "grad_norm": 1.4678129664671256, "learning_rate": 6.927349307861668e-06, "loss": 0.5983, "step": 3847 }, { "epoch": 0.39, "grad_norm": 1.4042998049822168, "learning_rate": 6.925823351489713e-06, "loss": 0.7635, "step": 3848 }, { "epoch": 0.39, "grad_norm": 1.4334547993065336, "learning_rate": 6.924297184468265e-06, "loss": 0.5807, "step": 3849 }, { "epoch": 0.39, "grad_norm": 1.322081035613864, "learning_rate": 6.922770806964263e-06, "loss": 0.5675, "step": 3850 }, { "epoch": 0.39, "grad_norm": 1.4935769945777213, "learning_rate": 6.921244219144659e-06, "loss": 0.6705, "step": 3851 }, { "epoch": 0.39, "grad_norm": 1.5168873107689702, "learning_rate": 6.919717421176437e-06, "loss": 0.722, "step": 3852 }, { "epoch": 0.39, "grad_norm": 1.4469791647578862, "learning_rate": 6.9181904132266e-06, "loss": 0.669, "step": 3853 }, { "epoch": 0.39, "grad_norm": 1.6404695187980047, "learning_rate": 6.916663195462174e-06, "loss": 0.6546, "step": 3854 }, { "epoch": 0.39, "grad_norm": 1.543580568220848, "learning_rate": 6.91513576805021e-06, "loss": 0.7368, "step": 3855 }, { "epoch": 0.39, "grad_norm": 1.446271440423492, "learning_rate": 6.913608131157776e-06, "loss": 0.5927, "step": 3856 }, { "epoch": 0.39, "grad_norm": 1.338987933470376, "learning_rate": 6.912080284951973e-06, "loss": 0.5799, "step": 3857 }, { "epoch": 0.39, "grad_norm": 1.4117734826582886, "learning_rate": 6.910552229599916e-06, "loss": 0.6548, "step": 3858 }, { "epoch": 0.39, "grad_norm": 1.7527563375005604, "learning_rate": 6.909023965268746e-06, "loss": 0.6331, "step": 3859 }, { "epoch": 0.39, "grad_norm": 1.3536006221422947, "learning_rate": 6.907495492125626e-06, "loss": 0.6126, "step": 3860 }, { "epoch": 0.39, "grad_norm": 1.3878674267840836, "learning_rate": 6.905966810337744e-06, "loss": 0.7706, "step": 3861 }, { "epoch": 0.39, "grad_norm": 1.391548372338459, "learning_rate": 6.904437920072311e-06, "loss": 0.5793, "step": 3862 }, { "epoch": 0.39, "grad_norm": 1.4085387494739612, "learning_rate": 6.902908821496557e-06, "loss": 0.6265, "step": 3863 }, { "epoch": 0.39, "grad_norm": 1.4786843970515213, "learning_rate": 6.901379514777739e-06, "loss": 0.7139, "step": 3864 }, { "epoch": 0.39, "grad_norm": 1.3823723253947375, "learning_rate": 6.8998500000831326e-06, "loss": 0.5982, "step": 3865 }, { "epoch": 0.39, "grad_norm": 1.3432475360807938, "learning_rate": 6.898320277580039e-06, "loss": 0.6747, "step": 3866 }, { "epoch": 0.39, "grad_norm": 1.490435330064673, "learning_rate": 6.8967903474357845e-06, "loss": 0.6443, "step": 3867 }, { "epoch": 0.39, "grad_norm": 1.6246825214289367, "learning_rate": 6.895260209817711e-06, "loss": 0.6546, "step": 3868 }, { "epoch": 0.4, "grad_norm": 1.2507209155493546, "learning_rate": 6.893729864893192e-06, "loss": 0.6432, "step": 3869 }, { "epoch": 0.4, "grad_norm": 1.4862291104348944, "learning_rate": 6.892199312829613e-06, "loss": 0.669, "step": 3870 }, { "epoch": 0.4, "grad_norm": 1.5607343225463437, "learning_rate": 6.890668553794393e-06, "loss": 0.6571, "step": 3871 }, { "epoch": 0.4, "grad_norm": 1.352118929964382, "learning_rate": 6.889137587954967e-06, "loss": 0.6544, "step": 3872 }, { "epoch": 0.4, "grad_norm": 1.4054777014756772, "learning_rate": 6.887606415478794e-06, "loss": 0.6397, "step": 3873 }, { "epoch": 0.4, "grad_norm": 1.5077073121593871, "learning_rate": 6.886075036533358e-06, "loss": 0.6376, "step": 3874 }, { "epoch": 0.4, "grad_norm": 1.4360497949502282, "learning_rate": 6.88454345128616e-06, "loss": 0.6246, "step": 3875 }, { "epoch": 0.4, "grad_norm": 1.6498459414046283, "learning_rate": 6.88301165990473e-06, "loss": 0.7198, "step": 3876 }, { "epoch": 0.4, "grad_norm": 1.3118084477641954, "learning_rate": 6.881479662556616e-06, "loss": 0.6395, "step": 3877 }, { "epoch": 0.4, "grad_norm": 1.1871340279102676, "learning_rate": 6.8799474594093925e-06, "loss": 0.5053, "step": 3878 }, { "epoch": 0.4, "grad_norm": 1.3407155890710996, "learning_rate": 6.878415050630651e-06, "loss": 0.645, "step": 3879 }, { "epoch": 0.4, "grad_norm": 1.6795265918479718, "learning_rate": 6.876882436388013e-06, "loss": 0.6769, "step": 3880 }, { "epoch": 0.4, "grad_norm": 1.6323795670726753, "learning_rate": 6.875349616849113e-06, "loss": 0.7826, "step": 3881 }, { "epoch": 0.4, "grad_norm": 1.5471767792227158, "learning_rate": 6.873816592181617e-06, "loss": 0.6662, "step": 3882 }, { "epoch": 0.4, "grad_norm": 1.4375215619212174, "learning_rate": 6.872283362553209e-06, "loss": 0.641, "step": 3883 }, { "epoch": 0.4, "grad_norm": 1.372797143502432, "learning_rate": 6.8707499281315925e-06, "loss": 0.5519, "step": 3884 }, { "epoch": 0.4, "grad_norm": 1.456144694541805, "learning_rate": 6.869216289084503e-06, "loss": 0.7497, "step": 3885 }, { "epoch": 0.4, "grad_norm": 1.4678300312734947, "learning_rate": 6.867682445579687e-06, "loss": 0.74, "step": 3886 }, { "epoch": 0.4, "grad_norm": 1.5295113036053953, "learning_rate": 6.866148397784922e-06, "loss": 0.6764, "step": 3887 }, { "epoch": 0.4, "grad_norm": 2.0094624610992984, "learning_rate": 6.8646141458680015e-06, "loss": 0.636, "step": 3888 }, { "epoch": 0.4, "grad_norm": 1.3265830967931327, "learning_rate": 6.863079689996747e-06, "loss": 0.5968, "step": 3889 }, { "epoch": 0.4, "grad_norm": 1.3854378951039223, "learning_rate": 6.861545030338998e-06, "loss": 0.6501, "step": 3890 }, { "epoch": 0.4, "grad_norm": 1.491405224882651, "learning_rate": 6.860010167062618e-06, "loss": 0.6938, "step": 3891 }, { "epoch": 0.4, "grad_norm": 1.474731026543773, "learning_rate": 6.858475100335496e-06, "loss": 0.716, "step": 3892 }, { "epoch": 0.4, "grad_norm": 1.453070159764045, "learning_rate": 6.8569398303255345e-06, "loss": 0.658, "step": 3893 }, { "epoch": 0.4, "grad_norm": 1.4415451532723322, "learning_rate": 6.855404357200666e-06, "loss": 0.6782, "step": 3894 }, { "epoch": 0.4, "grad_norm": 1.41908584608301, "learning_rate": 6.853868681128844e-06, "loss": 0.6883, "step": 3895 }, { "epoch": 0.4, "grad_norm": 1.6389232136736933, "learning_rate": 6.852332802278043e-06, "loss": 0.6607, "step": 3896 }, { "epoch": 0.4, "grad_norm": 1.59987270381699, "learning_rate": 6.850796720816257e-06, "loss": 0.716, "step": 3897 }, { "epoch": 0.4, "grad_norm": 1.2497095258443183, "learning_rate": 6.849260436911508e-06, "loss": 0.5921, "step": 3898 }, { "epoch": 0.4, "grad_norm": 1.4339251235136123, "learning_rate": 6.847723950731837e-06, "loss": 0.7497, "step": 3899 }, { "epoch": 0.4, "grad_norm": 1.3134037792619382, "learning_rate": 6.846187262445305e-06, "loss": 0.6477, "step": 3900 }, { "epoch": 0.4, "grad_norm": 1.350012546416609, "learning_rate": 6.844650372219999e-06, "loss": 0.6961, "step": 3901 }, { "epoch": 0.4, "grad_norm": 1.5412768653780091, "learning_rate": 6.8431132802240255e-06, "loss": 0.7082, "step": 3902 }, { "epoch": 0.4, "grad_norm": 1.3971425138129112, "learning_rate": 6.8415759866255124e-06, "loss": 0.6007, "step": 3903 }, { "epoch": 0.4, "grad_norm": 1.3859950335749054, "learning_rate": 6.840038491592617e-06, "loss": 0.7109, "step": 3904 }, { "epoch": 0.4, "grad_norm": 1.7107781129422737, "learning_rate": 6.838500795293506e-06, "loss": 0.676, "step": 3905 }, { "epoch": 0.4, "grad_norm": 1.3209622653309963, "learning_rate": 6.83696289789638e-06, "loss": 0.5533, "step": 3906 }, { "epoch": 0.4, "grad_norm": 1.3108505111410933, "learning_rate": 6.835424799569453e-06, "loss": 0.6223, "step": 3907 }, { "epoch": 0.4, "grad_norm": 1.481438333540888, "learning_rate": 6.833886500480967e-06, "loss": 0.7257, "step": 3908 }, { "epoch": 0.4, "grad_norm": 1.6871593740185225, "learning_rate": 6.832348000799183e-06, "loss": 0.6035, "step": 3909 }, { "epoch": 0.4, "grad_norm": 1.3563651575704643, "learning_rate": 6.830809300692383e-06, "loss": 0.657, "step": 3910 }, { "epoch": 0.4, "grad_norm": 1.4482136356104804, "learning_rate": 6.8292704003288734e-06, "loss": 0.6634, "step": 3911 }, { "epoch": 0.4, "grad_norm": 1.3976296122336231, "learning_rate": 6.827731299876981e-06, "loss": 0.6414, "step": 3912 }, { "epoch": 0.4, "grad_norm": 1.609922507879148, "learning_rate": 6.826191999505056e-06, "loss": 0.746, "step": 3913 }, { "epoch": 0.4, "grad_norm": 1.4038415576215566, "learning_rate": 6.82465249938147e-06, "loss": 0.5957, "step": 3914 }, { "epoch": 0.4, "grad_norm": 1.51497141970311, "learning_rate": 6.823112799674614e-06, "loss": 0.6149, "step": 3915 }, { "epoch": 0.4, "grad_norm": 1.3999921541512825, "learning_rate": 6.821572900552902e-06, "loss": 0.6721, "step": 3916 }, { "epoch": 0.4, "grad_norm": 1.407804792381667, "learning_rate": 6.820032802184773e-06, "loss": 0.7059, "step": 3917 }, { "epoch": 0.4, "grad_norm": 1.3742918088734086, "learning_rate": 6.8184925047386855e-06, "loss": 0.7081, "step": 3918 }, { "epoch": 0.4, "grad_norm": 1.9277571367962831, "learning_rate": 6.816952008383115e-06, "loss": 0.6645, "step": 3919 }, { "epoch": 0.4, "grad_norm": 1.4708686882977482, "learning_rate": 6.815411313286569e-06, "loss": 0.5947, "step": 3920 }, { "epoch": 0.4, "grad_norm": 1.4044551709061548, "learning_rate": 6.813870419617568e-06, "loss": 0.7287, "step": 3921 }, { "epoch": 0.4, "grad_norm": 1.3323092566240204, "learning_rate": 6.812329327544658e-06, "loss": 0.6093, "step": 3922 }, { "epoch": 0.4, "grad_norm": 1.3434636194901406, "learning_rate": 6.810788037236406e-06, "loss": 0.6906, "step": 3923 }, { "epoch": 0.4, "grad_norm": 1.5552983936307005, "learning_rate": 6.8092465488614e-06, "loss": 0.6999, "step": 3924 }, { "epoch": 0.4, "grad_norm": 1.2352061267183976, "learning_rate": 6.807704862588253e-06, "loss": 0.7315, "step": 3925 }, { "epoch": 0.4, "grad_norm": 1.4581501445096905, "learning_rate": 6.806162978585592e-06, "loss": 0.6485, "step": 3926 }, { "epoch": 0.4, "grad_norm": 1.3602969988236044, "learning_rate": 6.804620897022076e-06, "loss": 0.6476, "step": 3927 }, { "epoch": 0.4, "grad_norm": 1.411843089632885, "learning_rate": 6.803078618066378e-06, "loss": 0.7662, "step": 3928 }, { "epoch": 0.4, "grad_norm": 1.4186196539870883, "learning_rate": 6.801536141887193e-06, "loss": 0.6572, "step": 3929 }, { "epoch": 0.4, "grad_norm": 1.5930008736129733, "learning_rate": 6.799993468653242e-06, "loss": 0.6969, "step": 3930 }, { "epoch": 0.4, "grad_norm": 1.5025012465550378, "learning_rate": 6.798450598533264e-06, "loss": 0.6092, "step": 3931 }, { "epoch": 0.4, "grad_norm": 1.4559786217499466, "learning_rate": 6.796907531696021e-06, "loss": 0.6216, "step": 3932 }, { "epoch": 0.4, "grad_norm": 1.2670854680477222, "learning_rate": 6.795364268310294e-06, "loss": 0.5288, "step": 3933 }, { "epoch": 0.4, "grad_norm": 1.5580580884874773, "learning_rate": 6.7938208085448905e-06, "loss": 0.6902, "step": 3934 }, { "epoch": 0.4, "grad_norm": 1.50242688395326, "learning_rate": 6.792277152568634e-06, "loss": 0.6367, "step": 3935 }, { "epoch": 0.4, "grad_norm": 1.4351606061670579, "learning_rate": 6.790733300550374e-06, "loss": 0.6722, "step": 3936 }, { "epoch": 0.4, "grad_norm": 1.391199855894573, "learning_rate": 6.789189252658976e-06, "loss": 0.6288, "step": 3937 }, { "epoch": 0.4, "grad_norm": 1.7069551959821383, "learning_rate": 6.787645009063335e-06, "loss": 0.7934, "step": 3938 }, { "epoch": 0.4, "grad_norm": 1.4129470530228219, "learning_rate": 6.7861005699323605e-06, "loss": 0.6767, "step": 3939 }, { "epoch": 0.4, "grad_norm": 1.6000959101570333, "learning_rate": 6.784555935434985e-06, "loss": 0.699, "step": 3940 }, { "epoch": 0.4, "grad_norm": 2.7658725983356196, "learning_rate": 6.783011105740161e-06, "loss": 0.728, "step": 3941 }, { "epoch": 0.4, "grad_norm": 1.6345240864639805, "learning_rate": 6.78146608101687e-06, "loss": 0.6747, "step": 3942 }, { "epoch": 0.4, "grad_norm": 1.601425515524452, "learning_rate": 6.779920861434104e-06, "loss": 0.7153, "step": 3943 }, { "epoch": 0.4, "grad_norm": 1.5277735090970321, "learning_rate": 6.7783754471608835e-06, "loss": 0.6599, "step": 3944 }, { "epoch": 0.4, "grad_norm": 1.325911988886263, "learning_rate": 6.776829838366248e-06, "loss": 0.6147, "step": 3945 }, { "epoch": 0.4, "grad_norm": 1.6744239942960595, "learning_rate": 6.77528403521926e-06, "loss": 0.7656, "step": 3946 }, { "epoch": 0.4, "grad_norm": 1.7495408625247595, "learning_rate": 6.773738037888999e-06, "loss": 0.7301, "step": 3947 }, { "epoch": 0.4, "grad_norm": 1.436804275670504, "learning_rate": 6.7721918465445715e-06, "loss": 0.6563, "step": 3948 }, { "epoch": 0.4, "grad_norm": 1.4323994195998584, "learning_rate": 6.770645461355099e-06, "loss": 0.6236, "step": 3949 }, { "epoch": 0.4, "grad_norm": 1.5431325360389265, "learning_rate": 6.76909888248973e-06, "loss": 0.7016, "step": 3950 }, { "epoch": 0.4, "grad_norm": 1.3900903532393287, "learning_rate": 6.767552110117631e-06, "loss": 0.6879, "step": 3951 }, { "epoch": 0.4, "grad_norm": 1.4743769244844216, "learning_rate": 6.76600514440799e-06, "loss": 0.6644, "step": 3952 }, { "epoch": 0.4, "grad_norm": 1.389901615175386, "learning_rate": 6.764457985530017e-06, "loss": 0.6475, "step": 3953 }, { "epoch": 0.4, "grad_norm": 1.2531223366700657, "learning_rate": 6.762910633652943e-06, "loss": 0.5912, "step": 3954 }, { "epoch": 0.4, "grad_norm": 1.3747391754962168, "learning_rate": 6.7613630889460165e-06, "loss": 0.6434, "step": 3955 }, { "epoch": 0.4, "grad_norm": 1.4252571877073115, "learning_rate": 6.759815351578516e-06, "loss": 0.619, "step": 3956 }, { "epoch": 0.4, "grad_norm": 1.340931100902377, "learning_rate": 6.7582674217197294e-06, "loss": 0.632, "step": 3957 }, { "epoch": 0.4, "grad_norm": 1.3243212389172143, "learning_rate": 6.756719299538976e-06, "loss": 0.6193, "step": 3958 }, { "epoch": 0.4, "grad_norm": 1.5109614703053067, "learning_rate": 6.75517098520559e-06, "loss": 0.6189, "step": 3959 }, { "epoch": 0.4, "grad_norm": 1.4025596228765635, "learning_rate": 6.7536224788889285e-06, "loss": 0.6609, "step": 3960 }, { "epoch": 0.4, "grad_norm": 1.4469307018641278, "learning_rate": 6.752073780758369e-06, "loss": 0.7654, "step": 3961 }, { "epoch": 0.4, "grad_norm": 1.3293275101104773, "learning_rate": 6.75052489098331e-06, "loss": 0.5992, "step": 3962 }, { "epoch": 0.4, "grad_norm": 1.3620258516210835, "learning_rate": 6.748975809733175e-06, "loss": 0.6363, "step": 3963 }, { "epoch": 0.4, "grad_norm": 1.4500764482350688, "learning_rate": 6.747426537177401e-06, "loss": 0.5902, "step": 3964 }, { "epoch": 0.4, "grad_norm": 1.376793459939033, "learning_rate": 6.745877073485449e-06, "loss": 0.6952, "step": 3965 }, { "epoch": 0.4, "grad_norm": 1.4649593818029207, "learning_rate": 6.744327418826806e-06, "loss": 0.7135, "step": 3966 }, { "epoch": 0.41, "grad_norm": 1.495025233586697, "learning_rate": 6.742777573370974e-06, "loss": 0.6974, "step": 3967 }, { "epoch": 0.41, "grad_norm": 1.2798050286185771, "learning_rate": 6.7412275372874756e-06, "loss": 0.5661, "step": 3968 }, { "epoch": 0.41, "grad_norm": 1.4872098934254045, "learning_rate": 6.739677310745856e-06, "loss": 0.6117, "step": 3969 }, { "epoch": 0.41, "grad_norm": 1.4407650929806908, "learning_rate": 6.738126893915685e-06, "loss": 0.7106, "step": 3970 }, { "epoch": 0.41, "grad_norm": 1.4007887247515238, "learning_rate": 6.7365762869665465e-06, "loss": 0.6031, "step": 3971 }, { "epoch": 0.41, "grad_norm": 1.3183651079498326, "learning_rate": 6.735025490068048e-06, "loss": 0.6269, "step": 3972 }, { "epoch": 0.41, "grad_norm": 1.6592385722706224, "learning_rate": 6.733474503389819e-06, "loss": 0.7626, "step": 3973 }, { "epoch": 0.41, "grad_norm": 1.280707548653041, "learning_rate": 6.7319233271015104e-06, "loss": 0.5407, "step": 3974 }, { "epoch": 0.41, "grad_norm": 1.5634368626628947, "learning_rate": 6.730371961372789e-06, "loss": 0.7264, "step": 3975 }, { "epoch": 0.41, "grad_norm": 1.3968447946110785, "learning_rate": 6.728820406373346e-06, "loss": 0.6698, "step": 3976 }, { "epoch": 0.41, "grad_norm": 1.4366777506756938, "learning_rate": 6.727268662272897e-06, "loss": 0.6479, "step": 3977 }, { "epoch": 0.41, "grad_norm": 1.2263055263310174, "learning_rate": 6.72571672924117e-06, "loss": 0.689, "step": 3978 }, { "epoch": 0.41, "grad_norm": 1.451798983323493, "learning_rate": 6.724164607447919e-06, "loss": 0.6951, "step": 3979 }, { "epoch": 0.41, "grad_norm": 1.3556071529126017, "learning_rate": 6.722612297062916e-06, "loss": 0.5359, "step": 3980 }, { "epoch": 0.41, "grad_norm": 1.4860946629408822, "learning_rate": 6.721059798255958e-06, "loss": 0.7109, "step": 3981 }, { "epoch": 0.41, "grad_norm": 1.575144007615824, "learning_rate": 6.719507111196857e-06, "loss": 0.6871, "step": 3982 }, { "epoch": 0.41, "grad_norm": 1.613158010466805, "learning_rate": 6.717954236055449e-06, "loss": 0.8596, "step": 3983 }, { "epoch": 0.41, "grad_norm": 1.5170440039455777, "learning_rate": 6.716401173001591e-06, "loss": 0.6606, "step": 3984 }, { "epoch": 0.41, "grad_norm": 1.3424709206369905, "learning_rate": 6.714847922205159e-06, "loss": 0.6829, "step": 3985 }, { "epoch": 0.41, "grad_norm": 1.5100845526310698, "learning_rate": 6.713294483836048e-06, "loss": 0.68, "step": 3986 }, { "epoch": 0.41, "grad_norm": 1.5212726324014427, "learning_rate": 6.711740858064177e-06, "loss": 0.6698, "step": 3987 }, { "epoch": 0.41, "grad_norm": 1.3719090117507573, "learning_rate": 6.710187045059486e-06, "loss": 0.783, "step": 3988 }, { "epoch": 0.41, "grad_norm": 1.9397156478703081, "learning_rate": 6.708633044991931e-06, "loss": 0.7586, "step": 3989 }, { "epoch": 0.41, "grad_norm": 1.6057920701133626, "learning_rate": 6.707078858031491e-06, "loss": 0.7234, "step": 3990 }, { "epoch": 0.41, "grad_norm": 1.6646545903625276, "learning_rate": 6.705524484348165e-06, "loss": 0.7159, "step": 3991 }, { "epoch": 0.41, "grad_norm": 1.3892667905620053, "learning_rate": 6.703969924111974e-06, "loss": 0.6855, "step": 3992 }, { "epoch": 0.41, "grad_norm": 1.5212730675037411, "learning_rate": 6.702415177492957e-06, "loss": 0.6782, "step": 3993 }, { "epoch": 0.41, "grad_norm": 1.5210578569599595, "learning_rate": 6.700860244661175e-06, "loss": 0.6732, "step": 3994 }, { "epoch": 0.41, "grad_norm": 1.4072713287086285, "learning_rate": 6.699305125786709e-06, "loss": 0.5717, "step": 3995 }, { "epoch": 0.41, "grad_norm": 1.3867738077064036, "learning_rate": 6.697749821039662e-06, "loss": 0.7181, "step": 3996 }, { "epoch": 0.41, "grad_norm": 1.4863781526119904, "learning_rate": 6.6961943305901515e-06, "loss": 0.6801, "step": 3997 }, { "epoch": 0.41, "grad_norm": 1.4689551114486352, "learning_rate": 6.694638654608324e-06, "loss": 0.5254, "step": 3998 }, { "epoch": 0.41, "grad_norm": 1.3999822165475815, "learning_rate": 6.693082793264338e-06, "loss": 0.5976, "step": 3999 }, { "epoch": 0.41, "grad_norm": 1.52458992132758, "learning_rate": 6.6915267467283785e-06, "loss": 0.6231, "step": 4000 }, { "epoch": 0.41, "grad_norm": 1.2278251840924328, "learning_rate": 6.689970515170646e-06, "loss": 0.5811, "step": 4001 }, { "epoch": 0.41, "grad_norm": 1.4049041688264128, "learning_rate": 6.688414098761365e-06, "loss": 0.6198, "step": 4002 }, { "epoch": 0.41, "grad_norm": 1.7446108573484054, "learning_rate": 6.68685749767078e-06, "loss": 0.7614, "step": 4003 }, { "epoch": 0.41, "grad_norm": 1.4297577607256018, "learning_rate": 6.68530071206915e-06, "loss": 0.6579, "step": 4004 }, { "epoch": 0.41, "grad_norm": 1.2866390366501392, "learning_rate": 6.683743742126765e-06, "loss": 0.6771, "step": 4005 }, { "epoch": 0.41, "grad_norm": 1.324815383963844, "learning_rate": 6.682186588013924e-06, "loss": 0.6936, "step": 4006 }, { "epoch": 0.41, "grad_norm": 1.3770882047876951, "learning_rate": 6.68062924990095e-06, "loss": 0.767, "step": 4007 }, { "epoch": 0.41, "grad_norm": 1.5106821186706385, "learning_rate": 6.6790717279581905e-06, "loss": 0.8006, "step": 4008 }, { "epoch": 0.41, "grad_norm": 1.335236309670909, "learning_rate": 6.677514022356007e-06, "loss": 0.6431, "step": 4009 }, { "epoch": 0.41, "grad_norm": 1.3570676753524185, "learning_rate": 6.675956133264787e-06, "loss": 0.5583, "step": 4010 }, { "epoch": 0.41, "grad_norm": 1.4176953611154337, "learning_rate": 6.674398060854931e-06, "loss": 0.6147, "step": 4011 }, { "epoch": 0.41, "grad_norm": 1.4231333587731443, "learning_rate": 6.672839805296867e-06, "loss": 0.7019, "step": 4012 }, { "epoch": 0.41, "grad_norm": 1.5120813051056907, "learning_rate": 6.671281366761036e-06, "loss": 0.6735, "step": 4013 }, { "epoch": 0.41, "grad_norm": 1.4287748822904416, "learning_rate": 6.669722745417903e-06, "loss": 0.7297, "step": 4014 }, { "epoch": 0.41, "grad_norm": 1.4737449506808555, "learning_rate": 6.668163941437953e-06, "loss": 0.5449, "step": 4015 }, { "epoch": 0.41, "grad_norm": 1.4988698074422462, "learning_rate": 6.666604954991691e-06, "loss": 0.747, "step": 4016 }, { "epoch": 0.41, "grad_norm": 1.4983790503579892, "learning_rate": 6.665045786249642e-06, "loss": 0.6925, "step": 4017 }, { "epoch": 0.41, "grad_norm": 1.5949303474162018, "learning_rate": 6.663486435382347e-06, "loss": 0.6024, "step": 4018 }, { "epoch": 0.41, "grad_norm": 1.2338657558662864, "learning_rate": 6.661926902560374e-06, "loss": 0.662, "step": 4019 }, { "epoch": 0.41, "grad_norm": 1.245935378857045, "learning_rate": 6.660367187954304e-06, "loss": 0.6757, "step": 4020 }, { "epoch": 0.41, "grad_norm": 1.702969291934631, "learning_rate": 6.6588072917347435e-06, "loss": 0.6607, "step": 4021 }, { "epoch": 0.41, "grad_norm": 1.8936872480133642, "learning_rate": 6.6572472140723145e-06, "loss": 0.7612, "step": 4022 }, { "epoch": 0.41, "grad_norm": 1.4240755477690232, "learning_rate": 6.655686955137661e-06, "loss": 0.719, "step": 4023 }, { "epoch": 0.41, "grad_norm": 1.4017275795481534, "learning_rate": 6.6541265151014475e-06, "loss": 0.6259, "step": 4024 }, { "epoch": 0.41, "grad_norm": 1.460462799449177, "learning_rate": 6.652565894134355e-06, "loss": 0.6904, "step": 4025 }, { "epoch": 0.41, "grad_norm": 1.4185353790770312, "learning_rate": 6.65100509240709e-06, "loss": 0.5741, "step": 4026 }, { "epoch": 0.41, "grad_norm": 1.2981711803631382, "learning_rate": 6.649444110090372e-06, "loss": 0.6175, "step": 4027 }, { "epoch": 0.41, "grad_norm": 1.4173099869838246, "learning_rate": 6.647882947354945e-06, "loss": 0.7066, "step": 4028 }, { "epoch": 0.41, "grad_norm": 1.9215489246596429, "learning_rate": 6.646321604371572e-06, "loss": 0.7406, "step": 4029 }, { "epoch": 0.41, "grad_norm": 1.4234444266629984, "learning_rate": 6.6447600813110346e-06, "loss": 0.6579, "step": 4030 }, { "epoch": 0.41, "grad_norm": 1.2868249890586203, "learning_rate": 6.643198378344136e-06, "loss": 0.574, "step": 4031 }, { "epoch": 0.41, "grad_norm": 1.4782325744385936, "learning_rate": 6.641636495641694e-06, "loss": 0.6463, "step": 4032 }, { "epoch": 0.41, "grad_norm": 1.531150340700836, "learning_rate": 6.640074433374554e-06, "loss": 0.7652, "step": 4033 }, { "epoch": 0.41, "grad_norm": 1.66838357410976, "learning_rate": 6.6385121917135735e-06, "loss": 0.6523, "step": 4034 }, { "epoch": 0.41, "grad_norm": 1.2774532008080717, "learning_rate": 6.636949770829633e-06, "loss": 0.6456, "step": 4035 }, { "epoch": 0.41, "grad_norm": 1.407240345600175, "learning_rate": 6.635387170893635e-06, "loss": 0.6812, "step": 4036 }, { "epoch": 0.41, "grad_norm": 1.4760253126255038, "learning_rate": 6.633824392076497e-06, "loss": 0.5476, "step": 4037 }, { "epoch": 0.41, "grad_norm": 1.4141190196921123, "learning_rate": 6.632261434549159e-06, "loss": 0.6623, "step": 4038 }, { "epoch": 0.41, "grad_norm": 1.3270533225972991, "learning_rate": 6.630698298482578e-06, "loss": 0.6419, "step": 4039 }, { "epoch": 0.41, "grad_norm": 1.463292849206302, "learning_rate": 6.629134984047736e-06, "loss": 0.6656, "step": 4040 }, { "epoch": 0.41, "grad_norm": 1.4414609312799862, "learning_rate": 6.627571491415625e-06, "loss": 0.6624, "step": 4041 }, { "epoch": 0.41, "grad_norm": 1.4438447739476534, "learning_rate": 6.6260078207572675e-06, "loss": 0.7125, "step": 4042 }, { "epoch": 0.41, "grad_norm": 1.4857779077123363, "learning_rate": 6.6244439722436985e-06, "loss": 0.7198, "step": 4043 }, { "epoch": 0.41, "grad_norm": 1.5663570231855908, "learning_rate": 6.622879946045972e-06, "loss": 0.7236, "step": 4044 }, { "epoch": 0.41, "grad_norm": 1.2310926414363763, "learning_rate": 6.6213157423351686e-06, "loss": 0.6058, "step": 4045 }, { "epoch": 0.41, "grad_norm": 1.4846759270025687, "learning_rate": 6.619751361282377e-06, "loss": 0.6375, "step": 4046 }, { "epoch": 0.41, "grad_norm": 1.5553967884139155, "learning_rate": 6.618186803058717e-06, "loss": 0.7282, "step": 4047 }, { "epoch": 0.41, "grad_norm": 1.3581543290091023, "learning_rate": 6.6166220678353185e-06, "loss": 0.5248, "step": 4048 }, { "epoch": 0.41, "grad_norm": 1.4286566276100259, "learning_rate": 6.615057155783335e-06, "loss": 0.5813, "step": 4049 }, { "epoch": 0.41, "grad_norm": 1.4877929021588816, "learning_rate": 6.613492067073942e-06, "loss": 0.7132, "step": 4050 }, { "epoch": 0.41, "grad_norm": 1.7956355127076788, "learning_rate": 6.6119268018783286e-06, "loss": 0.7168, "step": 4051 }, { "epoch": 0.41, "grad_norm": 1.4683036408574346, "learning_rate": 6.610361360367708e-06, "loss": 0.6214, "step": 4052 }, { "epoch": 0.41, "grad_norm": 1.8502618941518765, "learning_rate": 6.608795742713306e-06, "loss": 0.5679, "step": 4053 }, { "epoch": 0.41, "grad_norm": 1.48802517542491, "learning_rate": 6.607229949086379e-06, "loss": 0.7137, "step": 4054 }, { "epoch": 0.41, "grad_norm": 1.5756614631089165, "learning_rate": 6.60566397965819e-06, "loss": 0.7742, "step": 4055 }, { "epoch": 0.41, "grad_norm": 1.4920052797955403, "learning_rate": 6.604097834600031e-06, "loss": 0.6469, "step": 4056 }, { "epoch": 0.41, "grad_norm": 1.4098165917931995, "learning_rate": 6.602531514083207e-06, "loss": 0.6389, "step": 4057 }, { "epoch": 0.41, "grad_norm": 1.5774930272058802, "learning_rate": 6.600965018279046e-06, "loss": 0.6651, "step": 4058 }, { "epoch": 0.41, "grad_norm": 1.257157965236311, "learning_rate": 6.599398347358892e-06, "loss": 0.6258, "step": 4059 }, { "epoch": 0.41, "grad_norm": 1.3150377587001927, "learning_rate": 6.597831501494111e-06, "loss": 0.6288, "step": 4060 }, { "epoch": 0.41, "grad_norm": 1.4864919686545808, "learning_rate": 6.5962644808560875e-06, "loss": 0.6856, "step": 4061 }, { "epoch": 0.41, "grad_norm": 1.4387854221740823, "learning_rate": 6.594697285616224e-06, "loss": 0.6591, "step": 4062 }, { "epoch": 0.41, "grad_norm": 1.2930421865077073, "learning_rate": 6.593129915945942e-06, "loss": 0.4979, "step": 4063 }, { "epoch": 0.41, "grad_norm": 1.340456412845302, "learning_rate": 6.591562372016683e-06, "loss": 0.6545, "step": 4064 }, { "epoch": 0.42, "grad_norm": 1.3592989886374094, "learning_rate": 6.589994653999909e-06, "loss": 0.6762, "step": 4065 }, { "epoch": 0.42, "grad_norm": 1.3662141384489808, "learning_rate": 6.588426762067099e-06, "loss": 0.6776, "step": 4066 }, { "epoch": 0.42, "grad_norm": 1.4981424876353302, "learning_rate": 6.586858696389748e-06, "loss": 0.7451, "step": 4067 }, { "epoch": 0.42, "grad_norm": 1.2181396017971873, "learning_rate": 6.585290457139378e-06, "loss": 0.6367, "step": 4068 }, { "epoch": 0.42, "grad_norm": 1.9369427224528248, "learning_rate": 6.583722044487523e-06, "loss": 0.6489, "step": 4069 }, { "epoch": 0.42, "grad_norm": 1.8275892144883115, "learning_rate": 6.582153458605738e-06, "loss": 0.6736, "step": 4070 }, { "epoch": 0.42, "grad_norm": 1.3659172793343366, "learning_rate": 6.580584699665598e-06, "loss": 0.6209, "step": 4071 }, { "epoch": 0.42, "grad_norm": 1.385018268679821, "learning_rate": 6.579015767838698e-06, "loss": 0.676, "step": 4072 }, { "epoch": 0.42, "grad_norm": 1.3531335375564935, "learning_rate": 6.577446663296648e-06, "loss": 0.6853, "step": 4073 }, { "epoch": 0.42, "grad_norm": 1.5533467765470816, "learning_rate": 6.575877386211078e-06, "loss": 0.7253, "step": 4074 }, { "epoch": 0.42, "grad_norm": 1.5259648070557623, "learning_rate": 6.5743079367536415e-06, "loss": 0.6929, "step": 4075 }, { "epoch": 0.42, "grad_norm": 1.4690400952477023, "learning_rate": 6.5727383150960035e-06, "loss": 0.7101, "step": 4076 }, { "epoch": 0.42, "grad_norm": 1.2561907129815046, "learning_rate": 6.571168521409854e-06, "loss": 0.6493, "step": 4077 }, { "epoch": 0.42, "grad_norm": 1.4319694470436748, "learning_rate": 6.569598555866898e-06, "loss": 0.6742, "step": 4078 }, { "epoch": 0.42, "grad_norm": 1.4169995610251502, "learning_rate": 6.568028418638864e-06, "loss": 0.5492, "step": 4079 }, { "epoch": 0.42, "grad_norm": 1.3800633873664188, "learning_rate": 6.566458109897491e-06, "loss": 0.6804, "step": 4080 }, { "epoch": 0.42, "grad_norm": 1.682911455207132, "learning_rate": 6.564887629814544e-06, "loss": 0.7588, "step": 4081 }, { "epoch": 0.42, "grad_norm": 1.2625313246184464, "learning_rate": 6.563316978561806e-06, "loss": 0.6052, "step": 4082 }, { "epoch": 0.42, "grad_norm": 1.4088081462621116, "learning_rate": 6.561746156311075e-06, "loss": 0.6558, "step": 4083 }, { "epoch": 0.42, "grad_norm": 1.4453684927279946, "learning_rate": 6.5601751632341725e-06, "loss": 0.6756, "step": 4084 }, { "epoch": 0.42, "grad_norm": 1.548335936123107, "learning_rate": 6.5586039995029326e-06, "loss": 0.6754, "step": 4085 }, { "epoch": 0.42, "grad_norm": 1.4141054487336442, "learning_rate": 6.557032665289214e-06, "loss": 0.6649, "step": 4086 }, { "epoch": 0.42, "grad_norm": 1.5731592758086017, "learning_rate": 6.555461160764892e-06, "loss": 0.723, "step": 4087 }, { "epoch": 0.42, "grad_norm": 1.3421513274007195, "learning_rate": 6.553889486101858e-06, "loss": 0.6714, "step": 4088 }, { "epoch": 0.42, "grad_norm": 1.5604362640796383, "learning_rate": 6.552317641472027e-06, "loss": 0.6985, "step": 4089 }, { "epoch": 0.42, "grad_norm": 1.5894657472600862, "learning_rate": 6.5507456270473265e-06, "loss": 0.6372, "step": 4090 }, { "epoch": 0.42, "grad_norm": 1.5864920962641091, "learning_rate": 6.549173442999709e-06, "loss": 0.7114, "step": 4091 }, { "epoch": 0.42, "grad_norm": 1.4514509495363177, "learning_rate": 6.54760108950114e-06, "loss": 0.5092, "step": 4092 }, { "epoch": 0.42, "grad_norm": 1.2958873687488064, "learning_rate": 6.546028566723607e-06, "loss": 0.7356, "step": 4093 }, { "epoch": 0.42, "grad_norm": 1.3928567914343255, "learning_rate": 6.544455874839115e-06, "loss": 0.6634, "step": 4094 }, { "epoch": 0.42, "grad_norm": 1.5032020475340668, "learning_rate": 6.542883014019687e-06, "loss": 0.6344, "step": 4095 }, { "epoch": 0.42, "grad_norm": 1.4293673546928418, "learning_rate": 6.541309984437365e-06, "loss": 0.6897, "step": 4096 }, { "epoch": 0.42, "grad_norm": 1.3473510559423187, "learning_rate": 6.53973678626421e-06, "loss": 0.693, "step": 4097 }, { "epoch": 0.42, "grad_norm": 1.5423119461486061, "learning_rate": 6.5381634196723e-06, "loss": 0.7019, "step": 4098 }, { "epoch": 0.42, "grad_norm": 1.4868559910756955, "learning_rate": 6.536589884833733e-06, "loss": 0.7029, "step": 4099 }, { "epoch": 0.42, "grad_norm": 1.5581324514160713, "learning_rate": 6.535016181920623e-06, "loss": 0.612, "step": 4100 }, { "epoch": 0.42, "grad_norm": 1.6203962793762945, "learning_rate": 6.533442311105107e-06, "loss": 0.6271, "step": 4101 }, { "epoch": 0.42, "grad_norm": 1.2591679868314687, "learning_rate": 6.531868272559333e-06, "loss": 0.5343, "step": 4102 }, { "epoch": 0.42, "grad_norm": 1.517727279762144, "learning_rate": 6.530294066455476e-06, "loss": 0.7671, "step": 4103 }, { "epoch": 0.42, "grad_norm": 1.4198754103920703, "learning_rate": 6.528719692965724e-06, "loss": 0.6771, "step": 4104 }, { "epoch": 0.42, "grad_norm": 1.6075267596693261, "learning_rate": 6.527145152262282e-06, "loss": 0.6669, "step": 4105 }, { "epoch": 0.42, "grad_norm": 1.8812105363769576, "learning_rate": 6.5255704445173785e-06, "loss": 0.7166, "step": 4106 }, { "epoch": 0.42, "grad_norm": 1.5167590711812744, "learning_rate": 6.523995569903256e-06, "loss": 0.618, "step": 4107 }, { "epoch": 0.42, "grad_norm": 1.3772258471174308, "learning_rate": 6.522420528592177e-06, "loss": 0.5665, "step": 4108 }, { "epoch": 0.42, "grad_norm": 1.5399580261705212, "learning_rate": 6.520845320756421e-06, "loss": 0.6461, "step": 4109 }, { "epoch": 0.42, "grad_norm": 1.5085164499417965, "learning_rate": 6.519269946568289e-06, "loss": 0.6828, "step": 4110 }, { "epoch": 0.42, "grad_norm": 2.1394540379848586, "learning_rate": 6.517694406200097e-06, "loss": 0.6589, "step": 4111 }, { "epoch": 0.42, "grad_norm": 1.5327480432938916, "learning_rate": 6.516118699824179e-06, "loss": 0.6465, "step": 4112 }, { "epoch": 0.42, "grad_norm": 1.5155929654777605, "learning_rate": 6.514542827612887e-06, "loss": 0.6161, "step": 4113 }, { "epoch": 0.42, "grad_norm": 1.4560468419215422, "learning_rate": 6.512966789738595e-06, "loss": 0.7352, "step": 4114 }, { "epoch": 0.42, "grad_norm": 1.4331588076646373, "learning_rate": 6.511390586373692e-06, "loss": 0.7247, "step": 4115 }, { "epoch": 0.42, "grad_norm": 1.460533640748436, "learning_rate": 6.509814217690582e-06, "loss": 0.6484, "step": 4116 }, { "epoch": 0.42, "grad_norm": 1.5281973915740497, "learning_rate": 6.508237683861697e-06, "loss": 0.6922, "step": 4117 }, { "epoch": 0.42, "grad_norm": 1.518065912489487, "learning_rate": 6.506660985059475e-06, "loss": 0.6648, "step": 4118 }, { "epoch": 0.42, "grad_norm": 1.4797377485876393, "learning_rate": 6.505084121456381e-06, "loss": 0.6389, "step": 4119 }, { "epoch": 0.42, "grad_norm": 1.2696017523433285, "learning_rate": 6.503507093224892e-06, "loss": 0.6041, "step": 4120 }, { "epoch": 0.42, "grad_norm": 1.5081892184782424, "learning_rate": 6.5019299005375085e-06, "loss": 0.6806, "step": 4121 }, { "epoch": 0.42, "grad_norm": 1.6081555357767652, "learning_rate": 6.5003525435667455e-06, "loss": 0.6582, "step": 4122 }, { "epoch": 0.42, "grad_norm": 1.3634785224168113, "learning_rate": 6.4987750224851345e-06, "loss": 0.683, "step": 4123 }, { "epoch": 0.42, "grad_norm": 1.3890726129360345, "learning_rate": 6.4971973374652305e-06, "loss": 0.6221, "step": 4124 }, { "epoch": 0.42, "grad_norm": 1.4056382410746642, "learning_rate": 6.495619488679601e-06, "loss": 0.7456, "step": 4125 }, { "epoch": 0.42, "grad_norm": 1.575987446372525, "learning_rate": 6.494041476300833e-06, "loss": 0.6652, "step": 4126 }, { "epoch": 0.42, "grad_norm": 1.4464468066037293, "learning_rate": 6.492463300501534e-06, "loss": 0.5886, "step": 4127 }, { "epoch": 0.42, "grad_norm": 1.5261941431787858, "learning_rate": 6.4908849614543256e-06, "loss": 0.6129, "step": 4128 }, { "epoch": 0.42, "grad_norm": 1.2683839040890326, "learning_rate": 6.489306459331851e-06, "loss": 0.5702, "step": 4129 }, { "epoch": 0.42, "grad_norm": 1.5137330858952822, "learning_rate": 6.487727794306765e-06, "loss": 0.6625, "step": 4130 }, { "epoch": 0.42, "grad_norm": 1.3241375902529227, "learning_rate": 6.48614896655175e-06, "loss": 0.6246, "step": 4131 }, { "epoch": 0.42, "grad_norm": 1.344480535825932, "learning_rate": 6.484569976239495e-06, "loss": 0.6615, "step": 4132 }, { "epoch": 0.42, "grad_norm": 1.4552630608902426, "learning_rate": 6.482990823542717e-06, "loss": 0.8446, "step": 4133 }, { "epoch": 0.42, "grad_norm": 1.437496205980814, "learning_rate": 6.481411508634143e-06, "loss": 0.7288, "step": 4134 }, { "epoch": 0.42, "grad_norm": 1.4087669822268194, "learning_rate": 6.479832031686522e-06, "loss": 0.7152, "step": 4135 }, { "epoch": 0.42, "grad_norm": 1.569952917815755, "learning_rate": 6.4782523928726205e-06, "loss": 0.6801, "step": 4136 }, { "epoch": 0.42, "grad_norm": 1.3970326790529954, "learning_rate": 6.476672592365219e-06, "loss": 0.7399, "step": 4137 }, { "epoch": 0.42, "grad_norm": 3.2030575277502287, "learning_rate": 6.475092630337123e-06, "loss": 0.6567, "step": 4138 }, { "epoch": 0.42, "grad_norm": 1.3314658595775593, "learning_rate": 6.473512506961147e-06, "loss": 0.6072, "step": 4139 }, { "epoch": 0.42, "grad_norm": 1.379854530474421, "learning_rate": 6.471932222410129e-06, "loss": 0.7302, "step": 4140 }, { "epoch": 0.42, "grad_norm": 1.462604976711243, "learning_rate": 6.470351776856922e-06, "loss": 0.6722, "step": 4141 }, { "epoch": 0.42, "grad_norm": 1.3405747442916858, "learning_rate": 6.4687711704743995e-06, "loss": 0.5678, "step": 4142 }, { "epoch": 0.42, "grad_norm": 1.4676281632764396, "learning_rate": 6.467190403435451e-06, "loss": 0.6705, "step": 4143 }, { "epoch": 0.42, "grad_norm": 1.6451383474889443, "learning_rate": 6.465609475912977e-06, "loss": 0.5869, "step": 4144 }, { "epoch": 0.42, "grad_norm": 1.3205530908888368, "learning_rate": 6.46402838807991e-06, "loss": 0.679, "step": 4145 }, { "epoch": 0.42, "grad_norm": 1.3861097826857047, "learning_rate": 6.462447140109187e-06, "loss": 0.6853, "step": 4146 }, { "epoch": 0.42, "grad_norm": 1.4980406118158303, "learning_rate": 6.460865732173769e-06, "loss": 0.6429, "step": 4147 }, { "epoch": 0.42, "grad_norm": 1.4124675387017958, "learning_rate": 6.459284164446632e-06, "loss": 0.6355, "step": 4148 }, { "epoch": 0.42, "grad_norm": 1.280875030840417, "learning_rate": 6.45770243710077e-06, "loss": 0.6436, "step": 4149 }, { "epoch": 0.42, "grad_norm": 1.3396994917155127, "learning_rate": 6.456120550309198e-06, "loss": 0.6911, "step": 4150 }, { "epoch": 0.42, "grad_norm": 1.5348175706978755, "learning_rate": 6.454538504244938e-06, "loss": 0.6558, "step": 4151 }, { "epoch": 0.42, "grad_norm": 1.3594332675335536, "learning_rate": 6.452956299081046e-06, "loss": 0.64, "step": 4152 }, { "epoch": 0.42, "grad_norm": 1.4733235180011277, "learning_rate": 6.451373934990578e-06, "loss": 0.7123, "step": 4153 }, { "epoch": 0.42, "grad_norm": 1.337602161148088, "learning_rate": 6.44979141214662e-06, "loss": 0.6542, "step": 4154 }, { "epoch": 0.42, "grad_norm": 1.3661788239834216, "learning_rate": 6.448208730722267e-06, "loss": 0.5298, "step": 4155 }, { "epoch": 0.42, "grad_norm": 2.35054308681544, "learning_rate": 6.44662589089064e-06, "loss": 0.721, "step": 4156 }, { "epoch": 0.42, "grad_norm": 1.527168087115257, "learning_rate": 6.44504289282487e-06, "loss": 0.6777, "step": 4157 }, { "epoch": 0.42, "grad_norm": 1.5004291541363748, "learning_rate": 6.443459736698106e-06, "loss": 0.5654, "step": 4158 }, { "epoch": 0.42, "grad_norm": 1.3911217932973827, "learning_rate": 6.441876422683519e-06, "loss": 0.7296, "step": 4159 }, { "epoch": 0.42, "grad_norm": 1.6742102161005872, "learning_rate": 6.440292950954291e-06, "loss": 0.6836, "step": 4160 }, { "epoch": 0.42, "grad_norm": 1.3760094506424345, "learning_rate": 6.438709321683627e-06, "loss": 0.6022, "step": 4161 }, { "epoch": 0.42, "grad_norm": 1.4787835570296524, "learning_rate": 6.437125535044747e-06, "loss": 0.7101, "step": 4162 }, { "epoch": 0.43, "grad_norm": 1.813779036174551, "learning_rate": 6.435541591210888e-06, "loss": 0.7132, "step": 4163 }, { "epoch": 0.43, "grad_norm": 1.3629127862024635, "learning_rate": 6.433957490355303e-06, "loss": 0.5906, "step": 4164 }, { "epoch": 0.43, "grad_norm": 1.4210947706549153, "learning_rate": 6.4323732326512615e-06, "loss": 0.6854, "step": 4165 }, { "epoch": 0.43, "grad_norm": 1.5321587218654742, "learning_rate": 6.430788818272057e-06, "loss": 0.6433, "step": 4166 }, { "epoch": 0.43, "grad_norm": 1.714847149693616, "learning_rate": 6.429204247390992e-06, "loss": 0.6443, "step": 4167 }, { "epoch": 0.43, "grad_norm": 1.7776983423528432, "learning_rate": 6.4276195201813885e-06, "loss": 0.7472, "step": 4168 }, { "epoch": 0.43, "grad_norm": 1.5164105576986382, "learning_rate": 6.426034636816588e-06, "loss": 0.6692, "step": 4169 }, { "epoch": 0.43, "grad_norm": 1.5437532751657552, "learning_rate": 6.4244495974699475e-06, "loss": 0.6802, "step": 4170 }, { "epoch": 0.43, "grad_norm": 1.4036697730980148, "learning_rate": 6.4228644023148415e-06, "loss": 0.6027, "step": 4171 }, { "epoch": 0.43, "grad_norm": 1.3477643317198258, "learning_rate": 6.421279051524658e-06, "loss": 0.6859, "step": 4172 }, { "epoch": 0.43, "grad_norm": 1.475512108926545, "learning_rate": 6.419693545272809e-06, "loss": 0.65, "step": 4173 }, { "epoch": 0.43, "grad_norm": 1.397820495029363, "learning_rate": 6.4181078837327186e-06, "loss": 0.6298, "step": 4174 }, { "epoch": 0.43, "grad_norm": 2.9748779682486544, "learning_rate": 6.416522067077826e-06, "loss": 0.7421, "step": 4175 }, { "epoch": 0.43, "grad_norm": 1.448531756501715, "learning_rate": 6.414936095481592e-06, "loss": 0.6571, "step": 4176 }, { "epoch": 0.43, "grad_norm": 1.4381423082669842, "learning_rate": 6.413349969117495e-06, "loss": 0.7299, "step": 4177 }, { "epoch": 0.43, "grad_norm": 1.5460237502501173, "learning_rate": 6.411763688159025e-06, "loss": 0.6599, "step": 4178 }, { "epoch": 0.43, "grad_norm": 1.310171023847785, "learning_rate": 6.410177252779691e-06, "loss": 0.7136, "step": 4179 }, { "epoch": 0.43, "grad_norm": 1.412812517882533, "learning_rate": 6.408590663153025e-06, "loss": 0.6239, "step": 4180 }, { "epoch": 0.43, "grad_norm": 1.5622795488533532, "learning_rate": 6.407003919452565e-06, "loss": 0.6102, "step": 4181 }, { "epoch": 0.43, "grad_norm": 1.4175498405661087, "learning_rate": 6.4054170218518715e-06, "loss": 0.6667, "step": 4182 }, { "epoch": 0.43, "grad_norm": 1.4697214351656878, "learning_rate": 6.4038299705245265e-06, "loss": 0.6155, "step": 4183 }, { "epoch": 0.43, "grad_norm": 1.9799368191953814, "learning_rate": 6.40224276564412e-06, "loss": 0.7194, "step": 4184 }, { "epoch": 0.43, "grad_norm": 1.4317206824558164, "learning_rate": 6.400655407384265e-06, "loss": 0.5873, "step": 4185 }, { "epoch": 0.43, "grad_norm": 1.2965369286204773, "learning_rate": 6.399067895918588e-06, "loss": 0.5926, "step": 4186 }, { "epoch": 0.43, "grad_norm": 1.4109269451133095, "learning_rate": 6.397480231420734e-06, "loss": 0.5872, "step": 4187 }, { "epoch": 0.43, "grad_norm": 2.1555440091698466, "learning_rate": 6.395892414064363e-06, "loss": 0.7052, "step": 4188 }, { "epoch": 0.43, "grad_norm": 1.3578576124657218, "learning_rate": 6.394304444023154e-06, "loss": 0.5906, "step": 4189 }, { "epoch": 0.43, "grad_norm": 1.4509440859284355, "learning_rate": 6.392716321470802e-06, "loss": 0.7202, "step": 4190 }, { "epoch": 0.43, "grad_norm": 1.4047382138251396, "learning_rate": 6.391128046581016e-06, "loss": 0.6949, "step": 4191 }, { "epoch": 0.43, "grad_norm": 1.1643005580155434, "learning_rate": 6.38953961952753e-06, "loss": 0.5756, "step": 4192 }, { "epoch": 0.43, "grad_norm": 2.1760572951190866, "learning_rate": 6.387951040484081e-06, "loss": 0.6245, "step": 4193 }, { "epoch": 0.43, "grad_norm": 1.5217330492820988, "learning_rate": 6.386362309624432e-06, "loss": 0.681, "step": 4194 }, { "epoch": 0.43, "grad_norm": 1.9196746983327797, "learning_rate": 6.384773427122364e-06, "loss": 0.635, "step": 4195 }, { "epoch": 0.43, "grad_norm": 1.3869640006911548, "learning_rate": 6.383184393151669e-06, "loss": 0.6919, "step": 4196 }, { "epoch": 0.43, "grad_norm": 1.7866998525645559, "learning_rate": 6.381595207886158e-06, "loss": 0.658, "step": 4197 }, { "epoch": 0.43, "grad_norm": 1.4480545755338428, "learning_rate": 6.380005871499659e-06, "loss": 0.7387, "step": 4198 }, { "epoch": 0.43, "grad_norm": 1.3607156809803769, "learning_rate": 6.378416384166018e-06, "loss": 0.6479, "step": 4199 }, { "epoch": 0.43, "grad_norm": 1.4191137847740778, "learning_rate": 6.376826746059092e-06, "loss": 0.7201, "step": 4200 }, { "epoch": 0.43, "grad_norm": 1.645321350919578, "learning_rate": 6.375236957352759e-06, "loss": 0.8293, "step": 4201 }, { "epoch": 0.43, "grad_norm": 1.3778058032684932, "learning_rate": 6.373647018220915e-06, "loss": 0.6377, "step": 4202 }, { "epoch": 0.43, "grad_norm": 1.4370751514927262, "learning_rate": 6.3720569288374665e-06, "loss": 0.6767, "step": 4203 }, { "epoch": 0.43, "grad_norm": 1.3619131649301077, "learning_rate": 6.370466689376343e-06, "loss": 0.6368, "step": 4204 }, { "epoch": 0.43, "grad_norm": 1.6446414700487084, "learning_rate": 6.368876300011484e-06, "loss": 0.7004, "step": 4205 }, { "epoch": 0.43, "grad_norm": 1.3556991073172358, "learning_rate": 6.367285760916852e-06, "loss": 0.7304, "step": 4206 }, { "epoch": 0.43, "grad_norm": 1.325081530821212, "learning_rate": 6.3656950722664205e-06, "loss": 0.6376, "step": 4207 }, { "epoch": 0.43, "grad_norm": 1.4409478731469787, "learning_rate": 6.3641042342341806e-06, "loss": 0.747, "step": 4208 }, { "epoch": 0.43, "grad_norm": 1.4870295290196027, "learning_rate": 6.362513246994144e-06, "loss": 0.6576, "step": 4209 }, { "epoch": 0.43, "grad_norm": 1.3492738776033746, "learning_rate": 6.360922110720333e-06, "loss": 0.5989, "step": 4210 }, { "epoch": 0.43, "grad_norm": 1.402758975789882, "learning_rate": 6.3593308255867894e-06, "loss": 0.6158, "step": 4211 }, { "epoch": 0.43, "grad_norm": 1.577353829126367, "learning_rate": 6.357739391767568e-06, "loss": 0.7272, "step": 4212 }, { "epoch": 0.43, "grad_norm": 1.4427822613800751, "learning_rate": 6.3561478094367455e-06, "loss": 0.697, "step": 4213 }, { "epoch": 0.43, "grad_norm": 1.2925598999085803, "learning_rate": 6.35455607876841e-06, "loss": 0.644, "step": 4214 }, { "epoch": 0.43, "grad_norm": 1.3004916771431871, "learning_rate": 6.352964199936666e-06, "loss": 0.6005, "step": 4215 }, { "epoch": 0.43, "grad_norm": 1.411040274649824, "learning_rate": 6.35137217311564e-06, "loss": 0.7503, "step": 4216 }, { "epoch": 0.43, "grad_norm": 1.4061659840565361, "learning_rate": 6.349779998479466e-06, "loss": 0.6474, "step": 4217 }, { "epoch": 0.43, "grad_norm": 1.6314703081931285, "learning_rate": 6.3481876762023e-06, "loss": 0.6243, "step": 4218 }, { "epoch": 0.43, "grad_norm": 1.3598516889582422, "learning_rate": 6.3465952064583136e-06, "loss": 0.6065, "step": 4219 }, { "epoch": 0.43, "grad_norm": 1.2803505633241279, "learning_rate": 6.345002589421693e-06, "loss": 0.6111, "step": 4220 }, { "epoch": 0.43, "grad_norm": 1.3919829180840844, "learning_rate": 6.34340982526664e-06, "loss": 0.634, "step": 4221 }, { "epoch": 0.43, "grad_norm": 1.2390863534031955, "learning_rate": 6.341816914167372e-06, "loss": 0.6819, "step": 4222 }, { "epoch": 0.43, "grad_norm": 1.4168682478320522, "learning_rate": 6.3402238562981285e-06, "loss": 0.5931, "step": 4223 }, { "epoch": 0.43, "grad_norm": 1.3756025017191615, "learning_rate": 6.3386306518331595e-06, "loss": 0.7534, "step": 4224 }, { "epoch": 0.43, "grad_norm": 1.099515174547318, "learning_rate": 6.3370373009467286e-06, "loss": 0.6462, "step": 4225 }, { "epoch": 0.43, "grad_norm": 1.3477263042029373, "learning_rate": 6.335443803813124e-06, "loss": 0.639, "step": 4226 }, { "epoch": 0.43, "grad_norm": 1.2080931699903537, "learning_rate": 6.333850160606641e-06, "loss": 0.5951, "step": 4227 }, { "epoch": 0.43, "grad_norm": 1.3657151466167354, "learning_rate": 6.332256371501596e-06, "loss": 0.5939, "step": 4228 }, { "epoch": 0.43, "grad_norm": 1.564236194573732, "learning_rate": 6.330662436672319e-06, "loss": 0.6516, "step": 4229 }, { "epoch": 0.43, "grad_norm": 1.343707250673319, "learning_rate": 6.32906835629316e-06, "loss": 0.6893, "step": 4230 }, { "epoch": 0.43, "grad_norm": 1.505766003615363, "learning_rate": 6.327474130538479e-06, "loss": 0.7088, "step": 4231 }, { "epoch": 0.43, "grad_norm": 1.3139301406677644, "learning_rate": 6.325879759582656e-06, "loss": 0.6167, "step": 4232 }, { "epoch": 0.43, "grad_norm": 1.663102886304925, "learning_rate": 6.324285243600085e-06, "loss": 0.783, "step": 4233 }, { "epoch": 0.43, "grad_norm": 1.5625782431952362, "learning_rate": 6.322690582765178e-06, "loss": 0.6578, "step": 4234 }, { "epoch": 0.43, "grad_norm": 1.6005595160887625, "learning_rate": 6.3210957772523605e-06, "loss": 0.7361, "step": 4235 }, { "epoch": 0.43, "grad_norm": 1.3135897564224503, "learning_rate": 6.319500827236074e-06, "loss": 0.6, "step": 4236 }, { "epoch": 0.43, "grad_norm": 1.378386630749617, "learning_rate": 6.317905732890779e-06, "loss": 0.6858, "step": 4237 }, { "epoch": 0.43, "grad_norm": 1.6103595326767977, "learning_rate": 6.316310494390947e-06, "loss": 0.7912, "step": 4238 }, { "epoch": 0.43, "grad_norm": 1.402189213293792, "learning_rate": 6.314715111911068e-06, "loss": 0.6274, "step": 4239 }, { "epoch": 0.43, "grad_norm": 1.37412235453689, "learning_rate": 6.313119585625648e-06, "loss": 0.6658, "step": 4240 }, { "epoch": 0.43, "grad_norm": 1.4773706693679085, "learning_rate": 6.311523915709208e-06, "loss": 0.661, "step": 4241 }, { "epoch": 0.43, "grad_norm": 1.6761603885159084, "learning_rate": 6.309928102336285e-06, "loss": 0.6882, "step": 4242 }, { "epoch": 0.43, "grad_norm": 1.3854703064338447, "learning_rate": 6.30833214568143e-06, "loss": 0.5797, "step": 4243 }, { "epoch": 0.43, "grad_norm": 1.2544966388808205, "learning_rate": 6.306736045919213e-06, "loss": 0.5914, "step": 4244 }, { "epoch": 0.43, "grad_norm": 1.5603594640580747, "learning_rate": 6.305139803224217e-06, "loss": 0.7353, "step": 4245 }, { "epoch": 0.43, "grad_norm": 1.6535941703908656, "learning_rate": 6.303543417771042e-06, "loss": 0.6286, "step": 4246 }, { "epoch": 0.43, "grad_norm": 1.2611748597155057, "learning_rate": 6.301946889734302e-06, "loss": 0.6301, "step": 4247 }, { "epoch": 0.43, "grad_norm": 1.460007848240035, "learning_rate": 6.300350219288629e-06, "loss": 0.6945, "step": 4248 }, { "epoch": 0.43, "grad_norm": 1.3368351234743001, "learning_rate": 6.298753406608669e-06, "loss": 0.5994, "step": 4249 }, { "epoch": 0.43, "grad_norm": 1.5089760136247383, "learning_rate": 6.297156451869082e-06, "loss": 0.7553, "step": 4250 }, { "epoch": 0.43, "grad_norm": 1.363161922192042, "learning_rate": 6.295559355244547e-06, "loss": 0.6302, "step": 4251 }, { "epoch": 0.43, "grad_norm": 1.2633360535044076, "learning_rate": 6.293962116909758e-06, "loss": 0.6804, "step": 4252 }, { "epoch": 0.43, "grad_norm": 1.3671393833423957, "learning_rate": 6.2923647370394205e-06, "loss": 0.5624, "step": 4253 }, { "epoch": 0.43, "grad_norm": 1.4701557405621306, "learning_rate": 6.29076721580826e-06, "loss": 0.6335, "step": 4254 }, { "epoch": 0.43, "grad_norm": 1.4420754266714808, "learning_rate": 6.289169553391016e-06, "loss": 0.7104, "step": 4255 }, { "epoch": 0.43, "grad_norm": 1.3918188829225844, "learning_rate": 6.287571749962443e-06, "loss": 0.7022, "step": 4256 }, { "epoch": 0.43, "grad_norm": 1.4508746996662696, "learning_rate": 6.28597380569731e-06, "loss": 0.6552, "step": 4257 }, { "epoch": 0.43, "grad_norm": 1.5946827481503116, "learning_rate": 6.284375720770405e-06, "loss": 0.6517, "step": 4258 }, { "epoch": 0.43, "grad_norm": 1.3163412727007893, "learning_rate": 6.282777495356527e-06, "loss": 0.6408, "step": 4259 }, { "epoch": 0.44, "grad_norm": 1.6138594198389065, "learning_rate": 6.2811791296304925e-06, "loss": 0.7464, "step": 4260 }, { "epoch": 0.44, "grad_norm": 1.4699418058120677, "learning_rate": 6.279580623767133e-06, "loss": 0.6266, "step": 4261 }, { "epoch": 0.44, "grad_norm": 1.3155156419843972, "learning_rate": 6.277981977941297e-06, "loss": 0.6384, "step": 4262 }, { "epoch": 0.44, "grad_norm": 1.5733563779505404, "learning_rate": 6.276383192327846e-06, "loss": 0.6237, "step": 4263 }, { "epoch": 0.44, "grad_norm": 1.4724581555630827, "learning_rate": 6.274784267101657e-06, "loss": 0.7252, "step": 4264 }, { "epoch": 0.44, "grad_norm": 1.3304929218967287, "learning_rate": 6.273185202437624e-06, "loss": 0.5927, "step": 4265 }, { "epoch": 0.44, "grad_norm": 1.3572256665478142, "learning_rate": 6.271585998510655e-06, "loss": 0.6701, "step": 4266 }, { "epoch": 0.44, "grad_norm": 1.422051752951996, "learning_rate": 6.2699866554956715e-06, "loss": 0.5346, "step": 4267 }, { "epoch": 0.44, "grad_norm": 1.3532928547022, "learning_rate": 6.268387173567614e-06, "loss": 0.6083, "step": 4268 }, { "epoch": 0.44, "grad_norm": 1.4381556566660023, "learning_rate": 6.266787552901434e-06, "loss": 0.6295, "step": 4269 }, { "epoch": 0.44, "grad_norm": 1.4183246667899652, "learning_rate": 6.265187793672105e-06, "loss": 0.7286, "step": 4270 }, { "epoch": 0.44, "grad_norm": 1.4091142676801347, "learning_rate": 6.263587896054606e-06, "loss": 0.7416, "step": 4271 }, { "epoch": 0.44, "grad_norm": 1.3438770028745541, "learning_rate": 6.26198786022394e-06, "loss": 0.6352, "step": 4272 }, { "epoch": 0.44, "grad_norm": 1.4273128412422407, "learning_rate": 6.260387686355121e-06, "loss": 0.8339, "step": 4273 }, { "epoch": 0.44, "grad_norm": 1.2533997740465783, "learning_rate": 6.258787374623174e-06, "loss": 0.6416, "step": 4274 }, { "epoch": 0.44, "grad_norm": 1.3425515947605438, "learning_rate": 6.257186925203149e-06, "loss": 0.6716, "step": 4275 }, { "epoch": 0.44, "grad_norm": 1.6298984587044452, "learning_rate": 6.255586338270102e-06, "loss": 0.5717, "step": 4276 }, { "epoch": 0.44, "grad_norm": 1.3665635527973472, "learning_rate": 6.253985613999112e-06, "loss": 0.7172, "step": 4277 }, { "epoch": 0.44, "grad_norm": 1.3933641737292892, "learning_rate": 6.252384752565262e-06, "loss": 0.691, "step": 4278 }, { "epoch": 0.44, "grad_norm": 1.36133818858983, "learning_rate": 6.250783754143663e-06, "loss": 0.6506, "step": 4279 }, { "epoch": 0.44, "grad_norm": 1.3611010070429204, "learning_rate": 6.249182618909431e-06, "loss": 0.5839, "step": 4280 }, { "epoch": 0.44, "grad_norm": 1.5128608815991982, "learning_rate": 6.247581347037701e-06, "loss": 0.6503, "step": 4281 }, { "epoch": 0.44, "grad_norm": 1.2434735500727825, "learning_rate": 6.245979938703623e-06, "loss": 0.6533, "step": 4282 }, { "epoch": 0.44, "grad_norm": 2.061879491359289, "learning_rate": 6.244378394082363e-06, "loss": 0.7413, "step": 4283 }, { "epoch": 0.44, "grad_norm": 1.4944262638745112, "learning_rate": 6.2427767133491e-06, "loss": 0.6457, "step": 4284 }, { "epoch": 0.44, "grad_norm": 1.258698170287561, "learning_rate": 6.241174896679026e-06, "loss": 0.5806, "step": 4285 }, { "epoch": 0.44, "grad_norm": 1.3649051779764088, "learning_rate": 6.239572944247352e-06, "loss": 0.5982, "step": 4286 }, { "epoch": 0.44, "grad_norm": 1.5457574462165686, "learning_rate": 6.237970856229301e-06, "loss": 0.6916, "step": 4287 }, { "epoch": 0.44, "grad_norm": 1.2771620059718714, "learning_rate": 6.236368632800111e-06, "loss": 0.6969, "step": 4288 }, { "epoch": 0.44, "grad_norm": 1.3704458391855712, "learning_rate": 6.234766274135039e-06, "loss": 0.6122, "step": 4289 }, { "epoch": 0.44, "grad_norm": 1.4923464780742193, "learning_rate": 6.23316378040935e-06, "loss": 0.704, "step": 4290 }, { "epoch": 0.44, "grad_norm": 1.6197430128896608, "learning_rate": 6.23156115179833e-06, "loss": 0.7594, "step": 4291 }, { "epoch": 0.44, "grad_norm": 1.3919180598055392, "learning_rate": 6.229958388477274e-06, "loss": 0.551, "step": 4292 }, { "epoch": 0.44, "grad_norm": 1.4900454977459991, "learning_rate": 6.228355490621498e-06, "loss": 0.6627, "step": 4293 }, { "epoch": 0.44, "grad_norm": 1.5235213277834956, "learning_rate": 6.226752458406326e-06, "loss": 0.6304, "step": 4294 }, { "epoch": 0.44, "grad_norm": 1.4170292146652153, "learning_rate": 6.225149292007102e-06, "loss": 0.6538, "step": 4295 }, { "epoch": 0.44, "grad_norm": 1.4674053183189322, "learning_rate": 6.223545991599184e-06, "loss": 0.6802, "step": 4296 }, { "epoch": 0.44, "grad_norm": 1.6172758042324613, "learning_rate": 6.221942557357941e-06, "loss": 0.6217, "step": 4297 }, { "epoch": 0.44, "grad_norm": 1.4891286579378786, "learning_rate": 6.220338989458761e-06, "loss": 0.6077, "step": 4298 }, { "epoch": 0.44, "grad_norm": 1.5246878327120108, "learning_rate": 6.218735288077041e-06, "loss": 0.6143, "step": 4299 }, { "epoch": 0.44, "grad_norm": 1.3713060381393418, "learning_rate": 6.217131453388202e-06, "loss": 0.6784, "step": 4300 }, { "epoch": 0.44, "grad_norm": 1.5621076133810758, "learning_rate": 6.215527485567669e-06, "loss": 0.6748, "step": 4301 }, { "epoch": 0.44, "grad_norm": 1.4084401622988294, "learning_rate": 6.21392338479089e-06, "loss": 0.6566, "step": 4302 }, { "epoch": 0.44, "grad_norm": 1.3413857233461748, "learning_rate": 6.212319151233321e-06, "loss": 0.6796, "step": 4303 }, { "epoch": 0.44, "grad_norm": 1.5762175428652818, "learning_rate": 6.210714785070438e-06, "loss": 0.6714, "step": 4304 }, { "epoch": 0.44, "grad_norm": 1.5309652417579822, "learning_rate": 6.209110286477727e-06, "loss": 0.7049, "step": 4305 }, { "epoch": 0.44, "grad_norm": 1.290578823351892, "learning_rate": 6.20750565563069e-06, "loss": 0.6029, "step": 4306 }, { "epoch": 0.44, "grad_norm": 1.4816467771753425, "learning_rate": 6.205900892704848e-06, "loss": 0.7159, "step": 4307 }, { "epoch": 0.44, "grad_norm": 1.471984444382491, "learning_rate": 6.2042959978757285e-06, "loss": 0.7296, "step": 4308 }, { "epoch": 0.44, "grad_norm": 1.5652403120169498, "learning_rate": 6.20269097131888e-06, "loss": 0.6933, "step": 4309 }, { "epoch": 0.44, "grad_norm": 1.3322102110664535, "learning_rate": 6.201085813209859e-06, "loss": 0.6897, "step": 4310 }, { "epoch": 0.44, "grad_norm": 1.2782046051258098, "learning_rate": 6.199480523724244e-06, "loss": 0.7297, "step": 4311 }, { "epoch": 0.44, "grad_norm": 1.3007917129844433, "learning_rate": 6.197875103037624e-06, "loss": 0.7041, "step": 4312 }, { "epoch": 0.44, "grad_norm": 1.8863101162818074, "learning_rate": 6.196269551325597e-06, "loss": 0.7221, "step": 4313 }, { "epoch": 0.44, "grad_norm": 1.3273980624391544, "learning_rate": 6.194663868763788e-06, "loss": 0.6184, "step": 4314 }, { "epoch": 0.44, "grad_norm": 1.363331576605251, "learning_rate": 6.193058055527824e-06, "loss": 0.5929, "step": 4315 }, { "epoch": 0.44, "grad_norm": 1.3965263412826172, "learning_rate": 6.1914521117933535e-06, "loss": 0.642, "step": 4316 }, { "epoch": 0.44, "grad_norm": 1.3670772745804498, "learning_rate": 6.189846037736036e-06, "loss": 0.5579, "step": 4317 }, { "epoch": 0.44, "grad_norm": 1.4373472232813147, "learning_rate": 6.1882398335315465e-06, "loss": 0.5845, "step": 4318 }, { "epoch": 0.44, "grad_norm": 1.3118130589029202, "learning_rate": 6.186633499355576e-06, "loss": 0.6446, "step": 4319 }, { "epoch": 0.44, "grad_norm": 1.7350651809701723, "learning_rate": 6.185027035383823e-06, "loss": 0.6178, "step": 4320 }, { "epoch": 0.44, "grad_norm": 1.4223841487751947, "learning_rate": 6.18342044179201e-06, "loss": 0.7121, "step": 4321 }, { "epoch": 0.44, "grad_norm": 1.5226988474002925, "learning_rate": 6.181813718755867e-06, "loss": 0.6779, "step": 4322 }, { "epoch": 0.44, "grad_norm": 1.604476784321015, "learning_rate": 6.18020686645114e-06, "loss": 0.6961, "step": 4323 }, { "epoch": 0.44, "grad_norm": 1.39223320030612, "learning_rate": 6.178599885053587e-06, "loss": 0.6025, "step": 4324 }, { "epoch": 0.44, "grad_norm": 1.3751861564803285, "learning_rate": 6.1769927747389835e-06, "loss": 0.6321, "step": 4325 }, { "epoch": 0.44, "grad_norm": 1.443942670772087, "learning_rate": 6.175385535683119e-06, "loss": 0.5913, "step": 4326 }, { "epoch": 0.44, "grad_norm": 1.3040433531284361, "learning_rate": 6.173778168061792e-06, "loss": 0.6129, "step": 4327 }, { "epoch": 0.44, "grad_norm": 1.3110559573836986, "learning_rate": 6.172170672050826e-06, "loss": 0.5565, "step": 4328 }, { "epoch": 0.44, "grad_norm": 1.346124693942863, "learning_rate": 6.170563047826043e-06, "loss": 0.6547, "step": 4329 }, { "epoch": 0.44, "grad_norm": 1.5474799509753692, "learning_rate": 6.168955295563291e-06, "loss": 0.7089, "step": 4330 }, { "epoch": 0.44, "grad_norm": 1.825300879009905, "learning_rate": 6.167347415438429e-06, "loss": 0.7362, "step": 4331 }, { "epoch": 0.44, "grad_norm": 1.3578284885060359, "learning_rate": 6.16573940762733e-06, "loss": 0.661, "step": 4332 }, { "epoch": 0.44, "grad_norm": 1.5245074516932249, "learning_rate": 6.164131272305879e-06, "loss": 0.7044, "step": 4333 }, { "epoch": 0.44, "grad_norm": 1.5130232525512126, "learning_rate": 6.162523009649975e-06, "loss": 0.6806, "step": 4334 }, { "epoch": 0.44, "grad_norm": 1.3167519895100488, "learning_rate": 6.160914619835535e-06, "loss": 0.6015, "step": 4335 }, { "epoch": 0.44, "grad_norm": 1.4391136326291731, "learning_rate": 6.159306103038486e-06, "loss": 0.6282, "step": 4336 }, { "epoch": 0.44, "grad_norm": 1.5069538295415459, "learning_rate": 6.157697459434768e-06, "loss": 0.6686, "step": 4337 }, { "epoch": 0.44, "grad_norm": 1.3911633793110905, "learning_rate": 6.15608868920034e-06, "loss": 0.602, "step": 4338 }, { "epoch": 0.44, "grad_norm": 1.34195219960631, "learning_rate": 6.154479792511171e-06, "loss": 0.6201, "step": 4339 }, { "epoch": 0.44, "grad_norm": 1.5182706817791745, "learning_rate": 6.152870769543245e-06, "loss": 0.6131, "step": 4340 }, { "epoch": 0.44, "grad_norm": 1.52386199953226, "learning_rate": 6.151261620472554e-06, "loss": 0.7304, "step": 4341 }, { "epoch": 0.44, "grad_norm": 1.3011231073995575, "learning_rate": 6.149652345475118e-06, "loss": 0.6904, "step": 4342 }, { "epoch": 0.44, "grad_norm": 1.304054665644063, "learning_rate": 6.148042944726955e-06, "loss": 0.6979, "step": 4343 }, { "epoch": 0.44, "grad_norm": 1.3870535818922434, "learning_rate": 6.146433418404108e-06, "loss": 0.748, "step": 4344 }, { "epoch": 0.44, "grad_norm": 1.5934954558632226, "learning_rate": 6.1448237666826264e-06, "loss": 0.7296, "step": 4345 }, { "epoch": 0.44, "grad_norm": 1.4152793396169752, "learning_rate": 6.143213989738578e-06, "loss": 0.6859, "step": 4346 }, { "epoch": 0.44, "grad_norm": 1.5208848693675099, "learning_rate": 6.1416040877480435e-06, "loss": 0.6515, "step": 4347 }, { "epoch": 0.44, "grad_norm": 1.4457644294520973, "learning_rate": 6.1399940608871126e-06, "loss": 0.6876, "step": 4348 }, { "epoch": 0.44, "grad_norm": 1.3525258277698546, "learning_rate": 6.1383839093318975e-06, "loss": 0.6532, "step": 4349 }, { "epoch": 0.44, "grad_norm": 1.390578272198269, "learning_rate": 6.136773633258515e-06, "loss": 0.6489, "step": 4350 }, { "epoch": 0.44, "grad_norm": 1.3266678709549025, "learning_rate": 6.1351632328431025e-06, "loss": 0.5793, "step": 4351 }, { "epoch": 0.44, "grad_norm": 1.4735257146523613, "learning_rate": 6.133552708261805e-06, "loss": 0.6788, "step": 4352 }, { "epoch": 0.44, "grad_norm": 1.2282116212737737, "learning_rate": 6.1319420596907874e-06, "loss": 0.6003, "step": 4353 }, { "epoch": 0.44, "grad_norm": 1.3285484183630356, "learning_rate": 6.130331287306224e-06, "loss": 0.6279, "step": 4354 }, { "epoch": 0.44, "grad_norm": 1.4334438431989944, "learning_rate": 6.128720391284301e-06, "loss": 0.5665, "step": 4355 }, { "epoch": 0.44, "grad_norm": 1.2139602620491348, "learning_rate": 6.127109371801223e-06, "loss": 0.7567, "step": 4356 }, { "epoch": 0.44, "grad_norm": 1.3762012627736044, "learning_rate": 6.1254982290332065e-06, "loss": 0.6523, "step": 4357 }, { "epoch": 0.45, "grad_norm": 1.512530581052133, "learning_rate": 6.123886963156478e-06, "loss": 0.6501, "step": 4358 }, { "epoch": 0.45, "grad_norm": 1.4420570274038973, "learning_rate": 6.122275574347283e-06, "loss": 0.6221, "step": 4359 }, { "epoch": 0.45, "grad_norm": 1.2573302276260454, "learning_rate": 6.120664062781876e-06, "loss": 0.5822, "step": 4360 }, { "epoch": 0.45, "grad_norm": 1.7175360798487957, "learning_rate": 6.1190524286365285e-06, "loss": 0.7302, "step": 4361 }, { "epoch": 0.45, "grad_norm": 1.3827468513736496, "learning_rate": 6.117440672087519e-06, "loss": 0.6818, "step": 4362 }, { "epoch": 0.45, "grad_norm": 1.3886719425906406, "learning_rate": 6.11582879331115e-06, "loss": 0.6822, "step": 4363 }, { "epoch": 0.45, "grad_norm": 1.2343251266144255, "learning_rate": 6.114216792483726e-06, "loss": 0.5807, "step": 4364 }, { "epoch": 0.45, "grad_norm": 1.2995285361538453, "learning_rate": 6.112604669781572e-06, "loss": 0.6443, "step": 4365 }, { "epoch": 0.45, "grad_norm": 1.2870100849838009, "learning_rate": 6.110992425381026e-06, "loss": 0.6171, "step": 4366 }, { "epoch": 0.45, "grad_norm": 1.4965407689160692, "learning_rate": 6.109380059458436e-06, "loss": 0.67, "step": 4367 }, { "epoch": 0.45, "grad_norm": 1.304554010941805, "learning_rate": 6.107767572190167e-06, "loss": 0.5844, "step": 4368 }, { "epoch": 0.45, "grad_norm": 1.3426152098396202, "learning_rate": 6.106154963752592e-06, "loss": 0.6221, "step": 4369 }, { "epoch": 0.45, "grad_norm": 1.4590745472108984, "learning_rate": 6.104542234322104e-06, "loss": 0.6715, "step": 4370 }, { "epoch": 0.45, "grad_norm": 1.30220789818174, "learning_rate": 6.1029293840751025e-06, "loss": 0.6106, "step": 4371 }, { "epoch": 0.45, "grad_norm": 1.3996533197078025, "learning_rate": 6.101316413188005e-06, "loss": 0.6177, "step": 4372 }, { "epoch": 0.45, "grad_norm": 1.4262767627728603, "learning_rate": 6.099703321837243e-06, "loss": 0.6148, "step": 4373 }, { "epoch": 0.45, "grad_norm": 2.0131921841788794, "learning_rate": 6.098090110199255e-06, "loss": 0.668, "step": 4374 }, { "epoch": 0.45, "grad_norm": 1.4122398491328412, "learning_rate": 6.0964767784505e-06, "loss": 0.6475, "step": 4375 }, { "epoch": 0.45, "grad_norm": 1.4201409964350913, "learning_rate": 6.094863326767443e-06, "loss": 0.628, "step": 4376 }, { "epoch": 0.45, "grad_norm": 1.3254944166583589, "learning_rate": 6.093249755326569e-06, "loss": 0.5819, "step": 4377 }, { "epoch": 0.45, "grad_norm": 1.4661350107234823, "learning_rate": 6.091636064304372e-06, "loss": 0.5827, "step": 4378 }, { "epoch": 0.45, "grad_norm": 1.3225129792438368, "learning_rate": 6.090022253877359e-06, "loss": 0.5607, "step": 4379 }, { "epoch": 0.45, "grad_norm": 1.5417772101438425, "learning_rate": 6.0884083242220515e-06, "loss": 0.7325, "step": 4380 }, { "epoch": 0.45, "grad_norm": 1.588494385469769, "learning_rate": 6.086794275514985e-06, "loss": 0.5752, "step": 4381 }, { "epoch": 0.45, "grad_norm": 1.4938723394443552, "learning_rate": 6.085180107932706e-06, "loss": 0.574, "step": 4382 }, { "epoch": 0.45, "grad_norm": 1.2138767157519668, "learning_rate": 6.083565821651772e-06, "loss": 0.5801, "step": 4383 }, { "epoch": 0.45, "grad_norm": 1.5094057962593315, "learning_rate": 6.0819514168487595e-06, "loss": 0.694, "step": 4384 }, { "epoch": 0.45, "grad_norm": 1.5360860361620514, "learning_rate": 6.080336893700251e-06, "loss": 0.6552, "step": 4385 }, { "epoch": 0.45, "grad_norm": 1.5358640835631865, "learning_rate": 6.07872225238285e-06, "loss": 0.7069, "step": 4386 }, { "epoch": 0.45, "grad_norm": 1.4710672130328064, "learning_rate": 6.0771074930731665e-06, "loss": 0.6388, "step": 4387 }, { "epoch": 0.45, "grad_norm": 1.254809575101268, "learning_rate": 6.075492615947824e-06, "loss": 0.5334, "step": 4388 }, { "epoch": 0.45, "grad_norm": 1.4137126460484988, "learning_rate": 6.073877621183462e-06, "loss": 0.6133, "step": 4389 }, { "epoch": 0.45, "grad_norm": 1.837912625023607, "learning_rate": 6.072262508956727e-06, "loss": 0.7073, "step": 4390 }, { "epoch": 0.45, "grad_norm": 1.5274700387790903, "learning_rate": 6.070647279444291e-06, "loss": 0.6911, "step": 4391 }, { "epoch": 0.45, "grad_norm": 1.2864944237993239, "learning_rate": 6.069031932822822e-06, "loss": 0.6471, "step": 4392 }, { "epoch": 0.45, "grad_norm": 1.3768914895703046, "learning_rate": 6.067416469269012e-06, "loss": 0.5559, "step": 4393 }, { "epoch": 0.45, "grad_norm": 1.9731214112007232, "learning_rate": 6.065800888959563e-06, "loss": 0.6811, "step": 4394 }, { "epoch": 0.45, "grad_norm": 1.4346121579930546, "learning_rate": 6.0641851920711905e-06, "loss": 0.6854, "step": 4395 }, { "epoch": 0.45, "grad_norm": 1.3601343262194359, "learning_rate": 6.06256937878062e-06, "loss": 0.5701, "step": 4396 }, { "epoch": 0.45, "grad_norm": 1.3530873779199624, "learning_rate": 6.060953449264592e-06, "loss": 0.6367, "step": 4397 }, { "epoch": 0.45, "grad_norm": 1.3854532784104951, "learning_rate": 6.059337403699862e-06, "loss": 0.6228, "step": 4398 }, { "epoch": 0.45, "grad_norm": 1.329077040580198, "learning_rate": 6.057721242263191e-06, "loss": 0.6052, "step": 4399 }, { "epoch": 0.45, "grad_norm": 1.2796084221786255, "learning_rate": 6.056104965131362e-06, "loss": 0.5328, "step": 4400 }, { "epoch": 0.45, "grad_norm": 1.366413367927911, "learning_rate": 6.054488572481161e-06, "loss": 0.6097, "step": 4401 }, { "epoch": 0.45, "grad_norm": 1.1672120428146349, "learning_rate": 6.052872064489396e-06, "loss": 0.7407, "step": 4402 }, { "epoch": 0.45, "grad_norm": 1.641160116243557, "learning_rate": 6.05125544133288e-06, "loss": 0.6554, "step": 4403 }, { "epoch": 0.45, "grad_norm": 1.4703358170222613, "learning_rate": 6.049638703188443e-06, "loss": 0.6681, "step": 4404 }, { "epoch": 0.45, "grad_norm": 1.4744455465414297, "learning_rate": 6.048021850232925e-06, "loss": 0.6375, "step": 4405 }, { "epoch": 0.45, "grad_norm": 1.5454880418912231, "learning_rate": 6.046404882643183e-06, "loss": 0.5795, "step": 4406 }, { "epoch": 0.45, "grad_norm": 1.6020579429324615, "learning_rate": 6.04478780059608e-06, "loss": 0.6448, "step": 4407 }, { "epoch": 0.45, "grad_norm": 1.7228837713434328, "learning_rate": 6.043170604268496e-06, "loss": 0.6779, "step": 4408 }, { "epoch": 0.45, "grad_norm": 1.3991007637443307, "learning_rate": 6.041553293837323e-06, "loss": 0.6189, "step": 4409 }, { "epoch": 0.45, "grad_norm": 1.554611233910772, "learning_rate": 6.0399358694794655e-06, "loss": 0.6803, "step": 4410 }, { "epoch": 0.45, "grad_norm": 1.536894801648476, "learning_rate": 6.038318331371836e-06, "loss": 0.6069, "step": 4411 }, { "epoch": 0.45, "grad_norm": 1.4009436264299528, "learning_rate": 6.036700679691368e-06, "loss": 0.6529, "step": 4412 }, { "epoch": 0.45, "grad_norm": 1.3723684621068073, "learning_rate": 6.0350829146150015e-06, "loss": 0.6532, "step": 4413 }, { "epoch": 0.45, "grad_norm": 1.4761625721819236, "learning_rate": 6.033465036319688e-06, "loss": 0.6434, "step": 4414 }, { "epoch": 0.45, "grad_norm": 3.476646819812137, "learning_rate": 6.031847044982395e-06, "loss": 0.6917, "step": 4415 }, { "epoch": 0.45, "grad_norm": 1.3491795802534232, "learning_rate": 6.030228940780101e-06, "loss": 0.6389, "step": 4416 }, { "epoch": 0.45, "grad_norm": 1.3246675416375835, "learning_rate": 6.028610723889797e-06, "loss": 0.6059, "step": 4417 }, { "epoch": 0.45, "grad_norm": 1.3623926447345611, "learning_rate": 6.026992394488483e-06, "loss": 0.5871, "step": 4418 }, { "epoch": 0.45, "grad_norm": 1.466650288795115, "learning_rate": 6.0253739527531795e-06, "loss": 0.6966, "step": 4419 }, { "epoch": 0.45, "grad_norm": 1.2611406940622045, "learning_rate": 6.023755398860911e-06, "loss": 0.6788, "step": 4420 }, { "epoch": 0.45, "grad_norm": 1.5346850421085751, "learning_rate": 6.022136732988717e-06, "loss": 0.6406, "step": 4421 }, { "epoch": 0.45, "grad_norm": 1.6512264612337277, "learning_rate": 6.020517955313651e-06, "loss": 0.5433, "step": 4422 }, { "epoch": 0.45, "grad_norm": 1.368773695977286, "learning_rate": 6.018899066012776e-06, "loss": 0.6403, "step": 4423 }, { "epoch": 0.45, "grad_norm": 1.4498280230533076, "learning_rate": 6.0172800652631706e-06, "loss": 0.6444, "step": 4424 }, { "epoch": 0.45, "grad_norm": 1.5966762953114109, "learning_rate": 6.015660953241919e-06, "loss": 0.6794, "step": 4425 }, { "epoch": 0.45, "grad_norm": 1.4350736604843053, "learning_rate": 6.01404173012613e-06, "loss": 0.7145, "step": 4426 }, { "epoch": 0.45, "grad_norm": 1.4265299347054075, "learning_rate": 6.012422396092911e-06, "loss": 0.5765, "step": 4427 }, { "epoch": 0.45, "grad_norm": 1.344525184560144, "learning_rate": 6.010802951319387e-06, "loss": 0.6967, "step": 4428 }, { "epoch": 0.45, "grad_norm": 1.6444097457352131, "learning_rate": 6.009183395982697e-06, "loss": 0.6684, "step": 4429 }, { "epoch": 0.45, "grad_norm": 1.501659754710972, "learning_rate": 6.00756373025999e-06, "loss": 0.7268, "step": 4430 }, { "epoch": 0.45, "grad_norm": 1.4388184870243013, "learning_rate": 6.005943954328429e-06, "loss": 0.6262, "step": 4431 }, { "epoch": 0.45, "grad_norm": 1.385196370341548, "learning_rate": 6.004324068365186e-06, "loss": 0.6632, "step": 4432 }, { "epoch": 0.45, "grad_norm": 1.466527269566031, "learning_rate": 6.002704072547445e-06, "loss": 0.594, "step": 4433 }, { "epoch": 0.45, "grad_norm": 1.2896848090722628, "learning_rate": 6.001083967052408e-06, "loss": 0.7128, "step": 4434 }, { "epoch": 0.45, "grad_norm": 1.5816774524256612, "learning_rate": 5.9994637520572795e-06, "loss": 0.6304, "step": 4435 }, { "epoch": 0.45, "grad_norm": 1.314024934789067, "learning_rate": 5.997843427739285e-06, "loss": 0.7345, "step": 4436 }, { "epoch": 0.45, "grad_norm": 1.5248985573733327, "learning_rate": 5.996222994275656e-06, "loss": 0.8105, "step": 4437 }, { "epoch": 0.45, "grad_norm": 1.3788099675775147, "learning_rate": 5.9946024518436404e-06, "loss": 0.612, "step": 4438 }, { "epoch": 0.45, "grad_norm": 1.2882428009378317, "learning_rate": 5.992981800620492e-06, "loss": 0.6164, "step": 4439 }, { "epoch": 0.45, "grad_norm": 1.4202913937917019, "learning_rate": 5.991361040783482e-06, "loss": 0.6876, "step": 4440 }, { "epoch": 0.45, "grad_norm": 1.5974204980013786, "learning_rate": 5.989740172509893e-06, "loss": 0.7983, "step": 4441 }, { "epoch": 0.45, "grad_norm": 1.5617276833965363, "learning_rate": 5.988119195977015e-06, "loss": 0.6341, "step": 4442 }, { "epoch": 0.45, "grad_norm": 1.4491708991181795, "learning_rate": 5.986498111362155e-06, "loss": 0.6988, "step": 4443 }, { "epoch": 0.45, "grad_norm": 1.5340912821194281, "learning_rate": 5.98487691884263e-06, "loss": 0.7207, "step": 4444 }, { "epoch": 0.45, "grad_norm": 1.5312644809962337, "learning_rate": 5.983255618595767e-06, "loss": 0.5685, "step": 4445 }, { "epoch": 0.45, "grad_norm": 1.4981511440187818, "learning_rate": 5.981634210798907e-06, "loss": 0.6955, "step": 4446 }, { "epoch": 0.45, "grad_norm": 1.4969072733479165, "learning_rate": 5.9800126956294e-06, "loss": 0.7093, "step": 4447 }, { "epoch": 0.45, "grad_norm": 1.3385962046959232, "learning_rate": 5.978391073264614e-06, "loss": 0.7041, "step": 4448 }, { "epoch": 0.45, "grad_norm": 1.4366036150663544, "learning_rate": 5.976769343881922e-06, "loss": 0.6452, "step": 4449 }, { "epoch": 0.45, "grad_norm": 1.472430280751867, "learning_rate": 5.9751475076587115e-06, "loss": 0.6857, "step": 4450 }, { "epoch": 0.45, "grad_norm": 1.471749019296, "learning_rate": 5.973525564772381e-06, "loss": 0.7471, "step": 4451 }, { "epoch": 0.45, "grad_norm": 1.2470626783529988, "learning_rate": 5.971903515400342e-06, "loss": 0.6142, "step": 4452 }, { "epoch": 0.45, "grad_norm": 1.153474878570765, "learning_rate": 5.970281359720017e-06, "loss": 0.5446, "step": 4453 }, { "epoch": 0.45, "grad_norm": 2.6862765719436936, "learning_rate": 5.968659097908836e-06, "loss": 0.6356, "step": 4454 }, { "epoch": 0.45, "grad_norm": 1.272735348529763, "learning_rate": 5.967036730144252e-06, "loss": 0.6845, "step": 4455 }, { "epoch": 0.46, "grad_norm": 1.680575012551999, "learning_rate": 5.965414256603716e-06, "loss": 0.66, "step": 4456 }, { "epoch": 0.46, "grad_norm": 1.2813935029723869, "learning_rate": 5.963791677464696e-06, "loss": 0.7101, "step": 4457 }, { "epoch": 0.46, "grad_norm": 1.405213236685378, "learning_rate": 5.9621689929046765e-06, "loss": 0.6083, "step": 4458 }, { "epoch": 0.46, "grad_norm": 1.5022827002715136, "learning_rate": 5.960546203101148e-06, "loss": 0.6383, "step": 4459 }, { "epoch": 0.46, "grad_norm": 1.6014181250463955, "learning_rate": 5.958923308231612e-06, "loss": 0.7415, "step": 4460 }, { "epoch": 0.46, "grad_norm": 1.396942234343013, "learning_rate": 5.957300308473583e-06, "loss": 0.6343, "step": 4461 }, { "epoch": 0.46, "grad_norm": 1.7184120068339512, "learning_rate": 5.95567720400459e-06, "loss": 0.7387, "step": 4462 }, { "epoch": 0.46, "grad_norm": 1.5419744927382062, "learning_rate": 5.954053995002168e-06, "loss": 0.5683, "step": 4463 }, { "epoch": 0.46, "grad_norm": 1.5121728348018886, "learning_rate": 5.952430681643867e-06, "loss": 0.6719, "step": 4464 }, { "epoch": 0.46, "grad_norm": 1.162014153649989, "learning_rate": 5.950807264107248e-06, "loss": 0.6055, "step": 4465 }, { "epoch": 0.46, "grad_norm": 1.268484528814523, "learning_rate": 5.9491837425698815e-06, "loss": 0.5817, "step": 4466 }, { "epoch": 0.46, "grad_norm": 1.2005470942721108, "learning_rate": 5.947560117209353e-06, "loss": 0.5978, "step": 4467 }, { "epoch": 0.46, "grad_norm": 1.4715236979830615, "learning_rate": 5.9459363882032536e-06, "loss": 0.6362, "step": 4468 }, { "epoch": 0.46, "grad_norm": 1.5438301827733325, "learning_rate": 5.9443125557291945e-06, "loss": 0.6812, "step": 4469 }, { "epoch": 0.46, "grad_norm": 1.3872900990104091, "learning_rate": 5.9426886199647885e-06, "loss": 0.7003, "step": 4470 }, { "epoch": 0.46, "grad_norm": 1.5408014664165202, "learning_rate": 5.9410645810876644e-06, "loss": 0.6309, "step": 4471 }, { "epoch": 0.46, "grad_norm": 1.5905093106838024, "learning_rate": 5.939440439275464e-06, "loss": 0.7393, "step": 4472 }, { "epoch": 0.46, "grad_norm": 1.3285244817725705, "learning_rate": 5.937816194705838e-06, "loss": 0.6786, "step": 4473 }, { "epoch": 0.46, "grad_norm": 1.265066637498177, "learning_rate": 5.936191847556449e-06, "loss": 0.625, "step": 4474 }, { "epoch": 0.46, "grad_norm": 2.110552629312104, "learning_rate": 5.934567398004967e-06, "loss": 0.5692, "step": 4475 }, { "epoch": 0.46, "grad_norm": 1.4820347534191531, "learning_rate": 5.932942846229082e-06, "loss": 0.6538, "step": 4476 }, { "epoch": 0.46, "grad_norm": 1.4783024367865367, "learning_rate": 5.931318192406486e-06, "loss": 0.6233, "step": 4477 }, { "epoch": 0.46, "grad_norm": 1.398088580160598, "learning_rate": 5.9296934367148895e-06, "loss": 0.7634, "step": 4478 }, { "epoch": 0.46, "grad_norm": 1.6145604070457975, "learning_rate": 5.928068579332008e-06, "loss": 0.6501, "step": 4479 }, { "epoch": 0.46, "grad_norm": 1.681467696234303, "learning_rate": 5.926443620435572e-06, "loss": 0.6515, "step": 4480 }, { "epoch": 0.46, "grad_norm": 1.417741724654837, "learning_rate": 5.924818560203323e-06, "loss": 0.7182, "step": 4481 }, { "epoch": 0.46, "grad_norm": 1.7682993295268175, "learning_rate": 5.923193398813009e-06, "loss": 0.6828, "step": 4482 }, { "epoch": 0.46, "grad_norm": 1.4793164546179827, "learning_rate": 5.9215681364423975e-06, "loss": 0.6243, "step": 4483 }, { "epoch": 0.46, "grad_norm": 1.546642512706988, "learning_rate": 5.919942773269258e-06, "loss": 0.7517, "step": 4484 }, { "epoch": 0.46, "grad_norm": 1.4128950790248274, "learning_rate": 5.918317309471378e-06, "loss": 0.6672, "step": 4485 }, { "epoch": 0.46, "grad_norm": 1.4871479955162246, "learning_rate": 5.9166917452265505e-06, "loss": 0.6351, "step": 4486 }, { "epoch": 0.46, "grad_norm": 1.4185251569443986, "learning_rate": 5.915066080712585e-06, "loss": 0.6445, "step": 4487 }, { "epoch": 0.46, "grad_norm": 1.2754041621685737, "learning_rate": 5.913440316107298e-06, "loss": 0.698, "step": 4488 }, { "epoch": 0.46, "grad_norm": 1.7250572677702485, "learning_rate": 5.911814451588517e-06, "loss": 0.6981, "step": 4489 }, { "epoch": 0.46, "grad_norm": 1.4735958873802715, "learning_rate": 5.910188487334084e-06, "loss": 0.5878, "step": 4490 }, { "epoch": 0.46, "grad_norm": 1.4537888865963822, "learning_rate": 5.908562423521848e-06, "loss": 0.7455, "step": 4491 }, { "epoch": 0.46, "grad_norm": 2.0107773177480284, "learning_rate": 5.9069362603296686e-06, "loss": 0.7701, "step": 4492 }, { "epoch": 0.46, "grad_norm": 1.3335531187546334, "learning_rate": 5.9053099979354214e-06, "loss": 0.56, "step": 4493 }, { "epoch": 0.46, "grad_norm": 1.5092761491815079, "learning_rate": 5.903683636516987e-06, "loss": 0.6288, "step": 4494 }, { "epoch": 0.46, "grad_norm": 1.766495789107194, "learning_rate": 5.902057176252262e-06, "loss": 0.629, "step": 4495 }, { "epoch": 0.46, "grad_norm": 1.7335756636743453, "learning_rate": 5.900430617319146e-06, "loss": 0.6733, "step": 4496 }, { "epoch": 0.46, "grad_norm": 1.5399125872453177, "learning_rate": 5.89880395989556e-06, "loss": 0.7264, "step": 4497 }, { "epoch": 0.46, "grad_norm": 1.3776180034146692, "learning_rate": 5.897177204159428e-06, "loss": 0.7408, "step": 4498 }, { "epoch": 0.46, "grad_norm": 1.353361673222786, "learning_rate": 5.895550350288685e-06, "loss": 0.6564, "step": 4499 }, { "epoch": 0.46, "grad_norm": 1.6172475236330695, "learning_rate": 5.893923398461283e-06, "loss": 0.6249, "step": 4500 }, { "epoch": 0.46, "grad_norm": 1.3282147865680682, "learning_rate": 5.892296348855177e-06, "loss": 0.6304, "step": 4501 }, { "epoch": 0.46, "grad_norm": 1.348423290960829, "learning_rate": 5.890669201648339e-06, "loss": 0.558, "step": 4502 }, { "epoch": 0.46, "grad_norm": 1.4257735929866011, "learning_rate": 5.889041957018745e-06, "loss": 0.6274, "step": 4503 }, { "epoch": 0.46, "grad_norm": 1.4093398146857987, "learning_rate": 5.8874146151443885e-06, "loss": 0.6481, "step": 4504 }, { "epoch": 0.46, "grad_norm": 1.3311436115399804, "learning_rate": 5.8857871762032715e-06, "loss": 0.7209, "step": 4505 }, { "epoch": 0.46, "grad_norm": 1.3247538783731712, "learning_rate": 5.884159640373402e-06, "loss": 0.594, "step": 4506 }, { "epoch": 0.46, "grad_norm": 1.2179577637137653, "learning_rate": 5.882532007832805e-06, "loss": 0.5255, "step": 4507 }, { "epoch": 0.46, "grad_norm": 1.4494995230108423, "learning_rate": 5.880904278759514e-06, "loss": 0.6655, "step": 4508 }, { "epoch": 0.46, "grad_norm": 1.4629156412383166, "learning_rate": 5.879276453331571e-06, "loss": 0.7414, "step": 4509 }, { "epoch": 0.46, "grad_norm": 1.6075709734891366, "learning_rate": 5.877648531727028e-06, "loss": 0.731, "step": 4510 }, { "epoch": 0.46, "grad_norm": 1.450974800391085, "learning_rate": 5.876020514123955e-06, "loss": 0.5803, "step": 4511 }, { "epoch": 0.46, "grad_norm": 1.5436631526956195, "learning_rate": 5.874392400700423e-06, "loss": 0.6818, "step": 4512 }, { "epoch": 0.46, "grad_norm": 1.4829134490913358, "learning_rate": 5.872764191634518e-06, "loss": 0.6746, "step": 4513 }, { "epoch": 0.46, "grad_norm": 1.5176275988586112, "learning_rate": 5.8711358871043365e-06, "loss": 0.6571, "step": 4514 }, { "epoch": 0.46, "grad_norm": 1.649380928525928, "learning_rate": 5.869507487287985e-06, "loss": 0.6639, "step": 4515 }, { "epoch": 0.46, "grad_norm": 1.5401447819723462, "learning_rate": 5.8678789923635815e-06, "loss": 0.569, "step": 4516 }, { "epoch": 0.46, "grad_norm": 1.3739040711441566, "learning_rate": 5.86625040250925e-06, "loss": 0.6379, "step": 4517 }, { "epoch": 0.46, "grad_norm": 1.4840202070465824, "learning_rate": 5.864621717903133e-06, "loss": 0.6562, "step": 4518 }, { "epoch": 0.46, "grad_norm": 1.5893157327586478, "learning_rate": 5.862992938723373e-06, "loss": 0.7687, "step": 4519 }, { "epoch": 0.46, "grad_norm": 1.4231163197136623, "learning_rate": 5.8613640651481315e-06, "loss": 0.6719, "step": 4520 }, { "epoch": 0.46, "grad_norm": 1.1480272738660917, "learning_rate": 5.859735097355577e-06, "loss": 0.5962, "step": 4521 }, { "epoch": 0.46, "grad_norm": 1.4649796632773124, "learning_rate": 5.858106035523888e-06, "loss": 0.7547, "step": 4522 }, { "epoch": 0.46, "grad_norm": 1.348989043499635, "learning_rate": 5.856476879831256e-06, "loss": 0.6005, "step": 4523 }, { "epoch": 0.46, "grad_norm": 1.437018405470859, "learning_rate": 5.854847630455875e-06, "loss": 0.6558, "step": 4524 }, { "epoch": 0.46, "grad_norm": 2.7500790204412398, "learning_rate": 5.853218287575961e-06, "loss": 0.7332, "step": 4525 }, { "epoch": 0.46, "grad_norm": 1.283016007155159, "learning_rate": 5.85158885136973e-06, "loss": 0.669, "step": 4526 }, { "epoch": 0.46, "grad_norm": 1.5679457274596653, "learning_rate": 5.849959322015414e-06, "loss": 0.6491, "step": 4527 }, { "epoch": 0.46, "grad_norm": 1.8162893286706008, "learning_rate": 5.848329699691253e-06, "loss": 0.6481, "step": 4528 }, { "epoch": 0.46, "grad_norm": 1.5012480592752866, "learning_rate": 5.8466999845754964e-06, "loss": 0.6405, "step": 4529 }, { "epoch": 0.46, "grad_norm": 1.6597343201537873, "learning_rate": 5.845070176846408e-06, "loss": 0.7152, "step": 4530 }, { "epoch": 0.46, "grad_norm": 1.5694440521110906, "learning_rate": 5.843440276682254e-06, "loss": 0.6097, "step": 4531 }, { "epoch": 0.46, "grad_norm": 1.6748731602530578, "learning_rate": 5.84181028426132e-06, "loss": 0.786, "step": 4532 }, { "epoch": 0.46, "grad_norm": 1.562300586035439, "learning_rate": 5.840180199761894e-06, "loss": 0.818, "step": 4533 }, { "epoch": 0.46, "grad_norm": 1.4117420818370494, "learning_rate": 5.8385500233622785e-06, "loss": 0.5919, "step": 4534 }, { "epoch": 0.46, "grad_norm": 1.3444076310643922, "learning_rate": 5.836919755240784e-06, "loss": 0.6688, "step": 4535 }, { "epoch": 0.46, "grad_norm": 1.5042793200695674, "learning_rate": 5.835289395575732e-06, "loss": 0.6739, "step": 4536 }, { "epoch": 0.46, "grad_norm": 1.3443287735385796, "learning_rate": 5.833658944545455e-06, "loss": 0.7492, "step": 4537 }, { "epoch": 0.46, "grad_norm": 1.4310644270045583, "learning_rate": 5.83202840232829e-06, "loss": 0.7077, "step": 4538 }, { "epoch": 0.46, "grad_norm": 1.3249079358494944, "learning_rate": 5.830397769102595e-06, "loss": 0.605, "step": 4539 }, { "epoch": 0.46, "grad_norm": 1.4358308835407343, "learning_rate": 5.828767045046725e-06, "loss": 0.6775, "step": 4540 }, { "epoch": 0.46, "grad_norm": 1.8314197761506188, "learning_rate": 5.827136230339053e-06, "loss": 0.6081, "step": 4541 }, { "epoch": 0.46, "grad_norm": 1.6769046441319413, "learning_rate": 5.825505325157962e-06, "loss": 0.7706, "step": 4542 }, { "epoch": 0.46, "grad_norm": 1.3035517888025032, "learning_rate": 5.82387432968184e-06, "loss": 0.7311, "step": 4543 }, { "epoch": 0.46, "grad_norm": 1.808914994086022, "learning_rate": 5.822243244089091e-06, "loss": 0.6374, "step": 4544 }, { "epoch": 0.46, "grad_norm": 1.4911188265448583, "learning_rate": 5.820612068558121e-06, "loss": 0.6358, "step": 4545 }, { "epoch": 0.46, "grad_norm": 1.5856427007113345, "learning_rate": 5.818980803267355e-06, "loss": 0.6655, "step": 4546 }, { "epoch": 0.46, "grad_norm": 1.449821336143896, "learning_rate": 5.8173494483952205e-06, "loss": 0.652, "step": 4547 }, { "epoch": 0.46, "grad_norm": 1.3974174143232583, "learning_rate": 5.815718004120159e-06, "loss": 0.606, "step": 4548 }, { "epoch": 0.46, "grad_norm": 1.5420778940591136, "learning_rate": 5.81408647062062e-06, "loss": 0.6735, "step": 4549 }, { "epoch": 0.46, "grad_norm": 1.3453494636471537, "learning_rate": 5.812454848075063e-06, "loss": 0.5794, "step": 4550 }, { "epoch": 0.46, "grad_norm": 1.3876410980625016, "learning_rate": 5.810823136661958e-06, "loss": 0.7625, "step": 4551 }, { "epoch": 0.46, "grad_norm": 1.4967676408844923, "learning_rate": 5.8091913365597805e-06, "loss": 0.5804, "step": 4552 }, { "epoch": 0.46, "grad_norm": 1.5795757835555317, "learning_rate": 5.807559447947025e-06, "loss": 0.6246, "step": 4553 }, { "epoch": 0.47, "grad_norm": 1.7160148270661246, "learning_rate": 5.805927471002187e-06, "loss": 0.7041, "step": 4554 }, { "epoch": 0.47, "grad_norm": 1.3773557812535557, "learning_rate": 5.804295405903775e-06, "loss": 0.6766, "step": 4555 }, { "epoch": 0.47, "grad_norm": 1.803903642415365, "learning_rate": 5.802663252830305e-06, "loss": 0.5732, "step": 4556 }, { "epoch": 0.47, "grad_norm": 1.3145989557175193, "learning_rate": 5.801031011960307e-06, "loss": 0.5345, "step": 4557 }, { "epoch": 0.47, "grad_norm": 1.5935247736446219, "learning_rate": 5.799398683472317e-06, "loss": 0.58, "step": 4558 }, { "epoch": 0.47, "grad_norm": 1.3702710900029162, "learning_rate": 5.79776626754488e-06, "loss": 0.6805, "step": 4559 }, { "epoch": 0.47, "grad_norm": 1.7113886796647262, "learning_rate": 5.796133764356556e-06, "loss": 0.7562, "step": 4560 }, { "epoch": 0.47, "grad_norm": 1.4412711810309367, "learning_rate": 5.794501174085907e-06, "loss": 0.6205, "step": 4561 }, { "epoch": 0.47, "grad_norm": 1.4546153271935374, "learning_rate": 5.79286849691151e-06, "loss": 0.6296, "step": 4562 }, { "epoch": 0.47, "grad_norm": 1.617373593327276, "learning_rate": 5.791235733011949e-06, "loss": 0.6782, "step": 4563 }, { "epoch": 0.47, "grad_norm": 1.4855723205802522, "learning_rate": 5.789602882565818e-06, "loss": 0.6459, "step": 4564 }, { "epoch": 0.47, "grad_norm": 1.7933055194831198, "learning_rate": 5.787969945751722e-06, "loss": 0.6987, "step": 4565 }, { "epoch": 0.47, "grad_norm": 1.4794974341679485, "learning_rate": 5.786336922748272e-06, "loss": 0.7267, "step": 4566 }, { "epoch": 0.47, "grad_norm": 1.425094037113205, "learning_rate": 5.784703813734094e-06, "loss": 0.6194, "step": 4567 }, { "epoch": 0.47, "grad_norm": 1.34164894776839, "learning_rate": 5.783070618887816e-06, "loss": 0.6251, "step": 4568 }, { "epoch": 0.47, "grad_norm": 1.5593014467030082, "learning_rate": 5.781437338388082e-06, "loss": 0.6288, "step": 4569 }, { "epoch": 0.47, "grad_norm": 1.4298785550640394, "learning_rate": 5.77980397241354e-06, "loss": 0.7009, "step": 4570 }, { "epoch": 0.47, "grad_norm": 1.3535685948333227, "learning_rate": 5.778170521142854e-06, "loss": 0.5625, "step": 4571 }, { "epoch": 0.47, "grad_norm": 1.5634825812991497, "learning_rate": 5.776536984754691e-06, "loss": 0.602, "step": 4572 }, { "epoch": 0.47, "grad_norm": 1.382360526328401, "learning_rate": 5.774903363427729e-06, "loss": 0.6635, "step": 4573 }, { "epoch": 0.47, "grad_norm": 1.3129208640643641, "learning_rate": 5.773269657340658e-06, "loss": 0.6374, "step": 4574 }, { "epoch": 0.47, "grad_norm": 1.4272541560005094, "learning_rate": 5.7716358666721725e-06, "loss": 0.5904, "step": 4575 }, { "epoch": 0.47, "grad_norm": 1.3921461620215083, "learning_rate": 5.770001991600982e-06, "loss": 0.6377, "step": 4576 }, { "epoch": 0.47, "grad_norm": 1.5187860842688328, "learning_rate": 5.7683680323058e-06, "loss": 0.7209, "step": 4577 }, { "epoch": 0.47, "grad_norm": 1.5870705904555393, "learning_rate": 5.766733988965354e-06, "loss": 0.6478, "step": 4578 }, { "epoch": 0.47, "grad_norm": 1.5202971738200008, "learning_rate": 5.765099861758376e-06, "loss": 0.6857, "step": 4579 }, { "epoch": 0.47, "grad_norm": 1.5767251191582985, "learning_rate": 5.763465650863609e-06, "loss": 0.6896, "step": 4580 }, { "epoch": 0.47, "grad_norm": 1.3288342889452391, "learning_rate": 5.761831356459807e-06, "loss": 0.6453, "step": 4581 }, { "epoch": 0.47, "grad_norm": 1.3147330521794183, "learning_rate": 5.76019697872573e-06, "loss": 0.5908, "step": 4582 }, { "epoch": 0.47, "grad_norm": 1.3867027075136535, "learning_rate": 5.758562517840151e-06, "loss": 0.6514, "step": 4583 }, { "epoch": 0.47, "grad_norm": 1.380708421417263, "learning_rate": 5.756927973981847e-06, "loss": 0.6092, "step": 4584 }, { "epoch": 0.47, "grad_norm": 1.3153188692113225, "learning_rate": 5.7552933473296105e-06, "loss": 0.702, "step": 4585 }, { "epoch": 0.47, "grad_norm": 1.4814901853596438, "learning_rate": 5.753658638062237e-06, "loss": 0.6578, "step": 4586 }, { "epoch": 0.47, "grad_norm": 1.49188616474066, "learning_rate": 5.752023846358532e-06, "loss": 0.6277, "step": 4587 }, { "epoch": 0.47, "grad_norm": 1.5015436087295795, "learning_rate": 5.750388972397315e-06, "loss": 0.7267, "step": 4588 }, { "epoch": 0.47, "grad_norm": 1.393019018840603, "learning_rate": 5.748754016357408e-06, "loss": 0.641, "step": 4589 }, { "epoch": 0.47, "grad_norm": 1.410860759578179, "learning_rate": 5.747118978417647e-06, "loss": 0.6719, "step": 4590 }, { "epoch": 0.47, "grad_norm": 1.3373905642458286, "learning_rate": 5.745483858756875e-06, "loss": 0.6566, "step": 4591 }, { "epoch": 0.47, "grad_norm": 2.506915864740002, "learning_rate": 5.743848657553943e-06, "loss": 0.6557, "step": 4592 }, { "epoch": 0.47, "grad_norm": 1.3039254795819613, "learning_rate": 5.742213374987712e-06, "loss": 0.5803, "step": 4593 }, { "epoch": 0.47, "grad_norm": 1.57027257922558, "learning_rate": 5.74057801123705e-06, "loss": 0.6661, "step": 4594 }, { "epoch": 0.47, "grad_norm": 1.746239423143022, "learning_rate": 5.738942566480839e-06, "loss": 0.6796, "step": 4595 }, { "epoch": 0.47, "grad_norm": 1.4392124968811078, "learning_rate": 5.7373070408979644e-06, "loss": 0.6726, "step": 4596 }, { "epoch": 0.47, "grad_norm": 1.6080713452582727, "learning_rate": 5.7356714346673225e-06, "loss": 0.6941, "step": 4597 }, { "epoch": 0.47, "grad_norm": 1.5021908392618775, "learning_rate": 5.734035747967819e-06, "loss": 0.6023, "step": 4598 }, { "epoch": 0.47, "grad_norm": 1.6182764836149335, "learning_rate": 5.732399980978366e-06, "loss": 0.5874, "step": 4599 }, { "epoch": 0.47, "grad_norm": 1.5015973454526628, "learning_rate": 5.73076413387789e-06, "loss": 0.6156, "step": 4600 }, { "epoch": 0.47, "grad_norm": 1.2628201380476385, "learning_rate": 5.729128206845317e-06, "loss": 0.6221, "step": 4601 }, { "epoch": 0.47, "grad_norm": 1.634960018440488, "learning_rate": 5.727492200059593e-06, "loss": 0.7386, "step": 4602 }, { "epoch": 0.47, "grad_norm": 1.4864654721516934, "learning_rate": 5.725856113699664e-06, "loss": 0.7374, "step": 4603 }, { "epoch": 0.47, "grad_norm": 1.3325552700846703, "learning_rate": 5.724219947944486e-06, "loss": 0.6778, "step": 4604 }, { "epoch": 0.47, "grad_norm": 1.3503946875478345, "learning_rate": 5.722583702973028e-06, "loss": 0.5959, "step": 4605 }, { "epoch": 0.47, "grad_norm": 1.681352350627633, "learning_rate": 5.720947378964264e-06, "loss": 0.7041, "step": 4606 }, { "epoch": 0.47, "grad_norm": 1.4348938386491008, "learning_rate": 5.719310976097179e-06, "loss": 0.6482, "step": 4607 }, { "epoch": 0.47, "grad_norm": 1.399276164833572, "learning_rate": 5.717674494550762e-06, "loss": 0.6017, "step": 4608 }, { "epoch": 0.47, "grad_norm": 1.3477085566246239, "learning_rate": 5.716037934504017e-06, "loss": 0.6343, "step": 4609 }, { "epoch": 0.47, "grad_norm": 1.452617618137928, "learning_rate": 5.714401296135952e-06, "loss": 0.6049, "step": 4610 }, { "epoch": 0.47, "grad_norm": 1.4123441482738313, "learning_rate": 5.712764579625585e-06, "loss": 0.6607, "step": 4611 }, { "epoch": 0.47, "grad_norm": 1.7435952165242914, "learning_rate": 5.711127785151942e-06, "loss": 0.7758, "step": 4612 }, { "epoch": 0.47, "grad_norm": 1.4558662935683715, "learning_rate": 5.70949091289406e-06, "loss": 0.6232, "step": 4613 }, { "epoch": 0.47, "grad_norm": 1.427722127025702, "learning_rate": 5.707853963030982e-06, "loss": 0.7175, "step": 4614 }, { "epoch": 0.47, "grad_norm": 1.403419661991379, "learning_rate": 5.7062169357417576e-06, "loss": 0.6357, "step": 4615 }, { "epoch": 0.47, "grad_norm": 1.2210664512197995, "learning_rate": 5.70457983120545e-06, "loss": 0.6268, "step": 4616 }, { "epoch": 0.47, "grad_norm": 1.5797855933218936, "learning_rate": 5.7029426496011275e-06, "loss": 0.7297, "step": 4617 }, { "epoch": 0.47, "grad_norm": 1.2529783177135072, "learning_rate": 5.701305391107868e-06, "loss": 0.6372, "step": 4618 }, { "epoch": 0.47, "grad_norm": 1.4528918523012748, "learning_rate": 5.6996680559047545e-06, "loss": 0.6325, "step": 4619 }, { "epoch": 0.47, "grad_norm": 1.7613488616209838, "learning_rate": 5.698030644170886e-06, "loss": 0.707, "step": 4620 }, { "epoch": 0.47, "grad_norm": 1.3698954242801253, "learning_rate": 5.696393156085361e-06, "loss": 0.6843, "step": 4621 }, { "epoch": 0.47, "grad_norm": 1.485359178962199, "learning_rate": 5.6947555918272925e-06, "loss": 0.6206, "step": 4622 }, { "epoch": 0.47, "grad_norm": 2.1143746022642875, "learning_rate": 5.6931179515758e-06, "loss": 0.6386, "step": 4623 }, { "epoch": 0.47, "grad_norm": 1.572931196272959, "learning_rate": 5.69148023551001e-06, "loss": 0.671, "step": 4624 }, { "epoch": 0.47, "grad_norm": 1.5418632328872384, "learning_rate": 5.689842443809057e-06, "loss": 0.7215, "step": 4625 }, { "epoch": 0.47, "grad_norm": 1.6494028210050884, "learning_rate": 5.688204576652089e-06, "loss": 0.5905, "step": 4626 }, { "epoch": 0.47, "grad_norm": 1.6522258786038935, "learning_rate": 5.686566634218255e-06, "loss": 0.7028, "step": 4627 }, { "epoch": 0.47, "grad_norm": 1.3376870821839875, "learning_rate": 5.684928616686718e-06, "loss": 0.6474, "step": 4628 }, { "epoch": 0.47, "grad_norm": 1.5672803041887464, "learning_rate": 5.683290524236644e-06, "loss": 0.6138, "step": 4629 }, { "epoch": 0.47, "grad_norm": 1.4489443961602604, "learning_rate": 5.681652357047214e-06, "loss": 0.6807, "step": 4630 }, { "epoch": 0.47, "grad_norm": 1.448641645014608, "learning_rate": 5.6800141152976096e-06, "loss": 0.6859, "step": 4631 }, { "epoch": 0.47, "grad_norm": 1.3058028119489533, "learning_rate": 5.678375799167026e-06, "loss": 0.5968, "step": 4632 }, { "epoch": 0.47, "grad_norm": 1.615509126595657, "learning_rate": 5.676737408834664e-06, "loss": 0.6573, "step": 4633 }, { "epoch": 0.47, "grad_norm": 1.4374582076141174, "learning_rate": 5.675098944479734e-06, "loss": 0.5894, "step": 4634 }, { "epoch": 0.47, "grad_norm": 1.273994865286417, "learning_rate": 5.6734604062814535e-06, "loss": 0.5936, "step": 4635 }, { "epoch": 0.47, "grad_norm": 1.3585943488675425, "learning_rate": 5.671821794419046e-06, "loss": 0.6825, "step": 4636 }, { "epoch": 0.47, "grad_norm": 1.4105609897685507, "learning_rate": 5.6701831090717504e-06, "loss": 0.6915, "step": 4637 }, { "epoch": 0.47, "grad_norm": 1.2968012327114882, "learning_rate": 5.668544350418805e-06, "loss": 0.574, "step": 4638 }, { "epoch": 0.47, "grad_norm": 1.3938229345980504, "learning_rate": 5.66690551863946e-06, "loss": 0.5995, "step": 4639 }, { "epoch": 0.47, "grad_norm": 1.413283662951143, "learning_rate": 5.665266613912972e-06, "loss": 0.6228, "step": 4640 }, { "epoch": 0.47, "grad_norm": 1.459534730525245, "learning_rate": 5.663627636418611e-06, "loss": 0.633, "step": 4641 }, { "epoch": 0.47, "grad_norm": 1.4878831477270604, "learning_rate": 5.661988586335647e-06, "loss": 0.6805, "step": 4642 }, { "epoch": 0.47, "grad_norm": 1.5426563722802489, "learning_rate": 5.660349463843363e-06, "loss": 0.661, "step": 4643 }, { "epoch": 0.47, "grad_norm": 1.340290334592526, "learning_rate": 5.658710269121051e-06, "loss": 0.5699, "step": 4644 }, { "epoch": 0.47, "grad_norm": 1.2891502938836727, "learning_rate": 5.657071002348004e-06, "loss": 0.5734, "step": 4645 }, { "epoch": 0.47, "grad_norm": 1.4618600325527786, "learning_rate": 5.6554316637035315e-06, "loss": 0.6555, "step": 4646 }, { "epoch": 0.47, "grad_norm": 1.450633546737094, "learning_rate": 5.653792253366945e-06, "loss": 0.6094, "step": 4647 }, { "epoch": 0.47, "grad_norm": 1.831297707896994, "learning_rate": 5.6521527715175665e-06, "loss": 0.6225, "step": 4648 }, { "epoch": 0.47, "grad_norm": 1.4399504314701819, "learning_rate": 5.650513218334724e-06, "loss": 0.6418, "step": 4649 }, { "epoch": 0.47, "grad_norm": 1.674345710846292, "learning_rate": 5.6488735939977545e-06, "loss": 0.7129, "step": 4650 }, { "epoch": 0.47, "grad_norm": 1.3165958582965662, "learning_rate": 5.647233898686005e-06, "loss": 0.6315, "step": 4651 }, { "epoch": 0.48, "grad_norm": 1.455275749828691, "learning_rate": 5.645594132578824e-06, "loss": 0.6685, "step": 4652 }, { "epoch": 0.48, "grad_norm": 1.6634279743112097, "learning_rate": 5.643954295855575e-06, "loss": 0.7292, "step": 4653 }, { "epoch": 0.48, "grad_norm": 1.3086128717903336, "learning_rate": 5.6423143886956235e-06, "loss": 0.6441, "step": 4654 }, { "epoch": 0.48, "grad_norm": 1.3100260468775982, "learning_rate": 5.640674411278345e-06, "loss": 0.5851, "step": 4655 }, { "epoch": 0.48, "grad_norm": 1.4926723658464722, "learning_rate": 5.639034363783126e-06, "loss": 0.589, "step": 4656 }, { "epoch": 0.48, "grad_norm": 1.4882065603700612, "learning_rate": 5.637394246389352e-06, "loss": 0.6067, "step": 4657 }, { "epoch": 0.48, "grad_norm": 1.6459493884116196, "learning_rate": 5.635754059276428e-06, "loss": 0.6275, "step": 4658 }, { "epoch": 0.48, "grad_norm": 1.3686623203512043, "learning_rate": 5.634113802623755e-06, "loss": 0.6602, "step": 4659 }, { "epoch": 0.48, "grad_norm": 1.4506524451860658, "learning_rate": 5.632473476610748e-06, "loss": 0.6494, "step": 4660 }, { "epoch": 0.48, "grad_norm": 1.5609332516348071, "learning_rate": 5.630833081416829e-06, "loss": 0.6072, "step": 4661 }, { "epoch": 0.48, "grad_norm": 1.3579901756362016, "learning_rate": 5.629192617221427e-06, "loss": 0.541, "step": 4662 }, { "epoch": 0.48, "grad_norm": 1.4723000499702383, "learning_rate": 5.62755208420398e-06, "loss": 0.7009, "step": 4663 }, { "epoch": 0.48, "grad_norm": 1.3390442121942572, "learning_rate": 5.625911482543928e-06, "loss": 0.54, "step": 4664 }, { "epoch": 0.48, "grad_norm": 1.2693642106636258, "learning_rate": 5.624270812420726e-06, "loss": 0.516, "step": 4665 }, { "epoch": 0.48, "grad_norm": 1.417105098361127, "learning_rate": 5.622630074013831e-06, "loss": 0.7357, "step": 4666 }, { "epoch": 0.48, "grad_norm": 1.5198436931029573, "learning_rate": 5.62098926750271e-06, "loss": 0.6956, "step": 4667 }, { "epoch": 0.48, "grad_norm": 1.3697354778401114, "learning_rate": 5.619348393066838e-06, "loss": 0.6249, "step": 4668 }, { "epoch": 0.48, "grad_norm": 1.4681075258924885, "learning_rate": 5.617707450885695e-06, "loss": 0.6661, "step": 4669 }, { "epoch": 0.48, "grad_norm": 1.5029765808901585, "learning_rate": 5.616066441138772e-06, "loss": 0.612, "step": 4670 }, { "epoch": 0.48, "grad_norm": 1.632112252061343, "learning_rate": 5.61442536400556e-06, "loss": 0.6507, "step": 4671 }, { "epoch": 0.48, "grad_norm": 1.5602045623731928, "learning_rate": 5.612784219665568e-06, "loss": 0.7215, "step": 4672 }, { "epoch": 0.48, "grad_norm": 1.5400567936330998, "learning_rate": 5.611143008298305e-06, "loss": 0.8179, "step": 4673 }, { "epoch": 0.48, "grad_norm": 1.242234126265599, "learning_rate": 5.609501730083289e-06, "loss": 0.7034, "step": 4674 }, { "epoch": 0.48, "grad_norm": 1.3156413742436566, "learning_rate": 5.6078603852000445e-06, "loss": 0.5694, "step": 4675 }, { "epoch": 0.48, "grad_norm": 1.3042780658043716, "learning_rate": 5.606218973828106e-06, "loss": 0.6566, "step": 4676 }, { "epoch": 0.48, "grad_norm": 1.4300190496389604, "learning_rate": 5.604577496147014e-06, "loss": 0.6349, "step": 4677 }, { "epoch": 0.48, "grad_norm": 1.43892171527889, "learning_rate": 5.602935952336313e-06, "loss": 0.6396, "step": 4678 }, { "epoch": 0.48, "grad_norm": 1.404737787471969, "learning_rate": 5.601294342575558e-06, "loss": 0.705, "step": 4679 }, { "epoch": 0.48, "grad_norm": 1.334318264361282, "learning_rate": 5.599652667044315e-06, "loss": 0.5631, "step": 4680 }, { "epoch": 0.48, "grad_norm": 4.923202472669649, "learning_rate": 5.598010925922148e-06, "loss": 0.8031, "step": 4681 }, { "epoch": 0.48, "grad_norm": 1.311144981990001, "learning_rate": 5.596369119388636e-06, "loss": 0.6013, "step": 4682 }, { "epoch": 0.48, "grad_norm": 1.4297101812945503, "learning_rate": 5.5947272476233614e-06, "loss": 0.5619, "step": 4683 }, { "epoch": 0.48, "grad_norm": 1.4452960095253458, "learning_rate": 5.593085310805916e-06, "loss": 0.6152, "step": 4684 }, { "epoch": 0.48, "grad_norm": 1.3124103165754868, "learning_rate": 5.591443309115893e-06, "loss": 0.6455, "step": 4685 }, { "epoch": 0.48, "grad_norm": 1.4356651599880141, "learning_rate": 5.5898012427329015e-06, "loss": 0.7171, "step": 4686 }, { "epoch": 0.48, "grad_norm": 1.3340178313100832, "learning_rate": 5.588159111836553e-06, "loss": 0.6049, "step": 4687 }, { "epoch": 0.48, "grad_norm": 1.5723035777291108, "learning_rate": 5.5865169166064635e-06, "loss": 0.6677, "step": 4688 }, { "epoch": 0.48, "grad_norm": 1.4583596929634413, "learning_rate": 5.584874657222261e-06, "loss": 0.589, "step": 4689 }, { "epoch": 0.48, "grad_norm": 1.3514944516453937, "learning_rate": 5.583232333863578e-06, "loss": 0.7061, "step": 4690 }, { "epoch": 0.48, "grad_norm": 1.4668085316302302, "learning_rate": 5.581589946710054e-06, "loss": 0.6498, "step": 4691 }, { "epoch": 0.48, "grad_norm": 1.3877899138300456, "learning_rate": 5.579947495941335e-06, "loss": 0.6195, "step": 4692 }, { "epoch": 0.48, "grad_norm": 1.5812270116477583, "learning_rate": 5.578304981737076e-06, "loss": 0.6846, "step": 4693 }, { "epoch": 0.48, "grad_norm": 1.4293633305722597, "learning_rate": 5.576662404276939e-06, "loss": 0.5731, "step": 4694 }, { "epoch": 0.48, "grad_norm": 1.3940479725383987, "learning_rate": 5.575019763740588e-06, "loss": 0.7002, "step": 4695 }, { "epoch": 0.48, "grad_norm": 1.3673301884440567, "learning_rate": 5.5733770603077e-06, "loss": 0.705, "step": 4696 }, { "epoch": 0.48, "grad_norm": 1.3673747721942142, "learning_rate": 5.571734294157955e-06, "loss": 0.7028, "step": 4697 }, { "epoch": 0.48, "grad_norm": 1.3773110301665312, "learning_rate": 5.5700914654710444e-06, "loss": 0.6875, "step": 4698 }, { "epoch": 0.48, "grad_norm": 1.4468490123425133, "learning_rate": 5.56844857442666e-06, "loss": 0.6776, "step": 4699 }, { "epoch": 0.48, "grad_norm": 1.5086429638053218, "learning_rate": 5.566805621204504e-06, "loss": 0.7003, "step": 4700 }, { "epoch": 0.48, "grad_norm": 1.3721264766103198, "learning_rate": 5.5651626059842865e-06, "loss": 0.653, "step": 4701 }, { "epoch": 0.48, "grad_norm": 1.3033241410973744, "learning_rate": 5.563519528945723e-06, "loss": 0.6241, "step": 4702 }, { "epoch": 0.48, "grad_norm": 1.4657880313289906, "learning_rate": 5.561876390268535e-06, "loss": 0.7634, "step": 4703 }, { "epoch": 0.48, "grad_norm": 1.2761956729724775, "learning_rate": 5.560233190132451e-06, "loss": 0.5872, "step": 4704 }, { "epoch": 0.48, "grad_norm": 1.6929606052374333, "learning_rate": 5.558589928717208e-06, "loss": 0.6324, "step": 4705 }, { "epoch": 0.48, "grad_norm": 1.3631060823717096, "learning_rate": 5.556946606202548e-06, "loss": 0.6209, "step": 4706 }, { "epoch": 0.48, "grad_norm": 1.6229540054213443, "learning_rate": 5.555303222768218e-06, "loss": 0.6406, "step": 4707 }, { "epoch": 0.48, "grad_norm": 1.3518221034653601, "learning_rate": 5.553659778593978e-06, "loss": 0.6582, "step": 4708 }, { "epoch": 0.48, "grad_norm": 1.3972504877189216, "learning_rate": 5.552016273859587e-06, "loss": 0.7059, "step": 4709 }, { "epoch": 0.48, "grad_norm": 1.5358963592415427, "learning_rate": 5.5503727087448155e-06, "loss": 0.7373, "step": 4710 }, { "epoch": 0.48, "grad_norm": 1.4157638329895263, "learning_rate": 5.548729083429439e-06, "loss": 0.6936, "step": 4711 }, { "epoch": 0.48, "grad_norm": 1.375804610873168, "learning_rate": 5.54708539809324e-06, "loss": 0.6909, "step": 4712 }, { "epoch": 0.48, "grad_norm": 1.4150827653422036, "learning_rate": 5.545441652916008e-06, "loss": 0.6877, "step": 4713 }, { "epoch": 0.48, "grad_norm": 1.4310901280081436, "learning_rate": 5.543797848077535e-06, "loss": 0.6809, "step": 4714 }, { "epoch": 0.48, "grad_norm": 1.921237086885028, "learning_rate": 5.542153983757627e-06, "loss": 0.6952, "step": 4715 }, { "epoch": 0.48, "grad_norm": 1.3582071416489634, "learning_rate": 5.54051006013609e-06, "loss": 0.6614, "step": 4716 }, { "epoch": 0.48, "grad_norm": 1.4447554497684787, "learning_rate": 5.538866077392738e-06, "loss": 0.677, "step": 4717 }, { "epoch": 0.48, "grad_norm": 1.4360576564710552, "learning_rate": 5.537222035707396e-06, "loss": 0.6835, "step": 4718 }, { "epoch": 0.48, "grad_norm": 1.4606814804612522, "learning_rate": 5.5355779352598884e-06, "loss": 0.7372, "step": 4719 }, { "epoch": 0.48, "grad_norm": 1.2903988915912707, "learning_rate": 5.533933776230052e-06, "loss": 0.6093, "step": 4720 }, { "epoch": 0.48, "grad_norm": 1.5185466146576168, "learning_rate": 5.532289558797722e-06, "loss": 0.6976, "step": 4721 }, { "epoch": 0.48, "grad_norm": 1.65297779079868, "learning_rate": 5.530645283142754e-06, "loss": 0.6238, "step": 4722 }, { "epoch": 0.48, "grad_norm": 1.412895277712733, "learning_rate": 5.529000949444994e-06, "loss": 0.6345, "step": 4723 }, { "epoch": 0.48, "grad_norm": 1.3321898514774992, "learning_rate": 5.527356557884305e-06, "loss": 0.6182, "step": 4724 }, { "epoch": 0.48, "grad_norm": 1.356463687660733, "learning_rate": 5.525712108640553e-06, "loss": 0.609, "step": 4725 }, { "epoch": 0.48, "grad_norm": 1.6270071060340259, "learning_rate": 5.524067601893609e-06, "loss": 0.6104, "step": 4726 }, { "epoch": 0.48, "grad_norm": 1.3313878612429024, "learning_rate": 5.5224230378233544e-06, "loss": 0.615, "step": 4727 }, { "epoch": 0.48, "grad_norm": 1.7809367277748342, "learning_rate": 5.52077841660967e-06, "loss": 0.6767, "step": 4728 }, { "epoch": 0.48, "grad_norm": 2.332731801043278, "learning_rate": 5.519133738432451e-06, "loss": 0.6448, "step": 4729 }, { "epoch": 0.48, "grad_norm": 1.5960767895884047, "learning_rate": 5.5174890034715925e-06, "loss": 0.7116, "step": 4730 }, { "epoch": 0.48, "grad_norm": 1.5799666630714198, "learning_rate": 5.515844211906999e-06, "loss": 0.5765, "step": 4731 }, { "epoch": 0.48, "grad_norm": 1.4433226451391972, "learning_rate": 5.514199363918578e-06, "loss": 0.6557, "step": 4732 }, { "epoch": 0.48, "grad_norm": 1.3344736702891262, "learning_rate": 5.5125544596862505e-06, "loss": 0.6478, "step": 4733 }, { "epoch": 0.48, "grad_norm": 1.4598626121054714, "learning_rate": 5.510909499389935e-06, "loss": 0.6856, "step": 4734 }, { "epoch": 0.48, "grad_norm": 1.55184590248852, "learning_rate": 5.509264483209558e-06, "loss": 0.6861, "step": 4735 }, { "epoch": 0.48, "grad_norm": 1.4288769551641982, "learning_rate": 5.507619411325058e-06, "loss": 0.7058, "step": 4736 }, { "epoch": 0.48, "grad_norm": 2.1737570952127476, "learning_rate": 5.505974283916373e-06, "loss": 0.6456, "step": 4737 }, { "epoch": 0.48, "grad_norm": 1.6122583246673743, "learning_rate": 5.504329101163451e-06, "loss": 0.7123, "step": 4738 }, { "epoch": 0.48, "grad_norm": 1.4939878523602534, "learning_rate": 5.502683863246243e-06, "loss": 0.6183, "step": 4739 }, { "epoch": 0.48, "grad_norm": 1.4875952588707593, "learning_rate": 5.501038570344709e-06, "loss": 0.6369, "step": 4740 }, { "epoch": 0.48, "grad_norm": 1.437445130609055, "learning_rate": 5.499393222638813e-06, "loss": 0.6511, "step": 4741 }, { "epoch": 0.48, "grad_norm": 1.1966473632036132, "learning_rate": 5.497747820308524e-06, "loss": 0.5509, "step": 4742 }, { "epoch": 0.48, "grad_norm": 1.6513418887584383, "learning_rate": 5.496102363533824e-06, "loss": 0.6615, "step": 4743 }, { "epoch": 0.48, "grad_norm": 1.662957774226166, "learning_rate": 5.4944568524946895e-06, "loss": 0.6626, "step": 4744 }, { "epoch": 0.48, "grad_norm": 1.410341626945113, "learning_rate": 5.4928112873711115e-06, "loss": 0.6077, "step": 4745 }, { "epoch": 0.48, "grad_norm": 1.4230169006904503, "learning_rate": 5.491165668343084e-06, "loss": 0.6277, "step": 4746 }, { "epoch": 0.48, "grad_norm": 1.695004309875544, "learning_rate": 5.4895199955906085e-06, "loss": 0.7394, "step": 4747 }, { "epoch": 0.48, "grad_norm": 1.5692709037114718, "learning_rate": 5.487874269293692e-06, "loss": 0.6305, "step": 4748 }, { "epoch": 0.48, "grad_norm": 1.8367193964056683, "learning_rate": 5.486228489632342e-06, "loss": 0.664, "step": 4749 }, { "epoch": 0.49, "grad_norm": 1.471962551284706, "learning_rate": 5.48458265678658e-06, "loss": 0.7462, "step": 4750 }, { "epoch": 0.49, "grad_norm": 1.6296205989141659, "learning_rate": 5.48293677093643e-06, "loss": 0.5806, "step": 4751 }, { "epoch": 0.49, "grad_norm": 1.4009751548390612, "learning_rate": 5.48129083226192e-06, "loss": 0.6472, "step": 4752 }, { "epoch": 0.49, "grad_norm": 1.5118380024723626, "learning_rate": 5.479644840943085e-06, "loss": 0.6716, "step": 4753 }, { "epoch": 0.49, "grad_norm": 1.5450331721447546, "learning_rate": 5.477998797159967e-06, "loss": 0.6924, "step": 4754 }, { "epoch": 0.49, "grad_norm": 1.3305903888337751, "learning_rate": 5.476352701092615e-06, "loss": 0.6397, "step": 4755 }, { "epoch": 0.49, "grad_norm": 1.3555500803291691, "learning_rate": 5.474706552921074e-06, "loss": 0.7272, "step": 4756 }, { "epoch": 0.49, "grad_norm": 1.4179421705961592, "learning_rate": 5.473060352825412e-06, "loss": 0.6063, "step": 4757 }, { "epoch": 0.49, "grad_norm": 1.4374228265343516, "learning_rate": 5.471414100985686e-06, "loss": 0.6483, "step": 4758 }, { "epoch": 0.49, "grad_norm": 1.2978880029845339, "learning_rate": 5.469767797581969e-06, "loss": 0.5869, "step": 4759 }, { "epoch": 0.49, "grad_norm": 1.573847558684022, "learning_rate": 5.468121442794333e-06, "loss": 0.7613, "step": 4760 }, { "epoch": 0.49, "grad_norm": 1.4899098775230393, "learning_rate": 5.466475036802861e-06, "loss": 0.6391, "step": 4761 }, { "epoch": 0.49, "grad_norm": 1.4745003009843776, "learning_rate": 5.464828579787641e-06, "loss": 0.676, "step": 4762 }, { "epoch": 0.49, "grad_norm": 1.5544422822939588, "learning_rate": 5.46318207192876e-06, "loss": 0.6391, "step": 4763 }, { "epoch": 0.49, "grad_norm": 1.4224561156761533, "learning_rate": 5.4615355134063205e-06, "loss": 0.6402, "step": 4764 }, { "epoch": 0.49, "grad_norm": 1.441864833640491, "learning_rate": 5.459888904400423e-06, "loss": 0.6649, "step": 4765 }, { "epoch": 0.49, "grad_norm": 1.6441572213710236, "learning_rate": 5.4582422450911775e-06, "loss": 0.8068, "step": 4766 }, { "epoch": 0.49, "grad_norm": 1.4959548600671624, "learning_rate": 5.456595535658696e-06, "loss": 0.7363, "step": 4767 }, { "epoch": 0.49, "grad_norm": 2.0497616033110426, "learning_rate": 5.454948776283101e-06, "loss": 0.6382, "step": 4768 }, { "epoch": 0.49, "grad_norm": 1.4886308207483885, "learning_rate": 5.4533019671445165e-06, "loss": 0.5564, "step": 4769 }, { "epoch": 0.49, "grad_norm": 1.5389356427133771, "learning_rate": 5.451655108423069e-06, "loss": 0.652, "step": 4770 }, { "epoch": 0.49, "grad_norm": 1.5571637941226666, "learning_rate": 5.450008200298902e-06, "loss": 0.6783, "step": 4771 }, { "epoch": 0.49, "grad_norm": 1.3444201215568818, "learning_rate": 5.448361242952151e-06, "loss": 0.7121, "step": 4772 }, { "epoch": 0.49, "grad_norm": 1.5649390306253053, "learning_rate": 5.446714236562963e-06, "loss": 0.6503, "step": 4773 }, { "epoch": 0.49, "grad_norm": 1.2708129175923253, "learning_rate": 5.445067181311492e-06, "loss": 0.6514, "step": 4774 }, { "epoch": 0.49, "grad_norm": 1.5000518020147466, "learning_rate": 5.443420077377895e-06, "loss": 0.6284, "step": 4775 }, { "epoch": 0.49, "grad_norm": 1.3749499134132084, "learning_rate": 5.441772924942337e-06, "loss": 0.6189, "step": 4776 }, { "epoch": 0.49, "grad_norm": 1.6284928595239705, "learning_rate": 5.4401257241849805e-06, "loss": 0.6652, "step": 4777 }, { "epoch": 0.49, "grad_norm": 1.4811617221296716, "learning_rate": 5.438478475286003e-06, "loss": 0.6448, "step": 4778 }, { "epoch": 0.49, "grad_norm": 1.4295039146078545, "learning_rate": 5.436831178425582e-06, "loss": 0.6196, "step": 4779 }, { "epoch": 0.49, "grad_norm": 1.3664697132695145, "learning_rate": 5.435183833783902e-06, "loss": 0.596, "step": 4780 }, { "epoch": 0.49, "grad_norm": 2.379577804845241, "learning_rate": 5.433536441541152e-06, "loss": 0.6344, "step": 4781 }, { "epoch": 0.49, "grad_norm": 1.3117855473599598, "learning_rate": 5.431889001877524e-06, "loss": 0.646, "step": 4782 }, { "epoch": 0.49, "grad_norm": 1.4533622794494483, "learning_rate": 5.430241514973221e-06, "loss": 0.7632, "step": 4783 }, { "epoch": 0.49, "grad_norm": 1.5155987506980264, "learning_rate": 5.428593981008443e-06, "loss": 0.7271, "step": 4784 }, { "epoch": 0.49, "grad_norm": 1.6236939659791372, "learning_rate": 5.4269464001634065e-06, "loss": 0.7922, "step": 4785 }, { "epoch": 0.49, "grad_norm": 2.1050941211354575, "learning_rate": 5.42529877261832e-06, "loss": 0.6014, "step": 4786 }, { "epoch": 0.49, "grad_norm": 1.3537525528469965, "learning_rate": 5.423651098553406e-06, "loss": 0.6827, "step": 4787 }, { "epoch": 0.49, "grad_norm": 1.4934010751504219, "learning_rate": 5.42200337814889e-06, "loss": 0.6184, "step": 4788 }, { "epoch": 0.49, "grad_norm": 1.606547674965262, "learning_rate": 5.420355611585002e-06, "loss": 0.7236, "step": 4789 }, { "epoch": 0.49, "grad_norm": 1.3645524260238635, "learning_rate": 5.418707799041978e-06, "loss": 0.6561, "step": 4790 }, { "epoch": 0.49, "grad_norm": 1.436574758579422, "learning_rate": 5.417059940700056e-06, "loss": 0.6869, "step": 4791 }, { "epoch": 0.49, "grad_norm": 1.3204480568432055, "learning_rate": 5.4154120367394825e-06, "loss": 0.6654, "step": 4792 }, { "epoch": 0.49, "grad_norm": 1.4635056381045932, "learning_rate": 5.4137640873405085e-06, "loss": 0.6278, "step": 4793 }, { "epoch": 0.49, "grad_norm": 1.381568698106322, "learning_rate": 5.412116092683387e-06, "loss": 0.6419, "step": 4794 }, { "epoch": 0.49, "grad_norm": 1.5044094398389394, "learning_rate": 5.410468052948381e-06, "loss": 0.6804, "step": 4795 }, { "epoch": 0.49, "grad_norm": 1.4904195149023278, "learning_rate": 5.408819968315754e-06, "loss": 0.608, "step": 4796 }, { "epoch": 0.49, "grad_norm": 1.3316568368541477, "learning_rate": 5.407171838965777e-06, "loss": 0.6041, "step": 4797 }, { "epoch": 0.49, "grad_norm": 1.313808960906077, "learning_rate": 5.4055236650787215e-06, "loss": 0.638, "step": 4798 }, { "epoch": 0.49, "grad_norm": 1.277631253828919, "learning_rate": 5.403875446834872e-06, "loss": 0.5312, "step": 4799 }, { "epoch": 0.49, "grad_norm": 1.4057154103545553, "learning_rate": 5.402227184414512e-06, "loss": 0.698, "step": 4800 }, { "epoch": 0.49, "grad_norm": 1.5728075601195475, "learning_rate": 5.4005788779979276e-06, "loss": 0.5921, "step": 4801 }, { "epoch": 0.49, "grad_norm": 1.623397089789035, "learning_rate": 5.398930527765416e-06, "loss": 0.6325, "step": 4802 }, { "epoch": 0.49, "grad_norm": 1.2471380421766265, "learning_rate": 5.397282133897276e-06, "loss": 0.5859, "step": 4803 }, { "epoch": 0.49, "grad_norm": 1.4747741332000908, "learning_rate": 5.395633696573812e-06, "loss": 0.7344, "step": 4804 }, { "epoch": 0.49, "grad_norm": 1.5292835680185866, "learning_rate": 5.39398521597533e-06, "loss": 0.6203, "step": 4805 }, { "epoch": 0.49, "grad_norm": 1.4129768967781435, "learning_rate": 5.392336692282146e-06, "loss": 0.6589, "step": 4806 }, { "epoch": 0.49, "grad_norm": 1.4565096934365693, "learning_rate": 5.390688125674578e-06, "loss": 0.6515, "step": 4807 }, { "epoch": 0.49, "grad_norm": 1.3510361855449315, "learning_rate": 5.389039516332947e-06, "loss": 0.7469, "step": 4808 }, { "epoch": 0.49, "grad_norm": 1.3831440250604279, "learning_rate": 5.387390864437581e-06, "loss": 0.6197, "step": 4809 }, { "epoch": 0.49, "grad_norm": 1.4084530825007386, "learning_rate": 5.385742170168811e-06, "loss": 0.5695, "step": 4810 }, { "epoch": 0.49, "grad_norm": 1.575569678635247, "learning_rate": 5.384093433706977e-06, "loss": 0.6193, "step": 4811 }, { "epoch": 0.49, "grad_norm": 1.5134594220704483, "learning_rate": 5.382444655232418e-06, "loss": 0.7197, "step": 4812 }, { "epoch": 0.49, "grad_norm": 1.5274119293004196, "learning_rate": 5.380795834925479e-06, "loss": 0.6509, "step": 4813 }, { "epoch": 0.49, "grad_norm": 1.6204782648503655, "learning_rate": 5.3791469729665125e-06, "loss": 0.6536, "step": 4814 }, { "epoch": 0.49, "grad_norm": 1.6066033237088118, "learning_rate": 5.377498069535872e-06, "loss": 0.6784, "step": 4815 }, { "epoch": 0.49, "grad_norm": 1.477017729282854, "learning_rate": 5.375849124813919e-06, "loss": 0.7583, "step": 4816 }, { "epoch": 0.49, "grad_norm": 1.3925585254416304, "learning_rate": 5.374200138981015e-06, "loss": 0.5744, "step": 4817 }, { "epoch": 0.49, "grad_norm": 1.4393331376639225, "learning_rate": 5.372551112217531e-06, "loss": 0.6214, "step": 4818 }, { "epoch": 0.49, "grad_norm": 1.2699506430544685, "learning_rate": 5.370902044703838e-06, "loss": 0.6333, "step": 4819 }, { "epoch": 0.49, "grad_norm": 1.3716668112900217, "learning_rate": 5.369252936620315e-06, "loss": 0.6409, "step": 4820 }, { "epoch": 0.49, "grad_norm": 1.6959372766577718, "learning_rate": 5.367603788147343e-06, "loss": 0.6183, "step": 4821 }, { "epoch": 0.49, "grad_norm": 1.3410094193116615, "learning_rate": 5.365954599465308e-06, "loss": 0.6801, "step": 4822 }, { "epoch": 0.49, "grad_norm": 1.4784616956475898, "learning_rate": 5.364305370754603e-06, "loss": 0.6928, "step": 4823 }, { "epoch": 0.49, "grad_norm": 1.630729988030891, "learning_rate": 5.362656102195621e-06, "loss": 0.7477, "step": 4824 }, { "epoch": 0.49, "grad_norm": 1.3591948580194353, "learning_rate": 5.361006793968764e-06, "loss": 0.5967, "step": 4825 }, { "epoch": 0.49, "grad_norm": 1.3463592174920267, "learning_rate": 5.359357446254431e-06, "loss": 0.6006, "step": 4826 }, { "epoch": 0.49, "grad_norm": 1.4370210979595217, "learning_rate": 5.357708059233036e-06, "loss": 0.6316, "step": 4827 }, { "epoch": 0.49, "grad_norm": 1.4039057111793276, "learning_rate": 5.3560586330849875e-06, "loss": 0.6224, "step": 4828 }, { "epoch": 0.49, "grad_norm": 1.4482061367610717, "learning_rate": 5.354409167990703e-06, "loss": 0.5942, "step": 4829 }, { "epoch": 0.49, "grad_norm": 1.4132091387637675, "learning_rate": 5.352759664130603e-06, "loss": 0.5901, "step": 4830 }, { "epoch": 0.49, "grad_norm": 1.399958005220816, "learning_rate": 5.351110121685116e-06, "loss": 0.6618, "step": 4831 }, { "epoch": 0.49, "grad_norm": 1.6627382075407133, "learning_rate": 5.349460540834669e-06, "loss": 0.6195, "step": 4832 }, { "epoch": 0.49, "grad_norm": 1.4178017927186264, "learning_rate": 5.347810921759694e-06, "loss": 0.6897, "step": 4833 }, { "epoch": 0.49, "grad_norm": 1.3540150465176688, "learning_rate": 5.346161264640633e-06, "loss": 0.6469, "step": 4834 }, { "epoch": 0.49, "grad_norm": 1.4426353647243362, "learning_rate": 5.344511569657923e-06, "loss": 0.6048, "step": 4835 }, { "epoch": 0.49, "grad_norm": 1.302308752300724, "learning_rate": 5.342861836992015e-06, "loss": 0.5657, "step": 4836 }, { "epoch": 0.49, "grad_norm": 1.3108741505372778, "learning_rate": 5.341212066823356e-06, "loss": 0.7079, "step": 4837 }, { "epoch": 0.49, "grad_norm": 1.47277776136647, "learning_rate": 5.339562259332401e-06, "loss": 0.7762, "step": 4838 }, { "epoch": 0.49, "grad_norm": 1.346263306096647, "learning_rate": 5.337912414699611e-06, "loss": 0.6026, "step": 4839 }, { "epoch": 0.49, "grad_norm": 1.3847764380637062, "learning_rate": 5.336262533105443e-06, "loss": 0.6133, "step": 4840 }, { "epoch": 0.49, "grad_norm": 1.3752249701427126, "learning_rate": 5.33461261473037e-06, "loss": 0.7272, "step": 4841 }, { "epoch": 0.49, "grad_norm": 1.618025465999984, "learning_rate": 5.332962659754859e-06, "loss": 0.6081, "step": 4842 }, { "epoch": 0.49, "grad_norm": 1.4209500385951124, "learning_rate": 5.331312668359384e-06, "loss": 0.6041, "step": 4843 }, { "epoch": 0.49, "grad_norm": 1.4573201449065822, "learning_rate": 5.329662640724426e-06, "loss": 0.6177, "step": 4844 }, { "epoch": 0.49, "grad_norm": 1.3988874672053122, "learning_rate": 5.328012577030466e-06, "loss": 0.7766, "step": 4845 }, { "epoch": 0.49, "grad_norm": 1.429808048576468, "learning_rate": 5.326362477457991e-06, "loss": 0.6288, "step": 4846 }, { "epoch": 0.49, "grad_norm": 1.2576883893827544, "learning_rate": 5.32471234218749e-06, "loss": 0.5851, "step": 4847 }, { "epoch": 0.5, "grad_norm": 1.3829318285662524, "learning_rate": 5.32306217139946e-06, "loss": 0.6712, "step": 4848 }, { "epoch": 0.5, "grad_norm": 1.453066169709974, "learning_rate": 5.321411965274397e-06, "loss": 0.6792, "step": 4849 }, { "epoch": 0.5, "grad_norm": 1.3389976660817726, "learning_rate": 5.3197617239928035e-06, "loss": 0.6034, "step": 4850 }, { "epoch": 0.5, "grad_norm": 1.3856956817941322, "learning_rate": 5.318111447735186e-06, "loss": 0.6453, "step": 4851 }, { "epoch": 0.5, "grad_norm": 1.3156434925863825, "learning_rate": 5.316461136682053e-06, "loss": 0.6473, "step": 4852 }, { "epoch": 0.5, "grad_norm": 1.5937199822564503, "learning_rate": 5.31481079101392e-06, "loss": 0.7736, "step": 4853 }, { "epoch": 0.5, "grad_norm": 1.4553941360460152, "learning_rate": 5.313160410911302e-06, "loss": 0.7369, "step": 4854 }, { "epoch": 0.5, "grad_norm": 1.4197715029717755, "learning_rate": 5.311509996554722e-06, "loss": 0.6175, "step": 4855 }, { "epoch": 0.5, "grad_norm": 1.5099451919179556, "learning_rate": 5.309859548124705e-06, "loss": 0.7557, "step": 4856 }, { "epoch": 0.5, "grad_norm": 1.621537085513966, "learning_rate": 5.308209065801777e-06, "loss": 0.6628, "step": 4857 }, { "epoch": 0.5, "grad_norm": 1.4066880026823605, "learning_rate": 5.306558549766473e-06, "loss": 0.6175, "step": 4858 }, { "epoch": 0.5, "grad_norm": 1.2945717766004121, "learning_rate": 5.304908000199328e-06, "loss": 0.5651, "step": 4859 }, { "epoch": 0.5, "grad_norm": 1.5768032281079287, "learning_rate": 5.303257417280883e-06, "loss": 0.6388, "step": 4860 }, { "epoch": 0.5, "grad_norm": 1.5509393290263633, "learning_rate": 5.3016068011916766e-06, "loss": 0.5429, "step": 4861 }, { "epoch": 0.5, "grad_norm": 1.812393345598888, "learning_rate": 5.299956152112261e-06, "loss": 0.7555, "step": 4862 }, { "epoch": 0.5, "grad_norm": 1.476930336947049, "learning_rate": 5.298305470223184e-06, "loss": 0.6767, "step": 4863 }, { "epoch": 0.5, "grad_norm": 1.43349028251896, "learning_rate": 5.296654755705001e-06, "loss": 0.714, "step": 4864 }, { "epoch": 0.5, "grad_norm": 1.7262355271309244, "learning_rate": 5.295004008738268e-06, "loss": 0.6877, "step": 4865 }, { "epoch": 0.5, "grad_norm": 1.4563811943512481, "learning_rate": 5.293353229503548e-06, "loss": 0.6976, "step": 4866 }, { "epoch": 0.5, "grad_norm": 1.623581719356513, "learning_rate": 5.291702418181406e-06, "loss": 0.627, "step": 4867 }, { "epoch": 0.5, "grad_norm": 1.4033317391233033, "learning_rate": 5.290051574952406e-06, "loss": 0.5899, "step": 4868 }, { "epoch": 0.5, "grad_norm": 1.4884655666411586, "learning_rate": 5.288400699997127e-06, "loss": 0.7742, "step": 4869 }, { "epoch": 0.5, "grad_norm": 1.5371630155871552, "learning_rate": 5.286749793496138e-06, "loss": 0.611, "step": 4870 }, { "epoch": 0.5, "grad_norm": 1.5384118229572163, "learning_rate": 5.28509885563002e-06, "loss": 0.7346, "step": 4871 }, { "epoch": 0.5, "grad_norm": 1.4444300170063997, "learning_rate": 5.283447886579355e-06, "loss": 0.751, "step": 4872 }, { "epoch": 0.5, "grad_norm": 1.4689708829042674, "learning_rate": 5.281796886524728e-06, "loss": 0.6544, "step": 4873 }, { "epoch": 0.5, "grad_norm": 1.4496307850983172, "learning_rate": 5.28014585564673e-06, "loss": 0.6519, "step": 4874 }, { "epoch": 0.5, "grad_norm": 1.5700606969040538, "learning_rate": 5.278494794125949e-06, "loss": 0.7177, "step": 4875 }, { "epoch": 0.5, "grad_norm": 1.4995220445423485, "learning_rate": 5.276843702142986e-06, "loss": 0.5947, "step": 4876 }, { "epoch": 0.5, "grad_norm": 1.4999066404841992, "learning_rate": 5.275192579878435e-06, "loss": 0.6319, "step": 4877 }, { "epoch": 0.5, "grad_norm": 1.5582096186927903, "learning_rate": 5.273541427512903e-06, "loss": 0.7911, "step": 4878 }, { "epoch": 0.5, "grad_norm": 1.4878308680173802, "learning_rate": 5.271890245226991e-06, "loss": 0.676, "step": 4879 }, { "epoch": 0.5, "grad_norm": 1.5103392035653038, "learning_rate": 5.27023903320131e-06, "loss": 0.6794, "step": 4880 }, { "epoch": 0.5, "grad_norm": 1.9746650516023623, "learning_rate": 5.268587791616473e-06, "loss": 0.5798, "step": 4881 }, { "epoch": 0.5, "grad_norm": 1.4620949042892102, "learning_rate": 5.266936520653093e-06, "loss": 0.5741, "step": 4882 }, { "epoch": 0.5, "grad_norm": 1.3841332057979168, "learning_rate": 5.265285220491791e-06, "loss": 0.5484, "step": 4883 }, { "epoch": 0.5, "grad_norm": 1.510469419421267, "learning_rate": 5.263633891313188e-06, "loss": 0.7423, "step": 4884 }, { "epoch": 0.5, "grad_norm": 2.0656757982019136, "learning_rate": 5.261982533297907e-06, "loss": 0.7504, "step": 4885 }, { "epoch": 0.5, "grad_norm": 1.5028265471858144, "learning_rate": 5.260331146626579e-06, "loss": 0.6552, "step": 4886 }, { "epoch": 0.5, "grad_norm": 1.6938394666735272, "learning_rate": 5.258679731479832e-06, "loss": 0.7098, "step": 4887 }, { "epoch": 0.5, "grad_norm": 1.4386880180135837, "learning_rate": 5.257028288038304e-06, "loss": 0.6522, "step": 4888 }, { "epoch": 0.5, "grad_norm": 1.9051239818850714, "learning_rate": 5.255376816482627e-06, "loss": 0.6261, "step": 4889 }, { "epoch": 0.5, "grad_norm": 1.459525781606902, "learning_rate": 5.253725316993449e-06, "loss": 0.6144, "step": 4890 }, { "epoch": 0.5, "grad_norm": 1.3564703184477644, "learning_rate": 5.252073789751406e-06, "loss": 0.6198, "step": 4891 }, { "epoch": 0.5, "grad_norm": 1.4472425339043185, "learning_rate": 5.25042223493715e-06, "loss": 0.6561, "step": 4892 }, { "epoch": 0.5, "grad_norm": 1.451610447298204, "learning_rate": 5.248770652731328e-06, "loss": 0.6131, "step": 4893 }, { "epoch": 0.5, "grad_norm": 1.33868186164231, "learning_rate": 5.247119043314592e-06, "loss": 0.7561, "step": 4894 }, { "epoch": 0.5, "grad_norm": 1.5041502480333568, "learning_rate": 5.245467406867601e-06, "loss": 0.7929, "step": 4895 }, { "epoch": 0.5, "grad_norm": 1.6371066009499233, "learning_rate": 5.243815743571009e-06, "loss": 0.6965, "step": 4896 }, { "epoch": 0.5, "grad_norm": 1.448466426099574, "learning_rate": 5.2421640536054815e-06, "loss": 0.6761, "step": 4897 }, { "epoch": 0.5, "grad_norm": 1.6189214713330438, "learning_rate": 5.240512337151681e-06, "loss": 0.6651, "step": 4898 }, { "epoch": 0.5, "grad_norm": 1.5637861280434928, "learning_rate": 5.238860594390274e-06, "loss": 0.6703, "step": 4899 }, { "epoch": 0.5, "grad_norm": 1.561371724698452, "learning_rate": 5.23720882550193e-06, "loss": 0.5894, "step": 4900 }, { "epoch": 0.5, "grad_norm": 1.4405976563447265, "learning_rate": 5.235557030667325e-06, "loss": 0.7116, "step": 4901 }, { "epoch": 0.5, "grad_norm": 1.378184283875972, "learning_rate": 5.233905210067134e-06, "loss": 0.6441, "step": 4902 }, { "epoch": 0.5, "grad_norm": 1.3387518720605938, "learning_rate": 5.2322533638820325e-06, "loss": 0.6162, "step": 4903 }, { "epoch": 0.5, "grad_norm": 1.5565866864734157, "learning_rate": 5.230601492292706e-06, "loss": 0.6636, "step": 4904 }, { "epoch": 0.5, "grad_norm": 1.4605452810776998, "learning_rate": 5.228949595479838e-06, "loss": 0.6935, "step": 4905 }, { "epoch": 0.5, "grad_norm": 1.7531901199725435, "learning_rate": 5.227297673624112e-06, "loss": 0.7132, "step": 4906 }, { "epoch": 0.5, "grad_norm": 1.4149366588299772, "learning_rate": 5.225645726906222e-06, "loss": 0.6954, "step": 4907 }, { "epoch": 0.5, "grad_norm": 1.3533281649979934, "learning_rate": 5.2239937555068575e-06, "loss": 0.5884, "step": 4908 }, { "epoch": 0.5, "grad_norm": 1.5472396329713392, "learning_rate": 5.222341759606717e-06, "loss": 0.6622, "step": 4909 }, { "epoch": 0.5, "grad_norm": 1.5024824639086527, "learning_rate": 5.220689739386493e-06, "loss": 0.6807, "step": 4910 }, { "epoch": 0.5, "grad_norm": 1.539268667270945, "learning_rate": 5.219037695026892e-06, "loss": 0.6493, "step": 4911 }, { "epoch": 0.5, "grad_norm": 1.4722375674340547, "learning_rate": 5.217385626708615e-06, "loss": 0.6685, "step": 4912 }, { "epoch": 0.5, "grad_norm": 1.523984329181406, "learning_rate": 5.215733534612364e-06, "loss": 0.726, "step": 4913 }, { "epoch": 0.5, "grad_norm": 1.618237822744375, "learning_rate": 5.214081418918852e-06, "loss": 0.5869, "step": 4914 }, { "epoch": 0.5, "grad_norm": 1.4431412375376191, "learning_rate": 5.212429279808788e-06, "loss": 0.6527, "step": 4915 }, { "epoch": 0.5, "grad_norm": 1.3536605682761118, "learning_rate": 5.2107771174628875e-06, "loss": 0.5981, "step": 4916 }, { "epoch": 0.5, "grad_norm": 1.3944625830447235, "learning_rate": 5.209124932061862e-06, "loss": 0.6827, "step": 4917 }, { "epoch": 0.5, "grad_norm": 1.460564071215536, "learning_rate": 5.2074727237864345e-06, "loss": 0.6637, "step": 4918 }, { "epoch": 0.5, "grad_norm": 1.5085065785715064, "learning_rate": 5.205820492817325e-06, "loss": 0.715, "step": 4919 }, { "epoch": 0.5, "grad_norm": 1.6411024341753189, "learning_rate": 5.204168239335255e-06, "loss": 0.6186, "step": 4920 }, { "epoch": 0.5, "grad_norm": 1.4668759128515063, "learning_rate": 5.202515963520953e-06, "loss": 0.6825, "step": 4921 }, { "epoch": 0.5, "grad_norm": 1.4166036268147675, "learning_rate": 5.200863665555147e-06, "loss": 0.6046, "step": 4922 }, { "epoch": 0.5, "grad_norm": 1.42791875656659, "learning_rate": 5.199211345618568e-06, "loss": 0.71, "step": 4923 }, { "epoch": 0.5, "grad_norm": 1.6196519020727638, "learning_rate": 5.1975590038919465e-06, "loss": 0.7252, "step": 4924 }, { "epoch": 0.5, "grad_norm": 1.515764278576832, "learning_rate": 5.1959066405560196e-06, "loss": 0.637, "step": 4925 }, { "epoch": 0.5, "grad_norm": 1.4876019214188532, "learning_rate": 5.194254255791529e-06, "loss": 0.7693, "step": 4926 }, { "epoch": 0.5, "grad_norm": 1.346712202919103, "learning_rate": 5.1926018497792105e-06, "loss": 0.6118, "step": 4927 }, { "epoch": 0.5, "grad_norm": 1.7457913899051165, "learning_rate": 5.190949422699808e-06, "loss": 0.6164, "step": 4928 }, { "epoch": 0.5, "grad_norm": 1.655314407846717, "learning_rate": 5.189296974734068e-06, "loss": 0.6371, "step": 4929 }, { "epoch": 0.5, "grad_norm": 1.5380294782151993, "learning_rate": 5.187644506062737e-06, "loss": 0.5816, "step": 4930 }, { "epoch": 0.5, "grad_norm": 1.5062284477503556, "learning_rate": 5.185992016866562e-06, "loss": 0.6904, "step": 4931 }, { "epoch": 0.5, "grad_norm": 1.6171764601650656, "learning_rate": 5.184339507326297e-06, "loss": 0.576, "step": 4932 }, { "epoch": 0.5, "grad_norm": 1.490220066755832, "learning_rate": 5.182686977622699e-06, "loss": 0.6607, "step": 4933 }, { "epoch": 0.5, "grad_norm": 1.4613266892502372, "learning_rate": 5.181034427936517e-06, "loss": 0.5739, "step": 4934 }, { "epoch": 0.5, "grad_norm": 1.7255279926592775, "learning_rate": 5.179381858448517e-06, "loss": 0.7641, "step": 4935 }, { "epoch": 0.5, "grad_norm": 1.2543577852029255, "learning_rate": 5.177729269339454e-06, "loss": 0.5799, "step": 4936 }, { "epoch": 0.5, "grad_norm": 1.4713971412811895, "learning_rate": 5.1760766607900945e-06, "loss": 0.7127, "step": 4937 }, { "epoch": 0.5, "grad_norm": 1.58430814264276, "learning_rate": 5.174424032981201e-06, "loss": 0.7192, "step": 4938 }, { "epoch": 0.5, "grad_norm": 1.5761140250720302, "learning_rate": 5.172771386093539e-06, "loss": 0.6687, "step": 4939 }, { "epoch": 0.5, "grad_norm": 1.5290662808740767, "learning_rate": 5.1711187203078826e-06, "loss": 0.6129, "step": 4940 }, { "epoch": 0.5, "grad_norm": 1.694053633367494, "learning_rate": 5.169466035804999e-06, "loss": 0.7329, "step": 4941 }, { "epoch": 0.5, "grad_norm": 1.4386760993905034, "learning_rate": 5.167813332765662e-06, "loss": 0.5501, "step": 4942 }, { "epoch": 0.5, "grad_norm": 1.4866105701078773, "learning_rate": 5.166160611370647e-06, "loss": 0.7582, "step": 4943 }, { "epoch": 0.5, "grad_norm": 1.4097553052515268, "learning_rate": 5.164507871800731e-06, "loss": 0.5422, "step": 4944 }, { "epoch": 0.5, "grad_norm": 1.581679452902672, "learning_rate": 5.1628551142366955e-06, "loss": 0.7124, "step": 4945 }, { "epoch": 0.51, "grad_norm": 1.6155746088901695, "learning_rate": 5.161202338859317e-06, "loss": 0.6364, "step": 4946 }, { "epoch": 0.51, "grad_norm": 1.5052107760564983, "learning_rate": 5.1595495458493826e-06, "loss": 0.6637, "step": 4947 }, { "epoch": 0.51, "grad_norm": 1.7688502753343571, "learning_rate": 5.157896735387677e-06, "loss": 0.6024, "step": 4948 }, { "epoch": 0.51, "grad_norm": 1.5360205590074685, "learning_rate": 5.1562439076549835e-06, "loss": 0.7544, "step": 4949 }, { "epoch": 0.51, "grad_norm": 1.3141522022129264, "learning_rate": 5.1545910628320935e-06, "loss": 0.6321, "step": 4950 }, { "epoch": 0.51, "grad_norm": 1.2045546564056198, "learning_rate": 5.1529382010997985e-06, "loss": 0.5583, "step": 4951 }, { "epoch": 0.51, "grad_norm": 1.6040701031475255, "learning_rate": 5.151285322638892e-06, "loss": 0.776, "step": 4952 }, { "epoch": 0.51, "grad_norm": 1.3231880970368761, "learning_rate": 5.1496324276301635e-06, "loss": 0.5943, "step": 4953 }, { "epoch": 0.51, "grad_norm": 1.5581848030347514, "learning_rate": 5.147979516254414e-06, "loss": 0.5922, "step": 4954 }, { "epoch": 0.51, "grad_norm": 1.4074897102601152, "learning_rate": 5.146326588692439e-06, "loss": 0.6438, "step": 4955 }, { "epoch": 0.51, "grad_norm": 1.5705324361043873, "learning_rate": 5.144673645125039e-06, "loss": 0.6393, "step": 4956 }, { "epoch": 0.51, "grad_norm": 1.3780184311979784, "learning_rate": 5.143020685733015e-06, "loss": 0.5673, "step": 4957 }, { "epoch": 0.51, "grad_norm": 1.3063613661768763, "learning_rate": 5.14136771069717e-06, "loss": 0.6139, "step": 4958 }, { "epoch": 0.51, "grad_norm": 1.7223331983410537, "learning_rate": 5.139714720198311e-06, "loss": 0.7079, "step": 4959 }, { "epoch": 0.51, "grad_norm": 1.3366566902806138, "learning_rate": 5.1380617144172415e-06, "loss": 0.5359, "step": 4960 }, { "epoch": 0.51, "grad_norm": 1.4690714397508604, "learning_rate": 5.136408693534773e-06, "loss": 0.7119, "step": 4961 }, { "epoch": 0.51, "grad_norm": 1.4234966109696647, "learning_rate": 5.1347556577317136e-06, "loss": 0.6613, "step": 4962 }, { "epoch": 0.51, "grad_norm": 1.573156090801882, "learning_rate": 5.133102607188875e-06, "loss": 0.6949, "step": 4963 }, { "epoch": 0.51, "grad_norm": 1.3583682758059634, "learning_rate": 5.131449542087069e-06, "loss": 0.6741, "step": 4964 }, { "epoch": 0.51, "grad_norm": 1.447830566483162, "learning_rate": 5.129796462607114e-06, "loss": 0.6182, "step": 4965 }, { "epoch": 0.51, "grad_norm": 1.6344414593106147, "learning_rate": 5.128143368929824e-06, "loss": 0.6091, "step": 4966 }, { "epoch": 0.51, "grad_norm": 1.498568914995713, "learning_rate": 5.126490261236015e-06, "loss": 0.6645, "step": 4967 }, { "epoch": 0.51, "grad_norm": 2.4292124655588414, "learning_rate": 5.124837139706511e-06, "loss": 0.6819, "step": 4968 }, { "epoch": 0.51, "grad_norm": 1.6605414424712297, "learning_rate": 5.123184004522129e-06, "loss": 0.6516, "step": 4969 }, { "epoch": 0.51, "grad_norm": 1.459178309421645, "learning_rate": 5.121530855863695e-06, "loss": 0.6264, "step": 4970 }, { "epoch": 0.51, "grad_norm": 1.5517332104224977, "learning_rate": 5.1198776939120285e-06, "loss": 0.6187, "step": 4971 }, { "epoch": 0.51, "grad_norm": 3.860931337750613, "learning_rate": 5.1182245188479585e-06, "loss": 0.6015, "step": 4972 }, { "epoch": 0.51, "grad_norm": 1.4840638931637455, "learning_rate": 5.116571330852311e-06, "loss": 0.6401, "step": 4973 }, { "epoch": 0.51, "grad_norm": 1.510711947453828, "learning_rate": 5.1149181301059116e-06, "loss": 0.7536, "step": 4974 }, { "epoch": 0.51, "grad_norm": 1.3186496474201146, "learning_rate": 5.113264916789594e-06, "loss": 0.6446, "step": 4975 }, { "epoch": 0.51, "grad_norm": 1.4425730773809915, "learning_rate": 5.111611691084187e-06, "loss": 0.6271, "step": 4976 }, { "epoch": 0.51, "grad_norm": 1.4087306256827101, "learning_rate": 5.109958453170524e-06, "loss": 0.5472, "step": 4977 }, { "epoch": 0.51, "grad_norm": 1.6974876784460309, "learning_rate": 5.108305203229437e-06, "loss": 0.7297, "step": 4978 }, { "epoch": 0.51, "grad_norm": 1.6410951995646648, "learning_rate": 5.106651941441761e-06, "loss": 0.748, "step": 4979 }, { "epoch": 0.51, "grad_norm": 1.4415938266368271, "learning_rate": 5.104998667988336e-06, "loss": 0.702, "step": 4980 }, { "epoch": 0.51, "grad_norm": 1.441534857351532, "learning_rate": 5.1033453830499935e-06, "loss": 0.6206, "step": 4981 }, { "epoch": 0.51, "grad_norm": 1.5560943515909176, "learning_rate": 5.101692086807578e-06, "loss": 0.7258, "step": 4982 }, { "epoch": 0.51, "grad_norm": 1.487390744032196, "learning_rate": 5.100038779441926e-06, "loss": 0.6913, "step": 4983 }, { "epoch": 0.51, "grad_norm": 1.348626860830838, "learning_rate": 5.09838546113388e-06, "loss": 0.5673, "step": 4984 }, { "epoch": 0.51, "grad_norm": 1.400664481574003, "learning_rate": 5.096732132064283e-06, "loss": 0.5949, "step": 4985 }, { "epoch": 0.51, "grad_norm": 1.4121842182937854, "learning_rate": 5.095078792413976e-06, "loss": 0.6635, "step": 4986 }, { "epoch": 0.51, "grad_norm": 1.5789745105650288, "learning_rate": 5.09342544236381e-06, "loss": 0.6418, "step": 4987 }, { "epoch": 0.51, "grad_norm": 1.3100668952082206, "learning_rate": 5.091772082094622e-06, "loss": 0.6164, "step": 4988 }, { "epoch": 0.51, "grad_norm": 1.3497087933675025, "learning_rate": 5.090118711787268e-06, "loss": 0.6285, "step": 4989 }, { "epoch": 0.51, "grad_norm": 1.5952472235366046, "learning_rate": 5.088465331622591e-06, "loss": 0.698, "step": 4990 }, { "epoch": 0.51, "grad_norm": 1.4284293059639757, "learning_rate": 5.08681194178144e-06, "loss": 0.7126, "step": 4991 }, { "epoch": 0.51, "grad_norm": 1.2838640569002355, "learning_rate": 5.085158542444667e-06, "loss": 0.5612, "step": 4992 }, { "epoch": 0.51, "grad_norm": 2.1569428736656593, "learning_rate": 5.083505133793123e-06, "loss": 0.6419, "step": 4993 }, { "epoch": 0.51, "grad_norm": 1.3210343878948967, "learning_rate": 5.081851716007662e-06, "loss": 0.6508, "step": 4994 }, { "epoch": 0.51, "grad_norm": 1.6642162376282041, "learning_rate": 5.080198289269133e-06, "loss": 0.7002, "step": 4995 }, { "epoch": 0.51, "grad_norm": 1.5969913008476408, "learning_rate": 5.078544853758395e-06, "loss": 0.6642, "step": 4996 }, { "epoch": 0.51, "grad_norm": 1.536537415433736, "learning_rate": 5.076891409656299e-06, "loss": 0.6805, "step": 4997 }, { "epoch": 0.51, "grad_norm": 1.4208534886263187, "learning_rate": 5.075237957143706e-06, "loss": 0.5801, "step": 4998 }, { "epoch": 0.51, "grad_norm": 1.8760876146126815, "learning_rate": 5.073584496401469e-06, "loss": 0.6804, "step": 4999 }, { "epoch": 0.51, "grad_norm": 1.353527091033723, "learning_rate": 5.071931027610449e-06, "loss": 0.5822, "step": 5000 }, { "epoch": 0.51, "grad_norm": 2.3110483825161476, "learning_rate": 5.070277550951503e-06, "loss": 0.7806, "step": 5001 }, { "epoch": 0.51, "grad_norm": 1.8641243964344296, "learning_rate": 5.0686240666054895e-06, "loss": 0.6869, "step": 5002 }, { "epoch": 0.51, "grad_norm": 1.3584124957397044, "learning_rate": 5.066970574753275e-06, "loss": 0.5183, "step": 5003 }, { "epoch": 0.51, "grad_norm": 1.5339739126046965, "learning_rate": 5.0653170755757146e-06, "loss": 0.6553, "step": 5004 }, { "epoch": 0.51, "grad_norm": 1.6547559979054747, "learning_rate": 5.063663569253673e-06, "loss": 0.6884, "step": 5005 }, { "epoch": 0.51, "grad_norm": 1.2031497824300048, "learning_rate": 5.062010055968013e-06, "loss": 0.5883, "step": 5006 }, { "epoch": 0.51, "grad_norm": 1.3613277042763967, "learning_rate": 5.0603565358996e-06, "loss": 0.5433, "step": 5007 }, { "epoch": 0.51, "grad_norm": 1.3500977013441675, "learning_rate": 5.058703009229297e-06, "loss": 0.5494, "step": 5008 }, { "epoch": 0.51, "grad_norm": 1.4427619525623934, "learning_rate": 5.057049476137968e-06, "loss": 0.6082, "step": 5009 }, { "epoch": 0.51, "grad_norm": 1.3570725976909748, "learning_rate": 5.055395936806482e-06, "loss": 0.607, "step": 5010 }, { "epoch": 0.51, "grad_norm": 1.4070072824115567, "learning_rate": 5.053742391415702e-06, "loss": 0.6645, "step": 5011 }, { "epoch": 0.51, "grad_norm": 1.478058394249482, "learning_rate": 5.0520888401465e-06, "loss": 0.7363, "step": 5012 }, { "epoch": 0.51, "grad_norm": 1.4962459436181044, "learning_rate": 5.05043528317974e-06, "loss": 0.605, "step": 5013 }, { "epoch": 0.51, "grad_norm": 1.5414097893569945, "learning_rate": 5.048781720696291e-06, "loss": 0.6046, "step": 5014 }, { "epoch": 0.51, "grad_norm": 1.3116216018718265, "learning_rate": 5.0471281528770255e-06, "loss": 0.6229, "step": 5015 }, { "epoch": 0.51, "grad_norm": 3.633499646934535, "learning_rate": 5.045474579902808e-06, "loss": 0.6785, "step": 5016 }, { "epoch": 0.51, "grad_norm": 1.6352020552315645, "learning_rate": 5.043821001954514e-06, "loss": 0.737, "step": 5017 }, { "epoch": 0.51, "grad_norm": 1.4790493396106816, "learning_rate": 5.042167419213011e-06, "loss": 0.6562, "step": 5018 }, { "epoch": 0.51, "grad_norm": 1.2106275017395094, "learning_rate": 5.040513831859172e-06, "loss": 0.5554, "step": 5019 }, { "epoch": 0.51, "grad_norm": 1.4384762218637994, "learning_rate": 5.038860240073867e-06, "loss": 0.7201, "step": 5020 }, { "epoch": 0.51, "grad_norm": 1.596554668586441, "learning_rate": 5.0372066440379706e-06, "loss": 0.7073, "step": 5021 }, { "epoch": 0.51, "grad_norm": 1.4680327499575667, "learning_rate": 5.035553043932356e-06, "loss": 0.6546, "step": 5022 }, { "epoch": 0.51, "grad_norm": 1.4515506425616491, "learning_rate": 5.033899439937894e-06, "loss": 0.6024, "step": 5023 }, { "epoch": 0.51, "grad_norm": 1.52877638408685, "learning_rate": 5.032245832235461e-06, "loss": 0.7373, "step": 5024 }, { "epoch": 0.51, "grad_norm": 1.270117641259473, "learning_rate": 5.030592221005928e-06, "loss": 0.6454, "step": 5025 }, { "epoch": 0.51, "grad_norm": 1.3905120175102301, "learning_rate": 5.0289386064301715e-06, "loss": 0.5888, "step": 5026 }, { "epoch": 0.51, "grad_norm": 1.504633049480648, "learning_rate": 5.027284988689066e-06, "loss": 0.6252, "step": 5027 }, { "epoch": 0.51, "grad_norm": 1.415328185796412, "learning_rate": 5.025631367963488e-06, "loss": 0.7357, "step": 5028 }, { "epoch": 0.51, "grad_norm": 1.464874859948634, "learning_rate": 5.023977744434312e-06, "loss": 0.7699, "step": 5029 }, { "epoch": 0.51, "grad_norm": 1.571900145669323, "learning_rate": 5.0223241182824124e-06, "loss": 0.6308, "step": 5030 }, { "epoch": 0.51, "grad_norm": 1.4851221512929402, "learning_rate": 5.020670489688668e-06, "loss": 0.6611, "step": 5031 }, { "epoch": 0.51, "grad_norm": 1.2775978901942309, "learning_rate": 5.019016858833954e-06, "loss": 0.612, "step": 5032 }, { "epoch": 0.51, "grad_norm": 1.4209995844655967, "learning_rate": 5.017363225899147e-06, "loss": 0.6401, "step": 5033 }, { "epoch": 0.51, "grad_norm": 1.551257580428177, "learning_rate": 5.015709591065122e-06, "loss": 0.7075, "step": 5034 }, { "epoch": 0.51, "grad_norm": 1.5190348390603574, "learning_rate": 5.01405595451276e-06, "loss": 0.6375, "step": 5035 }, { "epoch": 0.51, "grad_norm": 1.5054526510459487, "learning_rate": 5.012402316422936e-06, "loss": 0.7062, "step": 5036 }, { "epoch": 0.51, "grad_norm": 1.4144873461524095, "learning_rate": 5.010748676976527e-06, "loss": 0.6882, "step": 5037 }, { "epoch": 0.51, "grad_norm": 1.6060289052863614, "learning_rate": 5.009095036354412e-06, "loss": 0.7197, "step": 5038 }, { "epoch": 0.51, "grad_norm": 1.3501319000479117, "learning_rate": 5.007441394737468e-06, "loss": 0.6426, "step": 5039 }, { "epoch": 0.51, "grad_norm": 1.4726059491698178, "learning_rate": 5.0057877523065734e-06, "loss": 0.6592, "step": 5040 }, { "epoch": 0.51, "grad_norm": 1.3964372612073397, "learning_rate": 5.004134109242605e-06, "loss": 0.6736, "step": 5041 }, { "epoch": 0.51, "grad_norm": 1.3558508371837599, "learning_rate": 5.002480465726441e-06, "loss": 0.7253, "step": 5042 }, { "epoch": 0.51, "grad_norm": 1.679701178680816, "learning_rate": 5.000826821938961e-06, "loss": 0.7158, "step": 5043 }, { "epoch": 0.52, "grad_norm": 1.4356562045840129, "learning_rate": 4.99917317806104e-06, "loss": 0.7086, "step": 5044 }, { "epoch": 0.52, "grad_norm": 1.487574149025686, "learning_rate": 4.99751953427356e-06, "loss": 0.6431, "step": 5045 }, { "epoch": 0.52, "grad_norm": 1.4563322601464197, "learning_rate": 4.995865890757397e-06, "loss": 0.6486, "step": 5046 }, { "epoch": 0.52, "grad_norm": 1.3155909327474757, "learning_rate": 4.994212247693428e-06, "loss": 0.6463, "step": 5047 }, { "epoch": 0.52, "grad_norm": 1.6605163596199797, "learning_rate": 4.992558605262534e-06, "loss": 0.6711, "step": 5048 }, { "epoch": 0.52, "grad_norm": 1.5645392176817317, "learning_rate": 4.9909049636455884e-06, "loss": 0.571, "step": 5049 }, { "epoch": 0.52, "grad_norm": 1.551322804782223, "learning_rate": 4.989251323023473e-06, "loss": 0.7201, "step": 5050 }, { "epoch": 0.52, "grad_norm": 1.574032925918227, "learning_rate": 4.987597683577066e-06, "loss": 0.6263, "step": 5051 }, { "epoch": 0.52, "grad_norm": 1.401306155494394, "learning_rate": 4.985944045487242e-06, "loss": 0.5884, "step": 5052 }, { "epoch": 0.52, "grad_norm": 1.4266434479181178, "learning_rate": 4.984290408934878e-06, "loss": 0.6369, "step": 5053 }, { "epoch": 0.52, "grad_norm": 1.685423686288512, "learning_rate": 4.9826367741008555e-06, "loss": 0.586, "step": 5054 }, { "epoch": 0.52, "grad_norm": 1.4870689587865165, "learning_rate": 4.980983141166047e-06, "loss": 0.6421, "step": 5055 }, { "epoch": 0.52, "grad_norm": 1.3306075882973138, "learning_rate": 4.979329510311333e-06, "loss": 0.5352, "step": 5056 }, { "epoch": 0.52, "grad_norm": 4.438503604041962, "learning_rate": 4.977675881717589e-06, "loss": 0.7023, "step": 5057 }, { "epoch": 0.52, "grad_norm": 1.324496389618434, "learning_rate": 4.976022255565689e-06, "loss": 0.5998, "step": 5058 }, { "epoch": 0.52, "grad_norm": 1.628956466442926, "learning_rate": 4.974368632036514e-06, "loss": 0.6427, "step": 5059 }, { "epoch": 0.52, "grad_norm": 1.3682975730076508, "learning_rate": 4.972715011310935e-06, "loss": 0.6521, "step": 5060 }, { "epoch": 0.52, "grad_norm": 1.435922525112437, "learning_rate": 4.97106139356983e-06, "loss": 0.7215, "step": 5061 }, { "epoch": 0.52, "grad_norm": 1.3202454748139303, "learning_rate": 4.969407778994075e-06, "loss": 0.582, "step": 5062 }, { "epoch": 0.52, "grad_norm": 1.5424898330493118, "learning_rate": 4.967754167764542e-06, "loss": 0.6183, "step": 5063 }, { "epoch": 0.52, "grad_norm": 1.5469179761570278, "learning_rate": 4.966100560062107e-06, "loss": 0.5691, "step": 5064 }, { "epoch": 0.52, "grad_norm": 1.3874143550745914, "learning_rate": 4.9644469560676465e-06, "loss": 0.619, "step": 5065 }, { "epoch": 0.52, "grad_norm": 1.4758463712337098, "learning_rate": 4.96279335596203e-06, "loss": 0.5798, "step": 5066 }, { "epoch": 0.52, "grad_norm": 1.634632961900223, "learning_rate": 4.961139759926133e-06, "loss": 0.701, "step": 5067 }, { "epoch": 0.52, "grad_norm": 1.5266335196200596, "learning_rate": 4.959486168140831e-06, "loss": 0.7554, "step": 5068 }, { "epoch": 0.52, "grad_norm": 1.6190740791578107, "learning_rate": 4.95783258078699e-06, "loss": 0.6717, "step": 5069 }, { "epoch": 0.52, "grad_norm": 1.4418882901163554, "learning_rate": 4.956178998045487e-06, "loss": 0.6684, "step": 5070 }, { "epoch": 0.52, "grad_norm": 1.4013305043687052, "learning_rate": 4.954525420097194e-06, "loss": 0.5278, "step": 5071 }, { "epoch": 0.52, "grad_norm": 1.23788733370773, "learning_rate": 4.952871847122976e-06, "loss": 0.6609, "step": 5072 }, { "epoch": 0.52, "grad_norm": 1.6171803397600104, "learning_rate": 4.95121827930371e-06, "loss": 0.6298, "step": 5073 }, { "epoch": 0.52, "grad_norm": 1.372803009893504, "learning_rate": 4.949564716820262e-06, "loss": 0.6323, "step": 5074 }, { "epoch": 0.52, "grad_norm": 1.5510077995844442, "learning_rate": 4.947911159853502e-06, "loss": 0.5958, "step": 5075 }, { "epoch": 0.52, "grad_norm": 1.481774441270819, "learning_rate": 4.946257608584299e-06, "loss": 0.5816, "step": 5076 }, { "epoch": 0.52, "grad_norm": 1.6053235868902367, "learning_rate": 4.94460406319352e-06, "loss": 0.747, "step": 5077 }, { "epoch": 0.52, "grad_norm": 1.4289582086398194, "learning_rate": 4.942950523862033e-06, "loss": 0.6404, "step": 5078 }, { "epoch": 0.52, "grad_norm": 1.43078063058519, "learning_rate": 4.941296990770706e-06, "loss": 0.7775, "step": 5079 }, { "epoch": 0.52, "grad_norm": 1.5726270441612422, "learning_rate": 4.9396434641004024e-06, "loss": 0.6566, "step": 5080 }, { "epoch": 0.52, "grad_norm": 1.60810510470272, "learning_rate": 4.9379899440319864e-06, "loss": 0.6926, "step": 5081 }, { "epoch": 0.52, "grad_norm": 1.6889373988739984, "learning_rate": 4.936336430746328e-06, "loss": 0.5465, "step": 5082 }, { "epoch": 0.52, "grad_norm": 1.5658930652115417, "learning_rate": 4.934682924424287e-06, "loss": 0.6894, "step": 5083 }, { "epoch": 0.52, "grad_norm": 1.4933229354378732, "learning_rate": 4.933029425246726e-06, "loss": 0.6643, "step": 5084 }, { "epoch": 0.52, "grad_norm": 1.5578022926518662, "learning_rate": 4.931375933394511e-06, "loss": 0.7111, "step": 5085 }, { "epoch": 0.52, "grad_norm": 1.604445669002451, "learning_rate": 4.9297224490484986e-06, "loss": 0.6313, "step": 5086 }, { "epoch": 0.52, "grad_norm": 1.4878311102110193, "learning_rate": 4.928068972389554e-06, "loss": 0.6473, "step": 5087 }, { "epoch": 0.52, "grad_norm": 1.5511349061539794, "learning_rate": 4.926415503598532e-06, "loss": 0.6053, "step": 5088 }, { "epoch": 0.52, "grad_norm": 1.4049747357673255, "learning_rate": 4.924762042856296e-06, "loss": 0.6393, "step": 5089 }, { "epoch": 0.52, "grad_norm": 1.705269361677366, "learning_rate": 4.923108590343703e-06, "loss": 0.607, "step": 5090 }, { "epoch": 0.52, "grad_norm": 1.2710617900738468, "learning_rate": 4.921455146241607e-06, "loss": 0.5966, "step": 5091 }, { "epoch": 0.52, "grad_norm": 1.4431959264682097, "learning_rate": 4.919801710730868e-06, "loss": 0.6849, "step": 5092 }, { "epoch": 0.52, "grad_norm": 1.392896697784531, "learning_rate": 4.918148283992341e-06, "loss": 0.6726, "step": 5093 }, { "epoch": 0.52, "grad_norm": 1.3371269744560488, "learning_rate": 4.9164948662068786e-06, "loss": 0.67, "step": 5094 }, { "epoch": 0.52, "grad_norm": 1.3729193509029833, "learning_rate": 4.914841457555333e-06, "loss": 0.5752, "step": 5095 }, { "epoch": 0.52, "grad_norm": 1.3915570466724858, "learning_rate": 4.913188058218561e-06, "loss": 0.6362, "step": 5096 }, { "epoch": 0.52, "grad_norm": 1.685294656208165, "learning_rate": 4.911534668377411e-06, "loss": 0.6482, "step": 5097 }, { "epoch": 0.52, "grad_norm": 1.586441100570038, "learning_rate": 4.909881288212732e-06, "loss": 0.7722, "step": 5098 }, { "epoch": 0.52, "grad_norm": 1.642666416266212, "learning_rate": 4.9082279179053785e-06, "loss": 0.6947, "step": 5099 }, { "epoch": 0.52, "grad_norm": 1.457060574905506, "learning_rate": 4.906574557636192e-06, "loss": 0.6343, "step": 5100 }, { "epoch": 0.52, "grad_norm": 1.406218311490692, "learning_rate": 4.904921207586025e-06, "loss": 0.6435, "step": 5101 }, { "epoch": 0.52, "grad_norm": 1.3881906396713868, "learning_rate": 4.903267867935719e-06, "loss": 0.6581, "step": 5102 }, { "epoch": 0.52, "grad_norm": 1.5862581378828315, "learning_rate": 4.9016145388661215e-06, "loss": 0.747, "step": 5103 }, { "epoch": 0.52, "grad_norm": 1.8231753298494495, "learning_rate": 4.899961220558076e-06, "loss": 0.7072, "step": 5104 }, { "epoch": 0.52, "grad_norm": 1.4549830596872801, "learning_rate": 4.898307913192423e-06, "loss": 0.6543, "step": 5105 }, { "epoch": 0.52, "grad_norm": 1.4756674565121535, "learning_rate": 4.896654616950007e-06, "loss": 0.6234, "step": 5106 }, { "epoch": 0.52, "grad_norm": 1.5398437457211782, "learning_rate": 4.895001332011667e-06, "loss": 0.6068, "step": 5107 }, { "epoch": 0.52, "grad_norm": 1.5005024684773618, "learning_rate": 4.89334805855824e-06, "loss": 0.5817, "step": 5108 }, { "epoch": 0.52, "grad_norm": 1.319987275397712, "learning_rate": 4.891694796770564e-06, "loss": 0.5706, "step": 5109 }, { "epoch": 0.52, "grad_norm": 1.2721995727337785, "learning_rate": 4.8900415468294785e-06, "loss": 0.582, "step": 5110 }, { "epoch": 0.52, "grad_norm": 1.676594938541984, "learning_rate": 4.888388308915814e-06, "loss": 0.7248, "step": 5111 }, { "epoch": 0.52, "grad_norm": 1.547875143877041, "learning_rate": 4.886735083210408e-06, "loss": 0.6247, "step": 5112 }, { "epoch": 0.52, "grad_norm": 1.3297650766207318, "learning_rate": 4.88508186989409e-06, "loss": 0.5616, "step": 5113 }, { "epoch": 0.52, "grad_norm": 1.287825042882977, "learning_rate": 4.883428669147691e-06, "loss": 0.6074, "step": 5114 }, { "epoch": 0.52, "grad_norm": 1.5551428913393643, "learning_rate": 4.881775481152043e-06, "loss": 0.5963, "step": 5115 }, { "epoch": 0.52, "grad_norm": 1.484296911433978, "learning_rate": 4.880122306087973e-06, "loss": 0.6816, "step": 5116 }, { "epoch": 0.52, "grad_norm": 1.6315661186273809, "learning_rate": 4.878469144136306e-06, "loss": 0.7397, "step": 5117 }, { "epoch": 0.52, "grad_norm": 1.3879610185670235, "learning_rate": 4.8768159954778724e-06, "loss": 0.6052, "step": 5118 }, { "epoch": 0.52, "grad_norm": 1.443873359709019, "learning_rate": 4.87516286029349e-06, "loss": 0.7131, "step": 5119 }, { "epoch": 0.52, "grad_norm": 1.5034852725574772, "learning_rate": 4.873509738763985e-06, "loss": 0.6336, "step": 5120 }, { "epoch": 0.52, "grad_norm": 1.7399320382152714, "learning_rate": 4.871856631070179e-06, "loss": 0.7351, "step": 5121 }, { "epoch": 0.52, "grad_norm": 1.376239657637917, "learning_rate": 4.870203537392888e-06, "loss": 0.6372, "step": 5122 }, { "epoch": 0.52, "grad_norm": 1.461715635289006, "learning_rate": 4.868550457912931e-06, "loss": 0.7008, "step": 5123 }, { "epoch": 0.52, "grad_norm": 1.4907106039015938, "learning_rate": 4.866897392811127e-06, "loss": 0.6611, "step": 5124 }, { "epoch": 0.52, "grad_norm": 1.4748140684236994, "learning_rate": 4.865244342268288e-06, "loss": 0.5977, "step": 5125 }, { "epoch": 0.52, "grad_norm": 1.5132859316435168, "learning_rate": 4.863591306465229e-06, "loss": 0.6461, "step": 5126 }, { "epoch": 0.52, "grad_norm": 1.548894602867656, "learning_rate": 4.861938285582759e-06, "loss": 0.6558, "step": 5127 }, { "epoch": 0.52, "grad_norm": 1.589883069043413, "learning_rate": 4.86028527980169e-06, "loss": 0.586, "step": 5128 }, { "epoch": 0.52, "grad_norm": 1.5223411550493087, "learning_rate": 4.858632289302831e-06, "loss": 0.665, "step": 5129 }, { "epoch": 0.52, "grad_norm": 1.378095468936709, "learning_rate": 4.856979314266987e-06, "loss": 0.7081, "step": 5130 }, { "epoch": 0.52, "grad_norm": 1.693558284077491, "learning_rate": 4.855326354874962e-06, "loss": 0.5678, "step": 5131 }, { "epoch": 0.52, "grad_norm": 1.369328407227058, "learning_rate": 4.853673411307564e-06, "loss": 0.6862, "step": 5132 }, { "epoch": 0.52, "grad_norm": 1.3066639078429825, "learning_rate": 4.852020483745587e-06, "loss": 0.6011, "step": 5133 }, { "epoch": 0.52, "grad_norm": 1.1757985507987345, "learning_rate": 4.850367572369837e-06, "loss": 0.572, "step": 5134 }, { "epoch": 0.52, "grad_norm": 1.6233564985933011, "learning_rate": 4.848714677361111e-06, "loss": 0.6394, "step": 5135 }, { "epoch": 0.52, "grad_norm": 1.4478537756509424, "learning_rate": 4.847061798900202e-06, "loss": 0.6915, "step": 5136 }, { "epoch": 0.52, "grad_norm": 1.4351766562506199, "learning_rate": 4.8454089371679064e-06, "loss": 0.7563, "step": 5137 }, { "epoch": 0.52, "grad_norm": 1.5782203206985301, "learning_rate": 4.843756092345018e-06, "loss": 0.7195, "step": 5138 }, { "epoch": 0.52, "grad_norm": 1.3648825563564548, "learning_rate": 4.842103264612326e-06, "loss": 0.7292, "step": 5139 }, { "epoch": 0.52, "grad_norm": 1.4279109668049093, "learning_rate": 4.840450454150619e-06, "loss": 0.7583, "step": 5140 }, { "epoch": 0.52, "grad_norm": 1.4815080591651153, "learning_rate": 4.838797661140685e-06, "loss": 0.5748, "step": 5141 }, { "epoch": 0.53, "grad_norm": 1.4524166023193676, "learning_rate": 4.837144885763306e-06, "loss": 0.6887, "step": 5142 }, { "epoch": 0.53, "grad_norm": 1.3391092445913673, "learning_rate": 4.83549212819927e-06, "loss": 0.6703, "step": 5143 }, { "epoch": 0.53, "grad_norm": 1.5826103129752298, "learning_rate": 4.833839388629354e-06, "loss": 0.626, "step": 5144 }, { "epoch": 0.53, "grad_norm": 1.501004303306393, "learning_rate": 4.8321866672343385e-06, "loss": 0.5975, "step": 5145 }, { "epoch": 0.53, "grad_norm": 1.3820598224144682, "learning_rate": 4.830533964195004e-06, "loss": 0.657, "step": 5146 }, { "epoch": 0.53, "grad_norm": 1.5674996005624984, "learning_rate": 4.828881279692118e-06, "loss": 0.6786, "step": 5147 }, { "epoch": 0.53, "grad_norm": 1.3500016900803011, "learning_rate": 4.827228613906461e-06, "loss": 0.6299, "step": 5148 }, { "epoch": 0.53, "grad_norm": 1.4778206706153247, "learning_rate": 4.825575967018802e-06, "loss": 0.6945, "step": 5149 }, { "epoch": 0.53, "grad_norm": 1.5037102868852157, "learning_rate": 4.823923339209906e-06, "loss": 0.7083, "step": 5150 }, { "epoch": 0.53, "grad_norm": 1.4915667979957397, "learning_rate": 4.822270730660547e-06, "loss": 0.6794, "step": 5151 }, { "epoch": 0.53, "grad_norm": 1.549191703206031, "learning_rate": 4.820618141551485e-06, "loss": 0.631, "step": 5152 }, { "epoch": 0.53, "grad_norm": 1.4418234265259415, "learning_rate": 4.818965572063483e-06, "loss": 0.7471, "step": 5153 }, { "epoch": 0.53, "grad_norm": 1.4208632009881335, "learning_rate": 4.8173130223773045e-06, "loss": 0.6764, "step": 5154 }, { "epoch": 0.53, "grad_norm": 1.5557249982627492, "learning_rate": 4.815660492673704e-06, "loss": 0.6671, "step": 5155 }, { "epoch": 0.53, "grad_norm": 1.4413220570981249, "learning_rate": 4.814007983133439e-06, "loss": 0.6976, "step": 5156 }, { "epoch": 0.53, "grad_norm": 1.3876646844787819, "learning_rate": 4.812355493937266e-06, "loss": 0.6927, "step": 5157 }, { "epoch": 0.53, "grad_norm": 1.4672748361678745, "learning_rate": 4.8107030252659334e-06, "loss": 0.7191, "step": 5158 }, { "epoch": 0.53, "grad_norm": 1.939553018135006, "learning_rate": 4.809050577300192e-06, "loss": 0.6149, "step": 5159 }, { "epoch": 0.53, "grad_norm": 1.640390995190066, "learning_rate": 4.807398150220792e-06, "loss": 0.6563, "step": 5160 }, { "epoch": 0.53, "grad_norm": 1.2956509720192102, "learning_rate": 4.805745744208472e-06, "loss": 0.6374, "step": 5161 }, { "epoch": 0.53, "grad_norm": 1.4859452294854574, "learning_rate": 4.8040933594439796e-06, "loss": 0.6738, "step": 5162 }, { "epoch": 0.53, "grad_norm": 1.5628575965537506, "learning_rate": 4.802440996108055e-06, "loss": 0.7424, "step": 5163 }, { "epoch": 0.53, "grad_norm": 1.9778595878870895, "learning_rate": 4.800788654381434e-06, "loss": 0.6959, "step": 5164 }, { "epoch": 0.53, "grad_norm": 1.3069591942398435, "learning_rate": 4.7991363344448535e-06, "loss": 0.6455, "step": 5165 }, { "epoch": 0.53, "grad_norm": 1.7051055360513894, "learning_rate": 4.7974840364790474e-06, "loss": 0.6356, "step": 5166 }, { "epoch": 0.53, "grad_norm": 1.5275558838065226, "learning_rate": 4.795831760664746e-06, "loss": 0.6765, "step": 5167 }, { "epoch": 0.53, "grad_norm": 1.6416785354749162, "learning_rate": 4.794179507182677e-06, "loss": 0.6244, "step": 5168 }, { "epoch": 0.53, "grad_norm": 1.4024671852666328, "learning_rate": 4.792527276213567e-06, "loss": 0.594, "step": 5169 }, { "epoch": 0.53, "grad_norm": 2.512845183347402, "learning_rate": 4.7908750679381386e-06, "loss": 0.7071, "step": 5170 }, { "epoch": 0.53, "grad_norm": 1.5980466509150542, "learning_rate": 4.789222882537116e-06, "loss": 0.6329, "step": 5171 }, { "epoch": 0.53, "grad_norm": 1.597198518887405, "learning_rate": 4.787570720191214e-06, "loss": 0.704, "step": 5172 }, { "epoch": 0.53, "grad_norm": 1.350809579699784, "learning_rate": 4.785918581081148e-06, "loss": 0.6445, "step": 5173 }, { "epoch": 0.53, "grad_norm": 1.437269049273831, "learning_rate": 4.784266465387638e-06, "loss": 0.5664, "step": 5174 }, { "epoch": 0.53, "grad_norm": 1.4926149496906016, "learning_rate": 4.782614373291388e-06, "loss": 0.6309, "step": 5175 }, { "epoch": 0.53, "grad_norm": 1.2911312144774427, "learning_rate": 4.780962304973109e-06, "loss": 0.6542, "step": 5176 }, { "epoch": 0.53, "grad_norm": 1.6829798495235588, "learning_rate": 4.779310260613508e-06, "loss": 0.7161, "step": 5177 }, { "epoch": 0.53, "grad_norm": 1.389599945473261, "learning_rate": 4.777658240393284e-06, "loss": 0.5921, "step": 5178 }, { "epoch": 0.53, "grad_norm": 2.1828489917647236, "learning_rate": 4.7760062444931425e-06, "loss": 0.7013, "step": 5179 }, { "epoch": 0.53, "grad_norm": 1.5524238225294191, "learning_rate": 4.77435427309378e-06, "loss": 0.7618, "step": 5180 }, { "epoch": 0.53, "grad_norm": 1.3414090800497849, "learning_rate": 4.772702326375889e-06, "loss": 0.6054, "step": 5181 }, { "epoch": 0.53, "grad_norm": 1.9149115054271892, "learning_rate": 4.7710504045201655e-06, "loss": 0.6267, "step": 5182 }, { "epoch": 0.53, "grad_norm": 1.486766546906442, "learning_rate": 4.769398507707295e-06, "loss": 0.7024, "step": 5183 }, { "epoch": 0.53, "grad_norm": 1.3566573377400208, "learning_rate": 4.767746636117968e-06, "loss": 0.7278, "step": 5184 }, { "epoch": 0.53, "grad_norm": 1.3511975434272734, "learning_rate": 4.766094789932868e-06, "loss": 0.6881, "step": 5185 }, { "epoch": 0.53, "grad_norm": 1.4899797103087187, "learning_rate": 4.764442969332676e-06, "loss": 0.6251, "step": 5186 }, { "epoch": 0.53, "grad_norm": 1.2468943572453237, "learning_rate": 4.76279117449807e-06, "loss": 0.7046, "step": 5187 }, { "epoch": 0.53, "grad_norm": 1.3422302349629822, "learning_rate": 4.76113940560973e-06, "loss": 0.5585, "step": 5188 }, { "epoch": 0.53, "grad_norm": 1.3141426331152755, "learning_rate": 4.759487662848321e-06, "loss": 0.6018, "step": 5189 }, { "epoch": 0.53, "grad_norm": 1.333523391564566, "learning_rate": 4.757835946394519e-06, "loss": 0.6746, "step": 5190 }, { "epoch": 0.53, "grad_norm": 1.5375859990780352, "learning_rate": 4.756184256428992e-06, "loss": 0.6466, "step": 5191 }, { "epoch": 0.53, "grad_norm": 1.5130255190981712, "learning_rate": 4.7545325931324e-06, "loss": 0.6932, "step": 5192 }, { "epoch": 0.53, "grad_norm": 1.468724198914403, "learning_rate": 4.752880956685407e-06, "loss": 0.5896, "step": 5193 }, { "epoch": 0.53, "grad_norm": 1.2416244370979619, "learning_rate": 4.751229347268673e-06, "loss": 0.6153, "step": 5194 }, { "epoch": 0.53, "grad_norm": 1.34308567470349, "learning_rate": 4.7495777650628515e-06, "loss": 0.6392, "step": 5195 }, { "epoch": 0.53, "grad_norm": 1.6626798884350902, "learning_rate": 4.747926210248596e-06, "loss": 0.61, "step": 5196 }, { "epoch": 0.53, "grad_norm": 1.2433476893180269, "learning_rate": 4.746274683006553e-06, "loss": 0.6392, "step": 5197 }, { "epoch": 0.53, "grad_norm": 1.5628411120462984, "learning_rate": 4.744623183517373e-06, "loss": 0.6986, "step": 5198 }, { "epoch": 0.53, "grad_norm": 1.730179438901199, "learning_rate": 4.742971711961699e-06, "loss": 0.6128, "step": 5199 }, { "epoch": 0.53, "grad_norm": 1.3007672262190555, "learning_rate": 4.74132026852017e-06, "loss": 0.6092, "step": 5200 }, { "epoch": 0.53, "grad_norm": 1.4942600967670432, "learning_rate": 4.739668853373422e-06, "loss": 0.6057, "step": 5201 }, { "epoch": 0.53, "grad_norm": 1.503125151993474, "learning_rate": 4.738017466702095e-06, "loss": 0.6551, "step": 5202 }, { "epoch": 0.53, "grad_norm": 1.1866846251547951, "learning_rate": 4.736366108686814e-06, "loss": 0.5722, "step": 5203 }, { "epoch": 0.53, "grad_norm": 1.6093725297693409, "learning_rate": 4.73471477950821e-06, "loss": 0.6084, "step": 5204 }, { "epoch": 0.53, "grad_norm": 1.4023818856286567, "learning_rate": 4.733063479346908e-06, "loss": 0.6882, "step": 5205 }, { "epoch": 0.53, "grad_norm": 1.573080898023869, "learning_rate": 4.7314122083835276e-06, "loss": 0.6966, "step": 5206 }, { "epoch": 0.53, "grad_norm": 1.594518930926054, "learning_rate": 4.72976096679869e-06, "loss": 0.652, "step": 5207 }, { "epoch": 0.53, "grad_norm": 1.4838367109652266, "learning_rate": 4.728109754773011e-06, "loss": 0.5964, "step": 5208 }, { "epoch": 0.53, "grad_norm": 1.335386536380599, "learning_rate": 4.7264585724870995e-06, "loss": 0.5672, "step": 5209 }, { "epoch": 0.53, "grad_norm": 1.4995345453150977, "learning_rate": 4.7248074201215665e-06, "loss": 0.6169, "step": 5210 }, { "epoch": 0.53, "grad_norm": 1.353939140187037, "learning_rate": 4.723156297857015e-06, "loss": 0.7286, "step": 5211 }, { "epoch": 0.53, "grad_norm": 1.4202435795171886, "learning_rate": 4.721505205874051e-06, "loss": 0.6126, "step": 5212 }, { "epoch": 0.53, "grad_norm": 1.4908742824785153, "learning_rate": 4.719854144353273e-06, "loss": 0.6459, "step": 5213 }, { "epoch": 0.53, "grad_norm": 1.5445732744600627, "learning_rate": 4.718203113475273e-06, "loss": 0.6464, "step": 5214 }, { "epoch": 0.53, "grad_norm": 1.407606741816937, "learning_rate": 4.716552113420646e-06, "loss": 0.5702, "step": 5215 }, { "epoch": 0.53, "grad_norm": 1.546471156975273, "learning_rate": 4.714901144369982e-06, "loss": 0.6651, "step": 5216 }, { "epoch": 0.53, "grad_norm": 1.537957795447862, "learning_rate": 4.713250206503864e-06, "loss": 0.8074, "step": 5217 }, { "epoch": 0.53, "grad_norm": 1.6609392095148012, "learning_rate": 4.711599300002875e-06, "loss": 0.6346, "step": 5218 }, { "epoch": 0.53, "grad_norm": 1.4623251483251467, "learning_rate": 4.709948425047595e-06, "loss": 0.6457, "step": 5219 }, { "epoch": 0.53, "grad_norm": 2.523668135161958, "learning_rate": 4.708297581818595e-06, "loss": 0.7004, "step": 5220 }, { "epoch": 0.53, "grad_norm": 1.42387197450661, "learning_rate": 4.706646770496453e-06, "loss": 0.6481, "step": 5221 }, { "epoch": 0.53, "grad_norm": 1.4450546335745444, "learning_rate": 4.704995991261733e-06, "loss": 0.7093, "step": 5222 }, { "epoch": 0.53, "grad_norm": 1.6692278228656594, "learning_rate": 4.703345244295001e-06, "loss": 0.6926, "step": 5223 }, { "epoch": 0.53, "grad_norm": 1.3633732487699788, "learning_rate": 4.701694529776818e-06, "loss": 0.6418, "step": 5224 }, { "epoch": 0.53, "grad_norm": 1.5287230644831258, "learning_rate": 4.70004384788774e-06, "loss": 0.7648, "step": 5225 }, { "epoch": 0.53, "grad_norm": 1.4758395679749203, "learning_rate": 4.698393198808323e-06, "loss": 0.6764, "step": 5226 }, { "epoch": 0.53, "grad_norm": 1.3171039468692174, "learning_rate": 4.69674258271912e-06, "loss": 0.6914, "step": 5227 }, { "epoch": 0.53, "grad_norm": 1.3379682303146365, "learning_rate": 4.695091999800674e-06, "loss": 0.6371, "step": 5228 }, { "epoch": 0.53, "grad_norm": 1.5245069559431443, "learning_rate": 4.693441450233527e-06, "loss": 0.7041, "step": 5229 }, { "epoch": 0.53, "grad_norm": 1.3370303126527316, "learning_rate": 4.6917909341982236e-06, "loss": 0.6, "step": 5230 }, { "epoch": 0.53, "grad_norm": 1.411716277904886, "learning_rate": 4.690140451875296e-06, "loss": 0.6607, "step": 5231 }, { "epoch": 0.53, "grad_norm": 1.4694426223330537, "learning_rate": 4.688490003445279e-06, "loss": 0.6676, "step": 5232 }, { "epoch": 0.53, "grad_norm": 7.491094632851655, "learning_rate": 4.6868395890887e-06, "loss": 0.6294, "step": 5233 }, { "epoch": 0.53, "grad_norm": 1.493415141041212, "learning_rate": 4.685189208986081e-06, "loss": 0.5479, "step": 5234 }, { "epoch": 0.53, "grad_norm": 1.6505111764711002, "learning_rate": 4.683538863317949e-06, "loss": 0.7819, "step": 5235 }, { "epoch": 0.53, "grad_norm": 1.493577957879008, "learning_rate": 4.681888552264816e-06, "loss": 0.7005, "step": 5236 }, { "epoch": 0.53, "grad_norm": 1.609960373246649, "learning_rate": 4.680238276007198e-06, "loss": 0.6625, "step": 5237 }, { "epoch": 0.53, "grad_norm": 1.4818379549101568, "learning_rate": 4.678588034725606e-06, "loss": 0.6103, "step": 5238 }, { "epoch": 0.53, "grad_norm": 1.5864129875566726, "learning_rate": 4.676937828600542e-06, "loss": 0.699, "step": 5239 }, { "epoch": 0.54, "grad_norm": 1.778810867806171, "learning_rate": 4.675287657812511e-06, "loss": 0.7381, "step": 5240 }, { "epoch": 0.54, "grad_norm": 1.30970022671948, "learning_rate": 4.673637522542011e-06, "loss": 0.6434, "step": 5241 }, { "epoch": 0.54, "grad_norm": 1.3823578424222094, "learning_rate": 4.671987422969536e-06, "loss": 0.6836, "step": 5242 }, { "epoch": 0.54, "grad_norm": 1.2705904179139178, "learning_rate": 4.670337359275574e-06, "loss": 0.6579, "step": 5243 }, { "epoch": 0.54, "grad_norm": 1.366737617837918, "learning_rate": 4.668687331640617e-06, "loss": 0.7015, "step": 5244 }, { "epoch": 0.54, "grad_norm": 1.4671416510239195, "learning_rate": 4.667037340245142e-06, "loss": 0.723, "step": 5245 }, { "epoch": 0.54, "grad_norm": 1.3885950318596008, "learning_rate": 4.665387385269631e-06, "loss": 0.6057, "step": 5246 }, { "epoch": 0.54, "grad_norm": 1.559789626805106, "learning_rate": 4.6637374668945575e-06, "loss": 0.6422, "step": 5247 }, { "epoch": 0.54, "grad_norm": 1.4593979195086622, "learning_rate": 4.662087585300391e-06, "loss": 0.6222, "step": 5248 }, { "epoch": 0.54, "grad_norm": 1.549111861139767, "learning_rate": 4.6604377406676004e-06, "loss": 0.692, "step": 5249 }, { "epoch": 0.54, "grad_norm": 1.5860920550941846, "learning_rate": 4.6587879331766465e-06, "loss": 0.7005, "step": 5250 }, { "epoch": 0.54, "grad_norm": 1.3508286903095528, "learning_rate": 4.657138163007987e-06, "loss": 0.6653, "step": 5251 }, { "epoch": 0.54, "grad_norm": 1.5701646391880928, "learning_rate": 4.655488430342079e-06, "loss": 0.6958, "step": 5252 }, { "epoch": 0.54, "grad_norm": 1.4999701360792572, "learning_rate": 4.65383873535937e-06, "loss": 0.5723, "step": 5253 }, { "epoch": 0.54, "grad_norm": 1.4438662478648205, "learning_rate": 4.652189078240307e-06, "loss": 0.6514, "step": 5254 }, { "epoch": 0.54, "grad_norm": 1.3454587621837717, "learning_rate": 4.650539459165334e-06, "loss": 0.6103, "step": 5255 }, { "epoch": 0.54, "grad_norm": 1.4611679707843956, "learning_rate": 4.648889878314886e-06, "loss": 0.746, "step": 5256 }, { "epoch": 0.54, "grad_norm": 1.4171001269844896, "learning_rate": 4.647240335869396e-06, "loss": 0.7051, "step": 5257 }, { "epoch": 0.54, "grad_norm": 1.6497668016923739, "learning_rate": 4.6455908320092985e-06, "loss": 0.7477, "step": 5258 }, { "epoch": 0.54, "grad_norm": 1.648772641114029, "learning_rate": 4.643941366915015e-06, "loss": 0.6742, "step": 5259 }, { "epoch": 0.54, "grad_norm": 1.452888269915261, "learning_rate": 4.642291940766966e-06, "loss": 0.7388, "step": 5260 }, { "epoch": 0.54, "grad_norm": 1.3616077240340432, "learning_rate": 4.64064255374557e-06, "loss": 0.6036, "step": 5261 }, { "epoch": 0.54, "grad_norm": 1.4806788203893642, "learning_rate": 4.638993206031238e-06, "loss": 0.5866, "step": 5262 }, { "epoch": 0.54, "grad_norm": 1.2834364411279844, "learning_rate": 4.63734389780438e-06, "loss": 0.6, "step": 5263 }, { "epoch": 0.54, "grad_norm": 1.34292574930547, "learning_rate": 4.635694629245398e-06, "loss": 0.5535, "step": 5264 }, { "epoch": 0.54, "grad_norm": 1.418973039996287, "learning_rate": 4.6340454005346925e-06, "loss": 0.5364, "step": 5265 }, { "epoch": 0.54, "grad_norm": 1.3501962975494448, "learning_rate": 4.63239621185266e-06, "loss": 0.5973, "step": 5266 }, { "epoch": 0.54, "grad_norm": 1.2631405364606418, "learning_rate": 4.6307470633796865e-06, "loss": 0.6127, "step": 5267 }, { "epoch": 0.54, "grad_norm": 1.423084716453999, "learning_rate": 4.629097955296163e-06, "loss": 0.6108, "step": 5268 }, { "epoch": 0.54, "grad_norm": 1.601608011392751, "learning_rate": 4.627448887782471e-06, "loss": 0.7998, "step": 5269 }, { "epoch": 0.54, "grad_norm": 1.401886596937293, "learning_rate": 4.625799861018987e-06, "loss": 0.6913, "step": 5270 }, { "epoch": 0.54, "grad_norm": 1.6787416360125618, "learning_rate": 4.624150875186082e-06, "loss": 0.6181, "step": 5271 }, { "epoch": 0.54, "grad_norm": 1.8260489779706233, "learning_rate": 4.6225019304641285e-06, "loss": 0.5615, "step": 5272 }, { "epoch": 0.54, "grad_norm": 1.6643830682135694, "learning_rate": 4.620853027033489e-06, "loss": 0.7064, "step": 5273 }, { "epoch": 0.54, "grad_norm": 1.5004534378095506, "learning_rate": 4.6192041650745215e-06, "loss": 0.7739, "step": 5274 }, { "epoch": 0.54, "grad_norm": 20.773095998087012, "learning_rate": 4.617555344767585e-06, "loss": 0.7166, "step": 5275 }, { "epoch": 0.54, "grad_norm": 1.1855470943501267, "learning_rate": 4.615906566293024e-06, "loss": 0.6027, "step": 5276 }, { "epoch": 0.54, "grad_norm": 1.4659680651678868, "learning_rate": 4.61425782983119e-06, "loss": 0.6807, "step": 5277 }, { "epoch": 0.54, "grad_norm": 1.2723202568653198, "learning_rate": 4.612609135562422e-06, "loss": 0.6013, "step": 5278 }, { "epoch": 0.54, "grad_norm": 1.437861512122392, "learning_rate": 4.610960483667055e-06, "loss": 0.6709, "step": 5279 }, { "epoch": 0.54, "grad_norm": 1.4630207279066614, "learning_rate": 4.609311874325425e-06, "loss": 0.626, "step": 5280 }, { "epoch": 0.54, "grad_norm": 1.529783805376555, "learning_rate": 4.6076633077178544e-06, "loss": 0.7445, "step": 5281 }, { "epoch": 0.54, "grad_norm": 1.4312452763318562, "learning_rate": 4.6060147840246706e-06, "loss": 0.656, "step": 5282 }, { "epoch": 0.54, "grad_norm": 1.6276510121710557, "learning_rate": 4.60436630342619e-06, "loss": 0.7076, "step": 5283 }, { "epoch": 0.54, "grad_norm": 1.399625018708385, "learning_rate": 4.602717866102726e-06, "loss": 0.6656, "step": 5284 }, { "epoch": 0.54, "grad_norm": 1.4107462253981504, "learning_rate": 4.601069472234584e-06, "loss": 0.574, "step": 5285 }, { "epoch": 0.54, "grad_norm": 1.52088909912699, "learning_rate": 4.599421122002074e-06, "loss": 0.6796, "step": 5286 }, { "epoch": 0.54, "grad_norm": 1.5947000878379403, "learning_rate": 4.597772815585491e-06, "loss": 0.634, "step": 5287 }, { "epoch": 0.54, "grad_norm": 1.3554090418856435, "learning_rate": 4.596124553165129e-06, "loss": 0.6356, "step": 5288 }, { "epoch": 0.54, "grad_norm": 1.4365367537206581, "learning_rate": 4.59447633492128e-06, "loss": 0.5945, "step": 5289 }, { "epoch": 0.54, "grad_norm": 1.3934004253441912, "learning_rate": 4.5928281610342255e-06, "loss": 0.5355, "step": 5290 }, { "epoch": 0.54, "grad_norm": 1.3009669121244878, "learning_rate": 4.591180031684248e-06, "loss": 0.5404, "step": 5291 }, { "epoch": 0.54, "grad_norm": 1.6092122869214622, "learning_rate": 4.5895319470516205e-06, "loss": 0.6707, "step": 5292 }, { "epoch": 0.54, "grad_norm": 1.4490121542022036, "learning_rate": 4.587883907316614e-06, "loss": 0.6504, "step": 5293 }, { "epoch": 0.54, "grad_norm": 1.668658885470753, "learning_rate": 4.586235912659495e-06, "loss": 0.6903, "step": 5294 }, { "epoch": 0.54, "grad_norm": 1.612551675462831, "learning_rate": 4.584587963260518e-06, "loss": 0.6701, "step": 5295 }, { "epoch": 0.54, "grad_norm": 1.424530798098325, "learning_rate": 4.582940059299945e-06, "loss": 0.5934, "step": 5296 }, { "epoch": 0.54, "grad_norm": 1.5596756173114306, "learning_rate": 4.581292200958024e-06, "loss": 0.6734, "step": 5297 }, { "epoch": 0.54, "grad_norm": 1.3912444530392438, "learning_rate": 4.579644388414999e-06, "loss": 0.6106, "step": 5298 }, { "epoch": 0.54, "grad_norm": 1.3917295918109347, "learning_rate": 4.577996621851109e-06, "loss": 0.6559, "step": 5299 }, { "epoch": 0.54, "grad_norm": 2.0207900701307975, "learning_rate": 4.576348901446595e-06, "loss": 0.6005, "step": 5300 }, { "epoch": 0.54, "grad_norm": 1.3587131643169426, "learning_rate": 4.574701227381681e-06, "loss": 0.6143, "step": 5301 }, { "epoch": 0.54, "grad_norm": 1.2039094844608667, "learning_rate": 4.573053599836595e-06, "loss": 0.6007, "step": 5302 }, { "epoch": 0.54, "grad_norm": 1.5383678793327173, "learning_rate": 4.5714060189915575e-06, "loss": 0.668, "step": 5303 }, { "epoch": 0.54, "grad_norm": 1.8740987574963222, "learning_rate": 4.5697584850267806e-06, "loss": 0.54, "step": 5304 }, { "epoch": 0.54, "grad_norm": 1.5150366420588173, "learning_rate": 4.568110998122477e-06, "loss": 0.7092, "step": 5305 }, { "epoch": 0.54, "grad_norm": 1.5250910615049227, "learning_rate": 4.56646355845885e-06, "loss": 0.6988, "step": 5306 }, { "epoch": 0.54, "grad_norm": 1.6131709649037518, "learning_rate": 4.5648161662161e-06, "loss": 0.56, "step": 5307 }, { "epoch": 0.54, "grad_norm": 1.6563270990973344, "learning_rate": 4.56316882157442e-06, "loss": 0.6031, "step": 5308 }, { "epoch": 0.54, "grad_norm": 1.2437667948004965, "learning_rate": 4.561521524713998e-06, "loss": 0.6679, "step": 5309 }, { "epoch": 0.54, "grad_norm": 1.4427545435399312, "learning_rate": 4.55987427581502e-06, "loss": 0.6331, "step": 5310 }, { "epoch": 0.54, "grad_norm": 1.5003203595286836, "learning_rate": 4.558227075057666e-06, "loss": 0.6583, "step": 5311 }, { "epoch": 0.54, "grad_norm": 1.360060907097362, "learning_rate": 4.556579922622105e-06, "loss": 0.6012, "step": 5312 }, { "epoch": 0.54, "grad_norm": 1.2639018283195087, "learning_rate": 4.554932818688508e-06, "loss": 0.6465, "step": 5313 }, { "epoch": 0.54, "grad_norm": 1.6755949800468009, "learning_rate": 4.553285763437039e-06, "loss": 0.6721, "step": 5314 }, { "epoch": 0.54, "grad_norm": 1.4635442160818528, "learning_rate": 4.551638757047852e-06, "loss": 0.6485, "step": 5315 }, { "epoch": 0.54, "grad_norm": 1.4661203805988943, "learning_rate": 4.5499917997011e-06, "loss": 0.6525, "step": 5316 }, { "epoch": 0.54, "grad_norm": 1.4596741616048152, "learning_rate": 4.5483448915769315e-06, "loss": 0.6499, "step": 5317 }, { "epoch": 0.54, "grad_norm": 1.5621703858699647, "learning_rate": 4.546698032855485e-06, "loss": 0.6878, "step": 5318 }, { "epoch": 0.54, "grad_norm": 1.5972378256431543, "learning_rate": 4.545051223716901e-06, "loss": 0.5859, "step": 5319 }, { "epoch": 0.54, "grad_norm": 1.1645522792986305, "learning_rate": 4.543404464341305e-06, "loss": 0.4622, "step": 5320 }, { "epoch": 0.54, "grad_norm": 1.6787101070334902, "learning_rate": 4.541757754908824e-06, "loss": 0.6852, "step": 5321 }, { "epoch": 0.54, "grad_norm": 1.2721040207962602, "learning_rate": 4.540111095599578e-06, "loss": 0.6516, "step": 5322 }, { "epoch": 0.54, "grad_norm": 1.485488609006878, "learning_rate": 4.53846448659368e-06, "loss": 0.6668, "step": 5323 }, { "epoch": 0.54, "grad_norm": 1.4808163972815078, "learning_rate": 4.536817928071241e-06, "loss": 0.7605, "step": 5324 }, { "epoch": 0.54, "grad_norm": 1.4402630002595933, "learning_rate": 4.535171420212362e-06, "loss": 0.7072, "step": 5325 }, { "epoch": 0.54, "grad_norm": 1.2994819057782483, "learning_rate": 4.53352496319714e-06, "loss": 0.6007, "step": 5326 }, { "epoch": 0.54, "grad_norm": 1.7430075150390982, "learning_rate": 4.531878557205668e-06, "loss": 0.6754, "step": 5327 }, { "epoch": 0.54, "grad_norm": 1.427254712244571, "learning_rate": 4.530232202418034e-06, "loss": 0.7242, "step": 5328 }, { "epoch": 0.54, "grad_norm": 1.4875024390334086, "learning_rate": 4.528585899014316e-06, "loss": 0.6375, "step": 5329 }, { "epoch": 0.54, "grad_norm": 1.4347632326472999, "learning_rate": 4.526939647174589e-06, "loss": 0.5799, "step": 5330 }, { "epoch": 0.54, "grad_norm": 1.3921054167676223, "learning_rate": 4.525293447078927e-06, "loss": 0.6384, "step": 5331 }, { "epoch": 0.54, "grad_norm": 1.4501938434921184, "learning_rate": 4.523647298907388e-06, "loss": 0.6951, "step": 5332 }, { "epoch": 0.54, "grad_norm": 1.5661634978496484, "learning_rate": 4.522001202840035e-06, "loss": 0.6032, "step": 5333 }, { "epoch": 0.54, "grad_norm": 1.374683682081489, "learning_rate": 4.520355159056917e-06, "loss": 0.646, "step": 5334 }, { "epoch": 0.54, "grad_norm": 1.475264640390817, "learning_rate": 4.518709167738082e-06, "loss": 0.7166, "step": 5335 }, { "epoch": 0.54, "grad_norm": 1.3813200281895868, "learning_rate": 4.517063229063573e-06, "loss": 0.5455, "step": 5336 }, { "epoch": 0.54, "grad_norm": 3.7176864056307264, "learning_rate": 4.515417343213421e-06, "loss": 0.6308, "step": 5337 }, { "epoch": 0.55, "grad_norm": 1.649298345005116, "learning_rate": 4.513771510367659e-06, "loss": 0.6063, "step": 5338 }, { "epoch": 0.55, "grad_norm": 1.4709160601783628, "learning_rate": 4.512125730706311e-06, "loss": 0.6928, "step": 5339 }, { "epoch": 0.55, "grad_norm": 1.4420347362135029, "learning_rate": 4.510480004409392e-06, "loss": 0.7106, "step": 5340 }, { "epoch": 0.55, "grad_norm": 1.2924275830931748, "learning_rate": 4.508834331656916e-06, "loss": 0.6074, "step": 5341 }, { "epoch": 0.55, "grad_norm": 1.5596499927682108, "learning_rate": 4.50718871262889e-06, "loss": 0.7011, "step": 5342 }, { "epoch": 0.55, "grad_norm": 1.4560182573460856, "learning_rate": 4.505543147505312e-06, "loss": 0.7166, "step": 5343 }, { "epoch": 0.55, "grad_norm": 1.327933629954551, "learning_rate": 4.503897636466177e-06, "loss": 0.6608, "step": 5344 }, { "epoch": 0.55, "grad_norm": 1.5012937015133418, "learning_rate": 4.5022521796914765e-06, "loss": 0.7131, "step": 5345 }, { "epoch": 0.55, "grad_norm": 1.398367997146835, "learning_rate": 4.5006067773611885e-06, "loss": 0.6348, "step": 5346 }, { "epoch": 0.55, "grad_norm": 3.0013650272449612, "learning_rate": 4.498961429655293e-06, "loss": 0.5751, "step": 5347 }, { "epoch": 0.55, "grad_norm": 1.4643143667961946, "learning_rate": 4.497316136753759e-06, "loss": 0.6008, "step": 5348 }, { "epoch": 0.55, "grad_norm": 1.5527052799400078, "learning_rate": 4.49567089883655e-06, "loss": 0.6019, "step": 5349 }, { "epoch": 0.55, "grad_norm": 1.4715769858955774, "learning_rate": 4.4940257160836295e-06, "loss": 0.6772, "step": 5350 }, { "epoch": 0.55, "grad_norm": 1.4172063169521334, "learning_rate": 4.492380588674944e-06, "loss": 0.766, "step": 5351 }, { "epoch": 0.55, "grad_norm": 1.611734775449534, "learning_rate": 4.490735516790443e-06, "loss": 0.6453, "step": 5352 }, { "epoch": 0.55, "grad_norm": 1.5461858516190796, "learning_rate": 4.4890905006100685e-06, "loss": 0.6751, "step": 5353 }, { "epoch": 0.55, "grad_norm": 1.5066940021705275, "learning_rate": 4.487445540313752e-06, "loss": 0.685, "step": 5354 }, { "epoch": 0.55, "grad_norm": 1.5376953988424826, "learning_rate": 4.4858006360814215e-06, "loss": 0.6568, "step": 5355 }, { "epoch": 0.55, "grad_norm": 1.3438037378784644, "learning_rate": 4.484155788093003e-06, "loss": 0.5656, "step": 5356 }, { "epoch": 0.55, "grad_norm": 1.354547523777199, "learning_rate": 4.482510996528408e-06, "loss": 0.733, "step": 5357 }, { "epoch": 0.55, "grad_norm": 1.4817986215792036, "learning_rate": 4.480866261567551e-06, "loss": 0.6697, "step": 5358 }, { "epoch": 0.55, "grad_norm": 1.3252392978734666, "learning_rate": 4.479221583390332e-06, "loss": 0.5904, "step": 5359 }, { "epoch": 0.55, "grad_norm": 1.32305266006179, "learning_rate": 4.477576962176647e-06, "loss": 0.5738, "step": 5360 }, { "epoch": 0.55, "grad_norm": 1.4860554972661597, "learning_rate": 4.475932398106392e-06, "loss": 0.6302, "step": 5361 }, { "epoch": 0.55, "grad_norm": 1.4981078330092743, "learning_rate": 4.4742878913594485e-06, "loss": 0.7866, "step": 5362 }, { "epoch": 0.55, "grad_norm": 1.4019630254377629, "learning_rate": 4.472643442115695e-06, "loss": 0.6736, "step": 5363 }, { "epoch": 0.55, "grad_norm": 1.6117684625687716, "learning_rate": 4.470999050555008e-06, "loss": 0.7259, "step": 5364 }, { "epoch": 0.55, "grad_norm": 1.5534821837742583, "learning_rate": 4.469354716857249e-06, "loss": 0.6703, "step": 5365 }, { "epoch": 0.55, "grad_norm": 1.4100870388911302, "learning_rate": 4.467710441202278e-06, "loss": 0.6267, "step": 5366 }, { "epoch": 0.55, "grad_norm": 1.722735126458676, "learning_rate": 4.4660662237699516e-06, "loss": 0.6957, "step": 5367 }, { "epoch": 0.55, "grad_norm": 1.4353405896874265, "learning_rate": 4.464422064740114e-06, "loss": 0.7584, "step": 5368 }, { "epoch": 0.55, "grad_norm": 1.3627834636072536, "learning_rate": 4.462777964292605e-06, "loss": 0.6459, "step": 5369 }, { "epoch": 0.55, "grad_norm": 1.4237569946505602, "learning_rate": 4.461133922607263e-06, "loss": 0.6697, "step": 5370 }, { "epoch": 0.55, "grad_norm": 1.3212579224087773, "learning_rate": 4.459489939863912e-06, "loss": 0.6902, "step": 5371 }, { "epoch": 0.55, "grad_norm": 1.3105134989522245, "learning_rate": 4.457846016242375e-06, "loss": 0.5687, "step": 5372 }, { "epoch": 0.55, "grad_norm": 1.4276703247941926, "learning_rate": 4.456202151922467e-06, "loss": 0.6439, "step": 5373 }, { "epoch": 0.55, "grad_norm": 1.5919526251516847, "learning_rate": 4.454558347083994e-06, "loss": 0.699, "step": 5374 }, { "epoch": 0.55, "grad_norm": 1.3883290570052644, "learning_rate": 4.452914601906761e-06, "loss": 0.6713, "step": 5375 }, { "epoch": 0.55, "grad_norm": 1.4969878065224185, "learning_rate": 4.451270916570562e-06, "loss": 0.6955, "step": 5376 }, { "epoch": 0.55, "grad_norm": 1.485817957750167, "learning_rate": 4.4496272912551845e-06, "loss": 0.7302, "step": 5377 }, { "epoch": 0.55, "grad_norm": 1.529593209202926, "learning_rate": 4.447983726140415e-06, "loss": 0.7149, "step": 5378 }, { "epoch": 0.55, "grad_norm": 1.7065801378731094, "learning_rate": 4.446340221406023e-06, "loss": 0.7569, "step": 5379 }, { "epoch": 0.55, "grad_norm": 1.3011021661085647, "learning_rate": 4.4446967772317826e-06, "loss": 0.626, "step": 5380 }, { "epoch": 0.55, "grad_norm": 1.3996395136388475, "learning_rate": 4.443053393797455e-06, "loss": 0.6104, "step": 5381 }, { "epoch": 0.55, "grad_norm": 1.4049260592997124, "learning_rate": 4.441410071282795e-06, "loss": 0.671, "step": 5382 }, { "epoch": 0.55, "grad_norm": 1.5909332148834914, "learning_rate": 4.43976680986755e-06, "loss": 0.6086, "step": 5383 }, { "epoch": 0.55, "grad_norm": 1.3922642437182569, "learning_rate": 4.4381236097314675e-06, "loss": 0.6523, "step": 5384 }, { "epoch": 0.55, "grad_norm": 1.537193885831373, "learning_rate": 4.4364804710542785e-06, "loss": 0.661, "step": 5385 }, { "epoch": 0.55, "grad_norm": 1.5839279212868747, "learning_rate": 4.434837394015715e-06, "loss": 0.6978, "step": 5386 }, { "epoch": 0.55, "grad_norm": 1.2883737684890066, "learning_rate": 4.433194378795498e-06, "loss": 0.5962, "step": 5387 }, { "epoch": 0.55, "grad_norm": 1.4575119551780522, "learning_rate": 4.431551425573341e-06, "loss": 0.6728, "step": 5388 }, { "epoch": 0.55, "grad_norm": 1.5985908942728595, "learning_rate": 4.429908534528957e-06, "loss": 0.7049, "step": 5389 }, { "epoch": 0.55, "grad_norm": 1.6277031221455351, "learning_rate": 4.428265705842046e-06, "loss": 0.7037, "step": 5390 }, { "epoch": 0.55, "grad_norm": 1.2341321863888968, "learning_rate": 4.4266229396923e-06, "loss": 0.5329, "step": 5391 }, { "epoch": 0.55, "grad_norm": 1.5882859916062755, "learning_rate": 4.424980236259414e-06, "loss": 0.7012, "step": 5392 }, { "epoch": 0.55, "grad_norm": 1.4784075288126726, "learning_rate": 4.423337595723063e-06, "loss": 0.5864, "step": 5393 }, { "epoch": 0.55, "grad_norm": 1.41030842518903, "learning_rate": 4.421695018262924e-06, "loss": 0.6625, "step": 5394 }, { "epoch": 0.55, "grad_norm": 1.4193563635416164, "learning_rate": 4.420052504058666e-06, "loss": 0.6695, "step": 5395 }, { "epoch": 0.55, "grad_norm": 1.2881931185347064, "learning_rate": 4.418410053289947e-06, "loss": 0.5073, "step": 5396 }, { "epoch": 0.55, "grad_norm": 1.4915942597928842, "learning_rate": 4.416767666136422e-06, "loss": 0.757, "step": 5397 }, { "epoch": 0.55, "grad_norm": 1.4425429377985999, "learning_rate": 4.41512534277774e-06, "loss": 0.665, "step": 5398 }, { "epoch": 0.55, "grad_norm": 1.5397578106857879, "learning_rate": 4.413483083393537e-06, "loss": 0.6959, "step": 5399 }, { "epoch": 0.55, "grad_norm": 1.4754531088574745, "learning_rate": 4.411840888163449e-06, "loss": 0.6944, "step": 5400 }, { "epoch": 0.55, "grad_norm": 1.4172873556492114, "learning_rate": 4.4101987572671e-06, "loss": 0.5605, "step": 5401 }, { "epoch": 0.55, "grad_norm": 1.627276994412115, "learning_rate": 4.408556690884107e-06, "loss": 0.7326, "step": 5402 }, { "epoch": 0.55, "grad_norm": 2.6141773508691615, "learning_rate": 4.406914689194087e-06, "loss": 0.6907, "step": 5403 }, { "epoch": 0.55, "grad_norm": 1.375597036875413, "learning_rate": 4.40527275237664e-06, "loss": 0.6602, "step": 5404 }, { "epoch": 0.55, "grad_norm": 1.3910035880763931, "learning_rate": 4.4036308806113645e-06, "loss": 0.6928, "step": 5405 }, { "epoch": 0.55, "grad_norm": 1.5524534618952834, "learning_rate": 4.4019890740778545e-06, "loss": 0.6111, "step": 5406 }, { "epoch": 0.55, "grad_norm": 1.2767485371660192, "learning_rate": 4.400347332955686e-06, "loss": 0.5678, "step": 5407 }, { "epoch": 0.55, "grad_norm": 1.305463331607472, "learning_rate": 4.398705657424442e-06, "loss": 0.5985, "step": 5408 }, { "epoch": 0.55, "grad_norm": 1.5426943044309955, "learning_rate": 4.39706404766369e-06, "loss": 0.6729, "step": 5409 }, { "epoch": 0.55, "grad_norm": 1.3635755290117637, "learning_rate": 4.395422503852988e-06, "loss": 0.6061, "step": 5410 }, { "epoch": 0.55, "grad_norm": 1.4209343700810675, "learning_rate": 4.393781026171894e-06, "loss": 0.5883, "step": 5411 }, { "epoch": 0.55, "grad_norm": 1.4343674973262328, "learning_rate": 4.392139614799957e-06, "loss": 0.6274, "step": 5412 }, { "epoch": 0.55, "grad_norm": 1.6745433462405979, "learning_rate": 4.3904982699167125e-06, "loss": 0.6224, "step": 5413 }, { "epoch": 0.55, "grad_norm": 1.3996237577448316, "learning_rate": 4.388856991701697e-06, "loss": 0.7759, "step": 5414 }, { "epoch": 0.55, "grad_norm": 1.3891201627534622, "learning_rate": 4.387215780334433e-06, "loss": 0.6085, "step": 5415 }, { "epoch": 0.55, "grad_norm": 1.5645120117324365, "learning_rate": 4.38557463599444e-06, "loss": 0.6713, "step": 5416 }, { "epoch": 0.55, "grad_norm": 1.487100189233245, "learning_rate": 4.3839335588612305e-06, "loss": 0.6576, "step": 5417 }, { "epoch": 0.55, "grad_norm": 1.32895592112279, "learning_rate": 4.382292549114307e-06, "loss": 0.7436, "step": 5418 }, { "epoch": 0.55, "grad_norm": 1.386317843403069, "learning_rate": 4.380651606933162e-06, "loss": 0.5861, "step": 5419 }, { "epoch": 0.55, "grad_norm": 1.353051374437739, "learning_rate": 4.379010732497292e-06, "loss": 0.6623, "step": 5420 }, { "epoch": 0.55, "grad_norm": 1.6322142587973394, "learning_rate": 4.37736992598617e-06, "loss": 0.6232, "step": 5421 }, { "epoch": 0.55, "grad_norm": 1.4711015898353783, "learning_rate": 4.375729187579275e-06, "loss": 0.633, "step": 5422 }, { "epoch": 0.55, "grad_norm": 1.4586751218252527, "learning_rate": 4.374088517456074e-06, "loss": 0.7612, "step": 5423 }, { "epoch": 0.55, "grad_norm": 1.2242224622904088, "learning_rate": 4.372447915796021e-06, "loss": 0.6363, "step": 5424 }, { "epoch": 0.55, "grad_norm": 1.433575973219545, "learning_rate": 4.370807382778573e-06, "loss": 0.6383, "step": 5425 }, { "epoch": 0.55, "grad_norm": 1.6010188789481523, "learning_rate": 4.369166918583172e-06, "loss": 0.5821, "step": 5426 }, { "epoch": 0.55, "grad_norm": 1.6114971377648064, "learning_rate": 4.367526523389253e-06, "loss": 0.7015, "step": 5427 }, { "epoch": 0.55, "grad_norm": 1.3372977890909028, "learning_rate": 4.365886197376248e-06, "loss": 0.5773, "step": 5428 }, { "epoch": 0.55, "grad_norm": 1.4883073723162719, "learning_rate": 4.3642459407235735e-06, "loss": 0.6748, "step": 5429 }, { "epoch": 0.55, "grad_norm": 1.6158864762528506, "learning_rate": 4.3626057536106485e-06, "loss": 0.6791, "step": 5430 }, { "epoch": 0.55, "grad_norm": 1.3254480194339766, "learning_rate": 4.3609656362168766e-06, "loss": 0.6474, "step": 5431 }, { "epoch": 0.55, "grad_norm": 1.671540470558827, "learning_rate": 4.359325588721656e-06, "loss": 0.8018, "step": 5432 }, { "epoch": 0.55, "grad_norm": 1.5653393874586143, "learning_rate": 4.357685611304377e-06, "loss": 0.701, "step": 5433 }, { "epoch": 0.55, "grad_norm": 1.535890409674366, "learning_rate": 4.3560457041444285e-06, "loss": 0.714, "step": 5434 }, { "epoch": 0.55, "grad_norm": 1.309491293102181, "learning_rate": 4.354405867421177e-06, "loss": 0.6584, "step": 5435 }, { "epoch": 0.56, "grad_norm": 1.3869675653840152, "learning_rate": 4.352766101313997e-06, "loss": 0.6564, "step": 5436 }, { "epoch": 0.56, "grad_norm": 1.614138370987418, "learning_rate": 4.351126406002247e-06, "loss": 0.7192, "step": 5437 }, { "epoch": 0.56, "grad_norm": 1.586527268030965, "learning_rate": 4.349486781665277e-06, "loss": 0.7278, "step": 5438 }, { "epoch": 0.56, "grad_norm": 1.5224862574673643, "learning_rate": 4.347847228482434e-06, "loss": 0.6794, "step": 5439 }, { "epoch": 0.56, "grad_norm": 1.407226421797374, "learning_rate": 4.346207746633056e-06, "loss": 0.6385, "step": 5440 }, { "epoch": 0.56, "grad_norm": 1.610124853974338, "learning_rate": 4.344568336296469e-06, "loss": 0.5896, "step": 5441 }, { "epoch": 0.56, "grad_norm": 1.4574018793268713, "learning_rate": 4.342928997651998e-06, "loss": 0.6674, "step": 5442 }, { "epoch": 0.56, "grad_norm": 1.4654349904820054, "learning_rate": 4.341289730878951e-06, "loss": 0.694, "step": 5443 }, { "epoch": 0.56, "grad_norm": 1.7097364221741576, "learning_rate": 4.339650536156637e-06, "loss": 0.6979, "step": 5444 }, { "epoch": 0.56, "grad_norm": 1.670193571819967, "learning_rate": 4.338011413664355e-06, "loss": 0.774, "step": 5445 }, { "epoch": 0.56, "grad_norm": 1.6509303297355815, "learning_rate": 4.336372363581391e-06, "loss": 0.7292, "step": 5446 }, { "epoch": 0.56, "grad_norm": 1.5092402641392044, "learning_rate": 4.3347333860870275e-06, "loss": 0.6398, "step": 5447 }, { "epoch": 0.56, "grad_norm": 1.5394354213289712, "learning_rate": 4.333094481360544e-06, "loss": 0.6929, "step": 5448 }, { "epoch": 0.56, "grad_norm": 1.5727842297384176, "learning_rate": 4.331455649581196e-06, "loss": 0.7623, "step": 5449 }, { "epoch": 0.56, "grad_norm": 1.307722523376641, "learning_rate": 4.32981689092825e-06, "loss": 0.5821, "step": 5450 }, { "epoch": 0.56, "grad_norm": 1.5769405775488012, "learning_rate": 4.3281782055809544e-06, "loss": 0.6459, "step": 5451 }, { "epoch": 0.56, "grad_norm": 1.4310266471839912, "learning_rate": 4.326539593718548e-06, "loss": 0.6755, "step": 5452 }, { "epoch": 0.56, "grad_norm": 1.5676751623794347, "learning_rate": 4.324901055520266e-06, "loss": 0.6491, "step": 5453 }, { "epoch": 0.56, "grad_norm": 1.4176776938506415, "learning_rate": 4.3232625911653376e-06, "loss": 0.7804, "step": 5454 }, { "epoch": 0.56, "grad_norm": 1.393643278142371, "learning_rate": 4.3216242008329755e-06, "loss": 0.6724, "step": 5455 }, { "epoch": 0.56, "grad_norm": 1.348478427204847, "learning_rate": 4.319985884702393e-06, "loss": 0.6787, "step": 5456 }, { "epoch": 0.56, "grad_norm": 1.466806024097277, "learning_rate": 4.318347642952788e-06, "loss": 0.7168, "step": 5457 }, { "epoch": 0.56, "grad_norm": 1.3895616202101693, "learning_rate": 4.3167094757633565e-06, "loss": 0.5715, "step": 5458 }, { "epoch": 0.56, "grad_norm": 1.5679848118429716, "learning_rate": 4.315071383313285e-06, "loss": 0.6734, "step": 5459 }, { "epoch": 0.56, "grad_norm": 1.3705153332387499, "learning_rate": 4.313433365781747e-06, "loss": 0.5581, "step": 5460 }, { "epoch": 0.56, "grad_norm": 1.3961137798464172, "learning_rate": 4.311795423347912e-06, "loss": 0.6724, "step": 5461 }, { "epoch": 0.56, "grad_norm": 1.4022443295495106, "learning_rate": 4.310157556190944e-06, "loss": 0.6567, "step": 5462 }, { "epoch": 0.56, "grad_norm": 1.4117380682919831, "learning_rate": 4.308519764489992e-06, "loss": 0.566, "step": 5463 }, { "epoch": 0.56, "grad_norm": 1.2920419613605376, "learning_rate": 4.306882048424201e-06, "loss": 0.6558, "step": 5464 }, { "epoch": 0.56, "grad_norm": 1.559345243771523, "learning_rate": 4.305244408172709e-06, "loss": 0.7384, "step": 5465 }, { "epoch": 0.56, "grad_norm": 1.399430012090833, "learning_rate": 4.30360684391464e-06, "loss": 0.642, "step": 5466 }, { "epoch": 0.56, "grad_norm": 1.4327670613440584, "learning_rate": 4.301969355829115e-06, "loss": 0.606, "step": 5467 }, { "epoch": 0.56, "grad_norm": 1.2499878537981148, "learning_rate": 4.300331944095246e-06, "loss": 0.5981, "step": 5468 }, { "epoch": 0.56, "grad_norm": 1.6578960593641285, "learning_rate": 4.298694608892134e-06, "loss": 0.6228, "step": 5469 }, { "epoch": 0.56, "grad_norm": 1.4850577772221496, "learning_rate": 4.297057350398875e-06, "loss": 0.6428, "step": 5470 }, { "epoch": 0.56, "grad_norm": 1.4624157828229876, "learning_rate": 4.2954201687945515e-06, "loss": 0.644, "step": 5471 }, { "epoch": 0.56, "grad_norm": 1.3108058857142908, "learning_rate": 4.293783064258243e-06, "loss": 0.6547, "step": 5472 }, { "epoch": 0.56, "grad_norm": 1.4123525832533916, "learning_rate": 4.292146036969021e-06, "loss": 0.5908, "step": 5473 }, { "epoch": 0.56, "grad_norm": 1.356173159171224, "learning_rate": 4.290509087105942e-06, "loss": 0.5511, "step": 5474 }, { "epoch": 0.56, "grad_norm": 1.53999410917426, "learning_rate": 4.288872214848058e-06, "loss": 0.6059, "step": 5475 }, { "epoch": 0.56, "grad_norm": 1.433681845393887, "learning_rate": 4.287235420374417e-06, "loss": 0.6174, "step": 5476 }, { "epoch": 0.56, "grad_norm": 1.4629148147681212, "learning_rate": 4.285598703864049e-06, "loss": 0.7392, "step": 5477 }, { "epoch": 0.56, "grad_norm": 1.5301407531858442, "learning_rate": 4.283962065495984e-06, "loss": 0.6488, "step": 5478 }, { "epoch": 0.56, "grad_norm": 1.3677731872823267, "learning_rate": 4.282325505449239e-06, "loss": 0.6173, "step": 5479 }, { "epoch": 0.56, "grad_norm": 1.324091938994109, "learning_rate": 4.280689023902822e-06, "loss": 0.5664, "step": 5480 }, { "epoch": 0.56, "grad_norm": 1.6633684222599174, "learning_rate": 4.279052621035738e-06, "loss": 0.6968, "step": 5481 }, { "epoch": 0.56, "grad_norm": 1.5396010947249579, "learning_rate": 4.277416297026973e-06, "loss": 0.6099, "step": 5482 }, { "epoch": 0.56, "grad_norm": 1.4402524776533088, "learning_rate": 4.275780052055515e-06, "loss": 0.6077, "step": 5483 }, { "epoch": 0.56, "grad_norm": 1.6224230325683264, "learning_rate": 4.274143886300339e-06, "loss": 0.7351, "step": 5484 }, { "epoch": 0.56, "grad_norm": 1.5077371142879812, "learning_rate": 4.272507799940408e-06, "loss": 0.6593, "step": 5485 }, { "epoch": 0.56, "grad_norm": 1.515494491464966, "learning_rate": 4.270871793154683e-06, "loss": 0.6839, "step": 5486 }, { "epoch": 0.56, "grad_norm": 1.467959390027013, "learning_rate": 4.2692358661221125e-06, "loss": 0.672, "step": 5487 }, { "epoch": 0.56, "grad_norm": 1.4216423116435497, "learning_rate": 4.2676000190216355e-06, "loss": 0.594, "step": 5488 }, { "epoch": 0.56, "grad_norm": 1.4844095311469172, "learning_rate": 4.265964252032182e-06, "loss": 0.5504, "step": 5489 }, { "epoch": 0.56, "grad_norm": 1.4942560978043755, "learning_rate": 4.264328565332679e-06, "loss": 0.6416, "step": 5490 }, { "epoch": 0.56, "grad_norm": 1.6700279396302247, "learning_rate": 4.262692959102037e-06, "loss": 0.7962, "step": 5491 }, { "epoch": 0.56, "grad_norm": 1.1832761095435813, "learning_rate": 4.2610574335191615e-06, "loss": 0.5133, "step": 5492 }, { "epoch": 0.56, "grad_norm": 1.5901290249286246, "learning_rate": 4.259421988762951e-06, "loss": 0.6097, "step": 5493 }, { "epoch": 0.56, "grad_norm": 1.626655450995529, "learning_rate": 4.257786625012289e-06, "loss": 0.593, "step": 5494 }, { "epoch": 0.56, "grad_norm": 1.4644520339719074, "learning_rate": 4.2561513424460585e-06, "loss": 0.589, "step": 5495 }, { "epoch": 0.56, "grad_norm": 2.1821955641777713, "learning_rate": 4.254516141243126e-06, "loss": 0.6983, "step": 5496 }, { "epoch": 0.56, "grad_norm": 2.025863432456575, "learning_rate": 4.252881021582354e-06, "loss": 0.709, "step": 5497 }, { "epoch": 0.56, "grad_norm": 1.3799231101615967, "learning_rate": 4.251245983642594e-06, "loss": 0.6558, "step": 5498 }, { "epoch": 0.56, "grad_norm": 1.2925966659444468, "learning_rate": 4.249611027602686e-06, "loss": 0.6921, "step": 5499 }, { "epoch": 0.56, "grad_norm": 1.275260073195348, "learning_rate": 4.247976153641469e-06, "loss": 0.5874, "step": 5500 }, { "epoch": 0.56, "grad_norm": 1.5507362542280632, "learning_rate": 4.246341361937766e-06, "loss": 0.7351, "step": 5501 }, { "epoch": 0.56, "grad_norm": 1.4145326567409373, "learning_rate": 4.244706652670391e-06, "loss": 0.6755, "step": 5502 }, { "epoch": 0.56, "grad_norm": 1.4400646655129181, "learning_rate": 4.243072026018152e-06, "loss": 0.6751, "step": 5503 }, { "epoch": 0.56, "grad_norm": 1.4438775642066481, "learning_rate": 4.24143748215985e-06, "loss": 0.6399, "step": 5504 }, { "epoch": 0.56, "grad_norm": 1.5947972670884074, "learning_rate": 4.2398030212742704e-06, "loss": 0.7222, "step": 5505 }, { "epoch": 0.56, "grad_norm": 1.3197734080830423, "learning_rate": 4.238168643540194e-06, "loss": 0.5394, "step": 5506 }, { "epoch": 0.56, "grad_norm": 1.6777126021859339, "learning_rate": 4.2365343491363935e-06, "loss": 0.5939, "step": 5507 }, { "epoch": 0.56, "grad_norm": 1.433751126225384, "learning_rate": 4.234900138241626e-06, "loss": 0.6619, "step": 5508 }, { "epoch": 0.56, "grad_norm": 1.437833115318169, "learning_rate": 4.233266011034648e-06, "loss": 0.6893, "step": 5509 }, { "epoch": 0.56, "grad_norm": 1.3270790606579093, "learning_rate": 4.231631967694201e-06, "loss": 0.5878, "step": 5510 }, { "epoch": 0.56, "grad_norm": 1.2791094367394633, "learning_rate": 4.229998008399019e-06, "loss": 0.5511, "step": 5511 }, { "epoch": 0.56, "grad_norm": 1.329830014773949, "learning_rate": 4.22836413332783e-06, "loss": 0.5481, "step": 5512 }, { "epoch": 0.56, "grad_norm": 1.3545690110025637, "learning_rate": 4.226730342659344e-06, "loss": 0.583, "step": 5513 }, { "epoch": 0.56, "grad_norm": 1.4052170614506578, "learning_rate": 4.225096636572272e-06, "loss": 0.6292, "step": 5514 }, { "epoch": 0.56, "grad_norm": 1.7913313137420905, "learning_rate": 4.223463015245311e-06, "loss": 0.5919, "step": 5515 }, { "epoch": 0.56, "grad_norm": 1.4796146653041073, "learning_rate": 4.221829478857148e-06, "loss": 0.6429, "step": 5516 }, { "epoch": 0.56, "grad_norm": 1.4723921371536555, "learning_rate": 4.220196027586459e-06, "loss": 0.6637, "step": 5517 }, { "epoch": 0.56, "grad_norm": 1.3871916831667204, "learning_rate": 4.21856266161192e-06, "loss": 0.6566, "step": 5518 }, { "epoch": 0.56, "grad_norm": 1.541944230976723, "learning_rate": 4.216929381112185e-06, "loss": 0.6575, "step": 5519 }, { "epoch": 0.56, "grad_norm": 5.288322531354145, "learning_rate": 4.215296186265908e-06, "loss": 0.6298, "step": 5520 }, { "epoch": 0.56, "grad_norm": 1.453881608456272, "learning_rate": 4.2136630772517295e-06, "loss": 0.6819, "step": 5521 }, { "epoch": 0.56, "grad_norm": 1.4723261484251993, "learning_rate": 4.2120300542482785e-06, "loss": 0.6811, "step": 5522 }, { "epoch": 0.56, "grad_norm": 1.4037153906538447, "learning_rate": 4.210397117434183e-06, "loss": 0.6461, "step": 5523 }, { "epoch": 0.56, "grad_norm": 1.7171811430433583, "learning_rate": 4.208764266988053e-06, "loss": 0.6771, "step": 5524 }, { "epoch": 0.56, "grad_norm": 1.54855380687389, "learning_rate": 4.207131503088492e-06, "loss": 0.6814, "step": 5525 }, { "epoch": 0.56, "grad_norm": 1.5659452652802968, "learning_rate": 4.205498825914095e-06, "loss": 0.6605, "step": 5526 }, { "epoch": 0.56, "grad_norm": 1.354000669302375, "learning_rate": 4.203866235643445e-06, "loss": 0.7096, "step": 5527 }, { "epoch": 0.56, "grad_norm": 1.4384043757246814, "learning_rate": 4.20223373245512e-06, "loss": 0.6871, "step": 5528 }, { "epoch": 0.56, "grad_norm": 1.4315261434127462, "learning_rate": 4.200601316527686e-06, "loss": 0.6026, "step": 5529 }, { "epoch": 0.56, "grad_norm": 1.6800884666229436, "learning_rate": 4.198968988039695e-06, "loss": 0.736, "step": 5530 }, { "epoch": 0.56, "grad_norm": 1.623564551034726, "learning_rate": 4.1973367471696955e-06, "loss": 0.6303, "step": 5531 }, { "epoch": 0.56, "grad_norm": 1.4536410380571552, "learning_rate": 4.195704594096227e-06, "loss": 0.6134, "step": 5532 }, { "epoch": 0.56, "grad_norm": 1.6128090654936846, "learning_rate": 4.194072528997815e-06, "loss": 0.6741, "step": 5533 }, { "epoch": 0.57, "grad_norm": 1.6258623174273452, "learning_rate": 4.1924405520529756e-06, "loss": 0.5884, "step": 5534 }, { "epoch": 0.57, "grad_norm": 1.5190615459163248, "learning_rate": 4.19080866344022e-06, "loss": 0.6295, "step": 5535 }, { "epoch": 0.57, "grad_norm": 1.3081420253147702, "learning_rate": 4.189176863338043e-06, "loss": 0.6436, "step": 5536 }, { "epoch": 0.57, "grad_norm": 1.5496703708585449, "learning_rate": 4.187545151924939e-06, "loss": 0.7085, "step": 5537 }, { "epoch": 0.57, "grad_norm": 1.4893805177270152, "learning_rate": 4.185913529379381e-06, "loss": 0.7016, "step": 5538 }, { "epoch": 0.57, "grad_norm": 1.757974530432349, "learning_rate": 4.184281995879842e-06, "loss": 0.6817, "step": 5539 }, { "epoch": 0.57, "grad_norm": 1.7901717316400867, "learning_rate": 4.182650551604781e-06, "loss": 0.6533, "step": 5540 }, { "epoch": 0.57, "grad_norm": 1.5690524717547205, "learning_rate": 4.181019196732646e-06, "loss": 0.7161, "step": 5541 }, { "epoch": 0.57, "grad_norm": 1.3285521862149083, "learning_rate": 4.17938793144188e-06, "loss": 0.6076, "step": 5542 }, { "epoch": 0.57, "grad_norm": 1.4670438723330856, "learning_rate": 4.177756755910912e-06, "loss": 0.6923, "step": 5543 }, { "epoch": 0.57, "grad_norm": 1.6234829927122645, "learning_rate": 4.1761256703181615e-06, "loss": 0.6553, "step": 5544 }, { "epoch": 0.57, "grad_norm": 1.444043091126778, "learning_rate": 4.174494674842038e-06, "loss": 0.6657, "step": 5545 }, { "epoch": 0.57, "grad_norm": 1.4450231418993877, "learning_rate": 4.172863769660948e-06, "loss": 0.7511, "step": 5546 }, { "epoch": 0.57, "grad_norm": 1.2478602971148707, "learning_rate": 4.171232954953276e-06, "loss": 0.6023, "step": 5547 }, { "epoch": 0.57, "grad_norm": 1.2531000995027302, "learning_rate": 4.169602230897407e-06, "loss": 0.6701, "step": 5548 }, { "epoch": 0.57, "grad_norm": 1.3894800562316225, "learning_rate": 4.1679715976717115e-06, "loss": 0.6678, "step": 5549 }, { "epoch": 0.57, "grad_norm": 1.3165608192316487, "learning_rate": 4.166341055454547e-06, "loss": 0.656, "step": 5550 }, { "epoch": 0.57, "grad_norm": 1.464769441844475, "learning_rate": 4.1647106044242695e-06, "loss": 0.6839, "step": 5551 }, { "epoch": 0.57, "grad_norm": 1.5014907432727613, "learning_rate": 4.1630802447592176e-06, "loss": 0.765, "step": 5552 }, { "epoch": 0.57, "grad_norm": 1.5207167920971618, "learning_rate": 4.161449976637724e-06, "loss": 0.7147, "step": 5553 }, { "epoch": 0.57, "grad_norm": 1.3794840894745608, "learning_rate": 4.159819800238109e-06, "loss": 0.5639, "step": 5554 }, { "epoch": 0.57, "grad_norm": 1.5254693048873806, "learning_rate": 4.158189715738682e-06, "loss": 0.5978, "step": 5555 }, { "epoch": 0.57, "grad_norm": 1.7073996103303337, "learning_rate": 4.1565597233177465e-06, "loss": 0.6155, "step": 5556 }, { "epoch": 0.57, "grad_norm": 1.3581469792885896, "learning_rate": 4.154929823153595e-06, "loss": 0.7142, "step": 5557 }, { "epoch": 0.57, "grad_norm": 1.4945192653346762, "learning_rate": 4.153300015424505e-06, "loss": 0.629, "step": 5558 }, { "epoch": 0.57, "grad_norm": 1.4198988301255213, "learning_rate": 4.1516703003087476e-06, "loss": 0.6366, "step": 5559 }, { "epoch": 0.57, "grad_norm": 1.4351999376231297, "learning_rate": 4.150040677984587e-06, "loss": 0.7377, "step": 5560 }, { "epoch": 0.57, "grad_norm": 1.4936614073741974, "learning_rate": 4.148411148630271e-06, "loss": 0.6084, "step": 5561 }, { "epoch": 0.57, "grad_norm": 1.4518256366267084, "learning_rate": 4.146781712424039e-06, "loss": 0.6571, "step": 5562 }, { "epoch": 0.57, "grad_norm": 1.619477546468367, "learning_rate": 4.1451523695441255e-06, "loss": 0.6321, "step": 5563 }, { "epoch": 0.57, "grad_norm": 1.3857632500664696, "learning_rate": 4.143523120168745e-06, "loss": 0.7217, "step": 5564 }, { "epoch": 0.57, "grad_norm": 1.4392557948529527, "learning_rate": 4.141893964476113e-06, "loss": 0.6363, "step": 5565 }, { "epoch": 0.57, "grad_norm": 1.6434127260723388, "learning_rate": 4.140264902644424e-06, "loss": 0.6526, "step": 5566 }, { "epoch": 0.57, "grad_norm": 1.3540740494149974, "learning_rate": 4.138635934851869e-06, "loss": 0.705, "step": 5567 }, { "epoch": 0.57, "grad_norm": 1.4538221383917371, "learning_rate": 4.1370070612766296e-06, "loss": 0.6021, "step": 5568 }, { "epoch": 0.57, "grad_norm": 1.3064908673725832, "learning_rate": 4.135378282096869e-06, "loss": 0.63, "step": 5569 }, { "epoch": 0.57, "grad_norm": 1.4872713047225916, "learning_rate": 4.133749597490751e-06, "loss": 0.6687, "step": 5570 }, { "epoch": 0.57, "grad_norm": 1.4200168605629624, "learning_rate": 4.13212100763642e-06, "loss": 0.6414, "step": 5571 }, { "epoch": 0.57, "grad_norm": 1.3501300230865654, "learning_rate": 4.130492512712016e-06, "loss": 0.6473, "step": 5572 }, { "epoch": 0.57, "grad_norm": 1.5098321447611844, "learning_rate": 4.1288641128956635e-06, "loss": 0.5902, "step": 5573 }, { "epoch": 0.57, "grad_norm": 1.6374150705147248, "learning_rate": 4.127235808365483e-06, "loss": 0.6327, "step": 5574 }, { "epoch": 0.57, "grad_norm": 1.2178328331217179, "learning_rate": 4.125607599299579e-06, "loss": 0.4446, "step": 5575 }, { "epoch": 0.57, "grad_norm": 1.4455061819450319, "learning_rate": 4.123979485876045e-06, "loss": 0.6888, "step": 5576 }, { "epoch": 0.57, "grad_norm": 1.5476374134526498, "learning_rate": 4.1223514682729736e-06, "loss": 0.5494, "step": 5577 }, { "epoch": 0.57, "grad_norm": 1.3549681572811831, "learning_rate": 4.120723546668431e-06, "loss": 0.7267, "step": 5578 }, { "epoch": 0.57, "grad_norm": 1.6772214038082798, "learning_rate": 4.119095721240488e-06, "loss": 0.6603, "step": 5579 }, { "epoch": 0.57, "grad_norm": 1.4401122261560948, "learning_rate": 4.117467992167197e-06, "loss": 0.7074, "step": 5580 }, { "epoch": 0.57, "grad_norm": 1.376937447693968, "learning_rate": 4.115840359626599e-06, "loss": 0.7011, "step": 5581 }, { "epoch": 0.57, "grad_norm": 1.294951046296861, "learning_rate": 4.114212823796732e-06, "loss": 0.6007, "step": 5582 }, { "epoch": 0.57, "grad_norm": 1.5569466129454508, "learning_rate": 4.112585384855612e-06, "loss": 0.6851, "step": 5583 }, { "epoch": 0.57, "grad_norm": 1.4087255923091184, "learning_rate": 4.110958042981256e-06, "loss": 0.6672, "step": 5584 }, { "epoch": 0.57, "grad_norm": 1.424560469684909, "learning_rate": 4.109330798351664e-06, "loss": 0.6098, "step": 5585 }, { "epoch": 0.57, "grad_norm": 1.4566510156413328, "learning_rate": 4.107703651144824e-06, "loss": 0.6985, "step": 5586 }, { "epoch": 0.57, "grad_norm": 1.4307621374333104, "learning_rate": 4.106076601538718e-06, "loss": 0.6948, "step": 5587 }, { "epoch": 0.57, "grad_norm": 1.4167757814688946, "learning_rate": 4.1044496497113154e-06, "loss": 0.6026, "step": 5588 }, { "epoch": 0.57, "grad_norm": 1.4386055267453803, "learning_rate": 4.102822795840574e-06, "loss": 0.7416, "step": 5589 }, { "epoch": 0.57, "grad_norm": 1.3986712853082244, "learning_rate": 4.101196040104441e-06, "loss": 0.6637, "step": 5590 }, { "epoch": 0.57, "grad_norm": 1.375278319415761, "learning_rate": 4.099569382680856e-06, "loss": 0.6319, "step": 5591 }, { "epoch": 0.57, "grad_norm": 1.3427972616211359, "learning_rate": 4.0979428237477406e-06, "loss": 0.6314, "step": 5592 }, { "epoch": 0.57, "grad_norm": 1.3943368468419144, "learning_rate": 4.096316363483015e-06, "loss": 0.6919, "step": 5593 }, { "epoch": 0.57, "grad_norm": 1.5069100216309217, "learning_rate": 4.09469000206458e-06, "loss": 0.7125, "step": 5594 }, { "epoch": 0.57, "grad_norm": 1.3884775466985528, "learning_rate": 4.093063739670331e-06, "loss": 0.6089, "step": 5595 }, { "epoch": 0.57, "grad_norm": 1.2789108406248786, "learning_rate": 4.0914375764781554e-06, "loss": 0.5864, "step": 5596 }, { "epoch": 0.57, "grad_norm": 1.3542592183521003, "learning_rate": 4.0898115126659174e-06, "loss": 0.6385, "step": 5597 }, { "epoch": 0.57, "grad_norm": 1.5351254044592584, "learning_rate": 4.088185548411483e-06, "loss": 0.5558, "step": 5598 }, { "epoch": 0.57, "grad_norm": 2.2475083448228776, "learning_rate": 4.0865596838927045e-06, "loss": 0.7346, "step": 5599 }, { "epoch": 0.57, "grad_norm": 1.3608122737918316, "learning_rate": 4.084933919287417e-06, "loss": 0.6665, "step": 5600 }, { "epoch": 0.57, "grad_norm": 1.4172714819743648, "learning_rate": 4.0833082547734495e-06, "loss": 0.5749, "step": 5601 }, { "epoch": 0.57, "grad_norm": 1.3546156227115391, "learning_rate": 4.081682690528625e-06, "loss": 0.6955, "step": 5602 }, { "epoch": 0.57, "grad_norm": 1.2981038843889074, "learning_rate": 4.0800572267307435e-06, "loss": 0.5709, "step": 5603 }, { "epoch": 0.57, "grad_norm": 1.6994346631675132, "learning_rate": 4.078431863557605e-06, "loss": 0.7527, "step": 5604 }, { "epoch": 0.57, "grad_norm": 1.6039231275928532, "learning_rate": 4.076806601186992e-06, "loss": 0.6562, "step": 5605 }, { "epoch": 0.57, "grad_norm": 1.425299162554854, "learning_rate": 4.075181439796679e-06, "loss": 0.6295, "step": 5606 }, { "epoch": 0.57, "grad_norm": 1.770621850276091, "learning_rate": 4.073556379564429e-06, "loss": 0.5785, "step": 5607 }, { "epoch": 0.57, "grad_norm": 1.4762250154193233, "learning_rate": 4.071931420667993e-06, "loss": 0.6902, "step": 5608 }, { "epoch": 0.57, "grad_norm": 1.4188694370435382, "learning_rate": 4.0703065632851104e-06, "loss": 0.7527, "step": 5609 }, { "epoch": 0.57, "grad_norm": 1.2425415001477185, "learning_rate": 4.0686818075935154e-06, "loss": 0.5568, "step": 5610 }, { "epoch": 0.57, "grad_norm": 1.4108991478183261, "learning_rate": 4.067057153770919e-06, "loss": 0.628, "step": 5611 }, { "epoch": 0.57, "grad_norm": 1.5109272353756027, "learning_rate": 4.065432601995033e-06, "loss": 0.6671, "step": 5612 }, { "epoch": 0.57, "grad_norm": 1.5107953947886261, "learning_rate": 4.063808152443555e-06, "loss": 0.736, "step": 5613 }, { "epoch": 0.57, "grad_norm": 1.44465199029822, "learning_rate": 4.062183805294165e-06, "loss": 0.6257, "step": 5614 }, { "epoch": 0.57, "grad_norm": 1.4279997414217802, "learning_rate": 4.060559560724536e-06, "loss": 0.5847, "step": 5615 }, { "epoch": 0.57, "grad_norm": 1.4525131072610693, "learning_rate": 4.058935418912337e-06, "loss": 0.619, "step": 5616 }, { "epoch": 0.57, "grad_norm": 1.5291421173597888, "learning_rate": 4.057311380035214e-06, "loss": 0.6418, "step": 5617 }, { "epoch": 0.57, "grad_norm": 1.0730532477994617, "learning_rate": 4.055687444270808e-06, "loss": 0.5209, "step": 5618 }, { "epoch": 0.57, "grad_norm": 1.5354397888939835, "learning_rate": 4.054063611796747e-06, "loss": 0.7413, "step": 5619 }, { "epoch": 0.57, "grad_norm": 1.6146718267147704, "learning_rate": 4.052439882790648e-06, "loss": 0.7209, "step": 5620 }, { "epoch": 0.57, "grad_norm": 1.5957911988856335, "learning_rate": 4.050816257430119e-06, "loss": 0.6738, "step": 5621 }, { "epoch": 0.57, "grad_norm": 1.4462708079141602, "learning_rate": 4.049192735892754e-06, "loss": 0.652, "step": 5622 }, { "epoch": 0.57, "grad_norm": 1.816045097860981, "learning_rate": 4.047569318356132e-06, "loss": 0.7488, "step": 5623 }, { "epoch": 0.57, "grad_norm": 1.3718721320898084, "learning_rate": 4.045946004997834e-06, "loss": 0.6473, "step": 5624 }, { "epoch": 0.57, "grad_norm": 1.4398352589584589, "learning_rate": 4.044322795995411e-06, "loss": 0.6924, "step": 5625 }, { "epoch": 0.57, "grad_norm": 1.5061525446149033, "learning_rate": 4.0426996915264175e-06, "loss": 0.6752, "step": 5626 }, { "epoch": 0.57, "grad_norm": 1.4134372157081667, "learning_rate": 4.041076691768391e-06, "loss": 0.694, "step": 5627 }, { "epoch": 0.57, "grad_norm": 1.3497914973142593, "learning_rate": 4.0394537968988524e-06, "loss": 0.5863, "step": 5628 }, { "epoch": 0.57, "grad_norm": 1.3820727931440593, "learning_rate": 4.0378310070953234e-06, "loss": 0.6191, "step": 5629 }, { "epoch": 0.57, "grad_norm": 1.6251591010699884, "learning_rate": 4.036208322535304e-06, "loss": 0.5979, "step": 5630 }, { "epoch": 0.58, "grad_norm": 1.438496148462397, "learning_rate": 4.034585743396287e-06, "loss": 0.6553, "step": 5631 }, { "epoch": 0.58, "grad_norm": 1.4118883513971534, "learning_rate": 4.0329632698557504e-06, "loss": 0.623, "step": 5632 }, { "epoch": 0.58, "grad_norm": 1.443359986918247, "learning_rate": 4.031340902091165e-06, "loss": 0.6872, "step": 5633 }, { "epoch": 0.58, "grad_norm": 1.450680587603769, "learning_rate": 4.029718640279985e-06, "loss": 0.6666, "step": 5634 }, { "epoch": 0.58, "grad_norm": 1.5137339614693033, "learning_rate": 4.0280964845996595e-06, "loss": 0.5921, "step": 5635 }, { "epoch": 0.58, "grad_norm": 1.4464011585216838, "learning_rate": 4.026474435227621e-06, "loss": 0.7084, "step": 5636 }, { "epoch": 0.58, "grad_norm": 1.4937545424487197, "learning_rate": 4.024852492341289e-06, "loss": 0.6245, "step": 5637 }, { "epoch": 0.58, "grad_norm": 1.6433160006254817, "learning_rate": 4.023230656118081e-06, "loss": 0.6794, "step": 5638 }, { "epoch": 0.58, "grad_norm": 1.5134099353585821, "learning_rate": 4.0216089267353865e-06, "loss": 0.6227, "step": 5639 }, { "epoch": 0.58, "grad_norm": 1.633631137714618, "learning_rate": 4.019987304370601e-06, "loss": 0.668, "step": 5640 }, { "epoch": 0.58, "grad_norm": 1.2598161667241523, "learning_rate": 4.0183657892010964e-06, "loss": 0.6265, "step": 5641 }, { "epoch": 0.58, "grad_norm": 1.4934665331294865, "learning_rate": 4.016744381404235e-06, "loss": 0.6501, "step": 5642 }, { "epoch": 0.58, "grad_norm": 1.481877306519546, "learning_rate": 4.015123081157371e-06, "loss": 0.7021, "step": 5643 }, { "epoch": 0.58, "grad_norm": 1.414843499465048, "learning_rate": 4.013501888637846e-06, "loss": 0.6625, "step": 5644 }, { "epoch": 0.58, "grad_norm": 1.5818714631752895, "learning_rate": 4.011880804022986e-06, "loss": 0.5597, "step": 5645 }, { "epoch": 0.58, "grad_norm": 1.4835762530420624, "learning_rate": 4.01025982749011e-06, "loss": 0.5568, "step": 5646 }, { "epoch": 0.58, "grad_norm": 1.3870727085870367, "learning_rate": 4.00863895921652e-06, "loss": 0.7104, "step": 5647 }, { "epoch": 0.58, "grad_norm": 1.2623881557104466, "learning_rate": 4.0070181993795084e-06, "loss": 0.671, "step": 5648 }, { "epoch": 0.58, "grad_norm": 1.4598503832646528, "learning_rate": 4.005397548156362e-06, "loss": 0.6721, "step": 5649 }, { "epoch": 0.58, "grad_norm": 2.0771345274023543, "learning_rate": 4.003777005724345e-06, "loss": 0.7045, "step": 5650 }, { "epoch": 0.58, "grad_norm": 1.4967003826296401, "learning_rate": 4.002156572260715e-06, "loss": 0.5946, "step": 5651 }, { "epoch": 0.58, "grad_norm": 1.3622167267403742, "learning_rate": 4.000536247942722e-06, "loss": 0.6653, "step": 5652 }, { "epoch": 0.58, "grad_norm": 1.5907179685593775, "learning_rate": 3.998916032947594e-06, "loss": 0.6797, "step": 5653 }, { "epoch": 0.58, "grad_norm": 1.3085428008559672, "learning_rate": 3.997295927452556e-06, "loss": 0.6014, "step": 5654 }, { "epoch": 0.58, "grad_norm": 1.6603386680704195, "learning_rate": 3.995675931634817e-06, "loss": 0.6415, "step": 5655 }, { "epoch": 0.58, "grad_norm": 1.4510502781495525, "learning_rate": 3.994056045671572e-06, "loss": 0.6406, "step": 5656 }, { "epoch": 0.58, "grad_norm": 1.5809412450206837, "learning_rate": 3.99243626974001e-06, "loss": 0.6932, "step": 5657 }, { "epoch": 0.58, "grad_norm": 1.4947586720349604, "learning_rate": 3.9908166040173045e-06, "loss": 0.7072, "step": 5658 }, { "epoch": 0.58, "grad_norm": 1.4206346513433938, "learning_rate": 3.989197048680615e-06, "loss": 0.5848, "step": 5659 }, { "epoch": 0.58, "grad_norm": 1.4118111772109962, "learning_rate": 3.9875776039070926e-06, "loss": 0.5934, "step": 5660 }, { "epoch": 0.58, "grad_norm": 1.4414297666806957, "learning_rate": 3.985958269873871e-06, "loss": 0.6937, "step": 5661 }, { "epoch": 0.58, "grad_norm": 1.4867874888021133, "learning_rate": 3.98433904675808e-06, "loss": 0.806, "step": 5662 }, { "epoch": 0.58, "grad_norm": 1.5964741913579839, "learning_rate": 3.982719934736832e-06, "loss": 0.6919, "step": 5663 }, { "epoch": 0.58, "grad_norm": 1.4712746588452346, "learning_rate": 3.981100933987226e-06, "loss": 0.6332, "step": 5664 }, { "epoch": 0.58, "grad_norm": 1.3671333782441077, "learning_rate": 3.97948204468635e-06, "loss": 0.647, "step": 5665 }, { "epoch": 0.58, "grad_norm": 1.4704790661449922, "learning_rate": 3.977863267011286e-06, "loss": 0.6726, "step": 5666 }, { "epoch": 0.58, "grad_norm": 1.384818877101077, "learning_rate": 3.976244601139091e-06, "loss": 0.6565, "step": 5667 }, { "epoch": 0.58, "grad_norm": 1.303735840935216, "learning_rate": 3.974626047246821e-06, "loss": 0.6212, "step": 5668 }, { "epoch": 0.58, "grad_norm": 1.4449803421954073, "learning_rate": 3.973007605511518e-06, "loss": 0.6874, "step": 5669 }, { "epoch": 0.58, "grad_norm": 1.7585282918338512, "learning_rate": 3.971389276110204e-06, "loss": 0.7207, "step": 5670 }, { "epoch": 0.58, "grad_norm": 1.4119870709869653, "learning_rate": 3.9697710592199e-06, "loss": 0.6657, "step": 5671 }, { "epoch": 0.58, "grad_norm": 1.5992898731470342, "learning_rate": 3.9681529550176065e-06, "loss": 0.6924, "step": 5672 }, { "epoch": 0.58, "grad_norm": 1.375636513836817, "learning_rate": 3.966534963680314e-06, "loss": 0.6699, "step": 5673 }, { "epoch": 0.58, "grad_norm": 1.471117301189386, "learning_rate": 3.964917085385001e-06, "loss": 0.7, "step": 5674 }, { "epoch": 0.58, "grad_norm": 1.4825192590602916, "learning_rate": 3.9632993203086324e-06, "loss": 0.6182, "step": 5675 }, { "epoch": 0.58, "grad_norm": 1.3942507554442984, "learning_rate": 3.9616816686281636e-06, "loss": 0.6745, "step": 5676 }, { "epoch": 0.58, "grad_norm": 1.4259741994617983, "learning_rate": 3.960064130520537e-06, "loss": 0.5598, "step": 5677 }, { "epoch": 0.58, "grad_norm": 1.3718962560035846, "learning_rate": 3.9584467061626785e-06, "loss": 0.6844, "step": 5678 }, { "epoch": 0.58, "grad_norm": 1.5189285173374978, "learning_rate": 3.956829395731504e-06, "loss": 0.7525, "step": 5679 }, { "epoch": 0.58, "grad_norm": 1.3886287859635091, "learning_rate": 3.955212199403922e-06, "loss": 0.6229, "step": 5680 }, { "epoch": 0.58, "grad_norm": 1.6101445474008567, "learning_rate": 3.953595117356818e-06, "loss": 0.5767, "step": 5681 }, { "epoch": 0.58, "grad_norm": 1.2924646719831092, "learning_rate": 3.9519781497670754e-06, "loss": 0.6111, "step": 5682 }, { "epoch": 0.58, "grad_norm": 1.423510112943191, "learning_rate": 3.950361296811559e-06, "loss": 0.6144, "step": 5683 }, { "epoch": 0.58, "grad_norm": 1.6373182484402797, "learning_rate": 3.948744558667121e-06, "loss": 0.6545, "step": 5684 }, { "epoch": 0.58, "grad_norm": 1.4120563219683684, "learning_rate": 3.947127935510605e-06, "loss": 0.6026, "step": 5685 }, { "epoch": 0.58, "grad_norm": 1.4578070003174721, "learning_rate": 3.94551142751884e-06, "loss": 0.696, "step": 5686 }, { "epoch": 0.58, "grad_norm": 1.2403170961419498, "learning_rate": 3.94389503486864e-06, "loss": 0.527, "step": 5687 }, { "epoch": 0.58, "grad_norm": 1.4248690268391992, "learning_rate": 3.9422787577368105e-06, "loss": 0.7203, "step": 5688 }, { "epoch": 0.58, "grad_norm": 1.6529319767254913, "learning_rate": 3.94066259630014e-06, "loss": 0.6134, "step": 5689 }, { "epoch": 0.58, "grad_norm": 1.4614544660207767, "learning_rate": 3.939046550735408e-06, "loss": 0.707, "step": 5690 }, { "epoch": 0.58, "grad_norm": 1.4274781227995292, "learning_rate": 3.937430621219382e-06, "loss": 0.64, "step": 5691 }, { "epoch": 0.58, "grad_norm": 1.2192695320863245, "learning_rate": 3.935814807928812e-06, "loss": 0.5017, "step": 5692 }, { "epoch": 0.58, "grad_norm": 1.382108796144819, "learning_rate": 3.934199111040437e-06, "loss": 0.6175, "step": 5693 }, { "epoch": 0.58, "grad_norm": 1.4079372715761154, "learning_rate": 3.93258353073099e-06, "loss": 0.5954, "step": 5694 }, { "epoch": 0.58, "grad_norm": 1.5184967160351268, "learning_rate": 3.93096806717718e-06, "loss": 0.7226, "step": 5695 }, { "epoch": 0.58, "grad_norm": 1.5980961528511786, "learning_rate": 3.929352720555711e-06, "loss": 0.6506, "step": 5696 }, { "epoch": 0.58, "grad_norm": 1.3673840003010418, "learning_rate": 3.9277374910432735e-06, "loss": 0.5504, "step": 5697 }, { "epoch": 0.58, "grad_norm": 1.323095093015634, "learning_rate": 3.926122378816539e-06, "loss": 0.5978, "step": 5698 }, { "epoch": 0.58, "grad_norm": 1.4194733805255353, "learning_rate": 3.924507384052177e-06, "loss": 0.606, "step": 5699 }, { "epoch": 0.58, "grad_norm": 1.5357268296162303, "learning_rate": 3.922892506926835e-06, "loss": 0.5887, "step": 5700 }, { "epoch": 0.58, "grad_norm": 1.3797646117397848, "learning_rate": 3.921277747617151e-06, "loss": 0.6793, "step": 5701 }, { "epoch": 0.58, "grad_norm": 1.4319975343363849, "learning_rate": 3.91966310629975e-06, "loss": 0.6269, "step": 5702 }, { "epoch": 0.58, "grad_norm": 1.5746474973908329, "learning_rate": 3.918048583151243e-06, "loss": 0.6226, "step": 5703 }, { "epoch": 0.58, "grad_norm": 1.3368996627171186, "learning_rate": 3.916434178348229e-06, "loss": 0.5198, "step": 5704 }, { "epoch": 0.58, "grad_norm": 1.2712834091266112, "learning_rate": 3.9148198920672975e-06, "loss": 0.477, "step": 5705 }, { "epoch": 0.58, "grad_norm": 1.3319461345648789, "learning_rate": 3.913205724485017e-06, "loss": 0.66, "step": 5706 }, { "epoch": 0.58, "grad_norm": 1.469577453923253, "learning_rate": 3.9115916757779484e-06, "loss": 0.5915, "step": 5707 }, { "epoch": 0.58, "grad_norm": 1.3808680176535568, "learning_rate": 3.909977746122643e-06, "loss": 0.6556, "step": 5708 }, { "epoch": 0.58, "grad_norm": 1.3755295563459289, "learning_rate": 3.90836393569563e-06, "loss": 0.589, "step": 5709 }, { "epoch": 0.58, "grad_norm": 1.5169737844288178, "learning_rate": 3.906750244673433e-06, "loss": 0.6768, "step": 5710 }, { "epoch": 0.58, "grad_norm": 1.3596716259129558, "learning_rate": 3.905136673232559e-06, "loss": 0.6889, "step": 5711 }, { "epoch": 0.58, "grad_norm": 1.4199022843285511, "learning_rate": 3.903523221549502e-06, "loss": 0.6693, "step": 5712 }, { "epoch": 0.58, "grad_norm": 1.5613102445502012, "learning_rate": 3.901909889800746e-06, "loss": 0.6542, "step": 5713 }, { "epoch": 0.58, "grad_norm": 1.4384337498033322, "learning_rate": 3.90029667816276e-06, "loss": 0.6568, "step": 5714 }, { "epoch": 0.58, "grad_norm": 1.4631008456631942, "learning_rate": 3.8986835868119954e-06, "loss": 0.6921, "step": 5715 }, { "epoch": 0.58, "grad_norm": 2.6558929718121327, "learning_rate": 3.8970706159249e-06, "loss": 0.6473, "step": 5716 }, { "epoch": 0.58, "grad_norm": 1.6669797693256538, "learning_rate": 3.895457765677898e-06, "loss": 0.6122, "step": 5717 }, { "epoch": 0.58, "grad_norm": 1.4596660899508063, "learning_rate": 3.893845036247409e-06, "loss": 0.7675, "step": 5718 }, { "epoch": 0.58, "grad_norm": 1.5552588658731132, "learning_rate": 3.892232427809835e-06, "loss": 0.6646, "step": 5719 }, { "epoch": 0.58, "grad_norm": 1.7423241711989432, "learning_rate": 3.890619940541565e-06, "loss": 0.6372, "step": 5720 }, { "epoch": 0.58, "grad_norm": 1.5311007103898833, "learning_rate": 3.889007574618974e-06, "loss": 0.6909, "step": 5721 }, { "epoch": 0.58, "grad_norm": 1.512172121821333, "learning_rate": 3.887395330218429e-06, "loss": 0.6049, "step": 5722 }, { "epoch": 0.58, "grad_norm": 1.689912769841213, "learning_rate": 3.885783207516276e-06, "loss": 0.7018, "step": 5723 }, { "epoch": 0.58, "grad_norm": 2.1314767608984306, "learning_rate": 3.884171206688852e-06, "loss": 0.5879, "step": 5724 }, { "epoch": 0.58, "grad_norm": 2.315084650415222, "learning_rate": 3.882559327912483e-06, "loss": 0.6219, "step": 5725 }, { "epoch": 0.58, "grad_norm": 1.707692224213717, "learning_rate": 3.880947571363474e-06, "loss": 0.7688, "step": 5726 }, { "epoch": 0.58, "grad_norm": 1.5869333008606756, "learning_rate": 3.879335937218126e-06, "loss": 0.6589, "step": 5727 }, { "epoch": 0.58, "grad_norm": 1.5583076738600286, "learning_rate": 3.877724425652719e-06, "loss": 0.7012, "step": 5728 }, { "epoch": 0.59, "grad_norm": 1.3523235761664392, "learning_rate": 3.876113036843523e-06, "loss": 0.566, "step": 5729 }, { "epoch": 0.59, "grad_norm": 1.5455217584443521, "learning_rate": 3.874501770966797e-06, "loss": 0.6559, "step": 5730 }, { "epoch": 0.59, "grad_norm": 1.609056105271873, "learning_rate": 3.8728906281987776e-06, "loss": 0.683, "step": 5731 }, { "epoch": 0.59, "grad_norm": 1.5461239923833527, "learning_rate": 3.8712796087157e-06, "loss": 0.6692, "step": 5732 }, { "epoch": 0.59, "grad_norm": 1.5490483084843802, "learning_rate": 3.869668712693779e-06, "loss": 0.6163, "step": 5733 }, { "epoch": 0.59, "grad_norm": 1.556834323615484, "learning_rate": 3.868057940309214e-06, "loss": 0.7044, "step": 5734 }, { "epoch": 0.59, "grad_norm": 1.526708783191542, "learning_rate": 3.866447291738195e-06, "loss": 0.7261, "step": 5735 }, { "epoch": 0.59, "grad_norm": 1.4033597061557865, "learning_rate": 3.8648367671569e-06, "loss": 0.634, "step": 5736 }, { "epoch": 0.59, "grad_norm": 1.5806877515026714, "learning_rate": 3.863226366741486e-06, "loss": 0.665, "step": 5737 }, { "epoch": 0.59, "grad_norm": 1.5479644405408781, "learning_rate": 3.861616090668104e-06, "loss": 0.7066, "step": 5738 }, { "epoch": 0.59, "grad_norm": 1.355140116629972, "learning_rate": 3.860005939112889e-06, "loss": 0.6541, "step": 5739 }, { "epoch": 0.59, "grad_norm": 1.4319754570708787, "learning_rate": 3.858395912251959e-06, "loss": 0.7148, "step": 5740 }, { "epoch": 0.59, "grad_norm": 1.4097719467237662, "learning_rate": 3.856786010261424e-06, "loss": 0.6136, "step": 5741 }, { "epoch": 0.59, "grad_norm": 1.3292672345624572, "learning_rate": 3.855176233317375e-06, "loss": 0.6613, "step": 5742 }, { "epoch": 0.59, "grad_norm": 1.50662299649042, "learning_rate": 3.853566581595894e-06, "loss": 0.4947, "step": 5743 }, { "epoch": 0.59, "grad_norm": 1.5169286794257135, "learning_rate": 3.8519570552730465e-06, "loss": 0.7597, "step": 5744 }, { "epoch": 0.59, "grad_norm": 1.6872902196757942, "learning_rate": 3.850347654524884e-06, "loss": 0.6605, "step": 5745 }, { "epoch": 0.59, "grad_norm": 1.449781359576417, "learning_rate": 3.848738379527445e-06, "loss": 0.6749, "step": 5746 }, { "epoch": 0.59, "grad_norm": 1.4124340860982538, "learning_rate": 3.847129230456759e-06, "loss": 0.64, "step": 5747 }, { "epoch": 0.59, "grad_norm": 1.5716469433336946, "learning_rate": 3.845520207488831e-06, "loss": 0.7021, "step": 5748 }, { "epoch": 0.59, "grad_norm": 1.4071073420623197, "learning_rate": 3.84391131079966e-06, "loss": 0.601, "step": 5749 }, { "epoch": 0.59, "grad_norm": 1.2971470041251265, "learning_rate": 3.842302540565233e-06, "loss": 0.622, "step": 5750 }, { "epoch": 0.59, "grad_norm": 1.3644547863804024, "learning_rate": 3.840693896961516e-06, "loss": 0.6578, "step": 5751 }, { "epoch": 0.59, "grad_norm": 1.4071679947411606, "learning_rate": 3.839085380164466e-06, "loss": 0.597, "step": 5752 }, { "epoch": 0.59, "grad_norm": 1.5817567889914648, "learning_rate": 3.837476990350026e-06, "loss": 0.6819, "step": 5753 }, { "epoch": 0.59, "grad_norm": 1.5271182391885174, "learning_rate": 3.835868727694122e-06, "loss": 0.7457, "step": 5754 }, { "epoch": 0.59, "grad_norm": 1.366951761698546, "learning_rate": 3.834260592372672e-06, "loss": 0.6388, "step": 5755 }, { "epoch": 0.59, "grad_norm": 1.2246215737332453, "learning_rate": 3.8326525845615715e-06, "loss": 0.4906, "step": 5756 }, { "epoch": 0.59, "grad_norm": 1.4722722502436085, "learning_rate": 3.83104470443671e-06, "loss": 0.7507, "step": 5757 }, { "epoch": 0.59, "grad_norm": 1.3247239831628803, "learning_rate": 3.82943695217396e-06, "loss": 0.6306, "step": 5758 }, { "epoch": 0.59, "grad_norm": 1.382093445515318, "learning_rate": 3.827829327949176e-06, "loss": 0.6325, "step": 5759 }, { "epoch": 0.59, "grad_norm": 1.3284583517185014, "learning_rate": 3.8262218319382076e-06, "loss": 0.6097, "step": 5760 }, { "epoch": 0.59, "grad_norm": 1.4432554189704176, "learning_rate": 3.824614464316883e-06, "loss": 0.6056, "step": 5761 }, { "epoch": 0.59, "grad_norm": 1.237390049379846, "learning_rate": 3.823007225261018e-06, "loss": 0.6834, "step": 5762 }, { "epoch": 0.59, "grad_norm": 2.0416274448349427, "learning_rate": 3.821400114946414e-06, "loss": 0.6659, "step": 5763 }, { "epoch": 0.59, "grad_norm": 1.6248026163025757, "learning_rate": 3.819793133548863e-06, "loss": 0.6567, "step": 5764 }, { "epoch": 0.59, "grad_norm": 1.6911111406978396, "learning_rate": 3.818186281244134e-06, "loss": 0.713, "step": 5765 }, { "epoch": 0.59, "grad_norm": 1.4770728702186795, "learning_rate": 3.81657955820799e-06, "loss": 0.6445, "step": 5766 }, { "epoch": 0.59, "grad_norm": 1.374038826643377, "learning_rate": 3.814972964616178e-06, "loss": 0.7111, "step": 5767 }, { "epoch": 0.59, "grad_norm": 1.4861356436907973, "learning_rate": 3.813366500644426e-06, "loss": 0.7114, "step": 5768 }, { "epoch": 0.59, "grad_norm": 1.5372680903453206, "learning_rate": 3.811760166468455e-06, "loss": 0.5855, "step": 5769 }, { "epoch": 0.59, "grad_norm": 1.448773481419896, "learning_rate": 3.810153962263966e-06, "loss": 0.6501, "step": 5770 }, { "epoch": 0.59, "grad_norm": 1.4753760121351247, "learning_rate": 3.8085478882066486e-06, "loss": 0.7268, "step": 5771 }, { "epoch": 0.59, "grad_norm": 1.3636428595642187, "learning_rate": 3.8069419444721785e-06, "loss": 0.5993, "step": 5772 }, { "epoch": 0.59, "grad_norm": 1.475749726290642, "learning_rate": 3.8053361312362135e-06, "loss": 0.6607, "step": 5773 }, { "epoch": 0.59, "grad_norm": 1.7111220566086547, "learning_rate": 3.803730448674403e-06, "loss": 0.6977, "step": 5774 }, { "epoch": 0.59, "grad_norm": 1.452685898316261, "learning_rate": 3.802124896962379e-06, "loss": 0.6626, "step": 5775 }, { "epoch": 0.59, "grad_norm": 1.3220014273863572, "learning_rate": 3.800519476275757e-06, "loss": 0.6427, "step": 5776 }, { "epoch": 0.59, "grad_norm": 1.347639012021137, "learning_rate": 3.798914186790141e-06, "loss": 0.606, "step": 5777 }, { "epoch": 0.59, "grad_norm": 1.3021759183429196, "learning_rate": 3.7973090286811222e-06, "loss": 0.6223, "step": 5778 }, { "epoch": 0.59, "grad_norm": 1.4856845929659368, "learning_rate": 3.795704002124272e-06, "loss": 0.6607, "step": 5779 }, { "epoch": 0.59, "grad_norm": 1.4881603270439683, "learning_rate": 3.794099107295153e-06, "loss": 0.7534, "step": 5780 }, { "epoch": 0.59, "grad_norm": 1.438953758678423, "learning_rate": 3.792494344369311e-06, "loss": 0.6097, "step": 5781 }, { "epoch": 0.59, "grad_norm": 1.4265999382099572, "learning_rate": 3.7908897135222744e-06, "loss": 0.6158, "step": 5782 }, { "epoch": 0.59, "grad_norm": 1.5388599531758251, "learning_rate": 3.7892852149295643e-06, "loss": 0.6723, "step": 5783 }, { "epoch": 0.59, "grad_norm": 1.3755721542525488, "learning_rate": 3.7876808487666806e-06, "loss": 0.6245, "step": 5784 }, { "epoch": 0.59, "grad_norm": 1.3383639913980083, "learning_rate": 3.786076615209112e-06, "loss": 0.6144, "step": 5785 }, { "epoch": 0.59, "grad_norm": 1.3914419808532839, "learning_rate": 3.7844725144323336e-06, "loss": 0.6374, "step": 5786 }, { "epoch": 0.59, "grad_norm": 1.5247260221845564, "learning_rate": 3.7828685466118e-06, "loss": 0.634, "step": 5787 }, { "epoch": 0.59, "grad_norm": 1.506057630314058, "learning_rate": 3.7812647119229594e-06, "loss": 0.6557, "step": 5788 }, { "epoch": 0.59, "grad_norm": 1.2511917333162104, "learning_rate": 3.7796610105412424e-06, "loss": 0.5726, "step": 5789 }, { "epoch": 0.59, "grad_norm": 1.4302807805817435, "learning_rate": 3.7780574426420614e-06, "loss": 0.5402, "step": 5790 }, { "epoch": 0.59, "grad_norm": 1.4558752021162735, "learning_rate": 3.7764540084008166e-06, "loss": 0.555, "step": 5791 }, { "epoch": 0.59, "grad_norm": 1.323751293948111, "learning_rate": 3.7748507079928985e-06, "loss": 0.6616, "step": 5792 }, { "epoch": 0.59, "grad_norm": 1.5759409924259526, "learning_rate": 3.7732475415936753e-06, "loss": 0.7842, "step": 5793 }, { "epoch": 0.59, "grad_norm": 1.2509856729911224, "learning_rate": 3.771644509378504e-06, "loss": 0.6443, "step": 5794 }, { "epoch": 0.59, "grad_norm": 1.5313884303156329, "learning_rate": 3.770041611522728e-06, "loss": 0.6768, "step": 5795 }, { "epoch": 0.59, "grad_norm": 1.3262218408771986, "learning_rate": 3.768438848201671e-06, "loss": 0.684, "step": 5796 }, { "epoch": 0.59, "grad_norm": 1.7483335391504695, "learning_rate": 3.7668362195906516e-06, "loss": 0.6791, "step": 5797 }, { "epoch": 0.59, "grad_norm": 1.5274505592433845, "learning_rate": 3.7652337258649627e-06, "loss": 0.7189, "step": 5798 }, { "epoch": 0.59, "grad_norm": 1.3278134406938007, "learning_rate": 3.7636313671998903e-06, "loss": 0.6741, "step": 5799 }, { "epoch": 0.59, "grad_norm": 1.4730357588235656, "learning_rate": 3.7620291437707022e-06, "loss": 0.6669, "step": 5800 }, { "epoch": 0.59, "grad_norm": 1.3466426786434078, "learning_rate": 3.76042705575265e-06, "loss": 0.6075, "step": 5801 }, { "epoch": 0.59, "grad_norm": 1.2803759327616167, "learning_rate": 3.7588251033209755e-06, "loss": 0.5032, "step": 5802 }, { "epoch": 0.59, "grad_norm": 1.3405829424908997, "learning_rate": 3.7572232866509022e-06, "loss": 0.7038, "step": 5803 }, { "epoch": 0.59, "grad_norm": 1.6582603395695963, "learning_rate": 3.7556216059176374e-06, "loss": 0.6493, "step": 5804 }, { "epoch": 0.59, "grad_norm": 1.4905283691824067, "learning_rate": 3.754020061296376e-06, "loss": 0.6354, "step": 5805 }, { "epoch": 0.59, "grad_norm": 1.517828742357831, "learning_rate": 3.7524186529623003e-06, "loss": 0.6544, "step": 5806 }, { "epoch": 0.59, "grad_norm": 1.5012275729896365, "learning_rate": 3.7508173810905707e-06, "loss": 0.6717, "step": 5807 }, { "epoch": 0.59, "grad_norm": 1.4668179842728997, "learning_rate": 3.7492162458563376e-06, "loss": 0.6241, "step": 5808 }, { "epoch": 0.59, "grad_norm": 1.5141234905695922, "learning_rate": 3.7476152474347395e-06, "loss": 0.8641, "step": 5809 }, { "epoch": 0.59, "grad_norm": 1.629555673030688, "learning_rate": 3.7460143860008903e-06, "loss": 0.6865, "step": 5810 }, { "epoch": 0.59, "grad_norm": 1.4911915402314113, "learning_rate": 3.744413661729899e-06, "loss": 0.6477, "step": 5811 }, { "epoch": 0.59, "grad_norm": 1.4701744438720654, "learning_rate": 3.7428130747968528e-06, "loss": 0.5888, "step": 5812 }, { "epoch": 0.59, "grad_norm": 1.2937715854904872, "learning_rate": 3.7412126253768266e-06, "loss": 0.5725, "step": 5813 }, { "epoch": 0.59, "grad_norm": 1.561936480093296, "learning_rate": 3.7396123136448824e-06, "loss": 0.7421, "step": 5814 }, { "epoch": 0.59, "grad_norm": 1.3974061638752429, "learning_rate": 3.7380121397760606e-06, "loss": 0.6246, "step": 5815 }, { "epoch": 0.59, "grad_norm": 1.4817508061246734, "learning_rate": 3.7364121039453937e-06, "loss": 0.6304, "step": 5816 }, { "epoch": 0.59, "grad_norm": 1.5322964287212013, "learning_rate": 3.734812206327897e-06, "loss": 0.5288, "step": 5817 }, { "epoch": 0.59, "grad_norm": 1.2994280703556598, "learning_rate": 3.7332124470985663e-06, "loss": 0.6302, "step": 5818 }, { "epoch": 0.59, "grad_norm": 1.319155399427301, "learning_rate": 3.731612826432387e-06, "loss": 0.5375, "step": 5819 }, { "epoch": 0.59, "grad_norm": 1.4877280762343363, "learning_rate": 3.7300133445043306e-06, "loss": 0.722, "step": 5820 }, { "epoch": 0.59, "grad_norm": 1.4161282721423838, "learning_rate": 3.728414001489347e-06, "loss": 0.6422, "step": 5821 }, { "epoch": 0.59, "grad_norm": 1.4220089488255876, "learning_rate": 3.726814797562376e-06, "loss": 0.6569, "step": 5822 }, { "epoch": 0.59, "grad_norm": 1.2408583996462024, "learning_rate": 3.7252157328983448e-06, "loss": 0.5219, "step": 5823 }, { "epoch": 0.59, "grad_norm": 1.212787105018918, "learning_rate": 3.7236168076721544e-06, "loss": 0.6627, "step": 5824 }, { "epoch": 0.59, "grad_norm": 1.5524092595809276, "learning_rate": 3.7220180220587045e-06, "loss": 0.6659, "step": 5825 }, { "epoch": 0.59, "grad_norm": 1.4055197072679209, "learning_rate": 3.720419376232868e-06, "loss": 0.5327, "step": 5826 }, { "epoch": 0.6, "grad_norm": 1.644730400425191, "learning_rate": 3.7188208703695096e-06, "loss": 0.62, "step": 5827 }, { "epoch": 0.6, "grad_norm": 1.3892537655661819, "learning_rate": 3.717222504643476e-06, "loss": 0.5885, "step": 5828 }, { "epoch": 0.6, "grad_norm": 1.3907577523834633, "learning_rate": 3.7156242792295965e-06, "loss": 0.6303, "step": 5829 }, { "epoch": 0.6, "grad_norm": 1.682774774840332, "learning_rate": 3.7140261943026905e-06, "loss": 0.6425, "step": 5830 }, { "epoch": 0.6, "grad_norm": 1.495582992647737, "learning_rate": 3.7124282500375596e-06, "loss": 0.7053, "step": 5831 }, { "epoch": 0.6, "grad_norm": 1.2512839158911135, "learning_rate": 3.710830446608986e-06, "loss": 0.6441, "step": 5832 }, { "epoch": 0.6, "grad_norm": 1.611582329904188, "learning_rate": 3.7092327841917406e-06, "loss": 0.7215, "step": 5833 }, { "epoch": 0.6, "grad_norm": 1.4185562722730767, "learning_rate": 3.707635262960581e-06, "loss": 0.7765, "step": 5834 }, { "epoch": 0.6, "grad_norm": 1.5717262057995414, "learning_rate": 3.706037883090244e-06, "loss": 0.6537, "step": 5835 }, { "epoch": 0.6, "grad_norm": 1.4532164507756304, "learning_rate": 3.7044406447554527e-06, "loss": 0.5854, "step": 5836 }, { "epoch": 0.6, "grad_norm": 1.5590939740070109, "learning_rate": 3.70284354813092e-06, "loss": 0.7376, "step": 5837 }, { "epoch": 0.6, "grad_norm": 1.4242698809112218, "learning_rate": 3.7012465933913324e-06, "loss": 0.6338, "step": 5838 }, { "epoch": 0.6, "grad_norm": 1.3637204808862313, "learning_rate": 3.699649780711373e-06, "loss": 0.6189, "step": 5839 }, { "epoch": 0.6, "grad_norm": 1.3157616533328977, "learning_rate": 3.698053110265699e-06, "loss": 0.6499, "step": 5840 }, { "epoch": 0.6, "grad_norm": 1.4293929391028044, "learning_rate": 3.696456582228959e-06, "loss": 0.6449, "step": 5841 }, { "epoch": 0.6, "grad_norm": 1.406350278299238, "learning_rate": 3.694860196775785e-06, "loss": 0.701, "step": 5842 }, { "epoch": 0.6, "grad_norm": 1.717581105734373, "learning_rate": 3.693263954080788e-06, "loss": 0.7101, "step": 5843 }, { "epoch": 0.6, "grad_norm": 1.3947724290898427, "learning_rate": 3.6916678543185714e-06, "loss": 0.6049, "step": 5844 }, { "epoch": 0.6, "grad_norm": 1.3491059734980697, "learning_rate": 3.6900718976637173e-06, "loss": 0.6524, "step": 5845 }, { "epoch": 0.6, "grad_norm": 1.503334928974776, "learning_rate": 3.688476084290794e-06, "loss": 0.5455, "step": 5846 }, { "epoch": 0.6, "grad_norm": 1.6780581033954478, "learning_rate": 3.6868804143743527e-06, "loss": 0.6612, "step": 5847 }, { "epoch": 0.6, "grad_norm": 1.52884136782703, "learning_rate": 3.6852848880889337e-06, "loss": 0.6708, "step": 5848 }, { "epoch": 0.6, "grad_norm": 1.447913082807931, "learning_rate": 3.6836895056090547e-06, "loss": 0.6139, "step": 5849 }, { "epoch": 0.6, "grad_norm": 1.5276539139199408, "learning_rate": 3.682094267109223e-06, "loss": 0.6672, "step": 5850 }, { "epoch": 0.6, "grad_norm": 1.5555865878957154, "learning_rate": 3.6804991727639273e-06, "loss": 0.6578, "step": 5851 }, { "epoch": 0.6, "grad_norm": 1.7421113447997008, "learning_rate": 3.6789042227476403e-06, "loss": 0.6906, "step": 5852 }, { "epoch": 0.6, "grad_norm": 1.7359684711751733, "learning_rate": 3.6773094172348235e-06, "loss": 0.6783, "step": 5853 }, { "epoch": 0.6, "grad_norm": 1.4764222666228908, "learning_rate": 3.6757147563999164e-06, "loss": 0.6099, "step": 5854 }, { "epoch": 0.6, "grad_norm": 1.3483671714207441, "learning_rate": 3.674120240417345e-06, "loss": 0.7508, "step": 5855 }, { "epoch": 0.6, "grad_norm": 1.441949670646148, "learning_rate": 3.6725258694615234e-06, "loss": 0.623, "step": 5856 }, { "epoch": 0.6, "grad_norm": 1.5770343953981392, "learning_rate": 3.6709316437068414e-06, "loss": 0.6102, "step": 5857 }, { "epoch": 0.6, "grad_norm": 1.4281648816190928, "learning_rate": 3.6693375633276816e-06, "loss": 0.6179, "step": 5858 }, { "epoch": 0.6, "grad_norm": 1.5665195121207125, "learning_rate": 3.6677436284984064e-06, "loss": 0.5821, "step": 5859 }, { "epoch": 0.6, "grad_norm": 1.2783082521215863, "learning_rate": 3.6661498393933612e-06, "loss": 0.5975, "step": 5860 }, { "epoch": 0.6, "grad_norm": 1.4468731646818829, "learning_rate": 3.664556196186877e-06, "loss": 0.6898, "step": 5861 }, { "epoch": 0.6, "grad_norm": 1.2615445473656999, "learning_rate": 3.662962699053272e-06, "loss": 0.639, "step": 5862 }, { "epoch": 0.6, "grad_norm": 1.9060765080683613, "learning_rate": 3.661369348166842e-06, "loss": 0.7351, "step": 5863 }, { "epoch": 0.6, "grad_norm": 1.4465138039885521, "learning_rate": 3.659776143701873e-06, "loss": 0.723, "step": 5864 }, { "epoch": 0.6, "grad_norm": 1.3160492127206214, "learning_rate": 3.6581830858326295e-06, "loss": 0.7435, "step": 5865 }, { "epoch": 0.6, "grad_norm": 1.3782051057607558, "learning_rate": 3.656590174733362e-06, "loss": 0.654, "step": 5866 }, { "epoch": 0.6, "grad_norm": 1.5467167891041749, "learning_rate": 3.65499741057831e-06, "loss": 0.7374, "step": 5867 }, { "epoch": 0.6, "grad_norm": 1.3639507579386676, "learning_rate": 3.653404793541688e-06, "loss": 0.6281, "step": 5868 }, { "epoch": 0.6, "grad_norm": 1.5802316661788485, "learning_rate": 3.6518123237977e-06, "loss": 0.5834, "step": 5869 }, { "epoch": 0.6, "grad_norm": 1.4294851102928812, "learning_rate": 3.650220001520536e-06, "loss": 0.6253, "step": 5870 }, { "epoch": 0.6, "grad_norm": 1.4196973553834706, "learning_rate": 3.6486278268843613e-06, "loss": 0.6343, "step": 5871 }, { "epoch": 0.6, "grad_norm": 1.4337748974341895, "learning_rate": 3.6470358000633337e-06, "loss": 0.6179, "step": 5872 }, { "epoch": 0.6, "grad_norm": 1.392655025364404, "learning_rate": 3.6454439212315918e-06, "loss": 0.5955, "step": 5873 }, { "epoch": 0.6, "grad_norm": 1.537177243122431, "learning_rate": 3.6438521905632553e-06, "loss": 0.6371, "step": 5874 }, { "epoch": 0.6, "grad_norm": 1.4419060624650912, "learning_rate": 3.642260608232432e-06, "loss": 0.5282, "step": 5875 }, { "epoch": 0.6, "grad_norm": 1.5613052295776997, "learning_rate": 3.6406691744132127e-06, "loss": 0.6586, "step": 5876 }, { "epoch": 0.6, "grad_norm": 1.4959328509323662, "learning_rate": 3.639077889279668e-06, "loss": 0.6452, "step": 5877 }, { "epoch": 0.6, "grad_norm": 1.4222607656531545, "learning_rate": 3.6374867530058577e-06, "loss": 0.6174, "step": 5878 }, { "epoch": 0.6, "grad_norm": 1.3679330474544735, "learning_rate": 3.63589576576582e-06, "loss": 0.5774, "step": 5879 }, { "epoch": 0.6, "grad_norm": 1.4898739305137851, "learning_rate": 3.6343049277335808e-06, "loss": 0.65, "step": 5880 }, { "epoch": 0.6, "grad_norm": 1.4924020609078905, "learning_rate": 3.6327142390831503e-06, "loss": 0.6138, "step": 5881 }, { "epoch": 0.6, "grad_norm": 1.3193599178861548, "learning_rate": 3.631123699988517e-06, "loss": 0.5703, "step": 5882 }, { "epoch": 0.6, "grad_norm": 1.3780363222326217, "learning_rate": 3.629533310623658e-06, "loss": 0.5352, "step": 5883 }, { "epoch": 0.6, "grad_norm": 1.4335341062067297, "learning_rate": 3.627943071162535e-06, "loss": 0.5825, "step": 5884 }, { "epoch": 0.6, "grad_norm": 1.4920299817972502, "learning_rate": 3.6263529817790866e-06, "loss": 0.6312, "step": 5885 }, { "epoch": 0.6, "grad_norm": 1.5646217053139868, "learning_rate": 3.624763042647241e-06, "loss": 0.5987, "step": 5886 }, { "epoch": 0.6, "grad_norm": 1.4683781829709148, "learning_rate": 3.6231732539409098e-06, "loss": 0.7932, "step": 5887 }, { "epoch": 0.6, "grad_norm": 1.4576791545430636, "learning_rate": 3.621583615833983e-06, "loss": 0.6867, "step": 5888 }, { "epoch": 0.6, "grad_norm": 1.5061779967802278, "learning_rate": 3.61999412850034e-06, "loss": 0.7082, "step": 5889 }, { "epoch": 0.6, "grad_norm": 1.5625535276055604, "learning_rate": 3.6184047921138433e-06, "loss": 0.6548, "step": 5890 }, { "epoch": 0.6, "grad_norm": 1.4091760943809857, "learning_rate": 3.616815606848332e-06, "loss": 0.5765, "step": 5891 }, { "epoch": 0.6, "grad_norm": 1.7686282214301086, "learning_rate": 3.6152265728776383e-06, "loss": 0.7213, "step": 5892 }, { "epoch": 0.6, "grad_norm": 1.307842453921483, "learning_rate": 3.61363769037557e-06, "loss": 0.5898, "step": 5893 }, { "epoch": 0.6, "grad_norm": 1.497530892288093, "learning_rate": 3.6120489595159216e-06, "loss": 0.608, "step": 5894 }, { "epoch": 0.6, "grad_norm": 1.3887660548460448, "learning_rate": 3.6104603804724735e-06, "loss": 0.6574, "step": 5895 }, { "epoch": 0.6, "grad_norm": 1.5733191785195964, "learning_rate": 3.608871953418984e-06, "loss": 0.6735, "step": 5896 }, { "epoch": 0.6, "grad_norm": 1.3851407374733558, "learning_rate": 3.6072836785291987e-06, "loss": 0.6629, "step": 5897 }, { "epoch": 0.6, "grad_norm": 1.4366397763783854, "learning_rate": 3.605695555976848e-06, "loss": 0.6228, "step": 5898 }, { "epoch": 0.6, "grad_norm": 1.4737409459138822, "learning_rate": 3.6041075859356383e-06, "loss": 0.5484, "step": 5899 }, { "epoch": 0.6, "grad_norm": 1.6283869947398752, "learning_rate": 3.602519768579268e-06, "loss": 0.7294, "step": 5900 }, { "epoch": 0.6, "grad_norm": 1.4707307136115626, "learning_rate": 3.600932104081414e-06, "loss": 0.6544, "step": 5901 }, { "epoch": 0.6, "grad_norm": 1.6150448370384611, "learning_rate": 3.5993445926157357e-06, "loss": 0.6405, "step": 5902 }, { "epoch": 0.6, "grad_norm": 1.5180828857074815, "learning_rate": 3.5977572343558802e-06, "loss": 0.7461, "step": 5903 }, { "epoch": 0.6, "grad_norm": 1.5065361973094022, "learning_rate": 3.596170029475475e-06, "loss": 0.7058, "step": 5904 }, { "epoch": 0.6, "grad_norm": 1.3948969507044906, "learning_rate": 3.594582978148129e-06, "loss": 0.4423, "step": 5905 }, { "epoch": 0.6, "grad_norm": 1.3157519268209996, "learning_rate": 3.5929960805474386e-06, "loss": 0.6121, "step": 5906 }, { "epoch": 0.6, "grad_norm": 1.3579974157294294, "learning_rate": 3.591409336846977e-06, "loss": 0.691, "step": 5907 }, { "epoch": 0.6, "grad_norm": 1.342884201184247, "learning_rate": 3.5898227472203086e-06, "loss": 0.5518, "step": 5908 }, { "epoch": 0.6, "grad_norm": 2.1066791814047097, "learning_rate": 3.588236311840977e-06, "loss": 0.8192, "step": 5909 }, { "epoch": 0.6, "grad_norm": 1.3104235436197118, "learning_rate": 3.586650030882507e-06, "loss": 0.5631, "step": 5910 }, { "epoch": 0.6, "grad_norm": 1.5334047181583979, "learning_rate": 3.5850639045184076e-06, "loss": 0.725, "step": 5911 }, { "epoch": 0.6, "grad_norm": 1.5405718523218872, "learning_rate": 3.583477932922177e-06, "loss": 0.6995, "step": 5912 }, { "epoch": 0.6, "grad_norm": 1.3938241048819453, "learning_rate": 3.5818921162672835e-06, "loss": 0.5778, "step": 5913 }, { "epoch": 0.6, "grad_norm": 1.6269041136855806, "learning_rate": 3.580306454727192e-06, "loss": 0.7199, "step": 5914 }, { "epoch": 0.6, "grad_norm": 1.413780216283675, "learning_rate": 3.5787209484753434e-06, "loss": 0.7157, "step": 5915 }, { "epoch": 0.6, "grad_norm": 1.3981695503464169, "learning_rate": 3.5771355976851597e-06, "loss": 0.6498, "step": 5916 }, { "epoch": 0.6, "grad_norm": 1.5283265181696937, "learning_rate": 3.5755504025300525e-06, "loss": 0.6223, "step": 5917 }, { "epoch": 0.6, "grad_norm": 1.4395611225111178, "learning_rate": 3.573965363183413e-06, "loss": 0.5725, "step": 5918 }, { "epoch": 0.6, "grad_norm": 1.3420613367440672, "learning_rate": 3.5723804798186128e-06, "loss": 0.5223, "step": 5919 }, { "epoch": 0.6, "grad_norm": 1.3759205681075892, "learning_rate": 3.5707957526090108e-06, "loss": 0.5719, "step": 5920 }, { "epoch": 0.6, "grad_norm": 1.309692457109559, "learning_rate": 3.569211181727944e-06, "loss": 0.6053, "step": 5921 }, { "epoch": 0.6, "grad_norm": 1.4106534817626202, "learning_rate": 3.5676267673487384e-06, "loss": 0.7574, "step": 5922 }, { "epoch": 0.6, "grad_norm": 1.419740564402159, "learning_rate": 3.5660425096447e-06, "loss": 0.627, "step": 5923 }, { "epoch": 0.6, "grad_norm": 2.5207396560759676, "learning_rate": 3.5644584087891143e-06, "loss": 0.6236, "step": 5924 }, { "epoch": 0.61, "grad_norm": 1.4991527925300006, "learning_rate": 3.5628744649552528e-06, "loss": 0.6195, "step": 5925 }, { "epoch": 0.61, "grad_norm": 1.4445526567409153, "learning_rate": 3.561290678316375e-06, "loss": 0.631, "step": 5926 }, { "epoch": 0.61, "grad_norm": 1.4235810197303138, "learning_rate": 3.5597070490457097e-06, "loss": 0.6257, "step": 5927 }, { "epoch": 0.61, "grad_norm": 2.030092695457022, "learning_rate": 3.558123577316483e-06, "loss": 0.6697, "step": 5928 }, { "epoch": 0.61, "grad_norm": 1.444406014254821, "learning_rate": 3.5565402633018963e-06, "loss": 0.6751, "step": 5929 }, { "epoch": 0.61, "grad_norm": 1.6569819861112423, "learning_rate": 3.5549571071751315e-06, "loss": 0.6655, "step": 5930 }, { "epoch": 0.61, "grad_norm": 1.410343875634632, "learning_rate": 3.5533741091093606e-06, "loss": 0.5726, "step": 5931 }, { "epoch": 0.61, "grad_norm": 1.624769443418516, "learning_rate": 3.5517912692777332e-06, "loss": 0.7047, "step": 5932 }, { "epoch": 0.61, "grad_norm": 1.4151627547845869, "learning_rate": 3.550208587853382e-06, "loss": 0.6442, "step": 5933 }, { "epoch": 0.61, "grad_norm": 1.3572984140087845, "learning_rate": 3.548626065009424e-06, "loss": 0.624, "step": 5934 }, { "epoch": 0.61, "grad_norm": 1.3923289778328498, "learning_rate": 3.5470437009189564e-06, "loss": 0.6183, "step": 5935 }, { "epoch": 0.61, "grad_norm": 1.427889484208366, "learning_rate": 3.5454614957550616e-06, "loss": 0.6913, "step": 5936 }, { "epoch": 0.61, "grad_norm": 1.312213388387833, "learning_rate": 3.5438794496908053e-06, "loss": 0.5886, "step": 5937 }, { "epoch": 0.61, "grad_norm": 1.620705037732483, "learning_rate": 3.5422975628992307e-06, "loss": 0.7578, "step": 5938 }, { "epoch": 0.61, "grad_norm": 1.4935957594881102, "learning_rate": 3.540715835553369e-06, "loss": 0.6365, "step": 5939 }, { "epoch": 0.61, "grad_norm": 1.2759850970210493, "learning_rate": 3.5391342678262324e-06, "loss": 0.5168, "step": 5940 }, { "epoch": 0.61, "grad_norm": 1.3851047540656087, "learning_rate": 3.537552859890814e-06, "loss": 0.6411, "step": 5941 }, { "epoch": 0.61, "grad_norm": 1.4088212545442844, "learning_rate": 3.5359716119200914e-06, "loss": 0.6326, "step": 5942 }, { "epoch": 0.61, "grad_norm": 1.4160075922617867, "learning_rate": 3.534390524087024e-06, "loss": 0.608, "step": 5943 }, { "epoch": 0.61, "grad_norm": 1.460542414028712, "learning_rate": 3.5328095965645516e-06, "loss": 0.5877, "step": 5944 }, { "epoch": 0.61, "grad_norm": 1.4802075239379031, "learning_rate": 3.531228829525601e-06, "loss": 0.6386, "step": 5945 }, { "epoch": 0.61, "grad_norm": 1.2920356232362777, "learning_rate": 3.529648223143079e-06, "loss": 0.6331, "step": 5946 }, { "epoch": 0.61, "grad_norm": 1.4580325251188946, "learning_rate": 3.5280677775898725e-06, "loss": 0.6254, "step": 5947 }, { "epoch": 0.61, "grad_norm": 1.3771869512999924, "learning_rate": 3.526487493038856e-06, "loss": 0.5943, "step": 5948 }, { "epoch": 0.61, "grad_norm": 1.5647561789307014, "learning_rate": 3.524907369662879e-06, "loss": 0.7317, "step": 5949 }, { "epoch": 0.61, "grad_norm": 1.3801122869339664, "learning_rate": 3.523327407634781e-06, "loss": 0.6729, "step": 5950 }, { "epoch": 0.61, "grad_norm": 1.605354922039335, "learning_rate": 3.5217476071273816e-06, "loss": 0.6171, "step": 5951 }, { "epoch": 0.61, "grad_norm": 1.2877875075697456, "learning_rate": 3.5201679683134793e-06, "loss": 0.5757, "step": 5952 }, { "epoch": 0.61, "grad_norm": 1.4249603298196403, "learning_rate": 3.5185884913658576e-06, "loss": 0.6071, "step": 5953 }, { "epoch": 0.61, "grad_norm": 1.3900407000456172, "learning_rate": 3.5170091764572845e-06, "loss": 0.6314, "step": 5954 }, { "epoch": 0.61, "grad_norm": 1.2630971241576068, "learning_rate": 3.5154300237605056e-06, "loss": 0.552, "step": 5955 }, { "epoch": 0.61, "grad_norm": 1.7305039791440402, "learning_rate": 3.5138510334482516e-06, "loss": 0.6353, "step": 5956 }, { "epoch": 0.61, "grad_norm": 1.391831718708086, "learning_rate": 3.5122722056932364e-06, "loss": 0.7138, "step": 5957 }, { "epoch": 0.61, "grad_norm": 1.386298318839489, "learning_rate": 3.510693540668151e-06, "loss": 0.6368, "step": 5958 }, { "epoch": 0.61, "grad_norm": 1.3965912257590076, "learning_rate": 3.5091150385456744e-06, "loss": 0.6171, "step": 5959 }, { "epoch": 0.61, "grad_norm": 1.4882253253964837, "learning_rate": 3.507536699498467e-06, "loss": 0.6434, "step": 5960 }, { "epoch": 0.61, "grad_norm": 1.4186616507288001, "learning_rate": 3.5059585236991676e-06, "loss": 0.6657, "step": 5961 }, { "epoch": 0.61, "grad_norm": 1.4195084336979533, "learning_rate": 3.504380511320402e-06, "loss": 0.5899, "step": 5962 }, { "epoch": 0.61, "grad_norm": 1.6151521057133575, "learning_rate": 3.502802662534771e-06, "loss": 0.6289, "step": 5963 }, { "epoch": 0.61, "grad_norm": 1.5150972172127437, "learning_rate": 3.5012249775148668e-06, "loss": 0.6196, "step": 5964 }, { "epoch": 0.61, "grad_norm": 1.1879750489101126, "learning_rate": 3.499647456433257e-06, "loss": 0.5623, "step": 5965 }, { "epoch": 0.61, "grad_norm": 1.4818058309187587, "learning_rate": 3.4980700994624928e-06, "loss": 0.5815, "step": 5966 }, { "epoch": 0.61, "grad_norm": 1.2810349332638968, "learning_rate": 3.496492906775108e-06, "loss": 0.4858, "step": 5967 }, { "epoch": 0.61, "grad_norm": 1.4516385668795124, "learning_rate": 3.494915878543621e-06, "loss": 0.7454, "step": 5968 }, { "epoch": 0.61, "grad_norm": 1.4622508308463014, "learning_rate": 3.4933390149405263e-06, "loss": 0.7203, "step": 5969 }, { "epoch": 0.61, "grad_norm": 1.433532130045015, "learning_rate": 3.4917623161383042e-06, "loss": 0.5964, "step": 5970 }, { "epoch": 0.61, "grad_norm": 1.4057461041529407, "learning_rate": 3.4901857823094185e-06, "loss": 0.6921, "step": 5971 }, { "epoch": 0.61, "grad_norm": 1.5702883837755692, "learning_rate": 3.4886094136263094e-06, "loss": 0.5801, "step": 5972 }, { "epoch": 0.61, "grad_norm": 1.5299498312342046, "learning_rate": 3.487033210261407e-06, "loss": 0.6532, "step": 5973 }, { "epoch": 0.61, "grad_norm": 1.8474075428554806, "learning_rate": 3.4854571723871144e-06, "loss": 0.6546, "step": 5974 }, { "epoch": 0.61, "grad_norm": 1.5802309885898538, "learning_rate": 3.483881300175823e-06, "loss": 0.639, "step": 5975 }, { "epoch": 0.61, "grad_norm": 1.440885962756069, "learning_rate": 3.4823055937999053e-06, "loss": 0.6985, "step": 5976 }, { "epoch": 0.61, "grad_norm": 1.342791634939502, "learning_rate": 3.480730053431711e-06, "loss": 0.6976, "step": 5977 }, { "epoch": 0.61, "grad_norm": 1.4878846420453424, "learning_rate": 3.479154679243579e-06, "loss": 0.7715, "step": 5978 }, { "epoch": 0.61, "grad_norm": 1.2999765171394186, "learning_rate": 3.477579471407825e-06, "loss": 0.6239, "step": 5979 }, { "epoch": 0.61, "grad_norm": 1.4930484184799797, "learning_rate": 3.4760044300967456e-06, "loss": 0.6669, "step": 5980 }, { "epoch": 0.61, "grad_norm": 1.5443919035192715, "learning_rate": 3.4744295554826223e-06, "loss": 0.667, "step": 5981 }, { "epoch": 0.61, "grad_norm": 1.4174441783755403, "learning_rate": 3.4728548477377196e-06, "loss": 0.6545, "step": 5982 }, { "epoch": 0.61, "grad_norm": 1.4794207570953046, "learning_rate": 3.471280307034278e-06, "loss": 0.7327, "step": 5983 }, { "epoch": 0.61, "grad_norm": 1.571226546249703, "learning_rate": 3.4697059335445247e-06, "loss": 0.6728, "step": 5984 }, { "epoch": 0.61, "grad_norm": 1.6340065375748787, "learning_rate": 3.4681317274406688e-06, "loss": 0.6354, "step": 5985 }, { "epoch": 0.61, "grad_norm": 1.3610630301602047, "learning_rate": 3.4665576888948948e-06, "loss": 0.6203, "step": 5986 }, { "epoch": 0.61, "grad_norm": 1.647520761276073, "learning_rate": 3.4649838180793787e-06, "loss": 0.7066, "step": 5987 }, { "epoch": 0.61, "grad_norm": 1.381953553043616, "learning_rate": 3.4634101151662693e-06, "loss": 0.5642, "step": 5988 }, { "epoch": 0.61, "grad_norm": 1.3297713029322862, "learning_rate": 3.4618365803277016e-06, "loss": 0.5612, "step": 5989 }, { "epoch": 0.61, "grad_norm": 1.410419512404083, "learning_rate": 3.460263213735793e-06, "loss": 0.691, "step": 5990 }, { "epoch": 0.61, "grad_norm": 1.368607277931334, "learning_rate": 3.4586900155626356e-06, "loss": 0.6485, "step": 5991 }, { "epoch": 0.61, "grad_norm": 1.4298256684297879, "learning_rate": 3.457116985980314e-06, "loss": 0.7332, "step": 5992 }, { "epoch": 0.61, "grad_norm": 1.4380659884943898, "learning_rate": 3.4555441251608868e-06, "loss": 0.7195, "step": 5993 }, { "epoch": 0.61, "grad_norm": 5.529839619063211, "learning_rate": 3.453971433276395e-06, "loss": 0.6633, "step": 5994 }, { "epoch": 0.61, "grad_norm": 1.6014411184183392, "learning_rate": 3.4523989104988603e-06, "loss": 0.7266, "step": 5995 }, { "epoch": 0.61, "grad_norm": 1.5983894548933302, "learning_rate": 3.450826557000293e-06, "loss": 0.5509, "step": 5996 }, { "epoch": 0.61, "grad_norm": 1.425784189678634, "learning_rate": 3.449254372952674e-06, "loss": 0.6257, "step": 5997 }, { "epoch": 0.61, "grad_norm": 1.5394925460901516, "learning_rate": 3.4476823585279745e-06, "loss": 0.7263, "step": 5998 }, { "epoch": 0.61, "grad_norm": 1.4072495118658388, "learning_rate": 3.446110513898143e-06, "loss": 0.6606, "step": 5999 }, { "epoch": 0.61, "grad_norm": 1.3505101935811616, "learning_rate": 3.4445388392351086e-06, "loss": 0.6327, "step": 6000 }, { "epoch": 0.61, "grad_norm": 1.3410108430774537, "learning_rate": 3.442967334710787e-06, "loss": 0.6648, "step": 6001 }, { "epoch": 0.61, "grad_norm": 1.2411103694710324, "learning_rate": 3.4413960004970687e-06, "loss": 0.694, "step": 6002 }, { "epoch": 0.61, "grad_norm": 1.3707996299332368, "learning_rate": 3.439824836765829e-06, "loss": 0.6422, "step": 6003 }, { "epoch": 0.61, "grad_norm": 1.5268369789037786, "learning_rate": 3.4382538436889268e-06, "loss": 0.7018, "step": 6004 }, { "epoch": 0.61, "grad_norm": 1.484404363466475, "learning_rate": 3.4366830214381953e-06, "loss": 0.6692, "step": 6005 }, { "epoch": 0.61, "grad_norm": 1.3602004552030174, "learning_rate": 3.4351123701854567e-06, "loss": 0.6312, "step": 6006 }, { "epoch": 0.61, "grad_norm": 1.4653189384461238, "learning_rate": 3.4335418901025113e-06, "loss": 0.599, "step": 6007 }, { "epoch": 0.61, "grad_norm": 1.4810396646786135, "learning_rate": 3.431971581361139e-06, "loss": 0.6992, "step": 6008 }, { "epoch": 0.61, "grad_norm": 1.4088682181577075, "learning_rate": 3.4304014441331014e-06, "loss": 0.5621, "step": 6009 }, { "epoch": 0.61, "grad_norm": 1.261242122190614, "learning_rate": 3.428831478590147e-06, "loss": 0.6347, "step": 6010 }, { "epoch": 0.61, "grad_norm": 1.249848028266468, "learning_rate": 3.4272616849039973e-06, "loss": 0.6045, "step": 6011 }, { "epoch": 0.61, "grad_norm": 1.4996285540044825, "learning_rate": 3.4256920632463597e-06, "loss": 0.6501, "step": 6012 }, { "epoch": 0.61, "grad_norm": 1.416033257994486, "learning_rate": 3.4241226137889232e-06, "loss": 0.6019, "step": 6013 }, { "epoch": 0.61, "grad_norm": 1.3107240514687735, "learning_rate": 3.422553336703353e-06, "loss": 0.5988, "step": 6014 }, { "epoch": 0.61, "grad_norm": 1.78774625726849, "learning_rate": 3.4209842321613034e-06, "loss": 0.6247, "step": 6015 }, { "epoch": 0.61, "grad_norm": 1.509889707218149, "learning_rate": 3.419415300334402e-06, "loss": 0.7054, "step": 6016 }, { "epoch": 0.61, "grad_norm": 1.3064681331807375, "learning_rate": 3.4178465413942628e-06, "loss": 0.6414, "step": 6017 }, { "epoch": 0.61, "grad_norm": 1.5220697110879156, "learning_rate": 3.416277955512479e-06, "loss": 0.7846, "step": 6018 }, { "epoch": 0.61, "grad_norm": 1.5668676091728349, "learning_rate": 3.414709542860623e-06, "loss": 0.7002, "step": 6019 }, { "epoch": 0.61, "grad_norm": 1.2759526209323178, "learning_rate": 3.4131413036102525e-06, "loss": 0.5632, "step": 6020 }, { "epoch": 0.61, "grad_norm": 1.660009434888298, "learning_rate": 3.4115732379329038e-06, "loss": 0.8195, "step": 6021 }, { "epoch": 0.61, "grad_norm": 1.48172590261552, "learning_rate": 3.4100053460000924e-06, "loss": 0.7176, "step": 6022 }, { "epoch": 0.62, "grad_norm": 1.4769376294417267, "learning_rate": 3.4084376279833164e-06, "loss": 0.6305, "step": 6023 }, { "epoch": 0.62, "grad_norm": 1.5998206360186336, "learning_rate": 3.406870084054059e-06, "loss": 0.636, "step": 6024 }, { "epoch": 0.62, "grad_norm": 1.5425235300725435, "learning_rate": 3.405302714383778e-06, "loss": 0.7708, "step": 6025 }, { "epoch": 0.62, "grad_norm": 1.4126970931828038, "learning_rate": 3.4037355191439138e-06, "loss": 0.5421, "step": 6026 }, { "epoch": 0.62, "grad_norm": 1.4569938589684546, "learning_rate": 3.402168498505891e-06, "loss": 0.7289, "step": 6027 }, { "epoch": 0.62, "grad_norm": 1.374507681155127, "learning_rate": 3.400601652641109e-06, "loss": 0.5169, "step": 6028 }, { "epoch": 0.62, "grad_norm": 1.524498706518424, "learning_rate": 3.399034981720957e-06, "loss": 0.5894, "step": 6029 }, { "epoch": 0.62, "grad_norm": 1.646625702652355, "learning_rate": 3.397468485916795e-06, "loss": 0.5761, "step": 6030 }, { "epoch": 0.62, "grad_norm": 1.5178186129425761, "learning_rate": 3.3959021653999707e-06, "loss": 0.6233, "step": 6031 }, { "epoch": 0.62, "grad_norm": 1.327522670093017, "learning_rate": 3.3943360203418117e-06, "loss": 0.6361, "step": 6032 }, { "epoch": 0.62, "grad_norm": 1.2987605690304511, "learning_rate": 3.3927700509136224e-06, "loss": 0.5347, "step": 6033 }, { "epoch": 0.62, "grad_norm": 1.6319288610773603, "learning_rate": 3.391204257286694e-06, "loss": 0.6745, "step": 6034 }, { "epoch": 0.62, "grad_norm": 1.4709808661097115, "learning_rate": 3.3896386396322946e-06, "loss": 0.6619, "step": 6035 }, { "epoch": 0.62, "grad_norm": 1.3600030472019409, "learning_rate": 3.388073198121673e-06, "loss": 0.6129, "step": 6036 }, { "epoch": 0.62, "grad_norm": 1.7214575709471005, "learning_rate": 3.3865079329260587e-06, "loss": 0.7768, "step": 6037 }, { "epoch": 0.62, "grad_norm": 1.4896764459309961, "learning_rate": 3.3849428442166655e-06, "loss": 0.6217, "step": 6038 }, { "epoch": 0.62, "grad_norm": 1.4650039455176815, "learning_rate": 3.383377932164683e-06, "loss": 0.5322, "step": 6039 }, { "epoch": 0.62, "grad_norm": 1.3497883054514443, "learning_rate": 3.3818131969412835e-06, "loss": 0.7382, "step": 6040 }, { "epoch": 0.62, "grad_norm": 1.47668097203659, "learning_rate": 3.380248638717625e-06, "loss": 0.6893, "step": 6041 }, { "epoch": 0.62, "grad_norm": 1.528431324623948, "learning_rate": 3.3786842576648327e-06, "loss": 0.6952, "step": 6042 }, { "epoch": 0.62, "grad_norm": 1.3600040902325123, "learning_rate": 3.3771200539540285e-06, "loss": 0.5721, "step": 6043 }, { "epoch": 0.62, "grad_norm": 1.5952752206158216, "learning_rate": 3.3755560277563028e-06, "loss": 0.6524, "step": 6044 }, { "epoch": 0.62, "grad_norm": 1.3102702413716092, "learning_rate": 3.3739921792427333e-06, "loss": 0.6061, "step": 6045 }, { "epoch": 0.62, "grad_norm": 1.5113669357156332, "learning_rate": 3.3724285085843767e-06, "loss": 0.5581, "step": 6046 }, { "epoch": 0.62, "grad_norm": 1.7533307566123915, "learning_rate": 3.370865015952266e-06, "loss": 0.8133, "step": 6047 }, { "epoch": 0.62, "grad_norm": 1.3998030672950914, "learning_rate": 3.369301701517422e-06, "loss": 0.6374, "step": 6048 }, { "epoch": 0.62, "grad_norm": 1.270924921287205, "learning_rate": 3.367738565450843e-06, "loss": 0.5353, "step": 6049 }, { "epoch": 0.62, "grad_norm": 1.5110985231205623, "learning_rate": 3.3661756079235054e-06, "loss": 0.7085, "step": 6050 }, { "epoch": 0.62, "grad_norm": 1.3330296806378452, "learning_rate": 3.3646128291063664e-06, "loss": 0.6019, "step": 6051 }, { "epoch": 0.62, "grad_norm": 1.40102676886076, "learning_rate": 3.363050229170368e-06, "loss": 0.6461, "step": 6052 }, { "epoch": 0.62, "grad_norm": 1.3123908055139706, "learning_rate": 3.361487808286429e-06, "loss": 0.5184, "step": 6053 }, { "epoch": 0.62, "grad_norm": 1.3832527405665282, "learning_rate": 3.359925566625447e-06, "loss": 0.5843, "step": 6054 }, { "epoch": 0.62, "grad_norm": 1.4118523409256394, "learning_rate": 3.3583635043583073e-06, "loss": 0.5855, "step": 6055 }, { "epoch": 0.62, "grad_norm": 1.6254793211132599, "learning_rate": 3.3568016216558653e-06, "loss": 0.6717, "step": 6056 }, { "epoch": 0.62, "grad_norm": 1.3854536672054698, "learning_rate": 3.3552399186889663e-06, "loss": 0.6308, "step": 6057 }, { "epoch": 0.62, "grad_norm": 1.4544715510712496, "learning_rate": 3.3536783956284293e-06, "loss": 0.6745, "step": 6058 }, { "epoch": 0.62, "grad_norm": 1.5506004367130697, "learning_rate": 3.352117052645056e-06, "loss": 0.6487, "step": 6059 }, { "epoch": 0.62, "grad_norm": 1.3532909063066219, "learning_rate": 3.35055588990963e-06, "loss": 0.6487, "step": 6060 }, { "epoch": 0.62, "grad_norm": 1.358185451941173, "learning_rate": 3.348994907592912e-06, "loss": 0.6721, "step": 6061 }, { "epoch": 0.62, "grad_norm": 1.4285190250205027, "learning_rate": 3.3474341058656455e-06, "loss": 0.6451, "step": 6062 }, { "epoch": 0.62, "grad_norm": 1.4168376576886414, "learning_rate": 3.3458734848985554e-06, "loss": 0.7104, "step": 6063 }, { "epoch": 0.62, "grad_norm": 1.4599133405570013, "learning_rate": 3.3443130448623407e-06, "loss": 0.7116, "step": 6064 }, { "epoch": 0.62, "grad_norm": 1.284126798011852, "learning_rate": 3.3427527859276863e-06, "loss": 0.6543, "step": 6065 }, { "epoch": 0.62, "grad_norm": 1.3957342647770508, "learning_rate": 3.341192708265258e-06, "loss": 0.5967, "step": 6066 }, { "epoch": 0.62, "grad_norm": 1.2654647324390003, "learning_rate": 3.3396328120456968e-06, "loss": 0.7457, "step": 6067 }, { "epoch": 0.62, "grad_norm": 1.3072261138902062, "learning_rate": 3.3380730974396258e-06, "loss": 0.5475, "step": 6068 }, { "epoch": 0.62, "grad_norm": 1.3604318262636115, "learning_rate": 3.3365135646176537e-06, "loss": 0.6283, "step": 6069 }, { "epoch": 0.62, "grad_norm": 1.4413235593351081, "learning_rate": 3.3349542137503586e-06, "loss": 0.6669, "step": 6070 }, { "epoch": 0.62, "grad_norm": 1.4137947359617598, "learning_rate": 3.3333950450083097e-06, "loss": 0.5914, "step": 6071 }, { "epoch": 0.62, "grad_norm": 1.4437353220935627, "learning_rate": 3.331836058562048e-06, "loss": 0.5963, "step": 6072 }, { "epoch": 0.62, "grad_norm": 1.1934117751448452, "learning_rate": 3.3302772545820975e-06, "loss": 0.5628, "step": 6073 }, { "epoch": 0.62, "grad_norm": 1.4056255353929625, "learning_rate": 3.3287186332389676e-06, "loss": 0.6096, "step": 6074 }, { "epoch": 0.62, "grad_norm": 1.3854891466628037, "learning_rate": 3.327160194703135e-06, "loss": 0.7055, "step": 6075 }, { "epoch": 0.62, "grad_norm": 1.3180845158236671, "learning_rate": 3.3256019391450696e-06, "loss": 0.5902, "step": 6076 }, { "epoch": 0.62, "grad_norm": 1.4957171172346588, "learning_rate": 3.324043866735215e-06, "loss": 0.619, "step": 6077 }, { "epoch": 0.62, "grad_norm": 1.45123281279908, "learning_rate": 3.322485977643994e-06, "loss": 0.6451, "step": 6078 }, { "epoch": 0.62, "grad_norm": 1.379211256132044, "learning_rate": 3.3209282720418103e-06, "loss": 0.6962, "step": 6079 }, { "epoch": 0.62, "grad_norm": 1.5326217933481483, "learning_rate": 3.319370750099051e-06, "loss": 0.5712, "step": 6080 }, { "epoch": 0.62, "grad_norm": 1.476113583686184, "learning_rate": 3.3178134119860783e-06, "loss": 0.6199, "step": 6081 }, { "epoch": 0.62, "grad_norm": 1.173506696781226, "learning_rate": 3.3162562578732354e-06, "loss": 0.6123, "step": 6082 }, { "epoch": 0.62, "grad_norm": 1.6017296946967876, "learning_rate": 3.3146992879308505e-06, "loss": 0.7309, "step": 6083 }, { "epoch": 0.62, "grad_norm": 1.4832289880730352, "learning_rate": 3.313142502329221e-06, "loss": 0.5275, "step": 6084 }, { "epoch": 0.62, "grad_norm": 1.3440260161328879, "learning_rate": 3.311585901238636e-06, "loss": 0.6671, "step": 6085 }, { "epoch": 0.62, "grad_norm": 3.8787649031490408, "learning_rate": 3.3100294848293552e-06, "loss": 0.6824, "step": 6086 }, { "epoch": 0.62, "grad_norm": 1.4048617227532276, "learning_rate": 3.3084732532716223e-06, "loss": 0.6571, "step": 6087 }, { "epoch": 0.62, "grad_norm": 1.588046300928949, "learning_rate": 3.306917206735664e-06, "loss": 0.7133, "step": 6088 }, { "epoch": 0.62, "grad_norm": 1.3379154219566032, "learning_rate": 3.3053613453916778e-06, "loss": 0.549, "step": 6089 }, { "epoch": 0.62, "grad_norm": 1.3059353546078023, "learning_rate": 3.3038056694098485e-06, "loss": 0.6516, "step": 6090 }, { "epoch": 0.62, "grad_norm": 1.692246163120367, "learning_rate": 3.30225017896034e-06, "loss": 0.693, "step": 6091 }, { "epoch": 0.62, "grad_norm": 1.7955391414336188, "learning_rate": 3.3006948742132916e-06, "loss": 0.6076, "step": 6092 }, { "epoch": 0.62, "grad_norm": 1.6414040728220611, "learning_rate": 3.299139755338825e-06, "loss": 0.6883, "step": 6093 }, { "epoch": 0.62, "grad_norm": 1.32875914666326, "learning_rate": 3.2975848225070443e-06, "loss": 0.6231, "step": 6094 }, { "epoch": 0.62, "grad_norm": 1.324677141649655, "learning_rate": 3.2960300758880275e-06, "loss": 0.5862, "step": 6095 }, { "epoch": 0.62, "grad_norm": 1.44465631307878, "learning_rate": 3.294475515651837e-06, "loss": 0.6215, "step": 6096 }, { "epoch": 0.62, "grad_norm": 1.510530321405028, "learning_rate": 3.2929211419685112e-06, "loss": 0.6256, "step": 6097 }, { "epoch": 0.62, "grad_norm": 1.570978457160257, "learning_rate": 3.29136695500807e-06, "loss": 0.7084, "step": 6098 }, { "epoch": 0.62, "grad_norm": 1.7517878980277022, "learning_rate": 3.2898129549405158e-06, "loss": 0.6067, "step": 6099 }, { "epoch": 0.62, "grad_norm": 1.4217758230053774, "learning_rate": 3.2882591419358234e-06, "loss": 0.6004, "step": 6100 }, { "epoch": 0.62, "grad_norm": 1.4099437154921846, "learning_rate": 3.286705516163952e-06, "loss": 0.5874, "step": 6101 }, { "epoch": 0.62, "grad_norm": 1.3514407650290792, "learning_rate": 3.2851520777948443e-06, "loss": 0.6366, "step": 6102 }, { "epoch": 0.62, "grad_norm": 1.5438062047543113, "learning_rate": 3.2835988269984104e-06, "loss": 0.638, "step": 6103 }, { "epoch": 0.62, "grad_norm": 1.418031183520446, "learning_rate": 3.2820457639445525e-06, "loss": 0.5805, "step": 6104 }, { "epoch": 0.62, "grad_norm": 1.4807917956249466, "learning_rate": 3.2804928888031463e-06, "loss": 0.6904, "step": 6105 }, { "epoch": 0.62, "grad_norm": 1.7892966742157497, "learning_rate": 3.278940201744044e-06, "loss": 0.624, "step": 6106 }, { "epoch": 0.62, "grad_norm": 1.4521916371169519, "learning_rate": 3.277387702937085e-06, "loss": 0.6007, "step": 6107 }, { "epoch": 0.62, "grad_norm": 1.6364104789259335, "learning_rate": 3.2758353925520835e-06, "loss": 0.6177, "step": 6108 }, { "epoch": 0.62, "grad_norm": 1.4892936193846689, "learning_rate": 3.2742832707588325e-06, "loss": 0.5818, "step": 6109 }, { "epoch": 0.62, "grad_norm": 1.5534630199087702, "learning_rate": 3.2727313377271055e-06, "loss": 0.5961, "step": 6110 }, { "epoch": 0.62, "grad_norm": 1.3114343512510023, "learning_rate": 3.2711795936266546e-06, "loss": 0.5629, "step": 6111 }, { "epoch": 0.62, "grad_norm": 1.3674318553910672, "learning_rate": 3.2696280386272117e-06, "loss": 0.6506, "step": 6112 }, { "epoch": 0.62, "grad_norm": 1.558541891407465, "learning_rate": 3.268076672898492e-06, "loss": 0.6219, "step": 6113 }, { "epoch": 0.62, "grad_norm": 1.66932905297693, "learning_rate": 3.2665254966101823e-06, "loss": 0.6592, "step": 6114 }, { "epoch": 0.62, "grad_norm": 1.3413203207002191, "learning_rate": 3.2649745099319525e-06, "loss": 0.6129, "step": 6115 }, { "epoch": 0.62, "grad_norm": 1.3851712709113242, "learning_rate": 3.263423713033457e-06, "loss": 0.6353, "step": 6116 }, { "epoch": 0.62, "grad_norm": 1.3501177007886531, "learning_rate": 3.2618731060843167e-06, "loss": 0.6504, "step": 6117 }, { "epoch": 0.62, "grad_norm": 1.550366411232182, "learning_rate": 3.260322689254144e-06, "loss": 0.6503, "step": 6118 }, { "epoch": 0.62, "grad_norm": 1.6755554006971567, "learning_rate": 3.2587724627125265e-06, "loss": 0.6411, "step": 6119 }, { "epoch": 0.62, "grad_norm": 1.6245023462648198, "learning_rate": 3.2572224266290276e-06, "loss": 0.5754, "step": 6120 }, { "epoch": 0.63, "grad_norm": 1.6225286555999103, "learning_rate": 3.2556725811731936e-06, "loss": 0.6369, "step": 6121 }, { "epoch": 0.63, "grad_norm": 1.4307785889474447, "learning_rate": 3.254122926514551e-06, "loss": 0.6873, "step": 6122 }, { "epoch": 0.63, "grad_norm": 1.5514311868560873, "learning_rate": 3.252573462822601e-06, "loss": 0.6557, "step": 6123 }, { "epoch": 0.63, "grad_norm": 1.5392877969623813, "learning_rate": 3.251024190266828e-06, "loss": 0.727, "step": 6124 }, { "epoch": 0.63, "grad_norm": 1.5546771419416379, "learning_rate": 3.249475109016691e-06, "loss": 0.6558, "step": 6125 }, { "epoch": 0.63, "grad_norm": 1.3630458718494023, "learning_rate": 3.2479262192416316e-06, "loss": 0.6414, "step": 6126 }, { "epoch": 0.63, "grad_norm": 1.4679388898537356, "learning_rate": 3.2463775211110736e-06, "loss": 0.7243, "step": 6127 }, { "epoch": 0.63, "grad_norm": 1.544872625524128, "learning_rate": 3.2448290147944117e-06, "loss": 0.6155, "step": 6128 }, { "epoch": 0.63, "grad_norm": 1.4127449301213202, "learning_rate": 3.2432807004610246e-06, "loss": 0.6992, "step": 6129 }, { "epoch": 0.63, "grad_norm": 1.6235072543320173, "learning_rate": 3.2417325782802722e-06, "loss": 0.57, "step": 6130 }, { "epoch": 0.63, "grad_norm": 1.6199531311495765, "learning_rate": 3.2401846484214863e-06, "loss": 0.6816, "step": 6131 }, { "epoch": 0.63, "grad_norm": 1.3948854478952832, "learning_rate": 3.238636911053984e-06, "loss": 0.655, "step": 6132 }, { "epoch": 0.63, "grad_norm": 1.4793481998889093, "learning_rate": 3.237089366347059e-06, "loss": 0.6453, "step": 6133 }, { "epoch": 0.63, "grad_norm": 1.3318416381729603, "learning_rate": 3.2355420144699834e-06, "loss": 0.5758, "step": 6134 }, { "epoch": 0.63, "grad_norm": 1.482757083617996, "learning_rate": 3.2339948555920103e-06, "loss": 0.6098, "step": 6135 }, { "epoch": 0.63, "grad_norm": 1.7391480865019886, "learning_rate": 3.232447889882371e-06, "loss": 0.6265, "step": 6136 }, { "epoch": 0.63, "grad_norm": 1.373817220101042, "learning_rate": 3.2309011175102713e-06, "loss": 0.5611, "step": 6137 }, { "epoch": 0.63, "grad_norm": 2.2153250939538354, "learning_rate": 3.2293545386449034e-06, "loss": 0.6816, "step": 6138 }, { "epoch": 0.63, "grad_norm": 1.4900835031407866, "learning_rate": 3.2278081534554306e-06, "loss": 0.5586, "step": 6139 }, { "epoch": 0.63, "grad_norm": 1.3668627177508639, "learning_rate": 3.2262619621110018e-06, "loss": 0.7055, "step": 6140 }, { "epoch": 0.63, "grad_norm": 1.5240048853370303, "learning_rate": 3.2247159647807426e-06, "loss": 0.6756, "step": 6141 }, { "epoch": 0.63, "grad_norm": 1.4101681240163517, "learning_rate": 3.2231701616337534e-06, "loss": 0.6833, "step": 6142 }, { "epoch": 0.63, "grad_norm": 1.4937428111107391, "learning_rate": 3.2216245528391173e-06, "loss": 0.6297, "step": 6143 }, { "epoch": 0.63, "grad_norm": 1.472723586960376, "learning_rate": 3.220079138565899e-06, "loss": 0.6741, "step": 6144 }, { "epoch": 0.63, "grad_norm": 1.4713816075689732, "learning_rate": 3.2185339189831323e-06, "loss": 0.653, "step": 6145 }, { "epoch": 0.63, "grad_norm": 1.5423786468617364, "learning_rate": 3.2169888942598392e-06, "loss": 0.6146, "step": 6146 }, { "epoch": 0.63, "grad_norm": 1.244517446438543, "learning_rate": 3.2154440645650188e-06, "loss": 0.6171, "step": 6147 }, { "epoch": 0.63, "grad_norm": 1.4919876214305074, "learning_rate": 3.213899430067641e-06, "loss": 0.7106, "step": 6148 }, { "epoch": 0.63, "grad_norm": 1.6012658684728025, "learning_rate": 3.212354990936666e-06, "loss": 0.6598, "step": 6149 }, { "epoch": 0.63, "grad_norm": 1.52205074647301, "learning_rate": 3.2108107473410243e-06, "loss": 0.6469, "step": 6150 }, { "epoch": 0.63, "grad_norm": 1.7462313297131777, "learning_rate": 3.209266699449628e-06, "loss": 0.6273, "step": 6151 }, { "epoch": 0.63, "grad_norm": 2.2022914416550976, "learning_rate": 3.2077228474313682e-06, "loss": 0.4965, "step": 6152 }, { "epoch": 0.63, "grad_norm": 1.4475138466859727, "learning_rate": 3.206179191455111e-06, "loss": 0.6306, "step": 6153 }, { "epoch": 0.63, "grad_norm": 1.5809621709414503, "learning_rate": 3.204635731689707e-06, "loss": 0.6698, "step": 6154 }, { "epoch": 0.63, "grad_norm": 1.3616720134024822, "learning_rate": 3.203092468303981e-06, "loss": 0.6945, "step": 6155 }, { "epoch": 0.63, "grad_norm": 1.532918512286863, "learning_rate": 3.2015494014667377e-06, "loss": 0.7477, "step": 6156 }, { "epoch": 0.63, "grad_norm": 1.664347864067833, "learning_rate": 3.2000065313467587e-06, "loss": 0.6242, "step": 6157 }, { "epoch": 0.63, "grad_norm": 1.4916172633422078, "learning_rate": 3.198463858112809e-06, "loss": 0.6748, "step": 6158 }, { "epoch": 0.63, "grad_norm": 1.4317056374112644, "learning_rate": 3.196921381933624e-06, "loss": 0.6228, "step": 6159 }, { "epoch": 0.63, "grad_norm": 1.350409838165453, "learning_rate": 3.1953791029779247e-06, "loss": 0.6869, "step": 6160 }, { "epoch": 0.63, "grad_norm": 1.566568787722793, "learning_rate": 3.193837021414409e-06, "loss": 0.6051, "step": 6161 }, { "epoch": 0.63, "grad_norm": 1.3802711749504384, "learning_rate": 3.192295137411748e-06, "loss": 0.5943, "step": 6162 }, { "epoch": 0.63, "grad_norm": 1.3988237224356073, "learning_rate": 3.1907534511386002e-06, "loss": 0.6355, "step": 6163 }, { "epoch": 0.63, "grad_norm": 1.6499499066850225, "learning_rate": 3.1892119627635954e-06, "loss": 0.7488, "step": 6164 }, { "epoch": 0.63, "grad_norm": 1.844738760505008, "learning_rate": 3.1876706724553436e-06, "loss": 0.7245, "step": 6165 }, { "epoch": 0.63, "grad_norm": 1.3130226523530004, "learning_rate": 3.1861295803824345e-06, "loss": 0.5586, "step": 6166 }, { "epoch": 0.63, "grad_norm": 1.8819620743622216, "learning_rate": 3.1845886867134324e-06, "loss": 0.7241, "step": 6167 }, { "epoch": 0.63, "grad_norm": 1.5482047524081761, "learning_rate": 3.1830479916168856e-06, "loss": 0.7318, "step": 6168 }, { "epoch": 0.63, "grad_norm": 1.3658667826795896, "learning_rate": 3.181507495261318e-06, "loss": 0.5608, "step": 6169 }, { "epoch": 0.63, "grad_norm": 1.6333722053610424, "learning_rate": 3.179967197815229e-06, "loss": 0.6059, "step": 6170 }, { "epoch": 0.63, "grad_norm": 1.4765894246962419, "learning_rate": 3.1784270994470984e-06, "loss": 0.6443, "step": 6171 }, { "epoch": 0.63, "grad_norm": 1.4828372068479312, "learning_rate": 3.1768872003253883e-06, "loss": 0.5618, "step": 6172 }, { "epoch": 0.63, "grad_norm": 1.3111511178098916, "learning_rate": 3.175347500618532e-06, "loss": 0.5917, "step": 6173 }, { "epoch": 0.63, "grad_norm": 2.016648733776084, "learning_rate": 3.1738080004949444e-06, "loss": 0.6084, "step": 6174 }, { "epoch": 0.63, "grad_norm": 2.953979425025716, "learning_rate": 3.17226870012302e-06, "loss": 0.7469, "step": 6175 }, { "epoch": 0.63, "grad_norm": 1.5285512176219724, "learning_rate": 3.1707295996711274e-06, "loss": 0.6396, "step": 6176 }, { "epoch": 0.63, "grad_norm": 1.435687630186954, "learning_rate": 3.169190699307618e-06, "loss": 0.6928, "step": 6177 }, { "epoch": 0.63, "grad_norm": 1.5281168718792812, "learning_rate": 3.167651999200819e-06, "loss": 0.8024, "step": 6178 }, { "epoch": 0.63, "grad_norm": 1.363039516883269, "learning_rate": 3.1661134995190347e-06, "loss": 0.6637, "step": 6179 }, { "epoch": 0.63, "grad_norm": 1.4171779320151683, "learning_rate": 3.164575200430549e-06, "loss": 0.7436, "step": 6180 }, { "epoch": 0.63, "grad_norm": 1.278171141549526, "learning_rate": 3.1630371021036215e-06, "loss": 0.6653, "step": 6181 }, { "epoch": 0.63, "grad_norm": 1.4376409311908813, "learning_rate": 3.1614992047064947e-06, "loss": 0.6523, "step": 6182 }, { "epoch": 0.63, "grad_norm": 1.506841179014819, "learning_rate": 3.1599615084073855e-06, "loss": 0.7052, "step": 6183 }, { "epoch": 0.63, "grad_norm": 1.7314597995868386, "learning_rate": 3.158424013374488e-06, "loss": 0.6712, "step": 6184 }, { "epoch": 0.63, "grad_norm": 1.4107815270731503, "learning_rate": 3.1568867197759758e-06, "loss": 0.6511, "step": 6185 }, { "epoch": 0.63, "grad_norm": 1.6087109156564434, "learning_rate": 3.1553496277800032e-06, "loss": 0.5439, "step": 6186 }, { "epoch": 0.63, "grad_norm": 1.4302460912398673, "learning_rate": 3.1538127375546966e-06, "loss": 0.6855, "step": 6187 }, { "epoch": 0.63, "grad_norm": 1.4359459047918903, "learning_rate": 3.152276049268165e-06, "loss": 0.6915, "step": 6188 }, { "epoch": 0.63, "grad_norm": 1.3911964354339383, "learning_rate": 3.1507395630884936e-06, "loss": 0.6204, "step": 6189 }, { "epoch": 0.63, "grad_norm": 1.5665979882113275, "learning_rate": 3.1492032791837435e-06, "loss": 0.6491, "step": 6190 }, { "epoch": 0.63, "grad_norm": 1.5035730421224296, "learning_rate": 3.1476671977219587e-06, "loss": 0.6323, "step": 6191 }, { "epoch": 0.63, "grad_norm": 1.7106502893970792, "learning_rate": 3.1461313188711574e-06, "loss": 0.7265, "step": 6192 }, { "epoch": 0.63, "grad_norm": 1.5123371063710125, "learning_rate": 3.1445956427993353e-06, "loss": 0.6788, "step": 6193 }, { "epoch": 0.63, "grad_norm": 1.4314615138481264, "learning_rate": 3.1430601696744676e-06, "loss": 0.7465, "step": 6194 }, { "epoch": 0.63, "grad_norm": 1.4317739970264858, "learning_rate": 3.141524899664506e-06, "loss": 0.6715, "step": 6195 }, { "epoch": 0.63, "grad_norm": 1.4921324655551909, "learning_rate": 3.1399898329373813e-06, "loss": 0.675, "step": 6196 }, { "epoch": 0.63, "grad_norm": 1.5592819991183149, "learning_rate": 3.1384549696610035e-06, "loss": 0.5904, "step": 6197 }, { "epoch": 0.63, "grad_norm": 1.4077046853640214, "learning_rate": 3.136920310003254e-06, "loss": 0.6806, "step": 6198 }, { "epoch": 0.63, "grad_norm": 1.1998280648672293, "learning_rate": 3.135385854131999e-06, "loss": 0.4787, "step": 6199 }, { "epoch": 0.63, "grad_norm": 1.3999226203510131, "learning_rate": 3.13385160221508e-06, "loss": 0.5826, "step": 6200 }, { "epoch": 0.63, "grad_norm": 1.2385272416172448, "learning_rate": 3.1323175544203143e-06, "loss": 0.5988, "step": 6201 }, { "epoch": 0.63, "grad_norm": 1.5702226858475001, "learning_rate": 3.1307837109154983e-06, "loss": 0.6661, "step": 6202 }, { "epoch": 0.63, "grad_norm": 1.4975576396449428, "learning_rate": 3.1292500718684083e-06, "loss": 0.7049, "step": 6203 }, { "epoch": 0.63, "grad_norm": 1.4415236729557084, "learning_rate": 3.1277166374467928e-06, "loss": 0.5992, "step": 6204 }, { "epoch": 0.63, "grad_norm": 1.3587469314479006, "learning_rate": 3.126183407818384e-06, "loss": 0.6299, "step": 6205 }, { "epoch": 0.63, "grad_norm": 1.253449636441452, "learning_rate": 3.1246503831508884e-06, "loss": 0.5734, "step": 6206 }, { "epoch": 0.63, "grad_norm": 1.6722462710878603, "learning_rate": 3.1231175636119894e-06, "loss": 0.6257, "step": 6207 }, { "epoch": 0.63, "grad_norm": 1.8179826495588196, "learning_rate": 3.1215849493693507e-06, "loss": 0.5447, "step": 6208 }, { "epoch": 0.63, "grad_norm": 1.4641887199031873, "learning_rate": 3.120052540590609e-06, "loss": 0.5536, "step": 6209 }, { "epoch": 0.63, "grad_norm": 1.4380513381035271, "learning_rate": 3.1185203374433847e-06, "loss": 0.6876, "step": 6210 }, { "epoch": 0.63, "grad_norm": 1.487247636075599, "learning_rate": 3.116988340095272e-06, "loss": 0.6504, "step": 6211 }, { "epoch": 0.63, "grad_norm": 1.5143291253629458, "learning_rate": 3.115456548713842e-06, "loss": 0.6014, "step": 6212 }, { "epoch": 0.63, "grad_norm": 1.6397431611438382, "learning_rate": 3.1139249634666436e-06, "loss": 0.7241, "step": 6213 }, { "epoch": 0.63, "grad_norm": 1.548093904825367, "learning_rate": 3.1123935845212074e-06, "loss": 0.4666, "step": 6214 }, { "epoch": 0.63, "grad_norm": 2.317521899920328, "learning_rate": 3.110862412045035e-06, "loss": 0.6849, "step": 6215 }, { "epoch": 0.63, "grad_norm": 1.5492957961502516, "learning_rate": 3.109331446205608e-06, "loss": 0.6299, "step": 6216 }, { "epoch": 0.63, "grad_norm": 1.352210735881462, "learning_rate": 3.1078006871703893e-06, "loss": 0.7187, "step": 6217 }, { "epoch": 0.63, "grad_norm": 1.4966864881812925, "learning_rate": 3.1062701351068104e-06, "loss": 0.6922, "step": 6218 }, { "epoch": 0.64, "grad_norm": 1.411023986290196, "learning_rate": 3.10473979018229e-06, "loss": 0.6381, "step": 6219 }, { "epoch": 0.64, "grad_norm": 1.675060429905627, "learning_rate": 3.1032096525642176e-06, "loss": 0.7581, "step": 6220 }, { "epoch": 0.64, "grad_norm": 1.7121890181315695, "learning_rate": 3.1016797224199613e-06, "loss": 0.7121, "step": 6221 }, { "epoch": 0.64, "grad_norm": 1.5250650375984511, "learning_rate": 3.1001499999168704e-06, "loss": 0.6391, "step": 6222 }, { "epoch": 0.64, "grad_norm": 1.5343952640493124, "learning_rate": 3.098620485222263e-06, "loss": 0.7136, "step": 6223 }, { "epoch": 0.64, "grad_norm": 1.429479967171606, "learning_rate": 3.097091178503444e-06, "loss": 0.6315, "step": 6224 }, { "epoch": 0.64, "grad_norm": 1.446349717422005, "learning_rate": 3.095562079927691e-06, "loss": 0.6289, "step": 6225 }, { "epoch": 0.64, "grad_norm": 1.513929495343194, "learning_rate": 3.0940331896622573e-06, "loss": 0.7187, "step": 6226 }, { "epoch": 0.64, "grad_norm": 1.450738352714757, "learning_rate": 3.0925045078743747e-06, "loss": 0.6164, "step": 6227 }, { "epoch": 0.64, "grad_norm": 1.4603416211888438, "learning_rate": 3.090976034731257e-06, "loss": 0.6346, "step": 6228 }, { "epoch": 0.64, "grad_norm": 1.455027314320186, "learning_rate": 3.0894477704000857e-06, "loss": 0.6511, "step": 6229 }, { "epoch": 0.64, "grad_norm": 1.586310402038789, "learning_rate": 3.0879197150480278e-06, "loss": 0.6909, "step": 6230 }, { "epoch": 0.64, "grad_norm": 1.5395229360428844, "learning_rate": 3.0863918688422247e-06, "loss": 0.6, "step": 6231 }, { "epoch": 0.64, "grad_norm": 1.523025576457585, "learning_rate": 3.0848642319497914e-06, "loss": 0.6143, "step": 6232 }, { "epoch": 0.64, "grad_norm": 1.4819614820842806, "learning_rate": 3.083336804537827e-06, "loss": 0.6911, "step": 6233 }, { "epoch": 0.64, "grad_norm": 1.5506207457212178, "learning_rate": 3.081809586773401e-06, "loss": 0.6858, "step": 6234 }, { "epoch": 0.64, "grad_norm": 1.4724123584162276, "learning_rate": 3.080282578823564e-06, "loss": 0.6396, "step": 6235 }, { "epoch": 0.64, "grad_norm": 1.5211463907940883, "learning_rate": 3.078755780855343e-06, "loss": 0.6117, "step": 6236 }, { "epoch": 0.64, "grad_norm": 1.2902797806630049, "learning_rate": 3.077229193035739e-06, "loss": 0.5805, "step": 6237 }, { "epoch": 0.64, "grad_norm": 1.5719152987520961, "learning_rate": 3.075702815531735e-06, "loss": 0.6679, "step": 6238 }, { "epoch": 0.64, "grad_norm": 1.4865655999531262, "learning_rate": 3.0741766485102888e-06, "loss": 0.5646, "step": 6239 }, { "epoch": 0.64, "grad_norm": 1.553396135572237, "learning_rate": 3.0726506921383324e-06, "loss": 0.5938, "step": 6240 }, { "epoch": 0.64, "grad_norm": 1.4411057005967323, "learning_rate": 3.0711249465827788e-06, "loss": 0.679, "step": 6241 }, { "epoch": 0.64, "grad_norm": 1.8530243603261178, "learning_rate": 3.069599412010518e-06, "loss": 0.6504, "step": 6242 }, { "epoch": 0.64, "grad_norm": 1.4060367667067932, "learning_rate": 3.0680740885884113e-06, "loss": 0.6142, "step": 6243 }, { "epoch": 0.64, "grad_norm": 1.6567125027142453, "learning_rate": 3.066548976483304e-06, "loss": 0.6228, "step": 6244 }, { "epoch": 0.64, "grad_norm": 1.4705826112468077, "learning_rate": 3.0650240758620153e-06, "loss": 0.6838, "step": 6245 }, { "epoch": 0.64, "grad_norm": 1.3107773530979248, "learning_rate": 3.0634993868913375e-06, "loss": 0.698, "step": 6246 }, { "epoch": 0.64, "grad_norm": 1.479813009232958, "learning_rate": 3.061974909738049e-06, "loss": 0.6895, "step": 6247 }, { "epoch": 0.64, "grad_norm": 1.4423200442015127, "learning_rate": 3.0604506445688947e-06, "loss": 0.6585, "step": 6248 }, { "epoch": 0.64, "grad_norm": 1.8839878005559254, "learning_rate": 3.0589265915506032e-06, "loss": 0.6883, "step": 6249 }, { "epoch": 0.64, "grad_norm": 1.3452690507379055, "learning_rate": 3.0574027508498783e-06, "loss": 0.6131, "step": 6250 }, { "epoch": 0.64, "grad_norm": 1.5489694502437843, "learning_rate": 3.0558791226333974e-06, "loss": 0.6508, "step": 6251 }, { "epoch": 0.64, "grad_norm": 1.5218662116814203, "learning_rate": 3.0543557070678193e-06, "loss": 0.6766, "step": 6252 }, { "epoch": 0.64, "grad_norm": 1.4250055835657354, "learning_rate": 3.0528325043197786e-06, "loss": 0.6022, "step": 6253 }, { "epoch": 0.64, "grad_norm": 1.3472367513457686, "learning_rate": 3.051309514555883e-06, "loss": 0.632, "step": 6254 }, { "epoch": 0.64, "grad_norm": 1.817459936080279, "learning_rate": 3.0497867379427194e-06, "loss": 0.6786, "step": 6255 }, { "epoch": 0.64, "grad_norm": 1.5515223373006553, "learning_rate": 3.048264174646855e-06, "loss": 0.6754, "step": 6256 }, { "epoch": 0.64, "grad_norm": 1.7414372133180556, "learning_rate": 3.046741824834827e-06, "loss": 0.6907, "step": 6257 }, { "epoch": 0.64, "grad_norm": 1.4917512242619566, "learning_rate": 3.0452196886731523e-06, "loss": 0.7181, "step": 6258 }, { "epoch": 0.64, "grad_norm": 1.6212421307524614, "learning_rate": 3.0436977663283264e-06, "loss": 0.5656, "step": 6259 }, { "epoch": 0.64, "grad_norm": 1.4409348626950984, "learning_rate": 3.042176057966816e-06, "loss": 0.6839, "step": 6260 }, { "epoch": 0.64, "grad_norm": 1.5489816331349733, "learning_rate": 3.0406545637550723e-06, "loss": 0.6, "step": 6261 }, { "epoch": 0.64, "grad_norm": 1.5155183083760808, "learning_rate": 3.0391332838595156e-06, "loss": 0.7142, "step": 6262 }, { "epoch": 0.64, "grad_norm": 1.2539267257848774, "learning_rate": 3.037612218446546e-06, "loss": 0.612, "step": 6263 }, { "epoch": 0.64, "grad_norm": 1.4528222935279693, "learning_rate": 3.036091367682543e-06, "loss": 0.6157, "step": 6264 }, { "epoch": 0.64, "grad_norm": 1.528404358838542, "learning_rate": 3.0345707317338545e-06, "loss": 0.7379, "step": 6265 }, { "epoch": 0.64, "grad_norm": 1.340697413058255, "learning_rate": 3.0330503107668136e-06, "loss": 0.6375, "step": 6266 }, { "epoch": 0.64, "grad_norm": 1.4883408493215926, "learning_rate": 3.0315301049477266e-06, "loss": 0.6826, "step": 6267 }, { "epoch": 0.64, "grad_norm": 2.0158519549131864, "learning_rate": 3.030010114442874e-06, "loss": 0.6472, "step": 6268 }, { "epoch": 0.64, "grad_norm": 1.5336245731168359, "learning_rate": 3.028490339418514e-06, "loss": 0.6194, "step": 6269 }, { "epoch": 0.64, "grad_norm": 1.2719711987790407, "learning_rate": 3.026970780040885e-06, "loss": 0.5145, "step": 6270 }, { "epoch": 0.64, "grad_norm": 1.2777176944614004, "learning_rate": 3.025451436476196e-06, "loss": 0.5047, "step": 6271 }, { "epoch": 0.64, "grad_norm": 1.5212493266306166, "learning_rate": 3.023932308890636e-06, "loss": 0.7336, "step": 6272 }, { "epoch": 0.64, "grad_norm": 1.5711774761042825, "learning_rate": 3.0224133974503715e-06, "loss": 0.6497, "step": 6273 }, { "epoch": 0.64, "grad_norm": 1.8120747938276456, "learning_rate": 3.020894702321539e-06, "loss": 0.6532, "step": 6274 }, { "epoch": 0.64, "grad_norm": 3.3813773416570356, "learning_rate": 3.0193762236702602e-06, "loss": 0.5601, "step": 6275 }, { "epoch": 0.64, "grad_norm": 1.7123634455232395, "learning_rate": 3.0178579616626256e-06, "loss": 0.73, "step": 6276 }, { "epoch": 0.64, "grad_norm": 1.587121985388908, "learning_rate": 3.0163399164647058e-06, "loss": 0.6079, "step": 6277 }, { "epoch": 0.64, "grad_norm": 1.436583448716487, "learning_rate": 3.014822088242548e-06, "loss": 0.6788, "step": 6278 }, { "epoch": 0.64, "grad_norm": 1.5761761182974692, "learning_rate": 3.0133044771621713e-06, "loss": 0.5606, "step": 6279 }, { "epoch": 0.64, "grad_norm": 1.4811370860964195, "learning_rate": 3.011787083389578e-06, "loss": 0.6458, "step": 6280 }, { "epoch": 0.64, "grad_norm": 1.5360043571824757, "learning_rate": 3.0102699070907426e-06, "loss": 0.595, "step": 6281 }, { "epoch": 0.64, "grad_norm": 1.5124173703609927, "learning_rate": 3.008752948431614e-06, "loss": 0.6398, "step": 6282 }, { "epoch": 0.64, "grad_norm": 1.5185563224278174, "learning_rate": 3.00723620757812e-06, "loss": 0.6847, "step": 6283 }, { "epoch": 0.64, "grad_norm": 1.451724497004235, "learning_rate": 3.0057196846961663e-06, "loss": 0.7183, "step": 6284 }, { "epoch": 0.64, "grad_norm": 1.4292575121038171, "learning_rate": 3.0042033799516297e-06, "loss": 0.6036, "step": 6285 }, { "epoch": 0.64, "grad_norm": 1.414393106779586, "learning_rate": 3.0026872935103658e-06, "loss": 0.6473, "step": 6286 }, { "epoch": 0.64, "grad_norm": 1.3904924576191362, "learning_rate": 3.001171425538211e-06, "loss": 0.6445, "step": 6287 }, { "epoch": 0.64, "grad_norm": 1.340809497690376, "learning_rate": 2.9996557762009665e-06, "loss": 0.5911, "step": 6288 }, { "epoch": 0.64, "grad_norm": 1.4795598555537177, "learning_rate": 2.998140345664422e-06, "loss": 0.6723, "step": 6289 }, { "epoch": 0.64, "grad_norm": 1.3429906856825558, "learning_rate": 2.996625134094334e-06, "loss": 0.6563, "step": 6290 }, { "epoch": 0.64, "grad_norm": 1.7776727748091583, "learning_rate": 2.9951101416564405e-06, "loss": 0.668, "step": 6291 }, { "epoch": 0.64, "grad_norm": 1.413735252732777, "learning_rate": 2.9935953685164544e-06, "loss": 0.6716, "step": 6292 }, { "epoch": 0.64, "grad_norm": 1.46228980664125, "learning_rate": 2.992080814840059e-06, "loss": 0.579, "step": 6293 }, { "epoch": 0.64, "grad_norm": 1.5718740873283976, "learning_rate": 2.9905664807929247e-06, "loss": 0.6378, "step": 6294 }, { "epoch": 0.64, "grad_norm": 1.414033008526787, "learning_rate": 2.98905236654069e-06, "loss": 0.712, "step": 6295 }, { "epoch": 0.64, "grad_norm": 1.5550813454475438, "learning_rate": 2.987538472248969e-06, "loss": 0.6864, "step": 6296 }, { "epoch": 0.64, "grad_norm": 1.3453084844498067, "learning_rate": 2.9860247980833534e-06, "loss": 0.627, "step": 6297 }, { "epoch": 0.64, "grad_norm": 2.733162757049139, "learning_rate": 2.984511344209415e-06, "loss": 0.6007, "step": 6298 }, { "epoch": 0.64, "grad_norm": 1.601210546433336, "learning_rate": 2.9829981107926943e-06, "loss": 0.6104, "step": 6299 }, { "epoch": 0.64, "grad_norm": 1.6118117545123167, "learning_rate": 2.981485097998711e-06, "loss": 0.6515, "step": 6300 }, { "epoch": 0.64, "grad_norm": 1.401040368103085, "learning_rate": 2.9799723059929654e-06, "loss": 0.5989, "step": 6301 }, { "epoch": 0.64, "grad_norm": 1.4270363442980343, "learning_rate": 2.9784597349409217e-06, "loss": 0.5874, "step": 6302 }, { "epoch": 0.64, "grad_norm": 1.4885367920552874, "learning_rate": 2.9769473850080333e-06, "loss": 0.6294, "step": 6303 }, { "epoch": 0.64, "grad_norm": 1.4716835417979641, "learning_rate": 2.97543525635972e-06, "loss": 0.674, "step": 6304 }, { "epoch": 0.64, "grad_norm": 1.5993043396888689, "learning_rate": 2.9739233491613813e-06, "loss": 0.6515, "step": 6305 }, { "epoch": 0.64, "grad_norm": 1.5554513779983121, "learning_rate": 2.972411663578395e-06, "loss": 0.6448, "step": 6306 }, { "epoch": 0.64, "grad_norm": 1.4887979401972986, "learning_rate": 2.9709001997761056e-06, "loss": 0.6096, "step": 6307 }, { "epoch": 0.64, "grad_norm": 1.4804013935031177, "learning_rate": 2.9693889579198443e-06, "loss": 0.6661, "step": 6308 }, { "epoch": 0.64, "grad_norm": 1.3007915144159736, "learning_rate": 2.9678779381749117e-06, "loss": 0.6247, "step": 6309 }, { "epoch": 0.64, "grad_norm": 1.4198761490833345, "learning_rate": 2.9663671407065846e-06, "loss": 0.5608, "step": 6310 }, { "epoch": 0.64, "grad_norm": 1.555412922562676, "learning_rate": 2.964856565680116e-06, "loss": 0.651, "step": 6311 }, { "epoch": 0.64, "grad_norm": 1.444412870129231, "learning_rate": 2.963346213260737e-06, "loss": 0.6643, "step": 6312 }, { "epoch": 0.64, "grad_norm": 1.4565123992624334, "learning_rate": 2.961836083613651e-06, "loss": 0.6201, "step": 6313 }, { "epoch": 0.64, "grad_norm": 1.5263634477939372, "learning_rate": 2.960326176904037e-06, "loss": 0.6902, "step": 6314 }, { "epoch": 0.64, "grad_norm": 1.4883610821162112, "learning_rate": 2.9588164932970553e-06, "loss": 0.6016, "step": 6315 }, { "epoch": 0.64, "grad_norm": 1.3737398832810428, "learning_rate": 2.957307032957831e-06, "loss": 0.6945, "step": 6316 }, { "epoch": 0.65, "grad_norm": 1.4340692123337537, "learning_rate": 2.955797796051476e-06, "loss": 0.6757, "step": 6317 }, { "epoch": 0.65, "grad_norm": 1.5572026965363044, "learning_rate": 2.954288782743071e-06, "loss": 0.8165, "step": 6318 }, { "epoch": 0.65, "grad_norm": 1.411047667528869, "learning_rate": 2.9527799931976734e-06, "loss": 0.5891, "step": 6319 }, { "epoch": 0.65, "grad_norm": 1.4075446864716346, "learning_rate": 2.951271427580321e-06, "loss": 0.7115, "step": 6320 }, { "epoch": 0.65, "grad_norm": 1.469029755188453, "learning_rate": 2.949763086056018e-06, "loss": 0.6258, "step": 6321 }, { "epoch": 0.65, "grad_norm": 1.3039276704520963, "learning_rate": 2.948254968789751e-06, "loss": 0.5572, "step": 6322 }, { "epoch": 0.65, "grad_norm": 1.3968953259612733, "learning_rate": 2.9467470759464813e-06, "loss": 0.5968, "step": 6323 }, { "epoch": 0.65, "grad_norm": 1.5705184733306863, "learning_rate": 2.945239407691143e-06, "loss": 0.7519, "step": 6324 }, { "epoch": 0.65, "grad_norm": 1.3539599676743508, "learning_rate": 2.9437319641886463e-06, "loss": 0.614, "step": 6325 }, { "epoch": 0.65, "grad_norm": 1.5006239160237411, "learning_rate": 2.942224745603882e-06, "loss": 0.7167, "step": 6326 }, { "epoch": 0.65, "grad_norm": 1.4779028869032724, "learning_rate": 2.940717752101707e-06, "loss": 0.6786, "step": 6327 }, { "epoch": 0.65, "grad_norm": 1.2124658121747873, "learning_rate": 2.9392109838469596e-06, "loss": 0.5918, "step": 6328 }, { "epoch": 0.65, "grad_norm": 1.567053072957663, "learning_rate": 2.937704441004457e-06, "loss": 0.6448, "step": 6329 }, { "epoch": 0.65, "grad_norm": 1.3399445341282785, "learning_rate": 2.9361981237389804e-06, "loss": 0.6339, "step": 6330 }, { "epoch": 0.65, "grad_norm": 1.3089833723566615, "learning_rate": 2.934692032215298e-06, "loss": 0.5873, "step": 6331 }, { "epoch": 0.65, "grad_norm": 1.403409472583162, "learning_rate": 2.933186166598145e-06, "loss": 0.6133, "step": 6332 }, { "epoch": 0.65, "grad_norm": 1.375856750255954, "learning_rate": 2.9316805270522365e-06, "loss": 0.5449, "step": 6333 }, { "epoch": 0.65, "grad_norm": 1.5225789019494724, "learning_rate": 2.9301751137422653e-06, "loss": 0.6407, "step": 6334 }, { "epoch": 0.65, "grad_norm": 1.4347263269388426, "learning_rate": 2.9286699268328887e-06, "loss": 0.7306, "step": 6335 }, { "epoch": 0.65, "grad_norm": 1.6445081787251334, "learning_rate": 2.927164966488752e-06, "loss": 0.6363, "step": 6336 }, { "epoch": 0.65, "grad_norm": 2.539172277024234, "learning_rate": 2.9256602328744687e-06, "loss": 0.6933, "step": 6337 }, { "epoch": 0.65, "grad_norm": 1.60392829308606, "learning_rate": 2.924155726154626e-06, "loss": 0.6722, "step": 6338 }, { "epoch": 0.65, "grad_norm": 1.7707133415062768, "learning_rate": 2.9226514464937934e-06, "loss": 0.6166, "step": 6339 }, { "epoch": 0.65, "grad_norm": 1.4619017092371782, "learning_rate": 2.9211473940565094e-06, "loss": 0.6468, "step": 6340 }, { "epoch": 0.65, "grad_norm": 1.2202129044703032, "learning_rate": 2.9196435690072887e-06, "loss": 0.5681, "step": 6341 }, { "epoch": 0.65, "grad_norm": 1.6727353367043045, "learning_rate": 2.918139971510624e-06, "loss": 0.7062, "step": 6342 }, { "epoch": 0.65, "grad_norm": 1.2501683816046059, "learning_rate": 2.91663660173098e-06, "loss": 0.5831, "step": 6343 }, { "epoch": 0.65, "grad_norm": 1.3542105727756104, "learning_rate": 2.9151334598327954e-06, "loss": 0.6961, "step": 6344 }, { "epoch": 0.65, "grad_norm": 1.4566736444191226, "learning_rate": 2.91363054598049e-06, "loss": 0.566, "step": 6345 }, { "epoch": 0.65, "grad_norm": 1.4853807206836653, "learning_rate": 2.9121278603384514e-06, "loss": 0.6034, "step": 6346 }, { "epoch": 0.65, "grad_norm": 1.5315635216940613, "learning_rate": 2.910625403071049e-06, "loss": 0.7235, "step": 6347 }, { "epoch": 0.65, "grad_norm": 1.3218010262048516, "learning_rate": 2.9091231743426217e-06, "loss": 0.6418, "step": 6348 }, { "epoch": 0.65, "grad_norm": 1.5056783080445262, "learning_rate": 2.907621174317485e-06, "loss": 0.6766, "step": 6349 }, { "epoch": 0.65, "grad_norm": 1.4827876681934709, "learning_rate": 2.9061194031599306e-06, "loss": 0.6033, "step": 6350 }, { "epoch": 0.65, "grad_norm": 1.4581159279516782, "learning_rate": 2.904617861034227e-06, "loss": 0.5549, "step": 6351 }, { "epoch": 0.65, "grad_norm": 1.4741349335490208, "learning_rate": 2.9031165481046125e-06, "loss": 0.7414, "step": 6352 }, { "epoch": 0.65, "grad_norm": 1.4065088088823552, "learning_rate": 2.9016154645353016e-06, "loss": 0.6041, "step": 6353 }, { "epoch": 0.65, "grad_norm": 1.4953897303142585, "learning_rate": 2.90011461049049e-06, "loss": 0.6485, "step": 6354 }, { "epoch": 0.65, "grad_norm": 1.6431570628091234, "learning_rate": 2.898613986134337e-06, "loss": 0.7029, "step": 6355 }, { "epoch": 0.65, "grad_norm": 1.5670291532887424, "learning_rate": 2.8971135916309896e-06, "loss": 0.6472, "step": 6356 }, { "epoch": 0.65, "grad_norm": 1.5462951142343346, "learning_rate": 2.8956134271445597e-06, "loss": 0.7316, "step": 6357 }, { "epoch": 0.65, "grad_norm": 1.4583377898924208, "learning_rate": 2.8941134928391363e-06, "loss": 0.7027, "step": 6358 }, { "epoch": 0.65, "grad_norm": 1.5851602102973859, "learning_rate": 2.8926137888787887e-06, "loss": 0.7219, "step": 6359 }, { "epoch": 0.65, "grad_norm": 1.497551008292902, "learning_rate": 2.891114315427552e-06, "loss": 0.5623, "step": 6360 }, { "epoch": 0.65, "grad_norm": 1.5680396118952158, "learning_rate": 2.8896150726494454e-06, "loss": 0.6594, "step": 6361 }, { "epoch": 0.65, "grad_norm": 1.4924551323631774, "learning_rate": 2.8881160607084556e-06, "loss": 0.772, "step": 6362 }, { "epoch": 0.65, "grad_norm": 1.3714387940395967, "learning_rate": 2.8866172797685466e-06, "loss": 0.6036, "step": 6363 }, { "epoch": 0.65, "grad_norm": 1.5172974963951098, "learning_rate": 2.885118729993658e-06, "loss": 0.633, "step": 6364 }, { "epoch": 0.65, "grad_norm": 1.5148107811157603, "learning_rate": 2.8836204115477053e-06, "loss": 0.6868, "step": 6365 }, { "epoch": 0.65, "grad_norm": 1.4670008063491322, "learning_rate": 2.882122324594575e-06, "loss": 0.6264, "step": 6366 }, { "epoch": 0.65, "grad_norm": 1.4447329989203221, "learning_rate": 2.880624469298129e-06, "loss": 0.6416, "step": 6367 }, { "epoch": 0.65, "grad_norm": 1.4126105549580177, "learning_rate": 2.8791268458222076e-06, "loss": 0.5865, "step": 6368 }, { "epoch": 0.65, "grad_norm": 1.7004421230306175, "learning_rate": 2.8776294543306204e-06, "loss": 0.712, "step": 6369 }, { "epoch": 0.65, "grad_norm": 1.566444906793875, "learning_rate": 2.8761322949871584e-06, "loss": 0.6719, "step": 6370 }, { "epoch": 0.65, "grad_norm": 1.2795309447156162, "learning_rate": 2.8746353679555794e-06, "loss": 0.6507, "step": 6371 }, { "epoch": 0.65, "grad_norm": 2.4333965941189453, "learning_rate": 2.8731386733996196e-06, "loss": 0.6099, "step": 6372 }, { "epoch": 0.65, "grad_norm": 1.3994013283312903, "learning_rate": 2.8716422114829927e-06, "loss": 0.7121, "step": 6373 }, { "epoch": 0.65, "grad_norm": 1.3750814133981843, "learning_rate": 2.87014598236938e-06, "loss": 0.6168, "step": 6374 }, { "epoch": 0.65, "grad_norm": 1.5592893998476691, "learning_rate": 2.868649986222446e-06, "loss": 0.691, "step": 6375 }, { "epoch": 0.65, "grad_norm": 1.6308798389281525, "learning_rate": 2.8671542232058226e-06, "loss": 0.6036, "step": 6376 }, { "epoch": 0.65, "grad_norm": 1.6407636190696575, "learning_rate": 2.865658693483116e-06, "loss": 0.6185, "step": 6377 }, { "epoch": 0.65, "grad_norm": 1.1930997329468196, "learning_rate": 2.864163397217913e-06, "loss": 0.6108, "step": 6378 }, { "epoch": 0.65, "grad_norm": 1.4496425873543948, "learning_rate": 2.8626683345737715e-06, "loss": 0.5553, "step": 6379 }, { "epoch": 0.65, "grad_norm": 1.303641402245621, "learning_rate": 2.861173505714223e-06, "loss": 0.5946, "step": 6380 }, { "epoch": 0.65, "grad_norm": 1.5511346020028212, "learning_rate": 2.8596789108027724e-06, "loss": 0.6072, "step": 6381 }, { "epoch": 0.65, "grad_norm": 1.6543822140073503, "learning_rate": 2.8581845500029037e-06, "loss": 0.5823, "step": 6382 }, { "epoch": 0.65, "grad_norm": 1.6056104642149907, "learning_rate": 2.856690423478069e-06, "loss": 0.6493, "step": 6383 }, { "epoch": 0.65, "grad_norm": 1.3304095013172106, "learning_rate": 2.855196531391702e-06, "loss": 0.5336, "step": 6384 }, { "epoch": 0.65, "grad_norm": 1.5029266002771515, "learning_rate": 2.853702873907205e-06, "loss": 0.6649, "step": 6385 }, { "epoch": 0.65, "grad_norm": 1.3715790396529084, "learning_rate": 2.8522094511879537e-06, "loss": 0.6924, "step": 6386 }, { "epoch": 0.65, "grad_norm": 1.4089921807095824, "learning_rate": 2.8507162633973066e-06, "loss": 0.6604, "step": 6387 }, { "epoch": 0.65, "grad_norm": 1.5364000852565922, "learning_rate": 2.849223310698586e-06, "loss": 0.643, "step": 6388 }, { "epoch": 0.65, "grad_norm": 1.6113926687075462, "learning_rate": 2.847730593255097e-06, "loss": 0.6822, "step": 6389 }, { "epoch": 0.65, "grad_norm": 1.733469933277628, "learning_rate": 2.846238111230114e-06, "loss": 0.6705, "step": 6390 }, { "epoch": 0.65, "grad_norm": 1.3036145701304962, "learning_rate": 2.8447458647868843e-06, "loss": 0.5658, "step": 6391 }, { "epoch": 0.65, "grad_norm": 1.4357321687734759, "learning_rate": 2.8432538540886344e-06, "loss": 0.5777, "step": 6392 }, { "epoch": 0.65, "grad_norm": 1.3607664827376587, "learning_rate": 2.8417620792985645e-06, "loss": 0.5915, "step": 6393 }, { "epoch": 0.65, "grad_norm": 1.342993538227548, "learning_rate": 2.8402705405798457e-06, "loss": 0.5339, "step": 6394 }, { "epoch": 0.65, "grad_norm": 1.5724888958604522, "learning_rate": 2.8387792380956225e-06, "loss": 0.6222, "step": 6395 }, { "epoch": 0.65, "grad_norm": 1.4727094471509237, "learning_rate": 2.8372881720090194e-06, "loss": 0.6125, "step": 6396 }, { "epoch": 0.65, "grad_norm": 1.41673638548374, "learning_rate": 2.8357973424831276e-06, "loss": 0.6621, "step": 6397 }, { "epoch": 0.65, "grad_norm": 1.6304850824721961, "learning_rate": 2.834306749681021e-06, "loss": 0.6256, "step": 6398 }, { "epoch": 0.65, "grad_norm": 2.4937991379696425, "learning_rate": 2.8328163937657405e-06, "loss": 0.6423, "step": 6399 }, { "epoch": 0.65, "grad_norm": 1.3778467818882234, "learning_rate": 2.8313262749003013e-06, "loss": 0.5944, "step": 6400 }, { "epoch": 0.65, "grad_norm": 1.3358301870343534, "learning_rate": 2.8298363932476993e-06, "loss": 0.6749, "step": 6401 }, { "epoch": 0.65, "grad_norm": 1.4613345049850666, "learning_rate": 2.8283467489708953e-06, "loss": 0.5834, "step": 6402 }, { "epoch": 0.65, "grad_norm": 1.5490841239928823, "learning_rate": 2.8268573422328337e-06, "loss": 0.5803, "step": 6403 }, { "epoch": 0.65, "grad_norm": 1.52289402272259, "learning_rate": 2.8253681731964254e-06, "loss": 0.5964, "step": 6404 }, { "epoch": 0.65, "grad_norm": 1.3078782636919881, "learning_rate": 2.8238792420245564e-06, "loss": 0.6052, "step": 6405 }, { "epoch": 0.65, "grad_norm": 1.3343879508704823, "learning_rate": 2.822390548880091e-06, "loss": 0.5986, "step": 6406 }, { "epoch": 0.65, "grad_norm": 1.3638945857557625, "learning_rate": 2.820902093925865e-06, "loss": 0.6973, "step": 6407 }, { "epoch": 0.65, "grad_norm": 1.5802281447040045, "learning_rate": 2.8194138773246867e-06, "loss": 0.8419, "step": 6408 }, { "epoch": 0.65, "grad_norm": 1.560846393073594, "learning_rate": 2.817925899239338e-06, "loss": 0.6105, "step": 6409 }, { "epoch": 0.65, "grad_norm": 1.3012418091249802, "learning_rate": 2.8164381598325786e-06, "loss": 0.5908, "step": 6410 }, { "epoch": 0.65, "grad_norm": 1.287676683401651, "learning_rate": 2.8149506592671377e-06, "loss": 0.6089, "step": 6411 }, { "epoch": 0.65, "grad_norm": 1.568949603644473, "learning_rate": 2.8134633977057236e-06, "loss": 0.6772, "step": 6412 }, { "epoch": 0.65, "grad_norm": 1.4973883682222577, "learning_rate": 2.811976375311013e-06, "loss": 0.6379, "step": 6413 }, { "epoch": 0.65, "grad_norm": 1.640155094910212, "learning_rate": 2.8104895922456565e-06, "loss": 0.6954, "step": 6414 }, { "epoch": 0.66, "grad_norm": 1.4865765889084956, "learning_rate": 2.809003048672285e-06, "loss": 0.6297, "step": 6415 }, { "epoch": 0.66, "grad_norm": 1.7309858125850357, "learning_rate": 2.8075167447534955e-06, "loss": 0.6224, "step": 6416 }, { "epoch": 0.66, "grad_norm": 1.5291352744587434, "learning_rate": 2.8060306806518658e-06, "loss": 0.6892, "step": 6417 }, { "epoch": 0.66, "grad_norm": 1.3756272766659532, "learning_rate": 2.804544856529941e-06, "loss": 0.5774, "step": 6418 }, { "epoch": 0.66, "grad_norm": 1.425628601205826, "learning_rate": 2.8030592725502415e-06, "loss": 0.6158, "step": 6419 }, { "epoch": 0.66, "grad_norm": 1.563697631553271, "learning_rate": 2.8015739288752653e-06, "loss": 0.7038, "step": 6420 }, { "epoch": 0.66, "grad_norm": 1.438640574959616, "learning_rate": 2.8000888256674824e-06, "loss": 0.6863, "step": 6421 }, { "epoch": 0.66, "grad_norm": 1.5076811927767588, "learning_rate": 2.7986039630893347e-06, "loss": 0.6201, "step": 6422 }, { "epoch": 0.66, "grad_norm": 1.22837530023094, "learning_rate": 2.797119341303236e-06, "loss": 0.578, "step": 6423 }, { "epoch": 0.66, "grad_norm": 1.3778887866555907, "learning_rate": 2.7956349604715802e-06, "loss": 0.5722, "step": 6424 }, { "epoch": 0.66, "grad_norm": 1.405823211646825, "learning_rate": 2.794150820756728e-06, "loss": 0.6141, "step": 6425 }, { "epoch": 0.66, "grad_norm": 1.4976640638125895, "learning_rate": 2.7926669223210213e-06, "loss": 0.6658, "step": 6426 }, { "epoch": 0.66, "grad_norm": 1.6406551973680021, "learning_rate": 2.7911832653267673e-06, "loss": 0.7101, "step": 6427 }, { "epoch": 0.66, "grad_norm": 1.3960284265852794, "learning_rate": 2.7896998499362504e-06, "loss": 0.5347, "step": 6428 }, { "epoch": 0.66, "grad_norm": 1.4791885153642796, "learning_rate": 2.788216676311732e-06, "loss": 0.6308, "step": 6429 }, { "epoch": 0.66, "grad_norm": 1.5556897687433235, "learning_rate": 2.7867337446154397e-06, "loss": 0.697, "step": 6430 }, { "epoch": 0.66, "grad_norm": 1.6070687681068778, "learning_rate": 2.785251055009584e-06, "loss": 0.6963, "step": 6431 }, { "epoch": 0.66, "grad_norm": 1.2873850657249248, "learning_rate": 2.783768607656341e-06, "loss": 0.6259, "step": 6432 }, { "epoch": 0.66, "grad_norm": 1.1802258093690086, "learning_rate": 2.7822864027178596e-06, "loss": 0.5572, "step": 6433 }, { "epoch": 0.66, "grad_norm": 1.4793626543894682, "learning_rate": 2.7808044403562707e-06, "loss": 0.6703, "step": 6434 }, { "epoch": 0.66, "grad_norm": 1.5247631152721803, "learning_rate": 2.7793227207336733e-06, "loss": 0.569, "step": 6435 }, { "epoch": 0.66, "grad_norm": 1.6395449794241632, "learning_rate": 2.7778412440121384e-06, "loss": 0.6653, "step": 6436 }, { "epoch": 0.66, "grad_norm": 1.5004288255295772, "learning_rate": 2.7763600103537114e-06, "loss": 0.5748, "step": 6437 }, { "epoch": 0.66, "grad_norm": 1.4030830863580155, "learning_rate": 2.7748790199204145e-06, "loss": 0.6309, "step": 6438 }, { "epoch": 0.66, "grad_norm": 1.6037942965113123, "learning_rate": 2.773398272874237e-06, "loss": 0.6677, "step": 6439 }, { "epoch": 0.66, "grad_norm": 1.472220556153932, "learning_rate": 2.7719177693771504e-06, "loss": 0.6778, "step": 6440 }, { "epoch": 0.66, "grad_norm": 1.440883227003682, "learning_rate": 2.7704375095910907e-06, "loss": 0.6669, "step": 6441 }, { "epoch": 0.66, "grad_norm": 1.4842045496235856, "learning_rate": 2.7689574936779706e-06, "loss": 0.6411, "step": 6442 }, { "epoch": 0.66, "grad_norm": 1.5194296257349331, "learning_rate": 2.7674777217996797e-06, "loss": 0.7353, "step": 6443 }, { "epoch": 0.66, "grad_norm": 1.5473086821803406, "learning_rate": 2.765998194118073e-06, "loss": 0.61, "step": 6444 }, { "epoch": 0.66, "grad_norm": 1.4812536938092689, "learning_rate": 2.764518910794989e-06, "loss": 0.6107, "step": 6445 }, { "epoch": 0.66, "grad_norm": 1.5795983784263374, "learning_rate": 2.76303987199223e-06, "loss": 0.6699, "step": 6446 }, { "epoch": 0.66, "grad_norm": 1.7481974775540285, "learning_rate": 2.761561077871576e-06, "loss": 0.6407, "step": 6447 }, { "epoch": 0.66, "grad_norm": 1.4898687101908692, "learning_rate": 2.7600825285947795e-06, "loss": 0.6169, "step": 6448 }, { "epoch": 0.66, "grad_norm": 1.7830962728267818, "learning_rate": 2.75860422432357e-06, "loss": 0.641, "step": 6449 }, { "epoch": 0.66, "grad_norm": 1.700340972802717, "learning_rate": 2.7571261652196435e-06, "loss": 0.6001, "step": 6450 }, { "epoch": 0.66, "grad_norm": 1.575560370994334, "learning_rate": 2.755648351444671e-06, "loss": 0.6724, "step": 6451 }, { "epoch": 0.66, "grad_norm": 1.4465337626565267, "learning_rate": 2.7541707831603025e-06, "loss": 0.652, "step": 6452 }, { "epoch": 0.66, "grad_norm": 1.7301157760535928, "learning_rate": 2.752693460528152e-06, "loss": 0.7344, "step": 6453 }, { "epoch": 0.66, "grad_norm": 1.7407121995897943, "learning_rate": 2.7512163837098156e-06, "loss": 0.6916, "step": 6454 }, { "epoch": 0.66, "grad_norm": 1.4021654151094294, "learning_rate": 2.7497395528668557e-06, "loss": 0.713, "step": 6455 }, { "epoch": 0.66, "grad_norm": 1.5027364347292254, "learning_rate": 2.7482629681608087e-06, "loss": 0.7174, "step": 6456 }, { "epoch": 0.66, "grad_norm": 1.7353971637894214, "learning_rate": 2.74678662975319e-06, "loss": 0.7359, "step": 6457 }, { "epoch": 0.66, "grad_norm": 1.4655623415962165, "learning_rate": 2.745310537805479e-06, "loss": 0.5954, "step": 6458 }, { "epoch": 0.66, "grad_norm": 1.693494550392666, "learning_rate": 2.743834692479137e-06, "loss": 0.727, "step": 6459 }, { "epoch": 0.66, "grad_norm": 1.5686069816563486, "learning_rate": 2.7423590939355927e-06, "loss": 0.6531, "step": 6460 }, { "epoch": 0.66, "grad_norm": 1.313000123891892, "learning_rate": 2.740883742336247e-06, "loss": 0.5415, "step": 6461 }, { "epoch": 0.66, "grad_norm": 1.7821617945425408, "learning_rate": 2.7394086378424787e-06, "loss": 0.6962, "step": 6462 }, { "epoch": 0.66, "grad_norm": 1.4841332207758517, "learning_rate": 2.737933780615638e-06, "loss": 0.6147, "step": 6463 }, { "epoch": 0.66, "grad_norm": 1.4684551298613155, "learning_rate": 2.7364591708170457e-06, "loss": 0.7071, "step": 6464 }, { "epoch": 0.66, "grad_norm": 1.4107700564794454, "learning_rate": 2.7349848086079965e-06, "loss": 0.5648, "step": 6465 }, { "epoch": 0.66, "grad_norm": 1.5720096179059075, "learning_rate": 2.733510694149757e-06, "loss": 0.6713, "step": 6466 }, { "epoch": 0.66, "grad_norm": 1.55552650110587, "learning_rate": 2.732036827603569e-06, "loss": 0.6527, "step": 6467 }, { "epoch": 0.66, "grad_norm": 1.5075184880933323, "learning_rate": 2.730563209130649e-06, "loss": 0.6502, "step": 6468 }, { "epoch": 0.66, "grad_norm": 1.47456645982568, "learning_rate": 2.7290898388921815e-06, "loss": 0.6285, "step": 6469 }, { "epoch": 0.66, "grad_norm": 1.4146208042128523, "learning_rate": 2.727616717049325e-06, "loss": 0.595, "step": 6470 }, { "epoch": 0.66, "grad_norm": 2.329409026015677, "learning_rate": 2.726143843763214e-06, "loss": 0.7682, "step": 6471 }, { "epoch": 0.66, "grad_norm": 1.5582052000979771, "learning_rate": 2.7246712191949504e-06, "loss": 0.7234, "step": 6472 }, { "epoch": 0.66, "grad_norm": 1.5519612891741574, "learning_rate": 2.7231988435056166e-06, "loss": 0.7168, "step": 6473 }, { "epoch": 0.66, "grad_norm": 1.3686089530975571, "learning_rate": 2.72172671685626e-06, "loss": 0.5848, "step": 6474 }, { "epoch": 0.66, "grad_norm": 1.4131262984549464, "learning_rate": 2.7202548394079038e-06, "loss": 0.5584, "step": 6475 }, { "epoch": 0.66, "grad_norm": 1.5012222000114122, "learning_rate": 2.718783211321545e-06, "loss": 0.6406, "step": 6476 }, { "epoch": 0.66, "grad_norm": 1.5512694976025823, "learning_rate": 2.717311832758155e-06, "loss": 0.7459, "step": 6477 }, { "epoch": 0.66, "grad_norm": 1.6665516025562215, "learning_rate": 2.715840703878673e-06, "loss": 0.625, "step": 6478 }, { "epoch": 0.66, "grad_norm": 1.3853812775358385, "learning_rate": 2.7143698248440137e-06, "loss": 0.5721, "step": 6479 }, { "epoch": 0.66, "grad_norm": 1.6731172598148734, "learning_rate": 2.7128991958150617e-06, "loss": 0.6824, "step": 6480 }, { "epoch": 0.66, "grad_norm": 1.610102797701347, "learning_rate": 2.711428816952679e-06, "loss": 0.6685, "step": 6481 }, { "epoch": 0.66, "grad_norm": 1.3810185757167002, "learning_rate": 2.7099586884177003e-06, "loss": 0.6735, "step": 6482 }, { "epoch": 0.66, "grad_norm": 1.339133499132961, "learning_rate": 2.7084888103709274e-06, "loss": 0.5526, "step": 6483 }, { "epoch": 0.66, "grad_norm": 1.2230560660758871, "learning_rate": 2.7070191829731367e-06, "loss": 0.5901, "step": 6484 }, { "epoch": 0.66, "grad_norm": 1.4821911734560933, "learning_rate": 2.705549806385082e-06, "loss": 0.7374, "step": 6485 }, { "epoch": 0.66, "grad_norm": 1.5121979631274494, "learning_rate": 2.704080680767482e-06, "loss": 0.6507, "step": 6486 }, { "epoch": 0.66, "grad_norm": 1.5138068864666399, "learning_rate": 2.702611806281036e-06, "loss": 0.6281, "step": 6487 }, { "epoch": 0.66, "grad_norm": 1.4479670885450548, "learning_rate": 2.7011431830864087e-06, "loss": 0.5499, "step": 6488 }, { "epoch": 0.66, "grad_norm": 1.3368422244638316, "learning_rate": 2.6996748113442397e-06, "loss": 0.5925, "step": 6489 }, { "epoch": 0.66, "grad_norm": 1.7121584464943027, "learning_rate": 2.6982066912151435e-06, "loss": 0.6309, "step": 6490 }, { "epoch": 0.66, "grad_norm": 1.7070140642717813, "learning_rate": 2.696738822859707e-06, "loss": 0.6795, "step": 6491 }, { "epoch": 0.66, "grad_norm": 1.4895793981470213, "learning_rate": 2.6952712064384855e-06, "loss": 0.6334, "step": 6492 }, { "epoch": 0.66, "grad_norm": 1.3468808328137127, "learning_rate": 2.693803842112009e-06, "loss": 0.6288, "step": 6493 }, { "epoch": 0.66, "grad_norm": 1.3914552728632776, "learning_rate": 2.69233673004078e-06, "loss": 0.6095, "step": 6494 }, { "epoch": 0.66, "grad_norm": 1.6816894376487477, "learning_rate": 2.690869870385273e-06, "loss": 0.6722, "step": 6495 }, { "epoch": 0.66, "grad_norm": 1.489482875625957, "learning_rate": 2.689403263305939e-06, "loss": 0.648, "step": 6496 }, { "epoch": 0.66, "grad_norm": 1.5702947372160103, "learning_rate": 2.6879369089631944e-06, "loss": 0.5879, "step": 6497 }, { "epoch": 0.66, "grad_norm": 1.3241548066610018, "learning_rate": 2.6864708075174305e-06, "loss": 0.5726, "step": 6498 }, { "epoch": 0.66, "grad_norm": 1.364603132352305, "learning_rate": 2.6850049591290154e-06, "loss": 0.4981, "step": 6499 }, { "epoch": 0.66, "grad_norm": 1.6539644170916439, "learning_rate": 2.6835393639582814e-06, "loss": 0.5892, "step": 6500 }, { "epoch": 0.66, "grad_norm": 1.4983471550946852, "learning_rate": 2.6820740221655417e-06, "loss": 0.722, "step": 6501 }, { "epoch": 0.66, "grad_norm": 1.4272171853127025, "learning_rate": 2.6806089339110753e-06, "loss": 0.6557, "step": 6502 }, { "epoch": 0.66, "grad_norm": 1.4384196476703832, "learning_rate": 2.679144099355134e-06, "loss": 0.637, "step": 6503 }, { "epoch": 0.66, "grad_norm": 1.3725972901399122, "learning_rate": 2.6776795186579466e-06, "loss": 0.6571, "step": 6504 }, { "epoch": 0.66, "grad_norm": 1.7052393711036953, "learning_rate": 2.676215191979712e-06, "loss": 0.6876, "step": 6505 }, { "epoch": 0.66, "grad_norm": 1.6258011333596658, "learning_rate": 2.6747511194805985e-06, "loss": 0.6854, "step": 6506 }, { "epoch": 0.66, "grad_norm": 1.2491953834774008, "learning_rate": 2.6732873013207492e-06, "loss": 0.5422, "step": 6507 }, { "epoch": 0.66, "grad_norm": 1.710601769035771, "learning_rate": 2.671823737660277e-06, "loss": 0.7077, "step": 6508 }, { "epoch": 0.66, "grad_norm": 1.4910277972243944, "learning_rate": 2.6703604286592698e-06, "loss": 0.5028, "step": 6509 }, { "epoch": 0.66, "grad_norm": 1.6598735915897394, "learning_rate": 2.6688973744777884e-06, "loss": 0.6053, "step": 6510 }, { "epoch": 0.66, "grad_norm": 1.4284725467416837, "learning_rate": 2.6674345752758628e-06, "loss": 0.6581, "step": 6511 }, { "epoch": 0.66, "grad_norm": 1.1707947362576039, "learning_rate": 2.665972031213494e-06, "loss": 0.5238, "step": 6512 }, { "epoch": 0.67, "grad_norm": 1.3449104356976411, "learning_rate": 2.664509742450661e-06, "loss": 0.6473, "step": 6513 }, { "epoch": 0.67, "grad_norm": 1.5664500031986524, "learning_rate": 2.6630477091473074e-06, "loss": 0.6136, "step": 6514 }, { "epoch": 0.67, "grad_norm": 1.5138860663633422, "learning_rate": 2.661585931463356e-06, "loss": 0.6826, "step": 6515 }, { "epoch": 0.67, "grad_norm": 1.6457934892620698, "learning_rate": 2.6601244095586975e-06, "loss": 0.7062, "step": 6516 }, { "epoch": 0.67, "grad_norm": 1.4189680602843069, "learning_rate": 2.658663143593193e-06, "loss": 0.6708, "step": 6517 }, { "epoch": 0.67, "grad_norm": 1.5024960774346852, "learning_rate": 2.657202133726679e-06, "loss": 0.6408, "step": 6518 }, { "epoch": 0.67, "grad_norm": 1.5979846813540932, "learning_rate": 2.655741380118966e-06, "loss": 0.6128, "step": 6519 }, { "epoch": 0.67, "grad_norm": 1.6219740074707791, "learning_rate": 2.6542808829298305e-06, "loss": 0.6511, "step": 6520 }, { "epoch": 0.67, "grad_norm": 1.4617724010641961, "learning_rate": 2.6528206423190256e-06, "loss": 0.5499, "step": 6521 }, { "epoch": 0.67, "grad_norm": 1.4745593016163099, "learning_rate": 2.6513606584462715e-06, "loss": 0.6694, "step": 6522 }, { "epoch": 0.67, "grad_norm": 1.545312830845279, "learning_rate": 2.649900931471265e-06, "loss": 0.6485, "step": 6523 }, { "epoch": 0.67, "grad_norm": 1.4695340864854345, "learning_rate": 2.6484414615536757e-06, "loss": 0.6497, "step": 6524 }, { "epoch": 0.67, "grad_norm": 1.4227364594201022, "learning_rate": 2.646982248853141e-06, "loss": 0.6677, "step": 6525 }, { "epoch": 0.67, "grad_norm": 1.3492465766308124, "learning_rate": 2.64552329352927e-06, "loss": 0.5758, "step": 6526 }, { "epoch": 0.67, "grad_norm": 1.4514949171523013, "learning_rate": 2.6440645957416483e-06, "loss": 0.6081, "step": 6527 }, { "epoch": 0.67, "grad_norm": 1.7941818157142309, "learning_rate": 2.642606155649827e-06, "loss": 0.6255, "step": 6528 }, { "epoch": 0.67, "grad_norm": 1.6014887846172698, "learning_rate": 2.6411479734133375e-06, "loss": 0.653, "step": 6529 }, { "epoch": 0.67, "grad_norm": 1.9874487049897755, "learning_rate": 2.6396900491916756e-06, "loss": 0.635, "step": 6530 }, { "epoch": 0.67, "grad_norm": 1.5850100982357662, "learning_rate": 2.638232383144308e-06, "loss": 0.7025, "step": 6531 }, { "epoch": 0.67, "grad_norm": 1.4750464159713772, "learning_rate": 2.6367749754306793e-06, "loss": 0.6249, "step": 6532 }, { "epoch": 0.67, "grad_norm": 1.476014832707811, "learning_rate": 2.6353178262102053e-06, "loss": 0.5883, "step": 6533 }, { "epoch": 0.67, "grad_norm": 1.3946122567911767, "learning_rate": 2.6338609356422684e-06, "loss": 0.565, "step": 6534 }, { "epoch": 0.67, "grad_norm": 1.481483271849542, "learning_rate": 2.632404303886227e-06, "loss": 0.6981, "step": 6535 }, { "epoch": 0.67, "grad_norm": 1.5554729244291605, "learning_rate": 2.630947931101406e-06, "loss": 0.6467, "step": 6536 }, { "epoch": 0.67, "grad_norm": 1.57377017132857, "learning_rate": 2.6294918174471083e-06, "loss": 0.6423, "step": 6537 }, { "epoch": 0.67, "grad_norm": 1.517013314177305, "learning_rate": 2.628035963082608e-06, "loss": 0.6716, "step": 6538 }, { "epoch": 0.67, "grad_norm": 1.4150342648993222, "learning_rate": 2.6265803681671466e-06, "loss": 0.6021, "step": 6539 }, { "epoch": 0.67, "grad_norm": 1.3348770939813268, "learning_rate": 2.625125032859937e-06, "loss": 0.5935, "step": 6540 }, { "epoch": 0.67, "grad_norm": 1.4189561700021447, "learning_rate": 2.6236699573201706e-06, "loss": 0.6275, "step": 6541 }, { "epoch": 0.67, "grad_norm": 1.4461521136319229, "learning_rate": 2.622215141707001e-06, "loss": 0.6369, "step": 6542 }, { "epoch": 0.67, "grad_norm": 1.4127310029136018, "learning_rate": 2.6207605861795624e-06, "loss": 0.6427, "step": 6543 }, { "epoch": 0.67, "grad_norm": 1.4385795479318262, "learning_rate": 2.619306290896955e-06, "loss": 0.6266, "step": 6544 }, { "epoch": 0.67, "grad_norm": 1.5045592427774523, "learning_rate": 2.6178522560182497e-06, "loss": 0.6793, "step": 6545 }, { "epoch": 0.67, "grad_norm": 1.293184069601259, "learning_rate": 2.6163984817024922e-06, "loss": 0.6233, "step": 6546 }, { "epoch": 0.67, "grad_norm": 1.3798213001871809, "learning_rate": 2.6149449681087004e-06, "loss": 0.6877, "step": 6547 }, { "epoch": 0.67, "grad_norm": 1.4872676996049676, "learning_rate": 2.613491715395861e-06, "loss": 0.6699, "step": 6548 }, { "epoch": 0.67, "grad_norm": 1.4612662542416632, "learning_rate": 2.6120387237229326e-06, "loss": 0.6416, "step": 6549 }, { "epoch": 0.67, "grad_norm": 1.3211598936438456, "learning_rate": 2.610585993248843e-06, "loss": 0.6095, "step": 6550 }, { "epoch": 0.67, "grad_norm": 1.596280585467262, "learning_rate": 2.6091335241324965e-06, "loss": 0.6992, "step": 6551 }, { "epoch": 0.67, "grad_norm": 1.4851171119323285, "learning_rate": 2.607681316532769e-06, "loss": 0.6096, "step": 6552 }, { "epoch": 0.67, "grad_norm": 1.5130276007801173, "learning_rate": 2.6062293706085023e-06, "loss": 0.642, "step": 6553 }, { "epoch": 0.67, "grad_norm": 1.8430210329194865, "learning_rate": 2.6047776865185106e-06, "loss": 0.8316, "step": 6554 }, { "epoch": 0.67, "grad_norm": 1.4491938431713127, "learning_rate": 2.6033262644215858e-06, "loss": 0.5995, "step": 6555 }, { "epoch": 0.67, "grad_norm": 1.5752058043265003, "learning_rate": 2.6018751044764822e-06, "loss": 0.6491, "step": 6556 }, { "epoch": 0.67, "grad_norm": 1.5204280361071107, "learning_rate": 2.600424206841934e-06, "loss": 0.631, "step": 6557 }, { "epoch": 0.67, "grad_norm": 1.4541346232318655, "learning_rate": 2.598973571676641e-06, "loss": 0.5925, "step": 6558 }, { "epoch": 0.67, "grad_norm": 1.5455502787370752, "learning_rate": 2.5975231991392722e-06, "loss": 0.6644, "step": 6559 }, { "epoch": 0.67, "grad_norm": 1.376740781816252, "learning_rate": 2.5960730893884757e-06, "loss": 0.6033, "step": 6560 }, { "epoch": 0.67, "grad_norm": 1.7221370385394577, "learning_rate": 2.594623242582868e-06, "loss": 0.679, "step": 6561 }, { "epoch": 0.67, "grad_norm": 1.4877416261575223, "learning_rate": 2.593173658881033e-06, "loss": 0.6782, "step": 6562 }, { "epoch": 0.67, "grad_norm": 1.4225421702905412, "learning_rate": 2.591724338441529e-06, "loss": 0.5688, "step": 6563 }, { "epoch": 0.67, "grad_norm": 1.4041769342385526, "learning_rate": 2.5902752814228833e-06, "loss": 0.5999, "step": 6564 }, { "epoch": 0.67, "grad_norm": 1.392317158687761, "learning_rate": 2.588826487983597e-06, "loss": 0.6442, "step": 6565 }, { "epoch": 0.67, "grad_norm": 1.5670213988454065, "learning_rate": 2.5873779582821427e-06, "loss": 0.613, "step": 6566 }, { "epoch": 0.67, "grad_norm": 1.4405992452802145, "learning_rate": 2.5859296924769628e-06, "loss": 0.6716, "step": 6567 }, { "epoch": 0.67, "grad_norm": 1.3705811327747346, "learning_rate": 2.584481690726467e-06, "loss": 0.5365, "step": 6568 }, { "epoch": 0.67, "grad_norm": 1.3645847789275842, "learning_rate": 2.5830339531890454e-06, "loss": 0.6374, "step": 6569 }, { "epoch": 0.67, "grad_norm": 1.5229493136157146, "learning_rate": 2.5815864800230483e-06, "loss": 0.5911, "step": 6570 }, { "epoch": 0.67, "grad_norm": 3.820838613936916, "learning_rate": 2.5801392713868077e-06, "loss": 0.6001, "step": 6571 }, { "epoch": 0.67, "grad_norm": 1.429210014328348, "learning_rate": 2.5786923274386193e-06, "loss": 0.6792, "step": 6572 }, { "epoch": 0.67, "grad_norm": 1.4976517868367336, "learning_rate": 2.57724564833675e-06, "loss": 0.6399, "step": 6573 }, { "epoch": 0.67, "grad_norm": 2.000897300882692, "learning_rate": 2.575799234239441e-06, "loss": 0.6405, "step": 6574 }, { "epoch": 0.67, "grad_norm": 1.434740884044923, "learning_rate": 2.574353085304906e-06, "loss": 0.733, "step": 6575 }, { "epoch": 0.67, "grad_norm": 1.7182830346635272, "learning_rate": 2.5729072016913243e-06, "loss": 0.7081, "step": 6576 }, { "epoch": 0.67, "grad_norm": 1.5118111856301204, "learning_rate": 2.5714615835568496e-06, "loss": 0.6905, "step": 6577 }, { "epoch": 0.67, "grad_norm": 1.5396932770098521, "learning_rate": 2.5700162310596037e-06, "loss": 0.6428, "step": 6578 }, { "epoch": 0.67, "grad_norm": 1.6627257912573299, "learning_rate": 2.568571144357683e-06, "loss": 0.6307, "step": 6579 }, { "epoch": 0.67, "grad_norm": 2.1208878865290868, "learning_rate": 2.567126323609156e-06, "loss": 0.5251, "step": 6580 }, { "epoch": 0.67, "grad_norm": 1.5599328760843625, "learning_rate": 2.565681768972057e-06, "loss": 0.6941, "step": 6581 }, { "epoch": 0.67, "grad_norm": 1.4234219668982382, "learning_rate": 2.5642374806043914e-06, "loss": 0.6859, "step": 6582 }, { "epoch": 0.67, "grad_norm": 1.533907912153899, "learning_rate": 2.5627934586641416e-06, "loss": 0.6636, "step": 6583 }, { "epoch": 0.67, "grad_norm": 1.536108023341844, "learning_rate": 2.5613497033092538e-06, "loss": 0.6015, "step": 6584 }, { "epoch": 0.67, "grad_norm": 1.530586854172694, "learning_rate": 2.559906214697651e-06, "loss": 0.5861, "step": 6585 }, { "epoch": 0.67, "grad_norm": 1.2601537920719936, "learning_rate": 2.558462992987223e-06, "loss": 0.601, "step": 6586 }, { "epoch": 0.67, "grad_norm": 1.4169748118477474, "learning_rate": 2.557020038335829e-06, "loss": 0.6491, "step": 6587 }, { "epoch": 0.67, "grad_norm": 1.7128064312061109, "learning_rate": 2.555577350901306e-06, "loss": 0.6093, "step": 6588 }, { "epoch": 0.67, "grad_norm": 1.509260774426736, "learning_rate": 2.5541349308414532e-06, "loss": 0.6658, "step": 6589 }, { "epoch": 0.67, "grad_norm": 1.52085306456118, "learning_rate": 2.552692778314049e-06, "loss": 0.7382, "step": 6590 }, { "epoch": 0.67, "grad_norm": 1.3282857444974363, "learning_rate": 2.5512508934768356e-06, "loss": 0.5056, "step": 6591 }, { "epoch": 0.67, "grad_norm": 1.3123471004118001, "learning_rate": 2.5498092764875267e-06, "loss": 0.5964, "step": 6592 }, { "epoch": 0.67, "grad_norm": 1.6426480111376651, "learning_rate": 2.548367927503811e-06, "loss": 0.6782, "step": 6593 }, { "epoch": 0.67, "grad_norm": 1.5369135123975508, "learning_rate": 2.5469268466833475e-06, "loss": 0.7448, "step": 6594 }, { "epoch": 0.67, "grad_norm": 1.365046409445616, "learning_rate": 2.5454860341837612e-06, "loss": 0.5898, "step": 6595 }, { "epoch": 0.67, "grad_norm": 1.6241734764204283, "learning_rate": 2.5440454901626487e-06, "loss": 0.6884, "step": 6596 }, { "epoch": 0.67, "grad_norm": 1.4783438442146886, "learning_rate": 2.5426052147775827e-06, "loss": 0.6083, "step": 6597 }, { "epoch": 0.67, "grad_norm": 1.5089164338948342, "learning_rate": 2.5411652081860994e-06, "loss": 0.7306, "step": 6598 }, { "epoch": 0.67, "grad_norm": 1.5141885820870347, "learning_rate": 2.5397254705457127e-06, "loss": 0.6957, "step": 6599 }, { "epoch": 0.67, "grad_norm": 1.5763226431849238, "learning_rate": 2.5382860020139e-06, "loss": 0.6808, "step": 6600 }, { "epoch": 0.67, "grad_norm": 1.581414854602593, "learning_rate": 2.5368468027481124e-06, "loss": 0.7817, "step": 6601 }, { "epoch": 0.67, "grad_norm": 2.0481178011192442, "learning_rate": 2.5354078729057748e-06, "loss": 0.6339, "step": 6602 }, { "epoch": 0.67, "grad_norm": 2.807657303316275, "learning_rate": 2.5339692126442756e-06, "loss": 0.6486, "step": 6603 }, { "epoch": 0.67, "grad_norm": 1.5064415231225015, "learning_rate": 2.5325308221209814e-06, "loss": 0.6359, "step": 6604 }, { "epoch": 0.67, "grad_norm": 1.5106479954547172, "learning_rate": 2.531092701493224e-06, "loss": 0.6659, "step": 6605 }, { "epoch": 0.67, "grad_norm": 1.70940771919715, "learning_rate": 2.529654850918305e-06, "loss": 0.6711, "step": 6606 }, { "epoch": 0.67, "grad_norm": 1.5008781112086602, "learning_rate": 2.5282172705535013e-06, "loss": 0.7075, "step": 6607 }, { "epoch": 0.67, "grad_norm": 1.4109006811007945, "learning_rate": 2.5267799605560583e-06, "loss": 0.643, "step": 6608 }, { "epoch": 0.67, "grad_norm": 1.3471240135172962, "learning_rate": 2.5253429210831904e-06, "loss": 0.6056, "step": 6609 }, { "epoch": 0.67, "grad_norm": 1.4273263720067444, "learning_rate": 2.5239061522920806e-06, "loss": 0.5505, "step": 6610 }, { "epoch": 0.68, "grad_norm": 1.782862501260514, "learning_rate": 2.5224696543398887e-06, "loss": 0.7075, "step": 6611 }, { "epoch": 0.68, "grad_norm": 1.3571304949887888, "learning_rate": 2.521033427383737e-06, "loss": 0.626, "step": 6612 }, { "epoch": 0.68, "grad_norm": 1.4354855097008643, "learning_rate": 2.519597471580727e-06, "loss": 0.6375, "step": 6613 }, { "epoch": 0.68, "grad_norm": 1.6202015236270597, "learning_rate": 2.518161787087923e-06, "loss": 0.7375, "step": 6614 }, { "epoch": 0.68, "grad_norm": 1.4475785118978781, "learning_rate": 2.516726374062361e-06, "loss": 0.6128, "step": 6615 }, { "epoch": 0.68, "grad_norm": 1.4192636493728181, "learning_rate": 2.5152912326610523e-06, "loss": 0.5785, "step": 6616 }, { "epoch": 0.68, "grad_norm": 1.424440399002253, "learning_rate": 2.51385636304097e-06, "loss": 0.5664, "step": 6617 }, { "epoch": 0.68, "grad_norm": 1.3857813868231272, "learning_rate": 2.512421765359068e-06, "loss": 0.6266, "step": 6618 }, { "epoch": 0.68, "grad_norm": 1.3196362732487235, "learning_rate": 2.510987439772261e-06, "loss": 0.5971, "step": 6619 }, { "epoch": 0.68, "grad_norm": 1.570951577102357, "learning_rate": 2.509553386437437e-06, "loss": 0.4765, "step": 6620 }, { "epoch": 0.68, "grad_norm": 1.7059558044369674, "learning_rate": 2.5081196055114563e-06, "loss": 0.8429, "step": 6621 }, { "epoch": 0.68, "grad_norm": 1.5075779750018465, "learning_rate": 2.506686097151151e-06, "loss": 0.6371, "step": 6622 }, { "epoch": 0.68, "grad_norm": 1.4275401062486779, "learning_rate": 2.505252861513317e-06, "loss": 0.5446, "step": 6623 }, { "epoch": 0.68, "grad_norm": 1.485577169664157, "learning_rate": 2.5038198987547225e-06, "loss": 0.6157, "step": 6624 }, { "epoch": 0.68, "grad_norm": 1.4902379855604808, "learning_rate": 2.5023872090321115e-06, "loss": 0.6233, "step": 6625 }, { "epoch": 0.68, "grad_norm": 1.5721334425790876, "learning_rate": 2.5009547925021895e-06, "loss": 0.6681, "step": 6626 }, { "epoch": 0.68, "grad_norm": 1.6221695253903352, "learning_rate": 2.4995226493216403e-06, "loss": 0.6322, "step": 6627 }, { "epoch": 0.68, "grad_norm": 1.4238266365673584, "learning_rate": 2.4980907796471115e-06, "loss": 0.6479, "step": 6628 }, { "epoch": 0.68, "grad_norm": 1.4595558005061462, "learning_rate": 2.4966591836352222e-06, "loss": 0.6561, "step": 6629 }, { "epoch": 0.68, "grad_norm": 1.458835988770342, "learning_rate": 2.495227861442566e-06, "loss": 0.6555, "step": 6630 }, { "epoch": 0.68, "grad_norm": 1.4818969329027825, "learning_rate": 2.4937968132256986e-06, "loss": 0.5545, "step": 6631 }, { "epoch": 0.68, "grad_norm": 1.4544550783566736, "learning_rate": 2.492366039141155e-06, "loss": 0.7538, "step": 6632 }, { "epoch": 0.68, "grad_norm": 1.4696962351553955, "learning_rate": 2.4909355393454327e-06, "loss": 0.6251, "step": 6633 }, { "epoch": 0.68, "grad_norm": 1.3087968362682922, "learning_rate": 2.489505313995001e-06, "loss": 0.593, "step": 6634 }, { "epoch": 0.68, "grad_norm": 1.5934962499527614, "learning_rate": 2.488075363246301e-06, "loss": 0.6706, "step": 6635 }, { "epoch": 0.68, "grad_norm": 1.4575904667853339, "learning_rate": 2.486645687255746e-06, "loss": 0.6042, "step": 6636 }, { "epoch": 0.68, "grad_norm": 1.5591992654131415, "learning_rate": 2.485216286179713e-06, "loss": 0.6098, "step": 6637 }, { "epoch": 0.68, "grad_norm": 1.4550363423505184, "learning_rate": 2.4837871601745513e-06, "loss": 0.6109, "step": 6638 }, { "epoch": 0.68, "grad_norm": 1.4379060918349242, "learning_rate": 2.482358309396584e-06, "loss": 0.5724, "step": 6639 }, { "epoch": 0.68, "grad_norm": 1.5406844948868377, "learning_rate": 2.480929734002097e-06, "loss": 0.665, "step": 6640 }, { "epoch": 0.68, "grad_norm": 1.5102752538544715, "learning_rate": 2.479501434147352e-06, "loss": 0.7498, "step": 6641 }, { "epoch": 0.68, "grad_norm": 1.4474148212975264, "learning_rate": 2.478073409988583e-06, "loss": 0.5998, "step": 6642 }, { "epoch": 0.68, "grad_norm": 1.3742309404117465, "learning_rate": 2.476645661681982e-06, "loss": 0.6077, "step": 6643 }, { "epoch": 0.68, "grad_norm": 1.433026574919233, "learning_rate": 2.475218189383723e-06, "loss": 0.7036, "step": 6644 }, { "epoch": 0.68, "grad_norm": 1.4488609505053824, "learning_rate": 2.4737909932499414e-06, "loss": 0.651, "step": 6645 }, { "epoch": 0.68, "grad_norm": 1.5835410604527103, "learning_rate": 2.4723640734367504e-06, "loss": 0.6227, "step": 6646 }, { "epoch": 0.68, "grad_norm": 1.471175166454474, "learning_rate": 2.470937430100227e-06, "loss": 0.6504, "step": 6647 }, { "epoch": 0.68, "grad_norm": 1.419566535163038, "learning_rate": 2.4695110633964176e-06, "loss": 0.6944, "step": 6648 }, { "epoch": 0.68, "grad_norm": 1.6310241219443236, "learning_rate": 2.468084973481342e-06, "loss": 0.6081, "step": 6649 }, { "epoch": 0.68, "grad_norm": 1.6146717161609572, "learning_rate": 2.4666591605109893e-06, "loss": 0.7314, "step": 6650 }, { "epoch": 0.68, "grad_norm": 1.7321457308044717, "learning_rate": 2.4652336246413163e-06, "loss": 0.6891, "step": 6651 }, { "epoch": 0.68, "grad_norm": 1.4030901051235938, "learning_rate": 2.4638083660282482e-06, "loss": 0.521, "step": 6652 }, { "epoch": 0.68, "grad_norm": 1.5522086697864168, "learning_rate": 2.4623833848276852e-06, "loss": 0.6318, "step": 6653 }, { "epoch": 0.68, "grad_norm": 1.263284825701101, "learning_rate": 2.46095868119549e-06, "loss": 0.5888, "step": 6654 }, { "epoch": 0.68, "grad_norm": 1.644054473615998, "learning_rate": 2.459534255287502e-06, "loss": 0.6077, "step": 6655 }, { "epoch": 0.68, "grad_norm": 1.4214575913737333, "learning_rate": 2.4581101072595298e-06, "loss": 0.6506, "step": 6656 }, { "epoch": 0.68, "grad_norm": 1.631098526348583, "learning_rate": 2.456686237267342e-06, "loss": 0.644, "step": 6657 }, { "epoch": 0.68, "grad_norm": 1.5124832643003685, "learning_rate": 2.4552626454666884e-06, "loss": 0.6834, "step": 6658 }, { "epoch": 0.68, "grad_norm": 1.5147252611569335, "learning_rate": 2.4538393320132803e-06, "loss": 0.7049, "step": 6659 }, { "epoch": 0.68, "grad_norm": 1.581639913098277, "learning_rate": 2.4524162970628055e-06, "loss": 0.649, "step": 6660 }, { "epoch": 0.68, "grad_norm": 1.1868397104885648, "learning_rate": 2.4509935407709166e-06, "loss": 0.6704, "step": 6661 }, { "epoch": 0.68, "grad_norm": 1.4583312583734698, "learning_rate": 2.4495710632932347e-06, "loss": 0.5854, "step": 6662 }, { "epoch": 0.68, "grad_norm": 1.3959034424820613, "learning_rate": 2.4481488647853543e-06, "loss": 0.5255, "step": 6663 }, { "epoch": 0.68, "grad_norm": 1.2470212229870625, "learning_rate": 2.4467269454028387e-06, "loss": 0.5909, "step": 6664 }, { "epoch": 0.68, "grad_norm": 1.4980047029602015, "learning_rate": 2.4453053053012187e-06, "loss": 0.6376, "step": 6665 }, { "epoch": 0.68, "grad_norm": 1.4447690967925402, "learning_rate": 2.4438839446359936e-06, "loss": 0.6532, "step": 6666 }, { "epoch": 0.68, "grad_norm": 1.5101486956354206, "learning_rate": 2.4424628635626374e-06, "loss": 0.6138, "step": 6667 }, { "epoch": 0.68, "grad_norm": 1.336296462543787, "learning_rate": 2.441042062236586e-06, "loss": 0.6718, "step": 6668 }, { "epoch": 0.68, "grad_norm": 1.4185352290626376, "learning_rate": 2.4396215408132517e-06, "loss": 0.6904, "step": 6669 }, { "epoch": 0.68, "grad_norm": 1.5906755601788154, "learning_rate": 2.438201299448016e-06, "loss": 0.6592, "step": 6670 }, { "epoch": 0.68, "grad_norm": 1.5029539736152295, "learning_rate": 2.43678133829622e-06, "loss": 0.6778, "step": 6671 }, { "epoch": 0.68, "grad_norm": 1.4759742288130497, "learning_rate": 2.4353616575131873e-06, "loss": 0.6201, "step": 6672 }, { "epoch": 0.68, "grad_norm": 1.4630035414144544, "learning_rate": 2.4339422572542005e-06, "loss": 0.6368, "step": 6673 }, { "epoch": 0.68, "grad_norm": 1.4709232401182415, "learning_rate": 2.4325231376745177e-06, "loss": 0.6575, "step": 6674 }, { "epoch": 0.68, "grad_norm": 1.6178311486481884, "learning_rate": 2.431104298929368e-06, "loss": 0.6622, "step": 6675 }, { "epoch": 0.68, "grad_norm": 3.0376104552372722, "learning_rate": 2.4296857411739387e-06, "loss": 0.6306, "step": 6676 }, { "epoch": 0.68, "grad_norm": 1.622677421902503, "learning_rate": 2.4282674645633974e-06, "loss": 0.744, "step": 6677 }, { "epoch": 0.68, "grad_norm": 1.6257196731768613, "learning_rate": 2.42684946925288e-06, "loss": 0.6565, "step": 6678 }, { "epoch": 0.68, "grad_norm": 1.2982498784548677, "learning_rate": 2.4254317553974865e-06, "loss": 0.5488, "step": 6679 }, { "epoch": 0.68, "grad_norm": 1.2882489876130758, "learning_rate": 2.4240143231522863e-06, "loss": 0.5747, "step": 6680 }, { "epoch": 0.68, "grad_norm": 1.204385284170719, "learning_rate": 2.422597172672325e-06, "loss": 0.5328, "step": 6681 }, { "epoch": 0.68, "grad_norm": 1.4213003134820246, "learning_rate": 2.421180304112608e-06, "loss": 0.5794, "step": 6682 }, { "epoch": 0.68, "grad_norm": 1.4649984820483533, "learning_rate": 2.4197637176281174e-06, "loss": 0.6462, "step": 6683 }, { "epoch": 0.68, "grad_norm": 2.168239865400646, "learning_rate": 2.4183474133738043e-06, "loss": 0.6821, "step": 6684 }, { "epoch": 0.68, "grad_norm": 1.4244728134873217, "learning_rate": 2.4169313915045794e-06, "loss": 0.7114, "step": 6685 }, { "epoch": 0.68, "grad_norm": 1.4487263752826887, "learning_rate": 2.4155156521753354e-06, "loss": 0.5708, "step": 6686 }, { "epoch": 0.68, "grad_norm": 1.3838077723036908, "learning_rate": 2.4141001955409227e-06, "loss": 0.5959, "step": 6687 }, { "epoch": 0.68, "grad_norm": 1.4274349961721866, "learning_rate": 2.41268502175617e-06, "loss": 0.578, "step": 6688 }, { "epoch": 0.68, "grad_norm": 1.5582919988974324, "learning_rate": 2.411270130975873e-06, "loss": 0.6282, "step": 6689 }, { "epoch": 0.68, "grad_norm": 1.4582321402072498, "learning_rate": 2.4098555233547883e-06, "loss": 0.6569, "step": 6690 }, { "epoch": 0.68, "grad_norm": 1.5071396592894535, "learning_rate": 2.408441199047651e-06, "loss": 0.5701, "step": 6691 }, { "epoch": 0.68, "grad_norm": 1.627139796438223, "learning_rate": 2.4070271582091642e-06, "loss": 0.6877, "step": 6692 }, { "epoch": 0.68, "grad_norm": 1.5270248888641225, "learning_rate": 2.405613400993996e-06, "loss": 0.6676, "step": 6693 }, { "epoch": 0.68, "grad_norm": 1.4939242320470616, "learning_rate": 2.4041999275567836e-06, "loss": 0.6277, "step": 6694 }, { "epoch": 0.68, "grad_norm": 1.5367973941111095, "learning_rate": 2.402786738052138e-06, "loss": 0.6431, "step": 6695 }, { "epoch": 0.68, "grad_norm": 1.5112564167517906, "learning_rate": 2.4013738326346326e-06, "loss": 0.707, "step": 6696 }, { "epoch": 0.68, "grad_norm": 1.5359232761627102, "learning_rate": 2.3999612114588156e-06, "loss": 0.5619, "step": 6697 }, { "epoch": 0.68, "grad_norm": 1.851151394951761, "learning_rate": 2.3985488746792044e-06, "loss": 0.6754, "step": 6698 }, { "epoch": 0.68, "grad_norm": 1.47824824800627, "learning_rate": 2.397136822450276e-06, "loss": 0.5715, "step": 6699 }, { "epoch": 0.68, "grad_norm": 1.4710524250612007, "learning_rate": 2.395725054926488e-06, "loss": 0.6897, "step": 6700 }, { "epoch": 0.68, "grad_norm": 1.5496478977422006, "learning_rate": 2.3943135722622573e-06, "loss": 0.6699, "step": 6701 }, { "epoch": 0.68, "grad_norm": 1.387092626083672, "learning_rate": 2.3929023746119774e-06, "loss": 0.5785, "step": 6702 }, { "epoch": 0.68, "grad_norm": 1.4845626144023203, "learning_rate": 2.3914914621300094e-06, "loss": 0.5771, "step": 6703 }, { "epoch": 0.68, "grad_norm": 1.3489629395724603, "learning_rate": 2.3900808349706743e-06, "loss": 0.5788, "step": 6704 }, { "epoch": 0.68, "grad_norm": 1.5382629041512226, "learning_rate": 2.3886704932882725e-06, "loss": 0.6394, "step": 6705 }, { "epoch": 0.68, "grad_norm": 1.4112606704544637, "learning_rate": 2.3872604372370718e-06, "loss": 0.6239, "step": 6706 }, { "epoch": 0.68, "grad_norm": 1.569725283700519, "learning_rate": 2.385850666971301e-06, "loss": 0.5929, "step": 6707 }, { "epoch": 0.68, "grad_norm": 1.5032576333532583, "learning_rate": 2.3844411826451673e-06, "loss": 0.6389, "step": 6708 }, { "epoch": 0.69, "grad_norm": 1.3810527884825434, "learning_rate": 2.383031984412841e-06, "loss": 0.6937, "step": 6709 }, { "epoch": 0.69, "grad_norm": 1.5863820840618936, "learning_rate": 2.3816230724284602e-06, "loss": 0.6042, "step": 6710 }, { "epoch": 0.69, "grad_norm": 1.3587764563772438, "learning_rate": 2.380214446846137e-06, "loss": 0.6407, "step": 6711 }, { "epoch": 0.69, "grad_norm": 1.4894964483937108, "learning_rate": 2.3788061078199476e-06, "loss": 0.6114, "step": 6712 }, { "epoch": 0.69, "grad_norm": 1.483870623588566, "learning_rate": 2.377398055503936e-06, "loss": 0.5696, "step": 6713 }, { "epoch": 0.69, "grad_norm": 1.7561008185397575, "learning_rate": 2.375990290052122e-06, "loss": 0.6299, "step": 6714 }, { "epoch": 0.69, "grad_norm": 1.464261604092227, "learning_rate": 2.3745828116184837e-06, "loss": 0.6066, "step": 6715 }, { "epoch": 0.69, "grad_norm": 1.2604950319957797, "learning_rate": 2.3731756203569766e-06, "loss": 0.5738, "step": 6716 }, { "epoch": 0.69, "grad_norm": 1.4993120473395523, "learning_rate": 2.3717687164215246e-06, "loss": 0.7462, "step": 6717 }, { "epoch": 0.69, "grad_norm": 1.4380388489861173, "learning_rate": 2.3703620999660094e-06, "loss": 0.6378, "step": 6718 }, { "epoch": 0.69, "grad_norm": 1.2722437211039954, "learning_rate": 2.3689557711442923e-06, "loss": 0.6012, "step": 6719 }, { "epoch": 0.69, "grad_norm": 1.49993521285216, "learning_rate": 2.367549730110202e-06, "loss": 0.6474, "step": 6720 }, { "epoch": 0.69, "grad_norm": 1.4161041172248077, "learning_rate": 2.366143977017529e-06, "loss": 0.6807, "step": 6721 }, { "epoch": 0.69, "grad_norm": 1.5432062090880119, "learning_rate": 2.3647385120200424e-06, "loss": 0.6443, "step": 6722 }, { "epoch": 0.69, "grad_norm": 1.4130527496768084, "learning_rate": 2.36333333527147e-06, "loss": 0.6472, "step": 6723 }, { "epoch": 0.69, "grad_norm": 1.6281459875209243, "learning_rate": 2.3619284469255112e-06, "loss": 0.6215, "step": 6724 }, { "epoch": 0.69, "grad_norm": 1.5592157887392983, "learning_rate": 2.360523847135838e-06, "loss": 0.6092, "step": 6725 }, { "epoch": 0.69, "grad_norm": 1.4899983809203552, "learning_rate": 2.359119536056087e-06, "loss": 0.5941, "step": 6726 }, { "epoch": 0.69, "grad_norm": 1.4586833517218407, "learning_rate": 2.3577155138398617e-06, "loss": 0.6373, "step": 6727 }, { "epoch": 0.69, "grad_norm": 1.516056973634367, "learning_rate": 2.3563117806407387e-06, "loss": 0.6475, "step": 6728 }, { "epoch": 0.69, "grad_norm": 1.368065012056804, "learning_rate": 2.354908336612258e-06, "loss": 0.6464, "step": 6729 }, { "epoch": 0.69, "grad_norm": 1.3487603836503466, "learning_rate": 2.3535051819079317e-06, "loss": 0.5541, "step": 6730 }, { "epoch": 0.69, "grad_norm": 1.3288138644486094, "learning_rate": 2.3521023166812434e-06, "loss": 0.5838, "step": 6731 }, { "epoch": 0.69, "grad_norm": 1.4507857537420625, "learning_rate": 2.3506997410856325e-06, "loss": 0.7238, "step": 6732 }, { "epoch": 0.69, "grad_norm": 1.4307458837692253, "learning_rate": 2.3492974552745186e-06, "loss": 0.6644, "step": 6733 }, { "epoch": 0.69, "grad_norm": 1.403091477299209, "learning_rate": 2.3478954594012884e-06, "loss": 0.6221, "step": 6734 }, { "epoch": 0.69, "grad_norm": 1.5734906811234666, "learning_rate": 2.3464937536192895e-06, "loss": 0.6514, "step": 6735 }, { "epoch": 0.69, "grad_norm": 1.7897189379387248, "learning_rate": 2.3450923380818473e-06, "loss": 0.4374, "step": 6736 }, { "epoch": 0.69, "grad_norm": 1.4133101217391586, "learning_rate": 2.3436912129422483e-06, "loss": 0.613, "step": 6737 }, { "epoch": 0.69, "grad_norm": 1.3769263376995937, "learning_rate": 2.3422903783537485e-06, "loss": 0.6381, "step": 6738 }, { "epoch": 0.69, "grad_norm": 1.386810068831772, "learning_rate": 2.340889834469577e-06, "loss": 0.6347, "step": 6739 }, { "epoch": 0.69, "grad_norm": 1.3678307156269867, "learning_rate": 2.339489581442923e-06, "loss": 0.7662, "step": 6740 }, { "epoch": 0.69, "grad_norm": 1.499363850669477, "learning_rate": 2.338089619426952e-06, "loss": 0.5343, "step": 6741 }, { "epoch": 0.69, "grad_norm": 1.6261867719991219, "learning_rate": 2.3366899485747927e-06, "loss": 0.6729, "step": 6742 }, { "epoch": 0.69, "grad_norm": 1.4467641432637048, "learning_rate": 2.3352905690395415e-06, "loss": 0.6035, "step": 6743 }, { "epoch": 0.69, "grad_norm": 1.3564934880101334, "learning_rate": 2.3338914809742664e-06, "loss": 0.6766, "step": 6744 }, { "epoch": 0.69, "grad_norm": 1.4704214551283228, "learning_rate": 2.3324926845320047e-06, "loss": 0.6545, "step": 6745 }, { "epoch": 0.69, "grad_norm": 1.994384228312812, "learning_rate": 2.331094179865752e-06, "loss": 0.6401, "step": 6746 }, { "epoch": 0.69, "grad_norm": 1.5328991809057209, "learning_rate": 2.3296959671284837e-06, "loss": 0.7347, "step": 6747 }, { "epoch": 0.69, "grad_norm": 1.5124252627074959, "learning_rate": 2.328298046473138e-06, "loss": 0.712, "step": 6748 }, { "epoch": 0.69, "grad_norm": 1.5608536068046106, "learning_rate": 2.32690041805262e-06, "loss": 0.662, "step": 6749 }, { "epoch": 0.69, "grad_norm": 1.331358726165446, "learning_rate": 2.3255030820198066e-06, "loss": 0.5989, "step": 6750 }, { "epoch": 0.69, "grad_norm": 1.678932943156736, "learning_rate": 2.3241060385275397e-06, "loss": 0.6969, "step": 6751 }, { "epoch": 0.69, "grad_norm": 1.4459661513173545, "learning_rate": 2.3227092877286276e-06, "loss": 0.6803, "step": 6752 }, { "epoch": 0.69, "grad_norm": 1.4056477366263427, "learning_rate": 2.3213128297758538e-06, "loss": 0.6254, "step": 6753 }, { "epoch": 0.69, "grad_norm": 1.5005980638184646, "learning_rate": 2.3199166648219594e-06, "loss": 0.6761, "step": 6754 }, { "epoch": 0.69, "grad_norm": 1.496160835949055, "learning_rate": 2.318520793019664e-06, "loss": 0.6613, "step": 6755 }, { "epoch": 0.69, "grad_norm": 1.600642880042157, "learning_rate": 2.317125214521648e-06, "loss": 0.5957, "step": 6756 }, { "epoch": 0.69, "grad_norm": 1.608250517969409, "learning_rate": 2.3157299294805613e-06, "loss": 0.6243, "step": 6757 }, { "epoch": 0.69, "grad_norm": 1.3654669923891487, "learning_rate": 2.314334938049022e-06, "loss": 0.6517, "step": 6758 }, { "epoch": 0.69, "grad_norm": 1.306252944723049, "learning_rate": 2.3129402403796213e-06, "loss": 0.5812, "step": 6759 }, { "epoch": 0.69, "grad_norm": 1.2828236186111126, "learning_rate": 2.3115458366249062e-06, "loss": 0.6207, "step": 6760 }, { "epoch": 0.69, "grad_norm": 1.545649467044935, "learning_rate": 2.310151726937402e-06, "loss": 0.6969, "step": 6761 }, { "epoch": 0.69, "grad_norm": 1.5708371152381688, "learning_rate": 2.3087579114696003e-06, "loss": 0.7541, "step": 6762 }, { "epoch": 0.69, "grad_norm": 1.4017849688892605, "learning_rate": 2.3073643903739544e-06, "loss": 0.6093, "step": 6763 }, { "epoch": 0.69, "grad_norm": 1.479924886971965, "learning_rate": 2.3059711638028955e-06, "loss": 0.6041, "step": 6764 }, { "epoch": 0.69, "grad_norm": 1.437396509149011, "learning_rate": 2.304578231908813e-06, "loss": 0.6877, "step": 6765 }, { "epoch": 0.69, "grad_norm": 1.4780951317744946, "learning_rate": 2.303185594844067e-06, "loss": 0.5299, "step": 6766 }, { "epoch": 0.69, "grad_norm": 1.4028289083010042, "learning_rate": 2.3017932527609894e-06, "loss": 0.6147, "step": 6767 }, { "epoch": 0.69, "grad_norm": 1.4996795690170377, "learning_rate": 2.300401205811873e-06, "loss": 0.659, "step": 6768 }, { "epoch": 0.69, "grad_norm": 1.746189531160315, "learning_rate": 2.2990094541489864e-06, "loss": 0.7049, "step": 6769 }, { "epoch": 0.69, "grad_norm": 1.4538086792968494, "learning_rate": 2.29761799792456e-06, "loss": 0.6646, "step": 6770 }, { "epoch": 0.69, "grad_norm": 1.7199397256315454, "learning_rate": 2.29622683729079e-06, "loss": 0.7452, "step": 6771 }, { "epoch": 0.69, "grad_norm": 1.4345729194433523, "learning_rate": 2.2948359723998466e-06, "loss": 0.6357, "step": 6772 }, { "epoch": 0.69, "grad_norm": 1.5941860917054858, "learning_rate": 2.2934454034038666e-06, "loss": 0.7348, "step": 6773 }, { "epoch": 0.69, "grad_norm": 1.3247258767033943, "learning_rate": 2.2920551304549504e-06, "loss": 0.617, "step": 6774 }, { "epoch": 0.69, "grad_norm": 1.448351590491561, "learning_rate": 2.290665153705166e-06, "loss": 0.5833, "step": 6775 }, { "epoch": 0.69, "grad_norm": 1.68316085092694, "learning_rate": 2.2892754733065557e-06, "loss": 0.6587, "step": 6776 }, { "epoch": 0.69, "grad_norm": 1.7410593030280272, "learning_rate": 2.2878860894111206e-06, "loss": 0.6435, "step": 6777 }, { "epoch": 0.69, "grad_norm": 1.5439132423542796, "learning_rate": 2.286497002170837e-06, "loss": 0.5481, "step": 6778 }, { "epoch": 0.69, "grad_norm": 1.5029792136868025, "learning_rate": 2.2851082117376443e-06, "loss": 0.6365, "step": 6779 }, { "epoch": 0.69, "grad_norm": 1.379513812331831, "learning_rate": 2.2837197182634484e-06, "loss": 0.6263, "step": 6780 }, { "epoch": 0.69, "grad_norm": 1.4910363494625323, "learning_rate": 2.2823315219001284e-06, "loss": 0.499, "step": 6781 }, { "epoch": 0.69, "grad_norm": 1.6281118867663456, "learning_rate": 2.2809436227995242e-06, "loss": 0.616, "step": 6782 }, { "epoch": 0.69, "grad_norm": 1.4151089640905399, "learning_rate": 2.279556021113449e-06, "loss": 0.65, "step": 6783 }, { "epoch": 0.69, "grad_norm": 1.4743054337720838, "learning_rate": 2.2781687169936794e-06, "loss": 0.582, "step": 6784 }, { "epoch": 0.69, "grad_norm": 1.5302272790131888, "learning_rate": 2.2767817105919586e-06, "loss": 0.6106, "step": 6785 }, { "epoch": 0.69, "grad_norm": 1.4260839026050367, "learning_rate": 2.2753950020600023e-06, "loss": 0.6418, "step": 6786 }, { "epoch": 0.69, "grad_norm": 1.6863439608258501, "learning_rate": 2.2740085915494924e-06, "loss": 0.712, "step": 6787 }, { "epoch": 0.69, "grad_norm": 1.9076911248661115, "learning_rate": 2.2726224792120737e-06, "loss": 0.7253, "step": 6788 }, { "epoch": 0.69, "grad_norm": 1.4453568354264152, "learning_rate": 2.2712366651993614e-06, "loss": 0.6502, "step": 6789 }, { "epoch": 0.69, "grad_norm": 1.2846257165374992, "learning_rate": 2.269851149662939e-06, "loss": 0.6086, "step": 6790 }, { "epoch": 0.69, "grad_norm": 1.445871701464327, "learning_rate": 2.2684659327543545e-06, "loss": 0.7119, "step": 6791 }, { "epoch": 0.69, "grad_norm": 1.5084287129072242, "learning_rate": 2.2670810146251283e-06, "loss": 0.7165, "step": 6792 }, { "epoch": 0.69, "grad_norm": 1.4457248707573414, "learning_rate": 2.265696395426743e-06, "loss": 0.648, "step": 6793 }, { "epoch": 0.69, "grad_norm": 1.9884593417598768, "learning_rate": 2.2643120753106484e-06, "loss": 0.5933, "step": 6794 }, { "epoch": 0.69, "grad_norm": 1.5471224427257775, "learning_rate": 2.2629280544282678e-06, "loss": 0.7094, "step": 6795 }, { "epoch": 0.69, "grad_norm": 1.6531928526619613, "learning_rate": 2.2615443329309825e-06, "loss": 0.639, "step": 6796 }, { "epoch": 0.69, "grad_norm": 1.5505013892638868, "learning_rate": 2.260160910970151e-06, "loss": 0.5861, "step": 6797 }, { "epoch": 0.69, "grad_norm": 1.2226386739390334, "learning_rate": 2.2587777886970918e-06, "loss": 0.5828, "step": 6798 }, { "epoch": 0.69, "grad_norm": 1.9776848693042548, "learning_rate": 2.2573949662630913e-06, "loss": 0.649, "step": 6799 }, { "epoch": 0.69, "grad_norm": 1.516939095118093, "learning_rate": 2.2560124438194057e-06, "loss": 0.7096, "step": 6800 }, { "epoch": 0.69, "grad_norm": 1.5809268828846093, "learning_rate": 2.2546302215172604e-06, "loss": 0.725, "step": 6801 }, { "epoch": 0.69, "grad_norm": 1.5965968039263552, "learning_rate": 2.253248299507843e-06, "loss": 0.6435, "step": 6802 }, { "epoch": 0.69, "grad_norm": 1.4409568999773572, "learning_rate": 2.2518666779423078e-06, "loss": 0.6597, "step": 6803 }, { "epoch": 0.69, "grad_norm": 1.426676335050352, "learning_rate": 2.250485356971782e-06, "loss": 0.5596, "step": 6804 }, { "epoch": 0.69, "grad_norm": 1.5589950887311368, "learning_rate": 2.2491043367473544e-06, "loss": 0.5785, "step": 6805 }, { "epoch": 0.69, "grad_norm": 1.979720405073228, "learning_rate": 2.2477236174200857e-06, "loss": 0.708, "step": 6806 }, { "epoch": 0.7, "grad_norm": 1.3062340964268926, "learning_rate": 2.246343199140999e-06, "loss": 0.5308, "step": 6807 }, { "epoch": 0.7, "grad_norm": 1.4060862558985288, "learning_rate": 2.2449630820610857e-06, "loss": 0.6776, "step": 6808 }, { "epoch": 0.7, "grad_norm": 1.5438378190807067, "learning_rate": 2.2435832663313077e-06, "loss": 0.6, "step": 6809 }, { "epoch": 0.7, "grad_norm": 2.0330272787819443, "learning_rate": 2.2422037521025888e-06, "loss": 0.6633, "step": 6810 }, { "epoch": 0.7, "grad_norm": 1.4535810741995994, "learning_rate": 2.240824539525825e-06, "loss": 0.5473, "step": 6811 }, { "epoch": 0.7, "grad_norm": 1.7140565655016835, "learning_rate": 2.2394456287518756e-06, "loss": 0.615, "step": 6812 }, { "epoch": 0.7, "grad_norm": 1.9085411296165957, "learning_rate": 2.2380670199315657e-06, "loss": 0.6027, "step": 6813 }, { "epoch": 0.7, "grad_norm": 1.3460509737928148, "learning_rate": 2.236688713215691e-06, "loss": 0.633, "step": 6814 }, { "epoch": 0.7, "grad_norm": 1.5624335759353822, "learning_rate": 2.2353107087550163e-06, "loss": 0.6481, "step": 6815 }, { "epoch": 0.7, "grad_norm": 1.6127045841084198, "learning_rate": 2.2339330067002663e-06, "loss": 0.6231, "step": 6816 }, { "epoch": 0.7, "grad_norm": 1.5433512415779733, "learning_rate": 2.2325556072021347e-06, "loss": 0.6468, "step": 6817 }, { "epoch": 0.7, "grad_norm": 1.4315337861658448, "learning_rate": 2.2311785104112875e-06, "loss": 0.6595, "step": 6818 }, { "epoch": 0.7, "grad_norm": 1.6608646230989104, "learning_rate": 2.22980171647835e-06, "loss": 0.7132, "step": 6819 }, { "epoch": 0.7, "grad_norm": 1.607874163065441, "learning_rate": 2.228425225553922e-06, "loss": 0.6528, "step": 6820 }, { "epoch": 0.7, "grad_norm": 1.4153741301244878, "learning_rate": 2.2270490377885635e-06, "loss": 0.5914, "step": 6821 }, { "epoch": 0.7, "grad_norm": 1.5985830849100486, "learning_rate": 2.225673153332803e-06, "loss": 0.5728, "step": 6822 }, { "epoch": 0.7, "grad_norm": 1.493039362994748, "learning_rate": 2.2242975723371396e-06, "loss": 0.6303, "step": 6823 }, { "epoch": 0.7, "grad_norm": 1.3252671360719654, "learning_rate": 2.2229222949520334e-06, "loss": 0.5683, "step": 6824 }, { "epoch": 0.7, "grad_norm": 1.4248896876739412, "learning_rate": 2.221547321327918e-06, "loss": 0.6044, "step": 6825 }, { "epoch": 0.7, "grad_norm": 1.4337490098296686, "learning_rate": 2.2201726516151885e-06, "loss": 0.633, "step": 6826 }, { "epoch": 0.7, "grad_norm": 1.4875318107328814, "learning_rate": 2.218798285964206e-06, "loss": 0.7387, "step": 6827 }, { "epoch": 0.7, "grad_norm": 1.6686789366298915, "learning_rate": 2.2174242245253027e-06, "loss": 0.6321, "step": 6828 }, { "epoch": 0.7, "grad_norm": 1.5786405526816476, "learning_rate": 2.216050467448778e-06, "loss": 0.5851, "step": 6829 }, { "epoch": 0.7, "grad_norm": 1.443333103649339, "learning_rate": 2.214677014884892e-06, "loss": 0.6288, "step": 6830 }, { "epoch": 0.7, "grad_norm": 1.4476839647971318, "learning_rate": 2.213303866983875e-06, "loss": 0.6585, "step": 6831 }, { "epoch": 0.7, "grad_norm": 1.4606712306841265, "learning_rate": 2.211931023895927e-06, "loss": 0.5465, "step": 6832 }, { "epoch": 0.7, "grad_norm": 1.5766100329228443, "learning_rate": 2.2105584857712075e-06, "loss": 0.6803, "step": 6833 }, { "epoch": 0.7, "grad_norm": 1.475615452932063, "learning_rate": 2.2091862527598513e-06, "loss": 0.5538, "step": 6834 }, { "epoch": 0.7, "grad_norm": 1.5368608473147423, "learning_rate": 2.2078143250119534e-06, "loss": 0.7692, "step": 6835 }, { "epoch": 0.7, "grad_norm": 1.5429072442641156, "learning_rate": 2.206442702677575e-06, "loss": 0.6464, "step": 6836 }, { "epoch": 0.7, "grad_norm": 1.4168246504738315, "learning_rate": 2.20507138590675e-06, "loss": 0.6252, "step": 6837 }, { "epoch": 0.7, "grad_norm": 1.6668702354652563, "learning_rate": 2.2037003748494725e-06, "loss": 0.6991, "step": 6838 }, { "epoch": 0.7, "grad_norm": 1.3841116568967853, "learning_rate": 2.2023296696557077e-06, "loss": 0.5761, "step": 6839 }, { "epoch": 0.7, "grad_norm": 2.05200589899544, "learning_rate": 2.2009592704753846e-06, "loss": 0.4845, "step": 6840 }, { "epoch": 0.7, "grad_norm": 1.6179147754741665, "learning_rate": 2.1995891774583982e-06, "loss": 0.6597, "step": 6841 }, { "epoch": 0.7, "grad_norm": 1.5160803741194253, "learning_rate": 2.1982193907546117e-06, "loss": 0.6856, "step": 6842 }, { "epoch": 0.7, "grad_norm": 1.7248873087119319, "learning_rate": 2.196849910513858e-06, "loss": 0.5903, "step": 6843 }, { "epoch": 0.7, "grad_norm": 3.2144834836916765, "learning_rate": 2.19548073688593e-06, "loss": 0.7785, "step": 6844 }, { "epoch": 0.7, "grad_norm": 1.440572068653336, "learning_rate": 2.1941118700205884e-06, "loss": 0.5645, "step": 6845 }, { "epoch": 0.7, "grad_norm": 1.587641209240114, "learning_rate": 2.1927433100675657e-06, "loss": 0.6606, "step": 6846 }, { "epoch": 0.7, "grad_norm": 1.396850474537762, "learning_rate": 2.1913750571765534e-06, "loss": 0.6126, "step": 6847 }, { "epoch": 0.7, "grad_norm": 1.6088382412060038, "learning_rate": 2.1900071114972167e-06, "loss": 0.7734, "step": 6848 }, { "epoch": 0.7, "grad_norm": 1.438377734979212, "learning_rate": 2.1886394731791814e-06, "loss": 0.5902, "step": 6849 }, { "epoch": 0.7, "grad_norm": 1.7511831640224071, "learning_rate": 2.187272142372041e-06, "loss": 0.7085, "step": 6850 }, { "epoch": 0.7, "grad_norm": 1.8087249707407533, "learning_rate": 2.1859051192253588e-06, "loss": 0.7274, "step": 6851 }, { "epoch": 0.7, "grad_norm": 1.6492994290066605, "learning_rate": 2.184538403888659e-06, "loss": 0.7611, "step": 6852 }, { "epoch": 0.7, "grad_norm": 1.304350510565379, "learning_rate": 2.1831719965114383e-06, "loss": 0.5767, "step": 6853 }, { "epoch": 0.7, "grad_norm": 1.4427960990929458, "learning_rate": 2.1818058972431545e-06, "loss": 0.7035, "step": 6854 }, { "epoch": 0.7, "grad_norm": 1.4820900336448566, "learning_rate": 2.180440106233232e-06, "loss": 0.6388, "step": 6855 }, { "epoch": 0.7, "grad_norm": 1.444903831408709, "learning_rate": 2.179074623631065e-06, "loss": 0.5459, "step": 6856 }, { "epoch": 0.7, "grad_norm": 1.5439769388714093, "learning_rate": 2.1777094495860136e-06, "loss": 0.6528, "step": 6857 }, { "epoch": 0.7, "grad_norm": 1.4045492369653163, "learning_rate": 2.176344584247401e-06, "loss": 0.6028, "step": 6858 }, { "epoch": 0.7, "grad_norm": 1.4751477803285018, "learning_rate": 2.1749800277645167e-06, "loss": 0.6667, "step": 6859 }, { "epoch": 0.7, "grad_norm": 1.491606799825751, "learning_rate": 2.173615780286621e-06, "loss": 0.5849, "step": 6860 }, { "epoch": 0.7, "grad_norm": 1.4882994395471818, "learning_rate": 2.1722518419629334e-06, "loss": 0.6312, "step": 6861 }, { "epoch": 0.7, "grad_norm": 1.214508774302571, "learning_rate": 2.170888212942649e-06, "loss": 0.5749, "step": 6862 }, { "epoch": 0.7, "grad_norm": 1.3434895455896605, "learning_rate": 2.1695248933749198e-06, "loss": 0.6493, "step": 6863 }, { "epoch": 0.7, "grad_norm": 1.4491751490103573, "learning_rate": 2.1681618834088668e-06, "loss": 0.5999, "step": 6864 }, { "epoch": 0.7, "grad_norm": 1.62989164380225, "learning_rate": 2.166799183193582e-06, "loss": 0.562, "step": 6865 }, { "epoch": 0.7, "grad_norm": 1.5816806281158393, "learning_rate": 2.1654367928781158e-06, "loss": 0.6532, "step": 6866 }, { "epoch": 0.7, "grad_norm": 1.337348342363228, "learning_rate": 2.164074712611491e-06, "loss": 0.6166, "step": 6867 }, { "epoch": 0.7, "grad_norm": 1.4957592513821956, "learning_rate": 2.1627129425426946e-06, "loss": 0.5617, "step": 6868 }, { "epoch": 0.7, "grad_norm": 1.5715639678274322, "learning_rate": 2.1613514828206755e-06, "loss": 0.6657, "step": 6869 }, { "epoch": 0.7, "grad_norm": 1.4836796016721407, "learning_rate": 2.1599903335943538e-06, "loss": 0.6016, "step": 6870 }, { "epoch": 0.7, "grad_norm": 1.5374142812655445, "learning_rate": 2.1586294950126173e-06, "loss": 0.6448, "step": 6871 }, { "epoch": 0.7, "grad_norm": 1.4671863802713854, "learning_rate": 2.157268967224314e-06, "loss": 0.6025, "step": 6872 }, { "epoch": 0.7, "grad_norm": 1.5761490496686412, "learning_rate": 2.1559087503782584e-06, "loss": 0.7356, "step": 6873 }, { "epoch": 0.7, "grad_norm": 1.6271684803021853, "learning_rate": 2.154548844623237e-06, "loss": 0.6138, "step": 6874 }, { "epoch": 0.7, "grad_norm": 1.5562716963984549, "learning_rate": 2.1531892501079956e-06, "loss": 0.7611, "step": 6875 }, { "epoch": 0.7, "grad_norm": 1.6461834139785538, "learning_rate": 2.1518299669812503e-06, "loss": 0.6316, "step": 6876 }, { "epoch": 0.7, "grad_norm": 1.5920710216259777, "learning_rate": 2.150470995391682e-06, "loss": 0.6447, "step": 6877 }, { "epoch": 0.7, "grad_norm": 1.379889060462039, "learning_rate": 2.1491123354879336e-06, "loss": 0.6901, "step": 6878 }, { "epoch": 0.7, "grad_norm": 1.2977493199921712, "learning_rate": 2.147753987418622e-06, "loss": 0.6709, "step": 6879 }, { "epoch": 0.7, "grad_norm": 1.5413697597839267, "learning_rate": 2.1463959513323212e-06, "loss": 0.722, "step": 6880 }, { "epoch": 0.7, "grad_norm": 1.3823312580699125, "learning_rate": 2.145038227377579e-06, "loss": 0.6898, "step": 6881 }, { "epoch": 0.7, "grad_norm": 1.4828453231790826, "learning_rate": 2.143680815702904e-06, "loss": 0.5929, "step": 6882 }, { "epoch": 0.7, "grad_norm": 1.6118312861473554, "learning_rate": 2.1423237164567696e-06, "loss": 0.6192, "step": 6883 }, { "epoch": 0.7, "grad_norm": 1.447331608395856, "learning_rate": 2.140966929787619e-06, "loss": 0.6616, "step": 6884 }, { "epoch": 0.7, "grad_norm": 1.5102034181089503, "learning_rate": 2.139610455843862e-06, "loss": 0.7085, "step": 6885 }, { "epoch": 0.7, "grad_norm": 1.4903671815710373, "learning_rate": 2.13825429477387e-06, "loss": 0.5739, "step": 6886 }, { "epoch": 0.7, "grad_norm": 1.597230142017093, "learning_rate": 2.136898446725979e-06, "loss": 0.7115, "step": 6887 }, { "epoch": 0.7, "grad_norm": 1.4604830941853104, "learning_rate": 2.1355429118484988e-06, "loss": 0.6304, "step": 6888 }, { "epoch": 0.7, "grad_norm": 1.5597918235927486, "learning_rate": 2.1341876902896957e-06, "loss": 0.5294, "step": 6889 }, { "epoch": 0.7, "grad_norm": 1.4375377921329375, "learning_rate": 2.1328327821978096e-06, "loss": 0.6388, "step": 6890 }, { "epoch": 0.7, "grad_norm": 1.4401357134713104, "learning_rate": 2.1314781877210398e-06, "loss": 0.6497, "step": 6891 }, { "epoch": 0.7, "grad_norm": 1.3338029930684512, "learning_rate": 2.1301239070075536e-06, "loss": 0.5907, "step": 6892 }, { "epoch": 0.7, "grad_norm": 1.5239168180206928, "learning_rate": 2.128769940205487e-06, "loss": 0.6039, "step": 6893 }, { "epoch": 0.7, "grad_norm": 1.5159953137932038, "learning_rate": 2.127416287462935e-06, "loss": 0.6534, "step": 6894 }, { "epoch": 0.7, "grad_norm": 1.5925051415318097, "learning_rate": 2.1260629489279662e-06, "loss": 0.6433, "step": 6895 }, { "epoch": 0.7, "grad_norm": 1.4805425898131346, "learning_rate": 2.124709924748609e-06, "loss": 0.738, "step": 6896 }, { "epoch": 0.7, "grad_norm": 1.5004077290396005, "learning_rate": 2.123357215072858e-06, "loss": 0.6526, "step": 6897 }, { "epoch": 0.7, "grad_norm": 1.3676829432725475, "learning_rate": 2.1220048200486755e-06, "loss": 0.6986, "step": 6898 }, { "epoch": 0.7, "grad_norm": 1.4142454514874796, "learning_rate": 2.120652739823991e-06, "loss": 0.6195, "step": 6899 }, { "epoch": 0.7, "grad_norm": 1.624023879812534, "learning_rate": 2.119300974546695e-06, "loss": 0.638, "step": 6900 }, { "epoch": 0.7, "grad_norm": 1.4749916394007607, "learning_rate": 2.1179495243646436e-06, "loss": 0.6314, "step": 6901 }, { "epoch": 0.7, "grad_norm": 1.3716927344846273, "learning_rate": 2.1165983894256647e-06, "loss": 0.5913, "step": 6902 }, { "epoch": 0.7, "grad_norm": 1.6109484770358562, "learning_rate": 2.1152475698775437e-06, "loss": 0.747, "step": 6903 }, { "epoch": 0.7, "grad_norm": 1.3879036849152198, "learning_rate": 2.1138970658680386e-06, "loss": 0.6682, "step": 6904 }, { "epoch": 0.71, "grad_norm": 1.436969991791569, "learning_rate": 2.112546877544868e-06, "loss": 0.6463, "step": 6905 }, { "epoch": 0.71, "grad_norm": 1.4496115698401406, "learning_rate": 2.1111970050557155e-06, "loss": 0.6059, "step": 6906 }, { "epoch": 0.71, "grad_norm": 1.4572728085479696, "learning_rate": 2.109847448548237e-06, "loss": 0.6737, "step": 6907 }, { "epoch": 0.71, "grad_norm": 1.4586554741230795, "learning_rate": 2.1084982081700432e-06, "loss": 0.6597, "step": 6908 }, { "epoch": 0.71, "grad_norm": 1.2502958596636993, "learning_rate": 2.107149284068722e-06, "loss": 0.6326, "step": 6909 }, { "epoch": 0.71, "grad_norm": 1.4738593031413425, "learning_rate": 2.1058006763918173e-06, "loss": 0.7, "step": 6910 }, { "epoch": 0.71, "grad_norm": 1.438803193204573, "learning_rate": 2.1044523852868404e-06, "loss": 0.6474, "step": 6911 }, { "epoch": 0.71, "grad_norm": 1.57967004477298, "learning_rate": 2.1031044109012722e-06, "loss": 0.731, "step": 6912 }, { "epoch": 0.71, "grad_norm": 1.6762822018695525, "learning_rate": 2.1017567533825566e-06, "loss": 0.6871, "step": 6913 }, { "epoch": 0.71, "grad_norm": 1.445666841369969, "learning_rate": 2.100409412878102e-06, "loss": 0.5924, "step": 6914 }, { "epoch": 0.71, "grad_norm": 1.8313648041266106, "learning_rate": 2.0990623895352797e-06, "loss": 0.7019, "step": 6915 }, { "epoch": 0.71, "grad_norm": 1.490785406803361, "learning_rate": 2.0977156835014334e-06, "loss": 0.7417, "step": 6916 }, { "epoch": 0.71, "grad_norm": 1.7076925342135962, "learning_rate": 2.096369294923864e-06, "loss": 0.5932, "step": 6917 }, { "epoch": 0.71, "grad_norm": 1.2979263589650214, "learning_rate": 2.095023223949845e-06, "loss": 0.6204, "step": 6918 }, { "epoch": 0.71, "grad_norm": 1.5218734437444503, "learning_rate": 2.09367747072661e-06, "loss": 0.6877, "step": 6919 }, { "epoch": 0.71, "grad_norm": 1.5791198712862127, "learning_rate": 2.092332035401358e-06, "loss": 0.668, "step": 6920 }, { "epoch": 0.71, "grad_norm": 1.4018877224569535, "learning_rate": 2.0909869181212576e-06, "loss": 0.6237, "step": 6921 }, { "epoch": 0.71, "grad_norm": 1.5785427068317168, "learning_rate": 2.0896421190334378e-06, "loss": 0.5794, "step": 6922 }, { "epoch": 0.71, "grad_norm": 1.467476720928993, "learning_rate": 2.088297638284996e-06, "loss": 0.7037, "step": 6923 }, { "epoch": 0.71, "grad_norm": 1.5033585014794784, "learning_rate": 2.0869534760229943e-06, "loss": 0.679, "step": 6924 }, { "epoch": 0.71, "grad_norm": 1.634509568009155, "learning_rate": 2.0856096323944554e-06, "loss": 0.733, "step": 6925 }, { "epoch": 0.71, "grad_norm": 1.677267459231214, "learning_rate": 2.084266107546373e-06, "loss": 0.5503, "step": 6926 }, { "epoch": 0.71, "grad_norm": 1.5352783917855823, "learning_rate": 2.0829229016257074e-06, "loss": 0.637, "step": 6927 }, { "epoch": 0.71, "grad_norm": 1.4666835536182081, "learning_rate": 2.081580014779377e-06, "loss": 0.7213, "step": 6928 }, { "epoch": 0.71, "grad_norm": 1.7380032227481803, "learning_rate": 2.0802374471542676e-06, "loss": 0.7035, "step": 6929 }, { "epoch": 0.71, "grad_norm": 1.6275122085839577, "learning_rate": 2.0788951988972343e-06, "loss": 0.6676, "step": 6930 }, { "epoch": 0.71, "grad_norm": 1.4912467941516983, "learning_rate": 2.077553270155092e-06, "loss": 0.7288, "step": 6931 }, { "epoch": 0.71, "grad_norm": 1.533944065036136, "learning_rate": 2.0762116610746253e-06, "loss": 0.6323, "step": 6932 }, { "epoch": 0.71, "grad_norm": 1.441177979598929, "learning_rate": 2.0748703718025803e-06, "loss": 0.5862, "step": 6933 }, { "epoch": 0.71, "grad_norm": 1.4494044547431166, "learning_rate": 2.0735294024856665e-06, "loss": 0.6522, "step": 6934 }, { "epoch": 0.71, "grad_norm": 1.316518035061543, "learning_rate": 2.0721887532705665e-06, "loss": 0.5929, "step": 6935 }, { "epoch": 0.71, "grad_norm": 1.3481585123202549, "learning_rate": 2.070848424303917e-06, "loss": 0.669, "step": 6936 }, { "epoch": 0.71, "grad_norm": 1.4207586171064073, "learning_rate": 2.0695084157323303e-06, "loss": 0.6584, "step": 6937 }, { "epoch": 0.71, "grad_norm": 1.5157694385869587, "learning_rate": 2.068168727702376e-06, "loss": 0.581, "step": 6938 }, { "epoch": 0.71, "grad_norm": 1.7440030597048903, "learning_rate": 2.06682936036059e-06, "loss": 0.6818, "step": 6939 }, { "epoch": 0.71, "grad_norm": 1.460686205930441, "learning_rate": 2.0654903138534753e-06, "loss": 0.6232, "step": 6940 }, { "epoch": 0.71, "grad_norm": 1.4633033206219392, "learning_rate": 2.064151588327501e-06, "loss": 0.6022, "step": 6941 }, { "epoch": 0.71, "grad_norm": 1.6588576814553189, "learning_rate": 2.0628131839290978e-06, "loss": 0.656, "step": 6942 }, { "epoch": 0.71, "grad_norm": 1.5053407299300992, "learning_rate": 2.0614751008046597e-06, "loss": 0.6729, "step": 6943 }, { "epoch": 0.71, "grad_norm": 1.4474345643497986, "learning_rate": 2.0601373391005526e-06, "loss": 0.6736, "step": 6944 }, { "epoch": 0.71, "grad_norm": 1.468899951692121, "learning_rate": 2.0587998989630984e-06, "loss": 0.5802, "step": 6945 }, { "epoch": 0.71, "grad_norm": 1.570771744790822, "learning_rate": 2.0574627805385934e-06, "loss": 0.6758, "step": 6946 }, { "epoch": 0.71, "grad_norm": 1.3175782613819425, "learning_rate": 2.05612598397329e-06, "loss": 0.6249, "step": 6947 }, { "epoch": 0.71, "grad_norm": 1.5225682440592085, "learning_rate": 2.0547895094134086e-06, "loss": 0.6856, "step": 6948 }, { "epoch": 0.71, "grad_norm": 1.428485798761619, "learning_rate": 2.053453357005138e-06, "loss": 0.7213, "step": 6949 }, { "epoch": 0.71, "grad_norm": 1.3420864208858678, "learning_rate": 2.052117526894625e-06, "loss": 0.6135, "step": 6950 }, { "epoch": 0.71, "grad_norm": 1.4992452963752732, "learning_rate": 2.0507820192279883e-06, "loss": 0.6966, "step": 6951 }, { "epoch": 0.71, "grad_norm": 1.3976540708018146, "learning_rate": 2.0494468341513056e-06, "loss": 0.6889, "step": 6952 }, { "epoch": 0.71, "grad_norm": 1.419189319123629, "learning_rate": 2.0481119718106205e-06, "loss": 0.6411, "step": 6953 }, { "epoch": 0.71, "grad_norm": 1.5654211130045186, "learning_rate": 2.0467774323519442e-06, "loss": 0.5943, "step": 6954 }, { "epoch": 0.71, "grad_norm": 1.5204993941010305, "learning_rate": 2.0454432159212513e-06, "loss": 0.6672, "step": 6955 }, { "epoch": 0.71, "grad_norm": 1.4986115089193626, "learning_rate": 2.0441093226644793e-06, "loss": 0.6896, "step": 6956 }, { "epoch": 0.71, "grad_norm": 2.627980327222461, "learning_rate": 2.042775752727531e-06, "loss": 0.6241, "step": 6957 }, { "epoch": 0.71, "grad_norm": 1.597509273253095, "learning_rate": 2.041442506256273e-06, "loss": 0.7207, "step": 6958 }, { "epoch": 0.71, "grad_norm": 1.5212453088278197, "learning_rate": 2.04010958339654e-06, "loss": 0.6815, "step": 6959 }, { "epoch": 0.71, "grad_norm": 1.5089767982377373, "learning_rate": 2.0387769842941307e-06, "loss": 0.5462, "step": 6960 }, { "epoch": 0.71, "grad_norm": 1.3414490454315755, "learning_rate": 2.037444709094804e-06, "loss": 0.6286, "step": 6961 }, { "epoch": 0.71, "grad_norm": 1.3396192049864073, "learning_rate": 2.0361127579442858e-06, "loss": 0.5836, "step": 6962 }, { "epoch": 0.71, "grad_norm": 1.4969327119666869, "learning_rate": 2.0347811309882697e-06, "loss": 0.5236, "step": 6963 }, { "epoch": 0.71, "grad_norm": 1.6129377066733024, "learning_rate": 2.033449828372408e-06, "loss": 0.7178, "step": 6964 }, { "epoch": 0.71, "grad_norm": 1.5306834635441111, "learning_rate": 2.0321188502423234e-06, "loss": 0.6394, "step": 6965 }, { "epoch": 0.71, "grad_norm": 1.5266463338518785, "learning_rate": 2.0307881967435993e-06, "loss": 0.7449, "step": 6966 }, { "epoch": 0.71, "grad_norm": 2.467000532922343, "learning_rate": 2.0294578680217826e-06, "loss": 0.6536, "step": 6967 }, { "epoch": 0.71, "grad_norm": 1.6693585963406874, "learning_rate": 2.0281278642223885e-06, "loss": 0.6961, "step": 6968 }, { "epoch": 0.71, "grad_norm": 1.3064523331413018, "learning_rate": 2.026798185490896e-06, "loss": 0.5513, "step": 6969 }, { "epoch": 0.71, "grad_norm": 1.5057477409722448, "learning_rate": 2.025468831972747e-06, "loss": 0.5523, "step": 6970 }, { "epoch": 0.71, "grad_norm": 1.4176395769188574, "learning_rate": 2.0241398038133468e-06, "loss": 0.6244, "step": 6971 }, { "epoch": 0.71, "grad_norm": 1.438905759149313, "learning_rate": 2.022811101158066e-06, "loss": 0.4751, "step": 6972 }, { "epoch": 0.71, "grad_norm": 1.464881559152636, "learning_rate": 2.0214827241522407e-06, "loss": 0.6405, "step": 6973 }, { "epoch": 0.71, "grad_norm": 1.4449788922041737, "learning_rate": 2.0201546729411727e-06, "loss": 0.5401, "step": 6974 }, { "epoch": 0.71, "grad_norm": 2.0011751812785348, "learning_rate": 2.0188269476701255e-06, "loss": 0.6778, "step": 6975 }, { "epoch": 0.71, "grad_norm": 1.6517439992382483, "learning_rate": 2.017499548484325e-06, "loss": 0.6219, "step": 6976 }, { "epoch": 0.71, "grad_norm": 1.4247751104973954, "learning_rate": 2.016172475528968e-06, "loss": 0.6919, "step": 6977 }, { "epoch": 0.71, "grad_norm": 1.3470944340473785, "learning_rate": 2.0148457289492084e-06, "loss": 0.6387, "step": 6978 }, { "epoch": 0.71, "grad_norm": 1.5613758700480278, "learning_rate": 2.013519308890171e-06, "loss": 0.6736, "step": 6979 }, { "epoch": 0.71, "grad_norm": 1.705149999599108, "learning_rate": 2.01219321549694e-06, "loss": 0.651, "step": 6980 }, { "epoch": 0.71, "grad_norm": 1.46644247766161, "learning_rate": 2.0108674489145637e-06, "loss": 0.6034, "step": 6981 }, { "epoch": 0.71, "grad_norm": 1.5499459268598836, "learning_rate": 2.0095420092880583e-06, "loss": 0.678, "step": 6982 }, { "epoch": 0.71, "grad_norm": 1.6144289116791561, "learning_rate": 2.0082168967624046e-06, "loss": 0.6135, "step": 6983 }, { "epoch": 0.71, "grad_norm": 1.3584365492745363, "learning_rate": 2.0068921114825425e-06, "loss": 0.6378, "step": 6984 }, { "epoch": 0.71, "grad_norm": 1.6840628943450087, "learning_rate": 2.0055676535933807e-06, "loss": 0.5384, "step": 6985 }, { "epoch": 0.71, "grad_norm": 1.4613310867540958, "learning_rate": 2.004243523239787e-06, "loss": 0.5428, "step": 6986 }, { "epoch": 0.71, "grad_norm": 1.3422401453658783, "learning_rate": 2.002919720566599e-06, "loss": 0.5573, "step": 6987 }, { "epoch": 0.71, "grad_norm": 1.3286427025103777, "learning_rate": 2.0015962457186184e-06, "loss": 0.6459, "step": 6988 }, { "epoch": 0.71, "grad_norm": 1.5821499013924456, "learning_rate": 2.000273098840607e-06, "loss": 0.6504, "step": 6989 }, { "epoch": 0.71, "grad_norm": 1.2899898190935035, "learning_rate": 1.9989502800772912e-06, "loss": 0.6814, "step": 6990 }, { "epoch": 0.71, "grad_norm": 1.555385282290105, "learning_rate": 1.9976277895733664e-06, "loss": 0.7313, "step": 6991 }, { "epoch": 0.71, "grad_norm": 1.403354169541362, "learning_rate": 1.9963056274734843e-06, "loss": 0.6901, "step": 6992 }, { "epoch": 0.71, "grad_norm": 1.424596592698739, "learning_rate": 1.9949837939222693e-06, "loss": 0.6644, "step": 6993 }, { "epoch": 0.71, "grad_norm": 1.5030168128051562, "learning_rate": 1.9936622890643037e-06, "loss": 0.7485, "step": 6994 }, { "epoch": 0.71, "grad_norm": 1.3710265885040966, "learning_rate": 1.9923411130441333e-06, "loss": 0.6467, "step": 6995 }, { "epoch": 0.71, "grad_norm": 1.4112808665298944, "learning_rate": 1.991020266006273e-06, "loss": 0.6405, "step": 6996 }, { "epoch": 0.71, "grad_norm": 1.6308257731448204, "learning_rate": 1.9896997480952003e-06, "loss": 0.6302, "step": 6997 }, { "epoch": 0.71, "grad_norm": 1.6381446463189788, "learning_rate": 1.988379559455354e-06, "loss": 0.6676, "step": 6998 }, { "epoch": 0.71, "grad_norm": 1.490888557333161, "learning_rate": 1.9870597002311383e-06, "loss": 0.6508, "step": 6999 }, { "epoch": 0.71, "grad_norm": 1.5372835771427735, "learning_rate": 1.985740170566919e-06, "loss": 0.6416, "step": 7000 }, { "epoch": 0.71, "grad_norm": 1.5652927171292594, "learning_rate": 1.9844209706070305e-06, "loss": 0.6334, "step": 7001 }, { "epoch": 0.72, "grad_norm": 1.4690035946590896, "learning_rate": 1.983102100495771e-06, "loss": 0.6368, "step": 7002 }, { "epoch": 0.72, "grad_norm": 1.6128812121401024, "learning_rate": 1.981783560377399e-06, "loss": 0.6538, "step": 7003 }, { "epoch": 0.72, "grad_norm": 1.302326520001272, "learning_rate": 1.980465350396136e-06, "loss": 0.6151, "step": 7004 }, { "epoch": 0.72, "grad_norm": 1.5974706707681943, "learning_rate": 1.979147470696173e-06, "loss": 0.6211, "step": 7005 }, { "epoch": 0.72, "grad_norm": 1.4711192770927701, "learning_rate": 1.9778299214216585e-06, "loss": 0.5718, "step": 7006 }, { "epoch": 0.72, "grad_norm": 1.5029200594747765, "learning_rate": 1.976512702716712e-06, "loss": 0.6158, "step": 7007 }, { "epoch": 0.72, "grad_norm": 1.9103893613581022, "learning_rate": 1.975195814725411e-06, "loss": 0.5837, "step": 7008 }, { "epoch": 0.72, "grad_norm": 1.4625838540446583, "learning_rate": 1.973879257591797e-06, "loss": 0.6642, "step": 7009 }, { "epoch": 0.72, "grad_norm": 1.402221972716386, "learning_rate": 1.972563031459878e-06, "loss": 0.5818, "step": 7010 }, { "epoch": 0.72, "grad_norm": 1.687680454095671, "learning_rate": 1.9712471364736275e-06, "loss": 0.7308, "step": 7011 }, { "epoch": 0.72, "grad_norm": 1.6068915332543718, "learning_rate": 1.969931572776978e-06, "loss": 0.5945, "step": 7012 }, { "epoch": 0.72, "grad_norm": 2.0452854807298775, "learning_rate": 1.9686163405138276e-06, "loss": 0.639, "step": 7013 }, { "epoch": 0.72, "grad_norm": 1.3940877454717353, "learning_rate": 1.967301439828037e-06, "loss": 0.6429, "step": 7014 }, { "epoch": 0.72, "grad_norm": 1.4406165855393676, "learning_rate": 1.965986870863434e-06, "loss": 0.6839, "step": 7015 }, { "epoch": 0.72, "grad_norm": 1.562199426155801, "learning_rate": 1.9646726337638096e-06, "loss": 0.6959, "step": 7016 }, { "epoch": 0.72, "grad_norm": 1.6474496494543527, "learning_rate": 1.963358728672915e-06, "loss": 0.6472, "step": 7017 }, { "epoch": 0.72, "grad_norm": 1.365208336908609, "learning_rate": 1.962045155734466e-06, "loss": 0.6187, "step": 7018 }, { "epoch": 0.72, "grad_norm": 1.5023559937455127, "learning_rate": 1.960731915092146e-06, "loss": 0.6147, "step": 7019 }, { "epoch": 0.72, "grad_norm": 1.4289183085308548, "learning_rate": 1.9594190068895967e-06, "loss": 0.5618, "step": 7020 }, { "epoch": 0.72, "grad_norm": 1.5086744912337566, "learning_rate": 1.958106431270429e-06, "loss": 0.6162, "step": 7021 }, { "epoch": 0.72, "grad_norm": 1.6678599830398453, "learning_rate": 1.9567941883782122e-06, "loss": 0.6842, "step": 7022 }, { "epoch": 0.72, "grad_norm": 1.341217985993772, "learning_rate": 1.95548227835648e-06, "loss": 0.5302, "step": 7023 }, { "epoch": 0.72, "grad_norm": 1.4328782792277095, "learning_rate": 1.954170701348734e-06, "loss": 0.5912, "step": 7024 }, { "epoch": 0.72, "grad_norm": 1.4352808502694585, "learning_rate": 1.9528594574984363e-06, "loss": 0.5951, "step": 7025 }, { "epoch": 0.72, "grad_norm": 1.8144281390661365, "learning_rate": 1.951548546949012e-06, "loss": 0.6441, "step": 7026 }, { "epoch": 0.72, "grad_norm": 1.495753663902644, "learning_rate": 1.9502379698438506e-06, "loss": 0.6364, "step": 7027 }, { "epoch": 0.72, "grad_norm": 1.374396639950476, "learning_rate": 1.948927726326303e-06, "loss": 0.6245, "step": 7028 }, { "epoch": 0.72, "grad_norm": 1.5782097701359927, "learning_rate": 1.9476178165396875e-06, "loss": 0.5676, "step": 7029 }, { "epoch": 0.72, "grad_norm": 1.5669807046047977, "learning_rate": 1.946308240627286e-06, "loss": 0.6664, "step": 7030 }, { "epoch": 0.72, "grad_norm": 1.407042840176263, "learning_rate": 1.9449989987323396e-06, "loss": 0.5494, "step": 7031 }, { "epoch": 0.72, "grad_norm": 1.4856235648800606, "learning_rate": 1.943690090998054e-06, "loss": 0.6745, "step": 7032 }, { "epoch": 0.72, "grad_norm": 1.4682781147751574, "learning_rate": 1.9423815175676027e-06, "loss": 0.624, "step": 7033 }, { "epoch": 0.72, "grad_norm": 1.5336054308042548, "learning_rate": 1.941073278584116e-06, "loss": 0.667, "step": 7034 }, { "epoch": 0.72, "grad_norm": 1.4847746838294578, "learning_rate": 1.9397653741906947e-06, "loss": 0.639, "step": 7035 }, { "epoch": 0.72, "grad_norm": 1.3443273985966757, "learning_rate": 1.938457804530397e-06, "loss": 0.657, "step": 7036 }, { "epoch": 0.72, "grad_norm": 1.438982716427876, "learning_rate": 1.937150569746246e-06, "loss": 0.6928, "step": 7037 }, { "epoch": 0.72, "grad_norm": 1.5003539902818204, "learning_rate": 1.9358436699812306e-06, "loss": 0.6873, "step": 7038 }, { "epoch": 0.72, "grad_norm": 1.535226698033308, "learning_rate": 1.9345371053783024e-06, "loss": 0.6584, "step": 7039 }, { "epoch": 0.72, "grad_norm": 1.4386736173737797, "learning_rate": 1.9332308760803747e-06, "loss": 0.5651, "step": 7040 }, { "epoch": 0.72, "grad_norm": 1.7019841729341738, "learning_rate": 1.9319249822303242e-06, "loss": 0.659, "step": 7041 }, { "epoch": 0.72, "grad_norm": 1.4445667041414973, "learning_rate": 1.9306194239709908e-06, "loss": 0.5398, "step": 7042 }, { "epoch": 0.72, "grad_norm": 1.2727169769010356, "learning_rate": 1.929314201445179e-06, "loss": 0.5843, "step": 7043 }, { "epoch": 0.72, "grad_norm": 1.5133679269286917, "learning_rate": 1.928009314795658e-06, "loss": 0.5845, "step": 7044 }, { "epoch": 0.72, "grad_norm": 1.502609940432546, "learning_rate": 1.926704764165157e-06, "loss": 0.6658, "step": 7045 }, { "epoch": 0.72, "grad_norm": 1.5366969257359533, "learning_rate": 1.925400549696368e-06, "loss": 0.5499, "step": 7046 }, { "epoch": 0.72, "grad_norm": 1.6472787259391617, "learning_rate": 1.924096671531951e-06, "loss": 0.6313, "step": 7047 }, { "epoch": 0.72, "grad_norm": 1.4378999208449166, "learning_rate": 1.922793129814523e-06, "loss": 0.6384, "step": 7048 }, { "epoch": 0.72, "grad_norm": 1.5711676396714782, "learning_rate": 1.921489924686671e-06, "loss": 0.6697, "step": 7049 }, { "epoch": 0.72, "grad_norm": 1.4953679803903357, "learning_rate": 1.920187056290939e-06, "loss": 0.6336, "step": 7050 }, { "epoch": 0.72, "grad_norm": 1.412869983815941, "learning_rate": 1.9188845247698358e-06, "loss": 0.6161, "step": 7051 }, { "epoch": 0.72, "grad_norm": 1.5682862416814993, "learning_rate": 1.917582330265836e-06, "loss": 0.6255, "step": 7052 }, { "epoch": 0.72, "grad_norm": 1.4450971016930416, "learning_rate": 1.9162804729213775e-06, "loss": 0.6176, "step": 7053 }, { "epoch": 0.72, "grad_norm": 1.447270748777921, "learning_rate": 1.9149789528788566e-06, "loss": 0.696, "step": 7054 }, { "epoch": 0.72, "grad_norm": 1.3664205817118342, "learning_rate": 1.913677770280637e-06, "loss": 0.6902, "step": 7055 }, { "epoch": 0.72, "grad_norm": 1.3252094437419457, "learning_rate": 1.912376925269041e-06, "loss": 0.5776, "step": 7056 }, { "epoch": 0.72, "grad_norm": 1.4578195082452394, "learning_rate": 1.91107641798636e-06, "loss": 0.6613, "step": 7057 }, { "epoch": 0.72, "grad_norm": 1.31733321005593, "learning_rate": 1.9097762485748463e-06, "loss": 0.4911, "step": 7058 }, { "epoch": 0.72, "grad_norm": 1.524792503417221, "learning_rate": 1.908476417176713e-06, "loss": 0.6358, "step": 7059 }, { "epoch": 0.72, "grad_norm": 1.5419288973114036, "learning_rate": 1.9071769239341354e-06, "loss": 0.669, "step": 7060 }, { "epoch": 0.72, "grad_norm": 1.4681557556729001, "learning_rate": 1.9058777689892583e-06, "loss": 0.6289, "step": 7061 }, { "epoch": 0.72, "grad_norm": 1.6560571398863555, "learning_rate": 1.9045789524841819e-06, "loss": 0.708, "step": 7062 }, { "epoch": 0.72, "grad_norm": 1.7766784358191992, "learning_rate": 1.903280474560975e-06, "loss": 0.6293, "step": 7063 }, { "epoch": 0.72, "grad_norm": 1.312618104028948, "learning_rate": 1.9019823353616663e-06, "loss": 0.6082, "step": 7064 }, { "epoch": 0.72, "grad_norm": 1.2498283388733453, "learning_rate": 1.9006845350282466e-06, "loss": 0.5726, "step": 7065 }, { "epoch": 0.72, "grad_norm": 1.286158696326655, "learning_rate": 1.899387073702672e-06, "loss": 0.5569, "step": 7066 }, { "epoch": 0.72, "grad_norm": 1.6897025850760807, "learning_rate": 1.8980899515268642e-06, "loss": 0.7029, "step": 7067 }, { "epoch": 0.72, "grad_norm": 1.3473619949008642, "learning_rate": 1.896793168642701e-06, "loss": 0.5783, "step": 7068 }, { "epoch": 0.72, "grad_norm": 1.5423911395435168, "learning_rate": 1.8954967251920276e-06, "loss": 0.7124, "step": 7069 }, { "epoch": 0.72, "grad_norm": 1.6710019600948778, "learning_rate": 1.8942006213166487e-06, "loss": 0.7075, "step": 7070 }, { "epoch": 0.72, "grad_norm": 1.7205101967487664, "learning_rate": 1.892904857158336e-06, "loss": 0.6893, "step": 7071 }, { "epoch": 0.72, "grad_norm": 1.3069918213549958, "learning_rate": 1.8916094328588236e-06, "loss": 0.535, "step": 7072 }, { "epoch": 0.72, "grad_norm": 1.5475951227388538, "learning_rate": 1.8903143485598059e-06, "loss": 0.7192, "step": 7073 }, { "epoch": 0.72, "grad_norm": 1.8312257472248499, "learning_rate": 1.8890196044029384e-06, "loss": 0.6384, "step": 7074 }, { "epoch": 0.72, "grad_norm": 1.6080441067831273, "learning_rate": 1.887725200529847e-06, "loss": 0.5872, "step": 7075 }, { "epoch": 0.72, "grad_norm": 1.5212649852857563, "learning_rate": 1.8864311370821114e-06, "loss": 0.6497, "step": 7076 }, { "epoch": 0.72, "grad_norm": 1.4134062286721267, "learning_rate": 1.8851374142012812e-06, "loss": 0.6608, "step": 7077 }, { "epoch": 0.72, "grad_norm": 1.701590148874178, "learning_rate": 1.8838440320288653e-06, "loss": 0.7581, "step": 7078 }, { "epoch": 0.72, "grad_norm": 1.5591084928889876, "learning_rate": 1.8825509907063328e-06, "loss": 0.6851, "step": 7079 }, { "epoch": 0.72, "grad_norm": 1.453904561692101, "learning_rate": 1.8812582903751225e-06, "loss": 0.4594, "step": 7080 }, { "epoch": 0.72, "grad_norm": 1.6556138332899522, "learning_rate": 1.8799659311766288e-06, "loss": 0.7192, "step": 7081 }, { "epoch": 0.72, "grad_norm": 1.7102406137186656, "learning_rate": 1.8786739132522153e-06, "loss": 0.5961, "step": 7082 }, { "epoch": 0.72, "grad_norm": 1.3803842719649806, "learning_rate": 1.8773822367432025e-06, "loss": 0.6693, "step": 7083 }, { "epoch": 0.72, "grad_norm": 1.5285524048893164, "learning_rate": 1.8760909017908745e-06, "loss": 0.65, "step": 7084 }, { "epoch": 0.72, "grad_norm": 1.4884704014997483, "learning_rate": 1.874799908536482e-06, "loss": 0.6153, "step": 7085 }, { "epoch": 0.72, "grad_norm": 1.429722785224867, "learning_rate": 1.8735092571212366e-06, "loss": 0.7307, "step": 7086 }, { "epoch": 0.72, "grad_norm": 1.5554305494594, "learning_rate": 1.87221894768631e-06, "loss": 0.6401, "step": 7087 }, { "epoch": 0.72, "grad_norm": 1.5066406221284547, "learning_rate": 1.8709289803728375e-06, "loss": 0.6397, "step": 7088 }, { "epoch": 0.72, "grad_norm": 1.4422253721539537, "learning_rate": 1.86963935532192e-06, "loss": 0.5865, "step": 7089 }, { "epoch": 0.72, "grad_norm": 1.4171712557957123, "learning_rate": 1.8683500726746157e-06, "loss": 0.6215, "step": 7090 }, { "epoch": 0.72, "grad_norm": 1.3632015214721198, "learning_rate": 1.8670611325719513e-06, "loss": 0.6436, "step": 7091 }, { "epoch": 0.72, "grad_norm": 1.4240301338283108, "learning_rate": 1.8657725351549122e-06, "loss": 0.5693, "step": 7092 }, { "epoch": 0.72, "grad_norm": 1.7075573495174285, "learning_rate": 1.8644842805644448e-06, "loss": 0.6303, "step": 7093 }, { "epoch": 0.72, "grad_norm": 1.7426166841776758, "learning_rate": 1.8631963689414633e-06, "loss": 0.703, "step": 7094 }, { "epoch": 0.72, "grad_norm": 1.4219955582773995, "learning_rate": 1.8619088004268392e-06, "loss": 0.6881, "step": 7095 }, { "epoch": 0.72, "grad_norm": 1.4097850769766567, "learning_rate": 1.8606215751614115e-06, "loss": 0.738, "step": 7096 }, { "epoch": 0.72, "grad_norm": 1.3791417994323525, "learning_rate": 1.8593346932859775e-06, "loss": 0.6346, "step": 7097 }, { "epoch": 0.72, "grad_norm": 1.4726156485982262, "learning_rate": 1.8580481549412955e-06, "loss": 0.57, "step": 7098 }, { "epoch": 0.72, "grad_norm": 1.473040001700311, "learning_rate": 1.8567619602680919e-06, "loss": 0.6577, "step": 7099 }, { "epoch": 0.73, "grad_norm": 1.6555801463008382, "learning_rate": 1.8554761094070539e-06, "loss": 0.6667, "step": 7100 }, { "epoch": 0.73, "grad_norm": 1.6793673344504407, "learning_rate": 1.8541906024988283e-06, "loss": 0.6523, "step": 7101 }, { "epoch": 0.73, "grad_norm": 1.6594415135770313, "learning_rate": 1.8529054396840234e-06, "loss": 0.6473, "step": 7102 }, { "epoch": 0.73, "grad_norm": 1.5125414537338913, "learning_rate": 1.8516206211032172e-06, "loss": 0.7376, "step": 7103 }, { "epoch": 0.73, "grad_norm": 2.2110981179136746, "learning_rate": 1.85033614689694e-06, "loss": 0.6862, "step": 7104 }, { "epoch": 0.73, "grad_norm": 1.3858523811028773, "learning_rate": 1.8490520172056942e-06, "loss": 0.5911, "step": 7105 }, { "epoch": 0.73, "grad_norm": 1.5666397224115949, "learning_rate": 1.847768232169937e-06, "loss": 0.602, "step": 7106 }, { "epoch": 0.73, "grad_norm": 1.496017746791471, "learning_rate": 1.8464847919300898e-06, "loss": 0.6769, "step": 7107 }, { "epoch": 0.73, "grad_norm": 1.4199330315267558, "learning_rate": 1.8452016966265413e-06, "loss": 0.6928, "step": 7108 }, { "epoch": 0.73, "grad_norm": 1.3452812852266207, "learning_rate": 1.8439189463996337e-06, "loss": 0.6437, "step": 7109 }, { "epoch": 0.73, "grad_norm": 1.3881207024705482, "learning_rate": 1.84263654138968e-06, "loss": 0.6908, "step": 7110 }, { "epoch": 0.73, "grad_norm": 1.624160952286602, "learning_rate": 1.8413544817369504e-06, "loss": 0.6115, "step": 7111 }, { "epoch": 0.73, "grad_norm": 1.6896877978355438, "learning_rate": 1.8400727675816765e-06, "loss": 0.7447, "step": 7112 }, { "epoch": 0.73, "grad_norm": 1.4575277863807414, "learning_rate": 1.838791399064056e-06, "loss": 0.6307, "step": 7113 }, { "epoch": 0.73, "grad_norm": 1.698415876558895, "learning_rate": 1.837510376324248e-06, "loss": 0.7, "step": 7114 }, { "epoch": 0.73, "grad_norm": 1.9884764791243825, "learning_rate": 1.836229699502372e-06, "loss": 0.5676, "step": 7115 }, { "epoch": 0.73, "grad_norm": 1.506599114094634, "learning_rate": 1.8349493687385078e-06, "loss": 0.6545, "step": 7116 }, { "epoch": 0.73, "grad_norm": 1.643204241685302, "learning_rate": 1.8336693841727037e-06, "loss": 0.6999, "step": 7117 }, { "epoch": 0.73, "grad_norm": 1.4757879889638967, "learning_rate": 1.8323897459449635e-06, "loss": 0.6553, "step": 7118 }, { "epoch": 0.73, "grad_norm": 1.560206036893013, "learning_rate": 1.8311104541952567e-06, "loss": 0.6048, "step": 7119 }, { "epoch": 0.73, "grad_norm": 1.49199028393497, "learning_rate": 1.8298315090635182e-06, "loss": 0.6792, "step": 7120 }, { "epoch": 0.73, "grad_norm": 1.42507751998251, "learning_rate": 1.8285529106896337e-06, "loss": 0.6856, "step": 7121 }, { "epoch": 0.73, "grad_norm": 1.3075334746215184, "learning_rate": 1.8272746592134639e-06, "loss": 0.5958, "step": 7122 }, { "epoch": 0.73, "grad_norm": 1.5411578766501322, "learning_rate": 1.8259967547748226e-06, "loss": 0.6811, "step": 7123 }, { "epoch": 0.73, "grad_norm": 1.466026690330997, "learning_rate": 1.8247191975134914e-06, "loss": 0.6651, "step": 7124 }, { "epoch": 0.73, "grad_norm": 1.5133274457197332, "learning_rate": 1.8234419875692105e-06, "loss": 0.6552, "step": 7125 }, { "epoch": 0.73, "grad_norm": 1.5965127055596922, "learning_rate": 1.8221651250816812e-06, "loss": 0.7337, "step": 7126 }, { "epoch": 0.73, "grad_norm": 1.5416918042234224, "learning_rate": 1.8208886101905703e-06, "loss": 0.6798, "step": 7127 }, { "epoch": 0.73, "grad_norm": 1.6544621022636408, "learning_rate": 1.8196124430355066e-06, "loss": 0.6642, "step": 7128 }, { "epoch": 0.73, "grad_norm": 1.4017730516535154, "learning_rate": 1.818336623756078e-06, "loss": 0.6157, "step": 7129 }, { "epoch": 0.73, "grad_norm": 1.4399804801744658, "learning_rate": 1.8170611524918331e-06, "loss": 0.6278, "step": 7130 }, { "epoch": 0.73, "grad_norm": 1.445180129634464, "learning_rate": 1.8157860293822887e-06, "loss": 0.6087, "step": 7131 }, { "epoch": 0.73, "grad_norm": 1.6275123816504276, "learning_rate": 1.8145112545669163e-06, "loss": 0.6811, "step": 7132 }, { "epoch": 0.73, "grad_norm": 1.468254168407696, "learning_rate": 1.8132368281851549e-06, "loss": 0.5763, "step": 7133 }, { "epoch": 0.73, "grad_norm": 1.7668567191253752, "learning_rate": 1.8119627503764053e-06, "loss": 0.6165, "step": 7134 }, { "epoch": 0.73, "grad_norm": 1.4242754846582046, "learning_rate": 1.810689021280022e-06, "loss": 0.5666, "step": 7135 }, { "epoch": 0.73, "grad_norm": 1.37022958351938, "learning_rate": 1.809415641035333e-06, "loss": 0.6168, "step": 7136 }, { "epoch": 0.73, "grad_norm": 1.3848760841345849, "learning_rate": 1.8081426097816191e-06, "loss": 0.616, "step": 7137 }, { "epoch": 0.73, "grad_norm": 1.484023574895057, "learning_rate": 1.8068699276581286e-06, "loss": 0.5953, "step": 7138 }, { "epoch": 0.73, "grad_norm": 1.4903503849667026, "learning_rate": 1.8055975948040694e-06, "loss": 0.6091, "step": 7139 }, { "epoch": 0.73, "grad_norm": 1.6900724692383622, "learning_rate": 1.8043256113586078e-06, "loss": 0.7754, "step": 7140 }, { "epoch": 0.73, "grad_norm": 1.5883907172640648, "learning_rate": 1.8030539774608784e-06, "loss": 0.6237, "step": 7141 }, { "epoch": 0.73, "grad_norm": 1.4750129022781966, "learning_rate": 1.8017826932499754e-06, "loss": 0.7726, "step": 7142 }, { "epoch": 0.73, "grad_norm": 1.4966514340515884, "learning_rate": 1.8005117588649524e-06, "loss": 0.7286, "step": 7143 }, { "epoch": 0.73, "grad_norm": 1.4696143200315692, "learning_rate": 1.7992411744448242e-06, "loss": 0.6143, "step": 7144 }, { "epoch": 0.73, "grad_norm": 1.35283797061391, "learning_rate": 1.7979709401285728e-06, "loss": 0.5711, "step": 7145 }, { "epoch": 0.73, "grad_norm": 1.5615921177833167, "learning_rate": 1.7967010560551352e-06, "loss": 0.6552, "step": 7146 }, { "epoch": 0.73, "grad_norm": 1.4347147008353782, "learning_rate": 1.7954315223634144e-06, "loss": 0.7272, "step": 7147 }, { "epoch": 0.73, "grad_norm": 1.4745861587995088, "learning_rate": 1.7941623391922775e-06, "loss": 0.6804, "step": 7148 }, { "epoch": 0.73, "grad_norm": 1.4054235675264435, "learning_rate": 1.7928935066805437e-06, "loss": 0.5193, "step": 7149 }, { "epoch": 0.73, "grad_norm": 1.6813535656502046, "learning_rate": 1.7916250249670036e-06, "loss": 0.8046, "step": 7150 }, { "epoch": 0.73, "grad_norm": 1.5381650408990708, "learning_rate": 1.7903568941904032e-06, "loss": 0.6838, "step": 7151 }, { "epoch": 0.73, "grad_norm": 1.5901970775122551, "learning_rate": 1.789089114489454e-06, "loss": 0.7388, "step": 7152 }, { "epoch": 0.73, "grad_norm": 5.167775889650768, "learning_rate": 1.7878216860028307e-06, "loss": 0.6367, "step": 7153 }, { "epoch": 0.73, "grad_norm": 1.3709335556250433, "learning_rate": 1.786554608869161e-06, "loss": 0.5348, "step": 7154 }, { "epoch": 0.73, "grad_norm": 1.7759960749993844, "learning_rate": 1.7852878832270415e-06, "loss": 0.5362, "step": 7155 }, { "epoch": 0.73, "grad_norm": 1.4588948007923148, "learning_rate": 1.784021509215031e-06, "loss": 0.6861, "step": 7156 }, { "epoch": 0.73, "grad_norm": 1.3593419843424617, "learning_rate": 1.7827554869716463e-06, "loss": 0.65, "step": 7157 }, { "epoch": 0.73, "grad_norm": 1.3970502304469798, "learning_rate": 1.7814898166353644e-06, "loss": 0.6627, "step": 7158 }, { "epoch": 0.73, "grad_norm": 1.6260828679642085, "learning_rate": 1.7802244983446294e-06, "loss": 0.6798, "step": 7159 }, { "epoch": 0.73, "grad_norm": 1.812201314626979, "learning_rate": 1.7789595322378411e-06, "loss": 0.731, "step": 7160 }, { "epoch": 0.73, "grad_norm": 1.4396783088124614, "learning_rate": 1.7776949184533648e-06, "loss": 0.6056, "step": 7161 }, { "epoch": 0.73, "grad_norm": 1.3296376290806498, "learning_rate": 1.7764306571295297e-06, "loss": 0.6186, "step": 7162 }, { "epoch": 0.73, "grad_norm": 1.6969542841799607, "learning_rate": 1.7751667484046148e-06, "loss": 0.7633, "step": 7163 }, { "epoch": 0.73, "grad_norm": 1.5222926006879256, "learning_rate": 1.7739031924168743e-06, "loss": 0.6336, "step": 7164 }, { "epoch": 0.73, "grad_norm": 1.527524028188327, "learning_rate": 1.7726399893045144e-06, "loss": 0.6944, "step": 7165 }, { "epoch": 0.73, "grad_norm": 1.525862241114473, "learning_rate": 1.7713771392057077e-06, "loss": 0.6019, "step": 7166 }, { "epoch": 0.73, "grad_norm": 1.4125907732749172, "learning_rate": 1.7701146422585902e-06, "loss": 0.5489, "step": 7167 }, { "epoch": 0.73, "grad_norm": 1.6420139016455837, "learning_rate": 1.7688524986012483e-06, "loss": 0.6967, "step": 7168 }, { "epoch": 0.73, "grad_norm": 1.445353344263539, "learning_rate": 1.7675907083717414e-06, "loss": 0.5797, "step": 7169 }, { "epoch": 0.73, "grad_norm": 1.5570741794582406, "learning_rate": 1.7663292717080877e-06, "loss": 0.6189, "step": 7170 }, { "epoch": 0.73, "grad_norm": 1.4414291012919718, "learning_rate": 1.7650681887482628e-06, "loss": 0.4974, "step": 7171 }, { "epoch": 0.73, "grad_norm": 1.5288604627168414, "learning_rate": 1.7638074596302052e-06, "loss": 0.6901, "step": 7172 }, { "epoch": 0.73, "grad_norm": 1.488547116411061, "learning_rate": 1.7625470844918174e-06, "loss": 0.5824, "step": 7173 }, { "epoch": 0.73, "grad_norm": 1.372215850932591, "learning_rate": 1.761287063470959e-06, "loss": 0.5849, "step": 7174 }, { "epoch": 0.73, "grad_norm": 1.6238887113912728, "learning_rate": 1.7600273967054537e-06, "loss": 0.7025, "step": 7175 }, { "epoch": 0.73, "grad_norm": 1.4571564070273977, "learning_rate": 1.7587680843330896e-06, "loss": 0.5984, "step": 7176 }, { "epoch": 0.73, "grad_norm": 1.6639822302712148, "learning_rate": 1.7575091264916056e-06, "loss": 0.6967, "step": 7177 }, { "epoch": 0.73, "grad_norm": 1.4100596460964177, "learning_rate": 1.7562505233187133e-06, "loss": 0.6558, "step": 7178 }, { "epoch": 0.73, "grad_norm": 1.4336310655517701, "learning_rate": 1.754992274952077e-06, "loss": 0.6024, "step": 7179 }, { "epoch": 0.73, "grad_norm": 1.5383034661022081, "learning_rate": 1.7537343815293284e-06, "loss": 0.7426, "step": 7180 }, { "epoch": 0.73, "grad_norm": 1.5975867175261336, "learning_rate": 1.75247684318806e-06, "loss": 0.6422, "step": 7181 }, { "epoch": 0.73, "grad_norm": 1.327441784598497, "learning_rate": 1.7512196600658177e-06, "loss": 0.5345, "step": 7182 }, { "epoch": 0.73, "grad_norm": 1.312575013905032, "learning_rate": 1.7499628323001167e-06, "loss": 0.6041, "step": 7183 }, { "epoch": 0.73, "grad_norm": 1.5367417899790718, "learning_rate": 1.748706360028432e-06, "loss": 0.6325, "step": 7184 }, { "epoch": 0.73, "grad_norm": 1.4832992663166404, "learning_rate": 1.7474502433881963e-06, "loss": 0.7042, "step": 7185 }, { "epoch": 0.73, "grad_norm": 1.5161036067738451, "learning_rate": 1.746194482516808e-06, "loss": 0.6777, "step": 7186 }, { "epoch": 0.73, "grad_norm": 1.7456185521851098, "learning_rate": 1.7449390775516228e-06, "loss": 0.6741, "step": 7187 }, { "epoch": 0.73, "grad_norm": 1.5739979701576543, "learning_rate": 1.7436840286299573e-06, "loss": 0.6968, "step": 7188 }, { "epoch": 0.73, "grad_norm": 1.4064510961122185, "learning_rate": 1.742429335889092e-06, "loss": 0.6067, "step": 7189 }, { "epoch": 0.73, "grad_norm": 1.7741457435042567, "learning_rate": 1.7411749994662709e-06, "loss": 0.7088, "step": 7190 }, { "epoch": 0.73, "grad_norm": 1.4396089749468286, "learning_rate": 1.7399210194986877e-06, "loss": 0.6293, "step": 7191 }, { "epoch": 0.73, "grad_norm": 1.4743179647867342, "learning_rate": 1.7386673961235102e-06, "loss": 0.6108, "step": 7192 }, { "epoch": 0.73, "grad_norm": 1.3262174261019728, "learning_rate": 1.7374141294778585e-06, "loss": 0.6327, "step": 7193 }, { "epoch": 0.73, "grad_norm": 1.4378007772793107, "learning_rate": 1.7361612196988177e-06, "loss": 0.6469, "step": 7194 }, { "epoch": 0.73, "grad_norm": 1.4307404136974355, "learning_rate": 1.7349086669234372e-06, "loss": 0.6151, "step": 7195 }, { "epoch": 0.73, "grad_norm": 1.4724304255921867, "learning_rate": 1.733656471288716e-06, "loss": 0.6461, "step": 7196 }, { "epoch": 0.73, "grad_norm": 1.4054875204177777, "learning_rate": 1.7324046329316253e-06, "loss": 0.6038, "step": 7197 }, { "epoch": 0.74, "grad_norm": 1.4308749584469902, "learning_rate": 1.7311531519890933e-06, "loss": 0.6258, "step": 7198 }, { "epoch": 0.74, "grad_norm": 1.6064819101598533, "learning_rate": 1.7299020285980066e-06, "loss": 0.7939, "step": 7199 }, { "epoch": 0.74, "grad_norm": 1.5736793297589184, "learning_rate": 1.728651262895218e-06, "loss": 0.6245, "step": 7200 }, { "epoch": 0.74, "grad_norm": 1.5991668213703045, "learning_rate": 1.7274008550175363e-06, "loss": 0.5824, "step": 7201 }, { "epoch": 0.74, "grad_norm": 1.4068745295676897, "learning_rate": 1.7261508051017317e-06, "loss": 0.652, "step": 7202 }, { "epoch": 0.74, "grad_norm": 1.6322341015811956, "learning_rate": 1.72490111328454e-06, "loss": 0.7238, "step": 7203 }, { "epoch": 0.74, "grad_norm": 1.8584338365043307, "learning_rate": 1.723651779702652e-06, "loss": 0.6559, "step": 7204 }, { "epoch": 0.74, "grad_norm": 1.5989545547914747, "learning_rate": 1.722402804492721e-06, "loss": 0.6528, "step": 7205 }, { "epoch": 0.74, "grad_norm": 1.3478084394325343, "learning_rate": 1.7211541877913645e-06, "loss": 0.5626, "step": 7206 }, { "epoch": 0.74, "grad_norm": 1.4949063470947808, "learning_rate": 1.719905929735155e-06, "loss": 0.6717, "step": 7207 }, { "epoch": 0.74, "grad_norm": 1.3059382526311978, "learning_rate": 1.7186580304606305e-06, "loss": 0.5592, "step": 7208 }, { "epoch": 0.74, "grad_norm": 1.3950543642893176, "learning_rate": 1.7174104901042916e-06, "loss": 0.7121, "step": 7209 }, { "epoch": 0.74, "grad_norm": 1.44414791282777, "learning_rate": 1.7161633088025892e-06, "loss": 0.6757, "step": 7210 }, { "epoch": 0.74, "grad_norm": 1.6606435626499954, "learning_rate": 1.7149164866919454e-06, "loss": 0.6752, "step": 7211 }, { "epoch": 0.74, "grad_norm": 1.4248489719833428, "learning_rate": 1.7136700239087412e-06, "loss": 0.5797, "step": 7212 }, { "epoch": 0.74, "grad_norm": 1.6309305399750555, "learning_rate": 1.7124239205893129e-06, "loss": 0.6153, "step": 7213 }, { "epoch": 0.74, "grad_norm": 1.3955772822008459, "learning_rate": 1.7111781768699647e-06, "loss": 0.6021, "step": 7214 }, { "epoch": 0.74, "grad_norm": 1.3013636650405487, "learning_rate": 1.7099327928869569e-06, "loss": 0.565, "step": 7215 }, { "epoch": 0.74, "grad_norm": 1.464272843364794, "learning_rate": 1.7086877687765091e-06, "loss": 0.618, "step": 7216 }, { "epoch": 0.74, "grad_norm": 1.56390983832316, "learning_rate": 1.7074431046748075e-06, "loss": 0.659, "step": 7217 }, { "epoch": 0.74, "grad_norm": 1.8431752293607566, "learning_rate": 1.7061988007179919e-06, "loss": 0.6356, "step": 7218 }, { "epoch": 0.74, "grad_norm": 1.333846107620469, "learning_rate": 1.7049548570421698e-06, "loss": 0.5766, "step": 7219 }, { "epoch": 0.74, "grad_norm": 1.474896715602011, "learning_rate": 1.7037112737834038e-06, "loss": 0.7377, "step": 7220 }, { "epoch": 0.74, "grad_norm": 1.6389032730901036, "learning_rate": 1.702468051077717e-06, "loss": 0.6337, "step": 7221 }, { "epoch": 0.74, "grad_norm": 1.4256351197916555, "learning_rate": 1.701225189061097e-06, "loss": 0.6127, "step": 7222 }, { "epoch": 0.74, "grad_norm": 1.5974061344038037, "learning_rate": 1.699982687869493e-06, "loss": 0.6004, "step": 7223 }, { "epoch": 0.74, "grad_norm": 1.396201802796438, "learning_rate": 1.6987405476388057e-06, "loss": 0.56, "step": 7224 }, { "epoch": 0.74, "grad_norm": 1.4269633229426797, "learning_rate": 1.697498768504905e-06, "loss": 0.5547, "step": 7225 }, { "epoch": 0.74, "grad_norm": 1.5894125477490906, "learning_rate": 1.69625735060362e-06, "loss": 0.6155, "step": 7226 }, { "epoch": 0.74, "grad_norm": 1.475383845204122, "learning_rate": 1.695016294070736e-06, "loss": 0.572, "step": 7227 }, { "epoch": 0.74, "grad_norm": 1.439993848826222, "learning_rate": 1.693775599042005e-06, "loss": 0.5802, "step": 7228 }, { "epoch": 0.74, "grad_norm": 1.421582588223992, "learning_rate": 1.6925352656531346e-06, "loss": 0.6824, "step": 7229 }, { "epoch": 0.74, "grad_norm": 1.507246626758285, "learning_rate": 1.691295294039792e-06, "loss": 0.5942, "step": 7230 }, { "epoch": 0.74, "grad_norm": 1.5677078287434838, "learning_rate": 1.6900556843376116e-06, "loss": 0.6374, "step": 7231 }, { "epoch": 0.74, "grad_norm": 1.5815116493983576, "learning_rate": 1.688816436682179e-06, "loss": 0.5617, "step": 7232 }, { "epoch": 0.74, "grad_norm": 1.8266815567240513, "learning_rate": 1.6875775512090498e-06, "loss": 0.6182, "step": 7233 }, { "epoch": 0.74, "grad_norm": 1.6929653323962872, "learning_rate": 1.6863390280537328e-06, "loss": 0.581, "step": 7234 }, { "epoch": 0.74, "grad_norm": 1.4491641227276189, "learning_rate": 1.6851008673516977e-06, "loss": 0.5685, "step": 7235 }, { "epoch": 0.74, "grad_norm": 1.4895705022143313, "learning_rate": 1.6838630692383784e-06, "loss": 0.6195, "step": 7236 }, { "epoch": 0.74, "grad_norm": 1.4979138456631393, "learning_rate": 1.6826256338491703e-06, "loss": 0.5747, "step": 7237 }, { "epoch": 0.74, "grad_norm": 1.4185537494022276, "learning_rate": 1.6813885613194197e-06, "loss": 0.7013, "step": 7238 }, { "epoch": 0.74, "grad_norm": 1.3572294090121417, "learning_rate": 1.6801518517844423e-06, "loss": 0.5228, "step": 7239 }, { "epoch": 0.74, "grad_norm": 1.3852732640564522, "learning_rate": 1.678915505379513e-06, "loss": 0.5464, "step": 7240 }, { "epoch": 0.74, "grad_norm": 1.4166828101335256, "learning_rate": 1.6776795222398622e-06, "loss": 0.6193, "step": 7241 }, { "epoch": 0.74, "grad_norm": 1.4111053411655463, "learning_rate": 1.6764439025006869e-06, "loss": 0.6695, "step": 7242 }, { "epoch": 0.74, "grad_norm": 1.426751620081846, "learning_rate": 1.675208646297139e-06, "loss": 0.6526, "step": 7243 }, { "epoch": 0.74, "grad_norm": 1.740152804551947, "learning_rate": 1.673973753764332e-06, "loss": 0.6776, "step": 7244 }, { "epoch": 0.74, "grad_norm": 1.4360738803769706, "learning_rate": 1.672739225037342e-06, "loss": 0.5915, "step": 7245 }, { "epoch": 0.74, "grad_norm": 1.5472603477560918, "learning_rate": 1.6715050602512024e-06, "loss": 0.6439, "step": 7246 }, { "epoch": 0.74, "grad_norm": 1.5111297046703878, "learning_rate": 1.6702712595409093e-06, "loss": 0.7162, "step": 7247 }, { "epoch": 0.74, "grad_norm": 1.6105684036932162, "learning_rate": 1.669037823041418e-06, "loss": 0.7601, "step": 7248 }, { "epoch": 0.74, "grad_norm": 1.4158525965769142, "learning_rate": 1.6678047508876406e-06, "loss": 0.5303, "step": 7249 }, { "epoch": 0.74, "grad_norm": 1.3809164216561292, "learning_rate": 1.6665720432144539e-06, "loss": 0.6305, "step": 7250 }, { "epoch": 0.74, "grad_norm": 1.633953108188704, "learning_rate": 1.6653397001566962e-06, "loss": 0.5736, "step": 7251 }, { "epoch": 0.74, "grad_norm": 1.516376337749537, "learning_rate": 1.6641077218491607e-06, "loss": 0.6256, "step": 7252 }, { "epoch": 0.74, "grad_norm": 1.3659079688938949, "learning_rate": 1.6628761084266015e-06, "loss": 0.6074, "step": 7253 }, { "epoch": 0.74, "grad_norm": 1.4054267303993027, "learning_rate": 1.6616448600237372e-06, "loss": 0.65, "step": 7254 }, { "epoch": 0.74, "grad_norm": 1.4802667808677554, "learning_rate": 1.6604139767752415e-06, "loss": 0.5984, "step": 7255 }, { "epoch": 0.74, "grad_norm": 1.4764679122940243, "learning_rate": 1.6591834588157523e-06, "loss": 0.6387, "step": 7256 }, { "epoch": 0.74, "grad_norm": 1.4671602417249505, "learning_rate": 1.6579533062798646e-06, "loss": 0.6236, "step": 7257 }, { "epoch": 0.74, "grad_norm": 1.5829704015697583, "learning_rate": 1.656723519302133e-06, "loss": 0.6441, "step": 7258 }, { "epoch": 0.74, "grad_norm": 1.499646319158702, "learning_rate": 1.6554940980170758e-06, "loss": 0.6971, "step": 7259 }, { "epoch": 0.74, "grad_norm": 1.5994940081713664, "learning_rate": 1.654265042559166e-06, "loss": 0.5709, "step": 7260 }, { "epoch": 0.74, "grad_norm": 1.4807061999501547, "learning_rate": 1.6530363530628434e-06, "loss": 0.6041, "step": 7261 }, { "epoch": 0.74, "grad_norm": 1.5099022415364673, "learning_rate": 1.6518080296625022e-06, "loss": 0.6204, "step": 7262 }, { "epoch": 0.74, "grad_norm": 1.445841616635078, "learning_rate": 1.650580072492496e-06, "loss": 0.6558, "step": 7263 }, { "epoch": 0.74, "grad_norm": 1.494473682033679, "learning_rate": 1.6493524816871425e-06, "loss": 0.6222, "step": 7264 }, { "epoch": 0.74, "grad_norm": 1.6789583330492088, "learning_rate": 1.6481252573807195e-06, "loss": 0.7056, "step": 7265 }, { "epoch": 0.74, "grad_norm": 1.4672944886294708, "learning_rate": 1.6468983997074606e-06, "loss": 0.5781, "step": 7266 }, { "epoch": 0.74, "grad_norm": 1.464005298770488, "learning_rate": 1.64567190880156e-06, "loss": 0.5826, "step": 7267 }, { "epoch": 0.74, "grad_norm": 1.530445244934773, "learning_rate": 1.644445784797176e-06, "loss": 0.6061, "step": 7268 }, { "epoch": 0.74, "grad_norm": 1.5362271422551508, "learning_rate": 1.643220027828421e-06, "loss": 0.619, "step": 7269 }, { "epoch": 0.74, "grad_norm": 1.9072181867892082, "learning_rate": 1.6419946380293738e-06, "loss": 0.6763, "step": 7270 }, { "epoch": 0.74, "grad_norm": 1.6152617702989347, "learning_rate": 1.6407696155340668e-06, "loss": 0.6474, "step": 7271 }, { "epoch": 0.74, "grad_norm": 1.6346040283498473, "learning_rate": 1.6395449604764941e-06, "loss": 0.7757, "step": 7272 }, { "epoch": 0.74, "grad_norm": 1.6245925823243035, "learning_rate": 1.6383206729906132e-06, "loss": 0.6445, "step": 7273 }, { "epoch": 0.74, "grad_norm": 1.368211896483931, "learning_rate": 1.637096753210335e-06, "loss": 0.6034, "step": 7274 }, { "epoch": 0.74, "grad_norm": 1.4553031502027869, "learning_rate": 1.6358732012695377e-06, "loss": 0.5993, "step": 7275 }, { "epoch": 0.74, "grad_norm": 1.3170160960541761, "learning_rate": 1.634650017302053e-06, "loss": 0.5872, "step": 7276 }, { "epoch": 0.74, "grad_norm": 1.4811384338246485, "learning_rate": 1.633427201441673e-06, "loss": 0.606, "step": 7277 }, { "epoch": 0.74, "grad_norm": 1.56416938087889, "learning_rate": 1.632204753822153e-06, "loss": 0.6537, "step": 7278 }, { "epoch": 0.74, "grad_norm": 1.5665991853352463, "learning_rate": 1.6309826745772078e-06, "loss": 0.6809, "step": 7279 }, { "epoch": 0.74, "grad_norm": 1.588986271062177, "learning_rate": 1.6297609638405093e-06, "loss": 0.6241, "step": 7280 }, { "epoch": 0.74, "grad_norm": 1.4391741757863776, "learning_rate": 1.6285396217456871e-06, "loss": 0.5627, "step": 7281 }, { "epoch": 0.74, "grad_norm": 1.2614618978016985, "learning_rate": 1.6273186484263375e-06, "loss": 0.5336, "step": 7282 }, { "epoch": 0.74, "grad_norm": 1.6928995082502905, "learning_rate": 1.6260980440160096e-06, "loss": 0.6604, "step": 7283 }, { "epoch": 0.74, "grad_norm": 1.5220239461378915, "learning_rate": 1.6248778086482174e-06, "loss": 0.7387, "step": 7284 }, { "epoch": 0.74, "grad_norm": 1.8555708296458167, "learning_rate": 1.6236579424564308e-06, "loss": 0.7341, "step": 7285 }, { "epoch": 0.74, "grad_norm": 1.4221752233103238, "learning_rate": 1.6224384455740788e-06, "loss": 0.6056, "step": 7286 }, { "epoch": 0.74, "grad_norm": 1.6006735856393013, "learning_rate": 1.6212193181345554e-06, "loss": 0.5915, "step": 7287 }, { "epoch": 0.74, "grad_norm": 1.7509711041426077, "learning_rate": 1.6200005602712077e-06, "loss": 0.6789, "step": 7288 }, { "epoch": 0.74, "grad_norm": 1.5891494734300948, "learning_rate": 1.6187821721173475e-06, "loss": 0.6965, "step": 7289 }, { "epoch": 0.74, "grad_norm": 1.3843541838469073, "learning_rate": 1.617564153806243e-06, "loss": 0.654, "step": 7290 }, { "epoch": 0.74, "grad_norm": 1.5971780871998338, "learning_rate": 1.6163465054711213e-06, "loss": 0.6877, "step": 7291 }, { "epoch": 0.74, "grad_norm": 1.7192128702110983, "learning_rate": 1.6151292272451718e-06, "loss": 0.6602, "step": 7292 }, { "epoch": 0.74, "grad_norm": 1.5292278468830256, "learning_rate": 1.6139123192615447e-06, "loss": 0.6469, "step": 7293 }, { "epoch": 0.74, "grad_norm": 1.5555116060057437, "learning_rate": 1.612695781653345e-06, "loss": 0.6599, "step": 7294 }, { "epoch": 0.74, "grad_norm": 1.347311063584775, "learning_rate": 1.6114796145536376e-06, "loss": 0.5784, "step": 7295 }, { "epoch": 0.75, "grad_norm": 1.6789845343109107, "learning_rate": 1.6102638180954527e-06, "loss": 0.6337, "step": 7296 }, { "epoch": 0.75, "grad_norm": 1.357662705092093, "learning_rate": 1.6090483924117723e-06, "loss": 0.7303, "step": 7297 }, { "epoch": 0.75, "grad_norm": 1.58957523245924, "learning_rate": 1.607833337635545e-06, "loss": 0.6593, "step": 7298 }, { "epoch": 0.75, "grad_norm": 1.452189373299087, "learning_rate": 1.6066186538996725e-06, "loss": 0.6282, "step": 7299 }, { "epoch": 0.75, "grad_norm": 1.5186735657823573, "learning_rate": 1.605404341337019e-06, "loss": 0.7089, "step": 7300 }, { "epoch": 0.75, "grad_norm": 1.7504844595441835, "learning_rate": 1.6041904000804105e-06, "loss": 0.6262, "step": 7301 }, { "epoch": 0.75, "grad_norm": 1.5180995340715115, "learning_rate": 1.6029768302626253e-06, "loss": 0.6231, "step": 7302 }, { "epoch": 0.75, "grad_norm": 1.4145459127678408, "learning_rate": 1.6017636320164105e-06, "loss": 0.5961, "step": 7303 }, { "epoch": 0.75, "grad_norm": 1.491071556925564, "learning_rate": 1.6005508054744646e-06, "loss": 0.649, "step": 7304 }, { "epoch": 0.75, "grad_norm": 1.698881901891784, "learning_rate": 1.5993383507694476e-06, "loss": 0.6972, "step": 7305 }, { "epoch": 0.75, "grad_norm": 1.5525814075866808, "learning_rate": 1.598126268033981e-06, "loss": 0.7673, "step": 7306 }, { "epoch": 0.75, "grad_norm": 1.442916070435873, "learning_rate": 1.5969145574006456e-06, "loss": 0.6199, "step": 7307 }, { "epoch": 0.75, "grad_norm": 1.4170141051437395, "learning_rate": 1.5957032190019788e-06, "loss": 0.6793, "step": 7308 }, { "epoch": 0.75, "grad_norm": 1.653335426829468, "learning_rate": 1.5944922529704776e-06, "loss": 0.6173, "step": 7309 }, { "epoch": 0.75, "grad_norm": 1.4741558313950387, "learning_rate": 1.5932816594386014e-06, "loss": 0.623, "step": 7310 }, { "epoch": 0.75, "grad_norm": 1.5609592333865656, "learning_rate": 1.5920714385387643e-06, "loss": 0.5563, "step": 7311 }, { "epoch": 0.75, "grad_norm": 1.354163248918382, "learning_rate": 1.5908615904033453e-06, "loss": 0.6134, "step": 7312 }, { "epoch": 0.75, "grad_norm": 1.5555927326587573, "learning_rate": 1.589652115164677e-06, "loss": 0.6668, "step": 7313 }, { "epoch": 0.75, "grad_norm": 1.4938678198540618, "learning_rate": 1.5884430129550539e-06, "loss": 0.6793, "step": 7314 }, { "epoch": 0.75, "grad_norm": 1.287229298241916, "learning_rate": 1.5872342839067305e-06, "loss": 0.571, "step": 7315 }, { "epoch": 0.75, "grad_norm": 1.4375433877401858, "learning_rate": 1.5860259281519174e-06, "loss": 0.5505, "step": 7316 }, { "epoch": 0.75, "grad_norm": 1.3822697679049583, "learning_rate": 1.5848179458227892e-06, "loss": 0.5941, "step": 7317 }, { "epoch": 0.75, "grad_norm": 1.630131448389114, "learning_rate": 1.5836103370514756e-06, "loss": 0.6215, "step": 7318 }, { "epoch": 0.75, "grad_norm": 1.4780882246526221, "learning_rate": 1.5824031019700647e-06, "loss": 0.6402, "step": 7319 }, { "epoch": 0.75, "grad_norm": 2.4152072161877247, "learning_rate": 1.581196240710608e-06, "loss": 0.6726, "step": 7320 }, { "epoch": 0.75, "grad_norm": 1.7983539042154741, "learning_rate": 1.5799897534051146e-06, "loss": 0.6705, "step": 7321 }, { "epoch": 0.75, "grad_norm": 1.7049587519723524, "learning_rate": 1.5787836401855506e-06, "loss": 0.7003, "step": 7322 }, { "epoch": 0.75, "grad_norm": 1.6976370414159132, "learning_rate": 1.577577901183841e-06, "loss": 0.7727, "step": 7323 }, { "epoch": 0.75, "grad_norm": 1.6575755879748084, "learning_rate": 1.5763725365318744e-06, "loss": 0.6331, "step": 7324 }, { "epoch": 0.75, "grad_norm": 1.460312313774725, "learning_rate": 1.5751675463614925e-06, "loss": 0.5461, "step": 7325 }, { "epoch": 0.75, "grad_norm": 1.3398382153664157, "learning_rate": 1.5739629308045018e-06, "loss": 0.5959, "step": 7326 }, { "epoch": 0.75, "grad_norm": 1.5533804024572078, "learning_rate": 1.572758689992664e-06, "loss": 0.6603, "step": 7327 }, { "epoch": 0.75, "grad_norm": 1.4535009723683339, "learning_rate": 1.5715548240576983e-06, "loss": 0.6263, "step": 7328 }, { "epoch": 0.75, "grad_norm": 1.4876300984670603, "learning_rate": 1.5703513331312893e-06, "loss": 0.5338, "step": 7329 }, { "epoch": 0.75, "grad_norm": 1.5020305967813807, "learning_rate": 1.5691482173450734e-06, "loss": 0.7689, "step": 7330 }, { "epoch": 0.75, "grad_norm": 1.442862969892466, "learning_rate": 1.5679454768306518e-06, "loss": 0.6292, "step": 7331 }, { "epoch": 0.75, "grad_norm": 1.532517181940735, "learning_rate": 1.5667431117195814e-06, "loss": 0.709, "step": 7332 }, { "epoch": 0.75, "grad_norm": 1.5299096150053422, "learning_rate": 1.5655411221433764e-06, "loss": 0.641, "step": 7333 }, { "epoch": 0.75, "grad_norm": 1.4266857816535616, "learning_rate": 1.5643395082335138e-06, "loss": 0.6729, "step": 7334 }, { "epoch": 0.75, "grad_norm": 1.8960736985668776, "learning_rate": 1.5631382701214309e-06, "loss": 0.6631, "step": 7335 }, { "epoch": 0.75, "grad_norm": 1.587185549820486, "learning_rate": 1.5619374079385175e-06, "loss": 0.7261, "step": 7336 }, { "epoch": 0.75, "grad_norm": 1.4414925034028587, "learning_rate": 1.5607369218161255e-06, "loss": 0.6228, "step": 7337 }, { "epoch": 0.75, "grad_norm": 1.5363859471023338, "learning_rate": 1.559536811885568e-06, "loss": 0.5438, "step": 7338 }, { "epoch": 0.75, "grad_norm": 1.3640414075613911, "learning_rate": 1.5583370782781127e-06, "loss": 0.6023, "step": 7339 }, { "epoch": 0.75, "grad_norm": 1.687457695051226, "learning_rate": 1.557137721124991e-06, "loss": 0.6266, "step": 7340 }, { "epoch": 0.75, "grad_norm": 1.4381043727987766, "learning_rate": 1.555938740557389e-06, "loss": 0.5932, "step": 7341 }, { "epoch": 0.75, "grad_norm": 1.6149421562734192, "learning_rate": 1.5547401367064507e-06, "loss": 0.6134, "step": 7342 }, { "epoch": 0.75, "grad_norm": 1.5875392468560228, "learning_rate": 1.5535419097032856e-06, "loss": 0.7358, "step": 7343 }, { "epoch": 0.75, "grad_norm": 1.5341203295942274, "learning_rate": 1.5523440596789534e-06, "loss": 0.7465, "step": 7344 }, { "epoch": 0.75, "grad_norm": 1.3731352880416638, "learning_rate": 1.5511465867644803e-06, "loss": 0.6585, "step": 7345 }, { "epoch": 0.75, "grad_norm": 1.3371930295590062, "learning_rate": 1.5499494910908464e-06, "loss": 0.6057, "step": 7346 }, { "epoch": 0.75, "grad_norm": 1.8030073524314307, "learning_rate": 1.5487527727889901e-06, "loss": 0.672, "step": 7347 }, { "epoch": 0.75, "grad_norm": 1.5534934031864673, "learning_rate": 1.5475564319898112e-06, "loss": 0.6709, "step": 7348 }, { "epoch": 0.75, "grad_norm": 1.4933303438615824, "learning_rate": 1.5463604688241695e-06, "loss": 0.5484, "step": 7349 }, { "epoch": 0.75, "grad_norm": 1.5578095436418609, "learning_rate": 1.54516488342288e-06, "loss": 0.7395, "step": 7350 }, { "epoch": 0.75, "grad_norm": 1.4717830807284824, "learning_rate": 1.5439696759167156e-06, "loss": 0.5825, "step": 7351 }, { "epoch": 0.75, "grad_norm": 1.507289262734542, "learning_rate": 1.5427748464364129e-06, "loss": 0.6624, "step": 7352 }, { "epoch": 0.75, "grad_norm": 1.494713098276861, "learning_rate": 1.5415803951126619e-06, "loss": 0.6631, "step": 7353 }, { "epoch": 0.75, "grad_norm": 1.4599680005513425, "learning_rate": 1.540386322076115e-06, "loss": 0.6371, "step": 7354 }, { "epoch": 0.75, "grad_norm": 1.471179524836089, "learning_rate": 1.539192627457382e-06, "loss": 0.6859, "step": 7355 }, { "epoch": 0.75, "grad_norm": 1.337019960276604, "learning_rate": 1.5379993113870285e-06, "loss": 0.6576, "step": 7356 }, { "epoch": 0.75, "grad_norm": 1.5525770831997712, "learning_rate": 1.5368063739955847e-06, "loss": 0.5896, "step": 7357 }, { "epoch": 0.75, "grad_norm": 1.7302454440915955, "learning_rate": 1.5356138154135319e-06, "loss": 0.705, "step": 7358 }, { "epoch": 0.75, "grad_norm": 1.4777925821611677, "learning_rate": 1.5344216357713183e-06, "loss": 0.6901, "step": 7359 }, { "epoch": 0.75, "grad_norm": 1.667117854441295, "learning_rate": 1.533229835199344e-06, "loss": 0.5845, "step": 7360 }, { "epoch": 0.75, "grad_norm": 1.5074016285969356, "learning_rate": 1.5320384138279688e-06, "loss": 0.6805, "step": 7361 }, { "epoch": 0.75, "grad_norm": 1.3503505539899199, "learning_rate": 1.5308473717875139e-06, "loss": 0.6564, "step": 7362 }, { "epoch": 0.75, "grad_norm": 1.5621905376772671, "learning_rate": 1.5296567092082582e-06, "loss": 0.6096, "step": 7363 }, { "epoch": 0.75, "grad_norm": 1.649470380105998, "learning_rate": 1.5284664262204368e-06, "loss": 0.7001, "step": 7364 }, { "epoch": 0.75, "grad_norm": 1.414974356286896, "learning_rate": 1.5272765229542435e-06, "loss": 0.6546, "step": 7365 }, { "epoch": 0.75, "grad_norm": 1.386668664036886, "learning_rate": 1.5260869995398347e-06, "loss": 0.6126, "step": 7366 }, { "epoch": 0.75, "grad_norm": 1.6023197202993307, "learning_rate": 1.5248978561073191e-06, "loss": 0.6607, "step": 7367 }, { "epoch": 0.75, "grad_norm": 1.3360708487147455, "learning_rate": 1.5237090927867704e-06, "loss": 0.5691, "step": 7368 }, { "epoch": 0.75, "grad_norm": 1.5459873229070058, "learning_rate": 1.5225207097082151e-06, "loss": 0.6231, "step": 7369 }, { "epoch": 0.75, "grad_norm": 1.4442315192039592, "learning_rate": 1.5213327070016393e-06, "loss": 0.5845, "step": 7370 }, { "epoch": 0.75, "grad_norm": 1.3471359141407477, "learning_rate": 1.5201450847969912e-06, "loss": 0.682, "step": 7371 }, { "epoch": 0.75, "grad_norm": 1.5400265309231917, "learning_rate": 1.5189578432241724e-06, "loss": 0.6978, "step": 7372 }, { "epoch": 0.75, "grad_norm": 1.5034399966646892, "learning_rate": 1.5177709824130483e-06, "loss": 0.6478, "step": 7373 }, { "epoch": 0.75, "grad_norm": 1.5328641473489208, "learning_rate": 1.5165845024934366e-06, "loss": 0.718, "step": 7374 }, { "epoch": 0.75, "grad_norm": 1.6228535344358765, "learning_rate": 1.5153984035951164e-06, "loss": 0.5687, "step": 7375 }, { "epoch": 0.75, "grad_norm": 1.6113631953969196, "learning_rate": 1.5142126858478256e-06, "loss": 0.6431, "step": 7376 }, { "epoch": 0.75, "grad_norm": 1.587897211538607, "learning_rate": 1.513027349381262e-06, "loss": 0.7411, "step": 7377 }, { "epoch": 0.75, "grad_norm": 1.6040336267468136, "learning_rate": 1.511842394325077e-06, "loss": 0.6977, "step": 7378 }, { "epoch": 0.75, "grad_norm": 1.4996951414757154, "learning_rate": 1.5106578208088822e-06, "loss": 0.6785, "step": 7379 }, { "epoch": 0.75, "grad_norm": 1.7080997271936715, "learning_rate": 1.5094736289622508e-06, "loss": 0.7038, "step": 7380 }, { "epoch": 0.75, "grad_norm": 1.4755561642832777, "learning_rate": 1.5082898189147083e-06, "loss": 0.6477, "step": 7381 }, { "epoch": 0.75, "grad_norm": 1.5894666008516325, "learning_rate": 1.5071063907957451e-06, "loss": 0.6225, "step": 7382 }, { "epoch": 0.75, "grad_norm": 1.363644778918092, "learning_rate": 1.5059233447348043e-06, "loss": 0.5707, "step": 7383 }, { "epoch": 0.75, "grad_norm": 1.4384095771285281, "learning_rate": 1.5047406808612874e-06, "loss": 0.6475, "step": 7384 }, { "epoch": 0.75, "grad_norm": 1.556959190848458, "learning_rate": 1.5035583993045605e-06, "loss": 0.6997, "step": 7385 }, { "epoch": 0.75, "grad_norm": 1.4525944861123818, "learning_rate": 1.5023765001939389e-06, "loss": 0.6708, "step": 7386 }, { "epoch": 0.75, "grad_norm": 1.5892480706241003, "learning_rate": 1.5011949836587047e-06, "loss": 0.6701, "step": 7387 }, { "epoch": 0.75, "grad_norm": 1.3413110126406855, "learning_rate": 1.5000138498280914e-06, "loss": 0.5588, "step": 7388 }, { "epoch": 0.75, "grad_norm": 1.8227077820418505, "learning_rate": 1.498833098831292e-06, "loss": 0.734, "step": 7389 }, { "epoch": 0.75, "grad_norm": 1.644893169521546, "learning_rate": 1.497652730797461e-06, "loss": 0.6859, "step": 7390 }, { "epoch": 0.75, "grad_norm": 1.4608220746300806, "learning_rate": 1.4964727458557099e-06, "loss": 0.6447, "step": 7391 }, { "epoch": 0.75, "grad_norm": 1.671358593296037, "learning_rate": 1.4952931441351054e-06, "loss": 0.6775, "step": 7392 }, { "epoch": 0.75, "grad_norm": 1.4991879735666924, "learning_rate": 1.494113925764673e-06, "loss": 0.5811, "step": 7393 }, { "epoch": 0.76, "grad_norm": 1.3639719014067213, "learning_rate": 1.4929350908734002e-06, "loss": 0.5821, "step": 7394 }, { "epoch": 0.76, "grad_norm": 1.747615729736322, "learning_rate": 1.4917566395902272e-06, "loss": 0.521, "step": 7395 }, { "epoch": 0.76, "grad_norm": 1.4042647165361768, "learning_rate": 1.4905785720440575e-06, "loss": 0.6201, "step": 7396 }, { "epoch": 0.76, "grad_norm": 1.867555762367394, "learning_rate": 1.4894008883637485e-06, "loss": 0.6262, "step": 7397 }, { "epoch": 0.76, "grad_norm": 1.5560143887715254, "learning_rate": 1.4882235886781155e-06, "loss": 0.746, "step": 7398 }, { "epoch": 0.76, "grad_norm": 1.5876081287896608, "learning_rate": 1.487046673115936e-06, "loss": 0.7386, "step": 7399 }, { "epoch": 0.76, "grad_norm": 1.7521341136144453, "learning_rate": 1.4858701418059402e-06, "loss": 0.6953, "step": 7400 }, { "epoch": 0.76, "grad_norm": 1.4685114443357907, "learning_rate": 1.4846939948768218e-06, "loss": 0.613, "step": 7401 }, { "epoch": 0.76, "grad_norm": 1.4235136455626707, "learning_rate": 1.4835182324572283e-06, "loss": 0.5805, "step": 7402 }, { "epoch": 0.76, "grad_norm": 1.6326394231573436, "learning_rate": 1.4823428546757646e-06, "loss": 0.5777, "step": 7403 }, { "epoch": 0.76, "grad_norm": 1.638849849574501, "learning_rate": 1.4811678616609964e-06, "loss": 0.7391, "step": 7404 }, { "epoch": 0.76, "grad_norm": 1.4838627374554871, "learning_rate": 1.4799932535414484e-06, "loss": 0.6124, "step": 7405 }, { "epoch": 0.76, "grad_norm": 1.6435152676653122, "learning_rate": 1.4788190304455996e-06, "loss": 0.6866, "step": 7406 }, { "epoch": 0.76, "grad_norm": 1.763241724513356, "learning_rate": 1.4776451925018864e-06, "loss": 0.7134, "step": 7407 }, { "epoch": 0.76, "grad_norm": 1.5280207495574765, "learning_rate": 1.476471739838708e-06, "loss": 0.6428, "step": 7408 }, { "epoch": 0.76, "grad_norm": 1.5257544208445342, "learning_rate": 1.4752986725844154e-06, "loss": 0.6051, "step": 7409 }, { "epoch": 0.76, "grad_norm": 1.5491078514892818, "learning_rate": 1.4741259908673239e-06, "loss": 0.5835, "step": 7410 }, { "epoch": 0.76, "grad_norm": 1.496492038420946, "learning_rate": 1.4729536948157009e-06, "loss": 0.5533, "step": 7411 }, { "epoch": 0.76, "grad_norm": 1.3592461414876373, "learning_rate": 1.4717817845577736e-06, "loss": 0.662, "step": 7412 }, { "epoch": 0.76, "grad_norm": 1.8190071330393172, "learning_rate": 1.4706102602217292e-06, "loss": 0.6789, "step": 7413 }, { "epoch": 0.76, "grad_norm": 1.376017590191034, "learning_rate": 1.4694391219357084e-06, "loss": 0.664, "step": 7414 }, { "epoch": 0.76, "grad_norm": 1.4745326038888833, "learning_rate": 1.468268369827815e-06, "loss": 0.6813, "step": 7415 }, { "epoch": 0.76, "grad_norm": 1.4255065328052139, "learning_rate": 1.4670980040261063e-06, "loss": 0.7429, "step": 7416 }, { "epoch": 0.76, "grad_norm": 1.6929193730357854, "learning_rate": 1.4659280246585965e-06, "loss": 0.7207, "step": 7417 }, { "epoch": 0.76, "grad_norm": 1.5025258755748054, "learning_rate": 1.4647584318532627e-06, "loss": 0.6204, "step": 7418 }, { "epoch": 0.76, "grad_norm": 1.3092451495848014, "learning_rate": 1.4635892257380364e-06, "loss": 0.61, "step": 7419 }, { "epoch": 0.76, "grad_norm": 1.3979836124576925, "learning_rate": 1.462420406440807e-06, "loss": 0.6021, "step": 7420 }, { "epoch": 0.76, "grad_norm": 1.4044631858995154, "learning_rate": 1.46125197408942e-06, "loss": 0.6136, "step": 7421 }, { "epoch": 0.76, "grad_norm": 1.6487208761074201, "learning_rate": 1.4600839288116824e-06, "loss": 0.594, "step": 7422 }, { "epoch": 0.76, "grad_norm": 1.5346347267471527, "learning_rate": 1.458916270735355e-06, "loss": 0.6609, "step": 7423 }, { "epoch": 0.76, "grad_norm": 1.538751006443979, "learning_rate": 1.45774899998816e-06, "loss": 0.7273, "step": 7424 }, { "epoch": 0.76, "grad_norm": 1.7185465322986606, "learning_rate": 1.4565821166977744e-06, "loss": 0.6106, "step": 7425 }, { "epoch": 0.76, "grad_norm": 3.6361420432147185, "learning_rate": 1.4554156209918324e-06, "loss": 0.7441, "step": 7426 }, { "epoch": 0.76, "grad_norm": 1.3751052382556475, "learning_rate": 1.4542495129979296e-06, "loss": 0.6274, "step": 7427 }, { "epoch": 0.76, "grad_norm": 1.5125143773763565, "learning_rate": 1.453083792843613e-06, "loss": 0.6216, "step": 7428 }, { "epoch": 0.76, "grad_norm": 1.8508867801980242, "learning_rate": 1.4519184606563951e-06, "loss": 0.6946, "step": 7429 }, { "epoch": 0.76, "grad_norm": 1.6483740792106294, "learning_rate": 1.45075351656374e-06, "loss": 0.6641, "step": 7430 }, { "epoch": 0.76, "grad_norm": 1.5653056261226501, "learning_rate": 1.449588960693069e-06, "loss": 0.6404, "step": 7431 }, { "epoch": 0.76, "grad_norm": 1.4305533695864638, "learning_rate": 1.4484247931717643e-06, "loss": 0.6042, "step": 7432 }, { "epoch": 0.76, "grad_norm": 1.4133946937527133, "learning_rate": 1.447261014127167e-06, "loss": 0.6352, "step": 7433 }, { "epoch": 0.76, "grad_norm": 1.3043106695967481, "learning_rate": 1.4460976236865704e-06, "loss": 0.6994, "step": 7434 }, { "epoch": 0.76, "grad_norm": 1.4419150743791196, "learning_rate": 1.444934621977227e-06, "loss": 0.604, "step": 7435 }, { "epoch": 0.76, "grad_norm": 1.8736608867256348, "learning_rate": 1.4437720091263503e-06, "loss": 0.5938, "step": 7436 }, { "epoch": 0.76, "grad_norm": 1.5458337820659358, "learning_rate": 1.442609785261106e-06, "loss": 0.6482, "step": 7437 }, { "epoch": 0.76, "grad_norm": 1.4142944622374, "learning_rate": 1.4414479505086227e-06, "loss": 0.6215, "step": 7438 }, { "epoch": 0.76, "grad_norm": 1.8198273671653662, "learning_rate": 1.4402865049959824e-06, "loss": 0.5781, "step": 7439 }, { "epoch": 0.76, "grad_norm": 1.496039795621027, "learning_rate": 1.4391254488502243e-06, "loss": 0.6832, "step": 7440 }, { "epoch": 0.76, "grad_norm": 1.6589563211966603, "learning_rate": 1.4379647821983488e-06, "loss": 0.6907, "step": 7441 }, { "epoch": 0.76, "grad_norm": 1.4140011789897446, "learning_rate": 1.436804505167309e-06, "loss": 0.6627, "step": 7442 }, { "epoch": 0.76, "grad_norm": 1.4829694421876343, "learning_rate": 1.435644617884021e-06, "loss": 0.5814, "step": 7443 }, { "epoch": 0.76, "grad_norm": 2.3399557405989153, "learning_rate": 1.434485120475353e-06, "loss": 0.4852, "step": 7444 }, { "epoch": 0.76, "grad_norm": 1.454612735682287, "learning_rate": 1.4333260130681314e-06, "loss": 0.6852, "step": 7445 }, { "epoch": 0.76, "grad_norm": 1.3395863688455905, "learning_rate": 1.4321672957891426e-06, "loss": 0.6753, "step": 7446 }, { "epoch": 0.76, "grad_norm": 1.5145444167294209, "learning_rate": 1.4310089687651302e-06, "loss": 0.623, "step": 7447 }, { "epoch": 0.76, "grad_norm": 1.4375360134196349, "learning_rate": 1.4298510321227921e-06, "loss": 0.6114, "step": 7448 }, { "epoch": 0.76, "grad_norm": 1.5506277657254195, "learning_rate": 1.428693485988784e-06, "loss": 0.5552, "step": 7449 }, { "epoch": 0.76, "grad_norm": 1.4596454202414195, "learning_rate": 1.4275363304897233e-06, "loss": 0.6195, "step": 7450 }, { "epoch": 0.76, "grad_norm": 1.6201245961317978, "learning_rate": 1.4263795657521779e-06, "loss": 0.6559, "step": 7451 }, { "epoch": 0.76, "grad_norm": 1.2940248202337261, "learning_rate": 1.42522319190268e-06, "loss": 0.5935, "step": 7452 }, { "epoch": 0.76, "grad_norm": 1.3236235185358753, "learning_rate": 1.4240672090677137e-06, "loss": 0.725, "step": 7453 }, { "epoch": 0.76, "grad_norm": 1.4768429555408147, "learning_rate": 1.4229116173737205e-06, "loss": 0.6626, "step": 7454 }, { "epoch": 0.76, "grad_norm": 1.5239164730037331, "learning_rate": 1.4217564169471037e-06, "loss": 0.5757, "step": 7455 }, { "epoch": 0.76, "grad_norm": 1.414245218424471, "learning_rate": 1.4206016079142188e-06, "loss": 0.6331, "step": 7456 }, { "epoch": 0.76, "grad_norm": 1.4877449700794843, "learning_rate": 1.4194471904013829e-06, "loss": 0.5944, "step": 7457 }, { "epoch": 0.76, "grad_norm": 1.527076620548822, "learning_rate": 1.4182931645348664e-06, "loss": 0.6334, "step": 7458 }, { "epoch": 0.76, "grad_norm": 1.5940303673613647, "learning_rate": 1.4171395304408963e-06, "loss": 0.7443, "step": 7459 }, { "epoch": 0.76, "grad_norm": 1.8076358402988744, "learning_rate": 1.415986288245662e-06, "loss": 0.733, "step": 7460 }, { "epoch": 0.76, "grad_norm": 1.474938498444208, "learning_rate": 1.4148334380753071e-06, "loss": 0.6497, "step": 7461 }, { "epoch": 0.76, "grad_norm": 1.329043674904074, "learning_rate": 1.4136809800559308e-06, "loss": 0.5789, "step": 7462 }, { "epoch": 0.76, "grad_norm": 1.775405105534157, "learning_rate": 1.4125289143135912e-06, "loss": 0.6721, "step": 7463 }, { "epoch": 0.76, "grad_norm": 1.4132001915006844, "learning_rate": 1.4113772409743009e-06, "loss": 0.6342, "step": 7464 }, { "epoch": 0.76, "grad_norm": 1.6281959393092416, "learning_rate": 1.4102259601640334e-06, "loss": 0.6915, "step": 7465 }, { "epoch": 0.76, "grad_norm": 1.4567082056445493, "learning_rate": 1.4090750720087193e-06, "loss": 0.665, "step": 7466 }, { "epoch": 0.76, "grad_norm": 1.42985761935289, "learning_rate": 1.4079245766342425e-06, "loss": 0.6299, "step": 7467 }, { "epoch": 0.76, "grad_norm": 1.6160840153395328, "learning_rate": 1.4067744741664451e-06, "loss": 0.6751, "step": 7468 }, { "epoch": 0.76, "grad_norm": 1.4963917039903145, "learning_rate": 1.4056247647311294e-06, "loss": 0.5866, "step": 7469 }, { "epoch": 0.76, "grad_norm": 1.4026808673906184, "learning_rate": 1.40447544845405e-06, "loss": 0.6404, "step": 7470 }, { "epoch": 0.76, "grad_norm": 1.4665064638499588, "learning_rate": 1.4033265254609235e-06, "loss": 0.5961, "step": 7471 }, { "epoch": 0.76, "grad_norm": 1.4352904011825345, "learning_rate": 1.402177995877419e-06, "loss": 0.6213, "step": 7472 }, { "epoch": 0.76, "grad_norm": 1.428792189247403, "learning_rate": 1.4010298598291637e-06, "loss": 0.5438, "step": 7473 }, { "epoch": 0.76, "grad_norm": 1.5261484532125367, "learning_rate": 1.399882117441743e-06, "loss": 0.6066, "step": 7474 }, { "epoch": 0.76, "grad_norm": 1.533006063707809, "learning_rate": 1.3987347688407016e-06, "loss": 0.7664, "step": 7475 }, { "epoch": 0.76, "grad_norm": 1.7006283327944038, "learning_rate": 1.3975878141515353e-06, "loss": 0.6137, "step": 7476 }, { "epoch": 0.76, "grad_norm": 1.6533861530799216, "learning_rate": 1.3964412534997007e-06, "loss": 0.5952, "step": 7477 }, { "epoch": 0.76, "grad_norm": 1.4005575913042192, "learning_rate": 1.3952950870106081e-06, "loss": 0.7028, "step": 7478 }, { "epoch": 0.76, "grad_norm": 1.6281020969665776, "learning_rate": 1.3941493148096291e-06, "loss": 0.6919, "step": 7479 }, { "epoch": 0.76, "grad_norm": 1.3765730145696522, "learning_rate": 1.393003937022091e-06, "loss": 0.5571, "step": 7480 }, { "epoch": 0.76, "grad_norm": 1.6659018662390253, "learning_rate": 1.3918589537732763e-06, "loss": 0.5741, "step": 7481 }, { "epoch": 0.76, "grad_norm": 1.4798590419017732, "learning_rate": 1.3907143651884225e-06, "loss": 0.6531, "step": 7482 }, { "epoch": 0.76, "grad_norm": 1.648269013436018, "learning_rate": 1.3895701713927301e-06, "loss": 0.7775, "step": 7483 }, { "epoch": 0.76, "grad_norm": 1.6805808240087554, "learning_rate": 1.388426372511349e-06, "loss": 0.7218, "step": 7484 }, { "epoch": 0.76, "grad_norm": 1.7064810184766563, "learning_rate": 1.387282968669394e-06, "loss": 0.616, "step": 7485 }, { "epoch": 0.76, "grad_norm": 1.6384359295048494, "learning_rate": 1.3861399599919295e-06, "loss": 0.7433, "step": 7486 }, { "epoch": 0.76, "grad_norm": 1.6311646761862608, "learning_rate": 1.3849973466039785e-06, "loss": 0.6444, "step": 7487 }, { "epoch": 0.76, "grad_norm": 1.456939923994128, "learning_rate": 1.3838551286305236e-06, "loss": 0.613, "step": 7488 }, { "epoch": 0.76, "grad_norm": 1.6208604596038574, "learning_rate": 1.3827133061965037e-06, "loss": 0.5723, "step": 7489 }, { "epoch": 0.76, "grad_norm": 1.5320857519251108, "learning_rate": 1.3815718794268112e-06, "loss": 0.5577, "step": 7490 }, { "epoch": 0.76, "grad_norm": 1.5556072754251562, "learning_rate": 1.3804308484462975e-06, "loss": 0.5999, "step": 7491 }, { "epoch": 0.77, "grad_norm": 1.5118317135290904, "learning_rate": 1.3792902133797692e-06, "loss": 0.6807, "step": 7492 }, { "epoch": 0.77, "grad_norm": 1.336434571627137, "learning_rate": 1.3781499743519911e-06, "loss": 0.5918, "step": 7493 }, { "epoch": 0.77, "grad_norm": 1.5119448900055172, "learning_rate": 1.3770101314876865e-06, "loss": 0.6412, "step": 7494 }, { "epoch": 0.77, "grad_norm": 1.571091500844124, "learning_rate": 1.3758706849115317e-06, "loss": 0.6471, "step": 7495 }, { "epoch": 0.77, "grad_norm": 1.5092948234249826, "learning_rate": 1.3747316347481593e-06, "loss": 0.7539, "step": 7496 }, { "epoch": 0.77, "grad_norm": 1.7221216286490084, "learning_rate": 1.3735929811221639e-06, "loss": 0.6655, "step": 7497 }, { "epoch": 0.77, "grad_norm": 1.4769041625925161, "learning_rate": 1.3724547241580888e-06, "loss": 0.609, "step": 7498 }, { "epoch": 0.77, "grad_norm": 1.9832807080284358, "learning_rate": 1.3713168639804432e-06, "loss": 0.6492, "step": 7499 }, { "epoch": 0.77, "grad_norm": 2.2216522541062873, "learning_rate": 1.3701794007136848e-06, "loss": 0.6042, "step": 7500 }, { "epoch": 0.77, "grad_norm": 1.3803512803602886, "learning_rate": 1.3690423344822307e-06, "loss": 0.5972, "step": 7501 }, { "epoch": 0.77, "grad_norm": 1.6428997544009425, "learning_rate": 1.3679056654104556e-06, "loss": 0.7124, "step": 7502 }, { "epoch": 0.77, "grad_norm": 1.4214040277607596, "learning_rate": 1.3667693936226923e-06, "loss": 0.5372, "step": 7503 }, { "epoch": 0.77, "grad_norm": 1.5168379175829787, "learning_rate": 1.3656335192432258e-06, "loss": 0.6461, "step": 7504 }, { "epoch": 0.77, "grad_norm": 1.6321287062309786, "learning_rate": 1.3644980423963006e-06, "loss": 0.6777, "step": 7505 }, { "epoch": 0.77, "grad_norm": 1.4374007066172936, "learning_rate": 1.3633629632061146e-06, "loss": 0.5638, "step": 7506 }, { "epoch": 0.77, "grad_norm": 1.4547600267806617, "learning_rate": 1.3622282817968264e-06, "loss": 0.6756, "step": 7507 }, { "epoch": 0.77, "grad_norm": 1.5787660705763575, "learning_rate": 1.3610939982925504e-06, "loss": 0.5301, "step": 7508 }, { "epoch": 0.77, "grad_norm": 1.2834875783582795, "learning_rate": 1.359960112817355e-06, "loss": 0.6045, "step": 7509 }, { "epoch": 0.77, "grad_norm": 1.4256505945632423, "learning_rate": 1.3588266254952648e-06, "loss": 0.6542, "step": 7510 }, { "epoch": 0.77, "grad_norm": 1.430973575517246, "learning_rate": 1.3576935364502653e-06, "loss": 0.6669, "step": 7511 }, { "epoch": 0.77, "grad_norm": 1.4828397053472595, "learning_rate": 1.3565608458062924e-06, "loss": 0.6534, "step": 7512 }, { "epoch": 0.77, "grad_norm": 1.442452209770207, "learning_rate": 1.3554285536872447e-06, "loss": 0.6003, "step": 7513 }, { "epoch": 0.77, "grad_norm": 1.4908386817935928, "learning_rate": 1.3542966602169722e-06, "loss": 0.6937, "step": 7514 }, { "epoch": 0.77, "grad_norm": 1.3081826654020006, "learning_rate": 1.3531651655192824e-06, "loss": 0.5777, "step": 7515 }, { "epoch": 0.77, "grad_norm": 1.5588800478081781, "learning_rate": 1.3520340697179406e-06, "loss": 0.6334, "step": 7516 }, { "epoch": 0.77, "grad_norm": 1.4435015802890807, "learning_rate": 1.35090337293667e-06, "loss": 0.6155, "step": 7517 }, { "epoch": 0.77, "grad_norm": 1.5045587983726434, "learning_rate": 1.3497730752991456e-06, "loss": 0.5801, "step": 7518 }, { "epoch": 0.77, "grad_norm": 1.8304544116015424, "learning_rate": 1.3486431769290014e-06, "loss": 0.6556, "step": 7519 }, { "epoch": 0.77, "grad_norm": 1.5397348671300812, "learning_rate": 1.3475136779498265e-06, "loss": 0.6199, "step": 7520 }, { "epoch": 0.77, "grad_norm": 1.4528276348964886, "learning_rate": 1.346384578485168e-06, "loss": 0.6785, "step": 7521 }, { "epoch": 0.77, "grad_norm": 2.0763618897886706, "learning_rate": 1.34525587865853e-06, "loss": 0.5639, "step": 7522 }, { "epoch": 0.77, "grad_norm": 1.4661940970617013, "learning_rate": 1.344127578593371e-06, "loss": 0.6195, "step": 7523 }, { "epoch": 0.77, "grad_norm": 1.5282965310971763, "learning_rate": 1.3429996784131033e-06, "loss": 0.6197, "step": 7524 }, { "epoch": 0.77, "grad_norm": 1.5927892627430313, "learning_rate": 1.3418721782411015e-06, "loss": 0.5926, "step": 7525 }, { "epoch": 0.77, "grad_norm": 1.696716939605621, "learning_rate": 1.3407450782006915e-06, "loss": 0.6933, "step": 7526 }, { "epoch": 0.77, "grad_norm": 1.8592581532082033, "learning_rate": 1.339618378415159e-06, "loss": 0.6207, "step": 7527 }, { "epoch": 0.77, "grad_norm": 1.6216524675353639, "learning_rate": 1.3384920790077427e-06, "loss": 0.75, "step": 7528 }, { "epoch": 0.77, "grad_norm": 1.604137609217292, "learning_rate": 1.337366180101638e-06, "loss": 0.6895, "step": 7529 }, { "epoch": 0.77, "grad_norm": 1.5436099820186933, "learning_rate": 1.3362406818199985e-06, "loss": 0.6932, "step": 7530 }, { "epoch": 0.77, "grad_norm": 1.3618467990617875, "learning_rate": 1.3351155842859354e-06, "loss": 0.5566, "step": 7531 }, { "epoch": 0.77, "grad_norm": 1.4864341867695285, "learning_rate": 1.3339908876225105e-06, "loss": 0.6272, "step": 7532 }, { "epoch": 0.77, "grad_norm": 1.4622664292579424, "learning_rate": 1.332866591952746e-06, "loss": 0.5519, "step": 7533 }, { "epoch": 0.77, "grad_norm": 1.4592542398088437, "learning_rate": 1.3317426973996173e-06, "loss": 0.666, "step": 7534 }, { "epoch": 0.77, "grad_norm": 1.3927370275525837, "learning_rate": 1.3306192040860588e-06, "loss": 0.6557, "step": 7535 }, { "epoch": 0.77, "grad_norm": 1.446151885414369, "learning_rate": 1.329496112134962e-06, "loss": 0.5726, "step": 7536 }, { "epoch": 0.77, "grad_norm": 1.4851736402146314, "learning_rate": 1.3283734216691701e-06, "loss": 0.6589, "step": 7537 }, { "epoch": 0.77, "grad_norm": 1.407684110655719, "learning_rate": 1.3272511328114846e-06, "loss": 0.6196, "step": 7538 }, { "epoch": 0.77, "grad_norm": 1.5666942930701993, "learning_rate": 1.3261292456846648e-06, "loss": 0.6438, "step": 7539 }, { "epoch": 0.77, "grad_norm": 1.4425474100697155, "learning_rate": 1.3250077604114215e-06, "loss": 0.6254, "step": 7540 }, { "epoch": 0.77, "grad_norm": 1.599880013844955, "learning_rate": 1.3238866771144282e-06, "loss": 0.6154, "step": 7541 }, { "epoch": 0.77, "grad_norm": 1.374602399464072, "learning_rate": 1.322765995916309e-06, "loss": 0.5668, "step": 7542 }, { "epoch": 0.77, "grad_norm": 1.5536261221609062, "learning_rate": 1.3216457169396441e-06, "loss": 0.7111, "step": 7543 }, { "epoch": 0.77, "grad_norm": 1.7049474798224096, "learning_rate": 1.3205258403069732e-06, "loss": 0.6355, "step": 7544 }, { "epoch": 0.77, "grad_norm": 1.489969996888538, "learning_rate": 1.3194063661407907e-06, "loss": 0.6386, "step": 7545 }, { "epoch": 0.77, "grad_norm": 1.6446105907717121, "learning_rate": 1.3182872945635456e-06, "loss": 0.5628, "step": 7546 }, { "epoch": 0.77, "grad_norm": 1.5572912030704136, "learning_rate": 1.3171686256976429e-06, "loss": 0.7271, "step": 7547 }, { "epoch": 0.77, "grad_norm": 1.437792063768832, "learning_rate": 1.316050359665444e-06, "loss": 0.5699, "step": 7548 }, { "epoch": 0.77, "grad_norm": 1.4375492532827043, "learning_rate": 1.3149324965892673e-06, "loss": 0.5766, "step": 7549 }, { "epoch": 0.77, "grad_norm": 1.2850172615993145, "learning_rate": 1.3138150365913882e-06, "loss": 0.5328, "step": 7550 }, { "epoch": 0.77, "grad_norm": 1.5113821291679994, "learning_rate": 1.3126979797940336e-06, "loss": 0.5604, "step": 7551 }, { "epoch": 0.77, "grad_norm": 1.421825198070663, "learning_rate": 1.3115813263193893e-06, "loss": 0.6982, "step": 7552 }, { "epoch": 0.77, "grad_norm": 1.3784315847699689, "learning_rate": 1.3104650762895975e-06, "loss": 0.6122, "step": 7553 }, { "epoch": 0.77, "grad_norm": 1.896749349050073, "learning_rate": 1.309349229826754e-06, "loss": 0.5917, "step": 7554 }, { "epoch": 0.77, "grad_norm": 4.308216768322502, "learning_rate": 1.3082337870529138e-06, "loss": 0.6292, "step": 7555 }, { "epoch": 0.77, "grad_norm": 1.515681754321134, "learning_rate": 1.307118748090085e-06, "loss": 0.5142, "step": 7556 }, { "epoch": 0.77, "grad_norm": 1.4944195464136554, "learning_rate": 1.3060041130602296e-06, "loss": 0.746, "step": 7557 }, { "epoch": 0.77, "grad_norm": 1.5806642773838455, "learning_rate": 1.3048898820852707e-06, "loss": 0.664, "step": 7558 }, { "epoch": 0.77, "grad_norm": 1.3898095423009957, "learning_rate": 1.303776055287086e-06, "loss": 0.6063, "step": 7559 }, { "epoch": 0.77, "grad_norm": 1.3995382842210413, "learning_rate": 1.3026626327875052e-06, "loss": 0.6201, "step": 7560 }, { "epoch": 0.77, "grad_norm": 1.5408514520059085, "learning_rate": 1.3015496147083168e-06, "loss": 0.6072, "step": 7561 }, { "epoch": 0.77, "grad_norm": 1.3600305986278585, "learning_rate": 1.3004370011712624e-06, "loss": 0.6042, "step": 7562 }, { "epoch": 0.77, "grad_norm": 1.58157758218613, "learning_rate": 1.2993247922980435e-06, "loss": 0.6094, "step": 7563 }, { "epoch": 0.77, "grad_norm": 1.6235793563306953, "learning_rate": 1.2982129882103168e-06, "loss": 0.6205, "step": 7564 }, { "epoch": 0.77, "grad_norm": 1.258578190955494, "learning_rate": 1.2971015890296906e-06, "loss": 0.6282, "step": 7565 }, { "epoch": 0.77, "grad_norm": 1.7025030269521326, "learning_rate": 1.2959905948777313e-06, "loss": 0.618, "step": 7566 }, { "epoch": 0.77, "grad_norm": 1.6249646317329254, "learning_rate": 1.294880005875963e-06, "loss": 0.7457, "step": 7567 }, { "epoch": 0.77, "grad_norm": 1.5155232292620249, "learning_rate": 1.2937698221458612e-06, "loss": 0.595, "step": 7568 }, { "epoch": 0.77, "grad_norm": 1.5730713067473436, "learning_rate": 1.2926600438088622e-06, "loss": 0.67, "step": 7569 }, { "epoch": 0.77, "grad_norm": 1.5162541516429253, "learning_rate": 1.291550670986354e-06, "loss": 0.6922, "step": 7570 }, { "epoch": 0.77, "grad_norm": 1.5672050351274422, "learning_rate": 1.2904417037996797e-06, "loss": 0.664, "step": 7571 }, { "epoch": 0.77, "grad_norm": 1.4785947538706534, "learning_rate": 1.2893331423701416e-06, "loss": 0.5959, "step": 7572 }, { "epoch": 0.77, "grad_norm": 1.4829609429634218, "learning_rate": 1.2882249868189979e-06, "loss": 0.6803, "step": 7573 }, { "epoch": 0.77, "grad_norm": 1.5744187784035995, "learning_rate": 1.2871172372674573e-06, "loss": 0.6653, "step": 7574 }, { "epoch": 0.77, "grad_norm": 1.6529212548003152, "learning_rate": 1.2860098938366882e-06, "loss": 0.7066, "step": 7575 }, { "epoch": 0.77, "grad_norm": 1.4485503948896588, "learning_rate": 1.2849029566478115e-06, "loss": 0.6403, "step": 7576 }, { "epoch": 0.77, "grad_norm": 1.718077709323386, "learning_rate": 1.2837964258219082e-06, "loss": 0.5889, "step": 7577 }, { "epoch": 0.77, "grad_norm": 1.6907242003773586, "learning_rate": 1.282690301480013e-06, "loss": 0.659, "step": 7578 }, { "epoch": 0.77, "grad_norm": 1.6558806436989186, "learning_rate": 1.2815845837431135e-06, "loss": 0.7889, "step": 7579 }, { "epoch": 0.77, "grad_norm": 1.7118231225980254, "learning_rate": 1.2804792727321546e-06, "loss": 0.6691, "step": 7580 }, { "epoch": 0.77, "grad_norm": 1.3696563316700772, "learning_rate": 1.2793743685680387e-06, "loss": 0.4832, "step": 7581 }, { "epoch": 0.77, "grad_norm": 1.3033690509213465, "learning_rate": 1.2782698713716196e-06, "loss": 0.6633, "step": 7582 }, { "epoch": 0.77, "grad_norm": 2.291504618069588, "learning_rate": 1.27716578126371e-06, "loss": 0.6344, "step": 7583 }, { "epoch": 0.77, "grad_norm": 1.5308321482847058, "learning_rate": 1.2760620983650802e-06, "loss": 0.6485, "step": 7584 }, { "epoch": 0.77, "grad_norm": 1.3836716896707337, "learning_rate": 1.2749588227964466e-06, "loss": 0.6835, "step": 7585 }, { "epoch": 0.77, "grad_norm": 1.3169075887412136, "learning_rate": 1.2738559546784917e-06, "loss": 0.603, "step": 7586 }, { "epoch": 0.77, "grad_norm": 1.5101918024655172, "learning_rate": 1.272753494131846e-06, "loss": 0.6436, "step": 7587 }, { "epoch": 0.77, "grad_norm": 1.5066334978069358, "learning_rate": 1.2716514412771009e-06, "loss": 0.609, "step": 7588 }, { "epoch": 0.77, "grad_norm": 1.3136647319097534, "learning_rate": 1.2705497962347996e-06, "loss": 0.59, "step": 7589 }, { "epoch": 0.78, "grad_norm": 1.3575056727909895, "learning_rate": 1.2694485591254402e-06, "loss": 0.6027, "step": 7590 }, { "epoch": 0.78, "grad_norm": 1.5808877303020583, "learning_rate": 1.268347730069479e-06, "loss": 0.6232, "step": 7591 }, { "epoch": 0.78, "grad_norm": 1.4224953775398463, "learning_rate": 1.2672473091873278e-06, "loss": 0.6354, "step": 7592 }, { "epoch": 0.78, "grad_norm": 1.4337209109868816, "learning_rate": 1.266147296599351e-06, "loss": 0.5074, "step": 7593 }, { "epoch": 0.78, "grad_norm": 1.4916067800990322, "learning_rate": 1.2650476924258676e-06, "loss": 0.6865, "step": 7594 }, { "epoch": 0.78, "grad_norm": 1.3670784437533248, "learning_rate": 1.263948496787158e-06, "loss": 0.5465, "step": 7595 }, { "epoch": 0.78, "grad_norm": 1.518243336475746, "learning_rate": 1.2628497098034497e-06, "loss": 0.6453, "step": 7596 }, { "epoch": 0.78, "grad_norm": 1.5690918677209054, "learning_rate": 1.2617513315949325e-06, "loss": 0.6862, "step": 7597 }, { "epoch": 0.78, "grad_norm": 1.6389764031898053, "learning_rate": 1.2606533622817508e-06, "loss": 0.6911, "step": 7598 }, { "epoch": 0.78, "grad_norm": 1.64395631317259, "learning_rate": 1.259555801983996e-06, "loss": 0.6839, "step": 7599 }, { "epoch": 0.78, "grad_norm": 1.3864266800063176, "learning_rate": 1.2584586508217262e-06, "loss": 0.5814, "step": 7600 }, { "epoch": 0.78, "grad_norm": 1.5019899395511662, "learning_rate": 1.2573619089149458e-06, "loss": 0.6298, "step": 7601 }, { "epoch": 0.78, "grad_norm": 1.5131926753329012, "learning_rate": 1.2562655763836219e-06, "loss": 0.6491, "step": 7602 }, { "epoch": 0.78, "grad_norm": 1.8571255330209313, "learning_rate": 1.2551696533476704e-06, "loss": 0.6105, "step": 7603 }, { "epoch": 0.78, "grad_norm": 1.3861859703079087, "learning_rate": 1.254074139926964e-06, "loss": 0.5754, "step": 7604 }, { "epoch": 0.78, "grad_norm": 1.4240700410106473, "learning_rate": 1.2529790362413335e-06, "loss": 0.5624, "step": 7605 }, { "epoch": 0.78, "grad_norm": 1.54016694735379, "learning_rate": 1.2518843424105643e-06, "loss": 0.6238, "step": 7606 }, { "epoch": 0.78, "grad_norm": 1.5527078475161946, "learning_rate": 1.2507900585543935e-06, "loss": 0.6078, "step": 7607 }, { "epoch": 0.78, "grad_norm": 1.6698991947726751, "learning_rate": 1.2496961847925154e-06, "loss": 0.6991, "step": 7608 }, { "epoch": 0.78, "grad_norm": 1.6027089110327006, "learning_rate": 1.2486027212445812e-06, "loss": 0.6092, "step": 7609 }, { "epoch": 0.78, "grad_norm": 1.4256042392846517, "learning_rate": 1.2475096680301934e-06, "loss": 0.6013, "step": 7610 }, { "epoch": 0.78, "grad_norm": 1.329929501773687, "learning_rate": 1.2464170252689133e-06, "loss": 0.5474, "step": 7611 }, { "epoch": 0.78, "grad_norm": 1.2065102693578893, "learning_rate": 1.2453247930802587e-06, "loss": 0.5459, "step": 7612 }, { "epoch": 0.78, "grad_norm": 1.4704327940477522, "learning_rate": 1.2442329715836937e-06, "loss": 0.6064, "step": 7613 }, { "epoch": 0.78, "grad_norm": 1.9568646575207607, "learning_rate": 1.2431415608986475e-06, "loss": 0.6166, "step": 7614 }, { "epoch": 0.78, "grad_norm": 1.5549677633642869, "learning_rate": 1.2420505611444977e-06, "loss": 0.6757, "step": 7615 }, { "epoch": 0.78, "grad_norm": 1.6457643620677616, "learning_rate": 1.2409599724405807e-06, "loss": 0.6185, "step": 7616 }, { "epoch": 0.78, "grad_norm": 1.4739992555708186, "learning_rate": 1.2398697949061905e-06, "loss": 0.6899, "step": 7617 }, { "epoch": 0.78, "grad_norm": 1.4142594991915518, "learning_rate": 1.238780028660565e-06, "loss": 0.6248, "step": 7618 }, { "epoch": 0.78, "grad_norm": 1.6715361836757234, "learning_rate": 1.2376906738229088e-06, "loss": 0.6259, "step": 7619 }, { "epoch": 0.78, "grad_norm": 1.5862759021130162, "learning_rate": 1.2366017305123784e-06, "loss": 0.6762, "step": 7620 }, { "epoch": 0.78, "grad_norm": 1.465755056709163, "learning_rate": 1.2355131988480817e-06, "loss": 0.6047, "step": 7621 }, { "epoch": 0.78, "grad_norm": 1.4981881890673852, "learning_rate": 1.2344250789490836e-06, "loss": 0.6658, "step": 7622 }, { "epoch": 0.78, "grad_norm": 1.3694097324262557, "learning_rate": 1.2333373709344065e-06, "loss": 0.6535, "step": 7623 }, { "epoch": 0.78, "grad_norm": 1.5238242571526717, "learning_rate": 1.2322500749230232e-06, "loss": 0.6755, "step": 7624 }, { "epoch": 0.78, "grad_norm": 1.6607771457741791, "learning_rate": 1.231163191033865e-06, "loss": 0.71, "step": 7625 }, { "epoch": 0.78, "grad_norm": 1.4603533991444053, "learning_rate": 1.23007671938582e-06, "loss": 0.6847, "step": 7626 }, { "epoch": 0.78, "grad_norm": 1.4385968943087333, "learning_rate": 1.228990660097722e-06, "loss": 0.6553, "step": 7627 }, { "epoch": 0.78, "grad_norm": 1.516503683462993, "learning_rate": 1.2279050132883703e-06, "loss": 0.756, "step": 7628 }, { "epoch": 0.78, "grad_norm": 1.449033649675857, "learning_rate": 1.2268197790765113e-06, "loss": 0.633, "step": 7629 }, { "epoch": 0.78, "grad_norm": 1.413191011710665, "learning_rate": 1.2257349575808514e-06, "loss": 0.5878, "step": 7630 }, { "epoch": 0.78, "grad_norm": 1.528915612796928, "learning_rate": 1.2246505489200534e-06, "loss": 0.6317, "step": 7631 }, { "epoch": 0.78, "grad_norm": 1.8416097383554795, "learning_rate": 1.223566553212725e-06, "loss": 0.6765, "step": 7632 }, { "epoch": 0.78, "grad_norm": 1.7312202556139256, "learning_rate": 1.2224829705774389e-06, "loss": 0.655, "step": 7633 }, { "epoch": 0.78, "grad_norm": 1.4964371842224187, "learning_rate": 1.2213998011327194e-06, "loss": 0.6831, "step": 7634 }, { "epoch": 0.78, "grad_norm": 1.3614956376296223, "learning_rate": 1.2203170449970452e-06, "loss": 0.6016, "step": 7635 }, { "epoch": 0.78, "grad_norm": 1.6240286839646152, "learning_rate": 1.2192347022888473e-06, "loss": 0.6671, "step": 7636 }, { "epoch": 0.78, "grad_norm": 1.4979155362862904, "learning_rate": 1.218152773126517e-06, "loss": 0.6587, "step": 7637 }, { "epoch": 0.78, "grad_norm": 1.5600581515265262, "learning_rate": 1.2170712576283949e-06, "loss": 0.5884, "step": 7638 }, { "epoch": 0.78, "grad_norm": 1.4841931458579216, "learning_rate": 1.2159901559127801e-06, "loss": 0.6114, "step": 7639 }, { "epoch": 0.78, "grad_norm": 1.8033643667477792, "learning_rate": 1.2149094680979279e-06, "loss": 0.6242, "step": 7640 }, { "epoch": 0.78, "grad_norm": 1.603506657532786, "learning_rate": 1.2138291943020396e-06, "loss": 0.6422, "step": 7641 }, { "epoch": 0.78, "grad_norm": 1.5813192052717566, "learning_rate": 1.2127493346432822e-06, "loss": 0.6354, "step": 7642 }, { "epoch": 0.78, "grad_norm": 1.584706336493702, "learning_rate": 1.2116698892397693e-06, "loss": 0.6794, "step": 7643 }, { "epoch": 0.78, "grad_norm": 1.5578683458089366, "learning_rate": 1.2105908582095733e-06, "loss": 0.6231, "step": 7644 }, { "epoch": 0.78, "grad_norm": 1.9387149365555327, "learning_rate": 1.2095122416707233e-06, "loss": 0.6804, "step": 7645 }, { "epoch": 0.78, "grad_norm": 1.429655224255674, "learning_rate": 1.2084340397411941e-06, "loss": 0.6599, "step": 7646 }, { "epoch": 0.78, "grad_norm": 1.4779724913501304, "learning_rate": 1.2073562525389243e-06, "loss": 0.5486, "step": 7647 }, { "epoch": 0.78, "grad_norm": 1.3732018332724565, "learning_rate": 1.2062788801818053e-06, "loss": 0.6141, "step": 7648 }, { "epoch": 0.78, "grad_norm": 3.820987088792916, "learning_rate": 1.2052019227876793e-06, "loss": 0.658, "step": 7649 }, { "epoch": 0.78, "grad_norm": 1.4846204765670896, "learning_rate": 1.2041253804743475e-06, "loss": 0.6041, "step": 7650 }, { "epoch": 0.78, "grad_norm": 1.397871284335363, "learning_rate": 1.2030492533595623e-06, "loss": 0.579, "step": 7651 }, { "epoch": 0.78, "grad_norm": 1.4967930035144794, "learning_rate": 1.2019735415610318e-06, "loss": 0.6269, "step": 7652 }, { "epoch": 0.78, "grad_norm": 1.4362117798694054, "learning_rate": 1.200898245196419e-06, "loss": 0.7188, "step": 7653 }, { "epoch": 0.78, "grad_norm": 1.5375120991189977, "learning_rate": 1.1998233643833457e-06, "loss": 0.544, "step": 7654 }, { "epoch": 0.78, "grad_norm": 1.4176380560579347, "learning_rate": 1.198748899239377e-06, "loss": 0.6564, "step": 7655 }, { "epoch": 0.78, "grad_norm": 1.4992518199754719, "learning_rate": 1.1976748498820452e-06, "loss": 0.6559, "step": 7656 }, { "epoch": 0.78, "grad_norm": 1.5465326565151665, "learning_rate": 1.196601216428827e-06, "loss": 0.6983, "step": 7657 }, { "epoch": 0.78, "grad_norm": 1.5002157947454984, "learning_rate": 1.1955279989971608e-06, "loss": 0.5849, "step": 7658 }, { "epoch": 0.78, "grad_norm": 1.3784918208574752, "learning_rate": 1.194455197704439e-06, "loss": 0.5384, "step": 7659 }, { "epoch": 0.78, "grad_norm": 1.4786422649200555, "learning_rate": 1.1933828126680009e-06, "loss": 0.7009, "step": 7660 }, { "epoch": 0.78, "grad_norm": 1.7246111270119977, "learning_rate": 1.1923108440051478e-06, "loss": 0.6539, "step": 7661 }, { "epoch": 0.78, "grad_norm": 1.524609346777287, "learning_rate": 1.191239291833135e-06, "loss": 0.6629, "step": 7662 }, { "epoch": 0.78, "grad_norm": 1.2745946330317277, "learning_rate": 1.190168156269168e-06, "loss": 0.5425, "step": 7663 }, { "epoch": 0.78, "grad_norm": 1.4921548631370996, "learning_rate": 1.189097437430412e-06, "loss": 0.6106, "step": 7664 }, { "epoch": 0.78, "grad_norm": 1.480689669927413, "learning_rate": 1.1880271354339824e-06, "loss": 0.6059, "step": 7665 }, { "epoch": 0.78, "grad_norm": 1.6019091527849183, "learning_rate": 1.186957250396949e-06, "loss": 0.764, "step": 7666 }, { "epoch": 0.78, "grad_norm": 1.5857545188055628, "learning_rate": 1.1858877824363385e-06, "loss": 0.572, "step": 7667 }, { "epoch": 0.78, "grad_norm": 1.6064273999809489, "learning_rate": 1.184818731669134e-06, "loss": 0.6629, "step": 7668 }, { "epoch": 0.78, "grad_norm": 1.5548468598969982, "learning_rate": 1.1837500982122646e-06, "loss": 0.6584, "step": 7669 }, { "epoch": 0.78, "grad_norm": 1.4210056398511268, "learning_rate": 1.1826818821826231e-06, "loss": 0.6018, "step": 7670 }, { "epoch": 0.78, "grad_norm": 1.4515807297934675, "learning_rate": 1.181614083697049e-06, "loss": 0.6363, "step": 7671 }, { "epoch": 0.78, "grad_norm": 1.3852139288072889, "learning_rate": 1.1805467028723427e-06, "loss": 0.6057, "step": 7672 }, { "epoch": 0.78, "grad_norm": 1.5721267761967461, "learning_rate": 1.1794797398252578e-06, "loss": 0.6962, "step": 7673 }, { "epoch": 0.78, "grad_norm": 1.3075355888037203, "learning_rate": 1.1784131946724942e-06, "loss": 0.6179, "step": 7674 }, { "epoch": 0.78, "grad_norm": 1.498661381809851, "learning_rate": 1.1773470675307158e-06, "loss": 0.6296, "step": 7675 }, { "epoch": 0.78, "grad_norm": 1.3910162311801675, "learning_rate": 1.176281358516539e-06, "loss": 0.5647, "step": 7676 }, { "epoch": 0.78, "grad_norm": 1.4669483917195918, "learning_rate": 1.1752160677465286e-06, "loss": 0.7118, "step": 7677 }, { "epoch": 0.78, "grad_norm": 1.5297679315120745, "learning_rate": 1.1741511953372114e-06, "loss": 0.6498, "step": 7678 }, { "epoch": 0.78, "grad_norm": 1.3856897362022664, "learning_rate": 1.1730867414050633e-06, "loss": 0.5901, "step": 7679 }, { "epoch": 0.78, "grad_norm": 1.6128227300184208, "learning_rate": 1.1720227060665145e-06, "loss": 0.5681, "step": 7680 }, { "epoch": 0.78, "grad_norm": 1.3299198636118506, "learning_rate": 1.1709590894379525e-06, "loss": 0.6434, "step": 7681 }, { "epoch": 0.78, "grad_norm": 1.4459481105199508, "learning_rate": 1.1698958916357177e-06, "loss": 0.6543, "step": 7682 }, { "epoch": 0.78, "grad_norm": 1.359701780390959, "learning_rate": 1.1688331127761039e-06, "loss": 0.6219, "step": 7683 }, { "epoch": 0.78, "grad_norm": 1.5259533884504135, "learning_rate": 1.167770752975359e-06, "loss": 0.6546, "step": 7684 }, { "epoch": 0.78, "grad_norm": 1.7078389815320496, "learning_rate": 1.166708812349684e-06, "loss": 0.6577, "step": 7685 }, { "epoch": 0.78, "grad_norm": 1.4995194580703455, "learning_rate": 1.1656472910152377e-06, "loss": 0.6749, "step": 7686 }, { "epoch": 0.78, "grad_norm": 1.5141155030922526, "learning_rate": 1.1645861890881327e-06, "loss": 0.6636, "step": 7687 }, { "epoch": 0.79, "grad_norm": 1.6096874849943321, "learning_rate": 1.1635255066844292e-06, "loss": 0.5237, "step": 7688 }, { "epoch": 0.79, "grad_norm": 1.090935575348966, "learning_rate": 1.1624652439201483e-06, "loss": 0.5428, "step": 7689 }, { "epoch": 0.79, "grad_norm": 1.3820758382888205, "learning_rate": 1.1614054009112657e-06, "loss": 0.6036, "step": 7690 }, { "epoch": 0.79, "grad_norm": 1.592321937979131, "learning_rate": 1.1603459777737046e-06, "loss": 0.7446, "step": 7691 }, { "epoch": 0.79, "grad_norm": 1.5164515293120258, "learning_rate": 1.1592869746233498e-06, "loss": 0.6964, "step": 7692 }, { "epoch": 0.79, "grad_norm": 1.4744191049477073, "learning_rate": 1.158228391576035e-06, "loss": 0.5686, "step": 7693 }, { "epoch": 0.79, "grad_norm": 1.3956915029829193, "learning_rate": 1.1571702287475484e-06, "loss": 0.6232, "step": 7694 }, { "epoch": 0.79, "grad_norm": 1.6319819208535717, "learning_rate": 1.156112486253635e-06, "loss": 0.66, "step": 7695 }, { "epoch": 0.79, "grad_norm": 1.6279055914134295, "learning_rate": 1.1550551642099934e-06, "loss": 0.7106, "step": 7696 }, { "epoch": 0.79, "grad_norm": 1.6284643355652153, "learning_rate": 1.1539982627322732e-06, "loss": 0.7288, "step": 7697 }, { "epoch": 0.79, "grad_norm": 1.3821002168302332, "learning_rate": 1.1529417819360812e-06, "loss": 0.5198, "step": 7698 }, { "epoch": 0.79, "grad_norm": 1.5616001665533452, "learning_rate": 1.1518857219369744e-06, "loss": 0.734, "step": 7699 }, { "epoch": 0.79, "grad_norm": 1.6265946122320585, "learning_rate": 1.1508300828504682e-06, "loss": 0.7193, "step": 7700 }, { "epoch": 0.79, "grad_norm": 1.5781493034645944, "learning_rate": 1.1497748647920326e-06, "loss": 0.6208, "step": 7701 }, { "epoch": 0.79, "grad_norm": 1.445598784960246, "learning_rate": 1.1487200678770833e-06, "loss": 0.6396, "step": 7702 }, { "epoch": 0.79, "grad_norm": 1.7065394674618228, "learning_rate": 1.1476656922209984e-06, "loss": 0.6984, "step": 7703 }, { "epoch": 0.79, "grad_norm": 1.3965705723730186, "learning_rate": 1.1466117379391089e-06, "loss": 0.6367, "step": 7704 }, { "epoch": 0.79, "grad_norm": 1.5096109286849138, "learning_rate": 1.1455582051466947e-06, "loss": 0.7411, "step": 7705 }, { "epoch": 0.79, "grad_norm": 1.3996820871417104, "learning_rate": 1.1445050939589958e-06, "loss": 0.69, "step": 7706 }, { "epoch": 0.79, "grad_norm": 1.5543919336207226, "learning_rate": 1.143452404491201e-06, "loss": 0.5906, "step": 7707 }, { "epoch": 0.79, "grad_norm": 2.2906701016397104, "learning_rate": 1.1424001368584553e-06, "loss": 0.6526, "step": 7708 }, { "epoch": 0.79, "grad_norm": 1.632116896951027, "learning_rate": 1.1413482911758584e-06, "loss": 0.5416, "step": 7709 }, { "epoch": 0.79, "grad_norm": 1.8020832465428882, "learning_rate": 1.1402968675584608e-06, "loss": 0.733, "step": 7710 }, { "epoch": 0.79, "grad_norm": 1.3427270781488208, "learning_rate": 1.1392458661212718e-06, "loss": 0.7026, "step": 7711 }, { "epoch": 0.79, "grad_norm": 1.7147865008769525, "learning_rate": 1.1381952869792501e-06, "loss": 0.6639, "step": 7712 }, { "epoch": 0.79, "grad_norm": 1.3802827344419835, "learning_rate": 1.1371451302473075e-06, "loss": 0.5966, "step": 7713 }, { "epoch": 0.79, "grad_norm": 1.6088326335612564, "learning_rate": 1.1360953960403144e-06, "loss": 0.6892, "step": 7714 }, { "epoch": 0.79, "grad_norm": 1.4005934237663131, "learning_rate": 1.135046084473092e-06, "loss": 0.6557, "step": 7715 }, { "epoch": 0.79, "grad_norm": 1.6528502270218768, "learning_rate": 1.1339971956604163e-06, "loss": 0.6019, "step": 7716 }, { "epoch": 0.79, "grad_norm": 1.7152611302497534, "learning_rate": 1.1329487297170128e-06, "loss": 0.6628, "step": 7717 }, { "epoch": 0.79, "grad_norm": 1.3826097819301866, "learning_rate": 1.1319006867575688e-06, "loss": 0.5807, "step": 7718 }, { "epoch": 0.79, "grad_norm": 1.5836090802813554, "learning_rate": 1.1308530668967172e-06, "loss": 0.6752, "step": 7719 }, { "epoch": 0.79, "grad_norm": 1.5592105973956079, "learning_rate": 1.1298058702490517e-06, "loss": 0.7415, "step": 7720 }, { "epoch": 0.79, "grad_norm": 1.5082349428136699, "learning_rate": 1.128759096929114e-06, "loss": 0.6337, "step": 7721 }, { "epoch": 0.79, "grad_norm": 2.4424689553356997, "learning_rate": 1.1277127470514005e-06, "loss": 0.5539, "step": 7722 }, { "epoch": 0.79, "grad_norm": 1.4312963724157608, "learning_rate": 1.126666820730366e-06, "loss": 0.6456, "step": 7723 }, { "epoch": 0.79, "grad_norm": 1.5620373615107985, "learning_rate": 1.1256213180804121e-06, "loss": 0.599, "step": 7724 }, { "epoch": 0.79, "grad_norm": 1.4306895572771765, "learning_rate": 1.124576239215901e-06, "loss": 0.6422, "step": 7725 }, { "epoch": 0.79, "grad_norm": 1.4749366563478883, "learning_rate": 1.1235315842511424e-06, "loss": 0.5874, "step": 7726 }, { "epoch": 0.79, "grad_norm": 1.5949066634179252, "learning_rate": 1.1224873533004022e-06, "loss": 0.6353, "step": 7727 }, { "epoch": 0.79, "grad_norm": 1.5580951882867187, "learning_rate": 1.1214435464779006e-06, "loss": 0.6487, "step": 7728 }, { "epoch": 0.79, "grad_norm": 1.6718543824710692, "learning_rate": 1.1204001638978119e-06, "loss": 0.6764, "step": 7729 }, { "epoch": 0.79, "grad_norm": 1.643882059282261, "learning_rate": 1.1193572056742625e-06, "loss": 0.6202, "step": 7730 }, { "epoch": 0.79, "grad_norm": 1.4880275430254875, "learning_rate": 1.1183146719213306e-06, "loss": 0.6774, "step": 7731 }, { "epoch": 0.79, "grad_norm": 1.724569159128822, "learning_rate": 1.1172725627530528e-06, "loss": 0.6902, "step": 7732 }, { "epoch": 0.79, "grad_norm": 1.5058565679998455, "learning_rate": 1.116230878283414e-06, "loss": 0.6175, "step": 7733 }, { "epoch": 0.79, "grad_norm": 1.3660942067970727, "learning_rate": 1.1151896186263584e-06, "loss": 0.6585, "step": 7734 }, { "epoch": 0.79, "grad_norm": 1.5506014238180315, "learning_rate": 1.1141487838957787e-06, "loss": 0.5614, "step": 7735 }, { "epoch": 0.79, "grad_norm": 1.547044446462108, "learning_rate": 1.113108374205522e-06, "loss": 0.6981, "step": 7736 }, { "epoch": 0.79, "grad_norm": 1.4259241528224136, "learning_rate": 1.1120683896693923e-06, "loss": 0.4851, "step": 7737 }, { "epoch": 0.79, "grad_norm": 1.7326678569941836, "learning_rate": 1.1110288304011423e-06, "loss": 0.7074, "step": 7738 }, { "epoch": 0.79, "grad_norm": 1.6049083401146322, "learning_rate": 1.109989696514483e-06, "loss": 0.6872, "step": 7739 }, { "epoch": 0.79, "grad_norm": 1.7560187270637801, "learning_rate": 1.1089509881230753e-06, "loss": 0.6781, "step": 7740 }, { "epoch": 0.79, "grad_norm": 1.605090266704069, "learning_rate": 1.1079127053405331e-06, "loss": 0.6585, "step": 7741 }, { "epoch": 0.79, "grad_norm": 1.4038194544703009, "learning_rate": 1.1068748482804265e-06, "loss": 0.627, "step": 7742 }, { "epoch": 0.79, "grad_norm": 1.417848253901707, "learning_rate": 1.1058374170562796e-06, "loss": 0.6175, "step": 7743 }, { "epoch": 0.79, "grad_norm": 1.5271184618432039, "learning_rate": 1.1048004117815676e-06, "loss": 0.664, "step": 7744 }, { "epoch": 0.79, "grad_norm": 1.5136097895116631, "learning_rate": 1.103763832569717e-06, "loss": 0.5298, "step": 7745 }, { "epoch": 0.79, "grad_norm": 1.3689469126851648, "learning_rate": 1.1027276795341135e-06, "loss": 0.5681, "step": 7746 }, { "epoch": 0.79, "grad_norm": 1.5032708779731339, "learning_rate": 1.1016919527880909e-06, "loss": 0.7109, "step": 7747 }, { "epoch": 0.79, "grad_norm": 1.565904512468844, "learning_rate": 1.1006566524449413e-06, "loss": 0.6218, "step": 7748 }, { "epoch": 0.79, "grad_norm": 1.65353067228069, "learning_rate": 1.0996217786179053e-06, "loss": 0.6399, "step": 7749 }, { "epoch": 0.79, "grad_norm": 1.7212097378193678, "learning_rate": 1.0985873314201784e-06, "loss": 0.6044, "step": 7750 }, { "epoch": 0.79, "grad_norm": 1.4983202644395635, "learning_rate": 1.0975533109649117e-06, "loss": 0.7346, "step": 7751 }, { "epoch": 0.79, "grad_norm": 1.7574145056161727, "learning_rate": 1.0965197173652064e-06, "loss": 0.7125, "step": 7752 }, { "epoch": 0.79, "grad_norm": 1.5657222193612284, "learning_rate": 1.0954865507341212e-06, "loss": 0.7408, "step": 7753 }, { "epoch": 0.79, "grad_norm": 1.4299400399842472, "learning_rate": 1.094453811184663e-06, "loss": 0.6722, "step": 7754 }, { "epoch": 0.79, "grad_norm": 1.5302250555166417, "learning_rate": 1.0934214988297942e-06, "loss": 0.6141, "step": 7755 }, { "epoch": 0.79, "grad_norm": 1.746787411061387, "learning_rate": 1.0923896137824308e-06, "loss": 0.7011, "step": 7756 }, { "epoch": 0.79, "grad_norm": 13.003628720899947, "learning_rate": 1.0913581561554448e-06, "loss": 0.6721, "step": 7757 }, { "epoch": 0.79, "grad_norm": 1.6031361971764118, "learning_rate": 1.0903271260616565e-06, "loss": 0.5811, "step": 7758 }, { "epoch": 0.79, "grad_norm": 1.429074006357601, "learning_rate": 1.0892965236138398e-06, "loss": 0.5769, "step": 7759 }, { "epoch": 0.79, "grad_norm": 1.4834588260431447, "learning_rate": 1.088266348924727e-06, "loss": 0.6264, "step": 7760 }, { "epoch": 0.79, "grad_norm": 1.5397268545143168, "learning_rate": 1.0872366021069974e-06, "loss": 0.7191, "step": 7761 }, { "epoch": 0.79, "grad_norm": 1.460280667428486, "learning_rate": 1.0862072832732883e-06, "loss": 0.6126, "step": 7762 }, { "epoch": 0.79, "grad_norm": 1.521241671196791, "learning_rate": 1.0851783925361875e-06, "loss": 0.7366, "step": 7763 }, { "epoch": 0.79, "grad_norm": 1.6211570416856558, "learning_rate": 1.0841499300082348e-06, "loss": 0.7343, "step": 7764 }, { "epoch": 0.79, "grad_norm": 1.349787504975366, "learning_rate": 1.0831218958019284e-06, "loss": 0.4716, "step": 7765 }, { "epoch": 0.79, "grad_norm": 1.544794095371157, "learning_rate": 1.0820942900297127e-06, "loss": 0.7261, "step": 7766 }, { "epoch": 0.79, "grad_norm": 1.7257852503802409, "learning_rate": 1.0810671128039919e-06, "loss": 0.6642, "step": 7767 }, { "epoch": 0.79, "grad_norm": 1.3791630661401082, "learning_rate": 1.0800403642371194e-06, "loss": 0.6941, "step": 7768 }, { "epoch": 0.79, "grad_norm": 1.4468775930261861, "learning_rate": 1.0790140444414e-06, "loss": 0.6885, "step": 7769 }, { "epoch": 0.79, "grad_norm": 1.397200307558499, "learning_rate": 1.077988153529096e-06, "loss": 0.5642, "step": 7770 }, { "epoch": 0.79, "grad_norm": 1.3437049826404306, "learning_rate": 1.0769626916124227e-06, "loss": 0.576, "step": 7771 }, { "epoch": 0.79, "grad_norm": 1.703015890908198, "learning_rate": 1.0759376588035448e-06, "loss": 0.5986, "step": 7772 }, { "epoch": 0.79, "grad_norm": 1.528398514807234, "learning_rate": 1.0749130552145809e-06, "loss": 0.6364, "step": 7773 }, { "epoch": 0.79, "grad_norm": 1.486837284101053, "learning_rate": 1.073888880957606e-06, "loss": 0.701, "step": 7774 }, { "epoch": 0.79, "grad_norm": 1.5348104385277181, "learning_rate": 1.0728651361446435e-06, "loss": 0.6466, "step": 7775 }, { "epoch": 0.79, "grad_norm": 1.3868803089688722, "learning_rate": 1.0718418208876746e-06, "loss": 0.6879, "step": 7776 }, { "epoch": 0.79, "grad_norm": 1.5371420569565875, "learning_rate": 1.0708189352986303e-06, "loss": 0.583, "step": 7777 }, { "epoch": 0.79, "grad_norm": 1.3366198840506285, "learning_rate": 1.069796479489394e-06, "loss": 0.6105, "step": 7778 }, { "epoch": 0.79, "grad_norm": 1.5270894002529622, "learning_rate": 1.0687744535718049e-06, "loss": 0.6624, "step": 7779 }, { "epoch": 0.79, "grad_norm": 1.7089712488817062, "learning_rate": 1.0677528576576524e-06, "loss": 0.6967, "step": 7780 }, { "epoch": 0.79, "grad_norm": 1.499460358006582, "learning_rate": 1.0667316918586828e-06, "loss": 0.6781, "step": 7781 }, { "epoch": 0.79, "grad_norm": 1.4694743778717068, "learning_rate": 1.0657109562865908e-06, "loss": 0.6263, "step": 7782 }, { "epoch": 0.79, "grad_norm": 1.4175694368061202, "learning_rate": 1.0646906510530247e-06, "loss": 0.7416, "step": 7783 }, { "epoch": 0.79, "grad_norm": 1.402879218152048, "learning_rate": 1.0636707762695892e-06, "loss": 0.6741, "step": 7784 }, { "epoch": 0.79, "grad_norm": 1.5255624739793714, "learning_rate": 1.0626513320478405e-06, "loss": 0.619, "step": 7785 }, { "epoch": 0.8, "grad_norm": 1.445312992399636, "learning_rate": 1.0616323184992855e-06, "loss": 0.616, "step": 7786 }, { "epoch": 0.8, "grad_norm": 1.479498767899308, "learning_rate": 1.060613735735384e-06, "loss": 0.7352, "step": 7787 }, { "epoch": 0.8, "grad_norm": 1.5722762425334824, "learning_rate": 1.0595955838675531e-06, "loss": 0.7028, "step": 7788 }, { "epoch": 0.8, "grad_norm": 1.5906642573235397, "learning_rate": 1.0585778630071574e-06, "loss": 0.5737, "step": 7789 }, { "epoch": 0.8, "grad_norm": 1.3779991738065849, "learning_rate": 1.0575605732655197e-06, "loss": 0.6399, "step": 7790 }, { "epoch": 0.8, "grad_norm": 1.4537932692060815, "learning_rate": 1.0565437147539103e-06, "loss": 0.6287, "step": 7791 }, { "epoch": 0.8, "grad_norm": 1.4267133225319226, "learning_rate": 1.0555272875835537e-06, "loss": 0.671, "step": 7792 }, { "epoch": 0.8, "grad_norm": 1.570728593480659, "learning_rate": 1.0545112918656315e-06, "loss": 0.6473, "step": 7793 }, { "epoch": 0.8, "grad_norm": 1.6273468691143254, "learning_rate": 1.0534957277112718e-06, "loss": 0.6865, "step": 7794 }, { "epoch": 0.8, "grad_norm": 1.6019468983157257, "learning_rate": 1.052480595231561e-06, "loss": 0.7683, "step": 7795 }, { "epoch": 0.8, "grad_norm": 1.939564871151703, "learning_rate": 1.051465894537535e-06, "loss": 0.6529, "step": 7796 }, { "epoch": 0.8, "grad_norm": 1.5733723424798538, "learning_rate": 1.0504516257401815e-06, "loss": 0.6745, "step": 7797 }, { "epoch": 0.8, "grad_norm": 1.5800056782619354, "learning_rate": 1.0494377889504448e-06, "loss": 0.6273, "step": 7798 }, { "epoch": 0.8, "grad_norm": 1.384481699452186, "learning_rate": 1.0484243842792203e-06, "loss": 0.5416, "step": 7799 }, { "epoch": 0.8, "grad_norm": 1.5228345480198886, "learning_rate": 1.047411411837355e-06, "loss": 0.5874, "step": 7800 }, { "epoch": 0.8, "grad_norm": 1.4369860463331883, "learning_rate": 1.0463988717356482e-06, "loss": 0.6766, "step": 7801 }, { "epoch": 0.8, "grad_norm": 1.4909089224632546, "learning_rate": 1.0453867640848554e-06, "loss": 0.6209, "step": 7802 }, { "epoch": 0.8, "grad_norm": 1.451544281018697, "learning_rate": 1.04437508899568e-06, "loss": 0.5414, "step": 7803 }, { "epoch": 0.8, "grad_norm": 1.6547379555205082, "learning_rate": 1.0433638465787827e-06, "loss": 0.6647, "step": 7804 }, { "epoch": 0.8, "grad_norm": 1.40397144015547, "learning_rate": 1.0423530369447738e-06, "loss": 0.5822, "step": 7805 }, { "epoch": 0.8, "grad_norm": 1.6208009379340265, "learning_rate": 1.0413426602042165e-06, "loss": 0.6488, "step": 7806 }, { "epoch": 0.8, "grad_norm": 1.6287164262490197, "learning_rate": 1.0403327164676286e-06, "loss": 0.6627, "step": 7807 }, { "epoch": 0.8, "grad_norm": 1.474035228969944, "learning_rate": 1.0393232058454777e-06, "loss": 0.5438, "step": 7808 }, { "epoch": 0.8, "grad_norm": 1.4512196891598326, "learning_rate": 1.0383141284481885e-06, "loss": 0.6195, "step": 7809 }, { "epoch": 0.8, "grad_norm": 1.603975661906316, "learning_rate": 1.037305484386133e-06, "loss": 0.6468, "step": 7810 }, { "epoch": 0.8, "grad_norm": 1.5401435898628006, "learning_rate": 1.0362972737696375e-06, "loss": 0.6411, "step": 7811 }, { "epoch": 0.8, "grad_norm": 1.6349295845022196, "learning_rate": 1.0352894967089832e-06, "loss": 0.7667, "step": 7812 }, { "epoch": 0.8, "grad_norm": 1.5674110533227885, "learning_rate": 1.0342821533144031e-06, "loss": 0.6716, "step": 7813 }, { "epoch": 0.8, "grad_norm": 1.3918096834123312, "learning_rate": 1.0332752436960809e-06, "loss": 0.6416, "step": 7814 }, { "epoch": 0.8, "grad_norm": 1.4522569317192466, "learning_rate": 1.0322687679641523e-06, "loss": 0.6883, "step": 7815 }, { "epoch": 0.8, "grad_norm": 1.485392444028183, "learning_rate": 1.0312627262287106e-06, "loss": 0.6542, "step": 7816 }, { "epoch": 0.8, "grad_norm": 1.2781758985928398, "learning_rate": 1.0302571185997944e-06, "loss": 0.5223, "step": 7817 }, { "epoch": 0.8, "grad_norm": 1.3825862963909, "learning_rate": 1.0292519451874023e-06, "loss": 0.592, "step": 7818 }, { "epoch": 0.8, "grad_norm": 1.5288105483600163, "learning_rate": 1.0282472061014797e-06, "loss": 0.6327, "step": 7819 }, { "epoch": 0.8, "grad_norm": 1.4112074424865229, "learning_rate": 1.0272429014519253e-06, "loss": 0.6827, "step": 7820 }, { "epoch": 0.8, "grad_norm": 1.577780415094555, "learning_rate": 1.0262390313485943e-06, "loss": 0.7082, "step": 7821 }, { "epoch": 0.8, "grad_norm": 1.633696312791135, "learning_rate": 1.0252355959012884e-06, "loss": 0.6932, "step": 7822 }, { "epoch": 0.8, "grad_norm": 1.4150433216292637, "learning_rate": 1.0242325952197678e-06, "loss": 0.5972, "step": 7823 }, { "epoch": 0.8, "grad_norm": 1.5975764383266413, "learning_rate": 1.0232300294137415e-06, "loss": 0.6668, "step": 7824 }, { "epoch": 0.8, "grad_norm": 1.5206748298230703, "learning_rate": 1.0222278985928702e-06, "loss": 0.5379, "step": 7825 }, { "epoch": 0.8, "grad_norm": 1.5509515809067533, "learning_rate": 1.0212262028667686e-06, "loss": 0.597, "step": 7826 }, { "epoch": 0.8, "grad_norm": 1.7010379636769608, "learning_rate": 1.0202249423450067e-06, "loss": 0.6701, "step": 7827 }, { "epoch": 0.8, "grad_norm": 1.5212237569349463, "learning_rate": 1.0192241171371014e-06, "loss": 0.6335, "step": 7828 }, { "epoch": 0.8, "grad_norm": 1.6137608198780105, "learning_rate": 1.0182237273525237e-06, "loss": 0.6681, "step": 7829 }, { "epoch": 0.8, "grad_norm": 1.4906630136607584, "learning_rate": 1.0172237731007e-06, "loss": 0.6834, "step": 7830 }, { "epoch": 0.8, "grad_norm": 1.384484677545781, "learning_rate": 1.0162242544910045e-06, "loss": 0.5961, "step": 7831 }, { "epoch": 0.8, "grad_norm": 1.3906616164694587, "learning_rate": 1.0152251716327687e-06, "loss": 0.7078, "step": 7832 }, { "epoch": 0.8, "grad_norm": 1.2979471823899533, "learning_rate": 1.0142265246352728e-06, "loss": 0.6742, "step": 7833 }, { "epoch": 0.8, "grad_norm": 1.3030971080437068, "learning_rate": 1.013228313607748e-06, "loss": 0.5198, "step": 7834 }, { "epoch": 0.8, "grad_norm": 1.4496771716743333, "learning_rate": 1.0122305386593839e-06, "loss": 0.5538, "step": 7835 }, { "epoch": 0.8, "grad_norm": 1.5514225507802175, "learning_rate": 1.0112331998993158e-06, "loss": 0.7398, "step": 7836 }, { "epoch": 0.8, "grad_norm": 1.3425030594539449, "learning_rate": 1.0102362974366359e-06, "loss": 0.4949, "step": 7837 }, { "epoch": 0.8, "grad_norm": 1.4785671937573897, "learning_rate": 1.0092398313803864e-06, "loss": 0.6529, "step": 7838 }, { "epoch": 0.8, "grad_norm": 1.6558185591650547, "learning_rate": 1.0082438018395607e-06, "loss": 0.6387, "step": 7839 }, { "epoch": 0.8, "grad_norm": 1.514521182445367, "learning_rate": 1.0072482089231078e-06, "loss": 0.6623, "step": 7840 }, { "epoch": 0.8, "grad_norm": 1.5033463327656216, "learning_rate": 1.0062530527399277e-06, "loss": 0.6659, "step": 7841 }, { "epoch": 0.8, "grad_norm": 1.5115669935722376, "learning_rate": 1.0052583333988713e-06, "loss": 0.6841, "step": 7842 }, { "epoch": 0.8, "grad_norm": 1.5943221252265727, "learning_rate": 1.004264051008741e-06, "loss": 0.7252, "step": 7843 }, { "epoch": 0.8, "grad_norm": 1.579731302380841, "learning_rate": 1.0032702056782955e-06, "loss": 0.6053, "step": 7844 }, { "epoch": 0.8, "grad_norm": 1.7813215488252103, "learning_rate": 1.0022767975162407e-06, "loss": 0.5938, "step": 7845 }, { "epoch": 0.8, "grad_norm": 1.7306493103115979, "learning_rate": 1.0012838266312397e-06, "loss": 0.7428, "step": 7846 }, { "epoch": 0.8, "grad_norm": 1.658018178591535, "learning_rate": 1.0002912931319036e-06, "loss": 0.6931, "step": 7847 }, { "epoch": 0.8, "grad_norm": 1.3494946747046193, "learning_rate": 9.992991971267962e-07, "loss": 0.6404, "step": 7848 }, { "epoch": 0.8, "grad_norm": 1.5578651468251987, "learning_rate": 9.983075387244369e-07, "loss": 0.6697, "step": 7849 }, { "epoch": 0.8, "grad_norm": 1.5188894867740566, "learning_rate": 9.973163180332923e-07, "loss": 0.6515, "step": 7850 }, { "epoch": 0.8, "grad_norm": 1.7110906912969568, "learning_rate": 9.963255351617861e-07, "loss": 0.7115, "step": 7851 }, { "epoch": 0.8, "grad_norm": 1.5414324499722967, "learning_rate": 9.953351902182912e-07, "loss": 0.6964, "step": 7852 }, { "epoch": 0.8, "grad_norm": 1.4589362019999474, "learning_rate": 9.943452833111307e-07, "loss": 0.606, "step": 7853 }, { "epoch": 0.8, "grad_norm": 1.535566494348708, "learning_rate": 9.933558145485833e-07, "loss": 0.5823, "step": 7854 }, { "epoch": 0.8, "grad_norm": 1.7976309840667164, "learning_rate": 9.9236678403888e-07, "loss": 0.7372, "step": 7855 }, { "epoch": 0.8, "grad_norm": 1.623719330419492, "learning_rate": 9.913781918902021e-07, "loss": 0.617, "step": 7856 }, { "epoch": 0.8, "grad_norm": 1.4988712286609593, "learning_rate": 9.903900382106812e-07, "loss": 0.6212, "step": 7857 }, { "epoch": 0.8, "grad_norm": 1.5696475471251994, "learning_rate": 9.89402323108406e-07, "loss": 0.6236, "step": 7858 }, { "epoch": 0.8, "grad_norm": 1.4785452515813833, "learning_rate": 9.884150466914117e-07, "loss": 0.5993, "step": 7859 }, { "epoch": 0.8, "grad_norm": 1.629162459165618, "learning_rate": 9.8742820906769e-07, "loss": 0.6971, "step": 7860 }, { "epoch": 0.8, "grad_norm": 1.6768636505148076, "learning_rate": 9.86441810345183e-07, "loss": 0.6473, "step": 7861 }, { "epoch": 0.8, "grad_norm": 1.5717252432924864, "learning_rate": 9.854558506317808e-07, "loss": 0.6701, "step": 7862 }, { "epoch": 0.8, "grad_norm": 1.5807575315515583, "learning_rate": 9.844703300353336e-07, "loss": 0.6725, "step": 7863 }, { "epoch": 0.8, "grad_norm": 1.529030545478011, "learning_rate": 9.834852486636364e-07, "loss": 0.5264, "step": 7864 }, { "epoch": 0.8, "grad_norm": 1.597679866645455, "learning_rate": 9.825006066244407e-07, "loss": 0.6732, "step": 7865 }, { "epoch": 0.8, "grad_norm": 1.2283344800175195, "learning_rate": 9.815164040254477e-07, "loss": 0.4988, "step": 7866 }, { "epoch": 0.8, "grad_norm": 1.5229405173892352, "learning_rate": 9.805326409743088e-07, "loss": 0.675, "step": 7867 }, { "epoch": 0.8, "grad_norm": 1.7760791995382863, "learning_rate": 9.795493175786319e-07, "loss": 0.6316, "step": 7868 }, { "epoch": 0.8, "grad_norm": 1.4384483281292813, "learning_rate": 9.785664339459744e-07, "loss": 0.6795, "step": 7869 }, { "epoch": 0.8, "grad_norm": 1.677670229930112, "learning_rate": 9.775839901838452e-07, "loss": 0.6579, "step": 7870 }, { "epoch": 0.8, "grad_norm": 1.624311635521605, "learning_rate": 9.766019863997046e-07, "loss": 0.6926, "step": 7871 }, { "epoch": 0.8, "grad_norm": 1.528162933832132, "learning_rate": 9.75620422700967e-07, "loss": 0.6409, "step": 7872 }, { "epoch": 0.8, "grad_norm": 1.2753161209620847, "learning_rate": 9.746392991949953e-07, "loss": 0.5224, "step": 7873 }, { "epoch": 0.8, "grad_norm": 1.5826919762549319, "learning_rate": 9.736586159891092e-07, "loss": 0.6874, "step": 7874 }, { "epoch": 0.8, "grad_norm": 1.556234672686325, "learning_rate": 9.726783731905759e-07, "loss": 0.5974, "step": 7875 }, { "epoch": 0.8, "grad_norm": 1.6466663685715066, "learning_rate": 9.716985709066146e-07, "loss": 0.6354, "step": 7876 }, { "epoch": 0.8, "grad_norm": 1.4972562961586657, "learning_rate": 9.707192092443995e-07, "loss": 0.7167, "step": 7877 }, { "epoch": 0.8, "grad_norm": 1.4189183248780974, "learning_rate": 9.69740288311053e-07, "loss": 0.6023, "step": 7878 }, { "epoch": 0.8, "grad_norm": 1.4042473190417963, "learning_rate": 9.68761808213653e-07, "loss": 0.6777, "step": 7879 }, { "epoch": 0.8, "grad_norm": 1.5446003874396128, "learning_rate": 9.677837690592262e-07, "loss": 0.6461, "step": 7880 }, { "epoch": 0.8, "grad_norm": 1.6530244512362482, "learning_rate": 9.668061709547505e-07, "loss": 0.7192, "step": 7881 }, { "epoch": 0.8, "grad_norm": 1.477693739935862, "learning_rate": 9.65829014007158e-07, "loss": 0.575, "step": 7882 }, { "epoch": 0.8, "grad_norm": 1.6122255893328337, "learning_rate": 9.648522983233333e-07, "loss": 0.6969, "step": 7883 }, { "epoch": 0.81, "grad_norm": 1.5469436944975838, "learning_rate": 9.638760240101102e-07, "loss": 0.6285, "step": 7884 }, { "epoch": 0.81, "grad_norm": 1.4345382875298855, "learning_rate": 9.62900191174273e-07, "loss": 0.6147, "step": 7885 }, { "epoch": 0.81, "grad_norm": 1.5141248305116943, "learning_rate": 9.619247999225624e-07, "loss": 0.5854, "step": 7886 }, { "epoch": 0.81, "grad_norm": 1.2711894631873286, "learning_rate": 9.609498503616665e-07, "loss": 0.5344, "step": 7887 }, { "epoch": 0.81, "grad_norm": 1.3520031073901275, "learning_rate": 9.599753425982283e-07, "loss": 0.5277, "step": 7888 }, { "epoch": 0.81, "grad_norm": 1.4189124959149542, "learning_rate": 9.590012767388402e-07, "loss": 0.6095, "step": 7889 }, { "epoch": 0.81, "grad_norm": 1.4725681717814387, "learning_rate": 9.580276528900456e-07, "loss": 0.5041, "step": 7890 }, { "epoch": 0.81, "grad_norm": 1.5312552667958341, "learning_rate": 9.570544711583435e-07, "loss": 0.673, "step": 7891 }, { "epoch": 0.81, "grad_norm": 1.7956624553986138, "learning_rate": 9.56081731650179e-07, "loss": 0.6179, "step": 7892 }, { "epoch": 0.81, "grad_norm": 1.4576794228190177, "learning_rate": 9.551094344719558e-07, "loss": 0.6151, "step": 7893 }, { "epoch": 0.81, "grad_norm": 1.4996152440756425, "learning_rate": 9.541375797300223e-07, "loss": 0.6685, "step": 7894 }, { "epoch": 0.81, "grad_norm": 1.616218274724954, "learning_rate": 9.531661675306814e-07, "loss": 0.5938, "step": 7895 }, { "epoch": 0.81, "grad_norm": 1.4941363579072715, "learning_rate": 9.52195197980188e-07, "loss": 0.6062, "step": 7896 }, { "epoch": 0.81, "grad_norm": 1.2748920588461574, "learning_rate": 9.512246711847501e-07, "loss": 0.6525, "step": 7897 }, { "epoch": 0.81, "grad_norm": 1.5555631589222292, "learning_rate": 9.502545872505242e-07, "loss": 0.6015, "step": 7898 }, { "epoch": 0.81, "grad_norm": 1.4901731797417825, "learning_rate": 9.492849462836178e-07, "loss": 0.6441, "step": 7899 }, { "epoch": 0.81, "grad_norm": 1.7750556842261502, "learning_rate": 9.483157483900945e-07, "loss": 0.7334, "step": 7900 }, { "epoch": 0.81, "grad_norm": 1.9371841345473275, "learning_rate": 9.473469936759639e-07, "loss": 0.7189, "step": 7901 }, { "epoch": 0.81, "grad_norm": 1.5288990710119563, "learning_rate": 9.463786822471926e-07, "loss": 0.594, "step": 7902 }, { "epoch": 0.81, "grad_norm": 1.472349448000494, "learning_rate": 9.454108142096951e-07, "loss": 0.7974, "step": 7903 }, { "epoch": 0.81, "grad_norm": 1.429600876162708, "learning_rate": 9.444433896693361e-07, "loss": 0.5812, "step": 7904 }, { "epoch": 0.81, "grad_norm": 1.5780609937993906, "learning_rate": 9.434764087319376e-07, "loss": 0.5659, "step": 7905 }, { "epoch": 0.81, "grad_norm": 1.6100292608545987, "learning_rate": 9.42509871503266e-07, "loss": 0.6704, "step": 7906 }, { "epoch": 0.81, "grad_norm": 1.451834719963165, "learning_rate": 9.415437780890452e-07, "loss": 0.6299, "step": 7907 }, { "epoch": 0.81, "grad_norm": 1.4059243145726028, "learning_rate": 9.405781285949473e-07, "loss": 0.6028, "step": 7908 }, { "epoch": 0.81, "grad_norm": 1.5314771671077323, "learning_rate": 9.396129231265949e-07, "loss": 0.6273, "step": 7909 }, { "epoch": 0.81, "grad_norm": 1.430745352921108, "learning_rate": 9.386481617895648e-07, "loss": 0.6151, "step": 7910 }, { "epoch": 0.81, "grad_norm": 1.5309627608450624, "learning_rate": 9.37683844689386e-07, "loss": 0.647, "step": 7911 }, { "epoch": 0.81, "grad_norm": 1.376860997061823, "learning_rate": 9.367199719315345e-07, "loss": 0.6436, "step": 7912 }, { "epoch": 0.81, "grad_norm": 2.0561248225146134, "learning_rate": 9.357565436214399e-07, "loss": 0.6059, "step": 7913 }, { "epoch": 0.81, "grad_norm": 1.3376502902617067, "learning_rate": 9.347935598644858e-07, "loss": 0.5631, "step": 7914 }, { "epoch": 0.81, "grad_norm": 1.4878383279240166, "learning_rate": 9.338310207660018e-07, "loss": 0.5907, "step": 7915 }, { "epoch": 0.81, "grad_norm": 1.4490597775186485, "learning_rate": 9.328689264312751e-07, "loss": 0.6447, "step": 7916 }, { "epoch": 0.81, "grad_norm": 1.477172796414305, "learning_rate": 9.319072769655391e-07, "loss": 0.6371, "step": 7917 }, { "epoch": 0.81, "grad_norm": 1.5419675702581945, "learning_rate": 9.309460724739799e-07, "loss": 0.5802, "step": 7918 }, { "epoch": 0.81, "grad_norm": 1.601880143373059, "learning_rate": 9.299853130617376e-07, "loss": 0.6577, "step": 7919 }, { "epoch": 0.81, "grad_norm": 1.5770822836006217, "learning_rate": 9.290249988338989e-07, "loss": 0.655, "step": 7920 }, { "epoch": 0.81, "grad_norm": 1.7133202387217557, "learning_rate": 9.280651298955074e-07, "loss": 0.6856, "step": 7921 }, { "epoch": 0.81, "grad_norm": 1.5149127391670223, "learning_rate": 9.271057063515538e-07, "loss": 0.6104, "step": 7922 }, { "epoch": 0.81, "grad_norm": 1.636566624841712, "learning_rate": 9.261467283069797e-07, "loss": 0.6542, "step": 7923 }, { "epoch": 0.81, "grad_norm": 1.4925657759918265, "learning_rate": 9.251881958666803e-07, "loss": 0.6086, "step": 7924 }, { "epoch": 0.81, "grad_norm": 1.5035293437729136, "learning_rate": 9.242301091355038e-07, "loss": 0.7307, "step": 7925 }, { "epoch": 0.81, "grad_norm": 1.448087188255731, "learning_rate": 9.232724682182453e-07, "loss": 0.6376, "step": 7926 }, { "epoch": 0.81, "grad_norm": 1.7042133622264872, "learning_rate": 9.223152732196522e-07, "loss": 0.7268, "step": 7927 }, { "epoch": 0.81, "grad_norm": 1.602449484762208, "learning_rate": 9.213585242444256e-07, "loss": 0.7103, "step": 7928 }, { "epoch": 0.81, "grad_norm": 1.6465360258065083, "learning_rate": 9.204022213972141e-07, "loss": 0.6206, "step": 7929 }, { "epoch": 0.81, "grad_norm": 1.499779418775624, "learning_rate": 9.194463647826224e-07, "loss": 0.6682, "step": 7930 }, { "epoch": 0.81, "grad_norm": 1.4268170978164743, "learning_rate": 9.184909545052018e-07, "loss": 0.6429, "step": 7931 }, { "epoch": 0.81, "grad_norm": 2.1142475655390105, "learning_rate": 9.175359906694554e-07, "loss": 0.5957, "step": 7932 }, { "epoch": 0.81, "grad_norm": 1.5938253218827523, "learning_rate": 9.165814733798412e-07, "loss": 0.7005, "step": 7933 }, { "epoch": 0.81, "grad_norm": 1.5412845136605189, "learning_rate": 9.15627402740763e-07, "loss": 0.6332, "step": 7934 }, { "epoch": 0.81, "grad_norm": 1.441425173398181, "learning_rate": 9.146737788565818e-07, "loss": 0.6407, "step": 7935 }, { "epoch": 0.81, "grad_norm": 1.593338020699402, "learning_rate": 9.137206018316042e-07, "loss": 0.7221, "step": 7936 }, { "epoch": 0.81, "grad_norm": 1.489092398831421, "learning_rate": 9.127678717700894e-07, "loss": 0.6128, "step": 7937 }, { "epoch": 0.81, "grad_norm": 1.392791469228403, "learning_rate": 9.118155887762498e-07, "loss": 0.6566, "step": 7938 }, { "epoch": 0.81, "grad_norm": 2.1485092897333677, "learning_rate": 9.108637529542485e-07, "loss": 0.6399, "step": 7939 }, { "epoch": 0.81, "grad_norm": 1.3464427348892096, "learning_rate": 9.099123644081981e-07, "loss": 0.6965, "step": 7940 }, { "epoch": 0.81, "grad_norm": 1.3626697400828405, "learning_rate": 9.089614232421606e-07, "loss": 0.6614, "step": 7941 }, { "epoch": 0.81, "grad_norm": 1.428159378286396, "learning_rate": 9.080109295601547e-07, "loss": 0.5986, "step": 7942 }, { "epoch": 0.81, "grad_norm": 1.6568570679344266, "learning_rate": 9.070608834661443e-07, "loss": 0.6301, "step": 7943 }, { "epoch": 0.81, "grad_norm": 1.5640468259308593, "learning_rate": 9.061112850640491e-07, "loss": 0.5797, "step": 7944 }, { "epoch": 0.81, "grad_norm": 1.552490438604101, "learning_rate": 9.051621344577371e-07, "loss": 0.5775, "step": 7945 }, { "epoch": 0.81, "grad_norm": 1.454031216343597, "learning_rate": 9.042134317510253e-07, "loss": 0.796, "step": 7946 }, { "epoch": 0.81, "grad_norm": 1.4532056300774767, "learning_rate": 9.03265177047688e-07, "loss": 0.6545, "step": 7947 }, { "epoch": 0.81, "grad_norm": 1.4005812138208331, "learning_rate": 9.023173704514432e-07, "loss": 0.6573, "step": 7948 }, { "epoch": 0.81, "grad_norm": 1.5723406542824148, "learning_rate": 9.013700120659668e-07, "loss": 0.6012, "step": 7949 }, { "epoch": 0.81, "grad_norm": 1.4998635616761047, "learning_rate": 9.004231019948806e-07, "loss": 0.5536, "step": 7950 }, { "epoch": 0.81, "grad_norm": 1.6461044069152644, "learning_rate": 8.994766403417577e-07, "loss": 0.7382, "step": 7951 }, { "epoch": 0.81, "grad_norm": 1.7392143443355874, "learning_rate": 8.985306272101251e-07, "loss": 0.575, "step": 7952 }, { "epoch": 0.81, "grad_norm": 1.435300440139219, "learning_rate": 8.975850627034605e-07, "loss": 0.5967, "step": 7953 }, { "epoch": 0.81, "grad_norm": 1.3811790294315567, "learning_rate": 8.966399469251896e-07, "loss": 0.5929, "step": 7954 }, { "epoch": 0.81, "grad_norm": 1.386969969724558, "learning_rate": 8.956952799786905e-07, "loss": 0.6673, "step": 7955 }, { "epoch": 0.81, "grad_norm": 1.4616642269990556, "learning_rate": 8.947510619672916e-07, "loss": 0.698, "step": 7956 }, { "epoch": 0.81, "grad_norm": 1.463220220772884, "learning_rate": 8.938072929942737e-07, "loss": 0.5557, "step": 7957 }, { "epoch": 0.81, "grad_norm": 1.674681919127804, "learning_rate": 8.928639731628686e-07, "loss": 0.7406, "step": 7958 }, { "epoch": 0.81, "grad_norm": 1.5395045733812662, "learning_rate": 8.919211025762581e-07, "loss": 0.6771, "step": 7959 }, { "epoch": 0.81, "grad_norm": 1.3394901715772725, "learning_rate": 8.90978681337572e-07, "loss": 0.6009, "step": 7960 }, { "epoch": 0.81, "grad_norm": 1.3065306666466223, "learning_rate": 8.900367095498974e-07, "loss": 0.487, "step": 7961 }, { "epoch": 0.81, "grad_norm": 1.4942195105804736, "learning_rate": 8.890951873162657e-07, "loss": 0.6295, "step": 7962 }, { "epoch": 0.81, "grad_norm": 1.4540201483948507, "learning_rate": 8.88154114739665e-07, "loss": 0.7399, "step": 7963 }, { "epoch": 0.81, "grad_norm": 1.5057758741753726, "learning_rate": 8.87213491923029e-07, "loss": 0.6901, "step": 7964 }, { "epoch": 0.81, "grad_norm": 1.8248597131114799, "learning_rate": 8.862733189692441e-07, "loss": 0.7585, "step": 7965 }, { "epoch": 0.81, "grad_norm": 1.2637141312534452, "learning_rate": 8.853335959811482e-07, "loss": 0.5409, "step": 7966 }, { "epoch": 0.81, "grad_norm": 1.2792534958721997, "learning_rate": 8.843943230615321e-07, "loss": 0.517, "step": 7967 }, { "epoch": 0.81, "grad_norm": 1.3422176595657742, "learning_rate": 8.834555003131329e-07, "loss": 0.5274, "step": 7968 }, { "epoch": 0.81, "grad_norm": 1.6137890513980886, "learning_rate": 8.825171278386402e-07, "loss": 0.7594, "step": 7969 }, { "epoch": 0.81, "grad_norm": 1.5691370074367896, "learning_rate": 8.815792057406941e-07, "loss": 0.6593, "step": 7970 }, { "epoch": 0.81, "grad_norm": 1.4608322796442696, "learning_rate": 8.806417341218864e-07, "loss": 0.5956, "step": 7971 }, { "epoch": 0.81, "grad_norm": 1.3617813506217256, "learning_rate": 8.797047130847614e-07, "loss": 0.6378, "step": 7972 }, { "epoch": 0.81, "grad_norm": 1.3320950120175612, "learning_rate": 8.787681427318096e-07, "loss": 0.5546, "step": 7973 }, { "epoch": 0.81, "grad_norm": 1.4971986290240518, "learning_rate": 8.778320231654741e-07, "loss": 0.6156, "step": 7974 }, { "epoch": 0.81, "grad_norm": 1.6693762916568402, "learning_rate": 8.768963544881509e-07, "loss": 0.6335, "step": 7975 }, { "epoch": 0.81, "grad_norm": 1.5006396516788172, "learning_rate": 8.759611368021831e-07, "loss": 0.6771, "step": 7976 }, { "epoch": 0.81, "grad_norm": 1.4718547296842532, "learning_rate": 8.750263702098682e-07, "loss": 0.6537, "step": 7977 }, { "epoch": 0.81, "grad_norm": 1.421599298202169, "learning_rate": 8.740920548134513e-07, "loss": 0.7708, "step": 7978 }, { "epoch": 0.81, "grad_norm": 1.545565702307587, "learning_rate": 8.731581907151276e-07, "loss": 0.5914, "step": 7979 }, { "epoch": 0.81, "grad_norm": 1.6008281842718055, "learning_rate": 8.722247780170462e-07, "loss": 0.6955, "step": 7980 }, { "epoch": 0.81, "grad_norm": 1.6665866532693827, "learning_rate": 8.71291816821307e-07, "loss": 0.6882, "step": 7981 }, { "epoch": 0.82, "grad_norm": 1.4764629946270174, "learning_rate": 8.703593072299559e-07, "loss": 0.6532, "step": 7982 }, { "epoch": 0.82, "grad_norm": 1.3653704844433232, "learning_rate": 8.69427249344994e-07, "loss": 0.5231, "step": 7983 }, { "epoch": 0.82, "grad_norm": 1.6386707905313294, "learning_rate": 8.684956432683688e-07, "loss": 0.6746, "step": 7984 }, { "epoch": 0.82, "grad_norm": 1.5156910231416851, "learning_rate": 8.67564489101983e-07, "loss": 0.5761, "step": 7985 }, { "epoch": 0.82, "grad_norm": 1.57993437801197, "learning_rate": 8.666337869476876e-07, "loss": 0.6741, "step": 7986 }, { "epoch": 0.82, "grad_norm": 1.462092034218477, "learning_rate": 8.657035369072841e-07, "loss": 0.6548, "step": 7987 }, { "epoch": 0.82, "grad_norm": 1.6898691731569861, "learning_rate": 8.647737390825228e-07, "loss": 0.5676, "step": 7988 }, { "epoch": 0.82, "grad_norm": 1.4879288155437054, "learning_rate": 8.638443935751095e-07, "loss": 0.5634, "step": 7989 }, { "epoch": 0.82, "grad_norm": 1.5994009303271102, "learning_rate": 8.62915500486694e-07, "loss": 0.5942, "step": 7990 }, { "epoch": 0.82, "grad_norm": 1.687676772109495, "learning_rate": 8.619870599188834e-07, "loss": 0.6277, "step": 7991 }, { "epoch": 0.82, "grad_norm": 1.4146781038851992, "learning_rate": 8.610590719732303e-07, "loss": 0.638, "step": 7992 }, { "epoch": 0.82, "grad_norm": 1.5073591466761327, "learning_rate": 8.601315367512386e-07, "loss": 0.7063, "step": 7993 }, { "epoch": 0.82, "grad_norm": 1.501110036636014, "learning_rate": 8.592044543543643e-07, "loss": 0.6648, "step": 7994 }, { "epoch": 0.82, "grad_norm": 1.6437738708157212, "learning_rate": 8.58277824884014e-07, "loss": 0.5777, "step": 7995 }, { "epoch": 0.82, "grad_norm": 1.5484612542800527, "learning_rate": 8.573516484415434e-07, "loss": 0.6171, "step": 7996 }, { "epoch": 0.82, "grad_norm": 1.4856401697100352, "learning_rate": 8.564259251282587e-07, "loss": 0.645, "step": 7997 }, { "epoch": 0.82, "grad_norm": 1.4672724652844422, "learning_rate": 8.555006550454159e-07, "loss": 0.5902, "step": 7998 }, { "epoch": 0.82, "grad_norm": 1.4043438625238562, "learning_rate": 8.545758382942232e-07, "loss": 0.7292, "step": 7999 }, { "epoch": 0.82, "grad_norm": 1.5173400198900495, "learning_rate": 8.536514749758401e-07, "loss": 0.6502, "step": 8000 }, { "epoch": 0.82, "grad_norm": 1.2974548642444832, "learning_rate": 8.527275651913736e-07, "loss": 0.582, "step": 8001 }, { "epoch": 0.82, "grad_norm": 1.9397260923857205, "learning_rate": 8.51804109041881e-07, "loss": 0.7065, "step": 8002 }, { "epoch": 0.82, "grad_norm": 2.2442182793990737, "learning_rate": 8.50881106628374e-07, "loss": 0.6812, "step": 8003 }, { "epoch": 0.82, "grad_norm": 1.4300950762114009, "learning_rate": 8.499585580518093e-07, "loss": 0.6387, "step": 8004 }, { "epoch": 0.82, "grad_norm": 1.7721202788278403, "learning_rate": 8.490364634130998e-07, "loss": 0.6546, "step": 8005 }, { "epoch": 0.82, "grad_norm": 1.4957556711648297, "learning_rate": 8.481148228131031e-07, "loss": 0.6779, "step": 8006 }, { "epoch": 0.82, "grad_norm": 1.4001660285006488, "learning_rate": 8.471936363526296e-07, "loss": 0.5853, "step": 8007 }, { "epoch": 0.82, "grad_norm": 1.6699209240583501, "learning_rate": 8.462729041324408e-07, "loss": 0.647, "step": 8008 }, { "epoch": 0.82, "grad_norm": 1.4554301728532066, "learning_rate": 8.453526262532491e-07, "loss": 0.5889, "step": 8009 }, { "epoch": 0.82, "grad_norm": 1.3842488246489741, "learning_rate": 8.444328028157145e-07, "loss": 0.5706, "step": 8010 }, { "epoch": 0.82, "grad_norm": 1.4664843725983665, "learning_rate": 8.435134339204493e-07, "loss": 0.6214, "step": 8011 }, { "epoch": 0.82, "grad_norm": 1.4800804350270045, "learning_rate": 8.425945196680135e-07, "loss": 0.6763, "step": 8012 }, { "epoch": 0.82, "grad_norm": 1.5492769626106522, "learning_rate": 8.416760601589207e-07, "loss": 0.6568, "step": 8013 }, { "epoch": 0.82, "grad_norm": 1.419639749499582, "learning_rate": 8.407580554936351e-07, "loss": 0.6288, "step": 8014 }, { "epoch": 0.82, "grad_norm": 1.9000488423146364, "learning_rate": 8.398405057725678e-07, "loss": 0.665, "step": 8015 }, { "epoch": 0.82, "grad_norm": 1.5508792464036814, "learning_rate": 8.389234110960804e-07, "loss": 0.6344, "step": 8016 }, { "epoch": 0.82, "grad_norm": 1.3669169893793087, "learning_rate": 8.380067715644891e-07, "loss": 0.5892, "step": 8017 }, { "epoch": 0.82, "grad_norm": 1.4167089039588414, "learning_rate": 8.370905872780544e-07, "loss": 0.6455, "step": 8018 }, { "epoch": 0.82, "grad_norm": 1.6223963636879783, "learning_rate": 8.361748583369928e-07, "loss": 0.6498, "step": 8019 }, { "epoch": 0.82, "grad_norm": 1.7600335742248145, "learning_rate": 8.352595848414669e-07, "loss": 0.6842, "step": 8020 }, { "epoch": 0.82, "grad_norm": 1.6328406958980437, "learning_rate": 8.343447668915889e-07, "loss": 0.6499, "step": 8021 }, { "epoch": 0.82, "grad_norm": 1.5126976090125734, "learning_rate": 8.334304045874248e-07, "loss": 0.642, "step": 8022 }, { "epoch": 0.82, "grad_norm": 1.3992985527612747, "learning_rate": 8.325164980289896e-07, "loss": 0.6853, "step": 8023 }, { "epoch": 0.82, "grad_norm": 1.452397291160402, "learning_rate": 8.316030473162473e-07, "loss": 0.6357, "step": 8024 }, { "epoch": 0.82, "grad_norm": 1.4692591112648534, "learning_rate": 8.306900525491124e-07, "loss": 0.616, "step": 8025 }, { "epoch": 0.82, "grad_norm": 1.505774245048501, "learning_rate": 8.297775138274478e-07, "loss": 0.6741, "step": 8026 }, { "epoch": 0.82, "grad_norm": 1.4110471070685737, "learning_rate": 8.288654312510697e-07, "loss": 0.608, "step": 8027 }, { "epoch": 0.82, "grad_norm": 1.7260490122569971, "learning_rate": 8.27953804919745e-07, "loss": 0.6365, "step": 8028 }, { "epoch": 0.82, "grad_norm": 1.369415229293305, "learning_rate": 8.270426349331873e-07, "loss": 0.6359, "step": 8029 }, { "epoch": 0.82, "grad_norm": 1.35243602298469, "learning_rate": 8.261319213910602e-07, "loss": 0.5308, "step": 8030 }, { "epoch": 0.82, "grad_norm": 1.7288929643785511, "learning_rate": 8.252216643929811e-07, "loss": 0.6651, "step": 8031 }, { "epoch": 0.82, "grad_norm": 1.5080175879453535, "learning_rate": 8.243118640385139e-07, "loss": 0.6437, "step": 8032 }, { "epoch": 0.82, "grad_norm": 1.6069684278935998, "learning_rate": 8.234025204271756e-07, "loss": 0.6112, "step": 8033 }, { "epoch": 0.82, "grad_norm": 1.507720450565099, "learning_rate": 8.224936336584305e-07, "loss": 0.6305, "step": 8034 }, { "epoch": 0.82, "grad_norm": 1.51438201322628, "learning_rate": 8.21585203831693e-07, "loss": 0.6252, "step": 8035 }, { "epoch": 0.82, "grad_norm": 1.6923884445654873, "learning_rate": 8.206772310463296e-07, "loss": 0.6165, "step": 8036 }, { "epoch": 0.82, "grad_norm": 1.4432228630722486, "learning_rate": 8.197697154016571e-07, "loss": 0.6131, "step": 8037 }, { "epoch": 0.82, "grad_norm": 1.441074903808991, "learning_rate": 8.188626569969393e-07, "loss": 0.6354, "step": 8038 }, { "epoch": 0.82, "grad_norm": 1.7466310476041516, "learning_rate": 8.17956055931392e-07, "loss": 0.6582, "step": 8039 }, { "epoch": 0.82, "grad_norm": 1.4286183970819513, "learning_rate": 8.170499123041798e-07, "loss": 0.6481, "step": 8040 }, { "epoch": 0.82, "grad_norm": 1.4375949162635002, "learning_rate": 8.161442262144182e-07, "loss": 0.5674, "step": 8041 }, { "epoch": 0.82, "grad_norm": 1.5320875405790289, "learning_rate": 8.152389977611747e-07, "loss": 0.6259, "step": 8042 }, { "epoch": 0.82, "grad_norm": 1.5566705327693033, "learning_rate": 8.143342270434629e-07, "loss": 0.5606, "step": 8043 }, { "epoch": 0.82, "grad_norm": 1.3223647476713, "learning_rate": 8.13429914160247e-07, "loss": 0.5605, "step": 8044 }, { "epoch": 0.82, "grad_norm": 1.5343202801389282, "learning_rate": 8.125260592104445e-07, "loss": 0.5979, "step": 8045 }, { "epoch": 0.82, "grad_norm": 1.4532397061735824, "learning_rate": 8.11622662292918e-07, "loss": 0.7003, "step": 8046 }, { "epoch": 0.82, "grad_norm": 1.7611223265159965, "learning_rate": 8.107197235064845e-07, "loss": 0.6588, "step": 8047 }, { "epoch": 0.82, "grad_norm": 1.767867833382875, "learning_rate": 8.098172429499079e-07, "loss": 0.6925, "step": 8048 }, { "epoch": 0.82, "grad_norm": 1.4446007224220134, "learning_rate": 8.08915220721902e-07, "loss": 0.703, "step": 8049 }, { "epoch": 0.82, "grad_norm": 1.4795642989586686, "learning_rate": 8.080136569211323e-07, "loss": 0.6208, "step": 8050 }, { "epoch": 0.82, "grad_norm": 1.5293908639944327, "learning_rate": 8.071125516462141e-07, "loss": 0.7033, "step": 8051 }, { "epoch": 0.82, "grad_norm": 1.5880351330102662, "learning_rate": 8.062119049957106e-07, "loss": 0.6012, "step": 8052 }, { "epoch": 0.82, "grad_norm": 1.5428078478042797, "learning_rate": 8.053117170681368e-07, "loss": 0.5997, "step": 8053 }, { "epoch": 0.82, "grad_norm": 1.505990470906476, "learning_rate": 8.044119879619544e-07, "loss": 0.6479, "step": 8054 }, { "epoch": 0.82, "grad_norm": 1.454509753080357, "learning_rate": 8.035127177755781e-07, "loss": 0.6775, "step": 8055 }, { "epoch": 0.82, "grad_norm": 1.7944556498542712, "learning_rate": 8.026139066073741e-07, "loss": 0.5522, "step": 8056 }, { "epoch": 0.82, "grad_norm": 1.494725642696435, "learning_rate": 8.017155545556526e-07, "loss": 0.694, "step": 8057 }, { "epoch": 0.82, "grad_norm": 1.430538552541751, "learning_rate": 8.008176617186775e-07, "loss": 0.6622, "step": 8058 }, { "epoch": 0.82, "grad_norm": 1.4027249395156867, "learning_rate": 7.999202281946622e-07, "loss": 0.6701, "step": 8059 }, { "epoch": 0.82, "grad_norm": 1.4739905688294521, "learning_rate": 7.990232540817683e-07, "loss": 0.6561, "step": 8060 }, { "epoch": 0.82, "grad_norm": 2.160855055777263, "learning_rate": 7.98126739478109e-07, "loss": 0.6053, "step": 8061 }, { "epoch": 0.82, "grad_norm": 1.5304738581841006, "learning_rate": 7.97230684481749e-07, "loss": 0.6538, "step": 8062 }, { "epoch": 0.82, "grad_norm": 1.6133332358103203, "learning_rate": 7.963350891906946e-07, "loss": 0.7031, "step": 8063 }, { "epoch": 0.82, "grad_norm": 1.562452046583617, "learning_rate": 7.954399537029106e-07, "loss": 0.7158, "step": 8064 }, { "epoch": 0.82, "grad_norm": 1.5292474184718996, "learning_rate": 7.945452781163094e-07, "loss": 0.6661, "step": 8065 }, { "epoch": 0.82, "grad_norm": 1.5335936356573503, "learning_rate": 7.936510625287502e-07, "loss": 0.7366, "step": 8066 }, { "epoch": 0.82, "grad_norm": 1.5632586768402248, "learning_rate": 7.927573070380446e-07, "loss": 0.6605, "step": 8067 }, { "epoch": 0.82, "grad_norm": 1.4711712509860944, "learning_rate": 7.918640117419507e-07, "loss": 0.5766, "step": 8068 }, { "epoch": 0.82, "grad_norm": 1.4080848497738108, "learning_rate": 7.9097117673818e-07, "loss": 0.5674, "step": 8069 }, { "epoch": 0.82, "grad_norm": 1.4606267162664464, "learning_rate": 7.900788021243943e-07, "loss": 0.6269, "step": 8070 }, { "epoch": 0.82, "grad_norm": 1.592014585373285, "learning_rate": 7.891868879982001e-07, "loss": 0.6773, "step": 8071 }, { "epoch": 0.82, "grad_norm": 1.426027977264686, "learning_rate": 7.882954344571564e-07, "loss": 0.6213, "step": 8072 }, { "epoch": 0.82, "grad_norm": 1.4459881284328133, "learning_rate": 7.874044415987736e-07, "loss": 0.638, "step": 8073 }, { "epoch": 0.82, "grad_norm": 1.4026705635927346, "learning_rate": 7.865139095205076e-07, "loss": 0.7031, "step": 8074 }, { "epoch": 0.82, "grad_norm": 1.3496948589933044, "learning_rate": 7.856238383197673e-07, "loss": 0.6432, "step": 8075 }, { "epoch": 0.82, "grad_norm": 1.4823790854357717, "learning_rate": 7.847342280939124e-07, "loss": 0.6596, "step": 8076 }, { "epoch": 0.82, "grad_norm": 1.6379327486021447, "learning_rate": 7.838450789402452e-07, "loss": 0.7056, "step": 8077 }, { "epoch": 0.82, "grad_norm": 1.800567661599365, "learning_rate": 7.829563909560256e-07, "loss": 0.6021, "step": 8078 }, { "epoch": 0.82, "grad_norm": 1.496793937760175, "learning_rate": 7.820681642384576e-07, "loss": 0.6201, "step": 8079 }, { "epoch": 0.83, "grad_norm": 1.4632458882742558, "learning_rate": 7.811803988846984e-07, "loss": 0.5455, "step": 8080 }, { "epoch": 0.83, "grad_norm": 1.3151731537106217, "learning_rate": 7.80293094991853e-07, "loss": 0.5625, "step": 8081 }, { "epoch": 0.83, "grad_norm": 1.4786782986182487, "learning_rate": 7.794062526569735e-07, "loss": 0.6106, "step": 8082 }, { "epoch": 0.83, "grad_norm": 1.4926265213182286, "learning_rate": 7.785198719770665e-07, "loss": 0.6374, "step": 8083 }, { "epoch": 0.83, "grad_norm": 1.6935406574812941, "learning_rate": 7.776339530490867e-07, "loss": 0.6633, "step": 8084 }, { "epoch": 0.83, "grad_norm": 1.437824111176668, "learning_rate": 7.767484959699351e-07, "loss": 0.6665, "step": 8085 }, { "epoch": 0.83, "grad_norm": 1.4124851946255415, "learning_rate": 7.758635008364645e-07, "loss": 0.7156, "step": 8086 }, { "epoch": 0.83, "grad_norm": 1.3753500262593874, "learning_rate": 7.749789677454783e-07, "loss": 0.6576, "step": 8087 }, { "epoch": 0.83, "grad_norm": 1.3156772923701718, "learning_rate": 7.740948967937261e-07, "loss": 0.6161, "step": 8088 }, { "epoch": 0.83, "grad_norm": 1.6200706719542002, "learning_rate": 7.732112880779102e-07, "loss": 0.6272, "step": 8089 }, { "epoch": 0.83, "grad_norm": 1.5367343718498716, "learning_rate": 7.723281416946837e-07, "loss": 0.6506, "step": 8090 }, { "epoch": 0.83, "grad_norm": 1.5914712164902145, "learning_rate": 7.71445457740641e-07, "loss": 0.6846, "step": 8091 }, { "epoch": 0.83, "grad_norm": 1.4519710308478213, "learning_rate": 7.705632363123356e-07, "loss": 0.5026, "step": 8092 }, { "epoch": 0.83, "grad_norm": 1.4826125072979441, "learning_rate": 7.696814775062639e-07, "loss": 0.6792, "step": 8093 }, { "epoch": 0.83, "grad_norm": 1.6340895007307288, "learning_rate": 7.688001814188745e-07, "loss": 0.6676, "step": 8094 }, { "epoch": 0.83, "grad_norm": 1.3788373946898005, "learning_rate": 7.67919348146568e-07, "loss": 0.5778, "step": 8095 }, { "epoch": 0.83, "grad_norm": 1.5921495337133489, "learning_rate": 7.670389777856862e-07, "loss": 0.6356, "step": 8096 }, { "epoch": 0.83, "grad_norm": 1.428046641608907, "learning_rate": 7.661590704325278e-07, "loss": 0.5609, "step": 8097 }, { "epoch": 0.83, "grad_norm": 1.6944880383822494, "learning_rate": 7.652796261833401e-07, "loss": 0.6682, "step": 8098 }, { "epoch": 0.83, "grad_norm": 1.45015944162541, "learning_rate": 7.644006451343155e-07, "loss": 0.5288, "step": 8099 }, { "epoch": 0.83, "grad_norm": 1.6435714446731344, "learning_rate": 7.635221273815985e-07, "loss": 0.6386, "step": 8100 }, { "epoch": 0.83, "grad_norm": 1.5042510089472285, "learning_rate": 7.626440730212841e-07, "loss": 0.6268, "step": 8101 }, { "epoch": 0.83, "grad_norm": 1.7782879154862354, "learning_rate": 7.617664821494142e-07, "loss": 0.6631, "step": 8102 }, { "epoch": 0.83, "grad_norm": 1.2334532531540277, "learning_rate": 7.608893548619806e-07, "loss": 0.5511, "step": 8103 }, { "epoch": 0.83, "grad_norm": 1.4642073713116837, "learning_rate": 7.600126912549277e-07, "loss": 0.603, "step": 8104 }, { "epoch": 0.83, "grad_norm": 1.5654254186719163, "learning_rate": 7.591364914241422e-07, "loss": 0.6993, "step": 8105 }, { "epoch": 0.83, "grad_norm": 1.3725727850198397, "learning_rate": 7.58260755465467e-07, "loss": 0.6347, "step": 8106 }, { "epoch": 0.83, "grad_norm": 1.5973309432182545, "learning_rate": 7.573854834746897e-07, "loss": 0.6771, "step": 8107 }, { "epoch": 0.83, "grad_norm": 1.4590418362644906, "learning_rate": 7.565106755475494e-07, "loss": 0.6105, "step": 8108 }, { "epoch": 0.83, "grad_norm": 1.3679904829565848, "learning_rate": 7.556363317797366e-07, "loss": 0.6521, "step": 8109 }, { "epoch": 0.83, "grad_norm": 1.6352973190315954, "learning_rate": 7.547624522668839e-07, "loss": 0.6986, "step": 8110 }, { "epoch": 0.83, "grad_norm": 1.4723529322074085, "learning_rate": 7.538890371045798e-07, "loss": 0.595, "step": 8111 }, { "epoch": 0.83, "grad_norm": 1.4782905495567016, "learning_rate": 7.530160863883606e-07, "loss": 0.5734, "step": 8112 }, { "epoch": 0.83, "grad_norm": 1.481809517948316, "learning_rate": 7.521436002137101e-07, "loss": 0.5957, "step": 8113 }, { "epoch": 0.83, "grad_norm": 1.5858686411866834, "learning_rate": 7.512715786760604e-07, "loss": 0.6799, "step": 8114 }, { "epoch": 0.83, "grad_norm": 1.4074784872978297, "learning_rate": 7.50400021870798e-07, "loss": 0.593, "step": 8115 }, { "epoch": 0.83, "grad_norm": 1.550011953948433, "learning_rate": 7.495289298932517e-07, "loss": 0.5763, "step": 8116 }, { "epoch": 0.83, "grad_norm": 1.610410867140989, "learning_rate": 7.486583028387051e-07, "loss": 0.703, "step": 8117 }, { "epoch": 0.83, "grad_norm": 1.7088612047227651, "learning_rate": 7.477881408023901e-07, "loss": 0.6305, "step": 8118 }, { "epoch": 0.83, "grad_norm": 1.9676754937158938, "learning_rate": 7.469184438794819e-07, "loss": 0.7464, "step": 8119 }, { "epoch": 0.83, "grad_norm": 1.503955178574936, "learning_rate": 7.46049212165113e-07, "loss": 0.5909, "step": 8120 }, { "epoch": 0.83, "grad_norm": 1.9206129731007626, "learning_rate": 7.451804457543594e-07, "loss": 0.7236, "step": 8121 }, { "epoch": 0.83, "grad_norm": 1.6968223158187348, "learning_rate": 7.44312144742248e-07, "loss": 0.6006, "step": 8122 }, { "epoch": 0.83, "grad_norm": 1.4325634574303818, "learning_rate": 7.434443092237581e-07, "loss": 0.5282, "step": 8123 }, { "epoch": 0.83, "grad_norm": 1.5650605671321185, "learning_rate": 7.425769392938104e-07, "loss": 0.566, "step": 8124 }, { "epoch": 0.83, "grad_norm": 1.6242968613903335, "learning_rate": 7.417100350472811e-07, "loss": 0.7368, "step": 8125 }, { "epoch": 0.83, "grad_norm": 1.4534719878327826, "learning_rate": 7.408435965789945e-07, "loss": 0.6568, "step": 8126 }, { "epoch": 0.83, "grad_norm": 1.5545083686224774, "learning_rate": 7.399776239837208e-07, "loss": 0.6732, "step": 8127 }, { "epoch": 0.83, "grad_norm": 1.3649795031333414, "learning_rate": 7.391121173561839e-07, "loss": 0.6066, "step": 8128 }, { "epoch": 0.83, "grad_norm": 1.703808711334017, "learning_rate": 7.382470767910527e-07, "loss": 0.6086, "step": 8129 }, { "epoch": 0.83, "grad_norm": 1.6156932268381201, "learning_rate": 7.373825023829462e-07, "loss": 0.6434, "step": 8130 }, { "epoch": 0.83, "grad_norm": 1.321100058284042, "learning_rate": 7.365183942264332e-07, "loss": 0.6107, "step": 8131 }, { "epoch": 0.83, "grad_norm": 1.4816629853568277, "learning_rate": 7.356547524160334e-07, "loss": 0.6843, "step": 8132 }, { "epoch": 0.83, "grad_norm": 1.4602885968695305, "learning_rate": 7.3479157704621e-07, "loss": 0.6827, "step": 8133 }, { "epoch": 0.83, "grad_norm": 1.5089958594407877, "learning_rate": 7.339288682113804e-07, "loss": 0.7031, "step": 8134 }, { "epoch": 0.83, "grad_norm": 1.638150052011867, "learning_rate": 7.330666260059072e-07, "loss": 0.6301, "step": 8135 }, { "epoch": 0.83, "grad_norm": 1.6147327921229504, "learning_rate": 7.322048505241053e-07, "loss": 0.6712, "step": 8136 }, { "epoch": 0.83, "grad_norm": 1.5732720105526252, "learning_rate": 7.313435418602388e-07, "loss": 0.6408, "step": 8137 }, { "epoch": 0.83, "grad_norm": 1.532604965387981, "learning_rate": 7.304827001085146e-07, "loss": 0.5928, "step": 8138 }, { "epoch": 0.83, "grad_norm": 1.5236450278868392, "learning_rate": 7.296223253630951e-07, "loss": 0.6494, "step": 8139 }, { "epoch": 0.83, "grad_norm": 1.3824982494935814, "learning_rate": 7.287624177180908e-07, "loss": 0.6357, "step": 8140 }, { "epoch": 0.83, "grad_norm": 1.4715984521606087, "learning_rate": 7.279029772675572e-07, "loss": 0.6104, "step": 8141 }, { "epoch": 0.83, "grad_norm": 1.4823450617808192, "learning_rate": 7.270440041055032e-07, "loss": 0.6613, "step": 8142 }, { "epoch": 0.83, "grad_norm": 1.5440003405405747, "learning_rate": 7.261854983258837e-07, "loss": 0.6277, "step": 8143 }, { "epoch": 0.83, "grad_norm": 1.5375448188019403, "learning_rate": 7.253274600226024e-07, "loss": 0.6709, "step": 8144 }, { "epoch": 0.83, "grad_norm": 1.5665221145552195, "learning_rate": 7.244698892895141e-07, "loss": 0.6746, "step": 8145 }, { "epoch": 0.83, "grad_norm": 1.5088412745898598, "learning_rate": 7.236127862204229e-07, "loss": 0.6989, "step": 8146 }, { "epoch": 0.83, "grad_norm": 1.6632667466113296, "learning_rate": 7.227561509090758e-07, "loss": 0.647, "step": 8147 }, { "epoch": 0.83, "grad_norm": 1.6269888160384265, "learning_rate": 7.218999834491763e-07, "loss": 0.5675, "step": 8148 }, { "epoch": 0.83, "grad_norm": 1.536883541713167, "learning_rate": 7.210442839343712e-07, "loss": 0.6123, "step": 8149 }, { "epoch": 0.83, "grad_norm": 1.3672824420607366, "learning_rate": 7.201890524582589e-07, "loss": 0.5857, "step": 8150 }, { "epoch": 0.83, "grad_norm": 1.4233950979822925, "learning_rate": 7.19334289114389e-07, "loss": 0.5491, "step": 8151 }, { "epoch": 0.83, "grad_norm": 1.5832999156912688, "learning_rate": 7.184799939962511e-07, "loss": 0.6234, "step": 8152 }, { "epoch": 0.83, "grad_norm": 1.3911619909299466, "learning_rate": 7.176261671972923e-07, "loss": 0.5814, "step": 8153 }, { "epoch": 0.83, "grad_norm": 1.559545658616537, "learning_rate": 7.167728088109072e-07, "loss": 0.5714, "step": 8154 }, { "epoch": 0.83, "grad_norm": 1.4691534401434725, "learning_rate": 7.159199189304339e-07, "loss": 0.8128, "step": 8155 }, { "epoch": 0.83, "grad_norm": 1.4334482308109207, "learning_rate": 7.150674976491656e-07, "loss": 0.6241, "step": 8156 }, { "epoch": 0.83, "grad_norm": 1.5263350684172958, "learning_rate": 7.142155450603405e-07, "loss": 0.6063, "step": 8157 }, { "epoch": 0.83, "grad_norm": 1.5106198271122318, "learning_rate": 7.133640612571452e-07, "loss": 0.5958, "step": 8158 }, { "epoch": 0.83, "grad_norm": 1.4109972934945765, "learning_rate": 7.125130463327174e-07, "loss": 0.5939, "step": 8159 }, { "epoch": 0.83, "grad_norm": 1.4033605538769143, "learning_rate": 7.116625003801436e-07, "loss": 0.626, "step": 8160 }, { "epoch": 0.83, "grad_norm": 1.6211875998520402, "learning_rate": 7.108124234924563e-07, "loss": 0.6865, "step": 8161 }, { "epoch": 0.83, "grad_norm": 1.371557427846467, "learning_rate": 7.099628157626392e-07, "loss": 0.5935, "step": 8162 }, { "epoch": 0.83, "grad_norm": 1.5654883628992688, "learning_rate": 7.091136772836215e-07, "loss": 0.606, "step": 8163 }, { "epoch": 0.83, "grad_norm": 1.6148482510887294, "learning_rate": 7.082650081482851e-07, "loss": 0.7088, "step": 8164 }, { "epoch": 0.83, "grad_norm": 1.411694886702397, "learning_rate": 7.074168084494603e-07, "loss": 0.6064, "step": 8165 }, { "epoch": 0.83, "grad_norm": 1.6604415148602774, "learning_rate": 7.065690782799206e-07, "loss": 0.6721, "step": 8166 }, { "epoch": 0.83, "grad_norm": 1.535094369893159, "learning_rate": 7.057218177323938e-07, "loss": 0.6254, "step": 8167 }, { "epoch": 0.83, "grad_norm": 1.5528637377328638, "learning_rate": 7.048750268995558e-07, "loss": 0.5149, "step": 8168 }, { "epoch": 0.83, "grad_norm": 1.5434676567038557, "learning_rate": 7.040287058740281e-07, "loss": 0.666, "step": 8169 }, { "epoch": 0.83, "grad_norm": 1.477782115366861, "learning_rate": 7.031828547483837e-07, "loss": 0.6632, "step": 8170 }, { "epoch": 0.83, "grad_norm": 1.5234367534236934, "learning_rate": 7.023374736151434e-07, "loss": 0.622, "step": 8171 }, { "epoch": 0.83, "grad_norm": 1.4913540333067843, "learning_rate": 7.014925625667745e-07, "loss": 0.616, "step": 8172 }, { "epoch": 0.83, "grad_norm": 1.7724510355389322, "learning_rate": 7.00648121695695e-07, "loss": 0.5958, "step": 8173 }, { "epoch": 0.83, "grad_norm": 1.5795800839657037, "learning_rate": 6.998041510942738e-07, "loss": 0.7324, "step": 8174 }, { "epoch": 0.83, "grad_norm": 1.4876539425420365, "learning_rate": 6.989606508548235e-07, "loss": 0.5633, "step": 8175 }, { "epoch": 0.83, "grad_norm": 3.6850968613690203, "learning_rate": 6.981176210696078e-07, "loss": 0.6673, "step": 8176 }, { "epoch": 0.83, "grad_norm": 1.5978499603948053, "learning_rate": 6.972750618308372e-07, "loss": 0.5948, "step": 8177 }, { "epoch": 0.84, "grad_norm": 1.4105861593475295, "learning_rate": 6.964329732306729e-07, "loss": 0.6214, "step": 8178 }, { "epoch": 0.84, "grad_norm": 1.5187887744203978, "learning_rate": 6.95591355361227e-07, "loss": 0.6427, "step": 8179 }, { "epoch": 0.84, "grad_norm": 1.5854006128192641, "learning_rate": 6.947502083145525e-07, "loss": 0.6019, "step": 8180 }, { "epoch": 0.84, "grad_norm": 1.4106852715583325, "learning_rate": 6.939095321826561e-07, "loss": 0.5844, "step": 8181 }, { "epoch": 0.84, "grad_norm": 1.4269494951387134, "learning_rate": 6.930693270574951e-07, "loss": 0.6483, "step": 8182 }, { "epoch": 0.84, "grad_norm": 1.8927713497991765, "learning_rate": 6.922295930309691e-07, "loss": 0.6753, "step": 8183 }, { "epoch": 0.84, "grad_norm": 1.690686739747217, "learning_rate": 6.913903301949321e-07, "loss": 0.6887, "step": 8184 }, { "epoch": 0.84, "grad_norm": 1.5228011642873667, "learning_rate": 6.905515386411832e-07, "loss": 0.6778, "step": 8185 }, { "epoch": 0.84, "grad_norm": 1.573010417283269, "learning_rate": 6.897132184614686e-07, "loss": 0.7371, "step": 8186 }, { "epoch": 0.84, "grad_norm": 1.6286264974161926, "learning_rate": 6.88875369747487e-07, "loss": 0.7508, "step": 8187 }, { "epoch": 0.84, "grad_norm": 1.5569427839754781, "learning_rate": 6.880379925908848e-07, "loss": 0.625, "step": 8188 }, { "epoch": 0.84, "grad_norm": 1.6853236809482297, "learning_rate": 6.872010870832535e-07, "loss": 0.6669, "step": 8189 }, { "epoch": 0.84, "grad_norm": 1.2822673170110617, "learning_rate": 6.863646533161356e-07, "loss": 0.5386, "step": 8190 }, { "epoch": 0.84, "grad_norm": 1.474363385068406, "learning_rate": 6.855286913810211e-07, "loss": 0.6132, "step": 8191 }, { "epoch": 0.84, "grad_norm": 1.2924661049502804, "learning_rate": 6.846932013693486e-07, "loss": 0.5766, "step": 8192 }, { "epoch": 0.84, "grad_norm": 1.5720691939332831, "learning_rate": 6.838581833725078e-07, "loss": 0.6674, "step": 8193 }, { "epoch": 0.84, "grad_norm": 1.5109949088632544, "learning_rate": 6.830236374818317e-07, "loss": 0.6534, "step": 8194 }, { "epoch": 0.84, "grad_norm": 1.486865528579052, "learning_rate": 6.821895637886039e-07, "loss": 0.5792, "step": 8195 }, { "epoch": 0.84, "grad_norm": 1.4926918940445058, "learning_rate": 6.813559623840588e-07, "loss": 0.732, "step": 8196 }, { "epoch": 0.84, "grad_norm": 1.8093903561013371, "learning_rate": 6.805228333593749e-07, "loss": 0.563, "step": 8197 }, { "epoch": 0.84, "grad_norm": 1.380767459510228, "learning_rate": 6.79690176805683e-07, "loss": 0.5135, "step": 8198 }, { "epoch": 0.84, "grad_norm": 1.5431302162887768, "learning_rate": 6.788579928140593e-07, "loss": 0.6977, "step": 8199 }, { "epoch": 0.84, "grad_norm": 1.3990981346364075, "learning_rate": 6.780262814755284e-07, "loss": 0.585, "step": 8200 }, { "epoch": 0.84, "grad_norm": 1.7898179368788683, "learning_rate": 6.771950428810664e-07, "loss": 0.6629, "step": 8201 }, { "epoch": 0.84, "grad_norm": 1.6956549630139184, "learning_rate": 6.763642771215934e-07, "loss": 0.6516, "step": 8202 }, { "epoch": 0.84, "grad_norm": 1.384451803952058, "learning_rate": 6.755339842879815e-07, "loss": 0.5005, "step": 8203 }, { "epoch": 0.84, "grad_norm": 1.4959515307277045, "learning_rate": 6.747041644710484e-07, "loss": 0.6996, "step": 8204 }, { "epoch": 0.84, "grad_norm": 1.9266307848788715, "learning_rate": 6.738748177615601e-07, "loss": 0.6678, "step": 8205 }, { "epoch": 0.84, "grad_norm": 1.5180255279233024, "learning_rate": 6.73045944250233e-07, "loss": 0.5826, "step": 8206 }, { "epoch": 0.84, "grad_norm": 1.484857584088971, "learning_rate": 6.722175440277318e-07, "loss": 0.6754, "step": 8207 }, { "epoch": 0.84, "grad_norm": 1.5068724765949664, "learning_rate": 6.713896171846668e-07, "loss": 0.6156, "step": 8208 }, { "epoch": 0.84, "grad_norm": 1.5556491057326105, "learning_rate": 6.705621638115966e-07, "loss": 0.638, "step": 8209 }, { "epoch": 0.84, "grad_norm": 1.558035981261281, "learning_rate": 6.697351839990318e-07, "loss": 0.5536, "step": 8210 }, { "epoch": 0.84, "grad_norm": 1.5552059828840665, "learning_rate": 6.689086778374265e-07, "loss": 0.627, "step": 8211 }, { "epoch": 0.84, "grad_norm": 1.3678515471758845, "learning_rate": 6.680826454171874e-07, "loss": 0.5768, "step": 8212 }, { "epoch": 0.84, "grad_norm": 1.6613304646248985, "learning_rate": 6.67257086828666e-07, "loss": 0.6326, "step": 8213 }, { "epoch": 0.84, "grad_norm": 1.5605495223229604, "learning_rate": 6.664320021621623e-07, "loss": 0.6204, "step": 8214 }, { "epoch": 0.84, "grad_norm": 1.536745620424854, "learning_rate": 6.65607391507927e-07, "loss": 0.5789, "step": 8215 }, { "epoch": 0.84, "grad_norm": 1.5593752052900889, "learning_rate": 6.647832549561556e-07, "loss": 0.6778, "step": 8216 }, { "epoch": 0.84, "grad_norm": 1.4846377295176012, "learning_rate": 6.639595925969955e-07, "loss": 0.554, "step": 8217 }, { "epoch": 0.84, "grad_norm": 1.6166279250552555, "learning_rate": 6.631364045205391e-07, "loss": 0.69, "step": 8218 }, { "epoch": 0.84, "grad_norm": 1.4062341847965694, "learning_rate": 6.62313690816826e-07, "loss": 0.6526, "step": 8219 }, { "epoch": 0.84, "grad_norm": 1.3864705786620668, "learning_rate": 6.614914515758481e-07, "loss": 0.6306, "step": 8220 }, { "epoch": 0.84, "grad_norm": 1.6225505640662872, "learning_rate": 6.606696868875439e-07, "loss": 0.5884, "step": 8221 }, { "epoch": 0.84, "grad_norm": 1.52258889634963, "learning_rate": 6.598483968417984e-07, "loss": 0.656, "step": 8222 }, { "epoch": 0.84, "grad_norm": 1.6907039022358363, "learning_rate": 6.590275815284436e-07, "loss": 0.7294, "step": 8223 }, { "epoch": 0.84, "grad_norm": 1.7312697759249405, "learning_rate": 6.582072410372647e-07, "loss": 0.6748, "step": 8224 }, { "epoch": 0.84, "grad_norm": 1.5522101310611656, "learning_rate": 6.573873754579895e-07, "loss": 0.6328, "step": 8225 }, { "epoch": 0.84, "grad_norm": 1.5394648334444276, "learning_rate": 6.565679848802975e-07, "loss": 0.5814, "step": 8226 }, { "epoch": 0.84, "grad_norm": 1.3515064106314145, "learning_rate": 6.557490693938146e-07, "loss": 0.5625, "step": 8227 }, { "epoch": 0.84, "grad_norm": 1.522061480604903, "learning_rate": 6.549306290881141e-07, "loss": 0.692, "step": 8228 }, { "epoch": 0.84, "grad_norm": 1.5753862066865167, "learning_rate": 6.541126640527195e-07, "loss": 0.6659, "step": 8229 }, { "epoch": 0.84, "grad_norm": 1.4998169182017043, "learning_rate": 6.532951743770993e-07, "loss": 0.6214, "step": 8230 }, { "epoch": 0.84, "grad_norm": 1.4338955271448042, "learning_rate": 6.524781601506746e-07, "loss": 0.5971, "step": 8231 }, { "epoch": 0.84, "grad_norm": 1.508906772564834, "learning_rate": 6.516616214628096e-07, "loss": 0.7272, "step": 8232 }, { "epoch": 0.84, "grad_norm": 1.6851877685679653, "learning_rate": 6.508455584028178e-07, "loss": 0.6414, "step": 8233 }, { "epoch": 0.84, "grad_norm": 1.4288174729292578, "learning_rate": 6.500299710599628e-07, "loss": 0.5572, "step": 8234 }, { "epoch": 0.84, "grad_norm": 1.5624158507195844, "learning_rate": 6.492148595234554e-07, "loss": 0.7373, "step": 8235 }, { "epoch": 0.84, "grad_norm": 1.3112767746560696, "learning_rate": 6.484002238824533e-07, "loss": 0.632, "step": 8236 }, { "epoch": 0.84, "grad_norm": 1.489067167434328, "learning_rate": 6.475860642260606e-07, "loss": 0.6139, "step": 8237 }, { "epoch": 0.84, "grad_norm": 1.4734714468730243, "learning_rate": 6.467723806433346e-07, "loss": 0.7355, "step": 8238 }, { "epoch": 0.84, "grad_norm": 1.5925803846058468, "learning_rate": 6.459591732232739e-07, "loss": 0.6355, "step": 8239 }, { "epoch": 0.84, "grad_norm": 1.5321275365942049, "learning_rate": 6.451464420548315e-07, "loss": 0.5867, "step": 8240 }, { "epoch": 0.84, "grad_norm": 1.5399601950849973, "learning_rate": 6.443341872269038e-07, "loss": 0.6086, "step": 8241 }, { "epoch": 0.84, "grad_norm": 1.4666684690665097, "learning_rate": 6.435224088283354e-07, "loss": 0.6165, "step": 8242 }, { "epoch": 0.84, "grad_norm": 1.4182919590347531, "learning_rate": 6.427111069479214e-07, "loss": 0.6332, "step": 8243 }, { "epoch": 0.84, "grad_norm": 1.485900581393493, "learning_rate": 6.419002816744019e-07, "loss": 0.5922, "step": 8244 }, { "epoch": 0.84, "grad_norm": 1.405069421594442, "learning_rate": 6.410899330964676e-07, "loss": 0.6055, "step": 8245 }, { "epoch": 0.84, "grad_norm": 1.580170355431309, "learning_rate": 6.402800613027554e-07, "loss": 0.8069, "step": 8246 }, { "epoch": 0.84, "grad_norm": 1.660636724232408, "learning_rate": 6.39470666381849e-07, "loss": 0.6931, "step": 8247 }, { "epoch": 0.84, "grad_norm": 1.47458034072016, "learning_rate": 6.386617484222823e-07, "loss": 0.6298, "step": 8248 }, { "epoch": 0.84, "grad_norm": 1.5294871074348317, "learning_rate": 6.378533075125364e-07, "loss": 0.6612, "step": 8249 }, { "epoch": 0.84, "grad_norm": 1.5517985405250088, "learning_rate": 6.370453437410395e-07, "loss": 0.6715, "step": 8250 }, { "epoch": 0.84, "grad_norm": 1.6342086567489664, "learning_rate": 6.36237857196167e-07, "loss": 0.6613, "step": 8251 }, { "epoch": 0.84, "grad_norm": 1.7147744444051747, "learning_rate": 6.354308479662447e-07, "loss": 0.6416, "step": 8252 }, { "epoch": 0.84, "grad_norm": 1.4994361923511799, "learning_rate": 6.346243161395422e-07, "loss": 0.6283, "step": 8253 }, { "epoch": 0.84, "grad_norm": 1.6415231432549084, "learning_rate": 6.33818261804281e-07, "loss": 0.5492, "step": 8254 }, { "epoch": 0.84, "grad_norm": 1.3404688774971703, "learning_rate": 6.330126850486285e-07, "loss": 0.5593, "step": 8255 }, { "epoch": 0.84, "grad_norm": 1.3949739710231668, "learning_rate": 6.322075859606986e-07, "loss": 0.6451, "step": 8256 }, { "epoch": 0.84, "grad_norm": 1.604900289829505, "learning_rate": 6.314029646285558e-07, "loss": 0.6318, "step": 8257 }, { "epoch": 0.84, "grad_norm": 1.3844244816514326, "learning_rate": 6.305988211402092e-07, "loss": 0.648, "step": 8258 }, { "epoch": 0.84, "grad_norm": 1.4511877002413227, "learning_rate": 6.29795155583619e-07, "loss": 0.5543, "step": 8259 }, { "epoch": 0.84, "grad_norm": 1.5391191960954351, "learning_rate": 6.28991968046691e-07, "loss": 0.5548, "step": 8260 }, { "epoch": 0.84, "grad_norm": 1.744685544573428, "learning_rate": 6.281892586172772e-07, "loss": 0.5942, "step": 8261 }, { "epoch": 0.84, "grad_norm": 1.3493729893264377, "learning_rate": 6.273870273831806e-07, "loss": 0.5531, "step": 8262 }, { "epoch": 0.84, "grad_norm": 1.5323261933402137, "learning_rate": 6.26585274432151e-07, "loss": 0.6747, "step": 8263 }, { "epoch": 0.84, "grad_norm": 1.468162512564061, "learning_rate": 6.257839998518856e-07, "loss": 0.6402, "step": 8264 }, { "epoch": 0.84, "grad_norm": 1.5222759313316985, "learning_rate": 6.249832037300269e-07, "loss": 0.6419, "step": 8265 }, { "epoch": 0.84, "grad_norm": 1.475056853108193, "learning_rate": 6.241828861541694e-07, "loss": 0.6878, "step": 8266 }, { "epoch": 0.84, "grad_norm": 1.2589646815108337, "learning_rate": 6.233830472118507e-07, "loss": 0.5357, "step": 8267 }, { "epoch": 0.84, "grad_norm": 1.684909275597284, "learning_rate": 6.225836869905616e-07, "loss": 0.6331, "step": 8268 }, { "epoch": 0.84, "grad_norm": 1.6974286851333316, "learning_rate": 6.217848055777353e-07, "loss": 0.6523, "step": 8269 }, { "epoch": 0.84, "grad_norm": 1.7394303832013749, "learning_rate": 6.209864030607543e-07, "loss": 0.7378, "step": 8270 }, { "epoch": 0.84, "grad_norm": 1.4868099040901577, "learning_rate": 6.201884795269508e-07, "loss": 0.6065, "step": 8271 }, { "epoch": 0.84, "grad_norm": 1.3138837457487689, "learning_rate": 6.193910350636007e-07, "loss": 0.549, "step": 8272 }, { "epoch": 0.84, "grad_norm": 1.4118618184942464, "learning_rate": 6.185940697579318e-07, "loss": 0.5773, "step": 8273 }, { "epoch": 0.84, "grad_norm": 1.4915404532015364, "learning_rate": 6.177975836971162e-07, "loss": 0.6496, "step": 8274 }, { "epoch": 0.84, "grad_norm": 1.6901659488274163, "learning_rate": 6.170015769682742e-07, "loss": 0.6699, "step": 8275 }, { "epoch": 0.85, "grad_norm": 1.2917074009689373, "learning_rate": 6.162060496584749e-07, "loss": 0.5924, "step": 8276 }, { "epoch": 0.85, "grad_norm": 2.7017383362810934, "learning_rate": 6.154110018547355e-07, "loss": 0.6541, "step": 8277 }, { "epoch": 0.85, "grad_norm": 1.4526385097692618, "learning_rate": 6.146164336440185e-07, "loss": 0.6825, "step": 8278 }, { "epoch": 0.85, "grad_norm": 1.568756221773626, "learning_rate": 6.138223451132336e-07, "loss": 0.5616, "step": 8279 }, { "epoch": 0.85, "grad_norm": 1.615582960513864, "learning_rate": 6.130287363492421e-07, "loss": 0.7282, "step": 8280 }, { "epoch": 0.85, "grad_norm": 1.4246111886927721, "learning_rate": 6.12235607438848e-07, "loss": 0.5941, "step": 8281 }, { "epoch": 0.85, "grad_norm": 1.4951654996624455, "learning_rate": 6.114429584688064e-07, "loss": 0.5986, "step": 8282 }, { "epoch": 0.85, "grad_norm": 1.5436476117455362, "learning_rate": 6.106507895258179e-07, "loss": 0.655, "step": 8283 }, { "epoch": 0.85, "grad_norm": 9.649157403090573, "learning_rate": 6.098591006965299e-07, "loss": 0.6437, "step": 8284 }, { "epoch": 0.85, "grad_norm": 1.948535412676141, "learning_rate": 6.090678920675402e-07, "loss": 0.6312, "step": 8285 }, { "epoch": 0.85, "grad_norm": 1.4901164310665584, "learning_rate": 6.082771637253915e-07, "loss": 0.6465, "step": 8286 }, { "epoch": 0.85, "grad_norm": 1.3693170355214088, "learning_rate": 6.07486915756576e-07, "loss": 0.615, "step": 8287 }, { "epoch": 0.85, "grad_norm": 1.5999335990359738, "learning_rate": 6.066971482475315e-07, "loss": 0.6964, "step": 8288 }, { "epoch": 0.85, "grad_norm": 1.5460289953170054, "learning_rate": 6.059078612846431e-07, "loss": 0.6317, "step": 8289 }, { "epoch": 0.85, "grad_norm": 1.7010829566885377, "learning_rate": 6.051190549542446e-07, "loss": 0.6527, "step": 8290 }, { "epoch": 0.85, "grad_norm": 1.6907811152579315, "learning_rate": 6.043307293426187e-07, "loss": 0.6516, "step": 8291 }, { "epoch": 0.85, "grad_norm": 1.5977122394634389, "learning_rate": 6.03542884535992e-07, "loss": 0.6976, "step": 8292 }, { "epoch": 0.85, "grad_norm": 1.9017182246626252, "learning_rate": 6.027555206205393e-07, "loss": 0.6313, "step": 8293 }, { "epoch": 0.85, "grad_norm": 1.3534181437439219, "learning_rate": 6.019686376823858e-07, "loss": 0.584, "step": 8294 }, { "epoch": 0.85, "grad_norm": 1.5653476686498358, "learning_rate": 6.011822358075997e-07, "loss": 0.6438, "step": 8295 }, { "epoch": 0.85, "grad_norm": 1.3640876670361406, "learning_rate": 6.003963150822018e-07, "loss": 0.5975, "step": 8296 }, { "epoch": 0.85, "grad_norm": 1.6188851653801741, "learning_rate": 5.996108755921548e-07, "loss": 0.719, "step": 8297 }, { "epoch": 0.85, "grad_norm": 1.4740218916270773, "learning_rate": 5.988259174233713e-07, "loss": 0.6787, "step": 8298 }, { "epoch": 0.85, "grad_norm": 1.4663844666310035, "learning_rate": 5.98041440661713e-07, "loss": 0.5615, "step": 8299 }, { "epoch": 0.85, "grad_norm": 1.7579689142497752, "learning_rate": 5.972574453929847e-07, "loss": 0.6278, "step": 8300 }, { "epoch": 0.85, "grad_norm": 1.6512612572613707, "learning_rate": 5.964739317029439e-07, "loss": 0.6581, "step": 8301 }, { "epoch": 0.85, "grad_norm": 1.5761750133002228, "learning_rate": 5.956908996772909e-07, "loss": 0.6468, "step": 8302 }, { "epoch": 0.85, "grad_norm": 1.4174007000526485, "learning_rate": 5.949083494016738e-07, "loss": 0.619, "step": 8303 }, { "epoch": 0.85, "grad_norm": 1.4093550041022647, "learning_rate": 5.941262809616905e-07, "loss": 0.6376, "step": 8304 }, { "epoch": 0.85, "grad_norm": 1.475733983690639, "learning_rate": 5.933446944428861e-07, "loss": 0.6245, "step": 8305 }, { "epoch": 0.85, "grad_norm": 1.506274991558796, "learning_rate": 5.925635899307503e-07, "loss": 0.6474, "step": 8306 }, { "epoch": 0.85, "grad_norm": 1.318291050661714, "learning_rate": 5.9178296751072e-07, "loss": 0.6359, "step": 8307 }, { "epoch": 0.85, "grad_norm": 1.483423237045092, "learning_rate": 5.910028272681839e-07, "loss": 0.617, "step": 8308 }, { "epoch": 0.85, "grad_norm": 1.4808393209738968, "learning_rate": 5.902231692884718e-07, "loss": 0.5981, "step": 8309 }, { "epoch": 0.85, "grad_norm": 1.4414328870433797, "learning_rate": 5.894439936568674e-07, "loss": 0.6315, "step": 8310 }, { "epoch": 0.85, "grad_norm": 1.4400711415837932, "learning_rate": 5.886653004585957e-07, "loss": 0.5927, "step": 8311 }, { "epoch": 0.85, "grad_norm": 1.6000618061339595, "learning_rate": 5.878870897788313e-07, "loss": 0.5284, "step": 8312 }, { "epoch": 0.85, "grad_norm": 1.6039541412320186, "learning_rate": 5.871093617026974e-07, "loss": 0.7186, "step": 8313 }, { "epoch": 0.85, "grad_norm": 1.3183965495301542, "learning_rate": 5.863321163152614e-07, "loss": 0.5534, "step": 8314 }, { "epoch": 0.85, "grad_norm": 1.2818467730165202, "learning_rate": 5.855553537015413e-07, "loss": 0.6214, "step": 8315 }, { "epoch": 0.85, "grad_norm": 1.9168911037242071, "learning_rate": 5.847790739465003e-07, "loss": 0.6186, "step": 8316 }, { "epoch": 0.85, "grad_norm": 1.6187458670948942, "learning_rate": 5.840032771350473e-07, "loss": 0.5138, "step": 8317 }, { "epoch": 0.85, "grad_norm": 1.483543411323309, "learning_rate": 5.832279633520411e-07, "loss": 0.5893, "step": 8318 }, { "epoch": 0.85, "grad_norm": 1.5628892835310257, "learning_rate": 5.824531326822885e-07, "loss": 0.4515, "step": 8319 }, { "epoch": 0.85, "grad_norm": 1.479439778471549, "learning_rate": 5.816787852105399e-07, "loss": 0.6753, "step": 8320 }, { "epoch": 0.85, "grad_norm": 1.7251343120656581, "learning_rate": 5.80904921021494e-07, "loss": 0.6067, "step": 8321 }, { "epoch": 0.85, "grad_norm": 1.454475226066595, "learning_rate": 5.801315401997992e-07, "loss": 0.6254, "step": 8322 }, { "epoch": 0.85, "grad_norm": 1.6994020074596234, "learning_rate": 5.793586428300469e-07, "loss": 0.7192, "step": 8323 }, { "epoch": 0.85, "grad_norm": 1.628828300270803, "learning_rate": 5.785862289967798e-07, "loss": 0.6641, "step": 8324 }, { "epoch": 0.85, "grad_norm": 1.5754503603138164, "learning_rate": 5.778142987844848e-07, "loss": 0.5824, "step": 8325 }, { "epoch": 0.85, "grad_norm": 1.6159436844555382, "learning_rate": 5.770428522775956e-07, "loss": 0.6671, "step": 8326 }, { "epoch": 0.85, "grad_norm": 1.6458043008734933, "learning_rate": 5.762718895604963e-07, "loss": 0.6527, "step": 8327 }, { "epoch": 0.85, "grad_norm": 1.3504080002874113, "learning_rate": 5.755014107175144e-07, "loss": 0.6209, "step": 8328 }, { "epoch": 0.85, "grad_norm": 1.3968716119358318, "learning_rate": 5.747314158329275e-07, "loss": 0.5733, "step": 8329 }, { "epoch": 0.85, "grad_norm": 1.31532228834063, "learning_rate": 5.739619049909584e-07, "loss": 0.5953, "step": 8330 }, { "epoch": 0.85, "grad_norm": 1.282314092959793, "learning_rate": 5.731928782757756e-07, "loss": 0.6297, "step": 8331 }, { "epoch": 0.85, "grad_norm": 1.4848625304070167, "learning_rate": 5.724243357714981e-07, "loss": 0.628, "step": 8332 }, { "epoch": 0.85, "grad_norm": 1.4298178362320915, "learning_rate": 5.716562775621909e-07, "loss": 0.6204, "step": 8333 }, { "epoch": 0.85, "grad_norm": 1.4649114994379449, "learning_rate": 5.708887037318644e-07, "loss": 0.6069, "step": 8334 }, { "epoch": 0.85, "grad_norm": 1.6092478712179432, "learning_rate": 5.701216143644755e-07, "loss": 0.6949, "step": 8335 }, { "epoch": 0.85, "grad_norm": 1.4563301522965326, "learning_rate": 5.693550095439326e-07, "loss": 0.6867, "step": 8336 }, { "epoch": 0.85, "grad_norm": 1.4883876963283853, "learning_rate": 5.685888893540858e-07, "loss": 0.6147, "step": 8337 }, { "epoch": 0.85, "grad_norm": 1.622333465783311, "learning_rate": 5.67823253878736e-07, "loss": 0.7052, "step": 8338 }, { "epoch": 0.85, "grad_norm": 1.5431798327328758, "learning_rate": 5.670581032016292e-07, "loss": 0.6348, "step": 8339 }, { "epoch": 0.85, "grad_norm": 1.5252206564248834, "learning_rate": 5.662934374064571e-07, "loss": 0.6392, "step": 8340 }, { "epoch": 0.85, "grad_norm": 1.45555213970988, "learning_rate": 5.655292565768622e-07, "loss": 0.6383, "step": 8341 }, { "epoch": 0.85, "grad_norm": 1.5614434628770484, "learning_rate": 5.647655607964303e-07, "loss": 0.7284, "step": 8342 }, { "epoch": 0.85, "grad_norm": 1.573024085937965, "learning_rate": 5.640023501486975e-07, "loss": 0.6709, "step": 8343 }, { "epoch": 0.85, "grad_norm": 1.3666285967479341, "learning_rate": 5.632396247171429e-07, "loss": 0.6564, "step": 8344 }, { "epoch": 0.85, "grad_norm": 1.5116435345138743, "learning_rate": 5.62477384585195e-07, "loss": 0.6437, "step": 8345 }, { "epoch": 0.85, "grad_norm": 1.4299508029962849, "learning_rate": 5.617156298362286e-07, "loss": 0.6173, "step": 8346 }, { "epoch": 0.85, "grad_norm": 1.4537124558755758, "learning_rate": 5.609543605535672e-07, "loss": 0.6489, "step": 8347 }, { "epoch": 0.85, "grad_norm": 1.4761996194576066, "learning_rate": 5.601935768204786e-07, "loss": 0.5758, "step": 8348 }, { "epoch": 0.85, "grad_norm": 2.0262927217576356, "learning_rate": 5.594332787201773e-07, "loss": 0.6763, "step": 8349 }, { "epoch": 0.85, "grad_norm": 1.5496419320801458, "learning_rate": 5.586734663358284e-07, "loss": 0.7677, "step": 8350 }, { "epoch": 0.85, "grad_norm": 1.4953909103558514, "learning_rate": 5.579141397505383e-07, "loss": 0.6286, "step": 8351 }, { "epoch": 0.85, "grad_norm": 1.584809698941534, "learning_rate": 5.571552990473661e-07, "loss": 0.6561, "step": 8352 }, { "epoch": 0.85, "grad_norm": 1.2886024942119423, "learning_rate": 5.563969443093142e-07, "loss": 0.5581, "step": 8353 }, { "epoch": 0.85, "grad_norm": 1.4306017387858074, "learning_rate": 5.556390756193303e-07, "loss": 0.574, "step": 8354 }, { "epoch": 0.85, "grad_norm": 1.6436190368838781, "learning_rate": 5.548816930603146e-07, "loss": 0.5894, "step": 8355 }, { "epoch": 0.85, "grad_norm": 1.3139293632542934, "learning_rate": 5.541247967151081e-07, "loss": 0.6202, "step": 8356 }, { "epoch": 0.85, "grad_norm": 1.5014654791853277, "learning_rate": 5.533683866665035e-07, "loss": 0.4658, "step": 8357 }, { "epoch": 0.85, "grad_norm": 1.5516893704164503, "learning_rate": 5.526124629972368e-07, "loss": 0.598, "step": 8358 }, { "epoch": 0.85, "grad_norm": 1.7299177471910059, "learning_rate": 5.518570257899913e-07, "loss": 0.6712, "step": 8359 }, { "epoch": 0.85, "grad_norm": 1.378197718454547, "learning_rate": 5.511020751273993e-07, "loss": 0.6292, "step": 8360 }, { "epoch": 0.85, "grad_norm": 1.511765831890286, "learning_rate": 5.503476110920386e-07, "loss": 0.6622, "step": 8361 }, { "epoch": 0.85, "grad_norm": 1.4026530545892235, "learning_rate": 5.495936337664326e-07, "loss": 0.5199, "step": 8362 }, { "epoch": 0.85, "grad_norm": 1.7623615079113697, "learning_rate": 5.488401432330526e-07, "loss": 0.6608, "step": 8363 }, { "epoch": 0.85, "grad_norm": 1.5485038613174094, "learning_rate": 5.480871395743176e-07, "loss": 0.5931, "step": 8364 }, { "epoch": 0.85, "grad_norm": 1.6024867713127715, "learning_rate": 5.473346228725901e-07, "loss": 0.696, "step": 8365 }, { "epoch": 0.85, "grad_norm": 1.459243552830268, "learning_rate": 5.465825932101848e-07, "loss": 0.6551, "step": 8366 }, { "epoch": 0.85, "grad_norm": 1.629407536876995, "learning_rate": 5.45831050669357e-07, "loss": 0.6362, "step": 8367 }, { "epoch": 0.85, "grad_norm": 1.4835952182131042, "learning_rate": 5.450799953323127e-07, "loss": 0.5508, "step": 8368 }, { "epoch": 0.85, "grad_norm": 1.8509821429476363, "learning_rate": 5.443294272812033e-07, "loss": 0.6872, "step": 8369 }, { "epoch": 0.85, "grad_norm": 1.8154571671892392, "learning_rate": 5.43579346598127e-07, "loss": 0.6932, "step": 8370 }, { "epoch": 0.85, "grad_norm": 1.6582923102472666, "learning_rate": 5.428297533651295e-07, "loss": 0.7212, "step": 8371 }, { "epoch": 0.85, "grad_norm": 1.4376150927235312, "learning_rate": 5.420806476642016e-07, "loss": 0.6741, "step": 8372 }, { "epoch": 0.85, "grad_norm": 1.5583533172325206, "learning_rate": 5.413320295772812e-07, "loss": 0.7314, "step": 8373 }, { "epoch": 0.86, "grad_norm": 1.3578184388021128, "learning_rate": 5.405838991862539e-07, "loss": 0.5479, "step": 8374 }, { "epoch": 0.86, "grad_norm": 1.4385342702621253, "learning_rate": 5.398362565729521e-07, "loss": 0.5916, "step": 8375 }, { "epoch": 0.86, "grad_norm": 1.4632042289464817, "learning_rate": 5.39089101819154e-07, "loss": 0.6648, "step": 8376 }, { "epoch": 0.86, "grad_norm": 1.4667332596529021, "learning_rate": 5.383424350065825e-07, "loss": 0.6544, "step": 8377 }, { "epoch": 0.86, "grad_norm": 1.5378014206806787, "learning_rate": 5.375962562169113e-07, "loss": 0.6538, "step": 8378 }, { "epoch": 0.86, "grad_norm": 1.5412633632389368, "learning_rate": 5.368505655317574e-07, "loss": 0.6513, "step": 8379 }, { "epoch": 0.86, "grad_norm": 1.3261631059728474, "learning_rate": 5.361053630326868e-07, "loss": 0.6646, "step": 8380 }, { "epoch": 0.86, "grad_norm": 1.6039585604328273, "learning_rate": 5.353606488012097e-07, "loss": 0.5922, "step": 8381 }, { "epoch": 0.86, "grad_norm": 1.5031190312400649, "learning_rate": 5.346164229187839e-07, "loss": 0.6962, "step": 8382 }, { "epoch": 0.86, "grad_norm": 1.532344474242775, "learning_rate": 5.338726854668158e-07, "loss": 0.6217, "step": 8383 }, { "epoch": 0.86, "grad_norm": 1.4053206472666506, "learning_rate": 5.331294365266537e-07, "loss": 0.6052, "step": 8384 }, { "epoch": 0.86, "grad_norm": 1.5414287880390782, "learning_rate": 5.323866761795981e-07, "loss": 0.6913, "step": 8385 }, { "epoch": 0.86, "grad_norm": 1.3859051943812526, "learning_rate": 5.316444045068919e-07, "loss": 0.5377, "step": 8386 }, { "epoch": 0.86, "grad_norm": 1.3986970933002771, "learning_rate": 5.309026215897256e-07, "loss": 0.5747, "step": 8387 }, { "epoch": 0.86, "grad_norm": 1.5614200226401656, "learning_rate": 5.301613275092366e-07, "loss": 0.7551, "step": 8388 }, { "epoch": 0.86, "grad_norm": 1.6206718654249164, "learning_rate": 5.294205223465098e-07, "loss": 0.6406, "step": 8389 }, { "epoch": 0.86, "grad_norm": 1.6005173104122188, "learning_rate": 5.286802061825752e-07, "loss": 0.6197, "step": 8390 }, { "epoch": 0.86, "grad_norm": 1.5983905724946799, "learning_rate": 5.279403790984089e-07, "loss": 0.6085, "step": 8391 }, { "epoch": 0.86, "grad_norm": 1.7236955766381514, "learning_rate": 5.27201041174935e-07, "loss": 0.6707, "step": 8392 }, { "epoch": 0.86, "grad_norm": 1.4046613081636465, "learning_rate": 5.26462192493023e-07, "loss": 0.5687, "step": 8393 }, { "epoch": 0.86, "grad_norm": 1.3855485880884444, "learning_rate": 5.257238331334908e-07, "loss": 0.6222, "step": 8394 }, { "epoch": 0.86, "grad_norm": 1.6147703661324508, "learning_rate": 5.249859631770993e-07, "loss": 0.6841, "step": 8395 }, { "epoch": 0.86, "grad_norm": 1.5138131031171929, "learning_rate": 5.242485827045579e-07, "loss": 0.6003, "step": 8396 }, { "epoch": 0.86, "grad_norm": 1.5274914959636066, "learning_rate": 5.235116917965244e-07, "loss": 0.6238, "step": 8397 }, { "epoch": 0.86, "grad_norm": 1.2944288048370003, "learning_rate": 5.227752905335981e-07, "loss": 0.6075, "step": 8398 }, { "epoch": 0.86, "grad_norm": 1.6524601609540492, "learning_rate": 5.220393789963307e-07, "loss": 0.6189, "step": 8399 }, { "epoch": 0.86, "grad_norm": 1.4874992880929072, "learning_rate": 5.213039572652162e-07, "loss": 0.7309, "step": 8400 }, { "epoch": 0.86, "grad_norm": 1.2544810275078289, "learning_rate": 5.205690254206946e-07, "loss": 0.6169, "step": 8401 }, { "epoch": 0.86, "grad_norm": 1.518926233606073, "learning_rate": 5.198345835431551e-07, "loss": 0.5372, "step": 8402 }, { "epoch": 0.86, "grad_norm": 1.5008344248191452, "learning_rate": 5.191006317129332e-07, "loss": 0.6079, "step": 8403 }, { "epoch": 0.86, "grad_norm": 1.4259627162676216, "learning_rate": 5.183671700103088e-07, "loss": 0.6494, "step": 8404 }, { "epoch": 0.86, "grad_norm": 1.504760896184214, "learning_rate": 5.176341985155075e-07, "loss": 0.5963, "step": 8405 }, { "epoch": 0.86, "grad_norm": 1.5521174473156505, "learning_rate": 5.169017173087055e-07, "loss": 0.7151, "step": 8406 }, { "epoch": 0.86, "grad_norm": 1.170861823384683, "learning_rate": 5.161697264700205e-07, "loss": 0.6613, "step": 8407 }, { "epoch": 0.86, "grad_norm": 1.3925771575999946, "learning_rate": 5.154382260795204e-07, "loss": 0.6375, "step": 8408 }, { "epoch": 0.86, "grad_norm": 1.546536191276654, "learning_rate": 5.147072162172173e-07, "loss": 0.5118, "step": 8409 }, { "epoch": 0.86, "grad_norm": 1.321313164326701, "learning_rate": 5.139766969630689e-07, "loss": 0.5461, "step": 8410 }, { "epoch": 0.86, "grad_norm": 1.4155397344375444, "learning_rate": 5.132466683969827e-07, "loss": 0.5455, "step": 8411 }, { "epoch": 0.86, "grad_norm": 1.5328652428608147, "learning_rate": 5.12517130598808e-07, "loss": 0.6087, "step": 8412 }, { "epoch": 0.86, "grad_norm": 1.4448796886640898, "learning_rate": 5.117880836483452e-07, "loss": 0.6359, "step": 8413 }, { "epoch": 0.86, "grad_norm": 1.3751510724480818, "learning_rate": 5.110595276253377e-07, "loss": 0.5686, "step": 8414 }, { "epoch": 0.86, "grad_norm": 1.4061375074356663, "learning_rate": 5.103314626094741e-07, "loss": 0.6329, "step": 8415 }, { "epoch": 0.86, "grad_norm": 1.5923272603373413, "learning_rate": 5.096038886803934e-07, "loss": 0.6042, "step": 8416 }, { "epoch": 0.86, "grad_norm": 1.5934170818816018, "learning_rate": 5.088768059176785e-07, "loss": 0.5952, "step": 8417 }, { "epoch": 0.86, "grad_norm": 1.7056964679867606, "learning_rate": 5.081502144008593e-07, "loss": 0.5639, "step": 8418 }, { "epoch": 0.86, "grad_norm": 1.445686583487393, "learning_rate": 5.074241142094094e-07, "loss": 0.6857, "step": 8419 }, { "epoch": 0.86, "grad_norm": 1.5774953072485935, "learning_rate": 5.06698505422753e-07, "loss": 0.5992, "step": 8420 }, { "epoch": 0.86, "grad_norm": 1.7604019441754637, "learning_rate": 5.059733881202567e-07, "loss": 0.6492, "step": 8421 }, { "epoch": 0.86, "grad_norm": 1.5078573310082273, "learning_rate": 5.052487623812363e-07, "loss": 0.6417, "step": 8422 }, { "epoch": 0.86, "grad_norm": 1.367924700155452, "learning_rate": 5.045246282849519e-07, "loss": 0.5716, "step": 8423 }, { "epoch": 0.86, "grad_norm": 1.3826751445454568, "learning_rate": 5.038009859106091e-07, "loss": 0.6268, "step": 8424 }, { "epoch": 0.86, "grad_norm": 1.5335676329110162, "learning_rate": 5.030778353373633e-07, "loss": 0.585, "step": 8425 }, { "epoch": 0.86, "grad_norm": 1.6689232411767958, "learning_rate": 5.023551766443119e-07, "loss": 0.6941, "step": 8426 }, { "epoch": 0.86, "grad_norm": 1.581253951546467, "learning_rate": 5.016330099105015e-07, "loss": 0.6675, "step": 8427 }, { "epoch": 0.86, "grad_norm": 1.5382610284296834, "learning_rate": 5.009113352149242e-07, "loss": 0.703, "step": 8428 }, { "epoch": 0.86, "grad_norm": 1.5334100209142572, "learning_rate": 5.001901526365161e-07, "loss": 0.6016, "step": 8429 }, { "epoch": 0.86, "grad_norm": 1.56449396147012, "learning_rate": 4.994694622541618e-07, "loss": 0.6035, "step": 8430 }, { "epoch": 0.86, "grad_norm": 1.4210498439519235, "learning_rate": 4.987492641466935e-07, "loss": 0.5861, "step": 8431 }, { "epoch": 0.86, "grad_norm": 1.4492890238519727, "learning_rate": 4.980295583928856e-07, "loss": 0.6719, "step": 8432 }, { "epoch": 0.86, "grad_norm": 1.6558981305712466, "learning_rate": 4.973103450714606e-07, "loss": 0.6217, "step": 8433 }, { "epoch": 0.86, "grad_norm": 1.5080370841759965, "learning_rate": 4.965916242610885e-07, "loss": 0.5783, "step": 8434 }, { "epoch": 0.86, "grad_norm": 1.4625096609968042, "learning_rate": 4.958733960403822e-07, "loss": 0.6632, "step": 8435 }, { "epoch": 0.86, "grad_norm": 1.675057974507685, "learning_rate": 4.951556604879049e-07, "loss": 0.6772, "step": 8436 }, { "epoch": 0.86, "grad_norm": 1.42232733461508, "learning_rate": 4.94438417682162e-07, "loss": 0.6572, "step": 8437 }, { "epoch": 0.86, "grad_norm": 1.5886780109890544, "learning_rate": 4.93721667701606e-07, "loss": 0.5818, "step": 8438 }, { "epoch": 0.86, "grad_norm": 1.3643713231149859, "learning_rate": 4.930054106246379e-07, "loss": 0.5784, "step": 8439 }, { "epoch": 0.86, "grad_norm": 1.5710910515530054, "learning_rate": 4.922896465296012e-07, "loss": 0.658, "step": 8440 }, { "epoch": 0.86, "grad_norm": 1.462188394505344, "learning_rate": 4.915743754947894e-07, "loss": 0.6105, "step": 8441 }, { "epoch": 0.86, "grad_norm": 1.5487213573950944, "learning_rate": 4.908595975984387e-07, "loss": 0.6271, "step": 8442 }, { "epoch": 0.86, "grad_norm": 1.5832100285907333, "learning_rate": 4.901453129187311e-07, "loss": 0.6873, "step": 8443 }, { "epoch": 0.86, "grad_norm": 1.6660616511783934, "learning_rate": 4.894315215337986e-07, "loss": 0.7494, "step": 8444 }, { "epoch": 0.86, "grad_norm": 1.5959128515463958, "learning_rate": 4.887182235217164e-07, "loss": 0.6212, "step": 8445 }, { "epoch": 0.86, "grad_norm": 1.5020296807132572, "learning_rate": 4.880054189605055e-07, "loss": 0.6798, "step": 8446 }, { "epoch": 0.86, "grad_norm": 1.5034935351561352, "learning_rate": 4.872931079281335e-07, "loss": 0.6304, "step": 8447 }, { "epoch": 0.86, "grad_norm": 1.3878848045672456, "learning_rate": 4.865812905025135e-07, "loss": 0.5698, "step": 8448 }, { "epoch": 0.86, "grad_norm": 1.754333587773369, "learning_rate": 4.858699667615058e-07, "loss": 0.6809, "step": 8449 }, { "epoch": 0.86, "grad_norm": 1.5245078169595585, "learning_rate": 4.851591367829172e-07, "loss": 0.6445, "step": 8450 }, { "epoch": 0.86, "grad_norm": 1.3127562945026214, "learning_rate": 4.84448800644498e-07, "loss": 0.627, "step": 8451 }, { "epoch": 0.86, "grad_norm": 1.4490648983641954, "learning_rate": 4.837389584239455e-07, "loss": 0.5932, "step": 8452 }, { "epoch": 0.86, "grad_norm": 1.5368399586718544, "learning_rate": 4.830296101989052e-07, "loss": 0.6125, "step": 8453 }, { "epoch": 0.86, "grad_norm": 1.558088239447658, "learning_rate": 4.823207560469639e-07, "loss": 0.6554, "step": 8454 }, { "epoch": 0.86, "grad_norm": 1.5133157008230584, "learning_rate": 4.816123960456603e-07, "loss": 0.528, "step": 8455 }, { "epoch": 0.86, "grad_norm": 1.6534927323522133, "learning_rate": 4.80904530272474e-07, "loss": 0.5984, "step": 8456 }, { "epoch": 0.86, "grad_norm": 1.5754489016610482, "learning_rate": 4.801971588048321e-07, "loss": 0.6827, "step": 8457 }, { "epoch": 0.86, "grad_norm": 1.4366110972307613, "learning_rate": 4.79490281720108e-07, "loss": 0.7227, "step": 8458 }, { "epoch": 0.86, "grad_norm": 1.4010869361578602, "learning_rate": 4.787838990956228e-07, "loss": 0.6487, "step": 8459 }, { "epoch": 0.86, "grad_norm": 1.460941083738212, "learning_rate": 4.780780110086408e-07, "loss": 0.5043, "step": 8460 }, { "epoch": 0.86, "grad_norm": 1.3970990386969422, "learning_rate": 4.77372617536373e-07, "loss": 0.5989, "step": 8461 }, { "epoch": 0.86, "grad_norm": 1.6592137310692616, "learning_rate": 4.766677187559748e-07, "loss": 0.6558, "step": 8462 }, { "epoch": 0.86, "grad_norm": 1.5427088413656924, "learning_rate": 4.7596331474455084e-07, "loss": 0.6318, "step": 8463 }, { "epoch": 0.86, "grad_norm": 1.4189917408660464, "learning_rate": 4.752594055791504e-07, "loss": 0.6034, "step": 8464 }, { "epoch": 0.86, "grad_norm": 1.4840020804169276, "learning_rate": 4.7455599133676754e-07, "loss": 0.5949, "step": 8465 }, { "epoch": 0.86, "grad_norm": 1.4461928190250237, "learning_rate": 4.7385307209434126e-07, "loss": 0.6566, "step": 8466 }, { "epoch": 0.86, "grad_norm": 1.597319394935247, "learning_rate": 4.7315064792876066e-07, "loss": 0.6622, "step": 8467 }, { "epoch": 0.86, "grad_norm": 1.7135659535489625, "learning_rate": 4.7244871891685587e-07, "loss": 0.7255, "step": 8468 }, { "epoch": 0.86, "grad_norm": 1.5636513459322416, "learning_rate": 4.71747285135406e-07, "loss": 0.7464, "step": 8469 }, { "epoch": 0.86, "grad_norm": 1.6474375973524755, "learning_rate": 4.710463466611348e-07, "loss": 0.6775, "step": 8470 }, { "epoch": 0.87, "grad_norm": 1.7739683937638735, "learning_rate": 4.703459035707109e-07, "loss": 0.637, "step": 8471 }, { "epoch": 0.87, "grad_norm": 1.3748485030061934, "learning_rate": 4.696459559407507e-07, "loss": 0.5265, "step": 8472 }, { "epoch": 0.87, "grad_norm": 1.3391793991427794, "learning_rate": 4.689465038478164e-07, "loss": 0.5467, "step": 8473 }, { "epoch": 0.87, "grad_norm": 1.692941177338358, "learning_rate": 4.682475473684145e-07, "loss": 0.5626, "step": 8474 }, { "epoch": 0.87, "grad_norm": 1.458664976187386, "learning_rate": 4.675490865789978e-07, "loss": 0.6306, "step": 8475 }, { "epoch": 0.87, "grad_norm": 1.430614493473169, "learning_rate": 4.6685112155596343e-07, "loss": 0.7223, "step": 8476 }, { "epoch": 0.87, "grad_norm": 1.3203214362277875, "learning_rate": 4.6615365237565767e-07, "loss": 0.5861, "step": 8477 }, { "epoch": 0.87, "grad_norm": 1.5458834136061679, "learning_rate": 4.6545667911437164e-07, "loss": 0.5775, "step": 8478 }, { "epoch": 0.87, "grad_norm": 1.5155866592058398, "learning_rate": 4.647602018483399e-07, "loss": 0.5468, "step": 8479 }, { "epoch": 0.87, "grad_norm": 1.6399885012808195, "learning_rate": 4.640642206537438e-07, "loss": 0.6576, "step": 8480 }, { "epoch": 0.87, "grad_norm": 1.5513697505521171, "learning_rate": 4.6336873560671235e-07, "loss": 0.6814, "step": 8481 }, { "epoch": 0.87, "grad_norm": 1.5335836981001356, "learning_rate": 4.62673746783317e-07, "loss": 0.6753, "step": 8482 }, { "epoch": 0.87, "grad_norm": 1.6319051149532722, "learning_rate": 4.61979254259578e-07, "loss": 0.6761, "step": 8483 }, { "epoch": 0.87, "grad_norm": 1.6332293984686868, "learning_rate": 4.612852581114602e-07, "loss": 0.6787, "step": 8484 }, { "epoch": 0.87, "grad_norm": 1.7774486613183855, "learning_rate": 4.605917584148717e-07, "loss": 0.6369, "step": 8485 }, { "epoch": 0.87, "grad_norm": 1.6628224816093866, "learning_rate": 4.5989875524567075e-07, "loss": 0.7848, "step": 8486 }, { "epoch": 0.87, "grad_norm": 1.6799645440966924, "learning_rate": 4.5920624867965944e-07, "loss": 0.6653, "step": 8487 }, { "epoch": 0.87, "grad_norm": 1.5919378723327813, "learning_rate": 4.5851423879258394e-07, "loss": 0.6268, "step": 8488 }, { "epoch": 0.87, "grad_norm": 1.7888346254465557, "learning_rate": 4.578227256601381e-07, "loss": 0.6738, "step": 8489 }, { "epoch": 0.87, "grad_norm": 1.54433657129782, "learning_rate": 4.571317093579586e-07, "loss": 0.6881, "step": 8490 }, { "epoch": 0.87, "grad_norm": 1.353906829113509, "learning_rate": 4.564411899616322e-07, "loss": 0.6508, "step": 8491 }, { "epoch": 0.87, "grad_norm": 1.5837974731075133, "learning_rate": 4.5575116754668913e-07, "loss": 0.6029, "step": 8492 }, { "epoch": 0.87, "grad_norm": 1.5897205966387224, "learning_rate": 4.5506164218860393e-07, "loss": 0.6168, "step": 8493 }, { "epoch": 0.87, "grad_norm": 1.4384715282763412, "learning_rate": 4.5437261396279743e-07, "loss": 0.6168, "step": 8494 }, { "epoch": 0.87, "grad_norm": 1.542944154745217, "learning_rate": 4.5368408294463873e-07, "loss": 0.6011, "step": 8495 }, { "epoch": 0.87, "grad_norm": 1.3827247594714196, "learning_rate": 4.5299604920943764e-07, "loss": 0.6287, "step": 8496 }, { "epoch": 0.87, "grad_norm": 1.61160282212534, "learning_rate": 4.5230851283245504e-07, "loss": 0.6354, "step": 8497 }, { "epoch": 0.87, "grad_norm": 1.5428860139301381, "learning_rate": 4.51621473888893e-07, "loss": 0.5523, "step": 8498 }, { "epoch": 0.87, "grad_norm": 1.5346836639637262, "learning_rate": 4.5093493245390084e-07, "loss": 0.6066, "step": 8499 }, { "epoch": 0.87, "grad_norm": 1.6429685559385092, "learning_rate": 4.5024888860257356e-07, "loss": 0.8056, "step": 8500 }, { "epoch": 0.87, "grad_norm": 1.4195621406818772, "learning_rate": 4.4956334240995326e-07, "loss": 0.6616, "step": 8501 }, { "epoch": 0.87, "grad_norm": 1.5433139633249318, "learning_rate": 4.48878293951025e-07, "loss": 0.5949, "step": 8502 }, { "epoch": 0.87, "grad_norm": 1.4397805436489464, "learning_rate": 4.4819374330072054e-07, "loss": 0.5776, "step": 8503 }, { "epoch": 0.87, "grad_norm": 1.6473476991789247, "learning_rate": 4.4750969053391547e-07, "loss": 0.6929, "step": 8504 }, { "epoch": 0.87, "grad_norm": 1.6442674125676515, "learning_rate": 4.468261357254339e-07, "loss": 0.67, "step": 8505 }, { "epoch": 0.87, "grad_norm": 1.4224783361588944, "learning_rate": 4.461430789500454e-07, "loss": 0.5942, "step": 8506 }, { "epoch": 0.87, "grad_norm": 1.6880933518493528, "learning_rate": 4.454605202824619e-07, "loss": 0.5387, "step": 8507 }, { "epoch": 0.87, "grad_norm": 1.59587567065954, "learning_rate": 4.4477845979734256e-07, "loss": 0.6513, "step": 8508 }, { "epoch": 0.87, "grad_norm": 1.7092264945169708, "learning_rate": 4.4409689756929384e-07, "loss": 0.6539, "step": 8509 }, { "epoch": 0.87, "grad_norm": 1.5276980861778566, "learning_rate": 4.4341583367286445e-07, "loss": 0.6147, "step": 8510 }, { "epoch": 0.87, "grad_norm": 1.47863599208939, "learning_rate": 4.427352681825514e-07, "loss": 0.5706, "step": 8511 }, { "epoch": 0.87, "grad_norm": 1.4273847060087643, "learning_rate": 4.4205520117279524e-07, "loss": 0.5508, "step": 8512 }, { "epoch": 0.87, "grad_norm": 1.885419929401766, "learning_rate": 4.4137563271798243e-07, "loss": 0.6699, "step": 8513 }, { "epoch": 0.87, "grad_norm": 1.5081778089423838, "learning_rate": 4.4069656289244587e-07, "loss": 0.6465, "step": 8514 }, { "epoch": 0.87, "grad_norm": 1.6161570589467904, "learning_rate": 4.4001799177046335e-07, "loss": 0.6428, "step": 8515 }, { "epoch": 0.87, "grad_norm": 1.577834714413167, "learning_rate": 4.393399194262582e-07, "loss": 0.6698, "step": 8516 }, { "epoch": 0.87, "grad_norm": 1.4032961142418996, "learning_rate": 4.3866234593399837e-07, "loss": 0.7608, "step": 8517 }, { "epoch": 0.87, "grad_norm": 1.6107993028219547, "learning_rate": 4.3798527136779734e-07, "loss": 0.6779, "step": 8518 }, { "epoch": 0.87, "grad_norm": 1.4820120880197476, "learning_rate": 4.3730869580171475e-07, "loss": 0.6072, "step": 8519 }, { "epoch": 0.87, "grad_norm": 1.6876305823799242, "learning_rate": 4.3663261930975755e-07, "loss": 0.6619, "step": 8520 }, { "epoch": 0.87, "grad_norm": 1.518273436230212, "learning_rate": 4.3595704196587427e-07, "loss": 0.5856, "step": 8521 }, { "epoch": 0.87, "grad_norm": 1.5632615588971965, "learning_rate": 4.352819638439598e-07, "loss": 0.7224, "step": 8522 }, { "epoch": 0.87, "grad_norm": 1.6022388263522909, "learning_rate": 4.346073850178567e-07, "loss": 0.5797, "step": 8523 }, { "epoch": 0.87, "grad_norm": 1.5492346035249498, "learning_rate": 4.339333055613504e-07, "loss": 0.6679, "step": 8524 }, { "epoch": 0.87, "grad_norm": 1.616669781783032, "learning_rate": 4.3325972554817406e-07, "loss": 0.6282, "step": 8525 }, { "epoch": 0.87, "grad_norm": 1.6828718208759301, "learning_rate": 4.325866450520044e-07, "loss": 0.635, "step": 8526 }, { "epoch": 0.87, "grad_norm": 1.2755560401703436, "learning_rate": 4.319140641464625e-07, "loss": 0.5693, "step": 8527 }, { "epoch": 0.87, "grad_norm": 1.4252349121336336, "learning_rate": 4.312419829051173e-07, "loss": 0.6017, "step": 8528 }, { "epoch": 0.87, "grad_norm": 1.6173636080998701, "learning_rate": 4.3057040140148334e-07, "loss": 0.5334, "step": 8529 }, { "epoch": 0.87, "grad_norm": 1.5496598265641912, "learning_rate": 4.298993197090179e-07, "loss": 0.6994, "step": 8530 }, { "epoch": 0.87, "grad_norm": 1.5028124031234145, "learning_rate": 4.2922873790112565e-07, "loss": 0.6145, "step": 8531 }, { "epoch": 0.87, "grad_norm": 1.6905607908308546, "learning_rate": 4.28558656051154e-07, "loss": 0.6329, "step": 8532 }, { "epoch": 0.87, "grad_norm": 1.4264927842554191, "learning_rate": 4.2788907423239877e-07, "loss": 0.7211, "step": 8533 }, { "epoch": 0.87, "grad_norm": 1.4872979221084797, "learning_rate": 4.272199925181014e-07, "loss": 0.6704, "step": 8534 }, { "epoch": 0.87, "grad_norm": 1.4478114396137431, "learning_rate": 4.2655141098144506e-07, "loss": 0.6818, "step": 8535 }, { "epoch": 0.87, "grad_norm": 1.5473425532419516, "learning_rate": 4.2588332969556e-07, "loss": 0.6293, "step": 8536 }, { "epoch": 0.87, "grad_norm": 1.4273218847049984, "learning_rate": 4.2521574873352346e-07, "loss": 0.5503, "step": 8537 }, { "epoch": 0.87, "grad_norm": 1.4837166582226906, "learning_rate": 4.245486681683547e-07, "loss": 0.6369, "step": 8538 }, { "epoch": 0.87, "grad_norm": 1.3794258242387318, "learning_rate": 4.2388208807302156e-07, "loss": 0.5857, "step": 8539 }, { "epoch": 0.87, "grad_norm": 1.5010525231250929, "learning_rate": 4.2321600852043623e-07, "loss": 0.6457, "step": 8540 }, { "epoch": 0.87, "grad_norm": 1.4471814524994056, "learning_rate": 4.225504295834532e-07, "loss": 0.6683, "step": 8541 }, { "epoch": 0.87, "grad_norm": 1.515963485324267, "learning_rate": 4.2188535133487527e-07, "loss": 0.6929, "step": 8542 }, { "epoch": 0.87, "grad_norm": 1.4884729488303592, "learning_rate": 4.2122077384745107e-07, "loss": 0.6763, "step": 8543 }, { "epoch": 0.87, "grad_norm": 1.8431039617975506, "learning_rate": 4.205566971938718e-07, "loss": 0.6328, "step": 8544 }, { "epoch": 0.87, "grad_norm": 1.4275219760888338, "learning_rate": 4.198931214467755e-07, "loss": 0.5303, "step": 8545 }, { "epoch": 0.87, "grad_norm": 1.879440199704678, "learning_rate": 4.192300466787447e-07, "loss": 0.7345, "step": 8546 }, { "epoch": 0.87, "grad_norm": 1.3799759617687042, "learning_rate": 4.1856747296230737e-07, "loss": 0.6239, "step": 8547 }, { "epoch": 0.87, "grad_norm": 1.5935499388212169, "learning_rate": 4.179054003699384e-07, "loss": 0.7182, "step": 8548 }, { "epoch": 0.87, "grad_norm": 1.377044150472, "learning_rate": 4.1724382897405545e-07, "loss": 0.5677, "step": 8549 }, { "epoch": 0.87, "grad_norm": 1.5250990579396007, "learning_rate": 4.1658275884702113e-07, "loss": 0.6724, "step": 8550 }, { "epoch": 0.87, "grad_norm": 1.5387096268282987, "learning_rate": 4.1592219006114597e-07, "loss": 0.5934, "step": 8551 }, { "epoch": 0.87, "grad_norm": 1.481874309599043, "learning_rate": 4.1526212268868215e-07, "loss": 0.6746, "step": 8552 }, { "epoch": 0.87, "grad_norm": 1.4673180627920306, "learning_rate": 4.1460255680183027e-07, "loss": 0.6058, "step": 8553 }, { "epoch": 0.87, "grad_norm": 1.4873077058765856, "learning_rate": 4.139434924727359e-07, "loss": 0.5924, "step": 8554 }, { "epoch": 0.87, "grad_norm": 1.7473788280739069, "learning_rate": 4.132849297734848e-07, "loss": 0.694, "step": 8555 }, { "epoch": 0.87, "grad_norm": 1.541861047221313, "learning_rate": 4.1262686877611424e-07, "loss": 0.7072, "step": 8556 }, { "epoch": 0.87, "grad_norm": 1.6161143639681346, "learning_rate": 4.119693095526034e-07, "loss": 0.6176, "step": 8557 }, { "epoch": 0.87, "grad_norm": 1.5197365610915037, "learning_rate": 4.11312252174878e-07, "loss": 0.5869, "step": 8558 }, { "epoch": 0.87, "grad_norm": 1.5978992959317566, "learning_rate": 4.106556967148062e-07, "loss": 0.6184, "step": 8559 }, { "epoch": 0.87, "grad_norm": 1.5722141801546299, "learning_rate": 4.0999964324420327e-07, "loss": 0.6968, "step": 8560 }, { "epoch": 0.87, "grad_norm": 1.540673889305145, "learning_rate": 4.0934409183483017e-07, "loss": 0.7268, "step": 8561 }, { "epoch": 0.87, "grad_norm": 1.4307890561284198, "learning_rate": 4.086890425583928e-07, "loss": 0.662, "step": 8562 }, { "epoch": 0.87, "grad_norm": 1.6849399881480602, "learning_rate": 4.0803449548654005e-07, "loss": 0.6119, "step": 8563 }, { "epoch": 0.87, "grad_norm": 1.5796712103006285, "learning_rate": 4.073804506908674e-07, "loss": 0.6812, "step": 8564 }, { "epoch": 0.87, "grad_norm": 1.556044524714824, "learning_rate": 4.067269082429159e-07, "loss": 0.6491, "step": 8565 }, { "epoch": 0.87, "grad_norm": 2.312664962901317, "learning_rate": 4.0607386821417015e-07, "loss": 0.6032, "step": 8566 }, { "epoch": 0.87, "grad_norm": 1.651986225115617, "learning_rate": 4.0542133067606125e-07, "loss": 0.7555, "step": 8567 }, { "epoch": 0.87, "grad_norm": 1.4129469245926871, "learning_rate": 4.047692956999666e-07, "loss": 0.6814, "step": 8568 }, { "epoch": 0.88, "grad_norm": 1.4451899518621452, "learning_rate": 4.04117763357203e-07, "loss": 0.6328, "step": 8569 }, { "epoch": 0.88, "grad_norm": 1.699394856235744, "learning_rate": 4.0346673371903897e-07, "loss": 0.6025, "step": 8570 }, { "epoch": 0.88, "grad_norm": 1.4395065829887383, "learning_rate": 4.028162068566832e-07, "loss": 0.5983, "step": 8571 }, { "epoch": 0.88, "grad_norm": 1.601163915787562, "learning_rate": 4.0216618284129205e-07, "loss": 0.5696, "step": 8572 }, { "epoch": 0.88, "grad_norm": 1.4795660386185845, "learning_rate": 4.0151666174396807e-07, "loss": 0.7488, "step": 8573 }, { "epoch": 0.88, "grad_norm": 1.4587856206560228, "learning_rate": 4.008676436357539e-07, "loss": 0.5998, "step": 8574 }, { "epoch": 0.88, "grad_norm": 1.5536593825866913, "learning_rate": 4.002191285876411e-07, "loss": 0.626, "step": 8575 }, { "epoch": 0.88, "grad_norm": 1.5806960442990226, "learning_rate": 3.9957111667056623e-07, "loss": 0.6822, "step": 8576 }, { "epoch": 0.88, "grad_norm": 1.5640506343837917, "learning_rate": 3.9892360795540873e-07, "loss": 0.5498, "step": 8577 }, { "epoch": 0.88, "grad_norm": 2.512141091180076, "learning_rate": 3.982766025129942e-07, "loss": 0.653, "step": 8578 }, { "epoch": 0.88, "grad_norm": 1.6975644330062964, "learning_rate": 3.9763010041409364e-07, "loss": 0.6453, "step": 8579 }, { "epoch": 0.88, "grad_norm": 1.472169771543175, "learning_rate": 3.9698410172942126e-07, "loss": 0.7054, "step": 8580 }, { "epoch": 0.88, "grad_norm": 1.4929915998385648, "learning_rate": 3.9633860652963873e-07, "loss": 0.6917, "step": 8581 }, { "epoch": 0.88, "grad_norm": 1.363002527889631, "learning_rate": 3.9569361488535187e-07, "loss": 0.5585, "step": 8582 }, { "epoch": 0.88, "grad_norm": 1.5565912906584431, "learning_rate": 3.950491268671086e-07, "loss": 0.5502, "step": 8583 }, { "epoch": 0.88, "grad_norm": 1.5251959201462355, "learning_rate": 3.9440514254540595e-07, "loss": 0.6029, "step": 8584 }, { "epoch": 0.88, "grad_norm": 1.4125524797534288, "learning_rate": 3.93761661990682e-07, "loss": 0.6298, "step": 8585 }, { "epoch": 0.88, "grad_norm": 1.5197917771547507, "learning_rate": 3.9311868527332267e-07, "loss": 0.6257, "step": 8586 }, { "epoch": 0.88, "grad_norm": 1.5428256193032435, "learning_rate": 3.9247621246365996e-07, "loss": 0.7182, "step": 8587 }, { "epoch": 0.88, "grad_norm": 1.4494417540969577, "learning_rate": 3.918342436319644e-07, "loss": 0.6442, "step": 8588 }, { "epoch": 0.88, "grad_norm": 1.5409043386474297, "learning_rate": 3.911927788484576e-07, "loss": 0.6791, "step": 8589 }, { "epoch": 0.88, "grad_norm": 1.3610891630214024, "learning_rate": 3.905518181833046e-07, "loss": 0.6506, "step": 8590 }, { "epoch": 0.88, "grad_norm": 1.622783620005543, "learning_rate": 3.8991136170661414e-07, "loss": 0.7179, "step": 8591 }, { "epoch": 0.88, "grad_norm": 1.4907920590888608, "learning_rate": 3.8927140948843924e-07, "loss": 0.5636, "step": 8592 }, { "epoch": 0.88, "grad_norm": 1.4052935206224129, "learning_rate": 3.886319615987805e-07, "loss": 0.53, "step": 8593 }, { "epoch": 0.88, "grad_norm": 1.5737558355038874, "learning_rate": 3.879930181075797e-07, "loss": 0.6139, "step": 8594 }, { "epoch": 0.88, "grad_norm": 1.8228925727244438, "learning_rate": 3.873545790847272e-07, "loss": 0.6104, "step": 8595 }, { "epoch": 0.88, "grad_norm": 1.7079118253640206, "learning_rate": 3.86716644600057e-07, "loss": 0.7732, "step": 8596 }, { "epoch": 0.88, "grad_norm": 1.5203642620545275, "learning_rate": 3.86079214723345e-07, "loss": 0.6964, "step": 8597 }, { "epoch": 0.88, "grad_norm": 1.3651613283892061, "learning_rate": 3.854422895243165e-07, "loss": 0.6204, "step": 8598 }, { "epoch": 0.88, "grad_norm": 1.5167635256921137, "learning_rate": 3.8480586907263695e-07, "loss": 0.5953, "step": 8599 }, { "epoch": 0.88, "grad_norm": 1.4115271075713707, "learning_rate": 3.841699534379206e-07, "loss": 0.6252, "step": 8600 }, { "epoch": 0.88, "grad_norm": 1.5307070675475345, "learning_rate": 3.8353454268972634e-07, "loss": 0.6117, "step": 8601 }, { "epoch": 0.88, "grad_norm": 1.5096343295431656, "learning_rate": 3.8289963689755293e-07, "loss": 0.5504, "step": 8602 }, { "epoch": 0.88, "grad_norm": 1.4520841812560699, "learning_rate": 3.822652361308493e-07, "loss": 0.5897, "step": 8603 }, { "epoch": 0.88, "grad_norm": 1.5551489846667013, "learning_rate": 3.8163134045900774e-07, "loss": 0.5861, "step": 8604 }, { "epoch": 0.88, "grad_norm": 1.8478686225240697, "learning_rate": 3.809979499513633e-07, "loss": 0.6891, "step": 8605 }, { "epoch": 0.88, "grad_norm": 1.3847021466517988, "learning_rate": 3.8036506467719834e-07, "loss": 0.6781, "step": 8606 }, { "epoch": 0.88, "grad_norm": 1.5727982652783439, "learning_rate": 3.797326847057381e-07, "loss": 0.6972, "step": 8607 }, { "epoch": 0.88, "grad_norm": 1.7002089206494682, "learning_rate": 3.7910081010615274e-07, "loss": 0.6758, "step": 8608 }, { "epoch": 0.88, "grad_norm": 1.3946204165548444, "learning_rate": 3.7846944094755867e-07, "loss": 0.5777, "step": 8609 }, { "epoch": 0.88, "grad_norm": 1.8875356247551716, "learning_rate": 3.778385772990173e-07, "loss": 0.6812, "step": 8610 }, { "epoch": 0.88, "grad_norm": 1.272038543504152, "learning_rate": 3.772082192295301e-07, "loss": 0.6312, "step": 8611 }, { "epoch": 0.88, "grad_norm": 1.524095294153642, "learning_rate": 3.765783668080497e-07, "loss": 0.6686, "step": 8612 }, { "epoch": 0.88, "grad_norm": 2.3212636150345807, "learning_rate": 3.759490201034677e-07, "loss": 0.6537, "step": 8613 }, { "epoch": 0.88, "grad_norm": 1.4634024608931613, "learning_rate": 3.753201791846245e-07, "loss": 0.6528, "step": 8614 }, { "epoch": 0.88, "grad_norm": 1.4724959598617737, "learning_rate": 3.7469184412030514e-07, "loss": 0.7003, "step": 8615 }, { "epoch": 0.88, "grad_norm": 1.3392976813651607, "learning_rate": 3.7406401497923404e-07, "loss": 0.554, "step": 8616 }, { "epoch": 0.88, "grad_norm": 1.3218672826676647, "learning_rate": 3.734366918300869e-07, "loss": 0.59, "step": 8617 }, { "epoch": 0.88, "grad_norm": 1.4920908297732736, "learning_rate": 3.7280987474148103e-07, "loss": 0.6044, "step": 8618 }, { "epoch": 0.88, "grad_norm": 1.76321108867294, "learning_rate": 3.7218356378197817e-07, "loss": 0.6913, "step": 8619 }, { "epoch": 0.88, "grad_norm": 1.594586136337676, "learning_rate": 3.7155775902008527e-07, "loss": 0.5576, "step": 8620 }, { "epoch": 0.88, "grad_norm": 1.52982886232493, "learning_rate": 3.7093246052425424e-07, "loss": 0.6486, "step": 8621 }, { "epoch": 0.88, "grad_norm": 1.4956332669913441, "learning_rate": 3.703076683628798e-07, "loss": 0.6525, "step": 8622 }, { "epoch": 0.88, "grad_norm": 1.4354176847634748, "learning_rate": 3.696833826043039e-07, "loss": 0.6174, "step": 8623 }, { "epoch": 0.88, "grad_norm": 1.3593581050050858, "learning_rate": 3.6905960331681254e-07, "loss": 0.651, "step": 8624 }, { "epoch": 0.88, "grad_norm": 1.3955901042381713, "learning_rate": 3.6843633056863393e-07, "loss": 0.5996, "step": 8625 }, { "epoch": 0.88, "grad_norm": 1.519980758862165, "learning_rate": 3.6781356442794346e-07, "loss": 0.6742, "step": 8626 }, { "epoch": 0.88, "grad_norm": 1.638379661190817, "learning_rate": 3.671913049628595e-07, "loss": 0.7138, "step": 8627 }, { "epoch": 0.88, "grad_norm": 1.4166031732152395, "learning_rate": 3.665695522414464e-07, "loss": 0.4945, "step": 8628 }, { "epoch": 0.88, "grad_norm": 1.599412580840232, "learning_rate": 3.659483063317143e-07, "loss": 0.5337, "step": 8629 }, { "epoch": 0.88, "grad_norm": 1.480428026935247, "learning_rate": 3.653275673016121e-07, "loss": 0.6105, "step": 8630 }, { "epoch": 0.88, "grad_norm": 1.3574622142786759, "learning_rate": 3.6470733521903945e-07, "loss": 0.7593, "step": 8631 }, { "epoch": 0.88, "grad_norm": 1.3832280978167601, "learning_rate": 3.640876101518387e-07, "loss": 0.6865, "step": 8632 }, { "epoch": 0.88, "grad_norm": 1.5571358612143693, "learning_rate": 3.6346839216779506e-07, "loss": 0.696, "step": 8633 }, { "epoch": 0.88, "grad_norm": 1.621904919256557, "learning_rate": 3.628496813346405e-07, "loss": 0.63, "step": 8634 }, { "epoch": 0.88, "grad_norm": 1.3802106175656896, "learning_rate": 3.622314777200503e-07, "loss": 0.6138, "step": 8635 }, { "epoch": 0.88, "grad_norm": 1.423662404152667, "learning_rate": 3.616137813916437e-07, "loss": 0.6318, "step": 8636 }, { "epoch": 0.88, "grad_norm": 1.6512580882233656, "learning_rate": 3.6099659241698616e-07, "loss": 0.5163, "step": 8637 }, { "epoch": 0.88, "grad_norm": 1.5097647918912873, "learning_rate": 3.6037991086358693e-07, "loss": 0.6254, "step": 8638 }, { "epoch": 0.88, "grad_norm": 1.4638389286584115, "learning_rate": 3.5976373679889933e-07, "loss": 0.6289, "step": 8639 }, { "epoch": 0.88, "grad_norm": 1.4694601898430253, "learning_rate": 3.5914807029032104e-07, "loss": 0.6591, "step": 8640 }, { "epoch": 0.88, "grad_norm": 1.679275901747964, "learning_rate": 3.585329114051944e-07, "loss": 0.643, "step": 8641 }, { "epoch": 0.88, "grad_norm": 1.5886383600616474, "learning_rate": 3.5791826021080665e-07, "loss": 0.6103, "step": 8642 }, { "epoch": 0.88, "grad_norm": 1.340088988346388, "learning_rate": 3.5730411677439125e-07, "loss": 0.5428, "step": 8643 }, { "epoch": 0.88, "grad_norm": 1.4550154246710745, "learning_rate": 3.566904811631211e-07, "loss": 0.6661, "step": 8644 }, { "epoch": 0.88, "grad_norm": 1.7038788185789233, "learning_rate": 3.560773534441175e-07, "loss": 0.7447, "step": 8645 }, { "epoch": 0.88, "grad_norm": 1.5731458423652698, "learning_rate": 3.5546473368444635e-07, "loss": 0.7273, "step": 8646 }, { "epoch": 0.88, "grad_norm": 1.6001812685283756, "learning_rate": 3.5485262195111557e-07, "loss": 0.6769, "step": 8647 }, { "epoch": 0.88, "grad_norm": 1.302831302762893, "learning_rate": 3.5424101831108006e-07, "loss": 0.6959, "step": 8648 }, { "epoch": 0.88, "grad_norm": 1.5000791354168832, "learning_rate": 3.5362992283123785e-07, "loss": 0.6061, "step": 8649 }, { "epoch": 0.88, "grad_norm": 1.5604711285404589, "learning_rate": 3.5301933557843006e-07, "loss": 0.6706, "step": 8650 }, { "epoch": 0.88, "grad_norm": 1.3739070836996923, "learning_rate": 3.524092566194448e-07, "loss": 0.5525, "step": 8651 }, { "epoch": 0.88, "grad_norm": 1.4807627273460275, "learning_rate": 3.5179968602101434e-07, "loss": 0.6455, "step": 8652 }, { "epoch": 0.88, "grad_norm": 1.327479908713475, "learning_rate": 3.511906238498136e-07, "loss": 0.6329, "step": 8653 }, { "epoch": 0.88, "grad_norm": 1.3476143168588963, "learning_rate": 3.5058207017246215e-07, "loss": 0.6105, "step": 8654 }, { "epoch": 0.88, "grad_norm": 1.5733431147121548, "learning_rate": 3.499740250555239e-07, "loss": 0.723, "step": 8655 }, { "epoch": 0.88, "grad_norm": 1.4049122235370552, "learning_rate": 3.4936648856550903e-07, "loss": 0.5648, "step": 8656 }, { "epoch": 0.88, "grad_norm": 1.3641606533625714, "learning_rate": 3.4875946076887214e-07, "loss": 0.6275, "step": 8657 }, { "epoch": 0.88, "grad_norm": 1.3939929021578548, "learning_rate": 3.4815294173200786e-07, "loss": 0.6112, "step": 8658 }, { "epoch": 0.88, "grad_norm": 1.4452317960656857, "learning_rate": 3.475469315212593e-07, "loss": 0.7136, "step": 8659 }, { "epoch": 0.88, "grad_norm": 1.5616360468114983, "learning_rate": 3.4694143020291395e-07, "loss": 0.6504, "step": 8660 }, { "epoch": 0.88, "grad_norm": 1.5355193682546455, "learning_rate": 3.4633643784320103e-07, "loss": 0.6644, "step": 8661 }, { "epoch": 0.88, "grad_norm": 1.653144563917384, "learning_rate": 3.4573195450829654e-07, "loss": 0.6816, "step": 8662 }, { "epoch": 0.88, "grad_norm": 1.5755693019077681, "learning_rate": 3.451279802643193e-07, "loss": 0.6763, "step": 8663 }, { "epoch": 0.88, "grad_norm": 1.4889530472462384, "learning_rate": 3.445245151773324e-07, "loss": 0.5905, "step": 8664 }, { "epoch": 0.88, "grad_norm": 1.538148848737708, "learning_rate": 3.4392155931334434e-07, "loss": 0.706, "step": 8665 }, { "epoch": 0.88, "grad_norm": 1.3255037494941424, "learning_rate": 3.433191127383079e-07, "loss": 0.7342, "step": 8666 }, { "epoch": 0.89, "grad_norm": 1.4123316709588598, "learning_rate": 3.427171755181191e-07, "loss": 0.6675, "step": 8667 }, { "epoch": 0.89, "grad_norm": 1.4432599563729913, "learning_rate": 3.421157477186182e-07, "loss": 0.5888, "step": 8668 }, { "epoch": 0.89, "grad_norm": 1.446284374298967, "learning_rate": 3.4151482940559086e-07, "loss": 0.6725, "step": 8669 }, { "epoch": 0.89, "grad_norm": 1.628769133575037, "learning_rate": 3.4091442064476565e-07, "loss": 0.5778, "step": 8670 }, { "epoch": 0.89, "grad_norm": 1.3488359436897266, "learning_rate": 3.403145215018183e-07, "loss": 0.6456, "step": 8671 }, { "epoch": 0.89, "grad_norm": 1.3153540752296937, "learning_rate": 3.397151320423647e-07, "loss": 0.6244, "step": 8672 }, { "epoch": 0.89, "grad_norm": 1.5714050909704476, "learning_rate": 3.3911625233196687e-07, "loss": 0.687, "step": 8673 }, { "epoch": 0.89, "grad_norm": 1.4301860892083673, "learning_rate": 3.385178824361329e-07, "loss": 0.5864, "step": 8674 }, { "epoch": 0.89, "grad_norm": 1.4052455151465293, "learning_rate": 3.3792002242031164e-07, "loss": 0.6752, "step": 8675 }, { "epoch": 0.89, "grad_norm": 1.4757924017479684, "learning_rate": 3.37322672349899e-07, "loss": 0.6552, "step": 8676 }, { "epoch": 0.89, "grad_norm": 3.3763513626755106, "learning_rate": 3.3672583229023446e-07, "loss": 0.5982, "step": 8677 }, { "epoch": 0.89, "grad_norm": 1.5581985170934305, "learning_rate": 3.361295023065991e-07, "loss": 0.4949, "step": 8678 }, { "epoch": 0.89, "grad_norm": 1.392100101688895, "learning_rate": 3.3553368246422245e-07, "loss": 0.5729, "step": 8679 }, { "epoch": 0.89, "grad_norm": 1.455535526579705, "learning_rate": 3.349383728282757e-07, "loss": 0.7141, "step": 8680 }, { "epoch": 0.89, "grad_norm": 1.7817456320561047, "learning_rate": 3.34343573463875e-07, "loss": 0.5129, "step": 8681 }, { "epoch": 0.89, "grad_norm": 1.641281205863446, "learning_rate": 3.337492844360801e-07, "loss": 0.6196, "step": 8682 }, { "epoch": 0.89, "grad_norm": 1.570765366190393, "learning_rate": 3.33155505809894e-07, "loss": 0.6357, "step": 8683 }, { "epoch": 0.89, "grad_norm": 1.4445892161344882, "learning_rate": 3.3256223765026685e-07, "loss": 0.5999, "step": 8684 }, { "epoch": 0.89, "grad_norm": 1.4763301033248353, "learning_rate": 3.319694800220907e-07, "loss": 0.6803, "step": 8685 }, { "epoch": 0.89, "grad_norm": 1.4878457877016755, "learning_rate": 3.3137723299020265e-07, "loss": 0.7034, "step": 8686 }, { "epoch": 0.89, "grad_norm": 1.425741684797958, "learning_rate": 3.307854966193824e-07, "loss": 0.65, "step": 8687 }, { "epoch": 0.89, "grad_norm": 1.4195643118451682, "learning_rate": 3.301942709743566e-07, "loss": 0.6196, "step": 8688 }, { "epoch": 0.89, "grad_norm": 1.5749198261708197, "learning_rate": 3.2960355611979245e-07, "loss": 0.682, "step": 8689 }, { "epoch": 0.89, "grad_norm": 1.7099940440786943, "learning_rate": 3.290133521203054e-07, "loss": 0.6809, "step": 8690 }, { "epoch": 0.89, "grad_norm": 1.5595138921334784, "learning_rate": 3.2842365904045213e-07, "loss": 0.7685, "step": 8691 }, { "epoch": 0.89, "grad_norm": 1.3079512277335628, "learning_rate": 3.2783447694473224e-07, "loss": 0.5932, "step": 8692 }, { "epoch": 0.89, "grad_norm": 1.5535113143849646, "learning_rate": 3.2724580589759414e-07, "loss": 0.6456, "step": 8693 }, { "epoch": 0.89, "grad_norm": 1.21606565305491, "learning_rate": 3.266576459634257e-07, "loss": 0.6291, "step": 8694 }, { "epoch": 0.89, "grad_norm": 1.7221652455148473, "learning_rate": 3.2606999720656227e-07, "loss": 0.6395, "step": 8695 }, { "epoch": 0.89, "grad_norm": 1.4661305473148962, "learning_rate": 3.254828596912807e-07, "loss": 0.5872, "step": 8696 }, { "epoch": 0.89, "grad_norm": 2.1099216375761913, "learning_rate": 3.2489623348180287e-07, "loss": 0.6295, "step": 8697 }, { "epoch": 0.89, "grad_norm": 1.2227763831188472, "learning_rate": 3.243101186422948e-07, "loss": 0.6445, "step": 8698 }, { "epoch": 0.89, "grad_norm": 1.5312051616837432, "learning_rate": 3.2372451523686785e-07, "loss": 0.5656, "step": 8699 }, { "epoch": 0.89, "grad_norm": 1.3319742413679168, "learning_rate": 3.231394233295754e-07, "loss": 0.4846, "step": 8700 }, { "epoch": 0.89, "grad_norm": 1.3438593074995324, "learning_rate": 3.22554842984415e-07, "loss": 0.6042, "step": 8701 }, { "epoch": 0.89, "grad_norm": 1.4898553638781804, "learning_rate": 3.219707742653305e-07, "loss": 0.7009, "step": 8702 }, { "epoch": 0.89, "grad_norm": 1.5339087817667125, "learning_rate": 3.213872172362065e-07, "loss": 0.5972, "step": 8703 }, { "epoch": 0.89, "grad_norm": 1.5684022963094184, "learning_rate": 3.2080417196087455e-07, "loss": 0.5383, "step": 8704 }, { "epoch": 0.89, "grad_norm": 1.6355960695301983, "learning_rate": 3.2022163850310925e-07, "loss": 0.7067, "step": 8705 }, { "epoch": 0.89, "grad_norm": 1.4858022497769965, "learning_rate": 3.196396169266275e-07, "loss": 0.6277, "step": 8706 }, { "epoch": 0.89, "grad_norm": 1.5865044628017344, "learning_rate": 3.190581072950932e-07, "loss": 0.6226, "step": 8707 }, { "epoch": 0.89, "grad_norm": 1.5530220677104283, "learning_rate": 3.184771096721118e-07, "loss": 0.6878, "step": 8708 }, { "epoch": 0.89, "grad_norm": 1.394697964734026, "learning_rate": 3.1789662412123444e-07, "loss": 0.6731, "step": 8709 }, { "epoch": 0.89, "grad_norm": 1.5128275650966174, "learning_rate": 3.1731665070595554e-07, "loss": 0.6151, "step": 8710 }, { "epoch": 0.89, "grad_norm": 1.511463469311002, "learning_rate": 3.16737189489712e-07, "loss": 0.6151, "step": 8711 }, { "epoch": 0.89, "grad_norm": 1.4459953339384666, "learning_rate": 3.1615824053588764e-07, "loss": 0.6762, "step": 8712 }, { "epoch": 0.89, "grad_norm": 1.6120582188758898, "learning_rate": 3.1557980390780894e-07, "loss": 0.6307, "step": 8713 }, { "epoch": 0.89, "grad_norm": 1.47016417057337, "learning_rate": 3.150018796687454e-07, "loss": 0.5847, "step": 8714 }, { "epoch": 0.89, "grad_norm": 1.731645370445526, "learning_rate": 3.1442446788191137e-07, "loss": 0.6741, "step": 8715 }, { "epoch": 0.89, "grad_norm": 1.427634685360872, "learning_rate": 3.1384756861046584e-07, "loss": 0.5879, "step": 8716 }, { "epoch": 0.89, "grad_norm": 2.020409381145353, "learning_rate": 3.1327118191750884e-07, "loss": 0.7095, "step": 8717 }, { "epoch": 0.89, "grad_norm": 1.5674602180083927, "learning_rate": 3.1269530786608935e-07, "loss": 0.5918, "step": 8718 }, { "epoch": 0.89, "grad_norm": 1.4777904876332628, "learning_rate": 3.121199465191954e-07, "loss": 0.5404, "step": 8719 }, { "epoch": 0.89, "grad_norm": 1.3498345913534797, "learning_rate": 3.115450979397605e-07, "loss": 0.622, "step": 8720 }, { "epoch": 0.89, "grad_norm": 1.669264298076038, "learning_rate": 3.1097076219066436e-07, "loss": 0.5804, "step": 8721 }, { "epoch": 0.89, "grad_norm": 1.5934435218144773, "learning_rate": 3.103969393347267e-07, "loss": 0.6449, "step": 8722 }, { "epoch": 0.89, "grad_norm": 1.4022984528745586, "learning_rate": 3.0982362943471514e-07, "loss": 0.6338, "step": 8723 }, { "epoch": 0.89, "grad_norm": 1.619035364374532, "learning_rate": 3.0925083255333777e-07, "loss": 0.7182, "step": 8724 }, { "epoch": 0.89, "grad_norm": 1.6350590542168488, "learning_rate": 3.086785487532479e-07, "loss": 0.6155, "step": 8725 }, { "epoch": 0.89, "grad_norm": 1.484064541991886, "learning_rate": 3.0810677809704317e-07, "loss": 0.5265, "step": 8726 }, { "epoch": 0.89, "grad_norm": 1.4656591116848507, "learning_rate": 3.075355206472652e-07, "loss": 0.659, "step": 8727 }, { "epoch": 0.89, "grad_norm": 1.490879749072011, "learning_rate": 3.069647764663991e-07, "loss": 0.6363, "step": 8728 }, { "epoch": 0.89, "grad_norm": 1.6711431798679288, "learning_rate": 3.063945456168721e-07, "loss": 0.7784, "step": 8729 }, { "epoch": 0.89, "grad_norm": 1.825938735737826, "learning_rate": 3.058248281610593e-07, "loss": 0.6394, "step": 8730 }, { "epoch": 0.89, "grad_norm": 1.5077202405796026, "learning_rate": 3.0525562416127473e-07, "loss": 0.619, "step": 8731 }, { "epoch": 0.89, "grad_norm": 1.5817489415114092, "learning_rate": 3.046869336797814e-07, "loss": 0.6025, "step": 8732 }, { "epoch": 0.89, "grad_norm": 1.4751367127922963, "learning_rate": 3.0411875677878177e-07, "loss": 0.5849, "step": 8733 }, { "epoch": 0.89, "grad_norm": 1.6093394614720669, "learning_rate": 3.035510935204233e-07, "loss": 0.5748, "step": 8734 }, { "epoch": 0.89, "grad_norm": 1.429941979483186, "learning_rate": 3.029839439668003e-07, "loss": 0.6636, "step": 8735 }, { "epoch": 0.89, "grad_norm": 1.4495398965316135, "learning_rate": 3.0241730817994586e-07, "loss": 0.648, "step": 8736 }, { "epoch": 0.89, "grad_norm": 1.5504250581224732, "learning_rate": 3.0185118622184204e-07, "loss": 0.6412, "step": 8737 }, { "epoch": 0.89, "grad_norm": 1.4435493767366294, "learning_rate": 3.0128557815440994e-07, "loss": 0.5985, "step": 8738 }, { "epoch": 0.89, "grad_norm": 1.582735062295263, "learning_rate": 3.007204840395173e-07, "loss": 0.6932, "step": 8739 }, { "epoch": 0.89, "grad_norm": 1.4627159654751833, "learning_rate": 3.001559039389751e-07, "loss": 0.7065, "step": 8740 }, { "epoch": 0.89, "grad_norm": 1.4190378089060693, "learning_rate": 2.995918379145385e-07, "loss": 0.5619, "step": 8741 }, { "epoch": 0.89, "grad_norm": 1.3845032480706216, "learning_rate": 2.9902828602790537e-07, "loss": 0.6424, "step": 8742 }, { "epoch": 0.89, "grad_norm": 1.3773587411125559, "learning_rate": 2.984652483407169e-07, "loss": 0.6431, "step": 8743 }, { "epoch": 0.89, "grad_norm": 1.9042817722086856, "learning_rate": 2.979027249145611e-07, "loss": 0.7581, "step": 8744 }, { "epoch": 0.89, "grad_norm": 1.3078796658630982, "learning_rate": 2.973407158109659e-07, "loss": 0.6465, "step": 8745 }, { "epoch": 0.89, "grad_norm": 1.5735938751121727, "learning_rate": 2.967792210914061e-07, "loss": 0.7569, "step": 8746 }, { "epoch": 0.89, "grad_norm": 1.596004255505339, "learning_rate": 2.9621824081729755e-07, "loss": 0.6242, "step": 8747 }, { "epoch": 0.89, "grad_norm": 1.488617127988243, "learning_rate": 2.956577750500017e-07, "loss": 0.6763, "step": 8748 }, { "epoch": 0.89, "grad_norm": 1.5803839090617884, "learning_rate": 2.950978238508234e-07, "loss": 0.5847, "step": 8749 }, { "epoch": 0.89, "grad_norm": 1.4576835805365056, "learning_rate": 2.945383872810098e-07, "loss": 0.7466, "step": 8750 }, { "epoch": 0.89, "grad_norm": 1.6281675233179744, "learning_rate": 2.939794654017547e-07, "loss": 0.5936, "step": 8751 }, { "epoch": 0.89, "grad_norm": 1.4298889100867067, "learning_rate": 2.934210582741931e-07, "loss": 0.7982, "step": 8752 }, { "epoch": 0.89, "grad_norm": 2.2391683674319487, "learning_rate": 2.9286316595940334e-07, "loss": 0.7188, "step": 8753 }, { "epoch": 0.89, "grad_norm": 1.480727416130141, "learning_rate": 2.9230578851840997e-07, "loss": 0.5495, "step": 8754 }, { "epoch": 0.89, "grad_norm": 1.4701118556544879, "learning_rate": 2.917489260121792e-07, "loss": 0.6098, "step": 8755 }, { "epoch": 0.89, "grad_norm": 1.6835404912201348, "learning_rate": 2.911925785016223e-07, "loss": 0.6425, "step": 8756 }, { "epoch": 0.89, "grad_norm": 1.341383659494788, "learning_rate": 2.906367460475912e-07, "loss": 0.6303, "step": 8757 }, { "epoch": 0.89, "grad_norm": 1.5345109948809967, "learning_rate": 2.9008142871088665e-07, "loss": 0.6876, "step": 8758 }, { "epoch": 0.89, "grad_norm": 1.4653717193755411, "learning_rate": 2.895266265522473e-07, "loss": 0.6246, "step": 8759 }, { "epoch": 0.89, "grad_norm": 1.681384525925425, "learning_rate": 2.889723396323607e-07, "loss": 0.6769, "step": 8760 }, { "epoch": 0.89, "grad_norm": 1.668887066865573, "learning_rate": 2.884185680118545e-07, "loss": 0.6381, "step": 8761 }, { "epoch": 0.89, "grad_norm": 1.603328214786395, "learning_rate": 2.8786531175130026e-07, "loss": 0.6805, "step": 8762 }, { "epoch": 0.89, "grad_norm": 1.2915903954694528, "learning_rate": 2.873125709112151e-07, "loss": 0.5471, "step": 8763 }, { "epoch": 0.89, "grad_norm": 1.5016619821282025, "learning_rate": 2.867603455520579e-07, "loss": 0.5893, "step": 8764 }, { "epoch": 0.9, "grad_norm": 1.700105837842569, "learning_rate": 2.862086357342331e-07, "loss": 0.6518, "step": 8765 }, { "epoch": 0.9, "grad_norm": 1.6209167870874992, "learning_rate": 2.856574415180874e-07, "loss": 0.656, "step": 8766 }, { "epoch": 0.9, "grad_norm": 1.5575110568587813, "learning_rate": 2.8510676296390925e-07, "loss": 0.6736, "step": 8767 }, { "epoch": 0.9, "grad_norm": 1.570942464988319, "learning_rate": 2.8455660013193444e-07, "loss": 0.7287, "step": 8768 }, { "epoch": 0.9, "grad_norm": 1.7054672200910634, "learning_rate": 2.8400695308234084e-07, "loss": 0.5755, "step": 8769 }, { "epoch": 0.9, "grad_norm": 1.549574176586758, "learning_rate": 2.8345782187524926e-07, "loss": 0.7608, "step": 8770 }, { "epoch": 0.9, "grad_norm": 1.7918212500268502, "learning_rate": 2.82909206570724e-07, "loss": 0.7463, "step": 8771 }, { "epoch": 0.9, "grad_norm": 1.3965511212892918, "learning_rate": 2.8236110722877406e-07, "loss": 0.7345, "step": 8772 }, { "epoch": 0.9, "grad_norm": 2.091534361557751, "learning_rate": 2.8181352390935057e-07, "loss": 0.6616, "step": 8773 }, { "epoch": 0.9, "grad_norm": 1.2437013788772533, "learning_rate": 2.8126645667235055e-07, "loss": 0.4618, "step": 8774 }, { "epoch": 0.9, "grad_norm": 1.8694474407071289, "learning_rate": 2.807199055776122e-07, "loss": 0.6294, "step": 8775 }, { "epoch": 0.9, "grad_norm": 1.5906677075737632, "learning_rate": 2.801738706849172e-07, "loss": 0.6554, "step": 8776 }, { "epoch": 0.9, "grad_norm": 1.3222248866236255, "learning_rate": 2.7962835205399277e-07, "loss": 0.6358, "step": 8777 }, { "epoch": 0.9, "grad_norm": 1.5612354824780534, "learning_rate": 2.7908334974450835e-07, "loss": 0.6193, "step": 8778 }, { "epoch": 0.9, "grad_norm": 1.4501828581163505, "learning_rate": 2.785388638160774e-07, "loss": 0.6608, "step": 8779 }, { "epoch": 0.9, "grad_norm": 1.55992036360197, "learning_rate": 2.779948943282562e-07, "loss": 0.6793, "step": 8780 }, { "epoch": 0.9, "grad_norm": 1.493929246497157, "learning_rate": 2.774514413405444e-07, "loss": 0.6692, "step": 8781 }, { "epoch": 0.9, "grad_norm": 1.561928343165129, "learning_rate": 2.769085049123865e-07, "loss": 0.589, "step": 8782 }, { "epoch": 0.9, "grad_norm": 1.3473090573795583, "learning_rate": 2.7636608510317075e-07, "loss": 0.6046, "step": 8783 }, { "epoch": 0.9, "grad_norm": 1.5614025151600486, "learning_rate": 2.758241819722263e-07, "loss": 0.5786, "step": 8784 }, { "epoch": 0.9, "grad_norm": 1.5067921556431687, "learning_rate": 2.752827955788268e-07, "loss": 0.6245, "step": 8785 }, { "epoch": 0.9, "grad_norm": 1.4369806550724273, "learning_rate": 2.7474192598219154e-07, "loss": 0.587, "step": 8786 }, { "epoch": 0.9, "grad_norm": 1.33993158013526, "learning_rate": 2.74201573241481e-07, "loss": 0.5197, "step": 8787 }, { "epoch": 0.9, "grad_norm": 1.583952489069777, "learning_rate": 2.736617374157996e-07, "loss": 0.5855, "step": 8788 }, { "epoch": 0.9, "grad_norm": 1.5524950011228391, "learning_rate": 2.731224185641962e-07, "loss": 0.6076, "step": 8789 }, { "epoch": 0.9, "grad_norm": 1.691920392069634, "learning_rate": 2.725836167456608e-07, "loss": 0.645, "step": 8790 }, { "epoch": 0.9, "grad_norm": 1.3826153373017274, "learning_rate": 2.720453320191296e-07, "loss": 0.5609, "step": 8791 }, { "epoch": 0.9, "grad_norm": 1.765385207518334, "learning_rate": 2.715075644434806e-07, "loss": 0.6702, "step": 8792 }, { "epoch": 0.9, "grad_norm": 1.4910296332155615, "learning_rate": 2.7097031407753604e-07, "loss": 0.5696, "step": 8793 }, { "epoch": 0.9, "grad_norm": 1.6768690164146975, "learning_rate": 2.7043358098006067e-07, "loss": 0.7567, "step": 8794 }, { "epoch": 0.9, "grad_norm": 1.6569220988241227, "learning_rate": 2.698973652097625e-07, "loss": 0.6634, "step": 8795 }, { "epoch": 0.9, "grad_norm": 1.5749816795592069, "learning_rate": 2.693616668252952e-07, "loss": 0.578, "step": 8796 }, { "epoch": 0.9, "grad_norm": 1.3125276358131748, "learning_rate": 2.688264858852535e-07, "loss": 0.5665, "step": 8797 }, { "epoch": 0.9, "grad_norm": 1.5741365437599388, "learning_rate": 2.6829182244817675e-07, "loss": 0.6856, "step": 8798 }, { "epoch": 0.9, "grad_norm": 1.4884215090600064, "learning_rate": 2.6775767657254594e-07, "loss": 0.5784, "step": 8799 }, { "epoch": 0.9, "grad_norm": 1.3708250246399623, "learning_rate": 2.6722404831678873e-07, "loss": 0.6218, "step": 8800 }, { "epoch": 0.9, "grad_norm": 1.558824011629495, "learning_rate": 2.666909377392718e-07, "loss": 0.6773, "step": 8801 }, { "epoch": 0.9, "grad_norm": 1.4277205326199713, "learning_rate": 2.661583448983102e-07, "loss": 0.581, "step": 8802 }, { "epoch": 0.9, "grad_norm": 1.8869201983977826, "learning_rate": 2.6562626985215834e-07, "loss": 0.7248, "step": 8803 }, { "epoch": 0.9, "grad_norm": 1.5017458779475312, "learning_rate": 2.6509471265901476e-07, "loss": 0.5745, "step": 8804 }, { "epoch": 0.9, "grad_norm": 1.3538831116358512, "learning_rate": 2.645636733770235e-07, "loss": 0.542, "step": 8805 }, { "epoch": 0.9, "grad_norm": 1.3992349765601357, "learning_rate": 2.6403315206426917e-07, "loss": 0.6106, "step": 8806 }, { "epoch": 0.9, "grad_norm": 1.4500166391910945, "learning_rate": 2.6350314877878203e-07, "loss": 0.5327, "step": 8807 }, { "epoch": 0.9, "grad_norm": 1.5621097137456925, "learning_rate": 2.6297366357853407e-07, "loss": 0.7692, "step": 8808 }, { "epoch": 0.9, "grad_norm": 1.9842759346627907, "learning_rate": 2.624446965214406e-07, "loss": 0.6257, "step": 8809 }, { "epoch": 0.9, "grad_norm": 1.4949822401362782, "learning_rate": 2.6191624766536195e-07, "loss": 0.6564, "step": 8810 }, { "epoch": 0.9, "grad_norm": 1.6507249946558493, "learning_rate": 2.613883170681003e-07, "loss": 0.5717, "step": 8811 }, { "epoch": 0.9, "grad_norm": 1.4614083225908896, "learning_rate": 2.608609047874017e-07, "loss": 0.5718, "step": 8812 }, { "epoch": 0.9, "grad_norm": 1.6297085476512405, "learning_rate": 2.6033401088095433e-07, "loss": 0.6278, "step": 8813 }, { "epoch": 0.9, "grad_norm": 2.1132910362140453, "learning_rate": 2.598076354063922e-07, "loss": 0.5754, "step": 8814 }, { "epoch": 0.9, "grad_norm": 1.3932428104635062, "learning_rate": 2.592817784212892e-07, "loss": 0.6757, "step": 8815 }, { "epoch": 0.9, "grad_norm": 1.8020823538719262, "learning_rate": 2.5875643998316657e-07, "loss": 0.6175, "step": 8816 }, { "epoch": 0.9, "grad_norm": 1.4973496506790012, "learning_rate": 2.58231620149485e-07, "loss": 0.6508, "step": 8817 }, { "epoch": 0.9, "grad_norm": 1.5219393616973127, "learning_rate": 2.577073189776502e-07, "loss": 0.6196, "step": 8818 }, { "epoch": 0.9, "grad_norm": 1.266454255533209, "learning_rate": 2.571835365250119e-07, "loss": 0.677, "step": 8819 }, { "epoch": 0.9, "grad_norm": 1.3512839389130697, "learning_rate": 2.5666027284886095e-07, "loss": 0.6089, "step": 8820 }, { "epoch": 0.9, "grad_norm": 1.5723265459322628, "learning_rate": 2.5613752800643374e-07, "loss": 0.5727, "step": 8821 }, { "epoch": 0.9, "grad_norm": 1.4689907607138653, "learning_rate": 2.5561530205490903e-07, "loss": 0.6355, "step": 8822 }, { "epoch": 0.9, "grad_norm": 1.854923366449092, "learning_rate": 2.5509359505140776e-07, "loss": 0.6854, "step": 8823 }, { "epoch": 0.9, "grad_norm": 1.4055503913709948, "learning_rate": 2.5457240705299534e-07, "loss": 0.5891, "step": 8824 }, { "epoch": 0.9, "grad_norm": 1.6159162168141477, "learning_rate": 2.5405173811668126e-07, "loss": 0.6386, "step": 8825 }, { "epoch": 0.9, "grad_norm": 1.3097617157465324, "learning_rate": 2.5353158829941605e-07, "loss": 0.5973, "step": 8826 }, { "epoch": 0.9, "grad_norm": 1.5422668713008931, "learning_rate": 2.530119576580936e-07, "loss": 0.6595, "step": 8827 }, { "epoch": 0.9, "grad_norm": 1.4265959441813574, "learning_rate": 2.524928462495535e-07, "loss": 0.575, "step": 8828 }, { "epoch": 0.9, "grad_norm": 1.6958250615154005, "learning_rate": 2.5197425413057587e-07, "loss": 0.7628, "step": 8829 }, { "epoch": 0.9, "grad_norm": 1.4159532945897708, "learning_rate": 2.514561813578864e-07, "loss": 0.5829, "step": 8830 }, { "epoch": 0.9, "grad_norm": 1.3900599863752554, "learning_rate": 2.509386279881515e-07, "loss": 0.6131, "step": 8831 }, { "epoch": 0.9, "grad_norm": 1.5455847892503296, "learning_rate": 2.5042159407798193e-07, "loss": 0.5711, "step": 8832 }, { "epoch": 0.9, "grad_norm": 1.9214305235703555, "learning_rate": 2.499050796839325e-07, "loss": 0.6679, "step": 8833 }, { "epoch": 0.9, "grad_norm": 1.4727881976677046, "learning_rate": 2.4938908486249913e-07, "loss": 0.611, "step": 8834 }, { "epoch": 0.9, "grad_norm": 2.557723015189511, "learning_rate": 2.4887360967012386e-07, "loss": 0.5696, "step": 8835 }, { "epoch": 0.9, "grad_norm": 1.5139770551397886, "learning_rate": 2.483586541631888e-07, "loss": 0.6922, "step": 8836 }, { "epoch": 0.9, "grad_norm": 1.415328601992093, "learning_rate": 2.4784421839802007e-07, "loss": 0.6066, "step": 8837 }, { "epoch": 0.9, "grad_norm": 1.401297479013677, "learning_rate": 2.4733030243088875e-07, "loss": 0.5833, "step": 8838 }, { "epoch": 0.9, "grad_norm": 1.4682073121280357, "learning_rate": 2.468169063180076e-07, "loss": 0.5665, "step": 8839 }, { "epoch": 0.9, "grad_norm": 1.487274611132967, "learning_rate": 2.4630403011553226e-07, "loss": 0.5657, "step": 8840 }, { "epoch": 0.9, "grad_norm": 1.5603917129564049, "learning_rate": 2.4579167387956125e-07, "loss": 0.617, "step": 8841 }, { "epoch": 0.9, "grad_norm": 1.3091005901272743, "learning_rate": 2.452798376661386e-07, "loss": 0.5398, "step": 8842 }, { "epoch": 0.9, "grad_norm": 1.3886817067322927, "learning_rate": 2.4476852153124786e-07, "loss": 0.584, "step": 8843 }, { "epoch": 0.9, "grad_norm": 1.459053313490199, "learning_rate": 2.442577255308193e-07, "loss": 0.6546, "step": 8844 }, { "epoch": 0.9, "grad_norm": 1.6007578072531412, "learning_rate": 2.437474497207237e-07, "loss": 0.5305, "step": 8845 }, { "epoch": 0.9, "grad_norm": 1.529304638957831, "learning_rate": 2.432376941567749e-07, "loss": 0.6307, "step": 8846 }, { "epoch": 0.9, "grad_norm": 1.491184527821187, "learning_rate": 2.427284588947326e-07, "loss": 0.5839, "step": 8847 }, { "epoch": 0.9, "grad_norm": 1.6756379119239881, "learning_rate": 2.4221974399029626e-07, "loss": 0.7957, "step": 8848 }, { "epoch": 0.9, "grad_norm": 1.4979743615038936, "learning_rate": 2.417115494991107e-07, "loss": 0.6936, "step": 8849 }, { "epoch": 0.9, "grad_norm": 1.6287918289785717, "learning_rate": 2.412038754767626e-07, "loss": 0.6298, "step": 8850 }, { "epoch": 0.9, "grad_norm": 1.4164873325066343, "learning_rate": 2.4069672197878204e-07, "loss": 0.5957, "step": 8851 }, { "epoch": 0.9, "grad_norm": 1.5598972535682902, "learning_rate": 2.4019008906064236e-07, "loss": 0.5842, "step": 8852 }, { "epoch": 0.9, "grad_norm": 1.7714077297303452, "learning_rate": 2.396839767777603e-07, "loss": 0.7623, "step": 8853 }, { "epoch": 0.9, "grad_norm": 1.4700290514125927, "learning_rate": 2.391783851854951e-07, "loss": 0.6515, "step": 8854 }, { "epoch": 0.9, "grad_norm": 1.5901975390253895, "learning_rate": 2.3867331433914787e-07, "loss": 0.7472, "step": 8855 }, { "epoch": 0.9, "grad_norm": 1.4754914583276664, "learning_rate": 2.3816876429396618e-07, "loss": 0.6129, "step": 8856 }, { "epoch": 0.9, "grad_norm": 1.4704106788104305, "learning_rate": 2.3766473510513587e-07, "loss": 0.6246, "step": 8857 }, { "epoch": 0.9, "grad_norm": 1.6159319083306278, "learning_rate": 2.3716122682779118e-07, "loss": 0.6256, "step": 8858 }, { "epoch": 0.9, "grad_norm": 1.6234081055983165, "learning_rate": 2.366582395170053e-07, "loss": 0.6836, "step": 8859 }, { "epoch": 0.9, "grad_norm": 1.7848838867589663, "learning_rate": 2.361557732277947e-07, "loss": 0.6573, "step": 8860 }, { "epoch": 0.9, "grad_norm": 1.6945219317596902, "learning_rate": 2.3565382801512214e-07, "loss": 0.6591, "step": 8861 }, { "epoch": 0.9, "grad_norm": 1.5569316975131253, "learning_rate": 2.351524039338887e-07, "loss": 0.7246, "step": 8862 }, { "epoch": 0.91, "grad_norm": 1.4992463420277544, "learning_rate": 2.3465150103894328e-07, "loss": 0.6351, "step": 8863 }, { "epoch": 0.91, "grad_norm": 4.241964937712917, "learning_rate": 2.3415111938507428e-07, "loss": 0.6141, "step": 8864 }, { "epoch": 0.91, "grad_norm": 1.5948507714388853, "learning_rate": 2.3365125902701346e-07, "loss": 0.71, "step": 8865 }, { "epoch": 0.91, "grad_norm": 1.5084492019688798, "learning_rate": 2.331519200194371e-07, "loss": 0.5959, "step": 8866 }, { "epoch": 0.91, "grad_norm": 1.8075700611570027, "learning_rate": 2.3265310241696426e-07, "loss": 0.6366, "step": 8867 }, { "epoch": 0.91, "grad_norm": 1.3098189817522372, "learning_rate": 2.3215480627415575e-07, "loss": 0.5041, "step": 8868 }, { "epoch": 0.91, "grad_norm": 1.5687263117296903, "learning_rate": 2.3165703164551523e-07, "loss": 0.744, "step": 8869 }, { "epoch": 0.91, "grad_norm": 1.4187451802980202, "learning_rate": 2.3115977858549133e-07, "loss": 0.6409, "step": 8870 }, { "epoch": 0.91, "grad_norm": 1.6274303635407472, "learning_rate": 2.3066304714847333e-07, "loss": 0.6049, "step": 8871 }, { "epoch": 0.91, "grad_norm": 1.2811410152964755, "learning_rate": 2.3016683738879552e-07, "loss": 0.6058, "step": 8872 }, { "epoch": 0.91, "grad_norm": 1.5774417499286046, "learning_rate": 2.2967114936073342e-07, "loss": 0.7301, "step": 8873 }, { "epoch": 0.91, "grad_norm": 1.465735800006111, "learning_rate": 2.2917598311850587e-07, "loss": 0.7951, "step": 8874 }, { "epoch": 0.91, "grad_norm": 1.6718729781570019, "learning_rate": 2.2868133871627561e-07, "loss": 0.7997, "step": 8875 }, { "epoch": 0.91, "grad_norm": 1.3313721793890527, "learning_rate": 2.2818721620814667e-07, "loss": 0.6341, "step": 8876 }, { "epoch": 0.91, "grad_norm": 1.4693258844395771, "learning_rate": 2.2769361564816794e-07, "loss": 0.6089, "step": 8877 }, { "epoch": 0.91, "grad_norm": 2.4669159229261246, "learning_rate": 2.272005370903302e-07, "loss": 0.632, "step": 8878 }, { "epoch": 0.91, "grad_norm": 1.4215986800541598, "learning_rate": 2.267079805885658e-07, "loss": 0.6252, "step": 8879 }, { "epoch": 0.91, "grad_norm": 1.566970643452274, "learning_rate": 2.2621594619675168e-07, "loss": 0.6232, "step": 8880 }, { "epoch": 0.91, "grad_norm": 1.5549451136787558, "learning_rate": 2.2572443396870926e-07, "loss": 0.5355, "step": 8881 }, { "epoch": 0.91, "grad_norm": 1.7325005860052505, "learning_rate": 2.2523344395819936e-07, "loss": 0.6875, "step": 8882 }, { "epoch": 0.91, "grad_norm": 1.6586367059964562, "learning_rate": 2.2474297621892626e-07, "loss": 0.5952, "step": 8883 }, { "epoch": 0.91, "grad_norm": 1.643412975960106, "learning_rate": 2.2425303080453986e-07, "loss": 0.6389, "step": 8884 }, { "epoch": 0.91, "grad_norm": 1.3488336533796446, "learning_rate": 2.2376360776863004e-07, "loss": 0.6278, "step": 8885 }, { "epoch": 0.91, "grad_norm": 1.66232384680095, "learning_rate": 2.2327470716473175e-07, "loss": 0.6596, "step": 8886 }, { "epoch": 0.91, "grad_norm": 1.6170856080197744, "learning_rate": 2.2278632904632058e-07, "loss": 0.7313, "step": 8887 }, { "epoch": 0.91, "grad_norm": 1.4773757019181, "learning_rate": 2.22298473466816e-07, "loss": 0.7273, "step": 8888 }, { "epoch": 0.91, "grad_norm": 1.4011861062991244, "learning_rate": 2.218111404795814e-07, "loss": 0.5951, "step": 8889 }, { "epoch": 0.91, "grad_norm": 1.3400241519271214, "learning_rate": 2.2132433013792088e-07, "loss": 0.6171, "step": 8890 }, { "epoch": 0.91, "grad_norm": 1.637167691832323, "learning_rate": 2.2083804249508345e-07, "loss": 0.741, "step": 8891 }, { "epoch": 0.91, "grad_norm": 1.858493745987426, "learning_rate": 2.2035227760425992e-07, "loss": 0.7267, "step": 8892 }, { "epoch": 0.91, "grad_norm": 1.4352832873798926, "learning_rate": 2.198670355185828e-07, "loss": 0.5901, "step": 8893 }, { "epoch": 0.91, "grad_norm": 1.6597409719235303, "learning_rate": 2.193823162911296e-07, "loss": 0.6365, "step": 8894 }, { "epoch": 0.91, "grad_norm": 1.49845334994192, "learning_rate": 2.1889811997492017e-07, "loss": 0.5578, "step": 8895 }, { "epoch": 0.91, "grad_norm": 1.465584380368628, "learning_rate": 2.1841444662291543e-07, "loss": 0.5416, "step": 8896 }, { "epoch": 0.91, "grad_norm": 1.5912544688260528, "learning_rate": 2.179312962880209e-07, "loss": 0.5682, "step": 8897 }, { "epoch": 0.91, "grad_norm": 1.6110248911459233, "learning_rate": 2.1744866902308482e-07, "loss": 0.6061, "step": 8898 }, { "epoch": 0.91, "grad_norm": 1.517830792261943, "learning_rate": 2.1696656488089607e-07, "loss": 0.6093, "step": 8899 }, { "epoch": 0.91, "grad_norm": 1.3821105943867422, "learning_rate": 2.1648498391419026e-07, "loss": 0.6157, "step": 8900 }, { "epoch": 0.91, "grad_norm": 1.3925705690737142, "learning_rate": 2.1600392617564192e-07, "loss": 0.5523, "step": 8901 }, { "epoch": 0.91, "grad_norm": 1.5474892867627235, "learning_rate": 2.155233917178695e-07, "loss": 0.5981, "step": 8902 }, { "epoch": 0.91, "grad_norm": 1.290336096206725, "learning_rate": 2.15043380593436e-07, "loss": 0.614, "step": 8903 }, { "epoch": 0.91, "grad_norm": 1.681283841450226, "learning_rate": 2.1456389285484436e-07, "loss": 0.5923, "step": 8904 }, { "epoch": 0.91, "grad_norm": 1.429991147794917, "learning_rate": 2.1408492855454265e-07, "loss": 0.6447, "step": 8905 }, { "epoch": 0.91, "grad_norm": 1.400300686645346, "learning_rate": 2.1360648774492064e-07, "loss": 0.6916, "step": 8906 }, { "epoch": 0.91, "grad_norm": 1.530906041520893, "learning_rate": 2.1312857047831038e-07, "loss": 0.619, "step": 8907 }, { "epoch": 0.91, "grad_norm": 1.4730838997110223, "learning_rate": 2.1265117680698665e-07, "loss": 0.68, "step": 8908 }, { "epoch": 0.91, "grad_norm": 1.7137990337161673, "learning_rate": 2.1217430678316942e-07, "loss": 0.6713, "step": 8909 }, { "epoch": 0.91, "grad_norm": 1.4613225303178106, "learning_rate": 2.1169796045901857e-07, "loss": 0.5826, "step": 8910 }, { "epoch": 0.91, "grad_norm": 1.4315314030169968, "learning_rate": 2.1122213788663693e-07, "loss": 0.5798, "step": 8911 }, { "epoch": 0.91, "grad_norm": 1.5633900642850025, "learning_rate": 2.1074683911807115e-07, "loss": 0.628, "step": 8912 }, { "epoch": 0.91, "grad_norm": 1.6735213608319064, "learning_rate": 2.1027206420530965e-07, "loss": 0.6669, "step": 8913 }, { "epoch": 0.91, "grad_norm": 1.8480440019342175, "learning_rate": 2.0979781320028535e-07, "loss": 0.6015, "step": 8914 }, { "epoch": 0.91, "grad_norm": 1.4459414167462625, "learning_rate": 2.0932408615487177e-07, "loss": 0.618, "step": 8915 }, { "epoch": 0.91, "grad_norm": 1.3788444071519463, "learning_rate": 2.088508831208852e-07, "loss": 0.5853, "step": 8916 }, { "epoch": 0.91, "grad_norm": 1.4228988619987806, "learning_rate": 2.0837820415008647e-07, "loss": 0.5764, "step": 8917 }, { "epoch": 0.91, "grad_norm": 1.5811029333482902, "learning_rate": 2.07906049294177e-07, "loss": 0.6471, "step": 8918 }, { "epoch": 0.91, "grad_norm": 1.4389373710576723, "learning_rate": 2.0743441860480218e-07, "loss": 0.625, "step": 8919 }, { "epoch": 0.91, "grad_norm": 1.6097805046486886, "learning_rate": 2.069633121335507e-07, "loss": 0.612, "step": 8920 }, { "epoch": 0.91, "grad_norm": 1.3456046402791602, "learning_rate": 2.064927299319508e-07, "loss": 0.5783, "step": 8921 }, { "epoch": 0.91, "grad_norm": 1.6037860171329812, "learning_rate": 2.060226720514763e-07, "loss": 0.6445, "step": 8922 }, { "epoch": 0.91, "grad_norm": 1.5250242839470594, "learning_rate": 2.0555313854354386e-07, "loss": 0.6473, "step": 8923 }, { "epoch": 0.91, "grad_norm": 1.538794412628983, "learning_rate": 2.0508412945951127e-07, "loss": 0.6296, "step": 8924 }, { "epoch": 0.91, "grad_norm": 1.652054165060249, "learning_rate": 2.0461564485067865e-07, "loss": 0.683, "step": 8925 }, { "epoch": 0.91, "grad_norm": 1.7998107268030286, "learning_rate": 2.0414768476829105e-07, "loss": 0.5913, "step": 8926 }, { "epoch": 0.91, "grad_norm": 1.367327124051124, "learning_rate": 2.0368024926353258e-07, "loss": 0.6502, "step": 8927 }, { "epoch": 0.91, "grad_norm": 1.4839109270909174, "learning_rate": 2.0321333838753453e-07, "loss": 0.6096, "step": 8928 }, { "epoch": 0.91, "grad_norm": 1.644260740981053, "learning_rate": 2.0274695219136665e-07, "loss": 0.6642, "step": 8929 }, { "epoch": 0.91, "grad_norm": 1.545377934647103, "learning_rate": 2.0228109072604252e-07, "loss": 0.5236, "step": 8930 }, { "epoch": 0.91, "grad_norm": 1.3819270333257943, "learning_rate": 2.018157540425203e-07, "loss": 0.6527, "step": 8931 }, { "epoch": 0.91, "grad_norm": 1.910709394889136, "learning_rate": 2.0135094219169816e-07, "loss": 0.7017, "step": 8932 }, { "epoch": 0.91, "grad_norm": 1.4693374324320603, "learning_rate": 2.0088665522441874e-07, "loss": 0.6329, "step": 8933 }, { "epoch": 0.91, "grad_norm": 1.6936618557881769, "learning_rate": 2.0042289319146645e-07, "loss": 0.6896, "step": 8934 }, { "epoch": 0.91, "grad_norm": 1.5478558521880192, "learning_rate": 1.9995965614356684e-07, "loss": 0.6288, "step": 8935 }, { "epoch": 0.91, "grad_norm": 1.4489106880809557, "learning_rate": 1.994969441313904e-07, "loss": 0.6326, "step": 8936 }, { "epoch": 0.91, "grad_norm": 1.5776991675405876, "learning_rate": 1.990347572055501e-07, "loss": 0.6763, "step": 8937 }, { "epoch": 0.91, "grad_norm": 1.7277515015770377, "learning_rate": 1.9857309541659986e-07, "loss": 0.7902, "step": 8938 }, { "epoch": 0.91, "grad_norm": 1.4224572520296894, "learning_rate": 1.981119588150371e-07, "loss": 0.6508, "step": 8939 }, { "epoch": 0.91, "grad_norm": 1.4707760759023896, "learning_rate": 1.9765134745130145e-07, "loss": 0.5303, "step": 8940 }, { "epoch": 0.91, "grad_norm": 1.5040658204102966, "learning_rate": 1.9719126137577481e-07, "loss": 0.5716, "step": 8941 }, { "epoch": 0.91, "grad_norm": 1.614150167293157, "learning_rate": 1.967317006387831e-07, "loss": 0.7245, "step": 8942 }, { "epoch": 0.91, "grad_norm": 1.5494717125740787, "learning_rate": 1.9627266529059385e-07, "loss": 0.6909, "step": 8943 }, { "epoch": 0.91, "grad_norm": 1.388078525665698, "learning_rate": 1.958141553814158e-07, "loss": 0.5838, "step": 8944 }, { "epoch": 0.91, "grad_norm": 1.5626022494793832, "learning_rate": 1.953561709614027e-07, "loss": 0.6003, "step": 8945 }, { "epoch": 0.91, "grad_norm": 1.4876739781786639, "learning_rate": 1.9489871208064835e-07, "loss": 0.6107, "step": 8946 }, { "epoch": 0.91, "grad_norm": 1.2708173396256168, "learning_rate": 1.944417787891917e-07, "loss": 0.5308, "step": 8947 }, { "epoch": 0.91, "grad_norm": 1.8450563328495564, "learning_rate": 1.939853711370121e-07, "loss": 0.6305, "step": 8948 }, { "epoch": 0.91, "grad_norm": 1.6292456109352607, "learning_rate": 1.935294891740319e-07, "loss": 0.5426, "step": 8949 }, { "epoch": 0.91, "grad_norm": 1.7099185291833527, "learning_rate": 1.9307413295011622e-07, "loss": 0.6484, "step": 8950 }, { "epoch": 0.91, "grad_norm": 1.4931203622173137, "learning_rate": 1.9261930251507356e-07, "loss": 0.7436, "step": 8951 }, { "epoch": 0.91, "grad_norm": 1.6346462801842094, "learning_rate": 1.9216499791865296e-07, "loss": 0.6398, "step": 8952 }, { "epoch": 0.91, "grad_norm": 1.4328460990941987, "learning_rate": 1.9171121921054747e-07, "loss": 0.6056, "step": 8953 }, { "epoch": 0.91, "grad_norm": 1.4583313237954136, "learning_rate": 1.9125796644039073e-07, "loss": 0.545, "step": 8954 }, { "epoch": 0.91, "grad_norm": 1.4147341317767286, "learning_rate": 1.90805239657762e-07, "loss": 0.7231, "step": 8955 }, { "epoch": 0.91, "grad_norm": 1.5298887058481436, "learning_rate": 1.9035303891218105e-07, "loss": 0.7078, "step": 8956 }, { "epoch": 0.91, "grad_norm": 1.5006502590756006, "learning_rate": 1.8990136425310946e-07, "loss": 0.622, "step": 8957 }, { "epoch": 0.91, "grad_norm": 1.41393956523508, "learning_rate": 1.8945021572995214e-07, "loss": 0.5585, "step": 8958 }, { "epoch": 0.91, "grad_norm": 1.5132339376914776, "learning_rate": 1.8899959339205687e-07, "loss": 0.718, "step": 8959 }, { "epoch": 0.91, "grad_norm": 1.4552865619739341, "learning_rate": 1.885494972887131e-07, "loss": 0.6006, "step": 8960 }, { "epoch": 0.92, "grad_norm": 1.4540440803200136, "learning_rate": 1.8809992746915307e-07, "loss": 0.5669, "step": 8961 }, { "epoch": 0.92, "grad_norm": 1.3541069580862013, "learning_rate": 1.876508839825525e-07, "loss": 0.5728, "step": 8962 }, { "epoch": 0.92, "grad_norm": 1.512590147534574, "learning_rate": 1.8720236687802595e-07, "loss": 0.6236, "step": 8963 }, { "epoch": 0.92, "grad_norm": 1.390350936598574, "learning_rate": 1.8675437620463476e-07, "loss": 0.6405, "step": 8964 }, { "epoch": 0.92, "grad_norm": 1.5303211327585764, "learning_rate": 1.863069120113814e-07, "loss": 0.6143, "step": 8965 }, { "epoch": 0.92, "grad_norm": 1.5762697483887436, "learning_rate": 1.8585997434720893e-07, "loss": 0.7607, "step": 8966 }, { "epoch": 0.92, "grad_norm": 1.5278821436833225, "learning_rate": 1.8541356326100436e-07, "loss": 0.7163, "step": 8967 }, { "epoch": 0.92, "grad_norm": 1.5335729503921558, "learning_rate": 1.8496767880159584e-07, "loss": 0.7338, "step": 8968 }, { "epoch": 0.92, "grad_norm": 1.4328265507513633, "learning_rate": 1.8452232101775658e-07, "loss": 0.549, "step": 8969 }, { "epoch": 0.92, "grad_norm": 1.595322689067029, "learning_rate": 1.840774899582004e-07, "loss": 0.7288, "step": 8970 }, { "epoch": 0.92, "grad_norm": 1.4409041670404386, "learning_rate": 1.8363318567158227e-07, "loss": 0.6147, "step": 8971 }, { "epoch": 0.92, "grad_norm": 1.6296189179746432, "learning_rate": 1.8318940820650167e-07, "loss": 0.6261, "step": 8972 }, { "epoch": 0.92, "grad_norm": 1.4554982605634172, "learning_rate": 1.8274615761150027e-07, "loss": 0.6228, "step": 8973 }, { "epoch": 0.92, "grad_norm": 1.4040404681537062, "learning_rate": 1.823034339350599e-07, "loss": 0.6047, "step": 8974 }, { "epoch": 0.92, "grad_norm": 1.6048059524916896, "learning_rate": 1.818612372256079e-07, "loss": 0.6996, "step": 8975 }, { "epoch": 0.92, "grad_norm": 1.723797465304836, "learning_rate": 1.8141956753151223e-07, "loss": 0.6744, "step": 8976 }, { "epoch": 0.92, "grad_norm": 1.5383012426598788, "learning_rate": 1.809784249010821e-07, "loss": 0.6478, "step": 8977 }, { "epoch": 0.92, "grad_norm": 1.4554845447523044, "learning_rate": 1.8053780938257103e-07, "loss": 0.6418, "step": 8978 }, { "epoch": 0.92, "grad_norm": 1.7285283493045125, "learning_rate": 1.80097721024175e-07, "loss": 0.6197, "step": 8979 }, { "epoch": 0.92, "grad_norm": 1.8265537901574704, "learning_rate": 1.79658159874031e-07, "loss": 0.5944, "step": 8980 }, { "epoch": 0.92, "grad_norm": 1.5477200153177533, "learning_rate": 1.7921912598021897e-07, "loss": 0.6516, "step": 8981 }, { "epoch": 0.92, "grad_norm": 1.5018147080477093, "learning_rate": 1.7878061939076042e-07, "loss": 0.5535, "step": 8982 }, { "epoch": 0.92, "grad_norm": 1.528546914440315, "learning_rate": 1.7834264015362035e-07, "loss": 0.6857, "step": 8983 }, { "epoch": 0.92, "grad_norm": 1.5955374658769026, "learning_rate": 1.7790518831670601e-07, "loss": 0.6368, "step": 8984 }, { "epoch": 0.92, "grad_norm": 5.819462984866896, "learning_rate": 1.7746826392786688e-07, "loss": 0.5898, "step": 8985 }, { "epoch": 0.92, "grad_norm": 1.3954632751557698, "learning_rate": 1.770318670348925e-07, "loss": 0.5909, "step": 8986 }, { "epoch": 0.92, "grad_norm": 1.3686655521390023, "learning_rate": 1.765959976855186e-07, "loss": 0.6259, "step": 8987 }, { "epoch": 0.92, "grad_norm": 1.532362332532603, "learning_rate": 1.7616065592742038e-07, "loss": 0.6849, "step": 8988 }, { "epoch": 0.92, "grad_norm": 1.8335560723126272, "learning_rate": 1.757258418082164e-07, "loss": 0.648, "step": 8989 }, { "epoch": 0.92, "grad_norm": 1.4597936982925275, "learning_rate": 1.752915553754675e-07, "loss": 0.6632, "step": 8990 }, { "epoch": 0.92, "grad_norm": 3.654518268089268, "learning_rate": 1.7485779667667569e-07, "loss": 0.5696, "step": 8991 }, { "epoch": 0.92, "grad_norm": 1.7780678116992712, "learning_rate": 1.7442456575928635e-07, "loss": 0.5842, "step": 8992 }, { "epoch": 0.92, "grad_norm": 1.4820603928147433, "learning_rate": 1.739918626706888e-07, "loss": 0.5253, "step": 8993 }, { "epoch": 0.92, "grad_norm": 1.5920634912479827, "learning_rate": 1.7355968745821073e-07, "loss": 0.5704, "step": 8994 }, { "epoch": 0.92, "grad_norm": 1.6437483285472212, "learning_rate": 1.7312804016912432e-07, "loss": 0.7471, "step": 8995 }, { "epoch": 0.92, "grad_norm": 1.6494992339785968, "learning_rate": 1.726969208506446e-07, "loss": 0.634, "step": 8996 }, { "epoch": 0.92, "grad_norm": 1.415651723044762, "learning_rate": 1.722663295499266e-07, "loss": 0.6239, "step": 8997 }, { "epoch": 0.92, "grad_norm": 1.5507566748357555, "learning_rate": 1.7183626631407157e-07, "loss": 0.6723, "step": 8998 }, { "epoch": 0.92, "grad_norm": 1.6267108137636588, "learning_rate": 1.7140673119011853e-07, "loss": 0.6391, "step": 8999 }, { "epoch": 0.92, "grad_norm": 1.4125662323653907, "learning_rate": 1.7097772422505098e-07, "loss": 0.5732, "step": 9000 }, { "epoch": 0.92, "grad_norm": 1.4331207757413564, "learning_rate": 1.7054924546579476e-07, "loss": 0.7084, "step": 9001 }, { "epoch": 0.92, "grad_norm": 1.6740207857578036, "learning_rate": 1.7012129495921682e-07, "loss": 0.6057, "step": 9002 }, { "epoch": 0.92, "grad_norm": 2.820195569555371, "learning_rate": 1.69693872752128e-07, "loss": 0.6448, "step": 9003 }, { "epoch": 0.92, "grad_norm": 1.53172271088499, "learning_rate": 1.6926697889128042e-07, "loss": 0.5987, "step": 9004 }, { "epoch": 0.92, "grad_norm": 1.6603128438826427, "learning_rate": 1.6884061342336665e-07, "loss": 0.7008, "step": 9005 }, { "epoch": 0.92, "grad_norm": 1.4910002053153901, "learning_rate": 1.68414776395025e-07, "loss": 0.6163, "step": 9006 }, { "epoch": 0.92, "grad_norm": 1.3216682748067952, "learning_rate": 1.6798946785283376e-07, "loss": 0.5899, "step": 9007 }, { "epoch": 0.92, "grad_norm": 1.7471806838205672, "learning_rate": 1.6756468784331403e-07, "loss": 0.6775, "step": 9008 }, { "epoch": 0.92, "grad_norm": 1.5789882359575838, "learning_rate": 1.6714043641292866e-07, "loss": 0.6724, "step": 9009 }, { "epoch": 0.92, "grad_norm": 1.4260643566898115, "learning_rate": 1.667167136080816e-07, "loss": 0.6556, "step": 9010 }, { "epoch": 0.92, "grad_norm": 1.5287250740279796, "learning_rate": 1.6629351947512195e-07, "loss": 0.6846, "step": 9011 }, { "epoch": 0.92, "grad_norm": 1.4785725450877831, "learning_rate": 1.658708540603393e-07, "loss": 0.6281, "step": 9012 }, { "epoch": 0.92, "grad_norm": 1.4377670485861653, "learning_rate": 1.6544871740996505e-07, "loss": 0.6177, "step": 9013 }, { "epoch": 0.92, "grad_norm": 1.377186914349795, "learning_rate": 1.6502710957017275e-07, "loss": 0.5792, "step": 9014 }, { "epoch": 0.92, "grad_norm": 1.4492725030685512, "learning_rate": 1.6460603058707892e-07, "loss": 0.5825, "step": 9015 }, { "epoch": 0.92, "grad_norm": 1.390766990567971, "learning_rate": 1.6418548050674165e-07, "loss": 0.6243, "step": 9016 }, { "epoch": 0.92, "grad_norm": 1.5333869989780151, "learning_rate": 1.637654593751614e-07, "loss": 0.6034, "step": 9017 }, { "epoch": 0.92, "grad_norm": 1.7767822594221276, "learning_rate": 1.6334596723828146e-07, "loss": 0.5542, "step": 9018 }, { "epoch": 0.92, "grad_norm": 1.5457748538414013, "learning_rate": 1.6292700414198504e-07, "loss": 0.5859, "step": 9019 }, { "epoch": 0.92, "grad_norm": 1.9879197735211236, "learning_rate": 1.6250857013209997e-07, "loss": 0.6373, "step": 9020 }, { "epoch": 0.92, "grad_norm": 1.3702532388895936, "learning_rate": 1.620906652543952e-07, "loss": 0.6464, "step": 9021 }, { "epoch": 0.92, "grad_norm": 1.5264104110330101, "learning_rate": 1.6167328955458194e-07, "loss": 0.6299, "step": 9022 }, { "epoch": 0.92, "grad_norm": 1.8337654959478455, "learning_rate": 1.612564430783131e-07, "loss": 0.751, "step": 9023 }, { "epoch": 0.92, "grad_norm": 1.5242772663183464, "learning_rate": 1.6084012587118282e-07, "loss": 0.6379, "step": 9024 }, { "epoch": 0.92, "grad_norm": 1.410945824012768, "learning_rate": 1.6042433797873015e-07, "loss": 0.6628, "step": 9025 }, { "epoch": 0.92, "grad_norm": 1.5240033115221965, "learning_rate": 1.6000907944643428e-07, "loss": 0.6201, "step": 9026 }, { "epoch": 0.92, "grad_norm": 1.3664199113003137, "learning_rate": 1.595943503197167e-07, "loss": 0.6895, "step": 9027 }, { "epoch": 0.92, "grad_norm": 1.5451521940639643, "learning_rate": 1.5918015064394053e-07, "loss": 0.5761, "step": 9028 }, { "epoch": 0.92, "grad_norm": 1.4687985978153373, "learning_rate": 1.587664804644129e-07, "loss": 0.5911, "step": 9029 }, { "epoch": 0.92, "grad_norm": 1.3739451364027089, "learning_rate": 1.583533398263798e-07, "loss": 0.6214, "step": 9030 }, { "epoch": 0.92, "grad_norm": 1.519536525499759, "learning_rate": 1.579407287750323e-07, "loss": 0.5849, "step": 9031 }, { "epoch": 0.92, "grad_norm": 1.720374268076885, "learning_rate": 1.5752864735550322e-07, "loss": 0.6254, "step": 9032 }, { "epoch": 0.92, "grad_norm": 1.5646783531656991, "learning_rate": 1.5711709561286536e-07, "loss": 0.641, "step": 9033 }, { "epoch": 0.92, "grad_norm": 1.4104669837300825, "learning_rate": 1.5670607359213442e-07, "loss": 0.5562, "step": 9034 }, { "epoch": 0.92, "grad_norm": 1.6630735715169986, "learning_rate": 1.562955813382705e-07, "loss": 0.7033, "step": 9035 }, { "epoch": 0.92, "grad_norm": 1.516195696614286, "learning_rate": 1.5588561889617326e-07, "loss": 0.5697, "step": 9036 }, { "epoch": 0.92, "grad_norm": 1.376924387126675, "learning_rate": 1.5547618631068406e-07, "loss": 0.567, "step": 9037 }, { "epoch": 0.92, "grad_norm": 1.5703725161092446, "learning_rate": 1.5506728362658708e-07, "loss": 0.6544, "step": 9038 }, { "epoch": 0.92, "grad_norm": 1.6458364453161516, "learning_rate": 1.5465891088860985e-07, "loss": 0.7157, "step": 9039 }, { "epoch": 0.92, "grad_norm": 1.2723956750872125, "learning_rate": 1.542510681414211e-07, "loss": 0.5371, "step": 9040 }, { "epoch": 0.92, "grad_norm": 1.4413058286722982, "learning_rate": 1.5384375542963014e-07, "loss": 0.5459, "step": 9041 }, { "epoch": 0.92, "grad_norm": 1.6885801075093452, "learning_rate": 1.5343697279778968e-07, "loss": 0.7616, "step": 9042 }, { "epoch": 0.92, "grad_norm": 1.6360913985821055, "learning_rate": 1.5303072029039466e-07, "loss": 0.6135, "step": 9043 }, { "epoch": 0.92, "grad_norm": 1.4123201831701737, "learning_rate": 1.5262499795188125e-07, "loss": 0.6134, "step": 9044 }, { "epoch": 0.92, "grad_norm": 1.4560368154934915, "learning_rate": 1.5221980582662777e-07, "loss": 0.6144, "step": 9045 }, { "epoch": 0.92, "grad_norm": 1.440437083912585, "learning_rate": 1.5181514395895657e-07, "loss": 0.5446, "step": 9046 }, { "epoch": 0.92, "grad_norm": 1.517188341158572, "learning_rate": 1.5141101239312728e-07, "loss": 0.5413, "step": 9047 }, { "epoch": 0.92, "grad_norm": 1.4691263649648323, "learning_rate": 1.5100741117334562e-07, "loss": 0.653, "step": 9048 }, { "epoch": 0.92, "grad_norm": 1.5270333417263355, "learning_rate": 1.506043403437596e-07, "loss": 0.6407, "step": 9049 }, { "epoch": 0.92, "grad_norm": 1.6241031306935327, "learning_rate": 1.5020179994845564e-07, "loss": 0.7281, "step": 9050 }, { "epoch": 0.92, "grad_norm": 1.902368658559564, "learning_rate": 1.4979979003146572e-07, "loss": 0.6283, "step": 9051 }, { "epoch": 0.92, "grad_norm": 1.5842964728007123, "learning_rate": 1.493983106367608e-07, "loss": 0.617, "step": 9052 }, { "epoch": 0.92, "grad_norm": 1.45185894085046, "learning_rate": 1.4899736180825631e-07, "loss": 0.6689, "step": 9053 }, { "epoch": 0.92, "grad_norm": 1.669838207364021, "learning_rate": 1.485969435898088e-07, "loss": 0.7093, "step": 9054 }, { "epoch": 0.92, "grad_norm": 1.6050442644048382, "learning_rate": 1.4819705602521607e-07, "loss": 0.6617, "step": 9055 }, { "epoch": 0.92, "grad_norm": 1.6236501625659627, "learning_rate": 1.477976991582186e-07, "loss": 0.668, "step": 9056 }, { "epoch": 0.92, "grad_norm": 1.8251509422991676, "learning_rate": 1.4739887303249877e-07, "loss": 0.6472, "step": 9057 }, { "epoch": 0.92, "grad_norm": 1.5434247029037753, "learning_rate": 1.4700057769168052e-07, "loss": 0.5493, "step": 9058 }, { "epoch": 0.93, "grad_norm": 1.4900791020360296, "learning_rate": 1.4660281317933013e-07, "loss": 0.551, "step": 9059 }, { "epoch": 0.93, "grad_norm": 1.4040181700649652, "learning_rate": 1.4620557953895665e-07, "loss": 0.6545, "step": 9060 }, { "epoch": 0.93, "grad_norm": 1.4236003343614485, "learning_rate": 1.4580887681400812e-07, "loss": 0.6446, "step": 9061 }, { "epoch": 0.93, "grad_norm": 1.527188319633793, "learning_rate": 1.454127050478782e-07, "loss": 0.682, "step": 9062 }, { "epoch": 0.93, "grad_norm": 1.473601472683252, "learning_rate": 1.4501706428389995e-07, "loss": 0.6882, "step": 9063 }, { "epoch": 0.93, "grad_norm": 1.5445297038421293, "learning_rate": 1.4462195456534878e-07, "loss": 0.5606, "step": 9064 }, { "epoch": 0.93, "grad_norm": 1.487019855804535, "learning_rate": 1.4422737593544455e-07, "loss": 0.7248, "step": 9065 }, { "epoch": 0.93, "grad_norm": 2.7508706866134234, "learning_rate": 1.4383332843734388e-07, "loss": 0.6479, "step": 9066 }, { "epoch": 0.93, "grad_norm": 1.5838520258031152, "learning_rate": 1.4343981211414947e-07, "loss": 0.5184, "step": 9067 }, { "epoch": 0.93, "grad_norm": 1.4666794344474092, "learning_rate": 1.4304682700890582e-07, "loss": 0.6226, "step": 9068 }, { "epoch": 0.93, "grad_norm": 1.4075915488858801, "learning_rate": 1.426543731645974e-07, "loss": 0.642, "step": 9069 }, { "epoch": 0.93, "grad_norm": 1.526809109084442, "learning_rate": 1.42262450624151e-07, "loss": 0.6741, "step": 9070 }, { "epoch": 0.93, "grad_norm": 1.6762839532835445, "learning_rate": 1.4187105943043622e-07, "loss": 0.5545, "step": 9071 }, { "epoch": 0.93, "grad_norm": 1.7558448137234108, "learning_rate": 1.4148019962626326e-07, "loss": 0.6508, "step": 9072 }, { "epoch": 0.93, "grad_norm": 1.459255562594741, "learning_rate": 1.410898712543862e-07, "loss": 0.5864, "step": 9073 }, { "epoch": 0.93, "grad_norm": 1.3828492123374754, "learning_rate": 1.4070007435749932e-07, "loss": 0.6259, "step": 9074 }, { "epoch": 0.93, "grad_norm": 1.4906898907091979, "learning_rate": 1.4031080897823847e-07, "loss": 0.6398, "step": 9075 }, { "epoch": 0.93, "grad_norm": 1.8804312503972682, "learning_rate": 1.3992207515918344e-07, "loss": 0.6431, "step": 9076 }, { "epoch": 0.93, "grad_norm": 1.4676085588691474, "learning_rate": 1.3953387294285304e-07, "loss": 0.6725, "step": 9077 }, { "epoch": 0.93, "grad_norm": 1.4601938633009737, "learning_rate": 1.3914620237170994e-07, "loss": 0.6122, "step": 9078 }, { "epoch": 0.93, "grad_norm": 1.4853072836461678, "learning_rate": 1.3875906348815914e-07, "loss": 0.5815, "step": 9079 }, { "epoch": 0.93, "grad_norm": 1.4996905275149548, "learning_rate": 1.3837245633454512e-07, "loss": 0.6762, "step": 9080 }, { "epoch": 0.93, "grad_norm": 1.3557643854132522, "learning_rate": 1.3798638095315564e-07, "loss": 0.6409, "step": 9081 }, { "epoch": 0.93, "grad_norm": 1.5711252069326924, "learning_rate": 1.3760083738622143e-07, "loss": 0.6666, "step": 9082 }, { "epoch": 0.93, "grad_norm": 1.4529230997292213, "learning_rate": 1.3721582567591207e-07, "loss": 0.6744, "step": 9083 }, { "epoch": 0.93, "grad_norm": 1.4659027929481925, "learning_rate": 1.3683134586434278e-07, "loss": 0.7259, "step": 9084 }, { "epoch": 0.93, "grad_norm": 1.5420988584571778, "learning_rate": 1.3644739799356765e-07, "loss": 0.6955, "step": 9085 }, { "epoch": 0.93, "grad_norm": 1.5985137849249074, "learning_rate": 1.3606398210558203e-07, "loss": 0.6577, "step": 9086 }, { "epoch": 0.93, "grad_norm": 1.503759555714863, "learning_rate": 1.3568109824232622e-07, "loss": 0.5947, "step": 9087 }, { "epoch": 0.93, "grad_norm": 1.5865145172386428, "learning_rate": 1.3529874644568174e-07, "loss": 0.6015, "step": 9088 }, { "epoch": 0.93, "grad_norm": 1.6144857350625805, "learning_rate": 1.3491692675746738e-07, "loss": 0.5872, "step": 9089 }, { "epoch": 0.93, "grad_norm": 2.6305464195313424, "learning_rate": 1.3453563921945024e-07, "loss": 0.6259, "step": 9090 }, { "epoch": 0.93, "grad_norm": 1.406512885168509, "learning_rate": 1.341548838733342e-07, "loss": 0.5626, "step": 9091 }, { "epoch": 0.93, "grad_norm": 1.5817885637822513, "learning_rate": 1.3377466076076762e-07, "loss": 0.6583, "step": 9092 }, { "epoch": 0.93, "grad_norm": 1.5299801420822399, "learning_rate": 1.3339496992334112e-07, "loss": 0.6034, "step": 9093 }, { "epoch": 0.93, "grad_norm": 1.6591208197386123, "learning_rate": 1.3301581140258368e-07, "loss": 0.702, "step": 9094 }, { "epoch": 0.93, "grad_norm": 2.5030467567577106, "learning_rate": 1.3263718523996884e-07, "loss": 0.6418, "step": 9095 }, { "epoch": 0.93, "grad_norm": 1.3786523123443937, "learning_rate": 1.3225909147691285e-07, "loss": 0.6824, "step": 9096 }, { "epoch": 0.93, "grad_norm": 1.3635781412373889, "learning_rate": 1.318815301547699e-07, "loss": 0.7021, "step": 9097 }, { "epoch": 0.93, "grad_norm": 4.169878931142314, "learning_rate": 1.315045013148397e-07, "loss": 0.7333, "step": 9098 }, { "epoch": 0.93, "grad_norm": 1.291580371214496, "learning_rate": 1.311280049983621e-07, "loss": 0.5876, "step": 9099 }, { "epoch": 0.93, "grad_norm": 1.5652172890375782, "learning_rate": 1.3075204124651853e-07, "loss": 0.5965, "step": 9100 }, { "epoch": 0.93, "grad_norm": 1.539040534351086, "learning_rate": 1.3037661010043168e-07, "loss": 0.5986, "step": 9101 }, { "epoch": 0.93, "grad_norm": 1.415283251219546, "learning_rate": 1.3000171160116926e-07, "loss": 0.6782, "step": 9102 }, { "epoch": 0.93, "grad_norm": 1.470720268005382, "learning_rate": 1.2962734578973568e-07, "loss": 0.7185, "step": 9103 }, { "epoch": 0.93, "grad_norm": 2.1660097569476315, "learning_rate": 1.2925351270708098e-07, "loss": 0.686, "step": 9104 }, { "epoch": 0.93, "grad_norm": 1.4159344642796632, "learning_rate": 1.2888021239409465e-07, "loss": 0.6234, "step": 9105 }, { "epoch": 0.93, "grad_norm": 1.5876417350478793, "learning_rate": 1.2850744489160906e-07, "loss": 0.7126, "step": 9106 }, { "epoch": 0.93, "grad_norm": 1.4811795985783747, "learning_rate": 1.2813521024039988e-07, "loss": 0.6536, "step": 9107 }, { "epoch": 0.93, "grad_norm": 1.6825285865178556, "learning_rate": 1.2776350848118014e-07, "loss": 0.6452, "step": 9108 }, { "epoch": 0.93, "grad_norm": 1.608812437986994, "learning_rate": 1.273923396546084e-07, "loss": 0.61, "step": 9109 }, { "epoch": 0.93, "grad_norm": 1.45110995198195, "learning_rate": 1.2702170380128331e-07, "loss": 0.5608, "step": 9110 }, { "epoch": 0.93, "grad_norm": 1.3806451330997658, "learning_rate": 1.2665160096174635e-07, "loss": 0.7323, "step": 9111 }, { "epoch": 0.93, "grad_norm": 1.5367271734049188, "learning_rate": 1.2628203117647897e-07, "loss": 0.7602, "step": 9112 }, { "epoch": 0.93, "grad_norm": 1.5104832859286907, "learning_rate": 1.2591299448590612e-07, "loss": 0.6619, "step": 9113 }, { "epoch": 0.93, "grad_norm": 1.618319729305726, "learning_rate": 1.2554449093039267e-07, "loss": 0.7007, "step": 9114 }, { "epoch": 0.93, "grad_norm": 1.671346942928796, "learning_rate": 1.2517652055024644e-07, "loss": 0.659, "step": 9115 }, { "epoch": 0.93, "grad_norm": 1.3584698778847895, "learning_rate": 1.248090833857174e-07, "loss": 0.6645, "step": 9116 }, { "epoch": 0.93, "grad_norm": 1.634024144949674, "learning_rate": 1.2444217947699566e-07, "loss": 0.7222, "step": 9117 }, { "epoch": 0.93, "grad_norm": 1.5080813679309404, "learning_rate": 1.240758088642141e-07, "loss": 0.6331, "step": 9118 }, { "epoch": 0.93, "grad_norm": 1.5047117991738086, "learning_rate": 1.2370997158744569e-07, "loss": 0.6512, "step": 9119 }, { "epoch": 0.93, "grad_norm": 1.3317032863601277, "learning_rate": 1.2334466768670727e-07, "loss": 0.5776, "step": 9120 }, { "epoch": 0.93, "grad_norm": 1.6080633665866564, "learning_rate": 1.2297989720195746e-07, "loss": 0.6102, "step": 9121 }, { "epoch": 0.93, "grad_norm": 1.4063259458153543, "learning_rate": 1.2261566017309322e-07, "loss": 0.6251, "step": 9122 }, { "epoch": 0.93, "grad_norm": 1.4828980716488687, "learning_rate": 1.2225195663995605e-07, "loss": 0.5663, "step": 9123 }, { "epoch": 0.93, "grad_norm": 1.4752239059223509, "learning_rate": 1.2188878664232907e-07, "loss": 0.6586, "step": 9124 }, { "epoch": 0.93, "grad_norm": 1.4895616429498086, "learning_rate": 1.2152615021993553e-07, "loss": 0.5815, "step": 9125 }, { "epoch": 0.93, "grad_norm": 1.4743414742474064, "learning_rate": 1.2116404741244204e-07, "loss": 0.5864, "step": 9126 }, { "epoch": 0.93, "grad_norm": 1.337698922161406, "learning_rate": 1.2080247825945579e-07, "loss": 0.6466, "step": 9127 }, { "epoch": 0.93, "grad_norm": 1.6481866285660036, "learning_rate": 1.2044144280052518e-07, "loss": 0.7024, "step": 9128 }, { "epoch": 0.93, "grad_norm": 2.412467228139299, "learning_rate": 1.200809410751408e-07, "loss": 0.6951, "step": 9129 }, { "epoch": 0.93, "grad_norm": 1.5710187885756624, "learning_rate": 1.1972097312273555e-07, "loss": 0.5809, "step": 9130 }, { "epoch": 0.93, "grad_norm": 1.685184742005442, "learning_rate": 1.19361538982683e-07, "loss": 0.6198, "step": 9131 }, { "epoch": 0.93, "grad_norm": 1.7027790431467928, "learning_rate": 1.1900263869429885e-07, "loss": 0.6913, "step": 9132 }, { "epoch": 0.93, "grad_norm": 1.6777174976731402, "learning_rate": 1.1864427229683895e-07, "loss": 0.7286, "step": 9133 }, { "epoch": 0.93, "grad_norm": 1.4346086989455116, "learning_rate": 1.1828643982950305e-07, "loss": 0.6063, "step": 9134 }, { "epoch": 0.93, "grad_norm": 1.6386351989468166, "learning_rate": 1.1792914133143207e-07, "loss": 0.6072, "step": 9135 }, { "epoch": 0.93, "grad_norm": 1.5408108486075571, "learning_rate": 1.1757237684170585e-07, "loss": 0.636, "step": 9136 }, { "epoch": 0.93, "grad_norm": 1.4182629488097955, "learning_rate": 1.1721614639934931e-07, "loss": 0.6249, "step": 9137 }, { "epoch": 0.93, "grad_norm": 1.7406897509453834, "learning_rate": 1.1686045004332736e-07, "loss": 0.6044, "step": 9138 }, { "epoch": 0.93, "grad_norm": 1.4959915527337928, "learning_rate": 1.1650528781254611e-07, "loss": 0.6269, "step": 9139 }, { "epoch": 0.93, "grad_norm": 1.6466077766217497, "learning_rate": 1.1615065974585449e-07, "loss": 0.6046, "step": 9140 }, { "epoch": 0.93, "grad_norm": 1.5218074619683173, "learning_rate": 1.1579656588204202e-07, "loss": 0.5947, "step": 9141 }, { "epoch": 0.93, "grad_norm": 1.6258632959006982, "learning_rate": 1.1544300625983884e-07, "loss": 0.5579, "step": 9142 }, { "epoch": 0.93, "grad_norm": 1.4597429641956385, "learning_rate": 1.1508998091791956e-07, "loss": 0.6246, "step": 9143 }, { "epoch": 0.93, "grad_norm": 1.4023503530565766, "learning_rate": 1.1473748989489775e-07, "loss": 0.5704, "step": 9144 }, { "epoch": 0.93, "grad_norm": 1.6383290155078383, "learning_rate": 1.1438553322932977e-07, "loss": 0.6764, "step": 9145 }, { "epoch": 0.93, "grad_norm": 1.3770503976337194, "learning_rate": 1.1403411095971262e-07, "loss": 0.5851, "step": 9146 }, { "epoch": 0.93, "grad_norm": 1.5013267588669965, "learning_rate": 1.1368322312448555e-07, "loss": 0.6603, "step": 9147 }, { "epoch": 0.93, "grad_norm": 1.4839432005789839, "learning_rate": 1.1333286976202951e-07, "loss": 0.6337, "step": 9148 }, { "epoch": 0.93, "grad_norm": 1.5193036226894001, "learning_rate": 1.1298305091066664e-07, "loss": 0.6906, "step": 9149 }, { "epoch": 0.93, "grad_norm": 1.4501975413093606, "learning_rate": 1.1263376660866076e-07, "loss": 0.566, "step": 9150 }, { "epoch": 0.93, "grad_norm": 1.4678453222347871, "learning_rate": 1.1228501689421689e-07, "loss": 0.5857, "step": 9151 }, { "epoch": 0.93, "grad_norm": 1.5254035248332114, "learning_rate": 1.1193680180548172e-07, "loss": 0.6358, "step": 9152 }, { "epoch": 0.93, "grad_norm": 1.542477645380156, "learning_rate": 1.1158912138054367e-07, "loss": 0.7532, "step": 9153 }, { "epoch": 0.93, "grad_norm": 1.7550799028060318, "learning_rate": 1.112419756574329e-07, "loss": 0.6994, "step": 9154 }, { "epoch": 0.93, "grad_norm": 1.6275448527193184, "learning_rate": 1.108953646741201e-07, "loss": 0.6871, "step": 9155 }, { "epoch": 0.93, "grad_norm": 1.510796943951617, "learning_rate": 1.1054928846851831e-07, "loss": 0.5913, "step": 9156 }, { "epoch": 0.94, "grad_norm": 1.5011533629551155, "learning_rate": 1.102037470784817e-07, "loss": 0.6176, "step": 9157 }, { "epoch": 0.94, "grad_norm": 1.5942154303092335, "learning_rate": 1.0985874054180723e-07, "loss": 0.6326, "step": 9158 }, { "epoch": 0.94, "grad_norm": 1.6056378516605572, "learning_rate": 1.095142688962314e-07, "loss": 0.5885, "step": 9159 }, { "epoch": 0.94, "grad_norm": 1.2591207817072154, "learning_rate": 1.091703321794324e-07, "loss": 0.5599, "step": 9160 }, { "epoch": 0.94, "grad_norm": 1.6127968218935136, "learning_rate": 1.0882693042903125e-07, "loss": 0.683, "step": 9161 }, { "epoch": 0.94, "grad_norm": 1.4340598885737001, "learning_rate": 1.0848406368258957e-07, "loss": 0.6802, "step": 9162 }, { "epoch": 0.94, "grad_norm": 1.515654772235277, "learning_rate": 1.0814173197761179e-07, "loss": 0.5317, "step": 9163 }, { "epoch": 0.94, "grad_norm": 1.428359542791679, "learning_rate": 1.0779993535154076e-07, "loss": 0.5945, "step": 9164 }, { "epoch": 0.94, "grad_norm": 1.3950467290613857, "learning_rate": 1.0745867384176378e-07, "loss": 0.6584, "step": 9165 }, { "epoch": 0.94, "grad_norm": 1.4896634979649557, "learning_rate": 1.0711794748560878e-07, "loss": 0.7436, "step": 9166 }, { "epoch": 0.94, "grad_norm": 1.6769424875252703, "learning_rate": 1.0677775632034371e-07, "loss": 0.6836, "step": 9167 }, { "epoch": 0.94, "grad_norm": 3.235251578557978, "learning_rate": 1.0643810038318102e-07, "loss": 0.6632, "step": 9168 }, { "epoch": 0.94, "grad_norm": 1.6402422770185026, "learning_rate": 1.0609897971127103e-07, "loss": 0.5892, "step": 9169 }, { "epoch": 0.94, "grad_norm": 1.3838556586934538, "learning_rate": 1.0576039434170849e-07, "loss": 0.6431, "step": 9170 }, { "epoch": 0.94, "grad_norm": 1.6043550542875176, "learning_rate": 1.0542234431152709e-07, "loss": 0.6681, "step": 9171 }, { "epoch": 0.94, "grad_norm": 1.370580412032326, "learning_rate": 1.0508482965770506e-07, "loss": 0.6371, "step": 9172 }, { "epoch": 0.94, "grad_norm": 1.3793337142926372, "learning_rate": 1.0474785041715951e-07, "loss": 0.6318, "step": 9173 }, { "epoch": 0.94, "grad_norm": 1.5997743208452029, "learning_rate": 1.0441140662674931e-07, "loss": 0.6662, "step": 9174 }, { "epoch": 0.94, "grad_norm": 1.4928453201416412, "learning_rate": 1.0407549832327501e-07, "loss": 0.6276, "step": 9175 }, { "epoch": 0.94, "grad_norm": 1.5338083121390982, "learning_rate": 1.037401255434789e-07, "loss": 0.703, "step": 9176 }, { "epoch": 0.94, "grad_norm": 1.5098215702456643, "learning_rate": 1.0340528832404551e-07, "loss": 0.5768, "step": 9177 }, { "epoch": 0.94, "grad_norm": 2.7072846639062593, "learning_rate": 1.0307098670159943e-07, "loss": 0.6411, "step": 9178 }, { "epoch": 0.94, "grad_norm": 1.5110487846970764, "learning_rate": 1.0273722071270586e-07, "loss": 0.6315, "step": 9179 }, { "epoch": 0.94, "grad_norm": 1.5849364219685147, "learning_rate": 1.024039903938745e-07, "loss": 0.6284, "step": 9180 }, { "epoch": 0.94, "grad_norm": 1.6171099415095098, "learning_rate": 1.020712957815534e-07, "loss": 0.6072, "step": 9181 }, { "epoch": 0.94, "grad_norm": 1.4892975252102123, "learning_rate": 1.0173913691213344e-07, "loss": 0.5854, "step": 9182 }, { "epoch": 0.94, "grad_norm": 1.5233049918511132, "learning_rate": 1.0140751382194724e-07, "loss": 0.6022, "step": 9183 }, { "epoch": 0.94, "grad_norm": 1.540665351083934, "learning_rate": 1.0107642654726691e-07, "loss": 0.5912, "step": 9184 }, { "epoch": 0.94, "grad_norm": 1.56264549166859, "learning_rate": 1.0074587512430844e-07, "loss": 0.6481, "step": 9185 }, { "epoch": 0.94, "grad_norm": 1.7246461883134296, "learning_rate": 1.0041585958922739e-07, "loss": 0.6686, "step": 9186 }, { "epoch": 0.94, "grad_norm": 1.528330736202911, "learning_rate": 1.0008637997812265e-07, "loss": 0.6478, "step": 9187 }, { "epoch": 0.94, "grad_norm": 1.6239531857032827, "learning_rate": 9.97574363270315e-08, "loss": 0.611, "step": 9188 }, { "epoch": 0.94, "grad_norm": 1.5407302578090987, "learning_rate": 9.942902867193516e-08, "loss": 0.5906, "step": 9189 }, { "epoch": 0.94, "grad_norm": 1.4573373577440318, "learning_rate": 9.91011570487549e-08, "loss": 0.5201, "step": 9190 }, { "epoch": 0.94, "grad_norm": 1.6500222074773983, "learning_rate": 9.87738214933548e-08, "loss": 0.7913, "step": 9191 }, { "epoch": 0.94, "grad_norm": 1.687256515178071, "learning_rate": 9.844702204153844e-08, "loss": 0.7188, "step": 9192 }, { "epoch": 0.94, "grad_norm": 1.4780033010418057, "learning_rate": 9.812075872905114e-08, "loss": 0.585, "step": 9193 }, { "epoch": 0.94, "grad_norm": 1.5121634838052203, "learning_rate": 9.779503159158211e-08, "loss": 0.6233, "step": 9194 }, { "epoch": 0.94, "grad_norm": 1.4524839939047802, "learning_rate": 9.746984066475728e-08, "loss": 0.5997, "step": 9195 }, { "epoch": 0.94, "grad_norm": 1.5447534477251794, "learning_rate": 9.71451859841488e-08, "loss": 0.6428, "step": 9196 }, { "epoch": 0.94, "grad_norm": 1.5496651840401006, "learning_rate": 9.682106758526654e-08, "loss": 0.6246, "step": 9197 }, { "epoch": 0.94, "grad_norm": 1.4067750869232674, "learning_rate": 9.64974855035633e-08, "loss": 0.5917, "step": 9198 }, { "epoch": 0.94, "grad_norm": 1.647989347383343, "learning_rate": 9.617443977443353e-08, "loss": 0.6961, "step": 9199 }, { "epoch": 0.94, "grad_norm": 1.5297116105760837, "learning_rate": 9.585193043321172e-08, "loss": 0.6155, "step": 9200 }, { "epoch": 0.94, "grad_norm": 1.4245790957419093, "learning_rate": 9.552995751517524e-08, "loss": 0.5726, "step": 9201 }, { "epoch": 0.94, "grad_norm": 1.3778722312214151, "learning_rate": 9.520852105554146e-08, "loss": 0.6186, "step": 9202 }, { "epoch": 0.94, "grad_norm": 1.7337154105691595, "learning_rate": 9.488762108946891e-08, "loss": 0.7183, "step": 9203 }, { "epoch": 0.94, "grad_norm": 1.593924568813525, "learning_rate": 9.456725765205898e-08, "loss": 0.6026, "step": 9204 }, { "epoch": 0.94, "grad_norm": 1.5445130103736402, "learning_rate": 9.424743077835363e-08, "loss": 0.6261, "step": 9205 }, { "epoch": 0.94, "grad_norm": 1.6361651267503896, "learning_rate": 9.392814050333599e-08, "loss": 0.6093, "step": 9206 }, { "epoch": 0.94, "grad_norm": 1.7895386777903457, "learning_rate": 9.360938686193032e-08, "loss": 0.7046, "step": 9207 }, { "epoch": 0.94, "grad_norm": 1.5617456790599213, "learning_rate": 9.329116988900211e-08, "loss": 0.6816, "step": 9208 }, { "epoch": 0.94, "grad_norm": 1.3975236919384175, "learning_rate": 9.297348961935848e-08, "loss": 0.6141, "step": 9209 }, { "epoch": 0.94, "grad_norm": 1.556690043817484, "learning_rate": 9.265634608774832e-08, "loss": 0.7342, "step": 9210 }, { "epoch": 0.94, "grad_norm": 1.3791155081949715, "learning_rate": 9.233973932886108e-08, "loss": 0.5052, "step": 9211 }, { "epoch": 0.94, "grad_norm": 1.56551428625885, "learning_rate": 9.202366937732687e-08, "loss": 0.6354, "step": 9212 }, { "epoch": 0.94, "grad_norm": 1.4170453726157328, "learning_rate": 9.170813626771857e-08, "loss": 0.6571, "step": 9213 }, { "epoch": 0.94, "grad_norm": 1.5708141173449743, "learning_rate": 9.139314003454969e-08, "loss": 0.6257, "step": 9214 }, { "epoch": 0.94, "grad_norm": 1.4598806230071484, "learning_rate": 9.107868071227544e-08, "loss": 0.6342, "step": 9215 }, { "epoch": 0.94, "grad_norm": 1.499703895749498, "learning_rate": 9.076475833529164e-08, "loss": 0.6421, "step": 9216 }, { "epoch": 0.94, "grad_norm": 1.4375953160415955, "learning_rate": 9.045137293793416e-08, "loss": 0.6357, "step": 9217 }, { "epoch": 0.94, "grad_norm": 1.3929596378622477, "learning_rate": 9.013852455448335e-08, "loss": 0.644, "step": 9218 }, { "epoch": 0.94, "grad_norm": 1.5221867147007166, "learning_rate": 8.982621321915852e-08, "loss": 0.5788, "step": 9219 }, { "epoch": 0.94, "grad_norm": 1.4609198011031899, "learning_rate": 8.951443896612122e-08, "loss": 0.6399, "step": 9220 }, { "epoch": 0.94, "grad_norm": 1.2277530912966965, "learning_rate": 8.920320182947251e-08, "loss": 0.5913, "step": 9221 }, { "epoch": 0.94, "grad_norm": 2.003348200335067, "learning_rate": 8.889250184325681e-08, "loss": 0.682, "step": 9222 }, { "epoch": 0.94, "grad_norm": 1.6559259081906268, "learning_rate": 8.858233904145863e-08, "loss": 0.7072, "step": 9223 }, { "epoch": 0.94, "grad_norm": 1.4896229236254868, "learning_rate": 8.82727134580047e-08, "loss": 0.6115, "step": 9224 }, { "epoch": 0.94, "grad_norm": 1.4309810259220146, "learning_rate": 8.796362512676238e-08, "loss": 0.6579, "step": 9225 }, { "epoch": 0.94, "grad_norm": 1.5680230992750661, "learning_rate": 8.765507408153906e-08, "loss": 0.5792, "step": 9226 }, { "epoch": 0.94, "grad_norm": 1.6291215353498598, "learning_rate": 8.734706035608553e-08, "loss": 0.6783, "step": 9227 }, { "epoch": 0.94, "grad_norm": 1.414574919149123, "learning_rate": 8.703958398409206e-08, "loss": 0.5994, "step": 9228 }, { "epoch": 0.94, "grad_norm": 1.5262895585005551, "learning_rate": 8.673264499919177e-08, "loss": 0.6937, "step": 9229 }, { "epoch": 0.94, "grad_norm": 1.3776903496829882, "learning_rate": 8.642624343495776e-08, "loss": 0.5598, "step": 9230 }, { "epoch": 0.94, "grad_norm": 1.5891272567569665, "learning_rate": 8.612037932490436e-08, "loss": 0.6072, "step": 9231 }, { "epoch": 0.94, "grad_norm": 1.6751908311363115, "learning_rate": 8.581505270248758e-08, "loss": 0.5907, "step": 9232 }, { "epoch": 0.94, "grad_norm": 1.5109934655434225, "learning_rate": 8.55102636011046e-08, "loss": 0.6055, "step": 9233 }, { "epoch": 0.94, "grad_norm": 1.555217153295929, "learning_rate": 8.520601205409428e-08, "loss": 0.7198, "step": 9234 }, { "epoch": 0.94, "grad_norm": 1.4262291961260782, "learning_rate": 8.490229809473505e-08, "loss": 0.6858, "step": 9235 }, { "epoch": 0.94, "grad_norm": 1.544104744895338, "learning_rate": 8.459912175624862e-08, "loss": 0.5551, "step": 9236 }, { "epoch": 0.94, "grad_norm": 1.7729104041699775, "learning_rate": 8.429648307179572e-08, "loss": 0.6021, "step": 9237 }, { "epoch": 0.94, "grad_norm": 1.5497211399312285, "learning_rate": 8.399438207448096e-08, "loss": 0.6001, "step": 9238 }, { "epoch": 0.94, "grad_norm": 1.712036926380521, "learning_rate": 8.369281879734737e-08, "loss": 0.6474, "step": 9239 }, { "epoch": 0.94, "grad_norm": 1.6678998123551612, "learning_rate": 8.339179327338076e-08, "loss": 0.5949, "step": 9240 }, { "epoch": 0.94, "grad_norm": 1.3766855864369059, "learning_rate": 8.309130553550815e-08, "loss": 0.5878, "step": 9241 }, { "epoch": 0.94, "grad_norm": 1.6157211508024683, "learning_rate": 8.279135561659601e-08, "loss": 0.7801, "step": 9242 }, { "epoch": 0.94, "grad_norm": 1.6044068637613742, "learning_rate": 8.249194354945533e-08, "loss": 0.6936, "step": 9243 }, { "epoch": 0.94, "grad_norm": 1.7245948762947165, "learning_rate": 8.219306936683491e-08, "loss": 0.5368, "step": 9244 }, { "epoch": 0.94, "grad_norm": 1.4925765967746472, "learning_rate": 8.189473310142638e-08, "loss": 0.6217, "step": 9245 }, { "epoch": 0.94, "grad_norm": 1.58116124950654, "learning_rate": 8.159693478586139e-08, "loss": 0.6618, "step": 9246 }, { "epoch": 0.94, "grad_norm": 1.519092147582634, "learning_rate": 8.129967445271558e-08, "loss": 0.6162, "step": 9247 }, { "epoch": 0.94, "grad_norm": 2.084517275537521, "learning_rate": 8.100295213450237e-08, "loss": 0.5654, "step": 9248 }, { "epoch": 0.94, "grad_norm": 1.364108070397601, "learning_rate": 8.070676786367749e-08, "loss": 0.6107, "step": 9249 }, { "epoch": 0.94, "grad_norm": 1.7444770317981537, "learning_rate": 8.041112167263832e-08, "loss": 0.6869, "step": 9250 }, { "epoch": 0.94, "grad_norm": 1.4077916755740976, "learning_rate": 8.011601359372345e-08, "loss": 0.6007, "step": 9251 }, { "epoch": 0.94, "grad_norm": 1.2868711783521978, "learning_rate": 7.982144365921208e-08, "loss": 0.5382, "step": 9252 }, { "epoch": 0.94, "grad_norm": 1.5165119174858912, "learning_rate": 7.952741190132452e-08, "loss": 0.6527, "step": 9253 }, { "epoch": 0.94, "grad_norm": 1.66927540527606, "learning_rate": 7.923391835222227e-08, "loss": 0.5862, "step": 9254 }, { "epoch": 0.95, "grad_norm": 1.6496879817417982, "learning_rate": 7.894096304400911e-08, "loss": 0.6337, "step": 9255 }, { "epoch": 0.95, "grad_norm": 1.3183477911237174, "learning_rate": 7.864854600872718e-08, "loss": 0.5328, "step": 9256 }, { "epoch": 0.95, "grad_norm": 1.554978730149234, "learning_rate": 7.835666727836367e-08, "loss": 0.6313, "step": 9257 }, { "epoch": 0.95, "grad_norm": 1.4989921891264464, "learning_rate": 7.806532688484302e-08, "loss": 0.6514, "step": 9258 }, { "epoch": 0.95, "grad_norm": 1.3749036378452635, "learning_rate": 7.777452486003256e-08, "loss": 0.5933, "step": 9259 }, { "epoch": 0.95, "grad_norm": 1.3955592049873418, "learning_rate": 7.748426123574126e-08, "loss": 0.6002, "step": 9260 }, { "epoch": 0.95, "grad_norm": 1.4972992528640547, "learning_rate": 7.719453604371874e-08, "loss": 0.7346, "step": 9261 }, { "epoch": 0.95, "grad_norm": 1.4269039541954585, "learning_rate": 7.690534931565519e-08, "loss": 0.5392, "step": 9262 }, { "epoch": 0.95, "grad_norm": 1.561017259266783, "learning_rate": 7.661670108318197e-08, "loss": 0.6867, "step": 9263 }, { "epoch": 0.95, "grad_norm": 1.3799499547100353, "learning_rate": 7.632859137787329e-08, "loss": 0.6304, "step": 9264 }, { "epoch": 0.95, "grad_norm": 1.4733447034489424, "learning_rate": 7.604102023124116e-08, "loss": 0.6214, "step": 9265 }, { "epoch": 0.95, "grad_norm": 1.3738246428198637, "learning_rate": 7.575398767474208e-08, "loss": 0.6062, "step": 9266 }, { "epoch": 0.95, "grad_norm": 1.6544132252680723, "learning_rate": 7.546749373977091e-08, "loss": 0.6307, "step": 9267 }, { "epoch": 0.95, "grad_norm": 1.4581865895851185, "learning_rate": 7.518153845766541e-08, "loss": 0.6675, "step": 9268 }, { "epoch": 0.95, "grad_norm": 1.4041582569059878, "learning_rate": 7.489612185970385e-08, "loss": 0.6829, "step": 9269 }, { "epoch": 0.95, "grad_norm": 1.6771908758450187, "learning_rate": 7.461124397710518e-08, "loss": 0.6596, "step": 9270 }, { "epoch": 0.95, "grad_norm": 1.6133179575545158, "learning_rate": 7.432690484103055e-08, "loss": 0.69, "step": 9271 }, { "epoch": 0.95, "grad_norm": 1.4859859894474656, "learning_rate": 7.404310448258068e-08, "loss": 0.6627, "step": 9272 }, { "epoch": 0.95, "grad_norm": 1.5229704667647264, "learning_rate": 7.375984293279792e-08, "loss": 0.6374, "step": 9273 }, { "epoch": 0.95, "grad_norm": 1.535088185709902, "learning_rate": 7.34771202226664e-08, "loss": 0.6539, "step": 9274 }, { "epoch": 0.95, "grad_norm": 1.6794383979176901, "learning_rate": 7.319493638311081e-08, "loss": 0.6759, "step": 9275 }, { "epoch": 0.95, "grad_norm": 1.6166635335391442, "learning_rate": 7.291329144499648e-08, "loss": 0.6015, "step": 9276 }, { "epoch": 0.95, "grad_norm": 1.4862592498411322, "learning_rate": 7.263218543912987e-08, "loss": 0.5297, "step": 9277 }, { "epoch": 0.95, "grad_norm": 1.546073570039631, "learning_rate": 7.235161839625971e-08, "loss": 0.6297, "step": 9278 }, { "epoch": 0.95, "grad_norm": 1.44644452923411, "learning_rate": 7.207159034707368e-08, "loss": 0.6566, "step": 9279 }, { "epoch": 0.95, "grad_norm": 1.4242104167950957, "learning_rate": 7.179210132220282e-08, "loss": 0.6604, "step": 9280 }, { "epoch": 0.95, "grad_norm": 1.7231363098932881, "learning_rate": 7.15131513522177e-08, "loss": 0.7224, "step": 9281 }, { "epoch": 0.95, "grad_norm": 1.680824758029011, "learning_rate": 7.123474046763001e-08, "loss": 0.6577, "step": 9282 }, { "epoch": 0.95, "grad_norm": 1.5963063964019997, "learning_rate": 7.095686869889317e-08, "loss": 0.7114, "step": 9283 }, { "epoch": 0.95, "grad_norm": 1.5891630837939639, "learning_rate": 7.067953607640066e-08, "loss": 0.6269, "step": 9284 }, { "epoch": 0.95, "grad_norm": 1.5606464745060618, "learning_rate": 7.040274263048763e-08, "loss": 0.6831, "step": 9285 }, { "epoch": 0.95, "grad_norm": 1.452204075604082, "learning_rate": 7.0126488391431e-08, "loss": 0.669, "step": 9286 }, { "epoch": 0.95, "grad_norm": 1.5220992183711848, "learning_rate": 6.985077338944656e-08, "loss": 0.6939, "step": 9287 }, { "epoch": 0.95, "grad_norm": 1.5289811148318055, "learning_rate": 6.957559765469301e-08, "loss": 0.5914, "step": 9288 }, { "epoch": 0.95, "grad_norm": 1.5109307907764835, "learning_rate": 6.93009612172707e-08, "loss": 0.6387, "step": 9289 }, { "epoch": 0.95, "grad_norm": 1.7808847443711442, "learning_rate": 6.902686410721781e-08, "loss": 0.6327, "step": 9290 }, { "epoch": 0.95, "grad_norm": 1.449018403253235, "learning_rate": 6.875330635451649e-08, "loss": 0.6683, "step": 9291 }, { "epoch": 0.95, "grad_norm": 1.403013459882213, "learning_rate": 6.848028798908946e-08, "loss": 0.635, "step": 9292 }, { "epoch": 0.95, "grad_norm": 1.5851414439277973, "learning_rate": 6.820780904079838e-08, "loss": 0.7244, "step": 9293 }, { "epoch": 0.95, "grad_norm": 1.3559231581832953, "learning_rate": 6.793586953944886e-08, "loss": 0.6292, "step": 9294 }, { "epoch": 0.95, "grad_norm": 1.485815388244738, "learning_rate": 6.766446951478545e-08, "loss": 0.5865, "step": 9295 }, { "epoch": 0.95, "grad_norm": 1.5235912821602395, "learning_rate": 6.739360899649383e-08, "loss": 0.6888, "step": 9296 }, { "epoch": 0.95, "grad_norm": 1.4138209587310602, "learning_rate": 6.712328801420199e-08, "loss": 0.6174, "step": 9297 }, { "epoch": 0.95, "grad_norm": 1.6250945626701065, "learning_rate": 6.685350659747735e-08, "loss": 0.6069, "step": 9298 }, { "epoch": 0.95, "grad_norm": 1.2517617222818487, "learning_rate": 6.658426477582969e-08, "loss": 0.5487, "step": 9299 }, { "epoch": 0.95, "grad_norm": 1.6819901818499992, "learning_rate": 6.631556257870875e-08, "loss": 0.7207, "step": 9300 }, { "epoch": 0.95, "grad_norm": 1.4878035800027334, "learning_rate": 6.604740003550547e-08, "loss": 0.7005, "step": 9301 }, { "epoch": 0.95, "grad_norm": 1.661569656164749, "learning_rate": 6.57797771755514e-08, "loss": 0.4923, "step": 9302 }, { "epoch": 0.95, "grad_norm": 1.383364457712765, "learning_rate": 6.551269402812089e-08, "loss": 0.5239, "step": 9303 }, { "epoch": 0.95, "grad_norm": 1.8899501158660454, "learning_rate": 6.524615062242723e-08, "loss": 0.5993, "step": 9304 }, { "epoch": 0.95, "grad_norm": 1.4341524498288618, "learning_rate": 6.498014698762434e-08, "loss": 0.7249, "step": 9305 }, { "epoch": 0.95, "grad_norm": 1.392288395900637, "learning_rate": 6.471468315281005e-08, "loss": 0.6174, "step": 9306 }, { "epoch": 0.95, "grad_norm": 1.4883976623934825, "learning_rate": 6.444975914701945e-08, "loss": 0.5683, "step": 9307 }, { "epoch": 0.95, "grad_norm": 1.6178088775513486, "learning_rate": 6.418537499923105e-08, "loss": 0.6478, "step": 9308 }, { "epoch": 0.95, "grad_norm": 1.4142575310989831, "learning_rate": 6.392153073836394e-08, "loss": 0.613, "step": 9309 }, { "epoch": 0.95, "grad_norm": 1.5967239395034452, "learning_rate": 6.365822639327724e-08, "loss": 0.6556, "step": 9310 }, { "epoch": 0.95, "grad_norm": 1.602339325184464, "learning_rate": 6.33954619927718e-08, "loss": 0.6184, "step": 9311 }, { "epoch": 0.95, "grad_norm": 1.404830872167317, "learning_rate": 6.313323756558908e-08, "loss": 0.6429, "step": 9312 }, { "epoch": 0.95, "grad_norm": 1.541817448770246, "learning_rate": 6.287155314041171e-08, "loss": 0.6945, "step": 9313 }, { "epoch": 0.95, "grad_norm": 1.4728038025736763, "learning_rate": 6.261040874586344e-08, "loss": 0.6357, "step": 9314 }, { "epoch": 0.95, "grad_norm": 1.7317537348114014, "learning_rate": 6.234980441050809e-08, "loss": 0.6439, "step": 9315 }, { "epoch": 0.95, "grad_norm": 2.1167453626170745, "learning_rate": 6.208974016285063e-08, "loss": 0.5719, "step": 9316 }, { "epoch": 0.95, "grad_norm": 1.2824801588918158, "learning_rate": 6.183021603133888e-08, "loss": 0.641, "step": 9317 }, { "epoch": 0.95, "grad_norm": 1.6214904730429691, "learning_rate": 6.157123204435844e-08, "loss": 0.6465, "step": 9318 }, { "epoch": 0.95, "grad_norm": 1.4269560815283218, "learning_rate": 6.131278823023723e-08, "loss": 0.586, "step": 9319 }, { "epoch": 0.95, "grad_norm": 1.5015246458699771, "learning_rate": 6.105488461724596e-08, "loss": 0.5872, "step": 9320 }, { "epoch": 0.95, "grad_norm": 1.424100617886919, "learning_rate": 6.079752123359261e-08, "loss": 0.5792, "step": 9321 }, { "epoch": 0.95, "grad_norm": 1.4080940513377163, "learning_rate": 6.054069810742912e-08, "loss": 0.5704, "step": 9322 }, { "epoch": 0.95, "grad_norm": 1.647463561918846, "learning_rate": 6.028441526684691e-08, "loss": 0.6923, "step": 9323 }, { "epoch": 0.95, "grad_norm": 1.870294826819821, "learning_rate": 6.0028672739878e-08, "loss": 0.7254, "step": 9324 }, { "epoch": 0.95, "grad_norm": 2.497296045678718, "learning_rate": 5.977347055449723e-08, "loss": 0.6625, "step": 9325 }, { "epoch": 0.95, "grad_norm": 1.548182828994596, "learning_rate": 5.9518808738617816e-08, "loss": 0.6995, "step": 9326 }, { "epoch": 0.95, "grad_norm": 1.4572451157344986, "learning_rate": 5.926468732009527e-08, "loss": 0.6024, "step": 9327 }, { "epoch": 0.95, "grad_norm": 1.7243074758759687, "learning_rate": 5.901110632672624e-08, "loss": 0.6492, "step": 9328 }, { "epoch": 0.95, "grad_norm": 1.5737798895875799, "learning_rate": 5.8758065786247434e-08, "loss": 0.567, "step": 9329 }, { "epoch": 0.95, "grad_norm": 1.3979308690566947, "learning_rate": 5.85055657263367e-08, "loss": 0.6142, "step": 9330 }, { "epoch": 0.95, "grad_norm": 1.4282032361798083, "learning_rate": 5.825360617461362e-08, "loss": 0.5736, "step": 9331 }, { "epoch": 0.95, "grad_norm": 1.4081950286863187, "learning_rate": 5.8002187158637256e-08, "loss": 0.5958, "step": 9332 }, { "epoch": 0.95, "grad_norm": 1.4125866396526894, "learning_rate": 5.775130870590784e-08, "loss": 0.652, "step": 9333 }, { "epoch": 0.95, "grad_norm": 1.5311971871478498, "learning_rate": 5.7500970843867853e-08, "loss": 0.7068, "step": 9334 }, { "epoch": 0.95, "grad_norm": 2.686016045434846, "learning_rate": 5.7251173599898737e-08, "loss": 0.7213, "step": 9335 }, { "epoch": 0.95, "grad_norm": 1.4763172138224683, "learning_rate": 5.7001917001324184e-08, "loss": 0.5898, "step": 9336 }, { "epoch": 0.95, "grad_norm": 2.2491428025278353, "learning_rate": 5.6753201075407935e-08, "loss": 0.6993, "step": 9337 }, { "epoch": 0.95, "grad_norm": 1.4985046018297525, "learning_rate": 5.6505025849355465e-08, "loss": 0.6738, "step": 9338 }, { "epoch": 0.95, "grad_norm": 1.7505945399780134, "learning_rate": 5.625739135031172e-08, "loss": 0.6535, "step": 9339 }, { "epoch": 0.95, "grad_norm": 1.7436220538889873, "learning_rate": 5.6010297605363914e-08, "loss": 0.6806, "step": 9340 }, { "epoch": 0.95, "grad_norm": 1.4760530125834241, "learning_rate": 5.576374464153988e-08, "loss": 0.6323, "step": 9341 }, { "epoch": 0.95, "grad_norm": 1.3902263236729995, "learning_rate": 5.55177324858075e-08, "loss": 0.669, "step": 9342 }, { "epoch": 0.95, "grad_norm": 1.3408422536561049, "learning_rate": 5.527226116507578e-08, "loss": 0.5914, "step": 9343 }, { "epoch": 0.95, "grad_norm": 1.4149240150967517, "learning_rate": 5.5027330706194926e-08, "loss": 0.5621, "step": 9344 }, { "epoch": 0.95, "grad_norm": 1.5305267500725024, "learning_rate": 5.478294113595572e-08, "loss": 0.6001, "step": 9345 }, { "epoch": 0.95, "grad_norm": 1.5021996226874452, "learning_rate": 5.453909248109013e-08, "loss": 0.6343, "step": 9346 }, { "epoch": 0.95, "grad_norm": 1.2442835984980278, "learning_rate": 5.4295784768270685e-08, "loss": 0.5859, "step": 9347 }, { "epoch": 0.95, "grad_norm": 2.099216689985148, "learning_rate": 5.405301802411056e-08, "loss": 0.6775, "step": 9348 }, { "epoch": 0.95, "grad_norm": 1.473762008357598, "learning_rate": 5.38107922751635e-08, "loss": 0.6574, "step": 9349 }, { "epoch": 0.95, "grad_norm": 1.3814085694592253, "learning_rate": 5.356910754792555e-08, "loss": 0.6748, "step": 9350 }, { "epoch": 0.95, "grad_norm": 1.5084588797667264, "learning_rate": 5.332796386883221e-08, "loss": 0.6998, "step": 9351 }, { "epoch": 0.95, "grad_norm": 1.5748797880046312, "learning_rate": 5.308736126425962e-08, "loss": 0.6694, "step": 9352 }, { "epoch": 0.96, "grad_norm": 1.3664576735103033, "learning_rate": 5.284729976052561e-08, "loss": 0.5693, "step": 9353 }, { "epoch": 0.96, "grad_norm": 1.6100368043570255, "learning_rate": 5.260777938388806e-08, "loss": 0.7336, "step": 9354 }, { "epoch": 0.96, "grad_norm": 1.489312090389565, "learning_rate": 5.236880016054713e-08, "loss": 0.5324, "step": 9355 }, { "epoch": 0.96, "grad_norm": 1.279169231589291, "learning_rate": 5.213036211664191e-08, "loss": 0.5136, "step": 9356 }, { "epoch": 0.96, "grad_norm": 1.5280383155709014, "learning_rate": 5.18924652782532e-08, "loss": 0.6085, "step": 9357 }, { "epoch": 0.96, "grad_norm": 1.540643268435026, "learning_rate": 5.165510967140241e-08, "loss": 0.6587, "step": 9358 }, { "epoch": 0.96, "grad_norm": 1.5034583852741847, "learning_rate": 5.141829532205211e-08, "loss": 0.6958, "step": 9359 }, { "epoch": 0.96, "grad_norm": 1.6766985009494104, "learning_rate": 5.1182022256106025e-08, "loss": 0.6255, "step": 9360 }, { "epoch": 0.96, "grad_norm": 1.5943168512623174, "learning_rate": 5.094629049940625e-08, "loss": 0.5953, "step": 9361 }, { "epoch": 0.96, "grad_norm": 1.5430294341777837, "learning_rate": 5.071110007773938e-08, "loss": 0.6101, "step": 9362 }, { "epoch": 0.96, "grad_norm": 1.5730042507046562, "learning_rate": 5.047645101682985e-08, "loss": 0.6158, "step": 9363 }, { "epoch": 0.96, "grad_norm": 1.4436141572562793, "learning_rate": 5.0242343342344904e-08, "loss": 0.5164, "step": 9364 }, { "epoch": 0.96, "grad_norm": 1.9463501675948507, "learning_rate": 5.0008777079890155e-08, "loss": 0.6756, "step": 9365 }, { "epoch": 0.96, "grad_norm": 1.5974876750787963, "learning_rate": 4.977575225501463e-08, "loss": 0.6601, "step": 9366 }, { "epoch": 0.96, "grad_norm": 1.4619449084719731, "learning_rate": 4.9543268893206263e-08, "loss": 0.5241, "step": 9367 }, { "epoch": 0.96, "grad_norm": 1.4823859033386375, "learning_rate": 4.931132701989472e-08, "loss": 0.6127, "step": 9368 }, { "epoch": 0.96, "grad_norm": 1.6517726849769745, "learning_rate": 4.907992666045025e-08, "loss": 0.6716, "step": 9369 }, { "epoch": 0.96, "grad_norm": 1.260595283321709, "learning_rate": 4.884906784018373e-08, "loss": 0.5663, "step": 9370 }, { "epoch": 0.96, "grad_norm": 1.5427144918196483, "learning_rate": 4.861875058434607e-08, "loss": 0.6787, "step": 9371 }, { "epoch": 0.96, "grad_norm": 1.5786568942120809, "learning_rate": 4.8388974918130996e-08, "loss": 0.6624, "step": 9372 }, { "epoch": 0.96, "grad_norm": 1.4327615271254157, "learning_rate": 4.815974086667119e-08, "loss": 0.6541, "step": 9373 }, { "epoch": 0.96, "grad_norm": 1.4356568992707548, "learning_rate": 4.793104845504049e-08, "loss": 0.5971, "step": 9374 }, { "epoch": 0.96, "grad_norm": 1.7312984151925876, "learning_rate": 4.770289770825387e-08, "loss": 0.7624, "step": 9375 }, { "epoch": 0.96, "grad_norm": 1.289519484589368, "learning_rate": 4.747528865126638e-08, "loss": 0.5246, "step": 9376 }, { "epoch": 0.96, "grad_norm": 1.6639338265223345, "learning_rate": 4.7248221308974215e-08, "loss": 0.5104, "step": 9377 }, { "epoch": 0.96, "grad_norm": 1.3523398417394286, "learning_rate": 4.702169570621529e-08, "loss": 0.5656, "step": 9378 }, { "epoch": 0.96, "grad_norm": 1.4455864874141147, "learning_rate": 4.6795711867766436e-08, "loss": 0.6363, "step": 9379 }, { "epoch": 0.96, "grad_norm": 1.6620282359143654, "learning_rate": 4.657026981834623e-08, "loss": 0.6723, "step": 9380 }, { "epoch": 0.96, "grad_norm": 1.7487624245479254, "learning_rate": 4.634536958261437e-08, "loss": 0.596, "step": 9381 }, { "epoch": 0.96, "grad_norm": 1.5543972595395688, "learning_rate": 4.612101118516954e-08, "loss": 0.7005, "step": 9382 }, { "epoch": 0.96, "grad_norm": 1.4665277679165114, "learning_rate": 4.589719465055431e-08, "loss": 0.5857, "step": 9383 }, { "epoch": 0.96, "grad_norm": 1.7797501557885524, "learning_rate": 4.5673920003248554e-08, "loss": 0.6935, "step": 9384 }, { "epoch": 0.96, "grad_norm": 1.4133591684683813, "learning_rate": 4.5451187267674393e-08, "loss": 0.5927, "step": 9385 }, { "epoch": 0.96, "grad_norm": 1.4041203332946937, "learning_rate": 4.5228996468195674e-08, "loss": 0.6463, "step": 9386 }, { "epoch": 0.96, "grad_norm": 1.5425417205702003, "learning_rate": 4.500734762911518e-08, "loss": 0.6561, "step": 9387 }, { "epoch": 0.96, "grad_norm": 1.4730903984626964, "learning_rate": 4.478624077467797e-08, "loss": 0.6362, "step": 9388 }, { "epoch": 0.96, "grad_norm": 1.6207000465321488, "learning_rate": 4.456567592906802e-08, "loss": 0.6638, "step": 9389 }, { "epoch": 0.96, "grad_norm": 1.5828513065682737, "learning_rate": 4.4345653116411593e-08, "loss": 0.6643, "step": 9390 }, { "epoch": 0.96, "grad_norm": 1.4646927727458883, "learning_rate": 4.412617236077554e-08, "loss": 0.6003, "step": 9391 }, { "epoch": 0.96, "grad_norm": 1.4094380802414852, "learning_rate": 4.3907233686166226e-08, "loss": 0.6517, "step": 9392 }, { "epoch": 0.96, "grad_norm": 1.6519696627594909, "learning_rate": 4.3688837116531715e-08, "loss": 0.6859, "step": 9393 }, { "epoch": 0.96, "grad_norm": 1.515485567349805, "learning_rate": 4.347098267576066e-08, "loss": 0.589, "step": 9394 }, { "epoch": 0.96, "grad_norm": 1.5421483950093815, "learning_rate": 4.325367038768235e-08, "loss": 0.6025, "step": 9395 }, { "epoch": 0.96, "grad_norm": 1.5755394735235089, "learning_rate": 4.3036900276066664e-08, "loss": 0.605, "step": 9396 }, { "epoch": 0.96, "grad_norm": 1.5642184240707244, "learning_rate": 4.2820672364624613e-08, "loss": 0.6937, "step": 9397 }, { "epoch": 0.96, "grad_norm": 1.5042154875602307, "learning_rate": 4.2604986677006745e-08, "loss": 0.5991, "step": 9398 }, { "epoch": 0.96, "grad_norm": 1.4711429809122738, "learning_rate": 4.238984323680584e-08, "loss": 0.5925, "step": 9399 }, { "epoch": 0.96, "grad_norm": 1.3439750420823287, "learning_rate": 4.217524206755419e-08, "loss": 0.5801, "step": 9400 }, { "epoch": 0.96, "grad_norm": 1.494154624498565, "learning_rate": 4.1961183192725244e-08, "loss": 0.6498, "step": 9401 }, { "epoch": 0.96, "grad_norm": 1.6637551401033446, "learning_rate": 4.1747666635733594e-08, "loss": 0.7893, "step": 9402 }, { "epoch": 0.96, "grad_norm": 2.0723243602068675, "learning_rate": 4.15346924199328e-08, "loss": 0.5767, "step": 9403 }, { "epoch": 0.96, "grad_norm": 1.3751895751556376, "learning_rate": 4.1322260568619763e-08, "loss": 0.6575, "step": 9404 }, { "epoch": 0.96, "grad_norm": 1.5487679393524165, "learning_rate": 4.11103711050298e-08, "loss": 0.5708, "step": 9405 }, { "epoch": 0.96, "grad_norm": 1.4777021786180333, "learning_rate": 4.089902405233992e-08, "loss": 0.6201, "step": 9406 }, { "epoch": 0.96, "grad_norm": 1.5235820019077049, "learning_rate": 4.0688219433667185e-08, "loss": 0.7089, "step": 9407 }, { "epoch": 0.96, "grad_norm": 1.6416881736647722, "learning_rate": 4.047795727207038e-08, "loss": 0.7791, "step": 9408 }, { "epoch": 0.96, "grad_norm": 1.4948884198629635, "learning_rate": 4.0268237590547764e-08, "loss": 0.6831, "step": 9409 }, { "epoch": 0.96, "grad_norm": 1.4245826231034229, "learning_rate": 4.0059060412038774e-08, "loss": 0.745, "step": 9410 }, { "epoch": 0.96, "grad_norm": 1.4507355269704374, "learning_rate": 3.9850425759423995e-08, "loss": 0.6455, "step": 9411 }, { "epoch": 0.96, "grad_norm": 1.3757264786812686, "learning_rate": 3.9642333655524054e-08, "loss": 0.5169, "step": 9412 }, { "epoch": 0.96, "grad_norm": 1.422838291103526, "learning_rate": 3.943478412309965e-08, "loss": 0.585, "step": 9413 }, { "epoch": 0.96, "grad_norm": 1.289871790062913, "learning_rate": 3.9227777184854264e-08, "loss": 0.5851, "step": 9414 }, { "epoch": 0.96, "grad_norm": 1.481080371514655, "learning_rate": 3.902131286342925e-08, "loss": 0.6614, "step": 9415 }, { "epoch": 0.96, "grad_norm": 1.4643003138882325, "learning_rate": 3.8815391181409314e-08, "loss": 0.6683, "step": 9416 }, { "epoch": 0.96, "grad_norm": 1.6056308351148396, "learning_rate": 3.861001216131699e-08, "loss": 0.7033, "step": 9417 }, { "epoch": 0.96, "grad_norm": 1.4886407804189836, "learning_rate": 3.84051758256182e-08, "loss": 0.6397, "step": 9418 }, { "epoch": 0.96, "grad_norm": 1.474925949999393, "learning_rate": 3.8200882196717805e-08, "loss": 0.6785, "step": 9419 }, { "epoch": 0.96, "grad_norm": 1.4421938122439248, "learning_rate": 3.799713129696181e-08, "loss": 0.6061, "step": 9420 }, { "epoch": 0.96, "grad_norm": 1.492304173344918, "learning_rate": 3.779392314863628e-08, "loss": 0.7051, "step": 9421 }, { "epoch": 0.96, "grad_norm": 1.5695039908567952, "learning_rate": 3.759125777396899e-08, "loss": 0.5709, "step": 9422 }, { "epoch": 0.96, "grad_norm": 1.5710551734115086, "learning_rate": 3.7389135195128325e-08, "loss": 0.6484, "step": 9423 }, { "epoch": 0.96, "grad_norm": 1.4220127494002819, "learning_rate": 3.7187555434221033e-08, "loss": 0.6607, "step": 9424 }, { "epoch": 0.96, "grad_norm": 1.4822329025105656, "learning_rate": 3.698651851329838e-08, "loss": 0.6127, "step": 9425 }, { "epoch": 0.96, "grad_norm": 1.4709498715155724, "learning_rate": 3.678602445434831e-08, "loss": 0.6325, "step": 9426 }, { "epoch": 0.96, "grad_norm": 1.4225925800436143, "learning_rate": 3.6586073279302193e-08, "loss": 0.462, "step": 9427 }, { "epoch": 0.96, "grad_norm": 1.7392043802379435, "learning_rate": 3.6386665010030295e-08, "loss": 0.604, "step": 9428 }, { "epoch": 0.96, "grad_norm": 1.4712842517670366, "learning_rate": 3.618779966834462e-08, "loss": 0.5437, "step": 9429 }, { "epoch": 0.96, "grad_norm": 2.008167342595605, "learning_rate": 3.5989477275997773e-08, "loss": 0.6754, "step": 9430 }, { "epoch": 0.96, "grad_norm": 1.6415911076327552, "learning_rate": 3.579169785468184e-08, "loss": 0.6637, "step": 9431 }, { "epoch": 0.96, "grad_norm": 1.4780829205226242, "learning_rate": 3.559446142603007e-08, "loss": 0.54, "step": 9432 }, { "epoch": 0.96, "grad_norm": 1.396130391512226, "learning_rate": 3.5397768011616874e-08, "loss": 0.6376, "step": 9433 }, { "epoch": 0.96, "grad_norm": 1.394822462467549, "learning_rate": 3.520161763295726e-08, "loss": 0.5478, "step": 9434 }, { "epoch": 0.96, "grad_norm": 1.5614348272489516, "learning_rate": 3.50060103115063e-08, "loss": 0.6196, "step": 9435 }, { "epoch": 0.96, "grad_norm": 1.4783044976678987, "learning_rate": 3.481094606865909e-08, "loss": 0.6448, "step": 9436 }, { "epoch": 0.96, "grad_norm": 1.67034655397275, "learning_rate": 3.461642492575301e-08, "loss": 0.6713, "step": 9437 }, { "epoch": 0.96, "grad_norm": 1.5320168804393355, "learning_rate": 3.4422446904064376e-08, "loss": 0.6754, "step": 9438 }, { "epoch": 0.96, "grad_norm": 1.4893363767336825, "learning_rate": 3.4229012024811215e-08, "loss": 0.7239, "step": 9439 }, { "epoch": 0.96, "grad_norm": 1.449410564546985, "learning_rate": 3.40361203091516e-08, "loss": 0.6864, "step": 9440 }, { "epoch": 0.96, "grad_norm": 1.5790641110606707, "learning_rate": 3.384377177818421e-08, "loss": 0.5809, "step": 9441 }, { "epoch": 0.96, "grad_norm": 1.4718523237377992, "learning_rate": 3.3651966452948326e-08, "loss": 0.6253, "step": 9442 }, { "epoch": 0.96, "grad_norm": 1.8302967342302094, "learning_rate": 3.346070435442439e-08, "loss": 0.6067, "step": 9443 }, { "epoch": 0.96, "grad_norm": 1.4479227980119889, "learning_rate": 3.326998550353289e-08, "loss": 0.6099, "step": 9444 }, { "epoch": 0.96, "grad_norm": 1.569157141465105, "learning_rate": 3.307980992113435e-08, "loss": 0.6198, "step": 9445 }, { "epoch": 0.96, "grad_norm": 1.4122537247736853, "learning_rate": 3.289017762803104e-08, "loss": 0.6009, "step": 9446 }, { "epoch": 0.96, "grad_norm": 1.3767793515698878, "learning_rate": 3.2701088644964686e-08, "loss": 0.5874, "step": 9447 }, { "epoch": 0.96, "grad_norm": 1.572374915958022, "learning_rate": 3.251254299261875e-08, "loss": 0.5822, "step": 9448 }, { "epoch": 0.96, "grad_norm": 1.383751382998061, "learning_rate": 3.232454069161617e-08, "loss": 0.7212, "step": 9449 }, { "epoch": 0.96, "grad_norm": 2.205108058308176, "learning_rate": 3.213708176252106e-08, "loss": 0.6572, "step": 9450 }, { "epoch": 0.97, "grad_norm": 1.5861785714249201, "learning_rate": 3.195016622583813e-08, "loss": 0.6193, "step": 9451 }, { "epoch": 0.97, "grad_norm": 1.4353512781113837, "learning_rate": 3.1763794102012135e-08, "loss": 0.6734, "step": 9452 }, { "epoch": 0.97, "grad_norm": 1.6997203838925397, "learning_rate": 3.157796541142899e-08, "loss": 0.6776, "step": 9453 }, { "epoch": 0.97, "grad_norm": 1.5073788907320964, "learning_rate": 3.1392680174415214e-08, "loss": 0.6769, "step": 9454 }, { "epoch": 0.97, "grad_norm": 1.5448013606836852, "learning_rate": 3.1207938411236816e-08, "loss": 0.6838, "step": 9455 }, { "epoch": 0.97, "grad_norm": 1.6003102529619684, "learning_rate": 3.1023740142102076e-08, "loss": 0.6106, "step": 9456 }, { "epoch": 0.97, "grad_norm": 1.4800691609383752, "learning_rate": 3.0840085387158214e-08, "loss": 0.6442, "step": 9457 }, { "epoch": 0.97, "grad_norm": 1.436778125243212, "learning_rate": 3.06569741664936e-08, "loss": 0.5792, "step": 9458 }, { "epoch": 0.97, "grad_norm": 1.6038873937710498, "learning_rate": 3.047440650013778e-08, "loss": 0.7628, "step": 9459 }, { "epoch": 0.97, "grad_norm": 1.5306994910096334, "learning_rate": 3.0292382408059764e-08, "loss": 0.6283, "step": 9460 }, { "epoch": 0.97, "grad_norm": 1.5948126419397817, "learning_rate": 3.011090191016974e-08, "loss": 0.6066, "step": 9461 }, { "epoch": 0.97, "grad_norm": 1.3010869505925862, "learning_rate": 2.992996502631851e-08, "loss": 0.5962, "step": 9462 }, { "epoch": 0.97, "grad_norm": 1.4042782884951486, "learning_rate": 2.9749571776296892e-08, "loss": 0.6795, "step": 9463 }, { "epoch": 0.97, "grad_norm": 1.7080343592400642, "learning_rate": 2.956972217983689e-08, "loss": 0.6044, "step": 9464 }, { "epoch": 0.97, "grad_norm": 1.3777655747207955, "learning_rate": 2.9390416256611097e-08, "loss": 0.7379, "step": 9465 }, { "epoch": 0.97, "grad_norm": 1.5326136643919397, "learning_rate": 2.9211654026231607e-08, "loss": 0.6077, "step": 9466 }, { "epoch": 0.97, "grad_norm": 1.3873698668939964, "learning_rate": 2.903343550825166e-08, "loss": 0.6316, "step": 9467 }, { "epoch": 0.97, "grad_norm": 2.208617046309752, "learning_rate": 2.885576072216567e-08, "loss": 0.6481, "step": 9468 }, { "epoch": 0.97, "grad_norm": 1.4636051580273912, "learning_rate": 2.8678629687406977e-08, "loss": 0.5825, "step": 9469 }, { "epoch": 0.97, "grad_norm": 1.4129321612217725, "learning_rate": 2.8502042423351752e-08, "loss": 0.6479, "step": 9470 }, { "epoch": 0.97, "grad_norm": 1.3186727004759067, "learning_rate": 2.8325998949314536e-08, "loss": 0.6182, "step": 9471 }, { "epoch": 0.97, "grad_norm": 1.8625312080787795, "learning_rate": 2.8150499284551048e-08, "loss": 0.7176, "step": 9472 }, { "epoch": 0.97, "grad_norm": 3.460157690297204, "learning_rate": 2.79755434482587e-08, "loss": 0.6482, "step": 9473 }, { "epoch": 0.97, "grad_norm": 1.6153323478954764, "learning_rate": 2.7801131459573304e-08, "loss": 0.6449, "step": 9474 }, { "epoch": 0.97, "grad_norm": 3.616812160812912, "learning_rate": 2.762726333757293e-08, "loss": 0.6383, "step": 9475 }, { "epoch": 0.97, "grad_norm": 1.5930915607496137, "learning_rate": 2.7453939101275695e-08, "loss": 0.7114, "step": 9476 }, { "epoch": 0.97, "grad_norm": 1.4680278325543872, "learning_rate": 2.7281158769639215e-08, "loss": 0.6082, "step": 9477 }, { "epoch": 0.97, "grad_norm": 1.2683209737635033, "learning_rate": 2.7108922361563372e-08, "loss": 0.5859, "step": 9478 }, { "epoch": 0.97, "grad_norm": 1.5438413329300738, "learning_rate": 2.693722989588754e-08, "loss": 0.628, "step": 9479 }, { "epoch": 0.97, "grad_norm": 1.7209170207474787, "learning_rate": 2.676608139139114e-08, "loss": 0.655, "step": 9480 }, { "epoch": 0.97, "grad_norm": 1.252346307578328, "learning_rate": 2.659547686679531e-08, "loss": 0.5644, "step": 9481 }, { "epoch": 0.97, "grad_norm": 1.2538762100693628, "learning_rate": 2.6425416340760124e-08, "loss": 0.5561, "step": 9482 }, { "epoch": 0.97, "grad_norm": 1.5573335674101871, "learning_rate": 2.625589983188792e-08, "loss": 0.6006, "step": 9483 }, { "epoch": 0.97, "grad_norm": 1.7008472114745894, "learning_rate": 2.6086927358720538e-08, "loss": 0.6502, "step": 9484 }, { "epoch": 0.97, "grad_norm": 1.3861243241714543, "learning_rate": 2.591849893974041e-08, "loss": 0.5641, "step": 9485 }, { "epoch": 0.97, "grad_norm": 1.3385455915957833, "learning_rate": 2.5750614593370028e-08, "loss": 0.5804, "step": 9486 }, { "epoch": 0.97, "grad_norm": 1.540250805016295, "learning_rate": 2.5583274337973583e-08, "loss": 0.6954, "step": 9487 }, { "epoch": 0.97, "grad_norm": 1.5376763558178745, "learning_rate": 2.541647819185422e-08, "loss": 0.6237, "step": 9488 }, { "epoch": 0.97, "grad_norm": 1.4771690207752861, "learning_rate": 2.525022617325734e-08, "loss": 0.6555, "step": 9489 }, { "epoch": 0.97, "grad_norm": 1.718442394876994, "learning_rate": 2.5084518300366735e-08, "loss": 0.6189, "step": 9490 }, { "epoch": 0.97, "grad_norm": 1.5106150321956533, "learning_rate": 2.4919354591308453e-08, "loss": 0.6891, "step": 9491 }, { "epoch": 0.97, "grad_norm": 1.362648356709891, "learning_rate": 2.475473506414805e-08, "loss": 0.6012, "step": 9492 }, { "epoch": 0.97, "grad_norm": 1.544277001412474, "learning_rate": 2.4590659736892785e-08, "loss": 0.695, "step": 9493 }, { "epoch": 0.97, "grad_norm": 1.44204094624376, "learning_rate": 2.442712862748775e-08, "loss": 0.561, "step": 9494 }, { "epoch": 0.97, "grad_norm": 1.552476958932822, "learning_rate": 2.4264141753821412e-08, "loss": 0.5899, "step": 9495 }, { "epoch": 0.97, "grad_norm": 1.540269915022559, "learning_rate": 2.4101699133721733e-08, "loss": 0.6109, "step": 9496 }, { "epoch": 0.97, "grad_norm": 1.5430836156894219, "learning_rate": 2.393980078495617e-08, "loss": 0.652, "step": 9497 }, { "epoch": 0.97, "grad_norm": 1.5795776069584206, "learning_rate": 2.3778446725233328e-08, "loss": 0.6175, "step": 9498 }, { "epoch": 0.97, "grad_norm": 1.2264946905064062, "learning_rate": 2.361763697220354e-08, "loss": 0.563, "step": 9499 }, { "epoch": 0.97, "grad_norm": 1.6048173499066054, "learning_rate": 2.345737154345551e-08, "loss": 0.6789, "step": 9500 }, { "epoch": 0.97, "grad_norm": 1.4399277332629137, "learning_rate": 2.3297650456519106e-08, "loss": 0.7164, "step": 9501 }, { "epoch": 0.97, "grad_norm": 1.4225754310621443, "learning_rate": 2.3138473728865353e-08, "loss": 0.6053, "step": 9502 }, { "epoch": 0.97, "grad_norm": 1.4768501107547936, "learning_rate": 2.2979841377904765e-08, "loss": 0.6358, "step": 9503 }, { "epoch": 0.97, "grad_norm": 1.6770986725935393, "learning_rate": 2.2821753420989577e-08, "loss": 0.7057, "step": 9504 }, { "epoch": 0.97, "grad_norm": 1.4032909877456952, "learning_rate": 2.2664209875411513e-08, "loss": 0.5645, "step": 9505 }, { "epoch": 0.97, "grad_norm": 1.4607864107001889, "learning_rate": 2.2507210758401787e-08, "loss": 0.6774, "step": 9506 }, { "epoch": 0.97, "grad_norm": 1.4916995720820383, "learning_rate": 2.2350756087134995e-08, "loss": 0.6546, "step": 9507 }, { "epoch": 0.97, "grad_norm": 1.5755474573627173, "learning_rate": 2.2194845878723002e-08, "loss": 0.6689, "step": 9508 }, { "epoch": 0.97, "grad_norm": 1.601609554254628, "learning_rate": 2.2039480150219394e-08, "loss": 0.6052, "step": 9509 }, { "epoch": 0.97, "grad_norm": 1.6375182546975644, "learning_rate": 2.1884658918620017e-08, "loss": 0.6899, "step": 9510 }, { "epoch": 0.97, "grad_norm": 1.5210757782084234, "learning_rate": 2.1730382200857435e-08, "loss": 0.7074, "step": 9511 }, { "epoch": 0.97, "grad_norm": 1.6752120015955532, "learning_rate": 2.15766500138076e-08, "loss": 0.6999, "step": 9512 }, { "epoch": 0.97, "grad_norm": 1.305028606146919, "learning_rate": 2.1423462374286496e-08, "loss": 0.5677, "step": 9513 }, { "epoch": 0.97, "grad_norm": 1.6789027513675376, "learning_rate": 2.1270819299049063e-08, "loss": 0.6357, "step": 9514 }, { "epoch": 0.97, "grad_norm": 2.100493293011954, "learning_rate": 2.111872080479249e-08, "loss": 0.54, "step": 9515 }, { "epoch": 0.97, "grad_norm": 1.444305214080133, "learning_rate": 2.096716690815237e-08, "loss": 0.5328, "step": 9516 }, { "epoch": 0.97, "grad_norm": 1.4615927260864348, "learning_rate": 2.0816157625706547e-08, "loss": 0.6459, "step": 9517 }, { "epoch": 0.97, "grad_norm": 1.47445239675484, "learning_rate": 2.0665692973973473e-08, "loss": 0.6521, "step": 9518 }, { "epoch": 0.97, "grad_norm": 1.611414877597268, "learning_rate": 2.0515772969409984e-08, "loss": 0.7632, "step": 9519 }, { "epoch": 0.97, "grad_norm": 1.464754817325807, "learning_rate": 2.0366397628414636e-08, "loss": 0.6376, "step": 9520 }, { "epoch": 0.97, "grad_norm": 1.6184652225255216, "learning_rate": 2.021756696732713e-08, "loss": 0.7242, "step": 9521 }, { "epoch": 0.97, "grad_norm": 1.3784653618842169, "learning_rate": 2.006928100242611e-08, "loss": 0.6153, "step": 9522 }, { "epoch": 0.97, "grad_norm": 1.4995160781061385, "learning_rate": 1.992153974993194e-08, "loss": 0.583, "step": 9523 }, { "epoch": 0.97, "grad_norm": 1.6395355398883653, "learning_rate": 1.9774343226004466e-08, "loss": 0.6334, "step": 9524 }, { "epoch": 0.97, "grad_norm": 1.4267683501956452, "learning_rate": 1.9627691446743592e-08, "loss": 0.5631, "step": 9525 }, { "epoch": 0.97, "grad_norm": 1.2794484100578725, "learning_rate": 1.948158442819148e-08, "loss": 0.6169, "step": 9526 }, { "epoch": 0.97, "grad_norm": 1.36136106996072, "learning_rate": 1.9336022186328685e-08, "loss": 0.6532, "step": 9527 }, { "epoch": 0.97, "grad_norm": 1.407722471420409, "learning_rate": 1.9191004737077467e-08, "loss": 0.6581, "step": 9528 }, { "epoch": 0.97, "grad_norm": 1.39431101708437, "learning_rate": 1.904653209630014e-08, "loss": 0.5937, "step": 9529 }, { "epoch": 0.97, "grad_norm": 1.4721111637400615, "learning_rate": 1.8902604279798508e-08, "loss": 0.5765, "step": 9530 }, { "epoch": 0.97, "grad_norm": 1.541225886190947, "learning_rate": 1.8759221303316642e-08, "loss": 0.6137, "step": 9531 }, { "epoch": 0.97, "grad_norm": 1.4226418679685167, "learning_rate": 1.8616383182537555e-08, "loss": 0.6667, "step": 9532 }, { "epoch": 0.97, "grad_norm": 1.4728238879684101, "learning_rate": 1.8474089933085416e-08, "loss": 0.6197, "step": 9533 }, { "epoch": 0.97, "grad_norm": 1.9793617388922484, "learning_rate": 1.8332341570523328e-08, "loss": 0.752, "step": 9534 }, { "epoch": 0.97, "grad_norm": 1.6623077489883098, "learning_rate": 1.8191138110357777e-08, "loss": 0.7097, "step": 9535 }, { "epoch": 0.97, "grad_norm": 1.4802749326583364, "learning_rate": 1.8050479568031964e-08, "loss": 0.5705, "step": 9536 }, { "epoch": 0.97, "grad_norm": 1.3699819989509643, "learning_rate": 1.7910365958932475e-08, "loss": 0.6661, "step": 9537 }, { "epoch": 0.97, "grad_norm": 1.5363192313529188, "learning_rate": 1.7770797298385377e-08, "loss": 0.7235, "step": 9538 }, { "epoch": 0.97, "grad_norm": 1.5460412208409227, "learning_rate": 1.7631773601656243e-08, "loss": 0.5792, "step": 9539 }, { "epoch": 0.97, "grad_norm": 1.392427386094642, "learning_rate": 1.749329488395124e-08, "loss": 0.5785, "step": 9540 }, { "epoch": 0.97, "grad_norm": 1.6063156921551478, "learning_rate": 1.7355361160418248e-08, "loss": 0.7933, "step": 9541 }, { "epoch": 0.97, "grad_norm": 1.3980301342484154, "learning_rate": 1.7217972446144648e-08, "loss": 0.5964, "step": 9542 }, { "epoch": 0.97, "grad_norm": 1.487773919364931, "learning_rate": 1.7081128756158416e-08, "loss": 0.6414, "step": 9543 }, { "epoch": 0.97, "grad_norm": 1.3995864921550474, "learning_rate": 1.6944830105426468e-08, "loss": 0.6938, "step": 9544 }, { "epoch": 0.97, "grad_norm": 1.4955141028192804, "learning_rate": 1.6809076508858547e-08, "loss": 0.6039, "step": 9545 }, { "epoch": 0.97, "grad_norm": 1.599091820680417, "learning_rate": 1.6673867981303328e-08, "loss": 0.8118, "step": 9546 }, { "epoch": 0.97, "grad_norm": 1.3594271858108895, "learning_rate": 1.653920453755009e-08, "loss": 0.5844, "step": 9547 }, { "epoch": 0.97, "grad_norm": 1.4104249564290037, "learning_rate": 1.640508619232817e-08, "loss": 0.5964, "step": 9548 }, { "epoch": 0.98, "grad_norm": 1.5186935703893452, "learning_rate": 1.6271512960308045e-08, "loss": 0.6371, "step": 9549 }, { "epoch": 0.98, "grad_norm": 1.551904865264792, "learning_rate": 1.61384848560997e-08, "loss": 0.7157, "step": 9550 }, { "epoch": 0.98, "grad_norm": 1.5942425847572026, "learning_rate": 1.600600189425483e-08, "loss": 0.726, "step": 9551 }, { "epoch": 0.98, "grad_norm": 1.5423710437869467, "learning_rate": 1.5874064089264062e-08, "loss": 0.7058, "step": 9552 }, { "epoch": 0.98, "grad_norm": 1.5106260721786404, "learning_rate": 1.5742671455558633e-08, "loss": 0.6033, "step": 9553 }, { "epoch": 0.98, "grad_norm": 1.4061110535698125, "learning_rate": 1.561182400751149e-08, "loss": 0.5419, "step": 9554 }, { "epoch": 0.98, "grad_norm": 1.5729518381470515, "learning_rate": 1.5481521759433404e-08, "loss": 0.6855, "step": 9555 }, { "epoch": 0.98, "grad_norm": 1.5618513936150635, "learning_rate": 1.5351764725577977e-08, "loss": 0.5799, "step": 9556 }, { "epoch": 0.98, "grad_norm": 2.7182886374901702, "learning_rate": 1.5222552920138855e-08, "loss": 0.6175, "step": 9557 }, { "epoch": 0.98, "grad_norm": 1.6782109001759202, "learning_rate": 1.5093886357248066e-08, "loss": 0.724, "step": 9558 }, { "epoch": 0.98, "grad_norm": 1.5477513575471298, "learning_rate": 1.4965765050980462e-08, "loss": 0.6188, "step": 9559 }, { "epoch": 0.98, "grad_norm": 1.2943498985646336, "learning_rate": 1.483818901534928e-08, "loss": 0.6172, "step": 9560 }, { "epoch": 0.98, "grad_norm": 1.5500140248764511, "learning_rate": 1.4711158264309467e-08, "loss": 0.6068, "step": 9561 }, { "epoch": 0.98, "grad_norm": 1.52389886314574, "learning_rate": 1.4584672811756018e-08, "loss": 0.5867, "step": 9562 }, { "epoch": 0.98, "grad_norm": 1.3394670413000012, "learning_rate": 1.4458732671523978e-08, "loss": 0.5536, "step": 9563 }, { "epoch": 0.98, "grad_norm": 1.6170865536662717, "learning_rate": 1.4333337857387885e-08, "loss": 0.6374, "step": 9564 }, { "epoch": 0.98, "grad_norm": 1.5906561606219314, "learning_rate": 1.42084883830651e-08, "loss": 0.6513, "step": 9565 }, { "epoch": 0.98, "grad_norm": 1.467095933762201, "learning_rate": 1.4084184262211365e-08, "loss": 0.6866, "step": 9566 }, { "epoch": 0.98, "grad_norm": 1.4118014760869555, "learning_rate": 1.3960425508422471e-08, "loss": 0.6508, "step": 9567 }, { "epoch": 0.98, "grad_norm": 1.51019536821475, "learning_rate": 1.3837212135236478e-08, "loss": 0.6513, "step": 9568 }, { "epoch": 0.98, "grad_norm": 1.519372288177394, "learning_rate": 1.3714544156129827e-08, "loss": 0.5795, "step": 9569 }, { "epoch": 0.98, "grad_norm": 1.6688532333792716, "learning_rate": 1.3592421584520677e-08, "loss": 0.6525, "step": 9570 }, { "epoch": 0.98, "grad_norm": 1.3364364802384412, "learning_rate": 1.3470844433767227e-08, "loss": 0.6, "step": 9571 }, { "epoch": 0.98, "grad_norm": 1.8557229056091067, "learning_rate": 1.3349812717166622e-08, "loss": 0.6892, "step": 9572 }, { "epoch": 0.98, "grad_norm": 1.7320748403896775, "learning_rate": 1.3229326447958268e-08, "loss": 0.6122, "step": 9573 }, { "epoch": 0.98, "grad_norm": 1.7447467725676808, "learning_rate": 1.310938563932107e-08, "loss": 0.5814, "step": 9574 }, { "epoch": 0.98, "grad_norm": 1.452570733639149, "learning_rate": 1.2989990304373978e-08, "loss": 0.6511, "step": 9575 }, { "epoch": 0.98, "grad_norm": 1.581995588120747, "learning_rate": 1.2871140456177101e-08, "loss": 0.5915, "step": 9576 }, { "epoch": 0.98, "grad_norm": 1.4520976191042256, "learning_rate": 1.2752836107730593e-08, "loss": 0.6734, "step": 9577 }, { "epoch": 0.98, "grad_norm": 1.5377904094745622, "learning_rate": 1.2635077271974661e-08, "loss": 0.6364, "step": 9578 }, { "epoch": 0.98, "grad_norm": 1.460011401195632, "learning_rate": 1.2517863961789e-08, "loss": 0.5298, "step": 9579 }, { "epoch": 0.98, "grad_norm": 1.539046586893701, "learning_rate": 1.2401196189996135e-08, "loss": 0.6337, "step": 9580 }, { "epoch": 0.98, "grad_norm": 1.5832441634757912, "learning_rate": 1.2285073969355854e-08, "loss": 0.6705, "step": 9581 }, { "epoch": 0.98, "grad_norm": 1.4822506895694616, "learning_rate": 1.2169497312570777e-08, "loss": 0.6243, "step": 9582 }, { "epoch": 0.98, "grad_norm": 1.618394099146983, "learning_rate": 1.2054466232282458e-08, "loss": 0.7527, "step": 9583 }, { "epoch": 0.98, "grad_norm": 1.5690870704374174, "learning_rate": 1.1939980741073055e-08, "loss": 0.5899, "step": 9584 }, { "epoch": 0.98, "grad_norm": 1.480872529458142, "learning_rate": 1.1826040851465882e-08, "loss": 0.5863, "step": 9585 }, { "epoch": 0.98, "grad_norm": 1.5185981718740813, "learning_rate": 1.1712646575922637e-08, "loss": 0.6567, "step": 9586 }, { "epoch": 0.98, "grad_norm": 1.5884113011260548, "learning_rate": 1.1599797926847844e-08, "loss": 0.6043, "step": 9587 }, { "epoch": 0.98, "grad_norm": 1.6588216346482938, "learning_rate": 1.1487494916584407e-08, "loss": 0.6023, "step": 9588 }, { "epoch": 0.98, "grad_norm": 1.3548128423948815, "learning_rate": 1.137573755741639e-08, "loss": 0.5377, "step": 9589 }, { "epoch": 0.98, "grad_norm": 1.527850128931261, "learning_rate": 1.1264525861567344e-08, "loss": 0.7627, "step": 9590 }, { "epoch": 0.98, "grad_norm": 1.4336002726221933, "learning_rate": 1.1153859841203096e-08, "loss": 0.6465, "step": 9591 }, { "epoch": 0.98, "grad_norm": 1.4961369116613297, "learning_rate": 1.1043739508426743e-08, "loss": 0.6191, "step": 9592 }, { "epoch": 0.98, "grad_norm": 1.6354842076390332, "learning_rate": 1.0934164875284758e-08, "loss": 0.629, "step": 9593 }, { "epoch": 0.98, "grad_norm": 1.4545435793877037, "learning_rate": 1.0825135953762556e-08, "loss": 0.6445, "step": 9594 }, { "epoch": 0.98, "grad_norm": 1.669321924526181, "learning_rate": 1.071665275578504e-08, "loss": 0.6691, "step": 9595 }, { "epoch": 0.98, "grad_norm": 1.2976236458700001, "learning_rate": 1.0608715293218829e-08, "loss": 0.5909, "step": 9596 }, { "epoch": 0.98, "grad_norm": 1.6675293836168437, "learning_rate": 1.0501323577870037e-08, "loss": 0.6489, "step": 9597 }, { "epoch": 0.98, "grad_norm": 1.6423431566196567, "learning_rate": 1.0394477621485377e-08, "loss": 0.6342, "step": 9598 }, { "epoch": 0.98, "grad_norm": 1.5651791015667473, "learning_rate": 1.0288177435752723e-08, "loss": 0.6597, "step": 9599 }, { "epoch": 0.98, "grad_norm": 1.969219541465743, "learning_rate": 1.0182423032297772e-08, "loss": 0.6622, "step": 9600 }, { "epoch": 0.98, "grad_norm": 1.379921891665149, "learning_rate": 1.0077214422689052e-08, "loss": 0.633, "step": 9601 }, { "epoch": 0.98, "grad_norm": 1.3140410001399743, "learning_rate": 9.972551618434023e-09, "loss": 0.6781, "step": 9602 }, { "epoch": 0.98, "grad_norm": 1.4909030438633086, "learning_rate": 9.868434630981304e-09, "loss": 0.6445, "step": 9603 }, { "epoch": 0.98, "grad_norm": 1.4985543637559902, "learning_rate": 9.764863471719566e-09, "loss": 0.6297, "step": 9604 }, { "epoch": 0.98, "grad_norm": 1.5383085340238314, "learning_rate": 9.661838151976966e-09, "loss": 0.6764, "step": 9605 }, { "epoch": 0.98, "grad_norm": 1.400436712139019, "learning_rate": 9.559358683022268e-09, "loss": 0.5659, "step": 9606 }, { "epoch": 0.98, "grad_norm": 1.5227662649398284, "learning_rate": 9.45742507606595e-09, "loss": 0.609, "step": 9607 }, { "epoch": 0.98, "grad_norm": 2.025495231536977, "learning_rate": 9.356037342256874e-09, "loss": 0.6799, "step": 9608 }, { "epoch": 0.98, "grad_norm": 2.532128623382475, "learning_rate": 9.25519549268561e-09, "loss": 0.6422, "step": 9609 }, { "epoch": 0.98, "grad_norm": 1.7107468440387275, "learning_rate": 9.154899538381112e-09, "loss": 0.6086, "step": 9610 }, { "epoch": 0.98, "grad_norm": 1.6747347964855384, "learning_rate": 9.055149490315163e-09, "loss": 0.6515, "step": 9611 }, { "epoch": 0.98, "grad_norm": 1.398092573844909, "learning_rate": 8.955945359398477e-09, "loss": 0.6733, "step": 9612 }, { "epoch": 0.98, "grad_norm": 1.7232992676763668, "learning_rate": 8.85728715648182e-09, "loss": 0.7262, "step": 9613 }, { "epoch": 0.98, "grad_norm": 1.3788457796366946, "learning_rate": 8.759174892356004e-09, "loss": 0.5546, "step": 9614 }, { "epoch": 0.98, "grad_norm": 1.4212229869946034, "learning_rate": 8.661608577753556e-09, "loss": 0.533, "step": 9615 }, { "epoch": 0.98, "grad_norm": 1.4602700452717534, "learning_rate": 8.564588223346493e-09, "loss": 0.6215, "step": 9616 }, { "epoch": 0.98, "grad_norm": 1.487830991080886, "learning_rate": 8.468113839746328e-09, "loss": 0.5754, "step": 9617 }, { "epoch": 0.98, "grad_norm": 1.590362118944485, "learning_rate": 8.372185437506286e-09, "loss": 0.6638, "step": 9618 }, { "epoch": 0.98, "grad_norm": 1.4919300339819201, "learning_rate": 8.27680302711853e-09, "loss": 0.7199, "step": 9619 }, { "epoch": 0.98, "grad_norm": 1.4180778990695693, "learning_rate": 8.181966619016379e-09, "loss": 0.5584, "step": 9620 }, { "epoch": 0.98, "grad_norm": 1.411068158026272, "learning_rate": 8.08767622357376e-09, "loss": 0.6236, "step": 9621 }, { "epoch": 0.98, "grad_norm": 1.5412564305986995, "learning_rate": 7.993931851103531e-09, "loss": 0.5979, "step": 9622 }, { "epoch": 0.98, "grad_norm": 1.3929102080558047, "learning_rate": 7.900733511859715e-09, "loss": 0.6961, "step": 9623 }, { "epoch": 0.98, "grad_norm": 1.631441027148086, "learning_rate": 7.808081216036933e-09, "loss": 0.6635, "step": 9624 }, { "epoch": 0.98, "grad_norm": 1.658924549019276, "learning_rate": 7.715974973769302e-09, "loss": 0.5623, "step": 9625 }, { "epoch": 0.98, "grad_norm": 1.502715574517317, "learning_rate": 7.624414795130986e-09, "loss": 0.6305, "step": 9626 }, { "epoch": 0.98, "grad_norm": 1.5920727766206846, "learning_rate": 7.533400690138415e-09, "loss": 0.6287, "step": 9627 }, { "epoch": 0.98, "grad_norm": 1.3827157955568568, "learning_rate": 7.442932668745295e-09, "loss": 0.6379, "step": 9628 }, { "epoch": 0.98, "grad_norm": 1.3536746144053928, "learning_rate": 7.353010740848155e-09, "loss": 0.6451, "step": 9629 }, { "epoch": 0.98, "grad_norm": 1.470662227374944, "learning_rate": 7.263634916282458e-09, "loss": 0.5911, "step": 9630 }, { "epoch": 0.98, "grad_norm": 1.4340343553519803, "learning_rate": 7.174805204824276e-09, "loss": 0.6334, "step": 9631 }, { "epoch": 0.98, "grad_norm": 1.458749973624847, "learning_rate": 7.0865216161902785e-09, "loss": 0.6736, "step": 9632 }, { "epoch": 0.98, "grad_norm": 1.449095829404101, "learning_rate": 6.998784160036632e-09, "loss": 0.6277, "step": 9633 }, { "epoch": 0.98, "grad_norm": 1.4225387957650704, "learning_rate": 6.911592845960102e-09, "loss": 0.6713, "step": 9634 }, { "epoch": 0.98, "grad_norm": 1.6072988489340203, "learning_rate": 6.8249476834980625e-09, "loss": 0.5914, "step": 9635 }, { "epoch": 0.98, "grad_norm": 1.4155396817292893, "learning_rate": 6.73884868212793e-09, "loss": 0.5284, "step": 9636 }, { "epoch": 0.98, "grad_norm": 1.5789531927197882, "learning_rate": 6.653295851267172e-09, "loss": 0.7297, "step": 9637 }, { "epoch": 0.98, "grad_norm": 1.4860231377186517, "learning_rate": 6.568289200273859e-09, "loss": 0.6876, "step": 9638 }, { "epoch": 0.98, "grad_norm": 1.5044677918912848, "learning_rate": 6.483828738446107e-09, "loss": 0.5192, "step": 9639 }, { "epoch": 0.98, "grad_norm": 1.6305232906366622, "learning_rate": 6.399914475022084e-09, "loss": 0.585, "step": 9640 }, { "epoch": 0.98, "grad_norm": 1.4350185703743634, "learning_rate": 6.316546419181113e-09, "loss": 0.5448, "step": 9641 }, { "epoch": 0.98, "grad_norm": 1.5189646964766415, "learning_rate": 6.233724580040901e-09, "loss": 0.6722, "step": 9642 }, { "epoch": 0.98, "grad_norm": 1.384754882826774, "learning_rate": 6.151448966661977e-09, "loss": 0.5648, "step": 9643 }, { "epoch": 0.98, "grad_norm": 1.4278505159801094, "learning_rate": 6.069719588043255e-09, "loss": 0.6897, "step": 9644 }, { "epoch": 0.98, "grad_norm": 1.737869086239696, "learning_rate": 5.98853645312425e-09, "loss": 0.5691, "step": 9645 }, { "epoch": 0.98, "grad_norm": 1.6855816425044736, "learning_rate": 5.907899570784525e-09, "loss": 0.6653, "step": 9646 }, { "epoch": 0.99, "grad_norm": 1.5874669652013742, "learning_rate": 5.827808949845359e-09, "loss": 0.6148, "step": 9647 }, { "epoch": 0.99, "grad_norm": 1.4670304481023275, "learning_rate": 5.748264599066411e-09, "loss": 0.653, "step": 9648 }, { "epoch": 0.99, "grad_norm": 1.5687263266527312, "learning_rate": 5.669266527148498e-09, "loss": 0.7096, "step": 9649 }, { "epoch": 0.99, "grad_norm": 1.5269553275650523, "learning_rate": 5.590814742732486e-09, "loss": 0.6336, "step": 9650 }, { "epoch": 0.99, "grad_norm": 1.4509574993145307, "learning_rate": 5.512909254399845e-09, "loss": 0.6127, "step": 9651 }, { "epoch": 0.99, "grad_norm": 1.3468429903285597, "learning_rate": 5.435550070671536e-09, "loss": 0.5546, "step": 9652 }, { "epoch": 0.99, "grad_norm": 1.8318858352600145, "learning_rate": 5.358737200009678e-09, "loss": 0.6959, "step": 9653 }, { "epoch": 0.99, "grad_norm": 1.40344275190469, "learning_rate": 5.2824706508158855e-09, "loss": 0.5552, "step": 9654 }, { "epoch": 0.99, "grad_norm": 1.547920908924042, "learning_rate": 5.2067504314323725e-09, "loss": 0.5972, "step": 9655 }, { "epoch": 0.99, "grad_norm": 1.361504822251125, "learning_rate": 5.131576550141404e-09, "loss": 0.5628, "step": 9656 }, { "epoch": 0.99, "grad_norm": 1.5008274940056472, "learning_rate": 5.056949015165846e-09, "loss": 0.6187, "step": 9657 }, { "epoch": 0.99, "grad_norm": 1.6682755215667744, "learning_rate": 4.982867834668614e-09, "loss": 0.6471, "step": 9658 }, { "epoch": 0.99, "grad_norm": 1.5976851103357599, "learning_rate": 4.909333016752671e-09, "loss": 0.6072, "step": 9659 }, { "epoch": 0.99, "grad_norm": 1.6118312868078366, "learning_rate": 4.836344569461582e-09, "loss": 0.5835, "step": 9660 }, { "epoch": 0.99, "grad_norm": 1.4845451823270177, "learning_rate": 4.763902500778961e-09, "loss": 0.6023, "step": 9661 }, { "epoch": 0.99, "grad_norm": 1.4718206888068046, "learning_rate": 4.692006818627914e-09, "loss": 0.6399, "step": 9662 }, { "epoch": 0.99, "grad_norm": 1.4219940153366744, "learning_rate": 4.620657530872708e-09, "loss": 0.6592, "step": 9663 }, { "epoch": 0.99, "grad_norm": 1.6102032602204943, "learning_rate": 4.5498546453187634e-09, "loss": 0.711, "step": 9664 }, { "epoch": 0.99, "grad_norm": 1.5629638263391414, "learning_rate": 4.479598169709331e-09, "loss": 0.6571, "step": 9665 }, { "epoch": 0.99, "grad_norm": 1.5843684890635379, "learning_rate": 4.409888111729932e-09, "loss": 0.8813, "step": 9666 }, { "epoch": 0.99, "grad_norm": 1.2415311759295211, "learning_rate": 4.340724479005021e-09, "loss": 0.5145, "step": 9667 }, { "epoch": 0.99, "grad_norm": 1.3403486819478472, "learning_rate": 4.272107279099658e-09, "loss": 0.6006, "step": 9668 }, { "epoch": 0.99, "grad_norm": 1.4355207375653836, "learning_rate": 4.2040365195206154e-09, "loss": 0.6333, "step": 9669 }, { "epoch": 0.99, "grad_norm": 1.7300291783652522, "learning_rate": 4.13651220771194e-09, "loss": 0.6637, "step": 9670 }, { "epoch": 0.99, "grad_norm": 1.4936306723571515, "learning_rate": 4.0695343510605e-09, "loss": 0.5608, "step": 9671 }, { "epoch": 0.99, "grad_norm": 1.6998069499931696, "learning_rate": 4.003102956892102e-09, "loss": 0.6102, "step": 9672 }, { "epoch": 0.99, "grad_norm": 1.5239516407660527, "learning_rate": 3.937218032473155e-09, "loss": 0.5773, "step": 9673 }, { "epoch": 0.99, "grad_norm": 1.5229409771083264, "learning_rate": 3.871879585010674e-09, "loss": 0.6467, "step": 9674 }, { "epoch": 0.99, "grad_norm": 1.542775776162664, "learning_rate": 3.807087621650607e-09, "loss": 0.582, "step": 9675 }, { "epoch": 0.99, "grad_norm": 1.5433728819431, "learning_rate": 3.742842149480619e-09, "loss": 0.6688, "step": 9676 }, { "epoch": 0.99, "grad_norm": 1.4239085600841104, "learning_rate": 3.679143175527866e-09, "loss": 0.6578, "step": 9677 }, { "epoch": 0.99, "grad_norm": 1.6074772554325634, "learning_rate": 3.6159907067601086e-09, "loss": 0.7086, "step": 9678 }, { "epoch": 0.99, "grad_norm": 1.5942678422818897, "learning_rate": 3.5533847500845985e-09, "loss": 0.6635, "step": 9679 }, { "epoch": 0.99, "grad_norm": 1.5104787191194404, "learning_rate": 3.491325312349192e-09, "loss": 0.5939, "step": 9680 }, { "epoch": 0.99, "grad_norm": 1.388129643595257, "learning_rate": 3.4298124003429025e-09, "loss": 0.6319, "step": 9681 }, { "epoch": 0.99, "grad_norm": 1.3640641316351465, "learning_rate": 3.368846020793126e-09, "loss": 0.5904, "step": 9682 }, { "epoch": 0.99, "grad_norm": 1.5393792622059357, "learning_rate": 3.3084261803684182e-09, "loss": 0.5855, "step": 9683 }, { "epoch": 0.99, "grad_norm": 1.8281779575370551, "learning_rate": 3.248552885678491e-09, "loss": 0.6867, "step": 9684 }, { "epoch": 0.99, "grad_norm": 1.4404753016986491, "learning_rate": 3.189226143271995e-09, "loss": 0.7171, "step": 9685 }, { "epoch": 0.99, "grad_norm": 1.5580407609130673, "learning_rate": 3.130445959638184e-09, "loss": 0.6646, "step": 9686 }, { "epoch": 0.99, "grad_norm": 1.6315771168974995, "learning_rate": 3.0722123412058045e-09, "loss": 0.7138, "step": 9687 }, { "epoch": 0.99, "grad_norm": 1.3343377796745288, "learning_rate": 3.0145252943458715e-09, "loss": 0.5623, "step": 9688 }, { "epoch": 0.99, "grad_norm": 1.7951095696690056, "learning_rate": 2.9573848253672267e-09, "loss": 0.6455, "step": 9689 }, { "epoch": 0.99, "grad_norm": 1.7277152445326442, "learning_rate": 2.900790940519871e-09, "loss": 0.6597, "step": 9690 }, { "epoch": 0.99, "grad_norm": 1.5169387863339818, "learning_rate": 2.844743645994963e-09, "loss": 0.6815, "step": 9691 }, { "epoch": 0.99, "grad_norm": 1.4169086687007837, "learning_rate": 2.789242947923154e-09, "loss": 0.6819, "step": 9692 }, { "epoch": 0.99, "grad_norm": 15.49118046392866, "learning_rate": 2.734288852374034e-09, "loss": 0.6843, "step": 9693 }, { "epoch": 0.99, "grad_norm": 1.5129919097998923, "learning_rate": 2.6798813653594603e-09, "loss": 0.7065, "step": 9694 }, { "epoch": 0.99, "grad_norm": 1.4807809239132388, "learning_rate": 2.626020492830228e-09, "loss": 0.6451, "step": 9695 }, { "epoch": 0.99, "grad_norm": 1.4349931652215273, "learning_rate": 2.572706240678291e-09, "loss": 0.6514, "step": 9696 }, { "epoch": 0.99, "grad_norm": 1.3889823481285286, "learning_rate": 2.519938614734541e-09, "loss": 0.6087, "step": 9697 }, { "epoch": 0.99, "grad_norm": 1.506421683247402, "learning_rate": 2.467717620771026e-09, "loss": 0.5914, "step": 9698 }, { "epoch": 0.99, "grad_norm": 1.6523346790281626, "learning_rate": 2.4160432644998455e-09, "loss": 0.669, "step": 9699 }, { "epoch": 0.99, "grad_norm": 1.564807363801362, "learning_rate": 2.3649155515731436e-09, "loss": 0.6107, "step": 9700 }, { "epoch": 0.99, "grad_norm": 1.5996525584904608, "learning_rate": 2.314334487583114e-09, "loss": 0.7003, "step": 9701 }, { "epoch": 0.99, "grad_norm": 1.4016112568814116, "learning_rate": 2.2643000780631087e-09, "loss": 0.6578, "step": 9702 }, { "epoch": 0.99, "grad_norm": 1.4605888737271775, "learning_rate": 2.214812328485416e-09, "loss": 0.6353, "step": 9703 }, { "epoch": 0.99, "grad_norm": 1.3987733804433502, "learning_rate": 2.1658712442629293e-09, "loss": 0.6388, "step": 9704 }, { "epoch": 0.99, "grad_norm": 1.474848430136338, "learning_rate": 2.1174768307496984e-09, "loss": 0.6453, "step": 9705 }, { "epoch": 0.99, "grad_norm": 1.550813708731834, "learning_rate": 2.069629093238157e-09, "loss": 0.6385, "step": 9706 }, { "epoch": 0.99, "grad_norm": 1.5312359561860154, "learning_rate": 2.022328036962451e-09, "loss": 0.6696, "step": 9707 }, { "epoch": 0.99, "grad_norm": 1.465613933304486, "learning_rate": 1.97557366709622e-09, "loss": 0.6385, "step": 9708 }, { "epoch": 0.99, "grad_norm": 1.6059201668994194, "learning_rate": 1.9293659887542615e-09, "loss": 0.728, "step": 9709 }, { "epoch": 0.99, "grad_norm": 1.4167840796174016, "learning_rate": 1.8837050069897555e-09, "loss": 0.6102, "step": 9710 }, { "epoch": 0.99, "grad_norm": 1.404546933084224, "learning_rate": 1.8385907267981506e-09, "loss": 0.6397, "step": 9711 }, { "epoch": 0.99, "grad_norm": 1.4258766354157997, "learning_rate": 1.7940231531132778e-09, "loss": 0.5353, "step": 9712 }, { "epoch": 0.99, "grad_norm": 1.3324697789373736, "learning_rate": 1.7500022908106817e-09, "loss": 0.6138, "step": 9713 }, { "epoch": 0.99, "grad_norm": 1.4133328455675875, "learning_rate": 1.7065281447053994e-09, "loss": 0.7041, "step": 9714 }, { "epoch": 0.99, "grad_norm": 1.5461237632905902, "learning_rate": 1.663600719551961e-09, "loss": 0.6847, "step": 9715 }, { "epoch": 0.99, "grad_norm": 1.6624712462344833, "learning_rate": 1.62122002004661e-09, "loss": 0.5698, "step": 9716 }, { "epoch": 0.99, "grad_norm": 1.4670845621368576, "learning_rate": 1.5793860508250825e-09, "loss": 0.6363, "step": 9717 }, { "epoch": 0.99, "grad_norm": 1.4159317478602829, "learning_rate": 1.5380988164626076e-09, "loss": 0.629, "step": 9718 }, { "epoch": 0.99, "grad_norm": 1.497941139893596, "learning_rate": 1.4973583214755726e-09, "loss": 0.5859, "step": 9719 }, { "epoch": 0.99, "grad_norm": 1.400029409654289, "learning_rate": 1.4571645703198578e-09, "loss": 0.6723, "step": 9720 }, { "epoch": 0.99, "grad_norm": 1.4201082523629474, "learning_rate": 1.4175175673925013e-09, "loss": 0.6333, "step": 9721 }, { "epoch": 0.99, "grad_norm": 1.5531442451672428, "learning_rate": 1.3784173170300342e-09, "loss": 0.6848, "step": 9722 }, { "epoch": 0.99, "grad_norm": 2.3272009573206747, "learning_rate": 1.3398638235090356e-09, "loss": 0.6826, "step": 9723 }, { "epoch": 0.99, "grad_norm": 1.5628938447615883, "learning_rate": 1.3018570910466876e-09, "loss": 0.5971, "step": 9724 }, { "epoch": 0.99, "grad_norm": 1.5365375985868488, "learning_rate": 1.2643971238002206e-09, "loss": 0.6491, "step": 9725 }, { "epoch": 0.99, "grad_norm": 1.5388284602279392, "learning_rate": 1.2274839258669124e-09, "loss": 0.6021, "step": 9726 }, { "epoch": 0.99, "grad_norm": 1.7397424311981038, "learning_rate": 1.1911175012846443e-09, "loss": 0.6525, "step": 9727 }, { "epoch": 0.99, "grad_norm": 1.6218700057101765, "learning_rate": 1.1552978540313452e-09, "loss": 0.6543, "step": 9728 }, { "epoch": 0.99, "grad_norm": 1.5076661274670013, "learning_rate": 1.1200249880249925e-09, "loss": 0.6684, "step": 9729 }, { "epoch": 0.99, "grad_norm": 1.546445166255431, "learning_rate": 1.0852989071230558e-09, "loss": 0.6926, "step": 9730 }, { "epoch": 0.99, "grad_norm": 1.467258186100157, "learning_rate": 1.0511196151247182e-09, "loss": 0.5928, "step": 9731 }, { "epoch": 0.99, "grad_norm": 1.6319158161903253, "learning_rate": 1.017487115768101e-09, "loss": 0.6024, "step": 9732 }, { "epoch": 0.99, "grad_norm": 1.5795260529503725, "learning_rate": 9.844014127324831e-10, "loss": 0.6565, "step": 9733 }, { "epoch": 0.99, "grad_norm": 1.4991175686396554, "learning_rate": 9.518625096366364e-10, "loss": 0.674, "step": 9734 }, { "epoch": 0.99, "grad_norm": 1.5977158151339932, "learning_rate": 9.198704100393807e-10, "loss": 0.6809, "step": 9735 }, { "epoch": 0.99, "grad_norm": 1.6689265477479922, "learning_rate": 8.884251174401392e-10, "loss": 0.716, "step": 9736 }, { "epoch": 0.99, "grad_norm": 1.5873792213155111, "learning_rate": 8.57526635278938e-10, "loss": 0.5845, "step": 9737 }, { "epoch": 0.99, "grad_norm": 1.3657205673969224, "learning_rate": 8.271749669347406e-10, "loss": 0.5762, "step": 9738 }, { "epoch": 0.99, "grad_norm": 1.6064026409664631, "learning_rate": 7.973701157282243e-10, "loss": 0.7068, "step": 9739 }, { "epoch": 0.99, "grad_norm": 1.619935731982748, "learning_rate": 7.681120849190037e-10, "loss": 0.6103, "step": 9740 }, { "epoch": 0.99, "grad_norm": 1.612899042348941, "learning_rate": 7.394008777072969e-10, "loss": 0.629, "step": 9741 }, { "epoch": 0.99, "grad_norm": 1.507347414064453, "learning_rate": 7.112364972344799e-10, "loss": 0.6057, "step": 9742 }, { "epoch": 0.99, "grad_norm": 1.6122217027485068, "learning_rate": 6.836189465797561e-10, "loss": 0.6133, "step": 9743 }, { "epoch": 0.99, "grad_norm": 1.7649486835989425, "learning_rate": 6.565482287651526e-10, "loss": 0.74, "step": 9744 }, { "epoch": 1.0, "grad_norm": 1.5412225486642994, "learning_rate": 6.300243467516342e-10, "loss": 0.7517, "step": 9745 }, { "epoch": 1.0, "grad_norm": 1.5310671524011448, "learning_rate": 6.040473034396588e-10, "loss": 0.6251, "step": 9746 }, { "epoch": 1.0, "grad_norm": 1.6011442864244636, "learning_rate": 5.78617101670842e-10, "loss": 0.6369, "step": 9747 }, { "epoch": 1.0, "grad_norm": 1.5426214025154255, "learning_rate": 5.537337442274027e-10, "loss": 0.7247, "step": 9748 }, { "epoch": 1.0, "grad_norm": 1.4124637027591862, "learning_rate": 5.293972338304976e-10, "loss": 0.6432, "step": 9749 }, { "epoch": 1.0, "grad_norm": 1.4321462149083883, "learning_rate": 5.056075731424415e-10, "loss": 0.5616, "step": 9750 }, { "epoch": 1.0, "grad_norm": 1.5851327305245122, "learning_rate": 4.82364764765042e-10, "loss": 0.7051, "step": 9751 }, { "epoch": 1.0, "grad_norm": 1.476041411248275, "learning_rate": 4.5966881124126505e-10, "loss": 0.6212, "step": 9752 }, { "epoch": 1.0, "grad_norm": 1.5109080993867312, "learning_rate": 4.3751971505301414e-10, "loss": 0.5843, "step": 9753 }, { "epoch": 1.0, "grad_norm": 1.3612292459343538, "learning_rate": 4.1591747862279597e-10, "loss": 0.5747, "step": 9754 }, { "epoch": 1.0, "grad_norm": 1.5226310345363472, "learning_rate": 3.9486210431427527e-10, "loss": 0.6775, "step": 9755 }, { "epoch": 1.0, "grad_norm": 1.5168814549585654, "learning_rate": 3.743535944300547e-10, "loss": 0.6346, "step": 9756 }, { "epoch": 1.0, "grad_norm": 1.6100085622297715, "learning_rate": 3.543919512138949e-10, "loss": 0.6224, "step": 9757 }, { "epoch": 1.0, "grad_norm": 1.564930782360089, "learning_rate": 3.3497717684793927e-10, "loss": 0.7064, "step": 9758 }, { "epoch": 1.0, "grad_norm": 1.5515093240991964, "learning_rate": 3.161092734571547e-10, "loss": 0.7538, "step": 9759 }, { "epoch": 1.0, "grad_norm": 1.5721422010960984, "learning_rate": 2.9778824310489065e-10, "loss": 0.544, "step": 9760 }, { "epoch": 1.0, "grad_norm": 1.4126963616072905, "learning_rate": 2.8001408779509964e-10, "loss": 0.5745, "step": 9761 }, { "epoch": 1.0, "grad_norm": 1.6912835746930446, "learning_rate": 2.6278680947178225e-10, "loss": 0.639, "step": 9762 }, { "epoch": 1.0, "grad_norm": 1.7832165640880153, "learning_rate": 2.4610641001954206e-10, "loss": 0.6445, "step": 9763 }, { "epoch": 1.0, "grad_norm": 1.515577913840514, "learning_rate": 2.2997289126303057e-10, "loss": 0.6205, "step": 9764 }, { "epoch": 1.0, "grad_norm": 1.362720786627688, "learning_rate": 2.143862549663922e-10, "loss": 0.6124, "step": 9765 }, { "epoch": 1.0, "grad_norm": 1.5292554429098162, "learning_rate": 1.993465028349295e-10, "loss": 0.6543, "step": 9766 }, { "epoch": 1.0, "grad_norm": 1.4235642435632703, "learning_rate": 1.848536365134379e-10, "loss": 0.6381, "step": 9767 }, { "epoch": 1.0, "grad_norm": 1.4068187804478691, "learning_rate": 1.7090765758787097e-10, "loss": 0.5757, "step": 9768 }, { "epoch": 1.0, "grad_norm": 1.7665237496066866, "learning_rate": 1.5750856758256494e-10, "loss": 0.7046, "step": 9769 }, { "epoch": 1.0, "grad_norm": 1.675426268759316, "learning_rate": 1.4465636796412442e-10, "loss": 0.6894, "step": 9770 }, { "epoch": 1.0, "grad_norm": 1.5372339725492525, "learning_rate": 1.3235106013809173e-10, "loss": 0.5876, "step": 9771 }, { "epoch": 1.0, "grad_norm": 1.5142264139058843, "learning_rate": 1.2059264545005723e-10, "loss": 0.7134, "step": 9772 }, { "epoch": 1.0, "grad_norm": 1.4620421674936936, "learning_rate": 1.0938112518621424e-10, "loss": 0.6198, "step": 9773 }, { "epoch": 1.0, "grad_norm": 1.3752364662839611, "learning_rate": 9.871650057335924e-11, "loss": 0.5233, "step": 9774 }, { "epoch": 1.0, "grad_norm": 1.4234571341942484, "learning_rate": 8.85987727777815e-11, "loss": 0.6349, "step": 9775 }, { "epoch": 1.0, "grad_norm": 1.596260142175504, "learning_rate": 7.902794290581828e-11, "loss": 0.5668, "step": 9776 }, { "epoch": 1.0, "grad_norm": 1.4194140579115158, "learning_rate": 7.000401200496498e-11, "loss": 0.6272, "step": 9777 }, { "epoch": 1.0, "grad_norm": 1.6170513955013064, "learning_rate": 6.152698106165477e-11, "loss": 0.7103, "step": 9778 }, { "epoch": 1.0, "grad_norm": 1.4666633137936131, "learning_rate": 5.3596851004034114e-11, "loss": 0.6884, "step": 9779 }, { "epoch": 1.0, "grad_norm": 1.4778248521144932, "learning_rate": 4.621362269807694e-11, "loss": 0.5986, "step": 9780 }, { "epoch": 1.0, "grad_norm": 1.8224762167299273, "learning_rate": 3.937729695258075e-11, "loss": 0.6221, "step": 9781 }, { "epoch": 1.0, "grad_norm": 1.4432501002561167, "learning_rate": 3.3087874515280724e-11, "loss": 0.586, "step": 9782 }, { "epoch": 1.0, "grad_norm": 1.4706023856639587, "learning_rate": 2.7345356073404937e-11, "loss": 0.6278, "step": 9783 }, { "epoch": 1.0, "grad_norm": 1.482248031609527, "learning_rate": 2.2149742255339613e-11, "loss": 0.6505, "step": 9784 }, { "epoch": 1.0, "grad_norm": 1.5029947844766511, "learning_rate": 1.7501033630074048e-11, "loss": 0.6704, "step": 9785 }, { "epoch": 1.0, "grad_norm": 1.3105918305588435, "learning_rate": 1.339923070498017e-11, "loss": 0.6456, "step": 9786 }, { "epoch": 1.0, "grad_norm": 1.6088042506091658, "learning_rate": 9.844333929143191e-12, "loss": 0.6712, "step": 9787 }, { "epoch": 1.0, "grad_norm": 1.5980253412700542, "learning_rate": 6.836343691696279e-12, "loss": 0.6855, "step": 9788 }, { "epoch": 1.0, "grad_norm": 1.6288274775373628, "learning_rate": 4.375260321820563e-12, "loss": 0.6115, "step": 9789 }, { "epoch": 1.0, "grad_norm": 1.4857096011872029, "learning_rate": 2.4610840881900134e-12, "loss": 0.6275, "step": 9790 }, { "epoch": 1.0, "grad_norm": 1.320221866371686, "learning_rate": 1.0938152000816715e-12, "loss": 0.566, "step": 9791 }, { "epoch": 1.0, "grad_norm": 1.488415117991151, "learning_rate": 2.7345380737564543e-13, "loss": 0.6101, "step": 9792 }, { "epoch": 1.0, "grad_norm": 1.5383374979816096, "learning_rate": 0.0, "loss": 0.6084, "step": 9793 }, { "epoch": 1.0, "step": 9793, "total_flos": 3315151218180096.0, "train_loss": 0.6612185692502325, "train_runtime": 449372.0646, "train_samples_per_second": 2.789, "train_steps_per_second": 0.022 } ], "logging_steps": 1.0, "max_steps": 9793, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 3315151218180096.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }