{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.999827482101268, "eval_steps": 100, "global_step": 11592, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017251789873199344, "grad_norm": 43.25, "learning_rate": 4.0000000000000003e-07, "loss": 2.642, "step": 1 }, { "epoch": 0.0003450357974639869, "grad_norm": 406.0, "learning_rate": 8.000000000000001e-07, "loss": 2.9249, "step": 2 }, { "epoch": 0.0005175536961959803, "grad_norm": 65.0, "learning_rate": 1.2000000000000002e-06, "loss": 3.0206, "step": 3 }, { "epoch": 0.0006900715949279737, "grad_norm": 55.75, "learning_rate": 1.6000000000000001e-06, "loss": 2.7799, "step": 4 }, { "epoch": 0.0008625894936599673, "grad_norm": 41.5, "learning_rate": 2.0000000000000003e-06, "loss": 2.7781, "step": 5 }, { "epoch": 0.0010351073923919607, "grad_norm": 47.0, "learning_rate": 2.4000000000000003e-06, "loss": 2.8404, "step": 6 }, { "epoch": 0.001207625291123954, "grad_norm": 49.5, "learning_rate": 2.8000000000000003e-06, "loss": 2.9029, "step": 7 }, { "epoch": 0.0013801431898559475, "grad_norm": 47.25, "learning_rate": 3.2000000000000003e-06, "loss": 2.8079, "step": 8 }, { "epoch": 0.001552661088587941, "grad_norm": 32.0, "learning_rate": 3.6000000000000003e-06, "loss": 2.794, "step": 9 }, { "epoch": 0.0017251789873199345, "grad_norm": 36.5, "learning_rate": 4.000000000000001e-06, "loss": 2.7301, "step": 10 }, { "epoch": 0.001897696886051928, "grad_norm": 38.75, "learning_rate": 4.4e-06, "loss": 2.5871, "step": 11 }, { "epoch": 0.0020702147847839214, "grad_norm": 44.75, "learning_rate": 4.800000000000001e-06, "loss": 2.7294, "step": 12 }, { "epoch": 0.0022427326835159148, "grad_norm": 41.5, "learning_rate": 5.2e-06, "loss": 2.4985, "step": 13 }, { "epoch": 0.002415250582247908, "grad_norm": 25.125, "learning_rate": 5.600000000000001e-06, "loss": 2.5737, "step": 14 }, { "epoch": 0.0025877684809799016, "grad_norm": 29.25, "learning_rate": 6e-06, "loss": 2.6294, "step": 15 }, { "epoch": 0.002760286379711895, "grad_norm": 22.375, "learning_rate": 6.4000000000000006e-06, "loss": 2.4837, "step": 16 }, { "epoch": 0.0029328042784438884, "grad_norm": 37.25, "learning_rate": 6.800000000000001e-06, "loss": 2.5322, "step": 17 }, { "epoch": 0.003105322177175882, "grad_norm": 28.125, "learning_rate": 7.2000000000000005e-06, "loss": 2.3859, "step": 18 }, { "epoch": 0.0032778400759078757, "grad_norm": 16.25, "learning_rate": 7.600000000000001e-06, "loss": 2.3999, "step": 19 }, { "epoch": 0.003450357974639869, "grad_norm": 19.25, "learning_rate": 8.000000000000001e-06, "loss": 2.418, "step": 20 }, { "epoch": 0.0036228758733718625, "grad_norm": 15.25, "learning_rate": 8.400000000000001e-06, "loss": 2.3585, "step": 21 }, { "epoch": 0.003795393772103856, "grad_norm": 12.8125, "learning_rate": 8.8e-06, "loss": 2.2626, "step": 22 }, { "epoch": 0.003967911670835849, "grad_norm": 9.0, "learning_rate": 9.200000000000002e-06, "loss": 2.3482, "step": 23 }, { "epoch": 0.004140429569567843, "grad_norm": 7.125, "learning_rate": 9.600000000000001e-06, "loss": 2.1597, "step": 24 }, { "epoch": 0.004312947468299836, "grad_norm": 27.25, "learning_rate": 1e-05, "loss": 2.1392, "step": 25 }, { "epoch": 0.0044854653670318295, "grad_norm": 12.5625, "learning_rate": 1.04e-05, "loss": 2.218, "step": 26 }, { "epoch": 0.004657983265763823, "grad_norm": 32.75, "learning_rate": 1.0800000000000002e-05, "loss": 2.2254, "step": 27 }, { "epoch": 0.004830501164495816, "grad_norm": 4.59375, "learning_rate": 1.1200000000000001e-05, "loss": 2.1076, "step": 28 }, { "epoch": 0.00500301906322781, "grad_norm": 24.125, "learning_rate": 1.16e-05, "loss": 2.0355, "step": 29 }, { "epoch": 0.005175536961959803, "grad_norm": 7.8125, "learning_rate": 1.2e-05, "loss": 2.1556, "step": 30 }, { "epoch": 0.005348054860691797, "grad_norm": 8.75, "learning_rate": 1.2400000000000002e-05, "loss": 2.2392, "step": 31 }, { "epoch": 0.00552057275942379, "grad_norm": 11.1875, "learning_rate": 1.2800000000000001e-05, "loss": 2.1823, "step": 32 }, { "epoch": 0.005693090658155784, "grad_norm": 3.875, "learning_rate": 1.3200000000000002e-05, "loss": 2.0795, "step": 33 }, { "epoch": 0.005865608556887777, "grad_norm": 20.375, "learning_rate": 1.3600000000000002e-05, "loss": 2.0121, "step": 34 }, { "epoch": 0.006038126455619771, "grad_norm": 16.125, "learning_rate": 1.4e-05, "loss": 2.0659, "step": 35 }, { "epoch": 0.006210644354351764, "grad_norm": 9.625, "learning_rate": 1.4400000000000001e-05, "loss": 2.1046, "step": 36 }, { "epoch": 0.0063831622530837575, "grad_norm": 3.546875, "learning_rate": 1.48e-05, "loss": 1.968, "step": 37 }, { "epoch": 0.006555680151815751, "grad_norm": 44.75, "learning_rate": 1.5200000000000002e-05, "loss": 2.0737, "step": 38 }, { "epoch": 0.006728198050547744, "grad_norm": 5.6875, "learning_rate": 1.5600000000000003e-05, "loss": 2.0571, "step": 39 }, { "epoch": 0.006900715949279738, "grad_norm": 5.40625, "learning_rate": 1.6000000000000003e-05, "loss": 2.0511, "step": 40 }, { "epoch": 0.007073233848011731, "grad_norm": 2.5625, "learning_rate": 1.64e-05, "loss": 1.996, "step": 41 }, { "epoch": 0.007245751746743725, "grad_norm": 5.21875, "learning_rate": 1.6800000000000002e-05, "loss": 2.0255, "step": 42 }, { "epoch": 0.007418269645475718, "grad_norm": 3.453125, "learning_rate": 1.72e-05, "loss": 1.9345, "step": 43 }, { "epoch": 0.007590787544207712, "grad_norm": 4.25, "learning_rate": 1.76e-05, "loss": 2.069, "step": 44 }, { "epoch": 0.007763305442939705, "grad_norm": 2.5625, "learning_rate": 1.8e-05, "loss": 2.0591, "step": 45 }, { "epoch": 0.007935823341671698, "grad_norm": 3.34375, "learning_rate": 1.8400000000000003e-05, "loss": 2.0095, "step": 46 }, { "epoch": 0.008108341240403692, "grad_norm": 2.9375, "learning_rate": 1.88e-05, "loss": 1.9536, "step": 47 }, { "epoch": 0.008280859139135685, "grad_norm": 2.140625, "learning_rate": 1.9200000000000003e-05, "loss": 2.0756, "step": 48 }, { "epoch": 0.00845337703786768, "grad_norm": 2.625, "learning_rate": 1.9600000000000002e-05, "loss": 1.878, "step": 49 }, { "epoch": 0.008625894936599671, "grad_norm": 6.1875, "learning_rate": 2e-05, "loss": 1.8865, "step": 50 }, { "epoch": 0.008798412835331665, "grad_norm": 1.8046875, "learning_rate": 1.999999962956876e-05, "loss": 1.8895, "step": 51 }, { "epoch": 0.008970930734063659, "grad_norm": 1.8984375, "learning_rate": 1.9999998518275062e-05, "loss": 1.97, "step": 52 }, { "epoch": 0.009143448632795653, "grad_norm": 1.9609375, "learning_rate": 1.9999996666118996e-05, "loss": 1.9397, "step": 53 }, { "epoch": 0.009315966531527647, "grad_norm": 2.109375, "learning_rate": 1.9999994073100687e-05, "loss": 1.8746, "step": 54 }, { "epoch": 0.009488484430259639, "grad_norm": 1.59375, "learning_rate": 1.9999990739220338e-05, "loss": 1.8565, "step": 55 }, { "epoch": 0.009661002328991633, "grad_norm": 2.203125, "learning_rate": 1.999998666447819e-05, "loss": 1.8719, "step": 56 }, { "epoch": 0.009833520227723627, "grad_norm": 1.90625, "learning_rate": 1.999998184887455e-05, "loss": 1.8919, "step": 57 }, { "epoch": 0.01000603812645562, "grad_norm": 2.390625, "learning_rate": 1.999997629240977e-05, "loss": 1.8949, "step": 58 }, { "epoch": 0.010178556025187612, "grad_norm": 1.546875, "learning_rate": 1.9999969995084264e-05, "loss": 1.8742, "step": 59 }, { "epoch": 0.010351073923919606, "grad_norm": 1.40625, "learning_rate": 1.99999629568985e-05, "loss": 1.7554, "step": 60 }, { "epoch": 0.0105235918226516, "grad_norm": 2.875, "learning_rate": 1.9999955177852994e-05, "loss": 1.8714, "step": 61 }, { "epoch": 0.010696109721383594, "grad_norm": 8.6875, "learning_rate": 1.999994665794833e-05, "loss": 1.776, "step": 62 }, { "epoch": 0.010868627620115588, "grad_norm": 1.328125, "learning_rate": 1.999993739718513e-05, "loss": 1.8771, "step": 63 }, { "epoch": 0.01104114551884758, "grad_norm": 1.5, "learning_rate": 1.9999927395564087e-05, "loss": 1.7822, "step": 64 }, { "epoch": 0.011213663417579574, "grad_norm": 1.796875, "learning_rate": 1.999991665308594e-05, "loss": 1.9068, "step": 65 }, { "epoch": 0.011386181316311568, "grad_norm": 1.1953125, "learning_rate": 1.9999905169751486e-05, "loss": 1.999, "step": 66 }, { "epoch": 0.011558699215043561, "grad_norm": 1.5, "learning_rate": 1.9999892945561578e-05, "loss": 1.8624, "step": 67 }, { "epoch": 0.011731217113775554, "grad_norm": 2.5625, "learning_rate": 1.999987998051711e-05, "loss": 1.9439, "step": 68 }, { "epoch": 0.011903735012507547, "grad_norm": 1.3359375, "learning_rate": 1.9999866274619057e-05, "loss": 1.8826, "step": 69 }, { "epoch": 0.012076252911239541, "grad_norm": 1.328125, "learning_rate": 1.9999851827868428e-05, "loss": 1.7159, "step": 70 }, { "epoch": 0.012248770809971535, "grad_norm": 1.3828125, "learning_rate": 1.9999836640266292e-05, "loss": 1.8327, "step": 71 }, { "epoch": 0.012421288708703527, "grad_norm": 1.265625, "learning_rate": 1.9999820711813776e-05, "loss": 1.9363, "step": 72 }, { "epoch": 0.012593806607435521, "grad_norm": 1.28125, "learning_rate": 1.999980404251206e-05, "loss": 1.8603, "step": 73 }, { "epoch": 0.012766324506167515, "grad_norm": 1.03125, "learning_rate": 1.999978663236238e-05, "loss": 1.8476, "step": 74 }, { "epoch": 0.012938842404899509, "grad_norm": 1.4453125, "learning_rate": 1.9999768481366026e-05, "loss": 1.786, "step": 75 }, { "epoch": 0.013111360303631503, "grad_norm": 2.15625, "learning_rate": 1.9999749589524338e-05, "loss": 1.8373, "step": 76 }, { "epoch": 0.013283878202363495, "grad_norm": 1.328125, "learning_rate": 1.999972995683872e-05, "loss": 1.7419, "step": 77 }, { "epoch": 0.013456396101095489, "grad_norm": 1.7109375, "learning_rate": 1.9999709583310624e-05, "loss": 1.8733, "step": 78 }, { "epoch": 0.013628913999827482, "grad_norm": 3.578125, "learning_rate": 1.9999688468941565e-05, "loss": 1.7432, "step": 79 }, { "epoch": 0.013801431898559476, "grad_norm": 1.4453125, "learning_rate": 1.9999666613733102e-05, "loss": 1.8525, "step": 80 }, { "epoch": 0.013973949797291468, "grad_norm": 1.1484375, "learning_rate": 1.9999644017686855e-05, "loss": 1.8241, "step": 81 }, { "epoch": 0.014146467696023462, "grad_norm": 1.390625, "learning_rate": 1.9999620680804495e-05, "loss": 1.768, "step": 82 }, { "epoch": 0.014318985594755456, "grad_norm": 1.3515625, "learning_rate": 1.999959660308776e-05, "loss": 1.8484, "step": 83 }, { "epoch": 0.01449150349348745, "grad_norm": 0.90234375, "learning_rate": 1.9999571784538428e-05, "loss": 1.8453, "step": 84 }, { "epoch": 0.014664021392219442, "grad_norm": 2.171875, "learning_rate": 1.9999546225158335e-05, "loss": 1.8202, "step": 85 }, { "epoch": 0.014836539290951436, "grad_norm": 1.5078125, "learning_rate": 1.9999519924949376e-05, "loss": 1.8652, "step": 86 }, { "epoch": 0.01500905718968343, "grad_norm": 0.8828125, "learning_rate": 1.9999492883913506e-05, "loss": 1.7482, "step": 87 }, { "epoch": 0.015181575088415424, "grad_norm": 1.03125, "learning_rate": 1.999946510205272e-05, "loss": 1.7646, "step": 88 }, { "epoch": 0.015354092987147417, "grad_norm": 1.21875, "learning_rate": 1.999943657936908e-05, "loss": 1.8786, "step": 89 }, { "epoch": 0.01552661088587941, "grad_norm": 0.90625, "learning_rate": 1.99994073158647e-05, "loss": 1.7141, "step": 90 }, { "epoch": 0.015699128784611403, "grad_norm": 0.97265625, "learning_rate": 1.9999377311541748e-05, "loss": 1.8988, "step": 91 }, { "epoch": 0.015871646683343395, "grad_norm": 1.046875, "learning_rate": 1.9999346566402444e-05, "loss": 1.8017, "step": 92 }, { "epoch": 0.01604416458207539, "grad_norm": 0.91796875, "learning_rate": 1.999931508044907e-05, "loss": 1.8711, "step": 93 }, { "epoch": 0.016216682480807383, "grad_norm": 0.98046875, "learning_rate": 1.999928285368395e-05, "loss": 1.7997, "step": 94 }, { "epoch": 0.01638920037953938, "grad_norm": 1.515625, "learning_rate": 1.9999249886109485e-05, "loss": 1.7473, "step": 95 }, { "epoch": 0.01656171827827137, "grad_norm": 1.0703125, "learning_rate": 1.9999216177728106e-05, "loss": 1.9087, "step": 96 }, { "epoch": 0.016734236177003363, "grad_norm": 1.15625, "learning_rate": 1.9999181728542316e-05, "loss": 1.8097, "step": 97 }, { "epoch": 0.01690675407573536, "grad_norm": 1.0546875, "learning_rate": 1.9999146538554663e-05, "loss": 1.7901, "step": 98 }, { "epoch": 0.01707927197446735, "grad_norm": 1.4609375, "learning_rate": 1.9999110607767763e-05, "loss": 1.8119, "step": 99 }, { "epoch": 0.017251789873199343, "grad_norm": 0.9609375, "learning_rate": 1.999907393618427e-05, "loss": 1.7627, "step": 100 }, { "epoch": 0.017251789873199343, "eval_loss": 1.7699980735778809, "eval_runtime": 10.881, "eval_samples_per_second": 94.109, "eval_steps_per_second": 23.527, "step": 100 }, { "epoch": 0.01742430777193134, "grad_norm": 0.96484375, "learning_rate": 1.9999036523806897e-05, "loss": 1.8264, "step": 101 }, { "epoch": 0.01759682567066333, "grad_norm": 1.0, "learning_rate": 1.9998998370638427e-05, "loss": 1.9073, "step": 102 }, { "epoch": 0.017769343569395326, "grad_norm": 0.921875, "learning_rate": 1.9998959476681676e-05, "loss": 1.7857, "step": 103 }, { "epoch": 0.017941861468127318, "grad_norm": 1.1796875, "learning_rate": 1.9998919841939536e-05, "loss": 1.8095, "step": 104 }, { "epoch": 0.01811437936685931, "grad_norm": 1.046875, "learning_rate": 1.9998879466414937e-05, "loss": 1.7011, "step": 105 }, { "epoch": 0.018286897265591306, "grad_norm": 0.98046875, "learning_rate": 1.9998838350110867e-05, "loss": 1.7574, "step": 106 }, { "epoch": 0.018459415164323298, "grad_norm": 1.375, "learning_rate": 1.9998796493030382e-05, "loss": 1.7882, "step": 107 }, { "epoch": 0.018631933063055293, "grad_norm": 6.75, "learning_rate": 1.9998753895176576e-05, "loss": 1.7849, "step": 108 }, { "epoch": 0.018804450961787286, "grad_norm": 0.921875, "learning_rate": 1.9998710556552603e-05, "loss": 1.7914, "step": 109 }, { "epoch": 0.018976968860519278, "grad_norm": 0.85546875, "learning_rate": 1.9998666477161678e-05, "loss": 1.7382, "step": 110 }, { "epoch": 0.019149486759251273, "grad_norm": 0.74609375, "learning_rate": 1.9998621657007068e-05, "loss": 1.7785, "step": 111 }, { "epoch": 0.019322004657983265, "grad_norm": 0.7734375, "learning_rate": 1.9998576096092093e-05, "loss": 1.7812, "step": 112 }, { "epoch": 0.01949452255671526, "grad_norm": 0.9140625, "learning_rate": 1.9998529794420124e-05, "loss": 1.7871, "step": 113 }, { "epoch": 0.019667040455447253, "grad_norm": 1.125, "learning_rate": 1.9998482751994596e-05, "loss": 1.6526, "step": 114 }, { "epoch": 0.019839558354179245, "grad_norm": 1.0234375, "learning_rate": 1.999843496881899e-05, "loss": 1.7308, "step": 115 }, { "epoch": 0.02001207625291124, "grad_norm": 0.8359375, "learning_rate": 1.999838644489685e-05, "loss": 1.7452, "step": 116 }, { "epoch": 0.020184594151643233, "grad_norm": 1.140625, "learning_rate": 1.9998337180231768e-05, "loss": 1.7223, "step": 117 }, { "epoch": 0.020357112050375225, "grad_norm": 1.21875, "learning_rate": 1.9998287174827396e-05, "loss": 1.7875, "step": 118 }, { "epoch": 0.02052962994910722, "grad_norm": 0.8359375, "learning_rate": 1.999823642868744e-05, "loss": 1.7478, "step": 119 }, { "epoch": 0.020702147847839213, "grad_norm": 1.1015625, "learning_rate": 1.9998184941815653e-05, "loss": 1.7594, "step": 120 }, { "epoch": 0.020874665746571208, "grad_norm": 0.81640625, "learning_rate": 1.9998132714215855e-05, "loss": 1.8166, "step": 121 }, { "epoch": 0.0210471836453032, "grad_norm": 1.078125, "learning_rate": 1.9998079745891918e-05, "loss": 1.7075, "step": 122 }, { "epoch": 0.021219701544035192, "grad_norm": 1.109375, "learning_rate": 1.999802603684776e-05, "loss": 1.8377, "step": 123 }, { "epoch": 0.021392219442767188, "grad_norm": 1.1171875, "learning_rate": 1.999797158708736e-05, "loss": 1.7289, "step": 124 }, { "epoch": 0.02156473734149918, "grad_norm": 0.89453125, "learning_rate": 1.999791639661476e-05, "loss": 1.8432, "step": 125 }, { "epoch": 0.021737255240231176, "grad_norm": 1.0234375, "learning_rate": 1.999786046543404e-05, "loss": 1.7785, "step": 126 }, { "epoch": 0.021909773138963168, "grad_norm": 0.79296875, "learning_rate": 1.9997803793549347e-05, "loss": 1.7906, "step": 127 }, { "epoch": 0.02208229103769516, "grad_norm": 0.82421875, "learning_rate": 1.999774638096488e-05, "loss": 1.837, "step": 128 }, { "epoch": 0.022254808936427155, "grad_norm": 1.109375, "learning_rate": 1.9997688227684896e-05, "loss": 1.754, "step": 129 }, { "epoch": 0.022427326835159148, "grad_norm": 0.89453125, "learning_rate": 1.9997629333713697e-05, "loss": 1.8045, "step": 130 }, { "epoch": 0.02259984473389114, "grad_norm": 0.7578125, "learning_rate": 1.999756969905565e-05, "loss": 1.7617, "step": 131 }, { "epoch": 0.022772362632623135, "grad_norm": 0.84375, "learning_rate": 1.9997509323715166e-05, "loss": 1.8347, "step": 132 }, { "epoch": 0.022944880531355127, "grad_norm": 1.171875, "learning_rate": 1.999744820769673e-05, "loss": 1.7765, "step": 133 }, { "epoch": 0.023117398430087123, "grad_norm": 0.76171875, "learning_rate": 1.9997386351004864e-05, "loss": 1.8108, "step": 134 }, { "epoch": 0.023289916328819115, "grad_norm": 0.84765625, "learning_rate": 1.9997323753644148e-05, "loss": 1.7178, "step": 135 }, { "epoch": 0.023462434227551107, "grad_norm": 0.73046875, "learning_rate": 1.9997260415619223e-05, "loss": 1.7446, "step": 136 }, { "epoch": 0.023634952126283103, "grad_norm": 1.2421875, "learning_rate": 1.999719633693478e-05, "loss": 1.8163, "step": 137 }, { "epoch": 0.023807470025015095, "grad_norm": 0.8984375, "learning_rate": 1.999713151759557e-05, "loss": 1.7499, "step": 138 }, { "epoch": 0.02397998792374709, "grad_norm": 1.1796875, "learning_rate": 1.999706595760639e-05, "loss": 1.8029, "step": 139 }, { "epoch": 0.024152505822479083, "grad_norm": 0.95703125, "learning_rate": 1.99969996569721e-05, "loss": 1.6801, "step": 140 }, { "epoch": 0.024325023721211075, "grad_norm": 0.84765625, "learning_rate": 1.999693261569761e-05, "loss": 1.7381, "step": 141 }, { "epoch": 0.02449754161994307, "grad_norm": 1.046875, "learning_rate": 1.999686483378789e-05, "loss": 1.828, "step": 142 }, { "epoch": 0.024670059518675062, "grad_norm": 1.34375, "learning_rate": 1.9996796311247956e-05, "loss": 1.7018, "step": 143 }, { "epoch": 0.024842577417407054, "grad_norm": 0.71875, "learning_rate": 1.999672704808289e-05, "loss": 1.8166, "step": 144 }, { "epoch": 0.02501509531613905, "grad_norm": 1.0078125, "learning_rate": 1.9996657044297824e-05, "loss": 1.7758, "step": 145 }, { "epoch": 0.025187613214871042, "grad_norm": 1.0625, "learning_rate": 1.9996586299897944e-05, "loss": 1.8275, "step": 146 }, { "epoch": 0.025360131113603038, "grad_norm": 0.765625, "learning_rate": 1.9996514814888483e-05, "loss": 1.7491, "step": 147 }, { "epoch": 0.02553264901233503, "grad_norm": 1.125, "learning_rate": 1.999644258927475e-05, "loss": 1.8093, "step": 148 }, { "epoch": 0.025705166911067022, "grad_norm": 0.8671875, "learning_rate": 1.9996369623062083e-05, "loss": 1.7722, "step": 149 }, { "epoch": 0.025877684809799018, "grad_norm": 0.80859375, "learning_rate": 1.9996295916255898e-05, "loss": 1.6843, "step": 150 }, { "epoch": 0.02605020270853101, "grad_norm": 0.890625, "learning_rate": 1.999622146886165e-05, "loss": 1.6833, "step": 151 }, { "epoch": 0.026222720607263005, "grad_norm": 0.84375, "learning_rate": 1.999614628088486e-05, "loss": 1.779, "step": 152 }, { "epoch": 0.026395238505994997, "grad_norm": 0.7734375, "learning_rate": 1.999607035233109e-05, "loss": 1.7611, "step": 153 }, { "epoch": 0.02656775640472699, "grad_norm": 0.78515625, "learning_rate": 1.999599368320597e-05, "loss": 1.6703, "step": 154 }, { "epoch": 0.026740274303458985, "grad_norm": 0.734375, "learning_rate": 1.9995916273515185e-05, "loss": 1.731, "step": 155 }, { "epoch": 0.026912792202190977, "grad_norm": 0.7734375, "learning_rate": 1.9995838123264463e-05, "loss": 1.746, "step": 156 }, { "epoch": 0.02708531010092297, "grad_norm": 0.875, "learning_rate": 1.9995759232459594e-05, "loss": 1.6949, "step": 157 }, { "epoch": 0.027257827999654965, "grad_norm": 0.81640625, "learning_rate": 1.9995679601106426e-05, "loss": 1.7415, "step": 158 }, { "epoch": 0.027430345898386957, "grad_norm": 1.1953125, "learning_rate": 1.9995599229210855e-05, "loss": 1.6621, "step": 159 }, { "epoch": 0.027602863797118952, "grad_norm": 1.203125, "learning_rate": 1.999551811677884e-05, "loss": 1.6831, "step": 160 }, { "epoch": 0.027775381695850945, "grad_norm": 2.03125, "learning_rate": 1.9995436263816387e-05, "loss": 1.7927, "step": 161 }, { "epoch": 0.027947899594582937, "grad_norm": 1.0859375, "learning_rate": 1.9995353670329565e-05, "loss": 1.8223, "step": 162 }, { "epoch": 0.028120417493314932, "grad_norm": 0.890625, "learning_rate": 1.9995270336324487e-05, "loss": 1.7593, "step": 163 }, { "epoch": 0.028292935392046924, "grad_norm": 0.984375, "learning_rate": 1.9995186261807326e-05, "loss": 1.7412, "step": 164 }, { "epoch": 0.02846545329077892, "grad_norm": 1.1171875, "learning_rate": 1.9995101446784317e-05, "loss": 1.7231, "step": 165 }, { "epoch": 0.028637971189510912, "grad_norm": 1.1015625, "learning_rate": 1.999501589126174e-05, "loss": 1.6749, "step": 166 }, { "epoch": 0.028810489088242904, "grad_norm": 0.90625, "learning_rate": 1.9994929595245932e-05, "loss": 1.7105, "step": 167 }, { "epoch": 0.0289830069869749, "grad_norm": 1.140625, "learning_rate": 1.9994842558743293e-05, "loss": 1.7118, "step": 168 }, { "epoch": 0.029155524885706892, "grad_norm": 1.359375, "learning_rate": 1.9994754781760264e-05, "loss": 1.6403, "step": 169 }, { "epoch": 0.029328042784438884, "grad_norm": 1.0234375, "learning_rate": 1.9994666264303347e-05, "loss": 1.7117, "step": 170 }, { "epoch": 0.02950056068317088, "grad_norm": 1.09375, "learning_rate": 1.999457700637911e-05, "loss": 1.6726, "step": 171 }, { "epoch": 0.02967307858190287, "grad_norm": 0.703125, "learning_rate": 1.9994487007994156e-05, "loss": 1.7028, "step": 172 }, { "epoch": 0.029845596480634867, "grad_norm": 0.90625, "learning_rate": 1.9994396269155153e-05, "loss": 1.7862, "step": 173 }, { "epoch": 0.03001811437936686, "grad_norm": 1.0625, "learning_rate": 1.9994304789868832e-05, "loss": 1.7097, "step": 174 }, { "epoch": 0.03019063227809885, "grad_norm": 1.6953125, "learning_rate": 1.999421257014196e-05, "loss": 1.7168, "step": 175 }, { "epoch": 0.030363150176830847, "grad_norm": 0.8359375, "learning_rate": 1.999411960998138e-05, "loss": 1.8136, "step": 176 }, { "epoch": 0.03053566807556284, "grad_norm": 0.87109375, "learning_rate": 1.999402590939397e-05, "loss": 1.6356, "step": 177 }, { "epoch": 0.030708185974294835, "grad_norm": 0.8046875, "learning_rate": 1.9993931468386675e-05, "loss": 1.7556, "step": 178 }, { "epoch": 0.030880703873026827, "grad_norm": 0.984375, "learning_rate": 1.9993836286966492e-05, "loss": 1.7139, "step": 179 }, { "epoch": 0.03105322177175882, "grad_norm": 0.8984375, "learning_rate": 1.999374036514047e-05, "loss": 1.7707, "step": 180 }, { "epoch": 0.031225739670490815, "grad_norm": 0.78125, "learning_rate": 1.999364370291572e-05, "loss": 1.6573, "step": 181 }, { "epoch": 0.03139825756922281, "grad_norm": 0.82421875, "learning_rate": 1.9993546300299404e-05, "loss": 1.8058, "step": 182 }, { "epoch": 0.0315707754679548, "grad_norm": 0.734375, "learning_rate": 1.9993448157298733e-05, "loss": 1.7508, "step": 183 }, { "epoch": 0.03174329336668679, "grad_norm": 0.7578125, "learning_rate": 1.999334927392098e-05, "loss": 1.7117, "step": 184 }, { "epoch": 0.031915811265418786, "grad_norm": 3.78125, "learning_rate": 1.9993249650173475e-05, "loss": 1.6337, "step": 185 }, { "epoch": 0.03208832916415078, "grad_norm": 0.85546875, "learning_rate": 1.999314928606359e-05, "loss": 1.7379, "step": 186 }, { "epoch": 0.03226084706288277, "grad_norm": 0.734375, "learning_rate": 1.999304818159877e-05, "loss": 1.7688, "step": 187 }, { "epoch": 0.032433364961614766, "grad_norm": 0.73828125, "learning_rate": 1.9992946336786502e-05, "loss": 1.6814, "step": 188 }, { "epoch": 0.03260588286034676, "grad_norm": 0.75, "learning_rate": 1.9992843751634327e-05, "loss": 1.6128, "step": 189 }, { "epoch": 0.03277840075907876, "grad_norm": 0.70703125, "learning_rate": 1.9992740426149847e-05, "loss": 1.7347, "step": 190 }, { "epoch": 0.032950918657810746, "grad_norm": 0.703125, "learning_rate": 1.9992636360340722e-05, "loss": 1.8182, "step": 191 }, { "epoch": 0.03312343655654274, "grad_norm": 0.73046875, "learning_rate": 1.999253155421466e-05, "loss": 1.6953, "step": 192 }, { "epoch": 0.03329595445527474, "grad_norm": 0.7265625, "learning_rate": 1.999242600777942e-05, "loss": 1.7346, "step": 193 }, { "epoch": 0.033468472354006726, "grad_norm": 0.6875, "learning_rate": 1.999231972104283e-05, "loss": 1.7494, "step": 194 }, { "epoch": 0.03364099025273872, "grad_norm": 0.73828125, "learning_rate": 1.9992212694012757e-05, "loss": 1.7586, "step": 195 }, { "epoch": 0.03381350815147072, "grad_norm": 1.1015625, "learning_rate": 1.9992104926697137e-05, "loss": 1.7297, "step": 196 }, { "epoch": 0.033986026050202706, "grad_norm": 0.7421875, "learning_rate": 1.9991996419103947e-05, "loss": 1.6872, "step": 197 }, { "epoch": 0.0341585439489347, "grad_norm": 0.7265625, "learning_rate": 1.999188717124123e-05, "loss": 1.7465, "step": 198 }, { "epoch": 0.0343310618476667, "grad_norm": 0.765625, "learning_rate": 1.999177718311708e-05, "loss": 1.7722, "step": 199 }, { "epoch": 0.034503579746398685, "grad_norm": 0.78515625, "learning_rate": 1.9991666454739644e-05, "loss": 1.696, "step": 200 }, { "epoch": 0.034503579746398685, "eval_loss": 1.701870322227478, "eval_runtime": 11.0727, "eval_samples_per_second": 92.48, "eval_steps_per_second": 23.12, "step": 200 }, { "epoch": 0.03467609764513068, "grad_norm": 0.671875, "learning_rate": 1.9991554986117127e-05, "loss": 1.6589, "step": 201 }, { "epoch": 0.03484861554386268, "grad_norm": 1.0234375, "learning_rate": 1.9991442777257786e-05, "loss": 1.7258, "step": 202 }, { "epoch": 0.03502113344259467, "grad_norm": 0.83984375, "learning_rate": 1.9991329828169936e-05, "loss": 1.75, "step": 203 }, { "epoch": 0.03519365134132666, "grad_norm": 0.72265625, "learning_rate": 1.999121613886194e-05, "loss": 1.5888, "step": 204 }, { "epoch": 0.035366169240058656, "grad_norm": 0.8984375, "learning_rate": 1.9991101709342228e-05, "loss": 1.6331, "step": 205 }, { "epoch": 0.03553868713879065, "grad_norm": 0.83203125, "learning_rate": 1.9990986539619274e-05, "loss": 1.7844, "step": 206 }, { "epoch": 0.03571120503752264, "grad_norm": 0.98046875, "learning_rate": 1.999087062970161e-05, "loss": 1.68, "step": 207 }, { "epoch": 0.035883722936254636, "grad_norm": 0.8984375, "learning_rate": 1.999075397959782e-05, "loss": 1.7034, "step": 208 }, { "epoch": 0.03605624083498663, "grad_norm": 0.703125, "learning_rate": 1.9990636589316556e-05, "loss": 1.7639, "step": 209 }, { "epoch": 0.03622875873371862, "grad_norm": 1.0703125, "learning_rate": 1.9990518458866506e-05, "loss": 1.687, "step": 210 }, { "epoch": 0.036401276632450616, "grad_norm": 1.0546875, "learning_rate": 1.9990399588256425e-05, "loss": 1.6867, "step": 211 }, { "epoch": 0.03657379453118261, "grad_norm": 1.1328125, "learning_rate": 1.999027997749512e-05, "loss": 1.7076, "step": 212 }, { "epoch": 0.0367463124299146, "grad_norm": 0.8671875, "learning_rate": 1.9990159626591455e-05, "loss": 1.6697, "step": 213 }, { "epoch": 0.036918830328646596, "grad_norm": 1.109375, "learning_rate": 1.999003853555434e-05, "loss": 1.7503, "step": 214 }, { "epoch": 0.03709134822737859, "grad_norm": 0.73046875, "learning_rate": 1.9989916704392748e-05, "loss": 1.7105, "step": 215 }, { "epoch": 0.03726386612611059, "grad_norm": 1.09375, "learning_rate": 1.998979413311571e-05, "loss": 1.7114, "step": 216 }, { "epoch": 0.037436384024842576, "grad_norm": 0.8359375, "learning_rate": 1.99896708217323e-05, "loss": 1.6787, "step": 217 }, { "epoch": 0.03760890192357457, "grad_norm": 0.97265625, "learning_rate": 1.9989546770251662e-05, "loss": 1.714, "step": 218 }, { "epoch": 0.03778141982230657, "grad_norm": 0.8671875, "learning_rate": 1.9989421978682978e-05, "loss": 1.7059, "step": 219 }, { "epoch": 0.037953937721038555, "grad_norm": 0.7109375, "learning_rate": 1.99892964470355e-05, "loss": 1.702, "step": 220 }, { "epoch": 0.03812645561977055, "grad_norm": 0.73046875, "learning_rate": 1.9989170175318523e-05, "loss": 1.6866, "step": 221 }, { "epoch": 0.038298973518502546, "grad_norm": 0.7890625, "learning_rate": 1.9989043163541403e-05, "loss": 1.7692, "step": 222 }, { "epoch": 0.038471491417234535, "grad_norm": 0.72265625, "learning_rate": 1.9988915411713553e-05, "loss": 1.7088, "step": 223 }, { "epoch": 0.03864400931596653, "grad_norm": 0.75390625, "learning_rate": 1.9988786919844437e-05, "loss": 1.6951, "step": 224 }, { "epoch": 0.038816527214698526, "grad_norm": 0.890625, "learning_rate": 1.998865768794357e-05, "loss": 1.6612, "step": 225 }, { "epoch": 0.03898904511343052, "grad_norm": 0.84765625, "learning_rate": 1.9988527716020532e-05, "loss": 1.6171, "step": 226 }, { "epoch": 0.03916156301216251, "grad_norm": 0.7890625, "learning_rate": 1.998839700408495e-05, "loss": 1.7984, "step": 227 }, { "epoch": 0.039334080910894506, "grad_norm": 1.7890625, "learning_rate": 1.9988265552146508e-05, "loss": 1.6983, "step": 228 }, { "epoch": 0.0395065988096265, "grad_norm": 1.59375, "learning_rate": 1.998813336021494e-05, "loss": 1.6833, "step": 229 }, { "epoch": 0.03967911670835849, "grad_norm": 1.3828125, "learning_rate": 1.9988000428300047e-05, "loss": 1.6599, "step": 230 }, { "epoch": 0.039851634607090486, "grad_norm": 0.69921875, "learning_rate": 1.9987866756411676e-05, "loss": 1.7222, "step": 231 }, { "epoch": 0.04002415250582248, "grad_norm": 0.80078125, "learning_rate": 1.9987732344559724e-05, "loss": 1.7105, "step": 232 }, { "epoch": 0.04019667040455447, "grad_norm": 0.93359375, "learning_rate": 1.9987597192754155e-05, "loss": 1.671, "step": 233 }, { "epoch": 0.040369188303286466, "grad_norm": 0.80859375, "learning_rate": 1.9987461301004984e-05, "loss": 1.6531, "step": 234 }, { "epoch": 0.04054170620201846, "grad_norm": 1.015625, "learning_rate": 1.9987324669322274e-05, "loss": 1.7921, "step": 235 }, { "epoch": 0.04071422410075045, "grad_norm": 0.91796875, "learning_rate": 1.9987187297716145e-05, "loss": 1.8698, "step": 236 }, { "epoch": 0.040886741999482445, "grad_norm": 0.7890625, "learning_rate": 1.9987049186196782e-05, "loss": 1.7407, "step": 237 }, { "epoch": 0.04105925989821444, "grad_norm": 0.80859375, "learning_rate": 1.9986910334774415e-05, "loss": 1.6458, "step": 238 }, { "epoch": 0.04123177779694644, "grad_norm": 0.7734375, "learning_rate": 1.9986770743459325e-05, "loss": 1.6818, "step": 239 }, { "epoch": 0.041404295695678425, "grad_norm": 1.0703125, "learning_rate": 1.9986630412261857e-05, "loss": 1.6233, "step": 240 }, { "epoch": 0.04157681359441042, "grad_norm": 0.75390625, "learning_rate": 1.9986489341192416e-05, "loss": 1.649, "step": 241 }, { "epoch": 0.041749331493142416, "grad_norm": 0.76953125, "learning_rate": 1.998634753026144e-05, "loss": 1.7436, "step": 242 }, { "epoch": 0.041921849391874405, "grad_norm": 0.74609375, "learning_rate": 1.9986204979479443e-05, "loss": 1.6854, "step": 243 }, { "epoch": 0.0420943672906064, "grad_norm": 0.8359375, "learning_rate": 1.9986061688856983e-05, "loss": 1.6537, "step": 244 }, { "epoch": 0.042266885189338396, "grad_norm": 0.71484375, "learning_rate": 1.9985917658404677e-05, "loss": 1.767, "step": 245 }, { "epoch": 0.042439403088070385, "grad_norm": 1.0390625, "learning_rate": 1.99857728881332e-05, "loss": 1.68, "step": 246 }, { "epoch": 0.04261192098680238, "grad_norm": 0.73046875, "learning_rate": 1.998562737805327e-05, "loss": 1.6439, "step": 247 }, { "epoch": 0.042784438885534376, "grad_norm": 0.828125, "learning_rate": 1.9985481128175673e-05, "loss": 1.8946, "step": 248 }, { "epoch": 0.042956956784266365, "grad_norm": 0.96875, "learning_rate": 1.998533413851124e-05, "loss": 1.6761, "step": 249 }, { "epoch": 0.04312947468299836, "grad_norm": 0.734375, "learning_rate": 1.9985186409070863e-05, "loss": 1.6496, "step": 250 }, { "epoch": 0.043301992581730356, "grad_norm": 0.86328125, "learning_rate": 1.998503793986549e-05, "loss": 1.7303, "step": 251 }, { "epoch": 0.04347451048046235, "grad_norm": 0.796875, "learning_rate": 1.9984888730906112e-05, "loss": 1.7383, "step": 252 }, { "epoch": 0.04364702837919434, "grad_norm": 0.71484375, "learning_rate": 1.998473878220379e-05, "loss": 1.5984, "step": 253 }, { "epoch": 0.043819546277926336, "grad_norm": 1.2890625, "learning_rate": 1.9984588093769633e-05, "loss": 1.6765, "step": 254 }, { "epoch": 0.04399206417665833, "grad_norm": 1.0078125, "learning_rate": 1.99844366656148e-05, "loss": 1.7208, "step": 255 }, { "epoch": 0.04416458207539032, "grad_norm": 0.91015625, "learning_rate": 1.9984284497750516e-05, "loss": 1.7428, "step": 256 }, { "epoch": 0.044337099974122315, "grad_norm": 0.77734375, "learning_rate": 1.998413159018805e-05, "loss": 1.7136, "step": 257 }, { "epoch": 0.04450961787285431, "grad_norm": 0.82421875, "learning_rate": 1.9983977942938735e-05, "loss": 1.7301, "step": 258 }, { "epoch": 0.0446821357715863, "grad_norm": 0.703125, "learning_rate": 1.998382355601395e-05, "loss": 1.5718, "step": 259 }, { "epoch": 0.044854653670318295, "grad_norm": 0.796875, "learning_rate": 1.998366842942513e-05, "loss": 1.6996, "step": 260 }, { "epoch": 0.04502717156905029, "grad_norm": 0.76171875, "learning_rate": 1.998351256318378e-05, "loss": 1.6608, "step": 261 }, { "epoch": 0.04519968946778228, "grad_norm": 0.6640625, "learning_rate": 1.998335595730143e-05, "loss": 1.6553, "step": 262 }, { "epoch": 0.045372207366514275, "grad_norm": 0.84375, "learning_rate": 1.99831986117897e-05, "loss": 1.6318, "step": 263 }, { "epoch": 0.04554472526524627, "grad_norm": 0.7421875, "learning_rate": 1.9983040526660236e-05, "loss": 1.6407, "step": 264 }, { "epoch": 0.045717243163978266, "grad_norm": 0.7734375, "learning_rate": 1.998288170192475e-05, "loss": 1.6473, "step": 265 }, { "epoch": 0.045889761062710255, "grad_norm": 1.1875, "learning_rate": 1.9982722137595015e-05, "loss": 1.7061, "step": 266 }, { "epoch": 0.04606227896144225, "grad_norm": 0.66796875, "learning_rate": 1.9982561833682848e-05, "loss": 1.6742, "step": 267 }, { "epoch": 0.046234796860174246, "grad_norm": 0.69921875, "learning_rate": 1.9982400790200127e-05, "loss": 1.6009, "step": 268 }, { "epoch": 0.046407314758906235, "grad_norm": 0.86328125, "learning_rate": 1.9982239007158784e-05, "loss": 1.6577, "step": 269 }, { "epoch": 0.04657983265763823, "grad_norm": 1.03125, "learning_rate": 1.99820764845708e-05, "loss": 1.7364, "step": 270 }, { "epoch": 0.046752350556370226, "grad_norm": 0.87109375, "learning_rate": 1.998191322244822e-05, "loss": 1.6277, "step": 271 }, { "epoch": 0.046924868455102214, "grad_norm": 0.85546875, "learning_rate": 1.9981749220803142e-05, "loss": 1.6939, "step": 272 }, { "epoch": 0.04709738635383421, "grad_norm": 0.703125, "learning_rate": 1.998158447964771e-05, "loss": 1.7526, "step": 273 }, { "epoch": 0.047269904252566206, "grad_norm": 0.7265625, "learning_rate": 1.998141899899413e-05, "loss": 1.6889, "step": 274 }, { "epoch": 0.047442422151298194, "grad_norm": 0.73046875, "learning_rate": 1.998125277885467e-05, "loss": 1.6421, "step": 275 }, { "epoch": 0.04761494005003019, "grad_norm": 1.1796875, "learning_rate": 1.998108581924163e-05, "loss": 1.6843, "step": 276 }, { "epoch": 0.047787457948762185, "grad_norm": 0.8671875, "learning_rate": 1.9980918120167395e-05, "loss": 1.6682, "step": 277 }, { "epoch": 0.04795997584749418, "grad_norm": 0.8671875, "learning_rate": 1.9980749681644378e-05, "loss": 1.7816, "step": 278 }, { "epoch": 0.04813249374622617, "grad_norm": 0.8125, "learning_rate": 1.9980580503685064e-05, "loss": 1.6331, "step": 279 }, { "epoch": 0.048305011644958165, "grad_norm": 0.71875, "learning_rate": 1.9980410586301983e-05, "loss": 1.7004, "step": 280 }, { "epoch": 0.04847752954369016, "grad_norm": 0.9609375, "learning_rate": 1.9980239929507724e-05, "loss": 1.7132, "step": 281 }, { "epoch": 0.04865004744242215, "grad_norm": 3.21875, "learning_rate": 1.9980068533314937e-05, "loss": 1.6185, "step": 282 }, { "epoch": 0.048822565341154145, "grad_norm": 0.82421875, "learning_rate": 1.9979896397736308e-05, "loss": 1.6983, "step": 283 }, { "epoch": 0.04899508323988614, "grad_norm": 0.78515625, "learning_rate": 1.9979723522784602e-05, "loss": 1.6439, "step": 284 }, { "epoch": 0.04916760113861813, "grad_norm": 0.9453125, "learning_rate": 1.997954990847262e-05, "loss": 1.66, "step": 285 }, { "epoch": 0.049340119037350125, "grad_norm": 0.72265625, "learning_rate": 1.9979375554813223e-05, "loss": 1.704, "step": 286 }, { "epoch": 0.04951263693608212, "grad_norm": 0.765625, "learning_rate": 1.9979200461819334e-05, "loss": 1.7866, "step": 287 }, { "epoch": 0.04968515483481411, "grad_norm": 0.92578125, "learning_rate": 1.997902462950392e-05, "loss": 1.6967, "step": 288 }, { "epoch": 0.049857672733546105, "grad_norm": 0.71484375, "learning_rate": 1.9978848057880008e-05, "loss": 1.6726, "step": 289 }, { "epoch": 0.0500301906322781, "grad_norm": 0.9296875, "learning_rate": 1.9978670746960687e-05, "loss": 1.7372, "step": 290 }, { "epoch": 0.050202708531010096, "grad_norm": 0.921875, "learning_rate": 1.9978492696759084e-05, "loss": 1.714, "step": 291 }, { "epoch": 0.050375226429742084, "grad_norm": 0.98046875, "learning_rate": 1.9978313907288395e-05, "loss": 1.693, "step": 292 }, { "epoch": 0.05054774432847408, "grad_norm": 0.80859375, "learning_rate": 1.997813437856186e-05, "loss": 1.6223, "step": 293 }, { "epoch": 0.050720262227206075, "grad_norm": 0.8046875, "learning_rate": 1.9977954110592787e-05, "loss": 1.7357, "step": 294 }, { "epoch": 0.050892780125938064, "grad_norm": 0.890625, "learning_rate": 1.9977773103394527e-05, "loss": 1.6369, "step": 295 }, { "epoch": 0.05106529802467006, "grad_norm": 0.7109375, "learning_rate": 1.9977591356980493e-05, "loss": 1.7504, "step": 296 }, { "epoch": 0.051237815923402055, "grad_norm": 0.74609375, "learning_rate": 1.997740887136415e-05, "loss": 1.7014, "step": 297 }, { "epoch": 0.051410333822134044, "grad_norm": 0.71875, "learning_rate": 1.9977225646559013e-05, "loss": 1.6612, "step": 298 }, { "epoch": 0.05158285172086604, "grad_norm": 0.75, "learning_rate": 1.9977041682578662e-05, "loss": 1.773, "step": 299 }, { "epoch": 0.051755369619598035, "grad_norm": 0.8984375, "learning_rate": 1.997685697943672e-05, "loss": 1.6947, "step": 300 }, { "epoch": 0.051755369619598035, "eval_loss": 1.6562981605529785, "eval_runtime": 10.8538, "eval_samples_per_second": 94.345, "eval_steps_per_second": 23.586, "step": 300 }, { "epoch": 0.051927887518330024, "grad_norm": 0.71875, "learning_rate": 1.9976671537146877e-05, "loss": 1.7933, "step": 301 }, { "epoch": 0.05210040541706202, "grad_norm": 0.74609375, "learning_rate": 1.997648535572287e-05, "loss": 1.6316, "step": 302 }, { "epoch": 0.052272923315794015, "grad_norm": 0.671875, "learning_rate": 1.9976298435178493e-05, "loss": 1.6905, "step": 303 }, { "epoch": 0.05244544121452601, "grad_norm": 1.125, "learning_rate": 1.9976110775527592e-05, "loss": 1.6914, "step": 304 }, { "epoch": 0.052617959113258, "grad_norm": 0.80859375, "learning_rate": 1.997592237678407e-05, "loss": 1.5772, "step": 305 }, { "epoch": 0.052790477011989995, "grad_norm": 0.75, "learning_rate": 1.9975733238961885e-05, "loss": 1.6322, "step": 306 }, { "epoch": 0.05296299491072199, "grad_norm": 1.3984375, "learning_rate": 1.997554336207505e-05, "loss": 1.6677, "step": 307 }, { "epoch": 0.05313551280945398, "grad_norm": 0.9609375, "learning_rate": 1.9975352746137636e-05, "loss": 1.658, "step": 308 }, { "epoch": 0.053308030708185974, "grad_norm": 0.76171875, "learning_rate": 1.997516139116376e-05, "loss": 1.6323, "step": 309 }, { "epoch": 0.05348054860691797, "grad_norm": 1.1328125, "learning_rate": 1.99749692971676e-05, "loss": 1.7478, "step": 310 }, { "epoch": 0.05365306650564996, "grad_norm": 0.79296875, "learning_rate": 1.9974776464163387e-05, "loss": 1.6917, "step": 311 }, { "epoch": 0.053825584404381954, "grad_norm": 0.7890625, "learning_rate": 1.997458289216541e-05, "loss": 1.7139, "step": 312 }, { "epoch": 0.05399810230311395, "grad_norm": 0.82421875, "learning_rate": 1.9974388581188008e-05, "loss": 1.6154, "step": 313 }, { "epoch": 0.05417062020184594, "grad_norm": 0.80078125, "learning_rate": 1.997419353124558e-05, "loss": 1.5962, "step": 314 }, { "epoch": 0.054343138100577934, "grad_norm": 0.84765625, "learning_rate": 1.997399774235257e-05, "loss": 1.642, "step": 315 }, { "epoch": 0.05451565599930993, "grad_norm": 0.72265625, "learning_rate": 1.997380121452349e-05, "loss": 1.6652, "step": 316 }, { "epoch": 0.054688173898041925, "grad_norm": 0.76171875, "learning_rate": 1.9973603947772893e-05, "loss": 1.6442, "step": 317 }, { "epoch": 0.054860691796773914, "grad_norm": 1.671875, "learning_rate": 1.99734059421154e-05, "loss": 1.6428, "step": 318 }, { "epoch": 0.05503320969550591, "grad_norm": 0.9921875, "learning_rate": 1.9973207197565678e-05, "loss": 1.6888, "step": 319 }, { "epoch": 0.055205727594237905, "grad_norm": 0.69921875, "learning_rate": 1.9973007714138447e-05, "loss": 1.6007, "step": 320 }, { "epoch": 0.055378245492969894, "grad_norm": 0.8515625, "learning_rate": 1.9972807491848494e-05, "loss": 1.6779, "step": 321 }, { "epoch": 0.05555076339170189, "grad_norm": 0.953125, "learning_rate": 1.997260653071065e-05, "loss": 1.6187, "step": 322 }, { "epoch": 0.055723281290433885, "grad_norm": 0.82421875, "learning_rate": 1.99724048307398e-05, "loss": 1.6346, "step": 323 }, { "epoch": 0.05589579918916587, "grad_norm": 1.1015625, "learning_rate": 1.9972202391950893e-05, "loss": 1.782, "step": 324 }, { "epoch": 0.05606831708789787, "grad_norm": 0.6875, "learning_rate": 1.9971999214358918e-05, "loss": 1.6016, "step": 325 }, { "epoch": 0.056240834986629865, "grad_norm": 0.83984375, "learning_rate": 1.9971795297978937e-05, "loss": 1.6269, "step": 326 }, { "epoch": 0.05641335288536185, "grad_norm": 0.83984375, "learning_rate": 1.9971590642826056e-05, "loss": 1.6898, "step": 327 }, { "epoch": 0.05658587078409385, "grad_norm": 0.74609375, "learning_rate": 1.997138524891543e-05, "loss": 1.6505, "step": 328 }, { "epoch": 0.056758388682825844, "grad_norm": 0.8125, "learning_rate": 1.9971179116262284e-05, "loss": 1.6379, "step": 329 }, { "epoch": 0.05693090658155784, "grad_norm": 0.87109375, "learning_rate": 1.9970972244881886e-05, "loss": 1.6406, "step": 330 }, { "epoch": 0.05710342448028983, "grad_norm": 0.6796875, "learning_rate": 1.997076463478956e-05, "loss": 1.6551, "step": 331 }, { "epoch": 0.057275942379021824, "grad_norm": 0.7265625, "learning_rate": 1.9970556286000693e-05, "loss": 1.647, "step": 332 }, { "epoch": 0.05744846027775382, "grad_norm": 3.6875, "learning_rate": 1.997034719853072e-05, "loss": 1.686, "step": 333 }, { "epoch": 0.05762097817648581, "grad_norm": 0.84375, "learning_rate": 1.9970137372395123e-05, "loss": 1.6334, "step": 334 }, { "epoch": 0.057793496075217804, "grad_norm": 0.96484375, "learning_rate": 1.9969926807609453e-05, "loss": 1.5861, "step": 335 }, { "epoch": 0.0579660139739498, "grad_norm": 0.9296875, "learning_rate": 1.9969715504189312e-05, "loss": 1.7133, "step": 336 }, { "epoch": 0.05813853187268179, "grad_norm": 0.67578125, "learning_rate": 1.9969503462150352e-05, "loss": 1.6659, "step": 337 }, { "epoch": 0.058311049771413784, "grad_norm": 0.96484375, "learning_rate": 1.9969290681508284e-05, "loss": 1.7634, "step": 338 }, { "epoch": 0.05848356767014578, "grad_norm": 0.8671875, "learning_rate": 1.996907716227887e-05, "loss": 1.7041, "step": 339 }, { "epoch": 0.05865608556887777, "grad_norm": 0.7421875, "learning_rate": 1.9968862904477936e-05, "loss": 1.6879, "step": 340 }, { "epoch": 0.058828603467609764, "grad_norm": 0.88671875, "learning_rate": 1.9968647908121342e-05, "loss": 1.6305, "step": 341 }, { "epoch": 0.05900112136634176, "grad_norm": 1.5078125, "learning_rate": 1.996843217322503e-05, "loss": 1.6204, "step": 342 }, { "epoch": 0.059173639265073755, "grad_norm": 0.75390625, "learning_rate": 1.996821569980497e-05, "loss": 1.6207, "step": 343 }, { "epoch": 0.05934615716380574, "grad_norm": 0.83203125, "learning_rate": 1.9967998487877212e-05, "loss": 1.7491, "step": 344 }, { "epoch": 0.05951867506253774, "grad_norm": 0.84765625, "learning_rate": 1.9967780537457842e-05, "loss": 1.6341, "step": 345 }, { "epoch": 0.059691192961269735, "grad_norm": 0.9609375, "learning_rate": 1.996756184856301e-05, "loss": 1.6297, "step": 346 }, { "epoch": 0.05986371086000172, "grad_norm": 0.84765625, "learning_rate": 1.996734242120891e-05, "loss": 1.6929, "step": 347 }, { "epoch": 0.06003622875873372, "grad_norm": 0.83984375, "learning_rate": 1.9967122255411812e-05, "loss": 1.7607, "step": 348 }, { "epoch": 0.060208746657465714, "grad_norm": 1.546875, "learning_rate": 1.9966901351188018e-05, "loss": 1.7494, "step": 349 }, { "epoch": 0.0603812645561977, "grad_norm": 0.87890625, "learning_rate": 1.996667970855389e-05, "loss": 1.6567, "step": 350 }, { "epoch": 0.0605537824549297, "grad_norm": 0.73046875, "learning_rate": 1.9966457327525864e-05, "loss": 1.666, "step": 351 }, { "epoch": 0.060726300353661694, "grad_norm": 0.6796875, "learning_rate": 1.9966234208120398e-05, "loss": 1.6929, "step": 352 }, { "epoch": 0.06089881825239368, "grad_norm": 0.859375, "learning_rate": 1.9966010350354032e-05, "loss": 1.7106, "step": 353 }, { "epoch": 0.06107133615112568, "grad_norm": 0.7421875, "learning_rate": 1.996578575424335e-05, "loss": 1.6795, "step": 354 }, { "epoch": 0.061243854049857674, "grad_norm": 0.7578125, "learning_rate": 1.996556041980499e-05, "loss": 1.6676, "step": 355 }, { "epoch": 0.06141637194858967, "grad_norm": 0.703125, "learning_rate": 1.9965334347055646e-05, "loss": 1.6441, "step": 356 }, { "epoch": 0.06158888984732166, "grad_norm": 0.80078125, "learning_rate": 1.9965107536012067e-05, "loss": 1.7209, "step": 357 }, { "epoch": 0.061761407746053654, "grad_norm": 0.8203125, "learning_rate": 1.996487998669106e-05, "loss": 1.7478, "step": 358 }, { "epoch": 0.06193392564478565, "grad_norm": 0.9375, "learning_rate": 1.9964651699109476e-05, "loss": 1.6541, "step": 359 }, { "epoch": 0.06210644354351764, "grad_norm": 1.515625, "learning_rate": 1.996442267328423e-05, "loss": 1.5692, "step": 360 }, { "epoch": 0.062278961442249633, "grad_norm": 0.96484375, "learning_rate": 1.9964192909232297e-05, "loss": 1.6048, "step": 361 }, { "epoch": 0.06245147934098163, "grad_norm": 0.7109375, "learning_rate": 1.9963962406970695e-05, "loss": 1.7004, "step": 362 }, { "epoch": 0.06262399723971362, "grad_norm": 0.859375, "learning_rate": 1.9963731166516494e-05, "loss": 1.61, "step": 363 }, { "epoch": 0.06279651513844561, "grad_norm": 0.73828125, "learning_rate": 1.996349918788684e-05, "loss": 1.641, "step": 364 }, { "epoch": 0.0629690330371776, "grad_norm": 0.7421875, "learning_rate": 1.9963266471098905e-05, "loss": 1.6621, "step": 365 }, { "epoch": 0.0631415509359096, "grad_norm": 0.71875, "learning_rate": 1.996303301616994e-05, "loss": 1.5699, "step": 366 }, { "epoch": 0.0633140688346416, "grad_norm": 0.703125, "learning_rate": 1.9962798823117232e-05, "loss": 1.7367, "step": 367 }, { "epoch": 0.06348658673337358, "grad_norm": 0.75390625, "learning_rate": 1.996256389195814e-05, "loss": 1.6866, "step": 368 }, { "epoch": 0.06365910463210558, "grad_norm": 0.81640625, "learning_rate": 1.996232822271007e-05, "loss": 1.7105, "step": 369 }, { "epoch": 0.06383162253083757, "grad_norm": 0.671875, "learning_rate": 1.9962091815390475e-05, "loss": 1.5961, "step": 370 }, { "epoch": 0.06400414042956956, "grad_norm": 0.8671875, "learning_rate": 1.9961854670016868e-05, "loss": 1.69, "step": 371 }, { "epoch": 0.06417665832830156, "grad_norm": 0.73828125, "learning_rate": 1.996161678660683e-05, "loss": 1.689, "step": 372 }, { "epoch": 0.06434917622703355, "grad_norm": 0.74609375, "learning_rate": 1.9961378165177972e-05, "loss": 1.6235, "step": 373 }, { "epoch": 0.06452169412576554, "grad_norm": 1.0390625, "learning_rate": 1.9961138805747977e-05, "loss": 1.653, "step": 374 }, { "epoch": 0.06469421202449754, "grad_norm": 0.7265625, "learning_rate": 1.996089870833458e-05, "loss": 1.5524, "step": 375 }, { "epoch": 0.06486672992322953, "grad_norm": 0.796875, "learning_rate": 1.996065787295557e-05, "loss": 1.6183, "step": 376 }, { "epoch": 0.06503924782196153, "grad_norm": 3.734375, "learning_rate": 1.9960416299628788e-05, "loss": 1.5917, "step": 377 }, { "epoch": 0.06521176572069352, "grad_norm": 0.67578125, "learning_rate": 1.996017398837213e-05, "loss": 1.7018, "step": 378 }, { "epoch": 0.06538428361942551, "grad_norm": 0.80078125, "learning_rate": 1.995993093920355e-05, "loss": 1.7019, "step": 379 }, { "epoch": 0.06555680151815751, "grad_norm": 0.75390625, "learning_rate": 1.9959687152141052e-05, "loss": 1.7515, "step": 380 }, { "epoch": 0.0657293194168895, "grad_norm": 1.3203125, "learning_rate": 1.99594426272027e-05, "loss": 1.6587, "step": 381 }, { "epoch": 0.06590183731562149, "grad_norm": 0.75390625, "learning_rate": 1.9959197364406607e-05, "loss": 1.6349, "step": 382 }, { "epoch": 0.0660743552143535, "grad_norm": 0.7578125, "learning_rate": 1.9958951363770946e-05, "loss": 1.6287, "step": 383 }, { "epoch": 0.06624687311308548, "grad_norm": 0.6796875, "learning_rate": 1.9958704625313942e-05, "loss": 1.6268, "step": 384 }, { "epoch": 0.06641939101181747, "grad_norm": 0.7265625, "learning_rate": 1.9958457149053876e-05, "loss": 1.5422, "step": 385 }, { "epoch": 0.06659190891054947, "grad_norm": 0.8203125, "learning_rate": 1.995820893500908e-05, "loss": 1.5975, "step": 386 }, { "epoch": 0.06676442680928146, "grad_norm": 0.85546875, "learning_rate": 1.9957959983197944e-05, "loss": 1.5995, "step": 387 }, { "epoch": 0.06693694470801345, "grad_norm": 1.0234375, "learning_rate": 1.995771029363891e-05, "loss": 1.6374, "step": 388 }, { "epoch": 0.06710946260674545, "grad_norm": 0.67578125, "learning_rate": 1.995745986635048e-05, "loss": 1.5406, "step": 389 }, { "epoch": 0.06728198050547744, "grad_norm": 0.796875, "learning_rate": 1.995720870135121e-05, "loss": 1.6726, "step": 390 }, { "epoch": 0.06745449840420943, "grad_norm": 0.70703125, "learning_rate": 1.99569567986597e-05, "loss": 1.6981, "step": 391 }, { "epoch": 0.06762701630294143, "grad_norm": 0.75390625, "learning_rate": 1.9956704158294614e-05, "loss": 1.7608, "step": 392 }, { "epoch": 0.06779953420167342, "grad_norm": 0.70703125, "learning_rate": 1.9956450780274677e-05, "loss": 1.7732, "step": 393 }, { "epoch": 0.06797205210040541, "grad_norm": 0.74609375, "learning_rate": 1.9956196664618652e-05, "loss": 1.6464, "step": 394 }, { "epoch": 0.06814456999913741, "grad_norm": 0.70703125, "learning_rate": 1.995594181134537e-05, "loss": 1.7028, "step": 395 }, { "epoch": 0.0683170878978694, "grad_norm": 1.0703125, "learning_rate": 1.9955686220473712e-05, "loss": 1.6572, "step": 396 }, { "epoch": 0.06848960579660139, "grad_norm": 0.7265625, "learning_rate": 1.9955429892022612e-05, "loss": 1.612, "step": 397 }, { "epoch": 0.0686621236953334, "grad_norm": 0.76953125, "learning_rate": 1.995517282601106e-05, "loss": 1.6071, "step": 398 }, { "epoch": 0.06883464159406538, "grad_norm": 0.84765625, "learning_rate": 1.9954915022458105e-05, "loss": 1.6205, "step": 399 }, { "epoch": 0.06900715949279737, "grad_norm": 0.71484375, "learning_rate": 1.995465648138284e-05, "loss": 1.5928, "step": 400 }, { "epoch": 0.06900715949279737, "eval_loss": 1.6248373985290527, "eval_runtime": 10.8965, "eval_samples_per_second": 93.975, "eval_steps_per_second": 23.494, "step": 400 }, { "epoch": 0.06917967739152937, "grad_norm": 0.91796875, "learning_rate": 1.9954397202804426e-05, "loss": 1.6632, "step": 401 }, { "epoch": 0.06935219529026136, "grad_norm": 0.71875, "learning_rate": 1.995413718674207e-05, "loss": 1.663, "step": 402 }, { "epoch": 0.06952471318899336, "grad_norm": 0.77734375, "learning_rate": 1.9953876433215035e-05, "loss": 1.6414, "step": 403 }, { "epoch": 0.06969723108772535, "grad_norm": 0.75390625, "learning_rate": 1.9953614942242635e-05, "loss": 1.6355, "step": 404 }, { "epoch": 0.06986974898645734, "grad_norm": 0.68359375, "learning_rate": 1.9953352713844253e-05, "loss": 1.517, "step": 405 }, { "epoch": 0.07004226688518934, "grad_norm": 0.63671875, "learning_rate": 1.9953089748039306e-05, "loss": 1.657, "step": 406 }, { "epoch": 0.07021478478392133, "grad_norm": 1.0078125, "learning_rate": 1.9952826044847282e-05, "loss": 1.5441, "step": 407 }, { "epoch": 0.07038730268265332, "grad_norm": 0.61328125, "learning_rate": 1.9952561604287717e-05, "loss": 1.6777, "step": 408 }, { "epoch": 0.07055982058138532, "grad_norm": 2.984375, "learning_rate": 1.9952296426380198e-05, "loss": 1.6177, "step": 409 }, { "epoch": 0.07073233848011731, "grad_norm": 0.71875, "learning_rate": 1.9952030511144384e-05, "loss": 1.6362, "step": 410 }, { "epoch": 0.0709048563788493, "grad_norm": 0.90625, "learning_rate": 1.995176385859996e-05, "loss": 1.5383, "step": 411 }, { "epoch": 0.0710773742775813, "grad_norm": 0.65234375, "learning_rate": 1.9951496468766687e-05, "loss": 1.7264, "step": 412 }, { "epoch": 0.07124989217631329, "grad_norm": 0.75, "learning_rate": 1.9951228341664376e-05, "loss": 1.7352, "step": 413 }, { "epoch": 0.07142241007504528, "grad_norm": 0.66796875, "learning_rate": 1.9950959477312895e-05, "loss": 1.6589, "step": 414 }, { "epoch": 0.07159492797377728, "grad_norm": 0.6796875, "learning_rate": 1.9950689875732157e-05, "loss": 1.6992, "step": 415 }, { "epoch": 0.07176744587250927, "grad_norm": 0.83984375, "learning_rate": 1.9950419536942137e-05, "loss": 1.6052, "step": 416 }, { "epoch": 0.07193996377124126, "grad_norm": 0.69140625, "learning_rate": 1.9950148460962867e-05, "loss": 1.561, "step": 417 }, { "epoch": 0.07211248166997326, "grad_norm": 0.9296875, "learning_rate": 1.9949876647814428e-05, "loss": 1.6689, "step": 418 }, { "epoch": 0.07228499956870525, "grad_norm": 0.7578125, "learning_rate": 1.9949604097516956e-05, "loss": 1.5868, "step": 419 }, { "epoch": 0.07245751746743724, "grad_norm": 0.734375, "learning_rate": 1.9949330810090643e-05, "loss": 1.5644, "step": 420 }, { "epoch": 0.07263003536616924, "grad_norm": 0.7890625, "learning_rate": 1.9949056785555738e-05, "loss": 1.5376, "step": 421 }, { "epoch": 0.07280255326490123, "grad_norm": 1.0234375, "learning_rate": 1.9948782023932545e-05, "loss": 1.6105, "step": 422 }, { "epoch": 0.07297507116363322, "grad_norm": 0.828125, "learning_rate": 1.9948506525241414e-05, "loss": 1.6057, "step": 423 }, { "epoch": 0.07314758906236522, "grad_norm": 0.6875, "learning_rate": 1.9948230289502758e-05, "loss": 1.7268, "step": 424 }, { "epoch": 0.07332010696109721, "grad_norm": 1.046875, "learning_rate": 1.9947953316737045e-05, "loss": 1.6582, "step": 425 }, { "epoch": 0.0734926248598292, "grad_norm": 0.67578125, "learning_rate": 1.9947675606964793e-05, "loss": 1.6357, "step": 426 }, { "epoch": 0.0736651427585612, "grad_norm": 0.6953125, "learning_rate": 1.994739716020657e-05, "loss": 1.5585, "step": 427 }, { "epoch": 0.07383766065729319, "grad_norm": 0.703125, "learning_rate": 1.9947117976483018e-05, "loss": 1.6731, "step": 428 }, { "epoch": 0.0740101785560252, "grad_norm": 0.765625, "learning_rate": 1.994683805581481e-05, "loss": 1.6636, "step": 429 }, { "epoch": 0.07418269645475718, "grad_norm": 0.71484375, "learning_rate": 1.9946557398222686e-05, "loss": 1.7249, "step": 430 }, { "epoch": 0.07435521435348917, "grad_norm": 0.62890625, "learning_rate": 1.9946276003727447e-05, "loss": 1.5798, "step": 431 }, { "epoch": 0.07452773225222117, "grad_norm": 0.671875, "learning_rate": 1.994599387234993e-05, "loss": 1.6511, "step": 432 }, { "epoch": 0.07470025015095316, "grad_norm": 0.77734375, "learning_rate": 1.9945711004111045e-05, "loss": 1.6225, "step": 433 }, { "epoch": 0.07487276804968515, "grad_norm": 0.69140625, "learning_rate": 1.9945427399031744e-05, "loss": 1.5686, "step": 434 }, { "epoch": 0.07504528594841715, "grad_norm": 0.82421875, "learning_rate": 1.9945143057133037e-05, "loss": 1.6841, "step": 435 }, { "epoch": 0.07521780384714914, "grad_norm": 0.87109375, "learning_rate": 1.9944857978435996e-05, "loss": 1.6935, "step": 436 }, { "epoch": 0.07539032174588113, "grad_norm": 0.83984375, "learning_rate": 1.9944572162961735e-05, "loss": 1.5725, "step": 437 }, { "epoch": 0.07556283964461313, "grad_norm": 0.8203125, "learning_rate": 1.9944285610731433e-05, "loss": 1.675, "step": 438 }, { "epoch": 0.07573535754334512, "grad_norm": 0.9453125, "learning_rate": 1.9943998321766318e-05, "loss": 1.7037, "step": 439 }, { "epoch": 0.07590787544207711, "grad_norm": 0.94921875, "learning_rate": 1.9943710296087672e-05, "loss": 1.6607, "step": 440 }, { "epoch": 0.07608039334080911, "grad_norm": 0.78515625, "learning_rate": 1.994342153371684e-05, "loss": 1.6181, "step": 441 }, { "epoch": 0.0762529112395411, "grad_norm": 1.046875, "learning_rate": 1.9943132034675208e-05, "loss": 1.6794, "step": 442 }, { "epoch": 0.07642542913827309, "grad_norm": 0.78515625, "learning_rate": 1.9942841798984228e-05, "loss": 1.6142, "step": 443 }, { "epoch": 0.07659794703700509, "grad_norm": 0.8203125, "learning_rate": 1.9942550826665404e-05, "loss": 1.7234, "step": 444 }, { "epoch": 0.07677046493573708, "grad_norm": 1.0390625, "learning_rate": 1.994225911774029e-05, "loss": 1.6427, "step": 445 }, { "epoch": 0.07694298283446907, "grad_norm": 0.81640625, "learning_rate": 1.9941966672230494e-05, "loss": 1.7516, "step": 446 }, { "epoch": 0.07711550073320107, "grad_norm": 1.3828125, "learning_rate": 1.9941673490157694e-05, "loss": 1.6401, "step": 447 }, { "epoch": 0.07728801863193306, "grad_norm": 0.90625, "learning_rate": 1.9941379571543597e-05, "loss": 1.6463, "step": 448 }, { "epoch": 0.07746053653066505, "grad_norm": 0.6875, "learning_rate": 1.9941084916409988e-05, "loss": 1.5744, "step": 449 }, { "epoch": 0.07763305442939705, "grad_norm": 1.3203125, "learning_rate": 1.994078952477869e-05, "loss": 1.6328, "step": 450 }, { "epoch": 0.07780557232812904, "grad_norm": 0.8984375, "learning_rate": 1.9940493396671598e-05, "loss": 1.6568, "step": 451 }, { "epoch": 0.07797809022686104, "grad_norm": 0.890625, "learning_rate": 1.994019653211064e-05, "loss": 1.5954, "step": 452 }, { "epoch": 0.07815060812559303, "grad_norm": 1.125, "learning_rate": 1.9939898931117813e-05, "loss": 1.6227, "step": 453 }, { "epoch": 0.07832312602432502, "grad_norm": 0.96875, "learning_rate": 1.9939600593715166e-05, "loss": 1.5917, "step": 454 }, { "epoch": 0.07849564392305702, "grad_norm": 0.77734375, "learning_rate": 1.99393015199248e-05, "loss": 1.6043, "step": 455 }, { "epoch": 0.07866816182178901, "grad_norm": 0.80078125, "learning_rate": 1.993900170976888e-05, "loss": 1.66, "step": 456 }, { "epoch": 0.078840679720521, "grad_norm": 0.8828125, "learning_rate": 1.9938701163269607e-05, "loss": 1.5998, "step": 457 }, { "epoch": 0.079013197619253, "grad_norm": 0.875, "learning_rate": 1.9938399880449254e-05, "loss": 1.6249, "step": 458 }, { "epoch": 0.07918571551798499, "grad_norm": 0.84765625, "learning_rate": 1.9938097861330138e-05, "loss": 1.623, "step": 459 }, { "epoch": 0.07935823341671698, "grad_norm": 0.79296875, "learning_rate": 1.9937795105934637e-05, "loss": 1.5886, "step": 460 }, { "epoch": 0.07953075131544898, "grad_norm": 0.68359375, "learning_rate": 1.9937491614285182e-05, "loss": 1.6607, "step": 461 }, { "epoch": 0.07970326921418097, "grad_norm": 0.79296875, "learning_rate": 1.993718738640425e-05, "loss": 1.6488, "step": 462 }, { "epoch": 0.07987578711291296, "grad_norm": 0.7109375, "learning_rate": 1.993688242231439e-05, "loss": 1.5408, "step": 463 }, { "epoch": 0.08004830501164496, "grad_norm": 0.625, "learning_rate": 1.9936576722038192e-05, "loss": 1.6059, "step": 464 }, { "epoch": 0.08022082291037695, "grad_norm": 0.66796875, "learning_rate": 1.9936270285598306e-05, "loss": 1.5628, "step": 465 }, { "epoch": 0.08039334080910894, "grad_norm": 0.67578125, "learning_rate": 1.993596311301743e-05, "loss": 1.6705, "step": 466 }, { "epoch": 0.08056585870784094, "grad_norm": 0.68359375, "learning_rate": 1.9935655204318323e-05, "loss": 1.6953, "step": 467 }, { "epoch": 0.08073837660657293, "grad_norm": 0.71484375, "learning_rate": 1.99353465595238e-05, "loss": 1.6976, "step": 468 }, { "epoch": 0.08091089450530492, "grad_norm": 0.6875, "learning_rate": 1.993503717865672e-05, "loss": 1.7231, "step": 469 }, { "epoch": 0.08108341240403692, "grad_norm": 1.7265625, "learning_rate": 1.9934727061740013e-05, "loss": 1.6958, "step": 470 }, { "epoch": 0.08125593030276891, "grad_norm": 0.6875, "learning_rate": 1.993441620879665e-05, "loss": 1.5575, "step": 471 }, { "epoch": 0.0814284482015009, "grad_norm": 0.73046875, "learning_rate": 1.993410461984966e-05, "loss": 1.6419, "step": 472 }, { "epoch": 0.0816009661002329, "grad_norm": 0.68359375, "learning_rate": 1.993379229492213e-05, "loss": 1.5573, "step": 473 }, { "epoch": 0.08177348399896489, "grad_norm": 0.7265625, "learning_rate": 1.99334792340372e-05, "loss": 1.5639, "step": 474 }, { "epoch": 0.08194600189769688, "grad_norm": 0.78125, "learning_rate": 1.9933165437218057e-05, "loss": 1.6752, "step": 475 }, { "epoch": 0.08211851979642888, "grad_norm": 0.9765625, "learning_rate": 1.993285090448795e-05, "loss": 1.6365, "step": 476 }, { "epoch": 0.08229103769516087, "grad_norm": 0.67578125, "learning_rate": 1.993253563587019e-05, "loss": 1.6038, "step": 477 }, { "epoch": 0.08246355559389287, "grad_norm": 0.68359375, "learning_rate": 1.993221963138813e-05, "loss": 1.6083, "step": 478 }, { "epoch": 0.08263607349262486, "grad_norm": 0.6875, "learning_rate": 1.993190289106518e-05, "loss": 1.6, "step": 479 }, { "epoch": 0.08280859139135685, "grad_norm": 0.9765625, "learning_rate": 1.99315854149248e-05, "loss": 1.5528, "step": 480 }, { "epoch": 0.08298110929008885, "grad_norm": 0.68359375, "learning_rate": 1.9931267202990524e-05, "loss": 1.6537, "step": 481 }, { "epoch": 0.08315362718882084, "grad_norm": 0.7890625, "learning_rate": 1.9930948255285915e-05, "loss": 1.5673, "step": 482 }, { "epoch": 0.08332614508755283, "grad_norm": 0.7421875, "learning_rate": 1.9930628571834608e-05, "loss": 1.6501, "step": 483 }, { "epoch": 0.08349866298628483, "grad_norm": 0.7265625, "learning_rate": 1.993030815266029e-05, "loss": 1.6432, "step": 484 }, { "epoch": 0.08367118088501682, "grad_norm": 0.6484375, "learning_rate": 1.9929986997786698e-05, "loss": 1.5725, "step": 485 }, { "epoch": 0.08384369878374881, "grad_norm": 0.77734375, "learning_rate": 1.992966510723762e-05, "loss": 1.6375, "step": 486 }, { "epoch": 0.08401621668248081, "grad_norm": 0.6328125, "learning_rate": 1.992934248103691e-05, "loss": 1.6855, "step": 487 }, { "epoch": 0.0841887345812128, "grad_norm": 0.75, "learning_rate": 1.992901911920847e-05, "loss": 1.6522, "step": 488 }, { "epoch": 0.08436125247994479, "grad_norm": 0.62109375, "learning_rate": 1.992869502177625e-05, "loss": 1.6277, "step": 489 }, { "epoch": 0.08453377037867679, "grad_norm": 0.6328125, "learning_rate": 1.9928370188764265e-05, "loss": 1.6811, "step": 490 }, { "epoch": 0.08470628827740878, "grad_norm": 0.78125, "learning_rate": 1.9928044620196582e-05, "loss": 1.7146, "step": 491 }, { "epoch": 0.08487880617614077, "grad_norm": 0.70703125, "learning_rate": 1.9927718316097322e-05, "loss": 1.6449, "step": 492 }, { "epoch": 0.08505132407487277, "grad_norm": 0.7265625, "learning_rate": 1.9927391276490657e-05, "loss": 1.5758, "step": 493 }, { "epoch": 0.08522384197360476, "grad_norm": 0.6171875, "learning_rate": 1.9927063501400817e-05, "loss": 1.5599, "step": 494 }, { "epoch": 0.08539635987233675, "grad_norm": 0.67578125, "learning_rate": 1.9926734990852084e-05, "loss": 1.6281, "step": 495 }, { "epoch": 0.08556887777106875, "grad_norm": 0.6171875, "learning_rate": 1.99264057448688e-05, "loss": 1.7188, "step": 496 }, { "epoch": 0.08574139566980074, "grad_norm": 0.6328125, "learning_rate": 1.9926075763475353e-05, "loss": 1.606, "step": 497 }, { "epoch": 0.08591391356853273, "grad_norm": 0.66015625, "learning_rate": 1.9925745046696196e-05, "loss": 1.5543, "step": 498 }, { "epoch": 0.08608643146726473, "grad_norm": 0.6953125, "learning_rate": 1.9925413594555824e-05, "loss": 1.6025, "step": 499 }, { "epoch": 0.08625894936599672, "grad_norm": 1.9296875, "learning_rate": 1.9925081407078798e-05, "loss": 1.69, "step": 500 }, { "epoch": 0.08625894936599672, "eval_loss": 1.5997848510742188, "eval_runtime": 10.7856, "eval_samples_per_second": 94.941, "eval_steps_per_second": 23.735, "step": 500 }, { "epoch": 0.08643146726472871, "grad_norm": 0.6640625, "learning_rate": 1.9924748484289723e-05, "loss": 1.6629, "step": 501 }, { "epoch": 0.08660398516346071, "grad_norm": 18.75, "learning_rate": 1.992441482621327e-05, "loss": 1.6557, "step": 502 }, { "epoch": 0.0867765030621927, "grad_norm": 0.671875, "learning_rate": 1.9924080432874158e-05, "loss": 1.5788, "step": 503 }, { "epoch": 0.0869490209609247, "grad_norm": 1.015625, "learning_rate": 1.992374530429716e-05, "loss": 1.5682, "step": 504 }, { "epoch": 0.08712153885965669, "grad_norm": 0.8046875, "learning_rate": 1.99234094405071e-05, "loss": 1.5251, "step": 505 }, { "epoch": 0.08729405675838868, "grad_norm": 0.6796875, "learning_rate": 1.9923072841528862e-05, "loss": 1.5827, "step": 506 }, { "epoch": 0.08746657465712068, "grad_norm": 0.73828125, "learning_rate": 1.9922735507387393e-05, "loss": 1.6767, "step": 507 }, { "epoch": 0.08763909255585267, "grad_norm": 0.61328125, "learning_rate": 1.992239743810767e-05, "loss": 1.4635, "step": 508 }, { "epoch": 0.08781161045458466, "grad_norm": 0.63671875, "learning_rate": 1.9922058633714752e-05, "loss": 1.6189, "step": 509 }, { "epoch": 0.08798412835331666, "grad_norm": 0.69140625, "learning_rate": 1.992171909423373e-05, "loss": 1.5658, "step": 510 }, { "epoch": 0.08815664625204865, "grad_norm": 0.62890625, "learning_rate": 1.9921378819689767e-05, "loss": 1.6472, "step": 511 }, { "epoch": 0.08832916415078064, "grad_norm": 0.7265625, "learning_rate": 1.992103781010807e-05, "loss": 1.6193, "step": 512 }, { "epoch": 0.08850168204951264, "grad_norm": 0.76171875, "learning_rate": 1.9920696065513902e-05, "loss": 1.5667, "step": 513 }, { "epoch": 0.08867419994824463, "grad_norm": 0.6796875, "learning_rate": 1.992035358593258e-05, "loss": 1.5772, "step": 514 }, { "epoch": 0.08884671784697662, "grad_norm": 1.015625, "learning_rate": 1.992001037138948e-05, "loss": 1.6495, "step": 515 }, { "epoch": 0.08901923574570862, "grad_norm": 0.6953125, "learning_rate": 1.991966642191003e-05, "loss": 1.6067, "step": 516 }, { "epoch": 0.08919175364444061, "grad_norm": 1.078125, "learning_rate": 1.991932173751971e-05, "loss": 1.5596, "step": 517 }, { "epoch": 0.0893642715431726, "grad_norm": 0.72265625, "learning_rate": 1.9918976318244056e-05, "loss": 1.5892, "step": 518 }, { "epoch": 0.0895367894419046, "grad_norm": 0.83984375, "learning_rate": 1.991863016410866e-05, "loss": 1.5691, "step": 519 }, { "epoch": 0.08970930734063659, "grad_norm": 0.6953125, "learning_rate": 1.9918283275139167e-05, "loss": 1.5445, "step": 520 }, { "epoch": 0.08988182523936858, "grad_norm": 0.6640625, "learning_rate": 1.991793565136128e-05, "loss": 1.7354, "step": 521 }, { "epoch": 0.09005434313810058, "grad_norm": 0.69140625, "learning_rate": 1.991758729280074e-05, "loss": 1.5286, "step": 522 }, { "epoch": 0.09022686103683257, "grad_norm": 0.9296875, "learning_rate": 1.9917238199483374e-05, "loss": 1.6222, "step": 523 }, { "epoch": 0.09039937893556456, "grad_norm": 0.73828125, "learning_rate": 1.9916888371435036e-05, "loss": 1.6243, "step": 524 }, { "epoch": 0.09057189683429656, "grad_norm": 0.9140625, "learning_rate": 1.9916537808681643e-05, "loss": 1.5857, "step": 525 }, { "epoch": 0.09074441473302855, "grad_norm": 0.8515625, "learning_rate": 1.9916186511249167e-05, "loss": 1.5818, "step": 526 }, { "epoch": 0.09091693263176054, "grad_norm": 0.7421875, "learning_rate": 1.9915834479163634e-05, "loss": 1.5379, "step": 527 }, { "epoch": 0.09108945053049254, "grad_norm": 0.8984375, "learning_rate": 1.991548171245113e-05, "loss": 1.6106, "step": 528 }, { "epoch": 0.09126196842922453, "grad_norm": 0.765625, "learning_rate": 1.991512821113778e-05, "loss": 1.6018, "step": 529 }, { "epoch": 0.09143448632795653, "grad_norm": 0.75390625, "learning_rate": 1.9914773975249782e-05, "loss": 1.5822, "step": 530 }, { "epoch": 0.09160700422668852, "grad_norm": 0.82421875, "learning_rate": 1.991441900481338e-05, "loss": 1.627, "step": 531 }, { "epoch": 0.09177952212542051, "grad_norm": 0.75390625, "learning_rate": 1.9914063299854866e-05, "loss": 1.6365, "step": 532 }, { "epoch": 0.09195204002415251, "grad_norm": 0.859375, "learning_rate": 1.99137068604006e-05, "loss": 1.6972, "step": 533 }, { "epoch": 0.0921245579228845, "grad_norm": 1.078125, "learning_rate": 1.9913349686476983e-05, "loss": 1.6787, "step": 534 }, { "epoch": 0.09229707582161649, "grad_norm": 0.73046875, "learning_rate": 1.9912991778110485e-05, "loss": 1.583, "step": 535 }, { "epoch": 0.09246959372034849, "grad_norm": 1.1640625, "learning_rate": 1.991263313532761e-05, "loss": 1.6112, "step": 536 }, { "epoch": 0.09264211161908048, "grad_norm": 0.921875, "learning_rate": 1.9912273758154937e-05, "loss": 1.5964, "step": 537 }, { "epoch": 0.09281462951781247, "grad_norm": 0.90625, "learning_rate": 1.9911913646619094e-05, "loss": 1.6758, "step": 538 }, { "epoch": 0.09298714741654447, "grad_norm": 0.890625, "learning_rate": 1.991155280074675e-05, "loss": 1.6041, "step": 539 }, { "epoch": 0.09315966531527646, "grad_norm": 0.80859375, "learning_rate": 1.991119122056465e-05, "loss": 1.7256, "step": 540 }, { "epoch": 0.09333218321400845, "grad_norm": 0.7890625, "learning_rate": 1.9910828906099573e-05, "loss": 1.6746, "step": 541 }, { "epoch": 0.09350470111274045, "grad_norm": 1.0859375, "learning_rate": 1.9910465857378367e-05, "loss": 1.5857, "step": 542 }, { "epoch": 0.09367721901147244, "grad_norm": 0.8671875, "learning_rate": 1.991010207442792e-05, "loss": 1.6412, "step": 543 }, { "epoch": 0.09384973691020443, "grad_norm": 0.8828125, "learning_rate": 1.99097375572752e-05, "loss": 1.5818, "step": 544 }, { "epoch": 0.09402225480893643, "grad_norm": 1.171875, "learning_rate": 1.9909372305947196e-05, "loss": 1.6015, "step": 545 }, { "epoch": 0.09419477270766842, "grad_norm": 0.9375, "learning_rate": 1.9909006320470977e-05, "loss": 1.7097, "step": 546 }, { "epoch": 0.09436729060640041, "grad_norm": 0.94921875, "learning_rate": 1.9908639600873654e-05, "loss": 1.6574, "step": 547 }, { "epoch": 0.09453980850513241, "grad_norm": 1.0234375, "learning_rate": 1.99082721471824e-05, "loss": 1.6898, "step": 548 }, { "epoch": 0.0947123264038644, "grad_norm": 0.97265625, "learning_rate": 1.9907903959424436e-05, "loss": 1.566, "step": 549 }, { "epoch": 0.09488484430259639, "grad_norm": 0.6796875, "learning_rate": 1.9907535037627034e-05, "loss": 1.601, "step": 550 }, { "epoch": 0.09505736220132839, "grad_norm": 0.9921875, "learning_rate": 1.990716538181754e-05, "loss": 1.6122, "step": 551 }, { "epoch": 0.09522988010006038, "grad_norm": 0.8125, "learning_rate": 1.9906794992023325e-05, "loss": 1.6707, "step": 552 }, { "epoch": 0.09540239799879237, "grad_norm": 0.61328125, "learning_rate": 1.9906423868271837e-05, "loss": 1.6544, "step": 553 }, { "epoch": 0.09557491589752437, "grad_norm": 0.75390625, "learning_rate": 1.990605201059057e-05, "loss": 1.5394, "step": 554 }, { "epoch": 0.09574743379625636, "grad_norm": 0.875, "learning_rate": 1.9905679419007078e-05, "loss": 1.5378, "step": 555 }, { "epoch": 0.09591995169498836, "grad_norm": 0.625, "learning_rate": 1.990530609354896e-05, "loss": 1.5238, "step": 556 }, { "epoch": 0.09609246959372035, "grad_norm": 0.77734375, "learning_rate": 1.9904932034243872e-05, "loss": 1.6568, "step": 557 }, { "epoch": 0.09626498749245234, "grad_norm": 0.58203125, "learning_rate": 1.9904557241119534e-05, "loss": 1.6512, "step": 558 }, { "epoch": 0.09643750539118434, "grad_norm": 0.6484375, "learning_rate": 1.9904181714203707e-05, "loss": 1.6527, "step": 559 }, { "epoch": 0.09661002328991633, "grad_norm": 0.7578125, "learning_rate": 1.9903805453524213e-05, "loss": 1.6576, "step": 560 }, { "epoch": 0.09678254118864832, "grad_norm": 0.77734375, "learning_rate": 1.990342845910893e-05, "loss": 1.6159, "step": 561 }, { "epoch": 0.09695505908738032, "grad_norm": 0.88671875, "learning_rate": 1.990305073098579e-05, "loss": 1.5827, "step": 562 }, { "epoch": 0.09712757698611231, "grad_norm": 0.80078125, "learning_rate": 1.9902672269182773e-05, "loss": 1.6479, "step": 563 }, { "epoch": 0.0973000948848443, "grad_norm": 0.6484375, "learning_rate": 1.990229307372792e-05, "loss": 1.5307, "step": 564 }, { "epoch": 0.0974726127835763, "grad_norm": 0.6953125, "learning_rate": 1.9901913144649323e-05, "loss": 1.6036, "step": 565 }, { "epoch": 0.09764513068230829, "grad_norm": 0.95703125, "learning_rate": 1.9901532481975133e-05, "loss": 1.5556, "step": 566 }, { "epoch": 0.09781764858104028, "grad_norm": 0.7265625, "learning_rate": 1.9901151085733542e-05, "loss": 1.6605, "step": 567 }, { "epoch": 0.09799016647977228, "grad_norm": 0.70703125, "learning_rate": 1.990076895595282e-05, "loss": 1.6237, "step": 568 }, { "epoch": 0.09816268437850427, "grad_norm": 0.734375, "learning_rate": 1.990038609266127e-05, "loss": 1.5957, "step": 569 }, { "epoch": 0.09833520227723626, "grad_norm": 0.66796875, "learning_rate": 1.9900002495887255e-05, "loss": 1.651, "step": 570 }, { "epoch": 0.09850772017596826, "grad_norm": 0.73046875, "learning_rate": 1.98996181656592e-05, "loss": 1.6785, "step": 571 }, { "epoch": 0.09868023807470025, "grad_norm": 0.74609375, "learning_rate": 1.9899233102005573e-05, "loss": 1.5852, "step": 572 }, { "epoch": 0.09885275597343224, "grad_norm": 0.6875, "learning_rate": 1.9898847304954906e-05, "loss": 1.5645, "step": 573 }, { "epoch": 0.09902527387216424, "grad_norm": 0.7421875, "learning_rate": 1.989846077453578e-05, "loss": 1.711, "step": 574 }, { "epoch": 0.09919779177089623, "grad_norm": 0.63671875, "learning_rate": 1.989807351077683e-05, "loss": 1.5826, "step": 575 }, { "epoch": 0.09937030966962822, "grad_norm": 0.71875, "learning_rate": 1.989768551370675e-05, "loss": 1.5717, "step": 576 }, { "epoch": 0.09954282756836022, "grad_norm": 0.6875, "learning_rate": 1.9897296783354283e-05, "loss": 1.6394, "step": 577 }, { "epoch": 0.09971534546709221, "grad_norm": 0.66796875, "learning_rate": 1.9896907319748227e-05, "loss": 1.7012, "step": 578 }, { "epoch": 0.0998878633658242, "grad_norm": 0.73046875, "learning_rate": 1.9896517122917442e-05, "loss": 1.6539, "step": 579 }, { "epoch": 0.1000603812645562, "grad_norm": 0.640625, "learning_rate": 1.989612619289083e-05, "loss": 1.582, "step": 580 }, { "epoch": 0.10023289916328819, "grad_norm": 0.984375, "learning_rate": 1.9895734529697356e-05, "loss": 1.5482, "step": 581 }, { "epoch": 0.10040541706202019, "grad_norm": 0.6640625, "learning_rate": 1.9895342133366036e-05, "loss": 1.7008, "step": 582 }, { "epoch": 0.10057793496075218, "grad_norm": 0.64453125, "learning_rate": 1.9894949003925942e-05, "loss": 1.5771, "step": 583 }, { "epoch": 0.10075045285948417, "grad_norm": 0.81640625, "learning_rate": 1.98945551414062e-05, "loss": 1.5932, "step": 584 }, { "epoch": 0.10092297075821617, "grad_norm": 0.8046875, "learning_rate": 1.989416054583599e-05, "loss": 1.6266, "step": 585 }, { "epoch": 0.10109548865694816, "grad_norm": 0.640625, "learning_rate": 1.9893765217244544e-05, "loss": 1.588, "step": 586 }, { "epoch": 0.10126800655568015, "grad_norm": 0.78515625, "learning_rate": 1.9893369155661152e-05, "loss": 1.6614, "step": 587 }, { "epoch": 0.10144052445441215, "grad_norm": 0.70703125, "learning_rate": 1.9892972361115154e-05, "loss": 1.5868, "step": 588 }, { "epoch": 0.10161304235314414, "grad_norm": 1.53125, "learning_rate": 1.989257483363595e-05, "loss": 1.5864, "step": 589 }, { "epoch": 0.10178556025187613, "grad_norm": 0.7421875, "learning_rate": 1.9892176573252993e-05, "loss": 1.5276, "step": 590 }, { "epoch": 0.10195807815060813, "grad_norm": 0.71484375, "learning_rate": 1.9891777579995787e-05, "loss": 1.616, "step": 591 }, { "epoch": 0.10213059604934012, "grad_norm": 0.64453125, "learning_rate": 1.9891377853893888e-05, "loss": 1.621, "step": 592 }, { "epoch": 0.10230311394807211, "grad_norm": 0.61328125, "learning_rate": 1.9890977394976915e-05, "loss": 1.5442, "step": 593 }, { "epoch": 0.10247563184680411, "grad_norm": 0.58203125, "learning_rate": 1.9890576203274534e-05, "loss": 1.5709, "step": 594 }, { "epoch": 0.1026481497455361, "grad_norm": 0.875, "learning_rate": 1.989017427881647e-05, "loss": 1.5972, "step": 595 }, { "epoch": 0.10282066764426809, "grad_norm": 0.76953125, "learning_rate": 1.9889771621632497e-05, "loss": 1.636, "step": 596 }, { "epoch": 0.10299318554300009, "grad_norm": 0.640625, "learning_rate": 1.9889368231752452e-05, "loss": 1.6101, "step": 597 }, { "epoch": 0.10316570344173208, "grad_norm": 0.640625, "learning_rate": 1.9888964109206213e-05, "loss": 1.6646, "step": 598 }, { "epoch": 0.10333822134046407, "grad_norm": 0.61328125, "learning_rate": 1.9888559254023723e-05, "loss": 1.5002, "step": 599 }, { "epoch": 0.10351073923919607, "grad_norm": 0.74609375, "learning_rate": 1.988815366623498e-05, "loss": 1.5775, "step": 600 }, { "epoch": 0.10351073923919607, "eval_loss": 1.5823333263397217, "eval_runtime": 10.7916, "eval_samples_per_second": 94.889, "eval_steps_per_second": 23.722, "step": 600 }, { "epoch": 0.10368325713792806, "grad_norm": 0.78515625, "learning_rate": 1.988774734587003e-05, "loss": 1.6637, "step": 601 }, { "epoch": 0.10385577503666005, "grad_norm": 0.875, "learning_rate": 1.988734029295897e-05, "loss": 1.556, "step": 602 }, { "epoch": 0.10402829293539205, "grad_norm": 0.73046875, "learning_rate": 1.9886932507531966e-05, "loss": 1.5965, "step": 603 }, { "epoch": 0.10420081083412404, "grad_norm": 0.640625, "learning_rate": 1.9886523989619224e-05, "loss": 1.6596, "step": 604 }, { "epoch": 0.10437332873285603, "grad_norm": 0.83203125, "learning_rate": 1.988611473925101e-05, "loss": 1.6136, "step": 605 }, { "epoch": 0.10454584663158803, "grad_norm": 0.68359375, "learning_rate": 1.988570475645765e-05, "loss": 1.5358, "step": 606 }, { "epoch": 0.10471836453032002, "grad_norm": 0.85546875, "learning_rate": 1.988529404126951e-05, "loss": 1.609, "step": 607 }, { "epoch": 0.10489088242905202, "grad_norm": 0.8203125, "learning_rate": 1.9884882593717018e-05, "loss": 1.6632, "step": 608 }, { "epoch": 0.10506340032778401, "grad_norm": 0.8125, "learning_rate": 1.9884470413830662e-05, "loss": 1.6104, "step": 609 }, { "epoch": 0.105235918226516, "grad_norm": 0.68359375, "learning_rate": 1.988405750164098e-05, "loss": 1.5754, "step": 610 }, { "epoch": 0.105408436125248, "grad_norm": 0.61328125, "learning_rate": 1.9883643857178554e-05, "loss": 1.614, "step": 611 }, { "epoch": 0.10558095402397999, "grad_norm": 0.8125, "learning_rate": 1.988322948047404e-05, "loss": 1.6469, "step": 612 }, { "epoch": 0.10575347192271198, "grad_norm": 0.6484375, "learning_rate": 1.988281437155813e-05, "loss": 1.6428, "step": 613 }, { "epoch": 0.10592598982144398, "grad_norm": 0.6484375, "learning_rate": 1.9882398530461582e-05, "loss": 1.6195, "step": 614 }, { "epoch": 0.10609850772017597, "grad_norm": 0.6953125, "learning_rate": 1.98819819572152e-05, "loss": 1.6814, "step": 615 }, { "epoch": 0.10627102561890796, "grad_norm": 0.6484375, "learning_rate": 1.9881564651849855e-05, "loss": 1.5624, "step": 616 }, { "epoch": 0.10644354351763996, "grad_norm": 0.875, "learning_rate": 1.9881146614396454e-05, "loss": 1.6194, "step": 617 }, { "epoch": 0.10661606141637195, "grad_norm": 0.80859375, "learning_rate": 1.988072784488597e-05, "loss": 1.5799, "step": 618 }, { "epoch": 0.10678857931510394, "grad_norm": 0.5546875, "learning_rate": 1.9880308343349432e-05, "loss": 1.5319, "step": 619 }, { "epoch": 0.10696109721383594, "grad_norm": 0.953125, "learning_rate": 1.9879888109817916e-05, "loss": 1.5623, "step": 620 }, { "epoch": 0.10713361511256793, "grad_norm": 0.79296875, "learning_rate": 1.9879467144322557e-05, "loss": 1.6315, "step": 621 }, { "epoch": 0.10730613301129992, "grad_norm": 0.63671875, "learning_rate": 1.9879045446894546e-05, "loss": 1.5334, "step": 622 }, { "epoch": 0.10747865091003192, "grad_norm": 0.69921875, "learning_rate": 1.9878623017565115e-05, "loss": 1.5988, "step": 623 }, { "epoch": 0.10765116880876391, "grad_norm": 0.625, "learning_rate": 1.987819985636557e-05, "loss": 1.537, "step": 624 }, { "epoch": 0.1078236867074959, "grad_norm": 0.7578125, "learning_rate": 1.9877775963327254e-05, "loss": 1.5755, "step": 625 }, { "epoch": 0.1079962046062279, "grad_norm": 0.62890625, "learning_rate": 1.987735133848158e-05, "loss": 1.5418, "step": 626 }, { "epoch": 0.10816872250495989, "grad_norm": 0.6328125, "learning_rate": 1.987692598186e-05, "loss": 1.595, "step": 627 }, { "epoch": 0.10834124040369188, "grad_norm": 0.69921875, "learning_rate": 1.9876499893494028e-05, "loss": 1.4856, "step": 628 }, { "epoch": 0.10851375830242388, "grad_norm": 0.6796875, "learning_rate": 1.9876073073415233e-05, "loss": 1.5476, "step": 629 }, { "epoch": 0.10868627620115587, "grad_norm": 0.7734375, "learning_rate": 1.987564552165524e-05, "loss": 1.5228, "step": 630 }, { "epoch": 0.10885879409988786, "grad_norm": 0.71484375, "learning_rate": 1.9875217238245715e-05, "loss": 1.5727, "step": 631 }, { "epoch": 0.10903131199861986, "grad_norm": 0.6875, "learning_rate": 1.9874788223218398e-05, "loss": 1.6439, "step": 632 }, { "epoch": 0.10920382989735185, "grad_norm": 0.6953125, "learning_rate": 1.987435847660507e-05, "loss": 1.4864, "step": 633 }, { "epoch": 0.10937634779608385, "grad_norm": 0.7265625, "learning_rate": 1.9873927998437566e-05, "loss": 1.5274, "step": 634 }, { "epoch": 0.10954886569481584, "grad_norm": 0.7578125, "learning_rate": 1.9873496788747778e-05, "loss": 1.6405, "step": 635 }, { "epoch": 0.10972138359354783, "grad_norm": 0.64453125, "learning_rate": 1.987306484756766e-05, "loss": 1.6233, "step": 636 }, { "epoch": 0.10989390149227983, "grad_norm": 0.6328125, "learning_rate": 1.9872632174929208e-05, "loss": 1.585, "step": 637 }, { "epoch": 0.11006641939101182, "grad_norm": 0.75, "learning_rate": 1.9872198770864476e-05, "loss": 1.6029, "step": 638 }, { "epoch": 0.11023893728974381, "grad_norm": 0.66796875, "learning_rate": 1.9871764635405574e-05, "loss": 1.6057, "step": 639 }, { "epoch": 0.11041145518847581, "grad_norm": 0.765625, "learning_rate": 1.9871329768584666e-05, "loss": 1.5482, "step": 640 }, { "epoch": 0.1105839730872078, "grad_norm": 0.75390625, "learning_rate": 1.987089417043397e-05, "loss": 1.6146, "step": 641 }, { "epoch": 0.11075649098593979, "grad_norm": 0.6875, "learning_rate": 1.987045784098576e-05, "loss": 1.6118, "step": 642 }, { "epoch": 0.11092900888467179, "grad_norm": 0.90625, "learning_rate": 1.9870020780272357e-05, "loss": 1.5789, "step": 643 }, { "epoch": 0.11110152678340378, "grad_norm": 0.86328125, "learning_rate": 1.9869582988326145e-05, "loss": 1.5449, "step": 644 }, { "epoch": 0.11127404468213577, "grad_norm": 0.703125, "learning_rate": 1.9869144465179557e-05, "loss": 1.5871, "step": 645 }, { "epoch": 0.11144656258086777, "grad_norm": 0.59375, "learning_rate": 1.9868705210865083e-05, "loss": 1.5773, "step": 646 }, { "epoch": 0.11161908047959976, "grad_norm": 0.796875, "learning_rate": 1.9868265225415263e-05, "loss": 1.6108, "step": 647 }, { "epoch": 0.11179159837833175, "grad_norm": 0.8671875, "learning_rate": 1.9867824508862696e-05, "loss": 1.6162, "step": 648 }, { "epoch": 0.11196411627706375, "grad_norm": 0.8125, "learning_rate": 1.986738306124003e-05, "loss": 1.5618, "step": 649 }, { "epoch": 0.11213663417579574, "grad_norm": 0.65234375, "learning_rate": 1.9866940882579976e-05, "loss": 1.6397, "step": 650 }, { "epoch": 0.11230915207452773, "grad_norm": 0.6640625, "learning_rate": 1.986649797291529e-05, "loss": 1.632, "step": 651 }, { "epoch": 0.11248166997325973, "grad_norm": 1.1640625, "learning_rate": 1.9866054332278784e-05, "loss": 1.6181, "step": 652 }, { "epoch": 0.11265418787199172, "grad_norm": 0.625, "learning_rate": 1.9865609960703325e-05, "loss": 1.5809, "step": 653 }, { "epoch": 0.1128267057707237, "grad_norm": 0.71484375, "learning_rate": 1.9865164858221838e-05, "loss": 1.5046, "step": 654 }, { "epoch": 0.11299922366945571, "grad_norm": 0.66796875, "learning_rate": 1.9864719024867303e-05, "loss": 1.6255, "step": 655 }, { "epoch": 0.1131717415681877, "grad_norm": 0.609375, "learning_rate": 1.9864272460672738e-05, "loss": 1.613, "step": 656 }, { "epoch": 0.11334425946691969, "grad_norm": 0.7734375, "learning_rate": 1.9863825165671238e-05, "loss": 1.6889, "step": 657 }, { "epoch": 0.11351677736565169, "grad_norm": 1.2734375, "learning_rate": 1.9863377139895935e-05, "loss": 1.6039, "step": 658 }, { "epoch": 0.11368929526438368, "grad_norm": 0.640625, "learning_rate": 1.986292838338003e-05, "loss": 1.6027, "step": 659 }, { "epoch": 0.11386181316311568, "grad_norm": 0.64453125, "learning_rate": 1.986247889615676e-05, "loss": 1.5537, "step": 660 }, { "epoch": 0.11403433106184767, "grad_norm": 0.65234375, "learning_rate": 1.9862028678259427e-05, "loss": 1.5859, "step": 661 }, { "epoch": 0.11420684896057966, "grad_norm": 0.75390625, "learning_rate": 1.986157772972139e-05, "loss": 1.5783, "step": 662 }, { "epoch": 0.11437936685931166, "grad_norm": 0.82421875, "learning_rate": 1.9861126050576063e-05, "loss": 1.5792, "step": 663 }, { "epoch": 0.11455188475804365, "grad_norm": 0.65625, "learning_rate": 1.9860673640856895e-05, "loss": 1.5931, "step": 664 }, { "epoch": 0.11472440265677564, "grad_norm": 0.6484375, "learning_rate": 1.9860220500597415e-05, "loss": 1.6084, "step": 665 }, { "epoch": 0.11489692055550764, "grad_norm": 0.72265625, "learning_rate": 1.9859766629831192e-05, "loss": 1.6543, "step": 666 }, { "epoch": 0.11506943845423963, "grad_norm": 0.61328125, "learning_rate": 1.9859312028591852e-05, "loss": 1.61, "step": 667 }, { "epoch": 0.11524195635297162, "grad_norm": 0.640625, "learning_rate": 1.9858856696913068e-05, "loss": 1.5262, "step": 668 }, { "epoch": 0.11541447425170362, "grad_norm": 0.67578125, "learning_rate": 1.985840063482858e-05, "loss": 1.5316, "step": 669 }, { "epoch": 0.11558699215043561, "grad_norm": 0.62890625, "learning_rate": 1.9857943842372175e-05, "loss": 1.5745, "step": 670 }, { "epoch": 0.1157595100491676, "grad_norm": 0.69921875, "learning_rate": 1.9857486319577697e-05, "loss": 1.5823, "step": 671 }, { "epoch": 0.1159320279478996, "grad_norm": 0.6875, "learning_rate": 1.9857028066479042e-05, "loss": 1.5883, "step": 672 }, { "epoch": 0.11610454584663159, "grad_norm": 0.74609375, "learning_rate": 1.9856569083110152e-05, "loss": 1.6125, "step": 673 }, { "epoch": 0.11627706374536358, "grad_norm": 0.671875, "learning_rate": 1.985610936950504e-05, "loss": 1.5899, "step": 674 }, { "epoch": 0.11644958164409558, "grad_norm": 0.8828125, "learning_rate": 1.9855648925697762e-05, "loss": 1.6004, "step": 675 }, { "epoch": 0.11662209954282757, "grad_norm": 0.8828125, "learning_rate": 1.9855187751722432e-05, "loss": 1.5435, "step": 676 }, { "epoch": 0.11679461744155956, "grad_norm": 0.625, "learning_rate": 1.9854725847613216e-05, "loss": 1.4848, "step": 677 }, { "epoch": 0.11696713534029156, "grad_norm": 0.9921875, "learning_rate": 1.9854263213404333e-05, "loss": 1.5351, "step": 678 }, { "epoch": 0.11713965323902355, "grad_norm": 0.640625, "learning_rate": 1.985379984913006e-05, "loss": 1.6054, "step": 679 }, { "epoch": 0.11731217113775554, "grad_norm": 1.0234375, "learning_rate": 1.985333575482472e-05, "loss": 1.5781, "step": 680 }, { "epoch": 0.11748468903648754, "grad_norm": 0.96484375, "learning_rate": 1.9852870930522704e-05, "loss": 1.5546, "step": 681 }, { "epoch": 0.11765720693521953, "grad_norm": 0.65625, "learning_rate": 1.985240537625845e-05, "loss": 1.7013, "step": 682 }, { "epoch": 0.11782972483395152, "grad_norm": 0.90625, "learning_rate": 1.985193909206644e-05, "loss": 1.6284, "step": 683 }, { "epoch": 0.11800224273268352, "grad_norm": 0.9609375, "learning_rate": 1.9851472077981226e-05, "loss": 1.6304, "step": 684 }, { "epoch": 0.1181747606314155, "grad_norm": 0.69921875, "learning_rate": 1.98510043340374e-05, "loss": 1.6323, "step": 685 }, { "epoch": 0.11834727853014751, "grad_norm": 0.85546875, "learning_rate": 1.9850535860269628e-05, "loss": 1.6954, "step": 686 }, { "epoch": 0.1185197964288795, "grad_norm": 0.625, "learning_rate": 1.985006665671261e-05, "loss": 1.5755, "step": 687 }, { "epoch": 0.11869231432761149, "grad_norm": 0.66015625, "learning_rate": 1.984959672340111e-05, "loss": 1.6307, "step": 688 }, { "epoch": 0.11886483222634349, "grad_norm": 0.70703125, "learning_rate": 1.9849126060369933e-05, "loss": 1.6133, "step": 689 }, { "epoch": 0.11903735012507548, "grad_norm": 0.7109375, "learning_rate": 1.9848654667653964e-05, "loss": 1.649, "step": 690 }, { "epoch": 0.11920986802380747, "grad_norm": 0.64453125, "learning_rate": 1.9848182545288117e-05, "loss": 1.5091, "step": 691 }, { "epoch": 0.11938238592253947, "grad_norm": 0.78125, "learning_rate": 1.9847709693307375e-05, "loss": 1.6429, "step": 692 }, { "epoch": 0.11955490382127146, "grad_norm": 0.84765625, "learning_rate": 1.9847236111746767e-05, "loss": 1.5849, "step": 693 }, { "epoch": 0.11972742172000345, "grad_norm": 0.7734375, "learning_rate": 1.9846761800641377e-05, "loss": 1.5365, "step": 694 }, { "epoch": 0.11989993961873545, "grad_norm": 0.91796875, "learning_rate": 1.984628676002635e-05, "loss": 1.5412, "step": 695 }, { "epoch": 0.12007245751746744, "grad_norm": 0.6328125, "learning_rate": 1.9845810989936882e-05, "loss": 1.4677, "step": 696 }, { "epoch": 0.12024497541619943, "grad_norm": 0.94140625, "learning_rate": 1.984533449040821e-05, "loss": 1.5982, "step": 697 }, { "epoch": 0.12041749331493143, "grad_norm": 0.63671875, "learning_rate": 1.984485726147564e-05, "loss": 1.7411, "step": 698 }, { "epoch": 0.12059001121366342, "grad_norm": 0.75, "learning_rate": 1.9844379303174537e-05, "loss": 1.5752, "step": 699 }, { "epoch": 0.1207625291123954, "grad_norm": 0.66015625, "learning_rate": 1.9843900615540305e-05, "loss": 1.5738, "step": 700 }, { "epoch": 0.1207625291123954, "eval_loss": 1.5691492557525635, "eval_runtime": 11.0575, "eval_samples_per_second": 92.607, "eval_steps_per_second": 23.152, "step": 700 }, { "epoch": 0.12093504701112741, "grad_norm": 0.6171875, "learning_rate": 1.9843421198608404e-05, "loss": 1.5288, "step": 701 }, { "epoch": 0.1211075649098594, "grad_norm": 0.69921875, "learning_rate": 1.9842941052414356e-05, "loss": 1.5642, "step": 702 }, { "epoch": 0.12128008280859139, "grad_norm": 0.6640625, "learning_rate": 1.9842460176993734e-05, "loss": 1.5566, "step": 703 }, { "epoch": 0.12145260070732339, "grad_norm": 0.82421875, "learning_rate": 1.9841978572382162e-05, "loss": 1.5657, "step": 704 }, { "epoch": 0.12162511860605538, "grad_norm": 0.7109375, "learning_rate": 1.9841496238615326e-05, "loss": 1.5543, "step": 705 }, { "epoch": 0.12179763650478737, "grad_norm": 0.94140625, "learning_rate": 1.9841013175728953e-05, "loss": 1.5774, "step": 706 }, { "epoch": 0.12197015440351937, "grad_norm": 0.7109375, "learning_rate": 1.9840529383758834e-05, "loss": 1.6671, "step": 707 }, { "epoch": 0.12214267230225136, "grad_norm": 0.64453125, "learning_rate": 1.9840044862740814e-05, "loss": 1.6022, "step": 708 }, { "epoch": 0.12231519020098335, "grad_norm": 0.7578125, "learning_rate": 1.983955961271079e-05, "loss": 1.5639, "step": 709 }, { "epoch": 0.12248770809971535, "grad_norm": 0.59765625, "learning_rate": 1.98390736337047e-05, "loss": 1.6281, "step": 710 }, { "epoch": 0.12266022599844734, "grad_norm": 0.77734375, "learning_rate": 1.9838586925758566e-05, "loss": 1.6136, "step": 711 }, { "epoch": 0.12283274389717934, "grad_norm": 0.625, "learning_rate": 1.9838099488908437e-05, "loss": 1.581, "step": 712 }, { "epoch": 0.12300526179591133, "grad_norm": 0.69140625, "learning_rate": 1.9837611323190423e-05, "loss": 1.6113, "step": 713 }, { "epoch": 0.12317777969464332, "grad_norm": 0.84765625, "learning_rate": 1.9837122428640695e-05, "loss": 1.5996, "step": 714 }, { "epoch": 0.12335029759337532, "grad_norm": 0.70703125, "learning_rate": 1.9836632805295477e-05, "loss": 1.7063, "step": 715 }, { "epoch": 0.12352281549210731, "grad_norm": 0.91015625, "learning_rate": 1.9836142453191032e-05, "loss": 1.5825, "step": 716 }, { "epoch": 0.1236953333908393, "grad_norm": 0.63671875, "learning_rate": 1.98356513723637e-05, "loss": 1.5981, "step": 717 }, { "epoch": 0.1238678512895713, "grad_norm": 0.796875, "learning_rate": 1.9835159562849857e-05, "loss": 1.5686, "step": 718 }, { "epoch": 0.12404036918830329, "grad_norm": 0.71484375, "learning_rate": 1.983466702468594e-05, "loss": 1.5379, "step": 719 }, { "epoch": 0.12421288708703528, "grad_norm": 0.6875, "learning_rate": 1.9834173757908442e-05, "loss": 1.5893, "step": 720 }, { "epoch": 0.12438540498576728, "grad_norm": 0.734375, "learning_rate": 1.9833679762553905e-05, "loss": 1.6384, "step": 721 }, { "epoch": 0.12455792288449927, "grad_norm": 0.70703125, "learning_rate": 1.9833185038658923e-05, "loss": 1.6167, "step": 722 }, { "epoch": 0.12473044078323126, "grad_norm": 0.89453125, "learning_rate": 1.9832689586260157e-05, "loss": 1.5078, "step": 723 }, { "epoch": 0.12490295868196326, "grad_norm": 0.83984375, "learning_rate": 1.9832193405394306e-05, "loss": 1.5687, "step": 724 }, { "epoch": 0.12507547658069526, "grad_norm": 0.7421875, "learning_rate": 1.9831696496098135e-05, "loss": 1.561, "step": 725 }, { "epoch": 0.12524799447942725, "grad_norm": 0.6796875, "learning_rate": 1.9831198858408456e-05, "loss": 1.5419, "step": 726 }, { "epoch": 0.12542051237815924, "grad_norm": 0.62890625, "learning_rate": 1.983070049236214e-05, "loss": 1.5969, "step": 727 }, { "epoch": 0.12559303027689123, "grad_norm": 0.69140625, "learning_rate": 1.9830201397996104e-05, "loss": 1.4889, "step": 728 }, { "epoch": 0.12576554817562322, "grad_norm": 0.8125, "learning_rate": 1.9829701575347323e-05, "loss": 1.5763, "step": 729 }, { "epoch": 0.1259380660743552, "grad_norm": 0.71484375, "learning_rate": 1.9829201024452834e-05, "loss": 1.5959, "step": 730 }, { "epoch": 0.12611058397308722, "grad_norm": 0.73046875, "learning_rate": 1.982869974534972e-05, "loss": 1.6139, "step": 731 }, { "epoch": 0.1262831018718192, "grad_norm": 0.71875, "learning_rate": 1.9828197738075114e-05, "loss": 1.5739, "step": 732 }, { "epoch": 0.1264556197705512, "grad_norm": 1.0078125, "learning_rate": 1.982769500266621e-05, "loss": 1.6035, "step": 733 }, { "epoch": 0.1266281376692832, "grad_norm": 0.8515625, "learning_rate": 1.9827191539160253e-05, "loss": 1.5015, "step": 734 }, { "epoch": 0.12680065556801517, "grad_norm": 0.76953125, "learning_rate": 1.9826687347594548e-05, "loss": 1.6663, "step": 735 }, { "epoch": 0.12697317346674716, "grad_norm": 0.8046875, "learning_rate": 1.9826182428006437e-05, "loss": 1.4977, "step": 736 }, { "epoch": 0.12714569136547918, "grad_norm": 0.93359375, "learning_rate": 1.9825676780433342e-05, "loss": 1.5396, "step": 737 }, { "epoch": 0.12731820926421117, "grad_norm": 0.6484375, "learning_rate": 1.9825170404912712e-05, "loss": 1.5833, "step": 738 }, { "epoch": 0.12749072716294316, "grad_norm": 0.66796875, "learning_rate": 1.982466330148207e-05, "loss": 1.6297, "step": 739 }, { "epoch": 0.12766324506167515, "grad_norm": 0.6328125, "learning_rate": 1.9824155470178983e-05, "loss": 1.5306, "step": 740 }, { "epoch": 0.12783576296040713, "grad_norm": 0.703125, "learning_rate": 1.9823646911041076e-05, "loss": 1.529, "step": 741 }, { "epoch": 0.12800828085913912, "grad_norm": 0.65625, "learning_rate": 1.9823137624106023e-05, "loss": 1.5001, "step": 742 }, { "epoch": 0.12818079875787114, "grad_norm": 0.625, "learning_rate": 1.982262760941156e-05, "loss": 1.573, "step": 743 }, { "epoch": 0.12835331665660313, "grad_norm": 0.65234375, "learning_rate": 1.9822116866995466e-05, "loss": 1.6003, "step": 744 }, { "epoch": 0.12852583455533512, "grad_norm": 0.85546875, "learning_rate": 1.9821605396895584e-05, "loss": 1.6081, "step": 745 }, { "epoch": 0.1286983524540671, "grad_norm": 0.71484375, "learning_rate": 1.9821093199149806e-05, "loss": 1.6188, "step": 746 }, { "epoch": 0.1288708703527991, "grad_norm": 0.6484375, "learning_rate": 1.982058027379608e-05, "loss": 1.5679, "step": 747 }, { "epoch": 0.12904338825153108, "grad_norm": 0.6484375, "learning_rate": 1.9820066620872403e-05, "loss": 1.582, "step": 748 }, { "epoch": 0.1292159061502631, "grad_norm": 0.7109375, "learning_rate": 1.9819552240416832e-05, "loss": 1.613, "step": 749 }, { "epoch": 0.1293884240489951, "grad_norm": 1.8671875, "learning_rate": 1.9819037132467478e-05, "loss": 1.6215, "step": 750 }, { "epoch": 0.12956094194772708, "grad_norm": 0.79296875, "learning_rate": 1.98185212970625e-05, "loss": 1.5503, "step": 751 }, { "epoch": 0.12973345984645906, "grad_norm": 0.9453125, "learning_rate": 1.9818004734240115e-05, "loss": 1.6983, "step": 752 }, { "epoch": 0.12990597774519105, "grad_norm": 1.0859375, "learning_rate": 1.9817487444038594e-05, "loss": 1.5299, "step": 753 }, { "epoch": 0.13007849564392307, "grad_norm": 0.90625, "learning_rate": 1.981696942649626e-05, "loss": 1.5083, "step": 754 }, { "epoch": 0.13025101354265506, "grad_norm": 0.6875, "learning_rate": 1.9816450681651495e-05, "loss": 1.5681, "step": 755 }, { "epoch": 0.13042353144138705, "grad_norm": 0.96875, "learning_rate": 1.9815931209542723e-05, "loss": 1.5735, "step": 756 }, { "epoch": 0.13059604934011904, "grad_norm": 0.71484375, "learning_rate": 1.9815411010208438e-05, "loss": 1.5884, "step": 757 }, { "epoch": 0.13076856723885102, "grad_norm": 0.7109375, "learning_rate": 1.9814890083687172e-05, "loss": 1.5198, "step": 758 }, { "epoch": 0.130941085137583, "grad_norm": 0.65234375, "learning_rate": 1.9814368430017526e-05, "loss": 1.5571, "step": 759 }, { "epoch": 0.13111360303631503, "grad_norm": 0.8984375, "learning_rate": 1.9813846049238143e-05, "loss": 1.597, "step": 760 }, { "epoch": 0.13128612093504702, "grad_norm": 1.0234375, "learning_rate": 1.981332294138772e-05, "loss": 1.5261, "step": 761 }, { "epoch": 0.131458638833779, "grad_norm": 1.4140625, "learning_rate": 1.981279910650502e-05, "loss": 1.711, "step": 762 }, { "epoch": 0.131631156732511, "grad_norm": 0.84765625, "learning_rate": 1.981227454462885e-05, "loss": 1.6753, "step": 763 }, { "epoch": 0.13180367463124298, "grad_norm": 0.74609375, "learning_rate": 1.9811749255798074e-05, "loss": 1.559, "step": 764 }, { "epoch": 0.13197619252997497, "grad_norm": 0.76953125, "learning_rate": 1.98112232400516e-05, "loss": 1.648, "step": 765 }, { "epoch": 0.132148710428707, "grad_norm": 0.83984375, "learning_rate": 1.9810696497428412e-05, "loss": 1.6339, "step": 766 }, { "epoch": 0.13232122832743898, "grad_norm": 0.75390625, "learning_rate": 1.9810169027967524e-05, "loss": 1.5987, "step": 767 }, { "epoch": 0.13249374622617097, "grad_norm": 0.8671875, "learning_rate": 1.9809640831708018e-05, "loss": 1.538, "step": 768 }, { "epoch": 0.13266626412490296, "grad_norm": 0.703125, "learning_rate": 1.9809111908689028e-05, "loss": 1.4946, "step": 769 }, { "epoch": 0.13283878202363494, "grad_norm": 0.83203125, "learning_rate": 1.9808582258949735e-05, "loss": 1.574, "step": 770 }, { "epoch": 0.13301129992236693, "grad_norm": 0.796875, "learning_rate": 1.980805188252938e-05, "loss": 1.529, "step": 771 }, { "epoch": 0.13318381782109895, "grad_norm": 0.70703125, "learning_rate": 1.980752077946726e-05, "loss": 1.519, "step": 772 }, { "epoch": 0.13335633571983094, "grad_norm": 0.84765625, "learning_rate": 1.9806988949802722e-05, "loss": 1.6591, "step": 773 }, { "epoch": 0.13352885361856293, "grad_norm": 0.70703125, "learning_rate": 1.9806456393575164e-05, "loss": 1.5509, "step": 774 }, { "epoch": 0.13370137151729491, "grad_norm": 0.83984375, "learning_rate": 1.980592311082404e-05, "loss": 1.5898, "step": 775 }, { "epoch": 0.1338738894160269, "grad_norm": 0.73046875, "learning_rate": 1.9805389101588868e-05, "loss": 1.5729, "step": 776 }, { "epoch": 0.13404640731475892, "grad_norm": 0.83203125, "learning_rate": 1.9804854365909202e-05, "loss": 1.6551, "step": 777 }, { "epoch": 0.1342189252134909, "grad_norm": 1.0390625, "learning_rate": 1.980431890382466e-05, "loss": 1.5785, "step": 778 }, { "epoch": 0.1343914431122229, "grad_norm": 0.62890625, "learning_rate": 1.9803782715374912e-05, "loss": 1.6861, "step": 779 }, { "epoch": 0.13456396101095489, "grad_norm": 0.76953125, "learning_rate": 1.9803245800599685e-05, "loss": 1.4989, "step": 780 }, { "epoch": 0.13473647890968687, "grad_norm": 0.76171875, "learning_rate": 1.9802708159538755e-05, "loss": 1.6312, "step": 781 }, { "epoch": 0.13490899680841886, "grad_norm": 0.87109375, "learning_rate": 1.9802169792231955e-05, "loss": 1.5757, "step": 782 }, { "epoch": 0.13508151470715088, "grad_norm": 0.85546875, "learning_rate": 1.980163069871917e-05, "loss": 1.649, "step": 783 }, { "epoch": 0.13525403260588287, "grad_norm": 0.77734375, "learning_rate": 1.9801090879040342e-05, "loss": 1.6765, "step": 784 }, { "epoch": 0.13542655050461486, "grad_norm": 0.62890625, "learning_rate": 1.9800550333235455e-05, "loss": 1.5596, "step": 785 }, { "epoch": 0.13559906840334685, "grad_norm": 1.03125, "learning_rate": 1.980000906134457e-05, "loss": 1.5821, "step": 786 }, { "epoch": 0.13577158630207883, "grad_norm": 0.8828125, "learning_rate": 1.9799467063407777e-05, "loss": 1.4838, "step": 787 }, { "epoch": 0.13594410420081082, "grad_norm": 0.62890625, "learning_rate": 1.9798924339465232e-05, "loss": 1.6303, "step": 788 }, { "epoch": 0.13611662209954284, "grad_norm": 0.65625, "learning_rate": 1.979838088955715e-05, "loss": 1.5389, "step": 789 }, { "epoch": 0.13628913999827483, "grad_norm": 0.60546875, "learning_rate": 1.9797836713723786e-05, "loss": 1.5639, "step": 790 }, { "epoch": 0.13646165789700682, "grad_norm": 0.671875, "learning_rate": 1.9797291812005458e-05, "loss": 1.5259, "step": 791 }, { "epoch": 0.1366341757957388, "grad_norm": 0.62890625, "learning_rate": 1.9796746184442538e-05, "loss": 1.6243, "step": 792 }, { "epoch": 0.1368066936944708, "grad_norm": 0.78515625, "learning_rate": 1.9796199831075445e-05, "loss": 1.5038, "step": 793 }, { "epoch": 0.13697921159320278, "grad_norm": 0.66796875, "learning_rate": 1.9795652751944662e-05, "loss": 1.5741, "step": 794 }, { "epoch": 0.1371517294919348, "grad_norm": 0.6015625, "learning_rate": 1.9795104947090714e-05, "loss": 1.5581, "step": 795 }, { "epoch": 0.1373242473906668, "grad_norm": 0.6328125, "learning_rate": 1.9794556416554193e-05, "loss": 1.539, "step": 796 }, { "epoch": 0.13749676528939878, "grad_norm": 0.6484375, "learning_rate": 1.979400716037573e-05, "loss": 1.6923, "step": 797 }, { "epoch": 0.13766928318813076, "grad_norm": 0.7578125, "learning_rate": 1.979345717859602e-05, "loss": 1.5974, "step": 798 }, { "epoch": 0.13784180108686275, "grad_norm": 0.7109375, "learning_rate": 1.9792906471255814e-05, "loss": 1.6278, "step": 799 }, { "epoch": 0.13801431898559474, "grad_norm": 0.75, "learning_rate": 1.9792355038395906e-05, "loss": 1.6722, "step": 800 }, { "epoch": 0.13801431898559474, "eval_loss": 1.557534098625183, "eval_runtime": 10.7703, "eval_samples_per_second": 95.077, "eval_steps_per_second": 23.769, "step": 800 }, { "epoch": 0.13818683688432676, "grad_norm": 0.62890625, "learning_rate": 1.979180288005715e-05, "loss": 1.5548, "step": 801 }, { "epoch": 0.13835935478305875, "grad_norm": 0.58203125, "learning_rate": 1.9791249996280456e-05, "loss": 1.5351, "step": 802 }, { "epoch": 0.13853187268179074, "grad_norm": 0.62109375, "learning_rate": 1.9790696387106782e-05, "loss": 1.4778, "step": 803 }, { "epoch": 0.13870439058052272, "grad_norm": 0.7265625, "learning_rate": 1.9790142052577148e-05, "loss": 1.5723, "step": 804 }, { "epoch": 0.1388769084792547, "grad_norm": 0.7421875, "learning_rate": 1.9789586992732615e-05, "loss": 1.642, "step": 805 }, { "epoch": 0.13904942637798673, "grad_norm": 0.75390625, "learning_rate": 1.9789031207614312e-05, "loss": 1.5624, "step": 806 }, { "epoch": 0.13922194427671872, "grad_norm": 0.7890625, "learning_rate": 1.978847469726341e-05, "loss": 1.5436, "step": 807 }, { "epoch": 0.1393944621754507, "grad_norm": 0.66015625, "learning_rate": 1.9787917461721143e-05, "loss": 1.5336, "step": 808 }, { "epoch": 0.1395669800741827, "grad_norm": 0.62109375, "learning_rate": 1.9787359501028795e-05, "loss": 1.5797, "step": 809 }, { "epoch": 0.13973949797291468, "grad_norm": 0.6328125, "learning_rate": 1.97868008152277e-05, "loss": 1.4998, "step": 810 }, { "epoch": 0.13991201587164667, "grad_norm": 0.6328125, "learning_rate": 1.9786241404359247e-05, "loss": 1.6379, "step": 811 }, { "epoch": 0.1400845337703787, "grad_norm": 0.671875, "learning_rate": 1.978568126846488e-05, "loss": 1.6053, "step": 812 }, { "epoch": 0.14025705166911068, "grad_norm": 0.75390625, "learning_rate": 1.978512040758611e-05, "loss": 1.6019, "step": 813 }, { "epoch": 0.14042956956784267, "grad_norm": 0.62109375, "learning_rate": 1.9784558821764476e-05, "loss": 1.6052, "step": 814 }, { "epoch": 0.14060208746657465, "grad_norm": 0.83203125, "learning_rate": 1.978399651104159e-05, "loss": 1.6307, "step": 815 }, { "epoch": 0.14077460536530664, "grad_norm": 0.6796875, "learning_rate": 1.9783433475459103e-05, "loss": 1.5099, "step": 816 }, { "epoch": 0.14094712326403863, "grad_norm": 0.85546875, "learning_rate": 1.9782869715058738e-05, "loss": 1.5855, "step": 817 }, { "epoch": 0.14111964116277065, "grad_norm": 0.59375, "learning_rate": 1.978230522988226e-05, "loss": 1.5681, "step": 818 }, { "epoch": 0.14129215906150264, "grad_norm": 0.640625, "learning_rate": 1.9781740019971485e-05, "loss": 1.6274, "step": 819 }, { "epoch": 0.14146467696023463, "grad_norm": 0.70703125, "learning_rate": 1.9781174085368292e-05, "loss": 1.6156, "step": 820 }, { "epoch": 0.14163719485896661, "grad_norm": 0.67578125, "learning_rate": 1.9780607426114606e-05, "loss": 1.6525, "step": 821 }, { "epoch": 0.1418097127576986, "grad_norm": 0.67578125, "learning_rate": 1.9780040042252412e-05, "loss": 1.4553, "step": 822 }, { "epoch": 0.1419822306564306, "grad_norm": 0.66796875, "learning_rate": 1.977947193382374e-05, "loss": 1.5795, "step": 823 }, { "epoch": 0.1421547485551626, "grad_norm": 0.625, "learning_rate": 1.9778903100870687e-05, "loss": 1.5352, "step": 824 }, { "epoch": 0.1423272664538946, "grad_norm": 0.70703125, "learning_rate": 1.9778333543435387e-05, "loss": 1.4478, "step": 825 }, { "epoch": 0.14249978435262659, "grad_norm": 0.609375, "learning_rate": 1.977776326156004e-05, "loss": 1.609, "step": 826 }, { "epoch": 0.14267230225135857, "grad_norm": 0.6171875, "learning_rate": 1.9777192255286897e-05, "loss": 1.6362, "step": 827 }, { "epoch": 0.14284482015009056, "grad_norm": 1.3984375, "learning_rate": 1.977662052465826e-05, "loss": 1.5537, "step": 828 }, { "epoch": 0.14301733804882258, "grad_norm": 0.64453125, "learning_rate": 1.977604806971649e-05, "loss": 1.6069, "step": 829 }, { "epoch": 0.14318985594755457, "grad_norm": 0.625, "learning_rate": 1.9775474890503996e-05, "loss": 1.5911, "step": 830 }, { "epoch": 0.14336237384628656, "grad_norm": 0.59765625, "learning_rate": 1.9774900987063237e-05, "loss": 1.5657, "step": 831 }, { "epoch": 0.14353489174501854, "grad_norm": 0.62890625, "learning_rate": 1.9774326359436743e-05, "loss": 1.605, "step": 832 }, { "epoch": 0.14370740964375053, "grad_norm": 0.609375, "learning_rate": 1.9773751007667074e-05, "loss": 1.5102, "step": 833 }, { "epoch": 0.14387992754248252, "grad_norm": 0.56640625, "learning_rate": 1.9773174931796864e-05, "loss": 1.5579, "step": 834 }, { "epoch": 0.14405244544121454, "grad_norm": 0.6796875, "learning_rate": 1.977259813186879e-05, "loss": 1.558, "step": 835 }, { "epoch": 0.14422496333994653, "grad_norm": 0.6484375, "learning_rate": 1.977202060792558e-05, "loss": 1.5766, "step": 836 }, { "epoch": 0.14439748123867852, "grad_norm": 0.62109375, "learning_rate": 1.977144236001003e-05, "loss": 1.5227, "step": 837 }, { "epoch": 0.1445699991374105, "grad_norm": 0.61328125, "learning_rate": 1.977086338816497e-05, "loss": 1.6174, "step": 838 }, { "epoch": 0.1447425170361425, "grad_norm": 0.71484375, "learning_rate": 1.9770283692433306e-05, "loss": 1.5583, "step": 839 }, { "epoch": 0.14491503493487448, "grad_norm": 0.703125, "learning_rate": 1.9769703272857976e-05, "loss": 1.4618, "step": 840 }, { "epoch": 0.1450875528336065, "grad_norm": 2.84375, "learning_rate": 1.976912212948198e-05, "loss": 1.5849, "step": 841 }, { "epoch": 0.1452600707323385, "grad_norm": 0.7890625, "learning_rate": 1.976854026234838e-05, "loss": 1.5698, "step": 842 }, { "epoch": 0.14543258863107048, "grad_norm": 0.6953125, "learning_rate": 1.9767957671500277e-05, "loss": 1.5552, "step": 843 }, { "epoch": 0.14560510652980246, "grad_norm": 0.7578125, "learning_rate": 1.9767374356980838e-05, "loss": 1.5065, "step": 844 }, { "epoch": 0.14577762442853445, "grad_norm": 0.65234375, "learning_rate": 1.976679031883328e-05, "loss": 1.5852, "step": 845 }, { "epoch": 0.14595014232726644, "grad_norm": 0.9765625, "learning_rate": 1.976620555710087e-05, "loss": 1.6179, "step": 846 }, { "epoch": 0.14612266022599846, "grad_norm": 0.7109375, "learning_rate": 1.9765620071826928e-05, "loss": 1.5693, "step": 847 }, { "epoch": 0.14629517812473045, "grad_norm": 0.7578125, "learning_rate": 1.976503386305483e-05, "loss": 1.4705, "step": 848 }, { "epoch": 0.14646769602346243, "grad_norm": 0.72265625, "learning_rate": 1.9764446930828015e-05, "loss": 1.5468, "step": 849 }, { "epoch": 0.14664021392219442, "grad_norm": 0.65234375, "learning_rate": 1.9763859275189956e-05, "loss": 1.4668, "step": 850 }, { "epoch": 0.1468127318209264, "grad_norm": 1.234375, "learning_rate": 1.9763270896184195e-05, "loss": 1.5784, "step": 851 }, { "epoch": 0.1469852497196584, "grad_norm": 0.6953125, "learning_rate": 1.9762681793854323e-05, "loss": 1.5303, "step": 852 }, { "epoch": 0.14715776761839042, "grad_norm": 0.65234375, "learning_rate": 1.9762091968243982e-05, "loss": 1.5035, "step": 853 }, { "epoch": 0.1473302855171224, "grad_norm": 0.69140625, "learning_rate": 1.9761501419396875e-05, "loss": 1.6431, "step": 854 }, { "epoch": 0.1475028034158544, "grad_norm": 0.765625, "learning_rate": 1.9760910147356743e-05, "loss": 1.586, "step": 855 }, { "epoch": 0.14767532131458638, "grad_norm": 0.65234375, "learning_rate": 1.9760318152167406e-05, "loss": 1.5336, "step": 856 }, { "epoch": 0.14784783921331837, "grad_norm": 0.765625, "learning_rate": 1.9759725433872713e-05, "loss": 1.5582, "step": 857 }, { "epoch": 0.1480203571120504, "grad_norm": 0.65625, "learning_rate": 1.9759131992516575e-05, "loss": 1.607, "step": 858 }, { "epoch": 0.14819287501078238, "grad_norm": 0.94140625, "learning_rate": 1.9758537828142966e-05, "loss": 1.5921, "step": 859 }, { "epoch": 0.14836539290951437, "grad_norm": 0.6953125, "learning_rate": 1.9757942940795897e-05, "loss": 1.5577, "step": 860 }, { "epoch": 0.14853791080824635, "grad_norm": 0.69140625, "learning_rate": 1.975734733051945e-05, "loss": 1.6636, "step": 861 }, { "epoch": 0.14871042870697834, "grad_norm": 0.671875, "learning_rate": 1.9756750997357738e-05, "loss": 1.6487, "step": 862 }, { "epoch": 0.14888294660571033, "grad_norm": 0.61328125, "learning_rate": 1.9756153941354955e-05, "loss": 1.5294, "step": 863 }, { "epoch": 0.14905546450444235, "grad_norm": 0.63671875, "learning_rate": 1.9755556162555323e-05, "loss": 1.5988, "step": 864 }, { "epoch": 0.14922798240317434, "grad_norm": 0.625, "learning_rate": 1.975495766100314e-05, "loss": 1.5239, "step": 865 }, { "epoch": 0.14940050030190632, "grad_norm": 0.93359375, "learning_rate": 1.975435843674274e-05, "loss": 1.6436, "step": 866 }, { "epoch": 0.1495730182006383, "grad_norm": 0.62890625, "learning_rate": 1.975375848981852e-05, "loss": 1.6203, "step": 867 }, { "epoch": 0.1497455360993703, "grad_norm": 0.63671875, "learning_rate": 1.9753157820274924e-05, "loss": 1.6343, "step": 868 }, { "epoch": 0.1499180539981023, "grad_norm": 0.72265625, "learning_rate": 1.975255642815646e-05, "loss": 1.6688, "step": 869 }, { "epoch": 0.1500905718968343, "grad_norm": 0.69921875, "learning_rate": 1.9751954313507674e-05, "loss": 1.6492, "step": 870 }, { "epoch": 0.1502630897955663, "grad_norm": 0.66015625, "learning_rate": 1.9751351476373184e-05, "loss": 1.6285, "step": 871 }, { "epoch": 0.15043560769429828, "grad_norm": 0.6953125, "learning_rate": 1.975074791679765e-05, "loss": 1.6182, "step": 872 }, { "epoch": 0.15060812559303027, "grad_norm": 0.66796875, "learning_rate": 1.9750143634825776e-05, "loss": 1.6117, "step": 873 }, { "epoch": 0.15078064349176226, "grad_norm": 0.75, "learning_rate": 1.9749538630502346e-05, "loss": 1.5948, "step": 874 }, { "epoch": 0.15095316139049425, "grad_norm": 0.6796875, "learning_rate": 1.9748932903872176e-05, "loss": 1.5501, "step": 875 }, { "epoch": 0.15112567928922627, "grad_norm": 0.69921875, "learning_rate": 1.974832645498014e-05, "loss": 1.5538, "step": 876 }, { "epoch": 0.15129819718795826, "grad_norm": 0.671875, "learning_rate": 1.9747719283871172e-05, "loss": 1.534, "step": 877 }, { "epoch": 0.15147071508669024, "grad_norm": 0.79296875, "learning_rate": 1.974711139059025e-05, "loss": 1.5364, "step": 878 }, { "epoch": 0.15164323298542223, "grad_norm": 0.625, "learning_rate": 1.9746502775182415e-05, "loss": 1.5412, "step": 879 }, { "epoch": 0.15181575088415422, "grad_norm": 0.68359375, "learning_rate": 1.9745893437692757e-05, "loss": 1.4718, "step": 880 }, { "epoch": 0.15198826878288624, "grad_norm": 0.83984375, "learning_rate": 1.9745283378166417e-05, "loss": 1.6271, "step": 881 }, { "epoch": 0.15216078668161823, "grad_norm": 0.609375, "learning_rate": 1.9744672596648593e-05, "loss": 1.6166, "step": 882 }, { "epoch": 0.15233330458035022, "grad_norm": 0.7578125, "learning_rate": 1.9744061093184537e-05, "loss": 1.5178, "step": 883 }, { "epoch": 0.1525058224790822, "grad_norm": 0.75390625, "learning_rate": 1.974344886781955e-05, "loss": 1.5554, "step": 884 }, { "epoch": 0.1526783403778142, "grad_norm": 0.72265625, "learning_rate": 1.9742835920598988e-05, "loss": 1.5596, "step": 885 }, { "epoch": 0.15285085827654618, "grad_norm": 0.7578125, "learning_rate": 1.974222225156827e-05, "loss": 1.6621, "step": 886 }, { "epoch": 0.1530233761752782, "grad_norm": 0.69921875, "learning_rate": 1.974160786077285e-05, "loss": 1.6319, "step": 887 }, { "epoch": 0.15319589407401019, "grad_norm": 0.70703125, "learning_rate": 1.9740992748258258e-05, "loss": 1.5838, "step": 888 }, { "epoch": 0.15336841197274217, "grad_norm": 0.79296875, "learning_rate": 1.9740376914070055e-05, "loss": 1.5944, "step": 889 }, { "epoch": 0.15354092987147416, "grad_norm": 0.73046875, "learning_rate": 1.9739760358253867e-05, "loss": 1.6164, "step": 890 }, { "epoch": 0.15371344777020615, "grad_norm": 0.66015625, "learning_rate": 1.973914308085538e-05, "loss": 1.6087, "step": 891 }, { "epoch": 0.15388596566893814, "grad_norm": 0.671875, "learning_rate": 1.9738525081920316e-05, "loss": 1.5904, "step": 892 }, { "epoch": 0.15405848356767016, "grad_norm": 0.58203125, "learning_rate": 1.9737906361494467e-05, "loss": 1.5325, "step": 893 }, { "epoch": 0.15423100146640215, "grad_norm": 0.640625, "learning_rate": 1.973728691962367e-05, "loss": 1.5605, "step": 894 }, { "epoch": 0.15440351936513413, "grad_norm": 0.8359375, "learning_rate": 1.973666675635382e-05, "loss": 1.6023, "step": 895 }, { "epoch": 0.15457603726386612, "grad_norm": 0.64453125, "learning_rate": 1.9736045871730854e-05, "loss": 1.5483, "step": 896 }, { "epoch": 0.1547485551625981, "grad_norm": 0.6484375, "learning_rate": 1.9735424265800775e-05, "loss": 1.587, "step": 897 }, { "epoch": 0.1549210730613301, "grad_norm": 0.73046875, "learning_rate": 1.973480193860964e-05, "loss": 1.5754, "step": 898 }, { "epoch": 0.15509359096006212, "grad_norm": 0.640625, "learning_rate": 1.973417889020355e-05, "loss": 1.6912, "step": 899 }, { "epoch": 0.1552661088587941, "grad_norm": 0.96875, "learning_rate": 1.9733555120628666e-05, "loss": 1.4927, "step": 900 }, { "epoch": 0.1552661088587941, "eval_loss": 1.5473740100860596, "eval_runtime": 10.7618, "eval_samples_per_second": 95.151, "eval_steps_per_second": 23.788, "step": 900 }, { "epoch": 0.1554386267575261, "grad_norm": 0.6328125, "learning_rate": 1.97329306299312e-05, "loss": 1.4499, "step": 901 }, { "epoch": 0.15561114465625808, "grad_norm": 0.734375, "learning_rate": 1.9732305418157423e-05, "loss": 1.5901, "step": 902 }, { "epoch": 0.15578366255499007, "grad_norm": 0.81640625, "learning_rate": 1.9731679485353645e-05, "loss": 1.4815, "step": 903 }, { "epoch": 0.1559561804537221, "grad_norm": 0.66015625, "learning_rate": 1.9731052831566248e-05, "loss": 1.5613, "step": 904 }, { "epoch": 0.15612869835245408, "grad_norm": 0.67578125, "learning_rate": 1.9730425456841652e-05, "loss": 1.5202, "step": 905 }, { "epoch": 0.15630121625118606, "grad_norm": 0.76171875, "learning_rate": 1.9729797361226342e-05, "loss": 1.5046, "step": 906 }, { "epoch": 0.15647373414991805, "grad_norm": 0.7265625, "learning_rate": 1.9729168544766848e-05, "loss": 1.6204, "step": 907 }, { "epoch": 0.15664625204865004, "grad_norm": 0.6328125, "learning_rate": 1.9728539007509756e-05, "loss": 1.5183, "step": 908 }, { "epoch": 0.15681876994738203, "grad_norm": 0.640625, "learning_rate": 1.972790874950171e-05, "loss": 1.4783, "step": 909 }, { "epoch": 0.15699128784611405, "grad_norm": 0.5859375, "learning_rate": 1.9727277770789398e-05, "loss": 1.5652, "step": 910 }, { "epoch": 0.15716380574484604, "grad_norm": 0.6015625, "learning_rate": 1.9726646071419573e-05, "loss": 1.4987, "step": 911 }, { "epoch": 0.15733632364357802, "grad_norm": 0.71484375, "learning_rate": 1.9726013651439033e-05, "loss": 1.5341, "step": 912 }, { "epoch": 0.15750884154231, "grad_norm": 0.6484375, "learning_rate": 1.972538051089463e-05, "loss": 1.609, "step": 913 }, { "epoch": 0.157681359441042, "grad_norm": 0.67578125, "learning_rate": 1.972474664983327e-05, "loss": 1.5571, "step": 914 }, { "epoch": 0.157853877339774, "grad_norm": 0.71875, "learning_rate": 1.9724112068301914e-05, "loss": 1.5965, "step": 915 }, { "epoch": 0.158026395238506, "grad_norm": 0.6875, "learning_rate": 1.972347676634758e-05, "loss": 1.567, "step": 916 }, { "epoch": 0.158198913137238, "grad_norm": 0.6328125, "learning_rate": 1.9722840744017332e-05, "loss": 1.5872, "step": 917 }, { "epoch": 0.15837143103596998, "grad_norm": 0.625, "learning_rate": 1.972220400135829e-05, "loss": 1.5357, "step": 918 }, { "epoch": 0.15854394893470197, "grad_norm": 0.61328125, "learning_rate": 1.9721566538417626e-05, "loss": 1.6097, "step": 919 }, { "epoch": 0.15871646683343396, "grad_norm": 0.76953125, "learning_rate": 1.972092835524257e-05, "loss": 1.5192, "step": 920 }, { "epoch": 0.15888898473216595, "grad_norm": 0.6953125, "learning_rate": 1.9720289451880407e-05, "loss": 1.5389, "step": 921 }, { "epoch": 0.15906150263089797, "grad_norm": 0.60546875, "learning_rate": 1.9719649828378464e-05, "loss": 1.6057, "step": 922 }, { "epoch": 0.15923402052962995, "grad_norm": 0.671875, "learning_rate": 1.9719009484784125e-05, "loss": 1.5346, "step": 923 }, { "epoch": 0.15940653842836194, "grad_norm": 1.234375, "learning_rate": 1.9718368421144842e-05, "loss": 1.6481, "step": 924 }, { "epoch": 0.15957905632709393, "grad_norm": 0.703125, "learning_rate": 1.97177266375081e-05, "loss": 1.6184, "step": 925 }, { "epoch": 0.15975157422582592, "grad_norm": 0.6328125, "learning_rate": 1.9717084133921456e-05, "loss": 1.6208, "step": 926 }, { "epoch": 0.1599240921245579, "grad_norm": 0.7734375, "learning_rate": 1.97164409104325e-05, "loss": 1.6104, "step": 927 }, { "epoch": 0.16009661002328993, "grad_norm": 0.6640625, "learning_rate": 1.9715796967088888e-05, "loss": 1.6334, "step": 928 }, { "epoch": 0.16026912792202191, "grad_norm": 0.859375, "learning_rate": 1.971515230393833e-05, "loss": 1.5719, "step": 929 }, { "epoch": 0.1604416458207539, "grad_norm": 0.78125, "learning_rate": 1.971450692102859e-05, "loss": 1.6079, "step": 930 }, { "epoch": 0.1606141637194859, "grad_norm": 0.640625, "learning_rate": 1.9713860818407474e-05, "loss": 1.5634, "step": 931 }, { "epoch": 0.16078668161821788, "grad_norm": 3.171875, "learning_rate": 1.9713213996122857e-05, "loss": 1.5833, "step": 932 }, { "epoch": 0.1609591995169499, "grad_norm": 0.97265625, "learning_rate": 1.9712566454222653e-05, "loss": 1.5331, "step": 933 }, { "epoch": 0.16113171741568189, "grad_norm": 0.6875, "learning_rate": 1.9711918192754842e-05, "loss": 1.5762, "step": 934 }, { "epoch": 0.16130423531441387, "grad_norm": 1.234375, "learning_rate": 1.9711269211767446e-05, "loss": 1.63, "step": 935 }, { "epoch": 0.16147675321314586, "grad_norm": 0.96875, "learning_rate": 1.971061951130855e-05, "loss": 1.6418, "step": 936 }, { "epoch": 0.16164927111187785, "grad_norm": 0.8125, "learning_rate": 1.9709969091426288e-05, "loss": 1.5717, "step": 937 }, { "epoch": 0.16182178901060984, "grad_norm": 0.76953125, "learning_rate": 1.970931795216884e-05, "loss": 1.524, "step": 938 }, { "epoch": 0.16199430690934186, "grad_norm": 0.61328125, "learning_rate": 1.9708666093584456e-05, "loss": 1.5546, "step": 939 }, { "epoch": 0.16216682480807385, "grad_norm": 0.9609375, "learning_rate": 1.970801351572142e-05, "loss": 1.6206, "step": 940 }, { "epoch": 0.16233934270680583, "grad_norm": 0.9140625, "learning_rate": 1.970736021862809e-05, "loss": 1.5482, "step": 941 }, { "epoch": 0.16251186060553782, "grad_norm": 0.65234375, "learning_rate": 1.9706706202352856e-05, "loss": 1.5676, "step": 942 }, { "epoch": 0.1626843785042698, "grad_norm": 0.63671875, "learning_rate": 1.970605146694418e-05, "loss": 1.625, "step": 943 }, { "epoch": 0.1628568964030018, "grad_norm": 0.67578125, "learning_rate": 1.9705396012450563e-05, "loss": 1.5227, "step": 944 }, { "epoch": 0.16302941430173382, "grad_norm": 0.6796875, "learning_rate": 1.9704739838920565e-05, "loss": 1.6658, "step": 945 }, { "epoch": 0.1632019322004658, "grad_norm": 0.62890625, "learning_rate": 1.9704082946402805e-05, "loss": 1.5264, "step": 946 }, { "epoch": 0.1633744500991978, "grad_norm": 0.62109375, "learning_rate": 1.9703425334945945e-05, "loss": 1.5827, "step": 947 }, { "epoch": 0.16354696799792978, "grad_norm": 0.6640625, "learning_rate": 1.9702767004598708e-05, "loss": 1.5444, "step": 948 }, { "epoch": 0.16371948589666177, "grad_norm": 0.71875, "learning_rate": 1.9702107955409862e-05, "loss": 1.5957, "step": 949 }, { "epoch": 0.16389200379539376, "grad_norm": 0.7890625, "learning_rate": 1.9701448187428244e-05, "loss": 1.5006, "step": 950 }, { "epoch": 0.16406452169412578, "grad_norm": 0.6171875, "learning_rate": 1.970078770070272e-05, "loss": 1.5879, "step": 951 }, { "epoch": 0.16423703959285776, "grad_norm": 0.74609375, "learning_rate": 1.9700126495282234e-05, "loss": 1.5882, "step": 952 }, { "epoch": 0.16440955749158975, "grad_norm": 0.80859375, "learning_rate": 1.9699464571215765e-05, "loss": 1.5719, "step": 953 }, { "epoch": 0.16458207539032174, "grad_norm": 0.734375, "learning_rate": 1.9698801928552358e-05, "loss": 1.5701, "step": 954 }, { "epoch": 0.16475459328905373, "grad_norm": 0.64453125, "learning_rate": 1.96981385673411e-05, "loss": 1.6497, "step": 955 }, { "epoch": 0.16492711118778575, "grad_norm": 0.73828125, "learning_rate": 1.9697474487631143e-05, "loss": 1.5415, "step": 956 }, { "epoch": 0.16509962908651774, "grad_norm": 0.67578125, "learning_rate": 1.9696809689471682e-05, "loss": 1.5721, "step": 957 }, { "epoch": 0.16527214698524972, "grad_norm": 0.8515625, "learning_rate": 1.9696144172911974e-05, "loss": 1.5413, "step": 958 }, { "epoch": 0.1654446648839817, "grad_norm": 0.6796875, "learning_rate": 1.9695477938001316e-05, "loss": 1.5264, "step": 959 }, { "epoch": 0.1656171827827137, "grad_norm": 0.60546875, "learning_rate": 1.9694810984789074e-05, "loss": 1.5174, "step": 960 }, { "epoch": 0.1657897006814457, "grad_norm": 2.015625, "learning_rate": 1.969414331332466e-05, "loss": 1.5387, "step": 961 }, { "epoch": 0.1659622185801777, "grad_norm": 0.77734375, "learning_rate": 1.9693474923657536e-05, "loss": 1.5397, "step": 962 }, { "epoch": 0.1661347364789097, "grad_norm": 0.671875, "learning_rate": 1.9692805815837224e-05, "loss": 1.5345, "step": 963 }, { "epoch": 0.16630725437764168, "grad_norm": 0.71484375, "learning_rate": 1.9692135989913294e-05, "loss": 1.487, "step": 964 }, { "epoch": 0.16647977227637367, "grad_norm": 0.69140625, "learning_rate": 1.969146544593537e-05, "loss": 1.6524, "step": 965 }, { "epoch": 0.16665229017510566, "grad_norm": 0.62890625, "learning_rate": 1.9690794183953128e-05, "loss": 1.667, "step": 966 }, { "epoch": 0.16682480807383765, "grad_norm": 0.6640625, "learning_rate": 1.96901222040163e-05, "loss": 1.5474, "step": 967 }, { "epoch": 0.16699732597256967, "grad_norm": 0.67578125, "learning_rate": 1.968944950617468e-05, "loss": 1.5477, "step": 968 }, { "epoch": 0.16716984387130165, "grad_norm": 0.89453125, "learning_rate": 1.9688776090478096e-05, "loss": 1.5634, "step": 969 }, { "epoch": 0.16734236177003364, "grad_norm": 0.6484375, "learning_rate": 1.9688101956976436e-05, "loss": 1.5918, "step": 970 }, { "epoch": 0.16751487966876563, "grad_norm": 0.75390625, "learning_rate": 1.968742710571965e-05, "loss": 1.6563, "step": 971 }, { "epoch": 0.16768739756749762, "grad_norm": 0.6015625, "learning_rate": 1.968675153675774e-05, "loss": 1.5484, "step": 972 }, { "epoch": 0.1678599154662296, "grad_norm": 0.67578125, "learning_rate": 1.9686075250140745e-05, "loss": 1.5401, "step": 973 }, { "epoch": 0.16803243336496163, "grad_norm": 0.70703125, "learning_rate": 1.9685398245918778e-05, "loss": 1.5453, "step": 974 }, { "epoch": 0.16820495126369361, "grad_norm": 0.66015625, "learning_rate": 1.9684720524141988e-05, "loss": 1.5738, "step": 975 }, { "epoch": 0.1683774691624256, "grad_norm": 0.6484375, "learning_rate": 1.9684042084860594e-05, "loss": 1.5588, "step": 976 }, { "epoch": 0.1685499870611576, "grad_norm": 0.7734375, "learning_rate": 1.968336292812485e-05, "loss": 1.5041, "step": 977 }, { "epoch": 0.16872250495988958, "grad_norm": 0.65625, "learning_rate": 1.9682683053985073e-05, "loss": 1.5881, "step": 978 }, { "epoch": 0.16889502285862157, "grad_norm": 0.921875, "learning_rate": 1.968200246249164e-05, "loss": 1.5471, "step": 979 }, { "epoch": 0.16906754075735358, "grad_norm": 0.68359375, "learning_rate": 1.9681321153694967e-05, "loss": 1.5248, "step": 980 }, { "epoch": 0.16924005865608557, "grad_norm": 0.921875, "learning_rate": 1.968063912764553e-05, "loss": 1.5456, "step": 981 }, { "epoch": 0.16941257655481756, "grad_norm": 0.68359375, "learning_rate": 1.967995638439386e-05, "loss": 1.56, "step": 982 }, { "epoch": 0.16958509445354955, "grad_norm": 0.640625, "learning_rate": 1.967927292399054e-05, "loss": 1.5441, "step": 983 }, { "epoch": 0.16975761235228154, "grad_norm": 0.72265625, "learning_rate": 1.9678588746486198e-05, "loss": 1.5252, "step": 984 }, { "epoch": 0.16993013025101356, "grad_norm": 0.640625, "learning_rate": 1.967790385193153e-05, "loss": 1.54, "step": 985 }, { "epoch": 0.17010264814974554, "grad_norm": 0.6953125, "learning_rate": 1.9677218240377272e-05, "loss": 1.6079, "step": 986 }, { "epoch": 0.17027516604847753, "grad_norm": 0.60546875, "learning_rate": 1.9676531911874223e-05, "loss": 1.5549, "step": 987 }, { "epoch": 0.17044768394720952, "grad_norm": 0.6171875, "learning_rate": 1.967584486647323e-05, "loss": 1.6047, "step": 988 }, { "epoch": 0.1706202018459415, "grad_norm": 0.7734375, "learning_rate": 1.9675157104225188e-05, "loss": 1.6402, "step": 989 }, { "epoch": 0.1707927197446735, "grad_norm": 0.9140625, "learning_rate": 1.9674468625181058e-05, "loss": 1.5565, "step": 990 }, { "epoch": 0.17096523764340552, "grad_norm": 0.69921875, "learning_rate": 1.967377942939184e-05, "loss": 1.4881, "step": 991 }, { "epoch": 0.1711377555421375, "grad_norm": 0.75, "learning_rate": 1.96730895169086e-05, "loss": 1.6077, "step": 992 }, { "epoch": 0.1713102734408695, "grad_norm": 0.984375, "learning_rate": 1.9672398887782448e-05, "loss": 1.486, "step": 993 }, { "epoch": 0.17148279133960148, "grad_norm": 0.6328125, "learning_rate": 1.967170754206455e-05, "loss": 1.6017, "step": 994 }, { "epoch": 0.17165530923833347, "grad_norm": 0.69921875, "learning_rate": 1.9671015479806126e-05, "loss": 1.6084, "step": 995 }, { "epoch": 0.17182782713706546, "grad_norm": 0.7578125, "learning_rate": 1.9670322701058447e-05, "loss": 1.5449, "step": 996 }, { "epoch": 0.17200034503579748, "grad_norm": 0.609375, "learning_rate": 1.966962920587284e-05, "loss": 1.5863, "step": 997 }, { "epoch": 0.17217286293452946, "grad_norm": 0.74609375, "learning_rate": 1.9668934994300684e-05, "loss": 1.5316, "step": 998 }, { "epoch": 0.17234538083326145, "grad_norm": 0.6875, "learning_rate": 1.9668240066393406e-05, "loss": 1.56, "step": 999 }, { "epoch": 0.17251789873199344, "grad_norm": 0.6953125, "learning_rate": 1.9667544422202497e-05, "loss": 1.5559, "step": 1000 }, { "epoch": 0.17251789873199344, "eval_loss": 1.538590908050537, "eval_runtime": 11.0756, "eval_samples_per_second": 92.455, "eval_steps_per_second": 23.114, "step": 1000 }, { "epoch": 0.17269041663072543, "grad_norm": 1.046875, "learning_rate": 1.9666848061779487e-05, "loss": 1.6472, "step": 1001 }, { "epoch": 0.17286293452945742, "grad_norm": 0.77734375, "learning_rate": 1.966615098517598e-05, "loss": 1.4592, "step": 1002 }, { "epoch": 0.17303545242818943, "grad_norm": 0.79296875, "learning_rate": 1.9665453192443603e-05, "loss": 1.5818, "step": 1003 }, { "epoch": 0.17320797032692142, "grad_norm": 0.8984375, "learning_rate": 1.9664754683634064e-05, "loss": 1.6736, "step": 1004 }, { "epoch": 0.1733804882256534, "grad_norm": 0.6953125, "learning_rate": 1.966405545879911e-05, "loss": 1.5858, "step": 1005 }, { "epoch": 0.1735530061243854, "grad_norm": 0.8203125, "learning_rate": 1.9663355517990543e-05, "loss": 1.5809, "step": 1006 }, { "epoch": 0.1737255240231174, "grad_norm": 0.7421875, "learning_rate": 1.966265486126022e-05, "loss": 1.6794, "step": 1007 }, { "epoch": 0.1738980419218494, "grad_norm": 0.63671875, "learning_rate": 1.966195348866005e-05, "loss": 1.6081, "step": 1008 }, { "epoch": 0.1740705598205814, "grad_norm": 0.71484375, "learning_rate": 1.9661251400241994e-05, "loss": 1.572, "step": 1009 }, { "epoch": 0.17424307771931338, "grad_norm": 0.7109375, "learning_rate": 1.9660548596058068e-05, "loss": 1.4952, "step": 1010 }, { "epoch": 0.17441559561804537, "grad_norm": 0.71875, "learning_rate": 1.9659845076160345e-05, "loss": 1.6122, "step": 1011 }, { "epoch": 0.17458811351677736, "grad_norm": 0.89453125, "learning_rate": 1.9659140840600934e-05, "loss": 1.4911, "step": 1012 }, { "epoch": 0.17476063141550935, "grad_norm": 0.6875, "learning_rate": 1.9658435889432022e-05, "loss": 1.4473, "step": 1013 }, { "epoch": 0.17493314931424137, "grad_norm": 0.8515625, "learning_rate": 1.9657730222705828e-05, "loss": 1.5437, "step": 1014 }, { "epoch": 0.17510566721297335, "grad_norm": 0.94140625, "learning_rate": 1.9657023840474637e-05, "loss": 1.5346, "step": 1015 }, { "epoch": 0.17527818511170534, "grad_norm": 0.63671875, "learning_rate": 1.9656316742790778e-05, "loss": 1.6122, "step": 1016 }, { "epoch": 0.17545070301043733, "grad_norm": 0.8828125, "learning_rate": 1.9655608929706636e-05, "loss": 1.635, "step": 1017 }, { "epoch": 0.17562322090916932, "grad_norm": 1.6328125, "learning_rate": 1.965490040127466e-05, "loss": 1.6069, "step": 1018 }, { "epoch": 0.1757957388079013, "grad_norm": 0.76171875, "learning_rate": 1.965419115754733e-05, "loss": 1.5818, "step": 1019 }, { "epoch": 0.17596825670663332, "grad_norm": 0.68359375, "learning_rate": 1.96534811985772e-05, "loss": 1.4853, "step": 1020 }, { "epoch": 0.1761407746053653, "grad_norm": 0.6796875, "learning_rate": 1.9652770524416865e-05, "loss": 1.5027, "step": 1021 }, { "epoch": 0.1763132925040973, "grad_norm": 0.70703125, "learning_rate": 1.9652059135118976e-05, "loss": 1.5647, "step": 1022 }, { "epoch": 0.1764858104028293, "grad_norm": 0.734375, "learning_rate": 1.965134703073624e-05, "loss": 1.5769, "step": 1023 }, { "epoch": 0.17665832830156128, "grad_norm": 0.54296875, "learning_rate": 1.9650634211321406e-05, "loss": 1.4441, "step": 1024 }, { "epoch": 0.17683084620029327, "grad_norm": 0.62109375, "learning_rate": 1.9649920676927292e-05, "loss": 1.5214, "step": 1025 }, { "epoch": 0.17700336409902528, "grad_norm": 0.6328125, "learning_rate": 1.964920642760676e-05, "loss": 1.5326, "step": 1026 }, { "epoch": 0.17717588199775727, "grad_norm": 0.671875, "learning_rate": 1.9648491463412724e-05, "loss": 1.5217, "step": 1027 }, { "epoch": 0.17734839989648926, "grad_norm": 0.671875, "learning_rate": 1.9647775784398154e-05, "loss": 1.6123, "step": 1028 }, { "epoch": 0.17752091779522125, "grad_norm": 0.73828125, "learning_rate": 1.9647059390616072e-05, "loss": 1.5351, "step": 1029 }, { "epoch": 0.17769343569395324, "grad_norm": 1.171875, "learning_rate": 1.964634228211955e-05, "loss": 1.5681, "step": 1030 }, { "epoch": 0.17786595359268523, "grad_norm": 0.70703125, "learning_rate": 1.9645624458961722e-05, "loss": 1.5436, "step": 1031 }, { "epoch": 0.17803847149141724, "grad_norm": 0.94921875, "learning_rate": 1.9644905921195763e-05, "loss": 1.6017, "step": 1032 }, { "epoch": 0.17821098939014923, "grad_norm": 0.6328125, "learning_rate": 1.9644186668874914e-05, "loss": 1.55, "step": 1033 }, { "epoch": 0.17838350728888122, "grad_norm": 0.87109375, "learning_rate": 1.9643466702052453e-05, "loss": 1.5529, "step": 1034 }, { "epoch": 0.1785560251876132, "grad_norm": 0.87109375, "learning_rate": 1.9642746020781723e-05, "loss": 1.5194, "step": 1035 }, { "epoch": 0.1787285430863452, "grad_norm": 0.87890625, "learning_rate": 1.9642024625116117e-05, "loss": 1.6674, "step": 1036 }, { "epoch": 0.17890106098507721, "grad_norm": 0.94140625, "learning_rate": 1.9641302515109084e-05, "loss": 1.6355, "step": 1037 }, { "epoch": 0.1790735788838092, "grad_norm": 0.9609375, "learning_rate": 1.9640579690814118e-05, "loss": 1.4246, "step": 1038 }, { "epoch": 0.1792460967825412, "grad_norm": 0.66015625, "learning_rate": 1.9639856152284768e-05, "loss": 1.5162, "step": 1039 }, { "epoch": 0.17941861468127318, "grad_norm": 0.8359375, "learning_rate": 1.9639131899574643e-05, "loss": 1.5773, "step": 1040 }, { "epoch": 0.17959113258000517, "grad_norm": 0.8125, "learning_rate": 1.9638406932737402e-05, "loss": 1.6291, "step": 1041 }, { "epoch": 0.17976365047873716, "grad_norm": 0.81640625, "learning_rate": 1.963768125182675e-05, "loss": 1.554, "step": 1042 }, { "epoch": 0.17993616837746917, "grad_norm": 1.2734375, "learning_rate": 1.9636954856896452e-05, "loss": 1.4847, "step": 1043 }, { "epoch": 0.18010868627620116, "grad_norm": 0.62109375, "learning_rate": 1.9636227748000322e-05, "loss": 1.5396, "step": 1044 }, { "epoch": 0.18028120417493315, "grad_norm": 0.625, "learning_rate": 1.963549992519223e-05, "loss": 1.5237, "step": 1045 }, { "epoch": 0.18045372207366514, "grad_norm": 0.8046875, "learning_rate": 1.9634771388526103e-05, "loss": 1.5847, "step": 1046 }, { "epoch": 0.18062623997239713, "grad_norm": 0.671875, "learning_rate": 1.9634042138055905e-05, "loss": 1.6293, "step": 1047 }, { "epoch": 0.18079875787112912, "grad_norm": 0.90625, "learning_rate": 1.9633312173835674e-05, "loss": 1.6017, "step": 1048 }, { "epoch": 0.18097127576986113, "grad_norm": 0.61328125, "learning_rate": 1.963258149591948e-05, "loss": 1.4581, "step": 1049 }, { "epoch": 0.18114379366859312, "grad_norm": 0.671875, "learning_rate": 1.9631850104361467e-05, "loss": 1.615, "step": 1050 }, { "epoch": 0.1813163115673251, "grad_norm": 0.73046875, "learning_rate": 1.9631117999215812e-05, "loss": 1.4861, "step": 1051 }, { "epoch": 0.1814888294660571, "grad_norm": 0.69140625, "learning_rate": 1.9630385180536758e-05, "loss": 1.5275, "step": 1052 }, { "epoch": 0.1816613473647891, "grad_norm": 0.6328125, "learning_rate": 1.96296516483786e-05, "loss": 1.5503, "step": 1053 }, { "epoch": 0.18183386526352108, "grad_norm": 0.69921875, "learning_rate": 1.9628917402795677e-05, "loss": 1.5439, "step": 1054 }, { "epoch": 0.1820063831622531, "grad_norm": 0.6171875, "learning_rate": 1.9628182443842388e-05, "loss": 1.6039, "step": 1055 }, { "epoch": 0.18217890106098508, "grad_norm": 0.59765625, "learning_rate": 1.962744677157318e-05, "loss": 1.6551, "step": 1056 }, { "epoch": 0.18235141895971707, "grad_norm": 0.66015625, "learning_rate": 1.9626710386042567e-05, "loss": 1.4362, "step": 1057 }, { "epoch": 0.18252393685844906, "grad_norm": 0.609375, "learning_rate": 1.9625973287305093e-05, "loss": 1.5739, "step": 1058 }, { "epoch": 0.18269645475718105, "grad_norm": 0.72265625, "learning_rate": 1.9625235475415377e-05, "loss": 1.5182, "step": 1059 }, { "epoch": 0.18286897265591306, "grad_norm": 0.61328125, "learning_rate": 1.962449695042807e-05, "loss": 1.4969, "step": 1060 }, { "epoch": 0.18304149055464505, "grad_norm": 0.62109375, "learning_rate": 1.9623757712397896e-05, "loss": 1.581, "step": 1061 }, { "epoch": 0.18321400845337704, "grad_norm": 0.71875, "learning_rate": 1.962301776137962e-05, "loss": 1.6426, "step": 1062 }, { "epoch": 0.18338652635210903, "grad_norm": 0.5859375, "learning_rate": 1.9622277097428058e-05, "loss": 1.5438, "step": 1063 }, { "epoch": 0.18355904425084102, "grad_norm": 0.578125, "learning_rate": 1.9621535720598085e-05, "loss": 1.5331, "step": 1064 }, { "epoch": 0.183731562149573, "grad_norm": 0.609375, "learning_rate": 1.9620793630944632e-05, "loss": 1.4752, "step": 1065 }, { "epoch": 0.18390408004830502, "grad_norm": 0.671875, "learning_rate": 1.962005082852267e-05, "loss": 1.4254, "step": 1066 }, { "epoch": 0.184076597947037, "grad_norm": 0.6328125, "learning_rate": 1.961930731338723e-05, "loss": 1.5386, "step": 1067 }, { "epoch": 0.184249115845769, "grad_norm": 0.66015625, "learning_rate": 1.961856308559341e-05, "loss": 1.5059, "step": 1068 }, { "epoch": 0.184421633744501, "grad_norm": 0.6875, "learning_rate": 1.961781814519633e-05, "loss": 1.5058, "step": 1069 }, { "epoch": 0.18459415164323298, "grad_norm": 0.72265625, "learning_rate": 1.9617072492251187e-05, "loss": 1.5646, "step": 1070 }, { "epoch": 0.18476666954196497, "grad_norm": 0.86328125, "learning_rate": 1.9616326126813224e-05, "loss": 1.5363, "step": 1071 }, { "epoch": 0.18493918744069698, "grad_norm": 0.60546875, "learning_rate": 1.961557904893774e-05, "loss": 1.6104, "step": 1072 }, { "epoch": 0.18511170533942897, "grad_norm": 0.66015625, "learning_rate": 1.9614831258680073e-05, "loss": 1.5752, "step": 1073 }, { "epoch": 0.18528422323816096, "grad_norm": 0.61328125, "learning_rate": 1.9614082756095632e-05, "loss": 1.5502, "step": 1074 }, { "epoch": 0.18545674113689295, "grad_norm": 0.68359375, "learning_rate": 1.961333354123987e-05, "loss": 1.6264, "step": 1075 }, { "epoch": 0.18562925903562494, "grad_norm": 0.63671875, "learning_rate": 1.9612583614168295e-05, "loss": 1.5407, "step": 1076 }, { "epoch": 0.18580177693435693, "grad_norm": 0.75, "learning_rate": 1.961183297493646e-05, "loss": 1.5649, "step": 1077 }, { "epoch": 0.18597429483308894, "grad_norm": 0.62890625, "learning_rate": 1.961108162359998e-05, "loss": 1.4631, "step": 1078 }, { "epoch": 0.18614681273182093, "grad_norm": 0.640625, "learning_rate": 1.9610329560214524e-05, "loss": 1.5548, "step": 1079 }, { "epoch": 0.18631933063055292, "grad_norm": 0.671875, "learning_rate": 1.9609576784835803e-05, "loss": 1.52, "step": 1080 }, { "epoch": 0.1864918485292849, "grad_norm": 0.6640625, "learning_rate": 1.960882329751959e-05, "loss": 1.5762, "step": 1081 }, { "epoch": 0.1866643664280169, "grad_norm": 0.60546875, "learning_rate": 1.9608069098321712e-05, "loss": 1.4734, "step": 1082 }, { "epoch": 0.1868368843267489, "grad_norm": 0.6484375, "learning_rate": 1.960731418729804e-05, "loss": 1.4512, "step": 1083 }, { "epoch": 0.1870094022254809, "grad_norm": 0.6484375, "learning_rate": 1.9606558564504503e-05, "loss": 1.584, "step": 1084 }, { "epoch": 0.1871819201242129, "grad_norm": 0.6953125, "learning_rate": 1.9605802229997086e-05, "loss": 1.6183, "step": 1085 }, { "epoch": 0.18735443802294488, "grad_norm": 0.62109375, "learning_rate": 1.9605045183831814e-05, "loss": 1.5524, "step": 1086 }, { "epoch": 0.18752695592167687, "grad_norm": 0.7109375, "learning_rate": 1.960428742606478e-05, "loss": 1.4852, "step": 1087 }, { "epoch": 0.18769947382040886, "grad_norm": 0.73046875, "learning_rate": 1.9603528956752126e-05, "loss": 1.5881, "step": 1088 }, { "epoch": 0.18787199171914087, "grad_norm": 1.0078125, "learning_rate": 1.9602769775950044e-05, "loss": 1.5719, "step": 1089 }, { "epoch": 0.18804450961787286, "grad_norm": 0.82421875, "learning_rate": 1.960200988371477e-05, "loss": 1.6392, "step": 1090 }, { "epoch": 0.18821702751660485, "grad_norm": 0.60546875, "learning_rate": 1.9601249280102613e-05, "loss": 1.6041, "step": 1091 }, { "epoch": 0.18838954541533684, "grad_norm": 0.9609375, "learning_rate": 1.9600487965169917e-05, "loss": 1.5457, "step": 1092 }, { "epoch": 0.18856206331406883, "grad_norm": 0.7734375, "learning_rate": 1.9599725938973085e-05, "loss": 1.5748, "step": 1093 }, { "epoch": 0.18873458121280082, "grad_norm": 0.78125, "learning_rate": 1.959896320156857e-05, "loss": 1.5051, "step": 1094 }, { "epoch": 0.18890709911153283, "grad_norm": 0.66015625, "learning_rate": 1.959819975301289e-05, "loss": 1.5309, "step": 1095 }, { "epoch": 0.18907961701026482, "grad_norm": 0.65234375, "learning_rate": 1.9597435593362597e-05, "loss": 1.5352, "step": 1096 }, { "epoch": 0.1892521349089968, "grad_norm": 0.68359375, "learning_rate": 1.9596670722674307e-05, "loss": 1.6632, "step": 1097 }, { "epoch": 0.1894246528077288, "grad_norm": 0.64453125, "learning_rate": 1.9595905141004687e-05, "loss": 1.5162, "step": 1098 }, { "epoch": 0.1895971707064608, "grad_norm": 0.7109375, "learning_rate": 1.959513884841046e-05, "loss": 1.4775, "step": 1099 }, { "epoch": 0.18976968860519278, "grad_norm": 0.6484375, "learning_rate": 1.959437184494839e-05, "loss": 1.4998, "step": 1100 }, { "epoch": 0.18976968860519278, "eval_loss": 1.5296015739440918, "eval_runtime": 10.937, "eval_samples_per_second": 93.627, "eval_steps_per_second": 23.407, "step": 1100 }, { "epoch": 0.1899422065039248, "grad_norm": 0.76171875, "learning_rate": 1.9593604130675306e-05, "loss": 1.5226, "step": 1101 }, { "epoch": 0.19011472440265678, "grad_norm": 0.60546875, "learning_rate": 1.9592835705648087e-05, "loss": 1.5056, "step": 1102 }, { "epoch": 0.19028724230138877, "grad_norm": 0.88671875, "learning_rate": 1.9592066569923654e-05, "loss": 1.5678, "step": 1103 }, { "epoch": 0.19045976020012076, "grad_norm": 0.5625, "learning_rate": 1.9591296723559e-05, "loss": 1.5051, "step": 1104 }, { "epoch": 0.19063227809885275, "grad_norm": 1.1640625, "learning_rate": 1.9590526166611153e-05, "loss": 1.534, "step": 1105 }, { "epoch": 0.19080479599758474, "grad_norm": 0.609375, "learning_rate": 1.9589754899137207e-05, "loss": 1.4893, "step": 1106 }, { "epoch": 0.19097731389631675, "grad_norm": 0.58203125, "learning_rate": 1.9588982921194296e-05, "loss": 1.4873, "step": 1107 }, { "epoch": 0.19114983179504874, "grad_norm": 0.58984375, "learning_rate": 1.9588210232839617e-05, "loss": 1.5466, "step": 1108 }, { "epoch": 0.19132234969378073, "grad_norm": 0.68359375, "learning_rate": 1.9587436834130413e-05, "loss": 1.5609, "step": 1109 }, { "epoch": 0.19149486759251272, "grad_norm": 0.76953125, "learning_rate": 1.9586662725123984e-05, "loss": 1.5532, "step": 1110 }, { "epoch": 0.1916673854912447, "grad_norm": 0.734375, "learning_rate": 1.958588790587768e-05, "loss": 1.545, "step": 1111 }, { "epoch": 0.19183990338997672, "grad_norm": 0.7421875, "learning_rate": 1.9585112376448902e-05, "loss": 1.4745, "step": 1112 }, { "epoch": 0.1920124212887087, "grad_norm": 1.203125, "learning_rate": 1.9584336136895114e-05, "loss": 1.465, "step": 1113 }, { "epoch": 0.1921849391874407, "grad_norm": 0.640625, "learning_rate": 1.9583559187273816e-05, "loss": 1.4556, "step": 1114 }, { "epoch": 0.1923574570861727, "grad_norm": 0.76953125, "learning_rate": 1.958278152764257e-05, "loss": 1.5155, "step": 1115 }, { "epoch": 0.19252997498490468, "grad_norm": 0.984375, "learning_rate": 1.9582003158058996e-05, "loss": 1.5266, "step": 1116 }, { "epoch": 0.19270249288363667, "grad_norm": 0.7109375, "learning_rate": 1.9581224078580755e-05, "loss": 1.4815, "step": 1117 }, { "epoch": 0.19287501078236868, "grad_norm": 0.78515625, "learning_rate": 1.9580444289265567e-05, "loss": 1.5429, "step": 1118 }, { "epoch": 0.19304752868110067, "grad_norm": 0.68359375, "learning_rate": 1.957966379017121e-05, "loss": 1.5509, "step": 1119 }, { "epoch": 0.19322004657983266, "grad_norm": 0.625, "learning_rate": 1.9578882581355497e-05, "loss": 1.5378, "step": 1120 }, { "epoch": 0.19339256447856465, "grad_norm": 0.91015625, "learning_rate": 1.9578100662876314e-05, "loss": 1.5299, "step": 1121 }, { "epoch": 0.19356508237729664, "grad_norm": 0.6953125, "learning_rate": 1.9577318034791586e-05, "loss": 1.5234, "step": 1122 }, { "epoch": 0.19373760027602863, "grad_norm": 0.88671875, "learning_rate": 1.9576534697159298e-05, "loss": 1.4997, "step": 1123 }, { "epoch": 0.19391011817476064, "grad_norm": 0.80859375, "learning_rate": 1.957575065003748e-05, "loss": 1.4906, "step": 1124 }, { "epoch": 0.19408263607349263, "grad_norm": 0.77734375, "learning_rate": 1.9574965893484223e-05, "loss": 1.5698, "step": 1125 }, { "epoch": 0.19425515397222462, "grad_norm": 0.59765625, "learning_rate": 1.9574180427557666e-05, "loss": 1.4581, "step": 1126 }, { "epoch": 0.1944276718709566, "grad_norm": 0.9140625, "learning_rate": 1.9573394252316e-05, "loss": 1.5503, "step": 1127 }, { "epoch": 0.1946001897696886, "grad_norm": 0.78125, "learning_rate": 1.957260736781747e-05, "loss": 1.5962, "step": 1128 }, { "epoch": 0.19477270766842059, "grad_norm": 0.70703125, "learning_rate": 1.9571819774120375e-05, "loss": 1.5985, "step": 1129 }, { "epoch": 0.1949452255671526, "grad_norm": 0.90625, "learning_rate": 1.957103147128306e-05, "loss": 1.5408, "step": 1130 }, { "epoch": 0.1951177434658846, "grad_norm": 0.83984375, "learning_rate": 1.9570242459363937e-05, "loss": 1.6447, "step": 1131 }, { "epoch": 0.19529026136461658, "grad_norm": 0.69921875, "learning_rate": 1.956945273842145e-05, "loss": 1.5605, "step": 1132 }, { "epoch": 0.19546277926334857, "grad_norm": 0.93359375, "learning_rate": 1.9568662308514116e-05, "loss": 1.5723, "step": 1133 }, { "epoch": 0.19563529716208056, "grad_norm": 0.67578125, "learning_rate": 1.9567871169700486e-05, "loss": 1.6163, "step": 1134 }, { "epoch": 0.19580781506081255, "grad_norm": 0.75390625, "learning_rate": 1.956707932203918e-05, "loss": 1.5854, "step": 1135 }, { "epoch": 0.19598033295954456, "grad_norm": 0.82421875, "learning_rate": 1.9566286765588857e-05, "loss": 1.4622, "step": 1136 }, { "epoch": 0.19615285085827655, "grad_norm": 0.59765625, "learning_rate": 1.956549350040824e-05, "loss": 1.5131, "step": 1137 }, { "epoch": 0.19632536875700854, "grad_norm": 0.70703125, "learning_rate": 1.9564699526556093e-05, "loss": 1.5931, "step": 1138 }, { "epoch": 0.19649788665574053, "grad_norm": 0.83203125, "learning_rate": 1.9563904844091248e-05, "loss": 1.6076, "step": 1139 }, { "epoch": 0.19667040455447252, "grad_norm": 0.5859375, "learning_rate": 1.956310945307257e-05, "loss": 1.5771, "step": 1140 }, { "epoch": 0.19684292245320453, "grad_norm": 0.7109375, "learning_rate": 1.9562313353558992e-05, "loss": 1.5983, "step": 1141 }, { "epoch": 0.19701544035193652, "grad_norm": 0.80078125, "learning_rate": 1.9561516545609493e-05, "loss": 1.6035, "step": 1142 }, { "epoch": 0.1971879582506685, "grad_norm": 0.61328125, "learning_rate": 1.9560719029283104e-05, "loss": 1.6043, "step": 1143 }, { "epoch": 0.1973604761494005, "grad_norm": 2.359375, "learning_rate": 1.955992080463891e-05, "loss": 1.5247, "step": 1144 }, { "epoch": 0.1975329940481325, "grad_norm": 3.171875, "learning_rate": 1.9559121871736055e-05, "loss": 1.5868, "step": 1145 }, { "epoch": 0.19770551194686448, "grad_norm": 1.984375, "learning_rate": 1.9558322230633718e-05, "loss": 1.5682, "step": 1146 }, { "epoch": 0.1978780298455965, "grad_norm": 0.78515625, "learning_rate": 1.955752188139115e-05, "loss": 1.5546, "step": 1147 }, { "epoch": 0.19805054774432848, "grad_norm": 0.60546875, "learning_rate": 1.955672082406764e-05, "loss": 1.5099, "step": 1148 }, { "epoch": 0.19822306564306047, "grad_norm": 0.71875, "learning_rate": 1.955591905872254e-05, "loss": 1.5612, "step": 1149 }, { "epoch": 0.19839558354179246, "grad_norm": 0.69140625, "learning_rate": 1.9555116585415247e-05, "loss": 1.5709, "step": 1150 }, { "epoch": 0.19856810144052445, "grad_norm": 0.69140625, "learning_rate": 1.9554313404205216e-05, "loss": 1.5448, "step": 1151 }, { "epoch": 0.19874061933925644, "grad_norm": 0.609375, "learning_rate": 1.955350951515195e-05, "loss": 1.5552, "step": 1152 }, { "epoch": 0.19891313723798845, "grad_norm": 1.015625, "learning_rate": 1.9552704918315006e-05, "loss": 1.4922, "step": 1153 }, { "epoch": 0.19908565513672044, "grad_norm": 0.67578125, "learning_rate": 1.9551899613753994e-05, "loss": 1.5521, "step": 1154 }, { "epoch": 0.19925817303545243, "grad_norm": 0.66796875, "learning_rate": 1.9551093601528573e-05, "loss": 1.5857, "step": 1155 }, { "epoch": 0.19943069093418442, "grad_norm": 0.73046875, "learning_rate": 1.955028688169846e-05, "loss": 1.586, "step": 1156 }, { "epoch": 0.1996032088329164, "grad_norm": 0.765625, "learning_rate": 1.9549479454323423e-05, "loss": 1.5905, "step": 1157 }, { "epoch": 0.1997757267316484, "grad_norm": 0.71484375, "learning_rate": 1.954867131946328e-05, "loss": 1.5615, "step": 1158 }, { "epoch": 0.1999482446303804, "grad_norm": 0.8203125, "learning_rate": 1.9547862477177904e-05, "loss": 1.4858, "step": 1159 }, { "epoch": 0.2001207625291124, "grad_norm": 0.71875, "learning_rate": 1.9547052927527217e-05, "loss": 1.5963, "step": 1160 }, { "epoch": 0.2002932804278444, "grad_norm": 0.66015625, "learning_rate": 1.9546242670571198e-05, "loss": 1.535, "step": 1161 }, { "epoch": 0.20046579832657638, "grad_norm": 0.89453125, "learning_rate": 1.954543170636987e-05, "loss": 1.5349, "step": 1162 }, { "epoch": 0.20063831622530837, "grad_norm": 0.625, "learning_rate": 1.9544620034983322e-05, "loss": 1.5516, "step": 1163 }, { "epoch": 0.20081083412404038, "grad_norm": 0.6015625, "learning_rate": 1.9543807656471683e-05, "loss": 1.5135, "step": 1164 }, { "epoch": 0.20098335202277237, "grad_norm": 1.03125, "learning_rate": 1.954299457089514e-05, "loss": 1.6499, "step": 1165 }, { "epoch": 0.20115586992150436, "grad_norm": 0.72265625, "learning_rate": 1.9542180778313936e-05, "loss": 1.6134, "step": 1166 }, { "epoch": 0.20132838782023635, "grad_norm": 0.5625, "learning_rate": 1.954136627878835e-05, "loss": 1.5139, "step": 1167 }, { "epoch": 0.20150090571896834, "grad_norm": 0.92578125, "learning_rate": 1.9540551072378738e-05, "loss": 1.5382, "step": 1168 }, { "epoch": 0.20167342361770033, "grad_norm": 0.5859375, "learning_rate": 1.953973515914549e-05, "loss": 1.4794, "step": 1169 }, { "epoch": 0.20184594151643234, "grad_norm": 0.80078125, "learning_rate": 1.9538918539149054e-05, "loss": 1.4757, "step": 1170 }, { "epoch": 0.20201845941516433, "grad_norm": 0.83203125, "learning_rate": 1.9538101212449932e-05, "loss": 1.5418, "step": 1171 }, { "epoch": 0.20219097731389632, "grad_norm": 0.80078125, "learning_rate": 1.953728317910867e-05, "loss": 1.6522, "step": 1172 }, { "epoch": 0.2023634952126283, "grad_norm": 0.640625, "learning_rate": 1.953646443918589e-05, "loss": 1.5378, "step": 1173 }, { "epoch": 0.2025360131113603, "grad_norm": 0.69921875, "learning_rate": 1.9535644992742225e-05, "loss": 1.578, "step": 1174 }, { "epoch": 0.20270853101009229, "grad_norm": 0.625, "learning_rate": 1.9534824839838406e-05, "loss": 1.5171, "step": 1175 }, { "epoch": 0.2028810489088243, "grad_norm": 0.609375, "learning_rate": 1.953400398053518e-05, "loss": 1.6417, "step": 1176 }, { "epoch": 0.2030535668075563, "grad_norm": 0.5703125, "learning_rate": 1.953318241489337e-05, "loss": 1.4812, "step": 1177 }, { "epoch": 0.20322608470628828, "grad_norm": 0.65234375, "learning_rate": 1.9532360142973842e-05, "loss": 1.6705, "step": 1178 }, { "epoch": 0.20339860260502027, "grad_norm": 0.71875, "learning_rate": 1.9531537164837516e-05, "loss": 1.4646, "step": 1179 }, { "epoch": 0.20357112050375226, "grad_norm": 0.6875, "learning_rate": 1.9530713480545357e-05, "loss": 1.6521, "step": 1180 }, { "epoch": 0.20374363840248425, "grad_norm": 0.68359375, "learning_rate": 1.9529889090158394e-05, "loss": 1.5087, "step": 1181 }, { "epoch": 0.20391615630121626, "grad_norm": 0.61328125, "learning_rate": 1.9529063993737703e-05, "loss": 1.5173, "step": 1182 }, { "epoch": 0.20408867419994825, "grad_norm": 0.578125, "learning_rate": 1.952823819134441e-05, "loss": 1.5581, "step": 1183 }, { "epoch": 0.20426119209868024, "grad_norm": 0.69921875, "learning_rate": 1.9527411683039697e-05, "loss": 1.5374, "step": 1184 }, { "epoch": 0.20443370999741223, "grad_norm": 0.640625, "learning_rate": 1.952658446888479e-05, "loss": 1.523, "step": 1185 }, { "epoch": 0.20460622789614422, "grad_norm": 0.79296875, "learning_rate": 1.952575654894099e-05, "loss": 1.3737, "step": 1186 }, { "epoch": 0.2047787457948762, "grad_norm": 0.64453125, "learning_rate": 1.9524927923269623e-05, "loss": 1.5357, "step": 1187 }, { "epoch": 0.20495126369360822, "grad_norm": 0.6953125, "learning_rate": 1.9524098591932078e-05, "loss": 1.5545, "step": 1188 }, { "epoch": 0.2051237815923402, "grad_norm": 0.67578125, "learning_rate": 1.9523268554989806e-05, "loss": 1.6352, "step": 1189 }, { "epoch": 0.2052962994910722, "grad_norm": 0.6171875, "learning_rate": 1.9522437812504285e-05, "loss": 1.5834, "step": 1190 }, { "epoch": 0.2054688173898042, "grad_norm": 0.71484375, "learning_rate": 1.952160636453708e-05, "loss": 1.5966, "step": 1191 }, { "epoch": 0.20564133528853618, "grad_norm": 0.62890625, "learning_rate": 1.9520774211149783e-05, "loss": 1.4937, "step": 1192 }, { "epoch": 0.2058138531872682, "grad_norm": 0.61328125, "learning_rate": 1.951994135240404e-05, "loss": 1.4849, "step": 1193 }, { "epoch": 0.20598637108600018, "grad_norm": 0.5703125, "learning_rate": 1.9519107788361557e-05, "loss": 1.5178, "step": 1194 }, { "epoch": 0.20615888898473217, "grad_norm": 0.69140625, "learning_rate": 1.9518273519084093e-05, "loss": 1.4902, "step": 1195 }, { "epoch": 0.20633140688346416, "grad_norm": 0.67578125, "learning_rate": 1.9517438544633455e-05, "loss": 1.5649, "step": 1196 }, { "epoch": 0.20650392478219615, "grad_norm": 0.7578125, "learning_rate": 1.95166028650715e-05, "loss": 1.5275, "step": 1197 }, { "epoch": 0.20667644268092814, "grad_norm": 0.578125, "learning_rate": 1.951576648046014e-05, "loss": 1.4538, "step": 1198 }, { "epoch": 0.20684896057966015, "grad_norm": 0.58203125, "learning_rate": 1.951492939086135e-05, "loss": 1.5145, "step": 1199 }, { "epoch": 0.20702147847839214, "grad_norm": 0.671875, "learning_rate": 1.951409159633713e-05, "loss": 1.3968, "step": 1200 }, { "epoch": 0.20702147847839214, "eval_loss": 1.5226820707321167, "eval_runtime": 10.8512, "eval_samples_per_second": 94.368, "eval_steps_per_second": 23.592, "step": 1200 }, { "epoch": 0.20719399637712413, "grad_norm": 0.6484375, "learning_rate": 1.951325309694956e-05, "loss": 1.5766, "step": 1201 }, { "epoch": 0.20736651427585612, "grad_norm": 0.62109375, "learning_rate": 1.951241389276076e-05, "loss": 1.4671, "step": 1202 }, { "epoch": 0.2075390321745881, "grad_norm": 0.72265625, "learning_rate": 1.9511573983832903e-05, "loss": 1.4517, "step": 1203 }, { "epoch": 0.2077115500733201, "grad_norm": 4.59375, "learning_rate": 1.9510733370228214e-05, "loss": 1.5248, "step": 1204 }, { "epoch": 0.2078840679720521, "grad_norm": 0.76953125, "learning_rate": 1.950989205200897e-05, "loss": 1.5138, "step": 1205 }, { "epoch": 0.2080565858707841, "grad_norm": 0.6640625, "learning_rate": 1.9509050029237506e-05, "loss": 1.5073, "step": 1206 }, { "epoch": 0.2082291037695161, "grad_norm": 0.578125, "learning_rate": 1.9508207301976197e-05, "loss": 1.5608, "step": 1207 }, { "epoch": 0.20840162166824808, "grad_norm": 0.859375, "learning_rate": 1.9507363870287482e-05, "loss": 1.5454, "step": 1208 }, { "epoch": 0.20857413956698007, "grad_norm": 0.65234375, "learning_rate": 1.950651973423385e-05, "loss": 1.4313, "step": 1209 }, { "epoch": 0.20874665746571205, "grad_norm": 0.73828125, "learning_rate": 1.950567489387783e-05, "loss": 1.5429, "step": 1210 }, { "epoch": 0.20891917536444407, "grad_norm": 0.60546875, "learning_rate": 1.950482934928202e-05, "loss": 1.4819, "step": 1211 }, { "epoch": 0.20909169326317606, "grad_norm": 0.671875, "learning_rate": 1.9503983100509067e-05, "loss": 1.5763, "step": 1212 }, { "epoch": 0.20926421116190805, "grad_norm": 0.6640625, "learning_rate": 1.9503136147621662e-05, "loss": 1.4882, "step": 1213 }, { "epoch": 0.20943672906064004, "grad_norm": 0.6796875, "learning_rate": 1.9502288490682553e-05, "loss": 1.5377, "step": 1214 }, { "epoch": 0.20960924695937203, "grad_norm": 0.625, "learning_rate": 1.950144012975454e-05, "loss": 1.5165, "step": 1215 }, { "epoch": 0.20978176485810404, "grad_norm": 0.6875, "learning_rate": 1.950059106490047e-05, "loss": 1.5467, "step": 1216 }, { "epoch": 0.20995428275683603, "grad_norm": 0.7109375, "learning_rate": 1.9499741296183255e-05, "loss": 1.6396, "step": 1217 }, { "epoch": 0.21012680065556802, "grad_norm": 0.6328125, "learning_rate": 1.9498890823665846e-05, "loss": 1.6346, "step": 1218 }, { "epoch": 0.2102993185543, "grad_norm": 0.6015625, "learning_rate": 1.9498039647411255e-05, "loss": 1.592, "step": 1219 }, { "epoch": 0.210471836453032, "grad_norm": 0.69921875, "learning_rate": 1.9497187767482538e-05, "loss": 1.5075, "step": 1220 }, { "epoch": 0.21064435435176398, "grad_norm": 0.61328125, "learning_rate": 1.9496335183942814e-05, "loss": 1.5514, "step": 1221 }, { "epoch": 0.210816872250496, "grad_norm": 0.83984375, "learning_rate": 1.949548189685524e-05, "loss": 1.5402, "step": 1222 }, { "epoch": 0.210989390149228, "grad_norm": 0.65234375, "learning_rate": 1.949462790628304e-05, "loss": 1.5735, "step": 1223 }, { "epoch": 0.21116190804795998, "grad_norm": 0.6953125, "learning_rate": 1.9493773212289475e-05, "loss": 1.5145, "step": 1224 }, { "epoch": 0.21133442594669197, "grad_norm": 0.58984375, "learning_rate": 1.9492917814937874e-05, "loss": 1.4504, "step": 1225 }, { "epoch": 0.21150694384542396, "grad_norm": 0.59765625, "learning_rate": 1.9492061714291605e-05, "loss": 1.4625, "step": 1226 }, { "epoch": 0.21167946174415594, "grad_norm": 0.6171875, "learning_rate": 1.9491204910414097e-05, "loss": 1.4586, "step": 1227 }, { "epoch": 0.21185197964288796, "grad_norm": 0.58984375, "learning_rate": 1.9490347403368823e-05, "loss": 1.4972, "step": 1228 }, { "epoch": 0.21202449754161995, "grad_norm": 0.69921875, "learning_rate": 1.948948919321932e-05, "loss": 1.5251, "step": 1229 }, { "epoch": 0.21219701544035194, "grad_norm": 0.6875, "learning_rate": 1.9488630280029154e-05, "loss": 1.5978, "step": 1230 }, { "epoch": 0.21236953333908393, "grad_norm": 0.625, "learning_rate": 1.948777066386198e-05, "loss": 1.5903, "step": 1231 }, { "epoch": 0.21254205123781592, "grad_norm": 0.73828125, "learning_rate": 1.9486910344781467e-05, "loss": 1.5965, "step": 1232 }, { "epoch": 0.2127145691365479, "grad_norm": 0.73046875, "learning_rate": 1.9486049322851358e-05, "loss": 1.5626, "step": 1233 }, { "epoch": 0.21288708703527992, "grad_norm": 0.66015625, "learning_rate": 1.9485187598135445e-05, "loss": 1.5257, "step": 1234 }, { "epoch": 0.2130596049340119, "grad_norm": 0.75390625, "learning_rate": 1.9484325170697574e-05, "loss": 1.5239, "step": 1235 }, { "epoch": 0.2132321228327439, "grad_norm": 0.7578125, "learning_rate": 1.9483462040601627e-05, "loss": 1.5665, "step": 1236 }, { "epoch": 0.2134046407314759, "grad_norm": 0.99609375, "learning_rate": 1.948259820791156e-05, "loss": 1.4892, "step": 1237 }, { "epoch": 0.21357715863020788, "grad_norm": 0.6328125, "learning_rate": 1.9481733672691365e-05, "loss": 1.5117, "step": 1238 }, { "epoch": 0.21374967652893986, "grad_norm": 0.88671875, "learning_rate": 1.9480868435005098e-05, "loss": 1.6343, "step": 1239 }, { "epoch": 0.21392219442767188, "grad_norm": 0.65234375, "learning_rate": 1.9480002494916857e-05, "loss": 1.5402, "step": 1240 }, { "epoch": 0.21409471232640387, "grad_norm": 0.8359375, "learning_rate": 1.9479135852490794e-05, "loss": 1.4328, "step": 1241 }, { "epoch": 0.21426723022513586, "grad_norm": 1.1328125, "learning_rate": 1.9478268507791124e-05, "loss": 1.6286, "step": 1242 }, { "epoch": 0.21443974812386785, "grad_norm": 0.8046875, "learning_rate": 1.9477400460882096e-05, "loss": 1.5382, "step": 1243 }, { "epoch": 0.21461226602259983, "grad_norm": 0.859375, "learning_rate": 1.9476531711828027e-05, "loss": 1.5822, "step": 1244 }, { "epoch": 0.21478478392133185, "grad_norm": 0.58203125, "learning_rate": 1.9475662260693275e-05, "loss": 1.5945, "step": 1245 }, { "epoch": 0.21495730182006384, "grad_norm": 0.59375, "learning_rate": 1.9474792107542258e-05, "loss": 1.552, "step": 1246 }, { "epoch": 0.21512981971879583, "grad_norm": 0.7578125, "learning_rate": 1.9473921252439438e-05, "loss": 1.5303, "step": 1247 }, { "epoch": 0.21530233761752782, "grad_norm": 0.68359375, "learning_rate": 1.947304969544934e-05, "loss": 1.4816, "step": 1248 }, { "epoch": 0.2154748555162598, "grad_norm": 0.68359375, "learning_rate": 1.9472177436636523e-05, "loss": 1.5002, "step": 1249 }, { "epoch": 0.2156473734149918, "grad_norm": 0.640625, "learning_rate": 1.9471304476065624e-05, "loss": 1.5424, "step": 1250 }, { "epoch": 0.2158198913137238, "grad_norm": 3.140625, "learning_rate": 1.94704308138013e-05, "loss": 1.5329, "step": 1251 }, { "epoch": 0.2159924092124558, "grad_norm": 0.6328125, "learning_rate": 1.94695564499083e-05, "loss": 1.5575, "step": 1252 }, { "epoch": 0.2161649271111878, "grad_norm": 0.64453125, "learning_rate": 1.946868138445138e-05, "loss": 1.5801, "step": 1253 }, { "epoch": 0.21633744500991978, "grad_norm": 0.734375, "learning_rate": 1.9467805617495384e-05, "loss": 1.5072, "step": 1254 }, { "epoch": 0.21650996290865177, "grad_norm": 0.91015625, "learning_rate": 1.9466929149105193e-05, "loss": 1.5689, "step": 1255 }, { "epoch": 0.21668248080738375, "grad_norm": 0.60546875, "learning_rate": 1.9466051979345734e-05, "loss": 1.5702, "step": 1256 }, { "epoch": 0.21685499870611577, "grad_norm": 0.76953125, "learning_rate": 1.9465174108281995e-05, "loss": 1.5755, "step": 1257 }, { "epoch": 0.21702751660484776, "grad_norm": 0.7421875, "learning_rate": 1.9464295535979022e-05, "loss": 1.4259, "step": 1258 }, { "epoch": 0.21720003450357975, "grad_norm": 0.6796875, "learning_rate": 1.9463416262501898e-05, "loss": 1.4476, "step": 1259 }, { "epoch": 0.21737255240231174, "grad_norm": 0.59765625, "learning_rate": 1.9462536287915773e-05, "loss": 1.5078, "step": 1260 }, { "epoch": 0.21754507030104372, "grad_norm": 0.6328125, "learning_rate": 1.9461655612285827e-05, "loss": 1.5395, "step": 1261 }, { "epoch": 0.2177175881997757, "grad_norm": 0.67578125, "learning_rate": 1.946077423567732e-05, "loss": 1.4661, "step": 1262 }, { "epoch": 0.21789010609850773, "grad_norm": 0.640625, "learning_rate": 1.945989215815554e-05, "loss": 1.55, "step": 1263 }, { "epoch": 0.21806262399723972, "grad_norm": 0.59375, "learning_rate": 1.9459009379785842e-05, "loss": 1.5413, "step": 1264 }, { "epoch": 0.2182351418959717, "grad_norm": 0.62109375, "learning_rate": 1.9458125900633627e-05, "loss": 1.4655, "step": 1265 }, { "epoch": 0.2184076597947037, "grad_norm": 0.66015625, "learning_rate": 1.945724172076435e-05, "loss": 1.608, "step": 1266 }, { "epoch": 0.21858017769343568, "grad_norm": 0.63671875, "learning_rate": 1.945635684024351e-05, "loss": 1.5109, "step": 1267 }, { "epoch": 0.2187526955921677, "grad_norm": 0.59765625, "learning_rate": 1.945547125913667e-05, "loss": 1.4214, "step": 1268 }, { "epoch": 0.2189252134908997, "grad_norm": 0.61328125, "learning_rate": 1.9454584977509443e-05, "loss": 1.5579, "step": 1269 }, { "epoch": 0.21909773138963168, "grad_norm": 0.6015625, "learning_rate": 1.9453697995427483e-05, "loss": 1.5788, "step": 1270 }, { "epoch": 0.21927024928836367, "grad_norm": 5.40625, "learning_rate": 1.9452810312956507e-05, "loss": 1.6086, "step": 1271 }, { "epoch": 0.21944276718709566, "grad_norm": 0.61328125, "learning_rate": 1.945192193016228e-05, "loss": 1.5086, "step": 1272 }, { "epoch": 0.21961528508582764, "grad_norm": 0.69140625, "learning_rate": 1.9451032847110615e-05, "loss": 1.6391, "step": 1273 }, { "epoch": 0.21978780298455966, "grad_norm": 0.609375, "learning_rate": 1.9450143063867385e-05, "loss": 1.5566, "step": 1274 }, { "epoch": 0.21996032088329165, "grad_norm": 0.66015625, "learning_rate": 1.944925258049851e-05, "loss": 1.6582, "step": 1275 }, { "epoch": 0.22013283878202364, "grad_norm": 0.60546875, "learning_rate": 1.9448361397069962e-05, "loss": 1.5291, "step": 1276 }, { "epoch": 0.22030535668075563, "grad_norm": 0.6328125, "learning_rate": 1.9447469513647767e-05, "loss": 1.6006, "step": 1277 }, { "epoch": 0.22047787457948761, "grad_norm": 0.6328125, "learning_rate": 1.9446576930298e-05, "loss": 1.4931, "step": 1278 }, { "epoch": 0.2206503924782196, "grad_norm": 0.66015625, "learning_rate": 1.9445683647086788e-05, "loss": 1.4638, "step": 1279 }, { "epoch": 0.22082291037695162, "grad_norm": 0.58203125, "learning_rate": 1.944478966408031e-05, "loss": 1.5128, "step": 1280 }, { "epoch": 0.2209954282756836, "grad_norm": 0.62109375, "learning_rate": 1.9443894981344802e-05, "loss": 1.5028, "step": 1281 }, { "epoch": 0.2211679461744156, "grad_norm": 0.6171875, "learning_rate": 1.9442999598946545e-05, "loss": 1.5222, "step": 1282 }, { "epoch": 0.22134046407314759, "grad_norm": 0.63671875, "learning_rate": 1.9442103516951878e-05, "loss": 1.5544, "step": 1283 }, { "epoch": 0.22151298197187957, "grad_norm": 0.6796875, "learning_rate": 1.944120673542718e-05, "loss": 1.6135, "step": 1284 }, { "epoch": 0.22168549987061156, "grad_norm": 0.65625, "learning_rate": 1.94403092544389e-05, "loss": 1.5499, "step": 1285 }, { "epoch": 0.22185801776934358, "grad_norm": 0.67578125, "learning_rate": 1.9439411074053525e-05, "loss": 1.6011, "step": 1286 }, { "epoch": 0.22203053566807557, "grad_norm": 0.5625, "learning_rate": 1.9438512194337595e-05, "loss": 1.4681, "step": 1287 }, { "epoch": 0.22220305356680756, "grad_norm": 0.58984375, "learning_rate": 1.9437612615357708e-05, "loss": 1.4965, "step": 1288 }, { "epoch": 0.22237557146553955, "grad_norm": 0.64453125, "learning_rate": 1.943671233718051e-05, "loss": 1.5009, "step": 1289 }, { "epoch": 0.22254808936427153, "grad_norm": 0.6796875, "learning_rate": 1.9435811359872696e-05, "loss": 1.4423, "step": 1290 }, { "epoch": 0.22272060726300355, "grad_norm": 0.84765625, "learning_rate": 1.9434909683501023e-05, "loss": 1.5931, "step": 1291 }, { "epoch": 0.22289312516173554, "grad_norm": 0.73828125, "learning_rate": 1.9434007308132287e-05, "loss": 1.536, "step": 1292 }, { "epoch": 0.22306564306046753, "grad_norm": 0.73046875, "learning_rate": 1.943310423383334e-05, "loss": 1.4643, "step": 1293 }, { "epoch": 0.22323816095919952, "grad_norm": 0.80859375, "learning_rate": 1.9432200460671096e-05, "loss": 1.5385, "step": 1294 }, { "epoch": 0.2234106788579315, "grad_norm": 0.6875, "learning_rate": 1.9431295988712504e-05, "loss": 1.6433, "step": 1295 }, { "epoch": 0.2235831967566635, "grad_norm": 0.58203125, "learning_rate": 1.9430390818024575e-05, "loss": 1.5004, "step": 1296 }, { "epoch": 0.2237557146553955, "grad_norm": 0.68359375, "learning_rate": 1.9429484948674374e-05, "loss": 1.5267, "step": 1297 }, { "epoch": 0.2239282325541275, "grad_norm": 0.73046875, "learning_rate": 1.9428578380729006e-05, "loss": 1.562, "step": 1298 }, { "epoch": 0.2241007504528595, "grad_norm": 0.6484375, "learning_rate": 1.942767111425564e-05, "loss": 1.5171, "step": 1299 }, { "epoch": 0.22427326835159148, "grad_norm": 0.67578125, "learning_rate": 1.942676314932149e-05, "loss": 1.4994, "step": 1300 }, { "epoch": 0.22427326835159148, "eval_loss": 1.515897512435913, "eval_runtime": 11.0746, "eval_samples_per_second": 92.464, "eval_steps_per_second": 23.116, "step": 1300 }, { "epoch": 0.22444578625032346, "grad_norm": 0.81640625, "learning_rate": 1.9425854485993828e-05, "loss": 1.6136, "step": 1301 }, { "epoch": 0.22461830414905545, "grad_norm": 0.65625, "learning_rate": 1.9424945124339965e-05, "loss": 1.5522, "step": 1302 }, { "epoch": 0.22479082204778747, "grad_norm": 0.57421875, "learning_rate": 1.9424035064427286e-05, "loss": 1.4482, "step": 1303 }, { "epoch": 0.22496333994651946, "grad_norm": 0.6875, "learning_rate": 1.9423124306323197e-05, "loss": 1.6717, "step": 1304 }, { "epoch": 0.22513585784525145, "grad_norm": 0.66796875, "learning_rate": 1.9422212850095183e-05, "loss": 1.4648, "step": 1305 }, { "epoch": 0.22530837574398344, "grad_norm": 0.73828125, "learning_rate": 1.9421300695810773e-05, "loss": 1.5514, "step": 1306 }, { "epoch": 0.22548089364271542, "grad_norm": 0.65234375, "learning_rate": 1.9420387843537533e-05, "loss": 1.5525, "step": 1307 }, { "epoch": 0.2256534115414474, "grad_norm": 0.6796875, "learning_rate": 1.9419474293343107e-05, "loss": 1.5457, "step": 1308 }, { "epoch": 0.22582592944017943, "grad_norm": 0.66796875, "learning_rate": 1.9418560045295166e-05, "loss": 1.5831, "step": 1309 }, { "epoch": 0.22599844733891142, "grad_norm": 0.65234375, "learning_rate": 1.9417645099461446e-05, "loss": 1.407, "step": 1310 }, { "epoch": 0.2261709652376434, "grad_norm": 0.671875, "learning_rate": 1.9416729455909737e-05, "loss": 1.6039, "step": 1311 }, { "epoch": 0.2263434831363754, "grad_norm": 0.6484375, "learning_rate": 1.941581311470787e-05, "loss": 1.5484, "step": 1312 }, { "epoch": 0.22651600103510738, "grad_norm": 0.734375, "learning_rate": 1.9414896075923732e-05, "loss": 1.5051, "step": 1313 }, { "epoch": 0.22668851893383937, "grad_norm": 0.6328125, "learning_rate": 1.9413978339625267e-05, "loss": 1.4133, "step": 1314 }, { "epoch": 0.2268610368325714, "grad_norm": 0.6875, "learning_rate": 1.9413059905880466e-05, "loss": 1.5823, "step": 1315 }, { "epoch": 0.22703355473130338, "grad_norm": 0.734375, "learning_rate": 1.941214077475737e-05, "loss": 1.5071, "step": 1316 }, { "epoch": 0.22720607263003537, "grad_norm": 0.78515625, "learning_rate": 1.941122094632408e-05, "loss": 1.5494, "step": 1317 }, { "epoch": 0.22737859052876735, "grad_norm": 0.73828125, "learning_rate": 1.9410300420648735e-05, "loss": 1.4567, "step": 1318 }, { "epoch": 0.22755110842749934, "grad_norm": 0.71875, "learning_rate": 1.9409379197799537e-05, "loss": 1.5043, "step": 1319 }, { "epoch": 0.22772362632623136, "grad_norm": 0.8671875, "learning_rate": 1.9408457277844738e-05, "loss": 1.5131, "step": 1320 }, { "epoch": 0.22789614422496335, "grad_norm": 0.63671875, "learning_rate": 1.9407534660852632e-05, "loss": 1.5591, "step": 1321 }, { "epoch": 0.22806866212369534, "grad_norm": 0.74609375, "learning_rate": 1.940661134689158e-05, "loss": 1.4588, "step": 1322 }, { "epoch": 0.22824118002242733, "grad_norm": 0.64453125, "learning_rate": 1.9405687336029985e-05, "loss": 1.519, "step": 1323 }, { "epoch": 0.22841369792115931, "grad_norm": 0.73046875, "learning_rate": 1.94047626283363e-05, "loss": 1.5361, "step": 1324 }, { "epoch": 0.2285862158198913, "grad_norm": 0.8671875, "learning_rate": 1.9403837223879038e-05, "loss": 1.5026, "step": 1325 }, { "epoch": 0.22875873371862332, "grad_norm": 0.71484375, "learning_rate": 1.9402911122726756e-05, "loss": 1.5397, "step": 1326 }, { "epoch": 0.2289312516173553, "grad_norm": 0.8046875, "learning_rate": 1.9401984324948067e-05, "loss": 1.4693, "step": 1327 }, { "epoch": 0.2291037695160873, "grad_norm": 0.84765625, "learning_rate": 1.9401056830611634e-05, "loss": 1.5675, "step": 1328 }, { "epoch": 0.22927628741481929, "grad_norm": 0.765625, "learning_rate": 1.9400128639786168e-05, "loss": 1.4772, "step": 1329 }, { "epoch": 0.22944880531355127, "grad_norm": 0.83984375, "learning_rate": 1.9399199752540435e-05, "loss": 1.3506, "step": 1330 }, { "epoch": 0.22962132321228326, "grad_norm": 0.640625, "learning_rate": 1.939827016894326e-05, "loss": 1.4486, "step": 1331 }, { "epoch": 0.22979384111101528, "grad_norm": 0.6953125, "learning_rate": 1.9397339889063506e-05, "loss": 1.5139, "step": 1332 }, { "epoch": 0.22996635900974727, "grad_norm": 0.765625, "learning_rate": 1.9396408912970096e-05, "loss": 1.5306, "step": 1333 }, { "epoch": 0.23013887690847926, "grad_norm": 0.62890625, "learning_rate": 1.9395477240732003e-05, "loss": 1.4321, "step": 1334 }, { "epoch": 0.23031139480721124, "grad_norm": 0.84765625, "learning_rate": 1.939454487241825e-05, "loss": 1.5289, "step": 1335 }, { "epoch": 0.23048391270594323, "grad_norm": 0.65234375, "learning_rate": 1.9393611808097913e-05, "loss": 1.5167, "step": 1336 }, { "epoch": 0.23065643060467522, "grad_norm": 0.75, "learning_rate": 1.939267804784012e-05, "loss": 1.509, "step": 1337 }, { "epoch": 0.23082894850340724, "grad_norm": 0.67578125, "learning_rate": 1.9391743591714046e-05, "loss": 1.4916, "step": 1338 }, { "epoch": 0.23100146640213923, "grad_norm": 0.66015625, "learning_rate": 1.9390808439788928e-05, "loss": 1.5742, "step": 1339 }, { "epoch": 0.23117398430087122, "grad_norm": 0.75, "learning_rate": 1.9389872592134044e-05, "loss": 1.5068, "step": 1340 }, { "epoch": 0.2313465021996032, "grad_norm": 1.1484375, "learning_rate": 1.9388936048818725e-05, "loss": 1.6008, "step": 1341 }, { "epoch": 0.2315190200983352, "grad_norm": 0.59375, "learning_rate": 1.938799880991236e-05, "loss": 1.4813, "step": 1342 }, { "epoch": 0.2316915379970672, "grad_norm": 0.63671875, "learning_rate": 1.9387060875484388e-05, "loss": 1.5196, "step": 1343 }, { "epoch": 0.2318640558957992, "grad_norm": 0.78125, "learning_rate": 1.9386122245604285e-05, "loss": 1.5514, "step": 1344 }, { "epoch": 0.2320365737945312, "grad_norm": 0.63671875, "learning_rate": 1.938518292034161e-05, "loss": 1.5391, "step": 1345 }, { "epoch": 0.23220909169326318, "grad_norm": 0.703125, "learning_rate": 1.9384242899765933e-05, "loss": 1.6399, "step": 1346 }, { "epoch": 0.23238160959199516, "grad_norm": 0.6484375, "learning_rate": 1.938330218394691e-05, "loss": 1.6369, "step": 1347 }, { "epoch": 0.23255412749072715, "grad_norm": 0.65625, "learning_rate": 1.938236077295423e-05, "loss": 1.5085, "step": 1348 }, { "epoch": 0.23272664538945917, "grad_norm": 0.61328125, "learning_rate": 1.9381418666857645e-05, "loss": 1.4524, "step": 1349 }, { "epoch": 0.23289916328819116, "grad_norm": 0.67578125, "learning_rate": 1.938047586572694e-05, "loss": 1.5181, "step": 1350 }, { "epoch": 0.23307168118692315, "grad_norm": 0.63671875, "learning_rate": 1.937953236963198e-05, "loss": 1.5584, "step": 1351 }, { "epoch": 0.23324419908565514, "grad_norm": 0.70703125, "learning_rate": 1.937858817864265e-05, "loss": 1.489, "step": 1352 }, { "epoch": 0.23341671698438712, "grad_norm": 0.8671875, "learning_rate": 1.937764329282891e-05, "loss": 1.595, "step": 1353 }, { "epoch": 0.2335892348831191, "grad_norm": 0.671875, "learning_rate": 1.9376697712260758e-05, "loss": 1.5573, "step": 1354 }, { "epoch": 0.23376175278185113, "grad_norm": 0.64453125, "learning_rate": 1.9375751437008253e-05, "loss": 1.4401, "step": 1355 }, { "epoch": 0.23393427068058312, "grad_norm": 0.87890625, "learning_rate": 1.9374804467141497e-05, "loss": 1.4892, "step": 1356 }, { "epoch": 0.2341067885793151, "grad_norm": 0.85546875, "learning_rate": 1.937385680273065e-05, "loss": 1.5684, "step": 1357 }, { "epoch": 0.2342793064780471, "grad_norm": 1.3984375, "learning_rate": 1.9372908443845926e-05, "loss": 1.6208, "step": 1358 }, { "epoch": 0.23445182437677908, "grad_norm": 0.8984375, "learning_rate": 1.9371959390557573e-05, "loss": 1.5651, "step": 1359 }, { "epoch": 0.23462434227551107, "grad_norm": 0.6015625, "learning_rate": 1.9371009642935913e-05, "loss": 1.4277, "step": 1360 }, { "epoch": 0.2347968601742431, "grad_norm": 0.8125, "learning_rate": 1.9370059201051304e-05, "loss": 1.5014, "step": 1361 }, { "epoch": 0.23496937807297508, "grad_norm": 0.71484375, "learning_rate": 1.936910806497417e-05, "loss": 1.4315, "step": 1362 }, { "epoch": 0.23514189597170707, "grad_norm": 0.71484375, "learning_rate": 1.936815623477496e-05, "loss": 1.6346, "step": 1363 }, { "epoch": 0.23531441387043905, "grad_norm": 0.7421875, "learning_rate": 1.9367203710524204e-05, "loss": 1.5522, "step": 1364 }, { "epoch": 0.23548693176917104, "grad_norm": 0.640625, "learning_rate": 1.936625049229247e-05, "loss": 1.5137, "step": 1365 }, { "epoch": 0.23565944966790303, "grad_norm": 0.640625, "learning_rate": 1.9365296580150374e-05, "loss": 1.473, "step": 1366 }, { "epoch": 0.23583196756663505, "grad_norm": 0.64453125, "learning_rate": 1.9364341974168594e-05, "loss": 1.5776, "step": 1367 }, { "epoch": 0.23600448546536704, "grad_norm": 8.9375, "learning_rate": 1.9363386674417844e-05, "loss": 1.5133, "step": 1368 }, { "epoch": 0.23617700336409903, "grad_norm": 0.7265625, "learning_rate": 1.9362430680968907e-05, "loss": 1.5902, "step": 1369 }, { "epoch": 0.236349521262831, "grad_norm": 0.65234375, "learning_rate": 1.9361473993892604e-05, "loss": 1.4907, "step": 1370 }, { "epoch": 0.236522039161563, "grad_norm": 0.609375, "learning_rate": 1.936051661325982e-05, "loss": 1.569, "step": 1371 }, { "epoch": 0.23669455706029502, "grad_norm": 0.77734375, "learning_rate": 1.9359558539141474e-05, "loss": 1.5697, "step": 1372 }, { "epoch": 0.236867074959027, "grad_norm": 0.59375, "learning_rate": 1.935859977160855e-05, "loss": 1.4963, "step": 1373 }, { "epoch": 0.237039592857759, "grad_norm": 0.7265625, "learning_rate": 1.935764031073208e-05, "loss": 1.5429, "step": 1374 }, { "epoch": 0.23721211075649098, "grad_norm": 0.671875, "learning_rate": 1.9356680156583147e-05, "loss": 1.5264, "step": 1375 }, { "epoch": 0.23738462865522297, "grad_norm": 0.65234375, "learning_rate": 1.9355719309232885e-05, "loss": 1.5749, "step": 1376 }, { "epoch": 0.23755714655395496, "grad_norm": 0.6796875, "learning_rate": 1.935475776875248e-05, "loss": 1.6136, "step": 1377 }, { "epoch": 0.23772966445268698, "grad_norm": 0.61328125, "learning_rate": 1.9353795535213167e-05, "loss": 1.4908, "step": 1378 }, { "epoch": 0.23790218235141897, "grad_norm": 0.734375, "learning_rate": 1.9352832608686234e-05, "loss": 1.439, "step": 1379 }, { "epoch": 0.23807470025015096, "grad_norm": 0.62890625, "learning_rate": 1.9351868989243026e-05, "loss": 1.5656, "step": 1380 }, { "epoch": 0.23824721814888294, "grad_norm": 0.56640625, "learning_rate": 1.9350904676954927e-05, "loss": 1.4963, "step": 1381 }, { "epoch": 0.23841973604761493, "grad_norm": 0.73046875, "learning_rate": 1.9349939671893384e-05, "loss": 1.5902, "step": 1382 }, { "epoch": 0.23859225394634692, "grad_norm": 0.6015625, "learning_rate": 1.934897397412989e-05, "loss": 1.457, "step": 1383 }, { "epoch": 0.23876477184507894, "grad_norm": 0.6796875, "learning_rate": 1.9348007583735985e-05, "loss": 1.5349, "step": 1384 }, { "epoch": 0.23893728974381093, "grad_norm": 0.62109375, "learning_rate": 1.9347040500783272e-05, "loss": 1.4851, "step": 1385 }, { "epoch": 0.23910980764254292, "grad_norm": 0.6015625, "learning_rate": 1.9346072725343394e-05, "loss": 1.5019, "step": 1386 }, { "epoch": 0.2392823255412749, "grad_norm": 0.57421875, "learning_rate": 1.934510425748805e-05, "loss": 1.5755, "step": 1387 }, { "epoch": 0.2394548434400069, "grad_norm": 0.625, "learning_rate": 1.9344135097288997e-05, "loss": 1.5478, "step": 1388 }, { "epoch": 0.23962736133873888, "grad_norm": 0.83203125, "learning_rate": 1.934316524481803e-05, "loss": 1.5787, "step": 1389 }, { "epoch": 0.2397998792374709, "grad_norm": 0.609375, "learning_rate": 1.9342194700146998e-05, "loss": 1.5794, "step": 1390 }, { "epoch": 0.2399723971362029, "grad_norm": 0.75390625, "learning_rate": 1.9341223463347815e-05, "loss": 1.5546, "step": 1391 }, { "epoch": 0.24014491503493487, "grad_norm": 1.1171875, "learning_rate": 1.9340251534492428e-05, "loss": 1.5541, "step": 1392 }, { "epoch": 0.24031743293366686, "grad_norm": 0.7578125, "learning_rate": 1.933927891365285e-05, "loss": 1.644, "step": 1393 }, { "epoch": 0.24048995083239885, "grad_norm": 0.58203125, "learning_rate": 1.9338305600901135e-05, "loss": 1.514, "step": 1394 }, { "epoch": 0.24066246873113087, "grad_norm": 0.67578125, "learning_rate": 1.933733159630939e-05, "loss": 1.5331, "step": 1395 }, { "epoch": 0.24083498662986286, "grad_norm": 0.8046875, "learning_rate": 1.933635689994978e-05, "loss": 1.4759, "step": 1396 }, { "epoch": 0.24100750452859485, "grad_norm": 0.6875, "learning_rate": 1.933538151189451e-05, "loss": 1.4967, "step": 1397 }, { "epoch": 0.24118002242732683, "grad_norm": 0.734375, "learning_rate": 1.9334405432215857e-05, "loss": 1.5873, "step": 1398 }, { "epoch": 0.24135254032605882, "grad_norm": 0.5859375, "learning_rate": 1.9333428660986118e-05, "loss": 1.537, "step": 1399 }, { "epoch": 0.2415250582247908, "grad_norm": 0.734375, "learning_rate": 1.9332451198277668e-05, "loss": 1.5899, "step": 1400 }, { "epoch": 0.2415250582247908, "eval_loss": 1.509263277053833, "eval_runtime": 11.0113, "eval_samples_per_second": 92.996, "eval_steps_per_second": 23.249, "step": 1400 }, { "epoch": 0.24169757612352283, "grad_norm": 0.6875, "learning_rate": 1.933147304416292e-05, "loss": 1.555, "step": 1401 }, { "epoch": 0.24187009402225482, "grad_norm": 0.734375, "learning_rate": 1.9330494198714347e-05, "loss": 1.5731, "step": 1402 }, { "epoch": 0.2420426119209868, "grad_norm": 0.8515625, "learning_rate": 1.932951466200446e-05, "loss": 1.5459, "step": 1403 }, { "epoch": 0.2422151298197188, "grad_norm": 0.6484375, "learning_rate": 1.9328534434105835e-05, "loss": 1.5302, "step": 1404 }, { "epoch": 0.24238764771845078, "grad_norm": 0.70703125, "learning_rate": 1.9327553515091092e-05, "loss": 1.452, "step": 1405 }, { "epoch": 0.24256016561718277, "grad_norm": 0.77734375, "learning_rate": 1.93265719050329e-05, "loss": 1.516, "step": 1406 }, { "epoch": 0.2427326835159148, "grad_norm": 0.8359375, "learning_rate": 1.9325589604003992e-05, "loss": 1.5422, "step": 1407 }, { "epoch": 0.24290520141464678, "grad_norm": 0.6640625, "learning_rate": 1.9324606612077135e-05, "loss": 1.5611, "step": 1408 }, { "epoch": 0.24307771931337877, "grad_norm": 0.703125, "learning_rate": 1.9323622929325155e-05, "loss": 1.5626, "step": 1409 }, { "epoch": 0.24325023721211075, "grad_norm": 0.6953125, "learning_rate": 1.9322638555820934e-05, "loss": 1.5338, "step": 1410 }, { "epoch": 0.24342275511084274, "grad_norm": 0.71875, "learning_rate": 1.9321653491637397e-05, "loss": 1.6135, "step": 1411 }, { "epoch": 0.24359527300957473, "grad_norm": 0.796875, "learning_rate": 1.9320667736847526e-05, "loss": 1.5297, "step": 1412 }, { "epoch": 0.24376779090830675, "grad_norm": 0.8125, "learning_rate": 1.931968129152435e-05, "loss": 1.5739, "step": 1413 }, { "epoch": 0.24394030880703874, "grad_norm": 0.83203125, "learning_rate": 1.931869415574095e-05, "loss": 1.4873, "step": 1414 }, { "epoch": 0.24411282670577072, "grad_norm": 0.85546875, "learning_rate": 1.9317706329570467e-05, "loss": 1.5725, "step": 1415 }, { "epoch": 0.2442853446045027, "grad_norm": 0.65234375, "learning_rate": 1.9316717813086073e-05, "loss": 1.4288, "step": 1416 }, { "epoch": 0.2444578625032347, "grad_norm": 0.7265625, "learning_rate": 1.9315728606361012e-05, "loss": 1.4411, "step": 1417 }, { "epoch": 0.2446303804019667, "grad_norm": 0.7578125, "learning_rate": 1.931473870946857e-05, "loss": 1.4901, "step": 1418 }, { "epoch": 0.2448028983006987, "grad_norm": 0.65625, "learning_rate": 1.9313748122482085e-05, "loss": 1.5489, "step": 1419 }, { "epoch": 0.2449754161994307, "grad_norm": 0.828125, "learning_rate": 1.9312756845474937e-05, "loss": 1.4666, "step": 1420 }, { "epoch": 0.24514793409816268, "grad_norm": 3.109375, "learning_rate": 1.931176487852058e-05, "loss": 1.5141, "step": 1421 }, { "epoch": 0.24532045199689467, "grad_norm": 0.60546875, "learning_rate": 1.9310772221692495e-05, "loss": 1.5155, "step": 1422 }, { "epoch": 0.24549296989562666, "grad_norm": 0.703125, "learning_rate": 1.9309778875064228e-05, "loss": 1.5135, "step": 1423 }, { "epoch": 0.24566548779435868, "grad_norm": 0.97265625, "learning_rate": 1.930878483870937e-05, "loss": 1.509, "step": 1424 }, { "epoch": 0.24583800569309067, "grad_norm": 0.6796875, "learning_rate": 1.9307790112701573e-05, "loss": 1.5216, "step": 1425 }, { "epoch": 0.24601052359182266, "grad_norm": 0.66796875, "learning_rate": 1.9306794697114522e-05, "loss": 1.5448, "step": 1426 }, { "epoch": 0.24618304149055464, "grad_norm": 0.6640625, "learning_rate": 1.9305798592021968e-05, "loss": 1.459, "step": 1427 }, { "epoch": 0.24635555938928663, "grad_norm": 0.6328125, "learning_rate": 1.930480179749771e-05, "loss": 1.5335, "step": 1428 }, { "epoch": 0.24652807728801862, "grad_norm": 0.6171875, "learning_rate": 1.9303804313615596e-05, "loss": 1.5651, "step": 1429 }, { "epoch": 0.24670059518675064, "grad_norm": 0.66015625, "learning_rate": 1.9302806140449525e-05, "loss": 1.5367, "step": 1430 }, { "epoch": 0.24687311308548263, "grad_norm": 0.6328125, "learning_rate": 1.9301807278073447e-05, "loss": 1.503, "step": 1431 }, { "epoch": 0.24704563098421461, "grad_norm": 0.68359375, "learning_rate": 1.9300807726561368e-05, "loss": 1.5215, "step": 1432 }, { "epoch": 0.2472181488829466, "grad_norm": 0.5859375, "learning_rate": 1.929980748598734e-05, "loss": 1.5557, "step": 1433 }, { "epoch": 0.2473906667816786, "grad_norm": 0.69140625, "learning_rate": 1.9298806556425462e-05, "loss": 1.495, "step": 1434 }, { "epoch": 0.24756318468041058, "grad_norm": 0.73046875, "learning_rate": 1.9297804937949894e-05, "loss": 1.5105, "step": 1435 }, { "epoch": 0.2477357025791426, "grad_norm": 0.6640625, "learning_rate": 1.929680263063484e-05, "loss": 1.5457, "step": 1436 }, { "epoch": 0.24790822047787459, "grad_norm": 0.8984375, "learning_rate": 1.929579963455456e-05, "loss": 1.5825, "step": 1437 }, { "epoch": 0.24808073837660657, "grad_norm": 0.85546875, "learning_rate": 1.929479594978336e-05, "loss": 1.4412, "step": 1438 }, { "epoch": 0.24825325627533856, "grad_norm": 0.6484375, "learning_rate": 1.9293791576395597e-05, "loss": 1.5191, "step": 1439 }, { "epoch": 0.24842577417407055, "grad_norm": 0.72265625, "learning_rate": 1.9292786514465685e-05, "loss": 1.4836, "step": 1440 }, { "epoch": 0.24859829207280254, "grad_norm": 0.6953125, "learning_rate": 1.9291780764068085e-05, "loss": 1.4466, "step": 1441 }, { "epoch": 0.24877080997153456, "grad_norm": 0.85546875, "learning_rate": 1.9290774325277305e-05, "loss": 1.6016, "step": 1442 }, { "epoch": 0.24894332787026655, "grad_norm": 0.82421875, "learning_rate": 1.9289767198167918e-05, "loss": 1.5898, "step": 1443 }, { "epoch": 0.24911584576899853, "grad_norm": 0.79296875, "learning_rate": 1.9288759382814526e-05, "loss": 1.5457, "step": 1444 }, { "epoch": 0.24928836366773052, "grad_norm": 0.70703125, "learning_rate": 1.9287750879291802e-05, "loss": 1.4422, "step": 1445 }, { "epoch": 0.2494608815664625, "grad_norm": 0.66015625, "learning_rate": 1.928674168767446e-05, "loss": 1.6013, "step": 1446 }, { "epoch": 0.24963339946519453, "grad_norm": 0.6640625, "learning_rate": 1.928573180803727e-05, "loss": 1.5306, "step": 1447 }, { "epoch": 0.24980591736392652, "grad_norm": 0.77734375, "learning_rate": 1.9284721240455045e-05, "loss": 1.4907, "step": 1448 }, { "epoch": 0.2499784352626585, "grad_norm": 0.72265625, "learning_rate": 1.9283709985002655e-05, "loss": 1.5263, "step": 1449 }, { "epoch": 0.2501509531613905, "grad_norm": 0.79296875, "learning_rate": 1.9282698041755027e-05, "loss": 1.5184, "step": 1450 }, { "epoch": 0.2503234710601225, "grad_norm": 0.6953125, "learning_rate": 1.9281685410787125e-05, "loss": 1.5459, "step": 1451 }, { "epoch": 0.2504959889588545, "grad_norm": 1.1796875, "learning_rate": 1.928067209217397e-05, "loss": 1.6034, "step": 1452 }, { "epoch": 0.25066850685758646, "grad_norm": 0.75390625, "learning_rate": 1.9279658085990642e-05, "loss": 1.5055, "step": 1453 }, { "epoch": 0.2508410247563185, "grad_norm": 0.6015625, "learning_rate": 1.927864339231226e-05, "loss": 1.4582, "step": 1454 }, { "epoch": 0.25101354265505044, "grad_norm": 0.640625, "learning_rate": 1.9277628011214e-05, "loss": 1.5079, "step": 1455 }, { "epoch": 0.25118606055378245, "grad_norm": 0.6953125, "learning_rate": 1.9276611942771083e-05, "loss": 1.4217, "step": 1456 }, { "epoch": 0.25135857845251447, "grad_norm": 0.66796875, "learning_rate": 1.92755951870588e-05, "loss": 1.5337, "step": 1457 }, { "epoch": 0.25153109635124643, "grad_norm": 0.92578125, "learning_rate": 1.927457774415246e-05, "loss": 1.4909, "step": 1458 }, { "epoch": 0.25170361424997845, "grad_norm": 0.61328125, "learning_rate": 1.9273559614127455e-05, "loss": 1.4921, "step": 1459 }, { "epoch": 0.2518761321487104, "grad_norm": 0.625, "learning_rate": 1.9272540797059208e-05, "loss": 1.4744, "step": 1460 }, { "epoch": 0.2520486500474424, "grad_norm": 0.65234375, "learning_rate": 1.9271521293023202e-05, "loss": 1.5649, "step": 1461 }, { "epoch": 0.25222116794617444, "grad_norm": 0.625, "learning_rate": 1.927050110209497e-05, "loss": 1.5705, "step": 1462 }, { "epoch": 0.2523936858449064, "grad_norm": 0.65234375, "learning_rate": 1.9269480224350087e-05, "loss": 1.6142, "step": 1463 }, { "epoch": 0.2525662037436384, "grad_norm": 0.76171875, "learning_rate": 1.9268458659864194e-05, "loss": 1.6079, "step": 1464 }, { "epoch": 0.2527387216423704, "grad_norm": 0.66015625, "learning_rate": 1.926743640871297e-05, "loss": 1.5139, "step": 1465 }, { "epoch": 0.2529112395411024, "grad_norm": 0.6328125, "learning_rate": 1.9266413470972153e-05, "loss": 1.5096, "step": 1466 }, { "epoch": 0.25308375743983436, "grad_norm": 0.65625, "learning_rate": 1.9265389846717522e-05, "loss": 1.5319, "step": 1467 }, { "epoch": 0.2532562753385664, "grad_norm": 0.765625, "learning_rate": 1.926436553602492e-05, "loss": 1.61, "step": 1468 }, { "epoch": 0.2534287932372984, "grad_norm": 0.6171875, "learning_rate": 1.9263340538970237e-05, "loss": 1.5454, "step": 1469 }, { "epoch": 0.25360131113603035, "grad_norm": 0.77734375, "learning_rate": 1.9262314855629405e-05, "loss": 1.5231, "step": 1470 }, { "epoch": 0.25377382903476237, "grad_norm": 0.71484375, "learning_rate": 1.9261288486078414e-05, "loss": 1.477, "step": 1471 }, { "epoch": 0.2539463469334943, "grad_norm": 0.6640625, "learning_rate": 1.9260261430393306e-05, "loss": 1.4894, "step": 1472 }, { "epoch": 0.25411886483222634, "grad_norm": 0.6484375, "learning_rate": 1.9259233688650167e-05, "loss": 1.4668, "step": 1473 }, { "epoch": 0.25429138273095836, "grad_norm": 0.67578125, "learning_rate": 1.9258205260925143e-05, "loss": 1.4983, "step": 1474 }, { "epoch": 0.2544639006296903, "grad_norm": 0.79296875, "learning_rate": 1.925717614729443e-05, "loss": 1.5638, "step": 1475 }, { "epoch": 0.25463641852842234, "grad_norm": 0.78515625, "learning_rate": 1.9256146347834262e-05, "loss": 1.5986, "step": 1476 }, { "epoch": 0.2548089364271543, "grad_norm": 1.2578125, "learning_rate": 1.9255115862620938e-05, "loss": 1.4828, "step": 1477 }, { "epoch": 0.2549814543258863, "grad_norm": 0.69921875, "learning_rate": 1.9254084691730804e-05, "loss": 1.6261, "step": 1478 }, { "epoch": 0.25515397222461833, "grad_norm": 0.62890625, "learning_rate": 1.9253052835240252e-05, "loss": 1.5251, "step": 1479 }, { "epoch": 0.2553264901233503, "grad_norm": 0.83984375, "learning_rate": 1.925202029322573e-05, "loss": 1.6185, "step": 1480 }, { "epoch": 0.2554990080220823, "grad_norm": 0.81640625, "learning_rate": 1.9250987065763737e-05, "loss": 1.5007, "step": 1481 }, { "epoch": 0.25567152592081427, "grad_norm": 0.83203125, "learning_rate": 1.9249953152930818e-05, "loss": 1.6077, "step": 1482 }, { "epoch": 0.2558440438195463, "grad_norm": 1.0234375, "learning_rate": 1.9248918554803576e-05, "loss": 1.4667, "step": 1483 }, { "epoch": 0.25601656171827825, "grad_norm": 0.79296875, "learning_rate": 1.9247883271458653e-05, "loss": 1.5333, "step": 1484 }, { "epoch": 0.25618907961701026, "grad_norm": 1.40625, "learning_rate": 1.9246847302972754e-05, "loss": 1.5543, "step": 1485 }, { "epoch": 0.2563615975157423, "grad_norm": 0.77734375, "learning_rate": 1.9245810649422633e-05, "loss": 1.5163, "step": 1486 }, { "epoch": 0.25653411541447424, "grad_norm": 0.73046875, "learning_rate": 1.924477331088509e-05, "loss": 1.5476, "step": 1487 }, { "epoch": 0.25670663331320626, "grad_norm": 0.67578125, "learning_rate": 1.924373528743697e-05, "loss": 1.4741, "step": 1488 }, { "epoch": 0.2568791512119382, "grad_norm": 0.859375, "learning_rate": 1.924269657915519e-05, "loss": 1.4808, "step": 1489 }, { "epoch": 0.25705166911067023, "grad_norm": 0.78125, "learning_rate": 1.9241657186116688e-05, "loss": 1.4003, "step": 1490 }, { "epoch": 0.25722418700940225, "grad_norm": 0.61328125, "learning_rate": 1.9240617108398482e-05, "loss": 1.4317, "step": 1491 }, { "epoch": 0.2573967049081342, "grad_norm": 0.91796875, "learning_rate": 1.923957634607762e-05, "loss": 1.4747, "step": 1492 }, { "epoch": 0.2575692228068662, "grad_norm": 0.8203125, "learning_rate": 1.9238534899231216e-05, "loss": 1.5198, "step": 1493 }, { "epoch": 0.2577417407055982, "grad_norm": 0.71484375, "learning_rate": 1.923749276793642e-05, "loss": 1.4781, "step": 1494 }, { "epoch": 0.2579142586043302, "grad_norm": 0.8203125, "learning_rate": 1.9236449952270437e-05, "loss": 1.5068, "step": 1495 }, { "epoch": 0.25808677650306217, "grad_norm": 0.83984375, "learning_rate": 1.923540645231053e-05, "loss": 1.5955, "step": 1496 }, { "epoch": 0.2582592944017942, "grad_norm": 0.80859375, "learning_rate": 1.923436226813401e-05, "loss": 1.5487, "step": 1497 }, { "epoch": 0.2584318123005262, "grad_norm": 0.89453125, "learning_rate": 1.9233317399818237e-05, "loss": 1.6363, "step": 1498 }, { "epoch": 0.25860433019925816, "grad_norm": 0.6875, "learning_rate": 1.9232271847440614e-05, "loss": 1.588, "step": 1499 }, { "epoch": 0.2587768480979902, "grad_norm": 1.140625, "learning_rate": 1.923122561107861e-05, "loss": 1.5108, "step": 1500 }, { "epoch": 0.2587768480979902, "eval_loss": 1.5035676956176758, "eval_runtime": 10.9566, "eval_samples_per_second": 93.46, "eval_steps_per_second": 23.365, "step": 1500 }, { "epoch": 0.25894936599672214, "grad_norm": 1.0859375, "learning_rate": 1.923017869080973e-05, "loss": 1.5098, "step": 1501 }, { "epoch": 0.25912188389545415, "grad_norm": 0.7109375, "learning_rate": 1.9229131086711542e-05, "loss": 1.386, "step": 1502 }, { "epoch": 0.25929440179418617, "grad_norm": 0.8359375, "learning_rate": 1.9228082798861656e-05, "loss": 1.5323, "step": 1503 }, { "epoch": 0.25946691969291813, "grad_norm": 0.7734375, "learning_rate": 1.9227033827337735e-05, "loss": 1.5277, "step": 1504 }, { "epoch": 0.25963943759165015, "grad_norm": 0.76171875, "learning_rate": 1.9225984172217497e-05, "loss": 1.5729, "step": 1505 }, { "epoch": 0.2598119554903821, "grad_norm": 0.82421875, "learning_rate": 1.9224933833578706e-05, "loss": 1.5361, "step": 1506 }, { "epoch": 0.2599844733891141, "grad_norm": 0.99609375, "learning_rate": 1.9223882811499175e-05, "loss": 1.465, "step": 1507 }, { "epoch": 0.26015699128784614, "grad_norm": 0.8125, "learning_rate": 1.9222831106056768e-05, "loss": 1.4906, "step": 1508 }, { "epoch": 0.2603295091865781, "grad_norm": 0.828125, "learning_rate": 1.922177871732941e-05, "loss": 1.5643, "step": 1509 }, { "epoch": 0.2605020270853101, "grad_norm": 0.98828125, "learning_rate": 1.9220725645395066e-05, "loss": 1.596, "step": 1510 }, { "epoch": 0.2606745449840421, "grad_norm": 0.7421875, "learning_rate": 1.921967189033175e-05, "loss": 1.482, "step": 1511 }, { "epoch": 0.2608470628827741, "grad_norm": 0.5703125, "learning_rate": 1.9218617452217534e-05, "loss": 1.5303, "step": 1512 }, { "epoch": 0.26101958078150606, "grad_norm": 0.91796875, "learning_rate": 1.9217562331130536e-05, "loss": 1.4882, "step": 1513 }, { "epoch": 0.26119209868023807, "grad_norm": 0.80078125, "learning_rate": 1.9216506527148926e-05, "loss": 1.5004, "step": 1514 }, { "epoch": 0.2613646165789701, "grad_norm": 0.703125, "learning_rate": 1.9215450040350924e-05, "loss": 1.5241, "step": 1515 }, { "epoch": 0.26153713447770205, "grad_norm": 0.65625, "learning_rate": 1.9214392870814805e-05, "loss": 1.5178, "step": 1516 }, { "epoch": 0.26170965237643407, "grad_norm": 0.94140625, "learning_rate": 1.921333501861889e-05, "loss": 1.5519, "step": 1517 }, { "epoch": 0.261882170275166, "grad_norm": 0.88671875, "learning_rate": 1.9212276483841544e-05, "loss": 1.5706, "step": 1518 }, { "epoch": 0.26205468817389804, "grad_norm": 0.66015625, "learning_rate": 1.9211217266561197e-05, "loss": 1.504, "step": 1519 }, { "epoch": 0.26222720607263006, "grad_norm": 0.8125, "learning_rate": 1.921015736685632e-05, "loss": 1.5253, "step": 1520 }, { "epoch": 0.262399723971362, "grad_norm": 0.81640625, "learning_rate": 1.920909678480544e-05, "loss": 1.4976, "step": 1521 }, { "epoch": 0.26257224187009404, "grad_norm": 0.73046875, "learning_rate": 1.9208035520487125e-05, "loss": 1.5356, "step": 1522 }, { "epoch": 0.262744759768826, "grad_norm": 0.6484375, "learning_rate": 1.920697357398001e-05, "loss": 1.4926, "step": 1523 }, { "epoch": 0.262917277667558, "grad_norm": 0.734375, "learning_rate": 1.920591094536276e-05, "loss": 1.539, "step": 1524 }, { "epoch": 0.26308979556629003, "grad_norm": 0.81640625, "learning_rate": 1.9204847634714105e-05, "loss": 1.5248, "step": 1525 }, { "epoch": 0.263262313465022, "grad_norm": 0.6484375, "learning_rate": 1.9203783642112825e-05, "loss": 1.4683, "step": 1526 }, { "epoch": 0.263434831363754, "grad_norm": 0.60546875, "learning_rate": 1.9202718967637745e-05, "loss": 1.6038, "step": 1527 }, { "epoch": 0.26360734926248597, "grad_norm": 0.65234375, "learning_rate": 1.9201653611367742e-05, "loss": 1.5553, "step": 1528 }, { "epoch": 0.263779867161218, "grad_norm": 0.671875, "learning_rate": 1.9200587573381747e-05, "loss": 1.5876, "step": 1529 }, { "epoch": 0.26395238505994995, "grad_norm": 0.6171875, "learning_rate": 1.9199520853758734e-05, "loss": 1.6082, "step": 1530 }, { "epoch": 0.26412490295868196, "grad_norm": 0.67578125, "learning_rate": 1.9198453452577738e-05, "loss": 1.5431, "step": 1531 }, { "epoch": 0.264297420857414, "grad_norm": 0.671875, "learning_rate": 1.9197385369917834e-05, "loss": 1.5877, "step": 1532 }, { "epoch": 0.26446993875614594, "grad_norm": 0.61328125, "learning_rate": 1.9196316605858152e-05, "loss": 1.5066, "step": 1533 }, { "epoch": 0.26464245665487796, "grad_norm": 0.6640625, "learning_rate": 1.9195247160477874e-05, "loss": 1.496, "step": 1534 }, { "epoch": 0.2648149745536099, "grad_norm": 0.65234375, "learning_rate": 1.9194177033856233e-05, "loss": 1.4877, "step": 1535 }, { "epoch": 0.26498749245234193, "grad_norm": 0.62109375, "learning_rate": 1.919310622607251e-05, "loss": 1.532, "step": 1536 }, { "epoch": 0.26516001035107395, "grad_norm": 0.640625, "learning_rate": 1.9192034737206037e-05, "loss": 1.5576, "step": 1537 }, { "epoch": 0.2653325282498059, "grad_norm": 0.65625, "learning_rate": 1.91909625673362e-05, "loss": 1.4709, "step": 1538 }, { "epoch": 0.2655050461485379, "grad_norm": 0.6796875, "learning_rate": 1.9189889716542423e-05, "loss": 1.5417, "step": 1539 }, { "epoch": 0.2656775640472699, "grad_norm": 0.671875, "learning_rate": 1.9188816184904194e-05, "loss": 1.5503, "step": 1540 }, { "epoch": 0.2658500819460019, "grad_norm": 0.640625, "learning_rate": 1.9187741972501052e-05, "loss": 1.5246, "step": 1541 }, { "epoch": 0.26602259984473386, "grad_norm": 0.6484375, "learning_rate": 1.9186667079412575e-05, "loss": 1.4977, "step": 1542 }, { "epoch": 0.2661951177434659, "grad_norm": 0.59375, "learning_rate": 1.91855915057184e-05, "loss": 1.4235, "step": 1543 }, { "epoch": 0.2663676356421979, "grad_norm": 0.6171875, "learning_rate": 1.918451525149821e-05, "loss": 1.6124, "step": 1544 }, { "epoch": 0.26654015354092986, "grad_norm": 1.046875, "learning_rate": 1.9183438316831743e-05, "loss": 1.5644, "step": 1545 }, { "epoch": 0.2667126714396619, "grad_norm": 0.62890625, "learning_rate": 1.918236070179879e-05, "loss": 1.4849, "step": 1546 }, { "epoch": 0.26688518933839384, "grad_norm": 0.62890625, "learning_rate": 1.9181282406479175e-05, "loss": 1.4897, "step": 1547 }, { "epoch": 0.26705770723712585, "grad_norm": 0.6796875, "learning_rate": 1.9180203430952794e-05, "loss": 1.4912, "step": 1548 }, { "epoch": 0.26723022513585787, "grad_norm": 0.640625, "learning_rate": 1.9179123775299584e-05, "loss": 1.5156, "step": 1549 }, { "epoch": 0.26740274303458983, "grad_norm": 0.94921875, "learning_rate": 1.9178043439599528e-05, "loss": 1.6099, "step": 1550 }, { "epoch": 0.26757526093332185, "grad_norm": 0.58984375, "learning_rate": 1.9176962423932674e-05, "loss": 1.4946, "step": 1551 }, { "epoch": 0.2677477788320538, "grad_norm": 0.66015625, "learning_rate": 1.9175880728379094e-05, "loss": 1.5194, "step": 1552 }, { "epoch": 0.2679202967307858, "grad_norm": 0.58203125, "learning_rate": 1.917479835301894e-05, "loss": 1.5548, "step": 1553 }, { "epoch": 0.26809281462951784, "grad_norm": 0.61328125, "learning_rate": 1.91737152979324e-05, "loss": 1.4607, "step": 1554 }, { "epoch": 0.2682653325282498, "grad_norm": 0.671875, "learning_rate": 1.917263156319971e-05, "loss": 1.5371, "step": 1555 }, { "epoch": 0.2684378504269818, "grad_norm": 0.7421875, "learning_rate": 1.9171547148901158e-05, "loss": 1.5787, "step": 1556 }, { "epoch": 0.2686103683257138, "grad_norm": 0.734375, "learning_rate": 1.9170462055117086e-05, "loss": 1.4766, "step": 1557 }, { "epoch": 0.2687828862244458, "grad_norm": 0.703125, "learning_rate": 1.916937628192789e-05, "loss": 1.4528, "step": 1558 }, { "epoch": 0.26895540412317775, "grad_norm": 0.7265625, "learning_rate": 1.9168289829414002e-05, "loss": 1.5461, "step": 1559 }, { "epoch": 0.26912792202190977, "grad_norm": 0.64453125, "learning_rate": 1.916720269765592e-05, "loss": 1.4871, "step": 1560 }, { "epoch": 0.2693004399206418, "grad_norm": 0.6796875, "learning_rate": 1.916611488673418e-05, "loss": 1.3621, "step": 1561 }, { "epoch": 0.26947295781937375, "grad_norm": 0.66015625, "learning_rate": 1.9165026396729377e-05, "loss": 1.5283, "step": 1562 }, { "epoch": 0.26964547571810576, "grad_norm": 0.609375, "learning_rate": 1.9163937227722154e-05, "loss": 1.4035, "step": 1563 }, { "epoch": 0.2698179936168377, "grad_norm": 0.609375, "learning_rate": 1.9162847379793203e-05, "loss": 1.5184, "step": 1564 }, { "epoch": 0.26999051151556974, "grad_norm": 0.62890625, "learning_rate": 1.9161756853023266e-05, "loss": 1.4852, "step": 1565 }, { "epoch": 0.27016302941430176, "grad_norm": 0.6015625, "learning_rate": 1.9160665647493136e-05, "loss": 1.542, "step": 1566 }, { "epoch": 0.2703355473130337, "grad_norm": 0.6484375, "learning_rate": 1.9159573763283654e-05, "loss": 1.4912, "step": 1567 }, { "epoch": 0.27050806521176574, "grad_norm": 0.61328125, "learning_rate": 1.9158481200475715e-05, "loss": 1.5102, "step": 1568 }, { "epoch": 0.2706805831104977, "grad_norm": 0.6328125, "learning_rate": 1.9157387959150265e-05, "loss": 1.4076, "step": 1569 }, { "epoch": 0.2708531010092297, "grad_norm": 0.65234375, "learning_rate": 1.91562940393883e-05, "loss": 1.4054, "step": 1570 }, { "epoch": 0.2710256189079617, "grad_norm": 0.64453125, "learning_rate": 1.9155199441270863e-05, "loss": 1.5908, "step": 1571 }, { "epoch": 0.2711981368066937, "grad_norm": 0.6640625, "learning_rate": 1.915410416487904e-05, "loss": 1.518, "step": 1572 }, { "epoch": 0.2713706547054257, "grad_norm": 0.625, "learning_rate": 1.9153008210293988e-05, "loss": 1.4726, "step": 1573 }, { "epoch": 0.27154317260415767, "grad_norm": 1.09375, "learning_rate": 1.9151911577596895e-05, "loss": 1.5939, "step": 1574 }, { "epoch": 0.2717156905028897, "grad_norm": 0.625, "learning_rate": 1.915081426686901e-05, "loss": 1.5122, "step": 1575 }, { "epoch": 0.27188820840162164, "grad_norm": 0.76171875, "learning_rate": 1.9149716278191625e-05, "loss": 1.4648, "step": 1576 }, { "epoch": 0.27206072630035366, "grad_norm": 0.6171875, "learning_rate": 1.914861761164609e-05, "loss": 1.4813, "step": 1577 }, { "epoch": 0.2722332441990857, "grad_norm": 0.69921875, "learning_rate": 1.91475182673138e-05, "loss": 1.5682, "step": 1578 }, { "epoch": 0.27240576209781764, "grad_norm": 0.65625, "learning_rate": 1.91464182452762e-05, "loss": 1.6082, "step": 1579 }, { "epoch": 0.27257827999654966, "grad_norm": 0.9140625, "learning_rate": 1.9145317545614787e-05, "loss": 1.6143, "step": 1580 }, { "epoch": 0.2727507978952816, "grad_norm": 0.8828125, "learning_rate": 1.9144216168411105e-05, "loss": 1.5168, "step": 1581 }, { "epoch": 0.27292331579401363, "grad_norm": 0.77734375, "learning_rate": 1.9143114113746755e-05, "loss": 1.4507, "step": 1582 }, { "epoch": 0.27309583369274565, "grad_norm": 0.6328125, "learning_rate": 1.9142011381703384e-05, "loss": 1.738, "step": 1583 }, { "epoch": 0.2732683515914776, "grad_norm": 0.859375, "learning_rate": 1.9140907972362684e-05, "loss": 1.5551, "step": 1584 }, { "epoch": 0.2734408694902096, "grad_norm": 0.65625, "learning_rate": 1.9139803885806413e-05, "loss": 1.4516, "step": 1585 }, { "epoch": 0.2736133873889416, "grad_norm": 0.7265625, "learning_rate": 1.9138699122116355e-05, "loss": 1.5982, "step": 1586 }, { "epoch": 0.2737859052876736, "grad_norm": 0.7421875, "learning_rate": 1.913759368137437e-05, "loss": 1.5159, "step": 1587 }, { "epoch": 0.27395842318640556, "grad_norm": 0.58203125, "learning_rate": 1.913648756366235e-05, "loss": 1.5375, "step": 1588 }, { "epoch": 0.2741309410851376, "grad_norm": 0.578125, "learning_rate": 1.913538076906224e-05, "loss": 1.5159, "step": 1589 }, { "epoch": 0.2743034589838696, "grad_norm": 0.76171875, "learning_rate": 1.913427329765604e-05, "loss": 1.5594, "step": 1590 }, { "epoch": 0.27447597688260156, "grad_norm": 0.64453125, "learning_rate": 1.913316514952581e-05, "loss": 1.5788, "step": 1591 }, { "epoch": 0.2746484947813336, "grad_norm": 0.65625, "learning_rate": 1.9132056324753634e-05, "loss": 1.4806, "step": 1592 }, { "epoch": 0.27482101268006554, "grad_norm": 0.59765625, "learning_rate": 1.9130946823421666e-05, "loss": 1.6308, "step": 1593 }, { "epoch": 0.27499353057879755, "grad_norm": 0.78125, "learning_rate": 1.9129836645612107e-05, "loss": 1.5213, "step": 1594 }, { "epoch": 0.27516604847752957, "grad_norm": 0.609375, "learning_rate": 1.91287257914072e-05, "loss": 1.3756, "step": 1595 }, { "epoch": 0.27533856637626153, "grad_norm": 0.62890625, "learning_rate": 1.912761426088925e-05, "loss": 1.591, "step": 1596 }, { "epoch": 0.27551108427499355, "grad_norm": 0.8046875, "learning_rate": 1.91265020541406e-05, "loss": 1.4783, "step": 1597 }, { "epoch": 0.2756836021737255, "grad_norm": 0.6015625, "learning_rate": 1.9125389171243656e-05, "loss": 1.4279, "step": 1598 }, { "epoch": 0.2758561200724575, "grad_norm": 0.74609375, "learning_rate": 1.912427561228086e-05, "loss": 1.487, "step": 1599 }, { "epoch": 0.2760286379711895, "grad_norm": 0.87890625, "learning_rate": 1.912316137733472e-05, "loss": 1.4767, "step": 1600 }, { "epoch": 0.2760286379711895, "eval_loss": 1.498151421546936, "eval_runtime": 10.8117, "eval_samples_per_second": 94.712, "eval_steps_per_second": 23.678, "step": 1600 }, { "epoch": 0.2762011558699215, "grad_norm": 0.72265625, "learning_rate": 1.9122046466487776e-05, "loss": 1.4793, "step": 1601 }, { "epoch": 0.2763736737686535, "grad_norm": 1.046875, "learning_rate": 1.912093087982264e-05, "loss": 1.6061, "step": 1602 }, { "epoch": 0.2765461916673855, "grad_norm": 0.94140625, "learning_rate": 1.911981461742195e-05, "loss": 1.4781, "step": 1603 }, { "epoch": 0.2767187095661175, "grad_norm": 0.6796875, "learning_rate": 1.9118697679368412e-05, "loss": 1.5748, "step": 1604 }, { "epoch": 0.27689122746484945, "grad_norm": 1.046875, "learning_rate": 1.911758006574477e-05, "loss": 1.5885, "step": 1605 }, { "epoch": 0.27706374536358147, "grad_norm": 1.078125, "learning_rate": 1.911646177663383e-05, "loss": 1.533, "step": 1606 }, { "epoch": 0.2772362632623135, "grad_norm": 0.60546875, "learning_rate": 1.9115342812118437e-05, "loss": 1.4983, "step": 1607 }, { "epoch": 0.27740878116104545, "grad_norm": 0.7265625, "learning_rate": 1.9114223172281498e-05, "loss": 1.4582, "step": 1608 }, { "epoch": 0.27758129905977746, "grad_norm": 0.71875, "learning_rate": 1.9113102857205952e-05, "loss": 1.4498, "step": 1609 }, { "epoch": 0.2777538169585094, "grad_norm": 0.61328125, "learning_rate": 1.911198186697481e-05, "loss": 1.575, "step": 1610 }, { "epoch": 0.27792633485724144, "grad_norm": 0.66015625, "learning_rate": 1.9110860201671112e-05, "loss": 1.5972, "step": 1611 }, { "epoch": 0.27809885275597346, "grad_norm": 0.7265625, "learning_rate": 1.9109737861377967e-05, "loss": 1.5344, "step": 1612 }, { "epoch": 0.2782713706547054, "grad_norm": 0.66796875, "learning_rate": 1.910861484617852e-05, "loss": 1.4919, "step": 1613 }, { "epoch": 0.27844388855343744, "grad_norm": 0.58984375, "learning_rate": 1.9107491156155974e-05, "loss": 1.4568, "step": 1614 }, { "epoch": 0.2786164064521694, "grad_norm": 0.96484375, "learning_rate": 1.9106366791393573e-05, "loss": 1.5189, "step": 1615 }, { "epoch": 0.2787889243509014, "grad_norm": 0.66015625, "learning_rate": 1.9105241751974624e-05, "loss": 1.4218, "step": 1616 }, { "epoch": 0.2789614422496334, "grad_norm": 0.65625, "learning_rate": 1.910411603798247e-05, "loss": 1.5128, "step": 1617 }, { "epoch": 0.2791339601483654, "grad_norm": 0.6640625, "learning_rate": 1.910298964950052e-05, "loss": 1.5849, "step": 1618 }, { "epoch": 0.2793064780470974, "grad_norm": 0.66015625, "learning_rate": 1.9101862586612214e-05, "loss": 1.4772, "step": 1619 }, { "epoch": 0.27947899594582937, "grad_norm": 0.5703125, "learning_rate": 1.9100734849401063e-05, "loss": 1.5158, "step": 1620 }, { "epoch": 0.2796515138445614, "grad_norm": 0.83984375, "learning_rate": 1.9099606437950605e-05, "loss": 1.5205, "step": 1621 }, { "epoch": 0.27982403174329334, "grad_norm": 0.71484375, "learning_rate": 1.9098477352344443e-05, "loss": 1.4296, "step": 1622 }, { "epoch": 0.27999654964202536, "grad_norm": 0.64453125, "learning_rate": 1.9097347592666232e-05, "loss": 1.5096, "step": 1623 }, { "epoch": 0.2801690675407574, "grad_norm": 0.71875, "learning_rate": 1.9096217158999667e-05, "loss": 1.4957, "step": 1624 }, { "epoch": 0.28034158543948934, "grad_norm": 0.640625, "learning_rate": 1.90950860514285e-05, "loss": 1.4951, "step": 1625 }, { "epoch": 0.28051410333822135, "grad_norm": 0.703125, "learning_rate": 1.909395427003653e-05, "loss": 1.5029, "step": 1626 }, { "epoch": 0.2806866212369533, "grad_norm": 0.73046875, "learning_rate": 1.909282181490761e-05, "loss": 1.512, "step": 1627 }, { "epoch": 0.28085913913568533, "grad_norm": 0.625, "learning_rate": 1.9091688686125628e-05, "loss": 1.5529, "step": 1628 }, { "epoch": 0.28103165703441735, "grad_norm": 0.6171875, "learning_rate": 1.9090554883774547e-05, "loss": 1.5084, "step": 1629 }, { "epoch": 0.2812041749331493, "grad_norm": 0.58984375, "learning_rate": 1.9089420407938354e-05, "loss": 1.4911, "step": 1630 }, { "epoch": 0.2813766928318813, "grad_norm": 0.65625, "learning_rate": 1.9088285258701108e-05, "loss": 1.5517, "step": 1631 }, { "epoch": 0.2815492107306133, "grad_norm": 0.6953125, "learning_rate": 1.90871494361469e-05, "loss": 1.3719, "step": 1632 }, { "epoch": 0.2817217286293453, "grad_norm": 0.57421875, "learning_rate": 1.9086012940359887e-05, "loss": 1.4742, "step": 1633 }, { "epoch": 0.28189424652807726, "grad_norm": 0.66015625, "learning_rate": 1.908487577142426e-05, "loss": 1.4639, "step": 1634 }, { "epoch": 0.2820667644268093, "grad_norm": 0.6796875, "learning_rate": 1.9083737929424272e-05, "loss": 1.5469, "step": 1635 }, { "epoch": 0.2822392823255413, "grad_norm": 0.59765625, "learning_rate": 1.9082599414444222e-05, "loss": 1.5729, "step": 1636 }, { "epoch": 0.28241180022427326, "grad_norm": 0.625, "learning_rate": 1.9081460226568456e-05, "loss": 1.5184, "step": 1637 }, { "epoch": 0.2825843181230053, "grad_norm": 1.0234375, "learning_rate": 1.908032036588137e-05, "loss": 1.5415, "step": 1638 }, { "epoch": 0.28275683602173723, "grad_norm": 0.71484375, "learning_rate": 1.9079179832467417e-05, "loss": 1.5335, "step": 1639 }, { "epoch": 0.28292935392046925, "grad_norm": 0.609375, "learning_rate": 1.9078038626411093e-05, "loss": 1.5226, "step": 1640 }, { "epoch": 0.28310187181920127, "grad_norm": 0.5703125, "learning_rate": 1.9076896747796945e-05, "loss": 1.4499, "step": 1641 }, { "epoch": 0.28327438971793323, "grad_norm": 0.60546875, "learning_rate": 1.9075754196709574e-05, "loss": 1.5911, "step": 1642 }, { "epoch": 0.28344690761666524, "grad_norm": 0.640625, "learning_rate": 1.9074610973233622e-05, "loss": 1.5831, "step": 1643 }, { "epoch": 0.2836194255153972, "grad_norm": 0.62109375, "learning_rate": 1.9073467077453783e-05, "loss": 1.4548, "step": 1644 }, { "epoch": 0.2837919434141292, "grad_norm": 0.60546875, "learning_rate": 1.9072322509454814e-05, "loss": 1.5288, "step": 1645 }, { "epoch": 0.2839644613128612, "grad_norm": 0.671875, "learning_rate": 1.9071177269321507e-05, "loss": 1.4839, "step": 1646 }, { "epoch": 0.2841369792115932, "grad_norm": 0.6640625, "learning_rate": 1.907003135713871e-05, "loss": 1.5464, "step": 1647 }, { "epoch": 0.2843094971103252, "grad_norm": 0.60546875, "learning_rate": 1.9068884772991313e-05, "loss": 1.6519, "step": 1648 }, { "epoch": 0.2844820150090572, "grad_norm": 1.890625, "learning_rate": 1.9067737516964274e-05, "loss": 1.4762, "step": 1649 }, { "epoch": 0.2846545329077892, "grad_norm": 0.6328125, "learning_rate": 1.9066589589142577e-05, "loss": 1.5852, "step": 1650 }, { "epoch": 0.28482705080652115, "grad_norm": 0.64453125, "learning_rate": 1.9065440989611274e-05, "loss": 1.6728, "step": 1651 }, { "epoch": 0.28499956870525317, "grad_norm": 0.6015625, "learning_rate": 1.9064291718455455e-05, "loss": 1.4933, "step": 1652 }, { "epoch": 0.2851720866039852, "grad_norm": 0.72265625, "learning_rate": 1.9063141775760274e-05, "loss": 1.5006, "step": 1653 }, { "epoch": 0.28534460450271715, "grad_norm": 0.6171875, "learning_rate": 1.906199116161092e-05, "loss": 1.4257, "step": 1654 }, { "epoch": 0.28551712240144916, "grad_norm": 0.828125, "learning_rate": 1.906083987609264e-05, "loss": 1.5513, "step": 1655 }, { "epoch": 0.2856896403001811, "grad_norm": 0.63671875, "learning_rate": 1.9059687919290727e-05, "loss": 1.5997, "step": 1656 }, { "epoch": 0.28586215819891314, "grad_norm": 0.9375, "learning_rate": 1.9058535291290524e-05, "loss": 1.6176, "step": 1657 }, { "epoch": 0.28603467609764516, "grad_norm": 0.65234375, "learning_rate": 1.9057381992177426e-05, "loss": 1.5312, "step": 1658 }, { "epoch": 0.2862071939963771, "grad_norm": 0.6875, "learning_rate": 1.905622802203688e-05, "loss": 1.6063, "step": 1659 }, { "epoch": 0.28637971189510913, "grad_norm": 0.76953125, "learning_rate": 1.9055073380954372e-05, "loss": 1.5179, "step": 1660 }, { "epoch": 0.2865522297938411, "grad_norm": 0.67578125, "learning_rate": 1.905391806901545e-05, "loss": 1.4903, "step": 1661 }, { "epoch": 0.2867247476925731, "grad_norm": 0.76953125, "learning_rate": 1.9052762086305706e-05, "loss": 1.583, "step": 1662 }, { "epoch": 0.2868972655913051, "grad_norm": 0.70703125, "learning_rate": 1.9051605432910783e-05, "loss": 1.5453, "step": 1663 }, { "epoch": 0.2870697834900371, "grad_norm": 0.75390625, "learning_rate": 1.9050448108916373e-05, "loss": 1.474, "step": 1664 }, { "epoch": 0.2872423013887691, "grad_norm": 0.6484375, "learning_rate": 1.904929011440822e-05, "loss": 1.4971, "step": 1665 }, { "epoch": 0.28741481928750107, "grad_norm": 0.65234375, "learning_rate": 1.9048131449472107e-05, "loss": 1.5126, "step": 1666 }, { "epoch": 0.2875873371862331, "grad_norm": 0.61328125, "learning_rate": 1.9046972114193884e-05, "loss": 1.4895, "step": 1667 }, { "epoch": 0.28775985508496504, "grad_norm": 1.2890625, "learning_rate": 1.904581210865944e-05, "loss": 1.4972, "step": 1668 }, { "epoch": 0.28793237298369706, "grad_norm": 0.76953125, "learning_rate": 1.904465143295471e-05, "loss": 1.5122, "step": 1669 }, { "epoch": 0.2881048908824291, "grad_norm": 0.71875, "learning_rate": 1.904349008716569e-05, "loss": 1.4859, "step": 1670 }, { "epoch": 0.28827740878116104, "grad_norm": 0.9296875, "learning_rate": 1.904232807137842e-05, "loss": 1.5311, "step": 1671 }, { "epoch": 0.28844992667989305, "grad_norm": 0.640625, "learning_rate": 1.9041165385678984e-05, "loss": 1.4518, "step": 1672 }, { "epoch": 0.288622444578625, "grad_norm": 0.72265625, "learning_rate": 1.9040002030153532e-05, "loss": 1.5663, "step": 1673 }, { "epoch": 0.28879496247735703, "grad_norm": 0.82421875, "learning_rate": 1.903883800488824e-05, "loss": 1.4741, "step": 1674 }, { "epoch": 0.288967480376089, "grad_norm": 0.62109375, "learning_rate": 1.9037673309969347e-05, "loss": 1.5701, "step": 1675 }, { "epoch": 0.289139998274821, "grad_norm": 0.71875, "learning_rate": 1.9036507945483152e-05, "loss": 1.5381, "step": 1676 }, { "epoch": 0.289312516173553, "grad_norm": 0.57421875, "learning_rate": 1.9035341911515983e-05, "loss": 1.4083, "step": 1677 }, { "epoch": 0.289485034072285, "grad_norm": 0.58203125, "learning_rate": 1.9034175208154227e-05, "loss": 1.5595, "step": 1678 }, { "epoch": 0.289657551971017, "grad_norm": 0.5859375, "learning_rate": 1.903300783548433e-05, "loss": 1.5761, "step": 1679 }, { "epoch": 0.28983006986974896, "grad_norm": 0.578125, "learning_rate": 1.9031839793592764e-05, "loss": 1.5564, "step": 1680 }, { "epoch": 0.290002587768481, "grad_norm": 0.63671875, "learning_rate": 1.9030671082566076e-05, "loss": 1.4697, "step": 1681 }, { "epoch": 0.290175105667213, "grad_norm": 0.6953125, "learning_rate": 1.9029501702490848e-05, "loss": 1.4602, "step": 1682 }, { "epoch": 0.29034762356594496, "grad_norm": 0.67578125, "learning_rate": 1.9028331653453715e-05, "loss": 1.5515, "step": 1683 }, { "epoch": 0.290520141464677, "grad_norm": 0.67578125, "learning_rate": 1.9027160935541365e-05, "loss": 1.3977, "step": 1684 }, { "epoch": 0.29069265936340893, "grad_norm": 0.73828125, "learning_rate": 1.902598954884052e-05, "loss": 1.5655, "step": 1685 }, { "epoch": 0.29086517726214095, "grad_norm": 0.5703125, "learning_rate": 1.902481749343798e-05, "loss": 1.5035, "step": 1686 }, { "epoch": 0.29103769516087297, "grad_norm": 0.99609375, "learning_rate": 1.9023644769420567e-05, "loss": 1.5112, "step": 1687 }, { "epoch": 0.2912102130596049, "grad_norm": 1.5234375, "learning_rate": 1.902247137687517e-05, "loss": 1.5383, "step": 1688 }, { "epoch": 0.29138273095833694, "grad_norm": 0.69921875, "learning_rate": 1.9021297315888715e-05, "loss": 1.5061, "step": 1689 }, { "epoch": 0.2915552488570689, "grad_norm": 0.8125, "learning_rate": 1.902012258654819e-05, "loss": 1.5495, "step": 1690 }, { "epoch": 0.2917277667558009, "grad_norm": 0.6328125, "learning_rate": 1.9018947188940623e-05, "loss": 1.5872, "step": 1691 }, { "epoch": 0.2919002846545329, "grad_norm": 0.90625, "learning_rate": 1.901777112315309e-05, "loss": 1.4907, "step": 1692 }, { "epoch": 0.2920728025532649, "grad_norm": 0.78515625, "learning_rate": 1.9016594389272734e-05, "loss": 1.5275, "step": 1693 }, { "epoch": 0.2922453204519969, "grad_norm": 0.75, "learning_rate": 1.9015416987386725e-05, "loss": 1.5049, "step": 1694 }, { "epoch": 0.2924178383507289, "grad_norm": 0.8203125, "learning_rate": 1.9014238917582297e-05, "loss": 1.6413, "step": 1695 }, { "epoch": 0.2925903562494609, "grad_norm": 0.72265625, "learning_rate": 1.9013060179946722e-05, "loss": 1.4572, "step": 1696 }, { "epoch": 0.29276287414819285, "grad_norm": 0.83203125, "learning_rate": 1.901188077456733e-05, "loss": 1.4106, "step": 1697 }, { "epoch": 0.29293539204692487, "grad_norm": 0.6875, "learning_rate": 1.901070070153151e-05, "loss": 1.5794, "step": 1698 }, { "epoch": 0.2931079099456569, "grad_norm": 0.671875, "learning_rate": 1.900951996092667e-05, "loss": 1.5461, "step": 1699 }, { "epoch": 0.29328042784438885, "grad_norm": 0.64453125, "learning_rate": 1.9008338552840308e-05, "loss": 1.4498, "step": 1700 }, { "epoch": 0.29328042784438885, "eval_loss": 1.4925167560577393, "eval_runtime": 11.161, "eval_samples_per_second": 91.748, "eval_steps_per_second": 22.937, "step": 1700 }, { "epoch": 0.29345294574312086, "grad_norm": 0.87109375, "learning_rate": 1.9007156477359935e-05, "loss": 1.5066, "step": 1701 }, { "epoch": 0.2936254636418528, "grad_norm": 0.76171875, "learning_rate": 1.900597373457313e-05, "loss": 1.4833, "step": 1702 }, { "epoch": 0.29379798154058484, "grad_norm": 0.875, "learning_rate": 1.900479032456752e-05, "loss": 1.4667, "step": 1703 }, { "epoch": 0.2939704994393168, "grad_norm": 1.625, "learning_rate": 1.9003606247430774e-05, "loss": 1.5012, "step": 1704 }, { "epoch": 0.2941430173380488, "grad_norm": 0.75, "learning_rate": 1.9002421503250626e-05, "loss": 1.5049, "step": 1705 }, { "epoch": 0.29431553523678083, "grad_norm": 0.68359375, "learning_rate": 1.900123609211484e-05, "loss": 1.5594, "step": 1706 }, { "epoch": 0.2944880531355128, "grad_norm": 0.671875, "learning_rate": 1.9000050014111245e-05, "loss": 1.4642, "step": 1707 }, { "epoch": 0.2946605710342448, "grad_norm": 0.8125, "learning_rate": 1.8998863269327706e-05, "loss": 1.5367, "step": 1708 }, { "epoch": 0.2948330889329768, "grad_norm": 0.6640625, "learning_rate": 1.8997675857852148e-05, "loss": 1.3788, "step": 1709 }, { "epoch": 0.2950056068317088, "grad_norm": 0.65625, "learning_rate": 1.899648777977255e-05, "loss": 1.5682, "step": 1710 }, { "epoch": 0.2951781247304408, "grad_norm": 0.66796875, "learning_rate": 1.8995299035176914e-05, "loss": 1.4895, "step": 1711 }, { "epoch": 0.29535064262917277, "grad_norm": 0.66796875, "learning_rate": 1.8994109624153327e-05, "loss": 1.3757, "step": 1712 }, { "epoch": 0.2955231605279048, "grad_norm": 0.69921875, "learning_rate": 1.89929195467899e-05, "loss": 1.4907, "step": 1713 }, { "epoch": 0.29569567842663674, "grad_norm": 0.875, "learning_rate": 1.8991728803174804e-05, "loss": 1.3716, "step": 1714 }, { "epoch": 0.29586819632536876, "grad_norm": 0.66015625, "learning_rate": 1.899053739339625e-05, "loss": 1.5605, "step": 1715 }, { "epoch": 0.2960407142241008, "grad_norm": 0.83203125, "learning_rate": 1.8989345317542516e-05, "loss": 1.5747, "step": 1716 }, { "epoch": 0.29621323212283274, "grad_norm": 0.95703125, "learning_rate": 1.8988152575701912e-05, "loss": 1.5484, "step": 1717 }, { "epoch": 0.29638575002156475, "grad_norm": 0.71484375, "learning_rate": 1.8986959167962806e-05, "loss": 1.4819, "step": 1718 }, { "epoch": 0.2965582679202967, "grad_norm": 0.70703125, "learning_rate": 1.8985765094413607e-05, "loss": 1.526, "step": 1719 }, { "epoch": 0.29673078581902873, "grad_norm": 0.7265625, "learning_rate": 1.8984570355142787e-05, "loss": 1.5645, "step": 1720 }, { "epoch": 0.2969033037177607, "grad_norm": 0.6171875, "learning_rate": 1.8983374950238854e-05, "loss": 1.5015, "step": 1721 }, { "epoch": 0.2970758216164927, "grad_norm": 0.6015625, "learning_rate": 1.8982178879790377e-05, "loss": 1.5595, "step": 1722 }, { "epoch": 0.2972483395152247, "grad_norm": 0.62890625, "learning_rate": 1.8980982143885965e-05, "loss": 1.5058, "step": 1723 }, { "epoch": 0.2974208574139567, "grad_norm": 0.87109375, "learning_rate": 1.8979784742614283e-05, "loss": 1.5412, "step": 1724 }, { "epoch": 0.2975933753126887, "grad_norm": 0.64453125, "learning_rate": 1.8978586676064036e-05, "loss": 1.4126, "step": 1725 }, { "epoch": 0.29776589321142066, "grad_norm": 0.828125, "learning_rate": 1.8977387944323985e-05, "loss": 1.5317, "step": 1726 }, { "epoch": 0.2979384111101527, "grad_norm": 0.62109375, "learning_rate": 1.8976188547482944e-05, "loss": 1.5283, "step": 1727 }, { "epoch": 0.2981109290088847, "grad_norm": 0.6484375, "learning_rate": 1.897498848562977e-05, "loss": 1.5747, "step": 1728 }, { "epoch": 0.29828344690761666, "grad_norm": 0.6953125, "learning_rate": 1.8973787758853367e-05, "loss": 1.4914, "step": 1729 }, { "epoch": 0.2984559648063487, "grad_norm": 0.59375, "learning_rate": 1.89725863672427e-05, "loss": 1.5348, "step": 1730 }, { "epoch": 0.29862848270508063, "grad_norm": 0.63671875, "learning_rate": 1.8971384310886773e-05, "loss": 1.468, "step": 1731 }, { "epoch": 0.29880100060381265, "grad_norm": 0.625, "learning_rate": 1.8970181589874637e-05, "loss": 1.4735, "step": 1732 }, { "epoch": 0.29897351850254467, "grad_norm": 0.59765625, "learning_rate": 1.89689782042954e-05, "loss": 1.46, "step": 1733 }, { "epoch": 0.2991460364012766, "grad_norm": 0.671875, "learning_rate": 1.896777415423822e-05, "loss": 1.5731, "step": 1734 }, { "epoch": 0.29931855430000864, "grad_norm": 0.57421875, "learning_rate": 1.8966569439792294e-05, "loss": 1.4503, "step": 1735 }, { "epoch": 0.2994910721987406, "grad_norm": 0.76171875, "learning_rate": 1.896536406104688e-05, "loss": 1.5463, "step": 1736 }, { "epoch": 0.2996635900974726, "grad_norm": 0.65234375, "learning_rate": 1.8964158018091278e-05, "loss": 1.5302, "step": 1737 }, { "epoch": 0.2998361079962046, "grad_norm": 0.640625, "learning_rate": 1.8962951311014842e-05, "loss": 1.4495, "step": 1738 }, { "epoch": 0.3000086258949366, "grad_norm": 0.7265625, "learning_rate": 1.896174393990697e-05, "loss": 1.5159, "step": 1739 }, { "epoch": 0.3001811437936686, "grad_norm": 0.60546875, "learning_rate": 1.8960535904857103e-05, "loss": 1.4311, "step": 1740 }, { "epoch": 0.3003536616924006, "grad_norm": 0.69140625, "learning_rate": 1.8959327205954757e-05, "loss": 1.5733, "step": 1741 }, { "epoch": 0.3005261795911326, "grad_norm": 1.0078125, "learning_rate": 1.895811784328947e-05, "loss": 1.4511, "step": 1742 }, { "epoch": 0.30069869748986455, "grad_norm": 0.74609375, "learning_rate": 1.8956907816950837e-05, "loss": 1.4835, "step": 1743 }, { "epoch": 0.30087121538859657, "grad_norm": 0.640625, "learning_rate": 1.895569712702851e-05, "loss": 1.4882, "step": 1744 }, { "epoch": 0.3010437332873286, "grad_norm": 0.578125, "learning_rate": 1.895448577361218e-05, "loss": 1.5585, "step": 1745 }, { "epoch": 0.30121625118606055, "grad_norm": 0.6328125, "learning_rate": 1.8953273756791595e-05, "loss": 1.4595, "step": 1746 }, { "epoch": 0.30138876908479256, "grad_norm": 0.66015625, "learning_rate": 1.8952061076656547e-05, "loss": 1.56, "step": 1747 }, { "epoch": 0.3015612869835245, "grad_norm": 0.6953125, "learning_rate": 1.8950847733296877e-05, "loss": 1.4842, "step": 1748 }, { "epoch": 0.30173380488225654, "grad_norm": 0.82421875, "learning_rate": 1.8949633726802484e-05, "loss": 1.5915, "step": 1749 }, { "epoch": 0.3019063227809885, "grad_norm": 0.89453125, "learning_rate": 1.89484190572633e-05, "loss": 1.5447, "step": 1750 }, { "epoch": 0.3020788406797205, "grad_norm": 0.9140625, "learning_rate": 1.8947203724769324e-05, "loss": 1.5063, "step": 1751 }, { "epoch": 0.30225135857845253, "grad_norm": 0.68359375, "learning_rate": 1.894598772941059e-05, "loss": 1.4247, "step": 1752 }, { "epoch": 0.3024238764771845, "grad_norm": 0.83203125, "learning_rate": 1.8944771071277188e-05, "loss": 1.5098, "step": 1753 }, { "epoch": 0.3025963943759165, "grad_norm": 0.70703125, "learning_rate": 1.8943553750459256e-05, "loss": 1.6315, "step": 1754 }, { "epoch": 0.30276891227464847, "grad_norm": 0.75390625, "learning_rate": 1.8942335767046978e-05, "loss": 1.5184, "step": 1755 }, { "epoch": 0.3029414301733805, "grad_norm": 0.98828125, "learning_rate": 1.8941117121130594e-05, "loss": 1.429, "step": 1756 }, { "epoch": 0.3031139480721125, "grad_norm": 0.62890625, "learning_rate": 1.8939897812800385e-05, "loss": 1.5151, "step": 1757 }, { "epoch": 0.30328646597084447, "grad_norm": 0.65234375, "learning_rate": 1.893867784214669e-05, "loss": 1.491, "step": 1758 }, { "epoch": 0.3034589838695765, "grad_norm": 0.6484375, "learning_rate": 1.8937457209259888e-05, "loss": 1.4795, "step": 1759 }, { "epoch": 0.30363150176830844, "grad_norm": 0.68359375, "learning_rate": 1.893623591423041e-05, "loss": 1.5306, "step": 1760 }, { "epoch": 0.30380401966704046, "grad_norm": 0.69140625, "learning_rate": 1.893501395714874e-05, "loss": 1.4723, "step": 1761 }, { "epoch": 0.3039765375657725, "grad_norm": 0.6875, "learning_rate": 1.893379133810541e-05, "loss": 1.4674, "step": 1762 }, { "epoch": 0.30414905546450444, "grad_norm": 0.56640625, "learning_rate": 1.8932568057190995e-05, "loss": 1.4916, "step": 1763 }, { "epoch": 0.30432157336323645, "grad_norm": 0.60546875, "learning_rate": 1.8931344114496127e-05, "loss": 1.4793, "step": 1764 }, { "epoch": 0.3044940912619684, "grad_norm": 0.83203125, "learning_rate": 1.8930119510111476e-05, "loss": 1.5519, "step": 1765 }, { "epoch": 0.30466660916070043, "grad_norm": 0.83984375, "learning_rate": 1.8928894244127782e-05, "loss": 1.5546, "step": 1766 }, { "epoch": 0.3048391270594324, "grad_norm": 0.80859375, "learning_rate": 1.8927668316635804e-05, "loss": 1.6417, "step": 1767 }, { "epoch": 0.3050116449581644, "grad_norm": 0.87109375, "learning_rate": 1.892644172772638e-05, "loss": 1.5674, "step": 1768 }, { "epoch": 0.3051841628568964, "grad_norm": 0.77734375, "learning_rate": 1.8925214477490373e-05, "loss": 1.5066, "step": 1769 }, { "epoch": 0.3053566807556284, "grad_norm": 0.80078125, "learning_rate": 1.8923986566018717e-05, "loss": 1.571, "step": 1770 }, { "epoch": 0.3055291986543604, "grad_norm": 0.75, "learning_rate": 1.892275799340237e-05, "loss": 1.5209, "step": 1771 }, { "epoch": 0.30570171655309236, "grad_norm": 0.66015625, "learning_rate": 1.8921528759732363e-05, "loss": 1.388, "step": 1772 }, { "epoch": 0.3058742344518244, "grad_norm": 0.77734375, "learning_rate": 1.892029886509976e-05, "loss": 1.5195, "step": 1773 }, { "epoch": 0.3060467523505564, "grad_norm": 0.7109375, "learning_rate": 1.891906830959568e-05, "loss": 1.5106, "step": 1774 }, { "epoch": 0.30621927024928836, "grad_norm": 0.59765625, "learning_rate": 1.891783709331129e-05, "loss": 1.3891, "step": 1775 }, { "epoch": 0.30639178814802037, "grad_norm": 0.66015625, "learning_rate": 1.8916605216337807e-05, "loss": 1.5229, "step": 1776 }, { "epoch": 0.30656430604675233, "grad_norm": 0.78515625, "learning_rate": 1.8915372678766497e-05, "loss": 1.4953, "step": 1777 }, { "epoch": 0.30673682394548435, "grad_norm": 0.6015625, "learning_rate": 1.8914139480688672e-05, "loss": 1.5302, "step": 1778 }, { "epoch": 0.3069093418442163, "grad_norm": 0.59765625, "learning_rate": 1.89129056221957e-05, "loss": 1.4382, "step": 1779 }, { "epoch": 0.3070818597429483, "grad_norm": 0.69921875, "learning_rate": 1.8911671103378983e-05, "loss": 1.4877, "step": 1780 }, { "epoch": 0.30725437764168034, "grad_norm": 0.6484375, "learning_rate": 1.8910435924329993e-05, "loss": 1.4971, "step": 1781 }, { "epoch": 0.3074268955404123, "grad_norm": 0.640625, "learning_rate": 1.8909200085140233e-05, "loss": 1.6221, "step": 1782 }, { "epoch": 0.3075994134391443, "grad_norm": 0.609375, "learning_rate": 1.890796358590126e-05, "loss": 1.5684, "step": 1783 }, { "epoch": 0.3077719313378763, "grad_norm": 0.6328125, "learning_rate": 1.890672642670469e-05, "loss": 1.4768, "step": 1784 }, { "epoch": 0.3079444492366083, "grad_norm": 0.6328125, "learning_rate": 1.8905488607642172e-05, "loss": 1.5532, "step": 1785 }, { "epoch": 0.3081169671353403, "grad_norm": 0.69921875, "learning_rate": 1.8904250128805418e-05, "loss": 1.5317, "step": 1786 }, { "epoch": 0.3082894850340723, "grad_norm": 0.69140625, "learning_rate": 1.8903010990286174e-05, "loss": 1.5159, "step": 1787 }, { "epoch": 0.3084620029328043, "grad_norm": 0.67578125, "learning_rate": 1.8901771192176248e-05, "loss": 1.4687, "step": 1788 }, { "epoch": 0.30863452083153625, "grad_norm": 0.70703125, "learning_rate": 1.8900530734567492e-05, "loss": 1.5036, "step": 1789 }, { "epoch": 0.30880703873026827, "grad_norm": 1.1484375, "learning_rate": 1.8899289617551803e-05, "loss": 1.5833, "step": 1790 }, { "epoch": 0.3089795566290003, "grad_norm": 0.68359375, "learning_rate": 1.889804784122114e-05, "loss": 1.442, "step": 1791 }, { "epoch": 0.30915207452773225, "grad_norm": 0.76953125, "learning_rate": 1.889680540566749e-05, "loss": 1.468, "step": 1792 }, { "epoch": 0.30932459242646426, "grad_norm": 1.0078125, "learning_rate": 1.8895562310982907e-05, "loss": 1.4224, "step": 1793 }, { "epoch": 0.3094971103251962, "grad_norm": 0.62890625, "learning_rate": 1.8894318557259485e-05, "loss": 1.4192, "step": 1794 }, { "epoch": 0.30966962822392824, "grad_norm": 0.81640625, "learning_rate": 1.889307414458937e-05, "loss": 1.4496, "step": 1795 }, { "epoch": 0.3098421461226602, "grad_norm": 0.6640625, "learning_rate": 1.8891829073064757e-05, "loss": 1.5036, "step": 1796 }, { "epoch": 0.3100146640213922, "grad_norm": 0.6875, "learning_rate": 1.889058334277789e-05, "loss": 1.5235, "step": 1797 }, { "epoch": 0.31018718192012423, "grad_norm": 0.6796875, "learning_rate": 1.8889336953821055e-05, "loss": 1.6028, "step": 1798 }, { "epoch": 0.3103596998188562, "grad_norm": 0.6015625, "learning_rate": 1.8888089906286598e-05, "loss": 1.3856, "step": 1799 }, { "epoch": 0.3105322177175882, "grad_norm": 0.80078125, "learning_rate": 1.8886842200266905e-05, "loss": 1.473, "step": 1800 }, { "epoch": 0.3105322177175882, "eval_loss": 1.4874216318130493, "eval_runtime": 10.8464, "eval_samples_per_second": 94.409, "eval_steps_per_second": 23.602, "step": 1800 }, { "epoch": 0.31070473561632017, "grad_norm": 0.80078125, "learning_rate": 1.888559383585441e-05, "loss": 1.6011, "step": 1801 }, { "epoch": 0.3108772535150522, "grad_norm": 3.390625, "learning_rate": 1.888434481314161e-05, "loss": 1.4107, "step": 1802 }, { "epoch": 0.3110497714137842, "grad_norm": 0.70703125, "learning_rate": 1.888309513222103e-05, "loss": 1.5343, "step": 1803 }, { "epoch": 0.31122228931251616, "grad_norm": 0.70703125, "learning_rate": 1.8881844793185257e-05, "loss": 1.5289, "step": 1804 }, { "epoch": 0.3113948072112482, "grad_norm": 0.62890625, "learning_rate": 1.8880593796126925e-05, "loss": 1.436, "step": 1805 }, { "epoch": 0.31156732510998014, "grad_norm": 0.76171875, "learning_rate": 1.887934214113872e-05, "loss": 1.5412, "step": 1806 }, { "epoch": 0.31173984300871216, "grad_norm": 0.84375, "learning_rate": 1.887808982831337e-05, "loss": 1.5912, "step": 1807 }, { "epoch": 0.3119123609074442, "grad_norm": 0.7265625, "learning_rate": 1.887683685774365e-05, "loss": 1.4521, "step": 1808 }, { "epoch": 0.31208487880617614, "grad_norm": 0.7734375, "learning_rate": 1.887558322952239e-05, "loss": 1.5337, "step": 1809 }, { "epoch": 0.31225739670490815, "grad_norm": 0.9609375, "learning_rate": 1.887432894374247e-05, "loss": 1.6269, "step": 1810 }, { "epoch": 0.3124299146036401, "grad_norm": 0.6328125, "learning_rate": 1.8873074000496808e-05, "loss": 1.469, "step": 1811 }, { "epoch": 0.31260243250237213, "grad_norm": 0.84375, "learning_rate": 1.8871818399878387e-05, "loss": 1.495, "step": 1812 }, { "epoch": 0.3127749504011041, "grad_norm": 0.8125, "learning_rate": 1.887056214198022e-05, "loss": 1.5321, "step": 1813 }, { "epoch": 0.3129474682998361, "grad_norm": 0.62109375, "learning_rate": 1.8869305226895386e-05, "loss": 1.5165, "step": 1814 }, { "epoch": 0.3131199861985681, "grad_norm": 0.640625, "learning_rate": 1.8868047654717005e-05, "loss": 1.5412, "step": 1815 }, { "epoch": 0.3132925040973001, "grad_norm": 0.73046875, "learning_rate": 1.886678942553824e-05, "loss": 1.4872, "step": 1816 }, { "epoch": 0.3134650219960321, "grad_norm": 0.64453125, "learning_rate": 1.8865530539452316e-05, "loss": 1.5929, "step": 1817 }, { "epoch": 0.31363753989476406, "grad_norm": 0.68359375, "learning_rate": 1.8864270996552494e-05, "loss": 1.5027, "step": 1818 }, { "epoch": 0.3138100577934961, "grad_norm": 0.74609375, "learning_rate": 1.886301079693209e-05, "loss": 1.5103, "step": 1819 }, { "epoch": 0.3139825756922281, "grad_norm": 0.6484375, "learning_rate": 1.8861749940684464e-05, "loss": 1.5923, "step": 1820 }, { "epoch": 0.31415509359096006, "grad_norm": 0.6640625, "learning_rate": 1.8860488427903038e-05, "loss": 1.4984, "step": 1821 }, { "epoch": 0.31432761148969207, "grad_norm": 0.65234375, "learning_rate": 1.8859226258681262e-05, "loss": 1.5425, "step": 1822 }, { "epoch": 0.31450012938842403, "grad_norm": 0.7421875, "learning_rate": 1.885796343311265e-05, "loss": 1.425, "step": 1823 }, { "epoch": 0.31467264728715605, "grad_norm": 0.62109375, "learning_rate": 1.885669995129076e-05, "loss": 1.4554, "step": 1824 }, { "epoch": 0.314845165185888, "grad_norm": 0.59375, "learning_rate": 1.8855435813309196e-05, "loss": 1.4866, "step": 1825 }, { "epoch": 0.31501768308462, "grad_norm": 0.81640625, "learning_rate": 1.885417101926162e-05, "loss": 1.5125, "step": 1826 }, { "epoch": 0.31519020098335204, "grad_norm": 0.69140625, "learning_rate": 1.885290556924173e-05, "loss": 1.4906, "step": 1827 }, { "epoch": 0.315362718882084, "grad_norm": 0.63671875, "learning_rate": 1.885163946334328e-05, "loss": 1.5114, "step": 1828 }, { "epoch": 0.315535236780816, "grad_norm": 0.69921875, "learning_rate": 1.8850372701660072e-05, "loss": 1.4366, "step": 1829 }, { "epoch": 0.315707754679548, "grad_norm": 0.84375, "learning_rate": 1.8849105284285954e-05, "loss": 1.4678, "step": 1830 }, { "epoch": 0.31588027257828, "grad_norm": 0.67578125, "learning_rate": 1.8847837211314822e-05, "loss": 1.5652, "step": 1831 }, { "epoch": 0.316052790477012, "grad_norm": 0.76171875, "learning_rate": 1.8846568482840628e-05, "loss": 1.5195, "step": 1832 }, { "epoch": 0.316225308375744, "grad_norm": 0.6484375, "learning_rate": 1.8845299098957366e-05, "loss": 1.5487, "step": 1833 }, { "epoch": 0.316397826274476, "grad_norm": 0.81640625, "learning_rate": 1.8844029059759076e-05, "loss": 1.4679, "step": 1834 }, { "epoch": 0.31657034417320795, "grad_norm": 0.75, "learning_rate": 1.8842758365339856e-05, "loss": 1.4689, "step": 1835 }, { "epoch": 0.31674286207193997, "grad_norm": 0.6171875, "learning_rate": 1.884148701579384e-05, "loss": 1.5574, "step": 1836 }, { "epoch": 0.316915379970672, "grad_norm": 1.375, "learning_rate": 1.884021501121523e-05, "loss": 1.5667, "step": 1837 }, { "epoch": 0.31708789786940395, "grad_norm": 0.62109375, "learning_rate": 1.883894235169825e-05, "loss": 1.4987, "step": 1838 }, { "epoch": 0.31726041576813596, "grad_norm": 0.65234375, "learning_rate": 1.8837669037337188e-05, "loss": 1.5074, "step": 1839 }, { "epoch": 0.3174329336668679, "grad_norm": 0.68359375, "learning_rate": 1.883639506822639e-05, "loss": 1.4288, "step": 1840 }, { "epoch": 0.31760545156559994, "grad_norm": 0.6328125, "learning_rate": 1.883512044446023e-05, "loss": 1.5972, "step": 1841 }, { "epoch": 0.3177779694643319, "grad_norm": 0.57421875, "learning_rate": 1.8833845166133145e-05, "loss": 1.3981, "step": 1842 }, { "epoch": 0.3179504873630639, "grad_norm": 0.65234375, "learning_rate": 1.883256923333961e-05, "loss": 1.5039, "step": 1843 }, { "epoch": 0.31812300526179593, "grad_norm": 0.734375, "learning_rate": 1.8831292646174163e-05, "loss": 1.4579, "step": 1844 }, { "epoch": 0.3182955231605279, "grad_norm": 0.60546875, "learning_rate": 1.8830015404731375e-05, "loss": 1.5289, "step": 1845 }, { "epoch": 0.3184680410592599, "grad_norm": 0.58984375, "learning_rate": 1.8828737509105873e-05, "loss": 1.5776, "step": 1846 }, { "epoch": 0.31864055895799187, "grad_norm": 0.6015625, "learning_rate": 1.882745895939233e-05, "loss": 1.4838, "step": 1847 }, { "epoch": 0.3188130768567239, "grad_norm": 0.63671875, "learning_rate": 1.882617975568547e-05, "loss": 1.4642, "step": 1848 }, { "epoch": 0.3189855947554559, "grad_norm": 0.69921875, "learning_rate": 1.882489989808007e-05, "loss": 1.4736, "step": 1849 }, { "epoch": 0.31915811265418786, "grad_norm": 1.9609375, "learning_rate": 1.882361938667094e-05, "loss": 1.5478, "step": 1850 }, { "epoch": 0.3193306305529199, "grad_norm": 0.70703125, "learning_rate": 1.8822338221552955e-05, "loss": 1.5369, "step": 1851 }, { "epoch": 0.31950314845165184, "grad_norm": 0.72265625, "learning_rate": 1.882105640282103e-05, "loss": 1.4478, "step": 1852 }, { "epoch": 0.31967566635038386, "grad_norm": 0.671875, "learning_rate": 1.881977393057013e-05, "loss": 1.5034, "step": 1853 }, { "epoch": 0.3198481842491158, "grad_norm": 0.62890625, "learning_rate": 1.881849080489527e-05, "loss": 1.5675, "step": 1854 }, { "epoch": 0.32002070214784784, "grad_norm": 0.63671875, "learning_rate": 1.881720702589151e-05, "loss": 1.5108, "step": 1855 }, { "epoch": 0.32019322004657985, "grad_norm": 0.6328125, "learning_rate": 1.881592259365396e-05, "loss": 1.5053, "step": 1856 }, { "epoch": 0.3203657379453118, "grad_norm": 0.61328125, "learning_rate": 1.881463750827778e-05, "loss": 1.5251, "step": 1857 }, { "epoch": 0.32053825584404383, "grad_norm": 0.65234375, "learning_rate": 1.881335176985818e-05, "loss": 1.4477, "step": 1858 }, { "epoch": 0.3207107737427758, "grad_norm": 0.703125, "learning_rate": 1.881206537849041e-05, "loss": 1.5098, "step": 1859 }, { "epoch": 0.3208832916415078, "grad_norm": 0.671875, "learning_rate": 1.8810778334269778e-05, "loss": 1.4897, "step": 1860 }, { "epoch": 0.3210558095402398, "grad_norm": 0.640625, "learning_rate": 1.880949063729163e-05, "loss": 1.4835, "step": 1861 }, { "epoch": 0.3212283274389718, "grad_norm": 0.609375, "learning_rate": 1.8808202287651375e-05, "loss": 1.5093, "step": 1862 }, { "epoch": 0.3214008453377038, "grad_norm": 0.84375, "learning_rate": 1.880691328544446e-05, "loss": 1.5226, "step": 1863 }, { "epoch": 0.32157336323643576, "grad_norm": 0.734375, "learning_rate": 1.880562363076638e-05, "loss": 1.498, "step": 1864 }, { "epoch": 0.3217458811351678, "grad_norm": 0.82421875, "learning_rate": 1.880433332371268e-05, "loss": 1.543, "step": 1865 }, { "epoch": 0.3219183990338998, "grad_norm": 0.6796875, "learning_rate": 1.8803042364378955e-05, "loss": 1.5232, "step": 1866 }, { "epoch": 0.32209091693263175, "grad_norm": 0.640625, "learning_rate": 1.8801750752860847e-05, "loss": 1.574, "step": 1867 }, { "epoch": 0.32226343483136377, "grad_norm": 0.875, "learning_rate": 1.880045848925405e-05, "loss": 1.5205, "step": 1868 }, { "epoch": 0.32243595273009573, "grad_norm": 0.93359375, "learning_rate": 1.87991655736543e-05, "loss": 1.5055, "step": 1869 }, { "epoch": 0.32260847062882775, "grad_norm": 0.6328125, "learning_rate": 1.8797872006157382e-05, "loss": 1.5527, "step": 1870 }, { "epoch": 0.3227809885275597, "grad_norm": 1.1796875, "learning_rate": 1.8796577786859136e-05, "loss": 1.5301, "step": 1871 }, { "epoch": 0.3229535064262917, "grad_norm": 0.890625, "learning_rate": 1.8795282915855445e-05, "loss": 1.5342, "step": 1872 }, { "epoch": 0.32312602432502374, "grad_norm": 0.8515625, "learning_rate": 1.8793987393242236e-05, "loss": 1.5328, "step": 1873 }, { "epoch": 0.3232985422237557, "grad_norm": 1.1796875, "learning_rate": 1.8792691219115496e-05, "loss": 1.4506, "step": 1874 }, { "epoch": 0.3234710601224877, "grad_norm": 0.609375, "learning_rate": 1.8791394393571255e-05, "loss": 1.5116, "step": 1875 }, { "epoch": 0.3236435780212197, "grad_norm": 0.65234375, "learning_rate": 1.879009691670558e-05, "loss": 1.5311, "step": 1876 }, { "epoch": 0.3238160959199517, "grad_norm": 0.75, "learning_rate": 1.8788798788614606e-05, "loss": 1.4899, "step": 1877 }, { "epoch": 0.3239886138186837, "grad_norm": 1.015625, "learning_rate": 1.8787500009394503e-05, "loss": 1.5621, "step": 1878 }, { "epoch": 0.3241611317174157, "grad_norm": 0.66015625, "learning_rate": 1.8786200579141488e-05, "loss": 1.5273, "step": 1879 }, { "epoch": 0.3243336496161477, "grad_norm": 0.6328125, "learning_rate": 1.878490049795184e-05, "loss": 1.4859, "step": 1880 }, { "epoch": 0.32450616751487965, "grad_norm": 0.76953125, "learning_rate": 1.878359976592187e-05, "loss": 1.4424, "step": 1881 }, { "epoch": 0.32467868541361167, "grad_norm": 0.8359375, "learning_rate": 1.8782298383147946e-05, "loss": 1.4807, "step": 1882 }, { "epoch": 0.32485120331234363, "grad_norm": 0.6640625, "learning_rate": 1.8780996349726488e-05, "loss": 1.5346, "step": 1883 }, { "epoch": 0.32502372121107564, "grad_norm": 0.72265625, "learning_rate": 1.8779693665753954e-05, "loss": 1.5185, "step": 1884 }, { "epoch": 0.32519623910980766, "grad_norm": 0.8984375, "learning_rate": 1.877839033132685e-05, "loss": 1.5993, "step": 1885 }, { "epoch": 0.3253687570085396, "grad_norm": 0.765625, "learning_rate": 1.8777086346541743e-05, "loss": 1.5468, "step": 1886 }, { "epoch": 0.32554127490727164, "grad_norm": 0.609375, "learning_rate": 1.8775781711495237e-05, "loss": 1.5053, "step": 1887 }, { "epoch": 0.3257137928060036, "grad_norm": 0.92578125, "learning_rate": 1.8774476426283993e-05, "loss": 1.4857, "step": 1888 }, { "epoch": 0.3258863107047356, "grad_norm": 0.96875, "learning_rate": 1.8773170491004704e-05, "loss": 1.52, "step": 1889 }, { "epoch": 0.32605882860346763, "grad_norm": 0.82421875, "learning_rate": 1.877186390575413e-05, "loss": 1.5068, "step": 1890 }, { "epoch": 0.3262313465021996, "grad_norm": 0.94140625, "learning_rate": 1.877055667062907e-05, "loss": 1.4402, "step": 1891 }, { "epoch": 0.3264038644009316, "grad_norm": 0.63671875, "learning_rate": 1.8769248785726367e-05, "loss": 1.4986, "step": 1892 }, { "epoch": 0.32657638229966357, "grad_norm": 1.2734375, "learning_rate": 1.8767940251142924e-05, "loss": 1.4641, "step": 1893 }, { "epoch": 0.3267489001983956, "grad_norm": 1.015625, "learning_rate": 1.8766631066975684e-05, "loss": 1.5435, "step": 1894 }, { "epoch": 0.3269214180971276, "grad_norm": 0.8046875, "learning_rate": 1.8765321233321634e-05, "loss": 1.5584, "step": 1895 }, { "epoch": 0.32709393599585956, "grad_norm": 0.7734375, "learning_rate": 1.876401075027782e-05, "loss": 1.5136, "step": 1896 }, { "epoch": 0.3272664538945916, "grad_norm": 1.234375, "learning_rate": 1.8762699617941333e-05, "loss": 1.5428, "step": 1897 }, { "epoch": 0.32743897179332354, "grad_norm": 1.1875, "learning_rate": 1.87613878364093e-05, "loss": 1.5206, "step": 1898 }, { "epoch": 0.32761148969205556, "grad_norm": 0.75, "learning_rate": 1.8760075405778918e-05, "loss": 1.5195, "step": 1899 }, { "epoch": 0.3277840075907875, "grad_norm": 0.87890625, "learning_rate": 1.8758762326147414e-05, "loss": 1.4737, "step": 1900 }, { "epoch": 0.3277840075907875, "eval_loss": 1.4825938940048218, "eval_runtime": 10.8069, "eval_samples_per_second": 94.755, "eval_steps_per_second": 23.689, "step": 1900 }, { "epoch": 0.32795652548951953, "grad_norm": 1.09375, "learning_rate": 1.875744859761207e-05, "loss": 1.6023, "step": 1901 }, { "epoch": 0.32812904338825155, "grad_norm": 0.8828125, "learning_rate": 1.875613422027021e-05, "loss": 1.5445, "step": 1902 }, { "epoch": 0.3283015612869835, "grad_norm": 0.65234375, "learning_rate": 1.875481919421922e-05, "loss": 1.4026, "step": 1903 }, { "epoch": 0.32847407918571553, "grad_norm": 1.0859375, "learning_rate": 1.875350351955652e-05, "loss": 1.5081, "step": 1904 }, { "epoch": 0.3286465970844475, "grad_norm": 1.046875, "learning_rate": 1.8752187196379585e-05, "loss": 1.5393, "step": 1905 }, { "epoch": 0.3288191149831795, "grad_norm": 0.6328125, "learning_rate": 1.875087022478594e-05, "loss": 1.5503, "step": 1906 }, { "epoch": 0.3289916328819115, "grad_norm": 0.78125, "learning_rate": 1.8749552604873146e-05, "loss": 1.4422, "step": 1907 }, { "epoch": 0.3291641507806435, "grad_norm": 0.98046875, "learning_rate": 1.874823433673883e-05, "loss": 1.4446, "step": 1908 }, { "epoch": 0.3293366686793755, "grad_norm": 0.73828125, "learning_rate": 1.8746915420480646e-05, "loss": 1.6119, "step": 1909 }, { "epoch": 0.32950918657810746, "grad_norm": 0.83984375, "learning_rate": 1.8745595856196318e-05, "loss": 1.4752, "step": 1910 }, { "epoch": 0.3296817044768395, "grad_norm": 0.64453125, "learning_rate": 1.8744275643983606e-05, "loss": 1.4901, "step": 1911 }, { "epoch": 0.3298542223755715, "grad_norm": 0.87890625, "learning_rate": 1.8742954783940313e-05, "loss": 1.4867, "step": 1912 }, { "epoch": 0.33002674027430345, "grad_norm": 0.96484375, "learning_rate": 1.8741633276164305e-05, "loss": 1.446, "step": 1913 }, { "epoch": 0.33019925817303547, "grad_norm": 1.359375, "learning_rate": 1.8740311120753482e-05, "loss": 1.5655, "step": 1914 }, { "epoch": 0.33037177607176743, "grad_norm": 0.78515625, "learning_rate": 1.87389883178058e-05, "loss": 1.4538, "step": 1915 }, { "epoch": 0.33054429397049945, "grad_norm": 0.859375, "learning_rate": 1.8737664867419262e-05, "loss": 1.4357, "step": 1916 }, { "epoch": 0.3307168118692314, "grad_norm": 0.7421875, "learning_rate": 1.8736340769691912e-05, "loss": 1.5557, "step": 1917 }, { "epoch": 0.3308893297679634, "grad_norm": 0.62890625, "learning_rate": 1.873501602472185e-05, "loss": 1.5924, "step": 1918 }, { "epoch": 0.33106184766669544, "grad_norm": 0.71875, "learning_rate": 1.8733690632607223e-05, "loss": 1.5446, "step": 1919 }, { "epoch": 0.3312343655654274, "grad_norm": 0.69140625, "learning_rate": 1.8732364593446223e-05, "loss": 1.4492, "step": 1920 }, { "epoch": 0.3314068834641594, "grad_norm": 0.71875, "learning_rate": 1.8731037907337094e-05, "loss": 1.4965, "step": 1921 }, { "epoch": 0.3315794013628914, "grad_norm": 0.58203125, "learning_rate": 1.872971057437812e-05, "loss": 1.49, "step": 1922 }, { "epoch": 0.3317519192616234, "grad_norm": 0.59765625, "learning_rate": 1.872838259466764e-05, "loss": 1.4051, "step": 1923 }, { "epoch": 0.3319244371603554, "grad_norm": 0.734375, "learning_rate": 1.8727053968304044e-05, "loss": 1.5263, "step": 1924 }, { "epoch": 0.3320969550590874, "grad_norm": 0.73046875, "learning_rate": 1.8725724695385757e-05, "loss": 1.5625, "step": 1925 }, { "epoch": 0.3322694729578194, "grad_norm": 0.6640625, "learning_rate": 1.8724394776011264e-05, "loss": 1.461, "step": 1926 }, { "epoch": 0.33244199085655135, "grad_norm": 0.68359375, "learning_rate": 1.8723064210279096e-05, "loss": 1.4699, "step": 1927 }, { "epoch": 0.33261450875528337, "grad_norm": 0.66796875, "learning_rate": 1.8721732998287825e-05, "loss": 1.4403, "step": 1928 }, { "epoch": 0.3327870266540153, "grad_norm": 0.6640625, "learning_rate": 1.872040114013608e-05, "loss": 1.5245, "step": 1929 }, { "epoch": 0.33295954455274734, "grad_norm": 0.6875, "learning_rate": 1.871906863592253e-05, "loss": 1.5167, "step": 1930 }, { "epoch": 0.33313206245147936, "grad_norm": 0.66015625, "learning_rate": 1.8717735485745895e-05, "loss": 1.5321, "step": 1931 }, { "epoch": 0.3333045803502113, "grad_norm": 0.6328125, "learning_rate": 1.8716401689704945e-05, "loss": 1.538, "step": 1932 }, { "epoch": 0.33347709824894334, "grad_norm": 0.7890625, "learning_rate": 1.8715067247898493e-05, "loss": 1.4214, "step": 1933 }, { "epoch": 0.3336496161476753, "grad_norm": 0.58984375, "learning_rate": 1.871373216042541e-05, "loss": 1.491, "step": 1934 }, { "epoch": 0.3338221340464073, "grad_norm": 0.65234375, "learning_rate": 1.8712396427384595e-05, "loss": 1.5567, "step": 1935 }, { "epoch": 0.33399465194513933, "grad_norm": 0.7421875, "learning_rate": 1.8711060048875023e-05, "loss": 1.526, "step": 1936 }, { "epoch": 0.3341671698438713, "grad_norm": 0.7578125, "learning_rate": 1.870972302499569e-05, "loss": 1.4795, "step": 1937 }, { "epoch": 0.3343396877426033, "grad_norm": 0.82421875, "learning_rate": 1.8708385355845654e-05, "loss": 1.3779, "step": 1938 }, { "epoch": 0.33451220564133527, "grad_norm": 0.671875, "learning_rate": 1.870704704152402e-05, "loss": 1.3989, "step": 1939 }, { "epoch": 0.3346847235400673, "grad_norm": 0.6875, "learning_rate": 1.8705708082129935e-05, "loss": 1.4858, "step": 1940 }, { "epoch": 0.3348572414387993, "grad_norm": 0.7265625, "learning_rate": 1.87043684777626e-05, "loss": 1.5937, "step": 1941 }, { "epoch": 0.33502975933753126, "grad_norm": 0.65234375, "learning_rate": 1.8703028228521263e-05, "loss": 1.5191, "step": 1942 }, { "epoch": 0.3352022772362633, "grad_norm": 0.6953125, "learning_rate": 1.8701687334505215e-05, "loss": 1.4377, "step": 1943 }, { "epoch": 0.33537479513499524, "grad_norm": 0.70703125, "learning_rate": 1.8700345795813794e-05, "loss": 1.4953, "step": 1944 }, { "epoch": 0.33554731303372726, "grad_norm": 0.5859375, "learning_rate": 1.8699003612546397e-05, "loss": 1.5811, "step": 1945 }, { "epoch": 0.3357198309324592, "grad_norm": 0.64453125, "learning_rate": 1.8697660784802463e-05, "loss": 1.577, "step": 1946 }, { "epoch": 0.33589234883119123, "grad_norm": 0.6953125, "learning_rate": 1.8696317312681466e-05, "loss": 1.3651, "step": 1947 }, { "epoch": 0.33606486672992325, "grad_norm": 0.71875, "learning_rate": 1.869497319628295e-05, "loss": 1.5111, "step": 1948 }, { "epoch": 0.3362373846286552, "grad_norm": 0.61328125, "learning_rate": 1.8693628435706487e-05, "loss": 1.4774, "step": 1949 }, { "epoch": 0.33640990252738723, "grad_norm": 0.921875, "learning_rate": 1.8692283031051714e-05, "loss": 1.5871, "step": 1950 }, { "epoch": 0.3365824204261192, "grad_norm": 0.578125, "learning_rate": 1.86909369824183e-05, "loss": 1.4237, "step": 1951 }, { "epoch": 0.3367549383248512, "grad_norm": 0.67578125, "learning_rate": 1.868959028990597e-05, "loss": 1.4145, "step": 1952 }, { "epoch": 0.3369274562235832, "grad_norm": 0.6796875, "learning_rate": 1.8688242953614496e-05, "loss": 1.4504, "step": 1953 }, { "epoch": 0.3370999741223152, "grad_norm": 0.65625, "learning_rate": 1.8686894973643698e-05, "loss": 1.5252, "step": 1954 }, { "epoch": 0.3372724920210472, "grad_norm": 0.6484375, "learning_rate": 1.868554635009344e-05, "loss": 1.4336, "step": 1955 }, { "epoch": 0.33744500991977916, "grad_norm": 0.61328125, "learning_rate": 1.868419708306364e-05, "loss": 1.4343, "step": 1956 }, { "epoch": 0.3376175278185112, "grad_norm": 0.64453125, "learning_rate": 1.868284717265426e-05, "loss": 1.4825, "step": 1957 }, { "epoch": 0.33779004571724314, "grad_norm": 0.6328125, "learning_rate": 1.8681496618965308e-05, "loss": 1.5555, "step": 1958 }, { "epoch": 0.33796256361597515, "grad_norm": 0.5703125, "learning_rate": 1.8680145422096844e-05, "loss": 1.5049, "step": 1959 }, { "epoch": 0.33813508151470717, "grad_norm": 0.57421875, "learning_rate": 1.867879358214897e-05, "loss": 1.3421, "step": 1960 }, { "epoch": 0.33830759941343913, "grad_norm": 0.625, "learning_rate": 1.8677441099221836e-05, "loss": 1.4644, "step": 1961 }, { "epoch": 0.33848011731217115, "grad_norm": 0.62109375, "learning_rate": 1.867608797341565e-05, "loss": 1.6562, "step": 1962 }, { "epoch": 0.3386526352109031, "grad_norm": 0.671875, "learning_rate": 1.8674734204830655e-05, "loss": 1.4937, "step": 1963 }, { "epoch": 0.3388251531096351, "grad_norm": 0.78515625, "learning_rate": 1.867337979356715e-05, "loss": 1.5014, "step": 1964 }, { "epoch": 0.33899767100836714, "grad_norm": 0.6484375, "learning_rate": 1.8672024739725473e-05, "loss": 1.487, "step": 1965 }, { "epoch": 0.3391701889070991, "grad_norm": 0.77734375, "learning_rate": 1.867066904340602e-05, "loss": 1.5148, "step": 1966 }, { "epoch": 0.3393427068058311, "grad_norm": 0.60546875, "learning_rate": 1.8669312704709224e-05, "loss": 1.4718, "step": 1967 }, { "epoch": 0.3395152247045631, "grad_norm": 0.68359375, "learning_rate": 1.8667955723735577e-05, "loss": 1.5579, "step": 1968 }, { "epoch": 0.3396877426032951, "grad_norm": 0.6015625, "learning_rate": 1.8666598100585613e-05, "loss": 1.5187, "step": 1969 }, { "epoch": 0.3398602605020271, "grad_norm": 0.65234375, "learning_rate": 1.8665239835359904e-05, "loss": 1.4936, "step": 1970 }, { "epoch": 0.3400327784007591, "grad_norm": 0.578125, "learning_rate": 1.866388092815909e-05, "loss": 1.5325, "step": 1971 }, { "epoch": 0.3402052962994911, "grad_norm": 0.625, "learning_rate": 1.8662521379083843e-05, "loss": 1.439, "step": 1972 }, { "epoch": 0.34037781419822305, "grad_norm": 0.640625, "learning_rate": 1.8661161188234882e-05, "loss": 1.4927, "step": 1973 }, { "epoch": 0.34055033209695507, "grad_norm": 0.61328125, "learning_rate": 1.8659800355712984e-05, "loss": 1.556, "step": 1974 }, { "epoch": 0.340722849995687, "grad_norm": 0.77734375, "learning_rate": 1.865843888161897e-05, "loss": 1.4048, "step": 1975 }, { "epoch": 0.34089536789441904, "grad_norm": 0.58203125, "learning_rate": 1.86570767660537e-05, "loss": 1.3183, "step": 1976 }, { "epoch": 0.34106788579315106, "grad_norm": 0.61328125, "learning_rate": 1.865571400911809e-05, "loss": 1.4521, "step": 1977 }, { "epoch": 0.341240403691883, "grad_norm": 0.5546875, "learning_rate": 1.8654350610913106e-05, "loss": 1.5282, "step": 1978 }, { "epoch": 0.34141292159061504, "grad_norm": 0.640625, "learning_rate": 1.8652986571539754e-05, "loss": 1.467, "step": 1979 }, { "epoch": 0.341585439489347, "grad_norm": 0.6484375, "learning_rate": 1.865162189109909e-05, "loss": 1.4525, "step": 1980 }, { "epoch": 0.341757957388079, "grad_norm": 0.98828125, "learning_rate": 1.8650256569692215e-05, "loss": 1.5296, "step": 1981 }, { "epoch": 0.34193047528681103, "grad_norm": 0.71875, "learning_rate": 1.864889060742029e-05, "loss": 1.5094, "step": 1982 }, { "epoch": 0.342102993185543, "grad_norm": 0.87890625, "learning_rate": 1.8647524004384503e-05, "loss": 1.5443, "step": 1983 }, { "epoch": 0.342275511084275, "grad_norm": 0.81640625, "learning_rate": 1.8646156760686108e-05, "loss": 1.5589, "step": 1984 }, { "epoch": 0.34244802898300697, "grad_norm": 0.76171875, "learning_rate": 1.8644788876426395e-05, "loss": 1.5559, "step": 1985 }, { "epoch": 0.342620546881739, "grad_norm": 0.69921875, "learning_rate": 1.8643420351706707e-05, "loss": 1.4188, "step": 1986 }, { "epoch": 0.34279306478047095, "grad_norm": 0.6328125, "learning_rate": 1.8642051186628434e-05, "loss": 1.499, "step": 1987 }, { "epoch": 0.34296558267920296, "grad_norm": 0.63671875, "learning_rate": 1.864068138129301e-05, "loss": 1.5922, "step": 1988 }, { "epoch": 0.343138100577935, "grad_norm": 0.765625, "learning_rate": 1.8639310935801922e-05, "loss": 1.4714, "step": 1989 }, { "epoch": 0.34331061847666694, "grad_norm": 0.65625, "learning_rate": 1.8637939850256697e-05, "loss": 1.5472, "step": 1990 }, { "epoch": 0.34348313637539896, "grad_norm": 0.7890625, "learning_rate": 1.8636568124758917e-05, "loss": 1.4931, "step": 1991 }, { "epoch": 0.3436556542741309, "grad_norm": 0.71484375, "learning_rate": 1.8635195759410205e-05, "loss": 1.4338, "step": 1992 }, { "epoch": 0.34382817217286293, "grad_norm": 0.625, "learning_rate": 1.8633822754312233e-05, "loss": 1.4708, "step": 1993 }, { "epoch": 0.34400069007159495, "grad_norm": 0.6484375, "learning_rate": 1.863244910956673e-05, "loss": 1.4769, "step": 1994 }, { "epoch": 0.3441732079703269, "grad_norm": 0.60546875, "learning_rate": 1.8631074825275457e-05, "loss": 1.4068, "step": 1995 }, { "epoch": 0.3443457258690589, "grad_norm": 0.76953125, "learning_rate": 1.8629699901540232e-05, "loss": 1.4729, "step": 1996 }, { "epoch": 0.3445182437677909, "grad_norm": 0.63671875, "learning_rate": 1.8628324338462915e-05, "loss": 1.513, "step": 1997 }, { "epoch": 0.3446907616665229, "grad_norm": 0.609375, "learning_rate": 1.8626948136145422e-05, "loss": 1.5212, "step": 1998 }, { "epoch": 0.3448632795652549, "grad_norm": 0.69140625, "learning_rate": 1.8625571294689705e-05, "loss": 1.5238, "step": 1999 }, { "epoch": 0.3450357974639869, "grad_norm": 0.70703125, "learning_rate": 1.8624193814197774e-05, "loss": 1.542, "step": 2000 }, { "epoch": 0.3450357974639869, "eval_loss": 1.4780633449554443, "eval_runtime": 11.8304, "eval_samples_per_second": 86.556, "eval_steps_per_second": 21.639, "step": 2000 }, { "epoch": 0.3452083153627189, "grad_norm": 0.6328125, "learning_rate": 1.8622815694771675e-05, "loss": 1.5014, "step": 2001 }, { "epoch": 0.34538083326145086, "grad_norm": 0.703125, "learning_rate": 1.8621436936513517e-05, "loss": 1.5214, "step": 2002 }, { "epoch": 0.3455533511601829, "grad_norm": 0.59765625, "learning_rate": 1.862005753952544e-05, "loss": 1.4511, "step": 2003 }, { "epoch": 0.34572586905891484, "grad_norm": 0.625, "learning_rate": 1.8618677503909637e-05, "loss": 1.5055, "step": 2004 }, { "epoch": 0.34589838695764685, "grad_norm": 0.69921875, "learning_rate": 1.8617296829768354e-05, "loss": 1.4664, "step": 2005 }, { "epoch": 0.34607090485637887, "grad_norm": 0.828125, "learning_rate": 1.8615915517203878e-05, "loss": 1.5586, "step": 2006 }, { "epoch": 0.34624342275511083, "grad_norm": 0.63671875, "learning_rate": 1.8614533566318544e-05, "loss": 1.5295, "step": 2007 }, { "epoch": 0.34641594065384285, "grad_norm": 0.59765625, "learning_rate": 1.861315097721474e-05, "loss": 1.4864, "step": 2008 }, { "epoch": 0.3465884585525748, "grad_norm": 0.796875, "learning_rate": 1.8611767749994894e-05, "loss": 1.4829, "step": 2009 }, { "epoch": 0.3467609764513068, "grad_norm": 0.671875, "learning_rate": 1.8610383884761486e-05, "loss": 1.4033, "step": 2010 }, { "epoch": 0.34693349435003884, "grad_norm": 0.72265625, "learning_rate": 1.8608999381617035e-05, "loss": 1.5237, "step": 2011 }, { "epoch": 0.3471060122487708, "grad_norm": 0.62109375, "learning_rate": 1.860761424066412e-05, "loss": 1.5261, "step": 2012 }, { "epoch": 0.3472785301475028, "grad_norm": 0.65234375, "learning_rate": 1.860622846200536e-05, "loss": 1.5967, "step": 2013 }, { "epoch": 0.3474510480462348, "grad_norm": 0.7890625, "learning_rate": 1.860484204574342e-05, "loss": 1.4249, "step": 2014 }, { "epoch": 0.3476235659449668, "grad_norm": 0.59375, "learning_rate": 1.860345499198102e-05, "loss": 1.5006, "step": 2015 }, { "epoch": 0.3477960838436988, "grad_norm": 0.68359375, "learning_rate": 1.860206730082091e-05, "loss": 1.4401, "step": 2016 }, { "epoch": 0.34796860174243077, "grad_norm": 0.66015625, "learning_rate": 1.860067897236591e-05, "loss": 1.5685, "step": 2017 }, { "epoch": 0.3481411196411628, "grad_norm": 0.671875, "learning_rate": 1.8599290006718878e-05, "loss": 1.528, "step": 2018 }, { "epoch": 0.34831363753989475, "grad_norm": 0.65625, "learning_rate": 1.8597900403982705e-05, "loss": 1.3706, "step": 2019 }, { "epoch": 0.34848615543862677, "grad_norm": 0.6171875, "learning_rate": 1.8596510164260347e-05, "loss": 1.4227, "step": 2020 }, { "epoch": 0.3486586733373587, "grad_norm": 0.64453125, "learning_rate": 1.8595119287654808e-05, "loss": 1.4319, "step": 2021 }, { "epoch": 0.34883119123609074, "grad_norm": 0.61328125, "learning_rate": 1.8593727774269122e-05, "loss": 1.5133, "step": 2022 }, { "epoch": 0.34900370913482276, "grad_norm": 0.65625, "learning_rate": 1.859233562420639e-05, "loss": 1.553, "step": 2023 }, { "epoch": 0.3491762270335547, "grad_norm": 0.63671875, "learning_rate": 1.8590942837569743e-05, "loss": 1.4991, "step": 2024 }, { "epoch": 0.34934874493228674, "grad_norm": 0.66015625, "learning_rate": 1.8589549414462375e-05, "loss": 1.5245, "step": 2025 }, { "epoch": 0.3495212628310187, "grad_norm": 0.7578125, "learning_rate": 1.8588155354987517e-05, "loss": 1.5654, "step": 2026 }, { "epoch": 0.3496937807297507, "grad_norm": 0.67578125, "learning_rate": 1.8586760659248447e-05, "loss": 1.4404, "step": 2027 }, { "epoch": 0.34986629862848273, "grad_norm": 0.6015625, "learning_rate": 1.8585365327348497e-05, "loss": 1.502, "step": 2028 }, { "epoch": 0.3500388165272147, "grad_norm": 0.7265625, "learning_rate": 1.858396935939104e-05, "loss": 1.4986, "step": 2029 }, { "epoch": 0.3502113344259467, "grad_norm": 0.703125, "learning_rate": 1.8582572755479494e-05, "loss": 1.49, "step": 2030 }, { "epoch": 0.35038385232467867, "grad_norm": 1.0703125, "learning_rate": 1.8581175515717337e-05, "loss": 1.5376, "step": 2031 }, { "epoch": 0.3505563702234107, "grad_norm": 0.7578125, "learning_rate": 1.8579777640208076e-05, "loss": 1.6789, "step": 2032 }, { "epoch": 0.35072888812214265, "grad_norm": 0.63671875, "learning_rate": 1.857837912905528e-05, "loss": 1.5079, "step": 2033 }, { "epoch": 0.35090140602087466, "grad_norm": 0.8515625, "learning_rate": 1.857697998236256e-05, "loss": 1.4556, "step": 2034 }, { "epoch": 0.3510739239196067, "grad_norm": 0.61328125, "learning_rate": 1.857558020023357e-05, "loss": 1.4302, "step": 2035 }, { "epoch": 0.35124644181833864, "grad_norm": 0.6484375, "learning_rate": 1.8574179782772012e-05, "loss": 1.4337, "step": 2036 }, { "epoch": 0.35141895971707066, "grad_norm": 0.6953125, "learning_rate": 1.8572778730081644e-05, "loss": 1.543, "step": 2037 }, { "epoch": 0.3515914776158026, "grad_norm": 0.609375, "learning_rate": 1.8571377042266267e-05, "loss": 1.5619, "step": 2038 }, { "epoch": 0.35176399551453463, "grad_norm": 0.62890625, "learning_rate": 1.8569974719429716e-05, "loss": 1.4384, "step": 2039 }, { "epoch": 0.35193651341326665, "grad_norm": 0.6953125, "learning_rate": 1.8568571761675893e-05, "loss": 1.5401, "step": 2040 }, { "epoch": 0.3521090313119986, "grad_norm": 0.68359375, "learning_rate": 1.8567168169108735e-05, "loss": 1.4847, "step": 2041 }, { "epoch": 0.3522815492107306, "grad_norm": 0.65234375, "learning_rate": 1.856576394183223e-05, "loss": 1.4928, "step": 2042 }, { "epoch": 0.3524540671094626, "grad_norm": 0.6640625, "learning_rate": 1.856435907995041e-05, "loss": 1.5223, "step": 2043 }, { "epoch": 0.3526265850081946, "grad_norm": 0.75390625, "learning_rate": 1.8562953583567357e-05, "loss": 1.4796, "step": 2044 }, { "epoch": 0.3527991029069266, "grad_norm": 0.66796875, "learning_rate": 1.85615474527872e-05, "loss": 1.4604, "step": 2045 }, { "epoch": 0.3529716208056586, "grad_norm": 0.703125, "learning_rate": 1.856014068771411e-05, "loss": 1.5434, "step": 2046 }, { "epoch": 0.3531441387043906, "grad_norm": 0.71875, "learning_rate": 1.855873328845231e-05, "loss": 1.6396, "step": 2047 }, { "epoch": 0.35331665660312256, "grad_norm": 0.8515625, "learning_rate": 1.8557325255106074e-05, "loss": 1.498, "step": 2048 }, { "epoch": 0.3534891745018546, "grad_norm": 0.7578125, "learning_rate": 1.8555916587779713e-05, "loss": 1.4599, "step": 2049 }, { "epoch": 0.35366169240058654, "grad_norm": 0.61328125, "learning_rate": 1.855450728657759e-05, "loss": 1.5217, "step": 2050 }, { "epoch": 0.35383421029931855, "grad_norm": 0.58203125, "learning_rate": 1.855309735160412e-05, "loss": 1.4141, "step": 2051 }, { "epoch": 0.35400672819805057, "grad_norm": 0.69140625, "learning_rate": 1.8551686782963757e-05, "loss": 1.4475, "step": 2052 }, { "epoch": 0.35417924609678253, "grad_norm": 0.68359375, "learning_rate": 1.8550275580761e-05, "loss": 1.5216, "step": 2053 }, { "epoch": 0.35435176399551455, "grad_norm": 0.62890625, "learning_rate": 1.8548863745100403e-05, "loss": 1.5242, "step": 2054 }, { "epoch": 0.3545242818942465, "grad_norm": 0.64453125, "learning_rate": 1.8547451276086565e-05, "loss": 1.5738, "step": 2055 }, { "epoch": 0.3546967997929785, "grad_norm": 0.61328125, "learning_rate": 1.854603817382413e-05, "loss": 1.5531, "step": 2056 }, { "epoch": 0.35486931769171054, "grad_norm": 0.61328125, "learning_rate": 1.854462443841779e-05, "loss": 1.4921, "step": 2057 }, { "epoch": 0.3550418355904425, "grad_norm": 0.875, "learning_rate": 1.854321006997228e-05, "loss": 1.4861, "step": 2058 }, { "epoch": 0.3552143534891745, "grad_norm": 0.6015625, "learning_rate": 1.8541795068592388e-05, "loss": 1.5131, "step": 2059 }, { "epoch": 0.3553868713879065, "grad_norm": 0.6875, "learning_rate": 1.8540379434382946e-05, "loss": 1.4874, "step": 2060 }, { "epoch": 0.3555593892866385, "grad_norm": 0.7265625, "learning_rate": 1.853896316744883e-05, "loss": 1.4067, "step": 2061 }, { "epoch": 0.35573190718537046, "grad_norm": 0.68359375, "learning_rate": 1.8537546267894975e-05, "loss": 1.5423, "step": 2062 }, { "epoch": 0.35590442508410247, "grad_norm": 0.64453125, "learning_rate": 1.8536128735826344e-05, "loss": 1.5336, "step": 2063 }, { "epoch": 0.3560769429828345, "grad_norm": 0.73046875, "learning_rate": 1.853471057134796e-05, "loss": 1.5081, "step": 2064 }, { "epoch": 0.35624946088156645, "grad_norm": 0.6171875, "learning_rate": 1.8533291774564887e-05, "loss": 1.4295, "step": 2065 }, { "epoch": 0.35642197878029847, "grad_norm": 0.59375, "learning_rate": 1.8531872345582247e-05, "loss": 1.5259, "step": 2066 }, { "epoch": 0.3565944966790304, "grad_norm": 1.5859375, "learning_rate": 1.853045228450519e-05, "loss": 1.4738, "step": 2067 }, { "epoch": 0.35676701457776244, "grad_norm": 0.64453125, "learning_rate": 1.8529031591438926e-05, "loss": 1.484, "step": 2068 }, { "epoch": 0.35693953247649446, "grad_norm": 1.203125, "learning_rate": 1.8527610266488714e-05, "loss": 1.4609, "step": 2069 }, { "epoch": 0.3571120503752264, "grad_norm": 2.515625, "learning_rate": 1.8526188309759847e-05, "loss": 1.5005, "step": 2070 }, { "epoch": 0.35728456827395844, "grad_norm": 0.703125, "learning_rate": 1.8524765721357676e-05, "loss": 1.495, "step": 2071 }, { "epoch": 0.3574570861726904, "grad_norm": 0.64453125, "learning_rate": 1.8523342501387595e-05, "loss": 1.48, "step": 2072 }, { "epoch": 0.3576296040714224, "grad_norm": 0.73046875, "learning_rate": 1.8521918649955047e-05, "loss": 1.3926, "step": 2073 }, { "epoch": 0.35780212197015443, "grad_norm": 0.6640625, "learning_rate": 1.8520494167165517e-05, "loss": 1.5589, "step": 2074 }, { "epoch": 0.3579746398688864, "grad_norm": 0.609375, "learning_rate": 1.851906905312454e-05, "loss": 1.51, "step": 2075 }, { "epoch": 0.3581471577676184, "grad_norm": 1.359375, "learning_rate": 1.85176433079377e-05, "loss": 1.6039, "step": 2076 }, { "epoch": 0.35831967566635037, "grad_norm": 0.77734375, "learning_rate": 1.8516216931710622e-05, "loss": 1.4755, "step": 2077 }, { "epoch": 0.3584921935650824, "grad_norm": 0.640625, "learning_rate": 1.8514789924548982e-05, "loss": 1.46, "step": 2078 }, { "epoch": 0.35866471146381435, "grad_norm": 0.7109375, "learning_rate": 1.85133622865585e-05, "loss": 1.4687, "step": 2079 }, { "epoch": 0.35883722936254636, "grad_norm": 0.8203125, "learning_rate": 1.851193401784495e-05, "loss": 1.4535, "step": 2080 }, { "epoch": 0.3590097472612784, "grad_norm": 0.6171875, "learning_rate": 1.8510505118514138e-05, "loss": 1.5059, "step": 2081 }, { "epoch": 0.35918226516001034, "grad_norm": 0.69921875, "learning_rate": 1.8509075588671934e-05, "loss": 1.5184, "step": 2082 }, { "epoch": 0.35935478305874236, "grad_norm": 0.76171875, "learning_rate": 1.850764542842424e-05, "loss": 1.5078, "step": 2083 }, { "epoch": 0.3595273009574743, "grad_norm": 0.76953125, "learning_rate": 1.8506214637877017e-05, "loss": 1.4467, "step": 2084 }, { "epoch": 0.35969981885620633, "grad_norm": 0.9453125, "learning_rate": 1.8504783217136265e-05, "loss": 1.5681, "step": 2085 }, { "epoch": 0.35987233675493835, "grad_norm": 0.6015625, "learning_rate": 1.8503351166308027e-05, "loss": 1.5403, "step": 2086 }, { "epoch": 0.3600448546536703, "grad_norm": 0.73046875, "learning_rate": 1.850191848549841e-05, "loss": 1.5065, "step": 2087 }, { "epoch": 0.3602173725524023, "grad_norm": 0.8125, "learning_rate": 1.8500485174813545e-05, "loss": 1.5095, "step": 2088 }, { "epoch": 0.3603898904511343, "grad_norm": 0.640625, "learning_rate": 1.8499051234359627e-05, "loss": 1.4728, "step": 2089 }, { "epoch": 0.3605624083498663, "grad_norm": 0.70703125, "learning_rate": 1.849761666424289e-05, "loss": 1.4538, "step": 2090 }, { "epoch": 0.36073492624859826, "grad_norm": 0.62109375, "learning_rate": 1.8496181464569608e-05, "loss": 1.5757, "step": 2091 }, { "epoch": 0.3609074441473303, "grad_norm": 0.71484375, "learning_rate": 1.8494745635446124e-05, "loss": 1.4783, "step": 2092 }, { "epoch": 0.3610799620460623, "grad_norm": 0.625, "learning_rate": 1.8493309176978802e-05, "loss": 1.4694, "step": 2093 }, { "epoch": 0.36125247994479426, "grad_norm": 0.609375, "learning_rate": 1.849187208927407e-05, "loss": 1.4574, "step": 2094 }, { "epoch": 0.3614249978435263, "grad_norm": 0.6796875, "learning_rate": 1.849043437243839e-05, "loss": 1.5345, "step": 2095 }, { "epoch": 0.36159751574225824, "grad_norm": 0.58984375, "learning_rate": 1.8488996026578286e-05, "loss": 1.4606, "step": 2096 }, { "epoch": 0.36177003364099025, "grad_norm": 0.66796875, "learning_rate": 1.848755705180031e-05, "loss": 1.4965, "step": 2097 }, { "epoch": 0.36194255153972227, "grad_norm": 0.65625, "learning_rate": 1.8486117448211078e-05, "loss": 1.5199, "step": 2098 }, { "epoch": 0.36211506943845423, "grad_norm": 0.703125, "learning_rate": 1.8484677215917243e-05, "loss": 1.4694, "step": 2099 }, { "epoch": 0.36228758733718625, "grad_norm": 0.65234375, "learning_rate": 1.84832363550255e-05, "loss": 1.6452, "step": 2100 }, { "epoch": 0.36228758733718625, "eval_loss": 1.473473072052002, "eval_runtime": 10.9071, "eval_samples_per_second": 93.884, "eval_steps_per_second": 23.471, "step": 2100 }, { "epoch": 0.3624601052359182, "grad_norm": 0.7109375, "learning_rate": 1.8481794865642607e-05, "loss": 1.6222, "step": 2101 }, { "epoch": 0.3626326231346502, "grad_norm": 0.859375, "learning_rate": 1.848035274787535e-05, "loss": 1.4544, "step": 2102 }, { "epoch": 0.36280514103338224, "grad_norm": 0.6328125, "learning_rate": 1.8478910001830578e-05, "loss": 1.4132, "step": 2103 }, { "epoch": 0.3629776589321142, "grad_norm": 0.796875, "learning_rate": 1.8477466627615172e-05, "loss": 1.468, "step": 2104 }, { "epoch": 0.3631501768308462, "grad_norm": 0.59765625, "learning_rate": 1.8476022625336067e-05, "loss": 1.3662, "step": 2105 }, { "epoch": 0.3633226947295782, "grad_norm": 0.61328125, "learning_rate": 1.8474577995100247e-05, "loss": 1.5097, "step": 2106 }, { "epoch": 0.3634952126283102, "grad_norm": 0.62890625, "learning_rate": 1.847313273701474e-05, "loss": 1.3676, "step": 2107 }, { "epoch": 0.36366773052704215, "grad_norm": 0.73828125, "learning_rate": 1.847168685118661e-05, "loss": 1.5271, "step": 2108 }, { "epoch": 0.36384024842577417, "grad_norm": 0.6484375, "learning_rate": 1.847024033772299e-05, "loss": 1.4227, "step": 2109 }, { "epoch": 0.3640127663245062, "grad_norm": 0.68359375, "learning_rate": 1.846879319673104e-05, "loss": 1.3847, "step": 2110 }, { "epoch": 0.36418528422323815, "grad_norm": 0.640625, "learning_rate": 1.8467345428317976e-05, "loss": 1.4954, "step": 2111 }, { "epoch": 0.36435780212197016, "grad_norm": 0.67578125, "learning_rate": 1.8465897032591057e-05, "loss": 1.5108, "step": 2112 }, { "epoch": 0.3645303200207021, "grad_norm": 0.66796875, "learning_rate": 1.8464448009657582e-05, "loss": 1.4386, "step": 2113 }, { "epoch": 0.36470283791943414, "grad_norm": 0.74609375, "learning_rate": 1.8462998359624914e-05, "loss": 1.4996, "step": 2114 }, { "epoch": 0.36487535581816616, "grad_norm": 0.73046875, "learning_rate": 1.846154808260045e-05, "loss": 1.5164, "step": 2115 }, { "epoch": 0.3650478737168981, "grad_norm": 0.6953125, "learning_rate": 1.8460097178691634e-05, "loss": 1.5406, "step": 2116 }, { "epoch": 0.36522039161563014, "grad_norm": 0.640625, "learning_rate": 1.8458645648005957e-05, "loss": 1.4248, "step": 2117 }, { "epoch": 0.3653929095143621, "grad_norm": 0.77734375, "learning_rate": 1.8457193490650957e-05, "loss": 1.4584, "step": 2118 }, { "epoch": 0.3655654274130941, "grad_norm": 0.66015625, "learning_rate": 1.845574070673422e-05, "loss": 1.4591, "step": 2119 }, { "epoch": 0.36573794531182613, "grad_norm": 0.875, "learning_rate": 1.8454287296363382e-05, "loss": 1.3891, "step": 2120 }, { "epoch": 0.3659104632105581, "grad_norm": 0.671875, "learning_rate": 1.8452833259646113e-05, "loss": 1.4364, "step": 2121 }, { "epoch": 0.3660829811092901, "grad_norm": 0.72265625, "learning_rate": 1.8451378596690138e-05, "loss": 1.3792, "step": 2122 }, { "epoch": 0.36625549900802207, "grad_norm": 1.046875, "learning_rate": 1.8449923307603234e-05, "loss": 1.6282, "step": 2123 }, { "epoch": 0.3664280169067541, "grad_norm": 0.671875, "learning_rate": 1.844846739249321e-05, "loss": 1.5017, "step": 2124 }, { "epoch": 0.36660053480548604, "grad_norm": 0.7109375, "learning_rate": 1.8447010851467936e-05, "loss": 1.52, "step": 2125 }, { "epoch": 0.36677305270421806, "grad_norm": 1.0546875, "learning_rate": 1.844555368463532e-05, "loss": 1.5371, "step": 2126 }, { "epoch": 0.3669455706029501, "grad_norm": 0.6015625, "learning_rate": 1.8444095892103314e-05, "loss": 1.4878, "step": 2127 }, { "epoch": 0.36711808850168204, "grad_norm": 0.73046875, "learning_rate": 1.844263747397992e-05, "loss": 1.4632, "step": 2128 }, { "epoch": 0.36729060640041405, "grad_norm": 0.77734375, "learning_rate": 1.8441178430373197e-05, "loss": 1.4249, "step": 2129 }, { "epoch": 0.367463124299146, "grad_norm": 0.67578125, "learning_rate": 1.843971876139123e-05, "loss": 1.4135, "step": 2130 }, { "epoch": 0.36763564219787803, "grad_norm": 0.66015625, "learning_rate": 1.843825846714216e-05, "loss": 1.3984, "step": 2131 }, { "epoch": 0.36780816009661005, "grad_norm": 0.890625, "learning_rate": 1.8436797547734185e-05, "loss": 1.4832, "step": 2132 }, { "epoch": 0.367980677995342, "grad_norm": 0.68359375, "learning_rate": 1.8435336003275525e-05, "loss": 1.476, "step": 2133 }, { "epoch": 0.368153195894074, "grad_norm": 0.59765625, "learning_rate": 1.8433873833874473e-05, "loss": 1.4526, "step": 2134 }, { "epoch": 0.368325713792806, "grad_norm": 0.78125, "learning_rate": 1.843241103963935e-05, "loss": 1.4606, "step": 2135 }, { "epoch": 0.368498231691538, "grad_norm": 0.9140625, "learning_rate": 1.8430947620678522e-05, "loss": 1.463, "step": 2136 }, { "epoch": 0.36867074959026996, "grad_norm": 0.68359375, "learning_rate": 1.8429483577100424e-05, "loss": 1.5067, "step": 2137 }, { "epoch": 0.368843267489002, "grad_norm": 0.91796875, "learning_rate": 1.842801890901351e-05, "loss": 1.4845, "step": 2138 }, { "epoch": 0.369015785387734, "grad_norm": 0.9375, "learning_rate": 1.842655361652629e-05, "loss": 1.5341, "step": 2139 }, { "epoch": 0.36918830328646596, "grad_norm": 0.69921875, "learning_rate": 1.842508769974733e-05, "loss": 1.5092, "step": 2140 }, { "epoch": 0.369360821185198, "grad_norm": 1.03125, "learning_rate": 1.8423621158785232e-05, "loss": 1.4552, "step": 2141 }, { "epoch": 0.36953333908392993, "grad_norm": 0.98046875, "learning_rate": 1.8422153993748645e-05, "loss": 1.5087, "step": 2142 }, { "epoch": 0.36970585698266195, "grad_norm": 0.6328125, "learning_rate": 1.8420686204746264e-05, "loss": 1.6148, "step": 2143 }, { "epoch": 0.36987837488139397, "grad_norm": 0.703125, "learning_rate": 1.8419217791886838e-05, "loss": 1.3866, "step": 2144 }, { "epoch": 0.37005089278012593, "grad_norm": 0.8515625, "learning_rate": 1.8417748755279146e-05, "loss": 1.5025, "step": 2145 }, { "epoch": 0.37022341067885794, "grad_norm": 0.63671875, "learning_rate": 1.8416279095032036e-05, "loss": 1.4978, "step": 2146 }, { "epoch": 0.3703959285775899, "grad_norm": 0.703125, "learning_rate": 1.8414808811254378e-05, "loss": 1.4778, "step": 2147 }, { "epoch": 0.3705684464763219, "grad_norm": 0.81640625, "learning_rate": 1.841333790405511e-05, "loss": 1.4975, "step": 2148 }, { "epoch": 0.37074096437505394, "grad_norm": 0.90234375, "learning_rate": 1.8411866373543196e-05, "loss": 1.5895, "step": 2149 }, { "epoch": 0.3709134822737859, "grad_norm": 0.6015625, "learning_rate": 1.8410394219827665e-05, "loss": 1.4017, "step": 2150 }, { "epoch": 0.3710860001725179, "grad_norm": 0.7890625, "learning_rate": 1.840892144301758e-05, "loss": 1.5247, "step": 2151 }, { "epoch": 0.3712585180712499, "grad_norm": 0.64453125, "learning_rate": 1.8407448043222052e-05, "loss": 1.5868, "step": 2152 }, { "epoch": 0.3714310359699819, "grad_norm": 0.796875, "learning_rate": 1.8405974020550238e-05, "loss": 1.4678, "step": 2153 }, { "epoch": 0.37160355386871385, "grad_norm": 0.63671875, "learning_rate": 1.840449937511135e-05, "loss": 1.4668, "step": 2154 }, { "epoch": 0.37177607176744587, "grad_norm": 0.60546875, "learning_rate": 1.8403024107014633e-05, "loss": 1.4756, "step": 2155 }, { "epoch": 0.3719485896661779, "grad_norm": 0.60546875, "learning_rate": 1.8401548216369387e-05, "loss": 1.4489, "step": 2156 }, { "epoch": 0.37212110756490985, "grad_norm": 0.9296875, "learning_rate": 1.8400071703284952e-05, "loss": 1.5294, "step": 2157 }, { "epoch": 0.37229362546364186, "grad_norm": 0.58203125, "learning_rate": 1.839859456787072e-05, "loss": 1.4748, "step": 2158 }, { "epoch": 0.3724661433623738, "grad_norm": 0.63671875, "learning_rate": 1.8397116810236123e-05, "loss": 1.5687, "step": 2159 }, { "epoch": 0.37263866126110584, "grad_norm": 0.60546875, "learning_rate": 1.839563843049065e-05, "loss": 1.4819, "step": 2160 }, { "epoch": 0.37281117915983786, "grad_norm": 0.67578125, "learning_rate": 1.8394159428743818e-05, "loss": 1.507, "step": 2161 }, { "epoch": 0.3729836970585698, "grad_norm": 0.69140625, "learning_rate": 1.839267980510521e-05, "loss": 1.4811, "step": 2162 }, { "epoch": 0.37315621495730183, "grad_norm": 0.58984375, "learning_rate": 1.839119955968444e-05, "loss": 1.4537, "step": 2163 }, { "epoch": 0.3733287328560338, "grad_norm": 0.7734375, "learning_rate": 1.8389718692591177e-05, "loss": 1.5256, "step": 2164 }, { "epoch": 0.3735012507547658, "grad_norm": 0.59765625, "learning_rate": 1.8388237203935134e-05, "loss": 1.3834, "step": 2165 }, { "epoch": 0.3736737686534978, "grad_norm": 0.640625, "learning_rate": 1.838675509382606e-05, "loss": 1.5364, "step": 2166 }, { "epoch": 0.3738462865522298, "grad_norm": 0.65234375, "learning_rate": 1.8385272362373775e-05, "loss": 1.5554, "step": 2167 }, { "epoch": 0.3740188044509618, "grad_norm": 0.5625, "learning_rate": 1.8383789009688117e-05, "loss": 1.4733, "step": 2168 }, { "epoch": 0.37419132234969377, "grad_norm": 0.6953125, "learning_rate": 1.8382305035878983e-05, "loss": 1.4997, "step": 2169 }, { "epoch": 0.3743638402484258, "grad_norm": 0.84375, "learning_rate": 1.8380820441056317e-05, "loss": 1.4882, "step": 2170 }, { "epoch": 0.37453635814715774, "grad_norm": 0.66015625, "learning_rate": 1.8379335225330108e-05, "loss": 1.5061, "step": 2171 }, { "epoch": 0.37470887604588976, "grad_norm": 0.6328125, "learning_rate": 1.8377849388810386e-05, "loss": 1.5237, "step": 2172 }, { "epoch": 0.3748813939446218, "grad_norm": 0.76953125, "learning_rate": 1.8376362931607237e-05, "loss": 1.485, "step": 2173 }, { "epoch": 0.37505391184335374, "grad_norm": 0.7734375, "learning_rate": 1.8374875853830784e-05, "loss": 1.437, "step": 2174 }, { "epoch": 0.37522642974208575, "grad_norm": 0.921875, "learning_rate": 1.8373388155591197e-05, "loss": 1.4549, "step": 2175 }, { "epoch": 0.3753989476408177, "grad_norm": 0.68359375, "learning_rate": 1.8371899836998697e-05, "loss": 1.464, "step": 2176 }, { "epoch": 0.37557146553954973, "grad_norm": 0.84375, "learning_rate": 1.8370410898163553e-05, "loss": 1.508, "step": 2177 }, { "epoch": 0.37574398343828175, "grad_norm": 0.62890625, "learning_rate": 1.836892133919606e-05, "loss": 1.5496, "step": 2178 }, { "epoch": 0.3759165013370137, "grad_norm": 0.63671875, "learning_rate": 1.8367431160206586e-05, "loss": 1.4058, "step": 2179 }, { "epoch": 0.3760890192357457, "grad_norm": 0.60546875, "learning_rate": 1.8365940361305528e-05, "loss": 1.4875, "step": 2180 }, { "epoch": 0.3762615371344777, "grad_norm": 0.6640625, "learning_rate": 1.836444894260334e-05, "loss": 1.5064, "step": 2181 }, { "epoch": 0.3764340550332097, "grad_norm": 0.6015625, "learning_rate": 1.8362956904210507e-05, "loss": 1.4744, "step": 2182 }, { "epoch": 0.37660657293194166, "grad_norm": 0.65625, "learning_rate": 1.8361464246237575e-05, "loss": 1.483, "step": 2183 }, { "epoch": 0.3767790908306737, "grad_norm": 0.6484375, "learning_rate": 1.835997096879512e-05, "loss": 1.4102, "step": 2184 }, { "epoch": 0.3769516087294057, "grad_norm": 0.71484375, "learning_rate": 1.8358477071993787e-05, "loss": 1.5903, "step": 2185 }, { "epoch": 0.37712412662813766, "grad_norm": 0.765625, "learning_rate": 1.8356982555944245e-05, "loss": 1.4794, "step": 2186 }, { "epoch": 0.3772966445268697, "grad_norm": 0.68359375, "learning_rate": 1.8355487420757218e-05, "loss": 1.4062, "step": 2187 }, { "epoch": 0.37746916242560163, "grad_norm": 0.703125, "learning_rate": 1.8353991666543477e-05, "loss": 1.4301, "step": 2188 }, { "epoch": 0.37764168032433365, "grad_norm": 0.74609375, "learning_rate": 1.8352495293413833e-05, "loss": 1.4539, "step": 2189 }, { "epoch": 0.37781419822306567, "grad_norm": 0.7578125, "learning_rate": 1.8350998301479147e-05, "loss": 1.5107, "step": 2190 }, { "epoch": 0.37798671612179763, "grad_norm": 0.7890625, "learning_rate": 1.834950069085033e-05, "loss": 1.4468, "step": 2191 }, { "epoch": 0.37815923402052964, "grad_norm": 0.58984375, "learning_rate": 1.8348002461638333e-05, "loss": 1.4679, "step": 2192 }, { "epoch": 0.3783317519192616, "grad_norm": 0.59765625, "learning_rate": 1.834650361395415e-05, "loss": 1.4242, "step": 2193 }, { "epoch": 0.3785042698179936, "grad_norm": 0.66796875, "learning_rate": 1.8345004147908828e-05, "loss": 1.4978, "step": 2194 }, { "epoch": 0.37867678771672564, "grad_norm": 0.734375, "learning_rate": 1.834350406361346e-05, "loss": 1.4935, "step": 2195 }, { "epoch": 0.3788493056154576, "grad_norm": 0.765625, "learning_rate": 1.834200336117918e-05, "loss": 1.5364, "step": 2196 }, { "epoch": 0.3790218235141896, "grad_norm": 0.60546875, "learning_rate": 1.8340502040717162e-05, "loss": 1.5463, "step": 2197 }, { "epoch": 0.3791943414129216, "grad_norm": 0.63671875, "learning_rate": 1.833900010233864e-05, "loss": 1.4713, "step": 2198 }, { "epoch": 0.3793668593116536, "grad_norm": 0.5859375, "learning_rate": 1.8337497546154888e-05, "loss": 1.548, "step": 2199 }, { "epoch": 0.37953937721038555, "grad_norm": 0.7890625, "learning_rate": 1.833599437227722e-05, "loss": 1.4916, "step": 2200 }, { "epoch": 0.37953937721038555, "eval_loss": 1.4695838689804077, "eval_runtime": 10.8414, "eval_samples_per_second": 94.453, "eval_steps_per_second": 23.613, "step": 2200 }, { "epoch": 0.37971189510911757, "grad_norm": 0.67578125, "learning_rate": 1.8334490580817005e-05, "loss": 1.5291, "step": 2201 }, { "epoch": 0.3798844130078496, "grad_norm": 0.74609375, "learning_rate": 1.8332986171885652e-05, "loss": 1.5046, "step": 2202 }, { "epoch": 0.38005693090658155, "grad_norm": 0.6484375, "learning_rate": 1.8331481145594617e-05, "loss": 1.4797, "step": 2203 }, { "epoch": 0.38022944880531356, "grad_norm": 0.7109375, "learning_rate": 1.83299755020554e-05, "loss": 1.5314, "step": 2204 }, { "epoch": 0.3804019667040455, "grad_norm": 0.83984375, "learning_rate": 1.8328469241379546e-05, "loss": 1.5771, "step": 2205 }, { "epoch": 0.38057448460277754, "grad_norm": 0.6875, "learning_rate": 1.8326962363678656e-05, "loss": 1.474, "step": 2206 }, { "epoch": 0.38074700250150956, "grad_norm": 0.87890625, "learning_rate": 1.8325454869064366e-05, "loss": 1.4271, "step": 2207 }, { "epoch": 0.3809195204002415, "grad_norm": 0.703125, "learning_rate": 1.8323946757648357e-05, "loss": 1.5268, "step": 2208 }, { "epoch": 0.38109203829897353, "grad_norm": 1.0234375, "learning_rate": 1.8322438029542364e-05, "loss": 1.5208, "step": 2209 }, { "epoch": 0.3812645561977055, "grad_norm": 0.9296875, "learning_rate": 1.8320928684858162e-05, "loss": 1.4641, "step": 2210 }, { "epoch": 0.3814370740964375, "grad_norm": 0.77734375, "learning_rate": 1.8319418723707568e-05, "loss": 1.4852, "step": 2211 }, { "epoch": 0.3816095919951695, "grad_norm": 1.03125, "learning_rate": 1.8317908146202455e-05, "loss": 1.4708, "step": 2212 }, { "epoch": 0.3817821098939015, "grad_norm": 0.890625, "learning_rate": 1.831639695245473e-05, "loss": 1.5509, "step": 2213 }, { "epoch": 0.3819546277926335, "grad_norm": 0.6484375, "learning_rate": 1.831488514257636e-05, "loss": 1.5525, "step": 2214 }, { "epoch": 0.38212714569136547, "grad_norm": 0.828125, "learning_rate": 1.8313372716679344e-05, "loss": 1.4788, "step": 2215 }, { "epoch": 0.3822996635900975, "grad_norm": 0.86328125, "learning_rate": 1.8311859674875728e-05, "loss": 1.5408, "step": 2216 }, { "epoch": 0.38247218148882944, "grad_norm": 0.8828125, "learning_rate": 1.8310346017277618e-05, "loss": 1.4896, "step": 2217 }, { "epoch": 0.38264469938756146, "grad_norm": 0.78515625, "learning_rate": 1.8308831743997147e-05, "loss": 1.4985, "step": 2218 }, { "epoch": 0.3828172172862935, "grad_norm": 0.7734375, "learning_rate": 1.8307316855146507e-05, "loss": 1.5242, "step": 2219 }, { "epoch": 0.38298973518502544, "grad_norm": 0.7578125, "learning_rate": 1.8305801350837926e-05, "loss": 1.5283, "step": 2220 }, { "epoch": 0.38316225308375745, "grad_norm": 0.63671875, "learning_rate": 1.8304285231183683e-05, "loss": 1.4785, "step": 2221 }, { "epoch": 0.3833347709824894, "grad_norm": 0.77734375, "learning_rate": 1.8302768496296105e-05, "loss": 1.4535, "step": 2222 }, { "epoch": 0.38350728888122143, "grad_norm": 0.9296875, "learning_rate": 1.8301251146287557e-05, "loss": 1.5102, "step": 2223 }, { "epoch": 0.38367980677995345, "grad_norm": 0.62109375, "learning_rate": 1.8299733181270455e-05, "loss": 1.4878, "step": 2224 }, { "epoch": 0.3838523246786854, "grad_norm": 0.58984375, "learning_rate": 1.829821460135726e-05, "loss": 1.5581, "step": 2225 }, { "epoch": 0.3840248425774174, "grad_norm": 0.7265625, "learning_rate": 1.8296695406660477e-05, "loss": 1.4887, "step": 2226 }, { "epoch": 0.3841973604761494, "grad_norm": 0.6953125, "learning_rate": 1.829517559729266e-05, "loss": 1.3807, "step": 2227 }, { "epoch": 0.3843698783748814, "grad_norm": 0.77734375, "learning_rate": 1.8293655173366405e-05, "loss": 1.355, "step": 2228 }, { "epoch": 0.38454239627361336, "grad_norm": 0.61328125, "learning_rate": 1.829213413499435e-05, "loss": 1.516, "step": 2229 }, { "epoch": 0.3847149141723454, "grad_norm": 0.65625, "learning_rate": 1.829061248228919e-05, "loss": 1.4854, "step": 2230 }, { "epoch": 0.3848874320710774, "grad_norm": 0.640625, "learning_rate": 1.8289090215363653e-05, "loss": 1.4833, "step": 2231 }, { "epoch": 0.38505994996980936, "grad_norm": 0.72265625, "learning_rate": 1.8287567334330522e-05, "loss": 1.6318, "step": 2232 }, { "epoch": 0.3852324678685414, "grad_norm": 0.8046875, "learning_rate": 1.8286043839302618e-05, "loss": 1.5281, "step": 2233 }, { "epoch": 0.38540498576727333, "grad_norm": 0.64453125, "learning_rate": 1.8284519730392813e-05, "loss": 1.5238, "step": 2234 }, { "epoch": 0.38557750366600535, "grad_norm": 0.75390625, "learning_rate": 1.828299500771402e-05, "loss": 1.4661, "step": 2235 }, { "epoch": 0.38575002156473737, "grad_norm": 0.66796875, "learning_rate": 1.8281469671379208e-05, "loss": 1.4864, "step": 2236 }, { "epoch": 0.3859225394634693, "grad_norm": 0.73828125, "learning_rate": 1.8279943721501376e-05, "loss": 1.5738, "step": 2237 }, { "epoch": 0.38609505736220134, "grad_norm": 0.71875, "learning_rate": 1.8278417158193575e-05, "loss": 1.5128, "step": 2238 }, { "epoch": 0.3862675752609333, "grad_norm": 0.73046875, "learning_rate": 1.827688998156891e-05, "loss": 1.4928, "step": 2239 }, { "epoch": 0.3864400931596653, "grad_norm": 0.59375, "learning_rate": 1.8275362191740514e-05, "loss": 1.4992, "step": 2240 }, { "epoch": 0.3866126110583973, "grad_norm": 1.0390625, "learning_rate": 1.827383378882158e-05, "loss": 1.5278, "step": 2241 }, { "epoch": 0.3867851289571293, "grad_norm": 0.59375, "learning_rate": 1.8272304772925342e-05, "loss": 1.494, "step": 2242 }, { "epoch": 0.3869576468558613, "grad_norm": 0.7578125, "learning_rate": 1.8270775144165082e-05, "loss": 1.3458, "step": 2243 }, { "epoch": 0.3871301647545933, "grad_norm": 0.6875, "learning_rate": 1.8269244902654116e-05, "loss": 1.4868, "step": 2244 }, { "epoch": 0.3873026826533253, "grad_norm": 0.6171875, "learning_rate": 1.8267714048505822e-05, "loss": 1.5205, "step": 2245 }, { "epoch": 0.38747520055205725, "grad_norm": 0.73828125, "learning_rate": 1.8266182581833615e-05, "loss": 1.4998, "step": 2246 }, { "epoch": 0.38764771845078927, "grad_norm": 0.609375, "learning_rate": 1.8264650502750946e-05, "loss": 1.5161, "step": 2247 }, { "epoch": 0.3878202363495213, "grad_norm": 0.65625, "learning_rate": 1.8263117811371333e-05, "loss": 1.4975, "step": 2248 }, { "epoch": 0.38799275424825325, "grad_norm": 0.80859375, "learning_rate": 1.8261584507808318e-05, "loss": 1.4637, "step": 2249 }, { "epoch": 0.38816527214698526, "grad_norm": 0.64453125, "learning_rate": 1.8260050592175507e-05, "loss": 1.4393, "step": 2250 }, { "epoch": 0.3883377900457172, "grad_norm": 0.5859375, "learning_rate": 1.8258516064586532e-05, "loss": 1.4482, "step": 2251 }, { "epoch": 0.38851030794444924, "grad_norm": 0.8359375, "learning_rate": 1.825698092515509e-05, "loss": 1.5549, "step": 2252 }, { "epoch": 0.38868282584318126, "grad_norm": 0.6640625, "learning_rate": 1.8255445173994907e-05, "loss": 1.4593, "step": 2253 }, { "epoch": 0.3888553437419132, "grad_norm": 0.828125, "learning_rate": 1.8253908811219764e-05, "loss": 1.5047, "step": 2254 }, { "epoch": 0.38902786164064523, "grad_norm": 0.69921875, "learning_rate": 1.8252371836943483e-05, "loss": 1.5405, "step": 2255 }, { "epoch": 0.3892003795393772, "grad_norm": 0.9296875, "learning_rate": 1.825083425127993e-05, "loss": 1.4635, "step": 2256 }, { "epoch": 0.3893728974381092, "grad_norm": 0.66796875, "learning_rate": 1.8249296054343026e-05, "loss": 1.5867, "step": 2257 }, { "epoch": 0.38954541533684117, "grad_norm": 0.87109375, "learning_rate": 1.8247757246246726e-05, "loss": 1.4289, "step": 2258 }, { "epoch": 0.3897179332355732, "grad_norm": 0.65625, "learning_rate": 1.824621782710503e-05, "loss": 1.4538, "step": 2259 }, { "epoch": 0.3898904511343052, "grad_norm": 0.72265625, "learning_rate": 1.8244677797032e-05, "loss": 1.4613, "step": 2260 }, { "epoch": 0.39006296903303717, "grad_norm": 0.76171875, "learning_rate": 1.824313715614172e-05, "loss": 1.5135, "step": 2261 }, { "epoch": 0.3902354869317692, "grad_norm": 0.62890625, "learning_rate": 1.8241595904548336e-05, "loss": 1.4293, "step": 2262 }, { "epoch": 0.39040800483050114, "grad_norm": 0.703125, "learning_rate": 1.8240054042366026e-05, "loss": 1.4713, "step": 2263 }, { "epoch": 0.39058052272923316, "grad_norm": 0.73046875, "learning_rate": 1.8238511569709033e-05, "loss": 1.5722, "step": 2264 }, { "epoch": 0.3907530406279652, "grad_norm": 0.625, "learning_rate": 1.823696848669162e-05, "loss": 1.5584, "step": 2265 }, { "epoch": 0.39092555852669714, "grad_norm": 0.84375, "learning_rate": 1.823542479342812e-05, "loss": 1.5078, "step": 2266 }, { "epoch": 0.39109807642542915, "grad_norm": 0.7421875, "learning_rate": 1.823388049003289e-05, "loss": 1.5375, "step": 2267 }, { "epoch": 0.3912705943241611, "grad_norm": 0.62109375, "learning_rate": 1.823233557662035e-05, "loss": 1.5285, "step": 2268 }, { "epoch": 0.39144311222289313, "grad_norm": 0.9609375, "learning_rate": 1.823079005330495e-05, "loss": 1.4881, "step": 2269 }, { "epoch": 0.3916156301216251, "grad_norm": 0.7109375, "learning_rate": 1.8229243920201194e-05, "loss": 1.5836, "step": 2270 }, { "epoch": 0.3917881480203571, "grad_norm": 0.625, "learning_rate": 1.822769717742363e-05, "loss": 1.498, "step": 2271 }, { "epoch": 0.3919606659190891, "grad_norm": 0.75390625, "learning_rate": 1.822614982508685e-05, "loss": 1.5081, "step": 2272 }, { "epoch": 0.3921331838178211, "grad_norm": 0.66796875, "learning_rate": 1.8224601863305495e-05, "loss": 1.4186, "step": 2273 }, { "epoch": 0.3923057017165531, "grad_norm": 0.62109375, "learning_rate": 1.822305329219424e-05, "loss": 1.4982, "step": 2274 }, { "epoch": 0.39247821961528506, "grad_norm": 0.83984375, "learning_rate": 1.8221504111867817e-05, "loss": 1.4796, "step": 2275 }, { "epoch": 0.3926507375140171, "grad_norm": 0.76171875, "learning_rate": 1.8219954322441e-05, "loss": 1.5749, "step": 2276 }, { "epoch": 0.3928232554127491, "grad_norm": 0.859375, "learning_rate": 1.8218403924028608e-05, "loss": 1.4309, "step": 2277 }, { "epoch": 0.39299577331148106, "grad_norm": 0.85546875, "learning_rate": 1.82168529167455e-05, "loss": 1.5298, "step": 2278 }, { "epoch": 0.3931682912102131, "grad_norm": 0.65234375, "learning_rate": 1.8215301300706584e-05, "loss": 1.4536, "step": 2279 }, { "epoch": 0.39334080910894503, "grad_norm": 0.71875, "learning_rate": 1.821374907602682e-05, "loss": 1.4692, "step": 2280 }, { "epoch": 0.39351332700767705, "grad_norm": 0.890625, "learning_rate": 1.8212196242821206e-05, "loss": 1.4213, "step": 2281 }, { "epoch": 0.39368584490640907, "grad_norm": 0.59765625, "learning_rate": 1.8210642801204775e-05, "loss": 1.4134, "step": 2282 }, { "epoch": 0.393858362805141, "grad_norm": 0.66015625, "learning_rate": 1.820908875129263e-05, "loss": 1.4815, "step": 2283 }, { "epoch": 0.39403088070387304, "grad_norm": 0.65234375, "learning_rate": 1.820753409319989e-05, "loss": 1.4424, "step": 2284 }, { "epoch": 0.394203398602605, "grad_norm": 0.6484375, "learning_rate": 1.8205978827041745e-05, "loss": 1.4839, "step": 2285 }, { "epoch": 0.394375916501337, "grad_norm": 1.171875, "learning_rate": 1.8204422952933416e-05, "loss": 1.4836, "step": 2286 }, { "epoch": 0.394548434400069, "grad_norm": 0.74609375, "learning_rate": 1.8202866470990172e-05, "loss": 1.5224, "step": 2287 }, { "epoch": 0.394720952298801, "grad_norm": 0.6484375, "learning_rate": 1.8201309381327324e-05, "loss": 1.3705, "step": 2288 }, { "epoch": 0.394893470197533, "grad_norm": 0.67578125, "learning_rate": 1.819975168406023e-05, "loss": 1.4925, "step": 2289 }, { "epoch": 0.395065988096265, "grad_norm": 0.77734375, "learning_rate": 1.81981933793043e-05, "loss": 1.5281, "step": 2290 }, { "epoch": 0.395238505994997, "grad_norm": 0.85546875, "learning_rate": 1.8196634467174982e-05, "loss": 1.5847, "step": 2291 }, { "epoch": 0.39541102389372895, "grad_norm": 0.859375, "learning_rate": 1.8195074947787764e-05, "loss": 1.5413, "step": 2292 }, { "epoch": 0.39558354179246097, "grad_norm": 0.734375, "learning_rate": 1.8193514821258188e-05, "loss": 1.306, "step": 2293 }, { "epoch": 0.395756059691193, "grad_norm": 0.5859375, "learning_rate": 1.819195408770184e-05, "loss": 1.4656, "step": 2294 }, { "epoch": 0.39592857758992495, "grad_norm": 0.65234375, "learning_rate": 1.819039274723435e-05, "loss": 1.4589, "step": 2295 }, { "epoch": 0.39610109548865696, "grad_norm": 0.69921875, "learning_rate": 1.818883079997139e-05, "loss": 1.5112, "step": 2296 }, { "epoch": 0.3962736133873889, "grad_norm": 0.6640625, "learning_rate": 1.8187268246028672e-05, "loss": 1.4949, "step": 2297 }, { "epoch": 0.39644613128612094, "grad_norm": 0.59375, "learning_rate": 1.818570508552197e-05, "loss": 1.4868, "step": 2298 }, { "epoch": 0.39661864918485296, "grad_norm": 0.57421875, "learning_rate": 1.818414131856709e-05, "loss": 1.521, "step": 2299 }, { "epoch": 0.3967911670835849, "grad_norm": 0.6484375, "learning_rate": 1.818257694527988e-05, "loss": 1.3609, "step": 2300 }, { "epoch": 0.3967911670835849, "eval_loss": 1.4656120538711548, "eval_runtime": 10.8637, "eval_samples_per_second": 94.259, "eval_steps_per_second": 23.565, "step": 2300 }, { "epoch": 0.39696368498231693, "grad_norm": 0.6328125, "learning_rate": 1.8181011965776244e-05, "loss": 1.4911, "step": 2301 }, { "epoch": 0.3971362028810489, "grad_norm": 0.60546875, "learning_rate": 1.8179446380172127e-05, "loss": 1.6361, "step": 2302 }, { "epoch": 0.3973087207797809, "grad_norm": 0.625, "learning_rate": 1.8177880188583513e-05, "loss": 1.4886, "step": 2303 }, { "epoch": 0.39748123867851287, "grad_norm": 0.578125, "learning_rate": 1.8176313391126438e-05, "loss": 1.3953, "step": 2304 }, { "epoch": 0.3976537565772449, "grad_norm": 0.76953125, "learning_rate": 1.817474598791698e-05, "loss": 1.5849, "step": 2305 }, { "epoch": 0.3978262744759769, "grad_norm": 0.59375, "learning_rate": 1.8173177979071256e-05, "loss": 1.434, "step": 2306 }, { "epoch": 0.39799879237470887, "grad_norm": 0.640625, "learning_rate": 1.8171609364705443e-05, "loss": 1.6028, "step": 2307 }, { "epoch": 0.3981713102734409, "grad_norm": 0.7734375, "learning_rate": 1.8170040144935747e-05, "loss": 1.4931, "step": 2308 }, { "epoch": 0.39834382817217284, "grad_norm": 0.65625, "learning_rate": 1.816847031987843e-05, "loss": 1.4978, "step": 2309 }, { "epoch": 0.39851634607090486, "grad_norm": 0.79296875, "learning_rate": 1.8166899889649795e-05, "loss": 1.4645, "step": 2310 }, { "epoch": 0.3986888639696369, "grad_norm": 0.67578125, "learning_rate": 1.8165328854366183e-05, "loss": 1.5468, "step": 2311 }, { "epoch": 0.39886138186836884, "grad_norm": 0.66796875, "learning_rate": 1.8163757214143993e-05, "loss": 1.5296, "step": 2312 }, { "epoch": 0.39903389976710085, "grad_norm": 0.671875, "learning_rate": 1.8162184969099658e-05, "loss": 1.4244, "step": 2313 }, { "epoch": 0.3992064176658328, "grad_norm": 0.59765625, "learning_rate": 1.816061211934966e-05, "loss": 1.5313, "step": 2314 }, { "epoch": 0.39937893556456483, "grad_norm": 0.65625, "learning_rate": 1.8159038665010528e-05, "loss": 1.4613, "step": 2315 }, { "epoch": 0.3995514534632968, "grad_norm": 0.65625, "learning_rate": 1.8157464606198832e-05, "loss": 1.4243, "step": 2316 }, { "epoch": 0.3997239713620288, "grad_norm": 0.609375, "learning_rate": 1.8155889943031186e-05, "loss": 1.5109, "step": 2317 }, { "epoch": 0.3998964892607608, "grad_norm": 0.6171875, "learning_rate": 1.815431467562425e-05, "loss": 1.5602, "step": 2318 }, { "epoch": 0.4000690071594928, "grad_norm": 0.65625, "learning_rate": 1.8152738804094737e-05, "loss": 1.499, "step": 2319 }, { "epoch": 0.4002415250582248, "grad_norm": 0.62890625, "learning_rate": 1.815116232855939e-05, "loss": 1.4352, "step": 2320 }, { "epoch": 0.40041404295695676, "grad_norm": 0.67578125, "learning_rate": 1.8149585249135008e-05, "loss": 1.552, "step": 2321 }, { "epoch": 0.4005865608556888, "grad_norm": 0.64453125, "learning_rate": 1.814800756593843e-05, "loss": 1.5098, "step": 2322 }, { "epoch": 0.4007590787544208, "grad_norm": 0.95703125, "learning_rate": 1.814642927908654e-05, "loss": 1.5874, "step": 2323 }, { "epoch": 0.40093159665315276, "grad_norm": 0.81640625, "learning_rate": 1.814485038869627e-05, "loss": 1.5504, "step": 2324 }, { "epoch": 0.40110411455188477, "grad_norm": 0.83984375, "learning_rate": 1.814327089488459e-05, "loss": 1.5221, "step": 2325 }, { "epoch": 0.40127663245061673, "grad_norm": 0.62109375, "learning_rate": 1.814169079776852e-05, "loss": 1.512, "step": 2326 }, { "epoch": 0.40144915034934875, "grad_norm": 0.73046875, "learning_rate": 1.8140110097465123e-05, "loss": 1.5012, "step": 2327 }, { "epoch": 0.40162166824808077, "grad_norm": 0.828125, "learning_rate": 1.8138528794091514e-05, "loss": 1.5045, "step": 2328 }, { "epoch": 0.4017941861468127, "grad_norm": 1.1796875, "learning_rate": 1.813694688776483e-05, "loss": 1.4746, "step": 2329 }, { "epoch": 0.40196670404554474, "grad_norm": 0.7734375, "learning_rate": 1.8135364378602288e-05, "loss": 1.5166, "step": 2330 }, { "epoch": 0.4021392219442767, "grad_norm": 0.59765625, "learning_rate": 1.8133781266721114e-05, "loss": 1.4975, "step": 2331 }, { "epoch": 0.4023117398430087, "grad_norm": 0.6796875, "learning_rate": 1.8132197552238608e-05, "loss": 1.5039, "step": 2332 }, { "epoch": 0.4024842577417407, "grad_norm": 0.65234375, "learning_rate": 1.813061323527209e-05, "loss": 1.5281, "step": 2333 }, { "epoch": 0.4026567756404727, "grad_norm": 0.67578125, "learning_rate": 1.8129028315938944e-05, "loss": 1.5463, "step": 2334 }, { "epoch": 0.4028292935392047, "grad_norm": 0.59765625, "learning_rate": 1.8127442794356585e-05, "loss": 1.4572, "step": 2335 }, { "epoch": 0.4030018114379367, "grad_norm": 0.8359375, "learning_rate": 1.8125856670642485e-05, "loss": 1.5668, "step": 2336 }, { "epoch": 0.4031743293366687, "grad_norm": 0.703125, "learning_rate": 1.8124269944914147e-05, "loss": 1.4871, "step": 2337 }, { "epoch": 0.40334684723540065, "grad_norm": 1.046875, "learning_rate": 1.812268261728913e-05, "loss": 1.5133, "step": 2338 }, { "epoch": 0.40351936513413267, "grad_norm": 0.69140625, "learning_rate": 1.812109468788503e-05, "loss": 1.3911, "step": 2339 }, { "epoch": 0.4036918830328647, "grad_norm": 0.90234375, "learning_rate": 1.8119506156819495e-05, "loss": 1.5736, "step": 2340 }, { "epoch": 0.40386440093159665, "grad_norm": 0.8125, "learning_rate": 1.811791702421021e-05, "loss": 1.5105, "step": 2341 }, { "epoch": 0.40403691883032866, "grad_norm": 0.87890625, "learning_rate": 1.811632729017491e-05, "loss": 1.5973, "step": 2342 }, { "epoch": 0.4042094367290606, "grad_norm": 0.890625, "learning_rate": 1.8114736954831367e-05, "loss": 1.4065, "step": 2343 }, { "epoch": 0.40438195462779264, "grad_norm": 0.90234375, "learning_rate": 1.8113146018297413e-05, "loss": 1.4329, "step": 2344 }, { "epoch": 0.4045544725265246, "grad_norm": 0.6875, "learning_rate": 1.81115544806909e-05, "loss": 1.55, "step": 2345 }, { "epoch": 0.4047269904252566, "grad_norm": 0.75390625, "learning_rate": 1.8109962342129757e-05, "loss": 1.4663, "step": 2346 }, { "epoch": 0.40489950832398863, "grad_norm": 0.69921875, "learning_rate": 1.8108369602731928e-05, "loss": 1.5616, "step": 2347 }, { "epoch": 0.4050720262227206, "grad_norm": 0.88671875, "learning_rate": 1.810677626261541e-05, "loss": 1.4522, "step": 2348 }, { "epoch": 0.4052445441214526, "grad_norm": 0.66796875, "learning_rate": 1.810518232189826e-05, "loss": 1.5224, "step": 2349 }, { "epoch": 0.40541706202018457, "grad_norm": 0.640625, "learning_rate": 1.8103587780698556e-05, "loss": 1.428, "step": 2350 }, { "epoch": 0.4055895799189166, "grad_norm": 2.296875, "learning_rate": 1.8101992639134438e-05, "loss": 1.5132, "step": 2351 }, { "epoch": 0.4057620978176486, "grad_norm": 0.7421875, "learning_rate": 1.810039689732408e-05, "loss": 1.4774, "step": 2352 }, { "epoch": 0.40593461571638056, "grad_norm": 0.65234375, "learning_rate": 1.8098800555385707e-05, "loss": 1.5203, "step": 2353 }, { "epoch": 0.4061071336151126, "grad_norm": 0.640625, "learning_rate": 1.8097203613437586e-05, "loss": 1.4957, "step": 2354 }, { "epoch": 0.40627965151384454, "grad_norm": 0.625, "learning_rate": 1.8095606071598028e-05, "loss": 1.4267, "step": 2355 }, { "epoch": 0.40645216941257656, "grad_norm": 0.67578125, "learning_rate": 1.8094007929985387e-05, "loss": 1.5352, "step": 2356 }, { "epoch": 0.4066246873113086, "grad_norm": 0.55859375, "learning_rate": 1.8092409188718064e-05, "loss": 1.5219, "step": 2357 }, { "epoch": 0.40679720521004054, "grad_norm": 0.59375, "learning_rate": 1.8090809847914506e-05, "loss": 1.4196, "step": 2358 }, { "epoch": 0.40696972310877255, "grad_norm": 0.65625, "learning_rate": 1.80892099076932e-05, "loss": 1.5713, "step": 2359 }, { "epoch": 0.4071422410075045, "grad_norm": 0.6015625, "learning_rate": 1.808760936817268e-05, "loss": 1.4479, "step": 2360 }, { "epoch": 0.40731475890623653, "grad_norm": 0.609375, "learning_rate": 1.8086008229471527e-05, "loss": 1.5341, "step": 2361 }, { "epoch": 0.4074872768049685, "grad_norm": 0.62109375, "learning_rate": 1.8084406491708358e-05, "loss": 1.5566, "step": 2362 }, { "epoch": 0.4076597947037005, "grad_norm": 0.64453125, "learning_rate": 1.8082804155001842e-05, "loss": 1.4349, "step": 2363 }, { "epoch": 0.4078323126024325, "grad_norm": 0.78515625, "learning_rate": 1.8081201219470694e-05, "loss": 1.4505, "step": 2364 }, { "epoch": 0.4080048305011645, "grad_norm": 0.640625, "learning_rate": 1.807959768523366e-05, "loss": 1.4418, "step": 2365 }, { "epoch": 0.4081773483998965, "grad_norm": 0.66796875, "learning_rate": 1.807799355240955e-05, "loss": 1.5607, "step": 2366 }, { "epoch": 0.40834986629862846, "grad_norm": 0.87109375, "learning_rate": 1.8076388821117204e-05, "loss": 1.5266, "step": 2367 }, { "epoch": 0.4085223841973605, "grad_norm": 0.609375, "learning_rate": 1.807478349147551e-05, "loss": 1.5094, "step": 2368 }, { "epoch": 0.4086949020960925, "grad_norm": 0.71875, "learning_rate": 1.8073177563603403e-05, "loss": 1.4372, "step": 2369 }, { "epoch": 0.40886741999482445, "grad_norm": 0.69921875, "learning_rate": 1.8071571037619856e-05, "loss": 1.5019, "step": 2370 }, { "epoch": 0.40903993789355647, "grad_norm": 0.60546875, "learning_rate": 1.8069963913643893e-05, "loss": 1.5055, "step": 2371 }, { "epoch": 0.40921245579228843, "grad_norm": 0.62109375, "learning_rate": 1.806835619179458e-05, "loss": 1.4157, "step": 2372 }, { "epoch": 0.40938497369102045, "grad_norm": 0.69140625, "learning_rate": 1.8066747872191028e-05, "loss": 1.4324, "step": 2373 }, { "epoch": 0.4095574915897524, "grad_norm": 0.60546875, "learning_rate": 1.806513895495239e-05, "loss": 1.4795, "step": 2374 }, { "epoch": 0.4097300094884844, "grad_norm": 0.6015625, "learning_rate": 1.8063529440197866e-05, "loss": 1.504, "step": 2375 }, { "epoch": 0.40990252738721644, "grad_norm": 0.703125, "learning_rate": 1.8061919328046695e-05, "loss": 1.4982, "step": 2376 }, { "epoch": 0.4100750452859484, "grad_norm": 1.0625, "learning_rate": 1.806030861861817e-05, "loss": 1.4587, "step": 2377 }, { "epoch": 0.4102475631846804, "grad_norm": 0.59765625, "learning_rate": 1.8058697312031615e-05, "loss": 1.4981, "step": 2378 }, { "epoch": 0.4104200810834124, "grad_norm": 0.72265625, "learning_rate": 1.8057085408406415e-05, "loss": 1.4198, "step": 2379 }, { "epoch": 0.4105925989821444, "grad_norm": 0.78515625, "learning_rate": 1.805547290786198e-05, "loss": 1.4076, "step": 2380 }, { "epoch": 0.4107651168808764, "grad_norm": 0.640625, "learning_rate": 1.8053859810517785e-05, "loss": 1.4019, "step": 2381 }, { "epoch": 0.4109376347796084, "grad_norm": 0.7734375, "learning_rate": 1.805224611649333e-05, "loss": 1.4232, "step": 2382 }, { "epoch": 0.4111101526783404, "grad_norm": 0.77734375, "learning_rate": 1.805063182590817e-05, "loss": 1.4582, "step": 2383 }, { "epoch": 0.41128267057707235, "grad_norm": 0.60546875, "learning_rate": 1.8049016938881897e-05, "loss": 1.4875, "step": 2384 }, { "epoch": 0.41145518847580437, "grad_norm": 0.7265625, "learning_rate": 1.8047401455534162e-05, "loss": 1.4733, "step": 2385 }, { "epoch": 0.4116277063745364, "grad_norm": 0.703125, "learning_rate": 1.8045785375984642e-05, "loss": 1.5484, "step": 2386 }, { "epoch": 0.41180022427326834, "grad_norm": 0.6640625, "learning_rate": 1.8044168700353073e-05, "loss": 1.4825, "step": 2387 }, { "epoch": 0.41197274217200036, "grad_norm": 0.8984375, "learning_rate": 1.804255142875922e-05, "loss": 1.5638, "step": 2388 }, { "epoch": 0.4121452600707323, "grad_norm": 0.80078125, "learning_rate": 1.8040933561322905e-05, "loss": 1.5131, "step": 2389 }, { "epoch": 0.41231777796946434, "grad_norm": 0.85546875, "learning_rate": 1.8039315098163993e-05, "loss": 1.421, "step": 2390 }, { "epoch": 0.4124902958681963, "grad_norm": 1.0, "learning_rate": 1.8037696039402385e-05, "loss": 1.4433, "step": 2391 }, { "epoch": 0.4126628137669283, "grad_norm": 0.69140625, "learning_rate": 1.8036076385158034e-05, "loss": 1.4687, "step": 2392 }, { "epoch": 0.41283533166566033, "grad_norm": 0.8359375, "learning_rate": 1.803445613555093e-05, "loss": 1.5166, "step": 2393 }, { "epoch": 0.4130078495643923, "grad_norm": 0.58984375, "learning_rate": 1.8032835290701115e-05, "loss": 1.4204, "step": 2394 }, { "epoch": 0.4131803674631243, "grad_norm": 0.73046875, "learning_rate": 1.803121385072867e-05, "loss": 1.4271, "step": 2395 }, { "epoch": 0.41335288536185627, "grad_norm": 1.359375, "learning_rate": 1.802959181575372e-05, "loss": 1.4582, "step": 2396 }, { "epoch": 0.4135254032605883, "grad_norm": 0.76953125, "learning_rate": 1.802796918589644e-05, "loss": 1.5298, "step": 2397 }, { "epoch": 0.4136979211593203, "grad_norm": 0.578125, "learning_rate": 1.802634596127704e-05, "loss": 1.459, "step": 2398 }, { "epoch": 0.41387043905805226, "grad_norm": 0.625, "learning_rate": 1.8024722142015784e-05, "loss": 1.4324, "step": 2399 }, { "epoch": 0.4140429569567843, "grad_norm": 0.90234375, "learning_rate": 1.8023097728232967e-05, "loss": 1.437, "step": 2400 }, { "epoch": 0.4140429569567843, "eval_loss": 1.4613255262374878, "eval_runtime": 10.8914, "eval_samples_per_second": 94.019, "eval_steps_per_second": 23.505, "step": 2400 }, { "epoch": 0.41421547485551624, "grad_norm": 0.62109375, "learning_rate": 1.802147272004894e-05, "loss": 1.4324, "step": 2401 }, { "epoch": 0.41438799275424826, "grad_norm": 0.99609375, "learning_rate": 1.8019847117584092e-05, "loss": 1.441, "step": 2402 }, { "epoch": 0.4145605106529803, "grad_norm": 0.61328125, "learning_rate": 1.8018220920958864e-05, "loss": 1.4981, "step": 2403 }, { "epoch": 0.41473302855171224, "grad_norm": 0.71484375, "learning_rate": 1.8016594130293725e-05, "loss": 1.5251, "step": 2404 }, { "epoch": 0.41490554645044425, "grad_norm": 0.6640625, "learning_rate": 1.8014966745709202e-05, "loss": 1.5364, "step": 2405 }, { "epoch": 0.4150780643491762, "grad_norm": 0.75, "learning_rate": 1.8013338767325866e-05, "loss": 1.4724, "step": 2406 }, { "epoch": 0.41525058224790823, "grad_norm": 0.83203125, "learning_rate": 1.8011710195264323e-05, "loss": 1.5819, "step": 2407 }, { "epoch": 0.4154231001466402, "grad_norm": 0.65625, "learning_rate": 1.8010081029645232e-05, "loss": 1.4644, "step": 2408 }, { "epoch": 0.4155956180453722, "grad_norm": 0.67578125, "learning_rate": 1.8008451270589288e-05, "loss": 1.519, "step": 2409 }, { "epoch": 0.4157681359441042, "grad_norm": 0.58984375, "learning_rate": 1.8006820918217233e-05, "loss": 1.4889, "step": 2410 }, { "epoch": 0.4159406538428362, "grad_norm": 0.6640625, "learning_rate": 1.8005189972649856e-05, "loss": 1.4611, "step": 2411 }, { "epoch": 0.4161131717415682, "grad_norm": 0.66015625, "learning_rate": 1.800355843400799e-05, "loss": 1.5453, "step": 2412 }, { "epoch": 0.41628568964030016, "grad_norm": 0.82421875, "learning_rate": 1.8001926302412503e-05, "loss": 1.4353, "step": 2413 }, { "epoch": 0.4164582075390322, "grad_norm": 0.65234375, "learning_rate": 1.8000293577984318e-05, "loss": 1.4559, "step": 2414 }, { "epoch": 0.4166307254377642, "grad_norm": 0.66015625, "learning_rate": 1.7998660260844397e-05, "loss": 1.395, "step": 2415 }, { "epoch": 0.41680324333649615, "grad_norm": 0.73828125, "learning_rate": 1.7997026351113746e-05, "loss": 1.3569, "step": 2416 }, { "epoch": 0.41697576123522817, "grad_norm": 0.66796875, "learning_rate": 1.7995391848913414e-05, "loss": 1.5016, "step": 2417 }, { "epoch": 0.41714827913396013, "grad_norm": 0.67578125, "learning_rate": 1.7993756754364497e-05, "loss": 1.5362, "step": 2418 }, { "epoch": 0.41732079703269215, "grad_norm": 0.71875, "learning_rate": 1.7992121067588134e-05, "loss": 1.4501, "step": 2419 }, { "epoch": 0.4174933149314241, "grad_norm": 0.9140625, "learning_rate": 1.7990484788705506e-05, "loss": 1.4937, "step": 2420 }, { "epoch": 0.4176658328301561, "grad_norm": 0.6328125, "learning_rate": 1.7988847917837832e-05, "loss": 1.4663, "step": 2421 }, { "epoch": 0.41783835072888814, "grad_norm": 0.61328125, "learning_rate": 1.7987210455106395e-05, "loss": 1.5524, "step": 2422 }, { "epoch": 0.4180108686276201, "grad_norm": 0.76171875, "learning_rate": 1.7985572400632496e-05, "loss": 1.5553, "step": 2423 }, { "epoch": 0.4181833865263521, "grad_norm": 0.71484375, "learning_rate": 1.7983933754537498e-05, "loss": 1.4366, "step": 2424 }, { "epoch": 0.4183559044250841, "grad_norm": 24.875, "learning_rate": 1.7982294516942804e-05, "loss": 1.5607, "step": 2425 }, { "epoch": 0.4185284223238161, "grad_norm": 0.69921875, "learning_rate": 1.7980654687969853e-05, "loss": 1.4021, "step": 2426 }, { "epoch": 0.4187009402225481, "grad_norm": 0.69140625, "learning_rate": 1.797901426774014e-05, "loss": 1.4621, "step": 2427 }, { "epoch": 0.4188734581212801, "grad_norm": 0.6640625, "learning_rate": 1.7977373256375194e-05, "loss": 1.4756, "step": 2428 }, { "epoch": 0.4190459760200121, "grad_norm": 0.67578125, "learning_rate": 1.7975731653996594e-05, "loss": 1.4211, "step": 2429 }, { "epoch": 0.41921849391874405, "grad_norm": 0.62890625, "learning_rate": 1.7974089460725958e-05, "loss": 1.4056, "step": 2430 }, { "epoch": 0.41939101181747607, "grad_norm": 0.640625, "learning_rate": 1.797244667668495e-05, "loss": 1.4528, "step": 2431 }, { "epoch": 0.4195635297162081, "grad_norm": 0.98046875, "learning_rate": 1.7970803301995278e-05, "loss": 1.4564, "step": 2432 }, { "epoch": 0.41973604761494004, "grad_norm": 0.65234375, "learning_rate": 1.796915933677869e-05, "loss": 1.3605, "step": 2433 }, { "epoch": 0.41990856551367206, "grad_norm": 0.78515625, "learning_rate": 1.7967514781156988e-05, "loss": 1.4233, "step": 2434 }, { "epoch": 0.420081083412404, "grad_norm": 0.703125, "learning_rate": 1.7965869635252005e-05, "loss": 1.3589, "step": 2435 }, { "epoch": 0.42025360131113604, "grad_norm": 0.78125, "learning_rate": 1.7964223899185633e-05, "loss": 1.564, "step": 2436 }, { "epoch": 0.420426119209868, "grad_norm": 0.86328125, "learning_rate": 1.7962577573079785e-05, "loss": 1.4458, "step": 2437 }, { "epoch": 0.4205986371086, "grad_norm": 0.73828125, "learning_rate": 1.796093065705644e-05, "loss": 1.4488, "step": 2438 }, { "epoch": 0.42077115500733203, "grad_norm": 1.0234375, "learning_rate": 1.795928315123761e-05, "loss": 1.6364, "step": 2439 }, { "epoch": 0.420943672906064, "grad_norm": 1.03125, "learning_rate": 1.7957635055745354e-05, "loss": 1.5654, "step": 2440 }, { "epoch": 0.421116190804796, "grad_norm": 0.921875, "learning_rate": 1.7955986370701768e-05, "loss": 1.5302, "step": 2441 }, { "epoch": 0.42128870870352797, "grad_norm": 1.734375, "learning_rate": 1.7954337096229e-05, "loss": 1.4639, "step": 2442 }, { "epoch": 0.42146122660226, "grad_norm": 0.94921875, "learning_rate": 1.7952687232449242e-05, "loss": 1.422, "step": 2443 }, { "epoch": 0.421633744500992, "grad_norm": 0.71875, "learning_rate": 1.7951036779484723e-05, "loss": 1.4756, "step": 2444 }, { "epoch": 0.42180626239972396, "grad_norm": 0.59375, "learning_rate": 1.7949385737457716e-05, "loss": 1.4052, "step": 2445 }, { "epoch": 0.421978780298456, "grad_norm": 0.77734375, "learning_rate": 1.7947734106490545e-05, "loss": 1.4661, "step": 2446 }, { "epoch": 0.42215129819718794, "grad_norm": 0.75390625, "learning_rate": 1.7946081886705574e-05, "loss": 1.5564, "step": 2447 }, { "epoch": 0.42232381609591996, "grad_norm": 0.6953125, "learning_rate": 1.7944429078225204e-05, "loss": 1.4868, "step": 2448 }, { "epoch": 0.4224963339946519, "grad_norm": 0.640625, "learning_rate": 1.794277568117189e-05, "loss": 1.4422, "step": 2449 }, { "epoch": 0.42266885189338393, "grad_norm": 0.74609375, "learning_rate": 1.794112169566813e-05, "loss": 1.4882, "step": 2450 }, { "epoch": 0.42284136979211595, "grad_norm": 0.83984375, "learning_rate": 1.793946712183645e-05, "loss": 1.6392, "step": 2451 }, { "epoch": 0.4230138876908479, "grad_norm": 0.66015625, "learning_rate": 1.793781195979944e-05, "loss": 1.517, "step": 2452 }, { "epoch": 0.42318640558957993, "grad_norm": 0.79296875, "learning_rate": 1.793615620967972e-05, "loss": 1.4967, "step": 2453 }, { "epoch": 0.4233589234883119, "grad_norm": 0.6953125, "learning_rate": 1.7934499871599962e-05, "loss": 1.5063, "step": 2454 }, { "epoch": 0.4235314413870439, "grad_norm": 0.609375, "learning_rate": 1.7932842945682877e-05, "loss": 1.4665, "step": 2455 }, { "epoch": 0.4237039592857759, "grad_norm": 0.765625, "learning_rate": 1.7931185432051216e-05, "loss": 1.4857, "step": 2456 }, { "epoch": 0.4238764771845079, "grad_norm": 0.8828125, "learning_rate": 1.7929527330827788e-05, "loss": 1.4581, "step": 2457 }, { "epoch": 0.4240489950832399, "grad_norm": 0.72265625, "learning_rate": 1.7927868642135427e-05, "loss": 1.5367, "step": 2458 }, { "epoch": 0.42422151298197186, "grad_norm": 0.78125, "learning_rate": 1.792620936609702e-05, "loss": 1.5317, "step": 2459 }, { "epoch": 0.4243940308807039, "grad_norm": 0.625, "learning_rate": 1.79245495028355e-05, "loss": 1.4564, "step": 2460 }, { "epoch": 0.4245665487794359, "grad_norm": 0.66796875, "learning_rate": 1.7922889052473834e-05, "loss": 1.4537, "step": 2461 }, { "epoch": 0.42473906667816785, "grad_norm": 0.91015625, "learning_rate": 1.792122801513505e-05, "loss": 1.4462, "step": 2462 }, { "epoch": 0.42491158457689987, "grad_norm": 0.71875, "learning_rate": 1.7919566390942193e-05, "loss": 1.4502, "step": 2463 }, { "epoch": 0.42508410247563183, "grad_norm": 0.671875, "learning_rate": 1.791790418001838e-05, "loss": 1.5052, "step": 2464 }, { "epoch": 0.42525662037436385, "grad_norm": 0.76171875, "learning_rate": 1.7916241382486745e-05, "loss": 1.4147, "step": 2465 }, { "epoch": 0.4254291382730958, "grad_norm": 0.62109375, "learning_rate": 1.791457799847049e-05, "loss": 1.4445, "step": 2466 }, { "epoch": 0.4256016561718278, "grad_norm": 0.71875, "learning_rate": 1.7912914028092845e-05, "loss": 1.5408, "step": 2467 }, { "epoch": 0.42577417407055984, "grad_norm": 0.6796875, "learning_rate": 1.7911249471477085e-05, "loss": 1.4918, "step": 2468 }, { "epoch": 0.4259466919692918, "grad_norm": 0.66796875, "learning_rate": 1.7909584328746533e-05, "loss": 1.3957, "step": 2469 }, { "epoch": 0.4261192098680238, "grad_norm": 0.5859375, "learning_rate": 1.790791860002455e-05, "loss": 1.4524, "step": 2470 }, { "epoch": 0.4262917277667558, "grad_norm": 0.6171875, "learning_rate": 1.790625228543455e-05, "loss": 1.5006, "step": 2471 }, { "epoch": 0.4264642456654878, "grad_norm": 0.6640625, "learning_rate": 1.790458538509998e-05, "loss": 1.6025, "step": 2472 }, { "epoch": 0.4266367635642198, "grad_norm": 0.58984375, "learning_rate": 1.790291789914433e-05, "loss": 1.5626, "step": 2473 }, { "epoch": 0.4268092814629518, "grad_norm": 0.734375, "learning_rate": 1.7901249827691148e-05, "loss": 1.5277, "step": 2474 }, { "epoch": 0.4269817993616838, "grad_norm": 0.609375, "learning_rate": 1.7899581170864003e-05, "loss": 1.4732, "step": 2475 }, { "epoch": 0.42715431726041575, "grad_norm": 0.8203125, "learning_rate": 1.7897911928786535e-05, "loss": 1.5614, "step": 2476 }, { "epoch": 0.42732683515914777, "grad_norm": 0.62109375, "learning_rate": 1.7896242101582393e-05, "loss": 1.5511, "step": 2477 }, { "epoch": 0.4274993530578797, "grad_norm": 0.5703125, "learning_rate": 1.7894571689375305e-05, "loss": 1.4692, "step": 2478 }, { "epoch": 0.42767187095661174, "grad_norm": 0.59375, "learning_rate": 1.7892900692289016e-05, "loss": 1.4351, "step": 2479 }, { "epoch": 0.42784438885534376, "grad_norm": 0.60546875, "learning_rate": 1.7891229110447325e-05, "loss": 1.4667, "step": 2480 }, { "epoch": 0.4280169067540757, "grad_norm": 0.625, "learning_rate": 1.7889556943974078e-05, "loss": 1.4757, "step": 2481 }, { "epoch": 0.42818942465280774, "grad_norm": 0.640625, "learning_rate": 1.7887884192993155e-05, "loss": 1.5014, "step": 2482 }, { "epoch": 0.4283619425515397, "grad_norm": 0.6953125, "learning_rate": 1.7886210857628485e-05, "loss": 1.5169, "step": 2483 }, { "epoch": 0.4285344604502717, "grad_norm": 0.75, "learning_rate": 1.788453693800404e-05, "loss": 1.4848, "step": 2484 }, { "epoch": 0.42870697834900373, "grad_norm": 0.7265625, "learning_rate": 1.7882862434243835e-05, "loss": 1.4559, "step": 2485 }, { "epoch": 0.4288794962477357, "grad_norm": 0.7265625, "learning_rate": 1.7881187346471924e-05, "loss": 1.5918, "step": 2486 }, { "epoch": 0.4290520141464677, "grad_norm": 0.84765625, "learning_rate": 1.7879511674812413e-05, "loss": 1.4414, "step": 2487 }, { "epoch": 0.42922453204519967, "grad_norm": 0.828125, "learning_rate": 1.7877835419389444e-05, "loss": 1.4403, "step": 2488 }, { "epoch": 0.4293970499439317, "grad_norm": 0.6796875, "learning_rate": 1.78761585803272e-05, "loss": 1.57, "step": 2489 }, { "epoch": 0.4295695678426637, "grad_norm": 0.87109375, "learning_rate": 1.787448115774992e-05, "loss": 1.4331, "step": 2490 }, { "epoch": 0.42974208574139566, "grad_norm": 1.0390625, "learning_rate": 1.787280315178187e-05, "loss": 1.4587, "step": 2491 }, { "epoch": 0.4299146036401277, "grad_norm": 0.61328125, "learning_rate": 1.7871124562547372e-05, "loss": 1.537, "step": 2492 }, { "epoch": 0.43008712153885964, "grad_norm": 0.921875, "learning_rate": 1.7869445390170787e-05, "loss": 1.4619, "step": 2493 }, { "epoch": 0.43025963943759166, "grad_norm": 0.59375, "learning_rate": 1.7867765634776516e-05, "loss": 1.4177, "step": 2494 }, { "epoch": 0.4304321573363236, "grad_norm": 0.578125, "learning_rate": 1.786608529648901e-05, "loss": 1.4781, "step": 2495 }, { "epoch": 0.43060467523505563, "grad_norm": 0.73046875, "learning_rate": 1.786440437543275e-05, "loss": 1.5143, "step": 2496 }, { "epoch": 0.43077719313378765, "grad_norm": 0.6328125, "learning_rate": 1.786272287173228e-05, "loss": 1.383, "step": 2497 }, { "epoch": 0.4309497110325196, "grad_norm": 0.73046875, "learning_rate": 1.786104078551217e-05, "loss": 1.551, "step": 2498 }, { "epoch": 0.4311222289312516, "grad_norm": 0.60546875, "learning_rate": 1.7859358116897034e-05, "loss": 1.4651, "step": 2499 }, { "epoch": 0.4312947468299836, "grad_norm": 0.77734375, "learning_rate": 1.7857674866011546e-05, "loss": 1.5387, "step": 2500 }, { "epoch": 0.4312947468299836, "eval_loss": 1.4578332901000977, "eval_runtime": 10.8462, "eval_samples_per_second": 94.411, "eval_steps_per_second": 23.603, "step": 2500 }, { "epoch": 0.4314672647287156, "grad_norm": 0.7578125, "learning_rate": 1.785599103298041e-05, "loss": 1.3842, "step": 2501 }, { "epoch": 0.4316397826274476, "grad_norm": 0.6171875, "learning_rate": 1.7854306617928366e-05, "loss": 1.5601, "step": 2502 }, { "epoch": 0.4318123005261796, "grad_norm": 0.66796875, "learning_rate": 1.7852621620980216e-05, "loss": 1.4351, "step": 2503 }, { "epoch": 0.4319848184249116, "grad_norm": 0.7578125, "learning_rate": 1.785093604226079e-05, "loss": 1.5191, "step": 2504 }, { "epoch": 0.43215733632364356, "grad_norm": 0.69140625, "learning_rate": 1.784924988189497e-05, "loss": 1.4955, "step": 2505 }, { "epoch": 0.4323298542223756, "grad_norm": 0.58203125, "learning_rate": 1.7847563140007665e-05, "loss": 1.435, "step": 2506 }, { "epoch": 0.4325023721211076, "grad_norm": 0.71484375, "learning_rate": 1.7845875816723855e-05, "loss": 1.4883, "step": 2507 }, { "epoch": 0.43267489001983955, "grad_norm": 0.58984375, "learning_rate": 1.7844187912168543e-05, "loss": 1.4455, "step": 2508 }, { "epoch": 0.43284740791857157, "grad_norm": 0.65625, "learning_rate": 1.784249942646678e-05, "loss": 1.4211, "step": 2509 }, { "epoch": 0.43301992581730353, "grad_norm": 0.87890625, "learning_rate": 1.784081035974365e-05, "loss": 1.4285, "step": 2510 }, { "epoch": 0.43319244371603555, "grad_norm": 0.65234375, "learning_rate": 1.7839120712124297e-05, "loss": 1.3663, "step": 2511 }, { "epoch": 0.4333649616147675, "grad_norm": 0.765625, "learning_rate": 1.7837430483733908e-05, "loss": 1.5058, "step": 2512 }, { "epoch": 0.4335374795134995, "grad_norm": 0.74609375, "learning_rate": 1.783573967469769e-05, "loss": 1.555, "step": 2513 }, { "epoch": 0.43370999741223154, "grad_norm": 0.66796875, "learning_rate": 1.7834048285140923e-05, "loss": 1.5479, "step": 2514 }, { "epoch": 0.4338825153109635, "grad_norm": 0.7109375, "learning_rate": 1.7832356315188907e-05, "loss": 1.5398, "step": 2515 }, { "epoch": 0.4340550332096955, "grad_norm": 0.70703125, "learning_rate": 1.7830663764966995e-05, "loss": 1.4866, "step": 2516 }, { "epoch": 0.4342275511084275, "grad_norm": 0.65625, "learning_rate": 1.7828970634600584e-05, "loss": 1.4726, "step": 2517 }, { "epoch": 0.4344000690071595, "grad_norm": 0.62109375, "learning_rate": 1.7827276924215113e-05, "loss": 1.43, "step": 2518 }, { "epoch": 0.4345725869058915, "grad_norm": 0.69140625, "learning_rate": 1.7825582633936058e-05, "loss": 1.4749, "step": 2519 }, { "epoch": 0.4347451048046235, "grad_norm": 0.8671875, "learning_rate": 1.7823887763888944e-05, "loss": 1.5154, "step": 2520 }, { "epoch": 0.4349176227033555, "grad_norm": 0.65625, "learning_rate": 1.782219231419934e-05, "loss": 1.4378, "step": 2521 }, { "epoch": 0.43509014060208745, "grad_norm": 0.8125, "learning_rate": 1.7820496284992853e-05, "loss": 1.5858, "step": 2522 }, { "epoch": 0.43526265850081947, "grad_norm": 0.57421875, "learning_rate": 1.781879967639514e-05, "loss": 1.5369, "step": 2523 }, { "epoch": 0.4354351763995514, "grad_norm": 0.78125, "learning_rate": 1.781710248853189e-05, "loss": 1.4153, "step": 2524 }, { "epoch": 0.43560769429828344, "grad_norm": 0.68359375, "learning_rate": 1.7815404721528848e-05, "loss": 1.4206, "step": 2525 }, { "epoch": 0.43578021219701546, "grad_norm": 0.6796875, "learning_rate": 1.7813706375511784e-05, "loss": 1.5314, "step": 2526 }, { "epoch": 0.4359527300957474, "grad_norm": 0.58984375, "learning_rate": 1.7812007450606536e-05, "loss": 1.4995, "step": 2527 }, { "epoch": 0.43612524799447944, "grad_norm": 0.640625, "learning_rate": 1.781030794693896e-05, "loss": 1.4864, "step": 2528 }, { "epoch": 0.4362977658932114, "grad_norm": 0.69921875, "learning_rate": 1.7808607864634976e-05, "loss": 1.475, "step": 2529 }, { "epoch": 0.4364702837919434, "grad_norm": 0.91796875, "learning_rate": 1.7806907203820525e-05, "loss": 1.4901, "step": 2530 }, { "epoch": 0.43664280169067543, "grad_norm": 0.6328125, "learning_rate": 1.780520596462161e-05, "loss": 1.4808, "step": 2531 }, { "epoch": 0.4368153195894074, "grad_norm": 0.80859375, "learning_rate": 1.780350414716427e-05, "loss": 1.5526, "step": 2532 }, { "epoch": 0.4369878374881394, "grad_norm": 0.62890625, "learning_rate": 1.7801801751574583e-05, "loss": 1.4987, "step": 2533 }, { "epoch": 0.43716035538687137, "grad_norm": 0.7734375, "learning_rate": 1.780009877797867e-05, "loss": 1.455, "step": 2534 }, { "epoch": 0.4373328732856034, "grad_norm": 0.671875, "learning_rate": 1.779839522650271e-05, "loss": 1.5205, "step": 2535 }, { "epoch": 0.4375053911843354, "grad_norm": 0.703125, "learning_rate": 1.7796691097272902e-05, "loss": 1.5363, "step": 2536 }, { "epoch": 0.43767790908306736, "grad_norm": 0.78125, "learning_rate": 1.77949863904155e-05, "loss": 1.4779, "step": 2537 }, { "epoch": 0.4378504269817994, "grad_norm": 0.734375, "learning_rate": 1.77932811060568e-05, "loss": 1.4271, "step": 2538 }, { "epoch": 0.43802294488053134, "grad_norm": 0.82421875, "learning_rate": 1.7791575244323143e-05, "loss": 1.5411, "step": 2539 }, { "epoch": 0.43819546277926336, "grad_norm": 0.625, "learning_rate": 1.7789868805340908e-05, "loss": 1.551, "step": 2540 }, { "epoch": 0.4383679806779953, "grad_norm": 0.9140625, "learning_rate": 1.7788161789236518e-05, "loss": 1.4683, "step": 2541 }, { "epoch": 0.43854049857672733, "grad_norm": 0.8046875, "learning_rate": 1.778645419613644e-05, "loss": 1.4342, "step": 2542 }, { "epoch": 0.43871301647545935, "grad_norm": 1.078125, "learning_rate": 1.7784746026167184e-05, "loss": 1.4436, "step": 2543 }, { "epoch": 0.4388855343741913, "grad_norm": 0.84375, "learning_rate": 1.77830372794553e-05, "loss": 1.4579, "step": 2544 }, { "epoch": 0.4390580522729233, "grad_norm": 0.625, "learning_rate": 1.7781327956127385e-05, "loss": 1.5987, "step": 2545 }, { "epoch": 0.4392305701716553, "grad_norm": 1.4375, "learning_rate": 1.7779618056310074e-05, "loss": 1.5104, "step": 2546 }, { "epoch": 0.4394030880703873, "grad_norm": 0.59765625, "learning_rate": 1.777790758013005e-05, "loss": 1.5887, "step": 2547 }, { "epoch": 0.4395756059691193, "grad_norm": 0.82421875, "learning_rate": 1.777619652771403e-05, "loss": 1.5033, "step": 2548 }, { "epoch": 0.4397481238678513, "grad_norm": 0.72265625, "learning_rate": 1.7774484899188788e-05, "loss": 1.5848, "step": 2549 }, { "epoch": 0.4399206417665833, "grad_norm": 0.75390625, "learning_rate": 1.7772772694681123e-05, "loss": 1.5275, "step": 2550 }, { "epoch": 0.44009315966531526, "grad_norm": 0.8046875, "learning_rate": 1.777105991431789e-05, "loss": 1.4234, "step": 2551 }, { "epoch": 0.4402656775640473, "grad_norm": 0.78515625, "learning_rate": 1.7769346558225987e-05, "loss": 1.5312, "step": 2552 }, { "epoch": 0.44043819546277924, "grad_norm": 0.6171875, "learning_rate": 1.7767632626532344e-05, "loss": 1.5402, "step": 2553 }, { "epoch": 0.44061071336151125, "grad_norm": 0.734375, "learning_rate": 1.7765918119363942e-05, "loss": 1.4823, "step": 2554 }, { "epoch": 0.44078323126024327, "grad_norm": 1.0234375, "learning_rate": 1.77642030368478e-05, "loss": 1.5659, "step": 2555 }, { "epoch": 0.44095574915897523, "grad_norm": 0.80859375, "learning_rate": 1.7762487379110984e-05, "loss": 1.518, "step": 2556 }, { "epoch": 0.44112826705770725, "grad_norm": 0.90234375, "learning_rate": 1.7760771146280603e-05, "loss": 1.4378, "step": 2557 }, { "epoch": 0.4413007849564392, "grad_norm": 0.70703125, "learning_rate": 1.7759054338483803e-05, "loss": 1.4941, "step": 2558 }, { "epoch": 0.4414733028551712, "grad_norm": 0.7265625, "learning_rate": 1.7757336955847775e-05, "loss": 1.5745, "step": 2559 }, { "epoch": 0.44164582075390324, "grad_norm": 0.63671875, "learning_rate": 1.7755618998499757e-05, "loss": 1.5342, "step": 2560 }, { "epoch": 0.4418183386526352, "grad_norm": 0.87109375, "learning_rate": 1.7753900466567024e-05, "loss": 1.5415, "step": 2561 }, { "epoch": 0.4419908565513672, "grad_norm": 0.69921875, "learning_rate": 1.7752181360176895e-05, "loss": 1.4617, "step": 2562 }, { "epoch": 0.4421633744500992, "grad_norm": 0.62890625, "learning_rate": 1.7750461679456737e-05, "loss": 1.5255, "step": 2563 }, { "epoch": 0.4423358923488312, "grad_norm": 0.7421875, "learning_rate": 1.7748741424533947e-05, "loss": 1.454, "step": 2564 }, { "epoch": 0.4425084102475632, "grad_norm": 0.8203125, "learning_rate": 1.7747020595535976e-05, "loss": 1.5193, "step": 2565 }, { "epoch": 0.44268092814629517, "grad_norm": 0.7109375, "learning_rate": 1.7745299192590317e-05, "loss": 1.4076, "step": 2566 }, { "epoch": 0.4428534460450272, "grad_norm": 0.83203125, "learning_rate": 1.7743577215824494e-05, "loss": 1.4981, "step": 2567 }, { "epoch": 0.44302596394375915, "grad_norm": 0.68359375, "learning_rate": 1.774185466536609e-05, "loss": 1.5218, "step": 2568 }, { "epoch": 0.44319848184249117, "grad_norm": 0.81640625, "learning_rate": 1.774013154134272e-05, "loss": 1.5258, "step": 2569 }, { "epoch": 0.4433709997412231, "grad_norm": 1.03125, "learning_rate": 1.7738407843882037e-05, "loss": 1.5437, "step": 2570 }, { "epoch": 0.44354351763995514, "grad_norm": 0.6484375, "learning_rate": 1.773668357311175e-05, "loss": 1.45, "step": 2571 }, { "epoch": 0.44371603553868716, "grad_norm": 0.859375, "learning_rate": 1.773495872915961e-05, "loss": 1.4216, "step": 2572 }, { "epoch": 0.4438885534374191, "grad_norm": 0.84765625, "learning_rate": 1.773323331215339e-05, "loss": 1.342, "step": 2573 }, { "epoch": 0.44406107133615114, "grad_norm": 0.7265625, "learning_rate": 1.7731507322220932e-05, "loss": 1.5443, "step": 2574 }, { "epoch": 0.4442335892348831, "grad_norm": 0.62890625, "learning_rate": 1.77297807594901e-05, "loss": 1.5211, "step": 2575 }, { "epoch": 0.4444061071336151, "grad_norm": 0.84765625, "learning_rate": 1.7728053624088812e-05, "loss": 1.4566, "step": 2576 }, { "epoch": 0.44457862503234713, "grad_norm": 0.97265625, "learning_rate": 1.7726325916145027e-05, "loss": 1.4819, "step": 2577 }, { "epoch": 0.4447511429310791, "grad_norm": 0.6015625, "learning_rate": 1.7724597635786737e-05, "loss": 1.4937, "step": 2578 }, { "epoch": 0.4449236608298111, "grad_norm": 0.83984375, "learning_rate": 1.7722868783141992e-05, "loss": 1.5356, "step": 2579 }, { "epoch": 0.44509617872854307, "grad_norm": 0.78515625, "learning_rate": 1.772113935833887e-05, "loss": 1.4269, "step": 2580 }, { "epoch": 0.4452686966272751, "grad_norm": 0.69140625, "learning_rate": 1.7719409361505503e-05, "loss": 1.5314, "step": 2581 }, { "epoch": 0.4454412145260071, "grad_norm": 0.71875, "learning_rate": 1.7717678792770056e-05, "loss": 1.4201, "step": 2582 }, { "epoch": 0.44561373242473906, "grad_norm": 0.8046875, "learning_rate": 1.771594765226074e-05, "loss": 1.4507, "step": 2583 }, { "epoch": 0.4457862503234711, "grad_norm": 0.79296875, "learning_rate": 1.7714215940105813e-05, "loss": 1.4426, "step": 2584 }, { "epoch": 0.44595876822220304, "grad_norm": 0.69140625, "learning_rate": 1.771248365643357e-05, "loss": 1.4883, "step": 2585 }, { "epoch": 0.44613128612093506, "grad_norm": 0.7421875, "learning_rate": 1.7710750801372345e-05, "loss": 1.4473, "step": 2586 }, { "epoch": 0.446303804019667, "grad_norm": 0.63671875, "learning_rate": 1.7709017375050525e-05, "loss": 1.4357, "step": 2587 }, { "epoch": 0.44647632191839903, "grad_norm": 0.65625, "learning_rate": 1.7707283377596526e-05, "loss": 1.372, "step": 2588 }, { "epoch": 0.44664883981713105, "grad_norm": 0.66015625, "learning_rate": 1.770554880913882e-05, "loss": 1.5774, "step": 2589 }, { "epoch": 0.446821357715863, "grad_norm": 0.6875, "learning_rate": 1.770381366980591e-05, "loss": 1.4317, "step": 2590 }, { "epoch": 0.446993875614595, "grad_norm": 0.6796875, "learning_rate": 1.7702077959726346e-05, "loss": 1.5231, "step": 2591 }, { "epoch": 0.447166393513327, "grad_norm": 0.7109375, "learning_rate": 1.7700341679028725e-05, "loss": 1.4977, "step": 2592 }, { "epoch": 0.447338911412059, "grad_norm": 0.79296875, "learning_rate": 1.769860482784168e-05, "loss": 1.5746, "step": 2593 }, { "epoch": 0.447511429310791, "grad_norm": 0.77734375, "learning_rate": 1.769686740629388e-05, "loss": 1.474, "step": 2594 }, { "epoch": 0.447683947209523, "grad_norm": 0.55859375, "learning_rate": 1.7695129414514057e-05, "loss": 1.4457, "step": 2595 }, { "epoch": 0.447856465108255, "grad_norm": 0.81640625, "learning_rate": 1.769339085263096e-05, "loss": 1.5435, "step": 2596 }, { "epoch": 0.44802898300698696, "grad_norm": 0.67578125, "learning_rate": 1.76916517207734e-05, "loss": 1.4355, "step": 2597 }, { "epoch": 0.448201500905719, "grad_norm": 0.6796875, "learning_rate": 1.7689912019070223e-05, "loss": 1.4525, "step": 2598 }, { "epoch": 0.44837401880445094, "grad_norm": 0.59375, "learning_rate": 1.7688171747650313e-05, "loss": 1.4333, "step": 2599 }, { "epoch": 0.44854653670318295, "grad_norm": 0.671875, "learning_rate": 1.7686430906642602e-05, "loss": 1.5239, "step": 2600 }, { "epoch": 0.44854653670318295, "eval_loss": 1.4545358419418335, "eval_runtime": 11.0064, "eval_samples_per_second": 93.037, "eval_steps_per_second": 23.259, "step": 2600 }, { "epoch": 0.44871905460191497, "grad_norm": 0.6171875, "learning_rate": 1.7684689496176065e-05, "loss": 1.4694, "step": 2601 }, { "epoch": 0.44889157250064693, "grad_norm": 0.6171875, "learning_rate": 1.7682947516379706e-05, "loss": 1.4667, "step": 2602 }, { "epoch": 0.44906409039937895, "grad_norm": 0.65625, "learning_rate": 1.7681204967382597e-05, "loss": 1.5195, "step": 2603 }, { "epoch": 0.4492366082981109, "grad_norm": 0.86328125, "learning_rate": 1.767946184931383e-05, "loss": 1.4439, "step": 2604 }, { "epoch": 0.4494091261968429, "grad_norm": 0.625, "learning_rate": 1.7677718162302546e-05, "loss": 1.5034, "step": 2605 }, { "epoch": 0.44958164409557494, "grad_norm": 0.71484375, "learning_rate": 1.7675973906477924e-05, "loss": 1.4178, "step": 2606 }, { "epoch": 0.4497541619943069, "grad_norm": 0.625, "learning_rate": 1.7674229081969195e-05, "loss": 1.3571, "step": 2607 }, { "epoch": 0.4499266798930389, "grad_norm": 0.59375, "learning_rate": 1.7672483688905622e-05, "loss": 1.3652, "step": 2608 }, { "epoch": 0.4500991977917709, "grad_norm": 0.703125, "learning_rate": 1.767073772741652e-05, "loss": 1.4889, "step": 2609 }, { "epoch": 0.4502717156905029, "grad_norm": 0.6171875, "learning_rate": 1.766899119763124e-05, "loss": 1.5149, "step": 2610 }, { "epoch": 0.4504442335892349, "grad_norm": 0.6875, "learning_rate": 1.7667244099679172e-05, "loss": 1.5902, "step": 2611 }, { "epoch": 0.45061675148796687, "grad_norm": 0.68359375, "learning_rate": 1.7665496433689754e-05, "loss": 1.458, "step": 2612 }, { "epoch": 0.4507892693866989, "grad_norm": 0.79296875, "learning_rate": 1.7663748199792463e-05, "loss": 1.4436, "step": 2613 }, { "epoch": 0.45096178728543085, "grad_norm": 0.69140625, "learning_rate": 1.7661999398116824e-05, "loss": 1.3657, "step": 2614 }, { "epoch": 0.45113430518416286, "grad_norm": 0.6875, "learning_rate": 1.7660250028792392e-05, "loss": 1.5338, "step": 2615 }, { "epoch": 0.4513068230828948, "grad_norm": 0.625, "learning_rate": 1.7658500091948774e-05, "loss": 1.4237, "step": 2616 }, { "epoch": 0.45147934098162684, "grad_norm": 0.65625, "learning_rate": 1.7656749587715617e-05, "loss": 1.4443, "step": 2617 }, { "epoch": 0.45165185888035886, "grad_norm": 0.78515625, "learning_rate": 1.765499851622261e-05, "loss": 1.5702, "step": 2618 }, { "epoch": 0.4518243767790908, "grad_norm": 0.72265625, "learning_rate": 1.765324687759948e-05, "loss": 1.4471, "step": 2619 }, { "epoch": 0.45199689467782284, "grad_norm": 0.5859375, "learning_rate": 1.7651494671976003e-05, "loss": 1.5225, "step": 2620 }, { "epoch": 0.4521694125765548, "grad_norm": 0.62890625, "learning_rate": 1.7649741899481997e-05, "loss": 1.4638, "step": 2621 }, { "epoch": 0.4523419304752868, "grad_norm": 0.6875, "learning_rate": 1.7647988560247305e-05, "loss": 1.4911, "step": 2622 }, { "epoch": 0.45251444837401883, "grad_norm": 0.79296875, "learning_rate": 1.764623465440184e-05, "loss": 1.396, "step": 2623 }, { "epoch": 0.4526869662727508, "grad_norm": 0.609375, "learning_rate": 1.764448018207553e-05, "loss": 1.4932, "step": 2624 }, { "epoch": 0.4528594841714828, "grad_norm": 0.77734375, "learning_rate": 1.764272514339837e-05, "loss": 1.4553, "step": 2625 }, { "epoch": 0.45303200207021477, "grad_norm": 0.7265625, "learning_rate": 1.764096953850037e-05, "loss": 1.5165, "step": 2626 }, { "epoch": 0.4532045199689468, "grad_norm": 0.63671875, "learning_rate": 1.7639213367511608e-05, "loss": 1.5094, "step": 2627 }, { "epoch": 0.45337703786767874, "grad_norm": 0.6953125, "learning_rate": 1.763745663056219e-05, "loss": 1.4608, "step": 2628 }, { "epoch": 0.45354955576641076, "grad_norm": 0.8984375, "learning_rate": 1.7635699327782257e-05, "loss": 1.4027, "step": 2629 }, { "epoch": 0.4537220736651428, "grad_norm": 0.73046875, "learning_rate": 1.7633941459302013e-05, "loss": 1.4297, "step": 2630 }, { "epoch": 0.45389459156387474, "grad_norm": 0.80078125, "learning_rate": 1.763218302525169e-05, "loss": 1.3769, "step": 2631 }, { "epoch": 0.45406710946260675, "grad_norm": 0.71484375, "learning_rate": 1.7630424025761554e-05, "loss": 1.585, "step": 2632 }, { "epoch": 0.4542396273613387, "grad_norm": 0.59765625, "learning_rate": 1.7628664460961928e-05, "loss": 1.4986, "step": 2633 }, { "epoch": 0.45441214526007073, "grad_norm": 0.91796875, "learning_rate": 1.7626904330983176e-05, "loss": 1.5802, "step": 2634 }, { "epoch": 0.45458466315880275, "grad_norm": 0.64453125, "learning_rate": 1.7625143635955697e-05, "loss": 1.3242, "step": 2635 }, { "epoch": 0.4547571810575347, "grad_norm": 0.5859375, "learning_rate": 1.7623382376009928e-05, "loss": 1.5192, "step": 2636 }, { "epoch": 0.4549296989562667, "grad_norm": 0.77734375, "learning_rate": 1.7621620551276366e-05, "loss": 1.4385, "step": 2637 }, { "epoch": 0.4551022168549987, "grad_norm": 0.77734375, "learning_rate": 1.761985816188553e-05, "loss": 1.4715, "step": 2638 }, { "epoch": 0.4552747347537307, "grad_norm": 0.6171875, "learning_rate": 1.7618095207967988e-05, "loss": 1.5298, "step": 2639 }, { "epoch": 0.4554472526524627, "grad_norm": 0.84375, "learning_rate": 1.7616331689654352e-05, "loss": 1.4869, "step": 2640 }, { "epoch": 0.4556197705511947, "grad_norm": 0.6484375, "learning_rate": 1.7614567607075278e-05, "loss": 1.5208, "step": 2641 }, { "epoch": 0.4557922884499267, "grad_norm": 0.828125, "learning_rate": 1.761280296036146e-05, "loss": 1.4899, "step": 2642 }, { "epoch": 0.45596480634865866, "grad_norm": 0.8828125, "learning_rate": 1.761103774964363e-05, "loss": 1.3997, "step": 2643 }, { "epoch": 0.4561373242473907, "grad_norm": 0.58984375, "learning_rate": 1.7609271975052563e-05, "loss": 1.4327, "step": 2644 }, { "epoch": 0.45630984214612264, "grad_norm": 0.78515625, "learning_rate": 1.7607505636719085e-05, "loss": 1.4269, "step": 2645 }, { "epoch": 0.45648236004485465, "grad_norm": 0.7265625, "learning_rate": 1.7605738734774062e-05, "loss": 1.4966, "step": 2646 }, { "epoch": 0.45665487794358667, "grad_norm": 0.7265625, "learning_rate": 1.7603971269348383e-05, "loss": 1.4565, "step": 2647 }, { "epoch": 0.45682739584231863, "grad_norm": 0.796875, "learning_rate": 1.7602203240573004e-05, "loss": 1.3994, "step": 2648 }, { "epoch": 0.45699991374105065, "grad_norm": 0.80078125, "learning_rate": 1.7600434648578906e-05, "loss": 1.4751, "step": 2649 }, { "epoch": 0.4571724316397826, "grad_norm": 0.96875, "learning_rate": 1.7598665493497122e-05, "loss": 1.5806, "step": 2650 }, { "epoch": 0.4573449495385146, "grad_norm": 0.6328125, "learning_rate": 1.759689577545872e-05, "loss": 1.5674, "step": 2651 }, { "epoch": 0.45751746743724664, "grad_norm": 0.7734375, "learning_rate": 1.759512549459481e-05, "loss": 1.478, "step": 2652 }, { "epoch": 0.4576899853359786, "grad_norm": 0.828125, "learning_rate": 1.7593354651036544e-05, "loss": 1.4421, "step": 2653 }, { "epoch": 0.4578625032347106, "grad_norm": 0.765625, "learning_rate": 1.759158324491512e-05, "loss": 1.5771, "step": 2654 }, { "epoch": 0.4580350211334426, "grad_norm": 0.828125, "learning_rate": 1.758981127636178e-05, "loss": 1.4393, "step": 2655 }, { "epoch": 0.4582075390321746, "grad_norm": 0.921875, "learning_rate": 1.7588038745507797e-05, "loss": 1.5219, "step": 2656 }, { "epoch": 0.45838005693090655, "grad_norm": 0.7734375, "learning_rate": 1.7586265652484488e-05, "loss": 1.4692, "step": 2657 }, { "epoch": 0.45855257482963857, "grad_norm": 0.86328125, "learning_rate": 1.758449199742322e-05, "loss": 1.4827, "step": 2658 }, { "epoch": 0.4587250927283706, "grad_norm": 1.2734375, "learning_rate": 1.7582717780455395e-05, "loss": 1.509, "step": 2659 }, { "epoch": 0.45889761062710255, "grad_norm": 1.3359375, "learning_rate": 1.7580943001712457e-05, "loss": 1.61, "step": 2660 }, { "epoch": 0.45907012852583456, "grad_norm": 0.67578125, "learning_rate": 1.7579167661325892e-05, "loss": 1.5574, "step": 2661 }, { "epoch": 0.4592426464245665, "grad_norm": 0.7890625, "learning_rate": 1.757739175942723e-05, "loss": 1.4575, "step": 2662 }, { "epoch": 0.45941516432329854, "grad_norm": 0.66796875, "learning_rate": 1.7575615296148044e-05, "loss": 1.429, "step": 2663 }, { "epoch": 0.45958768222203056, "grad_norm": 0.6328125, "learning_rate": 1.757383827161994e-05, "loss": 1.3583, "step": 2664 }, { "epoch": 0.4597602001207625, "grad_norm": 0.69140625, "learning_rate": 1.7572060685974577e-05, "loss": 1.4718, "step": 2665 }, { "epoch": 0.45993271801949454, "grad_norm": 0.75, "learning_rate": 1.7570282539343643e-05, "loss": 1.3904, "step": 2666 }, { "epoch": 0.4601052359182265, "grad_norm": 0.81640625, "learning_rate": 1.7568503831858875e-05, "loss": 1.5472, "step": 2667 }, { "epoch": 0.4602777538169585, "grad_norm": 0.56640625, "learning_rate": 1.7566724563652052e-05, "loss": 1.4417, "step": 2668 }, { "epoch": 0.46045027171569053, "grad_norm": 0.671875, "learning_rate": 1.7564944734855e-05, "loss": 1.5233, "step": 2669 }, { "epoch": 0.4606227896144225, "grad_norm": 0.69921875, "learning_rate": 1.756316434559957e-05, "loss": 1.4207, "step": 2670 }, { "epoch": 0.4607953075131545, "grad_norm": 0.62109375, "learning_rate": 1.7561383396017672e-05, "loss": 1.5302, "step": 2671 }, { "epoch": 0.46096782541188647, "grad_norm": 0.60546875, "learning_rate": 1.7559601886241245e-05, "loss": 1.4491, "step": 2672 }, { "epoch": 0.4611403433106185, "grad_norm": 0.65234375, "learning_rate": 1.7557819816402273e-05, "loss": 1.5596, "step": 2673 }, { "epoch": 0.46131286120935044, "grad_norm": 0.6484375, "learning_rate": 1.7556037186632787e-05, "loss": 1.4611, "step": 2674 }, { "epoch": 0.46148537910808246, "grad_norm": 0.6796875, "learning_rate": 1.7554253997064854e-05, "loss": 1.4785, "step": 2675 }, { "epoch": 0.4616578970068145, "grad_norm": 0.64453125, "learning_rate": 1.755247024783058e-05, "loss": 1.4997, "step": 2676 }, { "epoch": 0.46183041490554644, "grad_norm": 0.62109375, "learning_rate": 1.7550685939062125e-05, "loss": 1.4593, "step": 2677 }, { "epoch": 0.46200293280427845, "grad_norm": 0.6171875, "learning_rate": 1.754890107089168e-05, "loss": 1.5093, "step": 2678 }, { "epoch": 0.4621754507030104, "grad_norm": 0.65625, "learning_rate": 1.754711564345147e-05, "loss": 1.436, "step": 2679 }, { "epoch": 0.46234796860174243, "grad_norm": 0.58984375, "learning_rate": 1.754532965687378e-05, "loss": 1.4643, "step": 2680 }, { "epoch": 0.46252048650047445, "grad_norm": 0.625, "learning_rate": 1.754354311129092e-05, "loss": 1.4679, "step": 2681 }, { "epoch": 0.4626930043992064, "grad_norm": 0.78515625, "learning_rate": 1.7541756006835253e-05, "loss": 1.5252, "step": 2682 }, { "epoch": 0.4628655222979384, "grad_norm": 0.7109375, "learning_rate": 1.753996834363918e-05, "loss": 1.5314, "step": 2683 }, { "epoch": 0.4630380401966704, "grad_norm": 0.66796875, "learning_rate": 1.753818012183514e-05, "loss": 1.4043, "step": 2684 }, { "epoch": 0.4632105580954024, "grad_norm": 0.73046875, "learning_rate": 1.7536391341555613e-05, "loss": 1.5028, "step": 2685 }, { "epoch": 0.4633830759941344, "grad_norm": 0.5859375, "learning_rate": 1.7534602002933128e-05, "loss": 1.4713, "step": 2686 }, { "epoch": 0.4635555938928664, "grad_norm": 0.62890625, "learning_rate": 1.7532812106100247e-05, "loss": 1.4458, "step": 2687 }, { "epoch": 0.4637281117915984, "grad_norm": 0.61328125, "learning_rate": 1.7531021651189578e-05, "loss": 1.4422, "step": 2688 }, { "epoch": 0.46390062969033036, "grad_norm": 0.7578125, "learning_rate": 1.752923063833377e-05, "loss": 1.4305, "step": 2689 }, { "epoch": 0.4640731475890624, "grad_norm": 0.64453125, "learning_rate": 1.7527439067665516e-05, "loss": 1.4937, "step": 2690 }, { "epoch": 0.46424566548779433, "grad_norm": 0.62109375, "learning_rate": 1.7525646939317535e-05, "loss": 1.4594, "step": 2691 }, { "epoch": 0.46441818338652635, "grad_norm": 0.66015625, "learning_rate": 1.752385425342261e-05, "loss": 1.4699, "step": 2692 }, { "epoch": 0.46459070128525837, "grad_norm": 0.625, "learning_rate": 1.752206101011355e-05, "loss": 1.5202, "step": 2693 }, { "epoch": 0.46476321918399033, "grad_norm": 0.8515625, "learning_rate": 1.752026720952321e-05, "loss": 1.5505, "step": 2694 }, { "epoch": 0.46493573708272234, "grad_norm": 0.62109375, "learning_rate": 1.7518472851784485e-05, "loss": 1.4083, "step": 2695 }, { "epoch": 0.4651082549814543, "grad_norm": 0.7578125, "learning_rate": 1.7516677937030318e-05, "loss": 1.5286, "step": 2696 }, { "epoch": 0.4652807728801863, "grad_norm": 0.83203125, "learning_rate": 1.751488246539368e-05, "loss": 1.5077, "step": 2697 }, { "epoch": 0.46545329077891834, "grad_norm": 0.6640625, "learning_rate": 1.7513086437007593e-05, "loss": 1.5907, "step": 2698 }, { "epoch": 0.4656258086776503, "grad_norm": 0.66015625, "learning_rate": 1.751128985200512e-05, "loss": 1.498, "step": 2699 }, { "epoch": 0.4657983265763823, "grad_norm": 0.78515625, "learning_rate": 1.750949271051936e-05, "loss": 1.5613, "step": 2700 }, { "epoch": 0.4657983265763823, "eval_loss": 1.451514720916748, "eval_runtime": 11.0249, "eval_samples_per_second": 92.88, "eval_steps_per_second": 23.22, "step": 2700 }, { "epoch": 0.4659708444751143, "grad_norm": 0.6640625, "learning_rate": 1.7507695012683463e-05, "loss": 1.4832, "step": 2701 }, { "epoch": 0.4661433623738463, "grad_norm": 0.73046875, "learning_rate": 1.7505896758630606e-05, "loss": 1.508, "step": 2702 }, { "epoch": 0.46631588027257825, "grad_norm": 0.76953125, "learning_rate": 1.750409794849402e-05, "loss": 1.3877, "step": 2703 }, { "epoch": 0.46648839817131027, "grad_norm": 0.6328125, "learning_rate": 1.7502298582406967e-05, "loss": 1.4419, "step": 2704 }, { "epoch": 0.4666609160700423, "grad_norm": 0.84375, "learning_rate": 1.7500498660502757e-05, "loss": 1.4838, "step": 2705 }, { "epoch": 0.46683343396877425, "grad_norm": 0.8203125, "learning_rate": 1.7498698182914746e-05, "loss": 1.5293, "step": 2706 }, { "epoch": 0.46700595186750626, "grad_norm": 0.6875, "learning_rate": 1.749689714977632e-05, "loss": 1.5346, "step": 2707 }, { "epoch": 0.4671784697662382, "grad_norm": 0.7578125, "learning_rate": 1.7495095561220908e-05, "loss": 1.4503, "step": 2708 }, { "epoch": 0.46735098766497024, "grad_norm": 0.828125, "learning_rate": 1.7493293417381985e-05, "loss": 1.4146, "step": 2709 }, { "epoch": 0.46752350556370226, "grad_norm": 3.15625, "learning_rate": 1.7491490718393067e-05, "loss": 1.554, "step": 2710 }, { "epoch": 0.4676960234624342, "grad_norm": 0.8984375, "learning_rate": 1.7489687464387705e-05, "loss": 1.5255, "step": 2711 }, { "epoch": 0.46786854136116623, "grad_norm": 0.89453125, "learning_rate": 1.74878836554995e-05, "loss": 1.4677, "step": 2712 }, { "epoch": 0.4680410592598982, "grad_norm": 0.73046875, "learning_rate": 1.748607929186209e-05, "loss": 1.5083, "step": 2713 }, { "epoch": 0.4682135771586302, "grad_norm": 0.7578125, "learning_rate": 1.7484274373609143e-05, "loss": 1.5612, "step": 2714 }, { "epoch": 0.46838609505736223, "grad_norm": 0.82421875, "learning_rate": 1.7482468900874393e-05, "loss": 1.4519, "step": 2715 }, { "epoch": 0.4685586129560942, "grad_norm": 0.5859375, "learning_rate": 1.7480662873791592e-05, "loss": 1.4596, "step": 2716 }, { "epoch": 0.4687311308548262, "grad_norm": 1.0, "learning_rate": 1.7478856292494543e-05, "loss": 1.5029, "step": 2717 }, { "epoch": 0.46890364875355817, "grad_norm": 0.8515625, "learning_rate": 1.7477049157117093e-05, "loss": 1.4348, "step": 2718 }, { "epoch": 0.4690761666522902, "grad_norm": 0.6796875, "learning_rate": 1.747524146779312e-05, "loss": 1.531, "step": 2719 }, { "epoch": 0.46924868455102214, "grad_norm": 0.87890625, "learning_rate": 1.7473433224656554e-05, "loss": 1.4822, "step": 2720 }, { "epoch": 0.46942120244975416, "grad_norm": 0.71875, "learning_rate": 1.7471624427841356e-05, "loss": 1.3717, "step": 2721 }, { "epoch": 0.4695937203484862, "grad_norm": 0.72265625, "learning_rate": 1.7469815077481537e-05, "loss": 1.462, "step": 2722 }, { "epoch": 0.46976623824721814, "grad_norm": 0.703125, "learning_rate": 1.746800517371114e-05, "loss": 1.4464, "step": 2723 }, { "epoch": 0.46993875614595015, "grad_norm": 0.625, "learning_rate": 1.7466194716664262e-05, "loss": 1.4288, "step": 2724 }, { "epoch": 0.4701112740446821, "grad_norm": 0.7734375, "learning_rate": 1.7464383706475028e-05, "loss": 1.4812, "step": 2725 }, { "epoch": 0.47028379194341413, "grad_norm": 0.5546875, "learning_rate": 1.7462572143277606e-05, "loss": 1.388, "step": 2726 }, { "epoch": 0.47045630984214615, "grad_norm": 0.62109375, "learning_rate": 1.7460760027206215e-05, "loss": 1.4307, "step": 2727 }, { "epoch": 0.4706288277408781, "grad_norm": 0.75, "learning_rate": 1.7458947358395102e-05, "loss": 1.521, "step": 2728 }, { "epoch": 0.4708013456396101, "grad_norm": 0.75, "learning_rate": 1.7457134136978566e-05, "loss": 1.525, "step": 2729 }, { "epoch": 0.4709738635383421, "grad_norm": 0.640625, "learning_rate": 1.7455320363090936e-05, "loss": 1.4898, "step": 2730 }, { "epoch": 0.4711463814370741, "grad_norm": 0.94921875, "learning_rate": 1.7453506036866592e-05, "loss": 1.416, "step": 2731 }, { "epoch": 0.47131889933580606, "grad_norm": 0.62109375, "learning_rate": 1.745169115843995e-05, "loss": 1.4478, "step": 2732 }, { "epoch": 0.4714914172345381, "grad_norm": 0.6171875, "learning_rate": 1.7449875727945463e-05, "loss": 1.4715, "step": 2733 }, { "epoch": 0.4716639351332701, "grad_norm": 0.69921875, "learning_rate": 1.7448059745517635e-05, "loss": 1.5544, "step": 2734 }, { "epoch": 0.47183645303200206, "grad_norm": 0.74609375, "learning_rate": 1.7446243211291003e-05, "loss": 1.4665, "step": 2735 }, { "epoch": 0.4720089709307341, "grad_norm": 0.58984375, "learning_rate": 1.7444426125400148e-05, "loss": 1.4048, "step": 2736 }, { "epoch": 0.47218148882946603, "grad_norm": 0.984375, "learning_rate": 1.7442608487979692e-05, "loss": 1.4598, "step": 2737 }, { "epoch": 0.47235400672819805, "grad_norm": 0.83984375, "learning_rate": 1.7440790299164295e-05, "loss": 1.5017, "step": 2738 }, { "epoch": 0.47252652462693007, "grad_norm": 0.60546875, "learning_rate": 1.7438971559088658e-05, "loss": 1.4438, "step": 2739 }, { "epoch": 0.472699042525662, "grad_norm": 0.734375, "learning_rate": 1.743715226788753e-05, "loss": 1.4198, "step": 2740 }, { "epoch": 0.47287156042439404, "grad_norm": 0.625, "learning_rate": 1.743533242569569e-05, "loss": 1.4941, "step": 2741 }, { "epoch": 0.473044078323126, "grad_norm": 0.6328125, "learning_rate": 1.7433512032647968e-05, "loss": 1.4281, "step": 2742 }, { "epoch": 0.473216596221858, "grad_norm": 0.59765625, "learning_rate": 1.7431691088879228e-05, "loss": 1.4557, "step": 2743 }, { "epoch": 0.47338911412059004, "grad_norm": 0.6328125, "learning_rate": 1.7429869594524375e-05, "loss": 1.4984, "step": 2744 }, { "epoch": 0.473561632019322, "grad_norm": 0.58203125, "learning_rate": 1.742804754971836e-05, "loss": 1.537, "step": 2745 }, { "epoch": 0.473734149918054, "grad_norm": 0.6015625, "learning_rate": 1.742622495459617e-05, "loss": 1.4307, "step": 2746 }, { "epoch": 0.473906667816786, "grad_norm": 0.6328125, "learning_rate": 1.7424401809292833e-05, "loss": 1.5323, "step": 2747 }, { "epoch": 0.474079185715518, "grad_norm": 0.8125, "learning_rate": 1.742257811394342e-05, "loss": 1.5329, "step": 2748 }, { "epoch": 0.47425170361424995, "grad_norm": 1.1796875, "learning_rate": 1.7420753868683044e-05, "loss": 1.533, "step": 2749 }, { "epoch": 0.47442422151298197, "grad_norm": 0.7109375, "learning_rate": 1.7418929073646855e-05, "loss": 1.4591, "step": 2750 }, { "epoch": 0.474596739411714, "grad_norm": 0.6796875, "learning_rate": 1.741710372897004e-05, "loss": 1.4273, "step": 2751 }, { "epoch": 0.47476925731044595, "grad_norm": 0.67578125, "learning_rate": 1.741527783478784e-05, "loss": 1.4346, "step": 2752 }, { "epoch": 0.47494177520917796, "grad_norm": 0.7578125, "learning_rate": 1.7413451391235524e-05, "loss": 1.4941, "step": 2753 }, { "epoch": 0.4751142931079099, "grad_norm": 0.72265625, "learning_rate": 1.7411624398448408e-05, "loss": 1.4746, "step": 2754 }, { "epoch": 0.47528681100664194, "grad_norm": 0.8359375, "learning_rate": 1.7409796856561847e-05, "loss": 1.4099, "step": 2755 }, { "epoch": 0.47545932890537396, "grad_norm": 0.7734375, "learning_rate": 1.7407968765711234e-05, "loss": 1.5676, "step": 2756 }, { "epoch": 0.4756318468041059, "grad_norm": 0.6796875, "learning_rate": 1.740614012603201e-05, "loss": 1.4248, "step": 2757 }, { "epoch": 0.47580436470283793, "grad_norm": 0.6015625, "learning_rate": 1.740431093765965e-05, "loss": 1.5019, "step": 2758 }, { "epoch": 0.4759768826015699, "grad_norm": 0.71875, "learning_rate": 1.740248120072967e-05, "loss": 1.4743, "step": 2759 }, { "epoch": 0.4761494005003019, "grad_norm": 0.73046875, "learning_rate": 1.740065091537763e-05, "loss": 1.3471, "step": 2760 }, { "epoch": 0.4763219183990339, "grad_norm": 0.78125, "learning_rate": 1.7398820081739128e-05, "loss": 1.5532, "step": 2761 }, { "epoch": 0.4764944362977659, "grad_norm": 0.62890625, "learning_rate": 1.739698869994981e-05, "loss": 1.4123, "step": 2762 }, { "epoch": 0.4766669541964979, "grad_norm": 0.7734375, "learning_rate": 1.7395156770145343e-05, "loss": 1.5422, "step": 2763 }, { "epoch": 0.47683947209522987, "grad_norm": 0.640625, "learning_rate": 1.739332429246146e-05, "loss": 1.5009, "step": 2764 }, { "epoch": 0.4770119899939619, "grad_norm": 0.81640625, "learning_rate": 1.7391491267033916e-05, "loss": 1.3644, "step": 2765 }, { "epoch": 0.47718450789269384, "grad_norm": 0.65625, "learning_rate": 1.7389657693998515e-05, "loss": 1.4097, "step": 2766 }, { "epoch": 0.47735702579142586, "grad_norm": 0.60546875, "learning_rate": 1.73878235734911e-05, "loss": 1.4486, "step": 2767 }, { "epoch": 0.4775295436901579, "grad_norm": 0.62109375, "learning_rate": 1.7385988905647556e-05, "loss": 1.3957, "step": 2768 }, { "epoch": 0.47770206158888984, "grad_norm": 0.7734375, "learning_rate": 1.73841536906038e-05, "loss": 1.3941, "step": 2769 }, { "epoch": 0.47787457948762185, "grad_norm": 0.71875, "learning_rate": 1.7382317928495803e-05, "loss": 1.5116, "step": 2770 }, { "epoch": 0.4780470973863538, "grad_norm": 0.6953125, "learning_rate": 1.7380481619459564e-05, "loss": 1.4201, "step": 2771 }, { "epoch": 0.47821961528508583, "grad_norm": 1.515625, "learning_rate": 1.7378644763631133e-05, "loss": 1.4355, "step": 2772 }, { "epoch": 0.47839213318381785, "grad_norm": 0.6171875, "learning_rate": 1.7376807361146594e-05, "loss": 1.6016, "step": 2773 }, { "epoch": 0.4785646510825498, "grad_norm": 0.66796875, "learning_rate": 1.7374969412142072e-05, "loss": 1.433, "step": 2774 }, { "epoch": 0.4787371689812818, "grad_norm": 0.73046875, "learning_rate": 1.7373130916753737e-05, "loss": 1.4282, "step": 2775 }, { "epoch": 0.4789096868800138, "grad_norm": 0.62890625, "learning_rate": 1.737129187511779e-05, "loss": 1.4499, "step": 2776 }, { "epoch": 0.4790822047787458, "grad_norm": 0.7578125, "learning_rate": 1.736945228737049e-05, "loss": 1.5561, "step": 2777 }, { "epoch": 0.47925472267747776, "grad_norm": 0.59765625, "learning_rate": 1.7367612153648113e-05, "loss": 1.3707, "step": 2778 }, { "epoch": 0.4794272405762098, "grad_norm": 0.6796875, "learning_rate": 1.7365771474086993e-05, "loss": 1.5786, "step": 2779 }, { "epoch": 0.4795997584749418, "grad_norm": 1.015625, "learning_rate": 1.73639302488235e-05, "loss": 1.5003, "step": 2780 }, { "epoch": 0.47977227637367376, "grad_norm": 0.66015625, "learning_rate": 1.7362088477994043e-05, "loss": 1.5189, "step": 2781 }, { "epoch": 0.4799447942724058, "grad_norm": 0.66796875, "learning_rate": 1.736024616173507e-05, "loss": 1.4467, "step": 2782 }, { "epoch": 0.48011731217113773, "grad_norm": 0.6640625, "learning_rate": 1.735840330018307e-05, "loss": 1.5065, "step": 2783 }, { "epoch": 0.48028983006986975, "grad_norm": 0.74609375, "learning_rate": 1.7356559893474578e-05, "loss": 1.5442, "step": 2784 }, { "epoch": 0.48046234796860177, "grad_norm": 0.7265625, "learning_rate": 1.735471594174616e-05, "loss": 1.41, "step": 2785 }, { "epoch": 0.4806348658673337, "grad_norm": 0.55078125, "learning_rate": 1.735287144513444e-05, "loss": 1.4113, "step": 2786 }, { "epoch": 0.48080738376606574, "grad_norm": 0.6015625, "learning_rate": 1.7351026403776054e-05, "loss": 1.4412, "step": 2787 }, { "epoch": 0.4809799016647977, "grad_norm": 0.66015625, "learning_rate": 1.73491808178077e-05, "loss": 1.4297, "step": 2788 }, { "epoch": 0.4811524195635297, "grad_norm": 0.734375, "learning_rate": 1.7347334687366114e-05, "loss": 1.6249, "step": 2789 }, { "epoch": 0.48132493746226174, "grad_norm": 0.60546875, "learning_rate": 1.7345488012588064e-05, "loss": 1.3788, "step": 2790 }, { "epoch": 0.4814974553609937, "grad_norm": 0.640625, "learning_rate": 1.7343640793610366e-05, "loss": 1.476, "step": 2791 }, { "epoch": 0.4816699732597257, "grad_norm": 0.62890625, "learning_rate": 1.7341793030569874e-05, "loss": 1.4452, "step": 2792 }, { "epoch": 0.4818424911584577, "grad_norm": 0.5703125, "learning_rate": 1.733994472360348e-05, "loss": 1.5272, "step": 2793 }, { "epoch": 0.4820150090571897, "grad_norm": 0.60546875, "learning_rate": 1.733809587284812e-05, "loss": 1.416, "step": 2794 }, { "epoch": 0.48218752695592165, "grad_norm": 0.6484375, "learning_rate": 1.733624647844076e-05, "loss": 1.4474, "step": 2795 }, { "epoch": 0.48236004485465367, "grad_norm": 0.59765625, "learning_rate": 1.733439654051843e-05, "loss": 1.5665, "step": 2796 }, { "epoch": 0.4825325627533857, "grad_norm": 0.7578125, "learning_rate": 1.7332546059218174e-05, "loss": 1.4993, "step": 2797 }, { "epoch": 0.48270508065211765, "grad_norm": 0.5703125, "learning_rate": 1.733069503467709e-05, "loss": 1.3398, "step": 2798 }, { "epoch": 0.48287759855084966, "grad_norm": 0.6484375, "learning_rate": 1.7328843467032314e-05, "loss": 1.502, "step": 2799 }, { "epoch": 0.4830501164495816, "grad_norm": 0.609375, "learning_rate": 1.7326991356421023e-05, "loss": 1.5295, "step": 2800 }, { "epoch": 0.4830501164495816, "eval_loss": 1.4480316638946533, "eval_runtime": 10.8875, "eval_samples_per_second": 94.053, "eval_steps_per_second": 23.513, "step": 2800 }, { "epoch": 0.48322263434831364, "grad_norm": 0.7109375, "learning_rate": 1.7325138702980427e-05, "loss": 1.4666, "step": 2801 }, { "epoch": 0.48339515224704566, "grad_norm": 0.578125, "learning_rate": 1.7323285506847788e-05, "loss": 1.4056, "step": 2802 }, { "epoch": 0.4835676701457776, "grad_norm": 0.6796875, "learning_rate": 1.7321431768160402e-05, "loss": 1.4883, "step": 2803 }, { "epoch": 0.48374018804450963, "grad_norm": 0.76171875, "learning_rate": 1.7319577487055603e-05, "loss": 1.4309, "step": 2804 }, { "epoch": 0.4839127059432416, "grad_norm": 0.578125, "learning_rate": 1.7317722663670767e-05, "loss": 1.5414, "step": 2805 }, { "epoch": 0.4840852238419736, "grad_norm": 0.57421875, "learning_rate": 1.731586729814332e-05, "loss": 1.3881, "step": 2806 }, { "epoch": 0.48425774174070557, "grad_norm": 0.94921875, "learning_rate": 1.7314011390610705e-05, "loss": 1.5173, "step": 2807 }, { "epoch": 0.4844302596394376, "grad_norm": 0.765625, "learning_rate": 1.731215494121043e-05, "loss": 1.3449, "step": 2808 }, { "epoch": 0.4846027775381696, "grad_norm": 0.61328125, "learning_rate": 1.7310297950080022e-05, "loss": 1.4315, "step": 2809 }, { "epoch": 0.48477529543690157, "grad_norm": 0.73828125, "learning_rate": 1.730844041735707e-05, "loss": 1.4923, "step": 2810 }, { "epoch": 0.4849478133356336, "grad_norm": 0.890625, "learning_rate": 1.730658234317919e-05, "loss": 1.5253, "step": 2811 }, { "epoch": 0.48512033123436554, "grad_norm": 0.7734375, "learning_rate": 1.7304723727684033e-05, "loss": 1.5575, "step": 2812 }, { "epoch": 0.48529284913309756, "grad_norm": 0.6484375, "learning_rate": 1.7302864571009296e-05, "loss": 1.4681, "step": 2813 }, { "epoch": 0.4854653670318296, "grad_norm": 0.67578125, "learning_rate": 1.7301004873292727e-05, "loss": 1.5213, "step": 2814 }, { "epoch": 0.48563788493056154, "grad_norm": 0.64453125, "learning_rate": 1.7299144634672096e-05, "loss": 1.4503, "step": 2815 }, { "epoch": 0.48581040282929355, "grad_norm": 0.71875, "learning_rate": 1.729728385528522e-05, "loss": 1.4475, "step": 2816 }, { "epoch": 0.4859829207280255, "grad_norm": 0.64453125, "learning_rate": 1.729542253526997e-05, "loss": 1.507, "step": 2817 }, { "epoch": 0.48615543862675753, "grad_norm": 0.5859375, "learning_rate": 1.7293560674764224e-05, "loss": 1.4533, "step": 2818 }, { "epoch": 0.48632795652548955, "grad_norm": 0.58984375, "learning_rate": 1.7291698273905936e-05, "loss": 1.4052, "step": 2819 }, { "epoch": 0.4865004744242215, "grad_norm": 0.72265625, "learning_rate": 1.7289835332833083e-05, "loss": 1.3862, "step": 2820 }, { "epoch": 0.4866729923229535, "grad_norm": 0.63671875, "learning_rate": 1.7287971851683675e-05, "loss": 1.4409, "step": 2821 }, { "epoch": 0.4868455102216855, "grad_norm": 0.625, "learning_rate": 1.7286107830595774e-05, "loss": 1.436, "step": 2822 }, { "epoch": 0.4870180281204175, "grad_norm": 0.8671875, "learning_rate": 1.7284243269707478e-05, "loss": 1.4044, "step": 2823 }, { "epoch": 0.48719054601914946, "grad_norm": 0.66015625, "learning_rate": 1.728237816915693e-05, "loss": 1.5035, "step": 2824 }, { "epoch": 0.4873630639178815, "grad_norm": 0.59765625, "learning_rate": 1.7280512529082306e-05, "loss": 1.4622, "step": 2825 }, { "epoch": 0.4875355818166135, "grad_norm": 0.765625, "learning_rate": 1.727864634962182e-05, "loss": 1.5554, "step": 2826 }, { "epoch": 0.48770809971534546, "grad_norm": 0.66015625, "learning_rate": 1.7276779630913734e-05, "loss": 1.5669, "step": 2827 }, { "epoch": 0.48788061761407747, "grad_norm": 0.71484375, "learning_rate": 1.7274912373096345e-05, "loss": 1.3718, "step": 2828 }, { "epoch": 0.48805313551280943, "grad_norm": 0.69140625, "learning_rate": 1.7273044576307993e-05, "loss": 1.3165, "step": 2829 }, { "epoch": 0.48822565341154145, "grad_norm": 0.6953125, "learning_rate": 1.7271176240687054e-05, "loss": 1.455, "step": 2830 }, { "epoch": 0.48839817131027347, "grad_norm": 0.6953125, "learning_rate": 1.7269307366371948e-05, "loss": 1.5077, "step": 2831 }, { "epoch": 0.4885706892090054, "grad_norm": 0.5859375, "learning_rate": 1.726743795350113e-05, "loss": 1.4871, "step": 2832 }, { "epoch": 0.48874320710773744, "grad_norm": 0.6875, "learning_rate": 1.7265568002213103e-05, "loss": 1.5284, "step": 2833 }, { "epoch": 0.4889157250064694, "grad_norm": 0.81640625, "learning_rate": 1.7263697512646397e-05, "loss": 1.5449, "step": 2834 }, { "epoch": 0.4890882429052014, "grad_norm": 0.66796875, "learning_rate": 1.7261826484939592e-05, "loss": 1.4365, "step": 2835 }, { "epoch": 0.4892607608039334, "grad_norm": 0.85546875, "learning_rate": 1.725995491923131e-05, "loss": 1.5388, "step": 2836 }, { "epoch": 0.4894332787026654, "grad_norm": 0.9375, "learning_rate": 1.7258082815660203e-05, "loss": 1.5065, "step": 2837 }, { "epoch": 0.4896057966013974, "grad_norm": 0.70703125, "learning_rate": 1.7256210174364975e-05, "loss": 1.5679, "step": 2838 }, { "epoch": 0.4897783145001294, "grad_norm": 0.8828125, "learning_rate": 1.7254336995484355e-05, "loss": 1.4383, "step": 2839 }, { "epoch": 0.4899508323988614, "grad_norm": 0.80859375, "learning_rate": 1.7252463279157123e-05, "loss": 1.4321, "step": 2840 }, { "epoch": 0.49012335029759335, "grad_norm": 0.62890625, "learning_rate": 1.72505890255221e-05, "loss": 1.47, "step": 2841 }, { "epoch": 0.49029586819632537, "grad_norm": 0.76953125, "learning_rate": 1.724871423471813e-05, "loss": 1.4975, "step": 2842 }, { "epoch": 0.4904683860950574, "grad_norm": 0.7890625, "learning_rate": 1.7246838906884125e-05, "loss": 1.4325, "step": 2843 }, { "epoch": 0.49064090399378935, "grad_norm": 0.58203125, "learning_rate": 1.724496304215901e-05, "loss": 1.5435, "step": 2844 }, { "epoch": 0.49081342189252136, "grad_norm": 0.7734375, "learning_rate": 1.724308664068176e-05, "loss": 1.4481, "step": 2845 }, { "epoch": 0.4909859397912533, "grad_norm": 0.61328125, "learning_rate": 1.7241209702591405e-05, "loss": 1.4997, "step": 2846 }, { "epoch": 0.49115845768998534, "grad_norm": 0.6484375, "learning_rate": 1.7239332228026982e-05, "loss": 1.3803, "step": 2847 }, { "epoch": 0.49133097558871736, "grad_norm": 1.46875, "learning_rate": 1.72374542171276e-05, "loss": 1.4905, "step": 2848 }, { "epoch": 0.4915034934874493, "grad_norm": 0.609375, "learning_rate": 1.7235575670032382e-05, "loss": 1.4539, "step": 2849 }, { "epoch": 0.49167601138618133, "grad_norm": 0.61328125, "learning_rate": 1.7233696586880513e-05, "loss": 1.4779, "step": 2850 }, { "epoch": 0.4918485292849133, "grad_norm": 0.65234375, "learning_rate": 1.72318169678112e-05, "loss": 1.4413, "step": 2851 }, { "epoch": 0.4920210471836453, "grad_norm": 0.76953125, "learning_rate": 1.7229936812963697e-05, "loss": 1.5197, "step": 2852 }, { "epoch": 0.49219356508237727, "grad_norm": 0.65625, "learning_rate": 1.7228056122477307e-05, "loss": 1.5067, "step": 2853 }, { "epoch": 0.4923660829811093, "grad_norm": 0.6796875, "learning_rate": 1.7226174896491354e-05, "loss": 1.5387, "step": 2854 }, { "epoch": 0.4925386008798413, "grad_norm": 0.6953125, "learning_rate": 1.7224293135145213e-05, "loss": 1.5383, "step": 2855 }, { "epoch": 0.49271111877857326, "grad_norm": 0.58984375, "learning_rate": 1.722241083857829e-05, "loss": 1.4639, "step": 2856 }, { "epoch": 0.4928836366773053, "grad_norm": 0.671875, "learning_rate": 1.7220528006930056e-05, "loss": 1.441, "step": 2857 }, { "epoch": 0.49305615457603724, "grad_norm": 0.73828125, "learning_rate": 1.7218644640339986e-05, "loss": 1.5281, "step": 2858 }, { "epoch": 0.49322867247476926, "grad_norm": 0.65625, "learning_rate": 1.7216760738947614e-05, "loss": 1.4893, "step": 2859 }, { "epoch": 0.4934011903735013, "grad_norm": 0.65234375, "learning_rate": 1.721487630289252e-05, "loss": 1.4929, "step": 2860 }, { "epoch": 0.49357370827223324, "grad_norm": 0.65625, "learning_rate": 1.7212991332314303e-05, "loss": 1.4151, "step": 2861 }, { "epoch": 0.49374622617096525, "grad_norm": 0.6640625, "learning_rate": 1.7211105827352624e-05, "loss": 1.5097, "step": 2862 }, { "epoch": 0.4939187440696972, "grad_norm": 0.71484375, "learning_rate": 1.7209219788147166e-05, "loss": 1.559, "step": 2863 }, { "epoch": 0.49409126196842923, "grad_norm": 0.72265625, "learning_rate": 1.720733321483766e-05, "loss": 1.399, "step": 2864 }, { "epoch": 0.4942637798671612, "grad_norm": 0.671875, "learning_rate": 1.7205446107563876e-05, "loss": 1.4557, "step": 2865 }, { "epoch": 0.4944362977658932, "grad_norm": 0.67578125, "learning_rate": 1.7203558466465626e-05, "loss": 1.5592, "step": 2866 }, { "epoch": 0.4946088156646252, "grad_norm": 0.7265625, "learning_rate": 1.7201670291682754e-05, "loss": 1.4551, "step": 2867 }, { "epoch": 0.4947813335633572, "grad_norm": 0.5859375, "learning_rate": 1.7199781583355144e-05, "loss": 1.5005, "step": 2868 }, { "epoch": 0.4949538514620892, "grad_norm": 0.61328125, "learning_rate": 1.719789234162273e-05, "loss": 1.3738, "step": 2869 }, { "epoch": 0.49512636936082116, "grad_norm": 0.62890625, "learning_rate": 1.719600256662548e-05, "loss": 1.5196, "step": 2870 }, { "epoch": 0.4952988872595532, "grad_norm": 0.6796875, "learning_rate": 1.7194112258503395e-05, "loss": 1.4637, "step": 2871 }, { "epoch": 0.4954714051582852, "grad_norm": 0.6484375, "learning_rate": 1.7192221417396524e-05, "loss": 1.5419, "step": 2872 }, { "epoch": 0.49564392305701716, "grad_norm": 0.5859375, "learning_rate": 1.7190330043444953e-05, "loss": 1.5005, "step": 2873 }, { "epoch": 0.49581644095574917, "grad_norm": 0.578125, "learning_rate": 1.71884381367888e-05, "loss": 1.5498, "step": 2874 }, { "epoch": 0.49598895885448113, "grad_norm": 0.765625, "learning_rate": 1.7186545697568236e-05, "loss": 1.4867, "step": 2875 }, { "epoch": 0.49616147675321315, "grad_norm": 0.80859375, "learning_rate": 1.7184652725923465e-05, "loss": 1.5072, "step": 2876 }, { "epoch": 0.49633399465194517, "grad_norm": 0.66796875, "learning_rate": 1.7182759221994727e-05, "loss": 1.4238, "step": 2877 }, { "epoch": 0.4965065125506771, "grad_norm": 1.0625, "learning_rate": 1.7180865185922307e-05, "loss": 1.4501, "step": 2878 }, { "epoch": 0.49667903044940914, "grad_norm": 0.6875, "learning_rate": 1.7178970617846524e-05, "loss": 1.4374, "step": 2879 }, { "epoch": 0.4968515483481411, "grad_norm": 0.59765625, "learning_rate": 1.7177075517907745e-05, "loss": 1.4367, "step": 2880 }, { "epoch": 0.4970240662468731, "grad_norm": 0.85546875, "learning_rate": 1.7175179886246365e-05, "loss": 1.4153, "step": 2881 }, { "epoch": 0.4971965841456051, "grad_norm": 0.6953125, "learning_rate": 1.7173283723002825e-05, "loss": 1.5074, "step": 2882 }, { "epoch": 0.4973691020443371, "grad_norm": 0.64453125, "learning_rate": 1.7171387028317606e-05, "loss": 1.4974, "step": 2883 }, { "epoch": 0.4975416199430691, "grad_norm": 0.65234375, "learning_rate": 1.716948980233123e-05, "loss": 1.4671, "step": 2884 }, { "epoch": 0.4977141378418011, "grad_norm": 0.6953125, "learning_rate": 1.716759204518425e-05, "loss": 1.4103, "step": 2885 }, { "epoch": 0.4978866557405331, "grad_norm": 0.65625, "learning_rate": 1.7165693757017267e-05, "loss": 1.5011, "step": 2886 }, { "epoch": 0.49805917363926505, "grad_norm": 0.61328125, "learning_rate": 1.7163794937970916e-05, "loss": 1.4511, "step": 2887 }, { "epoch": 0.49823169153799707, "grad_norm": 0.61328125, "learning_rate": 1.7161895588185878e-05, "loss": 1.5568, "step": 2888 }, { "epoch": 0.4984042094367291, "grad_norm": 0.69140625, "learning_rate": 1.7159995707802863e-05, "loss": 1.5091, "step": 2889 }, { "epoch": 0.49857672733546105, "grad_norm": 0.6796875, "learning_rate": 1.7158095296962627e-05, "loss": 1.4798, "step": 2890 }, { "epoch": 0.49874924523419306, "grad_norm": 0.6640625, "learning_rate": 1.7156194355805968e-05, "loss": 1.4575, "step": 2891 }, { "epoch": 0.498921763132925, "grad_norm": 0.66796875, "learning_rate": 1.7154292884473712e-05, "loss": 1.5086, "step": 2892 }, { "epoch": 0.49909428103165704, "grad_norm": 0.71484375, "learning_rate": 1.7152390883106743e-05, "loss": 1.4749, "step": 2893 }, { "epoch": 0.49926679893038906, "grad_norm": 0.67578125, "learning_rate": 1.7150488351845965e-05, "loss": 1.5043, "step": 2894 }, { "epoch": 0.499439316829121, "grad_norm": 0.8125, "learning_rate": 1.7148585290832333e-05, "loss": 1.539, "step": 2895 }, { "epoch": 0.49961183472785303, "grad_norm": 0.59765625, "learning_rate": 1.7146681700206834e-05, "loss": 1.434, "step": 2896 }, { "epoch": 0.499784352626585, "grad_norm": 0.59375, "learning_rate": 1.71447775801105e-05, "loss": 1.4453, "step": 2897 }, { "epoch": 0.499956870525317, "grad_norm": 0.59375, "learning_rate": 1.71428729306844e-05, "loss": 1.456, "step": 2898 }, { "epoch": 0.500129388424049, "grad_norm": 0.62109375, "learning_rate": 1.7140967752069645e-05, "loss": 1.4775, "step": 2899 }, { "epoch": 0.500301906322781, "grad_norm": 0.60546875, "learning_rate": 1.713906204440738e-05, "loss": 1.3852, "step": 2900 }, { "epoch": 0.500301906322781, "eval_loss": 1.445255160331726, "eval_runtime": 10.763, "eval_samples_per_second": 95.141, "eval_steps_per_second": 23.785, "step": 2900 }, { "epoch": 0.500474424221513, "grad_norm": 0.62890625, "learning_rate": 1.713715580783879e-05, "loss": 1.5041, "step": 2901 }, { "epoch": 0.500646942120245, "grad_norm": 0.69921875, "learning_rate": 1.71352490425051e-05, "loss": 1.4627, "step": 2902 }, { "epoch": 0.5008194600189769, "grad_norm": 0.6171875, "learning_rate": 1.7133341748547586e-05, "loss": 1.503, "step": 2903 }, { "epoch": 0.500991977917709, "grad_norm": 0.73828125, "learning_rate": 1.7131433926107536e-05, "loss": 1.4275, "step": 2904 }, { "epoch": 0.501164495816441, "grad_norm": 0.8046875, "learning_rate": 1.7129525575326307e-05, "loss": 1.49, "step": 2905 }, { "epoch": 0.5013370137151729, "grad_norm": 0.66796875, "learning_rate": 1.7127616696345273e-05, "loss": 1.5291, "step": 2906 }, { "epoch": 0.501509531613905, "grad_norm": 0.9453125, "learning_rate": 1.7125707289305862e-05, "loss": 1.4875, "step": 2907 }, { "epoch": 0.501682049512637, "grad_norm": 0.79296875, "learning_rate": 1.7123797354349524e-05, "loss": 1.4227, "step": 2908 }, { "epoch": 0.5018545674113689, "grad_norm": 0.609375, "learning_rate": 1.7121886891617774e-05, "loss": 1.4348, "step": 2909 }, { "epoch": 0.5020270853101009, "grad_norm": 0.6796875, "learning_rate": 1.711997590125214e-05, "loss": 1.4808, "step": 2910 }, { "epoch": 0.502199603208833, "grad_norm": 0.62109375, "learning_rate": 1.7118064383394206e-05, "loss": 1.4416, "step": 2911 }, { "epoch": 0.5023721211075649, "grad_norm": 0.58203125, "learning_rate": 1.7116152338185584e-05, "loss": 1.4522, "step": 2912 }, { "epoch": 0.5025446390062969, "grad_norm": 0.61328125, "learning_rate": 1.711423976576794e-05, "loss": 1.4111, "step": 2913 }, { "epoch": 0.5027171569050289, "grad_norm": 0.609375, "learning_rate": 1.7112326666282953e-05, "loss": 1.4947, "step": 2914 }, { "epoch": 0.5028896748037609, "grad_norm": 0.61328125, "learning_rate": 1.7110413039872372e-05, "loss": 1.4555, "step": 2915 }, { "epoch": 0.5030621927024929, "grad_norm": 0.6328125, "learning_rate": 1.710849888667796e-05, "loss": 1.5209, "step": 2916 }, { "epoch": 0.5032347106012248, "grad_norm": 0.578125, "learning_rate": 1.710658420684154e-05, "loss": 1.4655, "step": 2917 }, { "epoch": 0.5034072284999569, "grad_norm": 0.68359375, "learning_rate": 1.7104669000504955e-05, "loss": 1.4579, "step": 2918 }, { "epoch": 0.5035797463986889, "grad_norm": 0.59765625, "learning_rate": 1.71027532678101e-05, "loss": 1.5267, "step": 2919 }, { "epoch": 0.5037522642974208, "grad_norm": 0.72265625, "learning_rate": 1.7100837008898903e-05, "loss": 1.484, "step": 2920 }, { "epoch": 0.5039247821961529, "grad_norm": 0.69921875, "learning_rate": 1.709892022391333e-05, "loss": 1.4201, "step": 2921 }, { "epoch": 0.5040973000948848, "grad_norm": 0.5859375, "learning_rate": 1.7097002912995392e-05, "loss": 1.4935, "step": 2922 }, { "epoch": 0.5042698179936168, "grad_norm": 0.609375, "learning_rate": 1.7095085076287135e-05, "loss": 1.5663, "step": 2923 }, { "epoch": 0.5044423358923489, "grad_norm": 0.61328125, "learning_rate": 1.7093166713930642e-05, "loss": 1.4487, "step": 2924 }, { "epoch": 0.5046148537910808, "grad_norm": 0.578125, "learning_rate": 1.709124782606804e-05, "loss": 1.3968, "step": 2925 }, { "epoch": 0.5047873716898128, "grad_norm": 0.66796875, "learning_rate": 1.708932841284149e-05, "loss": 1.3832, "step": 2926 }, { "epoch": 0.5049598895885448, "grad_norm": 0.7265625, "learning_rate": 1.7087408474393193e-05, "loss": 1.4526, "step": 2927 }, { "epoch": 0.5051324074872768, "grad_norm": 0.60546875, "learning_rate": 1.7085488010865398e-05, "loss": 1.4902, "step": 2928 }, { "epoch": 0.5053049253860088, "grad_norm": 0.640625, "learning_rate": 1.7083567022400376e-05, "loss": 1.4707, "step": 2929 }, { "epoch": 0.5054774432847408, "grad_norm": 0.6953125, "learning_rate": 1.7081645509140448e-05, "loss": 1.491, "step": 2930 }, { "epoch": 0.5056499611834728, "grad_norm": 0.63671875, "learning_rate": 1.707972347122797e-05, "loss": 1.4669, "step": 2931 }, { "epoch": 0.5058224790822048, "grad_norm": 0.625, "learning_rate": 1.7077800908805343e-05, "loss": 1.4441, "step": 2932 }, { "epoch": 0.5059949969809368, "grad_norm": 0.6953125, "learning_rate": 1.7075877822015003e-05, "loss": 1.4583, "step": 2933 }, { "epoch": 0.5061675148796687, "grad_norm": 1.6328125, "learning_rate": 1.707395421099942e-05, "loss": 1.5226, "step": 2934 }, { "epoch": 0.5063400327784008, "grad_norm": 0.73828125, "learning_rate": 1.707203007590111e-05, "loss": 1.49, "step": 2935 }, { "epoch": 0.5065125506771327, "grad_norm": 0.65234375, "learning_rate": 1.707010541686262e-05, "loss": 1.479, "step": 2936 }, { "epoch": 0.5066850685758647, "grad_norm": 0.72265625, "learning_rate": 1.706818023402655e-05, "loss": 1.4068, "step": 2937 }, { "epoch": 0.5068575864745968, "grad_norm": 0.609375, "learning_rate": 1.7066254527535518e-05, "loss": 1.4789, "step": 2938 }, { "epoch": 0.5070301043733287, "grad_norm": 0.65625, "learning_rate": 1.7064328297532203e-05, "loss": 1.5497, "step": 2939 }, { "epoch": 0.5072026222720607, "grad_norm": 0.57421875, "learning_rate": 1.706240154415931e-05, "loss": 1.4872, "step": 2940 }, { "epoch": 0.5073751401707928, "grad_norm": 0.65625, "learning_rate": 1.706047426755958e-05, "loss": 1.4383, "step": 2941 }, { "epoch": 0.5075476580695247, "grad_norm": 0.60546875, "learning_rate": 1.70585464678758e-05, "loss": 1.4348, "step": 2942 }, { "epoch": 0.5077201759682567, "grad_norm": 0.671875, "learning_rate": 1.7056618145250797e-05, "loss": 1.4141, "step": 2943 }, { "epoch": 0.5078926938669887, "grad_norm": 0.66015625, "learning_rate": 1.7054689299827425e-05, "loss": 1.5521, "step": 2944 }, { "epoch": 0.5080652117657207, "grad_norm": 0.6484375, "learning_rate": 1.7052759931748593e-05, "loss": 1.4734, "step": 2945 }, { "epoch": 0.5082377296644527, "grad_norm": 0.625, "learning_rate": 1.7050830041157234e-05, "loss": 1.4657, "step": 2946 }, { "epoch": 0.5084102475631846, "grad_norm": 0.6015625, "learning_rate": 1.7048899628196337e-05, "loss": 1.4567, "step": 2947 }, { "epoch": 0.5085827654619167, "grad_norm": 0.62109375, "learning_rate": 1.7046968693008903e-05, "loss": 1.4203, "step": 2948 }, { "epoch": 0.5087552833606487, "grad_norm": 0.67578125, "learning_rate": 1.7045037235738005e-05, "loss": 1.4839, "step": 2949 }, { "epoch": 0.5089278012593806, "grad_norm": 0.65625, "learning_rate": 1.7043105256526723e-05, "loss": 1.5014, "step": 2950 }, { "epoch": 0.5091003191581126, "grad_norm": 0.71484375, "learning_rate": 1.70411727555182e-05, "loss": 1.4932, "step": 2951 }, { "epoch": 0.5092728370568447, "grad_norm": 0.5859375, "learning_rate": 1.70392397328556e-05, "loss": 1.5016, "step": 2952 }, { "epoch": 0.5094453549555766, "grad_norm": 0.84765625, "learning_rate": 1.7037306188682142e-05, "loss": 1.4502, "step": 2953 }, { "epoch": 0.5096178728543086, "grad_norm": 0.7578125, "learning_rate": 1.7035372123141067e-05, "loss": 1.4817, "step": 2954 }, { "epoch": 0.5097903907530407, "grad_norm": 0.78125, "learning_rate": 1.703343753637567e-05, "loss": 1.4951, "step": 2955 }, { "epoch": 0.5099629086517726, "grad_norm": 0.64453125, "learning_rate": 1.703150242852927e-05, "loss": 1.5164, "step": 2956 }, { "epoch": 0.5101354265505046, "grad_norm": 0.66796875, "learning_rate": 1.7029566799745233e-05, "loss": 1.4479, "step": 2957 }, { "epoch": 0.5103079444492367, "grad_norm": 0.796875, "learning_rate": 1.702763065016697e-05, "loss": 1.4953, "step": 2958 }, { "epoch": 0.5104804623479686, "grad_norm": 0.7109375, "learning_rate": 1.7025693979937915e-05, "loss": 1.5578, "step": 2959 }, { "epoch": 0.5106529802467006, "grad_norm": 0.6640625, "learning_rate": 1.7023756789201553e-05, "loss": 1.4928, "step": 2960 }, { "epoch": 0.5108254981454325, "grad_norm": 0.74609375, "learning_rate": 1.70218190781014e-05, "loss": 1.4897, "step": 2961 }, { "epoch": 0.5109980160441646, "grad_norm": 0.6875, "learning_rate": 1.7019880846781017e-05, "loss": 1.5874, "step": 2962 }, { "epoch": 0.5111705339428966, "grad_norm": 0.6640625, "learning_rate": 1.7017942095383997e-05, "loss": 1.5494, "step": 2963 }, { "epoch": 0.5113430518416285, "grad_norm": 0.8125, "learning_rate": 1.701600282405398e-05, "loss": 1.3314, "step": 2964 }, { "epoch": 0.5115155697403606, "grad_norm": 0.61328125, "learning_rate": 1.7014063032934632e-05, "loss": 1.3961, "step": 2965 }, { "epoch": 0.5116880876390926, "grad_norm": 0.75390625, "learning_rate": 1.701212272216967e-05, "loss": 1.5077, "step": 2966 }, { "epoch": 0.5118606055378245, "grad_norm": 0.6875, "learning_rate": 1.701018189190284e-05, "loss": 1.4418, "step": 2967 }, { "epoch": 0.5120331234365565, "grad_norm": 0.62890625, "learning_rate": 1.7008240542277933e-05, "loss": 1.3953, "step": 2968 }, { "epoch": 0.5122056413352886, "grad_norm": 0.59765625, "learning_rate": 1.7006298673438784e-05, "loss": 1.4377, "step": 2969 }, { "epoch": 0.5123781592340205, "grad_norm": 0.66015625, "learning_rate": 1.7004356285529246e-05, "loss": 1.4127, "step": 2970 }, { "epoch": 0.5125506771327525, "grad_norm": 0.60546875, "learning_rate": 1.7002413378693236e-05, "loss": 1.5438, "step": 2971 }, { "epoch": 0.5127231950314846, "grad_norm": 0.70703125, "learning_rate": 1.7000469953074682e-05, "loss": 1.4592, "step": 2972 }, { "epoch": 0.5128957129302165, "grad_norm": 0.65234375, "learning_rate": 1.6998526008817577e-05, "loss": 1.4799, "step": 2973 }, { "epoch": 0.5130682308289485, "grad_norm": 0.58203125, "learning_rate": 1.6996581546065933e-05, "loss": 1.5223, "step": 2974 }, { "epoch": 0.5132407487276806, "grad_norm": 0.5859375, "learning_rate": 1.6994636564963817e-05, "loss": 1.4836, "step": 2975 }, { "epoch": 0.5134132666264125, "grad_norm": 0.6328125, "learning_rate": 1.6992691065655316e-05, "loss": 1.3625, "step": 2976 }, { "epoch": 0.5135857845251445, "grad_norm": 0.6953125, "learning_rate": 1.699074504828457e-05, "loss": 1.4768, "step": 2977 }, { "epoch": 0.5137583024238764, "grad_norm": 0.640625, "learning_rate": 1.6988798512995747e-05, "loss": 1.5272, "step": 2978 }, { "epoch": 0.5139308203226085, "grad_norm": 0.62890625, "learning_rate": 1.6986851459933067e-05, "loss": 1.4577, "step": 2979 }, { "epoch": 0.5141033382213405, "grad_norm": 0.90234375, "learning_rate": 1.698490388924077e-05, "loss": 1.5431, "step": 2980 }, { "epoch": 0.5142758561200724, "grad_norm": 0.65234375, "learning_rate": 1.6982955801063155e-05, "loss": 1.469, "step": 2981 }, { "epoch": 0.5144483740188045, "grad_norm": 0.68359375, "learning_rate": 1.698100719554454e-05, "loss": 1.4332, "step": 2982 }, { "epoch": 0.5146208919175365, "grad_norm": 0.63671875, "learning_rate": 1.6979058072829292e-05, "loss": 1.552, "step": 2983 }, { "epoch": 0.5147934098162684, "grad_norm": 0.69140625, "learning_rate": 1.6977108433061812e-05, "loss": 1.4409, "step": 2984 }, { "epoch": 0.5149659277150004, "grad_norm": 0.6328125, "learning_rate": 1.697515827638655e-05, "loss": 1.4671, "step": 2985 }, { "epoch": 0.5151384456137325, "grad_norm": 0.609375, "learning_rate": 1.6973207602947977e-05, "loss": 1.5082, "step": 2986 }, { "epoch": 0.5153109635124644, "grad_norm": 0.6640625, "learning_rate": 1.697125641289062e-05, "loss": 1.4577, "step": 2987 }, { "epoch": 0.5154834814111964, "grad_norm": 0.7109375, "learning_rate": 1.696930470635902e-05, "loss": 1.4511, "step": 2988 }, { "epoch": 0.5156559993099284, "grad_norm": 0.6640625, "learning_rate": 1.6967352483497785e-05, "loss": 1.4251, "step": 2989 }, { "epoch": 0.5158285172086604, "grad_norm": 0.66796875, "learning_rate": 1.6965399744451544e-05, "loss": 1.3204, "step": 2990 }, { "epoch": 0.5160010351073924, "grad_norm": 0.6015625, "learning_rate": 1.696344648936497e-05, "loss": 1.5144, "step": 2991 }, { "epoch": 0.5161735530061243, "grad_norm": 0.64453125, "learning_rate": 1.696149271838277e-05, "loss": 1.4833, "step": 2992 }, { "epoch": 0.5163460709048564, "grad_norm": 0.80078125, "learning_rate": 1.695953843164969e-05, "loss": 1.5172, "step": 2993 }, { "epoch": 0.5165185888035884, "grad_norm": 0.66796875, "learning_rate": 1.695758362931052e-05, "loss": 1.6615, "step": 2994 }, { "epoch": 0.5166911067023203, "grad_norm": 0.64453125, "learning_rate": 1.695562831151008e-05, "loss": 1.4564, "step": 2995 }, { "epoch": 0.5168636246010524, "grad_norm": 0.5703125, "learning_rate": 1.6953672478393235e-05, "loss": 1.3986, "step": 2996 }, { "epoch": 0.5170361424997844, "grad_norm": 0.67578125, "learning_rate": 1.6951716130104884e-05, "loss": 1.4119, "step": 2997 }, { "epoch": 0.5172086603985163, "grad_norm": 0.66015625, "learning_rate": 1.6949759266789963e-05, "loss": 1.4871, "step": 2998 }, { "epoch": 0.5173811782972484, "grad_norm": 0.65234375, "learning_rate": 1.6947801888593452e-05, "loss": 1.4817, "step": 2999 }, { "epoch": 0.5175536961959804, "grad_norm": 0.7890625, "learning_rate": 1.694584399566037e-05, "loss": 1.4331, "step": 3000 }, { "epoch": 0.5175536961959804, "eval_loss": 1.4426935911178589, "eval_runtime": 11.0486, "eval_samples_per_second": 92.681, "eval_steps_per_second": 23.17, "step": 3000 }, { "epoch": 0.5177262140947123, "grad_norm": 0.6640625, "learning_rate": 1.6943885588135763e-05, "loss": 1.4386, "step": 3001 }, { "epoch": 0.5178987319934443, "grad_norm": 0.58984375, "learning_rate": 1.694192666616472e-05, "loss": 1.4496, "step": 3002 }, { "epoch": 0.5180712498921763, "grad_norm": 0.87890625, "learning_rate": 1.693996722989238e-05, "loss": 1.4264, "step": 3003 }, { "epoch": 0.5182437677909083, "grad_norm": 0.65234375, "learning_rate": 1.6938007279463903e-05, "loss": 1.5147, "step": 3004 }, { "epoch": 0.5184162856896403, "grad_norm": 0.73828125, "learning_rate": 1.6936046815024495e-05, "loss": 1.5204, "step": 3005 }, { "epoch": 0.5185888035883723, "grad_norm": 0.77734375, "learning_rate": 1.69340858367194e-05, "loss": 1.4578, "step": 3006 }, { "epoch": 0.5187613214871043, "grad_norm": 0.8671875, "learning_rate": 1.6932124344693904e-05, "loss": 1.4739, "step": 3007 }, { "epoch": 0.5189338393858363, "grad_norm": 0.69140625, "learning_rate": 1.693016233909332e-05, "loss": 1.4174, "step": 3008 }, { "epoch": 0.5191063572845682, "grad_norm": 0.734375, "learning_rate": 1.692819982006301e-05, "loss": 1.4694, "step": 3009 }, { "epoch": 0.5192788751833003, "grad_norm": 0.66015625, "learning_rate": 1.692623678774836e-05, "loss": 1.4741, "step": 3010 }, { "epoch": 0.5194513930820323, "grad_norm": 0.6640625, "learning_rate": 1.692427324229482e-05, "loss": 1.4905, "step": 3011 }, { "epoch": 0.5196239109807642, "grad_norm": 0.60546875, "learning_rate": 1.692230918384785e-05, "loss": 1.4186, "step": 3012 }, { "epoch": 0.5197964288794963, "grad_norm": 0.7734375, "learning_rate": 1.6920344612552965e-05, "loss": 1.4845, "step": 3013 }, { "epoch": 0.5199689467782282, "grad_norm": 0.71484375, "learning_rate": 1.691837952855571e-05, "loss": 1.5879, "step": 3014 }, { "epoch": 0.5201414646769602, "grad_norm": 0.7421875, "learning_rate": 1.6916413932001673e-05, "loss": 1.4893, "step": 3015 }, { "epoch": 0.5203139825756923, "grad_norm": 0.87109375, "learning_rate": 1.6914447823036474e-05, "loss": 1.5236, "step": 3016 }, { "epoch": 0.5204865004744242, "grad_norm": 0.91015625, "learning_rate": 1.691248120180578e-05, "loss": 1.5532, "step": 3017 }, { "epoch": 0.5206590183731562, "grad_norm": 0.9140625, "learning_rate": 1.691051406845528e-05, "loss": 1.4801, "step": 3018 }, { "epoch": 0.5208315362718882, "grad_norm": 0.69921875, "learning_rate": 1.6908546423130726e-05, "loss": 1.4341, "step": 3019 }, { "epoch": 0.5210040541706202, "grad_norm": 0.6484375, "learning_rate": 1.6906578265977885e-05, "loss": 1.5411, "step": 3020 }, { "epoch": 0.5211765720693522, "grad_norm": 0.62890625, "learning_rate": 1.690460959714257e-05, "loss": 1.4389, "step": 3021 }, { "epoch": 0.5213490899680842, "grad_norm": 0.64453125, "learning_rate": 1.6902640416770635e-05, "loss": 1.5066, "step": 3022 }, { "epoch": 0.5215216078668162, "grad_norm": 0.66796875, "learning_rate": 1.6900670725007968e-05, "loss": 1.4621, "step": 3023 }, { "epoch": 0.5216941257655482, "grad_norm": 0.78125, "learning_rate": 1.6898700522000498e-05, "loss": 1.4797, "step": 3024 }, { "epoch": 0.5218666436642801, "grad_norm": 0.65234375, "learning_rate": 1.6896729807894187e-05, "loss": 1.5472, "step": 3025 }, { "epoch": 0.5220391615630121, "grad_norm": 0.60546875, "learning_rate": 1.6894758582835038e-05, "loss": 1.4273, "step": 3026 }, { "epoch": 0.5222116794617442, "grad_norm": 0.671875, "learning_rate": 1.6892786846969095e-05, "loss": 1.4918, "step": 3027 }, { "epoch": 0.5223841973604761, "grad_norm": 0.63671875, "learning_rate": 1.689081460044243e-05, "loss": 1.4427, "step": 3028 }, { "epoch": 0.5225567152592081, "grad_norm": 0.7109375, "learning_rate": 1.6888841843401164e-05, "loss": 1.3589, "step": 3029 }, { "epoch": 0.5227292331579402, "grad_norm": 0.6015625, "learning_rate": 1.688686857599146e-05, "loss": 1.3723, "step": 3030 }, { "epoch": 0.5229017510566721, "grad_norm": 0.60546875, "learning_rate": 1.688489479835949e-05, "loss": 1.5148, "step": 3031 }, { "epoch": 0.5230742689554041, "grad_norm": 0.59375, "learning_rate": 1.6882920510651497e-05, "loss": 1.4609, "step": 3032 }, { "epoch": 0.5232467868541362, "grad_norm": 0.6484375, "learning_rate": 1.6880945713013747e-05, "loss": 1.3951, "step": 3033 }, { "epoch": 0.5234193047528681, "grad_norm": 0.65625, "learning_rate": 1.6878970405592545e-05, "loss": 1.4334, "step": 3034 }, { "epoch": 0.5235918226516001, "grad_norm": 0.6484375, "learning_rate": 1.6876994588534234e-05, "loss": 1.4421, "step": 3035 }, { "epoch": 0.523764340550332, "grad_norm": 0.59765625, "learning_rate": 1.6875018261985193e-05, "loss": 1.4525, "step": 3036 }, { "epoch": 0.5239368584490641, "grad_norm": 0.60546875, "learning_rate": 1.6873041426091845e-05, "loss": 1.42, "step": 3037 }, { "epoch": 0.5241093763477961, "grad_norm": 0.61328125, "learning_rate": 1.6871064081000643e-05, "loss": 1.5279, "step": 3038 }, { "epoch": 0.524281894246528, "grad_norm": 0.6796875, "learning_rate": 1.686908622685808e-05, "loss": 1.4593, "step": 3039 }, { "epoch": 0.5244544121452601, "grad_norm": 0.64453125, "learning_rate": 1.6867107863810687e-05, "loss": 1.5149, "step": 3040 }, { "epoch": 0.5246269300439921, "grad_norm": 0.56640625, "learning_rate": 1.6865128992005037e-05, "loss": 1.3865, "step": 3041 }, { "epoch": 0.524799447942724, "grad_norm": 0.58984375, "learning_rate": 1.6863149611587738e-05, "loss": 1.4426, "step": 3042 }, { "epoch": 0.524971965841456, "grad_norm": 0.5859375, "learning_rate": 1.686116972270543e-05, "loss": 1.4312, "step": 3043 }, { "epoch": 0.5251444837401881, "grad_norm": 0.71875, "learning_rate": 1.68591893255048e-05, "loss": 1.5589, "step": 3044 }, { "epoch": 0.52531700163892, "grad_norm": 0.6640625, "learning_rate": 1.685720842013257e-05, "loss": 1.441, "step": 3045 }, { "epoch": 0.525489519537652, "grad_norm": 0.7265625, "learning_rate": 1.6855227006735492e-05, "loss": 1.3209, "step": 3046 }, { "epoch": 0.5256620374363841, "grad_norm": 0.67578125, "learning_rate": 1.6853245085460362e-05, "loss": 1.4729, "step": 3047 }, { "epoch": 0.525834555335116, "grad_norm": 0.64453125, "learning_rate": 1.6851262656454016e-05, "loss": 1.5194, "step": 3048 }, { "epoch": 0.526007073233848, "grad_norm": 0.7734375, "learning_rate": 1.684927971986332e-05, "loss": 1.5129, "step": 3049 }, { "epoch": 0.5261795911325801, "grad_norm": 0.6328125, "learning_rate": 1.684729627583519e-05, "loss": 1.4712, "step": 3050 }, { "epoch": 0.526352109031312, "grad_norm": 0.58203125, "learning_rate": 1.684531232451657e-05, "loss": 1.473, "step": 3051 }, { "epoch": 0.526524626930044, "grad_norm": 0.64453125, "learning_rate": 1.684332786605444e-05, "loss": 1.4668, "step": 3052 }, { "epoch": 0.5266971448287759, "grad_norm": 0.66796875, "learning_rate": 1.684134290059582e-05, "loss": 1.3984, "step": 3053 }, { "epoch": 0.526869662727508, "grad_norm": 0.63671875, "learning_rate": 1.6839357428287775e-05, "loss": 1.447, "step": 3054 }, { "epoch": 0.52704218062624, "grad_norm": 0.6484375, "learning_rate": 1.6837371449277395e-05, "loss": 1.4159, "step": 3055 }, { "epoch": 0.5272146985249719, "grad_norm": 0.76953125, "learning_rate": 1.6835384963711815e-05, "loss": 1.5567, "step": 3056 }, { "epoch": 0.527387216423704, "grad_norm": 0.71875, "learning_rate": 1.683339797173821e-05, "loss": 1.5057, "step": 3057 }, { "epoch": 0.527559734322436, "grad_norm": 0.66015625, "learning_rate": 1.6831410473503788e-05, "loss": 1.5606, "step": 3058 }, { "epoch": 0.5277322522211679, "grad_norm": 0.69140625, "learning_rate": 1.682942246915579e-05, "loss": 1.4107, "step": 3059 }, { "epoch": 0.5279047701198999, "grad_norm": 0.5546875, "learning_rate": 1.6827433958841504e-05, "loss": 1.3407, "step": 3060 }, { "epoch": 0.528077288018632, "grad_norm": 0.96484375, "learning_rate": 1.682544494270825e-05, "loss": 1.5163, "step": 3061 }, { "epoch": 0.5282498059173639, "grad_norm": 0.65625, "learning_rate": 1.682345542090339e-05, "loss": 1.4255, "step": 3062 }, { "epoch": 0.5284223238160959, "grad_norm": 0.61328125, "learning_rate": 1.6821465393574315e-05, "loss": 1.3823, "step": 3063 }, { "epoch": 0.528594841714828, "grad_norm": 0.69921875, "learning_rate": 1.681947486086846e-05, "loss": 1.489, "step": 3064 }, { "epoch": 0.5287673596135599, "grad_norm": 0.70703125, "learning_rate": 1.6817483822933298e-05, "loss": 1.3998, "step": 3065 }, { "epoch": 0.5289398775122919, "grad_norm": 0.73828125, "learning_rate": 1.681549227991634e-05, "loss": 1.4426, "step": 3066 }, { "epoch": 0.5291123954110238, "grad_norm": 0.63671875, "learning_rate": 1.6813500231965127e-05, "loss": 1.486, "step": 3067 }, { "epoch": 0.5292849133097559, "grad_norm": 0.70703125, "learning_rate": 1.6811507679227242e-05, "loss": 1.4526, "step": 3068 }, { "epoch": 0.5294574312084879, "grad_norm": 0.6328125, "learning_rate": 1.6809514621850313e-05, "loss": 1.4536, "step": 3069 }, { "epoch": 0.5296299491072198, "grad_norm": 0.58984375, "learning_rate": 1.680752105998199e-05, "loss": 1.4325, "step": 3070 }, { "epoch": 0.5298024670059519, "grad_norm": 0.62109375, "learning_rate": 1.680552699376997e-05, "loss": 1.3994, "step": 3071 }, { "epoch": 0.5299749849046839, "grad_norm": 0.6171875, "learning_rate": 1.6803532423361985e-05, "loss": 1.516, "step": 3072 }, { "epoch": 0.5301475028034158, "grad_norm": 0.640625, "learning_rate": 1.6801537348905813e-05, "loss": 1.4652, "step": 3073 }, { "epoch": 0.5303200207021479, "grad_norm": 0.65625, "learning_rate": 1.6799541770549256e-05, "loss": 1.5399, "step": 3074 }, { "epoch": 0.5304925386008799, "grad_norm": 0.82421875, "learning_rate": 1.6797545688440156e-05, "loss": 1.4818, "step": 3075 }, { "epoch": 0.5306650564996118, "grad_norm": 0.671875, "learning_rate": 1.67955491027264e-05, "loss": 1.4993, "step": 3076 }, { "epoch": 0.5308375743983438, "grad_norm": 0.7265625, "learning_rate": 1.6793552013555905e-05, "loss": 1.3808, "step": 3077 }, { "epoch": 0.5310100922970759, "grad_norm": 0.72265625, "learning_rate": 1.679155442107663e-05, "loss": 1.4844, "step": 3078 }, { "epoch": 0.5311826101958078, "grad_norm": 0.578125, "learning_rate": 1.6789556325436566e-05, "loss": 1.4248, "step": 3079 }, { "epoch": 0.5313551280945398, "grad_norm": 0.65234375, "learning_rate": 1.678755772678375e-05, "loss": 1.4678, "step": 3080 }, { "epoch": 0.5315276459932718, "grad_norm": 0.72265625, "learning_rate": 1.6785558625266243e-05, "loss": 1.4931, "step": 3081 }, { "epoch": 0.5317001638920038, "grad_norm": 0.6640625, "learning_rate": 1.6783559021032158e-05, "loss": 1.3671, "step": 3082 }, { "epoch": 0.5318726817907358, "grad_norm": 0.7265625, "learning_rate": 1.678155891422963e-05, "loss": 1.5186, "step": 3083 }, { "epoch": 0.5320451996894677, "grad_norm": 0.68359375, "learning_rate": 1.677955830500685e-05, "loss": 1.3654, "step": 3084 }, { "epoch": 0.5322177175881998, "grad_norm": 0.6328125, "learning_rate": 1.6777557193512025e-05, "loss": 1.4802, "step": 3085 }, { "epoch": 0.5323902354869318, "grad_norm": 0.70703125, "learning_rate": 1.6775555579893422e-05, "loss": 1.5506, "step": 3086 }, { "epoch": 0.5325627533856637, "grad_norm": 0.7421875, "learning_rate": 1.677355346429932e-05, "loss": 1.392, "step": 3087 }, { "epoch": 0.5327352712843958, "grad_norm": 0.5703125, "learning_rate": 1.6771550846878056e-05, "loss": 1.5255, "step": 3088 }, { "epoch": 0.5329077891831278, "grad_norm": 0.72265625, "learning_rate": 1.6769547727777997e-05, "loss": 1.4502, "step": 3089 }, { "epoch": 0.5330803070818597, "grad_norm": 0.7890625, "learning_rate": 1.6767544107147542e-05, "loss": 1.4886, "step": 3090 }, { "epoch": 0.5332528249805918, "grad_norm": 0.66015625, "learning_rate": 1.6765539985135134e-05, "loss": 1.4446, "step": 3091 }, { "epoch": 0.5334253428793237, "grad_norm": 0.73828125, "learning_rate": 1.6763535361889252e-05, "loss": 1.4988, "step": 3092 }, { "epoch": 0.5335978607780557, "grad_norm": 0.6796875, "learning_rate": 1.676153023755841e-05, "loss": 1.4266, "step": 3093 }, { "epoch": 0.5337703786767877, "grad_norm": 0.66796875, "learning_rate": 1.675952461229116e-05, "loss": 1.5449, "step": 3094 }, { "epoch": 0.5339428965755197, "grad_norm": 0.7109375, "learning_rate": 1.6757518486236088e-05, "loss": 1.469, "step": 3095 }, { "epoch": 0.5341154144742517, "grad_norm": 0.6640625, "learning_rate": 1.6755511859541827e-05, "loss": 1.5697, "step": 3096 }, { "epoch": 0.5342879323729837, "grad_norm": 0.609375, "learning_rate": 1.6753504732357035e-05, "loss": 1.4817, "step": 3097 }, { "epoch": 0.5344604502717157, "grad_norm": 0.6484375, "learning_rate": 1.6751497104830416e-05, "loss": 1.5511, "step": 3098 }, { "epoch": 0.5346329681704477, "grad_norm": 0.73828125, "learning_rate": 1.6749488977110706e-05, "loss": 1.4915, "step": 3099 }, { "epoch": 0.5348054860691797, "grad_norm": 0.63671875, "learning_rate": 1.6747480349346678e-05, "loss": 1.4205, "step": 3100 }, { "epoch": 0.5348054860691797, "eval_loss": 1.4400311708450317, "eval_runtime": 10.8978, "eval_samples_per_second": 93.964, "eval_steps_per_second": 23.491, "step": 3100 }, { "epoch": 0.5349780039679116, "grad_norm": 0.7109375, "learning_rate": 1.674547122168715e-05, "loss": 1.4605, "step": 3101 }, { "epoch": 0.5351505218666437, "grad_norm": 0.67578125, "learning_rate": 1.6743461594280962e-05, "loss": 1.4807, "step": 3102 }, { "epoch": 0.5353230397653757, "grad_norm": 0.6171875, "learning_rate": 1.6741451467277006e-05, "loss": 1.4194, "step": 3103 }, { "epoch": 0.5354955576641076, "grad_norm": 0.6328125, "learning_rate": 1.6739440840824203e-05, "loss": 1.4102, "step": 3104 }, { "epoch": 0.5356680755628397, "grad_norm": 0.625, "learning_rate": 1.6737429715071512e-05, "loss": 1.4654, "step": 3105 }, { "epoch": 0.5358405934615716, "grad_norm": 0.71484375, "learning_rate": 1.673541809016793e-05, "loss": 1.3424, "step": 3106 }, { "epoch": 0.5360131113603036, "grad_norm": 0.7578125, "learning_rate": 1.673340596626249e-05, "loss": 1.4412, "step": 3107 }, { "epoch": 0.5361856292590357, "grad_norm": 0.6015625, "learning_rate": 1.6731393343504266e-05, "loss": 1.4353, "step": 3108 }, { "epoch": 0.5363581471577676, "grad_norm": 0.66015625, "learning_rate": 1.6729380222042363e-05, "loss": 1.4199, "step": 3109 }, { "epoch": 0.5365306650564996, "grad_norm": 0.66796875, "learning_rate": 1.672736660202592e-05, "loss": 1.4368, "step": 3110 }, { "epoch": 0.5367031829552316, "grad_norm": 0.609375, "learning_rate": 1.6725352483604133e-05, "loss": 1.4803, "step": 3111 }, { "epoch": 0.5368757008539636, "grad_norm": 0.72265625, "learning_rate": 1.672333786692621e-05, "loss": 1.5334, "step": 3112 }, { "epoch": 0.5370482187526956, "grad_norm": 0.67578125, "learning_rate": 1.6721322752141404e-05, "loss": 1.412, "step": 3113 }, { "epoch": 0.5372207366514276, "grad_norm": 0.61328125, "learning_rate": 1.6719307139399018e-05, "loss": 1.5448, "step": 3114 }, { "epoch": 0.5373932545501596, "grad_norm": 0.73046875, "learning_rate": 1.671729102884837e-05, "loss": 1.3743, "step": 3115 }, { "epoch": 0.5375657724488916, "grad_norm": 0.609375, "learning_rate": 1.6715274420638833e-05, "loss": 1.3251, "step": 3116 }, { "epoch": 0.5377382903476235, "grad_norm": 0.7578125, "learning_rate": 1.671325731491981e-05, "loss": 1.4829, "step": 3117 }, { "epoch": 0.5379108082463555, "grad_norm": 0.6171875, "learning_rate": 1.6711239711840736e-05, "loss": 1.4443, "step": 3118 }, { "epoch": 0.5380833261450876, "grad_norm": 0.734375, "learning_rate": 1.670922161155109e-05, "loss": 1.5348, "step": 3119 }, { "epoch": 0.5382558440438195, "grad_norm": 0.640625, "learning_rate": 1.670720301420039e-05, "loss": 1.4214, "step": 3120 }, { "epoch": 0.5384283619425515, "grad_norm": 0.72265625, "learning_rate": 1.670518391993818e-05, "loss": 1.5121, "step": 3121 }, { "epoch": 0.5386008798412836, "grad_norm": 0.64453125, "learning_rate": 1.6703164328914048e-05, "loss": 1.4731, "step": 3122 }, { "epoch": 0.5387733977400155, "grad_norm": 0.65234375, "learning_rate": 1.670114424127762e-05, "loss": 1.4324, "step": 3123 }, { "epoch": 0.5389459156387475, "grad_norm": 0.703125, "learning_rate": 1.6699123657178553e-05, "loss": 1.4381, "step": 3124 }, { "epoch": 0.5391184335374796, "grad_norm": 0.80859375, "learning_rate": 1.6697102576766552e-05, "loss": 1.45, "step": 3125 }, { "epoch": 0.5392909514362115, "grad_norm": 0.62109375, "learning_rate": 1.6695081000191345e-05, "loss": 1.414, "step": 3126 }, { "epoch": 0.5394634693349435, "grad_norm": 0.78515625, "learning_rate": 1.6693058927602704e-05, "loss": 1.5434, "step": 3127 }, { "epoch": 0.5396359872336755, "grad_norm": 0.76171875, "learning_rate": 1.6691036359150435e-05, "loss": 1.4602, "step": 3128 }, { "epoch": 0.5398085051324075, "grad_norm": 0.625, "learning_rate": 1.668901329498439e-05, "loss": 1.5231, "step": 3129 }, { "epoch": 0.5399810230311395, "grad_norm": 0.6015625, "learning_rate": 1.6686989735254442e-05, "loss": 1.3867, "step": 3130 }, { "epoch": 0.5401535409298714, "grad_norm": 0.59765625, "learning_rate": 1.6684965680110514e-05, "loss": 1.4377, "step": 3131 }, { "epoch": 0.5403260588286035, "grad_norm": 0.6953125, "learning_rate": 1.668294112970256e-05, "loss": 1.523, "step": 3132 }, { "epoch": 0.5404985767273355, "grad_norm": 1.5078125, "learning_rate": 1.6680916084180566e-05, "loss": 1.5506, "step": 3133 }, { "epoch": 0.5406710946260674, "grad_norm": 0.58984375, "learning_rate": 1.667889054369457e-05, "loss": 1.5098, "step": 3134 }, { "epoch": 0.5408436125247994, "grad_norm": 0.60546875, "learning_rate": 1.6676864508394624e-05, "loss": 1.4797, "step": 3135 }, { "epoch": 0.5410161304235315, "grad_norm": 0.66796875, "learning_rate": 1.667483797843084e-05, "loss": 1.5096, "step": 3136 }, { "epoch": 0.5411886483222634, "grad_norm": 0.640625, "learning_rate": 1.6672810953953352e-05, "loss": 1.5928, "step": 3137 }, { "epoch": 0.5413611662209954, "grad_norm": 0.59765625, "learning_rate": 1.6670783435112334e-05, "loss": 1.4941, "step": 3138 }, { "epoch": 0.5415336841197275, "grad_norm": 0.671875, "learning_rate": 1.6668755422058e-05, "loss": 1.4992, "step": 3139 }, { "epoch": 0.5417062020184594, "grad_norm": 0.58984375, "learning_rate": 1.6666726914940594e-05, "loss": 1.4193, "step": 3140 }, { "epoch": 0.5418787199171914, "grad_norm": 0.6015625, "learning_rate": 1.6664697913910405e-05, "loss": 1.4341, "step": 3141 }, { "epoch": 0.5420512378159233, "grad_norm": 0.5625, "learning_rate": 1.6662668419117748e-05, "loss": 1.4427, "step": 3142 }, { "epoch": 0.5422237557146554, "grad_norm": 0.60546875, "learning_rate": 1.666063843071299e-05, "loss": 1.4435, "step": 3143 }, { "epoch": 0.5423962736133874, "grad_norm": 0.80859375, "learning_rate": 1.6658607948846513e-05, "loss": 1.4828, "step": 3144 }, { "epoch": 0.5425687915121193, "grad_norm": 0.62890625, "learning_rate": 1.6656576973668754e-05, "loss": 1.5457, "step": 3145 }, { "epoch": 0.5427413094108514, "grad_norm": 0.640625, "learning_rate": 1.6654545505330184e-05, "loss": 1.374, "step": 3146 }, { "epoch": 0.5429138273095834, "grad_norm": 0.65625, "learning_rate": 1.6652513543981302e-05, "loss": 1.496, "step": 3147 }, { "epoch": 0.5430863452083153, "grad_norm": 0.60546875, "learning_rate": 1.6650481089772652e-05, "loss": 1.4306, "step": 3148 }, { "epoch": 0.5432588631070474, "grad_norm": 0.59765625, "learning_rate": 1.664844814285481e-05, "loss": 1.3942, "step": 3149 }, { "epoch": 0.5434313810057794, "grad_norm": 0.7578125, "learning_rate": 1.6646414703378387e-05, "loss": 1.4703, "step": 3150 }, { "epoch": 0.5436038989045113, "grad_norm": 0.64453125, "learning_rate": 1.6644380771494032e-05, "loss": 1.6057, "step": 3151 }, { "epoch": 0.5437764168032433, "grad_norm": 0.58984375, "learning_rate": 1.664234634735243e-05, "loss": 1.3779, "step": 3152 }, { "epoch": 0.5439489347019754, "grad_norm": 0.73046875, "learning_rate": 1.6640311431104314e-05, "loss": 1.4757, "step": 3153 }, { "epoch": 0.5441214526007073, "grad_norm": 0.65625, "learning_rate": 1.6638276022900434e-05, "loss": 1.4549, "step": 3154 }, { "epoch": 0.5442939704994393, "grad_norm": 0.68359375, "learning_rate": 1.6636240122891587e-05, "loss": 1.4703, "step": 3155 }, { "epoch": 0.5444664883981714, "grad_norm": 0.76171875, "learning_rate": 1.663420373122861e-05, "loss": 1.528, "step": 3156 }, { "epoch": 0.5446390062969033, "grad_norm": 0.66015625, "learning_rate": 1.6632166848062367e-05, "loss": 1.4214, "step": 3157 }, { "epoch": 0.5448115241956353, "grad_norm": 0.6640625, "learning_rate": 1.6630129473543762e-05, "loss": 1.5069, "step": 3158 }, { "epoch": 0.5449840420943672, "grad_norm": 0.66796875, "learning_rate": 1.662809160782374e-05, "loss": 1.4371, "step": 3159 }, { "epoch": 0.5451565599930993, "grad_norm": 0.6796875, "learning_rate": 1.662605325105328e-05, "loss": 1.5044, "step": 3160 }, { "epoch": 0.5453290778918313, "grad_norm": 0.5703125, "learning_rate": 1.662401440338339e-05, "loss": 1.4541, "step": 3161 }, { "epoch": 0.5455015957905632, "grad_norm": 0.66796875, "learning_rate": 1.6621975064965122e-05, "loss": 1.5276, "step": 3162 }, { "epoch": 0.5456741136892953, "grad_norm": 0.640625, "learning_rate": 1.661993523594957e-05, "loss": 1.5695, "step": 3163 }, { "epoch": 0.5458466315880273, "grad_norm": 0.60546875, "learning_rate": 1.6617894916487852e-05, "loss": 1.5393, "step": 3164 }, { "epoch": 0.5460191494867592, "grad_norm": 0.640625, "learning_rate": 1.6615854106731127e-05, "loss": 1.4206, "step": 3165 }, { "epoch": 0.5461916673854913, "grad_norm": 0.64453125, "learning_rate": 1.661381280683059e-05, "loss": 1.5095, "step": 3166 }, { "epoch": 0.5463641852842233, "grad_norm": 0.65625, "learning_rate": 1.661177101693748e-05, "loss": 1.5578, "step": 3167 }, { "epoch": 0.5465367031829552, "grad_norm": 0.609375, "learning_rate": 1.6609728737203058e-05, "loss": 1.5203, "step": 3168 }, { "epoch": 0.5467092210816872, "grad_norm": 0.609375, "learning_rate": 1.660768596777863e-05, "loss": 1.5172, "step": 3169 }, { "epoch": 0.5468817389804193, "grad_norm": 0.671875, "learning_rate": 1.660564270881554e-05, "loss": 1.4111, "step": 3170 }, { "epoch": 0.5470542568791512, "grad_norm": 0.62109375, "learning_rate": 1.6603598960465166e-05, "loss": 1.4433, "step": 3171 }, { "epoch": 0.5472267747778832, "grad_norm": 0.609375, "learning_rate": 1.660155472287892e-05, "loss": 1.3998, "step": 3172 }, { "epoch": 0.5473992926766152, "grad_norm": 0.62890625, "learning_rate": 1.659950999620825e-05, "loss": 1.4611, "step": 3173 }, { "epoch": 0.5475718105753472, "grad_norm": 0.609375, "learning_rate": 1.6597464780604646e-05, "loss": 1.4887, "step": 3174 }, { "epoch": 0.5477443284740792, "grad_norm": 0.640625, "learning_rate": 1.6595419076219625e-05, "loss": 1.5055, "step": 3175 }, { "epoch": 0.5479168463728111, "grad_norm": 0.63671875, "learning_rate": 1.659337288320475e-05, "loss": 1.4551, "step": 3176 }, { "epoch": 0.5480893642715432, "grad_norm": 0.63671875, "learning_rate": 1.6591326201711612e-05, "loss": 1.5612, "step": 3177 }, { "epoch": 0.5482618821702752, "grad_norm": 0.70703125, "learning_rate": 1.6589279031891847e-05, "loss": 1.4727, "step": 3178 }, { "epoch": 0.5484344000690071, "grad_norm": 0.58984375, "learning_rate": 1.658723137389712e-05, "loss": 1.5481, "step": 3179 }, { "epoch": 0.5486069179677392, "grad_norm": 0.609375, "learning_rate": 1.6585183227879132e-05, "loss": 1.4579, "step": 3180 }, { "epoch": 0.5487794358664712, "grad_norm": 0.75, "learning_rate": 1.6583134593989627e-05, "loss": 1.466, "step": 3181 }, { "epoch": 0.5489519537652031, "grad_norm": 0.5703125, "learning_rate": 1.658108547238038e-05, "loss": 1.4213, "step": 3182 }, { "epoch": 0.5491244716639352, "grad_norm": 0.64453125, "learning_rate": 1.6579035863203197e-05, "loss": 1.457, "step": 3183 }, { "epoch": 0.5492969895626671, "grad_norm": 0.671875, "learning_rate": 1.6576985766609926e-05, "loss": 1.4649, "step": 3184 }, { "epoch": 0.5494695074613991, "grad_norm": 0.66015625, "learning_rate": 1.657493518275246e-05, "loss": 1.426, "step": 3185 }, { "epoch": 0.5496420253601311, "grad_norm": 0.59375, "learning_rate": 1.6572884111782716e-05, "loss": 1.3793, "step": 3186 }, { "epoch": 0.5498145432588631, "grad_norm": 0.609375, "learning_rate": 1.6570832553852643e-05, "loss": 1.3756, "step": 3187 }, { "epoch": 0.5499870611575951, "grad_norm": 0.60546875, "learning_rate": 1.6568780509114237e-05, "loss": 1.458, "step": 3188 }, { "epoch": 0.5501595790563271, "grad_norm": 0.55859375, "learning_rate": 1.656672797771953e-05, "loss": 1.3458, "step": 3189 }, { "epoch": 0.5503320969550591, "grad_norm": 0.71875, "learning_rate": 1.6564674959820585e-05, "loss": 1.4256, "step": 3190 }, { "epoch": 0.5505046148537911, "grad_norm": 0.6796875, "learning_rate": 1.6562621455569495e-05, "loss": 1.4807, "step": 3191 }, { "epoch": 0.5506771327525231, "grad_norm": 0.6171875, "learning_rate": 1.6560567465118407e-05, "loss": 1.5037, "step": 3192 }, { "epoch": 0.550849650651255, "grad_norm": 0.66015625, "learning_rate": 1.655851298861949e-05, "loss": 1.606, "step": 3193 }, { "epoch": 0.5510221685499871, "grad_norm": 0.58984375, "learning_rate": 1.6556458026224948e-05, "loss": 1.4176, "step": 3194 }, { "epoch": 0.551194686448719, "grad_norm": 0.5625, "learning_rate": 1.655440257808703e-05, "loss": 1.3671, "step": 3195 }, { "epoch": 0.551367204347451, "grad_norm": 0.62890625, "learning_rate": 1.6552346644358014e-05, "loss": 1.4281, "step": 3196 }, { "epoch": 0.5515397222461831, "grad_norm": 0.62890625, "learning_rate": 1.6550290225190217e-05, "loss": 1.3226, "step": 3197 }, { "epoch": 0.551712240144915, "grad_norm": 0.59765625, "learning_rate": 1.6548233320735997e-05, "loss": 1.2947, "step": 3198 }, { "epoch": 0.551884758043647, "grad_norm": 0.67578125, "learning_rate": 1.6546175931147733e-05, "loss": 1.4838, "step": 3199 }, { "epoch": 0.552057275942379, "grad_norm": 0.6328125, "learning_rate": 1.6544118056577856e-05, "loss": 1.3835, "step": 3200 }, { "epoch": 0.552057275942379, "eval_loss": 1.4378929138183594, "eval_runtime": 11.3818, "eval_samples_per_second": 89.968, "eval_steps_per_second": 22.492, "step": 3200 }, { "epoch": 0.552229793841111, "grad_norm": 0.65625, "learning_rate": 1.6542059697178822e-05, "loss": 1.5091, "step": 3201 }, { "epoch": 0.552402311739843, "grad_norm": 0.65234375, "learning_rate": 1.6540000853103132e-05, "loss": 1.4104, "step": 3202 }, { "epoch": 0.552574829638575, "grad_norm": 0.60546875, "learning_rate": 1.653794152450331e-05, "loss": 1.3491, "step": 3203 }, { "epoch": 0.552747347537307, "grad_norm": 0.6953125, "learning_rate": 1.653588171153193e-05, "loss": 1.4691, "step": 3204 }, { "epoch": 0.552919865436039, "grad_norm": 0.60546875, "learning_rate": 1.6533821414341597e-05, "loss": 1.473, "step": 3205 }, { "epoch": 0.553092383334771, "grad_norm": 0.6640625, "learning_rate": 1.6531760633084948e-05, "loss": 1.5411, "step": 3206 }, { "epoch": 0.553264901233503, "grad_norm": 0.59765625, "learning_rate": 1.652969936791466e-05, "loss": 1.4346, "step": 3207 }, { "epoch": 0.553437419132235, "grad_norm": 0.62890625, "learning_rate": 1.6527637618983443e-05, "loss": 1.4768, "step": 3208 }, { "epoch": 0.553609937030967, "grad_norm": 0.77734375, "learning_rate": 1.6525575386444042e-05, "loss": 1.4556, "step": 3209 }, { "epoch": 0.5537824549296989, "grad_norm": 0.62109375, "learning_rate": 1.6523512670449246e-05, "loss": 1.4614, "step": 3210 }, { "epoch": 0.553954972828431, "grad_norm": 0.57421875, "learning_rate": 1.6521449471151867e-05, "loss": 1.3232, "step": 3211 }, { "epoch": 0.5541274907271629, "grad_norm": 0.65234375, "learning_rate": 1.6519385788704766e-05, "loss": 1.2933, "step": 3212 }, { "epoch": 0.5543000086258949, "grad_norm": 0.6328125, "learning_rate": 1.6517321623260828e-05, "loss": 1.4747, "step": 3213 }, { "epoch": 0.554472526524627, "grad_norm": 0.60546875, "learning_rate": 1.6515256974972985e-05, "loss": 1.4267, "step": 3214 }, { "epoch": 0.5546450444233589, "grad_norm": 0.59375, "learning_rate": 1.6513191843994195e-05, "loss": 1.4805, "step": 3215 }, { "epoch": 0.5548175623220909, "grad_norm": 0.65234375, "learning_rate": 1.6511126230477458e-05, "loss": 1.374, "step": 3216 }, { "epoch": 0.5549900802208229, "grad_norm": 0.6640625, "learning_rate": 1.6509060134575807e-05, "loss": 1.4438, "step": 3217 }, { "epoch": 0.5551625981195549, "grad_norm": 0.6875, "learning_rate": 1.6506993556442307e-05, "loss": 1.3694, "step": 3218 }, { "epoch": 0.5553351160182869, "grad_norm": 0.6015625, "learning_rate": 1.650492649623007e-05, "loss": 1.4282, "step": 3219 }, { "epoch": 0.5555076339170189, "grad_norm": 0.7265625, "learning_rate": 1.6502858954092233e-05, "loss": 1.4742, "step": 3220 }, { "epoch": 0.5556801518157509, "grad_norm": 0.6640625, "learning_rate": 1.6500790930181973e-05, "loss": 1.4559, "step": 3221 }, { "epoch": 0.5558526697144829, "grad_norm": 0.6484375, "learning_rate": 1.64987224246525e-05, "loss": 1.474, "step": 3222 }, { "epoch": 0.5560251876132148, "grad_norm": 0.58203125, "learning_rate": 1.6496653437657068e-05, "loss": 1.5381, "step": 3223 }, { "epoch": 0.5561977055119469, "grad_norm": 0.73828125, "learning_rate": 1.6494583969348953e-05, "loss": 1.4106, "step": 3224 }, { "epoch": 0.5563702234106789, "grad_norm": 0.60546875, "learning_rate": 1.6492514019881476e-05, "loss": 1.5662, "step": 3225 }, { "epoch": 0.5565427413094108, "grad_norm": 0.671875, "learning_rate": 1.6490443589407996e-05, "loss": 1.4847, "step": 3226 }, { "epoch": 0.5567152592081428, "grad_norm": 0.625, "learning_rate": 1.6488372678081902e-05, "loss": 1.4211, "step": 3227 }, { "epoch": 0.5568877771068749, "grad_norm": 0.86328125, "learning_rate": 1.6486301286056617e-05, "loss": 1.4936, "step": 3228 }, { "epoch": 0.5570602950056068, "grad_norm": 0.640625, "learning_rate": 1.64842294134856e-05, "loss": 1.3648, "step": 3229 }, { "epoch": 0.5572328129043388, "grad_norm": 0.60546875, "learning_rate": 1.6482157060522363e-05, "loss": 1.5151, "step": 3230 }, { "epoch": 0.5574053308030709, "grad_norm": 0.57421875, "learning_rate": 1.6480084227320422e-05, "loss": 1.5417, "step": 3231 }, { "epoch": 0.5575778487018028, "grad_norm": 0.6640625, "learning_rate": 1.647801091403335e-05, "loss": 1.4683, "step": 3232 }, { "epoch": 0.5577503666005348, "grad_norm": 0.640625, "learning_rate": 1.647593712081476e-05, "loss": 1.5107, "step": 3233 }, { "epoch": 0.5579228844992667, "grad_norm": 0.7109375, "learning_rate": 1.647386284781828e-05, "loss": 1.398, "step": 3234 }, { "epoch": 0.5580954023979988, "grad_norm": 0.59375, "learning_rate": 1.6471788095197587e-05, "loss": 1.4902, "step": 3235 }, { "epoch": 0.5582679202967308, "grad_norm": 0.61328125, "learning_rate": 1.64697128631064e-05, "loss": 1.457, "step": 3236 }, { "epoch": 0.5584404381954627, "grad_norm": 0.65625, "learning_rate": 1.6467637151698457e-05, "loss": 1.6169, "step": 3237 }, { "epoch": 0.5586129560941948, "grad_norm": 0.6171875, "learning_rate": 1.646556096112754e-05, "loss": 1.5738, "step": 3238 }, { "epoch": 0.5587854739929268, "grad_norm": 0.5703125, "learning_rate": 1.6463484291547472e-05, "loss": 1.3692, "step": 3239 }, { "epoch": 0.5589579918916587, "grad_norm": 0.62109375, "learning_rate": 1.64614071431121e-05, "loss": 1.4634, "step": 3240 }, { "epoch": 0.5591305097903908, "grad_norm": 0.59765625, "learning_rate": 1.6459329515975313e-05, "loss": 1.4855, "step": 3241 }, { "epoch": 0.5593030276891228, "grad_norm": 0.58203125, "learning_rate": 1.645725141029104e-05, "loss": 1.4872, "step": 3242 }, { "epoch": 0.5594755455878547, "grad_norm": 0.671875, "learning_rate": 1.6455172826213228e-05, "loss": 1.5038, "step": 3243 }, { "epoch": 0.5596480634865867, "grad_norm": 0.75, "learning_rate": 1.6453093763895885e-05, "loss": 1.4403, "step": 3244 }, { "epoch": 0.5598205813853188, "grad_norm": 0.66796875, "learning_rate": 1.6451014223493035e-05, "loss": 1.3938, "step": 3245 }, { "epoch": 0.5599930992840507, "grad_norm": 1.5078125, "learning_rate": 1.6448934205158743e-05, "loss": 1.4209, "step": 3246 }, { "epoch": 0.5601656171827827, "grad_norm": 0.64453125, "learning_rate": 1.644685370904711e-05, "loss": 1.4142, "step": 3247 }, { "epoch": 0.5603381350815148, "grad_norm": 0.5703125, "learning_rate": 1.6444772735312272e-05, "loss": 1.508, "step": 3248 }, { "epoch": 0.5605106529802467, "grad_norm": 0.66796875, "learning_rate": 1.6442691284108403e-05, "loss": 1.4892, "step": 3249 }, { "epoch": 0.5606831708789787, "grad_norm": 0.6953125, "learning_rate": 1.6440609355589704e-05, "loss": 1.4895, "step": 3250 }, { "epoch": 0.5608556887777106, "grad_norm": 0.80859375, "learning_rate": 1.6438526949910425e-05, "loss": 1.4742, "step": 3251 }, { "epoch": 0.5610282066764427, "grad_norm": 0.62890625, "learning_rate": 1.643644406722484e-05, "loss": 1.5226, "step": 3252 }, { "epoch": 0.5612007245751747, "grad_norm": 0.69921875, "learning_rate": 1.643436070768726e-05, "loss": 1.4427, "step": 3253 }, { "epoch": 0.5613732424739066, "grad_norm": 0.62890625, "learning_rate": 1.6432276871452036e-05, "loss": 1.4517, "step": 3254 }, { "epoch": 0.5615457603726387, "grad_norm": 0.65625, "learning_rate": 1.643019255867355e-05, "loss": 1.4601, "step": 3255 }, { "epoch": 0.5617182782713707, "grad_norm": 0.640625, "learning_rate": 1.642810776950622e-05, "loss": 1.5211, "step": 3256 }, { "epoch": 0.5618907961701026, "grad_norm": 0.6796875, "learning_rate": 1.6426022504104505e-05, "loss": 1.5343, "step": 3257 }, { "epoch": 0.5620633140688347, "grad_norm": 0.6328125, "learning_rate": 1.6423936762622887e-05, "loss": 1.4592, "step": 3258 }, { "epoch": 0.5622358319675667, "grad_norm": 0.73828125, "learning_rate": 1.6421850545215897e-05, "loss": 1.4003, "step": 3259 }, { "epoch": 0.5624083498662986, "grad_norm": 0.86328125, "learning_rate": 1.6419763852038092e-05, "loss": 1.4569, "step": 3260 }, { "epoch": 0.5625808677650306, "grad_norm": 0.6328125, "learning_rate": 1.641767668324407e-05, "loss": 1.4049, "step": 3261 }, { "epoch": 0.5627533856637627, "grad_norm": 0.80859375, "learning_rate": 1.641558903898846e-05, "loss": 1.4446, "step": 3262 }, { "epoch": 0.5629259035624946, "grad_norm": 0.91015625, "learning_rate": 1.6413500919425927e-05, "loss": 1.481, "step": 3263 }, { "epoch": 0.5630984214612266, "grad_norm": 0.66796875, "learning_rate": 1.6411412324711174e-05, "loss": 1.5068, "step": 3264 }, { "epoch": 0.5632709393599586, "grad_norm": 0.79296875, "learning_rate": 1.6409323254998932e-05, "loss": 1.4457, "step": 3265 }, { "epoch": 0.5634434572586906, "grad_norm": 0.7265625, "learning_rate": 1.640723371044398e-05, "loss": 1.4314, "step": 3266 }, { "epoch": 0.5636159751574226, "grad_norm": 0.6484375, "learning_rate": 1.640514369120112e-05, "loss": 1.3798, "step": 3267 }, { "epoch": 0.5637884930561545, "grad_norm": 0.79296875, "learning_rate": 1.640305319742519e-05, "loss": 1.5154, "step": 3268 }, { "epoch": 0.5639610109548866, "grad_norm": 0.69140625, "learning_rate": 1.6400962229271074e-05, "loss": 1.3999, "step": 3269 }, { "epoch": 0.5641335288536186, "grad_norm": 0.63671875, "learning_rate": 1.639887078689368e-05, "loss": 1.473, "step": 3270 }, { "epoch": 0.5643060467523505, "grad_norm": 0.734375, "learning_rate": 1.639677887044796e-05, "loss": 1.3974, "step": 3271 }, { "epoch": 0.5644785646510826, "grad_norm": 0.9453125, "learning_rate": 1.639468648008889e-05, "loss": 1.5242, "step": 3272 }, { "epoch": 0.5646510825498146, "grad_norm": 0.765625, "learning_rate": 1.6392593615971487e-05, "loss": 1.5228, "step": 3273 }, { "epoch": 0.5648236004485465, "grad_norm": 0.6171875, "learning_rate": 1.6390500278250806e-05, "loss": 1.4572, "step": 3274 }, { "epoch": 0.5649961183472785, "grad_norm": 0.62109375, "learning_rate": 1.6388406467081936e-05, "loss": 1.5359, "step": 3275 }, { "epoch": 0.5651686362460105, "grad_norm": 0.6328125, "learning_rate": 1.6386312182619997e-05, "loss": 1.4999, "step": 3276 }, { "epoch": 0.5653411541447425, "grad_norm": 0.640625, "learning_rate": 1.638421742502015e-05, "loss": 1.4103, "step": 3277 }, { "epoch": 0.5655136720434745, "grad_norm": 0.6796875, "learning_rate": 1.638212219443759e-05, "loss": 1.3701, "step": 3278 }, { "epoch": 0.5656861899422065, "grad_norm": 0.67578125, "learning_rate": 1.6380026491027537e-05, "loss": 1.5241, "step": 3279 }, { "epoch": 0.5658587078409385, "grad_norm": 0.6484375, "learning_rate": 1.6377930314945254e-05, "loss": 1.45, "step": 3280 }, { "epoch": 0.5660312257396705, "grad_norm": 0.83984375, "learning_rate": 1.6375833666346048e-05, "loss": 1.4207, "step": 3281 }, { "epoch": 0.5662037436384025, "grad_norm": 0.609375, "learning_rate": 1.637373654538524e-05, "loss": 1.5209, "step": 3282 }, { "epoch": 0.5663762615371345, "grad_norm": 0.71875, "learning_rate": 1.637163895221821e-05, "loss": 1.5028, "step": 3283 }, { "epoch": 0.5665487794358665, "grad_norm": 0.6484375, "learning_rate": 1.636954088700035e-05, "loss": 1.4265, "step": 3284 }, { "epoch": 0.5667212973345984, "grad_norm": 0.578125, "learning_rate": 1.63674423498871e-05, "loss": 1.4944, "step": 3285 }, { "epoch": 0.5668938152333305, "grad_norm": 0.72265625, "learning_rate": 1.636534334103394e-05, "loss": 1.4467, "step": 3286 }, { "epoch": 0.5670663331320625, "grad_norm": 0.66015625, "learning_rate": 1.636324386059637e-05, "loss": 1.4922, "step": 3287 }, { "epoch": 0.5672388510307944, "grad_norm": 0.578125, "learning_rate": 1.636114390872994e-05, "loss": 1.3776, "step": 3288 }, { "epoch": 0.5674113689295265, "grad_norm": 0.82421875, "learning_rate": 1.6359043485590217e-05, "loss": 1.5595, "step": 3289 }, { "epoch": 0.5675838868282584, "grad_norm": 0.8125, "learning_rate": 1.635694259133282e-05, "loss": 1.552, "step": 3290 }, { "epoch": 0.5677564047269904, "grad_norm": 0.59765625, "learning_rate": 1.63548412261134e-05, "loss": 1.4487, "step": 3291 }, { "epoch": 0.5679289226257224, "grad_norm": 0.6328125, "learning_rate": 1.635273939008763e-05, "loss": 1.4105, "step": 3292 }, { "epoch": 0.5681014405244544, "grad_norm": 0.6875, "learning_rate": 1.6350637083411238e-05, "loss": 1.4208, "step": 3293 }, { "epoch": 0.5682739584231864, "grad_norm": 0.58203125, "learning_rate": 1.634853430623997e-05, "loss": 1.4819, "step": 3294 }, { "epoch": 0.5684464763219184, "grad_norm": 0.609375, "learning_rate": 1.6346431058729607e-05, "loss": 1.4907, "step": 3295 }, { "epoch": 0.5686189942206504, "grad_norm": 0.88671875, "learning_rate": 1.6344327341035982e-05, "loss": 1.4593, "step": 3296 }, { "epoch": 0.5687915121193824, "grad_norm": 0.59765625, "learning_rate": 1.6342223153314946e-05, "loss": 1.4974, "step": 3297 }, { "epoch": 0.5689640300181144, "grad_norm": 0.62890625, "learning_rate": 1.634011849572239e-05, "loss": 1.52, "step": 3298 }, { "epoch": 0.5691365479168464, "grad_norm": 0.73828125, "learning_rate": 1.6338013368414237e-05, "loss": 1.4899, "step": 3299 }, { "epoch": 0.5693090658155784, "grad_norm": 0.6171875, "learning_rate": 1.6335907771546458e-05, "loss": 1.4335, "step": 3300 }, { "epoch": 0.5693090658155784, "eval_loss": 1.4357802867889404, "eval_runtime": 10.7789, "eval_samples_per_second": 95.0, "eval_steps_per_second": 23.75, "step": 3300 }, { "epoch": 0.5694815837143103, "grad_norm": 0.6015625, "learning_rate": 1.633380170527504e-05, "loss": 1.4017, "step": 3301 }, { "epoch": 0.5696541016130423, "grad_norm": 0.59375, "learning_rate": 1.6331695169756015e-05, "loss": 1.4695, "step": 3302 }, { "epoch": 0.5698266195117744, "grad_norm": 0.6953125, "learning_rate": 1.6329588165145452e-05, "loss": 1.4122, "step": 3303 }, { "epoch": 0.5699991374105063, "grad_norm": 0.58984375, "learning_rate": 1.6327480691599448e-05, "loss": 1.4928, "step": 3304 }, { "epoch": 0.5701716553092383, "grad_norm": 0.60546875, "learning_rate": 1.632537274927414e-05, "loss": 1.3496, "step": 3305 }, { "epoch": 0.5703441732079704, "grad_norm": 0.62890625, "learning_rate": 1.632326433832569e-05, "loss": 1.4946, "step": 3306 }, { "epoch": 0.5705166911067023, "grad_norm": 0.6328125, "learning_rate": 1.6321155458910314e-05, "loss": 1.4751, "step": 3307 }, { "epoch": 0.5706892090054343, "grad_norm": 0.73046875, "learning_rate": 1.631904611118424e-05, "loss": 1.5496, "step": 3308 }, { "epoch": 0.5708617269041663, "grad_norm": 0.81640625, "learning_rate": 1.6316936295303754e-05, "loss": 1.4039, "step": 3309 }, { "epoch": 0.5710342448028983, "grad_norm": 0.7578125, "learning_rate": 1.6314826011425153e-05, "loss": 1.5043, "step": 3310 }, { "epoch": 0.5712067627016303, "grad_norm": 0.7265625, "learning_rate": 1.6312715259704786e-05, "loss": 1.5031, "step": 3311 }, { "epoch": 0.5713792806003622, "grad_norm": 0.6171875, "learning_rate": 1.6310604040299026e-05, "loss": 1.4799, "step": 3312 }, { "epoch": 0.5715517984990943, "grad_norm": 0.734375, "learning_rate": 1.630849235336429e-05, "loss": 1.4857, "step": 3313 }, { "epoch": 0.5717243163978263, "grad_norm": 0.6640625, "learning_rate": 1.6306380199057024e-05, "loss": 1.4491, "step": 3314 }, { "epoch": 0.5718968342965582, "grad_norm": 0.6015625, "learning_rate": 1.6304267577533706e-05, "loss": 1.4579, "step": 3315 }, { "epoch": 0.5720693521952903, "grad_norm": 0.6015625, "learning_rate": 1.630215448895086e-05, "loss": 1.4994, "step": 3316 }, { "epoch": 0.5722418700940223, "grad_norm": 0.72265625, "learning_rate": 1.6300040933465025e-05, "loss": 1.4655, "step": 3317 }, { "epoch": 0.5724143879927542, "grad_norm": 0.8203125, "learning_rate": 1.6297926911232796e-05, "loss": 1.4915, "step": 3318 }, { "epoch": 0.5725869058914862, "grad_norm": 0.58984375, "learning_rate": 1.6295812422410794e-05, "loss": 1.5393, "step": 3319 }, { "epoch": 0.5727594237902183, "grad_norm": 0.69140625, "learning_rate": 1.6293697467155667e-05, "loss": 1.5414, "step": 3320 }, { "epoch": 0.5729319416889502, "grad_norm": 0.6484375, "learning_rate": 1.6291582045624107e-05, "loss": 1.4912, "step": 3321 }, { "epoch": 0.5731044595876822, "grad_norm": 0.703125, "learning_rate": 1.6289466157972835e-05, "loss": 1.3418, "step": 3322 }, { "epoch": 0.5732769774864143, "grad_norm": 1.7109375, "learning_rate": 1.628734980435861e-05, "loss": 1.5682, "step": 3323 }, { "epoch": 0.5734494953851462, "grad_norm": 0.671875, "learning_rate": 1.6285232984938234e-05, "loss": 1.4293, "step": 3324 }, { "epoch": 0.5736220132838782, "grad_norm": 0.66015625, "learning_rate": 1.6283115699868522e-05, "loss": 1.4405, "step": 3325 }, { "epoch": 0.5737945311826101, "grad_norm": 0.57421875, "learning_rate": 1.6280997949306342e-05, "loss": 1.3624, "step": 3326 }, { "epoch": 0.5739670490813422, "grad_norm": 0.6640625, "learning_rate": 1.6278879733408587e-05, "loss": 1.4126, "step": 3327 }, { "epoch": 0.5741395669800742, "grad_norm": 0.73046875, "learning_rate": 1.627676105233219e-05, "loss": 1.429, "step": 3328 }, { "epoch": 0.5743120848788061, "grad_norm": 0.6015625, "learning_rate": 1.6274641906234113e-05, "loss": 1.4055, "step": 3329 }, { "epoch": 0.5744846027775382, "grad_norm": 0.59375, "learning_rate": 1.627252229527136e-05, "loss": 1.4173, "step": 3330 }, { "epoch": 0.5746571206762702, "grad_norm": 2.0, "learning_rate": 1.627040221960096e-05, "loss": 1.5033, "step": 3331 }, { "epoch": 0.5748296385750021, "grad_norm": 0.6015625, "learning_rate": 1.6268281679379983e-05, "loss": 1.4267, "step": 3332 }, { "epoch": 0.5750021564737342, "grad_norm": 0.65625, "learning_rate": 1.6266160674765542e-05, "loss": 1.4609, "step": 3333 }, { "epoch": 0.5751746743724662, "grad_norm": 0.7734375, "learning_rate": 1.626403920591476e-05, "loss": 1.4585, "step": 3334 }, { "epoch": 0.5753471922711981, "grad_norm": 0.734375, "learning_rate": 1.626191727298481e-05, "loss": 1.4806, "step": 3335 }, { "epoch": 0.5755197101699301, "grad_norm": 0.69921875, "learning_rate": 1.625979487613291e-05, "loss": 1.4952, "step": 3336 }, { "epoch": 0.5756922280686622, "grad_norm": 0.6875, "learning_rate": 1.6257672015516287e-05, "loss": 1.463, "step": 3337 }, { "epoch": 0.5758647459673941, "grad_norm": 0.6640625, "learning_rate": 1.6255548691292223e-05, "loss": 1.4367, "step": 3338 }, { "epoch": 0.5760372638661261, "grad_norm": 0.58984375, "learning_rate": 1.6253424903618023e-05, "loss": 1.4492, "step": 3339 }, { "epoch": 0.5762097817648582, "grad_norm": 0.58984375, "learning_rate": 1.6251300652651037e-05, "loss": 1.4922, "step": 3340 }, { "epoch": 0.5763822996635901, "grad_norm": 0.67578125, "learning_rate": 1.6249175938548635e-05, "loss": 1.4813, "step": 3341 }, { "epoch": 0.5765548175623221, "grad_norm": 0.7890625, "learning_rate": 1.6247050761468236e-05, "loss": 1.4617, "step": 3342 }, { "epoch": 0.576727335461054, "grad_norm": 0.609375, "learning_rate": 1.6244925121567283e-05, "loss": 1.4464, "step": 3343 }, { "epoch": 0.5768998533597861, "grad_norm": 0.61328125, "learning_rate": 1.6242799019003256e-05, "loss": 1.4271, "step": 3344 }, { "epoch": 0.5770723712585181, "grad_norm": 0.6484375, "learning_rate": 1.6240672453933673e-05, "loss": 1.4269, "step": 3345 }, { "epoch": 0.57724488915725, "grad_norm": 0.6640625, "learning_rate": 1.623854542651608e-05, "loss": 1.326, "step": 3346 }, { "epoch": 0.5774174070559821, "grad_norm": 0.65625, "learning_rate": 1.6236417936908058e-05, "loss": 1.4414, "step": 3347 }, { "epoch": 0.5775899249547141, "grad_norm": 0.77734375, "learning_rate": 1.623428998526723e-05, "loss": 1.5371, "step": 3348 }, { "epoch": 0.577762442853446, "grad_norm": 0.87890625, "learning_rate": 1.6232161571751248e-05, "loss": 1.5019, "step": 3349 }, { "epoch": 0.577934960752178, "grad_norm": 0.83984375, "learning_rate": 1.6230032696517794e-05, "loss": 1.475, "step": 3350 }, { "epoch": 0.5781074786509101, "grad_norm": 0.88671875, "learning_rate": 1.6227903359724595e-05, "loss": 1.4802, "step": 3351 }, { "epoch": 0.578279996549642, "grad_norm": 0.59375, "learning_rate": 1.62257735615294e-05, "loss": 1.4384, "step": 3352 }, { "epoch": 0.578452514448374, "grad_norm": 0.78515625, "learning_rate": 1.6223643302089994e-05, "loss": 1.4694, "step": 3353 }, { "epoch": 0.578625032347106, "grad_norm": 0.71484375, "learning_rate": 1.622151258156421e-05, "loss": 1.4337, "step": 3354 }, { "epoch": 0.578797550245838, "grad_norm": 0.58203125, "learning_rate": 1.6219381400109898e-05, "loss": 1.6329, "step": 3355 }, { "epoch": 0.57897006814457, "grad_norm": 0.83203125, "learning_rate": 1.6217249757884954e-05, "loss": 1.5241, "step": 3356 }, { "epoch": 0.579142586043302, "grad_norm": 0.6875, "learning_rate": 1.62151176550473e-05, "loss": 1.4375, "step": 3357 }, { "epoch": 0.579315103942034, "grad_norm": 0.703125, "learning_rate": 1.6212985091754893e-05, "loss": 1.4399, "step": 3358 }, { "epoch": 0.579487621840766, "grad_norm": 0.71484375, "learning_rate": 1.621085206816573e-05, "loss": 1.5598, "step": 3359 }, { "epoch": 0.5796601397394979, "grad_norm": 2.375, "learning_rate": 1.6208718584437845e-05, "loss": 1.465, "step": 3360 }, { "epoch": 0.57983265763823, "grad_norm": 0.5859375, "learning_rate": 1.620658464072929e-05, "loss": 1.4114, "step": 3361 }, { "epoch": 0.580005175536962, "grad_norm": 0.62109375, "learning_rate": 1.620445023719816e-05, "loss": 1.4664, "step": 3362 }, { "epoch": 0.5801776934356939, "grad_norm": 0.84375, "learning_rate": 1.6202315374002594e-05, "loss": 1.2958, "step": 3363 }, { "epoch": 0.580350211334426, "grad_norm": 0.65234375, "learning_rate": 1.620018005130075e-05, "loss": 1.4864, "step": 3364 }, { "epoch": 0.580522729233158, "grad_norm": 0.60546875, "learning_rate": 1.619804426925083e-05, "loss": 1.5509, "step": 3365 }, { "epoch": 0.5806952471318899, "grad_norm": 0.609375, "learning_rate": 1.619590802801106e-05, "loss": 1.3632, "step": 3366 }, { "epoch": 0.5808677650306219, "grad_norm": 0.671875, "learning_rate": 1.6193771327739712e-05, "loss": 1.4981, "step": 3367 }, { "epoch": 0.581040282929354, "grad_norm": 0.640625, "learning_rate": 1.619163416859508e-05, "loss": 1.3906, "step": 3368 }, { "epoch": 0.5812128008280859, "grad_norm": 0.63671875, "learning_rate": 1.6189496550735503e-05, "loss": 1.4839, "step": 3369 }, { "epoch": 0.5813853187268179, "grad_norm": 0.9296875, "learning_rate": 1.6187358474319347e-05, "loss": 1.4431, "step": 3370 }, { "epoch": 0.5815578366255499, "grad_norm": 0.6171875, "learning_rate": 1.6185219939505016e-05, "loss": 1.4361, "step": 3371 }, { "epoch": 0.5817303545242819, "grad_norm": 0.66015625, "learning_rate": 1.6183080946450945e-05, "loss": 1.3862, "step": 3372 }, { "epoch": 0.5819028724230139, "grad_norm": 0.56640625, "learning_rate": 1.6180941495315606e-05, "loss": 1.3674, "step": 3373 }, { "epoch": 0.5820753903217459, "grad_norm": 0.66796875, "learning_rate": 1.6178801586257494e-05, "loss": 1.4896, "step": 3374 }, { "epoch": 0.5822479082204779, "grad_norm": 0.68359375, "learning_rate": 1.617666121943516e-05, "loss": 1.3988, "step": 3375 }, { "epoch": 0.5824204261192099, "grad_norm": 0.64453125, "learning_rate": 1.6174520395007168e-05, "loss": 1.3873, "step": 3376 }, { "epoch": 0.5825929440179418, "grad_norm": 0.78515625, "learning_rate": 1.6172379113132123e-05, "loss": 1.52, "step": 3377 }, { "epoch": 0.5827654619166739, "grad_norm": 0.6171875, "learning_rate": 1.617023737396867e-05, "loss": 1.46, "step": 3378 }, { "epoch": 0.5829379798154058, "grad_norm": 0.6953125, "learning_rate": 1.6168095177675476e-05, "loss": 1.5046, "step": 3379 }, { "epoch": 0.5831104977141378, "grad_norm": 0.625, "learning_rate": 1.6165952524411253e-05, "loss": 1.4513, "step": 3380 }, { "epoch": 0.5832830156128699, "grad_norm": 0.65625, "learning_rate": 1.616380941433474e-05, "loss": 1.547, "step": 3381 }, { "epoch": 0.5834555335116018, "grad_norm": 0.66796875, "learning_rate": 1.616166584760471e-05, "loss": 1.5419, "step": 3382 }, { "epoch": 0.5836280514103338, "grad_norm": 0.77734375, "learning_rate": 1.6159521824379977e-05, "loss": 1.4538, "step": 3383 }, { "epoch": 0.5838005693090658, "grad_norm": 0.6640625, "learning_rate": 1.6157377344819377e-05, "loss": 1.5394, "step": 3384 }, { "epoch": 0.5839730872077978, "grad_norm": 0.65625, "learning_rate": 1.6155232409081794e-05, "loss": 1.4276, "step": 3385 }, { "epoch": 0.5841456051065298, "grad_norm": 0.58203125, "learning_rate": 1.615308701732613e-05, "loss": 1.5032, "step": 3386 }, { "epoch": 0.5843181230052618, "grad_norm": 0.6875, "learning_rate": 1.615094116971134e-05, "loss": 1.4876, "step": 3387 }, { "epoch": 0.5844906409039938, "grad_norm": 0.7421875, "learning_rate": 1.6148794866396392e-05, "loss": 1.5329, "step": 3388 }, { "epoch": 0.5846631588027258, "grad_norm": 0.74609375, "learning_rate": 1.6146648107540303e-05, "loss": 1.4204, "step": 3389 }, { "epoch": 0.5848356767014578, "grad_norm": 0.578125, "learning_rate": 1.614450089330211e-05, "loss": 1.4151, "step": 3390 }, { "epoch": 0.5850081946001898, "grad_norm": 0.69140625, "learning_rate": 1.6142353223840902e-05, "loss": 1.467, "step": 3391 }, { "epoch": 0.5851807124989218, "grad_norm": 0.73828125, "learning_rate": 1.6140205099315787e-05, "loss": 1.4764, "step": 3392 }, { "epoch": 0.5853532303976537, "grad_norm": 0.7421875, "learning_rate": 1.6138056519885916e-05, "loss": 1.4966, "step": 3393 }, { "epoch": 0.5855257482963857, "grad_norm": 0.59765625, "learning_rate": 1.6135907485710462e-05, "loss": 1.4324, "step": 3394 }, { "epoch": 0.5856982661951178, "grad_norm": 0.84765625, "learning_rate": 1.6133757996948645e-05, "loss": 1.4643, "step": 3395 }, { "epoch": 0.5858707840938497, "grad_norm": 0.59765625, "learning_rate": 1.613160805375971e-05, "loss": 1.3835, "step": 3396 }, { "epoch": 0.5860433019925817, "grad_norm": 0.6484375, "learning_rate": 1.6129457656302935e-05, "loss": 1.4762, "step": 3397 }, { "epoch": 0.5862158198913138, "grad_norm": 0.6015625, "learning_rate": 1.612730680473764e-05, "loss": 1.4536, "step": 3398 }, { "epoch": 0.5863883377900457, "grad_norm": 0.60546875, "learning_rate": 1.6125155499223174e-05, "loss": 1.5047, "step": 3399 }, { "epoch": 0.5865608556887777, "grad_norm": 0.703125, "learning_rate": 1.6123003739918915e-05, "loss": 1.5773, "step": 3400 }, { "epoch": 0.5865608556887777, "eval_loss": 1.433887004852295, "eval_runtime": 10.8057, "eval_samples_per_second": 94.765, "eval_steps_per_second": 23.691, "step": 3400 }, { "epoch": 0.5867333735875097, "grad_norm": 0.6015625, "learning_rate": 1.6120851526984282e-05, "loss": 1.5089, "step": 3401 }, { "epoch": 0.5869058914862417, "grad_norm": 0.55859375, "learning_rate": 1.611869886057872e-05, "loss": 1.3859, "step": 3402 }, { "epoch": 0.5870784093849737, "grad_norm": 0.91015625, "learning_rate": 1.6116545740861718e-05, "loss": 1.4056, "step": 3403 }, { "epoch": 0.5872509272837056, "grad_norm": 0.625, "learning_rate": 1.6114392167992784e-05, "loss": 1.5003, "step": 3404 }, { "epoch": 0.5874234451824377, "grad_norm": 0.6640625, "learning_rate": 1.611223814213148e-05, "loss": 1.5606, "step": 3405 }, { "epoch": 0.5875959630811697, "grad_norm": 0.95703125, "learning_rate": 1.611008366343738e-05, "loss": 1.4371, "step": 3406 }, { "epoch": 0.5877684809799016, "grad_norm": 0.59765625, "learning_rate": 1.6107928732070107e-05, "loss": 1.4536, "step": 3407 }, { "epoch": 0.5879409988786336, "grad_norm": 0.7421875, "learning_rate": 1.6105773348189312e-05, "loss": 1.3856, "step": 3408 }, { "epoch": 0.5881135167773657, "grad_norm": 0.83984375, "learning_rate": 1.610361751195467e-05, "loss": 1.4591, "step": 3409 }, { "epoch": 0.5882860346760976, "grad_norm": 0.6484375, "learning_rate": 1.6101461223525908e-05, "loss": 1.4714, "step": 3410 }, { "epoch": 0.5884585525748296, "grad_norm": 0.64453125, "learning_rate": 1.6099304483062776e-05, "loss": 1.5012, "step": 3411 }, { "epoch": 0.5886310704735617, "grad_norm": 0.703125, "learning_rate": 1.609714729072506e-05, "loss": 1.4031, "step": 3412 }, { "epoch": 0.5888035883722936, "grad_norm": 0.73828125, "learning_rate": 1.6094989646672573e-05, "loss": 1.3807, "step": 3413 }, { "epoch": 0.5889761062710256, "grad_norm": 0.75390625, "learning_rate": 1.609283155106517e-05, "loss": 1.476, "step": 3414 }, { "epoch": 0.5891486241697577, "grad_norm": 0.62109375, "learning_rate": 1.6090673004062734e-05, "loss": 1.4646, "step": 3415 }, { "epoch": 0.5893211420684896, "grad_norm": 0.60546875, "learning_rate": 1.608851400582519e-05, "loss": 1.4054, "step": 3416 }, { "epoch": 0.5894936599672216, "grad_norm": 0.6328125, "learning_rate": 1.608635455651248e-05, "loss": 1.4973, "step": 3417 }, { "epoch": 0.5896661778659535, "grad_norm": 0.60546875, "learning_rate": 1.6084194656284598e-05, "loss": 1.3328, "step": 3418 }, { "epoch": 0.5898386957646856, "grad_norm": 0.734375, "learning_rate": 1.6082034305301563e-05, "loss": 1.5572, "step": 3419 }, { "epoch": 0.5900112136634176, "grad_norm": 0.63671875, "learning_rate": 1.607987350372342e-05, "loss": 1.4634, "step": 3420 }, { "epoch": 0.5901837315621495, "grad_norm": 0.7734375, "learning_rate": 1.607771225171026e-05, "loss": 1.4979, "step": 3421 }, { "epoch": 0.5903562494608816, "grad_norm": 0.70703125, "learning_rate": 1.6075550549422204e-05, "loss": 1.3774, "step": 3422 }, { "epoch": 0.5905287673596136, "grad_norm": 0.6875, "learning_rate": 1.6073388397019397e-05, "loss": 1.4982, "step": 3423 }, { "epoch": 0.5907012852583455, "grad_norm": 0.7890625, "learning_rate": 1.6071225794662033e-05, "loss": 1.6353, "step": 3424 }, { "epoch": 0.5908738031570775, "grad_norm": 0.79296875, "learning_rate": 1.6069062742510326e-05, "loss": 1.4391, "step": 3425 }, { "epoch": 0.5910463210558096, "grad_norm": 0.6875, "learning_rate": 1.606689924072453e-05, "loss": 1.3626, "step": 3426 }, { "epoch": 0.5912188389545415, "grad_norm": 0.84765625, "learning_rate": 1.606473528946493e-05, "loss": 1.4261, "step": 3427 }, { "epoch": 0.5913913568532735, "grad_norm": 0.78125, "learning_rate": 1.6062570888891847e-05, "loss": 1.4534, "step": 3428 }, { "epoch": 0.5915638747520056, "grad_norm": 0.65625, "learning_rate": 1.6060406039165627e-05, "loss": 1.5243, "step": 3429 }, { "epoch": 0.5917363926507375, "grad_norm": 0.69921875, "learning_rate": 1.6058240740446666e-05, "loss": 1.458, "step": 3430 }, { "epoch": 0.5919089105494695, "grad_norm": 0.72265625, "learning_rate": 1.6056074992895378e-05, "loss": 1.4456, "step": 3431 }, { "epoch": 0.5920814284482016, "grad_norm": 0.671875, "learning_rate": 1.605390879667221e-05, "loss": 1.4622, "step": 3432 }, { "epoch": 0.5922539463469335, "grad_norm": 0.75390625, "learning_rate": 1.6051742151937655e-05, "loss": 1.4636, "step": 3433 }, { "epoch": 0.5924264642456655, "grad_norm": 0.6328125, "learning_rate": 1.6049575058852223e-05, "loss": 1.3617, "step": 3434 }, { "epoch": 0.5925989821443974, "grad_norm": 0.62890625, "learning_rate": 1.6047407517576478e-05, "loss": 1.5374, "step": 3435 }, { "epoch": 0.5927715000431295, "grad_norm": 0.56640625, "learning_rate": 1.6045239528270993e-05, "loss": 1.3766, "step": 3436 }, { "epoch": 0.5929440179418615, "grad_norm": 0.703125, "learning_rate": 1.6043071091096397e-05, "loss": 1.3774, "step": 3437 }, { "epoch": 0.5931165358405934, "grad_norm": 0.7421875, "learning_rate": 1.6040902206213332e-05, "loss": 1.5118, "step": 3438 }, { "epoch": 0.5932890537393255, "grad_norm": 0.73046875, "learning_rate": 1.6038732873782485e-05, "loss": 1.4876, "step": 3439 }, { "epoch": 0.5934615716380575, "grad_norm": 0.58984375, "learning_rate": 1.6036563093964577e-05, "loss": 1.4963, "step": 3440 }, { "epoch": 0.5936340895367894, "grad_norm": 0.68359375, "learning_rate": 1.6034392866920354e-05, "loss": 1.5198, "step": 3441 }, { "epoch": 0.5938066074355214, "grad_norm": 0.6484375, "learning_rate": 1.6032222192810607e-05, "loss": 1.3917, "step": 3442 }, { "epoch": 0.5939791253342535, "grad_norm": 0.6015625, "learning_rate": 1.6030051071796146e-05, "loss": 1.4103, "step": 3443 }, { "epoch": 0.5941516432329854, "grad_norm": 0.69140625, "learning_rate": 1.6027879504037826e-05, "loss": 1.4817, "step": 3444 }, { "epoch": 0.5943241611317174, "grad_norm": 0.59765625, "learning_rate": 1.6025707489696527e-05, "loss": 1.5543, "step": 3445 }, { "epoch": 0.5944966790304494, "grad_norm": 0.72265625, "learning_rate": 1.6023535028933167e-05, "loss": 1.5289, "step": 3446 }, { "epoch": 0.5946691969291814, "grad_norm": 0.73046875, "learning_rate": 1.6021362121908697e-05, "loss": 1.5147, "step": 3447 }, { "epoch": 0.5948417148279134, "grad_norm": 0.6796875, "learning_rate": 1.60191887687841e-05, "loss": 1.5132, "step": 3448 }, { "epoch": 0.5950142327266454, "grad_norm": 0.60546875, "learning_rate": 1.6017014969720382e-05, "loss": 1.5291, "step": 3449 }, { "epoch": 0.5951867506253774, "grad_norm": 0.73046875, "learning_rate": 1.6014840724878603e-05, "loss": 1.3743, "step": 3450 }, { "epoch": 0.5953592685241094, "grad_norm": 0.9765625, "learning_rate": 1.6012666034419838e-05, "loss": 1.3932, "step": 3451 }, { "epoch": 0.5955317864228413, "grad_norm": 0.578125, "learning_rate": 1.601049089850521e-05, "loss": 1.4492, "step": 3452 }, { "epoch": 0.5957043043215734, "grad_norm": 0.78515625, "learning_rate": 1.6008315317295857e-05, "loss": 1.4471, "step": 3453 }, { "epoch": 0.5958768222203054, "grad_norm": 0.8359375, "learning_rate": 1.6006139290952964e-05, "loss": 1.453, "step": 3454 }, { "epoch": 0.5960493401190373, "grad_norm": 0.6796875, "learning_rate": 1.6003962819637744e-05, "loss": 1.5166, "step": 3455 }, { "epoch": 0.5962218580177694, "grad_norm": 0.65234375, "learning_rate": 1.6001785903511446e-05, "loss": 1.5024, "step": 3456 }, { "epoch": 0.5963943759165014, "grad_norm": 0.6953125, "learning_rate": 1.5999608542735345e-05, "loss": 1.4908, "step": 3457 }, { "epoch": 0.5965668938152333, "grad_norm": 0.65625, "learning_rate": 1.599743073747076e-05, "loss": 1.3899, "step": 3458 }, { "epoch": 0.5967394117139653, "grad_norm": 0.59765625, "learning_rate": 1.5995252487879025e-05, "loss": 1.4486, "step": 3459 }, { "epoch": 0.5969119296126973, "grad_norm": 0.7734375, "learning_rate": 1.599307379412153e-05, "loss": 1.5181, "step": 3460 }, { "epoch": 0.5970844475114293, "grad_norm": 0.62890625, "learning_rate": 1.599089465635968e-05, "loss": 1.4457, "step": 3461 }, { "epoch": 0.5972569654101613, "grad_norm": 0.59375, "learning_rate": 1.5988715074754923e-05, "loss": 1.3981, "step": 3462 }, { "epoch": 0.5974294833088933, "grad_norm": 0.6015625, "learning_rate": 1.5986535049468736e-05, "loss": 1.4419, "step": 3463 }, { "epoch": 0.5976020012076253, "grad_norm": 0.84375, "learning_rate": 1.5984354580662623e-05, "loss": 1.389, "step": 3464 }, { "epoch": 0.5977745191063573, "grad_norm": 0.5859375, "learning_rate": 1.5982173668498135e-05, "loss": 1.4545, "step": 3465 }, { "epoch": 0.5979470370050893, "grad_norm": 0.66796875, "learning_rate": 1.5979992313136836e-05, "loss": 1.5201, "step": 3466 }, { "epoch": 0.5981195549038213, "grad_norm": 0.609375, "learning_rate": 1.597781051474035e-05, "loss": 1.4027, "step": 3467 }, { "epoch": 0.5982920728025533, "grad_norm": 0.6640625, "learning_rate": 1.5975628273470303e-05, "loss": 1.439, "step": 3468 }, { "epoch": 0.5984645907012852, "grad_norm": 0.71484375, "learning_rate": 1.597344558948838e-05, "loss": 1.5406, "step": 3469 }, { "epoch": 0.5986371086000173, "grad_norm": 0.7265625, "learning_rate": 1.5971262462956283e-05, "loss": 1.425, "step": 3470 }, { "epoch": 0.5988096264987492, "grad_norm": 0.62890625, "learning_rate": 1.5969078894035753e-05, "loss": 1.5376, "step": 3471 }, { "epoch": 0.5989821443974812, "grad_norm": 0.71875, "learning_rate": 1.596689488288856e-05, "loss": 1.3337, "step": 3472 }, { "epoch": 0.5991546622962133, "grad_norm": 0.76953125, "learning_rate": 1.5964710429676514e-05, "loss": 1.4247, "step": 3473 }, { "epoch": 0.5993271801949452, "grad_norm": 0.77734375, "learning_rate": 1.596252553456145e-05, "loss": 1.4202, "step": 3474 }, { "epoch": 0.5994996980936772, "grad_norm": 0.6640625, "learning_rate": 1.5960340197705235e-05, "loss": 1.4148, "step": 3475 }, { "epoch": 0.5996722159924092, "grad_norm": 0.6484375, "learning_rate": 1.5958154419269782e-05, "loss": 1.4566, "step": 3476 }, { "epoch": 0.5998447338911412, "grad_norm": 0.59375, "learning_rate": 1.5955968199417018e-05, "loss": 1.4067, "step": 3477 }, { "epoch": 0.6000172517898732, "grad_norm": 0.62109375, "learning_rate": 1.5953781538308914e-05, "loss": 1.4475, "step": 3478 }, { "epoch": 0.6001897696886052, "grad_norm": 0.57421875, "learning_rate": 1.5951594436107475e-05, "loss": 1.4135, "step": 3479 }, { "epoch": 0.6003622875873372, "grad_norm": 0.640625, "learning_rate": 1.5949406892974733e-05, "loss": 1.459, "step": 3480 }, { "epoch": 0.6005348054860692, "grad_norm": 0.81640625, "learning_rate": 1.5947218909072755e-05, "loss": 1.4852, "step": 3481 }, { "epoch": 0.6007073233848012, "grad_norm": 0.63671875, "learning_rate": 1.594503048456364e-05, "loss": 1.3822, "step": 3482 }, { "epoch": 0.6008798412835331, "grad_norm": 0.625, "learning_rate": 1.5942841619609515e-05, "loss": 1.4175, "step": 3483 }, { "epoch": 0.6010523591822652, "grad_norm": 0.6171875, "learning_rate": 1.5940652314372558e-05, "loss": 1.4183, "step": 3484 }, { "epoch": 0.6012248770809971, "grad_norm": 0.6640625, "learning_rate": 1.5938462569014954e-05, "loss": 1.4889, "step": 3485 }, { "epoch": 0.6013973949797291, "grad_norm": 0.5703125, "learning_rate": 1.5936272383698937e-05, "loss": 1.473, "step": 3486 }, { "epoch": 0.6015699128784612, "grad_norm": 0.67578125, "learning_rate": 1.5934081758586775e-05, "loss": 1.45, "step": 3487 }, { "epoch": 0.6017424307771931, "grad_norm": 0.6875, "learning_rate": 1.5931890693840755e-05, "loss": 1.3931, "step": 3488 }, { "epoch": 0.6019149486759251, "grad_norm": 0.6171875, "learning_rate": 1.592969918962321e-05, "loss": 1.5415, "step": 3489 }, { "epoch": 0.6020874665746572, "grad_norm": 0.64453125, "learning_rate": 1.5927507246096497e-05, "loss": 1.4022, "step": 3490 }, { "epoch": 0.6022599844733891, "grad_norm": 0.55859375, "learning_rate": 1.592531486342301e-05, "loss": 1.4198, "step": 3491 }, { "epoch": 0.6024325023721211, "grad_norm": 0.625, "learning_rate": 1.592312204176518e-05, "loss": 1.3621, "step": 3492 }, { "epoch": 0.602605020270853, "grad_norm": 0.58203125, "learning_rate": 1.5920928781285456e-05, "loss": 1.414, "step": 3493 }, { "epoch": 0.6027775381695851, "grad_norm": 0.61328125, "learning_rate": 1.591873508214633e-05, "loss": 1.4441, "step": 3494 }, { "epoch": 0.6029500560683171, "grad_norm": 0.6640625, "learning_rate": 1.591654094451033e-05, "loss": 1.4143, "step": 3495 }, { "epoch": 0.603122573967049, "grad_norm": 0.63671875, "learning_rate": 1.5914346368540007e-05, "loss": 1.5217, "step": 3496 }, { "epoch": 0.6032950918657811, "grad_norm": 0.66796875, "learning_rate": 1.591215135439795e-05, "loss": 1.4534, "step": 3497 }, { "epoch": 0.6034676097645131, "grad_norm": 0.69140625, "learning_rate": 1.5909955902246782e-05, "loss": 1.4631, "step": 3498 }, { "epoch": 0.603640127663245, "grad_norm": 0.73828125, "learning_rate": 1.5907760012249148e-05, "loss": 1.4084, "step": 3499 }, { "epoch": 0.603812645561977, "grad_norm": 0.67578125, "learning_rate": 1.590556368456775e-05, "loss": 1.4021, "step": 3500 }, { "epoch": 0.603812645561977, "eval_loss": 1.4319883584976196, "eval_runtime": 10.882, "eval_samples_per_second": 94.101, "eval_steps_per_second": 23.525, "step": 3500 }, { "epoch": 0.6039851634607091, "grad_norm": 0.64453125, "learning_rate": 1.5903366919365283e-05, "loss": 1.4274, "step": 3501 }, { "epoch": 0.604157681359441, "grad_norm": 0.65625, "learning_rate": 1.5901169716804516e-05, "loss": 1.6006, "step": 3502 }, { "epoch": 0.604330199258173, "grad_norm": 0.69140625, "learning_rate": 1.589897207704822e-05, "loss": 1.4748, "step": 3503 }, { "epoch": 0.6045027171569051, "grad_norm": 0.64453125, "learning_rate": 1.5896774000259218e-05, "loss": 1.4845, "step": 3504 }, { "epoch": 0.604675235055637, "grad_norm": 0.73828125, "learning_rate": 1.5894575486600354e-05, "loss": 1.5827, "step": 3505 }, { "epoch": 0.604847752954369, "grad_norm": 0.7265625, "learning_rate": 1.58923765362345e-05, "loss": 1.4438, "step": 3506 }, { "epoch": 0.6050202708531011, "grad_norm": 0.56640625, "learning_rate": 1.5890177149324583e-05, "loss": 1.363, "step": 3507 }, { "epoch": 0.605192788751833, "grad_norm": 0.6171875, "learning_rate": 1.5887977326033533e-05, "loss": 1.4079, "step": 3508 }, { "epoch": 0.605365306650565, "grad_norm": 0.66015625, "learning_rate": 1.5885777066524335e-05, "loss": 1.4491, "step": 3509 }, { "epoch": 0.6055378245492969, "grad_norm": 0.80859375, "learning_rate": 1.588357637096e-05, "loss": 1.4404, "step": 3510 }, { "epoch": 0.605710342448029, "grad_norm": 0.63671875, "learning_rate": 1.5881375239503558e-05, "loss": 1.4348, "step": 3511 }, { "epoch": 0.605882860346761, "grad_norm": 0.609375, "learning_rate": 1.5879173672318095e-05, "loss": 1.3538, "step": 3512 }, { "epoch": 0.6060553782454929, "grad_norm": 0.94140625, "learning_rate": 1.587697166956671e-05, "loss": 1.4551, "step": 3513 }, { "epoch": 0.606227896144225, "grad_norm": 0.58203125, "learning_rate": 1.587476923141254e-05, "loss": 1.3779, "step": 3514 }, { "epoch": 0.606400414042957, "grad_norm": 0.68359375, "learning_rate": 1.587256635801876e-05, "loss": 1.4017, "step": 3515 }, { "epoch": 0.6065729319416889, "grad_norm": 0.6328125, "learning_rate": 1.5870363049548573e-05, "loss": 1.3796, "step": 3516 }, { "epoch": 0.6067454498404209, "grad_norm": 0.67578125, "learning_rate": 1.5868159306165208e-05, "loss": 1.4247, "step": 3517 }, { "epoch": 0.606917967739153, "grad_norm": 0.6328125, "learning_rate": 1.5865955128031937e-05, "loss": 1.4333, "step": 3518 }, { "epoch": 0.6070904856378849, "grad_norm": 0.74609375, "learning_rate": 1.586375051531206e-05, "loss": 1.4746, "step": 3519 }, { "epoch": 0.6072630035366169, "grad_norm": 0.6640625, "learning_rate": 1.5861545468168907e-05, "loss": 1.4033, "step": 3520 }, { "epoch": 0.607435521435349, "grad_norm": 0.64453125, "learning_rate": 1.5859339986765837e-05, "loss": 1.4576, "step": 3521 }, { "epoch": 0.6076080393340809, "grad_norm": 0.7265625, "learning_rate": 1.5857134071266253e-05, "loss": 1.4398, "step": 3522 }, { "epoch": 0.6077805572328129, "grad_norm": 0.69140625, "learning_rate": 1.5854927721833577e-05, "loss": 1.4282, "step": 3523 }, { "epoch": 0.607953075131545, "grad_norm": 0.828125, "learning_rate": 1.5852720938631273e-05, "loss": 1.3739, "step": 3524 }, { "epoch": 0.6081255930302769, "grad_norm": 0.609375, "learning_rate": 1.5850513721822832e-05, "loss": 1.4447, "step": 3525 }, { "epoch": 0.6082981109290089, "grad_norm": 0.6796875, "learning_rate": 1.5848306071571782e-05, "loss": 1.5001, "step": 3526 }, { "epoch": 0.6084706288277408, "grad_norm": 0.62109375, "learning_rate": 1.5846097988041672e-05, "loss": 1.4358, "step": 3527 }, { "epoch": 0.6086431467264729, "grad_norm": 0.63671875, "learning_rate": 1.58438894713961e-05, "loss": 1.3999, "step": 3528 }, { "epoch": 0.6088156646252049, "grad_norm": 0.9140625, "learning_rate": 1.584168052179868e-05, "loss": 1.5327, "step": 3529 }, { "epoch": 0.6089881825239368, "grad_norm": 0.703125, "learning_rate": 1.5839471139413065e-05, "loss": 1.4658, "step": 3530 }, { "epoch": 0.6091607004226689, "grad_norm": 0.6171875, "learning_rate": 1.583726132440294e-05, "loss": 1.5247, "step": 3531 }, { "epoch": 0.6093332183214009, "grad_norm": 0.71875, "learning_rate": 1.5835051076932028e-05, "loss": 1.528, "step": 3532 }, { "epoch": 0.6095057362201328, "grad_norm": 0.62890625, "learning_rate": 1.5832840397164067e-05, "loss": 1.4172, "step": 3533 }, { "epoch": 0.6096782541188648, "grad_norm": 0.63671875, "learning_rate": 1.5830629285262848e-05, "loss": 1.5032, "step": 3534 }, { "epoch": 0.6098507720175969, "grad_norm": 0.77734375, "learning_rate": 1.582841774139218e-05, "loss": 1.4678, "step": 3535 }, { "epoch": 0.6100232899163288, "grad_norm": 0.64453125, "learning_rate": 1.5826205765715905e-05, "loss": 1.4701, "step": 3536 }, { "epoch": 0.6101958078150608, "grad_norm": 0.62890625, "learning_rate": 1.5823993358397906e-05, "loss": 1.4376, "step": 3537 }, { "epoch": 0.6103683257137928, "grad_norm": 0.7109375, "learning_rate": 1.582178051960209e-05, "loss": 1.4783, "step": 3538 }, { "epoch": 0.6105408436125248, "grad_norm": 0.6953125, "learning_rate": 1.5819567249492392e-05, "loss": 1.4107, "step": 3539 }, { "epoch": 0.6107133615112568, "grad_norm": 0.58984375, "learning_rate": 1.5817353548232795e-05, "loss": 1.3483, "step": 3540 }, { "epoch": 0.6108858794099888, "grad_norm": 0.6875, "learning_rate": 1.5815139415987296e-05, "loss": 1.3945, "step": 3541 }, { "epoch": 0.6110583973087208, "grad_norm": 0.69140625, "learning_rate": 1.5812924852919936e-05, "loss": 1.3612, "step": 3542 }, { "epoch": 0.6112309152074528, "grad_norm": 0.640625, "learning_rate": 1.581070985919478e-05, "loss": 1.4229, "step": 3543 }, { "epoch": 0.6114034331061847, "grad_norm": 0.63671875, "learning_rate": 1.580849443497593e-05, "loss": 1.4903, "step": 3544 }, { "epoch": 0.6115759510049168, "grad_norm": 0.73828125, "learning_rate": 1.5806278580427523e-05, "loss": 1.4872, "step": 3545 }, { "epoch": 0.6117484689036488, "grad_norm": 0.6640625, "learning_rate": 1.580406229571372e-05, "loss": 1.4751, "step": 3546 }, { "epoch": 0.6119209868023807, "grad_norm": 0.6796875, "learning_rate": 1.5801845580998708e-05, "loss": 1.3846, "step": 3547 }, { "epoch": 0.6120935047011128, "grad_norm": 0.83984375, "learning_rate": 1.5799628436446733e-05, "loss": 1.5369, "step": 3548 }, { "epoch": 0.6122660225998448, "grad_norm": 0.62890625, "learning_rate": 1.5797410862222043e-05, "loss": 1.4508, "step": 3549 }, { "epoch": 0.6124385404985767, "grad_norm": 0.63671875, "learning_rate": 1.579519285848893e-05, "loss": 1.3334, "step": 3550 }, { "epoch": 0.6126110583973087, "grad_norm": 0.73828125, "learning_rate": 1.579297442541172e-05, "loss": 1.4288, "step": 3551 }, { "epoch": 0.6127835762960407, "grad_norm": 0.6796875, "learning_rate": 1.5790755563154773e-05, "loss": 1.4861, "step": 3552 }, { "epoch": 0.6129560941947727, "grad_norm": 0.578125, "learning_rate": 1.578853627188247e-05, "loss": 1.5465, "step": 3553 }, { "epoch": 0.6131286120935047, "grad_norm": 0.6328125, "learning_rate": 1.5786316551759232e-05, "loss": 1.3731, "step": 3554 }, { "epoch": 0.6133011299922367, "grad_norm": 0.9296875, "learning_rate": 1.5784096402949507e-05, "loss": 1.4603, "step": 3555 }, { "epoch": 0.6134736478909687, "grad_norm": 0.640625, "learning_rate": 1.578187582561778e-05, "loss": 1.3989, "step": 3556 }, { "epoch": 0.6136461657897007, "grad_norm": 0.6015625, "learning_rate": 1.5779654819928565e-05, "loss": 1.5056, "step": 3557 }, { "epoch": 0.6138186836884326, "grad_norm": 0.69140625, "learning_rate": 1.577743338604641e-05, "loss": 1.5337, "step": 3558 }, { "epoch": 0.6139912015871647, "grad_norm": 0.62109375, "learning_rate": 1.5775211524135888e-05, "loss": 1.3694, "step": 3559 }, { "epoch": 0.6141637194858967, "grad_norm": 0.58984375, "learning_rate": 1.5772989234361612e-05, "loss": 1.4417, "step": 3560 }, { "epoch": 0.6143362373846286, "grad_norm": 0.609375, "learning_rate": 1.5770766516888224e-05, "loss": 1.5343, "step": 3561 }, { "epoch": 0.6145087552833607, "grad_norm": 0.65625, "learning_rate": 1.5768543371880394e-05, "loss": 1.4724, "step": 3562 }, { "epoch": 0.6146812731820926, "grad_norm": 0.5859375, "learning_rate": 1.5766319799502822e-05, "loss": 1.4003, "step": 3563 }, { "epoch": 0.6148537910808246, "grad_norm": 0.62109375, "learning_rate": 1.5764095799920256e-05, "loss": 1.4674, "step": 3564 }, { "epoch": 0.6150263089795567, "grad_norm": 0.64453125, "learning_rate": 1.576187137329746e-05, "loss": 1.4818, "step": 3565 }, { "epoch": 0.6151988268782886, "grad_norm": 0.703125, "learning_rate": 1.5759646519799227e-05, "loss": 1.4898, "step": 3566 }, { "epoch": 0.6153713447770206, "grad_norm": 0.609375, "learning_rate": 1.5757421239590388e-05, "loss": 1.4803, "step": 3567 }, { "epoch": 0.6155438626757526, "grad_norm": 0.60546875, "learning_rate": 1.5755195532835814e-05, "loss": 1.5081, "step": 3568 }, { "epoch": 0.6157163805744846, "grad_norm": 0.7265625, "learning_rate": 1.5752969399700396e-05, "loss": 1.4798, "step": 3569 }, { "epoch": 0.6158888984732166, "grad_norm": 0.62109375, "learning_rate": 1.5750742840349054e-05, "loss": 1.4868, "step": 3570 }, { "epoch": 0.6160614163719486, "grad_norm": 0.61328125, "learning_rate": 1.5748515854946753e-05, "loss": 1.4558, "step": 3571 }, { "epoch": 0.6162339342706806, "grad_norm": 0.59375, "learning_rate": 1.574628844365848e-05, "loss": 1.4448, "step": 3572 }, { "epoch": 0.6164064521694126, "grad_norm": 0.859375, "learning_rate": 1.5744060606649253e-05, "loss": 1.4433, "step": 3573 }, { "epoch": 0.6165789700681445, "grad_norm": 0.66796875, "learning_rate": 1.5741832344084126e-05, "loss": 1.4628, "step": 3574 }, { "epoch": 0.6167514879668765, "grad_norm": 0.93359375, "learning_rate": 1.5739603656128183e-05, "loss": 1.4946, "step": 3575 }, { "epoch": 0.6169240058656086, "grad_norm": 0.79296875, "learning_rate": 1.573737454294654e-05, "loss": 1.5086, "step": 3576 }, { "epoch": 0.6170965237643405, "grad_norm": 0.74609375, "learning_rate": 1.573514500470434e-05, "loss": 1.4041, "step": 3577 }, { "epoch": 0.6172690416630725, "grad_norm": 0.6640625, "learning_rate": 1.5732915041566764e-05, "loss": 1.4129, "step": 3578 }, { "epoch": 0.6174415595618046, "grad_norm": 0.765625, "learning_rate": 1.5730684653699017e-05, "loss": 1.4428, "step": 3579 }, { "epoch": 0.6176140774605365, "grad_norm": 0.8203125, "learning_rate": 1.5728453841266345e-05, "loss": 1.4511, "step": 3580 }, { "epoch": 0.6177865953592685, "grad_norm": 0.67578125, "learning_rate": 1.5726222604434023e-05, "loss": 1.4721, "step": 3581 }, { "epoch": 0.6179591132580006, "grad_norm": 0.65625, "learning_rate": 1.572399094336735e-05, "loss": 1.4831, "step": 3582 }, { "epoch": 0.6181316311567325, "grad_norm": 0.90625, "learning_rate": 1.5721758858231662e-05, "loss": 1.4647, "step": 3583 }, { "epoch": 0.6183041490554645, "grad_norm": 0.67578125, "learning_rate": 1.571952634919233e-05, "loss": 1.4295, "step": 3584 }, { "epoch": 0.6184766669541965, "grad_norm": 0.6875, "learning_rate": 1.5717293416414743e-05, "loss": 1.4365, "step": 3585 }, { "epoch": 0.6186491848529285, "grad_norm": 0.671875, "learning_rate": 1.571506006006434e-05, "loss": 1.3541, "step": 3586 }, { "epoch": 0.6188217027516605, "grad_norm": 0.7109375, "learning_rate": 1.5712826280306578e-05, "loss": 1.3913, "step": 3587 }, { "epoch": 0.6189942206503924, "grad_norm": 0.7109375, "learning_rate": 1.571059207730695e-05, "loss": 1.4749, "step": 3588 }, { "epoch": 0.6191667385491245, "grad_norm": 0.61328125, "learning_rate": 1.5708357451230978e-05, "loss": 1.5027, "step": 3589 }, { "epoch": 0.6193392564478565, "grad_norm": 0.61328125, "learning_rate": 1.570612240224422e-05, "loss": 1.3575, "step": 3590 }, { "epoch": 0.6195117743465884, "grad_norm": 0.75390625, "learning_rate": 1.570388693051226e-05, "loss": 1.4667, "step": 3591 }, { "epoch": 0.6196842922453204, "grad_norm": 0.66015625, "learning_rate": 1.570165103620072e-05, "loss": 1.4951, "step": 3592 }, { "epoch": 0.6198568101440525, "grad_norm": 0.6796875, "learning_rate": 1.5699414719475243e-05, "loss": 1.4742, "step": 3593 }, { "epoch": 0.6200293280427844, "grad_norm": 0.62890625, "learning_rate": 1.5697177980501507e-05, "loss": 1.4339, "step": 3594 }, { "epoch": 0.6202018459415164, "grad_norm": 0.609375, "learning_rate": 1.5694940819445234e-05, "loss": 1.505, "step": 3595 }, { "epoch": 0.6203743638402485, "grad_norm": 0.5859375, "learning_rate": 1.5692703236472162e-05, "loss": 1.483, "step": 3596 }, { "epoch": 0.6205468817389804, "grad_norm": 0.734375, "learning_rate": 1.5690465231748064e-05, "loss": 1.5035, "step": 3597 }, { "epoch": 0.6207193996377124, "grad_norm": 0.73046875, "learning_rate": 1.5688226805438745e-05, "loss": 1.3519, "step": 3598 }, { "epoch": 0.6208919175364445, "grad_norm": 0.8984375, "learning_rate": 1.5685987957710043e-05, "loss": 1.4842, "step": 3599 }, { "epoch": 0.6210644354351764, "grad_norm": 0.72265625, "learning_rate": 1.5683748688727827e-05, "loss": 1.3299, "step": 3600 }, { "epoch": 0.6210644354351764, "eval_loss": 1.4302067756652832, "eval_runtime": 10.8016, "eval_samples_per_second": 94.801, "eval_steps_per_second": 23.7, "step": 3600 }, { "epoch": 0.6212369533339084, "grad_norm": 0.89453125, "learning_rate": 1.568150899865799e-05, "loss": 1.4975, "step": 3601 }, { "epoch": 0.6214094712326403, "grad_norm": 0.73828125, "learning_rate": 1.567926888766647e-05, "loss": 1.406, "step": 3602 }, { "epoch": 0.6215819891313724, "grad_norm": 0.60546875, "learning_rate": 1.5677028355919225e-05, "loss": 1.4159, "step": 3603 }, { "epoch": 0.6217545070301044, "grad_norm": 0.7421875, "learning_rate": 1.5674787403582247e-05, "loss": 1.3803, "step": 3604 }, { "epoch": 0.6219270249288363, "grad_norm": 0.72265625, "learning_rate": 1.567254603082156e-05, "loss": 1.4789, "step": 3605 }, { "epoch": 0.6220995428275684, "grad_norm": 0.65625, "learning_rate": 1.567030423780322e-05, "loss": 1.4876, "step": 3606 }, { "epoch": 0.6222720607263004, "grad_norm": 0.609375, "learning_rate": 1.5668062024693314e-05, "loss": 1.4734, "step": 3607 }, { "epoch": 0.6224445786250323, "grad_norm": 0.6328125, "learning_rate": 1.5665819391657955e-05, "loss": 1.3378, "step": 3608 }, { "epoch": 0.6226170965237643, "grad_norm": 0.796875, "learning_rate": 1.5663576338863294e-05, "loss": 1.4462, "step": 3609 }, { "epoch": 0.6227896144224964, "grad_norm": 0.66015625, "learning_rate": 1.5661332866475514e-05, "loss": 1.4788, "step": 3610 }, { "epoch": 0.6229621323212283, "grad_norm": 0.6171875, "learning_rate": 1.5659088974660813e-05, "loss": 1.5331, "step": 3611 }, { "epoch": 0.6231346502199603, "grad_norm": 0.73828125, "learning_rate": 1.5656844663585448e-05, "loss": 1.4245, "step": 3612 }, { "epoch": 0.6233071681186924, "grad_norm": 0.6484375, "learning_rate": 1.5654599933415683e-05, "loss": 1.3844, "step": 3613 }, { "epoch": 0.6234796860174243, "grad_norm": 0.640625, "learning_rate": 1.565235478431783e-05, "loss": 1.3807, "step": 3614 }, { "epoch": 0.6236522039161563, "grad_norm": 0.62890625, "learning_rate": 1.5650109216458208e-05, "loss": 1.4553, "step": 3615 }, { "epoch": 0.6238247218148883, "grad_norm": 0.7578125, "learning_rate": 1.5647863230003193e-05, "loss": 1.398, "step": 3616 }, { "epoch": 0.6239972397136203, "grad_norm": 0.66796875, "learning_rate": 1.564561682511918e-05, "loss": 1.4887, "step": 3617 }, { "epoch": 0.6241697576123523, "grad_norm": 0.67578125, "learning_rate": 1.5643370001972602e-05, "loss": 1.4707, "step": 3618 }, { "epoch": 0.6243422755110842, "grad_norm": 0.8671875, "learning_rate": 1.564112276072991e-05, "loss": 1.4133, "step": 3619 }, { "epoch": 0.6245147934098163, "grad_norm": 0.84375, "learning_rate": 1.5638875101557596e-05, "loss": 1.4796, "step": 3620 }, { "epoch": 0.6246873113085483, "grad_norm": 0.7578125, "learning_rate": 1.5636627024622183e-05, "loss": 1.3614, "step": 3621 }, { "epoch": 0.6248598292072802, "grad_norm": 0.6953125, "learning_rate": 1.5634378530090217e-05, "loss": 1.3653, "step": 3622 }, { "epoch": 0.6250323471060123, "grad_norm": 0.8203125, "learning_rate": 1.5632129618128285e-05, "loss": 1.4674, "step": 3623 }, { "epoch": 0.6252048650047443, "grad_norm": 0.68359375, "learning_rate": 1.5629880288903002e-05, "loss": 1.427, "step": 3624 }, { "epoch": 0.6253773829034762, "grad_norm": 0.61328125, "learning_rate": 1.5627630542581013e-05, "loss": 1.4733, "step": 3625 }, { "epoch": 0.6255499008022082, "grad_norm": 0.69140625, "learning_rate": 1.5625380379328984e-05, "loss": 1.5152, "step": 3626 }, { "epoch": 0.6257224187009403, "grad_norm": 0.71875, "learning_rate": 1.562312979931363e-05, "loss": 1.482, "step": 3627 }, { "epoch": 0.6258949365996722, "grad_norm": 0.80078125, "learning_rate": 1.5620878802701687e-05, "loss": 1.4838, "step": 3628 }, { "epoch": 0.6260674544984042, "grad_norm": 0.65625, "learning_rate": 1.561862738965992e-05, "loss": 1.4425, "step": 3629 }, { "epoch": 0.6262399723971362, "grad_norm": 0.76953125, "learning_rate": 1.561637556035513e-05, "loss": 1.5229, "step": 3630 }, { "epoch": 0.6264124902958682, "grad_norm": 0.8984375, "learning_rate": 1.5614123314954144e-05, "loss": 1.457, "step": 3631 }, { "epoch": 0.6265850081946002, "grad_norm": 0.9140625, "learning_rate": 1.5611870653623826e-05, "loss": 1.5144, "step": 3632 }, { "epoch": 0.6267575260933321, "grad_norm": 0.734375, "learning_rate": 1.5609617576531065e-05, "loss": 1.4189, "step": 3633 }, { "epoch": 0.6269300439920642, "grad_norm": 0.91796875, "learning_rate": 1.560736408384278e-05, "loss": 1.5909, "step": 3634 }, { "epoch": 0.6271025618907962, "grad_norm": 0.60546875, "learning_rate": 1.560511017572593e-05, "loss": 1.4118, "step": 3635 }, { "epoch": 0.6272750797895281, "grad_norm": 0.62109375, "learning_rate": 1.56028558523475e-05, "loss": 1.4832, "step": 3636 }, { "epoch": 0.6274475976882602, "grad_norm": 0.73828125, "learning_rate": 1.5600601113874497e-05, "loss": 1.4742, "step": 3637 }, { "epoch": 0.6276201155869922, "grad_norm": 0.69140625, "learning_rate": 1.5598345960473965e-05, "loss": 1.4938, "step": 3638 }, { "epoch": 0.6277926334857241, "grad_norm": 0.84375, "learning_rate": 1.559609039231299e-05, "loss": 1.4624, "step": 3639 }, { "epoch": 0.6279651513844562, "grad_norm": 0.6796875, "learning_rate": 1.559383440955867e-05, "loss": 1.4947, "step": 3640 }, { "epoch": 0.6281376692831881, "grad_norm": 0.80078125, "learning_rate": 1.5591578012378148e-05, "loss": 1.4709, "step": 3641 }, { "epoch": 0.6283101871819201, "grad_norm": 0.70703125, "learning_rate": 1.5589321200938584e-05, "loss": 1.4289, "step": 3642 }, { "epoch": 0.6284827050806521, "grad_norm": 0.66015625, "learning_rate": 1.5587063975407185e-05, "loss": 1.4556, "step": 3643 }, { "epoch": 0.6286552229793841, "grad_norm": 0.64453125, "learning_rate": 1.5584806335951177e-05, "loss": 1.5221, "step": 3644 }, { "epoch": 0.6288277408781161, "grad_norm": 0.69921875, "learning_rate": 1.558254828273782e-05, "loss": 1.407, "step": 3645 }, { "epoch": 0.6290002587768481, "grad_norm": 1.34375, "learning_rate": 1.55802898159344e-05, "loss": 1.5986, "step": 3646 }, { "epoch": 0.6291727766755801, "grad_norm": 0.78515625, "learning_rate": 1.557803093570825e-05, "loss": 1.4878, "step": 3647 }, { "epoch": 0.6293452945743121, "grad_norm": 0.58203125, "learning_rate": 1.5575771642226715e-05, "loss": 1.45, "step": 3648 }, { "epoch": 0.6295178124730441, "grad_norm": 0.8125, "learning_rate": 1.5573511935657174e-05, "loss": 1.3846, "step": 3649 }, { "epoch": 0.629690330371776, "grad_norm": 0.91015625, "learning_rate": 1.5571251816167047e-05, "loss": 1.4499, "step": 3650 }, { "epoch": 0.6298628482705081, "grad_norm": 0.66796875, "learning_rate": 1.5568991283923772e-05, "loss": 1.5331, "step": 3651 }, { "epoch": 0.63003536616924, "grad_norm": 0.6953125, "learning_rate": 1.5566730339094827e-05, "loss": 1.4142, "step": 3652 }, { "epoch": 0.630207884067972, "grad_norm": 0.8828125, "learning_rate": 1.5564468981847716e-05, "loss": 1.3816, "step": 3653 }, { "epoch": 0.6303804019667041, "grad_norm": 0.8671875, "learning_rate": 1.5562207212349975e-05, "loss": 1.3857, "step": 3654 }, { "epoch": 0.630552919865436, "grad_norm": 0.6328125, "learning_rate": 1.555994503076917e-05, "loss": 1.5437, "step": 3655 }, { "epoch": 0.630725437764168, "grad_norm": 0.68359375, "learning_rate": 1.5557682437272898e-05, "loss": 1.4104, "step": 3656 }, { "epoch": 0.6308979556629001, "grad_norm": 0.69921875, "learning_rate": 1.555541943202878e-05, "loss": 1.54, "step": 3657 }, { "epoch": 0.631070473561632, "grad_norm": 0.6484375, "learning_rate": 1.555315601520448e-05, "loss": 1.4845, "step": 3658 }, { "epoch": 0.631242991460364, "grad_norm": 0.671875, "learning_rate": 1.5550892186967685e-05, "loss": 1.4748, "step": 3659 }, { "epoch": 0.631415509359096, "grad_norm": 0.859375, "learning_rate": 1.5548627947486114e-05, "loss": 1.3876, "step": 3660 }, { "epoch": 0.631588027257828, "grad_norm": 0.76171875, "learning_rate": 1.5546363296927518e-05, "loss": 1.4506, "step": 3661 }, { "epoch": 0.63176054515656, "grad_norm": 0.8359375, "learning_rate": 1.5544098235459673e-05, "loss": 1.4053, "step": 3662 }, { "epoch": 0.631933063055292, "grad_norm": 0.60546875, "learning_rate": 1.5541832763250386e-05, "loss": 1.4933, "step": 3663 }, { "epoch": 0.632105580954024, "grad_norm": 1.1796875, "learning_rate": 1.5539566880467502e-05, "loss": 1.5495, "step": 3664 }, { "epoch": 0.632278098852756, "grad_norm": 0.828125, "learning_rate": 1.553730058727889e-05, "loss": 1.3917, "step": 3665 }, { "epoch": 0.632450616751488, "grad_norm": 0.8125, "learning_rate": 1.5535033883852456e-05, "loss": 1.521, "step": 3666 }, { "epoch": 0.6326231346502199, "grad_norm": 0.734375, "learning_rate": 1.5532766770356125e-05, "loss": 1.5505, "step": 3667 }, { "epoch": 0.632795652548952, "grad_norm": 0.99609375, "learning_rate": 1.553049924695786e-05, "loss": 1.563, "step": 3668 }, { "epoch": 0.6329681704476839, "grad_norm": 0.8359375, "learning_rate": 1.552823131382566e-05, "loss": 1.4697, "step": 3669 }, { "epoch": 0.6331406883464159, "grad_norm": 0.640625, "learning_rate": 1.5525962971127536e-05, "loss": 1.3944, "step": 3670 }, { "epoch": 0.633313206245148, "grad_norm": 0.9296875, "learning_rate": 1.5523694219031548e-05, "loss": 1.4171, "step": 3671 }, { "epoch": 0.6334857241438799, "grad_norm": 0.8125, "learning_rate": 1.552142505770578e-05, "loss": 1.4392, "step": 3672 }, { "epoch": 0.6336582420426119, "grad_norm": 0.7109375, "learning_rate": 1.5519155487318345e-05, "loss": 1.4225, "step": 3673 }, { "epoch": 0.633830759941344, "grad_norm": 0.796875, "learning_rate": 1.5516885508037388e-05, "loss": 1.438, "step": 3674 }, { "epoch": 0.6340032778400759, "grad_norm": 0.81640625, "learning_rate": 1.5514615120031077e-05, "loss": 1.448, "step": 3675 }, { "epoch": 0.6341757957388079, "grad_norm": 0.83203125, "learning_rate": 1.551234432346762e-05, "loss": 1.5713, "step": 3676 }, { "epoch": 0.6343483136375399, "grad_norm": 1.7421875, "learning_rate": 1.551007311851526e-05, "loss": 1.4049, "step": 3677 }, { "epoch": 0.6345208315362719, "grad_norm": 0.95703125, "learning_rate": 1.550780150534225e-05, "loss": 1.4776, "step": 3678 }, { "epoch": 0.6346933494350039, "grad_norm": 0.72265625, "learning_rate": 1.550552948411689e-05, "loss": 1.3499, "step": 3679 }, { "epoch": 0.6348658673337358, "grad_norm": 0.76953125, "learning_rate": 1.5503257055007502e-05, "loss": 1.4269, "step": 3680 }, { "epoch": 0.6350383852324679, "grad_norm": 0.57421875, "learning_rate": 1.5500984218182452e-05, "loss": 1.5338, "step": 3681 }, { "epoch": 0.6352109031311999, "grad_norm": 0.66796875, "learning_rate": 1.549871097381012e-05, "loss": 1.5733, "step": 3682 }, { "epoch": 0.6353834210299318, "grad_norm": 0.734375, "learning_rate": 1.5496437322058915e-05, "loss": 1.518, "step": 3683 }, { "epoch": 0.6355559389286638, "grad_norm": 0.53515625, "learning_rate": 1.5494163263097294e-05, "loss": 1.4033, "step": 3684 }, { "epoch": 0.6357284568273959, "grad_norm": 0.6015625, "learning_rate": 1.549188879709373e-05, "loss": 1.5143, "step": 3685 }, { "epoch": 0.6359009747261278, "grad_norm": 0.6015625, "learning_rate": 1.5489613924216728e-05, "loss": 1.4921, "step": 3686 }, { "epoch": 0.6360734926248598, "grad_norm": 0.6640625, "learning_rate": 1.5487338644634825e-05, "loss": 1.5659, "step": 3687 }, { "epoch": 0.6362460105235919, "grad_norm": 0.609375, "learning_rate": 1.548506295851659e-05, "loss": 1.5478, "step": 3688 }, { "epoch": 0.6364185284223238, "grad_norm": 0.609375, "learning_rate": 1.5482786866030618e-05, "loss": 1.5006, "step": 3689 }, { "epoch": 0.6365910463210558, "grad_norm": 1.0546875, "learning_rate": 1.5480510367345537e-05, "loss": 1.4564, "step": 3690 }, { "epoch": 0.6367635642197877, "grad_norm": 0.73828125, "learning_rate": 1.547823346263001e-05, "loss": 1.4481, "step": 3691 }, { "epoch": 0.6369360821185198, "grad_norm": 0.59375, "learning_rate": 1.5475956152052706e-05, "loss": 1.5064, "step": 3692 }, { "epoch": 0.6371086000172518, "grad_norm": 0.61328125, "learning_rate": 1.5473678435782365e-05, "loss": 1.4855, "step": 3693 }, { "epoch": 0.6372811179159837, "grad_norm": 0.59765625, "learning_rate": 1.547140031398772e-05, "loss": 1.3907, "step": 3694 }, { "epoch": 0.6374536358147158, "grad_norm": 0.7109375, "learning_rate": 1.5469121786837553e-05, "loss": 1.3983, "step": 3695 }, { "epoch": 0.6376261537134478, "grad_norm": 0.6953125, "learning_rate": 1.546684285450067e-05, "loss": 1.3785, "step": 3696 }, { "epoch": 0.6377986716121797, "grad_norm": 0.6015625, "learning_rate": 1.5464563517145916e-05, "loss": 1.5285, "step": 3697 }, { "epoch": 0.6379711895109118, "grad_norm": 0.70703125, "learning_rate": 1.546228377494215e-05, "loss": 1.4713, "step": 3698 }, { "epoch": 0.6381437074096438, "grad_norm": 0.64453125, "learning_rate": 1.546000362805827e-05, "loss": 1.5182, "step": 3699 }, { "epoch": 0.6383162253083757, "grad_norm": 0.6328125, "learning_rate": 1.5457723076663206e-05, "loss": 1.5226, "step": 3700 }, { "epoch": 0.6383162253083757, "eval_loss": 1.4285202026367188, "eval_runtime": 10.826, "eval_samples_per_second": 94.587, "eval_steps_per_second": 23.647, "step": 3700 }, { "epoch": 0.6384887432071077, "grad_norm": 0.58203125, "learning_rate": 1.5455442120925916e-05, "loss": 1.393, "step": 3701 }, { "epoch": 0.6386612611058398, "grad_norm": 0.59765625, "learning_rate": 1.5453160761015386e-05, "loss": 1.4783, "step": 3702 }, { "epoch": 0.6388337790045717, "grad_norm": 0.6171875, "learning_rate": 1.5450878997100634e-05, "loss": 1.4916, "step": 3703 }, { "epoch": 0.6390062969033037, "grad_norm": 0.66015625, "learning_rate": 1.5448596829350706e-05, "loss": 1.4923, "step": 3704 }, { "epoch": 0.6391788148020358, "grad_norm": 0.625, "learning_rate": 1.544631425793468e-05, "loss": 1.4529, "step": 3705 }, { "epoch": 0.6393513327007677, "grad_norm": 2.890625, "learning_rate": 1.5444031283021668e-05, "loss": 1.4464, "step": 3706 }, { "epoch": 0.6395238505994997, "grad_norm": 0.609375, "learning_rate": 1.54417479047808e-05, "loss": 1.4512, "step": 3707 }, { "epoch": 0.6396963684982316, "grad_norm": 0.61328125, "learning_rate": 1.5439464123381243e-05, "loss": 1.4565, "step": 3708 }, { "epoch": 0.6398688863969637, "grad_norm": 0.5859375, "learning_rate": 1.54371799389922e-05, "loss": 1.4812, "step": 3709 }, { "epoch": 0.6400414042956957, "grad_norm": 0.62109375, "learning_rate": 1.543489535178289e-05, "loss": 1.4413, "step": 3710 }, { "epoch": 0.6402139221944276, "grad_norm": 0.6015625, "learning_rate": 1.5432610361922578e-05, "loss": 1.3908, "step": 3711 }, { "epoch": 0.6403864400931597, "grad_norm": 0.73828125, "learning_rate": 1.5430324969580545e-05, "loss": 1.4154, "step": 3712 }, { "epoch": 0.6405589579918917, "grad_norm": 0.5625, "learning_rate": 1.5428039174926106e-05, "loss": 1.4293, "step": 3713 }, { "epoch": 0.6407314758906236, "grad_norm": 0.72265625, "learning_rate": 1.5425752978128612e-05, "loss": 1.4786, "step": 3714 }, { "epoch": 0.6409039937893557, "grad_norm": 0.58984375, "learning_rate": 1.5423466379357433e-05, "loss": 1.4136, "step": 3715 }, { "epoch": 0.6410765116880877, "grad_norm": 0.69140625, "learning_rate": 1.542117937878198e-05, "loss": 1.3674, "step": 3716 }, { "epoch": 0.6412490295868196, "grad_norm": 0.73046875, "learning_rate": 1.5418891976571682e-05, "loss": 1.3893, "step": 3717 }, { "epoch": 0.6414215474855516, "grad_norm": 4.875, "learning_rate": 1.541660417289601e-05, "loss": 1.524, "step": 3718 }, { "epoch": 0.6415940653842837, "grad_norm": 0.73046875, "learning_rate": 1.5414315967924454e-05, "loss": 1.422, "step": 3719 }, { "epoch": 0.6417665832830156, "grad_norm": 0.72265625, "learning_rate": 1.5412027361826544e-05, "loss": 1.486, "step": 3720 }, { "epoch": 0.6419391011817476, "grad_norm": 0.64453125, "learning_rate": 1.5409738354771832e-05, "loss": 1.443, "step": 3721 }, { "epoch": 0.6421116190804796, "grad_norm": 0.59375, "learning_rate": 1.5407448946929898e-05, "loss": 1.4336, "step": 3722 }, { "epoch": 0.6422841369792116, "grad_norm": 0.7265625, "learning_rate": 1.540515913847036e-05, "loss": 1.4494, "step": 3723 }, { "epoch": 0.6424566548779436, "grad_norm": 0.640625, "learning_rate": 1.5402868929562857e-05, "loss": 1.4959, "step": 3724 }, { "epoch": 0.6426291727766755, "grad_norm": 0.6953125, "learning_rate": 1.540057832037707e-05, "loss": 1.3744, "step": 3725 }, { "epoch": 0.6428016906754076, "grad_norm": 0.6796875, "learning_rate": 1.539828731108269e-05, "loss": 1.4017, "step": 3726 }, { "epoch": 0.6429742085741396, "grad_norm": 0.60546875, "learning_rate": 1.539599590184946e-05, "loss": 1.5099, "step": 3727 }, { "epoch": 0.6431467264728715, "grad_norm": 0.7265625, "learning_rate": 1.5393704092847143e-05, "loss": 1.4387, "step": 3728 }, { "epoch": 0.6433192443716036, "grad_norm": 0.59765625, "learning_rate": 1.5391411884245517e-05, "loss": 1.3447, "step": 3729 }, { "epoch": 0.6434917622703356, "grad_norm": 0.6953125, "learning_rate": 1.5389119276214415e-05, "loss": 1.5693, "step": 3730 }, { "epoch": 0.6436642801690675, "grad_norm": 0.59375, "learning_rate": 1.5386826268923685e-05, "loss": 1.4577, "step": 3731 }, { "epoch": 0.6438367980677996, "grad_norm": 0.63671875, "learning_rate": 1.5384532862543207e-05, "loss": 1.4509, "step": 3732 }, { "epoch": 0.6440093159665315, "grad_norm": 0.6796875, "learning_rate": 1.538223905724289e-05, "loss": 1.3712, "step": 3733 }, { "epoch": 0.6441818338652635, "grad_norm": 0.609375, "learning_rate": 1.537994485319267e-05, "loss": 1.4282, "step": 3734 }, { "epoch": 0.6443543517639955, "grad_norm": 0.62109375, "learning_rate": 1.537765025056252e-05, "loss": 1.4045, "step": 3735 }, { "epoch": 0.6445268696627275, "grad_norm": 0.56640625, "learning_rate": 1.5375355249522444e-05, "loss": 1.4643, "step": 3736 }, { "epoch": 0.6446993875614595, "grad_norm": 0.58984375, "learning_rate": 1.537305985024246e-05, "loss": 1.5169, "step": 3737 }, { "epoch": 0.6448719054601915, "grad_norm": 0.64453125, "learning_rate": 1.5370764052892634e-05, "loss": 1.4313, "step": 3738 }, { "epoch": 0.6450444233589235, "grad_norm": 0.640625, "learning_rate": 1.5368467857643045e-05, "loss": 1.534, "step": 3739 }, { "epoch": 0.6452169412576555, "grad_norm": 0.6484375, "learning_rate": 1.5366171264663816e-05, "loss": 1.4798, "step": 3740 }, { "epoch": 0.6453894591563875, "grad_norm": 0.61328125, "learning_rate": 1.5363874274125086e-05, "loss": 1.4276, "step": 3741 }, { "epoch": 0.6455619770551194, "grad_norm": 0.640625, "learning_rate": 1.5361576886197037e-05, "loss": 1.478, "step": 3742 }, { "epoch": 0.6457344949538515, "grad_norm": 0.66796875, "learning_rate": 1.535927910104987e-05, "loss": 1.516, "step": 3743 }, { "epoch": 0.6459070128525835, "grad_norm": 0.63671875, "learning_rate": 1.5356980918853827e-05, "loss": 1.4182, "step": 3744 }, { "epoch": 0.6460795307513154, "grad_norm": 0.66015625, "learning_rate": 1.535468233977916e-05, "loss": 1.5294, "step": 3745 }, { "epoch": 0.6462520486500475, "grad_norm": 0.6796875, "learning_rate": 1.5352383363996172e-05, "loss": 1.518, "step": 3746 }, { "epoch": 0.6464245665487794, "grad_norm": 0.73046875, "learning_rate": 1.5350083991675177e-05, "loss": 1.4986, "step": 3747 }, { "epoch": 0.6465970844475114, "grad_norm": 0.67578125, "learning_rate": 1.534778422298653e-05, "loss": 1.4611, "step": 3748 }, { "epoch": 0.6467696023462435, "grad_norm": 0.6875, "learning_rate": 1.534548405810062e-05, "loss": 1.5083, "step": 3749 }, { "epoch": 0.6469421202449754, "grad_norm": 0.6875, "learning_rate": 1.5343183497187844e-05, "loss": 1.5194, "step": 3750 }, { "epoch": 0.6471146381437074, "grad_norm": 0.71875, "learning_rate": 1.534088254041865e-05, "loss": 1.4361, "step": 3751 }, { "epoch": 0.6472871560424394, "grad_norm": 0.6953125, "learning_rate": 1.533858118796351e-05, "loss": 1.4349, "step": 3752 }, { "epoch": 0.6474596739411714, "grad_norm": 0.703125, "learning_rate": 1.5336279439992918e-05, "loss": 1.4821, "step": 3753 }, { "epoch": 0.6476321918399034, "grad_norm": 0.65625, "learning_rate": 1.53339772966774e-05, "loss": 1.3555, "step": 3754 }, { "epoch": 0.6478047097386354, "grad_norm": 0.6328125, "learning_rate": 1.5331674758187516e-05, "loss": 1.3945, "step": 3755 }, { "epoch": 0.6479772276373674, "grad_norm": 0.69140625, "learning_rate": 1.532937182469385e-05, "loss": 1.4914, "step": 3756 }, { "epoch": 0.6481497455360994, "grad_norm": 0.6484375, "learning_rate": 1.5327068496367023e-05, "loss": 1.5468, "step": 3757 }, { "epoch": 0.6483222634348313, "grad_norm": 0.6171875, "learning_rate": 1.5324764773377677e-05, "loss": 1.471, "step": 3758 }, { "epoch": 0.6484947813335633, "grad_norm": 0.5859375, "learning_rate": 1.532246065589648e-05, "loss": 1.468, "step": 3759 }, { "epoch": 0.6486672992322954, "grad_norm": 0.6484375, "learning_rate": 1.532015614409415e-05, "loss": 1.4026, "step": 3760 }, { "epoch": 0.6488398171310273, "grad_norm": 0.78125, "learning_rate": 1.5317851238141406e-05, "loss": 1.5021, "step": 3761 }, { "epoch": 0.6490123350297593, "grad_norm": 0.69140625, "learning_rate": 1.5315545938209016e-05, "loss": 1.3502, "step": 3762 }, { "epoch": 0.6491848529284914, "grad_norm": 0.5703125, "learning_rate": 1.531324024446777e-05, "loss": 1.4444, "step": 3763 }, { "epoch": 0.6493573708272233, "grad_norm": 0.59375, "learning_rate": 1.531093415708849e-05, "loss": 1.4432, "step": 3764 }, { "epoch": 0.6495298887259553, "grad_norm": 0.6328125, "learning_rate": 1.530862767624202e-05, "loss": 1.4107, "step": 3765 }, { "epoch": 0.6497024066246873, "grad_norm": 0.6328125, "learning_rate": 1.5306320802099243e-05, "loss": 1.4785, "step": 3766 }, { "epoch": 0.6498749245234193, "grad_norm": 0.57421875, "learning_rate": 1.5304013534831064e-05, "loss": 1.4472, "step": 3767 }, { "epoch": 0.6500474424221513, "grad_norm": 0.66796875, "learning_rate": 1.5301705874608423e-05, "loss": 1.4185, "step": 3768 }, { "epoch": 0.6502199603208833, "grad_norm": 0.68359375, "learning_rate": 1.5299397821602284e-05, "loss": 1.5577, "step": 3769 }, { "epoch": 0.6503924782196153, "grad_norm": 0.5859375, "learning_rate": 1.5297089375983644e-05, "loss": 1.4669, "step": 3770 }, { "epoch": 0.6505649961183473, "grad_norm": 0.6015625, "learning_rate": 1.5294780537923523e-05, "loss": 1.5208, "step": 3771 }, { "epoch": 0.6507375140170792, "grad_norm": 0.70703125, "learning_rate": 1.5292471307592975e-05, "loss": 1.4643, "step": 3772 }, { "epoch": 0.6509100319158113, "grad_norm": 0.62109375, "learning_rate": 1.5290161685163086e-05, "loss": 1.4022, "step": 3773 }, { "epoch": 0.6510825498145433, "grad_norm": 0.796875, "learning_rate": 1.5287851670804963e-05, "loss": 1.4932, "step": 3774 }, { "epoch": 0.6512550677132752, "grad_norm": 0.71484375, "learning_rate": 1.528554126468975e-05, "loss": 1.4705, "step": 3775 }, { "epoch": 0.6514275856120072, "grad_norm": 0.67578125, "learning_rate": 1.5283230466988615e-05, "loss": 1.3752, "step": 3776 }, { "epoch": 0.6516001035107393, "grad_norm": 0.640625, "learning_rate": 1.5280919277872753e-05, "loss": 1.4108, "step": 3777 }, { "epoch": 0.6517726214094712, "grad_norm": 0.72265625, "learning_rate": 1.5278607697513396e-05, "loss": 1.4798, "step": 3778 }, { "epoch": 0.6519451393082032, "grad_norm": 0.6171875, "learning_rate": 1.52762957260818e-05, "loss": 1.4351, "step": 3779 }, { "epoch": 0.6521176572069353, "grad_norm": 0.76171875, "learning_rate": 1.5273983363749246e-05, "loss": 1.4795, "step": 3780 }, { "epoch": 0.6522901751056672, "grad_norm": 0.5703125, "learning_rate": 1.5271670610687058e-05, "loss": 1.4506, "step": 3781 }, { "epoch": 0.6524626930043992, "grad_norm": 0.7578125, "learning_rate": 1.5269357467066566e-05, "loss": 1.4128, "step": 3782 }, { "epoch": 0.6526352109031311, "grad_norm": 0.6640625, "learning_rate": 1.5267043933059147e-05, "loss": 1.5003, "step": 3783 }, { "epoch": 0.6528077288018632, "grad_norm": 0.58984375, "learning_rate": 1.5264730008836205e-05, "loss": 1.4195, "step": 3784 }, { "epoch": 0.6529802467005952, "grad_norm": 0.6015625, "learning_rate": 1.526241569456917e-05, "loss": 1.4756, "step": 3785 }, { "epoch": 0.6531527645993271, "grad_norm": 0.625, "learning_rate": 1.52601009904295e-05, "loss": 1.4688, "step": 3786 }, { "epoch": 0.6533252824980592, "grad_norm": 0.69921875, "learning_rate": 1.525778589658868e-05, "loss": 1.4437, "step": 3787 }, { "epoch": 0.6534978003967912, "grad_norm": 0.6796875, "learning_rate": 1.5255470413218228e-05, "loss": 1.485, "step": 3788 }, { "epoch": 0.6536703182955231, "grad_norm": 0.85546875, "learning_rate": 1.525315454048969e-05, "loss": 1.3897, "step": 3789 }, { "epoch": 0.6538428361942552, "grad_norm": 0.61328125, "learning_rate": 1.525083827857464e-05, "loss": 1.5565, "step": 3790 }, { "epoch": 0.6540153540929872, "grad_norm": 0.640625, "learning_rate": 1.5248521627644684e-05, "loss": 1.3609, "step": 3791 }, { "epoch": 0.6541878719917191, "grad_norm": 0.6328125, "learning_rate": 1.524620458787145e-05, "loss": 1.4696, "step": 3792 }, { "epoch": 0.6543603898904511, "grad_norm": 0.5859375, "learning_rate": 1.5243887159426603e-05, "loss": 1.4728, "step": 3793 }, { "epoch": 0.6545329077891832, "grad_norm": 0.59765625, "learning_rate": 1.5241569342481826e-05, "loss": 1.4916, "step": 3794 }, { "epoch": 0.6547054256879151, "grad_norm": 0.65625, "learning_rate": 1.5239251137208844e-05, "loss": 1.3874, "step": 3795 }, { "epoch": 0.6548779435866471, "grad_norm": 0.70703125, "learning_rate": 1.52369325437794e-05, "loss": 1.4444, "step": 3796 }, { "epoch": 0.6550504614853792, "grad_norm": 0.55859375, "learning_rate": 1.5234613562365272e-05, "loss": 1.3956, "step": 3797 }, { "epoch": 0.6552229793841111, "grad_norm": 0.61328125, "learning_rate": 1.5232294193138264e-05, "loss": 1.357, "step": 3798 }, { "epoch": 0.6553954972828431, "grad_norm": 0.578125, "learning_rate": 1.5229974436270207e-05, "loss": 1.4616, "step": 3799 }, { "epoch": 0.655568015181575, "grad_norm": 0.75390625, "learning_rate": 1.5227654291932967e-05, "loss": 1.4532, "step": 3800 }, { "epoch": 0.655568015181575, "eval_loss": 1.4269640445709229, "eval_runtime": 10.8642, "eval_samples_per_second": 94.254, "eval_steps_per_second": 23.564, "step": 3800 }, { "epoch": 0.6557405330803071, "grad_norm": 0.6953125, "learning_rate": 1.5225333760298435e-05, "loss": 1.5473, "step": 3801 }, { "epoch": 0.6559130509790391, "grad_norm": 0.59765625, "learning_rate": 1.5223012841538527e-05, "loss": 1.5213, "step": 3802 }, { "epoch": 0.656085568877771, "grad_norm": 0.58984375, "learning_rate": 1.5220691535825194e-05, "loss": 1.4789, "step": 3803 }, { "epoch": 0.6562580867765031, "grad_norm": 0.6640625, "learning_rate": 1.5218369843330409e-05, "loss": 1.5483, "step": 3804 }, { "epoch": 0.6564306046752351, "grad_norm": 0.59375, "learning_rate": 1.5216047764226183e-05, "loss": 1.3859, "step": 3805 }, { "epoch": 0.656603122573967, "grad_norm": 0.67578125, "learning_rate": 1.5213725298684546e-05, "loss": 1.4533, "step": 3806 }, { "epoch": 0.6567756404726991, "grad_norm": 0.6171875, "learning_rate": 1.5211402446877561e-05, "loss": 1.417, "step": 3807 }, { "epoch": 0.6569481583714311, "grad_norm": 0.63671875, "learning_rate": 1.5209079208977322e-05, "loss": 1.5, "step": 3808 }, { "epoch": 0.657120676270163, "grad_norm": 0.64453125, "learning_rate": 1.5206755585155946e-05, "loss": 1.494, "step": 3809 }, { "epoch": 0.657293194168895, "grad_norm": 0.640625, "learning_rate": 1.5204431575585586e-05, "loss": 1.4712, "step": 3810 }, { "epoch": 0.657465712067627, "grad_norm": 0.73046875, "learning_rate": 1.520210718043841e-05, "loss": 1.355, "step": 3811 }, { "epoch": 0.657638229966359, "grad_norm": 0.61328125, "learning_rate": 1.5199782399886633e-05, "loss": 1.4093, "step": 3812 }, { "epoch": 0.657810747865091, "grad_norm": 0.72265625, "learning_rate": 1.519745723410249e-05, "loss": 1.4952, "step": 3813 }, { "epoch": 0.657983265763823, "grad_norm": 0.90625, "learning_rate": 1.5195131683258235e-05, "loss": 1.5056, "step": 3814 }, { "epoch": 0.658155783662555, "grad_norm": 0.8203125, "learning_rate": 1.5192805747526168e-05, "loss": 1.4134, "step": 3815 }, { "epoch": 0.658328301561287, "grad_norm": 0.64453125, "learning_rate": 1.5190479427078602e-05, "loss": 1.5335, "step": 3816 }, { "epoch": 0.6585008194600189, "grad_norm": 0.85546875, "learning_rate": 1.518815272208789e-05, "loss": 1.4615, "step": 3817 }, { "epoch": 0.658673337358751, "grad_norm": 0.69921875, "learning_rate": 1.5185825632726405e-05, "loss": 1.3429, "step": 3818 }, { "epoch": 0.658845855257483, "grad_norm": 0.60546875, "learning_rate": 1.5183498159166557e-05, "loss": 1.4464, "step": 3819 }, { "epoch": 0.6590183731562149, "grad_norm": 0.6328125, "learning_rate": 1.5181170301580776e-05, "loss": 1.5144, "step": 3820 }, { "epoch": 0.659190891054947, "grad_norm": 0.6484375, "learning_rate": 1.5178842060141526e-05, "loss": 1.4413, "step": 3821 }, { "epoch": 0.659363408953679, "grad_norm": 0.59765625, "learning_rate": 1.5176513435021297e-05, "loss": 1.4433, "step": 3822 }, { "epoch": 0.6595359268524109, "grad_norm": 0.640625, "learning_rate": 1.5174184426392609e-05, "loss": 1.5579, "step": 3823 }, { "epoch": 0.659708444751143, "grad_norm": 0.8125, "learning_rate": 1.5171855034428006e-05, "loss": 1.2912, "step": 3824 }, { "epoch": 0.659880962649875, "grad_norm": 0.6328125, "learning_rate": 1.5169525259300071e-05, "loss": 1.518, "step": 3825 }, { "epoch": 0.6600534805486069, "grad_norm": 0.62890625, "learning_rate": 1.5167195101181405e-05, "loss": 1.4642, "step": 3826 }, { "epoch": 0.6602259984473389, "grad_norm": 0.640625, "learning_rate": 1.5164864560244636e-05, "loss": 1.3841, "step": 3827 }, { "epoch": 0.6603985163460709, "grad_norm": 0.76171875, "learning_rate": 1.516253363666243e-05, "loss": 1.5328, "step": 3828 }, { "epoch": 0.6605710342448029, "grad_norm": 0.625, "learning_rate": 1.5160202330607476e-05, "loss": 1.4713, "step": 3829 }, { "epoch": 0.6607435521435349, "grad_norm": 0.71875, "learning_rate": 1.515787064225249e-05, "loss": 1.4119, "step": 3830 }, { "epoch": 0.6609160700422669, "grad_norm": 0.71484375, "learning_rate": 1.515553857177022e-05, "loss": 1.5059, "step": 3831 }, { "epoch": 0.6610885879409989, "grad_norm": 0.671875, "learning_rate": 1.5153206119333436e-05, "loss": 1.4441, "step": 3832 }, { "epoch": 0.6612611058397309, "grad_norm": 1.21875, "learning_rate": 1.5150873285114948e-05, "loss": 1.4158, "step": 3833 }, { "epoch": 0.6614336237384628, "grad_norm": 0.828125, "learning_rate": 1.5148540069287583e-05, "loss": 1.437, "step": 3834 }, { "epoch": 0.6616061416371949, "grad_norm": 0.61328125, "learning_rate": 1.5146206472024196e-05, "loss": 1.4809, "step": 3835 }, { "epoch": 0.6617786595359268, "grad_norm": 0.67578125, "learning_rate": 1.5143872493497683e-05, "loss": 1.4009, "step": 3836 }, { "epoch": 0.6619511774346588, "grad_norm": 0.6015625, "learning_rate": 1.5141538133880951e-05, "loss": 1.4531, "step": 3837 }, { "epoch": 0.6621236953333909, "grad_norm": 0.6640625, "learning_rate": 1.5139203393346953e-05, "loss": 1.4045, "step": 3838 }, { "epoch": 0.6622962132321228, "grad_norm": 1.203125, "learning_rate": 1.5136868272068653e-05, "loss": 1.4653, "step": 3839 }, { "epoch": 0.6624687311308548, "grad_norm": 0.671875, "learning_rate": 1.5134532770219054e-05, "loss": 1.5117, "step": 3840 }, { "epoch": 0.6626412490295868, "grad_norm": 0.6015625, "learning_rate": 1.513219688797119e-05, "loss": 1.4574, "step": 3841 }, { "epoch": 0.6628137669283188, "grad_norm": 0.578125, "learning_rate": 1.512986062549811e-05, "loss": 1.4325, "step": 3842 }, { "epoch": 0.6629862848270508, "grad_norm": 0.73828125, "learning_rate": 1.51275239829729e-05, "loss": 1.6238, "step": 3843 }, { "epoch": 0.6631588027257828, "grad_norm": 0.71484375, "learning_rate": 1.5125186960568678e-05, "loss": 1.4831, "step": 3844 }, { "epoch": 0.6633313206245148, "grad_norm": 0.71484375, "learning_rate": 1.5122849558458583e-05, "loss": 1.3717, "step": 3845 }, { "epoch": 0.6635038385232468, "grad_norm": 0.6171875, "learning_rate": 1.512051177681578e-05, "loss": 1.3688, "step": 3846 }, { "epoch": 0.6636763564219788, "grad_norm": 0.6953125, "learning_rate": 1.5118173615813474e-05, "loss": 1.4871, "step": 3847 }, { "epoch": 0.6638488743207108, "grad_norm": 0.6796875, "learning_rate": 1.5115835075624885e-05, "loss": 1.4088, "step": 3848 }, { "epoch": 0.6640213922194428, "grad_norm": 0.640625, "learning_rate": 1.5113496156423271e-05, "loss": 1.3805, "step": 3849 }, { "epoch": 0.6641939101181747, "grad_norm": 0.671875, "learning_rate": 1.5111156858381906e-05, "loss": 1.531, "step": 3850 }, { "epoch": 0.6643664280169067, "grad_norm": 0.7109375, "learning_rate": 1.510881718167411e-05, "loss": 1.494, "step": 3851 }, { "epoch": 0.6645389459156388, "grad_norm": 0.6015625, "learning_rate": 1.5106477126473209e-05, "loss": 1.4728, "step": 3852 }, { "epoch": 0.6647114638143707, "grad_norm": 0.6484375, "learning_rate": 1.5104136692952582e-05, "loss": 1.4782, "step": 3853 }, { "epoch": 0.6648839817131027, "grad_norm": 0.62109375, "learning_rate": 1.5101795881285614e-05, "loss": 1.4778, "step": 3854 }, { "epoch": 0.6650564996118348, "grad_norm": 0.5625, "learning_rate": 1.5099454691645731e-05, "loss": 1.4344, "step": 3855 }, { "epoch": 0.6652290175105667, "grad_norm": 0.6875, "learning_rate": 1.509711312420638e-05, "loss": 1.4528, "step": 3856 }, { "epoch": 0.6654015354092987, "grad_norm": 0.63671875, "learning_rate": 1.5094771179141043e-05, "loss": 1.4104, "step": 3857 }, { "epoch": 0.6655740533080307, "grad_norm": 0.66015625, "learning_rate": 1.5092428856623222e-05, "loss": 1.4662, "step": 3858 }, { "epoch": 0.6657465712067627, "grad_norm": 0.64453125, "learning_rate": 1.509008615682645e-05, "loss": 1.4011, "step": 3859 }, { "epoch": 0.6659190891054947, "grad_norm": 0.58203125, "learning_rate": 1.5087743079924293e-05, "loss": 1.3801, "step": 3860 }, { "epoch": 0.6660916070042266, "grad_norm": 0.7109375, "learning_rate": 1.5085399626090343e-05, "loss": 1.478, "step": 3861 }, { "epoch": 0.6662641249029587, "grad_norm": 0.625, "learning_rate": 1.5083055795498209e-05, "loss": 1.5616, "step": 3862 }, { "epoch": 0.6664366428016907, "grad_norm": 0.66796875, "learning_rate": 1.5080711588321544e-05, "loss": 1.4164, "step": 3863 }, { "epoch": 0.6666091607004226, "grad_norm": 0.8828125, "learning_rate": 1.5078367004734014e-05, "loss": 1.4833, "step": 3864 }, { "epoch": 0.6667816785991547, "grad_norm": 0.59375, "learning_rate": 1.507602204490933e-05, "loss": 1.4232, "step": 3865 }, { "epoch": 0.6669541964978867, "grad_norm": 0.68359375, "learning_rate": 1.5073676709021214e-05, "loss": 1.4294, "step": 3866 }, { "epoch": 0.6671267143966186, "grad_norm": 0.60546875, "learning_rate": 1.5071330997243427e-05, "loss": 1.4871, "step": 3867 }, { "epoch": 0.6672992322953506, "grad_norm": 0.59375, "learning_rate": 1.506898490974975e-05, "loss": 1.3962, "step": 3868 }, { "epoch": 0.6674717501940827, "grad_norm": 0.68359375, "learning_rate": 1.5066638446714002e-05, "loss": 1.452, "step": 3869 }, { "epoch": 0.6676442680928146, "grad_norm": 0.6640625, "learning_rate": 1.5064291608310017e-05, "loss": 1.5522, "step": 3870 }, { "epoch": 0.6678167859915466, "grad_norm": 0.609375, "learning_rate": 1.5061944394711669e-05, "loss": 1.5069, "step": 3871 }, { "epoch": 0.6679893038902787, "grad_norm": 0.59765625, "learning_rate": 1.505959680609285e-05, "loss": 1.4598, "step": 3872 }, { "epoch": 0.6681618217890106, "grad_norm": 0.66796875, "learning_rate": 1.5057248842627488e-05, "loss": 1.4333, "step": 3873 }, { "epoch": 0.6683343396877426, "grad_norm": 0.58984375, "learning_rate": 1.505490050448953e-05, "loss": 1.4226, "step": 3874 }, { "epoch": 0.6685068575864745, "grad_norm": 0.62890625, "learning_rate": 1.505255179185296e-05, "loss": 1.4309, "step": 3875 }, { "epoch": 0.6686793754852066, "grad_norm": 0.890625, "learning_rate": 1.5050202704891783e-05, "loss": 1.4525, "step": 3876 }, { "epoch": 0.6688518933839386, "grad_norm": 0.703125, "learning_rate": 1.5047853243780033e-05, "loss": 1.4938, "step": 3877 }, { "epoch": 0.6690244112826705, "grad_norm": 0.6328125, "learning_rate": 1.5045503408691776e-05, "loss": 1.3831, "step": 3878 }, { "epoch": 0.6691969291814026, "grad_norm": 0.5703125, "learning_rate": 1.5043153199801101e-05, "loss": 1.5183, "step": 3879 }, { "epoch": 0.6693694470801346, "grad_norm": 0.6171875, "learning_rate": 1.5040802617282124e-05, "loss": 1.4216, "step": 3880 }, { "epoch": 0.6695419649788665, "grad_norm": 0.71875, "learning_rate": 1.5038451661308994e-05, "loss": 1.3953, "step": 3881 }, { "epoch": 0.6697144828775986, "grad_norm": 0.69921875, "learning_rate": 1.5036100332055884e-05, "loss": 1.5554, "step": 3882 }, { "epoch": 0.6698870007763306, "grad_norm": 0.7109375, "learning_rate": 1.5033748629696994e-05, "loss": 1.4274, "step": 3883 }, { "epoch": 0.6700595186750625, "grad_norm": 0.6328125, "learning_rate": 1.5031396554406548e-05, "loss": 1.4097, "step": 3884 }, { "epoch": 0.6702320365737945, "grad_norm": 0.6796875, "learning_rate": 1.5029044106358815e-05, "loss": 1.4944, "step": 3885 }, { "epoch": 0.6704045544725266, "grad_norm": 0.7421875, "learning_rate": 1.5026691285728067e-05, "loss": 1.4382, "step": 3886 }, { "epoch": 0.6705770723712585, "grad_norm": 0.65234375, "learning_rate": 1.5024338092688622e-05, "loss": 1.3788, "step": 3887 }, { "epoch": 0.6707495902699905, "grad_norm": 0.59765625, "learning_rate": 1.502198452741482e-05, "loss": 1.4075, "step": 3888 }, { "epoch": 0.6709221081687226, "grad_norm": 0.70703125, "learning_rate": 1.5019630590081025e-05, "loss": 1.5182, "step": 3889 }, { "epoch": 0.6710946260674545, "grad_norm": 0.640625, "learning_rate": 1.5017276280861628e-05, "loss": 1.5196, "step": 3890 }, { "epoch": 0.6712671439661865, "grad_norm": 0.6015625, "learning_rate": 1.5014921599931056e-05, "loss": 1.3461, "step": 3891 }, { "epoch": 0.6714396618649184, "grad_norm": 0.59765625, "learning_rate": 1.5012566547463758e-05, "loss": 1.4824, "step": 3892 }, { "epoch": 0.6716121797636505, "grad_norm": 0.6484375, "learning_rate": 1.501021112363421e-05, "loss": 1.5494, "step": 3893 }, { "epoch": 0.6717846976623825, "grad_norm": 0.6640625, "learning_rate": 1.5007855328616917e-05, "loss": 1.4709, "step": 3894 }, { "epoch": 0.6719572155611144, "grad_norm": 0.6171875, "learning_rate": 1.500549916258641e-05, "loss": 1.3718, "step": 3895 }, { "epoch": 0.6721297334598465, "grad_norm": 0.59765625, "learning_rate": 1.500314262571725e-05, "loss": 1.347, "step": 3896 }, { "epoch": 0.6723022513585785, "grad_norm": 0.75, "learning_rate": 1.5000785718184026e-05, "loss": 1.525, "step": 3897 }, { "epoch": 0.6724747692573104, "grad_norm": 0.7421875, "learning_rate": 1.4998428440161345e-05, "loss": 1.4657, "step": 3898 }, { "epoch": 0.6726472871560425, "grad_norm": 0.6328125, "learning_rate": 1.4996070791823856e-05, "loss": 1.4022, "step": 3899 }, { "epoch": 0.6728198050547745, "grad_norm": 0.69140625, "learning_rate": 1.4993712773346225e-05, "loss": 1.3463, "step": 3900 }, { "epoch": 0.6728198050547745, "eval_loss": 1.4253987073898315, "eval_runtime": 10.893, "eval_samples_per_second": 94.005, "eval_steps_per_second": 23.501, "step": 3900 }, { "epoch": 0.6729923229535064, "grad_norm": 1.6171875, "learning_rate": 1.4991354384903148e-05, "loss": 1.4465, "step": 3901 }, { "epoch": 0.6731648408522384, "grad_norm": 0.66796875, "learning_rate": 1.4988995626669352e-05, "loss": 1.4454, "step": 3902 }, { "epoch": 0.6733373587509704, "grad_norm": 0.66796875, "learning_rate": 1.4986636498819586e-05, "loss": 1.3895, "step": 3903 }, { "epoch": 0.6735098766497024, "grad_norm": 0.66015625, "learning_rate": 1.4984277001528634e-05, "loss": 1.4343, "step": 3904 }, { "epoch": 0.6736823945484344, "grad_norm": 0.609375, "learning_rate": 1.4981917134971298e-05, "loss": 1.5163, "step": 3905 }, { "epoch": 0.6738549124471664, "grad_norm": 0.7578125, "learning_rate": 1.497955689932241e-05, "loss": 1.4312, "step": 3906 }, { "epoch": 0.6740274303458984, "grad_norm": 0.75, "learning_rate": 1.4977196294756832e-05, "loss": 1.4852, "step": 3907 }, { "epoch": 0.6741999482446304, "grad_norm": 0.62109375, "learning_rate": 1.4974835321449454e-05, "loss": 1.3581, "step": 3908 }, { "epoch": 0.6743724661433623, "grad_norm": 0.68359375, "learning_rate": 1.4972473979575195e-05, "loss": 1.4458, "step": 3909 }, { "epoch": 0.6745449840420944, "grad_norm": 0.640625, "learning_rate": 1.4970112269308991e-05, "loss": 1.5073, "step": 3910 }, { "epoch": 0.6747175019408264, "grad_norm": 0.6875, "learning_rate": 1.4967750190825816e-05, "loss": 1.4186, "step": 3911 }, { "epoch": 0.6748900198395583, "grad_norm": 1.078125, "learning_rate": 1.4965387744300665e-05, "loss": 1.498, "step": 3912 }, { "epoch": 0.6750625377382904, "grad_norm": 0.640625, "learning_rate": 1.4963024929908568e-05, "loss": 1.541, "step": 3913 }, { "epoch": 0.6752350556370224, "grad_norm": 0.5625, "learning_rate": 1.4960661747824575e-05, "loss": 1.451, "step": 3914 }, { "epoch": 0.6754075735357543, "grad_norm": 0.6796875, "learning_rate": 1.495829819822376e-05, "loss": 1.4844, "step": 3915 }, { "epoch": 0.6755800914344863, "grad_norm": 0.64453125, "learning_rate": 1.4955934281281234e-05, "loss": 1.3985, "step": 3916 }, { "epoch": 0.6757526093332183, "grad_norm": 0.66015625, "learning_rate": 1.4953569997172133e-05, "loss": 1.4535, "step": 3917 }, { "epoch": 0.6759251272319503, "grad_norm": 0.6953125, "learning_rate": 1.4951205346071613e-05, "loss": 1.4962, "step": 3918 }, { "epoch": 0.6760976451306823, "grad_norm": 0.6328125, "learning_rate": 1.4948840328154863e-05, "loss": 1.4271, "step": 3919 }, { "epoch": 0.6762701630294143, "grad_norm": 0.65625, "learning_rate": 1.4946474943597103e-05, "loss": 1.3688, "step": 3920 }, { "epoch": 0.6764426809281463, "grad_norm": 0.67578125, "learning_rate": 1.4944109192573572e-05, "loss": 1.4197, "step": 3921 }, { "epoch": 0.6766151988268783, "grad_norm": 0.578125, "learning_rate": 1.4941743075259539e-05, "loss": 1.4688, "step": 3922 }, { "epoch": 0.6767877167256103, "grad_norm": 0.6640625, "learning_rate": 1.4939376591830301e-05, "loss": 1.4973, "step": 3923 }, { "epoch": 0.6769602346243423, "grad_norm": 0.60546875, "learning_rate": 1.4937009742461181e-05, "loss": 1.3811, "step": 3924 }, { "epoch": 0.6771327525230743, "grad_norm": 0.58984375, "learning_rate": 1.4934642527327537e-05, "loss": 1.477, "step": 3925 }, { "epoch": 0.6773052704218062, "grad_norm": 0.58203125, "learning_rate": 1.4932274946604736e-05, "loss": 1.3482, "step": 3926 }, { "epoch": 0.6774777883205383, "grad_norm": 0.609375, "learning_rate": 1.492990700046819e-05, "loss": 1.4613, "step": 3927 }, { "epoch": 0.6776503062192702, "grad_norm": 0.625, "learning_rate": 1.492753868909333e-05, "loss": 1.4796, "step": 3928 }, { "epoch": 0.6778228241180022, "grad_norm": 0.5859375, "learning_rate": 1.4925170012655614e-05, "loss": 1.4654, "step": 3929 }, { "epoch": 0.6779953420167343, "grad_norm": 0.59765625, "learning_rate": 1.4922800971330533e-05, "loss": 1.4711, "step": 3930 }, { "epoch": 0.6781678599154662, "grad_norm": 0.63671875, "learning_rate": 1.4920431565293596e-05, "loss": 1.5027, "step": 3931 }, { "epoch": 0.6783403778141982, "grad_norm": 0.59765625, "learning_rate": 1.4918061794720342e-05, "loss": 1.4228, "step": 3932 }, { "epoch": 0.6785128957129302, "grad_norm": 0.58203125, "learning_rate": 1.4915691659786342e-05, "loss": 1.4788, "step": 3933 }, { "epoch": 0.6786854136116622, "grad_norm": 0.7109375, "learning_rate": 1.4913321160667189e-05, "loss": 1.5247, "step": 3934 }, { "epoch": 0.6788579315103942, "grad_norm": 0.734375, "learning_rate": 1.4910950297538505e-05, "loss": 1.4611, "step": 3935 }, { "epoch": 0.6790304494091262, "grad_norm": 0.625, "learning_rate": 1.4908579070575936e-05, "loss": 1.5271, "step": 3936 }, { "epoch": 0.6792029673078582, "grad_norm": 0.796875, "learning_rate": 1.4906207479955164e-05, "loss": 1.4239, "step": 3937 }, { "epoch": 0.6793754852065902, "grad_norm": 0.921875, "learning_rate": 1.4903835525851884e-05, "loss": 1.413, "step": 3938 }, { "epoch": 0.6795480031053222, "grad_norm": 0.69140625, "learning_rate": 1.4901463208441827e-05, "loss": 1.4711, "step": 3939 }, { "epoch": 0.6797205210040542, "grad_norm": 0.9609375, "learning_rate": 1.489909052790075e-05, "loss": 1.4645, "step": 3940 }, { "epoch": 0.6798930389027862, "grad_norm": 0.75, "learning_rate": 1.4896717484404437e-05, "loss": 1.4735, "step": 3941 }, { "epoch": 0.6800655568015181, "grad_norm": 0.65234375, "learning_rate": 1.4894344078128696e-05, "loss": 1.4367, "step": 3942 }, { "epoch": 0.6802380747002501, "grad_norm": 0.66796875, "learning_rate": 1.4891970309249361e-05, "loss": 1.5428, "step": 3943 }, { "epoch": 0.6804105925989822, "grad_norm": 0.796875, "learning_rate": 1.4889596177942303e-05, "loss": 1.3409, "step": 3944 }, { "epoch": 0.6805831104977141, "grad_norm": 0.6875, "learning_rate": 1.4887221684383407e-05, "loss": 1.5084, "step": 3945 }, { "epoch": 0.6807556283964461, "grad_norm": 0.578125, "learning_rate": 1.4884846828748594e-05, "loss": 1.4532, "step": 3946 }, { "epoch": 0.6809281462951782, "grad_norm": 0.66796875, "learning_rate": 1.4882471611213802e-05, "loss": 1.4367, "step": 3947 }, { "epoch": 0.6811006641939101, "grad_norm": 0.8359375, "learning_rate": 1.488009603195501e-05, "loss": 1.4749, "step": 3948 }, { "epoch": 0.6812731820926421, "grad_norm": 0.6875, "learning_rate": 1.4877720091148209e-05, "loss": 1.4333, "step": 3949 }, { "epoch": 0.681445699991374, "grad_norm": 0.64453125, "learning_rate": 1.4875343788969426e-05, "loss": 1.3904, "step": 3950 }, { "epoch": 0.6816182178901061, "grad_norm": 0.609375, "learning_rate": 1.4872967125594713e-05, "loss": 1.4519, "step": 3951 }, { "epoch": 0.6817907357888381, "grad_norm": 0.58203125, "learning_rate": 1.4870590101200148e-05, "loss": 1.4636, "step": 3952 }, { "epoch": 0.68196325368757, "grad_norm": 0.73828125, "learning_rate": 1.4868212715961838e-05, "loss": 1.5242, "step": 3953 }, { "epoch": 0.6821357715863021, "grad_norm": 0.66015625, "learning_rate": 1.486583497005591e-05, "loss": 1.4555, "step": 3954 }, { "epoch": 0.6823082894850341, "grad_norm": 0.609375, "learning_rate": 1.4863456863658522e-05, "loss": 1.5198, "step": 3955 }, { "epoch": 0.682480807383766, "grad_norm": 0.6640625, "learning_rate": 1.4861078396945865e-05, "loss": 1.3825, "step": 3956 }, { "epoch": 0.6826533252824981, "grad_norm": 0.66796875, "learning_rate": 1.4858699570094144e-05, "loss": 1.3163, "step": 3957 }, { "epoch": 0.6828258431812301, "grad_norm": 0.7265625, "learning_rate": 1.4856320383279603e-05, "loss": 1.3736, "step": 3958 }, { "epoch": 0.682998361079962, "grad_norm": 0.65625, "learning_rate": 1.4853940836678504e-05, "loss": 1.361, "step": 3959 }, { "epoch": 0.683170878978694, "grad_norm": 0.734375, "learning_rate": 1.4851560930467137e-05, "loss": 1.4556, "step": 3960 }, { "epoch": 0.6833433968774261, "grad_norm": 0.73828125, "learning_rate": 1.4849180664821822e-05, "loss": 1.5069, "step": 3961 }, { "epoch": 0.683515914776158, "grad_norm": 0.60546875, "learning_rate": 1.4846800039918908e-05, "loss": 1.4745, "step": 3962 }, { "epoch": 0.68368843267489, "grad_norm": 0.77734375, "learning_rate": 1.4844419055934761e-05, "loss": 1.415, "step": 3963 }, { "epoch": 0.6838609505736221, "grad_norm": 0.6015625, "learning_rate": 1.4842037713045778e-05, "loss": 1.3378, "step": 3964 }, { "epoch": 0.684033468472354, "grad_norm": 0.66796875, "learning_rate": 1.483965601142839e-05, "loss": 1.5044, "step": 3965 }, { "epoch": 0.684205986371086, "grad_norm": 0.64453125, "learning_rate": 1.4837273951259044e-05, "loss": 1.4319, "step": 3966 }, { "epoch": 0.6843785042698179, "grad_norm": 0.67578125, "learning_rate": 1.4834891532714218e-05, "loss": 1.3895, "step": 3967 }, { "epoch": 0.68455102216855, "grad_norm": 0.6484375, "learning_rate": 1.4832508755970418e-05, "loss": 1.3703, "step": 3968 }, { "epoch": 0.684723540067282, "grad_norm": 0.5859375, "learning_rate": 1.4830125621204177e-05, "loss": 1.4502, "step": 3969 }, { "epoch": 0.6848960579660139, "grad_norm": 0.57421875, "learning_rate": 1.4827742128592046e-05, "loss": 1.4776, "step": 3970 }, { "epoch": 0.685068575864746, "grad_norm": 0.6328125, "learning_rate": 1.4825358278310615e-05, "loss": 1.4527, "step": 3971 }, { "epoch": 0.685241093763478, "grad_norm": 0.63671875, "learning_rate": 1.4822974070536493e-05, "loss": 1.4456, "step": 3972 }, { "epoch": 0.6854136116622099, "grad_norm": 0.61328125, "learning_rate": 1.4820589505446316e-05, "loss": 1.4968, "step": 3973 }, { "epoch": 0.6855861295609419, "grad_norm": 0.60546875, "learning_rate": 1.4818204583216749e-05, "loss": 1.4718, "step": 3974 }, { "epoch": 0.685758647459674, "grad_norm": 0.66796875, "learning_rate": 1.4815819304024482e-05, "loss": 1.4568, "step": 3975 }, { "epoch": 0.6859311653584059, "grad_norm": 0.63671875, "learning_rate": 1.4813433668046227e-05, "loss": 1.4359, "step": 3976 }, { "epoch": 0.6861036832571379, "grad_norm": 0.640625, "learning_rate": 1.4811047675458729e-05, "loss": 1.4681, "step": 3977 }, { "epoch": 0.68627620115587, "grad_norm": 0.59765625, "learning_rate": 1.4808661326438765e-05, "loss": 1.4685, "step": 3978 }, { "epoch": 0.6864487190546019, "grad_norm": 0.75390625, "learning_rate": 1.480627462116312e-05, "loss": 1.4094, "step": 3979 }, { "epoch": 0.6866212369533339, "grad_norm": 0.5859375, "learning_rate": 1.4803887559808618e-05, "loss": 1.4911, "step": 3980 }, { "epoch": 0.686793754852066, "grad_norm": 0.63671875, "learning_rate": 1.4801500142552112e-05, "loss": 1.4285, "step": 3981 }, { "epoch": 0.6869662727507979, "grad_norm": 0.6640625, "learning_rate": 1.4799112369570475e-05, "loss": 1.4582, "step": 3982 }, { "epoch": 0.6871387906495299, "grad_norm": 0.703125, "learning_rate": 1.4796724241040604e-05, "loss": 1.4502, "step": 3983 }, { "epoch": 0.6873113085482618, "grad_norm": 0.62109375, "learning_rate": 1.479433575713943e-05, "loss": 1.5991, "step": 3984 }, { "epoch": 0.6874838264469939, "grad_norm": 0.7109375, "learning_rate": 1.4791946918043911e-05, "loss": 1.5316, "step": 3985 }, { "epoch": 0.6876563443457259, "grad_norm": 0.6015625, "learning_rate": 1.478955772393102e-05, "loss": 1.4841, "step": 3986 }, { "epoch": 0.6878288622444578, "grad_norm": 0.60546875, "learning_rate": 1.4787168174977768e-05, "loss": 1.4967, "step": 3987 }, { "epoch": 0.6880013801431899, "grad_norm": 0.6484375, "learning_rate": 1.4784778271361185e-05, "loss": 1.4086, "step": 3988 }, { "epoch": 0.6881738980419219, "grad_norm": 0.671875, "learning_rate": 1.478238801325833e-05, "loss": 1.4017, "step": 3989 }, { "epoch": 0.6883464159406538, "grad_norm": 0.7109375, "learning_rate": 1.4779997400846292e-05, "loss": 1.3941, "step": 3990 }, { "epoch": 0.6885189338393858, "grad_norm": 0.9296875, "learning_rate": 1.4777606434302176e-05, "loss": 1.3313, "step": 3991 }, { "epoch": 0.6886914517381179, "grad_norm": 0.8359375, "learning_rate": 1.4775215113803127e-05, "loss": 1.3779, "step": 3992 }, { "epoch": 0.6888639696368498, "grad_norm": 0.703125, "learning_rate": 1.4772823439526302e-05, "loss": 1.4789, "step": 3993 }, { "epoch": 0.6890364875355818, "grad_norm": 0.7265625, "learning_rate": 1.4770431411648898e-05, "loss": 1.5234, "step": 3994 }, { "epoch": 0.6892090054343138, "grad_norm": 0.80078125, "learning_rate": 1.4768039030348127e-05, "loss": 1.5431, "step": 3995 }, { "epoch": 0.6893815233330458, "grad_norm": 0.609375, "learning_rate": 1.4765646295801231e-05, "loss": 1.395, "step": 3996 }, { "epoch": 0.6895540412317778, "grad_norm": 0.66015625, "learning_rate": 1.4763253208185482e-05, "loss": 1.391, "step": 3997 }, { "epoch": 0.6897265591305098, "grad_norm": 0.8046875, "learning_rate": 1.476085976767817e-05, "loss": 1.4651, "step": 3998 }, { "epoch": 0.6898990770292418, "grad_norm": 0.64453125, "learning_rate": 1.4758465974456622e-05, "loss": 1.5271, "step": 3999 }, { "epoch": 0.6900715949279738, "grad_norm": 0.8515625, "learning_rate": 1.4756071828698182e-05, "loss": 1.4793, "step": 4000 }, { "epoch": 0.6900715949279738, "eval_loss": 1.4241275787353516, "eval_runtime": 10.9175, "eval_samples_per_second": 93.794, "eval_steps_per_second": 23.449, "step": 4000 }, { "epoch": 0.6902441128267057, "grad_norm": 0.640625, "learning_rate": 1.4753677330580223e-05, "loss": 1.4217, "step": 4001 }, { "epoch": 0.6904166307254378, "grad_norm": 0.6796875, "learning_rate": 1.4751282480280147e-05, "loss": 1.4645, "step": 4002 }, { "epoch": 0.6905891486241698, "grad_norm": 0.578125, "learning_rate": 1.4748887277975376e-05, "loss": 1.4786, "step": 4003 }, { "epoch": 0.6907616665229017, "grad_norm": 0.6953125, "learning_rate": 1.4746491723843364e-05, "loss": 1.4642, "step": 4004 }, { "epoch": 0.6909341844216338, "grad_norm": 0.59765625, "learning_rate": 1.4744095818061586e-05, "loss": 1.4822, "step": 4005 }, { "epoch": 0.6911067023203658, "grad_norm": 0.6640625, "learning_rate": 1.474169956080755e-05, "loss": 1.4705, "step": 4006 }, { "epoch": 0.6912792202190977, "grad_norm": 0.890625, "learning_rate": 1.473930295225878e-05, "loss": 1.512, "step": 4007 }, { "epoch": 0.6914517381178297, "grad_norm": 0.59375, "learning_rate": 1.4736905992592837e-05, "loss": 1.5389, "step": 4008 }, { "epoch": 0.6916242560165617, "grad_norm": 0.5703125, "learning_rate": 1.4734508681987296e-05, "loss": 1.306, "step": 4009 }, { "epoch": 0.6917967739152937, "grad_norm": 0.625, "learning_rate": 1.4732111020619775e-05, "loss": 1.4237, "step": 4010 }, { "epoch": 0.6919692918140257, "grad_norm": 0.78515625, "learning_rate": 1.4729713008667901e-05, "loss": 1.4676, "step": 4011 }, { "epoch": 0.6921418097127577, "grad_norm": 0.625, "learning_rate": 1.4727314646309333e-05, "loss": 1.5024, "step": 4012 }, { "epoch": 0.6923143276114897, "grad_norm": 0.859375, "learning_rate": 1.4724915933721758e-05, "loss": 1.3662, "step": 4013 }, { "epoch": 0.6924868455102217, "grad_norm": 0.63671875, "learning_rate": 1.4722516871082886e-05, "loss": 1.4298, "step": 4014 }, { "epoch": 0.6926593634089537, "grad_norm": 0.6953125, "learning_rate": 1.472011745857046e-05, "loss": 1.3987, "step": 4015 }, { "epoch": 0.6928318813076857, "grad_norm": 0.70703125, "learning_rate": 1.471771769636224e-05, "loss": 1.4815, "step": 4016 }, { "epoch": 0.6930043992064177, "grad_norm": 3.40625, "learning_rate": 1.4715317584636012e-05, "loss": 1.524, "step": 4017 }, { "epoch": 0.6931769171051496, "grad_norm": 0.76171875, "learning_rate": 1.47129171235696e-05, "loss": 1.4881, "step": 4018 }, { "epoch": 0.6933494350038817, "grad_norm": 0.65234375, "learning_rate": 1.4710516313340836e-05, "loss": 1.5024, "step": 4019 }, { "epoch": 0.6935219529026136, "grad_norm": 0.94140625, "learning_rate": 1.470811515412759e-05, "loss": 1.4709, "step": 4020 }, { "epoch": 0.6936944708013456, "grad_norm": 0.6953125, "learning_rate": 1.470571364610776e-05, "loss": 1.4479, "step": 4021 }, { "epoch": 0.6938669887000777, "grad_norm": 0.75, "learning_rate": 1.4703311789459261e-05, "loss": 1.4529, "step": 4022 }, { "epoch": 0.6940395065988096, "grad_norm": 0.61328125, "learning_rate": 1.4700909584360032e-05, "loss": 1.3779, "step": 4023 }, { "epoch": 0.6942120244975416, "grad_norm": 0.7578125, "learning_rate": 1.4698507030988049e-05, "loss": 1.482, "step": 4024 }, { "epoch": 0.6943845423962736, "grad_norm": 0.734375, "learning_rate": 1.469610412952131e-05, "loss": 1.5018, "step": 4025 }, { "epoch": 0.6945570602950056, "grad_norm": 0.6328125, "learning_rate": 1.4693700880137835e-05, "loss": 1.4349, "step": 4026 }, { "epoch": 0.6947295781937376, "grad_norm": 0.85546875, "learning_rate": 1.4691297283015669e-05, "loss": 1.4857, "step": 4027 }, { "epoch": 0.6949020960924696, "grad_norm": 0.83203125, "learning_rate": 1.4688893338332888e-05, "loss": 1.4458, "step": 4028 }, { "epoch": 0.6950746139912016, "grad_norm": 0.5625, "learning_rate": 1.4686489046267591e-05, "loss": 1.3835, "step": 4029 }, { "epoch": 0.6952471318899336, "grad_norm": 0.6875, "learning_rate": 1.4684084406997903e-05, "loss": 1.4679, "step": 4030 }, { "epoch": 0.6954196497886656, "grad_norm": 0.71484375, "learning_rate": 1.4681679420701972e-05, "loss": 1.4534, "step": 4031 }, { "epoch": 0.6955921676873976, "grad_norm": 0.82421875, "learning_rate": 1.4679274087557981e-05, "loss": 1.4716, "step": 4032 }, { "epoch": 0.6957646855861296, "grad_norm": 0.60546875, "learning_rate": 1.4676868407744126e-05, "loss": 1.4279, "step": 4033 }, { "epoch": 0.6959372034848615, "grad_norm": 0.94921875, "learning_rate": 1.467446238143864e-05, "loss": 1.3526, "step": 4034 }, { "epoch": 0.6961097213835935, "grad_norm": 0.7265625, "learning_rate": 1.467205600881977e-05, "loss": 1.4274, "step": 4035 }, { "epoch": 0.6962822392823256, "grad_norm": 0.5859375, "learning_rate": 1.4669649290065801e-05, "loss": 1.3703, "step": 4036 }, { "epoch": 0.6964547571810575, "grad_norm": 0.70703125, "learning_rate": 1.4667242225355034e-05, "loss": 1.4281, "step": 4037 }, { "epoch": 0.6966272750797895, "grad_norm": 0.66796875, "learning_rate": 1.4664834814865802e-05, "loss": 1.5386, "step": 4038 }, { "epoch": 0.6967997929785216, "grad_norm": 0.59375, "learning_rate": 1.4662427058776459e-05, "loss": 1.5138, "step": 4039 }, { "epoch": 0.6969723108772535, "grad_norm": 0.55859375, "learning_rate": 1.4660018957265386e-05, "loss": 1.4275, "step": 4040 }, { "epoch": 0.6971448287759855, "grad_norm": 0.75390625, "learning_rate": 1.4657610510510992e-05, "loss": 1.415, "step": 4041 }, { "epoch": 0.6973173466747175, "grad_norm": 0.6328125, "learning_rate": 1.4655201718691712e-05, "loss": 1.5508, "step": 4042 }, { "epoch": 0.6974898645734495, "grad_norm": 0.66796875, "learning_rate": 1.4652792581985997e-05, "loss": 1.4285, "step": 4043 }, { "epoch": 0.6976623824721815, "grad_norm": 0.640625, "learning_rate": 1.4650383100572338e-05, "loss": 1.4697, "step": 4044 }, { "epoch": 0.6978349003709134, "grad_norm": 0.65625, "learning_rate": 1.464797327462924e-05, "loss": 1.4522, "step": 4045 }, { "epoch": 0.6980074182696455, "grad_norm": 0.60546875, "learning_rate": 1.4645563104335243e-05, "loss": 1.5441, "step": 4046 }, { "epoch": 0.6981799361683775, "grad_norm": 0.6484375, "learning_rate": 1.4643152589868904e-05, "loss": 1.499, "step": 4047 }, { "epoch": 0.6983524540671094, "grad_norm": 0.7734375, "learning_rate": 1.4640741731408805e-05, "loss": 1.4008, "step": 4048 }, { "epoch": 0.6985249719658414, "grad_norm": 0.69140625, "learning_rate": 1.4638330529133566e-05, "loss": 1.3955, "step": 4049 }, { "epoch": 0.6986974898645735, "grad_norm": 0.7890625, "learning_rate": 1.4635918983221823e-05, "loss": 1.4477, "step": 4050 }, { "epoch": 0.6988700077633054, "grad_norm": 0.73046875, "learning_rate": 1.4633507093852229e-05, "loss": 1.5475, "step": 4051 }, { "epoch": 0.6990425256620374, "grad_norm": 0.57421875, "learning_rate": 1.4631094861203478e-05, "loss": 1.3691, "step": 4052 }, { "epoch": 0.6992150435607695, "grad_norm": 0.78125, "learning_rate": 1.4628682285454288e-05, "loss": 1.4156, "step": 4053 }, { "epoch": 0.6993875614595014, "grad_norm": 0.65234375, "learning_rate": 1.462626936678339e-05, "loss": 1.4583, "step": 4054 }, { "epoch": 0.6995600793582334, "grad_norm": 0.68359375, "learning_rate": 1.4623856105369552e-05, "loss": 1.4267, "step": 4055 }, { "epoch": 0.6997325972569655, "grad_norm": 0.58984375, "learning_rate": 1.462144250139156e-05, "loss": 1.4379, "step": 4056 }, { "epoch": 0.6999051151556974, "grad_norm": 0.734375, "learning_rate": 1.4619028555028234e-05, "loss": 1.4115, "step": 4057 }, { "epoch": 0.7000776330544294, "grad_norm": 0.7421875, "learning_rate": 1.4616614266458413e-05, "loss": 1.4709, "step": 4058 }, { "epoch": 0.7002501509531613, "grad_norm": 0.7578125, "learning_rate": 1.4614199635860958e-05, "loss": 1.4998, "step": 4059 }, { "epoch": 0.7004226688518934, "grad_norm": 0.6953125, "learning_rate": 1.4611784663414765e-05, "loss": 1.5, "step": 4060 }, { "epoch": 0.7005951867506254, "grad_norm": 0.73828125, "learning_rate": 1.4609369349298745e-05, "loss": 1.4265, "step": 4061 }, { "epoch": 0.7007677046493573, "grad_norm": 0.69140625, "learning_rate": 1.4606953693691848e-05, "loss": 1.4047, "step": 4062 }, { "epoch": 0.7009402225480894, "grad_norm": 0.66796875, "learning_rate": 1.4604537696773031e-05, "loss": 1.4074, "step": 4063 }, { "epoch": 0.7011127404468214, "grad_norm": 0.7109375, "learning_rate": 1.4602121358721295e-05, "loss": 1.5996, "step": 4064 }, { "epoch": 0.7012852583455533, "grad_norm": 0.76171875, "learning_rate": 1.4599704679715649e-05, "loss": 1.448, "step": 4065 }, { "epoch": 0.7014577762442853, "grad_norm": 0.62890625, "learning_rate": 1.4597287659935146e-05, "loss": 1.46, "step": 4066 }, { "epoch": 0.7016302941430174, "grad_norm": 0.6015625, "learning_rate": 1.4594870299558842e-05, "loss": 1.5046, "step": 4067 }, { "epoch": 0.7018028120417493, "grad_norm": 0.66796875, "learning_rate": 1.459245259876584e-05, "loss": 1.5037, "step": 4068 }, { "epoch": 0.7019753299404813, "grad_norm": 0.7265625, "learning_rate": 1.4590034557735253e-05, "loss": 1.3754, "step": 4069 }, { "epoch": 0.7021478478392134, "grad_norm": 0.66796875, "learning_rate": 1.4587616176646229e-05, "loss": 1.3533, "step": 4070 }, { "epoch": 0.7023203657379453, "grad_norm": 0.65234375, "learning_rate": 1.458519745567793e-05, "loss": 1.4342, "step": 4071 }, { "epoch": 0.7024928836366773, "grad_norm": 0.7265625, "learning_rate": 1.4582778395009556e-05, "loss": 1.408, "step": 4072 }, { "epoch": 0.7026654015354094, "grad_norm": 0.5625, "learning_rate": 1.4580358994820322e-05, "loss": 1.423, "step": 4073 }, { "epoch": 0.7028379194341413, "grad_norm": 0.69921875, "learning_rate": 1.4577939255289477e-05, "loss": 1.4098, "step": 4074 }, { "epoch": 0.7030104373328733, "grad_norm": 0.75390625, "learning_rate": 1.4575519176596286e-05, "loss": 1.4431, "step": 4075 }, { "epoch": 0.7031829552316052, "grad_norm": 0.64453125, "learning_rate": 1.4573098758920046e-05, "loss": 1.408, "step": 4076 }, { "epoch": 0.7033554731303373, "grad_norm": 0.75390625, "learning_rate": 1.457067800244007e-05, "loss": 1.4398, "step": 4077 }, { "epoch": 0.7035279910290693, "grad_norm": 0.7734375, "learning_rate": 1.4568256907335717e-05, "loss": 1.4929, "step": 4078 }, { "epoch": 0.7037005089278012, "grad_norm": 1.03125, "learning_rate": 1.4565835473786345e-05, "loss": 1.4999, "step": 4079 }, { "epoch": 0.7038730268265333, "grad_norm": 0.71875, "learning_rate": 1.4563413701971354e-05, "loss": 1.432, "step": 4080 }, { "epoch": 0.7040455447252653, "grad_norm": 0.62109375, "learning_rate": 1.4560991592070159e-05, "loss": 1.4448, "step": 4081 }, { "epoch": 0.7042180626239972, "grad_norm": 0.65234375, "learning_rate": 1.4558569144262213e-05, "loss": 1.3809, "step": 4082 }, { "epoch": 0.7043905805227292, "grad_norm": 0.71875, "learning_rate": 1.455614635872698e-05, "loss": 1.4151, "step": 4083 }, { "epoch": 0.7045630984214613, "grad_norm": 0.625, "learning_rate": 1.4553723235643955e-05, "loss": 1.4452, "step": 4084 }, { "epoch": 0.7047356163201932, "grad_norm": 0.671875, "learning_rate": 1.455129977519266e-05, "loss": 1.3651, "step": 4085 }, { "epoch": 0.7049081342189252, "grad_norm": 0.65234375, "learning_rate": 1.4548875977552643e-05, "loss": 1.3227, "step": 4086 }, { "epoch": 0.7050806521176572, "grad_norm": 0.609375, "learning_rate": 1.4546451842903468e-05, "loss": 1.4218, "step": 4087 }, { "epoch": 0.7052531700163892, "grad_norm": 0.7890625, "learning_rate": 1.4544027371424732e-05, "loss": 1.4026, "step": 4088 }, { "epoch": 0.7054256879151212, "grad_norm": 0.73828125, "learning_rate": 1.4541602563296058e-05, "loss": 1.3546, "step": 4089 }, { "epoch": 0.7055982058138532, "grad_norm": 0.6484375, "learning_rate": 1.453917741869709e-05, "loss": 1.4975, "step": 4090 }, { "epoch": 0.7057707237125852, "grad_norm": 0.69140625, "learning_rate": 1.4536751937807493e-05, "loss": 1.4868, "step": 4091 }, { "epoch": 0.7059432416113172, "grad_norm": 0.71875, "learning_rate": 1.4534326120806968e-05, "loss": 1.4914, "step": 4092 }, { "epoch": 0.7061157595100491, "grad_norm": 0.69140625, "learning_rate": 1.4531899967875229e-05, "loss": 1.4802, "step": 4093 }, { "epoch": 0.7062882774087812, "grad_norm": 0.6953125, "learning_rate": 1.4529473479192027e-05, "loss": 1.442, "step": 4094 }, { "epoch": 0.7064607953075132, "grad_norm": 0.84765625, "learning_rate": 1.4527046654937128e-05, "loss": 1.5061, "step": 4095 }, { "epoch": 0.7066333132062451, "grad_norm": 0.65234375, "learning_rate": 1.4524619495290324e-05, "loss": 1.3328, "step": 4096 }, { "epoch": 0.7068058311049772, "grad_norm": 0.58984375, "learning_rate": 1.4522192000431439e-05, "loss": 1.5061, "step": 4097 }, { "epoch": 0.7069783490037092, "grad_norm": 0.71875, "learning_rate": 1.4519764170540315e-05, "loss": 1.4898, "step": 4098 }, { "epoch": 0.7071508669024411, "grad_norm": 0.89453125, "learning_rate": 1.451733600579682e-05, "loss": 1.4641, "step": 4099 }, { "epoch": 0.7073233848011731, "grad_norm": 0.62109375, "learning_rate": 1.451490750638085e-05, "loss": 1.3865, "step": 4100 }, { "epoch": 0.7073233848011731, "eval_loss": 1.4227409362792969, "eval_runtime": 10.9937, "eval_samples_per_second": 93.145, "eval_steps_per_second": 23.286, "step": 4100 }, { "epoch": 0.7074959026999051, "grad_norm": 0.62890625, "learning_rate": 1.4512478672472318e-05, "loss": 1.4121, "step": 4101 }, { "epoch": 0.7076684205986371, "grad_norm": 0.76171875, "learning_rate": 1.4510049504251174e-05, "loss": 1.3894, "step": 4102 }, { "epoch": 0.7078409384973691, "grad_norm": 0.62890625, "learning_rate": 1.450762000189738e-05, "loss": 1.4696, "step": 4103 }, { "epoch": 0.7080134563961011, "grad_norm": 0.83203125, "learning_rate": 1.4505190165590932e-05, "loss": 1.4553, "step": 4104 }, { "epoch": 0.7081859742948331, "grad_norm": 0.80078125, "learning_rate": 1.4502759995511845e-05, "loss": 1.4934, "step": 4105 }, { "epoch": 0.7083584921935651, "grad_norm": 0.69140625, "learning_rate": 1.4500329491840165e-05, "loss": 1.5194, "step": 4106 }, { "epoch": 0.7085310100922971, "grad_norm": 0.64453125, "learning_rate": 1.4497898654755957e-05, "loss": 1.4484, "step": 4107 }, { "epoch": 0.7087035279910291, "grad_norm": 0.6015625, "learning_rate": 1.4495467484439312e-05, "loss": 1.5005, "step": 4108 }, { "epoch": 0.708876045889761, "grad_norm": 0.66015625, "learning_rate": 1.4493035981070347e-05, "loss": 1.4317, "step": 4109 }, { "epoch": 0.709048563788493, "grad_norm": 0.66796875, "learning_rate": 1.4490604144829204e-05, "loss": 1.4461, "step": 4110 }, { "epoch": 0.7092210816872251, "grad_norm": 0.8046875, "learning_rate": 1.4488171975896044e-05, "loss": 1.5005, "step": 4111 }, { "epoch": 0.709393599585957, "grad_norm": 0.64453125, "learning_rate": 1.4485739474451061e-05, "loss": 1.5018, "step": 4112 }, { "epoch": 0.709566117484689, "grad_norm": 0.63671875, "learning_rate": 1.4483306640674473e-05, "loss": 1.4708, "step": 4113 }, { "epoch": 0.7097386353834211, "grad_norm": 0.56640625, "learning_rate": 1.4480873474746515e-05, "loss": 1.3709, "step": 4114 }, { "epoch": 0.709911153282153, "grad_norm": 0.74609375, "learning_rate": 1.4478439976847452e-05, "loss": 1.4258, "step": 4115 }, { "epoch": 0.710083671180885, "grad_norm": 0.609375, "learning_rate": 1.4476006147157571e-05, "loss": 1.4294, "step": 4116 }, { "epoch": 0.710256189079617, "grad_norm": 0.59765625, "learning_rate": 1.4473571985857189e-05, "loss": 1.4314, "step": 4117 }, { "epoch": 0.710428706978349, "grad_norm": 0.6640625, "learning_rate": 1.4471137493126643e-05, "loss": 1.4131, "step": 4118 }, { "epoch": 0.710601224877081, "grad_norm": 0.62109375, "learning_rate": 1.4468702669146292e-05, "loss": 1.3848, "step": 4119 }, { "epoch": 0.710773742775813, "grad_norm": 0.6171875, "learning_rate": 1.4466267514096527e-05, "loss": 1.4524, "step": 4120 }, { "epoch": 0.710946260674545, "grad_norm": 0.65625, "learning_rate": 1.4463832028157758e-05, "loss": 1.5744, "step": 4121 }, { "epoch": 0.711118778573277, "grad_norm": 0.671875, "learning_rate": 1.4461396211510421e-05, "loss": 1.5232, "step": 4122 }, { "epoch": 0.711291296472009, "grad_norm": 0.61328125, "learning_rate": 1.4458960064334977e-05, "loss": 1.4307, "step": 4123 }, { "epoch": 0.7114638143707409, "grad_norm": 0.6484375, "learning_rate": 1.4456523586811911e-05, "loss": 1.4285, "step": 4124 }, { "epoch": 0.711636332269473, "grad_norm": 0.625, "learning_rate": 1.445408677912173e-05, "loss": 1.3565, "step": 4125 }, { "epoch": 0.7118088501682049, "grad_norm": 0.6328125, "learning_rate": 1.445164964144497e-05, "loss": 1.4898, "step": 4126 }, { "epoch": 0.7119813680669369, "grad_norm": 0.59765625, "learning_rate": 1.444921217396219e-05, "loss": 1.4365, "step": 4127 }, { "epoch": 0.712153885965669, "grad_norm": 0.59765625, "learning_rate": 1.4446774376853973e-05, "loss": 1.4543, "step": 4128 }, { "epoch": 0.7123264038644009, "grad_norm": 0.63671875, "learning_rate": 1.4444336250300926e-05, "loss": 1.4361, "step": 4129 }, { "epoch": 0.7124989217631329, "grad_norm": 0.859375, "learning_rate": 1.4441897794483679e-05, "loss": 1.4084, "step": 4130 }, { "epoch": 0.712671439661865, "grad_norm": 0.578125, "learning_rate": 1.443945900958289e-05, "loss": 1.4301, "step": 4131 }, { "epoch": 0.7128439575605969, "grad_norm": 0.62890625, "learning_rate": 1.4437019895779235e-05, "loss": 1.6045, "step": 4132 }, { "epoch": 0.7130164754593289, "grad_norm": 0.66796875, "learning_rate": 1.4434580453253426e-05, "loss": 1.4778, "step": 4133 }, { "epoch": 0.7131889933580609, "grad_norm": 0.6796875, "learning_rate": 1.4432140682186192e-05, "loss": 1.4129, "step": 4134 }, { "epoch": 0.7133615112567929, "grad_norm": 0.6484375, "learning_rate": 1.4429700582758276e-05, "loss": 1.4951, "step": 4135 }, { "epoch": 0.7135340291555249, "grad_norm": 1.1640625, "learning_rate": 1.4427260155150466e-05, "loss": 1.4966, "step": 4136 }, { "epoch": 0.7137065470542568, "grad_norm": 0.62890625, "learning_rate": 1.4424819399543559e-05, "loss": 1.3954, "step": 4137 }, { "epoch": 0.7138790649529889, "grad_norm": 0.58984375, "learning_rate": 1.4422378316118384e-05, "loss": 1.4021, "step": 4138 }, { "epoch": 0.7140515828517209, "grad_norm": 0.640625, "learning_rate": 1.4419936905055794e-05, "loss": 1.4526, "step": 4139 }, { "epoch": 0.7142241007504528, "grad_norm": 0.62890625, "learning_rate": 1.4417495166536659e-05, "loss": 1.3207, "step": 4140 }, { "epoch": 0.7143966186491848, "grad_norm": 0.59375, "learning_rate": 1.4415053100741879e-05, "loss": 1.4194, "step": 4141 }, { "epoch": 0.7145691365479169, "grad_norm": 0.73046875, "learning_rate": 1.4412610707852378e-05, "loss": 1.4431, "step": 4142 }, { "epoch": 0.7147416544466488, "grad_norm": 0.64453125, "learning_rate": 1.4410167988049106e-05, "loss": 1.5113, "step": 4143 }, { "epoch": 0.7149141723453808, "grad_norm": 0.82421875, "learning_rate": 1.4407724941513035e-05, "loss": 1.5396, "step": 4144 }, { "epoch": 0.7150866902441129, "grad_norm": 0.84375, "learning_rate": 1.4405281568425158e-05, "loss": 1.5188, "step": 4145 }, { "epoch": 0.7152592081428448, "grad_norm": 0.58984375, "learning_rate": 1.4402837868966498e-05, "loss": 1.3558, "step": 4146 }, { "epoch": 0.7154317260415768, "grad_norm": 0.59375, "learning_rate": 1.4400393843318097e-05, "loss": 1.4856, "step": 4147 }, { "epoch": 0.7156042439403089, "grad_norm": 0.61328125, "learning_rate": 1.4397949491661027e-05, "loss": 1.3808, "step": 4148 }, { "epoch": 0.7157767618390408, "grad_norm": 0.60546875, "learning_rate": 1.4395504814176376e-05, "loss": 1.3694, "step": 4149 }, { "epoch": 0.7159492797377728, "grad_norm": 0.6171875, "learning_rate": 1.4393059811045267e-05, "loss": 1.3789, "step": 4150 }, { "epoch": 0.7161217976365047, "grad_norm": 0.6640625, "learning_rate": 1.4390614482448837e-05, "loss": 1.4717, "step": 4151 }, { "epoch": 0.7162943155352368, "grad_norm": 0.5859375, "learning_rate": 1.4388168828568252e-05, "loss": 1.4533, "step": 4152 }, { "epoch": 0.7164668334339688, "grad_norm": 0.66015625, "learning_rate": 1.4385722849584702e-05, "loss": 1.4272, "step": 4153 }, { "epoch": 0.7166393513327007, "grad_norm": 2.0625, "learning_rate": 1.4383276545679398e-05, "loss": 1.4254, "step": 4154 }, { "epoch": 0.7168118692314328, "grad_norm": 0.6640625, "learning_rate": 1.4380829917033585e-05, "loss": 1.5077, "step": 4155 }, { "epoch": 0.7169843871301648, "grad_norm": 0.58984375, "learning_rate": 1.4378382963828515e-05, "loss": 1.3822, "step": 4156 }, { "epoch": 0.7171569050288967, "grad_norm": 0.6484375, "learning_rate": 1.4375935686245477e-05, "loss": 1.5273, "step": 4157 }, { "epoch": 0.7173294229276287, "grad_norm": 0.66015625, "learning_rate": 1.4373488084465783e-05, "loss": 1.4555, "step": 4158 }, { "epoch": 0.7175019408263608, "grad_norm": 0.60546875, "learning_rate": 1.4371040158670763e-05, "loss": 1.5219, "step": 4159 }, { "epoch": 0.7176744587250927, "grad_norm": 0.59765625, "learning_rate": 1.4368591909041778e-05, "loss": 1.4713, "step": 4160 }, { "epoch": 0.7178469766238247, "grad_norm": 0.58984375, "learning_rate": 1.4366143335760208e-05, "loss": 1.5836, "step": 4161 }, { "epoch": 0.7180194945225568, "grad_norm": 0.7890625, "learning_rate": 1.436369443900746e-05, "loss": 1.4414, "step": 4162 }, { "epoch": 0.7181920124212887, "grad_norm": 0.66796875, "learning_rate": 1.4361245218964961e-05, "loss": 1.4173, "step": 4163 }, { "epoch": 0.7183645303200207, "grad_norm": 0.625, "learning_rate": 1.4358795675814165e-05, "loss": 1.3769, "step": 4164 }, { "epoch": 0.7185370482187527, "grad_norm": 0.640625, "learning_rate": 1.4356345809736552e-05, "loss": 1.4559, "step": 4165 }, { "epoch": 0.7187095661174847, "grad_norm": 0.578125, "learning_rate": 1.4353895620913619e-05, "loss": 1.4834, "step": 4166 }, { "epoch": 0.7188820840162167, "grad_norm": 0.60546875, "learning_rate": 1.4351445109526897e-05, "loss": 1.4671, "step": 4167 }, { "epoch": 0.7190546019149486, "grad_norm": 0.609375, "learning_rate": 1.4348994275757933e-05, "loss": 1.4191, "step": 4168 }, { "epoch": 0.7192271198136807, "grad_norm": 0.64453125, "learning_rate": 1.4346543119788297e-05, "loss": 1.4187, "step": 4169 }, { "epoch": 0.7193996377124127, "grad_norm": 1.0234375, "learning_rate": 1.4344091641799587e-05, "loss": 1.4377, "step": 4170 }, { "epoch": 0.7195721556111446, "grad_norm": 0.62109375, "learning_rate": 1.434163984197343e-05, "loss": 1.512, "step": 4171 }, { "epoch": 0.7197446735098767, "grad_norm": 0.625, "learning_rate": 1.433918772049146e-05, "loss": 1.5385, "step": 4172 }, { "epoch": 0.7199171914086087, "grad_norm": 0.640625, "learning_rate": 1.4336735277535355e-05, "loss": 1.4165, "step": 4173 }, { "epoch": 0.7200897093073406, "grad_norm": 0.57421875, "learning_rate": 1.4334282513286799e-05, "loss": 1.5712, "step": 4174 }, { "epoch": 0.7202622272060726, "grad_norm": 0.62109375, "learning_rate": 1.4331829427927518e-05, "loss": 1.4407, "step": 4175 }, { "epoch": 0.7204347451048047, "grad_norm": 0.66796875, "learning_rate": 1.4329376021639244e-05, "loss": 1.5025, "step": 4176 }, { "epoch": 0.7206072630035366, "grad_norm": 0.6328125, "learning_rate": 1.4326922294603743e-05, "loss": 1.4318, "step": 4177 }, { "epoch": 0.7207797809022686, "grad_norm": 0.640625, "learning_rate": 1.4324468247002802e-05, "loss": 1.3621, "step": 4178 }, { "epoch": 0.7209522988010006, "grad_norm": 0.73046875, "learning_rate": 1.4322013879018233e-05, "loss": 1.4792, "step": 4179 }, { "epoch": 0.7211248166997326, "grad_norm": 0.60546875, "learning_rate": 1.4319559190831872e-05, "loss": 1.3502, "step": 4180 }, { "epoch": 0.7212973345984646, "grad_norm": 0.6015625, "learning_rate": 1.4317104182625573e-05, "loss": 1.5503, "step": 4181 }, { "epoch": 0.7214698524971965, "grad_norm": 0.65625, "learning_rate": 1.4314648854581225e-05, "loss": 1.429, "step": 4182 }, { "epoch": 0.7216423703959286, "grad_norm": 0.625, "learning_rate": 1.431219320688073e-05, "loss": 1.3568, "step": 4183 }, { "epoch": 0.7218148882946606, "grad_norm": 0.640625, "learning_rate": 1.4309737239706019e-05, "loss": 1.4368, "step": 4184 }, { "epoch": 0.7219874061933925, "grad_norm": 0.703125, "learning_rate": 1.4307280953239044e-05, "loss": 1.4313, "step": 4185 }, { "epoch": 0.7221599240921246, "grad_norm": 0.71484375, "learning_rate": 1.4304824347661783e-05, "loss": 1.399, "step": 4186 }, { "epoch": 0.7223324419908566, "grad_norm": 0.5859375, "learning_rate": 1.4302367423156236e-05, "loss": 1.4066, "step": 4187 }, { "epoch": 0.7225049598895885, "grad_norm": 0.64453125, "learning_rate": 1.4299910179904429e-05, "loss": 1.3609, "step": 4188 }, { "epoch": 0.7226774777883206, "grad_norm": 0.90234375, "learning_rate": 1.4297452618088407e-05, "loss": 1.4241, "step": 4189 }, { "epoch": 0.7228499956870525, "grad_norm": 0.58984375, "learning_rate": 1.4294994737890245e-05, "loss": 1.3455, "step": 4190 }, { "epoch": 0.7230225135857845, "grad_norm": 0.671875, "learning_rate": 1.4292536539492034e-05, "loss": 1.5229, "step": 4191 }, { "epoch": 0.7231950314845165, "grad_norm": 0.8984375, "learning_rate": 1.4290078023075897e-05, "loss": 1.5029, "step": 4192 }, { "epoch": 0.7233675493832485, "grad_norm": 0.703125, "learning_rate": 1.4287619188823975e-05, "loss": 1.4714, "step": 4193 }, { "epoch": 0.7235400672819805, "grad_norm": 0.75390625, "learning_rate": 1.4285160036918431e-05, "loss": 1.4747, "step": 4194 }, { "epoch": 0.7237125851807125, "grad_norm": 0.8203125, "learning_rate": 1.4282700567541461e-05, "loss": 1.4821, "step": 4195 }, { "epoch": 0.7238851030794445, "grad_norm": 0.71875, "learning_rate": 1.428024078087527e-05, "loss": 1.4791, "step": 4196 }, { "epoch": 0.7240576209781765, "grad_norm": 0.58203125, "learning_rate": 1.4277780677102098e-05, "loss": 1.3904, "step": 4197 }, { "epoch": 0.7242301388769085, "grad_norm": 0.9609375, "learning_rate": 1.4275320256404203e-05, "loss": 1.5428, "step": 4198 }, { "epoch": 0.7244026567756404, "grad_norm": 0.74609375, "learning_rate": 1.4272859518963874e-05, "loss": 1.4717, "step": 4199 }, { "epoch": 0.7245751746743725, "grad_norm": 0.66796875, "learning_rate": 1.4270398464963411e-05, "loss": 1.5452, "step": 4200 }, { "epoch": 0.7245751746743725, "eval_loss": 1.4216216802597046, "eval_runtime": 10.8689, "eval_samples_per_second": 94.213, "eval_steps_per_second": 23.553, "step": 4200 }, { "epoch": 0.7247476925731045, "grad_norm": 0.734375, "learning_rate": 1.4267937094585148e-05, "loss": 1.445, "step": 4201 }, { "epoch": 0.7249202104718364, "grad_norm": 0.71875, "learning_rate": 1.4265475408011438e-05, "loss": 1.5107, "step": 4202 }, { "epoch": 0.7250927283705685, "grad_norm": 0.671875, "learning_rate": 1.4263013405424657e-05, "loss": 1.4799, "step": 4203 }, { "epoch": 0.7252652462693004, "grad_norm": 0.62890625, "learning_rate": 1.4260551087007205e-05, "loss": 1.4503, "step": 4204 }, { "epoch": 0.7254377641680324, "grad_norm": 0.75, "learning_rate": 1.425808845294151e-05, "loss": 1.4823, "step": 4205 }, { "epoch": 0.7256102820667645, "grad_norm": 0.69921875, "learning_rate": 1.4255625503410015e-05, "loss": 1.3246, "step": 4206 }, { "epoch": 0.7257827999654964, "grad_norm": 0.578125, "learning_rate": 1.4253162238595192e-05, "loss": 1.4137, "step": 4207 }, { "epoch": 0.7259553178642284, "grad_norm": 0.578125, "learning_rate": 1.4250698658679535e-05, "loss": 1.359, "step": 4208 }, { "epoch": 0.7261278357629604, "grad_norm": 0.7109375, "learning_rate": 1.4248234763845565e-05, "loss": 1.4221, "step": 4209 }, { "epoch": 0.7263003536616924, "grad_norm": 0.59765625, "learning_rate": 1.4245770554275817e-05, "loss": 1.4439, "step": 4210 }, { "epoch": 0.7264728715604244, "grad_norm": 0.67578125, "learning_rate": 1.424330603015286e-05, "loss": 1.3822, "step": 4211 }, { "epoch": 0.7266453894591564, "grad_norm": 0.62109375, "learning_rate": 1.4240841191659276e-05, "loss": 1.4447, "step": 4212 }, { "epoch": 0.7268179073578884, "grad_norm": 0.67578125, "learning_rate": 1.423837603897768e-05, "loss": 1.4547, "step": 4213 }, { "epoch": 0.7269904252566204, "grad_norm": 0.58984375, "learning_rate": 1.4235910572290704e-05, "loss": 1.5104, "step": 4214 }, { "epoch": 0.7271629431553523, "grad_norm": 0.65234375, "learning_rate": 1.4233444791781005e-05, "loss": 1.4798, "step": 4215 }, { "epoch": 0.7273354610540843, "grad_norm": 0.61328125, "learning_rate": 1.4230978697631266e-05, "loss": 1.4985, "step": 4216 }, { "epoch": 0.7275079789528164, "grad_norm": 0.63671875, "learning_rate": 1.4228512290024185e-05, "loss": 1.3978, "step": 4217 }, { "epoch": 0.7276804968515483, "grad_norm": 0.69921875, "learning_rate": 1.4226045569142497e-05, "loss": 1.4935, "step": 4218 }, { "epoch": 0.7278530147502803, "grad_norm": 0.640625, "learning_rate": 1.4223578535168947e-05, "loss": 1.4479, "step": 4219 }, { "epoch": 0.7280255326490124, "grad_norm": 0.6484375, "learning_rate": 1.4221111188286307e-05, "loss": 1.4613, "step": 4220 }, { "epoch": 0.7281980505477443, "grad_norm": 0.59375, "learning_rate": 1.4218643528677377e-05, "loss": 1.4544, "step": 4221 }, { "epoch": 0.7283705684464763, "grad_norm": 0.60546875, "learning_rate": 1.421617555652497e-05, "loss": 1.4306, "step": 4222 }, { "epoch": 0.7285430863452084, "grad_norm": 0.609375, "learning_rate": 1.421370727201194e-05, "loss": 1.5617, "step": 4223 }, { "epoch": 0.7287156042439403, "grad_norm": 0.625, "learning_rate": 1.4211238675321143e-05, "loss": 1.3514, "step": 4224 }, { "epoch": 0.7288881221426723, "grad_norm": 0.5859375, "learning_rate": 1.4208769766635475e-05, "loss": 1.5241, "step": 4225 }, { "epoch": 0.7290606400414043, "grad_norm": 0.59375, "learning_rate": 1.4206300546137844e-05, "loss": 1.554, "step": 4226 }, { "epoch": 0.7292331579401363, "grad_norm": 0.8046875, "learning_rate": 1.4203831014011186e-05, "loss": 1.4732, "step": 4227 }, { "epoch": 0.7294056758388683, "grad_norm": 0.703125, "learning_rate": 1.4201361170438461e-05, "loss": 1.4524, "step": 4228 }, { "epoch": 0.7295781937376002, "grad_norm": 0.67578125, "learning_rate": 1.4198891015602648e-05, "loss": 1.4721, "step": 4229 }, { "epoch": 0.7297507116363323, "grad_norm": 0.640625, "learning_rate": 1.419642054968675e-05, "loss": 1.4939, "step": 4230 }, { "epoch": 0.7299232295350643, "grad_norm": 0.578125, "learning_rate": 1.4193949772873804e-05, "loss": 1.4559, "step": 4231 }, { "epoch": 0.7300957474337962, "grad_norm": 0.6640625, "learning_rate": 1.4191478685346849e-05, "loss": 1.3952, "step": 4232 }, { "epoch": 0.7302682653325282, "grad_norm": 0.578125, "learning_rate": 1.4189007287288963e-05, "loss": 1.4461, "step": 4233 }, { "epoch": 0.7304407832312603, "grad_norm": 0.61328125, "learning_rate": 1.4186535578883244e-05, "loss": 1.4821, "step": 4234 }, { "epoch": 0.7306133011299922, "grad_norm": 0.703125, "learning_rate": 1.4184063560312814e-05, "loss": 1.4381, "step": 4235 }, { "epoch": 0.7307858190287242, "grad_norm": 0.59765625, "learning_rate": 1.4181591231760807e-05, "loss": 1.4034, "step": 4236 }, { "epoch": 0.7309583369274563, "grad_norm": 0.56640625, "learning_rate": 1.4179118593410395e-05, "loss": 1.3891, "step": 4237 }, { "epoch": 0.7311308548261882, "grad_norm": 0.64453125, "learning_rate": 1.4176645645444765e-05, "loss": 1.523, "step": 4238 }, { "epoch": 0.7313033727249202, "grad_norm": 0.58203125, "learning_rate": 1.4174172388047132e-05, "loss": 1.4271, "step": 4239 }, { "epoch": 0.7314758906236523, "grad_norm": 0.75390625, "learning_rate": 1.4171698821400724e-05, "loss": 1.4633, "step": 4240 }, { "epoch": 0.7316484085223842, "grad_norm": 0.63671875, "learning_rate": 1.4169224945688801e-05, "loss": 1.4301, "step": 4241 }, { "epoch": 0.7318209264211162, "grad_norm": 0.5625, "learning_rate": 1.4166750761094646e-05, "loss": 1.3832, "step": 4242 }, { "epoch": 0.7319934443198481, "grad_norm": 0.6171875, "learning_rate": 1.416427626780156e-05, "loss": 1.4208, "step": 4243 }, { "epoch": 0.7321659622185802, "grad_norm": 0.59375, "learning_rate": 1.4161801465992867e-05, "loss": 1.4036, "step": 4244 }, { "epoch": 0.7323384801173122, "grad_norm": 0.58984375, "learning_rate": 1.4159326355851915e-05, "loss": 1.3705, "step": 4245 }, { "epoch": 0.7325109980160441, "grad_norm": 0.6171875, "learning_rate": 1.4156850937562079e-05, "loss": 1.4796, "step": 4246 }, { "epoch": 0.7326835159147762, "grad_norm": 3.203125, "learning_rate": 1.4154375211306754e-05, "loss": 1.4343, "step": 4247 }, { "epoch": 0.7328560338135082, "grad_norm": 0.61328125, "learning_rate": 1.4151899177269357e-05, "loss": 1.3781, "step": 4248 }, { "epoch": 0.7330285517122401, "grad_norm": 0.69140625, "learning_rate": 1.4149422835633324e-05, "loss": 1.3809, "step": 4249 }, { "epoch": 0.7332010696109721, "grad_norm": 0.578125, "learning_rate": 1.414694618658212e-05, "loss": 1.5156, "step": 4250 }, { "epoch": 0.7333735875097042, "grad_norm": 0.63671875, "learning_rate": 1.4144469230299234e-05, "loss": 1.454, "step": 4251 }, { "epoch": 0.7335461054084361, "grad_norm": 0.65625, "learning_rate": 1.4141991966968169e-05, "loss": 1.4892, "step": 4252 }, { "epoch": 0.7337186233071681, "grad_norm": 0.6171875, "learning_rate": 1.413951439677246e-05, "loss": 1.3443, "step": 4253 }, { "epoch": 0.7338911412059002, "grad_norm": 0.71484375, "learning_rate": 1.4137036519895656e-05, "loss": 1.5462, "step": 4254 }, { "epoch": 0.7340636591046321, "grad_norm": 0.609375, "learning_rate": 1.4134558336521342e-05, "loss": 1.4905, "step": 4255 }, { "epoch": 0.7342361770033641, "grad_norm": 0.60546875, "learning_rate": 1.413207984683311e-05, "loss": 1.4553, "step": 4256 }, { "epoch": 0.734408694902096, "grad_norm": 0.61328125, "learning_rate": 1.4129601051014586e-05, "loss": 1.4901, "step": 4257 }, { "epoch": 0.7345812128008281, "grad_norm": 0.7109375, "learning_rate": 1.4127121949249412e-05, "loss": 1.4201, "step": 4258 }, { "epoch": 0.7347537306995601, "grad_norm": 0.59765625, "learning_rate": 1.4124642541721258e-05, "loss": 1.5189, "step": 4259 }, { "epoch": 0.734926248598292, "grad_norm": 0.5859375, "learning_rate": 1.4122162828613812e-05, "loss": 1.4489, "step": 4260 }, { "epoch": 0.7350987664970241, "grad_norm": 0.77734375, "learning_rate": 1.4119682810110787e-05, "loss": 1.4045, "step": 4261 }, { "epoch": 0.7352712843957561, "grad_norm": 0.65625, "learning_rate": 1.4117202486395918e-05, "loss": 1.5362, "step": 4262 }, { "epoch": 0.735443802294488, "grad_norm": 0.6328125, "learning_rate": 1.4114721857652965e-05, "loss": 1.4357, "step": 4263 }, { "epoch": 0.7356163201932201, "grad_norm": 0.6171875, "learning_rate": 1.4112240924065706e-05, "loss": 1.4519, "step": 4264 }, { "epoch": 0.7357888380919521, "grad_norm": 0.75390625, "learning_rate": 1.4109759685817943e-05, "loss": 1.4781, "step": 4265 }, { "epoch": 0.735961355990684, "grad_norm": 0.6328125, "learning_rate": 1.4107278143093505e-05, "loss": 1.505, "step": 4266 }, { "epoch": 0.736133873889416, "grad_norm": 0.59375, "learning_rate": 1.410479629607624e-05, "loss": 1.3641, "step": 4267 }, { "epoch": 0.736306391788148, "grad_norm": 0.828125, "learning_rate": 1.4102314144950016e-05, "loss": 1.451, "step": 4268 }, { "epoch": 0.73647890968688, "grad_norm": 0.8359375, "learning_rate": 1.4099831689898728e-05, "loss": 1.441, "step": 4269 }, { "epoch": 0.736651427585612, "grad_norm": 0.67578125, "learning_rate": 1.409734893110629e-05, "loss": 1.4513, "step": 4270 }, { "epoch": 0.736823945484344, "grad_norm": 0.671875, "learning_rate": 1.4094865868756644e-05, "loss": 1.4197, "step": 4271 }, { "epoch": 0.736996463383076, "grad_norm": 0.6640625, "learning_rate": 1.4092382503033746e-05, "loss": 1.3671, "step": 4272 }, { "epoch": 0.737168981281808, "grad_norm": 0.57421875, "learning_rate": 1.4089898834121583e-05, "loss": 1.3355, "step": 4273 }, { "epoch": 0.7373414991805399, "grad_norm": 0.59765625, "learning_rate": 1.4087414862204161e-05, "loss": 1.4562, "step": 4274 }, { "epoch": 0.737514017079272, "grad_norm": 0.6875, "learning_rate": 1.4084930587465506e-05, "loss": 1.4651, "step": 4275 }, { "epoch": 0.737686534978004, "grad_norm": 0.61328125, "learning_rate": 1.4082446010089667e-05, "loss": 1.4966, "step": 4276 }, { "epoch": 0.7378590528767359, "grad_norm": 0.63671875, "learning_rate": 1.4079961130260722e-05, "loss": 1.3972, "step": 4277 }, { "epoch": 0.738031570775468, "grad_norm": 0.6328125, "learning_rate": 1.4077475948162762e-05, "loss": 1.4247, "step": 4278 }, { "epoch": 0.7382040886742, "grad_norm": 0.72265625, "learning_rate": 1.407499046397991e-05, "loss": 1.4448, "step": 4279 }, { "epoch": 0.7383766065729319, "grad_norm": 0.8359375, "learning_rate": 1.40725046778963e-05, "loss": 1.4223, "step": 4280 }, { "epoch": 0.738549124471664, "grad_norm": 0.578125, "learning_rate": 1.4070018590096096e-05, "loss": 1.3966, "step": 4281 }, { "epoch": 0.738721642370396, "grad_norm": 0.6953125, "learning_rate": 1.4067532200763484e-05, "loss": 1.4662, "step": 4282 }, { "epoch": 0.7388941602691279, "grad_norm": 0.60546875, "learning_rate": 1.4065045510082673e-05, "loss": 1.5419, "step": 4283 }, { "epoch": 0.7390666781678599, "grad_norm": 0.73046875, "learning_rate": 1.4062558518237893e-05, "loss": 1.4291, "step": 4284 }, { "epoch": 0.7392391960665919, "grad_norm": 0.6328125, "learning_rate": 1.4060071225413392e-05, "loss": 1.4024, "step": 4285 }, { "epoch": 0.7394117139653239, "grad_norm": 0.5703125, "learning_rate": 1.4057583631793443e-05, "loss": 1.4185, "step": 4286 }, { "epoch": 0.7395842318640559, "grad_norm": 0.65625, "learning_rate": 1.405509573756235e-05, "loss": 1.4524, "step": 4287 }, { "epoch": 0.7397567497627879, "grad_norm": 0.8046875, "learning_rate": 1.4052607542904427e-05, "loss": 1.5358, "step": 4288 }, { "epoch": 0.7399292676615199, "grad_norm": 0.609375, "learning_rate": 1.4050119048004012e-05, "loss": 1.4615, "step": 4289 }, { "epoch": 0.7401017855602519, "grad_norm": 0.63671875, "learning_rate": 1.4047630253045475e-05, "loss": 1.3796, "step": 4290 }, { "epoch": 0.7402743034589838, "grad_norm": 0.625, "learning_rate": 1.4045141158213197e-05, "loss": 1.5141, "step": 4291 }, { "epoch": 0.7404468213577159, "grad_norm": 0.640625, "learning_rate": 1.4042651763691586e-05, "loss": 1.4644, "step": 4292 }, { "epoch": 0.7406193392564479, "grad_norm": 0.609375, "learning_rate": 1.4040162069665076e-05, "loss": 1.3469, "step": 4293 }, { "epoch": 0.7407918571551798, "grad_norm": 0.83203125, "learning_rate": 1.4037672076318112e-05, "loss": 1.4975, "step": 4294 }, { "epoch": 0.7409643750539119, "grad_norm": 0.63671875, "learning_rate": 1.4035181783835174e-05, "loss": 1.4385, "step": 4295 }, { "epoch": 0.7411368929526438, "grad_norm": 0.78515625, "learning_rate": 1.4032691192400756e-05, "loss": 1.4352, "step": 4296 }, { "epoch": 0.7413094108513758, "grad_norm": 0.578125, "learning_rate": 1.4030200302199375e-05, "loss": 1.4521, "step": 4297 }, { "epoch": 0.7414819287501079, "grad_norm": 0.62890625, "learning_rate": 1.4027709113415578e-05, "loss": 1.4853, "step": 4298 }, { "epoch": 0.7416544466488398, "grad_norm": 0.63671875, "learning_rate": 1.4025217626233919e-05, "loss": 1.4171, "step": 4299 }, { "epoch": 0.7418269645475718, "grad_norm": 0.578125, "learning_rate": 1.402272584083899e-05, "loss": 1.4167, "step": 4300 }, { "epoch": 0.7418269645475718, "eval_loss": 1.4205090999603271, "eval_runtime": 11.0817, "eval_samples_per_second": 92.405, "eval_steps_per_second": 23.101, "step": 4300 }, { "epoch": 0.7419994824463038, "grad_norm": 0.6640625, "learning_rate": 1.4020233757415396e-05, "loss": 1.4844, "step": 4301 }, { "epoch": 0.7421720003450358, "grad_norm": 0.703125, "learning_rate": 1.4017741376147762e-05, "loss": 1.4134, "step": 4302 }, { "epoch": 0.7423445182437678, "grad_norm": 0.6328125, "learning_rate": 1.4015248697220746e-05, "loss": 1.4863, "step": 4303 }, { "epoch": 0.7425170361424998, "grad_norm": 1.5625, "learning_rate": 1.401275572081902e-05, "loss": 1.4767, "step": 4304 }, { "epoch": 0.7426895540412318, "grad_norm": 0.7109375, "learning_rate": 1.4010262447127275e-05, "loss": 1.4214, "step": 4305 }, { "epoch": 0.7428620719399638, "grad_norm": 0.7890625, "learning_rate": 1.400776887633023e-05, "loss": 1.4982, "step": 4306 }, { "epoch": 0.7430345898386957, "grad_norm": 0.6328125, "learning_rate": 1.4005275008612627e-05, "loss": 1.4187, "step": 4307 }, { "epoch": 0.7432071077374277, "grad_norm": 0.671875, "learning_rate": 1.4002780844159225e-05, "loss": 1.5067, "step": 4308 }, { "epoch": 0.7433796256361598, "grad_norm": 0.67578125, "learning_rate": 1.4000286383154804e-05, "loss": 1.4092, "step": 4309 }, { "epoch": 0.7435521435348917, "grad_norm": 0.703125, "learning_rate": 1.3997791625784176e-05, "loss": 1.5382, "step": 4310 }, { "epoch": 0.7437246614336237, "grad_norm": 0.56640625, "learning_rate": 1.3995296572232166e-05, "loss": 1.4975, "step": 4311 }, { "epoch": 0.7438971793323558, "grad_norm": 0.77734375, "learning_rate": 1.399280122268362e-05, "loss": 1.4389, "step": 4312 }, { "epoch": 0.7440696972310877, "grad_norm": 0.6015625, "learning_rate": 1.399030557732341e-05, "loss": 1.4836, "step": 4313 }, { "epoch": 0.7442422151298197, "grad_norm": 0.61328125, "learning_rate": 1.3987809636336434e-05, "loss": 1.4865, "step": 4314 }, { "epoch": 0.7444147330285518, "grad_norm": 0.578125, "learning_rate": 1.39853133999076e-05, "loss": 1.5212, "step": 4315 }, { "epoch": 0.7445872509272837, "grad_norm": 0.6328125, "learning_rate": 1.398281686822185e-05, "loss": 1.4433, "step": 4316 }, { "epoch": 0.7447597688260157, "grad_norm": 0.6640625, "learning_rate": 1.398032004146414e-05, "loss": 1.4844, "step": 4317 }, { "epoch": 0.7449322867247476, "grad_norm": 0.70703125, "learning_rate": 1.3977822919819448e-05, "loss": 1.4672, "step": 4318 }, { "epoch": 0.7451048046234797, "grad_norm": 0.625, "learning_rate": 1.3975325503472786e-05, "loss": 1.4728, "step": 4319 }, { "epoch": 0.7452773225222117, "grad_norm": 0.62890625, "learning_rate": 1.3972827792609168e-05, "loss": 1.4121, "step": 4320 }, { "epoch": 0.7454498404209436, "grad_norm": 0.65625, "learning_rate": 1.397032978741364e-05, "loss": 1.4833, "step": 4321 }, { "epoch": 0.7456223583196757, "grad_norm": 0.69921875, "learning_rate": 1.3967831488071279e-05, "loss": 1.4789, "step": 4322 }, { "epoch": 0.7457948762184077, "grad_norm": 0.66015625, "learning_rate": 1.396533289476717e-05, "loss": 1.3527, "step": 4323 }, { "epoch": 0.7459673941171396, "grad_norm": 0.69921875, "learning_rate": 1.3962834007686418e-05, "loss": 1.4939, "step": 4324 }, { "epoch": 0.7461399120158716, "grad_norm": 0.609375, "learning_rate": 1.3960334827014168e-05, "loss": 1.4151, "step": 4325 }, { "epoch": 0.7463124299146037, "grad_norm": 0.80078125, "learning_rate": 1.3957835352935566e-05, "loss": 1.4769, "step": 4326 }, { "epoch": 0.7464849478133356, "grad_norm": 0.78515625, "learning_rate": 1.395533558563579e-05, "loss": 1.3579, "step": 4327 }, { "epoch": 0.7466574657120676, "grad_norm": 0.640625, "learning_rate": 1.395283552530004e-05, "loss": 1.4144, "step": 4328 }, { "epoch": 0.7468299836107997, "grad_norm": 0.609375, "learning_rate": 1.3950335172113539e-05, "loss": 1.4561, "step": 4329 }, { "epoch": 0.7470025015095316, "grad_norm": 0.6796875, "learning_rate": 1.3947834526261521e-05, "loss": 1.4137, "step": 4330 }, { "epoch": 0.7471750194082636, "grad_norm": 0.66015625, "learning_rate": 1.3945333587929257e-05, "loss": 1.5995, "step": 4331 }, { "epoch": 0.7473475373069955, "grad_norm": 0.75, "learning_rate": 1.394283235730203e-05, "loss": 1.4139, "step": 4332 }, { "epoch": 0.7475200552057276, "grad_norm": 0.6875, "learning_rate": 1.3940330834565144e-05, "loss": 1.4916, "step": 4333 }, { "epoch": 0.7476925731044596, "grad_norm": 0.640625, "learning_rate": 1.393782901990393e-05, "loss": 1.4787, "step": 4334 }, { "epoch": 0.7478650910031915, "grad_norm": 0.60546875, "learning_rate": 1.3935326913503737e-05, "loss": 1.341, "step": 4335 }, { "epoch": 0.7480376089019236, "grad_norm": 0.65234375, "learning_rate": 1.3932824515549936e-05, "loss": 1.5001, "step": 4336 }, { "epoch": 0.7482101268006556, "grad_norm": 0.6171875, "learning_rate": 1.3930321826227924e-05, "loss": 1.4086, "step": 4337 }, { "epoch": 0.7483826446993875, "grad_norm": 0.703125, "learning_rate": 1.3927818845723114e-05, "loss": 1.491, "step": 4338 }, { "epoch": 0.7485551625981196, "grad_norm": 0.6484375, "learning_rate": 1.392531557422094e-05, "loss": 1.4783, "step": 4339 }, { "epoch": 0.7487276804968516, "grad_norm": 0.89453125, "learning_rate": 1.3922812011906862e-05, "loss": 1.403, "step": 4340 }, { "epoch": 0.7489001983955835, "grad_norm": 0.62890625, "learning_rate": 1.3920308158966358e-05, "loss": 1.3879, "step": 4341 }, { "epoch": 0.7490727162943155, "grad_norm": 0.59375, "learning_rate": 1.3917804015584932e-05, "loss": 1.4304, "step": 4342 }, { "epoch": 0.7492452341930476, "grad_norm": 0.62109375, "learning_rate": 1.3915299581948106e-05, "loss": 1.4423, "step": 4343 }, { "epoch": 0.7494177520917795, "grad_norm": 0.875, "learning_rate": 1.3912794858241423e-05, "loss": 1.483, "step": 4344 }, { "epoch": 0.7495902699905115, "grad_norm": 0.75, "learning_rate": 1.3910289844650446e-05, "loss": 1.3615, "step": 4345 }, { "epoch": 0.7497627878892436, "grad_norm": 0.59375, "learning_rate": 1.3907784541360765e-05, "loss": 1.5126, "step": 4346 }, { "epoch": 0.7499353057879755, "grad_norm": 1.0078125, "learning_rate": 1.390527894855799e-05, "loss": 1.4707, "step": 4347 }, { "epoch": 0.7501078236867075, "grad_norm": 0.7109375, "learning_rate": 1.390277306642775e-05, "loss": 1.385, "step": 4348 }, { "epoch": 0.7502803415854394, "grad_norm": 0.61328125, "learning_rate": 1.3900266895155694e-05, "loss": 1.3342, "step": 4349 }, { "epoch": 0.7504528594841715, "grad_norm": 0.7109375, "learning_rate": 1.3897760434927495e-05, "loss": 1.4733, "step": 4350 }, { "epoch": 0.7506253773829035, "grad_norm": 0.7265625, "learning_rate": 1.3895253685928851e-05, "loss": 1.4812, "step": 4351 }, { "epoch": 0.7507978952816354, "grad_norm": 0.63671875, "learning_rate": 1.3892746648345475e-05, "loss": 1.3713, "step": 4352 }, { "epoch": 0.7509704131803675, "grad_norm": 0.69140625, "learning_rate": 1.3890239322363102e-05, "loss": 1.4003, "step": 4353 }, { "epoch": 0.7511429310790995, "grad_norm": 0.79296875, "learning_rate": 1.3887731708167493e-05, "loss": 1.4404, "step": 4354 }, { "epoch": 0.7513154489778314, "grad_norm": 0.6640625, "learning_rate": 1.388522380594443e-05, "loss": 1.4032, "step": 4355 }, { "epoch": 0.7514879668765635, "grad_norm": 0.64453125, "learning_rate": 1.3882715615879709e-05, "loss": 1.398, "step": 4356 }, { "epoch": 0.7516604847752955, "grad_norm": 0.6015625, "learning_rate": 1.3880207138159158e-05, "loss": 1.4063, "step": 4357 }, { "epoch": 0.7518330026740274, "grad_norm": 0.66796875, "learning_rate": 1.3877698372968612e-05, "loss": 1.4327, "step": 4358 }, { "epoch": 0.7520055205727594, "grad_norm": 0.7578125, "learning_rate": 1.3875189320493947e-05, "loss": 1.6136, "step": 4359 }, { "epoch": 0.7521780384714915, "grad_norm": 0.6484375, "learning_rate": 1.3872679980921044e-05, "loss": 1.353, "step": 4360 }, { "epoch": 0.7523505563702234, "grad_norm": 0.75390625, "learning_rate": 1.3870170354435808e-05, "loss": 1.5392, "step": 4361 }, { "epoch": 0.7525230742689554, "grad_norm": 0.60546875, "learning_rate": 1.3867660441224172e-05, "loss": 1.4298, "step": 4362 }, { "epoch": 0.7526955921676874, "grad_norm": 0.5859375, "learning_rate": 1.386515024147208e-05, "loss": 1.4125, "step": 4363 }, { "epoch": 0.7528681100664194, "grad_norm": 0.59375, "learning_rate": 1.3862639755365512e-05, "loss": 1.5319, "step": 4364 }, { "epoch": 0.7530406279651514, "grad_norm": 0.61328125, "learning_rate": 1.3860128983090455e-05, "loss": 1.3204, "step": 4365 }, { "epoch": 0.7532131458638833, "grad_norm": 0.68359375, "learning_rate": 1.3857617924832923e-05, "loss": 1.468, "step": 4366 }, { "epoch": 0.7533856637626154, "grad_norm": 0.62109375, "learning_rate": 1.385510658077895e-05, "loss": 1.4754, "step": 4367 }, { "epoch": 0.7535581816613474, "grad_norm": 0.66015625, "learning_rate": 1.3852594951114594e-05, "loss": 1.448, "step": 4368 }, { "epoch": 0.7537306995600793, "grad_norm": 0.77734375, "learning_rate": 1.3850083036025934e-05, "loss": 1.3702, "step": 4369 }, { "epoch": 0.7539032174588114, "grad_norm": 0.6015625, "learning_rate": 1.3847570835699066e-05, "loss": 1.557, "step": 4370 }, { "epoch": 0.7540757353575434, "grad_norm": 0.703125, "learning_rate": 1.3845058350320109e-05, "loss": 1.3292, "step": 4371 }, { "epoch": 0.7542482532562753, "grad_norm": 0.71875, "learning_rate": 1.3842545580075206e-05, "loss": 1.5168, "step": 4372 }, { "epoch": 0.7544207711550074, "grad_norm": 0.625, "learning_rate": 1.3840032525150516e-05, "loss": 1.47, "step": 4373 }, { "epoch": 0.7545932890537393, "grad_norm": 0.64453125, "learning_rate": 1.3837519185732222e-05, "loss": 1.3561, "step": 4374 }, { "epoch": 0.7547658069524713, "grad_norm": 0.82421875, "learning_rate": 1.3835005562006529e-05, "loss": 1.3902, "step": 4375 }, { "epoch": 0.7549383248512033, "grad_norm": 0.6640625, "learning_rate": 1.3832491654159663e-05, "loss": 1.4831, "step": 4376 }, { "epoch": 0.7551108427499353, "grad_norm": 0.640625, "learning_rate": 1.3829977462377868e-05, "loss": 1.4294, "step": 4377 }, { "epoch": 0.7552833606486673, "grad_norm": 0.765625, "learning_rate": 1.3827462986847411e-05, "loss": 1.4088, "step": 4378 }, { "epoch": 0.7554558785473993, "grad_norm": 0.62890625, "learning_rate": 1.3824948227754583e-05, "loss": 1.4768, "step": 4379 }, { "epoch": 0.7556283964461313, "grad_norm": 0.6875, "learning_rate": 1.3822433185285691e-05, "loss": 1.4937, "step": 4380 }, { "epoch": 0.7558009143448633, "grad_norm": 0.76171875, "learning_rate": 1.3819917859627063e-05, "loss": 1.3964, "step": 4381 }, { "epoch": 0.7559734322435953, "grad_norm": 0.7109375, "learning_rate": 1.381740225096505e-05, "loss": 1.3588, "step": 4382 }, { "epoch": 0.7561459501423272, "grad_norm": 0.73046875, "learning_rate": 1.3814886359486028e-05, "loss": 1.5209, "step": 4383 }, { "epoch": 0.7563184680410593, "grad_norm": 0.9296875, "learning_rate": 1.3812370185376389e-05, "loss": 1.4714, "step": 4384 }, { "epoch": 0.7564909859397912, "grad_norm": 0.6953125, "learning_rate": 1.3809853728822545e-05, "loss": 1.3945, "step": 4385 }, { "epoch": 0.7566635038385232, "grad_norm": 0.62890625, "learning_rate": 1.3807336990010934e-05, "loss": 1.4341, "step": 4386 }, { "epoch": 0.7568360217372553, "grad_norm": 0.69921875, "learning_rate": 1.3804819969128006e-05, "loss": 1.3775, "step": 4387 }, { "epoch": 0.7570085396359872, "grad_norm": 0.73828125, "learning_rate": 1.3802302666360244e-05, "loss": 1.4069, "step": 4388 }, { "epoch": 0.7571810575347192, "grad_norm": 0.71484375, "learning_rate": 1.379978508189414e-05, "loss": 1.4814, "step": 4389 }, { "epoch": 0.7573535754334513, "grad_norm": 0.62109375, "learning_rate": 1.3797267215916215e-05, "loss": 1.364, "step": 4390 }, { "epoch": 0.7575260933321832, "grad_norm": 0.90234375, "learning_rate": 1.3794749068613009e-05, "loss": 1.4798, "step": 4391 }, { "epoch": 0.7576986112309152, "grad_norm": 0.58203125, "learning_rate": 1.3792230640171085e-05, "loss": 1.4018, "step": 4392 }, { "epoch": 0.7578711291296472, "grad_norm": 0.68359375, "learning_rate": 1.3789711930777017e-05, "loss": 1.4674, "step": 4393 }, { "epoch": 0.7580436470283792, "grad_norm": 0.6171875, "learning_rate": 1.3787192940617408e-05, "loss": 1.394, "step": 4394 }, { "epoch": 0.7582161649271112, "grad_norm": 0.6796875, "learning_rate": 1.3784673669878883e-05, "loss": 1.4453, "step": 4395 }, { "epoch": 0.7583886828258432, "grad_norm": 0.62109375, "learning_rate": 1.378215411874809e-05, "loss": 1.5361, "step": 4396 }, { "epoch": 0.7585612007245752, "grad_norm": 0.66796875, "learning_rate": 1.3779634287411683e-05, "loss": 1.4041, "step": 4397 }, { "epoch": 0.7587337186233072, "grad_norm": 0.73046875, "learning_rate": 1.3777114176056353e-05, "loss": 1.4944, "step": 4398 }, { "epoch": 0.7589062365220391, "grad_norm": 0.64453125, "learning_rate": 1.3774593784868802e-05, "loss": 1.4541, "step": 4399 }, { "epoch": 0.7590787544207711, "grad_norm": 0.58984375, "learning_rate": 1.3772073114035762e-05, "loss": 1.3346, "step": 4400 }, { "epoch": 0.7590787544207711, "eval_loss": 1.4194252490997314, "eval_runtime": 10.7734, "eval_samples_per_second": 95.049, "eval_steps_per_second": 23.762, "step": 4400 }, { "epoch": 0.7592512723195032, "grad_norm": 0.66015625, "learning_rate": 1.3769552163743974e-05, "loss": 1.5201, "step": 4401 }, { "epoch": 0.7594237902182351, "grad_norm": 0.7578125, "learning_rate": 1.376703093418021e-05, "loss": 1.4532, "step": 4402 }, { "epoch": 0.7595963081169671, "grad_norm": 0.6171875, "learning_rate": 1.3764509425531256e-05, "loss": 1.4176, "step": 4403 }, { "epoch": 0.7597688260156992, "grad_norm": 0.6015625, "learning_rate": 1.3761987637983924e-05, "loss": 1.4204, "step": 4404 }, { "epoch": 0.7599413439144311, "grad_norm": 0.65234375, "learning_rate": 1.375946557172504e-05, "loss": 1.4231, "step": 4405 }, { "epoch": 0.7601138618131631, "grad_norm": 0.59375, "learning_rate": 1.3756943226941458e-05, "loss": 1.3763, "step": 4406 }, { "epoch": 0.760286379711895, "grad_norm": 0.625, "learning_rate": 1.3754420603820045e-05, "loss": 1.4556, "step": 4407 }, { "epoch": 0.7604588976106271, "grad_norm": 0.75, "learning_rate": 1.3751897702547698e-05, "loss": 1.4294, "step": 4408 }, { "epoch": 0.7606314155093591, "grad_norm": 0.74609375, "learning_rate": 1.3749374523311325e-05, "loss": 1.4873, "step": 4409 }, { "epoch": 0.760803933408091, "grad_norm": 0.6328125, "learning_rate": 1.374685106629786e-05, "loss": 1.3729, "step": 4410 }, { "epoch": 0.7609764513068231, "grad_norm": 0.6328125, "learning_rate": 1.3744327331694254e-05, "loss": 1.419, "step": 4411 }, { "epoch": 0.7611489692055551, "grad_norm": 0.63671875, "learning_rate": 1.3741803319687488e-05, "loss": 1.4207, "step": 4412 }, { "epoch": 0.761321487104287, "grad_norm": 0.640625, "learning_rate": 1.373927903046455e-05, "loss": 1.4772, "step": 4413 }, { "epoch": 0.7614940050030191, "grad_norm": 0.61328125, "learning_rate": 1.3736754464212456e-05, "loss": 1.4847, "step": 4414 }, { "epoch": 0.7616665229017511, "grad_norm": 0.6640625, "learning_rate": 1.3734229621118243e-05, "loss": 1.3985, "step": 4415 }, { "epoch": 0.761839040800483, "grad_norm": 0.59765625, "learning_rate": 1.373170450136897e-05, "loss": 1.4265, "step": 4416 }, { "epoch": 0.762011558699215, "grad_norm": 0.6796875, "learning_rate": 1.3729179105151708e-05, "loss": 1.4387, "step": 4417 }, { "epoch": 0.7621840765979471, "grad_norm": 0.6328125, "learning_rate": 1.3726653432653559e-05, "loss": 1.514, "step": 4418 }, { "epoch": 0.762356594496679, "grad_norm": 0.76953125, "learning_rate": 1.3724127484061635e-05, "loss": 1.5758, "step": 4419 }, { "epoch": 0.762529112395411, "grad_norm": 0.56640625, "learning_rate": 1.3721601259563083e-05, "loss": 1.4226, "step": 4420 }, { "epoch": 0.7627016302941431, "grad_norm": 0.5703125, "learning_rate": 1.3719074759345052e-05, "loss": 1.4775, "step": 4421 }, { "epoch": 0.762874148192875, "grad_norm": 0.75, "learning_rate": 1.3716547983594726e-05, "loss": 1.5176, "step": 4422 }, { "epoch": 0.763046666091607, "grad_norm": 0.71484375, "learning_rate": 1.3714020932499303e-05, "loss": 1.3939, "step": 4423 }, { "epoch": 0.763219183990339, "grad_norm": 0.65234375, "learning_rate": 1.3711493606246005e-05, "loss": 1.4731, "step": 4424 }, { "epoch": 0.763391701889071, "grad_norm": 0.61328125, "learning_rate": 1.370896600502207e-05, "loss": 1.5322, "step": 4425 }, { "epoch": 0.763564219787803, "grad_norm": 0.58984375, "learning_rate": 1.3706438129014756e-05, "loss": 1.4391, "step": 4426 }, { "epoch": 0.7637367376865349, "grad_norm": 0.73046875, "learning_rate": 1.3703909978411348e-05, "loss": 1.3965, "step": 4427 }, { "epoch": 0.763909255585267, "grad_norm": 0.60546875, "learning_rate": 1.3701381553399147e-05, "loss": 1.4971, "step": 4428 }, { "epoch": 0.764081773483999, "grad_norm": 0.58984375, "learning_rate": 1.369885285416547e-05, "loss": 1.4536, "step": 4429 }, { "epoch": 0.7642542913827309, "grad_norm": 0.6171875, "learning_rate": 1.3696323880897664e-05, "loss": 1.4337, "step": 4430 }, { "epoch": 0.764426809281463, "grad_norm": 0.65234375, "learning_rate": 1.3693794633783087e-05, "loss": 1.3907, "step": 4431 }, { "epoch": 0.764599327180195, "grad_norm": 0.71484375, "learning_rate": 1.3691265113009126e-05, "loss": 1.5093, "step": 4432 }, { "epoch": 0.7647718450789269, "grad_norm": 0.7578125, "learning_rate": 1.3688735318763183e-05, "loss": 1.374, "step": 4433 }, { "epoch": 0.7649443629776589, "grad_norm": 0.74609375, "learning_rate": 1.3686205251232676e-05, "loss": 1.4475, "step": 4434 }, { "epoch": 0.765116880876391, "grad_norm": 0.6015625, "learning_rate": 1.3683674910605053e-05, "loss": 1.3778, "step": 4435 }, { "epoch": 0.7652893987751229, "grad_norm": 0.6953125, "learning_rate": 1.3681144297067777e-05, "loss": 1.3342, "step": 4436 }, { "epoch": 0.7654619166738549, "grad_norm": 0.61328125, "learning_rate": 1.367861341080833e-05, "loss": 1.3892, "step": 4437 }, { "epoch": 0.765634434572587, "grad_norm": 0.70703125, "learning_rate": 1.3676082252014213e-05, "loss": 1.4011, "step": 4438 }, { "epoch": 0.7658069524713189, "grad_norm": 0.62890625, "learning_rate": 1.3673550820872957e-05, "loss": 1.465, "step": 4439 }, { "epoch": 0.7659794703700509, "grad_norm": 0.55859375, "learning_rate": 1.3671019117572104e-05, "loss": 1.3866, "step": 4440 }, { "epoch": 0.7661519882687828, "grad_norm": 0.6171875, "learning_rate": 1.3668487142299217e-05, "loss": 1.527, "step": 4441 }, { "epoch": 0.7663245061675149, "grad_norm": 0.60546875, "learning_rate": 1.3665954895241877e-05, "loss": 1.4585, "step": 4442 }, { "epoch": 0.7664970240662469, "grad_norm": 0.72265625, "learning_rate": 1.3663422376587695e-05, "loss": 1.4673, "step": 4443 }, { "epoch": 0.7666695419649788, "grad_norm": 0.6953125, "learning_rate": 1.3660889586524295e-05, "loss": 1.4605, "step": 4444 }, { "epoch": 0.7668420598637109, "grad_norm": 0.59765625, "learning_rate": 1.3658356525239316e-05, "loss": 1.4475, "step": 4445 }, { "epoch": 0.7670145777624429, "grad_norm": 0.63671875, "learning_rate": 1.365582319292043e-05, "loss": 1.5515, "step": 4446 }, { "epoch": 0.7671870956611748, "grad_norm": 0.59765625, "learning_rate": 1.3653289589755314e-05, "loss": 1.4148, "step": 4447 }, { "epoch": 0.7673596135599069, "grad_norm": 0.61328125, "learning_rate": 1.3650755715931685e-05, "loss": 1.4041, "step": 4448 }, { "epoch": 0.7675321314586389, "grad_norm": 0.75390625, "learning_rate": 1.3648221571637259e-05, "loss": 1.4792, "step": 4449 }, { "epoch": 0.7677046493573708, "grad_norm": 0.6640625, "learning_rate": 1.3645687157059783e-05, "loss": 1.3566, "step": 4450 }, { "epoch": 0.7678771672561028, "grad_norm": 0.6328125, "learning_rate": 1.3643152472387024e-05, "loss": 1.3547, "step": 4451 }, { "epoch": 0.7680496851548348, "grad_norm": 0.60546875, "learning_rate": 1.364061751780677e-05, "loss": 1.4479, "step": 4452 }, { "epoch": 0.7682222030535668, "grad_norm": 0.62890625, "learning_rate": 1.3638082293506818e-05, "loss": 1.4658, "step": 4453 }, { "epoch": 0.7683947209522988, "grad_norm": 0.6328125, "learning_rate": 1.3635546799674999e-05, "loss": 1.4645, "step": 4454 }, { "epoch": 0.7685672388510308, "grad_norm": 0.625, "learning_rate": 1.3633011036499158e-05, "loss": 1.4945, "step": 4455 }, { "epoch": 0.7687397567497628, "grad_norm": 0.55078125, "learning_rate": 1.3630475004167159e-05, "loss": 1.4176, "step": 4456 }, { "epoch": 0.7689122746484948, "grad_norm": 0.6875, "learning_rate": 1.3627938702866885e-05, "loss": 1.3811, "step": 4457 }, { "epoch": 0.7690847925472267, "grad_norm": 0.57421875, "learning_rate": 1.3625402132786247e-05, "loss": 1.5251, "step": 4458 }, { "epoch": 0.7692573104459588, "grad_norm": 0.69921875, "learning_rate": 1.3622865294113164e-05, "loss": 1.5809, "step": 4459 }, { "epoch": 0.7694298283446908, "grad_norm": 0.71875, "learning_rate": 1.3620328187035585e-05, "loss": 1.4454, "step": 4460 }, { "epoch": 0.7696023462434227, "grad_norm": 0.71484375, "learning_rate": 1.3617790811741473e-05, "loss": 1.5201, "step": 4461 }, { "epoch": 0.7697748641421548, "grad_norm": 0.64453125, "learning_rate": 1.3615253168418811e-05, "loss": 1.5174, "step": 4462 }, { "epoch": 0.7699473820408868, "grad_norm": 0.7109375, "learning_rate": 1.3612715257255604e-05, "loss": 1.5593, "step": 4463 }, { "epoch": 0.7701198999396187, "grad_norm": 0.640625, "learning_rate": 1.3610177078439882e-05, "loss": 1.4046, "step": 4464 }, { "epoch": 0.7702924178383507, "grad_norm": 0.5859375, "learning_rate": 1.3607638632159681e-05, "loss": 1.5045, "step": 4465 }, { "epoch": 0.7704649357370827, "grad_norm": 0.62109375, "learning_rate": 1.3605099918603069e-05, "loss": 1.3441, "step": 4466 }, { "epoch": 0.7706374536358147, "grad_norm": 0.61328125, "learning_rate": 1.360256093795813e-05, "loss": 1.3589, "step": 4467 }, { "epoch": 0.7708099715345467, "grad_norm": 0.61328125, "learning_rate": 1.3600021690412968e-05, "loss": 1.3647, "step": 4468 }, { "epoch": 0.7709824894332787, "grad_norm": 0.66796875, "learning_rate": 1.3597482176155705e-05, "loss": 1.5231, "step": 4469 }, { "epoch": 0.7711550073320107, "grad_norm": 0.59765625, "learning_rate": 1.3594942395374482e-05, "loss": 1.3733, "step": 4470 }, { "epoch": 0.7713275252307427, "grad_norm": 0.75390625, "learning_rate": 1.3592402348257465e-05, "loss": 1.4228, "step": 4471 }, { "epoch": 0.7715000431294747, "grad_norm": 0.6640625, "learning_rate": 1.3589862034992838e-05, "loss": 1.5187, "step": 4472 }, { "epoch": 0.7716725610282067, "grad_norm": 0.625, "learning_rate": 1.3587321455768798e-05, "loss": 1.5008, "step": 4473 }, { "epoch": 0.7718450789269387, "grad_norm": 0.65625, "learning_rate": 1.3584780610773572e-05, "loss": 1.5196, "step": 4474 }, { "epoch": 0.7720175968256706, "grad_norm": 0.6171875, "learning_rate": 1.3582239500195399e-05, "loss": 1.3848, "step": 4475 }, { "epoch": 0.7721901147244027, "grad_norm": 0.58203125, "learning_rate": 1.357969812422254e-05, "loss": 1.4315, "step": 4476 }, { "epoch": 0.7723626326231346, "grad_norm": 0.59375, "learning_rate": 1.3577156483043278e-05, "loss": 1.4297, "step": 4477 }, { "epoch": 0.7725351505218666, "grad_norm": 0.79296875, "learning_rate": 1.3574614576845912e-05, "loss": 1.5183, "step": 4478 }, { "epoch": 0.7727076684205987, "grad_norm": 0.68359375, "learning_rate": 1.3572072405818762e-05, "loss": 1.3574, "step": 4479 }, { "epoch": 0.7728801863193306, "grad_norm": 0.69921875, "learning_rate": 1.356952997015017e-05, "loss": 1.3782, "step": 4480 }, { "epoch": 0.7730527042180626, "grad_norm": 0.77734375, "learning_rate": 1.3566987270028495e-05, "loss": 1.3915, "step": 4481 }, { "epoch": 0.7732252221167946, "grad_norm": 0.63671875, "learning_rate": 1.3564444305642116e-05, "loss": 1.4268, "step": 4482 }, { "epoch": 0.7733977400155266, "grad_norm": 0.59375, "learning_rate": 1.3561901077179429e-05, "loss": 1.4138, "step": 4483 }, { "epoch": 0.7735702579142586, "grad_norm": 0.83984375, "learning_rate": 1.3559357584828857e-05, "loss": 1.384, "step": 4484 }, { "epoch": 0.7737427758129906, "grad_norm": 0.76953125, "learning_rate": 1.3556813828778833e-05, "loss": 1.454, "step": 4485 }, { "epoch": 0.7739152937117226, "grad_norm": 0.5703125, "learning_rate": 1.3554269809217817e-05, "loss": 1.3827, "step": 4486 }, { "epoch": 0.7740878116104546, "grad_norm": 0.69921875, "learning_rate": 1.3551725526334286e-05, "loss": 1.4953, "step": 4487 }, { "epoch": 0.7742603295091866, "grad_norm": 0.703125, "learning_rate": 1.3549180980316737e-05, "loss": 1.3566, "step": 4488 }, { "epoch": 0.7744328474079186, "grad_norm": 0.859375, "learning_rate": 1.3546636171353681e-05, "loss": 1.4721, "step": 4489 }, { "epoch": 0.7746053653066506, "grad_norm": 0.68359375, "learning_rate": 1.354409109963366e-05, "loss": 1.4763, "step": 4490 }, { "epoch": 0.7747778832053825, "grad_norm": 0.83984375, "learning_rate": 1.3541545765345222e-05, "loss": 1.4729, "step": 4491 }, { "epoch": 0.7749504011041145, "grad_norm": 0.6640625, "learning_rate": 1.353900016867695e-05, "loss": 1.4975, "step": 4492 }, { "epoch": 0.7751229190028466, "grad_norm": 0.625, "learning_rate": 1.3536454309817433e-05, "loss": 1.4912, "step": 4493 }, { "epoch": 0.7752954369015785, "grad_norm": 1.015625, "learning_rate": 1.3533908188955281e-05, "loss": 1.4514, "step": 4494 }, { "epoch": 0.7754679548003105, "grad_norm": 0.65625, "learning_rate": 1.3531361806279128e-05, "loss": 1.5305, "step": 4495 }, { "epoch": 0.7756404726990426, "grad_norm": 0.6484375, "learning_rate": 1.352881516197763e-05, "loss": 1.4676, "step": 4496 }, { "epoch": 0.7758129905977745, "grad_norm": 0.70703125, "learning_rate": 1.3526268256239456e-05, "loss": 1.4794, "step": 4497 }, { "epoch": 0.7759855084965065, "grad_norm": 0.6171875, "learning_rate": 1.3523721089253296e-05, "loss": 1.4701, "step": 4498 }, { "epoch": 0.7761580263952385, "grad_norm": 0.765625, "learning_rate": 1.352117366120786e-05, "loss": 1.483, "step": 4499 }, { "epoch": 0.7763305442939705, "grad_norm": 0.89453125, "learning_rate": 1.351862597229188e-05, "loss": 1.4353, "step": 4500 }, { "epoch": 0.7763305442939705, "eval_loss": 1.418400526046753, "eval_runtime": 10.861, "eval_samples_per_second": 94.282, "eval_steps_per_second": 23.571, "step": 4500 }, { "epoch": 0.7765030621927025, "grad_norm": 0.58984375, "learning_rate": 1.35160780226941e-05, "loss": 1.5238, "step": 4501 }, { "epoch": 0.7766755800914344, "grad_norm": 0.66796875, "learning_rate": 1.351352981260329e-05, "loss": 1.4664, "step": 4502 }, { "epoch": 0.7768480979901665, "grad_norm": 0.7421875, "learning_rate": 1.351098134220824e-05, "loss": 1.379, "step": 4503 }, { "epoch": 0.7770206158888985, "grad_norm": 0.7890625, "learning_rate": 1.3508432611697755e-05, "loss": 1.3869, "step": 4504 }, { "epoch": 0.7771931337876304, "grad_norm": 0.7421875, "learning_rate": 1.350588362126066e-05, "loss": 1.5221, "step": 4505 }, { "epoch": 0.7773656516863625, "grad_norm": 0.625, "learning_rate": 1.35033343710858e-05, "loss": 1.3187, "step": 4506 }, { "epoch": 0.7775381695850945, "grad_norm": 0.68359375, "learning_rate": 1.350078486136204e-05, "loss": 1.3784, "step": 4507 }, { "epoch": 0.7777106874838264, "grad_norm": 1.0078125, "learning_rate": 1.3498235092278263e-05, "loss": 1.4881, "step": 4508 }, { "epoch": 0.7778832053825584, "grad_norm": 0.62109375, "learning_rate": 1.3495685064023374e-05, "loss": 1.3369, "step": 4509 }, { "epoch": 0.7780557232812905, "grad_norm": 0.61328125, "learning_rate": 1.3493134776786292e-05, "loss": 1.4683, "step": 4510 }, { "epoch": 0.7782282411800224, "grad_norm": 0.9453125, "learning_rate": 1.3490584230755956e-05, "loss": 1.4639, "step": 4511 }, { "epoch": 0.7784007590787544, "grad_norm": 0.59765625, "learning_rate": 1.3488033426121336e-05, "loss": 1.3748, "step": 4512 }, { "epoch": 0.7785732769774865, "grad_norm": 0.5859375, "learning_rate": 1.3485482363071402e-05, "loss": 1.3758, "step": 4513 }, { "epoch": 0.7787457948762184, "grad_norm": 0.61328125, "learning_rate": 1.3482931041795158e-05, "loss": 1.3721, "step": 4514 }, { "epoch": 0.7789183127749504, "grad_norm": 0.671875, "learning_rate": 1.3480379462481619e-05, "loss": 1.4502, "step": 4515 }, { "epoch": 0.7790908306736823, "grad_norm": 0.765625, "learning_rate": 1.3477827625319826e-05, "loss": 1.4347, "step": 4516 }, { "epoch": 0.7792633485724144, "grad_norm": 0.65234375, "learning_rate": 1.3475275530498828e-05, "loss": 1.4338, "step": 4517 }, { "epoch": 0.7794358664711464, "grad_norm": 0.5703125, "learning_rate": 1.3472723178207708e-05, "loss": 1.4674, "step": 4518 }, { "epoch": 0.7796083843698783, "grad_norm": 0.62890625, "learning_rate": 1.3470170568635552e-05, "loss": 1.51, "step": 4519 }, { "epoch": 0.7797809022686104, "grad_norm": 0.65234375, "learning_rate": 1.346761770197148e-05, "loss": 1.4031, "step": 4520 }, { "epoch": 0.7799534201673424, "grad_norm": 0.62890625, "learning_rate": 1.3465064578404623e-05, "loss": 1.4428, "step": 4521 }, { "epoch": 0.7801259380660743, "grad_norm": 0.65234375, "learning_rate": 1.346251119812413e-05, "loss": 1.5229, "step": 4522 }, { "epoch": 0.7802984559648064, "grad_norm": 0.78125, "learning_rate": 1.3459957561319175e-05, "loss": 1.4638, "step": 4523 }, { "epoch": 0.7804709738635384, "grad_norm": 0.625, "learning_rate": 1.3457403668178941e-05, "loss": 1.384, "step": 4524 }, { "epoch": 0.7806434917622703, "grad_norm": 0.7578125, "learning_rate": 1.3454849518892644e-05, "loss": 1.4915, "step": 4525 }, { "epoch": 0.7808160096610023, "grad_norm": 0.703125, "learning_rate": 1.3452295113649505e-05, "loss": 1.4184, "step": 4526 }, { "epoch": 0.7809885275597344, "grad_norm": 0.67578125, "learning_rate": 1.3449740452638773e-05, "loss": 1.3411, "step": 4527 }, { "epoch": 0.7811610454584663, "grad_norm": 0.61328125, "learning_rate": 1.3447185536049716e-05, "loss": 1.4498, "step": 4528 }, { "epoch": 0.7813335633571983, "grad_norm": 0.58984375, "learning_rate": 1.3444630364071614e-05, "loss": 1.4373, "step": 4529 }, { "epoch": 0.7815060812559304, "grad_norm": 0.61328125, "learning_rate": 1.344207493689377e-05, "loss": 1.5779, "step": 4530 }, { "epoch": 0.7816785991546623, "grad_norm": 0.77734375, "learning_rate": 1.3439519254705509e-05, "loss": 1.395, "step": 4531 }, { "epoch": 0.7818511170533943, "grad_norm": 0.609375, "learning_rate": 1.3436963317696172e-05, "loss": 1.498, "step": 4532 }, { "epoch": 0.7820236349521262, "grad_norm": 0.6796875, "learning_rate": 1.3434407126055115e-05, "loss": 1.4939, "step": 4533 }, { "epoch": 0.7821961528508583, "grad_norm": 0.578125, "learning_rate": 1.343185067997172e-05, "loss": 1.411, "step": 4534 }, { "epoch": 0.7823686707495903, "grad_norm": 0.6875, "learning_rate": 1.3429293979635382e-05, "loss": 1.4517, "step": 4535 }, { "epoch": 0.7825411886483222, "grad_norm": 0.640625, "learning_rate": 1.3426737025235521e-05, "loss": 1.4377, "step": 4536 }, { "epoch": 0.7827137065470543, "grad_norm": 0.7890625, "learning_rate": 1.3424179816961572e-05, "loss": 1.4659, "step": 4537 }, { "epoch": 0.7828862244457863, "grad_norm": 0.83984375, "learning_rate": 1.3421622355002981e-05, "loss": 1.5162, "step": 4538 }, { "epoch": 0.7830587423445182, "grad_norm": 0.59765625, "learning_rate": 1.3419064639549232e-05, "loss": 1.4428, "step": 4539 }, { "epoch": 0.7832312602432502, "grad_norm": 0.65234375, "learning_rate": 1.3416506670789807e-05, "loss": 1.454, "step": 4540 }, { "epoch": 0.7834037781419823, "grad_norm": 0.625, "learning_rate": 1.3413948448914223e-05, "loss": 1.4996, "step": 4541 }, { "epoch": 0.7835762960407142, "grad_norm": 0.6484375, "learning_rate": 1.3411389974112006e-05, "loss": 1.4176, "step": 4542 }, { "epoch": 0.7837488139394462, "grad_norm": 0.6015625, "learning_rate": 1.3408831246572703e-05, "loss": 1.4108, "step": 4543 }, { "epoch": 0.7839213318381782, "grad_norm": 0.80078125, "learning_rate": 1.3406272266485882e-05, "loss": 1.4055, "step": 4544 }, { "epoch": 0.7840938497369102, "grad_norm": 0.59375, "learning_rate": 1.340371303404113e-05, "loss": 1.4123, "step": 4545 }, { "epoch": 0.7842663676356422, "grad_norm": 0.7890625, "learning_rate": 1.3401153549428048e-05, "loss": 1.4729, "step": 4546 }, { "epoch": 0.7844388855343742, "grad_norm": 0.671875, "learning_rate": 1.3398593812836259e-05, "loss": 1.5255, "step": 4547 }, { "epoch": 0.7846114034331062, "grad_norm": 0.79296875, "learning_rate": 1.3396033824455408e-05, "loss": 1.4295, "step": 4548 }, { "epoch": 0.7847839213318382, "grad_norm": 0.64453125, "learning_rate": 1.339347358447515e-05, "loss": 1.3872, "step": 4549 }, { "epoch": 0.7849564392305701, "grad_norm": 0.69921875, "learning_rate": 1.3390913093085165e-05, "loss": 1.3967, "step": 4550 }, { "epoch": 0.7851289571293022, "grad_norm": 0.671875, "learning_rate": 1.338835235047515e-05, "loss": 1.4759, "step": 4551 }, { "epoch": 0.7853014750280342, "grad_norm": 0.83984375, "learning_rate": 1.3385791356834821e-05, "loss": 1.3507, "step": 4552 }, { "epoch": 0.7854739929267661, "grad_norm": 0.578125, "learning_rate": 1.3383230112353917e-05, "loss": 1.3755, "step": 4553 }, { "epoch": 0.7856465108254982, "grad_norm": 0.59765625, "learning_rate": 1.3380668617222183e-05, "loss": 1.4981, "step": 4554 }, { "epoch": 0.7858190287242302, "grad_norm": 0.578125, "learning_rate": 1.3378106871629395e-05, "loss": 1.4185, "step": 4555 }, { "epoch": 0.7859915466229621, "grad_norm": 0.60546875, "learning_rate": 1.3375544875765343e-05, "loss": 1.5073, "step": 4556 }, { "epoch": 0.7861640645216941, "grad_norm": 0.6328125, "learning_rate": 1.3372982629819837e-05, "loss": 1.4959, "step": 4557 }, { "epoch": 0.7863365824204261, "grad_norm": 0.62109375, "learning_rate": 1.33704201339827e-05, "loss": 1.4126, "step": 4558 }, { "epoch": 0.7865091003191581, "grad_norm": 0.55859375, "learning_rate": 1.336785738844378e-05, "loss": 1.456, "step": 4559 }, { "epoch": 0.7866816182178901, "grad_norm": 0.75390625, "learning_rate": 1.3365294393392942e-05, "loss": 1.545, "step": 4560 }, { "epoch": 0.7868541361166221, "grad_norm": 0.6171875, "learning_rate": 1.336273114902007e-05, "loss": 1.4324, "step": 4561 }, { "epoch": 0.7870266540153541, "grad_norm": 0.59375, "learning_rate": 1.336016765551506e-05, "loss": 1.4027, "step": 4562 }, { "epoch": 0.7871991719140861, "grad_norm": 0.671875, "learning_rate": 1.3357603913067836e-05, "loss": 1.4484, "step": 4563 }, { "epoch": 0.7873716898128181, "grad_norm": 0.6015625, "learning_rate": 1.3355039921868334e-05, "loss": 1.447, "step": 4564 }, { "epoch": 0.7875442077115501, "grad_norm": 0.6484375, "learning_rate": 1.3352475682106515e-05, "loss": 1.5331, "step": 4565 }, { "epoch": 0.787716725610282, "grad_norm": 0.57421875, "learning_rate": 1.3349911193972345e-05, "loss": 1.5616, "step": 4566 }, { "epoch": 0.787889243509014, "grad_norm": 0.62890625, "learning_rate": 1.3347346457655826e-05, "loss": 1.4837, "step": 4567 }, { "epoch": 0.7880617614077461, "grad_norm": 0.8359375, "learning_rate": 1.3344781473346964e-05, "loss": 1.4843, "step": 4568 }, { "epoch": 0.788234279306478, "grad_norm": 0.57421875, "learning_rate": 1.3342216241235794e-05, "loss": 1.3826, "step": 4569 }, { "epoch": 0.78840679720521, "grad_norm": 0.57421875, "learning_rate": 1.3339650761512361e-05, "loss": 1.4971, "step": 4570 }, { "epoch": 0.7885793151039421, "grad_norm": 0.68359375, "learning_rate": 1.333708503436673e-05, "loss": 1.4259, "step": 4571 }, { "epoch": 0.788751833002674, "grad_norm": 0.68359375, "learning_rate": 1.333451905998899e-05, "loss": 1.5063, "step": 4572 }, { "epoch": 0.788924350901406, "grad_norm": 0.640625, "learning_rate": 1.3331952838569246e-05, "loss": 1.4997, "step": 4573 }, { "epoch": 0.789096868800138, "grad_norm": 0.703125, "learning_rate": 1.3329386370297615e-05, "loss": 1.4358, "step": 4574 }, { "epoch": 0.78926938669887, "grad_norm": 0.6484375, "learning_rate": 1.3326819655364236e-05, "loss": 1.4828, "step": 4575 }, { "epoch": 0.789441904597602, "grad_norm": 0.63671875, "learning_rate": 1.3324252693959271e-05, "loss": 1.4323, "step": 4576 }, { "epoch": 0.789614422496334, "grad_norm": 0.6640625, "learning_rate": 1.3321685486272898e-05, "loss": 1.5302, "step": 4577 }, { "epoch": 0.789786940395066, "grad_norm": 0.76953125, "learning_rate": 1.3319118032495306e-05, "loss": 1.5162, "step": 4578 }, { "epoch": 0.789959458293798, "grad_norm": 0.7578125, "learning_rate": 1.3316550332816713e-05, "loss": 1.5533, "step": 4579 }, { "epoch": 0.79013197619253, "grad_norm": 0.64453125, "learning_rate": 1.3313982387427346e-05, "loss": 1.3622, "step": 4580 }, { "epoch": 0.790304494091262, "grad_norm": 0.5703125, "learning_rate": 1.3311414196517462e-05, "loss": 1.3823, "step": 4581 }, { "epoch": 0.790477011989994, "grad_norm": 0.68359375, "learning_rate": 1.3308845760277322e-05, "loss": 1.4654, "step": 4582 }, { "epoch": 0.7906495298887259, "grad_norm": 0.58984375, "learning_rate": 1.3306277078897211e-05, "loss": 1.3795, "step": 4583 }, { "epoch": 0.7908220477874579, "grad_norm": 0.63671875, "learning_rate": 1.3303708152567439e-05, "loss": 1.4473, "step": 4584 }, { "epoch": 0.79099456568619, "grad_norm": 0.6484375, "learning_rate": 1.3301138981478322e-05, "loss": 1.4669, "step": 4585 }, { "epoch": 0.7911670835849219, "grad_norm": 1.8125, "learning_rate": 1.3298569565820205e-05, "loss": 1.4592, "step": 4586 }, { "epoch": 0.7913396014836539, "grad_norm": 0.6640625, "learning_rate": 1.3295999905783444e-05, "loss": 1.463, "step": 4587 }, { "epoch": 0.791512119382386, "grad_norm": 0.609375, "learning_rate": 1.3293430001558411e-05, "loss": 1.4708, "step": 4588 }, { "epoch": 0.7916846372811179, "grad_norm": 0.58203125, "learning_rate": 1.3290859853335512e-05, "loss": 1.4816, "step": 4589 }, { "epoch": 0.7918571551798499, "grad_norm": 0.5859375, "learning_rate": 1.3288289461305149e-05, "loss": 1.4373, "step": 4590 }, { "epoch": 0.7920296730785819, "grad_norm": 0.6875, "learning_rate": 1.3285718825657754e-05, "loss": 1.4675, "step": 4591 }, { "epoch": 0.7922021909773139, "grad_norm": 0.6875, "learning_rate": 1.3283147946583781e-05, "loss": 1.3418, "step": 4592 }, { "epoch": 0.7923747088760459, "grad_norm": 0.65234375, "learning_rate": 1.3280576824273694e-05, "loss": 1.4304, "step": 4593 }, { "epoch": 0.7925472267747778, "grad_norm": 0.61328125, "learning_rate": 1.3278005458917978e-05, "loss": 1.4713, "step": 4594 }, { "epoch": 0.7927197446735099, "grad_norm": 0.68359375, "learning_rate": 1.3275433850707136e-05, "loss": 1.3984, "step": 4595 }, { "epoch": 0.7928922625722419, "grad_norm": 0.5546875, "learning_rate": 1.3272861999831688e-05, "loss": 1.3734, "step": 4596 }, { "epoch": 0.7930647804709738, "grad_norm": 0.59375, "learning_rate": 1.3270289906482174e-05, "loss": 1.4992, "step": 4597 }, { "epoch": 0.7932372983697059, "grad_norm": 0.625, "learning_rate": 1.326771757084915e-05, "loss": 1.416, "step": 4598 }, { "epoch": 0.7934098162684379, "grad_norm": 0.609375, "learning_rate": 1.326514499312319e-05, "loss": 1.4168, "step": 4599 }, { "epoch": 0.7935823341671698, "grad_norm": 0.66796875, "learning_rate": 1.3262572173494888e-05, "loss": 1.481, "step": 4600 }, { "epoch": 0.7935823341671698, "eval_loss": 1.4177677631378174, "eval_runtime": 10.9052, "eval_samples_per_second": 93.9, "eval_steps_per_second": 23.475, "step": 4600 }, { "epoch": 0.7937548520659018, "grad_norm": 2.765625, "learning_rate": 1.3259999112154854e-05, "loss": 1.446, "step": 4601 }, { "epoch": 0.7939273699646339, "grad_norm": 0.60546875, "learning_rate": 1.3257425809293714e-05, "loss": 1.5257, "step": 4602 }, { "epoch": 0.7940998878633658, "grad_norm": 0.609375, "learning_rate": 1.3254852265102118e-05, "loss": 1.4193, "step": 4603 }, { "epoch": 0.7942724057620978, "grad_norm": 0.5859375, "learning_rate": 1.325227847977073e-05, "loss": 1.4235, "step": 4604 }, { "epoch": 0.7944449236608299, "grad_norm": 0.60546875, "learning_rate": 1.324970445349023e-05, "loss": 1.3991, "step": 4605 }, { "epoch": 0.7946174415595618, "grad_norm": 0.66015625, "learning_rate": 1.3247130186451321e-05, "loss": 1.4472, "step": 4606 }, { "epoch": 0.7947899594582938, "grad_norm": 0.609375, "learning_rate": 1.3244555678844717e-05, "loss": 1.4479, "step": 4607 }, { "epoch": 0.7949624773570257, "grad_norm": 2.09375, "learning_rate": 1.3241980930861153e-05, "loss": 1.3988, "step": 4608 }, { "epoch": 0.7951349952557578, "grad_norm": 0.7890625, "learning_rate": 1.3239405942691388e-05, "loss": 1.5663, "step": 4609 }, { "epoch": 0.7953075131544898, "grad_norm": 0.66796875, "learning_rate": 1.323683071452619e-05, "loss": 1.4208, "step": 4610 }, { "epoch": 0.7954800310532217, "grad_norm": 0.62890625, "learning_rate": 1.3234255246556347e-05, "loss": 1.3979, "step": 4611 }, { "epoch": 0.7956525489519538, "grad_norm": 0.63671875, "learning_rate": 1.3231679538972668e-05, "loss": 1.4996, "step": 4612 }, { "epoch": 0.7958250668506858, "grad_norm": 0.86328125, "learning_rate": 1.3229103591965977e-05, "loss": 1.4751, "step": 4613 }, { "epoch": 0.7959975847494177, "grad_norm": 0.58984375, "learning_rate": 1.3226527405727115e-05, "loss": 1.4927, "step": 4614 }, { "epoch": 0.7961701026481497, "grad_norm": 0.62109375, "learning_rate": 1.322395098044694e-05, "loss": 1.5126, "step": 4615 }, { "epoch": 0.7963426205468818, "grad_norm": 0.66796875, "learning_rate": 1.3221374316316334e-05, "loss": 1.4083, "step": 4616 }, { "epoch": 0.7965151384456137, "grad_norm": 0.66796875, "learning_rate": 1.321879741352619e-05, "loss": 1.4313, "step": 4617 }, { "epoch": 0.7966876563443457, "grad_norm": 0.67578125, "learning_rate": 1.3216220272267421e-05, "loss": 1.4374, "step": 4618 }, { "epoch": 0.7968601742430778, "grad_norm": 0.55078125, "learning_rate": 1.321364289273096e-05, "loss": 1.4172, "step": 4619 }, { "epoch": 0.7970326921418097, "grad_norm": 0.609375, "learning_rate": 1.3211065275107754e-05, "loss": 1.469, "step": 4620 }, { "epoch": 0.7972052100405417, "grad_norm": 0.6796875, "learning_rate": 1.3208487419588768e-05, "loss": 1.4686, "step": 4621 }, { "epoch": 0.7973777279392738, "grad_norm": 0.75, "learning_rate": 1.3205909326364986e-05, "loss": 1.5068, "step": 4622 }, { "epoch": 0.7975502458380057, "grad_norm": 0.6796875, "learning_rate": 1.320333099562741e-05, "loss": 1.4224, "step": 4623 }, { "epoch": 0.7977227637367377, "grad_norm": 0.62890625, "learning_rate": 1.3200752427567056e-05, "loss": 1.4457, "step": 4624 }, { "epoch": 0.7978952816354696, "grad_norm": 0.609375, "learning_rate": 1.3198173622374966e-05, "loss": 1.3616, "step": 4625 }, { "epoch": 0.7980677995342017, "grad_norm": 0.57421875, "learning_rate": 1.319559458024219e-05, "loss": 1.4188, "step": 4626 }, { "epoch": 0.7982403174329337, "grad_norm": 0.59765625, "learning_rate": 1.31930153013598e-05, "loss": 1.4014, "step": 4627 }, { "epoch": 0.7984128353316656, "grad_norm": 0.85546875, "learning_rate": 1.3190435785918887e-05, "loss": 1.4106, "step": 4628 }, { "epoch": 0.7985853532303977, "grad_norm": 0.59375, "learning_rate": 1.3187856034110555e-05, "loss": 1.4869, "step": 4629 }, { "epoch": 0.7987578711291297, "grad_norm": 0.63671875, "learning_rate": 1.318527604612593e-05, "loss": 1.5005, "step": 4630 }, { "epoch": 0.7989303890278616, "grad_norm": 0.6875, "learning_rate": 1.3182695822156153e-05, "loss": 1.5098, "step": 4631 }, { "epoch": 0.7991029069265936, "grad_norm": 0.6328125, "learning_rate": 1.3180115362392383e-05, "loss": 1.5083, "step": 4632 }, { "epoch": 0.7992754248253257, "grad_norm": 0.58984375, "learning_rate": 1.31775346670258e-05, "loss": 1.4761, "step": 4633 }, { "epoch": 0.7994479427240576, "grad_norm": 0.55859375, "learning_rate": 1.3174953736247589e-05, "loss": 1.4419, "step": 4634 }, { "epoch": 0.7996204606227896, "grad_norm": 0.56640625, "learning_rate": 1.317237257024897e-05, "loss": 1.3964, "step": 4635 }, { "epoch": 0.7997929785215216, "grad_norm": 0.69921875, "learning_rate": 1.3169791169221168e-05, "loss": 1.3349, "step": 4636 }, { "epoch": 0.7999654964202536, "grad_norm": 0.6875, "learning_rate": 1.3167209533355432e-05, "loss": 1.3971, "step": 4637 }, { "epoch": 0.8001380143189856, "grad_norm": 0.546875, "learning_rate": 1.3164627662843024e-05, "loss": 1.4125, "step": 4638 }, { "epoch": 0.8003105322177176, "grad_norm": 0.7265625, "learning_rate": 1.3162045557875223e-05, "loss": 1.4942, "step": 4639 }, { "epoch": 0.8004830501164496, "grad_norm": 0.60546875, "learning_rate": 1.315946321864333e-05, "loss": 1.469, "step": 4640 }, { "epoch": 0.8006555680151816, "grad_norm": 0.69921875, "learning_rate": 1.3156880645338663e-05, "loss": 1.4833, "step": 4641 }, { "epoch": 0.8008280859139135, "grad_norm": 0.61328125, "learning_rate": 1.315429783815255e-05, "loss": 1.5352, "step": 4642 }, { "epoch": 0.8010006038126456, "grad_norm": 0.7109375, "learning_rate": 1.3151714797276345e-05, "loss": 1.4352, "step": 4643 }, { "epoch": 0.8011731217113776, "grad_norm": 0.74609375, "learning_rate": 1.3149131522901417e-05, "loss": 1.367, "step": 4644 }, { "epoch": 0.8013456396101095, "grad_norm": 0.64453125, "learning_rate": 1.3146548015219147e-05, "loss": 1.3862, "step": 4645 }, { "epoch": 0.8015181575088416, "grad_norm": 0.62890625, "learning_rate": 1.3143964274420941e-05, "loss": 1.3852, "step": 4646 }, { "epoch": 0.8016906754075735, "grad_norm": 0.65234375, "learning_rate": 1.3141380300698217e-05, "loss": 1.4443, "step": 4647 }, { "epoch": 0.8018631933063055, "grad_norm": 0.62109375, "learning_rate": 1.313879609424241e-05, "loss": 1.4288, "step": 4648 }, { "epoch": 0.8020357112050375, "grad_norm": 0.640625, "learning_rate": 1.3136211655244979e-05, "loss": 1.4431, "step": 4649 }, { "epoch": 0.8022082291037695, "grad_norm": 0.69921875, "learning_rate": 1.3133626983897392e-05, "loss": 1.4145, "step": 4650 }, { "epoch": 0.8023807470025015, "grad_norm": 0.61328125, "learning_rate": 1.3131042080391136e-05, "loss": 1.3694, "step": 4651 }, { "epoch": 0.8025532649012335, "grad_norm": 0.6015625, "learning_rate": 1.312845694491772e-05, "loss": 1.4015, "step": 4652 }, { "epoch": 0.8027257827999655, "grad_norm": 0.59765625, "learning_rate": 1.3125871577668665e-05, "loss": 1.4519, "step": 4653 }, { "epoch": 0.8028983006986975, "grad_norm": 0.671875, "learning_rate": 1.3123285978835517e-05, "loss": 1.4086, "step": 4654 }, { "epoch": 0.8030708185974295, "grad_norm": 0.7265625, "learning_rate": 1.3120700148609824e-05, "loss": 1.4072, "step": 4655 }, { "epoch": 0.8032433364961615, "grad_norm": 0.71875, "learning_rate": 1.3118114087183164e-05, "loss": 1.3822, "step": 4656 }, { "epoch": 0.8034158543948935, "grad_norm": 0.59375, "learning_rate": 1.3115527794747134e-05, "loss": 1.3953, "step": 4657 }, { "epoch": 0.8035883722936255, "grad_norm": 0.62890625, "learning_rate": 1.3112941271493336e-05, "loss": 1.4183, "step": 4658 }, { "epoch": 0.8037608901923574, "grad_norm": 0.64453125, "learning_rate": 1.3110354517613396e-05, "loss": 1.4297, "step": 4659 }, { "epoch": 0.8039334080910895, "grad_norm": 0.6640625, "learning_rate": 1.310776753329896e-05, "loss": 1.5198, "step": 4660 }, { "epoch": 0.8041059259898214, "grad_norm": 0.6328125, "learning_rate": 1.3105180318741691e-05, "loss": 1.3614, "step": 4661 }, { "epoch": 0.8042784438885534, "grad_norm": 0.63671875, "learning_rate": 1.3102592874133257e-05, "loss": 1.3834, "step": 4662 }, { "epoch": 0.8044509617872855, "grad_norm": 0.6171875, "learning_rate": 1.310000519966536e-05, "loss": 1.5917, "step": 4663 }, { "epoch": 0.8046234796860174, "grad_norm": 0.62890625, "learning_rate": 1.3097417295529706e-05, "loss": 1.5054, "step": 4664 }, { "epoch": 0.8047959975847494, "grad_norm": 0.60546875, "learning_rate": 1.3094829161918028e-05, "loss": 1.5035, "step": 4665 }, { "epoch": 0.8049685154834814, "grad_norm": 0.58984375, "learning_rate": 1.3092240799022065e-05, "loss": 1.3778, "step": 4666 }, { "epoch": 0.8051410333822134, "grad_norm": 0.6953125, "learning_rate": 1.3089652207033583e-05, "loss": 1.5439, "step": 4667 }, { "epoch": 0.8053135512809454, "grad_norm": 0.62890625, "learning_rate": 1.3087063386144361e-05, "loss": 1.491, "step": 4668 }, { "epoch": 0.8054860691796774, "grad_norm": 0.62109375, "learning_rate": 1.3084474336546196e-05, "loss": 1.4339, "step": 4669 }, { "epoch": 0.8056585870784094, "grad_norm": 0.6015625, "learning_rate": 1.3081885058430899e-05, "loss": 1.4916, "step": 4670 }, { "epoch": 0.8058311049771414, "grad_norm": 0.72265625, "learning_rate": 1.30792955519903e-05, "loss": 1.4565, "step": 4671 }, { "epoch": 0.8060036228758733, "grad_norm": 0.63671875, "learning_rate": 1.3076705817416242e-05, "loss": 1.5596, "step": 4672 }, { "epoch": 0.8061761407746053, "grad_norm": 0.67578125, "learning_rate": 1.3074115854900598e-05, "loss": 1.4778, "step": 4673 }, { "epoch": 0.8063486586733374, "grad_norm": 0.65625, "learning_rate": 1.3071525664635241e-05, "loss": 1.4988, "step": 4674 }, { "epoch": 0.8065211765720693, "grad_norm": 0.6875, "learning_rate": 1.306893524681207e-05, "loss": 1.3927, "step": 4675 }, { "epoch": 0.8066936944708013, "grad_norm": 0.6640625, "learning_rate": 1.3066344601623004e-05, "loss": 1.4619, "step": 4676 }, { "epoch": 0.8068662123695334, "grad_norm": 0.74609375, "learning_rate": 1.3063753729259972e-05, "loss": 1.4088, "step": 4677 }, { "epoch": 0.8070387302682653, "grad_norm": 0.60546875, "learning_rate": 1.3061162629914917e-05, "loss": 1.491, "step": 4678 }, { "epoch": 0.8072112481669973, "grad_norm": 0.58984375, "learning_rate": 1.3058571303779806e-05, "loss": 1.4972, "step": 4679 }, { "epoch": 0.8073837660657294, "grad_norm": 0.66015625, "learning_rate": 1.3055979751046624e-05, "loss": 1.4425, "step": 4680 }, { "epoch": 0.8075562839644613, "grad_norm": 0.6484375, "learning_rate": 1.3053387971907368e-05, "loss": 1.4029, "step": 4681 }, { "epoch": 0.8077288018631933, "grad_norm": 0.6015625, "learning_rate": 1.3050795966554051e-05, "loss": 1.4645, "step": 4682 }, { "epoch": 0.8079013197619253, "grad_norm": 0.55078125, "learning_rate": 1.3048203735178709e-05, "loss": 1.4078, "step": 4683 }, { "epoch": 0.8080738376606573, "grad_norm": 0.64453125, "learning_rate": 1.3045611277973385e-05, "loss": 1.5524, "step": 4684 }, { "epoch": 0.8082463555593893, "grad_norm": 0.5859375, "learning_rate": 1.3043018595130148e-05, "loss": 1.4131, "step": 4685 }, { "epoch": 0.8084188734581212, "grad_norm": 0.6484375, "learning_rate": 1.3040425686841083e-05, "loss": 1.4879, "step": 4686 }, { "epoch": 0.8085913913568533, "grad_norm": 0.66015625, "learning_rate": 1.3037832553298282e-05, "loss": 1.3294, "step": 4687 }, { "epoch": 0.8087639092555853, "grad_norm": 0.68359375, "learning_rate": 1.3035239194693865e-05, "loss": 1.4486, "step": 4688 }, { "epoch": 0.8089364271543172, "grad_norm": 0.62890625, "learning_rate": 1.3032645611219965e-05, "loss": 1.4143, "step": 4689 }, { "epoch": 0.8091089450530492, "grad_norm": 0.7578125, "learning_rate": 1.3030051803068729e-05, "loss": 1.4405, "step": 4690 }, { "epoch": 0.8092814629517813, "grad_norm": 0.61328125, "learning_rate": 1.3027457770432322e-05, "loss": 1.4022, "step": 4691 }, { "epoch": 0.8094539808505132, "grad_norm": 0.6796875, "learning_rate": 1.3024863513502926e-05, "loss": 1.4181, "step": 4692 }, { "epoch": 0.8096264987492452, "grad_norm": 0.57421875, "learning_rate": 1.3022269032472745e-05, "loss": 1.5205, "step": 4693 }, { "epoch": 0.8097990166479773, "grad_norm": 0.66796875, "learning_rate": 1.3019674327533984e-05, "loss": 1.4996, "step": 4694 }, { "epoch": 0.8099715345467092, "grad_norm": 0.62109375, "learning_rate": 1.3017079398878884e-05, "loss": 1.3902, "step": 4695 }, { "epoch": 0.8101440524454412, "grad_norm": 0.66015625, "learning_rate": 1.3014484246699693e-05, "loss": 1.4089, "step": 4696 }, { "epoch": 0.8103165703441733, "grad_norm": 0.6484375, "learning_rate": 1.3011888871188669e-05, "loss": 1.4944, "step": 4697 }, { "epoch": 0.8104890882429052, "grad_norm": 0.64453125, "learning_rate": 1.3009293272538104e-05, "loss": 1.438, "step": 4698 }, { "epoch": 0.8106616061416372, "grad_norm": 0.578125, "learning_rate": 1.3006697450940284e-05, "loss": 1.4211, "step": 4699 }, { "epoch": 0.8108341240403691, "grad_norm": 0.93359375, "learning_rate": 1.3004101406587535e-05, "loss": 1.3598, "step": 4700 }, { "epoch": 0.8108341240403691, "eval_loss": 1.4166769981384277, "eval_runtime": 10.9127, "eval_samples_per_second": 93.836, "eval_steps_per_second": 23.459, "step": 4700 }, { "epoch": 0.8110066419391012, "grad_norm": 0.62890625, "learning_rate": 1.300150513967218e-05, "loss": 1.4421, "step": 4701 }, { "epoch": 0.8111791598378332, "grad_norm": 0.69921875, "learning_rate": 1.2998908650386573e-05, "loss": 1.4649, "step": 4702 }, { "epoch": 0.8113516777365651, "grad_norm": 0.68359375, "learning_rate": 1.2996311938923074e-05, "loss": 1.5004, "step": 4703 }, { "epoch": 0.8115241956352972, "grad_norm": 0.671875, "learning_rate": 1.299371500547406e-05, "loss": 1.3757, "step": 4704 }, { "epoch": 0.8116967135340292, "grad_norm": 0.68359375, "learning_rate": 1.299111785023194e-05, "loss": 1.471, "step": 4705 }, { "epoch": 0.8118692314327611, "grad_norm": 0.5859375, "learning_rate": 1.2988520473389117e-05, "loss": 1.4475, "step": 4706 }, { "epoch": 0.8120417493314931, "grad_norm": 0.56640625, "learning_rate": 1.2985922875138025e-05, "loss": 1.417, "step": 4707 }, { "epoch": 0.8122142672302252, "grad_norm": 0.66015625, "learning_rate": 1.2983325055671108e-05, "loss": 1.4515, "step": 4708 }, { "epoch": 0.8123867851289571, "grad_norm": 0.63671875, "learning_rate": 1.2980727015180833e-05, "loss": 1.4563, "step": 4709 }, { "epoch": 0.8125593030276891, "grad_norm": 0.92578125, "learning_rate": 1.2978128753859674e-05, "loss": 1.4191, "step": 4710 }, { "epoch": 0.8127318209264212, "grad_norm": 0.6171875, "learning_rate": 1.2975530271900127e-05, "loss": 1.3702, "step": 4711 }, { "epoch": 0.8129043388251531, "grad_norm": 0.69140625, "learning_rate": 1.2972931569494707e-05, "loss": 1.4901, "step": 4712 }, { "epoch": 0.8130768567238851, "grad_norm": 0.7734375, "learning_rate": 1.2970332646835942e-05, "loss": 1.4811, "step": 4713 }, { "epoch": 0.8132493746226171, "grad_norm": 0.6328125, "learning_rate": 1.2967733504116375e-05, "loss": 1.4553, "step": 4714 }, { "epoch": 0.8134218925213491, "grad_norm": 0.578125, "learning_rate": 1.2965134141528565e-05, "loss": 1.4019, "step": 4715 }, { "epoch": 0.8135944104200811, "grad_norm": 0.61328125, "learning_rate": 1.2962534559265092e-05, "loss": 1.435, "step": 4716 }, { "epoch": 0.813766928318813, "grad_norm": 0.765625, "learning_rate": 1.2959934757518548e-05, "loss": 1.4376, "step": 4717 }, { "epoch": 0.8139394462175451, "grad_norm": 0.59375, "learning_rate": 1.2957334736481544e-05, "loss": 1.4521, "step": 4718 }, { "epoch": 0.8141119641162771, "grad_norm": 0.73046875, "learning_rate": 1.2954734496346704e-05, "loss": 1.4023, "step": 4719 }, { "epoch": 0.814284482015009, "grad_norm": 0.87109375, "learning_rate": 1.2952134037306667e-05, "loss": 1.4733, "step": 4720 }, { "epoch": 0.8144569999137411, "grad_norm": 0.59375, "learning_rate": 1.2949533359554099e-05, "loss": 1.4591, "step": 4721 }, { "epoch": 0.8146295178124731, "grad_norm": 0.6328125, "learning_rate": 1.2946932463281667e-05, "loss": 1.4012, "step": 4722 }, { "epoch": 0.814802035711205, "grad_norm": 0.83203125, "learning_rate": 1.2944331348682067e-05, "loss": 1.476, "step": 4723 }, { "epoch": 0.814974553609937, "grad_norm": 0.6015625, "learning_rate": 1.2941730015948001e-05, "loss": 1.4007, "step": 4724 }, { "epoch": 0.815147071508669, "grad_norm": 0.63671875, "learning_rate": 1.2939128465272197e-05, "loss": 1.3761, "step": 4725 }, { "epoch": 0.815319589407401, "grad_norm": 0.640625, "learning_rate": 1.2936526696847392e-05, "loss": 1.3495, "step": 4726 }, { "epoch": 0.815492107306133, "grad_norm": 0.6171875, "learning_rate": 1.2933924710866342e-05, "loss": 1.4033, "step": 4727 }, { "epoch": 0.815664625204865, "grad_norm": 0.5859375, "learning_rate": 1.2931322507521813e-05, "loss": 1.456, "step": 4728 }, { "epoch": 0.815837143103597, "grad_norm": 0.56640625, "learning_rate": 1.29287200870066e-05, "loss": 1.3995, "step": 4729 }, { "epoch": 0.816009661002329, "grad_norm": 0.6640625, "learning_rate": 1.2926117449513505e-05, "loss": 1.4896, "step": 4730 }, { "epoch": 0.816182178901061, "grad_norm": 0.7265625, "learning_rate": 1.2923514595235345e-05, "loss": 1.4255, "step": 4731 }, { "epoch": 0.816354696799793, "grad_norm": 0.671875, "learning_rate": 1.2920911524364954e-05, "loss": 1.4791, "step": 4732 }, { "epoch": 0.816527214698525, "grad_norm": 0.57421875, "learning_rate": 1.291830823709519e-05, "loss": 1.4537, "step": 4733 }, { "epoch": 0.8166997325972569, "grad_norm": 0.69140625, "learning_rate": 1.2915704733618917e-05, "loss": 1.5038, "step": 4734 }, { "epoch": 0.816872250495989, "grad_norm": 0.84375, "learning_rate": 1.2913101014129017e-05, "loss": 1.5189, "step": 4735 }, { "epoch": 0.817044768394721, "grad_norm": 0.62109375, "learning_rate": 1.291049707881839e-05, "loss": 1.3664, "step": 4736 }, { "epoch": 0.8172172862934529, "grad_norm": 0.6875, "learning_rate": 1.2907892927879959e-05, "loss": 1.3988, "step": 4737 }, { "epoch": 0.817389804192185, "grad_norm": 0.62890625, "learning_rate": 1.2905288561506649e-05, "loss": 1.4323, "step": 4738 }, { "epoch": 0.817562322090917, "grad_norm": 0.60546875, "learning_rate": 1.2902683979891406e-05, "loss": 1.4768, "step": 4739 }, { "epoch": 0.8177348399896489, "grad_norm": 0.640625, "learning_rate": 1.29000791832272e-05, "loss": 1.4621, "step": 4740 }, { "epoch": 0.8179073578883809, "grad_norm": 0.65234375, "learning_rate": 1.2897474171707008e-05, "loss": 1.3043, "step": 4741 }, { "epoch": 0.8180798757871129, "grad_norm": 0.68359375, "learning_rate": 1.2894868945523826e-05, "loss": 1.4202, "step": 4742 }, { "epoch": 0.8182523936858449, "grad_norm": 0.6484375, "learning_rate": 1.2892263504870662e-05, "loss": 1.4528, "step": 4743 }, { "epoch": 0.8184249115845769, "grad_norm": 0.65625, "learning_rate": 1.2889657849940547e-05, "loss": 1.4524, "step": 4744 }, { "epoch": 0.8185974294833089, "grad_norm": 0.63671875, "learning_rate": 1.2887051980926522e-05, "loss": 1.4295, "step": 4745 }, { "epoch": 0.8187699473820409, "grad_norm": 0.68359375, "learning_rate": 1.2884445898021649e-05, "loss": 1.4305, "step": 4746 }, { "epoch": 0.8189424652807729, "grad_norm": 0.59765625, "learning_rate": 1.2881839601419e-05, "loss": 1.5478, "step": 4747 }, { "epoch": 0.8191149831795048, "grad_norm": 0.5859375, "learning_rate": 1.2879233091311667e-05, "loss": 1.6081, "step": 4748 }, { "epoch": 0.8192875010782369, "grad_norm": 0.6171875, "learning_rate": 1.2876626367892754e-05, "loss": 1.4978, "step": 4749 }, { "epoch": 0.8194600189769689, "grad_norm": 0.62109375, "learning_rate": 1.2874019431355392e-05, "loss": 1.4072, "step": 4750 }, { "epoch": 0.8196325368757008, "grad_norm": 0.57421875, "learning_rate": 1.2871412281892705e-05, "loss": 1.4681, "step": 4751 }, { "epoch": 0.8198050547744329, "grad_norm": 0.58984375, "learning_rate": 1.2868804919697858e-05, "loss": 1.4266, "step": 4752 }, { "epoch": 0.8199775726731648, "grad_norm": 0.58203125, "learning_rate": 1.286619734496402e-05, "loss": 1.4385, "step": 4753 }, { "epoch": 0.8201500905718968, "grad_norm": 0.6875, "learning_rate": 1.2863589557884371e-05, "loss": 1.5308, "step": 4754 }, { "epoch": 0.8203226084706289, "grad_norm": 0.62890625, "learning_rate": 1.2860981558652114e-05, "loss": 1.3169, "step": 4755 }, { "epoch": 0.8204951263693608, "grad_norm": 2.109375, "learning_rate": 1.2858373347460469e-05, "loss": 1.4768, "step": 4756 }, { "epoch": 0.8206676442680928, "grad_norm": 0.765625, "learning_rate": 1.2855764924502665e-05, "loss": 1.4356, "step": 4757 }, { "epoch": 0.8208401621668248, "grad_norm": 0.61328125, "learning_rate": 1.2853156289971955e-05, "loss": 1.5222, "step": 4758 }, { "epoch": 0.8210126800655568, "grad_norm": 0.5859375, "learning_rate": 1.2850547444061597e-05, "loss": 1.523, "step": 4759 }, { "epoch": 0.8211851979642888, "grad_norm": 0.65625, "learning_rate": 1.2847938386964871e-05, "loss": 1.4683, "step": 4760 }, { "epoch": 0.8213577158630208, "grad_norm": 0.62109375, "learning_rate": 1.2845329118875079e-05, "loss": 1.4199, "step": 4761 }, { "epoch": 0.8215302337617528, "grad_norm": 0.65625, "learning_rate": 1.2842719639985525e-05, "loss": 1.4601, "step": 4762 }, { "epoch": 0.8217027516604848, "grad_norm": 0.56640625, "learning_rate": 1.2840109950489538e-05, "loss": 1.3938, "step": 4763 }, { "epoch": 0.8218752695592167, "grad_norm": 1.109375, "learning_rate": 1.2837500050580463e-05, "loss": 1.3915, "step": 4764 }, { "epoch": 0.8220477874579487, "grad_norm": 0.69921875, "learning_rate": 1.2834889940451652e-05, "loss": 1.3742, "step": 4765 }, { "epoch": 0.8222203053566808, "grad_norm": 0.62109375, "learning_rate": 1.2832279620296481e-05, "loss": 1.4963, "step": 4766 }, { "epoch": 0.8223928232554127, "grad_norm": 0.60546875, "learning_rate": 1.2829669090308339e-05, "loss": 1.4451, "step": 4767 }, { "epoch": 0.8225653411541447, "grad_norm": 0.66015625, "learning_rate": 1.2827058350680632e-05, "loss": 1.4309, "step": 4768 }, { "epoch": 0.8227378590528768, "grad_norm": 0.59375, "learning_rate": 1.2824447401606776e-05, "loss": 1.5286, "step": 4769 }, { "epoch": 0.8229103769516087, "grad_norm": 0.72265625, "learning_rate": 1.2821836243280209e-05, "loss": 1.41, "step": 4770 }, { "epoch": 0.8230828948503407, "grad_norm": 0.73828125, "learning_rate": 1.281922487589438e-05, "loss": 1.5432, "step": 4771 }, { "epoch": 0.8232554127490728, "grad_norm": 0.60546875, "learning_rate": 1.2816613299642758e-05, "loss": 1.4329, "step": 4772 }, { "epoch": 0.8234279306478047, "grad_norm": 0.62109375, "learning_rate": 1.2814001514718824e-05, "loss": 1.4705, "step": 4773 }, { "epoch": 0.8236004485465367, "grad_norm": 0.73046875, "learning_rate": 1.2811389521316077e-05, "loss": 1.4188, "step": 4774 }, { "epoch": 0.8237729664452687, "grad_norm": 0.77734375, "learning_rate": 1.2808777319628025e-05, "loss": 1.4128, "step": 4775 }, { "epoch": 0.8239454843440007, "grad_norm": 0.609375, "learning_rate": 1.28061649098482e-05, "loss": 1.4184, "step": 4776 }, { "epoch": 0.8241180022427327, "grad_norm": 0.64453125, "learning_rate": 1.2803552292170145e-05, "loss": 1.4397, "step": 4777 }, { "epoch": 0.8242905201414646, "grad_norm": 0.7578125, "learning_rate": 1.280093946678742e-05, "loss": 1.5588, "step": 4778 }, { "epoch": 0.8244630380401967, "grad_norm": 0.6796875, "learning_rate": 1.2798326433893598e-05, "loss": 1.5087, "step": 4779 }, { "epoch": 0.8246355559389287, "grad_norm": 0.92578125, "learning_rate": 1.2795713193682266e-05, "loss": 1.4846, "step": 4780 }, { "epoch": 0.8248080738376606, "grad_norm": 0.6328125, "learning_rate": 1.2793099746347034e-05, "loss": 1.448, "step": 4781 }, { "epoch": 0.8249805917363926, "grad_norm": 0.640625, "learning_rate": 1.2790486092081522e-05, "loss": 1.4624, "step": 4782 }, { "epoch": 0.8251531096351247, "grad_norm": 0.62109375, "learning_rate": 1.2787872231079363e-05, "loss": 1.4425, "step": 4783 }, { "epoch": 0.8253256275338566, "grad_norm": 0.59765625, "learning_rate": 1.2785258163534211e-05, "loss": 1.4563, "step": 4784 }, { "epoch": 0.8254981454325886, "grad_norm": 0.6171875, "learning_rate": 1.2782643889639727e-05, "loss": 1.472, "step": 4785 }, { "epoch": 0.8256706633313207, "grad_norm": 0.5859375, "learning_rate": 1.2780029409589603e-05, "loss": 1.4048, "step": 4786 }, { "epoch": 0.8258431812300526, "grad_norm": 0.6875, "learning_rate": 1.2777414723577527e-05, "loss": 1.5023, "step": 4787 }, { "epoch": 0.8260156991287846, "grad_norm": 0.640625, "learning_rate": 1.2774799831797214e-05, "loss": 1.4089, "step": 4788 }, { "epoch": 0.8261882170275167, "grad_norm": 0.60546875, "learning_rate": 1.277218473444239e-05, "loss": 1.4833, "step": 4789 }, { "epoch": 0.8263607349262486, "grad_norm": 0.70703125, "learning_rate": 1.2769569431706804e-05, "loss": 1.4398, "step": 4790 }, { "epoch": 0.8265332528249806, "grad_norm": 0.60546875, "learning_rate": 1.2766953923784207e-05, "loss": 1.4302, "step": 4791 }, { "epoch": 0.8267057707237125, "grad_norm": 0.64453125, "learning_rate": 1.2764338210868372e-05, "loss": 1.4379, "step": 4792 }, { "epoch": 0.8268782886224446, "grad_norm": 0.59765625, "learning_rate": 1.2761722293153094e-05, "loss": 1.3314, "step": 4793 }, { "epoch": 0.8270508065211766, "grad_norm": 0.8046875, "learning_rate": 1.2759106170832172e-05, "loss": 1.4575, "step": 4794 }, { "epoch": 0.8272233244199085, "grad_norm": 0.71484375, "learning_rate": 1.2756489844099424e-05, "loss": 1.4598, "step": 4795 }, { "epoch": 0.8273958423186406, "grad_norm": 0.609375, "learning_rate": 1.2753873313148683e-05, "loss": 1.3463, "step": 4796 }, { "epoch": 0.8275683602173726, "grad_norm": 0.64453125, "learning_rate": 1.2751256578173803e-05, "loss": 1.4494, "step": 4797 }, { "epoch": 0.8277408781161045, "grad_norm": 0.6875, "learning_rate": 1.2748639639368645e-05, "loss": 1.5213, "step": 4798 }, { "epoch": 0.8279133960148365, "grad_norm": 0.67578125, "learning_rate": 1.2746022496927086e-05, "loss": 1.3503, "step": 4799 }, { "epoch": 0.8280859139135686, "grad_norm": 0.6796875, "learning_rate": 1.2743405151043025e-05, "loss": 1.4791, "step": 4800 }, { "epoch": 0.8280859139135686, "eval_loss": 1.4158532619476318, "eval_runtime": 10.8421, "eval_samples_per_second": 94.447, "eval_steps_per_second": 23.612, "step": 4800 }, { "epoch": 0.8282584318123005, "grad_norm": 0.5859375, "learning_rate": 1.2740787601910365e-05, "loss": 1.495, "step": 4801 }, { "epoch": 0.8284309497110325, "grad_norm": 0.6484375, "learning_rate": 1.2738169849723039e-05, "loss": 1.428, "step": 4802 }, { "epoch": 0.8286034676097646, "grad_norm": 0.62890625, "learning_rate": 1.2735551894674978e-05, "loss": 1.4787, "step": 4803 }, { "epoch": 0.8287759855084965, "grad_norm": 0.609375, "learning_rate": 1.2732933736960143e-05, "loss": 1.4055, "step": 4804 }, { "epoch": 0.8289485034072285, "grad_norm": 0.7578125, "learning_rate": 1.2730315376772498e-05, "loss": 1.3993, "step": 4805 }, { "epoch": 0.8291210213059605, "grad_norm": 0.7109375, "learning_rate": 1.2727696814306034e-05, "loss": 1.343, "step": 4806 }, { "epoch": 0.8292935392046925, "grad_norm": 0.671875, "learning_rate": 1.2725078049754742e-05, "loss": 1.4253, "step": 4807 }, { "epoch": 0.8294660571034245, "grad_norm": 0.62109375, "learning_rate": 1.2722459083312645e-05, "loss": 1.3983, "step": 4808 }, { "epoch": 0.8296385750021564, "grad_norm": 0.6171875, "learning_rate": 1.2719839915173764e-05, "loss": 1.3923, "step": 4809 }, { "epoch": 0.8298110929008885, "grad_norm": 0.69140625, "learning_rate": 1.2717220545532151e-05, "loss": 1.3616, "step": 4810 }, { "epoch": 0.8299836107996205, "grad_norm": 0.6328125, "learning_rate": 1.271460097458186e-05, "loss": 1.3973, "step": 4811 }, { "epoch": 0.8301561286983524, "grad_norm": 0.6953125, "learning_rate": 1.271198120251697e-05, "loss": 1.3502, "step": 4812 }, { "epoch": 0.8303286465970845, "grad_norm": 0.67578125, "learning_rate": 1.2709361229531565e-05, "loss": 1.3808, "step": 4813 }, { "epoch": 0.8305011644958165, "grad_norm": 0.65625, "learning_rate": 1.2706741055819753e-05, "loss": 1.4364, "step": 4814 }, { "epoch": 0.8306736823945484, "grad_norm": 0.640625, "learning_rate": 1.270412068157565e-05, "loss": 1.3211, "step": 4815 }, { "epoch": 0.8308462002932804, "grad_norm": 0.72265625, "learning_rate": 1.2701500106993389e-05, "loss": 1.4494, "step": 4816 }, { "epoch": 0.8310187181920125, "grad_norm": 0.6875, "learning_rate": 1.269887933226712e-05, "loss": 1.4341, "step": 4817 }, { "epoch": 0.8311912360907444, "grad_norm": 0.703125, "learning_rate": 1.269625835759101e-05, "loss": 1.3866, "step": 4818 }, { "epoch": 0.8313637539894764, "grad_norm": 0.7109375, "learning_rate": 1.2693637183159231e-05, "loss": 1.4771, "step": 4819 }, { "epoch": 0.8315362718882084, "grad_norm": 0.83203125, "learning_rate": 1.269101580916598e-05, "loss": 1.4753, "step": 4820 }, { "epoch": 0.8317087897869404, "grad_norm": 0.65234375, "learning_rate": 1.2688394235805466e-05, "loss": 1.4644, "step": 4821 }, { "epoch": 0.8318813076856724, "grad_norm": 0.625, "learning_rate": 1.2685772463271909e-05, "loss": 1.4329, "step": 4822 }, { "epoch": 0.8320538255844043, "grad_norm": 0.8203125, "learning_rate": 1.2683150491759544e-05, "loss": 1.4185, "step": 4823 }, { "epoch": 0.8322263434831364, "grad_norm": 0.77734375, "learning_rate": 1.2680528321462624e-05, "loss": 1.4757, "step": 4824 }, { "epoch": 0.8323988613818684, "grad_norm": 0.58203125, "learning_rate": 1.2677905952575417e-05, "loss": 1.4213, "step": 4825 }, { "epoch": 0.8325713792806003, "grad_norm": 0.6796875, "learning_rate": 1.2675283385292212e-05, "loss": 1.4002, "step": 4826 }, { "epoch": 0.8327438971793324, "grad_norm": 0.7734375, "learning_rate": 1.2672660619807291e-05, "loss": 1.3573, "step": 4827 }, { "epoch": 0.8329164150780644, "grad_norm": 0.7421875, "learning_rate": 1.2670037656314973e-05, "loss": 1.3828, "step": 4828 }, { "epoch": 0.8330889329767963, "grad_norm": 0.63671875, "learning_rate": 1.2667414495009583e-05, "loss": 1.394, "step": 4829 }, { "epoch": 0.8332614508755284, "grad_norm": 0.58984375, "learning_rate": 1.2664791136085462e-05, "loss": 1.4278, "step": 4830 }, { "epoch": 0.8334339687742603, "grad_norm": 0.64453125, "learning_rate": 1.2662167579736961e-05, "loss": 1.4386, "step": 4831 }, { "epoch": 0.8336064866729923, "grad_norm": 0.66796875, "learning_rate": 1.2659543826158451e-05, "loss": 1.3753, "step": 4832 }, { "epoch": 0.8337790045717243, "grad_norm": 0.62890625, "learning_rate": 1.2656919875544316e-05, "loss": 1.3979, "step": 4833 }, { "epoch": 0.8339515224704563, "grad_norm": 0.62890625, "learning_rate": 1.2654295728088959e-05, "loss": 1.5161, "step": 4834 }, { "epoch": 0.8341240403691883, "grad_norm": 0.64453125, "learning_rate": 1.2651671383986788e-05, "loss": 1.4717, "step": 4835 }, { "epoch": 0.8342965582679203, "grad_norm": 0.68359375, "learning_rate": 1.2649046843432232e-05, "loss": 1.4729, "step": 4836 }, { "epoch": 0.8344690761666523, "grad_norm": 0.5546875, "learning_rate": 1.2646422106619733e-05, "loss": 1.4337, "step": 4837 }, { "epoch": 0.8346415940653843, "grad_norm": 0.62109375, "learning_rate": 1.2643797173743753e-05, "loss": 1.3985, "step": 4838 }, { "epoch": 0.8348141119641163, "grad_norm": 3.234375, "learning_rate": 1.2641172044998754e-05, "loss": 1.3862, "step": 4839 }, { "epoch": 0.8349866298628482, "grad_norm": 0.69921875, "learning_rate": 1.263854672057923e-05, "loss": 1.2768, "step": 4840 }, { "epoch": 0.8351591477615803, "grad_norm": 0.65234375, "learning_rate": 1.2635921200679677e-05, "loss": 1.4262, "step": 4841 }, { "epoch": 0.8353316656603123, "grad_norm": 0.65234375, "learning_rate": 1.2633295485494614e-05, "loss": 1.3765, "step": 4842 }, { "epoch": 0.8355041835590442, "grad_norm": 0.73046875, "learning_rate": 1.2630669575218568e-05, "loss": 1.3913, "step": 4843 }, { "epoch": 0.8356767014577763, "grad_norm": 0.7109375, "learning_rate": 1.2628043470046078e-05, "loss": 1.4059, "step": 4844 }, { "epoch": 0.8358492193565082, "grad_norm": 0.65234375, "learning_rate": 1.2625417170171712e-05, "loss": 1.4799, "step": 4845 }, { "epoch": 0.8360217372552402, "grad_norm": 0.69140625, "learning_rate": 1.2622790675790033e-05, "loss": 1.3965, "step": 4846 }, { "epoch": 0.8361942551539723, "grad_norm": 0.62890625, "learning_rate": 1.2620163987095637e-05, "loss": 1.4537, "step": 4847 }, { "epoch": 0.8363667730527042, "grad_norm": 0.6015625, "learning_rate": 1.2617537104283119e-05, "loss": 1.3701, "step": 4848 }, { "epoch": 0.8365392909514362, "grad_norm": 0.59375, "learning_rate": 1.2614910027547096e-05, "loss": 1.3818, "step": 4849 }, { "epoch": 0.8367118088501682, "grad_norm": 0.6640625, "learning_rate": 1.2612282757082204e-05, "loss": 1.5375, "step": 4850 }, { "epoch": 0.8368843267489002, "grad_norm": 0.68359375, "learning_rate": 1.2609655293083079e-05, "loss": 1.4179, "step": 4851 }, { "epoch": 0.8370568446476322, "grad_norm": 0.68359375, "learning_rate": 1.2607027635744384e-05, "loss": 1.3935, "step": 4852 }, { "epoch": 0.8372293625463642, "grad_norm": 0.7265625, "learning_rate": 1.2604399785260794e-05, "loss": 1.4925, "step": 4853 }, { "epoch": 0.8374018804450962, "grad_norm": 0.69921875, "learning_rate": 1.2601771741826996e-05, "loss": 1.4307, "step": 4854 }, { "epoch": 0.8375743983438282, "grad_norm": 0.72265625, "learning_rate": 1.259914350563769e-05, "loss": 1.3686, "step": 4855 }, { "epoch": 0.8377469162425601, "grad_norm": 0.60546875, "learning_rate": 1.259651507688759e-05, "loss": 1.3841, "step": 4856 }, { "epoch": 0.8379194341412921, "grad_norm": 0.703125, "learning_rate": 1.259388645577143e-05, "loss": 1.3666, "step": 4857 }, { "epoch": 0.8380919520400242, "grad_norm": 0.6328125, "learning_rate": 1.2591257642483959e-05, "loss": 1.532, "step": 4858 }, { "epoch": 0.8382644699387561, "grad_norm": 0.7109375, "learning_rate": 1.2588628637219927e-05, "loss": 1.4264, "step": 4859 }, { "epoch": 0.8384369878374881, "grad_norm": 0.6171875, "learning_rate": 1.258599944017411e-05, "loss": 1.4054, "step": 4860 }, { "epoch": 0.8386095057362202, "grad_norm": 0.640625, "learning_rate": 1.2583370051541298e-05, "loss": 1.3527, "step": 4861 }, { "epoch": 0.8387820236349521, "grad_norm": 0.6875, "learning_rate": 1.258074047151629e-05, "loss": 1.4412, "step": 4862 }, { "epoch": 0.8389545415336841, "grad_norm": 0.76171875, "learning_rate": 1.2578110700293906e-05, "loss": 1.6089, "step": 4863 }, { "epoch": 0.8391270594324162, "grad_norm": 0.68359375, "learning_rate": 1.2575480738068971e-05, "loss": 1.4927, "step": 4864 }, { "epoch": 0.8392995773311481, "grad_norm": 0.65625, "learning_rate": 1.2572850585036325e-05, "loss": 1.3671, "step": 4865 }, { "epoch": 0.8394720952298801, "grad_norm": 0.7421875, "learning_rate": 1.2570220241390838e-05, "loss": 1.3574, "step": 4866 }, { "epoch": 0.839644613128612, "grad_norm": 0.671875, "learning_rate": 1.2567589707327375e-05, "loss": 1.4596, "step": 4867 }, { "epoch": 0.8398171310273441, "grad_norm": 0.671875, "learning_rate": 1.256495898304082e-05, "loss": 1.5785, "step": 4868 }, { "epoch": 0.8399896489260761, "grad_norm": 0.67578125, "learning_rate": 1.2562328068726081e-05, "loss": 1.3818, "step": 4869 }, { "epoch": 0.840162166824808, "grad_norm": 0.59375, "learning_rate": 1.2559696964578068e-05, "loss": 1.4781, "step": 4870 }, { "epoch": 0.8403346847235401, "grad_norm": 0.640625, "learning_rate": 1.2557065670791708e-05, "loss": 1.3785, "step": 4871 }, { "epoch": 0.8405072026222721, "grad_norm": 0.59765625, "learning_rate": 1.2554434187561949e-05, "loss": 1.4297, "step": 4872 }, { "epoch": 0.840679720521004, "grad_norm": 0.60546875, "learning_rate": 1.2551802515083742e-05, "loss": 1.5078, "step": 4873 }, { "epoch": 0.840852238419736, "grad_norm": 0.63671875, "learning_rate": 1.2549170653552062e-05, "loss": 1.6393, "step": 4874 }, { "epoch": 0.8410247563184681, "grad_norm": 0.6484375, "learning_rate": 1.254653860316189e-05, "loss": 1.4881, "step": 4875 }, { "epoch": 0.8411972742172, "grad_norm": 0.6015625, "learning_rate": 1.2543906364108227e-05, "loss": 1.4819, "step": 4876 }, { "epoch": 0.841369792115932, "grad_norm": 0.60546875, "learning_rate": 1.2541273936586088e-05, "loss": 1.288, "step": 4877 }, { "epoch": 0.8415423100146641, "grad_norm": 0.67578125, "learning_rate": 1.2538641320790494e-05, "loss": 1.5058, "step": 4878 }, { "epoch": 0.841714827913396, "grad_norm": 0.61328125, "learning_rate": 1.2536008516916491e-05, "loss": 1.5025, "step": 4879 }, { "epoch": 0.841887345812128, "grad_norm": 0.60546875, "learning_rate": 1.253337552515913e-05, "loss": 1.5237, "step": 4880 }, { "epoch": 0.8420598637108601, "grad_norm": 0.609375, "learning_rate": 1.253074234571348e-05, "loss": 1.3913, "step": 4881 }, { "epoch": 0.842232381609592, "grad_norm": 0.60546875, "learning_rate": 1.2528108978774627e-05, "loss": 1.4599, "step": 4882 }, { "epoch": 0.842404899508324, "grad_norm": 0.6171875, "learning_rate": 1.2525475424537664e-05, "loss": 1.468, "step": 4883 }, { "epoch": 0.8425774174070559, "grad_norm": 0.6640625, "learning_rate": 1.25228416831977e-05, "loss": 1.5137, "step": 4884 }, { "epoch": 0.842749935305788, "grad_norm": 0.6484375, "learning_rate": 1.2520207754949861e-05, "loss": 1.3381, "step": 4885 }, { "epoch": 0.84292245320452, "grad_norm": 0.6171875, "learning_rate": 1.2517573639989284e-05, "loss": 1.449, "step": 4886 }, { "epoch": 0.8430949711032519, "grad_norm": 0.62890625, "learning_rate": 1.2514939338511123e-05, "loss": 1.486, "step": 4887 }, { "epoch": 0.843267489001984, "grad_norm": 0.60546875, "learning_rate": 1.2512304850710542e-05, "loss": 1.5363, "step": 4888 }, { "epoch": 0.843440006900716, "grad_norm": 0.6484375, "learning_rate": 1.2509670176782718e-05, "loss": 1.3917, "step": 4889 }, { "epoch": 0.8436125247994479, "grad_norm": 0.60546875, "learning_rate": 1.2507035316922847e-05, "loss": 1.4611, "step": 4890 }, { "epoch": 0.8437850426981799, "grad_norm": 0.5546875, "learning_rate": 1.2504400271326136e-05, "loss": 1.3827, "step": 4891 }, { "epoch": 0.843957560596912, "grad_norm": 0.70703125, "learning_rate": 1.2501765040187804e-05, "loss": 1.4403, "step": 4892 }, { "epoch": 0.8441300784956439, "grad_norm": 0.57421875, "learning_rate": 1.2499129623703086e-05, "loss": 1.4834, "step": 4893 }, { "epoch": 0.8443025963943759, "grad_norm": 0.6328125, "learning_rate": 1.249649402206723e-05, "loss": 1.4435, "step": 4894 }, { "epoch": 0.844475114293108, "grad_norm": 0.671875, "learning_rate": 1.2493858235475497e-05, "loss": 1.4356, "step": 4895 }, { "epoch": 0.8446476321918399, "grad_norm": 0.59375, "learning_rate": 1.2491222264123166e-05, "loss": 1.4481, "step": 4896 }, { "epoch": 0.8448201500905719, "grad_norm": 0.6171875, "learning_rate": 1.2488586108205521e-05, "loss": 1.497, "step": 4897 }, { "epoch": 0.8449926679893038, "grad_norm": 0.640625, "learning_rate": 1.2485949767917869e-05, "loss": 1.3873, "step": 4898 }, { "epoch": 0.8451651858880359, "grad_norm": 0.8515625, "learning_rate": 1.2483313243455526e-05, "loss": 1.4457, "step": 4899 }, { "epoch": 0.8453377037867679, "grad_norm": 0.70703125, "learning_rate": 1.2480676535013821e-05, "loss": 1.3509, "step": 4900 }, { "epoch": 0.8453377037867679, "eval_loss": 1.4150875806808472, "eval_runtime": 10.8763, "eval_samples_per_second": 94.15, "eval_steps_per_second": 23.537, "step": 4900 }, { "epoch": 0.8455102216854998, "grad_norm": 0.71875, "learning_rate": 1.2478039642788097e-05, "loss": 1.3508, "step": 4901 }, { "epoch": 0.8456827395842319, "grad_norm": 0.58203125, "learning_rate": 1.2475402566973715e-05, "loss": 1.431, "step": 4902 }, { "epoch": 0.8458552574829639, "grad_norm": 0.5859375, "learning_rate": 1.2472765307766045e-05, "loss": 1.5606, "step": 4903 }, { "epoch": 0.8460277753816958, "grad_norm": 0.69921875, "learning_rate": 1.2470127865360467e-05, "loss": 1.4353, "step": 4904 }, { "epoch": 0.8462002932804279, "grad_norm": 0.609375, "learning_rate": 1.2467490239952382e-05, "loss": 1.3749, "step": 4905 }, { "epoch": 0.8463728111791599, "grad_norm": 0.64453125, "learning_rate": 1.2464852431737205e-05, "loss": 1.4545, "step": 4906 }, { "epoch": 0.8465453290778918, "grad_norm": 0.57421875, "learning_rate": 1.2462214440910359e-05, "loss": 1.4226, "step": 4907 }, { "epoch": 0.8467178469766238, "grad_norm": 0.8046875, "learning_rate": 1.2459576267667281e-05, "loss": 1.3848, "step": 4908 }, { "epoch": 0.8468903648753558, "grad_norm": 0.5625, "learning_rate": 1.2456937912203426e-05, "loss": 1.4049, "step": 4909 }, { "epoch": 0.8470628827740878, "grad_norm": 0.609375, "learning_rate": 1.2454299374714258e-05, "loss": 1.4062, "step": 4910 }, { "epoch": 0.8472354006728198, "grad_norm": 0.90625, "learning_rate": 1.2451660655395258e-05, "loss": 1.4654, "step": 4911 }, { "epoch": 0.8474079185715518, "grad_norm": 0.6796875, "learning_rate": 1.2449021754441919e-05, "loss": 1.4948, "step": 4912 }, { "epoch": 0.8475804364702838, "grad_norm": 0.64453125, "learning_rate": 1.2446382672049741e-05, "loss": 1.4434, "step": 4913 }, { "epoch": 0.8477529543690158, "grad_norm": 0.61328125, "learning_rate": 1.2443743408414256e-05, "loss": 1.4044, "step": 4914 }, { "epoch": 0.8479254722677477, "grad_norm": 0.64453125, "learning_rate": 1.2441103963730985e-05, "loss": 1.4474, "step": 4915 }, { "epoch": 0.8480979901664798, "grad_norm": 0.62890625, "learning_rate": 1.2438464338195481e-05, "loss": 1.4127, "step": 4916 }, { "epoch": 0.8482705080652118, "grad_norm": 0.60546875, "learning_rate": 1.2435824532003304e-05, "loss": 1.4637, "step": 4917 }, { "epoch": 0.8484430259639437, "grad_norm": 0.63671875, "learning_rate": 1.2433184545350026e-05, "loss": 1.4083, "step": 4918 }, { "epoch": 0.8486155438626758, "grad_norm": 0.5625, "learning_rate": 1.2430544378431233e-05, "loss": 1.4099, "step": 4919 }, { "epoch": 0.8487880617614078, "grad_norm": 0.58203125, "learning_rate": 1.2427904031442526e-05, "loss": 1.3869, "step": 4920 }, { "epoch": 0.8489605796601397, "grad_norm": 0.609375, "learning_rate": 1.2425263504579517e-05, "loss": 1.439, "step": 4921 }, { "epoch": 0.8491330975588718, "grad_norm": 0.58984375, "learning_rate": 1.2422622798037833e-05, "loss": 1.3557, "step": 4922 }, { "epoch": 0.8493056154576037, "grad_norm": 0.58984375, "learning_rate": 1.2419981912013116e-05, "loss": 1.5707, "step": 4923 }, { "epoch": 0.8494781333563357, "grad_norm": 0.625, "learning_rate": 1.241734084670102e-05, "loss": 1.4085, "step": 4924 }, { "epoch": 0.8496506512550677, "grad_norm": 0.578125, "learning_rate": 1.2414699602297206e-05, "loss": 1.4046, "step": 4925 }, { "epoch": 0.8498231691537997, "grad_norm": 0.6015625, "learning_rate": 1.2412058178997362e-05, "loss": 1.34, "step": 4926 }, { "epoch": 0.8499956870525317, "grad_norm": 0.640625, "learning_rate": 1.2409416576997176e-05, "loss": 1.3569, "step": 4927 }, { "epoch": 0.8501682049512637, "grad_norm": 0.5859375, "learning_rate": 1.2406774796492353e-05, "loss": 1.4416, "step": 4928 }, { "epoch": 0.8503407228499957, "grad_norm": 0.90234375, "learning_rate": 1.2404132837678613e-05, "loss": 1.4531, "step": 4929 }, { "epoch": 0.8505132407487277, "grad_norm": 0.5703125, "learning_rate": 1.2401490700751695e-05, "loss": 1.4516, "step": 4930 }, { "epoch": 0.8506857586474597, "grad_norm": 0.703125, "learning_rate": 1.239884838590734e-05, "loss": 1.45, "step": 4931 }, { "epoch": 0.8508582765461916, "grad_norm": 0.61328125, "learning_rate": 1.2396205893341306e-05, "loss": 1.5007, "step": 4932 }, { "epoch": 0.8510307944449237, "grad_norm": 0.61328125, "learning_rate": 1.2393563223249367e-05, "loss": 1.4125, "step": 4933 }, { "epoch": 0.8512033123436556, "grad_norm": 0.578125, "learning_rate": 1.239092037582731e-05, "loss": 1.4413, "step": 4934 }, { "epoch": 0.8513758302423876, "grad_norm": 0.68359375, "learning_rate": 1.2388277351270936e-05, "loss": 1.5108, "step": 4935 }, { "epoch": 0.8515483481411197, "grad_norm": 0.77734375, "learning_rate": 1.2385634149776049e-05, "loss": 1.5181, "step": 4936 }, { "epoch": 0.8517208660398516, "grad_norm": 0.625, "learning_rate": 1.2382990771538479e-05, "loss": 1.4072, "step": 4937 }, { "epoch": 0.8518933839385836, "grad_norm": 0.56640625, "learning_rate": 1.238034721675406e-05, "loss": 1.4797, "step": 4938 }, { "epoch": 0.8520659018373157, "grad_norm": 0.87890625, "learning_rate": 1.237770348561865e-05, "loss": 1.4337, "step": 4939 }, { "epoch": 0.8522384197360476, "grad_norm": 0.6640625, "learning_rate": 1.237505957832811e-05, "loss": 1.4196, "step": 4940 }, { "epoch": 0.8524109376347796, "grad_norm": 0.81640625, "learning_rate": 1.2372415495078314e-05, "loss": 1.515, "step": 4941 }, { "epoch": 0.8525834555335116, "grad_norm": 0.6484375, "learning_rate": 1.2369771236065154e-05, "loss": 1.361, "step": 4942 }, { "epoch": 0.8527559734322436, "grad_norm": 0.875, "learning_rate": 1.2367126801484537e-05, "loss": 1.4598, "step": 4943 }, { "epoch": 0.8529284913309756, "grad_norm": 0.72265625, "learning_rate": 1.2364482191532371e-05, "loss": 1.4902, "step": 4944 }, { "epoch": 0.8531010092297076, "grad_norm": 0.5859375, "learning_rate": 1.2361837406404592e-05, "loss": 1.4504, "step": 4945 }, { "epoch": 0.8532735271284396, "grad_norm": 0.62109375, "learning_rate": 1.2359192446297143e-05, "loss": 1.4503, "step": 4946 }, { "epoch": 0.8534460450271716, "grad_norm": 0.65625, "learning_rate": 1.2356547311405977e-05, "loss": 1.4309, "step": 4947 }, { "epoch": 0.8536185629259035, "grad_norm": 0.6484375, "learning_rate": 1.2353902001927058e-05, "loss": 1.3899, "step": 4948 }, { "epoch": 0.8537910808246355, "grad_norm": 0.70703125, "learning_rate": 1.2351256518056373e-05, "loss": 1.371, "step": 4949 }, { "epoch": 0.8539635987233676, "grad_norm": 0.59765625, "learning_rate": 1.2348610859989913e-05, "loss": 1.4957, "step": 4950 }, { "epoch": 0.8541361166220995, "grad_norm": 0.625, "learning_rate": 1.234596502792369e-05, "loss": 1.351, "step": 4951 }, { "epoch": 0.8543086345208315, "grad_norm": 0.64453125, "learning_rate": 1.2343319022053715e-05, "loss": 1.4467, "step": 4952 }, { "epoch": 0.8544811524195636, "grad_norm": 0.68359375, "learning_rate": 1.2340672842576025e-05, "loss": 1.4147, "step": 4953 }, { "epoch": 0.8546536703182955, "grad_norm": 0.6328125, "learning_rate": 1.2338026489686668e-05, "loss": 1.4445, "step": 4954 }, { "epoch": 0.8548261882170275, "grad_norm": 0.68359375, "learning_rate": 1.2335379963581699e-05, "loss": 1.4351, "step": 4955 }, { "epoch": 0.8549987061157595, "grad_norm": 0.61328125, "learning_rate": 1.2332733264457188e-05, "loss": 1.5089, "step": 4956 }, { "epoch": 0.8551712240144915, "grad_norm": 0.62109375, "learning_rate": 1.2330086392509224e-05, "loss": 1.5096, "step": 4957 }, { "epoch": 0.8553437419132235, "grad_norm": 0.5703125, "learning_rate": 1.2327439347933901e-05, "loss": 1.3495, "step": 4958 }, { "epoch": 0.8555162598119554, "grad_norm": 0.56640625, "learning_rate": 1.2324792130927328e-05, "loss": 1.3313, "step": 4959 }, { "epoch": 0.8556887777106875, "grad_norm": 0.59765625, "learning_rate": 1.2322144741685627e-05, "loss": 1.4532, "step": 4960 }, { "epoch": 0.8558612956094195, "grad_norm": 0.61328125, "learning_rate": 1.231949718040493e-05, "loss": 1.5288, "step": 4961 }, { "epoch": 0.8560338135081514, "grad_norm": 0.6015625, "learning_rate": 1.2316849447281395e-05, "loss": 1.4836, "step": 4962 }, { "epoch": 0.8562063314068835, "grad_norm": 0.59375, "learning_rate": 1.2314201542511176e-05, "loss": 1.4735, "step": 4963 }, { "epoch": 0.8563788493056155, "grad_norm": 0.62109375, "learning_rate": 1.2311553466290447e-05, "loss": 1.2235, "step": 4964 }, { "epoch": 0.8565513672043474, "grad_norm": 0.703125, "learning_rate": 1.2308905218815392e-05, "loss": 1.4499, "step": 4965 }, { "epoch": 0.8567238851030794, "grad_norm": 0.63671875, "learning_rate": 1.2306256800282213e-05, "loss": 1.4595, "step": 4966 }, { "epoch": 0.8568964030018115, "grad_norm": 0.6015625, "learning_rate": 1.2303608210887121e-05, "loss": 1.4724, "step": 4967 }, { "epoch": 0.8570689209005434, "grad_norm": 1.2109375, "learning_rate": 1.230095945082634e-05, "loss": 1.5173, "step": 4968 }, { "epoch": 0.8572414387992754, "grad_norm": 0.7265625, "learning_rate": 1.2298310520296101e-05, "loss": 1.5255, "step": 4969 }, { "epoch": 0.8574139566980075, "grad_norm": 0.59375, "learning_rate": 1.2295661419492662e-05, "loss": 1.4696, "step": 4970 }, { "epoch": 0.8575864745967394, "grad_norm": 0.61328125, "learning_rate": 1.2293012148612281e-05, "loss": 1.484, "step": 4971 }, { "epoch": 0.8577589924954714, "grad_norm": 0.70703125, "learning_rate": 1.229036270785123e-05, "loss": 1.4593, "step": 4972 }, { "epoch": 0.8579315103942033, "grad_norm": 0.6796875, "learning_rate": 1.2287713097405802e-05, "loss": 1.4657, "step": 4973 }, { "epoch": 0.8581040282929354, "grad_norm": 0.59375, "learning_rate": 1.2285063317472293e-05, "loss": 1.3391, "step": 4974 }, { "epoch": 0.8582765461916674, "grad_norm": 0.6171875, "learning_rate": 1.2282413368247015e-05, "loss": 1.3832, "step": 4975 }, { "epoch": 0.8584490640903993, "grad_norm": 0.7578125, "learning_rate": 1.2279763249926293e-05, "loss": 1.5026, "step": 4976 }, { "epoch": 0.8586215819891314, "grad_norm": 0.875, "learning_rate": 1.2277112962706463e-05, "loss": 1.4334, "step": 4977 }, { "epoch": 0.8587940998878634, "grad_norm": 0.7421875, "learning_rate": 1.2274462506783877e-05, "loss": 1.515, "step": 4978 }, { "epoch": 0.8589666177865953, "grad_norm": 0.703125, "learning_rate": 1.22718118823549e-05, "loss": 1.4328, "step": 4979 }, { "epoch": 0.8591391356853274, "grad_norm": 0.578125, "learning_rate": 1.2269161089615902e-05, "loss": 1.442, "step": 4980 }, { "epoch": 0.8593116535840594, "grad_norm": 0.58984375, "learning_rate": 1.2266510128763271e-05, "loss": 1.4677, "step": 4981 }, { "epoch": 0.8594841714827913, "grad_norm": 0.68359375, "learning_rate": 1.226385899999341e-05, "loss": 1.3962, "step": 4982 }, { "epoch": 0.8596566893815233, "grad_norm": 0.68359375, "learning_rate": 1.2261207703502731e-05, "loss": 1.4035, "step": 4983 }, { "epoch": 0.8598292072802554, "grad_norm": 0.62890625, "learning_rate": 1.2258556239487654e-05, "loss": 1.4359, "step": 4984 }, { "epoch": 0.8600017251789873, "grad_norm": 1.0234375, "learning_rate": 1.2255904608144618e-05, "loss": 1.3858, "step": 4985 }, { "epoch": 0.8601742430777193, "grad_norm": 0.6015625, "learning_rate": 1.2253252809670074e-05, "loss": 1.4108, "step": 4986 }, { "epoch": 0.8603467609764514, "grad_norm": 0.58203125, "learning_rate": 1.2250600844260482e-05, "loss": 1.4605, "step": 4987 }, { "epoch": 0.8605192788751833, "grad_norm": 0.58203125, "learning_rate": 1.2247948712112318e-05, "loss": 1.419, "step": 4988 }, { "epoch": 0.8606917967739153, "grad_norm": 0.6171875, "learning_rate": 1.2245296413422067e-05, "loss": 1.3961, "step": 4989 }, { "epoch": 0.8608643146726472, "grad_norm": 0.609375, "learning_rate": 1.2242643948386231e-05, "loss": 1.494, "step": 4990 }, { "epoch": 0.8610368325713793, "grad_norm": 0.609375, "learning_rate": 1.2239991317201316e-05, "loss": 1.4661, "step": 4991 }, { "epoch": 0.8612093504701113, "grad_norm": 0.609375, "learning_rate": 1.2237338520063848e-05, "loss": 1.4005, "step": 4992 }, { "epoch": 0.8613818683688432, "grad_norm": 0.59765625, "learning_rate": 1.2234685557170365e-05, "loss": 1.491, "step": 4993 }, { "epoch": 0.8615543862675753, "grad_norm": 0.6328125, "learning_rate": 1.2232032428717408e-05, "loss": 1.3907, "step": 4994 }, { "epoch": 0.8617269041663073, "grad_norm": 0.6171875, "learning_rate": 1.2229379134901546e-05, "loss": 1.3841, "step": 4995 }, { "epoch": 0.8618994220650392, "grad_norm": 0.6015625, "learning_rate": 1.2226725675919349e-05, "loss": 1.52, "step": 4996 }, { "epoch": 0.8620719399637713, "grad_norm": 0.70703125, "learning_rate": 1.2224072051967398e-05, "loss": 1.3878, "step": 4997 }, { "epoch": 0.8622444578625033, "grad_norm": 0.57421875, "learning_rate": 1.2221418263242292e-05, "loss": 1.4099, "step": 4998 }, { "epoch": 0.8624169757612352, "grad_norm": 0.6484375, "learning_rate": 1.2218764309940647e-05, "loss": 1.4386, "step": 4999 }, { "epoch": 0.8625894936599672, "grad_norm": 0.70703125, "learning_rate": 1.2216110192259075e-05, "loss": 1.4912, "step": 5000 }, { "epoch": 0.8625894936599672, "eval_loss": 1.414391040802002, "eval_runtime": 10.8891, "eval_samples_per_second": 94.039, "eval_steps_per_second": 23.51, "step": 5000 }, { "epoch": 0.8627620115586992, "grad_norm": 0.61328125, "learning_rate": 1.221345591039421e-05, "loss": 1.4483, "step": 5001 }, { "epoch": 0.8629345294574312, "grad_norm": 0.63671875, "learning_rate": 1.2210801464542704e-05, "loss": 1.4031, "step": 5002 }, { "epoch": 0.8631070473561632, "grad_norm": 0.6015625, "learning_rate": 1.2208146854901213e-05, "loss": 1.5115, "step": 5003 }, { "epoch": 0.8632795652548952, "grad_norm": 0.6328125, "learning_rate": 1.2205492081666404e-05, "loss": 1.4754, "step": 5004 }, { "epoch": 0.8634520831536272, "grad_norm": 0.69921875, "learning_rate": 1.220283714503496e-05, "loss": 1.4836, "step": 5005 }, { "epoch": 0.8636246010523592, "grad_norm": 0.6015625, "learning_rate": 1.2200182045203576e-05, "loss": 1.4632, "step": 5006 }, { "epoch": 0.8637971189510911, "grad_norm": 0.625, "learning_rate": 1.2197526782368962e-05, "loss": 1.3843, "step": 5007 }, { "epoch": 0.8639696368498232, "grad_norm": 0.7109375, "learning_rate": 1.219487135672783e-05, "loss": 1.4724, "step": 5008 }, { "epoch": 0.8641421547485552, "grad_norm": 0.70703125, "learning_rate": 1.2192215768476915e-05, "loss": 1.4989, "step": 5009 }, { "epoch": 0.8643146726472871, "grad_norm": 0.59375, "learning_rate": 1.2189560017812955e-05, "loss": 1.4918, "step": 5010 }, { "epoch": 0.8644871905460192, "grad_norm": 1.0, "learning_rate": 1.2186904104932716e-05, "loss": 1.3991, "step": 5011 }, { "epoch": 0.8646597084447512, "grad_norm": 0.6640625, "learning_rate": 1.218424803003295e-05, "loss": 1.4472, "step": 5012 }, { "epoch": 0.8648322263434831, "grad_norm": 0.59375, "learning_rate": 1.2181591793310444e-05, "loss": 1.2846, "step": 5013 }, { "epoch": 0.8650047442422152, "grad_norm": 0.609375, "learning_rate": 1.2178935394961985e-05, "loss": 1.4859, "step": 5014 }, { "epoch": 0.8651772621409471, "grad_norm": 0.65625, "learning_rate": 1.2176278835184381e-05, "loss": 1.4976, "step": 5015 }, { "epoch": 0.8653497800396791, "grad_norm": 1.5703125, "learning_rate": 1.2173622114174439e-05, "loss": 1.3716, "step": 5016 }, { "epoch": 0.8655222979384111, "grad_norm": 0.68359375, "learning_rate": 1.2170965232128991e-05, "loss": 1.4576, "step": 5017 }, { "epoch": 0.8656948158371431, "grad_norm": 0.62109375, "learning_rate": 1.2168308189244872e-05, "loss": 1.4493, "step": 5018 }, { "epoch": 0.8658673337358751, "grad_norm": 0.58984375, "learning_rate": 1.216565098571894e-05, "loss": 1.5467, "step": 5019 }, { "epoch": 0.8660398516346071, "grad_norm": 0.62109375, "learning_rate": 1.2162993621748048e-05, "loss": 1.4384, "step": 5020 }, { "epoch": 0.8662123695333391, "grad_norm": 1.046875, "learning_rate": 1.216033609752907e-05, "loss": 1.4942, "step": 5021 }, { "epoch": 0.8663848874320711, "grad_norm": 0.64453125, "learning_rate": 1.2157678413258898e-05, "loss": 1.5091, "step": 5022 }, { "epoch": 0.866557405330803, "grad_norm": 0.6015625, "learning_rate": 1.2155020569134428e-05, "loss": 1.4179, "step": 5023 }, { "epoch": 0.866729923229535, "grad_norm": 0.6328125, "learning_rate": 1.2152362565352571e-05, "loss": 1.4006, "step": 5024 }, { "epoch": 0.8669024411282671, "grad_norm": 0.6328125, "learning_rate": 1.2149704402110243e-05, "loss": 1.431, "step": 5025 }, { "epoch": 0.867074959026999, "grad_norm": 0.66796875, "learning_rate": 1.2147046079604378e-05, "loss": 1.517, "step": 5026 }, { "epoch": 0.867247476925731, "grad_norm": 1.1796875, "learning_rate": 1.2144387598031933e-05, "loss": 1.5725, "step": 5027 }, { "epoch": 0.8674199948244631, "grad_norm": 0.75390625, "learning_rate": 1.2141728957589848e-05, "loss": 1.4621, "step": 5028 }, { "epoch": 0.867592512723195, "grad_norm": 0.6015625, "learning_rate": 1.2139070158475104e-05, "loss": 1.4756, "step": 5029 }, { "epoch": 0.867765030621927, "grad_norm": 0.578125, "learning_rate": 1.2136411200884676e-05, "loss": 1.4426, "step": 5030 }, { "epoch": 0.867937548520659, "grad_norm": 0.625, "learning_rate": 1.2133752085015558e-05, "loss": 1.4562, "step": 5031 }, { "epoch": 0.868110066419391, "grad_norm": 0.75390625, "learning_rate": 1.2131092811064753e-05, "loss": 1.4278, "step": 5032 }, { "epoch": 0.868282584318123, "grad_norm": 0.6875, "learning_rate": 1.2128433379229276e-05, "loss": 1.5037, "step": 5033 }, { "epoch": 0.868455102216855, "grad_norm": 0.57421875, "learning_rate": 1.2125773789706155e-05, "loss": 1.4705, "step": 5034 }, { "epoch": 0.868627620115587, "grad_norm": 0.73046875, "learning_rate": 1.2123114042692432e-05, "loss": 1.4946, "step": 5035 }, { "epoch": 0.868800138014319, "grad_norm": 0.6796875, "learning_rate": 1.212045413838515e-05, "loss": 1.4562, "step": 5036 }, { "epoch": 0.868972655913051, "grad_norm": 0.65234375, "learning_rate": 1.2117794076981381e-05, "loss": 1.4539, "step": 5037 }, { "epoch": 0.869145173811783, "grad_norm": 0.62109375, "learning_rate": 1.2115133858678192e-05, "loss": 1.4397, "step": 5038 }, { "epoch": 0.869317691710515, "grad_norm": 0.68359375, "learning_rate": 1.2112473483672671e-05, "loss": 1.3853, "step": 5039 }, { "epoch": 0.869490209609247, "grad_norm": 0.66796875, "learning_rate": 1.2109812952161916e-05, "loss": 1.5076, "step": 5040 }, { "epoch": 0.8696627275079789, "grad_norm": 0.58203125, "learning_rate": 1.2107152264343033e-05, "loss": 1.4681, "step": 5041 }, { "epoch": 0.869835245406711, "grad_norm": 0.58203125, "learning_rate": 1.2104491420413143e-05, "loss": 1.4733, "step": 5042 }, { "epoch": 0.8700077633054429, "grad_norm": 0.6015625, "learning_rate": 1.2101830420569382e-05, "loss": 1.3826, "step": 5043 }, { "epoch": 0.8701802812041749, "grad_norm": 0.62890625, "learning_rate": 1.209916926500889e-05, "loss": 1.454, "step": 5044 }, { "epoch": 0.870352799102907, "grad_norm": 0.66796875, "learning_rate": 1.2096507953928823e-05, "loss": 1.4646, "step": 5045 }, { "epoch": 0.8705253170016389, "grad_norm": 0.640625, "learning_rate": 1.2093846487526344e-05, "loss": 1.5317, "step": 5046 }, { "epoch": 0.8706978349003709, "grad_norm": 0.61328125, "learning_rate": 1.209118486599864e-05, "loss": 1.4352, "step": 5047 }, { "epoch": 0.8708703527991029, "grad_norm": 0.7890625, "learning_rate": 1.208852308954289e-05, "loss": 1.4748, "step": 5048 }, { "epoch": 0.8710428706978349, "grad_norm": 0.65625, "learning_rate": 1.20858611583563e-05, "loss": 1.5263, "step": 5049 }, { "epoch": 0.8712153885965669, "grad_norm": 0.6640625, "learning_rate": 1.2083199072636084e-05, "loss": 1.4742, "step": 5050 }, { "epoch": 0.8713879064952988, "grad_norm": 0.62890625, "learning_rate": 1.2080536832579466e-05, "loss": 1.4146, "step": 5051 }, { "epoch": 0.8715604243940309, "grad_norm": 0.71484375, "learning_rate": 1.2077874438383676e-05, "loss": 1.3773, "step": 5052 }, { "epoch": 0.8717329422927629, "grad_norm": 0.58203125, "learning_rate": 1.2075211890245965e-05, "loss": 1.4945, "step": 5053 }, { "epoch": 0.8719054601914948, "grad_norm": 0.578125, "learning_rate": 1.2072549188363594e-05, "loss": 1.5134, "step": 5054 }, { "epoch": 0.8720779780902269, "grad_norm": 0.79296875, "learning_rate": 1.2069886332933824e-05, "loss": 1.4979, "step": 5055 }, { "epoch": 0.8722504959889589, "grad_norm": 0.63671875, "learning_rate": 1.2067223324153947e-05, "loss": 1.4189, "step": 5056 }, { "epoch": 0.8724230138876908, "grad_norm": 0.65625, "learning_rate": 1.2064560162221246e-05, "loss": 1.4177, "step": 5057 }, { "epoch": 0.8725955317864228, "grad_norm": 0.625, "learning_rate": 1.2061896847333025e-05, "loss": 1.4413, "step": 5058 }, { "epoch": 0.8727680496851549, "grad_norm": 0.609375, "learning_rate": 1.205923337968661e-05, "loss": 1.4782, "step": 5059 }, { "epoch": 0.8729405675838868, "grad_norm": 0.64453125, "learning_rate": 1.2056569759479315e-05, "loss": 1.4539, "step": 5060 }, { "epoch": 0.8731130854826188, "grad_norm": 0.62890625, "learning_rate": 1.205390598690848e-05, "loss": 1.4785, "step": 5061 }, { "epoch": 0.8732856033813509, "grad_norm": 0.57421875, "learning_rate": 1.205124206217146e-05, "loss": 1.4304, "step": 5062 }, { "epoch": 0.8734581212800828, "grad_norm": 0.7109375, "learning_rate": 1.2048577985465613e-05, "loss": 1.4521, "step": 5063 }, { "epoch": 0.8736306391788148, "grad_norm": 0.58203125, "learning_rate": 1.2045913756988305e-05, "loss": 1.4276, "step": 5064 }, { "epoch": 0.8738031570775467, "grad_norm": 0.6328125, "learning_rate": 1.2043249376936923e-05, "loss": 1.4619, "step": 5065 }, { "epoch": 0.8739756749762788, "grad_norm": 0.61328125, "learning_rate": 1.204058484550886e-05, "loss": 1.475, "step": 5066 }, { "epoch": 0.8741481928750108, "grad_norm": 0.66015625, "learning_rate": 1.2037920162901522e-05, "loss": 1.4353, "step": 5067 }, { "epoch": 0.8743207107737427, "grad_norm": 0.640625, "learning_rate": 1.2035255329312325e-05, "loss": 1.5358, "step": 5068 }, { "epoch": 0.8744932286724748, "grad_norm": 0.61328125, "learning_rate": 1.2032590344938697e-05, "loss": 1.4719, "step": 5069 }, { "epoch": 0.8746657465712068, "grad_norm": 0.734375, "learning_rate": 1.2029925209978075e-05, "loss": 1.4687, "step": 5070 }, { "epoch": 0.8748382644699387, "grad_norm": 0.62109375, "learning_rate": 1.202725992462791e-05, "loss": 1.4727, "step": 5071 }, { "epoch": 0.8750107823686708, "grad_norm": 0.68359375, "learning_rate": 1.2024594489085665e-05, "loss": 1.4396, "step": 5072 }, { "epoch": 0.8751833002674028, "grad_norm": 0.7578125, "learning_rate": 1.2021928903548807e-05, "loss": 1.3646, "step": 5073 }, { "epoch": 0.8753558181661347, "grad_norm": 0.65625, "learning_rate": 1.2019263168214822e-05, "loss": 1.5268, "step": 5074 }, { "epoch": 0.8755283360648667, "grad_norm": 0.5625, "learning_rate": 1.2016597283281209e-05, "loss": 1.4303, "step": 5075 }, { "epoch": 0.8757008539635988, "grad_norm": 0.63671875, "learning_rate": 1.2013931248945463e-05, "loss": 1.5223, "step": 5076 }, { "epoch": 0.8758733718623307, "grad_norm": 0.671875, "learning_rate": 1.2011265065405108e-05, "loss": 1.5449, "step": 5077 }, { "epoch": 0.8760458897610627, "grad_norm": 0.86328125, "learning_rate": 1.2008598732857673e-05, "loss": 1.4209, "step": 5078 }, { "epoch": 0.8762184076597948, "grad_norm": 0.58984375, "learning_rate": 1.2005932251500693e-05, "loss": 1.4707, "step": 5079 }, { "epoch": 0.8763909255585267, "grad_norm": 0.640625, "learning_rate": 1.2003265621531716e-05, "loss": 1.4294, "step": 5080 }, { "epoch": 0.8765634434572587, "grad_norm": 0.60546875, "learning_rate": 1.2000598843148306e-05, "loss": 1.404, "step": 5081 }, { "epoch": 0.8767359613559906, "grad_norm": 0.6328125, "learning_rate": 1.1997931916548034e-05, "loss": 1.4738, "step": 5082 }, { "epoch": 0.8769084792547227, "grad_norm": 0.61328125, "learning_rate": 1.199526484192848e-05, "loss": 1.4521, "step": 5083 }, { "epoch": 0.8770809971534547, "grad_norm": 0.5703125, "learning_rate": 1.1992597619487242e-05, "loss": 1.4946, "step": 5084 }, { "epoch": 0.8772535150521866, "grad_norm": 0.59765625, "learning_rate": 1.198993024942192e-05, "loss": 1.3803, "step": 5085 }, { "epoch": 0.8774260329509187, "grad_norm": 0.6015625, "learning_rate": 1.1987262731930132e-05, "loss": 1.4693, "step": 5086 }, { "epoch": 0.8775985508496507, "grad_norm": 0.6015625, "learning_rate": 1.1984595067209503e-05, "loss": 1.3118, "step": 5087 }, { "epoch": 0.8777710687483826, "grad_norm": 0.66015625, "learning_rate": 1.1981927255457675e-05, "loss": 1.4328, "step": 5088 }, { "epoch": 0.8779435866471147, "grad_norm": 0.609375, "learning_rate": 1.1979259296872287e-05, "loss": 1.3952, "step": 5089 }, { "epoch": 0.8781161045458467, "grad_norm": 0.69140625, "learning_rate": 1.1976591191651003e-05, "loss": 1.3956, "step": 5090 }, { "epoch": 0.8782886224445786, "grad_norm": 0.7578125, "learning_rate": 1.19739229399915e-05, "loss": 1.3836, "step": 5091 }, { "epoch": 0.8784611403433106, "grad_norm": 0.62109375, "learning_rate": 1.1971254542091446e-05, "loss": 1.411, "step": 5092 }, { "epoch": 0.8786336582420426, "grad_norm": 0.5390625, "learning_rate": 1.1968585998148541e-05, "loss": 1.3845, "step": 5093 }, { "epoch": 0.8788061761407746, "grad_norm": 1.0859375, "learning_rate": 1.1965917308360484e-05, "loss": 1.3995, "step": 5094 }, { "epoch": 0.8789786940395066, "grad_norm": 0.625, "learning_rate": 1.196324847292499e-05, "loss": 1.426, "step": 5095 }, { "epoch": 0.8791512119382386, "grad_norm": 0.66015625, "learning_rate": 1.1960579492039783e-05, "loss": 1.5251, "step": 5096 }, { "epoch": 0.8793237298369706, "grad_norm": 0.62109375, "learning_rate": 1.1957910365902594e-05, "loss": 1.4423, "step": 5097 }, { "epoch": 0.8794962477357026, "grad_norm": 0.6328125, "learning_rate": 1.1955241094711174e-05, "loss": 1.4563, "step": 5098 }, { "epoch": 0.8796687656344345, "grad_norm": 0.59375, "learning_rate": 1.1952571678663274e-05, "loss": 1.3736, "step": 5099 }, { "epoch": 0.8798412835331666, "grad_norm": 0.625, "learning_rate": 1.194990211795667e-05, "loss": 1.5295, "step": 5100 }, { "epoch": 0.8798412835331666, "eval_loss": 1.4137705564498901, "eval_runtime": 10.9003, "eval_samples_per_second": 93.943, "eval_steps_per_second": 23.486, "step": 5100 }, { "epoch": 0.8800138014318986, "grad_norm": 0.59375, "learning_rate": 1.1947232412789127e-05, "loss": 1.4976, "step": 5101 }, { "epoch": 0.8801863193306305, "grad_norm": 0.67578125, "learning_rate": 1.1944562563358442e-05, "loss": 1.3986, "step": 5102 }, { "epoch": 0.8803588372293626, "grad_norm": 0.62109375, "learning_rate": 1.1941892569862413e-05, "loss": 1.4513, "step": 5103 }, { "epoch": 0.8805313551280946, "grad_norm": 0.8984375, "learning_rate": 1.1939222432498849e-05, "loss": 1.4324, "step": 5104 }, { "epoch": 0.8807038730268265, "grad_norm": 0.82421875, "learning_rate": 1.193655215146557e-05, "loss": 1.4404, "step": 5105 }, { "epoch": 0.8808763909255585, "grad_norm": 0.78515625, "learning_rate": 1.1933881726960403e-05, "loss": 1.4431, "step": 5106 }, { "epoch": 0.8810489088242905, "grad_norm": 0.73828125, "learning_rate": 1.1931211159181201e-05, "loss": 1.4725, "step": 5107 }, { "epoch": 0.8812214267230225, "grad_norm": 0.71484375, "learning_rate": 1.1928540448325807e-05, "loss": 1.4504, "step": 5108 }, { "epoch": 0.8813939446217545, "grad_norm": 0.63671875, "learning_rate": 1.1925869594592086e-05, "loss": 1.3811, "step": 5109 }, { "epoch": 0.8815664625204865, "grad_norm": 0.61328125, "learning_rate": 1.1923198598177912e-05, "loss": 1.4481, "step": 5110 }, { "epoch": 0.8817389804192185, "grad_norm": 0.94921875, "learning_rate": 1.192052745928117e-05, "loss": 1.4472, "step": 5111 }, { "epoch": 0.8819114983179505, "grad_norm": 0.62109375, "learning_rate": 1.1917856178099756e-05, "loss": 1.4421, "step": 5112 }, { "epoch": 0.8820840162166825, "grad_norm": 0.65234375, "learning_rate": 1.191518475483157e-05, "loss": 1.4525, "step": 5113 }, { "epoch": 0.8822565341154145, "grad_norm": 0.73046875, "learning_rate": 1.191251318967453e-05, "loss": 1.4304, "step": 5114 }, { "epoch": 0.8824290520141465, "grad_norm": 0.62109375, "learning_rate": 1.1909841482826564e-05, "loss": 1.4208, "step": 5115 }, { "epoch": 0.8826015699128784, "grad_norm": 0.640625, "learning_rate": 1.190716963448561e-05, "loss": 1.5275, "step": 5116 }, { "epoch": 0.8827740878116105, "grad_norm": 0.6171875, "learning_rate": 1.1904497644849611e-05, "loss": 1.4372, "step": 5117 }, { "epoch": 0.8829466057103424, "grad_norm": 0.66015625, "learning_rate": 1.1901825514116526e-05, "loss": 1.4102, "step": 5118 }, { "epoch": 0.8831191236090744, "grad_norm": 0.62109375, "learning_rate": 1.1899153242484322e-05, "loss": 1.4247, "step": 5119 }, { "epoch": 0.8832916415078065, "grad_norm": 0.71484375, "learning_rate": 1.1896480830150985e-05, "loss": 1.4263, "step": 5120 }, { "epoch": 0.8834641594065384, "grad_norm": 0.63671875, "learning_rate": 1.1893808277314494e-05, "loss": 1.3491, "step": 5121 }, { "epoch": 0.8836366773052704, "grad_norm": 0.61328125, "learning_rate": 1.189113558417285e-05, "loss": 1.4428, "step": 5122 }, { "epoch": 0.8838091952040024, "grad_norm": 0.65625, "learning_rate": 1.1888462750924072e-05, "loss": 1.4553, "step": 5123 }, { "epoch": 0.8839817131027344, "grad_norm": 0.6484375, "learning_rate": 1.188578977776617e-05, "loss": 1.4678, "step": 5124 }, { "epoch": 0.8841542310014664, "grad_norm": 0.60546875, "learning_rate": 1.1883116664897179e-05, "loss": 1.3806, "step": 5125 }, { "epoch": 0.8843267489001984, "grad_norm": 0.66796875, "learning_rate": 1.1880443412515138e-05, "loss": 1.4011, "step": 5126 }, { "epoch": 0.8844992667989304, "grad_norm": 0.62109375, "learning_rate": 1.1877770020818103e-05, "loss": 1.4743, "step": 5127 }, { "epoch": 0.8846717846976624, "grad_norm": 0.56640625, "learning_rate": 1.187509649000413e-05, "loss": 1.4523, "step": 5128 }, { "epoch": 0.8848443025963943, "grad_norm": 0.8515625, "learning_rate": 1.1872422820271294e-05, "loss": 1.4416, "step": 5129 }, { "epoch": 0.8850168204951264, "grad_norm": 0.63671875, "learning_rate": 1.1869749011817675e-05, "loss": 1.4134, "step": 5130 }, { "epoch": 0.8851893383938584, "grad_norm": 0.66796875, "learning_rate": 1.1867075064841365e-05, "loss": 1.3652, "step": 5131 }, { "epoch": 0.8853618562925903, "grad_norm": 3.609375, "learning_rate": 1.1864400979540472e-05, "loss": 1.4375, "step": 5132 }, { "epoch": 0.8855343741913223, "grad_norm": 0.6171875, "learning_rate": 1.1861726756113101e-05, "loss": 1.4964, "step": 5133 }, { "epoch": 0.8857068920900544, "grad_norm": 0.60546875, "learning_rate": 1.185905239475738e-05, "loss": 1.4896, "step": 5134 }, { "epoch": 0.8858794099887863, "grad_norm": 0.59375, "learning_rate": 1.1856377895671442e-05, "loss": 1.4705, "step": 5135 }, { "epoch": 0.8860519278875183, "grad_norm": 0.578125, "learning_rate": 1.1853703259053436e-05, "loss": 1.4255, "step": 5136 }, { "epoch": 0.8862244457862504, "grad_norm": 0.59375, "learning_rate": 1.1851028485101504e-05, "loss": 1.4481, "step": 5137 }, { "epoch": 0.8863969636849823, "grad_norm": 0.67578125, "learning_rate": 1.1848353574013813e-05, "loss": 1.4347, "step": 5138 }, { "epoch": 0.8865694815837143, "grad_norm": 0.796875, "learning_rate": 1.1845678525988547e-05, "loss": 1.4899, "step": 5139 }, { "epoch": 0.8867419994824463, "grad_norm": 0.5859375, "learning_rate": 1.184300334122388e-05, "loss": 1.5167, "step": 5140 }, { "epoch": 0.8869145173811783, "grad_norm": 0.59765625, "learning_rate": 1.1840328019918011e-05, "loss": 1.3896, "step": 5141 }, { "epoch": 0.8870870352799103, "grad_norm": 0.609375, "learning_rate": 1.1837652562269141e-05, "loss": 1.4038, "step": 5142 }, { "epoch": 0.8872595531786422, "grad_norm": 0.75, "learning_rate": 1.1834976968475488e-05, "loss": 1.4514, "step": 5143 }, { "epoch": 0.8874320710773743, "grad_norm": 0.59375, "learning_rate": 1.183230123873528e-05, "loss": 1.4866, "step": 5144 }, { "epoch": 0.8876045889761063, "grad_norm": 0.65625, "learning_rate": 1.1829625373246745e-05, "loss": 1.4061, "step": 5145 }, { "epoch": 0.8877771068748382, "grad_norm": 0.59765625, "learning_rate": 1.1826949372208128e-05, "loss": 1.4768, "step": 5146 }, { "epoch": 0.8879496247735703, "grad_norm": 0.671875, "learning_rate": 1.1824273235817687e-05, "loss": 1.3845, "step": 5147 }, { "epoch": 0.8881221426723023, "grad_norm": 0.66796875, "learning_rate": 1.1821596964273689e-05, "loss": 1.6053, "step": 5148 }, { "epoch": 0.8882946605710342, "grad_norm": 0.58203125, "learning_rate": 1.1818920557774402e-05, "loss": 1.44, "step": 5149 }, { "epoch": 0.8884671784697662, "grad_norm": 0.6015625, "learning_rate": 1.1816244016518117e-05, "loss": 1.4575, "step": 5150 }, { "epoch": 0.8886396963684983, "grad_norm": 0.828125, "learning_rate": 1.1813567340703128e-05, "loss": 1.4115, "step": 5151 }, { "epoch": 0.8888122142672302, "grad_norm": 0.69140625, "learning_rate": 1.1810890530527738e-05, "loss": 1.4147, "step": 5152 }, { "epoch": 0.8889847321659622, "grad_norm": 0.6484375, "learning_rate": 1.180821358619026e-05, "loss": 1.3576, "step": 5153 }, { "epoch": 0.8891572500646943, "grad_norm": 0.60546875, "learning_rate": 1.1805536507889021e-05, "loss": 1.4656, "step": 5154 }, { "epoch": 0.8893297679634262, "grad_norm": 0.66796875, "learning_rate": 1.1802859295822358e-05, "loss": 1.4959, "step": 5155 }, { "epoch": 0.8895022858621582, "grad_norm": 0.69921875, "learning_rate": 1.1800181950188617e-05, "loss": 1.4124, "step": 5156 }, { "epoch": 0.8896748037608901, "grad_norm": 0.61328125, "learning_rate": 1.1797504471186146e-05, "loss": 1.4673, "step": 5157 }, { "epoch": 0.8898473216596222, "grad_norm": 0.6171875, "learning_rate": 1.179482685901331e-05, "loss": 1.4117, "step": 5158 }, { "epoch": 0.8900198395583542, "grad_norm": 0.59765625, "learning_rate": 1.1792149113868488e-05, "loss": 1.5153, "step": 5159 }, { "epoch": 0.8901923574570861, "grad_norm": 0.62109375, "learning_rate": 1.1789471235950062e-05, "loss": 1.4547, "step": 5160 }, { "epoch": 0.8903648753558182, "grad_norm": 0.57421875, "learning_rate": 1.1786793225456428e-05, "loss": 1.4885, "step": 5161 }, { "epoch": 0.8905373932545502, "grad_norm": 0.6015625, "learning_rate": 1.1784115082585982e-05, "loss": 1.4485, "step": 5162 }, { "epoch": 0.8907099111532821, "grad_norm": 0.57421875, "learning_rate": 1.1781436807537148e-05, "loss": 1.4793, "step": 5163 }, { "epoch": 0.8908824290520142, "grad_norm": 0.7265625, "learning_rate": 1.1778758400508343e-05, "loss": 1.485, "step": 5164 }, { "epoch": 0.8910549469507462, "grad_norm": 0.57421875, "learning_rate": 1.1776079861698e-05, "loss": 1.4215, "step": 5165 }, { "epoch": 0.8912274648494781, "grad_norm": 0.65625, "learning_rate": 1.1773401191304564e-05, "loss": 1.4582, "step": 5166 }, { "epoch": 0.8913999827482101, "grad_norm": 0.61328125, "learning_rate": 1.1770722389526487e-05, "loss": 1.4509, "step": 5167 }, { "epoch": 0.8915725006469422, "grad_norm": 0.59375, "learning_rate": 1.1768043456562233e-05, "loss": 1.5415, "step": 5168 }, { "epoch": 0.8917450185456741, "grad_norm": 0.67578125, "learning_rate": 1.176536439261027e-05, "loss": 1.4549, "step": 5169 }, { "epoch": 0.8919175364444061, "grad_norm": 0.640625, "learning_rate": 1.176268519786908e-05, "loss": 1.4992, "step": 5170 }, { "epoch": 0.8920900543431382, "grad_norm": 0.62109375, "learning_rate": 1.1760005872537161e-05, "loss": 1.4561, "step": 5171 }, { "epoch": 0.8922625722418701, "grad_norm": 0.6328125, "learning_rate": 1.175732641681301e-05, "loss": 1.4543, "step": 5172 }, { "epoch": 0.8924350901406021, "grad_norm": 0.6484375, "learning_rate": 1.1754646830895138e-05, "loss": 1.4101, "step": 5173 }, { "epoch": 0.892607608039334, "grad_norm": 0.67578125, "learning_rate": 1.1751967114982063e-05, "loss": 1.4677, "step": 5174 }, { "epoch": 0.8927801259380661, "grad_norm": 0.609375, "learning_rate": 1.1749287269272318e-05, "loss": 1.4727, "step": 5175 }, { "epoch": 0.8929526438367981, "grad_norm": 0.57421875, "learning_rate": 1.1746607293964446e-05, "loss": 1.4093, "step": 5176 }, { "epoch": 0.89312516173553, "grad_norm": 0.66796875, "learning_rate": 1.1743927189256988e-05, "loss": 1.569, "step": 5177 }, { "epoch": 0.8932976796342621, "grad_norm": 0.6171875, "learning_rate": 1.1741246955348506e-05, "loss": 1.5247, "step": 5178 }, { "epoch": 0.8934701975329941, "grad_norm": 0.66015625, "learning_rate": 1.1738566592437573e-05, "loss": 1.3986, "step": 5179 }, { "epoch": 0.893642715431726, "grad_norm": 0.6328125, "learning_rate": 1.1735886100722764e-05, "loss": 1.3666, "step": 5180 }, { "epoch": 0.893815233330458, "grad_norm": 0.64453125, "learning_rate": 1.1733205480402663e-05, "loss": 1.5267, "step": 5181 }, { "epoch": 0.89398775122919, "grad_norm": 0.6796875, "learning_rate": 1.1730524731675872e-05, "loss": 1.3956, "step": 5182 }, { "epoch": 0.894160269127922, "grad_norm": 0.60546875, "learning_rate": 1.1727843854740997e-05, "loss": 1.4123, "step": 5183 }, { "epoch": 0.894332787026654, "grad_norm": 0.578125, "learning_rate": 1.1725162849796653e-05, "loss": 1.4602, "step": 5184 }, { "epoch": 0.894505304925386, "grad_norm": 0.64453125, "learning_rate": 1.1722481717041467e-05, "loss": 1.3849, "step": 5185 }, { "epoch": 0.894677822824118, "grad_norm": 0.65625, "learning_rate": 1.171980045667407e-05, "loss": 1.454, "step": 5186 }, { "epoch": 0.89485034072285, "grad_norm": 0.65625, "learning_rate": 1.1717119068893108e-05, "loss": 1.4808, "step": 5187 }, { "epoch": 0.895022858621582, "grad_norm": 0.6328125, "learning_rate": 1.171443755389724e-05, "loss": 1.5093, "step": 5188 }, { "epoch": 0.895195376520314, "grad_norm": 0.5859375, "learning_rate": 1.1711755911885126e-05, "loss": 1.3697, "step": 5189 }, { "epoch": 0.895367894419046, "grad_norm": 0.68359375, "learning_rate": 1.1709074143055435e-05, "loss": 1.3519, "step": 5190 }, { "epoch": 0.8955404123177779, "grad_norm": 0.609375, "learning_rate": 1.1706392247606855e-05, "loss": 1.4844, "step": 5191 }, { "epoch": 0.89571293021651, "grad_norm": 0.6796875, "learning_rate": 1.1703710225738077e-05, "loss": 1.4182, "step": 5192 }, { "epoch": 0.895885448115242, "grad_norm": 0.61328125, "learning_rate": 1.1701028077647798e-05, "loss": 1.4381, "step": 5193 }, { "epoch": 0.8960579660139739, "grad_norm": 0.60546875, "learning_rate": 1.169834580353473e-05, "loss": 1.3898, "step": 5194 }, { "epoch": 0.896230483912706, "grad_norm": 0.6796875, "learning_rate": 1.1695663403597592e-05, "loss": 1.3194, "step": 5195 }, { "epoch": 0.896403001811438, "grad_norm": 0.578125, "learning_rate": 1.1692980878035119e-05, "loss": 1.4205, "step": 5196 }, { "epoch": 0.8965755197101699, "grad_norm": 0.671875, "learning_rate": 1.1690298227046041e-05, "loss": 1.48, "step": 5197 }, { "epoch": 0.8967480376089019, "grad_norm": 1.0703125, "learning_rate": 1.1687615450829108e-05, "loss": 1.4633, "step": 5198 }, { "epoch": 0.8969205555076339, "grad_norm": 0.6875, "learning_rate": 1.1684932549583079e-05, "loss": 1.3481, "step": 5199 }, { "epoch": 0.8970930734063659, "grad_norm": 0.6640625, "learning_rate": 1.1682249523506721e-05, "loss": 1.4053, "step": 5200 }, { "epoch": 0.8970930734063659, "eval_loss": 1.41326105594635, "eval_runtime": 10.8591, "eval_samples_per_second": 94.299, "eval_steps_per_second": 23.575, "step": 5200 }, { "epoch": 0.8972655913050979, "grad_norm": 0.62890625, "learning_rate": 1.1679566372798803e-05, "loss": 1.4101, "step": 5201 }, { "epoch": 0.8974381092038299, "grad_norm": 0.59765625, "learning_rate": 1.1676883097658117e-05, "loss": 1.43, "step": 5202 }, { "epoch": 0.8976106271025619, "grad_norm": 0.6328125, "learning_rate": 1.1674199698283448e-05, "loss": 1.3728, "step": 5203 }, { "epoch": 0.8977831450012939, "grad_norm": 0.88671875, "learning_rate": 1.167151617487361e-05, "loss": 1.4642, "step": 5204 }, { "epoch": 0.8979556629000259, "grad_norm": 0.5859375, "learning_rate": 1.1668832527627407e-05, "loss": 1.4412, "step": 5205 }, { "epoch": 0.8981281807987579, "grad_norm": 0.73046875, "learning_rate": 1.1666148756743665e-05, "loss": 1.5616, "step": 5206 }, { "epoch": 0.8983006986974899, "grad_norm": 0.59765625, "learning_rate": 1.166346486242121e-05, "loss": 1.444, "step": 5207 }, { "epoch": 0.8984732165962218, "grad_norm": 0.70703125, "learning_rate": 1.1660780844858887e-05, "loss": 1.4412, "step": 5208 }, { "epoch": 0.8986457344949539, "grad_norm": 0.63671875, "learning_rate": 1.1658096704255542e-05, "loss": 1.3195, "step": 5209 }, { "epoch": 0.8988182523936858, "grad_norm": 0.59765625, "learning_rate": 1.165541244081003e-05, "loss": 1.371, "step": 5210 }, { "epoch": 0.8989907702924178, "grad_norm": 0.59375, "learning_rate": 1.1652728054721223e-05, "loss": 1.4934, "step": 5211 }, { "epoch": 0.8991632881911499, "grad_norm": 0.6171875, "learning_rate": 1.1650043546187994e-05, "loss": 1.412, "step": 5212 }, { "epoch": 0.8993358060898818, "grad_norm": 0.70703125, "learning_rate": 1.1647358915409231e-05, "loss": 1.4778, "step": 5213 }, { "epoch": 0.8995083239886138, "grad_norm": 0.66015625, "learning_rate": 1.1644674162583825e-05, "loss": 1.5873, "step": 5214 }, { "epoch": 0.8996808418873458, "grad_norm": 0.62890625, "learning_rate": 1.164198928791068e-05, "loss": 1.5613, "step": 5215 }, { "epoch": 0.8998533597860778, "grad_norm": 0.61328125, "learning_rate": 1.1639304291588708e-05, "loss": 1.4511, "step": 5216 }, { "epoch": 0.9000258776848098, "grad_norm": 0.6796875, "learning_rate": 1.1636619173816834e-05, "loss": 1.406, "step": 5217 }, { "epoch": 0.9001983955835418, "grad_norm": 0.69140625, "learning_rate": 1.1633933934793984e-05, "loss": 1.5064, "step": 5218 }, { "epoch": 0.9003709134822738, "grad_norm": 0.65234375, "learning_rate": 1.1631248574719098e-05, "loss": 1.3637, "step": 5219 }, { "epoch": 0.9005434313810058, "grad_norm": 0.7734375, "learning_rate": 1.1628563093791128e-05, "loss": 1.501, "step": 5220 }, { "epoch": 0.9007159492797377, "grad_norm": 0.65234375, "learning_rate": 1.1625877492209025e-05, "loss": 1.523, "step": 5221 }, { "epoch": 0.9008884671784698, "grad_norm": 0.75, "learning_rate": 1.1623191770171761e-05, "loss": 1.4947, "step": 5222 }, { "epoch": 0.9010609850772018, "grad_norm": 0.734375, "learning_rate": 1.1620505927878305e-05, "loss": 1.5754, "step": 5223 }, { "epoch": 0.9012335029759337, "grad_norm": 0.63671875, "learning_rate": 1.161781996552765e-05, "loss": 1.3632, "step": 5224 }, { "epoch": 0.9014060208746657, "grad_norm": 0.58203125, "learning_rate": 1.1615133883318778e-05, "loss": 1.4967, "step": 5225 }, { "epoch": 0.9015785387733978, "grad_norm": 1.0078125, "learning_rate": 1.1612447681450697e-05, "loss": 1.3705, "step": 5226 }, { "epoch": 0.9017510566721297, "grad_norm": 0.6328125, "learning_rate": 1.160976136012242e-05, "loss": 1.4213, "step": 5227 }, { "epoch": 0.9019235745708617, "grad_norm": 1.015625, "learning_rate": 1.1607074919532964e-05, "loss": 1.487, "step": 5228 }, { "epoch": 0.9020960924695938, "grad_norm": 0.58203125, "learning_rate": 1.160438835988135e-05, "loss": 1.4603, "step": 5229 }, { "epoch": 0.9022686103683257, "grad_norm": 0.83984375, "learning_rate": 1.1601701681366625e-05, "loss": 1.4169, "step": 5230 }, { "epoch": 0.9024411282670577, "grad_norm": 0.69140625, "learning_rate": 1.1599014884187834e-05, "loss": 1.4951, "step": 5231 }, { "epoch": 0.9026136461657897, "grad_norm": 0.61328125, "learning_rate": 1.1596327968544025e-05, "loss": 1.4138, "step": 5232 }, { "epoch": 0.9027861640645217, "grad_norm": 0.73828125, "learning_rate": 1.1593640934634272e-05, "loss": 1.4008, "step": 5233 }, { "epoch": 0.9029586819632537, "grad_norm": 0.61328125, "learning_rate": 1.1590953782657635e-05, "loss": 1.5109, "step": 5234 }, { "epoch": 0.9031311998619856, "grad_norm": 0.64453125, "learning_rate": 1.15882665128132e-05, "loss": 1.4836, "step": 5235 }, { "epoch": 0.9033037177607177, "grad_norm": 0.76953125, "learning_rate": 1.1585579125300063e-05, "loss": 1.4583, "step": 5236 }, { "epoch": 0.9034762356594497, "grad_norm": 0.75, "learning_rate": 1.1582891620317316e-05, "loss": 1.3559, "step": 5237 }, { "epoch": 0.9036487535581816, "grad_norm": 0.84375, "learning_rate": 1.1580203998064066e-05, "loss": 1.51, "step": 5238 }, { "epoch": 0.9038212714569136, "grad_norm": 0.609375, "learning_rate": 1.157751625873943e-05, "loss": 1.4308, "step": 5239 }, { "epoch": 0.9039937893556457, "grad_norm": 0.64453125, "learning_rate": 1.1574828402542535e-05, "loss": 1.4897, "step": 5240 }, { "epoch": 0.9041663072543776, "grad_norm": 0.8203125, "learning_rate": 1.157214042967251e-05, "loss": 1.4364, "step": 5241 }, { "epoch": 0.9043388251531096, "grad_norm": 0.71484375, "learning_rate": 1.1569452340328497e-05, "loss": 1.4573, "step": 5242 }, { "epoch": 0.9045113430518417, "grad_norm": 0.76171875, "learning_rate": 1.1566764134709652e-05, "loss": 1.4314, "step": 5243 }, { "epoch": 0.9046838609505736, "grad_norm": 0.59765625, "learning_rate": 1.156407581301513e-05, "loss": 1.532, "step": 5244 }, { "epoch": 0.9048563788493056, "grad_norm": 0.58984375, "learning_rate": 1.1561387375444098e-05, "loss": 1.4244, "step": 5245 }, { "epoch": 0.9050288967480377, "grad_norm": 0.69140625, "learning_rate": 1.155869882219573e-05, "loss": 1.4332, "step": 5246 }, { "epoch": 0.9052014146467696, "grad_norm": 0.671875, "learning_rate": 1.1556010153469219e-05, "loss": 1.3592, "step": 5247 }, { "epoch": 0.9053739325455016, "grad_norm": 0.62890625, "learning_rate": 1.155332136946375e-05, "loss": 1.4865, "step": 5248 }, { "epoch": 0.9055464504442335, "grad_norm": 0.59765625, "learning_rate": 1.155063247037853e-05, "loss": 1.5179, "step": 5249 }, { "epoch": 0.9057189683429656, "grad_norm": 0.58203125, "learning_rate": 1.154794345641277e-05, "loss": 1.4468, "step": 5250 }, { "epoch": 0.9058914862416976, "grad_norm": 0.58203125, "learning_rate": 1.154525432776568e-05, "loss": 1.4827, "step": 5251 }, { "epoch": 0.9060640041404295, "grad_norm": 1.15625, "learning_rate": 1.15425650846365e-05, "loss": 1.4675, "step": 5252 }, { "epoch": 0.9062365220391616, "grad_norm": 0.61328125, "learning_rate": 1.153987572722446e-05, "loss": 1.46, "step": 5253 }, { "epoch": 0.9064090399378936, "grad_norm": 0.58984375, "learning_rate": 1.1537186255728803e-05, "loss": 1.3107, "step": 5254 }, { "epoch": 0.9065815578366255, "grad_norm": 0.6171875, "learning_rate": 1.1534496670348783e-05, "loss": 1.5258, "step": 5255 }, { "epoch": 0.9067540757353575, "grad_norm": 0.61328125, "learning_rate": 1.1531806971283663e-05, "loss": 1.4652, "step": 5256 }, { "epoch": 0.9069265936340896, "grad_norm": 0.578125, "learning_rate": 1.152911715873271e-05, "loss": 1.5179, "step": 5257 }, { "epoch": 0.9070991115328215, "grad_norm": 0.578125, "learning_rate": 1.1526427232895205e-05, "loss": 1.493, "step": 5258 }, { "epoch": 0.9072716294315535, "grad_norm": 0.59765625, "learning_rate": 1.1523737193970432e-05, "loss": 1.4812, "step": 5259 }, { "epoch": 0.9074441473302856, "grad_norm": 0.5859375, "learning_rate": 1.1521047042157684e-05, "loss": 1.4551, "step": 5260 }, { "epoch": 0.9076166652290175, "grad_norm": 0.62109375, "learning_rate": 1.1518356777656274e-05, "loss": 1.3966, "step": 5261 }, { "epoch": 0.9077891831277495, "grad_norm": 0.58203125, "learning_rate": 1.1515666400665504e-05, "loss": 1.3979, "step": 5262 }, { "epoch": 0.9079617010264815, "grad_norm": 0.56640625, "learning_rate": 1.1512975911384695e-05, "loss": 1.3519, "step": 5263 }, { "epoch": 0.9081342189252135, "grad_norm": 0.59375, "learning_rate": 1.1510285310013176e-05, "loss": 1.4521, "step": 5264 }, { "epoch": 0.9083067368239455, "grad_norm": 0.55078125, "learning_rate": 1.1507594596750288e-05, "loss": 1.445, "step": 5265 }, { "epoch": 0.9084792547226774, "grad_norm": 0.61328125, "learning_rate": 1.150490377179537e-05, "loss": 1.3316, "step": 5266 }, { "epoch": 0.9086517726214095, "grad_norm": 0.61328125, "learning_rate": 1.1502212835347776e-05, "loss": 1.4873, "step": 5267 }, { "epoch": 0.9088242905201415, "grad_norm": 0.6171875, "learning_rate": 1.1499521787606874e-05, "loss": 1.446, "step": 5268 }, { "epoch": 0.9089968084188734, "grad_norm": 0.65234375, "learning_rate": 1.1496830628772025e-05, "loss": 1.4837, "step": 5269 }, { "epoch": 0.9091693263176055, "grad_norm": 0.65234375, "learning_rate": 1.1494139359042612e-05, "loss": 1.5096, "step": 5270 }, { "epoch": 0.9093418442163375, "grad_norm": 0.5859375, "learning_rate": 1.1491447978618015e-05, "loss": 1.4487, "step": 5271 }, { "epoch": 0.9095143621150694, "grad_norm": 0.609375, "learning_rate": 1.148875648769764e-05, "loss": 1.4519, "step": 5272 }, { "epoch": 0.9096868800138014, "grad_norm": 0.59765625, "learning_rate": 1.1486064886480876e-05, "loss": 1.4319, "step": 5273 }, { "epoch": 0.9098593979125335, "grad_norm": 0.5859375, "learning_rate": 1.1483373175167142e-05, "loss": 1.3451, "step": 5274 }, { "epoch": 0.9100319158112654, "grad_norm": 0.625, "learning_rate": 1.1480681353955856e-05, "loss": 1.4576, "step": 5275 }, { "epoch": 0.9102044337099974, "grad_norm": 0.67578125, "learning_rate": 1.1477989423046442e-05, "loss": 1.4853, "step": 5276 }, { "epoch": 0.9103769516087294, "grad_norm": 0.59375, "learning_rate": 1.147529738263834e-05, "loss": 1.4957, "step": 5277 }, { "epoch": 0.9105494695074614, "grad_norm": 0.6484375, "learning_rate": 1.1472605232930985e-05, "loss": 1.4662, "step": 5278 }, { "epoch": 0.9107219874061934, "grad_norm": 0.80078125, "learning_rate": 1.1469912974123835e-05, "loss": 1.3621, "step": 5279 }, { "epoch": 0.9108945053049254, "grad_norm": 0.609375, "learning_rate": 1.1467220606416348e-05, "loss": 1.4839, "step": 5280 }, { "epoch": 0.9110670232036574, "grad_norm": 0.73046875, "learning_rate": 1.1464528130007992e-05, "loss": 1.3379, "step": 5281 }, { "epoch": 0.9112395411023894, "grad_norm": 0.80859375, "learning_rate": 1.146183554509824e-05, "loss": 1.4744, "step": 5282 }, { "epoch": 0.9114120590011213, "grad_norm": 0.6640625, "learning_rate": 1.1459142851886573e-05, "loss": 1.4277, "step": 5283 }, { "epoch": 0.9115845768998534, "grad_norm": 0.625, "learning_rate": 1.1456450050572491e-05, "loss": 1.5622, "step": 5284 }, { "epoch": 0.9117570947985854, "grad_norm": 0.63671875, "learning_rate": 1.1453757141355488e-05, "loss": 1.4224, "step": 5285 }, { "epoch": 0.9119296126973173, "grad_norm": 0.69921875, "learning_rate": 1.1451064124435072e-05, "loss": 1.4353, "step": 5286 }, { "epoch": 0.9121021305960494, "grad_norm": 0.6015625, "learning_rate": 1.1448371000010758e-05, "loss": 1.3761, "step": 5287 }, { "epoch": 0.9122746484947813, "grad_norm": 0.796875, "learning_rate": 1.1445677768282073e-05, "loss": 1.4288, "step": 5288 }, { "epoch": 0.9124471663935133, "grad_norm": 0.65625, "learning_rate": 1.1442984429448545e-05, "loss": 1.4021, "step": 5289 }, { "epoch": 0.9126196842922453, "grad_norm": 0.64453125, "learning_rate": 1.1440290983709715e-05, "loss": 1.4077, "step": 5290 }, { "epoch": 0.9127922021909773, "grad_norm": 0.5703125, "learning_rate": 1.1437597431265126e-05, "loss": 1.403, "step": 5291 }, { "epoch": 0.9129647200897093, "grad_norm": 0.60546875, "learning_rate": 1.143490377231434e-05, "loss": 1.37, "step": 5292 }, { "epoch": 0.9131372379884413, "grad_norm": 0.609375, "learning_rate": 1.1432210007056919e-05, "loss": 1.4122, "step": 5293 }, { "epoch": 0.9133097558871733, "grad_norm": 0.66015625, "learning_rate": 1.1429516135692427e-05, "loss": 1.4618, "step": 5294 }, { "epoch": 0.9134822737859053, "grad_norm": 0.63671875, "learning_rate": 1.1426822158420449e-05, "loss": 1.3283, "step": 5295 }, { "epoch": 0.9136547916846373, "grad_norm": 0.72265625, "learning_rate": 1.1424128075440572e-05, "loss": 1.438, "step": 5296 }, { "epoch": 0.9138273095833693, "grad_norm": 0.6875, "learning_rate": 1.142143388695239e-05, "loss": 1.4426, "step": 5297 }, { "epoch": 0.9139998274821013, "grad_norm": 0.6328125, "learning_rate": 1.14187395931555e-05, "loss": 1.4818, "step": 5298 }, { "epoch": 0.9141723453808333, "grad_norm": 0.6484375, "learning_rate": 1.1416045194249517e-05, "loss": 1.4444, "step": 5299 }, { "epoch": 0.9143448632795652, "grad_norm": 0.63671875, "learning_rate": 1.141335069043406e-05, "loss": 1.4915, "step": 5300 }, { "epoch": 0.9143448632795652, "eval_loss": 1.4127678871154785, "eval_runtime": 10.8242, "eval_samples_per_second": 94.603, "eval_steps_per_second": 23.651, "step": 5300 }, { "epoch": 0.9145173811782973, "grad_norm": 0.67578125, "learning_rate": 1.1410656081908754e-05, "loss": 1.3751, "step": 5301 }, { "epoch": 0.9146898990770292, "grad_norm": 1.046875, "learning_rate": 1.1407961368873226e-05, "loss": 1.4008, "step": 5302 }, { "epoch": 0.9148624169757612, "grad_norm": 0.6796875, "learning_rate": 1.1405266551527126e-05, "loss": 1.4782, "step": 5303 }, { "epoch": 0.9150349348744933, "grad_norm": 0.7109375, "learning_rate": 1.1402571630070098e-05, "loss": 1.4454, "step": 5304 }, { "epoch": 0.9152074527732252, "grad_norm": 0.625, "learning_rate": 1.1399876604701805e-05, "loss": 1.3988, "step": 5305 }, { "epoch": 0.9153799706719572, "grad_norm": 0.765625, "learning_rate": 1.1397181475621901e-05, "loss": 1.3682, "step": 5306 }, { "epoch": 0.9155524885706892, "grad_norm": 0.625, "learning_rate": 1.1394486243030066e-05, "loss": 1.438, "step": 5307 }, { "epoch": 0.9157250064694212, "grad_norm": 0.61328125, "learning_rate": 1.1391790907125975e-05, "loss": 1.4038, "step": 5308 }, { "epoch": 0.9158975243681532, "grad_norm": 0.72265625, "learning_rate": 1.138909546810932e-05, "loss": 1.4956, "step": 5309 }, { "epoch": 0.9160700422668852, "grad_norm": 0.68359375, "learning_rate": 1.138639992617979e-05, "loss": 1.5037, "step": 5310 }, { "epoch": 0.9162425601656172, "grad_norm": 0.625, "learning_rate": 1.138370428153709e-05, "loss": 1.4534, "step": 5311 }, { "epoch": 0.9164150780643492, "grad_norm": 0.58203125, "learning_rate": 1.1381008534380933e-05, "loss": 1.4813, "step": 5312 }, { "epoch": 0.9165875959630811, "grad_norm": 0.6875, "learning_rate": 1.1378312684911036e-05, "loss": 1.4618, "step": 5313 }, { "epoch": 0.9167601138618131, "grad_norm": 0.66015625, "learning_rate": 1.1375616733327125e-05, "loss": 1.3733, "step": 5314 }, { "epoch": 0.9169326317605452, "grad_norm": 0.9609375, "learning_rate": 1.1372920679828922e-05, "loss": 1.4316, "step": 5315 }, { "epoch": 0.9171051496592771, "grad_norm": 0.65625, "learning_rate": 1.1370224524616187e-05, "loss": 1.419, "step": 5316 }, { "epoch": 0.9172776675580091, "grad_norm": 0.59375, "learning_rate": 1.1367528267888653e-05, "loss": 1.4067, "step": 5317 }, { "epoch": 0.9174501854567412, "grad_norm": 0.8046875, "learning_rate": 1.136483190984608e-05, "loss": 1.4496, "step": 5318 }, { "epoch": 0.9176227033554731, "grad_norm": 0.87890625, "learning_rate": 1.1362135450688232e-05, "loss": 1.4289, "step": 5319 }, { "epoch": 0.9177952212542051, "grad_norm": 0.65234375, "learning_rate": 1.1359438890614878e-05, "loss": 1.4319, "step": 5320 }, { "epoch": 0.9179677391529372, "grad_norm": 0.62890625, "learning_rate": 1.13567422298258e-05, "loss": 1.4672, "step": 5321 }, { "epoch": 0.9181402570516691, "grad_norm": 0.6875, "learning_rate": 1.1354045468520777e-05, "loss": 1.4084, "step": 5322 }, { "epoch": 0.9183127749504011, "grad_norm": 0.671875, "learning_rate": 1.1351348606899605e-05, "loss": 1.3973, "step": 5323 }, { "epoch": 0.918485292849133, "grad_norm": 0.66015625, "learning_rate": 1.1348651645162088e-05, "loss": 1.4132, "step": 5324 }, { "epoch": 0.9186578107478651, "grad_norm": 0.57421875, "learning_rate": 1.1345954583508028e-05, "loss": 1.3457, "step": 5325 }, { "epoch": 0.9188303286465971, "grad_norm": 0.6328125, "learning_rate": 1.1343257422137244e-05, "loss": 1.3226, "step": 5326 }, { "epoch": 0.919002846545329, "grad_norm": 0.578125, "learning_rate": 1.1340560161249554e-05, "loss": 1.3878, "step": 5327 }, { "epoch": 0.9191753644440611, "grad_norm": 0.57421875, "learning_rate": 1.1337862801044792e-05, "loss": 1.4944, "step": 5328 }, { "epoch": 0.9193478823427931, "grad_norm": 1.609375, "learning_rate": 1.1335165341722798e-05, "loss": 1.4164, "step": 5329 }, { "epoch": 0.919520400241525, "grad_norm": 0.6640625, "learning_rate": 1.133246778348341e-05, "loss": 1.5615, "step": 5330 }, { "epoch": 0.919692918140257, "grad_norm": 0.74609375, "learning_rate": 1.132977012652648e-05, "loss": 1.3541, "step": 5331 }, { "epoch": 0.9198654360389891, "grad_norm": 0.6875, "learning_rate": 1.1327072371051873e-05, "loss": 1.3902, "step": 5332 }, { "epoch": 0.920037953937721, "grad_norm": 0.609375, "learning_rate": 1.1324374517259455e-05, "loss": 1.4545, "step": 5333 }, { "epoch": 0.920210471836453, "grad_norm": 0.60546875, "learning_rate": 1.1321676565349096e-05, "loss": 1.4162, "step": 5334 }, { "epoch": 0.9203829897351851, "grad_norm": 0.58203125, "learning_rate": 1.1318978515520678e-05, "loss": 1.3777, "step": 5335 }, { "epoch": 0.920555507633917, "grad_norm": 0.58984375, "learning_rate": 1.1316280367974091e-05, "loss": 1.4113, "step": 5336 }, { "epoch": 0.920728025532649, "grad_norm": 0.5546875, "learning_rate": 1.131358212290923e-05, "loss": 1.5264, "step": 5337 }, { "epoch": 0.9209005434313811, "grad_norm": 0.59375, "learning_rate": 1.1310883780525996e-05, "loss": 1.4073, "step": 5338 }, { "epoch": 0.921073061330113, "grad_norm": 0.72265625, "learning_rate": 1.1308185341024303e-05, "loss": 1.3889, "step": 5339 }, { "epoch": 0.921245579228845, "grad_norm": 0.63671875, "learning_rate": 1.1305486804604065e-05, "loss": 1.4394, "step": 5340 }, { "epoch": 0.9214180971275769, "grad_norm": 0.6640625, "learning_rate": 1.1302788171465208e-05, "loss": 1.2919, "step": 5341 }, { "epoch": 0.921590615026309, "grad_norm": 0.6015625, "learning_rate": 1.1300089441807664e-05, "loss": 1.4688, "step": 5342 }, { "epoch": 0.921763132925041, "grad_norm": 0.62109375, "learning_rate": 1.129739061583137e-05, "loss": 1.5077, "step": 5343 }, { "epoch": 0.9219356508237729, "grad_norm": 0.67578125, "learning_rate": 1.129469169373627e-05, "loss": 1.5521, "step": 5344 }, { "epoch": 0.922108168722505, "grad_norm": 0.62109375, "learning_rate": 1.1291992675722325e-05, "loss": 1.55, "step": 5345 }, { "epoch": 0.922280686621237, "grad_norm": 0.58203125, "learning_rate": 1.1289293561989486e-05, "loss": 1.3981, "step": 5346 }, { "epoch": 0.9224532045199689, "grad_norm": 0.7734375, "learning_rate": 1.1286594352737723e-05, "loss": 1.4134, "step": 5347 }, { "epoch": 0.9226257224187009, "grad_norm": 0.61328125, "learning_rate": 1.1283895048167013e-05, "loss": 1.3881, "step": 5348 }, { "epoch": 0.922798240317433, "grad_norm": 0.578125, "learning_rate": 1.1281195648477336e-05, "loss": 1.4075, "step": 5349 }, { "epoch": 0.9229707582161649, "grad_norm": 0.625, "learning_rate": 1.1278496153868681e-05, "loss": 1.3477, "step": 5350 }, { "epoch": 0.9231432761148969, "grad_norm": 0.6796875, "learning_rate": 1.127579656454104e-05, "loss": 1.4386, "step": 5351 }, { "epoch": 0.923315794013629, "grad_norm": 0.58984375, "learning_rate": 1.1273096880694419e-05, "loss": 1.4019, "step": 5352 }, { "epoch": 0.9234883119123609, "grad_norm": 0.6484375, "learning_rate": 1.127039710252883e-05, "loss": 1.4122, "step": 5353 }, { "epoch": 0.9236608298110929, "grad_norm": 0.60546875, "learning_rate": 1.1267697230244281e-05, "loss": 1.4157, "step": 5354 }, { "epoch": 0.923833347709825, "grad_norm": 0.64453125, "learning_rate": 1.1264997264040802e-05, "loss": 1.392, "step": 5355 }, { "epoch": 0.9240058656085569, "grad_norm": 0.60546875, "learning_rate": 1.1262297204118422e-05, "loss": 1.4343, "step": 5356 }, { "epoch": 0.9241783835072889, "grad_norm": 0.70703125, "learning_rate": 1.1259597050677178e-05, "loss": 1.4186, "step": 5357 }, { "epoch": 0.9243509014060208, "grad_norm": 0.76953125, "learning_rate": 1.1256896803917115e-05, "loss": 1.3255, "step": 5358 }, { "epoch": 0.9245234193047529, "grad_norm": 0.84765625, "learning_rate": 1.1254196464038281e-05, "loss": 1.4194, "step": 5359 }, { "epoch": 0.9246959372034849, "grad_norm": 0.61328125, "learning_rate": 1.1251496031240736e-05, "loss": 1.5005, "step": 5360 }, { "epoch": 0.9248684551022168, "grad_norm": 0.65625, "learning_rate": 1.1248795505724548e-05, "loss": 1.3214, "step": 5361 }, { "epoch": 0.9250409730009489, "grad_norm": 0.60546875, "learning_rate": 1.1246094887689784e-05, "loss": 1.4798, "step": 5362 }, { "epoch": 0.9252134908996809, "grad_norm": 0.69921875, "learning_rate": 1.1243394177336524e-05, "loss": 1.4729, "step": 5363 }, { "epoch": 0.9253860087984128, "grad_norm": 0.76171875, "learning_rate": 1.1240693374864854e-05, "loss": 1.5883, "step": 5364 }, { "epoch": 0.9255585266971448, "grad_norm": 0.62109375, "learning_rate": 1.123799248047487e-05, "loss": 1.4788, "step": 5365 }, { "epoch": 0.9257310445958769, "grad_norm": 0.625, "learning_rate": 1.1235291494366668e-05, "loss": 1.4362, "step": 5366 }, { "epoch": 0.9259035624946088, "grad_norm": 0.66015625, "learning_rate": 1.123259041674035e-05, "loss": 1.4446, "step": 5367 }, { "epoch": 0.9260760803933408, "grad_norm": 0.66015625, "learning_rate": 1.1229889247796033e-05, "loss": 1.416, "step": 5368 }, { "epoch": 0.9262485982920728, "grad_norm": 0.64453125, "learning_rate": 1.1227187987733838e-05, "loss": 1.4744, "step": 5369 }, { "epoch": 0.9264211161908048, "grad_norm": 0.64453125, "learning_rate": 1.1224486636753888e-05, "loss": 1.3834, "step": 5370 }, { "epoch": 0.9265936340895368, "grad_norm": 0.578125, "learning_rate": 1.1221785195056316e-05, "loss": 1.4061, "step": 5371 }, { "epoch": 0.9267661519882688, "grad_norm": 0.6015625, "learning_rate": 1.1219083662841263e-05, "loss": 1.473, "step": 5372 }, { "epoch": 0.9269386698870008, "grad_norm": 0.6640625, "learning_rate": 1.1216382040308877e-05, "loss": 1.3396, "step": 5373 }, { "epoch": 0.9271111877857328, "grad_norm": 0.703125, "learning_rate": 1.121368032765931e-05, "loss": 1.4057, "step": 5374 }, { "epoch": 0.9272837056844647, "grad_norm": 0.6484375, "learning_rate": 1.1210978525092717e-05, "loss": 1.4684, "step": 5375 }, { "epoch": 0.9274562235831968, "grad_norm": 0.6015625, "learning_rate": 1.120827663280927e-05, "loss": 1.4043, "step": 5376 }, { "epoch": 0.9276287414819288, "grad_norm": 0.60546875, "learning_rate": 1.120557465100914e-05, "loss": 1.4343, "step": 5377 }, { "epoch": 0.9278012593806607, "grad_norm": 0.58984375, "learning_rate": 1.1202872579892507e-05, "loss": 1.4044, "step": 5378 }, { "epoch": 0.9279737772793928, "grad_norm": 0.6484375, "learning_rate": 1.1200170419659558e-05, "loss": 1.3756, "step": 5379 }, { "epoch": 0.9281462951781247, "grad_norm": 0.62890625, "learning_rate": 1.1197468170510483e-05, "loss": 1.4508, "step": 5380 }, { "epoch": 0.9283188130768567, "grad_norm": 0.62109375, "learning_rate": 1.1194765832645489e-05, "loss": 1.5315, "step": 5381 }, { "epoch": 0.9284913309755887, "grad_norm": 0.5625, "learning_rate": 1.1192063406264772e-05, "loss": 1.4169, "step": 5382 }, { "epoch": 0.9286638488743207, "grad_norm": 0.63671875, "learning_rate": 1.1189360891568551e-05, "loss": 1.483, "step": 5383 }, { "epoch": 0.9288363667730527, "grad_norm": 0.72265625, "learning_rate": 1.1186658288757044e-05, "loss": 1.4337, "step": 5384 }, { "epoch": 0.9290088846717847, "grad_norm": 0.64453125, "learning_rate": 1.1183955598030479e-05, "loss": 1.5479, "step": 5385 }, { "epoch": 0.9291814025705167, "grad_norm": 0.62890625, "learning_rate": 1.1181252819589081e-05, "loss": 1.4495, "step": 5386 }, { "epoch": 0.9293539204692487, "grad_norm": 0.63671875, "learning_rate": 1.1178549953633096e-05, "loss": 1.4111, "step": 5387 }, { "epoch": 0.9295264383679807, "grad_norm": 0.62890625, "learning_rate": 1.1175847000362765e-05, "loss": 1.453, "step": 5388 }, { "epoch": 0.9296989562667126, "grad_norm": 0.62109375, "learning_rate": 1.1173143959978344e-05, "loss": 1.5223, "step": 5389 }, { "epoch": 0.9298714741654447, "grad_norm": 0.65234375, "learning_rate": 1.1170440832680086e-05, "loss": 1.4029, "step": 5390 }, { "epoch": 0.9300439920641767, "grad_norm": 0.625, "learning_rate": 1.1167737618668258e-05, "loss": 1.3839, "step": 5391 }, { "epoch": 0.9302165099629086, "grad_norm": 0.60546875, "learning_rate": 1.1165034318143129e-05, "loss": 1.4541, "step": 5392 }, { "epoch": 0.9303890278616407, "grad_norm": 0.68359375, "learning_rate": 1.116233093130498e-05, "loss": 1.3738, "step": 5393 }, { "epoch": 0.9305615457603726, "grad_norm": 0.62109375, "learning_rate": 1.1159627458354096e-05, "loss": 1.4244, "step": 5394 }, { "epoch": 0.9307340636591046, "grad_norm": 0.62890625, "learning_rate": 1.1156923899490761e-05, "loss": 1.3742, "step": 5395 }, { "epoch": 0.9309065815578367, "grad_norm": 0.66015625, "learning_rate": 1.115422025491527e-05, "loss": 1.4678, "step": 5396 }, { "epoch": 0.9310790994565686, "grad_norm": 0.58984375, "learning_rate": 1.1151516524827938e-05, "loss": 1.3177, "step": 5397 }, { "epoch": 0.9312516173553006, "grad_norm": 0.8125, "learning_rate": 1.1148812709429067e-05, "loss": 1.3989, "step": 5398 }, { "epoch": 0.9314241352540326, "grad_norm": 0.70703125, "learning_rate": 1.114610880891897e-05, "loss": 1.3871, "step": 5399 }, { "epoch": 0.9315966531527646, "grad_norm": 0.65234375, "learning_rate": 1.1143404823497969e-05, "loss": 1.4404, "step": 5400 }, { "epoch": 0.9315966531527646, "eval_loss": 1.4122982025146484, "eval_runtime": 10.8508, "eval_samples_per_second": 94.371, "eval_steps_per_second": 23.593, "step": 5400 }, { "epoch": 0.9317691710514966, "grad_norm": 0.62109375, "learning_rate": 1.1140700753366397e-05, "loss": 1.3977, "step": 5401 }, { "epoch": 0.9319416889502286, "grad_norm": 0.578125, "learning_rate": 1.1137996598724588e-05, "loss": 1.4804, "step": 5402 }, { "epoch": 0.9321142068489606, "grad_norm": 0.62109375, "learning_rate": 1.1135292359772875e-05, "loss": 1.4298, "step": 5403 }, { "epoch": 0.9322867247476926, "grad_norm": 0.609375, "learning_rate": 1.1132588036711614e-05, "loss": 1.4478, "step": 5404 }, { "epoch": 0.9324592426464245, "grad_norm": 0.609375, "learning_rate": 1.1129883629741156e-05, "loss": 1.4497, "step": 5405 }, { "epoch": 0.9326317605451565, "grad_norm": 0.60546875, "learning_rate": 1.1127179139061854e-05, "loss": 1.4567, "step": 5406 }, { "epoch": 0.9328042784438886, "grad_norm": 0.60546875, "learning_rate": 1.112447456487408e-05, "loss": 1.3938, "step": 5407 }, { "epoch": 0.9329767963426205, "grad_norm": 0.625, "learning_rate": 1.1121769907378206e-05, "loss": 1.5709, "step": 5408 }, { "epoch": 0.9331493142413525, "grad_norm": 0.7890625, "learning_rate": 1.1119065166774608e-05, "loss": 1.4594, "step": 5409 }, { "epoch": 0.9333218321400846, "grad_norm": 0.609375, "learning_rate": 1.1116360343263671e-05, "loss": 1.3834, "step": 5410 }, { "epoch": 0.9334943500388165, "grad_norm": 0.65234375, "learning_rate": 1.1113655437045782e-05, "loss": 1.5306, "step": 5411 }, { "epoch": 0.9336668679375485, "grad_norm": 0.6484375, "learning_rate": 1.1110950448321338e-05, "loss": 1.3906, "step": 5412 }, { "epoch": 0.9338393858362806, "grad_norm": 0.58984375, "learning_rate": 1.1108245377290748e-05, "loss": 1.4253, "step": 5413 }, { "epoch": 0.9340119037350125, "grad_norm": 0.59375, "learning_rate": 1.1105540224154413e-05, "loss": 1.5626, "step": 5414 }, { "epoch": 0.9341844216337445, "grad_norm": 0.625, "learning_rate": 1.1102834989112752e-05, "loss": 1.3905, "step": 5415 }, { "epoch": 0.9343569395324764, "grad_norm": 0.58984375, "learning_rate": 1.1100129672366182e-05, "loss": 1.404, "step": 5416 }, { "epoch": 0.9345294574312085, "grad_norm": 0.59765625, "learning_rate": 1.1097424274115136e-05, "loss": 1.4319, "step": 5417 }, { "epoch": 0.9347019753299405, "grad_norm": 0.61328125, "learning_rate": 1.1094718794560037e-05, "loss": 1.5673, "step": 5418 }, { "epoch": 0.9348744932286724, "grad_norm": 0.56640625, "learning_rate": 1.1092013233901334e-05, "loss": 1.3819, "step": 5419 }, { "epoch": 0.9350470111274045, "grad_norm": 0.58203125, "learning_rate": 1.1089307592339467e-05, "loss": 1.4296, "step": 5420 }, { "epoch": 0.9352195290261365, "grad_norm": 0.59765625, "learning_rate": 1.1086601870074887e-05, "loss": 1.4434, "step": 5421 }, { "epoch": 0.9353920469248684, "grad_norm": 0.59765625, "learning_rate": 1.1083896067308052e-05, "loss": 1.5113, "step": 5422 }, { "epoch": 0.9355645648236004, "grad_norm": 0.56640625, "learning_rate": 1.1081190184239418e-05, "loss": 1.4677, "step": 5423 }, { "epoch": 0.9357370827223325, "grad_norm": 0.59375, "learning_rate": 1.1078484221069465e-05, "loss": 1.4168, "step": 5424 }, { "epoch": 0.9359096006210644, "grad_norm": 0.60546875, "learning_rate": 1.107577817799866e-05, "loss": 1.3563, "step": 5425 }, { "epoch": 0.9360821185197964, "grad_norm": 0.65625, "learning_rate": 1.107307205522749e-05, "loss": 1.3891, "step": 5426 }, { "epoch": 0.9362546364185285, "grad_norm": 0.59375, "learning_rate": 1.1070365852956432e-05, "loss": 1.4826, "step": 5427 }, { "epoch": 0.9364271543172604, "grad_norm": 0.609375, "learning_rate": 1.1067659571385985e-05, "loss": 1.3225, "step": 5428 }, { "epoch": 0.9365996722159924, "grad_norm": 0.59375, "learning_rate": 1.1064953210716647e-05, "loss": 1.4668, "step": 5429 }, { "epoch": 0.9367721901147245, "grad_norm": 0.61328125, "learning_rate": 1.1062246771148922e-05, "loss": 1.4022, "step": 5430 }, { "epoch": 0.9369447080134564, "grad_norm": 0.61328125, "learning_rate": 1.1059540252883317e-05, "loss": 1.4116, "step": 5431 }, { "epoch": 0.9371172259121884, "grad_norm": 0.609375, "learning_rate": 1.105683365612035e-05, "loss": 1.5058, "step": 5432 }, { "epoch": 0.9372897438109203, "grad_norm": 0.59765625, "learning_rate": 1.1054126981060542e-05, "loss": 1.4929, "step": 5433 }, { "epoch": 0.9374622617096524, "grad_norm": 0.62109375, "learning_rate": 1.1051420227904423e-05, "loss": 1.4555, "step": 5434 }, { "epoch": 0.9376347796083844, "grad_norm": 0.60546875, "learning_rate": 1.1048713396852522e-05, "loss": 1.4708, "step": 5435 }, { "epoch": 0.9378072975071163, "grad_norm": 0.6171875, "learning_rate": 1.1046006488105379e-05, "loss": 1.456, "step": 5436 }, { "epoch": 0.9379798154058484, "grad_norm": 0.6171875, "learning_rate": 1.104329950186354e-05, "loss": 1.3934, "step": 5437 }, { "epoch": 0.9381523333045804, "grad_norm": 0.59765625, "learning_rate": 1.1040592438327558e-05, "loss": 1.4241, "step": 5438 }, { "epoch": 0.9383248512033123, "grad_norm": 0.69140625, "learning_rate": 1.1037885297697985e-05, "loss": 1.4923, "step": 5439 }, { "epoch": 0.9384973691020443, "grad_norm": 0.65234375, "learning_rate": 1.1035178080175382e-05, "loss": 1.4042, "step": 5440 }, { "epoch": 0.9386698870007764, "grad_norm": 0.62890625, "learning_rate": 1.1032470785960321e-05, "loss": 1.488, "step": 5441 }, { "epoch": 0.9388424048995083, "grad_norm": 0.68359375, "learning_rate": 1.1029763415253374e-05, "loss": 1.4338, "step": 5442 }, { "epoch": 0.9390149227982403, "grad_norm": 1.75, "learning_rate": 1.1027055968255116e-05, "loss": 1.3755, "step": 5443 }, { "epoch": 0.9391874406969724, "grad_norm": 0.703125, "learning_rate": 1.1024348445166133e-05, "loss": 1.472, "step": 5444 }, { "epoch": 0.9393599585957043, "grad_norm": 0.625, "learning_rate": 1.1021640846187021e-05, "loss": 1.3511, "step": 5445 }, { "epoch": 0.9395324764944363, "grad_norm": 0.66796875, "learning_rate": 1.1018933171518369e-05, "loss": 1.4285, "step": 5446 }, { "epoch": 0.9397049943931682, "grad_norm": 0.69140625, "learning_rate": 1.1016225421360783e-05, "loss": 1.4036, "step": 5447 }, { "epoch": 0.9398775122919003, "grad_norm": 0.59765625, "learning_rate": 1.1013517595914867e-05, "loss": 1.4262, "step": 5448 }, { "epoch": 0.9400500301906323, "grad_norm": 0.671875, "learning_rate": 1.1010809695381235e-05, "loss": 1.3865, "step": 5449 }, { "epoch": 0.9402225480893642, "grad_norm": 0.796875, "learning_rate": 1.1008101719960504e-05, "loss": 1.4087, "step": 5450 }, { "epoch": 0.9403950659880963, "grad_norm": 0.6015625, "learning_rate": 1.10053936698533e-05, "loss": 1.3771, "step": 5451 }, { "epoch": 0.9405675838868283, "grad_norm": 0.703125, "learning_rate": 1.100268554526025e-05, "loss": 1.4645, "step": 5452 }, { "epoch": 0.9407401017855602, "grad_norm": 0.796875, "learning_rate": 1.099997734638199e-05, "loss": 1.4652, "step": 5453 }, { "epoch": 0.9409126196842923, "grad_norm": 0.60546875, "learning_rate": 1.0997269073419162e-05, "loss": 1.5207, "step": 5454 }, { "epoch": 0.9410851375830243, "grad_norm": 0.78125, "learning_rate": 1.0994560726572408e-05, "loss": 1.4699, "step": 5455 }, { "epoch": 0.9412576554817562, "grad_norm": 0.5546875, "learning_rate": 1.0991852306042381e-05, "loss": 1.3744, "step": 5456 }, { "epoch": 0.9414301733804882, "grad_norm": 0.56640625, "learning_rate": 1.0989143812029736e-05, "loss": 1.4185, "step": 5457 }, { "epoch": 0.9416026912792202, "grad_norm": 0.68359375, "learning_rate": 1.0986435244735141e-05, "loss": 1.3928, "step": 5458 }, { "epoch": 0.9417752091779522, "grad_norm": 0.625, "learning_rate": 1.0983726604359257e-05, "loss": 1.422, "step": 5459 }, { "epoch": 0.9419477270766842, "grad_norm": 0.73828125, "learning_rate": 1.0981017891102757e-05, "loss": 1.4217, "step": 5460 }, { "epoch": 0.9421202449754162, "grad_norm": 0.625, "learning_rate": 1.0978309105166328e-05, "loss": 1.455, "step": 5461 }, { "epoch": 0.9422927628741482, "grad_norm": 0.77734375, "learning_rate": 1.0975600246750644e-05, "loss": 1.4021, "step": 5462 }, { "epoch": 0.9424652807728802, "grad_norm": 0.60546875, "learning_rate": 1.0972891316056397e-05, "loss": 1.4949, "step": 5463 }, { "epoch": 0.9426377986716121, "grad_norm": 0.57421875, "learning_rate": 1.0970182313284283e-05, "loss": 1.4539, "step": 5464 }, { "epoch": 0.9428103165703442, "grad_norm": 0.640625, "learning_rate": 1.0967473238635005e-05, "loss": 1.4881, "step": 5465 }, { "epoch": 0.9429828344690762, "grad_norm": 0.60546875, "learning_rate": 1.0964764092309261e-05, "loss": 1.423, "step": 5466 }, { "epoch": 0.9431553523678081, "grad_norm": 0.64453125, "learning_rate": 1.0962054874507766e-05, "loss": 1.5349, "step": 5467 }, { "epoch": 0.9433278702665402, "grad_norm": 0.58984375, "learning_rate": 1.0959345585431233e-05, "loss": 1.3661, "step": 5468 }, { "epoch": 0.9435003881652722, "grad_norm": 0.64453125, "learning_rate": 1.0956636225280386e-05, "loss": 1.5621, "step": 5469 }, { "epoch": 0.9436729060640041, "grad_norm": 0.625, "learning_rate": 1.0953926794255952e-05, "loss": 1.4969, "step": 5470 }, { "epoch": 0.9438454239627362, "grad_norm": 0.63671875, "learning_rate": 1.0951217292558659e-05, "loss": 1.5013, "step": 5471 }, { "epoch": 0.9440179418614681, "grad_norm": 0.609375, "learning_rate": 1.0948507720389242e-05, "loss": 1.4703, "step": 5472 }, { "epoch": 0.9441904597602001, "grad_norm": 0.6328125, "learning_rate": 1.094579807794845e-05, "loss": 1.4418, "step": 5473 }, { "epoch": 0.9443629776589321, "grad_norm": 0.60546875, "learning_rate": 1.094308836543703e-05, "loss": 1.3773, "step": 5474 }, { "epoch": 0.9445354955576641, "grad_norm": 0.6484375, "learning_rate": 1.0940378583055727e-05, "loss": 1.4028, "step": 5475 }, { "epoch": 0.9447080134563961, "grad_norm": 0.60546875, "learning_rate": 1.09376687310053e-05, "loss": 1.3619, "step": 5476 }, { "epoch": 0.9448805313551281, "grad_norm": 0.60546875, "learning_rate": 1.093495880948652e-05, "loss": 1.4371, "step": 5477 }, { "epoch": 0.9450530492538601, "grad_norm": 0.671875, "learning_rate": 1.0932248818700146e-05, "loss": 1.5281, "step": 5478 }, { "epoch": 0.9452255671525921, "grad_norm": 0.60546875, "learning_rate": 1.0929538758846956e-05, "loss": 1.4308, "step": 5479 }, { "epoch": 0.945398085051324, "grad_norm": 0.59375, "learning_rate": 1.0926828630127728e-05, "loss": 1.4224, "step": 5480 }, { "epoch": 0.945570602950056, "grad_norm": 0.61328125, "learning_rate": 1.0924118432743243e-05, "loss": 1.4619, "step": 5481 }, { "epoch": 0.9457431208487881, "grad_norm": 0.68359375, "learning_rate": 1.0921408166894292e-05, "loss": 1.5235, "step": 5482 }, { "epoch": 0.94591563874752, "grad_norm": 0.60546875, "learning_rate": 1.0918697832781663e-05, "loss": 1.3516, "step": 5483 }, { "epoch": 0.946088156646252, "grad_norm": 0.5859375, "learning_rate": 1.0915987430606161e-05, "loss": 1.4879, "step": 5484 }, { "epoch": 0.9462606745449841, "grad_norm": 1.4140625, "learning_rate": 1.0913276960568583e-05, "loss": 1.551, "step": 5485 }, { "epoch": 0.946433192443716, "grad_norm": 0.609375, "learning_rate": 1.0910566422869748e-05, "loss": 1.4284, "step": 5486 }, { "epoch": 0.946605710342448, "grad_norm": 0.63671875, "learning_rate": 1.0907855817710457e-05, "loss": 1.3764, "step": 5487 }, { "epoch": 0.9467782282411801, "grad_norm": 0.59765625, "learning_rate": 1.0905145145291537e-05, "loss": 1.4533, "step": 5488 }, { "epoch": 0.946950746139912, "grad_norm": 0.640625, "learning_rate": 1.0902434405813809e-05, "loss": 1.3727, "step": 5489 }, { "epoch": 0.947123264038644, "grad_norm": 0.63671875, "learning_rate": 1.0899723599478103e-05, "loss": 1.4027, "step": 5490 }, { "epoch": 0.947295781937376, "grad_norm": 0.66796875, "learning_rate": 1.089701272648525e-05, "loss": 1.4288, "step": 5491 }, { "epoch": 0.947468299836108, "grad_norm": 0.640625, "learning_rate": 1.0894301787036085e-05, "loss": 1.3706, "step": 5492 }, { "epoch": 0.94764081773484, "grad_norm": 0.66015625, "learning_rate": 1.0891590781331463e-05, "loss": 1.4171, "step": 5493 }, { "epoch": 0.947813335633572, "grad_norm": 0.68359375, "learning_rate": 1.088887970957222e-05, "loss": 1.3683, "step": 5494 }, { "epoch": 0.947985853532304, "grad_norm": 0.5859375, "learning_rate": 1.0886168571959214e-05, "loss": 1.3923, "step": 5495 }, { "epoch": 0.948158371431036, "grad_norm": 0.625, "learning_rate": 1.0883457368693307e-05, "loss": 1.4652, "step": 5496 }, { "epoch": 0.948330889329768, "grad_norm": 0.7109375, "learning_rate": 1.0880746099975355e-05, "loss": 1.5098, "step": 5497 }, { "epoch": 0.9485034072284999, "grad_norm": 0.671875, "learning_rate": 1.087803476600623e-05, "loss": 1.4231, "step": 5498 }, { "epoch": 0.948675925127232, "grad_norm": 0.65625, "learning_rate": 1.0875323366986803e-05, "loss": 1.4784, "step": 5499 }, { "epoch": 0.9488484430259639, "grad_norm": 0.58984375, "learning_rate": 1.0872611903117951e-05, "loss": 1.4145, "step": 5500 }, { "epoch": 0.9488484430259639, "eval_loss": 1.4118043184280396, "eval_runtime": 11.0208, "eval_samples_per_second": 92.915, "eval_steps_per_second": 23.229, "step": 5500 }, { "epoch": 0.9490209609246959, "grad_norm": 0.66796875, "learning_rate": 1.0869900374600557e-05, "loss": 1.4202, "step": 5501 }, { "epoch": 0.949193478823428, "grad_norm": 0.7578125, "learning_rate": 1.086718878163551e-05, "loss": 1.4019, "step": 5502 }, { "epoch": 0.9493659967221599, "grad_norm": 0.71484375, "learning_rate": 1.0864477124423698e-05, "loss": 1.4151, "step": 5503 }, { "epoch": 0.9495385146208919, "grad_norm": 0.640625, "learning_rate": 1.0861765403166018e-05, "loss": 1.471, "step": 5504 }, { "epoch": 0.949711032519624, "grad_norm": 0.73046875, "learning_rate": 1.0859053618063372e-05, "loss": 1.4649, "step": 5505 }, { "epoch": 0.9498835504183559, "grad_norm": 1.0, "learning_rate": 1.0856341769316672e-05, "loss": 1.426, "step": 5506 }, { "epoch": 0.9500560683170879, "grad_norm": 0.6953125, "learning_rate": 1.0853629857126817e-05, "loss": 1.3961, "step": 5507 }, { "epoch": 0.9502285862158198, "grad_norm": 0.60546875, "learning_rate": 1.085091788169473e-05, "loss": 1.4524, "step": 5508 }, { "epoch": 0.9504011041145519, "grad_norm": 0.61328125, "learning_rate": 1.084820584322133e-05, "loss": 1.4308, "step": 5509 }, { "epoch": 0.9505736220132839, "grad_norm": 0.66796875, "learning_rate": 1.084549374190754e-05, "loss": 1.4239, "step": 5510 }, { "epoch": 0.9507461399120158, "grad_norm": 0.61328125, "learning_rate": 1.0842781577954294e-05, "loss": 1.3743, "step": 5511 }, { "epoch": 0.9509186578107479, "grad_norm": 0.640625, "learning_rate": 1.0840069351562519e-05, "loss": 1.3684, "step": 5512 }, { "epoch": 0.9510911757094799, "grad_norm": 1.078125, "learning_rate": 1.0837357062933158e-05, "loss": 1.3119, "step": 5513 }, { "epoch": 0.9512636936082118, "grad_norm": 0.58203125, "learning_rate": 1.0834644712267158e-05, "loss": 1.462, "step": 5514 }, { "epoch": 0.9514362115069438, "grad_norm": 0.63671875, "learning_rate": 1.0831932299765458e-05, "loss": 1.4212, "step": 5515 }, { "epoch": 0.9516087294056759, "grad_norm": 0.57421875, "learning_rate": 1.0829219825629016e-05, "loss": 1.4816, "step": 5516 }, { "epoch": 0.9517812473044078, "grad_norm": 0.79296875, "learning_rate": 1.0826507290058787e-05, "loss": 1.4519, "step": 5517 }, { "epoch": 0.9519537652031398, "grad_norm": 0.63671875, "learning_rate": 1.0823794693255738e-05, "loss": 1.4065, "step": 5518 }, { "epoch": 0.9521262831018719, "grad_norm": 0.61328125, "learning_rate": 1.0821082035420829e-05, "loss": 1.4785, "step": 5519 }, { "epoch": 0.9522988010006038, "grad_norm": 0.58984375, "learning_rate": 1.0818369316755031e-05, "loss": 1.41, "step": 5520 }, { "epoch": 0.9524713188993358, "grad_norm": 0.5859375, "learning_rate": 1.081565653745932e-05, "loss": 1.3799, "step": 5521 }, { "epoch": 0.9526438367980677, "grad_norm": 0.5703125, "learning_rate": 1.081294369773468e-05, "loss": 1.3599, "step": 5522 }, { "epoch": 0.9528163546967998, "grad_norm": 0.71484375, "learning_rate": 1.0810230797782088e-05, "loss": 1.4379, "step": 5523 }, { "epoch": 0.9529888725955318, "grad_norm": 0.625, "learning_rate": 1.0807517837802535e-05, "loss": 1.447, "step": 5524 }, { "epoch": 0.9531613904942637, "grad_norm": 0.63671875, "learning_rate": 1.0804804817997015e-05, "loss": 1.5195, "step": 5525 }, { "epoch": 0.9533339083929958, "grad_norm": 0.640625, "learning_rate": 1.080209173856653e-05, "loss": 1.4686, "step": 5526 }, { "epoch": 0.9535064262917278, "grad_norm": 0.59375, "learning_rate": 1.0799378599712073e-05, "loss": 1.4608, "step": 5527 }, { "epoch": 0.9536789441904597, "grad_norm": 0.63671875, "learning_rate": 1.0796665401634657e-05, "loss": 1.3722, "step": 5528 }, { "epoch": 0.9538514620891918, "grad_norm": 0.66015625, "learning_rate": 1.0793952144535289e-05, "loss": 1.4454, "step": 5529 }, { "epoch": 0.9540239799879238, "grad_norm": 0.62109375, "learning_rate": 1.0791238828614987e-05, "loss": 1.3737, "step": 5530 }, { "epoch": 0.9541964978866557, "grad_norm": 0.59765625, "learning_rate": 1.0788525454074765e-05, "loss": 1.3756, "step": 5531 }, { "epoch": 0.9543690157853877, "grad_norm": 0.73828125, "learning_rate": 1.0785812021115654e-05, "loss": 1.4899, "step": 5532 }, { "epoch": 0.9545415336841198, "grad_norm": 0.546875, "learning_rate": 1.0783098529938675e-05, "loss": 1.3998, "step": 5533 }, { "epoch": 0.9547140515828517, "grad_norm": 0.609375, "learning_rate": 1.078038498074487e-05, "loss": 1.437, "step": 5534 }, { "epoch": 0.9548865694815837, "grad_norm": 0.671875, "learning_rate": 1.0777671373735266e-05, "loss": 1.4801, "step": 5535 }, { "epoch": 0.9550590873803158, "grad_norm": 0.75390625, "learning_rate": 1.0774957709110905e-05, "loss": 1.4471, "step": 5536 }, { "epoch": 0.9552316052790477, "grad_norm": 0.6328125, "learning_rate": 1.0772243987072838e-05, "loss": 1.4805, "step": 5537 }, { "epoch": 0.9554041231777797, "grad_norm": 0.625, "learning_rate": 1.076953020782211e-05, "loss": 1.4201, "step": 5538 }, { "epoch": 0.9555766410765116, "grad_norm": 0.60546875, "learning_rate": 1.0766816371559776e-05, "loss": 1.4411, "step": 5539 }, { "epoch": 0.9557491589752437, "grad_norm": 0.60546875, "learning_rate": 1.0764102478486894e-05, "loss": 1.4425, "step": 5540 }, { "epoch": 0.9559216768739757, "grad_norm": 0.68359375, "learning_rate": 1.0761388528804524e-05, "loss": 1.488, "step": 5541 }, { "epoch": 0.9560941947727076, "grad_norm": 0.625, "learning_rate": 1.0758674522713737e-05, "loss": 1.4224, "step": 5542 }, { "epoch": 0.9562667126714397, "grad_norm": 0.5390625, "learning_rate": 1.07559604604156e-05, "loss": 1.4734, "step": 5543 }, { "epoch": 0.9564392305701717, "grad_norm": 0.625, "learning_rate": 1.075324634211119e-05, "loss": 1.3959, "step": 5544 }, { "epoch": 0.9566117484689036, "grad_norm": 0.61328125, "learning_rate": 1.0750532168001581e-05, "loss": 1.5127, "step": 5545 }, { "epoch": 0.9567842663676357, "grad_norm": 0.59765625, "learning_rate": 1.0747817938287866e-05, "loss": 1.5154, "step": 5546 }, { "epoch": 0.9569567842663677, "grad_norm": 0.734375, "learning_rate": 1.074510365317112e-05, "loss": 1.3529, "step": 5547 }, { "epoch": 0.9571293021650996, "grad_norm": 0.671875, "learning_rate": 1.0742389312852441e-05, "loss": 1.3786, "step": 5548 }, { "epoch": 0.9573018200638316, "grad_norm": 0.734375, "learning_rate": 1.0739674917532923e-05, "loss": 1.3981, "step": 5549 }, { "epoch": 0.9574743379625636, "grad_norm": 0.67578125, "learning_rate": 1.0736960467413668e-05, "loss": 1.4618, "step": 5550 }, { "epoch": 0.9576468558612956, "grad_norm": 0.60546875, "learning_rate": 1.0734245962695775e-05, "loss": 1.3747, "step": 5551 }, { "epoch": 0.9578193737600276, "grad_norm": 0.62890625, "learning_rate": 1.073153140358035e-05, "loss": 1.3701, "step": 5552 }, { "epoch": 0.9579918916587596, "grad_norm": 0.57421875, "learning_rate": 1.0728816790268513e-05, "loss": 1.4192, "step": 5553 }, { "epoch": 0.9581644095574916, "grad_norm": 0.6171875, "learning_rate": 1.0726102122961373e-05, "loss": 1.4915, "step": 5554 }, { "epoch": 0.9583369274562236, "grad_norm": 0.67578125, "learning_rate": 1.0723387401860048e-05, "loss": 1.4411, "step": 5555 }, { "epoch": 0.9585094453549555, "grad_norm": 0.83203125, "learning_rate": 1.0720672627165665e-05, "loss": 1.4482, "step": 5556 }, { "epoch": 0.9586819632536876, "grad_norm": 0.62890625, "learning_rate": 1.071795779907935e-05, "loss": 1.4209, "step": 5557 }, { "epoch": 0.9588544811524196, "grad_norm": 0.61328125, "learning_rate": 1.071524291780224e-05, "loss": 1.4751, "step": 5558 }, { "epoch": 0.9590269990511515, "grad_norm": 0.6875, "learning_rate": 1.0712527983535463e-05, "loss": 1.4628, "step": 5559 }, { "epoch": 0.9591995169498836, "grad_norm": 0.546875, "learning_rate": 1.070981299648016e-05, "loss": 1.3229, "step": 5560 }, { "epoch": 0.9593720348486156, "grad_norm": 0.640625, "learning_rate": 1.0707097956837475e-05, "loss": 1.3905, "step": 5561 }, { "epoch": 0.9595445527473475, "grad_norm": 0.57421875, "learning_rate": 1.0704382864808558e-05, "loss": 1.4366, "step": 5562 }, { "epoch": 0.9597170706460796, "grad_norm": 0.68359375, "learning_rate": 1.0701667720594555e-05, "loss": 1.4201, "step": 5563 }, { "epoch": 0.9598895885448115, "grad_norm": 0.6328125, "learning_rate": 1.0698952524396621e-05, "loss": 1.4332, "step": 5564 }, { "epoch": 0.9600621064435435, "grad_norm": 0.640625, "learning_rate": 1.069623727641592e-05, "loss": 1.5507, "step": 5565 }, { "epoch": 0.9602346243422755, "grad_norm": 0.67578125, "learning_rate": 1.0693521976853612e-05, "loss": 1.4058, "step": 5566 }, { "epoch": 0.9604071422410075, "grad_norm": 0.59765625, "learning_rate": 1.0690806625910862e-05, "loss": 1.5297, "step": 5567 }, { "epoch": 0.9605796601397395, "grad_norm": 0.62890625, "learning_rate": 1.068809122378884e-05, "loss": 1.5256, "step": 5568 }, { "epoch": 0.9607521780384715, "grad_norm": 0.578125, "learning_rate": 1.0685375770688717e-05, "loss": 1.4706, "step": 5569 }, { "epoch": 0.9609246959372035, "grad_norm": 0.63671875, "learning_rate": 1.068266026681168e-05, "loss": 1.4291, "step": 5570 }, { "epoch": 0.9610972138359355, "grad_norm": 0.6953125, "learning_rate": 1.0679944712358903e-05, "loss": 1.3866, "step": 5571 }, { "epoch": 0.9612697317346675, "grad_norm": 0.6484375, "learning_rate": 1.067722910753157e-05, "loss": 1.4339, "step": 5572 }, { "epoch": 0.9614422496333994, "grad_norm": 0.58984375, "learning_rate": 1.0674513452530877e-05, "loss": 1.3447, "step": 5573 }, { "epoch": 0.9616147675321315, "grad_norm": 0.60546875, "learning_rate": 1.0671797747558013e-05, "loss": 1.4081, "step": 5574 }, { "epoch": 0.9617872854308634, "grad_norm": 0.625, "learning_rate": 1.0669081992814174e-05, "loss": 1.4666, "step": 5575 }, { "epoch": 0.9619598033295954, "grad_norm": 0.58203125, "learning_rate": 1.0666366188500559e-05, "loss": 1.3793, "step": 5576 }, { "epoch": 0.9621323212283275, "grad_norm": 0.73828125, "learning_rate": 1.0663650334818374e-05, "loss": 1.4518, "step": 5577 }, { "epoch": 0.9623048391270594, "grad_norm": 0.69921875, "learning_rate": 1.0660934431968829e-05, "loss": 1.4876, "step": 5578 }, { "epoch": 0.9624773570257914, "grad_norm": 0.64453125, "learning_rate": 1.0658218480153127e-05, "loss": 1.3871, "step": 5579 }, { "epoch": 0.9626498749245235, "grad_norm": 0.671875, "learning_rate": 1.0655502479572486e-05, "loss": 1.4577, "step": 5580 }, { "epoch": 0.9628223928232554, "grad_norm": 0.81640625, "learning_rate": 1.0652786430428128e-05, "loss": 1.3674, "step": 5581 }, { "epoch": 0.9629949107219874, "grad_norm": 0.75, "learning_rate": 1.0650070332921275e-05, "loss": 1.4384, "step": 5582 }, { "epoch": 0.9631674286207194, "grad_norm": 0.59375, "learning_rate": 1.0647354187253145e-05, "loss": 1.3278, "step": 5583 }, { "epoch": 0.9633399465194514, "grad_norm": 0.76171875, "learning_rate": 1.0644637993624973e-05, "loss": 1.4714, "step": 5584 }, { "epoch": 0.9635124644181834, "grad_norm": 0.7734375, "learning_rate": 1.0641921752237992e-05, "loss": 1.4265, "step": 5585 }, { "epoch": 0.9636849823169154, "grad_norm": 0.72265625, "learning_rate": 1.0639205463293436e-05, "loss": 1.4047, "step": 5586 }, { "epoch": 0.9638575002156474, "grad_norm": 0.76953125, "learning_rate": 1.0636489126992548e-05, "loss": 1.4471, "step": 5587 }, { "epoch": 0.9640300181143794, "grad_norm": 0.671875, "learning_rate": 1.0633772743536563e-05, "loss": 1.438, "step": 5588 }, { "epoch": 0.9642025360131113, "grad_norm": 0.625, "learning_rate": 1.0631056313126736e-05, "loss": 1.4633, "step": 5589 }, { "epoch": 0.9643750539118433, "grad_norm": 0.7578125, "learning_rate": 1.0628339835964317e-05, "loss": 1.3694, "step": 5590 }, { "epoch": 0.9645475718105754, "grad_norm": 0.734375, "learning_rate": 1.0625623312250554e-05, "loss": 1.4391, "step": 5591 }, { "epoch": 0.9647200897093073, "grad_norm": 0.6484375, "learning_rate": 1.0622906742186707e-05, "loss": 1.409, "step": 5592 }, { "epoch": 0.9648926076080393, "grad_norm": 0.78515625, "learning_rate": 1.0620190125974036e-05, "loss": 1.5127, "step": 5593 }, { "epoch": 0.9650651255067714, "grad_norm": 0.7265625, "learning_rate": 1.0617473463813808e-05, "loss": 1.4054, "step": 5594 }, { "epoch": 0.9652376434055033, "grad_norm": 3.453125, "learning_rate": 1.0614756755907284e-05, "loss": 1.462, "step": 5595 }, { "epoch": 0.9654101613042353, "grad_norm": 0.7734375, "learning_rate": 1.0612040002455742e-05, "loss": 1.403, "step": 5596 }, { "epoch": 0.9655826792029673, "grad_norm": 0.78125, "learning_rate": 1.0609323203660451e-05, "loss": 1.4555, "step": 5597 }, { "epoch": 0.9657551971016993, "grad_norm": 0.5703125, "learning_rate": 1.0606606359722691e-05, "loss": 1.3381, "step": 5598 }, { "epoch": 0.9659277150004313, "grad_norm": 0.6015625, "learning_rate": 1.060388947084374e-05, "loss": 1.2738, "step": 5599 }, { "epoch": 0.9661002328991632, "grad_norm": 0.73828125, "learning_rate": 1.0601172537224881e-05, "loss": 1.3784, "step": 5600 }, { "epoch": 0.9661002328991632, "eval_loss": 1.411436915397644, "eval_runtime": 10.8561, "eval_samples_per_second": 94.325, "eval_steps_per_second": 23.581, "step": 5600 }, { "epoch": 0.9662727507978953, "grad_norm": 1.171875, "learning_rate": 1.0598455559067409e-05, "loss": 1.3455, "step": 5601 }, { "epoch": 0.9664452686966273, "grad_norm": 0.65625, "learning_rate": 1.0595738536572607e-05, "loss": 1.5216, "step": 5602 }, { "epoch": 0.9666177865953592, "grad_norm": 0.69921875, "learning_rate": 1.0593021469941773e-05, "loss": 1.4521, "step": 5603 }, { "epoch": 0.9667903044940913, "grad_norm": 0.65625, "learning_rate": 1.0590304359376202e-05, "loss": 1.3953, "step": 5604 }, { "epoch": 0.9669628223928233, "grad_norm": 0.74609375, "learning_rate": 1.0587587205077196e-05, "loss": 1.4433, "step": 5605 }, { "epoch": 0.9671353402915552, "grad_norm": 0.7578125, "learning_rate": 1.0584870007246059e-05, "loss": 1.4629, "step": 5606 }, { "epoch": 0.9673078581902872, "grad_norm": 0.63671875, "learning_rate": 1.05821527660841e-05, "loss": 1.4307, "step": 5607 }, { "epoch": 0.9674803760890193, "grad_norm": 0.59375, "learning_rate": 1.0579435481792621e-05, "loss": 1.3582, "step": 5608 }, { "epoch": 0.9676528939877512, "grad_norm": 0.69140625, "learning_rate": 1.0576718154572944e-05, "loss": 1.372, "step": 5609 }, { "epoch": 0.9678254118864832, "grad_norm": 0.6796875, "learning_rate": 1.0574000784626386e-05, "loss": 1.4447, "step": 5610 }, { "epoch": 0.9679979297852153, "grad_norm": 0.6640625, "learning_rate": 1.057128337215426e-05, "loss": 1.4734, "step": 5611 }, { "epoch": 0.9681704476839472, "grad_norm": 0.6328125, "learning_rate": 1.0568565917357892e-05, "loss": 1.4525, "step": 5612 }, { "epoch": 0.9683429655826792, "grad_norm": 0.60546875, "learning_rate": 1.0565848420438608e-05, "loss": 1.4177, "step": 5613 }, { "epoch": 0.9685154834814111, "grad_norm": 0.5703125, "learning_rate": 1.0563130881597739e-05, "loss": 1.3963, "step": 5614 }, { "epoch": 0.9686880013801432, "grad_norm": 0.64453125, "learning_rate": 1.0560413301036614e-05, "loss": 1.3608, "step": 5615 }, { "epoch": 0.9688605192788752, "grad_norm": 0.65234375, "learning_rate": 1.0557695678956569e-05, "loss": 1.4067, "step": 5616 }, { "epoch": 0.9690330371776071, "grad_norm": 0.91015625, "learning_rate": 1.0554978015558946e-05, "loss": 1.4527, "step": 5617 }, { "epoch": 0.9692055550763392, "grad_norm": 0.59765625, "learning_rate": 1.0552260311045082e-05, "loss": 1.4399, "step": 5618 }, { "epoch": 0.9693780729750712, "grad_norm": 0.6015625, "learning_rate": 1.0549542565616326e-05, "loss": 1.5231, "step": 5619 }, { "epoch": 0.9695505908738031, "grad_norm": 0.5703125, "learning_rate": 1.0546824779474022e-05, "loss": 1.4008, "step": 5620 }, { "epoch": 0.9697231087725352, "grad_norm": 0.65625, "learning_rate": 1.054410695281952e-05, "loss": 1.4296, "step": 5621 }, { "epoch": 0.9698956266712672, "grad_norm": 0.65625, "learning_rate": 1.0541389085854177e-05, "loss": 1.4242, "step": 5622 }, { "epoch": 0.9700681445699991, "grad_norm": 0.62109375, "learning_rate": 1.0538671178779346e-05, "loss": 1.3831, "step": 5623 }, { "epoch": 0.9702406624687311, "grad_norm": 0.6484375, "learning_rate": 1.053595323179639e-05, "loss": 1.3795, "step": 5624 }, { "epoch": 0.9704131803674632, "grad_norm": 0.58203125, "learning_rate": 1.0533235245106668e-05, "loss": 1.3784, "step": 5625 }, { "epoch": 0.9705856982661951, "grad_norm": 0.56640625, "learning_rate": 1.053051721891155e-05, "loss": 1.4076, "step": 5626 }, { "epoch": 0.9707582161649271, "grad_norm": 0.59765625, "learning_rate": 1.0527799153412402e-05, "loss": 1.3815, "step": 5627 }, { "epoch": 0.9709307340636592, "grad_norm": 0.6015625, "learning_rate": 1.052508104881059e-05, "loss": 1.4889, "step": 5628 }, { "epoch": 0.9711032519623911, "grad_norm": 0.671875, "learning_rate": 1.0522362905307497e-05, "loss": 1.4448, "step": 5629 }, { "epoch": 0.9712757698611231, "grad_norm": 0.64453125, "learning_rate": 1.0519644723104494e-05, "loss": 1.3009, "step": 5630 }, { "epoch": 0.971448287759855, "grad_norm": 0.640625, "learning_rate": 1.0516926502402966e-05, "loss": 1.5176, "step": 5631 }, { "epoch": 0.9716208056585871, "grad_norm": 0.61328125, "learning_rate": 1.0514208243404291e-05, "loss": 1.4149, "step": 5632 }, { "epoch": 0.9717933235573191, "grad_norm": 0.578125, "learning_rate": 1.0511489946309856e-05, "loss": 1.4443, "step": 5633 }, { "epoch": 0.971965841456051, "grad_norm": 0.5859375, "learning_rate": 1.050877161132105e-05, "loss": 1.4321, "step": 5634 }, { "epoch": 0.9721383593547831, "grad_norm": 0.609375, "learning_rate": 1.0506053238639267e-05, "loss": 1.4988, "step": 5635 }, { "epoch": 0.9723108772535151, "grad_norm": 0.58984375, "learning_rate": 1.0503334828465895e-05, "loss": 1.4718, "step": 5636 }, { "epoch": 0.972483395152247, "grad_norm": 0.58984375, "learning_rate": 1.0500616381002331e-05, "loss": 1.3965, "step": 5637 }, { "epoch": 0.9726559130509791, "grad_norm": 0.58984375, "learning_rate": 1.0497897896449983e-05, "loss": 1.5349, "step": 5638 }, { "epoch": 0.972828430949711, "grad_norm": 0.62890625, "learning_rate": 1.0495179375010244e-05, "loss": 1.3807, "step": 5639 }, { "epoch": 0.973000948848443, "grad_norm": 0.66015625, "learning_rate": 1.0492460816884524e-05, "loss": 1.5651, "step": 5640 }, { "epoch": 0.973173466747175, "grad_norm": 0.87890625, "learning_rate": 1.0489742222274231e-05, "loss": 1.3553, "step": 5641 }, { "epoch": 0.973345984645907, "grad_norm": 0.546875, "learning_rate": 1.0487023591380775e-05, "loss": 1.4698, "step": 5642 }, { "epoch": 0.973518502544639, "grad_norm": 0.59765625, "learning_rate": 1.0484304924405566e-05, "loss": 1.4298, "step": 5643 }, { "epoch": 0.973691020443371, "grad_norm": 6.875, "learning_rate": 1.0481586221550022e-05, "loss": 1.5664, "step": 5644 }, { "epoch": 0.973863538342103, "grad_norm": 0.58984375, "learning_rate": 1.0478867483015563e-05, "loss": 1.4564, "step": 5645 }, { "epoch": 0.974036056240835, "grad_norm": 1.9765625, "learning_rate": 1.0476148709003607e-05, "loss": 1.3828, "step": 5646 }, { "epoch": 0.974208574139567, "grad_norm": 0.58984375, "learning_rate": 1.047342989971558e-05, "loss": 1.4288, "step": 5647 }, { "epoch": 0.9743810920382989, "grad_norm": 0.60546875, "learning_rate": 1.0470711055352912e-05, "loss": 1.5092, "step": 5648 }, { "epoch": 0.974553609937031, "grad_norm": 0.56640625, "learning_rate": 1.0467992176117024e-05, "loss": 1.4224, "step": 5649 }, { "epoch": 0.974726127835763, "grad_norm": 0.6796875, "learning_rate": 1.046527326220935e-05, "loss": 1.5092, "step": 5650 }, { "epoch": 0.9748986457344949, "grad_norm": 0.6328125, "learning_rate": 1.0462554313831327e-05, "loss": 1.459, "step": 5651 }, { "epoch": 0.975071163633227, "grad_norm": 0.58203125, "learning_rate": 1.0459835331184392e-05, "loss": 1.413, "step": 5652 }, { "epoch": 0.975243681531959, "grad_norm": 0.63671875, "learning_rate": 1.045711631446998e-05, "loss": 1.3732, "step": 5653 }, { "epoch": 0.9754161994306909, "grad_norm": 0.57421875, "learning_rate": 1.0454397263889538e-05, "loss": 1.4446, "step": 5654 }, { "epoch": 0.975588717329423, "grad_norm": 0.6796875, "learning_rate": 1.0451678179644504e-05, "loss": 1.4371, "step": 5655 }, { "epoch": 0.9757612352281549, "grad_norm": 0.75390625, "learning_rate": 1.044895906193633e-05, "loss": 1.5077, "step": 5656 }, { "epoch": 0.9759337531268869, "grad_norm": 0.59765625, "learning_rate": 1.0446239910966462e-05, "loss": 1.4009, "step": 5657 }, { "epoch": 0.9761062710256189, "grad_norm": 0.66796875, "learning_rate": 1.0443520726936356e-05, "loss": 1.4608, "step": 5658 }, { "epoch": 0.9762787889243509, "grad_norm": 0.640625, "learning_rate": 1.0440801510047462e-05, "loss": 1.4192, "step": 5659 }, { "epoch": 0.9764513068230829, "grad_norm": 0.6171875, "learning_rate": 1.0438082260501239e-05, "loss": 1.4076, "step": 5660 }, { "epoch": 0.9766238247218149, "grad_norm": 0.67578125, "learning_rate": 1.0435362978499143e-05, "loss": 1.3499, "step": 5661 }, { "epoch": 0.9767963426205469, "grad_norm": 0.63671875, "learning_rate": 1.043264366424264e-05, "loss": 1.4749, "step": 5662 }, { "epoch": 0.9769688605192789, "grad_norm": 0.5859375, "learning_rate": 1.0429924317933189e-05, "loss": 1.3881, "step": 5663 }, { "epoch": 0.9771413784180109, "grad_norm": 0.6015625, "learning_rate": 1.0427204939772257e-05, "loss": 1.4782, "step": 5664 }, { "epoch": 0.9773138963167428, "grad_norm": 0.60546875, "learning_rate": 1.0424485529961314e-05, "loss": 1.4459, "step": 5665 }, { "epoch": 0.9774864142154749, "grad_norm": 0.625, "learning_rate": 1.0421766088701832e-05, "loss": 1.3883, "step": 5666 }, { "epoch": 0.9776589321142068, "grad_norm": 0.65625, "learning_rate": 1.0419046616195285e-05, "loss": 1.5965, "step": 5667 }, { "epoch": 0.9778314500129388, "grad_norm": 0.609375, "learning_rate": 1.0416327112643143e-05, "loss": 1.3919, "step": 5668 }, { "epoch": 0.9780039679116709, "grad_norm": 0.62109375, "learning_rate": 1.0413607578246886e-05, "loss": 1.4496, "step": 5669 }, { "epoch": 0.9781764858104028, "grad_norm": 0.55859375, "learning_rate": 1.0410888013208001e-05, "loss": 1.4421, "step": 5670 }, { "epoch": 0.9783490037091348, "grad_norm": 0.7578125, "learning_rate": 1.040816841772796e-05, "loss": 1.5079, "step": 5671 }, { "epoch": 0.9785215216078668, "grad_norm": 0.6953125, "learning_rate": 1.0405448792008252e-05, "loss": 1.4715, "step": 5672 }, { "epoch": 0.9786940395065988, "grad_norm": 0.58203125, "learning_rate": 1.0402729136250364e-05, "loss": 1.4881, "step": 5673 }, { "epoch": 0.9788665574053308, "grad_norm": 0.64453125, "learning_rate": 1.0400009450655789e-05, "loss": 1.3597, "step": 5674 }, { "epoch": 0.9790390753040628, "grad_norm": 0.67578125, "learning_rate": 1.0397289735426012e-05, "loss": 1.5159, "step": 5675 }, { "epoch": 0.9792115932027948, "grad_norm": 0.6796875, "learning_rate": 1.0394569990762528e-05, "loss": 1.4996, "step": 5676 }, { "epoch": 0.9793841111015268, "grad_norm": 0.68359375, "learning_rate": 1.0391850216866834e-05, "loss": 1.4435, "step": 5677 }, { "epoch": 0.9795566290002587, "grad_norm": 0.58984375, "learning_rate": 1.038913041394043e-05, "loss": 1.335, "step": 5678 }, { "epoch": 0.9797291468989908, "grad_norm": 0.609375, "learning_rate": 1.0386410582184813e-05, "loss": 1.3793, "step": 5679 }, { "epoch": 0.9799016647977228, "grad_norm": 0.7109375, "learning_rate": 1.0383690721801485e-05, "loss": 1.4007, "step": 5680 }, { "epoch": 0.9800741826964547, "grad_norm": 0.62109375, "learning_rate": 1.038097083299195e-05, "loss": 1.3936, "step": 5681 }, { "epoch": 0.9802467005951867, "grad_norm": 0.609375, "learning_rate": 1.0378250915957716e-05, "loss": 1.5647, "step": 5682 }, { "epoch": 0.9804192184939188, "grad_norm": 0.5703125, "learning_rate": 1.0375530970900292e-05, "loss": 1.3682, "step": 5683 }, { "epoch": 0.9805917363926507, "grad_norm": 0.62890625, "learning_rate": 1.0372810998021185e-05, "loss": 1.4399, "step": 5684 }, { "epoch": 0.9807642542913827, "grad_norm": 0.6484375, "learning_rate": 1.0370090997521906e-05, "loss": 1.4918, "step": 5685 }, { "epoch": 0.9809367721901148, "grad_norm": 0.80078125, "learning_rate": 1.0367370969603981e-05, "loss": 1.4271, "step": 5686 }, { "epoch": 0.9811092900888467, "grad_norm": 0.60546875, "learning_rate": 1.0364650914468917e-05, "loss": 1.4676, "step": 5687 }, { "epoch": 0.9812818079875787, "grad_norm": 0.6015625, "learning_rate": 1.0361930832318232e-05, "loss": 1.3725, "step": 5688 }, { "epoch": 0.9814543258863107, "grad_norm": 0.65625, "learning_rate": 1.0359210723353451e-05, "loss": 1.3957, "step": 5689 }, { "epoch": 0.9816268437850427, "grad_norm": 1.25, "learning_rate": 1.0356490587776095e-05, "loss": 1.4496, "step": 5690 }, { "epoch": 0.9817993616837747, "grad_norm": 0.71484375, "learning_rate": 1.0353770425787693e-05, "loss": 1.4051, "step": 5691 }, { "epoch": 0.9819718795825066, "grad_norm": 0.63671875, "learning_rate": 1.0351050237589763e-05, "loss": 1.4122, "step": 5692 }, { "epoch": 0.9821443974812387, "grad_norm": 0.640625, "learning_rate": 1.034833002338384e-05, "loss": 1.4373, "step": 5693 }, { "epoch": 0.9823169153799707, "grad_norm": 0.60546875, "learning_rate": 1.0345609783371448e-05, "loss": 1.4795, "step": 5694 }, { "epoch": 0.9824894332787026, "grad_norm": 0.78125, "learning_rate": 1.0342889517754131e-05, "loss": 1.4264, "step": 5695 }, { "epoch": 0.9826619511774347, "grad_norm": 0.58984375, "learning_rate": 1.034016922673341e-05, "loss": 1.3468, "step": 5696 }, { "epoch": 0.9828344690761667, "grad_norm": 0.59765625, "learning_rate": 1.033744891051083e-05, "loss": 1.3907, "step": 5697 }, { "epoch": 0.9830069869748986, "grad_norm": 0.63671875, "learning_rate": 1.0334728569287924e-05, "loss": 1.4186, "step": 5698 }, { "epoch": 0.9831795048736306, "grad_norm": 0.5859375, "learning_rate": 1.0332008203266237e-05, "loss": 1.4321, "step": 5699 }, { "epoch": 0.9833520227723627, "grad_norm": 0.609375, "learning_rate": 1.0329287812647307e-05, "loss": 1.3987, "step": 5700 }, { "epoch": 0.9833520227723627, "eval_loss": 1.4110112190246582, "eval_runtime": 10.8773, "eval_samples_per_second": 94.141, "eval_steps_per_second": 23.535, "step": 5700 }, { "epoch": 0.9835245406710946, "grad_norm": 0.65234375, "learning_rate": 1.0326567397632675e-05, "loss": 1.449, "step": 5701 }, { "epoch": 0.9836970585698266, "grad_norm": 0.7890625, "learning_rate": 1.0323846958423894e-05, "loss": 1.4914, "step": 5702 }, { "epoch": 0.9838695764685587, "grad_norm": 0.55859375, "learning_rate": 1.0321126495222505e-05, "loss": 1.4824, "step": 5703 }, { "epoch": 0.9840420943672906, "grad_norm": 0.58984375, "learning_rate": 1.031840600823006e-05, "loss": 1.4821, "step": 5704 }, { "epoch": 0.9842146122660226, "grad_norm": 0.5859375, "learning_rate": 1.0315685497648107e-05, "loss": 1.4482, "step": 5705 }, { "epoch": 0.9843871301647545, "grad_norm": 0.55078125, "learning_rate": 1.0312964963678198e-05, "loss": 1.5063, "step": 5706 }, { "epoch": 0.9845596480634866, "grad_norm": 0.578125, "learning_rate": 1.0310244406521895e-05, "loss": 1.4312, "step": 5707 }, { "epoch": 0.9847321659622186, "grad_norm": 0.59375, "learning_rate": 1.0307523826380743e-05, "loss": 1.4777, "step": 5708 }, { "epoch": 0.9849046838609505, "grad_norm": 0.671875, "learning_rate": 1.0304803223456305e-05, "loss": 1.4131, "step": 5709 }, { "epoch": 0.9850772017596826, "grad_norm": 0.62109375, "learning_rate": 1.0302082597950141e-05, "loss": 1.4888, "step": 5710 }, { "epoch": 0.9852497196584146, "grad_norm": 0.69140625, "learning_rate": 1.029936195006381e-05, "loss": 1.5442, "step": 5711 }, { "epoch": 0.9854222375571465, "grad_norm": 0.6328125, "learning_rate": 1.0296641279998876e-05, "loss": 1.4095, "step": 5712 }, { "epoch": 0.9855947554558786, "grad_norm": 0.6875, "learning_rate": 1.02939205879569e-05, "loss": 1.4457, "step": 5713 }, { "epoch": 0.9857672733546106, "grad_norm": 0.68359375, "learning_rate": 1.0291199874139453e-05, "loss": 1.4679, "step": 5714 }, { "epoch": 0.9859397912533425, "grad_norm": 0.65625, "learning_rate": 1.02884791387481e-05, "loss": 1.4651, "step": 5715 }, { "epoch": 0.9861123091520745, "grad_norm": 0.6015625, "learning_rate": 1.0285758381984408e-05, "loss": 1.5139, "step": 5716 }, { "epoch": 0.9862848270508066, "grad_norm": 0.75, "learning_rate": 1.0283037604049948e-05, "loss": 1.4568, "step": 5717 }, { "epoch": 0.9864573449495385, "grad_norm": 0.671875, "learning_rate": 1.0280316805146295e-05, "loss": 1.4287, "step": 5718 }, { "epoch": 0.9866298628482705, "grad_norm": 0.63671875, "learning_rate": 1.0277595985475024e-05, "loss": 1.5319, "step": 5719 }, { "epoch": 0.9868023807470025, "grad_norm": 0.64453125, "learning_rate": 1.0274875145237706e-05, "loss": 1.4455, "step": 5720 }, { "epoch": 0.9869748986457345, "grad_norm": 0.62890625, "learning_rate": 1.027215428463592e-05, "loss": 1.4713, "step": 5721 }, { "epoch": 0.9871474165444665, "grad_norm": 0.65234375, "learning_rate": 1.0269433403871246e-05, "loss": 1.4784, "step": 5722 }, { "epoch": 0.9873199344431984, "grad_norm": 0.609375, "learning_rate": 1.0266712503145262e-05, "loss": 1.5312, "step": 5723 }, { "epoch": 0.9874924523419305, "grad_norm": 0.66796875, "learning_rate": 1.0263991582659547e-05, "loss": 1.5036, "step": 5724 }, { "epoch": 0.9876649702406625, "grad_norm": 0.65234375, "learning_rate": 1.0261270642615687e-05, "loss": 1.3068, "step": 5725 }, { "epoch": 0.9878374881393944, "grad_norm": 0.609375, "learning_rate": 1.0258549683215262e-05, "loss": 1.4302, "step": 5726 }, { "epoch": 0.9880100060381265, "grad_norm": 0.796875, "learning_rate": 1.0255828704659868e-05, "loss": 1.4892, "step": 5727 }, { "epoch": 0.9881825239368585, "grad_norm": 0.796875, "learning_rate": 1.025310770715108e-05, "loss": 1.4428, "step": 5728 }, { "epoch": 0.9883550418355904, "grad_norm": 0.6015625, "learning_rate": 1.0250386690890493e-05, "loss": 1.4245, "step": 5729 }, { "epoch": 0.9885275597343224, "grad_norm": 0.64453125, "learning_rate": 1.0247665656079692e-05, "loss": 1.4714, "step": 5730 }, { "epoch": 0.9887000776330545, "grad_norm": 0.6015625, "learning_rate": 1.0244944602920277e-05, "loss": 1.3732, "step": 5731 }, { "epoch": 0.9888725955317864, "grad_norm": 0.609375, "learning_rate": 1.0242223531613834e-05, "loss": 1.3682, "step": 5732 }, { "epoch": 0.9890451134305184, "grad_norm": 0.64453125, "learning_rate": 1.0239502442361955e-05, "loss": 1.433, "step": 5733 }, { "epoch": 0.9892176313292504, "grad_norm": 0.64453125, "learning_rate": 1.0236781335366239e-05, "loss": 1.4126, "step": 5734 }, { "epoch": 0.9893901492279824, "grad_norm": 0.6015625, "learning_rate": 1.0234060210828288e-05, "loss": 1.3825, "step": 5735 }, { "epoch": 0.9895626671267144, "grad_norm": 0.76171875, "learning_rate": 1.0231339068949688e-05, "loss": 1.4591, "step": 5736 }, { "epoch": 0.9897351850254464, "grad_norm": 0.734375, "learning_rate": 1.0228617909932046e-05, "loss": 1.3771, "step": 5737 }, { "epoch": 0.9899077029241784, "grad_norm": 0.57421875, "learning_rate": 1.0225896733976962e-05, "loss": 1.4394, "step": 5738 }, { "epoch": 0.9900802208229104, "grad_norm": 0.59765625, "learning_rate": 1.0223175541286036e-05, "loss": 1.4656, "step": 5739 }, { "epoch": 0.9902527387216423, "grad_norm": 0.6953125, "learning_rate": 1.0220454332060872e-05, "loss": 1.5557, "step": 5740 }, { "epoch": 0.9904252566203744, "grad_norm": 0.5859375, "learning_rate": 1.0217733106503072e-05, "loss": 1.5004, "step": 5741 }, { "epoch": 0.9905977745191064, "grad_norm": 0.58203125, "learning_rate": 1.0215011864814244e-05, "loss": 1.4148, "step": 5742 }, { "epoch": 0.9907702924178383, "grad_norm": 0.609375, "learning_rate": 1.0212290607195997e-05, "loss": 1.4913, "step": 5743 }, { "epoch": 0.9909428103165704, "grad_norm": 0.7265625, "learning_rate": 1.020956933384993e-05, "loss": 1.4065, "step": 5744 }, { "epoch": 0.9911153282153023, "grad_norm": 0.6484375, "learning_rate": 1.0206848044977658e-05, "loss": 1.4257, "step": 5745 }, { "epoch": 0.9912878461140343, "grad_norm": 0.66796875, "learning_rate": 1.0204126740780791e-05, "loss": 1.4675, "step": 5746 }, { "epoch": 0.9914603640127663, "grad_norm": 0.5703125, "learning_rate": 1.0201405421460942e-05, "loss": 1.508, "step": 5747 }, { "epoch": 0.9916328819114983, "grad_norm": 0.58984375, "learning_rate": 1.0198684087219718e-05, "loss": 1.472, "step": 5748 }, { "epoch": 0.9918053998102303, "grad_norm": 0.59765625, "learning_rate": 1.0195962738258736e-05, "loss": 1.4882, "step": 5749 }, { "epoch": 0.9919779177089623, "grad_norm": 0.625, "learning_rate": 1.0193241374779607e-05, "loss": 1.4459, "step": 5750 }, { "epoch": 0.9921504356076943, "grad_norm": 0.58984375, "learning_rate": 1.0190519996983956e-05, "loss": 1.4201, "step": 5751 }, { "epoch": 0.9923229535064263, "grad_norm": 0.859375, "learning_rate": 1.0187798605073389e-05, "loss": 1.4262, "step": 5752 }, { "epoch": 0.9924954714051583, "grad_norm": 0.69921875, "learning_rate": 1.018507719924953e-05, "loss": 1.3847, "step": 5753 }, { "epoch": 0.9926679893038903, "grad_norm": 0.66796875, "learning_rate": 1.0182355779713992e-05, "loss": 1.4505, "step": 5754 }, { "epoch": 0.9928405072026223, "grad_norm": 0.6171875, "learning_rate": 1.0179634346668406e-05, "loss": 1.4877, "step": 5755 }, { "epoch": 0.9930130251013543, "grad_norm": 0.59375, "learning_rate": 1.0176912900314378e-05, "loss": 1.4688, "step": 5756 }, { "epoch": 0.9931855430000862, "grad_norm": 0.94921875, "learning_rate": 1.0174191440853541e-05, "loss": 1.5153, "step": 5757 }, { "epoch": 0.9933580608988183, "grad_norm": 0.6328125, "learning_rate": 1.017146996848751e-05, "loss": 1.5, "step": 5758 }, { "epoch": 0.9935305787975502, "grad_norm": 0.71484375, "learning_rate": 1.0168748483417916e-05, "loss": 1.4298, "step": 5759 }, { "epoch": 0.9937030966962822, "grad_norm": 0.5546875, "learning_rate": 1.0166026985846377e-05, "loss": 1.4473, "step": 5760 }, { "epoch": 0.9938756145950143, "grad_norm": 0.66015625, "learning_rate": 1.0163305475974523e-05, "loss": 1.3173, "step": 5761 }, { "epoch": 0.9940481324937462, "grad_norm": 0.6953125, "learning_rate": 1.0160583954003978e-05, "loss": 1.4909, "step": 5762 }, { "epoch": 0.9942206503924782, "grad_norm": 0.6171875, "learning_rate": 1.0157862420136371e-05, "loss": 1.4453, "step": 5763 }, { "epoch": 0.9943931682912102, "grad_norm": 0.94921875, "learning_rate": 1.015514087457333e-05, "loss": 1.5272, "step": 5764 }, { "epoch": 0.9945656861899422, "grad_norm": 0.5859375, "learning_rate": 1.0152419317516482e-05, "loss": 1.4821, "step": 5765 }, { "epoch": 0.9947382040886742, "grad_norm": 0.62109375, "learning_rate": 1.0149697749167459e-05, "loss": 1.3855, "step": 5766 }, { "epoch": 0.9949107219874062, "grad_norm": 0.58984375, "learning_rate": 1.0146976169727893e-05, "loss": 1.4995, "step": 5767 }, { "epoch": 0.9950832398861382, "grad_norm": 0.60546875, "learning_rate": 1.0144254579399413e-05, "loss": 1.4256, "step": 5768 }, { "epoch": 0.9952557577848702, "grad_norm": 0.5703125, "learning_rate": 1.0141532978383653e-05, "loss": 1.35, "step": 5769 }, { "epoch": 0.9954282756836021, "grad_norm": 0.625, "learning_rate": 1.0138811366882243e-05, "loss": 1.4445, "step": 5770 }, { "epoch": 0.9956007935823342, "grad_norm": 0.59375, "learning_rate": 1.0136089745096824e-05, "loss": 1.4354, "step": 5771 }, { "epoch": 0.9957733114810662, "grad_norm": 0.58984375, "learning_rate": 1.0133368113229026e-05, "loss": 1.3907, "step": 5772 }, { "epoch": 0.9959458293797981, "grad_norm": 0.6328125, "learning_rate": 1.013064647148048e-05, "loss": 1.4249, "step": 5773 }, { "epoch": 0.9961183472785301, "grad_norm": 0.609375, "learning_rate": 1.0127924820052831e-05, "loss": 1.4933, "step": 5774 }, { "epoch": 0.9962908651772622, "grad_norm": 0.61328125, "learning_rate": 1.0125203159147712e-05, "loss": 1.4064, "step": 5775 }, { "epoch": 0.9964633830759941, "grad_norm": 0.66796875, "learning_rate": 1.012248148896676e-05, "loss": 1.3864, "step": 5776 }, { "epoch": 0.9966359009747261, "grad_norm": 0.671875, "learning_rate": 1.0119759809711614e-05, "loss": 1.3759, "step": 5777 }, { "epoch": 0.9968084188734582, "grad_norm": 0.64453125, "learning_rate": 1.0117038121583911e-05, "loss": 1.4703, "step": 5778 }, { "epoch": 0.9969809367721901, "grad_norm": 0.7421875, "learning_rate": 1.0114316424785295e-05, "loss": 1.3742, "step": 5779 }, { "epoch": 0.9971534546709221, "grad_norm": 0.6484375, "learning_rate": 1.0111594719517406e-05, "loss": 1.4711, "step": 5780 }, { "epoch": 0.997325972569654, "grad_norm": 0.60546875, "learning_rate": 1.0108873005981876e-05, "loss": 1.3791, "step": 5781 }, { "epoch": 0.9974984904683861, "grad_norm": 0.58203125, "learning_rate": 1.0106151284380359e-05, "loss": 1.4406, "step": 5782 }, { "epoch": 0.9976710083671181, "grad_norm": 0.66796875, "learning_rate": 1.010342955491449e-05, "loss": 1.3951, "step": 5783 }, { "epoch": 0.99784352626585, "grad_norm": 0.58203125, "learning_rate": 1.0100707817785915e-05, "loss": 1.3135, "step": 5784 }, { "epoch": 0.9980160441645821, "grad_norm": 0.62890625, "learning_rate": 1.0097986073196273e-05, "loss": 1.4802, "step": 5785 }, { "epoch": 0.9981885620633141, "grad_norm": 0.75390625, "learning_rate": 1.0095264321347212e-05, "loss": 1.5621, "step": 5786 }, { "epoch": 0.998361079962046, "grad_norm": 0.64453125, "learning_rate": 1.0092542562440375e-05, "loss": 1.3868, "step": 5787 }, { "epoch": 0.9985335978607781, "grad_norm": 0.58984375, "learning_rate": 1.0089820796677407e-05, "loss": 1.4492, "step": 5788 }, { "epoch": 0.9987061157595101, "grad_norm": 0.6484375, "learning_rate": 1.0087099024259951e-05, "loss": 1.3685, "step": 5789 }, { "epoch": 0.998878633658242, "grad_norm": 0.6484375, "learning_rate": 1.0084377245389656e-05, "loss": 1.4912, "step": 5790 }, { "epoch": 0.999051151556974, "grad_norm": 0.8828125, "learning_rate": 1.008165546026817e-05, "loss": 1.463, "step": 5791 }, { "epoch": 0.9992236694557061, "grad_norm": 0.6171875, "learning_rate": 1.0078933669097135e-05, "loss": 1.4201, "step": 5792 }, { "epoch": 0.999396187354438, "grad_norm": 0.61328125, "learning_rate": 1.00762118720782e-05, "loss": 1.5223, "step": 5793 }, { "epoch": 0.99956870525317, "grad_norm": 0.61328125, "learning_rate": 1.0073490069413014e-05, "loss": 1.4212, "step": 5794 }, { "epoch": 0.9997412231519021, "grad_norm": 0.89453125, "learning_rate": 1.0070768261303226e-05, "loss": 1.4884, "step": 5795 }, { "epoch": 0.999913741050634, "grad_norm": 0.640625, "learning_rate": 1.006804644795048e-05, "loss": 1.4197, "step": 5796 }, { "epoch": 1.000086258949366, "grad_norm": 0.60546875, "learning_rate": 1.006532462955643e-05, "loss": 1.4797, "step": 5797 }, { "epoch": 1.000258776848098, "grad_norm": 0.59375, "learning_rate": 1.006260280632272e-05, "loss": 1.4291, "step": 5798 }, { "epoch": 1.00043129474683, "grad_norm": 0.66796875, "learning_rate": 1.0059880978451009e-05, "loss": 1.4776, "step": 5799 }, { "epoch": 1.000603812645562, "grad_norm": 0.765625, "learning_rate": 1.0057159146142937e-05, "loss": 1.466, "step": 5800 }, { "epoch": 1.000603812645562, "eval_loss": 1.4107141494750977, "eval_runtime": 10.9354, "eval_samples_per_second": 93.641, "eval_steps_per_second": 23.41, "step": 5800 }, { "epoch": 1.000776330544294, "grad_norm": 0.6171875, "learning_rate": 1.0054437309600159e-05, "loss": 1.5335, "step": 5801 }, { "epoch": 1.000948848443026, "grad_norm": 0.61328125, "learning_rate": 1.0051715469024325e-05, "loss": 1.3836, "step": 5802 }, { "epoch": 1.001121366341758, "grad_norm": 0.61328125, "learning_rate": 1.0048993624617087e-05, "loss": 1.5043, "step": 5803 }, { "epoch": 1.00129388424049, "grad_norm": 0.6171875, "learning_rate": 1.0046271776580094e-05, "loss": 1.4079, "step": 5804 }, { "epoch": 1.001466402139222, "grad_norm": 0.58203125, "learning_rate": 1.0043549925115e-05, "loss": 1.4617, "step": 5805 }, { "epoch": 1.0016389200379539, "grad_norm": 0.58984375, "learning_rate": 1.0040828070423451e-05, "loss": 1.3933, "step": 5806 }, { "epoch": 1.001811437936686, "grad_norm": 0.58984375, "learning_rate": 1.0038106212707108e-05, "loss": 1.4361, "step": 5807 }, { "epoch": 1.001983955835418, "grad_norm": 0.5859375, "learning_rate": 1.0035384352167619e-05, "loss": 1.5073, "step": 5808 }, { "epoch": 1.00215647373415, "grad_norm": 0.58203125, "learning_rate": 1.0032662489006634e-05, "loss": 1.3779, "step": 5809 }, { "epoch": 1.002328991632882, "grad_norm": 0.5703125, "learning_rate": 1.0029940623425807e-05, "loss": 1.4819, "step": 5810 }, { "epoch": 1.0025015095316139, "grad_norm": 0.6953125, "learning_rate": 1.0027218755626793e-05, "loss": 1.3348, "step": 5811 }, { "epoch": 1.0026740274303458, "grad_norm": 0.5703125, "learning_rate": 1.0024496885811245e-05, "loss": 1.3513, "step": 5812 }, { "epoch": 1.0028465453290778, "grad_norm": 0.6015625, "learning_rate": 1.002177501418081e-05, "loss": 1.4489, "step": 5813 }, { "epoch": 1.00301906322781, "grad_norm": 0.5703125, "learning_rate": 1.001905314093715e-05, "loss": 1.4161, "step": 5814 }, { "epoch": 1.003191581126542, "grad_norm": 0.52734375, "learning_rate": 1.0016331266281913e-05, "loss": 1.3771, "step": 5815 }, { "epoch": 1.003364099025274, "grad_norm": 0.69140625, "learning_rate": 1.0013609390416753e-05, "loss": 1.2211, "step": 5816 }, { "epoch": 1.0035366169240059, "grad_norm": 0.62890625, "learning_rate": 1.0010887513543325e-05, "loss": 1.5178, "step": 5817 }, { "epoch": 1.0037091348227378, "grad_norm": 0.60546875, "learning_rate": 1.0008165635863283e-05, "loss": 1.5569, "step": 5818 }, { "epoch": 1.0038816527214698, "grad_norm": 0.6015625, "learning_rate": 1.0005443757578279e-05, "loss": 1.3907, "step": 5819 }, { "epoch": 1.0040541706202017, "grad_norm": 0.58984375, "learning_rate": 1.0002721878889967e-05, "loss": 1.4906, "step": 5820 }, { "epoch": 1.004226688518934, "grad_norm": 0.6015625, "learning_rate": 1e-05, "loss": 1.4273, "step": 5821 }, { "epoch": 1.004399206417666, "grad_norm": 0.6328125, "learning_rate": 9.997278121110033e-06, "loss": 1.5296, "step": 5822 }, { "epoch": 1.0045717243163979, "grad_norm": 0.6015625, "learning_rate": 9.994556242421723e-06, "loss": 1.3968, "step": 5823 }, { "epoch": 1.0047442422151298, "grad_norm": 0.6328125, "learning_rate": 9.991834364136719e-06, "loss": 1.5477, "step": 5824 }, { "epoch": 1.0049167601138618, "grad_norm": 0.61328125, "learning_rate": 9.989112486456677e-06, "loss": 1.5011, "step": 5825 }, { "epoch": 1.0050892780125937, "grad_norm": 0.58203125, "learning_rate": 9.986390609583246e-06, "loss": 1.5186, "step": 5826 }, { "epoch": 1.0052617959113257, "grad_norm": 0.60546875, "learning_rate": 9.983668733718089e-06, "loss": 1.5327, "step": 5827 }, { "epoch": 1.0054343138100579, "grad_norm": 0.6953125, "learning_rate": 9.980946859062852e-06, "loss": 1.4618, "step": 5828 }, { "epoch": 1.0056068317087898, "grad_norm": 0.59765625, "learning_rate": 9.978224985819193e-06, "loss": 1.3387, "step": 5829 }, { "epoch": 1.0057793496075218, "grad_norm": 0.58984375, "learning_rate": 9.975503114188758e-06, "loss": 1.4009, "step": 5830 }, { "epoch": 1.0059518675062538, "grad_norm": 0.61328125, "learning_rate": 9.97278124437321e-06, "loss": 1.3932, "step": 5831 }, { "epoch": 1.0061243854049857, "grad_norm": 0.578125, "learning_rate": 9.970059376574195e-06, "loss": 1.4174, "step": 5832 }, { "epoch": 1.0062969033037177, "grad_norm": 0.59765625, "learning_rate": 9.96733751099337e-06, "loss": 1.4151, "step": 5833 }, { "epoch": 1.0064694212024499, "grad_norm": 0.5625, "learning_rate": 9.964615647832384e-06, "loss": 1.4401, "step": 5834 }, { "epoch": 1.0066419391011818, "grad_norm": 0.60546875, "learning_rate": 9.961893787292895e-06, "loss": 1.4587, "step": 5835 }, { "epoch": 1.0068144569999138, "grad_norm": 0.59375, "learning_rate": 9.95917192957655e-06, "loss": 1.4193, "step": 5836 }, { "epoch": 1.0069869748986457, "grad_norm": 0.578125, "learning_rate": 9.956450074885007e-06, "loss": 1.4307, "step": 5837 }, { "epoch": 1.0071594927973777, "grad_norm": 0.60546875, "learning_rate": 9.953728223419908e-06, "loss": 1.397, "step": 5838 }, { "epoch": 1.0073320106961097, "grad_norm": 0.6953125, "learning_rate": 9.951006375382915e-06, "loss": 1.5258, "step": 5839 }, { "epoch": 1.0075045285948416, "grad_norm": 0.578125, "learning_rate": 9.948284530975678e-06, "loss": 1.3965, "step": 5840 }, { "epoch": 1.0076770464935738, "grad_norm": 0.60546875, "learning_rate": 9.945562690399841e-06, "loss": 1.4365, "step": 5841 }, { "epoch": 1.0078495643923058, "grad_norm": 0.703125, "learning_rate": 9.942840853857065e-06, "loss": 1.5303, "step": 5842 }, { "epoch": 1.0080220822910377, "grad_norm": 0.64453125, "learning_rate": 9.940119021548994e-06, "loss": 1.45, "step": 5843 }, { "epoch": 1.0081946001897697, "grad_norm": 0.58203125, "learning_rate": 9.937397193677281e-06, "loss": 1.4291, "step": 5844 }, { "epoch": 1.0083671180885017, "grad_norm": 0.63671875, "learning_rate": 9.934675370443571e-06, "loss": 1.3639, "step": 5845 }, { "epoch": 1.0085396359872336, "grad_norm": 0.6328125, "learning_rate": 9.931953552049524e-06, "loss": 1.316, "step": 5846 }, { "epoch": 1.0087121538859656, "grad_norm": 0.70703125, "learning_rate": 9.929231738696779e-06, "loss": 1.377, "step": 5847 }, { "epoch": 1.0088846717846978, "grad_norm": 0.6640625, "learning_rate": 9.926509930586991e-06, "loss": 1.4196, "step": 5848 }, { "epoch": 1.0090571896834297, "grad_norm": 0.67578125, "learning_rate": 9.923788127921801e-06, "loss": 1.386, "step": 5849 }, { "epoch": 1.0092297075821617, "grad_norm": 0.6015625, "learning_rate": 9.92106633090287e-06, "loss": 1.3856, "step": 5850 }, { "epoch": 1.0094022254808936, "grad_norm": 0.66015625, "learning_rate": 9.918344539731832e-06, "loss": 1.4346, "step": 5851 }, { "epoch": 1.0095747433796256, "grad_norm": 0.765625, "learning_rate": 9.915622754610349e-06, "loss": 1.4435, "step": 5852 }, { "epoch": 1.0097472612783576, "grad_norm": 0.73046875, "learning_rate": 9.912900975740052e-06, "loss": 1.3264, "step": 5853 }, { "epoch": 1.0099197791770895, "grad_norm": 0.57421875, "learning_rate": 9.910179203322595e-06, "loss": 1.3786, "step": 5854 }, { "epoch": 1.0100922970758217, "grad_norm": 0.578125, "learning_rate": 9.907457437559626e-06, "loss": 1.3746, "step": 5855 }, { "epoch": 1.0102648149745537, "grad_norm": 0.65625, "learning_rate": 9.90473567865279e-06, "loss": 1.5273, "step": 5856 }, { "epoch": 1.0104373328732856, "grad_norm": 0.65625, "learning_rate": 9.90201392680373e-06, "loss": 1.3937, "step": 5857 }, { "epoch": 1.0106098507720176, "grad_norm": 0.66015625, "learning_rate": 9.899292182214087e-06, "loss": 1.4236, "step": 5858 }, { "epoch": 1.0107823686707496, "grad_norm": 0.62109375, "learning_rate": 9.896570445085511e-06, "loss": 1.391, "step": 5859 }, { "epoch": 1.0109548865694815, "grad_norm": 0.70703125, "learning_rate": 9.893848715619643e-06, "loss": 1.4368, "step": 5860 }, { "epoch": 1.0111274044682135, "grad_norm": 0.61328125, "learning_rate": 9.891126994018126e-06, "loss": 1.4629, "step": 5861 }, { "epoch": 1.0112999223669457, "grad_norm": 1.015625, "learning_rate": 9.888405280482598e-06, "loss": 1.4976, "step": 5862 }, { "epoch": 1.0114724402656776, "grad_norm": 0.58984375, "learning_rate": 9.885683575214709e-06, "loss": 1.4451, "step": 5863 }, { "epoch": 1.0116449581644096, "grad_norm": 0.56640625, "learning_rate": 9.88296187841609e-06, "loss": 1.4385, "step": 5864 }, { "epoch": 1.0118174760631415, "grad_norm": 0.58203125, "learning_rate": 9.880240190288391e-06, "loss": 1.4685, "step": 5865 }, { "epoch": 1.0119899939618735, "grad_norm": 0.5703125, "learning_rate": 9.877518511033243e-06, "loss": 1.4252, "step": 5866 }, { "epoch": 1.0121625118606055, "grad_norm": 0.60546875, "learning_rate": 9.874796840852292e-06, "loss": 1.416, "step": 5867 }, { "epoch": 1.0123350297593374, "grad_norm": 0.671875, "learning_rate": 9.87207517994717e-06, "loss": 1.3435, "step": 5868 }, { "epoch": 1.0125075476580696, "grad_norm": 0.57421875, "learning_rate": 9.869353528519523e-06, "loss": 1.4463, "step": 5869 }, { "epoch": 1.0126800655568016, "grad_norm": 0.8203125, "learning_rate": 9.866631886770979e-06, "loss": 1.389, "step": 5870 }, { "epoch": 1.0128525834555335, "grad_norm": 0.61328125, "learning_rate": 9.86391025490318e-06, "loss": 1.3701, "step": 5871 }, { "epoch": 1.0130251013542655, "grad_norm": 0.58203125, "learning_rate": 9.861188633117758e-06, "loss": 1.4432, "step": 5872 }, { "epoch": 1.0131976192529975, "grad_norm": 0.60546875, "learning_rate": 9.858467021616349e-06, "loss": 1.3332, "step": 5873 }, { "epoch": 1.0133701371517294, "grad_norm": 0.6171875, "learning_rate": 9.855745420600589e-06, "loss": 1.3983, "step": 5874 }, { "epoch": 1.0135426550504616, "grad_norm": 0.6953125, "learning_rate": 9.85302383027211e-06, "loss": 1.4667, "step": 5875 }, { "epoch": 1.0137151729491936, "grad_norm": 0.56640625, "learning_rate": 9.850302250832544e-06, "loss": 1.3457, "step": 5876 }, { "epoch": 1.0138876908479255, "grad_norm": 0.62109375, "learning_rate": 9.84758068248352e-06, "loss": 1.4847, "step": 5877 }, { "epoch": 1.0140602087466575, "grad_norm": 0.59375, "learning_rate": 9.844859125426674e-06, "loss": 1.4181, "step": 5878 }, { "epoch": 1.0142327266453894, "grad_norm": 0.5703125, "learning_rate": 9.842137579863632e-06, "loss": 1.3819, "step": 5879 }, { "epoch": 1.0144052445441214, "grad_norm": 0.76953125, "learning_rate": 9.839416045996027e-06, "loss": 1.4222, "step": 5880 }, { "epoch": 1.0145777624428534, "grad_norm": 0.57421875, "learning_rate": 9.83669452402548e-06, "loss": 1.551, "step": 5881 }, { "epoch": 1.0147502803415855, "grad_norm": 0.6953125, "learning_rate": 9.833973014153628e-06, "loss": 1.5052, "step": 5882 }, { "epoch": 1.0149227982403175, "grad_norm": 0.6171875, "learning_rate": 9.831251516582087e-06, "loss": 1.4471, "step": 5883 }, { "epoch": 1.0150953161390495, "grad_norm": 0.5703125, "learning_rate": 9.828530031512493e-06, "loss": 1.426, "step": 5884 }, { "epoch": 1.0152678340377814, "grad_norm": 0.61328125, "learning_rate": 9.825808559146464e-06, "loss": 1.3441, "step": 5885 }, { "epoch": 1.0154403519365134, "grad_norm": 0.57421875, "learning_rate": 9.823087099685622e-06, "loss": 1.48, "step": 5886 }, { "epoch": 1.0156128698352453, "grad_norm": 0.67578125, "learning_rate": 9.820365653331599e-06, "loss": 1.5439, "step": 5887 }, { "epoch": 1.0157853877339773, "grad_norm": 0.625, "learning_rate": 9.817644220286006e-06, "loss": 1.381, "step": 5888 }, { "epoch": 1.0159579056327095, "grad_norm": 0.7265625, "learning_rate": 9.814922800750474e-06, "loss": 1.4234, "step": 5889 }, { "epoch": 1.0161304235314415, "grad_norm": 0.59375, "learning_rate": 9.81220139492661e-06, "loss": 1.5136, "step": 5890 }, { "epoch": 1.0163029414301734, "grad_norm": 0.5703125, "learning_rate": 9.809480003016046e-06, "loss": 1.3156, "step": 5891 }, { "epoch": 1.0164754593289054, "grad_norm": 0.58984375, "learning_rate": 9.806758625220391e-06, "loss": 1.3625, "step": 5892 }, { "epoch": 1.0166479772276373, "grad_norm": 0.58984375, "learning_rate": 9.804037261741269e-06, "loss": 1.3784, "step": 5893 }, { "epoch": 1.0168204951263693, "grad_norm": 0.6171875, "learning_rate": 9.801315912780283e-06, "loss": 1.4707, "step": 5894 }, { "epoch": 1.0169930130251013, "grad_norm": 0.640625, "learning_rate": 9.798594578539063e-06, "loss": 1.4436, "step": 5895 }, { "epoch": 1.0171655309238334, "grad_norm": 0.6015625, "learning_rate": 9.795873259219212e-06, "loss": 1.4904, "step": 5896 }, { "epoch": 1.0173380488225654, "grad_norm": 0.56640625, "learning_rate": 9.793151955022346e-06, "loss": 1.4208, "step": 5897 }, { "epoch": 1.0175105667212974, "grad_norm": 0.75390625, "learning_rate": 9.790430666150073e-06, "loss": 1.4334, "step": 5898 }, { "epoch": 1.0176830846200293, "grad_norm": 0.609375, "learning_rate": 9.78770939280401e-06, "loss": 1.4077, "step": 5899 }, { "epoch": 1.0178556025187613, "grad_norm": 0.55078125, "learning_rate": 9.78498813518576e-06, "loss": 1.425, "step": 5900 }, { "epoch": 1.0178556025187613, "eval_loss": 1.410369634628296, "eval_runtime": 10.8142, "eval_samples_per_second": 94.69, "eval_steps_per_second": 23.673, "step": 5900 }, { "epoch": 1.0180281204174932, "grad_norm": 0.62109375, "learning_rate": 9.782266893496933e-06, "loss": 1.3682, "step": 5901 }, { "epoch": 1.0182006383162252, "grad_norm": 0.58203125, "learning_rate": 9.779545667939132e-06, "loss": 1.4094, "step": 5902 }, { "epoch": 1.0183731562149574, "grad_norm": 0.6328125, "learning_rate": 9.776824458713965e-06, "loss": 1.4804, "step": 5903 }, { "epoch": 1.0185456741136893, "grad_norm": 0.66015625, "learning_rate": 9.774103266023042e-06, "loss": 1.4789, "step": 5904 }, { "epoch": 1.0187181920124213, "grad_norm": 0.578125, "learning_rate": 9.771382090067954e-06, "loss": 1.4821, "step": 5905 }, { "epoch": 1.0188907099111533, "grad_norm": 0.59765625, "learning_rate": 9.768660931050314e-06, "loss": 1.2989, "step": 5906 }, { "epoch": 1.0190632278098852, "grad_norm": 0.703125, "learning_rate": 9.765939789171717e-06, "loss": 1.456, "step": 5907 }, { "epoch": 1.0192357457086172, "grad_norm": 0.62109375, "learning_rate": 9.763218664633763e-06, "loss": 1.4041, "step": 5908 }, { "epoch": 1.0194082636073492, "grad_norm": 0.61328125, "learning_rate": 9.760497557638047e-06, "loss": 1.5247, "step": 5909 }, { "epoch": 1.0195807815060813, "grad_norm": 0.56640625, "learning_rate": 9.757776468386171e-06, "loss": 1.3292, "step": 5910 }, { "epoch": 1.0197532994048133, "grad_norm": 0.609375, "learning_rate": 9.755055397079724e-06, "loss": 1.5492, "step": 5911 }, { "epoch": 1.0199258173035453, "grad_norm": 0.578125, "learning_rate": 9.75233434392031e-06, "loss": 1.3097, "step": 5912 }, { "epoch": 1.0200983352022772, "grad_norm": 0.70703125, "learning_rate": 9.749613309109512e-06, "loss": 1.3986, "step": 5913 }, { "epoch": 1.0202708531010092, "grad_norm": 0.58203125, "learning_rate": 9.746892292848925e-06, "loss": 1.4641, "step": 5914 }, { "epoch": 1.0204433709997411, "grad_norm": 0.6015625, "learning_rate": 9.744171295340136e-06, "loss": 1.5802, "step": 5915 }, { "epoch": 1.0206158888984733, "grad_norm": 0.64453125, "learning_rate": 9.74145031678474e-06, "loss": 1.4543, "step": 5916 }, { "epoch": 1.0207884067972053, "grad_norm": 0.58984375, "learning_rate": 9.738729357384318e-06, "loss": 1.4212, "step": 5917 }, { "epoch": 1.0209609246959372, "grad_norm": 0.62109375, "learning_rate": 9.736008417340455e-06, "loss": 1.3583, "step": 5918 }, { "epoch": 1.0211334425946692, "grad_norm": 0.5859375, "learning_rate": 9.733287496854743e-06, "loss": 1.5124, "step": 5919 }, { "epoch": 1.0213059604934012, "grad_norm": 0.6484375, "learning_rate": 9.730566596128756e-06, "loss": 1.4214, "step": 5920 }, { "epoch": 1.0214784783921331, "grad_norm": 0.62890625, "learning_rate": 9.727845715364081e-06, "loss": 1.4619, "step": 5921 }, { "epoch": 1.021650996290865, "grad_norm": 0.58984375, "learning_rate": 9.725124854762294e-06, "loss": 1.329, "step": 5922 }, { "epoch": 1.0218235141895973, "grad_norm": 0.6015625, "learning_rate": 9.722404014524978e-06, "loss": 1.4048, "step": 5923 }, { "epoch": 1.0219960320883292, "grad_norm": 0.59375, "learning_rate": 9.719683194853705e-06, "loss": 1.3779, "step": 5924 }, { "epoch": 1.0221685499870612, "grad_norm": 0.6328125, "learning_rate": 9.716962395950055e-06, "loss": 1.3623, "step": 5925 }, { "epoch": 1.0223410678857932, "grad_norm": 0.59375, "learning_rate": 9.714241618015596e-06, "loss": 1.3491, "step": 5926 }, { "epoch": 1.0225135857845251, "grad_norm": 1.109375, "learning_rate": 9.711520861251904e-06, "loss": 1.4558, "step": 5927 }, { "epoch": 1.022686103683257, "grad_norm": 0.62890625, "learning_rate": 9.708800125860552e-06, "loss": 1.3601, "step": 5928 }, { "epoch": 1.022858621581989, "grad_norm": 1.0546875, "learning_rate": 9.706079412043105e-06, "loss": 1.4023, "step": 5929 }, { "epoch": 1.0230311394807212, "grad_norm": 0.60546875, "learning_rate": 9.703358720001129e-06, "loss": 1.5388, "step": 5930 }, { "epoch": 1.0232036573794532, "grad_norm": 0.578125, "learning_rate": 9.700638049936194e-06, "loss": 1.4385, "step": 5931 }, { "epoch": 1.0233761752781851, "grad_norm": 0.58984375, "learning_rate": 9.697917402049864e-06, "loss": 1.5253, "step": 5932 }, { "epoch": 1.023548693176917, "grad_norm": 0.73046875, "learning_rate": 9.695196776543701e-06, "loss": 1.4444, "step": 5933 }, { "epoch": 1.023721211075649, "grad_norm": 0.60546875, "learning_rate": 9.69247617361926e-06, "loss": 1.3254, "step": 5934 }, { "epoch": 1.023893728974381, "grad_norm": 0.61328125, "learning_rate": 9.689755593478109e-06, "loss": 1.3835, "step": 5935 }, { "epoch": 1.024066246873113, "grad_norm": 0.65625, "learning_rate": 9.687035036321804e-06, "loss": 1.3976, "step": 5936 }, { "epoch": 1.0242387647718452, "grad_norm": 0.65234375, "learning_rate": 9.684314502351895e-06, "loss": 1.5053, "step": 5937 }, { "epoch": 1.0244112826705771, "grad_norm": 0.59765625, "learning_rate": 9.681593991769944e-06, "loss": 1.423, "step": 5938 }, { "epoch": 1.024583800569309, "grad_norm": 0.5703125, "learning_rate": 9.678873504777495e-06, "loss": 1.4018, "step": 5939 }, { "epoch": 1.024756318468041, "grad_norm": 0.61328125, "learning_rate": 9.676153041576111e-06, "loss": 1.4337, "step": 5940 }, { "epoch": 1.024928836366773, "grad_norm": 0.6015625, "learning_rate": 9.673432602367325e-06, "loss": 1.4924, "step": 5941 }, { "epoch": 1.025101354265505, "grad_norm": 0.7890625, "learning_rate": 9.670712187352698e-06, "loss": 1.4626, "step": 5942 }, { "epoch": 1.025273872164237, "grad_norm": 0.63671875, "learning_rate": 9.667991796733764e-06, "loss": 1.5558, "step": 5943 }, { "epoch": 1.0254463900629691, "grad_norm": 0.578125, "learning_rate": 9.665271430712079e-06, "loss": 1.4, "step": 5944 }, { "epoch": 1.025618907961701, "grad_norm": 1.359375, "learning_rate": 9.662551089489174e-06, "loss": 1.474, "step": 5945 }, { "epoch": 1.025791425860433, "grad_norm": 0.63671875, "learning_rate": 9.659830773266593e-06, "loss": 1.4282, "step": 5946 }, { "epoch": 1.025963943759165, "grad_norm": 0.60546875, "learning_rate": 9.657110482245872e-06, "loss": 1.4422, "step": 5947 }, { "epoch": 1.026136461657897, "grad_norm": 0.60546875, "learning_rate": 9.654390216628554e-06, "loss": 1.3946, "step": 5948 }, { "epoch": 1.026308979556629, "grad_norm": 0.6796875, "learning_rate": 9.651669976616165e-06, "loss": 1.547, "step": 5949 }, { "epoch": 1.026481497455361, "grad_norm": 0.65234375, "learning_rate": 9.648949762410238e-06, "loss": 1.449, "step": 5950 }, { "epoch": 1.026654015354093, "grad_norm": 0.58203125, "learning_rate": 9.64622957421231e-06, "loss": 1.4005, "step": 5951 }, { "epoch": 1.026826533252825, "grad_norm": 0.75, "learning_rate": 9.643509412223905e-06, "loss": 1.3911, "step": 5952 }, { "epoch": 1.026999051151557, "grad_norm": 0.640625, "learning_rate": 9.64078927664655e-06, "loss": 1.5121, "step": 5953 }, { "epoch": 1.027171569050289, "grad_norm": 0.6171875, "learning_rate": 9.638069167681768e-06, "loss": 1.3723, "step": 5954 }, { "epoch": 1.027344086949021, "grad_norm": 0.703125, "learning_rate": 9.635349085531088e-06, "loss": 1.3944, "step": 5955 }, { "epoch": 1.0275166048477529, "grad_norm": 0.62109375, "learning_rate": 9.63262903039602e-06, "loss": 1.4605, "step": 5956 }, { "epoch": 1.027689122746485, "grad_norm": 0.58984375, "learning_rate": 9.629909002478095e-06, "loss": 1.5529, "step": 5957 }, { "epoch": 1.027861640645217, "grad_norm": 0.61328125, "learning_rate": 9.627189001978818e-06, "loss": 1.5283, "step": 5958 }, { "epoch": 1.028034158543949, "grad_norm": 0.6328125, "learning_rate": 9.624469029099713e-06, "loss": 1.4798, "step": 5959 }, { "epoch": 1.028206676442681, "grad_norm": 0.73046875, "learning_rate": 9.621749084042288e-06, "loss": 1.4375, "step": 5960 }, { "epoch": 1.028379194341413, "grad_norm": 0.7109375, "learning_rate": 9.619029167008056e-06, "loss": 1.4525, "step": 5961 }, { "epoch": 1.0285517122401449, "grad_norm": 0.7265625, "learning_rate": 9.61630927819852e-06, "loss": 1.4167, "step": 5962 }, { "epoch": 1.0287242301388768, "grad_norm": 0.5859375, "learning_rate": 9.613589417815192e-06, "loss": 1.4118, "step": 5963 }, { "epoch": 1.028896748037609, "grad_norm": 0.6328125, "learning_rate": 9.610869586059574e-06, "loss": 1.4564, "step": 5964 }, { "epoch": 1.029069265936341, "grad_norm": 0.578125, "learning_rate": 9.608149783133169e-06, "loss": 1.4491, "step": 5965 }, { "epoch": 1.029241783835073, "grad_norm": 0.5703125, "learning_rate": 9.605430009237474e-06, "loss": 1.4347, "step": 5966 }, { "epoch": 1.0294143017338049, "grad_norm": 0.6015625, "learning_rate": 9.602710264573991e-06, "loss": 1.4235, "step": 5967 }, { "epoch": 1.0295868196325368, "grad_norm": 0.5703125, "learning_rate": 9.599990549344216e-06, "loss": 1.3666, "step": 5968 }, { "epoch": 1.0297593375312688, "grad_norm": 0.58984375, "learning_rate": 9.597270863749635e-06, "loss": 1.4757, "step": 5969 }, { "epoch": 1.0299318554300008, "grad_norm": 0.55859375, "learning_rate": 9.59455120799175e-06, "loss": 1.4431, "step": 5970 }, { "epoch": 1.030104373328733, "grad_norm": 0.5859375, "learning_rate": 9.59183158227204e-06, "loss": 1.4592, "step": 5971 }, { "epoch": 1.030276891227465, "grad_norm": 0.6328125, "learning_rate": 9.589111986792004e-06, "loss": 1.5002, "step": 5972 }, { "epoch": 1.0304494091261969, "grad_norm": 0.6484375, "learning_rate": 9.586392421753115e-06, "loss": 1.4541, "step": 5973 }, { "epoch": 1.0306219270249288, "grad_norm": 1.0625, "learning_rate": 9.583672887356862e-06, "loss": 1.3925, "step": 5974 }, { "epoch": 1.0307944449236608, "grad_norm": 0.6484375, "learning_rate": 9.580953383804718e-06, "loss": 1.5073, "step": 5975 }, { "epoch": 1.0309669628223928, "grad_norm": 0.6328125, "learning_rate": 9.57823391129817e-06, "loss": 1.4615, "step": 5976 }, { "epoch": 1.0311394807211247, "grad_norm": 0.61328125, "learning_rate": 9.575514470038688e-06, "loss": 1.525, "step": 5977 }, { "epoch": 1.031311998619857, "grad_norm": 0.62890625, "learning_rate": 9.572795060227748e-06, "loss": 1.4352, "step": 5978 }, { "epoch": 1.0314845165185889, "grad_norm": 0.765625, "learning_rate": 9.570075682066815e-06, "loss": 1.4863, "step": 5979 }, { "epoch": 1.0316570344173208, "grad_norm": 0.609375, "learning_rate": 9.567356335757366e-06, "loss": 1.4522, "step": 5980 }, { "epoch": 1.0318295523160528, "grad_norm": 0.66015625, "learning_rate": 9.56463702150086e-06, "loss": 1.4746, "step": 5981 }, { "epoch": 1.0320020702147847, "grad_norm": 0.60546875, "learning_rate": 9.561917739498761e-06, "loss": 1.4016, "step": 5982 }, { "epoch": 1.0321745881135167, "grad_norm": 0.61328125, "learning_rate": 9.55919848995254e-06, "loss": 1.4406, "step": 5983 }, { "epoch": 1.0323471060122489, "grad_norm": 0.6484375, "learning_rate": 9.556479273063644e-06, "loss": 1.4135, "step": 5984 }, { "epoch": 1.0325196239109808, "grad_norm": 0.75390625, "learning_rate": 9.55376008903354e-06, "loss": 1.4579, "step": 5985 }, { "epoch": 1.0326921418097128, "grad_norm": 0.625, "learning_rate": 9.551040938063671e-06, "loss": 1.4233, "step": 5986 }, { "epoch": 1.0328646597084448, "grad_norm": 0.6953125, "learning_rate": 9.548321820355498e-06, "loss": 1.4576, "step": 5987 }, { "epoch": 1.0330371776071767, "grad_norm": 0.6953125, "learning_rate": 9.545602736110467e-06, "loss": 1.4506, "step": 5988 }, { "epoch": 1.0332096955059087, "grad_norm": 0.61328125, "learning_rate": 9.542883685530024e-06, "loss": 1.4305, "step": 5989 }, { "epoch": 1.0333822134046406, "grad_norm": 0.6015625, "learning_rate": 9.54016466881561e-06, "loss": 1.4593, "step": 5990 }, { "epoch": 1.0335547313033728, "grad_norm": 0.58984375, "learning_rate": 9.537445686168676e-06, "loss": 1.3949, "step": 5991 }, { "epoch": 1.0337272492021048, "grad_norm": 0.64453125, "learning_rate": 9.534726737790652e-06, "loss": 1.3817, "step": 5992 }, { "epoch": 1.0338997671008368, "grad_norm": 0.62109375, "learning_rate": 9.532007823882983e-06, "loss": 1.3964, "step": 5993 }, { "epoch": 1.0340722849995687, "grad_norm": 0.70703125, "learning_rate": 9.529288944647093e-06, "loss": 1.4328, "step": 5994 }, { "epoch": 1.0342448028983007, "grad_norm": 0.578125, "learning_rate": 9.526570100284423e-06, "loss": 1.4962, "step": 5995 }, { "epoch": 1.0344173207970326, "grad_norm": 0.63671875, "learning_rate": 9.523851290996397e-06, "loss": 1.4092, "step": 5996 }, { "epoch": 1.0345898386957646, "grad_norm": 0.6328125, "learning_rate": 9.521132516984442e-06, "loss": 1.3904, "step": 5997 }, { "epoch": 1.0347623565944968, "grad_norm": 0.58203125, "learning_rate": 9.518413778449981e-06, "loss": 1.3421, "step": 5998 }, { "epoch": 1.0349348744932287, "grad_norm": 0.859375, "learning_rate": 9.515695075594434e-06, "loss": 1.3831, "step": 5999 }, { "epoch": 1.0351073923919607, "grad_norm": 0.66796875, "learning_rate": 9.512976408619227e-06, "loss": 1.4474, "step": 6000 }, { "epoch": 1.0351073923919607, "eval_loss": 1.4101243019104004, "eval_runtime": 10.873, "eval_samples_per_second": 94.178, "eval_steps_per_second": 23.545, "step": 6000 }, { "epoch": 1.0352799102906927, "grad_norm": 0.86328125, "learning_rate": 9.510257777725769e-06, "loss": 1.3563, "step": 6001 }, { "epoch": 1.0354524281894246, "grad_norm": 0.6015625, "learning_rate": 9.507539183115479e-06, "loss": 1.3893, "step": 6002 }, { "epoch": 1.0356249460881566, "grad_norm": 0.62890625, "learning_rate": 9.504820624989756e-06, "loss": 1.3792, "step": 6003 }, { "epoch": 1.0357974639868885, "grad_norm": 0.61328125, "learning_rate": 9.50210210355002e-06, "loss": 1.5117, "step": 6004 }, { "epoch": 1.0359699818856207, "grad_norm": 0.578125, "learning_rate": 9.499383618997669e-06, "loss": 1.3749, "step": 6005 }, { "epoch": 1.0361424997843527, "grad_norm": 0.58984375, "learning_rate": 9.49666517153411e-06, "loss": 1.5192, "step": 6006 }, { "epoch": 1.0363150176830846, "grad_norm": 0.5703125, "learning_rate": 9.493946761360736e-06, "loss": 1.4583, "step": 6007 }, { "epoch": 1.0364875355818166, "grad_norm": 0.55078125, "learning_rate": 9.491228388678952e-06, "loss": 1.4411, "step": 6008 }, { "epoch": 1.0366600534805486, "grad_norm": 0.66796875, "learning_rate": 9.488510053690147e-06, "loss": 1.4219, "step": 6009 }, { "epoch": 1.0368325713792805, "grad_norm": 0.58984375, "learning_rate": 9.485791756595714e-06, "loss": 1.4116, "step": 6010 }, { "epoch": 1.0370050892780125, "grad_norm": 0.64453125, "learning_rate": 9.483073497597037e-06, "loss": 1.4544, "step": 6011 }, { "epoch": 1.0371776071767447, "grad_norm": 0.6875, "learning_rate": 9.480355276895508e-06, "loss": 1.381, "step": 6012 }, { "epoch": 1.0373501250754766, "grad_norm": 0.62109375, "learning_rate": 9.477637094692505e-06, "loss": 1.5196, "step": 6013 }, { "epoch": 1.0375226429742086, "grad_norm": 0.734375, "learning_rate": 9.47491895118941e-06, "loss": 1.4163, "step": 6014 }, { "epoch": 1.0376951608729406, "grad_norm": 0.6484375, "learning_rate": 9.472200846587603e-06, "loss": 1.4176, "step": 6015 }, { "epoch": 1.0378676787716725, "grad_norm": 0.5859375, "learning_rate": 9.469482781088451e-06, "loss": 1.435, "step": 6016 }, { "epoch": 1.0380401966704045, "grad_norm": 0.6328125, "learning_rate": 9.466764754893334e-06, "loss": 1.4506, "step": 6017 }, { "epoch": 1.0382127145691364, "grad_norm": 0.8671875, "learning_rate": 9.464046768203611e-06, "loss": 1.3685, "step": 6018 }, { "epoch": 1.0383852324678686, "grad_norm": 0.81640625, "learning_rate": 9.461328821220657e-06, "loss": 1.4066, "step": 6019 }, { "epoch": 1.0385577503666006, "grad_norm": 0.64453125, "learning_rate": 9.458610914145826e-06, "loss": 1.3749, "step": 6020 }, { "epoch": 1.0387302682653325, "grad_norm": 0.6015625, "learning_rate": 9.455893047180485e-06, "loss": 1.3851, "step": 6021 }, { "epoch": 1.0389027861640645, "grad_norm": 0.60546875, "learning_rate": 9.453175220525981e-06, "loss": 1.4537, "step": 6022 }, { "epoch": 1.0390753040627965, "grad_norm": 0.5859375, "learning_rate": 9.450457434383679e-06, "loss": 1.3407, "step": 6023 }, { "epoch": 1.0392478219615284, "grad_norm": 0.65625, "learning_rate": 9.44773968895492e-06, "loss": 1.4205, "step": 6024 }, { "epoch": 1.0394203398602606, "grad_norm": 0.609375, "learning_rate": 9.445021984441059e-06, "loss": 1.4163, "step": 6025 }, { "epoch": 1.0395928577589926, "grad_norm": 0.57421875, "learning_rate": 9.442304321043433e-06, "loss": 1.4961, "step": 6026 }, { "epoch": 1.0397653756577245, "grad_norm": 0.609375, "learning_rate": 9.43958669896339e-06, "loss": 1.3901, "step": 6027 }, { "epoch": 1.0399378935564565, "grad_norm": 0.578125, "learning_rate": 9.436869118402265e-06, "loss": 1.4272, "step": 6028 }, { "epoch": 1.0401104114551885, "grad_norm": 0.68359375, "learning_rate": 9.434151579561397e-06, "loss": 1.4163, "step": 6029 }, { "epoch": 1.0402829293539204, "grad_norm": 0.6328125, "learning_rate": 9.431434082642112e-06, "loss": 1.5161, "step": 6030 }, { "epoch": 1.0404554472526524, "grad_norm": 0.5859375, "learning_rate": 9.428716627845742e-06, "loss": 1.3931, "step": 6031 }, { "epoch": 1.0406279651513846, "grad_norm": 0.6328125, "learning_rate": 9.425999215373617e-06, "loss": 1.4832, "step": 6032 }, { "epoch": 1.0408004830501165, "grad_norm": 0.63671875, "learning_rate": 9.423281845427056e-06, "loss": 1.4823, "step": 6033 }, { "epoch": 1.0409730009488485, "grad_norm": 0.58984375, "learning_rate": 9.420564518207382e-06, "loss": 1.4338, "step": 6034 }, { "epoch": 1.0411455188475804, "grad_norm": 0.609375, "learning_rate": 9.417847233915902e-06, "loss": 1.5086, "step": 6035 }, { "epoch": 1.0413180367463124, "grad_norm": 0.61328125, "learning_rate": 9.415129992753943e-06, "loss": 1.4712, "step": 6036 }, { "epoch": 1.0414905546450444, "grad_norm": 0.60546875, "learning_rate": 9.412412794922805e-06, "loss": 1.4372, "step": 6037 }, { "epoch": 1.0416630725437763, "grad_norm": 0.5390625, "learning_rate": 9.409695640623802e-06, "loss": 1.3527, "step": 6038 }, { "epoch": 1.0418355904425085, "grad_norm": 0.6484375, "learning_rate": 9.406978530058229e-06, "loss": 1.4659, "step": 6039 }, { "epoch": 1.0420081083412405, "grad_norm": 0.80859375, "learning_rate": 9.404261463427396e-06, "loss": 1.3717, "step": 6040 }, { "epoch": 1.0421806262399724, "grad_norm": 0.91015625, "learning_rate": 9.401544440932596e-06, "loss": 1.3971, "step": 6041 }, { "epoch": 1.0423531441387044, "grad_norm": 0.6796875, "learning_rate": 9.398827462775122e-06, "loss": 1.408, "step": 6042 }, { "epoch": 1.0425256620374364, "grad_norm": 0.70703125, "learning_rate": 9.396110529156263e-06, "loss": 1.4351, "step": 6043 }, { "epoch": 1.0426981799361683, "grad_norm": 0.609375, "learning_rate": 9.393393640277316e-06, "loss": 1.4595, "step": 6044 }, { "epoch": 1.0428706978349003, "grad_norm": 0.57421875, "learning_rate": 9.390676796339552e-06, "loss": 1.4187, "step": 6045 }, { "epoch": 1.0430432157336325, "grad_norm": 0.59375, "learning_rate": 9.387959997544263e-06, "loss": 1.3626, "step": 6046 }, { "epoch": 1.0432157336323644, "grad_norm": 0.5859375, "learning_rate": 9.385243244092717e-06, "loss": 1.6359, "step": 6047 }, { "epoch": 1.0433882515310964, "grad_norm": 0.6484375, "learning_rate": 9.382526536186194e-06, "loss": 1.3945, "step": 6048 }, { "epoch": 1.0435607694298283, "grad_norm": 0.70703125, "learning_rate": 9.379809874025967e-06, "loss": 1.5335, "step": 6049 }, { "epoch": 1.0437332873285603, "grad_norm": 0.57421875, "learning_rate": 9.377093257813293e-06, "loss": 1.3544, "step": 6050 }, { "epoch": 1.0439058052272923, "grad_norm": 0.62109375, "learning_rate": 9.374376687749449e-06, "loss": 1.4395, "step": 6051 }, { "epoch": 1.0440783231260242, "grad_norm": 0.63671875, "learning_rate": 9.371660164035687e-06, "loss": 1.4785, "step": 6052 }, { "epoch": 1.0442508410247564, "grad_norm": 0.54296875, "learning_rate": 9.368943686873266e-06, "loss": 1.4134, "step": 6053 }, { "epoch": 1.0444233589234884, "grad_norm": 0.65234375, "learning_rate": 9.366227256463437e-06, "loss": 1.4454, "step": 6054 }, { "epoch": 1.0445958768222203, "grad_norm": 0.5859375, "learning_rate": 9.363510873007458e-06, "loss": 1.4406, "step": 6055 }, { "epoch": 1.0447683947209523, "grad_norm": 0.69921875, "learning_rate": 9.360794536706566e-06, "loss": 1.5395, "step": 6056 }, { "epoch": 1.0449409126196842, "grad_norm": 0.7109375, "learning_rate": 9.358078247762012e-06, "loss": 1.4343, "step": 6057 }, { "epoch": 1.0451134305184162, "grad_norm": 0.5703125, "learning_rate": 9.355362006375029e-06, "loss": 1.4209, "step": 6058 }, { "epoch": 1.0452859484171482, "grad_norm": 0.69921875, "learning_rate": 9.35264581274686e-06, "loss": 1.4438, "step": 6059 }, { "epoch": 1.0454584663158804, "grad_norm": 0.6015625, "learning_rate": 9.349929667078729e-06, "loss": 1.4041, "step": 6060 }, { "epoch": 1.0456309842146123, "grad_norm": 0.640625, "learning_rate": 9.347213569571876e-06, "loss": 1.4805, "step": 6061 }, { "epoch": 1.0458035021133443, "grad_norm": 0.6015625, "learning_rate": 9.344497520427517e-06, "loss": 1.4596, "step": 6062 }, { "epoch": 1.0459760200120762, "grad_norm": 0.6015625, "learning_rate": 9.341781519846875e-06, "loss": 1.4914, "step": 6063 }, { "epoch": 1.0461485379108082, "grad_norm": 0.65625, "learning_rate": 9.339065568031176e-06, "loss": 1.4308, "step": 6064 }, { "epoch": 1.0463210558095402, "grad_norm": 0.640625, "learning_rate": 9.336349665181628e-06, "loss": 1.3531, "step": 6065 }, { "epoch": 1.0464935737082723, "grad_norm": 0.58984375, "learning_rate": 9.333633811499444e-06, "loss": 1.4219, "step": 6066 }, { "epoch": 1.0466660916070043, "grad_norm": 0.6015625, "learning_rate": 9.330918007185828e-06, "loss": 1.4653, "step": 6067 }, { "epoch": 1.0468386095057363, "grad_norm": 0.546875, "learning_rate": 9.328202252441989e-06, "loss": 1.332, "step": 6068 }, { "epoch": 1.0470111274044682, "grad_norm": 0.69140625, "learning_rate": 9.325486547469124e-06, "loss": 1.3493, "step": 6069 }, { "epoch": 1.0471836453032002, "grad_norm": 0.65234375, "learning_rate": 9.322770892468433e-06, "loss": 1.4375, "step": 6070 }, { "epoch": 1.0473561632019321, "grad_norm": 0.6640625, "learning_rate": 9.3200552876411e-06, "loss": 1.3934, "step": 6071 }, { "epoch": 1.047528681100664, "grad_norm": 0.60546875, "learning_rate": 9.317339733188324e-06, "loss": 1.4507, "step": 6072 }, { "epoch": 1.0477011989993963, "grad_norm": 0.56640625, "learning_rate": 9.314624229311285e-06, "loss": 1.4125, "step": 6073 }, { "epoch": 1.0478737168981282, "grad_norm": 0.609375, "learning_rate": 9.311908776211167e-06, "loss": 1.4712, "step": 6074 }, { "epoch": 1.0480462347968602, "grad_norm": 0.59765625, "learning_rate": 9.309193374089141e-06, "loss": 1.4621, "step": 6075 }, { "epoch": 1.0482187526955922, "grad_norm": 0.609375, "learning_rate": 9.306478023146393e-06, "loss": 1.5664, "step": 6076 }, { "epoch": 1.0483912705943241, "grad_norm": 0.5625, "learning_rate": 9.303762723584082e-06, "loss": 1.4725, "step": 6077 }, { "epoch": 1.048563788493056, "grad_norm": 0.63671875, "learning_rate": 9.301047475603382e-06, "loss": 1.3968, "step": 6078 }, { "epoch": 1.048736306391788, "grad_norm": 0.75390625, "learning_rate": 9.298332279405447e-06, "loss": 1.4459, "step": 6079 }, { "epoch": 1.0489088242905202, "grad_norm": 0.6171875, "learning_rate": 9.295617135191445e-06, "loss": 1.353, "step": 6080 }, { "epoch": 1.0490813421892522, "grad_norm": 0.62109375, "learning_rate": 9.292902043162526e-06, "loss": 1.428, "step": 6081 }, { "epoch": 1.0492538600879842, "grad_norm": 0.59375, "learning_rate": 9.290187003519841e-06, "loss": 1.3785, "step": 6082 }, { "epoch": 1.0494263779867161, "grad_norm": 0.5859375, "learning_rate": 9.28747201646454e-06, "loss": 1.4798, "step": 6083 }, { "epoch": 1.049598895885448, "grad_norm": 0.59765625, "learning_rate": 9.284757082197763e-06, "loss": 1.3638, "step": 6084 }, { "epoch": 1.04977141378418, "grad_norm": 0.6796875, "learning_rate": 9.282042200920652e-06, "loss": 1.3908, "step": 6085 }, { "epoch": 1.049943931682912, "grad_norm": 0.6015625, "learning_rate": 9.279327372834335e-06, "loss": 1.3872, "step": 6086 }, { "epoch": 1.0501164495816442, "grad_norm": 0.63671875, "learning_rate": 9.276612598139956e-06, "loss": 1.5071, "step": 6087 }, { "epoch": 1.0502889674803761, "grad_norm": 0.58203125, "learning_rate": 9.27389787703863e-06, "loss": 1.454, "step": 6088 }, { "epoch": 1.050461485379108, "grad_norm": 0.59765625, "learning_rate": 9.27118320973149e-06, "loss": 1.4289, "step": 6089 }, { "epoch": 1.05063400327784, "grad_norm": 0.6875, "learning_rate": 9.268468596419651e-06, "loss": 1.473, "step": 6090 }, { "epoch": 1.050806521176572, "grad_norm": 1.1171875, "learning_rate": 9.265754037304232e-06, "loss": 1.4485, "step": 6091 }, { "epoch": 1.050979039075304, "grad_norm": 0.62890625, "learning_rate": 9.263039532586336e-06, "loss": 1.4458, "step": 6092 }, { "epoch": 1.051151556974036, "grad_norm": 0.578125, "learning_rate": 9.26032508246708e-06, "loss": 1.3861, "step": 6093 }, { "epoch": 1.0513240748727681, "grad_norm": 0.58984375, "learning_rate": 9.257610687147562e-06, "loss": 1.4952, "step": 6094 }, { "epoch": 1.0514965927715, "grad_norm": 0.56640625, "learning_rate": 9.25489634682888e-06, "loss": 1.419, "step": 6095 }, { "epoch": 1.051669110670232, "grad_norm": 0.53515625, "learning_rate": 9.252182061712137e-06, "loss": 1.3722, "step": 6096 }, { "epoch": 1.051841628568964, "grad_norm": 0.59375, "learning_rate": 9.249467831998417e-06, "loss": 1.4752, "step": 6097 }, { "epoch": 1.052014146467696, "grad_norm": 0.58984375, "learning_rate": 9.246753657888814e-06, "loss": 1.4241, "step": 6098 }, { "epoch": 1.052186664366428, "grad_norm": 0.640625, "learning_rate": 9.244039539584399e-06, "loss": 1.3496, "step": 6099 }, { "epoch": 1.05235918226516, "grad_norm": 0.58203125, "learning_rate": 9.241325477286265e-06, "loss": 1.4379, "step": 6100 }, { "epoch": 1.05235918226516, "eval_loss": 1.4098589420318604, "eval_runtime": 10.8253, "eval_samples_per_second": 94.593, "eval_steps_per_second": 23.648, "step": 6100 }, { "epoch": 1.052531700163892, "grad_norm": 0.58203125, "learning_rate": 9.238611471195477e-06, "loss": 1.3604, "step": 6101 }, { "epoch": 1.052704218062624, "grad_norm": 0.59765625, "learning_rate": 9.235897521513111e-06, "loss": 1.401, "step": 6102 }, { "epoch": 1.052876735961356, "grad_norm": 0.609375, "learning_rate": 9.233183628440227e-06, "loss": 1.4434, "step": 6103 }, { "epoch": 1.053049253860088, "grad_norm": 0.59765625, "learning_rate": 9.230469792177894e-06, "loss": 1.3778, "step": 6104 }, { "epoch": 1.05322177175882, "grad_norm": 0.62109375, "learning_rate": 9.227756012927166e-06, "loss": 1.3925, "step": 6105 }, { "epoch": 1.0533942896575519, "grad_norm": 0.62109375, "learning_rate": 9.2250422908891e-06, "loss": 1.4522, "step": 6106 }, { "epoch": 1.053566807556284, "grad_norm": 0.5390625, "learning_rate": 9.222328626264738e-06, "loss": 1.3521, "step": 6107 }, { "epoch": 1.053739325455016, "grad_norm": 0.59375, "learning_rate": 9.219615019255136e-06, "loss": 1.4829, "step": 6108 }, { "epoch": 1.053911843353748, "grad_norm": 0.6796875, "learning_rate": 9.216901470061326e-06, "loss": 1.3929, "step": 6109 }, { "epoch": 1.05408436125248, "grad_norm": 0.58984375, "learning_rate": 9.21418797888435e-06, "loss": 1.4325, "step": 6110 }, { "epoch": 1.054256879151212, "grad_norm": 0.6015625, "learning_rate": 9.211474545925237e-06, "loss": 1.4691, "step": 6111 }, { "epoch": 1.0544293970499439, "grad_norm": 0.59765625, "learning_rate": 9.208761171385017e-06, "loss": 1.4809, "step": 6112 }, { "epoch": 1.0546019149486758, "grad_norm": 0.5859375, "learning_rate": 9.206047855464715e-06, "loss": 1.4147, "step": 6113 }, { "epoch": 1.054774432847408, "grad_norm": 0.59765625, "learning_rate": 9.203334598365345e-06, "loss": 1.3983, "step": 6114 }, { "epoch": 1.05494695074614, "grad_norm": 0.71875, "learning_rate": 9.200621400287929e-06, "loss": 1.4555, "step": 6115 }, { "epoch": 1.055119468644872, "grad_norm": 0.5703125, "learning_rate": 9.19790826143347e-06, "loss": 1.4068, "step": 6116 }, { "epoch": 1.055291986543604, "grad_norm": 0.65234375, "learning_rate": 9.195195182002987e-06, "loss": 1.381, "step": 6117 }, { "epoch": 1.0554645044423359, "grad_norm": 0.58203125, "learning_rate": 9.192482162197466e-06, "loss": 1.4942, "step": 6118 }, { "epoch": 1.0556370223410678, "grad_norm": 0.59375, "learning_rate": 9.189769202217918e-06, "loss": 1.4388, "step": 6119 }, { "epoch": 1.0558095402397998, "grad_norm": 0.61328125, "learning_rate": 9.187056302265324e-06, "loss": 1.5254, "step": 6120 }, { "epoch": 1.055982058138532, "grad_norm": 0.6171875, "learning_rate": 9.184343462540683e-06, "loss": 1.3021, "step": 6121 }, { "epoch": 1.056154576037264, "grad_norm": 0.6015625, "learning_rate": 9.181630683244972e-06, "loss": 1.3728, "step": 6122 }, { "epoch": 1.0563270939359959, "grad_norm": 0.609375, "learning_rate": 9.178917964579176e-06, "loss": 1.3719, "step": 6123 }, { "epoch": 1.0564996118347278, "grad_norm": 0.5859375, "learning_rate": 9.176205306744265e-06, "loss": 1.4278, "step": 6124 }, { "epoch": 1.0566721297334598, "grad_norm": 0.62109375, "learning_rate": 9.173492709941215e-06, "loss": 1.459, "step": 6125 }, { "epoch": 1.0568446476321918, "grad_norm": 0.65625, "learning_rate": 9.170780174370988e-06, "loss": 1.4969, "step": 6126 }, { "epoch": 1.0570171655309237, "grad_norm": 0.625, "learning_rate": 9.168067700234542e-06, "loss": 1.4081, "step": 6127 }, { "epoch": 1.057189683429656, "grad_norm": 0.67578125, "learning_rate": 9.165355287732846e-06, "loss": 1.3269, "step": 6128 }, { "epoch": 1.0573622013283879, "grad_norm": 0.609375, "learning_rate": 9.162642937066843e-06, "loss": 1.3879, "step": 6129 }, { "epoch": 1.0575347192271198, "grad_norm": 0.60546875, "learning_rate": 9.159930648437484e-06, "loss": 1.5195, "step": 6130 }, { "epoch": 1.0577072371258518, "grad_norm": 0.6328125, "learning_rate": 9.157218422045708e-06, "loss": 1.5369, "step": 6131 }, { "epoch": 1.0578797550245838, "grad_norm": 0.69921875, "learning_rate": 9.154506258092462e-06, "loss": 1.4377, "step": 6132 }, { "epoch": 1.0580522729233157, "grad_norm": 0.65234375, "learning_rate": 9.151794156778673e-06, "loss": 1.4826, "step": 6133 }, { "epoch": 1.058224790822048, "grad_norm": 0.5546875, "learning_rate": 9.149082118305274e-06, "loss": 1.4615, "step": 6134 }, { "epoch": 1.0583973087207799, "grad_norm": 0.58203125, "learning_rate": 9.146370142873185e-06, "loss": 1.5071, "step": 6135 }, { "epoch": 1.0585698266195118, "grad_norm": 0.6015625, "learning_rate": 9.143658230683335e-06, "loss": 1.412, "step": 6136 }, { "epoch": 1.0587423445182438, "grad_norm": 0.640625, "learning_rate": 9.140946381936629e-06, "loss": 1.322, "step": 6137 }, { "epoch": 1.0589148624169757, "grad_norm": 0.625, "learning_rate": 9.138234596833987e-06, "loss": 1.4451, "step": 6138 }, { "epoch": 1.0590873803157077, "grad_norm": 0.5703125, "learning_rate": 9.135522875576305e-06, "loss": 1.3518, "step": 6139 }, { "epoch": 1.0592598982144397, "grad_norm": 0.74609375, "learning_rate": 9.132811218364494e-06, "loss": 1.46, "step": 6140 }, { "epoch": 1.0594324161131718, "grad_norm": 0.8125, "learning_rate": 9.130099625399446e-06, "loss": 1.4321, "step": 6141 }, { "epoch": 1.0596049340119038, "grad_norm": 0.63671875, "learning_rate": 9.127388096882054e-06, "loss": 1.4989, "step": 6142 }, { "epoch": 1.0597774519106358, "grad_norm": 0.796875, "learning_rate": 9.1246766330132e-06, "loss": 1.4134, "step": 6143 }, { "epoch": 1.0599499698093677, "grad_norm": 0.609375, "learning_rate": 9.121965233993773e-06, "loss": 1.3734, "step": 6144 }, { "epoch": 1.0601224877080997, "grad_norm": 0.671875, "learning_rate": 9.119253900024649e-06, "loss": 1.4286, "step": 6145 }, { "epoch": 1.0602950056068317, "grad_norm": 0.9609375, "learning_rate": 9.116542631306695e-06, "loss": 1.5147, "step": 6146 }, { "epoch": 1.0604675235055636, "grad_norm": 0.6484375, "learning_rate": 9.113831428040789e-06, "loss": 1.4119, "step": 6147 }, { "epoch": 1.0606400414042958, "grad_norm": 0.6171875, "learning_rate": 9.11112029042778e-06, "loss": 1.4222, "step": 6148 }, { "epoch": 1.0608125593030278, "grad_norm": 0.65234375, "learning_rate": 9.108409218668542e-06, "loss": 1.3741, "step": 6149 }, { "epoch": 1.0609850772017597, "grad_norm": 0.58984375, "learning_rate": 9.105698212963915e-06, "loss": 1.4079, "step": 6150 }, { "epoch": 1.0611575951004917, "grad_norm": 0.6953125, "learning_rate": 9.102987273514757e-06, "loss": 1.4237, "step": 6151 }, { "epoch": 1.0613301129992236, "grad_norm": 0.6171875, "learning_rate": 9.100276400521898e-06, "loss": 1.3756, "step": 6152 }, { "epoch": 1.0615026308979556, "grad_norm": 0.60546875, "learning_rate": 9.097565594186194e-06, "loss": 1.4186, "step": 6153 }, { "epoch": 1.0616751487966876, "grad_norm": 0.72265625, "learning_rate": 9.094854854708464e-06, "loss": 1.4798, "step": 6154 }, { "epoch": 1.0618476666954197, "grad_norm": 0.609375, "learning_rate": 9.092144182289546e-06, "loss": 1.4217, "step": 6155 }, { "epoch": 1.0620201845941517, "grad_norm": 0.59375, "learning_rate": 9.089433577130256e-06, "loss": 1.4153, "step": 6156 }, { "epoch": 1.0621927024928837, "grad_norm": 1.125, "learning_rate": 9.086723039431418e-06, "loss": 1.3932, "step": 6157 }, { "epoch": 1.0623652203916156, "grad_norm": 0.6640625, "learning_rate": 9.084012569393842e-06, "loss": 1.4848, "step": 6158 }, { "epoch": 1.0625377382903476, "grad_norm": 0.6015625, "learning_rate": 9.081302167218339e-06, "loss": 1.4844, "step": 6159 }, { "epoch": 1.0627102561890795, "grad_norm": 0.6328125, "learning_rate": 9.078591833105712e-06, "loss": 1.4191, "step": 6160 }, { "epoch": 1.0628827740878115, "grad_norm": 0.578125, "learning_rate": 9.075881567256759e-06, "loss": 1.3469, "step": 6161 }, { "epoch": 1.0630552919865437, "grad_norm": 0.7421875, "learning_rate": 9.073171369872275e-06, "loss": 1.4488, "step": 6162 }, { "epoch": 1.0632278098852757, "grad_norm": 0.6015625, "learning_rate": 9.070461241153044e-06, "loss": 1.3692, "step": 6163 }, { "epoch": 1.0634003277840076, "grad_norm": 0.55859375, "learning_rate": 9.067751181299856e-06, "loss": 1.3982, "step": 6164 }, { "epoch": 1.0635728456827396, "grad_norm": 0.73046875, "learning_rate": 9.065041190513483e-06, "loss": 1.406, "step": 6165 }, { "epoch": 1.0637453635814715, "grad_norm": 0.61328125, "learning_rate": 9.062331268994704e-06, "loss": 1.4365, "step": 6166 }, { "epoch": 1.0639178814802035, "grad_norm": 0.640625, "learning_rate": 9.059621416944277e-06, "loss": 1.3919, "step": 6167 }, { "epoch": 1.0640903993789355, "grad_norm": 0.6171875, "learning_rate": 9.056911634562975e-06, "loss": 1.2976, "step": 6168 }, { "epoch": 1.0642629172776676, "grad_norm": 0.625, "learning_rate": 9.054201922051552e-06, "loss": 1.3825, "step": 6169 }, { "epoch": 1.0644354351763996, "grad_norm": 0.53125, "learning_rate": 9.051492279610763e-06, "loss": 1.3458, "step": 6170 }, { "epoch": 1.0646079530751316, "grad_norm": 0.6171875, "learning_rate": 9.048782707441346e-06, "loss": 1.3666, "step": 6171 }, { "epoch": 1.0647804709738635, "grad_norm": 0.6015625, "learning_rate": 9.046073205744053e-06, "loss": 1.4677, "step": 6172 }, { "epoch": 1.0649529888725955, "grad_norm": 0.6328125, "learning_rate": 9.043363774719618e-06, "loss": 1.4026, "step": 6173 }, { "epoch": 1.0651255067713274, "grad_norm": 0.6171875, "learning_rate": 9.040654414568772e-06, "loss": 1.5799, "step": 6174 }, { "epoch": 1.0652980246700596, "grad_norm": 0.5859375, "learning_rate": 9.037945125492238e-06, "loss": 1.4668, "step": 6175 }, { "epoch": 1.0654705425687916, "grad_norm": 0.6484375, "learning_rate": 9.035235907690739e-06, "loss": 1.4445, "step": 6176 }, { "epoch": 1.0656430604675236, "grad_norm": 0.6171875, "learning_rate": 9.032526761364999e-06, "loss": 1.4619, "step": 6177 }, { "epoch": 1.0658155783662555, "grad_norm": 0.6484375, "learning_rate": 9.029817686715717e-06, "loss": 1.4089, "step": 6178 }, { "epoch": 1.0659880962649875, "grad_norm": 0.62890625, "learning_rate": 9.027108683943605e-06, "loss": 1.4093, "step": 6179 }, { "epoch": 1.0661606141637194, "grad_norm": 0.66796875, "learning_rate": 9.024399753249358e-06, "loss": 1.4857, "step": 6180 }, { "epoch": 1.0663331320624514, "grad_norm": 0.578125, "learning_rate": 9.021690894833676e-06, "loss": 1.4078, "step": 6181 }, { "epoch": 1.0665056499611834, "grad_norm": 0.6015625, "learning_rate": 9.018982108897243e-06, "loss": 1.4882, "step": 6182 }, { "epoch": 1.0666781678599155, "grad_norm": 0.5703125, "learning_rate": 9.016273395640748e-06, "loss": 1.362, "step": 6183 }, { "epoch": 1.0668506857586475, "grad_norm": 0.80859375, "learning_rate": 9.013564755264862e-06, "loss": 1.411, "step": 6184 }, { "epoch": 1.0670232036573795, "grad_norm": 0.69140625, "learning_rate": 9.010856187970267e-06, "loss": 1.3623, "step": 6185 }, { "epoch": 1.0671957215561114, "grad_norm": 0.6875, "learning_rate": 9.008147693957624e-06, "loss": 1.4222, "step": 6186 }, { "epoch": 1.0673682394548434, "grad_norm": 0.578125, "learning_rate": 9.005439273427597e-06, "loss": 1.3998, "step": 6187 }, { "epoch": 1.0675407573535753, "grad_norm": 0.59375, "learning_rate": 9.00273092658084e-06, "loss": 1.4059, "step": 6188 }, { "epoch": 1.0677132752523075, "grad_norm": 0.6015625, "learning_rate": 9.000022653618012e-06, "loss": 1.3935, "step": 6189 }, { "epoch": 1.0678857931510395, "grad_norm": 0.8046875, "learning_rate": 8.997314454739752e-06, "loss": 1.2628, "step": 6190 }, { "epoch": 1.0680583110497714, "grad_norm": 0.64453125, "learning_rate": 8.9946063301467e-06, "loss": 1.4216, "step": 6191 }, { "epoch": 1.0682308289485034, "grad_norm": 0.62890625, "learning_rate": 8.991898280039498e-06, "loss": 1.4487, "step": 6192 }, { "epoch": 1.0684033468472354, "grad_norm": 0.61328125, "learning_rate": 8.989190304618767e-06, "loss": 1.4705, "step": 6193 }, { "epoch": 1.0685758647459673, "grad_norm": 0.60546875, "learning_rate": 8.986482404085137e-06, "loss": 1.4954, "step": 6194 }, { "epoch": 1.0687483826446993, "grad_norm": 1.3515625, "learning_rate": 8.983774578639219e-06, "loss": 1.4177, "step": 6195 }, { "epoch": 1.0689209005434315, "grad_norm": 0.55859375, "learning_rate": 8.981066828481635e-06, "loss": 1.4546, "step": 6196 }, { "epoch": 1.0690934184421634, "grad_norm": 0.58984375, "learning_rate": 8.978359153812982e-06, "loss": 1.4029, "step": 6197 }, { "epoch": 1.0692659363408954, "grad_norm": 0.6328125, "learning_rate": 8.975651554833869e-06, "loss": 1.3723, "step": 6198 }, { "epoch": 1.0694384542396274, "grad_norm": 0.625, "learning_rate": 8.972944031744886e-06, "loss": 1.4171, "step": 6199 }, { "epoch": 1.0696109721383593, "grad_norm": 0.578125, "learning_rate": 8.970236584746631e-06, "loss": 1.4983, "step": 6200 }, { "epoch": 1.0696109721383593, "eval_loss": 1.4095999002456665, "eval_runtime": 11.2941, "eval_samples_per_second": 90.667, "eval_steps_per_second": 22.667, "step": 6200 }, { "epoch": 1.0697834900370913, "grad_norm": 0.609375, "learning_rate": 8.967529214039682e-06, "loss": 1.3528, "step": 6201 }, { "epoch": 1.0699560079358232, "grad_norm": 0.734375, "learning_rate": 8.964821919824623e-06, "loss": 1.4585, "step": 6202 }, { "epoch": 1.0701285258345554, "grad_norm": 6.25, "learning_rate": 8.962114702302018e-06, "loss": 1.4366, "step": 6203 }, { "epoch": 1.0703010437332874, "grad_norm": 0.640625, "learning_rate": 8.959407561672447e-06, "loss": 1.4571, "step": 6204 }, { "epoch": 1.0704735616320193, "grad_norm": 0.546875, "learning_rate": 8.956700498136461e-06, "loss": 1.4125, "step": 6205 }, { "epoch": 1.0706460795307513, "grad_norm": 0.609375, "learning_rate": 8.953993511894626e-06, "loss": 1.4351, "step": 6206 }, { "epoch": 1.0708185974294833, "grad_norm": 0.58984375, "learning_rate": 8.951286603147481e-06, "loss": 1.4297, "step": 6207 }, { "epoch": 1.0709911153282152, "grad_norm": 1.765625, "learning_rate": 8.948579772095578e-06, "loss": 1.4696, "step": 6208 }, { "epoch": 1.0711636332269472, "grad_norm": 0.625, "learning_rate": 8.94587301893946e-06, "loss": 1.4402, "step": 6209 }, { "epoch": 1.0713361511256794, "grad_norm": 0.63671875, "learning_rate": 8.943166343879652e-06, "loss": 1.4923, "step": 6210 }, { "epoch": 1.0715086690244113, "grad_norm": 0.57421875, "learning_rate": 8.940459747116688e-06, "loss": 1.403, "step": 6211 }, { "epoch": 1.0716811869231433, "grad_norm": 0.6875, "learning_rate": 8.93775322885108e-06, "loss": 1.4063, "step": 6212 }, { "epoch": 1.0718537048218753, "grad_norm": 0.70703125, "learning_rate": 8.935046789283356e-06, "loss": 1.4808, "step": 6213 }, { "epoch": 1.0720262227206072, "grad_norm": 0.609375, "learning_rate": 8.932340428614016e-06, "loss": 1.4331, "step": 6214 }, { "epoch": 1.0721987406193392, "grad_norm": 0.63671875, "learning_rate": 8.929634147043573e-06, "loss": 1.4485, "step": 6215 }, { "epoch": 1.0723712585180714, "grad_norm": 0.6328125, "learning_rate": 8.926927944772514e-06, "loss": 1.4356, "step": 6216 }, { "epoch": 1.0725437764168033, "grad_norm": 0.62890625, "learning_rate": 8.924221822001342e-06, "loss": 1.4756, "step": 6217 }, { "epoch": 1.0727162943155353, "grad_norm": 0.7109375, "learning_rate": 8.921515778930538e-06, "loss": 1.4703, "step": 6218 }, { "epoch": 1.0728888122142672, "grad_norm": 0.6640625, "learning_rate": 8.918809815760585e-06, "loss": 1.3884, "step": 6219 }, { "epoch": 1.0730613301129992, "grad_norm": 0.578125, "learning_rate": 8.916103932691953e-06, "loss": 1.4818, "step": 6220 }, { "epoch": 1.0732338480117312, "grad_norm": 0.65625, "learning_rate": 8.913398129925118e-06, "loss": 1.5478, "step": 6221 }, { "epoch": 1.0734063659104631, "grad_norm": 0.7109375, "learning_rate": 8.910692407660538e-06, "loss": 1.3961, "step": 6222 }, { "epoch": 1.0735788838091953, "grad_norm": 0.7421875, "learning_rate": 8.907986766098666e-06, "loss": 1.5076, "step": 6223 }, { "epoch": 1.0737514017079273, "grad_norm": 0.69140625, "learning_rate": 8.905281205439965e-06, "loss": 1.4828, "step": 6224 }, { "epoch": 1.0739239196066592, "grad_norm": 0.5859375, "learning_rate": 8.902575725884867e-06, "loss": 1.41, "step": 6225 }, { "epoch": 1.0740964375053912, "grad_norm": 0.734375, "learning_rate": 8.89987032763382e-06, "loss": 1.431, "step": 6226 }, { "epoch": 1.0742689554041231, "grad_norm": 0.7578125, "learning_rate": 8.89716501088725e-06, "loss": 1.3323, "step": 6227 }, { "epoch": 1.074441473302855, "grad_norm": 0.6875, "learning_rate": 8.89445977584559e-06, "loss": 1.4044, "step": 6228 }, { "epoch": 1.074613991201587, "grad_norm": 0.58984375, "learning_rate": 8.891754622709254e-06, "loss": 1.4322, "step": 6229 }, { "epoch": 1.0747865091003193, "grad_norm": 0.59375, "learning_rate": 8.889049551678664e-06, "loss": 1.4238, "step": 6230 }, { "epoch": 1.0749590269990512, "grad_norm": 0.60546875, "learning_rate": 8.886344562954221e-06, "loss": 1.3572, "step": 6231 }, { "epoch": 1.0751315448977832, "grad_norm": 0.734375, "learning_rate": 8.883639656736334e-06, "loss": 1.4339, "step": 6232 }, { "epoch": 1.0753040627965151, "grad_norm": 0.6171875, "learning_rate": 8.880934833225395e-06, "loss": 1.4185, "step": 6233 }, { "epoch": 1.075476580695247, "grad_norm": 0.625, "learning_rate": 8.878230092621799e-06, "loss": 1.4326, "step": 6234 }, { "epoch": 1.075649098593979, "grad_norm": 0.64453125, "learning_rate": 8.87552543512592e-06, "loss": 1.4075, "step": 6235 }, { "epoch": 1.075821616492711, "grad_norm": 0.60546875, "learning_rate": 8.872820860938149e-06, "loss": 1.315, "step": 6236 }, { "epoch": 1.0759941343914432, "grad_norm": 0.66796875, "learning_rate": 8.870116370258847e-06, "loss": 1.3736, "step": 6237 }, { "epoch": 1.0761666522901752, "grad_norm": 0.70703125, "learning_rate": 8.86741196328839e-06, "loss": 1.4035, "step": 6238 }, { "epoch": 1.0763391701889071, "grad_norm": 0.6015625, "learning_rate": 8.864707640227127e-06, "loss": 1.3408, "step": 6239 }, { "epoch": 1.076511688087639, "grad_norm": 0.640625, "learning_rate": 8.862003401275414e-06, "loss": 1.4941, "step": 6240 }, { "epoch": 1.076684205986371, "grad_norm": 0.71484375, "learning_rate": 8.859299246633604e-06, "loss": 1.3956, "step": 6241 }, { "epoch": 1.076856723885103, "grad_norm": 0.59765625, "learning_rate": 8.85659517650203e-06, "loss": 1.329, "step": 6242 }, { "epoch": 1.0770292417838352, "grad_norm": 0.60546875, "learning_rate": 8.853891191081035e-06, "loss": 1.3659, "step": 6243 }, { "epoch": 1.0772017596825672, "grad_norm": 0.60546875, "learning_rate": 8.851187290570934e-06, "loss": 1.4607, "step": 6244 }, { "epoch": 1.0773742775812991, "grad_norm": 0.6015625, "learning_rate": 8.848483475172063e-06, "loss": 1.4711, "step": 6245 }, { "epoch": 1.077546795480031, "grad_norm": 0.578125, "learning_rate": 8.845779745084728e-06, "loss": 1.4921, "step": 6246 }, { "epoch": 1.077719313378763, "grad_norm": 0.56640625, "learning_rate": 8.843076100509244e-06, "loss": 1.4246, "step": 6247 }, { "epoch": 1.077891831277495, "grad_norm": 0.609375, "learning_rate": 8.840372541645907e-06, "loss": 1.4466, "step": 6248 }, { "epoch": 1.078064349176227, "grad_norm": 0.69140625, "learning_rate": 8.837669068695023e-06, "loss": 1.4841, "step": 6249 }, { "epoch": 1.078236867074959, "grad_norm": 0.62890625, "learning_rate": 8.834965681856873e-06, "loss": 1.4025, "step": 6250 }, { "epoch": 1.078409384973691, "grad_norm": 0.62109375, "learning_rate": 8.832262381331747e-06, "loss": 1.4657, "step": 6251 }, { "epoch": 1.078581902872423, "grad_norm": 0.66796875, "learning_rate": 8.829559167319917e-06, "loss": 1.3731, "step": 6252 }, { "epoch": 1.078754420771155, "grad_norm": 0.5859375, "learning_rate": 8.826856040021661e-06, "loss": 1.5165, "step": 6253 }, { "epoch": 1.078926938669887, "grad_norm": 0.578125, "learning_rate": 8.824152999637237e-06, "loss": 1.3957, "step": 6254 }, { "epoch": 1.079099456568619, "grad_norm": 0.6328125, "learning_rate": 8.821450046366909e-06, "loss": 1.4961, "step": 6255 }, { "epoch": 1.079271974467351, "grad_norm": 0.57421875, "learning_rate": 8.81874718041092e-06, "loss": 1.2706, "step": 6256 }, { "epoch": 1.079444492366083, "grad_norm": 0.6328125, "learning_rate": 8.816044401969524e-06, "loss": 1.5226, "step": 6257 }, { "epoch": 1.079617010264815, "grad_norm": 0.578125, "learning_rate": 8.813341711242959e-06, "loss": 1.4211, "step": 6258 }, { "epoch": 1.079789528163547, "grad_norm": 0.703125, "learning_rate": 8.810639108431449e-06, "loss": 1.4257, "step": 6259 }, { "epoch": 1.079962046062279, "grad_norm": 0.65625, "learning_rate": 8.80793659373523e-06, "loss": 1.5166, "step": 6260 }, { "epoch": 1.080134563961011, "grad_norm": 0.625, "learning_rate": 8.805234167354515e-06, "loss": 1.4059, "step": 6261 }, { "epoch": 1.080307081859743, "grad_norm": 0.62890625, "learning_rate": 8.80253182948952e-06, "loss": 1.4157, "step": 6262 }, { "epoch": 1.0804795997584749, "grad_norm": 0.68359375, "learning_rate": 8.799829580340444e-06, "loss": 1.4555, "step": 6263 }, { "epoch": 1.080652117657207, "grad_norm": 0.65234375, "learning_rate": 8.797127420107496e-06, "loss": 1.4794, "step": 6264 }, { "epoch": 1.080824635555939, "grad_norm": 0.5859375, "learning_rate": 8.794425348990861e-06, "loss": 1.4365, "step": 6265 }, { "epoch": 1.080997153454671, "grad_norm": 0.578125, "learning_rate": 8.791723367190736e-06, "loss": 1.4629, "step": 6266 }, { "epoch": 1.081169671353403, "grad_norm": 0.578125, "learning_rate": 8.789021474907286e-06, "loss": 1.4438, "step": 6267 }, { "epoch": 1.0813421892521349, "grad_norm": 0.5859375, "learning_rate": 8.786319672340696e-06, "loss": 1.4537, "step": 6268 }, { "epoch": 1.0815147071508668, "grad_norm": 0.6328125, "learning_rate": 8.783617959691126e-06, "loss": 1.4335, "step": 6269 }, { "epoch": 1.0816872250495988, "grad_norm": 0.59765625, "learning_rate": 8.780916337158739e-06, "loss": 1.4098, "step": 6270 }, { "epoch": 1.081859742948331, "grad_norm": 0.57421875, "learning_rate": 8.778214804943687e-06, "loss": 1.4097, "step": 6271 }, { "epoch": 1.082032260847063, "grad_norm": 0.59765625, "learning_rate": 8.775513363246113e-06, "loss": 1.4887, "step": 6272 }, { "epoch": 1.082204778745795, "grad_norm": 0.59765625, "learning_rate": 8.772812012266165e-06, "loss": 1.3853, "step": 6273 }, { "epoch": 1.0823772966445269, "grad_norm": 0.60546875, "learning_rate": 8.770110752203968e-06, "loss": 1.3535, "step": 6274 }, { "epoch": 1.0825498145432588, "grad_norm": 0.61328125, "learning_rate": 8.767409583259654e-06, "loss": 1.4843, "step": 6275 }, { "epoch": 1.0827223324419908, "grad_norm": 1.078125, "learning_rate": 8.764708505633334e-06, "loss": 1.4779, "step": 6276 }, { "epoch": 1.0828948503407227, "grad_norm": 0.60546875, "learning_rate": 8.762007519525132e-06, "loss": 1.306, "step": 6277 }, { "epoch": 1.083067368239455, "grad_norm": 0.61328125, "learning_rate": 8.759306625135147e-06, "loss": 1.4475, "step": 6278 }, { "epoch": 1.083239886138187, "grad_norm": 0.6796875, "learning_rate": 8.75660582266348e-06, "loss": 1.4575, "step": 6279 }, { "epoch": 1.0834124040369189, "grad_norm": 0.58203125, "learning_rate": 8.753905112310217e-06, "loss": 1.3896, "step": 6280 }, { "epoch": 1.0835849219356508, "grad_norm": 0.64453125, "learning_rate": 8.751204494275457e-06, "loss": 1.3266, "step": 6281 }, { "epoch": 1.0837574398343828, "grad_norm": 0.6171875, "learning_rate": 8.748503968759267e-06, "loss": 1.4157, "step": 6282 }, { "epoch": 1.0839299577331147, "grad_norm": 0.53125, "learning_rate": 8.745803535961725e-06, "loss": 1.3011, "step": 6283 }, { "epoch": 1.084102475631847, "grad_norm": 0.59765625, "learning_rate": 8.74310319608289e-06, "loss": 1.3948, "step": 6284 }, { "epoch": 1.0842749935305789, "grad_norm": 0.6171875, "learning_rate": 8.740402949322827e-06, "loss": 1.3987, "step": 6285 }, { "epoch": 1.0844475114293108, "grad_norm": 0.6015625, "learning_rate": 8.737702795881581e-06, "loss": 1.4962, "step": 6286 }, { "epoch": 1.0846200293280428, "grad_norm": 0.578125, "learning_rate": 8.735002735959203e-06, "loss": 1.4318, "step": 6287 }, { "epoch": 1.0847925472267748, "grad_norm": 0.609375, "learning_rate": 8.732302769755722e-06, "loss": 1.4156, "step": 6288 }, { "epoch": 1.0849650651255067, "grad_norm": 0.55859375, "learning_rate": 8.729602897471175e-06, "loss": 1.3992, "step": 6289 }, { "epoch": 1.0851375830242387, "grad_norm": 0.58984375, "learning_rate": 8.726903119305583e-06, "loss": 1.3411, "step": 6290 }, { "epoch": 1.0853101009229706, "grad_norm": 0.78515625, "learning_rate": 8.72420343545896e-06, "loss": 1.4526, "step": 6291 }, { "epoch": 1.0854826188217028, "grad_norm": 0.58203125, "learning_rate": 8.721503846131322e-06, "loss": 1.4865, "step": 6292 }, { "epoch": 1.0856551367204348, "grad_norm": 0.63671875, "learning_rate": 8.718804351522666e-06, "loss": 1.3999, "step": 6293 }, { "epoch": 1.0858276546191667, "grad_norm": 0.609375, "learning_rate": 8.716104951832992e-06, "loss": 1.3276, "step": 6294 }, { "epoch": 1.0860001725178987, "grad_norm": 0.58203125, "learning_rate": 8.713405647262279e-06, "loss": 1.3087, "step": 6295 }, { "epoch": 1.0861726904166307, "grad_norm": 0.6796875, "learning_rate": 8.71070643801052e-06, "loss": 1.3814, "step": 6296 }, { "epoch": 1.0863452083153626, "grad_norm": 0.60546875, "learning_rate": 8.708007324277678e-06, "loss": 1.4415, "step": 6297 }, { "epoch": 1.0865177262140948, "grad_norm": 0.7578125, "learning_rate": 8.705308306263732e-06, "loss": 1.2513, "step": 6298 }, { "epoch": 1.0866902441128268, "grad_norm": 0.59765625, "learning_rate": 8.702609384168634e-06, "loss": 1.4319, "step": 6299 }, { "epoch": 1.0868627620115587, "grad_norm": 0.671875, "learning_rate": 8.69991055819234e-06, "loss": 1.4037, "step": 6300 }, { "epoch": 1.0868627620115587, "eval_loss": 1.4092991352081299, "eval_runtime": 10.9216, "eval_samples_per_second": 93.759, "eval_steps_per_second": 23.44, "step": 6300 }, { "epoch": 1.0870352799102907, "grad_norm": 0.7421875, "learning_rate": 8.697211828534793e-06, "loss": 1.4708, "step": 6301 }, { "epoch": 1.0872077978090227, "grad_norm": 0.6015625, "learning_rate": 8.694513195395937e-06, "loss": 1.2764, "step": 6302 }, { "epoch": 1.0873803157077546, "grad_norm": 0.58984375, "learning_rate": 8.691814658975699e-06, "loss": 1.4155, "step": 6303 }, { "epoch": 1.0875528336064866, "grad_norm": 0.6015625, "learning_rate": 8.689116219474004e-06, "loss": 1.3576, "step": 6304 }, { "epoch": 1.0877253515052188, "grad_norm": 0.640625, "learning_rate": 8.686417877090772e-06, "loss": 1.4314, "step": 6305 }, { "epoch": 1.0878978694039507, "grad_norm": 0.68359375, "learning_rate": 8.68371963202591e-06, "loss": 1.4714, "step": 6306 }, { "epoch": 1.0880703873026827, "grad_norm": 0.61328125, "learning_rate": 8.681021484479327e-06, "loss": 1.465, "step": 6307 }, { "epoch": 1.0882429052014146, "grad_norm": 0.71875, "learning_rate": 8.678323434650906e-06, "loss": 1.4521, "step": 6308 }, { "epoch": 1.0884154231001466, "grad_norm": 0.609375, "learning_rate": 8.675625482740549e-06, "loss": 1.5038, "step": 6309 }, { "epoch": 1.0885879409988786, "grad_norm": 0.69921875, "learning_rate": 8.672927628948128e-06, "loss": 1.4076, "step": 6310 }, { "epoch": 1.0887604588976105, "grad_norm": 0.609375, "learning_rate": 8.670229873473524e-06, "loss": 1.5556, "step": 6311 }, { "epoch": 1.0889329767963427, "grad_norm": 0.6640625, "learning_rate": 8.667532216516594e-06, "loss": 1.4564, "step": 6312 }, { "epoch": 1.0891054946950747, "grad_norm": 0.60546875, "learning_rate": 8.664834658277208e-06, "loss": 1.4006, "step": 6313 }, { "epoch": 1.0892780125938066, "grad_norm": 0.66015625, "learning_rate": 8.662137198955211e-06, "loss": 1.4791, "step": 6314 }, { "epoch": 1.0894505304925386, "grad_norm": 0.5625, "learning_rate": 8.659439838750451e-06, "loss": 1.4249, "step": 6315 }, { "epoch": 1.0896230483912706, "grad_norm": 0.62109375, "learning_rate": 8.656742577862761e-06, "loss": 1.4844, "step": 6316 }, { "epoch": 1.0897955662900025, "grad_norm": 0.65234375, "learning_rate": 8.654045416491975e-06, "loss": 1.3683, "step": 6317 }, { "epoch": 1.0899680841887345, "grad_norm": 0.6171875, "learning_rate": 8.651348354837917e-06, "loss": 1.4811, "step": 6318 }, { "epoch": 1.0901406020874667, "grad_norm": 0.609375, "learning_rate": 8.6486513931004e-06, "loss": 1.4244, "step": 6319 }, { "epoch": 1.0903131199861986, "grad_norm": 0.578125, "learning_rate": 8.645954531479226e-06, "loss": 1.3503, "step": 6320 }, { "epoch": 1.0904856378849306, "grad_norm": 0.6875, "learning_rate": 8.643257770174202e-06, "loss": 1.4309, "step": 6321 }, { "epoch": 1.0906581557836625, "grad_norm": 0.58203125, "learning_rate": 8.640561109385125e-06, "loss": 1.4698, "step": 6322 }, { "epoch": 1.0908306736823945, "grad_norm": 0.52734375, "learning_rate": 8.637864549311768e-06, "loss": 1.299, "step": 6323 }, { "epoch": 1.0910031915811265, "grad_norm": 0.57421875, "learning_rate": 8.635168090153922e-06, "loss": 1.3882, "step": 6324 }, { "epoch": 1.0911757094798586, "grad_norm": 0.640625, "learning_rate": 8.632471732111349e-06, "loss": 1.348, "step": 6325 }, { "epoch": 1.0913482273785906, "grad_norm": 0.5625, "learning_rate": 8.629775475383816e-06, "loss": 1.4612, "step": 6326 }, { "epoch": 1.0915207452773226, "grad_norm": 0.578125, "learning_rate": 8.627079320171076e-06, "loss": 1.3347, "step": 6327 }, { "epoch": 1.0916932631760545, "grad_norm": 0.6796875, "learning_rate": 8.624383266672882e-06, "loss": 1.432, "step": 6328 }, { "epoch": 1.0918657810747865, "grad_norm": 0.625, "learning_rate": 8.621687315088965e-06, "loss": 1.4275, "step": 6329 }, { "epoch": 1.0920382989735185, "grad_norm": 0.61328125, "learning_rate": 8.618991465619068e-06, "loss": 1.4922, "step": 6330 }, { "epoch": 1.0922108168722504, "grad_norm": 0.58984375, "learning_rate": 8.616295718462913e-06, "loss": 1.3674, "step": 6331 }, { "epoch": 1.0923833347709824, "grad_norm": 0.69140625, "learning_rate": 8.613600073820216e-06, "loss": 1.5326, "step": 6332 }, { "epoch": 1.0925558526697146, "grad_norm": 0.60546875, "learning_rate": 8.610904531890685e-06, "loss": 1.432, "step": 6333 }, { "epoch": 1.0927283705684465, "grad_norm": 0.6875, "learning_rate": 8.60820909287403e-06, "loss": 1.434, "step": 6334 }, { "epoch": 1.0929008884671785, "grad_norm": 0.6328125, "learning_rate": 8.60551375696994e-06, "loss": 1.4066, "step": 6335 }, { "epoch": 1.0930734063659104, "grad_norm": 0.62890625, "learning_rate": 8.602818524378098e-06, "loss": 1.4648, "step": 6336 }, { "epoch": 1.0932459242646424, "grad_norm": 0.70703125, "learning_rate": 8.600123395298198e-06, "loss": 1.3308, "step": 6337 }, { "epoch": 1.0934184421633744, "grad_norm": 0.59765625, "learning_rate": 8.597428369929902e-06, "loss": 1.36, "step": 6338 }, { "epoch": 1.0935909600621065, "grad_norm": 0.59375, "learning_rate": 8.594733448472876e-06, "loss": 1.4629, "step": 6339 }, { "epoch": 1.0937634779608385, "grad_norm": 0.609375, "learning_rate": 8.592038631126774e-06, "loss": 1.4091, "step": 6340 }, { "epoch": 1.0939359958595705, "grad_norm": 0.6640625, "learning_rate": 8.589343918091251e-06, "loss": 1.3806, "step": 6341 }, { "epoch": 1.0941085137583024, "grad_norm": 0.58203125, "learning_rate": 8.586649309565942e-06, "loss": 1.4089, "step": 6342 }, { "epoch": 1.0942810316570344, "grad_norm": 0.578125, "learning_rate": 8.583954805750488e-06, "loss": 1.5163, "step": 6343 }, { "epoch": 1.0944535495557663, "grad_norm": 0.59765625, "learning_rate": 8.581260406844503e-06, "loss": 1.3662, "step": 6344 }, { "epoch": 1.0946260674544983, "grad_norm": 0.63671875, "learning_rate": 8.578566113047616e-06, "loss": 1.4788, "step": 6345 }, { "epoch": 1.0947985853532305, "grad_norm": 0.60546875, "learning_rate": 8.575871924559431e-06, "loss": 1.4255, "step": 6346 }, { "epoch": 1.0949711032519625, "grad_norm": 0.640625, "learning_rate": 8.573177841579556e-06, "loss": 1.3274, "step": 6347 }, { "epoch": 1.0951436211506944, "grad_norm": 0.64453125, "learning_rate": 8.570483864307575e-06, "loss": 1.3774, "step": 6348 }, { "epoch": 1.0953161390494264, "grad_norm": 0.796875, "learning_rate": 8.567789992943088e-06, "loss": 1.5031, "step": 6349 }, { "epoch": 1.0954886569481583, "grad_norm": 0.66015625, "learning_rate": 8.565096227685663e-06, "loss": 1.5199, "step": 6350 }, { "epoch": 1.0956611748468903, "grad_norm": 0.65625, "learning_rate": 8.562402568734879e-06, "loss": 1.4346, "step": 6351 }, { "epoch": 1.0958336927456223, "grad_norm": 0.703125, "learning_rate": 8.559709016290288e-06, "loss": 1.3992, "step": 6352 }, { "epoch": 1.0960062106443544, "grad_norm": 0.5703125, "learning_rate": 8.557015570551455e-06, "loss": 1.3908, "step": 6353 }, { "epoch": 1.0961787285430864, "grad_norm": 0.66015625, "learning_rate": 8.55432223171793e-06, "loss": 1.3654, "step": 6354 }, { "epoch": 1.0963512464418184, "grad_norm": 0.625, "learning_rate": 8.551628999989242e-06, "loss": 1.4114, "step": 6355 }, { "epoch": 1.0965237643405503, "grad_norm": 0.59375, "learning_rate": 8.548935875564931e-06, "loss": 1.4118, "step": 6356 }, { "epoch": 1.0966962822392823, "grad_norm": 0.57421875, "learning_rate": 8.546242858644513e-06, "loss": 1.4611, "step": 6357 }, { "epoch": 1.0968688001380142, "grad_norm": 0.61328125, "learning_rate": 8.543549949427512e-06, "loss": 1.4758, "step": 6358 }, { "epoch": 1.0970413180367462, "grad_norm": 0.6171875, "learning_rate": 8.540857148113429e-06, "loss": 1.3938, "step": 6359 }, { "epoch": 1.0972138359354784, "grad_norm": 0.65234375, "learning_rate": 8.538164454901766e-06, "loss": 1.4961, "step": 6360 }, { "epoch": 1.0973863538342103, "grad_norm": 0.6171875, "learning_rate": 8.535471869992011e-06, "loss": 1.4775, "step": 6361 }, { "epoch": 1.0975588717329423, "grad_norm": 0.5625, "learning_rate": 8.532779393583656e-06, "loss": 1.4067, "step": 6362 }, { "epoch": 1.0977313896316743, "grad_norm": 0.58984375, "learning_rate": 8.530087025876168e-06, "loss": 1.3677, "step": 6363 }, { "epoch": 1.0979039075304062, "grad_norm": 0.546875, "learning_rate": 8.52739476706902e-06, "loss": 1.2977, "step": 6364 }, { "epoch": 1.0980764254291382, "grad_norm": 0.58203125, "learning_rate": 8.524702617361665e-06, "loss": 1.4171, "step": 6365 }, { "epoch": 1.0982489433278704, "grad_norm": 0.578125, "learning_rate": 8.522010576953561e-06, "loss": 1.3564, "step": 6366 }, { "epoch": 1.0984214612266023, "grad_norm": 0.7109375, "learning_rate": 8.519318646044147e-06, "loss": 1.4582, "step": 6367 }, { "epoch": 1.0985939791253343, "grad_norm": 0.58984375, "learning_rate": 8.516626824832858e-06, "loss": 1.4448, "step": 6368 }, { "epoch": 1.0987664970240663, "grad_norm": 0.61328125, "learning_rate": 8.513935113519126e-06, "loss": 1.3989, "step": 6369 }, { "epoch": 1.0989390149227982, "grad_norm": 0.59375, "learning_rate": 8.511243512302362e-06, "loss": 1.4482, "step": 6370 }, { "epoch": 1.0991115328215302, "grad_norm": 0.578125, "learning_rate": 8.508552021381987e-06, "loss": 1.345, "step": 6371 }, { "epoch": 1.0992840507202621, "grad_norm": 0.6484375, "learning_rate": 8.50586064095739e-06, "loss": 1.3918, "step": 6372 }, { "epoch": 1.0994565686189943, "grad_norm": 0.58203125, "learning_rate": 8.503169371227978e-06, "loss": 1.5311, "step": 6373 }, { "epoch": 1.0996290865177263, "grad_norm": 0.60546875, "learning_rate": 8.50047821239313e-06, "loss": 1.4589, "step": 6374 }, { "epoch": 1.0998016044164582, "grad_norm": 0.921875, "learning_rate": 8.497787164652227e-06, "loss": 1.3397, "step": 6375 }, { "epoch": 1.0999741223151902, "grad_norm": 0.6015625, "learning_rate": 8.495096228204632e-06, "loss": 1.4847, "step": 6376 }, { "epoch": 1.1001466402139222, "grad_norm": 0.60546875, "learning_rate": 8.492405403249717e-06, "loss": 1.4609, "step": 6377 }, { "epoch": 1.1003191581126541, "grad_norm": 0.6484375, "learning_rate": 8.489714689986826e-06, "loss": 1.4853, "step": 6378 }, { "epoch": 1.100491676011386, "grad_norm": 0.6015625, "learning_rate": 8.48702408861531e-06, "loss": 1.5215, "step": 6379 }, { "epoch": 1.1006641939101183, "grad_norm": 0.5703125, "learning_rate": 8.4843335993345e-06, "loss": 1.3049, "step": 6380 }, { "epoch": 1.1008367118088502, "grad_norm": 0.62109375, "learning_rate": 8.48164322234373e-06, "loss": 1.464, "step": 6381 }, { "epoch": 1.1010092297075822, "grad_norm": 0.57421875, "learning_rate": 8.478952957842317e-06, "loss": 1.4325, "step": 6382 }, { "epoch": 1.1011817476063142, "grad_norm": 0.6328125, "learning_rate": 8.476262806029573e-06, "loss": 1.3597, "step": 6383 }, { "epoch": 1.1013542655050461, "grad_norm": 0.61328125, "learning_rate": 8.473572767104799e-06, "loss": 1.3886, "step": 6384 }, { "epoch": 1.101526783403778, "grad_norm": 0.6328125, "learning_rate": 8.47088284126729e-06, "loss": 1.3507, "step": 6385 }, { "epoch": 1.10169930130251, "grad_norm": 0.59765625, "learning_rate": 8.46819302871634e-06, "loss": 1.541, "step": 6386 }, { "epoch": 1.1018718192012422, "grad_norm": 0.6171875, "learning_rate": 8.465503329651219e-06, "loss": 1.405, "step": 6387 }, { "epoch": 1.1020443370999742, "grad_norm": 0.74609375, "learning_rate": 8.462813744271202e-06, "loss": 1.519, "step": 6388 }, { "epoch": 1.1022168549987061, "grad_norm": 0.6796875, "learning_rate": 8.460124272775542e-06, "loss": 1.3921, "step": 6389 }, { "epoch": 1.102389372897438, "grad_norm": 0.6484375, "learning_rate": 8.457434915363501e-06, "loss": 1.3868, "step": 6390 }, { "epoch": 1.10256189079617, "grad_norm": 0.58984375, "learning_rate": 8.454745672234321e-06, "loss": 1.4336, "step": 6391 }, { "epoch": 1.102734408694902, "grad_norm": 0.5703125, "learning_rate": 8.452056543587236e-06, "loss": 1.3515, "step": 6392 }, { "epoch": 1.1029069265936342, "grad_norm": 0.640625, "learning_rate": 8.44936752962147e-06, "loss": 1.4774, "step": 6393 }, { "epoch": 1.1030794444923662, "grad_norm": 0.671875, "learning_rate": 8.446678630536252e-06, "loss": 1.4492, "step": 6394 }, { "epoch": 1.1032519623910981, "grad_norm": 0.5859375, "learning_rate": 8.443989846530784e-06, "loss": 1.4031, "step": 6395 }, { "epoch": 1.10342448028983, "grad_norm": 0.55859375, "learning_rate": 8.441301177804273e-06, "loss": 1.3862, "step": 6396 }, { "epoch": 1.103596998188562, "grad_norm": 0.65625, "learning_rate": 8.438612624555905e-06, "loss": 1.3648, "step": 6397 }, { "epoch": 1.103769516087294, "grad_norm": 0.73046875, "learning_rate": 8.435924186984875e-06, "loss": 1.4533, "step": 6398 }, { "epoch": 1.103942033986026, "grad_norm": 0.80859375, "learning_rate": 8.433235865290351e-06, "loss": 1.4442, "step": 6399 }, { "epoch": 1.104114551884758, "grad_norm": 0.60546875, "learning_rate": 8.430547659671503e-06, "loss": 1.4276, "step": 6400 }, { "epoch": 1.104114551884758, "eval_loss": 1.4093081951141357, "eval_runtime": 10.9921, "eval_samples_per_second": 93.158, "eval_steps_per_second": 23.29, "step": 6400 }, { "epoch": 1.1042870697834901, "grad_norm": 0.59765625, "learning_rate": 8.427859570327494e-06, "loss": 1.4678, "step": 6401 }, { "epoch": 1.104459587682222, "grad_norm": 0.6171875, "learning_rate": 8.425171597457469e-06, "loss": 1.4255, "step": 6402 }, { "epoch": 1.104632105580954, "grad_norm": 0.6328125, "learning_rate": 8.422483741260575e-06, "loss": 1.4794, "step": 6403 }, { "epoch": 1.104804623479686, "grad_norm": 0.63671875, "learning_rate": 8.419796001935935e-06, "loss": 1.4145, "step": 6404 }, { "epoch": 1.104977141378418, "grad_norm": 0.58984375, "learning_rate": 8.417108379682688e-06, "loss": 1.4402, "step": 6405 }, { "epoch": 1.10514965927715, "grad_norm": 0.61328125, "learning_rate": 8.41442087469994e-06, "loss": 1.4904, "step": 6406 }, { "epoch": 1.105322177175882, "grad_norm": 0.59765625, "learning_rate": 8.411733487186802e-06, "loss": 1.3821, "step": 6407 }, { "epoch": 1.105494695074614, "grad_norm": 0.6484375, "learning_rate": 8.409046217342367e-06, "loss": 1.4523, "step": 6408 }, { "epoch": 1.105667212973346, "grad_norm": 0.578125, "learning_rate": 8.406359065365735e-06, "loss": 1.3564, "step": 6409 }, { "epoch": 1.105839730872078, "grad_norm": 0.5703125, "learning_rate": 8.403672031455977e-06, "loss": 1.456, "step": 6410 }, { "epoch": 1.10601224877081, "grad_norm": 0.625, "learning_rate": 8.40098511581217e-06, "loss": 1.4374, "step": 6411 }, { "epoch": 1.106184766669542, "grad_norm": 0.65234375, "learning_rate": 8.398298318633376e-06, "loss": 1.449, "step": 6412 }, { "epoch": 1.1063572845682739, "grad_norm": 0.62109375, "learning_rate": 8.395611640118653e-06, "loss": 1.3961, "step": 6413 }, { "epoch": 1.106529802467006, "grad_norm": 0.62890625, "learning_rate": 8.39292508046704e-06, "loss": 1.3924, "step": 6414 }, { "epoch": 1.106702320365738, "grad_norm": 0.56640625, "learning_rate": 8.390238639877584e-06, "loss": 1.4491, "step": 6415 }, { "epoch": 1.10687483826447, "grad_norm": 0.58984375, "learning_rate": 8.387552318549304e-06, "loss": 1.4087, "step": 6416 }, { "epoch": 1.107047356163202, "grad_norm": 0.5703125, "learning_rate": 8.384866116681221e-06, "loss": 1.5279, "step": 6417 }, { "epoch": 1.107219874061934, "grad_norm": 0.6328125, "learning_rate": 8.382180034472353e-06, "loss": 1.4544, "step": 6418 }, { "epoch": 1.1073923919606659, "grad_norm": 0.66796875, "learning_rate": 8.379494072121695e-06, "loss": 1.5367, "step": 6419 }, { "epoch": 1.1075649098593978, "grad_norm": 0.6328125, "learning_rate": 8.376808229828242e-06, "loss": 1.4524, "step": 6420 }, { "epoch": 1.10773742775813, "grad_norm": 0.6015625, "learning_rate": 8.374122507790975e-06, "loss": 1.4472, "step": 6421 }, { "epoch": 1.107909945656862, "grad_norm": 0.56640625, "learning_rate": 8.371436906208876e-06, "loss": 1.5322, "step": 6422 }, { "epoch": 1.108082463555594, "grad_norm": 0.77734375, "learning_rate": 8.368751425280904e-06, "loss": 1.4576, "step": 6423 }, { "epoch": 1.1082549814543259, "grad_norm": 0.59765625, "learning_rate": 8.36606606520602e-06, "loss": 1.4851, "step": 6424 }, { "epoch": 1.1084274993530578, "grad_norm": 0.65625, "learning_rate": 8.363380826183167e-06, "loss": 1.3564, "step": 6425 }, { "epoch": 1.1086000172517898, "grad_norm": 0.65625, "learning_rate": 8.360695708411295e-06, "loss": 1.428, "step": 6426 }, { "epoch": 1.1087725351505218, "grad_norm": 0.63671875, "learning_rate": 8.358010712089324e-06, "loss": 1.4501, "step": 6427 }, { "epoch": 1.108945053049254, "grad_norm": 0.6640625, "learning_rate": 8.355325837416182e-06, "loss": 1.363, "step": 6428 }, { "epoch": 1.109117570947986, "grad_norm": 0.640625, "learning_rate": 8.352641084590772e-06, "loss": 1.4789, "step": 6429 }, { "epoch": 1.1092900888467179, "grad_norm": 0.63671875, "learning_rate": 8.349956453812009e-06, "loss": 1.4532, "step": 6430 }, { "epoch": 1.1094626067454498, "grad_norm": 0.60546875, "learning_rate": 8.34727194527878e-06, "loss": 1.412, "step": 6431 }, { "epoch": 1.1096351246441818, "grad_norm": 0.58203125, "learning_rate": 8.34458755918997e-06, "loss": 1.401, "step": 6432 }, { "epoch": 1.1098076425429138, "grad_norm": 0.625, "learning_rate": 8.341903295744463e-06, "loss": 1.4473, "step": 6433 }, { "epoch": 1.109980160441646, "grad_norm": 0.54296875, "learning_rate": 8.339219155141115e-06, "loss": 1.3622, "step": 6434 }, { "epoch": 1.110152678340378, "grad_norm": 0.6015625, "learning_rate": 8.336535137578792e-06, "loss": 1.4421, "step": 6435 }, { "epoch": 1.1103251962391099, "grad_norm": 0.6796875, "learning_rate": 8.333851243256337e-06, "loss": 1.4733, "step": 6436 }, { "epoch": 1.1104977141378418, "grad_norm": 0.640625, "learning_rate": 8.331167472372596e-06, "loss": 1.3693, "step": 6437 }, { "epoch": 1.1106702320365738, "grad_norm": 0.58203125, "learning_rate": 8.328483825126393e-06, "loss": 1.4495, "step": 6438 }, { "epoch": 1.1108427499353057, "grad_norm": 0.5703125, "learning_rate": 8.325800301716555e-06, "loss": 1.4169, "step": 6439 }, { "epoch": 1.1110152678340377, "grad_norm": 0.6796875, "learning_rate": 8.323116902341888e-06, "loss": 1.4493, "step": 6440 }, { "epoch": 1.1111877857327697, "grad_norm": 0.68359375, "learning_rate": 8.320433627201202e-06, "loss": 1.3748, "step": 6441 }, { "epoch": 1.1113603036315018, "grad_norm": 0.60546875, "learning_rate": 8.317750476493282e-06, "loss": 1.5276, "step": 6442 }, { "epoch": 1.1115328215302338, "grad_norm": 0.55859375, "learning_rate": 8.315067450416926e-06, "loss": 1.3542, "step": 6443 }, { "epoch": 1.1117053394289658, "grad_norm": 0.61328125, "learning_rate": 8.312384549170894e-06, "loss": 1.426, "step": 6444 }, { "epoch": 1.1118778573276977, "grad_norm": 0.62109375, "learning_rate": 8.309701772953964e-06, "loss": 1.4683, "step": 6445 }, { "epoch": 1.1120503752264297, "grad_norm": 0.58984375, "learning_rate": 8.307019121964885e-06, "loss": 1.4291, "step": 6446 }, { "epoch": 1.1122228931251616, "grad_norm": 0.58203125, "learning_rate": 8.30433659640241e-06, "loss": 1.4038, "step": 6447 }, { "epoch": 1.1123954110238938, "grad_norm": 0.59375, "learning_rate": 8.301654196465273e-06, "loss": 1.422, "step": 6448 }, { "epoch": 1.1125679289226258, "grad_norm": 0.59375, "learning_rate": 8.298971922352203e-06, "loss": 1.3519, "step": 6449 }, { "epoch": 1.1127404468213578, "grad_norm": 0.59375, "learning_rate": 8.296289774261926e-06, "loss": 1.4286, "step": 6450 }, { "epoch": 1.1129129647200897, "grad_norm": 0.5625, "learning_rate": 8.293607752393145e-06, "loss": 1.3807, "step": 6451 }, { "epoch": 1.1130854826188217, "grad_norm": 0.59765625, "learning_rate": 8.290925856944567e-06, "loss": 1.3515, "step": 6452 }, { "epoch": 1.1132580005175536, "grad_norm": 0.6171875, "learning_rate": 8.288244088114876e-06, "loss": 1.3681, "step": 6453 }, { "epoch": 1.1134305184162856, "grad_norm": 0.578125, "learning_rate": 8.285562446102761e-06, "loss": 1.4408, "step": 6454 }, { "epoch": 1.1136030363150178, "grad_norm": 0.65625, "learning_rate": 8.282880931106893e-06, "loss": 1.4276, "step": 6455 }, { "epoch": 1.1137755542137497, "grad_norm": 0.65234375, "learning_rate": 8.280199543325935e-06, "loss": 1.467, "step": 6456 }, { "epoch": 1.1139480721124817, "grad_norm": 0.59765625, "learning_rate": 8.277518282958536e-06, "loss": 1.4664, "step": 6457 }, { "epoch": 1.1141205900112137, "grad_norm": 0.6484375, "learning_rate": 8.274837150203352e-06, "loss": 1.4037, "step": 6458 }, { "epoch": 1.1142931079099456, "grad_norm": 0.6171875, "learning_rate": 8.272156145259006e-06, "loss": 1.4159, "step": 6459 }, { "epoch": 1.1144656258086776, "grad_norm": 0.578125, "learning_rate": 8.269475268324131e-06, "loss": 1.3251, "step": 6460 }, { "epoch": 1.1146381437074095, "grad_norm": 0.54296875, "learning_rate": 8.266794519597339e-06, "loss": 1.4099, "step": 6461 }, { "epoch": 1.1148106616061417, "grad_norm": 0.61328125, "learning_rate": 8.264113899277241e-06, "loss": 1.3554, "step": 6462 }, { "epoch": 1.1149831795048737, "grad_norm": 0.63671875, "learning_rate": 8.26143340756243e-06, "loss": 1.3989, "step": 6463 }, { "epoch": 1.1151556974036057, "grad_norm": 0.65234375, "learning_rate": 8.258753044651499e-06, "loss": 1.4215, "step": 6464 }, { "epoch": 1.1153282153023376, "grad_norm": 0.56640625, "learning_rate": 8.256072810743015e-06, "loss": 1.4646, "step": 6465 }, { "epoch": 1.1155007332010696, "grad_norm": 0.58203125, "learning_rate": 8.253392706035558e-06, "loss": 1.4461, "step": 6466 }, { "epoch": 1.1156732510998015, "grad_norm": 0.5546875, "learning_rate": 8.250712730727685e-06, "loss": 1.4213, "step": 6467 }, { "epoch": 1.1158457689985335, "grad_norm": 0.58203125, "learning_rate": 8.248032885017937e-06, "loss": 1.3849, "step": 6468 }, { "epoch": 1.1160182868972657, "grad_norm": 0.58984375, "learning_rate": 8.245353169104865e-06, "loss": 1.4312, "step": 6469 }, { "epoch": 1.1161908047959976, "grad_norm": 0.5859375, "learning_rate": 8.242673583186991e-06, "loss": 1.3815, "step": 6470 }, { "epoch": 1.1163633226947296, "grad_norm": 0.6015625, "learning_rate": 8.239994127462842e-06, "loss": 1.2654, "step": 6471 }, { "epoch": 1.1165358405934616, "grad_norm": 0.62890625, "learning_rate": 8.237314802130919e-06, "loss": 1.4075, "step": 6472 }, { "epoch": 1.1167083584921935, "grad_norm": 0.59375, "learning_rate": 8.234635607389733e-06, "loss": 1.3882, "step": 6473 }, { "epoch": 1.1168808763909255, "grad_norm": 0.609375, "learning_rate": 8.231956543437768e-06, "loss": 1.4488, "step": 6474 }, { "epoch": 1.1170533942896577, "grad_norm": 0.625, "learning_rate": 8.229277610473516e-06, "loss": 1.4515, "step": 6475 }, { "epoch": 1.1172259121883896, "grad_norm": 0.57421875, "learning_rate": 8.226598808695438e-06, "loss": 1.4751, "step": 6476 }, { "epoch": 1.1173984300871216, "grad_norm": 0.6171875, "learning_rate": 8.223920138302006e-06, "loss": 1.4667, "step": 6477 }, { "epoch": 1.1175709479858535, "grad_norm": 0.55859375, "learning_rate": 8.22124159949166e-06, "loss": 1.4327, "step": 6478 }, { "epoch": 1.1177434658845855, "grad_norm": 0.65234375, "learning_rate": 8.218563192462857e-06, "loss": 1.4853, "step": 6479 }, { "epoch": 1.1179159837833175, "grad_norm": 0.58984375, "learning_rate": 8.21588491741402e-06, "loss": 1.432, "step": 6480 }, { "epoch": 1.1180885016820494, "grad_norm": 0.59765625, "learning_rate": 8.213206774543574e-06, "loss": 1.5029, "step": 6481 }, { "epoch": 1.1182610195807814, "grad_norm": 0.6171875, "learning_rate": 8.21052876404994e-06, "loss": 1.4315, "step": 6482 }, { "epoch": 1.1184335374795136, "grad_norm": 0.62890625, "learning_rate": 8.207850886131512e-06, "loss": 1.3277, "step": 6483 }, { "epoch": 1.1186060553782455, "grad_norm": 0.609375, "learning_rate": 8.205173140986691e-06, "loss": 1.4484, "step": 6484 }, { "epoch": 1.1187785732769775, "grad_norm": 0.58984375, "learning_rate": 8.202495528813856e-06, "loss": 1.4537, "step": 6485 }, { "epoch": 1.1189510911757095, "grad_norm": 0.6171875, "learning_rate": 8.199818049811387e-06, "loss": 1.4245, "step": 6486 }, { "epoch": 1.1191236090744414, "grad_norm": 0.64453125, "learning_rate": 8.197140704177642e-06, "loss": 1.412, "step": 6487 }, { "epoch": 1.1192961269731734, "grad_norm": 0.65625, "learning_rate": 8.194463492110982e-06, "loss": 1.4611, "step": 6488 }, { "epoch": 1.1194686448719056, "grad_norm": 0.625, "learning_rate": 8.191786413809742e-06, "loss": 1.502, "step": 6489 }, { "epoch": 1.1196411627706375, "grad_norm": 0.6171875, "learning_rate": 8.189109469472267e-06, "loss": 1.44, "step": 6490 }, { "epoch": 1.1198136806693695, "grad_norm": 0.55078125, "learning_rate": 8.186432659296876e-06, "loss": 1.4693, "step": 6491 }, { "epoch": 1.1199861985681014, "grad_norm": 0.57421875, "learning_rate": 8.183755983481888e-06, "loss": 1.3884, "step": 6492 }, { "epoch": 1.1201587164668334, "grad_norm": 0.625, "learning_rate": 8.1810794422256e-06, "loss": 1.4728, "step": 6493 }, { "epoch": 1.1203312343655654, "grad_norm": 0.734375, "learning_rate": 8.178403035726316e-06, "loss": 1.5149, "step": 6494 }, { "epoch": 1.1205037522642973, "grad_norm": 0.578125, "learning_rate": 8.175726764182315e-06, "loss": 1.4063, "step": 6495 }, { "epoch": 1.1206762701630295, "grad_norm": 0.65234375, "learning_rate": 8.173050627791877e-06, "loss": 1.4264, "step": 6496 }, { "epoch": 1.1208487880617615, "grad_norm": 0.62109375, "learning_rate": 8.170374626753259e-06, "loss": 1.483, "step": 6497 }, { "epoch": 1.1210213059604934, "grad_norm": 0.640625, "learning_rate": 8.167698761264723e-06, "loss": 1.4241, "step": 6498 }, { "epoch": 1.1211938238592254, "grad_norm": 0.58984375, "learning_rate": 8.165023031524513e-06, "loss": 1.3938, "step": 6499 }, { "epoch": 1.1213663417579574, "grad_norm": 0.578125, "learning_rate": 8.162347437730859e-06, "loss": 1.4848, "step": 6500 }, { "epoch": 1.1213663417579574, "eval_loss": 1.4090793132781982, "eval_runtime": 10.9381, "eval_samples_per_second": 93.618, "eval_steps_per_second": 23.405, "step": 6500 }, { "epoch": 1.1215388596566893, "grad_norm": 0.6015625, "learning_rate": 8.159671980081994e-06, "loss": 1.3506, "step": 6501 }, { "epoch": 1.1217113775554213, "grad_norm": 0.62109375, "learning_rate": 8.156996658776121e-06, "loss": 1.3836, "step": 6502 }, { "epoch": 1.1218838954541535, "grad_norm": 0.6171875, "learning_rate": 8.154321474011457e-06, "loss": 1.368, "step": 6503 }, { "epoch": 1.1220564133528854, "grad_norm": 0.7109375, "learning_rate": 8.151646425986187e-06, "loss": 1.3849, "step": 6504 }, { "epoch": 1.1222289312516174, "grad_norm": 0.62109375, "learning_rate": 8.148971514898503e-06, "loss": 1.4842, "step": 6505 }, { "epoch": 1.1224014491503493, "grad_norm": 0.5625, "learning_rate": 8.14629674094657e-06, "loss": 1.5188, "step": 6506 }, { "epoch": 1.1225739670490813, "grad_norm": 0.56640625, "learning_rate": 8.14362210432856e-06, "loss": 1.3948, "step": 6507 }, { "epoch": 1.1227464849478133, "grad_norm": 0.61328125, "learning_rate": 8.140947605242622e-06, "loss": 1.3766, "step": 6508 }, { "epoch": 1.1229190028465452, "grad_norm": 0.61328125, "learning_rate": 8.138273243886902e-06, "loss": 1.4794, "step": 6509 }, { "epoch": 1.1230915207452774, "grad_norm": 0.578125, "learning_rate": 8.135599020459531e-06, "loss": 1.4689, "step": 6510 }, { "epoch": 1.1232640386440094, "grad_norm": 0.5859375, "learning_rate": 8.132924935158638e-06, "loss": 1.4573, "step": 6511 }, { "epoch": 1.1234365565427413, "grad_norm": 0.5625, "learning_rate": 8.130250988182328e-06, "loss": 1.4931, "step": 6512 }, { "epoch": 1.1236090744414733, "grad_norm": 0.58984375, "learning_rate": 8.127577179728708e-06, "loss": 1.3708, "step": 6513 }, { "epoch": 1.1237815923402052, "grad_norm": 0.63671875, "learning_rate": 8.124903509995872e-06, "loss": 1.4501, "step": 6514 }, { "epoch": 1.1239541102389372, "grad_norm": 0.61328125, "learning_rate": 8.122229979181899e-06, "loss": 1.3516, "step": 6515 }, { "epoch": 1.1241266281376694, "grad_norm": 0.61328125, "learning_rate": 8.119556587484863e-06, "loss": 1.4109, "step": 6516 }, { "epoch": 1.1242991460364014, "grad_norm": 0.59375, "learning_rate": 8.116883335102821e-06, "loss": 1.4759, "step": 6517 }, { "epoch": 1.1244716639351333, "grad_norm": 0.62109375, "learning_rate": 8.114210222233832e-06, "loss": 1.3568, "step": 6518 }, { "epoch": 1.1246441818338653, "grad_norm": 0.6328125, "learning_rate": 8.11153724907593e-06, "loss": 1.3548, "step": 6519 }, { "epoch": 1.1248166997325972, "grad_norm": 0.62109375, "learning_rate": 8.108864415827152e-06, "loss": 1.4087, "step": 6520 }, { "epoch": 1.1249892176313292, "grad_norm": 0.55859375, "learning_rate": 8.10619172268551e-06, "loss": 1.4023, "step": 6521 }, { "epoch": 1.1251617355300612, "grad_norm": 0.6328125, "learning_rate": 8.10351916984902e-06, "loss": 1.4742, "step": 6522 }, { "epoch": 1.1253342534287931, "grad_norm": 0.58984375, "learning_rate": 8.10084675751568e-06, "loss": 1.4095, "step": 6523 }, { "epoch": 1.1255067713275253, "grad_norm": 0.5625, "learning_rate": 8.09817448588348e-06, "loss": 1.4618, "step": 6524 }, { "epoch": 1.1256792892262573, "grad_norm": 0.5703125, "learning_rate": 8.095502355150392e-06, "loss": 1.3808, "step": 6525 }, { "epoch": 1.1258518071249892, "grad_norm": 0.7890625, "learning_rate": 8.092830365514395e-06, "loss": 1.4675, "step": 6526 }, { "epoch": 1.1260243250237212, "grad_norm": 0.62890625, "learning_rate": 8.090158517173438e-06, "loss": 1.4233, "step": 6527 }, { "epoch": 1.1261968429224531, "grad_norm": 0.6328125, "learning_rate": 8.087486810325475e-06, "loss": 1.5255, "step": 6528 }, { "epoch": 1.126369360821185, "grad_norm": 0.66015625, "learning_rate": 8.084815245168434e-06, "loss": 1.4081, "step": 6529 }, { "epoch": 1.1265418787199173, "grad_norm": 0.6015625, "learning_rate": 8.082143821900246e-06, "loss": 1.4038, "step": 6530 }, { "epoch": 1.1267143966186492, "grad_norm": 0.57421875, "learning_rate": 8.079472540718833e-06, "loss": 1.4907, "step": 6531 }, { "epoch": 1.1268869145173812, "grad_norm": 0.55078125, "learning_rate": 8.076801401822088e-06, "loss": 1.413, "step": 6532 }, { "epoch": 1.1270594324161132, "grad_norm": 0.56640625, "learning_rate": 8.074130405407915e-06, "loss": 1.4993, "step": 6533 }, { "epoch": 1.1272319503148451, "grad_norm": 0.5859375, "learning_rate": 8.071459551674193e-06, "loss": 1.4363, "step": 6534 }, { "epoch": 1.127404468213577, "grad_norm": 0.5703125, "learning_rate": 8.0687888408188e-06, "loss": 1.4667, "step": 6535 }, { "epoch": 1.127576986112309, "grad_norm": 0.6015625, "learning_rate": 8.066118273039597e-06, "loss": 1.5419, "step": 6536 }, { "epoch": 1.1277495040110412, "grad_norm": 0.58203125, "learning_rate": 8.063447848534435e-06, "loss": 1.4496, "step": 6537 }, { "epoch": 1.1279220219097732, "grad_norm": 0.61328125, "learning_rate": 8.060777567501154e-06, "loss": 1.5354, "step": 6538 }, { "epoch": 1.1280945398085052, "grad_norm": 0.65234375, "learning_rate": 8.05810743013759e-06, "loss": 1.5538, "step": 6539 }, { "epoch": 1.1282670577072371, "grad_norm": 0.5625, "learning_rate": 8.05543743664156e-06, "loss": 1.4138, "step": 6540 }, { "epoch": 1.128439575605969, "grad_norm": 0.53125, "learning_rate": 8.052767587210878e-06, "loss": 1.3768, "step": 6541 }, { "epoch": 1.128612093504701, "grad_norm": 0.6328125, "learning_rate": 8.050097882043334e-06, "loss": 1.494, "step": 6542 }, { "epoch": 1.1287846114034332, "grad_norm": 0.5703125, "learning_rate": 8.047428321336728e-06, "loss": 1.3528, "step": 6543 }, { "epoch": 1.1289571293021652, "grad_norm": 0.63671875, "learning_rate": 8.04475890528883e-06, "loss": 1.4149, "step": 6544 }, { "epoch": 1.1291296472008971, "grad_norm": 0.60546875, "learning_rate": 8.042089634097406e-06, "loss": 1.5056, "step": 6545 }, { "epoch": 1.129302165099629, "grad_norm": 0.59765625, "learning_rate": 8.03942050796022e-06, "loss": 1.4004, "step": 6546 }, { "epoch": 1.129474682998361, "grad_norm": 0.60546875, "learning_rate": 8.036751527075011e-06, "loss": 1.476, "step": 6547 }, { "epoch": 1.129647200897093, "grad_norm": 0.5859375, "learning_rate": 8.034082691639519e-06, "loss": 1.4687, "step": 6548 }, { "epoch": 1.129819718795825, "grad_norm": 0.6328125, "learning_rate": 8.031414001851459e-06, "loss": 1.5199, "step": 6549 }, { "epoch": 1.129992236694557, "grad_norm": 0.5625, "learning_rate": 8.028745457908555e-06, "loss": 1.3649, "step": 6550 }, { "epoch": 1.1301647545932891, "grad_norm": 0.609375, "learning_rate": 8.026077060008503e-06, "loss": 1.4537, "step": 6551 }, { "epoch": 1.130337272492021, "grad_norm": 0.5546875, "learning_rate": 8.023408808348999e-06, "loss": 1.4067, "step": 6552 }, { "epoch": 1.130509790390753, "grad_norm": 0.58984375, "learning_rate": 8.020740703127715e-06, "loss": 1.4795, "step": 6553 }, { "epoch": 1.130682308289485, "grad_norm": 0.5703125, "learning_rate": 8.01807274454233e-06, "loss": 1.472, "step": 6554 }, { "epoch": 1.130854826188217, "grad_norm": 0.640625, "learning_rate": 8.015404932790499e-06, "loss": 1.4342, "step": 6555 }, { "epoch": 1.131027344086949, "grad_norm": 0.58203125, "learning_rate": 8.012737268069873e-06, "loss": 1.3848, "step": 6556 }, { "epoch": 1.1311998619856811, "grad_norm": 0.59375, "learning_rate": 8.010069750578082e-06, "loss": 1.3666, "step": 6557 }, { "epoch": 1.131372379884413, "grad_norm": 0.5703125, "learning_rate": 8.007402380512763e-06, "loss": 1.3574, "step": 6558 }, { "epoch": 1.131544897783145, "grad_norm": 0.60546875, "learning_rate": 8.004735158071524e-06, "loss": 1.5037, "step": 6559 }, { "epoch": 1.131717415681877, "grad_norm": 0.6171875, "learning_rate": 8.002068083451973e-06, "loss": 1.4382, "step": 6560 }, { "epoch": 1.131889933580609, "grad_norm": 0.5703125, "learning_rate": 7.999401156851697e-06, "loss": 1.452, "step": 6561 }, { "epoch": 1.132062451479341, "grad_norm": 0.60546875, "learning_rate": 7.996734378468284e-06, "loss": 1.4871, "step": 6562 }, { "epoch": 1.1322349693780729, "grad_norm": 0.60546875, "learning_rate": 7.99406774849931e-06, "loss": 1.4092, "step": 6563 }, { "epoch": 1.1324074872768048, "grad_norm": 0.57421875, "learning_rate": 7.991401267142329e-06, "loss": 1.3844, "step": 6564 }, { "epoch": 1.132580005175537, "grad_norm": 0.578125, "learning_rate": 7.988734934594893e-06, "loss": 1.4034, "step": 6565 }, { "epoch": 1.132752523074269, "grad_norm": 0.58203125, "learning_rate": 7.986068751054537e-06, "loss": 1.4734, "step": 6566 }, { "epoch": 1.132925040973001, "grad_norm": 0.6171875, "learning_rate": 7.983402716718796e-06, "loss": 1.3407, "step": 6567 }, { "epoch": 1.133097558871733, "grad_norm": 0.58203125, "learning_rate": 7.980736831785178e-06, "loss": 1.5068, "step": 6568 }, { "epoch": 1.1332700767704649, "grad_norm": 0.60546875, "learning_rate": 7.978071096451198e-06, "loss": 1.4126, "step": 6569 }, { "epoch": 1.1334425946691968, "grad_norm": 0.5859375, "learning_rate": 7.975405510914338e-06, "loss": 1.3754, "step": 6570 }, { "epoch": 1.133615112567929, "grad_norm": 0.5625, "learning_rate": 7.972740075372094e-06, "loss": 1.4839, "step": 6571 }, { "epoch": 1.133787630466661, "grad_norm": 0.5859375, "learning_rate": 7.970074790021928e-06, "loss": 1.379, "step": 6572 }, { "epoch": 1.133960148365393, "grad_norm": 0.59375, "learning_rate": 7.967409655061308e-06, "loss": 1.4018, "step": 6573 }, { "epoch": 1.134132666264125, "grad_norm": 0.640625, "learning_rate": 7.964744670687676e-06, "loss": 1.3949, "step": 6574 }, { "epoch": 1.1343051841628569, "grad_norm": 0.578125, "learning_rate": 7.962079837098481e-06, "loss": 1.4509, "step": 6575 }, { "epoch": 1.1344777020615888, "grad_norm": 0.55859375, "learning_rate": 7.959415154491142e-06, "loss": 1.3816, "step": 6576 }, { "epoch": 1.1346502199603208, "grad_norm": 0.5546875, "learning_rate": 7.956750623063077e-06, "loss": 1.4659, "step": 6577 }, { "epoch": 1.134822737859053, "grad_norm": 0.5546875, "learning_rate": 7.954086243011698e-06, "loss": 1.3871, "step": 6578 }, { "epoch": 1.134995255757785, "grad_norm": 0.609375, "learning_rate": 7.95142201453439e-06, "loss": 1.354, "step": 6579 }, { "epoch": 1.1351677736565169, "grad_norm": 0.57421875, "learning_rate": 7.948757937828542e-06, "loss": 1.405, "step": 6580 }, { "epoch": 1.1353402915552488, "grad_norm": 0.609375, "learning_rate": 7.946094013091518e-06, "loss": 1.4215, "step": 6581 }, { "epoch": 1.1355128094539808, "grad_norm": 0.59375, "learning_rate": 7.943430240520689e-06, "loss": 1.3913, "step": 6582 }, { "epoch": 1.1356853273527128, "grad_norm": 0.578125, "learning_rate": 7.940766620313394e-06, "loss": 1.394, "step": 6583 }, { "epoch": 1.135857845251445, "grad_norm": 0.58984375, "learning_rate": 7.938103152666976e-06, "loss": 1.374, "step": 6584 }, { "epoch": 1.136030363150177, "grad_norm": 0.64453125, "learning_rate": 7.935439837778757e-06, "loss": 1.3971, "step": 6585 }, { "epoch": 1.1362028810489089, "grad_norm": 0.66796875, "learning_rate": 7.932776675846058e-06, "loss": 1.4629, "step": 6586 }, { "epoch": 1.1363753989476408, "grad_norm": 0.59375, "learning_rate": 7.930113667066177e-06, "loss": 1.4402, "step": 6587 }, { "epoch": 1.1365479168463728, "grad_norm": 0.62890625, "learning_rate": 7.927450811636413e-06, "loss": 1.3988, "step": 6588 }, { "epoch": 1.1367204347451048, "grad_norm": 0.6015625, "learning_rate": 7.924788109754036e-06, "loss": 1.355, "step": 6589 }, { "epoch": 1.1368929526438367, "grad_norm": 0.62109375, "learning_rate": 7.922125561616329e-06, "loss": 1.397, "step": 6590 }, { "epoch": 1.1370654705425687, "grad_norm": 0.66796875, "learning_rate": 7.919463167420538e-06, "loss": 1.5036, "step": 6591 }, { "epoch": 1.1372379884413009, "grad_norm": 0.64453125, "learning_rate": 7.91680092736392e-06, "loss": 1.4029, "step": 6592 }, { "epoch": 1.1374105063400328, "grad_norm": 0.57421875, "learning_rate": 7.914138841643702e-06, "loss": 1.4439, "step": 6593 }, { "epoch": 1.1375830242387648, "grad_norm": 0.6328125, "learning_rate": 7.91147691045711e-06, "loss": 1.4961, "step": 6594 }, { "epoch": 1.1377555421374967, "grad_norm": 0.60546875, "learning_rate": 7.908815134001363e-06, "loss": 1.3272, "step": 6595 }, { "epoch": 1.1379280600362287, "grad_norm": 0.62890625, "learning_rate": 7.906153512473656e-06, "loss": 1.4737, "step": 6596 }, { "epoch": 1.1381005779349607, "grad_norm": 0.55859375, "learning_rate": 7.903492046071182e-06, "loss": 1.3569, "step": 6597 }, { "epoch": 1.1382730958336928, "grad_norm": 0.609375, "learning_rate": 7.900830734991111e-06, "loss": 1.4397, "step": 6598 }, { "epoch": 1.1384456137324248, "grad_norm": 0.69921875, "learning_rate": 7.89816957943062e-06, "loss": 1.3377, "step": 6599 }, { "epoch": 1.1386181316311568, "grad_norm": 0.5859375, "learning_rate": 7.895508579586857e-06, "loss": 1.5259, "step": 6600 }, { "epoch": 1.1386181316311568, "eval_loss": 1.408766746520996, "eval_runtime": 10.7693, "eval_samples_per_second": 95.085, "eval_steps_per_second": 23.771, "step": 6600 }, { "epoch": 1.1387906495298887, "grad_norm": 0.6328125, "learning_rate": 7.892847735656972e-06, "loss": 1.528, "step": 6601 }, { "epoch": 1.1389631674286207, "grad_norm": 0.58984375, "learning_rate": 7.890187047838087e-06, "loss": 1.4519, "step": 6602 }, { "epoch": 1.1391356853273527, "grad_norm": 0.609375, "learning_rate": 7.887526516327334e-06, "loss": 1.4995, "step": 6603 }, { "epoch": 1.1393082032260846, "grad_norm": 0.66015625, "learning_rate": 7.884866141321811e-06, "loss": 1.4422, "step": 6604 }, { "epoch": 1.1394807211248166, "grad_norm": 0.54296875, "learning_rate": 7.882205923018624e-06, "loss": 1.4153, "step": 6605 }, { "epoch": 1.1396532390235488, "grad_norm": 0.6171875, "learning_rate": 7.879545861614851e-06, "loss": 1.4228, "step": 6606 }, { "epoch": 1.1398257569222807, "grad_norm": 0.5390625, "learning_rate": 7.876885957307573e-06, "loss": 1.3649, "step": 6607 }, { "epoch": 1.1399982748210127, "grad_norm": 0.63671875, "learning_rate": 7.874226210293847e-06, "loss": 1.4664, "step": 6608 }, { "epoch": 1.1401707927197446, "grad_norm": 0.66796875, "learning_rate": 7.871566620770726e-06, "loss": 1.4722, "step": 6609 }, { "epoch": 1.1403433106184766, "grad_norm": 0.67578125, "learning_rate": 7.86890718893525e-06, "loss": 1.3472, "step": 6610 }, { "epoch": 1.1405158285172088, "grad_norm": 0.8515625, "learning_rate": 7.866247914984444e-06, "loss": 1.3521, "step": 6611 }, { "epoch": 1.1406883464159407, "grad_norm": 0.82421875, "learning_rate": 7.863588799115327e-06, "loss": 1.3674, "step": 6612 }, { "epoch": 1.1408608643146727, "grad_norm": 0.62890625, "learning_rate": 7.860929841524898e-06, "loss": 1.3509, "step": 6613 }, { "epoch": 1.1410333822134047, "grad_norm": 0.63671875, "learning_rate": 7.858271042410153e-06, "loss": 1.4391, "step": 6614 }, { "epoch": 1.1412059001121366, "grad_norm": 0.8125, "learning_rate": 7.855612401968072e-06, "loss": 1.5101, "step": 6615 }, { "epoch": 1.1413784180108686, "grad_norm": 0.62109375, "learning_rate": 7.852953920395623e-06, "loss": 1.4194, "step": 6616 }, { "epoch": 1.1415509359096006, "grad_norm": 0.56640625, "learning_rate": 7.85029559788976e-06, "loss": 1.458, "step": 6617 }, { "epoch": 1.1417234538083325, "grad_norm": 0.67578125, "learning_rate": 7.847637434647436e-06, "loss": 1.4617, "step": 6618 }, { "epoch": 1.1418959717070647, "grad_norm": 0.59765625, "learning_rate": 7.844979430865575e-06, "loss": 1.3958, "step": 6619 }, { "epoch": 1.1420684896057967, "grad_norm": 0.7109375, "learning_rate": 7.842321586741107e-06, "loss": 1.516, "step": 6620 }, { "epoch": 1.1422410075045286, "grad_norm": 0.86328125, "learning_rate": 7.839663902470933e-06, "loss": 1.4452, "step": 6621 }, { "epoch": 1.1424135254032606, "grad_norm": 4.40625, "learning_rate": 7.837006378251959e-06, "loss": 1.3733, "step": 6622 }, { "epoch": 1.1425860433019925, "grad_norm": 0.60546875, "learning_rate": 7.834349014281065e-06, "loss": 1.4408, "step": 6623 }, { "epoch": 1.1427585612007245, "grad_norm": 0.6640625, "learning_rate": 7.83169181075513e-06, "loss": 1.4705, "step": 6624 }, { "epoch": 1.1429310790994567, "grad_norm": 0.62109375, "learning_rate": 7.82903476787101e-06, "loss": 1.3875, "step": 6625 }, { "epoch": 1.1431035969981886, "grad_norm": 0.59765625, "learning_rate": 7.826377885825561e-06, "loss": 1.5407, "step": 6626 }, { "epoch": 1.1432761148969206, "grad_norm": 0.6328125, "learning_rate": 7.823721164815624e-06, "loss": 1.4636, "step": 6627 }, { "epoch": 1.1434486327956526, "grad_norm": 0.640625, "learning_rate": 7.821064605038016e-06, "loss": 1.4006, "step": 6628 }, { "epoch": 1.1436211506943845, "grad_norm": 0.6171875, "learning_rate": 7.818408206689561e-06, "loss": 1.4363, "step": 6629 }, { "epoch": 1.1437936685931165, "grad_norm": 0.62109375, "learning_rate": 7.815751969967052e-06, "loss": 1.3846, "step": 6630 }, { "epoch": 1.1439661864918484, "grad_norm": 0.57421875, "learning_rate": 7.813095895067289e-06, "loss": 1.4288, "step": 6631 }, { "epoch": 1.1441387043905804, "grad_norm": 0.63671875, "learning_rate": 7.810439982187045e-06, "loss": 1.3967, "step": 6632 }, { "epoch": 1.1443112222893126, "grad_norm": 0.66796875, "learning_rate": 7.80778423152309e-06, "loss": 1.3882, "step": 6633 }, { "epoch": 1.1444837401880446, "grad_norm": 0.58203125, "learning_rate": 7.805128643272171e-06, "loss": 1.4812, "step": 6634 }, { "epoch": 1.1446562580867765, "grad_norm": 0.6953125, "learning_rate": 7.802473217631043e-06, "loss": 1.4707, "step": 6635 }, { "epoch": 1.1448287759855085, "grad_norm": 0.58984375, "learning_rate": 7.799817954796427e-06, "loss": 1.4223, "step": 6636 }, { "epoch": 1.1450012938842404, "grad_norm": 0.67578125, "learning_rate": 7.797162854965046e-06, "loss": 1.401, "step": 6637 }, { "epoch": 1.1451738117829724, "grad_norm": 0.6796875, "learning_rate": 7.7945079183336e-06, "loss": 1.3815, "step": 6638 }, { "epoch": 1.1453463296817046, "grad_norm": 0.58203125, "learning_rate": 7.791853145098792e-06, "loss": 1.4514, "step": 6639 }, { "epoch": 1.1455188475804365, "grad_norm": 0.6484375, "learning_rate": 7.7891985354573e-06, "loss": 1.4598, "step": 6640 }, { "epoch": 1.1456913654791685, "grad_norm": 0.62109375, "learning_rate": 7.78654408960579e-06, "loss": 1.3204, "step": 6641 }, { "epoch": 1.1458638833779005, "grad_norm": 0.6171875, "learning_rate": 7.78388980774093e-06, "loss": 1.5014, "step": 6642 }, { "epoch": 1.1460364012766324, "grad_norm": 0.578125, "learning_rate": 7.781235690059356e-06, "loss": 1.3385, "step": 6643 }, { "epoch": 1.1462089191753644, "grad_norm": 0.59765625, "learning_rate": 7.77858173675771e-06, "loss": 1.4659, "step": 6644 }, { "epoch": 1.1463814370740963, "grad_norm": 0.5859375, "learning_rate": 7.775927948032602e-06, "loss": 1.3328, "step": 6645 }, { "epoch": 1.1465539549728285, "grad_norm": 2.5625, "learning_rate": 7.773274324080655e-06, "loss": 1.4962, "step": 6646 }, { "epoch": 1.1467264728715605, "grad_norm": 0.66796875, "learning_rate": 7.770620865098455e-06, "loss": 1.3894, "step": 6647 }, { "epoch": 1.1468989907702924, "grad_norm": 0.62109375, "learning_rate": 7.767967571282595e-06, "loss": 1.4642, "step": 6648 }, { "epoch": 1.1470715086690244, "grad_norm": 0.66796875, "learning_rate": 7.76531444282964e-06, "loss": 1.4378, "step": 6649 }, { "epoch": 1.1472440265677564, "grad_norm": 0.76171875, "learning_rate": 7.762661479936157e-06, "loss": 1.4204, "step": 6650 }, { "epoch": 1.1474165444664883, "grad_norm": 0.5703125, "learning_rate": 7.760008682798687e-06, "loss": 1.5035, "step": 6651 }, { "epoch": 1.1475890623652205, "grad_norm": 0.671875, "learning_rate": 7.757356051613774e-06, "loss": 1.4768, "step": 6652 }, { "epoch": 1.1477615802639525, "grad_norm": 0.640625, "learning_rate": 7.754703586577935e-06, "loss": 1.4607, "step": 6653 }, { "epoch": 1.1479340981626844, "grad_norm": 0.5546875, "learning_rate": 7.752051287887685e-06, "loss": 1.3546, "step": 6654 }, { "epoch": 1.1481066160614164, "grad_norm": 0.5703125, "learning_rate": 7.74939915573952e-06, "loss": 1.3577, "step": 6655 }, { "epoch": 1.1482791339601484, "grad_norm": 0.59375, "learning_rate": 7.74674719032993e-06, "loss": 1.402, "step": 6656 }, { "epoch": 1.1484516518588803, "grad_norm": 0.5703125, "learning_rate": 7.744095391855386e-06, "loss": 1.4069, "step": 6657 }, { "epoch": 1.1486241697576123, "grad_norm": 0.55859375, "learning_rate": 7.741443760512348e-06, "loss": 1.4762, "step": 6658 }, { "epoch": 1.1487966876563442, "grad_norm": 0.6171875, "learning_rate": 7.738792296497272e-06, "loss": 1.416, "step": 6659 }, { "epoch": 1.1489692055550764, "grad_norm": 0.62890625, "learning_rate": 7.736141000006589e-06, "loss": 1.4218, "step": 6660 }, { "epoch": 1.1491417234538084, "grad_norm": 0.734375, "learning_rate": 7.73348987123673e-06, "loss": 1.4855, "step": 6661 }, { "epoch": 1.1493142413525403, "grad_norm": 0.62890625, "learning_rate": 7.730838910384098e-06, "loss": 1.4101, "step": 6662 }, { "epoch": 1.1494867592512723, "grad_norm": 0.70703125, "learning_rate": 7.728188117645103e-06, "loss": 1.3949, "step": 6663 }, { "epoch": 1.1496592771500043, "grad_norm": 0.72265625, "learning_rate": 7.725537493216125e-06, "loss": 1.3943, "step": 6664 }, { "epoch": 1.1498317950487362, "grad_norm": 0.5859375, "learning_rate": 7.72288703729354e-06, "loss": 1.3493, "step": 6665 }, { "epoch": 1.1500043129474684, "grad_norm": 1.0546875, "learning_rate": 7.72023675007371e-06, "loss": 1.3391, "step": 6666 }, { "epoch": 1.1501768308462004, "grad_norm": 0.609375, "learning_rate": 7.71758663175299e-06, "loss": 1.3756, "step": 6667 }, { "epoch": 1.1503493487449323, "grad_norm": 0.625, "learning_rate": 7.714936682527712e-06, "loss": 1.4111, "step": 6668 }, { "epoch": 1.1505218666436643, "grad_norm": 0.58203125, "learning_rate": 7.712286902594205e-06, "loss": 1.3056, "step": 6669 }, { "epoch": 1.1506943845423963, "grad_norm": 0.5859375, "learning_rate": 7.709637292148771e-06, "loss": 1.4415, "step": 6670 }, { "epoch": 1.1508669024411282, "grad_norm": 0.6171875, "learning_rate": 7.706987851387724e-06, "loss": 1.4628, "step": 6671 }, { "epoch": 1.1510394203398602, "grad_norm": 0.66015625, "learning_rate": 7.704338580507341e-06, "loss": 1.4112, "step": 6672 }, { "epoch": 1.1512119382385921, "grad_norm": 0.6171875, "learning_rate": 7.701689479703899e-06, "loss": 1.422, "step": 6673 }, { "epoch": 1.1513844561373243, "grad_norm": 0.75390625, "learning_rate": 7.699040549173664e-06, "loss": 1.4545, "step": 6674 }, { "epoch": 1.1515569740360563, "grad_norm": 0.5625, "learning_rate": 7.69639178911288e-06, "loss": 1.3825, "step": 6675 }, { "epoch": 1.1517294919347882, "grad_norm": 0.6875, "learning_rate": 7.693743199717789e-06, "loss": 1.4206, "step": 6676 }, { "epoch": 1.1519020098335202, "grad_norm": 0.5859375, "learning_rate": 7.691094781184608e-06, "loss": 1.4968, "step": 6677 }, { "epoch": 1.1520745277322522, "grad_norm": 0.59375, "learning_rate": 7.688446533709556e-06, "loss": 1.4569, "step": 6678 }, { "epoch": 1.1522470456309841, "grad_norm": 0.60546875, "learning_rate": 7.685798457488824e-06, "loss": 1.435, "step": 6679 }, { "epoch": 1.1524195635297163, "grad_norm": 0.60546875, "learning_rate": 7.683150552718608e-06, "loss": 1.4661, "step": 6680 }, { "epoch": 1.1525920814284483, "grad_norm": 0.60546875, "learning_rate": 7.680502819595067e-06, "loss": 1.3847, "step": 6681 }, { "epoch": 1.1527645993271802, "grad_norm": 0.62890625, "learning_rate": 7.677855258314378e-06, "loss": 1.5205, "step": 6682 }, { "epoch": 1.1529371172259122, "grad_norm": 0.63671875, "learning_rate": 7.675207869072675e-06, "loss": 1.4652, "step": 6683 }, { "epoch": 1.1531096351246442, "grad_norm": 0.5546875, "learning_rate": 7.672560652066104e-06, "loss": 1.3736, "step": 6684 }, { "epoch": 1.153282153023376, "grad_norm": 0.578125, "learning_rate": 7.66991360749078e-06, "loss": 1.4111, "step": 6685 }, { "epoch": 1.153454670922108, "grad_norm": 0.60546875, "learning_rate": 7.667266735542816e-06, "loss": 1.4514, "step": 6686 }, { "epoch": 1.1536271888208403, "grad_norm": 0.6875, "learning_rate": 7.664620036418304e-06, "loss": 1.3545, "step": 6687 }, { "epoch": 1.1537997067195722, "grad_norm": 0.65234375, "learning_rate": 7.661973510313336e-06, "loss": 1.2812, "step": 6688 }, { "epoch": 1.1539722246183042, "grad_norm": 0.8359375, "learning_rate": 7.659327157423977e-06, "loss": 1.4696, "step": 6689 }, { "epoch": 1.1541447425170361, "grad_norm": 0.62109375, "learning_rate": 7.656680977946286e-06, "loss": 1.4847, "step": 6690 }, { "epoch": 1.154317260415768, "grad_norm": 0.66796875, "learning_rate": 7.654034972076314e-06, "loss": 1.3304, "step": 6691 }, { "epoch": 1.1544897783145, "grad_norm": 0.59375, "learning_rate": 7.651389140010087e-06, "loss": 1.3784, "step": 6692 }, { "epoch": 1.1546622962132322, "grad_norm": 0.78125, "learning_rate": 7.648743481943628e-06, "loss": 1.4342, "step": 6693 }, { "epoch": 1.1548348141119642, "grad_norm": 0.609375, "learning_rate": 7.646097998072941e-06, "loss": 1.4824, "step": 6694 }, { "epoch": 1.1550073320106962, "grad_norm": 0.61328125, "learning_rate": 7.643452688594026e-06, "loss": 1.3911, "step": 6695 }, { "epoch": 1.1551798499094281, "grad_norm": 0.578125, "learning_rate": 7.640807553702858e-06, "loss": 1.3827, "step": 6696 }, { "epoch": 1.15535236780816, "grad_norm": 0.71484375, "learning_rate": 7.63816259359541e-06, "loss": 1.3959, "step": 6697 }, { "epoch": 1.155524885706892, "grad_norm": 0.58984375, "learning_rate": 7.63551780846763e-06, "loss": 1.4016, "step": 6698 }, { "epoch": 1.155697403605624, "grad_norm": 0.62890625, "learning_rate": 7.632873198515468e-06, "loss": 1.4195, "step": 6699 }, { "epoch": 1.155869921504356, "grad_norm": 0.5625, "learning_rate": 7.630228763934848e-06, "loss": 1.396, "step": 6700 }, { "epoch": 1.155869921504356, "eval_loss": 1.408650517463684, "eval_runtime": 10.8332, "eval_samples_per_second": 94.525, "eval_steps_per_second": 23.631, "step": 6700 }, { "epoch": 1.1560424394030882, "grad_norm": 0.63671875, "learning_rate": 7.6275845049216914e-06, "loss": 1.4485, "step": 6701 }, { "epoch": 1.1562149573018201, "grad_norm": 0.5859375, "learning_rate": 7.624940421671893e-06, "loss": 1.4597, "step": 6702 }, { "epoch": 1.156387475200552, "grad_norm": 0.72265625, "learning_rate": 7.622296514381353e-06, "loss": 1.4152, "step": 6703 }, { "epoch": 1.156559993099284, "grad_norm": 0.5859375, "learning_rate": 7.619652783245941e-06, "loss": 1.4197, "step": 6704 }, { "epoch": 1.156732510998016, "grad_norm": 0.66796875, "learning_rate": 7.617009228461527e-06, "loss": 1.4941, "step": 6705 }, { "epoch": 1.156905028896748, "grad_norm": 0.60546875, "learning_rate": 7.6143658502239546e-06, "loss": 1.4696, "step": 6706 }, { "epoch": 1.1570775467954801, "grad_norm": 0.640625, "learning_rate": 7.611722648729065e-06, "loss": 1.3503, "step": 6707 }, { "epoch": 1.157250064694212, "grad_norm": 0.59765625, "learning_rate": 7.609079624172692e-06, "loss": 1.4071, "step": 6708 }, { "epoch": 1.157422582592944, "grad_norm": 0.609375, "learning_rate": 7.606436776750632e-06, "loss": 1.3535, "step": 6709 }, { "epoch": 1.157595100491676, "grad_norm": 0.6171875, "learning_rate": 7.603794106658696e-06, "loss": 1.515, "step": 6710 }, { "epoch": 1.157767618390408, "grad_norm": 0.59375, "learning_rate": 7.601151614092661e-06, "loss": 1.4618, "step": 6711 }, { "epoch": 1.15794013628914, "grad_norm": 0.6015625, "learning_rate": 7.598509299248307e-06, "loss": 1.4592, "step": 6712 }, { "epoch": 1.158112654187872, "grad_norm": 0.5703125, "learning_rate": 7.595867162321388e-06, "loss": 1.4808, "step": 6713 }, { "epoch": 1.1582851720866039, "grad_norm": 0.8125, "learning_rate": 7.593225203507652e-06, "loss": 1.3933, "step": 6714 }, { "epoch": 1.158457689985336, "grad_norm": 0.6171875, "learning_rate": 7.590583423002828e-06, "loss": 1.5316, "step": 6715 }, { "epoch": 1.158630207884068, "grad_norm": 0.6640625, "learning_rate": 7.5879418210026425e-06, "loss": 1.4468, "step": 6716 }, { "epoch": 1.1588027257828, "grad_norm": 0.6875, "learning_rate": 7.585300397702795e-06, "loss": 1.4647, "step": 6717 }, { "epoch": 1.158975243681532, "grad_norm": 0.6015625, "learning_rate": 7.5826591532989855e-06, "loss": 1.4042, "step": 6718 }, { "epoch": 1.159147761580264, "grad_norm": 0.5625, "learning_rate": 7.580018087986886e-06, "loss": 1.4184, "step": 6719 }, { "epoch": 1.1593202794789959, "grad_norm": 0.60546875, "learning_rate": 7.57737720196217e-06, "loss": 1.5034, "step": 6720 }, { "epoch": 1.159492797377728, "grad_norm": 0.7109375, "learning_rate": 7.574736495420487e-06, "loss": 1.4559, "step": 6721 }, { "epoch": 1.15966531527646, "grad_norm": 0.66015625, "learning_rate": 7.572095968557476e-06, "loss": 1.4681, "step": 6722 }, { "epoch": 1.159837833175192, "grad_norm": 0.74609375, "learning_rate": 7.56945562156877e-06, "loss": 1.4418, "step": 6723 }, { "epoch": 1.160010351073924, "grad_norm": 0.625, "learning_rate": 7.566815454649976e-06, "loss": 1.3457, "step": 6724 }, { "epoch": 1.1601828689726559, "grad_norm": 0.61328125, "learning_rate": 7.5641754679967e-06, "loss": 1.3257, "step": 6725 }, { "epoch": 1.1603553868713878, "grad_norm": 0.68359375, "learning_rate": 7.561535661804519e-06, "loss": 1.4105, "step": 6726 }, { "epoch": 1.1605279047701198, "grad_norm": 0.75390625, "learning_rate": 7.558896036269017e-06, "loss": 1.4474, "step": 6727 }, { "epoch": 1.160700422668852, "grad_norm": 0.6328125, "learning_rate": 7.556256591585747e-06, "loss": 1.4649, "step": 6728 }, { "epoch": 1.160872940567584, "grad_norm": 0.625, "learning_rate": 7.5536173279502615e-06, "loss": 1.423, "step": 6729 }, { "epoch": 1.161045458466316, "grad_norm": 0.5625, "learning_rate": 7.550978245558084e-06, "loss": 1.4107, "step": 6730 }, { "epoch": 1.1612179763650479, "grad_norm": 0.59765625, "learning_rate": 7.548339344604745e-06, "loss": 1.4978, "step": 6731 }, { "epoch": 1.1613904942637798, "grad_norm": 0.66015625, "learning_rate": 7.5457006252857445e-06, "loss": 1.5156, "step": 6732 }, { "epoch": 1.1615630121625118, "grad_norm": 0.6328125, "learning_rate": 7.543062087796579e-06, "loss": 1.3659, "step": 6733 }, { "epoch": 1.161735530061244, "grad_norm": 0.6015625, "learning_rate": 7.540423732332721e-06, "loss": 1.4551, "step": 6734 }, { "epoch": 1.161908047959976, "grad_norm": 0.59375, "learning_rate": 7.537785559089646e-06, "loss": 1.3231, "step": 6735 }, { "epoch": 1.162080565858708, "grad_norm": 0.5859375, "learning_rate": 7.535147568262799e-06, "loss": 1.4627, "step": 6736 }, { "epoch": 1.1622530837574399, "grad_norm": 0.62890625, "learning_rate": 7.532509760047621e-06, "loss": 1.4135, "step": 6737 }, { "epoch": 1.1624256016561718, "grad_norm": 0.59765625, "learning_rate": 7.529872134639537e-06, "loss": 1.3578, "step": 6738 }, { "epoch": 1.1625981195549038, "grad_norm": 0.65625, "learning_rate": 7.527234692233957e-06, "loss": 1.3597, "step": 6739 }, { "epoch": 1.1627706374536357, "grad_norm": 0.58203125, "learning_rate": 7.524597433026286e-06, "loss": 1.3543, "step": 6740 }, { "epoch": 1.1629431553523677, "grad_norm": 0.6328125, "learning_rate": 7.521960357211904e-06, "loss": 1.4293, "step": 6741 }, { "epoch": 1.1631156732510999, "grad_norm": 0.62109375, "learning_rate": 7.519323464986182e-06, "loss": 1.4101, "step": 6742 }, { "epoch": 1.1632881911498318, "grad_norm": 1.1640625, "learning_rate": 7.516686756544475e-06, "loss": 1.3559, "step": 6743 }, { "epoch": 1.1634607090485638, "grad_norm": 0.66796875, "learning_rate": 7.514050232082133e-06, "loss": 1.5265, "step": 6744 }, { "epoch": 1.1636332269472958, "grad_norm": 0.63671875, "learning_rate": 7.511413891794482e-06, "loss": 1.4018, "step": 6745 }, { "epoch": 1.1638057448460277, "grad_norm": 0.65234375, "learning_rate": 7.508777735876839e-06, "loss": 1.4592, "step": 6746 }, { "epoch": 1.1639782627447597, "grad_norm": 0.61328125, "learning_rate": 7.5061417645245034e-06, "loss": 1.3904, "step": 6747 }, { "epoch": 1.1641507806434919, "grad_norm": 0.5703125, "learning_rate": 7.503505977932775e-06, "loss": 1.4482, "step": 6748 }, { "epoch": 1.1643232985422238, "grad_norm": 0.640625, "learning_rate": 7.500870376296918e-06, "loss": 1.4214, "step": 6749 }, { "epoch": 1.1644958164409558, "grad_norm": 0.62890625, "learning_rate": 7.4982349598122e-06, "loss": 1.3156, "step": 6750 }, { "epoch": 1.1646683343396877, "grad_norm": 0.6484375, "learning_rate": 7.495599728673867e-06, "loss": 1.3671, "step": 6751 }, { "epoch": 1.1648408522384197, "grad_norm": 0.6328125, "learning_rate": 7.492964683077156e-06, "loss": 1.4576, "step": 6752 }, { "epoch": 1.1650133701371517, "grad_norm": 0.58203125, "learning_rate": 7.490329823217286e-06, "loss": 1.321, "step": 6753 }, { "epoch": 1.1651858880358836, "grad_norm": 0.5859375, "learning_rate": 7.48769514928946e-06, "loss": 1.3814, "step": 6754 }, { "epoch": 1.1653584059346156, "grad_norm": 0.62109375, "learning_rate": 7.485060661488879e-06, "loss": 1.4575, "step": 6755 }, { "epoch": 1.1655309238333478, "grad_norm": 0.65625, "learning_rate": 7.482426360010717e-06, "loss": 1.442, "step": 6756 }, { "epoch": 1.1657034417320797, "grad_norm": 0.58203125, "learning_rate": 7.479792245050142e-06, "loss": 1.4058, "step": 6757 }, { "epoch": 1.1658759596308117, "grad_norm": 0.6171875, "learning_rate": 7.477158316802302e-06, "loss": 1.4523, "step": 6758 }, { "epoch": 1.1660484775295437, "grad_norm": 0.66015625, "learning_rate": 7.474524575462341e-06, "loss": 1.4831, "step": 6759 }, { "epoch": 1.1662209954282756, "grad_norm": 0.671875, "learning_rate": 7.471891021225376e-06, "loss": 1.5114, "step": 6760 }, { "epoch": 1.1663935133270076, "grad_norm": 0.62890625, "learning_rate": 7.469257654286524e-06, "loss": 1.4559, "step": 6761 }, { "epoch": 1.1665660312257398, "grad_norm": 0.53515625, "learning_rate": 7.466624474840872e-06, "loss": 1.3233, "step": 6762 }, { "epoch": 1.1667385491244717, "grad_norm": 0.578125, "learning_rate": 7.463991483083512e-06, "loss": 1.432, "step": 6763 }, { "epoch": 1.1669110670232037, "grad_norm": 0.8203125, "learning_rate": 7.461358679209509e-06, "loss": 1.3795, "step": 6764 }, { "epoch": 1.1670835849219356, "grad_norm": 0.609375, "learning_rate": 7.458726063413918e-06, "loss": 1.2887, "step": 6765 }, { "epoch": 1.1672561028206676, "grad_norm": 0.55859375, "learning_rate": 7.4560936358917745e-06, "loss": 1.4637, "step": 6766 }, { "epoch": 1.1674286207193996, "grad_norm": 0.5703125, "learning_rate": 7.4534613968381146e-06, "loss": 1.4384, "step": 6767 }, { "epoch": 1.1676011386181315, "grad_norm": 0.58984375, "learning_rate": 7.450829346447941e-06, "loss": 1.4334, "step": 6768 }, { "epoch": 1.1677736565168637, "grad_norm": 0.5703125, "learning_rate": 7.448197484916264e-06, "loss": 1.4752, "step": 6769 }, { "epoch": 1.1679461744155957, "grad_norm": 0.61328125, "learning_rate": 7.4455658124380545e-06, "loss": 1.3971, "step": 6770 }, { "epoch": 1.1681186923143276, "grad_norm": 0.67578125, "learning_rate": 7.442934329208291e-06, "loss": 1.457, "step": 6771 }, { "epoch": 1.1682912102130596, "grad_norm": 0.5625, "learning_rate": 7.440303035421934e-06, "loss": 1.3178, "step": 6772 }, { "epoch": 1.1684637281117916, "grad_norm": 0.59375, "learning_rate": 7.43767193127392e-06, "loss": 1.4777, "step": 6773 }, { "epoch": 1.1686362460105235, "grad_norm": 0.60546875, "learning_rate": 7.4350410169591815e-06, "loss": 1.3621, "step": 6774 }, { "epoch": 1.1688087639092557, "grad_norm": 0.59765625, "learning_rate": 7.432410292672627e-06, "loss": 1.3563, "step": 6775 }, { "epoch": 1.1689812818079877, "grad_norm": 0.65625, "learning_rate": 7.429779758609165e-06, "loss": 1.3639, "step": 6776 }, { "epoch": 1.1691537997067196, "grad_norm": 0.6171875, "learning_rate": 7.427149414963676e-06, "loss": 1.4728, "step": 6777 }, { "epoch": 1.1693263176054516, "grad_norm": 0.5625, "learning_rate": 7.424519261931036e-06, "loss": 1.426, "step": 6778 }, { "epoch": 1.1694988355041835, "grad_norm": 0.76171875, "learning_rate": 7.421889299706098e-06, "loss": 1.4206, "step": 6779 }, { "epoch": 1.1696713534029155, "grad_norm": 0.6484375, "learning_rate": 7.419259528483713e-06, "loss": 1.4275, "step": 6780 }, { "epoch": 1.1698438713016475, "grad_norm": 0.6015625, "learning_rate": 7.416629948458705e-06, "loss": 1.5017, "step": 6781 }, { "epoch": 1.1700163892003794, "grad_norm": 0.74609375, "learning_rate": 7.414000559825893e-06, "loss": 1.3737, "step": 6782 }, { "epoch": 1.1701889070991116, "grad_norm": 0.60546875, "learning_rate": 7.411371362780076e-06, "loss": 1.4951, "step": 6783 }, { "epoch": 1.1703614249978436, "grad_norm": 0.66796875, "learning_rate": 7.408742357516046e-06, "loss": 1.4681, "step": 6784 }, { "epoch": 1.1705339428965755, "grad_norm": 0.58984375, "learning_rate": 7.406113544228571e-06, "loss": 1.4076, "step": 6785 }, { "epoch": 1.1707064607953075, "grad_norm": 0.6640625, "learning_rate": 7.40348492311241e-06, "loss": 1.5682, "step": 6786 }, { "epoch": 1.1708789786940395, "grad_norm": 0.640625, "learning_rate": 7.400856494362314e-06, "loss": 1.4475, "step": 6787 }, { "epoch": 1.1710514965927714, "grad_norm": 0.65234375, "learning_rate": 7.398228258173006e-06, "loss": 1.3377, "step": 6788 }, { "epoch": 1.1712240144915036, "grad_norm": 0.56640625, "learning_rate": 7.395600214739209e-06, "loss": 1.4687, "step": 6789 }, { "epoch": 1.1713965323902356, "grad_norm": 0.59765625, "learning_rate": 7.392972364255615e-06, "loss": 1.4127, "step": 6790 }, { "epoch": 1.1715690502889675, "grad_norm": 0.578125, "learning_rate": 7.390344706916923e-06, "loss": 1.4401, "step": 6791 }, { "epoch": 1.1717415681876995, "grad_norm": 0.62109375, "learning_rate": 7.3877172429178e-06, "loss": 1.4409, "step": 6792 }, { "epoch": 1.1719140860864314, "grad_norm": 0.59375, "learning_rate": 7.385089972452907e-06, "loss": 1.3877, "step": 6793 }, { "epoch": 1.1720866039851634, "grad_norm": 0.63671875, "learning_rate": 7.382462895716882e-06, "loss": 1.3832, "step": 6794 }, { "epoch": 1.1722591218838954, "grad_norm": 0.62109375, "learning_rate": 7.379836012904367e-06, "loss": 1.3738, "step": 6795 }, { "epoch": 1.1724316397826275, "grad_norm": 0.59765625, "learning_rate": 7.377209324209968e-06, "loss": 1.4374, "step": 6796 }, { "epoch": 1.1726041576813595, "grad_norm": 0.58984375, "learning_rate": 7.374582829828294e-06, "loss": 1.3218, "step": 6797 }, { "epoch": 1.1727766755800915, "grad_norm": 0.6171875, "learning_rate": 7.3719565299539235e-06, "loss": 1.457, "step": 6798 }, { "epoch": 1.1729491934788234, "grad_norm": 0.62109375, "learning_rate": 7.369330424781438e-06, "loss": 1.4775, "step": 6799 }, { "epoch": 1.1731217113775554, "grad_norm": 0.546875, "learning_rate": 7.3667045145053875e-06, "loss": 1.3732, "step": 6800 }, { "epoch": 1.1731217113775554, "eval_loss": 1.4084804058074951, "eval_runtime": 10.8179, "eval_samples_per_second": 94.658, "eval_steps_per_second": 23.665, "step": 6800 }, { "epoch": 1.1732942292762873, "grad_norm": 0.61328125, "learning_rate": 7.364078799320324e-06, "loss": 1.3311, "step": 6801 }, { "epoch": 1.1734667471750195, "grad_norm": 0.56640625, "learning_rate": 7.3614532794207714e-06, "loss": 1.3944, "step": 6802 }, { "epoch": 1.1736392650737515, "grad_norm": 0.6796875, "learning_rate": 7.358827955001244e-06, "loss": 1.3655, "step": 6803 }, { "epoch": 1.1738117829724835, "grad_norm": 0.5859375, "learning_rate": 7.35620282625625e-06, "loss": 1.3973, "step": 6804 }, { "epoch": 1.1739843008712154, "grad_norm": 0.60546875, "learning_rate": 7.353577893380266e-06, "loss": 1.4846, "step": 6805 }, { "epoch": 1.1741568187699474, "grad_norm": 0.95703125, "learning_rate": 7.350953156567771e-06, "loss": 1.422, "step": 6806 }, { "epoch": 1.1743293366686793, "grad_norm": 0.640625, "learning_rate": 7.348328616013213e-06, "loss": 1.3805, "step": 6807 }, { "epoch": 1.1745018545674113, "grad_norm": 0.59375, "learning_rate": 7.345704271911043e-06, "loss": 1.427, "step": 6808 }, { "epoch": 1.1746743724661433, "grad_norm": 0.6015625, "learning_rate": 7.343080124455684e-06, "loss": 1.4001, "step": 6809 }, { "epoch": 1.1748468903648754, "grad_norm": 0.6328125, "learning_rate": 7.340456173841552e-06, "loss": 1.468, "step": 6810 }, { "epoch": 1.1750194082636074, "grad_norm": 0.5859375, "learning_rate": 7.337832420263042e-06, "loss": 1.4489, "step": 6811 }, { "epoch": 1.1751919261623394, "grad_norm": 0.62109375, "learning_rate": 7.3352088639145425e-06, "loss": 1.4517, "step": 6812 }, { "epoch": 1.1753644440610713, "grad_norm": 0.6640625, "learning_rate": 7.332585504990419e-06, "loss": 1.3657, "step": 6813 }, { "epoch": 1.1755369619598033, "grad_norm": 0.59375, "learning_rate": 7.329962343685031e-06, "loss": 1.336, "step": 6814 }, { "epoch": 1.1757094798585352, "grad_norm": 0.57421875, "learning_rate": 7.327339380192712e-06, "loss": 1.3534, "step": 6815 }, { "epoch": 1.1758819977572674, "grad_norm": 0.61328125, "learning_rate": 7.324716614707794e-06, "loss": 1.5315, "step": 6816 }, { "epoch": 1.1760545156559994, "grad_norm": 0.69921875, "learning_rate": 7.322094047424584e-06, "loss": 1.4136, "step": 6817 }, { "epoch": 1.1762270335547313, "grad_norm": 0.5703125, "learning_rate": 7.319471678537376e-06, "loss": 1.4698, "step": 6818 }, { "epoch": 1.1763995514534633, "grad_norm": 0.59765625, "learning_rate": 7.31684950824046e-06, "loss": 1.3532, "step": 6819 }, { "epoch": 1.1765720693521953, "grad_norm": 0.6796875, "learning_rate": 7.314227536728096e-06, "loss": 1.4208, "step": 6820 }, { "epoch": 1.1767445872509272, "grad_norm": 0.57421875, "learning_rate": 7.311605764194538e-06, "loss": 1.4014, "step": 6821 }, { "epoch": 1.1769171051496592, "grad_norm": 0.546875, "learning_rate": 7.308984190834019e-06, "loss": 1.4367, "step": 6822 }, { "epoch": 1.1770896230483912, "grad_norm": 0.62890625, "learning_rate": 7.306362816840771e-06, "loss": 1.4118, "step": 6823 }, { "epoch": 1.1772621409471233, "grad_norm": 0.57421875, "learning_rate": 7.3037416424089925e-06, "loss": 1.4064, "step": 6824 }, { "epoch": 1.1774346588458553, "grad_norm": 0.59375, "learning_rate": 7.301120667732884e-06, "loss": 1.4058, "step": 6825 }, { "epoch": 1.1776071767445873, "grad_norm": 0.5859375, "learning_rate": 7.2984998930066145e-06, "loss": 1.5118, "step": 6826 }, { "epoch": 1.1777796946433192, "grad_norm": 0.6171875, "learning_rate": 7.295879318424356e-06, "loss": 1.559, "step": 6827 }, { "epoch": 1.1779522125420512, "grad_norm": 0.62109375, "learning_rate": 7.2932589441802506e-06, "loss": 1.4733, "step": 6828 }, { "epoch": 1.1781247304407831, "grad_norm": 0.6328125, "learning_rate": 7.290638770468439e-06, "loss": 1.412, "step": 6829 }, { "epoch": 1.1782972483395153, "grad_norm": 0.56640625, "learning_rate": 7.288018797483034e-06, "loss": 1.3695, "step": 6830 }, { "epoch": 1.1784697662382473, "grad_norm": 0.98828125, "learning_rate": 7.285399025418144e-06, "loss": 1.4087, "step": 6831 }, { "epoch": 1.1786422841369792, "grad_norm": 0.57421875, "learning_rate": 7.282779454467851e-06, "loss": 1.4296, "step": 6832 }, { "epoch": 1.1788148020357112, "grad_norm": 0.640625, "learning_rate": 7.280160084826239e-06, "loss": 1.4193, "step": 6833 }, { "epoch": 1.1789873199344432, "grad_norm": 0.65625, "learning_rate": 7.2775409166873604e-06, "loss": 1.4459, "step": 6834 }, { "epoch": 1.1791598378331751, "grad_norm": 0.609375, "learning_rate": 7.274921950245258e-06, "loss": 1.4489, "step": 6835 }, { "epoch": 1.179332355731907, "grad_norm": 0.578125, "learning_rate": 7.27230318569397e-06, "loss": 1.3256, "step": 6836 }, { "epoch": 1.1795048736306393, "grad_norm": 0.53515625, "learning_rate": 7.269684623227502e-06, "loss": 1.3541, "step": 6837 }, { "epoch": 1.1796773915293712, "grad_norm": 0.63671875, "learning_rate": 7.267066263039862e-06, "loss": 1.4544, "step": 6838 }, { "epoch": 1.1798499094281032, "grad_norm": 0.55859375, "learning_rate": 7.2644481053250215e-06, "loss": 1.3666, "step": 6839 }, { "epoch": 1.1800224273268352, "grad_norm": 0.625, "learning_rate": 7.261830150276964e-06, "loss": 1.3835, "step": 6840 }, { "epoch": 1.1801949452255671, "grad_norm": 0.5703125, "learning_rate": 7.259212398089636e-06, "loss": 1.4254, "step": 6841 }, { "epoch": 1.180367463124299, "grad_norm": 0.8359375, "learning_rate": 7.25659484895698e-06, "loss": 1.4535, "step": 6842 }, { "epoch": 1.1805399810230313, "grad_norm": 0.5859375, "learning_rate": 7.253977503072916e-06, "loss": 1.3889, "step": 6843 }, { "epoch": 1.1807124989217632, "grad_norm": 0.60546875, "learning_rate": 7.251360360631359e-06, "loss": 1.4511, "step": 6844 }, { "epoch": 1.1808850168204952, "grad_norm": 0.66015625, "learning_rate": 7.2487434218262e-06, "loss": 1.4949, "step": 6845 }, { "epoch": 1.1810575347192271, "grad_norm": 0.6171875, "learning_rate": 7.24612668685132e-06, "loss": 1.4106, "step": 6846 }, { "epoch": 1.181230052617959, "grad_norm": 0.59375, "learning_rate": 7.24351015590058e-06, "loss": 1.4674, "step": 6847 }, { "epoch": 1.181402570516691, "grad_norm": 0.58984375, "learning_rate": 7.240893829167834e-06, "loss": 1.4853, "step": 6848 }, { "epoch": 1.181575088415423, "grad_norm": 0.59765625, "learning_rate": 7.23827770684691e-06, "loss": 1.4398, "step": 6849 }, { "epoch": 1.181747606314155, "grad_norm": 0.6015625, "learning_rate": 7.2356617891316275e-06, "loss": 1.4522, "step": 6850 }, { "epoch": 1.1819201242128872, "grad_norm": 0.66015625, "learning_rate": 7.233046076215798e-06, "loss": 1.5239, "step": 6851 }, { "epoch": 1.1820926421116191, "grad_norm": 0.59375, "learning_rate": 7.230430568293199e-06, "loss": 1.5447, "step": 6852 }, { "epoch": 1.182265160010351, "grad_norm": 0.6484375, "learning_rate": 7.227815265557611e-06, "loss": 1.4014, "step": 6853 }, { "epoch": 1.182437677909083, "grad_norm": 0.65234375, "learning_rate": 7.225200168202789e-06, "loss": 1.4451, "step": 6854 }, { "epoch": 1.182610195807815, "grad_norm": 0.58203125, "learning_rate": 7.222585276422477e-06, "loss": 1.4846, "step": 6855 }, { "epoch": 1.182782713706547, "grad_norm": 0.59765625, "learning_rate": 7.219970590410399e-06, "loss": 1.4114, "step": 6856 }, { "epoch": 1.1829552316052792, "grad_norm": 0.59765625, "learning_rate": 7.217356110360275e-06, "loss": 1.4652, "step": 6857 }, { "epoch": 1.1831277495040111, "grad_norm": 0.6484375, "learning_rate": 7.214741836465793e-06, "loss": 1.4706, "step": 6858 }, { "epoch": 1.183300267402743, "grad_norm": 0.62109375, "learning_rate": 7.21212776892064e-06, "loss": 1.4584, "step": 6859 }, { "epoch": 1.183472785301475, "grad_norm": 0.59375, "learning_rate": 7.209513907918479e-06, "loss": 1.4025, "step": 6860 }, { "epoch": 1.183645303200207, "grad_norm": 0.61328125, "learning_rate": 7.206900253652969e-06, "loss": 1.3999, "step": 6861 }, { "epoch": 1.183817821098939, "grad_norm": 0.59765625, "learning_rate": 7.204286806317736e-06, "loss": 1.3659, "step": 6862 }, { "epoch": 1.183990338997671, "grad_norm": 0.63671875, "learning_rate": 7.2016735661064086e-06, "loss": 1.4903, "step": 6863 }, { "epoch": 1.1841628568964029, "grad_norm": 0.65234375, "learning_rate": 7.1990605332125825e-06, "loss": 1.4975, "step": 6864 }, { "epoch": 1.184335374795135, "grad_norm": 0.6171875, "learning_rate": 7.1964477078298574e-06, "loss": 1.3395, "step": 6865 }, { "epoch": 1.184507892693867, "grad_norm": 0.640625, "learning_rate": 7.193835090151803e-06, "loss": 1.3884, "step": 6866 }, { "epoch": 1.184680410592599, "grad_norm": 0.6015625, "learning_rate": 7.191222680371975e-06, "loss": 1.3982, "step": 6867 }, { "epoch": 1.184852928491331, "grad_norm": 0.58984375, "learning_rate": 7.188610478683926e-06, "loss": 1.3685, "step": 6868 }, { "epoch": 1.185025446390063, "grad_norm": 0.5625, "learning_rate": 7.1859984852811775e-06, "loss": 1.4035, "step": 6869 }, { "epoch": 1.1851979642887949, "grad_norm": 0.65234375, "learning_rate": 7.183386700357245e-06, "loss": 1.3266, "step": 6870 }, { "epoch": 1.185370482187527, "grad_norm": 0.69140625, "learning_rate": 7.1807751241056215e-06, "loss": 1.4277, "step": 6871 }, { "epoch": 1.185543000086259, "grad_norm": 0.57421875, "learning_rate": 7.178163756719795e-06, "loss": 1.3906, "step": 6872 }, { "epoch": 1.185715517984991, "grad_norm": 0.6171875, "learning_rate": 7.175552598393227e-06, "loss": 1.4967, "step": 6873 }, { "epoch": 1.185888035883723, "grad_norm": 0.65234375, "learning_rate": 7.172941649319374e-06, "loss": 1.3458, "step": 6874 }, { "epoch": 1.186060553782455, "grad_norm": 0.59765625, "learning_rate": 7.1703309096916625e-06, "loss": 1.4214, "step": 6875 }, { "epoch": 1.1862330716811869, "grad_norm": 0.55078125, "learning_rate": 7.167720379703522e-06, "loss": 1.3223, "step": 6876 }, { "epoch": 1.1864055895799188, "grad_norm": 0.6015625, "learning_rate": 7.165110059548353e-06, "loss": 1.4065, "step": 6877 }, { "epoch": 1.186578107478651, "grad_norm": 0.7109375, "learning_rate": 7.162499949419543e-06, "loss": 1.4116, "step": 6878 }, { "epoch": 1.186750625377383, "grad_norm": 0.59375, "learning_rate": 7.159890049510463e-06, "loss": 1.431, "step": 6879 }, { "epoch": 1.186923143276115, "grad_norm": 0.64453125, "learning_rate": 7.157280360014478e-06, "loss": 1.3818, "step": 6880 }, { "epoch": 1.1870956611748469, "grad_norm": 0.5859375, "learning_rate": 7.154670881124925e-06, "loss": 1.4078, "step": 6881 }, { "epoch": 1.1872681790735788, "grad_norm": 0.59375, "learning_rate": 7.152061613035128e-06, "loss": 1.3175, "step": 6882 }, { "epoch": 1.1874406969723108, "grad_norm": 0.56640625, "learning_rate": 7.149452555938407e-06, "loss": 1.4258, "step": 6883 }, { "epoch": 1.187613214871043, "grad_norm": 0.55859375, "learning_rate": 7.146843710028049e-06, "loss": 1.3981, "step": 6884 }, { "epoch": 1.187785732769775, "grad_norm": 0.609375, "learning_rate": 7.144235075497339e-06, "loss": 1.4337, "step": 6885 }, { "epoch": 1.187958250668507, "grad_norm": 0.56640625, "learning_rate": 7.141626652539533e-06, "loss": 1.4078, "step": 6886 }, { "epoch": 1.1881307685672389, "grad_norm": 0.6015625, "learning_rate": 7.139018441347889e-06, "loss": 1.3799, "step": 6887 }, { "epoch": 1.1883032864659708, "grad_norm": 0.7109375, "learning_rate": 7.136410442115631e-06, "loss": 1.4632, "step": 6888 }, { "epoch": 1.1884758043647028, "grad_norm": 0.62890625, "learning_rate": 7.133802655035984e-06, "loss": 1.4898, "step": 6889 }, { "epoch": 1.1886483222634348, "grad_norm": 0.5859375, "learning_rate": 7.131195080302144e-06, "loss": 1.4195, "step": 6890 }, { "epoch": 1.1888208401621667, "grad_norm": 0.5625, "learning_rate": 7.128587718107298e-06, "loss": 1.4984, "step": 6891 }, { "epoch": 1.188993358060899, "grad_norm": 0.63671875, "learning_rate": 7.125980568644612e-06, "loss": 1.4518, "step": 6892 }, { "epoch": 1.1891658759596309, "grad_norm": 0.6328125, "learning_rate": 7.123373632107247e-06, "loss": 1.4977, "step": 6893 }, { "epoch": 1.1893383938583628, "grad_norm": 0.6328125, "learning_rate": 7.1207669086883366e-06, "loss": 1.3991, "step": 6894 }, { "epoch": 1.1895109117570948, "grad_norm": 0.53125, "learning_rate": 7.118160398581004e-06, "loss": 1.3576, "step": 6895 }, { "epoch": 1.1896834296558267, "grad_norm": 0.60546875, "learning_rate": 7.115554101978354e-06, "loss": 1.4036, "step": 6896 }, { "epoch": 1.1898559475545587, "grad_norm": 0.6953125, "learning_rate": 7.112948019073481e-06, "loss": 1.3978, "step": 6897 }, { "epoch": 1.1900284654532909, "grad_norm": 0.63671875, "learning_rate": 7.110342150059457e-06, "loss": 1.4511, "step": 6898 }, { "epoch": 1.1902009833520228, "grad_norm": 0.65625, "learning_rate": 7.107736495129338e-06, "loss": 1.4181, "step": 6899 }, { "epoch": 1.1903735012507548, "grad_norm": 0.5625, "learning_rate": 7.1051310544761775e-06, "loss": 1.3462, "step": 6900 }, { "epoch": 1.1903735012507548, "eval_loss": 1.408332109451294, "eval_runtime": 10.8573, "eval_samples_per_second": 94.315, "eval_steps_per_second": 23.579, "step": 6900 }, { "epoch": 1.1905460191494868, "grad_norm": 0.62109375, "learning_rate": 7.102525828292993e-06, "loss": 1.5048, "step": 6901 }, { "epoch": 1.1907185370482187, "grad_norm": 0.62109375, "learning_rate": 7.099920816772803e-06, "loss": 1.4587, "step": 6902 }, { "epoch": 1.1908910549469507, "grad_norm": 0.59765625, "learning_rate": 7.097316020108594e-06, "loss": 1.4227, "step": 6903 }, { "epoch": 1.1910635728456826, "grad_norm": 0.5703125, "learning_rate": 7.094711438493355e-06, "loss": 1.349, "step": 6904 }, { "epoch": 1.1912360907444146, "grad_norm": 0.5703125, "learning_rate": 7.0921070721200445e-06, "loss": 1.4222, "step": 6905 }, { "epoch": 1.1914086086431468, "grad_norm": 0.89453125, "learning_rate": 7.089502921181613e-06, "loss": 1.3994, "step": 6906 }, { "epoch": 1.1915811265418788, "grad_norm": 0.63671875, "learning_rate": 7.086898985870987e-06, "loss": 1.4528, "step": 6907 }, { "epoch": 1.1917536444406107, "grad_norm": 0.5703125, "learning_rate": 7.084295266381089e-06, "loss": 1.4027, "step": 6908 }, { "epoch": 1.1919261623393427, "grad_norm": 0.55859375, "learning_rate": 7.081691762904814e-06, "loss": 1.4115, "step": 6909 }, { "epoch": 1.1920986802380746, "grad_norm": 0.5703125, "learning_rate": 7.079088475635051e-06, "loss": 1.4856, "step": 6910 }, { "epoch": 1.1922711981368066, "grad_norm": 0.62109375, "learning_rate": 7.076485404764659e-06, "loss": 1.4102, "step": 6911 }, { "epoch": 1.1924437160355388, "grad_norm": 0.6171875, "learning_rate": 7.073882550486501e-06, "loss": 1.3498, "step": 6912 }, { "epoch": 1.1926162339342707, "grad_norm": 0.60546875, "learning_rate": 7.071279912993403e-06, "loss": 1.4475, "step": 6913 }, { "epoch": 1.1927887518330027, "grad_norm": 0.578125, "learning_rate": 7.068677492478191e-06, "loss": 1.4666, "step": 6914 }, { "epoch": 1.1929612697317347, "grad_norm": 0.57421875, "learning_rate": 7.066075289133662e-06, "loss": 1.4479, "step": 6915 }, { "epoch": 1.1931337876304666, "grad_norm": 0.5859375, "learning_rate": 7.063473303152608e-06, "loss": 1.3826, "step": 6916 }, { "epoch": 1.1933063055291986, "grad_norm": 0.76171875, "learning_rate": 7.0608715347278045e-06, "loss": 1.4623, "step": 6917 }, { "epoch": 1.1934788234279305, "grad_norm": 0.6328125, "learning_rate": 7.0582699840519996e-06, "loss": 1.3564, "step": 6918 }, { "epoch": 1.1936513413266627, "grad_norm": 0.640625, "learning_rate": 7.055668651317937e-06, "loss": 1.3768, "step": 6919 }, { "epoch": 1.1938238592253947, "grad_norm": 0.6171875, "learning_rate": 7.053067536718334e-06, "loss": 1.2849, "step": 6920 }, { "epoch": 1.1939963771241267, "grad_norm": 0.5859375, "learning_rate": 7.050466640445906e-06, "loss": 1.3967, "step": 6921 }, { "epoch": 1.1941688950228586, "grad_norm": 0.55078125, "learning_rate": 7.0478659626933345e-06, "loss": 1.3141, "step": 6922 }, { "epoch": 1.1943414129215906, "grad_norm": 0.56640625, "learning_rate": 7.045265503653302e-06, "loss": 1.4541, "step": 6923 }, { "epoch": 1.1945139308203225, "grad_norm": 0.62890625, "learning_rate": 7.042665263518458e-06, "loss": 1.481, "step": 6924 }, { "epoch": 1.1946864487190547, "grad_norm": 0.63671875, "learning_rate": 7.040065242481455e-06, "loss": 1.3615, "step": 6925 }, { "epoch": 1.1948589666177867, "grad_norm": 0.59765625, "learning_rate": 7.0374654407349095e-06, "loss": 1.3896, "step": 6926 }, { "epoch": 1.1950314845165186, "grad_norm": 0.62890625, "learning_rate": 7.034865858471438e-06, "loss": 1.4479, "step": 6927 }, { "epoch": 1.1952040024152506, "grad_norm": 0.5859375, "learning_rate": 7.032266495883627e-06, "loss": 1.473, "step": 6928 }, { "epoch": 1.1953765203139826, "grad_norm": 0.578125, "learning_rate": 7.029667353164061e-06, "loss": 1.4585, "step": 6929 }, { "epoch": 1.1955490382127145, "grad_norm": 0.63671875, "learning_rate": 7.027068430505295e-06, "loss": 1.3895, "step": 6930 }, { "epoch": 1.1957215561114465, "grad_norm": 0.60546875, "learning_rate": 7.024469728099873e-06, "loss": 1.4092, "step": 6931 }, { "epoch": 1.1958940740101784, "grad_norm": 0.60546875, "learning_rate": 7.02187124614033e-06, "loss": 1.4369, "step": 6932 }, { "epoch": 1.1960665919089106, "grad_norm": 0.63671875, "learning_rate": 7.019272984819171e-06, "loss": 1.4359, "step": 6933 }, { "epoch": 1.1962391098076426, "grad_norm": 0.5625, "learning_rate": 7.016674944328896e-06, "loss": 1.3549, "step": 6934 }, { "epoch": 1.1964116277063745, "grad_norm": 0.625, "learning_rate": 7.014077124861978e-06, "loss": 1.447, "step": 6935 }, { "epoch": 1.1965841456051065, "grad_norm": 0.55078125, "learning_rate": 7.011479526610887e-06, "loss": 1.4539, "step": 6936 }, { "epoch": 1.1967566635038385, "grad_norm": 0.84375, "learning_rate": 7.0088821497680635e-06, "loss": 1.348, "step": 6937 }, { "epoch": 1.1969291814025704, "grad_norm": 0.5546875, "learning_rate": 7.006284994525943e-06, "loss": 1.3846, "step": 6938 }, { "epoch": 1.1971016993013026, "grad_norm": 0.5625, "learning_rate": 7.003688061076929e-06, "loss": 1.404, "step": 6939 }, { "epoch": 1.1972742172000346, "grad_norm": 0.67578125, "learning_rate": 7.001091349613433e-06, "loss": 1.4452, "step": 6940 }, { "epoch": 1.1974467350987665, "grad_norm": 0.59765625, "learning_rate": 6.9984948603278225e-06, "loss": 1.4511, "step": 6941 }, { "epoch": 1.1976192529974985, "grad_norm": 0.6953125, "learning_rate": 6.995898593412471e-06, "loss": 1.4435, "step": 6942 }, { "epoch": 1.1977917708962305, "grad_norm": 0.59375, "learning_rate": 6.993302549059717e-06, "loss": 1.425, "step": 6943 }, { "epoch": 1.1979642887949624, "grad_norm": 0.671875, "learning_rate": 6.9907067274619025e-06, "loss": 1.427, "step": 6944 }, { "epoch": 1.1981368066936944, "grad_norm": 0.70703125, "learning_rate": 6.988111128811334e-06, "loss": 1.4501, "step": 6945 }, { "epoch": 1.1983093245924266, "grad_norm": 0.8046875, "learning_rate": 6.985515753300314e-06, "loss": 1.4704, "step": 6946 }, { "epoch": 1.1984818424911585, "grad_norm": 0.625, "learning_rate": 6.982920601121117e-06, "loss": 1.3816, "step": 6947 }, { "epoch": 1.1986543603898905, "grad_norm": 0.61328125, "learning_rate": 6.980325672466015e-06, "loss": 1.4104, "step": 6948 }, { "epoch": 1.1988268782886224, "grad_norm": 0.59375, "learning_rate": 6.977730967527259e-06, "loss": 1.4798, "step": 6949 }, { "epoch": 1.1989993961873544, "grad_norm": 0.62890625, "learning_rate": 6.975136486497074e-06, "loss": 1.4175, "step": 6950 }, { "epoch": 1.1991719140860864, "grad_norm": 0.69140625, "learning_rate": 6.972542229567682e-06, "loss": 1.3979, "step": 6951 }, { "epoch": 1.1993444319848185, "grad_norm": 0.625, "learning_rate": 6.969948196931272e-06, "loss": 1.3222, "step": 6952 }, { "epoch": 1.1995169498835505, "grad_norm": 0.59375, "learning_rate": 6.967354388780037e-06, "loss": 1.3931, "step": 6953 }, { "epoch": 1.1996894677822825, "grad_norm": 0.57421875, "learning_rate": 6.964760805306137e-06, "loss": 1.4464, "step": 6954 }, { "epoch": 1.1998619856810144, "grad_norm": 0.609375, "learning_rate": 6.962167446701722e-06, "loss": 1.4952, "step": 6955 }, { "epoch": 1.2000345035797464, "grad_norm": 0.65625, "learning_rate": 6.95957431315892e-06, "loss": 1.4631, "step": 6956 }, { "epoch": 1.2002070214784784, "grad_norm": 0.6484375, "learning_rate": 6.956981404869855e-06, "loss": 1.5089, "step": 6957 }, { "epoch": 1.2003795393772103, "grad_norm": 0.62109375, "learning_rate": 6.954388722026618e-06, "loss": 1.4603, "step": 6958 }, { "epoch": 1.2005520572759423, "grad_norm": 0.56640625, "learning_rate": 6.951796264821298e-06, "loss": 1.4433, "step": 6959 }, { "epoch": 1.2007245751746745, "grad_norm": 0.59375, "learning_rate": 6.949204033445951e-06, "loss": 1.4816, "step": 6960 }, { "epoch": 1.2008970930734064, "grad_norm": 0.58203125, "learning_rate": 6.946612028092636e-06, "loss": 1.4718, "step": 6961 }, { "epoch": 1.2010696109721384, "grad_norm": 0.59765625, "learning_rate": 6.944020248953379e-06, "loss": 1.4309, "step": 6962 }, { "epoch": 1.2012421288708703, "grad_norm": 0.6953125, "learning_rate": 6.941428696220195e-06, "loss": 1.5134, "step": 6963 }, { "epoch": 1.2014146467696023, "grad_norm": 0.58984375, "learning_rate": 6.938837370085087e-06, "loss": 1.3666, "step": 6964 }, { "epoch": 1.2015871646683343, "grad_norm": 0.6484375, "learning_rate": 6.9362462707400325e-06, "loss": 1.4257, "step": 6965 }, { "epoch": 1.2017596825670664, "grad_norm": 0.73828125, "learning_rate": 6.9336553983769995e-06, "loss": 1.43, "step": 6966 }, { "epoch": 1.2019322004657984, "grad_norm": 0.62109375, "learning_rate": 6.931064753187929e-06, "loss": 1.4498, "step": 6967 }, { "epoch": 1.2021047183645304, "grad_norm": 0.6328125, "learning_rate": 6.928474335364761e-06, "loss": 1.3838, "step": 6968 }, { "epoch": 1.2022772362632623, "grad_norm": 0.5859375, "learning_rate": 6.925884145099405e-06, "loss": 1.3933, "step": 6969 }, { "epoch": 1.2024497541619943, "grad_norm": 0.59765625, "learning_rate": 6.923294182583762e-06, "loss": 1.4558, "step": 6970 }, { "epoch": 1.2026222720607262, "grad_norm": 0.64453125, "learning_rate": 6.920704448009705e-06, "loss": 1.4645, "step": 6971 }, { "epoch": 1.2027947899594582, "grad_norm": 0.5625, "learning_rate": 6.918114941569108e-06, "loss": 1.4875, "step": 6972 }, { "epoch": 1.2029673078581902, "grad_norm": 0.58984375, "learning_rate": 6.915525663453808e-06, "loss": 1.4333, "step": 6973 }, { "epoch": 1.2031398257569224, "grad_norm": 0.6171875, "learning_rate": 6.912936613855643e-06, "loss": 1.4254, "step": 6974 }, { "epoch": 1.2033123436556543, "grad_norm": 0.7265625, "learning_rate": 6.910347792966418e-06, "loss": 1.4324, "step": 6975 }, { "epoch": 1.2034848615543863, "grad_norm": 0.65625, "learning_rate": 6.907759200977939e-06, "loss": 1.5757, "step": 6976 }, { "epoch": 1.2036573794531182, "grad_norm": 0.57421875, "learning_rate": 6.905170838081976e-06, "loss": 1.3948, "step": 6977 }, { "epoch": 1.2038298973518502, "grad_norm": 0.640625, "learning_rate": 6.902582704470298e-06, "loss": 1.5119, "step": 6978 }, { "epoch": 1.2040024152505822, "grad_norm": 0.5546875, "learning_rate": 6.899994800334644e-06, "loss": 1.4551, "step": 6979 }, { "epoch": 1.2041749331493143, "grad_norm": 0.609375, "learning_rate": 6.897407125866743e-06, "loss": 1.364, "step": 6980 }, { "epoch": 1.2043474510480463, "grad_norm": 0.57421875, "learning_rate": 6.894819681258312e-06, "loss": 1.2945, "step": 6981 }, { "epoch": 1.2045199689467783, "grad_norm": 0.62109375, "learning_rate": 6.89223246670104e-06, "loss": 1.428, "step": 6982 }, { "epoch": 1.2046924868455102, "grad_norm": 0.5703125, "learning_rate": 6.889645482386607e-06, "loss": 1.4469, "step": 6983 }, { "epoch": 1.2048650047442422, "grad_norm": 0.6640625, "learning_rate": 6.887058728506666e-06, "loss": 1.3184, "step": 6984 }, { "epoch": 1.2050375226429741, "grad_norm": 0.625, "learning_rate": 6.8844722052528704e-06, "loss": 1.4146, "step": 6985 }, { "epoch": 1.205210040541706, "grad_norm": 0.6171875, "learning_rate": 6.881885912816837e-06, "loss": 1.4958, "step": 6986 }, { "epoch": 1.2053825584404383, "grad_norm": 0.625, "learning_rate": 6.87929985139018e-06, "loss": 1.3745, "step": 6987 }, { "epoch": 1.2055550763391703, "grad_norm": 0.5859375, "learning_rate": 6.876714021164486e-06, "loss": 1.3671, "step": 6988 }, { "epoch": 1.2057275942379022, "grad_norm": 0.65625, "learning_rate": 6.874128422331336e-06, "loss": 1.4298, "step": 6989 }, { "epoch": 1.2059001121366342, "grad_norm": 0.6328125, "learning_rate": 6.871543055082283e-06, "loss": 1.4688, "step": 6990 }, { "epoch": 1.2060726300353661, "grad_norm": 0.66015625, "learning_rate": 6.8689579196088694e-06, "loss": 1.3904, "step": 6991 }, { "epoch": 1.206245147934098, "grad_norm": 0.5859375, "learning_rate": 6.8663730161026125e-06, "loss": 1.4428, "step": 6992 }, { "epoch": 1.2064176658328303, "grad_norm": 0.65625, "learning_rate": 6.863788344755026e-06, "loss": 1.4108, "step": 6993 }, { "epoch": 1.2065901837315622, "grad_norm": 0.609375, "learning_rate": 6.861203905757593e-06, "loss": 1.3966, "step": 6994 }, { "epoch": 1.2067627016302942, "grad_norm": 0.58984375, "learning_rate": 6.858619699301785e-06, "loss": 1.4039, "step": 6995 }, { "epoch": 1.2069352195290262, "grad_norm": 0.6015625, "learning_rate": 6.856035725579062e-06, "loss": 1.5806, "step": 6996 }, { "epoch": 1.2071077374277581, "grad_norm": 0.70703125, "learning_rate": 6.853451984780854e-06, "loss": 1.4333, "step": 6997 }, { "epoch": 1.20728025532649, "grad_norm": 0.5546875, "learning_rate": 6.850868477098587e-06, "loss": 1.3247, "step": 6998 }, { "epoch": 1.207452773225222, "grad_norm": 0.578125, "learning_rate": 6.848285202723655e-06, "loss": 1.4973, "step": 6999 }, { "epoch": 1.207625291123954, "grad_norm": 0.5625, "learning_rate": 6.8457021618474514e-06, "loss": 1.4564, "step": 7000 }, { "epoch": 1.207625291123954, "eval_loss": 1.4081740379333496, "eval_runtime": 11.0377, "eval_samples_per_second": 92.773, "eval_steps_per_second": 23.193, "step": 7000 }, { "epoch": 1.2077978090226862, "grad_norm": 0.63671875, "learning_rate": 6.843119354661341e-06, "loss": 1.4374, "step": 7001 }, { "epoch": 1.2079703269214181, "grad_norm": 0.62890625, "learning_rate": 6.840536781356674e-06, "loss": 1.3482, "step": 7002 }, { "epoch": 1.20814284482015, "grad_norm": 0.6796875, "learning_rate": 6.837954442124779e-06, "loss": 1.5448, "step": 7003 }, { "epoch": 1.208315362718882, "grad_norm": 0.6875, "learning_rate": 6.835372337156981e-06, "loss": 1.5004, "step": 7004 }, { "epoch": 1.208487880617614, "grad_norm": 0.78125, "learning_rate": 6.83279046664457e-06, "loss": 1.4206, "step": 7005 }, { "epoch": 1.208660398516346, "grad_norm": 0.59375, "learning_rate": 6.830208830778837e-06, "loss": 1.4897, "step": 7006 }, { "epoch": 1.2088329164150782, "grad_norm": 0.609375, "learning_rate": 6.8276274297510336e-06, "loss": 1.4595, "step": 7007 }, { "epoch": 1.2090054343138101, "grad_norm": 0.55859375, "learning_rate": 6.825046263752415e-06, "loss": 1.4146, "step": 7008 }, { "epoch": 1.209177952212542, "grad_norm": 0.609375, "learning_rate": 6.822465332974204e-06, "loss": 1.386, "step": 7009 }, { "epoch": 1.209350470111274, "grad_norm": 0.5625, "learning_rate": 6.819884637607619e-06, "loss": 1.3649, "step": 7010 }, { "epoch": 1.209522988010006, "grad_norm": 0.66015625, "learning_rate": 6.817304177843848e-06, "loss": 1.4369, "step": 7011 }, { "epoch": 1.209695505908738, "grad_norm": 0.55078125, "learning_rate": 6.8147239538740695e-06, "loss": 1.3845, "step": 7012 }, { "epoch": 1.20986802380747, "grad_norm": 0.60546875, "learning_rate": 6.812143965889446e-06, "loss": 1.4777, "step": 7013 }, { "epoch": 1.210040541706202, "grad_norm": 0.66015625, "learning_rate": 6.809564214081114e-06, "loss": 1.5328, "step": 7014 }, { "epoch": 1.210213059604934, "grad_norm": 0.59765625, "learning_rate": 6.806984698640202e-06, "loss": 1.4277, "step": 7015 }, { "epoch": 1.210385577503666, "grad_norm": 0.578125, "learning_rate": 6.8044054197578115e-06, "loss": 1.4582, "step": 7016 }, { "epoch": 1.210558095402398, "grad_norm": 0.59375, "learning_rate": 6.801826377625036e-06, "loss": 1.4857, "step": 7017 }, { "epoch": 1.21073061330113, "grad_norm": 0.67578125, "learning_rate": 6.799247572432945e-06, "loss": 1.5168, "step": 7018 }, { "epoch": 1.210903131199862, "grad_norm": 0.6171875, "learning_rate": 6.796669004372596e-06, "loss": 1.4525, "step": 7019 }, { "epoch": 1.2110756490985939, "grad_norm": 0.609375, "learning_rate": 6.794090673635017e-06, "loss": 1.4734, "step": 7020 }, { "epoch": 1.211248166997326, "grad_norm": 0.5859375, "learning_rate": 6.791512580411237e-06, "loss": 1.3496, "step": 7021 }, { "epoch": 1.211420684896058, "grad_norm": 0.625, "learning_rate": 6.788934724892251e-06, "loss": 1.4423, "step": 7022 }, { "epoch": 1.21159320279479, "grad_norm": 0.62890625, "learning_rate": 6.786357107269045e-06, "loss": 1.3485, "step": 7023 }, { "epoch": 1.211765720693522, "grad_norm": 0.76171875, "learning_rate": 6.78377972773258e-06, "loss": 1.4124, "step": 7024 }, { "epoch": 1.211938238592254, "grad_norm": 0.5859375, "learning_rate": 6.781202586473814e-06, "loss": 1.4283, "step": 7025 }, { "epoch": 1.2121107564909859, "grad_norm": 0.56640625, "learning_rate": 6.778625683683671e-06, "loss": 1.4206, "step": 7026 }, { "epoch": 1.2122832743897178, "grad_norm": 0.703125, "learning_rate": 6.776049019553062e-06, "loss": 1.6036, "step": 7027 }, { "epoch": 1.21245579228845, "grad_norm": 0.62109375, "learning_rate": 6.773472594272889e-06, "loss": 1.4514, "step": 7028 }, { "epoch": 1.212628310187182, "grad_norm": 0.71484375, "learning_rate": 6.7708964080340265e-06, "loss": 1.4119, "step": 7029 }, { "epoch": 1.212800828085914, "grad_norm": 0.640625, "learning_rate": 6.768320461027336e-06, "loss": 1.4975, "step": 7030 }, { "epoch": 1.212973345984646, "grad_norm": 0.5859375, "learning_rate": 6.765744753443654e-06, "loss": 1.3481, "step": 7031 }, { "epoch": 1.2131458638833779, "grad_norm": 0.66796875, "learning_rate": 6.763169285473813e-06, "loss": 1.4414, "step": 7032 }, { "epoch": 1.2133183817821098, "grad_norm": 0.57421875, "learning_rate": 6.760594057308614e-06, "loss": 1.3994, "step": 7033 }, { "epoch": 1.213490899680842, "grad_norm": 0.80078125, "learning_rate": 6.758019069138851e-06, "loss": 1.3516, "step": 7034 }, { "epoch": 1.213663417579574, "grad_norm": 0.60546875, "learning_rate": 6.7554443211552864e-06, "loss": 1.4604, "step": 7035 }, { "epoch": 1.213835935478306, "grad_norm": 0.58984375, "learning_rate": 6.752869813548684e-06, "loss": 1.4742, "step": 7036 }, { "epoch": 1.2140084533770379, "grad_norm": 0.625, "learning_rate": 6.750295546509771e-06, "loss": 1.4507, "step": 7037 }, { "epoch": 1.2141809712757698, "grad_norm": 0.69140625, "learning_rate": 6.747721520229273e-06, "loss": 1.4602, "step": 7038 }, { "epoch": 1.2143534891745018, "grad_norm": 0.65625, "learning_rate": 6.7451477348978835e-06, "loss": 1.3104, "step": 7039 }, { "epoch": 1.2145260070732338, "grad_norm": 0.58203125, "learning_rate": 6.74257419070629e-06, "loss": 1.416, "step": 7040 }, { "epoch": 1.2146985249719657, "grad_norm": 0.5859375, "learning_rate": 6.740000887845149e-06, "loss": 1.4529, "step": 7041 }, { "epoch": 1.214871042870698, "grad_norm": 0.890625, "learning_rate": 6.737427826505116e-06, "loss": 1.3673, "step": 7042 }, { "epoch": 1.2150435607694299, "grad_norm": 0.578125, "learning_rate": 6.734855006876814e-06, "loss": 1.4521, "step": 7043 }, { "epoch": 1.2152160786681618, "grad_norm": 0.5703125, "learning_rate": 6.732282429150852e-06, "loss": 1.3779, "step": 7044 }, { "epoch": 1.2153885965668938, "grad_norm": 0.5703125, "learning_rate": 6.729710093517829e-06, "loss": 1.4222, "step": 7045 }, { "epoch": 1.2155611144656258, "grad_norm": 0.84765625, "learning_rate": 6.727138000168314e-06, "loss": 1.4788, "step": 7046 }, { "epoch": 1.2157336323643577, "grad_norm": 0.69140625, "learning_rate": 6.7245661492928674e-06, "loss": 1.4366, "step": 7047 }, { "epoch": 1.21590615026309, "grad_norm": 0.65234375, "learning_rate": 6.721994541082022e-06, "loss": 1.4501, "step": 7048 }, { "epoch": 1.2160786681618219, "grad_norm": 0.7734375, "learning_rate": 6.719423175726308e-06, "loss": 1.3713, "step": 7049 }, { "epoch": 1.2162511860605538, "grad_norm": 0.578125, "learning_rate": 6.716852053416221e-06, "loss": 1.47, "step": 7050 }, { "epoch": 1.2164237039592858, "grad_norm": 0.59765625, "learning_rate": 6.7142811743422495e-06, "loss": 1.4462, "step": 7051 }, { "epoch": 1.2165962218580177, "grad_norm": 0.6171875, "learning_rate": 6.711710538694855e-06, "loss": 1.4658, "step": 7052 }, { "epoch": 1.2167687397567497, "grad_norm": 0.578125, "learning_rate": 6.709140146664494e-06, "loss": 1.4135, "step": 7053 }, { "epoch": 1.2169412576554817, "grad_norm": 0.578125, "learning_rate": 6.706569998441591e-06, "loss": 1.4711, "step": 7054 }, { "epoch": 1.2171137755542136, "grad_norm": 0.5859375, "learning_rate": 6.704000094216563e-06, "loss": 1.4631, "step": 7055 }, { "epoch": 1.2172862934529458, "grad_norm": 0.625, "learning_rate": 6.701430434179799e-06, "loss": 1.452, "step": 7056 }, { "epoch": 1.2174588113516778, "grad_norm": 0.58984375, "learning_rate": 6.698861018521681e-06, "loss": 1.5188, "step": 7057 }, { "epoch": 1.2176313292504097, "grad_norm": 0.57421875, "learning_rate": 6.696291847432565e-06, "loss": 1.4292, "step": 7058 }, { "epoch": 1.2178038471491417, "grad_norm": 0.62890625, "learning_rate": 6.693722921102788e-06, "loss": 1.4083, "step": 7059 }, { "epoch": 1.2179763650478737, "grad_norm": 0.61328125, "learning_rate": 6.691154239722681e-06, "loss": 1.4582, "step": 7060 }, { "epoch": 1.2181488829466056, "grad_norm": 0.5703125, "learning_rate": 6.688585803482539e-06, "loss": 1.53, "step": 7061 }, { "epoch": 1.2183214008453378, "grad_norm": 0.640625, "learning_rate": 6.686017612572655e-06, "loss": 1.432, "step": 7062 }, { "epoch": 1.2184939187440698, "grad_norm": 0.60546875, "learning_rate": 6.683449667183288e-06, "loss": 1.3916, "step": 7063 }, { "epoch": 1.2186664366428017, "grad_norm": 0.609375, "learning_rate": 6.680881967504698e-06, "loss": 1.4147, "step": 7064 }, { "epoch": 1.2188389545415337, "grad_norm": 0.62109375, "learning_rate": 6.678314513727105e-06, "loss": 1.4874, "step": 7065 }, { "epoch": 1.2190114724402656, "grad_norm": 0.6015625, "learning_rate": 6.675747306040732e-06, "loss": 1.4433, "step": 7066 }, { "epoch": 1.2191839903389976, "grad_norm": 0.64453125, "learning_rate": 6.673180344635767e-06, "loss": 1.4913, "step": 7067 }, { "epoch": 1.2193565082377296, "grad_norm": 0.6015625, "learning_rate": 6.670613629702391e-06, "loss": 1.4379, "step": 7068 }, { "epoch": 1.2195290261364617, "grad_norm": 0.6015625, "learning_rate": 6.6680471614307576e-06, "loss": 1.4167, "step": 7069 }, { "epoch": 1.2197015440351937, "grad_norm": 0.63671875, "learning_rate": 6.665480940011012e-06, "loss": 1.5605, "step": 7070 }, { "epoch": 1.2198740619339257, "grad_norm": 0.609375, "learning_rate": 6.662914965633272e-06, "loss": 1.4221, "step": 7071 }, { "epoch": 1.2200465798326576, "grad_norm": 0.6328125, "learning_rate": 6.660349238487644e-06, "loss": 1.5069, "step": 7072 }, { "epoch": 1.2202190977313896, "grad_norm": 0.578125, "learning_rate": 6.657783758764208e-06, "loss": 1.4822, "step": 7073 }, { "epoch": 1.2203916156301216, "grad_norm": 0.66015625, "learning_rate": 6.655218526653038e-06, "loss": 1.3407, "step": 7074 }, { "epoch": 1.2205641335288537, "grad_norm": 0.6796875, "learning_rate": 6.6526535423441775e-06, "loss": 1.4482, "step": 7075 }, { "epoch": 1.2207366514275857, "grad_norm": 0.578125, "learning_rate": 6.650088806027655e-06, "loss": 1.3931, "step": 7076 }, { "epoch": 1.2209091693263177, "grad_norm": 0.62109375, "learning_rate": 6.647524317893489e-06, "loss": 1.4404, "step": 7077 }, { "epoch": 1.2210816872250496, "grad_norm": 0.57421875, "learning_rate": 6.644960078131667e-06, "loss": 1.3433, "step": 7078 }, { "epoch": 1.2212542051237816, "grad_norm": 0.59765625, "learning_rate": 6.642396086932168e-06, "loss": 1.4951, "step": 7079 }, { "epoch": 1.2214267230225135, "grad_norm": 0.56640625, "learning_rate": 6.639832344484942e-06, "loss": 1.4083, "step": 7080 }, { "epoch": 1.2215992409212455, "grad_norm": 0.59765625, "learning_rate": 6.637268850979934e-06, "loss": 1.3391, "step": 7081 }, { "epoch": 1.2217717588199775, "grad_norm": 0.609375, "learning_rate": 6.6347056066070605e-06, "loss": 1.5557, "step": 7082 }, { "epoch": 1.2219442767187096, "grad_norm": 0.66015625, "learning_rate": 6.632142611556225e-06, "loss": 1.4448, "step": 7083 }, { "epoch": 1.2221167946174416, "grad_norm": 0.671875, "learning_rate": 6.629579866017303e-06, "loss": 1.4612, "step": 7084 }, { "epoch": 1.2222893125161736, "grad_norm": 0.65625, "learning_rate": 6.6270173701801685e-06, "loss": 1.4145, "step": 7085 }, { "epoch": 1.2224618304149055, "grad_norm": 0.58984375, "learning_rate": 6.62445512423466e-06, "loss": 1.4511, "step": 7086 }, { "epoch": 1.2226343483136375, "grad_norm": 0.6015625, "learning_rate": 6.621893128370609e-06, "loss": 1.3963, "step": 7087 }, { "epoch": 1.2228068662123694, "grad_norm": 0.5859375, "learning_rate": 6.61933138277782e-06, "loss": 1.4633, "step": 7088 }, { "epoch": 1.2229793841111016, "grad_norm": 0.609375, "learning_rate": 6.616769887646088e-06, "loss": 1.4498, "step": 7089 }, { "epoch": 1.2231519020098336, "grad_norm": 0.58984375, "learning_rate": 6.614208643165181e-06, "loss": 1.3724, "step": 7090 }, { "epoch": 1.2233244199085656, "grad_norm": 0.578125, "learning_rate": 6.61164764952485e-06, "loss": 1.4823, "step": 7091 }, { "epoch": 1.2234969378072975, "grad_norm": 0.703125, "learning_rate": 6.609086906914839e-06, "loss": 1.3061, "step": 7092 }, { "epoch": 1.2236694557060295, "grad_norm": 0.671875, "learning_rate": 6.606526415524852e-06, "loss": 1.4084, "step": 7093 }, { "epoch": 1.2238419736047614, "grad_norm": 0.58203125, "learning_rate": 6.603966175544595e-06, "loss": 1.5003, "step": 7094 }, { "epoch": 1.2240144915034934, "grad_norm": 0.60546875, "learning_rate": 6.601406187163741e-06, "loss": 1.4785, "step": 7095 }, { "epoch": 1.2241870094022254, "grad_norm": 0.6015625, "learning_rate": 6.598846450571956e-06, "loss": 1.3297, "step": 7096 }, { "epoch": 1.2243595273009575, "grad_norm": 0.578125, "learning_rate": 6.596286965958872e-06, "loss": 1.4479, "step": 7097 }, { "epoch": 1.2245320451996895, "grad_norm": 0.60546875, "learning_rate": 6.593727733514119e-06, "loss": 1.4002, "step": 7098 }, { "epoch": 1.2247045630984215, "grad_norm": 0.5703125, "learning_rate": 6.5911687534272995e-06, "loss": 1.4138, "step": 7099 }, { "epoch": 1.2248770809971534, "grad_norm": 0.6015625, "learning_rate": 6.588610025887999e-06, "loss": 1.3873, "step": 7100 }, { "epoch": 1.2248770809971534, "eval_loss": 1.408066987991333, "eval_runtime": 10.9497, "eval_samples_per_second": 93.518, "eval_steps_per_second": 23.38, "step": 7100 }, { "epoch": 1.2250495988958854, "grad_norm": 0.65625, "learning_rate": 6.5860515510857795e-06, "loss": 1.4459, "step": 7101 }, { "epoch": 1.2252221167946176, "grad_norm": 0.8515625, "learning_rate": 6.583493329210197e-06, "loss": 1.3754, "step": 7102 }, { "epoch": 1.2253946346933495, "grad_norm": 0.578125, "learning_rate": 6.5809353604507735e-06, "loss": 1.4302, "step": 7103 }, { "epoch": 1.2255671525920815, "grad_norm": 0.59375, "learning_rate": 6.578377644997022e-06, "loss": 1.4781, "step": 7104 }, { "epoch": 1.2257396704908134, "grad_norm": 0.58984375, "learning_rate": 6.575820183038433e-06, "loss": 1.4126, "step": 7105 }, { "epoch": 1.2259121883895454, "grad_norm": 0.70703125, "learning_rate": 6.573262974764483e-06, "loss": 1.3968, "step": 7106 }, { "epoch": 1.2260847062882774, "grad_norm": 0.61328125, "learning_rate": 6.570706020364619e-06, "loss": 1.4346, "step": 7107 }, { "epoch": 1.2262572241870093, "grad_norm": 0.58984375, "learning_rate": 6.568149320028281e-06, "loss": 1.4194, "step": 7108 }, { "epoch": 1.2264297420857413, "grad_norm": 0.6171875, "learning_rate": 6.5655928739448874e-06, "loss": 1.465, "step": 7109 }, { "epoch": 1.2266022599844735, "grad_norm": 0.68359375, "learning_rate": 6.56303668230383e-06, "loss": 1.3937, "step": 7110 }, { "epoch": 1.2267747778832054, "grad_norm": 0.59375, "learning_rate": 6.560480745294493e-06, "loss": 1.4778, "step": 7111 }, { "epoch": 1.2269472957819374, "grad_norm": 0.59375, "learning_rate": 6.557925063106229e-06, "loss": 1.3817, "step": 7112 }, { "epoch": 1.2271198136806694, "grad_norm": 0.6015625, "learning_rate": 6.5553696359283905e-06, "loss": 1.4118, "step": 7113 }, { "epoch": 1.2272923315794013, "grad_norm": 0.625, "learning_rate": 6.552814463950288e-06, "loss": 1.3556, "step": 7114 }, { "epoch": 1.2274648494781333, "grad_norm": 0.6171875, "learning_rate": 6.5502595473612304e-06, "loss": 1.5199, "step": 7115 }, { "epoch": 1.2276373673768655, "grad_norm": 0.62890625, "learning_rate": 6.5477048863504965e-06, "loss": 1.5357, "step": 7116 }, { "epoch": 1.2278098852755974, "grad_norm": 0.578125, "learning_rate": 6.5451504811073604e-06, "loss": 1.3962, "step": 7117 }, { "epoch": 1.2279824031743294, "grad_norm": 0.7109375, "learning_rate": 6.542596331821061e-06, "loss": 1.4034, "step": 7118 }, { "epoch": 1.2281549210730613, "grad_norm": 0.57421875, "learning_rate": 6.540042438680832e-06, "loss": 1.3123, "step": 7119 }, { "epoch": 1.2283274389717933, "grad_norm": 0.56640625, "learning_rate": 6.537488801875872e-06, "loss": 1.3925, "step": 7120 }, { "epoch": 1.2284999568705253, "grad_norm": 0.55078125, "learning_rate": 6.534935421595381e-06, "loss": 1.3863, "step": 7121 }, { "epoch": 1.2286724747692572, "grad_norm": 0.5625, "learning_rate": 6.5323822980285224e-06, "loss": 1.4513, "step": 7122 }, { "epoch": 1.2288449926679892, "grad_norm": 0.6796875, "learning_rate": 6.5298294313644515e-06, "loss": 1.4978, "step": 7123 }, { "epoch": 1.2290175105667214, "grad_norm": 0.60546875, "learning_rate": 6.527276821792297e-06, "loss": 1.3822, "step": 7124 }, { "epoch": 1.2291900284654533, "grad_norm": 0.67578125, "learning_rate": 6.524724469501172e-06, "loss": 1.363, "step": 7125 }, { "epoch": 1.2293625463641853, "grad_norm": 0.6015625, "learning_rate": 6.522172374680177e-06, "loss": 1.4761, "step": 7126 }, { "epoch": 1.2295350642629173, "grad_norm": 0.5546875, "learning_rate": 6.5196205375183804e-06, "loss": 1.419, "step": 7127 }, { "epoch": 1.2297075821616492, "grad_norm": 0.58203125, "learning_rate": 6.517068958204844e-06, "loss": 1.3345, "step": 7128 }, { "epoch": 1.2298801000603812, "grad_norm": 0.671875, "learning_rate": 6.514517636928598e-06, "loss": 1.4148, "step": 7129 }, { "epoch": 1.2300526179591134, "grad_norm": 0.62890625, "learning_rate": 6.511966573878667e-06, "loss": 1.4199, "step": 7130 }, { "epoch": 1.2302251358578453, "grad_norm": 0.5625, "learning_rate": 6.509415769244044e-06, "loss": 1.3635, "step": 7131 }, { "epoch": 1.2303976537565773, "grad_norm": 0.64453125, "learning_rate": 6.506865223213714e-06, "loss": 1.3639, "step": 7132 }, { "epoch": 1.2305701716553092, "grad_norm": 0.578125, "learning_rate": 6.50431493597663e-06, "loss": 1.359, "step": 7133 }, { "epoch": 1.2307426895540412, "grad_norm": 0.59375, "learning_rate": 6.5017649077217415e-06, "loss": 1.4499, "step": 7134 }, { "epoch": 1.2309152074527732, "grad_norm": 0.57421875, "learning_rate": 6.499215138637964e-06, "loss": 1.4122, "step": 7135 }, { "epoch": 1.2310877253515051, "grad_norm": 0.58984375, "learning_rate": 6.496665628914205e-06, "loss": 1.5291, "step": 7136 }, { "epoch": 1.2312602432502373, "grad_norm": 0.5625, "learning_rate": 6.494116378739344e-06, "loss": 1.3532, "step": 7137 }, { "epoch": 1.2314327611489693, "grad_norm": 0.609375, "learning_rate": 6.491567388302249e-06, "loss": 1.4024, "step": 7138 }, { "epoch": 1.2316052790477012, "grad_norm": 0.57421875, "learning_rate": 6.489018657791763e-06, "loss": 1.3638, "step": 7139 }, { "epoch": 1.2317777969464332, "grad_norm": 3.328125, "learning_rate": 6.48647018739671e-06, "loss": 1.5342, "step": 7140 }, { "epoch": 1.2319503148451652, "grad_norm": 0.62890625, "learning_rate": 6.483921977305903e-06, "loss": 1.4922, "step": 7141 }, { "epoch": 1.2321228327438971, "grad_norm": 0.66015625, "learning_rate": 6.481374027708123e-06, "loss": 1.4724, "step": 7142 }, { "epoch": 1.2322953506426293, "grad_norm": 0.62109375, "learning_rate": 6.478826338792144e-06, "loss": 1.4799, "step": 7143 }, { "epoch": 1.2324678685413613, "grad_norm": 0.58984375, "learning_rate": 6.476278910746705e-06, "loss": 1.4143, "step": 7144 }, { "epoch": 1.2326403864400932, "grad_norm": 0.65234375, "learning_rate": 6.4737317437605475e-06, "loss": 1.3686, "step": 7145 }, { "epoch": 1.2328129043388252, "grad_norm": 0.62109375, "learning_rate": 6.471184838022372e-06, "loss": 1.406, "step": 7146 }, { "epoch": 1.2329854222375571, "grad_norm": 0.70703125, "learning_rate": 6.468638193720875e-06, "loss": 1.4347, "step": 7147 }, { "epoch": 1.233157940136289, "grad_norm": 0.57421875, "learning_rate": 6.4660918110447215e-06, "loss": 1.3784, "step": 7148 }, { "epoch": 1.233330458035021, "grad_norm": 0.63671875, "learning_rate": 6.463545690182573e-06, "loss": 1.4489, "step": 7149 }, { "epoch": 1.233502975933753, "grad_norm": 0.609375, "learning_rate": 6.460999831323054e-06, "loss": 1.3989, "step": 7150 }, { "epoch": 1.2336754938324852, "grad_norm": 0.61328125, "learning_rate": 6.458454234654781e-06, "loss": 1.4118, "step": 7151 }, { "epoch": 1.2338480117312172, "grad_norm": 0.62890625, "learning_rate": 6.4559089003663434e-06, "loss": 1.4362, "step": 7152 }, { "epoch": 1.2340205296299491, "grad_norm": 0.609375, "learning_rate": 6.453363828646323e-06, "loss": 1.4336, "step": 7153 }, { "epoch": 1.234193047528681, "grad_norm": 0.60546875, "learning_rate": 6.450819019683267e-06, "loss": 1.501, "step": 7154 }, { "epoch": 1.234365565427413, "grad_norm": 0.55078125, "learning_rate": 6.448274473665718e-06, "loss": 1.3423, "step": 7155 }, { "epoch": 1.234538083326145, "grad_norm": 0.58984375, "learning_rate": 6.445730190782187e-06, "loss": 1.4127, "step": 7156 }, { "epoch": 1.2347106012248772, "grad_norm": 0.62890625, "learning_rate": 6.443186171221167e-06, "loss": 1.4934, "step": 7157 }, { "epoch": 1.2348831191236092, "grad_norm": 0.5625, "learning_rate": 6.4406424151711456e-06, "loss": 1.4181, "step": 7158 }, { "epoch": 1.2350556370223411, "grad_norm": 0.61328125, "learning_rate": 6.438098922820573e-06, "loss": 1.44, "step": 7159 }, { "epoch": 1.235228154921073, "grad_norm": 0.61328125, "learning_rate": 6.435555694357888e-06, "loss": 1.3861, "step": 7160 }, { "epoch": 1.235400672819805, "grad_norm": 0.65625, "learning_rate": 6.433012729971506e-06, "loss": 1.4417, "step": 7161 }, { "epoch": 1.235573190718537, "grad_norm": 0.6171875, "learning_rate": 6.430470029849832e-06, "loss": 1.5808, "step": 7162 }, { "epoch": 1.235745708617269, "grad_norm": 0.59375, "learning_rate": 6.4279275941812395e-06, "loss": 1.3912, "step": 7163 }, { "epoch": 1.235918226516001, "grad_norm": 0.59375, "learning_rate": 6.425385423154091e-06, "loss": 1.3512, "step": 7164 }, { "epoch": 1.236090744414733, "grad_norm": 0.6328125, "learning_rate": 6.422843516956724e-06, "loss": 1.3951, "step": 7165 }, { "epoch": 1.236263262313465, "grad_norm": 0.5625, "learning_rate": 6.420301875777464e-06, "loss": 1.4184, "step": 7166 }, { "epoch": 1.236435780212197, "grad_norm": 0.59765625, "learning_rate": 6.417760499804604e-06, "loss": 1.4245, "step": 7167 }, { "epoch": 1.236608298110929, "grad_norm": 0.6171875, "learning_rate": 6.415219389226432e-06, "loss": 1.4479, "step": 7168 }, { "epoch": 1.236780816009661, "grad_norm": 0.578125, "learning_rate": 6.412678544231203e-06, "loss": 1.4777, "step": 7169 }, { "epoch": 1.236953333908393, "grad_norm": 0.58984375, "learning_rate": 6.410137965007166e-06, "loss": 1.3913, "step": 7170 }, { "epoch": 1.237125851807125, "grad_norm": 0.578125, "learning_rate": 6.4075976517425365e-06, "loss": 1.4419, "step": 7171 }, { "epoch": 1.237298369705857, "grad_norm": 0.58203125, "learning_rate": 6.4050576046255176e-06, "loss": 1.4678, "step": 7172 }, { "epoch": 1.237470887604589, "grad_norm": 0.58203125, "learning_rate": 6.402517823844299e-06, "loss": 1.4013, "step": 7173 }, { "epoch": 1.237643405503321, "grad_norm": 0.57421875, "learning_rate": 6.399978309587034e-06, "loss": 1.4265, "step": 7174 }, { "epoch": 1.237815923402053, "grad_norm": 0.6171875, "learning_rate": 6.397439062041873e-06, "loss": 1.3889, "step": 7175 }, { "epoch": 1.237988441300785, "grad_norm": 0.5859375, "learning_rate": 6.394900081396931e-06, "loss": 1.4773, "step": 7176 }, { "epoch": 1.2381609591995169, "grad_norm": 0.5703125, "learning_rate": 6.392361367840322e-06, "loss": 1.4759, "step": 7177 }, { "epoch": 1.238333477098249, "grad_norm": 0.58203125, "learning_rate": 6.3898229215601215e-06, "loss": 1.3627, "step": 7178 }, { "epoch": 1.238505994996981, "grad_norm": 0.5625, "learning_rate": 6.3872847427443985e-06, "loss": 1.5092, "step": 7179 }, { "epoch": 1.238678512895713, "grad_norm": 0.65234375, "learning_rate": 6.384746831581191e-06, "loss": 1.4332, "step": 7180 }, { "epoch": 1.238851030794445, "grad_norm": 0.6015625, "learning_rate": 6.382209188258533e-06, "loss": 1.3706, "step": 7181 }, { "epoch": 1.2390235486931769, "grad_norm": 0.5859375, "learning_rate": 6.379671812964416e-06, "loss": 1.4427, "step": 7182 }, { "epoch": 1.2391960665919088, "grad_norm": 0.5859375, "learning_rate": 6.3771347058868404e-06, "loss": 1.4604, "step": 7183 }, { "epoch": 1.239368584490641, "grad_norm": 0.59765625, "learning_rate": 6.374597867213756e-06, "loss": 1.4729, "step": 7184 }, { "epoch": 1.239541102389373, "grad_norm": 0.59375, "learning_rate": 6.372061297133119e-06, "loss": 1.3559, "step": 7185 }, { "epoch": 1.239713620288105, "grad_norm": 0.55859375, "learning_rate": 6.369524995832844e-06, "loss": 1.4233, "step": 7186 }, { "epoch": 1.239886138186837, "grad_norm": 0.66796875, "learning_rate": 6.366988963500846e-06, "loss": 1.452, "step": 7187 }, { "epoch": 1.2400586560855689, "grad_norm": 0.57421875, "learning_rate": 6.364453200325005e-06, "loss": 1.4131, "step": 7188 }, { "epoch": 1.2402311739843008, "grad_norm": 0.5625, "learning_rate": 6.361917706493184e-06, "loss": 1.4352, "step": 7189 }, { "epoch": 1.2404036918830328, "grad_norm": 0.58203125, "learning_rate": 6.359382482193234e-06, "loss": 1.4955, "step": 7190 }, { "epoch": 1.2405762097817647, "grad_norm": 0.546875, "learning_rate": 6.356847527612976e-06, "loss": 1.3062, "step": 7191 }, { "epoch": 1.240748727680497, "grad_norm": 0.57421875, "learning_rate": 6.354312842940219e-06, "loss": 1.4652, "step": 7192 }, { "epoch": 1.240921245579229, "grad_norm": 0.58984375, "learning_rate": 6.351778428362742e-06, "loss": 1.4002, "step": 7193 }, { "epoch": 1.2410937634779609, "grad_norm": 0.6015625, "learning_rate": 6.349244284068318e-06, "loss": 1.3301, "step": 7194 }, { "epoch": 1.2412662813766928, "grad_norm": 0.59375, "learning_rate": 6.346710410244685e-06, "loss": 1.4002, "step": 7195 }, { "epoch": 1.2414387992754248, "grad_norm": 0.62109375, "learning_rate": 6.344176807079576e-06, "loss": 1.3778, "step": 7196 }, { "epoch": 1.2416113171741567, "grad_norm": 0.58984375, "learning_rate": 6.341643474760686e-06, "loss": 1.4565, "step": 7197 }, { "epoch": 1.241783835072889, "grad_norm": 0.609375, "learning_rate": 6.339110413475711e-06, "loss": 1.3695, "step": 7198 }, { "epoch": 1.2419563529716209, "grad_norm": 0.59765625, "learning_rate": 6.336577623412308e-06, "loss": 1.2846, "step": 7199 }, { "epoch": 1.2421288708703528, "grad_norm": 0.5703125, "learning_rate": 6.3340451047581275e-06, "loss": 1.4077, "step": 7200 }, { "epoch": 1.2421288708703528, "eval_loss": 1.4079400300979614, "eval_runtime": 10.842, "eval_samples_per_second": 94.447, "eval_steps_per_second": 23.612, "step": 7200 }, { "epoch": 1.2423013887690848, "grad_norm": 0.62890625, "learning_rate": 6.3315128577007874e-06, "loss": 1.3817, "step": 7201 }, { "epoch": 1.2424739066678168, "grad_norm": 0.60546875, "learning_rate": 6.3289808824279e-06, "loss": 1.3751, "step": 7202 }, { "epoch": 1.2426464245665487, "grad_norm": 0.55078125, "learning_rate": 6.3264491791270455e-06, "loss": 1.3145, "step": 7203 }, { "epoch": 1.2428189424652807, "grad_norm": 0.578125, "learning_rate": 6.323917747985786e-06, "loss": 1.4272, "step": 7204 }, { "epoch": 1.2429914603640126, "grad_norm": 0.59765625, "learning_rate": 6.321386589191674e-06, "loss": 1.3329, "step": 7205 }, { "epoch": 1.2431639782627448, "grad_norm": 0.56640625, "learning_rate": 6.318855702932225e-06, "loss": 1.4386, "step": 7206 }, { "epoch": 1.2433364961614768, "grad_norm": 0.58984375, "learning_rate": 6.31632508939495e-06, "loss": 1.5471, "step": 7207 }, { "epoch": 1.2435090140602088, "grad_norm": 0.55859375, "learning_rate": 6.313794748767324e-06, "loss": 1.3682, "step": 7208 }, { "epoch": 1.2436815319589407, "grad_norm": 0.6328125, "learning_rate": 6.31126468123682e-06, "loss": 1.3712, "step": 7209 }, { "epoch": 1.2438540498576727, "grad_norm": 0.8203125, "learning_rate": 6.308734886990875e-06, "loss": 1.3938, "step": 7210 }, { "epoch": 1.2440265677564046, "grad_norm": 0.61328125, "learning_rate": 6.306205366216915e-06, "loss": 1.4611, "step": 7211 }, { "epoch": 1.2441990856551368, "grad_norm": 0.68359375, "learning_rate": 6.3036761191023374e-06, "loss": 1.4358, "step": 7212 }, { "epoch": 1.2443716035538688, "grad_norm": 0.6171875, "learning_rate": 6.301147145834534e-06, "loss": 1.4512, "step": 7213 }, { "epoch": 1.2445441214526007, "grad_norm": 0.68359375, "learning_rate": 6.298618446600856e-06, "loss": 1.4666, "step": 7214 }, { "epoch": 1.2447166393513327, "grad_norm": 0.64453125, "learning_rate": 6.2960900215886556e-06, "loss": 1.3919, "step": 7215 }, { "epoch": 1.2448891572500647, "grad_norm": 0.58984375, "learning_rate": 6.293561870985248e-06, "loss": 1.3503, "step": 7216 }, { "epoch": 1.2450616751487966, "grad_norm": 0.64453125, "learning_rate": 6.291033994977935e-06, "loss": 1.4322, "step": 7217 }, { "epoch": 1.2452341930475286, "grad_norm": 0.5703125, "learning_rate": 6.288506393753997e-06, "loss": 1.3567, "step": 7218 }, { "epoch": 1.2454067109462608, "grad_norm": 0.5859375, "learning_rate": 6.285979067500699e-06, "loss": 1.4231, "step": 7219 }, { "epoch": 1.2455792288449927, "grad_norm": 0.5546875, "learning_rate": 6.283452016405276e-06, "loss": 1.3525, "step": 7220 }, { "epoch": 1.2457517467437247, "grad_norm": 0.5703125, "learning_rate": 6.280925240654948e-06, "loss": 1.4268, "step": 7221 }, { "epoch": 1.2459242646424566, "grad_norm": 0.58984375, "learning_rate": 6.27839874043692e-06, "loss": 1.438, "step": 7222 }, { "epoch": 1.2460967825411886, "grad_norm": 0.58984375, "learning_rate": 6.275872515938365e-06, "loss": 1.4832, "step": 7223 }, { "epoch": 1.2462693004399206, "grad_norm": 0.58203125, "learning_rate": 6.2733465673464456e-06, "loss": 1.49, "step": 7224 }, { "epoch": 1.2464418183386528, "grad_norm": 0.6015625, "learning_rate": 6.270820894848293e-06, "loss": 1.3996, "step": 7225 }, { "epoch": 1.2466143362373847, "grad_norm": 1.046875, "learning_rate": 6.268295498631034e-06, "loss": 1.3661, "step": 7226 }, { "epoch": 1.2467868541361167, "grad_norm": 0.578125, "learning_rate": 6.265770378881759e-06, "loss": 1.3727, "step": 7227 }, { "epoch": 1.2469593720348486, "grad_norm": 0.68359375, "learning_rate": 6.263245535787548e-06, "loss": 1.4667, "step": 7228 }, { "epoch": 1.2471318899335806, "grad_norm": 0.640625, "learning_rate": 6.260720969535453e-06, "loss": 1.439, "step": 7229 }, { "epoch": 1.2473044078323126, "grad_norm": 0.57421875, "learning_rate": 6.258196680312517e-06, "loss": 1.341, "step": 7230 }, { "epoch": 1.2474769257310445, "grad_norm": 0.69921875, "learning_rate": 6.255672668305748e-06, "loss": 1.4412, "step": 7231 }, { "epoch": 1.2476494436297765, "grad_norm": 0.58203125, "learning_rate": 6.253148933702147e-06, "loss": 1.3463, "step": 7232 }, { "epoch": 1.2478219615285087, "grad_norm": 0.58203125, "learning_rate": 6.250625476688679e-06, "loss": 1.3639, "step": 7233 }, { "epoch": 1.2479944794272406, "grad_norm": 0.59375, "learning_rate": 6.248102297452307e-06, "loss": 1.4123, "step": 7234 }, { "epoch": 1.2481669973259726, "grad_norm": 0.59375, "learning_rate": 6.245579396179957e-06, "loss": 1.5242, "step": 7235 }, { "epoch": 1.2483395152247045, "grad_norm": 0.57421875, "learning_rate": 6.243056773058542e-06, "loss": 1.541, "step": 7236 }, { "epoch": 1.2485120331234365, "grad_norm": 0.73828125, "learning_rate": 6.240534428274961e-06, "loss": 1.5029, "step": 7237 }, { "epoch": 1.2486845510221685, "grad_norm": 0.61328125, "learning_rate": 6.238012362016077e-06, "loss": 1.4879, "step": 7238 }, { "epoch": 1.2488570689209006, "grad_norm": 0.61328125, "learning_rate": 6.235490574468745e-06, "loss": 1.4711, "step": 7239 }, { "epoch": 1.2490295868196326, "grad_norm": 0.609375, "learning_rate": 6.232969065819791e-06, "loss": 1.4517, "step": 7240 }, { "epoch": 1.2492021047183646, "grad_norm": 0.62890625, "learning_rate": 6.230447836256028e-06, "loss": 1.4584, "step": 7241 }, { "epoch": 1.2493746226170965, "grad_norm": 0.58203125, "learning_rate": 6.2279268859642396e-06, "loss": 1.4252, "step": 7242 }, { "epoch": 1.2495471405158285, "grad_norm": 0.62890625, "learning_rate": 6.2254062151312e-06, "loss": 1.3844, "step": 7243 }, { "epoch": 1.2497196584145605, "grad_norm": 0.58984375, "learning_rate": 6.222885823943651e-06, "loss": 1.4895, "step": 7244 }, { "epoch": 1.2498921763132924, "grad_norm": 0.5859375, "learning_rate": 6.220365712588322e-06, "loss": 1.4183, "step": 7245 }, { "epoch": 1.2500646942120244, "grad_norm": 0.59765625, "learning_rate": 6.217845881251913e-06, "loss": 1.3925, "step": 7246 }, { "epoch": 1.2502372121107566, "grad_norm": 0.57421875, "learning_rate": 6.215326330121119e-06, "loss": 1.4546, "step": 7247 }, { "epoch": 1.2504097300094885, "grad_norm": 0.56640625, "learning_rate": 6.212807059382595e-06, "loss": 1.3991, "step": 7248 }, { "epoch": 1.2505822479082205, "grad_norm": 0.6171875, "learning_rate": 6.210288069222989e-06, "loss": 1.4733, "step": 7249 }, { "epoch": 1.2507547658069524, "grad_norm": 0.8671875, "learning_rate": 6.207769359828919e-06, "loss": 1.4307, "step": 7250 }, { "epoch": 1.2509272837056844, "grad_norm": 0.55078125, "learning_rate": 6.205250931386992e-06, "loss": 1.4177, "step": 7251 }, { "epoch": 1.2510998016044166, "grad_norm": 0.6015625, "learning_rate": 6.202732784083787e-06, "loss": 1.3977, "step": 7252 }, { "epoch": 1.2512723195031485, "grad_norm": 0.5625, "learning_rate": 6.20021491810586e-06, "loss": 1.4586, "step": 7253 }, { "epoch": 1.2514448374018805, "grad_norm": 0.6015625, "learning_rate": 6.197697333639759e-06, "loss": 1.5194, "step": 7254 }, { "epoch": 1.2516173553006125, "grad_norm": 0.79296875, "learning_rate": 6.195180030871995e-06, "loss": 1.3996, "step": 7255 }, { "epoch": 1.2517898731993444, "grad_norm": 0.58984375, "learning_rate": 6.19266300998907e-06, "loss": 1.3985, "step": 7256 }, { "epoch": 1.2519623910980764, "grad_norm": 0.6484375, "learning_rate": 6.190146271177455e-06, "loss": 1.3829, "step": 7257 }, { "epoch": 1.2521349089968083, "grad_norm": 0.60546875, "learning_rate": 6.187629814623613e-06, "loss": 1.4638, "step": 7258 }, { "epoch": 1.2523074268955403, "grad_norm": 0.64453125, "learning_rate": 6.1851136405139735e-06, "loss": 1.4106, "step": 7259 }, { "epoch": 1.2524799447942723, "grad_norm": 0.56640625, "learning_rate": 6.182597749034953e-06, "loss": 1.3867, "step": 7260 }, { "epoch": 1.2526524626930045, "grad_norm": 0.6875, "learning_rate": 6.1800821403729405e-06, "loss": 1.3823, "step": 7261 }, { "epoch": 1.2528249805917364, "grad_norm": 0.63671875, "learning_rate": 6.177566814714316e-06, "loss": 1.4487, "step": 7262 }, { "epoch": 1.2529974984904684, "grad_norm": 0.6328125, "learning_rate": 6.175051772245421e-06, "loss": 1.4645, "step": 7263 }, { "epoch": 1.2531700163892003, "grad_norm": 0.6015625, "learning_rate": 6.172537013152593e-06, "loss": 1.4401, "step": 7264 }, { "epoch": 1.2533425342879323, "grad_norm": 0.5859375, "learning_rate": 6.170022537622135e-06, "loss": 1.5324, "step": 7265 }, { "epoch": 1.2535150521866645, "grad_norm": 0.62890625, "learning_rate": 6.1675083458403405e-06, "loss": 1.3808, "step": 7266 }, { "epoch": 1.2536875700853964, "grad_norm": 0.5859375, "learning_rate": 6.164994437993474e-06, "loss": 1.4153, "step": 7267 }, { "epoch": 1.2538600879841284, "grad_norm": 0.6328125, "learning_rate": 6.162480814267779e-06, "loss": 1.492, "step": 7268 }, { "epoch": 1.2540326058828604, "grad_norm": 0.625, "learning_rate": 6.1599674748494875e-06, "loss": 1.3307, "step": 7269 }, { "epoch": 1.2542051237815923, "grad_norm": 0.6015625, "learning_rate": 6.1574544199247955e-06, "loss": 1.4724, "step": 7270 }, { "epoch": 1.2543776416803243, "grad_norm": 0.61328125, "learning_rate": 6.154941649679894e-06, "loss": 1.4853, "step": 7271 }, { "epoch": 1.2545501595790562, "grad_norm": 0.58203125, "learning_rate": 6.152429164300935e-06, "loss": 1.3948, "step": 7272 }, { "epoch": 1.2547226774777882, "grad_norm": 0.59765625, "learning_rate": 6.149916963974068e-06, "loss": 1.4597, "step": 7273 }, { "epoch": 1.2548951953765204, "grad_norm": 0.5703125, "learning_rate": 6.1474050488854055e-06, "loss": 1.3844, "step": 7274 }, { "epoch": 1.2550677132752524, "grad_norm": 0.57421875, "learning_rate": 6.144893419221052e-06, "loss": 1.4039, "step": 7275 }, { "epoch": 1.2552402311739843, "grad_norm": 0.5703125, "learning_rate": 6.142382075167082e-06, "loss": 1.4968, "step": 7276 }, { "epoch": 1.2554127490727163, "grad_norm": 0.58984375, "learning_rate": 6.139871016909551e-06, "loss": 1.5144, "step": 7277 }, { "epoch": 1.2555852669714482, "grad_norm": 0.5703125, "learning_rate": 6.1373602446344904e-06, "loss": 1.404, "step": 7278 }, { "epoch": 1.2557577848701804, "grad_norm": 0.5859375, "learning_rate": 6.134849758527923e-06, "loss": 1.4736, "step": 7279 }, { "epoch": 1.2559303027689124, "grad_norm": 0.6015625, "learning_rate": 6.1323395587758325e-06, "loss": 1.3478, "step": 7280 }, { "epoch": 1.2561028206676443, "grad_norm": 0.5390625, "learning_rate": 6.129829645564197e-06, "loss": 1.3936, "step": 7281 }, { "epoch": 1.2562753385663763, "grad_norm": 0.62890625, "learning_rate": 6.127320019078959e-06, "loss": 1.4741, "step": 7282 }, { "epoch": 1.2564478564651083, "grad_norm": 0.625, "learning_rate": 6.124810679506056e-06, "loss": 1.4305, "step": 7283 }, { "epoch": 1.2566203743638402, "grad_norm": 0.578125, "learning_rate": 6.122301627031388e-06, "loss": 1.5184, "step": 7284 }, { "epoch": 1.2567928922625722, "grad_norm": 0.625, "learning_rate": 6.119792861840843e-06, "loss": 1.4044, "step": 7285 }, { "epoch": 1.2569654101613041, "grad_norm": 0.63671875, "learning_rate": 6.117284384120292e-06, "loss": 1.422, "step": 7286 }, { "epoch": 1.257137928060036, "grad_norm": 0.6015625, "learning_rate": 6.1147761940555714e-06, "loss": 1.4325, "step": 7287 }, { "epoch": 1.2573104459587683, "grad_norm": 0.57421875, "learning_rate": 6.112268291832509e-06, "loss": 1.387, "step": 7288 }, { "epoch": 1.2574829638575002, "grad_norm": 0.6171875, "learning_rate": 6.1097606776369e-06, "loss": 1.4262, "step": 7289 }, { "epoch": 1.2576554817562322, "grad_norm": 0.57421875, "learning_rate": 6.107253351654529e-06, "loss": 1.3348, "step": 7290 }, { "epoch": 1.2578279996549642, "grad_norm": 0.62109375, "learning_rate": 6.104746314071153e-06, "loss": 1.3996, "step": 7291 }, { "epoch": 1.2580005175536961, "grad_norm": 0.59375, "learning_rate": 6.1022395650725095e-06, "loss": 1.3755, "step": 7292 }, { "epoch": 1.2581730354524283, "grad_norm": 0.65234375, "learning_rate": 6.09973310484431e-06, "loss": 1.4054, "step": 7293 }, { "epoch": 1.2583455533511603, "grad_norm": 0.69921875, "learning_rate": 6.0972269335722555e-06, "loss": 1.4913, "step": 7294 }, { "epoch": 1.2585180712498922, "grad_norm": 0.62109375, "learning_rate": 6.094721051442013e-06, "loss": 1.5199, "step": 7295 }, { "epoch": 1.2586905891486242, "grad_norm": 0.6640625, "learning_rate": 6.092215458639239e-06, "loss": 1.4474, "step": 7296 }, { "epoch": 1.2588631070473562, "grad_norm": 0.65234375, "learning_rate": 6.089710155349558e-06, "loss": 1.3976, "step": 7297 }, { "epoch": 1.2590356249460881, "grad_norm": 0.59765625, "learning_rate": 6.087205141758583e-06, "loss": 1.4169, "step": 7298 }, { "epoch": 1.25920814284482, "grad_norm": 0.609375, "learning_rate": 6.0847004180518985e-06, "loss": 1.4104, "step": 7299 }, { "epoch": 1.259380660743552, "grad_norm": 0.57421875, "learning_rate": 6.082195984415069e-06, "loss": 1.3966, "step": 7300 }, { "epoch": 1.259380660743552, "eval_loss": 1.4078376293182373, "eval_runtime": 10.8355, "eval_samples_per_second": 94.504, "eval_steps_per_second": 23.626, "step": 7300 }, { "epoch": 1.259553178642284, "grad_norm": 0.58203125, "learning_rate": 6.079691841033643e-06, "loss": 1.5522, "step": 7301 }, { "epoch": 1.2597256965410162, "grad_norm": 0.703125, "learning_rate": 6.077187988093138e-06, "loss": 1.414, "step": 7302 }, { "epoch": 1.2598982144397481, "grad_norm": 0.58984375, "learning_rate": 6.074684425779063e-06, "loss": 1.4165, "step": 7303 }, { "epoch": 1.26007073233848, "grad_norm": 0.5625, "learning_rate": 6.072181154276888e-06, "loss": 1.3574, "step": 7304 }, { "epoch": 1.260243250237212, "grad_norm": 0.578125, "learning_rate": 6.069678173772079e-06, "loss": 1.3784, "step": 7305 }, { "epoch": 1.260415768135944, "grad_norm": 0.5546875, "learning_rate": 6.067175484450063e-06, "loss": 1.4636, "step": 7306 }, { "epoch": 1.2605882860346762, "grad_norm": 0.5859375, "learning_rate": 6.064673086496267e-06, "loss": 1.3886, "step": 7307 }, { "epoch": 1.2607608039334082, "grad_norm": 0.66796875, "learning_rate": 6.062170980096073e-06, "loss": 1.376, "step": 7308 }, { "epoch": 1.2609333218321401, "grad_norm": 0.625, "learning_rate": 6.05966916543486e-06, "loss": 1.3851, "step": 7309 }, { "epoch": 1.261105839730872, "grad_norm": 0.60546875, "learning_rate": 6.057167642697973e-06, "loss": 1.483, "step": 7310 }, { "epoch": 1.261278357629604, "grad_norm": 0.61328125, "learning_rate": 6.054666412070746e-06, "loss": 1.4069, "step": 7311 }, { "epoch": 1.261450875528336, "grad_norm": 0.63671875, "learning_rate": 6.0521654737384804e-06, "loss": 1.3993, "step": 7312 }, { "epoch": 1.261623393427068, "grad_norm": 0.578125, "learning_rate": 6.049664827886468e-06, "loss": 1.4738, "step": 7313 }, { "epoch": 1.2617959113258, "grad_norm": 0.73828125, "learning_rate": 6.047164474699962e-06, "loss": 1.5021, "step": 7314 }, { "epoch": 1.2619684292245321, "grad_norm": 0.625, "learning_rate": 6.044664414364214e-06, "loss": 1.4437, "step": 7315 }, { "epoch": 1.262140947123264, "grad_norm": 0.62890625, "learning_rate": 6.0421646470644394e-06, "loss": 1.4966, "step": 7316 }, { "epoch": 1.262313465021996, "grad_norm": 0.6484375, "learning_rate": 6.039665172985834e-06, "loss": 1.4586, "step": 7317 }, { "epoch": 1.262485982920728, "grad_norm": 0.59765625, "learning_rate": 6.0371659923135825e-06, "loss": 1.4142, "step": 7318 }, { "epoch": 1.26265850081946, "grad_norm": 0.58203125, "learning_rate": 6.0346671052328345e-06, "loss": 1.5371, "step": 7319 }, { "epoch": 1.2628310187181921, "grad_norm": 0.61328125, "learning_rate": 6.0321685119287245e-06, "loss": 1.4882, "step": 7320 }, { "epoch": 1.263003536616924, "grad_norm": 0.65625, "learning_rate": 6.0296702125863586e-06, "loss": 1.4118, "step": 7321 }, { "epoch": 1.263176054515656, "grad_norm": 0.6953125, "learning_rate": 6.027172207390836e-06, "loss": 1.4162, "step": 7322 }, { "epoch": 1.263348572414388, "grad_norm": 0.6015625, "learning_rate": 6.024674496527219e-06, "loss": 1.3656, "step": 7323 }, { "epoch": 1.26352109031312, "grad_norm": 0.6640625, "learning_rate": 6.022177080180553e-06, "loss": 1.4969, "step": 7324 }, { "epoch": 1.263693608211852, "grad_norm": 0.58203125, "learning_rate": 6.019679958535862e-06, "loss": 1.378, "step": 7325 }, { "epoch": 1.263866126110584, "grad_norm": 0.59765625, "learning_rate": 6.017183131778154e-06, "loss": 1.4315, "step": 7326 }, { "epoch": 1.2640386440093159, "grad_norm": 0.64453125, "learning_rate": 6.0146866000924035e-06, "loss": 1.4713, "step": 7327 }, { "epoch": 1.2642111619080478, "grad_norm": 0.6015625, "learning_rate": 6.012190363663571e-06, "loss": 1.3467, "step": 7328 }, { "epoch": 1.26438367980678, "grad_norm": 0.5703125, "learning_rate": 6.009694422676591e-06, "loss": 1.4483, "step": 7329 }, { "epoch": 1.264556197705512, "grad_norm": 0.7890625, "learning_rate": 6.007198777316385e-06, "loss": 1.4766, "step": 7330 }, { "epoch": 1.264728715604244, "grad_norm": 0.5859375, "learning_rate": 6.004703427767837e-06, "loss": 1.3881, "step": 7331 }, { "epoch": 1.264901233502976, "grad_norm": 0.6328125, "learning_rate": 6.002208374215829e-06, "loss": 1.4425, "step": 7332 }, { "epoch": 1.2650737514017079, "grad_norm": 0.63671875, "learning_rate": 5.999713616845197e-06, "loss": 1.4362, "step": 7333 }, { "epoch": 1.26524626930044, "grad_norm": 0.640625, "learning_rate": 5.997219155840777e-06, "loss": 1.5301, "step": 7334 }, { "epoch": 1.265418787199172, "grad_norm": 0.625, "learning_rate": 5.994724991387375e-06, "loss": 1.4292, "step": 7335 }, { "epoch": 1.265591305097904, "grad_norm": 0.63671875, "learning_rate": 5.992231123669771e-06, "loss": 1.4772, "step": 7336 }, { "epoch": 1.265763822996636, "grad_norm": 0.57421875, "learning_rate": 5.989737552872729e-06, "loss": 1.4154, "step": 7337 }, { "epoch": 1.2659363408953679, "grad_norm": 0.5703125, "learning_rate": 5.9872442791809815e-06, "loss": 1.4353, "step": 7338 }, { "epoch": 1.2661088587940998, "grad_norm": 0.62109375, "learning_rate": 5.984751302779255e-06, "loss": 1.4214, "step": 7339 }, { "epoch": 1.2662813766928318, "grad_norm": 0.60546875, "learning_rate": 5.982258623852239e-06, "loss": 1.4753, "step": 7340 }, { "epoch": 1.2664538945915638, "grad_norm": 0.6015625, "learning_rate": 5.979766242584608e-06, "loss": 1.5378, "step": 7341 }, { "epoch": 1.266626412490296, "grad_norm": 0.5390625, "learning_rate": 5.977274159161012e-06, "loss": 1.3289, "step": 7342 }, { "epoch": 1.266798930389028, "grad_norm": 0.58203125, "learning_rate": 5.974782373766084e-06, "loss": 1.405, "step": 7343 }, { "epoch": 1.2669714482877599, "grad_norm": 0.578125, "learning_rate": 5.972290886584426e-06, "loss": 1.4091, "step": 7344 }, { "epoch": 1.2671439661864918, "grad_norm": 0.60546875, "learning_rate": 5.969799697800629e-06, "loss": 1.3101, "step": 7345 }, { "epoch": 1.2673164840852238, "grad_norm": 0.5625, "learning_rate": 5.967308807599248e-06, "loss": 1.472, "step": 7346 }, { "epoch": 1.2674890019839558, "grad_norm": 0.6015625, "learning_rate": 5.964818216164832e-06, "loss": 1.5133, "step": 7347 }, { "epoch": 1.267661519882688, "grad_norm": 0.5859375, "learning_rate": 5.962327923681892e-06, "loss": 1.3993, "step": 7348 }, { "epoch": 1.26783403778142, "grad_norm": 0.6640625, "learning_rate": 5.959837930334926e-06, "loss": 1.4841, "step": 7349 }, { "epoch": 1.2680065556801519, "grad_norm": 0.56640625, "learning_rate": 5.9573482363084155e-06, "loss": 1.4165, "step": 7350 }, { "epoch": 1.2681790735788838, "grad_norm": 0.57421875, "learning_rate": 5.954858841786806e-06, "loss": 1.4749, "step": 7351 }, { "epoch": 1.2683515914776158, "grad_norm": 0.58984375, "learning_rate": 5.952369746954529e-06, "loss": 1.3882, "step": 7352 }, { "epoch": 1.2685241093763477, "grad_norm": 0.58984375, "learning_rate": 5.9498809519959875e-06, "loss": 1.5357, "step": 7353 }, { "epoch": 1.2686966272750797, "grad_norm": 0.54296875, "learning_rate": 5.9473924570955776e-06, "loss": 1.3692, "step": 7354 }, { "epoch": 1.2688691451738117, "grad_norm": 0.578125, "learning_rate": 5.944904262437653e-06, "loss": 1.3353, "step": 7355 }, { "epoch": 1.2690416630725438, "grad_norm": 0.58984375, "learning_rate": 5.94241636820656e-06, "loss": 1.4578, "step": 7356 }, { "epoch": 1.2692141809712758, "grad_norm": 0.58203125, "learning_rate": 5.939928774586612e-06, "loss": 1.4813, "step": 7357 }, { "epoch": 1.2693866988700078, "grad_norm": 0.58984375, "learning_rate": 5.937441481762112e-06, "loss": 1.464, "step": 7358 }, { "epoch": 1.2695592167687397, "grad_norm": 0.59765625, "learning_rate": 5.934954489917329e-06, "loss": 1.4479, "step": 7359 }, { "epoch": 1.2697317346674717, "grad_norm": 0.6328125, "learning_rate": 5.93246779923652e-06, "loss": 1.3553, "step": 7360 }, { "epoch": 1.2699042525662039, "grad_norm": 0.65625, "learning_rate": 5.929981409903907e-06, "loss": 1.4249, "step": 7361 }, { "epoch": 1.2700767704649358, "grad_norm": 0.60546875, "learning_rate": 5.927495322103707e-06, "loss": 1.4009, "step": 7362 }, { "epoch": 1.2702492883636678, "grad_norm": 0.66796875, "learning_rate": 5.925009536020094e-06, "loss": 1.4347, "step": 7363 }, { "epoch": 1.2704218062623998, "grad_norm": 0.578125, "learning_rate": 5.922524051837241e-06, "loss": 1.4666, "step": 7364 }, { "epoch": 1.2705943241611317, "grad_norm": 0.6875, "learning_rate": 5.9200388697392805e-06, "loss": 1.4344, "step": 7365 }, { "epoch": 1.2707668420598637, "grad_norm": 0.69140625, "learning_rate": 5.9175539899103315e-06, "loss": 1.351, "step": 7366 }, { "epoch": 1.2709393599585956, "grad_norm": 0.55859375, "learning_rate": 5.915069412534498e-06, "loss": 1.347, "step": 7367 }, { "epoch": 1.2711118778573276, "grad_norm": 0.671875, "learning_rate": 5.912585137795841e-06, "loss": 1.4984, "step": 7368 }, { "epoch": 1.2712843957560596, "grad_norm": 0.59375, "learning_rate": 5.910101165878419e-06, "loss": 1.4683, "step": 7369 }, { "epoch": 1.2714569136547917, "grad_norm": 0.56640625, "learning_rate": 5.907617496966254e-06, "loss": 1.3853, "step": 7370 }, { "epoch": 1.2716294315535237, "grad_norm": 0.59765625, "learning_rate": 5.90513413124336e-06, "loss": 1.4295, "step": 7371 }, { "epoch": 1.2718019494522557, "grad_norm": 0.609375, "learning_rate": 5.902651068893712e-06, "loss": 1.5122, "step": 7372 }, { "epoch": 1.2719744673509876, "grad_norm": 0.6484375, "learning_rate": 5.9001683101012775e-06, "loss": 1.3092, "step": 7373 }, { "epoch": 1.2721469852497196, "grad_norm": 0.63671875, "learning_rate": 5.897685855049986e-06, "loss": 1.4631, "step": 7374 }, { "epoch": 1.2723195031484518, "grad_norm": 0.60546875, "learning_rate": 5.895203703923764e-06, "loss": 1.4884, "step": 7375 }, { "epoch": 1.2724920210471837, "grad_norm": 0.60546875, "learning_rate": 5.892721856906498e-06, "loss": 1.3907, "step": 7376 }, { "epoch": 1.2726645389459157, "grad_norm": 0.5859375, "learning_rate": 5.890240314182061e-06, "loss": 1.4388, "step": 7377 }, { "epoch": 1.2728370568446477, "grad_norm": 0.6484375, "learning_rate": 5.887759075934297e-06, "loss": 1.4239, "step": 7378 }, { "epoch": 1.2730095747433796, "grad_norm": 0.6171875, "learning_rate": 5.8852781423470395e-06, "loss": 1.407, "step": 7379 }, { "epoch": 1.2731820926421116, "grad_norm": 0.62109375, "learning_rate": 5.882797513604085e-06, "loss": 1.4078, "step": 7380 }, { "epoch": 1.2733546105408435, "grad_norm": 0.6640625, "learning_rate": 5.880317189889213e-06, "loss": 1.528, "step": 7381 }, { "epoch": 1.2735271284395755, "grad_norm": 0.625, "learning_rate": 5.87783717138619e-06, "loss": 1.4042, "step": 7382 }, { "epoch": 1.2736996463383077, "grad_norm": 0.5703125, "learning_rate": 5.8753574582787435e-06, "loss": 1.325, "step": 7383 }, { "epoch": 1.2738721642370396, "grad_norm": 0.55859375, "learning_rate": 5.87287805075059e-06, "loss": 1.5028, "step": 7384 }, { "epoch": 1.2740446821357716, "grad_norm": 0.6953125, "learning_rate": 5.870398948985414e-06, "loss": 1.4158, "step": 7385 }, { "epoch": 1.2742172000345036, "grad_norm": 0.6796875, "learning_rate": 5.867920153166892e-06, "loss": 1.4555, "step": 7386 }, { "epoch": 1.2743897179332355, "grad_norm": 0.57421875, "learning_rate": 5.865441663478661e-06, "loss": 1.4297, "step": 7387 }, { "epoch": 1.2745622358319677, "grad_norm": 0.69140625, "learning_rate": 5.862963480104347e-06, "loss": 1.4885, "step": 7388 }, { "epoch": 1.2747347537306997, "grad_norm": 0.76171875, "learning_rate": 5.860485603227544e-06, "loss": 1.4481, "step": 7389 }, { "epoch": 1.2749072716294316, "grad_norm": 0.63671875, "learning_rate": 5.858008033031836e-06, "loss": 1.4486, "step": 7390 }, { "epoch": 1.2750797895281636, "grad_norm": 0.9453125, "learning_rate": 5.855530769700769e-06, "loss": 1.4396, "step": 7391 }, { "epoch": 1.2752523074268955, "grad_norm": 0.5625, "learning_rate": 5.853053813417883e-06, "loss": 1.5185, "step": 7392 }, { "epoch": 1.2754248253256275, "grad_norm": 0.5703125, "learning_rate": 5.85057716436668e-06, "loss": 1.3693, "step": 7393 }, { "epoch": 1.2755973432243595, "grad_norm": 0.609375, "learning_rate": 5.848100822730649e-06, "loss": 1.494, "step": 7394 }, { "epoch": 1.2757698611230914, "grad_norm": 0.59765625, "learning_rate": 5.8456247886932475e-06, "loss": 1.4265, "step": 7395 }, { "epoch": 1.2759423790218234, "grad_norm": 0.5625, "learning_rate": 5.843149062437923e-06, "loss": 1.4153, "step": 7396 }, { "epoch": 1.2761148969205556, "grad_norm": 0.6328125, "learning_rate": 5.840673644148087e-06, "loss": 1.4869, "step": 7397 }, { "epoch": 1.2762874148192875, "grad_norm": 0.62890625, "learning_rate": 5.838198534007138e-06, "loss": 1.4606, "step": 7398 }, { "epoch": 1.2764599327180195, "grad_norm": 0.60546875, "learning_rate": 5.835723732198444e-06, "loss": 1.4313, "step": 7399 }, { "epoch": 1.2766324506167515, "grad_norm": 0.6015625, "learning_rate": 5.833249238905357e-06, "loss": 1.446, "step": 7400 }, { "epoch": 1.2766324506167515, "eval_loss": 1.4078235626220703, "eval_runtime": 10.7712, "eval_samples_per_second": 95.069, "eval_steps_per_second": 23.767, "step": 7400 }, { "epoch": 1.2768049685154834, "grad_norm": 0.74609375, "learning_rate": 5.8307750543111996e-06, "loss": 1.41, "step": 7401 }, { "epoch": 1.2769774864142156, "grad_norm": 0.58984375, "learning_rate": 5.828301178599277e-06, "loss": 1.4281, "step": 7402 }, { "epoch": 1.2771500043129476, "grad_norm": 0.66015625, "learning_rate": 5.825827611952874e-06, "loss": 1.5065, "step": 7403 }, { "epoch": 1.2773225222116795, "grad_norm": 0.6171875, "learning_rate": 5.823354354555234e-06, "loss": 1.3859, "step": 7404 }, { "epoch": 1.2774950401104115, "grad_norm": 0.875, "learning_rate": 5.820881406589609e-06, "loss": 1.4492, "step": 7405 }, { "epoch": 1.2776675580091434, "grad_norm": 0.578125, "learning_rate": 5.818408768239197e-06, "loss": 1.4723, "step": 7406 }, { "epoch": 1.2778400759078754, "grad_norm": 0.5859375, "learning_rate": 5.815936439687192e-06, "loss": 1.3366, "step": 7407 }, { "epoch": 1.2780125938066074, "grad_norm": 0.609375, "learning_rate": 5.813464421116759e-06, "loss": 1.389, "step": 7408 }, { "epoch": 1.2781851117053393, "grad_norm": 0.5859375, "learning_rate": 5.810992712711039e-06, "loss": 1.4805, "step": 7409 }, { "epoch": 1.2783576296040713, "grad_norm": 0.6015625, "learning_rate": 5.808521314653155e-06, "loss": 1.4856, "step": 7410 }, { "epoch": 1.2785301475028035, "grad_norm": 0.625, "learning_rate": 5.806050227126203e-06, "loss": 1.4922, "step": 7411 }, { "epoch": 1.2787026654015354, "grad_norm": 0.6484375, "learning_rate": 5.803579450313249e-06, "loss": 1.4169, "step": 7412 }, { "epoch": 1.2788751833002674, "grad_norm": 0.55859375, "learning_rate": 5.801108984397355e-06, "loss": 1.4809, "step": 7413 }, { "epoch": 1.2790477011989994, "grad_norm": 0.61328125, "learning_rate": 5.798638829561545e-06, "loss": 1.4217, "step": 7414 }, { "epoch": 1.2792202190977313, "grad_norm": 0.609375, "learning_rate": 5.796168985988814e-06, "loss": 1.3787, "step": 7415 }, { "epoch": 1.2793927369964635, "grad_norm": 0.65625, "learning_rate": 5.793699453862161e-06, "loss": 1.5047, "step": 7416 }, { "epoch": 1.2795652548951955, "grad_norm": 0.5703125, "learning_rate": 5.791230233364529e-06, "loss": 1.4522, "step": 7417 }, { "epoch": 1.2797377727939274, "grad_norm": 0.80859375, "learning_rate": 5.788761324678859e-06, "loss": 1.3559, "step": 7418 }, { "epoch": 1.2799102906926594, "grad_norm": 0.62890625, "learning_rate": 5.7862927279880635e-06, "loss": 1.4391, "step": 7419 }, { "epoch": 1.2800828085913913, "grad_norm": 0.59375, "learning_rate": 5.78382444347503e-06, "loss": 1.4629, "step": 7420 }, { "epoch": 1.2802553264901233, "grad_norm": 0.63671875, "learning_rate": 5.781356471322628e-06, "loss": 1.4712, "step": 7421 }, { "epoch": 1.2804278443888553, "grad_norm": 0.62890625, "learning_rate": 5.7788888117136964e-06, "loss": 1.4271, "step": 7422 }, { "epoch": 1.2806003622875872, "grad_norm": 0.6015625, "learning_rate": 5.7764214648310564e-06, "loss": 1.4672, "step": 7423 }, { "epoch": 1.2807728801863194, "grad_norm": 0.63671875, "learning_rate": 5.773954430857509e-06, "loss": 1.3926, "step": 7424 }, { "epoch": 1.2809453980850514, "grad_norm": 0.55859375, "learning_rate": 5.771487709975814e-06, "loss": 1.3844, "step": 7425 }, { "epoch": 1.2811179159837833, "grad_norm": 0.578125, "learning_rate": 5.769021302368739e-06, "loss": 1.4494, "step": 7426 }, { "epoch": 1.2812904338825153, "grad_norm": 0.56640625, "learning_rate": 5.766555208218998e-06, "loss": 1.4814, "step": 7427 }, { "epoch": 1.2814629517812473, "grad_norm": 0.59765625, "learning_rate": 5.7640894277093e-06, "loss": 1.5364, "step": 7428 }, { "epoch": 1.2816354696799794, "grad_norm": 0.64453125, "learning_rate": 5.761623961022323e-06, "loss": 1.4008, "step": 7429 }, { "epoch": 1.2818079875787114, "grad_norm": 0.6171875, "learning_rate": 5.759158808340726e-06, "loss": 1.3162, "step": 7430 }, { "epoch": 1.2819805054774434, "grad_norm": 0.62109375, "learning_rate": 5.756693969847142e-06, "loss": 1.3761, "step": 7431 }, { "epoch": 1.2821530233761753, "grad_norm": 0.984375, "learning_rate": 5.754229445724184e-06, "loss": 1.4603, "step": 7432 }, { "epoch": 1.2823255412749073, "grad_norm": 1.3046875, "learning_rate": 5.751765236154436e-06, "loss": 1.4425, "step": 7433 }, { "epoch": 1.2824980591736392, "grad_norm": 0.609375, "learning_rate": 5.749301341320464e-06, "loss": 1.4418, "step": 7434 }, { "epoch": 1.2826705770723712, "grad_norm": 0.58203125, "learning_rate": 5.746837761404811e-06, "loss": 1.4066, "step": 7435 }, { "epoch": 1.2828430949711032, "grad_norm": 0.59375, "learning_rate": 5.744374496589985e-06, "loss": 1.3874, "step": 7436 }, { "epoch": 1.2830156128698351, "grad_norm": 0.59375, "learning_rate": 5.741911547058494e-06, "loss": 1.4895, "step": 7437 }, { "epoch": 1.2831881307685673, "grad_norm": 0.59375, "learning_rate": 5.739448912992798e-06, "loss": 1.4584, "step": 7438 }, { "epoch": 1.2833606486672993, "grad_norm": 0.5859375, "learning_rate": 5.736986594575347e-06, "loss": 1.3845, "step": 7439 }, { "epoch": 1.2835331665660312, "grad_norm": 0.61328125, "learning_rate": 5.734524591988566e-06, "loss": 1.3687, "step": 7440 }, { "epoch": 1.2837056844647632, "grad_norm": 0.58984375, "learning_rate": 5.732062905414855e-06, "loss": 1.4917, "step": 7441 }, { "epoch": 1.2838782023634951, "grad_norm": 0.546875, "learning_rate": 5.7296015350365905e-06, "loss": 1.4639, "step": 7442 }, { "epoch": 1.2840507202622273, "grad_norm": 0.64453125, "learning_rate": 5.727140481036133e-06, "loss": 1.4227, "step": 7443 }, { "epoch": 1.2842232381609593, "grad_norm": 0.5703125, "learning_rate": 5.7246797435957965e-06, "loss": 1.4035, "step": 7444 }, { "epoch": 1.2843957560596913, "grad_norm": 0.58984375, "learning_rate": 5.722219322897904e-06, "loss": 1.3356, "step": 7445 }, { "epoch": 1.2845682739584232, "grad_norm": 0.57421875, "learning_rate": 5.719759219124735e-06, "loss": 1.4335, "step": 7446 }, { "epoch": 1.2847407918571552, "grad_norm": 0.69140625, "learning_rate": 5.71729943245854e-06, "loss": 1.4359, "step": 7447 }, { "epoch": 1.2849133097558871, "grad_norm": 0.578125, "learning_rate": 5.714839963081571e-06, "loss": 1.383, "step": 7448 }, { "epoch": 1.285085827654619, "grad_norm": 0.58984375, "learning_rate": 5.712380811176024e-06, "loss": 1.4616, "step": 7449 }, { "epoch": 1.285258345553351, "grad_norm": 0.5703125, "learning_rate": 5.709921976924106e-06, "loss": 1.4082, "step": 7450 }, { "epoch": 1.285430863452083, "grad_norm": 0.58203125, "learning_rate": 5.707463460507967e-06, "loss": 1.5528, "step": 7451 }, { "epoch": 1.2856033813508152, "grad_norm": 0.6953125, "learning_rate": 5.705005262109759e-06, "loss": 1.4262, "step": 7452 }, { "epoch": 1.2857758992495472, "grad_norm": 0.859375, "learning_rate": 5.702547381911595e-06, "loss": 1.4013, "step": 7453 }, { "epoch": 1.2859484171482791, "grad_norm": 0.60546875, "learning_rate": 5.700089820095573e-06, "loss": 1.3427, "step": 7454 }, { "epoch": 1.286120935047011, "grad_norm": 0.5546875, "learning_rate": 5.697632576843766e-06, "loss": 1.3756, "step": 7455 }, { "epoch": 1.286293452945743, "grad_norm": 0.57421875, "learning_rate": 5.695175652338223e-06, "loss": 1.4234, "step": 7456 }, { "epoch": 1.2864659708444752, "grad_norm": 0.6328125, "learning_rate": 5.692719046760957e-06, "loss": 1.4469, "step": 7457 }, { "epoch": 1.2866384887432072, "grad_norm": 0.58203125, "learning_rate": 5.690262760293986e-06, "loss": 1.4501, "step": 7458 }, { "epoch": 1.2868110066419391, "grad_norm": 0.5625, "learning_rate": 5.687806793119273e-06, "loss": 1.3342, "step": 7459 }, { "epoch": 1.286983524540671, "grad_norm": 0.58984375, "learning_rate": 5.685351145418778e-06, "loss": 1.427, "step": 7460 }, { "epoch": 1.287156042439403, "grad_norm": 0.609375, "learning_rate": 5.682895817374429e-06, "loss": 1.4885, "step": 7461 }, { "epoch": 1.287328560338135, "grad_norm": 0.6015625, "learning_rate": 5.680440809168131e-06, "loss": 1.4662, "step": 7462 }, { "epoch": 1.287501078236867, "grad_norm": 0.56640625, "learning_rate": 5.677986120981769e-06, "loss": 1.4228, "step": 7463 }, { "epoch": 1.287673596135599, "grad_norm": 0.578125, "learning_rate": 5.675531752997201e-06, "loss": 1.4616, "step": 7464 }, { "epoch": 1.2878461140343311, "grad_norm": 0.56640625, "learning_rate": 5.67307770539626e-06, "loss": 1.3393, "step": 7465 }, { "epoch": 1.288018631933063, "grad_norm": 0.62109375, "learning_rate": 5.670623978360759e-06, "loss": 1.374, "step": 7466 }, { "epoch": 1.288191149831795, "grad_norm": 0.546875, "learning_rate": 5.668170572072489e-06, "loss": 1.4372, "step": 7467 }, { "epoch": 1.288363667730527, "grad_norm": 0.578125, "learning_rate": 5.6657174867132e-06, "loss": 1.4121, "step": 7468 }, { "epoch": 1.288536185629259, "grad_norm": 0.61328125, "learning_rate": 5.663264722464651e-06, "loss": 1.4619, "step": 7469 }, { "epoch": 1.2887087035279912, "grad_norm": 0.578125, "learning_rate": 5.6608122795085444e-06, "loss": 1.4514, "step": 7470 }, { "epoch": 1.2888812214267231, "grad_norm": 0.5546875, "learning_rate": 5.658360158026577e-06, "loss": 1.3991, "step": 7471 }, { "epoch": 1.289053739325455, "grad_norm": 0.66015625, "learning_rate": 5.655908358200415e-06, "loss": 1.4462, "step": 7472 }, { "epoch": 1.289226257224187, "grad_norm": 0.58203125, "learning_rate": 5.653456880211707e-06, "loss": 1.4311, "step": 7473 }, { "epoch": 1.289398775122919, "grad_norm": 0.59375, "learning_rate": 5.651005724242072e-06, "loss": 1.4442, "step": 7474 }, { "epoch": 1.289571293021651, "grad_norm": 0.60546875, "learning_rate": 5.648554890473108e-06, "loss": 1.5117, "step": 7475 }, { "epoch": 1.289743810920383, "grad_norm": 0.65625, "learning_rate": 5.646104379086381e-06, "loss": 1.4792, "step": 7476 }, { "epoch": 1.2899163288191149, "grad_norm": 0.625, "learning_rate": 5.64365419026345e-06, "loss": 1.4261, "step": 7477 }, { "epoch": 1.2900888467178468, "grad_norm": 0.58984375, "learning_rate": 5.64120432418584e-06, "loss": 1.3697, "step": 7478 }, { "epoch": 1.290261364616579, "grad_norm": 0.54296875, "learning_rate": 5.63875478103504e-06, "loss": 1.379, "step": 7479 }, { "epoch": 1.290433882515311, "grad_norm": 0.57421875, "learning_rate": 5.636305560992545e-06, "loss": 1.3918, "step": 7480 }, { "epoch": 1.290606400414043, "grad_norm": 0.671875, "learning_rate": 5.6338566642397915e-06, "loss": 1.4982, "step": 7481 }, { "epoch": 1.290778918312775, "grad_norm": 0.5859375, "learning_rate": 5.631408090958225e-06, "loss": 1.4698, "step": 7482 }, { "epoch": 1.2909514362115069, "grad_norm": 0.60546875, "learning_rate": 5.62895984132924e-06, "loss": 1.4574, "step": 7483 }, { "epoch": 1.291123954110239, "grad_norm": 0.63671875, "learning_rate": 5.626511915534221e-06, "loss": 1.3892, "step": 7484 }, { "epoch": 1.291296472008971, "grad_norm": 0.6015625, "learning_rate": 5.624064313754525e-06, "loss": 1.4437, "step": 7485 }, { "epoch": 1.291468989907703, "grad_norm": 0.60546875, "learning_rate": 5.621617036171487e-06, "loss": 1.3831, "step": 7486 }, { "epoch": 1.291641507806435, "grad_norm": 0.58203125, "learning_rate": 5.619170082966419e-06, "loss": 1.3423, "step": 7487 }, { "epoch": 1.291814025705167, "grad_norm": 0.54296875, "learning_rate": 5.616723454320605e-06, "loss": 1.4161, "step": 7488 }, { "epoch": 1.2919865436038989, "grad_norm": 0.59765625, "learning_rate": 5.6142771504152995e-06, "loss": 1.4779, "step": 7489 }, { "epoch": 1.2921590615026308, "grad_norm": 0.5859375, "learning_rate": 5.611831171431752e-06, "loss": 1.4647, "step": 7490 }, { "epoch": 1.2923315794013628, "grad_norm": 0.58203125, "learning_rate": 5.609385517551167e-06, "loss": 1.3995, "step": 7491 }, { "epoch": 1.292504097300095, "grad_norm": 0.62890625, "learning_rate": 5.606940188954737e-06, "loss": 1.5, "step": 7492 }, { "epoch": 1.292676615198827, "grad_norm": 0.59375, "learning_rate": 5.604495185823626e-06, "loss": 1.4466, "step": 7493 }, { "epoch": 1.292849133097559, "grad_norm": 0.609375, "learning_rate": 5.602050508338976e-06, "loss": 1.5505, "step": 7494 }, { "epoch": 1.2930216509962908, "grad_norm": 0.609375, "learning_rate": 5.599606156681905e-06, "loss": 1.3898, "step": 7495 }, { "epoch": 1.2931941688950228, "grad_norm": 0.5859375, "learning_rate": 5.597162131033505e-06, "loss": 1.4221, "step": 7496 }, { "epoch": 1.2933666867937548, "grad_norm": 0.57421875, "learning_rate": 5.594718431574844e-06, "loss": 1.3453, "step": 7497 }, { "epoch": 1.293539204692487, "grad_norm": 0.59765625, "learning_rate": 5.592275058486967e-06, "loss": 1.4244, "step": 7498 }, { "epoch": 1.293711722591219, "grad_norm": 0.58984375, "learning_rate": 5.589832011950897e-06, "loss": 1.3701, "step": 7499 }, { "epoch": 1.2938842404899509, "grad_norm": 0.671875, "learning_rate": 5.5873892921476215e-06, "loss": 1.4186, "step": 7500 }, { "epoch": 1.2938842404899509, "eval_loss": 1.4076613187789917, "eval_runtime": 10.8861, "eval_samples_per_second": 94.065, "eval_steps_per_second": 23.516, "step": 7500 }, { "epoch": 1.2940567583886828, "grad_norm": 0.58984375, "learning_rate": 5.584946899258125e-06, "loss": 1.3333, "step": 7501 }, { "epoch": 1.2942292762874148, "grad_norm": 0.578125, "learning_rate": 5.582504833463347e-06, "loss": 1.4381, "step": 7502 }, { "epoch": 1.2944017941861468, "grad_norm": 0.578125, "learning_rate": 5.58006309494421e-06, "loss": 1.4592, "step": 7503 }, { "epoch": 1.2945743120848787, "grad_norm": 0.57421875, "learning_rate": 5.577621683881618e-06, "loss": 1.4971, "step": 7504 }, { "epoch": 1.2947468299836107, "grad_norm": 0.59765625, "learning_rate": 5.5751806004564435e-06, "loss": 1.396, "step": 7505 }, { "epoch": 1.2949193478823429, "grad_norm": 0.5703125, "learning_rate": 5.572739844849537e-06, "loss": 1.4507, "step": 7506 }, { "epoch": 1.2950918657810748, "grad_norm": 0.57421875, "learning_rate": 5.57029941724173e-06, "loss": 1.3708, "step": 7507 }, { "epoch": 1.2952643836798068, "grad_norm": 0.6171875, "learning_rate": 5.5678593178138125e-06, "loss": 1.4396, "step": 7508 }, { "epoch": 1.2954369015785387, "grad_norm": 0.56640625, "learning_rate": 5.565419546746574e-06, "loss": 1.4847, "step": 7509 }, { "epoch": 1.2956094194772707, "grad_norm": 0.6796875, "learning_rate": 5.562980104220763e-06, "loss": 1.3488, "step": 7510 }, { "epoch": 1.295781937376003, "grad_norm": 0.56640625, "learning_rate": 5.560540990417111e-06, "loss": 1.3707, "step": 7511 }, { "epoch": 1.2959544552747349, "grad_norm": 0.58203125, "learning_rate": 5.558102205516325e-06, "loss": 1.4186, "step": 7512 }, { "epoch": 1.2961269731734668, "grad_norm": 0.625, "learning_rate": 5.555663749699074e-06, "loss": 1.3705, "step": 7513 }, { "epoch": 1.2962994910721988, "grad_norm": 0.59375, "learning_rate": 5.55322562314603e-06, "loss": 1.5207, "step": 7514 }, { "epoch": 1.2964720089709307, "grad_norm": 0.66015625, "learning_rate": 5.5507878260378115e-06, "loss": 1.4564, "step": 7515 }, { "epoch": 1.2966445268696627, "grad_norm": 0.578125, "learning_rate": 5.5483503585550326e-06, "loss": 1.4188, "step": 7516 }, { "epoch": 1.2968170447683947, "grad_norm": 0.5859375, "learning_rate": 5.545913220878272e-06, "loss": 1.3734, "step": 7517 }, { "epoch": 1.2969895626671266, "grad_norm": 0.640625, "learning_rate": 5.5434764131880915e-06, "loss": 1.3631, "step": 7518 }, { "epoch": 1.2971620805658586, "grad_norm": 0.5859375, "learning_rate": 5.541039935665025e-06, "loss": 1.4003, "step": 7519 }, { "epoch": 1.2973345984645908, "grad_norm": 0.56640625, "learning_rate": 5.538603788489584e-06, "loss": 1.454, "step": 7520 }, { "epoch": 1.2975071163633227, "grad_norm": 0.640625, "learning_rate": 5.5361679718422426e-06, "loss": 1.3602, "step": 7521 }, { "epoch": 1.2976796342620547, "grad_norm": 0.61328125, "learning_rate": 5.533732485903477e-06, "loss": 1.4205, "step": 7522 }, { "epoch": 1.2978521521607866, "grad_norm": 0.640625, "learning_rate": 5.531297330853711e-06, "loss": 1.425, "step": 7523 }, { "epoch": 1.2980246700595186, "grad_norm": 0.56640625, "learning_rate": 5.528862506873361e-06, "loss": 1.419, "step": 7524 }, { "epoch": 1.2981971879582508, "grad_norm": 0.671875, "learning_rate": 5.526428014142814e-06, "loss": 1.5736, "step": 7525 }, { "epoch": 1.2983697058569827, "grad_norm": 0.60546875, "learning_rate": 5.523993852842431e-06, "loss": 1.3834, "step": 7526 }, { "epoch": 1.2985422237557147, "grad_norm": 0.58984375, "learning_rate": 5.521560023152552e-06, "loss": 1.4649, "step": 7527 }, { "epoch": 1.2987147416544467, "grad_norm": 0.58203125, "learning_rate": 5.519126525253486e-06, "loss": 1.465, "step": 7528 }, { "epoch": 1.2988872595531786, "grad_norm": 0.6328125, "learning_rate": 5.516693359325528e-06, "loss": 1.4627, "step": 7529 }, { "epoch": 1.2990597774519106, "grad_norm": 0.6484375, "learning_rate": 5.514260525548938e-06, "loss": 1.3427, "step": 7530 }, { "epoch": 1.2992322953506426, "grad_norm": 1.4375, "learning_rate": 5.51182802410396e-06, "loss": 1.3953, "step": 7531 }, { "epoch": 1.2994048132493745, "grad_norm": 0.66015625, "learning_rate": 5.509395855170798e-06, "loss": 1.345, "step": 7532 }, { "epoch": 1.2995773311481067, "grad_norm": 0.62890625, "learning_rate": 5.506964018929657e-06, "loss": 1.3181, "step": 7533 }, { "epoch": 1.2997498490468387, "grad_norm": 0.5859375, "learning_rate": 5.5045325155606925e-06, "loss": 1.5654, "step": 7534 }, { "epoch": 1.2999223669455706, "grad_norm": 0.5859375, "learning_rate": 5.502101345244047e-06, "loss": 1.441, "step": 7535 }, { "epoch": 1.3000948848443026, "grad_norm": 0.5546875, "learning_rate": 5.499670508159838e-06, "loss": 1.4018, "step": 7536 }, { "epoch": 1.3002674027430345, "grad_norm": 0.58203125, "learning_rate": 5.497240004488158e-06, "loss": 1.3829, "step": 7537 }, { "epoch": 1.3004399206417665, "grad_norm": 1.0, "learning_rate": 5.494809834409071e-06, "loss": 1.4306, "step": 7538 }, { "epoch": 1.3006124385404987, "grad_norm": 0.59765625, "learning_rate": 5.492379998102627e-06, "loss": 1.4568, "step": 7539 }, { "epoch": 1.3007849564392306, "grad_norm": 0.578125, "learning_rate": 5.48995049574883e-06, "loss": 1.5109, "step": 7540 }, { "epoch": 1.3009574743379626, "grad_norm": 0.5703125, "learning_rate": 5.4875213275276875e-06, "loss": 1.3706, "step": 7541 }, { "epoch": 1.3011299922366946, "grad_norm": 0.6015625, "learning_rate": 5.485092493619153e-06, "loss": 1.3853, "step": 7542 }, { "epoch": 1.3013025101354265, "grad_norm": 0.58984375, "learning_rate": 5.482663994203179e-06, "loss": 1.426, "step": 7543 }, { "epoch": 1.3014750280341585, "grad_norm": 0.66796875, "learning_rate": 5.480235829459688e-06, "loss": 1.4036, "step": 7544 }, { "epoch": 1.3016475459328904, "grad_norm": 0.59375, "learning_rate": 5.477807999568558e-06, "loss": 1.4661, "step": 7545 }, { "epoch": 1.3018200638316224, "grad_norm": 0.56640625, "learning_rate": 5.475380504709678e-06, "loss": 1.3434, "step": 7546 }, { "epoch": 1.3019925817303546, "grad_norm": 0.5859375, "learning_rate": 5.472953345062875e-06, "loss": 1.3539, "step": 7547 }, { "epoch": 1.3021650996290866, "grad_norm": 0.63671875, "learning_rate": 5.470526520807975e-06, "loss": 1.4634, "step": 7548 }, { "epoch": 1.3023376175278185, "grad_norm": 0.6015625, "learning_rate": 5.4681000321247725e-06, "loss": 1.3594, "step": 7549 }, { "epoch": 1.3025101354265505, "grad_norm": 0.59375, "learning_rate": 5.465673879193035e-06, "loss": 1.5056, "step": 7550 }, { "epoch": 1.3026826533252824, "grad_norm": 0.61328125, "learning_rate": 5.46324806219251e-06, "loss": 1.3833, "step": 7551 }, { "epoch": 1.3028551712240146, "grad_norm": 0.5859375, "learning_rate": 5.460822581302918e-06, "loss": 1.3744, "step": 7552 }, { "epoch": 1.3030276891227466, "grad_norm": 0.67578125, "learning_rate": 5.458397436703944e-06, "loss": 1.4649, "step": 7553 }, { "epoch": 1.3032002070214785, "grad_norm": 0.58984375, "learning_rate": 5.455972628575272e-06, "loss": 1.4986, "step": 7554 }, { "epoch": 1.3033727249202105, "grad_norm": 0.67578125, "learning_rate": 5.453548157096538e-06, "loss": 1.5157, "step": 7555 }, { "epoch": 1.3035452428189425, "grad_norm": 0.58203125, "learning_rate": 5.4511240224473625e-06, "loss": 1.4927, "step": 7556 }, { "epoch": 1.3037177607176744, "grad_norm": 0.6171875, "learning_rate": 5.448700224807342e-06, "loss": 1.4667, "step": 7557 }, { "epoch": 1.3038902786164064, "grad_norm": 0.60546875, "learning_rate": 5.446276764356048e-06, "loss": 1.4254, "step": 7558 }, { "epoch": 1.3040627965151383, "grad_norm": 1.484375, "learning_rate": 5.443853641273024e-06, "loss": 1.338, "step": 7559 }, { "epoch": 1.3042353144138703, "grad_norm": 0.5703125, "learning_rate": 5.441430855737789e-06, "loss": 1.3658, "step": 7560 }, { "epoch": 1.3044078323126025, "grad_norm": 0.61328125, "learning_rate": 5.43900840792984e-06, "loss": 1.4133, "step": 7561 }, { "epoch": 1.3045803502113344, "grad_norm": 0.60546875, "learning_rate": 5.436586298028647e-06, "loss": 1.4209, "step": 7562 }, { "epoch": 1.3047528681100664, "grad_norm": 0.58203125, "learning_rate": 5.434164526213659e-06, "loss": 1.4423, "step": 7563 }, { "epoch": 1.3049253860087984, "grad_norm": 0.6171875, "learning_rate": 5.431743092664283e-06, "loss": 1.3585, "step": 7564 }, { "epoch": 1.3050979039075303, "grad_norm": 0.58984375, "learning_rate": 5.429321997559931e-06, "loss": 1.4644, "step": 7565 }, { "epoch": 1.3052704218062625, "grad_norm": 0.69140625, "learning_rate": 5.42690124107996e-06, "loss": 1.4258, "step": 7566 }, { "epoch": 1.3054429397049945, "grad_norm": 0.609375, "learning_rate": 5.4244808234037195e-06, "loss": 1.3962, "step": 7567 }, { "epoch": 1.3056154576037264, "grad_norm": 0.6328125, "learning_rate": 5.422060744710527e-06, "loss": 1.3899, "step": 7568 }, { "epoch": 1.3057879755024584, "grad_norm": 0.62890625, "learning_rate": 5.419641005179681e-06, "loss": 1.474, "step": 7569 }, { "epoch": 1.3059604934011904, "grad_norm": 0.81640625, "learning_rate": 5.417221604990448e-06, "loss": 1.5104, "step": 7570 }, { "epoch": 1.3061330112999223, "grad_norm": 0.6015625, "learning_rate": 5.414802544322072e-06, "loss": 1.5238, "step": 7571 }, { "epoch": 1.3063055291986543, "grad_norm": 0.60546875, "learning_rate": 5.412383823353774e-06, "loss": 1.3963, "step": 7572 }, { "epoch": 1.3064780470973862, "grad_norm": 0.5703125, "learning_rate": 5.409965442264751e-06, "loss": 1.3876, "step": 7573 }, { "epoch": 1.3066505649961184, "grad_norm": 0.578125, "learning_rate": 5.407547401234161e-06, "loss": 1.4604, "step": 7574 }, { "epoch": 1.3068230828948504, "grad_norm": 0.6875, "learning_rate": 5.405129700441157e-06, "loss": 1.5196, "step": 7575 }, { "epoch": 1.3069956007935823, "grad_norm": 0.6171875, "learning_rate": 5.402712340064859e-06, "loss": 1.3634, "step": 7576 }, { "epoch": 1.3071681186923143, "grad_norm": 0.64453125, "learning_rate": 5.400295320284348e-06, "loss": 1.5047, "step": 7577 }, { "epoch": 1.3073406365910463, "grad_norm": 0.58984375, "learning_rate": 5.397878641278709e-06, "loss": 1.3541, "step": 7578 }, { "epoch": 1.3075131544897785, "grad_norm": 0.578125, "learning_rate": 5.3954623032269705e-06, "loss": 1.4108, "step": 7579 }, { "epoch": 1.3076856723885104, "grad_norm": 0.671875, "learning_rate": 5.3930463063081564e-06, "loss": 1.4113, "step": 7580 }, { "epoch": 1.3078581902872424, "grad_norm": 0.6484375, "learning_rate": 5.390630650701257e-06, "loss": 1.4792, "step": 7581 }, { "epoch": 1.3080307081859743, "grad_norm": 0.6015625, "learning_rate": 5.388215336585239e-06, "loss": 1.4458, "step": 7582 }, { "epoch": 1.3082032260847063, "grad_norm": 0.6328125, "learning_rate": 5.385800364139044e-06, "loss": 1.4197, "step": 7583 }, { "epoch": 1.3083757439834383, "grad_norm": 0.5546875, "learning_rate": 5.383385733541594e-06, "loss": 1.3762, "step": 7584 }, { "epoch": 1.3085482618821702, "grad_norm": 0.61328125, "learning_rate": 5.380971444971766e-06, "loss": 1.4279, "step": 7585 }, { "epoch": 1.3087207797809022, "grad_norm": 0.58984375, "learning_rate": 5.3785574986084435e-06, "loss": 1.4792, "step": 7586 }, { "epoch": 1.3088932976796341, "grad_norm": 0.609375, "learning_rate": 5.376143894630454e-06, "loss": 1.4466, "step": 7587 }, { "epoch": 1.3090658155783663, "grad_norm": 0.546875, "learning_rate": 5.373730633216614e-06, "loss": 1.4329, "step": 7588 }, { "epoch": 1.3092383334770983, "grad_norm": 0.578125, "learning_rate": 5.3713177145457165e-06, "loss": 1.4848, "step": 7589 }, { "epoch": 1.3094108513758302, "grad_norm": 0.58203125, "learning_rate": 5.368905138796523e-06, "loss": 1.4444, "step": 7590 }, { "epoch": 1.3095833692745622, "grad_norm": 0.5859375, "learning_rate": 5.366492906147775e-06, "loss": 1.466, "step": 7591 }, { "epoch": 1.3097558871732942, "grad_norm": 0.5859375, "learning_rate": 5.364081016778182e-06, "loss": 1.406, "step": 7592 }, { "epoch": 1.3099284050720263, "grad_norm": 0.5625, "learning_rate": 5.361669470866435e-06, "loss": 1.3943, "step": 7593 }, { "epoch": 1.3101009229707583, "grad_norm": 0.69140625, "learning_rate": 5.359258268591195e-06, "loss": 1.4474, "step": 7594 }, { "epoch": 1.3102734408694903, "grad_norm": 0.625, "learning_rate": 5.356847410131103e-06, "loss": 1.4458, "step": 7595 }, { "epoch": 1.3104459587682222, "grad_norm": 0.62890625, "learning_rate": 5.354436895664759e-06, "loss": 1.4278, "step": 7596 }, { "epoch": 1.3106184766669542, "grad_norm": 0.55078125, "learning_rate": 5.352026725370763e-06, "loss": 1.4711, "step": 7597 }, { "epoch": 1.3107909945656862, "grad_norm": 0.5859375, "learning_rate": 5.3496168994276635e-06, "loss": 1.423, "step": 7598 }, { "epoch": 1.3109635124644181, "grad_norm": 0.609375, "learning_rate": 5.347207418014006e-06, "loss": 1.4974, "step": 7599 }, { "epoch": 1.31113603036315, "grad_norm": 0.75, "learning_rate": 5.344798281308295e-06, "loss": 1.3676, "step": 7600 }, { "epoch": 1.31113603036315, "eval_loss": 1.407556176185608, "eval_runtime": 10.9219, "eval_samples_per_second": 93.756, "eval_steps_per_second": 23.439, "step": 7600 }, { "epoch": 1.311308548261882, "grad_norm": 0.59765625, "learning_rate": 5.34238948948901e-06, "loss": 1.477, "step": 7601 }, { "epoch": 1.3114810661606142, "grad_norm": 0.578125, "learning_rate": 5.339981042734617e-06, "loss": 1.4956, "step": 7602 }, { "epoch": 1.3116535840593462, "grad_norm": 0.66796875, "learning_rate": 5.337572941223544e-06, "loss": 1.4456, "step": 7603 }, { "epoch": 1.3118261019580781, "grad_norm": 0.57421875, "learning_rate": 5.3351651851342e-06, "loss": 1.5027, "step": 7604 }, { "epoch": 1.31199861985681, "grad_norm": 0.578125, "learning_rate": 5.33275777464497e-06, "loss": 1.4509, "step": 7605 }, { "epoch": 1.312171137755542, "grad_norm": 0.59375, "learning_rate": 5.3303507099342e-06, "loss": 1.4749, "step": 7606 }, { "epoch": 1.3123436556542742, "grad_norm": 0.59765625, "learning_rate": 5.3279439911802286e-06, "loss": 1.4949, "step": 7607 }, { "epoch": 1.3125161735530062, "grad_norm": 0.83203125, "learning_rate": 5.325537618561364e-06, "loss": 1.5091, "step": 7608 }, { "epoch": 1.3126886914517382, "grad_norm": 0.578125, "learning_rate": 5.323131592255871e-06, "loss": 1.5434, "step": 7609 }, { "epoch": 1.3128612093504701, "grad_norm": 0.578125, "learning_rate": 5.3207259124420205e-06, "loss": 1.4441, "step": 7610 }, { "epoch": 1.313033727249202, "grad_norm": 0.73828125, "learning_rate": 5.318320579298028e-06, "loss": 1.4234, "step": 7611 }, { "epoch": 1.313206245147934, "grad_norm": 0.59765625, "learning_rate": 5.3159155930021e-06, "loss": 1.4529, "step": 7612 }, { "epoch": 1.313378763046666, "grad_norm": 0.62890625, "learning_rate": 5.313510953732411e-06, "loss": 1.4523, "step": 7613 }, { "epoch": 1.313551280945398, "grad_norm": 0.59765625, "learning_rate": 5.311106661667115e-06, "loss": 1.4488, "step": 7614 }, { "epoch": 1.3137237988441302, "grad_norm": 0.5703125, "learning_rate": 5.308702716984333e-06, "loss": 1.3764, "step": 7615 }, { "epoch": 1.3138963167428621, "grad_norm": 0.65234375, "learning_rate": 5.306299119862171e-06, "loss": 1.3542, "step": 7616 }, { "epoch": 1.314068834641594, "grad_norm": 0.5546875, "learning_rate": 5.30389587047869e-06, "loss": 1.4034, "step": 7617 }, { "epoch": 1.314241352540326, "grad_norm": 0.6328125, "learning_rate": 5.301492969011954e-06, "loss": 1.4876, "step": 7618 }, { "epoch": 1.314413870439058, "grad_norm": 0.59375, "learning_rate": 5.299090415639973e-06, "loss": 1.5078, "step": 7619 }, { "epoch": 1.3145863883377902, "grad_norm": 0.58984375, "learning_rate": 5.296688210540746e-06, "loss": 1.4435, "step": 7620 }, { "epoch": 1.3147589062365221, "grad_norm": 0.9375, "learning_rate": 5.294286353892243e-06, "loss": 1.5348, "step": 7621 }, { "epoch": 1.314931424135254, "grad_norm": 0.6328125, "learning_rate": 5.29188484587241e-06, "loss": 1.3537, "step": 7622 }, { "epoch": 1.315103942033986, "grad_norm": 0.609375, "learning_rate": 5.2894836866591655e-06, "loss": 1.4277, "step": 7623 }, { "epoch": 1.315276459932718, "grad_norm": 0.609375, "learning_rate": 5.287082876430403e-06, "loss": 1.3934, "step": 7624 }, { "epoch": 1.31544897783145, "grad_norm": 0.62890625, "learning_rate": 5.284682415363988e-06, "loss": 1.4736, "step": 7625 }, { "epoch": 1.315621495730182, "grad_norm": 0.66015625, "learning_rate": 5.2822823036377625e-06, "loss": 1.3568, "step": 7626 }, { "epoch": 1.315794013628914, "grad_norm": 0.5390625, "learning_rate": 5.279882541429544e-06, "loss": 1.36, "step": 7627 }, { "epoch": 1.3159665315276459, "grad_norm": 0.55859375, "learning_rate": 5.2774831289171136e-06, "loss": 1.4064, "step": 7628 }, { "epoch": 1.316139049426378, "grad_norm": 0.59765625, "learning_rate": 5.275084066278248e-06, "loss": 1.4406, "step": 7629 }, { "epoch": 1.31631156732511, "grad_norm": 0.6484375, "learning_rate": 5.27268535369067e-06, "loss": 1.4174, "step": 7630 }, { "epoch": 1.316484085223842, "grad_norm": 0.62890625, "learning_rate": 5.270286991332106e-06, "loss": 1.2914, "step": 7631 }, { "epoch": 1.316656603122574, "grad_norm": 0.6015625, "learning_rate": 5.267888979380229e-06, "loss": 1.3725, "step": 7632 }, { "epoch": 1.316829121021306, "grad_norm": 0.55078125, "learning_rate": 5.265491318012705e-06, "loss": 1.392, "step": 7633 }, { "epoch": 1.317001638920038, "grad_norm": 0.63671875, "learning_rate": 5.263094007407168e-06, "loss": 1.4334, "step": 7634 }, { "epoch": 1.31717415681877, "grad_norm": 0.5859375, "learning_rate": 5.2606970477412236e-06, "loss": 1.4093, "step": 7635 }, { "epoch": 1.317346674717502, "grad_norm": 0.58203125, "learning_rate": 5.258300439192454e-06, "loss": 1.4355, "step": 7636 }, { "epoch": 1.317519192616234, "grad_norm": 0.60546875, "learning_rate": 5.255904181938419e-06, "loss": 1.4449, "step": 7637 }, { "epoch": 1.317691710514966, "grad_norm": 0.56640625, "learning_rate": 5.253508276156638e-06, "loss": 1.447, "step": 7638 }, { "epoch": 1.3178642284136979, "grad_norm": 0.91796875, "learning_rate": 5.251112722024625e-06, "loss": 1.4952, "step": 7639 }, { "epoch": 1.3180367463124298, "grad_norm": 0.5859375, "learning_rate": 5.248717519719857e-06, "loss": 1.4281, "step": 7640 }, { "epoch": 1.3182092642111618, "grad_norm": 0.62890625, "learning_rate": 5.246322669419775e-06, "loss": 1.3986, "step": 7641 }, { "epoch": 1.3183817821098938, "grad_norm": 0.59375, "learning_rate": 5.2439281713018196e-06, "loss": 1.4189, "step": 7642 }, { "epoch": 1.318554300008626, "grad_norm": 0.55078125, "learning_rate": 5.24153402554338e-06, "loss": 1.4394, "step": 7643 }, { "epoch": 1.318726817907358, "grad_norm": 0.6484375, "learning_rate": 5.239140232321831e-06, "loss": 1.4017, "step": 7644 }, { "epoch": 1.3188993358060899, "grad_norm": 0.5703125, "learning_rate": 5.236746791814522e-06, "loss": 1.4656, "step": 7645 }, { "epoch": 1.3190718537048218, "grad_norm": 0.6640625, "learning_rate": 5.2343537041987715e-06, "loss": 1.4927, "step": 7646 }, { "epoch": 1.3192443716035538, "grad_norm": 0.6328125, "learning_rate": 5.231960969651876e-06, "loss": 1.4623, "step": 7647 }, { "epoch": 1.319416889502286, "grad_norm": 0.578125, "learning_rate": 5.2295685883511086e-06, "loss": 1.5245, "step": 7648 }, { "epoch": 1.319589407401018, "grad_norm": 0.6015625, "learning_rate": 5.227176560473698e-06, "loss": 1.3987, "step": 7649 }, { "epoch": 1.31976192529975, "grad_norm": 0.57421875, "learning_rate": 5.224784886196878e-06, "loss": 1.4556, "step": 7650 }, { "epoch": 1.3199344431984819, "grad_norm": 0.59765625, "learning_rate": 5.222393565697828e-06, "loss": 1.4211, "step": 7651 }, { "epoch": 1.3201069610972138, "grad_norm": 0.578125, "learning_rate": 5.2200025991537126e-06, "loss": 1.4429, "step": 7652 }, { "epoch": 1.3202794789959458, "grad_norm": 0.65234375, "learning_rate": 5.217611986741673e-06, "loss": 1.4242, "step": 7653 }, { "epoch": 1.3204519968946777, "grad_norm": 0.62890625, "learning_rate": 5.215221728638815e-06, "loss": 1.4632, "step": 7654 }, { "epoch": 1.3206245147934097, "grad_norm": 0.5859375, "learning_rate": 5.2128318250222355e-06, "loss": 1.4538, "step": 7655 }, { "epoch": 1.3207970326921419, "grad_norm": 0.57421875, "learning_rate": 5.210442276068981e-06, "loss": 1.4823, "step": 7656 }, { "epoch": 1.3209695505908738, "grad_norm": 0.68359375, "learning_rate": 5.208053081956091e-06, "loss": 1.3459, "step": 7657 }, { "epoch": 1.3211420684896058, "grad_norm": 0.55859375, "learning_rate": 5.205664242860568e-06, "loss": 1.402, "step": 7658 }, { "epoch": 1.3213145863883378, "grad_norm": 0.59375, "learning_rate": 5.203275758959396e-06, "loss": 1.376, "step": 7659 }, { "epoch": 1.3214871042870697, "grad_norm": 0.57421875, "learning_rate": 5.200887630429528e-06, "loss": 1.3909, "step": 7660 }, { "epoch": 1.321659622185802, "grad_norm": 0.56640625, "learning_rate": 5.198499857447894e-06, "loss": 1.4473, "step": 7661 }, { "epoch": 1.3218321400845339, "grad_norm": 0.671875, "learning_rate": 5.196112440191383e-06, "loss": 1.3559, "step": 7662 }, { "epoch": 1.3220046579832658, "grad_norm": 0.5703125, "learning_rate": 5.193725378836886e-06, "loss": 1.4613, "step": 7663 }, { "epoch": 1.3221771758819978, "grad_norm": 0.56640625, "learning_rate": 5.19133867356124e-06, "loss": 1.3656, "step": 7664 }, { "epoch": 1.3223496937807298, "grad_norm": 0.58984375, "learning_rate": 5.188952324541272e-06, "loss": 1.4035, "step": 7665 }, { "epoch": 1.3225222116794617, "grad_norm": 0.5859375, "learning_rate": 5.1865663319537764e-06, "loss": 1.4547, "step": 7666 }, { "epoch": 1.3226947295781937, "grad_norm": 0.6484375, "learning_rate": 5.184180695975522e-06, "loss": 1.4893, "step": 7667 }, { "epoch": 1.3228672474769256, "grad_norm": 0.5625, "learning_rate": 5.181795416783253e-06, "loss": 1.4425, "step": 7668 }, { "epoch": 1.3230397653756576, "grad_norm": 0.59765625, "learning_rate": 5.1794104945536886e-06, "loss": 1.4272, "step": 7669 }, { "epoch": 1.3232122832743898, "grad_norm": 2.5, "learning_rate": 5.1770259294635075e-06, "loss": 1.4514, "step": 7670 }, { "epoch": 1.3233848011731217, "grad_norm": 0.7890625, "learning_rate": 5.1746417216893845e-06, "loss": 1.4506, "step": 7671 }, { "epoch": 1.3235573190718537, "grad_norm": 0.5859375, "learning_rate": 5.172257871407957e-06, "loss": 1.4556, "step": 7672 }, { "epoch": 1.3237298369705857, "grad_norm": 0.58203125, "learning_rate": 5.169874378795824e-06, "loss": 1.3554, "step": 7673 }, { "epoch": 1.3239023548693176, "grad_norm": 0.640625, "learning_rate": 5.167491244029584e-06, "loss": 1.4715, "step": 7674 }, { "epoch": 1.3240748727680498, "grad_norm": 3.78125, "learning_rate": 5.165108467285784e-06, "loss": 1.4466, "step": 7675 }, { "epoch": 1.3242473906667818, "grad_norm": 0.70703125, "learning_rate": 5.16272604874096e-06, "loss": 1.4296, "step": 7676 }, { "epoch": 1.3244199085655137, "grad_norm": 0.59765625, "learning_rate": 5.160343988571614e-06, "loss": 1.4277, "step": 7677 }, { "epoch": 1.3245924264642457, "grad_norm": 0.625, "learning_rate": 5.157962286954224e-06, "loss": 1.5272, "step": 7678 }, { "epoch": 1.3247649443629776, "grad_norm": 0.6171875, "learning_rate": 5.155580944065244e-06, "loss": 1.5264, "step": 7679 }, { "epoch": 1.3249374622617096, "grad_norm": 0.62109375, "learning_rate": 5.153199960081099e-06, "loss": 1.3906, "step": 7680 }, { "epoch": 1.3251099801604416, "grad_norm": 0.65234375, "learning_rate": 5.150819335178179e-06, "loss": 1.4098, "step": 7681 }, { "epoch": 1.3252824980591735, "grad_norm": 0.73828125, "learning_rate": 5.148439069532868e-06, "loss": 1.3736, "step": 7682 }, { "epoch": 1.3254550159579057, "grad_norm": 0.66796875, "learning_rate": 5.1460591633215015e-06, "loss": 1.36, "step": 7683 }, { "epoch": 1.3256275338566377, "grad_norm": 0.60546875, "learning_rate": 5.143679616720401e-06, "loss": 1.4018, "step": 7684 }, { "epoch": 1.3258000517553696, "grad_norm": 0.58203125, "learning_rate": 5.141300429905858e-06, "loss": 1.4803, "step": 7685 }, { "epoch": 1.3259725696541016, "grad_norm": 0.55859375, "learning_rate": 5.1389216030541345e-06, "loss": 1.3943, "step": 7686 }, { "epoch": 1.3261450875528336, "grad_norm": 0.546875, "learning_rate": 5.1365431363414784e-06, "loss": 1.3702, "step": 7687 }, { "epoch": 1.3263176054515655, "grad_norm": 0.58984375, "learning_rate": 5.134165029944094e-06, "loss": 1.5292, "step": 7688 }, { "epoch": 1.3264901233502977, "grad_norm": 0.60546875, "learning_rate": 5.1317872840381645e-06, "loss": 1.4318, "step": 7689 }, { "epoch": 1.3266626412490297, "grad_norm": 0.62109375, "learning_rate": 5.129409898799852e-06, "loss": 1.4077, "step": 7690 }, { "epoch": 1.3268351591477616, "grad_norm": 0.57421875, "learning_rate": 5.1270328744052864e-06, "loss": 1.4297, "step": 7691 }, { "epoch": 1.3270076770464936, "grad_norm": 0.640625, "learning_rate": 5.124656211030574e-06, "loss": 1.4418, "step": 7692 }, { "epoch": 1.3271801949452255, "grad_norm": 0.6015625, "learning_rate": 5.122279908851796e-06, "loss": 1.4186, "step": 7693 }, { "epoch": 1.3273527128439575, "grad_norm": 0.65625, "learning_rate": 5.119903968044992e-06, "loss": 1.4164, "step": 7694 }, { "epoch": 1.3275252307426895, "grad_norm": 0.59765625, "learning_rate": 5.117528388786201e-06, "loss": 1.4222, "step": 7695 }, { "epoch": 1.3276977486414214, "grad_norm": 0.58984375, "learning_rate": 5.1151531712514115e-06, "loss": 1.3717, "step": 7696 }, { "epoch": 1.3278702665401536, "grad_norm": 0.58203125, "learning_rate": 5.112778315616596e-06, "loss": 1.4239, "step": 7697 }, { "epoch": 1.3280427844388856, "grad_norm": 0.65234375, "learning_rate": 5.1104038220577e-06, "loss": 1.4251, "step": 7698 }, { "epoch": 1.3282153023376175, "grad_norm": 0.6015625, "learning_rate": 5.108029690750641e-06, "loss": 1.3776, "step": 7699 }, { "epoch": 1.3283878202363495, "grad_norm": 0.60546875, "learning_rate": 5.105655921871309e-06, "loss": 1.4722, "step": 7700 }, { "epoch": 1.3283878202363495, "eval_loss": 1.4075722694396973, "eval_runtime": 11.0299, "eval_samples_per_second": 92.839, "eval_steps_per_second": 23.21, "step": 7700 }, { "epoch": 1.3285603381350815, "grad_norm": 0.5625, "learning_rate": 5.10328251559557e-06, "loss": 1.4283, "step": 7701 }, { "epoch": 1.3287328560338136, "grad_norm": 0.62109375, "learning_rate": 5.100909472099251e-06, "loss": 1.4533, "step": 7702 }, { "epoch": 1.3289053739325456, "grad_norm": 0.58984375, "learning_rate": 5.098536791558175e-06, "loss": 1.2669, "step": 7703 }, { "epoch": 1.3290778918312776, "grad_norm": 0.55859375, "learning_rate": 5.096164474148122e-06, "loss": 1.3087, "step": 7704 }, { "epoch": 1.3292504097300095, "grad_norm": 0.66796875, "learning_rate": 5.093792520044837e-06, "loss": 1.3846, "step": 7705 }, { "epoch": 1.3294229276287415, "grad_norm": 0.5625, "learning_rate": 5.091420929424065e-06, "loss": 1.4604, "step": 7706 }, { "epoch": 1.3295954455274734, "grad_norm": 0.58203125, "learning_rate": 5.089049702461497e-06, "loss": 1.4529, "step": 7707 }, { "epoch": 1.3297679634262054, "grad_norm": 0.578125, "learning_rate": 5.086678839332813e-06, "loss": 1.4119, "step": 7708 }, { "epoch": 1.3299404813249374, "grad_norm": 0.63671875, "learning_rate": 5.084308340213661e-06, "loss": 1.4156, "step": 7709 }, { "epoch": 1.3301129992236693, "grad_norm": 0.6015625, "learning_rate": 5.08193820527966e-06, "loss": 1.5028, "step": 7710 }, { "epoch": 1.3302855171224015, "grad_norm": 0.63671875, "learning_rate": 5.079568434706408e-06, "loss": 1.4327, "step": 7711 }, { "epoch": 1.3304580350211335, "grad_norm": 0.640625, "learning_rate": 5.0771990286694725e-06, "loss": 1.5038, "step": 7712 }, { "epoch": 1.3306305529198654, "grad_norm": 0.5703125, "learning_rate": 5.0748299873443855e-06, "loss": 1.4443, "step": 7713 }, { "epoch": 1.3308030708185974, "grad_norm": 0.59765625, "learning_rate": 5.072461310906675e-06, "loss": 1.4156, "step": 7714 }, { "epoch": 1.3309755887173293, "grad_norm": 0.61328125, "learning_rate": 5.07009299953181e-06, "loss": 1.4279, "step": 7715 }, { "epoch": 1.3311481066160615, "grad_norm": 0.64453125, "learning_rate": 5.0677250533952695e-06, "loss": 1.5014, "step": 7716 }, { "epoch": 1.3313206245147935, "grad_norm": 0.58984375, "learning_rate": 5.065357472672469e-06, "loss": 1.4707, "step": 7717 }, { "epoch": 1.3314931424135255, "grad_norm": 0.578125, "learning_rate": 5.0629902575388165e-06, "loss": 1.4062, "step": 7718 }, { "epoch": 1.3316656603122574, "grad_norm": 0.5625, "learning_rate": 5.060623408169703e-06, "loss": 1.4613, "step": 7719 }, { "epoch": 1.3318381782109894, "grad_norm": 0.60546875, "learning_rate": 5.058256924740463e-06, "loss": 1.4527, "step": 7720 }, { "epoch": 1.3320106961097213, "grad_norm": 0.70703125, "learning_rate": 5.0558908074264315e-06, "loss": 1.5059, "step": 7721 }, { "epoch": 1.3321832140084533, "grad_norm": 0.74609375, "learning_rate": 5.053525056402898e-06, "loss": 1.4908, "step": 7722 }, { "epoch": 1.3323557319071853, "grad_norm": 0.69140625, "learning_rate": 5.051159671845136e-06, "loss": 1.529, "step": 7723 }, { "epoch": 1.3325282498059174, "grad_norm": 0.61328125, "learning_rate": 5.048794653928389e-06, "loss": 1.4887, "step": 7724 }, { "epoch": 1.3327007677046494, "grad_norm": 0.62109375, "learning_rate": 5.046430002827874e-06, "loss": 1.3807, "step": 7725 }, { "epoch": 1.3328732856033814, "grad_norm": 0.69140625, "learning_rate": 5.044065718718766e-06, "loss": 1.3835, "step": 7726 }, { "epoch": 1.3330458035021133, "grad_norm": 0.5546875, "learning_rate": 5.041701801776244e-06, "loss": 1.3587, "step": 7727 }, { "epoch": 1.3332183214008453, "grad_norm": 0.578125, "learning_rate": 5.039338252175431e-06, "loss": 1.4684, "step": 7728 }, { "epoch": 1.3333908392995775, "grad_norm": 0.71484375, "learning_rate": 5.0369750700914345e-06, "loss": 1.3529, "step": 7729 }, { "epoch": 1.3335633571983094, "grad_norm": 0.578125, "learning_rate": 5.034612255699336e-06, "loss": 1.448, "step": 7730 }, { "epoch": 1.3337358750970414, "grad_norm": 0.68359375, "learning_rate": 5.032249809174187e-06, "loss": 1.361, "step": 7731 }, { "epoch": 1.3339083929957734, "grad_norm": 0.6328125, "learning_rate": 5.029887730691011e-06, "loss": 1.3377, "step": 7732 }, { "epoch": 1.3340809108945053, "grad_norm": 0.578125, "learning_rate": 5.027526020424811e-06, "loss": 1.4446, "step": 7733 }, { "epoch": 1.3342534287932373, "grad_norm": 0.6640625, "learning_rate": 5.025164678550545e-06, "loss": 1.3311, "step": 7734 }, { "epoch": 1.3344259466919692, "grad_norm": 0.6328125, "learning_rate": 5.0228037052431685e-06, "loss": 1.4285, "step": 7735 }, { "epoch": 1.3345984645907012, "grad_norm": 0.609375, "learning_rate": 5.020443100677595e-06, "loss": 1.3957, "step": 7736 }, { "epoch": 1.3347709824894332, "grad_norm": 0.59375, "learning_rate": 5.018082865028704e-06, "loss": 1.4813, "step": 7737 }, { "epoch": 1.3349435003881653, "grad_norm": 0.59375, "learning_rate": 5.01572299847137e-06, "loss": 1.3646, "step": 7738 }, { "epoch": 1.3351160182868973, "grad_norm": 0.625, "learning_rate": 5.013363501180415e-06, "loss": 1.5413, "step": 7739 }, { "epoch": 1.3352885361856293, "grad_norm": 0.65625, "learning_rate": 5.01100437333065e-06, "loss": 1.4785, "step": 7740 }, { "epoch": 1.3354610540843612, "grad_norm": 0.578125, "learning_rate": 5.008645615096855e-06, "loss": 1.4102, "step": 7741 }, { "epoch": 1.3356335719830932, "grad_norm": 0.56640625, "learning_rate": 5.006287226653779e-06, "loss": 1.4611, "step": 7742 }, { "epoch": 1.3358060898818254, "grad_norm": 0.68359375, "learning_rate": 5.003929208176148e-06, "loss": 1.4491, "step": 7743 }, { "epoch": 1.3359786077805573, "grad_norm": 0.6484375, "learning_rate": 5.00157155983866e-06, "loss": 1.5095, "step": 7744 }, { "epoch": 1.3361511256792893, "grad_norm": 0.5859375, "learning_rate": 4.999214281815977e-06, "loss": 1.496, "step": 7745 }, { "epoch": 1.3363236435780212, "grad_norm": 0.6015625, "learning_rate": 4.996857374282754e-06, "loss": 1.4236, "step": 7746 }, { "epoch": 1.3364961614767532, "grad_norm": 0.5703125, "learning_rate": 4.99450083741359e-06, "loss": 1.2766, "step": 7747 }, { "epoch": 1.3366686793754852, "grad_norm": 0.59375, "learning_rate": 4.992144671383087e-06, "loss": 1.3265, "step": 7748 }, { "epoch": 1.3368411972742171, "grad_norm": 0.61328125, "learning_rate": 4.989788876365793e-06, "loss": 1.4192, "step": 7749 }, { "epoch": 1.337013715172949, "grad_norm": 0.62109375, "learning_rate": 4.987433452536244e-06, "loss": 1.476, "step": 7750 }, { "epoch": 1.337186233071681, "grad_norm": 0.58203125, "learning_rate": 4.985078400068947e-06, "loss": 1.4746, "step": 7751 }, { "epoch": 1.3373587509704132, "grad_norm": 0.5703125, "learning_rate": 4.982723719138375e-06, "loss": 1.4289, "step": 7752 }, { "epoch": 1.3375312688691452, "grad_norm": 1.375, "learning_rate": 4.980369409918979e-06, "loss": 1.3759, "step": 7753 }, { "epoch": 1.3377037867678772, "grad_norm": 0.609375, "learning_rate": 4.978015472585183e-06, "loss": 1.4064, "step": 7754 }, { "epoch": 1.3378763046666091, "grad_norm": 0.5859375, "learning_rate": 4.975661907311377e-06, "loss": 1.4212, "step": 7755 }, { "epoch": 1.338048822565341, "grad_norm": 0.55859375, "learning_rate": 4.973308714271933e-06, "loss": 1.4666, "step": 7756 }, { "epoch": 1.3382213404640733, "grad_norm": 0.59375, "learning_rate": 4.97095589364119e-06, "loss": 1.4525, "step": 7757 }, { "epoch": 1.3383938583628052, "grad_norm": 0.7734375, "learning_rate": 4.96860344559345e-06, "loss": 1.4663, "step": 7758 }, { "epoch": 1.3385663762615372, "grad_norm": 0.5546875, "learning_rate": 4.966251370303011e-06, "loss": 1.3687, "step": 7759 }, { "epoch": 1.3387388941602691, "grad_norm": 0.5625, "learning_rate": 4.963899667944121e-06, "loss": 1.415, "step": 7760 }, { "epoch": 1.338911412059001, "grad_norm": 0.6171875, "learning_rate": 4.961548338691009e-06, "loss": 1.45, "step": 7761 }, { "epoch": 1.339083929957733, "grad_norm": 0.55078125, "learning_rate": 4.959197382717878e-06, "loss": 1.4356, "step": 7762 }, { "epoch": 1.339256447856465, "grad_norm": 0.62109375, "learning_rate": 4.956846800198902e-06, "loss": 1.4391, "step": 7763 }, { "epoch": 1.339428965755197, "grad_norm": 0.5703125, "learning_rate": 4.954496591308227e-06, "loss": 1.4465, "step": 7764 }, { "epoch": 1.3396014836539292, "grad_norm": 0.6171875, "learning_rate": 4.952146756219972e-06, "loss": 1.3614, "step": 7765 }, { "epoch": 1.3397740015526611, "grad_norm": 0.63671875, "learning_rate": 4.949797295108218e-06, "loss": 1.3784, "step": 7766 }, { "epoch": 1.339946519451393, "grad_norm": 0.5625, "learning_rate": 4.947448208147041e-06, "loss": 1.424, "step": 7767 }, { "epoch": 1.340119037350125, "grad_norm": 0.5859375, "learning_rate": 4.9450994955104736e-06, "loss": 1.4743, "step": 7768 }, { "epoch": 1.340291555248857, "grad_norm": 0.7109375, "learning_rate": 4.9427511573725125e-06, "loss": 1.3808, "step": 7769 }, { "epoch": 1.3404640731475892, "grad_norm": 0.578125, "learning_rate": 4.940403193907153e-06, "loss": 1.3319, "step": 7770 }, { "epoch": 1.3406365910463212, "grad_norm": 0.55859375, "learning_rate": 4.938055605288334e-06, "loss": 1.5072, "step": 7771 }, { "epoch": 1.3408091089450531, "grad_norm": 1.0234375, "learning_rate": 4.935708391689985e-06, "loss": 1.4005, "step": 7772 }, { "epoch": 1.340981626843785, "grad_norm": 0.58203125, "learning_rate": 4.9333615532860005e-06, "loss": 1.4497, "step": 7773 }, { "epoch": 1.341154144742517, "grad_norm": 0.6015625, "learning_rate": 4.931015090250251e-06, "loss": 1.4624, "step": 7774 }, { "epoch": 1.341326662641249, "grad_norm": 1.140625, "learning_rate": 4.928669002756576e-06, "loss": 1.4006, "step": 7775 }, { "epoch": 1.341499180539981, "grad_norm": 0.66015625, "learning_rate": 4.926323290978787e-06, "loss": 1.5053, "step": 7776 }, { "epoch": 1.341671698438713, "grad_norm": 0.61328125, "learning_rate": 4.923977955090672e-06, "loss": 1.515, "step": 7777 }, { "epoch": 1.3418442163374449, "grad_norm": 0.578125, "learning_rate": 4.9216329952659895e-06, "loss": 1.385, "step": 7778 }, { "epoch": 1.342016734236177, "grad_norm": 0.58203125, "learning_rate": 4.919288411678459e-06, "loss": 1.4394, "step": 7779 }, { "epoch": 1.342189252134909, "grad_norm": 0.6484375, "learning_rate": 4.916944204501796e-06, "loss": 1.4153, "step": 7780 }, { "epoch": 1.342361770033641, "grad_norm": 0.5703125, "learning_rate": 4.914600373909662e-06, "loss": 1.356, "step": 7781 }, { "epoch": 1.342534287932373, "grad_norm": 0.62109375, "learning_rate": 4.912256920075708e-06, "loss": 1.4713, "step": 7782 }, { "epoch": 1.342706805831105, "grad_norm": 0.671875, "learning_rate": 4.909913843173552e-06, "loss": 1.3787, "step": 7783 }, { "epoch": 1.342879323729837, "grad_norm": 0.69921875, "learning_rate": 4.907571143376782e-06, "loss": 1.4151, "step": 7784 }, { "epoch": 1.343051841628569, "grad_norm": 0.73828125, "learning_rate": 4.905228820858959e-06, "loss": 1.5189, "step": 7785 }, { "epoch": 1.343224359527301, "grad_norm": 0.5625, "learning_rate": 4.902886875793621e-06, "loss": 1.3454, "step": 7786 }, { "epoch": 1.343396877426033, "grad_norm": 0.5859375, "learning_rate": 4.900545308354271e-06, "loss": 1.4785, "step": 7787 }, { "epoch": 1.343569395324765, "grad_norm": 0.5625, "learning_rate": 4.898204118714387e-06, "loss": 1.4196, "step": 7788 }, { "epoch": 1.343741913223497, "grad_norm": 0.5703125, "learning_rate": 4.895863307047423e-06, "loss": 1.4437, "step": 7789 }, { "epoch": 1.3439144311222289, "grad_norm": 0.5625, "learning_rate": 4.89352287352679e-06, "loss": 1.4041, "step": 7790 }, { "epoch": 1.3440869490209608, "grad_norm": 0.58984375, "learning_rate": 4.891182818325897e-06, "loss": 1.4244, "step": 7791 }, { "epoch": 1.3442594669196928, "grad_norm": 1.3359375, "learning_rate": 4.888843141618098e-06, "loss": 1.4494, "step": 7792 }, { "epoch": 1.344431984818425, "grad_norm": 0.55859375, "learning_rate": 4.886503843576736e-06, "loss": 1.4727, "step": 7793 }, { "epoch": 1.344604502717157, "grad_norm": 0.578125, "learning_rate": 4.884164924375119e-06, "loss": 1.3916, "step": 7794 }, { "epoch": 1.3447770206158889, "grad_norm": 0.58203125, "learning_rate": 4.881826384186529e-06, "loss": 1.3834, "step": 7795 }, { "epoch": 1.3449495385146208, "grad_norm": 0.59765625, "learning_rate": 4.879488223184221e-06, "loss": 1.3876, "step": 7796 }, { "epoch": 1.3451220564133528, "grad_norm": 0.58203125, "learning_rate": 4.877150441541424e-06, "loss": 1.3679, "step": 7797 }, { "epoch": 1.345294574312085, "grad_norm": 0.5546875, "learning_rate": 4.8748130394313234e-06, "loss": 1.4242, "step": 7798 }, { "epoch": 1.345467092210817, "grad_norm": 0.5625, "learning_rate": 4.8724760170271e-06, "loss": 1.4362, "step": 7799 }, { "epoch": 1.345639610109549, "grad_norm": 0.546875, "learning_rate": 4.870139374501895e-06, "loss": 1.3885, "step": 7800 }, { "epoch": 1.345639610109549, "eval_loss": 1.407551646232605, "eval_runtime": 10.8526, "eval_samples_per_second": 94.355, "eval_steps_per_second": 23.589, "step": 7800 }, { "epoch": 1.3458121280082809, "grad_norm": 0.578125, "learning_rate": 4.8678031120288115e-06, "loss": 1.452, "step": 7801 }, { "epoch": 1.3459846459070128, "grad_norm": 0.62109375, "learning_rate": 4.865467229780948e-06, "loss": 1.4555, "step": 7802 }, { "epoch": 1.3461571638057448, "grad_norm": 0.61328125, "learning_rate": 4.863131727931347e-06, "loss": 1.6427, "step": 7803 }, { "epoch": 1.3463296817044768, "grad_norm": 0.57421875, "learning_rate": 4.860796606653051e-06, "loss": 1.3934, "step": 7804 }, { "epoch": 1.3465021996032087, "grad_norm": 0.59375, "learning_rate": 4.858461866119051e-06, "loss": 1.5244, "step": 7805 }, { "epoch": 1.346674717501941, "grad_norm": 0.68359375, "learning_rate": 4.856127506502321e-06, "loss": 1.4147, "step": 7806 }, { "epoch": 1.3468472354006729, "grad_norm": 0.60546875, "learning_rate": 4.853793527975806e-06, "loss": 1.5024, "step": 7807 }, { "epoch": 1.3470197532994048, "grad_norm": 0.609375, "learning_rate": 4.85145993071242e-06, "loss": 1.4646, "step": 7808 }, { "epoch": 1.3471922711981368, "grad_norm": 0.62109375, "learning_rate": 4.849126714885053e-06, "loss": 1.4857, "step": 7809 }, { "epoch": 1.3473647890968687, "grad_norm": 0.5859375, "learning_rate": 4.846793880666567e-06, "loss": 1.3657, "step": 7810 }, { "epoch": 1.347537306995601, "grad_norm": 0.5703125, "learning_rate": 4.844461428229782e-06, "loss": 1.3712, "step": 7811 }, { "epoch": 1.3477098248943329, "grad_norm": 0.55859375, "learning_rate": 4.8421293577475145e-06, "loss": 1.385, "step": 7812 }, { "epoch": 1.3478823427930648, "grad_norm": 0.609375, "learning_rate": 4.839797669392528e-06, "loss": 1.4094, "step": 7813 }, { "epoch": 1.3480548606917968, "grad_norm": 0.609375, "learning_rate": 4.837466363337573e-06, "loss": 1.4854, "step": 7814 }, { "epoch": 1.3482273785905288, "grad_norm": 0.5546875, "learning_rate": 4.835135439755367e-06, "loss": 1.3394, "step": 7815 }, { "epoch": 1.3483998964892607, "grad_norm": 0.6484375, "learning_rate": 4.832804898818599e-06, "loss": 1.4, "step": 7816 }, { "epoch": 1.3485724143879927, "grad_norm": 0.5390625, "learning_rate": 4.8304747406999304e-06, "loss": 1.2918, "step": 7817 }, { "epoch": 1.3487449322867247, "grad_norm": 0.5703125, "learning_rate": 4.828144965571994e-06, "loss": 1.2981, "step": 7818 }, { "epoch": 1.3489174501854566, "grad_norm": 0.58984375, "learning_rate": 4.825815573607393e-06, "loss": 1.4247, "step": 7819 }, { "epoch": 1.3490899680841888, "grad_norm": 0.62890625, "learning_rate": 4.823486564978705e-06, "loss": 1.5022, "step": 7820 }, { "epoch": 1.3492624859829208, "grad_norm": 0.5703125, "learning_rate": 4.821157939858479e-06, "loss": 1.3706, "step": 7821 }, { "epoch": 1.3494350038816527, "grad_norm": 0.68359375, "learning_rate": 4.818829698419225e-06, "loss": 1.4452, "step": 7822 }, { "epoch": 1.3496075217803847, "grad_norm": 0.55859375, "learning_rate": 4.816501840833448e-06, "loss": 1.4214, "step": 7823 }, { "epoch": 1.3497800396791166, "grad_norm": 0.5859375, "learning_rate": 4.814174367273599e-06, "loss": 1.4343, "step": 7824 }, { "epoch": 1.3499525575778488, "grad_norm": 0.55078125, "learning_rate": 4.811847277912115e-06, "loss": 1.4916, "step": 7825 }, { "epoch": 1.3501250754765808, "grad_norm": 0.61328125, "learning_rate": 4.8095205729214015e-06, "loss": 1.5595, "step": 7826 }, { "epoch": 1.3502975933753127, "grad_norm": 0.578125, "learning_rate": 4.8071942524738355e-06, "loss": 1.4548, "step": 7827 }, { "epoch": 1.3504701112740447, "grad_norm": 0.6484375, "learning_rate": 4.8048683167417664e-06, "loss": 1.3287, "step": 7828 }, { "epoch": 1.3506426291727767, "grad_norm": 0.625, "learning_rate": 4.802542765897516e-06, "loss": 1.4488, "step": 7829 }, { "epoch": 1.3508151470715086, "grad_norm": 0.57421875, "learning_rate": 4.800217600113366e-06, "loss": 1.4453, "step": 7830 }, { "epoch": 1.3509876649702406, "grad_norm": 0.625, "learning_rate": 4.797892819561589e-06, "loss": 1.4369, "step": 7831 }, { "epoch": 1.3511601828689725, "grad_norm": 0.5546875, "learning_rate": 4.795568424414421e-06, "loss": 1.3552, "step": 7832 }, { "epoch": 1.3513327007677047, "grad_norm": 0.5703125, "learning_rate": 4.793244414844054e-06, "loss": 1.3656, "step": 7833 }, { "epoch": 1.3515052186664367, "grad_norm": 0.578125, "learning_rate": 4.790920791022682e-06, "loss": 1.4695, "step": 7834 }, { "epoch": 1.3516777365651687, "grad_norm": 0.63671875, "learning_rate": 4.788597553122438e-06, "loss": 1.405, "step": 7835 }, { "epoch": 1.3518502544639006, "grad_norm": 0.60546875, "learning_rate": 4.786274701315458e-06, "loss": 1.3898, "step": 7836 }, { "epoch": 1.3520227723626326, "grad_norm": 0.5859375, "learning_rate": 4.7839522357738196e-06, "loss": 1.5197, "step": 7837 }, { "epoch": 1.3521952902613645, "grad_norm": 1.46875, "learning_rate": 4.781630156669592e-06, "loss": 1.4992, "step": 7838 }, { "epoch": 1.3523678081600967, "grad_norm": 0.59375, "learning_rate": 4.7793084641748085e-06, "loss": 1.5419, "step": 7839 }, { "epoch": 1.3525403260588287, "grad_norm": 0.60546875, "learning_rate": 4.776987158461475e-06, "loss": 1.365, "step": 7840 }, { "epoch": 1.3527128439575606, "grad_norm": 0.62890625, "learning_rate": 4.774666239701566e-06, "loss": 1.4914, "step": 7841 }, { "epoch": 1.3528853618562926, "grad_norm": 0.61328125, "learning_rate": 4.772345708067035e-06, "loss": 1.4306, "step": 7842 }, { "epoch": 1.3530578797550246, "grad_norm": 0.59375, "learning_rate": 4.770025563729792e-06, "loss": 1.4419, "step": 7843 }, { "epoch": 1.3532303976537565, "grad_norm": 0.6015625, "learning_rate": 4.767705806861741e-06, "loss": 1.4563, "step": 7844 }, { "epoch": 1.3534029155524885, "grad_norm": 0.65234375, "learning_rate": 4.765386437634732e-06, "loss": 1.3103, "step": 7845 }, { "epoch": 1.3535754334512204, "grad_norm": 0.5859375, "learning_rate": 4.763067456220604e-06, "loss": 1.4048, "step": 7846 }, { "epoch": 1.3537479513499526, "grad_norm": 0.5625, "learning_rate": 4.760748862791159e-06, "loss": 1.3718, "step": 7847 }, { "epoch": 1.3539204692486846, "grad_norm": 0.62890625, "learning_rate": 4.758430657518176e-06, "loss": 1.4345, "step": 7848 }, { "epoch": 1.3540929871474165, "grad_norm": 0.5390625, "learning_rate": 4.7561128405734e-06, "loss": 1.3932, "step": 7849 }, { "epoch": 1.3542655050461485, "grad_norm": 0.9375, "learning_rate": 4.753795412128552e-06, "loss": 1.4566, "step": 7850 }, { "epoch": 1.3544380229448805, "grad_norm": 0.578125, "learning_rate": 4.751478372355317e-06, "loss": 1.3935, "step": 7851 }, { "epoch": 1.3546105408436127, "grad_norm": 0.6015625, "learning_rate": 4.749161721425359e-06, "loss": 1.4629, "step": 7852 }, { "epoch": 1.3547830587423446, "grad_norm": 0.65625, "learning_rate": 4.746845459510314e-06, "loss": 1.4346, "step": 7853 }, { "epoch": 1.3549555766410766, "grad_norm": 0.6328125, "learning_rate": 4.744529586781773e-06, "loss": 1.3455, "step": 7854 }, { "epoch": 1.3551280945398085, "grad_norm": 0.66796875, "learning_rate": 4.742214103411325e-06, "loss": 1.5296, "step": 7855 }, { "epoch": 1.3553006124385405, "grad_norm": 0.57421875, "learning_rate": 4.739899009570506e-06, "loss": 1.4611, "step": 7856 }, { "epoch": 1.3554731303372725, "grad_norm": 0.6328125, "learning_rate": 4.7375843054308335e-06, "loss": 1.4042, "step": 7857 }, { "epoch": 1.3556456482360044, "grad_norm": 0.58203125, "learning_rate": 4.735269991163798e-06, "loss": 1.4087, "step": 7858 }, { "epoch": 1.3558181661347364, "grad_norm": 0.671875, "learning_rate": 4.732956066940856e-06, "loss": 1.3889, "step": 7859 }, { "epoch": 1.3559906840334683, "grad_norm": 0.5859375, "learning_rate": 4.7306425329334386e-06, "loss": 1.4786, "step": 7860 }, { "epoch": 1.3561632019322005, "grad_norm": 0.98046875, "learning_rate": 4.7283293893129515e-06, "loss": 1.4151, "step": 7861 }, { "epoch": 1.3563357198309325, "grad_norm": 0.56640625, "learning_rate": 4.726016636250753e-06, "loss": 1.4943, "step": 7862 }, { "epoch": 1.3565082377296644, "grad_norm": 0.67578125, "learning_rate": 4.7237042739182006e-06, "loss": 1.4195, "step": 7863 }, { "epoch": 1.3566807556283964, "grad_norm": 0.58203125, "learning_rate": 4.721392302486602e-06, "loss": 1.3886, "step": 7864 }, { "epoch": 1.3568532735271284, "grad_norm": 0.671875, "learning_rate": 4.719080722127246e-06, "loss": 1.5363, "step": 7865 }, { "epoch": 1.3570257914258606, "grad_norm": 0.60546875, "learning_rate": 4.716769533011389e-06, "loss": 1.4005, "step": 7866 }, { "epoch": 1.3571983093245925, "grad_norm": 0.59765625, "learning_rate": 4.714458735310249e-06, "loss": 1.4047, "step": 7867 }, { "epoch": 1.3573708272233245, "grad_norm": 0.6015625, "learning_rate": 4.71214832919504e-06, "loss": 1.3229, "step": 7868 }, { "epoch": 1.3575433451220564, "grad_norm": 0.61328125, "learning_rate": 4.709838314836918e-06, "loss": 1.4652, "step": 7869 }, { "epoch": 1.3577158630207884, "grad_norm": 0.56640625, "learning_rate": 4.707528692407027e-06, "loss": 1.4012, "step": 7870 }, { "epoch": 1.3578883809195204, "grad_norm": 0.640625, "learning_rate": 4.705219462076481e-06, "loss": 1.4204, "step": 7871 }, { "epoch": 1.3580608988182523, "grad_norm": 0.5859375, "learning_rate": 4.70291062401636e-06, "loss": 1.4563, "step": 7872 }, { "epoch": 1.3582334167169843, "grad_norm": 0.5703125, "learning_rate": 4.700602178397719e-06, "loss": 1.468, "step": 7873 }, { "epoch": 1.3584059346157165, "grad_norm": 1.390625, "learning_rate": 4.698294125391583e-06, "loss": 1.3792, "step": 7874 }, { "epoch": 1.3585784525144484, "grad_norm": 0.58203125, "learning_rate": 4.695986465168937e-06, "loss": 1.4291, "step": 7875 }, { "epoch": 1.3587509704131804, "grad_norm": 0.59375, "learning_rate": 4.693679197900763e-06, "loss": 1.3912, "step": 7876 }, { "epoch": 1.3589234883119123, "grad_norm": 0.5859375, "learning_rate": 4.691372323757985e-06, "loss": 1.4333, "step": 7877 }, { "epoch": 1.3590960062106443, "grad_norm": 0.5625, "learning_rate": 4.689065842911517e-06, "loss": 1.3318, "step": 7878 }, { "epoch": 1.3592685241093765, "grad_norm": 0.59375, "learning_rate": 4.686759755532234e-06, "loss": 1.4298, "step": 7879 }, { "epoch": 1.3594410420081084, "grad_norm": 0.6015625, "learning_rate": 4.684454061790987e-06, "loss": 1.5446, "step": 7880 }, { "epoch": 1.3596135599068404, "grad_norm": 0.65234375, "learning_rate": 4.6821487618585956e-06, "loss": 1.4827, "step": 7881 }, { "epoch": 1.3597860778055724, "grad_norm": 0.5703125, "learning_rate": 4.679843855905853e-06, "loss": 1.3706, "step": 7882 }, { "epoch": 1.3599585957043043, "grad_norm": 0.61328125, "learning_rate": 4.6775393441035185e-06, "loss": 1.4048, "step": 7883 }, { "epoch": 1.3601311136030363, "grad_norm": 0.59375, "learning_rate": 4.6752352266223255e-06, "loss": 1.3972, "step": 7884 }, { "epoch": 1.3603036315017683, "grad_norm": 0.60546875, "learning_rate": 4.672931503632981e-06, "loss": 1.4176, "step": 7885 }, { "epoch": 1.3604761494005002, "grad_norm": 0.6015625, "learning_rate": 4.67062817530615e-06, "loss": 1.4636, "step": 7886 }, { "epoch": 1.3606486672992322, "grad_norm": 0.703125, "learning_rate": 4.6683252418124895e-06, "loss": 1.3784, "step": 7887 }, { "epoch": 1.3608211851979644, "grad_norm": 0.58203125, "learning_rate": 4.666022703322605e-06, "loss": 1.4577, "step": 7888 }, { "epoch": 1.3609937030966963, "grad_norm": 0.58984375, "learning_rate": 4.663720560007087e-06, "loss": 1.4598, "step": 7889 }, { "epoch": 1.3611662209954283, "grad_norm": 0.58984375, "learning_rate": 4.661418812036492e-06, "loss": 1.3935, "step": 7890 }, { "epoch": 1.3613387388941602, "grad_norm": 0.6328125, "learning_rate": 4.659117459581351e-06, "loss": 1.3132, "step": 7891 }, { "epoch": 1.3615112567928922, "grad_norm": 0.58984375, "learning_rate": 4.656816502812157e-06, "loss": 1.498, "step": 7892 }, { "epoch": 1.3616837746916244, "grad_norm": 0.55859375, "learning_rate": 4.6545159418993866e-06, "loss": 1.4276, "step": 7893 }, { "epoch": 1.3618562925903563, "grad_norm": 0.5546875, "learning_rate": 4.652215777013469e-06, "loss": 1.3338, "step": 7894 }, { "epoch": 1.3620288104890883, "grad_norm": 0.6015625, "learning_rate": 4.649916008324824e-06, "loss": 1.4057, "step": 7895 }, { "epoch": 1.3622013283878203, "grad_norm": 0.578125, "learning_rate": 4.64761663600383e-06, "loss": 1.4477, "step": 7896 }, { "epoch": 1.3623738462865522, "grad_norm": 0.640625, "learning_rate": 4.645317660220838e-06, "loss": 1.4506, "step": 7897 }, { "epoch": 1.3625463641852842, "grad_norm": 0.57421875, "learning_rate": 4.643019081146177e-06, "loss": 1.4392, "step": 7898 }, { "epoch": 1.3627188820840161, "grad_norm": 0.60546875, "learning_rate": 4.640720898950126e-06, "loss": 1.2814, "step": 7899 }, { "epoch": 1.362891399982748, "grad_norm": 0.60546875, "learning_rate": 4.638423113802964e-06, "loss": 1.4289, "step": 7900 }, { "epoch": 1.362891399982748, "eval_loss": 1.4074419736862183, "eval_runtime": 10.7675, "eval_samples_per_second": 95.101, "eval_steps_per_second": 23.775, "step": 7900 }, { "epoch": 1.36306391788148, "grad_norm": 0.58984375, "learning_rate": 4.636125725874916e-06, "loss": 1.4032, "step": 7901 }, { "epoch": 1.3632364357802123, "grad_norm": 0.5859375, "learning_rate": 4.6338287353361875e-06, "loss": 1.4314, "step": 7902 }, { "epoch": 1.3634089536789442, "grad_norm": 0.57421875, "learning_rate": 4.631532142356957e-06, "loss": 1.409, "step": 7903 }, { "epoch": 1.3635814715776762, "grad_norm": 0.5703125, "learning_rate": 4.629235947107369e-06, "loss": 1.4498, "step": 7904 }, { "epoch": 1.3637539894764081, "grad_norm": 0.57421875, "learning_rate": 4.626940149757541e-06, "loss": 1.3947, "step": 7905 }, { "epoch": 1.36392650737514, "grad_norm": 0.625, "learning_rate": 4.62464475047756e-06, "loss": 1.4714, "step": 7906 }, { "epoch": 1.3640990252738723, "grad_norm": 0.65625, "learning_rate": 4.622349749437478e-06, "loss": 1.3314, "step": 7907 }, { "epoch": 1.3642715431726042, "grad_norm": 0.58984375, "learning_rate": 4.620055146807334e-06, "loss": 1.3417, "step": 7908 }, { "epoch": 1.3644440610713362, "grad_norm": 0.578125, "learning_rate": 4.617760942757117e-06, "loss": 1.5331, "step": 7909 }, { "epoch": 1.3646165789700682, "grad_norm": 0.6171875, "learning_rate": 4.615467137456798e-06, "loss": 1.3129, "step": 7910 }, { "epoch": 1.3647890968688001, "grad_norm": 0.65234375, "learning_rate": 4.613173731076319e-06, "loss": 1.5621, "step": 7911 }, { "epoch": 1.364961614767532, "grad_norm": 2.0625, "learning_rate": 4.610880723785588e-06, "loss": 1.4238, "step": 7912 }, { "epoch": 1.365134132666264, "grad_norm": 0.58203125, "learning_rate": 4.608588115754486e-06, "loss": 1.4943, "step": 7913 }, { "epoch": 1.365306650564996, "grad_norm": 0.625, "learning_rate": 4.606295907152862e-06, "loss": 1.4239, "step": 7914 }, { "epoch": 1.3654791684637282, "grad_norm": 0.60546875, "learning_rate": 4.6040040981505395e-06, "loss": 1.4424, "step": 7915 }, { "epoch": 1.3656516863624601, "grad_norm": 0.55859375, "learning_rate": 4.601712688917309e-06, "loss": 1.3706, "step": 7916 }, { "epoch": 1.365824204261192, "grad_norm": 0.63671875, "learning_rate": 4.599421679622936e-06, "loss": 1.4369, "step": 7917 }, { "epoch": 1.365996722159924, "grad_norm": 0.6328125, "learning_rate": 4.597131070437143e-06, "loss": 1.3608, "step": 7918 }, { "epoch": 1.366169240058656, "grad_norm": 1.0, "learning_rate": 4.594840861529646e-06, "loss": 1.4133, "step": 7919 }, { "epoch": 1.3663417579573882, "grad_norm": 0.546875, "learning_rate": 4.5925510530701065e-06, "loss": 1.4296, "step": 7920 }, { "epoch": 1.3665142758561202, "grad_norm": 0.63671875, "learning_rate": 4.590261645228173e-06, "loss": 1.421, "step": 7921 }, { "epoch": 1.3666867937548521, "grad_norm": 0.59765625, "learning_rate": 4.587972638173459e-06, "loss": 1.4531, "step": 7922 }, { "epoch": 1.366859311653584, "grad_norm": 0.60546875, "learning_rate": 4.5856840320755465e-06, "loss": 1.3976, "step": 7923 }, { "epoch": 1.367031829552316, "grad_norm": 0.5859375, "learning_rate": 4.583395827103992e-06, "loss": 1.4801, "step": 7924 }, { "epoch": 1.367204347451048, "grad_norm": 0.5703125, "learning_rate": 4.581108023428319e-06, "loss": 1.4647, "step": 7925 }, { "epoch": 1.36737686534978, "grad_norm": 0.58203125, "learning_rate": 4.578820621218023e-06, "loss": 1.4242, "step": 7926 }, { "epoch": 1.367549383248512, "grad_norm": 0.578125, "learning_rate": 4.576533620642568e-06, "loss": 1.4149, "step": 7927 }, { "epoch": 1.367721901147244, "grad_norm": 0.5625, "learning_rate": 4.57424702187139e-06, "loss": 1.3514, "step": 7928 }, { "epoch": 1.367894419045976, "grad_norm": 0.61328125, "learning_rate": 4.5719608250738936e-06, "loss": 1.4513, "step": 7929 }, { "epoch": 1.368066936944708, "grad_norm": 0.58984375, "learning_rate": 4.569675030419459e-06, "loss": 1.4672, "step": 7930 }, { "epoch": 1.36823945484344, "grad_norm": 0.58984375, "learning_rate": 4.567389638077421e-06, "loss": 1.3883, "step": 7931 }, { "epoch": 1.368411972742172, "grad_norm": 0.66015625, "learning_rate": 4.565104648217111e-06, "loss": 1.4488, "step": 7932 }, { "epoch": 1.368584490640904, "grad_norm": 0.59765625, "learning_rate": 4.562820061007803e-06, "loss": 1.3655, "step": 7933 }, { "epoch": 1.368757008539636, "grad_norm": 0.59375, "learning_rate": 4.560535876618759e-06, "loss": 1.3948, "step": 7934 }, { "epoch": 1.368929526438368, "grad_norm": 0.62890625, "learning_rate": 4.558252095219204e-06, "loss": 1.5201, "step": 7935 }, { "epoch": 1.3691020443371, "grad_norm": 0.55859375, "learning_rate": 4.5559687169783354e-06, "loss": 1.4503, "step": 7936 }, { "epoch": 1.369274562235832, "grad_norm": 0.58984375, "learning_rate": 4.55368574206532e-06, "loss": 1.388, "step": 7937 }, { "epoch": 1.369447080134564, "grad_norm": 0.59765625, "learning_rate": 4.551403170649299e-06, "loss": 1.4712, "step": 7938 }, { "epoch": 1.369619598033296, "grad_norm": 0.5703125, "learning_rate": 4.5491210028993685e-06, "loss": 1.3087, "step": 7939 }, { "epoch": 1.3697921159320279, "grad_norm": 0.65625, "learning_rate": 4.5468392389846195e-06, "loss": 1.488, "step": 7940 }, { "epoch": 1.3699646338307598, "grad_norm": 0.59765625, "learning_rate": 4.544557879074088e-06, "loss": 1.4399, "step": 7941 }, { "epoch": 1.3701371517294918, "grad_norm": 0.65234375, "learning_rate": 4.542276923336798e-06, "loss": 1.4177, "step": 7942 }, { "epoch": 1.370309669628224, "grad_norm": 0.54296875, "learning_rate": 4.539996371941734e-06, "loss": 1.3468, "step": 7943 }, { "epoch": 1.370482187526956, "grad_norm": 0.890625, "learning_rate": 4.5377162250578545e-06, "loss": 1.3186, "step": 7944 }, { "epoch": 1.370654705425688, "grad_norm": 0.56640625, "learning_rate": 4.535436482854087e-06, "loss": 1.4187, "step": 7945 }, { "epoch": 1.3708272233244199, "grad_norm": 0.59375, "learning_rate": 4.533157145499328e-06, "loss": 1.3719, "step": 7946 }, { "epoch": 1.3709997412231518, "grad_norm": 0.60546875, "learning_rate": 4.530878213162447e-06, "loss": 1.4092, "step": 7947 }, { "epoch": 1.371172259121884, "grad_norm": 0.59375, "learning_rate": 4.528599686012281e-06, "loss": 1.4266, "step": 7948 }, { "epoch": 1.371344777020616, "grad_norm": 0.59765625, "learning_rate": 4.526321564217641e-06, "loss": 1.4805, "step": 7949 }, { "epoch": 1.371517294919348, "grad_norm": 0.58203125, "learning_rate": 4.5240438479472926e-06, "loss": 1.4013, "step": 7950 }, { "epoch": 1.37168981281808, "grad_norm": 0.57421875, "learning_rate": 4.521766537369998e-06, "loss": 1.4912, "step": 7951 }, { "epoch": 1.3718623307168119, "grad_norm": 0.54296875, "learning_rate": 4.519489632654461e-06, "loss": 1.3681, "step": 7952 }, { "epoch": 1.3720348486155438, "grad_norm": 0.59375, "learning_rate": 4.517213133969385e-06, "loss": 1.516, "step": 7953 }, { "epoch": 1.3722073665142758, "grad_norm": 0.609375, "learning_rate": 4.5149370414834125e-06, "loss": 1.4078, "step": 7954 }, { "epoch": 1.3723798844130077, "grad_norm": 0.57421875, "learning_rate": 4.512661355365177e-06, "loss": 1.3906, "step": 7955 }, { "epoch": 1.37255240231174, "grad_norm": 0.60546875, "learning_rate": 4.510386075783274e-06, "loss": 1.4647, "step": 7956 }, { "epoch": 1.3727249202104719, "grad_norm": 0.54296875, "learning_rate": 4.508111202906271e-06, "loss": 1.4007, "step": 7957 }, { "epoch": 1.3728974381092038, "grad_norm": 0.55859375, "learning_rate": 4.5058367369027054e-06, "loss": 1.4573, "step": 7958 }, { "epoch": 1.3730699560079358, "grad_norm": 0.58984375, "learning_rate": 4.503562677941088e-06, "loss": 1.4147, "step": 7959 }, { "epoch": 1.3732424739066678, "grad_norm": 0.61328125, "learning_rate": 4.501289026189882e-06, "loss": 1.3457, "step": 7960 }, { "epoch": 1.3734149918054, "grad_norm": 0.58984375, "learning_rate": 4.499015781817547e-06, "loss": 1.4553, "step": 7961 }, { "epoch": 1.373587509704132, "grad_norm": 0.57421875, "learning_rate": 4.496742944992499e-06, "loss": 1.3613, "step": 7962 }, { "epoch": 1.3737600276028639, "grad_norm": 0.62890625, "learning_rate": 4.494470515883111e-06, "loss": 1.4739, "step": 7963 }, { "epoch": 1.3739325455015958, "grad_norm": 0.57421875, "learning_rate": 4.492198494657755e-06, "loss": 1.428, "step": 7964 }, { "epoch": 1.3741050634003278, "grad_norm": 0.6015625, "learning_rate": 4.4899268814847455e-06, "loss": 1.4915, "step": 7965 }, { "epoch": 1.3742775812990597, "grad_norm": 0.5703125, "learning_rate": 4.4876556765323805e-06, "loss": 1.4217, "step": 7966 }, { "epoch": 1.3744500991977917, "grad_norm": 0.61328125, "learning_rate": 4.485384879968926e-06, "loss": 1.4559, "step": 7967 }, { "epoch": 1.3746226170965237, "grad_norm": 0.578125, "learning_rate": 4.483114491962617e-06, "loss": 1.4177, "step": 7968 }, { "epoch": 1.3747951349952556, "grad_norm": 0.58203125, "learning_rate": 4.480844512681657e-06, "loss": 1.4573, "step": 7969 }, { "epoch": 1.3749676528939878, "grad_norm": 0.62109375, "learning_rate": 4.478574942294225e-06, "loss": 1.4633, "step": 7970 }, { "epoch": 1.3751401707927198, "grad_norm": 0.55078125, "learning_rate": 4.476305780968452e-06, "loss": 1.3839, "step": 7971 }, { "epoch": 1.3753126886914517, "grad_norm": 0.60546875, "learning_rate": 4.474037028872468e-06, "loss": 1.4457, "step": 7972 }, { "epoch": 1.3754852065901837, "grad_norm": 0.58984375, "learning_rate": 4.471768686174347e-06, "loss": 1.3865, "step": 7973 }, { "epoch": 1.3756577244889157, "grad_norm": 1.2578125, "learning_rate": 4.469500753042142e-06, "loss": 1.3655, "step": 7974 }, { "epoch": 1.3758302423876478, "grad_norm": 0.5859375, "learning_rate": 4.467233229643878e-06, "loss": 1.4127, "step": 7975 }, { "epoch": 1.3760027602863798, "grad_norm": 0.5703125, "learning_rate": 4.464966116147546e-06, "loss": 1.4156, "step": 7976 }, { "epoch": 1.3761752781851118, "grad_norm": 0.6015625, "learning_rate": 4.46269941272111e-06, "loss": 1.4055, "step": 7977 }, { "epoch": 1.3763477960838437, "grad_norm": 0.58203125, "learning_rate": 4.460433119532499e-06, "loss": 1.4684, "step": 7978 }, { "epoch": 1.3765203139825757, "grad_norm": 0.5625, "learning_rate": 4.458167236749616e-06, "loss": 1.415, "step": 7979 }, { "epoch": 1.3766928318813076, "grad_norm": 0.5703125, "learning_rate": 4.45590176454033e-06, "loss": 1.5098, "step": 7980 }, { "epoch": 1.3768653497800396, "grad_norm": 0.61328125, "learning_rate": 4.453636703072487e-06, "loss": 1.5133, "step": 7981 }, { "epoch": 1.3770378676787716, "grad_norm": 0.59375, "learning_rate": 4.451372052513884e-06, "loss": 1.394, "step": 7982 }, { "epoch": 1.3772103855775037, "grad_norm": 0.7421875, "learning_rate": 4.4491078130323174e-06, "loss": 1.394, "step": 7983 }, { "epoch": 1.3773829034762357, "grad_norm": 0.625, "learning_rate": 4.446843984795519e-06, "loss": 1.4961, "step": 7984 }, { "epoch": 1.3775554213749677, "grad_norm": 0.58203125, "learning_rate": 4.444580567971224e-06, "loss": 1.5189, "step": 7985 }, { "epoch": 1.3777279392736996, "grad_norm": 0.58984375, "learning_rate": 4.442317562727109e-06, "loss": 1.4859, "step": 7986 }, { "epoch": 1.3779004571724316, "grad_norm": 0.62890625, "learning_rate": 4.440054969230834e-06, "loss": 1.4891, "step": 7987 }, { "epoch": 1.3780729750711636, "grad_norm": 0.59375, "learning_rate": 4.437792787650028e-06, "loss": 1.3835, "step": 7988 }, { "epoch": 1.3782454929698957, "grad_norm": 0.6171875, "learning_rate": 4.435531018152286e-06, "loss": 1.3922, "step": 7989 }, { "epoch": 1.3784180108686277, "grad_norm": 0.54296875, "learning_rate": 4.433269660905175e-06, "loss": 1.4601, "step": 7990 }, { "epoch": 1.3785905287673597, "grad_norm": 0.6015625, "learning_rate": 4.431008716076232e-06, "loss": 1.4917, "step": 7991 }, { "epoch": 1.3787630466660916, "grad_norm": 0.56640625, "learning_rate": 4.428748183832955e-06, "loss": 1.5076, "step": 7992 }, { "epoch": 1.3789355645648236, "grad_norm": 0.640625, "learning_rate": 4.426488064342826e-06, "loss": 1.498, "step": 7993 }, { "epoch": 1.3791080824635555, "grad_norm": 0.5703125, "learning_rate": 4.4242283577732905e-06, "loss": 1.3959, "step": 7994 }, { "epoch": 1.3792806003622875, "grad_norm": 0.62890625, "learning_rate": 4.421969064291749e-06, "loss": 1.4679, "step": 7995 }, { "epoch": 1.3794531182610195, "grad_norm": 0.58203125, "learning_rate": 4.4197101840656e-06, "loss": 1.4727, "step": 7996 }, { "epoch": 1.3796256361597516, "grad_norm": 0.55859375, "learning_rate": 4.417451717262184e-06, "loss": 1.467, "step": 7997 }, { "epoch": 1.3797981540584836, "grad_norm": 0.61328125, "learning_rate": 4.415193664048827e-06, "loss": 1.4182, "step": 7998 }, { "epoch": 1.3799706719572156, "grad_norm": 0.6328125, "learning_rate": 4.412936024592818e-06, "loss": 1.5055, "step": 7999 }, { "epoch": 1.3801431898559475, "grad_norm": 0.5859375, "learning_rate": 4.410678799061417e-06, "loss": 1.4059, "step": 8000 }, { "epoch": 1.3801431898559475, "eval_loss": 1.407374382019043, "eval_runtime": 11.0415, "eval_samples_per_second": 92.741, "eval_steps_per_second": 23.185, "step": 8000 }, { "epoch": 1.3803157077546795, "grad_norm": 0.59765625, "learning_rate": 4.408421987621856e-06, "loss": 1.4668, "step": 8001 }, { "epoch": 1.3804882256534117, "grad_norm": 0.609375, "learning_rate": 4.406165590441335e-06, "loss": 1.4717, "step": 8002 }, { "epoch": 1.3806607435521436, "grad_norm": 0.578125, "learning_rate": 4.403909607687012e-06, "loss": 1.4689, "step": 8003 }, { "epoch": 1.3808332614508756, "grad_norm": 0.640625, "learning_rate": 4.4016540395260375e-06, "loss": 1.4595, "step": 8004 }, { "epoch": 1.3810057793496076, "grad_norm": 0.62109375, "learning_rate": 4.399398886125508e-06, "loss": 1.4481, "step": 8005 }, { "epoch": 1.3811782972483395, "grad_norm": 0.5625, "learning_rate": 4.397144147652506e-06, "loss": 1.4332, "step": 8006 }, { "epoch": 1.3813508151470715, "grad_norm": 0.66796875, "learning_rate": 4.3948898242740715e-06, "loss": 1.3817, "step": 8007 }, { "epoch": 1.3815233330458034, "grad_norm": 0.54296875, "learning_rate": 4.392635916157221e-06, "loss": 1.3656, "step": 8008 }, { "epoch": 1.3816958509445354, "grad_norm": 0.5625, "learning_rate": 4.390382423468938e-06, "loss": 1.2959, "step": 8009 }, { "epoch": 1.3818683688432674, "grad_norm": 0.59765625, "learning_rate": 4.388129346376177e-06, "loss": 1.4901, "step": 8010 }, { "epoch": 1.3820408867419995, "grad_norm": 0.5546875, "learning_rate": 4.385876685045858e-06, "loss": 1.3806, "step": 8011 }, { "epoch": 1.3822134046407315, "grad_norm": 0.578125, "learning_rate": 4.3836244396448725e-06, "loss": 1.3983, "step": 8012 }, { "epoch": 1.3823859225394635, "grad_norm": 0.55859375, "learning_rate": 4.3813726103400825e-06, "loss": 1.4383, "step": 8013 }, { "epoch": 1.3825584404381954, "grad_norm": 0.69140625, "learning_rate": 4.379121197298315e-06, "loss": 1.3774, "step": 8014 }, { "epoch": 1.3827309583369274, "grad_norm": 0.6015625, "learning_rate": 4.3768702006863735e-06, "loss": 1.4498, "step": 8015 }, { "epoch": 1.3829034762356596, "grad_norm": 0.64453125, "learning_rate": 4.374619620671015e-06, "loss": 1.3819, "step": 8016 }, { "epoch": 1.3830759941343915, "grad_norm": 0.5859375, "learning_rate": 4.372369457418994e-06, "loss": 1.5088, "step": 8017 }, { "epoch": 1.3832485120331235, "grad_norm": 0.6171875, "learning_rate": 4.370119711097e-06, "loss": 1.3558, "step": 8018 }, { "epoch": 1.3834210299318555, "grad_norm": 0.6015625, "learning_rate": 4.3678703818717165e-06, "loss": 1.3744, "step": 8019 }, { "epoch": 1.3835935478305874, "grad_norm": 0.6171875, "learning_rate": 4.365621469909785e-06, "loss": 1.4178, "step": 8020 }, { "epoch": 1.3837660657293194, "grad_norm": 0.640625, "learning_rate": 4.363372975377821e-06, "loss": 1.4425, "step": 8021 }, { "epoch": 1.3839385836280513, "grad_norm": 0.61328125, "learning_rate": 4.361124898442406e-06, "loss": 1.4155, "step": 8022 }, { "epoch": 1.3841111015267833, "grad_norm": 0.60546875, "learning_rate": 4.358877239270096e-06, "loss": 1.5466, "step": 8023 }, { "epoch": 1.3842836194255155, "grad_norm": 0.59765625, "learning_rate": 4.3566299980273995e-06, "loss": 1.4899, "step": 8024 }, { "epoch": 1.3844561373242474, "grad_norm": 0.59375, "learning_rate": 4.354383174880819e-06, "loss": 1.3355, "step": 8025 }, { "epoch": 1.3846286552229794, "grad_norm": 0.59765625, "learning_rate": 4.3521367699968105e-06, "loss": 1.3952, "step": 8026 }, { "epoch": 1.3848011731217114, "grad_norm": 0.5859375, "learning_rate": 4.349890783541793e-06, "loss": 1.3639, "step": 8027 }, { "epoch": 1.3849736910204433, "grad_norm": 0.88671875, "learning_rate": 4.3476452156821765e-06, "loss": 1.3987, "step": 8028 }, { "epoch": 1.3851462089191753, "grad_norm": 0.61328125, "learning_rate": 4.345400066584318e-06, "loss": 1.4278, "step": 8029 }, { "epoch": 1.3853187268179075, "grad_norm": 0.609375, "learning_rate": 4.343155336414553e-06, "loss": 1.4388, "step": 8030 }, { "epoch": 1.3854912447166394, "grad_norm": 0.62890625, "learning_rate": 4.340911025339186e-06, "loss": 1.4773, "step": 8031 }, { "epoch": 1.3856637626153714, "grad_norm": 0.6328125, "learning_rate": 4.33866713352449e-06, "loss": 1.4371, "step": 8032 }, { "epoch": 1.3858362805141033, "grad_norm": 0.578125, "learning_rate": 4.336423661136708e-06, "loss": 1.4436, "step": 8033 }, { "epoch": 1.3860087984128353, "grad_norm": 0.6015625, "learning_rate": 4.3341806083420504e-06, "loss": 1.4122, "step": 8034 }, { "epoch": 1.3861813163115673, "grad_norm": 0.55859375, "learning_rate": 4.331937975306687e-06, "loss": 1.3701, "step": 8035 }, { "epoch": 1.3863538342102992, "grad_norm": 0.5625, "learning_rate": 4.329695762196783e-06, "loss": 1.4333, "step": 8036 }, { "epoch": 1.3865263521090312, "grad_norm": 0.5703125, "learning_rate": 4.3274539691784434e-06, "loss": 1.448, "step": 8037 }, { "epoch": 1.3866988700077634, "grad_norm": 0.5703125, "learning_rate": 4.325212596417756e-06, "loss": 1.4208, "step": 8038 }, { "epoch": 1.3868713879064953, "grad_norm": 0.6171875, "learning_rate": 4.322971644080778e-06, "loss": 1.3671, "step": 8039 }, { "epoch": 1.3870439058052273, "grad_norm": 0.58984375, "learning_rate": 4.320731112333528e-06, "loss": 1.4953, "step": 8040 }, { "epoch": 1.3872164237039593, "grad_norm": 0.6015625, "learning_rate": 4.318491001342011e-06, "loss": 1.4839, "step": 8041 }, { "epoch": 1.3873889416026912, "grad_norm": 0.5703125, "learning_rate": 4.316251311272177e-06, "loss": 1.5123, "step": 8042 }, { "epoch": 1.3875614595014234, "grad_norm": 0.63671875, "learning_rate": 4.314012042289959e-06, "loss": 1.5034, "step": 8043 }, { "epoch": 1.3877339774001554, "grad_norm": 0.56640625, "learning_rate": 4.311773194561256e-06, "loss": 1.367, "step": 8044 }, { "epoch": 1.3879064952988873, "grad_norm": 0.74609375, "learning_rate": 4.309534768251937e-06, "loss": 1.5059, "step": 8045 }, { "epoch": 1.3880790131976193, "grad_norm": 0.5859375, "learning_rate": 4.307296763527838e-06, "loss": 1.4364, "step": 8046 }, { "epoch": 1.3882515310963512, "grad_norm": 0.625, "learning_rate": 4.3050591805547696e-06, "loss": 1.4842, "step": 8047 }, { "epoch": 1.3884240489950832, "grad_norm": 0.65625, "learning_rate": 4.302822019498492e-06, "loss": 1.4707, "step": 8048 }, { "epoch": 1.3885965668938152, "grad_norm": 0.59375, "learning_rate": 4.300585280524764e-06, "loss": 1.3719, "step": 8049 }, { "epoch": 1.3887690847925471, "grad_norm": 1.078125, "learning_rate": 4.2983489637992855e-06, "loss": 1.4976, "step": 8050 }, { "epoch": 1.388941602691279, "grad_norm": 0.625, "learning_rate": 4.296113069487743e-06, "loss": 1.3982, "step": 8051 }, { "epoch": 1.3891141205900113, "grad_norm": 0.671875, "learning_rate": 4.293877597755783e-06, "loss": 1.4678, "step": 8052 }, { "epoch": 1.3892866384887432, "grad_norm": 0.80078125, "learning_rate": 4.291642548769024e-06, "loss": 1.4979, "step": 8053 }, { "epoch": 1.3894591563874752, "grad_norm": 0.66015625, "learning_rate": 4.289407922693053e-06, "loss": 1.4575, "step": 8054 }, { "epoch": 1.3896316742862072, "grad_norm": 0.5859375, "learning_rate": 4.287173719693427e-06, "loss": 1.4439, "step": 8055 }, { "epoch": 1.3898041921849391, "grad_norm": 0.62890625, "learning_rate": 4.284939939935662e-06, "loss": 1.4507, "step": 8056 }, { "epoch": 1.3899767100836713, "grad_norm": 0.83984375, "learning_rate": 4.282706583585258e-06, "loss": 1.4287, "step": 8057 }, { "epoch": 1.3901492279824033, "grad_norm": 0.625, "learning_rate": 4.280473650807676e-06, "loss": 1.5049, "step": 8058 }, { "epoch": 1.3903217458811352, "grad_norm": 0.61328125, "learning_rate": 4.278241141768338e-06, "loss": 1.3884, "step": 8059 }, { "epoch": 1.3904942637798672, "grad_norm": 0.625, "learning_rate": 4.276009056632653e-06, "loss": 1.4438, "step": 8060 }, { "epoch": 1.3906667816785991, "grad_norm": 0.6171875, "learning_rate": 4.27377739556598e-06, "loss": 1.3712, "step": 8061 }, { "epoch": 1.390839299577331, "grad_norm": 0.5859375, "learning_rate": 4.271546158733656e-06, "loss": 1.3682, "step": 8062 }, { "epoch": 1.391011817476063, "grad_norm": 0.70703125, "learning_rate": 4.269315346300985e-06, "loss": 1.4151, "step": 8063 }, { "epoch": 1.391184335374795, "grad_norm": 0.5625, "learning_rate": 4.2670849584332405e-06, "loss": 1.5293, "step": 8064 }, { "epoch": 1.3913568532735272, "grad_norm": 0.58984375, "learning_rate": 4.264854995295664e-06, "loss": 1.4793, "step": 8065 }, { "epoch": 1.3915293711722592, "grad_norm": 0.5625, "learning_rate": 4.262625457053467e-06, "loss": 1.5049, "step": 8066 }, { "epoch": 1.3917018890709911, "grad_norm": 0.6171875, "learning_rate": 4.260396343871819e-06, "loss": 1.4196, "step": 8067 }, { "epoch": 1.391874406969723, "grad_norm": 0.6171875, "learning_rate": 4.258167655915878e-06, "loss": 1.4206, "step": 8068 }, { "epoch": 1.392046924868455, "grad_norm": 0.62109375, "learning_rate": 4.255939393350751e-06, "loss": 1.3965, "step": 8069 }, { "epoch": 1.3922194427671872, "grad_norm": 0.65625, "learning_rate": 4.253711556341524e-06, "loss": 1.4317, "step": 8070 }, { "epoch": 1.3923919606659192, "grad_norm": 0.609375, "learning_rate": 4.25148414505325e-06, "loss": 1.3929, "step": 8071 }, { "epoch": 1.3925644785646512, "grad_norm": 0.58203125, "learning_rate": 4.249257159650944e-06, "loss": 1.3943, "step": 8072 }, { "epoch": 1.3927369964633831, "grad_norm": 0.66015625, "learning_rate": 4.2470306002996085e-06, "loss": 1.5479, "step": 8073 }, { "epoch": 1.392909514362115, "grad_norm": 0.578125, "learning_rate": 4.244804467164189e-06, "loss": 1.4135, "step": 8074 }, { "epoch": 1.393082032260847, "grad_norm": 0.5703125, "learning_rate": 4.242578760409614e-06, "loss": 1.3848, "step": 8075 }, { "epoch": 1.393254550159579, "grad_norm": 0.55859375, "learning_rate": 4.240353480200777e-06, "loss": 1.3422, "step": 8076 }, { "epoch": 1.393427068058311, "grad_norm": 0.5859375, "learning_rate": 4.238128626702545e-06, "loss": 1.4875, "step": 8077 }, { "epoch": 1.393599585957043, "grad_norm": 0.59375, "learning_rate": 4.2359042000797434e-06, "loss": 1.3744, "step": 8078 }, { "epoch": 1.393772103855775, "grad_norm": 0.6015625, "learning_rate": 4.23368020049718e-06, "loss": 1.393, "step": 8079 }, { "epoch": 1.393944621754507, "grad_norm": 0.6328125, "learning_rate": 4.231456628119609e-06, "loss": 1.4639, "step": 8080 }, { "epoch": 1.394117139653239, "grad_norm": 0.60546875, "learning_rate": 4.229233483111781e-06, "loss": 1.4048, "step": 8081 }, { "epoch": 1.394289657551971, "grad_norm": 0.640625, "learning_rate": 4.227010765638392e-06, "loss": 1.3948, "step": 8082 }, { "epoch": 1.394462175450703, "grad_norm": 0.6015625, "learning_rate": 4.2247884758641155e-06, "loss": 1.4422, "step": 8083 }, { "epoch": 1.3946346933494351, "grad_norm": 0.578125, "learning_rate": 4.222566613953594e-06, "loss": 1.5084, "step": 8084 }, { "epoch": 1.394807211248167, "grad_norm": 0.5546875, "learning_rate": 4.220345180071437e-06, "loss": 1.4686, "step": 8085 }, { "epoch": 1.394979729146899, "grad_norm": 0.62890625, "learning_rate": 4.218124174382222e-06, "loss": 1.4242, "step": 8086 }, { "epoch": 1.395152247045631, "grad_norm": 0.609375, "learning_rate": 4.215903597050499e-06, "loss": 1.4581, "step": 8087 }, { "epoch": 1.395324764944363, "grad_norm": 0.65234375, "learning_rate": 4.213683448240771e-06, "loss": 1.5284, "step": 8088 }, { "epoch": 1.395497282843095, "grad_norm": 0.5859375, "learning_rate": 4.211463728117531e-06, "loss": 1.3873, "step": 8089 }, { "epoch": 1.395669800741827, "grad_norm": 0.58203125, "learning_rate": 4.20924443684523e-06, "loss": 1.3575, "step": 8090 }, { "epoch": 1.3958423186405589, "grad_norm": 0.734375, "learning_rate": 4.207025574588276e-06, "loss": 1.3796, "step": 8091 }, { "epoch": 1.3960148365392908, "grad_norm": 0.54296875, "learning_rate": 4.204807141511071e-06, "loss": 1.3057, "step": 8092 }, { "epoch": 1.396187354438023, "grad_norm": 0.55859375, "learning_rate": 4.20258913777796e-06, "loss": 1.398, "step": 8093 }, { "epoch": 1.396359872336755, "grad_norm": 0.58203125, "learning_rate": 4.200371563553269e-06, "loss": 1.5357, "step": 8094 }, { "epoch": 1.396532390235487, "grad_norm": 0.56640625, "learning_rate": 4.1981544190012915e-06, "loss": 1.3717, "step": 8095 }, { "epoch": 1.3967049081342189, "grad_norm": 0.62109375, "learning_rate": 4.195937704286285e-06, "loss": 1.4642, "step": 8096 }, { "epoch": 1.3968774260329508, "grad_norm": 0.59375, "learning_rate": 4.193721419572478e-06, "loss": 1.376, "step": 8097 }, { "epoch": 1.397049943931683, "grad_norm": 0.56640625, "learning_rate": 4.191505565024073e-06, "loss": 1.4358, "step": 8098 }, { "epoch": 1.397222461830415, "grad_norm": 0.609375, "learning_rate": 4.189290140805221e-06, "loss": 1.4701, "step": 8099 }, { "epoch": 1.397394979729147, "grad_norm": 0.6484375, "learning_rate": 4.18707514708007e-06, "loss": 1.5173, "step": 8100 }, { "epoch": 1.397394979729147, "eval_loss": 1.4073193073272705, "eval_runtime": 11.2049, "eval_samples_per_second": 91.389, "eval_steps_per_second": 22.847, "step": 8100 }, { "epoch": 1.397567497627879, "grad_norm": 0.58203125, "learning_rate": 4.184860584012704e-06, "loss": 1.38, "step": 8101 }, { "epoch": 1.3977400155266109, "grad_norm": 0.58203125, "learning_rate": 4.182646451767209e-06, "loss": 1.4285, "step": 8102 }, { "epoch": 1.3979125334253428, "grad_norm": 0.5625, "learning_rate": 4.18043275050761e-06, "loss": 1.4443, "step": 8103 }, { "epoch": 1.3980850513240748, "grad_norm": 0.55078125, "learning_rate": 4.178219480397911e-06, "loss": 1.4382, "step": 8104 }, { "epoch": 1.3982575692228068, "grad_norm": 0.55859375, "learning_rate": 4.1760066416020975e-06, "loss": 1.5393, "step": 8105 }, { "epoch": 1.398430087121539, "grad_norm": 0.640625, "learning_rate": 4.173794234284096e-06, "loss": 1.3635, "step": 8106 }, { "epoch": 1.398602605020271, "grad_norm": 0.52734375, "learning_rate": 4.171582258607824e-06, "loss": 1.2441, "step": 8107 }, { "epoch": 1.3987751229190029, "grad_norm": 0.5625, "learning_rate": 4.169370714737155e-06, "loss": 1.3819, "step": 8108 }, { "epoch": 1.3989476408177348, "grad_norm": 0.61328125, "learning_rate": 4.167159602835934e-06, "loss": 1.419, "step": 8109 }, { "epoch": 1.3991201587164668, "grad_norm": 0.56640625, "learning_rate": 4.164948923067976e-06, "loss": 1.3778, "step": 8110 }, { "epoch": 1.399292676615199, "grad_norm": 0.58203125, "learning_rate": 4.162738675597065e-06, "loss": 1.4298, "step": 8111 }, { "epoch": 1.399465194513931, "grad_norm": 0.58984375, "learning_rate": 4.1605288605869365e-06, "loss": 1.4636, "step": 8112 }, { "epoch": 1.3996377124126629, "grad_norm": 0.5859375, "learning_rate": 4.158319478201325e-06, "loss": 1.3767, "step": 8113 }, { "epoch": 1.3998102303113948, "grad_norm": 0.578125, "learning_rate": 4.156110528603904e-06, "loss": 1.4037, "step": 8114 }, { "epoch": 1.3999827482101268, "grad_norm": 0.61328125, "learning_rate": 4.153902011958329e-06, "loss": 1.4435, "step": 8115 }, { "epoch": 1.4001552661088588, "grad_norm": 0.5625, "learning_rate": 4.151693928428221e-06, "loss": 1.4166, "step": 8116 }, { "epoch": 1.4003277840075907, "grad_norm": 0.69140625, "learning_rate": 4.149486278177168e-06, "loss": 1.4646, "step": 8117 }, { "epoch": 1.4005003019063227, "grad_norm": 0.61328125, "learning_rate": 4.147279061368729e-06, "loss": 1.4447, "step": 8118 }, { "epoch": 1.4006728198050546, "grad_norm": 0.578125, "learning_rate": 4.145072278166428e-06, "loss": 1.3519, "step": 8119 }, { "epoch": 1.4008453377037868, "grad_norm": 0.859375, "learning_rate": 4.142865928733751e-06, "loss": 1.3877, "step": 8120 }, { "epoch": 1.4010178556025188, "grad_norm": 0.55078125, "learning_rate": 4.1406600132341645e-06, "loss": 1.385, "step": 8121 }, { "epoch": 1.4011903735012508, "grad_norm": 0.6015625, "learning_rate": 4.1384545318311e-06, "loss": 1.4587, "step": 8122 }, { "epoch": 1.4013628913999827, "grad_norm": 0.5703125, "learning_rate": 4.1362494846879405e-06, "loss": 1.4534, "step": 8123 }, { "epoch": 1.4015354092987147, "grad_norm": 0.58203125, "learning_rate": 4.134044871968065e-06, "loss": 1.3575, "step": 8124 }, { "epoch": 1.4017079271974469, "grad_norm": 0.63671875, "learning_rate": 4.131840693834794e-06, "loss": 1.3504, "step": 8125 }, { "epoch": 1.4018804450961788, "grad_norm": 0.53515625, "learning_rate": 4.12963695045143e-06, "loss": 1.3188, "step": 8126 }, { "epoch": 1.4020529629949108, "grad_norm": 0.62109375, "learning_rate": 4.127433641981241e-06, "loss": 1.481, "step": 8127 }, { "epoch": 1.4022254808936427, "grad_norm": 0.57421875, "learning_rate": 4.125230768587461e-06, "loss": 1.3856, "step": 8128 }, { "epoch": 1.4023979987923747, "grad_norm": 0.609375, "learning_rate": 4.123028330433294e-06, "loss": 1.3378, "step": 8129 }, { "epoch": 1.4025705166911067, "grad_norm": 0.5703125, "learning_rate": 4.120826327681911e-06, "loss": 1.3412, "step": 8130 }, { "epoch": 1.4027430345898386, "grad_norm": 0.578125, "learning_rate": 4.1186247604964425e-06, "loss": 1.3749, "step": 8131 }, { "epoch": 1.4029155524885706, "grad_norm": 0.6015625, "learning_rate": 4.116423629040007e-06, "loss": 1.4766, "step": 8132 }, { "epoch": 1.4030880703873025, "grad_norm": 0.9609375, "learning_rate": 4.1142229334756645e-06, "loss": 1.4402, "step": 8133 }, { "epoch": 1.4032605882860347, "grad_norm": 0.66015625, "learning_rate": 4.112022673966471e-06, "loss": 1.4126, "step": 8134 }, { "epoch": 1.4034331061847667, "grad_norm": 0.6484375, "learning_rate": 4.109822850675422e-06, "loss": 1.3777, "step": 8135 }, { "epoch": 1.4036056240834986, "grad_norm": 1.7109375, "learning_rate": 4.107623463765498e-06, "loss": 1.5189, "step": 8136 }, { "epoch": 1.4037781419822306, "grad_norm": 0.58203125, "learning_rate": 4.105424513399652e-06, "loss": 1.3747, "step": 8137 }, { "epoch": 1.4039506598809626, "grad_norm": 0.63671875, "learning_rate": 4.1032259997407844e-06, "loss": 1.4141, "step": 8138 }, { "epoch": 1.4041231777796948, "grad_norm": 0.59375, "learning_rate": 4.10102792295178e-06, "loss": 1.417, "step": 8139 }, { "epoch": 1.4042956956784267, "grad_norm": 0.6171875, "learning_rate": 4.098830283195485e-06, "loss": 1.4213, "step": 8140 }, { "epoch": 1.4044682135771587, "grad_norm": 0.86328125, "learning_rate": 4.096633080634717e-06, "loss": 1.4074, "step": 8141 }, { "epoch": 1.4046407314758906, "grad_norm": 0.61328125, "learning_rate": 4.094436315432254e-06, "loss": 1.5289, "step": 8142 }, { "epoch": 1.4048132493746226, "grad_norm": 0.58984375, "learning_rate": 4.092239987750852e-06, "loss": 1.4553, "step": 8143 }, { "epoch": 1.4049857672733546, "grad_norm": 0.59765625, "learning_rate": 4.09004409775322e-06, "loss": 1.4543, "step": 8144 }, { "epoch": 1.4051582851720865, "grad_norm": 0.5859375, "learning_rate": 4.0878486456020535e-06, "loss": 1.5197, "step": 8145 }, { "epoch": 1.4053308030708185, "grad_norm": 0.6015625, "learning_rate": 4.085653631459997e-06, "loss": 1.4162, "step": 8146 }, { "epoch": 1.4055033209695507, "grad_norm": 1.21875, "learning_rate": 4.083459055489675e-06, "loss": 1.4667, "step": 8147 }, { "epoch": 1.4056758388682826, "grad_norm": 0.56640625, "learning_rate": 4.081264917853674e-06, "loss": 1.4256, "step": 8148 }, { "epoch": 1.4058483567670146, "grad_norm": 0.640625, "learning_rate": 4.0790712187145486e-06, "loss": 1.5337, "step": 8149 }, { "epoch": 1.4060208746657465, "grad_norm": 0.63671875, "learning_rate": 4.076877958234825e-06, "loss": 1.4118, "step": 8150 }, { "epoch": 1.4061933925644785, "grad_norm": 0.56640625, "learning_rate": 4.074685136576993e-06, "loss": 1.384, "step": 8151 }, { "epoch": 1.4063659104632107, "grad_norm": 0.63671875, "learning_rate": 4.0724927539035045e-06, "loss": 1.4003, "step": 8152 }, { "epoch": 1.4065384283619426, "grad_norm": 0.55859375, "learning_rate": 4.070300810376792e-06, "loss": 1.4395, "step": 8153 }, { "epoch": 1.4067109462606746, "grad_norm": 0.59375, "learning_rate": 4.0681093061592495e-06, "loss": 1.411, "step": 8154 }, { "epoch": 1.4068834641594066, "grad_norm": 0.609375, "learning_rate": 4.065918241413226e-06, "loss": 1.4444, "step": 8155 }, { "epoch": 1.4070559820581385, "grad_norm": 0.59375, "learning_rate": 4.063727616301064e-06, "loss": 1.4602, "step": 8156 }, { "epoch": 1.4072284999568705, "grad_norm": 0.59375, "learning_rate": 4.061537430985049e-06, "loss": 1.4107, "step": 8157 }, { "epoch": 1.4074010178556025, "grad_norm": 0.6484375, "learning_rate": 4.059347685627446e-06, "loss": 1.4984, "step": 8158 }, { "epoch": 1.4075735357543344, "grad_norm": 0.59375, "learning_rate": 4.057158380390486e-06, "loss": 1.3805, "step": 8159 }, { "epoch": 1.4077460536530664, "grad_norm": 0.5703125, "learning_rate": 4.054969515436366e-06, "loss": 1.3743, "step": 8160 }, { "epoch": 1.4079185715517986, "grad_norm": 0.63671875, "learning_rate": 4.052781090927249e-06, "loss": 1.4984, "step": 8161 }, { "epoch": 1.4080910894505305, "grad_norm": 0.58203125, "learning_rate": 4.050593107025269e-06, "loss": 1.4587, "step": 8162 }, { "epoch": 1.4082636073492625, "grad_norm": 0.5703125, "learning_rate": 4.048405563892527e-06, "loss": 1.4661, "step": 8163 }, { "epoch": 1.4084361252479944, "grad_norm": 0.5859375, "learning_rate": 4.046218461691089e-06, "loss": 1.4686, "step": 8164 }, { "epoch": 1.4086086431467264, "grad_norm": 0.57421875, "learning_rate": 4.044031800582984e-06, "loss": 1.376, "step": 8165 }, { "epoch": 1.4087811610454586, "grad_norm": 0.58203125, "learning_rate": 4.041845580730223e-06, "loss": 1.3829, "step": 8166 }, { "epoch": 1.4089536789441905, "grad_norm": 0.578125, "learning_rate": 4.039659802294767e-06, "loss": 1.418, "step": 8167 }, { "epoch": 1.4091261968429225, "grad_norm": 0.56640625, "learning_rate": 4.037474465438551e-06, "loss": 1.4028, "step": 8168 }, { "epoch": 1.4092987147416545, "grad_norm": 0.5546875, "learning_rate": 4.035289570323489e-06, "loss": 1.5105, "step": 8169 }, { "epoch": 1.4094712326403864, "grad_norm": 0.5625, "learning_rate": 4.033105117111441e-06, "loss": 1.4076, "step": 8170 }, { "epoch": 1.4096437505391184, "grad_norm": 0.55859375, "learning_rate": 4.030921105964249e-06, "loss": 1.3904, "step": 8171 }, { "epoch": 1.4098162684378504, "grad_norm": 0.55078125, "learning_rate": 4.028737537043719e-06, "loss": 1.4178, "step": 8172 }, { "epoch": 1.4099887863365823, "grad_norm": 0.71484375, "learning_rate": 4.026554410511622e-06, "loss": 1.4416, "step": 8173 }, { "epoch": 1.4101613042353145, "grad_norm": 0.59765625, "learning_rate": 4.024371726529698e-06, "loss": 1.514, "step": 8174 }, { "epoch": 1.4103338221340465, "grad_norm": 0.57421875, "learning_rate": 4.022189485259656e-06, "loss": 1.5538, "step": 8175 }, { "epoch": 1.4105063400327784, "grad_norm": 0.61328125, "learning_rate": 4.020007686863164e-06, "loss": 1.3949, "step": 8176 }, { "epoch": 1.4106788579315104, "grad_norm": 0.5546875, "learning_rate": 4.017826331501872e-06, "loss": 1.3972, "step": 8177 }, { "epoch": 1.4108513758302423, "grad_norm": 0.546875, "learning_rate": 4.015645419337381e-06, "loss": 1.3803, "step": 8178 }, { "epoch": 1.4110238937289743, "grad_norm": 0.54296875, "learning_rate": 4.013464950531268e-06, "loss": 1.4745, "step": 8179 }, { "epoch": 1.4111964116277065, "grad_norm": 0.6328125, "learning_rate": 4.011284925245079e-06, "loss": 1.385, "step": 8180 }, { "epoch": 1.4113689295264384, "grad_norm": 0.58984375, "learning_rate": 4.009105343640321e-06, "loss": 1.4544, "step": 8181 }, { "epoch": 1.4115414474251704, "grad_norm": 0.55078125, "learning_rate": 4.006926205878472e-06, "loss": 1.3837, "step": 8182 }, { "epoch": 1.4117139653239024, "grad_norm": 0.57421875, "learning_rate": 4.00474751212098e-06, "loss": 1.4387, "step": 8183 }, { "epoch": 1.4118864832226343, "grad_norm": 0.62109375, "learning_rate": 4.002569262529244e-06, "loss": 1.2922, "step": 8184 }, { "epoch": 1.4120590011213663, "grad_norm": 0.6171875, "learning_rate": 4.000391457264656e-06, "loss": 1.4863, "step": 8185 }, { "epoch": 1.4122315190200982, "grad_norm": 0.5859375, "learning_rate": 3.99821409648856e-06, "loss": 1.4103, "step": 8186 }, { "epoch": 1.4124040369188302, "grad_norm": 0.61328125, "learning_rate": 3.996037180362256e-06, "loss": 1.3828, "step": 8187 }, { "epoch": 1.4125765548175624, "grad_norm": 0.609375, "learning_rate": 3.99386070904704e-06, "loss": 1.3571, "step": 8188 }, { "epoch": 1.4127490727162944, "grad_norm": 0.58203125, "learning_rate": 3.991684682704143e-06, "loss": 1.5166, "step": 8189 }, { "epoch": 1.4129215906150263, "grad_norm": 0.61328125, "learning_rate": 3.989509101494794e-06, "loss": 1.5186, "step": 8190 }, { "epoch": 1.4130941085137583, "grad_norm": 0.62109375, "learning_rate": 3.987333965580163e-06, "loss": 1.4764, "step": 8191 }, { "epoch": 1.4132666264124902, "grad_norm": 0.6015625, "learning_rate": 3.9851592751213995e-06, "loss": 1.4711, "step": 8192 }, { "epoch": 1.4134391443112224, "grad_norm": 0.5703125, "learning_rate": 3.98298503027962e-06, "loss": 1.4253, "step": 8193 }, { "epoch": 1.4136116622099544, "grad_norm": 0.72265625, "learning_rate": 3.980811231215905e-06, "loss": 1.3507, "step": 8194 }, { "epoch": 1.4137841801086863, "grad_norm": 0.5546875, "learning_rate": 3.978637878091305e-06, "loss": 1.4504, "step": 8195 }, { "epoch": 1.4139566980074183, "grad_norm": 0.59375, "learning_rate": 3.976464971066837e-06, "loss": 1.4436, "step": 8196 }, { "epoch": 1.4141292159061503, "grad_norm": 0.6484375, "learning_rate": 3.974292510303473e-06, "loss": 1.4446, "step": 8197 }, { "epoch": 1.4143017338048822, "grad_norm": 0.625, "learning_rate": 3.972120495962178e-06, "loss": 1.4351, "step": 8198 }, { "epoch": 1.4144742517036142, "grad_norm": 0.58984375, "learning_rate": 3.969948928203856e-06, "loss": 1.3947, "step": 8199 }, { "epoch": 1.4146467696023461, "grad_norm": 0.5625, "learning_rate": 3.967777807189396e-06, "loss": 1.454, "step": 8200 }, { "epoch": 1.4146467696023461, "eval_loss": 1.4072892665863037, "eval_runtime": 10.8508, "eval_samples_per_second": 94.371, "eval_steps_per_second": 23.593, "step": 8200 }, { "epoch": 1.414819287501078, "grad_norm": 0.6171875, "learning_rate": 3.9656071330796475e-06, "loss": 1.343, "step": 8201 }, { "epoch": 1.4149918053998103, "grad_norm": 0.58203125, "learning_rate": 3.963436906035426e-06, "loss": 1.4482, "step": 8202 }, { "epoch": 1.4151643232985422, "grad_norm": 0.58984375, "learning_rate": 3.961267126217517e-06, "loss": 1.4088, "step": 8203 }, { "epoch": 1.4153368411972742, "grad_norm": 0.6328125, "learning_rate": 3.9590977937866715e-06, "loss": 1.3249, "step": 8204 }, { "epoch": 1.4155093590960062, "grad_norm": 0.703125, "learning_rate": 3.956928908903607e-06, "loss": 1.5044, "step": 8205 }, { "epoch": 1.4156818769947381, "grad_norm": 0.6171875, "learning_rate": 3.954760471729007e-06, "loss": 1.3926, "step": 8206 }, { "epoch": 1.4158543948934703, "grad_norm": 0.56640625, "learning_rate": 3.952592482423527e-06, "loss": 1.3704, "step": 8207 }, { "epoch": 1.4160269127922023, "grad_norm": 0.6484375, "learning_rate": 3.950424941147776e-06, "loss": 1.446, "step": 8208 }, { "epoch": 1.4161994306909342, "grad_norm": 0.64453125, "learning_rate": 3.948257848062351e-06, "loss": 1.4207, "step": 8209 }, { "epoch": 1.4163719485896662, "grad_norm": 0.6015625, "learning_rate": 3.946091203327794e-06, "loss": 1.3249, "step": 8210 }, { "epoch": 1.4165444664883982, "grad_norm": 0.7734375, "learning_rate": 3.9439250071046274e-06, "loss": 1.4373, "step": 8211 }, { "epoch": 1.4167169843871301, "grad_norm": 0.56640625, "learning_rate": 3.941759259553336e-06, "loss": 1.4268, "step": 8212 }, { "epoch": 1.416889502285862, "grad_norm": 0.93359375, "learning_rate": 3.939593960834374e-06, "loss": 1.3892, "step": 8213 }, { "epoch": 1.417062020184594, "grad_norm": 0.625, "learning_rate": 3.937429111108157e-06, "loss": 1.3337, "step": 8214 }, { "epoch": 1.4172345380833262, "grad_norm": 0.5546875, "learning_rate": 3.935264710535076e-06, "loss": 1.4283, "step": 8215 }, { "epoch": 1.4174070559820582, "grad_norm": 0.58984375, "learning_rate": 3.9331007592754725e-06, "loss": 1.3613, "step": 8216 }, { "epoch": 1.4175795738807901, "grad_norm": 0.59375, "learning_rate": 3.9309372574896755e-06, "loss": 1.4294, "step": 8217 }, { "epoch": 1.417752091779522, "grad_norm": 0.55078125, "learning_rate": 3.928774205337972e-06, "loss": 1.5224, "step": 8218 }, { "epoch": 1.417924609678254, "grad_norm": 0.57421875, "learning_rate": 3.926611602980603e-06, "loss": 1.3941, "step": 8219 }, { "epoch": 1.4180971275769862, "grad_norm": 0.6015625, "learning_rate": 3.924449450577801e-06, "loss": 1.4262, "step": 8220 }, { "epoch": 1.4182696454757182, "grad_norm": 0.59375, "learning_rate": 3.922287748289739e-06, "loss": 1.4873, "step": 8221 }, { "epoch": 1.4184421633744502, "grad_norm": 0.5625, "learning_rate": 3.920126496276583e-06, "loss": 1.5009, "step": 8222 }, { "epoch": 1.4186146812731821, "grad_norm": 0.61328125, "learning_rate": 3.917965694698441e-06, "loss": 1.3267, "step": 8223 }, { "epoch": 1.418787199171914, "grad_norm": 0.65234375, "learning_rate": 3.9158053437154035e-06, "loss": 1.4316, "step": 8224 }, { "epoch": 1.418959717070646, "grad_norm": 0.55859375, "learning_rate": 3.9136454434875216e-06, "loss": 1.4051, "step": 8225 }, { "epoch": 1.419132234969378, "grad_norm": 0.59375, "learning_rate": 3.911485994174814e-06, "loss": 1.3459, "step": 8226 }, { "epoch": 1.41930475286811, "grad_norm": 0.58984375, "learning_rate": 3.909326995937267e-06, "loss": 1.4291, "step": 8227 }, { "epoch": 1.419477270766842, "grad_norm": 0.6015625, "learning_rate": 3.907168448934836e-06, "loss": 1.4023, "step": 8228 }, { "epoch": 1.4196497886655741, "grad_norm": 0.6171875, "learning_rate": 3.9050103533274295e-06, "loss": 1.54, "step": 8229 }, { "epoch": 1.419822306564306, "grad_norm": 0.609375, "learning_rate": 3.902852709274946e-06, "loss": 1.4052, "step": 8230 }, { "epoch": 1.419994824463038, "grad_norm": 0.5625, "learning_rate": 3.900695516937226e-06, "loss": 1.4389, "step": 8231 }, { "epoch": 1.42016734236177, "grad_norm": 0.59765625, "learning_rate": 3.898538776474095e-06, "loss": 1.4603, "step": 8232 }, { "epoch": 1.420339860260502, "grad_norm": 0.54296875, "learning_rate": 3.896382488045333e-06, "loss": 1.4328, "step": 8233 }, { "epoch": 1.4205123781592341, "grad_norm": 0.62109375, "learning_rate": 3.894226651810693e-06, "loss": 1.3588, "step": 8234 }, { "epoch": 1.420684896057966, "grad_norm": 0.6484375, "learning_rate": 3.892071267929894e-06, "loss": 1.4438, "step": 8235 }, { "epoch": 1.420857413956698, "grad_norm": 0.55078125, "learning_rate": 3.88991633656262e-06, "loss": 1.4963, "step": 8236 }, { "epoch": 1.42102993185543, "grad_norm": 0.625, "learning_rate": 3.887761857868522e-06, "loss": 1.3939, "step": 8237 }, { "epoch": 1.421202449754162, "grad_norm": 0.59765625, "learning_rate": 3.885607832007214e-06, "loss": 1.3764, "step": 8238 }, { "epoch": 1.421374967652894, "grad_norm": 0.6171875, "learning_rate": 3.883454259138289e-06, "loss": 1.3638, "step": 8239 }, { "epoch": 1.421547485551626, "grad_norm": 0.57421875, "learning_rate": 3.881301139421281e-06, "loss": 1.3644, "step": 8240 }, { "epoch": 1.4217200034503579, "grad_norm": 0.5859375, "learning_rate": 3.879148473015723e-06, "loss": 1.4693, "step": 8241 }, { "epoch": 1.4218925213490898, "grad_norm": 0.60546875, "learning_rate": 3.87699626008109e-06, "loss": 1.4157, "step": 8242 }, { "epoch": 1.422065039247822, "grad_norm": 0.62109375, "learning_rate": 3.87484450077683e-06, "loss": 1.4135, "step": 8243 }, { "epoch": 1.422237557146554, "grad_norm": 0.59375, "learning_rate": 3.872693195262361e-06, "loss": 1.5038, "step": 8244 }, { "epoch": 1.422410075045286, "grad_norm": 0.6484375, "learning_rate": 3.870542343697067e-06, "loss": 1.4158, "step": 8245 }, { "epoch": 1.422582592944018, "grad_norm": 0.6171875, "learning_rate": 3.868391946240294e-06, "loss": 1.4482, "step": 8246 }, { "epoch": 1.4227551108427499, "grad_norm": 0.6328125, "learning_rate": 3.86624200305136e-06, "loss": 1.3736, "step": 8247 }, { "epoch": 1.422927628741482, "grad_norm": 0.58984375, "learning_rate": 3.864092514289539e-06, "loss": 1.3976, "step": 8248 }, { "epoch": 1.423100146640214, "grad_norm": 0.58203125, "learning_rate": 3.861943480114086e-06, "loss": 1.3573, "step": 8249 }, { "epoch": 1.423272664538946, "grad_norm": 0.63671875, "learning_rate": 3.859794900684212e-06, "loss": 1.4126, "step": 8250 }, { "epoch": 1.423445182437678, "grad_norm": 0.55859375, "learning_rate": 3.857646776159098e-06, "loss": 1.4187, "step": 8251 }, { "epoch": 1.4236177003364099, "grad_norm": 0.62890625, "learning_rate": 3.855499106697893e-06, "loss": 1.4546, "step": 8252 }, { "epoch": 1.4237902182351418, "grad_norm": 0.57421875, "learning_rate": 3.8533518924597e-06, "loss": 1.3757, "step": 8253 }, { "epoch": 1.4239627361338738, "grad_norm": 0.5703125, "learning_rate": 3.851205133603611e-06, "loss": 1.4586, "step": 8254 }, { "epoch": 1.4241352540326058, "grad_norm": 0.60546875, "learning_rate": 3.849058830288663e-06, "loss": 1.4201, "step": 8255 }, { "epoch": 1.424307771931338, "grad_norm": 0.69140625, "learning_rate": 3.846912982673869e-06, "loss": 1.5396, "step": 8256 }, { "epoch": 1.42448028983007, "grad_norm": 0.578125, "learning_rate": 3.8447675909182095e-06, "loss": 1.4887, "step": 8257 }, { "epoch": 1.4246528077288019, "grad_norm": 0.578125, "learning_rate": 3.842622655180625e-06, "loss": 1.4912, "step": 8258 }, { "epoch": 1.4248253256275338, "grad_norm": 0.66796875, "learning_rate": 3.840478175620026e-06, "loss": 1.402, "step": 8259 }, { "epoch": 1.4249978435262658, "grad_norm": 0.57421875, "learning_rate": 3.838334152395295e-06, "loss": 1.4811, "step": 8260 }, { "epoch": 1.425170361424998, "grad_norm": 0.55078125, "learning_rate": 3.836190585665263e-06, "loss": 1.4547, "step": 8261 }, { "epoch": 1.42534287932373, "grad_norm": 0.640625, "learning_rate": 3.834047475588753e-06, "loss": 1.4213, "step": 8262 }, { "epoch": 1.425515397222462, "grad_norm": 0.58984375, "learning_rate": 3.831904822324527e-06, "loss": 1.5176, "step": 8263 }, { "epoch": 1.4256879151211939, "grad_norm": 0.59375, "learning_rate": 3.829762626031333e-06, "loss": 1.4017, "step": 8264 }, { "epoch": 1.4258604330199258, "grad_norm": 0.578125, "learning_rate": 3.827620886867878e-06, "loss": 1.4718, "step": 8265 }, { "epoch": 1.4260329509186578, "grad_norm": 0.65234375, "learning_rate": 3.825479604992835e-06, "loss": 1.3909, "step": 8266 }, { "epoch": 1.4262054688173897, "grad_norm": 0.6171875, "learning_rate": 3.823338780564841e-06, "loss": 1.4747, "step": 8267 }, { "epoch": 1.4263779867161217, "grad_norm": 0.6171875, "learning_rate": 3.821198413742505e-06, "loss": 1.4617, "step": 8268 }, { "epoch": 1.4265505046148537, "grad_norm": 0.82421875, "learning_rate": 3.8190585046843965e-06, "loss": 1.3563, "step": 8269 }, { "epoch": 1.4267230225135858, "grad_norm": 0.6171875, "learning_rate": 3.8169190535490555e-06, "loss": 1.5484, "step": 8270 }, { "epoch": 1.4268955404123178, "grad_norm": 0.9296875, "learning_rate": 3.814780060494987e-06, "loss": 1.4492, "step": 8271 }, { "epoch": 1.4270680583110498, "grad_norm": 0.5546875, "learning_rate": 3.8126415256806527e-06, "loss": 1.3372, "step": 8272 }, { "epoch": 1.4272405762097817, "grad_norm": 0.671875, "learning_rate": 3.810503449264501e-06, "loss": 1.4254, "step": 8273 }, { "epoch": 1.4274130941085137, "grad_norm": 0.640625, "learning_rate": 3.808365831404924e-06, "loss": 1.4118, "step": 8274 }, { "epoch": 1.4275856120072459, "grad_norm": 0.58984375, "learning_rate": 3.8062286722602936e-06, "loss": 1.3553, "step": 8275 }, { "epoch": 1.4277581299059778, "grad_norm": 0.625, "learning_rate": 3.8040919719889435e-06, "loss": 1.4825, "step": 8276 }, { "epoch": 1.4279306478047098, "grad_norm": 0.546875, "learning_rate": 3.801955730749174e-06, "loss": 1.4204, "step": 8277 }, { "epoch": 1.4281031657034418, "grad_norm": 0.60546875, "learning_rate": 3.7998199486992502e-06, "loss": 1.4068, "step": 8278 }, { "epoch": 1.4282756836021737, "grad_norm": 0.5859375, "learning_rate": 3.7976846259974098e-06, "loss": 1.3543, "step": 8279 }, { "epoch": 1.4284482015009057, "grad_norm": 0.609375, "learning_rate": 3.795549762801839e-06, "loss": 1.4565, "step": 8280 }, { "epoch": 1.4286207193996376, "grad_norm": 0.5625, "learning_rate": 3.7934153592707125e-06, "loss": 1.3196, "step": 8281 }, { "epoch": 1.4287932372983696, "grad_norm": 0.6015625, "learning_rate": 3.7912814155621568e-06, "loss": 1.3837, "step": 8282 }, { "epoch": 1.4289657551971016, "grad_norm": 0.59375, "learning_rate": 3.789147931834267e-06, "loss": 1.4508, "step": 8283 }, { "epoch": 1.4291382730958337, "grad_norm": 0.60546875, "learning_rate": 3.7870149082451104e-06, "loss": 1.4073, "step": 8284 }, { "epoch": 1.4293107909945657, "grad_norm": 0.66015625, "learning_rate": 3.784882344952702e-06, "loss": 1.5276, "step": 8285 }, { "epoch": 1.4294833088932977, "grad_norm": 0.58203125, "learning_rate": 3.7827502421150497e-06, "loss": 1.3499, "step": 8286 }, { "epoch": 1.4296558267920296, "grad_norm": 0.58203125, "learning_rate": 3.7806185998901034e-06, "loss": 1.4606, "step": 8287 }, { "epoch": 1.4298283446907616, "grad_norm": 0.65234375, "learning_rate": 3.7784874184357923e-06, "loss": 1.3935, "step": 8288 }, { "epoch": 1.4300008625894938, "grad_norm": 0.65234375, "learning_rate": 3.7763566979100076e-06, "loss": 1.4289, "step": 8289 }, { "epoch": 1.4301733804882257, "grad_norm": 0.609375, "learning_rate": 3.774226438470605e-06, "loss": 1.3783, "step": 8290 }, { "epoch": 1.4303458983869577, "grad_norm": 0.62109375, "learning_rate": 3.7720966402754076e-06, "loss": 1.3995, "step": 8291 }, { "epoch": 1.4305184162856897, "grad_norm": 0.58984375, "learning_rate": 3.7699673034822095e-06, "loss": 1.4908, "step": 8292 }, { "epoch": 1.4306909341844216, "grad_norm": 0.625, "learning_rate": 3.7678384282487535e-06, "loss": 1.4742, "step": 8293 }, { "epoch": 1.4308634520831536, "grad_norm": 0.671875, "learning_rate": 3.765710014732774e-06, "loss": 1.398, "step": 8294 }, { "epoch": 1.4310359699818855, "grad_norm": 0.5859375, "learning_rate": 3.7635820630919464e-06, "loss": 1.3722, "step": 8295 }, { "epoch": 1.4312084878806175, "grad_norm": 0.62109375, "learning_rate": 3.761454573483927e-06, "loss": 1.49, "step": 8296 }, { "epoch": 1.4313810057793497, "grad_norm": 0.58203125, "learning_rate": 3.7593275460663323e-06, "loss": 1.4273, "step": 8297 }, { "epoch": 1.4315535236780816, "grad_norm": 0.59375, "learning_rate": 3.757200980996747e-06, "loss": 1.4282, "step": 8298 }, { "epoch": 1.4317260415768136, "grad_norm": 0.5859375, "learning_rate": 3.7550748784327195e-06, "loss": 1.4377, "step": 8299 }, { "epoch": 1.4318985594755456, "grad_norm": 0.65625, "learning_rate": 3.752949238531766e-06, "loss": 1.4625, "step": 8300 }, { "epoch": 1.4318985594755456, "eval_loss": 1.4072835445404053, "eval_runtime": 10.8629, "eval_samples_per_second": 94.266, "eval_steps_per_second": 23.566, "step": 8300 }, { "epoch": 1.4320710773742775, "grad_norm": 0.62109375, "learning_rate": 3.750824061451365e-06, "loss": 1.3686, "step": 8301 }, { "epoch": 1.4322435952730097, "grad_norm": 0.609375, "learning_rate": 3.7486993473489654e-06, "loss": 1.4269, "step": 8302 }, { "epoch": 1.4324161131717417, "grad_norm": 1.0078125, "learning_rate": 3.7465750963819802e-06, "loss": 1.4262, "step": 8303 }, { "epoch": 1.4325886310704736, "grad_norm": 0.58984375, "learning_rate": 3.7444513087077793e-06, "loss": 1.3747, "step": 8304 }, { "epoch": 1.4327611489692056, "grad_norm": 0.59765625, "learning_rate": 3.742327984483718e-06, "loss": 1.4343, "step": 8305 }, { "epoch": 1.4329336668679375, "grad_norm": 0.61328125, "learning_rate": 3.740205123867097e-06, "loss": 1.3822, "step": 8306 }, { "epoch": 1.4331061847666695, "grad_norm": 0.57421875, "learning_rate": 3.738082727015192e-06, "loss": 1.4472, "step": 8307 }, { "epoch": 1.4332787026654015, "grad_norm": 0.546875, "learning_rate": 3.735960794085246e-06, "loss": 1.405, "step": 8308 }, { "epoch": 1.4334512205641334, "grad_norm": 0.5859375, "learning_rate": 3.733839325234463e-06, "loss": 1.4689, "step": 8309 }, { "epoch": 1.4336237384628654, "grad_norm": 0.5703125, "learning_rate": 3.731718320620017e-06, "loss": 1.4762, "step": 8310 }, { "epoch": 1.4337962563615976, "grad_norm": 0.6328125, "learning_rate": 3.7295977803990426e-06, "loss": 1.4476, "step": 8311 }, { "epoch": 1.4339687742603295, "grad_norm": 0.58203125, "learning_rate": 3.7274777047286435e-06, "loss": 1.4185, "step": 8312 }, { "epoch": 1.4341412921590615, "grad_norm": 0.55078125, "learning_rate": 3.7253580937658896e-06, "loss": 1.2961, "step": 8313 }, { "epoch": 1.4343138100577935, "grad_norm": 0.5625, "learning_rate": 3.723238947667813e-06, "loss": 1.4259, "step": 8314 }, { "epoch": 1.4344863279565254, "grad_norm": 0.6015625, "learning_rate": 3.721120266591416e-06, "loss": 1.5421, "step": 8315 }, { "epoch": 1.4346588458552576, "grad_norm": 0.6796875, "learning_rate": 3.719002050693663e-06, "loss": 1.4687, "step": 8316 }, { "epoch": 1.4348313637539896, "grad_norm": 0.61328125, "learning_rate": 3.716884300131478e-06, "loss": 1.4174, "step": 8317 }, { "epoch": 1.4350038816527215, "grad_norm": 0.62109375, "learning_rate": 3.7147670150617698e-06, "loss": 1.4209, "step": 8318 }, { "epoch": 1.4351763995514535, "grad_norm": 0.59375, "learning_rate": 3.7126501956413898e-06, "loss": 1.3623, "step": 8319 }, { "epoch": 1.4353489174501854, "grad_norm": 0.6328125, "learning_rate": 3.7105338420271673e-06, "loss": 1.428, "step": 8320 }, { "epoch": 1.4355214353489174, "grad_norm": 0.6015625, "learning_rate": 3.7084179543758968e-06, "loss": 1.4367, "step": 8321 }, { "epoch": 1.4356939532476494, "grad_norm": 0.59375, "learning_rate": 3.706302532844336e-06, "loss": 1.4826, "step": 8322 }, { "epoch": 1.4358664711463813, "grad_norm": 0.6015625, "learning_rate": 3.7041875775892077e-06, "loss": 1.4315, "step": 8323 }, { "epoch": 1.4360389890451135, "grad_norm": 0.56640625, "learning_rate": 3.7020730887672063e-06, "loss": 1.4028, "step": 8324 }, { "epoch": 1.4362115069438455, "grad_norm": 0.5859375, "learning_rate": 3.699959066534974e-06, "loss": 1.2957, "step": 8325 }, { "epoch": 1.4363840248425774, "grad_norm": 0.6484375, "learning_rate": 3.697845511049146e-06, "loss": 1.3867, "step": 8326 }, { "epoch": 1.4365565427413094, "grad_norm": 0.63671875, "learning_rate": 3.695732422466296e-06, "loss": 1.4988, "step": 8327 }, { "epoch": 1.4367290606400414, "grad_norm": 0.55078125, "learning_rate": 3.69361980094298e-06, "loss": 1.384, "step": 8328 }, { "epoch": 1.4369015785387733, "grad_norm": 0.609375, "learning_rate": 3.6915076466357123e-06, "loss": 1.3995, "step": 8329 }, { "epoch": 1.4370740964375055, "grad_norm": 0.62109375, "learning_rate": 3.6893959597009766e-06, "loss": 1.3905, "step": 8330 }, { "epoch": 1.4372466143362375, "grad_norm": 0.640625, "learning_rate": 3.687284740295217e-06, "loss": 1.4135, "step": 8331 }, { "epoch": 1.4374191322349694, "grad_norm": 0.53125, "learning_rate": 3.6851739885748495e-06, "loss": 1.3253, "step": 8332 }, { "epoch": 1.4375916501337014, "grad_norm": 0.546875, "learning_rate": 3.683063704696248e-06, "loss": 1.4941, "step": 8333 }, { "epoch": 1.4377641680324333, "grad_norm": 0.61328125, "learning_rate": 3.680953888815758e-06, "loss": 1.4896, "step": 8334 }, { "epoch": 1.4379366859311653, "grad_norm": 0.5625, "learning_rate": 3.678844541089691e-06, "loss": 1.4125, "step": 8335 }, { "epoch": 1.4381092038298973, "grad_norm": 0.57421875, "learning_rate": 3.6767356616743098e-06, "loss": 1.3701, "step": 8336 }, { "epoch": 1.4382817217286292, "grad_norm": 0.6015625, "learning_rate": 3.674627250725866e-06, "loss": 1.4929, "step": 8337 }, { "epoch": 1.4384542396273614, "grad_norm": 0.6171875, "learning_rate": 3.6725193084005527e-06, "loss": 1.4682, "step": 8338 }, { "epoch": 1.4386267575260934, "grad_norm": 0.609375, "learning_rate": 3.6704118348545516e-06, "loss": 1.3607, "step": 8339 }, { "epoch": 1.4387992754248253, "grad_norm": 0.5859375, "learning_rate": 3.668304830243987e-06, "loss": 1.4724, "step": 8340 }, { "epoch": 1.4389717933235573, "grad_norm": 0.56640625, "learning_rate": 3.666198294724963e-06, "loss": 1.4779, "step": 8341 }, { "epoch": 1.4391443112222893, "grad_norm": 0.62890625, "learning_rate": 3.664092228453545e-06, "loss": 1.4672, "step": 8342 }, { "epoch": 1.4393168291210214, "grad_norm": 0.65234375, "learning_rate": 3.6619866315857632e-06, "loss": 1.3252, "step": 8343 }, { "epoch": 1.4394893470197534, "grad_norm": 0.5859375, "learning_rate": 3.6598815042776135e-06, "loss": 1.4026, "step": 8344 }, { "epoch": 1.4396618649184854, "grad_norm": 0.58203125, "learning_rate": 3.657776846685057e-06, "loss": 1.5287, "step": 8345 }, { "epoch": 1.4398343828172173, "grad_norm": 0.5703125, "learning_rate": 3.655672658964019e-06, "loss": 1.4182, "step": 8346 }, { "epoch": 1.4400069007159493, "grad_norm": 0.58984375, "learning_rate": 3.6535689412703923e-06, "loss": 1.4253, "step": 8347 }, { "epoch": 1.4401794186146812, "grad_norm": 0.57421875, "learning_rate": 3.651465693760037e-06, "loss": 1.4129, "step": 8348 }, { "epoch": 1.4403519365134132, "grad_norm": 1.4765625, "learning_rate": 3.6493629165887623e-06, "loss": 1.3906, "step": 8349 }, { "epoch": 1.4405244544121452, "grad_norm": 0.57421875, "learning_rate": 3.647260609912371e-06, "loss": 1.5282, "step": 8350 }, { "epoch": 1.4406969723108771, "grad_norm": 0.6328125, "learning_rate": 3.6451587738866035e-06, "loss": 1.4269, "step": 8351 }, { "epoch": 1.4408694902096093, "grad_norm": 0.59375, "learning_rate": 3.643057408667181e-06, "loss": 1.4873, "step": 8352 }, { "epoch": 1.4410420081083413, "grad_norm": 0.61328125, "learning_rate": 3.6409565144097856e-06, "loss": 1.4226, "step": 8353 }, { "epoch": 1.4412145260070732, "grad_norm": 0.58203125, "learning_rate": 3.6388560912700642e-06, "loss": 1.3989, "step": 8354 }, { "epoch": 1.4413870439058052, "grad_norm": 0.62109375, "learning_rate": 3.636756139403631e-06, "loss": 1.4151, "step": 8355 }, { "epoch": 1.4415595618045371, "grad_norm": 0.62109375, "learning_rate": 3.634656658966066e-06, "loss": 1.4418, "step": 8356 }, { "epoch": 1.4417320797032693, "grad_norm": 0.609375, "learning_rate": 3.6325576501129002e-06, "loss": 1.3659, "step": 8357 }, { "epoch": 1.4419045976020013, "grad_norm": 0.58984375, "learning_rate": 3.630459112999657e-06, "loss": 1.3226, "step": 8358 }, { "epoch": 1.4420771155007333, "grad_norm": 0.59765625, "learning_rate": 3.6283610477817975e-06, "loss": 1.5103, "step": 8359 }, { "epoch": 1.4422496333994652, "grad_norm": 0.66796875, "learning_rate": 3.6262634546147635e-06, "loss": 1.4522, "step": 8360 }, { "epoch": 1.4424221512981972, "grad_norm": 0.5859375, "learning_rate": 3.624166333653958e-06, "loss": 1.4022, "step": 8361 }, { "epoch": 1.4425946691969291, "grad_norm": 0.56640625, "learning_rate": 3.622069685054749e-06, "loss": 1.547, "step": 8362 }, { "epoch": 1.442767187095661, "grad_norm": 0.6640625, "learning_rate": 3.6199735089724676e-06, "loss": 1.4204, "step": 8363 }, { "epoch": 1.442939704994393, "grad_norm": 0.5703125, "learning_rate": 3.617877805562413e-06, "loss": 1.376, "step": 8364 }, { "epoch": 1.4431122228931252, "grad_norm": 0.60546875, "learning_rate": 3.6157825749798493e-06, "loss": 1.4068, "step": 8365 }, { "epoch": 1.4432847407918572, "grad_norm": 0.546875, "learning_rate": 3.613687817380002e-06, "loss": 1.3867, "step": 8366 }, { "epoch": 1.4434572586905892, "grad_norm": 0.5546875, "learning_rate": 3.611593532918064e-06, "loss": 1.3909, "step": 8367 }, { "epoch": 1.4436297765893211, "grad_norm": 0.66015625, "learning_rate": 3.6094997217491944e-06, "loss": 1.4072, "step": 8368 }, { "epoch": 1.443802294488053, "grad_norm": 0.56640625, "learning_rate": 3.607406384028519e-06, "loss": 1.4459, "step": 8369 }, { "epoch": 1.4439748123867853, "grad_norm": 0.87109375, "learning_rate": 3.6053135199111144e-06, "loss": 1.398, "step": 8370 }, { "epoch": 1.4441473302855172, "grad_norm": 0.58984375, "learning_rate": 3.603221129552047e-06, "loss": 1.5272, "step": 8371 }, { "epoch": 1.4443198481842492, "grad_norm": 0.6796875, "learning_rate": 3.6011292131063227e-06, "loss": 1.467, "step": 8372 }, { "epoch": 1.4444923660829811, "grad_norm": 0.609375, "learning_rate": 3.5990377707289292e-06, "loss": 1.4917, "step": 8373 }, { "epoch": 1.444664883981713, "grad_norm": 0.58203125, "learning_rate": 3.5969468025748135e-06, "loss": 1.4118, "step": 8374 }, { "epoch": 1.444837401880445, "grad_norm": 0.625, "learning_rate": 3.594856308798885e-06, "loss": 1.3988, "step": 8375 }, { "epoch": 1.445009919779177, "grad_norm": 0.6640625, "learning_rate": 3.5927662895560235e-06, "loss": 1.562, "step": 8376 }, { "epoch": 1.445182437677909, "grad_norm": 0.65234375, "learning_rate": 3.5906767450010694e-06, "loss": 1.4244, "step": 8377 }, { "epoch": 1.445354955576641, "grad_norm": 0.5703125, "learning_rate": 3.5885876752888295e-06, "loss": 1.3598, "step": 8378 }, { "epoch": 1.4455274734753731, "grad_norm": 0.5703125, "learning_rate": 3.586499080574074e-06, "loss": 1.4699, "step": 8379 }, { "epoch": 1.445699991374105, "grad_norm": 0.59765625, "learning_rate": 3.584410961011544e-06, "loss": 1.5031, "step": 8380 }, { "epoch": 1.445872509272837, "grad_norm": 0.625, "learning_rate": 3.58232331675593e-06, "loss": 1.3617, "step": 8381 }, { "epoch": 1.446045027171569, "grad_norm": 0.6015625, "learning_rate": 3.580236147961911e-06, "loss": 1.4913, "step": 8382 }, { "epoch": 1.446217545070301, "grad_norm": 0.6484375, "learning_rate": 3.5781494547841066e-06, "loss": 1.4094, "step": 8383 }, { "epoch": 1.4463900629690332, "grad_norm": 0.6328125, "learning_rate": 3.5760632373771163e-06, "loss": 1.3552, "step": 8384 }, { "epoch": 1.4465625808677651, "grad_norm": 0.62109375, "learning_rate": 3.573977495895501e-06, "loss": 1.4264, "step": 8385 }, { "epoch": 1.446735098766497, "grad_norm": 0.546875, "learning_rate": 3.571892230493783e-06, "loss": 1.4048, "step": 8386 }, { "epoch": 1.446907616665229, "grad_norm": 0.59375, "learning_rate": 3.569807441326454e-06, "loss": 1.384, "step": 8387 }, { "epoch": 1.447080134563961, "grad_norm": 0.609375, "learning_rate": 3.567723128547971e-06, "loss": 1.3813, "step": 8388 }, { "epoch": 1.447252652462693, "grad_norm": 0.57421875, "learning_rate": 3.5656392923127424e-06, "loss": 1.3643, "step": 8389 }, { "epoch": 1.447425170361425, "grad_norm": 0.57421875, "learning_rate": 3.5635559327751655e-06, "loss": 1.4991, "step": 8390 }, { "epoch": 1.447597688260157, "grad_norm": 0.61328125, "learning_rate": 3.561473050089579e-06, "loss": 1.4303, "step": 8391 }, { "epoch": 1.4477702061588889, "grad_norm": 0.62109375, "learning_rate": 3.559390644410298e-06, "loss": 1.4665, "step": 8392 }, { "epoch": 1.447942724057621, "grad_norm": 0.57421875, "learning_rate": 3.557308715891601e-06, "loss": 1.4102, "step": 8393 }, { "epoch": 1.448115241956353, "grad_norm": 0.63671875, "learning_rate": 3.555227264687726e-06, "loss": 1.5032, "step": 8394 }, { "epoch": 1.448287759855085, "grad_norm": 0.58203125, "learning_rate": 3.5531462909528925e-06, "loss": 1.4227, "step": 8395 }, { "epoch": 1.448460277753817, "grad_norm": 0.5703125, "learning_rate": 3.5510657948412587e-06, "loss": 1.3451, "step": 8396 }, { "epoch": 1.4486327956525489, "grad_norm": 0.7265625, "learning_rate": 3.548985776506967e-06, "loss": 1.4781, "step": 8397 }, { "epoch": 1.448805313551281, "grad_norm": 0.59765625, "learning_rate": 3.5469062361041152e-06, "loss": 1.4004, "step": 8398 }, { "epoch": 1.448977831450013, "grad_norm": 0.6328125, "learning_rate": 3.5448271737867713e-06, "loss": 1.4395, "step": 8399 }, { "epoch": 1.449150349348745, "grad_norm": 0.62109375, "learning_rate": 3.542748589708963e-06, "loss": 1.4146, "step": 8400 }, { "epoch": 1.449150349348745, "eval_loss": 1.4072437286376953, "eval_runtime": 10.8573, "eval_samples_per_second": 94.315, "eval_steps_per_second": 23.579, "step": 8400 }, { "epoch": 1.449322867247477, "grad_norm": 0.54296875, "learning_rate": 3.540670484024691e-06, "loss": 1.2855, "step": 8401 }, { "epoch": 1.449495385146209, "grad_norm": 0.578125, "learning_rate": 3.5385928568879012e-06, "loss": 1.3483, "step": 8402 }, { "epoch": 1.4496679030449409, "grad_norm": 0.64453125, "learning_rate": 3.5365157084525326e-06, "loss": 1.48, "step": 8403 }, { "epoch": 1.4498404209436728, "grad_norm": 0.58203125, "learning_rate": 3.5344390388724625e-06, "loss": 1.388, "step": 8404 }, { "epoch": 1.4500129388424048, "grad_norm": 0.61328125, "learning_rate": 3.532362848301547e-06, "loss": 1.4408, "step": 8405 }, { "epoch": 1.450185456741137, "grad_norm": 0.57421875, "learning_rate": 3.5302871368936043e-06, "loss": 1.3503, "step": 8406 }, { "epoch": 1.450357974639869, "grad_norm": 0.59765625, "learning_rate": 3.5282119048024146e-06, "loss": 1.3585, "step": 8407 }, { "epoch": 1.450530492538601, "grad_norm": 0.70703125, "learning_rate": 3.5261371521817247e-06, "loss": 1.4256, "step": 8408 }, { "epoch": 1.4507030104373329, "grad_norm": 0.57421875, "learning_rate": 3.524062879185247e-06, "loss": 1.333, "step": 8409 }, { "epoch": 1.4508755283360648, "grad_norm": 0.6953125, "learning_rate": 3.5219890859666493e-06, "loss": 1.5221, "step": 8410 }, { "epoch": 1.451048046234797, "grad_norm": 0.59375, "learning_rate": 3.519915772679581e-06, "loss": 1.431, "step": 8411 }, { "epoch": 1.451220564133529, "grad_norm": 0.875, "learning_rate": 3.5178429394776436e-06, "loss": 1.5443, "step": 8412 }, { "epoch": 1.451393082032261, "grad_norm": 0.578125, "learning_rate": 3.5157705865143964e-06, "loss": 1.4704, "step": 8413 }, { "epoch": 1.4515655999309929, "grad_norm": 0.8203125, "learning_rate": 3.5136987139433874e-06, "loss": 1.5263, "step": 8414 }, { "epoch": 1.4517381178297248, "grad_norm": 0.60546875, "learning_rate": 3.511627321918102e-06, "loss": 1.4345, "step": 8415 }, { "epoch": 1.4519106357284568, "grad_norm": 0.5703125, "learning_rate": 3.5095564105920065e-06, "loss": 1.4462, "step": 8416 }, { "epoch": 1.4520831536271888, "grad_norm": 0.62890625, "learning_rate": 3.5074859801185256e-06, "loss": 1.4408, "step": 8417 }, { "epoch": 1.4522556715259207, "grad_norm": 0.55078125, "learning_rate": 3.505416030651051e-06, "loss": 1.3899, "step": 8418 }, { "epoch": 1.4524281894246527, "grad_norm": 0.55078125, "learning_rate": 3.5033465623429362e-06, "loss": 1.4151, "step": 8419 }, { "epoch": 1.4526007073233849, "grad_norm": 0.59765625, "learning_rate": 3.501277575347505e-06, "loss": 1.4596, "step": 8420 }, { "epoch": 1.4527732252221168, "grad_norm": 0.5859375, "learning_rate": 3.4992090698180293e-06, "loss": 1.3831, "step": 8421 }, { "epoch": 1.4529457431208488, "grad_norm": 0.609375, "learning_rate": 3.4971410459077716e-06, "loss": 1.4109, "step": 8422 }, { "epoch": 1.4531182610195807, "grad_norm": 0.578125, "learning_rate": 3.4950735037699334e-06, "loss": 1.3633, "step": 8423 }, { "epoch": 1.4532907789183127, "grad_norm": 0.57421875, "learning_rate": 3.493006443557696e-06, "loss": 1.5085, "step": 8424 }, { "epoch": 1.453463296817045, "grad_norm": 0.6328125, "learning_rate": 3.490939865424198e-06, "loss": 1.4088, "step": 8425 }, { "epoch": 1.4536358147157769, "grad_norm": 0.625, "learning_rate": 3.4888737695225416e-06, "loss": 1.4707, "step": 8426 }, { "epoch": 1.4538083326145088, "grad_norm": 0.60546875, "learning_rate": 3.4868081560058066e-06, "loss": 1.4511, "step": 8427 }, { "epoch": 1.4539808505132408, "grad_norm": 0.5546875, "learning_rate": 3.4847430250270165e-06, "loss": 1.428, "step": 8428 }, { "epoch": 1.4541533684119727, "grad_norm": 0.6171875, "learning_rate": 3.4826783767391727e-06, "loss": 1.5579, "step": 8429 }, { "epoch": 1.4543258863107047, "grad_norm": 0.70703125, "learning_rate": 3.4806142112952356e-06, "loss": 1.5196, "step": 8430 }, { "epoch": 1.4544984042094367, "grad_norm": 0.6484375, "learning_rate": 3.478550528848134e-06, "loss": 1.4335, "step": 8431 }, { "epoch": 1.4546709221081686, "grad_norm": 0.578125, "learning_rate": 3.4764873295507563e-06, "loss": 1.4063, "step": 8432 }, { "epoch": 1.4548434400069006, "grad_norm": 0.60546875, "learning_rate": 3.4744246135559623e-06, "loss": 1.3783, "step": 8433 }, { "epoch": 1.4550159579056328, "grad_norm": 0.61328125, "learning_rate": 3.4723623810165584e-06, "loss": 1.4814, "step": 8434 }, { "epoch": 1.4551884758043647, "grad_norm": 0.64453125, "learning_rate": 3.470300632085344e-06, "loss": 1.4284, "step": 8435 }, { "epoch": 1.4553609937030967, "grad_norm": 0.6796875, "learning_rate": 3.4682393669150547e-06, "loss": 1.4607, "step": 8436 }, { "epoch": 1.4555335116018286, "grad_norm": 0.71875, "learning_rate": 3.466178585658405e-06, "loss": 1.454, "step": 8437 }, { "epoch": 1.4557060295005606, "grad_norm": 0.5703125, "learning_rate": 3.464118288468071e-06, "loss": 1.4989, "step": 8438 }, { "epoch": 1.4558785473992928, "grad_norm": 0.57421875, "learning_rate": 3.462058475496692e-06, "loss": 1.4011, "step": 8439 }, { "epoch": 1.4560510652980247, "grad_norm": 0.70703125, "learning_rate": 3.459999146896873e-06, "loss": 1.4045, "step": 8440 }, { "epoch": 1.4562235831967567, "grad_norm": 0.5703125, "learning_rate": 3.4579403028211835e-06, "loss": 1.3958, "step": 8441 }, { "epoch": 1.4563961010954887, "grad_norm": 0.6015625, "learning_rate": 3.4558819434221456e-06, "loss": 1.4219, "step": 8442 }, { "epoch": 1.4565686189942206, "grad_norm": 0.5390625, "learning_rate": 3.4538240688522684e-06, "loss": 1.3098, "step": 8443 }, { "epoch": 1.4567411368929526, "grad_norm": 0.58203125, "learning_rate": 3.451766679264008e-06, "loss": 1.4552, "step": 8444 }, { "epoch": 1.4569136547916846, "grad_norm": 0.58984375, "learning_rate": 3.449709774809782e-06, "loss": 1.3745, "step": 8445 }, { "epoch": 1.4570861726904165, "grad_norm": 0.63671875, "learning_rate": 3.447653355641989e-06, "loss": 1.4049, "step": 8446 }, { "epoch": 1.4572586905891487, "grad_norm": 0.58203125, "learning_rate": 3.445597421912974e-06, "loss": 1.4325, "step": 8447 }, { "epoch": 1.4574312084878807, "grad_norm": 0.63671875, "learning_rate": 3.4435419737750566e-06, "loss": 1.5556, "step": 8448 }, { "epoch": 1.4576037263866126, "grad_norm": 0.58203125, "learning_rate": 3.4414870113805156e-06, "loss": 1.4107, "step": 8449 }, { "epoch": 1.4577762442853446, "grad_norm": 0.61328125, "learning_rate": 3.439432534881596e-06, "loss": 1.4473, "step": 8450 }, { "epoch": 1.4579487621840765, "grad_norm": 0.62109375, "learning_rate": 3.437378544430506e-06, "loss": 1.4035, "step": 8451 }, { "epoch": 1.4581212800828087, "grad_norm": 0.5703125, "learning_rate": 3.4353250401794238e-06, "loss": 1.4244, "step": 8452 }, { "epoch": 1.4582937979815407, "grad_norm": 0.578125, "learning_rate": 3.433272022280472e-06, "loss": 1.508, "step": 8453 }, { "epoch": 1.4584663158802726, "grad_norm": 0.6015625, "learning_rate": 3.431219490885768e-06, "loss": 1.399, "step": 8454 }, { "epoch": 1.4586388337790046, "grad_norm": 0.546875, "learning_rate": 3.429167446147359e-06, "loss": 1.473, "step": 8455 }, { "epoch": 1.4588113516777366, "grad_norm": 0.7109375, "learning_rate": 3.4271158882172904e-06, "loss": 1.3995, "step": 8456 }, { "epoch": 1.4589838695764685, "grad_norm": 0.55859375, "learning_rate": 3.425064817247542e-06, "loss": 1.4109, "step": 8457 }, { "epoch": 1.4591563874752005, "grad_norm": 0.609375, "learning_rate": 3.4230142333900705e-06, "loss": 1.3566, "step": 8458 }, { "epoch": 1.4593289053739325, "grad_norm": 0.6015625, "learning_rate": 3.420964136796807e-06, "loss": 1.4539, "step": 8459 }, { "epoch": 1.4595014232726644, "grad_norm": 0.578125, "learning_rate": 3.4189145276196244e-06, "loss": 1.3497, "step": 8460 }, { "epoch": 1.4596739411713966, "grad_norm": 0.59375, "learning_rate": 3.4168654060103735e-06, "loss": 1.5443, "step": 8461 }, { "epoch": 1.4598464590701286, "grad_norm": 0.58984375, "learning_rate": 3.414816772120867e-06, "loss": 1.4199, "step": 8462 }, { "epoch": 1.4600189769688605, "grad_norm": 0.66015625, "learning_rate": 3.4127686261028803e-06, "loss": 1.4814, "step": 8463 }, { "epoch": 1.4601914948675925, "grad_norm": 0.59765625, "learning_rate": 3.410720968108153e-06, "loss": 1.5426, "step": 8464 }, { "epoch": 1.4603640127663244, "grad_norm": 0.58984375, "learning_rate": 3.408673798288391e-06, "loss": 1.4995, "step": 8465 }, { "epoch": 1.4605365306650566, "grad_norm": 0.60546875, "learning_rate": 3.4066271167952524e-06, "loss": 1.4793, "step": 8466 }, { "epoch": 1.4607090485637886, "grad_norm": 0.60546875, "learning_rate": 3.4045809237803794e-06, "loss": 1.442, "step": 8467 }, { "epoch": 1.4608815664625205, "grad_norm": 0.71484375, "learning_rate": 3.40253521939536e-06, "loss": 1.5188, "step": 8468 }, { "epoch": 1.4610540843612525, "grad_norm": 0.55859375, "learning_rate": 3.4004900037917545e-06, "loss": 1.4222, "step": 8469 }, { "epoch": 1.4612266022599845, "grad_norm": 0.5625, "learning_rate": 3.398445277121084e-06, "loss": 1.5028, "step": 8470 }, { "epoch": 1.4613991201587164, "grad_norm": 0.6484375, "learning_rate": 3.3964010395348355e-06, "loss": 1.3889, "step": 8471 }, { "epoch": 1.4615716380574484, "grad_norm": 0.58203125, "learning_rate": 3.3943572911844603e-06, "loss": 1.3595, "step": 8472 }, { "epoch": 1.4617441559561803, "grad_norm": 0.7265625, "learning_rate": 3.3923140322213744e-06, "loss": 1.3079, "step": 8473 }, { "epoch": 1.4619166738549125, "grad_norm": 0.625, "learning_rate": 3.3902712627969446e-06, "loss": 1.4147, "step": 8474 }, { "epoch": 1.4620891917536445, "grad_norm": 0.5546875, "learning_rate": 3.3882289830625227e-06, "loss": 1.3372, "step": 8475 }, { "epoch": 1.4622617096523765, "grad_norm": 0.65625, "learning_rate": 3.3861871931694124e-06, "loss": 1.4014, "step": 8476 }, { "epoch": 1.4624342275511084, "grad_norm": 0.5625, "learning_rate": 3.3841458932688744e-06, "loss": 1.5151, "step": 8477 }, { "epoch": 1.4626067454498404, "grad_norm": 0.625, "learning_rate": 3.382105083512152e-06, "loss": 1.5069, "step": 8478 }, { "epoch": 1.4627792633485723, "grad_norm": 0.6015625, "learning_rate": 3.3800647640504325e-06, "loss": 1.3539, "step": 8479 }, { "epoch": 1.4629517812473045, "grad_norm": 0.546875, "learning_rate": 3.378024935034879e-06, "loss": 1.3442, "step": 8480 }, { "epoch": 1.4631242991460365, "grad_norm": 0.59765625, "learning_rate": 3.3759855966166144e-06, "loss": 1.4406, "step": 8481 }, { "epoch": 1.4632968170447684, "grad_norm": 0.546875, "learning_rate": 3.3739467489467247e-06, "loss": 1.446, "step": 8482 }, { "epoch": 1.4634693349435004, "grad_norm": 0.6640625, "learning_rate": 3.371908392176262e-06, "loss": 1.5256, "step": 8483 }, { "epoch": 1.4636418528422324, "grad_norm": 0.63671875, "learning_rate": 3.3698705264562426e-06, "loss": 1.4524, "step": 8484 }, { "epoch": 1.4638143707409643, "grad_norm": 0.546875, "learning_rate": 3.367833151937636e-06, "loss": 1.4549, "step": 8485 }, { "epoch": 1.4639868886396963, "grad_norm": 0.62109375, "learning_rate": 3.365796268771395e-06, "loss": 1.4497, "step": 8486 }, { "epoch": 1.4641594065384282, "grad_norm": 0.59375, "learning_rate": 3.3637598771084125e-06, "loss": 1.3537, "step": 8487 }, { "epoch": 1.4643319244371604, "grad_norm": 0.62890625, "learning_rate": 3.3617239770995704e-06, "loss": 1.3475, "step": 8488 }, { "epoch": 1.4645044423358924, "grad_norm": 0.61328125, "learning_rate": 3.3596885688956893e-06, "loss": 1.3748, "step": 8489 }, { "epoch": 1.4646769602346243, "grad_norm": 0.59765625, "learning_rate": 3.3576536526475678e-06, "loss": 1.3505, "step": 8490 }, { "epoch": 1.4648494781333563, "grad_norm": 0.625, "learning_rate": 3.355619228505973e-06, "loss": 1.308, "step": 8491 }, { "epoch": 1.4650219960320883, "grad_norm": 0.55859375, "learning_rate": 3.3535852966216176e-06, "loss": 1.3639, "step": 8492 }, { "epoch": 1.4651945139308205, "grad_norm": 0.6484375, "learning_rate": 3.3515518571451933e-06, "loss": 1.5198, "step": 8493 }, { "epoch": 1.4653670318295524, "grad_norm": 0.5859375, "learning_rate": 3.3495189102273484e-06, "loss": 1.4561, "step": 8494 }, { "epoch": 1.4655395497282844, "grad_norm": 0.56640625, "learning_rate": 3.347486456018697e-06, "loss": 1.3444, "step": 8495 }, { "epoch": 1.4657120676270163, "grad_norm": 0.61328125, "learning_rate": 3.3454544946698153e-06, "loss": 1.4556, "step": 8496 }, { "epoch": 1.4658845855257483, "grad_norm": 0.59375, "learning_rate": 3.3434230263312483e-06, "loss": 1.4214, "step": 8497 }, { "epoch": 1.4660571034244803, "grad_norm": 0.65625, "learning_rate": 3.3413920511534883e-06, "loss": 1.4896, "step": 8498 }, { "epoch": 1.4662296213232122, "grad_norm": 0.5859375, "learning_rate": 3.3393615692870175e-06, "loss": 1.3647, "step": 8499 }, { "epoch": 1.4664021392219442, "grad_norm": 0.5859375, "learning_rate": 3.3373315808822547e-06, "loss": 1.4469, "step": 8500 }, { "epoch": 1.4664021392219442, "eval_loss": 1.4072179794311523, "eval_runtime": 10.9263, "eval_samples_per_second": 93.719, "eval_steps_per_second": 23.43, "step": 8500 }, { "epoch": 1.4665746571206761, "grad_norm": 0.64453125, "learning_rate": 3.3353020860896002e-06, "loss": 1.4388, "step": 8501 }, { "epoch": 1.4667471750194083, "grad_norm": 0.60546875, "learning_rate": 3.3332730850594088e-06, "loss": 1.5377, "step": 8502 }, { "epoch": 1.4669196929181403, "grad_norm": 0.62890625, "learning_rate": 3.3312445779420033e-06, "loss": 1.3911, "step": 8503 }, { "epoch": 1.4670922108168722, "grad_norm": 0.5703125, "learning_rate": 3.3292165648876683e-06, "loss": 1.4262, "step": 8504 }, { "epoch": 1.4672647287156042, "grad_norm": 0.58203125, "learning_rate": 3.3271890460466537e-06, "loss": 1.417, "step": 8505 }, { "epoch": 1.4674372466143362, "grad_norm": 0.68359375, "learning_rate": 3.3251620215691614e-06, "loss": 1.3963, "step": 8506 }, { "epoch": 1.4676097645130683, "grad_norm": 0.640625, "learning_rate": 3.323135491605377e-06, "loss": 1.4469, "step": 8507 }, { "epoch": 1.4677822824118003, "grad_norm": 0.578125, "learning_rate": 3.321109456305438e-06, "loss": 1.5031, "step": 8508 }, { "epoch": 1.4679548003105323, "grad_norm": 0.55078125, "learning_rate": 3.3190839158194344e-06, "loss": 1.3735, "step": 8509 }, { "epoch": 1.4681273182092642, "grad_norm": 0.58984375, "learning_rate": 3.317058870297446e-06, "loss": 1.4582, "step": 8510 }, { "epoch": 1.4682998361079962, "grad_norm": 0.53515625, "learning_rate": 3.3150343198894897e-06, "loss": 1.3811, "step": 8511 }, { "epoch": 1.4684723540067282, "grad_norm": 0.5625, "learning_rate": 3.3130102647455608e-06, "loss": 1.4364, "step": 8512 }, { "epoch": 1.4686448719054601, "grad_norm": 0.640625, "learning_rate": 3.310986705015613e-06, "loss": 1.3751, "step": 8513 }, { "epoch": 1.468817389804192, "grad_norm": 0.57421875, "learning_rate": 3.3089636408495662e-06, "loss": 1.504, "step": 8514 }, { "epoch": 1.4689899077029243, "grad_norm": 0.65625, "learning_rate": 3.3069410723972995e-06, "loss": 1.5245, "step": 8515 }, { "epoch": 1.4691624256016562, "grad_norm": 0.6015625, "learning_rate": 3.3049189998086584e-06, "loss": 1.4545, "step": 8516 }, { "epoch": 1.4693349435003882, "grad_norm": 0.5859375, "learning_rate": 3.302897423233451e-06, "loss": 1.3767, "step": 8517 }, { "epoch": 1.4695074613991201, "grad_norm": 0.80859375, "learning_rate": 3.300876342821451e-06, "loss": 1.3456, "step": 8518 }, { "epoch": 1.469679979297852, "grad_norm": 0.63671875, "learning_rate": 3.2988557587223837e-06, "loss": 1.4042, "step": 8519 }, { "epoch": 1.469852497196584, "grad_norm": 0.61328125, "learning_rate": 3.296835671085957e-06, "loss": 1.4078, "step": 8520 }, { "epoch": 1.4700250150953162, "grad_norm": 0.6640625, "learning_rate": 3.2948160800618254e-06, "loss": 1.3702, "step": 8521 }, { "epoch": 1.4701975329940482, "grad_norm": 0.5703125, "learning_rate": 3.292796985799611e-06, "loss": 1.4343, "step": 8522 }, { "epoch": 1.4703700508927802, "grad_norm": 0.5859375, "learning_rate": 3.290778388448912e-06, "loss": 1.4191, "step": 8523 }, { "epoch": 1.4705425687915121, "grad_norm": 0.55078125, "learning_rate": 3.288760288159266e-06, "loss": 1.3871, "step": 8524 }, { "epoch": 1.470715086690244, "grad_norm": 0.57421875, "learning_rate": 3.2867426850801932e-06, "loss": 1.4247, "step": 8525 }, { "epoch": 1.470887604588976, "grad_norm": 0.5703125, "learning_rate": 3.2847255793611674e-06, "loss": 1.3644, "step": 8526 }, { "epoch": 1.471060122487708, "grad_norm": 0.58203125, "learning_rate": 3.2827089711516303e-06, "loss": 1.4524, "step": 8527 }, { "epoch": 1.47123264038644, "grad_norm": 0.55859375, "learning_rate": 3.2806928606009836e-06, "loss": 1.4674, "step": 8528 }, { "epoch": 1.4714051582851722, "grad_norm": 0.62109375, "learning_rate": 3.278677247858598e-06, "loss": 1.4114, "step": 8529 }, { "epoch": 1.4715776761839041, "grad_norm": 0.58203125, "learning_rate": 3.2766621330737923e-06, "loss": 1.3431, "step": 8530 }, { "epoch": 1.471750194082636, "grad_norm": 0.60546875, "learning_rate": 3.2746475163958714e-06, "loss": 1.4278, "step": 8531 }, { "epoch": 1.471922711981368, "grad_norm": 0.62890625, "learning_rate": 3.272633397974081e-06, "loss": 1.4383, "step": 8532 }, { "epoch": 1.4720952298801, "grad_norm": 0.6015625, "learning_rate": 3.270619777957642e-06, "loss": 1.4291, "step": 8533 }, { "epoch": 1.4722677477788322, "grad_norm": 0.6171875, "learning_rate": 3.2686066564957387e-06, "loss": 1.3628, "step": 8534 }, { "epoch": 1.4724402656775641, "grad_norm": 0.640625, "learning_rate": 3.2665940337375125e-06, "loss": 1.4406, "step": 8535 }, { "epoch": 1.472612783576296, "grad_norm": 0.61328125, "learning_rate": 3.264581909832073e-06, "loss": 1.4555, "step": 8536 }, { "epoch": 1.472785301475028, "grad_norm": 0.6953125, "learning_rate": 3.2625702849284947e-06, "loss": 1.4099, "step": 8537 }, { "epoch": 1.47295781937376, "grad_norm": 0.90625, "learning_rate": 3.2605591591758e-06, "loss": 1.4453, "step": 8538 }, { "epoch": 1.473130337272492, "grad_norm": 0.59375, "learning_rate": 3.258548532722995e-06, "loss": 1.4416, "step": 8539 }, { "epoch": 1.473302855171224, "grad_norm": 0.66015625, "learning_rate": 3.256538405719042e-06, "loss": 1.3456, "step": 8540 }, { "epoch": 1.473475373069956, "grad_norm": 0.609375, "learning_rate": 3.254528778312852e-06, "loss": 1.4016, "step": 8541 }, { "epoch": 1.4736478909686879, "grad_norm": 0.6171875, "learning_rate": 3.2525196506533243e-06, "loss": 1.4421, "step": 8542 }, { "epoch": 1.47382040886742, "grad_norm": 0.60546875, "learning_rate": 3.250511022889293e-06, "loss": 1.4036, "step": 8543 }, { "epoch": 1.473992926766152, "grad_norm": 0.56640625, "learning_rate": 3.248502895169586e-06, "loss": 1.3537, "step": 8544 }, { "epoch": 1.474165444664884, "grad_norm": 0.5703125, "learning_rate": 3.246495267642967e-06, "loss": 1.5216, "step": 8545 }, { "epoch": 1.474337962563616, "grad_norm": 0.62109375, "learning_rate": 3.244488140458175e-06, "loss": 1.4198, "step": 8546 }, { "epoch": 1.474510480462348, "grad_norm": 0.62890625, "learning_rate": 3.2424815137639132e-06, "loss": 1.3578, "step": 8547 }, { "epoch": 1.47468299836108, "grad_norm": 0.62109375, "learning_rate": 3.2404753877088437e-06, "loss": 1.4625, "step": 8548 }, { "epoch": 1.474855516259812, "grad_norm": 0.66015625, "learning_rate": 3.2384697624415915e-06, "loss": 1.4955, "step": 8549 }, { "epoch": 1.475028034158544, "grad_norm": 0.55078125, "learning_rate": 3.236464638110752e-06, "loss": 1.3116, "step": 8550 }, { "epoch": 1.475200552057276, "grad_norm": 0.68359375, "learning_rate": 3.2344600148648652e-06, "loss": 1.4343, "step": 8551 }, { "epoch": 1.475373069956008, "grad_norm": 0.66015625, "learning_rate": 3.232455892852461e-06, "loss": 1.4746, "step": 8552 }, { "epoch": 1.4755455878547399, "grad_norm": 0.73046875, "learning_rate": 3.230452272222007e-06, "loss": 1.3621, "step": 8553 }, { "epoch": 1.4757181057534718, "grad_norm": 0.55078125, "learning_rate": 3.228449153121942e-06, "loss": 1.3907, "step": 8554 }, { "epoch": 1.4758906236522038, "grad_norm": 0.6171875, "learning_rate": 3.2264465357006814e-06, "loss": 1.5396, "step": 8555 }, { "epoch": 1.476063141550936, "grad_norm": 0.578125, "learning_rate": 3.2244444201065815e-06, "loss": 1.4031, "step": 8556 }, { "epoch": 1.476235659449668, "grad_norm": 0.5703125, "learning_rate": 3.2224428064879743e-06, "loss": 1.3998, "step": 8557 }, { "epoch": 1.4764081773484, "grad_norm": 0.578125, "learning_rate": 3.2204416949931516e-06, "loss": 1.4665, "step": 8558 }, { "epoch": 1.4765806952471319, "grad_norm": 0.5625, "learning_rate": 3.2184410857703695e-06, "loss": 1.4619, "step": 8559 }, { "epoch": 1.4767532131458638, "grad_norm": 1.0234375, "learning_rate": 3.2164409789678453e-06, "loss": 1.4198, "step": 8560 }, { "epoch": 1.476925731044596, "grad_norm": 0.625, "learning_rate": 3.2144413747337622e-06, "loss": 1.4841, "step": 8561 }, { "epoch": 1.477098248943328, "grad_norm": 0.59765625, "learning_rate": 3.212442273216253e-06, "loss": 1.3529, "step": 8562 }, { "epoch": 1.47727076684206, "grad_norm": 0.59765625, "learning_rate": 3.210443674563437e-06, "loss": 1.4495, "step": 8563 }, { "epoch": 1.477443284740792, "grad_norm": 0.55859375, "learning_rate": 3.2084455789233748e-06, "loss": 1.3776, "step": 8564 }, { "epoch": 1.4776158026395239, "grad_norm": 0.57421875, "learning_rate": 3.206447986444099e-06, "loss": 1.4294, "step": 8565 }, { "epoch": 1.4777883205382558, "grad_norm": 0.625, "learning_rate": 3.2044508972736044e-06, "loss": 1.4277, "step": 8566 }, { "epoch": 1.4779608384369878, "grad_norm": 0.58984375, "learning_rate": 3.202454311559847e-06, "loss": 1.4677, "step": 8567 }, { "epoch": 1.4781333563357197, "grad_norm": 0.640625, "learning_rate": 3.200458229450749e-06, "loss": 1.3658, "step": 8568 }, { "epoch": 1.4783058742344517, "grad_norm": 0.5859375, "learning_rate": 3.198462651094193e-06, "loss": 1.3626, "step": 8569 }, { "epoch": 1.4784783921331839, "grad_norm": 0.59375, "learning_rate": 3.1964675766380148e-06, "loss": 1.4493, "step": 8570 }, { "epoch": 1.4786509100319158, "grad_norm": 0.58984375, "learning_rate": 3.194473006230033e-06, "loss": 1.4275, "step": 8571 }, { "epoch": 1.4788234279306478, "grad_norm": 0.640625, "learning_rate": 3.192478940018018e-06, "loss": 1.4172, "step": 8572 }, { "epoch": 1.4789959458293798, "grad_norm": 0.55859375, "learning_rate": 3.19048537814969e-06, "loss": 1.404, "step": 8573 }, { "epoch": 1.4791684637281117, "grad_norm": 0.57421875, "learning_rate": 3.18849232077276e-06, "loss": 1.4071, "step": 8574 }, { "epoch": 1.479340981626844, "grad_norm": 0.57421875, "learning_rate": 3.186499768034874e-06, "loss": 1.4131, "step": 8575 }, { "epoch": 1.4795134995255759, "grad_norm": 0.57421875, "learning_rate": 3.1845077200836638e-06, "loss": 1.4327, "step": 8576 }, { "epoch": 1.4796860174243078, "grad_norm": 0.58203125, "learning_rate": 3.1825161770667023e-06, "loss": 1.4534, "step": 8577 }, { "epoch": 1.4798585353230398, "grad_norm": 0.56640625, "learning_rate": 3.180525139131542e-06, "loss": 1.432, "step": 8578 }, { "epoch": 1.4800310532217718, "grad_norm": 0.67578125, "learning_rate": 3.1785346064256884e-06, "loss": 1.4916, "step": 8579 }, { "epoch": 1.4802035711205037, "grad_norm": 0.70703125, "learning_rate": 3.176544579096613e-06, "loss": 1.4756, "step": 8580 }, { "epoch": 1.4803760890192357, "grad_norm": 0.59375, "learning_rate": 3.1745550572917503e-06, "loss": 1.378, "step": 8581 }, { "epoch": 1.4805486069179676, "grad_norm": 0.56640625, "learning_rate": 3.1725660411585e-06, "loss": 1.4055, "step": 8582 }, { "epoch": 1.4807211248166996, "grad_norm": 0.74609375, "learning_rate": 3.170577530844211e-06, "loss": 1.437, "step": 8583 }, { "epoch": 1.4808936427154318, "grad_norm": 0.62109375, "learning_rate": 3.1685895264962173e-06, "loss": 1.4833, "step": 8584 }, { "epoch": 1.4810661606141637, "grad_norm": 0.60546875, "learning_rate": 3.1666020282617925e-06, "loss": 1.4276, "step": 8585 }, { "epoch": 1.4812386785128957, "grad_norm": 0.58203125, "learning_rate": 3.1646150362881835e-06, "loss": 1.4181, "step": 8586 }, { "epoch": 1.4814111964116277, "grad_norm": 0.62109375, "learning_rate": 3.1626285507226072e-06, "loss": 1.4481, "step": 8587 }, { "epoch": 1.4815837143103596, "grad_norm": 0.59375, "learning_rate": 3.160642571712228e-06, "loss": 1.4209, "step": 8588 }, { "epoch": 1.4817562322090918, "grad_norm": 0.58984375, "learning_rate": 3.158657099404181e-06, "loss": 1.4144, "step": 8589 }, { "epoch": 1.4819287501078238, "grad_norm": 0.65625, "learning_rate": 3.156672133945563e-06, "loss": 1.4058, "step": 8590 }, { "epoch": 1.4821012680065557, "grad_norm": 0.55078125, "learning_rate": 3.154687675483432e-06, "loss": 1.3381, "step": 8591 }, { "epoch": 1.4822737859052877, "grad_norm": 0.55078125, "learning_rate": 3.1527037241648096e-06, "loss": 1.3612, "step": 8592 }, { "epoch": 1.4824463038040196, "grad_norm": 0.7265625, "learning_rate": 3.150720280136682e-06, "loss": 1.3687, "step": 8593 }, { "epoch": 1.4826188217027516, "grad_norm": 0.5625, "learning_rate": 3.1487373435459865e-06, "loss": 1.5162, "step": 8594 }, { "epoch": 1.4827913396014836, "grad_norm": 0.609375, "learning_rate": 3.1467549145396437e-06, "loss": 1.2491, "step": 8595 }, { "epoch": 1.4829638575002155, "grad_norm": 0.59375, "learning_rate": 3.1447729932645145e-06, "loss": 1.4794, "step": 8596 }, { "epoch": 1.4831363753989477, "grad_norm": 0.578125, "learning_rate": 3.142791579867435e-06, "loss": 1.3974, "step": 8597 }, { "epoch": 1.4833088932976797, "grad_norm": 0.5546875, "learning_rate": 3.1408106744952018e-06, "loss": 1.3553, "step": 8598 }, { "epoch": 1.4834814111964116, "grad_norm": 0.78125, "learning_rate": 3.1388302772945713e-06, "loss": 1.5289, "step": 8599 }, { "epoch": 1.4836539290951436, "grad_norm": 0.6015625, "learning_rate": 3.136850388412265e-06, "loss": 1.4299, "step": 8600 }, { "epoch": 1.4836539290951436, "eval_loss": 1.4071738719940186, "eval_runtime": 10.7878, "eval_samples_per_second": 94.922, "eval_steps_per_second": 23.731, "step": 8600 }, { "epoch": 1.4838264469938756, "grad_norm": 0.56640625, "learning_rate": 3.1348710079949675e-06, "loss": 1.4985, "step": 8601 }, { "epoch": 1.4839989648926077, "grad_norm": 0.609375, "learning_rate": 3.1328921361893148e-06, "loss": 1.4663, "step": 8602 }, { "epoch": 1.4841714827913397, "grad_norm": 0.6171875, "learning_rate": 3.1309137731419236e-06, "loss": 1.3719, "step": 8603 }, { "epoch": 1.4843440006900717, "grad_norm": 0.6171875, "learning_rate": 3.1289359189993607e-06, "loss": 1.5156, "step": 8604 }, { "epoch": 1.4845165185888036, "grad_norm": 0.59765625, "learning_rate": 3.1269585739081564e-06, "loss": 1.373, "step": 8605 }, { "epoch": 1.4846890364875356, "grad_norm": 0.5546875, "learning_rate": 3.1249817380148097e-06, "loss": 1.4311, "step": 8606 }, { "epoch": 1.4848615543862675, "grad_norm": 0.67578125, "learning_rate": 3.123005411465766e-06, "loss": 1.4612, "step": 8607 }, { "epoch": 1.4850340722849995, "grad_norm": 0.640625, "learning_rate": 3.1210295944074574e-06, "loss": 1.4437, "step": 8608 }, { "epoch": 1.4852065901837315, "grad_norm": 0.6328125, "learning_rate": 3.119054286986255e-06, "loss": 1.4048, "step": 8609 }, { "epoch": 1.4853791080824634, "grad_norm": 0.6640625, "learning_rate": 3.1170794893485047e-06, "loss": 1.403, "step": 8610 }, { "epoch": 1.4855516259811956, "grad_norm": 0.578125, "learning_rate": 3.1151052016405125e-06, "loss": 1.3551, "step": 8611 }, { "epoch": 1.4857241438799276, "grad_norm": 0.5859375, "learning_rate": 3.1131314240085465e-06, "loss": 1.4862, "step": 8612 }, { "epoch": 1.4858966617786595, "grad_norm": 0.5390625, "learning_rate": 3.111158156598836e-06, "loss": 1.3378, "step": 8613 }, { "epoch": 1.4860691796773915, "grad_norm": 0.59765625, "learning_rate": 3.109185399557575e-06, "loss": 1.413, "step": 8614 }, { "epoch": 1.4862416975761235, "grad_norm": 0.5625, "learning_rate": 3.107213153030909e-06, "loss": 1.4823, "step": 8615 }, { "epoch": 1.4864142154748556, "grad_norm": 0.60546875, "learning_rate": 3.105241417164967e-06, "loss": 1.3963, "step": 8616 }, { "epoch": 1.4865867333735876, "grad_norm": 0.54296875, "learning_rate": 3.1032701921058184e-06, "loss": 1.3688, "step": 8617 }, { "epoch": 1.4867592512723196, "grad_norm": 0.60546875, "learning_rate": 3.1012994779995077e-06, "loss": 1.3553, "step": 8618 }, { "epoch": 1.4869317691710515, "grad_norm": 0.66796875, "learning_rate": 3.0993292749920355e-06, "loss": 1.4014, "step": 8619 }, { "epoch": 1.4871042870697835, "grad_norm": 0.58984375, "learning_rate": 3.097359583229368e-06, "loss": 1.5023, "step": 8620 }, { "epoch": 1.4872768049685154, "grad_norm": 0.56640625, "learning_rate": 3.095390402857432e-06, "loss": 1.2465, "step": 8621 }, { "epoch": 1.4874493228672474, "grad_norm": 0.5859375, "learning_rate": 3.0934217340221183e-06, "loss": 1.4298, "step": 8622 }, { "epoch": 1.4876218407659794, "grad_norm": 0.578125, "learning_rate": 3.0914535768692753e-06, "loss": 1.3638, "step": 8623 }, { "epoch": 1.4877943586647115, "grad_norm": 0.5546875, "learning_rate": 3.089485931544719e-06, "loss": 1.4358, "step": 8624 }, { "epoch": 1.4879668765634435, "grad_norm": 0.5546875, "learning_rate": 3.0875187981942266e-06, "loss": 1.3866, "step": 8625 }, { "epoch": 1.4881393944621755, "grad_norm": 0.6484375, "learning_rate": 3.0855521769635266e-06, "loss": 1.4508, "step": 8626 }, { "epoch": 1.4883119123609074, "grad_norm": 0.5625, "learning_rate": 3.0835860679983308e-06, "loss": 1.3201, "step": 8627 }, { "epoch": 1.4884844302596394, "grad_norm": 0.56640625, "learning_rate": 3.081620471444292e-06, "loss": 1.4419, "step": 8628 }, { "epoch": 1.4886569481583714, "grad_norm": 0.5703125, "learning_rate": 3.079655387447037e-06, "loss": 1.3916, "step": 8629 }, { "epoch": 1.4888294660571035, "grad_norm": 0.578125, "learning_rate": 3.077690816152151e-06, "loss": 1.3884, "step": 8630 }, { "epoch": 1.4890019839558355, "grad_norm": 0.66796875, "learning_rate": 3.075726757705182e-06, "loss": 1.4443, "step": 8631 }, { "epoch": 1.4891745018545675, "grad_norm": 0.578125, "learning_rate": 3.073763212251639e-06, "loss": 1.3651, "step": 8632 }, { "epoch": 1.4893470197532994, "grad_norm": 0.58984375, "learning_rate": 3.071800179936998e-06, "loss": 1.4278, "step": 8633 }, { "epoch": 1.4895195376520314, "grad_norm": 0.6953125, "learning_rate": 3.0698376609066828e-06, "loss": 1.48, "step": 8634 }, { "epoch": 1.4896920555507633, "grad_norm": 1.0078125, "learning_rate": 3.0678756553060984e-06, "loss": 1.3465, "step": 8635 }, { "epoch": 1.4898645734494953, "grad_norm": 0.546875, "learning_rate": 3.0659141632805987e-06, "loss": 1.3635, "step": 8636 }, { "epoch": 1.4900370913482273, "grad_norm": 0.62109375, "learning_rate": 3.0639531849755044e-06, "loss": 1.3396, "step": 8637 }, { "epoch": 1.4902096092469594, "grad_norm": 0.62109375, "learning_rate": 3.0619927205360998e-06, "loss": 1.4531, "step": 8638 }, { "epoch": 1.4903821271456914, "grad_norm": 0.6484375, "learning_rate": 3.0600327701076193e-06, "loss": 1.4887, "step": 8639 }, { "epoch": 1.4905546450444234, "grad_norm": 0.5546875, "learning_rate": 3.058073333835281e-06, "loss": 1.4059, "step": 8640 }, { "epoch": 1.4907271629431553, "grad_norm": 0.56640625, "learning_rate": 3.0561144118642406e-06, "loss": 1.3808, "step": 8641 }, { "epoch": 1.4908996808418873, "grad_norm": 0.5625, "learning_rate": 3.0541560043396322e-06, "loss": 1.5228, "step": 8642 }, { "epoch": 1.4910721987406195, "grad_norm": 0.60546875, "learning_rate": 3.052198111406547e-06, "loss": 1.3432, "step": 8643 }, { "epoch": 1.4912447166393514, "grad_norm": 0.59765625, "learning_rate": 3.0502407332100382e-06, "loss": 1.4385, "step": 8644 }, { "epoch": 1.4914172345380834, "grad_norm": 0.59765625, "learning_rate": 3.048283869895119e-06, "loss": 1.4864, "step": 8645 }, { "epoch": 1.4915897524368154, "grad_norm": 0.5703125, "learning_rate": 3.04632752160677e-06, "loss": 1.3705, "step": 8646 }, { "epoch": 1.4917622703355473, "grad_norm": 0.7109375, "learning_rate": 3.044371688489921e-06, "loss": 1.4323, "step": 8647 }, { "epoch": 1.4919347882342793, "grad_norm": 0.578125, "learning_rate": 3.042416370689485e-06, "loss": 1.4554, "step": 8648 }, { "epoch": 1.4921073061330112, "grad_norm": 0.6015625, "learning_rate": 3.0404615683503136e-06, "loss": 1.4668, "step": 8649 }, { "epoch": 1.4922798240317432, "grad_norm": 0.65234375, "learning_rate": 3.0385072816172344e-06, "loss": 1.3353, "step": 8650 }, { "epoch": 1.4924523419304752, "grad_norm": 0.57421875, "learning_rate": 3.036553510635033e-06, "loss": 1.3784, "step": 8651 }, { "epoch": 1.4926248598292073, "grad_norm": 0.5703125, "learning_rate": 3.0346002555484567e-06, "loss": 1.469, "step": 8652 }, { "epoch": 1.4927973777279393, "grad_norm": 0.59765625, "learning_rate": 3.0326475165022164e-06, "loss": 1.3914, "step": 8653 }, { "epoch": 1.4929698956266713, "grad_norm": 0.58984375, "learning_rate": 3.030695293640982e-06, "loss": 1.3753, "step": 8654 }, { "epoch": 1.4931424135254032, "grad_norm": 0.5703125, "learning_rate": 3.028743587109385e-06, "loss": 1.4428, "step": 8655 }, { "epoch": 1.4933149314241352, "grad_norm": 0.59765625, "learning_rate": 3.026792397052023e-06, "loss": 1.4154, "step": 8656 }, { "epoch": 1.4934874493228674, "grad_norm": 0.61328125, "learning_rate": 3.024841723613453e-06, "loss": 1.4908, "step": 8657 }, { "epoch": 1.4936599672215993, "grad_norm": 0.66796875, "learning_rate": 3.022891566938185e-06, "loss": 1.2884, "step": 8658 }, { "epoch": 1.4938324851203313, "grad_norm": 0.60546875, "learning_rate": 3.0209419271707118e-06, "loss": 1.4292, "step": 8659 }, { "epoch": 1.4940050030190632, "grad_norm": 0.59765625, "learning_rate": 3.018992804455464e-06, "loss": 1.4774, "step": 8660 }, { "epoch": 1.4941775209177952, "grad_norm": 0.578125, "learning_rate": 3.017044198936848e-06, "loss": 1.4197, "step": 8661 }, { "epoch": 1.4943500388165272, "grad_norm": 0.58203125, "learning_rate": 3.01509611075923e-06, "loss": 1.3863, "step": 8662 }, { "epoch": 1.4945225567152591, "grad_norm": 0.6171875, "learning_rate": 3.013148540066936e-06, "loss": 1.5052, "step": 8663 }, { "epoch": 1.494695074613991, "grad_norm": 0.6171875, "learning_rate": 3.011201487004254e-06, "loss": 1.3792, "step": 8664 }, { "epoch": 1.4948675925127233, "grad_norm": 0.671875, "learning_rate": 3.0092549517154336e-06, "loss": 1.4096, "step": 8665 }, { "epoch": 1.4950401104114552, "grad_norm": 0.6015625, "learning_rate": 3.007308934344686e-06, "loss": 1.4694, "step": 8666 }, { "epoch": 1.4952126283101872, "grad_norm": 0.5625, "learning_rate": 3.005363435036186e-06, "loss": 1.408, "step": 8667 }, { "epoch": 1.4953851462089192, "grad_norm": 0.5625, "learning_rate": 3.0034184539340663e-06, "loss": 1.4432, "step": 8668 }, { "epoch": 1.4955576641076511, "grad_norm": 0.57421875, "learning_rate": 3.0014739911824244e-06, "loss": 1.4187, "step": 8669 }, { "epoch": 1.495730182006383, "grad_norm": 0.65234375, "learning_rate": 2.999530046925322e-06, "loss": 1.4553, "step": 8670 }, { "epoch": 1.4959026999051153, "grad_norm": 0.61328125, "learning_rate": 2.9975866213067673e-06, "loss": 1.4767, "step": 8671 }, { "epoch": 1.4960752178038472, "grad_norm": 0.55078125, "learning_rate": 2.9956437144707552e-06, "loss": 1.4951, "step": 8672 }, { "epoch": 1.4962477357025792, "grad_norm": 0.58203125, "learning_rate": 2.9937013265612192e-06, "loss": 1.4536, "step": 8673 }, { "epoch": 1.4964202536013111, "grad_norm": 0.70703125, "learning_rate": 2.9917594577220665e-06, "loss": 1.4156, "step": 8674 }, { "epoch": 1.496592771500043, "grad_norm": 0.6328125, "learning_rate": 2.989818108097162e-06, "loss": 1.4029, "step": 8675 }, { "epoch": 1.496765289398775, "grad_norm": 0.58203125, "learning_rate": 2.9878772778303344e-06, "loss": 1.4684, "step": 8676 }, { "epoch": 1.496937807297507, "grad_norm": 0.61328125, "learning_rate": 2.9859369670653715e-06, "loss": 1.4403, "step": 8677 }, { "epoch": 1.497110325196239, "grad_norm": 0.58984375, "learning_rate": 2.983997175946027e-06, "loss": 1.4305, "step": 8678 }, { "epoch": 1.4972828430949712, "grad_norm": 0.62890625, "learning_rate": 2.982057904616004e-06, "loss": 1.4958, "step": 8679 }, { "epoch": 1.4974553609937031, "grad_norm": 0.59375, "learning_rate": 2.9801191532189876e-06, "loss": 1.4899, "step": 8680 }, { "epoch": 1.497627878892435, "grad_norm": 0.55078125, "learning_rate": 2.9781809218986036e-06, "loss": 1.4251, "step": 8681 }, { "epoch": 1.497800396791167, "grad_norm": 0.640625, "learning_rate": 2.9762432107984508e-06, "loss": 1.3534, "step": 8682 }, { "epoch": 1.497972914689899, "grad_norm": 0.6015625, "learning_rate": 2.974306020062088e-06, "loss": 1.3928, "step": 8683 }, { "epoch": 1.4981454325886312, "grad_norm": 0.5703125, "learning_rate": 2.972369349833033e-06, "loss": 1.3605, "step": 8684 }, { "epoch": 1.4983179504873632, "grad_norm": 0.5625, "learning_rate": 2.9704332002547677e-06, "loss": 1.3593, "step": 8685 }, { "epoch": 1.4984904683860951, "grad_norm": 0.5390625, "learning_rate": 2.9684975714707333e-06, "loss": 1.3329, "step": 8686 }, { "epoch": 1.498662986284827, "grad_norm": 0.6796875, "learning_rate": 2.966562463624334e-06, "loss": 1.468, "step": 8687 }, { "epoch": 1.498835504183559, "grad_norm": 0.578125, "learning_rate": 2.9646278768589345e-06, "loss": 1.4801, "step": 8688 }, { "epoch": 1.499008022082291, "grad_norm": 0.64453125, "learning_rate": 2.962693811317863e-06, "loss": 1.5002, "step": 8689 }, { "epoch": 1.499180539981023, "grad_norm": 0.55078125, "learning_rate": 2.9607602671443993e-06, "loss": 1.3173, "step": 8690 }, { "epoch": 1.499353057879755, "grad_norm": 0.5859375, "learning_rate": 2.9588272444818056e-06, "loss": 1.4184, "step": 8691 }, { "epoch": 1.4995255757784869, "grad_norm": 0.6015625, "learning_rate": 2.9568947434732777e-06, "loss": 1.4755, "step": 8692 }, { "epoch": 1.499698093677219, "grad_norm": 0.59375, "learning_rate": 2.9549627642620005e-06, "loss": 1.344, "step": 8693 }, { "epoch": 1.499870611575951, "grad_norm": 0.609375, "learning_rate": 2.9530313069910986e-06, "loss": 1.4186, "step": 8694 }, { "epoch": 1.500043129474683, "grad_norm": 0.609375, "learning_rate": 2.951100371803669e-06, "loss": 1.3953, "step": 8695 }, { "epoch": 1.500215647373415, "grad_norm": 0.56640625, "learning_rate": 2.949169958842767e-06, "loss": 1.4259, "step": 8696 }, { "epoch": 1.5003881652721471, "grad_norm": 1.03125, "learning_rate": 2.9472400682514104e-06, "loss": 1.4243, "step": 8697 }, { "epoch": 1.500560683170879, "grad_norm": 0.57421875, "learning_rate": 2.945310700172577e-06, "loss": 1.4715, "step": 8698 }, { "epoch": 1.500733201069611, "grad_norm": 0.61328125, "learning_rate": 2.9433818547492067e-06, "loss": 1.4727, "step": 8699 }, { "epoch": 1.500905718968343, "grad_norm": 0.57421875, "learning_rate": 2.941453532124201e-06, "loss": 1.4355, "step": 8700 }, { "epoch": 1.500905718968343, "eval_loss": 1.4072085618972778, "eval_runtime": 10.8369, "eval_samples_per_second": 94.492, "eval_steps_per_second": 23.623, "step": 8700 }, { "epoch": 1.501078236867075, "grad_norm": 0.58984375, "learning_rate": 2.9395257324404204e-06, "loss": 1.5278, "step": 8701 }, { "epoch": 1.501250754765807, "grad_norm": 0.6171875, "learning_rate": 2.9375984558406934e-06, "loss": 1.4436, "step": 8702 }, { "epoch": 1.501423272664539, "grad_norm": 0.5703125, "learning_rate": 2.935671702467794e-06, "loss": 1.4489, "step": 8703 }, { "epoch": 1.5015957905632709, "grad_norm": 0.55859375, "learning_rate": 2.933745472464481e-06, "loss": 1.4356, "step": 8704 }, { "epoch": 1.5017683084620028, "grad_norm": 0.58984375, "learning_rate": 2.9318197659734527e-06, "loss": 1.3582, "step": 8705 }, { "epoch": 1.5019408263607348, "grad_norm": 0.5859375, "learning_rate": 2.9298945831373803e-06, "loss": 1.4326, "step": 8706 }, { "epoch": 1.502113344259467, "grad_norm": 0.671875, "learning_rate": 2.9279699240988936e-06, "loss": 1.4332, "step": 8707 }, { "epoch": 1.502285862158199, "grad_norm": 0.9453125, "learning_rate": 2.9260457890005823e-06, "loss": 1.3855, "step": 8708 }, { "epoch": 1.5024583800569309, "grad_norm": 0.57421875, "learning_rate": 2.924122177984998e-06, "loss": 1.4103, "step": 8709 }, { "epoch": 1.5026308979556628, "grad_norm": 0.5703125, "learning_rate": 2.9221990911946595e-06, "loss": 1.4533, "step": 8710 }, { "epoch": 1.502803415854395, "grad_norm": 0.65234375, "learning_rate": 2.92027652877203e-06, "loss": 1.5407, "step": 8711 }, { "epoch": 1.502975933753127, "grad_norm": 0.5859375, "learning_rate": 2.9183544908595573e-06, "loss": 1.4197, "step": 8712 }, { "epoch": 1.503148451651859, "grad_norm": 0.60546875, "learning_rate": 2.9164329775996293e-06, "loss": 1.4315, "step": 8713 }, { "epoch": 1.503320969550591, "grad_norm": 0.62109375, "learning_rate": 2.9145119891346062e-06, "loss": 1.4136, "step": 8714 }, { "epoch": 1.5034934874493229, "grad_norm": 0.828125, "learning_rate": 2.912591525606807e-06, "loss": 1.3841, "step": 8715 }, { "epoch": 1.5036660053480548, "grad_norm": 1.15625, "learning_rate": 2.9106715871585124e-06, "loss": 1.3314, "step": 8716 }, { "epoch": 1.5038385232467868, "grad_norm": 1.5390625, "learning_rate": 2.9087521739319624e-06, "loss": 1.4136, "step": 8717 }, { "epoch": 1.5040110411455188, "grad_norm": 0.5625, "learning_rate": 2.9068332860693594e-06, "loss": 1.4267, "step": 8718 }, { "epoch": 1.5041835590442507, "grad_norm": 0.578125, "learning_rate": 2.904914923712867e-06, "loss": 1.3874, "step": 8719 }, { "epoch": 1.5043560769429827, "grad_norm": 0.609375, "learning_rate": 2.902997087004609e-06, "loss": 1.415, "step": 8720 }, { "epoch": 1.5045285948417149, "grad_norm": 0.62109375, "learning_rate": 2.901079776086674e-06, "loss": 1.3695, "step": 8721 }, { "epoch": 1.5047011127404468, "grad_norm": 0.5546875, "learning_rate": 2.8991629911011e-06, "loss": 1.4224, "step": 8722 }, { "epoch": 1.5048736306391788, "grad_norm": 0.59765625, "learning_rate": 2.8972467321899045e-06, "loss": 1.4014, "step": 8723 }, { "epoch": 1.505046148537911, "grad_norm": 0.58203125, "learning_rate": 2.895330999495045e-06, "loss": 1.4829, "step": 8724 }, { "epoch": 1.505218666436643, "grad_norm": 0.57421875, "learning_rate": 2.893415793158464e-06, "loss": 1.4623, "step": 8725 }, { "epoch": 1.5053911843353749, "grad_norm": 0.55859375, "learning_rate": 2.891501113322042e-06, "loss": 1.5667, "step": 8726 }, { "epoch": 1.5055637022341068, "grad_norm": 0.625, "learning_rate": 2.8895869601276326e-06, "loss": 1.453, "step": 8727 }, { "epoch": 1.5057362201328388, "grad_norm": 0.70703125, "learning_rate": 2.8876733337170503e-06, "loss": 1.4474, "step": 8728 }, { "epoch": 1.5059087380315708, "grad_norm": 0.5859375, "learning_rate": 2.8857602342320666e-06, "loss": 1.4552, "step": 8729 }, { "epoch": 1.5060812559303027, "grad_norm": 0.578125, "learning_rate": 2.883847661814416e-06, "loss": 1.4481, "step": 8730 }, { "epoch": 1.5062537738290347, "grad_norm": 0.578125, "learning_rate": 2.8819356166057953e-06, "loss": 1.419, "step": 8731 }, { "epoch": 1.5064262917277667, "grad_norm": 0.62109375, "learning_rate": 2.88002409874786e-06, "loss": 1.38, "step": 8732 }, { "epoch": 1.5065988096264986, "grad_norm": 0.59765625, "learning_rate": 2.8781131083822267e-06, "loss": 1.3751, "step": 8733 }, { "epoch": 1.5067713275252308, "grad_norm": 0.64453125, "learning_rate": 2.8762026456504767e-06, "loss": 1.4008, "step": 8734 }, { "epoch": 1.5069438454239628, "grad_norm": 1.1484375, "learning_rate": 2.874292710694141e-06, "loss": 1.4236, "step": 8735 }, { "epoch": 1.5071163633226947, "grad_norm": 0.6484375, "learning_rate": 2.87238330365473e-06, "loss": 1.3983, "step": 8736 }, { "epoch": 1.5072888812214267, "grad_norm": 0.56640625, "learning_rate": 2.8704744246736972e-06, "loss": 1.4195, "step": 8737 }, { "epoch": 1.5074613991201589, "grad_norm": 0.65234375, "learning_rate": 2.868566073892465e-06, "loss": 1.5079, "step": 8738 }, { "epoch": 1.5076339170188908, "grad_norm": 0.6328125, "learning_rate": 2.8666582514524175e-06, "loss": 1.4868, "step": 8739 }, { "epoch": 1.5078064349176228, "grad_norm": 0.59765625, "learning_rate": 2.8647509574948997e-06, "loss": 1.4152, "step": 8740 }, { "epoch": 1.5079789528163547, "grad_norm": 0.61328125, "learning_rate": 2.8628441921612117e-06, "loss": 1.4388, "step": 8741 }, { "epoch": 1.5081514707150867, "grad_norm": 0.5625, "learning_rate": 2.8609379555926255e-06, "loss": 1.4466, "step": 8742 }, { "epoch": 1.5083239886138187, "grad_norm": 0.6015625, "learning_rate": 2.8590322479303554e-06, "loss": 1.3899, "step": 8743 }, { "epoch": 1.5084965065125506, "grad_norm": 0.59765625, "learning_rate": 2.8571270693156028e-06, "loss": 1.5198, "step": 8744 }, { "epoch": 1.5086690244112826, "grad_norm": 0.5859375, "learning_rate": 2.855222419889503e-06, "loss": 1.4558, "step": 8745 }, { "epoch": 1.5088415423100145, "grad_norm": 0.57421875, "learning_rate": 2.8533182997931695e-06, "loss": 1.4012, "step": 8746 }, { "epoch": 1.5090140602087465, "grad_norm": 0.55859375, "learning_rate": 2.8514147091676713e-06, "loss": 1.4706, "step": 8747 }, { "epoch": 1.5091865781074787, "grad_norm": 0.578125, "learning_rate": 2.8495116481540375e-06, "loss": 1.4089, "step": 8748 }, { "epoch": 1.5093590960062107, "grad_norm": 0.671875, "learning_rate": 2.8476091168932594e-06, "loss": 1.4138, "step": 8749 }, { "epoch": 1.5095316139049426, "grad_norm": 0.62109375, "learning_rate": 2.8457071155262885e-06, "loss": 1.3905, "step": 8750 }, { "epoch": 1.5097041318036746, "grad_norm": 0.56640625, "learning_rate": 2.8438056441940353e-06, "loss": 1.4285, "step": 8751 }, { "epoch": 1.5098766497024068, "grad_norm": 0.578125, "learning_rate": 2.8419047030373746e-06, "loss": 1.4192, "step": 8752 }, { "epoch": 1.5100491676011387, "grad_norm": 0.63671875, "learning_rate": 2.8400042921971395e-06, "loss": 1.3904, "step": 8753 }, { "epoch": 1.5102216854998707, "grad_norm": 0.62890625, "learning_rate": 2.8381044118141244e-06, "loss": 1.5384, "step": 8754 }, { "epoch": 1.5103942033986026, "grad_norm": 0.56640625, "learning_rate": 2.836205062029086e-06, "loss": 1.3682, "step": 8755 }, { "epoch": 1.5105667212973346, "grad_norm": 0.59765625, "learning_rate": 2.834306242982733e-06, "loss": 1.4479, "step": 8756 }, { "epoch": 1.5107392391960666, "grad_norm": 0.5546875, "learning_rate": 2.8324079548157525e-06, "loss": 1.5293, "step": 8757 }, { "epoch": 1.5109117570947985, "grad_norm": 0.57421875, "learning_rate": 2.830510197668773e-06, "loss": 1.4863, "step": 8758 }, { "epoch": 1.5110842749935305, "grad_norm": 0.62890625, "learning_rate": 2.828612971682395e-06, "loss": 1.3653, "step": 8759 }, { "epoch": 1.5112567928922624, "grad_norm": 0.5703125, "learning_rate": 2.826716276997177e-06, "loss": 1.3778, "step": 8760 }, { "epoch": 1.5114293107909944, "grad_norm": 0.609375, "learning_rate": 2.824820113753638e-06, "loss": 1.4826, "step": 8761 }, { "epoch": 1.5116018286897266, "grad_norm": 0.68359375, "learning_rate": 2.8229244820922573e-06, "loss": 1.4547, "step": 8762 }, { "epoch": 1.5117743465884586, "grad_norm": 0.6015625, "learning_rate": 2.8210293821534763e-06, "loss": 1.4667, "step": 8763 }, { "epoch": 1.5119468644871905, "grad_norm": 0.5546875, "learning_rate": 2.8191348140776942e-06, "loss": 1.5144, "step": 8764 }, { "epoch": 1.5121193823859227, "grad_norm": 0.59765625, "learning_rate": 2.8172407780052726e-06, "loss": 1.3551, "step": 8765 }, { "epoch": 1.5122919002846547, "grad_norm": 0.59375, "learning_rate": 2.8153472740765385e-06, "loss": 1.44, "step": 8766 }, { "epoch": 1.5124644181833866, "grad_norm": 0.60546875, "learning_rate": 2.813454302431763e-06, "loss": 1.4299, "step": 8767 }, { "epoch": 1.5126369360821186, "grad_norm": 0.61328125, "learning_rate": 2.8115618632112027e-06, "loss": 1.45, "step": 8768 }, { "epoch": 1.5128094539808505, "grad_norm": 0.68359375, "learning_rate": 2.8096699565550525e-06, "loss": 1.4528, "step": 8769 }, { "epoch": 1.5129819718795825, "grad_norm": 0.61328125, "learning_rate": 2.807778582603479e-06, "loss": 1.4855, "step": 8770 }, { "epoch": 1.5131544897783145, "grad_norm": 0.55859375, "learning_rate": 2.805887741496607e-06, "loss": 1.4791, "step": 8771 }, { "epoch": 1.5133270076770464, "grad_norm": 0.59765625, "learning_rate": 2.803997433374521e-06, "loss": 1.4079, "step": 8772 }, { "epoch": 1.5134995255757784, "grad_norm": 0.796875, "learning_rate": 2.80210765837727e-06, "loss": 1.428, "step": 8773 }, { "epoch": 1.5136720434745103, "grad_norm": 0.65625, "learning_rate": 2.80021841664486e-06, "loss": 1.3966, "step": 8774 }, { "epoch": 1.5138445613732425, "grad_norm": 0.6484375, "learning_rate": 2.7983297083172487e-06, "loss": 1.4717, "step": 8775 }, { "epoch": 1.5140170792719745, "grad_norm": 0.58203125, "learning_rate": 2.7964415335343785e-06, "loss": 1.4614, "step": 8776 }, { "epoch": 1.5141895971707064, "grad_norm": 0.640625, "learning_rate": 2.7945538924361258e-06, "loss": 1.4601, "step": 8777 }, { "epoch": 1.5143621150694384, "grad_norm": 0.65234375, "learning_rate": 2.7926667851623437e-06, "loss": 1.4284, "step": 8778 }, { "epoch": 1.5145346329681706, "grad_norm": 0.5703125, "learning_rate": 2.790780211852838e-06, "loss": 1.459, "step": 8779 }, { "epoch": 1.5147071508669026, "grad_norm": 0.609375, "learning_rate": 2.7888941726473773e-06, "loss": 1.413, "step": 8780 }, { "epoch": 1.5148796687656345, "grad_norm": 0.6015625, "learning_rate": 2.787008667685699e-06, "loss": 1.3772, "step": 8781 }, { "epoch": 1.5150521866643665, "grad_norm": 0.578125, "learning_rate": 2.7851236971074848e-06, "loss": 1.5823, "step": 8782 }, { "epoch": 1.5152247045630984, "grad_norm": 0.609375, "learning_rate": 2.783239261052387e-06, "loss": 1.4732, "step": 8783 }, { "epoch": 1.5153972224618304, "grad_norm": 0.578125, "learning_rate": 2.7813553596600175e-06, "loss": 1.4055, "step": 8784 }, { "epoch": 1.5155697403605624, "grad_norm": 0.58984375, "learning_rate": 2.7794719930699477e-06, "loss": 1.3886, "step": 8785 }, { "epoch": 1.5157422582592943, "grad_norm": 0.58203125, "learning_rate": 2.7775891614217075e-06, "loss": 1.3907, "step": 8786 }, { "epoch": 1.5159147761580263, "grad_norm": 0.61328125, "learning_rate": 2.7757068648547938e-06, "loss": 1.3861, "step": 8787 }, { "epoch": 1.5160872940567582, "grad_norm": 0.58203125, "learning_rate": 2.7738251035086485e-06, "loss": 1.4257, "step": 8788 }, { "epoch": 1.5162598119554904, "grad_norm": 0.6484375, "learning_rate": 2.771943877522697e-06, "loss": 1.4905, "step": 8789 }, { "epoch": 1.5164323298542224, "grad_norm": 0.578125, "learning_rate": 2.7700631870363038e-06, "loss": 1.3708, "step": 8790 }, { "epoch": 1.5166048477529543, "grad_norm": 0.6015625, "learning_rate": 2.7681830321888035e-06, "loss": 1.5362, "step": 8791 }, { "epoch": 1.5167773656516865, "grad_norm": 0.578125, "learning_rate": 2.766303413119491e-06, "loss": 1.3839, "step": 8792 }, { "epoch": 1.5169498835504185, "grad_norm": 0.5390625, "learning_rate": 2.7644243299676197e-06, "loss": 1.405, "step": 8793 }, { "epoch": 1.5171224014491504, "grad_norm": 0.62109375, "learning_rate": 2.7625457828724034e-06, "loss": 1.3364, "step": 8794 }, { "epoch": 1.5172949193478824, "grad_norm": 0.63671875, "learning_rate": 2.760667771973018e-06, "loss": 1.4023, "step": 8795 }, { "epoch": 1.5174674372466144, "grad_norm": 0.59375, "learning_rate": 2.7587902974085977e-06, "loss": 1.3987, "step": 8796 }, { "epoch": 1.5176399551453463, "grad_norm": 0.6015625, "learning_rate": 2.756913359318237e-06, "loss": 1.4989, "step": 8797 }, { "epoch": 1.5178124730440783, "grad_norm": 0.6171875, "learning_rate": 2.755036957840994e-06, "loss": 1.349, "step": 8798 }, { "epoch": 1.5179849909428103, "grad_norm": 0.56640625, "learning_rate": 2.753161093115877e-06, "loss": 1.3201, "step": 8799 }, { "epoch": 1.5181575088415422, "grad_norm": 0.5625, "learning_rate": 2.7512857652818716e-06, "loss": 1.517, "step": 8800 }, { "epoch": 1.5181575088415422, "eval_loss": 1.407160758972168, "eval_runtime": 10.9315, "eval_samples_per_second": 93.674, "eval_steps_per_second": 23.419, "step": 8800 }, { "epoch": 1.5183300267402742, "grad_norm": 0.59765625, "learning_rate": 2.7494109744779062e-06, "loss": 1.3835, "step": 8801 }, { "epoch": 1.5185025446390061, "grad_norm": 0.68359375, "learning_rate": 2.7475367208428793e-06, "loss": 1.5125, "step": 8802 }, { "epoch": 1.5186750625377383, "grad_norm": 0.58984375, "learning_rate": 2.7456630045156473e-06, "loss": 1.4015, "step": 8803 }, { "epoch": 1.5188475804364703, "grad_norm": 0.63671875, "learning_rate": 2.7437898256350284e-06, "loss": 1.3302, "step": 8804 }, { "epoch": 1.5190200983352022, "grad_norm": 0.65234375, "learning_rate": 2.7419171843397975e-06, "loss": 1.4035, "step": 8805 }, { "epoch": 1.5191926162339344, "grad_norm": 0.56640625, "learning_rate": 2.740045080768694e-06, "loss": 1.5455, "step": 8806 }, { "epoch": 1.5193651341326664, "grad_norm": 0.56640625, "learning_rate": 2.7381735150604083e-06, "loss": 1.3257, "step": 8807 }, { "epoch": 1.5195376520313983, "grad_norm": 0.578125, "learning_rate": 2.7363024873536093e-06, "loss": 1.4662, "step": 8808 }, { "epoch": 1.5197101699301303, "grad_norm": 0.91796875, "learning_rate": 2.7344319977869037e-06, "loss": 1.4842, "step": 8809 }, { "epoch": 1.5198826878288623, "grad_norm": 0.58984375, "learning_rate": 2.7325620464988733e-06, "loss": 1.4691, "step": 8810 }, { "epoch": 1.5200552057275942, "grad_norm": 0.59765625, "learning_rate": 2.730692633628055e-06, "loss": 1.3634, "step": 8811 }, { "epoch": 1.5202277236263262, "grad_norm": 0.58203125, "learning_rate": 2.728823759312944e-06, "loss": 1.5168, "step": 8812 }, { "epoch": 1.5204002415250581, "grad_norm": 0.59765625, "learning_rate": 2.726955423692008e-06, "loss": 1.4439, "step": 8813 }, { "epoch": 1.52057275942379, "grad_norm": 0.7578125, "learning_rate": 2.7250876269036564e-06, "loss": 1.3487, "step": 8814 }, { "epoch": 1.520745277322522, "grad_norm": 0.5859375, "learning_rate": 2.723220369086267e-06, "loss": 1.4754, "step": 8815 }, { "epoch": 1.5209177952212543, "grad_norm": 0.55859375, "learning_rate": 2.7213536503781813e-06, "loss": 1.4661, "step": 8816 }, { "epoch": 1.5210903131199862, "grad_norm": 0.609375, "learning_rate": 2.7194874709176967e-06, "loss": 1.4603, "step": 8817 }, { "epoch": 1.5212628310187182, "grad_norm": 0.5625, "learning_rate": 2.7176218308430703e-06, "loss": 1.4373, "step": 8818 }, { "epoch": 1.5214353489174501, "grad_norm": 0.58984375, "learning_rate": 2.7157567302925235e-06, "loss": 1.4028, "step": 8819 }, { "epoch": 1.5216078668161823, "grad_norm": 0.58984375, "learning_rate": 2.7138921694042264e-06, "loss": 1.3868, "step": 8820 }, { "epoch": 1.5217803847149143, "grad_norm": 0.57421875, "learning_rate": 2.71202814831633e-06, "loss": 1.5207, "step": 8821 }, { "epoch": 1.5219529026136462, "grad_norm": 0.55859375, "learning_rate": 2.7101646671669224e-06, "loss": 1.3747, "step": 8822 }, { "epoch": 1.5221254205123782, "grad_norm": 0.5703125, "learning_rate": 2.708301726094065e-06, "loss": 1.3591, "step": 8823 }, { "epoch": 1.5222979384111102, "grad_norm": 0.62890625, "learning_rate": 2.7064393252357757e-06, "loss": 1.4247, "step": 8824 }, { "epoch": 1.5224704563098421, "grad_norm": 0.640625, "learning_rate": 2.7045774647300347e-06, "loss": 1.4324, "step": 8825 }, { "epoch": 1.522642974208574, "grad_norm": 0.71875, "learning_rate": 2.7027161447147786e-06, "loss": 1.4449, "step": 8826 }, { "epoch": 1.522815492107306, "grad_norm": 0.56640625, "learning_rate": 2.700855365327909e-06, "loss": 1.4112, "step": 8827 }, { "epoch": 1.522988010006038, "grad_norm": 0.5625, "learning_rate": 2.6989951267072744e-06, "loss": 1.3578, "step": 8828 }, { "epoch": 1.52316052790477, "grad_norm": 0.59375, "learning_rate": 2.697135428990704e-06, "loss": 1.4005, "step": 8829 }, { "epoch": 1.5233330458035022, "grad_norm": 0.57421875, "learning_rate": 2.695276272315973e-06, "loss": 1.4191, "step": 8830 }, { "epoch": 1.5235055637022341, "grad_norm": 0.63671875, "learning_rate": 2.6934176568208126e-06, "loss": 1.4618, "step": 8831 }, { "epoch": 1.523678081600966, "grad_norm": 0.59375, "learning_rate": 2.691559582642932e-06, "loss": 1.2921, "step": 8832 }, { "epoch": 1.5238505994996983, "grad_norm": 0.5703125, "learning_rate": 2.689702049919979e-06, "loss": 1.4131, "step": 8833 }, { "epoch": 1.5240231173984302, "grad_norm": 0.58984375, "learning_rate": 2.6878450587895754e-06, "loss": 1.3017, "step": 8834 }, { "epoch": 1.5241956352971622, "grad_norm": 0.55859375, "learning_rate": 2.6859886093892983e-06, "loss": 1.5027, "step": 8835 }, { "epoch": 1.5243681531958941, "grad_norm": 0.59375, "learning_rate": 2.6841327018566842e-06, "loss": 1.4793, "step": 8836 }, { "epoch": 1.524540671094626, "grad_norm": 0.57421875, "learning_rate": 2.6822773363292333e-06, "loss": 1.3827, "step": 8837 }, { "epoch": 1.524713188993358, "grad_norm": 0.60546875, "learning_rate": 2.6804225129444016e-06, "loss": 1.4242, "step": 8838 }, { "epoch": 1.52488570689209, "grad_norm": 0.625, "learning_rate": 2.6785682318396e-06, "loss": 1.4618, "step": 8839 }, { "epoch": 1.525058224790822, "grad_norm": 0.5703125, "learning_rate": 2.676714493152216e-06, "loss": 1.3629, "step": 8840 }, { "epoch": 1.525230742689554, "grad_norm": 0.56640625, "learning_rate": 2.674861297019574e-06, "loss": 1.3417, "step": 8841 }, { "epoch": 1.525403260588286, "grad_norm": 0.59375, "learning_rate": 2.6730086435789828e-06, "loss": 1.5157, "step": 8842 }, { "epoch": 1.5255757784870179, "grad_norm": 0.671875, "learning_rate": 2.671156532967689e-06, "loss": 1.4606, "step": 8843 }, { "epoch": 1.52574829638575, "grad_norm": 0.6328125, "learning_rate": 2.66930496532291e-06, "loss": 1.4213, "step": 8844 }, { "epoch": 1.525920814284482, "grad_norm": 0.5859375, "learning_rate": 2.667453940781829e-06, "loss": 1.4048, "step": 8845 }, { "epoch": 1.526093332183214, "grad_norm": 0.59765625, "learning_rate": 2.665603459481573e-06, "loss": 1.4262, "step": 8846 }, { "epoch": 1.5262658500819462, "grad_norm": 0.60546875, "learning_rate": 2.6637535215592405e-06, "loss": 1.4697, "step": 8847 }, { "epoch": 1.5264383679806781, "grad_norm": 0.66015625, "learning_rate": 2.6619041271518854e-06, "loss": 1.5062, "step": 8848 }, { "epoch": 1.52661088587941, "grad_norm": 0.6640625, "learning_rate": 2.6600552763965238e-06, "loss": 1.349, "step": 8849 }, { "epoch": 1.526783403778142, "grad_norm": 0.5859375, "learning_rate": 2.6582069694301284e-06, "loss": 1.4759, "step": 8850 }, { "epoch": 1.526955921676874, "grad_norm": 0.58984375, "learning_rate": 2.6563592063896383e-06, "loss": 1.413, "step": 8851 }, { "epoch": 1.527128439575606, "grad_norm": 0.61328125, "learning_rate": 2.6545119874119364e-06, "loss": 1.3788, "step": 8852 }, { "epoch": 1.527300957474338, "grad_norm": 0.609375, "learning_rate": 2.65266531263389e-06, "loss": 1.4434, "step": 8853 }, { "epoch": 1.5274734753730699, "grad_norm": 0.6015625, "learning_rate": 2.6508191821923023e-06, "loss": 1.4395, "step": 8854 }, { "epoch": 1.5276459932718018, "grad_norm": 0.69921875, "learning_rate": 2.6489735962239495e-06, "loss": 1.4694, "step": 8855 }, { "epoch": 1.5278185111705338, "grad_norm": 0.60546875, "learning_rate": 2.6471285548655644e-06, "loss": 1.4755, "step": 8856 }, { "epoch": 1.527991029069266, "grad_norm": 0.6015625, "learning_rate": 2.6452840582538387e-06, "loss": 1.331, "step": 8857 }, { "epoch": 1.528163546967998, "grad_norm": 0.55859375, "learning_rate": 2.6434401065254234e-06, "loss": 1.4054, "step": 8858 }, { "epoch": 1.52833606486673, "grad_norm": 0.5625, "learning_rate": 2.641596699816935e-06, "loss": 1.4518, "step": 8859 }, { "epoch": 1.5285085827654619, "grad_norm": 0.5546875, "learning_rate": 2.6397538382649334e-06, "loss": 1.4237, "step": 8860 }, { "epoch": 1.528681100664194, "grad_norm": 0.59765625, "learning_rate": 2.6379115220059604e-06, "loss": 1.395, "step": 8861 }, { "epoch": 1.528853618562926, "grad_norm": 0.56640625, "learning_rate": 2.6360697511765053e-06, "loss": 1.429, "step": 8862 }, { "epoch": 1.529026136461658, "grad_norm": 0.58203125, "learning_rate": 2.6342285259130085e-06, "loss": 1.3681, "step": 8863 }, { "epoch": 1.52919865436039, "grad_norm": 0.640625, "learning_rate": 2.6323878463518915e-06, "loss": 1.4454, "step": 8864 }, { "epoch": 1.529371172259122, "grad_norm": 0.62890625, "learning_rate": 2.6305477126295152e-06, "loss": 1.443, "step": 8865 }, { "epoch": 1.5295436901578539, "grad_norm": 0.67578125, "learning_rate": 2.628708124882212e-06, "loss": 1.5633, "step": 8866 }, { "epoch": 1.5297162080565858, "grad_norm": 0.65625, "learning_rate": 2.626869083246267e-06, "loss": 1.4025, "step": 8867 }, { "epoch": 1.5298887259553178, "grad_norm": 0.5859375, "learning_rate": 2.625030587857931e-06, "loss": 1.512, "step": 8868 }, { "epoch": 1.5300612438540497, "grad_norm": 0.5859375, "learning_rate": 2.62319263885341e-06, "loss": 1.4404, "step": 8869 }, { "epoch": 1.5302337617527817, "grad_norm": 0.59375, "learning_rate": 2.6213552363688734e-06, "loss": 1.5006, "step": 8870 }, { "epoch": 1.5304062796515139, "grad_norm": 0.64453125, "learning_rate": 2.619518380540439e-06, "loss": 1.456, "step": 8871 }, { "epoch": 1.5305787975502458, "grad_norm": 0.59375, "learning_rate": 2.617682071504204e-06, "loss": 1.4089, "step": 8872 }, { "epoch": 1.5307513154489778, "grad_norm": 0.60546875, "learning_rate": 2.6158463093962015e-06, "loss": 1.423, "step": 8873 }, { "epoch": 1.53092383334771, "grad_norm": 0.61328125, "learning_rate": 2.61401109435245e-06, "loss": 1.5205, "step": 8874 }, { "epoch": 1.531096351246442, "grad_norm": 0.546875, "learning_rate": 2.612176426508902e-06, "loss": 1.408, "step": 8875 }, { "epoch": 1.531268869145174, "grad_norm": 0.59765625, "learning_rate": 2.610342306001484e-06, "loss": 1.4899, "step": 8876 }, { "epoch": 1.5314413870439059, "grad_norm": 0.66796875, "learning_rate": 2.6085087329660864e-06, "loss": 1.4048, "step": 8877 }, { "epoch": 1.5316139049426378, "grad_norm": 0.5703125, "learning_rate": 2.606675707538542e-06, "loss": 1.4059, "step": 8878 }, { "epoch": 1.5317864228413698, "grad_norm": 0.58203125, "learning_rate": 2.6048432298546576e-06, "loss": 1.3969, "step": 8879 }, { "epoch": 1.5319589407401017, "grad_norm": 0.59375, "learning_rate": 2.6030113000501933e-06, "loss": 1.3853, "step": 8880 }, { "epoch": 1.5321314586388337, "grad_norm": 0.58203125, "learning_rate": 2.601179918260871e-06, "loss": 1.4777, "step": 8881 }, { "epoch": 1.5323039765375657, "grad_norm": 0.56640625, "learning_rate": 2.59934908462237e-06, "loss": 1.321, "step": 8882 }, { "epoch": 1.5324764944362976, "grad_norm": 0.640625, "learning_rate": 2.5975187992703333e-06, "loss": 1.528, "step": 8883 }, { "epoch": 1.5326490123350298, "grad_norm": 0.5703125, "learning_rate": 2.595689062340351e-06, "loss": 1.4284, "step": 8884 }, { "epoch": 1.5328215302337618, "grad_norm": 0.58203125, "learning_rate": 2.5938598739679934e-06, "loss": 1.3888, "step": 8885 }, { "epoch": 1.5329940481324937, "grad_norm": 0.58984375, "learning_rate": 2.5920312342887687e-06, "loss": 1.4692, "step": 8886 }, { "epoch": 1.5331665660312257, "grad_norm": 0.58984375, "learning_rate": 2.590203143438157e-06, "loss": 1.4418, "step": 8887 }, { "epoch": 1.5333390839299579, "grad_norm": 0.59375, "learning_rate": 2.588375601551595e-06, "loss": 1.4874, "step": 8888 }, { "epoch": 1.5335116018286898, "grad_norm": 0.578125, "learning_rate": 2.586548608764479e-06, "loss": 1.4726, "step": 8889 }, { "epoch": 1.5336841197274218, "grad_norm": 0.6015625, "learning_rate": 2.5847221652121634e-06, "loss": 1.3969, "step": 8890 }, { "epoch": 1.5338566376261538, "grad_norm": 0.64453125, "learning_rate": 2.5828962710299655e-06, "loss": 1.3807, "step": 8891 }, { "epoch": 1.5340291555248857, "grad_norm": 0.8828125, "learning_rate": 2.5810709263531496e-06, "loss": 1.4172, "step": 8892 }, { "epoch": 1.5342016734236177, "grad_norm": 0.58984375, "learning_rate": 2.579246131316958e-06, "loss": 1.5168, "step": 8893 }, { "epoch": 1.5343741913223496, "grad_norm": 0.59375, "learning_rate": 2.5774218860565836e-06, "loss": 1.4052, "step": 8894 }, { "epoch": 1.5345467092210816, "grad_norm": 0.578125, "learning_rate": 2.5755981907071683e-06, "loss": 1.393, "step": 8895 }, { "epoch": 1.5347192271198136, "grad_norm": 0.59765625, "learning_rate": 2.5737750454038346e-06, "loss": 1.5, "step": 8896 }, { "epoch": 1.5348917450185455, "grad_norm": 0.55078125, "learning_rate": 2.571952450281643e-06, "loss": 1.4573, "step": 8897 }, { "epoch": 1.5350642629172777, "grad_norm": 0.61328125, "learning_rate": 2.570130405475627e-06, "loss": 1.4203, "step": 8898 }, { "epoch": 1.5352367808160097, "grad_norm": 0.578125, "learning_rate": 2.568308911120775e-06, "loss": 1.3794, "step": 8899 }, { "epoch": 1.5354092987147416, "grad_norm": 0.56640625, "learning_rate": 2.5664879673520337e-06, "loss": 1.4801, "step": 8900 }, { "epoch": 1.5354092987147416, "eval_loss": 1.4071331024169922, "eval_runtime": 10.9461, "eval_samples_per_second": 93.549, "eval_steps_per_second": 23.387, "step": 8900 }, { "epoch": 1.5355818166134736, "grad_norm": 0.58203125, "learning_rate": 2.5646675743043115e-06, "loss": 1.5236, "step": 8901 }, { "epoch": 1.5357543345122058, "grad_norm": 0.61328125, "learning_rate": 2.562847732112472e-06, "loss": 1.4504, "step": 8902 }, { "epoch": 1.5359268524109377, "grad_norm": 0.578125, "learning_rate": 2.561028440911343e-06, "loss": 1.3734, "step": 8903 }, { "epoch": 1.5360993703096697, "grad_norm": 0.828125, "learning_rate": 2.5592097008357107e-06, "loss": 1.4178, "step": 8904 }, { "epoch": 1.5362718882084017, "grad_norm": 0.59375, "learning_rate": 2.5573915120203097e-06, "loss": 1.447, "step": 8905 }, { "epoch": 1.5364444061071336, "grad_norm": 0.55859375, "learning_rate": 2.555573874599856e-06, "loss": 1.3607, "step": 8906 }, { "epoch": 1.5366169240058656, "grad_norm": 0.6484375, "learning_rate": 2.553756788709001e-06, "loss": 1.4766, "step": 8907 }, { "epoch": 1.5367894419045975, "grad_norm": 0.578125, "learning_rate": 2.5519402544823656e-06, "loss": 1.4166, "step": 8908 }, { "epoch": 1.5369619598033295, "grad_norm": 0.6171875, "learning_rate": 2.5501242720545404e-06, "loss": 1.4674, "step": 8909 }, { "epoch": 1.5371344777020615, "grad_norm": 1.2734375, "learning_rate": 2.5483088415600552e-06, "loss": 1.5013, "step": 8910 }, { "epoch": 1.5373069956007934, "grad_norm": 0.5625, "learning_rate": 2.5464939631334106e-06, "loss": 1.4255, "step": 8911 }, { "epoch": 1.5374795134995256, "grad_norm": 0.546875, "learning_rate": 2.5446796369090665e-06, "loss": 1.3638, "step": 8912 }, { "epoch": 1.5376520313982576, "grad_norm": 0.64453125, "learning_rate": 2.5428658630214365e-06, "loss": 1.4622, "step": 8913 }, { "epoch": 1.5378245492969895, "grad_norm": 0.58203125, "learning_rate": 2.5410526416048986e-06, "loss": 1.3916, "step": 8914 }, { "epoch": 1.5379970671957217, "grad_norm": 0.5859375, "learning_rate": 2.539239972793789e-06, "loss": 1.4569, "step": 8915 }, { "epoch": 1.5381695850944537, "grad_norm": 0.55859375, "learning_rate": 2.537427856722393e-06, "loss": 1.4265, "step": 8916 }, { "epoch": 1.5383421029931856, "grad_norm": 0.5625, "learning_rate": 2.5356162935249762e-06, "loss": 1.4431, "step": 8917 }, { "epoch": 1.5385146208919176, "grad_norm": 0.56640625, "learning_rate": 2.5338052833357406e-06, "loss": 1.4271, "step": 8918 }, { "epoch": 1.5386871387906496, "grad_norm": 0.59375, "learning_rate": 2.5319948262888604e-06, "loss": 1.3865, "step": 8919 }, { "epoch": 1.5388596566893815, "grad_norm": 0.59375, "learning_rate": 2.5301849225184673e-06, "loss": 1.497, "step": 8920 }, { "epoch": 1.5390321745881135, "grad_norm": 0.62109375, "learning_rate": 2.528375572158647e-06, "loss": 1.3021, "step": 8921 }, { "epoch": 1.5392046924868454, "grad_norm": 0.55859375, "learning_rate": 2.5265667753434498e-06, "loss": 1.3756, "step": 8922 }, { "epoch": 1.5393772103855774, "grad_norm": 0.6015625, "learning_rate": 2.5247585322068847e-06, "loss": 1.4439, "step": 8923 }, { "epoch": 1.5395497282843094, "grad_norm": 0.59765625, "learning_rate": 2.52295084288291e-06, "loss": 1.4724, "step": 8924 }, { "epoch": 1.5397222461830415, "grad_norm": 0.62109375, "learning_rate": 2.521143707505457e-06, "loss": 1.3917, "step": 8925 }, { "epoch": 1.5398947640817735, "grad_norm": 0.58984375, "learning_rate": 2.519337126208412e-06, "loss": 1.3374, "step": 8926 }, { "epoch": 1.5400672819805055, "grad_norm": 0.58203125, "learning_rate": 2.5175310991256085e-06, "loss": 1.4126, "step": 8927 }, { "epoch": 1.5402397998792374, "grad_norm": 0.61328125, "learning_rate": 2.515725626390859e-06, "loss": 1.4605, "step": 8928 }, { "epoch": 1.5404123177779696, "grad_norm": 0.5859375, "learning_rate": 2.5139207081379134e-06, "loss": 1.4168, "step": 8929 }, { "epoch": 1.5405848356767016, "grad_norm": 0.5703125, "learning_rate": 2.5121163445005025e-06, "loss": 1.5422, "step": 8930 }, { "epoch": 1.5407573535754335, "grad_norm": 0.6015625, "learning_rate": 2.510312535612297e-06, "loss": 1.3716, "step": 8931 }, { "epoch": 1.5409298714741655, "grad_norm": 0.57421875, "learning_rate": 2.5085092816069367e-06, "loss": 1.4441, "step": 8932 }, { "epoch": 1.5411023893728975, "grad_norm": 0.5859375, "learning_rate": 2.506706582618017e-06, "loss": 1.5556, "step": 8933 }, { "epoch": 1.5412749072716294, "grad_norm": 0.60546875, "learning_rate": 2.5049044387790943e-06, "loss": 1.3855, "step": 8934 }, { "epoch": 1.5414474251703614, "grad_norm": 0.58984375, "learning_rate": 2.503102850223682e-06, "loss": 1.4681, "step": 8935 }, { "epoch": 1.5416199430690933, "grad_norm": 0.6328125, "learning_rate": 2.5013018170852566e-06, "loss": 1.4861, "step": 8936 }, { "epoch": 1.5417924609678253, "grad_norm": 0.59765625, "learning_rate": 2.499501339497241e-06, "loss": 1.3168, "step": 8937 }, { "epoch": 1.5419649788665573, "grad_norm": 0.62890625, "learning_rate": 2.4977014175930368e-06, "loss": 1.5329, "step": 8938 }, { "epoch": 1.5421374967652894, "grad_norm": 0.578125, "learning_rate": 2.495902051505986e-06, "loss": 1.4227, "step": 8939 }, { "epoch": 1.5423100146640214, "grad_norm": 0.55078125, "learning_rate": 2.4941032413693955e-06, "loss": 1.4139, "step": 8940 }, { "epoch": 1.5424825325627534, "grad_norm": 0.57421875, "learning_rate": 2.4923049873165415e-06, "loss": 1.3561, "step": 8941 }, { "epoch": 1.5426550504614855, "grad_norm": 0.5703125, "learning_rate": 2.4905072894806414e-06, "loss": 1.3619, "step": 8942 }, { "epoch": 1.5428275683602175, "grad_norm": 0.62109375, "learning_rate": 2.4887101479948826e-06, "loss": 1.438, "step": 8943 }, { "epoch": 1.5430000862589495, "grad_norm": 0.5703125, "learning_rate": 2.486913562992409e-06, "loss": 1.3082, "step": 8944 }, { "epoch": 1.5431726041576814, "grad_norm": 0.56640625, "learning_rate": 2.4851175346063227e-06, "loss": 1.3638, "step": 8945 }, { "epoch": 1.5433451220564134, "grad_norm": 0.56640625, "learning_rate": 2.4833220629696852e-06, "loss": 1.459, "step": 8946 }, { "epoch": 1.5435176399551453, "grad_norm": 0.578125, "learning_rate": 2.481527148215518e-06, "loss": 1.5179, "step": 8947 }, { "epoch": 1.5436901578538773, "grad_norm": 0.5859375, "learning_rate": 2.479732790476791e-06, "loss": 1.3455, "step": 8948 }, { "epoch": 1.5438626757526093, "grad_norm": 0.6171875, "learning_rate": 2.4779389898864538e-06, "loss": 1.5416, "step": 8949 }, { "epoch": 1.5440351936513412, "grad_norm": 0.60546875, "learning_rate": 2.476145746577394e-06, "loss": 1.5077, "step": 8950 }, { "epoch": 1.5442077115500732, "grad_norm": 0.62890625, "learning_rate": 2.474353060682467e-06, "loss": 1.4062, "step": 8951 }, { "epoch": 1.5443802294488052, "grad_norm": 0.58203125, "learning_rate": 2.472560932334489e-06, "loss": 1.4813, "step": 8952 }, { "epoch": 1.5445527473475373, "grad_norm": 0.6328125, "learning_rate": 2.470769361666231e-06, "loss": 1.407, "step": 8953 }, { "epoch": 1.5447252652462693, "grad_norm": 0.6171875, "learning_rate": 2.4689783488104223e-06, "loss": 1.4856, "step": 8954 }, { "epoch": 1.5448977831450013, "grad_norm": 0.65234375, "learning_rate": 2.4671878938997572e-06, "loss": 1.4617, "step": 8955 }, { "epoch": 1.5450703010437334, "grad_norm": 0.58984375, "learning_rate": 2.465397997066874e-06, "loss": 1.4065, "step": 8956 }, { "epoch": 1.5452428189424654, "grad_norm": 0.60546875, "learning_rate": 2.4636086584443885e-06, "loss": 1.4875, "step": 8957 }, { "epoch": 1.5454153368411974, "grad_norm": 0.66796875, "learning_rate": 2.4618198781648663e-06, "loss": 1.3696, "step": 8958 }, { "epoch": 1.5455878547399293, "grad_norm": 0.57421875, "learning_rate": 2.460031656360822e-06, "loss": 1.3794, "step": 8959 }, { "epoch": 1.5457603726386613, "grad_norm": 0.67578125, "learning_rate": 2.4582439931647507e-06, "loss": 1.4181, "step": 8960 }, { "epoch": 1.5459328905373932, "grad_norm": 0.62109375, "learning_rate": 2.4564568887090814e-06, "loss": 1.54, "step": 8961 }, { "epoch": 1.5461054084361252, "grad_norm": 0.59765625, "learning_rate": 2.4546703431262253e-06, "loss": 1.4814, "step": 8962 }, { "epoch": 1.5462779263348572, "grad_norm": 0.609375, "learning_rate": 2.452884356548533e-06, "loss": 1.3949, "step": 8963 }, { "epoch": 1.5464504442335891, "grad_norm": 0.62109375, "learning_rate": 2.4510989291083245e-06, "loss": 1.3333, "step": 8964 }, { "epoch": 1.546622962132321, "grad_norm": 0.5625, "learning_rate": 2.4493140609378753e-06, "loss": 1.3779, "step": 8965 }, { "epoch": 1.5467954800310533, "grad_norm": 0.5703125, "learning_rate": 2.4475297521694187e-06, "loss": 1.4457, "step": 8966 }, { "epoch": 1.5469679979297852, "grad_norm": 0.5703125, "learning_rate": 2.4457460029351476e-06, "loss": 1.4055, "step": 8967 }, { "epoch": 1.5471405158285172, "grad_norm": 0.55859375, "learning_rate": 2.443962813367218e-06, "loss": 1.4068, "step": 8968 }, { "epoch": 1.5473130337272492, "grad_norm": 0.83984375, "learning_rate": 2.4421801835977286e-06, "loss": 1.521, "step": 8969 }, { "epoch": 1.5474855516259813, "grad_norm": 0.58203125, "learning_rate": 2.440398113758761e-06, "loss": 1.4514, "step": 8970 }, { "epoch": 1.5476580695247133, "grad_norm": 0.640625, "learning_rate": 2.438616603982332e-06, "loss": 1.5065, "step": 8971 }, { "epoch": 1.5478305874234453, "grad_norm": 0.6171875, "learning_rate": 2.436835654400429e-06, "loss": 1.4306, "step": 8972 }, { "epoch": 1.5480031053221772, "grad_norm": 0.66796875, "learning_rate": 2.435055265145003e-06, "loss": 1.4412, "step": 8973 }, { "epoch": 1.5481756232209092, "grad_norm": 0.59765625, "learning_rate": 2.4332754363479483e-06, "loss": 1.4578, "step": 8974 }, { "epoch": 1.5483481411196411, "grad_norm": 0.5546875, "learning_rate": 2.4314961681411276e-06, "loss": 1.4251, "step": 8975 }, { "epoch": 1.548520659018373, "grad_norm": 0.6640625, "learning_rate": 2.429717460656361e-06, "loss": 1.5216, "step": 8976 }, { "epoch": 1.548693176917105, "grad_norm": 0.58203125, "learning_rate": 2.427939314025427e-06, "loss": 1.4381, "step": 8977 }, { "epoch": 1.548865694815837, "grad_norm": 0.546875, "learning_rate": 2.4261617283800597e-06, "loss": 1.4402, "step": 8978 }, { "epoch": 1.549038212714569, "grad_norm": 0.625, "learning_rate": 2.42438470385196e-06, "loss": 1.3946, "step": 8979 }, { "epoch": 1.5492107306133012, "grad_norm": 0.58984375, "learning_rate": 2.422608240572768e-06, "loss": 1.4762, "step": 8980 }, { "epoch": 1.5493832485120331, "grad_norm": 0.6640625, "learning_rate": 2.4208323386741107e-06, "loss": 1.3965, "step": 8981 }, { "epoch": 1.549555766410765, "grad_norm": 0.62890625, "learning_rate": 2.419056998287547e-06, "loss": 1.4151, "step": 8982 }, { "epoch": 1.5497282843094973, "grad_norm": 0.88671875, "learning_rate": 2.417282219544609e-06, "loss": 1.538, "step": 8983 }, { "epoch": 1.5499008022082292, "grad_norm": 0.59765625, "learning_rate": 2.415508002576783e-06, "loss": 1.3951, "step": 8984 }, { "epoch": 1.5500733201069612, "grad_norm": 0.69921875, "learning_rate": 2.413734347515514e-06, "loss": 1.3653, "step": 8985 }, { "epoch": 1.5502458380056932, "grad_norm": 0.5859375, "learning_rate": 2.411961254492207e-06, "loss": 1.5114, "step": 8986 }, { "epoch": 1.5504183559044251, "grad_norm": 0.61328125, "learning_rate": 2.4101887236382237e-06, "loss": 1.4353, "step": 8987 }, { "epoch": 1.550590873803157, "grad_norm": 0.58984375, "learning_rate": 2.408416755084878e-06, "loss": 1.3855, "step": 8988 }, { "epoch": 1.550763391701889, "grad_norm": 0.625, "learning_rate": 2.4066453489634565e-06, "loss": 1.4579, "step": 8989 }, { "epoch": 1.550935909600621, "grad_norm": 0.55078125, "learning_rate": 2.4048745054051924e-06, "loss": 1.3656, "step": 8990 }, { "epoch": 1.551108427499353, "grad_norm": 0.80078125, "learning_rate": 2.403104224541283e-06, "loss": 1.4416, "step": 8991 }, { "epoch": 1.551280945398085, "grad_norm": 0.57421875, "learning_rate": 2.4013345065028816e-06, "loss": 1.3904, "step": 8992 }, { "epoch": 1.5514534632968169, "grad_norm": 0.59375, "learning_rate": 2.3995653514210936e-06, "loss": 1.4777, "step": 8993 }, { "epoch": 1.551625981195549, "grad_norm": 0.6484375, "learning_rate": 2.3977967594270003e-06, "loss": 1.5117, "step": 8994 }, { "epoch": 1.551798499094281, "grad_norm": 0.5703125, "learning_rate": 2.3960287306516193e-06, "loss": 1.4209, "step": 8995 }, { "epoch": 1.551971016993013, "grad_norm": 0.578125, "learning_rate": 2.3942612652259436e-06, "loss": 1.4499, "step": 8996 }, { "epoch": 1.5521435348917452, "grad_norm": 0.58203125, "learning_rate": 2.392494363280915e-06, "loss": 1.3548, "step": 8997 }, { "epoch": 1.5523160527904771, "grad_norm": 0.625, "learning_rate": 2.3907280249474384e-06, "loss": 1.4644, "step": 8998 }, { "epoch": 1.552488570689209, "grad_norm": 0.58203125, "learning_rate": 2.3889622503563734e-06, "loss": 1.52, "step": 8999 }, { "epoch": 1.552661088587941, "grad_norm": 0.5625, "learning_rate": 2.3871970396385457e-06, "loss": 1.4037, "step": 9000 }, { "epoch": 1.552661088587941, "eval_loss": 1.4071366786956787, "eval_runtime": 10.8655, "eval_samples_per_second": 94.243, "eval_steps_per_second": 23.561, "step": 9000 }, { "epoch": 1.552833606486673, "grad_norm": 0.5546875, "learning_rate": 2.3854323929247214e-06, "loss": 1.36, "step": 9001 }, { "epoch": 1.553006124385405, "grad_norm": 0.609375, "learning_rate": 2.3836683103456493e-06, "loss": 1.5039, "step": 9002 }, { "epoch": 1.553178642284137, "grad_norm": 0.578125, "learning_rate": 2.381904792032015e-06, "loss": 1.3965, "step": 9003 }, { "epoch": 1.553351160182869, "grad_norm": 0.59375, "learning_rate": 2.3801418381144712e-06, "loss": 1.4204, "step": 9004 }, { "epoch": 1.5535236780816009, "grad_norm": 8.9375, "learning_rate": 2.3783794487236367e-06, "loss": 1.4602, "step": 9005 }, { "epoch": 1.5536961959803328, "grad_norm": 0.59765625, "learning_rate": 2.3766176239900717e-06, "loss": 1.4365, "step": 9006 }, { "epoch": 1.553868713879065, "grad_norm": 0.6875, "learning_rate": 2.3748563640443066e-06, "loss": 1.3844, "step": 9007 }, { "epoch": 1.554041231777797, "grad_norm": 0.59375, "learning_rate": 2.373095669016825e-06, "loss": 1.4673, "step": 9008 }, { "epoch": 1.554213749676529, "grad_norm": 0.65234375, "learning_rate": 2.371335539038073e-06, "loss": 1.3874, "step": 9009 }, { "epoch": 1.5543862675752609, "grad_norm": 0.6171875, "learning_rate": 2.3695759742384495e-06, "loss": 1.485, "step": 9010 }, { "epoch": 1.554558785473993, "grad_norm": 0.703125, "learning_rate": 2.367816974748317e-06, "loss": 1.4039, "step": 9011 }, { "epoch": 1.554731303372725, "grad_norm": 0.58984375, "learning_rate": 2.3660585406979865e-06, "loss": 1.3879, "step": 9012 }, { "epoch": 1.554903821271457, "grad_norm": 0.59765625, "learning_rate": 2.364300672217744e-06, "loss": 1.3803, "step": 9013 }, { "epoch": 1.555076339170189, "grad_norm": 0.578125, "learning_rate": 2.3625433694378143e-06, "loss": 1.3202, "step": 9014 }, { "epoch": 1.555248857068921, "grad_norm": 0.59375, "learning_rate": 2.3607866324883934e-06, "loss": 1.4688, "step": 9015 }, { "epoch": 1.5554213749676529, "grad_norm": 0.671875, "learning_rate": 2.3590304614996305e-06, "loss": 1.4, "step": 9016 }, { "epoch": 1.5555938928663848, "grad_norm": 0.55859375, "learning_rate": 2.3572748566016345e-06, "loss": 1.4108, "step": 9017 }, { "epoch": 1.5557664107651168, "grad_norm": 0.5703125, "learning_rate": 2.3555198179244707e-06, "loss": 1.3687, "step": 9018 }, { "epoch": 1.5559389286638488, "grad_norm": 0.68359375, "learning_rate": 2.3537653455981655e-06, "loss": 1.4195, "step": 9019 }, { "epoch": 1.5561114465625807, "grad_norm": 0.7734375, "learning_rate": 2.352011439752695e-06, "loss": 1.3848, "step": 9020 }, { "epoch": 1.556283964461313, "grad_norm": 0.55859375, "learning_rate": 2.350258100518007e-06, "loss": 1.4125, "step": 9021 }, { "epoch": 1.5564564823600449, "grad_norm": 0.6015625, "learning_rate": 2.3485053280239955e-06, "loss": 1.4128, "step": 9022 }, { "epoch": 1.5566290002587768, "grad_norm": 0.58203125, "learning_rate": 2.3467531224005192e-06, "loss": 1.4206, "step": 9023 }, { "epoch": 1.556801518157509, "grad_norm": 0.5546875, "learning_rate": 2.3450014837773937e-06, "loss": 1.4219, "step": 9024 }, { "epoch": 1.556974036056241, "grad_norm": 0.5703125, "learning_rate": 2.3432504122843826e-06, "loss": 1.4805, "step": 9025 }, { "epoch": 1.557146553954973, "grad_norm": 0.703125, "learning_rate": 2.341499908051229e-06, "loss": 1.3797, "step": 9026 }, { "epoch": 1.5573190718537049, "grad_norm": 0.56640625, "learning_rate": 2.3397499712076124e-06, "loss": 1.3865, "step": 9027 }, { "epoch": 1.5574915897524368, "grad_norm": 0.62890625, "learning_rate": 2.3380006018831804e-06, "loss": 1.3843, "step": 9028 }, { "epoch": 1.5576641076511688, "grad_norm": 0.58203125, "learning_rate": 2.3362518002075375e-06, "loss": 1.3683, "step": 9029 }, { "epoch": 1.5578366255499008, "grad_norm": 0.734375, "learning_rate": 2.3345035663102477e-06, "loss": 1.5048, "step": 9030 }, { "epoch": 1.5580091434486327, "grad_norm": 0.60546875, "learning_rate": 2.33275590032083e-06, "loss": 1.4577, "step": 9031 }, { "epoch": 1.5581816613473647, "grad_norm": 0.59375, "learning_rate": 2.331008802368765e-06, "loss": 1.3813, "step": 9032 }, { "epoch": 1.5583541792460966, "grad_norm": 0.56640625, "learning_rate": 2.32926227258348e-06, "loss": 1.4236, "step": 9033 }, { "epoch": 1.5585266971448286, "grad_norm": 0.59375, "learning_rate": 2.3275163110943812e-06, "loss": 1.3342, "step": 9034 }, { "epoch": 1.5586992150435608, "grad_norm": 0.609375, "learning_rate": 2.325770918030811e-06, "loss": 1.4575, "step": 9035 }, { "epoch": 1.5588717329422928, "grad_norm": 0.60546875, "learning_rate": 2.3240260935220804e-06, "loss": 1.4458, "step": 9036 }, { "epoch": 1.5590442508410247, "grad_norm": 0.59375, "learning_rate": 2.32228183769746e-06, "loss": 1.4072, "step": 9037 }, { "epoch": 1.559216768739757, "grad_norm": 0.5703125, "learning_rate": 2.3205381506861735e-06, "loss": 1.3684, "step": 9038 }, { "epoch": 1.5593892866384889, "grad_norm": 0.65234375, "learning_rate": 2.3187950326174048e-06, "loss": 1.4303, "step": 9039 }, { "epoch": 1.5595618045372208, "grad_norm": 0.58203125, "learning_rate": 2.3170524836202936e-06, "loss": 1.4655, "step": 9040 }, { "epoch": 1.5597343224359528, "grad_norm": 0.60546875, "learning_rate": 2.31531050382394e-06, "loss": 1.3948, "step": 9041 }, { "epoch": 1.5599068403346847, "grad_norm": 0.58203125, "learning_rate": 2.3135690933574007e-06, "loss": 1.4346, "step": 9042 }, { "epoch": 1.5600793582334167, "grad_norm": 0.640625, "learning_rate": 2.3118282523496917e-06, "loss": 1.4503, "step": 9043 }, { "epoch": 1.5602518761321487, "grad_norm": 0.5859375, "learning_rate": 2.3100879809297793e-06, "loss": 1.5228, "step": 9044 }, { "epoch": 1.5604243940308806, "grad_norm": 0.5625, "learning_rate": 2.3083482792266032e-06, "loss": 1.4708, "step": 9045 }, { "epoch": 1.5605969119296126, "grad_norm": 0.5546875, "learning_rate": 2.3066091473690433e-06, "loss": 1.3849, "step": 9046 }, { "epoch": 1.5607694298283445, "grad_norm": 0.625, "learning_rate": 2.304870585485949e-06, "loss": 1.4502, "step": 9047 }, { "epoch": 1.5609419477270767, "grad_norm": 0.640625, "learning_rate": 2.3031325937061222e-06, "loss": 1.3883, "step": 9048 }, { "epoch": 1.5611144656258087, "grad_norm": 0.55859375, "learning_rate": 2.301395172158325e-06, "loss": 1.4215, "step": 9049 }, { "epoch": 1.5612869835245407, "grad_norm": 0.578125, "learning_rate": 2.2996583209712776e-06, "loss": 1.4415, "step": 9050 }, { "epoch": 1.5614595014232726, "grad_norm": 0.57421875, "learning_rate": 2.2979220402736547e-06, "loss": 1.4267, "step": 9051 }, { "epoch": 1.5616320193220048, "grad_norm": 0.55859375, "learning_rate": 2.2961863301940924e-06, "loss": 1.4367, "step": 9052 }, { "epoch": 1.5618045372207368, "grad_norm": 0.6171875, "learning_rate": 2.294451190861182e-06, "loss": 1.4087, "step": 9053 }, { "epoch": 1.5619770551194687, "grad_norm": 0.640625, "learning_rate": 2.292716622403475e-06, "loss": 1.4869, "step": 9054 }, { "epoch": 1.5621495730182007, "grad_norm": 0.6171875, "learning_rate": 2.290982624949477e-06, "loss": 1.4822, "step": 9055 }, { "epoch": 1.5623220909169326, "grad_norm": 0.5625, "learning_rate": 2.2892491986276578e-06, "loss": 1.437, "step": 9056 }, { "epoch": 1.5624946088156646, "grad_norm": 0.62109375, "learning_rate": 2.2875163435664306e-06, "loss": 1.4063, "step": 9057 }, { "epoch": 1.5626671267143966, "grad_norm": 0.58203125, "learning_rate": 2.285784059894188e-06, "loss": 1.4217, "step": 9058 }, { "epoch": 1.5628396446131285, "grad_norm": 0.6015625, "learning_rate": 2.2840523477392606e-06, "loss": 1.3661, "step": 9059 }, { "epoch": 1.5630121625118605, "grad_norm": 0.625, "learning_rate": 2.2823212072299463e-06, "loss": 1.4059, "step": 9060 }, { "epoch": 1.5631846804105924, "grad_norm": 0.56640625, "learning_rate": 2.2805906384945e-06, "loss": 1.4256, "step": 9061 }, { "epoch": 1.5633571983093246, "grad_norm": 0.578125, "learning_rate": 2.2788606416611314e-06, "loss": 1.514, "step": 9062 }, { "epoch": 1.5635297162080566, "grad_norm": 0.58984375, "learning_rate": 2.277131216858011e-06, "loss": 1.5115, "step": 9063 }, { "epoch": 1.5637022341067885, "grad_norm": 0.5859375, "learning_rate": 2.275402364213267e-06, "loss": 1.4034, "step": 9064 }, { "epoch": 1.5638747520055207, "grad_norm": 0.5546875, "learning_rate": 2.2736740838549765e-06, "loss": 1.3902, "step": 9065 }, { "epoch": 1.5640472699042527, "grad_norm": 0.58203125, "learning_rate": 2.2719463759111914e-06, "loss": 1.5219, "step": 9066 }, { "epoch": 1.5642197878029847, "grad_norm": 0.55859375, "learning_rate": 2.2702192405099035e-06, "loss": 1.414, "step": 9067 }, { "epoch": 1.5643923057017166, "grad_norm": 0.5703125, "learning_rate": 2.2684926777790717e-06, "loss": 1.5138, "step": 9068 }, { "epoch": 1.5645648236004486, "grad_norm": 0.57421875, "learning_rate": 2.266766687846611e-06, "loss": 1.4112, "step": 9069 }, { "epoch": 1.5647373414991805, "grad_norm": 0.578125, "learning_rate": 2.2650412708403934e-06, "loss": 1.4174, "step": 9070 }, { "epoch": 1.5649098593979125, "grad_norm": 0.609375, "learning_rate": 2.2633164268882492e-06, "loss": 1.4252, "step": 9071 }, { "epoch": 1.5650823772966445, "grad_norm": 0.5625, "learning_rate": 2.261592156117964e-06, "loss": 1.4543, "step": 9072 }, { "epoch": 1.5652548951953764, "grad_norm": 0.62109375, "learning_rate": 2.259868458657285e-06, "loss": 1.4375, "step": 9073 }, { "epoch": 1.5654274130941084, "grad_norm": 0.61328125, "learning_rate": 2.258145334633912e-06, "loss": 1.4361, "step": 9074 }, { "epoch": 1.5655999309928406, "grad_norm": 0.578125, "learning_rate": 2.2564227841755105e-06, "loss": 1.3977, "step": 9075 }, { "epoch": 1.5657724488915725, "grad_norm": 0.5625, "learning_rate": 2.2547008074096864e-06, "loss": 1.37, "step": 9076 }, { "epoch": 1.5659449667903045, "grad_norm": 0.5703125, "learning_rate": 2.252979404464027e-06, "loss": 1.3545, "step": 9077 }, { "epoch": 1.5661174846890364, "grad_norm": 0.625, "learning_rate": 2.2512585754660533e-06, "loss": 1.4416, "step": 9078 }, { "epoch": 1.5662900025877686, "grad_norm": 0.59765625, "learning_rate": 2.2495383205432665e-06, "loss": 1.4559, "step": 9079 }, { "epoch": 1.5664625204865006, "grad_norm": 0.59375, "learning_rate": 2.247818639823105e-06, "loss": 1.4815, "step": 9080 }, { "epoch": 1.5666350383852325, "grad_norm": 0.61328125, "learning_rate": 2.2460995334329773e-06, "loss": 1.4407, "step": 9081 }, { "epoch": 1.5668075562839645, "grad_norm": 0.56640625, "learning_rate": 2.244381001500244e-06, "loss": 1.3868, "step": 9082 }, { "epoch": 1.5669800741826965, "grad_norm": 0.578125, "learning_rate": 2.2426630441522257e-06, "loss": 1.458, "step": 9083 }, { "epoch": 1.5671525920814284, "grad_norm": 0.55859375, "learning_rate": 2.2409456615161996e-06, "loss": 1.406, "step": 9084 }, { "epoch": 1.5673251099801604, "grad_norm": 0.5859375, "learning_rate": 2.2392288537193987e-06, "loss": 1.4267, "step": 9085 }, { "epoch": 1.5674976278788924, "grad_norm": 0.64453125, "learning_rate": 2.2375126208890164e-06, "loss": 1.4312, "step": 9086 }, { "epoch": 1.5676701457776243, "grad_norm": 0.58984375, "learning_rate": 2.2357969631522016e-06, "loss": 1.3354, "step": 9087 }, { "epoch": 1.5678426636763563, "grad_norm": 0.56640625, "learning_rate": 2.234081880636063e-06, "loss": 1.4659, "step": 9088 }, { "epoch": 1.5680151815750885, "grad_norm": 0.57421875, "learning_rate": 2.2323673734676576e-06, "loss": 1.3875, "step": 9089 }, { "epoch": 1.5681876994738204, "grad_norm": 0.56640625, "learning_rate": 2.2306534417740167e-06, "loss": 1.5242, "step": 9090 }, { "epoch": 1.5683602173725524, "grad_norm": 0.625, "learning_rate": 2.228940085682111e-06, "loss": 1.4999, "step": 9091 }, { "epoch": 1.5685327352712846, "grad_norm": 0.59375, "learning_rate": 2.22722730531888e-06, "loss": 1.3251, "step": 9092 }, { "epoch": 1.5687052531700165, "grad_norm": 0.5859375, "learning_rate": 2.2255151008112164e-06, "loss": 1.4632, "step": 9093 }, { "epoch": 1.5688777710687485, "grad_norm": 0.5859375, "learning_rate": 2.2238034722859715e-06, "loss": 1.4337, "step": 9094 }, { "epoch": 1.5690502889674804, "grad_norm": 0.5625, "learning_rate": 2.2220924198699532e-06, "loss": 1.511, "step": 9095 }, { "epoch": 1.5692228068662124, "grad_norm": 0.625, "learning_rate": 2.2203819436899297e-06, "loss": 1.4009, "step": 9096 }, { "epoch": 1.5693953247649444, "grad_norm": 0.5859375, "learning_rate": 2.2186720438726163e-06, "loss": 1.5326, "step": 9097 }, { "epoch": 1.5695678426636763, "grad_norm": 0.59375, "learning_rate": 2.216962720544703e-06, "loss": 1.3456, "step": 9098 }, { "epoch": 1.5697403605624083, "grad_norm": 0.671875, "learning_rate": 2.2152539738328185e-06, "loss": 1.533, "step": 9099 }, { "epoch": 1.5699128784611402, "grad_norm": 0.71875, "learning_rate": 2.2135458038635614e-06, "loss": 1.4004, "step": 9100 }, { "epoch": 1.5699128784611402, "eval_loss": 1.4070823192596436, "eval_runtime": 10.9402, "eval_samples_per_second": 93.6, "eval_steps_per_second": 23.4, "step": 9100 }, { "epoch": 1.5700853963598722, "grad_norm": 0.68359375, "learning_rate": 2.211838210763484e-06, "loss": 1.4252, "step": 9101 }, { "epoch": 1.5702579142586042, "grad_norm": 0.625, "learning_rate": 2.2101311946590942e-06, "loss": 1.4406, "step": 9102 }, { "epoch": 1.5704304321573364, "grad_norm": 0.578125, "learning_rate": 2.2084247556768577e-06, "loss": 1.4362, "step": 9103 }, { "epoch": 1.5706029500560683, "grad_norm": 0.60546875, "learning_rate": 2.2067188939432006e-06, "loss": 1.4133, "step": 9104 }, { "epoch": 1.5707754679548003, "grad_norm": 0.58984375, "learning_rate": 2.205013609584502e-06, "loss": 1.4302, "step": 9105 }, { "epoch": 1.5709479858535325, "grad_norm": 0.65625, "learning_rate": 2.2033089027271003e-06, "loss": 1.4432, "step": 9106 }, { "epoch": 1.5711205037522644, "grad_norm": 0.57421875, "learning_rate": 2.2016047734972923e-06, "loss": 1.3627, "step": 9107 }, { "epoch": 1.5712930216509964, "grad_norm": 0.62109375, "learning_rate": 2.1999012220213277e-06, "loss": 1.4397, "step": 9108 }, { "epoch": 1.5714655395497283, "grad_norm": 0.58203125, "learning_rate": 2.1981982484254214e-06, "loss": 1.4597, "step": 9109 }, { "epoch": 1.5716380574484603, "grad_norm": 0.58203125, "learning_rate": 2.1964958528357317e-06, "loss": 1.3625, "step": 9110 }, { "epoch": 1.5718105753471923, "grad_norm": 0.5859375, "learning_rate": 2.1947940353783927e-06, "loss": 1.4335, "step": 9111 }, { "epoch": 1.5719830932459242, "grad_norm": 0.921875, "learning_rate": 2.1930927961794777e-06, "loss": 1.4665, "step": 9112 }, { "epoch": 1.5721556111446562, "grad_norm": 0.6796875, "learning_rate": 2.191392135365029e-06, "loss": 1.3722, "step": 9113 }, { "epoch": 1.5723281290433881, "grad_norm": 0.62890625, "learning_rate": 2.189692053061041e-06, "loss": 1.5526, "step": 9114 }, { "epoch": 1.57250064694212, "grad_norm": 0.5703125, "learning_rate": 2.1879925493934663e-06, "loss": 1.4509, "step": 9115 }, { "epoch": 1.5726731648408523, "grad_norm": 0.55078125, "learning_rate": 2.186293624488216e-06, "loss": 1.4767, "step": 9116 }, { "epoch": 1.5728456827395842, "grad_norm": 0.64453125, "learning_rate": 2.1845952784711555e-06, "loss": 1.4349, "step": 9117 }, { "epoch": 1.5730182006383162, "grad_norm": 0.625, "learning_rate": 2.1828975114681097e-06, "loss": 1.4376, "step": 9118 }, { "epoch": 1.5731907185370482, "grad_norm": 0.6484375, "learning_rate": 2.1812003236048607e-06, "loss": 1.4136, "step": 9119 }, { "epoch": 1.5733632364357804, "grad_norm": 0.5703125, "learning_rate": 2.1795037150071473e-06, "loss": 1.4266, "step": 9120 }, { "epoch": 1.5735357543345123, "grad_norm": 0.59375, "learning_rate": 2.177807685800659e-06, "loss": 1.4399, "step": 9121 }, { "epoch": 1.5737082722332443, "grad_norm": 0.57421875, "learning_rate": 2.1761122361110576e-06, "loss": 1.4447, "step": 9122 }, { "epoch": 1.5738807901319762, "grad_norm": 0.66796875, "learning_rate": 2.1744173660639446e-06, "loss": 1.4029, "step": 9123 }, { "epoch": 1.5740533080307082, "grad_norm": 0.54296875, "learning_rate": 2.1727230757848903e-06, "loss": 1.3948, "step": 9124 }, { "epoch": 1.5742258259294402, "grad_norm": 0.578125, "learning_rate": 2.1710293653994173e-06, "loss": 1.4587, "step": 9125 }, { "epoch": 1.5743983438281721, "grad_norm": 0.60546875, "learning_rate": 2.169336235033007e-06, "loss": 1.4837, "step": 9126 }, { "epoch": 1.574570861726904, "grad_norm": 0.5703125, "learning_rate": 2.167643684811096e-06, "loss": 1.4112, "step": 9127 }, { "epoch": 1.574743379625636, "grad_norm": 0.58203125, "learning_rate": 2.1659517148590825e-06, "loss": 1.4641, "step": 9128 }, { "epoch": 1.574915897524368, "grad_norm": 0.58984375, "learning_rate": 2.164260325302311e-06, "loss": 1.4535, "step": 9129 }, { "epoch": 1.5750884154231002, "grad_norm": 0.5546875, "learning_rate": 2.1625695162660986e-06, "loss": 1.2898, "step": 9130 }, { "epoch": 1.5752609333218321, "grad_norm": 0.546875, "learning_rate": 2.1608792878757044e-06, "loss": 1.4519, "step": 9131 }, { "epoch": 1.575433451220564, "grad_norm": 0.6015625, "learning_rate": 2.159189640256354e-06, "loss": 1.5392, "step": 9132 }, { "epoch": 1.5756059691192963, "grad_norm": 0.609375, "learning_rate": 2.1575005735332267e-06, "loss": 1.3497, "step": 9133 }, { "epoch": 1.5757784870180283, "grad_norm": 0.58203125, "learning_rate": 2.155812087831457e-06, "loss": 1.4987, "step": 9134 }, { "epoch": 1.5759510049167602, "grad_norm": 0.578125, "learning_rate": 2.154124183276145e-06, "loss": 1.4392, "step": 9135 }, { "epoch": 1.5761235228154922, "grad_norm": 0.578125, "learning_rate": 2.152436859992335e-06, "loss": 1.3764, "step": 9136 }, { "epoch": 1.5762960407142241, "grad_norm": 0.58203125, "learning_rate": 2.150750118105035e-06, "loss": 1.37, "step": 9137 }, { "epoch": 1.576468558612956, "grad_norm": 0.578125, "learning_rate": 2.1490639577392115e-06, "loss": 1.3533, "step": 9138 }, { "epoch": 1.576641076511688, "grad_norm": 0.56640625, "learning_rate": 2.1473783790197854e-06, "loss": 1.4118, "step": 9139 }, { "epoch": 1.57681359441042, "grad_norm": 0.82421875, "learning_rate": 2.145693382071633e-06, "loss": 1.3812, "step": 9140 }, { "epoch": 1.576986112309152, "grad_norm": 0.58984375, "learning_rate": 2.144008967019595e-06, "loss": 1.5229, "step": 9141 }, { "epoch": 1.577158630207884, "grad_norm": 0.56640625, "learning_rate": 2.1423251339884534e-06, "loss": 1.4583, "step": 9142 }, { "epoch": 1.577331148106616, "grad_norm": 0.6171875, "learning_rate": 2.1406418831029673e-06, "loss": 1.4862, "step": 9143 }, { "epoch": 1.577503666005348, "grad_norm": 0.59765625, "learning_rate": 2.138959214487837e-06, "loss": 1.4565, "step": 9144 }, { "epoch": 1.57767618390408, "grad_norm": 0.60546875, "learning_rate": 2.137277128267725e-06, "loss": 1.4422, "step": 9145 }, { "epoch": 1.577848701802812, "grad_norm": 0.5859375, "learning_rate": 2.1355956245672518e-06, "loss": 1.388, "step": 9146 }, { "epoch": 1.5780212197015442, "grad_norm": 0.546875, "learning_rate": 2.1339147035109943e-06, "loss": 1.26, "step": 9147 }, { "epoch": 1.5781937376002761, "grad_norm": 0.58984375, "learning_rate": 2.1322343652234855e-06, "loss": 1.5043, "step": 9148 }, { "epoch": 1.578366255499008, "grad_norm": 0.59765625, "learning_rate": 2.130554609829214e-06, "loss": 1.4859, "step": 9149 }, { "epoch": 1.57853877339774, "grad_norm": 0.578125, "learning_rate": 2.1288754374526275e-06, "loss": 1.3498, "step": 9150 }, { "epoch": 1.578711291296472, "grad_norm": 0.58984375, "learning_rate": 2.1271968482181306e-06, "loss": 1.4313, "step": 9151 }, { "epoch": 1.578883809195204, "grad_norm": 0.5546875, "learning_rate": 2.125518842250085e-06, "loss": 1.4392, "step": 9152 }, { "epoch": 1.579056327093936, "grad_norm": 0.58984375, "learning_rate": 2.1238414196728007e-06, "loss": 1.3872, "step": 9153 }, { "epoch": 1.579228844992668, "grad_norm": 0.5703125, "learning_rate": 2.1221645806105617e-06, "loss": 1.4653, "step": 9154 }, { "epoch": 1.5794013628913999, "grad_norm": 0.57421875, "learning_rate": 2.120488325187591e-06, "loss": 1.437, "step": 9155 }, { "epoch": 1.5795738807901318, "grad_norm": 0.59375, "learning_rate": 2.118812653528077e-06, "loss": 1.3615, "step": 9156 }, { "epoch": 1.579746398688864, "grad_norm": 0.5703125, "learning_rate": 2.1171375657561677e-06, "loss": 1.5104, "step": 9157 }, { "epoch": 1.579918916587596, "grad_norm": 0.57421875, "learning_rate": 2.1154630619959605e-06, "loss": 1.4759, "step": 9158 }, { "epoch": 1.580091434486328, "grad_norm": 0.5546875, "learning_rate": 2.113789142371515e-06, "loss": 1.3506, "step": 9159 }, { "epoch": 1.58026395238506, "grad_norm": 0.64453125, "learning_rate": 2.112115807006848e-06, "loss": 1.3673, "step": 9160 }, { "epoch": 1.580436470283792, "grad_norm": 0.578125, "learning_rate": 2.1104430560259227e-06, "loss": 1.4195, "step": 9161 }, { "epoch": 1.580608988182524, "grad_norm": 0.58203125, "learning_rate": 2.108770889552677e-06, "loss": 1.5068, "step": 9162 }, { "epoch": 1.580781506081256, "grad_norm": 0.5703125, "learning_rate": 2.107099307710988e-06, "loss": 1.3579, "step": 9163 }, { "epoch": 1.580954023979988, "grad_norm": 0.5703125, "learning_rate": 2.105428310624699e-06, "loss": 1.4347, "step": 9164 }, { "epoch": 1.58112654187872, "grad_norm": 0.65625, "learning_rate": 2.103757898417609e-06, "loss": 1.316, "step": 9165 }, { "epoch": 1.5812990597774519, "grad_norm": 0.578125, "learning_rate": 2.102088071213468e-06, "loss": 1.3706, "step": 9166 }, { "epoch": 1.5814715776761838, "grad_norm": 0.58984375, "learning_rate": 2.1004188291359973e-06, "loss": 1.4241, "step": 9167 }, { "epoch": 1.5816440955749158, "grad_norm": 0.6015625, "learning_rate": 2.0987501723088564e-06, "loss": 1.4717, "step": 9168 }, { "epoch": 1.5818166134736478, "grad_norm": 0.5625, "learning_rate": 2.0970821008556706e-06, "loss": 1.489, "step": 9169 }, { "epoch": 1.5819891313723797, "grad_norm": 0.55859375, "learning_rate": 2.0954146149000243e-06, "loss": 1.3503, "step": 9170 }, { "epoch": 1.582161649271112, "grad_norm": 0.62890625, "learning_rate": 2.093747714565453e-06, "loss": 1.4744, "step": 9171 }, { "epoch": 1.5823341671698439, "grad_norm": 0.671875, "learning_rate": 2.092081399975451e-06, "loss": 1.4649, "step": 9172 }, { "epoch": 1.5825066850685758, "grad_norm": 0.57421875, "learning_rate": 2.0904156712534718e-06, "loss": 1.3834, "step": 9173 }, { "epoch": 1.582679202967308, "grad_norm": 0.59765625, "learning_rate": 2.088750528522917e-06, "loss": 1.5699, "step": 9174 }, { "epoch": 1.58285172086604, "grad_norm": 0.578125, "learning_rate": 2.087085971907159e-06, "loss": 1.438, "step": 9175 }, { "epoch": 1.583024238764772, "grad_norm": 0.56640625, "learning_rate": 2.0854220015295125e-06, "loss": 1.3781, "step": 9176 }, { "epoch": 1.583196756663504, "grad_norm": 0.57421875, "learning_rate": 2.083758617513256e-06, "loss": 1.39, "step": 9177 }, { "epoch": 1.5833692745622359, "grad_norm": 0.57421875, "learning_rate": 2.0820958199816253e-06, "loss": 1.3723, "step": 9178 }, { "epoch": 1.5835417924609678, "grad_norm": 0.55859375, "learning_rate": 2.0804336090578094e-06, "loss": 1.4139, "step": 9179 }, { "epoch": 1.5837143103596998, "grad_norm": 0.60546875, "learning_rate": 2.0787719848649544e-06, "loss": 1.3735, "step": 9180 }, { "epoch": 1.5838868282584317, "grad_norm": 0.59375, "learning_rate": 2.0771109475261654e-06, "loss": 1.4363, "step": 9181 }, { "epoch": 1.5840593461571637, "grad_norm": 0.5859375, "learning_rate": 2.075450497164503e-06, "loss": 1.4841, "step": 9182 }, { "epoch": 1.5842318640558957, "grad_norm": 0.60546875, "learning_rate": 2.0737906339029813e-06, "loss": 1.4085, "step": 9183 }, { "epoch": 1.5844043819546276, "grad_norm": 0.5859375, "learning_rate": 2.0721313578645773e-06, "loss": 1.3854, "step": 9184 }, { "epoch": 1.5845768998533598, "grad_norm": 0.58203125, "learning_rate": 2.0704726691722134e-06, "loss": 1.4014, "step": 9185 }, { "epoch": 1.5847494177520918, "grad_norm": 0.56640625, "learning_rate": 2.068814567948785e-06, "loss": 1.4236, "step": 9186 }, { "epoch": 1.5849219356508237, "grad_norm": 0.61328125, "learning_rate": 2.067157054317127e-06, "loss": 1.4418, "step": 9187 }, { "epoch": 1.585094453549556, "grad_norm": 0.57421875, "learning_rate": 2.065500128400041e-06, "loss": 1.546, "step": 9188 }, { "epoch": 1.5852669714482879, "grad_norm": 0.6875, "learning_rate": 2.0638437903202825e-06, "loss": 1.336, "step": 9189 }, { "epoch": 1.5854394893470198, "grad_norm": 0.6640625, "learning_rate": 2.0621880402005644e-06, "loss": 1.4443, "step": 9190 }, { "epoch": 1.5856120072457518, "grad_norm": 0.61328125, "learning_rate": 2.0605328781635524e-06, "loss": 1.5043, "step": 9191 }, { "epoch": 1.5857845251444838, "grad_norm": 0.609375, "learning_rate": 2.058878304331877e-06, "loss": 1.3014, "step": 9192 }, { "epoch": 1.5859570430432157, "grad_norm": 0.64453125, "learning_rate": 2.0572243188281093e-06, "loss": 1.3639, "step": 9193 }, { "epoch": 1.5861295609419477, "grad_norm": 0.5625, "learning_rate": 2.055570921774799e-06, "loss": 1.2815, "step": 9194 }, { "epoch": 1.5863020788406796, "grad_norm": 0.6015625, "learning_rate": 2.0539181132944273e-06, "loss": 1.4472, "step": 9195 }, { "epoch": 1.5864745967394116, "grad_norm": 0.6328125, "learning_rate": 2.0522658935094565e-06, "loss": 1.3883, "step": 9196 }, { "epoch": 1.5866471146381436, "grad_norm": 0.671875, "learning_rate": 2.0506142625422863e-06, "loss": 1.3905, "step": 9197 }, { "epoch": 1.5868196325368757, "grad_norm": 0.6171875, "learning_rate": 2.0489632205152787e-06, "loss": 1.5331, "step": 9198 }, { "epoch": 1.5869921504356077, "grad_norm": 0.62890625, "learning_rate": 2.047312767550761e-06, "loss": 1.4573, "step": 9199 }, { "epoch": 1.5871646683343397, "grad_norm": 0.5859375, "learning_rate": 2.045662903771002e-06, "loss": 1.4495, "step": 9200 }, { "epoch": 1.5871646683343397, "eval_loss": 1.407097578048706, "eval_runtime": 10.8771, "eval_samples_per_second": 94.143, "eval_steps_per_second": 23.536, "step": 9200 }, { "epoch": 1.5873371862330716, "grad_norm": 0.60546875, "learning_rate": 2.044013629298235e-06, "loss": 1.4389, "step": 9201 }, { "epoch": 1.5875097041318038, "grad_norm": 0.76171875, "learning_rate": 2.042364944254651e-06, "loss": 1.4526, "step": 9202 }, { "epoch": 1.5876822220305358, "grad_norm": 0.5703125, "learning_rate": 2.040716848762393e-06, "loss": 1.3507, "step": 9203 }, { "epoch": 1.5878547399292677, "grad_norm": 0.58203125, "learning_rate": 2.0390693429435626e-06, "loss": 1.4062, "step": 9204 }, { "epoch": 1.5880272578279997, "grad_norm": 0.58984375, "learning_rate": 2.0374224269202204e-06, "loss": 1.4531, "step": 9205 }, { "epoch": 1.5881997757267317, "grad_norm": 0.56640625, "learning_rate": 2.0357761008143715e-06, "loss": 1.451, "step": 9206 }, { "epoch": 1.5883722936254636, "grad_norm": 0.5546875, "learning_rate": 2.034130364747997e-06, "loss": 1.4734, "step": 9207 }, { "epoch": 1.5885448115241956, "grad_norm": 0.58984375, "learning_rate": 2.0324852188430167e-06, "loss": 1.339, "step": 9208 }, { "epoch": 1.5887173294229275, "grad_norm": 0.62109375, "learning_rate": 2.030840663221313e-06, "loss": 1.4443, "step": 9209 }, { "epoch": 1.5888898473216595, "grad_norm": 0.60546875, "learning_rate": 2.0291966980047282e-06, "loss": 1.4756, "step": 9210 }, { "epoch": 1.5890623652203915, "grad_norm": 0.59375, "learning_rate": 2.027553323315055e-06, "loss": 1.4799, "step": 9211 }, { "epoch": 1.5892348831191236, "grad_norm": 0.578125, "learning_rate": 2.0259105392740462e-06, "loss": 1.3639, "step": 9212 }, { "epoch": 1.5894074010178556, "grad_norm": 0.56640625, "learning_rate": 2.0242683460034084e-06, "loss": 1.5483, "step": 9213 }, { "epoch": 1.5895799189165876, "grad_norm": 0.55078125, "learning_rate": 2.022626743624807e-06, "loss": 1.5209, "step": 9214 }, { "epoch": 1.5897524368153197, "grad_norm": 0.5625, "learning_rate": 2.02098573225986e-06, "loss": 1.3927, "step": 9215 }, { "epoch": 1.5899249547140517, "grad_norm": 0.57421875, "learning_rate": 2.0193453120301496e-06, "loss": 1.3861, "step": 9216 }, { "epoch": 1.5900974726127837, "grad_norm": 0.5703125, "learning_rate": 2.0177054830571974e-06, "loss": 1.3874, "step": 9217 }, { "epoch": 1.5902699905115156, "grad_norm": 0.58203125, "learning_rate": 2.0160662454625045e-06, "loss": 1.3126, "step": 9218 }, { "epoch": 1.5904425084102476, "grad_norm": 0.57421875, "learning_rate": 2.0144275993675077e-06, "loss": 1.359, "step": 9219 }, { "epoch": 1.5906150263089796, "grad_norm": 0.6015625, "learning_rate": 2.01278954489361e-06, "loss": 1.5007, "step": 9220 }, { "epoch": 1.5907875442077115, "grad_norm": 0.65625, "learning_rate": 2.0111520821621686e-06, "loss": 1.4547, "step": 9221 }, { "epoch": 1.5909600621064435, "grad_norm": 0.58203125, "learning_rate": 2.0095152112944983e-06, "loss": 1.4354, "step": 9222 }, { "epoch": 1.5911325800051754, "grad_norm": 0.609375, "learning_rate": 2.0078789324118686e-06, "loss": 1.3445, "step": 9223 }, { "epoch": 1.5913050979039074, "grad_norm": 0.5703125, "learning_rate": 2.0062432456355064e-06, "loss": 1.3494, "step": 9224 }, { "epoch": 1.5914776158026396, "grad_norm": 0.57421875, "learning_rate": 2.0046081510865865e-06, "loss": 1.4539, "step": 9225 }, { "epoch": 1.5916501337013715, "grad_norm": 0.5703125, "learning_rate": 2.0029736488862585e-06, "loss": 1.4198, "step": 9226 }, { "epoch": 1.5918226516001035, "grad_norm": 0.6015625, "learning_rate": 2.0013397391556042e-06, "loss": 1.4261, "step": 9227 }, { "epoch": 1.5919951694988355, "grad_norm": 0.62890625, "learning_rate": 1.9997064220156857e-06, "loss": 1.3818, "step": 9228 }, { "epoch": 1.5921676873975676, "grad_norm": 0.5859375, "learning_rate": 1.9980736975875005e-06, "loss": 1.3872, "step": 9229 }, { "epoch": 1.5923402052962996, "grad_norm": 0.6328125, "learning_rate": 1.9964415659920123e-06, "loss": 1.4362, "step": 9230 }, { "epoch": 1.5925127231950316, "grad_norm": 0.5390625, "learning_rate": 1.9948100273501456e-06, "loss": 1.3703, "step": 9231 }, { "epoch": 1.5926852410937635, "grad_norm": 0.58203125, "learning_rate": 1.993179081782769e-06, "loss": 1.3841, "step": 9232 }, { "epoch": 1.5928577589924955, "grad_norm": 0.5859375, "learning_rate": 1.991548729410715e-06, "loss": 1.4737, "step": 9233 }, { "epoch": 1.5930302768912274, "grad_norm": 0.64453125, "learning_rate": 1.9899189703547694e-06, "loss": 1.4497, "step": 9234 }, { "epoch": 1.5932027947899594, "grad_norm": 0.5625, "learning_rate": 1.988289804735677e-06, "loss": 1.337, "step": 9235 }, { "epoch": 1.5933753126886914, "grad_norm": 0.58203125, "learning_rate": 1.986661232674134e-06, "loss": 1.4287, "step": 9236 }, { "epoch": 1.5935478305874233, "grad_norm": 0.60546875, "learning_rate": 1.9850332542908e-06, "loss": 1.4264, "step": 9237 }, { "epoch": 1.5937203484861553, "grad_norm": 0.84375, "learning_rate": 1.9834058697062776e-06, "loss": 1.5309, "step": 9238 }, { "epoch": 1.5938928663848875, "grad_norm": 0.625, "learning_rate": 1.9817790790411428e-06, "loss": 1.3928, "step": 9239 }, { "epoch": 1.5940653842836194, "grad_norm": 0.58984375, "learning_rate": 1.980152882415911e-06, "loss": 1.342, "step": 9240 }, { "epoch": 1.5942379021823514, "grad_norm": 0.68359375, "learning_rate": 1.978527279951065e-06, "loss": 1.3418, "step": 9241 }, { "epoch": 1.5944104200810834, "grad_norm": 0.6171875, "learning_rate": 1.976902271767037e-06, "loss": 1.4355, "step": 9242 }, { "epoch": 1.5945829379798155, "grad_norm": 0.59765625, "learning_rate": 1.9752778579842214e-06, "loss": 1.3677, "step": 9243 }, { "epoch": 1.5947554558785475, "grad_norm": 0.58203125, "learning_rate": 1.973654038722962e-06, "loss": 1.45, "step": 9244 }, { "epoch": 1.5949279737772795, "grad_norm": 0.5625, "learning_rate": 1.9720308141035647e-06, "loss": 1.407, "step": 9245 }, { "epoch": 1.5951004916760114, "grad_norm": 0.5546875, "learning_rate": 1.9704081842462806e-06, "loss": 1.3835, "step": 9246 }, { "epoch": 1.5952730095747434, "grad_norm": 0.56640625, "learning_rate": 1.9687861492713323e-06, "loss": 1.2955, "step": 9247 }, { "epoch": 1.5954455274734753, "grad_norm": 0.6015625, "learning_rate": 1.967164709298889e-06, "loss": 1.3696, "step": 9248 }, { "epoch": 1.5956180453722073, "grad_norm": 0.63671875, "learning_rate": 1.965543864449071e-06, "loss": 1.3885, "step": 9249 }, { "epoch": 1.5957905632709393, "grad_norm": 0.62890625, "learning_rate": 1.9639236148419705e-06, "loss": 1.4505, "step": 9250 }, { "epoch": 1.5959630811696712, "grad_norm": 0.58203125, "learning_rate": 1.962303960597618e-06, "loss": 1.4014, "step": 9251 }, { "epoch": 1.5961355990684032, "grad_norm": 0.5546875, "learning_rate": 1.9606849018360096e-06, "loss": 1.4158, "step": 9252 }, { "epoch": 1.5963081169671354, "grad_norm": 0.5703125, "learning_rate": 1.9590664386770953e-06, "loss": 1.4112, "step": 9253 }, { "epoch": 1.5964806348658673, "grad_norm": 0.6484375, "learning_rate": 1.957448571240782e-06, "loss": 1.3941, "step": 9254 }, { "epoch": 1.5966531527645993, "grad_norm": 0.62890625, "learning_rate": 1.9558312996469296e-06, "loss": 1.4583, "step": 9255 }, { "epoch": 1.5968256706633315, "grad_norm": 0.59375, "learning_rate": 1.954214624015358e-06, "loss": 1.4152, "step": 9256 }, { "epoch": 1.5969981885620634, "grad_norm": 1.0390625, "learning_rate": 1.9525985444658393e-06, "loss": 1.4532, "step": 9257 }, { "epoch": 1.5971707064607954, "grad_norm": 0.88671875, "learning_rate": 1.9509830611181047e-06, "loss": 1.3964, "step": 9258 }, { "epoch": 1.5973432243595274, "grad_norm": 0.57421875, "learning_rate": 1.9493681740918335e-06, "loss": 1.4545, "step": 9259 }, { "epoch": 1.5975157422582593, "grad_norm": 0.6015625, "learning_rate": 1.9477538835066747e-06, "loss": 1.3786, "step": 9260 }, { "epoch": 1.5976882601569913, "grad_norm": 0.57421875, "learning_rate": 1.9461401894822187e-06, "loss": 1.336, "step": 9261 }, { "epoch": 1.5978607780557232, "grad_norm": 0.5859375, "learning_rate": 1.944527092138018e-06, "loss": 1.431, "step": 9262 }, { "epoch": 1.5980332959544552, "grad_norm": 0.578125, "learning_rate": 1.9429145915935886e-06, "loss": 1.4712, "step": 9263 }, { "epoch": 1.5982058138531872, "grad_norm": 0.58203125, "learning_rate": 1.941302687968386e-06, "loss": 1.3915, "step": 9264 }, { "epoch": 1.5983783317519191, "grad_norm": 0.546875, "learning_rate": 1.939691381381834e-06, "loss": 1.3599, "step": 9265 }, { "epoch": 1.5985508496506513, "grad_norm": 0.62109375, "learning_rate": 1.938080671953307e-06, "loss": 1.294, "step": 9266 }, { "epoch": 1.5987233675493833, "grad_norm": 0.58203125, "learning_rate": 1.9364705598021373e-06, "loss": 1.407, "step": 9267 }, { "epoch": 1.5988958854481152, "grad_norm": 0.59765625, "learning_rate": 1.9348610450476112e-06, "loss": 1.3775, "step": 9268 }, { "epoch": 1.5990684033468472, "grad_norm": 0.58203125, "learning_rate": 1.9332521278089756e-06, "loss": 1.473, "step": 9269 }, { "epoch": 1.5992409212455794, "grad_norm": 0.58984375, "learning_rate": 1.9316438082054203e-06, "loss": 1.4518, "step": 9270 }, { "epoch": 1.5994134391443113, "grad_norm": 0.57421875, "learning_rate": 1.930036086356111e-06, "loss": 1.4212, "step": 9271 }, { "epoch": 1.5995859570430433, "grad_norm": 0.546875, "learning_rate": 1.928428962380148e-06, "loss": 1.4948, "step": 9272 }, { "epoch": 1.5997584749417753, "grad_norm": 0.5859375, "learning_rate": 1.9268224363966016e-06, "loss": 1.3819, "step": 9273 }, { "epoch": 1.5999309928405072, "grad_norm": 0.6484375, "learning_rate": 1.925216508524492e-06, "loss": 1.4281, "step": 9274 }, { "epoch": 1.6001035107392392, "grad_norm": 0.5703125, "learning_rate": 1.9236111788827983e-06, "loss": 1.4988, "step": 9275 }, { "epoch": 1.6002760286379711, "grad_norm": 0.703125, "learning_rate": 1.922006447590451e-06, "loss": 1.4607, "step": 9276 }, { "epoch": 1.600448546536703, "grad_norm": 0.55859375, "learning_rate": 1.920402314766343e-06, "loss": 1.3998, "step": 9277 }, { "epoch": 1.600621064435435, "grad_norm": 0.625, "learning_rate": 1.9187987805293096e-06, "loss": 1.4442, "step": 9278 }, { "epoch": 1.600793582334167, "grad_norm": 0.5859375, "learning_rate": 1.9171958449981587e-06, "loss": 1.3879, "step": 9279 }, { "epoch": 1.6009661002328992, "grad_norm": 0.5625, "learning_rate": 1.9155935082916467e-06, "loss": 1.3548, "step": 9280 }, { "epoch": 1.6011386181316312, "grad_norm": 0.5703125, "learning_rate": 1.913991770528475e-06, "loss": 1.5053, "step": 9281 }, { "epoch": 1.6013111360303631, "grad_norm": 0.7578125, "learning_rate": 1.9123906318273234e-06, "loss": 1.4098, "step": 9282 }, { "epoch": 1.6014836539290953, "grad_norm": 0.703125, "learning_rate": 1.9107900923068013e-06, "loss": 1.4627, "step": 9283 }, { "epoch": 1.6016561718278273, "grad_norm": 0.58984375, "learning_rate": 1.909190152085497e-06, "loss": 1.4288, "step": 9284 }, { "epoch": 1.6018286897265592, "grad_norm": 0.76171875, "learning_rate": 1.9075908112819387e-06, "loss": 1.376, "step": 9285 }, { "epoch": 1.6020012076252912, "grad_norm": 0.57421875, "learning_rate": 1.9059920700146163e-06, "loss": 1.3245, "step": 9286 }, { "epoch": 1.6021737255240232, "grad_norm": 0.58203125, "learning_rate": 1.9043939284019753e-06, "loss": 1.4066, "step": 9287 }, { "epoch": 1.6023462434227551, "grad_norm": 0.9375, "learning_rate": 1.9027963865624166e-06, "loss": 1.3476, "step": 9288 }, { "epoch": 1.602518761321487, "grad_norm": 0.6015625, "learning_rate": 1.9011994446142935e-06, "loss": 1.3944, "step": 9289 }, { "epoch": 1.602691279220219, "grad_norm": 0.58203125, "learning_rate": 1.8996031026759232e-06, "loss": 1.4003, "step": 9290 }, { "epoch": 1.602863797118951, "grad_norm": 0.609375, "learning_rate": 1.8980073608655625e-06, "loss": 1.4291, "step": 9291 }, { "epoch": 1.603036315017683, "grad_norm": 0.609375, "learning_rate": 1.8964122193014456e-06, "loss": 1.4436, "step": 9292 }, { "epoch": 1.603208832916415, "grad_norm": 0.5546875, "learning_rate": 1.8948176781017435e-06, "loss": 1.3513, "step": 9293 }, { "epoch": 1.603381350815147, "grad_norm": 0.6484375, "learning_rate": 1.8932237373845874e-06, "loss": 1.4303, "step": 9294 }, { "epoch": 1.603553868713879, "grad_norm": 0.58984375, "learning_rate": 1.891630397268076e-06, "loss": 1.287, "step": 9295 }, { "epoch": 1.603726386612611, "grad_norm": 0.578125, "learning_rate": 1.890037657870245e-06, "loss": 1.4175, "step": 9296 }, { "epoch": 1.6038989045113432, "grad_norm": 0.5703125, "learning_rate": 1.8884455193090989e-06, "loss": 1.4304, "step": 9297 }, { "epoch": 1.6040714224100752, "grad_norm": 0.58984375, "learning_rate": 1.886853981702591e-06, "loss": 1.4971, "step": 9298 }, { "epoch": 1.6042439403088071, "grad_norm": 0.58203125, "learning_rate": 1.8852630451686337e-06, "loss": 1.4024, "step": 9299 }, { "epoch": 1.604416458207539, "grad_norm": 0.59375, "learning_rate": 1.883672709825094e-06, "loss": 1.4162, "step": 9300 }, { "epoch": 1.604416458207539, "eval_loss": 1.4071283340454102, "eval_runtime": 10.92, "eval_samples_per_second": 93.773, "eval_steps_per_second": 23.443, "step": 9300 }, { "epoch": 1.604588976106271, "grad_norm": 0.59375, "learning_rate": 1.8820829757897952e-06, "loss": 1.3499, "step": 9301 }, { "epoch": 1.604761494005003, "grad_norm": 0.65625, "learning_rate": 1.8804938431805064e-06, "loss": 1.4917, "step": 9302 }, { "epoch": 1.604934011903735, "grad_norm": 0.63671875, "learning_rate": 1.8789053121149736e-06, "loss": 1.3938, "step": 9303 }, { "epoch": 1.605106529802467, "grad_norm": 0.609375, "learning_rate": 1.877317382710875e-06, "loss": 1.4025, "step": 9304 }, { "epoch": 1.605279047701199, "grad_norm": 0.6640625, "learning_rate": 1.8757300550858571e-06, "loss": 1.3692, "step": 9305 }, { "epoch": 1.6054515655999309, "grad_norm": 0.60546875, "learning_rate": 1.8741433293575196e-06, "loss": 1.5194, "step": 9306 }, { "epoch": 1.605624083498663, "grad_norm": 0.7265625, "learning_rate": 1.8725572056434172e-06, "loss": 1.4504, "step": 9307 }, { "epoch": 1.605796601397395, "grad_norm": 0.609375, "learning_rate": 1.8709716840610592e-06, "loss": 1.4591, "step": 9308 }, { "epoch": 1.605969119296127, "grad_norm": 0.9921875, "learning_rate": 1.869386764727914e-06, "loss": 1.4517, "step": 9309 }, { "epoch": 1.606141637194859, "grad_norm": 0.609375, "learning_rate": 1.8678024477613954e-06, "loss": 1.4905, "step": 9310 }, { "epoch": 1.606314155093591, "grad_norm": 0.55859375, "learning_rate": 1.8662187332788861e-06, "loss": 1.4565, "step": 9311 }, { "epoch": 1.606486672992323, "grad_norm": 0.57421875, "learning_rate": 1.8646356213977167e-06, "loss": 1.3746, "step": 9312 }, { "epoch": 1.606659190891055, "grad_norm": 0.58984375, "learning_rate": 1.863053112235168e-06, "loss": 1.4626, "step": 9313 }, { "epoch": 1.606831708789787, "grad_norm": 0.60546875, "learning_rate": 1.8614712059084927e-06, "loss": 1.4173, "step": 9314 }, { "epoch": 1.607004226688519, "grad_norm": 0.58984375, "learning_rate": 1.8598899025348771e-06, "loss": 1.3984, "step": 9315 }, { "epoch": 1.607176744587251, "grad_norm": 0.5703125, "learning_rate": 1.858309202231483e-06, "loss": 1.4292, "step": 9316 }, { "epoch": 1.6073492624859829, "grad_norm": 0.55078125, "learning_rate": 1.8567291051154135e-06, "loss": 1.4097, "step": 9317 }, { "epoch": 1.6075217803847148, "grad_norm": 0.6171875, "learning_rate": 1.8551496113037336e-06, "loss": 1.4493, "step": 9318 }, { "epoch": 1.6076942982834468, "grad_norm": 0.5859375, "learning_rate": 1.8535707209134613e-06, "loss": 1.2968, "step": 9319 }, { "epoch": 1.6078668161821787, "grad_norm": 0.578125, "learning_rate": 1.8519924340615713e-06, "loss": 1.4715, "step": 9320 }, { "epoch": 1.608039334080911, "grad_norm": 0.5625, "learning_rate": 1.8504147508649928e-06, "loss": 1.3962, "step": 9321 }, { "epoch": 1.608211851979643, "grad_norm": 0.6015625, "learning_rate": 1.8488376714406131e-06, "loss": 1.4307, "step": 9322 }, { "epoch": 1.6083843698783749, "grad_norm": 0.5390625, "learning_rate": 1.8472611959052644e-06, "loss": 1.4533, "step": 9323 }, { "epoch": 1.608556887777107, "grad_norm": 0.58984375, "learning_rate": 1.8456853243757522e-06, "loss": 1.4285, "step": 9324 }, { "epoch": 1.608729405675839, "grad_norm": 0.59765625, "learning_rate": 1.8441100569688186e-06, "loss": 1.4613, "step": 9325 }, { "epoch": 1.608901923574571, "grad_norm": 0.65234375, "learning_rate": 1.8425353938011702e-06, "loss": 1.4717, "step": 9326 }, { "epoch": 1.609074441473303, "grad_norm": 0.5703125, "learning_rate": 1.8409613349894761e-06, "loss": 1.4131, "step": 9327 }, { "epoch": 1.6092469593720349, "grad_norm": 0.59375, "learning_rate": 1.839387880650343e-06, "loss": 1.4449, "step": 9328 }, { "epoch": 1.6094194772707668, "grad_norm": 0.609375, "learning_rate": 1.837815030900345e-06, "loss": 1.3643, "step": 9329 }, { "epoch": 1.6095919951694988, "grad_norm": 1.15625, "learning_rate": 1.8362427858560094e-06, "loss": 1.379, "step": 9330 }, { "epoch": 1.6097645130682308, "grad_norm": 0.63671875, "learning_rate": 1.8346711456338185e-06, "loss": 1.4487, "step": 9331 }, { "epoch": 1.6099370309669627, "grad_norm": 0.5546875, "learning_rate": 1.8331001103502077e-06, "loss": 1.411, "step": 9332 }, { "epoch": 1.6101095488656947, "grad_norm": 0.578125, "learning_rate": 1.8315296801215721e-06, "loss": 1.3986, "step": 9333 }, { "epoch": 1.6102820667644266, "grad_norm": 0.6015625, "learning_rate": 1.8299598550642528e-06, "loss": 1.5243, "step": 9334 }, { "epoch": 1.6104545846631588, "grad_norm": 0.5859375, "learning_rate": 1.82839063529456e-06, "loss": 1.4422, "step": 9335 }, { "epoch": 1.6106271025618908, "grad_norm": 0.578125, "learning_rate": 1.8268220209287457e-06, "loss": 1.3994, "step": 9336 }, { "epoch": 1.6107996204606227, "grad_norm": 0.55859375, "learning_rate": 1.8252540120830253e-06, "loss": 1.3916, "step": 9337 }, { "epoch": 1.610972138359355, "grad_norm": 0.59765625, "learning_rate": 1.823686608873565e-06, "loss": 1.4315, "step": 9338 }, { "epoch": 1.611144656258087, "grad_norm": 0.59765625, "learning_rate": 1.8221198114164885e-06, "loss": 1.3636, "step": 9339 }, { "epoch": 1.6113171741568189, "grad_norm": 0.5625, "learning_rate": 1.8205536198278739e-06, "loss": 1.4466, "step": 9340 }, { "epoch": 1.6114896920555508, "grad_norm": 0.5625, "learning_rate": 1.8189880342237576e-06, "loss": 1.4002, "step": 9341 }, { "epoch": 1.6116622099542828, "grad_norm": 0.67578125, "learning_rate": 1.81742305472012e-06, "loss": 1.5221, "step": 9342 }, { "epoch": 1.6118347278530147, "grad_norm": 0.5859375, "learning_rate": 1.8158586814329126e-06, "loss": 1.3755, "step": 9343 }, { "epoch": 1.6120072457517467, "grad_norm": 0.55859375, "learning_rate": 1.8142949144780297e-06, "loss": 1.4148, "step": 9344 }, { "epoch": 1.6121797636504787, "grad_norm": 0.58203125, "learning_rate": 1.8127317539713273e-06, "loss": 1.4302, "step": 9345 }, { "epoch": 1.6123522815492106, "grad_norm": 0.59765625, "learning_rate": 1.811169200028615e-06, "loss": 1.4514, "step": 9346 }, { "epoch": 1.6125247994479426, "grad_norm": 0.55859375, "learning_rate": 1.8096072527656505e-06, "loss": 1.4674, "step": 9347 }, { "epoch": 1.6126973173466748, "grad_norm": 0.59765625, "learning_rate": 1.8080459122981609e-06, "loss": 1.5049, "step": 9348 }, { "epoch": 1.6128698352454067, "grad_norm": 0.55859375, "learning_rate": 1.8064851787418126e-06, "loss": 1.4882, "step": 9349 }, { "epoch": 1.6130423531441387, "grad_norm": 0.54296875, "learning_rate": 1.8049250522122386e-06, "loss": 1.4113, "step": 9350 }, { "epoch": 1.6132148710428706, "grad_norm": 0.59765625, "learning_rate": 1.8033655328250222e-06, "loss": 1.5452, "step": 9351 }, { "epoch": 1.6133873889416028, "grad_norm": 0.5859375, "learning_rate": 1.8018066206957007e-06, "loss": 1.5255, "step": 9352 }, { "epoch": 1.6135599068403348, "grad_norm": 0.6015625, "learning_rate": 1.800248315939771e-06, "loss": 1.4721, "step": 9353 }, { "epoch": 1.6137324247390668, "grad_norm": 0.61328125, "learning_rate": 1.7986906186726815e-06, "loss": 1.3949, "step": 9354 }, { "epoch": 1.6139049426377987, "grad_norm": 0.5625, "learning_rate": 1.7971335290098314e-06, "loss": 1.4231, "step": 9355 }, { "epoch": 1.6140774605365307, "grad_norm": 0.578125, "learning_rate": 1.7955770470665879e-06, "loss": 1.441, "step": 9356 }, { "epoch": 1.6142499784352626, "grad_norm": 0.55859375, "learning_rate": 1.794021172958258e-06, "loss": 1.4214, "step": 9357 }, { "epoch": 1.6144224963339946, "grad_norm": 0.62890625, "learning_rate": 1.7924659068001094e-06, "loss": 1.402, "step": 9358 }, { "epoch": 1.6145950142327266, "grad_norm": 0.546875, "learning_rate": 1.7909112487073754e-06, "loss": 1.4344, "step": 9359 }, { "epoch": 1.6147675321314585, "grad_norm": 0.59375, "learning_rate": 1.7893571987952262e-06, "loss": 1.5812, "step": 9360 }, { "epoch": 1.6149400500301905, "grad_norm": 0.625, "learning_rate": 1.7878037571787987e-06, "loss": 1.4667, "step": 9361 }, { "epoch": 1.6151125679289227, "grad_norm": 0.6171875, "learning_rate": 1.7862509239731806e-06, "loss": 1.4051, "step": 9362 }, { "epoch": 1.6152850858276546, "grad_norm": 0.59765625, "learning_rate": 1.7846986992934157e-06, "loss": 1.3622, "step": 9363 }, { "epoch": 1.6154576037263866, "grad_norm": 0.5703125, "learning_rate": 1.7831470832545028e-06, "loss": 1.4268, "step": 9364 }, { "epoch": 1.6156301216251188, "grad_norm": 0.640625, "learning_rate": 1.781596075971398e-06, "loss": 1.3411, "step": 9365 }, { "epoch": 1.6158026395238507, "grad_norm": 0.59375, "learning_rate": 1.7800456775590014e-06, "loss": 1.4521, "step": 9366 }, { "epoch": 1.6159751574225827, "grad_norm": 0.56640625, "learning_rate": 1.7784958881321868e-06, "loss": 1.4027, "step": 9367 }, { "epoch": 1.6161476753213146, "grad_norm": 0.56640625, "learning_rate": 1.7769467078057644e-06, "loss": 1.4094, "step": 9368 }, { "epoch": 1.6163201932200466, "grad_norm": 0.61328125, "learning_rate": 1.7753981366945105e-06, "loss": 1.5092, "step": 9369 }, { "epoch": 1.6164927111187786, "grad_norm": 0.5859375, "learning_rate": 1.773850174913152e-06, "loss": 1.3982, "step": 9370 }, { "epoch": 1.6166652290175105, "grad_norm": 0.6484375, "learning_rate": 1.7723028225763727e-06, "loss": 1.3922, "step": 9371 }, { "epoch": 1.6168377469162425, "grad_norm": 0.5625, "learning_rate": 1.770756079798809e-06, "loss": 1.4047, "step": 9372 }, { "epoch": 1.6170102648149745, "grad_norm": 0.58203125, "learning_rate": 1.7692099466950551e-06, "loss": 1.4964, "step": 9373 }, { "epoch": 1.6171827827137064, "grad_norm": 0.61328125, "learning_rate": 1.7676644233796525e-06, "loss": 1.4487, "step": 9374 }, { "epoch": 1.6173553006124386, "grad_norm": 0.58203125, "learning_rate": 1.766119509967109e-06, "loss": 1.5349, "step": 9375 }, { "epoch": 1.6175278185111706, "grad_norm": 0.5625, "learning_rate": 1.7645752065718814e-06, "loss": 1.4116, "step": 9376 }, { "epoch": 1.6177003364099025, "grad_norm": 0.56640625, "learning_rate": 1.7630315133083786e-06, "loss": 1.3519, "step": 9377 }, { "epoch": 1.6178728543086345, "grad_norm": 0.546875, "learning_rate": 1.7614884302909719e-06, "loss": 1.3943, "step": 9378 }, { "epoch": 1.6180453722073667, "grad_norm": 0.6015625, "learning_rate": 1.7599459576339729e-06, "loss": 1.5073, "step": 9379 }, { "epoch": 1.6182178901060986, "grad_norm": 0.578125, "learning_rate": 1.758404095451669e-06, "loss": 1.402, "step": 9380 }, { "epoch": 1.6183904080048306, "grad_norm": 0.6171875, "learning_rate": 1.7568628438582825e-06, "loss": 1.4218, "step": 9381 }, { "epoch": 1.6185629259035625, "grad_norm": 0.6796875, "learning_rate": 1.7553222029680028e-06, "loss": 1.4593, "step": 9382 }, { "epoch": 1.6187354438022945, "grad_norm": 0.59765625, "learning_rate": 1.7537821728949688e-06, "loss": 1.3169, "step": 9383 }, { "epoch": 1.6189079617010265, "grad_norm": 0.8984375, "learning_rate": 1.752242753753276e-06, "loss": 1.3619, "step": 9384 }, { "epoch": 1.6190804795997584, "grad_norm": 0.54296875, "learning_rate": 1.7507039456569753e-06, "loss": 1.4082, "step": 9385 }, { "epoch": 1.6192529974984904, "grad_norm": 0.58984375, "learning_rate": 1.749165748720072e-06, "loss": 1.4374, "step": 9386 }, { "epoch": 1.6194255153972223, "grad_norm": 0.77734375, "learning_rate": 1.7476281630565195e-06, "loss": 1.4549, "step": 9387 }, { "epoch": 1.6195980332959543, "grad_norm": 0.5625, "learning_rate": 1.74609118878024e-06, "loss": 1.4547, "step": 9388 }, { "epoch": 1.6197705511946865, "grad_norm": 0.58203125, "learning_rate": 1.744554826005096e-06, "loss": 1.4083, "step": 9389 }, { "epoch": 1.6199430690934185, "grad_norm": 0.56640625, "learning_rate": 1.7430190748449105e-06, "loss": 1.4198, "step": 9390 }, { "epoch": 1.6201155869921504, "grad_norm": 0.58203125, "learning_rate": 1.7414839354134684e-06, "loss": 1.3815, "step": 9391 }, { "epoch": 1.6202881048908824, "grad_norm": 0.62890625, "learning_rate": 1.739949407824496e-06, "loss": 1.4645, "step": 9392 }, { "epoch": 1.6204606227896146, "grad_norm": 0.640625, "learning_rate": 1.7384154921916819e-06, "loss": 1.4625, "step": 9393 }, { "epoch": 1.6206331406883465, "grad_norm": 0.59375, "learning_rate": 1.7368821886286691e-06, "loss": 1.3335, "step": 9394 }, { "epoch": 1.6208056585870785, "grad_norm": 0.62109375, "learning_rate": 1.7353494972490548e-06, "loss": 1.5648, "step": 9395 }, { "epoch": 1.6209781764858104, "grad_norm": 0.5546875, "learning_rate": 1.7338174181663891e-06, "loss": 1.4175, "step": 9396 }, { "epoch": 1.6211506943845424, "grad_norm": 0.59375, "learning_rate": 1.7322859514941813e-06, "loss": 1.3912, "step": 9397 }, { "epoch": 1.6213232122832744, "grad_norm": 0.609375, "learning_rate": 1.7307550973458843e-06, "loss": 1.3587, "step": 9398 }, { "epoch": 1.6214957301820063, "grad_norm": 0.609375, "learning_rate": 1.7292248558349233e-06, "loss": 1.5153, "step": 9399 }, { "epoch": 1.6216682480807383, "grad_norm": 0.6015625, "learning_rate": 1.7276952270746606e-06, "loss": 1.3662, "step": 9400 }, { "epoch": 1.6216682480807383, "eval_loss": 1.4071524143218994, "eval_runtime": 10.8335, "eval_samples_per_second": 94.522, "eval_steps_per_second": 23.631, "step": 9400 }, { "epoch": 1.6218407659794702, "grad_norm": 0.58203125, "learning_rate": 1.7261662111784229e-06, "loss": 1.4412, "step": 9401 }, { "epoch": 1.6220132838782022, "grad_norm": 0.5703125, "learning_rate": 1.7246378082594906e-06, "loss": 1.3786, "step": 9402 }, { "epoch": 1.6221858017769344, "grad_norm": 0.609375, "learning_rate": 1.7231100184310955e-06, "loss": 1.3919, "step": 9403 }, { "epoch": 1.6223583196756663, "grad_norm": 0.59765625, "learning_rate": 1.7215828418064263e-06, "loss": 1.3885, "step": 9404 }, { "epoch": 1.6225308375743983, "grad_norm": 0.56640625, "learning_rate": 1.7200562784986263e-06, "loss": 1.3885, "step": 9405 }, { "epoch": 1.6227033554731305, "grad_norm": 0.58984375, "learning_rate": 1.7185303286207932e-06, "loss": 1.4058, "step": 9406 }, { "epoch": 1.6228758733718625, "grad_norm": 0.625, "learning_rate": 1.7170049922859788e-06, "loss": 1.4768, "step": 9407 }, { "epoch": 1.6230483912705944, "grad_norm": 0.60546875, "learning_rate": 1.7154802696071882e-06, "loss": 1.4446, "step": 9408 }, { "epoch": 1.6232209091693264, "grad_norm": 0.5625, "learning_rate": 1.7139561606973832e-06, "loss": 1.4344, "step": 9409 }, { "epoch": 1.6233934270680583, "grad_norm": 0.59765625, "learning_rate": 1.7124326656694823e-06, "loss": 1.4298, "step": 9410 }, { "epoch": 1.6235659449667903, "grad_norm": 0.5859375, "learning_rate": 1.7109097846363476e-06, "loss": 1.435, "step": 9411 }, { "epoch": 1.6237384628655223, "grad_norm": 0.57421875, "learning_rate": 1.7093875177108131e-06, "loss": 1.4331, "step": 9412 }, { "epoch": 1.6239109807642542, "grad_norm": 0.59765625, "learning_rate": 1.7078658650056524e-06, "loss": 1.405, "step": 9413 }, { "epoch": 1.6240834986629862, "grad_norm": 0.640625, "learning_rate": 1.7063448266335991e-06, "loss": 1.5126, "step": 9414 }, { "epoch": 1.6242560165617181, "grad_norm": 0.59375, "learning_rate": 1.7048244027073424e-06, "loss": 1.3834, "step": 9415 }, { "epoch": 1.6244285344604503, "grad_norm": 0.57421875, "learning_rate": 1.7033045933395242e-06, "loss": 1.4099, "step": 9416 }, { "epoch": 1.6246010523591823, "grad_norm": 0.64453125, "learning_rate": 1.7017853986427423e-06, "loss": 1.3877, "step": 9417 }, { "epoch": 1.6247735702579142, "grad_norm": 0.5546875, "learning_rate": 1.70026681872955e-06, "loss": 1.3025, "step": 9418 }, { "epoch": 1.6249460881566462, "grad_norm": 0.625, "learning_rate": 1.6987488537124454e-06, "loss": 1.4223, "step": 9419 }, { "epoch": 1.6251186060553784, "grad_norm": 0.5625, "learning_rate": 1.6972315037039e-06, "loss": 1.4389, "step": 9420 }, { "epoch": 1.6252911239541104, "grad_norm": 0.58203125, "learning_rate": 1.69571476881632e-06, "loss": 1.4033, "step": 9421 }, { "epoch": 1.6254636418528423, "grad_norm": 0.55859375, "learning_rate": 1.6941986491620754e-06, "loss": 1.3848, "step": 9422 }, { "epoch": 1.6256361597515743, "grad_norm": 0.5625, "learning_rate": 1.6926831448534964e-06, "loss": 1.448, "step": 9423 }, { "epoch": 1.6258086776503062, "grad_norm": 0.77734375, "learning_rate": 1.6911682560028552e-06, "loss": 1.4198, "step": 9424 }, { "epoch": 1.6259811955490382, "grad_norm": 0.55859375, "learning_rate": 1.6896539827223845e-06, "loss": 1.4735, "step": 9425 }, { "epoch": 1.6261537134477702, "grad_norm": 0.66015625, "learning_rate": 1.6881403251242723e-06, "loss": 1.2885, "step": 9426 }, { "epoch": 1.6263262313465021, "grad_norm": 0.60546875, "learning_rate": 1.6866272833206598e-06, "loss": 1.332, "step": 9427 }, { "epoch": 1.626498749245234, "grad_norm": 0.54296875, "learning_rate": 1.685114857423643e-06, "loss": 1.3159, "step": 9428 }, { "epoch": 1.626671267143966, "grad_norm": 0.5625, "learning_rate": 1.6836030475452737e-06, "loss": 1.3439, "step": 9429 }, { "epoch": 1.6268437850426982, "grad_norm": 0.57421875, "learning_rate": 1.6820918537975484e-06, "loss": 1.4719, "step": 9430 }, { "epoch": 1.6270163029414302, "grad_norm": 0.625, "learning_rate": 1.6805812762924367e-06, "loss": 1.4097, "step": 9431 }, { "epoch": 1.6271888208401621, "grad_norm": 0.5859375, "learning_rate": 1.6790713151418403e-06, "loss": 1.3939, "step": 9432 }, { "epoch": 1.6273613387388943, "grad_norm": 0.59765625, "learning_rate": 1.6775619704576385e-06, "loss": 1.4855, "step": 9433 }, { "epoch": 1.6275338566376263, "grad_norm": 0.9453125, "learning_rate": 1.676053242351644e-06, "loss": 1.3427, "step": 9434 }, { "epoch": 1.6277063745363582, "grad_norm": 0.57421875, "learning_rate": 1.6745451309356354e-06, "loss": 1.4919, "step": 9435 }, { "epoch": 1.6278788924350902, "grad_norm": 0.578125, "learning_rate": 1.673037636321344e-06, "loss": 1.4468, "step": 9436 }, { "epoch": 1.6280514103338222, "grad_norm": 0.5625, "learning_rate": 1.6715307586204533e-06, "loss": 1.4331, "step": 9437 }, { "epoch": 1.6282239282325541, "grad_norm": 0.61328125, "learning_rate": 1.6700244979446034e-06, "loss": 1.3679, "step": 9438 }, { "epoch": 1.628396446131286, "grad_norm": 0.5625, "learning_rate": 1.6685188544053855e-06, "loss": 1.317, "step": 9439 }, { "epoch": 1.628568964030018, "grad_norm": 0.59765625, "learning_rate": 1.66701382811435e-06, "loss": 1.4306, "step": 9440 }, { "epoch": 1.62874148192875, "grad_norm": 0.57421875, "learning_rate": 1.6655094191829957e-06, "loss": 1.3777, "step": 9441 }, { "epoch": 1.628913999827482, "grad_norm": 0.53515625, "learning_rate": 1.6640056277227824e-06, "loss": 1.3518, "step": 9442 }, { "epoch": 1.629086517726214, "grad_norm": 0.5859375, "learning_rate": 1.662502453845114e-06, "loss": 1.5547, "step": 9443 }, { "epoch": 1.6292590356249461, "grad_norm": 0.56640625, "learning_rate": 1.660999897661363e-06, "loss": 1.4116, "step": 9444 }, { "epoch": 1.629431553523678, "grad_norm": 0.5703125, "learning_rate": 1.659497959282842e-06, "loss": 1.3623, "step": 9445 }, { "epoch": 1.62960407142241, "grad_norm": 0.6640625, "learning_rate": 1.6579966388208257e-06, "loss": 1.4435, "step": 9446 }, { "epoch": 1.6297765893211422, "grad_norm": 0.57421875, "learning_rate": 1.6564959363865418e-06, "loss": 1.4531, "step": 9447 }, { "epoch": 1.6299491072198742, "grad_norm": 0.58984375, "learning_rate": 1.6549958520911713e-06, "loss": 1.3742, "step": 9448 }, { "epoch": 1.6301216251186061, "grad_norm": 0.55859375, "learning_rate": 1.653496386045851e-06, "loss": 1.341, "step": 9449 }, { "epoch": 1.630294143017338, "grad_norm": 0.5703125, "learning_rate": 1.6519975383616716e-06, "loss": 1.3705, "step": 9450 }, { "epoch": 1.63046666091607, "grad_norm": 0.57421875, "learning_rate": 1.6504993091496701e-06, "loss": 1.4054, "step": 9451 }, { "epoch": 1.630639178814802, "grad_norm": 0.58984375, "learning_rate": 1.6490016985208546e-06, "loss": 1.4413, "step": 9452 }, { "epoch": 1.630811696713534, "grad_norm": 0.671875, "learning_rate": 1.6475047065861705e-06, "loss": 1.4356, "step": 9453 }, { "epoch": 1.630984214612266, "grad_norm": 1.046875, "learning_rate": 1.6460083334565248e-06, "loss": 1.3905, "step": 9454 }, { "epoch": 1.631156732510998, "grad_norm": 0.78515625, "learning_rate": 1.644512579242784e-06, "loss": 1.3791, "step": 9455 }, { "epoch": 1.6313292504097299, "grad_norm": 0.5625, "learning_rate": 1.6430174440557567e-06, "loss": 1.4764, "step": 9456 }, { "epoch": 1.631501768308462, "grad_norm": 0.53515625, "learning_rate": 1.6415229280062139e-06, "loss": 1.39, "step": 9457 }, { "epoch": 1.631674286207194, "grad_norm": 0.55859375, "learning_rate": 1.6400290312048794e-06, "loss": 1.4338, "step": 9458 }, { "epoch": 1.631846804105926, "grad_norm": 0.578125, "learning_rate": 1.638535753762429e-06, "loss": 1.4187, "step": 9459 }, { "epoch": 1.632019322004658, "grad_norm": 0.6171875, "learning_rate": 1.6370430957894945e-06, "loss": 1.4055, "step": 9460 }, { "epoch": 1.6321918399033901, "grad_norm": 0.5703125, "learning_rate": 1.6355510573966648e-06, "loss": 1.5129, "step": 9461 }, { "epoch": 1.632364357802122, "grad_norm": 0.55859375, "learning_rate": 1.6340596386944707e-06, "loss": 1.3567, "step": 9462 }, { "epoch": 1.632536875700854, "grad_norm": 0.59765625, "learning_rate": 1.632568839793417e-06, "loss": 1.379, "step": 9463 }, { "epoch": 1.632709393599586, "grad_norm": 0.6015625, "learning_rate": 1.6310786608039408e-06, "loss": 1.4042, "step": 9464 }, { "epoch": 1.632881911498318, "grad_norm": 0.57421875, "learning_rate": 1.629589101836454e-06, "loss": 1.3955, "step": 9465 }, { "epoch": 1.63305442939705, "grad_norm": 0.53515625, "learning_rate": 1.6281001630013038e-06, "loss": 1.3853, "step": 9466 }, { "epoch": 1.6332269472957819, "grad_norm": 0.55078125, "learning_rate": 1.6266118444088041e-06, "loss": 1.3801, "step": 9467 }, { "epoch": 1.6333994651945138, "grad_norm": 0.61328125, "learning_rate": 1.625124146169218e-06, "loss": 1.5522, "step": 9468 }, { "epoch": 1.6335719830932458, "grad_norm": 0.59375, "learning_rate": 1.623637068392765e-06, "loss": 1.4954, "step": 9469 }, { "epoch": 1.6337445009919778, "grad_norm": 0.65625, "learning_rate": 1.6221506111896147e-06, "loss": 1.4894, "step": 9470 }, { "epoch": 1.63391701889071, "grad_norm": 0.6328125, "learning_rate": 1.6206647746698945e-06, "loss": 1.4969, "step": 9471 }, { "epoch": 1.634089536789442, "grad_norm": 0.57421875, "learning_rate": 1.6191795589436843e-06, "loss": 1.4825, "step": 9472 }, { "epoch": 1.6342620546881739, "grad_norm": 0.609375, "learning_rate": 1.6176949641210183e-06, "loss": 1.4227, "step": 9473 }, { "epoch": 1.634434572586906, "grad_norm": 0.5625, "learning_rate": 1.6162109903118872e-06, "loss": 1.4529, "step": 9474 }, { "epoch": 1.634607090485638, "grad_norm": 0.58984375, "learning_rate": 1.6147276376262256e-06, "loss": 1.4036, "step": 9475 }, { "epoch": 1.63477960838437, "grad_norm": 0.56640625, "learning_rate": 1.6132449061739386e-06, "loss": 1.367, "step": 9476 }, { "epoch": 1.634952126283102, "grad_norm": 0.56640625, "learning_rate": 1.6117627960648684e-06, "loss": 1.4574, "step": 9477 }, { "epoch": 1.635124644181834, "grad_norm": 0.59375, "learning_rate": 1.6102813074088242e-06, "loss": 1.4849, "step": 9478 }, { "epoch": 1.6352971620805659, "grad_norm": 0.69140625, "learning_rate": 1.6088004403155611e-06, "loss": 1.3788, "step": 9479 }, { "epoch": 1.6354696799792978, "grad_norm": 0.5703125, "learning_rate": 1.6073201948947925e-06, "loss": 1.3557, "step": 9480 }, { "epoch": 1.6356421978780298, "grad_norm": 0.6015625, "learning_rate": 1.605840571256183e-06, "loss": 1.5011, "step": 9481 }, { "epoch": 1.6358147157767617, "grad_norm": 0.58984375, "learning_rate": 1.6043615695093561e-06, "loss": 1.3889, "step": 9482 }, { "epoch": 1.6359872336754937, "grad_norm": 0.61328125, "learning_rate": 1.6028831897638774e-06, "loss": 1.3383, "step": 9483 }, { "epoch": 1.6361597515742257, "grad_norm": 0.5546875, "learning_rate": 1.6014054321292848e-06, "loss": 1.3963, "step": 9484 }, { "epoch": 1.6363322694729578, "grad_norm": 0.6171875, "learning_rate": 1.599928296715052e-06, "loss": 1.4085, "step": 9485 }, { "epoch": 1.6365047873716898, "grad_norm": 0.59375, "learning_rate": 1.5984517836306168e-06, "loss": 1.4987, "step": 9486 }, { "epoch": 1.6366773052704218, "grad_norm": 0.57421875, "learning_rate": 1.5969758929853697e-06, "loss": 1.4016, "step": 9487 }, { "epoch": 1.636849823169154, "grad_norm": 0.59375, "learning_rate": 1.5955006248886519e-06, "loss": 1.386, "step": 9488 }, { "epoch": 1.637022341067886, "grad_norm": 0.65234375, "learning_rate": 1.5940259794497614e-06, "loss": 1.3809, "step": 9489 }, { "epoch": 1.6371948589666179, "grad_norm": 0.56640625, "learning_rate": 1.5925519567779502e-06, "loss": 1.5296, "step": 9490 }, { "epoch": 1.6373673768653498, "grad_norm": 0.59375, "learning_rate": 1.5910785569824217e-06, "loss": 1.4201, "step": 9491 }, { "epoch": 1.6375398947640818, "grad_norm": 0.546875, "learning_rate": 1.5896057801723352e-06, "loss": 1.467, "step": 9492 }, { "epoch": 1.6377124126628138, "grad_norm": 0.74609375, "learning_rate": 1.5881336264568037e-06, "loss": 1.4942, "step": 9493 }, { "epoch": 1.6378849305615457, "grad_norm": 0.5859375, "learning_rate": 1.586662095944892e-06, "loss": 1.4433, "step": 9494 }, { "epoch": 1.6380574484602777, "grad_norm": 0.5859375, "learning_rate": 1.585191188745624e-06, "loss": 1.3434, "step": 9495 }, { "epoch": 1.6382299663590096, "grad_norm": 0.58203125, "learning_rate": 1.5837209049679657e-06, "loss": 1.5629, "step": 9496 }, { "epoch": 1.6384024842577416, "grad_norm": 0.56640625, "learning_rate": 1.582251244720855e-06, "loss": 1.4401, "step": 9497 }, { "epoch": 1.6385750021564738, "grad_norm": 0.67578125, "learning_rate": 1.5807822081131664e-06, "loss": 1.4043, "step": 9498 }, { "epoch": 1.6387475200552057, "grad_norm": 0.71484375, "learning_rate": 1.5793137952537374e-06, "loss": 1.4337, "step": 9499 }, { "epoch": 1.6389200379539377, "grad_norm": 0.578125, "learning_rate": 1.5778460062513578e-06, "loss": 1.4094, "step": 9500 }, { "epoch": 1.6389200379539377, "eval_loss": 1.4071273803710938, "eval_runtime": 11.0018, "eval_samples_per_second": 93.076, "eval_steps_per_second": 23.269, "step": 9500 }, { "epoch": 1.6390925558526697, "grad_norm": 0.58203125, "learning_rate": 1.5763788412147695e-06, "loss": 1.3746, "step": 9501 }, { "epoch": 1.6392650737514018, "grad_norm": 0.59375, "learning_rate": 1.57491230025267e-06, "loss": 1.4003, "step": 9502 }, { "epoch": 1.6394375916501338, "grad_norm": 0.6171875, "learning_rate": 1.5734463834737102e-06, "loss": 1.4765, "step": 9503 }, { "epoch": 1.6396101095488658, "grad_norm": 0.62109375, "learning_rate": 1.5719810909864941e-06, "loss": 1.4375, "step": 9504 }, { "epoch": 1.6397826274475977, "grad_norm": 0.640625, "learning_rate": 1.5705164228995783e-06, "loss": 1.4209, "step": 9505 }, { "epoch": 1.6399551453463297, "grad_norm": 0.66015625, "learning_rate": 1.5690523793214785e-06, "loss": 1.4444, "step": 9506 }, { "epoch": 1.6401276632450617, "grad_norm": 0.5625, "learning_rate": 1.567588960360653e-06, "loss": 1.4155, "step": 9507 }, { "epoch": 1.6403001811437936, "grad_norm": 0.59765625, "learning_rate": 1.56612616612553e-06, "loss": 1.5217, "step": 9508 }, { "epoch": 1.6404726990425256, "grad_norm": 0.58203125, "learning_rate": 1.5646639967244758e-06, "loss": 1.5132, "step": 9509 }, { "epoch": 1.6406452169412575, "grad_norm": 0.5625, "learning_rate": 1.5632024522658185e-06, "loss": 1.3342, "step": 9510 }, { "epoch": 1.6408177348399895, "grad_norm": 0.640625, "learning_rate": 1.56174153285784e-06, "loss": 1.457, "step": 9511 }, { "epoch": 1.6409902527387217, "grad_norm": 0.56640625, "learning_rate": 1.560281238608773e-06, "loss": 1.3921, "step": 9512 }, { "epoch": 1.6411627706374536, "grad_norm": 0.5625, "learning_rate": 1.558821569626805e-06, "loss": 1.4043, "step": 9513 }, { "epoch": 1.6413352885361856, "grad_norm": 0.56640625, "learning_rate": 1.557362526020081e-06, "loss": 1.4605, "step": 9514 }, { "epoch": 1.6415078064349178, "grad_norm": 0.578125, "learning_rate": 1.5559041078966885e-06, "loss": 1.3708, "step": 9515 }, { "epoch": 1.6416803243336497, "grad_norm": 0.6640625, "learning_rate": 1.554446315364685e-06, "loss": 1.35, "step": 9516 }, { "epoch": 1.6418528422323817, "grad_norm": 0.5859375, "learning_rate": 1.552989148532067e-06, "loss": 1.529, "step": 9517 }, { "epoch": 1.6420253601311137, "grad_norm": 0.59765625, "learning_rate": 1.5515326075067916e-06, "loss": 1.3917, "step": 9518 }, { "epoch": 1.6421978780298456, "grad_norm": 0.5859375, "learning_rate": 1.5500766923967692e-06, "loss": 1.4447, "step": 9519 }, { "epoch": 1.6423703959285776, "grad_norm": 0.5859375, "learning_rate": 1.5486214033098613e-06, "loss": 1.3875, "step": 9520 }, { "epoch": 1.6425429138273095, "grad_norm": 0.6171875, "learning_rate": 1.5471667403538904e-06, "loss": 1.5287, "step": 9521 }, { "epoch": 1.6427154317260415, "grad_norm": 0.609375, "learning_rate": 1.5457127036366216e-06, "loss": 1.3733, "step": 9522 }, { "epoch": 1.6428879496247735, "grad_norm": 0.56640625, "learning_rate": 1.5442592932657797e-06, "loss": 1.3913, "step": 9523 }, { "epoch": 1.6430604675235054, "grad_norm": 0.609375, "learning_rate": 1.5428065093490441e-06, "loss": 1.3989, "step": 9524 }, { "epoch": 1.6432329854222374, "grad_norm": 0.60546875, "learning_rate": 1.5413543519940443e-06, "loss": 1.3217, "step": 9525 }, { "epoch": 1.6434055033209696, "grad_norm": 0.578125, "learning_rate": 1.5399028213083666e-06, "loss": 1.3971, "step": 9526 }, { "epoch": 1.6435780212197015, "grad_norm": 0.59375, "learning_rate": 1.5384519173995515e-06, "loss": 1.4748, "step": 9527 }, { "epoch": 1.6437505391184335, "grad_norm": 0.56640625, "learning_rate": 1.5370016403750843e-06, "loss": 1.3221, "step": 9528 }, { "epoch": 1.6439230570171657, "grad_norm": 0.55078125, "learning_rate": 1.5355519903424187e-06, "loss": 1.4242, "step": 9529 }, { "epoch": 1.6440955749158976, "grad_norm": 0.59375, "learning_rate": 1.534102967408948e-06, "loss": 1.4049, "step": 9530 }, { "epoch": 1.6442680928146296, "grad_norm": 0.62109375, "learning_rate": 1.5326545716820273e-06, "loss": 1.2972, "step": 9531 }, { "epoch": 1.6444406107133616, "grad_norm": 0.578125, "learning_rate": 1.531206803268962e-06, "loss": 1.4253, "step": 9532 }, { "epoch": 1.6446131286120935, "grad_norm": 0.5859375, "learning_rate": 1.5297596622770117e-06, "loss": 1.4365, "step": 9533 }, { "epoch": 1.6447856465108255, "grad_norm": 0.61328125, "learning_rate": 1.5283131488133906e-06, "loss": 1.3451, "step": 9534 }, { "epoch": 1.6449581644095574, "grad_norm": 0.6015625, "learning_rate": 1.5268672629852655e-06, "loss": 1.4688, "step": 9535 }, { "epoch": 1.6451306823082894, "grad_norm": 0.5859375, "learning_rate": 1.5254220048997548e-06, "loss": 1.4259, "step": 9536 }, { "epoch": 1.6453032002070214, "grad_norm": 0.5703125, "learning_rate": 1.523977374663934e-06, "loss": 1.3756, "step": 9537 }, { "epoch": 1.6454757181057533, "grad_norm": 0.55859375, "learning_rate": 1.5225333723848335e-06, "loss": 1.4103, "step": 9538 }, { "epoch": 1.6456482360044855, "grad_norm": 0.62890625, "learning_rate": 1.5210899981694238e-06, "loss": 1.5031, "step": 9539 }, { "epoch": 1.6458207539032175, "grad_norm": 0.5703125, "learning_rate": 1.5196472521246518e-06, "loss": 1.405, "step": 9540 }, { "epoch": 1.6459932718019494, "grad_norm": 0.6015625, "learning_rate": 1.5182051343573968e-06, "loss": 1.507, "step": 9541 }, { "epoch": 1.6461657897006814, "grad_norm": 0.57421875, "learning_rate": 1.5167636449745015e-06, "loss": 1.4083, "step": 9542 }, { "epoch": 1.6463383075994136, "grad_norm": 0.5546875, "learning_rate": 1.515322784082761e-06, "loss": 1.3217, "step": 9543 }, { "epoch": 1.6465108254981455, "grad_norm": 0.56640625, "learning_rate": 1.5138825517889232e-06, "loss": 1.4405, "step": 9544 }, { "epoch": 1.6466833433968775, "grad_norm": 0.6328125, "learning_rate": 1.5124429481996904e-06, "loss": 1.3475, "step": 9545 }, { "epoch": 1.6468558612956095, "grad_norm": 0.609375, "learning_rate": 1.5110039734217186e-06, "loss": 1.4315, "step": 9546 }, { "epoch": 1.6470283791943414, "grad_norm": 0.5703125, "learning_rate": 1.50956562756161e-06, "loss": 1.3847, "step": 9547 }, { "epoch": 1.6472008970930734, "grad_norm": 0.54296875, "learning_rate": 1.508127910725934e-06, "loss": 1.362, "step": 9548 }, { "epoch": 1.6473734149918053, "grad_norm": 0.57421875, "learning_rate": 1.5066908230212008e-06, "loss": 1.448, "step": 9549 }, { "epoch": 1.6475459328905373, "grad_norm": 0.62890625, "learning_rate": 1.5052543645538798e-06, "loss": 1.5037, "step": 9550 }, { "epoch": 1.6477184507892693, "grad_norm": 0.58203125, "learning_rate": 1.5038185354303924e-06, "loss": 1.4538, "step": 9551 }, { "epoch": 1.6478909686880012, "grad_norm": 0.5859375, "learning_rate": 1.5023833357571128e-06, "loss": 1.4369, "step": 9552 }, { "epoch": 1.6480634865867334, "grad_norm": 0.59765625, "learning_rate": 1.5009487656403765e-06, "loss": 1.385, "step": 9553 }, { "epoch": 1.6482360044854654, "grad_norm": 0.56640625, "learning_rate": 1.4995148251864566e-06, "loss": 1.331, "step": 9554 }, { "epoch": 1.6484085223841973, "grad_norm": 0.5703125, "learning_rate": 1.4980815145015925e-06, "loss": 1.3528, "step": 9555 }, { "epoch": 1.6485810402829295, "grad_norm": 0.6640625, "learning_rate": 1.4966488336919728e-06, "loss": 1.3618, "step": 9556 }, { "epoch": 1.6487535581816615, "grad_norm": 0.6015625, "learning_rate": 1.4952167828637377e-06, "loss": 1.3758, "step": 9557 }, { "epoch": 1.6489260760803934, "grad_norm": 0.58984375, "learning_rate": 1.4937853621229848e-06, "loss": 1.5034, "step": 9558 }, { "epoch": 1.6490985939791254, "grad_norm": 0.60546875, "learning_rate": 1.4923545715757625e-06, "loss": 1.445, "step": 9559 }, { "epoch": 1.6492711118778574, "grad_norm": 0.55078125, "learning_rate": 1.4909244113280685e-06, "loss": 1.4184, "step": 9560 }, { "epoch": 1.6494436297765893, "grad_norm": 0.6015625, "learning_rate": 1.4894948814858656e-06, "loss": 1.4357, "step": 9561 }, { "epoch": 1.6496161476753213, "grad_norm": 0.67578125, "learning_rate": 1.4880659821550547e-06, "loss": 1.4171, "step": 9562 }, { "epoch": 1.6497886655740532, "grad_norm": 0.59375, "learning_rate": 1.4866377134415022e-06, "loss": 1.4275, "step": 9563 }, { "epoch": 1.6499611834727852, "grad_norm": 0.61328125, "learning_rate": 1.4852100754510213e-06, "loss": 1.4104, "step": 9564 }, { "epoch": 1.6501337013715172, "grad_norm": 0.73046875, "learning_rate": 1.4837830682893806e-06, "loss": 1.4573, "step": 9565 }, { "epoch": 1.6503062192702493, "grad_norm": 0.56640625, "learning_rate": 1.4823566920623022e-06, "loss": 1.3716, "step": 9566 }, { "epoch": 1.6504787371689813, "grad_norm": 0.546875, "learning_rate": 1.4809309468754608e-06, "loss": 1.3767, "step": 9567 }, { "epoch": 1.6506512550677133, "grad_norm": 0.5859375, "learning_rate": 1.4795058328344847e-06, "loss": 1.4028, "step": 9568 }, { "epoch": 1.6508237729664452, "grad_norm": 0.5625, "learning_rate": 1.4780813500449541e-06, "loss": 1.3859, "step": 9569 }, { "epoch": 1.6509962908651774, "grad_norm": 0.56640625, "learning_rate": 1.4766574986124082e-06, "loss": 1.4154, "step": 9570 }, { "epoch": 1.6511688087639094, "grad_norm": 0.5625, "learning_rate": 1.4752342786423257e-06, "loss": 1.4186, "step": 9571 }, { "epoch": 1.6513413266626413, "grad_norm": 0.59765625, "learning_rate": 1.4738116902401567e-06, "loss": 1.3528, "step": 9572 }, { "epoch": 1.6515138445613733, "grad_norm": 0.5859375, "learning_rate": 1.4723897335112913e-06, "loss": 1.397, "step": 9573 }, { "epoch": 1.6516863624601053, "grad_norm": 0.546875, "learning_rate": 1.4709684085610765e-06, "loss": 1.3941, "step": 9574 }, { "epoch": 1.6518588803588372, "grad_norm": 0.546875, "learning_rate": 1.4695477154948134e-06, "loss": 1.479, "step": 9575 }, { "epoch": 1.6520313982575692, "grad_norm": 0.61328125, "learning_rate": 1.468127654417757e-06, "loss": 1.3031, "step": 9576 }, { "epoch": 1.6522039161563011, "grad_norm": 0.59375, "learning_rate": 1.4667082254351127e-06, "loss": 1.4715, "step": 9577 }, { "epoch": 1.652376434055033, "grad_norm": 0.59765625, "learning_rate": 1.4652894286520446e-06, "loss": 1.4589, "step": 9578 }, { "epoch": 1.652548951953765, "grad_norm": 0.5859375, "learning_rate": 1.4638712641736585e-06, "loss": 1.3271, "step": 9579 }, { "epoch": 1.6527214698524972, "grad_norm": 0.625, "learning_rate": 1.4624537321050292e-06, "loss": 1.4969, "step": 9580 }, { "epoch": 1.6528939877512292, "grad_norm": 0.62109375, "learning_rate": 1.4610368325511693e-06, "loss": 1.3658, "step": 9581 }, { "epoch": 1.6530665056499612, "grad_norm": 0.60546875, "learning_rate": 1.4596205656170581e-06, "loss": 1.4521, "step": 9582 }, { "epoch": 1.6532390235486933, "grad_norm": 0.5546875, "learning_rate": 1.4582049314076163e-06, "loss": 1.4213, "step": 9583 }, { "epoch": 1.6534115414474253, "grad_norm": 0.5859375, "learning_rate": 1.4567899300277222e-06, "loss": 1.4668, "step": 9584 }, { "epoch": 1.6535840593461573, "grad_norm": 0.59765625, "learning_rate": 1.4553755615822151e-06, "loss": 1.4904, "step": 9585 }, { "epoch": 1.6537565772448892, "grad_norm": 0.62890625, "learning_rate": 1.4539618261758737e-06, "loss": 1.3653, "step": 9586 }, { "epoch": 1.6539290951436212, "grad_norm": 0.578125, "learning_rate": 1.4525487239134373e-06, "loss": 1.333, "step": 9587 }, { "epoch": 1.6541016130423531, "grad_norm": 0.609375, "learning_rate": 1.4511362548996e-06, "loss": 1.519, "step": 9588 }, { "epoch": 1.654274130941085, "grad_norm": 0.60546875, "learning_rate": 1.4497244192390037e-06, "loss": 1.3489, "step": 9589 }, { "epoch": 1.654446648839817, "grad_norm": 0.578125, "learning_rate": 1.4483132170362468e-06, "loss": 1.4774, "step": 9590 }, { "epoch": 1.654619166738549, "grad_norm": 0.71875, "learning_rate": 1.4469026483958837e-06, "loss": 1.3916, "step": 9591 }, { "epoch": 1.654791684637281, "grad_norm": 0.6015625, "learning_rate": 1.4454927134224085e-06, "loss": 1.4945, "step": 9592 }, { "epoch": 1.654964202536013, "grad_norm": 0.5703125, "learning_rate": 1.4440834122202896e-06, "loss": 1.4431, "step": 9593 }, { "epoch": 1.6551367204347451, "grad_norm": 0.5703125, "learning_rate": 1.4426747448939294e-06, "loss": 1.4583, "step": 9594 }, { "epoch": 1.655309238333477, "grad_norm": 0.58203125, "learning_rate": 1.4412667115476918e-06, "loss": 1.3336, "step": 9595 }, { "epoch": 1.655481756232209, "grad_norm": 0.56640625, "learning_rate": 1.439859312285894e-06, "loss": 1.387, "step": 9596 }, { "epoch": 1.6556542741309412, "grad_norm": 0.5625, "learning_rate": 1.4384525472128052e-06, "loss": 1.4272, "step": 9597 }, { "epoch": 1.6558267920296732, "grad_norm": 0.57421875, "learning_rate": 1.4370464164326458e-06, "loss": 1.4258, "step": 9598 }, { "epoch": 1.6559993099284052, "grad_norm": 0.6015625, "learning_rate": 1.4356409200495924e-06, "loss": 1.4453, "step": 9599 }, { "epoch": 1.6561718278271371, "grad_norm": 0.5625, "learning_rate": 1.4342360581677717e-06, "loss": 1.4543, "step": 9600 }, { "epoch": 1.6561718278271371, "eval_loss": 1.4071323871612549, "eval_runtime": 10.9314, "eval_samples_per_second": 93.675, "eval_steps_per_second": 23.419, "step": 9600 }, { "epoch": 1.656344345725869, "grad_norm": 0.62890625, "learning_rate": 1.4328318308912647e-06, "loss": 1.3847, "step": 9601 }, { "epoch": 1.656516863624601, "grad_norm": 0.81640625, "learning_rate": 1.4314282383241097e-06, "loss": 1.4592, "step": 9602 }, { "epoch": 1.656689381523333, "grad_norm": 0.6015625, "learning_rate": 1.430025280570284e-06, "loss": 1.5076, "step": 9603 }, { "epoch": 1.656861899422065, "grad_norm": 0.5625, "learning_rate": 1.4286229577337373e-06, "loss": 1.3705, "step": 9604 }, { "epoch": 1.657034417320797, "grad_norm": 0.59375, "learning_rate": 1.4272212699183574e-06, "loss": 1.4033, "step": 9605 }, { "epoch": 1.6572069352195289, "grad_norm": 0.54296875, "learning_rate": 1.42582021722799e-06, "loss": 1.4395, "step": 9606 }, { "epoch": 1.657379453118261, "grad_norm": 0.55078125, "learning_rate": 1.4244197997664354e-06, "loss": 1.3369, "step": 9607 }, { "epoch": 1.657551971016993, "grad_norm": 0.58984375, "learning_rate": 1.4230200176374442e-06, "loss": 1.4236, "step": 9608 }, { "epoch": 1.657724488915725, "grad_norm": 0.5625, "learning_rate": 1.421620870944722e-06, "loss": 1.4639, "step": 9609 }, { "epoch": 1.657897006814457, "grad_norm": 1.046875, "learning_rate": 1.4202223597919285e-06, "loss": 1.4062, "step": 9610 }, { "epoch": 1.6580695247131891, "grad_norm": 0.6328125, "learning_rate": 1.4188244842826659e-06, "loss": 1.4875, "step": 9611 }, { "epoch": 1.658242042611921, "grad_norm": 0.625, "learning_rate": 1.4174272445205084e-06, "loss": 1.3593, "step": 9612 }, { "epoch": 1.658414560510653, "grad_norm": 0.59765625, "learning_rate": 1.416030640608963e-06, "loss": 1.4083, "step": 9613 }, { "epoch": 1.658587078409385, "grad_norm": 0.5390625, "learning_rate": 1.414634672651506e-06, "loss": 1.391, "step": 9614 }, { "epoch": 1.658759596308117, "grad_norm": 0.61328125, "learning_rate": 1.4132393407515555e-06, "loss": 1.5134, "step": 9615 }, { "epoch": 1.658932114206849, "grad_norm": 0.62890625, "learning_rate": 1.4118446450124834e-06, "loss": 1.3834, "step": 9616 }, { "epoch": 1.659104632105581, "grad_norm": 0.578125, "learning_rate": 1.4104505855376271e-06, "loss": 1.4039, "step": 9617 }, { "epoch": 1.6592771500043129, "grad_norm": 0.65234375, "learning_rate": 1.4090571624302585e-06, "loss": 1.5073, "step": 9618 }, { "epoch": 1.6594496679030448, "grad_norm": 1.6015625, "learning_rate": 1.4076643757936147e-06, "loss": 1.5064, "step": 9619 }, { "epoch": 1.6596221858017768, "grad_norm": 1.421875, "learning_rate": 1.4062722257308803e-06, "loss": 1.3995, "step": 9620 }, { "epoch": 1.659794703700509, "grad_norm": 0.59765625, "learning_rate": 1.4048807123451958e-06, "loss": 1.4615, "step": 9621 }, { "epoch": 1.659967221599241, "grad_norm": 0.62890625, "learning_rate": 1.4034898357396532e-06, "loss": 1.406, "step": 9622 }, { "epoch": 1.6601397394979729, "grad_norm": 0.5625, "learning_rate": 1.4020995960173001e-06, "loss": 1.4212, "step": 9623 }, { "epoch": 1.660312257396705, "grad_norm": 0.70703125, "learning_rate": 1.4007099932811253e-06, "loss": 1.4811, "step": 9624 }, { "epoch": 1.660484775295437, "grad_norm": 0.59375, "learning_rate": 1.3993210276340895e-06, "loss": 1.4382, "step": 9625 }, { "epoch": 1.660657293194169, "grad_norm": 0.58984375, "learning_rate": 1.3979326991790898e-06, "loss": 1.3531, "step": 9626 }, { "epoch": 1.660829811092901, "grad_norm": 0.58984375, "learning_rate": 1.3965450080189836e-06, "loss": 1.4453, "step": 9627 }, { "epoch": 1.661002328991633, "grad_norm": 0.6015625, "learning_rate": 1.3951579542565808e-06, "loss": 1.4147, "step": 9628 }, { "epoch": 1.6611748468903649, "grad_norm": 0.56640625, "learning_rate": 1.3937715379946414e-06, "loss": 1.3453, "step": 9629 }, { "epoch": 1.6613473647890968, "grad_norm": 0.62890625, "learning_rate": 1.3923857593358813e-06, "loss": 1.4904, "step": 9630 }, { "epoch": 1.6615198826878288, "grad_norm": 0.57421875, "learning_rate": 1.3910006183829671e-06, "loss": 1.3955, "step": 9631 }, { "epoch": 1.6616924005865608, "grad_norm": 0.58203125, "learning_rate": 1.3896161152385178e-06, "loss": 1.3752, "step": 9632 }, { "epoch": 1.6618649184852927, "grad_norm": 0.66015625, "learning_rate": 1.3882322500051072e-06, "loss": 1.4852, "step": 9633 }, { "epoch": 1.6620374363840247, "grad_norm": 0.58203125, "learning_rate": 1.3868490227852626e-06, "loss": 1.305, "step": 9634 }, { "epoch": 1.6622099542827569, "grad_norm": 0.578125, "learning_rate": 1.385466433681456e-06, "loss": 1.3817, "step": 9635 }, { "epoch": 1.6623824721814888, "grad_norm": 0.9609375, "learning_rate": 1.3840844827961263e-06, "loss": 1.4308, "step": 9636 }, { "epoch": 1.6625549900802208, "grad_norm": 0.5859375, "learning_rate": 1.3827031702316495e-06, "loss": 1.4287, "step": 9637 }, { "epoch": 1.662727507978953, "grad_norm": 0.640625, "learning_rate": 1.381322496090367e-06, "loss": 1.4174, "step": 9638 }, { "epoch": 1.662900025877685, "grad_norm": 0.55078125, "learning_rate": 1.3799424604745648e-06, "loss": 1.4176, "step": 9639 }, { "epoch": 1.663072543776417, "grad_norm": 0.55078125, "learning_rate": 1.3785630634864855e-06, "loss": 1.4426, "step": 9640 }, { "epoch": 1.6632450616751489, "grad_norm": 0.61328125, "learning_rate": 1.377184305228324e-06, "loss": 1.4198, "step": 9641 }, { "epoch": 1.6634175795738808, "grad_norm": 0.57421875, "learning_rate": 1.3758061858022276e-06, "loss": 1.3411, "step": 9642 }, { "epoch": 1.6635900974726128, "grad_norm": 0.58203125, "learning_rate": 1.3744287053102944e-06, "loss": 1.4388, "step": 9643 }, { "epoch": 1.6637626153713447, "grad_norm": 0.578125, "learning_rate": 1.3730518638545809e-06, "loss": 1.4606, "step": 9644 }, { "epoch": 1.6639351332700767, "grad_norm": 0.546875, "learning_rate": 1.3716756615370842e-06, "loss": 1.3962, "step": 9645 }, { "epoch": 1.6641076511688087, "grad_norm": 0.61328125, "learning_rate": 1.3703000984597714e-06, "loss": 1.3739, "step": 9646 }, { "epoch": 1.6642801690675406, "grad_norm": 0.59765625, "learning_rate": 1.3689251747245458e-06, "loss": 1.4007, "step": 9647 }, { "epoch": 1.6644526869662728, "grad_norm": 0.59765625, "learning_rate": 1.3675508904332703e-06, "loss": 1.3208, "step": 9648 }, { "epoch": 1.6646252048650048, "grad_norm": 0.5859375, "learning_rate": 1.3661772456877675e-06, "loss": 1.3696, "step": 9649 }, { "epoch": 1.6647977227637367, "grad_norm": 0.609375, "learning_rate": 1.3648042405897987e-06, "loss": 1.3645, "step": 9650 }, { "epoch": 1.6649702406624687, "grad_norm": 0.64453125, "learning_rate": 1.3634318752410868e-06, "loss": 1.4679, "step": 9651 }, { "epoch": 1.6651427585612009, "grad_norm": 0.58203125, "learning_rate": 1.362060149743305e-06, "loss": 1.4031, "step": 9652 }, { "epoch": 1.6653152764599328, "grad_norm": 0.5625, "learning_rate": 1.3606890641980809e-06, "loss": 1.4893, "step": 9653 }, { "epoch": 1.6654877943586648, "grad_norm": 0.55859375, "learning_rate": 1.3593186187069907e-06, "loss": 1.4081, "step": 9654 }, { "epoch": 1.6656603122573967, "grad_norm": 0.59375, "learning_rate": 1.357948813371569e-06, "loss": 1.3926, "step": 9655 }, { "epoch": 1.6658328301561287, "grad_norm": 0.57421875, "learning_rate": 1.3565796482932936e-06, "loss": 1.3133, "step": 9656 }, { "epoch": 1.6660053480548607, "grad_norm": 0.61328125, "learning_rate": 1.3552111235736077e-06, "loss": 1.3755, "step": 9657 }, { "epoch": 1.6661778659535926, "grad_norm": 0.5546875, "learning_rate": 1.3538432393138957e-06, "loss": 1.4162, "step": 9658 }, { "epoch": 1.6663503838523246, "grad_norm": 0.56640625, "learning_rate": 1.3524759956155e-06, "loss": 1.3238, "step": 9659 }, { "epoch": 1.6665229017510566, "grad_norm": 0.5859375, "learning_rate": 1.3511093925797136e-06, "loss": 1.4309, "step": 9660 }, { "epoch": 1.6666954196497885, "grad_norm": 0.6015625, "learning_rate": 1.3497434303077861e-06, "loss": 1.5292, "step": 9661 }, { "epoch": 1.6668679375485207, "grad_norm": 0.54296875, "learning_rate": 1.348378108900913e-06, "loss": 1.3987, "step": 9662 }, { "epoch": 1.6670404554472527, "grad_norm": 0.56640625, "learning_rate": 1.3470134284602487e-06, "loss": 1.4505, "step": 9663 }, { "epoch": 1.6672129733459846, "grad_norm": 0.5625, "learning_rate": 1.345649389086895e-06, "loss": 1.4911, "step": 9664 }, { "epoch": 1.6673854912447168, "grad_norm": 0.59375, "learning_rate": 1.3442859908819095e-06, "loss": 1.3742, "step": 9665 }, { "epoch": 1.6675580091434488, "grad_norm": 0.59375, "learning_rate": 1.342923233946304e-06, "loss": 1.3935, "step": 9666 }, { "epoch": 1.6677305270421807, "grad_norm": 0.5390625, "learning_rate": 1.3415611183810329e-06, "loss": 1.3412, "step": 9667 }, { "epoch": 1.6679030449409127, "grad_norm": 0.58203125, "learning_rate": 1.340199644287018e-06, "loss": 1.3944, "step": 9668 }, { "epoch": 1.6680755628396446, "grad_norm": 0.5546875, "learning_rate": 1.3388388117651186e-06, "loss": 1.3959, "step": 9669 }, { "epoch": 1.6682480807383766, "grad_norm": 0.60546875, "learning_rate": 1.3374786209161617e-06, "loss": 1.4506, "step": 9670 }, { "epoch": 1.6684205986371086, "grad_norm": 0.640625, "learning_rate": 1.3361190718409123e-06, "loss": 1.4873, "step": 9671 }, { "epoch": 1.6685931165358405, "grad_norm": 0.6015625, "learning_rate": 1.3347601646400965e-06, "loss": 1.4537, "step": 9672 }, { "epoch": 1.6687656344345725, "grad_norm": 0.62109375, "learning_rate": 1.3334018994143916e-06, "loss": 1.388, "step": 9673 }, { "epoch": 1.6689381523333044, "grad_norm": 0.6796875, "learning_rate": 1.3320442762644236e-06, "loss": 1.4557, "step": 9674 }, { "epoch": 1.6691106702320364, "grad_norm": 0.5703125, "learning_rate": 1.3306872952907769e-06, "loss": 1.4547, "step": 9675 }, { "epoch": 1.6692831881307686, "grad_norm": 0.5703125, "learning_rate": 1.329330956593985e-06, "loss": 1.4649, "step": 9676 }, { "epoch": 1.6694557060295006, "grad_norm": 0.59375, "learning_rate": 1.3279752602745287e-06, "loss": 1.4182, "step": 9677 }, { "epoch": 1.6696282239282325, "grad_norm": 0.578125, "learning_rate": 1.3266202064328548e-06, "loss": 1.5169, "step": 9678 }, { "epoch": 1.6698007418269647, "grad_norm": 0.67578125, "learning_rate": 1.325265795169348e-06, "loss": 1.4113, "step": 9679 }, { "epoch": 1.6699732597256967, "grad_norm": 0.546875, "learning_rate": 1.3239120265843508e-06, "loss": 1.4695, "step": 9680 }, { "epoch": 1.6701457776244286, "grad_norm": 0.53125, "learning_rate": 1.3225589007781658e-06, "loss": 1.4003, "step": 9681 }, { "epoch": 1.6703182955231606, "grad_norm": 0.59375, "learning_rate": 1.3212064178510342e-06, "loss": 1.373, "step": 9682 }, { "epoch": 1.6704908134218925, "grad_norm": 0.5859375, "learning_rate": 1.319854577903159e-06, "loss": 1.3848, "step": 9683 }, { "epoch": 1.6706633313206245, "grad_norm": 0.578125, "learning_rate": 1.3185033810346926e-06, "loss": 1.4805, "step": 9684 }, { "epoch": 1.6708358492193565, "grad_norm": 0.58984375, "learning_rate": 1.3171528273457402e-06, "loss": 1.4182, "step": 9685 }, { "epoch": 1.6710083671180884, "grad_norm": 0.5625, "learning_rate": 1.3158029169363595e-06, "loss": 1.4491, "step": 9686 }, { "epoch": 1.6711808850168204, "grad_norm": 0.6171875, "learning_rate": 1.314453649906562e-06, "loss": 1.4682, "step": 9687 }, { "epoch": 1.6713534029155523, "grad_norm": 0.6015625, "learning_rate": 1.313105026356304e-06, "loss": 1.4939, "step": 9688 }, { "epoch": 1.6715259208142845, "grad_norm": 0.58203125, "learning_rate": 1.311757046385508e-06, "loss": 1.348, "step": 9689 }, { "epoch": 1.6716984387130165, "grad_norm": 0.60546875, "learning_rate": 1.310409710094035e-06, "loss": 1.5174, "step": 9690 }, { "epoch": 1.6718709566117484, "grad_norm": 0.60546875, "learning_rate": 1.3090630175817042e-06, "loss": 1.4019, "step": 9691 }, { "epoch": 1.6720434745104804, "grad_norm": 0.55859375, "learning_rate": 1.3077169689482893e-06, "loss": 1.3465, "step": 9692 }, { "epoch": 1.6722159924092126, "grad_norm": 0.5859375, "learning_rate": 1.3063715642935137e-06, "loss": 1.4102, "step": 9693 }, { "epoch": 1.6723885103079446, "grad_norm": 0.55078125, "learning_rate": 1.3050268037170533e-06, "loss": 1.3047, "step": 9694 }, { "epoch": 1.6725610282066765, "grad_norm": 0.64453125, "learning_rate": 1.303682687318537e-06, "loss": 1.4796, "step": 9695 }, { "epoch": 1.6727335461054085, "grad_norm": 0.62109375, "learning_rate": 1.30233921519754e-06, "loss": 1.42, "step": 9696 }, { "epoch": 1.6729060640041404, "grad_norm": 0.55078125, "learning_rate": 1.3009963874536025e-06, "loss": 1.4029, "step": 9697 }, { "epoch": 1.6730785819028724, "grad_norm": 0.59765625, "learning_rate": 1.299654204186208e-06, "loss": 1.4715, "step": 9698 }, { "epoch": 1.6732510998016044, "grad_norm": 0.55859375, "learning_rate": 1.2983126654947886e-06, "loss": 1.3625, "step": 9699 }, { "epoch": 1.6734236177003363, "grad_norm": 0.5859375, "learning_rate": 1.2969717714787411e-06, "loss": 1.4229, "step": 9700 }, { "epoch": 1.6734236177003363, "eval_loss": 1.407073736190796, "eval_runtime": 10.8926, "eval_samples_per_second": 94.009, "eval_steps_per_second": 23.502, "step": 9700 }, { "epoch": 1.6735961355990683, "grad_norm": 0.63671875, "learning_rate": 1.2956315222374006e-06, "loss": 1.4324, "step": 9701 }, { "epoch": 1.6737686534978002, "grad_norm": 0.58203125, "learning_rate": 1.2942919178700674e-06, "loss": 1.4398, "step": 9702 }, { "epoch": 1.6739411713965324, "grad_norm": 0.65234375, "learning_rate": 1.2929529584759836e-06, "loss": 1.5529, "step": 9703 }, { "epoch": 1.6741136892952644, "grad_norm": 0.5859375, "learning_rate": 1.2916146441543487e-06, "loss": 1.4504, "step": 9704 }, { "epoch": 1.6742862071939963, "grad_norm": 0.58984375, "learning_rate": 1.290276975004312e-06, "loss": 1.3953, "step": 9705 }, { "epoch": 1.6744587250927285, "grad_norm": 0.58984375, "learning_rate": 1.28893995112498e-06, "loss": 1.4363, "step": 9706 }, { "epoch": 1.6746312429914605, "grad_norm": 0.58203125, "learning_rate": 1.2876035726154046e-06, "loss": 1.3643, "step": 9707 }, { "epoch": 1.6748037608901925, "grad_norm": 0.64453125, "learning_rate": 1.2862678395745954e-06, "loss": 1.4318, "step": 9708 }, { "epoch": 1.6749762787889244, "grad_norm": 0.55859375, "learning_rate": 1.2849327521015076e-06, "loss": 1.4568, "step": 9709 }, { "epoch": 1.6751487966876564, "grad_norm": 0.640625, "learning_rate": 1.283598310295059e-06, "loss": 1.2942, "step": 9710 }, { "epoch": 1.6753213145863883, "grad_norm": 0.59765625, "learning_rate": 1.282264514254109e-06, "loss": 1.3316, "step": 9711 }, { "epoch": 1.6754938324851203, "grad_norm": 0.5625, "learning_rate": 1.2809313640774723e-06, "loss": 1.4064, "step": 9712 }, { "epoch": 1.6756663503838523, "grad_norm": 0.5859375, "learning_rate": 1.279598859863923e-06, "loss": 1.4209, "step": 9713 }, { "epoch": 1.6758388682825842, "grad_norm": 0.58984375, "learning_rate": 1.2782670017121768e-06, "loss": 1.3198, "step": 9714 }, { "epoch": 1.6760113861813162, "grad_norm": 0.5859375, "learning_rate": 1.2769357897209056e-06, "loss": 1.467, "step": 9715 }, { "epoch": 1.6761839040800484, "grad_norm": 0.58984375, "learning_rate": 1.2756052239887362e-06, "loss": 1.3825, "step": 9716 }, { "epoch": 1.6763564219787803, "grad_norm": 0.55859375, "learning_rate": 1.2742753046142442e-06, "loss": 1.4707, "step": 9717 }, { "epoch": 1.6765289398775123, "grad_norm": 0.5625, "learning_rate": 1.2729460316959586e-06, "loss": 1.4252, "step": 9718 }, { "epoch": 1.6767014577762442, "grad_norm": 0.640625, "learning_rate": 1.2716174053323628e-06, "loss": 1.4965, "step": 9719 }, { "epoch": 1.6768739756749764, "grad_norm": 0.6015625, "learning_rate": 1.2702894256218823e-06, "loss": 1.482, "step": 9720 }, { "epoch": 1.6770464935737084, "grad_norm": 0.5859375, "learning_rate": 1.2689620926629108e-06, "loss": 1.3392, "step": 9721 }, { "epoch": 1.6772190114724403, "grad_norm": 0.57421875, "learning_rate": 1.26763540655378e-06, "loss": 1.4667, "step": 9722 }, { "epoch": 1.6773915293711723, "grad_norm": 0.59765625, "learning_rate": 1.2663093673927796e-06, "loss": 1.3934, "step": 9723 }, { "epoch": 1.6775640472699043, "grad_norm": 0.66796875, "learning_rate": 1.2649839752781522e-06, "loss": 1.4194, "step": 9724 }, { "epoch": 1.6777365651686362, "grad_norm": 0.59765625, "learning_rate": 1.263659230308092e-06, "loss": 1.5728, "step": 9725 }, { "epoch": 1.6779090830673682, "grad_norm": 0.5546875, "learning_rate": 1.262335132580742e-06, "loss": 1.4046, "step": 9726 }, { "epoch": 1.6780816009661002, "grad_norm": 0.56640625, "learning_rate": 1.2610116821942032e-06, "loss": 1.4356, "step": 9727 }, { "epoch": 1.6782541188648321, "grad_norm": 0.6015625, "learning_rate": 1.259688879246519e-06, "loss": 1.4939, "step": 9728 }, { "epoch": 1.678426636763564, "grad_norm": 0.5859375, "learning_rate": 1.2583667238356956e-06, "loss": 1.4155, "step": 9729 }, { "epoch": 1.6785991546622963, "grad_norm": 0.640625, "learning_rate": 1.2570452160596859e-06, "loss": 1.3951, "step": 9730 }, { "epoch": 1.6787716725610282, "grad_norm": 0.5546875, "learning_rate": 1.2557243560163955e-06, "loss": 1.419, "step": 9731 }, { "epoch": 1.6789441904597602, "grad_norm": 0.57421875, "learning_rate": 1.2544041438036836e-06, "loss": 1.474, "step": 9732 }, { "epoch": 1.6791167083584921, "grad_norm": 0.6953125, "learning_rate": 1.2530845795193536e-06, "loss": 1.3757, "step": 9733 }, { "epoch": 1.6792892262572243, "grad_norm": 0.578125, "learning_rate": 1.2517656632611753e-06, "loss": 1.4638, "step": 9734 }, { "epoch": 1.6794617441559563, "grad_norm": 0.58203125, "learning_rate": 1.2504473951268559e-06, "loss": 1.3717, "step": 9735 }, { "epoch": 1.6796342620546882, "grad_norm": 0.62890625, "learning_rate": 1.249129775214064e-06, "loss": 1.3255, "step": 9736 }, { "epoch": 1.6798067799534202, "grad_norm": 0.55859375, "learning_rate": 1.2478128036204151e-06, "loss": 1.3849, "step": 9737 }, { "epoch": 1.6799792978521522, "grad_norm": 0.546875, "learning_rate": 1.246496480443481e-06, "loss": 1.4408, "step": 9738 }, { "epoch": 1.6801518157508841, "grad_norm": 0.5546875, "learning_rate": 1.2451808057807813e-06, "loss": 1.4953, "step": 9739 }, { "epoch": 1.680324333649616, "grad_norm": 0.55078125, "learning_rate": 1.2438657797297927e-06, "loss": 1.4011, "step": 9740 }, { "epoch": 1.680496851548348, "grad_norm": 0.5703125, "learning_rate": 1.2425514023879338e-06, "loss": 1.3963, "step": 9741 }, { "epoch": 1.68066936944708, "grad_norm": 0.65625, "learning_rate": 1.24123767385259e-06, "loss": 1.3753, "step": 9742 }, { "epoch": 1.680841887345812, "grad_norm": 0.5859375, "learning_rate": 1.2399245942210847e-06, "loss": 1.3894, "step": 9743 }, { "epoch": 1.6810144052445442, "grad_norm": 0.671875, "learning_rate": 1.2386121635906978e-06, "loss": 1.5035, "step": 9744 }, { "epoch": 1.6811869231432761, "grad_norm": 0.6015625, "learning_rate": 1.2373003820586715e-06, "loss": 1.4379, "step": 9745 }, { "epoch": 1.681359441042008, "grad_norm": 0.6171875, "learning_rate": 1.2359892497221815e-06, "loss": 1.4879, "step": 9746 }, { "epoch": 1.6815319589407403, "grad_norm": 0.6015625, "learning_rate": 1.2346787666783678e-06, "loss": 1.434, "step": 9747 }, { "epoch": 1.6817044768394722, "grad_norm": 0.62890625, "learning_rate": 1.2333689330243204e-06, "loss": 1.3453, "step": 9748 }, { "epoch": 1.6818769947382042, "grad_norm": 0.5703125, "learning_rate": 1.2320597488570774e-06, "loss": 1.3516, "step": 9749 }, { "epoch": 1.6820495126369361, "grad_norm": 0.84765625, "learning_rate": 1.2307512142736344e-06, "loss": 1.4055, "step": 9750 }, { "epoch": 1.682222030535668, "grad_norm": 0.5703125, "learning_rate": 1.2294433293709352e-06, "loss": 1.4123, "step": 9751 }, { "epoch": 1.6823945484344, "grad_norm": 0.5546875, "learning_rate": 1.2281360942458708e-06, "loss": 1.4203, "step": 9752 }, { "epoch": 1.682567066333132, "grad_norm": 0.56640625, "learning_rate": 1.2268295089952986e-06, "loss": 1.4716, "step": 9753 }, { "epoch": 1.682739584231864, "grad_norm": 0.5625, "learning_rate": 1.225523573716012e-06, "loss": 1.4118, "step": 9754 }, { "epoch": 1.682912102130596, "grad_norm": 0.59375, "learning_rate": 1.2242182885047638e-06, "loss": 1.4714, "step": 9755 }, { "epoch": 1.683084620029328, "grad_norm": 0.59765625, "learning_rate": 1.2229136534582586e-06, "loss": 1.4043, "step": 9756 }, { "epoch": 1.68325713792806, "grad_norm": 0.58984375, "learning_rate": 1.2216096686731515e-06, "loss": 1.3979, "step": 9757 }, { "epoch": 1.683429655826792, "grad_norm": 0.6015625, "learning_rate": 1.2203063342460496e-06, "loss": 1.2974, "step": 9758 }, { "epoch": 1.683602173725524, "grad_norm": 0.60546875, "learning_rate": 1.2190036502735158e-06, "loss": 1.3953, "step": 9759 }, { "epoch": 1.683774691624256, "grad_norm": 0.6015625, "learning_rate": 1.2177016168520528e-06, "loss": 1.4185, "step": 9760 }, { "epoch": 1.6839472095229882, "grad_norm": 0.59765625, "learning_rate": 1.216400234078131e-06, "loss": 1.5038, "step": 9761 }, { "epoch": 1.6841197274217201, "grad_norm": 0.55859375, "learning_rate": 1.2150995020481616e-06, "loss": 1.4263, "step": 9762 }, { "epoch": 1.684292245320452, "grad_norm": 0.6015625, "learning_rate": 1.2137994208585125e-06, "loss": 1.4546, "step": 9763 }, { "epoch": 1.684464763219184, "grad_norm": 0.59765625, "learning_rate": 1.2124999906055024e-06, "loss": 1.3523, "step": 9764 }, { "epoch": 1.684637281117916, "grad_norm": 0.55859375, "learning_rate": 1.2112012113853955e-06, "loss": 1.2748, "step": 9765 }, { "epoch": 1.684809799016648, "grad_norm": 0.64453125, "learning_rate": 1.2099030832944224e-06, "loss": 1.4521, "step": 9766 }, { "epoch": 1.68498231691538, "grad_norm": 0.578125, "learning_rate": 1.2086056064287498e-06, "loss": 1.3665, "step": 9767 }, { "epoch": 1.6851548348141119, "grad_norm": 0.578125, "learning_rate": 1.2073087808845052e-06, "loss": 1.433, "step": 9768 }, { "epoch": 1.6853273527128438, "grad_norm": 0.56640625, "learning_rate": 1.206012606757765e-06, "loss": 1.4158, "step": 9769 }, { "epoch": 1.6854998706115758, "grad_norm": 0.61328125, "learning_rate": 1.2047170841445589e-06, "loss": 1.47, "step": 9770 }, { "epoch": 1.685672388510308, "grad_norm": 0.63671875, "learning_rate": 1.2034222131408669e-06, "loss": 1.4627, "step": 9771 }, { "epoch": 1.68584490640904, "grad_norm": 0.59765625, "learning_rate": 1.2021279938426223e-06, "loss": 1.4755, "step": 9772 }, { "epoch": 1.686017424307772, "grad_norm": 0.58984375, "learning_rate": 1.2008344263457029e-06, "loss": 1.4738, "step": 9773 }, { "epoch": 1.686189942206504, "grad_norm": 0.59375, "learning_rate": 1.199541510745954e-06, "loss": 1.3455, "step": 9774 }, { "epoch": 1.686362460105236, "grad_norm": 0.578125, "learning_rate": 1.1982492471391549e-06, "loss": 1.4601, "step": 9775 }, { "epoch": 1.686534978003968, "grad_norm": 0.6015625, "learning_rate": 1.1969576356210466e-06, "loss": 1.4357, "step": 9776 }, { "epoch": 1.6867074959027, "grad_norm": 0.578125, "learning_rate": 1.1956666762873236e-06, "loss": 1.4608, "step": 9777 }, { "epoch": 1.686880013801432, "grad_norm": 0.58203125, "learning_rate": 1.194376369233624e-06, "loss": 1.4586, "step": 9778 }, { "epoch": 1.687052531700164, "grad_norm": 0.61328125, "learning_rate": 1.1930867145555424e-06, "loss": 1.4273, "step": 9779 }, { "epoch": 1.6872250495988959, "grad_norm": 0.59765625, "learning_rate": 1.1917977123486258e-06, "loss": 1.3533, "step": 9780 }, { "epoch": 1.6873975674976278, "grad_norm": 0.5625, "learning_rate": 1.1905093627083698e-06, "loss": 1.3954, "step": 9781 }, { "epoch": 1.6875700853963598, "grad_norm": 0.5859375, "learning_rate": 1.1892216657302247e-06, "loss": 1.4152, "step": 9782 }, { "epoch": 1.6877426032950917, "grad_norm": 0.56640625, "learning_rate": 1.1879346215095932e-06, "loss": 1.3656, "step": 9783 }, { "epoch": 1.6879151211938237, "grad_norm": 0.609375, "learning_rate": 1.1866482301418213e-06, "loss": 1.4807, "step": 9784 }, { "epoch": 1.6880876390925559, "grad_norm": 0.5625, "learning_rate": 1.1853624917222217e-06, "loss": 1.3826, "step": 9785 }, { "epoch": 1.6882601569912878, "grad_norm": 0.6484375, "learning_rate": 1.1840774063460403e-06, "loss": 1.4644, "step": 9786 }, { "epoch": 1.6884326748900198, "grad_norm": 0.609375, "learning_rate": 1.1827929741084931e-06, "loss": 1.4286, "step": 9787 }, { "epoch": 1.688605192788752, "grad_norm": 0.59765625, "learning_rate": 1.1815091951047331e-06, "loss": 1.4263, "step": 9788 }, { "epoch": 1.688777710687484, "grad_norm": 0.59765625, "learning_rate": 1.1802260694298717e-06, "loss": 1.4465, "step": 9789 }, { "epoch": 1.688950228586216, "grad_norm": 0.56640625, "learning_rate": 1.178943597178972e-06, "loss": 1.398, "step": 9790 }, { "epoch": 1.6891227464849479, "grad_norm": 0.71875, "learning_rate": 1.1776617784470469e-06, "loss": 1.4605, "step": 9791 }, { "epoch": 1.6892952643836798, "grad_norm": 0.58984375, "learning_rate": 1.1763806133290623e-06, "loss": 1.4018, "step": 9792 }, { "epoch": 1.6894677822824118, "grad_norm": 0.58203125, "learning_rate": 1.1751001019199337e-06, "loss": 1.4055, "step": 9793 }, { "epoch": 1.6896403001811438, "grad_norm": 0.5546875, "learning_rate": 1.1738202443145307e-06, "loss": 1.4657, "step": 9794 }, { "epoch": 1.6898128180798757, "grad_norm": 0.59375, "learning_rate": 1.172541040607672e-06, "loss": 1.4122, "step": 9795 }, { "epoch": 1.6899853359786077, "grad_norm": 0.78125, "learning_rate": 1.1712624908941318e-06, "loss": 1.3335, "step": 9796 }, { "epoch": 1.6901578538773396, "grad_norm": 0.578125, "learning_rate": 1.1699845952686273e-06, "loss": 1.3836, "step": 9797 }, { "epoch": 1.6903303717760718, "grad_norm": 0.56640625, "learning_rate": 1.1687073538258398e-06, "loss": 1.4557, "step": 9798 }, { "epoch": 1.6905028896748038, "grad_norm": 0.625, "learning_rate": 1.1674307666603901e-06, "loss": 1.4109, "step": 9799 }, { "epoch": 1.6906754075735357, "grad_norm": 0.6015625, "learning_rate": 1.1661548338668572e-06, "loss": 1.3921, "step": 9800 }, { "epoch": 1.6906754075735357, "eval_loss": 1.4071011543273926, "eval_runtime": 10.8438, "eval_samples_per_second": 94.432, "eval_steps_per_second": 23.608, "step": 9800 }, { "epoch": 1.6908479254722677, "grad_norm": 0.57421875, "learning_rate": 1.1648795555397719e-06, "loss": 1.3999, "step": 9801 }, { "epoch": 1.6910204433709999, "grad_norm": 0.6015625, "learning_rate": 1.163604931773612e-06, "loss": 1.5643, "step": 9802 }, { "epoch": 1.6911929612697318, "grad_norm": 0.58984375, "learning_rate": 1.1623309626628121e-06, "loss": 1.4941, "step": 9803 }, { "epoch": 1.6913654791684638, "grad_norm": 0.66015625, "learning_rate": 1.1610576483017566e-06, "loss": 1.5024, "step": 9804 }, { "epoch": 1.6915379970671958, "grad_norm": 0.640625, "learning_rate": 1.1597849887847746e-06, "loss": 1.4233, "step": 9805 }, { "epoch": 1.6917105149659277, "grad_norm": 0.53515625, "learning_rate": 1.1585129842061605e-06, "loss": 1.3673, "step": 9806 }, { "epoch": 1.6918830328646597, "grad_norm": 0.578125, "learning_rate": 1.1572416346601467e-06, "loss": 1.4561, "step": 9807 }, { "epoch": 1.6920555507633916, "grad_norm": 0.546875, "learning_rate": 1.1559709402409236e-06, "loss": 1.3763, "step": 9808 }, { "epoch": 1.6922280686621236, "grad_norm": 0.5859375, "learning_rate": 1.1547009010426368e-06, "loss": 1.423, "step": 9809 }, { "epoch": 1.6924005865608556, "grad_norm": 0.59375, "learning_rate": 1.1534315171593736e-06, "loss": 1.5238, "step": 9810 }, { "epoch": 1.6925731044595875, "grad_norm": 0.625, "learning_rate": 1.1521627886851794e-06, "loss": 1.4278, "step": 9811 }, { "epoch": 1.6927456223583197, "grad_norm": 0.6484375, "learning_rate": 1.1508947157140493e-06, "loss": 1.3774, "step": 9812 }, { "epoch": 1.6929181402570517, "grad_norm": 0.61328125, "learning_rate": 1.1496272983399303e-06, "loss": 1.439, "step": 9813 }, { "epoch": 1.6930906581557836, "grad_norm": 0.58984375, "learning_rate": 1.1483605366567208e-06, "loss": 1.3043, "step": 9814 }, { "epoch": 1.6932631760545158, "grad_norm": 0.58203125, "learning_rate": 1.147094430758272e-06, "loss": 1.4669, "step": 9815 }, { "epoch": 1.6934356939532478, "grad_norm": 0.57421875, "learning_rate": 1.1458289807383804e-06, "loss": 1.4208, "step": 9816 }, { "epoch": 1.6936082118519797, "grad_norm": 0.5546875, "learning_rate": 1.1445641866908042e-06, "loss": 1.4478, "step": 9817 }, { "epoch": 1.6937807297507117, "grad_norm": 0.59375, "learning_rate": 1.1433000487092415e-06, "loss": 1.4366, "step": 9818 }, { "epoch": 1.6939532476494437, "grad_norm": 0.625, "learning_rate": 1.1420365668873535e-06, "loss": 1.5139, "step": 9819 }, { "epoch": 1.6941257655481756, "grad_norm": 0.6484375, "learning_rate": 1.1407737413187426e-06, "loss": 1.4421, "step": 9820 }, { "epoch": 1.6942982834469076, "grad_norm": 0.59765625, "learning_rate": 1.1395115720969663e-06, "loss": 1.3685, "step": 9821 }, { "epoch": 1.6944708013456395, "grad_norm": 0.6328125, "learning_rate": 1.1382500593155377e-06, "loss": 1.443, "step": 9822 }, { "epoch": 1.6946433192443715, "grad_norm": 0.546875, "learning_rate": 1.1369892030679142e-06, "loss": 1.4063, "step": 9823 }, { "epoch": 1.6948158371431035, "grad_norm": 0.58203125, "learning_rate": 1.1357290034475087e-06, "loss": 1.3731, "step": 9824 }, { "epoch": 1.6949883550418354, "grad_norm": 0.59375, "learning_rate": 1.1344694605476859e-06, "loss": 1.3671, "step": 9825 }, { "epoch": 1.6951608729405676, "grad_norm": 0.5546875, "learning_rate": 1.1332105744617605e-06, "loss": 1.4597, "step": 9826 }, { "epoch": 1.6953333908392996, "grad_norm": 0.58203125, "learning_rate": 1.1319523452829973e-06, "loss": 1.3508, "step": 9827 }, { "epoch": 1.6955059087380315, "grad_norm": 0.609375, "learning_rate": 1.1306947731046169e-06, "loss": 1.4448, "step": 9828 }, { "epoch": 1.6956784266367637, "grad_norm": 0.578125, "learning_rate": 1.1294378580197806e-06, "loss": 1.3837, "step": 9829 }, { "epoch": 1.6958509445354957, "grad_norm": 0.5625, "learning_rate": 1.1281816001216183e-06, "loss": 1.3462, "step": 9830 }, { "epoch": 1.6960234624342276, "grad_norm": 0.54296875, "learning_rate": 1.1269259995031955e-06, "loss": 1.4266, "step": 9831 }, { "epoch": 1.6961959803329596, "grad_norm": 0.58203125, "learning_rate": 1.1256710562575346e-06, "loss": 1.4333, "step": 9832 }, { "epoch": 1.6963684982316916, "grad_norm": 0.5625, "learning_rate": 1.124416770477612e-06, "loss": 1.3963, "step": 9833 }, { "epoch": 1.6965410161304235, "grad_norm": 0.5546875, "learning_rate": 1.1231631422563526e-06, "loss": 1.4522, "step": 9834 }, { "epoch": 1.6967135340291555, "grad_norm": 0.55078125, "learning_rate": 1.1219101716866332e-06, "loss": 1.3365, "step": 9835 }, { "epoch": 1.6968860519278874, "grad_norm": 0.63671875, "learning_rate": 1.1206578588612815e-06, "loss": 1.4551, "step": 9836 }, { "epoch": 1.6970585698266194, "grad_norm": 0.5546875, "learning_rate": 1.1194062038730735e-06, "loss": 1.5414, "step": 9837 }, { "epoch": 1.6972310877253514, "grad_norm": 0.59375, "learning_rate": 1.118155206814746e-06, "loss": 1.4245, "step": 9838 }, { "epoch": 1.6974036056240835, "grad_norm": 0.6328125, "learning_rate": 1.1169048677789751e-06, "loss": 1.4564, "step": 9839 }, { "epoch": 1.6975761235228155, "grad_norm": 0.734375, "learning_rate": 1.1156551868583942e-06, "loss": 1.4433, "step": 9840 }, { "epoch": 1.6977486414215475, "grad_norm": 0.6171875, "learning_rate": 1.1144061641455927e-06, "loss": 1.4318, "step": 9841 }, { "epoch": 1.6979211593202794, "grad_norm": 0.60546875, "learning_rate": 1.1131577997331e-06, "loss": 1.4245, "step": 9842 }, { "epoch": 1.6980936772190116, "grad_norm": 0.57421875, "learning_rate": 1.1119100937134052e-06, "loss": 1.5177, "step": 9843 }, { "epoch": 1.6982661951177436, "grad_norm": 0.55859375, "learning_rate": 1.1106630461789459e-06, "loss": 1.4323, "step": 9844 }, { "epoch": 1.6984387130164755, "grad_norm": 0.59375, "learning_rate": 1.109416657222112e-06, "loss": 1.38, "step": 9845 }, { "epoch": 1.6986112309152075, "grad_norm": 0.55859375, "learning_rate": 1.1081709269352426e-06, "loss": 1.5297, "step": 9846 }, { "epoch": 1.6987837488139395, "grad_norm": 0.546875, "learning_rate": 1.1069258554106289e-06, "loss": 1.3105, "step": 9847 }, { "epoch": 1.6989562667126714, "grad_norm": 0.6875, "learning_rate": 1.1056814427405148e-06, "loss": 1.4188, "step": 9848 }, { "epoch": 1.6991287846114034, "grad_norm": 0.5546875, "learning_rate": 1.1044376890170971e-06, "loss": 1.4763, "step": 9849 }, { "epoch": 1.6993013025101353, "grad_norm": 1.0390625, "learning_rate": 1.1031945943325118e-06, "loss": 1.3321, "step": 9850 }, { "epoch": 1.6994738204088673, "grad_norm": 0.58203125, "learning_rate": 1.101952158778865e-06, "loss": 1.3957, "step": 9851 }, { "epoch": 1.6996463383075993, "grad_norm": 0.59765625, "learning_rate": 1.100710382448198e-06, "loss": 1.5045, "step": 9852 }, { "epoch": 1.6998188562063314, "grad_norm": 1.75, "learning_rate": 1.0994692654325111e-06, "loss": 1.5065, "step": 9853 }, { "epoch": 1.6999913741050634, "grad_norm": 0.640625, "learning_rate": 1.0982288078237547e-06, "loss": 1.3886, "step": 9854 }, { "epoch": 1.7001638920037954, "grad_norm": 0.56640625, "learning_rate": 1.096989009713828e-06, "loss": 1.4662, "step": 9855 }, { "epoch": 1.7003364099025275, "grad_norm": 0.5703125, "learning_rate": 1.0957498711945858e-06, "loss": 1.3959, "step": 9856 }, { "epoch": 1.7005089278012595, "grad_norm": 0.63671875, "learning_rate": 1.0945113923578277e-06, "loss": 1.5277, "step": 9857 }, { "epoch": 1.7006814456999915, "grad_norm": 0.54296875, "learning_rate": 1.0932735732953103e-06, "loss": 1.4183, "step": 9858 }, { "epoch": 1.7008539635987234, "grad_norm": 0.57421875, "learning_rate": 1.0920364140987383e-06, "loss": 1.3968, "step": 9859 }, { "epoch": 1.7010264814974554, "grad_norm": 0.5703125, "learning_rate": 1.0907999148597703e-06, "loss": 1.4088, "step": 9860 }, { "epoch": 1.7011989993961874, "grad_norm": 0.6171875, "learning_rate": 1.0895640756700087e-06, "loss": 1.3208, "step": 9861 }, { "epoch": 1.7013715172949193, "grad_norm": 0.6328125, "learning_rate": 1.0883288966210181e-06, "loss": 1.4109, "step": 9862 }, { "epoch": 1.7015440351936513, "grad_norm": 0.58203125, "learning_rate": 1.0870943778043041e-06, "loss": 1.4039, "step": 9863 }, { "epoch": 1.7017165530923832, "grad_norm": 0.89453125, "learning_rate": 1.0858605193113292e-06, "loss": 1.3308, "step": 9864 }, { "epoch": 1.7018890709911152, "grad_norm": 0.546875, "learning_rate": 1.0846273212335046e-06, "loss": 1.3447, "step": 9865 }, { "epoch": 1.7020615888898474, "grad_norm": 0.5703125, "learning_rate": 1.083394783662194e-06, "loss": 1.4377, "step": 9866 }, { "epoch": 1.7022341067885793, "grad_norm": 0.5859375, "learning_rate": 1.0821629066887118e-06, "loss": 1.4277, "step": 9867 }, { "epoch": 1.7024066246873113, "grad_norm": 0.55859375, "learning_rate": 1.0809316904043243e-06, "loss": 1.3596, "step": 9868 }, { "epoch": 1.7025791425860433, "grad_norm": 0.56640625, "learning_rate": 1.0797011349002418e-06, "loss": 1.3799, "step": 9869 }, { "epoch": 1.7027516604847754, "grad_norm": 0.62890625, "learning_rate": 1.0784712402676412e-06, "loss": 1.5748, "step": 9870 }, { "epoch": 1.7029241783835074, "grad_norm": 0.5625, "learning_rate": 1.0772420065976319e-06, "loss": 1.4519, "step": 9871 }, { "epoch": 1.7030966962822394, "grad_norm": 0.55859375, "learning_rate": 1.0760134339812856e-06, "loss": 1.4332, "step": 9872 }, { "epoch": 1.7032692141809713, "grad_norm": 1.6640625, "learning_rate": 1.0747855225096272e-06, "loss": 1.376, "step": 9873 }, { "epoch": 1.7034417320797033, "grad_norm": 0.5859375, "learning_rate": 1.0735582722736205e-06, "loss": 1.3404, "step": 9874 }, { "epoch": 1.7036142499784352, "grad_norm": 0.5546875, "learning_rate": 1.0723316833641961e-06, "loss": 1.3909, "step": 9875 }, { "epoch": 1.7037867678771672, "grad_norm": 0.578125, "learning_rate": 1.0711057558722216e-06, "loss": 1.3385, "step": 9876 }, { "epoch": 1.7039592857758992, "grad_norm": 0.5859375, "learning_rate": 1.069880489888523e-06, "loss": 1.4349, "step": 9877 }, { "epoch": 1.7041318036746311, "grad_norm": 0.6328125, "learning_rate": 1.0686558855038753e-06, "loss": 1.4143, "step": 9878 }, { "epoch": 1.704304321573363, "grad_norm": 0.56640625, "learning_rate": 1.0674319428090052e-06, "loss": 1.489, "step": 9879 }, { "epoch": 1.7044768394720953, "grad_norm": 0.55859375, "learning_rate": 1.0662086618945911e-06, "loss": 1.4414, "step": 9880 }, { "epoch": 1.7046493573708272, "grad_norm": 0.5546875, "learning_rate": 1.0649860428512604e-06, "loss": 1.3912, "step": 9881 }, { "epoch": 1.7048218752695592, "grad_norm": 0.578125, "learning_rate": 1.0637640857695897e-06, "loss": 1.408, "step": 9882 }, { "epoch": 1.7049943931682912, "grad_norm": 0.58984375, "learning_rate": 1.0625427907401154e-06, "loss": 1.3399, "step": 9883 }, { "epoch": 1.7051669110670233, "grad_norm": 0.58984375, "learning_rate": 1.0613221578533128e-06, "loss": 1.3745, "step": 9884 }, { "epoch": 1.7053394289657553, "grad_norm": 0.58203125, "learning_rate": 1.0601021871996154e-06, "loss": 1.302, "step": 9885 }, { "epoch": 1.7055119468644873, "grad_norm": 0.59765625, "learning_rate": 1.0588828788694082e-06, "loss": 1.4234, "step": 9886 }, { "epoch": 1.7056844647632192, "grad_norm": 0.578125, "learning_rate": 1.057664232953024e-06, "loss": 1.436, "step": 9887 }, { "epoch": 1.7058569826619512, "grad_norm": 0.55859375, "learning_rate": 1.0564462495407468e-06, "loss": 1.3744, "step": 9888 }, { "epoch": 1.7060295005606831, "grad_norm": 0.83203125, "learning_rate": 1.0552289287228145e-06, "loss": 1.5192, "step": 9889 }, { "epoch": 1.706202018459415, "grad_norm": 0.7109375, "learning_rate": 1.0540122705894117e-06, "loss": 1.4054, "step": 9890 }, { "epoch": 1.706374536358147, "grad_norm": 0.625, "learning_rate": 1.0527962752306776e-06, "loss": 1.4689, "step": 9891 }, { "epoch": 1.706547054256879, "grad_norm": 0.65234375, "learning_rate": 1.051580942736702e-06, "loss": 1.4161, "step": 9892 }, { "epoch": 1.706719572155611, "grad_norm": 0.640625, "learning_rate": 1.0503662731975184e-06, "loss": 1.3389, "step": 9893 }, { "epoch": 1.7068920900543432, "grad_norm": 0.546875, "learning_rate": 1.049152266703124e-06, "loss": 1.4677, "step": 9894 }, { "epoch": 1.7070646079530751, "grad_norm": 0.5546875, "learning_rate": 1.0479389233434566e-06, "loss": 1.3986, "step": 9895 }, { "epoch": 1.707237125851807, "grad_norm": 0.61328125, "learning_rate": 1.0467262432084092e-06, "loss": 1.497, "step": 9896 }, { "epoch": 1.7074096437505393, "grad_norm": 0.71484375, "learning_rate": 1.0455142263878226e-06, "loss": 1.4305, "step": 9897 }, { "epoch": 1.7075821616492712, "grad_norm": 0.62890625, "learning_rate": 1.0443028729714944e-06, "loss": 1.4552, "step": 9898 }, { "epoch": 1.7077546795480032, "grad_norm": 0.57421875, "learning_rate": 1.0430921830491657e-06, "loss": 1.3979, "step": 9899 }, { "epoch": 1.7079271974467352, "grad_norm": 0.55859375, "learning_rate": 1.0418821567105353e-06, "loss": 1.3842, "step": 9900 }, { "epoch": 1.7079271974467352, "eval_loss": 1.4070851802825928, "eval_runtime": 10.7355, "eval_samples_per_second": 95.384, "eval_steps_per_second": 23.846, "step": 9900 }, { "epoch": 1.7080997153454671, "grad_norm": 0.59765625, "learning_rate": 1.0406727940452443e-06, "loss": 1.5596, "step": 9901 }, { "epoch": 1.708272233244199, "grad_norm": 0.66796875, "learning_rate": 1.0394640951428968e-06, "loss": 1.416, "step": 9902 }, { "epoch": 1.708444751142931, "grad_norm": 0.609375, "learning_rate": 1.038256060093036e-06, "loss": 1.4251, "step": 9903 }, { "epoch": 1.708617269041663, "grad_norm": 0.5703125, "learning_rate": 1.0370486889851616e-06, "loss": 1.3457, "step": 9904 }, { "epoch": 1.708789786940395, "grad_norm": 0.5625, "learning_rate": 1.0358419819087228e-06, "loss": 1.434, "step": 9905 }, { "epoch": 1.708962304839127, "grad_norm": 0.5703125, "learning_rate": 1.034635938953119e-06, "loss": 1.3788, "step": 9906 }, { "epoch": 1.709134822737859, "grad_norm": 0.6015625, "learning_rate": 1.0334305602077067e-06, "loss": 1.4269, "step": 9907 }, { "epoch": 1.709307340636591, "grad_norm": 0.78515625, "learning_rate": 1.0322258457617828e-06, "loss": 1.4744, "step": 9908 }, { "epoch": 1.709479858535323, "grad_norm": 0.5546875, "learning_rate": 1.0310217957046009e-06, "loss": 1.3707, "step": 9909 }, { "epoch": 1.709652376434055, "grad_norm": 0.58203125, "learning_rate": 1.029818410125365e-06, "loss": 1.4436, "step": 9910 }, { "epoch": 1.7098248943327872, "grad_norm": 0.5703125, "learning_rate": 1.0286156891132303e-06, "loss": 1.3644, "step": 9911 }, { "epoch": 1.7099974122315191, "grad_norm": 0.58203125, "learning_rate": 1.0274136327573004e-06, "loss": 1.4127, "step": 9912 }, { "epoch": 1.710169930130251, "grad_norm": 0.53125, "learning_rate": 1.0262122411466346e-06, "loss": 1.3324, "step": 9913 }, { "epoch": 1.710342448028983, "grad_norm": 0.671875, "learning_rate": 1.0250115143702321e-06, "loss": 1.5323, "step": 9914 }, { "epoch": 1.710514965927715, "grad_norm": 0.5703125, "learning_rate": 1.0238114525170595e-06, "loss": 1.4241, "step": 9915 }, { "epoch": 1.710687483826447, "grad_norm": 0.55859375, "learning_rate": 1.0226120556760178e-06, "loss": 1.4542, "step": 9916 }, { "epoch": 1.710860001725179, "grad_norm": 0.5625, "learning_rate": 1.021413323935969e-06, "loss": 1.4219, "step": 9917 }, { "epoch": 1.711032519623911, "grad_norm": 0.56640625, "learning_rate": 1.0202152573857216e-06, "loss": 1.3185, "step": 9918 }, { "epoch": 1.7112050375226429, "grad_norm": 0.59765625, "learning_rate": 1.0190178561140363e-06, "loss": 1.4635, "step": 9919 }, { "epoch": 1.7113775554213748, "grad_norm": 0.66015625, "learning_rate": 1.0178211202096233e-06, "loss": 1.3594, "step": 9920 }, { "epoch": 1.711550073320107, "grad_norm": 0.6015625, "learning_rate": 1.0166250497611462e-06, "loss": 1.4801, "step": 9921 }, { "epoch": 1.711722591218839, "grad_norm": 0.58984375, "learning_rate": 1.0154296448572154e-06, "loss": 1.392, "step": 9922 }, { "epoch": 1.711895109117571, "grad_norm": 0.57421875, "learning_rate": 1.0142349055863942e-06, "loss": 1.434, "step": 9923 }, { "epoch": 1.712067627016303, "grad_norm": 0.56640625, "learning_rate": 1.0130408320371998e-06, "loss": 1.4354, "step": 9924 }, { "epoch": 1.712240144915035, "grad_norm": 0.5546875, "learning_rate": 1.0118474242980892e-06, "loss": 1.3889, "step": 9925 }, { "epoch": 1.712412662813767, "grad_norm": 0.5859375, "learning_rate": 1.010654682457486e-06, "loss": 1.4636, "step": 9926 }, { "epoch": 1.712585180712499, "grad_norm": 0.55859375, "learning_rate": 1.0094626066037506e-06, "loss": 1.4264, "step": 9927 }, { "epoch": 1.712757698611231, "grad_norm": 0.60546875, "learning_rate": 1.0082711968252e-06, "loss": 1.4446, "step": 9928 }, { "epoch": 1.712930216509963, "grad_norm": 0.59765625, "learning_rate": 1.0070804532101019e-06, "loss": 1.4269, "step": 9929 }, { "epoch": 1.7131027344086949, "grad_norm": 0.6015625, "learning_rate": 1.0058903758466743e-06, "loss": 1.4745, "step": 9930 }, { "epoch": 1.7132752523074268, "grad_norm": 0.578125, "learning_rate": 1.0047009648230865e-06, "loss": 1.5025, "step": 9931 }, { "epoch": 1.7134477702061588, "grad_norm": 0.62890625, "learning_rate": 1.0035122202274572e-06, "loss": 1.5489, "step": 9932 }, { "epoch": 1.7136202881048908, "grad_norm": 0.60546875, "learning_rate": 1.0023241421478513e-06, "loss": 1.3831, "step": 9933 }, { "epoch": 1.7137928060036227, "grad_norm": 0.625, "learning_rate": 1.0011367306722975e-06, "loss": 1.3894, "step": 9934 }, { "epoch": 1.713965323902355, "grad_norm": 0.55859375, "learning_rate": 9.999499858887585e-07, "loss": 1.3981, "step": 9935 }, { "epoch": 1.7141378418010869, "grad_norm": 0.54296875, "learning_rate": 9.987639078851629e-07, "loss": 1.3585, "step": 9936 }, { "epoch": 1.7143103596998188, "grad_norm": 0.71875, "learning_rate": 9.975784967493774e-07, "loss": 1.4269, "step": 9937 }, { "epoch": 1.714482877598551, "grad_norm": 0.5625, "learning_rate": 9.963937525692247e-07, "loss": 1.4254, "step": 9938 }, { "epoch": 1.714655395497283, "grad_norm": 0.59375, "learning_rate": 9.952096754324847e-07, "loss": 1.5058, "step": 9939 }, { "epoch": 1.714827913396015, "grad_norm": 0.5546875, "learning_rate": 9.940262654268729e-07, "loss": 1.4928, "step": 9940 }, { "epoch": 1.7150004312947469, "grad_norm": 0.55859375, "learning_rate": 9.928435226400678e-07, "loss": 1.3425, "step": 9941 }, { "epoch": 1.7151729491934788, "grad_norm": 0.71875, "learning_rate": 9.916614471596953e-07, "loss": 1.456, "step": 9942 }, { "epoch": 1.7153454670922108, "grad_norm": 0.5703125, "learning_rate": 9.904800390733283e-07, "loss": 1.4231, "step": 9943 }, { "epoch": 1.7155179849909428, "grad_norm": 0.5859375, "learning_rate": 9.892992984684935e-07, "loss": 1.3841, "step": 9944 }, { "epoch": 1.7156905028896747, "grad_norm": 0.6015625, "learning_rate": 9.88119225432671e-07, "loss": 1.4788, "step": 9945 }, { "epoch": 1.7158630207884067, "grad_norm": 0.58203125, "learning_rate": 9.86939820053281e-07, "loss": 1.4304, "step": 9946 }, { "epoch": 1.7160355386871387, "grad_norm": 0.66796875, "learning_rate": 9.857610824177088e-07, "loss": 1.3979, "step": 9947 }, { "epoch": 1.7162080565858708, "grad_norm": 0.71484375, "learning_rate": 9.845830126132782e-07, "loss": 1.3864, "step": 9948 }, { "epoch": 1.7163805744846028, "grad_norm": 0.78125, "learning_rate": 9.834056107272682e-07, "loss": 1.4578, "step": 9949 }, { "epoch": 1.7165530923833348, "grad_norm": 0.78515625, "learning_rate": 9.82228876846909e-07, "loss": 1.4952, "step": 9950 }, { "epoch": 1.7167256102820667, "grad_norm": 0.56640625, "learning_rate": 9.810528110593798e-07, "loss": 1.4029, "step": 9951 }, { "epoch": 1.716898128180799, "grad_norm": 0.60546875, "learning_rate": 9.798774134518119e-07, "loss": 1.446, "step": 9952 }, { "epoch": 1.7170706460795309, "grad_norm": 0.60546875, "learning_rate": 9.787026841112856e-07, "loss": 1.2539, "step": 9953 }, { "epoch": 1.7172431639782628, "grad_norm": 0.59765625, "learning_rate": 9.775286231248316e-07, "loss": 1.4176, "step": 9954 }, { "epoch": 1.7174156818769948, "grad_norm": 0.5625, "learning_rate": 9.763552305794323e-07, "loss": 1.3271, "step": 9955 }, { "epoch": 1.7175881997757267, "grad_norm": 0.578125, "learning_rate": 9.751825065620223e-07, "loss": 1.5245, "step": 9956 }, { "epoch": 1.7177607176744587, "grad_norm": 0.640625, "learning_rate": 9.740104511594783e-07, "loss": 1.5093, "step": 9957 }, { "epoch": 1.7179332355731907, "grad_norm": 0.59375, "learning_rate": 9.728390644586394e-07, "loss": 1.4256, "step": 9958 }, { "epoch": 1.7181057534719226, "grad_norm": 0.6171875, "learning_rate": 9.716683465462862e-07, "loss": 1.3404, "step": 9959 }, { "epoch": 1.7182782713706546, "grad_norm": 0.56640625, "learning_rate": 9.704982975091538e-07, "loss": 1.4166, "step": 9960 }, { "epoch": 1.7184507892693865, "grad_norm": 0.5546875, "learning_rate": 9.69328917433926e-07, "loss": 1.3496, "step": 9961 }, { "epoch": 1.7186233071681187, "grad_norm": 0.61328125, "learning_rate": 9.681602064072382e-07, "loss": 1.3654, "step": 9962 }, { "epoch": 1.7187958250668507, "grad_norm": 0.578125, "learning_rate": 9.669921645156755e-07, "loss": 1.4074, "step": 9963 }, { "epoch": 1.7189683429655827, "grad_norm": 0.56640625, "learning_rate": 9.658247918457763e-07, "loss": 1.3444, "step": 9964 }, { "epoch": 1.7191408608643148, "grad_norm": 0.8125, "learning_rate": 9.646580884840207e-07, "loss": 1.5314, "step": 9965 }, { "epoch": 1.7193133787630468, "grad_norm": 0.546875, "learning_rate": 9.634920545168535e-07, "loss": 1.4694, "step": 9966 }, { "epoch": 1.7194858966617788, "grad_norm": 0.578125, "learning_rate": 9.623266900306538e-07, "loss": 1.421, "step": 9967 }, { "epoch": 1.7196584145605107, "grad_norm": 0.58203125, "learning_rate": 9.611619951117657e-07, "loss": 1.4543, "step": 9968 }, { "epoch": 1.7198309324592427, "grad_norm": 0.6171875, "learning_rate": 9.599979698464733e-07, "loss": 1.4106, "step": 9969 }, { "epoch": 1.7200034503579746, "grad_norm": 0.57421875, "learning_rate": 9.588346143210137e-07, "loss": 1.4361, "step": 9970 }, { "epoch": 1.7201759682567066, "grad_norm": 0.5859375, "learning_rate": 9.576719286215818e-07, "loss": 1.3925, "step": 9971 }, { "epoch": 1.7203484861554386, "grad_norm": 0.6328125, "learning_rate": 9.565099128343103e-07, "loss": 1.4657, "step": 9972 }, { "epoch": 1.7205210040541705, "grad_norm": 0.96484375, "learning_rate": 9.553485670452911e-07, "loss": 1.3915, "step": 9973 }, { "epoch": 1.7206935219529025, "grad_norm": 0.5859375, "learning_rate": 9.541878913405633e-07, "loss": 1.4143, "step": 9974 }, { "epoch": 1.7208660398516344, "grad_norm": 0.5859375, "learning_rate": 9.530278858061171e-07, "loss": 1.4653, "step": 9975 }, { "epoch": 1.7210385577503666, "grad_norm": 0.5703125, "learning_rate": 9.518685505278935e-07, "loss": 1.4554, "step": 9976 }, { "epoch": 1.7212110756490986, "grad_norm": 0.59765625, "learning_rate": 9.507098855917851e-07, "loss": 1.5149, "step": 9977 }, { "epoch": 1.7213835935478305, "grad_norm": 0.578125, "learning_rate": 9.495518910836276e-07, "loss": 1.555, "step": 9978 }, { "epoch": 1.7215561114465627, "grad_norm": 0.55078125, "learning_rate": 9.483945670892191e-07, "loss": 1.4126, "step": 9979 }, { "epoch": 1.7217286293452947, "grad_norm": 0.578125, "learning_rate": 9.472379136942955e-07, "loss": 1.4291, "step": 9980 }, { "epoch": 1.7219011472440267, "grad_norm": 0.5859375, "learning_rate": 9.460819309845526e-07, "loss": 1.3575, "step": 9981 }, { "epoch": 1.7220736651427586, "grad_norm": 0.625, "learning_rate": 9.449266190456318e-07, "loss": 1.4072, "step": 9982 }, { "epoch": 1.7222461830414906, "grad_norm": 0.578125, "learning_rate": 9.437719779631249e-07, "loss": 1.4664, "step": 9983 }, { "epoch": 1.7224187009402225, "grad_norm": 0.5546875, "learning_rate": 9.426180078225766e-07, "loss": 1.4289, "step": 9984 }, { "epoch": 1.7225912188389545, "grad_norm": 0.58984375, "learning_rate": 9.414647087094786e-07, "loss": 1.3887, "step": 9985 }, { "epoch": 1.7227637367376865, "grad_norm": 0.57421875, "learning_rate": 9.403120807092759e-07, "loss": 1.4219, "step": 9986 }, { "epoch": 1.7229362546364184, "grad_norm": 0.56640625, "learning_rate": 9.391601239073611e-07, "loss": 1.3415, "step": 9987 }, { "epoch": 1.7231087725351504, "grad_norm": 0.56640625, "learning_rate": 9.380088383890818e-07, "loss": 1.3889, "step": 9988 }, { "epoch": 1.7232812904338826, "grad_norm": 0.6328125, "learning_rate": 9.368582242397262e-07, "loss": 1.3631, "step": 9989 }, { "epoch": 1.7234538083326145, "grad_norm": 0.6015625, "learning_rate": 9.357082815445451e-07, "loss": 1.4397, "step": 9990 }, { "epoch": 1.7236263262313465, "grad_norm": 0.5546875, "learning_rate": 9.345590103887292e-07, "loss": 1.4168, "step": 9991 }, { "epoch": 1.7237988441300784, "grad_norm": 0.56640625, "learning_rate": 9.33410410857426e-07, "loss": 1.4388, "step": 9992 }, { "epoch": 1.7239713620288106, "grad_norm": 0.60546875, "learning_rate": 9.322624830357297e-07, "loss": 1.3092, "step": 9993 }, { "epoch": 1.7241438799275426, "grad_norm": 0.56640625, "learning_rate": 9.311152270086865e-07, "loss": 1.3909, "step": 9994 }, { "epoch": 1.7243163978262745, "grad_norm": 0.6171875, "learning_rate": 9.29968642861293e-07, "loss": 1.4871, "step": 9995 }, { "epoch": 1.7244889157250065, "grad_norm": 0.5546875, "learning_rate": 9.288227306784936e-07, "loss": 1.403, "step": 9996 }, { "epoch": 1.7246614336237385, "grad_norm": 0.71484375, "learning_rate": 9.276774905451868e-07, "loss": 1.4458, "step": 9997 }, { "epoch": 1.7248339515224704, "grad_norm": 0.5859375, "learning_rate": 9.265329225462183e-07, "loss": 1.4917, "step": 9998 }, { "epoch": 1.7250064694212024, "grad_norm": 0.5546875, "learning_rate": 9.253890267663824e-07, "loss": 1.4892, "step": 9999 }, { "epoch": 1.7251789873199344, "grad_norm": 0.70703125, "learning_rate": 9.242458032904311e-07, "loss": 1.5056, "step": 10000 }, { "epoch": 1.7251789873199344, "eval_loss": 1.4070346355438232, "eval_runtime": 10.8088, "eval_samples_per_second": 94.738, "eval_steps_per_second": 23.684, "step": 10000 }, { "epoch": 1.7253515052186663, "grad_norm": 0.5859375, "learning_rate": 9.231032522030569e-07, "loss": 1.508, "step": 10001 }, { "epoch": 1.7255240231173983, "grad_norm": 0.6171875, "learning_rate": 9.219613735889066e-07, "loss": 1.4105, "step": 10002 }, { "epoch": 1.7256965410161305, "grad_norm": 0.59375, "learning_rate": 9.208201675325834e-07, "loss": 1.4321, "step": 10003 }, { "epoch": 1.7258690589148624, "grad_norm": 0.58203125, "learning_rate": 9.19679634118631e-07, "loss": 1.3757, "step": 10004 }, { "epoch": 1.7260415768135944, "grad_norm": 0.56640625, "learning_rate": 9.185397734315471e-07, "loss": 1.4183, "step": 10005 }, { "epoch": 1.7262140947123266, "grad_norm": 0.640625, "learning_rate": 9.174005855557799e-07, "loss": 1.3735, "step": 10006 }, { "epoch": 1.7263866126110585, "grad_norm": 0.609375, "learning_rate": 9.162620705757286e-07, "loss": 1.4336, "step": 10007 }, { "epoch": 1.7265591305097905, "grad_norm": 0.55859375, "learning_rate": 9.1512422857574e-07, "loss": 1.4212, "step": 10008 }, { "epoch": 1.7267316484085224, "grad_norm": 0.60546875, "learning_rate": 9.139870596401168e-07, "loss": 1.493, "step": 10009 }, { "epoch": 1.7269041663072544, "grad_norm": 0.59765625, "learning_rate": 9.128505638530993e-07, "loss": 1.4634, "step": 10010 }, { "epoch": 1.7270766842059864, "grad_norm": 0.61328125, "learning_rate": 9.117147412988958e-07, "loss": 1.3975, "step": 10011 }, { "epoch": 1.7272492021047183, "grad_norm": 0.55859375, "learning_rate": 9.105795920616478e-07, "loss": 1.4103, "step": 10012 }, { "epoch": 1.7274217200034503, "grad_norm": 0.55078125, "learning_rate": 9.09445116225458e-07, "loss": 1.3861, "step": 10013 }, { "epoch": 1.7275942379021823, "grad_norm": 0.58203125, "learning_rate": 9.083113138743738e-07, "loss": 1.3988, "step": 10014 }, { "epoch": 1.7277667558009142, "grad_norm": 0.58203125, "learning_rate": 9.071781850923944e-07, "loss": 1.3857, "step": 10015 }, { "epoch": 1.7279392736996462, "grad_norm": 0.578125, "learning_rate": 9.060457299634706e-07, "loss": 1.4469, "step": 10016 }, { "epoch": 1.7281117915983784, "grad_norm": 0.58984375, "learning_rate": 9.049139485714997e-07, "loss": 1.4281, "step": 10017 }, { "epoch": 1.7282843094971103, "grad_norm": 0.640625, "learning_rate": 9.037828410003336e-07, "loss": 1.4445, "step": 10018 }, { "epoch": 1.7284568273958423, "grad_norm": 0.61328125, "learning_rate": 9.026524073337695e-07, "loss": 1.3679, "step": 10019 }, { "epoch": 1.7286293452945745, "grad_norm": 0.6015625, "learning_rate": 9.015226476555594e-07, "loss": 1.4261, "step": 10020 }, { "epoch": 1.7288018631933064, "grad_norm": 0.58203125, "learning_rate": 9.003935620493987e-07, "loss": 1.5309, "step": 10021 }, { "epoch": 1.7289743810920384, "grad_norm": 0.56640625, "learning_rate": 8.992651505989425e-07, "loss": 1.3974, "step": 10022 }, { "epoch": 1.7291468989907703, "grad_norm": 0.59375, "learning_rate": 8.981374133877851e-07, "loss": 1.4646, "step": 10023 }, { "epoch": 1.7293194168895023, "grad_norm": 0.59765625, "learning_rate": 8.970103504994832e-07, "loss": 1.5218, "step": 10024 }, { "epoch": 1.7294919347882343, "grad_norm": 0.578125, "learning_rate": 8.958839620175297e-07, "loss": 1.4581, "step": 10025 }, { "epoch": 1.7296644526869662, "grad_norm": 0.57421875, "learning_rate": 8.94758248025378e-07, "loss": 1.3696, "step": 10026 }, { "epoch": 1.7298369705856982, "grad_norm": 0.61328125, "learning_rate": 8.936332086064281e-07, "loss": 1.4554, "step": 10027 }, { "epoch": 1.7300094884844301, "grad_norm": 0.55859375, "learning_rate": 8.925088438440288e-07, "loss": 1.4383, "step": 10028 }, { "epoch": 1.730182006383162, "grad_norm": 0.62109375, "learning_rate": 8.913851538214802e-07, "loss": 1.4926, "step": 10029 }, { "epoch": 1.7303545242818943, "grad_norm": 0.609375, "learning_rate": 8.902621386220355e-07, "loss": 1.3719, "step": 10030 }, { "epoch": 1.7305270421806263, "grad_norm": 0.54296875, "learning_rate": 8.891397983288874e-07, "loss": 1.4848, "step": 10031 }, { "epoch": 1.7306995600793582, "grad_norm": 0.54296875, "learning_rate": 8.880181330251935e-07, "loss": 1.4134, "step": 10032 }, { "epoch": 1.7308720779780902, "grad_norm": 0.58984375, "learning_rate": 8.868971427940498e-07, "loss": 1.4186, "step": 10033 }, { "epoch": 1.7310445958768224, "grad_norm": 0.70703125, "learning_rate": 8.857768277185041e-07, "loss": 1.4711, "step": 10034 }, { "epoch": 1.7312171137755543, "grad_norm": 0.56640625, "learning_rate": 8.846571878815647e-07, "loss": 1.4353, "step": 10035 }, { "epoch": 1.7313896316742863, "grad_norm": 0.609375, "learning_rate": 8.835382233661727e-07, "loss": 1.4302, "step": 10036 }, { "epoch": 1.7315621495730182, "grad_norm": 0.5703125, "learning_rate": 8.82419934255232e-07, "loss": 1.4478, "step": 10037 }, { "epoch": 1.7317346674717502, "grad_norm": 0.578125, "learning_rate": 8.813023206315918e-07, "loss": 1.3949, "step": 10038 }, { "epoch": 1.7319071853704822, "grad_norm": 0.55078125, "learning_rate": 8.801853825780516e-07, "loss": 1.3945, "step": 10039 }, { "epoch": 1.7320797032692141, "grad_norm": 0.5546875, "learning_rate": 8.790691201773616e-07, "loss": 1.4858, "step": 10040 }, { "epoch": 1.732252221167946, "grad_norm": 0.59375, "learning_rate": 8.779535335122236e-07, "loss": 1.4134, "step": 10041 }, { "epoch": 1.732424739066678, "grad_norm": 0.6015625, "learning_rate": 8.768386226652814e-07, "loss": 1.5136, "step": 10042 }, { "epoch": 1.73259725696541, "grad_norm": 1.5234375, "learning_rate": 8.757243877191412e-07, "loss": 1.4558, "step": 10043 }, { "epoch": 1.7327697748641422, "grad_norm": 0.5703125, "learning_rate": 8.746108287563482e-07, "loss": 1.5203, "step": 10044 }, { "epoch": 1.7329422927628741, "grad_norm": 0.80078125, "learning_rate": 8.734979458594028e-07, "loss": 1.5167, "step": 10045 }, { "epoch": 1.733114810661606, "grad_norm": 0.58203125, "learning_rate": 8.723857391107549e-07, "loss": 1.5184, "step": 10046 }, { "epoch": 1.7332873285603383, "grad_norm": 0.734375, "learning_rate": 8.712742085928027e-07, "loss": 1.5099, "step": 10047 }, { "epoch": 1.7334598464590703, "grad_norm": 0.6015625, "learning_rate": 8.701633543878974e-07, "loss": 1.4642, "step": 10048 }, { "epoch": 1.7336323643578022, "grad_norm": 0.5703125, "learning_rate": 8.69053176578335e-07, "loss": 1.3658, "step": 10049 }, { "epoch": 1.7338048822565342, "grad_norm": 0.5859375, "learning_rate": 8.679436752463677e-07, "loss": 1.467, "step": 10050 }, { "epoch": 1.7339774001552661, "grad_norm": 0.58203125, "learning_rate": 8.668348504741919e-07, "loss": 1.4236, "step": 10051 }, { "epoch": 1.734149918053998, "grad_norm": 0.56640625, "learning_rate": 8.657267023439586e-07, "loss": 1.3282, "step": 10052 }, { "epoch": 1.73432243595273, "grad_norm": 0.56640625, "learning_rate": 8.64619230937761e-07, "loss": 1.5454, "step": 10053 }, { "epoch": 1.734494953851462, "grad_norm": 0.66015625, "learning_rate": 8.635124363376557e-07, "loss": 1.3545, "step": 10054 }, { "epoch": 1.734667471750194, "grad_norm": 0.59375, "learning_rate": 8.624063186256327e-07, "loss": 1.3744, "step": 10055 }, { "epoch": 1.734839989648926, "grad_norm": 0.58203125, "learning_rate": 8.613008778836463e-07, "loss": 1.3671, "step": 10056 }, { "epoch": 1.7350125075476581, "grad_norm": 0.6171875, "learning_rate": 8.601961141935922e-07, "loss": 1.4429, "step": 10057 }, { "epoch": 1.73518502544639, "grad_norm": 0.5546875, "learning_rate": 8.590920276373172e-07, "loss": 1.4882, "step": 10058 }, { "epoch": 1.735357543345122, "grad_norm": 0.89453125, "learning_rate": 8.579886182966191e-07, "loss": 1.4334, "step": 10059 }, { "epoch": 1.735530061243854, "grad_norm": 0.5859375, "learning_rate": 8.56885886253247e-07, "loss": 1.4344, "step": 10060 }, { "epoch": 1.7357025791425862, "grad_norm": 0.58203125, "learning_rate": 8.557838315888966e-07, "loss": 1.3999, "step": 10061 }, { "epoch": 1.7358750970413181, "grad_norm": 0.61328125, "learning_rate": 8.546824543852183e-07, "loss": 1.4345, "step": 10062 }, { "epoch": 1.73604761494005, "grad_norm": 0.625, "learning_rate": 8.535817547238023e-07, "loss": 1.4524, "step": 10063 }, { "epoch": 1.736220132838782, "grad_norm": 0.60546875, "learning_rate": 8.524817326862034e-07, "loss": 1.5366, "step": 10064 }, { "epoch": 1.736392650737514, "grad_norm": 0.5625, "learning_rate": 8.513823883539118e-07, "loss": 1.2663, "step": 10065 }, { "epoch": 1.736565168636246, "grad_norm": 0.55078125, "learning_rate": 8.502837218083737e-07, "loss": 1.3951, "step": 10066 }, { "epoch": 1.736737686534978, "grad_norm": 0.6171875, "learning_rate": 8.491857331309928e-07, "loss": 1.3688, "step": 10067 }, { "epoch": 1.73691020443371, "grad_norm": 0.61328125, "learning_rate": 8.480884224031061e-07, "loss": 1.378, "step": 10068 }, { "epoch": 1.7370827223324419, "grad_norm": 0.578125, "learning_rate": 8.469917897060142e-07, "loss": 1.3862, "step": 10069 }, { "epoch": 1.7372552402311738, "grad_norm": 0.55078125, "learning_rate": 8.458958351209601e-07, "loss": 1.4146, "step": 10070 }, { "epoch": 1.737427758129906, "grad_norm": 0.609375, "learning_rate": 8.448005587291408e-07, "loss": 1.3553, "step": 10071 }, { "epoch": 1.737600276028638, "grad_norm": 0.59375, "learning_rate": 8.437059606117004e-07, "loss": 1.5589, "step": 10072 }, { "epoch": 1.73777279392737, "grad_norm": 0.6015625, "learning_rate": 8.426120408497351e-07, "loss": 1.3337, "step": 10073 }, { "epoch": 1.7379453118261021, "grad_norm": 0.87109375, "learning_rate": 8.415187995242846e-07, "loss": 1.4074, "step": 10074 }, { "epoch": 1.738117829724834, "grad_norm": 0.5625, "learning_rate": 8.404262367163495e-07, "loss": 1.3617, "step": 10075 }, { "epoch": 1.738290347623566, "grad_norm": 0.609375, "learning_rate": 8.393343525068687e-07, "loss": 1.4669, "step": 10076 }, { "epoch": 1.738462865522298, "grad_norm": 0.6796875, "learning_rate": 8.382431469767372e-07, "loss": 1.4331, "step": 10077 }, { "epoch": 1.73863538342103, "grad_norm": 0.98828125, "learning_rate": 8.371526202067993e-07, "loss": 1.3406, "step": 10078 }, { "epoch": 1.738807901319762, "grad_norm": 0.62890625, "learning_rate": 8.360627722778469e-07, "loss": 1.3818, "step": 10079 }, { "epoch": 1.738980419218494, "grad_norm": 0.5859375, "learning_rate": 8.349736032706234e-07, "loss": 1.3747, "step": 10080 }, { "epoch": 1.7391529371172259, "grad_norm": 0.57421875, "learning_rate": 8.338851132658221e-07, "loss": 1.5443, "step": 10081 }, { "epoch": 1.7393254550159578, "grad_norm": 0.640625, "learning_rate": 8.327973023440827e-07, "loss": 1.4706, "step": 10082 }, { "epoch": 1.7394979729146898, "grad_norm": 0.53125, "learning_rate": 8.317101705859986e-07, "loss": 1.4077, "step": 10083 }, { "epoch": 1.7396704908134217, "grad_norm": 0.57421875, "learning_rate": 8.306237180721121e-07, "loss": 1.3999, "step": 10084 }, { "epoch": 1.739843008712154, "grad_norm": 0.62109375, "learning_rate": 8.295379448829133e-07, "loss": 1.422, "step": 10085 }, { "epoch": 1.7400155266108859, "grad_norm": 0.6171875, "learning_rate": 8.284528510988444e-07, "loss": 1.3776, "step": 10086 }, { "epoch": 1.7401880445096178, "grad_norm": 0.59765625, "learning_rate": 8.273684368002922e-07, "loss": 1.4917, "step": 10087 }, { "epoch": 1.74036056240835, "grad_norm": 0.71875, "learning_rate": 8.262847020676024e-07, "loss": 1.5477, "step": 10088 }, { "epoch": 1.740533080307082, "grad_norm": 0.59765625, "learning_rate": 8.252016469810597e-07, "loss": 1.4891, "step": 10089 }, { "epoch": 1.740705598205814, "grad_norm": 0.6640625, "learning_rate": 8.241192716209056e-07, "loss": 1.404, "step": 10090 }, { "epoch": 1.740878116104546, "grad_norm": 0.59375, "learning_rate": 8.230375760673304e-07, "loss": 1.3418, "step": 10091 }, { "epoch": 1.7410506340032779, "grad_norm": 0.625, "learning_rate": 8.219565604004709e-07, "loss": 1.4332, "step": 10092 }, { "epoch": 1.7412231519020098, "grad_norm": 0.57421875, "learning_rate": 8.208762247004176e-07, "loss": 1.3749, "step": 10093 }, { "epoch": 1.7413956698007418, "grad_norm": 0.7890625, "learning_rate": 8.197965690472088e-07, "loss": 1.4559, "step": 10094 }, { "epoch": 1.7415681876994737, "grad_norm": 0.57421875, "learning_rate": 8.187175935208269e-07, "loss": 1.3704, "step": 10095 }, { "epoch": 1.7417407055982057, "grad_norm": 0.54296875, "learning_rate": 8.17639298201216e-07, "loss": 1.4201, "step": 10096 }, { "epoch": 1.7419132234969377, "grad_norm": 0.55859375, "learning_rate": 8.165616831682588e-07, "loss": 1.3645, "step": 10097 }, { "epoch": 1.7420857413956699, "grad_norm": 0.5859375, "learning_rate": 8.154847485017913e-07, "loss": 1.5445, "step": 10098 }, { "epoch": 1.7422582592944018, "grad_norm": 0.57421875, "learning_rate": 8.144084942816043e-07, "loss": 1.4455, "step": 10099 }, { "epoch": 1.7424307771931338, "grad_norm": 0.6171875, "learning_rate": 8.133329205874286e-07, "loss": 1.4443, "step": 10100 }, { "epoch": 1.7424307771931338, "eval_loss": 1.4070847034454346, "eval_runtime": 11.2683, "eval_samples_per_second": 90.874, "eval_steps_per_second": 22.719, "step": 10100 }, { "epoch": 1.7426032950918657, "grad_norm": 0.57421875, "learning_rate": 8.122580274989511e-07, "loss": 1.3178, "step": 10101 }, { "epoch": 1.742775812990598, "grad_norm": 0.58984375, "learning_rate": 8.111838150958062e-07, "loss": 1.4056, "step": 10102 }, { "epoch": 1.7429483308893299, "grad_norm": 0.6171875, "learning_rate": 8.101102834575792e-07, "loss": 1.4306, "step": 10103 }, { "epoch": 1.7431208487880618, "grad_norm": 0.57421875, "learning_rate": 8.090374326638028e-07, "loss": 1.3321, "step": 10104 }, { "epoch": 1.7432933666867938, "grad_norm": 0.57421875, "learning_rate": 8.079652627939638e-07, "loss": 1.4098, "step": 10105 }, { "epoch": 1.7434658845855258, "grad_norm": 0.56640625, "learning_rate": 8.068937739274885e-07, "loss": 1.4515, "step": 10106 }, { "epoch": 1.7436384024842577, "grad_norm": 0.70703125, "learning_rate": 8.058229661437677e-07, "loss": 1.4637, "step": 10107 }, { "epoch": 1.7438109203829897, "grad_norm": 0.58984375, "learning_rate": 8.047528395221271e-07, "loss": 1.4579, "step": 10108 }, { "epoch": 1.7439834382817216, "grad_norm": 0.625, "learning_rate": 8.03683394141851e-07, "loss": 1.4756, "step": 10109 }, { "epoch": 1.7441559561804536, "grad_norm": 0.5546875, "learning_rate": 8.026146300821702e-07, "loss": 1.3693, "step": 10110 }, { "epoch": 1.7443284740791856, "grad_norm": 0.68359375, "learning_rate": 8.015465474222661e-07, "loss": 1.3065, "step": 10111 }, { "epoch": 1.7445009919779177, "grad_norm": 0.66796875, "learning_rate": 8.004791462412675e-07, "loss": 1.4108, "step": 10112 }, { "epoch": 1.7446735098766497, "grad_norm": 0.5390625, "learning_rate": 7.994124266182568e-07, "loss": 1.4297, "step": 10113 }, { "epoch": 1.7448460277753817, "grad_norm": 0.5703125, "learning_rate": 7.983463886322584e-07, "loss": 1.4389, "step": 10114 }, { "epoch": 1.7450185456741139, "grad_norm": 0.6171875, "learning_rate": 7.97281032362256e-07, "loss": 1.4527, "step": 10115 }, { "epoch": 1.7451910635728458, "grad_norm": 0.55859375, "learning_rate": 7.962163578871751e-07, "loss": 1.3734, "step": 10116 }, { "epoch": 1.7453635814715778, "grad_norm": 0.58203125, "learning_rate": 7.95152365285895e-07, "loss": 1.4244, "step": 10117 }, { "epoch": 1.7455360993703097, "grad_norm": 0.5546875, "learning_rate": 7.940890546372437e-07, "loss": 1.408, "step": 10118 }, { "epoch": 1.7457086172690417, "grad_norm": 0.58984375, "learning_rate": 7.930264260199938e-07, "loss": 1.4781, "step": 10119 }, { "epoch": 1.7458811351677737, "grad_norm": 0.60546875, "learning_rate": 7.919644795128767e-07, "loss": 1.393, "step": 10120 }, { "epoch": 1.7460536530665056, "grad_norm": 0.61328125, "learning_rate": 7.909032151945639e-07, "loss": 1.5179, "step": 10121 }, { "epoch": 1.7462261709652376, "grad_norm": 0.796875, "learning_rate": 7.898426331436815e-07, "loss": 1.4342, "step": 10122 }, { "epoch": 1.7463986888639695, "grad_norm": 0.57421875, "learning_rate": 7.887827334388054e-07, "loss": 1.4451, "step": 10123 }, { "epoch": 1.7465712067627015, "grad_norm": 0.625, "learning_rate": 7.877235161584584e-07, "loss": 1.4059, "step": 10124 }, { "epoch": 1.7467437246614335, "grad_norm": 0.55859375, "learning_rate": 7.866649813811145e-07, "loss": 1.4599, "step": 10125 }, { "epoch": 1.7469162425601656, "grad_norm": 0.67578125, "learning_rate": 7.856071291851975e-07, "loss": 1.4546, "step": 10126 }, { "epoch": 1.7470887604588976, "grad_norm": 0.57421875, "learning_rate": 7.845499596490758e-07, "loss": 1.3923, "step": 10127 }, { "epoch": 1.7472612783576296, "grad_norm": 0.59765625, "learning_rate": 7.834934728510768e-07, "loss": 1.3865, "step": 10128 }, { "epoch": 1.7474337962563617, "grad_norm": 0.609375, "learning_rate": 7.824376688694668e-07, "loss": 1.3667, "step": 10129 }, { "epoch": 1.7476063141550937, "grad_norm": 0.5859375, "learning_rate": 7.813825477824665e-07, "loss": 1.5013, "step": 10130 }, { "epoch": 1.7477788320538257, "grad_norm": 0.58203125, "learning_rate": 7.803281096682524e-07, "loss": 1.4107, "step": 10131 }, { "epoch": 1.7479513499525576, "grad_norm": 0.59375, "learning_rate": 7.792743546049364e-07, "loss": 1.4919, "step": 10132 }, { "epoch": 1.7481238678512896, "grad_norm": 0.625, "learning_rate": 7.782212826705892e-07, "loss": 1.3758, "step": 10133 }, { "epoch": 1.7482963857500216, "grad_norm": 0.58984375, "learning_rate": 7.771688939432309e-07, "loss": 1.5687, "step": 10134 }, { "epoch": 1.7484689036487535, "grad_norm": 0.56640625, "learning_rate": 7.761171885008279e-07, "loss": 1.3382, "step": 10135 }, { "epoch": 1.7486414215474855, "grad_norm": 0.58203125, "learning_rate": 7.750661664212966e-07, "loss": 1.3175, "step": 10136 }, { "epoch": 1.7488139394462174, "grad_norm": 0.62109375, "learning_rate": 7.74015827782505e-07, "loss": 1.522, "step": 10137 }, { "epoch": 1.7489864573449494, "grad_norm": 0.60546875, "learning_rate": 7.72966172662265e-07, "loss": 1.5441, "step": 10138 }, { "epoch": 1.7491589752436816, "grad_norm": 0.57421875, "learning_rate": 7.719172011383468e-07, "loss": 1.3934, "step": 10139 }, { "epoch": 1.7493314931424135, "grad_norm": 0.56640625, "learning_rate": 7.708689132884606e-07, "loss": 1.4005, "step": 10140 }, { "epoch": 1.7495040110411455, "grad_norm": 0.6171875, "learning_rate": 7.698213091902718e-07, "loss": 1.5491, "step": 10141 }, { "epoch": 1.7496765289398775, "grad_norm": 0.890625, "learning_rate": 7.687743889213939e-07, "loss": 1.431, "step": 10142 }, { "epoch": 1.7498490468386096, "grad_norm": 0.8984375, "learning_rate": 7.677281525593871e-07, "loss": 1.4588, "step": 10143 }, { "epoch": 1.7500215647373416, "grad_norm": 0.5859375, "learning_rate": 7.666826001817651e-07, "loss": 1.4738, "step": 10144 }, { "epoch": 1.7501940826360736, "grad_norm": 0.56640625, "learning_rate": 7.656377318659891e-07, "loss": 1.3973, "step": 10145 }, { "epoch": 1.7503666005348055, "grad_norm": 0.56640625, "learning_rate": 7.645935476894684e-07, "loss": 1.4568, "step": 10146 }, { "epoch": 1.7505391184335375, "grad_norm": 0.578125, "learning_rate": 7.635500477295632e-07, "loss": 1.3341, "step": 10147 }, { "epoch": 1.7507116363322694, "grad_norm": 0.7421875, "learning_rate": 7.625072320635829e-07, "loss": 1.3969, "step": 10148 }, { "epoch": 1.7508841542310014, "grad_norm": 0.5859375, "learning_rate": 7.614651007687857e-07, "loss": 1.5004, "step": 10149 }, { "epoch": 1.7510566721297334, "grad_norm": 0.59375, "learning_rate": 7.604236539223797e-07, "loss": 1.4903, "step": 10150 }, { "epoch": 1.7512291900284653, "grad_norm": 0.5546875, "learning_rate": 7.593828916015178e-07, "loss": 1.5149, "step": 10151 }, { "epoch": 1.7514017079271973, "grad_norm": 0.60546875, "learning_rate": 7.583428138833126e-07, "loss": 1.3694, "step": 10152 }, { "epoch": 1.7515742258259295, "grad_norm": 0.56640625, "learning_rate": 7.573034208448149e-07, "loss": 1.3829, "step": 10153 }, { "epoch": 1.7517467437246614, "grad_norm": 0.59765625, "learning_rate": 7.562647125630307e-07, "loss": 1.4432, "step": 10154 }, { "epoch": 1.7519192616233934, "grad_norm": 0.58984375, "learning_rate": 7.55226689114914e-07, "loss": 1.4537, "step": 10155 }, { "epoch": 1.7520917795221256, "grad_norm": 0.5859375, "learning_rate": 7.541893505773679e-07, "loss": 1.4597, "step": 10156 }, { "epoch": 1.7522642974208575, "grad_norm": 0.6015625, "learning_rate": 7.531526970272463e-07, "loss": 1.3658, "step": 10157 }, { "epoch": 1.7524368153195895, "grad_norm": 0.61328125, "learning_rate": 7.5211672854135e-07, "loss": 1.4581, "step": 10158 }, { "epoch": 1.7526093332183215, "grad_norm": 0.58984375, "learning_rate": 7.510814451964277e-07, "loss": 1.4452, "step": 10159 }, { "epoch": 1.7527818511170534, "grad_norm": 0.55078125, "learning_rate": 7.500468470691846e-07, "loss": 1.4134, "step": 10160 }, { "epoch": 1.7529543690157854, "grad_norm": 0.546875, "learning_rate": 7.490129342362662e-07, "loss": 1.3958, "step": 10161 }, { "epoch": 1.7531268869145173, "grad_norm": 0.59375, "learning_rate": 7.479797067742711e-07, "loss": 1.4252, "step": 10162 }, { "epoch": 1.7532994048132493, "grad_norm": 0.578125, "learning_rate": 7.469471647597515e-07, "loss": 1.4293, "step": 10163 }, { "epoch": 1.7534719227119813, "grad_norm": 0.5625, "learning_rate": 7.459153082691994e-07, "loss": 1.4295, "step": 10164 }, { "epoch": 1.7536444406107132, "grad_norm": 0.5625, "learning_rate": 7.448841373790639e-07, "loss": 1.3745, "step": 10165 }, { "epoch": 1.7538169585094452, "grad_norm": 0.6015625, "learning_rate": 7.438536521657402e-07, "loss": 1.401, "step": 10166 }, { "epoch": 1.7539894764081774, "grad_norm": 0.59765625, "learning_rate": 7.428238527055731e-07, "loss": 1.4449, "step": 10167 }, { "epoch": 1.7541619943069093, "grad_norm": 0.60546875, "learning_rate": 7.41794739074857e-07, "loss": 1.3888, "step": 10168 }, { "epoch": 1.7543345122056413, "grad_norm": 0.64453125, "learning_rate": 7.407663113498353e-07, "loss": 1.4028, "step": 10169 }, { "epoch": 1.7545070301043735, "grad_norm": 0.5390625, "learning_rate": 7.397385696066972e-07, "loss": 1.3718, "step": 10170 }, { "epoch": 1.7546795480031054, "grad_norm": 0.5859375, "learning_rate": 7.387115139215895e-07, "loss": 1.3943, "step": 10171 }, { "epoch": 1.7548520659018374, "grad_norm": 0.60546875, "learning_rate": 7.376851443705968e-07, "loss": 1.5253, "step": 10172 }, { "epoch": 1.7550245838005694, "grad_norm": 0.578125, "learning_rate": 7.36659461029765e-07, "loss": 1.4728, "step": 10173 }, { "epoch": 1.7551971016993013, "grad_norm": 0.61328125, "learning_rate": 7.35634463975079e-07, "loss": 1.4457, "step": 10174 }, { "epoch": 1.7553696195980333, "grad_norm": 0.9921875, "learning_rate": 7.346101532824789e-07, "loss": 1.398, "step": 10175 }, { "epoch": 1.7555421374967652, "grad_norm": 0.9921875, "learning_rate": 7.33586529027851e-07, "loss": 1.4072, "step": 10176 }, { "epoch": 1.7557146553954972, "grad_norm": 0.640625, "learning_rate": 7.325635912870321e-07, "loss": 1.4049, "step": 10177 }, { "epoch": 1.7558871732942292, "grad_norm": 0.5625, "learning_rate": 7.315413401358084e-07, "loss": 1.529, "step": 10178 }, { "epoch": 1.7560596911929611, "grad_norm": 0.55859375, "learning_rate": 7.305197756499139e-07, "loss": 1.4471, "step": 10179 }, { "epoch": 1.7562322090916933, "grad_norm": 0.59375, "learning_rate": 7.294988979050333e-07, "loss": 1.4572, "step": 10180 }, { "epoch": 1.7564047269904253, "grad_norm": 0.61328125, "learning_rate": 7.284787069767984e-07, "loss": 1.3271, "step": 10181 }, { "epoch": 1.7565772448891572, "grad_norm": 0.5703125, "learning_rate": 7.274592029407946e-07, "loss": 1.4513, "step": 10182 }, { "epoch": 1.7567497627878892, "grad_norm": 0.58984375, "learning_rate": 7.264403858725466e-07, "loss": 1.504, "step": 10183 }, { "epoch": 1.7569222806866214, "grad_norm": 0.55078125, "learning_rate": 7.254222558475421e-07, "loss": 1.3852, "step": 10184 }, { "epoch": 1.7570947985853533, "grad_norm": 0.5859375, "learning_rate": 7.244048129412051e-07, "loss": 1.5207, "step": 10185 }, { "epoch": 1.7572673164840853, "grad_norm": 0.5703125, "learning_rate": 7.233880572289165e-07, "loss": 1.4163, "step": 10186 }, { "epoch": 1.7574398343828173, "grad_norm": 0.60546875, "learning_rate": 7.223719887860037e-07, "loss": 1.4681, "step": 10187 }, { "epoch": 1.7576123522815492, "grad_norm": 0.625, "learning_rate": 7.213566076877431e-07, "loss": 1.3599, "step": 10188 }, { "epoch": 1.7577848701802812, "grad_norm": 0.6796875, "learning_rate": 7.203419140093604e-07, "loss": 1.3668, "step": 10189 }, { "epoch": 1.7579573880790131, "grad_norm": 0.59375, "learning_rate": 7.193279078260329e-07, "loss": 1.4273, "step": 10190 }, { "epoch": 1.758129905977745, "grad_norm": 0.59375, "learning_rate": 7.183145892128785e-07, "loss": 1.4715, "step": 10191 }, { "epoch": 1.758302423876477, "grad_norm": 0.58203125, "learning_rate": 7.17301958244978e-07, "loss": 1.4836, "step": 10192 }, { "epoch": 1.758474941775209, "grad_norm": 0.66015625, "learning_rate": 7.162900149973473e-07, "loss": 1.362, "step": 10193 }, { "epoch": 1.7586474596739412, "grad_norm": 0.6328125, "learning_rate": 7.152787595449573e-07, "loss": 1.3473, "step": 10194 }, { "epoch": 1.7588199775726732, "grad_norm": 0.59765625, "learning_rate": 7.142681919627348e-07, "loss": 1.4041, "step": 10195 }, { "epoch": 1.7589924954714051, "grad_norm": 0.578125, "learning_rate": 7.132583123255421e-07, "loss": 1.3846, "step": 10196 }, { "epoch": 1.7591650133701373, "grad_norm": 0.72265625, "learning_rate": 7.122491207082006e-07, "loss": 1.3585, "step": 10197 }, { "epoch": 1.7593375312688693, "grad_norm": 0.6171875, "learning_rate": 7.112406171854758e-07, "loss": 1.4366, "step": 10198 }, { "epoch": 1.7595100491676012, "grad_norm": 0.59765625, "learning_rate": 7.102328018320859e-07, "loss": 1.4385, "step": 10199 }, { "epoch": 1.7596825670663332, "grad_norm": 0.62890625, "learning_rate": 7.092256747226944e-07, "loss": 1.4074, "step": 10200 }, { "epoch": 1.7596825670663332, "eval_loss": 1.4070675373077393, "eval_runtime": 10.9117, "eval_samples_per_second": 93.844, "eval_steps_per_second": 23.461, "step": 10200 }, { "epoch": 1.7598550849650652, "grad_norm": 0.578125, "learning_rate": 7.082192359319184e-07, "loss": 1.3849, "step": 10201 }, { "epoch": 1.7600276028637971, "grad_norm": 0.59765625, "learning_rate": 7.07213485534316e-07, "loss": 1.3763, "step": 10202 }, { "epoch": 1.760200120762529, "grad_norm": 0.6171875, "learning_rate": 7.062084236044065e-07, "loss": 1.4973, "step": 10203 }, { "epoch": 1.760372638661261, "grad_norm": 0.59375, "learning_rate": 7.052040502166424e-07, "loss": 1.359, "step": 10204 }, { "epoch": 1.760545156559993, "grad_norm": 0.5703125, "learning_rate": 7.042003654454432e-07, "loss": 1.4431, "step": 10205 }, { "epoch": 1.760717674458725, "grad_norm": 0.5703125, "learning_rate": 7.031973693651617e-07, "loss": 1.4034, "step": 10206 }, { "epoch": 1.7608901923574571, "grad_norm": 0.58203125, "learning_rate": 7.021950620501084e-07, "loss": 1.5532, "step": 10207 }, { "epoch": 1.761062710256189, "grad_norm": 0.64453125, "learning_rate": 7.011934435745404e-07, "loss": 1.3747, "step": 10208 }, { "epoch": 1.761235228154921, "grad_norm": 0.55859375, "learning_rate": 7.001925140126631e-07, "loss": 1.4181, "step": 10209 }, { "epoch": 1.761407746053653, "grad_norm": 0.578125, "learning_rate": 6.991922734386336e-07, "loss": 1.3301, "step": 10210 }, { "epoch": 1.7615802639523852, "grad_norm": 0.59765625, "learning_rate": 6.981927219265527e-07, "loss": 1.5361, "step": 10211 }, { "epoch": 1.7617527818511172, "grad_norm": 0.7265625, "learning_rate": 6.971938595504768e-07, "loss": 1.4583, "step": 10212 }, { "epoch": 1.7619252997498491, "grad_norm": 0.62109375, "learning_rate": 6.961956863844055e-07, "loss": 1.4323, "step": 10213 }, { "epoch": 1.762097817648581, "grad_norm": 0.60546875, "learning_rate": 6.951982025022929e-07, "loss": 1.3942, "step": 10214 }, { "epoch": 1.762270335547313, "grad_norm": 0.5703125, "learning_rate": 6.942014079780335e-07, "loss": 1.4092, "step": 10215 }, { "epoch": 1.762442853446045, "grad_norm": 0.5625, "learning_rate": 6.932053028854813e-07, "loss": 1.4492, "step": 10216 }, { "epoch": 1.762615371344777, "grad_norm": 0.578125, "learning_rate": 6.922098872984317e-07, "loss": 1.4895, "step": 10217 }, { "epoch": 1.762787889243509, "grad_norm": 0.56640625, "learning_rate": 6.912151612906303e-07, "loss": 1.4247, "step": 10218 }, { "epoch": 1.762960407142241, "grad_norm": 0.5859375, "learning_rate": 6.902211249357738e-07, "loss": 1.4815, "step": 10219 }, { "epoch": 1.7631329250409729, "grad_norm": 0.57421875, "learning_rate": 6.892277783075063e-07, "loss": 1.4463, "step": 10220 }, { "epoch": 1.763305442939705, "grad_norm": 0.59765625, "learning_rate": 6.882351214794225e-07, "loss": 1.422, "step": 10221 }, { "epoch": 1.763477960838437, "grad_norm": 0.60546875, "learning_rate": 6.872431545250636e-07, "loss": 1.5413, "step": 10222 }, { "epoch": 1.763650478737169, "grad_norm": 0.58984375, "learning_rate": 6.862518775179183e-07, "loss": 1.4846, "step": 10223 }, { "epoch": 1.763822996635901, "grad_norm": 0.58984375, "learning_rate": 6.852612905314326e-07, "loss": 1.4419, "step": 10224 }, { "epoch": 1.763995514534633, "grad_norm": 0.56640625, "learning_rate": 6.842713936389889e-07, "loss": 1.3773, "step": 10225 }, { "epoch": 1.764168032433365, "grad_norm": 0.59375, "learning_rate": 6.832821869139272e-07, "loss": 1.4648, "step": 10226 }, { "epoch": 1.764340550332097, "grad_norm": 0.890625, "learning_rate": 6.822936704295369e-07, "loss": 1.3654, "step": 10227 }, { "epoch": 1.764513068230829, "grad_norm": 0.5859375, "learning_rate": 6.813058442590504e-07, "loss": 1.4976, "step": 10228 }, { "epoch": 1.764685586129561, "grad_norm": 0.6015625, "learning_rate": 6.803187084756524e-07, "loss": 1.371, "step": 10229 }, { "epoch": 1.764858104028293, "grad_norm": 0.60546875, "learning_rate": 6.793322631524768e-07, "loss": 1.4333, "step": 10230 }, { "epoch": 1.7650306219270249, "grad_norm": 0.59765625, "learning_rate": 6.78346508362605e-07, "loss": 1.4147, "step": 10231 }, { "epoch": 1.7652031398257568, "grad_norm": 0.62890625, "learning_rate": 6.773614441790677e-07, "loss": 1.3943, "step": 10232 }, { "epoch": 1.7653756577244888, "grad_norm": 0.58984375, "learning_rate": 6.763770706748462e-07, "loss": 1.4275, "step": 10233 }, { "epoch": 1.7655481756232208, "grad_norm": 0.6328125, "learning_rate": 6.75393387922868e-07, "loss": 1.3645, "step": 10234 }, { "epoch": 1.765720693521953, "grad_norm": 0.68359375, "learning_rate": 6.744103959960113e-07, "loss": 1.3754, "step": 10235 }, { "epoch": 1.765893211420685, "grad_norm": 0.58203125, "learning_rate": 6.734280949670991e-07, "loss": 1.3763, "step": 10236 }, { "epoch": 1.7660657293194169, "grad_norm": 0.578125, "learning_rate": 6.724464849089107e-07, "loss": 1.4541, "step": 10237 }, { "epoch": 1.766238247218149, "grad_norm": 0.578125, "learning_rate": 6.714655658941671e-07, "loss": 1.3945, "step": 10238 }, { "epoch": 1.766410765116881, "grad_norm": 0.57421875, "learning_rate": 6.704853379955423e-07, "loss": 1.3877, "step": 10239 }, { "epoch": 1.766583283015613, "grad_norm": 0.5546875, "learning_rate": 6.69505801285657e-07, "loss": 1.4581, "step": 10240 }, { "epoch": 1.766755800914345, "grad_norm": 0.77734375, "learning_rate": 6.68526955837081e-07, "loss": 1.4175, "step": 10241 }, { "epoch": 1.7669283188130769, "grad_norm": 0.59375, "learning_rate": 6.675488017223342e-07, "loss": 1.4392, "step": 10242 }, { "epoch": 1.7671008367118088, "grad_norm": 0.609375, "learning_rate": 6.665713390138839e-07, "loss": 1.3622, "step": 10243 }, { "epoch": 1.7672733546105408, "grad_norm": 0.5703125, "learning_rate": 6.655945677841457e-07, "loss": 1.3939, "step": 10244 }, { "epoch": 1.7674458725092728, "grad_norm": 0.5703125, "learning_rate": 6.646184881054874e-07, "loss": 1.3592, "step": 10245 }, { "epoch": 1.7676183904080047, "grad_norm": 0.86328125, "learning_rate": 6.636431000502231e-07, "loss": 1.4977, "step": 10246 }, { "epoch": 1.7677909083067367, "grad_norm": 0.59765625, "learning_rate": 6.626684036906106e-07, "loss": 1.4954, "step": 10247 }, { "epoch": 1.7679634262054689, "grad_norm": 0.58203125, "learning_rate": 6.616943990988689e-07, "loss": 1.4395, "step": 10248 }, { "epoch": 1.7681359441042008, "grad_norm": 0.5546875, "learning_rate": 6.607210863471525e-07, "loss": 1.4819, "step": 10249 }, { "epoch": 1.7683084620029328, "grad_norm": 0.55859375, "learning_rate": 6.597484655075726e-07, "loss": 1.3285, "step": 10250 }, { "epoch": 1.7684809799016648, "grad_norm": 0.58984375, "learning_rate": 6.58776536652187e-07, "loss": 1.4535, "step": 10251 }, { "epoch": 1.768653497800397, "grad_norm": 0.5625, "learning_rate": 6.578052998530016e-07, "loss": 1.539, "step": 10252 }, { "epoch": 1.768826015699129, "grad_norm": 0.578125, "learning_rate": 6.568347551819731e-07, "loss": 1.4043, "step": 10253 }, { "epoch": 1.7689985335978609, "grad_norm": 0.7109375, "learning_rate": 6.558649027110054e-07, "loss": 1.3375, "step": 10254 }, { "epoch": 1.7691710514965928, "grad_norm": 0.5859375, "learning_rate": 6.548957425119484e-07, "loss": 1.4756, "step": 10255 }, { "epoch": 1.7693435693953248, "grad_norm": 0.59375, "learning_rate": 6.539272746566083e-07, "loss": 1.4516, "step": 10256 }, { "epoch": 1.7695160872940567, "grad_norm": 0.6015625, "learning_rate": 6.529594992167321e-07, "loss": 1.3337, "step": 10257 }, { "epoch": 1.7696886051927887, "grad_norm": 0.57421875, "learning_rate": 6.519924162640168e-07, "loss": 1.5003, "step": 10258 }, { "epoch": 1.7698611230915207, "grad_norm": 0.6875, "learning_rate": 6.510260258701151e-07, "loss": 1.476, "step": 10259 }, { "epoch": 1.7700336409902526, "grad_norm": 0.58203125, "learning_rate": 6.500603281066175e-07, "loss": 1.3975, "step": 10260 }, { "epoch": 1.7702061588889846, "grad_norm": 0.59375, "learning_rate": 6.490953230450758e-07, "loss": 1.4206, "step": 10261 }, { "epoch": 1.7703786767877168, "grad_norm": 0.69140625, "learning_rate": 6.481310107569772e-07, "loss": 1.4545, "step": 10262 }, { "epoch": 1.7705511946864487, "grad_norm": 0.55859375, "learning_rate": 6.471673913137666e-07, "loss": 1.5057, "step": 10263 }, { "epoch": 1.7707237125851807, "grad_norm": 0.56640625, "learning_rate": 6.46204464786836e-07, "loss": 1.3801, "step": 10264 }, { "epoch": 1.7708962304839129, "grad_norm": 0.5546875, "learning_rate": 6.452422312475226e-07, "loss": 1.4318, "step": 10265 }, { "epoch": 1.7710687483826448, "grad_norm": 0.640625, "learning_rate": 6.442806907671162e-07, "loss": 1.4907, "step": 10266 }, { "epoch": 1.7712412662813768, "grad_norm": 0.65625, "learning_rate": 6.433198434168552e-07, "loss": 1.512, "step": 10267 }, { "epoch": 1.7714137841801088, "grad_norm": 0.53125, "learning_rate": 6.423596892679207e-07, "loss": 1.3861, "step": 10268 }, { "epoch": 1.7715863020788407, "grad_norm": 0.5859375, "learning_rate": 6.414002283914522e-07, "loss": 1.4541, "step": 10269 }, { "epoch": 1.7717588199775727, "grad_norm": 0.5625, "learning_rate": 6.404414608585285e-07, "loss": 1.5342, "step": 10270 }, { "epoch": 1.7719313378763046, "grad_norm": 0.55859375, "learning_rate": 6.394833867401829e-07, "loss": 1.4425, "step": 10271 }, { "epoch": 1.7721038557750366, "grad_norm": 0.73828125, "learning_rate": 6.385260061073962e-07, "loss": 1.4565, "step": 10272 }, { "epoch": 1.7722763736737686, "grad_norm": 0.59765625, "learning_rate": 6.37569319031095e-07, "loss": 1.4543, "step": 10273 }, { "epoch": 1.7724488915725005, "grad_norm": 0.609375, "learning_rate": 6.366133255821572e-07, "loss": 1.4817, "step": 10274 }, { "epoch": 1.7726214094712325, "grad_norm": 0.5703125, "learning_rate": 6.356580258314105e-07, "loss": 1.3968, "step": 10275 }, { "epoch": 1.7727939273699647, "grad_norm": 0.578125, "learning_rate": 6.347034198496271e-07, "loss": 1.4356, "step": 10276 }, { "epoch": 1.7729664452686966, "grad_norm": 0.70703125, "learning_rate": 6.337495077075328e-07, "loss": 1.4766, "step": 10277 }, { "epoch": 1.7731389631674286, "grad_norm": 0.609375, "learning_rate": 6.327962894757988e-07, "loss": 1.5474, "step": 10278 }, { "epoch": 1.7733114810661608, "grad_norm": 0.5703125, "learning_rate": 6.31843765225042e-07, "loss": 1.4506, "step": 10279 }, { "epoch": 1.7734839989648927, "grad_norm": 0.62109375, "learning_rate": 6.308919350258369e-07, "loss": 1.4491, "step": 10280 }, { "epoch": 1.7736565168636247, "grad_norm": 0.59375, "learning_rate": 6.29940798948696e-07, "loss": 1.4158, "step": 10281 }, { "epoch": 1.7738290347623566, "grad_norm": 0.56640625, "learning_rate": 6.289903570640887e-07, "loss": 1.419, "step": 10282 }, { "epoch": 1.7740015526610886, "grad_norm": 0.59765625, "learning_rate": 6.280406094424285e-07, "loss": 1.4394, "step": 10283 }, { "epoch": 1.7741740705598206, "grad_norm": 1.4296875, "learning_rate": 6.27091556154078e-07, "loss": 1.5351, "step": 10284 }, { "epoch": 1.7743465884585525, "grad_norm": 0.5703125, "learning_rate": 6.261431972693499e-07, "loss": 1.3564, "step": 10285 }, { "epoch": 1.7745191063572845, "grad_norm": 0.55078125, "learning_rate": 6.251955328585057e-07, "loss": 1.3911, "step": 10286 }, { "epoch": 1.7746916242560165, "grad_norm": 0.5546875, "learning_rate": 6.242485629917494e-07, "loss": 1.4094, "step": 10287 }, { "epoch": 1.7748641421547484, "grad_norm": 0.609375, "learning_rate": 6.233022877392458e-07, "loss": 1.4178, "step": 10288 }, { "epoch": 1.7750366600534806, "grad_norm": 0.60546875, "learning_rate": 6.223567071710946e-07, "loss": 1.3358, "step": 10289 }, { "epoch": 1.7752091779522126, "grad_norm": 0.60546875, "learning_rate": 6.214118213573517e-07, "loss": 1.4563, "step": 10290 }, { "epoch": 1.7753816958509445, "grad_norm": 0.62109375, "learning_rate": 6.204676303680246e-07, "loss": 1.373, "step": 10291 }, { "epoch": 1.7755542137496765, "grad_norm": 0.59375, "learning_rate": 6.195241342730585e-07, "loss": 1.5764, "step": 10292 }, { "epoch": 1.7757267316484087, "grad_norm": 0.6171875, "learning_rate": 6.185813331423584e-07, "loss": 1.4124, "step": 10293 }, { "epoch": 1.7758992495471406, "grad_norm": 0.6171875, "learning_rate": 6.176392270457709e-07, "loss": 1.3338, "step": 10294 }, { "epoch": 1.7760717674458726, "grad_norm": 0.6640625, "learning_rate": 6.166978160530923e-07, "loss": 1.4685, "step": 10295 }, { "epoch": 1.7762442853446045, "grad_norm": 0.58984375, "learning_rate": 6.15757100234069e-07, "loss": 1.4325, "step": 10296 }, { "epoch": 1.7764168032433365, "grad_norm": 0.5859375, "learning_rate": 6.148170796583963e-07, "loss": 1.3772, "step": 10297 }, { "epoch": 1.7765893211420685, "grad_norm": 0.578125, "learning_rate": 6.138777543957141e-07, "loss": 1.4324, "step": 10298 }, { "epoch": 1.7767618390408004, "grad_norm": 0.59765625, "learning_rate": 6.129391245156168e-07, "loss": 1.5199, "step": 10299 }, { "epoch": 1.7769343569395324, "grad_norm": 0.55859375, "learning_rate": 6.1200119008764e-07, "loss": 1.4584, "step": 10300 }, { "epoch": 1.7769343569395324, "eval_loss": 1.4070483446121216, "eval_runtime": 11.0405, "eval_samples_per_second": 92.75, "eval_steps_per_second": 23.187, "step": 10300 }, { "epoch": 1.7771068748382643, "grad_norm": 0.6171875, "learning_rate": 6.110639511812765e-07, "loss": 1.3487, "step": 10301 }, { "epoch": 1.7772793927369963, "grad_norm": 0.58984375, "learning_rate": 6.101274078659591e-07, "loss": 1.4138, "step": 10302 }, { "epoch": 1.7774519106357285, "grad_norm": 0.61328125, "learning_rate": 6.091915602110743e-07, "loss": 1.4851, "step": 10303 }, { "epoch": 1.7776244285344605, "grad_norm": 0.66796875, "learning_rate": 6.082564082859543e-07, "loss": 1.4141, "step": 10304 }, { "epoch": 1.7777969464331924, "grad_norm": 0.58984375, "learning_rate": 6.073219521598828e-07, "loss": 1.369, "step": 10305 }, { "epoch": 1.7779694643319246, "grad_norm": 0.6171875, "learning_rate": 6.063881919020887e-07, "loss": 1.3713, "step": 10306 }, { "epoch": 1.7781419822306566, "grad_norm": 0.84765625, "learning_rate": 6.05455127581751e-07, "loss": 1.5123, "step": 10307 }, { "epoch": 1.7783145001293885, "grad_norm": 0.609375, "learning_rate": 6.04522759267997e-07, "loss": 1.4518, "step": 10308 }, { "epoch": 1.7784870180281205, "grad_norm": 0.578125, "learning_rate": 6.035910870299033e-07, "loss": 1.435, "step": 10309 }, { "epoch": 1.7786595359268524, "grad_norm": 0.640625, "learning_rate": 6.026601109364949e-07, "loss": 1.5177, "step": 10310 }, { "epoch": 1.7788320538255844, "grad_norm": 0.58203125, "learning_rate": 6.017298310567399e-07, "loss": 1.4735, "step": 10311 }, { "epoch": 1.7790045717243164, "grad_norm": 0.55859375, "learning_rate": 6.008002474595653e-07, "loss": 1.4801, "step": 10312 }, { "epoch": 1.7791770896230483, "grad_norm": 0.55859375, "learning_rate": 5.998713602138351e-07, "loss": 1.3985, "step": 10313 }, { "epoch": 1.7793496075217803, "grad_norm": 0.61328125, "learning_rate": 5.989431693883696e-07, "loss": 1.4779, "step": 10314 }, { "epoch": 1.7795221254205122, "grad_norm": 0.69921875, "learning_rate": 5.98015675051935e-07, "loss": 1.415, "step": 10315 }, { "epoch": 1.7796946433192442, "grad_norm": 0.59375, "learning_rate": 5.970888772732453e-07, "loss": 1.4775, "step": 10316 }, { "epoch": 1.7798671612179764, "grad_norm": 0.6015625, "learning_rate": 5.961627761209632e-07, "loss": 1.4578, "step": 10317 }, { "epoch": 1.7800396791167084, "grad_norm": 0.609375, "learning_rate": 5.952373716637016e-07, "loss": 1.415, "step": 10318 }, { "epoch": 1.7802121970154403, "grad_norm": 0.54296875, "learning_rate": 5.94312663970017e-07, "loss": 1.3462, "step": 10319 }, { "epoch": 1.7803847149141725, "grad_norm": 0.578125, "learning_rate": 5.933886531084232e-07, "loss": 1.4286, "step": 10320 }, { "epoch": 1.7805572328129045, "grad_norm": 0.58984375, "learning_rate": 5.924653391473689e-07, "loss": 1.4845, "step": 10321 }, { "epoch": 1.7807297507116364, "grad_norm": 0.58203125, "learning_rate": 5.915427221552672e-07, "loss": 1.4518, "step": 10322 }, { "epoch": 1.7809022686103684, "grad_norm": 0.609375, "learning_rate": 5.906208022004656e-07, "loss": 1.4583, "step": 10323 }, { "epoch": 1.7810747865091003, "grad_norm": 0.59375, "learning_rate": 5.89699579351266e-07, "loss": 1.3691, "step": 10324 }, { "epoch": 1.7812473044078323, "grad_norm": 0.5625, "learning_rate": 5.88779053675923e-07, "loss": 1.4326, "step": 10325 }, { "epoch": 1.7814198223065643, "grad_norm": 0.53515625, "learning_rate": 5.878592252426296e-07, "loss": 1.4378, "step": 10326 }, { "epoch": 1.7815923402052962, "grad_norm": 0.6015625, "learning_rate": 5.869400941195357e-07, "loss": 1.488, "step": 10327 }, { "epoch": 1.7817648581040282, "grad_norm": 0.58203125, "learning_rate": 5.860216603747349e-07, "loss": 1.3521, "step": 10328 }, { "epoch": 1.7819373760027601, "grad_norm": 0.56640625, "learning_rate": 5.851039240762702e-07, "loss": 1.4167, "step": 10329 }, { "epoch": 1.7821098939014923, "grad_norm": 0.5859375, "learning_rate": 5.84186885292134e-07, "loss": 1.5135, "step": 10330 }, { "epoch": 1.7822824118002243, "grad_norm": 0.6171875, "learning_rate": 5.832705440902675e-07, "loss": 1.4273, "step": 10331 }, { "epoch": 1.7824549296989562, "grad_norm": 0.5703125, "learning_rate": 5.823549005385543e-07, "loss": 1.5056, "step": 10332 }, { "epoch": 1.7826274475976882, "grad_norm": 0.56640625, "learning_rate": 5.814399547048378e-07, "loss": 1.4058, "step": 10333 }, { "epoch": 1.7827999654964204, "grad_norm": 0.62109375, "learning_rate": 5.80525706656897e-07, "loss": 1.4132, "step": 10334 }, { "epoch": 1.7829724833951524, "grad_norm": 0.60546875, "learning_rate": 5.796121564624679e-07, "loss": 1.4934, "step": 10335 }, { "epoch": 1.7831450012938843, "grad_norm": 0.56640625, "learning_rate": 5.786993041892319e-07, "loss": 1.3976, "step": 10336 }, { "epoch": 1.7833175191926163, "grad_norm": 0.578125, "learning_rate": 5.777871499048182e-07, "loss": 1.5118, "step": 10337 }, { "epoch": 1.7834900370913482, "grad_norm": 0.58984375, "learning_rate": 5.76875693676805e-07, "loss": 1.3587, "step": 10338 }, { "epoch": 1.7836625549900802, "grad_norm": 0.58984375, "learning_rate": 5.759649355727182e-07, "loss": 1.4272, "step": 10339 }, { "epoch": 1.7838350728888122, "grad_norm": 0.63671875, "learning_rate": 5.750548756600338e-07, "loss": 1.4136, "step": 10340 }, { "epoch": 1.7840075907875441, "grad_norm": 0.57421875, "learning_rate": 5.741455140061747e-07, "loss": 1.3687, "step": 10341 }, { "epoch": 1.784180108686276, "grad_norm": 0.59765625, "learning_rate": 5.732368506785113e-07, "loss": 1.471, "step": 10342 }, { "epoch": 1.784352626585008, "grad_norm": 0.6953125, "learning_rate": 5.72328885744361e-07, "loss": 1.3807, "step": 10343 }, { "epoch": 1.7845251444837402, "grad_norm": 0.6171875, "learning_rate": 5.714216192709976e-07, "loss": 1.4743, "step": 10344 }, { "epoch": 1.7846976623824722, "grad_norm": 0.5859375, "learning_rate": 5.705150513256297e-07, "loss": 1.3449, "step": 10345 }, { "epoch": 1.7848701802812041, "grad_norm": 0.5859375, "learning_rate": 5.696091819754268e-07, "loss": 1.4331, "step": 10346 }, { "epoch": 1.7850426981799363, "grad_norm": 0.59765625, "learning_rate": 5.687040112874986e-07, "loss": 1.3974, "step": 10347 }, { "epoch": 1.7852152160786683, "grad_norm": 0.57421875, "learning_rate": 5.67799539328906e-07, "loss": 1.4973, "step": 10348 }, { "epoch": 1.7853877339774002, "grad_norm": 0.62109375, "learning_rate": 5.668957661666597e-07, "loss": 1.46, "step": 10349 }, { "epoch": 1.7855602518761322, "grad_norm": 0.61328125, "learning_rate": 5.659926918677172e-07, "loss": 1.3491, "step": 10350 }, { "epoch": 1.7857327697748642, "grad_norm": 0.5625, "learning_rate": 5.650903164989784e-07, "loss": 1.4681, "step": 10351 }, { "epoch": 1.7859052876735961, "grad_norm": 0.5546875, "learning_rate": 5.641886401273056e-07, "loss": 1.4334, "step": 10352 }, { "epoch": 1.786077805572328, "grad_norm": 0.59375, "learning_rate": 5.632876628194917e-07, "loss": 1.3925, "step": 10353 }, { "epoch": 1.78625032347106, "grad_norm": 0.6015625, "learning_rate": 5.623873846422945e-07, "loss": 1.4241, "step": 10354 }, { "epoch": 1.786422841369792, "grad_norm": 0.7421875, "learning_rate": 5.614878056624074e-07, "loss": 1.3811, "step": 10355 }, { "epoch": 1.786595359268524, "grad_norm": 0.5625, "learning_rate": 5.60588925946477e-07, "loss": 1.4115, "step": 10356 }, { "epoch": 1.7867678771672562, "grad_norm": 0.6953125, "learning_rate": 5.596907455611011e-07, "loss": 1.3852, "step": 10357 }, { "epoch": 1.7869403950659881, "grad_norm": 0.56640625, "learning_rate": 5.5879326457282e-07, "loss": 1.4252, "step": 10358 }, { "epoch": 1.78711291296472, "grad_norm": 0.63671875, "learning_rate": 5.578964830481249e-07, "loss": 1.4649, "step": 10359 }, { "epoch": 1.787285430863452, "grad_norm": 0.59375, "learning_rate": 5.570004010534557e-07, "loss": 1.436, "step": 10360 }, { "epoch": 1.7874579487621842, "grad_norm": 0.5859375, "learning_rate": 5.561050186551986e-07, "loss": 1.457, "step": 10361 }, { "epoch": 1.7876304666609162, "grad_norm": 0.5703125, "learning_rate": 5.552103359196914e-07, "loss": 1.4263, "step": 10362 }, { "epoch": 1.7878029845596481, "grad_norm": 0.55859375, "learning_rate": 5.543163529132168e-07, "loss": 1.4285, "step": 10363 }, { "epoch": 1.78797550245838, "grad_norm": 0.60546875, "learning_rate": 5.534230697020027e-07, "loss": 1.4432, "step": 10364 }, { "epoch": 1.788148020357112, "grad_norm": 0.578125, "learning_rate": 5.525304863522363e-07, "loss": 1.394, "step": 10365 }, { "epoch": 1.788320538255844, "grad_norm": 0.6328125, "learning_rate": 5.516386029300391e-07, "loss": 1.4578, "step": 10366 }, { "epoch": 1.788493056154576, "grad_norm": 0.52734375, "learning_rate": 5.507474195014917e-07, "loss": 1.3135, "step": 10367 }, { "epoch": 1.788665574053308, "grad_norm": 0.56640625, "learning_rate": 5.498569361326168e-07, "loss": 1.402, "step": 10368 }, { "epoch": 1.78883809195204, "grad_norm": 0.71484375, "learning_rate": 5.489671528893869e-07, "loss": 1.4344, "step": 10369 }, { "epoch": 1.7890106098507719, "grad_norm": 0.5703125, "learning_rate": 5.480780698377241e-07, "loss": 1.3027, "step": 10370 }, { "epoch": 1.789183127749504, "grad_norm": 0.5625, "learning_rate": 5.471896870434957e-07, "loss": 1.4547, "step": 10371 }, { "epoch": 1.789355645648236, "grad_norm": 0.52734375, "learning_rate": 5.463020045725187e-07, "loss": 1.3488, "step": 10372 }, { "epoch": 1.789528163546968, "grad_norm": 0.52734375, "learning_rate": 5.454150224905586e-07, "loss": 1.4106, "step": 10373 }, { "epoch": 1.7897006814457, "grad_norm": 0.59375, "learning_rate": 5.445287408633304e-07, "loss": 1.3909, "step": 10374 }, { "epoch": 1.7898731993444321, "grad_norm": 0.58984375, "learning_rate": 5.436431597564907e-07, "loss": 1.5508, "step": 10375 }, { "epoch": 1.790045717243164, "grad_norm": 0.5859375, "learning_rate": 5.427582792356545e-07, "loss": 1.4281, "step": 10376 }, { "epoch": 1.790218235141896, "grad_norm": 0.66796875, "learning_rate": 5.418740993663751e-07, "loss": 1.4708, "step": 10377 }, { "epoch": 1.790390753040628, "grad_norm": 0.5625, "learning_rate": 5.409906202141602e-07, "loss": 1.4345, "step": 10378 }, { "epoch": 1.79056327093936, "grad_norm": 0.578125, "learning_rate": 5.401078418444617e-07, "loss": 1.4302, "step": 10379 }, { "epoch": 1.790735788838092, "grad_norm": 0.5859375, "learning_rate": 5.392257643226828e-07, "loss": 1.4859, "step": 10380 }, { "epoch": 1.7909083067368239, "grad_norm": 0.578125, "learning_rate": 5.383443877141737e-07, "loss": 1.4114, "step": 10381 }, { "epoch": 1.7910808246355558, "grad_norm": 0.5859375, "learning_rate": 5.374637120842308e-07, "loss": 1.476, "step": 10382 }, { "epoch": 1.7912533425342878, "grad_norm": 0.59375, "learning_rate": 5.365837374981009e-07, "loss": 1.531, "step": 10383 }, { "epoch": 1.7914258604330198, "grad_norm": 0.58984375, "learning_rate": 5.357044640209796e-07, "loss": 1.4339, "step": 10384 }, { "epoch": 1.791598378331752, "grad_norm": 0.6015625, "learning_rate": 5.348258917180038e-07, "loss": 1.3312, "step": 10385 }, { "epoch": 1.791770896230484, "grad_norm": 0.83203125, "learning_rate": 5.339480206542702e-07, "loss": 1.4726, "step": 10386 }, { "epoch": 1.7919434141292159, "grad_norm": 0.578125, "learning_rate": 5.330708508948123e-07, "loss": 1.3928, "step": 10387 }, { "epoch": 1.792115932027948, "grad_norm": 0.57421875, "learning_rate": 5.321943825046171e-07, "loss": 1.4194, "step": 10388 }, { "epoch": 1.79228844992668, "grad_norm": 0.5625, "learning_rate": 5.313186155486216e-07, "loss": 1.4375, "step": 10389 }, { "epoch": 1.792460967825412, "grad_norm": 0.578125, "learning_rate": 5.304435500917049e-07, "loss": 1.4913, "step": 10390 }, { "epoch": 1.792633485724144, "grad_norm": 0.5703125, "learning_rate": 5.295691861986985e-07, "loss": 1.3996, "step": 10391 }, { "epoch": 1.792806003622876, "grad_norm": 0.578125, "learning_rate": 5.286955239343816e-07, "loss": 1.4013, "step": 10392 }, { "epoch": 1.7929785215216079, "grad_norm": 0.59765625, "learning_rate": 5.278225633634793e-07, "loss": 1.3898, "step": 10393 }, { "epoch": 1.7931510394203398, "grad_norm": 0.578125, "learning_rate": 5.269503045506652e-07, "loss": 1.4631, "step": 10394 }, { "epoch": 1.7933235573190718, "grad_norm": 0.59375, "learning_rate": 5.260787475605656e-07, "loss": 1.4925, "step": 10395 }, { "epoch": 1.7934960752178037, "grad_norm": 0.59375, "learning_rate": 5.252078924577453e-07, "loss": 1.5018, "step": 10396 }, { "epoch": 1.7936685931165357, "grad_norm": 0.6484375, "learning_rate": 5.243377393067284e-07, "loss": 1.3756, "step": 10397 }, { "epoch": 1.7938411110152679, "grad_norm": 0.5625, "learning_rate": 5.234682881719766e-07, "loss": 1.4001, "step": 10398 }, { "epoch": 1.7940136289139998, "grad_norm": 0.58203125, "learning_rate": 5.225995391179061e-07, "loss": 1.419, "step": 10399 }, { "epoch": 1.7941861468127318, "grad_norm": 0.5625, "learning_rate": 5.21731492208879e-07, "loss": 1.3098, "step": 10400 }, { "epoch": 1.7941861468127318, "eval_loss": 1.4070560932159424, "eval_runtime": 10.8247, "eval_samples_per_second": 94.598, "eval_steps_per_second": 23.65, "step": 10400 }, { "epoch": 1.7943586647114638, "grad_norm": 0.59765625, "learning_rate": 5.208641475092069e-07, "loss": 1.3357, "step": 10401 }, { "epoch": 1.794531182610196, "grad_norm": 0.58203125, "learning_rate": 5.199975050831463e-07, "loss": 1.5601, "step": 10402 }, { "epoch": 1.794703700508928, "grad_norm": 0.80859375, "learning_rate": 5.191315649949046e-07, "loss": 1.363, "step": 10403 }, { "epoch": 1.7948762184076599, "grad_norm": 0.58203125, "learning_rate": 5.182663273086364e-07, "loss": 1.38, "step": 10404 }, { "epoch": 1.7950487363063918, "grad_norm": 0.57421875, "learning_rate": 5.174017920884423e-07, "loss": 1.3737, "step": 10405 }, { "epoch": 1.7952212542051238, "grad_norm": 0.5703125, "learning_rate": 5.165379593983755e-07, "loss": 1.5047, "step": 10406 }, { "epoch": 1.7953937721038558, "grad_norm": 0.73046875, "learning_rate": 5.156748293024283e-07, "loss": 1.4924, "step": 10407 }, { "epoch": 1.7955662900025877, "grad_norm": 0.6015625, "learning_rate": 5.148124018645539e-07, "loss": 1.3867, "step": 10408 }, { "epoch": 1.7957388079013197, "grad_norm": 0.55859375, "learning_rate": 5.139506771486414e-07, "loss": 1.3976, "step": 10409 }, { "epoch": 1.7959113258000516, "grad_norm": 0.59375, "learning_rate": 5.130896552185349e-07, "loss": 1.4595, "step": 10410 }, { "epoch": 1.7960838436987836, "grad_norm": 0.578125, "learning_rate": 5.122293361380238e-07, "loss": 1.4081, "step": 10411 }, { "epoch": 1.7962563615975158, "grad_norm": 0.62890625, "learning_rate": 5.113697199708456e-07, "loss": 1.4327, "step": 10412 }, { "epoch": 1.7964288794962477, "grad_norm": 0.578125, "learning_rate": 5.105108067806863e-07, "loss": 1.3137, "step": 10413 }, { "epoch": 1.7966013973949797, "grad_norm": 0.578125, "learning_rate": 5.096525966311794e-07, "loss": 1.4744, "step": 10414 }, { "epoch": 1.7967739152937119, "grad_norm": 0.6015625, "learning_rate": 5.087950895859062e-07, "loss": 1.5153, "step": 10415 }, { "epoch": 1.7969464331924438, "grad_norm": 0.6484375, "learning_rate": 5.079382857083981e-07, "loss": 1.4555, "step": 10416 }, { "epoch": 1.7971189510911758, "grad_norm": 0.5546875, "learning_rate": 5.070821850621277e-07, "loss": 1.3925, "step": 10417 }, { "epoch": 1.7972914689899078, "grad_norm": 0.5625, "learning_rate": 5.062267877105275e-07, "loss": 1.4293, "step": 10418 }, { "epoch": 1.7974639868886397, "grad_norm": 0.61328125, "learning_rate": 5.053720937169648e-07, "loss": 1.3991, "step": 10419 }, { "epoch": 1.7976365047873717, "grad_norm": 0.56640625, "learning_rate": 5.04518103144761e-07, "loss": 1.4303, "step": 10420 }, { "epoch": 1.7978090226861037, "grad_norm": 0.6484375, "learning_rate": 5.036648160571889e-07, "loss": 1.4, "step": 10421 }, { "epoch": 1.7979815405848356, "grad_norm": 0.640625, "learning_rate": 5.028122325174623e-07, "loss": 1.4848, "step": 10422 }, { "epoch": 1.7981540584835676, "grad_norm": 0.6171875, "learning_rate": 5.019603525887462e-07, "loss": 1.4521, "step": 10423 }, { "epoch": 1.7983265763822995, "grad_norm": 0.55859375, "learning_rate": 5.011091763341547e-07, "loss": 1.4574, "step": 10424 }, { "epoch": 1.7984990942810315, "grad_norm": 0.59375, "learning_rate": 5.002587038167461e-07, "loss": 1.3833, "step": 10425 }, { "epoch": 1.7986716121797637, "grad_norm": 0.5859375, "learning_rate": 4.994089350995301e-07, "loss": 1.5584, "step": 10426 }, { "epoch": 1.7988441300784956, "grad_norm": 0.6328125, "learning_rate": 4.985598702454653e-07, "loss": 1.3979, "step": 10427 }, { "epoch": 1.7990166479772276, "grad_norm": 0.625, "learning_rate": 4.977115093174489e-07, "loss": 1.5396, "step": 10428 }, { "epoch": 1.7991891658759598, "grad_norm": 0.6328125, "learning_rate": 4.968638523783398e-07, "loss": 1.5011, "step": 10429 }, { "epoch": 1.7993616837746917, "grad_norm": 0.6015625, "learning_rate": 4.960168994909343e-07, "loss": 1.3286, "step": 10430 }, { "epoch": 1.7995342016734237, "grad_norm": 0.65625, "learning_rate": 4.951706507179788e-07, "loss": 1.3573, "step": 10431 }, { "epoch": 1.7997067195721557, "grad_norm": 0.640625, "learning_rate": 4.943251061221721e-07, "loss": 1.4431, "step": 10432 }, { "epoch": 1.7998792374708876, "grad_norm": 0.58984375, "learning_rate": 4.934802657661553e-07, "loss": 1.3382, "step": 10433 }, { "epoch": 1.8000517553696196, "grad_norm": 0.6640625, "learning_rate": 4.926361297125193e-07, "loss": 1.516, "step": 10434 }, { "epoch": 1.8002242732683515, "grad_norm": 0.55859375, "learning_rate": 4.917926980238041e-07, "loss": 1.3845, "step": 10435 }, { "epoch": 1.8003967911670835, "grad_norm": 0.58984375, "learning_rate": 4.909499707624966e-07, "loss": 1.3371, "step": 10436 }, { "epoch": 1.8005693090658155, "grad_norm": 0.6171875, "learning_rate": 4.901079479910287e-07, "loss": 1.462, "step": 10437 }, { "epoch": 1.8007418269645474, "grad_norm": 0.58984375, "learning_rate": 4.892666297717874e-07, "loss": 1.4135, "step": 10438 }, { "epoch": 1.8009143448632796, "grad_norm": 0.578125, "learning_rate": 4.884260161670972e-07, "loss": 1.4871, "step": 10439 }, { "epoch": 1.8010868627620116, "grad_norm": 0.6328125, "learning_rate": 4.875861072392408e-07, "loss": 1.3797, "step": 10440 }, { "epoch": 1.8012593806607435, "grad_norm": 0.58203125, "learning_rate": 4.86746903050439e-07, "loss": 1.4785, "step": 10441 }, { "epoch": 1.8014318985594755, "grad_norm": 0.64453125, "learning_rate": 4.859084036628714e-07, "loss": 1.5303, "step": 10442 }, { "epoch": 1.8016044164582077, "grad_norm": 0.65234375, "learning_rate": 4.850706091386548e-07, "loss": 1.4003, "step": 10443 }, { "epoch": 1.8017769343569396, "grad_norm": 1.3046875, "learning_rate": 4.842335195398595e-07, "loss": 1.3405, "step": 10444 }, { "epoch": 1.8019494522556716, "grad_norm": 0.63671875, "learning_rate": 4.833971349285027e-07, "loss": 1.3997, "step": 10445 }, { "epoch": 1.8021219701544036, "grad_norm": 0.59765625, "learning_rate": 4.825614553665481e-07, "loss": 1.4876, "step": 10446 }, { "epoch": 1.8022944880531355, "grad_norm": 0.56640625, "learning_rate": 4.817264809159084e-07, "loss": 1.41, "step": 10447 }, { "epoch": 1.8024670059518675, "grad_norm": 0.58203125, "learning_rate": 4.808922116384451e-07, "loss": 1.4368, "step": 10448 }, { "epoch": 1.8026395238505994, "grad_norm": 0.60546875, "learning_rate": 4.800586475959623e-07, "loss": 1.4177, "step": 10449 }, { "epoch": 1.8028120417493314, "grad_norm": 0.578125, "learning_rate": 4.792257888502217e-07, "loss": 1.4365, "step": 10450 }, { "epoch": 1.8029845596480634, "grad_norm": 0.62109375, "learning_rate": 4.783936354629215e-07, "loss": 1.3633, "step": 10451 }, { "epoch": 1.8031570775467953, "grad_norm": 0.5859375, "learning_rate": 4.775621874957126e-07, "loss": 1.4407, "step": 10452 }, { "epoch": 1.8033295954455275, "grad_norm": 0.56640625, "learning_rate": 4.7673144501019897e-07, "loss": 1.3938, "step": 10453 }, { "epoch": 1.8035021133442595, "grad_norm": 0.58203125, "learning_rate": 4.759014080679225e-07, "loss": 1.4431, "step": 10454 }, { "epoch": 1.8036746312429914, "grad_norm": 0.5625, "learning_rate": 4.7507207673037956e-07, "loss": 1.4637, "step": 10455 }, { "epoch": 1.8038471491417236, "grad_norm": 0.6484375, "learning_rate": 4.7424345105901105e-07, "loss": 1.459, "step": 10456 }, { "epoch": 1.8040196670404556, "grad_norm": 0.625, "learning_rate": 4.7341553111520776e-07, "loss": 1.4838, "step": 10457 }, { "epoch": 1.8041921849391875, "grad_norm": 0.546875, "learning_rate": 4.7258831696030624e-07, "loss": 1.3738, "step": 10458 }, { "epoch": 1.8043647028379195, "grad_norm": 0.578125, "learning_rate": 4.7176180865559416e-07, "loss": 1.393, "step": 10459 }, { "epoch": 1.8045372207366515, "grad_norm": 0.55078125, "learning_rate": 4.709360062622992e-07, "loss": 1.4474, "step": 10460 }, { "epoch": 1.8047097386353834, "grad_norm": 0.59375, "learning_rate": 4.701109098416079e-07, "loss": 1.5006, "step": 10461 }, { "epoch": 1.8048822565341154, "grad_norm": 0.6171875, "learning_rate": 4.6928651945464585e-07, "loss": 1.4166, "step": 10462 }, { "epoch": 1.8050547744328473, "grad_norm": 0.58203125, "learning_rate": 4.6846283516248756e-07, "loss": 1.5211, "step": 10463 }, { "epoch": 1.8052272923315793, "grad_norm": 0.55078125, "learning_rate": 4.6763985702615864e-07, "loss": 1.4285, "step": 10464 }, { "epoch": 1.8053998102303113, "grad_norm": 0.546875, "learning_rate": 4.6681758510663035e-07, "loss": 1.4389, "step": 10465 }, { "epoch": 1.8055723281290432, "grad_norm": 0.6015625, "learning_rate": 4.6599601946482164e-07, "loss": 1.4591, "step": 10466 }, { "epoch": 1.8057448460277754, "grad_norm": 0.6015625, "learning_rate": 4.651751601615984e-07, "loss": 1.4747, "step": 10467 }, { "epoch": 1.8059173639265074, "grad_norm": 0.6484375, "learning_rate": 4.6435500725777627e-07, "loss": 1.3947, "step": 10468 }, { "epoch": 1.8060898818252393, "grad_norm": 0.578125, "learning_rate": 4.635355608141168e-07, "loss": 1.4092, "step": 10469 }, { "epoch": 1.8062623997239715, "grad_norm": 0.61328125, "learning_rate": 4.6271682089132796e-07, "loss": 1.3991, "step": 10470 }, { "epoch": 1.8064349176227035, "grad_norm": 0.59765625, "learning_rate": 4.618987875500702e-07, "loss": 1.4512, "step": 10471 }, { "epoch": 1.8066074355214354, "grad_norm": 0.578125, "learning_rate": 4.610814608509484e-07, "loss": 1.3993, "step": 10472 }, { "epoch": 1.8067799534201674, "grad_norm": 0.55078125, "learning_rate": 4.602648408545107e-07, "loss": 1.4226, "step": 10473 }, { "epoch": 1.8069524713188994, "grad_norm": 0.63671875, "learning_rate": 4.594489276212633e-07, "loss": 1.3826, "step": 10474 }, { "epoch": 1.8071249892176313, "grad_norm": 0.60546875, "learning_rate": 4.5863372121165095e-07, "loss": 1.4057, "step": 10475 }, { "epoch": 1.8072975071163633, "grad_norm": 0.57421875, "learning_rate": 4.5781922168606883e-07, "loss": 1.5323, "step": 10476 }, { "epoch": 1.8074700250150952, "grad_norm": 0.61328125, "learning_rate": 4.570054291048609e-07, "loss": 1.3534, "step": 10477 }, { "epoch": 1.8076425429138272, "grad_norm": 0.57421875, "learning_rate": 4.561923435283189e-07, "loss": 1.4982, "step": 10478 }, { "epoch": 1.8078150608125592, "grad_norm": 0.5546875, "learning_rate": 4.553799650166801e-07, "loss": 1.376, "step": 10479 }, { "epoch": 1.8079875787112913, "grad_norm": 0.6015625, "learning_rate": 4.545682936301321e-07, "loss": 1.4337, "step": 10480 }, { "epoch": 1.8081600966100233, "grad_norm": 0.671875, "learning_rate": 4.5375732942880557e-07, "loss": 1.4383, "step": 10481 }, { "epoch": 1.8083326145087553, "grad_norm": 0.5859375, "learning_rate": 4.529470724727858e-07, "loss": 1.4463, "step": 10482 }, { "epoch": 1.8085051324074872, "grad_norm": 0.59765625, "learning_rate": 4.5213752282209924e-07, "loss": 1.4528, "step": 10483 }, { "epoch": 1.8086776503062194, "grad_norm": 0.66796875, "learning_rate": 4.513286805367212e-07, "loss": 1.4562, "step": 10484 }, { "epoch": 1.8088501682049514, "grad_norm": 0.56640625, "learning_rate": 4.505205456765793e-07, "loss": 1.384, "step": 10485 }, { "epoch": 1.8090226861036833, "grad_norm": 0.625, "learning_rate": 4.497131183015424e-07, "loss": 1.3599, "step": 10486 }, { "epoch": 1.8091952040024153, "grad_norm": 0.57421875, "learning_rate": 4.4890639847143035e-07, "loss": 1.4611, "step": 10487 }, { "epoch": 1.8093677219011473, "grad_norm": 0.58203125, "learning_rate": 4.4810038624601095e-07, "loss": 1.4362, "step": 10488 }, { "epoch": 1.8095402397998792, "grad_norm": 0.60546875, "learning_rate": 4.472950816849975e-07, "loss": 1.4141, "step": 10489 }, { "epoch": 1.8097127576986112, "grad_norm": 0.5859375, "learning_rate": 4.464904848480522e-07, "loss": 1.4283, "step": 10490 }, { "epoch": 1.8098852755973431, "grad_norm": 0.57421875, "learning_rate": 4.4568659579478647e-07, "loss": 1.4313, "step": 10491 }, { "epoch": 1.810057793496075, "grad_norm": 0.58203125, "learning_rate": 4.4488341458475247e-07, "loss": 1.4505, "step": 10492 }, { "epoch": 1.810230311394807, "grad_norm": 0.60546875, "learning_rate": 4.440809412774616e-07, "loss": 1.3547, "step": 10493 }, { "epoch": 1.8104028292935392, "grad_norm": 0.609375, "learning_rate": 4.432791759323618e-07, "loss": 1.5555, "step": 10494 }, { "epoch": 1.8105753471922712, "grad_norm": 0.63671875, "learning_rate": 4.4247811860885335e-07, "loss": 1.394, "step": 10495 }, { "epoch": 1.8107478650910032, "grad_norm": 0.69140625, "learning_rate": 4.416777693662844e-07, "loss": 1.3825, "step": 10496 }, { "epoch": 1.8109203829897353, "grad_norm": 0.625, "learning_rate": 4.408781282639485e-07, "loss": 1.586, "step": 10497 }, { "epoch": 1.8110929008884673, "grad_norm": 0.7265625, "learning_rate": 4.4007919536109057e-07, "loss": 1.392, "step": 10498 }, { "epoch": 1.8112654187871993, "grad_norm": 0.58203125, "learning_rate": 4.392809707168977e-07, "loss": 1.4427, "step": 10499 }, { "epoch": 1.8114379366859312, "grad_norm": 0.5625, "learning_rate": 4.384834543905092e-07, "loss": 1.4485, "step": 10500 }, { "epoch": 1.8114379366859312, "eval_loss": 1.4070926904678345, "eval_runtime": 11.9829, "eval_samples_per_second": 85.455, "eval_steps_per_second": 21.364, "step": 10500 }, { "epoch": 1.8116104545846632, "grad_norm": 0.58984375, "learning_rate": 4.37686646441009e-07, "loss": 1.3746, "step": 10501 }, { "epoch": 1.8117829724833951, "grad_norm": 0.63671875, "learning_rate": 4.3689054692743094e-07, "loss": 1.4008, "step": 10502 }, { "epoch": 1.811955490382127, "grad_norm": 0.625, "learning_rate": 4.360951559087534e-07, "loss": 1.4617, "step": 10503 }, { "epoch": 1.812128008280859, "grad_norm": 0.5703125, "learning_rate": 4.353004734439059e-07, "loss": 1.3902, "step": 10504 }, { "epoch": 1.812300526179591, "grad_norm": 0.625, "learning_rate": 4.345064995917603e-07, "loss": 1.3388, "step": 10505 }, { "epoch": 1.812473044078323, "grad_norm": 0.59375, "learning_rate": 4.337132344111439e-07, "loss": 1.4352, "step": 10506 }, { "epoch": 1.812645561977055, "grad_norm": 0.59765625, "learning_rate": 4.3292067796082304e-07, "loss": 1.4476, "step": 10507 }, { "epoch": 1.8128180798757871, "grad_norm": 0.5625, "learning_rate": 4.3212883029951523e-07, "loss": 1.3818, "step": 10508 }, { "epoch": 1.812990597774519, "grad_norm": 0.59765625, "learning_rate": 4.313376914858869e-07, "loss": 1.4204, "step": 10509 }, { "epoch": 1.813163115673251, "grad_norm": 0.61328125, "learning_rate": 4.3054726157855e-07, "loss": 1.2998, "step": 10510 }, { "epoch": 1.8133356335719832, "grad_norm": 0.59765625, "learning_rate": 4.2975754063606544e-07, "loss": 1.376, "step": 10511 }, { "epoch": 1.8135081514707152, "grad_norm": 0.5859375, "learning_rate": 4.289685287169398e-07, "loss": 1.3711, "step": 10512 }, { "epoch": 1.8136806693694472, "grad_norm": 0.57421875, "learning_rate": 4.281802258796275e-07, "loss": 1.429, "step": 10513 }, { "epoch": 1.8138531872681791, "grad_norm": 1.8125, "learning_rate": 4.273926321825328e-07, "loss": 1.4026, "step": 10514 }, { "epoch": 1.814025705166911, "grad_norm": 0.69921875, "learning_rate": 4.266057476840024e-07, "loss": 1.5232, "step": 10515 }, { "epoch": 1.814198223065643, "grad_norm": 0.6015625, "learning_rate": 4.258195724423353e-07, "loss": 1.4122, "step": 10516 }, { "epoch": 1.814370740964375, "grad_norm": 0.54296875, "learning_rate": 4.250341065157793e-07, "loss": 1.4027, "step": 10517 }, { "epoch": 1.814543258863107, "grad_norm": 0.5546875, "learning_rate": 4.242493499625222e-07, "loss": 1.3615, "step": 10518 }, { "epoch": 1.814715776761839, "grad_norm": 0.63671875, "learning_rate": 4.234653028407054e-07, "loss": 1.4586, "step": 10519 }, { "epoch": 1.8148882946605709, "grad_norm": 1.109375, "learning_rate": 4.2268196520841574e-07, "loss": 1.4106, "step": 10520 }, { "epoch": 1.815060812559303, "grad_norm": 0.56640625, "learning_rate": 4.218993371236879e-07, "loss": 1.4377, "step": 10521 }, { "epoch": 1.815233330458035, "grad_norm": 0.63671875, "learning_rate": 4.211174186445033e-07, "loss": 1.4085, "step": 10522 }, { "epoch": 1.815405848356767, "grad_norm": 0.58984375, "learning_rate": 4.203362098287944e-07, "loss": 1.3718, "step": 10523 }, { "epoch": 1.815578366255499, "grad_norm": 0.61328125, "learning_rate": 4.195557107344328e-07, "loss": 1.4049, "step": 10524 }, { "epoch": 1.8157508841542311, "grad_norm": 0.72265625, "learning_rate": 4.187759214192477e-07, "loss": 1.5663, "step": 10525 }, { "epoch": 1.815923402052963, "grad_norm": 0.9453125, "learning_rate": 4.179968419410063e-07, "loss": 1.41, "step": 10526 }, { "epoch": 1.816095919951695, "grad_norm": 0.55078125, "learning_rate": 4.172184723574324e-07, "loss": 1.3764, "step": 10527 }, { "epoch": 1.816268437850427, "grad_norm": 0.56640625, "learning_rate": 4.1644081272618874e-07, "loss": 1.406, "step": 10528 }, { "epoch": 1.816440955749159, "grad_norm": 0.59375, "learning_rate": 4.1566386310489035e-07, "loss": 1.4075, "step": 10529 }, { "epoch": 1.816613473647891, "grad_norm": 0.60546875, "learning_rate": 4.14887623551099e-07, "loss": 1.3793, "step": 10530 }, { "epoch": 1.816785991546623, "grad_norm": 0.61328125, "learning_rate": 4.141120941223231e-07, "loss": 1.441, "step": 10531 }, { "epoch": 1.8169585094453549, "grad_norm": 0.578125, "learning_rate": 4.133372748760178e-07, "loss": 1.4461, "step": 10532 }, { "epoch": 1.8171310273440868, "grad_norm": 0.796875, "learning_rate": 4.1256316586958835e-07, "loss": 1.3624, "step": 10533 }, { "epoch": 1.8173035452428188, "grad_norm": 0.59375, "learning_rate": 4.117897671603843e-07, "loss": 1.3699, "step": 10534 }, { "epoch": 1.817476063141551, "grad_norm": 0.60546875, "learning_rate": 4.110170788057044e-07, "loss": 1.3984, "step": 10535 }, { "epoch": 1.817648581040283, "grad_norm": 0.60546875, "learning_rate": 4.10245100862795e-07, "loss": 1.4331, "step": 10536 }, { "epoch": 1.817821098939015, "grad_norm": 0.58984375, "learning_rate": 4.0947383338884594e-07, "loss": 1.4868, "step": 10537 }, { "epoch": 1.817993616837747, "grad_norm": 0.62890625, "learning_rate": 4.087032764410015e-07, "loss": 1.423, "step": 10538 }, { "epoch": 1.818166134736479, "grad_norm": 0.82421875, "learning_rate": 4.0793343007634713e-07, "loss": 1.4135, "step": 10539 }, { "epoch": 1.818338652635211, "grad_norm": 0.60546875, "learning_rate": 4.071642943519183e-07, "loss": 1.404, "step": 10540 }, { "epoch": 1.818511170533943, "grad_norm": 0.62109375, "learning_rate": 4.063958693246961e-07, "loss": 1.4487, "step": 10541 }, { "epoch": 1.818683688432675, "grad_norm": 0.58984375, "learning_rate": 4.056281550516128e-07, "loss": 1.4732, "step": 10542 }, { "epoch": 1.8188562063314069, "grad_norm": 0.62890625, "learning_rate": 4.0486115158954396e-07, "loss": 1.4363, "step": 10543 }, { "epoch": 1.8190287242301388, "grad_norm": 0.60546875, "learning_rate": 4.040948589953153e-07, "loss": 1.4726, "step": 10544 }, { "epoch": 1.8192012421288708, "grad_norm": 0.5546875, "learning_rate": 4.0332927732569473e-07, "loss": 1.4246, "step": 10545 }, { "epoch": 1.8193737600276028, "grad_norm": 0.57421875, "learning_rate": 4.025644066374068e-07, "loss": 1.2917, "step": 10546 }, { "epoch": 1.8195462779263347, "grad_norm": 0.55078125, "learning_rate": 4.0180024698711404e-07, "loss": 1.3544, "step": 10547 }, { "epoch": 1.819718795825067, "grad_norm": 0.59765625, "learning_rate": 4.0103679843142895e-07, "loss": 1.4888, "step": 10548 }, { "epoch": 1.8198913137237989, "grad_norm": 0.58984375, "learning_rate": 4.002740610269185e-07, "loss": 1.3407, "step": 10549 }, { "epoch": 1.8200638316225308, "grad_norm": 0.60546875, "learning_rate": 3.995120348300863e-07, "loss": 1.3905, "step": 10550 }, { "epoch": 1.8202363495212628, "grad_norm": 0.65625, "learning_rate": 3.9875071989738943e-07, "loss": 1.432, "step": 10551 }, { "epoch": 1.820408867419995, "grad_norm": 0.56640625, "learning_rate": 3.9799011628522953e-07, "loss": 1.4171, "step": 10552 }, { "epoch": 1.820581385318727, "grad_norm": 0.6484375, "learning_rate": 3.9723022404995926e-07, "loss": 1.5395, "step": 10553 }, { "epoch": 1.820753903217459, "grad_norm": 0.5546875, "learning_rate": 3.964710432478736e-07, "loss": 1.436, "step": 10554 }, { "epoch": 1.8209264211161909, "grad_norm": 0.6171875, "learning_rate": 3.957125739352208e-07, "loss": 1.5376, "step": 10555 }, { "epoch": 1.8210989390149228, "grad_norm": 0.60546875, "learning_rate": 3.949548161681882e-07, "loss": 1.4383, "step": 10556 }, { "epoch": 1.8212714569136548, "grad_norm": 0.57421875, "learning_rate": 3.941977700029198e-07, "loss": 1.5523, "step": 10557 }, { "epoch": 1.8214439748123867, "grad_norm": 0.58984375, "learning_rate": 3.934414354954985e-07, "loss": 1.4397, "step": 10558 }, { "epoch": 1.8216164927111187, "grad_norm": 0.62890625, "learning_rate": 3.9268581270196284e-07, "loss": 1.4584, "step": 10559 }, { "epoch": 1.8217890106098507, "grad_norm": 0.609375, "learning_rate": 3.919309016782902e-07, "loss": 1.3746, "step": 10560 }, { "epoch": 1.8219615285085826, "grad_norm": 0.625, "learning_rate": 3.911767024804092e-07, "loss": 1.3883, "step": 10561 }, { "epoch": 1.8221340464073148, "grad_norm": 0.5390625, "learning_rate": 3.9042321516419844e-07, "loss": 1.4587, "step": 10562 }, { "epoch": 1.8223065643060468, "grad_norm": 0.54296875, "learning_rate": 3.896704397854778e-07, "loss": 1.2624, "step": 10563 }, { "epoch": 1.8224790822047787, "grad_norm": 0.60546875, "learning_rate": 3.889183764000204e-07, "loss": 1.3084, "step": 10564 }, { "epoch": 1.822651600103511, "grad_norm": 0.56640625, "learning_rate": 3.8816702506354163e-07, "loss": 1.3891, "step": 10565 }, { "epoch": 1.8228241180022429, "grad_norm": 0.62890625, "learning_rate": 3.8741638583170814e-07, "loss": 1.4988, "step": 10566 }, { "epoch": 1.8229966359009748, "grad_norm": 0.55859375, "learning_rate": 3.866664587601299e-07, "loss": 1.4436, "step": 10567 }, { "epoch": 1.8231691537997068, "grad_norm": 0.58203125, "learning_rate": 3.8591724390436904e-07, "loss": 1.4547, "step": 10568 }, { "epoch": 1.8233416716984387, "grad_norm": 0.55859375, "learning_rate": 3.851687413199279e-07, "loss": 1.3426, "step": 10569 }, { "epoch": 1.8235141895971707, "grad_norm": 0.5625, "learning_rate": 3.844209510622643e-07, "loss": 1.4252, "step": 10570 }, { "epoch": 1.8236867074959027, "grad_norm": 0.59765625, "learning_rate": 3.836738731867773e-07, "loss": 1.4587, "step": 10571 }, { "epoch": 1.8238592253946346, "grad_norm": 0.609375, "learning_rate": 3.8292750774881483e-07, "loss": 1.5709, "step": 10572 }, { "epoch": 1.8240317432933666, "grad_norm": 0.58203125, "learning_rate": 3.8218185480367264e-07, "loss": 1.3629, "step": 10573 }, { "epoch": 1.8242042611920986, "grad_norm": 0.53515625, "learning_rate": 3.814369144065944e-07, "loss": 1.3875, "step": 10574 }, { "epoch": 1.8243767790908305, "grad_norm": 0.59375, "learning_rate": 3.8069268661276916e-07, "loss": 1.452, "step": 10575 }, { "epoch": 1.8245492969895627, "grad_norm": 0.56640625, "learning_rate": 3.79949171477334e-07, "loss": 1.4527, "step": 10576 }, { "epoch": 1.8247218148882947, "grad_norm": 0.56640625, "learning_rate": 3.792063690553716e-07, "loss": 1.4349, "step": 10577 }, { "epoch": 1.8248943327870266, "grad_norm": 0.65625, "learning_rate": 3.7846427940191663e-07, "loss": 1.4183, "step": 10578 }, { "epoch": 1.8250668506857588, "grad_norm": 0.56640625, "learning_rate": 3.777229025719453e-07, "loss": 1.467, "step": 10579 }, { "epoch": 1.8252393685844908, "grad_norm": 0.6328125, "learning_rate": 3.7698223862038254e-07, "loss": 1.3971, "step": 10580 }, { "epoch": 1.8254118864832227, "grad_norm": 0.55859375, "learning_rate": 3.7624228760210545e-07, "loss": 1.3448, "step": 10581 }, { "epoch": 1.8255844043819547, "grad_norm": 0.5625, "learning_rate": 3.755030495719303e-07, "loss": 1.3441, "step": 10582 }, { "epoch": 1.8257569222806866, "grad_norm": 0.69921875, "learning_rate": 3.7476452458462654e-07, "loss": 1.328, "step": 10583 }, { "epoch": 1.8259294401794186, "grad_norm": 0.5546875, "learning_rate": 3.740267126949071e-07, "loss": 1.3725, "step": 10584 }, { "epoch": 1.8261019580781506, "grad_norm": 0.5625, "learning_rate": 3.732896139574349e-07, "loss": 1.4206, "step": 10585 }, { "epoch": 1.8262744759768825, "grad_norm": 0.58984375, "learning_rate": 3.7255322842681963e-07, "loss": 1.5134, "step": 10586 }, { "epoch": 1.8264469938756145, "grad_norm": 0.609375, "learning_rate": 3.718175561576154e-07, "loss": 1.4835, "step": 10587 }, { "epoch": 1.8266195117743464, "grad_norm": 0.59765625, "learning_rate": 3.710825972043264e-07, "loss": 1.4358, "step": 10588 }, { "epoch": 1.8267920296730786, "grad_norm": 0.59375, "learning_rate": 3.7034835162140347e-07, "loss": 1.5118, "step": 10589 }, { "epoch": 1.8269645475718106, "grad_norm": 0.578125, "learning_rate": 3.6961481946324205e-07, "loss": 1.5324, "step": 10590 }, { "epoch": 1.8271370654705426, "grad_norm": 0.5703125, "learning_rate": 3.688820007841898e-07, "loss": 1.4644, "step": 10591 }, { "epoch": 1.8273095833692745, "grad_norm": 0.59765625, "learning_rate": 3.6814989563853654e-07, "loss": 1.4278, "step": 10592 }, { "epoch": 1.8274821012680067, "grad_norm": 0.5703125, "learning_rate": 3.6741850408052114e-07, "loss": 1.4077, "step": 10593 }, { "epoch": 1.8276546191667387, "grad_norm": 0.65625, "learning_rate": 3.666878261643303e-07, "loss": 1.4508, "step": 10594 }, { "epoch": 1.8278271370654706, "grad_norm": 0.625, "learning_rate": 3.659578619440962e-07, "loss": 1.4561, "step": 10595 }, { "epoch": 1.8279996549642026, "grad_norm": 0.65234375, "learning_rate": 3.652286114739012e-07, "loss": 1.376, "step": 10596 }, { "epoch": 1.8281721728629345, "grad_norm": 0.56640625, "learning_rate": 3.645000748077709e-07, "loss": 1.4252, "step": 10597 }, { "epoch": 1.8283446907616665, "grad_norm": 0.60546875, "learning_rate": 3.6377225199968003e-07, "loss": 1.4235, "step": 10598 }, { "epoch": 1.8285172086603985, "grad_norm": 0.57421875, "learning_rate": 3.6304514310355086e-07, "loss": 1.4719, "step": 10599 }, { "epoch": 1.8286897265591304, "grad_norm": 0.5390625, "learning_rate": 3.6231874817325375e-07, "loss": 1.4056, "step": 10600 }, { "epoch": 1.8286897265591304, "eval_loss": 1.4070792198181152, "eval_runtime": 10.8107, "eval_samples_per_second": 94.721, "eval_steps_per_second": 23.68, "step": 10600 }, { "epoch": 1.8288622444578624, "grad_norm": 0.65625, "learning_rate": 3.615930672626e-07, "loss": 1.4744, "step": 10601 }, { "epoch": 1.8290347623565943, "grad_norm": 0.55859375, "learning_rate": 3.608681004253578e-07, "loss": 1.3409, "step": 10602 }, { "epoch": 1.8292072802553265, "grad_norm": 0.5859375, "learning_rate": 3.60143847715233e-07, "loss": 1.4295, "step": 10603 }, { "epoch": 1.8293797981540585, "grad_norm": 0.57421875, "learning_rate": 3.594203091858861e-07, "loss": 1.4565, "step": 10604 }, { "epoch": 1.8295523160527905, "grad_norm": 0.5546875, "learning_rate": 3.586974848909186e-07, "loss": 1.3717, "step": 10605 }, { "epoch": 1.8297248339515226, "grad_norm": 0.60546875, "learning_rate": 3.5797537488388326e-07, "loss": 1.5116, "step": 10606 }, { "epoch": 1.8298973518502546, "grad_norm": 0.59375, "learning_rate": 3.572539792182783e-07, "loss": 1.4081, "step": 10607 }, { "epoch": 1.8300698697489866, "grad_norm": 0.55859375, "learning_rate": 3.5653329794755e-07, "loss": 1.4567, "step": 10608 }, { "epoch": 1.8302423876477185, "grad_norm": 0.57421875, "learning_rate": 3.558133311250889e-07, "loss": 1.3465, "step": 10609 }, { "epoch": 1.8304149055464505, "grad_norm": 0.609375, "learning_rate": 3.5509407880423783e-07, "loss": 1.4702, "step": 10610 }, { "epoch": 1.8305874234451824, "grad_norm": 0.58984375, "learning_rate": 3.5437554103827985e-07, "loss": 1.4834, "step": 10611 }, { "epoch": 1.8307599413439144, "grad_norm": 0.609375, "learning_rate": 3.53657717880449e-07, "loss": 1.4094, "step": 10612 }, { "epoch": 1.8309324592426464, "grad_norm": 0.5859375, "learning_rate": 3.5294060938393046e-07, "loss": 1.4454, "step": 10613 }, { "epoch": 1.8311049771413783, "grad_norm": 0.56640625, "learning_rate": 3.522242156018474e-07, "loss": 1.48, "step": 10614 }, { "epoch": 1.8312774950401103, "grad_norm": 0.73046875, "learning_rate": 3.5150853658727837e-07, "loss": 1.3688, "step": 10615 }, { "epoch": 1.8314500129388422, "grad_norm": 0.67578125, "learning_rate": 3.5079357239324205e-07, "loss": 1.3982, "step": 10616 }, { "epoch": 1.8316225308375744, "grad_norm": 0.58984375, "learning_rate": 3.500793230727095e-07, "loss": 1.4082, "step": 10617 }, { "epoch": 1.8317950487363064, "grad_norm": 0.58203125, "learning_rate": 3.493657886785962e-07, "loss": 1.3345, "step": 10618 }, { "epoch": 1.8319675666350383, "grad_norm": 0.54296875, "learning_rate": 3.486529692637641e-07, "loss": 1.3584, "step": 10619 }, { "epoch": 1.8321400845337705, "grad_norm": 0.73828125, "learning_rate": 3.4794086488102564e-07, "loss": 1.3406, "step": 10620 }, { "epoch": 1.8323126024325025, "grad_norm": 0.57421875, "learning_rate": 3.4722947558313737e-07, "loss": 1.4438, "step": 10621 }, { "epoch": 1.8324851203312345, "grad_norm": 0.59375, "learning_rate": 3.4651880142280047e-07, "loss": 1.3952, "step": 10622 }, { "epoch": 1.8326576382299664, "grad_norm": 0.5859375, "learning_rate": 3.4580884245267064e-07, "loss": 1.3912, "step": 10623 }, { "epoch": 1.8328301561286984, "grad_norm": 0.5625, "learning_rate": 3.450995987253436e-07, "loss": 1.4326, "step": 10624 }, { "epoch": 1.8330026740274303, "grad_norm": 0.61328125, "learning_rate": 3.443910702933639e-07, "loss": 1.4591, "step": 10625 }, { "epoch": 1.8331751919261623, "grad_norm": 0.67578125, "learning_rate": 3.436832572092264e-07, "loss": 1.357, "step": 10626 }, { "epoch": 1.8333477098248943, "grad_norm": 0.57421875, "learning_rate": 3.429761595253667e-07, "loss": 1.3664, "step": 10627 }, { "epoch": 1.8335202277236262, "grad_norm": 0.5546875, "learning_rate": 3.422697772941741e-07, "loss": 1.433, "step": 10628 }, { "epoch": 1.8336927456223582, "grad_norm": 0.6875, "learning_rate": 3.415641105679801e-07, "loss": 1.4772, "step": 10629 }, { "epoch": 1.8338652635210904, "grad_norm": 0.6328125, "learning_rate": 3.408591593990662e-07, "loss": 1.5794, "step": 10630 }, { "epoch": 1.8340377814198223, "grad_norm": 0.5625, "learning_rate": 3.401549238396584e-07, "loss": 1.3741, "step": 10631 }, { "epoch": 1.8342102993185543, "grad_norm": 0.546875, "learning_rate": 3.3945140394193276e-07, "loss": 1.4539, "step": 10632 }, { "epoch": 1.8343828172172862, "grad_norm": 0.61328125, "learning_rate": 3.3874859975800644e-07, "loss": 1.3585, "step": 10633 }, { "epoch": 1.8345553351160184, "grad_norm": 0.57421875, "learning_rate": 3.3804651133995226e-07, "loss": 1.4456, "step": 10634 }, { "epoch": 1.8347278530147504, "grad_norm": 0.671875, "learning_rate": 3.3734513873978193e-07, "loss": 1.4126, "step": 10635 }, { "epoch": 1.8349003709134823, "grad_norm": 0.58984375, "learning_rate": 3.3664448200945943e-07, "loss": 1.3672, "step": 10636 }, { "epoch": 1.8350728888122143, "grad_norm": 0.62109375, "learning_rate": 3.3594454120089216e-07, "loss": 1.4352, "step": 10637 }, { "epoch": 1.8352454067109463, "grad_norm": 0.8046875, "learning_rate": 3.352453163659386e-07, "loss": 1.3739, "step": 10638 }, { "epoch": 1.8354179246096782, "grad_norm": 0.58984375, "learning_rate": 3.3454680755639847e-07, "loss": 1.5121, "step": 10639 }, { "epoch": 1.8355904425084102, "grad_norm": 0.62890625, "learning_rate": 3.3384901482402585e-07, "loss": 1.3696, "step": 10640 }, { "epoch": 1.8357629604071422, "grad_norm": 0.578125, "learning_rate": 3.3315193822051283e-07, "loss": 1.3752, "step": 10641 }, { "epoch": 1.8359354783058741, "grad_norm": 0.59375, "learning_rate": 3.3245557779750693e-07, "loss": 1.4764, "step": 10642 }, { "epoch": 1.836107996204606, "grad_norm": 0.59765625, "learning_rate": 3.3175993360659684e-07, "loss": 1.4615, "step": 10643 }, { "epoch": 1.8362805141033383, "grad_norm": 0.5703125, "learning_rate": 3.310650056993192e-07, "loss": 1.4907, "step": 10644 }, { "epoch": 1.8364530320020702, "grad_norm": 0.5859375, "learning_rate": 3.3037079412716276e-07, "loss": 1.4502, "step": 10645 }, { "epoch": 1.8366255499008022, "grad_norm": 0.6171875, "learning_rate": 3.296772989415542e-07, "loss": 1.461, "step": 10646 }, { "epoch": 1.8367980677995344, "grad_norm": 0.61328125, "learning_rate": 3.2898452019387685e-07, "loss": 1.4372, "step": 10647 }, { "epoch": 1.8369705856982663, "grad_norm": 0.63671875, "learning_rate": 3.2829245793545186e-07, "loss": 1.5597, "step": 10648 }, { "epoch": 1.8371431035969983, "grad_norm": 6.84375, "learning_rate": 3.2760111221755375e-07, "loss": 1.4422, "step": 10649 }, { "epoch": 1.8373156214957302, "grad_norm": 0.578125, "learning_rate": 3.269104830914016e-07, "loss": 1.4101, "step": 10650 }, { "epoch": 1.8374881393944622, "grad_norm": 0.5703125, "learning_rate": 3.2622057060816004e-07, "loss": 1.4499, "step": 10651 }, { "epoch": 1.8376606572931942, "grad_norm": 0.609375, "learning_rate": 3.255313748189437e-07, "loss": 1.4314, "step": 10652 }, { "epoch": 1.8378331751919261, "grad_norm": 1.2578125, "learning_rate": 3.2484289577481286e-07, "loss": 1.34, "step": 10653 }, { "epoch": 1.838005693090658, "grad_norm": 0.62109375, "learning_rate": 3.241551335267712e-07, "loss": 1.3133, "step": 10654 }, { "epoch": 1.83817821098939, "grad_norm": 0.5703125, "learning_rate": 3.234680881257779e-07, "loss": 1.438, "step": 10655 }, { "epoch": 1.838350728888122, "grad_norm": 1.1953125, "learning_rate": 3.2278175962272783e-07, "loss": 1.4366, "step": 10656 }, { "epoch": 1.838523246786854, "grad_norm": 0.56640625, "learning_rate": 3.2209614806847256e-07, "loss": 1.4324, "step": 10657 }, { "epoch": 1.8386957646855862, "grad_norm": 0.609375, "learning_rate": 3.2141125351380363e-07, "loss": 1.4163, "step": 10658 }, { "epoch": 1.8388682825843181, "grad_norm": 0.5859375, "learning_rate": 3.20727076009465e-07, "loss": 1.4913, "step": 10659 }, { "epoch": 1.83904080048305, "grad_norm": 0.55078125, "learning_rate": 3.200436156061426e-07, "loss": 1.4095, "step": 10660 }, { "epoch": 1.8392133183817823, "grad_norm": 0.55078125, "learning_rate": 3.1936087235447165e-07, "loss": 1.3978, "step": 10661 }, { "epoch": 1.8393858362805142, "grad_norm": 0.578125, "learning_rate": 3.186788463050361e-07, "loss": 1.4577, "step": 10662 }, { "epoch": 1.8395583541792462, "grad_norm": 0.60546875, "learning_rate": 3.1799753750836215e-07, "loss": 1.4572, "step": 10663 }, { "epoch": 1.8397308720779781, "grad_norm": 0.60546875, "learning_rate": 3.1731694601492834e-07, "loss": 1.464, "step": 10664 }, { "epoch": 1.83990338997671, "grad_norm": 0.609375, "learning_rate": 3.1663707187515325e-07, "loss": 1.4048, "step": 10665 }, { "epoch": 1.840075907875442, "grad_norm": 0.55078125, "learning_rate": 3.1595791513941097e-07, "loss": 1.5117, "step": 10666 }, { "epoch": 1.840248425774174, "grad_norm": 0.671875, "learning_rate": 3.1527947585801246e-07, "loss": 1.394, "step": 10667 }, { "epoch": 1.840420943672906, "grad_norm": 0.5703125, "learning_rate": 3.146017540812241e-07, "loss": 1.4003, "step": 10668 }, { "epoch": 1.840593461571638, "grad_norm": 0.640625, "learning_rate": 3.139247498592557e-07, "loss": 1.4011, "step": 10669 }, { "epoch": 1.84076597947037, "grad_norm": 0.60546875, "learning_rate": 3.1324846324226165e-07, "loss": 1.3679, "step": 10670 }, { "epoch": 1.840938497369102, "grad_norm": 0.59375, "learning_rate": 3.1257289428034854e-07, "loss": 1.437, "step": 10671 }, { "epoch": 1.841111015267834, "grad_norm": 0.61328125, "learning_rate": 3.1189804302356517e-07, "loss": 1.4845, "step": 10672 }, { "epoch": 1.841283533166566, "grad_norm": 0.58203125, "learning_rate": 3.112239095219072e-07, "loss": 1.4396, "step": 10673 }, { "epoch": 1.841456051065298, "grad_norm": 0.6484375, "learning_rate": 3.105504938253223e-07, "loss": 1.4616, "step": 10674 }, { "epoch": 1.8416285689640302, "grad_norm": 0.56640625, "learning_rate": 3.098777959836974e-07, "loss": 1.4047, "step": 10675 }, { "epoch": 1.8418010868627621, "grad_norm": 0.5625, "learning_rate": 3.092058160468736e-07, "loss": 1.3966, "step": 10676 }, { "epoch": 1.841973604761494, "grad_norm": 0.546875, "learning_rate": 3.085345540646345e-07, "loss": 1.4355, "step": 10677 }, { "epoch": 1.842146122660226, "grad_norm": 0.62109375, "learning_rate": 3.0786401008670806e-07, "loss": 1.4064, "step": 10678 }, { "epoch": 1.842318640558958, "grad_norm": 0.578125, "learning_rate": 3.071941841627779e-07, "loss": 1.3451, "step": 10679 }, { "epoch": 1.84249115845769, "grad_norm": 0.5390625, "learning_rate": 3.065250763424643e-07, "loss": 1.4058, "step": 10680 }, { "epoch": 1.842663676356422, "grad_norm": 0.546875, "learning_rate": 3.0585668667534097e-07, "loss": 1.3844, "step": 10681 }, { "epoch": 1.8428361942551539, "grad_norm": 0.57421875, "learning_rate": 3.0518901521092605e-07, "loss": 1.4213, "step": 10682 }, { "epoch": 1.8430087121538858, "grad_norm": 0.59375, "learning_rate": 3.045220619986844e-07, "loss": 1.5024, "step": 10683 }, { "epoch": 1.8431812300526178, "grad_norm": 0.6171875, "learning_rate": 3.038558270880287e-07, "loss": 1.3808, "step": 10684 }, { "epoch": 1.84335374795135, "grad_norm": 0.57421875, "learning_rate": 3.031903105283196e-07, "loss": 1.3525, "step": 10685 }, { "epoch": 1.843526265850082, "grad_norm": 0.5703125, "learning_rate": 3.025255123688575e-07, "loss": 1.4255, "step": 10686 }, { "epoch": 1.843698783748814, "grad_norm": 0.5703125, "learning_rate": 3.018614326589009e-07, "loss": 1.424, "step": 10687 }, { "epoch": 1.843871301647546, "grad_norm": 0.5859375, "learning_rate": 3.011980714476448e-07, "loss": 1.4183, "step": 10688 }, { "epoch": 1.844043819546278, "grad_norm": 0.6640625, "learning_rate": 3.0053542878423657e-07, "loss": 1.4809, "step": 10689 }, { "epoch": 1.84421633744501, "grad_norm": 0.58203125, "learning_rate": 2.998735047177692e-07, "loss": 1.4014, "step": 10690 }, { "epoch": 1.844388855343742, "grad_norm": 0.60546875, "learning_rate": 2.992122992972812e-07, "loss": 1.3376, "step": 10691 }, { "epoch": 1.844561373242474, "grad_norm": 0.60546875, "learning_rate": 2.9855181257176015e-07, "loss": 1.4756, "step": 10692 }, { "epoch": 1.844733891141206, "grad_norm": 0.5859375, "learning_rate": 2.978920445901379e-07, "loss": 1.3947, "step": 10693 }, { "epoch": 1.8449064090399379, "grad_norm": 0.5390625, "learning_rate": 2.9723299540129423e-07, "loss": 1.4171, "step": 10694 }, { "epoch": 1.8450789269386698, "grad_norm": 0.59765625, "learning_rate": 2.9657466505405573e-07, "loss": 1.4056, "step": 10695 }, { "epoch": 1.8452514448374018, "grad_norm": 0.58203125, "learning_rate": 2.959170535971978e-07, "loss": 1.4627, "step": 10696 }, { "epoch": 1.8454239627361337, "grad_norm": 0.6328125, "learning_rate": 2.952601610794359e-07, "loss": 1.4116, "step": 10697 }, { "epoch": 1.845596480634866, "grad_norm": 0.5625, "learning_rate": 2.9460398754944106e-07, "loss": 1.4963, "step": 10698 }, { "epoch": 1.8457689985335979, "grad_norm": 0.5546875, "learning_rate": 2.9394853305582337e-07, "loss": 1.4373, "step": 10699 }, { "epoch": 1.8459415164323298, "grad_norm": 0.578125, "learning_rate": 2.9329379764714615e-07, "loss": 1.3966, "step": 10700 }, { "epoch": 1.8459415164323298, "eval_loss": 1.4070720672607422, "eval_runtime": 12.0233, "eval_samples_per_second": 85.168, "eval_steps_per_second": 21.292, "step": 10700 }, { "epoch": 1.8461140343310618, "grad_norm": 0.63671875, "learning_rate": 2.9263978137191396e-07, "loss": 1.5311, "step": 10701 }, { "epoch": 1.846286552229794, "grad_norm": 0.61328125, "learning_rate": 2.919864842785802e-07, "loss": 1.4487, "step": 10702 }, { "epoch": 1.846459070128526, "grad_norm": 0.6484375, "learning_rate": 2.9133390641554736e-07, "loss": 1.4173, "step": 10703 }, { "epoch": 1.846631588027258, "grad_norm": 0.66015625, "learning_rate": 2.9068204783116227e-07, "loss": 1.4152, "step": 10704 }, { "epoch": 1.8468041059259899, "grad_norm": 0.55859375, "learning_rate": 2.900309085737152e-07, "loss": 1.4875, "step": 10705 }, { "epoch": 1.8469766238247218, "grad_norm": 0.59765625, "learning_rate": 2.8938048869145087e-07, "loss": 1.4071, "step": 10706 }, { "epoch": 1.8471491417234538, "grad_norm": 0.57421875, "learning_rate": 2.8873078823255297e-07, "loss": 1.4231, "step": 10707 }, { "epoch": 1.8473216596221858, "grad_norm": 0.57421875, "learning_rate": 2.8808180724515856e-07, "loss": 1.3817, "step": 10708 }, { "epoch": 1.8474941775209177, "grad_norm": 0.55859375, "learning_rate": 2.8743354577734805e-07, "loss": 1.3916, "step": 10709 }, { "epoch": 1.8476666954196497, "grad_norm": 0.671875, "learning_rate": 2.8678600387714417e-07, "loss": 1.3462, "step": 10710 }, { "epoch": 1.8478392133183816, "grad_norm": 0.60546875, "learning_rate": 2.8613918159252627e-07, "loss": 1.5002, "step": 10711 }, { "epoch": 1.8480117312171138, "grad_norm": 0.5859375, "learning_rate": 2.8549307897141274e-07, "loss": 1.3669, "step": 10712 }, { "epoch": 1.8481842491158458, "grad_norm": 0.5625, "learning_rate": 2.8484769606167085e-07, "loss": 1.4325, "step": 10713 }, { "epoch": 1.8483567670145777, "grad_norm": 0.71484375, "learning_rate": 2.842030329111134e-07, "loss": 1.4155, "step": 10714 }, { "epoch": 1.8485292849133097, "grad_norm": 0.5703125, "learning_rate": 2.8355908956750335e-07, "loss": 1.4419, "step": 10715 }, { "epoch": 1.8487018028120419, "grad_norm": 0.609375, "learning_rate": 2.8291586607854693e-07, "loss": 1.3997, "step": 10716 }, { "epoch": 1.8488743207107738, "grad_norm": 0.55859375, "learning_rate": 2.822733624918994e-07, "loss": 1.3909, "step": 10717 }, { "epoch": 1.8490468386095058, "grad_norm": 0.609375, "learning_rate": 2.8163157885515824e-07, "loss": 1.4904, "step": 10718 }, { "epoch": 1.8492193565082378, "grad_norm": 0.77734375, "learning_rate": 2.809905152158754e-07, "loss": 1.435, "step": 10719 }, { "epoch": 1.8493918744069697, "grad_norm": 0.6953125, "learning_rate": 2.8035017162154063e-07, "loss": 1.4916, "step": 10720 }, { "epoch": 1.8495643923057017, "grad_norm": 0.62890625, "learning_rate": 2.7971054811959717e-07, "loss": 1.481, "step": 10721 }, { "epoch": 1.8497369102044336, "grad_norm": 0.56640625, "learning_rate": 2.790716447574304e-07, "loss": 1.4353, "step": 10722 }, { "epoch": 1.8499094281031656, "grad_norm": 0.58984375, "learning_rate": 2.7843346158237586e-07, "loss": 1.3761, "step": 10723 }, { "epoch": 1.8500819460018976, "grad_norm": 0.56640625, "learning_rate": 2.777959986417134e-07, "loss": 1.4107, "step": 10724 }, { "epoch": 1.8502544639006295, "grad_norm": 0.6015625, "learning_rate": 2.771592559826708e-07, "loss": 1.4496, "step": 10725 }, { "epoch": 1.8504269817993617, "grad_norm": 0.58984375, "learning_rate": 2.765232336524215e-07, "loss": 1.4275, "step": 10726 }, { "epoch": 1.8505994996980937, "grad_norm": 0.5859375, "learning_rate": 2.758879316980867e-07, "loss": 1.4414, "step": 10727 }, { "epoch": 1.8507720175968256, "grad_norm": 0.5625, "learning_rate": 2.7525335016673315e-07, "loss": 1.4265, "step": 10728 }, { "epoch": 1.8509445354955578, "grad_norm": 0.6171875, "learning_rate": 2.746194891053733e-07, "loss": 1.4261, "step": 10729 }, { "epoch": 1.8511170533942898, "grad_norm": 0.5703125, "learning_rate": 2.739863485609695e-07, "loss": 1.4396, "step": 10730 }, { "epoch": 1.8512895712930217, "grad_norm": 0.59375, "learning_rate": 2.7335392858042764e-07, "loss": 1.4106, "step": 10731 }, { "epoch": 1.8514620891917537, "grad_norm": 0.59375, "learning_rate": 2.727222292106024e-07, "loss": 1.3709, "step": 10732 }, { "epoch": 1.8516346070904857, "grad_norm": 0.57421875, "learning_rate": 2.72091250498292e-07, "loss": 1.4935, "step": 10733 }, { "epoch": 1.8518071249892176, "grad_norm": 0.55078125, "learning_rate": 2.714609924902445e-07, "loss": 1.4064, "step": 10734 }, { "epoch": 1.8519796428879496, "grad_norm": 0.58984375, "learning_rate": 2.708314552331548e-07, "loss": 1.5138, "step": 10735 }, { "epoch": 1.8521521607866815, "grad_norm": 0.5546875, "learning_rate": 2.7020263877366005e-07, "loss": 1.3863, "step": 10736 }, { "epoch": 1.8523246786854135, "grad_norm": 0.6015625, "learning_rate": 2.6957454315834965e-07, "loss": 1.4406, "step": 10737 }, { "epoch": 1.8524971965841455, "grad_norm": 0.6015625, "learning_rate": 2.6894716843375523e-07, "loss": 1.5074, "step": 10738 }, { "epoch": 1.8526697144828776, "grad_norm": 0.5703125, "learning_rate": 2.6832051464635636e-07, "loss": 1.3569, "step": 10739 }, { "epoch": 1.8528422323816096, "grad_norm": 0.59375, "learning_rate": 2.6769458184258134e-07, "loss": 1.4224, "step": 10740 }, { "epoch": 1.8530147502803416, "grad_norm": 0.6015625, "learning_rate": 2.6706937006880095e-07, "loss": 1.4153, "step": 10741 }, { "epoch": 1.8531872681790735, "grad_norm": 0.578125, "learning_rate": 2.664448793713348e-07, "loss": 1.5044, "step": 10742 }, { "epoch": 1.8533597860778057, "grad_norm": 0.58203125, "learning_rate": 2.6582110979645246e-07, "loss": 1.5223, "step": 10743 }, { "epoch": 1.8535323039765377, "grad_norm": 0.59375, "learning_rate": 2.651980613903626e-07, "loss": 1.4345, "step": 10744 }, { "epoch": 1.8537048218752696, "grad_norm": 0.62890625, "learning_rate": 2.645757341992261e-07, "loss": 1.3272, "step": 10745 }, { "epoch": 1.8538773397740016, "grad_norm": 0.58203125, "learning_rate": 2.6395412826915046e-07, "loss": 1.3986, "step": 10746 }, { "epoch": 1.8540498576727336, "grad_norm": 0.578125, "learning_rate": 2.6333324364618553e-07, "loss": 1.3335, "step": 10747 }, { "epoch": 1.8542223755714655, "grad_norm": 0.5625, "learning_rate": 2.6271308037633113e-07, "loss": 1.3878, "step": 10748 }, { "epoch": 1.8543948934701975, "grad_norm": 0.56640625, "learning_rate": 2.6209363850553393e-07, "loss": 1.3827, "step": 10749 }, { "epoch": 1.8545674113689294, "grad_norm": 0.64453125, "learning_rate": 2.6147491807968385e-07, "loss": 1.4321, "step": 10750 }, { "epoch": 1.8547399292676614, "grad_norm": 0.578125, "learning_rate": 2.6085691914462306e-07, "loss": 1.4484, "step": 10751 }, { "epoch": 1.8549124471663934, "grad_norm": 0.61328125, "learning_rate": 2.6023964174613393e-07, "loss": 1.3907, "step": 10752 }, { "epoch": 1.8550849650651255, "grad_norm": 0.609375, "learning_rate": 2.596230859299487e-07, "loss": 1.3456, "step": 10753 }, { "epoch": 1.8552574829638575, "grad_norm": 0.69921875, "learning_rate": 2.5900725174174524e-07, "loss": 1.347, "step": 10754 }, { "epoch": 1.8554300008625895, "grad_norm": 0.58203125, "learning_rate": 2.5839213922714936e-07, "loss": 1.4167, "step": 10755 }, { "epoch": 1.8556025187613217, "grad_norm": 0.5546875, "learning_rate": 2.5777774843173233e-07, "loss": 1.3874, "step": 10756 }, { "epoch": 1.8557750366600536, "grad_norm": 0.55078125, "learning_rate": 2.5716407940101217e-07, "loss": 1.4425, "step": 10757 }, { "epoch": 1.8559475545587856, "grad_norm": 0.69140625, "learning_rate": 2.565511321804537e-07, "loss": 1.2792, "step": 10758 }, { "epoch": 1.8561200724575175, "grad_norm": 0.5703125, "learning_rate": 2.559389068154661e-07, "loss": 1.4789, "step": 10759 }, { "epoch": 1.8562925903562495, "grad_norm": 0.61328125, "learning_rate": 2.553274033514097e-07, "loss": 1.5425, "step": 10760 }, { "epoch": 1.8564651082549815, "grad_norm": 0.62109375, "learning_rate": 2.5471662183358394e-07, "loss": 1.4059, "step": 10761 }, { "epoch": 1.8566376261537134, "grad_norm": 0.56640625, "learning_rate": 2.5410656230724475e-07, "loss": 1.3846, "step": 10762 }, { "epoch": 1.8568101440524454, "grad_norm": 0.5859375, "learning_rate": 2.5349722481758487e-07, "loss": 1.427, "step": 10763 }, { "epoch": 1.8569826619511773, "grad_norm": 0.6015625, "learning_rate": 2.5288860940975046e-07, "loss": 1.476, "step": 10764 }, { "epoch": 1.8571551798499093, "grad_norm": 0.53515625, "learning_rate": 2.52280716128831e-07, "loss": 1.2507, "step": 10765 }, { "epoch": 1.8573276977486413, "grad_norm": 0.55078125, "learning_rate": 2.516735450198615e-07, "loss": 1.4192, "step": 10766 }, { "epoch": 1.8575002156473734, "grad_norm": 0.59765625, "learning_rate": 2.510670961278272e-07, "loss": 1.477, "step": 10767 }, { "epoch": 1.8576727335461054, "grad_norm": 0.7265625, "learning_rate": 2.5046136949765544e-07, "loss": 1.5125, "step": 10768 }, { "epoch": 1.8578452514448374, "grad_norm": 0.5703125, "learning_rate": 2.4985636517422365e-07, "loss": 1.3664, "step": 10769 }, { "epoch": 1.8580177693435695, "grad_norm": 0.55859375, "learning_rate": 2.4925208320235593e-07, "loss": 1.428, "step": 10770 }, { "epoch": 1.8581902872423015, "grad_norm": 0.5859375, "learning_rate": 2.486485236268166e-07, "loss": 1.4213, "step": 10771 }, { "epoch": 1.8583628051410335, "grad_norm": 0.609375, "learning_rate": 2.4804568649232643e-07, "loss": 1.4684, "step": 10772 }, { "epoch": 1.8585353230397654, "grad_norm": 0.5625, "learning_rate": 2.4744357184354305e-07, "loss": 1.3504, "step": 10773 }, { "epoch": 1.8587078409384974, "grad_norm": 0.58984375, "learning_rate": 2.468421797250764e-07, "loss": 1.4598, "step": 10774 }, { "epoch": 1.8588803588372294, "grad_norm": 0.69921875, "learning_rate": 2.46241510181483e-07, "loss": 1.4404, "step": 10775 }, { "epoch": 1.8590528767359613, "grad_norm": 0.58984375, "learning_rate": 2.456415632572617e-07, "loss": 1.4462, "step": 10776 }, { "epoch": 1.8592253946346933, "grad_norm": 0.62109375, "learning_rate": 2.450423389968626e-07, "loss": 1.4246, "step": 10777 }, { "epoch": 1.8593979125334252, "grad_norm": 0.57421875, "learning_rate": 2.444438374446778e-07, "loss": 1.4082, "step": 10778 }, { "epoch": 1.8595704304321572, "grad_norm": 0.55078125, "learning_rate": 2.4384605864504973e-07, "loss": 1.4425, "step": 10779 }, { "epoch": 1.8597429483308894, "grad_norm": 0.6015625, "learning_rate": 2.4324900264226405e-07, "loss": 1.4271, "step": 10780 }, { "epoch": 1.8599154662296213, "grad_norm": 0.56640625, "learning_rate": 2.426526694805564e-07, "loss": 1.5036, "step": 10781 }, { "epoch": 1.8600879841283533, "grad_norm": 0.5859375, "learning_rate": 2.420570592041038e-07, "loss": 1.3449, "step": 10782 }, { "epoch": 1.8602605020270853, "grad_norm": 0.6796875, "learning_rate": 2.4146217185703755e-07, "loss": 1.4024, "step": 10783 }, { "epoch": 1.8604330199258174, "grad_norm": 0.72265625, "learning_rate": 2.4086800748342577e-07, "loss": 1.5128, "step": 10784 }, { "epoch": 1.8606055378245494, "grad_norm": 0.59765625, "learning_rate": 2.4027456612728985e-07, "loss": 1.537, "step": 10785 }, { "epoch": 1.8607780557232814, "grad_norm": 0.58203125, "learning_rate": 2.396818478325968e-07, "loss": 1.362, "step": 10786 }, { "epoch": 1.8609505736220133, "grad_norm": 0.62109375, "learning_rate": 2.3908985264325614e-07, "loss": 1.4572, "step": 10787 }, { "epoch": 1.8611230915207453, "grad_norm": 0.71484375, "learning_rate": 2.3849858060312924e-07, "loss": 1.4654, "step": 10788 }, { "epoch": 1.8612956094194772, "grad_norm": 0.55078125, "learning_rate": 2.3790803175602007e-07, "loss": 1.4219, "step": 10789 }, { "epoch": 1.8614681273182092, "grad_norm": 0.58203125, "learning_rate": 2.3731820614568023e-07, "loss": 1.396, "step": 10790 }, { "epoch": 1.8616406452169412, "grad_norm": 0.640625, "learning_rate": 2.3672910381580817e-07, "loss": 1.4431, "step": 10791 }, { "epoch": 1.8618131631156731, "grad_norm": 0.6015625, "learning_rate": 2.3614072481004778e-07, "loss": 1.4451, "step": 10792 }, { "epoch": 1.861985681014405, "grad_norm": 0.58203125, "learning_rate": 2.3555306917198872e-07, "loss": 1.2986, "step": 10793 }, { "epoch": 1.8621581989131373, "grad_norm": 0.5859375, "learning_rate": 2.3496613694517056e-07, "loss": 1.4842, "step": 10794 }, { "epoch": 1.8623307168118692, "grad_norm": 0.54296875, "learning_rate": 2.343799281730741e-07, "loss": 1.3365, "step": 10795 }, { "epoch": 1.8625032347106012, "grad_norm": 0.55859375, "learning_rate": 2.3379444289913344e-07, "loss": 1.4792, "step": 10796 }, { "epoch": 1.8626757526093334, "grad_norm": 0.5546875, "learning_rate": 2.3320968116672172e-07, "loss": 1.3473, "step": 10797 }, { "epoch": 1.8628482705080653, "grad_norm": 0.5546875, "learning_rate": 2.32625643019162e-07, "loss": 1.4253, "step": 10798 }, { "epoch": 1.8630207884067973, "grad_norm": 0.61328125, "learning_rate": 2.320423284997242e-07, "loss": 1.4237, "step": 10799 }, { "epoch": 1.8631933063055293, "grad_norm": 0.54296875, "learning_rate": 2.3145973765162367e-07, "loss": 1.4938, "step": 10800 }, { "epoch": 1.8631933063055293, "eval_loss": 1.4070444107055664, "eval_runtime": 10.8998, "eval_samples_per_second": 93.946, "eval_steps_per_second": 23.487, "step": 10800 }, { "epoch": 1.8633658242042612, "grad_norm": 0.56640625, "learning_rate": 2.3087787051802146e-07, "loss": 1.4087, "step": 10801 }, { "epoch": 1.8635383421029932, "grad_norm": 0.58203125, "learning_rate": 2.3029672714202866e-07, "loss": 1.3573, "step": 10802 }, { "epoch": 1.8637108600017251, "grad_norm": 0.56640625, "learning_rate": 2.2971630756669637e-07, "loss": 1.3931, "step": 10803 }, { "epoch": 1.863883377900457, "grad_norm": 0.55078125, "learning_rate": 2.291366118350302e-07, "loss": 1.4099, "step": 10804 }, { "epoch": 1.864055895799189, "grad_norm": 0.5703125, "learning_rate": 2.285576399899736e-07, "loss": 1.4181, "step": 10805 }, { "epoch": 1.864228413697921, "grad_norm": 0.55078125, "learning_rate": 2.2797939207442e-07, "loss": 1.3713, "step": 10806 }, { "epoch": 1.864400931596653, "grad_norm": 0.54296875, "learning_rate": 2.2740186813121402e-07, "loss": 1.46, "step": 10807 }, { "epoch": 1.8645734494953852, "grad_norm": 0.5859375, "learning_rate": 2.268250682031392e-07, "loss": 1.3744, "step": 10808 }, { "epoch": 1.8647459673941171, "grad_norm": 0.5859375, "learning_rate": 2.2624899233292807e-07, "loss": 1.3474, "step": 10809 }, { "epoch": 1.864918485292849, "grad_norm": 0.5859375, "learning_rate": 2.256736405632609e-07, "loss": 1.5073, "step": 10810 }, { "epoch": 1.8650910031915813, "grad_norm": 0.59375, "learning_rate": 2.250990129367636e-07, "loss": 1.427, "step": 10811 }, { "epoch": 1.8652635210903132, "grad_norm": 0.56640625, "learning_rate": 2.245251094960077e-07, "loss": 1.3456, "step": 10812 }, { "epoch": 1.8654360389890452, "grad_norm": 0.91015625, "learning_rate": 2.2395193028351247e-07, "loss": 1.5013, "step": 10813 }, { "epoch": 1.8656085568877772, "grad_norm": 0.59765625, "learning_rate": 2.2337947534174064e-07, "loss": 1.5743, "step": 10814 }, { "epoch": 1.8657810747865091, "grad_norm": 0.5625, "learning_rate": 2.2280774471310496e-07, "loss": 1.3436, "step": 10815 }, { "epoch": 1.865953592685241, "grad_norm": 0.55859375, "learning_rate": 2.2223673843996263e-07, "loss": 1.4284, "step": 10816 }, { "epoch": 1.866126110583973, "grad_norm": 0.59765625, "learning_rate": 2.216664565646165e-07, "loss": 1.449, "step": 10817 }, { "epoch": 1.866298628482705, "grad_norm": 0.578125, "learning_rate": 2.210968991293172e-07, "loss": 1.39, "step": 10818 }, { "epoch": 1.866471146381437, "grad_norm": 0.5546875, "learning_rate": 2.20528066176261e-07, "loss": 1.4709, "step": 10819 }, { "epoch": 1.866643664280169, "grad_norm": 0.5703125, "learning_rate": 2.1995995774759082e-07, "loss": 1.3929, "step": 10820 }, { "epoch": 1.866816182178901, "grad_norm": 0.6015625, "learning_rate": 2.1939257388539525e-07, "loss": 1.4292, "step": 10821 }, { "epoch": 1.866988700077633, "grad_norm": 0.5703125, "learning_rate": 2.1882591463170956e-07, "loss": 1.3822, "step": 10822 }, { "epoch": 1.867161217976365, "grad_norm": 0.59375, "learning_rate": 2.1825998002851566e-07, "loss": 1.4464, "step": 10823 }, { "epoch": 1.867333735875097, "grad_norm": 0.5859375, "learning_rate": 2.1769477011774232e-07, "loss": 1.3908, "step": 10824 }, { "epoch": 1.8675062537738292, "grad_norm": 0.6015625, "learning_rate": 2.1713028494126265e-07, "loss": 1.3778, "step": 10825 }, { "epoch": 1.8676787716725611, "grad_norm": 0.5703125, "learning_rate": 2.1656652454089878e-07, "loss": 1.4023, "step": 10826 }, { "epoch": 1.867851289571293, "grad_norm": 0.640625, "learning_rate": 2.1600348895841394e-07, "loss": 1.5256, "step": 10827 }, { "epoch": 1.868023807470025, "grad_norm": 0.5390625, "learning_rate": 2.1544117823552592e-07, "loss": 1.4666, "step": 10828 }, { "epoch": 1.868196325368757, "grad_norm": 0.5703125, "learning_rate": 2.1487959241389244e-07, "loss": 1.4394, "step": 10829 }, { "epoch": 1.868368843267489, "grad_norm": 0.59375, "learning_rate": 2.1431873153511807e-07, "loss": 1.4317, "step": 10830 }, { "epoch": 1.868541361166221, "grad_norm": 0.59765625, "learning_rate": 2.1375859564075508e-07, "loss": 1.5176, "step": 10831 }, { "epoch": 1.868713879064953, "grad_norm": 0.6796875, "learning_rate": 2.131991847723036e-07, "loss": 1.3555, "step": 10832 }, { "epoch": 1.8688863969636849, "grad_norm": 0.59765625, "learning_rate": 2.126404989712072e-07, "loss": 1.4658, "step": 10833 }, { "epoch": 1.8690589148624168, "grad_norm": 0.55078125, "learning_rate": 2.1208253827885828e-07, "loss": 1.4349, "step": 10834 }, { "epoch": 1.869231432761149, "grad_norm": 0.5859375, "learning_rate": 2.1152530273658932e-07, "loss": 1.4306, "step": 10835 }, { "epoch": 1.869403950659881, "grad_norm": 0.59375, "learning_rate": 2.1096879238569068e-07, "loss": 1.4883, "step": 10836 }, { "epoch": 1.869576468558613, "grad_norm": 0.5625, "learning_rate": 2.10413007267386e-07, "loss": 1.3261, "step": 10837 }, { "epoch": 1.869748986457345, "grad_norm": 0.6484375, "learning_rate": 2.098579474228546e-07, "loss": 1.4961, "step": 10838 }, { "epoch": 1.869921504356077, "grad_norm": 0.53515625, "learning_rate": 2.093036128932191e-07, "loss": 1.4159, "step": 10839 }, { "epoch": 1.870094022254809, "grad_norm": 0.6015625, "learning_rate": 2.0875000371954557e-07, "loss": 1.4955, "step": 10840 }, { "epoch": 1.870266540153541, "grad_norm": 0.62109375, "learning_rate": 2.081971199428512e-07, "loss": 1.4283, "step": 10841 }, { "epoch": 1.870439058052273, "grad_norm": 0.5625, "learning_rate": 2.0764496160409653e-07, "loss": 1.3329, "step": 10842 }, { "epoch": 1.870611575951005, "grad_norm": 0.640625, "learning_rate": 2.0709352874418777e-07, "loss": 1.5467, "step": 10843 }, { "epoch": 1.8707840938497369, "grad_norm": 0.60546875, "learning_rate": 2.0654282140397996e-07, "loss": 1.4522, "step": 10844 }, { "epoch": 1.8709566117484688, "grad_norm": 0.6015625, "learning_rate": 2.0599283962427274e-07, "loss": 1.4411, "step": 10845 }, { "epoch": 1.8711291296472008, "grad_norm": 0.578125, "learning_rate": 2.0544358344580905e-07, "loss": 1.363, "step": 10846 }, { "epoch": 1.8713016475459328, "grad_norm": 0.62109375, "learning_rate": 2.0489505290928747e-07, "loss": 1.4794, "step": 10847 }, { "epoch": 1.871474165444665, "grad_norm": 0.63671875, "learning_rate": 2.04347248055341e-07, "loss": 1.4492, "step": 10848 }, { "epoch": 1.871646683343397, "grad_norm": 1.1796875, "learning_rate": 2.0380016892455611e-07, "loss": 1.4276, "step": 10849 }, { "epoch": 1.8718192012421289, "grad_norm": 0.59765625, "learning_rate": 2.0325381555746483e-07, "loss": 1.5, "step": 10850 }, { "epoch": 1.8719917191408608, "grad_norm": 0.56640625, "learning_rate": 2.0270818799454472e-07, "loss": 1.4219, "step": 10851 }, { "epoch": 1.872164237039593, "grad_norm": 0.6171875, "learning_rate": 2.0216328627621685e-07, "loss": 1.5137, "step": 10852 }, { "epoch": 1.872336754938325, "grad_norm": 0.62109375, "learning_rate": 2.016191104428533e-07, "loss": 1.3509, "step": 10853 }, { "epoch": 1.872509272837057, "grad_norm": 0.56640625, "learning_rate": 2.0107566053476856e-07, "loss": 1.4113, "step": 10854 }, { "epoch": 1.8726817907357889, "grad_norm": 0.5703125, "learning_rate": 2.0053293659222595e-07, "loss": 1.4339, "step": 10855 }, { "epoch": 1.8728543086345208, "grad_norm": 0.57421875, "learning_rate": 1.999909386554333e-07, "loss": 1.5374, "step": 10856 }, { "epoch": 1.8730268265332528, "grad_norm": 0.62109375, "learning_rate": 1.9944966676454402e-07, "loss": 1.4512, "step": 10857 }, { "epoch": 1.8731993444319848, "grad_norm": 0.62109375, "learning_rate": 1.9890912095966274e-07, "loss": 1.3596, "step": 10858 }, { "epoch": 1.8733718623307167, "grad_norm": 0.89453125, "learning_rate": 1.9836930128083076e-07, "loss": 1.3449, "step": 10859 }, { "epoch": 1.8735443802294487, "grad_norm": 0.65625, "learning_rate": 1.9783020776804718e-07, "loss": 1.3641, "step": 10860 }, { "epoch": 1.8737168981281807, "grad_norm": 0.56640625, "learning_rate": 1.9729184046124673e-07, "loss": 1.4446, "step": 10861 }, { "epoch": 1.8738894160269128, "grad_norm": 0.625, "learning_rate": 1.9675419940031748e-07, "loss": 1.4485, "step": 10862 }, { "epoch": 1.8740619339256448, "grad_norm": 0.54296875, "learning_rate": 1.9621728462508981e-07, "loss": 1.4549, "step": 10863 }, { "epoch": 1.8742344518243768, "grad_norm": 0.56640625, "learning_rate": 1.9568109617534304e-07, "loss": 1.4242, "step": 10864 }, { "epoch": 1.8744069697231087, "grad_norm": 0.56640625, "learning_rate": 1.951456340908009e-07, "loss": 1.4486, "step": 10865 }, { "epoch": 1.874579487621841, "grad_norm": 0.60546875, "learning_rate": 1.9461089841113502e-07, "loss": 1.4479, "step": 10866 }, { "epoch": 1.8747520055205729, "grad_norm": 0.5546875, "learning_rate": 1.9407688917595923e-07, "loss": 1.4525, "step": 10867 }, { "epoch": 1.8749245234193048, "grad_norm": 0.5625, "learning_rate": 1.9354360642483862e-07, "loss": 1.3623, "step": 10868 }, { "epoch": 1.8750970413180368, "grad_norm": 0.60546875, "learning_rate": 1.9301105019728038e-07, "loss": 1.4158, "step": 10869 }, { "epoch": 1.8752695592167687, "grad_norm": 0.546875, "learning_rate": 1.9247922053273972e-07, "loss": 1.4427, "step": 10870 }, { "epoch": 1.8754420771155007, "grad_norm": 0.5859375, "learning_rate": 1.9194811747062058e-07, "loss": 1.3908, "step": 10871 }, { "epoch": 1.8756145950142327, "grad_norm": 0.5546875, "learning_rate": 1.914177410502671e-07, "loss": 1.4076, "step": 10872 }, { "epoch": 1.8757871129129646, "grad_norm": 1.0859375, "learning_rate": 1.9088809131097562e-07, "loss": 1.4296, "step": 10873 }, { "epoch": 1.8759596308116966, "grad_norm": 0.6484375, "learning_rate": 1.9035916829198255e-07, "loss": 1.4531, "step": 10874 }, { "epoch": 1.8761321487104285, "grad_norm": 0.578125, "learning_rate": 1.8983097203247647e-07, "loss": 1.4507, "step": 10875 }, { "epoch": 1.8763046666091607, "grad_norm": 0.54296875, "learning_rate": 1.8930350257158946e-07, "loss": 1.4777, "step": 10876 }, { "epoch": 1.8764771845078927, "grad_norm": 0.58984375, "learning_rate": 1.8877675994839918e-07, "loss": 1.513, "step": 10877 }, { "epoch": 1.8766497024066247, "grad_norm": 0.5859375, "learning_rate": 1.882507442019288e-07, "loss": 1.4396, "step": 10878 }, { "epoch": 1.8768222203053568, "grad_norm": 0.60546875, "learning_rate": 1.877254553711505e-07, "loss": 1.4054, "step": 10879 }, { "epoch": 1.8769947382040888, "grad_norm": 0.578125, "learning_rate": 1.8720089349498093e-07, "loss": 1.4699, "step": 10880 }, { "epoch": 1.8771672561028208, "grad_norm": 0.56640625, "learning_rate": 1.8667705861228126e-07, "loss": 1.3528, "step": 10881 }, { "epoch": 1.8773397740015527, "grad_norm": 0.6015625, "learning_rate": 1.861539507618626e-07, "loss": 1.412, "step": 10882 }, { "epoch": 1.8775122919002847, "grad_norm": 0.58203125, "learning_rate": 1.8563156998247844e-07, "loss": 1.3684, "step": 10883 }, { "epoch": 1.8776848097990166, "grad_norm": 0.578125, "learning_rate": 1.8510991631283003e-07, "loss": 1.3471, "step": 10884 }, { "epoch": 1.8778573276977486, "grad_norm": 0.578125, "learning_rate": 1.8458898979156536e-07, "loss": 1.5138, "step": 10885 }, { "epoch": 1.8780298455964806, "grad_norm": 0.56640625, "learning_rate": 1.840687904572791e-07, "loss": 1.422, "step": 10886 }, { "epoch": 1.8782023634952125, "grad_norm": 0.62890625, "learning_rate": 1.835493183485082e-07, "loss": 1.4165, "step": 10887 }, { "epoch": 1.8783748813939445, "grad_norm": 0.55859375, "learning_rate": 1.8303057350374077e-07, "loss": 1.4187, "step": 10888 }, { "epoch": 1.8785473992926767, "grad_norm": 0.609375, "learning_rate": 1.8251255596140716e-07, "loss": 1.3732, "step": 10889 }, { "epoch": 1.8787199171914086, "grad_norm": 0.578125, "learning_rate": 1.819952657598867e-07, "loss": 1.3429, "step": 10890 }, { "epoch": 1.8788924350901406, "grad_norm": 0.5703125, "learning_rate": 1.8147870293750092e-07, "loss": 1.3351, "step": 10891 }, { "epoch": 1.8790649529888725, "grad_norm": 0.5546875, "learning_rate": 1.8096286753252368e-07, "loss": 1.3576, "step": 10892 }, { "epoch": 1.8792374708876047, "grad_norm": 0.640625, "learning_rate": 1.8044775958316884e-07, "loss": 1.4096, "step": 10893 }, { "epoch": 1.8794099887863367, "grad_norm": 0.5703125, "learning_rate": 1.7993337912759924e-07, "loss": 1.4544, "step": 10894 }, { "epoch": 1.8795825066850687, "grad_norm": 0.5703125, "learning_rate": 1.7941972620392322e-07, "loss": 1.4095, "step": 10895 }, { "epoch": 1.8797550245838006, "grad_norm": 0.5859375, "learning_rate": 1.7890680085019597e-07, "loss": 1.4747, "step": 10896 }, { "epoch": 1.8799275424825326, "grad_norm": 0.6171875, "learning_rate": 1.7839460310441814e-07, "loss": 1.3743, "step": 10897 }, { "epoch": 1.8801000603812645, "grad_norm": 0.5625, "learning_rate": 1.778831330045372e-07, "loss": 1.3766, "step": 10898 }, { "epoch": 1.8802725782799965, "grad_norm": 0.56640625, "learning_rate": 1.773723905884428e-07, "loss": 1.4824, "step": 10899 }, { "epoch": 1.8804450961787285, "grad_norm": 0.67578125, "learning_rate": 1.7686237589397914e-07, "loss": 1.5453, "step": 10900 }, { "epoch": 1.8804450961787285, "eval_loss": 1.4070632457733154, "eval_runtime": 10.8553, "eval_samples_per_second": 94.332, "eval_steps_per_second": 23.583, "step": 10900 }, { "epoch": 1.8806176140774604, "grad_norm": 0.56640625, "learning_rate": 1.76353088958926e-07, "loss": 1.4266, "step": 10901 }, { "epoch": 1.8807901319761924, "grad_norm": 0.7109375, "learning_rate": 1.7584452982101764e-07, "loss": 1.5084, "step": 10902 }, { "epoch": 1.8809626498749246, "grad_norm": 0.5546875, "learning_rate": 1.7533669851793166e-07, "loss": 1.3585, "step": 10903 }, { "epoch": 1.8811351677736565, "grad_norm": 0.57421875, "learning_rate": 1.7482959508729024e-07, "loss": 1.4781, "step": 10904 }, { "epoch": 1.8813076856723885, "grad_norm": 0.65234375, "learning_rate": 1.743232195666622e-07, "loss": 1.4678, "step": 10905 }, { "epoch": 1.8814802035711207, "grad_norm": 0.5703125, "learning_rate": 1.738175719935642e-07, "loss": 1.4632, "step": 10906 }, { "epoch": 1.8816527214698526, "grad_norm": 0.65625, "learning_rate": 1.7331265240545624e-07, "loss": 1.3286, "step": 10907 }, { "epoch": 1.8818252393685846, "grad_norm": 0.6015625, "learning_rate": 1.7280846083974735e-07, "loss": 1.4574, "step": 10908 }, { "epoch": 1.8819977572673166, "grad_norm": 0.55078125, "learning_rate": 1.7230499733379202e-07, "loss": 1.4725, "step": 10909 }, { "epoch": 1.8821702751660485, "grad_norm": 0.58984375, "learning_rate": 1.7180226192488715e-07, "loss": 1.4448, "step": 10910 }, { "epoch": 1.8823427930647805, "grad_norm": 0.609375, "learning_rate": 1.7130025465028178e-07, "loss": 1.4886, "step": 10911 }, { "epoch": 1.8825153109635124, "grad_norm": 0.55859375, "learning_rate": 1.7079897554716508e-07, "loss": 1.4193, "step": 10912 }, { "epoch": 1.8826878288622444, "grad_norm": 0.5703125, "learning_rate": 1.7029842465267622e-07, "loss": 1.4011, "step": 10913 }, { "epoch": 1.8828603467609764, "grad_norm": 0.5703125, "learning_rate": 1.6979860200389885e-07, "loss": 1.4604, "step": 10914 }, { "epoch": 1.8830328646597083, "grad_norm": 0.60546875, "learning_rate": 1.692995076378634e-07, "loss": 1.4078, "step": 10915 }, { "epoch": 1.8832053825584403, "grad_norm": 0.59765625, "learning_rate": 1.6880114159154471e-07, "loss": 1.4332, "step": 10916 }, { "epoch": 1.8833779004571725, "grad_norm": 0.546875, "learning_rate": 1.6830350390186546e-07, "loss": 1.3531, "step": 10917 }, { "epoch": 1.8835504183559044, "grad_norm": 0.546875, "learning_rate": 1.6780659460569505e-07, "loss": 1.3574, "step": 10918 }, { "epoch": 1.8837229362546364, "grad_norm": 0.55859375, "learning_rate": 1.6731041373984513e-07, "loss": 1.308, "step": 10919 }, { "epoch": 1.8838954541533686, "grad_norm": 0.609375, "learning_rate": 1.6681496134107856e-07, "loss": 1.4647, "step": 10920 }, { "epoch": 1.8840679720521005, "grad_norm": 0.58984375, "learning_rate": 1.6632023744609925e-07, "loss": 1.5234, "step": 10921 }, { "epoch": 1.8842404899508325, "grad_norm": 0.65625, "learning_rate": 1.658262420915613e-07, "loss": 1.4465, "step": 10922 }, { "epoch": 1.8844130078495644, "grad_norm": 0.63671875, "learning_rate": 1.653329753140609e-07, "loss": 1.3919, "step": 10923 }, { "epoch": 1.8845855257482964, "grad_norm": 0.8046875, "learning_rate": 1.648404371501444e-07, "loss": 1.3689, "step": 10924 }, { "epoch": 1.8847580436470284, "grad_norm": 0.68359375, "learning_rate": 1.6434862763630156e-07, "loss": 1.4015, "step": 10925 }, { "epoch": 1.8849305615457603, "grad_norm": 0.5703125, "learning_rate": 1.6385754680896758e-07, "loss": 1.452, "step": 10926 }, { "epoch": 1.8851030794444923, "grad_norm": 0.60546875, "learning_rate": 1.6336719470452566e-07, "loss": 1.4592, "step": 10927 }, { "epoch": 1.8852755973432243, "grad_norm": 0.578125, "learning_rate": 1.6287757135930448e-07, "loss": 1.5216, "step": 10928 }, { "epoch": 1.8854481152419562, "grad_norm": 0.5703125, "learning_rate": 1.6238867680957726e-07, "loss": 1.3362, "step": 10929 }, { "epoch": 1.8856206331406884, "grad_norm": 0.60546875, "learning_rate": 1.6190051109156613e-07, "loss": 1.4932, "step": 10930 }, { "epoch": 1.8857931510394204, "grad_norm": 0.578125, "learning_rate": 1.6141307424143549e-07, "loss": 1.3702, "step": 10931 }, { "epoch": 1.8859656689381523, "grad_norm": 0.6015625, "learning_rate": 1.609263662952998e-07, "loss": 1.383, "step": 10932 }, { "epoch": 1.8861381868368843, "grad_norm": 0.55859375, "learning_rate": 1.6044038728921575e-07, "loss": 1.3771, "step": 10933 }, { "epoch": 1.8863107047356165, "grad_norm": 0.61328125, "learning_rate": 1.5995513725918676e-07, "loss": 1.4453, "step": 10934 }, { "epoch": 1.8864832226343484, "grad_norm": 0.58984375, "learning_rate": 1.5947061624116634e-07, "loss": 1.4363, "step": 10935 }, { "epoch": 1.8866557405330804, "grad_norm": 0.59765625, "learning_rate": 1.5898682427104905e-07, "loss": 1.4706, "step": 10936 }, { "epoch": 1.8868282584318123, "grad_norm": 0.61328125, "learning_rate": 1.5850376138467626e-07, "loss": 1.2775, "step": 10937 }, { "epoch": 1.8870007763305443, "grad_norm": 0.5703125, "learning_rate": 1.5802142761783824e-07, "loss": 1.5225, "step": 10938 }, { "epoch": 1.8871732942292763, "grad_norm": 0.7421875, "learning_rate": 1.5753982300626859e-07, "loss": 1.3915, "step": 10939 }, { "epoch": 1.8873458121280082, "grad_norm": 0.55078125, "learning_rate": 1.5705894758564654e-07, "loss": 1.4447, "step": 10940 }, { "epoch": 1.8875183300267402, "grad_norm": 0.56640625, "learning_rate": 1.565788013916003e-07, "loss": 1.3441, "step": 10941 }, { "epoch": 1.8876908479254721, "grad_norm": 0.546875, "learning_rate": 1.560993844596992e-07, "loss": 1.3908, "step": 10942 }, { "epoch": 1.887863365824204, "grad_norm": 0.6015625, "learning_rate": 1.5562069682546587e-07, "loss": 1.4749, "step": 10943 }, { "epoch": 1.8880358837229363, "grad_norm": 0.578125, "learning_rate": 1.551427385243587e-07, "loss": 1.3988, "step": 10944 }, { "epoch": 1.8882084016216683, "grad_norm": 0.6015625, "learning_rate": 1.546655095917937e-07, "loss": 1.5236, "step": 10945 }, { "epoch": 1.8883809195204002, "grad_norm": 0.60546875, "learning_rate": 1.541890100631227e-07, "loss": 1.4393, "step": 10946 }, { "epoch": 1.8885534374191324, "grad_norm": 0.58203125, "learning_rate": 1.5371323997364962e-07, "loss": 1.4745, "step": 10947 }, { "epoch": 1.8887259553178644, "grad_norm": 0.57421875, "learning_rate": 1.5323819935862183e-07, "loss": 1.491, "step": 10948 }, { "epoch": 1.8888984732165963, "grad_norm": 0.72265625, "learning_rate": 1.5276388825323452e-07, "loss": 1.3892, "step": 10949 }, { "epoch": 1.8890709911153283, "grad_norm": 0.5546875, "learning_rate": 1.5229030669262622e-07, "loss": 1.3417, "step": 10950 }, { "epoch": 1.8892435090140602, "grad_norm": 0.5625, "learning_rate": 1.5181745471188336e-07, "loss": 1.4163, "step": 10951 }, { "epoch": 1.8894160269127922, "grad_norm": 0.578125, "learning_rate": 1.5134533234603786e-07, "loss": 1.4501, "step": 10952 }, { "epoch": 1.8895885448115242, "grad_norm": 0.56640625, "learning_rate": 1.5087393963006736e-07, "loss": 1.4131, "step": 10953 }, { "epoch": 1.8897610627102561, "grad_norm": 0.61328125, "learning_rate": 1.504032765988961e-07, "loss": 1.4359, "step": 10954 }, { "epoch": 1.889933580608988, "grad_norm": 0.5859375, "learning_rate": 1.4993334328739174e-07, "loss": 1.3766, "step": 10955 }, { "epoch": 1.89010609850772, "grad_norm": 0.71484375, "learning_rate": 1.494641397303731e-07, "loss": 1.4124, "step": 10956 }, { "epoch": 1.890278616406452, "grad_norm": 0.56640625, "learning_rate": 1.4899566596259907e-07, "loss": 1.4417, "step": 10957 }, { "epoch": 1.8904511343051842, "grad_norm": 0.62890625, "learning_rate": 1.4852792201877742e-07, "loss": 1.3079, "step": 10958 }, { "epoch": 1.8906236522039161, "grad_norm": 0.58203125, "learning_rate": 1.4806090793356266e-07, "loss": 1.5389, "step": 10959 }, { "epoch": 1.890796170102648, "grad_norm": 0.56640625, "learning_rate": 1.4759462374155376e-07, "loss": 1.4332, "step": 10960 }, { "epoch": 1.8909686880013803, "grad_norm": 0.6328125, "learning_rate": 1.4712906947729643e-07, "loss": 1.3728, "step": 10961 }, { "epoch": 1.8911412059001123, "grad_norm": 0.62109375, "learning_rate": 1.4666424517528088e-07, "loss": 1.3431, "step": 10962 }, { "epoch": 1.8913137237988442, "grad_norm": 0.546875, "learning_rate": 1.4620015086994398e-07, "loss": 1.3682, "step": 10963 }, { "epoch": 1.8914862416975762, "grad_norm": 0.5625, "learning_rate": 1.457367865956705e-07, "loss": 1.4846, "step": 10964 }, { "epoch": 1.8916587595963081, "grad_norm": 0.578125, "learning_rate": 1.4527415238678622e-07, "loss": 1.4709, "step": 10965 }, { "epoch": 1.89183127749504, "grad_norm": 0.58984375, "learning_rate": 1.4481224827756824e-07, "loss": 1.3932, "step": 10966 }, { "epoch": 1.892003795393772, "grad_norm": 0.8359375, "learning_rate": 1.4435107430223806e-07, "loss": 1.3974, "step": 10967 }, { "epoch": 1.892176313292504, "grad_norm": 0.5703125, "learning_rate": 1.4389063049496165e-07, "loss": 1.4036, "step": 10968 }, { "epoch": 1.892348831191236, "grad_norm": 0.6015625, "learning_rate": 1.4343091688984956e-07, "loss": 1.406, "step": 10969 }, { "epoch": 1.892521349089968, "grad_norm": 0.61328125, "learning_rate": 1.4297193352096228e-07, "loss": 1.3486, "step": 10970 }, { "epoch": 1.8926938669887001, "grad_norm": 0.68359375, "learning_rate": 1.4251368042230485e-07, "loss": 1.4483, "step": 10971 }, { "epoch": 1.892866384887432, "grad_norm": 0.55859375, "learning_rate": 1.4205615762782566e-07, "loss": 1.4674, "step": 10972 }, { "epoch": 1.893038902786164, "grad_norm": 0.56640625, "learning_rate": 1.415993651714209e-07, "loss": 1.3881, "step": 10973 }, { "epoch": 1.893211420684896, "grad_norm": 0.578125, "learning_rate": 1.4114330308693358e-07, "loss": 1.3689, "step": 10974 }, { "epoch": 1.8933839385836282, "grad_norm": 0.57421875, "learning_rate": 1.4068797140815217e-07, "loss": 1.4367, "step": 10975 }, { "epoch": 1.8935564564823602, "grad_norm": 0.546875, "learning_rate": 1.4023337016880856e-07, "loss": 1.3465, "step": 10976 }, { "epoch": 1.8937289743810921, "grad_norm": 0.56640625, "learning_rate": 1.3977949940258474e-07, "loss": 1.4499, "step": 10977 }, { "epoch": 1.893901492279824, "grad_norm": 0.58984375, "learning_rate": 1.3932635914310488e-07, "loss": 1.4315, "step": 10978 }, { "epoch": 1.894074010178556, "grad_norm": 0.59765625, "learning_rate": 1.3887394942393994e-07, "loss": 1.5568, "step": 10979 }, { "epoch": 1.894246528077288, "grad_norm": 0.58984375, "learning_rate": 1.3842227027860866e-07, "loss": 1.4174, "step": 10980 }, { "epoch": 1.89441904597602, "grad_norm": 0.5859375, "learning_rate": 1.3797132174057315e-07, "loss": 1.3474, "step": 10981 }, { "epoch": 1.894591563874752, "grad_norm": 0.59375, "learning_rate": 1.3752110384324336e-07, "loss": 1.4749, "step": 10982 }, { "epoch": 1.8947640817734839, "grad_norm": 0.75390625, "learning_rate": 1.370716166199726e-07, "loss": 1.4026, "step": 10983 }, { "epoch": 1.8949365996722158, "grad_norm": 0.58203125, "learning_rate": 1.3662286010406423e-07, "loss": 1.3766, "step": 10984 }, { "epoch": 1.895109117570948, "grad_norm": 0.66796875, "learning_rate": 1.3617483432876278e-07, "loss": 1.4039, "step": 10985 }, { "epoch": 1.89528163546968, "grad_norm": 0.5859375, "learning_rate": 1.357275393272628e-07, "loss": 1.3893, "step": 10986 }, { "epoch": 1.895454153368412, "grad_norm": 0.609375, "learning_rate": 1.352809751327e-07, "loss": 1.4591, "step": 10987 }, { "epoch": 1.8956266712671441, "grad_norm": 0.55859375, "learning_rate": 1.3483514177816127e-07, "loss": 1.4423, "step": 10988 }, { "epoch": 1.895799189165876, "grad_norm": 0.65234375, "learning_rate": 1.343900392966757e-07, "loss": 1.3756, "step": 10989 }, { "epoch": 1.895971707064608, "grad_norm": 0.73828125, "learning_rate": 1.339456677212192e-07, "loss": 1.4249, "step": 10990 }, { "epoch": 1.89614422496334, "grad_norm": 0.69921875, "learning_rate": 1.3350202708471316e-07, "loss": 1.4022, "step": 10991 }, { "epoch": 1.896316742862072, "grad_norm": 0.65625, "learning_rate": 1.3305911742002575e-07, "loss": 1.3189, "step": 10992 }, { "epoch": 1.896489260760804, "grad_norm": 0.56640625, "learning_rate": 1.3261693875996962e-07, "loss": 1.3691, "step": 10993 }, { "epoch": 1.896661778659536, "grad_norm": 0.5703125, "learning_rate": 1.3217549113730633e-07, "loss": 1.4177, "step": 10994 }, { "epoch": 1.8968342965582679, "grad_norm": 0.5703125, "learning_rate": 1.317347745847386e-07, "loss": 1.4069, "step": 10995 }, { "epoch": 1.8970068144569998, "grad_norm": 0.58203125, "learning_rate": 1.3129478913491923e-07, "loss": 1.4167, "step": 10996 }, { "epoch": 1.8971793323557318, "grad_norm": 0.5859375, "learning_rate": 1.3085553482044434e-07, "loss": 1.4617, "step": 10997 }, { "epoch": 1.8973518502544637, "grad_norm": 0.6328125, "learning_rate": 1.3041701167385567e-07, "loss": 1.4375, "step": 10998 }, { "epoch": 1.897524368153196, "grad_norm": 0.59375, "learning_rate": 1.2997921972764395e-07, "loss": 1.4244, "step": 10999 }, { "epoch": 1.8976968860519279, "grad_norm": 0.62890625, "learning_rate": 1.2954215901424204e-07, "loss": 1.4471, "step": 11000 }, { "epoch": 1.8976968860519279, "eval_loss": 1.4070650339126587, "eval_runtime": 10.8027, "eval_samples_per_second": 94.791, "eval_steps_per_second": 23.698, "step": 11000 }, { "epoch": 1.8978694039506598, "grad_norm": 0.59375, "learning_rate": 1.291058295660308e-07, "loss": 1.426, "step": 11001 }, { "epoch": 1.898041921849392, "grad_norm": 0.6015625, "learning_rate": 1.2867023141533542e-07, "loss": 1.4994, "step": 11002 }, { "epoch": 1.898214439748124, "grad_norm": 0.58203125, "learning_rate": 1.2823536459442788e-07, "loss": 1.4619, "step": 11003 }, { "epoch": 1.898386957646856, "grad_norm": 0.62109375, "learning_rate": 1.2780122913552684e-07, "loss": 1.4617, "step": 11004 }, { "epoch": 1.898559475545588, "grad_norm": 0.6875, "learning_rate": 1.2736782507079436e-07, "loss": 1.3247, "step": 11005 }, { "epoch": 1.8987319934443199, "grad_norm": 0.59765625, "learning_rate": 1.2693515243234146e-07, "loss": 1.5786, "step": 11006 }, { "epoch": 1.8989045113430518, "grad_norm": 0.59375, "learning_rate": 1.2650321125222243e-07, "loss": 1.4376, "step": 11007 }, { "epoch": 1.8990770292417838, "grad_norm": 0.5546875, "learning_rate": 1.2607200156243615e-07, "loss": 1.4922, "step": 11008 }, { "epoch": 1.8992495471405157, "grad_norm": 0.53125, "learning_rate": 1.2564152339493263e-07, "loss": 1.3807, "step": 11009 }, { "epoch": 1.8994220650392477, "grad_norm": 2.203125, "learning_rate": 1.2521177678160302e-07, "loss": 1.3754, "step": 11010 }, { "epoch": 1.8995945829379797, "grad_norm": 0.58203125, "learning_rate": 1.2478276175428516e-07, "loss": 1.451, "step": 11011 }, { "epoch": 1.8997671008367119, "grad_norm": 0.6171875, "learning_rate": 1.2435447834476254e-07, "loss": 1.472, "step": 11012 }, { "epoch": 1.8999396187354438, "grad_norm": 0.55859375, "learning_rate": 1.2392692658476758e-07, "loss": 1.4268, "step": 11013 }, { "epoch": 1.9001121366341758, "grad_norm": 0.58203125, "learning_rate": 1.2350010650597378e-07, "loss": 1.4491, "step": 11014 }, { "epoch": 1.9002846545329077, "grad_norm": 0.57421875, "learning_rate": 1.2307401814000252e-07, "loss": 1.4408, "step": 11015 }, { "epoch": 1.90045717243164, "grad_norm": 0.58203125, "learning_rate": 1.2264866151842304e-07, "loss": 1.3987, "step": 11016 }, { "epoch": 1.9006296903303719, "grad_norm": 0.56640625, "learning_rate": 1.2222403667274673e-07, "loss": 1.3893, "step": 11017 }, { "epoch": 1.9008022082291038, "grad_norm": 0.5703125, "learning_rate": 1.2180014363443404e-07, "loss": 1.3289, "step": 11018 }, { "epoch": 1.9009747261278358, "grad_norm": 0.5859375, "learning_rate": 1.213769824348865e-07, "loss": 1.3839, "step": 11019 }, { "epoch": 1.9011472440265678, "grad_norm": 0.578125, "learning_rate": 1.2095455310545788e-07, "loss": 1.3243, "step": 11020 }, { "epoch": 1.9013197619252997, "grad_norm": 0.5703125, "learning_rate": 1.2053285567744323e-07, "loss": 1.4218, "step": 11021 }, { "epoch": 1.9014922798240317, "grad_norm": 0.5859375, "learning_rate": 1.2011189018208414e-07, "loss": 1.4655, "step": 11022 }, { "epoch": 1.9016647977227636, "grad_norm": 1.453125, "learning_rate": 1.1969165665056904e-07, "loss": 1.4107, "step": 11023 }, { "epoch": 1.9018373156214956, "grad_norm": 0.56640625, "learning_rate": 1.1927215511402968e-07, "loss": 1.4179, "step": 11024 }, { "epoch": 1.9020098335202276, "grad_norm": 0.578125, "learning_rate": 1.1885338560354787e-07, "loss": 1.3512, "step": 11025 }, { "epoch": 1.9021823514189597, "grad_norm": 0.62890625, "learning_rate": 1.1843534815014767e-07, "loss": 1.4255, "step": 11026 }, { "epoch": 1.9023548693176917, "grad_norm": 0.57421875, "learning_rate": 1.1801804278479877e-07, "loss": 1.3959, "step": 11027 }, { "epoch": 1.9025273872164237, "grad_norm": 0.57421875, "learning_rate": 1.1760146953841978e-07, "loss": 1.4338, "step": 11028 }, { "epoch": 1.9026999051151559, "grad_norm": 0.59375, "learning_rate": 1.1718562844187153e-07, "loss": 1.4376, "step": 11029 }, { "epoch": 1.9028724230138878, "grad_norm": 0.58203125, "learning_rate": 1.1677051952596164e-07, "loss": 1.4145, "step": 11030 }, { "epoch": 1.9030449409126198, "grad_norm": 0.58203125, "learning_rate": 1.1635614282144658e-07, "loss": 1.3891, "step": 11031 }, { "epoch": 1.9032174588113517, "grad_norm": 0.64453125, "learning_rate": 1.1594249835902294e-07, "loss": 1.4681, "step": 11032 }, { "epoch": 1.9033899767100837, "grad_norm": 0.5703125, "learning_rate": 1.155295861693384e-07, "loss": 1.4878, "step": 11033 }, { "epoch": 1.9035624946088157, "grad_norm": 0.58984375, "learning_rate": 1.1511740628298296e-07, "loss": 1.343, "step": 11034 }, { "epoch": 1.9037350125075476, "grad_norm": 0.625, "learning_rate": 1.147059587304944e-07, "loss": 1.3924, "step": 11035 }, { "epoch": 1.9039075304062796, "grad_norm": 0.56640625, "learning_rate": 1.1429524354235277e-07, "loss": 1.4782, "step": 11036 }, { "epoch": 1.9040800483050115, "grad_norm": 0.61328125, "learning_rate": 1.1388526074898931e-07, "loss": 1.4173, "step": 11037 }, { "epoch": 1.9042525662037435, "grad_norm": 0.57421875, "learning_rate": 1.134760103807775e-07, "loss": 1.3841, "step": 11038 }, { "epoch": 1.9044250841024757, "grad_norm": 0.6015625, "learning_rate": 1.130674924680364e-07, "loss": 1.4633, "step": 11039 }, { "epoch": 1.9045976020012076, "grad_norm": 0.56640625, "learning_rate": 1.1265970704103069e-07, "loss": 1.3245, "step": 11040 }, { "epoch": 1.9047701198999396, "grad_norm": 0.58203125, "learning_rate": 1.122526541299751e-07, "loss": 1.4491, "step": 11041 }, { "epoch": 1.9049426377986716, "grad_norm": 0.60546875, "learning_rate": 1.1184633376502218e-07, "loss": 1.4918, "step": 11042 }, { "epoch": 1.9051151556974038, "grad_norm": 0.625, "learning_rate": 1.1144074597627785e-07, "loss": 1.4963, "step": 11043 }, { "epoch": 1.9052876735961357, "grad_norm": 0.6328125, "learning_rate": 1.1103589079378918e-07, "loss": 1.3569, "step": 11044 }, { "epoch": 1.9054601914948677, "grad_norm": 0.58203125, "learning_rate": 1.1063176824755107e-07, "loss": 1.4771, "step": 11045 }, { "epoch": 1.9056327093935996, "grad_norm": 0.61328125, "learning_rate": 1.1022837836750399e-07, "loss": 1.3965, "step": 11046 }, { "epoch": 1.9058052272923316, "grad_norm": 0.61328125, "learning_rate": 1.0982572118353186e-07, "loss": 1.5245, "step": 11047 }, { "epoch": 1.9059777451910636, "grad_norm": 0.58203125, "learning_rate": 1.0942379672546743e-07, "loss": 1.3126, "step": 11048 }, { "epoch": 1.9061502630897955, "grad_norm": 0.6484375, "learning_rate": 1.0902260502308692e-07, "loss": 1.4799, "step": 11049 }, { "epoch": 1.9063227809885275, "grad_norm": 0.60546875, "learning_rate": 1.0862214610611432e-07, "loss": 1.3847, "step": 11050 }, { "epoch": 1.9064952988872594, "grad_norm": 0.6171875, "learning_rate": 1.082224200042159e-07, "loss": 1.5114, "step": 11051 }, { "epoch": 1.9066678167859914, "grad_norm": 0.6875, "learning_rate": 1.0782342674700907e-07, "loss": 1.3262, "step": 11052 }, { "epoch": 1.9068403346847236, "grad_norm": 4.6875, "learning_rate": 1.074251663640502e-07, "loss": 1.3784, "step": 11053 }, { "epoch": 1.9070128525834555, "grad_norm": 0.63671875, "learning_rate": 1.0702763888484791e-07, "loss": 1.4332, "step": 11054 }, { "epoch": 1.9071853704821875, "grad_norm": 0.83203125, "learning_rate": 1.0663084433885196e-07, "loss": 1.3191, "step": 11055 }, { "epoch": 1.9073578883809197, "grad_norm": 0.59765625, "learning_rate": 1.0623478275545884e-07, "loss": 1.4464, "step": 11056 }, { "epoch": 1.9075304062796516, "grad_norm": 0.6484375, "learning_rate": 1.0583945416401286e-07, "loss": 1.4451, "step": 11057 }, { "epoch": 1.9077029241783836, "grad_norm": 0.62890625, "learning_rate": 1.0544485859380172e-07, "loss": 1.4179, "step": 11058 }, { "epoch": 1.9078754420771156, "grad_norm": 0.9375, "learning_rate": 1.0505099607405866e-07, "loss": 1.4735, "step": 11059 }, { "epoch": 1.9080479599758475, "grad_norm": 0.5390625, "learning_rate": 1.0465786663396593e-07, "loss": 1.4257, "step": 11060 }, { "epoch": 1.9082204778745795, "grad_norm": 0.58203125, "learning_rate": 1.0426547030264577e-07, "loss": 1.4477, "step": 11061 }, { "epoch": 1.9083929957733115, "grad_norm": 0.55859375, "learning_rate": 1.0387380710917161e-07, "loss": 1.4187, "step": 11062 }, { "epoch": 1.9085655136720434, "grad_norm": 0.578125, "learning_rate": 1.0348287708256021e-07, "loss": 1.4474, "step": 11063 }, { "epoch": 1.9087380315707754, "grad_norm": 0.58203125, "learning_rate": 1.0309268025177288e-07, "loss": 1.4031, "step": 11064 }, { "epoch": 1.9089105494695073, "grad_norm": 0.66796875, "learning_rate": 1.0270321664571981e-07, "loss": 1.4469, "step": 11065 }, { "epoch": 1.9090830673682393, "grad_norm": 0.55859375, "learning_rate": 1.0231448629325236e-07, "loss": 1.2811, "step": 11066 }, { "epoch": 1.9092555852669715, "grad_norm": 0.61328125, "learning_rate": 1.0192648922317084e-07, "loss": 1.4864, "step": 11067 }, { "epoch": 1.9094281031657034, "grad_norm": 0.6015625, "learning_rate": 1.0153922546422223e-07, "loss": 1.4214, "step": 11068 }, { "epoch": 1.9096006210644354, "grad_norm": 0.6171875, "learning_rate": 1.0115269504509583e-07, "loss": 1.4991, "step": 11069 }, { "epoch": 1.9097731389631676, "grad_norm": 0.6015625, "learning_rate": 1.0076689799442874e-07, "loss": 1.3097, "step": 11070 }, { "epoch": 1.9099456568618995, "grad_norm": 0.6328125, "learning_rate": 1.0038183434080363e-07, "loss": 1.3634, "step": 11071 }, { "epoch": 1.9101181747606315, "grad_norm": 0.5859375, "learning_rate": 9.999750411274655e-08, "loss": 1.3467, "step": 11072 }, { "epoch": 1.9102906926593635, "grad_norm": 0.57421875, "learning_rate": 9.961390733873366e-08, "loss": 1.456, "step": 11073 }, { "epoch": 1.9104632105580954, "grad_norm": 0.578125, "learning_rate": 9.92310440471822e-08, "loss": 1.3618, "step": 11074 }, { "epoch": 1.9106357284568274, "grad_norm": 0.61328125, "learning_rate": 9.884891426645837e-08, "loss": 1.4833, "step": 11075 }, { "epoch": 1.9108082463555593, "grad_norm": 0.65625, "learning_rate": 9.846751802487175e-08, "loss": 1.4058, "step": 11076 }, { "epoch": 1.9109807642542913, "grad_norm": 0.5546875, "learning_rate": 9.808685535067863e-08, "loss": 1.5136, "step": 11077 }, { "epoch": 1.9111532821530233, "grad_norm": 0.58203125, "learning_rate": 9.7706926272082e-08, "loss": 1.4108, "step": 11078 }, { "epoch": 1.9113258000517552, "grad_norm": 0.59375, "learning_rate": 9.732773081722824e-08, "loss": 1.4449, "step": 11079 }, { "epoch": 1.9114983179504874, "grad_norm": 0.60546875, "learning_rate": 9.694926901421153e-08, "loss": 1.4676, "step": 11080 }, { "epoch": 1.9116708358492194, "grad_norm": 0.60546875, "learning_rate": 9.657154089106946e-08, "loss": 1.4309, "step": 11081 }, { "epoch": 1.9118433537479513, "grad_norm": 0.546875, "learning_rate": 9.619454647578852e-08, "loss": 1.3618, "step": 11082 }, { "epoch": 1.9120158716466833, "grad_norm": 0.5390625, "learning_rate": 9.581828579629526e-08, "loss": 1.3586, "step": 11083 }, { "epoch": 1.9121883895454155, "grad_norm": 0.5625, "learning_rate": 9.544275888046961e-08, "loss": 1.3806, "step": 11084 }, { "epoch": 1.9123609074441474, "grad_norm": 0.6015625, "learning_rate": 9.506796575612931e-08, "loss": 1.3433, "step": 11085 }, { "epoch": 1.9125334253428794, "grad_norm": 0.59375, "learning_rate": 9.469390645104437e-08, "loss": 1.4082, "step": 11086 }, { "epoch": 1.9127059432416114, "grad_norm": 0.6484375, "learning_rate": 9.432058099292484e-08, "loss": 1.3976, "step": 11087 }, { "epoch": 1.9128784611403433, "grad_norm": 0.5546875, "learning_rate": 9.394798940943083e-08, "loss": 1.353, "step": 11088 }, { "epoch": 1.9130509790390753, "grad_norm": 0.57421875, "learning_rate": 9.357613172816471e-08, "loss": 1.377, "step": 11089 }, { "epoch": 1.9132234969378072, "grad_norm": 0.59765625, "learning_rate": 9.320500797667886e-08, "loss": 1.4407, "step": 11090 }, { "epoch": 1.9133960148365392, "grad_norm": 0.57421875, "learning_rate": 9.283461818246464e-08, "loss": 1.3338, "step": 11091 }, { "epoch": 1.9135685327352712, "grad_norm": 0.6015625, "learning_rate": 9.246496237296565e-08, "loss": 1.5407, "step": 11092 }, { "epoch": 1.9137410506340031, "grad_norm": 0.62890625, "learning_rate": 9.209604057556665e-08, "loss": 1.448, "step": 11093 }, { "epoch": 1.9139135685327353, "grad_norm": 0.5546875, "learning_rate": 9.172785281760132e-08, "loss": 1.381, "step": 11094 }, { "epoch": 1.9140860864314673, "grad_norm": 0.640625, "learning_rate": 9.136039912634675e-08, "loss": 1.4523, "step": 11095 }, { "epoch": 1.9142586043301992, "grad_norm": 0.5703125, "learning_rate": 9.099367952902449e-08, "loss": 1.4942, "step": 11096 }, { "epoch": 1.9144311222289314, "grad_norm": 0.60546875, "learning_rate": 9.062769405280614e-08, "loss": 1.369, "step": 11097 }, { "epoch": 1.9146036401276634, "grad_norm": 0.609375, "learning_rate": 9.026244272480445e-08, "loss": 1.3735, "step": 11098 }, { "epoch": 1.9147761580263953, "grad_norm": 0.59765625, "learning_rate": 8.989792557207889e-08, "loss": 1.4239, "step": 11099 }, { "epoch": 1.9149486759251273, "grad_norm": 0.62109375, "learning_rate": 8.953414262163674e-08, "loss": 1.3988, "step": 11100 }, { "epoch": 1.9149486759251273, "eval_loss": 1.4070982933044434, "eval_runtime": 10.8142, "eval_samples_per_second": 94.69, "eval_steps_per_second": 23.673, "step": 11100 }, { "epoch": 1.9151211938238593, "grad_norm": 0.5703125, "learning_rate": 8.917109390042866e-08, "loss": 1.4235, "step": 11101 }, { "epoch": 1.9152937117225912, "grad_norm": 0.53125, "learning_rate": 8.880877943535204e-08, "loss": 1.3556, "step": 11102 }, { "epoch": 1.9154662296213232, "grad_norm": 0.59765625, "learning_rate": 8.844719925324985e-08, "loss": 1.46, "step": 11103 }, { "epoch": 1.9156387475200551, "grad_norm": 0.58984375, "learning_rate": 8.808635338090732e-08, "loss": 1.4678, "step": 11104 }, { "epoch": 1.915811265418787, "grad_norm": 0.578125, "learning_rate": 8.772624184506196e-08, "loss": 1.4166, "step": 11105 }, { "epoch": 1.915983783317519, "grad_norm": 0.6328125, "learning_rate": 8.736686467239131e-08, "loss": 1.435, "step": 11106 }, { "epoch": 1.916156301216251, "grad_norm": 0.5703125, "learning_rate": 8.700822188951963e-08, "loss": 1.5098, "step": 11107 }, { "epoch": 1.9163288191149832, "grad_norm": 0.66796875, "learning_rate": 8.665031352301789e-08, "loss": 1.3868, "step": 11108 }, { "epoch": 1.9165013370137152, "grad_norm": 0.5703125, "learning_rate": 8.629313959940266e-08, "loss": 1.3686, "step": 11109 }, { "epoch": 1.9166738549124471, "grad_norm": 0.57421875, "learning_rate": 8.5936700145135e-08, "loss": 1.4824, "step": 11110 }, { "epoch": 1.9168463728111793, "grad_norm": 0.8203125, "learning_rate": 8.558099518662378e-08, "loss": 1.4923, "step": 11111 }, { "epoch": 1.9170188907099113, "grad_norm": 0.5625, "learning_rate": 8.522602475021902e-08, "loss": 1.3338, "step": 11112 }, { "epoch": 1.9171914086086432, "grad_norm": 0.59765625, "learning_rate": 8.487178886222192e-08, "loss": 1.3928, "step": 11113 }, { "epoch": 1.9173639265073752, "grad_norm": 0.5625, "learning_rate": 8.451828754887481e-08, "loss": 1.4036, "step": 11114 }, { "epoch": 1.9175364444061072, "grad_norm": 0.6171875, "learning_rate": 8.416552083636676e-08, "loss": 1.4697, "step": 11115 }, { "epoch": 1.9177089623048391, "grad_norm": 0.5859375, "learning_rate": 8.381348875083573e-08, "loss": 1.3408, "step": 11116 }, { "epoch": 1.917881480203571, "grad_norm": 0.57421875, "learning_rate": 8.346219131835976e-08, "loss": 1.5091, "step": 11117 }, { "epoch": 1.918053998102303, "grad_norm": 0.6171875, "learning_rate": 8.31116285649658e-08, "loss": 1.3755, "step": 11118 }, { "epoch": 1.918226516001035, "grad_norm": 0.5859375, "learning_rate": 8.276180051662641e-08, "loss": 1.4617, "step": 11119 }, { "epoch": 1.918399033899767, "grad_norm": 0.57421875, "learning_rate": 8.241270719925865e-08, "loss": 1.414, "step": 11120 }, { "epoch": 1.9185715517984991, "grad_norm": 0.5703125, "learning_rate": 8.206434863872514e-08, "loss": 1.4393, "step": 11121 }, { "epoch": 1.918744069697231, "grad_norm": 0.5703125, "learning_rate": 8.171672486083526e-08, "loss": 1.3922, "step": 11122 }, { "epoch": 1.918916587595963, "grad_norm": 0.5703125, "learning_rate": 8.136983589134173e-08, "loss": 1.4418, "step": 11123 }, { "epoch": 1.919089105494695, "grad_norm": 0.5703125, "learning_rate": 8.102368175594733e-08, "loss": 1.4394, "step": 11124 }, { "epoch": 1.9192616233934272, "grad_norm": 0.56640625, "learning_rate": 8.067826248029264e-08, "loss": 1.3664, "step": 11125 }, { "epoch": 1.9194341412921592, "grad_norm": 0.5625, "learning_rate": 8.033357808997278e-08, "loss": 1.4185, "step": 11126 }, { "epoch": 1.9196066591908911, "grad_norm": 0.61328125, "learning_rate": 7.998962861052173e-08, "loss": 1.3817, "step": 11127 }, { "epoch": 1.919779177089623, "grad_norm": 0.58203125, "learning_rate": 7.964641406742135e-08, "loss": 1.3643, "step": 11128 }, { "epoch": 1.919951694988355, "grad_norm": 0.64453125, "learning_rate": 7.93039344861013e-08, "loss": 1.4467, "step": 11129 }, { "epoch": 1.920124212887087, "grad_norm": 0.59765625, "learning_rate": 7.896218989193239e-08, "loss": 1.4712, "step": 11130 }, { "epoch": 1.920296730785819, "grad_norm": 0.57421875, "learning_rate": 7.862118031023436e-08, "loss": 1.4224, "step": 11131 }, { "epoch": 1.920469248684551, "grad_norm": 0.5859375, "learning_rate": 7.828090576627034e-08, "loss": 1.4686, "step": 11132 }, { "epoch": 1.920641766583283, "grad_norm": 0.55859375, "learning_rate": 7.794136628525129e-08, "loss": 1.4157, "step": 11133 }, { "epoch": 1.9208142844820149, "grad_norm": 0.6171875, "learning_rate": 7.760256189233151e-08, "loss": 1.4661, "step": 11134 }, { "epoch": 1.920986802380747, "grad_norm": 0.6015625, "learning_rate": 7.726449261261205e-08, "loss": 1.4879, "step": 11135 }, { "epoch": 1.921159320279479, "grad_norm": 0.58984375, "learning_rate": 7.69271584711384e-08, "loss": 1.424, "step": 11136 }, { "epoch": 1.921331838178211, "grad_norm": 0.6328125, "learning_rate": 7.659055949290395e-08, "loss": 1.4365, "step": 11137 }, { "epoch": 1.9215043560769431, "grad_norm": 0.76171875, "learning_rate": 7.625469570284427e-08, "loss": 1.4613, "step": 11138 }, { "epoch": 1.921676873975675, "grad_norm": 0.60546875, "learning_rate": 7.591956712584392e-08, "loss": 1.5346, "step": 11139 }, { "epoch": 1.921849391874407, "grad_norm": 1.28125, "learning_rate": 7.55851737867297e-08, "loss": 1.4781, "step": 11140 }, { "epoch": 1.922021909773139, "grad_norm": 0.57421875, "learning_rate": 7.525151571027734e-08, "loss": 1.4212, "step": 11141 }, { "epoch": 1.922194427671871, "grad_norm": 0.56640625, "learning_rate": 7.491859292120484e-08, "loss": 1.4737, "step": 11142 }, { "epoch": 1.922366945570603, "grad_norm": 0.578125, "learning_rate": 7.458640544417806e-08, "loss": 1.4486, "step": 11143 }, { "epoch": 1.922539463469335, "grad_norm": 0.56640625, "learning_rate": 7.425495330380617e-08, "loss": 1.4661, "step": 11144 }, { "epoch": 1.9227119813680669, "grad_norm": 0.60546875, "learning_rate": 7.392423652464731e-08, "loss": 1.4051, "step": 11145 }, { "epoch": 1.9228844992667988, "grad_norm": 0.6015625, "learning_rate": 7.3594255131203e-08, "loss": 1.4672, "step": 11146 }, { "epoch": 1.9230570171655308, "grad_norm": 0.625, "learning_rate": 7.326500914791701e-08, "loss": 1.3742, "step": 11147 }, { "epoch": 1.9232295350642628, "grad_norm": 0.66015625, "learning_rate": 7.29364985991865e-08, "loss": 1.3044, "step": 11148 }, { "epoch": 1.923402052962995, "grad_norm": 0.59765625, "learning_rate": 7.260872350934533e-08, "loss": 1.3507, "step": 11149 }, { "epoch": 1.923574570861727, "grad_norm": 0.59765625, "learning_rate": 7.228168390268075e-08, "loss": 1.3799, "step": 11150 }, { "epoch": 1.9237470887604589, "grad_norm": 0.58203125, "learning_rate": 7.195537980341894e-08, "loss": 1.4878, "step": 11151 }, { "epoch": 1.923919606659191, "grad_norm": 0.5703125, "learning_rate": 7.162981123573609e-08, "loss": 1.4538, "step": 11152 }, { "epoch": 1.924092124557923, "grad_norm": 0.59375, "learning_rate": 7.130497822375293e-08, "loss": 1.5007, "step": 11153 }, { "epoch": 1.924264642456655, "grad_norm": 0.63671875, "learning_rate": 7.098088079153353e-08, "loss": 1.5421, "step": 11154 }, { "epoch": 1.924437160355387, "grad_norm": 1.125, "learning_rate": 7.065751896309092e-08, "loss": 1.3665, "step": 11155 }, { "epoch": 1.9246096782541189, "grad_norm": 0.625, "learning_rate": 7.033489276238037e-08, "loss": 1.4511, "step": 11156 }, { "epoch": 1.9247821961528508, "grad_norm": 0.609375, "learning_rate": 7.001300221330387e-08, "loss": 1.398, "step": 11157 }, { "epoch": 1.9249547140515828, "grad_norm": 0.5859375, "learning_rate": 6.969184733970902e-08, "loss": 1.3415, "step": 11158 }, { "epoch": 1.9251272319503148, "grad_norm": 0.63671875, "learning_rate": 6.937142816539121e-08, "loss": 1.3978, "step": 11159 }, { "epoch": 1.9252997498490467, "grad_norm": 0.56640625, "learning_rate": 6.905174471408594e-08, "loss": 1.4171, "step": 11160 }, { "epoch": 1.9254722677477787, "grad_norm": 0.58203125, "learning_rate": 6.873279700947977e-08, "loss": 1.4101, "step": 11161 }, { "epoch": 1.9256447856465109, "grad_norm": 0.578125, "learning_rate": 6.84145850752016e-08, "loss": 1.4081, "step": 11162 }, { "epoch": 1.9258173035452428, "grad_norm": 0.5625, "learning_rate": 6.809710893482591e-08, "loss": 1.367, "step": 11163 }, { "epoch": 1.9259898214439748, "grad_norm": 0.578125, "learning_rate": 6.778036861187277e-08, "loss": 1.4203, "step": 11164 }, { "epoch": 1.9261623393427068, "grad_norm": 0.6328125, "learning_rate": 6.746436412981117e-08, "loss": 1.5098, "step": 11165 }, { "epoch": 1.926334857241439, "grad_norm": 0.5859375, "learning_rate": 6.714909551204907e-08, "loss": 1.3379, "step": 11166 }, { "epoch": 1.926507375140171, "grad_norm": 0.578125, "learning_rate": 6.683456278194666e-08, "loss": 1.4345, "step": 11167 }, { "epoch": 1.9266798930389029, "grad_norm": 0.6171875, "learning_rate": 6.652076596280422e-08, "loss": 1.4645, "step": 11168 }, { "epoch": 1.9268524109376348, "grad_norm": 0.5859375, "learning_rate": 6.6207705077872e-08, "loss": 1.4089, "step": 11169 }, { "epoch": 1.9270249288363668, "grad_norm": 0.5859375, "learning_rate": 6.589538015034148e-08, "loss": 1.3834, "step": 11170 }, { "epoch": 1.9271974467350987, "grad_norm": 0.6015625, "learning_rate": 6.55837912033519e-08, "loss": 1.4337, "step": 11171 }, { "epoch": 1.9273699646338307, "grad_norm": 0.59765625, "learning_rate": 6.527293825998815e-08, "loss": 1.4587, "step": 11172 }, { "epoch": 1.9275424825325627, "grad_norm": 0.60546875, "learning_rate": 6.496282134328069e-08, "loss": 1.4673, "step": 11173 }, { "epoch": 1.9277150004312946, "grad_norm": 0.61328125, "learning_rate": 6.465344047620336e-08, "loss": 1.416, "step": 11174 }, { "epoch": 1.9278875183300266, "grad_norm": 0.61328125, "learning_rate": 6.434479568167896e-08, "loss": 1.4711, "step": 11175 }, { "epoch": 1.9280600362287588, "grad_norm": 0.5859375, "learning_rate": 6.40368869825736e-08, "loss": 1.4945, "step": 11176 }, { "epoch": 1.9282325541274907, "grad_norm": 0.58984375, "learning_rate": 6.372971440169684e-08, "loss": 1.3634, "step": 11177 }, { "epoch": 1.9284050720262227, "grad_norm": 0.58203125, "learning_rate": 6.342327796180936e-08, "loss": 1.4011, "step": 11178 }, { "epoch": 1.9285775899249549, "grad_norm": 0.56640625, "learning_rate": 6.311757768560967e-08, "loss": 1.4638, "step": 11179 }, { "epoch": 1.9287501078236868, "grad_norm": 0.57421875, "learning_rate": 6.281261359575074e-08, "loss": 1.4616, "step": 11180 }, { "epoch": 1.9289226257224188, "grad_norm": 0.59765625, "learning_rate": 6.250838571482231e-08, "loss": 1.5161, "step": 11181 }, { "epoch": 1.9290951436211508, "grad_norm": 0.5390625, "learning_rate": 6.220489406536523e-08, "loss": 1.3275, "step": 11182 }, { "epoch": 1.9292676615198827, "grad_norm": 0.5703125, "learning_rate": 6.190213866986483e-08, "loss": 1.4386, "step": 11183 }, { "epoch": 1.9294401794186147, "grad_norm": 0.6171875, "learning_rate": 6.160011955074874e-08, "loss": 1.4496, "step": 11184 }, { "epoch": 1.9296126973173466, "grad_norm": 0.5859375, "learning_rate": 6.12988367303946e-08, "loss": 1.3581, "step": 11185 }, { "epoch": 1.9297852152160786, "grad_norm": 0.5703125, "learning_rate": 6.099829023112236e-08, "loss": 1.4737, "step": 11186 }, { "epoch": 1.9299577331148106, "grad_norm": 0.5625, "learning_rate": 6.069848007519863e-08, "loss": 1.3849, "step": 11187 }, { "epoch": 1.9301302510135425, "grad_norm": 0.58203125, "learning_rate": 6.039940628483454e-08, "loss": 1.4742, "step": 11188 }, { "epoch": 1.9303027689122747, "grad_norm": 0.6875, "learning_rate": 6.010106888218792e-08, "loss": 1.4461, "step": 11189 }, { "epoch": 1.9304752868110067, "grad_norm": 0.62890625, "learning_rate": 5.98034678893622e-08, "loss": 1.4375, "step": 11190 }, { "epoch": 1.9306478047097386, "grad_norm": 0.5625, "learning_rate": 5.950660332840419e-08, "loss": 1.3679, "step": 11191 }, { "epoch": 1.9308203226084706, "grad_norm": 0.81640625, "learning_rate": 5.9210475221308515e-08, "loss": 1.4385, "step": 11192 }, { "epoch": 1.9309928405072028, "grad_norm": 0.609375, "learning_rate": 5.8915083590013186e-08, "loss": 1.4601, "step": 11193 }, { "epoch": 1.9311653584059347, "grad_norm": 0.5703125, "learning_rate": 5.862042845640403e-08, "loss": 1.3369, "step": 11194 }, { "epoch": 1.9313378763046667, "grad_norm": 0.55859375, "learning_rate": 5.832650984231025e-08, "loss": 1.4013, "step": 11195 }, { "epoch": 1.9315103942033987, "grad_norm": 0.56640625, "learning_rate": 5.8033327769505546e-08, "loss": 1.4503, "step": 11196 }, { "epoch": 1.9316829121021306, "grad_norm": 0.546875, "learning_rate": 5.774088225971364e-08, "loss": 1.264, "step": 11197 }, { "epoch": 1.9318554300008626, "grad_norm": 0.83984375, "learning_rate": 5.7449173334598316e-08, "loss": 1.4681, "step": 11198 }, { "epoch": 1.9320279478995945, "grad_norm": 0.609375, "learning_rate": 5.7158201015773404e-08, "loss": 1.5123, "step": 11199 }, { "epoch": 1.9322004657983265, "grad_norm": 0.578125, "learning_rate": 5.6867965324793886e-08, "loss": 1.3687, "step": 11200 }, { "epoch": 1.9322004657983265, "eval_loss": 1.4070696830749512, "eval_runtime": 10.8729, "eval_samples_per_second": 94.179, "eval_steps_per_second": 23.545, "step": 11200 }, { "epoch": 1.9323729836970585, "grad_norm": 0.65234375, "learning_rate": 5.657846628316366e-08, "loss": 1.3929, "step": 11201 }, { "epoch": 1.9325455015957904, "grad_norm": 0.58203125, "learning_rate": 5.628970391232891e-08, "loss": 1.5419, "step": 11202 }, { "epoch": 1.9327180194945226, "grad_norm": 0.55859375, "learning_rate": 5.600167823368474e-08, "loss": 1.3657, "step": 11203 }, { "epoch": 1.9328905373932546, "grad_norm": 0.609375, "learning_rate": 5.571438926856964e-08, "loss": 1.4119, "step": 11204 }, { "epoch": 1.9330630552919865, "grad_norm": 0.6484375, "learning_rate": 5.5427837038266595e-08, "loss": 1.409, "step": 11205 }, { "epoch": 1.9332355731907187, "grad_norm": 0.6640625, "learning_rate": 5.5142021564006386e-08, "loss": 1.6224, "step": 11206 }, { "epoch": 1.9334080910894507, "grad_norm": 0.6015625, "learning_rate": 5.485694286696319e-08, "loss": 1.4667, "step": 11207 }, { "epoch": 1.9335806089881826, "grad_norm": 0.58203125, "learning_rate": 5.4572600968257894e-08, "loss": 1.3575, "step": 11208 }, { "epoch": 1.9337531268869146, "grad_norm": 0.609375, "learning_rate": 5.428899588895586e-08, "loss": 1.4499, "step": 11209 }, { "epoch": 1.9339256447856465, "grad_norm": 0.59375, "learning_rate": 5.4006127650069185e-08, "loss": 1.3383, "step": 11210 }, { "epoch": 1.9340981626843785, "grad_norm": 0.58984375, "learning_rate": 5.372399627255442e-08, "loss": 1.5185, "step": 11211 }, { "epoch": 1.9342706805831105, "grad_norm": 0.6015625, "learning_rate": 5.344260177731264e-08, "loss": 1.4495, "step": 11212 }, { "epoch": 1.9344431984818424, "grad_norm": 0.58203125, "learning_rate": 5.31619441851916e-08, "loss": 1.4181, "step": 11213 }, { "epoch": 1.9346157163805744, "grad_norm": 0.5859375, "learning_rate": 5.288202351698468e-08, "loss": 1.4742, "step": 11214 }, { "epoch": 1.9347882342793064, "grad_norm": 0.59765625, "learning_rate": 5.260283979343084e-08, "loss": 1.4408, "step": 11215 }, { "epoch": 1.9349607521780383, "grad_norm": 0.5625, "learning_rate": 5.232439303521131e-08, "loss": 1.5229, "step": 11216 }, { "epoch": 1.9351332700767705, "grad_norm": 0.64453125, "learning_rate": 5.2046683262957366e-08, "loss": 1.441, "step": 11217 }, { "epoch": 1.9353057879755025, "grad_norm": 0.59765625, "learning_rate": 5.1769710497243664e-08, "loss": 1.4484, "step": 11218 }, { "epoch": 1.9354783058742344, "grad_norm": 0.61328125, "learning_rate": 5.149347475858824e-08, "loss": 1.4355, "step": 11219 }, { "epoch": 1.9356508237729666, "grad_norm": 0.5859375, "learning_rate": 5.121797606745804e-08, "loss": 1.3858, "step": 11220 }, { "epoch": 1.9358233416716986, "grad_norm": 0.59375, "learning_rate": 5.094321444426231e-08, "loss": 1.4759, "step": 11221 }, { "epoch": 1.9359958595704305, "grad_norm": 0.58984375, "learning_rate": 5.0669189909358094e-08, "loss": 1.4333, "step": 11222 }, { "epoch": 1.9361683774691625, "grad_norm": 0.6171875, "learning_rate": 5.0395902483046934e-08, "loss": 1.4954, "step": 11223 }, { "epoch": 1.9363408953678944, "grad_norm": 0.60546875, "learning_rate": 5.012335218557374e-08, "loss": 1.3914, "step": 11224 }, { "epoch": 1.9365134132666264, "grad_norm": 0.6171875, "learning_rate": 4.985153903713458e-08, "loss": 1.3743, "step": 11225 }, { "epoch": 1.9366859311653584, "grad_norm": 0.578125, "learning_rate": 4.9580463057863345e-08, "loss": 1.5157, "step": 11226 }, { "epoch": 1.9368584490640903, "grad_norm": 0.65234375, "learning_rate": 4.9310124267845095e-08, "loss": 1.4125, "step": 11227 }, { "epoch": 1.9370309669628223, "grad_norm": 0.55078125, "learning_rate": 4.904052268710713e-08, "loss": 1.4332, "step": 11228 }, { "epoch": 1.9372034848615542, "grad_norm": 0.6796875, "learning_rate": 4.8771658335623476e-08, "loss": 1.4508, "step": 11229 }, { "epoch": 1.9373760027602864, "grad_norm": 0.58984375, "learning_rate": 4.850353123331486e-08, "loss": 1.4796, "step": 11230 }, { "epoch": 1.9375485206590184, "grad_norm": 0.5546875, "learning_rate": 4.823614140004429e-08, "loss": 1.4209, "step": 11231 }, { "epoch": 1.9377210385577504, "grad_norm": 0.58984375, "learning_rate": 4.796948885562036e-08, "loss": 1.3682, "step": 11232 }, { "epoch": 1.9378935564564823, "grad_norm": 0.53125, "learning_rate": 4.7703573619800604e-08, "loss": 1.3958, "step": 11233 }, { "epoch": 1.9380660743552145, "grad_norm": 0.80859375, "learning_rate": 4.743839571228592e-08, "loss": 1.4617, "step": 11234 }, { "epoch": 1.9382385922539465, "grad_norm": 0.6015625, "learning_rate": 4.7173955152719496e-08, "loss": 1.4021, "step": 11235 }, { "epoch": 1.9384111101526784, "grad_norm": 4.59375, "learning_rate": 4.6910251960695655e-08, "loss": 1.43, "step": 11236 }, { "epoch": 1.9385836280514104, "grad_norm": 0.640625, "learning_rate": 4.664728615574987e-08, "loss": 1.5197, "step": 11237 }, { "epoch": 1.9387561459501423, "grad_norm": 0.5625, "learning_rate": 4.638505775736546e-08, "loss": 1.4641, "step": 11238 }, { "epoch": 1.9389286638488743, "grad_norm": 0.64453125, "learning_rate": 4.612356678496799e-08, "loss": 1.4546, "step": 11239 }, { "epoch": 1.9391011817476063, "grad_norm": 0.56640625, "learning_rate": 4.5862813257931957e-08, "loss": 1.4626, "step": 11240 }, { "epoch": 1.9392736996463382, "grad_norm": 0.58203125, "learning_rate": 4.5602797195574143e-08, "loss": 1.4488, "step": 11241 }, { "epoch": 1.9394462175450702, "grad_norm": 0.56640625, "learning_rate": 4.534351861716024e-08, "loss": 1.4772, "step": 11242 }, { "epoch": 1.9396187354438021, "grad_norm": 0.59375, "learning_rate": 4.5084977541897116e-08, "loss": 1.3595, "step": 11243 }, { "epoch": 1.9397912533425343, "grad_norm": 0.56640625, "learning_rate": 4.482717398894165e-08, "loss": 1.303, "step": 11244 }, { "epoch": 1.9399637712412663, "grad_norm": 0.5703125, "learning_rate": 4.4570107977389696e-08, "loss": 1.3287, "step": 11245 }, { "epoch": 1.9401362891399982, "grad_norm": 0.60546875, "learning_rate": 4.4313779526290457e-08, "loss": 1.4473, "step": 11246 }, { "epoch": 1.9403088070387304, "grad_norm": 0.61328125, "learning_rate": 4.4058188654630965e-08, "loss": 1.3413, "step": 11247 }, { "epoch": 1.9404813249374624, "grad_norm": 0.609375, "learning_rate": 4.3803335381349396e-08, "loss": 1.4993, "step": 11248 }, { "epoch": 1.9406538428361944, "grad_norm": 0.62890625, "learning_rate": 4.354921972532511e-08, "loss": 1.3871, "step": 11249 }, { "epoch": 1.9408263607349263, "grad_norm": 0.625, "learning_rate": 4.3295841705386365e-08, "loss": 1.3752, "step": 11250 }, { "epoch": 1.9409988786336583, "grad_norm": 0.59375, "learning_rate": 4.30432013403026e-08, "loss": 1.4766, "step": 11251 }, { "epoch": 1.9411713965323902, "grad_norm": 0.6015625, "learning_rate": 4.279129864879439e-08, "loss": 1.3649, "step": 11252 }, { "epoch": 1.9413439144311222, "grad_norm": 0.62890625, "learning_rate": 4.2540133649520145e-08, "loss": 1.4277, "step": 11253 }, { "epoch": 1.9415164323298542, "grad_norm": 0.58203125, "learning_rate": 4.2289706361091643e-08, "loss": 1.4751, "step": 11254 }, { "epoch": 1.9416889502285861, "grad_norm": 0.6015625, "learning_rate": 4.2040016802059604e-08, "loss": 1.4691, "step": 11255 }, { "epoch": 1.941861468127318, "grad_norm": 0.57421875, "learning_rate": 4.179106499092367e-08, "loss": 1.4345, "step": 11256 }, { "epoch": 1.94203398602605, "grad_norm": 0.57421875, "learning_rate": 4.1542850946126864e-08, "loss": 1.359, "step": 11257 }, { "epoch": 1.9422065039247822, "grad_norm": 0.578125, "learning_rate": 4.129537468605893e-08, "loss": 1.4108, "step": 11258 }, { "epoch": 1.9423790218235142, "grad_norm": 0.59375, "learning_rate": 4.1048636229055194e-08, "loss": 1.4584, "step": 11259 }, { "epoch": 1.9425515397222461, "grad_norm": 0.61328125, "learning_rate": 4.080263559339437e-08, "loss": 1.4231, "step": 11260 }, { "epoch": 1.9427240576209783, "grad_norm": 1.4453125, "learning_rate": 4.0557372797302984e-08, "loss": 1.3745, "step": 11261 }, { "epoch": 1.9428965755197103, "grad_norm": 0.71875, "learning_rate": 4.0312847858949846e-08, "loss": 1.4831, "step": 11262 }, { "epoch": 1.9430690934184423, "grad_norm": 0.53515625, "learning_rate": 4.006906079645267e-08, "loss": 1.3649, "step": 11263 }, { "epoch": 1.9432416113171742, "grad_norm": 0.60546875, "learning_rate": 3.982601162787147e-08, "loss": 1.4526, "step": 11264 }, { "epoch": 1.9434141292159062, "grad_norm": 0.609375, "learning_rate": 3.9583700371214064e-08, "loss": 1.4766, "step": 11265 }, { "epoch": 1.9435866471146381, "grad_norm": 0.6015625, "learning_rate": 3.9342127044430524e-08, "loss": 1.4487, "step": 11266 }, { "epoch": 1.94375916501337, "grad_norm": 0.5625, "learning_rate": 3.910129166541987e-08, "loss": 1.4204, "step": 11267 }, { "epoch": 1.943931682912102, "grad_norm": 0.57421875, "learning_rate": 3.8861194252024504e-08, "loss": 1.3871, "step": 11268 }, { "epoch": 1.944104200810834, "grad_norm": 0.6171875, "learning_rate": 3.86218348220313e-08, "loss": 1.4365, "step": 11269 }, { "epoch": 1.944276718709566, "grad_norm": 0.578125, "learning_rate": 3.8383213393174965e-08, "loss": 1.4831, "step": 11270 }, { "epoch": 1.9444492366082982, "grad_norm": 0.57421875, "learning_rate": 3.814532998313247e-08, "loss": 1.4162, "step": 11271 }, { "epoch": 1.9446217545070301, "grad_norm": 0.6015625, "learning_rate": 3.790818460952861e-08, "loss": 1.356, "step": 11272 }, { "epoch": 1.944794272405762, "grad_norm": 0.60546875, "learning_rate": 3.7671777289932654e-08, "loss": 1.4233, "step": 11273 }, { "epoch": 1.944966790304494, "grad_norm": 0.5859375, "learning_rate": 3.743610804185949e-08, "loss": 1.4831, "step": 11274 }, { "epoch": 1.9451393082032262, "grad_norm": 0.56640625, "learning_rate": 3.720117688276737e-08, "loss": 1.3817, "step": 11275 }, { "epoch": 1.9453118261019582, "grad_norm": 0.5546875, "learning_rate": 3.6966983830063477e-08, "loss": 1.3679, "step": 11276 }, { "epoch": 1.9454843440006901, "grad_norm": 0.578125, "learning_rate": 3.673352890109616e-08, "loss": 1.4826, "step": 11277 }, { "epoch": 1.945656861899422, "grad_norm": 0.5859375, "learning_rate": 3.650081211316381e-08, "loss": 1.4326, "step": 11278 }, { "epoch": 1.945829379798154, "grad_norm": 0.56640625, "learning_rate": 3.626883348350485e-08, "loss": 1.4831, "step": 11279 }, { "epoch": 1.946001897696886, "grad_norm": 0.55859375, "learning_rate": 3.603759302930776e-08, "loss": 1.4117, "step": 11280 }, { "epoch": 1.946174415595618, "grad_norm": 0.69921875, "learning_rate": 3.5807090767703276e-08, "loss": 1.4233, "step": 11281 }, { "epoch": 1.94634693349435, "grad_norm": 0.59375, "learning_rate": 3.557732671576885e-08, "loss": 1.4643, "step": 11282 }, { "epoch": 1.946519451393082, "grad_norm": 0.546875, "learning_rate": 3.534830089052532e-08, "loss": 1.4645, "step": 11283 }, { "epoch": 1.9466919692918139, "grad_norm": 0.57421875, "learning_rate": 3.512001330894355e-08, "loss": 1.3985, "step": 11284 }, { "epoch": 1.946864487190546, "grad_norm": 0.5546875, "learning_rate": 3.4892463987933335e-08, "loss": 1.4634, "step": 11285 }, { "epoch": 1.947037005089278, "grad_norm": 0.57421875, "learning_rate": 3.4665652944355646e-08, "loss": 1.4396, "step": 11286 }, { "epoch": 1.94720952298801, "grad_norm": 0.60546875, "learning_rate": 3.443958019501148e-08, "loss": 1.5846, "step": 11287 }, { "epoch": 1.9473820408867422, "grad_norm": 0.60546875, "learning_rate": 3.421424575665078e-08, "loss": 1.4767, "step": 11288 }, { "epoch": 1.9475545587854741, "grad_norm": 0.59765625, "learning_rate": 3.398964964596907e-08, "loss": 1.524, "step": 11289 }, { "epoch": 1.947727076684206, "grad_norm": 0.58203125, "learning_rate": 3.376579187960305e-08, "loss": 1.3998, "step": 11290 }, { "epoch": 1.947899594582938, "grad_norm": 0.60546875, "learning_rate": 3.354267247414056e-08, "loss": 1.4296, "step": 11291 }, { "epoch": 1.94807211248167, "grad_norm": 0.66796875, "learning_rate": 3.332029144610949e-08, "loss": 1.4121, "step": 11292 }, { "epoch": 1.948244630380402, "grad_norm": 0.6171875, "learning_rate": 3.309864881198555e-08, "loss": 1.4809, "step": 11293 }, { "epoch": 1.948417148279134, "grad_norm": 0.56640625, "learning_rate": 3.2877744588190044e-08, "loss": 1.3625, "step": 11294 }, { "epoch": 1.9485896661778659, "grad_norm": 0.578125, "learning_rate": 3.2657578791088775e-08, "loss": 1.4392, "step": 11295 }, { "epoch": 1.9487621840765978, "grad_norm": 0.6953125, "learning_rate": 3.243815143699314e-08, "loss": 1.3932, "step": 11296 }, { "epoch": 1.9489347019753298, "grad_norm": 0.58203125, "learning_rate": 3.2219462542159016e-08, "loss": 1.4316, "step": 11297 }, { "epoch": 1.9491072198740618, "grad_norm": 0.55859375, "learning_rate": 3.2001512122789014e-08, "loss": 1.5107, "step": 11298 }, { "epoch": 1.949279737772794, "grad_norm": 0.61328125, "learning_rate": 3.17843001950302e-08, "loss": 1.417, "step": 11299 }, { "epoch": 1.949452255671526, "grad_norm": 0.6328125, "learning_rate": 3.1567826774974166e-08, "loss": 1.4208, "step": 11300 }, { "epoch": 1.949452255671526, "eval_loss": 1.4070621728897095, "eval_runtime": 10.985, "eval_samples_per_second": 93.218, "eval_steps_per_second": 23.305, "step": 11300 }, { "epoch": 1.9496247735702579, "grad_norm": 0.57421875, "learning_rate": 3.135209187865917e-08, "loss": 1.4143, "step": 11301 }, { "epoch": 1.94979729146899, "grad_norm": 0.56640625, "learning_rate": 3.1137095522068006e-08, "loss": 1.4008, "step": 11302 }, { "epoch": 1.949969809367722, "grad_norm": 0.56640625, "learning_rate": 3.092283772113014e-08, "loss": 1.5115, "step": 11303 }, { "epoch": 1.950142327266454, "grad_norm": 0.60546875, "learning_rate": 3.070931849171732e-08, "loss": 1.3597, "step": 11304 }, { "epoch": 1.950314845165186, "grad_norm": 0.58203125, "learning_rate": 3.049653784964912e-08, "loss": 1.505, "step": 11305 }, { "epoch": 1.950487363063918, "grad_norm": 0.609375, "learning_rate": 3.028449581068959e-08, "loss": 1.4266, "step": 11306 }, { "epoch": 1.9506598809626499, "grad_norm": 0.63671875, "learning_rate": 3.00731923905484e-08, "loss": 1.4253, "step": 11307 }, { "epoch": 1.9508323988613818, "grad_norm": 0.625, "learning_rate": 2.986262760488079e-08, "loss": 1.5135, "step": 11308 }, { "epoch": 1.9510049167601138, "grad_norm": 0.6015625, "learning_rate": 2.9652801469285396e-08, "loss": 1.4114, "step": 11309 }, { "epoch": 1.9511774346588457, "grad_norm": 2.265625, "learning_rate": 2.9443713999308676e-08, "loss": 1.4138, "step": 11310 }, { "epoch": 1.9513499525575777, "grad_norm": 0.59765625, "learning_rate": 2.923536521044046e-08, "loss": 1.3957, "step": 11311 }, { "epoch": 1.9515224704563099, "grad_norm": 0.5546875, "learning_rate": 2.9027755118116175e-08, "loss": 1.447, "step": 11312 }, { "epoch": 1.9516949883550418, "grad_norm": 0.54296875, "learning_rate": 2.8820883737716853e-08, "loss": 1.3001, "step": 11313 }, { "epoch": 1.9518675062537738, "grad_norm": 0.546875, "learning_rate": 2.8614751084570236e-08, "loss": 1.4411, "step": 11314 }, { "epoch": 1.9520400241525058, "grad_norm": 0.5546875, "learning_rate": 2.8409357173946327e-08, "loss": 1.3716, "step": 11315 }, { "epoch": 1.952212542051238, "grad_norm": 1.4140625, "learning_rate": 2.8204702021062958e-08, "loss": 1.5538, "step": 11316 }, { "epoch": 1.95238505994997, "grad_norm": 0.59375, "learning_rate": 2.800078564108133e-08, "loss": 1.3402, "step": 11317 }, { "epoch": 1.9525575778487019, "grad_norm": 0.578125, "learning_rate": 2.779760804911047e-08, "loss": 1.3476, "step": 11318 }, { "epoch": 1.9527300957474338, "grad_norm": 0.5859375, "learning_rate": 2.759516926020056e-08, "loss": 1.3268, "step": 11319 }, { "epoch": 1.9529026136461658, "grad_norm": 0.65625, "learning_rate": 2.7393469289351825e-08, "loss": 1.4073, "step": 11320 }, { "epoch": 1.9530751315448978, "grad_norm": 0.5546875, "learning_rate": 2.7192508151506758e-08, "loss": 1.412, "step": 11321 }, { "epoch": 1.9532476494436297, "grad_norm": 0.59765625, "learning_rate": 2.6992285861553447e-08, "loss": 1.5015, "step": 11322 }, { "epoch": 1.9534201673423617, "grad_norm": 0.64453125, "learning_rate": 2.6792802434326692e-08, "loss": 1.5071, "step": 11323 }, { "epoch": 1.9535926852410936, "grad_norm": 0.55078125, "learning_rate": 2.6594057884603565e-08, "loss": 1.3677, "step": 11324 }, { "epoch": 1.9537652031398256, "grad_norm": 0.62109375, "learning_rate": 2.639605222710895e-08, "loss": 1.4214, "step": 11325 }, { "epoch": 1.9539377210385578, "grad_norm": 0.6171875, "learning_rate": 2.6198785476513333e-08, "loss": 1.3839, "step": 11326 }, { "epoch": 1.9541102389372897, "grad_norm": 0.62109375, "learning_rate": 2.6002257647431694e-08, "loss": 1.5533, "step": 11327 }, { "epoch": 1.9542827568360217, "grad_norm": 0.5703125, "learning_rate": 2.5806468754422388e-08, "loss": 1.3973, "step": 11328 }, { "epoch": 1.954455274734754, "grad_norm": 0.6015625, "learning_rate": 2.5611418811991586e-08, "loss": 1.4482, "step": 11329 }, { "epoch": 1.9546277926334858, "grad_norm": 0.58203125, "learning_rate": 2.541710783458884e-08, "loss": 1.4284, "step": 11330 }, { "epoch": 1.9548003105322178, "grad_norm": 0.57421875, "learning_rate": 2.5223535836612634e-08, "loss": 1.4421, "step": 11331 }, { "epoch": 1.9549728284309498, "grad_norm": 0.62890625, "learning_rate": 2.503070283240039e-08, "loss": 1.4014, "step": 11332 }, { "epoch": 1.9551453463296817, "grad_norm": 0.59375, "learning_rate": 2.4838608836241783e-08, "loss": 1.3639, "step": 11333 }, { "epoch": 1.9553178642284137, "grad_norm": 0.59375, "learning_rate": 2.4647253862365438e-08, "loss": 1.4609, "step": 11334 }, { "epoch": 1.9554903821271457, "grad_norm": 0.828125, "learning_rate": 2.445663792495001e-08, "loss": 1.3816, "step": 11335 }, { "epoch": 1.9556629000258776, "grad_norm": 0.60546875, "learning_rate": 2.4266761038116428e-08, "loss": 1.418, "step": 11336 }, { "epoch": 1.9558354179246096, "grad_norm": 0.61328125, "learning_rate": 2.4077623215933432e-08, "loss": 1.495, "step": 11337 }, { "epoch": 1.9560079358233415, "grad_norm": 0.609375, "learning_rate": 2.388922447241204e-08, "loss": 1.4245, "step": 11338 }, { "epoch": 1.9561804537220737, "grad_norm": 0.5625, "learning_rate": 2.370156482150998e-08, "loss": 1.4573, "step": 11339 }, { "epoch": 1.9563529716208057, "grad_norm": 0.58984375, "learning_rate": 2.3514644277131682e-08, "loss": 1.4742, "step": 11340 }, { "epoch": 1.9565254895195376, "grad_norm": 0.57421875, "learning_rate": 2.3328462853123846e-08, "loss": 1.5041, "step": 11341 }, { "epoch": 1.9566980074182696, "grad_norm": 0.58984375, "learning_rate": 2.3143020563280993e-08, "loss": 1.4036, "step": 11342 }, { "epoch": 1.9568705253170018, "grad_norm": 0.58984375, "learning_rate": 2.2958317421341026e-08, "loss": 1.3942, "step": 11343 }, { "epoch": 1.9570430432157337, "grad_norm": 0.578125, "learning_rate": 2.277435344098855e-08, "loss": 1.4601, "step": 11344 }, { "epoch": 1.9572155611144657, "grad_norm": 0.5703125, "learning_rate": 2.2591128635852666e-08, "loss": 1.3396, "step": 11345 }, { "epoch": 1.9573880790131977, "grad_norm": 0.56640625, "learning_rate": 2.240864301950807e-08, "loss": 1.4657, "step": 11346 }, { "epoch": 1.9575605969119296, "grad_norm": 0.58203125, "learning_rate": 2.2226896605473945e-08, "loss": 1.4744, "step": 11347 }, { "epoch": 1.9577331148106616, "grad_norm": 1.578125, "learning_rate": 2.2045889407215082e-08, "loss": 1.4355, "step": 11348 }, { "epoch": 1.9579056327093936, "grad_norm": 0.57421875, "learning_rate": 2.186562143814186e-08, "loss": 1.3912, "step": 11349 }, { "epoch": 1.9580781506081255, "grad_norm": 0.6796875, "learning_rate": 2.1686092711609154e-08, "loss": 1.4397, "step": 11350 }, { "epoch": 1.9582506685068575, "grad_norm": 0.59765625, "learning_rate": 2.1507303240918543e-08, "loss": 1.4747, "step": 11351 }, { "epoch": 1.9584231864055894, "grad_norm": 0.625, "learning_rate": 2.132925303931499e-08, "loss": 1.4209, "step": 11352 }, { "epoch": 1.9585957043043216, "grad_norm": 0.5703125, "learning_rate": 2.1151942119991274e-08, "loss": 1.3934, "step": 11353 }, { "epoch": 1.9587682222030536, "grad_norm": 0.578125, "learning_rate": 2.0975370496081336e-08, "loss": 1.4919, "step": 11354 }, { "epoch": 1.9589407401017855, "grad_norm": 0.61328125, "learning_rate": 2.0799538180668044e-08, "loss": 1.4422, "step": 11355 }, { "epoch": 1.9591132580005175, "grad_norm": 0.76171875, "learning_rate": 2.0624445186777643e-08, "loss": 1.4383, "step": 11356 }, { "epoch": 1.9592857758992497, "grad_norm": 0.57421875, "learning_rate": 2.045009152738309e-08, "loss": 1.4113, "step": 11357 }, { "epoch": 1.9594582937979816, "grad_norm": 0.55859375, "learning_rate": 2.0276477215399604e-08, "loss": 1.3592, "step": 11358 }, { "epoch": 1.9596308116967136, "grad_norm": 0.5625, "learning_rate": 2.0103602263692455e-08, "loss": 1.4196, "step": 11359 }, { "epoch": 1.9598033295954456, "grad_norm": 0.65234375, "learning_rate": 1.993146668506585e-08, "loss": 1.4562, "step": 11360 }, { "epoch": 1.9599758474941775, "grad_norm": 0.61328125, "learning_rate": 1.976007049227624e-08, "loss": 1.5406, "step": 11361 }, { "epoch": 1.9601483653929095, "grad_norm": 0.609375, "learning_rate": 1.9589413698019034e-08, "loss": 1.4636, "step": 11362 }, { "epoch": 1.9603208832916414, "grad_norm": 0.546875, "learning_rate": 1.9419496314939667e-08, "loss": 1.4362, "step": 11363 }, { "epoch": 1.9604934011903734, "grad_norm": 0.62890625, "learning_rate": 1.925031835562474e-08, "loss": 1.5589, "step": 11364 }, { "epoch": 1.9606659190891054, "grad_norm": 0.609375, "learning_rate": 1.9081879832608674e-08, "loss": 1.4037, "step": 11365 }, { "epoch": 1.9608384369878373, "grad_norm": 0.58984375, "learning_rate": 1.891418075837037e-08, "loss": 1.5149, "step": 11366 }, { "epoch": 1.9610109548865695, "grad_norm": 0.609375, "learning_rate": 1.8747221145334337e-08, "loss": 1.4395, "step": 11367 }, { "epoch": 1.9611834727853015, "grad_norm": 0.5546875, "learning_rate": 1.858100100587068e-08, "loss": 1.3103, "step": 11368 }, { "epoch": 1.9613559906840334, "grad_norm": 0.64453125, "learning_rate": 1.841552035229288e-08, "loss": 1.4143, "step": 11369 }, { "epoch": 1.9615285085827656, "grad_norm": 0.68359375, "learning_rate": 1.8250779196861136e-08, "loss": 1.31, "step": 11370 }, { "epoch": 1.9617010264814976, "grad_norm": 0.6015625, "learning_rate": 1.8086777551780122e-08, "loss": 1.4315, "step": 11371 }, { "epoch": 1.9618735443802295, "grad_norm": 0.6015625, "learning_rate": 1.7923515429201232e-08, "loss": 1.4189, "step": 11372 }, { "epoch": 1.9620460622789615, "grad_norm": 0.59375, "learning_rate": 1.7760992841219237e-08, "loss": 1.3975, "step": 11373 }, { "epoch": 1.9622185801776935, "grad_norm": 0.6015625, "learning_rate": 1.7599209799874505e-08, "loss": 1.4508, "step": 11374 }, { "epoch": 1.9623910980764254, "grad_norm": 0.55859375, "learning_rate": 1.7438166317153005e-08, "loss": 1.4656, "step": 11375 }, { "epoch": 1.9625636159751574, "grad_norm": 0.546875, "learning_rate": 1.72778624049863e-08, "loss": 1.3995, "step": 11376 }, { "epoch": 1.9627361338738893, "grad_norm": 0.671875, "learning_rate": 1.711829807525045e-08, "loss": 1.3984, "step": 11377 }, { "epoch": 1.9629086517726213, "grad_norm": 0.6484375, "learning_rate": 1.6959473339765997e-08, "loss": 1.4435, "step": 11378 }, { "epoch": 1.9630811696713533, "grad_norm": 0.5625, "learning_rate": 1.6801388210302416e-08, "loss": 1.3658, "step": 11379 }, { "epoch": 1.9632536875700854, "grad_norm": 0.578125, "learning_rate": 1.6644042698569228e-08, "loss": 1.4498, "step": 11380 }, { "epoch": 1.9634262054688174, "grad_norm": 0.625, "learning_rate": 1.648743681622378e-08, "loss": 1.4733, "step": 11381 }, { "epoch": 1.9635987233675494, "grad_norm": 0.59765625, "learning_rate": 1.6331570574869005e-08, "loss": 1.4511, "step": 11382 }, { "epoch": 1.9637712412662813, "grad_norm": 0.55859375, "learning_rate": 1.6176443986052337e-08, "loss": 1.4096, "step": 11383 }, { "epoch": 1.9639437591650135, "grad_norm": 0.625, "learning_rate": 1.6022057061266804e-08, "loss": 1.4136, "step": 11384 }, { "epoch": 1.9641162770637455, "grad_norm": 0.5859375, "learning_rate": 1.5868409811949926e-08, "loss": 1.435, "step": 11385 }, { "epoch": 1.9642887949624774, "grad_norm": 1.171875, "learning_rate": 1.5715502249484816e-08, "loss": 1.3491, "step": 11386 }, { "epoch": 1.9644613128612094, "grad_norm": 0.59375, "learning_rate": 1.55633343852013e-08, "loss": 1.339, "step": 11387 }, { "epoch": 1.9646338307599414, "grad_norm": 0.6328125, "learning_rate": 1.5411906230370366e-08, "loss": 1.3539, "step": 11388 }, { "epoch": 1.9648063486586733, "grad_norm": 0.5703125, "learning_rate": 1.5261217796211923e-08, "loss": 1.4352, "step": 11389 }, { "epoch": 1.9649788665574053, "grad_norm": 0.6171875, "learning_rate": 1.5111269093890378e-08, "loss": 1.4615, "step": 11390 }, { "epoch": 1.9651513844561372, "grad_norm": 0.5859375, "learning_rate": 1.4962060134513512e-08, "loss": 1.3839, "step": 11391 }, { "epoch": 1.9653239023548692, "grad_norm": 0.578125, "learning_rate": 1.4813590929138032e-08, "loss": 1.5242, "step": 11392 }, { "epoch": 1.9654964202536012, "grad_norm": 0.6015625, "learning_rate": 1.4665861488761813e-08, "loss": 1.3994, "step": 11393 }, { "epoch": 1.9656689381523333, "grad_norm": 0.5625, "learning_rate": 1.4518871824329428e-08, "loss": 1.354, "step": 11394 }, { "epoch": 1.9658414560510653, "grad_norm": 0.65625, "learning_rate": 1.4372621946731058e-08, "loss": 1.4321, "step": 11395 }, { "epoch": 1.9660139739497973, "grad_norm": 0.58203125, "learning_rate": 1.4227111866802479e-08, "loss": 1.307, "step": 11396 }, { "epoch": 1.9661864918485294, "grad_norm": 0.5625, "learning_rate": 1.4082341595322846e-08, "loss": 1.4024, "step": 11397 }, { "epoch": 1.9663590097472614, "grad_norm": 0.54296875, "learning_rate": 1.3938311143018024e-08, "loss": 1.3452, "step": 11398 }, { "epoch": 1.9665315276459934, "grad_norm": 0.56640625, "learning_rate": 1.3795020520559477e-08, "loss": 1.3352, "step": 11399 }, { "epoch": 1.9667040455447253, "grad_norm": 0.55859375, "learning_rate": 1.3652469738562046e-08, "loss": 1.3326, "step": 11400 }, { "epoch": 1.9667040455447253, "eval_loss": 1.407050371170044, "eval_runtime": 10.8811, "eval_samples_per_second": 94.108, "eval_steps_per_second": 23.527, "step": 11400 }, { "epoch": 1.9668765634434573, "grad_norm": 0.625, "learning_rate": 1.3510658807588394e-08, "loss": 1.3218, "step": 11401 }, { "epoch": 1.9670490813421893, "grad_norm": 0.9140625, "learning_rate": 1.3369587738142343e-08, "loss": 1.3314, "step": 11402 }, { "epoch": 1.9672215992409212, "grad_norm": 0.578125, "learning_rate": 1.3229256540676638e-08, "loss": 1.4445, "step": 11403 }, { "epoch": 1.9673941171396532, "grad_norm": 0.58203125, "learning_rate": 1.3089665225588522e-08, "loss": 1.3972, "step": 11404 }, { "epoch": 1.9675666350383851, "grad_norm": 0.87109375, "learning_rate": 1.295081380321861e-08, "loss": 1.4608, "step": 11405 }, { "epoch": 1.967739152937117, "grad_norm": 0.58984375, "learning_rate": 1.2812702283855338e-08, "loss": 1.4053, "step": 11406 }, { "epoch": 1.967911670835849, "grad_norm": 0.58984375, "learning_rate": 1.2675330677729413e-08, "loss": 1.4719, "step": 11407 }, { "epoch": 1.9680841887345812, "grad_norm": 0.5859375, "learning_rate": 1.253869899501825e-08, "loss": 1.523, "step": 11408 }, { "epoch": 1.9682567066333132, "grad_norm": 0.57421875, "learning_rate": 1.2402807245844861e-08, "loss": 1.4111, "step": 11409 }, { "epoch": 1.9684292245320452, "grad_norm": 0.59375, "learning_rate": 1.226765544027675e-08, "loss": 1.4143, "step": 11410 }, { "epoch": 1.9686017424307773, "grad_norm": 0.56640625, "learning_rate": 1.2133243588327014e-08, "loss": 1.4456, "step": 11411 }, { "epoch": 1.9687742603295093, "grad_norm": 0.609375, "learning_rate": 1.199957169995436e-08, "loss": 1.4577, "step": 11412 }, { "epoch": 1.9689467782282413, "grad_norm": 0.5546875, "learning_rate": 1.1866639785060862e-08, "loss": 1.3764, "step": 11413 }, { "epoch": 1.9691192961269732, "grad_norm": 0.55078125, "learning_rate": 1.1734447853495312e-08, "loss": 1.3514, "step": 11414 }, { "epoch": 1.9692918140257052, "grad_norm": 0.56640625, "learning_rate": 1.1602995915050985e-08, "loss": 1.5606, "step": 11415 }, { "epoch": 1.9694643319244372, "grad_norm": 0.5625, "learning_rate": 1.1472283979467868e-08, "loss": 1.4479, "step": 11416 }, { "epoch": 1.9696368498231691, "grad_norm": 0.58984375, "learning_rate": 1.1342312056429328e-08, "loss": 1.5718, "step": 11417 }, { "epoch": 1.969809367721901, "grad_norm": 0.640625, "learning_rate": 1.1213080155564327e-08, "loss": 1.5088, "step": 11418 }, { "epoch": 1.969981885620633, "grad_norm": 0.61328125, "learning_rate": 1.1084588286446319e-08, "loss": 1.467, "step": 11419 }, { "epoch": 1.970154403519365, "grad_norm": 0.5703125, "learning_rate": 1.0956836458596576e-08, "loss": 1.3432, "step": 11420 }, { "epoch": 1.9703269214180972, "grad_norm": 0.5625, "learning_rate": 1.082982468147864e-08, "loss": 1.4005, "step": 11421 }, { "epoch": 1.9704994393168291, "grad_norm": 0.6015625, "learning_rate": 1.070355296450165e-08, "loss": 1.3914, "step": 11422 }, { "epoch": 1.970671957215561, "grad_norm": 0.56640625, "learning_rate": 1.0578021317022569e-08, "loss": 1.315, "step": 11423 }, { "epoch": 1.970844475114293, "grad_norm": 0.58984375, "learning_rate": 1.045322974833951e-08, "loss": 1.4263, "step": 11424 }, { "epoch": 1.9710169930130252, "grad_norm": 0.5859375, "learning_rate": 1.0329178267699525e-08, "loss": 1.4316, "step": 11425 }, { "epoch": 1.9711895109117572, "grad_norm": 0.578125, "learning_rate": 1.0205866884291926e-08, "loss": 1.4727, "step": 11426 }, { "epoch": 1.9713620288104892, "grad_norm": 0.5625, "learning_rate": 1.0083295607252741e-08, "loss": 1.3613, "step": 11427 }, { "epoch": 1.9715345467092211, "grad_norm": 0.5859375, "learning_rate": 9.961464445663594e-09, "loss": 1.4517, "step": 11428 }, { "epoch": 1.971707064607953, "grad_norm": 0.65625, "learning_rate": 9.840373408548376e-09, "loss": 1.3807, "step": 11429 }, { "epoch": 1.971879582506685, "grad_norm": 0.5859375, "learning_rate": 9.720022504881022e-09, "loss": 1.36, "step": 11430 }, { "epoch": 1.972052100405417, "grad_norm": 0.59765625, "learning_rate": 9.600411743576621e-09, "loss": 1.4093, "step": 11431 }, { "epoch": 1.972224618304149, "grad_norm": 0.578125, "learning_rate": 9.481541133495864e-09, "loss": 1.4134, "step": 11432 }, { "epoch": 1.972397136202881, "grad_norm": 1.5625, "learning_rate": 9.36341068344615e-09, "loss": 1.4477, "step": 11433 }, { "epoch": 1.972569654101613, "grad_norm": 0.56640625, "learning_rate": 9.246020402179368e-09, "loss": 1.4009, "step": 11434 }, { "epoch": 1.972742172000345, "grad_norm": 0.58203125, "learning_rate": 9.129370298393004e-09, "loss": 1.4254, "step": 11435 }, { "epoch": 1.972914689899077, "grad_norm": 0.57421875, "learning_rate": 9.013460380729033e-09, "loss": 1.414, "step": 11436 }, { "epoch": 1.973087207797809, "grad_norm": 0.609375, "learning_rate": 8.898290657773923e-09, "loss": 1.4608, "step": 11437 }, { "epoch": 1.9732597256965412, "grad_norm": 0.64453125, "learning_rate": 8.783861138060845e-09, "loss": 1.5263, "step": 11438 }, { "epoch": 1.9734322435952731, "grad_norm": 0.56640625, "learning_rate": 8.670171830067464e-09, "loss": 1.3921, "step": 11439 }, { "epoch": 1.973604761494005, "grad_norm": 0.61328125, "learning_rate": 8.557222742215932e-09, "loss": 1.3992, "step": 11440 }, { "epoch": 1.973777279392737, "grad_norm": 0.578125, "learning_rate": 8.445013882875108e-09, "loss": 1.547, "step": 11441 }, { "epoch": 1.973949797291469, "grad_norm": 0.58984375, "learning_rate": 8.333545260357235e-09, "loss": 1.4586, "step": 11442 }, { "epoch": 1.974122315190201, "grad_norm": 0.57421875, "learning_rate": 8.222816882922368e-09, "loss": 1.3983, "step": 11443 }, { "epoch": 1.974294833088933, "grad_norm": 2.140625, "learning_rate": 8.11282875877173e-09, "loss": 1.557, "step": 11444 }, { "epoch": 1.974467350987665, "grad_norm": 0.59375, "learning_rate": 8.003580896055462e-09, "loss": 1.363, "step": 11445 }, { "epoch": 1.9746398688863969, "grad_norm": 0.6171875, "learning_rate": 7.895073302865985e-09, "loss": 1.3117, "step": 11446 }, { "epoch": 1.9748123867851288, "grad_norm": 0.55859375, "learning_rate": 7.787305987243532e-09, "loss": 1.329, "step": 11447 }, { "epoch": 1.9749849046838608, "grad_norm": 0.5625, "learning_rate": 7.680278957171716e-09, "loss": 1.2998, "step": 11448 }, { "epoch": 1.975157422582593, "grad_norm": 0.58203125, "learning_rate": 7.573992220580862e-09, "loss": 1.3532, "step": 11449 }, { "epoch": 1.975329940481325, "grad_norm": 0.64453125, "learning_rate": 7.468445785342448e-09, "loss": 1.341, "step": 11450 }, { "epoch": 1.975502458380057, "grad_norm": 0.62890625, "learning_rate": 7.363639659279109e-09, "loss": 1.4146, "step": 11451 }, { "epoch": 1.975674976278789, "grad_norm": 0.8203125, "learning_rate": 7.259573850153523e-09, "loss": 1.5061, "step": 11452 }, { "epoch": 1.975847494177521, "grad_norm": 0.58984375, "learning_rate": 7.156248365676188e-09, "loss": 1.323, "step": 11453 }, { "epoch": 1.976020012076253, "grad_norm": 0.546875, "learning_rate": 7.053663213502093e-09, "loss": 1.3581, "step": 11454 }, { "epoch": 1.976192529974985, "grad_norm": 0.58203125, "learning_rate": 6.951818401231825e-09, "loss": 1.372, "step": 11455 }, { "epoch": 1.976365047873717, "grad_norm": 0.6328125, "learning_rate": 6.850713936410458e-09, "loss": 1.3757, "step": 11456 }, { "epoch": 1.9765375657724489, "grad_norm": 0.5859375, "learning_rate": 6.750349826527558e-09, "loss": 1.3695, "step": 11457 }, { "epoch": 1.9767100836711808, "grad_norm": 0.57421875, "learning_rate": 6.650726079019398e-09, "loss": 1.3879, "step": 11458 }, { "epoch": 1.9768826015699128, "grad_norm": 0.59765625, "learning_rate": 6.551842701267852e-09, "loss": 1.5441, "step": 11459 }, { "epoch": 1.9770551194686448, "grad_norm": 0.6015625, "learning_rate": 6.45369970059706e-09, "loss": 1.3529, "step": 11460 }, { "epoch": 1.9772276373673767, "grad_norm": 0.68359375, "learning_rate": 6.356297084278984e-09, "loss": 1.5121, "step": 11461 }, { "epoch": 1.977400155266109, "grad_norm": 0.58984375, "learning_rate": 6.259634859528962e-09, "loss": 1.3742, "step": 11462 }, { "epoch": 1.9775726731648409, "grad_norm": 0.59765625, "learning_rate": 6.1637130335090446e-09, "loss": 1.4024, "step": 11463 }, { "epoch": 1.9777451910635728, "grad_norm": 0.5859375, "learning_rate": 6.068531613326878e-09, "loss": 1.4148, "step": 11464 }, { "epoch": 1.9779177089623048, "grad_norm": 0.5703125, "learning_rate": 5.9740906060312685e-09, "loss": 1.4551, "step": 11465 }, { "epoch": 1.978090226861037, "grad_norm": 0.63671875, "learning_rate": 5.880390018621063e-09, "loss": 1.4255, "step": 11466 }, { "epoch": 1.978262744759769, "grad_norm": 0.55859375, "learning_rate": 5.787429858038485e-09, "loss": 1.4293, "step": 11467 }, { "epoch": 1.978435262658501, "grad_norm": 0.625, "learning_rate": 5.695210131169137e-09, "loss": 1.4679, "step": 11468 }, { "epoch": 1.9786077805572329, "grad_norm": 0.57421875, "learning_rate": 5.603730844846444e-09, "loss": 1.3792, "step": 11469 }, { "epoch": 1.9787802984559648, "grad_norm": 0.6328125, "learning_rate": 5.512992005846096e-09, "loss": 1.4969, "step": 11470 }, { "epoch": 1.9789528163546968, "grad_norm": 0.5390625, "learning_rate": 5.422993620892713e-09, "loss": 1.4239, "step": 11471 }, { "epoch": 1.9791253342534287, "grad_norm": 0.62890625, "learning_rate": 5.333735696653186e-09, "loss": 1.442, "step": 11472 }, { "epoch": 1.9792978521521607, "grad_norm": 0.62109375, "learning_rate": 5.245218239740002e-09, "loss": 1.4307, "step": 11473 }, { "epoch": 1.9794703700508927, "grad_norm": 0.578125, "learning_rate": 5.157441256710138e-09, "loss": 1.4257, "step": 11474 }, { "epoch": 1.9796428879496246, "grad_norm": 0.6484375, "learning_rate": 5.070404754068392e-09, "loss": 1.5279, "step": 11475 }, { "epoch": 1.9798154058483568, "grad_norm": 0.58203125, "learning_rate": 4.984108738261828e-09, "loss": 1.4082, "step": 11476 }, { "epoch": 1.9799879237470888, "grad_norm": 0.6015625, "learning_rate": 4.898553215685331e-09, "loss": 1.4421, "step": 11477 }, { "epoch": 1.9801604416458207, "grad_norm": 0.5234375, "learning_rate": 4.813738192676054e-09, "loss": 1.338, "step": 11478 }, { "epoch": 1.980332959544553, "grad_norm": 0.61328125, "learning_rate": 4.729663675516749e-09, "loss": 1.5036, "step": 11479 }, { "epoch": 1.9805054774432849, "grad_norm": 0.5859375, "learning_rate": 4.6463296704379876e-09, "loss": 1.402, "step": 11480 }, { "epoch": 1.9806779953420168, "grad_norm": 0.56640625, "learning_rate": 4.5637361836126106e-09, "loss": 1.39, "step": 11481 }, { "epoch": 1.9808505132407488, "grad_norm": 0.6015625, "learning_rate": 4.481883221160166e-09, "loss": 1.3499, "step": 11482 }, { "epoch": 1.9810230311394808, "grad_norm": 0.69921875, "learning_rate": 4.400770789145803e-09, "loss": 1.5186, "step": 11483 }, { "epoch": 1.9811955490382127, "grad_norm": 0.57421875, "learning_rate": 4.320398893576938e-09, "loss": 1.4402, "step": 11484 }, { "epoch": 1.9813680669369447, "grad_norm": 0.5703125, "learning_rate": 4.240767540407698e-09, "loss": 1.5271, "step": 11485 }, { "epoch": 1.9815405848356766, "grad_norm": 0.5546875, "learning_rate": 4.161876735540027e-09, "loss": 1.4659, "step": 11486 }, { "epoch": 1.9817131027344086, "grad_norm": 0.58203125, "learning_rate": 4.08372648481703e-09, "loss": 1.4919, "step": 11487 }, { "epoch": 1.9818856206331406, "grad_norm": 0.57421875, "learning_rate": 4.00631679402963e-09, "loss": 1.3573, "step": 11488 }, { "epoch": 1.9820581385318725, "grad_norm": 0.66015625, "learning_rate": 3.9296476689110185e-09, "loss": 1.469, "step": 11489 }, { "epoch": 1.9822306564306047, "grad_norm": 0.5546875, "learning_rate": 3.853719115143317e-09, "loss": 1.4017, "step": 11490 }, { "epoch": 1.9824031743293367, "grad_norm": 0.62890625, "learning_rate": 3.778531138350916e-09, "loss": 1.4678, "step": 11491 }, { "epoch": 1.9825756922280686, "grad_norm": 0.62109375, "learning_rate": 3.7040837441038035e-09, "loss": 1.4091, "step": 11492 }, { "epoch": 1.9827482101268008, "grad_norm": 0.56640625, "learning_rate": 3.630376937917568e-09, "loss": 1.4825, "step": 11493 }, { "epoch": 1.9829207280255328, "grad_norm": 0.78515625, "learning_rate": 3.5574107252533963e-09, "loss": 1.3433, "step": 11494 }, { "epoch": 1.9830932459242647, "grad_norm": 0.59765625, "learning_rate": 3.4851851115180745e-09, "loss": 1.4208, "step": 11495 }, { "epoch": 1.9832657638229967, "grad_norm": 0.57421875, "learning_rate": 3.4137001020595473e-09, "loss": 1.5352, "step": 11496 }, { "epoch": 1.9834382817217286, "grad_norm": 0.65234375, "learning_rate": 3.3429557021769087e-09, "loss": 1.3472, "step": 11497 }, { "epoch": 1.9836107996204606, "grad_norm": 0.578125, "learning_rate": 3.2729519171093018e-09, "loss": 1.4102, "step": 11498 }, { "epoch": 1.9837833175191926, "grad_norm": 0.578125, "learning_rate": 3.203688752044798e-09, "loss": 1.4648, "step": 11499 }, { "epoch": 1.9839558354179245, "grad_norm": 0.66015625, "learning_rate": 3.1351662121137384e-09, "loss": 1.2301, "step": 11500 }, { "epoch": 1.9839558354179245, "eval_loss": 1.4070264101028442, "eval_runtime": 10.8116, "eval_samples_per_second": 94.713, "eval_steps_per_second": 23.678, "step": 11500 }, { "epoch": 1.9841283533166565, "grad_norm": 0.5546875, "learning_rate": 3.0673843023920623e-09, "loss": 1.3121, "step": 11501 }, { "epoch": 1.9843008712153885, "grad_norm": 0.5703125, "learning_rate": 3.0003430279024193e-09, "loss": 1.3848, "step": 11502 }, { "epoch": 1.9844733891141206, "grad_norm": 0.53125, "learning_rate": 2.9340423936119466e-09, "loss": 1.4146, "step": 11503 }, { "epoch": 1.9846459070128526, "grad_norm": 0.5703125, "learning_rate": 2.868482404432271e-09, "loss": 1.4652, "step": 11504 }, { "epoch": 1.9848184249115846, "grad_norm": 0.5703125, "learning_rate": 2.8036630652206187e-09, "loss": 1.4103, "step": 11505 }, { "epoch": 1.9849909428103165, "grad_norm": 0.63671875, "learning_rate": 2.7395843807775934e-09, "loss": 1.3924, "step": 11506 }, { "epoch": 1.9851634607090487, "grad_norm": 0.56640625, "learning_rate": 2.67624635585384e-09, "loss": 1.4179, "step": 11507 }, { "epoch": 1.9853359786077807, "grad_norm": 0.609375, "learning_rate": 2.6136489951378295e-09, "loss": 1.3898, "step": 11508 }, { "epoch": 1.9855084965065126, "grad_norm": 0.5859375, "learning_rate": 2.5517923032714053e-09, "loss": 1.457, "step": 11509 }, { "epoch": 1.9856810144052446, "grad_norm": 0.6484375, "learning_rate": 2.490676284833127e-09, "loss": 1.3667, "step": 11510 }, { "epoch": 1.9858535323039765, "grad_norm": 0.59765625, "learning_rate": 2.430300944353814e-09, "loss": 1.4318, "step": 11511 }, { "epoch": 1.9860260502027085, "grad_norm": 0.58984375, "learning_rate": 2.3706662863054452e-09, "loss": 1.3655, "step": 11512 }, { "epoch": 1.9861985681014405, "grad_norm": 0.60546875, "learning_rate": 2.311772315106708e-09, "loss": 1.4294, "step": 11513 }, { "epoch": 1.9863710860001724, "grad_norm": 0.60546875, "learning_rate": 2.2536190351196697e-09, "loss": 1.4547, "step": 11514 }, { "epoch": 1.9865436038989044, "grad_norm": 0.59375, "learning_rate": 2.1962064506542146e-09, "loss": 1.4139, "step": 11515 }, { "epoch": 1.9867161217976363, "grad_norm": 0.578125, "learning_rate": 2.1395345659613877e-09, "loss": 1.4734, "step": 11516 }, { "epoch": 1.9868886396963685, "grad_norm": 0.63671875, "learning_rate": 2.0836033852422723e-09, "loss": 1.3815, "step": 11517 }, { "epoch": 1.9870611575951005, "grad_norm": 0.58203125, "learning_rate": 2.0284129126402207e-09, "loss": 1.4186, "step": 11518 }, { "epoch": 1.9872336754938325, "grad_norm": 0.5859375, "learning_rate": 1.9739631522430746e-09, "loss": 1.3891, "step": 11519 }, { "epoch": 1.9874061933925646, "grad_norm": 0.5859375, "learning_rate": 1.9202541080853844e-09, "loss": 1.489, "step": 11520 }, { "epoch": 1.9875787112912966, "grad_norm": 0.5625, "learning_rate": 1.867285784146189e-09, "loss": 1.4045, "step": 11521 }, { "epoch": 1.9877512291900286, "grad_norm": 0.6171875, "learning_rate": 1.8150581843490167e-09, "loss": 1.3625, "step": 11522 }, { "epoch": 1.9879237470887605, "grad_norm": 0.55078125, "learning_rate": 1.7635713125641052e-09, "loss": 1.473, "step": 11523 }, { "epoch": 1.9880962649874925, "grad_norm": 0.8515625, "learning_rate": 1.7128251726061805e-09, "loss": 1.4697, "step": 11524 }, { "epoch": 1.9882687828862244, "grad_norm": 0.6171875, "learning_rate": 1.6628197682344583e-09, "loss": 1.5217, "step": 11525 }, { "epoch": 1.9884413007849564, "grad_norm": 0.59765625, "learning_rate": 1.613555103152642e-09, "loss": 1.5008, "step": 11526 }, { "epoch": 1.9886138186836884, "grad_norm": 0.59375, "learning_rate": 1.5650311810122555e-09, "loss": 1.3161, "step": 11527 }, { "epoch": 1.9887863365824203, "grad_norm": 0.5859375, "learning_rate": 1.5172480054070903e-09, "loss": 1.4783, "step": 11528 }, { "epoch": 1.9889588544811523, "grad_norm": 0.5703125, "learning_rate": 1.4702055798776482e-09, "loss": 1.5052, "step": 11529 }, { "epoch": 1.9891313723798845, "grad_norm": 0.59765625, "learning_rate": 1.423903907908919e-09, "loss": 1.4766, "step": 11530 }, { "epoch": 1.9893038902786164, "grad_norm": 0.5546875, "learning_rate": 1.3783429929314918e-09, "loss": 1.3936, "step": 11531 }, { "epoch": 1.9894764081773484, "grad_norm": 0.609375, "learning_rate": 1.3335228383215548e-09, "loss": 1.4857, "step": 11532 }, { "epoch": 1.9896489260760803, "grad_norm": 0.6171875, "learning_rate": 1.2894434473975648e-09, "loss": 1.435, "step": 11533 }, { "epoch": 1.9898214439748125, "grad_norm": 0.5859375, "learning_rate": 1.246104823426908e-09, "loss": 1.4502, "step": 11534 }, { "epoch": 1.9899939618735445, "grad_norm": 0.71875, "learning_rate": 1.2035069696203494e-09, "loss": 1.3413, "step": 11535 }, { "epoch": 1.9901664797722765, "grad_norm": 0.61328125, "learning_rate": 1.161649889133143e-09, "loss": 1.4472, "step": 11536 }, { "epoch": 1.9903389976710084, "grad_norm": 0.5625, "learning_rate": 1.1205335850661414e-09, "loss": 1.5178, "step": 11537 }, { "epoch": 1.9905115155697404, "grad_norm": 0.60546875, "learning_rate": 1.080158060465797e-09, "loss": 1.3904, "step": 11538 }, { "epoch": 1.9906840334684723, "grad_norm": 0.61328125, "learning_rate": 1.0405233183241604e-09, "loss": 1.4666, "step": 11539 }, { "epoch": 1.9908565513672043, "grad_norm": 0.57421875, "learning_rate": 1.0016293615766615e-09, "loss": 1.4624, "step": 11540 }, { "epoch": 1.9910290692659363, "grad_norm": 0.6484375, "learning_rate": 9.63476193104329e-10, "loss": 1.3808, "step": 11541 }, { "epoch": 1.9912015871646682, "grad_norm": 0.609375, "learning_rate": 9.260638157360113e-10, "loss": 1.4517, "step": 11542 }, { "epoch": 1.9913741050634002, "grad_norm": 0.578125, "learning_rate": 8.893922322406046e-10, "loss": 1.4662, "step": 11543 }, { "epoch": 1.9915466229621324, "grad_norm": 0.59375, "learning_rate": 8.534614453370449e-10, "loss": 1.4781, "step": 11544 }, { "epoch": 1.9917191408608643, "grad_norm": 0.5859375, "learning_rate": 8.182714576865369e-10, "loss": 1.3584, "step": 11545 }, { "epoch": 1.9918916587595963, "grad_norm": 0.6484375, "learning_rate": 7.838222718958844e-10, "loss": 1.4533, "step": 11546 }, { "epoch": 1.9920641766583285, "grad_norm": 0.5625, "learning_rate": 7.501138905186e-10, "loss": 1.4187, "step": 11547 }, { "epoch": 1.9922366945570604, "grad_norm": 0.56640625, "learning_rate": 7.171463160504655e-10, "loss": 1.5579, "step": 11548 }, { "epoch": 1.9924092124557924, "grad_norm": 0.609375, "learning_rate": 6.849195509339712e-10, "loss": 1.5175, "step": 11549 }, { "epoch": 1.9925817303545243, "grad_norm": 0.6015625, "learning_rate": 6.534335975583173e-10, "loss": 1.3491, "step": 11550 }, { "epoch": 1.9927542482532563, "grad_norm": 0.55859375, "learning_rate": 6.226884582538618e-10, "loss": 1.3489, "step": 11551 }, { "epoch": 1.9929267661519883, "grad_norm": 0.5703125, "learning_rate": 5.926841353010026e-10, "loss": 1.4231, "step": 11552 }, { "epoch": 1.9930992840507202, "grad_norm": 0.65625, "learning_rate": 5.634206309201861e-10, "loss": 1.4619, "step": 11553 }, { "epoch": 1.9932718019494522, "grad_norm": 0.56640625, "learning_rate": 5.34897947280788e-10, "loss": 1.4616, "step": 11554 }, { "epoch": 1.9934443198481842, "grad_norm": 0.60546875, "learning_rate": 5.071160864966729e-10, "loss": 1.3633, "step": 11555 }, { "epoch": 1.9936168377469161, "grad_norm": 0.65625, "learning_rate": 4.800750506239737e-10, "loss": 1.4285, "step": 11556 }, { "epoch": 1.993789355645648, "grad_norm": 0.58203125, "learning_rate": 4.537748416677534e-10, "loss": 1.432, "step": 11557 }, { "epoch": 1.9939618735443803, "grad_norm": 0.609375, "learning_rate": 4.2821546157534313e-10, "loss": 1.3494, "step": 11558 }, { "epoch": 1.9941343914431122, "grad_norm": 0.5703125, "learning_rate": 4.0339691224189346e-10, "loss": 1.457, "step": 11559 }, { "epoch": 1.9943069093418442, "grad_norm": 0.59375, "learning_rate": 3.7931919550482364e-10, "loss": 1.4494, "step": 11560 }, { "epoch": 1.9944794272405764, "grad_norm": 0.6484375, "learning_rate": 3.559823131471518e-10, "loss": 1.345, "step": 11561 }, { "epoch": 1.9946519451393083, "grad_norm": 0.5546875, "learning_rate": 3.3338626690082587e-10, "loss": 1.4648, "step": 11562 }, { "epoch": 1.9948244630380403, "grad_norm": 0.56640625, "learning_rate": 3.115310584367315e-10, "loss": 1.3445, "step": 11563 }, { "epoch": 1.9949969809367722, "grad_norm": 0.59375, "learning_rate": 2.9041668937579426e-10, "loss": 1.3534, "step": 11564 }, { "epoch": 1.9951694988355042, "grad_norm": 0.61328125, "learning_rate": 2.7004316128231844e-10, "loss": 1.428, "step": 11565 }, { "epoch": 1.9953420167342362, "grad_norm": 0.5859375, "learning_rate": 2.504104756639869e-10, "loss": 1.4076, "step": 11566 }, { "epoch": 1.9955145346329681, "grad_norm": 0.609375, "learning_rate": 2.3151863397741225e-10, "loss": 1.406, "step": 11567 }, { "epoch": 1.9956870525317, "grad_norm": 0.56640625, "learning_rate": 2.133676376214755e-10, "loss": 1.4402, "step": 11568 }, { "epoch": 1.995859570430432, "grad_norm": 0.5859375, "learning_rate": 1.9595748794065673e-10, "loss": 1.5273, "step": 11569 }, { "epoch": 1.996032088329164, "grad_norm": 0.6015625, "learning_rate": 1.792881862250351e-10, "loss": 1.457, "step": 11570 }, { "epoch": 1.9962046062278962, "grad_norm": 0.5390625, "learning_rate": 1.6335973370917858e-10, "loss": 1.3983, "step": 11571 }, { "epoch": 1.9963771241266282, "grad_norm": 0.57421875, "learning_rate": 1.481721315743645e-10, "loss": 1.4884, "step": 11572 }, { "epoch": 1.9965496420253601, "grad_norm": 0.58203125, "learning_rate": 1.3372538094413856e-10, "loss": 1.4246, "step": 11573 }, { "epoch": 1.996722159924092, "grad_norm": 0.609375, "learning_rate": 1.2001948288986598e-10, "loss": 1.4212, "step": 11574 }, { "epoch": 1.9968946778228243, "grad_norm": 0.5703125, "learning_rate": 1.070544384262906e-10, "loss": 1.3712, "step": 11575 }, { "epoch": 1.9970671957215562, "grad_norm": 0.59375, "learning_rate": 9.483024851486556e-11, "loss": 1.3957, "step": 11576 }, { "epoch": 1.9972397136202882, "grad_norm": 0.56640625, "learning_rate": 8.334691406042261e-11, "loss": 1.4156, "step": 11577 }, { "epoch": 1.9974122315190201, "grad_norm": 0.53515625, "learning_rate": 7.260443591450283e-11, "loss": 1.4135, "step": 11578 }, { "epoch": 1.997584749417752, "grad_norm": 0.578125, "learning_rate": 6.26028148720259e-11, "loss": 1.3832, "step": 11579 }, { "epoch": 1.997757267316484, "grad_norm": 0.59765625, "learning_rate": 5.334205167462081e-11, "loss": 1.4101, "step": 11580 }, { "epoch": 1.997929785215216, "grad_norm": 0.5625, "learning_rate": 4.482214700729515e-11, "loss": 1.394, "step": 11581 }, { "epoch": 1.998102303113948, "grad_norm": 0.546875, "learning_rate": 3.704310150287604e-11, "loss": 1.4096, "step": 11582 }, { "epoch": 1.99827482101268, "grad_norm": 0.57421875, "learning_rate": 3.000491573756925e-11, "loss": 1.4367, "step": 11583 }, { "epoch": 1.998447338911412, "grad_norm": 0.609375, "learning_rate": 2.3707590230959144e-11, "loss": 1.4118, "step": 11584 }, { "epoch": 1.998619856810144, "grad_norm": 0.57421875, "learning_rate": 1.8151125451559838e-11, "loss": 1.3415, "step": 11585 }, { "epoch": 1.998792374708876, "grad_norm": 0.58984375, "learning_rate": 1.3335521810153851e-11, "loss": 1.4252, "step": 11586 }, { "epoch": 1.998964892607608, "grad_norm": 0.58203125, "learning_rate": 9.260779664233e-12, "loss": 1.4393, "step": 11587 }, { "epoch": 1.9991374105063402, "grad_norm": 0.56640625, "learning_rate": 5.926899314667723e-12, "loss": 1.4364, "step": 11588 }, { "epoch": 1.9993099284050722, "grad_norm": 0.5625, "learning_rate": 3.3338810079275307e-12, "loss": 1.4977, "step": 11589 }, { "epoch": 1.9994824463038041, "grad_norm": 0.55859375, "learning_rate": 1.4817249383014543e-12, "loss": 1.3664, "step": 11590 }, { "epoch": 1.999654964202536, "grad_norm": 0.56640625, "learning_rate": 3.704312412367017e-13, "loss": 1.3706, "step": 11591 }, { "epoch": 1.999827482101268, "grad_norm": 0.55078125, "learning_rate": 0.0, "loss": 1.4666, "step": 11592 } ], "logging_steps": 1, "max_steps": 11592, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.613498406381342e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }