{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0000662220330163, "eval_steps": 1322, "global_step": 13214, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.568232344732983e-05, "grad_norm": 19.5, "learning_rate": 4.0000000000000003e-07, "loss": 1.0007, "step": 1 }, { "epoch": 7.568232344732983e-05, "eval_loss": 1.1901112794876099, "eval_runtime": 82.5515, "eval_samples_per_second": 58.897, "eval_steps_per_second": 58.897, "step": 1 }, { "epoch": 0.00015136464689465966, "grad_norm": 20.375, "learning_rate": 8.000000000000001e-07, "loss": 1.0341, "step": 2 }, { "epoch": 0.0002270469703419895, "grad_norm": 23.625, "learning_rate": 1.2000000000000002e-06, "loss": 1.2038, "step": 3 }, { "epoch": 0.0003027292937893193, "grad_norm": 25.5, "learning_rate": 1.6000000000000001e-06, "loss": 1.2451, "step": 4 }, { "epoch": 0.00037841161723664915, "grad_norm": 24.25, "learning_rate": 2.0000000000000003e-06, "loss": 1.0426, "step": 5 }, { "epoch": 0.000454093940683979, "grad_norm": 28.5, "learning_rate": 2.4000000000000003e-06, "loss": 1.0197, "step": 6 }, { "epoch": 0.0005297762641313089, "grad_norm": 36.25, "learning_rate": 2.8000000000000003e-06, "loss": 0.843, "step": 7 }, { "epoch": 0.0006054585875786386, "grad_norm": 37.75, "learning_rate": 3.2000000000000003e-06, "loss": 0.7076, "step": 8 }, { "epoch": 0.0006811409110259685, "grad_norm": 36.25, "learning_rate": 3.6000000000000003e-06, "loss": 1.3896, "step": 9 }, { "epoch": 0.0007568232344732983, "grad_norm": 24.25, "learning_rate": 4.000000000000001e-06, "loss": 0.5926, "step": 10 }, { "epoch": 0.0008325055579206282, "grad_norm": 19.125, "learning_rate": 4.4e-06, "loss": 0.5021, "step": 11 }, { "epoch": 0.000908187881367958, "grad_norm": 6.40625, "learning_rate": 4.800000000000001e-06, "loss": 0.4098, "step": 12 }, { "epoch": 0.0009838702048152877, "grad_norm": 4.78125, "learning_rate": 5.2e-06, "loss": 0.4639, "step": 13 }, { "epoch": 0.0010595525282626177, "grad_norm": 4.6875, "learning_rate": 5.600000000000001e-06, "loss": 0.4493, "step": 14 }, { "epoch": 0.0011352348517099475, "grad_norm": 2.953125, "learning_rate": 6e-06, "loss": 0.4178, "step": 15 }, { "epoch": 0.0012109171751572773, "grad_norm": 2.75, "learning_rate": 6.4000000000000006e-06, "loss": 0.3991, "step": 16 }, { "epoch": 0.001286599498604607, "grad_norm": 2.015625, "learning_rate": 6.800000000000001e-06, "loss": 0.4151, "step": 17 }, { "epoch": 0.001362281822051937, "grad_norm": 1.796875, "learning_rate": 7.2000000000000005e-06, "loss": 0.4092, "step": 18 }, { "epoch": 0.0014379641454992668, "grad_norm": 1.6953125, "learning_rate": 7.600000000000001e-06, "loss": 0.3905, "step": 19 }, { "epoch": 0.0015136464689465966, "grad_norm": 1.796875, "learning_rate": 8.000000000000001e-06, "loss": 0.4334, "step": 20 }, { "epoch": 0.0015893287923939264, "grad_norm": 1.640625, "learning_rate": 8.400000000000001e-06, "loss": 0.3904, "step": 21 }, { "epoch": 0.0016650111158412564, "grad_norm": 1.5546875, "learning_rate": 8.8e-06, "loss": 0.3629, "step": 22 }, { "epoch": 0.0017406934392885862, "grad_norm": 1.5078125, "learning_rate": 9.200000000000002e-06, "loss": 0.3723, "step": 23 }, { "epoch": 0.001816375762735916, "grad_norm": 1.546875, "learning_rate": 9.600000000000001e-06, "loss": 0.3533, "step": 24 }, { "epoch": 0.0018920580861832457, "grad_norm": 1.5390625, "learning_rate": 1e-05, "loss": 0.3792, "step": 25 }, { "epoch": 0.0019677404096305755, "grad_norm": 1.75, "learning_rate": 1.04e-05, "loss": 0.3982, "step": 26 }, { "epoch": 0.0020434227330779057, "grad_norm": 1.5234375, "learning_rate": 1.0800000000000002e-05, "loss": 0.4199, "step": 27 }, { "epoch": 0.0021191050565252355, "grad_norm": 1.328125, "learning_rate": 1.1200000000000001e-05, "loss": 0.3335, "step": 28 }, { "epoch": 0.0021947873799725653, "grad_norm": 1.4765625, "learning_rate": 1.16e-05, "loss": 0.3999, "step": 29 }, { "epoch": 0.002270469703419895, "grad_norm": 1.5703125, "learning_rate": 1.2e-05, "loss": 0.3985, "step": 30 }, { "epoch": 0.002346152026867225, "grad_norm": 1.578125, "learning_rate": 1.2400000000000002e-05, "loss": 0.4779, "step": 31 }, { "epoch": 0.0024218343503145546, "grad_norm": 1.265625, "learning_rate": 1.2800000000000001e-05, "loss": 0.3573, "step": 32 }, { "epoch": 0.0024975166737618844, "grad_norm": 1.4140625, "learning_rate": 1.3200000000000002e-05, "loss": 0.3693, "step": 33 }, { "epoch": 0.002573198997209214, "grad_norm": 1.25, "learning_rate": 1.3600000000000002e-05, "loss": 0.3716, "step": 34 }, { "epoch": 0.0026488813206565444, "grad_norm": 1.4140625, "learning_rate": 1.4e-05, "loss": 0.3918, "step": 35 }, { "epoch": 0.002724563644103874, "grad_norm": 1.375, "learning_rate": 1.4400000000000001e-05, "loss": 0.4098, "step": 36 }, { "epoch": 0.002800245967551204, "grad_norm": 1.359375, "learning_rate": 1.48e-05, "loss": 0.3875, "step": 37 }, { "epoch": 0.0028759282909985337, "grad_norm": 1.4140625, "learning_rate": 1.5200000000000002e-05, "loss": 0.391, "step": 38 }, { "epoch": 0.0029516106144458635, "grad_norm": 1.2109375, "learning_rate": 1.5600000000000003e-05, "loss": 0.3678, "step": 39 }, { "epoch": 0.0030272929378931932, "grad_norm": 1.21875, "learning_rate": 1.6000000000000003e-05, "loss": 0.3541, "step": 40 }, { "epoch": 0.003102975261340523, "grad_norm": 1.203125, "learning_rate": 1.64e-05, "loss": 0.3233, "step": 41 }, { "epoch": 0.003178657584787853, "grad_norm": 1.234375, "learning_rate": 1.6800000000000002e-05, "loss": 0.3657, "step": 42 }, { "epoch": 0.003254339908235183, "grad_norm": 1.453125, "learning_rate": 1.72e-05, "loss": 0.3954, "step": 43 }, { "epoch": 0.0033300222316825128, "grad_norm": 1.7734375, "learning_rate": 1.76e-05, "loss": 0.3669, "step": 44 }, { "epoch": 0.0034057045551298426, "grad_norm": 1.359375, "learning_rate": 1.8e-05, "loss": 0.3581, "step": 45 }, { "epoch": 0.0034813868785771723, "grad_norm": 1.2890625, "learning_rate": 1.8400000000000003e-05, "loss": 0.4164, "step": 46 }, { "epoch": 0.003557069202024502, "grad_norm": 1.296875, "learning_rate": 1.88e-05, "loss": 0.3835, "step": 47 }, { "epoch": 0.003632751525471832, "grad_norm": 1.140625, "learning_rate": 1.9200000000000003e-05, "loss": 0.3144, "step": 48 }, { "epoch": 0.0037084338489191617, "grad_norm": 1.296875, "learning_rate": 1.9600000000000002e-05, "loss": 0.4181, "step": 49 }, { "epoch": 0.0037841161723664914, "grad_norm": 1.125, "learning_rate": 2e-05, "loss": 0.3555, "step": 50 }, { "epoch": 0.0038597984958138216, "grad_norm": 1.296875, "learning_rate": 1.9999999929066423e-05, "loss": 0.4083, "step": 51 }, { "epoch": 0.003935480819261151, "grad_norm": 1.2578125, "learning_rate": 1.9999999716265682e-05, "loss": 0.3918, "step": 52 }, { "epoch": 0.004011163142708481, "grad_norm": 1.1796875, "learning_rate": 1.9999999361597785e-05, "loss": 0.3555, "step": 53 }, { "epoch": 0.004086845466155811, "grad_norm": 1.296875, "learning_rate": 1.9999998865062738e-05, "loss": 0.3503, "step": 54 }, { "epoch": 0.004162527789603141, "grad_norm": 1.3046875, "learning_rate": 1.9999998226660544e-05, "loss": 0.3529, "step": 55 }, { "epoch": 0.004238210113050471, "grad_norm": 1.21875, "learning_rate": 1.999999744639122e-05, "loss": 0.3569, "step": 56 }, { "epoch": 0.0043138924364978, "grad_norm": 1.25, "learning_rate": 1.9999996524254763e-05, "loss": 0.449, "step": 57 }, { "epoch": 0.0043895747599451305, "grad_norm": 1.21875, "learning_rate": 1.9999995460251203e-05, "loss": 0.397, "step": 58 }, { "epoch": 0.00446525708339246, "grad_norm": 20.75, "learning_rate": 1.999999425438054e-05, "loss": 0.9439, "step": 59 }, { "epoch": 0.00454093940683979, "grad_norm": 1.3515625, "learning_rate": 1.9999992906642803e-05, "loss": 0.39, "step": 60 }, { "epoch": 0.004616621730287119, "grad_norm": 1.25, "learning_rate": 1.9999991417038003e-05, "loss": 0.3676, "step": 61 }, { "epoch": 0.00469230405373445, "grad_norm": 1.140625, "learning_rate": 1.9999989785566167e-05, "loss": 0.3095, "step": 62 }, { "epoch": 0.00476798637718178, "grad_norm": 1.1328125, "learning_rate": 1.9999988012227312e-05, "loss": 0.3643, "step": 63 }, { "epoch": 0.004843668700629109, "grad_norm": 7.96875, "learning_rate": 1.9999986097021468e-05, "loss": 1.0323, "step": 64 }, { "epoch": 0.004919351024076439, "grad_norm": 1.296875, "learning_rate": 1.9999984039948664e-05, "loss": 0.3486, "step": 65 }, { "epoch": 0.004995033347523769, "grad_norm": 1.34375, "learning_rate": 1.9999981841008923e-05, "loss": 0.3353, "step": 66 }, { "epoch": 0.005070715670971099, "grad_norm": 1.3515625, "learning_rate": 1.9999979500202277e-05, "loss": 0.4026, "step": 67 }, { "epoch": 0.005146397994418428, "grad_norm": 1.2734375, "learning_rate": 1.9999977017528763e-05, "loss": 0.4169, "step": 68 }, { "epoch": 0.0052220803178657585, "grad_norm": 1.171875, "learning_rate": 1.9999974392988414e-05, "loss": 0.3457, "step": 69 }, { "epoch": 0.005297762641313089, "grad_norm": 43.0, "learning_rate": 1.9999971626581268e-05, "loss": 0.9594, "step": 70 }, { "epoch": 0.005373444964760418, "grad_norm": 1.3046875, "learning_rate": 1.9999968718307362e-05, "loss": 0.4245, "step": 71 }, { "epoch": 0.005449127288207748, "grad_norm": 1.140625, "learning_rate": 1.9999965668166744e-05, "loss": 0.3522, "step": 72 }, { "epoch": 0.005524809611655078, "grad_norm": 1.125, "learning_rate": 1.999996247615945e-05, "loss": 0.3849, "step": 73 }, { "epoch": 0.005600491935102408, "grad_norm": 1.1328125, "learning_rate": 1.999995914228553e-05, "loss": 0.3531, "step": 74 }, { "epoch": 0.005676174258549737, "grad_norm": 1.421875, "learning_rate": 1.9999955666545024e-05, "loss": 0.4589, "step": 75 }, { "epoch": 0.005751856581997067, "grad_norm": 1.15625, "learning_rate": 1.9999952048937986e-05, "loss": 0.3464, "step": 76 }, { "epoch": 0.005827538905444398, "grad_norm": 1.140625, "learning_rate": 1.9999948289464473e-05, "loss": 0.3485, "step": 77 }, { "epoch": 0.005903221228891727, "grad_norm": 1.21875, "learning_rate": 1.999994438812453e-05, "loss": 0.3532, "step": 78 }, { "epoch": 0.005978903552339057, "grad_norm": 1.140625, "learning_rate": 1.9999940344918217e-05, "loss": 0.3616, "step": 79 }, { "epoch": 0.0060545858757863865, "grad_norm": 1.1875, "learning_rate": 1.9999936159845586e-05, "loss": 0.3613, "step": 80 }, { "epoch": 0.006130268199233717, "grad_norm": 1.09375, "learning_rate": 1.9999931832906704e-05, "loss": 0.3286, "step": 81 }, { "epoch": 0.006205950522681046, "grad_norm": 1.1484375, "learning_rate": 1.9999927364101624e-05, "loss": 0.4035, "step": 82 }, { "epoch": 0.006281632846128376, "grad_norm": 1.109375, "learning_rate": 1.999992275343042e-05, "loss": 0.3796, "step": 83 }, { "epoch": 0.006357315169575706, "grad_norm": 1.1953125, "learning_rate": 1.999991800089315e-05, "loss": 0.3755, "step": 84 }, { "epoch": 0.006432997493023036, "grad_norm": 1.25, "learning_rate": 1.999991310648988e-05, "loss": 0.4195, "step": 85 }, { "epoch": 0.006508679816470366, "grad_norm": 1.234375, "learning_rate": 1.9999908070220685e-05, "loss": 0.4224, "step": 86 }, { "epoch": 0.006584362139917695, "grad_norm": 1.203125, "learning_rate": 1.999990289208563e-05, "loss": 0.3226, "step": 87 }, { "epoch": 0.0066600444633650255, "grad_norm": 1.1015625, "learning_rate": 1.9999897572084795e-05, "loss": 0.3591, "step": 88 }, { "epoch": 0.006735726786812355, "grad_norm": 1.25, "learning_rate": 1.9999892110218247e-05, "loss": 0.37, "step": 89 }, { "epoch": 0.006811409110259685, "grad_norm": 1.40625, "learning_rate": 1.9999886506486078e-05, "loss": 0.4073, "step": 90 }, { "epoch": 0.0068870914337070144, "grad_norm": 1.171875, "learning_rate": 1.999988076088835e-05, "loss": 0.3899, "step": 91 }, { "epoch": 0.006962773757154345, "grad_norm": 1.109375, "learning_rate": 1.999987487342516e-05, "loss": 0.3489, "step": 92 }, { "epoch": 0.007038456080601675, "grad_norm": 1.2578125, "learning_rate": 1.999986884409658e-05, "loss": 0.37, "step": 93 }, { "epoch": 0.007114138404049004, "grad_norm": 1.15625, "learning_rate": 1.99998626729027e-05, "loss": 0.3446, "step": 94 }, { "epoch": 0.007189820727496334, "grad_norm": 1.2890625, "learning_rate": 1.999985635984361e-05, "loss": 0.3692, "step": 95 }, { "epoch": 0.007265503050943664, "grad_norm": 1.171875, "learning_rate": 1.9999849904919398e-05, "loss": 0.4184, "step": 96 }, { "epoch": 0.007341185374390994, "grad_norm": 1.1953125, "learning_rate": 1.9999843308130155e-05, "loss": 0.4461, "step": 97 }, { "epoch": 0.007416867697838323, "grad_norm": 1.1796875, "learning_rate": 1.999983656947597e-05, "loss": 0.4281, "step": 98 }, { "epoch": 0.0074925500212856535, "grad_norm": 1.0859375, "learning_rate": 1.9999829688956945e-05, "loss": 0.3454, "step": 99 }, { "epoch": 0.007568232344732983, "grad_norm": 1.1640625, "learning_rate": 1.9999822666573176e-05, "loss": 0.314, "step": 100 }, { "epoch": 0.007643914668180313, "grad_norm": 1.21875, "learning_rate": 1.9999815502324763e-05, "loss": 0.3612, "step": 101 }, { "epoch": 0.007719596991627643, "grad_norm": 1.296875, "learning_rate": 1.9999808196211804e-05, "loss": 0.3528, "step": 102 }, { "epoch": 0.007795279315074973, "grad_norm": 1.203125, "learning_rate": 1.999980074823441e-05, "loss": 0.3571, "step": 103 }, { "epoch": 0.007870961638522302, "grad_norm": 1.2734375, "learning_rate": 1.999979315839268e-05, "loss": 0.4314, "step": 104 }, { "epoch": 0.007946643961969632, "grad_norm": 1.15625, "learning_rate": 1.999978542668672e-05, "loss": 0.3485, "step": 105 }, { "epoch": 0.008022326285416962, "grad_norm": 1.140625, "learning_rate": 1.999977755311665e-05, "loss": 0.3732, "step": 106 }, { "epoch": 0.008098008608864293, "grad_norm": 1.125, "learning_rate": 1.9999769537682572e-05, "loss": 0.3563, "step": 107 }, { "epoch": 0.008173690932311623, "grad_norm": 1.171875, "learning_rate": 1.99997613803846e-05, "loss": 0.4039, "step": 108 }, { "epoch": 0.008249373255758951, "grad_norm": 1.1640625, "learning_rate": 1.9999753081222856e-05, "loss": 0.4306, "step": 109 }, { "epoch": 0.008325055579206282, "grad_norm": 1.0703125, "learning_rate": 1.9999744640197457e-05, "loss": 0.3506, "step": 110 }, { "epoch": 0.008400737902653612, "grad_norm": 1.1015625, "learning_rate": 1.9999736057308517e-05, "loss": 0.3868, "step": 111 }, { "epoch": 0.008476420226100942, "grad_norm": 1.1796875, "learning_rate": 1.999972733255616e-05, "loss": 0.3879, "step": 112 }, { "epoch": 0.00855210254954827, "grad_norm": 1.171875, "learning_rate": 1.9999718465940512e-05, "loss": 0.392, "step": 113 }, { "epoch": 0.0086277848729956, "grad_norm": 1.1640625, "learning_rate": 1.9999709457461697e-05, "loss": 0.4244, "step": 114 }, { "epoch": 0.00870346719644293, "grad_norm": 1.15625, "learning_rate": 1.9999700307119846e-05, "loss": 0.4273, "step": 115 }, { "epoch": 0.008779149519890261, "grad_norm": 1.1953125, "learning_rate": 1.999969101491508e-05, "loss": 0.4407, "step": 116 }, { "epoch": 0.008854831843337591, "grad_norm": 1.1484375, "learning_rate": 1.9999681580847544e-05, "loss": 0.3989, "step": 117 }, { "epoch": 0.00893051416678492, "grad_norm": 7.34375, "learning_rate": 1.999967200491736e-05, "loss": 0.9033, "step": 118 }, { "epoch": 0.00900619649023225, "grad_norm": 1.125, "learning_rate": 1.9999662287124667e-05, "loss": 0.3593, "step": 119 }, { "epoch": 0.00908187881367958, "grad_norm": 1.0390625, "learning_rate": 1.999965242746961e-05, "loss": 0.3422, "step": 120 }, { "epoch": 0.00915756113712691, "grad_norm": 1.078125, "learning_rate": 1.999964242595232e-05, "loss": 0.4013, "step": 121 }, { "epoch": 0.009233243460574239, "grad_norm": 1.09375, "learning_rate": 1.9999632282572944e-05, "loss": 0.358, "step": 122 }, { "epoch": 0.009308925784021569, "grad_norm": 1.171875, "learning_rate": 1.9999621997331624e-05, "loss": 0.3844, "step": 123 }, { "epoch": 0.0093846081074689, "grad_norm": 1.15625, "learning_rate": 1.9999611570228503e-05, "loss": 0.3748, "step": 124 }, { "epoch": 0.00946029043091623, "grad_norm": 1.0703125, "learning_rate": 1.9999601001263733e-05, "loss": 0.4099, "step": 125 }, { "epoch": 0.00953597275436356, "grad_norm": 1.2109375, "learning_rate": 1.9999590290437464e-05, "loss": 0.4304, "step": 126 }, { "epoch": 0.009611655077810888, "grad_norm": 1.0078125, "learning_rate": 1.999957943774985e-05, "loss": 0.3123, "step": 127 }, { "epoch": 0.009687337401258218, "grad_norm": 1.1328125, "learning_rate": 1.9999568443201035e-05, "loss": 0.3617, "step": 128 }, { "epoch": 0.009763019724705549, "grad_norm": 1.1875, "learning_rate": 1.9999557306791183e-05, "loss": 0.3849, "step": 129 }, { "epoch": 0.009838702048152879, "grad_norm": 1.046875, "learning_rate": 1.9999546028520456e-05, "loss": 0.3182, "step": 130 }, { "epoch": 0.009914384371600209, "grad_norm": 1.125, "learning_rate": 1.9999534608389004e-05, "loss": 0.3908, "step": 131 }, { "epoch": 0.009990066695047537, "grad_norm": 1.09375, "learning_rate": 1.9999523046396996e-05, "loss": 0.3573, "step": 132 }, { "epoch": 0.010065749018494868, "grad_norm": 1.0390625, "learning_rate": 1.9999511342544593e-05, "loss": 0.3619, "step": 133 }, { "epoch": 0.010141431341942198, "grad_norm": 1.0, "learning_rate": 1.9999499496831964e-05, "loss": 0.3366, "step": 134 }, { "epoch": 0.010217113665389528, "grad_norm": 1.2109375, "learning_rate": 1.9999487509259273e-05, "loss": 0.3631, "step": 135 }, { "epoch": 0.010292795988836857, "grad_norm": 1.0625, "learning_rate": 1.999947537982669e-05, "loss": 0.3707, "step": 136 }, { "epoch": 0.010368478312284187, "grad_norm": 1.0859375, "learning_rate": 1.9999463108534393e-05, "loss": 0.3874, "step": 137 }, { "epoch": 0.010444160635731517, "grad_norm": 1.1328125, "learning_rate": 1.9999450695382547e-05, "loss": 0.3806, "step": 138 }, { "epoch": 0.010519842959178847, "grad_norm": 1.0859375, "learning_rate": 1.9999438140371338e-05, "loss": 0.3772, "step": 139 }, { "epoch": 0.010595525282626177, "grad_norm": 1.1171875, "learning_rate": 1.9999425443500937e-05, "loss": 0.3693, "step": 140 }, { "epoch": 0.010671207606073506, "grad_norm": 1.109375, "learning_rate": 1.9999412604771526e-05, "loss": 0.3593, "step": 141 }, { "epoch": 0.010746889929520836, "grad_norm": 1.453125, "learning_rate": 1.9999399624183293e-05, "loss": 0.4218, "step": 142 }, { "epoch": 0.010822572252968166, "grad_norm": 10.5, "learning_rate": 1.999938650173641e-05, "loss": 1.0997, "step": 143 }, { "epoch": 0.010898254576415497, "grad_norm": 1.15625, "learning_rate": 1.9999373237431073e-05, "loss": 0.3755, "step": 144 }, { "epoch": 0.010973936899862825, "grad_norm": 1.1875, "learning_rate": 1.9999359831267467e-05, "loss": 0.3892, "step": 145 }, { "epoch": 0.011049619223310155, "grad_norm": 1.0703125, "learning_rate": 1.999934628324578e-05, "loss": 0.3264, "step": 146 }, { "epoch": 0.011125301546757485, "grad_norm": 1.0859375, "learning_rate": 1.999933259336621e-05, "loss": 0.3895, "step": 147 }, { "epoch": 0.011200983870204816, "grad_norm": 1.0078125, "learning_rate": 1.9999318761628946e-05, "loss": 0.3766, "step": 148 }, { "epoch": 0.011276666193652146, "grad_norm": 1.0859375, "learning_rate": 1.999930478803419e-05, "loss": 0.3469, "step": 149 }, { "epoch": 0.011352348517099474, "grad_norm": 1.109375, "learning_rate": 1.9999290672582127e-05, "loss": 0.3996, "step": 150 }, { "epoch": 0.011428030840546805, "grad_norm": 5.125, "learning_rate": 1.9999276415272974e-05, "loss": 0.9393, "step": 151 }, { "epoch": 0.011503713163994135, "grad_norm": 3.90625, "learning_rate": 1.9999262016106924e-05, "loss": 0.8319, "step": 152 }, { "epoch": 0.011579395487441465, "grad_norm": 1.0703125, "learning_rate": 1.9999247475084183e-05, "loss": 0.3226, "step": 153 }, { "epoch": 0.011655077810888795, "grad_norm": 1.25, "learning_rate": 1.9999232792204956e-05, "loss": 0.3981, "step": 154 }, { "epoch": 0.011730760134336124, "grad_norm": 1.265625, "learning_rate": 1.999921796746945e-05, "loss": 0.4434, "step": 155 }, { "epoch": 0.011806442457783454, "grad_norm": 1.046875, "learning_rate": 1.9999203000877883e-05, "loss": 0.3647, "step": 156 }, { "epoch": 0.011882124781230784, "grad_norm": 1.1171875, "learning_rate": 1.9999187892430462e-05, "loss": 0.4349, "step": 157 }, { "epoch": 0.011957807104678114, "grad_norm": 1.1328125, "learning_rate": 1.99991726421274e-05, "loss": 0.3511, "step": 158 }, { "epoch": 0.012033489428125443, "grad_norm": 1.15625, "learning_rate": 1.9999157249968915e-05, "loss": 0.4033, "step": 159 }, { "epoch": 0.012109171751572773, "grad_norm": 1.0390625, "learning_rate": 1.9999141715955224e-05, "loss": 0.3764, "step": 160 }, { "epoch": 0.012184854075020103, "grad_norm": 1.09375, "learning_rate": 1.999912604008655e-05, "loss": 0.3644, "step": 161 }, { "epoch": 0.012260536398467433, "grad_norm": 1.0859375, "learning_rate": 1.9999110222363117e-05, "loss": 0.3792, "step": 162 }, { "epoch": 0.012336218721914764, "grad_norm": 1.078125, "learning_rate": 1.999909426278514e-05, "loss": 0.3376, "step": 163 }, { "epoch": 0.012411901045362092, "grad_norm": 1.0625, "learning_rate": 1.9999078161352857e-05, "loss": 0.3844, "step": 164 }, { "epoch": 0.012487583368809422, "grad_norm": 1.0703125, "learning_rate": 1.999906191806649e-05, "loss": 0.3534, "step": 165 }, { "epoch": 0.012563265692256752, "grad_norm": 1.09375, "learning_rate": 1.999904553292627e-05, "loss": 0.36, "step": 166 }, { "epoch": 0.012638948015704083, "grad_norm": 1.046875, "learning_rate": 1.9999029005932428e-05, "loss": 0.3668, "step": 167 }, { "epoch": 0.012714630339151411, "grad_norm": 1.078125, "learning_rate": 1.9999012337085204e-05, "loss": 0.3468, "step": 168 }, { "epoch": 0.012790312662598741, "grad_norm": 1.96875, "learning_rate": 1.999899552638483e-05, "loss": 0.3382, "step": 169 }, { "epoch": 0.012865994986046072, "grad_norm": 1.078125, "learning_rate": 1.9998978573831546e-05, "loss": 0.3992, "step": 170 }, { "epoch": 0.012941677309493402, "grad_norm": 0.9921875, "learning_rate": 1.999896147942559e-05, "loss": 0.3464, "step": 171 }, { "epoch": 0.013017359632940732, "grad_norm": 0.99609375, "learning_rate": 1.9998944243167207e-05, "loss": 0.3193, "step": 172 }, { "epoch": 0.01309304195638806, "grad_norm": 0.98046875, "learning_rate": 1.9998926865056643e-05, "loss": 0.3226, "step": 173 }, { "epoch": 0.01316872427983539, "grad_norm": 1.1640625, "learning_rate": 1.9998909345094146e-05, "loss": 0.3806, "step": 174 }, { "epoch": 0.013244406603282721, "grad_norm": 1.0, "learning_rate": 1.9998891683279955e-05, "loss": 0.3052, "step": 175 }, { "epoch": 0.013320088926730051, "grad_norm": 1.0078125, "learning_rate": 1.999887387961433e-05, "loss": 0.3449, "step": 176 }, { "epoch": 0.013395771250177381, "grad_norm": 1.046875, "learning_rate": 1.999885593409752e-05, "loss": 0.3765, "step": 177 }, { "epoch": 0.01347145357362471, "grad_norm": 1.0703125, "learning_rate": 1.999883784672978e-05, "loss": 0.3538, "step": 178 }, { "epoch": 0.01354713589707204, "grad_norm": 1.3125, "learning_rate": 1.9998819617511363e-05, "loss": 0.3674, "step": 179 }, { "epoch": 0.01362281822051937, "grad_norm": 0.984375, "learning_rate": 1.9998801246442534e-05, "loss": 0.3427, "step": 180 }, { "epoch": 0.0136985005439667, "grad_norm": 1.0078125, "learning_rate": 1.9998782733523553e-05, "loss": 0.3064, "step": 181 }, { "epoch": 0.013774182867414029, "grad_norm": 1.109375, "learning_rate": 1.9998764078754678e-05, "loss": 0.3962, "step": 182 }, { "epoch": 0.013849865190861359, "grad_norm": 1.015625, "learning_rate": 1.9998745282136177e-05, "loss": 0.3672, "step": 183 }, { "epoch": 0.01392554751430869, "grad_norm": 1.21875, "learning_rate": 1.9998726343668314e-05, "loss": 0.3572, "step": 184 }, { "epoch": 0.01400122983775602, "grad_norm": 1.0625, "learning_rate": 1.999870726335136e-05, "loss": 0.3459, "step": 185 }, { "epoch": 0.01407691216120335, "grad_norm": 0.99609375, "learning_rate": 1.9998688041185584e-05, "loss": 0.3594, "step": 186 }, { "epoch": 0.014152594484650678, "grad_norm": 1.078125, "learning_rate": 1.9998668677171262e-05, "loss": 0.3511, "step": 187 }, { "epoch": 0.014228276808098008, "grad_norm": 1.171875, "learning_rate": 1.9998649171308666e-05, "loss": 0.3971, "step": 188 }, { "epoch": 0.014303959131545339, "grad_norm": 1.1484375, "learning_rate": 1.9998629523598073e-05, "loss": 0.392, "step": 189 }, { "epoch": 0.014379641454992669, "grad_norm": 12.875, "learning_rate": 1.999860973403976e-05, "loss": 0.9823, "step": 190 }, { "epoch": 0.014455323778439997, "grad_norm": 1.1796875, "learning_rate": 1.9998589802634013e-05, "loss": 0.3623, "step": 191 }, { "epoch": 0.014531006101887328, "grad_norm": 1.0703125, "learning_rate": 1.9998569729381115e-05, "loss": 0.3461, "step": 192 }, { "epoch": 0.014606688425334658, "grad_norm": 1.0859375, "learning_rate": 1.9998549514281342e-05, "loss": 0.4037, "step": 193 }, { "epoch": 0.014682370748781988, "grad_norm": 1.046875, "learning_rate": 1.9998529157334985e-05, "loss": 0.3782, "step": 194 }, { "epoch": 0.014758053072229318, "grad_norm": 1.03125, "learning_rate": 1.9998508658542336e-05, "loss": 0.3376, "step": 195 }, { "epoch": 0.014833735395676647, "grad_norm": 1.140625, "learning_rate": 1.999848801790368e-05, "loss": 0.4202, "step": 196 }, { "epoch": 0.014909417719123977, "grad_norm": 1.0703125, "learning_rate": 1.9998467235419318e-05, "loss": 0.3973, "step": 197 }, { "epoch": 0.014985100042571307, "grad_norm": 1.0546875, "learning_rate": 1.999844631108954e-05, "loss": 0.3603, "step": 198 }, { "epoch": 0.015060782366018637, "grad_norm": 1.03125, "learning_rate": 1.9998425244914643e-05, "loss": 0.3522, "step": 199 }, { "epoch": 0.015136464689465966, "grad_norm": 1.078125, "learning_rate": 1.9998404036894925e-05, "loss": 0.4143, "step": 200 }, { "epoch": 0.015212147012913296, "grad_norm": 1.0625, "learning_rate": 1.999838268703069e-05, "loss": 0.3269, "step": 201 }, { "epoch": 0.015287829336360626, "grad_norm": 3.671875, "learning_rate": 1.9998361195322235e-05, "loss": 0.8023, "step": 202 }, { "epoch": 0.015363511659807956, "grad_norm": 1.0546875, "learning_rate": 1.9998339561769867e-05, "loss": 0.3638, "step": 203 }, { "epoch": 0.015439193983255287, "grad_norm": 0.9765625, "learning_rate": 1.99983177863739e-05, "loss": 0.367, "step": 204 }, { "epoch": 0.015514876306702615, "grad_norm": 1.1015625, "learning_rate": 1.9998295869134633e-05, "loss": 0.401, "step": 205 }, { "epoch": 0.015590558630149945, "grad_norm": 0.953125, "learning_rate": 1.9998273810052387e-05, "loss": 0.2969, "step": 206 }, { "epoch": 0.015666240953597275, "grad_norm": 1.0234375, "learning_rate": 1.9998251609127465e-05, "loss": 0.3651, "step": 207 }, { "epoch": 0.015741923277044604, "grad_norm": 0.9609375, "learning_rate": 1.9998229266360187e-05, "loss": 0.33, "step": 208 }, { "epoch": 0.015817605600491936, "grad_norm": 1.0625, "learning_rate": 1.9998206781750872e-05, "loss": 0.3632, "step": 209 }, { "epoch": 0.015893287923939264, "grad_norm": 1.0078125, "learning_rate": 1.9998184155299832e-05, "loss": 0.319, "step": 210 }, { "epoch": 0.015968970247386596, "grad_norm": 1.109375, "learning_rate": 1.9998161387007396e-05, "loss": 0.413, "step": 211 }, { "epoch": 0.016044652570833925, "grad_norm": 1.1875, "learning_rate": 1.999813847687388e-05, "loss": 0.3912, "step": 212 }, { "epoch": 0.016120334894281253, "grad_norm": 4.5, "learning_rate": 1.9998115424899616e-05, "loss": 0.7889, "step": 213 }, { "epoch": 0.016196017217728585, "grad_norm": 1.0078125, "learning_rate": 1.9998092231084926e-05, "loss": 0.3191, "step": 214 }, { "epoch": 0.016271699541175914, "grad_norm": 1.046875, "learning_rate": 1.999806889543014e-05, "loss": 0.3763, "step": 215 }, { "epoch": 0.016347381864623246, "grad_norm": 1.1484375, "learning_rate": 1.9998045417935587e-05, "loss": 0.4035, "step": 216 }, { "epoch": 0.016423064188070574, "grad_norm": 1.1640625, "learning_rate": 1.9998021798601607e-05, "loss": 0.389, "step": 217 }, { "epoch": 0.016498746511517903, "grad_norm": 0.9921875, "learning_rate": 1.9997998037428528e-05, "loss": 0.3311, "step": 218 }, { "epoch": 0.016574428834965235, "grad_norm": 1.0234375, "learning_rate": 1.999797413441669e-05, "loss": 0.3615, "step": 219 }, { "epoch": 0.016650111158412563, "grad_norm": 1.1015625, "learning_rate": 1.999795008956643e-05, "loss": 0.4046, "step": 220 }, { "epoch": 0.01672579348185989, "grad_norm": 0.95703125, "learning_rate": 1.9997925902878093e-05, "loss": 0.3066, "step": 221 }, { "epoch": 0.016801475805307223, "grad_norm": 1.0078125, "learning_rate": 1.999790157435202e-05, "loss": 0.3129, "step": 222 }, { "epoch": 0.016877158128754552, "grad_norm": 1.078125, "learning_rate": 1.9997877103988555e-05, "loss": 0.3568, "step": 223 }, { "epoch": 0.016952840452201884, "grad_norm": 1.1328125, "learning_rate": 1.999785249178805e-05, "loss": 0.3858, "step": 224 }, { "epoch": 0.017028522775649212, "grad_norm": 1.1484375, "learning_rate": 1.9997827737750848e-05, "loss": 0.3335, "step": 225 }, { "epoch": 0.01710420509909654, "grad_norm": 1.03125, "learning_rate": 1.9997802841877304e-05, "loss": 0.3605, "step": 226 }, { "epoch": 0.017179887422543873, "grad_norm": 1.0390625, "learning_rate": 1.9997777804167765e-05, "loss": 0.3346, "step": 227 }, { "epoch": 0.0172555697459912, "grad_norm": 1.1015625, "learning_rate": 1.9997752624622596e-05, "loss": 0.4157, "step": 228 }, { "epoch": 0.017331252069438533, "grad_norm": 1.0625, "learning_rate": 1.9997727303242146e-05, "loss": 0.3838, "step": 229 }, { "epoch": 0.01740693439288586, "grad_norm": 1.0234375, "learning_rate": 1.999770184002678e-05, "loss": 0.3644, "step": 230 }, { "epoch": 0.01748261671633319, "grad_norm": 1.0078125, "learning_rate": 1.9997676234976854e-05, "loss": 0.3558, "step": 231 }, { "epoch": 0.017558299039780522, "grad_norm": 1.0859375, "learning_rate": 1.9997650488092737e-05, "loss": 0.4005, "step": 232 }, { "epoch": 0.01763398136322785, "grad_norm": 1.0546875, "learning_rate": 1.999762459937479e-05, "loss": 0.3902, "step": 233 }, { "epoch": 0.017709663686675182, "grad_norm": 0.97265625, "learning_rate": 1.999759856882338e-05, "loss": 0.3187, "step": 234 }, { "epoch": 0.01778534601012251, "grad_norm": 1.0546875, "learning_rate": 1.9997572396438883e-05, "loss": 0.3688, "step": 235 }, { "epoch": 0.01786102833356984, "grad_norm": 1.015625, "learning_rate": 1.9997546082221663e-05, "loss": 0.362, "step": 236 }, { "epoch": 0.01793671065701717, "grad_norm": 1.078125, "learning_rate": 1.9997519626172093e-05, "loss": 0.3781, "step": 237 }, { "epoch": 0.0180123929804645, "grad_norm": 1.1640625, "learning_rate": 1.9997493028290552e-05, "loss": 0.4281, "step": 238 }, { "epoch": 0.018088075303911832, "grad_norm": 1.0625, "learning_rate": 1.9997466288577417e-05, "loss": 0.3922, "step": 239 }, { "epoch": 0.01816375762735916, "grad_norm": 1.015625, "learning_rate": 1.9997439407033066e-05, "loss": 0.3504, "step": 240 }, { "epoch": 0.01823943995080649, "grad_norm": 1.1328125, "learning_rate": 1.999741238365788e-05, "loss": 0.3631, "step": 241 }, { "epoch": 0.01831512227425382, "grad_norm": 1.03125, "learning_rate": 1.999738521845224e-05, "loss": 0.342, "step": 242 }, { "epoch": 0.01839080459770115, "grad_norm": 1.1484375, "learning_rate": 1.999735791141654e-05, "loss": 0.4073, "step": 243 }, { "epoch": 0.018466486921148478, "grad_norm": 0.9921875, "learning_rate": 1.9997330462551158e-05, "loss": 0.3581, "step": 244 }, { "epoch": 0.01854216924459581, "grad_norm": 1.09375, "learning_rate": 1.9997302871856492e-05, "loss": 0.3977, "step": 245 }, { "epoch": 0.018617851568043138, "grad_norm": 1.0078125, "learning_rate": 1.9997275139332926e-05, "loss": 0.32, "step": 246 }, { "epoch": 0.01869353389149047, "grad_norm": 1.1171875, "learning_rate": 1.9997247264980858e-05, "loss": 0.4087, "step": 247 }, { "epoch": 0.0187692162149378, "grad_norm": 1.0, "learning_rate": 1.999721924880068e-05, "loss": 0.3344, "step": 248 }, { "epoch": 0.018844898538385127, "grad_norm": 1.1015625, "learning_rate": 1.999719109079279e-05, "loss": 0.4057, "step": 249 }, { "epoch": 0.01892058086183246, "grad_norm": 1.0546875, "learning_rate": 1.999716279095759e-05, "loss": 0.3431, "step": 250 }, { "epoch": 0.018996263185279787, "grad_norm": 1.109375, "learning_rate": 1.999713434929548e-05, "loss": 0.4121, "step": 251 }, { "epoch": 0.01907194550872712, "grad_norm": 1.1015625, "learning_rate": 1.9997105765806864e-05, "loss": 0.3604, "step": 252 }, { "epoch": 0.019147627832174448, "grad_norm": 1.1875, "learning_rate": 1.9997077040492145e-05, "loss": 0.4072, "step": 253 }, { "epoch": 0.019223310155621776, "grad_norm": 8.875, "learning_rate": 1.9997048173351733e-05, "loss": 0.8261, "step": 254 }, { "epoch": 0.019298992479069108, "grad_norm": 1.1171875, "learning_rate": 1.9997019164386043e-05, "loss": 0.3843, "step": 255 }, { "epoch": 0.019374674802516437, "grad_norm": 1.0859375, "learning_rate": 1.9996990013595473e-05, "loss": 0.3688, "step": 256 }, { "epoch": 0.01945035712596377, "grad_norm": 7.625, "learning_rate": 1.9996960720980447e-05, "loss": 0.7332, "step": 257 }, { "epoch": 0.019526039449411097, "grad_norm": 1.1796875, "learning_rate": 1.999693128654138e-05, "loss": 0.3603, "step": 258 }, { "epoch": 0.019601721772858426, "grad_norm": 1.0625, "learning_rate": 1.9996901710278686e-05, "loss": 0.3348, "step": 259 }, { "epoch": 0.019677404096305758, "grad_norm": 1.046875, "learning_rate": 1.9996871992192784e-05, "loss": 0.3308, "step": 260 }, { "epoch": 0.019753086419753086, "grad_norm": 1.0859375, "learning_rate": 1.99968421322841e-05, "loss": 0.3428, "step": 261 }, { "epoch": 0.019828768743200418, "grad_norm": 1.1875, "learning_rate": 1.9996812130553054e-05, "loss": 0.3764, "step": 262 }, { "epoch": 0.019904451066647746, "grad_norm": 1.0703125, "learning_rate": 1.9996781987000073e-05, "loss": 0.4219, "step": 263 }, { "epoch": 0.019980133390095075, "grad_norm": 1.0234375, "learning_rate": 1.9996751701625584e-05, "loss": 0.3385, "step": 264 }, { "epoch": 0.020055815713542407, "grad_norm": 0.9765625, "learning_rate": 1.9996721274430017e-05, "loss": 0.3173, "step": 265 }, { "epoch": 0.020131498036989735, "grad_norm": 1.0703125, "learning_rate": 1.9996690705413803e-05, "loss": 0.3708, "step": 266 }, { "epoch": 0.020207180360437064, "grad_norm": 0.98828125, "learning_rate": 1.9996659994577378e-05, "loss": 0.3627, "step": 267 }, { "epoch": 0.020282862683884396, "grad_norm": 1.03125, "learning_rate": 1.9996629141921177e-05, "loss": 0.3552, "step": 268 }, { "epoch": 0.020358545007331724, "grad_norm": 1.046875, "learning_rate": 1.9996598147445636e-05, "loss": 0.3997, "step": 269 }, { "epoch": 0.020434227330779056, "grad_norm": 1.109375, "learning_rate": 1.9996567011151196e-05, "loss": 0.4172, "step": 270 }, { "epoch": 0.020509909654226385, "grad_norm": 1.109375, "learning_rate": 1.9996535733038298e-05, "loss": 0.4236, "step": 271 }, { "epoch": 0.020585591977673713, "grad_norm": 1.0703125, "learning_rate": 1.9996504313107384e-05, "loss": 0.3889, "step": 272 }, { "epoch": 0.020661274301121045, "grad_norm": 0.9921875, "learning_rate": 1.9996472751358903e-05, "loss": 0.3753, "step": 273 }, { "epoch": 0.020736956624568374, "grad_norm": 1.0078125, "learning_rate": 1.99964410477933e-05, "loss": 0.3838, "step": 274 }, { "epoch": 0.020812638948015705, "grad_norm": 1.1328125, "learning_rate": 1.999640920241103e-05, "loss": 0.4115, "step": 275 }, { "epoch": 0.020888321271463034, "grad_norm": 1.078125, "learning_rate": 1.999637721521254e-05, "loss": 0.3549, "step": 276 }, { "epoch": 0.020964003594910362, "grad_norm": 1.078125, "learning_rate": 1.9996345086198284e-05, "loss": 0.3832, "step": 277 }, { "epoch": 0.021039685918357694, "grad_norm": 1.0703125, "learning_rate": 1.9996312815368718e-05, "loss": 0.3309, "step": 278 }, { "epoch": 0.021115368241805023, "grad_norm": 1.0859375, "learning_rate": 1.99962804027243e-05, "loss": 0.3958, "step": 279 }, { "epoch": 0.021191050565252355, "grad_norm": 0.96484375, "learning_rate": 1.999624784826549e-05, "loss": 0.3342, "step": 280 }, { "epoch": 0.021266732888699683, "grad_norm": 1.0078125, "learning_rate": 1.9996215151992752e-05, "loss": 0.3387, "step": 281 }, { "epoch": 0.021342415212147012, "grad_norm": 1.046875, "learning_rate": 1.999618231390655e-05, "loss": 0.3561, "step": 282 }, { "epoch": 0.021418097535594344, "grad_norm": 1.0078125, "learning_rate": 1.9996149334007345e-05, "loss": 0.3977, "step": 283 }, { "epoch": 0.021493779859041672, "grad_norm": 1.0546875, "learning_rate": 1.9996116212295605e-05, "loss": 0.3302, "step": 284 }, { "epoch": 0.021569462182489004, "grad_norm": 1.0390625, "learning_rate": 1.9996082948771807e-05, "loss": 0.3461, "step": 285 }, { "epoch": 0.021645144505936333, "grad_norm": 1.1171875, "learning_rate": 1.9996049543436417e-05, "loss": 0.4001, "step": 286 }, { "epoch": 0.02172082682938366, "grad_norm": 0.98828125, "learning_rate": 1.9996015996289908e-05, "loss": 0.358, "step": 287 }, { "epoch": 0.021796509152830993, "grad_norm": 1.8125, "learning_rate": 1.9995982307332763e-05, "loss": 0.3572, "step": 288 }, { "epoch": 0.02187219147627832, "grad_norm": 1.078125, "learning_rate": 1.9995948476565453e-05, "loss": 0.3859, "step": 289 }, { "epoch": 0.02194787379972565, "grad_norm": 1.046875, "learning_rate": 1.999591450398846e-05, "loss": 0.3583, "step": 290 }, { "epoch": 0.022023556123172982, "grad_norm": 1.0234375, "learning_rate": 1.9995880389602265e-05, "loss": 0.352, "step": 291 }, { "epoch": 0.02209923844662031, "grad_norm": 0.9921875, "learning_rate": 1.9995846133407356e-05, "loss": 0.3163, "step": 292 }, { "epoch": 0.022174920770067642, "grad_norm": 1.234375, "learning_rate": 1.9995811735404213e-05, "loss": 0.4019, "step": 293 }, { "epoch": 0.02225060309351497, "grad_norm": 1.046875, "learning_rate": 1.999577719559333e-05, "loss": 0.3729, "step": 294 }, { "epoch": 0.0223262854169623, "grad_norm": 0.90625, "learning_rate": 1.9995742513975195e-05, "loss": 0.3154, "step": 295 }, { "epoch": 0.02240196774040963, "grad_norm": 1.0078125, "learning_rate": 1.9995707690550293e-05, "loss": 0.3759, "step": 296 }, { "epoch": 0.02247765006385696, "grad_norm": 1.078125, "learning_rate": 1.999567272531913e-05, "loss": 0.3599, "step": 297 }, { "epoch": 0.02255333238730429, "grad_norm": 1.0234375, "learning_rate": 1.9995637618282196e-05, "loss": 0.3615, "step": 298 }, { "epoch": 0.02262901471075162, "grad_norm": 1.0546875, "learning_rate": 1.9995602369439987e-05, "loss": 0.3826, "step": 299 }, { "epoch": 0.02270469703419895, "grad_norm": 1.0546875, "learning_rate": 1.9995566978793004e-05, "loss": 0.3432, "step": 300 }, { "epoch": 0.02278037935764628, "grad_norm": 5.53125, "learning_rate": 1.9995531446341755e-05, "loss": 1.1258, "step": 301 }, { "epoch": 0.02285606168109361, "grad_norm": 1.1015625, "learning_rate": 1.9995495772086735e-05, "loss": 0.336, "step": 302 }, { "epoch": 0.02293174400454094, "grad_norm": 1.1015625, "learning_rate": 1.999545995602846e-05, "loss": 0.4079, "step": 303 }, { "epoch": 0.02300742632798827, "grad_norm": 1.0859375, "learning_rate": 1.9995423998167428e-05, "loss": 0.3879, "step": 304 }, { "epoch": 0.023083108651435598, "grad_norm": 1.0859375, "learning_rate": 1.9995387898504154e-05, "loss": 0.4143, "step": 305 }, { "epoch": 0.02315879097488293, "grad_norm": 1.0703125, "learning_rate": 1.9995351657039152e-05, "loss": 0.3996, "step": 306 }, { "epoch": 0.02323447329833026, "grad_norm": 1.015625, "learning_rate": 1.9995315273772933e-05, "loss": 0.3307, "step": 307 }, { "epoch": 0.02331015562177759, "grad_norm": 0.8984375, "learning_rate": 1.9995278748706017e-05, "loss": 0.295, "step": 308 }, { "epoch": 0.02338583794522492, "grad_norm": 1.1328125, "learning_rate": 1.9995242081838913e-05, "loss": 0.3863, "step": 309 }, { "epoch": 0.023461520268672247, "grad_norm": 1.0703125, "learning_rate": 1.9995205273172152e-05, "loss": 0.3738, "step": 310 }, { "epoch": 0.02353720259211958, "grad_norm": 1.0703125, "learning_rate": 1.9995168322706255e-05, "loss": 0.423, "step": 311 }, { "epoch": 0.023612884915566908, "grad_norm": 1.1015625, "learning_rate": 1.9995131230441736e-05, "loss": 0.3942, "step": 312 }, { "epoch": 0.023688567239014236, "grad_norm": 0.98828125, "learning_rate": 1.9995093996379132e-05, "loss": 0.3501, "step": 313 }, { "epoch": 0.023764249562461568, "grad_norm": 0.98046875, "learning_rate": 1.999505662051897e-05, "loss": 0.3541, "step": 314 }, { "epoch": 0.023839931885908897, "grad_norm": 0.96875, "learning_rate": 1.9995019102861773e-05, "loss": 0.3411, "step": 315 }, { "epoch": 0.02391561420935623, "grad_norm": 1.046875, "learning_rate": 1.999498144340808e-05, "loss": 0.3286, "step": 316 }, { "epoch": 0.023991296532803557, "grad_norm": 1.046875, "learning_rate": 1.9994943642158423e-05, "loss": 0.358, "step": 317 }, { "epoch": 0.024066978856250885, "grad_norm": 0.921875, "learning_rate": 1.9994905699113342e-05, "loss": 0.332, "step": 318 }, { "epoch": 0.024142661179698217, "grad_norm": 1.03125, "learning_rate": 1.9994867614273366e-05, "loss": 0.3823, "step": 319 }, { "epoch": 0.024218343503145546, "grad_norm": 1.1328125, "learning_rate": 1.9994829387639044e-05, "loss": 0.4049, "step": 320 }, { "epoch": 0.024294025826592878, "grad_norm": 1.09375, "learning_rate": 1.9994791019210916e-05, "loss": 0.3318, "step": 321 }, { "epoch": 0.024369708150040206, "grad_norm": 3.25, "learning_rate": 1.9994752508989524e-05, "loss": 0.7194, "step": 322 }, { "epoch": 0.024445390473487535, "grad_norm": 0.98828125, "learning_rate": 1.999471385697542e-05, "loss": 0.3353, "step": 323 }, { "epoch": 0.024521072796934867, "grad_norm": 0.94921875, "learning_rate": 1.9994675063169144e-05, "loss": 0.3372, "step": 324 }, { "epoch": 0.024596755120382195, "grad_norm": 1.1015625, "learning_rate": 1.999463612757125e-05, "loss": 0.3642, "step": 325 }, { "epoch": 0.024672437443829527, "grad_norm": 1.015625, "learning_rate": 1.9994597050182296e-05, "loss": 0.3833, "step": 326 }, { "epoch": 0.024748119767276856, "grad_norm": 1.1015625, "learning_rate": 1.9994557831002827e-05, "loss": 0.3944, "step": 327 }, { "epoch": 0.024823802090724184, "grad_norm": 0.9375, "learning_rate": 1.9994518470033404e-05, "loss": 0.3282, "step": 328 }, { "epoch": 0.024899484414171516, "grad_norm": 1.0234375, "learning_rate": 1.999447896727459e-05, "loss": 0.3873, "step": 329 }, { "epoch": 0.024975166737618845, "grad_norm": 1.015625, "learning_rate": 1.999443932272694e-05, "loss": 0.3365, "step": 330 }, { "epoch": 0.025050849061066176, "grad_norm": 1.0, "learning_rate": 1.9994399536391014e-05, "loss": 0.3776, "step": 331 }, { "epoch": 0.025126531384513505, "grad_norm": 0.96875, "learning_rate": 1.999435960826738e-05, "loss": 0.31, "step": 332 }, { "epoch": 0.025202213707960833, "grad_norm": 1.0625, "learning_rate": 1.9994319538356602e-05, "loss": 0.3714, "step": 333 }, { "epoch": 0.025277896031408165, "grad_norm": 0.9609375, "learning_rate": 1.9994279326659254e-05, "loss": 0.3361, "step": 334 }, { "epoch": 0.025353578354855494, "grad_norm": 1.0078125, "learning_rate": 1.9994238973175902e-05, "loss": 0.3584, "step": 335 }, { "epoch": 0.025429260678302822, "grad_norm": 6.6875, "learning_rate": 1.999419847790712e-05, "loss": 0.7323, "step": 336 }, { "epoch": 0.025504943001750154, "grad_norm": 0.98828125, "learning_rate": 1.9994157840853484e-05, "loss": 0.3428, "step": 337 }, { "epoch": 0.025580625325197483, "grad_norm": 1.0703125, "learning_rate": 1.9994117062015563e-05, "loss": 0.3756, "step": 338 }, { "epoch": 0.025656307648644815, "grad_norm": 0.953125, "learning_rate": 1.9994076141393946e-05, "loss": 0.3094, "step": 339 }, { "epoch": 0.025731989972092143, "grad_norm": 0.953125, "learning_rate": 1.9994035078989205e-05, "loss": 0.3173, "step": 340 }, { "epoch": 0.02580767229553947, "grad_norm": 1.21875, "learning_rate": 1.999399387480193e-05, "loss": 0.3591, "step": 341 }, { "epoch": 0.025883354618986804, "grad_norm": 1.15625, "learning_rate": 1.99939525288327e-05, "loss": 0.3846, "step": 342 }, { "epoch": 0.025959036942434132, "grad_norm": 1.2265625, "learning_rate": 1.99939110410821e-05, "loss": 0.381, "step": 343 }, { "epoch": 0.026034719265881464, "grad_norm": 1.0546875, "learning_rate": 1.9993869411550724e-05, "loss": 0.3303, "step": 344 }, { "epoch": 0.026110401589328792, "grad_norm": 1.0390625, "learning_rate": 1.999382764023916e-05, "loss": 0.3957, "step": 345 }, { "epoch": 0.02618608391277612, "grad_norm": 0.921875, "learning_rate": 1.9993785727148004e-05, "loss": 0.311, "step": 346 }, { "epoch": 0.026261766236223453, "grad_norm": 0.91796875, "learning_rate": 1.9993743672277842e-05, "loss": 0.2958, "step": 347 }, { "epoch": 0.02633744855967078, "grad_norm": 1.0859375, "learning_rate": 1.999370147562928e-05, "loss": 0.338, "step": 348 }, { "epoch": 0.026413130883118113, "grad_norm": 0.8984375, "learning_rate": 1.999365913720291e-05, "loss": 0.265, "step": 349 }, { "epoch": 0.026488813206565442, "grad_norm": 0.9921875, "learning_rate": 1.999361665699934e-05, "loss": 0.3467, "step": 350 }, { "epoch": 0.02656449553001277, "grad_norm": 0.99609375, "learning_rate": 1.9993574035019164e-05, "loss": 0.3734, "step": 351 }, { "epoch": 0.026640177853460102, "grad_norm": 9.0625, "learning_rate": 1.999353127126299e-05, "loss": 0.7803, "step": 352 }, { "epoch": 0.02671586017690743, "grad_norm": 1.046875, "learning_rate": 1.9993488365731427e-05, "loss": 0.3987, "step": 353 }, { "epoch": 0.026791542500354763, "grad_norm": 1.03125, "learning_rate": 1.9993445318425086e-05, "loss": 0.3616, "step": 354 }, { "epoch": 0.02686722482380209, "grad_norm": 0.98828125, "learning_rate": 1.999340212934457e-05, "loss": 0.3683, "step": 355 }, { "epoch": 0.02694290714724942, "grad_norm": 1.0703125, "learning_rate": 1.9993358798490498e-05, "loss": 0.3761, "step": 356 }, { "epoch": 0.02701858947069675, "grad_norm": 1.0390625, "learning_rate": 1.999331532586348e-05, "loss": 0.3949, "step": 357 }, { "epoch": 0.02709427179414408, "grad_norm": 0.97265625, "learning_rate": 1.9993271711464133e-05, "loss": 0.3537, "step": 358 }, { "epoch": 0.02716995411759141, "grad_norm": 0.91015625, "learning_rate": 1.999322795529308e-05, "loss": 0.3376, "step": 359 }, { "epoch": 0.02724563644103874, "grad_norm": 0.99609375, "learning_rate": 1.9993184057350937e-05, "loss": 0.3848, "step": 360 }, { "epoch": 0.02732131876448607, "grad_norm": 1.1796875, "learning_rate": 1.9993140017638332e-05, "loss": 0.4399, "step": 361 }, { "epoch": 0.0273970010879334, "grad_norm": 1.125, "learning_rate": 1.999309583615589e-05, "loss": 0.4621, "step": 362 }, { "epoch": 0.02747268341138073, "grad_norm": 1.0078125, "learning_rate": 1.9993051512904233e-05, "loss": 0.3558, "step": 363 }, { "epoch": 0.027548365734828058, "grad_norm": 1.0078125, "learning_rate": 1.9993007047883988e-05, "loss": 0.3733, "step": 364 }, { "epoch": 0.02762404805827539, "grad_norm": 0.99609375, "learning_rate": 1.999296244109579e-05, "loss": 0.3368, "step": 365 }, { "epoch": 0.027699730381722718, "grad_norm": 1.0703125, "learning_rate": 1.9992917692540273e-05, "loss": 0.3937, "step": 366 }, { "epoch": 0.02777541270517005, "grad_norm": 0.96484375, "learning_rate": 1.999287280221807e-05, "loss": 0.3425, "step": 367 }, { "epoch": 0.02785109502861738, "grad_norm": 1.1015625, "learning_rate": 1.9992827770129816e-05, "loss": 0.4042, "step": 368 }, { "epoch": 0.027926777352064707, "grad_norm": 1.015625, "learning_rate": 1.9992782596276155e-05, "loss": 0.3324, "step": 369 }, { "epoch": 0.02800245967551204, "grad_norm": 1.0703125, "learning_rate": 1.9992737280657723e-05, "loss": 0.3725, "step": 370 }, { "epoch": 0.028078141998959368, "grad_norm": 1.078125, "learning_rate": 1.999269182327517e-05, "loss": 0.3757, "step": 371 }, { "epoch": 0.0281538243224067, "grad_norm": 0.984375, "learning_rate": 1.9992646224129127e-05, "loss": 0.362, "step": 372 }, { "epoch": 0.028229506645854028, "grad_norm": 0.921875, "learning_rate": 1.9992600483220254e-05, "loss": 0.3548, "step": 373 }, { "epoch": 0.028305188969301356, "grad_norm": 0.9921875, "learning_rate": 1.9992554600549192e-05, "loss": 0.3442, "step": 374 }, { "epoch": 0.02838087129274869, "grad_norm": 1.03125, "learning_rate": 1.9992508576116595e-05, "loss": 0.3431, "step": 375 }, { "epoch": 0.028456553616196017, "grad_norm": 1.03125, "learning_rate": 1.9992462409923118e-05, "loss": 0.3936, "step": 376 }, { "epoch": 0.02853223593964335, "grad_norm": 1.046875, "learning_rate": 1.9992416101969417e-05, "loss": 0.3671, "step": 377 }, { "epoch": 0.028607918263090677, "grad_norm": 0.8984375, "learning_rate": 1.9992369652256143e-05, "loss": 0.3083, "step": 378 }, { "epoch": 0.028683600586538006, "grad_norm": 1.0703125, "learning_rate": 1.999232306078396e-05, "loss": 0.3725, "step": 379 }, { "epoch": 0.028759282909985338, "grad_norm": 1.171875, "learning_rate": 1.9992276327553525e-05, "loss": 0.3949, "step": 380 }, { "epoch": 0.028834965233432666, "grad_norm": 1.21875, "learning_rate": 1.99922294525655e-05, "loss": 0.4306, "step": 381 }, { "epoch": 0.028910647556879995, "grad_norm": 0.96484375, "learning_rate": 1.999218243582056e-05, "loss": 0.3431, "step": 382 }, { "epoch": 0.028986329880327327, "grad_norm": 0.99609375, "learning_rate": 1.9992135277319356e-05, "loss": 0.343, "step": 383 }, { "epoch": 0.029062012203774655, "grad_norm": 1.1171875, "learning_rate": 1.9992087977062577e-05, "loss": 0.3378, "step": 384 }, { "epoch": 0.029137694527221987, "grad_norm": 1.0, "learning_rate": 1.9992040535050872e-05, "loss": 0.3302, "step": 385 }, { "epoch": 0.029213376850669315, "grad_norm": 1.1015625, "learning_rate": 1.999199295128493e-05, "loss": 0.4263, "step": 386 }, { "epoch": 0.029289059174116644, "grad_norm": 1.0234375, "learning_rate": 1.9991945225765425e-05, "loss": 0.3456, "step": 387 }, { "epoch": 0.029364741497563976, "grad_norm": 1.15625, "learning_rate": 1.9991897358493023e-05, "loss": 0.4506, "step": 388 }, { "epoch": 0.029440423821011304, "grad_norm": 4.0625, "learning_rate": 1.9991849349468414e-05, "loss": 0.7689, "step": 389 }, { "epoch": 0.029516106144458636, "grad_norm": 1.0859375, "learning_rate": 1.999180119869228e-05, "loss": 0.3778, "step": 390 }, { "epoch": 0.029591788467905965, "grad_norm": 0.98828125, "learning_rate": 1.9991752906165292e-05, "loss": 0.3517, "step": 391 }, { "epoch": 0.029667470791353293, "grad_norm": 1.015625, "learning_rate": 1.9991704471888145e-05, "loss": 0.3383, "step": 392 }, { "epoch": 0.029743153114800625, "grad_norm": 6.21875, "learning_rate": 1.9991655895861528e-05, "loss": 0.8147, "step": 393 }, { "epoch": 0.029818835438247954, "grad_norm": 0.984375, "learning_rate": 1.999160717808612e-05, "loss": 0.3261, "step": 394 }, { "epoch": 0.029894517761695286, "grad_norm": 1.0546875, "learning_rate": 1.999155831856262e-05, "loss": 0.3862, "step": 395 }, { "epoch": 0.029970200085142614, "grad_norm": 0.99609375, "learning_rate": 1.9991509317291716e-05, "loss": 0.3471, "step": 396 }, { "epoch": 0.030045882408589943, "grad_norm": 1.046875, "learning_rate": 1.9991460174274112e-05, "loss": 0.4, "step": 397 }, { "epoch": 0.030121564732037275, "grad_norm": 1.046875, "learning_rate": 1.9991410889510498e-05, "loss": 0.4054, "step": 398 }, { "epoch": 0.030197247055484603, "grad_norm": 1.09375, "learning_rate": 1.999136146300157e-05, "loss": 0.3633, "step": 399 }, { "epoch": 0.03027292937893193, "grad_norm": 1.015625, "learning_rate": 1.9991311894748037e-05, "loss": 0.3187, "step": 400 }, { "epoch": 0.030348611702379263, "grad_norm": 0.9765625, "learning_rate": 1.99912621847506e-05, "loss": 0.3414, "step": 401 }, { "epoch": 0.030424294025826592, "grad_norm": 1.046875, "learning_rate": 1.9991212333009965e-05, "loss": 0.3762, "step": 402 }, { "epoch": 0.030499976349273924, "grad_norm": 0.9453125, "learning_rate": 1.9991162339526837e-05, "loss": 0.3128, "step": 403 }, { "epoch": 0.030575658672721252, "grad_norm": 0.9921875, "learning_rate": 1.9991112204301923e-05, "loss": 0.3288, "step": 404 }, { "epoch": 0.03065134099616858, "grad_norm": 1.109375, "learning_rate": 1.9991061927335935e-05, "loss": 0.3776, "step": 405 }, { "epoch": 0.030727023319615913, "grad_norm": 0.9453125, "learning_rate": 1.9991011508629592e-05, "loss": 0.3483, "step": 406 }, { "epoch": 0.03080270564306324, "grad_norm": 1.0078125, "learning_rate": 1.9990960948183605e-05, "loss": 0.375, "step": 407 }, { "epoch": 0.030878387966510573, "grad_norm": 1.015625, "learning_rate": 1.9990910245998693e-05, "loss": 0.3565, "step": 408 }, { "epoch": 0.0309540702899579, "grad_norm": 0.94921875, "learning_rate": 1.9990859402075572e-05, "loss": 0.3278, "step": 409 }, { "epoch": 0.03102975261340523, "grad_norm": 32.25, "learning_rate": 1.999080841641497e-05, "loss": 1.1714, "step": 410 }, { "epoch": 0.031105434936852562, "grad_norm": 1.0078125, "learning_rate": 1.99907572890176e-05, "loss": 0.3316, "step": 411 }, { "epoch": 0.03118111726029989, "grad_norm": 1.171875, "learning_rate": 1.9990706019884195e-05, "loss": 0.4189, "step": 412 }, { "epoch": 0.03125679958374722, "grad_norm": 1.0234375, "learning_rate": 1.9990654609015482e-05, "loss": 0.3271, "step": 413 }, { "epoch": 0.03133248190719455, "grad_norm": 0.921875, "learning_rate": 1.9990603056412187e-05, "loss": 0.3243, "step": 414 }, { "epoch": 0.03140816423064188, "grad_norm": 1.015625, "learning_rate": 1.9990551362075042e-05, "loss": 0.3185, "step": 415 }, { "epoch": 0.03148384655408921, "grad_norm": 15.75, "learning_rate": 1.9990499526004785e-05, "loss": 0.9282, "step": 416 }, { "epoch": 0.03155952887753654, "grad_norm": 1.1484375, "learning_rate": 1.999044754820215e-05, "loss": 0.3902, "step": 417 }, { "epoch": 0.03163521120098387, "grad_norm": 1.078125, "learning_rate": 1.9990395428667864e-05, "loss": 0.3887, "step": 418 }, { "epoch": 0.0317108935244312, "grad_norm": 1.140625, "learning_rate": 1.999034316740268e-05, "loss": 0.3495, "step": 419 }, { "epoch": 0.03178657584787853, "grad_norm": 1.09375, "learning_rate": 1.9990290764407336e-05, "loss": 0.4231, "step": 420 }, { "epoch": 0.03186225817132586, "grad_norm": 0.97265625, "learning_rate": 1.999023821968257e-05, "loss": 0.3373, "step": 421 }, { "epoch": 0.03193794049477319, "grad_norm": 12.9375, "learning_rate": 1.9990185533229134e-05, "loss": 0.6391, "step": 422 }, { "epoch": 0.03201362281822052, "grad_norm": 1.0078125, "learning_rate": 1.999013270504777e-05, "loss": 0.3642, "step": 423 }, { "epoch": 0.03208930514166785, "grad_norm": 1.046875, "learning_rate": 1.999007973513923e-05, "loss": 0.4051, "step": 424 }, { "epoch": 0.03216498746511518, "grad_norm": 0.96484375, "learning_rate": 1.9990026623504267e-05, "loss": 0.3531, "step": 425 }, { "epoch": 0.03224066978856251, "grad_norm": 1.0703125, "learning_rate": 1.9989973370143633e-05, "loss": 0.3922, "step": 426 }, { "epoch": 0.03231635211200984, "grad_norm": 1.0390625, "learning_rate": 1.998991997505808e-05, "loss": 0.3621, "step": 427 }, { "epoch": 0.03239203443545717, "grad_norm": 1.09375, "learning_rate": 1.9989866438248372e-05, "loss": 0.3859, "step": 428 }, { "epoch": 0.032467716758904495, "grad_norm": 1.0, "learning_rate": 1.9989812759715264e-05, "loss": 0.3637, "step": 429 }, { "epoch": 0.03254339908235183, "grad_norm": 0.97265625, "learning_rate": 1.998975893945952e-05, "loss": 0.3548, "step": 430 }, { "epoch": 0.03261908140579916, "grad_norm": 0.94140625, "learning_rate": 1.9989704977481903e-05, "loss": 0.3463, "step": 431 }, { "epoch": 0.03269476372924649, "grad_norm": 1.0, "learning_rate": 1.998965087378318e-05, "loss": 0.3561, "step": 432 }, { "epoch": 0.032770446052693816, "grad_norm": 1.1171875, "learning_rate": 1.998959662836411e-05, "loss": 0.3859, "step": 433 }, { "epoch": 0.03284612837614115, "grad_norm": 1.03125, "learning_rate": 1.9989542241225473e-05, "loss": 0.3837, "step": 434 }, { "epoch": 0.03292181069958848, "grad_norm": 1.0, "learning_rate": 1.9989487712368034e-05, "loss": 0.3426, "step": 435 }, { "epoch": 0.032997493023035805, "grad_norm": 1.0703125, "learning_rate": 1.998943304179257e-05, "loss": 0.3876, "step": 436 }, { "epoch": 0.03307317534648314, "grad_norm": 0.9453125, "learning_rate": 1.998937822949986e-05, "loss": 0.3543, "step": 437 }, { "epoch": 0.03314885766993047, "grad_norm": 1.015625, "learning_rate": 1.9989323275490674e-05, "loss": 0.364, "step": 438 }, { "epoch": 0.033224539993377794, "grad_norm": 1.03125, "learning_rate": 1.9989268179765797e-05, "loss": 0.3972, "step": 439 }, { "epoch": 0.033300222316825126, "grad_norm": 0.921875, "learning_rate": 1.9989212942326005e-05, "loss": 0.3204, "step": 440 }, { "epoch": 0.03337590464027246, "grad_norm": 0.88671875, "learning_rate": 1.998915756317209e-05, "loss": 0.3114, "step": 441 }, { "epoch": 0.03345158696371978, "grad_norm": 0.953125, "learning_rate": 1.998910204230483e-05, "loss": 0.3084, "step": 442 }, { "epoch": 0.033527269287167115, "grad_norm": 0.93359375, "learning_rate": 1.9989046379725016e-05, "loss": 0.337, "step": 443 }, { "epoch": 0.03360295161061445, "grad_norm": 1.0, "learning_rate": 1.998899057543344e-05, "loss": 0.3818, "step": 444 }, { "epoch": 0.03367863393406178, "grad_norm": 1.3828125, "learning_rate": 1.998893462943089e-05, "loss": 0.3819, "step": 445 }, { "epoch": 0.033754316257509104, "grad_norm": 0.96875, "learning_rate": 1.9988878541718154e-05, "loss": 0.3415, "step": 446 }, { "epoch": 0.033829998580956436, "grad_norm": 1.0078125, "learning_rate": 1.998882231229604e-05, "loss": 0.3559, "step": 447 }, { "epoch": 0.03390568090440377, "grad_norm": 1.0546875, "learning_rate": 1.998876594116534e-05, "loss": 0.3875, "step": 448 }, { "epoch": 0.03398136322785109, "grad_norm": 1.0703125, "learning_rate": 1.9988709428326857e-05, "loss": 0.3778, "step": 449 }, { "epoch": 0.034057045551298425, "grad_norm": 1.0859375, "learning_rate": 1.998865277378138e-05, "loss": 0.4017, "step": 450 }, { "epoch": 0.03413272787474576, "grad_norm": 1.0390625, "learning_rate": 1.9988595977529733e-05, "loss": 0.3848, "step": 451 }, { "epoch": 0.03420841019819308, "grad_norm": 0.99609375, "learning_rate": 1.9988539039572705e-05, "loss": 0.3476, "step": 452 }, { "epoch": 0.034284092521640414, "grad_norm": 1.0078125, "learning_rate": 1.9988481959911114e-05, "loss": 0.3858, "step": 453 }, { "epoch": 0.034359774845087745, "grad_norm": 1.0625, "learning_rate": 1.9988424738545762e-05, "loss": 0.3817, "step": 454 }, { "epoch": 0.03443545716853508, "grad_norm": 0.953125, "learning_rate": 1.9988367375477464e-05, "loss": 0.3554, "step": 455 }, { "epoch": 0.0345111394919824, "grad_norm": 1.015625, "learning_rate": 1.9988309870707035e-05, "loss": 0.3682, "step": 456 }, { "epoch": 0.034586821815429734, "grad_norm": 1.078125, "learning_rate": 1.998825222423529e-05, "loss": 0.3893, "step": 457 }, { "epoch": 0.034662504138877066, "grad_norm": 0.9375, "learning_rate": 1.9988194436063047e-05, "loss": 0.3273, "step": 458 }, { "epoch": 0.03473818646232439, "grad_norm": 1.0234375, "learning_rate": 1.9988136506191126e-05, "loss": 0.3448, "step": 459 }, { "epoch": 0.03481386878577172, "grad_norm": 1.03125, "learning_rate": 1.998807843462035e-05, "loss": 0.3604, "step": 460 }, { "epoch": 0.034889551109219055, "grad_norm": 1.4375, "learning_rate": 1.998802022135154e-05, "loss": 0.3509, "step": 461 }, { "epoch": 0.03496523343266638, "grad_norm": 1.03125, "learning_rate": 1.998796186638552e-05, "loss": 0.3841, "step": 462 }, { "epoch": 0.03504091575611371, "grad_norm": 0.984375, "learning_rate": 1.9987903369723126e-05, "loss": 0.3817, "step": 463 }, { "epoch": 0.035116598079561044, "grad_norm": 1.015625, "learning_rate": 1.998784473136518e-05, "loss": 0.362, "step": 464 }, { "epoch": 0.03519228040300837, "grad_norm": 1.1796875, "learning_rate": 1.9987785951312516e-05, "loss": 0.3935, "step": 465 }, { "epoch": 0.0352679627264557, "grad_norm": 1.015625, "learning_rate": 1.9987727029565972e-05, "loss": 0.3549, "step": 466 }, { "epoch": 0.03534364504990303, "grad_norm": 0.984375, "learning_rate": 1.9987667966126377e-05, "loss": 0.3687, "step": 467 }, { "epoch": 0.035419327373350365, "grad_norm": 1.015625, "learning_rate": 1.9987608760994576e-05, "loss": 0.3878, "step": 468 }, { "epoch": 0.03549500969679769, "grad_norm": 1.0234375, "learning_rate": 1.9987549414171402e-05, "loss": 0.3654, "step": 469 }, { "epoch": 0.03557069202024502, "grad_norm": 0.99609375, "learning_rate": 1.99874899256577e-05, "loss": 0.3263, "step": 470 }, { "epoch": 0.035646374343692354, "grad_norm": 0.9296875, "learning_rate": 1.9987430295454318e-05, "loss": 0.3389, "step": 471 }, { "epoch": 0.03572205666713968, "grad_norm": 1.1171875, "learning_rate": 1.9987370523562095e-05, "loss": 0.3571, "step": 472 }, { "epoch": 0.03579773899058701, "grad_norm": 1.0390625, "learning_rate": 1.998731060998188e-05, "loss": 0.3943, "step": 473 }, { "epoch": 0.03587342131403434, "grad_norm": 0.9375, "learning_rate": 1.9987250554714532e-05, "loss": 0.3437, "step": 474 }, { "epoch": 0.03594910363748167, "grad_norm": 1.0546875, "learning_rate": 1.9987190357760893e-05, "loss": 0.4281, "step": 475 }, { "epoch": 0.036024785960929, "grad_norm": 1.0625, "learning_rate": 1.998713001912182e-05, "loss": 0.3613, "step": 476 }, { "epoch": 0.03610046828437633, "grad_norm": 1.140625, "learning_rate": 1.998706953879817e-05, "loss": 0.3518, "step": 477 }, { "epoch": 0.036176150607823664, "grad_norm": 1.0390625, "learning_rate": 1.9987008916790795e-05, "loss": 0.4117, "step": 478 }, { "epoch": 0.03625183293127099, "grad_norm": 0.98828125, "learning_rate": 1.9986948153100566e-05, "loss": 0.3983, "step": 479 }, { "epoch": 0.03632751525471832, "grad_norm": 1.0859375, "learning_rate": 1.998688724772834e-05, "loss": 0.4037, "step": 480 }, { "epoch": 0.03640319757816565, "grad_norm": 1.0625, "learning_rate": 1.9986826200674974e-05, "loss": 0.3783, "step": 481 }, { "epoch": 0.03647887990161298, "grad_norm": 1.0625, "learning_rate": 1.9986765011941346e-05, "loss": 0.3685, "step": 482 }, { "epoch": 0.03655456222506031, "grad_norm": 0.90234375, "learning_rate": 1.9986703681528318e-05, "loss": 0.3238, "step": 483 }, { "epoch": 0.03663024454850764, "grad_norm": 1.078125, "learning_rate": 1.9986642209436758e-05, "loss": 0.3574, "step": 484 }, { "epoch": 0.036705926871954966, "grad_norm": 1.09375, "learning_rate": 1.9986580595667543e-05, "loss": 0.3606, "step": 485 }, { "epoch": 0.0367816091954023, "grad_norm": 9.9375, "learning_rate": 1.9986518840221544e-05, "loss": 0.7674, "step": 486 }, { "epoch": 0.03685729151884963, "grad_norm": 1.0, "learning_rate": 1.998645694309964e-05, "loss": 0.3646, "step": 487 }, { "epoch": 0.036932973842296955, "grad_norm": 1.0078125, "learning_rate": 1.99863949043027e-05, "loss": 0.368, "step": 488 }, { "epoch": 0.03700865616574429, "grad_norm": 1.0390625, "learning_rate": 1.9986332723831615e-05, "loss": 0.3912, "step": 489 }, { "epoch": 0.03708433848919162, "grad_norm": 1.1015625, "learning_rate": 1.9986270401687266e-05, "loss": 0.4062, "step": 490 }, { "epoch": 0.03716002081263895, "grad_norm": 1.046875, "learning_rate": 1.998620793787053e-05, "loss": 0.421, "step": 491 }, { "epoch": 0.037235703136086276, "grad_norm": 0.8984375, "learning_rate": 1.99861453323823e-05, "loss": 0.3463, "step": 492 }, { "epoch": 0.03731138545953361, "grad_norm": 0.953125, "learning_rate": 1.998608258522346e-05, "loss": 0.3751, "step": 493 }, { "epoch": 0.03738706778298094, "grad_norm": 1.0234375, "learning_rate": 1.9986019696394904e-05, "loss": 0.3682, "step": 494 }, { "epoch": 0.037462750106428265, "grad_norm": 1.0234375, "learning_rate": 1.9985956665897517e-05, "loss": 0.3887, "step": 495 }, { "epoch": 0.0375384324298756, "grad_norm": 0.98046875, "learning_rate": 1.9985893493732204e-05, "loss": 0.34, "step": 496 }, { "epoch": 0.03761411475332293, "grad_norm": 0.91796875, "learning_rate": 1.9985830179899854e-05, "loss": 0.3308, "step": 497 }, { "epoch": 0.037689797076770254, "grad_norm": 1.125, "learning_rate": 1.9985766724401365e-05, "loss": 0.3657, "step": 498 }, { "epoch": 0.037765479400217586, "grad_norm": 0.98828125, "learning_rate": 1.9985703127237643e-05, "loss": 0.367, "step": 499 }, { "epoch": 0.03784116172366492, "grad_norm": 0.91015625, "learning_rate": 1.998563938840958e-05, "loss": 0.3113, "step": 500 }, { "epoch": 0.03791684404711225, "grad_norm": 1.0703125, "learning_rate": 1.998557550791809e-05, "loss": 0.3587, "step": 501 }, { "epoch": 0.037992526370559575, "grad_norm": 0.9921875, "learning_rate": 1.9985511485764075e-05, "loss": 0.3151, "step": 502 }, { "epoch": 0.03806820869400691, "grad_norm": 1.125, "learning_rate": 1.9985447321948443e-05, "loss": 0.3864, "step": 503 }, { "epoch": 0.03814389101745424, "grad_norm": 1.0, "learning_rate": 1.9985383016472108e-05, "loss": 0.3827, "step": 504 }, { "epoch": 0.038219573340901564, "grad_norm": 0.9140625, "learning_rate": 1.9985318569335976e-05, "loss": 0.3088, "step": 505 }, { "epoch": 0.038295255664348896, "grad_norm": 0.99609375, "learning_rate": 1.9985253980540967e-05, "loss": 0.3707, "step": 506 }, { "epoch": 0.03837093798779623, "grad_norm": 0.98828125, "learning_rate": 1.9985189250087992e-05, "loss": 0.3577, "step": 507 }, { "epoch": 0.03844662031124355, "grad_norm": 1.015625, "learning_rate": 1.9985124377977975e-05, "loss": 0.3881, "step": 508 }, { "epoch": 0.038522302634690885, "grad_norm": 0.98828125, "learning_rate": 1.9985059364211832e-05, "loss": 0.3535, "step": 509 }, { "epoch": 0.038597984958138216, "grad_norm": 0.94140625, "learning_rate": 1.998499420879049e-05, "loss": 0.3178, "step": 510 }, { "epoch": 0.03867366728158554, "grad_norm": 1.0625, "learning_rate": 1.9984928911714866e-05, "loss": 0.3641, "step": 511 }, { "epoch": 0.03874934960503287, "grad_norm": 0.99609375, "learning_rate": 1.9984863472985892e-05, "loss": 0.3771, "step": 512 }, { "epoch": 0.038825031928480205, "grad_norm": 1.0, "learning_rate": 1.9984797892604496e-05, "loss": 0.4062, "step": 513 }, { "epoch": 0.03890071425192754, "grad_norm": 0.96875, "learning_rate": 1.998473217057161e-05, "loss": 0.3572, "step": 514 }, { "epoch": 0.03897639657537486, "grad_norm": 0.94921875, "learning_rate": 1.998466630688816e-05, "loss": 0.3158, "step": 515 }, { "epoch": 0.039052078898822194, "grad_norm": 1.03125, "learning_rate": 1.9984600301555085e-05, "loss": 0.354, "step": 516 }, { "epoch": 0.039127761222269526, "grad_norm": 6.0625, "learning_rate": 1.998453415457332e-05, "loss": 0.8184, "step": 517 }, { "epoch": 0.03920344354571685, "grad_norm": 1.0, "learning_rate": 1.9984467865943803e-05, "loss": 0.3529, "step": 518 }, { "epoch": 0.03927912586916418, "grad_norm": 3.96875, "learning_rate": 1.998440143566748e-05, "loss": 0.7833, "step": 519 }, { "epoch": 0.039354808192611515, "grad_norm": 0.9765625, "learning_rate": 1.9984334863745284e-05, "loss": 0.3461, "step": 520 }, { "epoch": 0.03943049051605884, "grad_norm": 0.98828125, "learning_rate": 1.998426815017817e-05, "loss": 0.3434, "step": 521 }, { "epoch": 0.03950617283950617, "grad_norm": 1.0234375, "learning_rate": 1.998420129496707e-05, "loss": 0.3631, "step": 522 }, { "epoch": 0.039581855162953504, "grad_norm": 0.85546875, "learning_rate": 1.9984134298112948e-05, "loss": 0.2847, "step": 523 }, { "epoch": 0.039657537486400836, "grad_norm": 0.9453125, "learning_rate": 1.9984067159616746e-05, "loss": 0.3528, "step": 524 }, { "epoch": 0.03973321980984816, "grad_norm": 0.98828125, "learning_rate": 1.998399987947942e-05, "loss": 0.3633, "step": 525 }, { "epoch": 0.03980890213329549, "grad_norm": 1.015625, "learning_rate": 1.9983932457701918e-05, "loss": 0.3944, "step": 526 }, { "epoch": 0.039884584456742825, "grad_norm": 1.0, "learning_rate": 1.9983864894285204e-05, "loss": 0.332, "step": 527 }, { "epoch": 0.03996026678019015, "grad_norm": 0.9453125, "learning_rate": 1.9983797189230232e-05, "loss": 0.3522, "step": 528 }, { "epoch": 0.04003594910363748, "grad_norm": 1.03125, "learning_rate": 1.9983729342537965e-05, "loss": 0.3353, "step": 529 }, { "epoch": 0.040111631427084814, "grad_norm": 0.96484375, "learning_rate": 1.9983661354209365e-05, "loss": 0.3008, "step": 530 }, { "epoch": 0.04018731375053214, "grad_norm": 0.9921875, "learning_rate": 1.9983593224245393e-05, "loss": 0.3544, "step": 531 }, { "epoch": 0.04026299607397947, "grad_norm": 0.94921875, "learning_rate": 1.9983524952647024e-05, "loss": 0.3672, "step": 532 }, { "epoch": 0.0403386783974268, "grad_norm": 1.0, "learning_rate": 1.9983456539415218e-05, "loss": 0.396, "step": 533 }, { "epoch": 0.04041436072087413, "grad_norm": 1.0546875, "learning_rate": 1.998338798455095e-05, "loss": 0.3994, "step": 534 }, { "epoch": 0.04049004304432146, "grad_norm": 0.953125, "learning_rate": 1.998331928805519e-05, "loss": 0.3544, "step": 535 }, { "epoch": 0.04056572536776879, "grad_norm": 1.015625, "learning_rate": 1.9983250449928915e-05, "loss": 0.3955, "step": 536 }, { "epoch": 0.04064140769121612, "grad_norm": 0.95703125, "learning_rate": 1.99831814701731e-05, "loss": 0.3516, "step": 537 }, { "epoch": 0.04071709001466345, "grad_norm": 0.9296875, "learning_rate": 1.9983112348788725e-05, "loss": 0.3386, "step": 538 }, { "epoch": 0.04079277233811078, "grad_norm": 1.15625, "learning_rate": 1.998304308577677e-05, "loss": 0.3939, "step": 539 }, { "epoch": 0.04086845466155811, "grad_norm": 0.9140625, "learning_rate": 1.9982973681138215e-05, "loss": 0.32, "step": 540 }, { "epoch": 0.04094413698500544, "grad_norm": 4.46875, "learning_rate": 1.9982904134874048e-05, "loss": 0.775, "step": 541 }, { "epoch": 0.04101981930845277, "grad_norm": 1.03125, "learning_rate": 1.9982834446985257e-05, "loss": 0.3445, "step": 542 }, { "epoch": 0.0410955016319001, "grad_norm": 1.0234375, "learning_rate": 1.9982764617472825e-05, "loss": 0.3818, "step": 543 }, { "epoch": 0.041171183955347426, "grad_norm": 1.0078125, "learning_rate": 1.9982694646337748e-05, "loss": 0.3759, "step": 544 }, { "epoch": 0.04124686627879476, "grad_norm": 0.953125, "learning_rate": 1.9982624533581015e-05, "loss": 0.3566, "step": 545 }, { "epoch": 0.04132254860224209, "grad_norm": 0.95703125, "learning_rate": 1.9982554279203623e-05, "loss": 0.3193, "step": 546 }, { "epoch": 0.04139823092568942, "grad_norm": 0.90234375, "learning_rate": 1.9982483883206567e-05, "loss": 0.3182, "step": 547 }, { "epoch": 0.04147391324913675, "grad_norm": 1.0703125, "learning_rate": 1.9982413345590848e-05, "loss": 0.3686, "step": 548 }, { "epoch": 0.04154959557258408, "grad_norm": 0.8515625, "learning_rate": 1.9982342666357467e-05, "loss": 0.2526, "step": 549 }, { "epoch": 0.04162527789603141, "grad_norm": 1.0078125, "learning_rate": 1.9982271845507418e-05, "loss": 0.3536, "step": 550 }, { "epoch": 0.041700960219478736, "grad_norm": 0.953125, "learning_rate": 1.998220088304172e-05, "loss": 0.3398, "step": 551 }, { "epoch": 0.04177664254292607, "grad_norm": 1.0, "learning_rate": 1.9982129778961367e-05, "loss": 0.335, "step": 552 }, { "epoch": 0.0418523248663734, "grad_norm": 3.78125, "learning_rate": 1.9982058533267378e-05, "loss": 0.633, "step": 553 }, { "epoch": 0.041928007189820725, "grad_norm": 0.953125, "learning_rate": 1.998198714596076e-05, "loss": 0.3236, "step": 554 }, { "epoch": 0.04200368951326806, "grad_norm": 0.96875, "learning_rate": 1.998191561704252e-05, "loss": 0.3395, "step": 555 }, { "epoch": 0.04207937183671539, "grad_norm": 0.96875, "learning_rate": 1.9981843946513677e-05, "loss": 0.3603, "step": 556 }, { "epoch": 0.042155054160162714, "grad_norm": 1.0390625, "learning_rate": 1.998177213437525e-05, "loss": 0.3889, "step": 557 }, { "epoch": 0.042230736483610046, "grad_norm": 0.9140625, "learning_rate": 1.9981700180628257e-05, "loss": 0.2679, "step": 558 }, { "epoch": 0.04230641880705738, "grad_norm": 1.0, "learning_rate": 1.9981628085273716e-05, "loss": 0.3696, "step": 559 }, { "epoch": 0.04238210113050471, "grad_norm": 1.0546875, "learning_rate": 1.9981555848312654e-05, "loss": 0.3372, "step": 560 }, { "epoch": 0.042457783453952035, "grad_norm": 1.0234375, "learning_rate": 1.998148346974609e-05, "loss": 0.3693, "step": 561 }, { "epoch": 0.04253346577739937, "grad_norm": 1.0, "learning_rate": 1.9981410949575056e-05, "loss": 0.3017, "step": 562 }, { "epoch": 0.0426091481008467, "grad_norm": 1.0078125, "learning_rate": 1.9981338287800578e-05, "loss": 0.331, "step": 563 }, { "epoch": 0.042684830424294024, "grad_norm": 1.0078125, "learning_rate": 1.998126548442369e-05, "loss": 0.3677, "step": 564 }, { "epoch": 0.042760512747741355, "grad_norm": 0.9765625, "learning_rate": 1.998119253944542e-05, "loss": 0.3644, "step": 565 }, { "epoch": 0.04283619507118869, "grad_norm": 1.046875, "learning_rate": 1.998111945286681e-05, "loss": 0.3525, "step": 566 }, { "epoch": 0.04291187739463601, "grad_norm": 1.1640625, "learning_rate": 1.9981046224688886e-05, "loss": 0.3345, "step": 567 }, { "epoch": 0.042987559718083344, "grad_norm": 0.984375, "learning_rate": 1.99809728549127e-05, "loss": 0.3809, "step": 568 }, { "epoch": 0.043063242041530676, "grad_norm": 0.91796875, "learning_rate": 1.9980899343539278e-05, "loss": 0.3317, "step": 569 }, { "epoch": 0.04313892436497801, "grad_norm": 1.0703125, "learning_rate": 1.9980825690569677e-05, "loss": 0.3782, "step": 570 }, { "epoch": 0.04321460668842533, "grad_norm": 1.0390625, "learning_rate": 1.9980751896004936e-05, "loss": 0.366, "step": 571 }, { "epoch": 0.043290289011872665, "grad_norm": 0.98046875, "learning_rate": 1.99806779598461e-05, "loss": 0.3711, "step": 572 }, { "epoch": 0.04336597133532, "grad_norm": 0.9921875, "learning_rate": 1.9980603882094217e-05, "loss": 0.3826, "step": 573 }, { "epoch": 0.04344165365876732, "grad_norm": 0.9765625, "learning_rate": 1.9980529662750347e-05, "loss": 0.3458, "step": 574 }, { "epoch": 0.043517335982214654, "grad_norm": 0.9765625, "learning_rate": 1.9980455301815532e-05, "loss": 0.3253, "step": 575 }, { "epoch": 0.043593018305661986, "grad_norm": 0.9921875, "learning_rate": 1.9980380799290834e-05, "loss": 0.3628, "step": 576 }, { "epoch": 0.04366870062910931, "grad_norm": 1.078125, "learning_rate": 1.9980306155177306e-05, "loss": 0.3591, "step": 577 }, { "epoch": 0.04374438295255664, "grad_norm": 1.03125, "learning_rate": 1.998023136947601e-05, "loss": 0.3509, "step": 578 }, { "epoch": 0.043820065276003975, "grad_norm": 1.0234375, "learning_rate": 1.9980156442188002e-05, "loss": 0.345, "step": 579 }, { "epoch": 0.0438957475994513, "grad_norm": 1.0703125, "learning_rate": 1.998008137331435e-05, "loss": 0.3932, "step": 580 }, { "epoch": 0.04397142992289863, "grad_norm": 0.91796875, "learning_rate": 1.998000616285612e-05, "loss": 0.3204, "step": 581 }, { "epoch": 0.044047112246345964, "grad_norm": 0.88671875, "learning_rate": 1.997993081081437e-05, "loss": 0.297, "step": 582 }, { "epoch": 0.044122794569793296, "grad_norm": 1.015625, "learning_rate": 1.997985531719018e-05, "loss": 0.3705, "step": 583 }, { "epoch": 0.04419847689324062, "grad_norm": 1.125, "learning_rate": 1.9979779681984617e-05, "loss": 0.4036, "step": 584 }, { "epoch": 0.04427415921668795, "grad_norm": 0.96484375, "learning_rate": 1.997970390519875e-05, "loss": 0.3508, "step": 585 }, { "epoch": 0.044349841540135285, "grad_norm": 1.0859375, "learning_rate": 1.997962798683366e-05, "loss": 0.3975, "step": 586 }, { "epoch": 0.04442552386358261, "grad_norm": 0.91015625, "learning_rate": 1.9979551926890418e-05, "loss": 0.3105, "step": 587 }, { "epoch": 0.04450120618702994, "grad_norm": 1.015625, "learning_rate": 1.997947572537011e-05, "loss": 0.3566, "step": 588 }, { "epoch": 0.044576888510477274, "grad_norm": 1.046875, "learning_rate": 1.9979399382273814e-05, "loss": 0.3635, "step": 589 }, { "epoch": 0.0446525708339246, "grad_norm": 0.9453125, "learning_rate": 1.997932289760261e-05, "loss": 0.3219, "step": 590 }, { "epoch": 0.04472825315737193, "grad_norm": 1.0546875, "learning_rate": 1.9979246271357585e-05, "loss": 0.3948, "step": 591 }, { "epoch": 0.04480393548081926, "grad_norm": 1.0546875, "learning_rate": 1.997916950353983e-05, "loss": 0.3301, "step": 592 }, { "epoch": 0.044879617804266594, "grad_norm": 0.93359375, "learning_rate": 1.997909259415043e-05, "loss": 0.3088, "step": 593 }, { "epoch": 0.04495530012771392, "grad_norm": 0.96875, "learning_rate": 1.9979015543190473e-05, "loss": 0.3779, "step": 594 }, { "epoch": 0.04503098245116125, "grad_norm": 1.15625, "learning_rate": 1.9978938350661062e-05, "loss": 0.3463, "step": 595 }, { "epoch": 0.04510666477460858, "grad_norm": 0.9921875, "learning_rate": 1.997886101656328e-05, "loss": 0.352, "step": 596 }, { "epoch": 0.04518234709805591, "grad_norm": 0.94140625, "learning_rate": 1.9978783540898234e-05, "loss": 0.3438, "step": 597 }, { "epoch": 0.04525802942150324, "grad_norm": 0.96875, "learning_rate": 1.997870592366702e-05, "loss": 0.3476, "step": 598 }, { "epoch": 0.04533371174495057, "grad_norm": 0.984375, "learning_rate": 1.9978628164870733e-05, "loss": 0.3411, "step": 599 }, { "epoch": 0.0454093940683979, "grad_norm": 1.0, "learning_rate": 1.9978550264510485e-05, "loss": 0.3547, "step": 600 }, { "epoch": 0.04548507639184523, "grad_norm": 0.89453125, "learning_rate": 1.997847222258738e-05, "loss": 0.3196, "step": 601 }, { "epoch": 0.04556075871529256, "grad_norm": 0.98828125, "learning_rate": 1.997839403910252e-05, "loss": 0.3772, "step": 602 }, { "epoch": 0.045636441038739886, "grad_norm": 0.98828125, "learning_rate": 1.9978315714057017e-05, "loss": 0.3854, "step": 603 }, { "epoch": 0.04571212336218722, "grad_norm": 0.98046875, "learning_rate": 1.9978237247451984e-05, "loss": 0.3589, "step": 604 }, { "epoch": 0.04578780568563455, "grad_norm": 3.09375, "learning_rate": 1.997815863928853e-05, "loss": 0.7429, "step": 605 }, { "epoch": 0.04586348800908188, "grad_norm": 0.9921875, "learning_rate": 1.9978079889567774e-05, "loss": 0.3837, "step": 606 }, { "epoch": 0.04593917033252921, "grad_norm": 0.95703125, "learning_rate": 1.9978000998290833e-05, "loss": 0.3871, "step": 607 }, { "epoch": 0.04601485265597654, "grad_norm": 0.91796875, "learning_rate": 1.9977921965458824e-05, "loss": 0.3267, "step": 608 }, { "epoch": 0.04609053497942387, "grad_norm": 0.96484375, "learning_rate": 1.997784279107287e-05, "loss": 0.335, "step": 609 }, { "epoch": 0.046166217302871196, "grad_norm": 0.94921875, "learning_rate": 1.997776347513409e-05, "loss": 0.3389, "step": 610 }, { "epoch": 0.04624189962631853, "grad_norm": 0.90234375, "learning_rate": 1.9977684017643618e-05, "loss": 0.331, "step": 611 }, { "epoch": 0.04631758194976586, "grad_norm": 0.984375, "learning_rate": 1.997760441860257e-05, "loss": 0.3759, "step": 612 }, { "epoch": 0.046393264273213185, "grad_norm": 1.03125, "learning_rate": 1.9977524678012085e-05, "loss": 0.3465, "step": 613 }, { "epoch": 0.04646894659666052, "grad_norm": 0.9921875, "learning_rate": 1.9977444795873292e-05, "loss": 0.3331, "step": 614 }, { "epoch": 0.04654462892010785, "grad_norm": 0.9140625, "learning_rate": 1.997736477218732e-05, "loss": 0.3442, "step": 615 }, { "epoch": 0.04662031124355518, "grad_norm": 0.95703125, "learning_rate": 1.9977284606955305e-05, "loss": 0.3272, "step": 616 }, { "epoch": 0.046695993567002506, "grad_norm": 1.0078125, "learning_rate": 1.997720430017839e-05, "loss": 0.3522, "step": 617 }, { "epoch": 0.04677167589044984, "grad_norm": 1.0625, "learning_rate": 1.997712385185771e-05, "loss": 0.3632, "step": 618 }, { "epoch": 0.04684735821389717, "grad_norm": 0.9375, "learning_rate": 1.9977043261994405e-05, "loss": 0.3765, "step": 619 }, { "epoch": 0.046923040537344494, "grad_norm": 0.89453125, "learning_rate": 1.997696253058962e-05, "loss": 0.3032, "step": 620 }, { "epoch": 0.046998722860791826, "grad_norm": 12.0, "learning_rate": 1.99768816576445e-05, "loss": 0.7095, "step": 621 }, { "epoch": 0.04707440518423916, "grad_norm": 0.9296875, "learning_rate": 1.9976800643160196e-05, "loss": 0.3323, "step": 622 }, { "epoch": 0.04715008750768648, "grad_norm": 0.9765625, "learning_rate": 1.9976719487137854e-05, "loss": 0.3805, "step": 623 }, { "epoch": 0.047225769831133815, "grad_norm": 0.91796875, "learning_rate": 1.9976638189578623e-05, "loss": 0.3354, "step": 624 }, { "epoch": 0.04730145215458115, "grad_norm": 0.96484375, "learning_rate": 1.9976556750483657e-05, "loss": 0.3668, "step": 625 }, { "epoch": 0.04737713447802847, "grad_norm": 1.109375, "learning_rate": 1.9976475169854117e-05, "loss": 0.4031, "step": 626 }, { "epoch": 0.047452816801475804, "grad_norm": 0.9375, "learning_rate": 1.9976393447691155e-05, "loss": 0.3179, "step": 627 }, { "epoch": 0.047528499124923136, "grad_norm": 0.89453125, "learning_rate": 1.9976311583995933e-05, "loss": 0.325, "step": 628 }, { "epoch": 0.04760418144837047, "grad_norm": 0.94140625, "learning_rate": 1.997622957876961e-05, "loss": 0.3471, "step": 629 }, { "epoch": 0.04767986377181779, "grad_norm": 0.91796875, "learning_rate": 1.997614743201335e-05, "loss": 0.3324, "step": 630 }, { "epoch": 0.047755546095265125, "grad_norm": 1.25, "learning_rate": 1.997606514372832e-05, "loss": 0.3747, "step": 631 }, { "epoch": 0.04783122841871246, "grad_norm": 1.1328125, "learning_rate": 1.9975982713915686e-05, "loss": 0.3654, "step": 632 }, { "epoch": 0.04790691074215978, "grad_norm": 0.99609375, "learning_rate": 1.9975900142576617e-05, "loss": 0.4023, "step": 633 }, { "epoch": 0.047982593065607114, "grad_norm": 1.0234375, "learning_rate": 1.997581742971229e-05, "loss": 0.3849, "step": 634 }, { "epoch": 0.048058275389054446, "grad_norm": 0.91796875, "learning_rate": 1.997573457532387e-05, "loss": 0.3286, "step": 635 }, { "epoch": 0.04813395771250177, "grad_norm": 0.90234375, "learning_rate": 1.9975651579412534e-05, "loss": 0.3205, "step": 636 }, { "epoch": 0.0482096400359491, "grad_norm": 1.0546875, "learning_rate": 1.9975568441979462e-05, "loss": 0.4286, "step": 637 }, { "epoch": 0.048285322359396435, "grad_norm": 0.94921875, "learning_rate": 1.9975485163025837e-05, "loss": 0.3399, "step": 638 }, { "epoch": 0.04836100468284377, "grad_norm": 0.95703125, "learning_rate": 1.9975401742552834e-05, "loss": 0.3474, "step": 639 }, { "epoch": 0.04843668700629109, "grad_norm": 1.0234375, "learning_rate": 1.9975318180561638e-05, "loss": 0.3448, "step": 640 }, { "epoch": 0.048512369329738424, "grad_norm": 0.9609375, "learning_rate": 1.997523447705344e-05, "loss": 0.3819, "step": 641 }, { "epoch": 0.048588051653185756, "grad_norm": 1.046875, "learning_rate": 1.9975150632029417e-05, "loss": 0.3947, "step": 642 }, { "epoch": 0.04866373397663308, "grad_norm": 1.015625, "learning_rate": 1.9975066645490766e-05, "loss": 0.3544, "step": 643 }, { "epoch": 0.04873941630008041, "grad_norm": 1.0078125, "learning_rate": 1.997498251743868e-05, "loss": 0.3741, "step": 644 }, { "epoch": 0.048815098623527745, "grad_norm": 0.92578125, "learning_rate": 1.9974898247874345e-05, "loss": 0.3593, "step": 645 }, { "epoch": 0.04889078094697507, "grad_norm": 1.0, "learning_rate": 1.997481383679896e-05, "loss": 0.3965, "step": 646 }, { "epoch": 0.0489664632704224, "grad_norm": 0.9453125, "learning_rate": 1.997472928421373e-05, "loss": 0.3509, "step": 647 }, { "epoch": 0.04904214559386973, "grad_norm": 1.0, "learning_rate": 1.9974644590119842e-05, "loss": 0.3865, "step": 648 }, { "epoch": 0.04911782791731706, "grad_norm": 2.03125, "learning_rate": 1.9974559754518506e-05, "loss": 0.6595, "step": 649 }, { "epoch": 0.04919351024076439, "grad_norm": 2.046875, "learning_rate": 1.997447477741092e-05, "loss": 0.7222, "step": 650 }, { "epoch": 0.04926919256421172, "grad_norm": 1.078125, "learning_rate": 1.9974389658798296e-05, "loss": 0.3729, "step": 651 }, { "epoch": 0.049344874887659054, "grad_norm": 0.90234375, "learning_rate": 1.9974304398681837e-05, "loss": 0.3121, "step": 652 }, { "epoch": 0.04942055721110638, "grad_norm": 0.97265625, "learning_rate": 1.9974218997062752e-05, "loss": 0.3384, "step": 653 }, { "epoch": 0.04949623953455371, "grad_norm": 0.98046875, "learning_rate": 1.9974133453942256e-05, "loss": 0.3714, "step": 654 }, { "epoch": 0.04957192185800104, "grad_norm": 0.98046875, "learning_rate": 1.997404776932156e-05, "loss": 0.3566, "step": 655 }, { "epoch": 0.04964760418144837, "grad_norm": 0.96484375, "learning_rate": 1.9973961943201882e-05, "loss": 0.3329, "step": 656 }, { "epoch": 0.0497232865048957, "grad_norm": 0.94921875, "learning_rate": 1.9973875975584434e-05, "loss": 0.373, "step": 657 }, { "epoch": 0.04979896882834303, "grad_norm": 0.9375, "learning_rate": 1.9973789866470444e-05, "loss": 0.3513, "step": 658 }, { "epoch": 0.04987465115179036, "grad_norm": 1.0546875, "learning_rate": 1.997370361586113e-05, "loss": 0.379, "step": 659 }, { "epoch": 0.04995033347523769, "grad_norm": 0.9375, "learning_rate": 1.997361722375771e-05, "loss": 0.3525, "step": 660 }, { "epoch": 0.05002601579868502, "grad_norm": 0.96484375, "learning_rate": 1.9973530690161415e-05, "loss": 0.3668, "step": 661 }, { "epoch": 0.05010169812213235, "grad_norm": 0.921875, "learning_rate": 1.9973444015073474e-05, "loss": 0.3448, "step": 662 }, { "epoch": 0.05017738044557968, "grad_norm": 0.9296875, "learning_rate": 1.9973357198495114e-05, "loss": 0.3359, "step": 663 }, { "epoch": 0.05025306276902701, "grad_norm": 0.94921875, "learning_rate": 1.9973270240427566e-05, "loss": 0.3524, "step": 664 }, { "epoch": 0.05032874509247434, "grad_norm": 3.5, "learning_rate": 1.997318314087207e-05, "loss": 0.7076, "step": 665 }, { "epoch": 0.05040442741592167, "grad_norm": 1.140625, "learning_rate": 1.997309589982985e-05, "loss": 0.4001, "step": 666 }, { "epoch": 0.050480109739369, "grad_norm": 1.1171875, "learning_rate": 1.9973008517302156e-05, "loss": 0.3966, "step": 667 }, { "epoch": 0.05055579206281633, "grad_norm": 0.890625, "learning_rate": 1.9972920993290216e-05, "loss": 0.3251, "step": 668 }, { "epoch": 0.050631474386263656, "grad_norm": 1.078125, "learning_rate": 1.997283332779528e-05, "loss": 0.3685, "step": 669 }, { "epoch": 0.05070715670971099, "grad_norm": 1.1328125, "learning_rate": 1.997274552081859e-05, "loss": 0.3826, "step": 670 }, { "epoch": 0.05078283903315832, "grad_norm": 1.0078125, "learning_rate": 1.997265757236139e-05, "loss": 0.3484, "step": 671 }, { "epoch": 0.050858521356605645, "grad_norm": 3.046875, "learning_rate": 1.997256948242493e-05, "loss": 0.5781, "step": 672 }, { "epoch": 0.05093420368005298, "grad_norm": 0.96875, "learning_rate": 1.997248125101046e-05, "loss": 0.3697, "step": 673 }, { "epoch": 0.05100988600350031, "grad_norm": 1.0, "learning_rate": 1.9972392878119226e-05, "loss": 0.3601, "step": 674 }, { "epoch": 0.05108556832694764, "grad_norm": 0.94921875, "learning_rate": 1.9972304363752488e-05, "loss": 0.3388, "step": 675 }, { "epoch": 0.051161250650394965, "grad_norm": 0.97265625, "learning_rate": 1.99722157079115e-05, "loss": 0.3548, "step": 676 }, { "epoch": 0.0512369329738423, "grad_norm": 0.984375, "learning_rate": 1.9972126910597518e-05, "loss": 0.3378, "step": 677 }, { "epoch": 0.05131261529728963, "grad_norm": 1.0078125, "learning_rate": 1.9972037971811802e-05, "loss": 0.3634, "step": 678 }, { "epoch": 0.051388297620736954, "grad_norm": 0.96484375, "learning_rate": 1.9971948891555617e-05, "loss": 0.3471, "step": 679 }, { "epoch": 0.051463979944184286, "grad_norm": 0.98828125, "learning_rate": 1.997185966983022e-05, "loss": 0.3782, "step": 680 }, { "epoch": 0.05153966226763162, "grad_norm": 0.98046875, "learning_rate": 1.997177030663689e-05, "loss": 0.3359, "step": 681 }, { "epoch": 0.05161534459107894, "grad_norm": 1.0, "learning_rate": 1.9971680801976882e-05, "loss": 0.3719, "step": 682 }, { "epoch": 0.051691026914526275, "grad_norm": 3.4375, "learning_rate": 1.997159115585147e-05, "loss": 0.7382, "step": 683 }, { "epoch": 0.05176670923797361, "grad_norm": 0.98046875, "learning_rate": 1.9971501368261922e-05, "loss": 0.386, "step": 684 }, { "epoch": 0.05184239156142094, "grad_norm": 1.0703125, "learning_rate": 1.9971411439209517e-05, "loss": 0.3596, "step": 685 }, { "epoch": 0.051918073884868264, "grad_norm": 0.921875, "learning_rate": 1.997132136869553e-05, "loss": 0.3097, "step": 686 }, { "epoch": 0.051993756208315596, "grad_norm": 1.046875, "learning_rate": 1.997123115672124e-05, "loss": 0.3735, "step": 687 }, { "epoch": 0.05206943853176293, "grad_norm": 0.9453125, "learning_rate": 1.997114080328792e-05, "loss": 0.3549, "step": 688 }, { "epoch": 0.05214512085521025, "grad_norm": 0.9375, "learning_rate": 1.9971050308396862e-05, "loss": 0.3107, "step": 689 }, { "epoch": 0.052220803178657585, "grad_norm": 1.015625, "learning_rate": 1.997095967204934e-05, "loss": 0.3893, "step": 690 }, { "epoch": 0.05229648550210492, "grad_norm": 0.97265625, "learning_rate": 1.9970868894246646e-05, "loss": 0.3742, "step": 691 }, { "epoch": 0.05237216782555224, "grad_norm": 1.0234375, "learning_rate": 1.997077797499007e-05, "loss": 0.3691, "step": 692 }, { "epoch": 0.052447850148999574, "grad_norm": 0.97265625, "learning_rate": 1.9970686914280892e-05, "loss": 0.3697, "step": 693 }, { "epoch": 0.052523532472446906, "grad_norm": 3.203125, "learning_rate": 1.9970595712120414e-05, "loss": 0.6165, "step": 694 }, { "epoch": 0.05259921479589423, "grad_norm": 1.0859375, "learning_rate": 1.9970504368509923e-05, "loss": 0.3582, "step": 695 }, { "epoch": 0.05267489711934156, "grad_norm": 0.99609375, "learning_rate": 1.997041288345072e-05, "loss": 0.3371, "step": 696 }, { "epoch": 0.052750579442788895, "grad_norm": 1.0, "learning_rate": 1.99703212569441e-05, "loss": 0.3712, "step": 697 }, { "epoch": 0.05282626176623623, "grad_norm": 0.94140625, "learning_rate": 1.997022948899136e-05, "loss": 0.3466, "step": 698 }, { "epoch": 0.05290194408968355, "grad_norm": 0.98828125, "learning_rate": 1.9970137579593805e-05, "loss": 0.3577, "step": 699 }, { "epoch": 0.052977626413130884, "grad_norm": 0.95703125, "learning_rate": 1.9970045528752745e-05, "loss": 0.3489, "step": 700 }, { "epoch": 0.053053308736578215, "grad_norm": 1.140625, "learning_rate": 1.9969953336469478e-05, "loss": 0.4045, "step": 701 }, { "epoch": 0.05312899106002554, "grad_norm": 0.94921875, "learning_rate": 1.9969861002745312e-05, "loss": 0.3923, "step": 702 }, { "epoch": 0.05320467338347287, "grad_norm": 0.9609375, "learning_rate": 1.9969768527581558e-05, "loss": 0.3333, "step": 703 }, { "epoch": 0.053280355706920204, "grad_norm": 0.91015625, "learning_rate": 1.996967591097953e-05, "loss": 0.3249, "step": 704 }, { "epoch": 0.05335603803036753, "grad_norm": 1.0, "learning_rate": 1.9969583152940543e-05, "loss": 0.3935, "step": 705 }, { "epoch": 0.05343172035381486, "grad_norm": 1.0078125, "learning_rate": 1.9969490253465907e-05, "loss": 0.3692, "step": 706 }, { "epoch": 0.05350740267726219, "grad_norm": 0.9765625, "learning_rate": 1.9969397212556942e-05, "loss": 0.3336, "step": 707 }, { "epoch": 0.053583085000709525, "grad_norm": 0.921875, "learning_rate": 1.9969304030214976e-05, "loss": 0.3274, "step": 708 }, { "epoch": 0.05365876732415685, "grad_norm": 0.921875, "learning_rate": 1.996921070644132e-05, "loss": 0.3342, "step": 709 }, { "epoch": 0.05373444964760418, "grad_norm": 1.015625, "learning_rate": 1.9969117241237302e-05, "loss": 0.3318, "step": 710 }, { "epoch": 0.053810131971051514, "grad_norm": 1.015625, "learning_rate": 1.996902363460425e-05, "loss": 0.3801, "step": 711 }, { "epoch": 0.05388581429449884, "grad_norm": 1.0859375, "learning_rate": 1.9968929886543487e-05, "loss": 0.3571, "step": 712 }, { "epoch": 0.05396149661794617, "grad_norm": 1.046875, "learning_rate": 1.996883599705635e-05, "loss": 0.3676, "step": 713 }, { "epoch": 0.0540371789413935, "grad_norm": 0.92578125, "learning_rate": 1.9968741966144167e-05, "loss": 0.3179, "step": 714 }, { "epoch": 0.05411286126484083, "grad_norm": 0.96875, "learning_rate": 1.9968647793808272e-05, "loss": 0.3376, "step": 715 }, { "epoch": 0.05418854358828816, "grad_norm": 1.0, "learning_rate": 1.9968553480050002e-05, "loss": 0.3683, "step": 716 }, { "epoch": 0.05426422591173549, "grad_norm": 0.984375, "learning_rate": 1.996845902487069e-05, "loss": 0.3555, "step": 717 }, { "epoch": 0.05433990823518282, "grad_norm": 4.25, "learning_rate": 1.9968364428271684e-05, "loss": 0.7415, "step": 718 }, { "epoch": 0.05441559055863015, "grad_norm": 0.890625, "learning_rate": 1.9968269690254323e-05, "loss": 0.316, "step": 719 }, { "epoch": 0.05449127288207748, "grad_norm": 0.9296875, "learning_rate": 1.996817481081995e-05, "loss": 0.3415, "step": 720 }, { "epoch": 0.05456695520552481, "grad_norm": 0.8984375, "learning_rate": 1.996807978996991e-05, "loss": 0.3331, "step": 721 }, { "epoch": 0.05464263752897214, "grad_norm": 0.9375, "learning_rate": 1.996798462770555e-05, "loss": 0.3407, "step": 722 }, { "epoch": 0.05471831985241947, "grad_norm": 1.0625, "learning_rate": 1.9967889324028228e-05, "loss": 0.3799, "step": 723 }, { "epoch": 0.0547940021758668, "grad_norm": 0.94140625, "learning_rate": 1.9967793878939286e-05, "loss": 0.3156, "step": 724 }, { "epoch": 0.05486968449931413, "grad_norm": 0.96484375, "learning_rate": 1.9967698292440083e-05, "loss": 0.349, "step": 725 }, { "epoch": 0.05494536682276146, "grad_norm": 0.953125, "learning_rate": 1.9967602564531976e-05, "loss": 0.3379, "step": 726 }, { "epoch": 0.05502104914620879, "grad_norm": 0.95703125, "learning_rate": 1.996750669521632e-05, "loss": 0.3445, "step": 727 }, { "epoch": 0.055096731469656116, "grad_norm": 0.92578125, "learning_rate": 1.996741068449448e-05, "loss": 0.3721, "step": 728 }, { "epoch": 0.05517241379310345, "grad_norm": 0.95703125, "learning_rate": 1.9967314532367813e-05, "loss": 0.3768, "step": 729 }, { "epoch": 0.05524809611655078, "grad_norm": 0.89453125, "learning_rate": 1.9967218238837684e-05, "loss": 0.3101, "step": 730 }, { "epoch": 0.05532377843999811, "grad_norm": 3.75, "learning_rate": 1.996712180390546e-05, "loss": 0.7723, "step": 731 }, { "epoch": 0.055399460763445436, "grad_norm": 0.94140625, "learning_rate": 1.996702522757251e-05, "loss": 0.3446, "step": 732 }, { "epoch": 0.05547514308689277, "grad_norm": 0.9375, "learning_rate": 1.9966928509840204e-05, "loss": 0.3165, "step": 733 }, { "epoch": 0.0555508254103401, "grad_norm": 0.9609375, "learning_rate": 1.996683165070991e-05, "loss": 0.3451, "step": 734 }, { "epoch": 0.055626507733787425, "grad_norm": 0.88671875, "learning_rate": 1.9966734650183008e-05, "loss": 0.292, "step": 735 }, { "epoch": 0.05570219005723476, "grad_norm": 0.91796875, "learning_rate": 1.996663750826087e-05, "loss": 0.3305, "step": 736 }, { "epoch": 0.05577787238068209, "grad_norm": 0.96875, "learning_rate": 1.996654022494488e-05, "loss": 0.387, "step": 737 }, { "epoch": 0.055853554704129414, "grad_norm": 0.96484375, "learning_rate": 1.9966442800236407e-05, "loss": 0.3408, "step": 738 }, { "epoch": 0.055929237027576746, "grad_norm": 1.015625, "learning_rate": 1.9966345234136843e-05, "loss": 0.4074, "step": 739 }, { "epoch": 0.05600491935102408, "grad_norm": 0.921875, "learning_rate": 1.9966247526647567e-05, "loss": 0.3149, "step": 740 }, { "epoch": 0.0560806016744714, "grad_norm": 0.91015625, "learning_rate": 1.996614967776997e-05, "loss": 0.3351, "step": 741 }, { "epoch": 0.056156283997918735, "grad_norm": 0.9140625, "learning_rate": 1.9966051687505436e-05, "loss": 0.304, "step": 742 }, { "epoch": 0.05623196632136607, "grad_norm": 0.94921875, "learning_rate": 1.9965953555855356e-05, "loss": 0.3264, "step": 743 }, { "epoch": 0.0563076486448134, "grad_norm": 0.9609375, "learning_rate": 1.9965855282821125e-05, "loss": 0.3736, "step": 744 }, { "epoch": 0.056383330968260724, "grad_norm": 0.9140625, "learning_rate": 1.9965756868404135e-05, "loss": 0.3301, "step": 745 }, { "epoch": 0.056459013291708056, "grad_norm": 1.046875, "learning_rate": 1.996565831260578e-05, "loss": 0.3905, "step": 746 }, { "epoch": 0.05653469561515539, "grad_norm": 0.9453125, "learning_rate": 1.996555961542746e-05, "loss": 0.3321, "step": 747 }, { "epoch": 0.05661037793860271, "grad_norm": 7.375, "learning_rate": 1.9965460776870577e-05, "loss": 0.6833, "step": 748 }, { "epoch": 0.056686060262050045, "grad_norm": 0.984375, "learning_rate": 1.996536179693653e-05, "loss": 0.3706, "step": 749 }, { "epoch": 0.05676174258549738, "grad_norm": 1.0078125, "learning_rate": 1.9965262675626726e-05, "loss": 0.3832, "step": 750 }, { "epoch": 0.0568374249089447, "grad_norm": 0.96484375, "learning_rate": 1.9965163412942567e-05, "loss": 0.3638, "step": 751 }, { "epoch": 0.056913107232392034, "grad_norm": 0.97265625, "learning_rate": 1.9965064008885465e-05, "loss": 0.3278, "step": 752 }, { "epoch": 0.056988789555839366, "grad_norm": 0.91796875, "learning_rate": 1.9964964463456833e-05, "loss": 0.3442, "step": 753 }, { "epoch": 0.0570644718792867, "grad_norm": 1.1796875, "learning_rate": 1.9964864776658076e-05, "loss": 0.3222, "step": 754 }, { "epoch": 0.05714015420273402, "grad_norm": 0.99609375, "learning_rate": 1.9964764948490615e-05, "loss": 0.375, "step": 755 }, { "epoch": 0.057215836526181355, "grad_norm": 0.99609375, "learning_rate": 1.9964664978955863e-05, "loss": 0.359, "step": 756 }, { "epoch": 0.057291518849628686, "grad_norm": 0.9609375, "learning_rate": 1.9964564868055234e-05, "loss": 0.3796, "step": 757 }, { "epoch": 0.05736720117307601, "grad_norm": 0.9921875, "learning_rate": 1.9964464615790154e-05, "loss": 0.3428, "step": 758 }, { "epoch": 0.05744288349652334, "grad_norm": 1.078125, "learning_rate": 1.9964364222162047e-05, "loss": 0.3479, "step": 759 }, { "epoch": 0.057518565819970675, "grad_norm": 0.9140625, "learning_rate": 1.996426368717233e-05, "loss": 0.3721, "step": 760 }, { "epoch": 0.057594248143418, "grad_norm": 0.9375, "learning_rate": 1.9964163010822438e-05, "loss": 0.3827, "step": 761 }, { "epoch": 0.05766993046686533, "grad_norm": 1.046875, "learning_rate": 1.996406219311379e-05, "loss": 0.3407, "step": 762 }, { "epoch": 0.057745612790312664, "grad_norm": 1.0625, "learning_rate": 1.9963961234047825e-05, "loss": 0.3939, "step": 763 }, { "epoch": 0.05782129511375999, "grad_norm": 1.0859375, "learning_rate": 1.9963860133625967e-05, "loss": 0.4425, "step": 764 }, { "epoch": 0.05789697743720732, "grad_norm": 0.9609375, "learning_rate": 1.996375889184966e-05, "loss": 0.3547, "step": 765 }, { "epoch": 0.05797265976065465, "grad_norm": 1.0078125, "learning_rate": 1.996365750872033e-05, "loss": 0.3436, "step": 766 }, { "epoch": 0.058048342084101985, "grad_norm": 1.015625, "learning_rate": 1.996355598423942e-05, "loss": 0.3786, "step": 767 }, { "epoch": 0.05812402440754931, "grad_norm": 0.96875, "learning_rate": 1.9963454318408372e-05, "loss": 0.3625, "step": 768 }, { "epoch": 0.05819970673099664, "grad_norm": 0.9296875, "learning_rate": 1.9963352511228627e-05, "loss": 0.3565, "step": 769 }, { "epoch": 0.058275389054443974, "grad_norm": 0.96875, "learning_rate": 1.9963250562701624e-05, "loss": 0.3537, "step": 770 }, { "epoch": 0.0583510713778913, "grad_norm": 1.0703125, "learning_rate": 1.996314847282882e-05, "loss": 0.3762, "step": 771 }, { "epoch": 0.05842675370133863, "grad_norm": 0.95703125, "learning_rate": 1.996304624161166e-05, "loss": 0.3667, "step": 772 }, { "epoch": 0.05850243602478596, "grad_norm": 1.09375, "learning_rate": 1.9962943869051585e-05, "loss": 0.3633, "step": 773 }, { "epoch": 0.05857811834823329, "grad_norm": 0.984375, "learning_rate": 1.996284135515006e-05, "loss": 0.374, "step": 774 }, { "epoch": 0.05865380067168062, "grad_norm": 0.98046875, "learning_rate": 1.9962738699908527e-05, "loss": 0.3673, "step": 775 }, { "epoch": 0.05872948299512795, "grad_norm": 0.90234375, "learning_rate": 1.9962635903328454e-05, "loss": 0.3383, "step": 776 }, { "epoch": 0.058805165318575284, "grad_norm": 1.0, "learning_rate": 1.9962532965411292e-05, "loss": 0.373, "step": 777 }, { "epoch": 0.05888084764202261, "grad_norm": 0.99609375, "learning_rate": 1.9962429886158503e-05, "loss": 0.3769, "step": 778 }, { "epoch": 0.05895652996546994, "grad_norm": 1.1328125, "learning_rate": 1.9962326665571556e-05, "loss": 0.3535, "step": 779 }, { "epoch": 0.05903221228891727, "grad_norm": 0.9765625, "learning_rate": 1.99622233036519e-05, "loss": 0.353, "step": 780 }, { "epoch": 0.0591078946123646, "grad_norm": 0.9140625, "learning_rate": 1.9962119800401017e-05, "loss": 0.3396, "step": 781 }, { "epoch": 0.05918357693581193, "grad_norm": 3.03125, "learning_rate": 1.9962016155820367e-05, "loss": 0.7116, "step": 782 }, { "epoch": 0.05925925925925926, "grad_norm": 1.0390625, "learning_rate": 1.9961912369911423e-05, "loss": 0.3844, "step": 783 }, { "epoch": 0.059334941582706587, "grad_norm": 0.9921875, "learning_rate": 1.9961808442675655e-05, "loss": 0.4052, "step": 784 }, { "epoch": 0.05941062390615392, "grad_norm": 0.9296875, "learning_rate": 1.996170437411454e-05, "loss": 0.3547, "step": 785 }, { "epoch": 0.05948630622960125, "grad_norm": 1.03125, "learning_rate": 1.9961600164229557e-05, "loss": 0.3654, "step": 786 }, { "epoch": 0.059561988553048575, "grad_norm": 0.97265625, "learning_rate": 1.9961495813022177e-05, "loss": 0.3551, "step": 787 }, { "epoch": 0.05963767087649591, "grad_norm": 1.109375, "learning_rate": 1.9961391320493886e-05, "loss": 0.3956, "step": 788 }, { "epoch": 0.05971335319994324, "grad_norm": 1.7734375, "learning_rate": 1.9961286686646164e-05, "loss": 0.38, "step": 789 }, { "epoch": 0.05978903552339057, "grad_norm": 0.92578125, "learning_rate": 1.9961181911480495e-05, "loss": 0.3607, "step": 790 }, { "epoch": 0.059864717846837896, "grad_norm": 1.0078125, "learning_rate": 1.996107699499837e-05, "loss": 0.4053, "step": 791 }, { "epoch": 0.05994040017028523, "grad_norm": 0.98046875, "learning_rate": 1.9960971937201273e-05, "loss": 0.3192, "step": 792 }, { "epoch": 0.06001608249373256, "grad_norm": 0.90625, "learning_rate": 1.9960866738090694e-05, "loss": 0.3236, "step": 793 }, { "epoch": 0.060091764817179885, "grad_norm": 0.92578125, "learning_rate": 1.996076139766813e-05, "loss": 0.362, "step": 794 }, { "epoch": 0.06016744714062722, "grad_norm": 0.9296875, "learning_rate": 1.9960655915935066e-05, "loss": 0.3447, "step": 795 }, { "epoch": 0.06024312946407455, "grad_norm": 2.265625, "learning_rate": 1.9960550292893012e-05, "loss": 0.6127, "step": 796 }, { "epoch": 0.060318811787521874, "grad_norm": 2.796875, "learning_rate": 1.9960444528543453e-05, "loss": 0.5918, "step": 797 }, { "epoch": 0.060394494110969206, "grad_norm": 1.0546875, "learning_rate": 1.99603386228879e-05, "loss": 0.3917, "step": 798 }, { "epoch": 0.06047017643441654, "grad_norm": 0.98046875, "learning_rate": 1.996023257592785e-05, "loss": 0.3538, "step": 799 }, { "epoch": 0.06054585875786386, "grad_norm": 0.9140625, "learning_rate": 1.9960126387664808e-05, "loss": 0.3274, "step": 800 }, { "epoch": 0.060621541081311195, "grad_norm": 0.921875, "learning_rate": 1.996002005810028e-05, "loss": 0.314, "step": 801 }, { "epoch": 0.06069722340475853, "grad_norm": 1.0625, "learning_rate": 1.9959913587235776e-05, "loss": 0.3665, "step": 802 }, { "epoch": 0.06077290572820586, "grad_norm": 0.91796875, "learning_rate": 1.9959806975072807e-05, "loss": 0.3456, "step": 803 }, { "epoch": 0.060848588051653184, "grad_norm": 0.97265625, "learning_rate": 1.9959700221612887e-05, "loss": 0.3403, "step": 804 }, { "epoch": 0.060924270375100516, "grad_norm": 0.9140625, "learning_rate": 1.9959593326857524e-05, "loss": 0.3457, "step": 805 }, { "epoch": 0.06099995269854785, "grad_norm": 1.0234375, "learning_rate": 1.995948629080824e-05, "loss": 0.3843, "step": 806 }, { "epoch": 0.06107563502199517, "grad_norm": 1.0, "learning_rate": 1.995937911346655e-05, "loss": 0.3499, "step": 807 }, { "epoch": 0.061151317345442505, "grad_norm": 0.984375, "learning_rate": 1.9959271794833977e-05, "loss": 0.3667, "step": 808 }, { "epoch": 0.06122699966888984, "grad_norm": 0.93359375, "learning_rate": 1.9959164334912045e-05, "loss": 0.3389, "step": 809 }, { "epoch": 0.06130268199233716, "grad_norm": 0.98046875, "learning_rate": 1.9959056733702273e-05, "loss": 0.3368, "step": 810 }, { "epoch": 0.061378364315784494, "grad_norm": 0.9375, "learning_rate": 1.9958948991206192e-05, "loss": 0.3538, "step": 811 }, { "epoch": 0.061454046639231825, "grad_norm": 0.89453125, "learning_rate": 1.9958841107425335e-05, "loss": 0.2851, "step": 812 }, { "epoch": 0.06152972896267916, "grad_norm": 0.9375, "learning_rate": 1.995873308236122e-05, "loss": 0.3277, "step": 813 }, { "epoch": 0.06160541128612648, "grad_norm": 1.0, "learning_rate": 1.995862491601539e-05, "loss": 0.377, "step": 814 }, { "epoch": 0.061681093609573814, "grad_norm": 0.87890625, "learning_rate": 1.9958516608389372e-05, "loss": 0.3406, "step": 815 }, { "epoch": 0.061756775933021146, "grad_norm": 0.98828125, "learning_rate": 1.995840815948471e-05, "loss": 0.4069, "step": 816 }, { "epoch": 0.06183245825646847, "grad_norm": 0.97265625, "learning_rate": 1.995829956930294e-05, "loss": 0.3271, "step": 817 }, { "epoch": 0.0619081405799158, "grad_norm": 0.96484375, "learning_rate": 1.99581908378456e-05, "loss": 0.3837, "step": 818 }, { "epoch": 0.061983822903363135, "grad_norm": 1.0078125, "learning_rate": 1.9958081965114232e-05, "loss": 0.4003, "step": 819 }, { "epoch": 0.06205950522681046, "grad_norm": 0.89453125, "learning_rate": 1.9957972951110384e-05, "loss": 0.3105, "step": 820 }, { "epoch": 0.06213518755025779, "grad_norm": 1.5078125, "learning_rate": 1.9957863795835602e-05, "loss": 0.3835, "step": 821 }, { "epoch": 0.062210869873705124, "grad_norm": 0.91015625, "learning_rate": 1.9957754499291436e-05, "loss": 0.3269, "step": 822 }, { "epoch": 0.06228655219715245, "grad_norm": 0.9296875, "learning_rate": 1.995764506147943e-05, "loss": 0.3502, "step": 823 }, { "epoch": 0.06236223452059978, "grad_norm": 0.97265625, "learning_rate": 1.9957535482401146e-05, "loss": 0.3756, "step": 824 }, { "epoch": 0.06243791684404711, "grad_norm": 0.95703125, "learning_rate": 1.995742576205813e-05, "loss": 0.3842, "step": 825 }, { "epoch": 0.06251359916749444, "grad_norm": 0.96484375, "learning_rate": 1.9957315900451938e-05, "loss": 0.3475, "step": 826 }, { "epoch": 0.06258928149094177, "grad_norm": 0.8515625, "learning_rate": 1.995720589758414e-05, "loss": 0.3309, "step": 827 }, { "epoch": 0.0626649638143891, "grad_norm": 0.984375, "learning_rate": 1.9957095753456287e-05, "loss": 0.3971, "step": 828 }, { "epoch": 0.06274064613783643, "grad_norm": 0.8671875, "learning_rate": 1.995698546806994e-05, "loss": 0.3163, "step": 829 }, { "epoch": 0.06281632846128377, "grad_norm": 0.9296875, "learning_rate": 1.995687504142667e-05, "loss": 0.3609, "step": 830 }, { "epoch": 0.0628920107847311, "grad_norm": 0.9609375, "learning_rate": 1.9956764473528043e-05, "loss": 0.3509, "step": 831 }, { "epoch": 0.06296769310817842, "grad_norm": 0.984375, "learning_rate": 1.9956653764375623e-05, "loss": 0.3752, "step": 832 }, { "epoch": 0.06304337543162575, "grad_norm": 0.921875, "learning_rate": 1.9956542913970986e-05, "loss": 0.3339, "step": 833 }, { "epoch": 0.06311905775507308, "grad_norm": 1.015625, "learning_rate": 1.9956431922315696e-05, "loss": 0.4245, "step": 834 }, { "epoch": 0.06319474007852041, "grad_norm": 0.953125, "learning_rate": 1.9956320789411338e-05, "loss": 0.367, "step": 835 }, { "epoch": 0.06327042240196774, "grad_norm": 0.89453125, "learning_rate": 1.9956209515259485e-05, "loss": 0.295, "step": 836 }, { "epoch": 0.06334610472541508, "grad_norm": 0.984375, "learning_rate": 1.9956098099861713e-05, "loss": 0.3758, "step": 837 }, { "epoch": 0.0634217870488624, "grad_norm": 0.99609375, "learning_rate": 1.9955986543219602e-05, "loss": 0.3536, "step": 838 }, { "epoch": 0.06349746937230973, "grad_norm": 9.625, "learning_rate": 1.995587484533474e-05, "loss": 0.697, "step": 839 }, { "epoch": 0.06357315169575706, "grad_norm": 0.890625, "learning_rate": 1.995576300620871e-05, "loss": 0.2943, "step": 840 }, { "epoch": 0.06364883401920439, "grad_norm": 0.921875, "learning_rate": 1.9955651025843092e-05, "loss": 0.3064, "step": 841 }, { "epoch": 0.06372451634265172, "grad_norm": 0.95703125, "learning_rate": 1.9955538904239482e-05, "loss": 0.3422, "step": 842 }, { "epoch": 0.06380019866609905, "grad_norm": 1.0234375, "learning_rate": 1.9955426641399467e-05, "loss": 0.3537, "step": 843 }, { "epoch": 0.06387588098954639, "grad_norm": 0.98828125, "learning_rate": 1.9955314237324642e-05, "loss": 0.3525, "step": 844 }, { "epoch": 0.0639515633129937, "grad_norm": 1.0390625, "learning_rate": 1.99552016920166e-05, "loss": 0.3637, "step": 845 }, { "epoch": 0.06402724563644104, "grad_norm": 0.890625, "learning_rate": 1.995508900547694e-05, "loss": 0.322, "step": 846 }, { "epoch": 0.06410292795988837, "grad_norm": 1.03125, "learning_rate": 1.9954976177707257e-05, "loss": 0.4191, "step": 847 }, { "epoch": 0.0641786102833357, "grad_norm": 0.90625, "learning_rate": 1.9954863208709155e-05, "loss": 0.3535, "step": 848 }, { "epoch": 0.06425429260678303, "grad_norm": 2.40625, "learning_rate": 1.9954750098484233e-05, "loss": 0.669, "step": 849 }, { "epoch": 0.06432997493023036, "grad_norm": 0.97265625, "learning_rate": 1.9954636847034105e-05, "loss": 0.3541, "step": 850 }, { "epoch": 0.06440565725367768, "grad_norm": 0.98828125, "learning_rate": 1.9954523454360365e-05, "loss": 0.3581, "step": 851 }, { "epoch": 0.06448133957712501, "grad_norm": 1.125, "learning_rate": 1.9954409920464627e-05, "loss": 0.3801, "step": 852 }, { "epoch": 0.06455702190057235, "grad_norm": 0.8984375, "learning_rate": 1.9954296245348503e-05, "loss": 0.3088, "step": 853 }, { "epoch": 0.06463270422401968, "grad_norm": 1.0078125, "learning_rate": 1.9954182429013606e-05, "loss": 0.3835, "step": 854 }, { "epoch": 0.06470838654746701, "grad_norm": 1.0390625, "learning_rate": 1.9954068471461542e-05, "loss": 0.3441, "step": 855 }, { "epoch": 0.06478406887091434, "grad_norm": 0.953125, "learning_rate": 1.9953954372693944e-05, "loss": 0.3678, "step": 856 }, { "epoch": 0.06485975119436167, "grad_norm": 0.91796875, "learning_rate": 1.9953840132712415e-05, "loss": 0.3583, "step": 857 }, { "epoch": 0.06493543351780899, "grad_norm": 0.94921875, "learning_rate": 1.9953725751518588e-05, "loss": 0.3647, "step": 858 }, { "epoch": 0.06501111584125632, "grad_norm": 1.0390625, "learning_rate": 1.995361122911408e-05, "loss": 0.418, "step": 859 }, { "epoch": 0.06508679816470365, "grad_norm": 0.9296875, "learning_rate": 1.995349656550051e-05, "loss": 0.3494, "step": 860 }, { "epoch": 0.06516248048815099, "grad_norm": 0.85546875, "learning_rate": 1.9953381760679513e-05, "loss": 0.2929, "step": 861 }, { "epoch": 0.06523816281159832, "grad_norm": 0.921875, "learning_rate": 1.9953266814652715e-05, "loss": 0.2844, "step": 862 }, { "epoch": 0.06531384513504565, "grad_norm": 1.0, "learning_rate": 1.9953151727421748e-05, "loss": 0.4192, "step": 863 }, { "epoch": 0.06538952745849298, "grad_norm": 0.92578125, "learning_rate": 1.9953036498988246e-05, "loss": 0.3211, "step": 864 }, { "epoch": 0.0654652097819403, "grad_norm": 0.96875, "learning_rate": 1.9952921129353838e-05, "loss": 0.3695, "step": 865 }, { "epoch": 0.06554089210538763, "grad_norm": 0.9296875, "learning_rate": 1.9952805618520165e-05, "loss": 0.3541, "step": 866 }, { "epoch": 0.06561657442883496, "grad_norm": 0.9921875, "learning_rate": 1.995268996648886e-05, "loss": 0.3755, "step": 867 }, { "epoch": 0.0656922567522823, "grad_norm": 0.921875, "learning_rate": 1.9952574173261574e-05, "loss": 0.3202, "step": 868 }, { "epoch": 0.06576793907572963, "grad_norm": 0.9921875, "learning_rate": 1.995245823883994e-05, "loss": 0.3728, "step": 869 }, { "epoch": 0.06584362139917696, "grad_norm": 3.46875, "learning_rate": 1.9952342163225614e-05, "loss": 0.6768, "step": 870 }, { "epoch": 0.06591930372262428, "grad_norm": 0.890625, "learning_rate": 1.995222594642023e-05, "loss": 0.305, "step": 871 }, { "epoch": 0.06599498604607161, "grad_norm": 0.9765625, "learning_rate": 1.9952109588425443e-05, "loss": 0.3538, "step": 872 }, { "epoch": 0.06607066836951894, "grad_norm": 2.59375, "learning_rate": 1.9951993089242906e-05, "loss": 0.6232, "step": 873 }, { "epoch": 0.06614635069296627, "grad_norm": 0.984375, "learning_rate": 1.9951876448874264e-05, "loss": 0.3891, "step": 874 }, { "epoch": 0.0662220330164136, "grad_norm": 0.8984375, "learning_rate": 1.995175966732118e-05, "loss": 0.358, "step": 875 }, { "epoch": 0.06629771533986094, "grad_norm": 0.98046875, "learning_rate": 1.9951642744585307e-05, "loss": 0.3655, "step": 876 }, { "epoch": 0.06637339766330827, "grad_norm": 1.0078125, "learning_rate": 1.9951525680668303e-05, "loss": 0.3622, "step": 877 }, { "epoch": 0.06644907998675559, "grad_norm": 0.98046875, "learning_rate": 1.995140847557183e-05, "loss": 0.3467, "step": 878 }, { "epoch": 0.06652476231020292, "grad_norm": 1.0, "learning_rate": 1.995129112929755e-05, "loss": 0.3625, "step": 879 }, { "epoch": 0.06660044463365025, "grad_norm": 0.890625, "learning_rate": 1.995117364184713e-05, "loss": 0.3275, "step": 880 }, { "epoch": 0.06667612695709758, "grad_norm": 0.96875, "learning_rate": 1.9951056013222234e-05, "loss": 0.3637, "step": 881 }, { "epoch": 0.06675180928054492, "grad_norm": 1.03125, "learning_rate": 1.9950938243424533e-05, "loss": 0.3811, "step": 882 }, { "epoch": 0.06682749160399225, "grad_norm": 5.34375, "learning_rate": 1.9950820332455695e-05, "loss": 0.5513, "step": 883 }, { "epoch": 0.06690317392743957, "grad_norm": 0.98828125, "learning_rate": 1.9950702280317396e-05, "loss": 0.3679, "step": 884 }, { "epoch": 0.0669788562508869, "grad_norm": 0.90234375, "learning_rate": 1.9950584087011305e-05, "loss": 0.3307, "step": 885 }, { "epoch": 0.06705453857433423, "grad_norm": 0.90234375, "learning_rate": 1.9950465752539106e-05, "loss": 0.3297, "step": 886 }, { "epoch": 0.06713022089778156, "grad_norm": 0.9375, "learning_rate": 1.9950347276902477e-05, "loss": 0.3548, "step": 887 }, { "epoch": 0.0672059032212289, "grad_norm": 0.84765625, "learning_rate": 1.9950228660103093e-05, "loss": 0.3246, "step": 888 }, { "epoch": 0.06728158554467623, "grad_norm": 0.96484375, "learning_rate": 1.9950109902142644e-05, "loss": 0.3802, "step": 889 }, { "epoch": 0.06735726786812356, "grad_norm": 0.94140625, "learning_rate": 1.994999100302281e-05, "loss": 0.3736, "step": 890 }, { "epoch": 0.06743295019157088, "grad_norm": 0.9296875, "learning_rate": 1.9949871962745275e-05, "loss": 0.3368, "step": 891 }, { "epoch": 0.06750863251501821, "grad_norm": 1.0, "learning_rate": 1.9949752781311734e-05, "loss": 0.3618, "step": 892 }, { "epoch": 0.06758431483846554, "grad_norm": 0.87109375, "learning_rate": 1.9949633458723876e-05, "loss": 0.3114, "step": 893 }, { "epoch": 0.06765999716191287, "grad_norm": 0.8828125, "learning_rate": 1.994951399498339e-05, "loss": 0.2876, "step": 894 }, { "epoch": 0.0677356794853602, "grad_norm": 1.1015625, "learning_rate": 1.9949394390091977e-05, "loss": 0.3384, "step": 895 }, { "epoch": 0.06781136180880754, "grad_norm": 0.875, "learning_rate": 1.994927464405133e-05, "loss": 0.3288, "step": 896 }, { "epoch": 0.06788704413225485, "grad_norm": 0.8828125, "learning_rate": 1.9949154756863152e-05, "loss": 0.343, "step": 897 }, { "epoch": 0.06796272645570219, "grad_norm": 1.0625, "learning_rate": 1.9949034728529136e-05, "loss": 0.3974, "step": 898 }, { "epoch": 0.06803840877914952, "grad_norm": 0.9609375, "learning_rate": 1.994891455905099e-05, "loss": 0.3552, "step": 899 }, { "epoch": 0.06811409110259685, "grad_norm": 0.9765625, "learning_rate": 1.9948794248430425e-05, "loss": 0.3937, "step": 900 }, { "epoch": 0.06818977342604418, "grad_norm": 0.9765625, "learning_rate": 1.994867379666913e-05, "loss": 0.3676, "step": 901 }, { "epoch": 0.06826545574949151, "grad_norm": 0.9921875, "learning_rate": 1.994855320376883e-05, "loss": 0.3359, "step": 902 }, { "epoch": 0.06834113807293885, "grad_norm": 0.9609375, "learning_rate": 1.9948432469731233e-05, "loss": 0.3584, "step": 903 }, { "epoch": 0.06841682039638616, "grad_norm": 0.8984375, "learning_rate": 1.9948311594558044e-05, "loss": 0.3101, "step": 904 }, { "epoch": 0.0684925027198335, "grad_norm": 1.0234375, "learning_rate": 1.9948190578250988e-05, "loss": 0.3242, "step": 905 }, { "epoch": 0.06856818504328083, "grad_norm": 0.93359375, "learning_rate": 1.9948069420811774e-05, "loss": 0.3411, "step": 906 }, { "epoch": 0.06864386736672816, "grad_norm": 0.9609375, "learning_rate": 1.9947948122242122e-05, "loss": 0.3845, "step": 907 }, { "epoch": 0.06871954969017549, "grad_norm": 0.94140625, "learning_rate": 1.9947826682543757e-05, "loss": 0.3438, "step": 908 }, { "epoch": 0.06879523201362282, "grad_norm": 0.9765625, "learning_rate": 1.99477051017184e-05, "loss": 0.3862, "step": 909 }, { "epoch": 0.06887091433707015, "grad_norm": 0.9296875, "learning_rate": 1.9947583379767773e-05, "loss": 0.359, "step": 910 }, { "epoch": 0.06894659666051747, "grad_norm": 0.9375, "learning_rate": 1.994746151669361e-05, "loss": 0.3214, "step": 911 }, { "epoch": 0.0690222789839648, "grad_norm": 0.92578125, "learning_rate": 1.994733951249763e-05, "loss": 0.3485, "step": 912 }, { "epoch": 0.06909796130741214, "grad_norm": 0.91015625, "learning_rate": 1.994721736718157e-05, "loss": 0.339, "step": 913 }, { "epoch": 0.06917364363085947, "grad_norm": 0.94140625, "learning_rate": 1.9947095080747163e-05, "loss": 0.3499, "step": 914 }, { "epoch": 0.0692493259543068, "grad_norm": 0.921875, "learning_rate": 1.9946972653196143e-05, "loss": 0.3234, "step": 915 }, { "epoch": 0.06932500827775413, "grad_norm": 0.99609375, "learning_rate": 1.9946850084530243e-05, "loss": 0.3639, "step": 916 }, { "epoch": 0.06940069060120145, "grad_norm": 0.8828125, "learning_rate": 1.994672737475121e-05, "loss": 0.326, "step": 917 }, { "epoch": 0.06947637292464878, "grad_norm": 0.96875, "learning_rate": 1.9946604523860775e-05, "loss": 0.3545, "step": 918 }, { "epoch": 0.06955205524809611, "grad_norm": 4.9375, "learning_rate": 1.994648153186069e-05, "loss": 0.7124, "step": 919 }, { "epoch": 0.06962773757154345, "grad_norm": 1.09375, "learning_rate": 1.994635839875269e-05, "loss": 0.354, "step": 920 }, { "epoch": 0.06970341989499078, "grad_norm": 0.87890625, "learning_rate": 1.9946235124538534e-05, "loss": 0.3353, "step": 921 }, { "epoch": 0.06977910221843811, "grad_norm": 0.98046875, "learning_rate": 1.994611170921996e-05, "loss": 0.3707, "step": 922 }, { "epoch": 0.06985478454188544, "grad_norm": 0.96875, "learning_rate": 1.9945988152798725e-05, "loss": 0.3184, "step": 923 }, { "epoch": 0.06993046686533276, "grad_norm": 0.92578125, "learning_rate": 1.994586445527658e-05, "loss": 0.3402, "step": 924 }, { "epoch": 0.07000614918878009, "grad_norm": 0.94140625, "learning_rate": 1.994574061665528e-05, "loss": 0.3841, "step": 925 }, { "epoch": 0.07008183151222742, "grad_norm": 0.8359375, "learning_rate": 1.9945616636936584e-05, "loss": 0.2945, "step": 926 }, { "epoch": 0.07015751383567476, "grad_norm": 0.96875, "learning_rate": 1.9945492516122246e-05, "loss": 0.387, "step": 927 }, { "epoch": 0.07023319615912209, "grad_norm": 1.0, "learning_rate": 1.994536825421403e-05, "loss": 0.3854, "step": 928 }, { "epoch": 0.07030887848256942, "grad_norm": 0.91015625, "learning_rate": 1.99452438512137e-05, "loss": 0.3541, "step": 929 }, { "epoch": 0.07038456080601674, "grad_norm": 0.95703125, "learning_rate": 1.994511930712302e-05, "loss": 0.3567, "step": 930 }, { "epoch": 0.07046024312946407, "grad_norm": 0.97265625, "learning_rate": 1.9944994621943753e-05, "loss": 0.4138, "step": 931 }, { "epoch": 0.0705359254529114, "grad_norm": 0.9140625, "learning_rate": 1.9944869795677673e-05, "loss": 0.3298, "step": 932 }, { "epoch": 0.07061160777635873, "grad_norm": 0.90625, "learning_rate": 1.994474482832655e-05, "loss": 0.3151, "step": 933 }, { "epoch": 0.07068729009980607, "grad_norm": 0.98046875, "learning_rate": 1.9944619719892154e-05, "loss": 0.3295, "step": 934 }, { "epoch": 0.0707629724232534, "grad_norm": 0.89453125, "learning_rate": 1.994449447037626e-05, "loss": 0.3373, "step": 935 }, { "epoch": 0.07083865474670073, "grad_norm": 0.90625, "learning_rate": 1.9944369079780648e-05, "loss": 0.3547, "step": 936 }, { "epoch": 0.07091433707014805, "grad_norm": 1.046875, "learning_rate": 1.9944243548107098e-05, "loss": 0.3308, "step": 937 }, { "epoch": 0.07099001939359538, "grad_norm": 0.98046875, "learning_rate": 1.9944117875357384e-05, "loss": 0.3795, "step": 938 }, { "epoch": 0.07106570171704271, "grad_norm": 0.8984375, "learning_rate": 1.9943992061533297e-05, "loss": 0.33, "step": 939 }, { "epoch": 0.07114138404049004, "grad_norm": 0.99609375, "learning_rate": 1.9943866106636614e-05, "loss": 0.3785, "step": 940 }, { "epoch": 0.07121706636393738, "grad_norm": 1.0234375, "learning_rate": 1.9943740010669132e-05, "loss": 0.4174, "step": 941 }, { "epoch": 0.07129274868738471, "grad_norm": 0.8984375, "learning_rate": 1.9943613773632632e-05, "loss": 0.3026, "step": 942 }, { "epoch": 0.07136843101083203, "grad_norm": 0.9296875, "learning_rate": 1.9943487395528903e-05, "loss": 0.3472, "step": 943 }, { "epoch": 0.07144411333427936, "grad_norm": 1.1328125, "learning_rate": 1.994336087635975e-05, "loss": 0.3461, "step": 944 }, { "epoch": 0.07151979565772669, "grad_norm": 1.0546875, "learning_rate": 1.994323421612695e-05, "loss": 0.414, "step": 945 }, { "epoch": 0.07159547798117402, "grad_norm": 0.9140625, "learning_rate": 1.9943107414832314e-05, "loss": 0.3374, "step": 946 }, { "epoch": 0.07167116030462135, "grad_norm": 0.8671875, "learning_rate": 1.9942980472477635e-05, "loss": 0.3116, "step": 947 }, { "epoch": 0.07174684262806869, "grad_norm": 0.89453125, "learning_rate": 1.9942853389064716e-05, "loss": 0.3212, "step": 948 }, { "epoch": 0.07182252495151602, "grad_norm": 0.92578125, "learning_rate": 1.994272616459536e-05, "loss": 0.3534, "step": 949 }, { "epoch": 0.07189820727496334, "grad_norm": 0.94140625, "learning_rate": 1.994259879907137e-05, "loss": 0.3554, "step": 950 }, { "epoch": 0.07197388959841067, "grad_norm": 0.8984375, "learning_rate": 1.9942471292494553e-05, "loss": 0.3476, "step": 951 }, { "epoch": 0.072049571921858, "grad_norm": 0.89453125, "learning_rate": 1.9942343644866723e-05, "loss": 0.3561, "step": 952 }, { "epoch": 0.07212525424530533, "grad_norm": 0.87109375, "learning_rate": 1.9942215856189683e-05, "loss": 0.2968, "step": 953 }, { "epoch": 0.07220093656875266, "grad_norm": 0.9453125, "learning_rate": 1.994208792646525e-05, "loss": 0.3249, "step": 954 }, { "epoch": 0.0722766188922, "grad_norm": 0.94921875, "learning_rate": 1.994195985569524e-05, "loss": 0.3475, "step": 955 }, { "epoch": 0.07235230121564733, "grad_norm": 0.91796875, "learning_rate": 1.9941831643881464e-05, "loss": 0.3324, "step": 956 }, { "epoch": 0.07242798353909465, "grad_norm": 0.890625, "learning_rate": 1.994170329102575e-05, "loss": 0.3268, "step": 957 }, { "epoch": 0.07250366586254198, "grad_norm": 0.9140625, "learning_rate": 1.994157479712991e-05, "loss": 0.348, "step": 958 }, { "epoch": 0.07257934818598931, "grad_norm": 0.953125, "learning_rate": 1.9941446162195775e-05, "loss": 0.3681, "step": 959 }, { "epoch": 0.07265503050943664, "grad_norm": 0.93359375, "learning_rate": 1.9941317386225165e-05, "loss": 0.3651, "step": 960 }, { "epoch": 0.07273071283288397, "grad_norm": 0.90234375, "learning_rate": 1.994118846921991e-05, "loss": 0.3556, "step": 961 }, { "epoch": 0.0728063951563313, "grad_norm": 0.8203125, "learning_rate": 1.9941059411181837e-05, "loss": 0.3084, "step": 962 }, { "epoch": 0.07288207747977862, "grad_norm": 0.99609375, "learning_rate": 1.9940930212112774e-05, "loss": 0.4076, "step": 963 }, { "epoch": 0.07295775980322595, "grad_norm": 0.94921875, "learning_rate": 1.994080087201456e-05, "loss": 0.3301, "step": 964 }, { "epoch": 0.07303344212667329, "grad_norm": 0.921875, "learning_rate": 1.9940671390889023e-05, "loss": 0.3843, "step": 965 }, { "epoch": 0.07310912445012062, "grad_norm": 0.96484375, "learning_rate": 1.9940541768738004e-05, "loss": 0.3571, "step": 966 }, { "epoch": 0.07318480677356795, "grad_norm": 0.96875, "learning_rate": 1.9940412005563342e-05, "loss": 0.3502, "step": 967 }, { "epoch": 0.07326048909701528, "grad_norm": 0.921875, "learning_rate": 1.9940282101366876e-05, "loss": 0.3409, "step": 968 }, { "epoch": 0.07333617142046261, "grad_norm": 0.953125, "learning_rate": 1.9940152056150452e-05, "loss": 0.4069, "step": 969 }, { "epoch": 0.07341185374390993, "grad_norm": 0.86328125, "learning_rate": 1.9940021869915915e-05, "loss": 0.3226, "step": 970 }, { "epoch": 0.07348753606735726, "grad_norm": 0.8984375, "learning_rate": 1.993989154266511e-05, "loss": 0.3521, "step": 971 }, { "epoch": 0.0735632183908046, "grad_norm": 0.8984375, "learning_rate": 1.9939761074399883e-05, "loss": 0.3446, "step": 972 }, { "epoch": 0.07363890071425193, "grad_norm": 0.9296875, "learning_rate": 1.9939630465122088e-05, "loss": 0.3647, "step": 973 }, { "epoch": 0.07371458303769926, "grad_norm": 0.96875, "learning_rate": 1.993949971483358e-05, "loss": 0.3533, "step": 974 }, { "epoch": 0.07379026536114659, "grad_norm": 1.015625, "learning_rate": 1.9939368823536213e-05, "loss": 0.3582, "step": 975 }, { "epoch": 0.07386594768459391, "grad_norm": 0.91796875, "learning_rate": 1.9939237791231838e-05, "loss": 0.358, "step": 976 }, { "epoch": 0.07394163000804124, "grad_norm": 0.90625, "learning_rate": 1.993910661792232e-05, "loss": 0.3443, "step": 977 }, { "epoch": 0.07401731233148857, "grad_norm": 0.9453125, "learning_rate": 1.993897530360952e-05, "loss": 0.354, "step": 978 }, { "epoch": 0.0740929946549359, "grad_norm": 0.83203125, "learning_rate": 1.9938843848295298e-05, "loss": 0.3013, "step": 979 }, { "epoch": 0.07416867697838324, "grad_norm": 1.03125, "learning_rate": 1.9938712251981523e-05, "loss": 0.3587, "step": 980 }, { "epoch": 0.07424435930183057, "grad_norm": 0.91015625, "learning_rate": 1.9938580514670058e-05, "loss": 0.368, "step": 981 }, { "epoch": 0.0743200416252779, "grad_norm": 0.96875, "learning_rate": 1.9938448636362772e-05, "loss": 0.4046, "step": 982 }, { "epoch": 0.07439572394872522, "grad_norm": 0.90234375, "learning_rate": 1.993831661706154e-05, "loss": 0.3271, "step": 983 }, { "epoch": 0.07447140627217255, "grad_norm": 0.90625, "learning_rate": 1.9938184456768227e-05, "loss": 0.3617, "step": 984 }, { "epoch": 0.07454708859561988, "grad_norm": 0.99609375, "learning_rate": 1.993805215548472e-05, "loss": 0.3061, "step": 985 }, { "epoch": 0.07462277091906722, "grad_norm": 4.0, "learning_rate": 1.9937919713212885e-05, "loss": 0.672, "step": 986 }, { "epoch": 0.07469845324251455, "grad_norm": 0.94921875, "learning_rate": 1.9937787129954605e-05, "loss": 0.3752, "step": 987 }, { "epoch": 0.07477413556596188, "grad_norm": 1.0234375, "learning_rate": 1.9937654405711758e-05, "loss": 0.3469, "step": 988 }, { "epoch": 0.0748498178894092, "grad_norm": 0.88671875, "learning_rate": 1.9937521540486232e-05, "loss": 0.3249, "step": 989 }, { "epoch": 0.07492550021285653, "grad_norm": 0.890625, "learning_rate": 1.993738853427991e-05, "loss": 0.3057, "step": 990 }, { "epoch": 0.07500118253630386, "grad_norm": 0.94140625, "learning_rate": 1.9937255387094677e-05, "loss": 0.4027, "step": 991 }, { "epoch": 0.0750768648597512, "grad_norm": 0.83984375, "learning_rate": 1.9937122098932428e-05, "loss": 0.2648, "step": 992 }, { "epoch": 0.07515254718319853, "grad_norm": 0.86328125, "learning_rate": 1.9936988669795046e-05, "loss": 0.3396, "step": 993 }, { "epoch": 0.07522822950664586, "grad_norm": 0.8046875, "learning_rate": 1.9936855099684427e-05, "loss": 0.2744, "step": 994 }, { "epoch": 0.07530391183009319, "grad_norm": 1.078125, "learning_rate": 1.9936721388602466e-05, "loss": 0.4435, "step": 995 }, { "epoch": 0.07537959415354051, "grad_norm": 0.96484375, "learning_rate": 1.993658753655106e-05, "loss": 0.3682, "step": 996 }, { "epoch": 0.07545527647698784, "grad_norm": 0.96484375, "learning_rate": 1.9936453543532107e-05, "loss": 0.3721, "step": 997 }, { "epoch": 0.07553095880043517, "grad_norm": 0.84765625, "learning_rate": 1.9936319409547514e-05, "loss": 0.298, "step": 998 }, { "epoch": 0.0756066411238825, "grad_norm": 0.8828125, "learning_rate": 1.9936185134599176e-05, "loss": 0.3107, "step": 999 }, { "epoch": 0.07568232344732984, "grad_norm": 3.375, "learning_rate": 1.9936050718689e-05, "loss": 0.6367, "step": 1000 }, { "epoch": 0.07575800577077717, "grad_norm": 1.1015625, "learning_rate": 1.9935916161818892e-05, "loss": 0.4079, "step": 1001 }, { "epoch": 0.0758336880942245, "grad_norm": 0.98828125, "learning_rate": 1.9935781463990765e-05, "loss": 0.3226, "step": 1002 }, { "epoch": 0.07590937041767182, "grad_norm": 0.94921875, "learning_rate": 1.993564662520653e-05, "loss": 0.3605, "step": 1003 }, { "epoch": 0.07598505274111915, "grad_norm": 1.0078125, "learning_rate": 1.9935511645468094e-05, "loss": 0.4005, "step": 1004 }, { "epoch": 0.07606073506456648, "grad_norm": 0.94921875, "learning_rate": 1.9935376524777375e-05, "loss": 0.355, "step": 1005 }, { "epoch": 0.07613641738801381, "grad_norm": 0.89453125, "learning_rate": 1.993524126313629e-05, "loss": 0.3335, "step": 1006 }, { "epoch": 0.07621209971146115, "grad_norm": 2.171875, "learning_rate": 1.993510586054676e-05, "loss": 0.6094, "step": 1007 }, { "epoch": 0.07628778203490848, "grad_norm": 0.87890625, "learning_rate": 1.9934970317010706e-05, "loss": 0.3342, "step": 1008 }, { "epoch": 0.0763634643583558, "grad_norm": 0.98828125, "learning_rate": 1.9934834632530043e-05, "loss": 0.3943, "step": 1009 }, { "epoch": 0.07643914668180313, "grad_norm": 4.8125, "learning_rate": 1.9934698807106706e-05, "loss": 0.6409, "step": 1010 }, { "epoch": 0.07651482900525046, "grad_norm": 0.9296875, "learning_rate": 1.993456284074262e-05, "loss": 0.3349, "step": 1011 }, { "epoch": 0.07659051132869779, "grad_norm": 0.94921875, "learning_rate": 1.9934426733439708e-05, "loss": 0.3512, "step": 1012 }, { "epoch": 0.07666619365214512, "grad_norm": 0.94140625, "learning_rate": 1.993429048519991e-05, "loss": 0.2882, "step": 1013 }, { "epoch": 0.07674187597559246, "grad_norm": 0.91015625, "learning_rate": 1.9934154096025148e-05, "loss": 0.368, "step": 1014 }, { "epoch": 0.07681755829903979, "grad_norm": 0.9609375, "learning_rate": 1.993401756591736e-05, "loss": 0.3429, "step": 1015 }, { "epoch": 0.0768932406224871, "grad_norm": 0.97265625, "learning_rate": 1.9933880894878492e-05, "loss": 0.3513, "step": 1016 }, { "epoch": 0.07696892294593444, "grad_norm": 0.90234375, "learning_rate": 1.9933744082910477e-05, "loss": 0.318, "step": 1017 }, { "epoch": 0.07704460526938177, "grad_norm": 0.9609375, "learning_rate": 1.9933607130015252e-05, "loss": 0.3159, "step": 1018 }, { "epoch": 0.0771202875928291, "grad_norm": 0.89453125, "learning_rate": 1.993347003619476e-05, "loss": 0.3614, "step": 1019 }, { "epoch": 0.07719596991627643, "grad_norm": 1.0859375, "learning_rate": 1.9933332801450955e-05, "loss": 0.402, "step": 1020 }, { "epoch": 0.07727165223972376, "grad_norm": 1.0859375, "learning_rate": 1.9933195425785774e-05, "loss": 0.3687, "step": 1021 }, { "epoch": 0.07734733456317108, "grad_norm": 0.9453125, "learning_rate": 1.9933057909201174e-05, "loss": 0.3741, "step": 1022 }, { "epoch": 0.07742301688661841, "grad_norm": 1.0, "learning_rate": 1.9932920251699098e-05, "loss": 0.3556, "step": 1023 }, { "epoch": 0.07749869921006575, "grad_norm": 0.9453125, "learning_rate": 1.9932782453281504e-05, "loss": 0.3586, "step": 1024 }, { "epoch": 0.07757438153351308, "grad_norm": 0.87890625, "learning_rate": 1.993264451395035e-05, "loss": 0.3001, "step": 1025 }, { "epoch": 0.07765006385696041, "grad_norm": 0.90234375, "learning_rate": 1.993250643370758e-05, "loss": 0.337, "step": 1026 }, { "epoch": 0.07772574618040774, "grad_norm": 3.4375, "learning_rate": 1.9932368212555168e-05, "loss": 0.6953, "step": 1027 }, { "epoch": 0.07780142850385507, "grad_norm": 1.0078125, "learning_rate": 1.9932229850495067e-05, "loss": 0.3708, "step": 1028 }, { "epoch": 0.07787711082730239, "grad_norm": 0.92578125, "learning_rate": 1.993209134752924e-05, "loss": 0.3218, "step": 1029 }, { "epoch": 0.07795279315074972, "grad_norm": 0.8984375, "learning_rate": 1.9931952703659655e-05, "loss": 0.3294, "step": 1030 }, { "epoch": 0.07802847547419706, "grad_norm": 0.9296875, "learning_rate": 1.9931813918888273e-05, "loss": 0.3318, "step": 1031 }, { "epoch": 0.07810415779764439, "grad_norm": 0.90625, "learning_rate": 1.993167499321707e-05, "loss": 0.3243, "step": 1032 }, { "epoch": 0.07817984012109172, "grad_norm": 0.90625, "learning_rate": 1.9931535926648016e-05, "loss": 0.3519, "step": 1033 }, { "epoch": 0.07825552244453905, "grad_norm": 0.9296875, "learning_rate": 1.993139671918308e-05, "loss": 0.3414, "step": 1034 }, { "epoch": 0.07833120476798637, "grad_norm": 0.91796875, "learning_rate": 1.9931257370824237e-05, "loss": 0.3233, "step": 1035 }, { "epoch": 0.0784068870914337, "grad_norm": 0.98046875, "learning_rate": 1.9931117881573466e-05, "loss": 0.3536, "step": 1036 }, { "epoch": 0.07848256941488103, "grad_norm": 0.9296875, "learning_rate": 1.9930978251432748e-05, "loss": 0.3469, "step": 1037 }, { "epoch": 0.07855825173832837, "grad_norm": 0.85546875, "learning_rate": 1.993083848040406e-05, "loss": 0.2918, "step": 1038 }, { "epoch": 0.0786339340617757, "grad_norm": 0.98828125, "learning_rate": 1.9930698568489385e-05, "loss": 0.3598, "step": 1039 }, { "epoch": 0.07870961638522303, "grad_norm": 1.0390625, "learning_rate": 1.993055851569071e-05, "loss": 0.3683, "step": 1040 }, { "epoch": 0.07878529870867036, "grad_norm": 0.97265625, "learning_rate": 1.993041832201002e-05, "loss": 0.362, "step": 1041 }, { "epoch": 0.07886098103211768, "grad_norm": 0.8046875, "learning_rate": 1.9930277987449307e-05, "loss": 0.2752, "step": 1042 }, { "epoch": 0.07893666335556501, "grad_norm": 0.94140625, "learning_rate": 1.9930137512010558e-05, "loss": 0.3223, "step": 1043 }, { "epoch": 0.07901234567901234, "grad_norm": 3.21875, "learning_rate": 1.992999689569577e-05, "loss": 0.654, "step": 1044 }, { "epoch": 0.07908802800245968, "grad_norm": 0.98046875, "learning_rate": 1.9929856138506933e-05, "loss": 0.3568, "step": 1045 }, { "epoch": 0.07916371032590701, "grad_norm": 0.89453125, "learning_rate": 1.9929715240446048e-05, "loss": 0.3181, "step": 1046 }, { "epoch": 0.07923939264935434, "grad_norm": 0.87890625, "learning_rate": 1.992957420151511e-05, "loss": 0.3179, "step": 1047 }, { "epoch": 0.07931507497280167, "grad_norm": 0.921875, "learning_rate": 1.9929433021716125e-05, "loss": 0.35, "step": 1048 }, { "epoch": 0.07939075729624899, "grad_norm": 0.94140625, "learning_rate": 1.9929291701051096e-05, "loss": 0.3552, "step": 1049 }, { "epoch": 0.07946643961969632, "grad_norm": 0.98046875, "learning_rate": 1.9929150239522017e-05, "loss": 0.3777, "step": 1050 }, { "epoch": 0.07954212194314365, "grad_norm": 0.890625, "learning_rate": 1.9929008637130906e-05, "loss": 0.3325, "step": 1051 }, { "epoch": 0.07961780426659099, "grad_norm": 0.85546875, "learning_rate": 1.9928866893879773e-05, "loss": 0.3074, "step": 1052 }, { "epoch": 0.07969348659003832, "grad_norm": 1.015625, "learning_rate": 1.992872500977062e-05, "loss": 0.3715, "step": 1053 }, { "epoch": 0.07976916891348565, "grad_norm": 0.96484375, "learning_rate": 1.9928582984805467e-05, "loss": 0.3506, "step": 1054 }, { "epoch": 0.07984485123693297, "grad_norm": 0.9140625, "learning_rate": 1.9928440818986324e-05, "loss": 0.3315, "step": 1055 }, { "epoch": 0.0799205335603803, "grad_norm": 0.9296875, "learning_rate": 1.992829851231521e-05, "loss": 0.3571, "step": 1056 }, { "epoch": 0.07999621588382763, "grad_norm": 0.9609375, "learning_rate": 1.9928156064794148e-05, "loss": 0.4042, "step": 1057 }, { "epoch": 0.08007189820727496, "grad_norm": 0.98046875, "learning_rate": 1.992801347642515e-05, "loss": 0.3798, "step": 1058 }, { "epoch": 0.0801475805307223, "grad_norm": 0.9765625, "learning_rate": 1.9927870747210247e-05, "loss": 0.3656, "step": 1059 }, { "epoch": 0.08022326285416963, "grad_norm": 0.97265625, "learning_rate": 1.992772787715146e-05, "loss": 0.3394, "step": 1060 }, { "epoch": 0.08029894517761696, "grad_norm": 0.99609375, "learning_rate": 1.9927584866250815e-05, "loss": 0.359, "step": 1061 }, { "epoch": 0.08037462750106428, "grad_norm": 0.984375, "learning_rate": 1.9927441714510344e-05, "loss": 0.3533, "step": 1062 }, { "epoch": 0.08045030982451161, "grad_norm": 1.0625, "learning_rate": 1.9927298421932074e-05, "loss": 0.3771, "step": 1063 }, { "epoch": 0.08052599214795894, "grad_norm": 1.0546875, "learning_rate": 1.9927154988518046e-05, "loss": 0.4083, "step": 1064 }, { "epoch": 0.08060167447140627, "grad_norm": 4.75, "learning_rate": 1.9927011414270283e-05, "loss": 0.6568, "step": 1065 }, { "epoch": 0.0806773567948536, "grad_norm": 0.9296875, "learning_rate": 1.9926867699190828e-05, "loss": 0.3587, "step": 1066 }, { "epoch": 0.08075303911830094, "grad_norm": 1.0, "learning_rate": 1.9926723843281723e-05, "loss": 0.3735, "step": 1067 }, { "epoch": 0.08082872144174826, "grad_norm": 0.87109375, "learning_rate": 1.9926579846545e-05, "loss": 0.2796, "step": 1068 }, { "epoch": 0.08090440376519559, "grad_norm": 0.9453125, "learning_rate": 1.9926435708982713e-05, "loss": 0.3424, "step": 1069 }, { "epoch": 0.08098008608864292, "grad_norm": 0.9609375, "learning_rate": 1.99262914305969e-05, "loss": 0.3654, "step": 1070 }, { "epoch": 0.08105576841209025, "grad_norm": 0.91015625, "learning_rate": 1.9926147011389606e-05, "loss": 0.339, "step": 1071 }, { "epoch": 0.08113145073553758, "grad_norm": 0.92578125, "learning_rate": 1.9926002451362886e-05, "loss": 0.3089, "step": 1072 }, { "epoch": 0.08120713305898491, "grad_norm": 1.2265625, "learning_rate": 1.9925857750518785e-05, "loss": 0.3681, "step": 1073 }, { "epoch": 0.08128281538243225, "grad_norm": 1.0234375, "learning_rate": 1.992571290885936e-05, "loss": 0.2929, "step": 1074 }, { "epoch": 0.08135849770587956, "grad_norm": 0.90625, "learning_rate": 1.9925567926386664e-05, "loss": 0.3439, "step": 1075 }, { "epoch": 0.0814341800293269, "grad_norm": 0.9921875, "learning_rate": 1.9925422803102753e-05, "loss": 0.3808, "step": 1076 }, { "epoch": 0.08150986235277423, "grad_norm": 0.94140625, "learning_rate": 1.9925277539009688e-05, "loss": 0.36, "step": 1077 }, { "epoch": 0.08158554467622156, "grad_norm": 0.9765625, "learning_rate": 1.992513213410953e-05, "loss": 0.4312, "step": 1078 }, { "epoch": 0.08166122699966889, "grad_norm": 0.89453125, "learning_rate": 1.992498658840434e-05, "loss": 0.3294, "step": 1079 }, { "epoch": 0.08173690932311622, "grad_norm": 0.96484375, "learning_rate": 1.992484090189618e-05, "loss": 0.3738, "step": 1080 }, { "epoch": 0.08181259164656354, "grad_norm": 3.25, "learning_rate": 1.9924695074587125e-05, "loss": 0.5879, "step": 1081 }, { "epoch": 0.08188827397001087, "grad_norm": 0.9296875, "learning_rate": 1.9924549106479237e-05, "loss": 0.3683, "step": 1082 }, { "epoch": 0.0819639562934582, "grad_norm": 0.9296875, "learning_rate": 1.992440299757459e-05, "loss": 0.3378, "step": 1083 }, { "epoch": 0.08203963861690554, "grad_norm": 0.92578125, "learning_rate": 1.9924256747875253e-05, "loss": 0.3356, "step": 1084 }, { "epoch": 0.08211532094035287, "grad_norm": 0.8984375, "learning_rate": 1.9924110357383304e-05, "loss": 0.3506, "step": 1085 }, { "epoch": 0.0821910032638002, "grad_norm": 0.89453125, "learning_rate": 1.992396382610082e-05, "loss": 0.3318, "step": 1086 }, { "epoch": 0.08226668558724753, "grad_norm": 0.98046875, "learning_rate": 1.9923817154029882e-05, "loss": 0.3563, "step": 1087 }, { "epoch": 0.08234236791069485, "grad_norm": 0.88671875, "learning_rate": 1.9923670341172564e-05, "loss": 0.3335, "step": 1088 }, { "epoch": 0.08241805023414218, "grad_norm": 0.91015625, "learning_rate": 1.9923523387530953e-05, "loss": 0.3402, "step": 1089 }, { "epoch": 0.08249373255758952, "grad_norm": 1.046875, "learning_rate": 1.9923376293107133e-05, "loss": 0.3892, "step": 1090 }, { "epoch": 0.08256941488103685, "grad_norm": 0.83203125, "learning_rate": 1.992322905790319e-05, "loss": 0.2713, "step": 1091 }, { "epoch": 0.08264509720448418, "grad_norm": 0.93359375, "learning_rate": 1.992308168192122e-05, "loss": 0.3563, "step": 1092 }, { "epoch": 0.08272077952793151, "grad_norm": 1.03125, "learning_rate": 1.99229341651633e-05, "loss": 0.3881, "step": 1093 }, { "epoch": 0.08279646185137884, "grad_norm": 2.5625, "learning_rate": 1.9922786507631535e-05, "loss": 0.5227, "step": 1094 }, { "epoch": 0.08287214417482616, "grad_norm": 0.87109375, "learning_rate": 1.9922638709328015e-05, "loss": 0.2893, "step": 1095 }, { "epoch": 0.0829478264982735, "grad_norm": 0.9453125, "learning_rate": 1.9922490770254835e-05, "loss": 0.3714, "step": 1096 }, { "epoch": 0.08302350882172083, "grad_norm": 1.1015625, "learning_rate": 1.99223426904141e-05, "loss": 0.4011, "step": 1097 }, { "epoch": 0.08309919114516816, "grad_norm": 0.8671875, "learning_rate": 1.9922194469807902e-05, "loss": 0.3263, "step": 1098 }, { "epoch": 0.08317487346861549, "grad_norm": 0.9609375, "learning_rate": 1.9922046108438353e-05, "loss": 0.3745, "step": 1099 }, { "epoch": 0.08325055579206282, "grad_norm": 0.91796875, "learning_rate": 1.992189760630755e-05, "loss": 0.3431, "step": 1100 }, { "epoch": 0.08332623811551014, "grad_norm": 0.94140625, "learning_rate": 1.9921748963417603e-05, "loss": 0.3631, "step": 1101 }, { "epoch": 0.08340192043895747, "grad_norm": 0.9765625, "learning_rate": 1.9921600179770623e-05, "loss": 0.4106, "step": 1102 }, { "epoch": 0.0834776027624048, "grad_norm": 0.89453125, "learning_rate": 1.9921451255368716e-05, "loss": 0.3642, "step": 1103 }, { "epoch": 0.08355328508585214, "grad_norm": 0.921875, "learning_rate": 1.9921302190214e-05, "loss": 0.341, "step": 1104 }, { "epoch": 0.08362896740929947, "grad_norm": 1.0546875, "learning_rate": 1.992115298430858e-05, "loss": 0.4104, "step": 1105 }, { "epoch": 0.0837046497327468, "grad_norm": 0.84375, "learning_rate": 1.9921003637654584e-05, "loss": 0.3069, "step": 1106 }, { "epoch": 0.08378033205619413, "grad_norm": 0.91015625, "learning_rate": 1.9920854150254126e-05, "loss": 0.3545, "step": 1107 }, { "epoch": 0.08385601437964145, "grad_norm": 1.03125, "learning_rate": 1.9920704522109328e-05, "loss": 0.3223, "step": 1108 }, { "epoch": 0.08393169670308878, "grad_norm": 0.8828125, "learning_rate": 1.992055475322231e-05, "loss": 0.342, "step": 1109 }, { "epoch": 0.08400737902653611, "grad_norm": 0.94921875, "learning_rate": 1.9920404843595204e-05, "loss": 0.3816, "step": 1110 }, { "epoch": 0.08408306134998345, "grad_norm": 0.9453125, "learning_rate": 1.9920254793230126e-05, "loss": 0.3626, "step": 1111 }, { "epoch": 0.08415874367343078, "grad_norm": 0.9296875, "learning_rate": 1.9920104602129214e-05, "loss": 0.3479, "step": 1112 }, { "epoch": 0.08423442599687811, "grad_norm": 0.92578125, "learning_rate": 1.991995427029459e-05, "loss": 0.3576, "step": 1113 }, { "epoch": 0.08431010832032543, "grad_norm": 0.953125, "learning_rate": 1.9919803797728393e-05, "loss": 0.3229, "step": 1114 }, { "epoch": 0.08438579064377276, "grad_norm": 0.8984375, "learning_rate": 1.991965318443276e-05, "loss": 0.3417, "step": 1115 }, { "epoch": 0.08446147296722009, "grad_norm": 0.875, "learning_rate": 1.9919502430409818e-05, "loss": 0.3444, "step": 1116 }, { "epoch": 0.08453715529066742, "grad_norm": 0.88671875, "learning_rate": 1.9919351535661712e-05, "loss": 0.3481, "step": 1117 }, { "epoch": 0.08461283761411476, "grad_norm": 0.9140625, "learning_rate": 1.9919200500190588e-05, "loss": 0.3171, "step": 1118 }, { "epoch": 0.08468851993756209, "grad_norm": 0.91796875, "learning_rate": 1.9919049323998577e-05, "loss": 0.2945, "step": 1119 }, { "epoch": 0.08476420226100942, "grad_norm": 0.84765625, "learning_rate": 1.9918898007087834e-05, "loss": 0.3139, "step": 1120 }, { "epoch": 0.08483988458445674, "grad_norm": 0.8828125, "learning_rate": 1.9918746549460497e-05, "loss": 0.3494, "step": 1121 }, { "epoch": 0.08491556690790407, "grad_norm": 0.99609375, "learning_rate": 1.9918594951118724e-05, "loss": 0.3829, "step": 1122 }, { "epoch": 0.0849912492313514, "grad_norm": 0.8984375, "learning_rate": 1.9918443212064656e-05, "loss": 0.35, "step": 1123 }, { "epoch": 0.08506693155479873, "grad_norm": 0.90234375, "learning_rate": 1.9918291332300455e-05, "loss": 0.3048, "step": 1124 }, { "epoch": 0.08514261387824607, "grad_norm": 0.9296875, "learning_rate": 1.9918139311828266e-05, "loss": 0.3697, "step": 1125 }, { "epoch": 0.0852182962016934, "grad_norm": 0.96484375, "learning_rate": 1.9917987150650257e-05, "loss": 0.4194, "step": 1126 }, { "epoch": 0.08529397852514072, "grad_norm": 0.96875, "learning_rate": 1.9917834848768573e-05, "loss": 0.362, "step": 1127 }, { "epoch": 0.08536966084858805, "grad_norm": 0.90625, "learning_rate": 1.9917682406185388e-05, "loss": 0.339, "step": 1128 }, { "epoch": 0.08544534317203538, "grad_norm": 0.94921875, "learning_rate": 1.9917529822902855e-05, "loss": 0.3533, "step": 1129 }, { "epoch": 0.08552102549548271, "grad_norm": 0.9609375, "learning_rate": 1.9917377098923145e-05, "loss": 0.3551, "step": 1130 }, { "epoch": 0.08559670781893004, "grad_norm": 0.953125, "learning_rate": 1.9917224234248424e-05, "loss": 0.3815, "step": 1131 }, { "epoch": 0.08567239014237737, "grad_norm": 0.890625, "learning_rate": 1.9917071228880852e-05, "loss": 0.3185, "step": 1132 }, { "epoch": 0.0857480724658247, "grad_norm": 0.87109375, "learning_rate": 1.9916918082822612e-05, "loss": 0.3135, "step": 1133 }, { "epoch": 0.08582375478927202, "grad_norm": 0.85546875, "learning_rate": 1.991676479607587e-05, "loss": 0.3119, "step": 1134 }, { "epoch": 0.08589943711271936, "grad_norm": 4.78125, "learning_rate": 1.99166113686428e-05, "loss": 0.6268, "step": 1135 }, { "epoch": 0.08597511943616669, "grad_norm": 0.96875, "learning_rate": 1.991645780052558e-05, "loss": 0.3447, "step": 1136 }, { "epoch": 0.08605080175961402, "grad_norm": 0.91015625, "learning_rate": 1.991630409172639e-05, "loss": 0.3156, "step": 1137 }, { "epoch": 0.08612648408306135, "grad_norm": 1.0078125, "learning_rate": 1.991615024224741e-05, "loss": 0.3634, "step": 1138 }, { "epoch": 0.08620216640650868, "grad_norm": 0.92578125, "learning_rate": 1.991599625209082e-05, "loss": 0.3391, "step": 1139 }, { "epoch": 0.08627784872995602, "grad_norm": 0.953125, "learning_rate": 1.991584212125881e-05, "loss": 0.3365, "step": 1140 }, { "epoch": 0.08635353105340333, "grad_norm": 0.921875, "learning_rate": 1.991568784975356e-05, "loss": 0.3585, "step": 1141 }, { "epoch": 0.08642921337685067, "grad_norm": 1.0078125, "learning_rate": 1.9915533437577264e-05, "loss": 0.4057, "step": 1142 }, { "epoch": 0.086504895700298, "grad_norm": 1.0, "learning_rate": 1.991537888473211e-05, "loss": 0.3827, "step": 1143 }, { "epoch": 0.08658057802374533, "grad_norm": 1.015625, "learning_rate": 1.991522419122029e-05, "loss": 0.3639, "step": 1144 }, { "epoch": 0.08665626034719266, "grad_norm": 1.046875, "learning_rate": 1.9915069357044005e-05, "loss": 0.3318, "step": 1145 }, { "epoch": 0.08673194267064, "grad_norm": 0.9453125, "learning_rate": 1.991491438220544e-05, "loss": 0.3712, "step": 1146 }, { "epoch": 0.08680762499408731, "grad_norm": 0.875, "learning_rate": 1.99147592667068e-05, "loss": 0.3041, "step": 1147 }, { "epoch": 0.08688330731753464, "grad_norm": 0.9609375, "learning_rate": 1.991460401055029e-05, "loss": 0.3481, "step": 1148 }, { "epoch": 0.08695898964098198, "grad_norm": 0.8671875, "learning_rate": 1.9914448613738107e-05, "loss": 0.3194, "step": 1149 }, { "epoch": 0.08703467196442931, "grad_norm": 0.90234375, "learning_rate": 1.991429307627245e-05, "loss": 0.3523, "step": 1150 }, { "epoch": 0.08711035428787664, "grad_norm": 3.359375, "learning_rate": 1.9914137398155542e-05, "loss": 0.5433, "step": 1151 }, { "epoch": 0.08718603661132397, "grad_norm": 0.89453125, "learning_rate": 1.9913981579389578e-05, "loss": 0.3393, "step": 1152 }, { "epoch": 0.0872617189347713, "grad_norm": 2.078125, "learning_rate": 1.991382561997677e-05, "loss": 0.6192, "step": 1153 }, { "epoch": 0.08733740125821862, "grad_norm": 1.0, "learning_rate": 1.9913669519919337e-05, "loss": 0.3554, "step": 1154 }, { "epoch": 0.08741308358166595, "grad_norm": 0.91015625, "learning_rate": 1.9913513279219485e-05, "loss": 0.3667, "step": 1155 }, { "epoch": 0.08748876590511329, "grad_norm": 0.87109375, "learning_rate": 1.991335689787944e-05, "loss": 0.3198, "step": 1156 }, { "epoch": 0.08756444822856062, "grad_norm": 0.921875, "learning_rate": 1.991320037590141e-05, "loss": 0.363, "step": 1157 }, { "epoch": 0.08764013055200795, "grad_norm": 0.953125, "learning_rate": 1.9913043713287622e-05, "loss": 0.3629, "step": 1158 }, { "epoch": 0.08771581287545528, "grad_norm": 0.88671875, "learning_rate": 1.99128869100403e-05, "loss": 0.3113, "step": 1159 }, { "epoch": 0.0877914951989026, "grad_norm": 1.015625, "learning_rate": 1.9912729966161666e-05, "loss": 0.3086, "step": 1160 }, { "epoch": 0.08786717752234993, "grad_norm": 2.765625, "learning_rate": 1.9912572881653945e-05, "loss": 0.5991, "step": 1161 }, { "epoch": 0.08794285984579726, "grad_norm": 0.87109375, "learning_rate": 1.9912415656519364e-05, "loss": 0.3477, "step": 1162 }, { "epoch": 0.0880185421692446, "grad_norm": 0.8984375, "learning_rate": 1.9912258290760164e-05, "loss": 0.3262, "step": 1163 }, { "epoch": 0.08809422449269193, "grad_norm": 2.46875, "learning_rate": 1.9912100784378566e-05, "loss": 0.605, "step": 1164 }, { "epoch": 0.08816990681613926, "grad_norm": 0.8984375, "learning_rate": 1.991194313737681e-05, "loss": 0.3336, "step": 1165 }, { "epoch": 0.08824558913958659, "grad_norm": 0.9609375, "learning_rate": 1.991178534975713e-05, "loss": 0.3727, "step": 1166 }, { "epoch": 0.08832127146303391, "grad_norm": 0.9296875, "learning_rate": 1.9911627421521763e-05, "loss": 0.3434, "step": 1167 }, { "epoch": 0.08839695378648124, "grad_norm": 0.89453125, "learning_rate": 1.9911469352672958e-05, "loss": 0.3453, "step": 1168 }, { "epoch": 0.08847263610992857, "grad_norm": 0.88671875, "learning_rate": 1.9911311143212946e-05, "loss": 0.3155, "step": 1169 }, { "epoch": 0.0885483184333759, "grad_norm": 1.0, "learning_rate": 1.991115279314398e-05, "loss": 0.3798, "step": 1170 }, { "epoch": 0.08862400075682324, "grad_norm": 0.94140625, "learning_rate": 1.9910994302468306e-05, "loss": 0.3602, "step": 1171 }, { "epoch": 0.08869968308027057, "grad_norm": 0.83203125, "learning_rate": 1.9910835671188167e-05, "loss": 0.2927, "step": 1172 }, { "epoch": 0.08877536540371789, "grad_norm": 0.83203125, "learning_rate": 1.9910676899305814e-05, "loss": 0.3186, "step": 1173 }, { "epoch": 0.08885104772716522, "grad_norm": 0.953125, "learning_rate": 1.9910517986823506e-05, "loss": 0.3651, "step": 1174 }, { "epoch": 0.08892673005061255, "grad_norm": 0.9609375, "learning_rate": 1.991035893374349e-05, "loss": 0.3578, "step": 1175 }, { "epoch": 0.08900241237405988, "grad_norm": 0.98828125, "learning_rate": 1.991019974006803e-05, "loss": 0.3729, "step": 1176 }, { "epoch": 0.08907809469750722, "grad_norm": 0.953125, "learning_rate": 1.9910040405799377e-05, "loss": 0.3555, "step": 1177 }, { "epoch": 0.08915377702095455, "grad_norm": 0.85546875, "learning_rate": 1.9909880930939796e-05, "loss": 0.32, "step": 1178 }, { "epoch": 0.08922945934440188, "grad_norm": 0.98046875, "learning_rate": 1.9909721315491547e-05, "loss": 0.3806, "step": 1179 }, { "epoch": 0.0893051416678492, "grad_norm": 0.92578125, "learning_rate": 1.9909561559456896e-05, "loss": 0.2983, "step": 1180 }, { "epoch": 0.08938082399129653, "grad_norm": 0.94140625, "learning_rate": 1.9909401662838107e-05, "loss": 0.3284, "step": 1181 }, { "epoch": 0.08945650631474386, "grad_norm": 0.96484375, "learning_rate": 1.9909241625637452e-05, "loss": 0.3312, "step": 1182 }, { "epoch": 0.08953218863819119, "grad_norm": 0.9453125, "learning_rate": 1.9909081447857202e-05, "loss": 0.3449, "step": 1183 }, { "epoch": 0.08960787096163852, "grad_norm": 0.890625, "learning_rate": 1.9908921129499624e-05, "loss": 0.3488, "step": 1184 }, { "epoch": 0.08968355328508586, "grad_norm": 0.890625, "learning_rate": 1.9908760670566994e-05, "loss": 0.3126, "step": 1185 }, { "epoch": 0.08975923560853319, "grad_norm": 0.8671875, "learning_rate": 1.9908600071061593e-05, "loss": 0.3421, "step": 1186 }, { "epoch": 0.0898349179319805, "grad_norm": 0.84765625, "learning_rate": 1.9908439330985697e-05, "loss": 0.3024, "step": 1187 }, { "epoch": 0.08991060025542784, "grad_norm": 0.96484375, "learning_rate": 1.9908278450341584e-05, "loss": 0.3767, "step": 1188 }, { "epoch": 0.08998628257887517, "grad_norm": 0.91796875, "learning_rate": 1.9908117429131536e-05, "loss": 0.33, "step": 1189 }, { "epoch": 0.0900619649023225, "grad_norm": 0.9140625, "learning_rate": 1.990795626735784e-05, "loss": 0.3436, "step": 1190 }, { "epoch": 0.09013764722576983, "grad_norm": 0.828125, "learning_rate": 1.9907794965022783e-05, "loss": 0.2992, "step": 1191 }, { "epoch": 0.09021332954921717, "grad_norm": 0.9140625, "learning_rate": 1.9907633522128655e-05, "loss": 0.33, "step": 1192 }, { "epoch": 0.09028901187266448, "grad_norm": 0.89453125, "learning_rate": 1.9907471938677737e-05, "loss": 0.3401, "step": 1193 }, { "epoch": 0.09036469419611182, "grad_norm": 0.95703125, "learning_rate": 1.9907310214672334e-05, "loss": 0.3444, "step": 1194 }, { "epoch": 0.09044037651955915, "grad_norm": 1.0703125, "learning_rate": 1.9907148350114732e-05, "loss": 0.3192, "step": 1195 }, { "epoch": 0.09051605884300648, "grad_norm": 0.921875, "learning_rate": 1.990698634500723e-05, "loss": 0.3068, "step": 1196 }, { "epoch": 0.09059174116645381, "grad_norm": 0.9921875, "learning_rate": 1.990682419935212e-05, "loss": 0.3781, "step": 1197 }, { "epoch": 0.09066742348990114, "grad_norm": 0.90625, "learning_rate": 1.9906661913151716e-05, "loss": 0.3621, "step": 1198 }, { "epoch": 0.09074310581334848, "grad_norm": 3.421875, "learning_rate": 1.9906499486408308e-05, "loss": 0.6082, "step": 1199 }, { "epoch": 0.0908187881367958, "grad_norm": 2.609375, "learning_rate": 1.9906336919124204e-05, "loss": 0.5052, "step": 1200 }, { "epoch": 0.09089447046024313, "grad_norm": 0.9609375, "learning_rate": 1.9906174211301715e-05, "loss": 0.3025, "step": 1201 }, { "epoch": 0.09097015278369046, "grad_norm": 0.94921875, "learning_rate": 1.990601136294314e-05, "loss": 0.3262, "step": 1202 }, { "epoch": 0.09104583510713779, "grad_norm": 0.87109375, "learning_rate": 1.99058483740508e-05, "loss": 0.3044, "step": 1203 }, { "epoch": 0.09112151743058512, "grad_norm": 0.921875, "learning_rate": 1.9905685244627e-05, "loss": 0.3111, "step": 1204 }, { "epoch": 0.09119719975403245, "grad_norm": 1.03125, "learning_rate": 1.9905521974674055e-05, "loss": 0.3582, "step": 1205 }, { "epoch": 0.09127288207747977, "grad_norm": 1.0078125, "learning_rate": 1.990535856419428e-05, "loss": 0.3224, "step": 1206 }, { "epoch": 0.0913485644009271, "grad_norm": 0.8828125, "learning_rate": 1.9905195013189997e-05, "loss": 0.3416, "step": 1207 }, { "epoch": 0.09142424672437444, "grad_norm": 0.9453125, "learning_rate": 1.9905031321663525e-05, "loss": 0.3292, "step": 1208 }, { "epoch": 0.09149992904782177, "grad_norm": 0.96875, "learning_rate": 1.9904867489617186e-05, "loss": 0.3345, "step": 1209 }, { "epoch": 0.0915756113712691, "grad_norm": 0.90234375, "learning_rate": 1.9904703517053306e-05, "loss": 0.3485, "step": 1210 }, { "epoch": 0.09165129369471643, "grad_norm": 0.93359375, "learning_rate": 1.990453940397421e-05, "loss": 0.3735, "step": 1211 }, { "epoch": 0.09172697601816376, "grad_norm": 0.87890625, "learning_rate": 1.9904375150382223e-05, "loss": 0.3538, "step": 1212 }, { "epoch": 0.09180265834161108, "grad_norm": 0.89453125, "learning_rate": 1.990421075627968e-05, "loss": 0.326, "step": 1213 }, { "epoch": 0.09187834066505841, "grad_norm": 0.953125, "learning_rate": 1.9904046221668912e-05, "loss": 0.3567, "step": 1214 }, { "epoch": 0.09195402298850575, "grad_norm": 0.9453125, "learning_rate": 1.990388154655225e-05, "loss": 0.358, "step": 1215 }, { "epoch": 0.09202970531195308, "grad_norm": 1.171875, "learning_rate": 1.9903716730932036e-05, "loss": 0.3086, "step": 1216 }, { "epoch": 0.09210538763540041, "grad_norm": 0.89453125, "learning_rate": 1.9903551774810605e-05, "loss": 0.3333, "step": 1217 }, { "epoch": 0.09218106995884774, "grad_norm": 0.88671875, "learning_rate": 1.990338667819029e-05, "loss": 0.2877, "step": 1218 }, { "epoch": 0.09225675228229506, "grad_norm": 0.89453125, "learning_rate": 1.9903221441073448e-05, "loss": 0.3373, "step": 1219 }, { "epoch": 0.09233243460574239, "grad_norm": 0.93359375, "learning_rate": 1.990305606346241e-05, "loss": 0.371, "step": 1220 }, { "epoch": 0.09240811692918972, "grad_norm": 0.92578125, "learning_rate": 1.990289054535953e-05, "loss": 0.3781, "step": 1221 }, { "epoch": 0.09248379925263706, "grad_norm": 0.875, "learning_rate": 1.9902724886767152e-05, "loss": 0.3388, "step": 1222 }, { "epoch": 0.09255948157608439, "grad_norm": 0.92578125, "learning_rate": 1.9902559087687628e-05, "loss": 0.3448, "step": 1223 }, { "epoch": 0.09263516389953172, "grad_norm": 0.93359375, "learning_rate": 1.9902393148123313e-05, "loss": 0.324, "step": 1224 }, { "epoch": 0.09271084622297905, "grad_norm": 0.9140625, "learning_rate": 1.9902227068076558e-05, "loss": 0.2996, "step": 1225 }, { "epoch": 0.09278652854642637, "grad_norm": 1.0078125, "learning_rate": 1.9902060847549716e-05, "loss": 0.3755, "step": 1226 }, { "epoch": 0.0928622108698737, "grad_norm": 0.92578125, "learning_rate": 1.990189448654515e-05, "loss": 0.3692, "step": 1227 }, { "epoch": 0.09293789319332103, "grad_norm": 1.078125, "learning_rate": 1.9901727985065216e-05, "loss": 0.3504, "step": 1228 }, { "epoch": 0.09301357551676837, "grad_norm": 0.92578125, "learning_rate": 1.9901561343112284e-05, "loss": 0.3251, "step": 1229 }, { "epoch": 0.0930892578402157, "grad_norm": 0.86328125, "learning_rate": 1.990139456068871e-05, "loss": 0.3505, "step": 1230 }, { "epoch": 0.09316494016366303, "grad_norm": 1.5859375, "learning_rate": 1.9901227637796862e-05, "loss": 0.3302, "step": 1231 }, { "epoch": 0.09324062248711036, "grad_norm": 1.3984375, "learning_rate": 1.990106057443911e-05, "loss": 0.3407, "step": 1232 }, { "epoch": 0.09331630481055768, "grad_norm": 0.8828125, "learning_rate": 1.990089337061782e-05, "loss": 0.3539, "step": 1233 }, { "epoch": 0.09339198713400501, "grad_norm": 0.8671875, "learning_rate": 1.990072602633537e-05, "loss": 0.3246, "step": 1234 }, { "epoch": 0.09346766945745234, "grad_norm": 0.90625, "learning_rate": 1.990055854159413e-05, "loss": 0.3173, "step": 1235 }, { "epoch": 0.09354335178089968, "grad_norm": 1.1640625, "learning_rate": 1.9900390916396478e-05, "loss": 0.3823, "step": 1236 }, { "epoch": 0.09361903410434701, "grad_norm": 0.9296875, "learning_rate": 1.990022315074479e-05, "loss": 0.3682, "step": 1237 }, { "epoch": 0.09369471642779434, "grad_norm": 0.953125, "learning_rate": 1.9900055244641448e-05, "loss": 0.3503, "step": 1238 }, { "epoch": 0.09377039875124166, "grad_norm": 0.90625, "learning_rate": 1.9899887198088833e-05, "loss": 0.3366, "step": 1239 }, { "epoch": 0.09384608107468899, "grad_norm": 0.96875, "learning_rate": 1.989971901108933e-05, "loss": 0.3666, "step": 1240 }, { "epoch": 0.09392176339813632, "grad_norm": 0.9453125, "learning_rate": 1.9899550683645325e-05, "loss": 0.3676, "step": 1241 }, { "epoch": 0.09399744572158365, "grad_norm": 0.90234375, "learning_rate": 1.9899382215759202e-05, "loss": 0.3201, "step": 1242 }, { "epoch": 0.09407312804503098, "grad_norm": 0.9375, "learning_rate": 1.9899213607433357e-05, "loss": 0.3366, "step": 1243 }, { "epoch": 0.09414881036847832, "grad_norm": 0.88671875, "learning_rate": 1.9899044858670178e-05, "loss": 0.3319, "step": 1244 }, { "epoch": 0.09422449269192565, "grad_norm": 0.85546875, "learning_rate": 1.989887596947206e-05, "loss": 0.3289, "step": 1245 }, { "epoch": 0.09430017501537297, "grad_norm": 0.890625, "learning_rate": 1.98987069398414e-05, "loss": 0.3422, "step": 1246 }, { "epoch": 0.0943758573388203, "grad_norm": 0.99609375, "learning_rate": 1.98985377697806e-05, "loss": 0.3863, "step": 1247 }, { "epoch": 0.09445153966226763, "grad_norm": 0.89453125, "learning_rate": 1.9898368459292047e-05, "loss": 0.3585, "step": 1248 }, { "epoch": 0.09452722198571496, "grad_norm": 0.9375, "learning_rate": 1.9898199008378155e-05, "loss": 0.3744, "step": 1249 }, { "epoch": 0.0946029043091623, "grad_norm": 0.92578125, "learning_rate": 1.9898029417041328e-05, "loss": 0.3215, "step": 1250 }, { "epoch": 0.09467858663260963, "grad_norm": 1.203125, "learning_rate": 1.9897859685283962e-05, "loss": 0.3393, "step": 1251 }, { "epoch": 0.09475426895605694, "grad_norm": 0.890625, "learning_rate": 1.9897689813108476e-05, "loss": 0.3376, "step": 1252 }, { "epoch": 0.09482995127950428, "grad_norm": 0.91796875, "learning_rate": 1.989751980051727e-05, "loss": 0.3296, "step": 1253 }, { "epoch": 0.09490563360295161, "grad_norm": 0.80078125, "learning_rate": 1.9897349647512762e-05, "loss": 0.2812, "step": 1254 }, { "epoch": 0.09498131592639894, "grad_norm": 0.93359375, "learning_rate": 1.9897179354097366e-05, "loss": 0.3848, "step": 1255 }, { "epoch": 0.09505699824984627, "grad_norm": 0.88671875, "learning_rate": 1.98970089202735e-05, "loss": 0.3526, "step": 1256 }, { "epoch": 0.0951326805732936, "grad_norm": 0.9921875, "learning_rate": 1.9896838346043574e-05, "loss": 0.3867, "step": 1257 }, { "epoch": 0.09520836289674094, "grad_norm": 0.91015625, "learning_rate": 1.9896667631410013e-05, "loss": 0.3833, "step": 1258 }, { "epoch": 0.09528404522018825, "grad_norm": 0.875, "learning_rate": 1.989649677637524e-05, "loss": 0.2948, "step": 1259 }, { "epoch": 0.09535972754363559, "grad_norm": 0.9296875, "learning_rate": 1.9896325780941677e-05, "loss": 0.3575, "step": 1260 }, { "epoch": 0.09543540986708292, "grad_norm": 0.92578125, "learning_rate": 1.9896154645111753e-05, "loss": 0.3494, "step": 1261 }, { "epoch": 0.09551109219053025, "grad_norm": 0.953125, "learning_rate": 1.9895983368887884e-05, "loss": 0.3281, "step": 1262 }, { "epoch": 0.09558677451397758, "grad_norm": 1.703125, "learning_rate": 1.9895811952272515e-05, "loss": 0.3598, "step": 1263 }, { "epoch": 0.09566245683742491, "grad_norm": 0.80859375, "learning_rate": 1.9895640395268073e-05, "loss": 0.297, "step": 1264 }, { "epoch": 0.09573813916087223, "grad_norm": 0.87109375, "learning_rate": 1.989546869787699e-05, "loss": 0.3141, "step": 1265 }, { "epoch": 0.09581382148431956, "grad_norm": 0.8515625, "learning_rate": 1.98952968601017e-05, "loss": 0.3237, "step": 1266 }, { "epoch": 0.0958895038077669, "grad_norm": 1.0078125, "learning_rate": 1.9895124881944644e-05, "loss": 0.3791, "step": 1267 }, { "epoch": 0.09596518613121423, "grad_norm": 0.8828125, "learning_rate": 1.989495276340826e-05, "loss": 0.3047, "step": 1268 }, { "epoch": 0.09604086845466156, "grad_norm": 1.046875, "learning_rate": 1.9894780504494995e-05, "loss": 0.3212, "step": 1269 }, { "epoch": 0.09611655077810889, "grad_norm": 0.96875, "learning_rate": 1.9894608105207285e-05, "loss": 0.3588, "step": 1270 }, { "epoch": 0.09619223310155622, "grad_norm": 1.140625, "learning_rate": 1.989443556554758e-05, "loss": 0.3152, "step": 1271 }, { "epoch": 0.09626791542500354, "grad_norm": 0.90234375, "learning_rate": 1.9894262885518326e-05, "loss": 0.3489, "step": 1272 }, { "epoch": 0.09634359774845087, "grad_norm": 0.98046875, "learning_rate": 1.9894090065121972e-05, "loss": 0.3967, "step": 1273 }, { "epoch": 0.0964192800718982, "grad_norm": 0.859375, "learning_rate": 1.9893917104360977e-05, "loss": 0.3264, "step": 1274 }, { "epoch": 0.09649496239534554, "grad_norm": 0.96484375, "learning_rate": 1.9893744003237786e-05, "loss": 0.3823, "step": 1275 }, { "epoch": 0.09657064471879287, "grad_norm": 0.91015625, "learning_rate": 1.989357076175486e-05, "loss": 0.3194, "step": 1276 }, { "epoch": 0.0966463270422402, "grad_norm": 0.86328125, "learning_rate": 1.9893397379914653e-05, "loss": 0.3245, "step": 1277 }, { "epoch": 0.09672200936568753, "grad_norm": 0.91796875, "learning_rate": 1.9893223857719627e-05, "loss": 0.3, "step": 1278 }, { "epoch": 0.09679769168913485, "grad_norm": 1.046875, "learning_rate": 1.989305019517224e-05, "loss": 0.4236, "step": 1279 }, { "epoch": 0.09687337401258218, "grad_norm": 0.98828125, "learning_rate": 1.9892876392274966e-05, "loss": 0.3957, "step": 1280 }, { "epoch": 0.09694905633602952, "grad_norm": 0.859375, "learning_rate": 1.989270244903026e-05, "loss": 0.3111, "step": 1281 }, { "epoch": 0.09702473865947685, "grad_norm": 0.921875, "learning_rate": 1.989252836544059e-05, "loss": 0.373, "step": 1282 }, { "epoch": 0.09710042098292418, "grad_norm": 0.98828125, "learning_rate": 1.989235414150843e-05, "loss": 0.3485, "step": 1283 }, { "epoch": 0.09717610330637151, "grad_norm": 0.94921875, "learning_rate": 1.9892179777236255e-05, "loss": 0.393, "step": 1284 }, { "epoch": 0.09725178562981883, "grad_norm": 0.87890625, "learning_rate": 1.989200527262653e-05, "loss": 0.3259, "step": 1285 }, { "epoch": 0.09732746795326616, "grad_norm": 0.94140625, "learning_rate": 1.9891830627681738e-05, "loss": 0.3893, "step": 1286 }, { "epoch": 0.0974031502767135, "grad_norm": 0.953125, "learning_rate": 1.989165584240435e-05, "loss": 0.3573, "step": 1287 }, { "epoch": 0.09747883260016083, "grad_norm": 1.015625, "learning_rate": 1.9891480916796854e-05, "loss": 0.4151, "step": 1288 }, { "epoch": 0.09755451492360816, "grad_norm": 0.89453125, "learning_rate": 1.9891305850861723e-05, "loss": 0.3307, "step": 1289 }, { "epoch": 0.09763019724705549, "grad_norm": 0.8828125, "learning_rate": 1.9891130644601447e-05, "loss": 0.3355, "step": 1290 }, { "epoch": 0.09770587957050282, "grad_norm": 0.92578125, "learning_rate": 1.9890955298018507e-05, "loss": 0.3669, "step": 1291 }, { "epoch": 0.09778156189395014, "grad_norm": 0.8984375, "learning_rate": 1.9890779811115395e-05, "loss": 0.3337, "step": 1292 }, { "epoch": 0.09785724421739747, "grad_norm": 3.703125, "learning_rate": 1.9890604183894597e-05, "loss": 0.6069, "step": 1293 }, { "epoch": 0.0979329265408448, "grad_norm": 0.8984375, "learning_rate": 1.9890428416358603e-05, "loss": 0.3069, "step": 1294 }, { "epoch": 0.09800860886429213, "grad_norm": 0.9453125, "learning_rate": 1.9890252508509915e-05, "loss": 0.3752, "step": 1295 }, { "epoch": 0.09808429118773947, "grad_norm": 0.91796875, "learning_rate": 1.989007646035102e-05, "loss": 0.3383, "step": 1296 }, { "epoch": 0.0981599735111868, "grad_norm": 0.875, "learning_rate": 1.9889900271884417e-05, "loss": 0.3412, "step": 1297 }, { "epoch": 0.09823565583463412, "grad_norm": 0.921875, "learning_rate": 1.988972394311261e-05, "loss": 0.3468, "step": 1298 }, { "epoch": 0.09831133815808145, "grad_norm": 0.87890625, "learning_rate": 1.9889547474038094e-05, "loss": 0.3162, "step": 1299 }, { "epoch": 0.09838702048152878, "grad_norm": 0.90625, "learning_rate": 1.9889370864663377e-05, "loss": 0.3562, "step": 1300 }, { "epoch": 0.09846270280497611, "grad_norm": 0.91796875, "learning_rate": 1.9889194114990963e-05, "loss": 0.3937, "step": 1301 }, { "epoch": 0.09853838512842344, "grad_norm": 0.98828125, "learning_rate": 1.9889017225023364e-05, "loss": 0.3661, "step": 1302 }, { "epoch": 0.09861406745187078, "grad_norm": 0.87890625, "learning_rate": 1.9888840194763084e-05, "loss": 0.3365, "step": 1303 }, { "epoch": 0.09868974977531811, "grad_norm": 0.8828125, "learning_rate": 1.9888663024212634e-05, "loss": 0.3522, "step": 1304 }, { "epoch": 0.09876543209876543, "grad_norm": 0.9140625, "learning_rate": 1.988848571337453e-05, "loss": 0.3576, "step": 1305 }, { "epoch": 0.09884111442221276, "grad_norm": 0.86328125, "learning_rate": 1.9888308262251286e-05, "loss": 0.3235, "step": 1306 }, { "epoch": 0.09891679674566009, "grad_norm": 0.91015625, "learning_rate": 1.988813067084542e-05, "loss": 0.3622, "step": 1307 }, { "epoch": 0.09899247906910742, "grad_norm": 0.9765625, "learning_rate": 1.9887952939159454e-05, "loss": 0.3504, "step": 1308 }, { "epoch": 0.09906816139255475, "grad_norm": 0.87890625, "learning_rate": 1.9887775067195908e-05, "loss": 0.3198, "step": 1309 }, { "epoch": 0.09914384371600209, "grad_norm": 0.890625, "learning_rate": 1.9887597054957304e-05, "loss": 0.3141, "step": 1310 }, { "epoch": 0.0992195260394494, "grad_norm": 0.91015625, "learning_rate": 1.988741890244617e-05, "loss": 0.3548, "step": 1311 }, { "epoch": 0.09929520836289674, "grad_norm": 0.83203125, "learning_rate": 1.9887240609665026e-05, "loss": 0.3194, "step": 1312 }, { "epoch": 0.09937089068634407, "grad_norm": 0.875, "learning_rate": 1.988706217661641e-05, "loss": 0.3017, "step": 1313 }, { "epoch": 0.0994465730097914, "grad_norm": 0.89453125, "learning_rate": 1.988688360330285e-05, "loss": 0.3521, "step": 1314 }, { "epoch": 0.09952225533323873, "grad_norm": 0.8515625, "learning_rate": 1.988670488972688e-05, "loss": 0.3294, "step": 1315 }, { "epoch": 0.09959793765668606, "grad_norm": 0.90625, "learning_rate": 1.9886526035891034e-05, "loss": 0.3689, "step": 1316 }, { "epoch": 0.0996736199801334, "grad_norm": 0.87109375, "learning_rate": 1.9886347041797852e-05, "loss": 0.3109, "step": 1317 }, { "epoch": 0.09974930230358071, "grad_norm": 0.921875, "learning_rate": 1.988616790744987e-05, "loss": 0.3409, "step": 1318 }, { "epoch": 0.09982498462702805, "grad_norm": 0.93359375, "learning_rate": 1.9885988632849634e-05, "loss": 0.3369, "step": 1319 }, { "epoch": 0.09990066695047538, "grad_norm": 0.93359375, "learning_rate": 1.988580921799968e-05, "loss": 0.3599, "step": 1320 }, { "epoch": 0.09997634927392271, "grad_norm": 0.9296875, "learning_rate": 1.9885629662902558e-05, "loss": 0.376, "step": 1321 }, { "epoch": 0.10005203159737004, "grad_norm": 1.28125, "learning_rate": 1.9885449967560818e-05, "loss": 0.35, "step": 1322 }, { "epoch": 0.10005203159737004, "eval_loss": 0.3635263442993164, "eval_runtime": 83.398, "eval_samples_per_second": 58.299, "eval_steps_per_second": 58.299, "step": 1322 }, { "epoch": 0.10012771392081737, "grad_norm": 0.99609375, "learning_rate": 1.9885270131977005e-05, "loss": 0.3813, "step": 1323 }, { "epoch": 0.1002033962442647, "grad_norm": 0.953125, "learning_rate": 1.988509015615367e-05, "loss": 0.3917, "step": 1324 }, { "epoch": 0.10027907856771202, "grad_norm": 0.9140625, "learning_rate": 1.988491004009337e-05, "loss": 0.3714, "step": 1325 }, { "epoch": 0.10035476089115936, "grad_norm": 0.9453125, "learning_rate": 1.9884729783798657e-05, "loss": 0.3679, "step": 1326 }, { "epoch": 0.10043044321460669, "grad_norm": 0.96875, "learning_rate": 1.988454938727209e-05, "loss": 0.3629, "step": 1327 }, { "epoch": 0.10050612553805402, "grad_norm": 0.90234375, "learning_rate": 1.9884368850516226e-05, "loss": 0.3793, "step": 1328 }, { "epoch": 0.10058180786150135, "grad_norm": 0.96484375, "learning_rate": 1.9884188173533628e-05, "loss": 0.2992, "step": 1329 }, { "epoch": 0.10065749018494868, "grad_norm": 1.078125, "learning_rate": 1.988400735632686e-05, "loss": 0.3598, "step": 1330 }, { "epoch": 0.100733172508396, "grad_norm": 0.94140625, "learning_rate": 1.9883826398898486e-05, "loss": 0.3551, "step": 1331 }, { "epoch": 0.10080885483184333, "grad_norm": 0.85546875, "learning_rate": 1.988364530125107e-05, "loss": 0.3215, "step": 1332 }, { "epoch": 0.10088453715529067, "grad_norm": 0.921875, "learning_rate": 1.9883464063387187e-05, "loss": 0.3456, "step": 1333 }, { "epoch": 0.100960219478738, "grad_norm": 0.921875, "learning_rate": 1.9883282685309405e-05, "loss": 0.3619, "step": 1334 }, { "epoch": 0.10103590180218533, "grad_norm": 0.98828125, "learning_rate": 1.98831011670203e-05, "loss": 0.3638, "step": 1335 }, { "epoch": 0.10111158412563266, "grad_norm": 0.8984375, "learning_rate": 1.9882919508522445e-05, "loss": 0.3056, "step": 1336 }, { "epoch": 0.10118726644908, "grad_norm": 0.90234375, "learning_rate": 1.9882737709818414e-05, "loss": 0.2907, "step": 1337 }, { "epoch": 0.10126294877252731, "grad_norm": 1.09375, "learning_rate": 1.9882555770910793e-05, "loss": 0.3535, "step": 1338 }, { "epoch": 0.10133863109597464, "grad_norm": 1.046875, "learning_rate": 1.9882373691802156e-05, "loss": 0.3805, "step": 1339 }, { "epoch": 0.10141431341942198, "grad_norm": 0.9140625, "learning_rate": 1.988219147249509e-05, "loss": 0.3563, "step": 1340 }, { "epoch": 0.10148999574286931, "grad_norm": 0.88671875, "learning_rate": 1.988200911299218e-05, "loss": 0.2908, "step": 1341 }, { "epoch": 0.10156567806631664, "grad_norm": 0.90234375, "learning_rate": 1.9881826613296012e-05, "loss": 0.3494, "step": 1342 }, { "epoch": 0.10164136038976397, "grad_norm": 0.90234375, "learning_rate": 1.9881643973409176e-05, "loss": 0.3347, "step": 1343 }, { "epoch": 0.10171704271321129, "grad_norm": 0.83984375, "learning_rate": 1.9881461193334263e-05, "loss": 0.3428, "step": 1344 }, { "epoch": 0.10179272503665862, "grad_norm": 0.9765625, "learning_rate": 1.9881278273073866e-05, "loss": 0.4033, "step": 1345 }, { "epoch": 0.10186840736010595, "grad_norm": 0.9609375, "learning_rate": 1.988109521263058e-05, "loss": 0.4084, "step": 1346 }, { "epoch": 0.10194408968355329, "grad_norm": 0.9375, "learning_rate": 1.9880912012007e-05, "loss": 0.4099, "step": 1347 }, { "epoch": 0.10201977200700062, "grad_norm": 0.9765625, "learning_rate": 1.988072867120573e-05, "loss": 0.3799, "step": 1348 }, { "epoch": 0.10209545433044795, "grad_norm": 0.953125, "learning_rate": 1.9880545190229367e-05, "loss": 0.3609, "step": 1349 }, { "epoch": 0.10217113665389528, "grad_norm": 0.87109375, "learning_rate": 1.9880361569080516e-05, "loss": 0.3397, "step": 1350 }, { "epoch": 0.1022468189773426, "grad_norm": 0.9453125, "learning_rate": 1.988017780776178e-05, "loss": 0.3289, "step": 1351 }, { "epoch": 0.10232250130078993, "grad_norm": 0.91015625, "learning_rate": 1.9879993906275767e-05, "loss": 0.3632, "step": 1352 }, { "epoch": 0.10239818362423726, "grad_norm": 0.8671875, "learning_rate": 1.987980986462508e-05, "loss": 0.3141, "step": 1353 }, { "epoch": 0.1024738659476846, "grad_norm": 0.83984375, "learning_rate": 1.987962568281234e-05, "loss": 0.3365, "step": 1354 }, { "epoch": 0.10254954827113193, "grad_norm": 0.86328125, "learning_rate": 1.987944136084016e-05, "loss": 0.335, "step": 1355 }, { "epoch": 0.10262523059457926, "grad_norm": 0.875, "learning_rate": 1.9879256898711147e-05, "loss": 0.3359, "step": 1356 }, { "epoch": 0.10270091291802658, "grad_norm": 0.9140625, "learning_rate": 1.987907229642792e-05, "loss": 0.3863, "step": 1357 }, { "epoch": 0.10277659524147391, "grad_norm": 0.828125, "learning_rate": 1.98788875539931e-05, "loss": 0.3004, "step": 1358 }, { "epoch": 0.10285227756492124, "grad_norm": 0.87890625, "learning_rate": 1.9878702671409307e-05, "loss": 0.3263, "step": 1359 }, { "epoch": 0.10292795988836857, "grad_norm": 0.9140625, "learning_rate": 1.987851764867917e-05, "loss": 0.3652, "step": 1360 }, { "epoch": 0.1030036422118159, "grad_norm": 0.87890625, "learning_rate": 1.9878332485805298e-05, "loss": 0.349, "step": 1361 }, { "epoch": 0.10307932453526324, "grad_norm": 0.7890625, "learning_rate": 1.9878147182790334e-05, "loss": 0.2997, "step": 1362 }, { "epoch": 0.10315500685871057, "grad_norm": 0.8984375, "learning_rate": 1.9877961739636903e-05, "loss": 0.3623, "step": 1363 }, { "epoch": 0.10323068918215789, "grad_norm": 1.03125, "learning_rate": 1.9877776156347627e-05, "loss": 0.3701, "step": 1364 }, { "epoch": 0.10330637150560522, "grad_norm": 0.9609375, "learning_rate": 1.987759043292515e-05, "loss": 0.3864, "step": 1365 }, { "epoch": 0.10338205382905255, "grad_norm": 0.84765625, "learning_rate": 1.98774045693721e-05, "loss": 0.3243, "step": 1366 }, { "epoch": 0.10345773615249988, "grad_norm": 0.86328125, "learning_rate": 1.987721856569112e-05, "loss": 0.3505, "step": 1367 }, { "epoch": 0.10353341847594721, "grad_norm": 0.89453125, "learning_rate": 1.9877032421884838e-05, "loss": 0.3248, "step": 1368 }, { "epoch": 0.10360910079939455, "grad_norm": 2.03125, "learning_rate": 1.987684613795591e-05, "loss": 0.5852, "step": 1369 }, { "epoch": 0.10368478312284188, "grad_norm": 0.8046875, "learning_rate": 1.9876659713906964e-05, "loss": 0.2823, "step": 1370 }, { "epoch": 0.1037604654462892, "grad_norm": 0.859375, "learning_rate": 1.987647314974065e-05, "loss": 0.2943, "step": 1371 }, { "epoch": 0.10383614776973653, "grad_norm": 0.8828125, "learning_rate": 1.987628644545962e-05, "loss": 0.328, "step": 1372 }, { "epoch": 0.10391183009318386, "grad_norm": 0.9140625, "learning_rate": 1.9876099601066517e-05, "loss": 0.3398, "step": 1373 }, { "epoch": 0.10398751241663119, "grad_norm": 0.85546875, "learning_rate": 1.987591261656399e-05, "loss": 0.2904, "step": 1374 }, { "epoch": 0.10406319474007852, "grad_norm": 0.9140625, "learning_rate": 1.9875725491954698e-05, "loss": 0.3556, "step": 1375 }, { "epoch": 0.10413887706352586, "grad_norm": 1.0625, "learning_rate": 1.9875538227241295e-05, "loss": 0.4004, "step": 1376 }, { "epoch": 0.10421455938697317, "grad_norm": 0.95703125, "learning_rate": 1.9875350822426433e-05, "loss": 0.3746, "step": 1377 }, { "epoch": 0.1042902417104205, "grad_norm": 0.87890625, "learning_rate": 1.9875163277512772e-05, "loss": 0.3478, "step": 1378 }, { "epoch": 0.10436592403386784, "grad_norm": 2.046875, "learning_rate": 1.9874975592502972e-05, "loss": 0.6326, "step": 1379 }, { "epoch": 0.10444160635731517, "grad_norm": 0.91015625, "learning_rate": 1.9874787767399703e-05, "loss": 0.366, "step": 1380 }, { "epoch": 0.1045172886807625, "grad_norm": 0.9140625, "learning_rate": 1.9874599802205617e-05, "loss": 0.3144, "step": 1381 }, { "epoch": 0.10459297100420983, "grad_norm": 0.92578125, "learning_rate": 1.987441169692339e-05, "loss": 0.3168, "step": 1382 }, { "epoch": 0.10466865332765717, "grad_norm": 0.9140625, "learning_rate": 1.987422345155569e-05, "loss": 0.3612, "step": 1383 }, { "epoch": 0.10474433565110448, "grad_norm": 3.859375, "learning_rate": 1.9874035066105184e-05, "loss": 0.602, "step": 1384 }, { "epoch": 0.10482001797455182, "grad_norm": 0.97265625, "learning_rate": 1.9873846540574545e-05, "loss": 0.3752, "step": 1385 }, { "epoch": 0.10489570029799915, "grad_norm": 0.84765625, "learning_rate": 1.987365787496645e-05, "loss": 0.2665, "step": 1386 }, { "epoch": 0.10497138262144648, "grad_norm": 1.015625, "learning_rate": 1.9873469069283576e-05, "loss": 0.4033, "step": 1387 }, { "epoch": 0.10504706494489381, "grad_norm": 0.890625, "learning_rate": 1.98732801235286e-05, "loss": 0.3595, "step": 1388 }, { "epoch": 0.10512274726834114, "grad_norm": 0.91796875, "learning_rate": 1.9873091037704198e-05, "loss": 0.3922, "step": 1389 }, { "epoch": 0.10519842959178846, "grad_norm": 0.94140625, "learning_rate": 1.987290181181306e-05, "loss": 0.3881, "step": 1390 }, { "epoch": 0.1052741119152358, "grad_norm": 1.015625, "learning_rate": 1.9872712445857865e-05, "loss": 0.4403, "step": 1391 }, { "epoch": 0.10534979423868313, "grad_norm": 0.94140625, "learning_rate": 1.9872522939841304e-05, "loss": 0.3736, "step": 1392 }, { "epoch": 0.10542547656213046, "grad_norm": 0.92578125, "learning_rate": 1.9872333293766062e-05, "loss": 0.3414, "step": 1393 }, { "epoch": 0.10550115888557779, "grad_norm": 0.9140625, "learning_rate": 1.987214350763483e-05, "loss": 0.3647, "step": 1394 }, { "epoch": 0.10557684120902512, "grad_norm": 0.83203125, "learning_rate": 1.9871953581450304e-05, "loss": 0.3009, "step": 1395 }, { "epoch": 0.10565252353247245, "grad_norm": 0.98046875, "learning_rate": 1.9871763515215173e-05, "loss": 0.3876, "step": 1396 }, { "epoch": 0.10572820585591977, "grad_norm": 0.84765625, "learning_rate": 1.9871573308932138e-05, "loss": 0.3374, "step": 1397 }, { "epoch": 0.1058038881793671, "grad_norm": 0.8515625, "learning_rate": 1.9871382962603896e-05, "loss": 0.311, "step": 1398 }, { "epoch": 0.10587957050281444, "grad_norm": 0.94921875, "learning_rate": 1.9871192476233143e-05, "loss": 0.3521, "step": 1399 }, { "epoch": 0.10595525282626177, "grad_norm": 0.9375, "learning_rate": 1.987100184982259e-05, "loss": 0.3667, "step": 1400 }, { "epoch": 0.1060309351497091, "grad_norm": 0.90234375, "learning_rate": 1.9870811083374933e-05, "loss": 0.3336, "step": 1401 }, { "epoch": 0.10610661747315643, "grad_norm": 0.94921875, "learning_rate": 1.9870620176892884e-05, "loss": 0.4058, "step": 1402 }, { "epoch": 0.10618229979660375, "grad_norm": 0.87109375, "learning_rate": 1.9870429130379145e-05, "loss": 0.3295, "step": 1403 }, { "epoch": 0.10625798212005108, "grad_norm": 0.90234375, "learning_rate": 1.9870237943836436e-05, "loss": 0.3677, "step": 1404 }, { "epoch": 0.10633366444349841, "grad_norm": 0.90234375, "learning_rate": 1.987004661726746e-05, "loss": 0.3149, "step": 1405 }, { "epoch": 0.10640934676694574, "grad_norm": 0.93359375, "learning_rate": 1.9869855150674938e-05, "loss": 0.3735, "step": 1406 }, { "epoch": 0.10648502909039308, "grad_norm": 0.93359375, "learning_rate": 1.986966354406158e-05, "loss": 0.3692, "step": 1407 }, { "epoch": 0.10656071141384041, "grad_norm": 0.85546875, "learning_rate": 1.9869471797430112e-05, "loss": 0.3129, "step": 1408 }, { "epoch": 0.10663639373728774, "grad_norm": 0.9375, "learning_rate": 1.9869279910783244e-05, "loss": 0.3995, "step": 1409 }, { "epoch": 0.10671207606073506, "grad_norm": 0.890625, "learning_rate": 1.986908788412371e-05, "loss": 0.356, "step": 1410 }, { "epoch": 0.10678775838418239, "grad_norm": 0.9140625, "learning_rate": 1.9868895717454225e-05, "loss": 0.3734, "step": 1411 }, { "epoch": 0.10686344070762972, "grad_norm": 0.9375, "learning_rate": 1.986870341077752e-05, "loss": 0.3857, "step": 1412 }, { "epoch": 0.10693912303107705, "grad_norm": 0.91015625, "learning_rate": 1.9868510964096325e-05, "loss": 0.3592, "step": 1413 }, { "epoch": 0.10701480535452439, "grad_norm": 0.953125, "learning_rate": 1.986831837741336e-05, "loss": 0.388, "step": 1414 }, { "epoch": 0.10709048767797172, "grad_norm": 0.89453125, "learning_rate": 1.986812565073137e-05, "loss": 0.3413, "step": 1415 }, { "epoch": 0.10716617000141905, "grad_norm": 0.85546875, "learning_rate": 1.986793278405308e-05, "loss": 0.3112, "step": 1416 }, { "epoch": 0.10724185232486637, "grad_norm": 1.015625, "learning_rate": 1.986773977738123e-05, "loss": 0.4044, "step": 1417 }, { "epoch": 0.1073175346483137, "grad_norm": 0.90234375, "learning_rate": 1.9867546630718563e-05, "loss": 0.3486, "step": 1418 }, { "epoch": 0.10739321697176103, "grad_norm": 0.859375, "learning_rate": 1.9867353344067806e-05, "loss": 0.3373, "step": 1419 }, { "epoch": 0.10746889929520836, "grad_norm": 0.87890625, "learning_rate": 1.9867159917431712e-05, "loss": 0.3105, "step": 1420 }, { "epoch": 0.1075445816186557, "grad_norm": 0.97265625, "learning_rate": 1.9866966350813025e-05, "loss": 0.3509, "step": 1421 }, { "epoch": 0.10762026394210303, "grad_norm": 0.890625, "learning_rate": 1.9866772644214484e-05, "loss": 0.3267, "step": 1422 }, { "epoch": 0.10769594626555035, "grad_norm": 0.9296875, "learning_rate": 1.986657879763884e-05, "loss": 0.358, "step": 1423 }, { "epoch": 0.10777162858899768, "grad_norm": 0.828125, "learning_rate": 1.986638481108885e-05, "loss": 0.3048, "step": 1424 }, { "epoch": 0.10784731091244501, "grad_norm": 0.93359375, "learning_rate": 1.9866190684567256e-05, "loss": 0.347, "step": 1425 }, { "epoch": 0.10792299323589234, "grad_norm": 0.953125, "learning_rate": 1.9865996418076813e-05, "loss": 0.3552, "step": 1426 }, { "epoch": 0.10799867555933967, "grad_norm": 0.921875, "learning_rate": 1.9865802011620287e-05, "loss": 0.3802, "step": 1427 }, { "epoch": 0.108074357882787, "grad_norm": 0.87109375, "learning_rate": 1.9865607465200426e-05, "loss": 0.3392, "step": 1428 }, { "epoch": 0.10815004020623434, "grad_norm": 0.875, "learning_rate": 1.9865412778819992e-05, "loss": 0.3084, "step": 1429 }, { "epoch": 0.10822572252968166, "grad_norm": 0.89453125, "learning_rate": 1.986521795248175e-05, "loss": 0.3494, "step": 1430 }, { "epoch": 0.10830140485312899, "grad_norm": 0.88671875, "learning_rate": 1.986502298618846e-05, "loss": 0.3739, "step": 1431 }, { "epoch": 0.10837708717657632, "grad_norm": 0.8671875, "learning_rate": 1.986482787994289e-05, "loss": 0.3436, "step": 1432 }, { "epoch": 0.10845276950002365, "grad_norm": 0.9921875, "learning_rate": 1.9864632633747812e-05, "loss": 0.4024, "step": 1433 }, { "epoch": 0.10852845182347098, "grad_norm": 0.91015625, "learning_rate": 1.9864437247605986e-05, "loss": 0.3578, "step": 1434 }, { "epoch": 0.10860413414691832, "grad_norm": 0.91015625, "learning_rate": 1.9864241721520195e-05, "loss": 0.3619, "step": 1435 }, { "epoch": 0.10867981647036563, "grad_norm": 0.90234375, "learning_rate": 1.9864046055493205e-05, "loss": 0.3668, "step": 1436 }, { "epoch": 0.10875549879381297, "grad_norm": 0.890625, "learning_rate": 1.9863850249527793e-05, "loss": 0.3245, "step": 1437 }, { "epoch": 0.1088311811172603, "grad_norm": 0.83203125, "learning_rate": 1.9863654303626744e-05, "loss": 0.302, "step": 1438 }, { "epoch": 0.10890686344070763, "grad_norm": 0.99609375, "learning_rate": 1.9863458217792827e-05, "loss": 0.3959, "step": 1439 }, { "epoch": 0.10898254576415496, "grad_norm": 0.859375, "learning_rate": 1.986326199202883e-05, "loss": 0.2925, "step": 1440 }, { "epoch": 0.1090582280876023, "grad_norm": 0.8984375, "learning_rate": 1.9863065626337534e-05, "loss": 0.3375, "step": 1441 }, { "epoch": 0.10913391041104963, "grad_norm": 0.859375, "learning_rate": 1.986286912072173e-05, "loss": 0.3391, "step": 1442 }, { "epoch": 0.10920959273449694, "grad_norm": 0.796875, "learning_rate": 1.9862672475184198e-05, "loss": 0.2817, "step": 1443 }, { "epoch": 0.10928527505794428, "grad_norm": 0.96875, "learning_rate": 1.9862475689727734e-05, "loss": 0.3436, "step": 1444 }, { "epoch": 0.10936095738139161, "grad_norm": 0.8359375, "learning_rate": 1.986227876435513e-05, "loss": 0.3291, "step": 1445 }, { "epoch": 0.10943663970483894, "grad_norm": 0.875, "learning_rate": 1.9862081699069175e-05, "loss": 0.3227, "step": 1446 }, { "epoch": 0.10951232202828627, "grad_norm": 0.94140625, "learning_rate": 1.9861884493872667e-05, "loss": 0.3482, "step": 1447 }, { "epoch": 0.1095880043517336, "grad_norm": 0.98046875, "learning_rate": 1.9861687148768406e-05, "loss": 0.3299, "step": 1448 }, { "epoch": 0.10966368667518092, "grad_norm": 0.9609375, "learning_rate": 1.9861489663759184e-05, "loss": 0.3637, "step": 1449 }, { "epoch": 0.10973936899862825, "grad_norm": 0.890625, "learning_rate": 1.9861292038847818e-05, "loss": 0.353, "step": 1450 }, { "epoch": 0.10981505132207559, "grad_norm": 0.8515625, "learning_rate": 1.9861094274037095e-05, "loss": 0.3278, "step": 1451 }, { "epoch": 0.10989073364552292, "grad_norm": 0.89453125, "learning_rate": 1.986089636932983e-05, "loss": 0.3232, "step": 1452 }, { "epoch": 0.10996641596897025, "grad_norm": 0.87890625, "learning_rate": 1.9860698324728826e-05, "loss": 0.3348, "step": 1453 }, { "epoch": 0.11004209829241758, "grad_norm": 0.9453125, "learning_rate": 1.9860500140236896e-05, "loss": 0.3276, "step": 1454 }, { "epoch": 0.11011778061586491, "grad_norm": 0.91796875, "learning_rate": 1.986030181585685e-05, "loss": 0.3324, "step": 1455 }, { "epoch": 0.11019346293931223, "grad_norm": 2.34375, "learning_rate": 1.98601033515915e-05, "loss": 0.4928, "step": 1456 }, { "epoch": 0.11026914526275956, "grad_norm": 2.0, "learning_rate": 1.9859904747443665e-05, "loss": 0.5517, "step": 1457 }, { "epoch": 0.1103448275862069, "grad_norm": 0.94921875, "learning_rate": 1.985970600341616e-05, "loss": 0.3612, "step": 1458 }, { "epoch": 0.11042050990965423, "grad_norm": 1.046875, "learning_rate": 1.9859507119511806e-05, "loss": 0.3622, "step": 1459 }, { "epoch": 0.11049619223310156, "grad_norm": 0.859375, "learning_rate": 1.985930809573343e-05, "loss": 0.3018, "step": 1460 }, { "epoch": 0.11057187455654889, "grad_norm": 0.92578125, "learning_rate": 1.985910893208384e-05, "loss": 0.3643, "step": 1461 }, { "epoch": 0.11064755687999622, "grad_norm": 0.8828125, "learning_rate": 1.9858909628565876e-05, "loss": 0.3263, "step": 1462 }, { "epoch": 0.11072323920344354, "grad_norm": 0.83203125, "learning_rate": 1.985871018518236e-05, "loss": 0.2866, "step": 1463 }, { "epoch": 0.11079892152689087, "grad_norm": 0.8671875, "learning_rate": 1.9858510601936124e-05, "loss": 0.3302, "step": 1464 }, { "epoch": 0.1108746038503382, "grad_norm": 0.96875, "learning_rate": 1.9858310878829995e-05, "loss": 0.2757, "step": 1465 }, { "epoch": 0.11095028617378554, "grad_norm": 0.859375, "learning_rate": 1.9858111015866806e-05, "loss": 0.3267, "step": 1466 }, { "epoch": 0.11102596849723287, "grad_norm": 0.8046875, "learning_rate": 1.9857911013049398e-05, "loss": 0.27, "step": 1467 }, { "epoch": 0.1111016508206802, "grad_norm": 0.859375, "learning_rate": 1.9857710870380606e-05, "loss": 0.3408, "step": 1468 }, { "epoch": 0.11117733314412752, "grad_norm": 0.92578125, "learning_rate": 1.9857510587863266e-05, "loss": 0.368, "step": 1469 }, { "epoch": 0.11125301546757485, "grad_norm": 0.84375, "learning_rate": 1.9857310165500227e-05, "loss": 0.3107, "step": 1470 }, { "epoch": 0.11132869779102218, "grad_norm": 0.8984375, "learning_rate": 1.9857109603294326e-05, "loss": 0.3482, "step": 1471 }, { "epoch": 0.11140438011446951, "grad_norm": 0.76171875, "learning_rate": 1.9856908901248407e-05, "loss": 0.2425, "step": 1472 }, { "epoch": 0.11148006243791685, "grad_norm": 0.9296875, "learning_rate": 1.985670805936532e-05, "loss": 0.3987, "step": 1473 }, { "epoch": 0.11155574476136418, "grad_norm": 0.8515625, "learning_rate": 1.9856507077647918e-05, "loss": 0.3245, "step": 1474 }, { "epoch": 0.11163142708481151, "grad_norm": 0.9453125, "learning_rate": 1.985630595609905e-05, "loss": 0.3204, "step": 1475 }, { "epoch": 0.11170710940825883, "grad_norm": 0.859375, "learning_rate": 1.9856104694721562e-05, "loss": 0.3536, "step": 1476 }, { "epoch": 0.11178279173170616, "grad_norm": 4.15625, "learning_rate": 1.9855903293518318e-05, "loss": 0.5511, "step": 1477 }, { "epoch": 0.11185847405515349, "grad_norm": 0.8515625, "learning_rate": 1.9855701752492174e-05, "loss": 0.3087, "step": 1478 }, { "epoch": 0.11193415637860082, "grad_norm": 1.0, "learning_rate": 1.9855500071645987e-05, "loss": 0.3836, "step": 1479 }, { "epoch": 0.11200983870204816, "grad_norm": 0.921875, "learning_rate": 1.985529825098262e-05, "loss": 0.3461, "step": 1480 }, { "epoch": 0.11208552102549549, "grad_norm": 0.87890625, "learning_rate": 1.9855096290504933e-05, "loss": 0.3436, "step": 1481 }, { "epoch": 0.1121612033489428, "grad_norm": 0.94140625, "learning_rate": 1.9854894190215796e-05, "loss": 0.3965, "step": 1482 }, { "epoch": 0.11223688567239014, "grad_norm": 0.85546875, "learning_rate": 1.985469195011807e-05, "loss": 0.3402, "step": 1483 }, { "epoch": 0.11231256799583747, "grad_norm": 0.8984375, "learning_rate": 1.985448957021463e-05, "loss": 0.3621, "step": 1484 }, { "epoch": 0.1123882503192848, "grad_norm": 4.75, "learning_rate": 1.985428705050834e-05, "loss": 0.5376, "step": 1485 }, { "epoch": 0.11246393264273213, "grad_norm": 0.9140625, "learning_rate": 1.9854084391002084e-05, "loss": 0.3574, "step": 1486 }, { "epoch": 0.11253961496617947, "grad_norm": 0.90234375, "learning_rate": 1.9853881591698726e-05, "loss": 0.3643, "step": 1487 }, { "epoch": 0.1126152972896268, "grad_norm": 0.921875, "learning_rate": 1.9853678652601148e-05, "loss": 0.3427, "step": 1488 }, { "epoch": 0.11269097961307412, "grad_norm": 0.8984375, "learning_rate": 1.985347557371223e-05, "loss": 0.3273, "step": 1489 }, { "epoch": 0.11276666193652145, "grad_norm": 0.9375, "learning_rate": 1.9853272355034854e-05, "loss": 0.3498, "step": 1490 }, { "epoch": 0.11284234425996878, "grad_norm": 0.87890625, "learning_rate": 1.98530689965719e-05, "loss": 0.362, "step": 1491 }, { "epoch": 0.11291802658341611, "grad_norm": 1.0078125, "learning_rate": 1.9852865498326253e-05, "loss": 0.3051, "step": 1492 }, { "epoch": 0.11299370890686344, "grad_norm": 0.8828125, "learning_rate": 1.98526618603008e-05, "loss": 0.3488, "step": 1493 }, { "epoch": 0.11306939123031078, "grad_norm": 0.90625, "learning_rate": 1.9852458082498433e-05, "loss": 0.3685, "step": 1494 }, { "epoch": 0.1131450735537581, "grad_norm": 0.87109375, "learning_rate": 1.985225416492204e-05, "loss": 0.3257, "step": 1495 }, { "epoch": 0.11322075587720543, "grad_norm": 0.87890625, "learning_rate": 1.9852050107574508e-05, "loss": 0.3101, "step": 1496 }, { "epoch": 0.11329643820065276, "grad_norm": 0.8515625, "learning_rate": 1.9851845910458746e-05, "loss": 0.3293, "step": 1497 }, { "epoch": 0.11337212052410009, "grad_norm": 0.87890625, "learning_rate": 1.985164157357764e-05, "loss": 0.3627, "step": 1498 }, { "epoch": 0.11344780284754742, "grad_norm": 0.88671875, "learning_rate": 1.9851437096934094e-05, "loss": 0.3515, "step": 1499 }, { "epoch": 0.11352348517099475, "grad_norm": 0.9609375, "learning_rate": 1.9851232480531008e-05, "loss": 0.3587, "step": 1500 }, { "epoch": 0.11359916749444209, "grad_norm": 0.9140625, "learning_rate": 1.9851027724371285e-05, "loss": 0.3453, "step": 1501 }, { "epoch": 0.1136748498178894, "grad_norm": 0.87109375, "learning_rate": 1.9850822828457826e-05, "loss": 0.3111, "step": 1502 }, { "epoch": 0.11375053214133674, "grad_norm": 0.953125, "learning_rate": 1.985061779279354e-05, "loss": 0.3055, "step": 1503 }, { "epoch": 0.11382621446478407, "grad_norm": 0.86328125, "learning_rate": 1.9850412617381338e-05, "loss": 0.2994, "step": 1504 }, { "epoch": 0.1139018967882314, "grad_norm": 0.91015625, "learning_rate": 1.9850207302224128e-05, "loss": 0.3196, "step": 1505 }, { "epoch": 0.11397757911167873, "grad_norm": 0.89453125, "learning_rate": 1.9850001847324825e-05, "loss": 0.3329, "step": 1506 }, { "epoch": 0.11405326143512606, "grad_norm": 0.92578125, "learning_rate": 1.9849796252686344e-05, "loss": 0.357, "step": 1507 }, { "epoch": 0.1141289437585734, "grad_norm": 0.9375, "learning_rate": 1.98495905183116e-05, "loss": 0.3836, "step": 1508 }, { "epoch": 0.11420462608202071, "grad_norm": 0.89453125, "learning_rate": 1.9849384644203508e-05, "loss": 0.3042, "step": 1509 }, { "epoch": 0.11428030840546805, "grad_norm": 0.94140625, "learning_rate": 1.9849178630364995e-05, "loss": 0.3442, "step": 1510 }, { "epoch": 0.11435599072891538, "grad_norm": 0.92578125, "learning_rate": 1.9848972476798982e-05, "loss": 0.3534, "step": 1511 }, { "epoch": 0.11443167305236271, "grad_norm": 0.87109375, "learning_rate": 1.9848766183508393e-05, "loss": 0.3663, "step": 1512 }, { "epoch": 0.11450735537581004, "grad_norm": 0.953125, "learning_rate": 1.9848559750496155e-05, "loss": 0.3408, "step": 1513 }, { "epoch": 0.11458303769925737, "grad_norm": 0.92578125, "learning_rate": 1.9848353177765195e-05, "loss": 0.3982, "step": 1514 }, { "epoch": 0.11465872002270469, "grad_norm": 0.8828125, "learning_rate": 1.9848146465318444e-05, "loss": 0.3445, "step": 1515 }, { "epoch": 0.11473440234615202, "grad_norm": 1.0390625, "learning_rate": 1.9847939613158836e-05, "loss": 0.3814, "step": 1516 }, { "epoch": 0.11481008466959935, "grad_norm": 0.94140625, "learning_rate": 1.984773262128931e-05, "loss": 0.3937, "step": 1517 }, { "epoch": 0.11488576699304669, "grad_norm": 0.95703125, "learning_rate": 1.984752548971279e-05, "loss": 0.363, "step": 1518 }, { "epoch": 0.11496144931649402, "grad_norm": 0.85546875, "learning_rate": 1.9847318218432224e-05, "loss": 0.3344, "step": 1519 }, { "epoch": 0.11503713163994135, "grad_norm": 0.953125, "learning_rate": 1.9847110807450555e-05, "loss": 0.3626, "step": 1520 }, { "epoch": 0.11511281396338868, "grad_norm": 0.9375, "learning_rate": 1.984690325677072e-05, "loss": 0.3889, "step": 1521 }, { "epoch": 0.115188496286836, "grad_norm": 0.90625, "learning_rate": 1.984669556639566e-05, "loss": 0.3635, "step": 1522 }, { "epoch": 0.11526417861028333, "grad_norm": 0.96875, "learning_rate": 1.9846487736328324e-05, "loss": 0.3718, "step": 1523 }, { "epoch": 0.11533986093373066, "grad_norm": 0.921875, "learning_rate": 1.984627976657167e-05, "loss": 0.3556, "step": 1524 }, { "epoch": 0.115415543257178, "grad_norm": 0.9375, "learning_rate": 1.9846071657128635e-05, "loss": 0.3681, "step": 1525 }, { "epoch": 0.11549122558062533, "grad_norm": 0.8359375, "learning_rate": 1.984586340800218e-05, "loss": 0.3138, "step": 1526 }, { "epoch": 0.11556690790407266, "grad_norm": 0.95703125, "learning_rate": 1.9845655019195256e-05, "loss": 0.3893, "step": 1527 }, { "epoch": 0.11564259022751998, "grad_norm": 0.875, "learning_rate": 1.984544649071082e-05, "loss": 0.3246, "step": 1528 }, { "epoch": 0.11571827255096731, "grad_norm": 0.8984375, "learning_rate": 1.9845237822551827e-05, "loss": 0.3781, "step": 1529 }, { "epoch": 0.11579395487441464, "grad_norm": 0.94140625, "learning_rate": 1.9845029014721243e-05, "loss": 0.3288, "step": 1530 }, { "epoch": 0.11586963719786197, "grad_norm": 0.8984375, "learning_rate": 1.9844820067222027e-05, "loss": 0.3261, "step": 1531 }, { "epoch": 0.1159453195213093, "grad_norm": 0.94140625, "learning_rate": 1.9844610980057148e-05, "loss": 0.3284, "step": 1532 }, { "epoch": 0.11602100184475664, "grad_norm": 0.83203125, "learning_rate": 1.9844401753229566e-05, "loss": 0.2881, "step": 1533 }, { "epoch": 0.11609668416820397, "grad_norm": 0.94921875, "learning_rate": 1.984419238674225e-05, "loss": 0.353, "step": 1534 }, { "epoch": 0.11617236649165129, "grad_norm": 0.97265625, "learning_rate": 1.9843982880598172e-05, "loss": 0.3588, "step": 1535 }, { "epoch": 0.11624804881509862, "grad_norm": 0.87109375, "learning_rate": 1.9843773234800302e-05, "loss": 0.3184, "step": 1536 }, { "epoch": 0.11632373113854595, "grad_norm": 0.90625, "learning_rate": 1.984356344935162e-05, "loss": 0.3446, "step": 1537 }, { "epoch": 0.11639941346199328, "grad_norm": 0.8984375, "learning_rate": 1.9843353524255097e-05, "loss": 0.372, "step": 1538 }, { "epoch": 0.11647509578544062, "grad_norm": 0.90625, "learning_rate": 1.984314345951371e-05, "loss": 0.3539, "step": 1539 }, { "epoch": 0.11655077810888795, "grad_norm": 1.0078125, "learning_rate": 1.9842933255130447e-05, "loss": 0.4105, "step": 1540 }, { "epoch": 0.11662646043233527, "grad_norm": 0.93359375, "learning_rate": 1.9842722911108282e-05, "loss": 0.3933, "step": 1541 }, { "epoch": 0.1167021427557826, "grad_norm": 0.83984375, "learning_rate": 1.98425124274502e-05, "loss": 0.347, "step": 1542 }, { "epoch": 0.11677782507922993, "grad_norm": 0.86328125, "learning_rate": 1.9842301804159193e-05, "loss": 0.3461, "step": 1543 }, { "epoch": 0.11685350740267726, "grad_norm": 0.86328125, "learning_rate": 1.9842091041238243e-05, "loss": 0.3468, "step": 1544 }, { "epoch": 0.1169291897261246, "grad_norm": 0.890625, "learning_rate": 1.984188013869034e-05, "loss": 0.3323, "step": 1545 }, { "epoch": 0.11700487204957193, "grad_norm": 0.83984375, "learning_rate": 1.9841669096518478e-05, "loss": 0.3144, "step": 1546 }, { "epoch": 0.11708055437301926, "grad_norm": 0.94921875, "learning_rate": 1.9841457914725654e-05, "loss": 0.4085, "step": 1547 }, { "epoch": 0.11715623669646658, "grad_norm": 0.90234375, "learning_rate": 1.984124659331486e-05, "loss": 0.3584, "step": 1548 }, { "epoch": 0.11723191901991391, "grad_norm": 0.8671875, "learning_rate": 1.9841035132289095e-05, "loss": 0.3212, "step": 1549 }, { "epoch": 0.11730760134336124, "grad_norm": 0.96484375, "learning_rate": 1.9840823531651357e-05, "loss": 0.3118, "step": 1550 }, { "epoch": 0.11738328366680857, "grad_norm": 0.9609375, "learning_rate": 1.984061179140465e-05, "loss": 0.3627, "step": 1551 }, { "epoch": 0.1174589659902559, "grad_norm": 0.90234375, "learning_rate": 1.9840399911551977e-05, "loss": 0.3512, "step": 1552 }, { "epoch": 0.11753464831370324, "grad_norm": 0.9375, "learning_rate": 1.9840187892096348e-05, "loss": 0.3664, "step": 1553 }, { "epoch": 0.11761033063715057, "grad_norm": 0.90625, "learning_rate": 1.9839975733040766e-05, "loss": 0.3595, "step": 1554 }, { "epoch": 0.11768601296059789, "grad_norm": 1.015625, "learning_rate": 1.983976343438824e-05, "loss": 0.3219, "step": 1555 }, { "epoch": 0.11776169528404522, "grad_norm": 0.8671875, "learning_rate": 1.9839550996141787e-05, "loss": 0.2983, "step": 1556 }, { "epoch": 0.11783737760749255, "grad_norm": 0.8984375, "learning_rate": 1.9839338418304418e-05, "loss": 0.3325, "step": 1557 }, { "epoch": 0.11791305993093988, "grad_norm": 0.8828125, "learning_rate": 1.9839125700879146e-05, "loss": 0.2813, "step": 1558 }, { "epoch": 0.11798874225438721, "grad_norm": 0.890625, "learning_rate": 1.9838912843868995e-05, "loss": 0.3223, "step": 1559 }, { "epoch": 0.11806442457783455, "grad_norm": 0.95703125, "learning_rate": 1.9838699847276975e-05, "loss": 0.3164, "step": 1560 }, { "epoch": 0.11814010690128186, "grad_norm": 0.87109375, "learning_rate": 1.983848671110612e-05, "loss": 0.3374, "step": 1561 }, { "epoch": 0.1182157892247292, "grad_norm": 0.84765625, "learning_rate": 1.9838273435359447e-05, "loss": 0.3454, "step": 1562 }, { "epoch": 0.11829147154817653, "grad_norm": 0.90625, "learning_rate": 1.983806002003998e-05, "loss": 0.3665, "step": 1563 }, { "epoch": 0.11836715387162386, "grad_norm": 0.84765625, "learning_rate": 1.983784646515075e-05, "loss": 0.3143, "step": 1564 }, { "epoch": 0.11844283619507119, "grad_norm": 1.078125, "learning_rate": 1.9837632770694783e-05, "loss": 0.3065, "step": 1565 }, { "epoch": 0.11851851851851852, "grad_norm": 0.99609375, "learning_rate": 1.9837418936675115e-05, "loss": 0.3636, "step": 1566 }, { "epoch": 0.11859420084196585, "grad_norm": 0.8515625, "learning_rate": 1.983720496309478e-05, "loss": 0.3259, "step": 1567 }, { "epoch": 0.11866988316541317, "grad_norm": 0.92578125, "learning_rate": 1.983699084995681e-05, "loss": 0.3387, "step": 1568 }, { "epoch": 0.1187455654888605, "grad_norm": 0.94921875, "learning_rate": 1.983677659726424e-05, "loss": 0.3499, "step": 1569 }, { "epoch": 0.11882124781230784, "grad_norm": 1.0, "learning_rate": 1.983656220502012e-05, "loss": 0.3879, "step": 1570 }, { "epoch": 0.11889693013575517, "grad_norm": 0.90234375, "learning_rate": 1.9836347673227482e-05, "loss": 0.3252, "step": 1571 }, { "epoch": 0.1189726124592025, "grad_norm": 0.859375, "learning_rate": 1.983613300188937e-05, "loss": 0.3092, "step": 1572 }, { "epoch": 0.11904829478264983, "grad_norm": 0.93359375, "learning_rate": 1.9835918191008833e-05, "loss": 0.3586, "step": 1573 }, { "epoch": 0.11912397710609715, "grad_norm": 0.82421875, "learning_rate": 1.9835703240588923e-05, "loss": 0.2958, "step": 1574 }, { "epoch": 0.11919965942954448, "grad_norm": 0.94140625, "learning_rate": 1.9835488150632677e-05, "loss": 0.3593, "step": 1575 }, { "epoch": 0.11927534175299181, "grad_norm": 0.9296875, "learning_rate": 1.9835272921143156e-05, "loss": 0.3682, "step": 1576 }, { "epoch": 0.11935102407643915, "grad_norm": 0.921875, "learning_rate": 1.9835057552123412e-05, "loss": 0.3767, "step": 1577 }, { "epoch": 0.11942670639988648, "grad_norm": 0.890625, "learning_rate": 1.9834842043576498e-05, "loss": 0.3574, "step": 1578 }, { "epoch": 0.11950238872333381, "grad_norm": 0.84375, "learning_rate": 1.9834626395505473e-05, "loss": 0.3243, "step": 1579 }, { "epoch": 0.11957807104678114, "grad_norm": 0.92578125, "learning_rate": 1.9834410607913393e-05, "loss": 0.3649, "step": 1580 }, { "epoch": 0.11965375337022846, "grad_norm": 0.90625, "learning_rate": 1.9834194680803325e-05, "loss": 0.3955, "step": 1581 }, { "epoch": 0.11972943569367579, "grad_norm": 0.9140625, "learning_rate": 1.9833978614178328e-05, "loss": 0.3566, "step": 1582 }, { "epoch": 0.11980511801712312, "grad_norm": 0.875, "learning_rate": 1.983376240804147e-05, "loss": 0.369, "step": 1583 }, { "epoch": 0.11988080034057046, "grad_norm": 0.9453125, "learning_rate": 1.9833546062395816e-05, "loss": 0.3886, "step": 1584 }, { "epoch": 0.11995648266401779, "grad_norm": 0.8828125, "learning_rate": 1.9833329577244437e-05, "loss": 0.3211, "step": 1585 }, { "epoch": 0.12003216498746512, "grad_norm": 1.0546875, "learning_rate": 1.98331129525904e-05, "loss": 0.3525, "step": 1586 }, { "epoch": 0.12010784731091244, "grad_norm": 0.828125, "learning_rate": 1.9832896188436784e-05, "loss": 0.3155, "step": 1587 }, { "epoch": 0.12018352963435977, "grad_norm": 0.96484375, "learning_rate": 1.9832679284786663e-05, "loss": 0.3352, "step": 1588 }, { "epoch": 0.1202592119578071, "grad_norm": 0.97265625, "learning_rate": 1.983246224164311e-05, "loss": 0.4124, "step": 1589 }, { "epoch": 0.12033489428125443, "grad_norm": 0.85546875, "learning_rate": 1.983224505900921e-05, "loss": 0.3016, "step": 1590 }, { "epoch": 0.12041057660470177, "grad_norm": 0.87890625, "learning_rate": 1.9832027736888042e-05, "loss": 0.3418, "step": 1591 }, { "epoch": 0.1204862589281491, "grad_norm": 0.90625, "learning_rate": 1.9831810275282685e-05, "loss": 0.375, "step": 1592 }, { "epoch": 0.12056194125159643, "grad_norm": 2.28125, "learning_rate": 1.983159267419623e-05, "loss": 0.5103, "step": 1593 }, { "epoch": 0.12063762357504375, "grad_norm": 0.8984375, "learning_rate": 1.983137493363176e-05, "loss": 0.3601, "step": 1594 }, { "epoch": 0.12071330589849108, "grad_norm": 2.140625, "learning_rate": 1.9831157053592364e-05, "loss": 0.4491, "step": 1595 }, { "epoch": 0.12078898822193841, "grad_norm": 1.0859375, "learning_rate": 1.9830939034081137e-05, "loss": 0.4145, "step": 1596 }, { "epoch": 0.12086467054538574, "grad_norm": 0.9296875, "learning_rate": 1.9830720875101167e-05, "loss": 0.336, "step": 1597 }, { "epoch": 0.12094035286883308, "grad_norm": 0.98046875, "learning_rate": 1.9830502576655553e-05, "loss": 0.4333, "step": 1598 }, { "epoch": 0.12101603519228041, "grad_norm": 1.984375, "learning_rate": 1.9830284138747387e-05, "loss": 0.5905, "step": 1599 }, { "epoch": 0.12109171751572773, "grad_norm": 0.98828125, "learning_rate": 1.9830065561379776e-05, "loss": 0.3769, "step": 1600 }, { "epoch": 0.12116739983917506, "grad_norm": 0.86328125, "learning_rate": 1.9829846844555813e-05, "loss": 0.354, "step": 1601 }, { "epoch": 0.12124308216262239, "grad_norm": 0.83203125, "learning_rate": 1.9829627988278603e-05, "loss": 0.2756, "step": 1602 }, { "epoch": 0.12131876448606972, "grad_norm": 0.9375, "learning_rate": 1.9829408992551255e-05, "loss": 0.3457, "step": 1603 }, { "epoch": 0.12139444680951705, "grad_norm": 0.8046875, "learning_rate": 1.982918985737687e-05, "loss": 0.3041, "step": 1604 }, { "epoch": 0.12147012913296439, "grad_norm": 0.984375, "learning_rate": 1.982897058275856e-05, "loss": 0.3712, "step": 1605 }, { "epoch": 0.12154581145641172, "grad_norm": 0.859375, "learning_rate": 1.9828751168699437e-05, "loss": 0.3419, "step": 1606 }, { "epoch": 0.12162149377985904, "grad_norm": 0.86328125, "learning_rate": 1.9828531615202608e-05, "loss": 0.3345, "step": 1607 }, { "epoch": 0.12169717610330637, "grad_norm": 0.875, "learning_rate": 1.9828311922271192e-05, "loss": 0.3385, "step": 1608 }, { "epoch": 0.1217728584267537, "grad_norm": 0.890625, "learning_rate": 1.982809208990831e-05, "loss": 0.3556, "step": 1609 }, { "epoch": 0.12184854075020103, "grad_norm": 0.97265625, "learning_rate": 1.9827872118117075e-05, "loss": 0.3789, "step": 1610 }, { "epoch": 0.12192422307364836, "grad_norm": 0.88671875, "learning_rate": 1.9827652006900607e-05, "loss": 0.3267, "step": 1611 }, { "epoch": 0.1219999053970957, "grad_norm": 0.91015625, "learning_rate": 1.982743175626203e-05, "loss": 0.3507, "step": 1612 }, { "epoch": 0.12207558772054303, "grad_norm": 1.03125, "learning_rate": 1.9827211366204468e-05, "loss": 0.3723, "step": 1613 }, { "epoch": 0.12215127004399035, "grad_norm": 0.96875, "learning_rate": 1.9826990836731052e-05, "loss": 0.3642, "step": 1614 }, { "epoch": 0.12222695236743768, "grad_norm": 0.76953125, "learning_rate": 1.9826770167844907e-05, "loss": 0.2751, "step": 1615 }, { "epoch": 0.12230263469088501, "grad_norm": 0.953125, "learning_rate": 1.982654935954916e-05, "loss": 0.3746, "step": 1616 }, { "epoch": 0.12237831701433234, "grad_norm": 0.86328125, "learning_rate": 1.982632841184695e-05, "loss": 0.3019, "step": 1617 }, { "epoch": 0.12245399933777967, "grad_norm": 1.0234375, "learning_rate": 1.9826107324741413e-05, "loss": 0.4147, "step": 1618 }, { "epoch": 0.122529681661227, "grad_norm": 0.85546875, "learning_rate": 1.9825886098235677e-05, "loss": 0.3196, "step": 1619 }, { "epoch": 0.12260536398467432, "grad_norm": 0.921875, "learning_rate": 1.9825664732332886e-05, "loss": 0.3676, "step": 1620 }, { "epoch": 0.12268104630812166, "grad_norm": 0.859375, "learning_rate": 1.9825443227036176e-05, "loss": 0.304, "step": 1621 }, { "epoch": 0.12275672863156899, "grad_norm": 0.87109375, "learning_rate": 1.9825221582348696e-05, "loss": 0.2833, "step": 1622 }, { "epoch": 0.12283241095501632, "grad_norm": 9.4375, "learning_rate": 1.982499979827359e-05, "loss": 0.5919, "step": 1623 }, { "epoch": 0.12290809327846365, "grad_norm": 0.87890625, "learning_rate": 1.9824777874813996e-05, "loss": 0.3113, "step": 1624 }, { "epoch": 0.12298377560191098, "grad_norm": 0.95703125, "learning_rate": 1.982455581197307e-05, "loss": 0.3498, "step": 1625 }, { "epoch": 0.12305945792535831, "grad_norm": 0.98046875, "learning_rate": 1.982433360975396e-05, "loss": 0.362, "step": 1626 }, { "epoch": 0.12313514024880563, "grad_norm": 0.890625, "learning_rate": 1.982411126815982e-05, "loss": 0.3568, "step": 1627 }, { "epoch": 0.12321082257225296, "grad_norm": 0.85546875, "learning_rate": 1.9823888787193802e-05, "loss": 0.3472, "step": 1628 }, { "epoch": 0.1232865048957003, "grad_norm": 0.87109375, "learning_rate": 1.9823666166859065e-05, "loss": 0.3349, "step": 1629 }, { "epoch": 0.12336218721914763, "grad_norm": 0.90234375, "learning_rate": 1.982344340715876e-05, "loss": 0.3174, "step": 1630 }, { "epoch": 0.12343786954259496, "grad_norm": 0.92578125, "learning_rate": 1.9823220508096056e-05, "loss": 0.3248, "step": 1631 }, { "epoch": 0.12351355186604229, "grad_norm": 0.90234375, "learning_rate": 1.9822997469674116e-05, "loss": 0.3491, "step": 1632 }, { "epoch": 0.12358923418948961, "grad_norm": 0.8984375, "learning_rate": 1.9822774291896093e-05, "loss": 0.3047, "step": 1633 }, { "epoch": 0.12366491651293694, "grad_norm": 0.86328125, "learning_rate": 1.9822550974765166e-05, "loss": 0.3238, "step": 1634 }, { "epoch": 0.12374059883638427, "grad_norm": 0.9375, "learning_rate": 1.982232751828449e-05, "loss": 0.3835, "step": 1635 }, { "epoch": 0.1238162811598316, "grad_norm": 1.0234375, "learning_rate": 1.9822103922457253e-05, "loss": 0.3873, "step": 1636 }, { "epoch": 0.12389196348327894, "grad_norm": 0.91796875, "learning_rate": 1.9821880187286607e-05, "loss": 0.3509, "step": 1637 }, { "epoch": 0.12396764580672627, "grad_norm": 0.921875, "learning_rate": 1.9821656312775742e-05, "loss": 0.3542, "step": 1638 }, { "epoch": 0.1240433281301736, "grad_norm": 0.90234375, "learning_rate": 1.9821432298927824e-05, "loss": 0.3726, "step": 1639 }, { "epoch": 0.12411901045362092, "grad_norm": 0.921875, "learning_rate": 1.9821208145746036e-05, "loss": 0.3381, "step": 1640 }, { "epoch": 0.12419469277706825, "grad_norm": 0.8984375, "learning_rate": 1.9820983853233556e-05, "loss": 0.3236, "step": 1641 }, { "epoch": 0.12427037510051558, "grad_norm": 0.94140625, "learning_rate": 1.9820759421393568e-05, "loss": 0.3656, "step": 1642 }, { "epoch": 0.12434605742396292, "grad_norm": 0.90234375, "learning_rate": 1.982053485022925e-05, "loss": 0.3291, "step": 1643 }, { "epoch": 0.12442173974741025, "grad_norm": 0.8984375, "learning_rate": 1.98203101397438e-05, "loss": 0.3456, "step": 1644 }, { "epoch": 0.12449742207085758, "grad_norm": 3.484375, "learning_rate": 1.9820085289940393e-05, "loss": 0.6152, "step": 1645 }, { "epoch": 0.1245731043943049, "grad_norm": 0.8125, "learning_rate": 1.9819860300822227e-05, "loss": 0.3138, "step": 1646 }, { "epoch": 0.12464878671775223, "grad_norm": 0.91015625, "learning_rate": 1.9819635172392487e-05, "loss": 0.3357, "step": 1647 }, { "epoch": 0.12472446904119956, "grad_norm": 0.8671875, "learning_rate": 1.9819409904654376e-05, "loss": 0.352, "step": 1648 }, { "epoch": 0.1248001513646469, "grad_norm": 0.87109375, "learning_rate": 1.9819184497611085e-05, "loss": 0.3278, "step": 1649 }, { "epoch": 0.12487583368809423, "grad_norm": 0.83203125, "learning_rate": 1.981895895126581e-05, "loss": 0.3372, "step": 1650 }, { "epoch": 0.12495151601154156, "grad_norm": 2.21875, "learning_rate": 1.981873326562175e-05, "loss": 0.5653, "step": 1651 }, { "epoch": 0.12502719833498888, "grad_norm": 0.96484375, "learning_rate": 1.9818507440682114e-05, "loss": 0.3992, "step": 1652 }, { "epoch": 0.12510288065843622, "grad_norm": 0.8828125, "learning_rate": 1.9818281476450094e-05, "loss": 0.374, "step": 1653 }, { "epoch": 0.12517856298188354, "grad_norm": 0.921875, "learning_rate": 1.9818055372928908e-05, "loss": 0.3889, "step": 1654 }, { "epoch": 0.12525424530533089, "grad_norm": 0.76171875, "learning_rate": 1.9817829130121758e-05, "loss": 0.2519, "step": 1655 }, { "epoch": 0.1253299276287782, "grad_norm": 0.86328125, "learning_rate": 1.9817602748031852e-05, "loss": 0.3447, "step": 1656 }, { "epoch": 0.12540560995222552, "grad_norm": 0.83984375, "learning_rate": 1.9817376226662404e-05, "loss": 0.336, "step": 1657 }, { "epoch": 0.12548129227567287, "grad_norm": 0.875, "learning_rate": 1.9817149566016626e-05, "loss": 0.311, "step": 1658 }, { "epoch": 0.12555697459912019, "grad_norm": 0.90625, "learning_rate": 1.9816922766097736e-05, "loss": 0.3164, "step": 1659 }, { "epoch": 0.12563265692256753, "grad_norm": 0.9140625, "learning_rate": 1.9816695826908948e-05, "loss": 0.3953, "step": 1660 }, { "epoch": 0.12570833924601485, "grad_norm": 0.890625, "learning_rate": 1.9816468748453486e-05, "loss": 0.3657, "step": 1661 }, { "epoch": 0.1257840215694622, "grad_norm": 0.953125, "learning_rate": 1.981624153073457e-05, "loss": 0.3665, "step": 1662 }, { "epoch": 0.1258597038929095, "grad_norm": 0.89453125, "learning_rate": 1.981601417375542e-05, "loss": 0.3693, "step": 1663 }, { "epoch": 0.12593538621635683, "grad_norm": 0.859375, "learning_rate": 1.9815786677519265e-05, "loss": 0.2748, "step": 1664 }, { "epoch": 0.12601106853980418, "grad_norm": 0.83203125, "learning_rate": 1.981555904202933e-05, "loss": 0.3128, "step": 1665 }, { "epoch": 0.1260867508632515, "grad_norm": 0.91015625, "learning_rate": 1.9815331267288847e-05, "loss": 0.3931, "step": 1666 }, { "epoch": 0.12616243318669884, "grad_norm": 0.84765625, "learning_rate": 1.9815103353301048e-05, "loss": 0.3138, "step": 1667 }, { "epoch": 0.12623811551014616, "grad_norm": 0.828125, "learning_rate": 1.9814875300069164e-05, "loss": 0.3308, "step": 1668 }, { "epoch": 0.12631379783359348, "grad_norm": 0.87890625, "learning_rate": 1.981464710759643e-05, "loss": 0.3141, "step": 1669 }, { "epoch": 0.12638948015704082, "grad_norm": 0.86328125, "learning_rate": 1.9814418775886083e-05, "loss": 0.3445, "step": 1670 }, { "epoch": 0.12646516248048814, "grad_norm": 0.8828125, "learning_rate": 1.9814190304941368e-05, "loss": 0.3416, "step": 1671 }, { "epoch": 0.1265408448039355, "grad_norm": 0.84375, "learning_rate": 1.9813961694765518e-05, "loss": 0.3251, "step": 1672 }, { "epoch": 0.1266165271273828, "grad_norm": 0.89453125, "learning_rate": 1.9813732945361784e-05, "loss": 0.3613, "step": 1673 }, { "epoch": 0.12669220945083015, "grad_norm": 0.9296875, "learning_rate": 1.9813504056733403e-05, "loss": 0.359, "step": 1674 }, { "epoch": 0.12676789177427747, "grad_norm": 0.87109375, "learning_rate": 1.9813275028883624e-05, "loss": 0.3179, "step": 1675 }, { "epoch": 0.1268435740977248, "grad_norm": 0.890625, "learning_rate": 1.9813045861815706e-05, "loss": 0.3783, "step": 1676 }, { "epoch": 0.12691925642117213, "grad_norm": 1.171875, "learning_rate": 1.981281655553289e-05, "loss": 0.4095, "step": 1677 }, { "epoch": 0.12699493874461945, "grad_norm": 0.921875, "learning_rate": 1.9812587110038428e-05, "loss": 0.3812, "step": 1678 }, { "epoch": 0.1270706210680668, "grad_norm": 0.94140625, "learning_rate": 1.9812357525335583e-05, "loss": 0.3323, "step": 1679 }, { "epoch": 0.12714630339151411, "grad_norm": 0.83203125, "learning_rate": 1.9812127801427606e-05, "loss": 0.3109, "step": 1680 }, { "epoch": 0.12722198571496146, "grad_norm": 0.9296875, "learning_rate": 1.9811897938317757e-05, "loss": 0.384, "step": 1681 }, { "epoch": 0.12729766803840878, "grad_norm": 0.87109375, "learning_rate": 1.98116679360093e-05, "loss": 0.3753, "step": 1682 }, { "epoch": 0.1273733503618561, "grad_norm": 0.9140625, "learning_rate": 1.9811437794505496e-05, "loss": 0.3221, "step": 1683 }, { "epoch": 0.12744903268530344, "grad_norm": 0.8515625, "learning_rate": 1.981120751380961e-05, "loss": 0.3434, "step": 1684 }, { "epoch": 0.12752471500875076, "grad_norm": 0.90625, "learning_rate": 1.981097709392491e-05, "loss": 0.3529, "step": 1685 }, { "epoch": 0.1276003973321981, "grad_norm": 0.890625, "learning_rate": 1.9810746534854657e-05, "loss": 0.3619, "step": 1686 }, { "epoch": 0.12767607965564542, "grad_norm": 0.88671875, "learning_rate": 1.9810515836602137e-05, "loss": 0.3472, "step": 1687 }, { "epoch": 0.12775176197909277, "grad_norm": 0.93359375, "learning_rate": 1.981028499917061e-05, "loss": 0.3384, "step": 1688 }, { "epoch": 0.1278274443025401, "grad_norm": 0.93359375, "learning_rate": 1.9810054022563358e-05, "loss": 0.3341, "step": 1689 }, { "epoch": 0.1279031266259874, "grad_norm": 0.8515625, "learning_rate": 1.9809822906783652e-05, "loss": 0.3169, "step": 1690 }, { "epoch": 0.12797880894943475, "grad_norm": 1.0078125, "learning_rate": 1.9809591651834773e-05, "loss": 0.3683, "step": 1691 }, { "epoch": 0.12805449127288207, "grad_norm": 0.94921875, "learning_rate": 1.9809360257720004e-05, "loss": 0.3781, "step": 1692 }, { "epoch": 0.12813017359632942, "grad_norm": 1.0078125, "learning_rate": 1.9809128724442627e-05, "loss": 0.3578, "step": 1693 }, { "epoch": 0.12820585591977673, "grad_norm": 0.86328125, "learning_rate": 1.9808897052005927e-05, "loss": 0.3362, "step": 1694 }, { "epoch": 0.12828153824322408, "grad_norm": 0.91015625, "learning_rate": 1.980866524041319e-05, "loss": 0.3549, "step": 1695 }, { "epoch": 0.1283572205666714, "grad_norm": 0.8984375, "learning_rate": 1.9808433289667702e-05, "loss": 0.3716, "step": 1696 }, { "epoch": 0.12843290289011872, "grad_norm": 0.9375, "learning_rate": 1.9808201199772755e-05, "loss": 0.3808, "step": 1697 }, { "epoch": 0.12850858521356606, "grad_norm": 0.83203125, "learning_rate": 1.9807968970731643e-05, "loss": 0.3036, "step": 1698 }, { "epoch": 0.12858426753701338, "grad_norm": 0.8203125, "learning_rate": 1.9807736602547663e-05, "loss": 0.3322, "step": 1699 }, { "epoch": 0.12865994986046073, "grad_norm": 0.86328125, "learning_rate": 1.9807504095224105e-05, "loss": 0.329, "step": 1700 }, { "epoch": 0.12873563218390804, "grad_norm": 0.9140625, "learning_rate": 1.9807271448764273e-05, "loss": 0.3587, "step": 1701 }, { "epoch": 0.12881131450735536, "grad_norm": 0.82421875, "learning_rate": 1.980703866317147e-05, "loss": 0.3195, "step": 1702 }, { "epoch": 0.1288869968308027, "grad_norm": 0.87109375, "learning_rate": 1.980680573844899e-05, "loss": 0.3458, "step": 1703 }, { "epoch": 0.12896267915425003, "grad_norm": 12.75, "learning_rate": 1.9806572674600137e-05, "loss": 0.5445, "step": 1704 }, { "epoch": 0.12903836147769737, "grad_norm": 0.89453125, "learning_rate": 1.9806339471628227e-05, "loss": 0.3809, "step": 1705 }, { "epoch": 0.1291140438011447, "grad_norm": 0.85546875, "learning_rate": 1.9806106129536565e-05, "loss": 0.3406, "step": 1706 }, { "epoch": 0.12918972612459204, "grad_norm": 0.95703125, "learning_rate": 1.9805872648328453e-05, "loss": 0.4039, "step": 1707 }, { "epoch": 0.12926540844803935, "grad_norm": 0.8984375, "learning_rate": 1.9805639028007218e-05, "loss": 0.3866, "step": 1708 }, { "epoch": 0.12934109077148667, "grad_norm": 0.90625, "learning_rate": 1.9805405268576162e-05, "loss": 0.3403, "step": 1709 }, { "epoch": 0.12941677309493402, "grad_norm": 0.859375, "learning_rate": 1.9805171370038607e-05, "loss": 0.331, "step": 1710 }, { "epoch": 0.12949245541838134, "grad_norm": 0.90625, "learning_rate": 1.980493733239787e-05, "loss": 0.3589, "step": 1711 }, { "epoch": 0.12956813774182868, "grad_norm": 0.87109375, "learning_rate": 1.980470315565727e-05, "loss": 0.3185, "step": 1712 }, { "epoch": 0.129643820065276, "grad_norm": 0.90234375, "learning_rate": 1.980446883982013e-05, "loss": 0.3755, "step": 1713 }, { "epoch": 0.12971950238872335, "grad_norm": 0.88671875, "learning_rate": 1.9804234384889774e-05, "loss": 0.3508, "step": 1714 }, { "epoch": 0.12979518471217066, "grad_norm": 0.90234375, "learning_rate": 1.980399979086953e-05, "loss": 0.3337, "step": 1715 }, { "epoch": 0.12987086703561798, "grad_norm": 0.8203125, "learning_rate": 1.9803765057762723e-05, "loss": 0.2916, "step": 1716 }, { "epoch": 0.12994654935906533, "grad_norm": 0.96484375, "learning_rate": 1.9803530185572687e-05, "loss": 0.3832, "step": 1717 }, { "epoch": 0.13002223168251265, "grad_norm": 0.890625, "learning_rate": 1.9803295174302752e-05, "loss": 0.3591, "step": 1718 }, { "epoch": 0.13009791400596, "grad_norm": 0.859375, "learning_rate": 1.980306002395625e-05, "loss": 0.3029, "step": 1719 }, { "epoch": 0.1301735963294073, "grad_norm": 0.82421875, "learning_rate": 1.980282473453652e-05, "loss": 0.3067, "step": 1720 }, { "epoch": 0.13024927865285466, "grad_norm": 0.91796875, "learning_rate": 1.98025893060469e-05, "loss": 0.3798, "step": 1721 }, { "epoch": 0.13032496097630197, "grad_norm": 0.83203125, "learning_rate": 1.9802353738490727e-05, "loss": 0.3333, "step": 1722 }, { "epoch": 0.1304006432997493, "grad_norm": 0.86328125, "learning_rate": 1.9802118031871347e-05, "loss": 0.3444, "step": 1723 }, { "epoch": 0.13047632562319664, "grad_norm": 0.89453125, "learning_rate": 1.98018821861921e-05, "loss": 0.3855, "step": 1724 }, { "epoch": 0.13055200794664396, "grad_norm": 0.93359375, "learning_rate": 1.9801646201456334e-05, "loss": 0.4007, "step": 1725 }, { "epoch": 0.1306276902700913, "grad_norm": 0.859375, "learning_rate": 1.9801410077667396e-05, "loss": 0.3413, "step": 1726 }, { "epoch": 0.13070337259353862, "grad_norm": 0.8828125, "learning_rate": 1.9801173814828635e-05, "loss": 0.3305, "step": 1727 }, { "epoch": 0.13077905491698597, "grad_norm": 0.859375, "learning_rate": 1.980093741294341e-05, "loss": 0.3576, "step": 1728 }, { "epoch": 0.13085473724043328, "grad_norm": 0.90625, "learning_rate": 1.980070087201506e-05, "loss": 0.3741, "step": 1729 }, { "epoch": 0.1309304195638806, "grad_norm": 0.96875, "learning_rate": 1.9800464192046956e-05, "loss": 0.3913, "step": 1730 }, { "epoch": 0.13100610188732795, "grad_norm": 0.90234375, "learning_rate": 1.9800227373042446e-05, "loss": 0.3464, "step": 1731 }, { "epoch": 0.13108178421077527, "grad_norm": 0.8671875, "learning_rate": 1.9799990415004897e-05, "loss": 0.3047, "step": 1732 }, { "epoch": 0.1311574665342226, "grad_norm": 1.21875, "learning_rate": 1.979975331793766e-05, "loss": 0.3597, "step": 1733 }, { "epoch": 0.13123314885766993, "grad_norm": 0.8984375, "learning_rate": 1.979951608184411e-05, "loss": 0.3372, "step": 1734 }, { "epoch": 0.13130883118111725, "grad_norm": 0.83984375, "learning_rate": 1.9799278706727606e-05, "loss": 0.3116, "step": 1735 }, { "epoch": 0.1313845135045646, "grad_norm": 0.89453125, "learning_rate": 1.9799041192591518e-05, "loss": 0.3537, "step": 1736 }, { "epoch": 0.1314601958280119, "grad_norm": 0.87109375, "learning_rate": 1.9798803539439217e-05, "loss": 0.3343, "step": 1737 }, { "epoch": 0.13153587815145926, "grad_norm": 0.89453125, "learning_rate": 1.979856574727407e-05, "loss": 0.2967, "step": 1738 }, { "epoch": 0.13161156047490657, "grad_norm": 0.8671875, "learning_rate": 1.9798327816099453e-05, "loss": 0.3363, "step": 1739 }, { "epoch": 0.13168724279835392, "grad_norm": 0.87109375, "learning_rate": 1.9798089745918744e-05, "loss": 0.3334, "step": 1740 }, { "epoch": 0.13176292512180124, "grad_norm": 0.953125, "learning_rate": 1.9797851536735314e-05, "loss": 0.3613, "step": 1741 }, { "epoch": 0.13183860744524856, "grad_norm": 0.8359375, "learning_rate": 1.9797613188552547e-05, "loss": 0.3126, "step": 1742 }, { "epoch": 0.1319142897686959, "grad_norm": 0.84375, "learning_rate": 1.9797374701373824e-05, "loss": 0.3555, "step": 1743 }, { "epoch": 0.13198997209214322, "grad_norm": 0.8828125, "learning_rate": 1.9797136075202532e-05, "loss": 0.3628, "step": 1744 }, { "epoch": 0.13206565441559057, "grad_norm": 0.98046875, "learning_rate": 1.979689731004205e-05, "loss": 0.3862, "step": 1745 }, { "epoch": 0.13214133673903788, "grad_norm": 0.984375, "learning_rate": 1.9796658405895764e-05, "loss": 0.3573, "step": 1746 }, { "epoch": 0.13221701906248523, "grad_norm": 3.453125, "learning_rate": 1.979641936276707e-05, "loss": 0.5546, "step": 1747 }, { "epoch": 0.13229270138593255, "grad_norm": 0.9140625, "learning_rate": 1.9796180180659355e-05, "loss": 0.3304, "step": 1748 }, { "epoch": 0.13236838370937987, "grad_norm": 0.9375, "learning_rate": 1.9795940859576015e-05, "loss": 0.3809, "step": 1749 }, { "epoch": 0.1324440660328272, "grad_norm": 0.84375, "learning_rate": 1.979570139952044e-05, "loss": 0.2976, "step": 1750 }, { "epoch": 0.13251974835627453, "grad_norm": 0.9140625, "learning_rate": 1.9795461800496033e-05, "loss": 0.3484, "step": 1751 }, { "epoch": 0.13259543067972188, "grad_norm": 0.8671875, "learning_rate": 1.9795222062506193e-05, "loss": 0.3579, "step": 1752 }, { "epoch": 0.1326711130031692, "grad_norm": 0.85546875, "learning_rate": 1.9794982185554317e-05, "loss": 0.3073, "step": 1753 }, { "epoch": 0.13274679532661654, "grad_norm": 0.84765625, "learning_rate": 1.979474216964381e-05, "loss": 0.2742, "step": 1754 }, { "epoch": 0.13282247765006386, "grad_norm": 0.9375, "learning_rate": 1.9794502014778076e-05, "loss": 0.3498, "step": 1755 }, { "epoch": 0.13289815997351118, "grad_norm": 0.87890625, "learning_rate": 1.9794261720960524e-05, "loss": 0.3428, "step": 1756 }, { "epoch": 0.13297384229695852, "grad_norm": 0.8984375, "learning_rate": 1.979402128819456e-05, "loss": 0.3307, "step": 1757 }, { "epoch": 0.13304952462040584, "grad_norm": 0.83984375, "learning_rate": 1.97937807164836e-05, "loss": 0.3335, "step": 1758 }, { "epoch": 0.13312520694385319, "grad_norm": 0.796875, "learning_rate": 1.979354000583105e-05, "loss": 0.2751, "step": 1759 }, { "epoch": 0.1332008892673005, "grad_norm": 0.81640625, "learning_rate": 1.9793299156240333e-05, "loss": 0.2743, "step": 1760 }, { "epoch": 0.13327657159074782, "grad_norm": 0.96484375, "learning_rate": 1.979305816771486e-05, "loss": 0.3933, "step": 1761 }, { "epoch": 0.13335225391419517, "grad_norm": 0.8515625, "learning_rate": 1.979281704025805e-05, "loss": 0.3581, "step": 1762 }, { "epoch": 0.13342793623764249, "grad_norm": 0.9140625, "learning_rate": 1.979257577387333e-05, "loss": 0.3616, "step": 1763 }, { "epoch": 0.13350361856108983, "grad_norm": 0.81640625, "learning_rate": 1.979233436856411e-05, "loss": 0.3179, "step": 1764 }, { "epoch": 0.13357930088453715, "grad_norm": 0.984375, "learning_rate": 1.9792092824333828e-05, "loss": 0.3191, "step": 1765 }, { "epoch": 0.1336549832079845, "grad_norm": 0.9765625, "learning_rate": 1.9791851141185903e-05, "loss": 0.3258, "step": 1766 }, { "epoch": 0.1337306655314318, "grad_norm": 0.93359375, "learning_rate": 1.979160931912377e-05, "loss": 0.3863, "step": 1767 }, { "epoch": 0.13380634785487913, "grad_norm": 7.03125, "learning_rate": 1.979136735815085e-05, "loss": 0.5075, "step": 1768 }, { "epoch": 0.13388203017832648, "grad_norm": 0.95703125, "learning_rate": 1.9791125258270587e-05, "loss": 0.4124, "step": 1769 }, { "epoch": 0.1339577125017738, "grad_norm": 1.0, "learning_rate": 1.979088301948641e-05, "loss": 0.3655, "step": 1770 }, { "epoch": 0.13403339482522114, "grad_norm": 0.8671875, "learning_rate": 1.9790640641801752e-05, "loss": 0.3171, "step": 1771 }, { "epoch": 0.13410907714866846, "grad_norm": 0.84375, "learning_rate": 1.9790398125220056e-05, "loss": 0.311, "step": 1772 }, { "epoch": 0.1341847594721158, "grad_norm": 0.93359375, "learning_rate": 1.9790155469744766e-05, "loss": 0.3545, "step": 1773 }, { "epoch": 0.13426044179556312, "grad_norm": 0.9296875, "learning_rate": 1.9789912675379316e-05, "loss": 0.3628, "step": 1774 }, { "epoch": 0.13433612411901044, "grad_norm": 0.875, "learning_rate": 1.9789669742127154e-05, "loss": 0.3249, "step": 1775 }, { "epoch": 0.1344118064424578, "grad_norm": 0.84765625, "learning_rate": 1.978942666999173e-05, "loss": 0.3192, "step": 1776 }, { "epoch": 0.1344874887659051, "grad_norm": 0.9296875, "learning_rate": 1.9789183458976485e-05, "loss": 0.3884, "step": 1777 }, { "epoch": 0.13456317108935245, "grad_norm": 2.15625, "learning_rate": 1.9788940109084876e-05, "loss": 0.5151, "step": 1778 }, { "epoch": 0.13463885341279977, "grad_norm": 0.875, "learning_rate": 1.9788696620320355e-05, "loss": 0.3406, "step": 1779 }, { "epoch": 0.13471453573624712, "grad_norm": 0.89453125, "learning_rate": 1.9788452992686372e-05, "loss": 0.3382, "step": 1780 }, { "epoch": 0.13479021805969443, "grad_norm": 0.84765625, "learning_rate": 1.978820922618639e-05, "loss": 0.3215, "step": 1781 }, { "epoch": 0.13486590038314175, "grad_norm": 0.81640625, "learning_rate": 1.9787965320823858e-05, "loss": 0.2933, "step": 1782 }, { "epoch": 0.1349415827065891, "grad_norm": 0.8984375, "learning_rate": 1.9787721276602245e-05, "loss": 0.3827, "step": 1783 }, { "epoch": 0.13501726503003642, "grad_norm": 0.7890625, "learning_rate": 1.9787477093525008e-05, "loss": 0.2913, "step": 1784 }, { "epoch": 0.13509294735348376, "grad_norm": 2.25, "learning_rate": 1.978723277159561e-05, "loss": 0.5521, "step": 1785 }, { "epoch": 0.13516862967693108, "grad_norm": 2.546875, "learning_rate": 1.9786988310817523e-05, "loss": 0.3724, "step": 1786 }, { "epoch": 0.13524431200037842, "grad_norm": 0.91015625, "learning_rate": 1.9786743711194208e-05, "loss": 0.3436, "step": 1787 }, { "epoch": 0.13531999432382574, "grad_norm": 0.875, "learning_rate": 1.9786498972729142e-05, "loss": 0.3288, "step": 1788 }, { "epoch": 0.13539567664727306, "grad_norm": 2.15625, "learning_rate": 1.9786254095425795e-05, "loss": 0.4652, "step": 1789 }, { "epoch": 0.1354713589707204, "grad_norm": 0.95703125, "learning_rate": 1.978600907928764e-05, "loss": 0.3874, "step": 1790 }, { "epoch": 0.13554704129416772, "grad_norm": 0.9453125, "learning_rate": 1.978576392431815e-05, "loss": 0.313, "step": 1791 }, { "epoch": 0.13562272361761507, "grad_norm": 2.609375, "learning_rate": 1.9785518630520807e-05, "loss": 0.4818, "step": 1792 }, { "epoch": 0.1356984059410624, "grad_norm": 0.921875, "learning_rate": 1.978527319789909e-05, "loss": 0.3114, "step": 1793 }, { "epoch": 0.1357740882645097, "grad_norm": 0.90234375, "learning_rate": 1.978502762645648e-05, "loss": 0.3555, "step": 1794 }, { "epoch": 0.13584977058795705, "grad_norm": 0.96484375, "learning_rate": 1.978478191619646e-05, "loss": 0.3885, "step": 1795 }, { "epoch": 0.13592545291140437, "grad_norm": 0.9921875, "learning_rate": 1.9784536067122518e-05, "loss": 0.3786, "step": 1796 }, { "epoch": 0.13600113523485172, "grad_norm": 0.88671875, "learning_rate": 1.9784290079238142e-05, "loss": 0.349, "step": 1797 }, { "epoch": 0.13607681755829903, "grad_norm": 0.890625, "learning_rate": 1.978404395254682e-05, "loss": 0.3665, "step": 1798 }, { "epoch": 0.13615249988174638, "grad_norm": 4.09375, "learning_rate": 1.9783797687052046e-05, "loss": 0.468, "step": 1799 }, { "epoch": 0.1362281822051937, "grad_norm": 0.92578125, "learning_rate": 1.9783551282757312e-05, "loss": 0.4138, "step": 1800 }, { "epoch": 0.13630386452864102, "grad_norm": 0.91796875, "learning_rate": 1.9783304739666112e-05, "loss": 0.3492, "step": 1801 }, { "epoch": 0.13637954685208836, "grad_norm": 0.8359375, "learning_rate": 1.9783058057781947e-05, "loss": 0.3188, "step": 1802 }, { "epoch": 0.13645522917553568, "grad_norm": 1.015625, "learning_rate": 1.9782811237108313e-05, "loss": 0.3157, "step": 1803 }, { "epoch": 0.13653091149898303, "grad_norm": 0.9140625, "learning_rate": 1.9782564277648717e-05, "loss": 0.3766, "step": 1804 }, { "epoch": 0.13660659382243034, "grad_norm": 0.9296875, "learning_rate": 1.978231717940666e-05, "loss": 0.3742, "step": 1805 }, { "epoch": 0.1366822761458777, "grad_norm": 0.92578125, "learning_rate": 1.9782069942385642e-05, "loss": 0.3613, "step": 1806 }, { "epoch": 0.136757958469325, "grad_norm": 0.85546875, "learning_rate": 1.978182256658918e-05, "loss": 0.3485, "step": 1807 }, { "epoch": 0.13683364079277233, "grad_norm": 0.9453125, "learning_rate": 1.9781575052020775e-05, "loss": 0.3585, "step": 1808 }, { "epoch": 0.13690932311621967, "grad_norm": 0.95703125, "learning_rate": 1.9781327398683945e-05, "loss": 0.3562, "step": 1809 }, { "epoch": 0.136985005439667, "grad_norm": 0.9296875, "learning_rate": 1.97810796065822e-05, "loss": 0.3484, "step": 1810 }, { "epoch": 0.13706068776311434, "grad_norm": 0.83984375, "learning_rate": 1.9780831675719053e-05, "loss": 0.318, "step": 1811 }, { "epoch": 0.13713637008656165, "grad_norm": 0.8359375, "learning_rate": 1.978058360609803e-05, "loss": 0.2911, "step": 1812 }, { "epoch": 0.137212052410009, "grad_norm": 0.78515625, "learning_rate": 1.978033539772264e-05, "loss": 0.2881, "step": 1813 }, { "epoch": 0.13728773473345632, "grad_norm": 0.82421875, "learning_rate": 1.9780087050596407e-05, "loss": 0.3005, "step": 1814 }, { "epoch": 0.13736341705690364, "grad_norm": 0.9140625, "learning_rate": 1.977983856472286e-05, "loss": 0.3685, "step": 1815 }, { "epoch": 0.13743909938035098, "grad_norm": 0.82421875, "learning_rate": 1.977958994010552e-05, "loss": 0.3251, "step": 1816 }, { "epoch": 0.1375147817037983, "grad_norm": 0.89453125, "learning_rate": 1.9779341176747913e-05, "loss": 0.3379, "step": 1817 }, { "epoch": 0.13759046402724565, "grad_norm": 0.89453125, "learning_rate": 1.9779092274653567e-05, "loss": 0.3283, "step": 1818 }, { "epoch": 0.13766614635069296, "grad_norm": 0.8515625, "learning_rate": 1.977884323382602e-05, "loss": 0.3138, "step": 1819 }, { "epoch": 0.1377418286741403, "grad_norm": 0.84375, "learning_rate": 1.97785940542688e-05, "loss": 0.3373, "step": 1820 }, { "epoch": 0.13781751099758763, "grad_norm": 0.8046875, "learning_rate": 1.977834473598544e-05, "loss": 0.2707, "step": 1821 }, { "epoch": 0.13789319332103495, "grad_norm": 0.89453125, "learning_rate": 1.9778095278979482e-05, "loss": 0.364, "step": 1822 }, { "epoch": 0.1379688756444823, "grad_norm": 0.7734375, "learning_rate": 1.9777845683254464e-05, "loss": 0.2823, "step": 1823 }, { "epoch": 0.1380445579679296, "grad_norm": 0.89453125, "learning_rate": 1.9777595948813922e-05, "loss": 0.3409, "step": 1824 }, { "epoch": 0.13812024029137696, "grad_norm": 0.99609375, "learning_rate": 1.97773460756614e-05, "loss": 0.3972, "step": 1825 }, { "epoch": 0.13819592261482427, "grad_norm": 0.82421875, "learning_rate": 1.9777096063800453e-05, "loss": 0.3004, "step": 1826 }, { "epoch": 0.1382716049382716, "grad_norm": 0.8671875, "learning_rate": 1.9776845913234613e-05, "loss": 0.341, "step": 1827 }, { "epoch": 0.13834728726171894, "grad_norm": 0.7734375, "learning_rate": 1.9776595623967443e-05, "loss": 0.2743, "step": 1828 }, { "epoch": 0.13842296958516626, "grad_norm": 0.84375, "learning_rate": 1.9776345196002484e-05, "loss": 0.3161, "step": 1829 }, { "epoch": 0.1384986519086136, "grad_norm": 0.8828125, "learning_rate": 1.977609462934329e-05, "loss": 0.3636, "step": 1830 }, { "epoch": 0.13857433423206092, "grad_norm": 0.9296875, "learning_rate": 1.9775843923993418e-05, "loss": 0.4017, "step": 1831 }, { "epoch": 0.13865001655550827, "grad_norm": 0.87890625, "learning_rate": 1.9775593079956426e-05, "loss": 0.3279, "step": 1832 }, { "epoch": 0.13872569887895558, "grad_norm": 0.859375, "learning_rate": 1.977534209723587e-05, "loss": 0.3212, "step": 1833 }, { "epoch": 0.1388013812024029, "grad_norm": 0.88671875, "learning_rate": 1.977509097583531e-05, "loss": 0.3407, "step": 1834 }, { "epoch": 0.13887706352585025, "grad_norm": 0.83203125, "learning_rate": 1.977483971575831e-05, "loss": 0.312, "step": 1835 }, { "epoch": 0.13895274584929757, "grad_norm": 0.87890625, "learning_rate": 1.9774588317008432e-05, "loss": 0.2955, "step": 1836 }, { "epoch": 0.1390284281727449, "grad_norm": 0.86328125, "learning_rate": 1.9774336779589253e-05, "loss": 0.3311, "step": 1837 }, { "epoch": 0.13910411049619223, "grad_norm": 0.9140625, "learning_rate": 1.9774085103504326e-05, "loss": 0.3896, "step": 1838 }, { "epoch": 0.13917979281963958, "grad_norm": 0.796875, "learning_rate": 1.9773833288757232e-05, "loss": 0.2959, "step": 1839 }, { "epoch": 0.1392554751430869, "grad_norm": 0.83984375, "learning_rate": 1.977358133535154e-05, "loss": 0.3067, "step": 1840 }, { "epoch": 0.1393311574665342, "grad_norm": 0.93359375, "learning_rate": 1.9773329243290826e-05, "loss": 0.3632, "step": 1841 }, { "epoch": 0.13940683978998156, "grad_norm": 0.89453125, "learning_rate": 1.977307701257866e-05, "loss": 0.3301, "step": 1842 }, { "epoch": 0.13948252211342888, "grad_norm": 0.859375, "learning_rate": 1.977282464321863e-05, "loss": 0.346, "step": 1843 }, { "epoch": 0.13955820443687622, "grad_norm": 0.89453125, "learning_rate": 1.977257213521431e-05, "loss": 0.32, "step": 1844 }, { "epoch": 0.13963388676032354, "grad_norm": 0.90625, "learning_rate": 1.9772319488569284e-05, "loss": 0.35, "step": 1845 }, { "epoch": 0.13970956908377088, "grad_norm": 0.93359375, "learning_rate": 1.9772066703287137e-05, "loss": 0.3633, "step": 1846 }, { "epoch": 0.1397852514072182, "grad_norm": 0.8984375, "learning_rate": 1.9771813779371452e-05, "loss": 0.3652, "step": 1847 }, { "epoch": 0.13986093373066552, "grad_norm": 0.87890625, "learning_rate": 1.9771560716825822e-05, "loss": 0.3472, "step": 1848 }, { "epoch": 0.13993661605411287, "grad_norm": 0.91015625, "learning_rate": 1.9771307515653835e-05, "loss": 0.3738, "step": 1849 }, { "epoch": 0.14001229837756018, "grad_norm": 0.88671875, "learning_rate": 1.9771054175859082e-05, "loss": 0.3371, "step": 1850 }, { "epoch": 0.14008798070100753, "grad_norm": 0.9140625, "learning_rate": 1.9770800697445157e-05, "loss": 0.3962, "step": 1851 }, { "epoch": 0.14016366302445485, "grad_norm": 0.875, "learning_rate": 1.977054708041566e-05, "loss": 0.3256, "step": 1852 }, { "epoch": 0.14023934534790217, "grad_norm": 0.90234375, "learning_rate": 1.977029332477418e-05, "loss": 0.3788, "step": 1853 }, { "epoch": 0.1403150276713495, "grad_norm": 0.8828125, "learning_rate": 1.9770039430524325e-05, "loss": 0.3479, "step": 1854 }, { "epoch": 0.14039070999479683, "grad_norm": 0.87890625, "learning_rate": 1.9769785397669698e-05, "loss": 0.3622, "step": 1855 }, { "epoch": 0.14046639231824418, "grad_norm": 0.92578125, "learning_rate": 1.9769531226213897e-05, "loss": 0.3862, "step": 1856 }, { "epoch": 0.1405420746416915, "grad_norm": 0.9140625, "learning_rate": 1.976927691616053e-05, "loss": 0.3798, "step": 1857 }, { "epoch": 0.14061775696513884, "grad_norm": 0.83984375, "learning_rate": 1.9769022467513208e-05, "loss": 0.3406, "step": 1858 }, { "epoch": 0.14069343928858616, "grad_norm": 0.984375, "learning_rate": 1.9768767880275534e-05, "loss": 0.4067, "step": 1859 }, { "epoch": 0.14076912161203348, "grad_norm": 0.80859375, "learning_rate": 1.9768513154451126e-05, "loss": 0.3329, "step": 1860 }, { "epoch": 0.14084480393548082, "grad_norm": 0.875, "learning_rate": 1.9768258290043597e-05, "loss": 0.3484, "step": 1861 }, { "epoch": 0.14092048625892814, "grad_norm": 0.91015625, "learning_rate": 1.976800328705656e-05, "loss": 0.3845, "step": 1862 }, { "epoch": 0.1409961685823755, "grad_norm": 0.8828125, "learning_rate": 1.9767748145493635e-05, "loss": 0.3481, "step": 1863 }, { "epoch": 0.1410718509058228, "grad_norm": 0.84375, "learning_rate": 1.976749286535844e-05, "loss": 0.3467, "step": 1864 }, { "epoch": 0.14114753322927015, "grad_norm": 0.91796875, "learning_rate": 1.9767237446654595e-05, "loss": 0.3658, "step": 1865 }, { "epoch": 0.14122321555271747, "grad_norm": 0.859375, "learning_rate": 1.9766981889385727e-05, "loss": 0.32, "step": 1866 }, { "epoch": 0.1412988978761648, "grad_norm": 0.828125, "learning_rate": 1.9766726193555458e-05, "loss": 0.339, "step": 1867 }, { "epoch": 0.14137458019961213, "grad_norm": 0.84765625, "learning_rate": 1.9766470359167424e-05, "loss": 0.3441, "step": 1868 }, { "epoch": 0.14145026252305945, "grad_norm": 0.87890625, "learning_rate": 1.976621438622524e-05, "loss": 0.3397, "step": 1869 }, { "epoch": 0.1415259448465068, "grad_norm": 0.953125, "learning_rate": 1.976595827473255e-05, "loss": 0.3621, "step": 1870 }, { "epoch": 0.14160162716995411, "grad_norm": 0.8359375, "learning_rate": 1.9765702024692984e-05, "loss": 0.3182, "step": 1871 }, { "epoch": 0.14167730949340146, "grad_norm": 0.91796875, "learning_rate": 1.9765445636110175e-05, "loss": 0.3776, "step": 1872 }, { "epoch": 0.14175299181684878, "grad_norm": 1.046875, "learning_rate": 1.976518910898776e-05, "loss": 0.4067, "step": 1873 }, { "epoch": 0.1418286741402961, "grad_norm": 0.890625, "learning_rate": 1.9764932443329383e-05, "loss": 0.35, "step": 1874 }, { "epoch": 0.14190435646374344, "grad_norm": 0.87109375, "learning_rate": 1.976467563913868e-05, "loss": 0.348, "step": 1875 }, { "epoch": 0.14198003878719076, "grad_norm": 0.90625, "learning_rate": 1.9764418696419297e-05, "loss": 0.3852, "step": 1876 }, { "epoch": 0.1420557211106381, "grad_norm": 0.86328125, "learning_rate": 1.9764161615174876e-05, "loss": 0.3292, "step": 1877 }, { "epoch": 0.14213140343408542, "grad_norm": 0.921875, "learning_rate": 1.976390439540907e-05, "loss": 0.379, "step": 1878 }, { "epoch": 0.14220708575753277, "grad_norm": 0.9296875, "learning_rate": 1.9763647037125525e-05, "loss": 0.3405, "step": 1879 }, { "epoch": 0.1422827680809801, "grad_norm": 0.8515625, "learning_rate": 1.9763389540327888e-05, "loss": 0.3056, "step": 1880 }, { "epoch": 0.1423584504044274, "grad_norm": 4.875, "learning_rate": 1.976313190501982e-05, "loss": 0.556, "step": 1881 }, { "epoch": 0.14243413272787475, "grad_norm": 0.91015625, "learning_rate": 1.976287413120497e-05, "loss": 0.3447, "step": 1882 }, { "epoch": 0.14250981505132207, "grad_norm": 0.91015625, "learning_rate": 1.9762616218886995e-05, "loss": 0.3408, "step": 1883 }, { "epoch": 0.14258549737476942, "grad_norm": 0.78125, "learning_rate": 1.976235816806956e-05, "loss": 0.283, "step": 1884 }, { "epoch": 0.14266117969821673, "grad_norm": 2.0, "learning_rate": 1.9762099978756314e-05, "loss": 0.517, "step": 1885 }, { "epoch": 0.14273686202166405, "grad_norm": 0.8671875, "learning_rate": 1.9761841650950935e-05, "loss": 0.2924, "step": 1886 }, { "epoch": 0.1428125443451114, "grad_norm": 0.8359375, "learning_rate": 1.9761583184657076e-05, "loss": 0.3272, "step": 1887 }, { "epoch": 0.14288822666855872, "grad_norm": 0.875, "learning_rate": 1.9761324579878412e-05, "loss": 0.379, "step": 1888 }, { "epoch": 0.14296390899200606, "grad_norm": 0.93359375, "learning_rate": 1.9761065836618603e-05, "loss": 0.3745, "step": 1889 }, { "epoch": 0.14303959131545338, "grad_norm": 0.84765625, "learning_rate": 1.9760806954881328e-05, "loss": 0.3216, "step": 1890 }, { "epoch": 0.14311527363890073, "grad_norm": 0.87109375, "learning_rate": 1.9760547934670256e-05, "loss": 0.3547, "step": 1891 }, { "epoch": 0.14319095596234804, "grad_norm": 0.7890625, "learning_rate": 1.976028877598906e-05, "loss": 0.2882, "step": 1892 }, { "epoch": 0.14326663828579536, "grad_norm": 0.8515625, "learning_rate": 1.9760029478841423e-05, "loss": 0.3227, "step": 1893 }, { "epoch": 0.1433423206092427, "grad_norm": 0.89453125, "learning_rate": 1.9759770043231015e-05, "loss": 0.3527, "step": 1894 }, { "epoch": 0.14341800293269003, "grad_norm": 0.8984375, "learning_rate": 1.975951046916152e-05, "loss": 0.3298, "step": 1895 }, { "epoch": 0.14349368525613737, "grad_norm": 0.8515625, "learning_rate": 1.9759250756636624e-05, "loss": 0.3353, "step": 1896 }, { "epoch": 0.1435693675795847, "grad_norm": 0.875, "learning_rate": 1.9758990905660005e-05, "loss": 0.342, "step": 1897 }, { "epoch": 0.14364504990303203, "grad_norm": 0.80859375, "learning_rate": 1.9758730916235356e-05, "loss": 0.2974, "step": 1898 }, { "epoch": 0.14372073222647935, "grad_norm": 0.94140625, "learning_rate": 1.975847078836636e-05, "loss": 0.3605, "step": 1899 }, { "epoch": 0.14379641454992667, "grad_norm": 0.8515625, "learning_rate": 1.9758210522056713e-05, "loss": 0.3207, "step": 1900 }, { "epoch": 0.14387209687337402, "grad_norm": 0.8515625, "learning_rate": 1.9757950117310102e-05, "loss": 0.309, "step": 1901 }, { "epoch": 0.14394777919682133, "grad_norm": 0.8828125, "learning_rate": 1.9757689574130223e-05, "loss": 0.3302, "step": 1902 }, { "epoch": 0.14402346152026868, "grad_norm": 0.84765625, "learning_rate": 1.9757428892520773e-05, "loss": 0.3374, "step": 1903 }, { "epoch": 0.144099143843716, "grad_norm": 0.93359375, "learning_rate": 1.9757168072485448e-05, "loss": 0.3885, "step": 1904 }, { "epoch": 0.14417482616716334, "grad_norm": 0.8359375, "learning_rate": 1.9756907114027954e-05, "loss": 0.3287, "step": 1905 }, { "epoch": 0.14425050849061066, "grad_norm": 0.84765625, "learning_rate": 1.975664601715199e-05, "loss": 0.3692, "step": 1906 }, { "epoch": 0.14432619081405798, "grad_norm": 0.94140625, "learning_rate": 1.9756384781861255e-05, "loss": 0.2966, "step": 1907 }, { "epoch": 0.14440187313750533, "grad_norm": 0.8203125, "learning_rate": 1.975612340815946e-05, "loss": 0.3041, "step": 1908 }, { "epoch": 0.14447755546095264, "grad_norm": 0.90625, "learning_rate": 1.9755861896050312e-05, "loss": 0.3628, "step": 1909 }, { "epoch": 0.1445532377844, "grad_norm": 0.90234375, "learning_rate": 1.9755600245537522e-05, "loss": 0.3623, "step": 1910 }, { "epoch": 0.1446289201078473, "grad_norm": 0.87890625, "learning_rate": 1.97553384566248e-05, "loss": 0.3169, "step": 1911 }, { "epoch": 0.14470460243129465, "grad_norm": 0.8671875, "learning_rate": 1.9755076529315867e-05, "loss": 0.3215, "step": 1912 }, { "epoch": 0.14478028475474197, "grad_norm": 2.921875, "learning_rate": 1.9754814463614428e-05, "loss": 0.4627, "step": 1913 }, { "epoch": 0.1448559670781893, "grad_norm": 0.9609375, "learning_rate": 1.9754552259524208e-05, "loss": 0.3599, "step": 1914 }, { "epoch": 0.14493164940163664, "grad_norm": 0.83984375, "learning_rate": 1.9754289917048926e-05, "loss": 0.3296, "step": 1915 }, { "epoch": 0.14500733172508395, "grad_norm": 0.8828125, "learning_rate": 1.97540274361923e-05, "loss": 0.3556, "step": 1916 }, { "epoch": 0.1450830140485313, "grad_norm": 0.91015625, "learning_rate": 1.9753764816958055e-05, "loss": 0.3416, "step": 1917 }, { "epoch": 0.14515869637197862, "grad_norm": 0.88671875, "learning_rate": 1.975350205934992e-05, "loss": 0.3552, "step": 1918 }, { "epoch": 0.14523437869542594, "grad_norm": 0.96875, "learning_rate": 1.975323916337162e-05, "loss": 0.3666, "step": 1919 }, { "epoch": 0.14531006101887328, "grad_norm": 0.91015625, "learning_rate": 1.9752976129026885e-05, "loss": 0.3799, "step": 1920 }, { "epoch": 0.1453857433423206, "grad_norm": 0.80078125, "learning_rate": 1.975271295631945e-05, "loss": 0.3077, "step": 1921 }, { "epoch": 0.14546142566576795, "grad_norm": 0.89453125, "learning_rate": 1.9752449645253045e-05, "loss": 0.3575, "step": 1922 }, { "epoch": 0.14553710798921526, "grad_norm": 0.85546875, "learning_rate": 1.9752186195831404e-05, "loss": 0.3023, "step": 1923 }, { "epoch": 0.1456127903126626, "grad_norm": 0.84765625, "learning_rate": 1.9751922608058268e-05, "loss": 0.3357, "step": 1924 }, { "epoch": 0.14568847263610993, "grad_norm": 2.5, "learning_rate": 1.9751658881937374e-05, "loss": 0.5731, "step": 1925 }, { "epoch": 0.14576415495955725, "grad_norm": 0.859375, "learning_rate": 1.9751395017472462e-05, "loss": 0.3094, "step": 1926 }, { "epoch": 0.1458398372830046, "grad_norm": 0.97265625, "learning_rate": 1.975113101466728e-05, "loss": 0.4003, "step": 1927 }, { "epoch": 0.1459155196064519, "grad_norm": 0.91015625, "learning_rate": 1.975086687352557e-05, "loss": 0.3411, "step": 1928 }, { "epoch": 0.14599120192989926, "grad_norm": 0.984375, "learning_rate": 1.9750602594051083e-05, "loss": 0.3851, "step": 1929 }, { "epoch": 0.14606688425334657, "grad_norm": 1.03125, "learning_rate": 1.9750338176247563e-05, "loss": 0.3925, "step": 1930 }, { "epoch": 0.14614256657679392, "grad_norm": 0.93359375, "learning_rate": 1.9750073620118765e-05, "loss": 0.3717, "step": 1931 }, { "epoch": 0.14621824890024124, "grad_norm": 0.98046875, "learning_rate": 1.974980892566844e-05, "loss": 0.4032, "step": 1932 }, { "epoch": 0.14629393122368856, "grad_norm": 0.93359375, "learning_rate": 1.974954409290035e-05, "loss": 0.371, "step": 1933 }, { "epoch": 0.1463696135471359, "grad_norm": 0.875, "learning_rate": 1.9749279121818235e-05, "loss": 0.3599, "step": 1934 }, { "epoch": 0.14644529587058322, "grad_norm": 0.93359375, "learning_rate": 1.9749014012425873e-05, "loss": 0.3352, "step": 1935 }, { "epoch": 0.14652097819403057, "grad_norm": 0.96484375, "learning_rate": 1.9748748764727017e-05, "loss": 0.3746, "step": 1936 }, { "epoch": 0.14659666051747788, "grad_norm": 1.140625, "learning_rate": 1.9748483378725428e-05, "loss": 0.381, "step": 1937 }, { "epoch": 0.14667234284092523, "grad_norm": 0.80078125, "learning_rate": 1.9748217854424877e-05, "loss": 0.2896, "step": 1938 }, { "epoch": 0.14674802516437255, "grad_norm": 0.953125, "learning_rate": 1.9747952191829124e-05, "loss": 0.3848, "step": 1939 }, { "epoch": 0.14682370748781987, "grad_norm": 2.671875, "learning_rate": 1.9747686390941944e-05, "loss": 0.5197, "step": 1940 }, { "epoch": 0.1468993898112672, "grad_norm": 0.94140625, "learning_rate": 1.97474204517671e-05, "loss": 0.3353, "step": 1941 }, { "epoch": 0.14697507213471453, "grad_norm": 0.890625, "learning_rate": 1.9747154374308373e-05, "loss": 0.3309, "step": 1942 }, { "epoch": 0.14705075445816188, "grad_norm": 0.85546875, "learning_rate": 1.9746888158569534e-05, "loss": 0.326, "step": 1943 }, { "epoch": 0.1471264367816092, "grad_norm": 0.82421875, "learning_rate": 1.974662180455436e-05, "loss": 0.2817, "step": 1944 }, { "epoch": 0.1472021191050565, "grad_norm": 3.109375, "learning_rate": 1.974635531226663e-05, "loss": 0.5003, "step": 1945 }, { "epoch": 0.14727780142850386, "grad_norm": 0.91796875, "learning_rate": 1.9746088681710125e-05, "loss": 0.3044, "step": 1946 }, { "epoch": 0.14735348375195118, "grad_norm": 0.87890625, "learning_rate": 1.9745821912888625e-05, "loss": 0.3407, "step": 1947 }, { "epoch": 0.14742916607539852, "grad_norm": 0.86328125, "learning_rate": 1.974555500580592e-05, "loss": 0.3048, "step": 1948 }, { "epoch": 0.14750484839884584, "grad_norm": 2.546875, "learning_rate": 1.9745287960465794e-05, "loss": 0.4755, "step": 1949 }, { "epoch": 0.14758053072229319, "grad_norm": 1.0546875, "learning_rate": 1.974502077687203e-05, "loss": 0.3642, "step": 1950 }, { "epoch": 0.1476562130457405, "grad_norm": 0.90625, "learning_rate": 1.9744753455028426e-05, "loss": 0.3752, "step": 1951 }, { "epoch": 0.14773189536918782, "grad_norm": 0.8515625, "learning_rate": 1.974448599493877e-05, "loss": 0.3194, "step": 1952 }, { "epoch": 0.14780757769263517, "grad_norm": 1.796875, "learning_rate": 1.9744218396606863e-05, "loss": 0.5015, "step": 1953 }, { "epoch": 0.14788326001608249, "grad_norm": 1.5546875, "learning_rate": 1.974395066003649e-05, "loss": 0.4683, "step": 1954 }, { "epoch": 0.14795894233952983, "grad_norm": 0.84375, "learning_rate": 1.974368278523146e-05, "loss": 0.3117, "step": 1955 }, { "epoch": 0.14803462466297715, "grad_norm": 0.953125, "learning_rate": 1.9743414772195567e-05, "loss": 0.3549, "step": 1956 }, { "epoch": 0.1481103069864245, "grad_norm": 0.8515625, "learning_rate": 1.974314662093262e-05, "loss": 0.3116, "step": 1957 }, { "epoch": 0.1481859893098718, "grad_norm": 0.8515625, "learning_rate": 1.9742878331446415e-05, "loss": 0.3248, "step": 1958 }, { "epoch": 0.14826167163331913, "grad_norm": 0.921875, "learning_rate": 1.9742609903740757e-05, "loss": 0.3002, "step": 1959 }, { "epoch": 0.14833735395676648, "grad_norm": 1.953125, "learning_rate": 1.974234133781946e-05, "loss": 0.3788, "step": 1960 }, { "epoch": 0.1484130362802138, "grad_norm": 2.375, "learning_rate": 1.974207263368634e-05, "loss": 0.4712, "step": 1961 }, { "epoch": 0.14848871860366114, "grad_norm": 1.015625, "learning_rate": 1.9741803791345196e-05, "loss": 0.3651, "step": 1962 }, { "epoch": 0.14856440092710846, "grad_norm": 0.8671875, "learning_rate": 1.9741534810799846e-05, "loss": 0.3313, "step": 1963 }, { "epoch": 0.1486400832505558, "grad_norm": 0.84765625, "learning_rate": 1.974126569205411e-05, "loss": 0.3274, "step": 1964 }, { "epoch": 0.14871576557400312, "grad_norm": 0.94921875, "learning_rate": 1.9740996435111804e-05, "loss": 0.3828, "step": 1965 }, { "epoch": 0.14879144789745044, "grad_norm": 1.09375, "learning_rate": 1.974072703997675e-05, "loss": 0.3713, "step": 1966 }, { "epoch": 0.1488671302208978, "grad_norm": 0.86328125, "learning_rate": 1.9740457506652765e-05, "loss": 0.2969, "step": 1967 }, { "epoch": 0.1489428125443451, "grad_norm": 1.0625, "learning_rate": 1.9740187835143673e-05, "loss": 0.3829, "step": 1968 }, { "epoch": 0.14901849486779245, "grad_norm": 0.86328125, "learning_rate": 1.9739918025453303e-05, "loss": 0.2963, "step": 1969 }, { "epoch": 0.14909417719123977, "grad_norm": 0.90234375, "learning_rate": 1.973964807758548e-05, "loss": 0.3568, "step": 1970 }, { "epoch": 0.14916985951468711, "grad_norm": 0.890625, "learning_rate": 1.973937799154404e-05, "loss": 0.3294, "step": 1971 }, { "epoch": 0.14924554183813443, "grad_norm": 8.75, "learning_rate": 1.9739107767332806e-05, "loss": 0.5585, "step": 1972 }, { "epoch": 0.14932122416158175, "grad_norm": 0.84765625, "learning_rate": 1.9738837404955616e-05, "loss": 0.3064, "step": 1973 }, { "epoch": 0.1493969064850291, "grad_norm": 0.890625, "learning_rate": 1.9738566904416307e-05, "loss": 0.3537, "step": 1974 }, { "epoch": 0.14947258880847641, "grad_norm": 5.5, "learning_rate": 1.9738296265718714e-05, "loss": 0.6174, "step": 1975 }, { "epoch": 0.14954827113192376, "grad_norm": 0.875, "learning_rate": 1.9738025488866673e-05, "loss": 0.302, "step": 1976 }, { "epoch": 0.14962395345537108, "grad_norm": 0.95703125, "learning_rate": 1.9737754573864032e-05, "loss": 0.2877, "step": 1977 }, { "epoch": 0.1496996357788184, "grad_norm": 3.421875, "learning_rate": 1.9737483520714635e-05, "loss": 0.4073, "step": 1978 }, { "epoch": 0.14977531810226574, "grad_norm": 1.9609375, "learning_rate": 1.973721232942232e-05, "loss": 0.4459, "step": 1979 }, { "epoch": 0.14985100042571306, "grad_norm": 0.8671875, "learning_rate": 1.973694099999094e-05, "loss": 0.3462, "step": 1980 }, { "epoch": 0.1499266827491604, "grad_norm": 0.8671875, "learning_rate": 1.9736669532424343e-05, "loss": 0.3418, "step": 1981 }, { "epoch": 0.15000236507260772, "grad_norm": 0.84375, "learning_rate": 1.973639792672638e-05, "loss": 0.3206, "step": 1982 }, { "epoch": 0.15007804739605507, "grad_norm": 0.85546875, "learning_rate": 1.9736126182900902e-05, "loss": 0.3513, "step": 1983 }, { "epoch": 0.1501537297195024, "grad_norm": 0.80078125, "learning_rate": 1.9735854300951768e-05, "loss": 0.3123, "step": 1984 }, { "epoch": 0.1502294120429497, "grad_norm": 0.8671875, "learning_rate": 1.9735582280882832e-05, "loss": 0.3538, "step": 1985 }, { "epoch": 0.15030509436639705, "grad_norm": 0.828125, "learning_rate": 1.973531012269796e-05, "loss": 0.3208, "step": 1986 }, { "epoch": 0.15038077668984437, "grad_norm": 0.9296875, "learning_rate": 1.9735037826401e-05, "loss": 0.3677, "step": 1987 }, { "epoch": 0.15045645901329172, "grad_norm": 0.8984375, "learning_rate": 1.9734765391995827e-05, "loss": 0.3292, "step": 1988 }, { "epoch": 0.15053214133673903, "grad_norm": 0.82421875, "learning_rate": 1.97344928194863e-05, "loss": 0.3364, "step": 1989 }, { "epoch": 0.15060782366018638, "grad_norm": 0.83203125, "learning_rate": 1.973422010887629e-05, "loss": 0.3368, "step": 1990 }, { "epoch": 0.1506835059836337, "grad_norm": 0.8671875, "learning_rate": 1.9733947260169662e-05, "loss": 0.3185, "step": 1991 }, { "epoch": 0.15075918830708102, "grad_norm": 0.85546875, "learning_rate": 1.9733674273370284e-05, "loss": 0.3338, "step": 1992 }, { "epoch": 0.15083487063052836, "grad_norm": 0.92578125, "learning_rate": 1.973340114848204e-05, "loss": 0.3256, "step": 1993 }, { "epoch": 0.15091055295397568, "grad_norm": 0.88671875, "learning_rate": 1.973312788550879e-05, "loss": 0.3547, "step": 1994 }, { "epoch": 0.15098623527742303, "grad_norm": 0.84765625, "learning_rate": 1.9732854484454426e-05, "loss": 0.3423, "step": 1995 }, { "epoch": 0.15106191760087034, "grad_norm": 0.859375, "learning_rate": 1.9732580945322813e-05, "loss": 0.3644, "step": 1996 }, { "epoch": 0.1511375999243177, "grad_norm": 0.82421875, "learning_rate": 1.9732307268117837e-05, "loss": 0.2975, "step": 1997 }, { "epoch": 0.151213282247765, "grad_norm": 1.0, "learning_rate": 1.9732033452843384e-05, "loss": 0.4169, "step": 1998 }, { "epoch": 0.15128896457121233, "grad_norm": 0.859375, "learning_rate": 1.9731759499503336e-05, "loss": 0.3549, "step": 1999 }, { "epoch": 0.15136464689465967, "grad_norm": 0.8671875, "learning_rate": 1.973148540810158e-05, "loss": 0.3217, "step": 2000 }, { "epoch": 0.151440329218107, "grad_norm": 0.86328125, "learning_rate": 1.9731211178642e-05, "loss": 0.3206, "step": 2001 }, { "epoch": 0.15151601154155434, "grad_norm": 0.8359375, "learning_rate": 1.9730936811128494e-05, "loss": 0.3123, "step": 2002 }, { "epoch": 0.15159169386500165, "grad_norm": 0.765625, "learning_rate": 1.973066230556495e-05, "loss": 0.2793, "step": 2003 }, { "epoch": 0.151667376188449, "grad_norm": 0.83203125, "learning_rate": 1.973038766195526e-05, "loss": 0.3297, "step": 2004 }, { "epoch": 0.15174305851189632, "grad_norm": 0.98828125, "learning_rate": 1.9730112880303326e-05, "loss": 0.3736, "step": 2005 }, { "epoch": 0.15181874083534364, "grad_norm": 0.890625, "learning_rate": 1.9729837960613042e-05, "loss": 0.3195, "step": 2006 }, { "epoch": 0.15189442315879098, "grad_norm": 0.890625, "learning_rate": 1.972956290288831e-05, "loss": 0.3615, "step": 2007 }, { "epoch": 0.1519701054822383, "grad_norm": 0.81640625, "learning_rate": 1.972928770713303e-05, "loss": 0.3118, "step": 2008 }, { "epoch": 0.15204578780568564, "grad_norm": 0.9140625, "learning_rate": 1.972901237335111e-05, "loss": 0.3853, "step": 2009 }, { "epoch": 0.15212147012913296, "grad_norm": 0.9296875, "learning_rate": 1.9728736901546454e-05, "loss": 0.4108, "step": 2010 }, { "epoch": 0.15219715245258028, "grad_norm": 1.0078125, "learning_rate": 1.972846129172297e-05, "loss": 0.4034, "step": 2011 }, { "epoch": 0.15227283477602763, "grad_norm": 0.90625, "learning_rate": 1.972818554388457e-05, "loss": 0.3221, "step": 2012 }, { "epoch": 0.15234851709947494, "grad_norm": 0.8359375, "learning_rate": 1.972790965803516e-05, "loss": 0.3273, "step": 2013 }, { "epoch": 0.1524241994229223, "grad_norm": 0.875, "learning_rate": 1.9727633634178658e-05, "loss": 0.3287, "step": 2014 }, { "epoch": 0.1524998817463696, "grad_norm": 0.97265625, "learning_rate": 1.972735747231898e-05, "loss": 0.369, "step": 2015 }, { "epoch": 0.15257556406981695, "grad_norm": 0.921875, "learning_rate": 1.9727081172460046e-05, "loss": 0.3669, "step": 2016 }, { "epoch": 0.15265124639326427, "grad_norm": 0.9453125, "learning_rate": 1.9726804734605773e-05, "loss": 0.3659, "step": 2017 }, { "epoch": 0.1527269287167116, "grad_norm": 0.93359375, "learning_rate": 1.9726528158760078e-05, "loss": 0.4054, "step": 2018 }, { "epoch": 0.15280261104015894, "grad_norm": 0.875, "learning_rate": 1.9726251444926895e-05, "loss": 0.3763, "step": 2019 }, { "epoch": 0.15287829336360625, "grad_norm": 0.84765625, "learning_rate": 1.972597459311014e-05, "loss": 0.3106, "step": 2020 }, { "epoch": 0.1529539756870536, "grad_norm": 0.828125, "learning_rate": 1.972569760331375e-05, "loss": 0.2806, "step": 2021 }, { "epoch": 0.15302965801050092, "grad_norm": 0.921875, "learning_rate": 1.9725420475541648e-05, "loss": 0.3461, "step": 2022 }, { "epoch": 0.15310534033394826, "grad_norm": 0.91015625, "learning_rate": 1.9725143209797765e-05, "loss": 0.3266, "step": 2023 }, { "epoch": 0.15318102265739558, "grad_norm": 0.890625, "learning_rate": 1.972486580608604e-05, "loss": 0.3427, "step": 2024 }, { "epoch": 0.1532567049808429, "grad_norm": 0.82421875, "learning_rate": 1.9724588264410402e-05, "loss": 0.2894, "step": 2025 }, { "epoch": 0.15333238730429025, "grad_norm": 0.84765625, "learning_rate": 1.9724310584774797e-05, "loss": 0.3103, "step": 2026 }, { "epoch": 0.15340806962773756, "grad_norm": 3.765625, "learning_rate": 1.9724032767183152e-05, "loss": 0.5146, "step": 2027 }, { "epoch": 0.1534837519511849, "grad_norm": 1.84375, "learning_rate": 1.9723754811639418e-05, "loss": 0.5251, "step": 2028 }, { "epoch": 0.15355943427463223, "grad_norm": 0.8984375, "learning_rate": 1.9723476718147536e-05, "loss": 0.3209, "step": 2029 }, { "epoch": 0.15363511659807957, "grad_norm": 0.96875, "learning_rate": 1.972319848671145e-05, "loss": 0.3652, "step": 2030 }, { "epoch": 0.1537107989215269, "grad_norm": 0.9375, "learning_rate": 1.9722920117335105e-05, "loss": 0.3824, "step": 2031 }, { "epoch": 0.1537864812449742, "grad_norm": 0.921875, "learning_rate": 1.9722641610022458e-05, "loss": 0.3531, "step": 2032 }, { "epoch": 0.15386216356842156, "grad_norm": 0.859375, "learning_rate": 1.9722362964777454e-05, "loss": 0.3131, "step": 2033 }, { "epoch": 0.15393784589186887, "grad_norm": 0.9140625, "learning_rate": 1.9722084181604045e-05, "loss": 0.3497, "step": 2034 }, { "epoch": 0.15401352821531622, "grad_norm": 0.9140625, "learning_rate": 1.972180526050619e-05, "loss": 0.3318, "step": 2035 }, { "epoch": 0.15408921053876354, "grad_norm": 0.94140625, "learning_rate": 1.9721526201487842e-05, "loss": 0.3525, "step": 2036 }, { "epoch": 0.15416489286221086, "grad_norm": 0.828125, "learning_rate": 1.9721247004552967e-05, "loss": 0.3262, "step": 2037 }, { "epoch": 0.1542405751856582, "grad_norm": 0.92578125, "learning_rate": 1.972096766970552e-05, "loss": 0.3561, "step": 2038 }, { "epoch": 0.15431625750910552, "grad_norm": 0.8203125, "learning_rate": 1.972068819694946e-05, "loss": 0.3091, "step": 2039 }, { "epoch": 0.15439193983255287, "grad_norm": 0.87890625, "learning_rate": 1.972040858628876e-05, "loss": 0.3231, "step": 2040 }, { "epoch": 0.15446762215600018, "grad_norm": 0.93359375, "learning_rate": 1.9720128837727383e-05, "loss": 0.3595, "step": 2041 }, { "epoch": 0.15454330447944753, "grad_norm": 0.90625, "learning_rate": 1.9719848951269298e-05, "loss": 0.3436, "step": 2042 }, { "epoch": 0.15461898680289485, "grad_norm": 0.921875, "learning_rate": 1.9719568926918477e-05, "loss": 0.3661, "step": 2043 }, { "epoch": 0.15469466912634217, "grad_norm": 0.94140625, "learning_rate": 1.9719288764678888e-05, "loss": 0.3507, "step": 2044 }, { "epoch": 0.1547703514497895, "grad_norm": 0.890625, "learning_rate": 1.9719008464554512e-05, "loss": 0.3199, "step": 2045 }, { "epoch": 0.15484603377323683, "grad_norm": 0.9765625, "learning_rate": 1.9718728026549324e-05, "loss": 0.3462, "step": 2046 }, { "epoch": 0.15492171609668418, "grad_norm": 0.90625, "learning_rate": 1.9718447450667298e-05, "loss": 0.3355, "step": 2047 }, { "epoch": 0.1549973984201315, "grad_norm": 0.90234375, "learning_rate": 1.9718166736912417e-05, "loss": 0.3736, "step": 2048 }, { "epoch": 0.15507308074357884, "grad_norm": 0.84375, "learning_rate": 1.9717885885288664e-05, "loss": 0.3032, "step": 2049 }, { "epoch": 0.15514876306702616, "grad_norm": 0.83984375, "learning_rate": 1.9717604895800026e-05, "loss": 0.3164, "step": 2050 }, { "epoch": 0.15522444539047348, "grad_norm": 0.875, "learning_rate": 1.9717323768450483e-05, "loss": 0.3136, "step": 2051 }, { "epoch": 0.15530012771392082, "grad_norm": 0.8125, "learning_rate": 1.9717042503244032e-05, "loss": 0.3072, "step": 2052 }, { "epoch": 0.15537581003736814, "grad_norm": 0.87890625, "learning_rate": 1.9716761100184652e-05, "loss": 0.3419, "step": 2053 }, { "epoch": 0.15545149236081549, "grad_norm": 0.90625, "learning_rate": 1.9716479559276345e-05, "loss": 0.352, "step": 2054 }, { "epoch": 0.1555271746842628, "grad_norm": 0.8359375, "learning_rate": 1.97161978805231e-05, "loss": 0.3414, "step": 2055 }, { "epoch": 0.15560285700771015, "grad_norm": 0.81640625, "learning_rate": 1.9715916063928916e-05, "loss": 0.2886, "step": 2056 }, { "epoch": 0.15567853933115747, "grad_norm": 1.0390625, "learning_rate": 1.971563410949779e-05, "loss": 0.3705, "step": 2057 }, { "epoch": 0.15575422165460479, "grad_norm": 0.93359375, "learning_rate": 1.971535201723372e-05, "loss": 0.3274, "step": 2058 }, { "epoch": 0.15582990397805213, "grad_norm": 0.89453125, "learning_rate": 1.9715069787140707e-05, "loss": 0.3316, "step": 2059 }, { "epoch": 0.15590558630149945, "grad_norm": 0.87890625, "learning_rate": 1.971478741922276e-05, "loss": 0.335, "step": 2060 }, { "epoch": 0.1559812686249468, "grad_norm": 0.8671875, "learning_rate": 1.971450491348388e-05, "loss": 0.3582, "step": 2061 }, { "epoch": 0.1560569509483941, "grad_norm": 0.8359375, "learning_rate": 1.9714222269928084e-05, "loss": 0.3181, "step": 2062 }, { "epoch": 0.15613263327184146, "grad_norm": 1.0, "learning_rate": 1.971393948855937e-05, "loss": 0.3776, "step": 2063 }, { "epoch": 0.15620831559528878, "grad_norm": 0.91796875, "learning_rate": 1.9713656569381755e-05, "loss": 0.3801, "step": 2064 }, { "epoch": 0.1562839979187361, "grad_norm": 0.83984375, "learning_rate": 1.9713373512399252e-05, "loss": 0.2995, "step": 2065 }, { "epoch": 0.15635968024218344, "grad_norm": 0.87109375, "learning_rate": 1.9713090317615877e-05, "loss": 0.3536, "step": 2066 }, { "epoch": 0.15643536256563076, "grad_norm": 0.87890625, "learning_rate": 1.971280698503565e-05, "loss": 0.3529, "step": 2067 }, { "epoch": 0.1565110448890781, "grad_norm": 4.1875, "learning_rate": 1.9712523514662583e-05, "loss": 0.5044, "step": 2068 }, { "epoch": 0.15658672721252542, "grad_norm": 0.9609375, "learning_rate": 1.971223990650071e-05, "loss": 0.3804, "step": 2069 }, { "epoch": 0.15666240953597274, "grad_norm": 0.87890625, "learning_rate": 1.971195616055404e-05, "loss": 0.3396, "step": 2070 }, { "epoch": 0.1567380918594201, "grad_norm": 0.89453125, "learning_rate": 1.971167227682661e-05, "loss": 0.3258, "step": 2071 }, { "epoch": 0.1568137741828674, "grad_norm": 0.85546875, "learning_rate": 1.9711388255322442e-05, "loss": 0.3248, "step": 2072 }, { "epoch": 0.15688945650631475, "grad_norm": 2.265625, "learning_rate": 1.9711104096045567e-05, "loss": 0.4482, "step": 2073 }, { "epoch": 0.15696513882976207, "grad_norm": 0.90625, "learning_rate": 1.9710819799000012e-05, "loss": 0.3394, "step": 2074 }, { "epoch": 0.15704082115320941, "grad_norm": 0.859375, "learning_rate": 1.9710535364189818e-05, "loss": 0.3492, "step": 2075 }, { "epoch": 0.15711650347665673, "grad_norm": 0.9453125, "learning_rate": 1.971025079161901e-05, "loss": 0.3742, "step": 2076 }, { "epoch": 0.15719218580010405, "grad_norm": 0.859375, "learning_rate": 1.970996608129164e-05, "loss": 0.3264, "step": 2077 }, { "epoch": 0.1572678681235514, "grad_norm": 0.9296875, "learning_rate": 1.970968123321173e-05, "loss": 0.3686, "step": 2078 }, { "epoch": 0.15734355044699871, "grad_norm": 0.88671875, "learning_rate": 1.9709396247383335e-05, "loss": 0.3485, "step": 2079 }, { "epoch": 0.15741923277044606, "grad_norm": 0.85546875, "learning_rate": 1.970911112381049e-05, "loss": 0.3449, "step": 2080 }, { "epoch": 0.15749491509389338, "grad_norm": 0.8203125, "learning_rate": 1.9708825862497243e-05, "loss": 0.3096, "step": 2081 }, { "epoch": 0.15757059741734072, "grad_norm": 0.94140625, "learning_rate": 1.970854046344764e-05, "loss": 0.385, "step": 2082 }, { "epoch": 0.15764627974078804, "grad_norm": 0.8515625, "learning_rate": 1.970825492666573e-05, "loss": 0.3313, "step": 2083 }, { "epoch": 0.15772196206423536, "grad_norm": 0.87109375, "learning_rate": 1.9707969252155563e-05, "loss": 0.3448, "step": 2084 }, { "epoch": 0.1577976443876827, "grad_norm": 0.82421875, "learning_rate": 1.9707683439921196e-05, "loss": 0.2934, "step": 2085 }, { "epoch": 0.15787332671113002, "grad_norm": 0.92578125, "learning_rate": 1.9707397489966675e-05, "loss": 0.3799, "step": 2086 }, { "epoch": 0.15794900903457737, "grad_norm": 0.8828125, "learning_rate": 1.9707111402296065e-05, "loss": 0.3742, "step": 2087 }, { "epoch": 0.1580246913580247, "grad_norm": 0.91796875, "learning_rate": 1.9706825176913423e-05, "loss": 0.3609, "step": 2088 }, { "epoch": 0.15810037368147203, "grad_norm": 0.91796875, "learning_rate": 1.9706538813822807e-05, "loss": 0.3756, "step": 2089 }, { "epoch": 0.15817605600491935, "grad_norm": 0.84765625, "learning_rate": 1.9706252313028283e-05, "loss": 0.3015, "step": 2090 }, { "epoch": 0.15825173832836667, "grad_norm": 0.88671875, "learning_rate": 1.970596567453391e-05, "loss": 0.3417, "step": 2091 }, { "epoch": 0.15832742065181402, "grad_norm": 0.84765625, "learning_rate": 1.970567889834376e-05, "loss": 0.3408, "step": 2092 }, { "epoch": 0.15840310297526133, "grad_norm": 0.91796875, "learning_rate": 1.97053919844619e-05, "loss": 0.2891, "step": 2093 }, { "epoch": 0.15847878529870868, "grad_norm": 0.9296875, "learning_rate": 1.9705104932892398e-05, "loss": 0.3897, "step": 2094 }, { "epoch": 0.158554467622156, "grad_norm": 2.515625, "learning_rate": 1.970481774363933e-05, "loss": 0.4627, "step": 2095 }, { "epoch": 0.15863014994560334, "grad_norm": 0.9765625, "learning_rate": 1.9704530416706765e-05, "loss": 0.4061, "step": 2096 }, { "epoch": 0.15870583226905066, "grad_norm": 0.84765625, "learning_rate": 1.9704242952098784e-05, "loss": 0.314, "step": 2097 }, { "epoch": 0.15878151459249798, "grad_norm": 0.796875, "learning_rate": 1.9703955349819464e-05, "loss": 0.2951, "step": 2098 }, { "epoch": 0.15885719691594533, "grad_norm": 0.8828125, "learning_rate": 1.9703667609872886e-05, "loss": 0.3275, "step": 2099 }, { "epoch": 0.15893287923939264, "grad_norm": 0.86328125, "learning_rate": 1.970337973226313e-05, "loss": 0.3656, "step": 2100 }, { "epoch": 0.15900856156284, "grad_norm": 0.92578125, "learning_rate": 1.970309171699428e-05, "loss": 0.4019, "step": 2101 }, { "epoch": 0.1590842438862873, "grad_norm": 0.94921875, "learning_rate": 1.9702803564070426e-05, "loss": 0.3991, "step": 2102 }, { "epoch": 0.15915992620973463, "grad_norm": 0.7890625, "learning_rate": 1.9702515273495647e-05, "loss": 0.2913, "step": 2103 }, { "epoch": 0.15923560853318197, "grad_norm": 0.9296875, "learning_rate": 1.9702226845274043e-05, "loss": 0.3512, "step": 2104 }, { "epoch": 0.1593112908566293, "grad_norm": 0.98828125, "learning_rate": 1.9701938279409704e-05, "loss": 0.4298, "step": 2105 }, { "epoch": 0.15938697318007664, "grad_norm": 1.3515625, "learning_rate": 1.9701649575906715e-05, "loss": 0.3121, "step": 2106 }, { "epoch": 0.15946265550352395, "grad_norm": 0.9140625, "learning_rate": 1.970136073476918e-05, "loss": 0.3635, "step": 2107 }, { "epoch": 0.1595383378269713, "grad_norm": 0.95703125, "learning_rate": 1.97010717560012e-05, "loss": 0.3814, "step": 2108 }, { "epoch": 0.15961402015041862, "grad_norm": 0.9296875, "learning_rate": 1.9700782639606864e-05, "loss": 0.3848, "step": 2109 }, { "epoch": 0.15968970247386594, "grad_norm": 2.109375, "learning_rate": 1.9700493385590282e-05, "loss": 0.4262, "step": 2110 }, { "epoch": 0.15976538479731328, "grad_norm": 0.8984375, "learning_rate": 1.9700203993955555e-05, "loss": 0.3629, "step": 2111 }, { "epoch": 0.1598410671207606, "grad_norm": 0.89453125, "learning_rate": 1.9699914464706784e-05, "loss": 0.3528, "step": 2112 }, { "epoch": 0.15991674944420795, "grad_norm": 0.8984375, "learning_rate": 1.969962479784808e-05, "loss": 0.3398, "step": 2113 }, { "epoch": 0.15999243176765526, "grad_norm": 1.7109375, "learning_rate": 1.969933499338356e-05, "loss": 0.4649, "step": 2114 }, { "epoch": 0.1600681140911026, "grad_norm": 0.8671875, "learning_rate": 1.9699045051317327e-05, "loss": 0.3114, "step": 2115 }, { "epoch": 0.16014379641454993, "grad_norm": 0.88671875, "learning_rate": 1.9698754971653492e-05, "loss": 0.2946, "step": 2116 }, { "epoch": 0.16021947873799725, "grad_norm": 0.92578125, "learning_rate": 1.9698464754396175e-05, "loss": 0.3842, "step": 2117 }, { "epoch": 0.1602951610614446, "grad_norm": 0.86328125, "learning_rate": 1.9698174399549493e-05, "loss": 0.3163, "step": 2118 }, { "epoch": 0.1603708433848919, "grad_norm": 0.8671875, "learning_rate": 1.9697883907117568e-05, "loss": 0.3354, "step": 2119 }, { "epoch": 0.16044652570833925, "grad_norm": 0.8984375, "learning_rate": 1.969759327710451e-05, "loss": 0.3655, "step": 2120 }, { "epoch": 0.16052220803178657, "grad_norm": 0.9140625, "learning_rate": 1.969730250951446e-05, "loss": 0.3418, "step": 2121 }, { "epoch": 0.16059789035523392, "grad_norm": 0.8984375, "learning_rate": 1.9697011604351524e-05, "loss": 0.3543, "step": 2122 }, { "epoch": 0.16067357267868124, "grad_norm": 0.92578125, "learning_rate": 1.969672056161984e-05, "loss": 0.3754, "step": 2123 }, { "epoch": 0.16074925500212855, "grad_norm": 0.8984375, "learning_rate": 1.9696429381323536e-05, "loss": 0.3747, "step": 2124 }, { "epoch": 0.1608249373255759, "grad_norm": 0.96875, "learning_rate": 1.9696138063466742e-05, "loss": 0.3859, "step": 2125 }, { "epoch": 0.16090061964902322, "grad_norm": 0.92578125, "learning_rate": 1.9695846608053587e-05, "loss": 0.4071, "step": 2126 }, { "epoch": 0.16097630197247056, "grad_norm": 0.91015625, "learning_rate": 1.9695555015088213e-05, "loss": 0.3638, "step": 2127 }, { "epoch": 0.16105198429591788, "grad_norm": 0.83984375, "learning_rate": 1.9695263284574752e-05, "loss": 0.3198, "step": 2128 }, { "epoch": 0.1611276666193652, "grad_norm": 0.8359375, "learning_rate": 1.9694971416517343e-05, "loss": 0.3266, "step": 2129 }, { "epoch": 0.16120334894281255, "grad_norm": 0.953125, "learning_rate": 1.9694679410920127e-05, "loss": 0.396, "step": 2130 }, { "epoch": 0.16127903126625986, "grad_norm": 0.96875, "learning_rate": 1.969438726778725e-05, "loss": 0.3907, "step": 2131 }, { "epoch": 0.1613547135897072, "grad_norm": 0.8046875, "learning_rate": 1.969409498712285e-05, "loss": 0.2916, "step": 2132 }, { "epoch": 0.16143039591315453, "grad_norm": 0.8671875, "learning_rate": 1.969380256893108e-05, "loss": 0.3403, "step": 2133 }, { "epoch": 0.16150607823660187, "grad_norm": 0.76171875, "learning_rate": 1.969351001321608e-05, "loss": 0.2686, "step": 2134 }, { "epoch": 0.1615817605600492, "grad_norm": 0.8671875, "learning_rate": 1.9693217319982013e-05, "loss": 0.3249, "step": 2135 }, { "epoch": 0.1616574428834965, "grad_norm": 1.0078125, "learning_rate": 1.9692924489233015e-05, "loss": 0.401, "step": 2136 }, { "epoch": 0.16173312520694386, "grad_norm": 0.8671875, "learning_rate": 1.969263152097326e-05, "loss": 0.3251, "step": 2137 }, { "epoch": 0.16180880753039117, "grad_norm": 0.88671875, "learning_rate": 1.9692338415206886e-05, "loss": 0.3297, "step": 2138 }, { "epoch": 0.16188448985383852, "grad_norm": 0.83203125, "learning_rate": 1.9692045171938063e-05, "loss": 0.3234, "step": 2139 }, { "epoch": 0.16196017217728584, "grad_norm": 0.9296875, "learning_rate": 1.9691751791170944e-05, "loss": 0.3907, "step": 2140 }, { "epoch": 0.16203585450073318, "grad_norm": 0.83203125, "learning_rate": 1.9691458272909697e-05, "loss": 0.3121, "step": 2141 }, { "epoch": 0.1621115368241805, "grad_norm": 0.84375, "learning_rate": 1.9691164617158482e-05, "loss": 0.2817, "step": 2142 }, { "epoch": 0.16218721914762782, "grad_norm": 0.8046875, "learning_rate": 1.9690870823921466e-05, "loss": 0.2948, "step": 2143 }, { "epoch": 0.16226290147107517, "grad_norm": 0.84765625, "learning_rate": 1.9690576893202818e-05, "loss": 0.3499, "step": 2144 }, { "epoch": 0.16233858379452248, "grad_norm": 0.921875, "learning_rate": 1.9690282825006707e-05, "loss": 0.3909, "step": 2145 }, { "epoch": 0.16241426611796983, "grad_norm": 0.828125, "learning_rate": 1.9689988619337304e-05, "loss": 0.3231, "step": 2146 }, { "epoch": 0.16248994844141715, "grad_norm": 0.79296875, "learning_rate": 1.9689694276198785e-05, "loss": 0.283, "step": 2147 }, { "epoch": 0.1625656307648645, "grad_norm": 2.328125, "learning_rate": 1.9689399795595325e-05, "loss": 0.4399, "step": 2148 }, { "epoch": 0.1626413130883118, "grad_norm": 0.84375, "learning_rate": 1.96891051775311e-05, "loss": 0.3319, "step": 2149 }, { "epoch": 0.16271699541175913, "grad_norm": 0.890625, "learning_rate": 1.968881042201029e-05, "loss": 0.3063, "step": 2150 }, { "epoch": 0.16279267773520648, "grad_norm": 0.8828125, "learning_rate": 1.968851552903708e-05, "loss": 0.3579, "step": 2151 }, { "epoch": 0.1628683600586538, "grad_norm": 0.8515625, "learning_rate": 1.9688220498615652e-05, "loss": 0.3369, "step": 2152 }, { "epoch": 0.16294404238210114, "grad_norm": 0.83203125, "learning_rate": 1.968792533075019e-05, "loss": 0.3284, "step": 2153 }, { "epoch": 0.16301972470554846, "grad_norm": 0.80859375, "learning_rate": 1.968763002544488e-05, "loss": 0.328, "step": 2154 }, { "epoch": 0.1630954070289958, "grad_norm": 0.87890625, "learning_rate": 1.9687334582703916e-05, "loss": 0.3527, "step": 2155 }, { "epoch": 0.16317108935244312, "grad_norm": 0.87109375, "learning_rate": 1.9687039002531486e-05, "loss": 0.3255, "step": 2156 }, { "epoch": 0.16324677167589044, "grad_norm": 0.83984375, "learning_rate": 1.9686743284931783e-05, "loss": 0.3231, "step": 2157 }, { "epoch": 0.16332245399933779, "grad_norm": 0.94921875, "learning_rate": 1.9686447429909005e-05, "loss": 0.3713, "step": 2158 }, { "epoch": 0.1633981363227851, "grad_norm": 0.90234375, "learning_rate": 1.9686151437467347e-05, "loss": 0.3514, "step": 2159 }, { "epoch": 0.16347381864623245, "grad_norm": 0.8984375, "learning_rate": 1.9685855307611007e-05, "loss": 0.3551, "step": 2160 }, { "epoch": 0.16354950096967977, "grad_norm": 0.83203125, "learning_rate": 1.968555904034419e-05, "loss": 0.3137, "step": 2161 }, { "epoch": 0.16362518329312709, "grad_norm": 0.86328125, "learning_rate": 1.9685262635671097e-05, "loss": 0.3508, "step": 2162 }, { "epoch": 0.16370086561657443, "grad_norm": 0.8125, "learning_rate": 1.9684966093595934e-05, "loss": 0.3308, "step": 2163 }, { "epoch": 0.16377654794002175, "grad_norm": 0.8984375, "learning_rate": 1.9684669414122905e-05, "loss": 0.3485, "step": 2164 }, { "epoch": 0.1638522302634691, "grad_norm": 0.86328125, "learning_rate": 1.9684372597256224e-05, "loss": 0.3377, "step": 2165 }, { "epoch": 0.1639279125869164, "grad_norm": 2.109375, "learning_rate": 1.9684075643000097e-05, "loss": 0.4312, "step": 2166 }, { "epoch": 0.16400359491036376, "grad_norm": 0.890625, "learning_rate": 1.9683778551358736e-05, "loss": 0.3182, "step": 2167 }, { "epoch": 0.16407927723381108, "grad_norm": 0.96484375, "learning_rate": 1.968348132233636e-05, "loss": 0.3637, "step": 2168 }, { "epoch": 0.1641549595572584, "grad_norm": 0.8203125, "learning_rate": 1.9683183955937185e-05, "loss": 0.3173, "step": 2169 }, { "epoch": 0.16423064188070574, "grad_norm": 1.0234375, "learning_rate": 1.9682886452165428e-05, "loss": 0.4374, "step": 2170 }, { "epoch": 0.16430632420415306, "grad_norm": 0.921875, "learning_rate": 1.9682588811025313e-05, "loss": 0.363, "step": 2171 }, { "epoch": 0.1643820065276004, "grad_norm": 0.89453125, "learning_rate": 1.9682291032521056e-05, "loss": 0.3533, "step": 2172 }, { "epoch": 0.16445768885104772, "grad_norm": 0.90625, "learning_rate": 1.9681993116656884e-05, "loss": 0.3799, "step": 2173 }, { "epoch": 0.16453337117449507, "grad_norm": 0.890625, "learning_rate": 1.968169506343703e-05, "loss": 0.3743, "step": 2174 }, { "epoch": 0.1646090534979424, "grad_norm": 0.85546875, "learning_rate": 1.968139687286571e-05, "loss": 0.3267, "step": 2175 }, { "epoch": 0.1646847358213897, "grad_norm": 0.9375, "learning_rate": 1.968109854494717e-05, "loss": 0.3447, "step": 2176 }, { "epoch": 0.16476041814483705, "grad_norm": 0.890625, "learning_rate": 1.968080007968563e-05, "loss": 0.3561, "step": 2177 }, { "epoch": 0.16483610046828437, "grad_norm": 0.91796875, "learning_rate": 1.9680501477085326e-05, "loss": 0.361, "step": 2178 }, { "epoch": 0.16491178279173171, "grad_norm": 0.87890625, "learning_rate": 1.96802027371505e-05, "loss": 0.3569, "step": 2179 }, { "epoch": 0.16498746511517903, "grad_norm": 1.203125, "learning_rate": 1.9679903859885384e-05, "loss": 0.358, "step": 2180 }, { "epoch": 0.16506314743862638, "grad_norm": 0.9375, "learning_rate": 1.9679604845294224e-05, "loss": 0.3651, "step": 2181 }, { "epoch": 0.1651388297620737, "grad_norm": 0.8828125, "learning_rate": 1.9679305693381252e-05, "loss": 0.3645, "step": 2182 }, { "epoch": 0.16521451208552101, "grad_norm": 0.8828125, "learning_rate": 1.967900640415072e-05, "loss": 0.3285, "step": 2183 }, { "epoch": 0.16529019440896836, "grad_norm": 0.8203125, "learning_rate": 1.9678706977606876e-05, "loss": 0.2957, "step": 2184 }, { "epoch": 0.16536587673241568, "grad_norm": 0.98828125, "learning_rate": 1.9678407413753965e-05, "loss": 0.3269, "step": 2185 }, { "epoch": 0.16544155905586302, "grad_norm": 0.87890625, "learning_rate": 1.967810771259623e-05, "loss": 0.3271, "step": 2186 }, { "epoch": 0.16551724137931034, "grad_norm": 0.90625, "learning_rate": 1.9677807874137933e-05, "loss": 0.3546, "step": 2187 }, { "epoch": 0.1655929237027577, "grad_norm": 0.8828125, "learning_rate": 1.9677507898383322e-05, "loss": 0.3259, "step": 2188 }, { "epoch": 0.165668606026205, "grad_norm": 0.828125, "learning_rate": 1.9677207785336657e-05, "loss": 0.3093, "step": 2189 }, { "epoch": 0.16574428834965232, "grad_norm": 0.89453125, "learning_rate": 1.9676907535002195e-05, "loss": 0.3399, "step": 2190 }, { "epoch": 0.16581997067309967, "grad_norm": 0.92578125, "learning_rate": 1.9676607147384187e-05, "loss": 0.3761, "step": 2191 }, { "epoch": 0.165895652996547, "grad_norm": 0.78515625, "learning_rate": 1.9676306622486905e-05, "loss": 0.2711, "step": 2192 }, { "epoch": 0.16597133531999433, "grad_norm": 0.91796875, "learning_rate": 1.9676005960314608e-05, "loss": 0.3726, "step": 2193 }, { "epoch": 0.16604701764344165, "grad_norm": 0.93359375, "learning_rate": 1.9675705160871563e-05, "loss": 0.3266, "step": 2194 }, { "epoch": 0.16612269996688897, "grad_norm": 0.89453125, "learning_rate": 1.967540422416203e-05, "loss": 0.3343, "step": 2195 }, { "epoch": 0.16619838229033632, "grad_norm": 0.86328125, "learning_rate": 1.967510315019029e-05, "loss": 0.3407, "step": 2196 }, { "epoch": 0.16627406461378363, "grad_norm": 0.8984375, "learning_rate": 1.967480193896061e-05, "loss": 0.3641, "step": 2197 }, { "epoch": 0.16634974693723098, "grad_norm": 0.796875, "learning_rate": 1.9674500590477258e-05, "loss": 0.3034, "step": 2198 }, { "epoch": 0.1664254292606783, "grad_norm": 0.91015625, "learning_rate": 1.9674199104744517e-05, "loss": 0.3595, "step": 2199 }, { "epoch": 0.16650111158412564, "grad_norm": 0.9296875, "learning_rate": 1.9673897481766655e-05, "loss": 0.3777, "step": 2200 }, { "epoch": 0.16657679390757296, "grad_norm": 0.81640625, "learning_rate": 1.967359572154796e-05, "loss": 0.3149, "step": 2201 }, { "epoch": 0.16665247623102028, "grad_norm": 0.8125, "learning_rate": 1.967329382409271e-05, "loss": 0.3206, "step": 2202 }, { "epoch": 0.16672815855446763, "grad_norm": 2.453125, "learning_rate": 1.967299178940518e-05, "loss": 0.5164, "step": 2203 }, { "epoch": 0.16680384087791494, "grad_norm": 0.8515625, "learning_rate": 1.967268961748967e-05, "loss": 0.2984, "step": 2204 }, { "epoch": 0.1668795232013623, "grad_norm": 0.84765625, "learning_rate": 1.9672387308350458e-05, "loss": 0.339, "step": 2205 }, { "epoch": 0.1669552055248096, "grad_norm": 0.8984375, "learning_rate": 1.967208486199183e-05, "loss": 0.3508, "step": 2206 }, { "epoch": 0.16703088784825695, "grad_norm": 0.90625, "learning_rate": 1.9671782278418084e-05, "loss": 0.3612, "step": 2207 }, { "epoch": 0.16710657017170427, "grad_norm": 0.828125, "learning_rate": 1.9671479557633508e-05, "loss": 0.3441, "step": 2208 }, { "epoch": 0.1671822524951516, "grad_norm": 0.9140625, "learning_rate": 1.9671176699642397e-05, "loss": 0.3769, "step": 2209 }, { "epoch": 0.16725793481859894, "grad_norm": 1.5234375, "learning_rate": 1.967087370444905e-05, "loss": 0.4396, "step": 2210 }, { "epoch": 0.16733361714204625, "grad_norm": 0.8046875, "learning_rate": 1.967057057205776e-05, "loss": 0.2914, "step": 2211 }, { "epoch": 0.1674092994654936, "grad_norm": 0.921875, "learning_rate": 1.9670267302472833e-05, "loss": 0.3563, "step": 2212 }, { "epoch": 0.16748498178894092, "grad_norm": 0.8515625, "learning_rate": 1.9669963895698573e-05, "loss": 0.3278, "step": 2213 }, { "epoch": 0.16756066411238826, "grad_norm": 0.84375, "learning_rate": 1.9669660351739277e-05, "loss": 0.3208, "step": 2214 }, { "epoch": 0.16763634643583558, "grad_norm": 0.81640625, "learning_rate": 1.9669356670599255e-05, "loss": 0.3031, "step": 2215 }, { "epoch": 0.1677120287592829, "grad_norm": 0.80859375, "learning_rate": 1.9669052852282817e-05, "loss": 0.3248, "step": 2216 }, { "epoch": 0.16778771108273025, "grad_norm": 1.0234375, "learning_rate": 1.966874889679427e-05, "loss": 0.3596, "step": 2217 }, { "epoch": 0.16786339340617756, "grad_norm": 0.8984375, "learning_rate": 1.9668444804137928e-05, "loss": 0.3752, "step": 2218 }, { "epoch": 0.1679390757296249, "grad_norm": 0.79296875, "learning_rate": 1.9668140574318108e-05, "loss": 0.2902, "step": 2219 }, { "epoch": 0.16801475805307223, "grad_norm": 0.81640625, "learning_rate": 1.966783620733912e-05, "loss": 0.2935, "step": 2220 }, { "epoch": 0.16809044037651955, "grad_norm": 0.8515625, "learning_rate": 1.9667531703205285e-05, "loss": 0.2864, "step": 2221 }, { "epoch": 0.1681661226999669, "grad_norm": 0.87109375, "learning_rate": 1.9667227061920925e-05, "loss": 0.3373, "step": 2222 }, { "epoch": 0.1682418050234142, "grad_norm": 0.84765625, "learning_rate": 1.9666922283490353e-05, "loss": 0.3088, "step": 2223 }, { "epoch": 0.16831748734686156, "grad_norm": 0.88671875, "learning_rate": 1.9666617367917907e-05, "loss": 0.3631, "step": 2224 }, { "epoch": 0.16839316967030887, "grad_norm": 0.93359375, "learning_rate": 1.9666312315207903e-05, "loss": 0.3077, "step": 2225 }, { "epoch": 0.16846885199375622, "grad_norm": 0.85546875, "learning_rate": 1.966600712536467e-05, "loss": 0.3433, "step": 2226 }, { "epoch": 0.16854453431720354, "grad_norm": 0.8203125, "learning_rate": 1.9665701798392537e-05, "loss": 0.3028, "step": 2227 }, { "epoch": 0.16862021664065086, "grad_norm": 0.88671875, "learning_rate": 1.966539633429584e-05, "loss": 0.3663, "step": 2228 }, { "epoch": 0.1686958989640982, "grad_norm": 0.890625, "learning_rate": 1.9665090733078906e-05, "loss": 0.3719, "step": 2229 }, { "epoch": 0.16877158128754552, "grad_norm": 0.890625, "learning_rate": 1.9664784994746076e-05, "loss": 0.327, "step": 2230 }, { "epoch": 0.16884726361099286, "grad_norm": 0.9609375, "learning_rate": 1.9664479119301687e-05, "loss": 0.376, "step": 2231 }, { "epoch": 0.16892294593444018, "grad_norm": 0.85546875, "learning_rate": 1.9664173106750075e-05, "loss": 0.3155, "step": 2232 }, { "epoch": 0.16899862825788753, "grad_norm": 0.8984375, "learning_rate": 1.9663866957095585e-05, "loss": 0.3751, "step": 2233 }, { "epoch": 0.16907431058133485, "grad_norm": 0.88671875, "learning_rate": 1.966356067034256e-05, "loss": 0.3507, "step": 2234 }, { "epoch": 0.16914999290478216, "grad_norm": 0.87890625, "learning_rate": 1.966325424649534e-05, "loss": 0.3214, "step": 2235 }, { "epoch": 0.1692256752282295, "grad_norm": 0.8828125, "learning_rate": 1.9662947685558275e-05, "loss": 0.3455, "step": 2236 }, { "epoch": 0.16930135755167683, "grad_norm": 0.90234375, "learning_rate": 1.9662640987535716e-05, "loss": 0.3714, "step": 2237 }, { "epoch": 0.16937703987512417, "grad_norm": 0.98828125, "learning_rate": 1.9662334152432015e-05, "loss": 0.4089, "step": 2238 }, { "epoch": 0.1694527221985715, "grad_norm": 0.79296875, "learning_rate": 1.9662027180251518e-05, "loss": 0.2972, "step": 2239 }, { "epoch": 0.16952840452201884, "grad_norm": 0.8828125, "learning_rate": 1.966172007099859e-05, "loss": 0.3445, "step": 2240 }, { "epoch": 0.16960408684546616, "grad_norm": 0.84375, "learning_rate": 1.966141282467758e-05, "loss": 0.3463, "step": 2241 }, { "epoch": 0.16967976916891347, "grad_norm": 0.875, "learning_rate": 1.9661105441292853e-05, "loss": 0.3421, "step": 2242 }, { "epoch": 0.16975545149236082, "grad_norm": 0.81640625, "learning_rate": 1.9660797920848763e-05, "loss": 0.3156, "step": 2243 }, { "epoch": 0.16983113381580814, "grad_norm": 0.8203125, "learning_rate": 1.9660490263349677e-05, "loss": 0.3229, "step": 2244 }, { "epoch": 0.16990681613925548, "grad_norm": 0.83203125, "learning_rate": 1.966018246879996e-05, "loss": 0.3456, "step": 2245 }, { "epoch": 0.1699824984627028, "grad_norm": 0.8359375, "learning_rate": 1.9659874537203976e-05, "loss": 0.3163, "step": 2246 }, { "epoch": 0.17005818078615015, "grad_norm": 0.84765625, "learning_rate": 1.965956646856609e-05, "loss": 0.3503, "step": 2247 }, { "epoch": 0.17013386310959747, "grad_norm": 0.94921875, "learning_rate": 1.9659258262890683e-05, "loss": 0.4087, "step": 2248 }, { "epoch": 0.17020954543304478, "grad_norm": 2.15625, "learning_rate": 1.9658949920182123e-05, "loss": 0.4008, "step": 2249 }, { "epoch": 0.17028522775649213, "grad_norm": 0.97265625, "learning_rate": 1.965864144044478e-05, "loss": 0.3971, "step": 2250 }, { "epoch": 0.17036091007993945, "grad_norm": 0.89453125, "learning_rate": 1.9658332823683033e-05, "loss": 0.3744, "step": 2251 }, { "epoch": 0.1704365924033868, "grad_norm": 0.90625, "learning_rate": 1.965802406990126e-05, "loss": 0.3577, "step": 2252 }, { "epoch": 0.1705122747268341, "grad_norm": 0.8046875, "learning_rate": 1.9657715179103847e-05, "loss": 0.2984, "step": 2253 }, { "epoch": 0.17058795705028143, "grad_norm": 0.8359375, "learning_rate": 1.9657406151295167e-05, "loss": 0.3077, "step": 2254 }, { "epoch": 0.17066363937372878, "grad_norm": 0.85546875, "learning_rate": 1.9657096986479606e-05, "loss": 0.311, "step": 2255 }, { "epoch": 0.1707393216971761, "grad_norm": 0.921875, "learning_rate": 1.9656787684661555e-05, "loss": 0.3619, "step": 2256 }, { "epoch": 0.17081500402062344, "grad_norm": 0.92578125, "learning_rate": 1.9656478245845398e-05, "loss": 0.3855, "step": 2257 }, { "epoch": 0.17089068634407076, "grad_norm": 0.87890625, "learning_rate": 1.9656168670035525e-05, "loss": 0.3511, "step": 2258 }, { "epoch": 0.1709663686675181, "grad_norm": 0.8671875, "learning_rate": 1.9655858957236333e-05, "loss": 0.3499, "step": 2259 }, { "epoch": 0.17104205099096542, "grad_norm": 0.86328125, "learning_rate": 1.9655549107452207e-05, "loss": 0.3377, "step": 2260 }, { "epoch": 0.17111773331441274, "grad_norm": 0.82421875, "learning_rate": 1.965523912068755e-05, "loss": 0.3218, "step": 2261 }, { "epoch": 0.17119341563786009, "grad_norm": 0.93359375, "learning_rate": 1.9654928996946758e-05, "loss": 0.3624, "step": 2262 }, { "epoch": 0.1712690979613074, "grad_norm": 0.8046875, "learning_rate": 1.965461873623423e-05, "loss": 0.3111, "step": 2263 }, { "epoch": 0.17134478028475475, "grad_norm": 1.15625, "learning_rate": 1.9654308338554364e-05, "loss": 0.3711, "step": 2264 }, { "epoch": 0.17142046260820207, "grad_norm": 0.8203125, "learning_rate": 1.9653997803911568e-05, "loss": 0.3262, "step": 2265 }, { "epoch": 0.1714961449316494, "grad_norm": 0.8203125, "learning_rate": 1.965368713231025e-05, "loss": 0.3047, "step": 2266 }, { "epoch": 0.17157182725509673, "grad_norm": 0.75390625, "learning_rate": 1.965337632375481e-05, "loss": 0.2777, "step": 2267 }, { "epoch": 0.17164750957854405, "grad_norm": 0.82421875, "learning_rate": 1.965306537824966e-05, "loss": 0.3151, "step": 2268 }, { "epoch": 0.1717231919019914, "grad_norm": 0.88671875, "learning_rate": 1.9652754295799213e-05, "loss": 0.3432, "step": 2269 }, { "epoch": 0.1717988742254387, "grad_norm": 0.87890625, "learning_rate": 1.9652443076407884e-05, "loss": 0.3463, "step": 2270 }, { "epoch": 0.17187455654888606, "grad_norm": 0.89453125, "learning_rate": 1.9652131720080082e-05, "loss": 0.3581, "step": 2271 }, { "epoch": 0.17195023887233338, "grad_norm": 0.8515625, "learning_rate": 1.965182022682023e-05, "loss": 0.2936, "step": 2272 }, { "epoch": 0.17202592119578072, "grad_norm": 0.8203125, "learning_rate": 1.965150859663275e-05, "loss": 0.3099, "step": 2273 }, { "epoch": 0.17210160351922804, "grad_norm": 0.99609375, "learning_rate": 1.9651196829522054e-05, "loss": 0.4214, "step": 2274 }, { "epoch": 0.17217728584267536, "grad_norm": 0.8828125, "learning_rate": 1.9650884925492567e-05, "loss": 0.3404, "step": 2275 }, { "epoch": 0.1722529681661227, "grad_norm": 0.83203125, "learning_rate": 1.9650572884548715e-05, "loss": 0.3453, "step": 2276 }, { "epoch": 0.17232865048957002, "grad_norm": 2.453125, "learning_rate": 1.965026070669493e-05, "loss": 0.4218, "step": 2277 }, { "epoch": 0.17240433281301737, "grad_norm": 0.87890625, "learning_rate": 1.964994839193563e-05, "loss": 0.3505, "step": 2278 }, { "epoch": 0.1724800151364647, "grad_norm": 0.7890625, "learning_rate": 1.964963594027526e-05, "loss": 0.2973, "step": 2279 }, { "epoch": 0.17255569745991203, "grad_norm": 0.921875, "learning_rate": 1.9649323351718242e-05, "loss": 0.3722, "step": 2280 }, { "epoch": 0.17263137978335935, "grad_norm": 0.9140625, "learning_rate": 1.964901062626901e-05, "loss": 0.3871, "step": 2281 }, { "epoch": 0.17270706210680667, "grad_norm": 0.87890625, "learning_rate": 1.9648697763932007e-05, "loss": 0.3732, "step": 2282 }, { "epoch": 0.17278274443025401, "grad_norm": 0.82421875, "learning_rate": 1.964838476471167e-05, "loss": 0.3372, "step": 2283 }, { "epoch": 0.17285842675370133, "grad_norm": 0.8671875, "learning_rate": 1.9648071628612437e-05, "loss": 0.3554, "step": 2284 }, { "epoch": 0.17293410907714868, "grad_norm": 0.79296875, "learning_rate": 1.964775835563875e-05, "loss": 0.2976, "step": 2285 }, { "epoch": 0.173009791400596, "grad_norm": 0.84765625, "learning_rate": 1.9647444945795056e-05, "loss": 0.3499, "step": 2286 }, { "epoch": 0.17308547372404331, "grad_norm": 0.8359375, "learning_rate": 1.9647131399085796e-05, "loss": 0.3288, "step": 2287 }, { "epoch": 0.17316115604749066, "grad_norm": 0.921875, "learning_rate": 1.9646817715515425e-05, "loss": 0.3617, "step": 2288 }, { "epoch": 0.17323683837093798, "grad_norm": 0.97265625, "learning_rate": 1.964650389508839e-05, "loss": 0.4041, "step": 2289 }, { "epoch": 0.17331252069438532, "grad_norm": 0.8359375, "learning_rate": 1.9646189937809145e-05, "loss": 0.3156, "step": 2290 }, { "epoch": 0.17338820301783264, "grad_norm": 0.85546875, "learning_rate": 1.9645875843682142e-05, "loss": 0.3194, "step": 2291 }, { "epoch": 0.17346388534128, "grad_norm": 0.890625, "learning_rate": 1.9645561612711833e-05, "loss": 0.3688, "step": 2292 }, { "epoch": 0.1735395676647273, "grad_norm": 0.83203125, "learning_rate": 1.9645247244902685e-05, "loss": 0.3242, "step": 2293 }, { "epoch": 0.17361524998817462, "grad_norm": 0.80859375, "learning_rate": 1.9644932740259152e-05, "loss": 0.3144, "step": 2294 }, { "epoch": 0.17369093231162197, "grad_norm": 0.953125, "learning_rate": 1.9644618098785696e-05, "loss": 0.3998, "step": 2295 }, { "epoch": 0.1737666146350693, "grad_norm": 0.85546875, "learning_rate": 1.9644303320486783e-05, "loss": 0.3224, "step": 2296 }, { "epoch": 0.17384229695851663, "grad_norm": 0.90625, "learning_rate": 1.9643988405366872e-05, "loss": 0.3487, "step": 2297 }, { "epoch": 0.17391797928196395, "grad_norm": 0.796875, "learning_rate": 1.9643673353430443e-05, "loss": 0.3229, "step": 2298 }, { "epoch": 0.1739936616054113, "grad_norm": 0.83984375, "learning_rate": 1.9643358164681952e-05, "loss": 0.2821, "step": 2299 }, { "epoch": 0.17406934392885862, "grad_norm": 0.91015625, "learning_rate": 1.964304283912588e-05, "loss": 0.403, "step": 2300 }, { "epoch": 0.17414502625230593, "grad_norm": 0.86328125, "learning_rate": 1.96427273767667e-05, "loss": 0.3467, "step": 2301 }, { "epoch": 0.17422070857575328, "grad_norm": 0.859375, "learning_rate": 1.9642411777608877e-05, "loss": 0.3199, "step": 2302 }, { "epoch": 0.1742963908992006, "grad_norm": 0.87109375, "learning_rate": 1.9642096041656903e-05, "loss": 0.3564, "step": 2303 }, { "epoch": 0.17437207322264794, "grad_norm": 0.8359375, "learning_rate": 1.9641780168915247e-05, "loss": 0.3362, "step": 2304 }, { "epoch": 0.17444775554609526, "grad_norm": 0.85546875, "learning_rate": 1.9641464159388395e-05, "loss": 0.3391, "step": 2305 }, { "epoch": 0.1745234378695426, "grad_norm": 4.21875, "learning_rate": 1.964114801308083e-05, "loss": 0.5351, "step": 2306 }, { "epoch": 0.17459912019298993, "grad_norm": 0.88671875, "learning_rate": 1.964083172999703e-05, "loss": 0.3346, "step": 2307 }, { "epoch": 0.17467480251643724, "grad_norm": 0.84765625, "learning_rate": 1.9640515310141494e-05, "loss": 0.2897, "step": 2308 }, { "epoch": 0.1747504848398846, "grad_norm": 0.8671875, "learning_rate": 1.96401987535187e-05, "loss": 0.3374, "step": 2309 }, { "epoch": 0.1748261671633319, "grad_norm": 0.84765625, "learning_rate": 1.9639882060133148e-05, "loss": 0.3554, "step": 2310 }, { "epoch": 0.17490184948677925, "grad_norm": 0.8359375, "learning_rate": 1.9639565229989325e-05, "loss": 0.302, "step": 2311 }, { "epoch": 0.17497753181022657, "grad_norm": 0.875, "learning_rate": 1.9639248263091725e-05, "loss": 0.3376, "step": 2312 }, { "epoch": 0.1750532141336739, "grad_norm": 0.8125, "learning_rate": 1.963893115944485e-05, "loss": 0.3253, "step": 2313 }, { "epoch": 0.17512889645712124, "grad_norm": 0.875, "learning_rate": 1.963861391905319e-05, "loss": 0.3506, "step": 2314 }, { "epoch": 0.17520457878056855, "grad_norm": 0.82421875, "learning_rate": 1.9638296541921255e-05, "loss": 0.3232, "step": 2315 }, { "epoch": 0.1752802611040159, "grad_norm": 0.9296875, "learning_rate": 1.9637979028053545e-05, "loss": 0.3597, "step": 2316 }, { "epoch": 0.17535594342746322, "grad_norm": 2.03125, "learning_rate": 1.963766137745456e-05, "loss": 0.447, "step": 2317 }, { "epoch": 0.17543162575091056, "grad_norm": 0.8046875, "learning_rate": 1.963734359012881e-05, "loss": 0.3016, "step": 2318 }, { "epoch": 0.17550730807435788, "grad_norm": 0.8671875, "learning_rate": 1.9637025666080808e-05, "loss": 0.3372, "step": 2319 }, { "epoch": 0.1755829903978052, "grad_norm": 1.0, "learning_rate": 1.9636707605315054e-05, "loss": 0.364, "step": 2320 }, { "epoch": 0.17565867272125255, "grad_norm": 0.828125, "learning_rate": 1.9636389407836065e-05, "loss": 0.3146, "step": 2321 }, { "epoch": 0.17573435504469986, "grad_norm": 0.86328125, "learning_rate": 1.9636071073648357e-05, "loss": 0.375, "step": 2322 }, { "epoch": 0.1758100373681472, "grad_norm": 0.90625, "learning_rate": 1.9635752602756443e-05, "loss": 0.3656, "step": 2323 }, { "epoch": 0.17588571969159453, "grad_norm": 0.8203125, "learning_rate": 1.9635433995164846e-05, "loss": 0.3053, "step": 2324 }, { "epoch": 0.17596140201504187, "grad_norm": 0.87890625, "learning_rate": 1.9635115250878083e-05, "loss": 0.3877, "step": 2325 }, { "epoch": 0.1760370843384892, "grad_norm": 0.90625, "learning_rate": 1.9634796369900673e-05, "loss": 0.3903, "step": 2326 }, { "epoch": 0.1761127666619365, "grad_norm": 0.859375, "learning_rate": 1.9634477352237144e-05, "loss": 0.3493, "step": 2327 }, { "epoch": 0.17618844898538386, "grad_norm": 0.87109375, "learning_rate": 1.963415819789202e-05, "loss": 0.3753, "step": 2328 }, { "epoch": 0.17626413130883117, "grad_norm": 0.83203125, "learning_rate": 1.963383890686983e-05, "loss": 0.3416, "step": 2329 }, { "epoch": 0.17633981363227852, "grad_norm": 0.8125, "learning_rate": 1.9633519479175103e-05, "loss": 0.3372, "step": 2330 }, { "epoch": 0.17641549595572584, "grad_norm": 0.92578125, "learning_rate": 1.9633199914812372e-05, "loss": 0.4068, "step": 2331 }, { "epoch": 0.17649117827917318, "grad_norm": 0.8046875, "learning_rate": 1.9632880213786164e-05, "loss": 0.3271, "step": 2332 }, { "epoch": 0.1765668606026205, "grad_norm": 0.93359375, "learning_rate": 1.9632560376101026e-05, "loss": 0.393, "step": 2333 }, { "epoch": 0.17664254292606782, "grad_norm": 0.83984375, "learning_rate": 1.9632240401761484e-05, "loss": 0.3458, "step": 2334 }, { "epoch": 0.17671822524951517, "grad_norm": 3.765625, "learning_rate": 1.963192029077208e-05, "loss": 0.5604, "step": 2335 }, { "epoch": 0.17679390757296248, "grad_norm": 0.8515625, "learning_rate": 1.9631600043137366e-05, "loss": 0.3382, "step": 2336 }, { "epoch": 0.17686958989640983, "grad_norm": 0.96875, "learning_rate": 1.9631279658861873e-05, "loss": 0.4015, "step": 2337 }, { "epoch": 0.17694527221985715, "grad_norm": 0.82421875, "learning_rate": 1.963095913795015e-05, "loss": 0.3078, "step": 2338 }, { "epoch": 0.1770209545433045, "grad_norm": 0.875, "learning_rate": 1.9630638480406742e-05, "loss": 0.3938, "step": 2339 }, { "epoch": 0.1770966368667518, "grad_norm": 0.8125, "learning_rate": 1.9630317686236204e-05, "loss": 0.3405, "step": 2340 }, { "epoch": 0.17717231919019913, "grad_norm": 0.86328125, "learning_rate": 1.962999675544308e-05, "loss": 0.3327, "step": 2341 }, { "epoch": 0.17724800151364647, "grad_norm": 0.921875, "learning_rate": 1.962967568803193e-05, "loss": 0.3526, "step": 2342 }, { "epoch": 0.1773236838370938, "grad_norm": 0.8671875, "learning_rate": 1.9629354484007304e-05, "loss": 0.3397, "step": 2343 }, { "epoch": 0.17739936616054114, "grad_norm": 0.8046875, "learning_rate": 1.9629033143373762e-05, "loss": 0.337, "step": 2344 }, { "epoch": 0.17747504848398846, "grad_norm": 0.87109375, "learning_rate": 1.9628711666135855e-05, "loss": 0.3347, "step": 2345 }, { "epoch": 0.17755073080743577, "grad_norm": 0.9140625, "learning_rate": 1.9628390052298155e-05, "loss": 0.3809, "step": 2346 }, { "epoch": 0.17762641313088312, "grad_norm": 0.92578125, "learning_rate": 1.9628068301865217e-05, "loss": 0.3562, "step": 2347 }, { "epoch": 0.17770209545433044, "grad_norm": 0.9453125, "learning_rate": 1.9627746414841607e-05, "loss": 0.4325, "step": 2348 }, { "epoch": 0.17777777777777778, "grad_norm": 0.8828125, "learning_rate": 1.9627424391231894e-05, "loss": 0.3608, "step": 2349 }, { "epoch": 0.1778534601012251, "grad_norm": 0.88671875, "learning_rate": 1.9627102231040646e-05, "loss": 0.3506, "step": 2350 }, { "epoch": 0.17792914242467245, "grad_norm": 0.87109375, "learning_rate": 1.962677993427243e-05, "loss": 0.3256, "step": 2351 }, { "epoch": 0.17800482474811977, "grad_norm": 0.8203125, "learning_rate": 1.962645750093182e-05, "loss": 0.3103, "step": 2352 }, { "epoch": 0.17808050707156708, "grad_norm": 0.87109375, "learning_rate": 1.9626134931023394e-05, "loss": 0.2896, "step": 2353 }, { "epoch": 0.17815618939501443, "grad_norm": 1.171875, "learning_rate": 1.9625812224551725e-05, "loss": 0.36, "step": 2354 }, { "epoch": 0.17823187171846175, "grad_norm": 0.828125, "learning_rate": 1.9625489381521387e-05, "loss": 0.3385, "step": 2355 }, { "epoch": 0.1783075540419091, "grad_norm": 0.9140625, "learning_rate": 1.9625166401936964e-05, "loss": 0.3653, "step": 2356 }, { "epoch": 0.1783832363653564, "grad_norm": 0.828125, "learning_rate": 1.9624843285803045e-05, "loss": 0.3002, "step": 2357 }, { "epoch": 0.17845891868880376, "grad_norm": 0.86328125, "learning_rate": 1.96245200331242e-05, "loss": 0.3508, "step": 2358 }, { "epoch": 0.17853460101225108, "grad_norm": 0.90625, "learning_rate": 1.9624196643905024e-05, "loss": 0.3815, "step": 2359 }, { "epoch": 0.1786102833356984, "grad_norm": 0.86328125, "learning_rate": 1.9623873118150105e-05, "loss": 0.3604, "step": 2360 }, { "epoch": 0.17868596565914574, "grad_norm": 0.796875, "learning_rate": 1.962354945586403e-05, "loss": 0.3155, "step": 2361 }, { "epoch": 0.17876164798259306, "grad_norm": 0.90234375, "learning_rate": 1.962322565705139e-05, "loss": 0.3723, "step": 2362 }, { "epoch": 0.1788373303060404, "grad_norm": 0.86328125, "learning_rate": 1.962290172171678e-05, "loss": 0.347, "step": 2363 }, { "epoch": 0.17891301262948772, "grad_norm": 0.859375, "learning_rate": 1.9622577649864797e-05, "loss": 0.3312, "step": 2364 }, { "epoch": 0.17898869495293507, "grad_norm": 0.88671875, "learning_rate": 1.9622253441500035e-05, "loss": 0.3586, "step": 2365 }, { "epoch": 0.17906437727638239, "grad_norm": 0.8125, "learning_rate": 1.9621929096627098e-05, "loss": 0.3239, "step": 2366 }, { "epoch": 0.1791400595998297, "grad_norm": 0.82421875, "learning_rate": 1.9621604615250583e-05, "loss": 0.3203, "step": 2367 }, { "epoch": 0.17921574192327705, "grad_norm": 5.15625, "learning_rate": 1.9621279997375096e-05, "loss": 0.6106, "step": 2368 }, { "epoch": 0.17929142424672437, "grad_norm": 0.87109375, "learning_rate": 1.9620955243005243e-05, "loss": 0.3502, "step": 2369 }, { "epoch": 0.1793671065701717, "grad_norm": 0.921875, "learning_rate": 1.962063035214563e-05, "loss": 0.3239, "step": 2370 }, { "epoch": 0.17944278889361903, "grad_norm": 0.921875, "learning_rate": 1.9620305324800865e-05, "loss": 0.3943, "step": 2371 }, { "epoch": 0.17951847121706638, "grad_norm": 0.7578125, "learning_rate": 1.961998016097556e-05, "loss": 0.2743, "step": 2372 }, { "epoch": 0.1795941535405137, "grad_norm": 0.9140625, "learning_rate": 1.9619654860674323e-05, "loss": 0.3646, "step": 2373 }, { "epoch": 0.179669835863961, "grad_norm": 1.03125, "learning_rate": 1.9619329423901782e-05, "loss": 0.4032, "step": 2374 }, { "epoch": 0.17974551818740836, "grad_norm": 0.81640625, "learning_rate": 1.9619003850662542e-05, "loss": 0.3288, "step": 2375 }, { "epoch": 0.17982120051085568, "grad_norm": 0.80859375, "learning_rate": 1.9618678140961226e-05, "loss": 0.3078, "step": 2376 }, { "epoch": 0.17989688283430302, "grad_norm": 0.89453125, "learning_rate": 1.9618352294802455e-05, "loss": 0.3339, "step": 2377 }, { "epoch": 0.17997256515775034, "grad_norm": 0.87109375, "learning_rate": 1.9618026312190847e-05, "loss": 0.3239, "step": 2378 }, { "epoch": 0.18004824748119766, "grad_norm": 0.8359375, "learning_rate": 1.9617700193131034e-05, "loss": 0.3338, "step": 2379 }, { "epoch": 0.180123929804645, "grad_norm": 0.8515625, "learning_rate": 1.961737393762764e-05, "loss": 0.3485, "step": 2380 }, { "epoch": 0.18019961212809232, "grad_norm": 0.89453125, "learning_rate": 1.9617047545685293e-05, "loss": 0.3567, "step": 2381 }, { "epoch": 0.18027529445153967, "grad_norm": 0.78515625, "learning_rate": 1.961672101730862e-05, "loss": 0.3012, "step": 2382 }, { "epoch": 0.180350976774987, "grad_norm": 0.82421875, "learning_rate": 1.9616394352502256e-05, "loss": 0.3021, "step": 2383 }, { "epoch": 0.18042665909843433, "grad_norm": 0.875, "learning_rate": 1.961606755127084e-05, "loss": 0.373, "step": 2384 }, { "epoch": 0.18050234142188165, "grad_norm": 0.73828125, "learning_rate": 1.9615740613619004e-05, "loss": 0.27, "step": 2385 }, { "epoch": 0.18057802374532897, "grad_norm": 0.9453125, "learning_rate": 1.9615413539551382e-05, "loss": 0.3783, "step": 2386 }, { "epoch": 0.18065370606877632, "grad_norm": 0.82421875, "learning_rate": 1.961508632907262e-05, "loss": 0.3116, "step": 2387 }, { "epoch": 0.18072938839222363, "grad_norm": 1.6875, "learning_rate": 1.961475898218736e-05, "loss": 0.4357, "step": 2388 }, { "epoch": 0.18080507071567098, "grad_norm": 0.8828125, "learning_rate": 1.9614431498900246e-05, "loss": 0.3629, "step": 2389 }, { "epoch": 0.1808807530391183, "grad_norm": 0.87890625, "learning_rate": 1.9614103879215917e-05, "loss": 0.3554, "step": 2390 }, { "epoch": 0.18095643536256564, "grad_norm": 0.8046875, "learning_rate": 1.961377612313903e-05, "loss": 0.3076, "step": 2391 }, { "epoch": 0.18103211768601296, "grad_norm": 1.578125, "learning_rate": 1.961344823067423e-05, "loss": 0.425, "step": 2392 }, { "epoch": 0.18110780000946028, "grad_norm": 0.890625, "learning_rate": 1.961312020182617e-05, "loss": 0.332, "step": 2393 }, { "epoch": 0.18118348233290762, "grad_norm": 0.84765625, "learning_rate": 1.9612792036599502e-05, "loss": 0.3092, "step": 2394 }, { "epoch": 0.18125916465635494, "grad_norm": 0.90234375, "learning_rate": 1.9612463734998882e-05, "loss": 0.388, "step": 2395 }, { "epoch": 0.1813348469798023, "grad_norm": 0.78515625, "learning_rate": 1.9612135297028973e-05, "loss": 0.2829, "step": 2396 }, { "epoch": 0.1814105293032496, "grad_norm": 0.88671875, "learning_rate": 1.9611806722694427e-05, "loss": 0.3413, "step": 2397 }, { "epoch": 0.18148621162669695, "grad_norm": 0.93359375, "learning_rate": 1.9611478011999906e-05, "loss": 0.3957, "step": 2398 }, { "epoch": 0.18156189395014427, "grad_norm": 0.79296875, "learning_rate": 1.9611149164950078e-05, "loss": 0.293, "step": 2399 }, { "epoch": 0.1816375762735916, "grad_norm": 0.87109375, "learning_rate": 1.9610820181549606e-05, "loss": 0.3421, "step": 2400 }, { "epoch": 0.18171325859703893, "grad_norm": 0.84375, "learning_rate": 1.9610491061803153e-05, "loss": 0.3418, "step": 2401 }, { "epoch": 0.18178894092048625, "grad_norm": 0.859375, "learning_rate": 1.96101618057154e-05, "loss": 0.3002, "step": 2402 }, { "epoch": 0.1818646232439336, "grad_norm": 2.78125, "learning_rate": 1.9609832413291e-05, "loss": 0.5291, "step": 2403 }, { "epoch": 0.18194030556738092, "grad_norm": 0.83203125, "learning_rate": 1.9609502884534645e-05, "loss": 0.3216, "step": 2404 }, { "epoch": 0.18201598789082823, "grad_norm": 1.171875, "learning_rate": 1.9609173219450998e-05, "loss": 0.3753, "step": 2405 }, { "epoch": 0.18209167021427558, "grad_norm": 0.8203125, "learning_rate": 1.9608843418044738e-05, "loss": 0.3055, "step": 2406 }, { "epoch": 0.1821673525377229, "grad_norm": 0.7890625, "learning_rate": 1.9608513480320545e-05, "loss": 0.2999, "step": 2407 }, { "epoch": 0.18224303486117024, "grad_norm": 0.8828125, "learning_rate": 1.96081834062831e-05, "loss": 0.3448, "step": 2408 }, { "epoch": 0.18231871718461756, "grad_norm": 0.8671875, "learning_rate": 1.9607853195937085e-05, "loss": 0.3291, "step": 2409 }, { "epoch": 0.1823943995080649, "grad_norm": 0.796875, "learning_rate": 1.9607522849287185e-05, "loss": 0.2915, "step": 2410 }, { "epoch": 0.18247008183151223, "grad_norm": 0.86328125, "learning_rate": 1.960719236633809e-05, "loss": 0.3271, "step": 2411 }, { "epoch": 0.18254576415495954, "grad_norm": 0.78515625, "learning_rate": 1.9606861747094478e-05, "loss": 0.2954, "step": 2412 }, { "epoch": 0.1826214464784069, "grad_norm": 0.8359375, "learning_rate": 1.960653099156105e-05, "loss": 0.3289, "step": 2413 }, { "epoch": 0.1826971288018542, "grad_norm": 0.8515625, "learning_rate": 1.9606200099742492e-05, "loss": 0.3626, "step": 2414 }, { "epoch": 0.18277281112530155, "grad_norm": 0.78125, "learning_rate": 1.96058690716435e-05, "loss": 0.3002, "step": 2415 }, { "epoch": 0.18284849344874887, "grad_norm": 0.92578125, "learning_rate": 1.9605537907268776e-05, "loss": 0.357, "step": 2416 }, { "epoch": 0.18292417577219622, "grad_norm": 0.88671875, "learning_rate": 1.9605206606623014e-05, "loss": 0.3807, "step": 2417 }, { "epoch": 0.18299985809564354, "grad_norm": 0.87890625, "learning_rate": 1.9604875169710906e-05, "loss": 0.3796, "step": 2418 }, { "epoch": 0.18307554041909085, "grad_norm": 0.83203125, "learning_rate": 1.9604543596537166e-05, "loss": 0.3242, "step": 2419 }, { "epoch": 0.1831512227425382, "grad_norm": 0.83984375, "learning_rate": 1.9604211887106494e-05, "loss": 0.3344, "step": 2420 }, { "epoch": 0.18322690506598552, "grad_norm": 0.83203125, "learning_rate": 1.960388004142359e-05, "loss": 0.3303, "step": 2421 }, { "epoch": 0.18330258738943286, "grad_norm": 0.79296875, "learning_rate": 1.9603548059493173e-05, "loss": 0.2927, "step": 2422 }, { "epoch": 0.18337826971288018, "grad_norm": 0.8046875, "learning_rate": 1.9603215941319942e-05, "loss": 0.3344, "step": 2423 }, { "epoch": 0.18345395203632753, "grad_norm": 0.8203125, "learning_rate": 1.9602883686908614e-05, "loss": 0.3562, "step": 2424 }, { "epoch": 0.18352963435977485, "grad_norm": 0.83984375, "learning_rate": 1.9602551296263902e-05, "loss": 0.3504, "step": 2425 }, { "epoch": 0.18360531668322216, "grad_norm": 1.0703125, "learning_rate": 1.9602218769390522e-05, "loss": 0.3527, "step": 2426 }, { "epoch": 0.1836809990066695, "grad_norm": 0.84375, "learning_rate": 1.960188610629319e-05, "loss": 0.3446, "step": 2427 }, { "epoch": 0.18375668133011683, "grad_norm": 0.890625, "learning_rate": 1.9601553306976626e-05, "loss": 0.3409, "step": 2428 }, { "epoch": 0.18383236365356417, "grad_norm": 0.97265625, "learning_rate": 1.9601220371445553e-05, "loss": 0.3697, "step": 2429 }, { "epoch": 0.1839080459770115, "grad_norm": 0.8046875, "learning_rate": 1.9600887299704694e-05, "loss": 0.3114, "step": 2430 }, { "epoch": 0.18398372830045884, "grad_norm": 0.9140625, "learning_rate": 1.960055409175877e-05, "loss": 0.3658, "step": 2431 }, { "epoch": 0.18405941062390616, "grad_norm": 0.81640625, "learning_rate": 1.960022074761251e-05, "loss": 0.3237, "step": 2432 }, { "epoch": 0.18413509294735347, "grad_norm": 0.87890625, "learning_rate": 1.9599887267270648e-05, "loss": 0.345, "step": 2433 }, { "epoch": 0.18421077527080082, "grad_norm": 0.9296875, "learning_rate": 1.959955365073791e-05, "loss": 0.378, "step": 2434 }, { "epoch": 0.18428645759424814, "grad_norm": 0.85546875, "learning_rate": 1.9599219898019027e-05, "loss": 0.346, "step": 2435 }, { "epoch": 0.18436213991769548, "grad_norm": 0.84765625, "learning_rate": 1.959888600911874e-05, "loss": 0.3133, "step": 2436 }, { "epoch": 0.1844378222411428, "grad_norm": 0.85546875, "learning_rate": 1.9598551984041782e-05, "loss": 0.3588, "step": 2437 }, { "epoch": 0.18451350456459012, "grad_norm": 0.81640625, "learning_rate": 1.9598217822792892e-05, "loss": 0.3259, "step": 2438 }, { "epoch": 0.18458918688803747, "grad_norm": 0.796875, "learning_rate": 1.959788352537681e-05, "loss": 0.3227, "step": 2439 }, { "epoch": 0.18466486921148478, "grad_norm": 0.828125, "learning_rate": 1.9597549091798284e-05, "loss": 0.3518, "step": 2440 }, { "epoch": 0.18474055153493213, "grad_norm": 0.828125, "learning_rate": 1.9597214522062053e-05, "loss": 0.3267, "step": 2441 }, { "epoch": 0.18481623385837945, "grad_norm": 0.84375, "learning_rate": 1.959687981617286e-05, "loss": 0.3105, "step": 2442 }, { "epoch": 0.1848919161818268, "grad_norm": 0.8984375, "learning_rate": 1.959654497413546e-05, "loss": 0.3343, "step": 2443 }, { "epoch": 0.1849675985052741, "grad_norm": 1.0390625, "learning_rate": 1.9596209995954606e-05, "loss": 0.3601, "step": 2444 }, { "epoch": 0.18504328082872143, "grad_norm": 0.890625, "learning_rate": 1.9595874881635038e-05, "loss": 0.3705, "step": 2445 }, { "epoch": 0.18511896315216878, "grad_norm": 0.91796875, "learning_rate": 1.959553963118152e-05, "loss": 0.3879, "step": 2446 }, { "epoch": 0.1851946454756161, "grad_norm": 0.8515625, "learning_rate": 1.959520424459881e-05, "loss": 0.3456, "step": 2447 }, { "epoch": 0.18527032779906344, "grad_norm": 0.87109375, "learning_rate": 1.959486872189166e-05, "loss": 0.339, "step": 2448 }, { "epoch": 0.18534601012251076, "grad_norm": 0.8046875, "learning_rate": 1.959453306306483e-05, "loss": 0.3001, "step": 2449 }, { "epoch": 0.1854216924459581, "grad_norm": 0.8984375, "learning_rate": 1.9594197268123087e-05, "loss": 0.3508, "step": 2450 }, { "epoch": 0.18549737476940542, "grad_norm": 0.87890625, "learning_rate": 1.9593861337071187e-05, "loss": 0.3483, "step": 2451 }, { "epoch": 0.18557305709285274, "grad_norm": 0.88671875, "learning_rate": 1.9593525269913903e-05, "loss": 0.3567, "step": 2452 }, { "epoch": 0.18564873941630008, "grad_norm": 0.80078125, "learning_rate": 1.9593189066655998e-05, "loss": 0.3197, "step": 2453 }, { "epoch": 0.1857244217397474, "grad_norm": 1.0390625, "learning_rate": 1.9592852727302247e-05, "loss": 0.4041, "step": 2454 }, { "epoch": 0.18580010406319475, "grad_norm": 0.84375, "learning_rate": 1.9592516251857417e-05, "loss": 0.3236, "step": 2455 }, { "epoch": 0.18587578638664207, "grad_norm": 0.8515625, "learning_rate": 1.9592179640326277e-05, "loss": 0.3013, "step": 2456 }, { "epoch": 0.1859514687100894, "grad_norm": 0.97265625, "learning_rate": 1.9591842892713616e-05, "loss": 0.3262, "step": 2457 }, { "epoch": 0.18602715103353673, "grad_norm": 0.84765625, "learning_rate": 1.95915060090242e-05, "loss": 0.3442, "step": 2458 }, { "epoch": 0.18610283335698405, "grad_norm": 0.859375, "learning_rate": 1.9591168989262815e-05, "loss": 0.3346, "step": 2459 }, { "epoch": 0.1861785156804314, "grad_norm": 0.87890625, "learning_rate": 1.9590831833434233e-05, "loss": 0.3124, "step": 2460 }, { "epoch": 0.1862541980038787, "grad_norm": 0.8671875, "learning_rate": 1.959049454154325e-05, "loss": 0.3509, "step": 2461 }, { "epoch": 0.18632988032732606, "grad_norm": 0.86328125, "learning_rate": 1.9590157113594636e-05, "loss": 0.355, "step": 2462 }, { "epoch": 0.18640556265077338, "grad_norm": 0.8515625, "learning_rate": 1.958981954959319e-05, "loss": 0.331, "step": 2463 }, { "epoch": 0.18648124497422072, "grad_norm": 0.81640625, "learning_rate": 1.9589481849543696e-05, "loss": 0.3311, "step": 2464 }, { "epoch": 0.18655692729766804, "grad_norm": 0.90234375, "learning_rate": 1.958914401345095e-05, "loss": 0.3557, "step": 2465 }, { "epoch": 0.18663260962111536, "grad_norm": 0.9140625, "learning_rate": 1.9588806041319735e-05, "loss": 0.3739, "step": 2466 }, { "epoch": 0.1867082919445627, "grad_norm": 0.80078125, "learning_rate": 1.9588467933154856e-05, "loss": 0.3212, "step": 2467 }, { "epoch": 0.18678397426801002, "grad_norm": 0.921875, "learning_rate": 1.95881296889611e-05, "loss": 0.3921, "step": 2468 }, { "epoch": 0.18685965659145737, "grad_norm": 0.87109375, "learning_rate": 1.9587791308743277e-05, "loss": 0.3555, "step": 2469 }, { "epoch": 0.1869353389149047, "grad_norm": 0.76953125, "learning_rate": 1.9587452792506178e-05, "loss": 0.2728, "step": 2470 }, { "epoch": 0.187011021238352, "grad_norm": 0.8828125, "learning_rate": 1.9587114140254605e-05, "loss": 0.3262, "step": 2471 }, { "epoch": 0.18708670356179935, "grad_norm": 0.953125, "learning_rate": 1.958677535199337e-05, "loss": 0.3867, "step": 2472 }, { "epoch": 0.18716238588524667, "grad_norm": 0.8515625, "learning_rate": 1.9586436427727276e-05, "loss": 0.339, "step": 2473 }, { "epoch": 0.18723806820869401, "grad_norm": 0.80859375, "learning_rate": 1.9586097367461128e-05, "loss": 0.3259, "step": 2474 }, { "epoch": 0.18731375053214133, "grad_norm": 0.921875, "learning_rate": 1.9585758171199738e-05, "loss": 0.3835, "step": 2475 }, { "epoch": 0.18738943285558868, "grad_norm": 0.8203125, "learning_rate": 1.958541883894792e-05, "loss": 0.3323, "step": 2476 }, { "epoch": 0.187465115179036, "grad_norm": 0.85546875, "learning_rate": 1.9585079370710484e-05, "loss": 0.3144, "step": 2477 }, { "epoch": 0.18754079750248331, "grad_norm": 2.453125, "learning_rate": 1.9584739766492253e-05, "loss": 0.3359, "step": 2478 }, { "epoch": 0.18761647982593066, "grad_norm": 0.92578125, "learning_rate": 1.9584400026298037e-05, "loss": 0.4192, "step": 2479 }, { "epoch": 0.18769216214937798, "grad_norm": 0.90625, "learning_rate": 1.958406015013266e-05, "loss": 0.377, "step": 2480 }, { "epoch": 0.18776784447282532, "grad_norm": 0.859375, "learning_rate": 1.9583720138000944e-05, "loss": 0.323, "step": 2481 }, { "epoch": 0.18784352679627264, "grad_norm": 0.8359375, "learning_rate": 1.9583379989907712e-05, "loss": 0.3636, "step": 2482 }, { "epoch": 0.18791920911972, "grad_norm": 0.83203125, "learning_rate": 1.958303970585779e-05, "loss": 0.325, "step": 2483 }, { "epoch": 0.1879948914431673, "grad_norm": 0.87890625, "learning_rate": 1.9582699285856e-05, "loss": 0.3369, "step": 2484 }, { "epoch": 0.18807057376661462, "grad_norm": 0.80859375, "learning_rate": 1.958235872990718e-05, "loss": 0.325, "step": 2485 }, { "epoch": 0.18814625609006197, "grad_norm": 0.8515625, "learning_rate": 1.9582018038016156e-05, "loss": 0.3765, "step": 2486 }, { "epoch": 0.1882219384135093, "grad_norm": 0.859375, "learning_rate": 1.9581677210187763e-05, "loss": 0.3821, "step": 2487 }, { "epoch": 0.18829762073695663, "grad_norm": 0.91796875, "learning_rate": 1.9581336246426835e-05, "loss": 0.3701, "step": 2488 }, { "epoch": 0.18837330306040395, "grad_norm": 0.81640625, "learning_rate": 1.9580995146738213e-05, "loss": 0.3262, "step": 2489 }, { "epoch": 0.1884489853838513, "grad_norm": 0.8671875, "learning_rate": 1.9580653911126733e-05, "loss": 0.3612, "step": 2490 }, { "epoch": 0.18852466770729862, "grad_norm": 0.875, "learning_rate": 1.9580312539597236e-05, "loss": 0.3505, "step": 2491 }, { "epoch": 0.18860035003074593, "grad_norm": 0.8515625, "learning_rate": 1.9579971032154563e-05, "loss": 0.3341, "step": 2492 }, { "epoch": 0.18867603235419328, "grad_norm": 0.8515625, "learning_rate": 1.9579629388803563e-05, "loss": 0.2972, "step": 2493 }, { "epoch": 0.1887517146776406, "grad_norm": 0.93359375, "learning_rate": 1.957928760954908e-05, "loss": 0.3537, "step": 2494 }, { "epoch": 0.18882739700108794, "grad_norm": 4.1875, "learning_rate": 1.9578945694395967e-05, "loss": 0.5261, "step": 2495 }, { "epoch": 0.18890307932453526, "grad_norm": 0.875, "learning_rate": 1.9578603643349066e-05, "loss": 0.3652, "step": 2496 }, { "epoch": 0.18897876164798258, "grad_norm": 0.82421875, "learning_rate": 1.957826145641324e-05, "loss": 0.3066, "step": 2497 }, { "epoch": 0.18905444397142993, "grad_norm": 0.83203125, "learning_rate": 1.9577919133593335e-05, "loss": 0.3473, "step": 2498 }, { "epoch": 0.18913012629487724, "grad_norm": 0.81640625, "learning_rate": 1.957757667489421e-05, "loss": 0.3608, "step": 2499 }, { "epoch": 0.1892058086183246, "grad_norm": 0.8359375, "learning_rate": 1.9577234080320725e-05, "loss": 0.367, "step": 2500 }, { "epoch": 0.1892814909417719, "grad_norm": 0.8828125, "learning_rate": 1.957689134987774e-05, "loss": 0.3709, "step": 2501 }, { "epoch": 0.18935717326521925, "grad_norm": 0.8203125, "learning_rate": 1.9576548483570116e-05, "loss": 0.336, "step": 2502 }, { "epoch": 0.18943285558866657, "grad_norm": 0.82421875, "learning_rate": 1.957620548140272e-05, "loss": 0.3528, "step": 2503 }, { "epoch": 0.1895085379121139, "grad_norm": 0.83984375, "learning_rate": 1.9575862343380417e-05, "loss": 0.2846, "step": 2504 }, { "epoch": 0.18958422023556123, "grad_norm": 0.828125, "learning_rate": 1.957551906950807e-05, "loss": 0.3203, "step": 2505 }, { "epoch": 0.18965990255900855, "grad_norm": 0.875, "learning_rate": 1.9575175659790555e-05, "loss": 0.3229, "step": 2506 }, { "epoch": 0.1897355848824559, "grad_norm": 0.7734375, "learning_rate": 1.957483211423274e-05, "loss": 0.2881, "step": 2507 }, { "epoch": 0.18981126720590322, "grad_norm": 0.8203125, "learning_rate": 1.9574488432839502e-05, "loss": 0.2668, "step": 2508 }, { "epoch": 0.18988694952935056, "grad_norm": 0.94921875, "learning_rate": 1.9574144615615713e-05, "loss": 0.4168, "step": 2509 }, { "epoch": 0.18996263185279788, "grad_norm": 0.875, "learning_rate": 1.9573800662566257e-05, "loss": 0.3281, "step": 2510 }, { "epoch": 0.1900383141762452, "grad_norm": 0.76953125, "learning_rate": 1.9573456573696006e-05, "loss": 0.3061, "step": 2511 }, { "epoch": 0.19011399649969254, "grad_norm": 0.83203125, "learning_rate": 1.9573112349009846e-05, "loss": 0.322, "step": 2512 }, { "epoch": 0.19018967882313986, "grad_norm": 0.81640625, "learning_rate": 1.957276798851266e-05, "loss": 0.293, "step": 2513 }, { "epoch": 0.1902653611465872, "grad_norm": 0.8984375, "learning_rate": 1.957242349220933e-05, "loss": 0.3513, "step": 2514 }, { "epoch": 0.19034104347003453, "grad_norm": 0.765625, "learning_rate": 1.9572078860104748e-05, "loss": 0.2757, "step": 2515 }, { "epoch": 0.19041672579348187, "grad_norm": 0.828125, "learning_rate": 1.95717340922038e-05, "loss": 0.319, "step": 2516 }, { "epoch": 0.1904924081169292, "grad_norm": 0.91015625, "learning_rate": 1.957138918851138e-05, "loss": 0.3515, "step": 2517 }, { "epoch": 0.1905680904403765, "grad_norm": 0.83203125, "learning_rate": 1.9571044149032377e-05, "loss": 0.3172, "step": 2518 }, { "epoch": 0.19064377276382385, "grad_norm": 0.87109375, "learning_rate": 1.9570698973771692e-05, "loss": 0.3435, "step": 2519 }, { "epoch": 0.19071945508727117, "grad_norm": 6.4375, "learning_rate": 1.9570353662734217e-05, "loss": 0.4546, "step": 2520 }, { "epoch": 0.19079513741071852, "grad_norm": 0.859375, "learning_rate": 1.957000821592485e-05, "loss": 0.3524, "step": 2521 }, { "epoch": 0.19087081973416584, "grad_norm": 0.8359375, "learning_rate": 1.9569662633348493e-05, "loss": 0.3228, "step": 2522 }, { "epoch": 0.19094650205761318, "grad_norm": 0.84375, "learning_rate": 1.956931691501005e-05, "loss": 0.3192, "step": 2523 }, { "epoch": 0.1910221843810605, "grad_norm": 0.84375, "learning_rate": 1.9568971060914428e-05, "loss": 0.3422, "step": 2524 }, { "epoch": 0.19109786670450782, "grad_norm": 0.87109375, "learning_rate": 1.9568625071066528e-05, "loss": 0.293, "step": 2525 }, { "epoch": 0.19117354902795516, "grad_norm": 0.81640625, "learning_rate": 1.9568278945471264e-05, "loss": 0.2948, "step": 2526 }, { "epoch": 0.19124923135140248, "grad_norm": 0.8515625, "learning_rate": 1.9567932684133543e-05, "loss": 0.3321, "step": 2527 }, { "epoch": 0.19132491367484983, "grad_norm": 1.0625, "learning_rate": 1.9567586287058274e-05, "loss": 0.3543, "step": 2528 }, { "epoch": 0.19140059599829715, "grad_norm": 0.8203125, "learning_rate": 1.9567239754250376e-05, "loss": 0.3257, "step": 2529 }, { "epoch": 0.19147627832174446, "grad_norm": 0.90234375, "learning_rate": 1.9566893085714765e-05, "loss": 0.3688, "step": 2530 }, { "epoch": 0.1915519606451918, "grad_norm": 0.85546875, "learning_rate": 1.956654628145636e-05, "loss": 0.3327, "step": 2531 }, { "epoch": 0.19162764296863913, "grad_norm": 1.9765625, "learning_rate": 1.9566199341480075e-05, "loss": 0.4945, "step": 2532 }, { "epoch": 0.19170332529208647, "grad_norm": 0.9296875, "learning_rate": 1.956585226579084e-05, "loss": 0.4076, "step": 2533 }, { "epoch": 0.1917790076155338, "grad_norm": 0.88671875, "learning_rate": 1.956550505439357e-05, "loss": 0.3353, "step": 2534 }, { "epoch": 0.19185468993898114, "grad_norm": 0.86328125, "learning_rate": 1.95651577072932e-05, "loss": 0.2984, "step": 2535 }, { "epoch": 0.19193037226242846, "grad_norm": 0.91796875, "learning_rate": 1.9564810224494658e-05, "loss": 0.3903, "step": 2536 }, { "epoch": 0.19200605458587577, "grad_norm": 0.94921875, "learning_rate": 1.956446260600286e-05, "loss": 0.4078, "step": 2537 }, { "epoch": 0.19208173690932312, "grad_norm": 0.82421875, "learning_rate": 1.9564114851822752e-05, "loss": 0.3382, "step": 2538 }, { "epoch": 0.19215741923277044, "grad_norm": 0.890625, "learning_rate": 1.956376696195926e-05, "loss": 0.3209, "step": 2539 }, { "epoch": 0.19223310155621778, "grad_norm": 0.88671875, "learning_rate": 1.9563418936417324e-05, "loss": 0.343, "step": 2540 }, { "epoch": 0.1923087838796651, "grad_norm": 0.796875, "learning_rate": 1.956307077520188e-05, "loss": 0.305, "step": 2541 }, { "epoch": 0.19238446620311245, "grad_norm": 0.8828125, "learning_rate": 1.956272247831786e-05, "loss": 0.3358, "step": 2542 }, { "epoch": 0.19246014852655977, "grad_norm": 0.86328125, "learning_rate": 1.9562374045770218e-05, "loss": 0.363, "step": 2543 }, { "epoch": 0.19253583085000708, "grad_norm": 1.921875, "learning_rate": 1.956202547756389e-05, "loss": 0.4409, "step": 2544 }, { "epoch": 0.19261151317345443, "grad_norm": 0.90234375, "learning_rate": 1.9561676773703818e-05, "loss": 0.3382, "step": 2545 }, { "epoch": 0.19268719549690175, "grad_norm": 0.8828125, "learning_rate": 1.9561327934194954e-05, "loss": 0.3609, "step": 2546 }, { "epoch": 0.1927628778203491, "grad_norm": 0.93359375, "learning_rate": 1.9560978959042247e-05, "loss": 0.37, "step": 2547 }, { "epoch": 0.1928385601437964, "grad_norm": 0.8671875, "learning_rate": 1.9560629848250645e-05, "loss": 0.3409, "step": 2548 }, { "epoch": 0.19291424246724376, "grad_norm": 0.859375, "learning_rate": 1.95602806018251e-05, "loss": 0.3301, "step": 2549 }, { "epoch": 0.19298992479069108, "grad_norm": 0.88671875, "learning_rate": 1.9559931219770573e-05, "loss": 0.3503, "step": 2550 }, { "epoch": 0.1930656071141384, "grad_norm": 0.8203125, "learning_rate": 1.9559581702092013e-05, "loss": 0.3171, "step": 2551 }, { "epoch": 0.19314128943758574, "grad_norm": 0.86328125, "learning_rate": 1.9559232048794385e-05, "loss": 0.3317, "step": 2552 }, { "epoch": 0.19321697176103306, "grad_norm": 0.8984375, "learning_rate": 1.955888225988264e-05, "loss": 0.3262, "step": 2553 }, { "epoch": 0.1932926540844804, "grad_norm": 46.0, "learning_rate": 1.9558532335361753e-05, "loss": 0.4823, "step": 2554 }, { "epoch": 0.19336833640792772, "grad_norm": 0.85546875, "learning_rate": 1.9558182275236677e-05, "loss": 0.3553, "step": 2555 }, { "epoch": 0.19344401873137507, "grad_norm": 0.8828125, "learning_rate": 1.9557832079512387e-05, "loss": 0.3355, "step": 2556 }, { "epoch": 0.19351970105482239, "grad_norm": 0.84765625, "learning_rate": 1.9557481748193847e-05, "loss": 0.3394, "step": 2557 }, { "epoch": 0.1935953833782697, "grad_norm": 0.82421875, "learning_rate": 1.9557131281286022e-05, "loss": 0.3263, "step": 2558 }, { "epoch": 0.19367106570171705, "grad_norm": 0.82421875, "learning_rate": 1.9556780678793895e-05, "loss": 0.3025, "step": 2559 }, { "epoch": 0.19374674802516437, "grad_norm": 0.7890625, "learning_rate": 1.9556429940722432e-05, "loss": 0.3139, "step": 2560 }, { "epoch": 0.1938224303486117, "grad_norm": 0.8828125, "learning_rate": 1.9556079067076614e-05, "loss": 0.336, "step": 2561 }, { "epoch": 0.19389811267205903, "grad_norm": 0.83984375, "learning_rate": 1.955572805786141e-05, "loss": 0.291, "step": 2562 }, { "epoch": 0.19397379499550635, "grad_norm": 0.81640625, "learning_rate": 1.955537691308181e-05, "loss": 0.3077, "step": 2563 }, { "epoch": 0.1940494773189537, "grad_norm": 0.8515625, "learning_rate": 1.9555025632742787e-05, "loss": 0.3052, "step": 2564 }, { "epoch": 0.194125159642401, "grad_norm": 0.9609375, "learning_rate": 1.9554674216849328e-05, "loss": 0.3395, "step": 2565 }, { "epoch": 0.19420084196584836, "grad_norm": 2.1875, "learning_rate": 1.9554322665406424e-05, "loss": 0.3954, "step": 2566 }, { "epoch": 0.19427652428929568, "grad_norm": 1.0, "learning_rate": 1.955397097841905e-05, "loss": 0.422, "step": 2567 }, { "epoch": 0.19435220661274302, "grad_norm": 0.9296875, "learning_rate": 1.955361915589221e-05, "loss": 0.2688, "step": 2568 }, { "epoch": 0.19442788893619034, "grad_norm": 0.84765625, "learning_rate": 1.955326719783088e-05, "loss": 0.3259, "step": 2569 }, { "epoch": 0.19450357125963766, "grad_norm": 0.921875, "learning_rate": 1.9552915104240067e-05, "loss": 0.3573, "step": 2570 }, { "epoch": 0.194579253583085, "grad_norm": 4.34375, "learning_rate": 1.9552562875124757e-05, "loss": 0.4564, "step": 2571 }, { "epoch": 0.19465493590653232, "grad_norm": 0.93359375, "learning_rate": 1.9552210510489952e-05, "loss": 0.3671, "step": 2572 }, { "epoch": 0.19473061822997967, "grad_norm": 0.8671875, "learning_rate": 1.9551858010340646e-05, "loss": 0.3275, "step": 2573 }, { "epoch": 0.194806300553427, "grad_norm": 0.9375, "learning_rate": 1.9551505374681844e-05, "loss": 0.3791, "step": 2574 }, { "epoch": 0.19488198287687433, "grad_norm": 0.8515625, "learning_rate": 1.955115260351855e-05, "loss": 0.3472, "step": 2575 }, { "epoch": 0.19495766520032165, "grad_norm": 1.9375, "learning_rate": 1.9550799696855764e-05, "loss": 0.4392, "step": 2576 }, { "epoch": 0.19503334752376897, "grad_norm": 0.89453125, "learning_rate": 1.955044665469849e-05, "loss": 0.3341, "step": 2577 }, { "epoch": 0.19510902984721631, "grad_norm": 2.015625, "learning_rate": 1.955009347705175e-05, "loss": 0.4825, "step": 2578 }, { "epoch": 0.19518471217066363, "grad_norm": 0.8671875, "learning_rate": 1.954974016392054e-05, "loss": 0.3481, "step": 2579 }, { "epoch": 0.19526039449411098, "grad_norm": 0.84375, "learning_rate": 1.9549386715309876e-05, "loss": 0.3355, "step": 2580 }, { "epoch": 0.1953360768175583, "grad_norm": 1.6171875, "learning_rate": 1.9549033131224776e-05, "loss": 0.4142, "step": 2581 }, { "epoch": 0.19541175914100564, "grad_norm": 0.94140625, "learning_rate": 1.9548679411670254e-05, "loss": 0.3315, "step": 2582 }, { "epoch": 0.19548744146445296, "grad_norm": 0.85546875, "learning_rate": 1.9548325556651332e-05, "loss": 0.3494, "step": 2583 }, { "epoch": 0.19556312378790028, "grad_norm": 0.94140625, "learning_rate": 1.9547971566173025e-05, "loss": 0.3576, "step": 2584 }, { "epoch": 0.19563880611134762, "grad_norm": 0.921875, "learning_rate": 1.9547617440240355e-05, "loss": 0.3802, "step": 2585 }, { "epoch": 0.19571448843479494, "grad_norm": 0.87109375, "learning_rate": 1.9547263178858346e-05, "loss": 0.329, "step": 2586 }, { "epoch": 0.1957901707582423, "grad_norm": 0.87890625, "learning_rate": 1.9546908782032024e-05, "loss": 0.3255, "step": 2587 }, { "epoch": 0.1958658530816896, "grad_norm": 0.88671875, "learning_rate": 1.9546554249766422e-05, "loss": 0.3391, "step": 2588 }, { "epoch": 0.19594153540513692, "grad_norm": 0.8203125, "learning_rate": 1.9546199582066564e-05, "loss": 0.3091, "step": 2589 }, { "epoch": 0.19601721772858427, "grad_norm": 0.765625, "learning_rate": 1.9545844778937484e-05, "loss": 0.2647, "step": 2590 }, { "epoch": 0.1960929000520316, "grad_norm": 0.8203125, "learning_rate": 1.9545489840384214e-05, "loss": 0.2956, "step": 2591 }, { "epoch": 0.19616858237547893, "grad_norm": 0.890625, "learning_rate": 1.9545134766411792e-05, "loss": 0.3383, "step": 2592 }, { "epoch": 0.19624426469892625, "grad_norm": 0.79296875, "learning_rate": 1.954477955702525e-05, "loss": 0.2926, "step": 2593 }, { "epoch": 0.1963199470223736, "grad_norm": 0.8046875, "learning_rate": 1.9544424212229632e-05, "loss": 0.3104, "step": 2594 }, { "epoch": 0.19639562934582092, "grad_norm": 0.859375, "learning_rate": 1.9544068732029977e-05, "loss": 0.3223, "step": 2595 }, { "epoch": 0.19647131166926823, "grad_norm": 0.9296875, "learning_rate": 1.954371311643133e-05, "loss": 0.3824, "step": 2596 }, { "epoch": 0.19654699399271558, "grad_norm": 0.9375, "learning_rate": 1.9543357365438734e-05, "loss": 0.3471, "step": 2597 }, { "epoch": 0.1966226763161629, "grad_norm": 1.0078125, "learning_rate": 1.954300147905724e-05, "loss": 0.3817, "step": 2598 }, { "epoch": 0.19669835863961024, "grad_norm": 0.84375, "learning_rate": 1.954264545729189e-05, "loss": 0.3025, "step": 2599 }, { "epoch": 0.19677404096305756, "grad_norm": 0.83984375, "learning_rate": 1.9542289300147735e-05, "loss": 0.3107, "step": 2600 }, { "epoch": 0.1968497232865049, "grad_norm": 0.8203125, "learning_rate": 1.9541933007629836e-05, "loss": 0.3077, "step": 2601 }, { "epoch": 0.19692540560995223, "grad_norm": 0.90625, "learning_rate": 1.954157657974324e-05, "loss": 0.3553, "step": 2602 }, { "epoch": 0.19700108793339954, "grad_norm": 0.86328125, "learning_rate": 1.9541220016493012e-05, "loss": 0.3579, "step": 2603 }, { "epoch": 0.1970767702568469, "grad_norm": 0.88671875, "learning_rate": 1.95408633178842e-05, "loss": 0.2861, "step": 2604 }, { "epoch": 0.1971524525802942, "grad_norm": 0.86328125, "learning_rate": 1.954050648392187e-05, "loss": 0.3559, "step": 2605 }, { "epoch": 0.19722813490374155, "grad_norm": 0.8046875, "learning_rate": 1.9540149514611083e-05, "loss": 0.3268, "step": 2606 }, { "epoch": 0.19730381722718887, "grad_norm": 0.8671875, "learning_rate": 1.9539792409956906e-05, "loss": 0.3512, "step": 2607 }, { "epoch": 0.19737949955063622, "grad_norm": 0.87109375, "learning_rate": 1.9539435169964402e-05, "loss": 0.3554, "step": 2608 }, { "epoch": 0.19745518187408354, "grad_norm": 0.890625, "learning_rate": 1.9539077794638638e-05, "loss": 0.332, "step": 2609 }, { "epoch": 0.19753086419753085, "grad_norm": 0.890625, "learning_rate": 1.9538720283984687e-05, "loss": 0.3337, "step": 2610 }, { "epoch": 0.1976065465209782, "grad_norm": 0.8984375, "learning_rate": 1.953836263800762e-05, "loss": 0.4011, "step": 2611 }, { "epoch": 0.19768222884442552, "grad_norm": 0.89453125, "learning_rate": 1.953800485671251e-05, "loss": 0.3559, "step": 2612 }, { "epoch": 0.19775791116787286, "grad_norm": 0.92578125, "learning_rate": 1.9537646940104433e-05, "loss": 0.3957, "step": 2613 }, { "epoch": 0.19783359349132018, "grad_norm": 0.78515625, "learning_rate": 1.953728888818847e-05, "loss": 0.3216, "step": 2614 }, { "epoch": 0.19790927581476753, "grad_norm": 0.796875, "learning_rate": 1.9536930700969693e-05, "loss": 0.3016, "step": 2615 }, { "epoch": 0.19798495813821484, "grad_norm": 0.9453125, "learning_rate": 1.9536572378453188e-05, "loss": 0.3831, "step": 2616 }, { "epoch": 0.19806064046166216, "grad_norm": 0.84765625, "learning_rate": 1.9536213920644044e-05, "loss": 0.3693, "step": 2617 }, { "epoch": 0.1981363227851095, "grad_norm": 0.84765625, "learning_rate": 1.9535855327547336e-05, "loss": 0.3452, "step": 2618 }, { "epoch": 0.19821200510855683, "grad_norm": 0.80859375, "learning_rate": 1.9535496599168158e-05, "loss": 0.3081, "step": 2619 }, { "epoch": 0.19828768743200417, "grad_norm": 0.859375, "learning_rate": 1.9535137735511597e-05, "loss": 0.3555, "step": 2620 }, { "epoch": 0.1983633697554515, "grad_norm": 0.87109375, "learning_rate": 1.9534778736582746e-05, "loss": 0.3538, "step": 2621 }, { "epoch": 0.1984390520788988, "grad_norm": 0.8359375, "learning_rate": 1.9534419602386694e-05, "loss": 0.3272, "step": 2622 }, { "epoch": 0.19851473440234615, "grad_norm": 0.86328125, "learning_rate": 1.9534060332928537e-05, "loss": 0.3652, "step": 2623 }, { "epoch": 0.19859041672579347, "grad_norm": 0.85546875, "learning_rate": 1.9533700928213373e-05, "loss": 0.3381, "step": 2624 }, { "epoch": 0.19866609904924082, "grad_norm": 2.5, "learning_rate": 1.9533341388246307e-05, "loss": 0.4303, "step": 2625 }, { "epoch": 0.19874178137268814, "grad_norm": 0.86328125, "learning_rate": 1.9532981713032432e-05, "loss": 0.3413, "step": 2626 }, { "epoch": 0.19881746369613548, "grad_norm": 0.8125, "learning_rate": 1.9532621902576846e-05, "loss": 0.3184, "step": 2627 }, { "epoch": 0.1988931460195828, "grad_norm": 1.671875, "learning_rate": 1.9532261956884663e-05, "loss": 0.4109, "step": 2628 }, { "epoch": 0.19896882834303012, "grad_norm": 0.8203125, "learning_rate": 1.953190187596099e-05, "loss": 0.3162, "step": 2629 }, { "epoch": 0.19904451066647746, "grad_norm": 0.875, "learning_rate": 1.9531541659810927e-05, "loss": 0.3483, "step": 2630 }, { "epoch": 0.19912019298992478, "grad_norm": 0.9296875, "learning_rate": 1.9531181308439592e-05, "loss": 0.3378, "step": 2631 }, { "epoch": 0.19919587531337213, "grad_norm": 0.828125, "learning_rate": 1.9530820821852092e-05, "loss": 0.3358, "step": 2632 }, { "epoch": 0.19927155763681945, "grad_norm": 0.8203125, "learning_rate": 1.9530460200053542e-05, "loss": 0.3374, "step": 2633 }, { "epoch": 0.1993472399602668, "grad_norm": 0.79296875, "learning_rate": 1.9530099443049062e-05, "loss": 0.2955, "step": 2634 }, { "epoch": 0.1994229222837141, "grad_norm": 0.87890625, "learning_rate": 1.9529738550843765e-05, "loss": 0.3652, "step": 2635 }, { "epoch": 0.19949860460716143, "grad_norm": 0.86328125, "learning_rate": 1.9529377523442777e-05, "loss": 0.3652, "step": 2636 }, { "epoch": 0.19957428693060877, "grad_norm": 0.734375, "learning_rate": 1.952901636085121e-05, "loss": 0.2431, "step": 2637 }, { "epoch": 0.1996499692540561, "grad_norm": 0.87109375, "learning_rate": 1.95286550630742e-05, "loss": 0.3291, "step": 2638 }, { "epoch": 0.19972565157750344, "grad_norm": 0.8828125, "learning_rate": 1.952829363011686e-05, "loss": 0.3656, "step": 2639 }, { "epoch": 0.19980133390095076, "grad_norm": 0.875, "learning_rate": 1.9527932061984327e-05, "loss": 0.3445, "step": 2640 }, { "epoch": 0.1998770162243981, "grad_norm": 3.109375, "learning_rate": 1.9527570358681726e-05, "loss": 0.4393, "step": 2641 }, { "epoch": 0.19995269854784542, "grad_norm": 0.8515625, "learning_rate": 1.9527208520214192e-05, "loss": 0.3503, "step": 2642 }, { "epoch": 0.20002838087129274, "grad_norm": 0.84375, "learning_rate": 1.9526846546586854e-05, "loss": 0.3433, "step": 2643 }, { "epoch": 0.20010406319474008, "grad_norm": 0.80078125, "learning_rate": 1.952648443780485e-05, "loss": 0.3107, "step": 2644 }, { "epoch": 0.20010406319474008, "eval_loss": 0.3552929759025574, "eval_runtime": 83.7012, "eval_samples_per_second": 58.088, "eval_steps_per_second": 58.088, "step": 2644 }, { "epoch": 0.2001797455181874, "grad_norm": 0.83984375, "learning_rate": 1.9526122193873315e-05, "loss": 0.3504, "step": 2645 }, { "epoch": 0.20025542784163475, "grad_norm": 0.80859375, "learning_rate": 1.952575981479739e-05, "loss": 0.3325, "step": 2646 }, { "epoch": 0.20033111016508207, "grad_norm": 0.84765625, "learning_rate": 1.9525397300582214e-05, "loss": 0.3324, "step": 2647 }, { "epoch": 0.2004067924885294, "grad_norm": 0.8359375, "learning_rate": 1.9525034651232933e-05, "loss": 0.3457, "step": 2648 }, { "epoch": 0.20048247481197673, "grad_norm": 0.87890625, "learning_rate": 1.952467186675469e-05, "loss": 0.3437, "step": 2649 }, { "epoch": 0.20055815713542405, "grad_norm": 0.8359375, "learning_rate": 1.9524308947152632e-05, "loss": 0.3169, "step": 2650 }, { "epoch": 0.2006338394588714, "grad_norm": 0.8046875, "learning_rate": 1.9523945892431905e-05, "loss": 0.3294, "step": 2651 }, { "epoch": 0.2007095217823187, "grad_norm": 0.875, "learning_rate": 1.952358270259766e-05, "loss": 0.3564, "step": 2652 }, { "epoch": 0.20078520410576606, "grad_norm": 0.9375, "learning_rate": 1.9523219377655055e-05, "loss": 0.3715, "step": 2653 }, { "epoch": 0.20086088642921338, "grad_norm": 0.81640625, "learning_rate": 1.9522855917609243e-05, "loss": 0.3254, "step": 2654 }, { "epoch": 0.2009365687526607, "grad_norm": 0.890625, "learning_rate": 1.9522492322465375e-05, "loss": 0.3669, "step": 2655 }, { "epoch": 0.20101225107610804, "grad_norm": 0.89453125, "learning_rate": 1.9522128592228612e-05, "loss": 0.3727, "step": 2656 }, { "epoch": 0.20108793339955536, "grad_norm": 4.8125, "learning_rate": 1.9521764726904114e-05, "loss": 0.4355, "step": 2657 }, { "epoch": 0.2011636157230027, "grad_norm": 0.86328125, "learning_rate": 1.9521400726497044e-05, "loss": 0.3144, "step": 2658 }, { "epoch": 0.20123929804645002, "grad_norm": 0.8984375, "learning_rate": 1.9521036591012567e-05, "loss": 0.3609, "step": 2659 }, { "epoch": 0.20131498036989737, "grad_norm": 0.84765625, "learning_rate": 1.9520672320455845e-05, "loss": 0.324, "step": 2660 }, { "epoch": 0.20139066269334469, "grad_norm": 0.89453125, "learning_rate": 1.952030791483205e-05, "loss": 0.3724, "step": 2661 }, { "epoch": 0.201466345016792, "grad_norm": 0.85546875, "learning_rate": 1.9519943374146345e-05, "loss": 0.3338, "step": 2662 }, { "epoch": 0.20154202734023935, "grad_norm": 0.83203125, "learning_rate": 1.9519578698403912e-05, "loss": 0.327, "step": 2663 }, { "epoch": 0.20161770966368667, "grad_norm": 0.875, "learning_rate": 1.9519213887609915e-05, "loss": 0.3598, "step": 2664 }, { "epoch": 0.201693391987134, "grad_norm": 0.875, "learning_rate": 1.951884894176954e-05, "loss": 0.3677, "step": 2665 }, { "epoch": 0.20176907431058133, "grad_norm": 0.8515625, "learning_rate": 1.951848386088795e-05, "loss": 0.3459, "step": 2666 }, { "epoch": 0.20184475663402868, "grad_norm": 0.7578125, "learning_rate": 1.951811864497034e-05, "loss": 0.2793, "step": 2667 }, { "epoch": 0.201920438957476, "grad_norm": 0.87109375, "learning_rate": 1.9517753294021876e-05, "loss": 0.3488, "step": 2668 }, { "epoch": 0.2019961212809233, "grad_norm": 0.91015625, "learning_rate": 1.9517387808047753e-05, "loss": 0.3511, "step": 2669 }, { "epoch": 0.20207180360437066, "grad_norm": 0.8515625, "learning_rate": 1.951702218705315e-05, "loss": 0.3237, "step": 2670 }, { "epoch": 0.20214748592781798, "grad_norm": 0.8046875, "learning_rate": 1.9516656431043254e-05, "loss": 0.3373, "step": 2671 }, { "epoch": 0.20222316825126532, "grad_norm": 0.91796875, "learning_rate": 1.951629054002326e-05, "loss": 0.3325, "step": 2672 }, { "epoch": 0.20229885057471264, "grad_norm": 0.875, "learning_rate": 1.951592451399835e-05, "loss": 0.3649, "step": 2673 }, { "epoch": 0.20237453289816, "grad_norm": 0.890625, "learning_rate": 1.951555835297372e-05, "loss": 0.3574, "step": 2674 }, { "epoch": 0.2024502152216073, "grad_norm": 1.828125, "learning_rate": 1.951519205695457e-05, "loss": 0.4106, "step": 2675 }, { "epoch": 0.20252589754505462, "grad_norm": 7.3125, "learning_rate": 1.951482562594609e-05, "loss": 0.4183, "step": 2676 }, { "epoch": 0.20260157986850197, "grad_norm": 0.953125, "learning_rate": 1.951445905995348e-05, "loss": 0.3598, "step": 2677 }, { "epoch": 0.2026772621919493, "grad_norm": 0.88671875, "learning_rate": 1.951409235898194e-05, "loss": 0.3699, "step": 2678 }, { "epoch": 0.20275294451539663, "grad_norm": 0.8359375, "learning_rate": 1.9513725523036676e-05, "loss": 0.3405, "step": 2679 }, { "epoch": 0.20282862683884395, "grad_norm": 0.8671875, "learning_rate": 1.9513358552122886e-05, "loss": 0.3484, "step": 2680 }, { "epoch": 0.20290430916229127, "grad_norm": 0.8359375, "learning_rate": 1.951299144624578e-05, "loss": 0.2988, "step": 2681 }, { "epoch": 0.20297999148573861, "grad_norm": 4.53125, "learning_rate": 1.9512624205410568e-05, "loss": 0.349, "step": 2682 }, { "epoch": 0.20305567380918593, "grad_norm": 0.89453125, "learning_rate": 1.9512256829622455e-05, "loss": 0.3541, "step": 2683 }, { "epoch": 0.20313135613263328, "grad_norm": 0.86328125, "learning_rate": 1.9511889318886656e-05, "loss": 0.2991, "step": 2684 }, { "epoch": 0.2032070384560806, "grad_norm": 0.90234375, "learning_rate": 1.9511521673208385e-05, "loss": 0.3746, "step": 2685 }, { "epoch": 0.20328272077952794, "grad_norm": 0.91796875, "learning_rate": 1.9511153892592856e-05, "loss": 0.3986, "step": 2686 }, { "epoch": 0.20335840310297526, "grad_norm": 0.93359375, "learning_rate": 1.9510785977045286e-05, "loss": 0.3635, "step": 2687 }, { "epoch": 0.20343408542642258, "grad_norm": 0.84375, "learning_rate": 1.9510417926570898e-05, "loss": 0.3508, "step": 2688 }, { "epoch": 0.20350976774986992, "grad_norm": 0.859375, "learning_rate": 1.9510049741174912e-05, "loss": 0.3332, "step": 2689 }, { "epoch": 0.20358545007331724, "grad_norm": 0.79296875, "learning_rate": 1.950968142086255e-05, "loss": 0.3192, "step": 2690 }, { "epoch": 0.2036611323967646, "grad_norm": 0.80859375, "learning_rate": 1.9509312965639033e-05, "loss": 0.3199, "step": 2691 }, { "epoch": 0.2037368147202119, "grad_norm": 0.8203125, "learning_rate": 1.9508944375509596e-05, "loss": 0.2966, "step": 2692 }, { "epoch": 0.20381249704365925, "grad_norm": 0.80078125, "learning_rate": 1.950857565047947e-05, "loss": 0.2825, "step": 2693 }, { "epoch": 0.20388817936710657, "grad_norm": 0.890625, "learning_rate": 1.9508206790553875e-05, "loss": 0.3665, "step": 2694 }, { "epoch": 0.2039638616905539, "grad_norm": 0.8828125, "learning_rate": 1.9507837795738053e-05, "loss": 0.3618, "step": 2695 }, { "epoch": 0.20403954401400123, "grad_norm": 23.0, "learning_rate": 1.950746866603723e-05, "loss": 0.43, "step": 2696 }, { "epoch": 0.20411522633744855, "grad_norm": 0.953125, "learning_rate": 1.9507099401456658e-05, "loss": 0.3388, "step": 2697 }, { "epoch": 0.2041909086608959, "grad_norm": 0.87109375, "learning_rate": 1.9506730002001562e-05, "loss": 0.3597, "step": 2698 }, { "epoch": 0.20426659098434322, "grad_norm": 0.82421875, "learning_rate": 1.9506360467677184e-05, "loss": 0.3031, "step": 2699 }, { "epoch": 0.20434227330779056, "grad_norm": 0.83984375, "learning_rate": 1.9505990798488773e-05, "loss": 0.3436, "step": 2700 }, { "epoch": 0.20441795563123788, "grad_norm": 0.8359375, "learning_rate": 1.950562099444157e-05, "loss": 0.3542, "step": 2701 }, { "epoch": 0.2044936379546852, "grad_norm": 1.9921875, "learning_rate": 1.9505251055540818e-05, "loss": 0.4631, "step": 2702 }, { "epoch": 0.20456932027813254, "grad_norm": 0.96875, "learning_rate": 1.950488098179177e-05, "loss": 0.3537, "step": 2703 }, { "epoch": 0.20464500260157986, "grad_norm": 0.8984375, "learning_rate": 1.9504510773199677e-05, "loss": 0.3728, "step": 2704 }, { "epoch": 0.2047206849250272, "grad_norm": 0.87109375, "learning_rate": 1.9504140429769784e-05, "loss": 0.3366, "step": 2705 }, { "epoch": 0.20479636724847453, "grad_norm": 0.8359375, "learning_rate": 1.9503769951507353e-05, "loss": 0.3234, "step": 2706 }, { "epoch": 0.20487204957192187, "grad_norm": 0.92578125, "learning_rate": 1.9503399338417632e-05, "loss": 0.3801, "step": 2707 }, { "epoch": 0.2049477318953692, "grad_norm": 0.91796875, "learning_rate": 1.9503028590505885e-05, "loss": 0.3373, "step": 2708 }, { "epoch": 0.2050234142188165, "grad_norm": 0.796875, "learning_rate": 1.950265770777737e-05, "loss": 0.2803, "step": 2709 }, { "epoch": 0.20509909654226385, "grad_norm": 0.78125, "learning_rate": 1.950228669023735e-05, "loss": 0.2775, "step": 2710 }, { "epoch": 0.20517477886571117, "grad_norm": 0.85546875, "learning_rate": 1.9501915537891083e-05, "loss": 0.3188, "step": 2711 }, { "epoch": 0.20525046118915852, "grad_norm": 1.71875, "learning_rate": 1.950154425074384e-05, "loss": 0.4247, "step": 2712 }, { "epoch": 0.20532614351260584, "grad_norm": 0.8046875, "learning_rate": 1.9501172828800887e-05, "loss": 0.3294, "step": 2713 }, { "epoch": 0.20540182583605315, "grad_norm": 0.83203125, "learning_rate": 1.950080127206749e-05, "loss": 0.3295, "step": 2714 }, { "epoch": 0.2054775081595005, "grad_norm": 0.84765625, "learning_rate": 1.9500429580548928e-05, "loss": 0.34, "step": 2715 }, { "epoch": 0.20555319048294782, "grad_norm": 1.0078125, "learning_rate": 1.9500057754250465e-05, "loss": 0.3232, "step": 2716 }, { "epoch": 0.20562887280639516, "grad_norm": 0.90625, "learning_rate": 1.9499685793177384e-05, "loss": 0.3572, "step": 2717 }, { "epoch": 0.20570455512984248, "grad_norm": 0.82421875, "learning_rate": 1.9499313697334955e-05, "loss": 0.3392, "step": 2718 }, { "epoch": 0.20578023745328983, "grad_norm": 0.90625, "learning_rate": 1.9498941466728462e-05, "loss": 0.3522, "step": 2719 }, { "epoch": 0.20585591977673715, "grad_norm": 0.7890625, "learning_rate": 1.949856910136318e-05, "loss": 0.2953, "step": 2720 }, { "epoch": 0.20593160210018446, "grad_norm": 0.86328125, "learning_rate": 1.9498196601244398e-05, "loss": 0.3228, "step": 2721 }, { "epoch": 0.2060072844236318, "grad_norm": 0.81640625, "learning_rate": 1.9497823966377397e-05, "loss": 0.3073, "step": 2722 }, { "epoch": 0.20608296674707913, "grad_norm": 0.8359375, "learning_rate": 1.9497451196767465e-05, "loss": 0.3365, "step": 2723 }, { "epoch": 0.20615864907052647, "grad_norm": 0.86328125, "learning_rate": 1.949707829241989e-05, "loss": 0.3315, "step": 2724 }, { "epoch": 0.2062343313939738, "grad_norm": 0.8359375, "learning_rate": 1.9496705253339958e-05, "loss": 0.3384, "step": 2725 }, { "epoch": 0.20631001371742114, "grad_norm": 0.91015625, "learning_rate": 1.9496332079532968e-05, "loss": 0.3452, "step": 2726 }, { "epoch": 0.20638569604086845, "grad_norm": 0.7890625, "learning_rate": 1.9495958771004215e-05, "loss": 0.3139, "step": 2727 }, { "epoch": 0.20646137836431577, "grad_norm": 0.82421875, "learning_rate": 1.9495585327758985e-05, "loss": 0.3321, "step": 2728 }, { "epoch": 0.20653706068776312, "grad_norm": 0.90234375, "learning_rate": 1.9495211749802583e-05, "loss": 0.3779, "step": 2729 }, { "epoch": 0.20661274301121044, "grad_norm": 0.89453125, "learning_rate": 1.949483803714031e-05, "loss": 0.4007, "step": 2730 }, { "epoch": 0.20668842533465778, "grad_norm": 2.4375, "learning_rate": 1.9494464189777463e-05, "loss": 0.4518, "step": 2731 }, { "epoch": 0.2067641076581051, "grad_norm": 0.83203125, "learning_rate": 1.9494090207719353e-05, "loss": 0.3282, "step": 2732 }, { "epoch": 0.20683978998155245, "grad_norm": 0.8203125, "learning_rate": 1.9493716090971277e-05, "loss": 0.3097, "step": 2733 }, { "epoch": 0.20691547230499976, "grad_norm": 0.82421875, "learning_rate": 1.949334183953855e-05, "loss": 0.3476, "step": 2734 }, { "epoch": 0.20699115462844708, "grad_norm": 0.9296875, "learning_rate": 1.9492967453426475e-05, "loss": 0.3658, "step": 2735 }, { "epoch": 0.20706683695189443, "grad_norm": 0.90625, "learning_rate": 1.9492592932640367e-05, "loss": 0.3491, "step": 2736 }, { "epoch": 0.20714251927534175, "grad_norm": 0.82421875, "learning_rate": 1.949221827718554e-05, "loss": 0.2957, "step": 2737 }, { "epoch": 0.2072182015987891, "grad_norm": 0.8828125, "learning_rate": 1.9491843487067305e-05, "loss": 0.3583, "step": 2738 }, { "epoch": 0.2072938839222364, "grad_norm": 0.87109375, "learning_rate": 1.9491468562290984e-05, "loss": 0.3528, "step": 2739 }, { "epoch": 0.20736956624568376, "grad_norm": 0.81640625, "learning_rate": 1.949109350286189e-05, "loss": 0.2679, "step": 2740 }, { "epoch": 0.20744524856913107, "grad_norm": 0.91796875, "learning_rate": 1.949071830878535e-05, "loss": 0.4096, "step": 2741 }, { "epoch": 0.2075209308925784, "grad_norm": 0.87109375, "learning_rate": 1.9490342980066685e-05, "loss": 0.343, "step": 2742 }, { "epoch": 0.20759661321602574, "grad_norm": 0.91796875, "learning_rate": 1.9489967516711217e-05, "loss": 0.3962, "step": 2743 }, { "epoch": 0.20767229553947306, "grad_norm": 0.83203125, "learning_rate": 1.9489591918724274e-05, "loss": 0.3504, "step": 2744 }, { "epoch": 0.2077479778629204, "grad_norm": 0.83984375, "learning_rate": 1.9489216186111187e-05, "loss": 0.3293, "step": 2745 }, { "epoch": 0.20782366018636772, "grad_norm": 0.8828125, "learning_rate": 1.948884031887728e-05, "loss": 0.3751, "step": 2746 }, { "epoch": 0.20789934250981504, "grad_norm": 0.83203125, "learning_rate": 1.9488464317027894e-05, "loss": 0.3276, "step": 2747 }, { "epoch": 0.20797502483326238, "grad_norm": 0.82421875, "learning_rate": 1.9488088180568357e-05, "loss": 0.3288, "step": 2748 }, { "epoch": 0.2080507071567097, "grad_norm": 0.84375, "learning_rate": 1.948771190950401e-05, "loss": 0.3506, "step": 2749 }, { "epoch": 0.20812638948015705, "grad_norm": 0.81640625, "learning_rate": 1.9487335503840186e-05, "loss": 0.3503, "step": 2750 }, { "epoch": 0.20820207180360437, "grad_norm": 0.796875, "learning_rate": 1.9486958963582228e-05, "loss": 0.3139, "step": 2751 }, { "epoch": 0.2082777541270517, "grad_norm": 0.83984375, "learning_rate": 1.9486582288735475e-05, "loss": 0.3209, "step": 2752 }, { "epoch": 0.20835343645049903, "grad_norm": 0.78125, "learning_rate": 1.9486205479305272e-05, "loss": 0.2994, "step": 2753 }, { "epoch": 0.20842911877394635, "grad_norm": 0.79296875, "learning_rate": 1.9485828535296968e-05, "loss": 0.3183, "step": 2754 }, { "epoch": 0.2085048010973937, "grad_norm": 0.8359375, "learning_rate": 1.9485451456715907e-05, "loss": 0.3296, "step": 2755 }, { "epoch": 0.208580483420841, "grad_norm": 0.8828125, "learning_rate": 1.9485074243567437e-05, "loss": 0.3491, "step": 2756 }, { "epoch": 0.20865616574428836, "grad_norm": 0.80078125, "learning_rate": 1.9484696895856915e-05, "loss": 0.2951, "step": 2757 }, { "epoch": 0.20873184806773568, "grad_norm": 0.85546875, "learning_rate": 1.9484319413589692e-05, "loss": 0.3356, "step": 2758 }, { "epoch": 0.20880753039118302, "grad_norm": 0.80859375, "learning_rate": 1.948394179677112e-05, "loss": 0.3359, "step": 2759 }, { "epoch": 0.20888321271463034, "grad_norm": 0.8125, "learning_rate": 1.9483564045406562e-05, "loss": 0.3284, "step": 2760 }, { "epoch": 0.20895889503807766, "grad_norm": 0.87890625, "learning_rate": 1.948318615950137e-05, "loss": 0.3597, "step": 2761 }, { "epoch": 0.209034577361525, "grad_norm": 0.8671875, "learning_rate": 1.948280813906091e-05, "loss": 0.3833, "step": 2762 }, { "epoch": 0.20911025968497232, "grad_norm": 0.84765625, "learning_rate": 1.9482429984090543e-05, "loss": 0.3479, "step": 2763 }, { "epoch": 0.20918594200841967, "grad_norm": 0.8359375, "learning_rate": 1.9482051694595637e-05, "loss": 0.3308, "step": 2764 }, { "epoch": 0.20926162433186699, "grad_norm": 0.83984375, "learning_rate": 1.948167327058155e-05, "loss": 0.3179, "step": 2765 }, { "epoch": 0.20933730665531433, "grad_norm": 0.86328125, "learning_rate": 1.9481294712053663e-05, "loss": 0.3557, "step": 2766 }, { "epoch": 0.20941298897876165, "grad_norm": 0.859375, "learning_rate": 1.948091601901734e-05, "loss": 0.337, "step": 2767 }, { "epoch": 0.20948867130220897, "grad_norm": 0.90234375, "learning_rate": 1.9480537191477946e-05, "loss": 0.3975, "step": 2768 }, { "epoch": 0.2095643536256563, "grad_norm": 0.796875, "learning_rate": 1.9480158229440868e-05, "loss": 0.3276, "step": 2769 }, { "epoch": 0.20964003594910363, "grad_norm": 0.77734375, "learning_rate": 1.9479779132911476e-05, "loss": 0.2988, "step": 2770 }, { "epoch": 0.20971571827255098, "grad_norm": 1.9609375, "learning_rate": 1.9479399901895152e-05, "loss": 0.4358, "step": 2771 }, { "epoch": 0.2097914005959983, "grad_norm": 0.84765625, "learning_rate": 1.947902053639727e-05, "loss": 0.3239, "step": 2772 }, { "epoch": 0.2098670829194456, "grad_norm": 0.85546875, "learning_rate": 1.9478641036423215e-05, "loss": 0.3458, "step": 2773 }, { "epoch": 0.20994276524289296, "grad_norm": 0.86328125, "learning_rate": 1.947826140197837e-05, "loss": 0.3527, "step": 2774 }, { "epoch": 0.21001844756634028, "grad_norm": 0.8671875, "learning_rate": 1.9477881633068125e-05, "loss": 0.3936, "step": 2775 }, { "epoch": 0.21009412988978762, "grad_norm": 0.84765625, "learning_rate": 1.9477501729697865e-05, "loss": 0.3341, "step": 2776 }, { "epoch": 0.21016981221323494, "grad_norm": 0.8828125, "learning_rate": 1.9477121691872975e-05, "loss": 0.3709, "step": 2777 }, { "epoch": 0.2102454945366823, "grad_norm": 0.8203125, "learning_rate": 1.9476741519598855e-05, "loss": 0.3259, "step": 2778 }, { "epoch": 0.2103211768601296, "grad_norm": 0.8671875, "learning_rate": 1.947636121288089e-05, "loss": 0.3516, "step": 2779 }, { "epoch": 0.21039685918357692, "grad_norm": 0.8515625, "learning_rate": 1.947598077172448e-05, "loss": 0.2957, "step": 2780 }, { "epoch": 0.21047254150702427, "grad_norm": 0.82421875, "learning_rate": 1.9475600196135023e-05, "loss": 0.3117, "step": 2781 }, { "epoch": 0.2105482238304716, "grad_norm": 0.87890625, "learning_rate": 1.9475219486117916e-05, "loss": 0.3559, "step": 2782 }, { "epoch": 0.21062390615391893, "grad_norm": 0.92578125, "learning_rate": 1.947483864167856e-05, "loss": 0.3443, "step": 2783 }, { "epoch": 0.21069958847736625, "grad_norm": 0.90234375, "learning_rate": 1.9474457662822357e-05, "loss": 0.3761, "step": 2784 }, { "epoch": 0.2107752708008136, "grad_norm": 1.640625, "learning_rate": 1.9474076549554717e-05, "loss": 0.4228, "step": 2785 }, { "epoch": 0.21085095312426091, "grad_norm": 0.796875, "learning_rate": 1.947369530188104e-05, "loss": 0.2995, "step": 2786 }, { "epoch": 0.21092663544770823, "grad_norm": 0.890625, "learning_rate": 1.947331391980674e-05, "loss": 0.3483, "step": 2787 }, { "epoch": 0.21100231777115558, "grad_norm": 0.87109375, "learning_rate": 1.9472932403337224e-05, "loss": 0.3742, "step": 2788 }, { "epoch": 0.2110780000946029, "grad_norm": 0.859375, "learning_rate": 1.9472550752477904e-05, "loss": 0.3219, "step": 2789 }, { "epoch": 0.21115368241805024, "grad_norm": 0.81640625, "learning_rate": 1.94721689672342e-05, "loss": 0.2972, "step": 2790 }, { "epoch": 0.21122936474149756, "grad_norm": 0.83984375, "learning_rate": 1.9471787047611522e-05, "loss": 0.3038, "step": 2791 }, { "epoch": 0.2113050470649449, "grad_norm": 0.83984375, "learning_rate": 1.947140499361529e-05, "loss": 0.335, "step": 2792 }, { "epoch": 0.21138072938839222, "grad_norm": 0.8359375, "learning_rate": 1.9471022805250927e-05, "loss": 0.3017, "step": 2793 }, { "epoch": 0.21145641171183954, "grad_norm": 0.83984375, "learning_rate": 1.947064048252385e-05, "loss": 0.3293, "step": 2794 }, { "epoch": 0.2115320940352869, "grad_norm": 0.859375, "learning_rate": 1.9470258025439487e-05, "loss": 0.305, "step": 2795 }, { "epoch": 0.2116077763587342, "grad_norm": 0.91015625, "learning_rate": 1.9469875434003263e-05, "loss": 0.3707, "step": 2796 }, { "epoch": 0.21168345868218155, "grad_norm": 0.8359375, "learning_rate": 1.9469492708220604e-05, "loss": 0.312, "step": 2797 }, { "epoch": 0.21175914100562887, "grad_norm": 0.84375, "learning_rate": 1.946910984809694e-05, "loss": 0.3129, "step": 2798 }, { "epoch": 0.21183482332907622, "grad_norm": 0.828125, "learning_rate": 1.9468726853637704e-05, "loss": 0.3124, "step": 2799 }, { "epoch": 0.21191050565252353, "grad_norm": 0.80078125, "learning_rate": 1.9468343724848328e-05, "loss": 0.2974, "step": 2800 }, { "epoch": 0.21198618797597085, "grad_norm": 0.890625, "learning_rate": 1.9467960461734252e-05, "loss": 0.3595, "step": 2801 }, { "epoch": 0.2120618702994182, "grad_norm": 0.8984375, "learning_rate": 1.9467577064300904e-05, "loss": 0.3622, "step": 2802 }, { "epoch": 0.21213755262286552, "grad_norm": 0.8828125, "learning_rate": 1.946719353255373e-05, "loss": 0.3608, "step": 2803 }, { "epoch": 0.21221323494631286, "grad_norm": 0.88671875, "learning_rate": 1.9466809866498174e-05, "loss": 0.3654, "step": 2804 }, { "epoch": 0.21228891726976018, "grad_norm": 0.8203125, "learning_rate": 1.9466426066139668e-05, "loss": 0.351, "step": 2805 }, { "epoch": 0.2123645995932075, "grad_norm": 0.875, "learning_rate": 1.9466042131483665e-05, "loss": 0.3478, "step": 2806 }, { "epoch": 0.21244028191665484, "grad_norm": 0.765625, "learning_rate": 1.9465658062535613e-05, "loss": 0.303, "step": 2807 }, { "epoch": 0.21251596424010216, "grad_norm": 0.8046875, "learning_rate": 1.9465273859300957e-05, "loss": 0.3034, "step": 2808 }, { "epoch": 0.2125916465635495, "grad_norm": 0.83203125, "learning_rate": 1.9464889521785147e-05, "loss": 0.3316, "step": 2809 }, { "epoch": 0.21266732888699683, "grad_norm": 0.8203125, "learning_rate": 1.9464505049993636e-05, "loss": 0.303, "step": 2810 }, { "epoch": 0.21274301121044417, "grad_norm": 0.9296875, "learning_rate": 1.946412044393188e-05, "loss": 0.382, "step": 2811 }, { "epoch": 0.2128186935338915, "grad_norm": 0.828125, "learning_rate": 1.9463735703605336e-05, "loss": 0.3161, "step": 2812 }, { "epoch": 0.2128943758573388, "grad_norm": 0.80078125, "learning_rate": 1.9463350829019457e-05, "loss": 0.3021, "step": 2813 }, { "epoch": 0.21297005818078615, "grad_norm": 5.40625, "learning_rate": 1.946296582017971e-05, "loss": 0.504, "step": 2814 }, { "epoch": 0.21304574050423347, "grad_norm": 0.8359375, "learning_rate": 1.9462580677091552e-05, "loss": 0.3307, "step": 2815 }, { "epoch": 0.21312142282768082, "grad_norm": 0.85546875, "learning_rate": 1.946219539976045e-05, "loss": 0.3378, "step": 2816 }, { "epoch": 0.21319710515112814, "grad_norm": 0.80859375, "learning_rate": 1.946180998819187e-05, "loss": 0.3363, "step": 2817 }, { "epoch": 0.21327278747457548, "grad_norm": 0.84765625, "learning_rate": 1.9461424442391277e-05, "loss": 0.3529, "step": 2818 }, { "epoch": 0.2133484697980228, "grad_norm": 0.953125, "learning_rate": 1.9461038762364143e-05, "loss": 0.4024, "step": 2819 }, { "epoch": 0.21342415212147012, "grad_norm": 0.91796875, "learning_rate": 1.9460652948115935e-05, "loss": 0.3875, "step": 2820 }, { "epoch": 0.21349983444491746, "grad_norm": 0.78515625, "learning_rate": 1.9460266999652134e-05, "loss": 0.2989, "step": 2821 }, { "epoch": 0.21357551676836478, "grad_norm": 0.8125, "learning_rate": 1.945988091697821e-05, "loss": 0.3107, "step": 2822 }, { "epoch": 0.21365119909181213, "grad_norm": 0.8671875, "learning_rate": 1.945949470009964e-05, "loss": 0.3692, "step": 2823 }, { "epoch": 0.21372688141525945, "grad_norm": 0.80859375, "learning_rate": 1.94591083490219e-05, "loss": 0.2946, "step": 2824 }, { "epoch": 0.2138025637387068, "grad_norm": 0.921875, "learning_rate": 1.9458721863750486e-05, "loss": 0.3641, "step": 2825 }, { "epoch": 0.2138782460621541, "grad_norm": 0.8203125, "learning_rate": 1.9458335244290864e-05, "loss": 0.2998, "step": 2826 }, { "epoch": 0.21395392838560143, "grad_norm": 0.8515625, "learning_rate": 1.9457948490648527e-05, "loss": 0.3384, "step": 2827 }, { "epoch": 0.21402961070904877, "grad_norm": 0.81640625, "learning_rate": 1.945756160282896e-05, "loss": 0.3342, "step": 2828 }, { "epoch": 0.2141052930324961, "grad_norm": 0.97265625, "learning_rate": 1.945717458083765e-05, "loss": 0.4023, "step": 2829 }, { "epoch": 0.21418097535594344, "grad_norm": 0.84375, "learning_rate": 1.9456787424680095e-05, "loss": 0.3104, "step": 2830 }, { "epoch": 0.21425665767939076, "grad_norm": 0.859375, "learning_rate": 1.9456400134361776e-05, "loss": 0.3843, "step": 2831 }, { "epoch": 0.2143323400028381, "grad_norm": 0.75390625, "learning_rate": 1.94560127098882e-05, "loss": 0.292, "step": 2832 }, { "epoch": 0.21440802232628542, "grad_norm": 1.9453125, "learning_rate": 1.945562515126485e-05, "loss": 0.4858, "step": 2833 }, { "epoch": 0.21448370464973274, "grad_norm": 0.875, "learning_rate": 1.9455237458497234e-05, "loss": 0.3816, "step": 2834 }, { "epoch": 0.21455938697318008, "grad_norm": 0.83203125, "learning_rate": 1.945484963159085e-05, "loss": 0.3189, "step": 2835 }, { "epoch": 0.2146350692966274, "grad_norm": 0.87890625, "learning_rate": 1.9454461670551198e-05, "loss": 0.3753, "step": 2836 }, { "epoch": 0.21471075162007475, "grad_norm": 0.875, "learning_rate": 1.945407357538378e-05, "loss": 0.3785, "step": 2837 }, { "epoch": 0.21478643394352206, "grad_norm": 0.87890625, "learning_rate": 1.945368534609411e-05, "loss": 0.3805, "step": 2838 }, { "epoch": 0.21486211626696938, "grad_norm": 0.8125, "learning_rate": 1.9453296982687684e-05, "loss": 0.35, "step": 2839 }, { "epoch": 0.21493779859041673, "grad_norm": 0.8359375, "learning_rate": 1.945290848517002e-05, "loss": 0.3572, "step": 2840 }, { "epoch": 0.21501348091386405, "grad_norm": 0.87890625, "learning_rate": 1.945251985354663e-05, "loss": 0.3547, "step": 2841 }, { "epoch": 0.2150891632373114, "grad_norm": 0.85546875, "learning_rate": 1.9452131087823023e-05, "loss": 0.347, "step": 2842 }, { "epoch": 0.2151648455607587, "grad_norm": 0.80859375, "learning_rate": 1.9451742188004716e-05, "loss": 0.2777, "step": 2843 }, { "epoch": 0.21524052788420606, "grad_norm": 0.85546875, "learning_rate": 1.9451353154097224e-05, "loss": 0.3432, "step": 2844 }, { "epoch": 0.21531621020765337, "grad_norm": 0.78515625, "learning_rate": 1.9450963986106073e-05, "loss": 0.2817, "step": 2845 }, { "epoch": 0.2153918925311007, "grad_norm": 0.87109375, "learning_rate": 1.945057468403678e-05, "loss": 0.3614, "step": 2846 }, { "epoch": 0.21546757485454804, "grad_norm": 0.8203125, "learning_rate": 1.9450185247894862e-05, "loss": 0.3192, "step": 2847 }, { "epoch": 0.21554325717799536, "grad_norm": 1.765625, "learning_rate": 1.944979567768585e-05, "loss": 0.4436, "step": 2848 }, { "epoch": 0.2156189395014427, "grad_norm": 0.9140625, "learning_rate": 1.9449405973415275e-05, "loss": 0.3315, "step": 2849 }, { "epoch": 0.21569462182489002, "grad_norm": 0.78515625, "learning_rate": 1.9449016135088657e-05, "loss": 0.3222, "step": 2850 }, { "epoch": 0.21577030414833737, "grad_norm": 0.84765625, "learning_rate": 1.944862616271153e-05, "loss": 0.3548, "step": 2851 }, { "epoch": 0.21584598647178468, "grad_norm": 0.90625, "learning_rate": 1.9448236056289425e-05, "loss": 0.3592, "step": 2852 }, { "epoch": 0.215921668795232, "grad_norm": 0.875, "learning_rate": 1.944784581582788e-05, "loss": 0.3471, "step": 2853 }, { "epoch": 0.21599735111867935, "grad_norm": 0.796875, "learning_rate": 1.9447455441332433e-05, "loss": 0.3217, "step": 2854 }, { "epoch": 0.21607303344212667, "grad_norm": 0.8984375, "learning_rate": 1.9447064932808613e-05, "loss": 0.3613, "step": 2855 }, { "epoch": 0.216148715765574, "grad_norm": 0.86328125, "learning_rate": 1.9446674290261965e-05, "loss": 0.3202, "step": 2856 }, { "epoch": 0.21622439808902133, "grad_norm": 0.78515625, "learning_rate": 1.9446283513698033e-05, "loss": 0.3103, "step": 2857 }, { "epoch": 0.21630008041246868, "grad_norm": 0.88671875, "learning_rate": 1.9445892603122357e-05, "loss": 0.3874, "step": 2858 }, { "epoch": 0.216375762735916, "grad_norm": 0.83203125, "learning_rate": 1.9445501558540486e-05, "loss": 0.3196, "step": 2859 }, { "epoch": 0.2164514450593633, "grad_norm": 0.88671875, "learning_rate": 1.944511037995797e-05, "loss": 0.3536, "step": 2860 }, { "epoch": 0.21652712738281066, "grad_norm": 0.796875, "learning_rate": 1.944471906738035e-05, "loss": 0.3316, "step": 2861 }, { "epoch": 0.21660280970625798, "grad_norm": 0.87890625, "learning_rate": 1.944432762081318e-05, "loss": 0.3852, "step": 2862 }, { "epoch": 0.21667849202970532, "grad_norm": 0.8203125, "learning_rate": 1.944393604026202e-05, "loss": 0.3189, "step": 2863 }, { "epoch": 0.21675417435315264, "grad_norm": 0.87890625, "learning_rate": 1.944354432573242e-05, "loss": 0.3863, "step": 2864 }, { "epoch": 0.21682985667659996, "grad_norm": 0.83984375, "learning_rate": 1.944315247722994e-05, "loss": 0.3255, "step": 2865 }, { "epoch": 0.2169055390000473, "grad_norm": 0.80078125, "learning_rate": 1.9442760494760137e-05, "loss": 0.3024, "step": 2866 }, { "epoch": 0.21698122132349462, "grad_norm": 0.84765625, "learning_rate": 1.9442368378328568e-05, "loss": 0.3417, "step": 2867 }, { "epoch": 0.21705690364694197, "grad_norm": 0.8046875, "learning_rate": 1.9441976127940802e-05, "loss": 0.355, "step": 2868 }, { "epoch": 0.21713258597038929, "grad_norm": 0.8828125, "learning_rate": 1.9441583743602402e-05, "loss": 0.3825, "step": 2869 }, { "epoch": 0.21720826829383663, "grad_norm": 0.9453125, "learning_rate": 1.9441191225318934e-05, "loss": 0.3859, "step": 2870 }, { "epoch": 0.21728395061728395, "grad_norm": 0.83984375, "learning_rate": 1.9440798573095966e-05, "loss": 0.3451, "step": 2871 }, { "epoch": 0.21735963294073127, "grad_norm": 0.8203125, "learning_rate": 1.944040578693907e-05, "loss": 0.2982, "step": 2872 }, { "epoch": 0.2174353152641786, "grad_norm": 0.82421875, "learning_rate": 1.944001286685382e-05, "loss": 0.319, "step": 2873 }, { "epoch": 0.21751099758762593, "grad_norm": 0.80078125, "learning_rate": 1.9439619812845784e-05, "loss": 0.3063, "step": 2874 }, { "epoch": 0.21758667991107328, "grad_norm": 0.828125, "learning_rate": 1.9439226624920545e-05, "loss": 0.3407, "step": 2875 }, { "epoch": 0.2176623622345206, "grad_norm": 0.80078125, "learning_rate": 1.9438833303083677e-05, "loss": 0.3238, "step": 2876 }, { "epoch": 0.21773804455796794, "grad_norm": 4.21875, "learning_rate": 1.943843984734076e-05, "loss": 0.3447, "step": 2877 }, { "epoch": 0.21781372688141526, "grad_norm": 0.828125, "learning_rate": 1.943804625769738e-05, "loss": 0.3335, "step": 2878 }, { "epoch": 0.21788940920486258, "grad_norm": 0.82421875, "learning_rate": 1.9437652534159116e-05, "loss": 0.326, "step": 2879 }, { "epoch": 0.21796509152830992, "grad_norm": 0.84375, "learning_rate": 1.9437258676731555e-05, "loss": 0.3608, "step": 2880 }, { "epoch": 0.21804077385175724, "grad_norm": 0.84765625, "learning_rate": 1.9436864685420282e-05, "loss": 0.3542, "step": 2881 }, { "epoch": 0.2181164561752046, "grad_norm": 0.78125, "learning_rate": 1.9436470560230892e-05, "loss": 0.2975, "step": 2882 }, { "epoch": 0.2181921384986519, "grad_norm": 0.7734375, "learning_rate": 1.943607630116897e-05, "loss": 0.309, "step": 2883 }, { "epoch": 0.21826782082209925, "grad_norm": 0.875, "learning_rate": 1.943568190824012e-05, "loss": 0.3637, "step": 2884 }, { "epoch": 0.21834350314554657, "grad_norm": 0.875, "learning_rate": 1.9435287381449925e-05, "loss": 0.3572, "step": 2885 }, { "epoch": 0.2184191854689939, "grad_norm": 0.82421875, "learning_rate": 1.943489272080399e-05, "loss": 0.3361, "step": 2886 }, { "epoch": 0.21849486779244123, "grad_norm": 0.83203125, "learning_rate": 1.9434497926307906e-05, "loss": 0.3071, "step": 2887 }, { "epoch": 0.21857055011588855, "grad_norm": 0.84375, "learning_rate": 1.9434102997967283e-05, "loss": 0.3674, "step": 2888 }, { "epoch": 0.2186462324393359, "grad_norm": 0.84375, "learning_rate": 1.9433707935787715e-05, "loss": 0.2999, "step": 2889 }, { "epoch": 0.21872191476278321, "grad_norm": 0.84375, "learning_rate": 1.9433312739774814e-05, "loss": 0.3143, "step": 2890 }, { "epoch": 0.21879759708623056, "grad_norm": 0.81640625, "learning_rate": 1.9432917409934187e-05, "loss": 0.3313, "step": 2891 }, { "epoch": 0.21887327940967788, "grad_norm": 0.85546875, "learning_rate": 1.9432521946271434e-05, "loss": 0.341, "step": 2892 }, { "epoch": 0.2189489617331252, "grad_norm": 0.79296875, "learning_rate": 1.9432126348792173e-05, "loss": 0.3025, "step": 2893 }, { "epoch": 0.21902464405657254, "grad_norm": 0.8125, "learning_rate": 1.9431730617502012e-05, "loss": 0.3094, "step": 2894 }, { "epoch": 0.21910032638001986, "grad_norm": 0.90234375, "learning_rate": 1.9431334752406568e-05, "loss": 0.3456, "step": 2895 }, { "epoch": 0.2191760087034672, "grad_norm": 0.8671875, "learning_rate": 1.9430938753511458e-05, "loss": 0.3698, "step": 2896 }, { "epoch": 0.21925169102691452, "grad_norm": 0.7578125, "learning_rate": 1.9430542620822293e-05, "loss": 0.2786, "step": 2897 }, { "epoch": 0.21932737335036184, "grad_norm": 0.87109375, "learning_rate": 1.9430146354344698e-05, "loss": 0.3789, "step": 2898 }, { "epoch": 0.2194030556738092, "grad_norm": 0.82421875, "learning_rate": 1.9429749954084297e-05, "loss": 0.3098, "step": 2899 }, { "epoch": 0.2194787379972565, "grad_norm": 0.87109375, "learning_rate": 1.9429353420046714e-05, "loss": 0.3424, "step": 2900 }, { "epoch": 0.21955442032070385, "grad_norm": 0.7890625, "learning_rate": 1.9428956752237563e-05, "loss": 0.3176, "step": 2901 }, { "epoch": 0.21963010264415117, "grad_norm": 2.390625, "learning_rate": 1.9428559950662485e-05, "loss": 0.5257, "step": 2902 }, { "epoch": 0.21970578496759852, "grad_norm": 0.8359375, "learning_rate": 1.9428163015327105e-05, "loss": 0.3423, "step": 2903 }, { "epoch": 0.21978146729104583, "grad_norm": 0.84765625, "learning_rate": 1.942776594623705e-05, "loss": 0.3526, "step": 2904 }, { "epoch": 0.21985714961449315, "grad_norm": 0.87890625, "learning_rate": 1.942736874339796e-05, "loss": 0.382, "step": 2905 }, { "epoch": 0.2199328319379405, "grad_norm": 0.82421875, "learning_rate": 1.9426971406815464e-05, "loss": 0.3217, "step": 2906 }, { "epoch": 0.22000851426138782, "grad_norm": 0.80859375, "learning_rate": 1.9426573936495203e-05, "loss": 0.3134, "step": 2907 }, { "epoch": 0.22008419658483516, "grad_norm": 2.1875, "learning_rate": 1.942617633244281e-05, "loss": 0.4572, "step": 2908 }, { "epoch": 0.22015987890828248, "grad_norm": 0.87109375, "learning_rate": 1.9425778594663935e-05, "loss": 0.3651, "step": 2909 }, { "epoch": 0.22023556123172983, "grad_norm": 0.91015625, "learning_rate": 1.9425380723164212e-05, "loss": 0.3591, "step": 2910 }, { "epoch": 0.22031124355517714, "grad_norm": 0.80078125, "learning_rate": 1.9424982717949292e-05, "loss": 0.3167, "step": 2911 }, { "epoch": 0.22038692587862446, "grad_norm": 1.6015625, "learning_rate": 1.9424584579024818e-05, "loss": 0.4549, "step": 2912 }, { "epoch": 0.2204626082020718, "grad_norm": 0.859375, "learning_rate": 1.9424186306396436e-05, "loss": 0.3395, "step": 2913 }, { "epoch": 0.22053829052551913, "grad_norm": 0.828125, "learning_rate": 1.9423787900069803e-05, "loss": 0.3348, "step": 2914 }, { "epoch": 0.22061397284896647, "grad_norm": 1.375, "learning_rate": 1.9423389360050563e-05, "loss": 0.3892, "step": 2915 }, { "epoch": 0.2206896551724138, "grad_norm": 0.86328125, "learning_rate": 1.9422990686344373e-05, "loss": 0.3645, "step": 2916 }, { "epoch": 0.22076533749586114, "grad_norm": 0.8359375, "learning_rate": 1.9422591878956892e-05, "loss": 0.3324, "step": 2917 }, { "epoch": 0.22084101981930845, "grad_norm": 0.890625, "learning_rate": 1.9422192937893775e-05, "loss": 0.3557, "step": 2918 }, { "epoch": 0.22091670214275577, "grad_norm": 0.84375, "learning_rate": 1.942179386316068e-05, "loss": 0.3062, "step": 2919 }, { "epoch": 0.22099238446620312, "grad_norm": 2.234375, "learning_rate": 1.9421394654763275e-05, "loss": 0.3846, "step": 2920 }, { "epoch": 0.22106806678965044, "grad_norm": 0.82421875, "learning_rate": 1.9420995312707216e-05, "loss": 0.3159, "step": 2921 }, { "epoch": 0.22114374911309778, "grad_norm": 0.90234375, "learning_rate": 1.9420595836998173e-05, "loss": 0.3628, "step": 2922 }, { "epoch": 0.2212194314365451, "grad_norm": 0.79296875, "learning_rate": 1.942019622764181e-05, "loss": 0.3312, "step": 2923 }, { "epoch": 0.22129511375999245, "grad_norm": 0.8671875, "learning_rate": 1.94197964846438e-05, "loss": 0.3469, "step": 2924 }, { "epoch": 0.22137079608343976, "grad_norm": 0.8515625, "learning_rate": 1.9419396608009812e-05, "loss": 0.3569, "step": 2925 }, { "epoch": 0.22144647840688708, "grad_norm": 0.84765625, "learning_rate": 1.9418996597745517e-05, "loss": 0.3282, "step": 2926 }, { "epoch": 0.22152216073033443, "grad_norm": 0.80859375, "learning_rate": 1.9418596453856594e-05, "loss": 0.331, "step": 2927 }, { "epoch": 0.22159784305378175, "grad_norm": 0.859375, "learning_rate": 1.9418196176348715e-05, "loss": 0.3459, "step": 2928 }, { "epoch": 0.2216735253772291, "grad_norm": 0.84375, "learning_rate": 1.9417795765227564e-05, "loss": 0.3563, "step": 2929 }, { "epoch": 0.2217492077006764, "grad_norm": 0.87109375, "learning_rate": 1.9417395220498815e-05, "loss": 0.3693, "step": 2930 }, { "epoch": 0.22182489002412373, "grad_norm": 0.89453125, "learning_rate": 1.941699454216816e-05, "loss": 0.3288, "step": 2931 }, { "epoch": 0.22190057234757107, "grad_norm": 0.796875, "learning_rate": 1.941659373024127e-05, "loss": 0.3167, "step": 2932 }, { "epoch": 0.2219762546710184, "grad_norm": 0.96484375, "learning_rate": 1.941619278472384e-05, "loss": 0.3902, "step": 2933 }, { "epoch": 0.22205193699446574, "grad_norm": 0.875, "learning_rate": 1.941579170562156e-05, "loss": 0.3536, "step": 2934 }, { "epoch": 0.22212761931791306, "grad_norm": 0.76953125, "learning_rate": 1.9415390492940114e-05, "loss": 0.2858, "step": 2935 }, { "epoch": 0.2222033016413604, "grad_norm": 0.77734375, "learning_rate": 1.9414989146685194e-05, "loss": 0.2908, "step": 2936 }, { "epoch": 0.22227898396480772, "grad_norm": 0.83203125, "learning_rate": 1.94145876668625e-05, "loss": 0.3242, "step": 2937 }, { "epoch": 0.22235466628825504, "grad_norm": 0.8984375, "learning_rate": 1.9414186053477726e-05, "loss": 0.3886, "step": 2938 }, { "epoch": 0.22243034861170238, "grad_norm": 0.80859375, "learning_rate": 1.941378430653656e-05, "loss": 0.3158, "step": 2939 }, { "epoch": 0.2225060309351497, "grad_norm": 0.796875, "learning_rate": 1.9413382426044718e-05, "loss": 0.3173, "step": 2940 }, { "epoch": 0.22258171325859705, "grad_norm": 0.75390625, "learning_rate": 1.941298041200789e-05, "loss": 0.2701, "step": 2941 }, { "epoch": 0.22265739558204437, "grad_norm": 0.75, "learning_rate": 1.9412578264431778e-05, "loss": 0.2459, "step": 2942 }, { "epoch": 0.2227330779054917, "grad_norm": 0.765625, "learning_rate": 1.9412175983322092e-05, "loss": 0.2812, "step": 2943 }, { "epoch": 0.22280876022893903, "grad_norm": 0.89453125, "learning_rate": 1.941177356868454e-05, "loss": 0.3824, "step": 2944 }, { "epoch": 0.22288444255238635, "grad_norm": 2.28125, "learning_rate": 1.9411371020524827e-05, "loss": 0.3879, "step": 2945 }, { "epoch": 0.2229601248758337, "grad_norm": 1.6953125, "learning_rate": 1.9410968338848666e-05, "loss": 0.3164, "step": 2946 }, { "epoch": 0.223035807199281, "grad_norm": 1.8125, "learning_rate": 1.941056552366177e-05, "loss": 0.4709, "step": 2947 }, { "epoch": 0.22311148952272836, "grad_norm": 0.828125, "learning_rate": 1.941016257496985e-05, "loss": 0.3331, "step": 2948 }, { "epoch": 0.22318717184617567, "grad_norm": 0.86328125, "learning_rate": 1.9409759492778632e-05, "loss": 0.3356, "step": 2949 }, { "epoch": 0.22326285416962302, "grad_norm": 0.87109375, "learning_rate": 1.940935627709382e-05, "loss": 0.3667, "step": 2950 }, { "epoch": 0.22333853649307034, "grad_norm": 0.8359375, "learning_rate": 1.9408952927921147e-05, "loss": 0.3412, "step": 2951 }, { "epoch": 0.22341421881651766, "grad_norm": 0.77734375, "learning_rate": 1.9408549445266328e-05, "loss": 0.2948, "step": 2952 }, { "epoch": 0.223489901139965, "grad_norm": 0.8359375, "learning_rate": 1.940814582913509e-05, "loss": 0.3358, "step": 2953 }, { "epoch": 0.22356558346341232, "grad_norm": 1.0234375, "learning_rate": 1.940774207953316e-05, "loss": 0.3356, "step": 2954 }, { "epoch": 0.22364126578685967, "grad_norm": 0.82421875, "learning_rate": 1.9407338196466267e-05, "loss": 0.3091, "step": 2955 }, { "epoch": 0.22371694811030698, "grad_norm": 0.76171875, "learning_rate": 1.9406934179940134e-05, "loss": 0.2805, "step": 2956 }, { "epoch": 0.2237926304337543, "grad_norm": 1.015625, "learning_rate": 1.9406530029960494e-05, "loss": 0.3896, "step": 2957 }, { "epoch": 0.22386831275720165, "grad_norm": 0.83984375, "learning_rate": 1.940612574653309e-05, "loss": 0.3444, "step": 2958 }, { "epoch": 0.22394399508064897, "grad_norm": 0.8828125, "learning_rate": 1.9405721329663648e-05, "loss": 0.3667, "step": 2959 }, { "epoch": 0.2240196774040963, "grad_norm": 0.79296875, "learning_rate": 1.9405316779357904e-05, "loss": 0.3124, "step": 2960 }, { "epoch": 0.22409535972754363, "grad_norm": 0.890625, "learning_rate": 1.9404912095621608e-05, "loss": 0.3399, "step": 2961 }, { "epoch": 0.22417104205099098, "grad_norm": 0.921875, "learning_rate": 1.9404507278460492e-05, "loss": 0.3637, "step": 2962 }, { "epoch": 0.2242467243744383, "grad_norm": 0.84765625, "learning_rate": 1.94041023278803e-05, "loss": 0.3262, "step": 2963 }, { "epoch": 0.2243224066978856, "grad_norm": 0.87890625, "learning_rate": 1.940369724388678e-05, "loss": 0.3711, "step": 2964 }, { "epoch": 0.22439808902133296, "grad_norm": 0.87109375, "learning_rate": 1.9403292026485676e-05, "loss": 0.3407, "step": 2965 }, { "epoch": 0.22447377134478028, "grad_norm": 0.91015625, "learning_rate": 1.940288667568274e-05, "loss": 0.3464, "step": 2966 }, { "epoch": 0.22454945366822762, "grad_norm": 0.859375, "learning_rate": 1.940248119148372e-05, "loss": 0.3579, "step": 2967 }, { "epoch": 0.22462513599167494, "grad_norm": 0.80859375, "learning_rate": 1.940207557389437e-05, "loss": 0.3119, "step": 2968 }, { "epoch": 0.22470081831512229, "grad_norm": 0.8515625, "learning_rate": 1.9401669822920443e-05, "loss": 0.3279, "step": 2969 }, { "epoch": 0.2247765006385696, "grad_norm": 0.97265625, "learning_rate": 1.9401263938567694e-05, "loss": 0.3342, "step": 2970 }, { "epoch": 0.22485218296201692, "grad_norm": 0.8125, "learning_rate": 1.9400857920841885e-05, "loss": 0.3059, "step": 2971 }, { "epoch": 0.22492786528546427, "grad_norm": 0.8515625, "learning_rate": 1.940045176974877e-05, "loss": 0.3551, "step": 2972 }, { "epoch": 0.22500354760891159, "grad_norm": 0.84375, "learning_rate": 1.9400045485294117e-05, "loss": 0.3185, "step": 2973 }, { "epoch": 0.22507922993235893, "grad_norm": 0.83203125, "learning_rate": 1.939963906748369e-05, "loss": 0.3607, "step": 2974 }, { "epoch": 0.22515491225580625, "grad_norm": 0.80078125, "learning_rate": 1.939923251632325e-05, "loss": 0.2988, "step": 2975 }, { "epoch": 0.2252305945792536, "grad_norm": 0.73828125, "learning_rate": 1.939882583181857e-05, "loss": 0.2861, "step": 2976 }, { "epoch": 0.2253062769027009, "grad_norm": 0.80859375, "learning_rate": 1.9398419013975416e-05, "loss": 0.3372, "step": 2977 }, { "epoch": 0.22538195922614823, "grad_norm": 0.87109375, "learning_rate": 1.9398012062799557e-05, "loss": 0.3148, "step": 2978 }, { "epoch": 0.22545764154959558, "grad_norm": 0.796875, "learning_rate": 1.9397604978296772e-05, "loss": 0.3086, "step": 2979 }, { "epoch": 0.2255333238730429, "grad_norm": 0.82421875, "learning_rate": 1.939719776047283e-05, "loss": 0.3142, "step": 2980 }, { "epoch": 0.22560900619649024, "grad_norm": 2.078125, "learning_rate": 1.939679040933352e-05, "loss": 0.395, "step": 2981 }, { "epoch": 0.22568468851993756, "grad_norm": 0.9296875, "learning_rate": 1.9396382924884606e-05, "loss": 0.4044, "step": 2982 }, { "epoch": 0.2257603708433849, "grad_norm": 0.8359375, "learning_rate": 1.9395975307131873e-05, "loss": 0.3515, "step": 2983 }, { "epoch": 0.22583605316683222, "grad_norm": 0.828125, "learning_rate": 1.9395567556081112e-05, "loss": 0.2972, "step": 2984 }, { "epoch": 0.22591173549027954, "grad_norm": 0.82421875, "learning_rate": 1.9395159671738098e-05, "loss": 0.3364, "step": 2985 }, { "epoch": 0.2259874178137269, "grad_norm": 0.89453125, "learning_rate": 1.9394751654108622e-05, "loss": 0.3558, "step": 2986 }, { "epoch": 0.2260631001371742, "grad_norm": 0.765625, "learning_rate": 1.9394343503198475e-05, "loss": 0.2814, "step": 2987 }, { "epoch": 0.22613878246062155, "grad_norm": 0.78515625, "learning_rate": 1.9393935219013443e-05, "loss": 0.3095, "step": 2988 }, { "epoch": 0.22621446478406887, "grad_norm": 0.80859375, "learning_rate": 1.9393526801559317e-05, "loss": 0.3393, "step": 2989 }, { "epoch": 0.2262901471075162, "grad_norm": 0.796875, "learning_rate": 1.9393118250841897e-05, "loss": 0.2999, "step": 2990 }, { "epoch": 0.22636582943096353, "grad_norm": 0.875, "learning_rate": 1.939270956686697e-05, "loss": 0.354, "step": 2991 }, { "epoch": 0.22644151175441085, "grad_norm": 1.0625, "learning_rate": 1.9392300749640344e-05, "loss": 0.3611, "step": 2992 }, { "epoch": 0.2265171940778582, "grad_norm": 0.83203125, "learning_rate": 1.9391891799167812e-05, "loss": 0.3444, "step": 2993 }, { "epoch": 0.22659287640130552, "grad_norm": 0.859375, "learning_rate": 1.9391482715455178e-05, "loss": 0.3554, "step": 2994 }, { "epoch": 0.22666855872475286, "grad_norm": 0.7890625, "learning_rate": 1.939107349850825e-05, "loss": 0.3063, "step": 2995 }, { "epoch": 0.22674424104820018, "grad_norm": 0.84765625, "learning_rate": 1.9390664148332822e-05, "loss": 0.34, "step": 2996 }, { "epoch": 0.2268199233716475, "grad_norm": 0.91015625, "learning_rate": 1.939025466493471e-05, "loss": 0.3533, "step": 2997 }, { "epoch": 0.22689560569509484, "grad_norm": 0.8828125, "learning_rate": 1.938984504831972e-05, "loss": 0.3537, "step": 2998 }, { "epoch": 0.22697128801854216, "grad_norm": 0.82421875, "learning_rate": 1.938943529849367e-05, "loss": 0.3221, "step": 2999 }, { "epoch": 0.2270469703419895, "grad_norm": 0.90234375, "learning_rate": 1.9389025415462365e-05, "loss": 0.384, "step": 3000 }, { "epoch": 0.22712265266543682, "grad_norm": 0.9140625, "learning_rate": 1.938861539923162e-05, "loss": 0.3534, "step": 3001 }, { "epoch": 0.22719833498888417, "grad_norm": 0.828125, "learning_rate": 1.9388205249807254e-05, "loss": 0.3456, "step": 3002 }, { "epoch": 0.2272740173123315, "grad_norm": 0.85546875, "learning_rate": 1.938779496719509e-05, "loss": 0.3623, "step": 3003 }, { "epoch": 0.2273496996357788, "grad_norm": 0.84375, "learning_rate": 1.9387384551400942e-05, "loss": 0.3412, "step": 3004 }, { "epoch": 0.22742538195922615, "grad_norm": 0.80078125, "learning_rate": 1.938697400243063e-05, "loss": 0.3099, "step": 3005 }, { "epoch": 0.22750106428267347, "grad_norm": 3.65625, "learning_rate": 1.9386563320289992e-05, "loss": 0.5281, "step": 3006 }, { "epoch": 0.22757674660612082, "grad_norm": 0.86328125, "learning_rate": 1.9386152504984837e-05, "loss": 0.3221, "step": 3007 }, { "epoch": 0.22765242892956813, "grad_norm": 0.84765625, "learning_rate": 1.9385741556521008e-05, "loss": 0.3335, "step": 3008 }, { "epoch": 0.22772811125301548, "grad_norm": 0.80859375, "learning_rate": 1.9385330474904327e-05, "loss": 0.331, "step": 3009 }, { "epoch": 0.2278037935764628, "grad_norm": 0.95703125, "learning_rate": 1.9384919260140627e-05, "loss": 0.405, "step": 3010 }, { "epoch": 0.22787947589991012, "grad_norm": 0.7265625, "learning_rate": 1.938450791223574e-05, "loss": 0.2806, "step": 3011 }, { "epoch": 0.22795515822335746, "grad_norm": 0.88671875, "learning_rate": 1.9384096431195507e-05, "loss": 0.3312, "step": 3012 }, { "epoch": 0.22803084054680478, "grad_norm": 0.9765625, "learning_rate": 1.938368481702576e-05, "loss": 0.3591, "step": 3013 }, { "epoch": 0.22810652287025213, "grad_norm": 0.765625, "learning_rate": 1.9383273069732344e-05, "loss": 0.2844, "step": 3014 }, { "epoch": 0.22818220519369944, "grad_norm": 0.86328125, "learning_rate": 1.9382861189321093e-05, "loss": 0.3485, "step": 3015 }, { "epoch": 0.2282578875171468, "grad_norm": 0.78515625, "learning_rate": 1.9382449175797856e-05, "loss": 0.3119, "step": 3016 }, { "epoch": 0.2283335698405941, "grad_norm": 0.87890625, "learning_rate": 1.938203702916848e-05, "loss": 0.342, "step": 3017 }, { "epoch": 0.22840925216404143, "grad_norm": 0.80859375, "learning_rate": 1.9381624749438807e-05, "loss": 0.3159, "step": 3018 }, { "epoch": 0.22848493448748877, "grad_norm": 1.6171875, "learning_rate": 1.9381212336614684e-05, "loss": 0.4216, "step": 3019 }, { "epoch": 0.2285606168109361, "grad_norm": 0.80859375, "learning_rate": 1.938079979070197e-05, "loss": 0.3404, "step": 3020 }, { "epoch": 0.22863629913438344, "grad_norm": 0.86328125, "learning_rate": 1.938038711170651e-05, "loss": 0.3744, "step": 3021 }, { "epoch": 0.22871198145783075, "grad_norm": 0.796875, "learning_rate": 1.9379974299634164e-05, "loss": 0.2862, "step": 3022 }, { "epoch": 0.22878766378127807, "grad_norm": 0.88671875, "learning_rate": 1.9379561354490783e-05, "loss": 0.3999, "step": 3023 }, { "epoch": 0.22886334610472542, "grad_norm": 0.859375, "learning_rate": 1.9379148276282232e-05, "loss": 0.3139, "step": 3024 }, { "epoch": 0.22893902842817274, "grad_norm": 0.86328125, "learning_rate": 1.9378735065014363e-05, "loss": 0.3284, "step": 3025 }, { "epoch": 0.22901471075162008, "grad_norm": 0.84765625, "learning_rate": 1.9378321720693045e-05, "loss": 0.3196, "step": 3026 }, { "epoch": 0.2290903930750674, "grad_norm": 0.84375, "learning_rate": 1.937790824332414e-05, "loss": 0.3596, "step": 3027 }, { "epoch": 0.22916607539851475, "grad_norm": 0.76171875, "learning_rate": 1.9377494632913514e-05, "loss": 0.3137, "step": 3028 }, { "epoch": 0.22924175772196206, "grad_norm": 0.86328125, "learning_rate": 1.937708088946703e-05, "loss": 0.3389, "step": 3029 }, { "epoch": 0.22931744004540938, "grad_norm": 0.81640625, "learning_rate": 1.9376667012990568e-05, "loss": 0.3112, "step": 3030 }, { "epoch": 0.22939312236885673, "grad_norm": 0.828125, "learning_rate": 1.937625300348999e-05, "loss": 0.3355, "step": 3031 }, { "epoch": 0.22946880469230405, "grad_norm": 0.8984375, "learning_rate": 1.937583886097117e-05, "loss": 0.3616, "step": 3032 }, { "epoch": 0.2295444870157514, "grad_norm": 0.89453125, "learning_rate": 1.9375424585439994e-05, "loss": 0.361, "step": 3033 }, { "epoch": 0.2296201693391987, "grad_norm": 0.81640625, "learning_rate": 1.9375010176902327e-05, "loss": 0.3297, "step": 3034 }, { "epoch": 0.22969585166264606, "grad_norm": 0.78125, "learning_rate": 1.9374595635364054e-05, "loss": 0.2809, "step": 3035 }, { "epoch": 0.22977153398609337, "grad_norm": 0.84375, "learning_rate": 1.937418096083105e-05, "loss": 0.3509, "step": 3036 }, { "epoch": 0.2298472163095407, "grad_norm": 0.83203125, "learning_rate": 1.937376615330921e-05, "loss": 0.36, "step": 3037 }, { "epoch": 0.22992289863298804, "grad_norm": 0.83203125, "learning_rate": 1.9373351212804406e-05, "loss": 0.3244, "step": 3038 }, { "epoch": 0.22999858095643536, "grad_norm": 0.85546875, "learning_rate": 1.937293613932253e-05, "loss": 0.3138, "step": 3039 }, { "epoch": 0.2300742632798827, "grad_norm": 0.7890625, "learning_rate": 1.9372520932869473e-05, "loss": 0.3, "step": 3040 }, { "epoch": 0.23014994560333002, "grad_norm": 0.84765625, "learning_rate": 1.937210559345112e-05, "loss": 0.3597, "step": 3041 }, { "epoch": 0.23022562792677737, "grad_norm": 0.8671875, "learning_rate": 1.9371690121073367e-05, "loss": 0.3701, "step": 3042 }, { "epoch": 0.23030131025022468, "grad_norm": 0.91015625, "learning_rate": 1.937127451574211e-05, "loss": 0.4122, "step": 3043 }, { "epoch": 0.230376992573672, "grad_norm": 0.94140625, "learning_rate": 1.937085877746324e-05, "loss": 0.3689, "step": 3044 }, { "epoch": 0.23045267489711935, "grad_norm": 0.78125, "learning_rate": 1.9370442906242656e-05, "loss": 0.2947, "step": 3045 }, { "epoch": 0.23052835722056667, "grad_norm": 4.28125, "learning_rate": 1.937002690208626e-05, "loss": 0.4513, "step": 3046 }, { "epoch": 0.230604039544014, "grad_norm": 0.74609375, "learning_rate": 1.936961076499995e-05, "loss": 0.2834, "step": 3047 }, { "epoch": 0.23067972186746133, "grad_norm": 0.7890625, "learning_rate": 1.9369194494989637e-05, "loss": 0.3012, "step": 3048 }, { "epoch": 0.23075540419090865, "grad_norm": 0.765625, "learning_rate": 1.9368778092061222e-05, "loss": 0.295, "step": 3049 }, { "epoch": 0.230831086514356, "grad_norm": 0.80078125, "learning_rate": 1.9368361556220613e-05, "loss": 0.314, "step": 3050 }, { "epoch": 0.2309067688378033, "grad_norm": 0.77734375, "learning_rate": 1.9367944887473714e-05, "loss": 0.277, "step": 3051 }, { "epoch": 0.23098245116125066, "grad_norm": 0.84375, "learning_rate": 1.9367528085826444e-05, "loss": 0.3636, "step": 3052 }, { "epoch": 0.23105813348469798, "grad_norm": 0.8125, "learning_rate": 1.9367111151284712e-05, "loss": 0.3133, "step": 3053 }, { "epoch": 0.23113381580814532, "grad_norm": 0.75390625, "learning_rate": 1.9366694083854435e-05, "loss": 0.2733, "step": 3054 }, { "epoch": 0.23120949813159264, "grad_norm": 0.79296875, "learning_rate": 1.9366276883541526e-05, "loss": 0.3161, "step": 3055 }, { "epoch": 0.23128518045503996, "grad_norm": 0.76171875, "learning_rate": 1.9365859550351907e-05, "loss": 0.2775, "step": 3056 }, { "epoch": 0.2313608627784873, "grad_norm": 0.74609375, "learning_rate": 1.9365442084291502e-05, "loss": 0.245, "step": 3057 }, { "epoch": 0.23143654510193462, "grad_norm": 0.8828125, "learning_rate": 1.9365024485366227e-05, "loss": 0.3872, "step": 3058 }, { "epoch": 0.23151222742538197, "grad_norm": 0.83203125, "learning_rate": 1.936460675358201e-05, "loss": 0.3497, "step": 3059 }, { "epoch": 0.23158790974882928, "grad_norm": 0.83203125, "learning_rate": 1.936418888894477e-05, "loss": 0.3253, "step": 3060 }, { "epoch": 0.23166359207227663, "grad_norm": 0.8828125, "learning_rate": 1.9363770891460446e-05, "loss": 0.3892, "step": 3061 }, { "epoch": 0.23173927439572395, "grad_norm": 0.84765625, "learning_rate": 1.9363352761134964e-05, "loss": 0.3099, "step": 3062 }, { "epoch": 0.23181495671917127, "grad_norm": 0.8515625, "learning_rate": 1.936293449797425e-05, "loss": 0.3306, "step": 3063 }, { "epoch": 0.2318906390426186, "grad_norm": 0.921875, "learning_rate": 1.9362516101984248e-05, "loss": 0.3576, "step": 3064 }, { "epoch": 0.23196632136606593, "grad_norm": 0.8359375, "learning_rate": 1.9362097573170886e-05, "loss": 0.3507, "step": 3065 }, { "epoch": 0.23204200368951328, "grad_norm": 0.8828125, "learning_rate": 1.9361678911540105e-05, "loss": 0.3539, "step": 3066 }, { "epoch": 0.2321176860129606, "grad_norm": 0.80859375, "learning_rate": 1.936126011709784e-05, "loss": 0.3258, "step": 3067 }, { "epoch": 0.23219336833640794, "grad_norm": 0.99609375, "learning_rate": 1.9360841189850043e-05, "loss": 0.3087, "step": 3068 }, { "epoch": 0.23226905065985526, "grad_norm": 0.9140625, "learning_rate": 1.9360422129802644e-05, "loss": 0.3569, "step": 3069 }, { "epoch": 0.23234473298330258, "grad_norm": 0.8046875, "learning_rate": 1.9360002936961598e-05, "loss": 0.3252, "step": 3070 }, { "epoch": 0.23242041530674992, "grad_norm": 0.85546875, "learning_rate": 1.9359583611332847e-05, "loss": 0.3331, "step": 3071 }, { "epoch": 0.23249609763019724, "grad_norm": 0.8125, "learning_rate": 1.935916415292234e-05, "loss": 0.32, "step": 3072 }, { "epoch": 0.2325717799536446, "grad_norm": 0.8125, "learning_rate": 1.9358744561736027e-05, "loss": 0.3128, "step": 3073 }, { "epoch": 0.2326474622770919, "grad_norm": 0.8515625, "learning_rate": 1.9358324837779864e-05, "loss": 0.3622, "step": 3074 }, { "epoch": 0.23272314460053925, "grad_norm": 0.7734375, "learning_rate": 1.93579049810598e-05, "loss": 0.285, "step": 3075 }, { "epoch": 0.23279882692398657, "grad_norm": 0.8203125, "learning_rate": 1.93574849915818e-05, "loss": 0.3189, "step": 3076 }, { "epoch": 0.2328745092474339, "grad_norm": 0.82421875, "learning_rate": 1.9357064869351814e-05, "loss": 0.3676, "step": 3077 }, { "epoch": 0.23295019157088123, "grad_norm": 0.8203125, "learning_rate": 1.935664461437581e-05, "loss": 0.3194, "step": 3078 }, { "epoch": 0.23302587389432855, "grad_norm": 0.83984375, "learning_rate": 1.9356224226659744e-05, "loss": 0.3332, "step": 3079 }, { "epoch": 0.2331015562177759, "grad_norm": 0.85546875, "learning_rate": 1.935580370620958e-05, "loss": 0.3653, "step": 3080 }, { "epoch": 0.23317723854122321, "grad_norm": 0.8515625, "learning_rate": 1.9355383053031284e-05, "loss": 0.3542, "step": 3081 }, { "epoch": 0.23325292086467053, "grad_norm": 0.83984375, "learning_rate": 1.9354962267130827e-05, "loss": 0.3234, "step": 3082 }, { "epoch": 0.23332860318811788, "grad_norm": 0.7890625, "learning_rate": 1.9354541348514177e-05, "loss": 0.3087, "step": 3083 }, { "epoch": 0.2334042855115652, "grad_norm": 2.09375, "learning_rate": 1.9354120297187303e-05, "loss": 0.3045, "step": 3084 }, { "epoch": 0.23347996783501254, "grad_norm": 0.83203125, "learning_rate": 1.9353699113156183e-05, "loss": 0.3309, "step": 3085 }, { "epoch": 0.23355565015845986, "grad_norm": 0.93359375, "learning_rate": 1.9353277796426788e-05, "loss": 0.3709, "step": 3086 }, { "epoch": 0.2336313324819072, "grad_norm": 1.1640625, "learning_rate": 1.9352856347005097e-05, "loss": 0.3563, "step": 3087 }, { "epoch": 0.23370701480535452, "grad_norm": 0.8046875, "learning_rate": 1.935243476489709e-05, "loss": 0.3322, "step": 3088 }, { "epoch": 0.23378269712880184, "grad_norm": 0.80859375, "learning_rate": 1.9352013050108744e-05, "loss": 0.2979, "step": 3089 }, { "epoch": 0.2338583794522492, "grad_norm": 0.83984375, "learning_rate": 1.9351591202646048e-05, "loss": 0.3693, "step": 3090 }, { "epoch": 0.2339340617756965, "grad_norm": 0.78515625, "learning_rate": 1.935116922251498e-05, "loss": 0.3014, "step": 3091 }, { "epoch": 0.23400974409914385, "grad_norm": 0.84375, "learning_rate": 1.935074710972153e-05, "loss": 0.367, "step": 3092 }, { "epoch": 0.23408542642259117, "grad_norm": 0.91015625, "learning_rate": 1.9350324864271686e-05, "loss": 0.3538, "step": 3093 }, { "epoch": 0.23416110874603852, "grad_norm": 0.8828125, "learning_rate": 1.9349902486171438e-05, "loss": 0.4076, "step": 3094 }, { "epoch": 0.23423679106948583, "grad_norm": 0.8203125, "learning_rate": 1.9349479975426778e-05, "loss": 0.3234, "step": 3095 }, { "epoch": 0.23431247339293315, "grad_norm": 0.88671875, "learning_rate": 1.9349057332043698e-05, "loss": 0.3666, "step": 3096 }, { "epoch": 0.2343881557163805, "grad_norm": 0.87109375, "learning_rate": 1.93486345560282e-05, "loss": 0.3329, "step": 3097 }, { "epoch": 0.23446383803982782, "grad_norm": 0.80078125, "learning_rate": 1.9348211647386278e-05, "loss": 0.3115, "step": 3098 }, { "epoch": 0.23453952036327516, "grad_norm": 0.79296875, "learning_rate": 1.9347788606123928e-05, "loss": 0.319, "step": 3099 }, { "epoch": 0.23461520268672248, "grad_norm": 0.78515625, "learning_rate": 1.9347365432247155e-05, "loss": 0.3279, "step": 3100 }, { "epoch": 0.23469088501016983, "grad_norm": 0.83984375, "learning_rate": 1.9346942125761968e-05, "loss": 0.3492, "step": 3101 }, { "epoch": 0.23476656733361714, "grad_norm": 0.81640625, "learning_rate": 1.9346518686674365e-05, "loss": 0.3261, "step": 3102 }, { "epoch": 0.23484224965706446, "grad_norm": 0.92578125, "learning_rate": 1.9346095114990354e-05, "loss": 0.4003, "step": 3103 }, { "epoch": 0.2349179319805118, "grad_norm": 0.83984375, "learning_rate": 1.9345671410715947e-05, "loss": 0.3538, "step": 3104 }, { "epoch": 0.23499361430395913, "grad_norm": 0.97265625, "learning_rate": 1.9345247573857152e-05, "loss": 0.3317, "step": 3105 }, { "epoch": 0.23506929662740647, "grad_norm": 0.87890625, "learning_rate": 1.934482360441998e-05, "loss": 0.3412, "step": 3106 }, { "epoch": 0.2351449789508538, "grad_norm": 0.84765625, "learning_rate": 1.9344399502410455e-05, "loss": 0.3361, "step": 3107 }, { "epoch": 0.23522066127430113, "grad_norm": 0.83984375, "learning_rate": 1.9343975267834584e-05, "loss": 0.3516, "step": 3108 }, { "epoch": 0.23529634359774845, "grad_norm": 0.90234375, "learning_rate": 1.934355090069839e-05, "loss": 0.372, "step": 3109 }, { "epoch": 0.23537202592119577, "grad_norm": 0.78515625, "learning_rate": 1.9343126401007893e-05, "loss": 0.2884, "step": 3110 }, { "epoch": 0.23544770824464312, "grad_norm": 0.8359375, "learning_rate": 1.9342701768769112e-05, "loss": 0.3323, "step": 3111 }, { "epoch": 0.23552339056809043, "grad_norm": 1.484375, "learning_rate": 1.934227700398808e-05, "loss": 0.3914, "step": 3112 }, { "epoch": 0.23559907289153778, "grad_norm": 0.79296875, "learning_rate": 1.934185210667081e-05, "loss": 0.3395, "step": 3113 }, { "epoch": 0.2356747552149851, "grad_norm": 1.4296875, "learning_rate": 1.934142707682334e-05, "loss": 0.4239, "step": 3114 }, { "epoch": 0.23575043753843242, "grad_norm": 0.79296875, "learning_rate": 1.9341001914451694e-05, "loss": 0.2947, "step": 3115 }, { "epoch": 0.23582611986187976, "grad_norm": 0.7734375, "learning_rate": 1.9340576619561907e-05, "loss": 0.3202, "step": 3116 }, { "epoch": 0.23590180218532708, "grad_norm": 0.89453125, "learning_rate": 1.9340151192160013e-05, "loss": 0.3506, "step": 3117 }, { "epoch": 0.23597748450877443, "grad_norm": 0.76953125, "learning_rate": 1.9339725632252045e-05, "loss": 0.2965, "step": 3118 }, { "epoch": 0.23605316683222174, "grad_norm": 0.86328125, "learning_rate": 1.933929993984404e-05, "loss": 0.3581, "step": 3119 }, { "epoch": 0.2361288491556691, "grad_norm": 0.8984375, "learning_rate": 1.933887411494204e-05, "loss": 0.3763, "step": 3120 }, { "epoch": 0.2362045314791164, "grad_norm": 0.796875, "learning_rate": 1.9338448157552088e-05, "loss": 0.317, "step": 3121 }, { "epoch": 0.23628021380256373, "grad_norm": 0.87109375, "learning_rate": 1.933802206768022e-05, "loss": 0.3272, "step": 3122 }, { "epoch": 0.23635589612601107, "grad_norm": 0.8828125, "learning_rate": 1.9337595845332484e-05, "loss": 0.3631, "step": 3123 }, { "epoch": 0.2364315784494584, "grad_norm": 0.79296875, "learning_rate": 1.933716949051493e-05, "loss": 0.3219, "step": 3124 }, { "epoch": 0.23650726077290574, "grad_norm": 0.79296875, "learning_rate": 1.93367430032336e-05, "loss": 0.2724, "step": 3125 }, { "epoch": 0.23658294309635305, "grad_norm": 0.85546875, "learning_rate": 1.933631638349455e-05, "loss": 0.3378, "step": 3126 }, { "epoch": 0.2366586254198004, "grad_norm": 0.8203125, "learning_rate": 1.933588963130383e-05, "loss": 0.3263, "step": 3127 }, { "epoch": 0.23673430774324772, "grad_norm": 0.984375, "learning_rate": 1.9335462746667496e-05, "loss": 0.3293, "step": 3128 }, { "epoch": 0.23680999006669504, "grad_norm": 0.828125, "learning_rate": 1.93350357295916e-05, "loss": 0.3333, "step": 3129 }, { "epoch": 0.23688567239014238, "grad_norm": 0.80078125, "learning_rate": 1.9334608580082204e-05, "loss": 0.3134, "step": 3130 }, { "epoch": 0.2369613547135897, "grad_norm": 0.83984375, "learning_rate": 1.9334181298145365e-05, "loss": 0.3541, "step": 3131 }, { "epoch": 0.23703703703703705, "grad_norm": 0.82421875, "learning_rate": 1.9333753883787153e-05, "loss": 0.3302, "step": 3132 }, { "epoch": 0.23711271936048436, "grad_norm": 0.7890625, "learning_rate": 1.9333326337013618e-05, "loss": 0.3286, "step": 3133 }, { "epoch": 0.2371884016839317, "grad_norm": 1.734375, "learning_rate": 1.9332898657830836e-05, "loss": 0.423, "step": 3134 }, { "epoch": 0.23726408400737903, "grad_norm": 0.87109375, "learning_rate": 1.9332470846244867e-05, "loss": 0.3761, "step": 3135 }, { "epoch": 0.23733976633082635, "grad_norm": 0.8671875, "learning_rate": 1.9332042902261788e-05, "loss": 0.3596, "step": 3136 }, { "epoch": 0.2374154486542737, "grad_norm": 0.8203125, "learning_rate": 1.9331614825887662e-05, "loss": 0.3097, "step": 3137 }, { "epoch": 0.237491130977721, "grad_norm": 0.90625, "learning_rate": 1.9331186617128572e-05, "loss": 0.3717, "step": 3138 }, { "epoch": 0.23756681330116836, "grad_norm": 0.86328125, "learning_rate": 1.933075827599058e-05, "loss": 0.378, "step": 3139 }, { "epoch": 0.23764249562461567, "grad_norm": 0.859375, "learning_rate": 1.9330329802479776e-05, "loss": 0.3609, "step": 3140 }, { "epoch": 0.237718177948063, "grad_norm": 0.8515625, "learning_rate": 1.9329901196602228e-05, "loss": 0.3314, "step": 3141 }, { "epoch": 0.23779386027151034, "grad_norm": 2.421875, "learning_rate": 1.932947245836402e-05, "loss": 0.4572, "step": 3142 }, { "epoch": 0.23786954259495766, "grad_norm": 2.59375, "learning_rate": 1.9329043587771238e-05, "loss": 0.4678, "step": 3143 }, { "epoch": 0.237945224918405, "grad_norm": 0.8515625, "learning_rate": 1.9328614584829963e-05, "loss": 0.3148, "step": 3144 }, { "epoch": 0.23802090724185232, "grad_norm": 0.87109375, "learning_rate": 1.9328185449546282e-05, "loss": 0.3505, "step": 3145 }, { "epoch": 0.23809658956529967, "grad_norm": 0.7890625, "learning_rate": 1.9327756181926283e-05, "loss": 0.2879, "step": 3146 }, { "epoch": 0.23817227188874698, "grad_norm": 0.89453125, "learning_rate": 1.9327326781976055e-05, "loss": 0.3577, "step": 3147 }, { "epoch": 0.2382479542121943, "grad_norm": 0.88671875, "learning_rate": 1.932689724970169e-05, "loss": 0.3653, "step": 3148 }, { "epoch": 0.23832363653564165, "grad_norm": 0.96484375, "learning_rate": 1.932646758510928e-05, "loss": 0.3562, "step": 3149 }, { "epoch": 0.23839931885908897, "grad_norm": 0.8359375, "learning_rate": 1.9326037788204926e-05, "loss": 0.3183, "step": 3150 }, { "epoch": 0.2384750011825363, "grad_norm": 0.78515625, "learning_rate": 1.9325607858994722e-05, "loss": 0.3132, "step": 3151 }, { "epoch": 0.23855068350598363, "grad_norm": 0.82421875, "learning_rate": 1.9325177797484765e-05, "loss": 0.2812, "step": 3152 }, { "epoch": 0.23862636582943098, "grad_norm": 1.015625, "learning_rate": 1.932474760368116e-05, "loss": 0.3254, "step": 3153 }, { "epoch": 0.2387020481528783, "grad_norm": 0.93359375, "learning_rate": 1.9324317277590007e-05, "loss": 0.3753, "step": 3154 }, { "epoch": 0.2387777304763256, "grad_norm": 0.83984375, "learning_rate": 1.9323886819217413e-05, "loss": 0.349, "step": 3155 }, { "epoch": 0.23885341279977296, "grad_norm": 0.81640625, "learning_rate": 1.9323456228569485e-05, "loss": 0.3313, "step": 3156 }, { "epoch": 0.23892909512322028, "grad_norm": 0.78125, "learning_rate": 1.932302550565233e-05, "loss": 0.2944, "step": 3157 }, { "epoch": 0.23900477744666762, "grad_norm": 0.90625, "learning_rate": 1.932259465047206e-05, "loss": 0.3876, "step": 3158 }, { "epoch": 0.23908045977011494, "grad_norm": 0.80859375, "learning_rate": 1.9322163663034784e-05, "loss": 0.3034, "step": 3159 }, { "epoch": 0.23915614209356229, "grad_norm": 0.796875, "learning_rate": 1.9321732543346624e-05, "loss": 0.3044, "step": 3160 }, { "epoch": 0.2392318244170096, "grad_norm": 0.8984375, "learning_rate": 1.9321301291413686e-05, "loss": 0.4053, "step": 3161 }, { "epoch": 0.23930750674045692, "grad_norm": 0.8828125, "learning_rate": 1.9320869907242097e-05, "loss": 0.3403, "step": 3162 }, { "epoch": 0.23938318906390427, "grad_norm": 0.890625, "learning_rate": 1.9320438390837973e-05, "loss": 0.359, "step": 3163 }, { "epoch": 0.23945887138735159, "grad_norm": 0.88671875, "learning_rate": 1.9320006742207434e-05, "loss": 0.3479, "step": 3164 }, { "epoch": 0.23953455371079893, "grad_norm": 0.87890625, "learning_rate": 1.9319574961356607e-05, "loss": 0.3311, "step": 3165 }, { "epoch": 0.23961023603424625, "grad_norm": 1.1015625, "learning_rate": 1.9319143048291615e-05, "loss": 0.3797, "step": 3166 }, { "epoch": 0.2396859183576936, "grad_norm": 0.9140625, "learning_rate": 1.9318711003018587e-05, "loss": 0.3323, "step": 3167 }, { "epoch": 0.2397616006811409, "grad_norm": 0.76171875, "learning_rate": 1.931827882554365e-05, "loss": 0.2719, "step": 3168 }, { "epoch": 0.23983728300458823, "grad_norm": 0.828125, "learning_rate": 1.9317846515872944e-05, "loss": 0.3351, "step": 3169 }, { "epoch": 0.23991296532803558, "grad_norm": 0.83984375, "learning_rate": 1.9317414074012588e-05, "loss": 0.3029, "step": 3170 }, { "epoch": 0.2399886476514829, "grad_norm": 0.890625, "learning_rate": 1.931698149996873e-05, "loss": 0.3246, "step": 3171 }, { "epoch": 0.24006432997493024, "grad_norm": 0.85546875, "learning_rate": 1.9316548793747495e-05, "loss": 0.2911, "step": 3172 }, { "epoch": 0.24014001229837756, "grad_norm": 2.125, "learning_rate": 1.931611595535503e-05, "loss": 0.4411, "step": 3173 }, { "epoch": 0.24021569462182488, "grad_norm": 0.8359375, "learning_rate": 1.9315682984797475e-05, "loss": 0.3408, "step": 3174 }, { "epoch": 0.24029137694527222, "grad_norm": 0.89453125, "learning_rate": 1.931524988208097e-05, "loss": 0.2863, "step": 3175 }, { "epoch": 0.24036705926871954, "grad_norm": 0.80859375, "learning_rate": 1.931481664721166e-05, "loss": 0.3545, "step": 3176 }, { "epoch": 0.2404427415921669, "grad_norm": 1.4375, "learning_rate": 1.931438328019569e-05, "loss": 0.4624, "step": 3177 }, { "epoch": 0.2405184239156142, "grad_norm": 0.875, "learning_rate": 1.9313949781039207e-05, "loss": 0.3485, "step": 3178 }, { "epoch": 0.24059410623906155, "grad_norm": 0.8984375, "learning_rate": 1.931351614974837e-05, "loss": 0.361, "step": 3179 }, { "epoch": 0.24066978856250887, "grad_norm": 0.84765625, "learning_rate": 1.9313082386329315e-05, "loss": 0.3463, "step": 3180 }, { "epoch": 0.2407454708859562, "grad_norm": 0.85546875, "learning_rate": 1.931264849078821e-05, "loss": 0.3407, "step": 3181 }, { "epoch": 0.24082115320940353, "grad_norm": 0.75, "learning_rate": 1.93122144631312e-05, "loss": 0.2695, "step": 3182 }, { "epoch": 0.24089683553285085, "grad_norm": 0.828125, "learning_rate": 1.9311780303364456e-05, "loss": 0.3064, "step": 3183 }, { "epoch": 0.2409725178562982, "grad_norm": 0.94140625, "learning_rate": 1.9311346011494124e-05, "loss": 0.4062, "step": 3184 }, { "epoch": 0.24104820017974551, "grad_norm": 0.7890625, "learning_rate": 1.9310911587526367e-05, "loss": 0.3129, "step": 3185 }, { "epoch": 0.24112388250319286, "grad_norm": 1.046875, "learning_rate": 1.9310477031467357e-05, "loss": 0.3547, "step": 3186 }, { "epoch": 0.24119956482664018, "grad_norm": 0.81640625, "learning_rate": 1.931004234332325e-05, "loss": 0.3097, "step": 3187 }, { "epoch": 0.2412752471500875, "grad_norm": 0.79296875, "learning_rate": 1.9309607523100218e-05, "loss": 0.287, "step": 3188 }, { "epoch": 0.24135092947353484, "grad_norm": 0.90234375, "learning_rate": 1.9309172570804425e-05, "loss": 0.357, "step": 3189 }, { "epoch": 0.24142661179698216, "grad_norm": 0.83984375, "learning_rate": 1.9308737486442045e-05, "loss": 0.3195, "step": 3190 }, { "epoch": 0.2415022941204295, "grad_norm": 0.765625, "learning_rate": 1.9308302270019248e-05, "loss": 0.3079, "step": 3191 }, { "epoch": 0.24157797644387682, "grad_norm": 0.80078125, "learning_rate": 1.930786692154221e-05, "loss": 0.3102, "step": 3192 }, { "epoch": 0.24165365876732417, "grad_norm": 0.86328125, "learning_rate": 1.930743144101711e-05, "loss": 0.3049, "step": 3193 }, { "epoch": 0.2417293410907715, "grad_norm": 0.83203125, "learning_rate": 1.9306995828450118e-05, "loss": 0.3425, "step": 3194 }, { "epoch": 0.2418050234142188, "grad_norm": 0.73828125, "learning_rate": 1.9306560083847422e-05, "loss": 0.2857, "step": 3195 }, { "epoch": 0.24188070573766615, "grad_norm": 0.8203125, "learning_rate": 1.93061242072152e-05, "loss": 0.3294, "step": 3196 }, { "epoch": 0.24195638806111347, "grad_norm": 0.83203125, "learning_rate": 1.930568819855964e-05, "loss": 0.3312, "step": 3197 }, { "epoch": 0.24203207038456082, "grad_norm": 0.83984375, "learning_rate": 1.9305252057886918e-05, "loss": 0.3557, "step": 3198 }, { "epoch": 0.24210775270800813, "grad_norm": 0.8828125, "learning_rate": 1.9304815785203228e-05, "loss": 0.3302, "step": 3199 }, { "epoch": 0.24218343503145545, "grad_norm": 0.828125, "learning_rate": 1.930437938051476e-05, "loss": 0.3396, "step": 3200 }, { "epoch": 0.2422591173549028, "grad_norm": 0.84765625, "learning_rate": 1.93039428438277e-05, "loss": 0.3627, "step": 3201 }, { "epoch": 0.24233479967835012, "grad_norm": 0.84765625, "learning_rate": 1.930350617514825e-05, "loss": 0.3468, "step": 3202 }, { "epoch": 0.24241048200179746, "grad_norm": 0.828125, "learning_rate": 1.93030693744826e-05, "loss": 0.3517, "step": 3203 }, { "epoch": 0.24248616432524478, "grad_norm": 0.80859375, "learning_rate": 1.930263244183694e-05, "loss": 0.3121, "step": 3204 }, { "epoch": 0.24256184664869213, "grad_norm": 0.8046875, "learning_rate": 1.930219537721748e-05, "loss": 0.3228, "step": 3205 }, { "epoch": 0.24263752897213944, "grad_norm": 0.7734375, "learning_rate": 1.9301758180630414e-05, "loss": 0.2701, "step": 3206 }, { "epoch": 0.24271321129558676, "grad_norm": 6.09375, "learning_rate": 1.9301320852081947e-05, "loss": 0.4426, "step": 3207 }, { "epoch": 0.2427888936190341, "grad_norm": 0.82421875, "learning_rate": 1.930088339157828e-05, "loss": 0.3309, "step": 3208 }, { "epoch": 0.24286457594248143, "grad_norm": 4.03125, "learning_rate": 1.9300445799125623e-05, "loss": 0.4246, "step": 3209 }, { "epoch": 0.24294025826592877, "grad_norm": 0.828125, "learning_rate": 1.930000807473018e-05, "loss": 0.3292, "step": 3210 }, { "epoch": 0.2430159405893761, "grad_norm": 0.94140625, "learning_rate": 1.9299570218398167e-05, "loss": 0.3862, "step": 3211 }, { "epoch": 0.24309162291282344, "grad_norm": 0.8125, "learning_rate": 1.929913223013579e-05, "loss": 0.3126, "step": 3212 }, { "epoch": 0.24316730523627075, "grad_norm": 0.84375, "learning_rate": 1.9298694109949263e-05, "loss": 0.3288, "step": 3213 }, { "epoch": 0.24324298755971807, "grad_norm": 0.81640625, "learning_rate": 1.9298255857844803e-05, "loss": 0.315, "step": 3214 }, { "epoch": 0.24331866988316542, "grad_norm": 2.234375, "learning_rate": 1.9297817473828628e-05, "loss": 0.5038, "step": 3215 }, { "epoch": 0.24339435220661274, "grad_norm": 0.828125, "learning_rate": 1.9297378957906957e-05, "loss": 0.3094, "step": 3216 }, { "epoch": 0.24347003453006008, "grad_norm": 0.84375, "learning_rate": 1.9296940310086013e-05, "loss": 0.313, "step": 3217 }, { "epoch": 0.2435457168535074, "grad_norm": 0.9453125, "learning_rate": 1.929650153037201e-05, "loss": 0.3634, "step": 3218 }, { "epoch": 0.24362139917695474, "grad_norm": 0.79296875, "learning_rate": 1.9296062618771184e-05, "loss": 0.3078, "step": 3219 }, { "epoch": 0.24369708150040206, "grad_norm": 0.82421875, "learning_rate": 1.9295623575289756e-05, "loss": 0.3156, "step": 3220 }, { "epoch": 0.24377276382384938, "grad_norm": 0.8515625, "learning_rate": 1.929518439993396e-05, "loss": 0.3375, "step": 3221 }, { "epoch": 0.24384844614729673, "grad_norm": 0.80859375, "learning_rate": 1.9294745092710016e-05, "loss": 0.337, "step": 3222 }, { "epoch": 0.24392412847074404, "grad_norm": 0.79296875, "learning_rate": 1.9294305653624164e-05, "loss": 0.2876, "step": 3223 }, { "epoch": 0.2439998107941914, "grad_norm": 0.87890625, "learning_rate": 1.929386608268264e-05, "loss": 0.3441, "step": 3224 }, { "epoch": 0.2440754931176387, "grad_norm": 0.890625, "learning_rate": 1.9293426379891677e-05, "loss": 0.3204, "step": 3225 }, { "epoch": 0.24415117544108605, "grad_norm": 0.83203125, "learning_rate": 1.929298654525751e-05, "loss": 0.3204, "step": 3226 }, { "epoch": 0.24422685776453337, "grad_norm": 2.8125, "learning_rate": 1.929254657878638e-05, "loss": 0.5313, "step": 3227 }, { "epoch": 0.2443025400879807, "grad_norm": 0.8125, "learning_rate": 1.9292106480484533e-05, "loss": 0.312, "step": 3228 }, { "epoch": 0.24437822241142804, "grad_norm": 0.73828125, "learning_rate": 1.9291666250358206e-05, "loss": 0.2824, "step": 3229 }, { "epoch": 0.24445390473487535, "grad_norm": 0.7578125, "learning_rate": 1.9291225888413652e-05, "loss": 0.3031, "step": 3230 }, { "epoch": 0.2445295870583227, "grad_norm": 0.828125, "learning_rate": 1.9290785394657113e-05, "loss": 0.3182, "step": 3231 }, { "epoch": 0.24460526938177002, "grad_norm": 0.81640625, "learning_rate": 1.929034476909484e-05, "loss": 0.3458, "step": 3232 }, { "epoch": 0.24468095170521734, "grad_norm": 0.87109375, "learning_rate": 1.9289904011733084e-05, "loss": 0.3613, "step": 3233 }, { "epoch": 0.24475663402866468, "grad_norm": 0.94140625, "learning_rate": 1.92894631225781e-05, "loss": 0.3775, "step": 3234 }, { "epoch": 0.244832316352112, "grad_norm": 0.828125, "learning_rate": 1.9289022101636135e-05, "loss": 0.3191, "step": 3235 }, { "epoch": 0.24490799867555935, "grad_norm": 0.8515625, "learning_rate": 1.9288580948913456e-05, "loss": 0.332, "step": 3236 }, { "epoch": 0.24498368099900666, "grad_norm": 0.8359375, "learning_rate": 1.9288139664416312e-05, "loss": 0.3126, "step": 3237 }, { "epoch": 0.245059363322454, "grad_norm": 0.890625, "learning_rate": 1.9287698248150973e-05, "loss": 0.3711, "step": 3238 }, { "epoch": 0.24513504564590133, "grad_norm": 0.76953125, "learning_rate": 1.9287256700123692e-05, "loss": 0.2845, "step": 3239 }, { "epoch": 0.24521072796934865, "grad_norm": 0.87109375, "learning_rate": 1.9286815020340738e-05, "loss": 0.3449, "step": 3240 }, { "epoch": 0.245286410292796, "grad_norm": 0.84765625, "learning_rate": 1.9286373208808375e-05, "loss": 0.2981, "step": 3241 }, { "epoch": 0.2453620926162433, "grad_norm": 0.95703125, "learning_rate": 1.9285931265532875e-05, "loss": 0.3657, "step": 3242 }, { "epoch": 0.24543777493969066, "grad_norm": 1.015625, "learning_rate": 1.9285489190520502e-05, "loss": 0.3399, "step": 3243 }, { "epoch": 0.24551345726313797, "grad_norm": 0.953125, "learning_rate": 1.9285046983777535e-05, "loss": 0.3154, "step": 3244 }, { "epoch": 0.24558913958658532, "grad_norm": 0.8359375, "learning_rate": 1.9284604645310237e-05, "loss": 0.3185, "step": 3245 }, { "epoch": 0.24566482191003264, "grad_norm": 0.828125, "learning_rate": 1.9284162175124892e-05, "loss": 0.351, "step": 3246 }, { "epoch": 0.24574050423347996, "grad_norm": 0.87109375, "learning_rate": 1.9283719573227773e-05, "loss": 0.3645, "step": 3247 }, { "epoch": 0.2458161865569273, "grad_norm": 1.78125, "learning_rate": 1.9283276839625166e-05, "loss": 0.4033, "step": 3248 }, { "epoch": 0.24589186888037462, "grad_norm": 0.8125, "learning_rate": 1.928283397432334e-05, "loss": 0.3177, "step": 3249 }, { "epoch": 0.24596755120382197, "grad_norm": 0.82421875, "learning_rate": 1.9282390977328586e-05, "loss": 0.3395, "step": 3250 }, { "epoch": 0.24604323352726928, "grad_norm": 0.80859375, "learning_rate": 1.9281947848647186e-05, "loss": 0.3099, "step": 3251 }, { "epoch": 0.24611891585071663, "grad_norm": 0.8203125, "learning_rate": 1.9281504588285428e-05, "loss": 0.3364, "step": 3252 }, { "epoch": 0.24619459817416395, "grad_norm": 0.75390625, "learning_rate": 1.9281061196249603e-05, "loss": 0.2914, "step": 3253 }, { "epoch": 0.24627028049761127, "grad_norm": 0.82421875, "learning_rate": 1.9280617672545993e-05, "loss": 0.3126, "step": 3254 }, { "epoch": 0.2463459628210586, "grad_norm": 0.86328125, "learning_rate": 1.9280174017180895e-05, "loss": 0.3373, "step": 3255 }, { "epoch": 0.24642164514450593, "grad_norm": 0.83984375, "learning_rate": 1.927973023016061e-05, "loss": 0.3495, "step": 3256 }, { "epoch": 0.24649732746795328, "grad_norm": 0.78125, "learning_rate": 1.927928631149142e-05, "loss": 0.2766, "step": 3257 }, { "epoch": 0.2465730097914006, "grad_norm": 0.83203125, "learning_rate": 1.9278842261179635e-05, "loss": 0.3417, "step": 3258 }, { "epoch": 0.24664869211484794, "grad_norm": 0.8828125, "learning_rate": 1.927839807923155e-05, "loss": 0.3518, "step": 3259 }, { "epoch": 0.24672437443829526, "grad_norm": 0.78125, "learning_rate": 1.927795376565346e-05, "loss": 0.2888, "step": 3260 }, { "epoch": 0.24680005676174258, "grad_norm": 0.8203125, "learning_rate": 1.927750932045168e-05, "loss": 0.3278, "step": 3261 }, { "epoch": 0.24687573908518992, "grad_norm": 0.83984375, "learning_rate": 1.9277064743632505e-05, "loss": 0.3397, "step": 3262 }, { "epoch": 0.24695142140863724, "grad_norm": 1.28125, "learning_rate": 1.9276620035202248e-05, "loss": 0.3538, "step": 3263 }, { "epoch": 0.24702710373208459, "grad_norm": 0.80859375, "learning_rate": 1.9276175195167218e-05, "loss": 0.3223, "step": 3264 }, { "epoch": 0.2471027860555319, "grad_norm": 0.8203125, "learning_rate": 1.9275730223533724e-05, "loss": 0.3422, "step": 3265 }, { "epoch": 0.24717846837897922, "grad_norm": 1.78125, "learning_rate": 1.927528512030808e-05, "loss": 0.4583, "step": 3266 }, { "epoch": 0.24725415070242657, "grad_norm": 0.8125, "learning_rate": 1.9274839885496595e-05, "loss": 0.3259, "step": 3267 }, { "epoch": 0.24732983302587389, "grad_norm": 0.84375, "learning_rate": 1.92743945191056e-05, "loss": 0.3552, "step": 3268 }, { "epoch": 0.24740551534932123, "grad_norm": 0.79296875, "learning_rate": 1.9273949021141393e-05, "loss": 0.3001, "step": 3269 }, { "epoch": 0.24748119767276855, "grad_norm": 0.859375, "learning_rate": 1.9273503391610307e-05, "loss": 0.3331, "step": 3270 }, { "epoch": 0.2475568799962159, "grad_norm": 0.87109375, "learning_rate": 1.9273057630518664e-05, "loss": 0.3588, "step": 3271 }, { "epoch": 0.2476325623196632, "grad_norm": 0.84765625, "learning_rate": 1.9272611737872785e-05, "loss": 0.3527, "step": 3272 }, { "epoch": 0.24770824464311053, "grad_norm": 0.8359375, "learning_rate": 1.9272165713678993e-05, "loss": 0.3656, "step": 3273 }, { "epoch": 0.24778392696655788, "grad_norm": 0.7890625, "learning_rate": 1.9271719557943624e-05, "loss": 0.3017, "step": 3274 }, { "epoch": 0.2478596092900052, "grad_norm": 0.8046875, "learning_rate": 1.9271273270673e-05, "loss": 0.3133, "step": 3275 }, { "epoch": 0.24793529161345254, "grad_norm": 0.75, "learning_rate": 1.9270826851873452e-05, "loss": 0.265, "step": 3276 }, { "epoch": 0.24801097393689986, "grad_norm": 0.828125, "learning_rate": 1.9270380301551318e-05, "loss": 0.3403, "step": 3277 }, { "epoch": 0.2480866562603472, "grad_norm": 0.8046875, "learning_rate": 1.926993361971293e-05, "loss": 0.3207, "step": 3278 }, { "epoch": 0.24816233858379452, "grad_norm": 0.875, "learning_rate": 1.9269486806364626e-05, "loss": 0.3843, "step": 3279 }, { "epoch": 0.24823802090724184, "grad_norm": 0.80859375, "learning_rate": 1.926903986151275e-05, "loss": 0.2941, "step": 3280 }, { "epoch": 0.2483137032306892, "grad_norm": 0.8046875, "learning_rate": 1.926859278516363e-05, "loss": 0.308, "step": 3281 }, { "epoch": 0.2483893855541365, "grad_norm": 0.8203125, "learning_rate": 1.926814557732362e-05, "loss": 0.2972, "step": 3282 }, { "epoch": 0.24846506787758385, "grad_norm": 0.80078125, "learning_rate": 1.9267698237999056e-05, "loss": 0.3315, "step": 3283 }, { "epoch": 0.24854075020103117, "grad_norm": 0.83203125, "learning_rate": 1.926725076719629e-05, "loss": 0.3184, "step": 3284 }, { "epoch": 0.24861643252447851, "grad_norm": 0.80859375, "learning_rate": 1.9266803164921673e-05, "loss": 0.337, "step": 3285 }, { "epoch": 0.24869211484792583, "grad_norm": 0.828125, "learning_rate": 1.926635543118155e-05, "loss": 0.3504, "step": 3286 }, { "epoch": 0.24876779717137315, "grad_norm": 2.484375, "learning_rate": 1.926590756598227e-05, "loss": 0.5138, "step": 3287 }, { "epoch": 0.2488434794948205, "grad_norm": 0.9296875, "learning_rate": 1.9265459569330192e-05, "loss": 0.4046, "step": 3288 }, { "epoch": 0.24891916181826781, "grad_norm": 0.828125, "learning_rate": 1.926501144123167e-05, "loss": 0.3116, "step": 3289 }, { "epoch": 0.24899484414171516, "grad_norm": 0.82421875, "learning_rate": 1.926456318169306e-05, "loss": 0.3151, "step": 3290 }, { "epoch": 0.24907052646516248, "grad_norm": 0.83203125, "learning_rate": 1.9264114790720722e-05, "loss": 0.3574, "step": 3291 }, { "epoch": 0.2491462087886098, "grad_norm": 0.859375, "learning_rate": 1.926366626832102e-05, "loss": 0.3711, "step": 3292 }, { "epoch": 0.24922189111205714, "grad_norm": 0.85546875, "learning_rate": 1.9263217614500318e-05, "loss": 0.354, "step": 3293 }, { "epoch": 0.24929757343550446, "grad_norm": 2.734375, "learning_rate": 1.926276882926497e-05, "loss": 0.442, "step": 3294 }, { "epoch": 0.2493732557589518, "grad_norm": 0.8359375, "learning_rate": 1.926231991262136e-05, "loss": 0.3335, "step": 3295 }, { "epoch": 0.24944893808239912, "grad_norm": 0.859375, "learning_rate": 1.926187086457584e-05, "loss": 0.3714, "step": 3296 }, { "epoch": 0.24952462040584647, "grad_norm": 2.046875, "learning_rate": 1.9261421685134792e-05, "loss": 0.4568, "step": 3297 }, { "epoch": 0.2496003027292938, "grad_norm": 0.97265625, "learning_rate": 1.9260972374304582e-05, "loss": 0.3947, "step": 3298 }, { "epoch": 0.2496759850527411, "grad_norm": 0.81640625, "learning_rate": 1.926052293209159e-05, "loss": 0.3267, "step": 3299 }, { "epoch": 0.24975166737618845, "grad_norm": 0.796875, "learning_rate": 1.9260073358502184e-05, "loss": 0.3211, "step": 3300 }, { "epoch": 0.24982734969963577, "grad_norm": 0.76171875, "learning_rate": 1.925962365354275e-05, "loss": 0.2975, "step": 3301 }, { "epoch": 0.24990303202308312, "grad_norm": 0.80078125, "learning_rate": 1.925917381721966e-05, "loss": 0.3394, "step": 3302 }, { "epoch": 0.24997871434653043, "grad_norm": 0.83984375, "learning_rate": 1.9258723849539306e-05, "loss": 0.324, "step": 3303 }, { "epoch": 0.25005439666997775, "grad_norm": 0.8671875, "learning_rate": 1.9258273750508062e-05, "loss": 0.3589, "step": 3304 }, { "epoch": 0.2501300789934251, "grad_norm": 0.93359375, "learning_rate": 1.9257823520132318e-05, "loss": 0.3543, "step": 3305 }, { "epoch": 0.25020576131687244, "grad_norm": 0.8359375, "learning_rate": 1.925737315841846e-05, "loss": 0.3502, "step": 3306 }, { "epoch": 0.25028144364031973, "grad_norm": 0.8515625, "learning_rate": 1.9256922665372876e-05, "loss": 0.3706, "step": 3307 }, { "epoch": 0.2503571259637671, "grad_norm": 0.88671875, "learning_rate": 1.9256472041001962e-05, "loss": 0.3771, "step": 3308 }, { "epoch": 0.2504328082872144, "grad_norm": 0.78125, "learning_rate": 1.9256021285312105e-05, "loss": 0.3163, "step": 3309 }, { "epoch": 0.25050849061066177, "grad_norm": 1.390625, "learning_rate": 1.9255570398309702e-05, "loss": 0.4216, "step": 3310 }, { "epoch": 0.25058417293410906, "grad_norm": 0.8046875, "learning_rate": 1.9255119380001152e-05, "loss": 0.337, "step": 3311 }, { "epoch": 0.2506598552575564, "grad_norm": 0.80859375, "learning_rate": 1.925466823039285e-05, "loss": 0.3233, "step": 3312 }, { "epoch": 0.25073553758100375, "grad_norm": 0.84765625, "learning_rate": 1.9254216949491198e-05, "loss": 0.3342, "step": 3313 }, { "epoch": 0.25081121990445104, "grad_norm": 1.4296875, "learning_rate": 1.9253765537302597e-05, "loss": 0.4322, "step": 3314 }, { "epoch": 0.2508869022278984, "grad_norm": 0.828125, "learning_rate": 1.925331399383345e-05, "loss": 0.3559, "step": 3315 }, { "epoch": 0.25096258455134574, "grad_norm": 0.796875, "learning_rate": 1.925286231909017e-05, "loss": 0.3407, "step": 3316 }, { "epoch": 0.2510382668747931, "grad_norm": 0.8984375, "learning_rate": 1.9252410513079155e-05, "loss": 0.3866, "step": 3317 }, { "epoch": 0.25111394919824037, "grad_norm": 0.8203125, "learning_rate": 1.925195857580682e-05, "loss": 0.3151, "step": 3318 }, { "epoch": 0.2511896315216877, "grad_norm": 0.8359375, "learning_rate": 1.925150650727958e-05, "loss": 0.3251, "step": 3319 }, { "epoch": 0.25126531384513506, "grad_norm": 0.77734375, "learning_rate": 1.925105430750384e-05, "loss": 0.288, "step": 3320 }, { "epoch": 0.25134099616858235, "grad_norm": 0.80078125, "learning_rate": 1.925060197648602e-05, "loss": 0.3385, "step": 3321 }, { "epoch": 0.2514166784920297, "grad_norm": 0.84765625, "learning_rate": 1.9250149514232535e-05, "loss": 0.4011, "step": 3322 }, { "epoch": 0.25149236081547705, "grad_norm": 0.87109375, "learning_rate": 1.9249696920749807e-05, "loss": 0.3623, "step": 3323 }, { "epoch": 0.2515680431389244, "grad_norm": 0.7578125, "learning_rate": 1.9249244196044253e-05, "loss": 0.2909, "step": 3324 }, { "epoch": 0.2516437254623717, "grad_norm": 1.2421875, "learning_rate": 1.9248791340122302e-05, "loss": 0.3876, "step": 3325 }, { "epoch": 0.251719407785819, "grad_norm": 0.8828125, "learning_rate": 1.924833835299037e-05, "loss": 0.3853, "step": 3326 }, { "epoch": 0.2517950901092664, "grad_norm": 0.875, "learning_rate": 1.9247885234654887e-05, "loss": 0.3663, "step": 3327 }, { "epoch": 0.25187077243271366, "grad_norm": 0.8046875, "learning_rate": 1.924743198512229e-05, "loss": 0.3345, "step": 3328 }, { "epoch": 0.251946454756161, "grad_norm": 0.83984375, "learning_rate": 1.9246978604398994e-05, "loss": 0.3516, "step": 3329 }, { "epoch": 0.25202213707960835, "grad_norm": 0.83984375, "learning_rate": 1.924652509249144e-05, "loss": 0.368, "step": 3330 }, { "epoch": 0.2520978194030557, "grad_norm": 0.84765625, "learning_rate": 1.924607144940606e-05, "loss": 0.3667, "step": 3331 }, { "epoch": 0.252173501726503, "grad_norm": 0.83984375, "learning_rate": 1.924561767514929e-05, "loss": 0.3879, "step": 3332 }, { "epoch": 0.25224918404995034, "grad_norm": 0.83984375, "learning_rate": 1.9245163769727566e-05, "loss": 0.328, "step": 3333 }, { "epoch": 0.2523248663733977, "grad_norm": 0.8203125, "learning_rate": 1.9244709733147328e-05, "loss": 0.3377, "step": 3334 }, { "epoch": 0.252400548696845, "grad_norm": 0.828125, "learning_rate": 1.9244255565415023e-05, "loss": 0.3444, "step": 3335 }, { "epoch": 0.2524762310202923, "grad_norm": 0.81640625, "learning_rate": 1.9243801266537084e-05, "loss": 0.34, "step": 3336 }, { "epoch": 0.25255191334373966, "grad_norm": 0.83984375, "learning_rate": 1.9243346836519962e-05, "loss": 0.3455, "step": 3337 }, { "epoch": 0.25262759566718695, "grad_norm": 0.80859375, "learning_rate": 1.9242892275370103e-05, "loss": 0.3538, "step": 3338 }, { "epoch": 0.2527032779906343, "grad_norm": 0.8984375, "learning_rate": 1.924243758309396e-05, "loss": 0.3795, "step": 3339 }, { "epoch": 0.25277896031408165, "grad_norm": 0.83203125, "learning_rate": 1.9241982759697976e-05, "loss": 0.317, "step": 3340 }, { "epoch": 0.252854642637529, "grad_norm": 0.8671875, "learning_rate": 1.9241527805188604e-05, "loss": 0.3531, "step": 3341 }, { "epoch": 0.2529303249609763, "grad_norm": 0.8203125, "learning_rate": 1.9241072719572305e-05, "loss": 0.342, "step": 3342 }, { "epoch": 0.25300600728442363, "grad_norm": 0.984375, "learning_rate": 1.924061750285553e-05, "loss": 0.3056, "step": 3343 }, { "epoch": 0.253081689607871, "grad_norm": 0.80859375, "learning_rate": 1.9240162155044738e-05, "loss": 0.3345, "step": 3344 }, { "epoch": 0.25315737193131826, "grad_norm": 0.8828125, "learning_rate": 1.923970667614639e-05, "loss": 0.366, "step": 3345 }, { "epoch": 0.2532330542547656, "grad_norm": 0.97265625, "learning_rate": 1.9239251066166944e-05, "loss": 0.3753, "step": 3346 }, { "epoch": 0.25330873657821296, "grad_norm": 0.8203125, "learning_rate": 1.9238795325112867e-05, "loss": 0.3443, "step": 3347 }, { "epoch": 0.2533844189016603, "grad_norm": 0.7578125, "learning_rate": 1.923833945299063e-05, "loss": 0.2874, "step": 3348 }, { "epoch": 0.2534601012251076, "grad_norm": 0.79296875, "learning_rate": 1.9237883449806687e-05, "loss": 0.3196, "step": 3349 }, { "epoch": 0.25353578354855494, "grad_norm": 0.82421875, "learning_rate": 1.923742731556752e-05, "loss": 0.3408, "step": 3350 }, { "epoch": 0.2536114658720023, "grad_norm": 0.80078125, "learning_rate": 1.923697105027959e-05, "loss": 0.3347, "step": 3351 }, { "epoch": 0.2536871481954496, "grad_norm": 0.82421875, "learning_rate": 1.9236514653949377e-05, "loss": 0.3345, "step": 3352 }, { "epoch": 0.2537628305188969, "grad_norm": 0.83203125, "learning_rate": 1.9236058126583354e-05, "loss": 0.3365, "step": 3353 }, { "epoch": 0.25383851284234427, "grad_norm": 0.83984375, "learning_rate": 1.9235601468187997e-05, "loss": 0.3394, "step": 3354 }, { "epoch": 0.2539141951657916, "grad_norm": 0.85546875, "learning_rate": 1.9235144678769783e-05, "loss": 0.3513, "step": 3355 }, { "epoch": 0.2539898774892389, "grad_norm": 0.859375, "learning_rate": 1.9234687758335193e-05, "loss": 0.4039, "step": 3356 }, { "epoch": 0.25406555981268625, "grad_norm": 0.84375, "learning_rate": 1.9234230706890707e-05, "loss": 0.3398, "step": 3357 }, { "epoch": 0.2541412421361336, "grad_norm": 0.859375, "learning_rate": 1.923377352444282e-05, "loss": 0.3395, "step": 3358 }, { "epoch": 0.2542169244595809, "grad_norm": 0.87109375, "learning_rate": 1.9233316210998002e-05, "loss": 0.3648, "step": 3359 }, { "epoch": 0.25429260678302823, "grad_norm": 0.796875, "learning_rate": 1.9232858766562756e-05, "loss": 0.3132, "step": 3360 }, { "epoch": 0.2543682891064756, "grad_norm": 0.80859375, "learning_rate": 1.9232401191143557e-05, "loss": 0.3008, "step": 3361 }, { "epoch": 0.2544439714299229, "grad_norm": 0.77734375, "learning_rate": 1.9231943484746905e-05, "loss": 0.2835, "step": 3362 }, { "epoch": 0.2545196537533702, "grad_norm": 0.85546875, "learning_rate": 1.92314856473793e-05, "loss": 0.339, "step": 3363 }, { "epoch": 0.25459533607681756, "grad_norm": 0.90234375, "learning_rate": 1.923102767904722e-05, "loss": 0.3725, "step": 3364 }, { "epoch": 0.2546710184002649, "grad_norm": 0.796875, "learning_rate": 1.9230569579757173e-05, "loss": 0.3264, "step": 3365 }, { "epoch": 0.2547467007237122, "grad_norm": 0.8203125, "learning_rate": 1.923011134951566e-05, "loss": 0.338, "step": 3366 }, { "epoch": 0.25482238304715954, "grad_norm": 0.859375, "learning_rate": 1.9229652988329178e-05, "loss": 0.3641, "step": 3367 }, { "epoch": 0.2548980653706069, "grad_norm": 0.89453125, "learning_rate": 1.9229194496204226e-05, "loss": 0.3548, "step": 3368 }, { "epoch": 0.25497374769405423, "grad_norm": 1.2421875, "learning_rate": 1.9228735873147315e-05, "loss": 0.3667, "step": 3369 }, { "epoch": 0.2550494300175015, "grad_norm": 0.87890625, "learning_rate": 1.922827711916495e-05, "loss": 0.3812, "step": 3370 }, { "epoch": 0.25512511234094887, "grad_norm": 0.84375, "learning_rate": 1.9227818234263633e-05, "loss": 0.3493, "step": 3371 }, { "epoch": 0.2552007946643962, "grad_norm": 0.89453125, "learning_rate": 1.9227359218449885e-05, "loss": 0.3566, "step": 3372 }, { "epoch": 0.2552764769878435, "grad_norm": 0.73828125, "learning_rate": 1.9226900071730206e-05, "loss": 0.2638, "step": 3373 }, { "epoch": 0.25535215931129085, "grad_norm": 0.81640625, "learning_rate": 1.9226440794111117e-05, "loss": 0.3216, "step": 3374 }, { "epoch": 0.2554278416347382, "grad_norm": 0.8359375, "learning_rate": 1.9225981385599132e-05, "loss": 0.3178, "step": 3375 }, { "epoch": 0.25550352395818554, "grad_norm": 0.8125, "learning_rate": 1.9225521846200773e-05, "loss": 0.3607, "step": 3376 }, { "epoch": 0.25557920628163283, "grad_norm": 0.83203125, "learning_rate": 1.9225062175922552e-05, "loss": 0.314, "step": 3377 }, { "epoch": 0.2556548886050802, "grad_norm": 0.84375, "learning_rate": 1.9224602374770992e-05, "loss": 0.3079, "step": 3378 }, { "epoch": 0.2557305709285275, "grad_norm": 0.87890625, "learning_rate": 1.922414244275262e-05, "loss": 0.3796, "step": 3379 }, { "epoch": 0.2558062532519748, "grad_norm": 0.90234375, "learning_rate": 1.9223682379873952e-05, "loss": 0.2865, "step": 3380 }, { "epoch": 0.25588193557542216, "grad_norm": 0.84375, "learning_rate": 1.922322218614153e-05, "loss": 0.3261, "step": 3381 }, { "epoch": 0.2559576178988695, "grad_norm": 0.83984375, "learning_rate": 1.9222761861561868e-05, "loss": 0.3161, "step": 3382 }, { "epoch": 0.25603330022231685, "grad_norm": 0.90234375, "learning_rate": 1.9222301406141504e-05, "loss": 0.3364, "step": 3383 }, { "epoch": 0.25610898254576414, "grad_norm": 0.921875, "learning_rate": 1.922184081988697e-05, "loss": 0.3503, "step": 3384 }, { "epoch": 0.2561846648692115, "grad_norm": 0.82421875, "learning_rate": 1.9221380102804796e-05, "loss": 0.2888, "step": 3385 }, { "epoch": 0.25626034719265883, "grad_norm": 0.91796875, "learning_rate": 1.922091925490152e-05, "loss": 0.3955, "step": 3386 }, { "epoch": 0.2563360295161061, "grad_norm": 0.79296875, "learning_rate": 1.9220458276183683e-05, "loss": 0.2966, "step": 3387 }, { "epoch": 0.25641171183955347, "grad_norm": 0.84765625, "learning_rate": 1.9219997166657826e-05, "loss": 0.326, "step": 3388 }, { "epoch": 0.2564873941630008, "grad_norm": 0.84375, "learning_rate": 1.9219535926330478e-05, "loss": 0.3003, "step": 3389 }, { "epoch": 0.25656307648644816, "grad_norm": 0.859375, "learning_rate": 1.92190745552082e-05, "loss": 0.3815, "step": 3390 }, { "epoch": 0.25663875880989545, "grad_norm": 0.85546875, "learning_rate": 1.9218613053297527e-05, "loss": 0.3373, "step": 3391 }, { "epoch": 0.2567144411333428, "grad_norm": 0.875, "learning_rate": 1.9218151420605006e-05, "loss": 0.3553, "step": 3392 }, { "epoch": 0.25679012345679014, "grad_norm": 0.87109375, "learning_rate": 1.9217689657137187e-05, "loss": 0.2974, "step": 3393 }, { "epoch": 0.25686580578023743, "grad_norm": 0.78125, "learning_rate": 1.9217227762900624e-05, "loss": 0.2921, "step": 3394 }, { "epoch": 0.2569414881036848, "grad_norm": 0.94140625, "learning_rate": 1.921676573790187e-05, "loss": 0.377, "step": 3395 }, { "epoch": 0.2570171704271321, "grad_norm": 0.88671875, "learning_rate": 1.9216303582147475e-05, "loss": 0.3409, "step": 3396 }, { "epoch": 0.25709285275057947, "grad_norm": 0.8203125, "learning_rate": 1.9215841295644e-05, "loss": 0.3405, "step": 3397 }, { "epoch": 0.25716853507402676, "grad_norm": 0.87109375, "learning_rate": 1.9215378878398e-05, "loss": 0.3358, "step": 3398 }, { "epoch": 0.2572442173974741, "grad_norm": 0.82421875, "learning_rate": 1.9214916330416035e-05, "loss": 0.3123, "step": 3399 }, { "epoch": 0.25731989972092145, "grad_norm": 0.85546875, "learning_rate": 1.921445365170467e-05, "loss": 0.3178, "step": 3400 }, { "epoch": 0.25739558204436874, "grad_norm": 0.89453125, "learning_rate": 1.9213990842270466e-05, "loss": 0.354, "step": 3401 }, { "epoch": 0.2574712643678161, "grad_norm": 0.81640625, "learning_rate": 1.921352790211999e-05, "loss": 0.3314, "step": 3402 }, { "epoch": 0.25754694669126343, "grad_norm": 2.0, "learning_rate": 1.9213064831259814e-05, "loss": 0.4013, "step": 3403 }, { "epoch": 0.2576226290147107, "grad_norm": 0.80078125, "learning_rate": 1.9212601629696503e-05, "loss": 0.3125, "step": 3404 }, { "epoch": 0.25769831133815807, "grad_norm": 0.84375, "learning_rate": 1.921213829743662e-05, "loss": 0.3424, "step": 3405 }, { "epoch": 0.2577739936616054, "grad_norm": 0.8671875, "learning_rate": 1.9211674834486756e-05, "loss": 0.3455, "step": 3406 }, { "epoch": 0.25784967598505276, "grad_norm": 0.7734375, "learning_rate": 1.9211211240853473e-05, "loss": 0.2869, "step": 3407 }, { "epoch": 0.25792535830850005, "grad_norm": 0.8203125, "learning_rate": 1.9210747516543352e-05, "loss": 0.3251, "step": 3408 }, { "epoch": 0.2580010406319474, "grad_norm": 0.86328125, "learning_rate": 1.9210283661562972e-05, "loss": 0.3019, "step": 3409 }, { "epoch": 0.25807672295539474, "grad_norm": 0.8984375, "learning_rate": 1.920981967591891e-05, "loss": 0.3625, "step": 3410 }, { "epoch": 0.25815240527884203, "grad_norm": 0.90234375, "learning_rate": 1.9209355559617754e-05, "loss": 0.3231, "step": 3411 }, { "epoch": 0.2582280876022894, "grad_norm": 0.80078125, "learning_rate": 1.9208891312666085e-05, "loss": 0.3114, "step": 3412 }, { "epoch": 0.2583037699257367, "grad_norm": 0.890625, "learning_rate": 1.920842693507049e-05, "loss": 0.3324, "step": 3413 }, { "epoch": 0.25837945224918407, "grad_norm": 0.76171875, "learning_rate": 1.9207962426837557e-05, "loss": 0.3201, "step": 3414 }, { "epoch": 0.25845513457263136, "grad_norm": 0.78125, "learning_rate": 1.9207497787973874e-05, "loss": 0.298, "step": 3415 }, { "epoch": 0.2585308168960787, "grad_norm": 0.83203125, "learning_rate": 1.9207033018486038e-05, "loss": 0.3256, "step": 3416 }, { "epoch": 0.25860649921952605, "grad_norm": 0.828125, "learning_rate": 1.9206568118380633e-05, "loss": 0.3397, "step": 3417 }, { "epoch": 0.25868218154297334, "grad_norm": 0.84375, "learning_rate": 1.920610308766426e-05, "loss": 0.3618, "step": 3418 }, { "epoch": 0.2587578638664207, "grad_norm": 0.7578125, "learning_rate": 1.9205637926343524e-05, "loss": 0.2647, "step": 3419 }, { "epoch": 0.25883354618986804, "grad_norm": 0.79296875, "learning_rate": 1.920517263442501e-05, "loss": 0.3087, "step": 3420 }, { "epoch": 0.2589092285133154, "grad_norm": 0.90234375, "learning_rate": 1.920470721191533e-05, "loss": 0.3513, "step": 3421 }, { "epoch": 0.25898491083676267, "grad_norm": 0.8515625, "learning_rate": 1.9204241658821078e-05, "loss": 0.3379, "step": 3422 }, { "epoch": 0.25906059316021, "grad_norm": 0.78125, "learning_rate": 1.9203775975148868e-05, "loss": 0.3136, "step": 3423 }, { "epoch": 0.25913627548365736, "grad_norm": 0.8046875, "learning_rate": 1.9203310160905296e-05, "loss": 0.3133, "step": 3424 }, { "epoch": 0.25921195780710465, "grad_norm": 0.75390625, "learning_rate": 1.9202844216096982e-05, "loss": 0.3051, "step": 3425 }, { "epoch": 0.259287640130552, "grad_norm": 0.8203125, "learning_rate": 1.9202378140730524e-05, "loss": 0.3493, "step": 3426 }, { "epoch": 0.25936332245399935, "grad_norm": 0.82421875, "learning_rate": 1.9201911934812544e-05, "loss": 0.3361, "step": 3427 }, { "epoch": 0.2594390047774467, "grad_norm": 1.0234375, "learning_rate": 1.9201445598349653e-05, "loss": 0.386, "step": 3428 }, { "epoch": 0.259514687100894, "grad_norm": 0.8515625, "learning_rate": 1.920097913134846e-05, "loss": 0.3646, "step": 3429 }, { "epoch": 0.2595903694243413, "grad_norm": 0.8359375, "learning_rate": 1.92005125338156e-05, "loss": 0.3542, "step": 3430 }, { "epoch": 0.2596660517477887, "grad_norm": 0.8203125, "learning_rate": 1.920004580575767e-05, "loss": 0.3592, "step": 3431 }, { "epoch": 0.25974173407123596, "grad_norm": 0.8359375, "learning_rate": 1.9199578947181306e-05, "loss": 0.3596, "step": 3432 }, { "epoch": 0.2598174163946833, "grad_norm": 0.84765625, "learning_rate": 1.919911195809313e-05, "loss": 0.3434, "step": 3433 }, { "epoch": 0.25989309871813066, "grad_norm": 0.82421875, "learning_rate": 1.919864483849976e-05, "loss": 0.3006, "step": 3434 }, { "epoch": 0.259968781041578, "grad_norm": 0.8984375, "learning_rate": 1.9198177588407834e-05, "loss": 0.3921, "step": 3435 }, { "epoch": 0.2600444633650253, "grad_norm": 1.0078125, "learning_rate": 1.919771020782397e-05, "loss": 0.3751, "step": 3436 }, { "epoch": 0.26012014568847264, "grad_norm": 0.82421875, "learning_rate": 1.9197242696754803e-05, "loss": 0.3162, "step": 3437 }, { "epoch": 0.26019582801192, "grad_norm": 0.81640625, "learning_rate": 1.9196775055206965e-05, "loss": 0.3375, "step": 3438 }, { "epoch": 0.2602715103353673, "grad_norm": 2.5, "learning_rate": 1.9196307283187092e-05, "loss": 0.4579, "step": 3439 }, { "epoch": 0.2603471926588146, "grad_norm": 0.79296875, "learning_rate": 1.9195839380701818e-05, "loss": 0.3193, "step": 3440 }, { "epoch": 0.26042287498226196, "grad_norm": 9.375, "learning_rate": 1.9195371347757782e-05, "loss": 0.3793, "step": 3441 }, { "epoch": 0.2604985573057093, "grad_norm": 0.84375, "learning_rate": 1.9194903184361623e-05, "loss": 0.3094, "step": 3442 }, { "epoch": 0.2605742396291566, "grad_norm": 0.84765625, "learning_rate": 1.9194434890519985e-05, "loss": 0.331, "step": 3443 }, { "epoch": 0.26064992195260395, "grad_norm": 0.859375, "learning_rate": 1.9193966466239508e-05, "loss": 0.3463, "step": 3444 }, { "epoch": 0.2607256042760513, "grad_norm": 0.81640625, "learning_rate": 1.9193497911526842e-05, "loss": 0.3468, "step": 3445 }, { "epoch": 0.2608012865994986, "grad_norm": 0.86328125, "learning_rate": 1.9193029226388627e-05, "loss": 0.3494, "step": 3446 }, { "epoch": 0.26087696892294593, "grad_norm": 0.8359375, "learning_rate": 1.919256041083152e-05, "loss": 0.3368, "step": 3447 }, { "epoch": 0.2609526512463933, "grad_norm": 1.0625, "learning_rate": 1.9192091464862172e-05, "loss": 0.3364, "step": 3448 }, { "epoch": 0.2610283335698406, "grad_norm": 0.80078125, "learning_rate": 1.9191622388487224e-05, "loss": 0.3117, "step": 3449 }, { "epoch": 0.2611040158932879, "grad_norm": 0.7890625, "learning_rate": 1.9191153181713343e-05, "loss": 0.3225, "step": 3450 }, { "epoch": 0.26117969821673526, "grad_norm": 0.8125, "learning_rate": 1.9190683844547186e-05, "loss": 0.3564, "step": 3451 }, { "epoch": 0.2612553805401826, "grad_norm": 0.83203125, "learning_rate": 1.91902143769954e-05, "loss": 0.346, "step": 3452 }, { "epoch": 0.2613310628636299, "grad_norm": 0.73828125, "learning_rate": 1.9189744779064656e-05, "loss": 0.2855, "step": 3453 }, { "epoch": 0.26140674518707724, "grad_norm": 0.91015625, "learning_rate": 1.918927505076161e-05, "loss": 0.3534, "step": 3454 }, { "epoch": 0.2614824275105246, "grad_norm": 0.828125, "learning_rate": 1.918880519209293e-05, "loss": 0.3406, "step": 3455 }, { "epoch": 0.26155810983397193, "grad_norm": 0.90625, "learning_rate": 1.918833520306528e-05, "loss": 0.396, "step": 3456 }, { "epoch": 0.2616337921574192, "grad_norm": 1.8515625, "learning_rate": 1.9187865083685328e-05, "loss": 0.3696, "step": 3457 }, { "epoch": 0.26170947448086657, "grad_norm": 0.8828125, "learning_rate": 1.918739483395974e-05, "loss": 0.3008, "step": 3458 }, { "epoch": 0.2617851568043139, "grad_norm": 0.828125, "learning_rate": 1.918692445389519e-05, "loss": 0.3058, "step": 3459 }, { "epoch": 0.2618608391277612, "grad_norm": 0.828125, "learning_rate": 1.9186453943498353e-05, "loss": 0.3281, "step": 3460 }, { "epoch": 0.26193652145120855, "grad_norm": 0.8046875, "learning_rate": 1.91859833027759e-05, "loss": 0.3408, "step": 3461 }, { "epoch": 0.2620122037746559, "grad_norm": 1.15625, "learning_rate": 1.9185512531734515e-05, "loss": 0.3718, "step": 3462 }, { "epoch": 0.2620878860981032, "grad_norm": 0.84375, "learning_rate": 1.9185041630380868e-05, "loss": 0.324, "step": 3463 }, { "epoch": 0.26216356842155053, "grad_norm": 0.859375, "learning_rate": 1.9184570598721644e-05, "loss": 0.309, "step": 3464 }, { "epoch": 0.2622392507449979, "grad_norm": 0.734375, "learning_rate": 1.918409943676353e-05, "loss": 0.2826, "step": 3465 }, { "epoch": 0.2623149330684452, "grad_norm": 0.875, "learning_rate": 1.9183628144513196e-05, "loss": 0.3595, "step": 3466 }, { "epoch": 0.2623906153918925, "grad_norm": 0.828125, "learning_rate": 1.9183156721977344e-05, "loss": 0.3282, "step": 3467 }, { "epoch": 0.26246629771533986, "grad_norm": 0.90625, "learning_rate": 1.918268516916265e-05, "loss": 0.4316, "step": 3468 }, { "epoch": 0.2625419800387872, "grad_norm": 1.1328125, "learning_rate": 1.9182213486075813e-05, "loss": 0.3609, "step": 3469 }, { "epoch": 0.2626176623622345, "grad_norm": 0.83984375, "learning_rate": 1.9181741672723516e-05, "loss": 0.3769, "step": 3470 }, { "epoch": 0.26269334468568184, "grad_norm": 0.77734375, "learning_rate": 1.9181269729112463e-05, "loss": 0.3299, "step": 3471 }, { "epoch": 0.2627690270091292, "grad_norm": 0.8359375, "learning_rate": 1.918079765524934e-05, "loss": 0.3445, "step": 3472 }, { "epoch": 0.26284470933257653, "grad_norm": 0.77734375, "learning_rate": 1.918032545114085e-05, "loss": 0.2859, "step": 3473 }, { "epoch": 0.2629203916560238, "grad_norm": 0.8359375, "learning_rate": 1.9179853116793684e-05, "loss": 0.346, "step": 3474 }, { "epoch": 0.26299607397947117, "grad_norm": 0.87109375, "learning_rate": 1.9179380652214553e-05, "loss": 0.3517, "step": 3475 }, { "epoch": 0.2630717563029185, "grad_norm": 0.8125, "learning_rate": 1.9178908057410154e-05, "loss": 0.3123, "step": 3476 }, { "epoch": 0.2631474386263658, "grad_norm": 1.5546875, "learning_rate": 1.9178435332387192e-05, "loss": 0.4312, "step": 3477 }, { "epoch": 0.26322312094981315, "grad_norm": 0.8125, "learning_rate": 1.9177962477152373e-05, "loss": 0.3158, "step": 3478 }, { "epoch": 0.2632988032732605, "grad_norm": 0.8828125, "learning_rate": 1.9177489491712408e-05, "loss": 0.3793, "step": 3479 }, { "epoch": 0.26337448559670784, "grad_norm": 0.85546875, "learning_rate": 1.9177016376074006e-05, "loss": 0.3622, "step": 3480 }, { "epoch": 0.26345016792015513, "grad_norm": 0.82421875, "learning_rate": 1.9176543130243875e-05, "loss": 0.3216, "step": 3481 }, { "epoch": 0.2635258502436025, "grad_norm": 0.78125, "learning_rate": 1.9176069754228734e-05, "loss": 0.3028, "step": 3482 }, { "epoch": 0.2636015325670498, "grad_norm": 0.796875, "learning_rate": 1.9175596248035298e-05, "loss": 0.3392, "step": 3483 }, { "epoch": 0.2636772148904971, "grad_norm": 0.86328125, "learning_rate": 1.9175122611670286e-05, "loss": 0.3582, "step": 3484 }, { "epoch": 0.26375289721394446, "grad_norm": 0.88671875, "learning_rate": 1.917464884514041e-05, "loss": 0.3859, "step": 3485 }, { "epoch": 0.2638285795373918, "grad_norm": 0.78125, "learning_rate": 1.91741749484524e-05, "loss": 0.3057, "step": 3486 }, { "epoch": 0.26390426186083915, "grad_norm": 0.8984375, "learning_rate": 1.917370092161297e-05, "loss": 0.3691, "step": 3487 }, { "epoch": 0.26397994418428644, "grad_norm": 1.8359375, "learning_rate": 1.917322676462885e-05, "loss": 0.4142, "step": 3488 }, { "epoch": 0.2640556265077338, "grad_norm": 0.8828125, "learning_rate": 1.917275247750677e-05, "loss": 0.3499, "step": 3489 }, { "epoch": 0.26413130883118113, "grad_norm": 0.91015625, "learning_rate": 1.9172278060253454e-05, "loss": 0.3897, "step": 3490 }, { "epoch": 0.2642069911546284, "grad_norm": 0.85546875, "learning_rate": 1.9171803512875634e-05, "loss": 0.3466, "step": 3491 }, { "epoch": 0.26428267347807577, "grad_norm": 0.8515625, "learning_rate": 1.917132883538004e-05, "loss": 0.336, "step": 3492 }, { "epoch": 0.2643583558015231, "grad_norm": 0.82421875, "learning_rate": 1.917085402777341e-05, "loss": 0.3743, "step": 3493 }, { "epoch": 0.26443403812497046, "grad_norm": 1.1171875, "learning_rate": 1.917037909006248e-05, "loss": 0.3712, "step": 3494 }, { "epoch": 0.26450972044841775, "grad_norm": 0.9609375, "learning_rate": 1.916990402225398e-05, "loss": 0.4192, "step": 3495 }, { "epoch": 0.2645854027718651, "grad_norm": 0.859375, "learning_rate": 1.916942882435466e-05, "loss": 0.3431, "step": 3496 }, { "epoch": 0.26466108509531244, "grad_norm": 0.890625, "learning_rate": 1.9168953496371255e-05, "loss": 0.3728, "step": 3497 }, { "epoch": 0.26473676741875973, "grad_norm": 0.8046875, "learning_rate": 1.916847803831051e-05, "loss": 0.3147, "step": 3498 }, { "epoch": 0.2648124497422071, "grad_norm": 1.0078125, "learning_rate": 1.916800245017917e-05, "loss": 0.352, "step": 3499 }, { "epoch": 0.2648881320656544, "grad_norm": 0.8984375, "learning_rate": 1.9167526731983982e-05, "loss": 0.3598, "step": 3500 }, { "epoch": 0.26496381438910177, "grad_norm": 0.98046875, "learning_rate": 1.9167050883731698e-05, "loss": 0.2833, "step": 3501 }, { "epoch": 0.26503949671254906, "grad_norm": 0.76171875, "learning_rate": 1.916657490542906e-05, "loss": 0.2701, "step": 3502 }, { "epoch": 0.2651151790359964, "grad_norm": 0.79296875, "learning_rate": 1.9166098797082836e-05, "loss": 0.3313, "step": 3503 }, { "epoch": 0.26519086135944375, "grad_norm": 0.85546875, "learning_rate": 1.9165622558699763e-05, "loss": 0.3343, "step": 3504 }, { "epoch": 0.26526654368289104, "grad_norm": 0.7890625, "learning_rate": 1.916514619028661e-05, "loss": 0.3079, "step": 3505 }, { "epoch": 0.2653422260063384, "grad_norm": 0.79296875, "learning_rate": 1.9164669691850126e-05, "loss": 0.3342, "step": 3506 }, { "epoch": 0.26541790832978573, "grad_norm": 0.90625, "learning_rate": 1.9164193063397078e-05, "loss": 0.3205, "step": 3507 }, { "epoch": 0.2654935906532331, "grad_norm": 0.8125, "learning_rate": 1.9163716304934227e-05, "loss": 0.3251, "step": 3508 }, { "epoch": 0.26556927297668037, "grad_norm": 0.8125, "learning_rate": 1.9163239416468333e-05, "loss": 0.3232, "step": 3509 }, { "epoch": 0.2656449553001277, "grad_norm": 0.7734375, "learning_rate": 1.9162762398006164e-05, "loss": 0.3025, "step": 3510 }, { "epoch": 0.26572063762357506, "grad_norm": 0.82421875, "learning_rate": 1.9162285249554484e-05, "loss": 0.3103, "step": 3511 }, { "epoch": 0.26579631994702235, "grad_norm": 0.8828125, "learning_rate": 1.9161807971120065e-05, "loss": 0.3533, "step": 3512 }, { "epoch": 0.2658720022704697, "grad_norm": 0.859375, "learning_rate": 1.916133056270968e-05, "loss": 0.3486, "step": 3513 }, { "epoch": 0.26594768459391704, "grad_norm": 0.81640625, "learning_rate": 1.9160853024330096e-05, "loss": 0.3093, "step": 3514 }, { "epoch": 0.2660233669173644, "grad_norm": 0.8359375, "learning_rate": 1.9160375355988095e-05, "loss": 0.3586, "step": 3515 }, { "epoch": 0.2660990492408117, "grad_norm": 0.8515625, "learning_rate": 1.9159897557690448e-05, "loss": 0.3416, "step": 3516 }, { "epoch": 0.266174731564259, "grad_norm": 0.8359375, "learning_rate": 1.9159419629443936e-05, "loss": 0.3433, "step": 3517 }, { "epoch": 0.26625041388770637, "grad_norm": 0.83984375, "learning_rate": 1.915894157125534e-05, "loss": 0.3405, "step": 3518 }, { "epoch": 0.26632609621115366, "grad_norm": 0.80078125, "learning_rate": 1.9158463383131435e-05, "loss": 0.3213, "step": 3519 }, { "epoch": 0.266401778534601, "grad_norm": 0.8046875, "learning_rate": 1.9157985065079016e-05, "loss": 0.2926, "step": 3520 }, { "epoch": 0.26647746085804835, "grad_norm": 0.8828125, "learning_rate": 1.915750661710486e-05, "loss": 0.3691, "step": 3521 }, { "epoch": 0.26655314318149564, "grad_norm": 0.859375, "learning_rate": 1.915702803921576e-05, "loss": 0.3595, "step": 3522 }, { "epoch": 0.266628825504943, "grad_norm": 0.828125, "learning_rate": 1.9156549331418503e-05, "loss": 0.2666, "step": 3523 }, { "epoch": 0.26670450782839034, "grad_norm": 0.81640625, "learning_rate": 1.915607049371988e-05, "loss": 0.3272, "step": 3524 }, { "epoch": 0.2667801901518377, "grad_norm": 0.87890625, "learning_rate": 1.9155591526126684e-05, "loss": 0.3617, "step": 3525 }, { "epoch": 0.26685587247528497, "grad_norm": 0.81640625, "learning_rate": 1.9155112428645714e-05, "loss": 0.3157, "step": 3526 }, { "epoch": 0.2669315547987323, "grad_norm": 0.81640625, "learning_rate": 1.9154633201283758e-05, "loss": 0.32, "step": 3527 }, { "epoch": 0.26700723712217966, "grad_norm": 0.7890625, "learning_rate": 1.9154153844047622e-05, "loss": 0.3178, "step": 3528 }, { "epoch": 0.26708291944562695, "grad_norm": 0.80859375, "learning_rate": 1.9153674356944107e-05, "loss": 0.3362, "step": 3529 }, { "epoch": 0.2671586017690743, "grad_norm": 0.83203125, "learning_rate": 1.9153194739980012e-05, "loss": 0.326, "step": 3530 }, { "epoch": 0.26723428409252165, "grad_norm": 0.8125, "learning_rate": 1.915271499316214e-05, "loss": 0.3442, "step": 3531 }, { "epoch": 0.267309966415969, "grad_norm": 0.81640625, "learning_rate": 1.9152235116497305e-05, "loss": 0.3268, "step": 3532 }, { "epoch": 0.2673856487394163, "grad_norm": 0.81640625, "learning_rate": 1.9151755109992302e-05, "loss": 0.3175, "step": 3533 }, { "epoch": 0.2674613310628636, "grad_norm": 0.796875, "learning_rate": 1.9151274973653954e-05, "loss": 0.276, "step": 3534 }, { "epoch": 0.267537013386311, "grad_norm": 0.83984375, "learning_rate": 1.9150794707489063e-05, "loss": 0.3315, "step": 3535 }, { "epoch": 0.26761269570975826, "grad_norm": 0.81640625, "learning_rate": 1.9150314311504444e-05, "loss": 0.3197, "step": 3536 }, { "epoch": 0.2676883780332056, "grad_norm": 0.84765625, "learning_rate": 1.9149833785706916e-05, "loss": 0.3195, "step": 3537 }, { "epoch": 0.26776406035665296, "grad_norm": 0.83203125, "learning_rate": 1.9149353130103297e-05, "loss": 0.3279, "step": 3538 }, { "epoch": 0.2678397426801003, "grad_norm": 0.8671875, "learning_rate": 1.91488723447004e-05, "loss": 0.3863, "step": 3539 }, { "epoch": 0.2679154250035476, "grad_norm": 0.80078125, "learning_rate": 1.9148391429505047e-05, "loss": 0.3184, "step": 3540 }, { "epoch": 0.26799110732699494, "grad_norm": 0.875, "learning_rate": 1.9147910384524065e-05, "loss": 0.3725, "step": 3541 }, { "epoch": 0.2680667896504423, "grad_norm": 0.796875, "learning_rate": 1.9147429209764276e-05, "loss": 0.3231, "step": 3542 }, { "epoch": 0.2681424719738896, "grad_norm": 1.7578125, "learning_rate": 1.9146947905232506e-05, "loss": 0.3941, "step": 3543 }, { "epoch": 0.2682181542973369, "grad_norm": 0.8515625, "learning_rate": 1.914646647093558e-05, "loss": 0.3385, "step": 3544 }, { "epoch": 0.26829383662078427, "grad_norm": 0.80859375, "learning_rate": 1.9145984906880333e-05, "loss": 0.3311, "step": 3545 }, { "epoch": 0.2683695189442316, "grad_norm": 0.91796875, "learning_rate": 1.9145503213073597e-05, "loss": 0.3333, "step": 3546 }, { "epoch": 0.2684452012676789, "grad_norm": 0.80859375, "learning_rate": 1.9145021389522203e-05, "loss": 0.3481, "step": 3547 }, { "epoch": 0.26852088359112625, "grad_norm": 0.80859375, "learning_rate": 1.914453943623299e-05, "loss": 0.2982, "step": 3548 }, { "epoch": 0.2685965659145736, "grad_norm": 0.8203125, "learning_rate": 1.9144057353212787e-05, "loss": 0.3111, "step": 3549 }, { "epoch": 0.2686722482380209, "grad_norm": 0.84375, "learning_rate": 1.914357514046844e-05, "loss": 0.3548, "step": 3550 }, { "epoch": 0.26874793056146823, "grad_norm": 0.84375, "learning_rate": 1.914309279800679e-05, "loss": 0.3059, "step": 3551 }, { "epoch": 0.2688236128849156, "grad_norm": 0.8203125, "learning_rate": 1.914261032583468e-05, "loss": 0.3286, "step": 3552 }, { "epoch": 0.2688992952083629, "grad_norm": 0.84375, "learning_rate": 1.914212772395895e-05, "loss": 0.3495, "step": 3553 }, { "epoch": 0.2689749775318102, "grad_norm": 0.85546875, "learning_rate": 1.914164499238645e-05, "loss": 0.3657, "step": 3554 }, { "epoch": 0.26905065985525756, "grad_norm": 1.5, "learning_rate": 1.9141162131124032e-05, "loss": 0.4043, "step": 3555 }, { "epoch": 0.2691263421787049, "grad_norm": 0.75, "learning_rate": 1.914067914017854e-05, "loss": 0.2963, "step": 3556 }, { "epoch": 0.2692020245021522, "grad_norm": 0.82421875, "learning_rate": 1.9140196019556828e-05, "loss": 0.3415, "step": 3557 }, { "epoch": 0.26927770682559954, "grad_norm": 0.82421875, "learning_rate": 1.913971276926575e-05, "loss": 0.3306, "step": 3558 }, { "epoch": 0.2693533891490469, "grad_norm": 0.84765625, "learning_rate": 1.9139229389312162e-05, "loss": 0.3612, "step": 3559 }, { "epoch": 0.26942907147249423, "grad_norm": 0.85546875, "learning_rate": 1.9138745879702923e-05, "loss": 0.3415, "step": 3560 }, { "epoch": 0.2695047537959415, "grad_norm": 0.79296875, "learning_rate": 1.913826224044489e-05, "loss": 0.3244, "step": 3561 }, { "epoch": 0.26958043611938887, "grad_norm": 0.765625, "learning_rate": 1.9137778471544927e-05, "loss": 0.2896, "step": 3562 }, { "epoch": 0.2696561184428362, "grad_norm": 0.76171875, "learning_rate": 1.9137294573009895e-05, "loss": 0.2842, "step": 3563 }, { "epoch": 0.2697318007662835, "grad_norm": 0.8125, "learning_rate": 1.913681054484666e-05, "loss": 0.3486, "step": 3564 }, { "epoch": 0.26980748308973085, "grad_norm": 1.1796875, "learning_rate": 1.9136326387062086e-05, "loss": 0.3821, "step": 3565 }, { "epoch": 0.2698831654131782, "grad_norm": 0.79296875, "learning_rate": 1.9135842099663044e-05, "loss": 0.2991, "step": 3566 }, { "epoch": 0.26995884773662554, "grad_norm": 0.80859375, "learning_rate": 1.9135357682656404e-05, "loss": 0.3036, "step": 3567 }, { "epoch": 0.27003453006007283, "grad_norm": 0.84765625, "learning_rate": 1.9134873136049038e-05, "loss": 0.3444, "step": 3568 }, { "epoch": 0.2701102123835202, "grad_norm": 0.83203125, "learning_rate": 1.9134388459847822e-05, "loss": 0.3732, "step": 3569 }, { "epoch": 0.2701858947069675, "grad_norm": 0.85546875, "learning_rate": 1.9133903654059633e-05, "loss": 0.3581, "step": 3570 }, { "epoch": 0.2702615770304148, "grad_norm": 0.77734375, "learning_rate": 1.9133418718691343e-05, "loss": 0.3176, "step": 3571 }, { "epoch": 0.27033725935386216, "grad_norm": 0.78515625, "learning_rate": 1.9132933653749835e-05, "loss": 0.3054, "step": 3572 }, { "epoch": 0.2704129416773095, "grad_norm": 0.8125, "learning_rate": 1.913244845924199e-05, "loss": 0.3327, "step": 3573 }, { "epoch": 0.27048862400075685, "grad_norm": 0.87890625, "learning_rate": 1.9131963135174693e-05, "loss": 0.3758, "step": 3574 }, { "epoch": 0.27056430632420414, "grad_norm": 0.765625, "learning_rate": 1.9131477681554827e-05, "loss": 0.3331, "step": 3575 }, { "epoch": 0.2706399886476515, "grad_norm": 0.7890625, "learning_rate": 1.9130992098389282e-05, "loss": 0.3256, "step": 3576 }, { "epoch": 0.27071567097109883, "grad_norm": 0.80078125, "learning_rate": 1.9130506385684945e-05, "loss": 0.3184, "step": 3577 }, { "epoch": 0.2707913532945461, "grad_norm": 0.8515625, "learning_rate": 1.9130020543448705e-05, "loss": 0.3714, "step": 3578 }, { "epoch": 0.27086703561799347, "grad_norm": 0.828125, "learning_rate": 1.9129534571687456e-05, "loss": 0.3463, "step": 3579 }, { "epoch": 0.2709427179414408, "grad_norm": 0.79296875, "learning_rate": 1.9129048470408093e-05, "loss": 0.3141, "step": 3580 }, { "epoch": 0.27101840026488816, "grad_norm": 0.78515625, "learning_rate": 1.9128562239617507e-05, "loss": 0.3135, "step": 3581 }, { "epoch": 0.27109408258833545, "grad_norm": 0.78515625, "learning_rate": 1.9128075879322608e-05, "loss": 0.3036, "step": 3582 }, { "epoch": 0.2711697649117828, "grad_norm": 0.80859375, "learning_rate": 1.9127589389530282e-05, "loss": 0.3237, "step": 3583 }, { "epoch": 0.27124544723523014, "grad_norm": 0.78125, "learning_rate": 1.912710277024744e-05, "loss": 0.3048, "step": 3584 }, { "epoch": 0.27132112955867743, "grad_norm": 0.86328125, "learning_rate": 1.912661602148098e-05, "loss": 0.3779, "step": 3585 }, { "epoch": 0.2713968118821248, "grad_norm": 0.86328125, "learning_rate": 1.9126129143237814e-05, "loss": 0.3675, "step": 3586 }, { "epoch": 0.2714724942055721, "grad_norm": 1.21875, "learning_rate": 1.9125642135524845e-05, "loss": 0.3285, "step": 3587 }, { "epoch": 0.2715481765290194, "grad_norm": 0.84375, "learning_rate": 1.912515499834898e-05, "loss": 0.3454, "step": 3588 }, { "epoch": 0.27162385885246676, "grad_norm": 0.88671875, "learning_rate": 1.9124667731717133e-05, "loss": 0.3721, "step": 3589 }, { "epoch": 0.2716995411759141, "grad_norm": 0.82421875, "learning_rate": 1.9124180335636213e-05, "loss": 0.3174, "step": 3590 }, { "epoch": 0.27177522349936145, "grad_norm": 0.83984375, "learning_rate": 1.912369281011314e-05, "loss": 0.3793, "step": 3591 }, { "epoch": 0.27185090582280874, "grad_norm": 0.86328125, "learning_rate": 1.9123205155154827e-05, "loss": 0.3543, "step": 3592 }, { "epoch": 0.2719265881462561, "grad_norm": 0.84765625, "learning_rate": 1.9122717370768194e-05, "loss": 0.3707, "step": 3593 }, { "epoch": 0.27200227046970343, "grad_norm": 0.7890625, "learning_rate": 1.9122229456960157e-05, "loss": 0.3113, "step": 3594 }, { "epoch": 0.2720779527931507, "grad_norm": 0.84375, "learning_rate": 1.9121741413737643e-05, "loss": 0.3273, "step": 3595 }, { "epoch": 0.27215363511659807, "grad_norm": 0.8203125, "learning_rate": 1.9121253241107573e-05, "loss": 0.3023, "step": 3596 }, { "epoch": 0.2722293174400454, "grad_norm": 0.85546875, "learning_rate": 1.9120764939076873e-05, "loss": 0.2716, "step": 3597 }, { "epoch": 0.27230499976349276, "grad_norm": 0.93359375, "learning_rate": 1.9120276507652475e-05, "loss": 0.3442, "step": 3598 }, { "epoch": 0.27238068208694005, "grad_norm": 0.87109375, "learning_rate": 1.9119787946841296e-05, "loss": 0.3484, "step": 3599 }, { "epoch": 0.2724563644103874, "grad_norm": 0.78515625, "learning_rate": 1.911929925665028e-05, "loss": 0.2981, "step": 3600 }, { "epoch": 0.27253204673383474, "grad_norm": 0.87109375, "learning_rate": 1.9118810437086353e-05, "loss": 0.3322, "step": 3601 }, { "epoch": 0.27260772905728203, "grad_norm": 0.83984375, "learning_rate": 1.911832148815645e-05, "loss": 0.3451, "step": 3602 }, { "epoch": 0.2726834113807294, "grad_norm": 0.8671875, "learning_rate": 1.9117832409867513e-05, "loss": 0.3847, "step": 3603 }, { "epoch": 0.2727590937041767, "grad_norm": 0.8515625, "learning_rate": 1.911734320222648e-05, "loss": 0.3487, "step": 3604 }, { "epoch": 0.27283477602762407, "grad_norm": 0.78515625, "learning_rate": 1.9116853865240278e-05, "loss": 0.3212, "step": 3605 }, { "epoch": 0.27291045835107136, "grad_norm": 0.84375, "learning_rate": 1.9116364398915866e-05, "loss": 0.346, "step": 3606 }, { "epoch": 0.2729861406745187, "grad_norm": 0.8125, "learning_rate": 1.911587480326018e-05, "loss": 0.3305, "step": 3607 }, { "epoch": 0.27306182299796605, "grad_norm": 0.80078125, "learning_rate": 1.9115385078280158e-05, "loss": 0.3175, "step": 3608 }, { "epoch": 0.27313750532141334, "grad_norm": 1.109375, "learning_rate": 1.9114895223982767e-05, "loss": 0.3206, "step": 3609 }, { "epoch": 0.2732131876448607, "grad_norm": 0.76953125, "learning_rate": 1.911440524037494e-05, "loss": 0.2825, "step": 3610 }, { "epoch": 0.27328886996830803, "grad_norm": 0.8125, "learning_rate": 1.9113915127463637e-05, "loss": 0.3174, "step": 3611 }, { "epoch": 0.2733645522917554, "grad_norm": 3.484375, "learning_rate": 1.9113424885255805e-05, "loss": 0.4233, "step": 3612 }, { "epoch": 0.27344023461520267, "grad_norm": 2.28125, "learning_rate": 1.9112934513758403e-05, "loss": 0.4382, "step": 3613 }, { "epoch": 0.27351591693865, "grad_norm": 1.28125, "learning_rate": 1.9112444012978385e-05, "loss": 0.4053, "step": 3614 }, { "epoch": 0.27359159926209736, "grad_norm": 0.8203125, "learning_rate": 1.9111953382922715e-05, "loss": 0.3109, "step": 3615 }, { "epoch": 0.27366728158554465, "grad_norm": 0.84375, "learning_rate": 1.9111462623598345e-05, "loss": 0.3295, "step": 3616 }, { "epoch": 0.273742963908992, "grad_norm": 0.859375, "learning_rate": 1.9110971735012245e-05, "loss": 0.3547, "step": 3617 }, { "epoch": 0.27381864623243934, "grad_norm": 0.81640625, "learning_rate": 1.9110480717171376e-05, "loss": 0.3309, "step": 3618 }, { "epoch": 0.2738943285558867, "grad_norm": 0.77734375, "learning_rate": 1.9109989570082703e-05, "loss": 0.3028, "step": 3619 }, { "epoch": 0.273970010879334, "grad_norm": 0.90234375, "learning_rate": 1.9109498293753196e-05, "loss": 0.3596, "step": 3620 }, { "epoch": 0.2740456932027813, "grad_norm": 0.79296875, "learning_rate": 1.910900688818982e-05, "loss": 0.3166, "step": 3621 }, { "epoch": 0.27412137552622867, "grad_norm": 0.85546875, "learning_rate": 1.9108515353399553e-05, "loss": 0.3585, "step": 3622 }, { "epoch": 0.27419705784967596, "grad_norm": 0.91796875, "learning_rate": 1.9108023689389362e-05, "loss": 0.3946, "step": 3623 }, { "epoch": 0.2742727401731233, "grad_norm": 0.8515625, "learning_rate": 1.9107531896166226e-05, "loss": 0.3514, "step": 3624 }, { "epoch": 0.27434842249657065, "grad_norm": 0.796875, "learning_rate": 1.9107039973737125e-05, "loss": 0.3177, "step": 3625 }, { "epoch": 0.274424104820018, "grad_norm": 0.796875, "learning_rate": 1.9106547922109027e-05, "loss": 0.3048, "step": 3626 }, { "epoch": 0.2744997871434653, "grad_norm": 0.875, "learning_rate": 1.9106055741288924e-05, "loss": 0.3512, "step": 3627 }, { "epoch": 0.27457546946691264, "grad_norm": 0.78515625, "learning_rate": 1.9105563431283794e-05, "loss": 0.3288, "step": 3628 }, { "epoch": 0.27465115179036, "grad_norm": 0.81640625, "learning_rate": 1.910507099210062e-05, "loss": 0.3147, "step": 3629 }, { "epoch": 0.2747268341138073, "grad_norm": 0.7578125, "learning_rate": 1.910457842374639e-05, "loss": 0.2823, "step": 3630 }, { "epoch": 0.2748025164372546, "grad_norm": 0.7734375, "learning_rate": 1.9104085726228086e-05, "loss": 0.264, "step": 3631 }, { "epoch": 0.27487819876070196, "grad_norm": 0.7890625, "learning_rate": 1.910359289955271e-05, "loss": 0.3082, "step": 3632 }, { "epoch": 0.2749538810841493, "grad_norm": 0.84375, "learning_rate": 1.910309994372724e-05, "loss": 0.3465, "step": 3633 }, { "epoch": 0.2750295634075966, "grad_norm": 0.7734375, "learning_rate": 1.910260685875868e-05, "loss": 0.302, "step": 3634 }, { "epoch": 0.27510524573104395, "grad_norm": 0.828125, "learning_rate": 1.910211364465402e-05, "loss": 0.3529, "step": 3635 }, { "epoch": 0.2751809280544913, "grad_norm": 0.81640625, "learning_rate": 1.910162030142026e-05, "loss": 0.3137, "step": 3636 }, { "epoch": 0.2752566103779386, "grad_norm": 0.796875, "learning_rate": 1.9101126829064394e-05, "loss": 0.3418, "step": 3637 }, { "epoch": 0.27533229270138593, "grad_norm": 0.83203125, "learning_rate": 1.910063322759343e-05, "loss": 0.3604, "step": 3638 }, { "epoch": 0.2754079750248333, "grad_norm": 0.83203125, "learning_rate": 1.9100139497014363e-05, "loss": 0.3658, "step": 3639 }, { "epoch": 0.2754836573482806, "grad_norm": 0.828125, "learning_rate": 1.9099645637334202e-05, "loss": 0.3461, "step": 3640 }, { "epoch": 0.2755593396717279, "grad_norm": 0.8515625, "learning_rate": 1.909915164855995e-05, "loss": 0.3721, "step": 3641 }, { "epoch": 0.27563502199517526, "grad_norm": 0.7890625, "learning_rate": 1.909865753069862e-05, "loss": 0.3123, "step": 3642 }, { "epoch": 0.2757107043186226, "grad_norm": 0.828125, "learning_rate": 1.9098163283757215e-05, "loss": 0.3064, "step": 3643 }, { "epoch": 0.2757863866420699, "grad_norm": 0.875, "learning_rate": 1.9097668907742757e-05, "loss": 0.3687, "step": 3644 }, { "epoch": 0.27586206896551724, "grad_norm": 0.8359375, "learning_rate": 1.909717440266225e-05, "loss": 0.3376, "step": 3645 }, { "epoch": 0.2759377512889646, "grad_norm": 0.890625, "learning_rate": 1.9096679768522712e-05, "loss": 0.3476, "step": 3646 }, { "epoch": 0.2760134336124119, "grad_norm": 0.828125, "learning_rate": 1.9096185005331162e-05, "loss": 0.3267, "step": 3647 }, { "epoch": 0.2760891159358592, "grad_norm": 0.91015625, "learning_rate": 1.909569011309462e-05, "loss": 0.3568, "step": 3648 }, { "epoch": 0.27616479825930657, "grad_norm": 0.80078125, "learning_rate": 1.90951950918201e-05, "loss": 0.3055, "step": 3649 }, { "epoch": 0.2762404805827539, "grad_norm": 0.84375, "learning_rate": 1.9094699941514634e-05, "loss": 0.3628, "step": 3650 }, { "epoch": 0.2763161629062012, "grad_norm": 0.8359375, "learning_rate": 1.9094204662185244e-05, "loss": 0.3769, "step": 3651 }, { "epoch": 0.27639184522964855, "grad_norm": 0.87890625, "learning_rate": 1.909370925383895e-05, "loss": 0.4016, "step": 3652 }, { "epoch": 0.2764675275530959, "grad_norm": 0.859375, "learning_rate": 1.9093213716482786e-05, "loss": 0.345, "step": 3653 }, { "epoch": 0.2765432098765432, "grad_norm": 0.765625, "learning_rate": 1.909271805012378e-05, "loss": 0.2836, "step": 3654 }, { "epoch": 0.27661889219999053, "grad_norm": 0.83203125, "learning_rate": 1.9092222254768967e-05, "loss": 0.3487, "step": 3655 }, { "epoch": 0.2766945745234379, "grad_norm": 0.79296875, "learning_rate": 1.909172633042538e-05, "loss": 0.3232, "step": 3656 }, { "epoch": 0.2767702568468852, "grad_norm": 0.93359375, "learning_rate": 1.909123027710005e-05, "loss": 0.3472, "step": 3657 }, { "epoch": 0.2768459391703325, "grad_norm": 3.015625, "learning_rate": 1.909073409480002e-05, "loss": 0.4214, "step": 3658 }, { "epoch": 0.27692162149377986, "grad_norm": 0.765625, "learning_rate": 1.9090237783532324e-05, "loss": 0.2957, "step": 3659 }, { "epoch": 0.2769973038172272, "grad_norm": 0.8125, "learning_rate": 1.9089741343304005e-05, "loss": 0.3482, "step": 3660 }, { "epoch": 0.2770729861406745, "grad_norm": 0.9140625, "learning_rate": 1.908924477412211e-05, "loss": 0.3926, "step": 3661 }, { "epoch": 0.27714866846412184, "grad_norm": 0.84375, "learning_rate": 1.908874807599368e-05, "loss": 0.3203, "step": 3662 }, { "epoch": 0.2772243507875692, "grad_norm": 0.8828125, "learning_rate": 1.908825124892576e-05, "loss": 0.3824, "step": 3663 }, { "epoch": 0.27730003311101653, "grad_norm": 0.83203125, "learning_rate": 1.9087754292925397e-05, "loss": 0.35, "step": 3664 }, { "epoch": 0.2773757154344638, "grad_norm": 0.85546875, "learning_rate": 1.908725720799965e-05, "loss": 0.3777, "step": 3665 }, { "epoch": 0.27745139775791117, "grad_norm": 0.828125, "learning_rate": 1.9086759994155563e-05, "loss": 0.3445, "step": 3666 }, { "epoch": 0.2775270800813585, "grad_norm": 0.8671875, "learning_rate": 1.908626265140019e-05, "loss": 0.3742, "step": 3667 }, { "epoch": 0.2776027624048058, "grad_norm": 0.8203125, "learning_rate": 1.908576517974059e-05, "loss": 0.3379, "step": 3668 }, { "epoch": 0.27767844472825315, "grad_norm": 0.7890625, "learning_rate": 1.908526757918382e-05, "loss": 0.3541, "step": 3669 }, { "epoch": 0.2777541270517005, "grad_norm": 0.765625, "learning_rate": 1.908476984973694e-05, "loss": 0.3103, "step": 3670 }, { "epoch": 0.27782980937514784, "grad_norm": 0.7734375, "learning_rate": 1.9084271991407006e-05, "loss": 0.3136, "step": 3671 }, { "epoch": 0.27790549169859513, "grad_norm": 0.77734375, "learning_rate": 1.908377400420109e-05, "loss": 0.2886, "step": 3672 }, { "epoch": 0.2779811740220425, "grad_norm": 0.8125, "learning_rate": 1.9083275888126248e-05, "loss": 0.3603, "step": 3673 }, { "epoch": 0.2780568563454898, "grad_norm": 0.80859375, "learning_rate": 1.908277764318955e-05, "loss": 0.3138, "step": 3674 }, { "epoch": 0.2781325386689371, "grad_norm": 0.8125, "learning_rate": 1.9082279269398064e-05, "loss": 0.3531, "step": 3675 }, { "epoch": 0.27820822099238446, "grad_norm": 0.8515625, "learning_rate": 1.9081780766758862e-05, "loss": 0.3638, "step": 3676 }, { "epoch": 0.2782839033158318, "grad_norm": 0.7734375, "learning_rate": 1.9081282135279012e-05, "loss": 0.3151, "step": 3677 }, { "epoch": 0.27835958563927915, "grad_norm": 1.890625, "learning_rate": 1.9080783374965594e-05, "loss": 0.4538, "step": 3678 }, { "epoch": 0.27843526796272644, "grad_norm": 0.81640625, "learning_rate": 1.908028448582568e-05, "loss": 0.303, "step": 3679 }, { "epoch": 0.2785109502861738, "grad_norm": 0.83984375, "learning_rate": 1.907978546786635e-05, "loss": 0.3483, "step": 3680 }, { "epoch": 0.27858663260962113, "grad_norm": 0.81640625, "learning_rate": 1.907928632109468e-05, "loss": 0.3338, "step": 3681 }, { "epoch": 0.2786623149330684, "grad_norm": 1.21875, "learning_rate": 1.9078787045517754e-05, "loss": 0.3715, "step": 3682 }, { "epoch": 0.27873799725651577, "grad_norm": 0.76953125, "learning_rate": 1.907828764114265e-05, "loss": 0.2833, "step": 3683 }, { "epoch": 0.2788136795799631, "grad_norm": 0.7734375, "learning_rate": 1.907778810797646e-05, "loss": 0.3088, "step": 3684 }, { "epoch": 0.27888936190341046, "grad_norm": 0.84375, "learning_rate": 1.9077288446026267e-05, "loss": 0.33, "step": 3685 }, { "epoch": 0.27896504422685775, "grad_norm": 0.93359375, "learning_rate": 1.9076788655299163e-05, "loss": 0.4103, "step": 3686 }, { "epoch": 0.2790407265503051, "grad_norm": 0.83203125, "learning_rate": 1.9076288735802233e-05, "loss": 0.3283, "step": 3687 }, { "epoch": 0.27911640887375244, "grad_norm": 0.7578125, "learning_rate": 1.907578868754257e-05, "loss": 0.307, "step": 3688 }, { "epoch": 0.27919209119719973, "grad_norm": 0.78125, "learning_rate": 1.9075288510527273e-05, "loss": 0.3091, "step": 3689 }, { "epoch": 0.2792677735206471, "grad_norm": 0.8984375, "learning_rate": 1.9074788204763438e-05, "loss": 0.3467, "step": 3690 }, { "epoch": 0.2793434558440944, "grad_norm": 0.89453125, "learning_rate": 1.9074287770258154e-05, "loss": 0.3474, "step": 3691 }, { "epoch": 0.27941913816754177, "grad_norm": 0.734375, "learning_rate": 1.9073787207018526e-05, "loss": 0.2828, "step": 3692 }, { "epoch": 0.27949482049098906, "grad_norm": 3.015625, "learning_rate": 1.9073286515051656e-05, "loss": 0.3257, "step": 3693 }, { "epoch": 0.2795705028144364, "grad_norm": 0.88671875, "learning_rate": 1.9072785694364647e-05, "loss": 0.3294, "step": 3694 }, { "epoch": 0.27964618513788375, "grad_norm": 0.828125, "learning_rate": 1.9072284744964603e-05, "loss": 0.3422, "step": 3695 }, { "epoch": 0.27972186746133104, "grad_norm": 0.72265625, "learning_rate": 1.907178366685863e-05, "loss": 0.2668, "step": 3696 }, { "epoch": 0.2797975497847784, "grad_norm": 0.8671875, "learning_rate": 1.9071282460053845e-05, "loss": 0.3563, "step": 3697 }, { "epoch": 0.27987323210822573, "grad_norm": 8.5625, "learning_rate": 1.9070781124557345e-05, "loss": 0.3177, "step": 3698 }, { "epoch": 0.2799489144316731, "grad_norm": 0.7890625, "learning_rate": 1.907027966037625e-05, "loss": 0.3262, "step": 3699 }, { "epoch": 0.28002459675512037, "grad_norm": 0.79296875, "learning_rate": 1.9069778067517672e-05, "loss": 0.3018, "step": 3700 }, { "epoch": 0.2801002790785677, "grad_norm": 0.82421875, "learning_rate": 1.906927634598873e-05, "loss": 0.3164, "step": 3701 }, { "epoch": 0.28017596140201506, "grad_norm": 0.765625, "learning_rate": 1.9068774495796538e-05, "loss": 0.3074, "step": 3702 }, { "epoch": 0.28025164372546235, "grad_norm": 0.8125, "learning_rate": 1.9068272516948218e-05, "loss": 0.3161, "step": 3703 }, { "epoch": 0.2803273260489097, "grad_norm": 0.81640625, "learning_rate": 1.906777040945089e-05, "loss": 0.3432, "step": 3704 }, { "epoch": 0.28040300837235704, "grad_norm": 0.87109375, "learning_rate": 1.9067268173311678e-05, "loss": 0.3341, "step": 3705 }, { "epoch": 0.28047869069580433, "grad_norm": 1.875, "learning_rate": 1.9066765808537708e-05, "loss": 0.4098, "step": 3706 }, { "epoch": 0.2805543730192517, "grad_norm": 0.8515625, "learning_rate": 1.9066263315136107e-05, "loss": 0.3384, "step": 3707 }, { "epoch": 0.280630055342699, "grad_norm": 0.9296875, "learning_rate": 1.9065760693114e-05, "loss": 0.3901, "step": 3708 }, { "epoch": 0.28070573766614637, "grad_norm": 0.85546875, "learning_rate": 1.9065257942478518e-05, "loss": 0.3617, "step": 3709 }, { "epoch": 0.28078141998959366, "grad_norm": 0.74609375, "learning_rate": 1.90647550632368e-05, "loss": 0.2876, "step": 3710 }, { "epoch": 0.280857102313041, "grad_norm": 1.3125, "learning_rate": 1.9064252055395977e-05, "loss": 0.4475, "step": 3711 }, { "epoch": 0.28093278463648835, "grad_norm": 0.875, "learning_rate": 1.9063748918963178e-05, "loss": 0.3593, "step": 3712 }, { "epoch": 0.28100846695993564, "grad_norm": 0.83984375, "learning_rate": 1.906324565394555e-05, "loss": 0.326, "step": 3713 }, { "epoch": 0.281084149283383, "grad_norm": 0.87109375, "learning_rate": 1.906274226035023e-05, "loss": 0.3844, "step": 3714 }, { "epoch": 0.28115983160683033, "grad_norm": 0.828125, "learning_rate": 1.9062238738184357e-05, "loss": 0.3403, "step": 3715 }, { "epoch": 0.2812355139302777, "grad_norm": 0.75, "learning_rate": 1.9061735087455078e-05, "loss": 0.3011, "step": 3716 }, { "epoch": 0.28131119625372497, "grad_norm": 0.7890625, "learning_rate": 1.9061231308169534e-05, "loss": 0.2973, "step": 3717 }, { "epoch": 0.2813868785771723, "grad_norm": 0.8203125, "learning_rate": 1.9060727400334875e-05, "loss": 0.3327, "step": 3718 }, { "epoch": 0.28146256090061966, "grad_norm": 0.82421875, "learning_rate": 1.9060223363958254e-05, "loss": 0.2923, "step": 3719 }, { "epoch": 0.28153824322406695, "grad_norm": 0.8359375, "learning_rate": 1.9059719199046812e-05, "loss": 0.3662, "step": 3720 }, { "epoch": 0.2816139255475143, "grad_norm": 0.76953125, "learning_rate": 1.9059214905607705e-05, "loss": 0.2771, "step": 3721 }, { "epoch": 0.28168960787096164, "grad_norm": 1.390625, "learning_rate": 1.905871048364809e-05, "loss": 0.4283, "step": 3722 }, { "epoch": 0.281765290194409, "grad_norm": 0.8359375, "learning_rate": 1.905820593317512e-05, "loss": 0.343, "step": 3723 }, { "epoch": 0.2818409725178563, "grad_norm": 0.859375, "learning_rate": 1.9057701254195958e-05, "loss": 0.3499, "step": 3724 }, { "epoch": 0.2819166548413036, "grad_norm": 0.8359375, "learning_rate": 1.9057196446717757e-05, "loss": 0.3383, "step": 3725 }, { "epoch": 0.281992337164751, "grad_norm": 0.8515625, "learning_rate": 1.9056691510747686e-05, "loss": 0.3737, "step": 3726 }, { "epoch": 0.28206801948819826, "grad_norm": 0.87109375, "learning_rate": 1.9056186446292895e-05, "loss": 0.3673, "step": 3727 }, { "epoch": 0.2821437018116456, "grad_norm": 0.81640625, "learning_rate": 1.9055681253360568e-05, "loss": 0.3292, "step": 3728 }, { "epoch": 0.28221938413509295, "grad_norm": 0.83203125, "learning_rate": 1.9055175931957858e-05, "loss": 0.3187, "step": 3729 }, { "epoch": 0.2822950664585403, "grad_norm": 0.828125, "learning_rate": 1.905467048209194e-05, "loss": 0.343, "step": 3730 }, { "epoch": 0.2823707487819876, "grad_norm": 0.81640625, "learning_rate": 1.905416490376998e-05, "loss": 0.3212, "step": 3731 }, { "epoch": 0.28244643110543494, "grad_norm": 0.94140625, "learning_rate": 1.9053659196999157e-05, "loss": 0.3907, "step": 3732 }, { "epoch": 0.2825221134288823, "grad_norm": 0.8046875, "learning_rate": 1.9053153361786637e-05, "loss": 0.2943, "step": 3733 }, { "epoch": 0.2825977957523296, "grad_norm": 0.8203125, "learning_rate": 1.9052647398139607e-05, "loss": 0.3513, "step": 3734 }, { "epoch": 0.2826734780757769, "grad_norm": 0.87109375, "learning_rate": 1.9052141306065234e-05, "loss": 0.3466, "step": 3735 }, { "epoch": 0.28274916039922426, "grad_norm": 0.92578125, "learning_rate": 1.9051635085570702e-05, "loss": 0.3675, "step": 3736 }, { "epoch": 0.2828248427226716, "grad_norm": 0.83984375, "learning_rate": 1.9051128736663197e-05, "loss": 0.358, "step": 3737 }, { "epoch": 0.2829005250461189, "grad_norm": 0.7578125, "learning_rate": 1.9050622259349895e-05, "loss": 0.3153, "step": 3738 }, { "epoch": 0.28297620736956625, "grad_norm": 0.8515625, "learning_rate": 1.9050115653637985e-05, "loss": 0.3476, "step": 3739 }, { "epoch": 0.2830518896930136, "grad_norm": 0.80859375, "learning_rate": 1.9049608919534658e-05, "loss": 0.3237, "step": 3740 }, { "epoch": 0.2831275720164609, "grad_norm": 1.484375, "learning_rate": 1.9049102057047095e-05, "loss": 0.4218, "step": 3741 }, { "epoch": 0.28320325433990823, "grad_norm": 0.82421875, "learning_rate": 1.9048595066182495e-05, "loss": 0.3511, "step": 3742 }, { "epoch": 0.2832789366633556, "grad_norm": 0.859375, "learning_rate": 1.9048087946948038e-05, "loss": 0.3652, "step": 3743 }, { "epoch": 0.2833546189868029, "grad_norm": 0.83203125, "learning_rate": 1.9047580699350932e-05, "loss": 0.3509, "step": 3744 }, { "epoch": 0.2834303013102502, "grad_norm": 0.84375, "learning_rate": 1.9047073323398367e-05, "loss": 0.3385, "step": 3745 }, { "epoch": 0.28350598363369756, "grad_norm": 0.85546875, "learning_rate": 1.9046565819097546e-05, "loss": 0.3508, "step": 3746 }, { "epoch": 0.2835816659571449, "grad_norm": 0.8515625, "learning_rate": 1.9046058186455657e-05, "loss": 0.3514, "step": 3747 }, { "epoch": 0.2836573482805922, "grad_norm": 0.8125, "learning_rate": 1.9045550425479912e-05, "loss": 0.2954, "step": 3748 }, { "epoch": 0.28373303060403954, "grad_norm": 0.80078125, "learning_rate": 1.904504253617751e-05, "loss": 0.3062, "step": 3749 }, { "epoch": 0.2838087129274869, "grad_norm": 0.75390625, "learning_rate": 1.904453451855566e-05, "loss": 0.2739, "step": 3750 }, { "epoch": 0.28388439525093423, "grad_norm": 0.859375, "learning_rate": 1.9044026372621568e-05, "loss": 0.3714, "step": 3751 }, { "epoch": 0.2839600775743815, "grad_norm": 0.91796875, "learning_rate": 1.9043518098382437e-05, "loss": 0.3837, "step": 3752 }, { "epoch": 0.28403575989782887, "grad_norm": 0.984375, "learning_rate": 1.9043009695845486e-05, "loss": 0.3569, "step": 3753 }, { "epoch": 0.2841114422212762, "grad_norm": 0.80078125, "learning_rate": 1.9042501165017928e-05, "loss": 0.3142, "step": 3754 }, { "epoch": 0.2841871245447235, "grad_norm": 0.875, "learning_rate": 1.9041992505906965e-05, "loss": 0.3707, "step": 3755 }, { "epoch": 0.28426280686817085, "grad_norm": 0.78125, "learning_rate": 1.9041483718519825e-05, "loss": 0.2967, "step": 3756 }, { "epoch": 0.2843384891916182, "grad_norm": 0.8125, "learning_rate": 1.9040974802863725e-05, "loss": 0.3353, "step": 3757 }, { "epoch": 0.28441417151506554, "grad_norm": 0.8515625, "learning_rate": 1.9040465758945884e-05, "loss": 0.3212, "step": 3758 }, { "epoch": 0.28448985383851283, "grad_norm": 0.7734375, "learning_rate": 1.9039956586773516e-05, "loss": 0.3115, "step": 3759 }, { "epoch": 0.2845655361619602, "grad_norm": 0.87890625, "learning_rate": 1.9039447286353855e-05, "loss": 0.3807, "step": 3760 }, { "epoch": 0.2846412184854075, "grad_norm": 0.80078125, "learning_rate": 1.903893785769412e-05, "loss": 0.3337, "step": 3761 }, { "epoch": 0.2847169008088548, "grad_norm": 0.84765625, "learning_rate": 1.9038428300801542e-05, "loss": 0.3775, "step": 3762 }, { "epoch": 0.28479258313230216, "grad_norm": 0.7890625, "learning_rate": 1.9037918615683345e-05, "loss": 0.3268, "step": 3763 }, { "epoch": 0.2848682654557495, "grad_norm": 0.75390625, "learning_rate": 1.903740880234677e-05, "loss": 0.2918, "step": 3764 }, { "epoch": 0.28494394777919685, "grad_norm": 0.8203125, "learning_rate": 1.9036898860799037e-05, "loss": 0.3574, "step": 3765 }, { "epoch": 0.28501963010264414, "grad_norm": 0.8515625, "learning_rate": 1.9036388791047383e-05, "loss": 0.3642, "step": 3766 }, { "epoch": 0.2850953124260915, "grad_norm": 0.8359375, "learning_rate": 1.903587859309905e-05, "loss": 0.3289, "step": 3767 }, { "epoch": 0.28517099474953883, "grad_norm": 0.79296875, "learning_rate": 1.9035368266961275e-05, "loss": 0.3217, "step": 3768 }, { "epoch": 0.2852466770729861, "grad_norm": 0.87890625, "learning_rate": 1.9034857812641294e-05, "loss": 0.3627, "step": 3769 }, { "epoch": 0.28532235939643347, "grad_norm": 0.75390625, "learning_rate": 1.9034347230146353e-05, "loss": 0.292, "step": 3770 }, { "epoch": 0.2853980417198808, "grad_norm": 0.859375, "learning_rate": 1.903383651948369e-05, "loss": 0.3647, "step": 3771 }, { "epoch": 0.2854737240433281, "grad_norm": 0.99609375, "learning_rate": 1.9033325680660556e-05, "loss": 0.3515, "step": 3772 }, { "epoch": 0.28554940636677545, "grad_norm": 0.8984375, "learning_rate": 1.9032814713684195e-05, "loss": 0.3847, "step": 3773 }, { "epoch": 0.2856250886902228, "grad_norm": 0.7734375, "learning_rate": 1.9032303618561855e-05, "loss": 0.2958, "step": 3774 }, { "epoch": 0.28570077101367014, "grad_norm": 0.79296875, "learning_rate": 1.9031792395300787e-05, "loss": 0.3382, "step": 3775 }, { "epoch": 0.28577645333711743, "grad_norm": 0.81640625, "learning_rate": 1.9031281043908246e-05, "loss": 0.3355, "step": 3776 }, { "epoch": 0.2858521356605648, "grad_norm": 0.8671875, "learning_rate": 1.9030769564391486e-05, "loss": 0.3395, "step": 3777 }, { "epoch": 0.2859278179840121, "grad_norm": 0.85546875, "learning_rate": 1.903025795675776e-05, "loss": 0.3265, "step": 3778 }, { "epoch": 0.2860035003074594, "grad_norm": 0.79296875, "learning_rate": 1.9029746221014333e-05, "loss": 0.3432, "step": 3779 }, { "epoch": 0.28607918263090676, "grad_norm": 0.97265625, "learning_rate": 1.9029234357168454e-05, "loss": 0.3859, "step": 3780 }, { "epoch": 0.2861548649543541, "grad_norm": 0.76953125, "learning_rate": 1.9028722365227396e-05, "loss": 0.3002, "step": 3781 }, { "epoch": 0.28623054727780145, "grad_norm": 1.546875, "learning_rate": 1.9028210245198416e-05, "loss": 0.4838, "step": 3782 }, { "epoch": 0.28630622960124874, "grad_norm": 0.82421875, "learning_rate": 1.902769799708878e-05, "loss": 0.3655, "step": 3783 }, { "epoch": 0.2863819119246961, "grad_norm": 0.78125, "learning_rate": 1.9027185620905757e-05, "loss": 0.3074, "step": 3784 }, { "epoch": 0.28645759424814343, "grad_norm": 0.8515625, "learning_rate": 1.9026673116656615e-05, "loss": 0.3526, "step": 3785 }, { "epoch": 0.2865332765715907, "grad_norm": 0.90234375, "learning_rate": 1.9026160484348626e-05, "loss": 0.3953, "step": 3786 }, { "epoch": 0.28660895889503807, "grad_norm": 0.8671875, "learning_rate": 1.9025647723989057e-05, "loss": 0.3692, "step": 3787 }, { "epoch": 0.2866846412184854, "grad_norm": 0.76171875, "learning_rate": 1.9025134835585187e-05, "loss": 0.2989, "step": 3788 }, { "epoch": 0.28676032354193276, "grad_norm": 0.86328125, "learning_rate": 1.9024621819144294e-05, "loss": 0.3727, "step": 3789 }, { "epoch": 0.28683600586538005, "grad_norm": 0.8203125, "learning_rate": 1.9024108674673653e-05, "loss": 0.3451, "step": 3790 }, { "epoch": 0.2869116881888274, "grad_norm": 0.828125, "learning_rate": 1.9023595402180545e-05, "loss": 0.3462, "step": 3791 }, { "epoch": 0.28698737051227474, "grad_norm": 0.703125, "learning_rate": 1.902308200167225e-05, "loss": 0.2529, "step": 3792 }, { "epoch": 0.28706305283572203, "grad_norm": 0.83984375, "learning_rate": 1.9022568473156052e-05, "loss": 0.3541, "step": 3793 }, { "epoch": 0.2871387351591694, "grad_norm": 0.8671875, "learning_rate": 1.902205481663924e-05, "loss": 0.3227, "step": 3794 }, { "epoch": 0.2872144174826167, "grad_norm": 0.7578125, "learning_rate": 1.9021541032129098e-05, "loss": 0.301, "step": 3795 }, { "epoch": 0.28729009980606407, "grad_norm": 1.6015625, "learning_rate": 1.9021027119632914e-05, "loss": 0.4558, "step": 3796 }, { "epoch": 0.28736578212951136, "grad_norm": 0.828125, "learning_rate": 1.9020513079157978e-05, "loss": 0.3212, "step": 3797 }, { "epoch": 0.2874414644529587, "grad_norm": 0.86328125, "learning_rate": 1.9019998910711582e-05, "loss": 0.3674, "step": 3798 }, { "epoch": 0.28751714677640605, "grad_norm": 0.83203125, "learning_rate": 1.9019484614301025e-05, "loss": 0.3452, "step": 3799 }, { "epoch": 0.28759282909985334, "grad_norm": 0.80859375, "learning_rate": 1.9018970189933603e-05, "loss": 0.3149, "step": 3800 }, { "epoch": 0.2876685114233007, "grad_norm": 0.90234375, "learning_rate": 1.9018455637616612e-05, "loss": 0.3621, "step": 3801 }, { "epoch": 0.28774419374674803, "grad_norm": 0.796875, "learning_rate": 1.901794095735735e-05, "loss": 0.333, "step": 3802 }, { "epoch": 0.2878198760701954, "grad_norm": 0.828125, "learning_rate": 1.901742614916312e-05, "loss": 0.3253, "step": 3803 }, { "epoch": 0.28789555839364267, "grad_norm": 0.86328125, "learning_rate": 1.9016911213041225e-05, "loss": 0.3613, "step": 3804 }, { "epoch": 0.28797124071709, "grad_norm": 0.8359375, "learning_rate": 1.9016396148998973e-05, "loss": 0.3205, "step": 3805 }, { "epoch": 0.28804692304053736, "grad_norm": 0.7890625, "learning_rate": 1.9015880957043666e-05, "loss": 0.3149, "step": 3806 }, { "epoch": 0.28812260536398465, "grad_norm": 0.92578125, "learning_rate": 1.901536563718262e-05, "loss": 0.3456, "step": 3807 }, { "epoch": 0.288198287687432, "grad_norm": 0.86328125, "learning_rate": 1.9014850189423136e-05, "loss": 0.375, "step": 3808 }, { "epoch": 0.28827397001087934, "grad_norm": 0.8046875, "learning_rate": 1.9014334613772537e-05, "loss": 0.2958, "step": 3809 }, { "epoch": 0.2883496523343267, "grad_norm": 0.76171875, "learning_rate": 1.901381891023813e-05, "loss": 0.2713, "step": 3810 }, { "epoch": 0.288425334657774, "grad_norm": 0.734375, "learning_rate": 1.9013303078827235e-05, "loss": 0.2814, "step": 3811 }, { "epoch": 0.2885010169812213, "grad_norm": 0.86328125, "learning_rate": 1.9012787119547167e-05, "loss": 0.3473, "step": 3812 }, { "epoch": 0.28857669930466867, "grad_norm": 0.7421875, "learning_rate": 1.9012271032405246e-05, "loss": 0.2853, "step": 3813 }, { "epoch": 0.28865238162811596, "grad_norm": 0.828125, "learning_rate": 1.9011754817408797e-05, "loss": 0.3331, "step": 3814 }, { "epoch": 0.2887280639515633, "grad_norm": 0.796875, "learning_rate": 1.9011238474565143e-05, "loss": 0.2986, "step": 3815 }, { "epoch": 0.28880374627501065, "grad_norm": 0.8203125, "learning_rate": 1.9010722003881604e-05, "loss": 0.3545, "step": 3816 }, { "epoch": 0.288879428598458, "grad_norm": 0.77734375, "learning_rate": 1.9010205405365513e-05, "loss": 0.3241, "step": 3817 }, { "epoch": 0.2889551109219053, "grad_norm": 0.984375, "learning_rate": 1.900968867902419e-05, "loss": 0.3654, "step": 3818 }, { "epoch": 0.28903079324535264, "grad_norm": 0.8046875, "learning_rate": 1.900917182486498e-05, "loss": 0.3097, "step": 3819 }, { "epoch": 0.2891064755688, "grad_norm": 0.8203125, "learning_rate": 1.9008654842895206e-05, "loss": 0.3317, "step": 3820 }, { "epoch": 0.28918215789224727, "grad_norm": 0.7578125, "learning_rate": 1.90081377331222e-05, "loss": 0.2953, "step": 3821 }, { "epoch": 0.2892578402156946, "grad_norm": 0.80859375, "learning_rate": 1.9007620495553304e-05, "loss": 0.3184, "step": 3822 }, { "epoch": 0.28933352253914196, "grad_norm": 0.7578125, "learning_rate": 1.9007103130195853e-05, "loss": 0.2878, "step": 3823 }, { "epoch": 0.2894092048625893, "grad_norm": 0.765625, "learning_rate": 1.9006585637057187e-05, "loss": 0.3156, "step": 3824 }, { "epoch": 0.2894848871860366, "grad_norm": 0.85546875, "learning_rate": 1.900606801614465e-05, "loss": 0.3435, "step": 3825 }, { "epoch": 0.28956056950948394, "grad_norm": 0.8125, "learning_rate": 1.900555026746558e-05, "loss": 0.3357, "step": 3826 }, { "epoch": 0.2896362518329313, "grad_norm": 1.4375, "learning_rate": 1.9005032391027328e-05, "loss": 0.4226, "step": 3827 }, { "epoch": 0.2897119341563786, "grad_norm": 0.80859375, "learning_rate": 1.9004514386837236e-05, "loss": 0.3375, "step": 3828 }, { "epoch": 0.2897876164798259, "grad_norm": 0.82421875, "learning_rate": 1.9003996254902658e-05, "loss": 0.359, "step": 3829 }, { "epoch": 0.2898632988032733, "grad_norm": 0.80078125, "learning_rate": 1.9003477995230942e-05, "loss": 0.3321, "step": 3830 }, { "epoch": 0.28993898112672056, "grad_norm": 0.74609375, "learning_rate": 1.9002959607829436e-05, "loss": 0.2809, "step": 3831 }, { "epoch": 0.2900146634501679, "grad_norm": 0.859375, "learning_rate": 1.90024410927055e-05, "loss": 0.3693, "step": 3832 }, { "epoch": 0.29009034577361525, "grad_norm": 0.828125, "learning_rate": 1.900192244986649e-05, "loss": 0.3555, "step": 3833 }, { "epoch": 0.2901660280970626, "grad_norm": 0.875, "learning_rate": 1.9001403679319762e-05, "loss": 0.36, "step": 3834 }, { "epoch": 0.2902417104205099, "grad_norm": 0.8046875, "learning_rate": 1.9000884781072675e-05, "loss": 0.3533, "step": 3835 }, { "epoch": 0.29031739274395724, "grad_norm": 0.77734375, "learning_rate": 1.9000365755132593e-05, "loss": 0.3129, "step": 3836 }, { "epoch": 0.2903930750674046, "grad_norm": 0.76171875, "learning_rate": 1.8999846601506874e-05, "loss": 0.2837, "step": 3837 }, { "epoch": 0.2904687573908519, "grad_norm": 0.83984375, "learning_rate": 1.8999327320202893e-05, "loss": 0.3282, "step": 3838 }, { "epoch": 0.2905444397142992, "grad_norm": 0.82421875, "learning_rate": 1.8998807911228003e-05, "loss": 0.3444, "step": 3839 }, { "epoch": 0.29062012203774656, "grad_norm": 0.859375, "learning_rate": 1.8998288374589585e-05, "loss": 0.3464, "step": 3840 }, { "epoch": 0.2906958043611939, "grad_norm": 1.421875, "learning_rate": 1.8997768710295004e-05, "loss": 0.4511, "step": 3841 }, { "epoch": 0.2907714866846412, "grad_norm": 0.79296875, "learning_rate": 1.899724891835163e-05, "loss": 0.319, "step": 3842 }, { "epoch": 0.29084716900808855, "grad_norm": 0.859375, "learning_rate": 1.8996728998766845e-05, "loss": 0.3889, "step": 3843 }, { "epoch": 0.2909228513315359, "grad_norm": 0.78515625, "learning_rate": 1.8996208951548022e-05, "loss": 0.3158, "step": 3844 }, { "epoch": 0.2909985336549832, "grad_norm": 0.8125, "learning_rate": 1.899568877670253e-05, "loss": 0.3389, "step": 3845 }, { "epoch": 0.29107421597843053, "grad_norm": 0.90234375, "learning_rate": 1.899516847423776e-05, "loss": 0.3751, "step": 3846 }, { "epoch": 0.2911498983018779, "grad_norm": 0.79296875, "learning_rate": 1.8994648044161087e-05, "loss": 0.3429, "step": 3847 }, { "epoch": 0.2912255806253252, "grad_norm": 0.7578125, "learning_rate": 1.8994127486479897e-05, "loss": 0.2964, "step": 3848 }, { "epoch": 0.2913012629487725, "grad_norm": 0.83203125, "learning_rate": 1.8993606801201574e-05, "loss": 0.3018, "step": 3849 }, { "epoch": 0.29137694527221986, "grad_norm": 0.8203125, "learning_rate": 1.8993085988333505e-05, "loss": 0.3415, "step": 3850 }, { "epoch": 0.2914526275956672, "grad_norm": 0.78125, "learning_rate": 1.899256504788308e-05, "loss": 0.3169, "step": 3851 }, { "epoch": 0.2915283099191145, "grad_norm": 0.76953125, "learning_rate": 1.8992043979857686e-05, "loss": 0.2794, "step": 3852 }, { "epoch": 0.29160399224256184, "grad_norm": 0.8203125, "learning_rate": 1.899152278426472e-05, "loss": 0.3622, "step": 3853 }, { "epoch": 0.2916796745660092, "grad_norm": 0.83984375, "learning_rate": 1.8991001461111568e-05, "loss": 0.367, "step": 3854 }, { "epoch": 0.29175535688945653, "grad_norm": 0.79296875, "learning_rate": 1.8990480010405636e-05, "loss": 0.3163, "step": 3855 }, { "epoch": 0.2918310392129038, "grad_norm": 0.828125, "learning_rate": 1.8989958432154317e-05, "loss": 0.3653, "step": 3856 }, { "epoch": 0.29190672153635117, "grad_norm": 1.0234375, "learning_rate": 1.8989436726365006e-05, "loss": 0.3423, "step": 3857 }, { "epoch": 0.2919824038597985, "grad_norm": 0.796875, "learning_rate": 1.898891489304511e-05, "loss": 0.3128, "step": 3858 }, { "epoch": 0.2920580861832458, "grad_norm": 0.82421875, "learning_rate": 1.8988392932202036e-05, "loss": 0.3362, "step": 3859 }, { "epoch": 0.29213376850669315, "grad_norm": 0.75, "learning_rate": 1.898787084384318e-05, "loss": 0.2924, "step": 3860 }, { "epoch": 0.2922094508301405, "grad_norm": 0.859375, "learning_rate": 1.898734862797595e-05, "loss": 0.3688, "step": 3861 }, { "epoch": 0.29228513315358784, "grad_norm": 0.875, "learning_rate": 1.898682628460776e-05, "loss": 0.3697, "step": 3862 }, { "epoch": 0.29236081547703513, "grad_norm": 0.81640625, "learning_rate": 1.8986303813746016e-05, "loss": 0.3288, "step": 3863 }, { "epoch": 0.2924364978004825, "grad_norm": 0.83203125, "learning_rate": 1.8985781215398132e-05, "loss": 0.3543, "step": 3864 }, { "epoch": 0.2925121801239298, "grad_norm": 0.7578125, "learning_rate": 1.898525848957152e-05, "loss": 0.3066, "step": 3865 }, { "epoch": 0.2925878624473771, "grad_norm": 0.8671875, "learning_rate": 1.89847356362736e-05, "loss": 0.3757, "step": 3866 }, { "epoch": 0.29266354477082446, "grad_norm": 0.89453125, "learning_rate": 1.8984212655511784e-05, "loss": 0.3726, "step": 3867 }, { "epoch": 0.2927392270942718, "grad_norm": 0.78515625, "learning_rate": 1.8983689547293496e-05, "loss": 0.3078, "step": 3868 }, { "epoch": 0.29281490941771915, "grad_norm": 0.83203125, "learning_rate": 1.898316631162615e-05, "loss": 0.3376, "step": 3869 }, { "epoch": 0.29289059174116644, "grad_norm": 0.90234375, "learning_rate": 1.898264294851718e-05, "loss": 0.3798, "step": 3870 }, { "epoch": 0.2929662740646138, "grad_norm": 0.87109375, "learning_rate": 1.8982119457974005e-05, "loss": 0.3507, "step": 3871 }, { "epoch": 0.29304195638806113, "grad_norm": 1.65625, "learning_rate": 1.898159584000405e-05, "loss": 0.4661, "step": 3872 }, { "epoch": 0.2931176387115084, "grad_norm": 0.875, "learning_rate": 1.8981072094614743e-05, "loss": 0.3378, "step": 3873 }, { "epoch": 0.29319332103495577, "grad_norm": 5.25, "learning_rate": 1.8980548221813518e-05, "loss": 0.482, "step": 3874 }, { "epoch": 0.2932690033584031, "grad_norm": 0.8203125, "learning_rate": 1.8980024221607807e-05, "loss": 0.3407, "step": 3875 }, { "epoch": 0.29334468568185046, "grad_norm": 0.84375, "learning_rate": 1.8979500094005038e-05, "loss": 0.3347, "step": 3876 }, { "epoch": 0.29342036800529775, "grad_norm": 0.80859375, "learning_rate": 1.8978975839012657e-05, "loss": 0.3455, "step": 3877 }, { "epoch": 0.2934960503287451, "grad_norm": 0.88671875, "learning_rate": 1.8978451456638088e-05, "loss": 0.3677, "step": 3878 }, { "epoch": 0.29357173265219244, "grad_norm": 0.8359375, "learning_rate": 1.8977926946888785e-05, "loss": 0.3323, "step": 3879 }, { "epoch": 0.29364741497563973, "grad_norm": 0.88671875, "learning_rate": 1.8977402309772178e-05, "loss": 0.372, "step": 3880 }, { "epoch": 0.2937230972990871, "grad_norm": 0.890625, "learning_rate": 1.8976877545295713e-05, "loss": 0.3552, "step": 3881 }, { "epoch": 0.2937987796225344, "grad_norm": 0.81640625, "learning_rate": 1.8976352653466836e-05, "loss": 0.3012, "step": 3882 }, { "epoch": 0.29387446194598177, "grad_norm": 0.84375, "learning_rate": 1.897582763429299e-05, "loss": 0.308, "step": 3883 }, { "epoch": 0.29395014426942906, "grad_norm": 0.7890625, "learning_rate": 1.897530248778163e-05, "loss": 0.2992, "step": 3884 }, { "epoch": 0.2940258265928764, "grad_norm": 0.859375, "learning_rate": 1.8974777213940197e-05, "loss": 0.3271, "step": 3885 }, { "epoch": 0.29410150891632375, "grad_norm": 0.84375, "learning_rate": 1.8974251812776155e-05, "loss": 0.312, "step": 3886 }, { "epoch": 0.29417719123977104, "grad_norm": 0.8828125, "learning_rate": 1.8973726284296945e-05, "loss": 0.3641, "step": 3887 }, { "epoch": 0.2942528735632184, "grad_norm": 0.94140625, "learning_rate": 1.8973200628510027e-05, "loss": 0.3437, "step": 3888 }, { "epoch": 0.29432855588666573, "grad_norm": 0.81640625, "learning_rate": 1.8972674845422867e-05, "loss": 0.3247, "step": 3889 }, { "epoch": 0.294404238210113, "grad_norm": 0.82421875, "learning_rate": 1.8972148935042912e-05, "loss": 0.3497, "step": 3890 }, { "epoch": 0.29447992053356037, "grad_norm": 3.5625, "learning_rate": 1.8971622897377628e-05, "loss": 0.4467, "step": 3891 }, { "epoch": 0.2945556028570077, "grad_norm": 0.87109375, "learning_rate": 1.8971096732434476e-05, "loss": 0.3806, "step": 3892 }, { "epoch": 0.29463128518045506, "grad_norm": 0.81640625, "learning_rate": 1.8970570440220924e-05, "loss": 0.3135, "step": 3893 }, { "epoch": 0.29470696750390235, "grad_norm": 0.86328125, "learning_rate": 1.8970044020744437e-05, "loss": 0.3614, "step": 3894 }, { "epoch": 0.2947826498273497, "grad_norm": 0.9140625, "learning_rate": 1.896951747401248e-05, "loss": 0.3419, "step": 3895 }, { "epoch": 0.29485833215079704, "grad_norm": 0.83984375, "learning_rate": 1.8968990800032526e-05, "loss": 0.3189, "step": 3896 }, { "epoch": 0.29493401447424433, "grad_norm": 1.203125, "learning_rate": 1.8968463998812048e-05, "loss": 0.3439, "step": 3897 }, { "epoch": 0.2950096967976917, "grad_norm": 0.7578125, "learning_rate": 1.8967937070358514e-05, "loss": 0.3043, "step": 3898 }, { "epoch": 0.295085379121139, "grad_norm": 0.83203125, "learning_rate": 1.8967410014679408e-05, "loss": 0.3421, "step": 3899 }, { "epoch": 0.29516106144458637, "grad_norm": 0.78125, "learning_rate": 1.8966882831782197e-05, "loss": 0.3112, "step": 3900 }, { "epoch": 0.29523674376803366, "grad_norm": 0.82421875, "learning_rate": 1.896635552167437e-05, "loss": 0.3391, "step": 3901 }, { "epoch": 0.295312426091481, "grad_norm": 0.89453125, "learning_rate": 1.89658280843634e-05, "loss": 0.3474, "step": 3902 }, { "epoch": 0.29538810841492835, "grad_norm": 0.80859375, "learning_rate": 1.8965300519856774e-05, "loss": 0.3405, "step": 3903 }, { "epoch": 0.29546379073837564, "grad_norm": 0.828125, "learning_rate": 1.8964772828161977e-05, "loss": 0.3511, "step": 3904 }, { "epoch": 0.295539473061823, "grad_norm": 0.796875, "learning_rate": 1.896424500928649e-05, "loss": 0.3128, "step": 3905 }, { "epoch": 0.29561515538527033, "grad_norm": 0.83203125, "learning_rate": 1.8963717063237805e-05, "loss": 0.3401, "step": 3906 }, { "epoch": 0.2956908377087177, "grad_norm": 0.875, "learning_rate": 1.8963188990023414e-05, "loss": 0.3108, "step": 3907 }, { "epoch": 0.29576652003216497, "grad_norm": 1.7265625, "learning_rate": 1.8962660789650803e-05, "loss": 0.3899, "step": 3908 }, { "epoch": 0.2958422023556123, "grad_norm": 0.80859375, "learning_rate": 1.8962132462127468e-05, "loss": 0.339, "step": 3909 }, { "epoch": 0.29591788467905966, "grad_norm": 0.7578125, "learning_rate": 1.896160400746091e-05, "loss": 0.2808, "step": 3910 }, { "epoch": 0.29599356700250695, "grad_norm": 0.80078125, "learning_rate": 1.8961075425658612e-05, "loss": 0.3194, "step": 3911 }, { "epoch": 0.2960692493259543, "grad_norm": 0.85546875, "learning_rate": 1.8960546716728087e-05, "loss": 0.3645, "step": 3912 }, { "epoch": 0.29614493164940164, "grad_norm": 0.75, "learning_rate": 1.896001788067683e-05, "loss": 0.304, "step": 3913 }, { "epoch": 0.296220613972849, "grad_norm": 0.8125, "learning_rate": 1.895948891751234e-05, "loss": 0.3393, "step": 3914 }, { "epoch": 0.2962962962962963, "grad_norm": 0.875, "learning_rate": 1.8958959827242128e-05, "loss": 0.4112, "step": 3915 }, { "epoch": 0.2963719786197436, "grad_norm": 0.8125, "learning_rate": 1.8958430609873693e-05, "loss": 0.3345, "step": 3916 }, { "epoch": 0.29644766094319097, "grad_norm": 0.75390625, "learning_rate": 1.895790126541455e-05, "loss": 0.2701, "step": 3917 }, { "epoch": 0.29652334326663826, "grad_norm": 0.76171875, "learning_rate": 1.8957371793872204e-05, "loss": 0.2916, "step": 3918 }, { "epoch": 0.2965990255900856, "grad_norm": 0.7734375, "learning_rate": 1.895684219525417e-05, "loss": 0.3117, "step": 3919 }, { "epoch": 0.29667470791353295, "grad_norm": 0.76171875, "learning_rate": 1.8956312469567954e-05, "loss": 0.3064, "step": 3920 }, { "epoch": 0.2967503902369803, "grad_norm": 0.8359375, "learning_rate": 1.895578261682108e-05, "loss": 0.3477, "step": 3921 }, { "epoch": 0.2968260725604276, "grad_norm": 0.77734375, "learning_rate": 1.895525263702106e-05, "loss": 0.3289, "step": 3922 }, { "epoch": 0.29690175488387494, "grad_norm": 0.8203125, "learning_rate": 1.8954722530175415e-05, "loss": 0.3188, "step": 3923 }, { "epoch": 0.2969774372073223, "grad_norm": 1.8046875, "learning_rate": 1.895419229629166e-05, "loss": 0.4155, "step": 3924 }, { "epoch": 0.29705311953076957, "grad_norm": 0.890625, "learning_rate": 1.8953661935377326e-05, "loss": 0.3308, "step": 3925 }, { "epoch": 0.2971288018542169, "grad_norm": 0.82421875, "learning_rate": 1.895313144743993e-05, "loss": 0.3288, "step": 3926 }, { "epoch": 0.29720448417766426, "grad_norm": 1.2890625, "learning_rate": 1.8952600832487e-05, "loss": 0.3987, "step": 3927 }, { "epoch": 0.2972801665011116, "grad_norm": 0.8828125, "learning_rate": 1.8952070090526066e-05, "loss": 0.3222, "step": 3928 }, { "epoch": 0.2973558488245589, "grad_norm": 0.859375, "learning_rate": 1.8951539221564652e-05, "loss": 0.3758, "step": 3929 }, { "epoch": 0.29743153114800625, "grad_norm": 0.8828125, "learning_rate": 1.8951008225610296e-05, "loss": 0.3917, "step": 3930 }, { "epoch": 0.2975072134714536, "grad_norm": 0.9453125, "learning_rate": 1.895047710267053e-05, "loss": 0.3801, "step": 3931 }, { "epoch": 0.2975828957949009, "grad_norm": 0.796875, "learning_rate": 1.894994585275288e-05, "loss": 0.3057, "step": 3932 }, { "epoch": 0.2976585781183482, "grad_norm": 0.796875, "learning_rate": 1.8949414475864892e-05, "loss": 0.2711, "step": 3933 }, { "epoch": 0.2977342604417956, "grad_norm": 0.859375, "learning_rate": 1.8948882972014104e-05, "loss": 0.3606, "step": 3934 }, { "epoch": 0.2978099427652429, "grad_norm": 0.86328125, "learning_rate": 1.8948351341208052e-05, "loss": 0.3894, "step": 3935 }, { "epoch": 0.2978856250886902, "grad_norm": 0.7578125, "learning_rate": 1.8947819583454282e-05, "loss": 0.3105, "step": 3936 }, { "epoch": 0.29796130741213755, "grad_norm": 0.796875, "learning_rate": 1.8947287698760336e-05, "loss": 0.3093, "step": 3937 }, { "epoch": 0.2980369897355849, "grad_norm": 0.83203125, "learning_rate": 1.894675568713376e-05, "loss": 0.3428, "step": 3938 }, { "epoch": 0.2981126720590322, "grad_norm": 0.828125, "learning_rate": 1.8946223548582102e-05, "loss": 0.3457, "step": 3939 }, { "epoch": 0.29818835438247954, "grad_norm": 0.9375, "learning_rate": 1.8945691283112908e-05, "loss": 0.3852, "step": 3940 }, { "epoch": 0.2982640367059269, "grad_norm": 0.80859375, "learning_rate": 1.8945158890733733e-05, "loss": 0.2854, "step": 3941 }, { "epoch": 0.29833971902937423, "grad_norm": 2.109375, "learning_rate": 1.894462637145213e-05, "loss": 0.4656, "step": 3942 }, { "epoch": 0.2984154013528215, "grad_norm": 0.80859375, "learning_rate": 1.8944093725275655e-05, "loss": 0.3449, "step": 3943 }, { "epoch": 0.29849108367626886, "grad_norm": 0.83203125, "learning_rate": 1.8943560952211857e-05, "loss": 0.326, "step": 3944 }, { "epoch": 0.2985667659997162, "grad_norm": 0.8125, "learning_rate": 1.89430280522683e-05, "loss": 0.3351, "step": 3945 }, { "epoch": 0.2986424483231635, "grad_norm": 0.8515625, "learning_rate": 1.8942495025452544e-05, "loss": 0.3681, "step": 3946 }, { "epoch": 0.29871813064661085, "grad_norm": 0.78515625, "learning_rate": 1.8941961871772153e-05, "loss": 0.3319, "step": 3947 }, { "epoch": 0.2987938129700582, "grad_norm": 0.8046875, "learning_rate": 1.8941428591234684e-05, "loss": 0.3126, "step": 3948 }, { "epoch": 0.29886949529350554, "grad_norm": 0.8046875, "learning_rate": 1.8940895183847708e-05, "loss": 0.3447, "step": 3949 }, { "epoch": 0.29894517761695283, "grad_norm": 0.87109375, "learning_rate": 1.894036164961879e-05, "loss": 0.3521, "step": 3950 }, { "epoch": 0.2990208599404002, "grad_norm": 0.87109375, "learning_rate": 1.8939827988555502e-05, "loss": 0.3735, "step": 3951 }, { "epoch": 0.2990965422638475, "grad_norm": 0.83984375, "learning_rate": 1.893929420066541e-05, "loss": 0.3443, "step": 3952 }, { "epoch": 0.2991722245872948, "grad_norm": 0.859375, "learning_rate": 1.893876028595609e-05, "loss": 0.3736, "step": 3953 }, { "epoch": 0.29924790691074216, "grad_norm": 0.80859375, "learning_rate": 1.8938226244435117e-05, "loss": 0.3441, "step": 3954 }, { "epoch": 0.2993235892341895, "grad_norm": 0.8203125, "learning_rate": 1.8937692076110066e-05, "loss": 0.3536, "step": 3955 }, { "epoch": 0.2993992715576368, "grad_norm": 0.80078125, "learning_rate": 1.8937157780988512e-05, "loss": 0.3488, "step": 3956 }, { "epoch": 0.29947495388108414, "grad_norm": 0.79296875, "learning_rate": 1.8936623359078043e-05, "loss": 0.3194, "step": 3957 }, { "epoch": 0.2995506362045315, "grad_norm": 0.80078125, "learning_rate": 1.8936088810386233e-05, "loss": 0.3457, "step": 3958 }, { "epoch": 0.29962631852797883, "grad_norm": 0.8828125, "learning_rate": 1.893555413492067e-05, "loss": 0.3797, "step": 3959 }, { "epoch": 0.2997020008514261, "grad_norm": 0.765625, "learning_rate": 1.8935019332688937e-05, "loss": 0.3103, "step": 3960 }, { "epoch": 0.29977768317487347, "grad_norm": 0.8125, "learning_rate": 1.893448440369862e-05, "loss": 0.335, "step": 3961 }, { "epoch": 0.2998533654983208, "grad_norm": 0.8046875, "learning_rate": 1.893394934795731e-05, "loss": 0.3591, "step": 3962 }, { "epoch": 0.2999290478217681, "grad_norm": 0.84765625, "learning_rate": 1.8933414165472598e-05, "loss": 0.3119, "step": 3963 }, { "epoch": 0.30000473014521545, "grad_norm": 0.8125, "learning_rate": 1.893287885625208e-05, "loss": 0.3603, "step": 3964 }, { "epoch": 0.3000804124686628, "grad_norm": 1.546875, "learning_rate": 1.893234342030334e-05, "loss": 0.4595, "step": 3965 }, { "epoch": 0.30015609479211014, "grad_norm": 0.859375, "learning_rate": 1.8931807857633984e-05, "loss": 0.352, "step": 3966 }, { "epoch": 0.30015609479211014, "eval_loss": 0.3503101170063019, "eval_runtime": 83.3888, "eval_samples_per_second": 58.305, "eval_steps_per_second": 58.305, "step": 3966 }, { "epoch": 0.30023177711555743, "grad_norm": 0.8359375, "learning_rate": 1.8931272168251605e-05, "loss": 0.3716, "step": 3967 }, { "epoch": 0.3003074594390048, "grad_norm": 0.73046875, "learning_rate": 1.8930736352163804e-05, "loss": 0.279, "step": 3968 }, { "epoch": 0.3003831417624521, "grad_norm": 1.203125, "learning_rate": 1.893020040937818e-05, "loss": 0.3315, "step": 3969 }, { "epoch": 0.3004588240858994, "grad_norm": 0.74609375, "learning_rate": 1.8929664339902342e-05, "loss": 0.2718, "step": 3970 }, { "epoch": 0.30053450640934676, "grad_norm": 0.80078125, "learning_rate": 1.8929128143743888e-05, "loss": 0.3162, "step": 3971 }, { "epoch": 0.3006101887327941, "grad_norm": 0.8046875, "learning_rate": 1.892859182091043e-05, "loss": 0.3402, "step": 3972 }, { "epoch": 0.30068587105624145, "grad_norm": 0.79296875, "learning_rate": 1.8928055371409574e-05, "loss": 0.3395, "step": 3973 }, { "epoch": 0.30076155337968874, "grad_norm": 0.80078125, "learning_rate": 1.892751879524893e-05, "loss": 0.3372, "step": 3974 }, { "epoch": 0.3008372357031361, "grad_norm": 0.859375, "learning_rate": 1.8926982092436117e-05, "loss": 0.3343, "step": 3975 }, { "epoch": 0.30091291802658343, "grad_norm": 0.76953125, "learning_rate": 1.892644526297874e-05, "loss": 0.3057, "step": 3976 }, { "epoch": 0.3009886003500307, "grad_norm": 1.4765625, "learning_rate": 1.892590830688442e-05, "loss": 0.4119, "step": 3977 }, { "epoch": 0.30106428267347807, "grad_norm": 0.80078125, "learning_rate": 1.8925371224160772e-05, "loss": 0.3231, "step": 3978 }, { "epoch": 0.3011399649969254, "grad_norm": 0.8125, "learning_rate": 1.8924834014815418e-05, "loss": 0.3355, "step": 3979 }, { "epoch": 0.30121564732037276, "grad_norm": 0.87109375, "learning_rate": 1.892429667885598e-05, "loss": 0.3751, "step": 3980 }, { "epoch": 0.30129132964382005, "grad_norm": 0.828125, "learning_rate": 1.8923759216290074e-05, "loss": 0.3326, "step": 3981 }, { "epoch": 0.3013670119672674, "grad_norm": 1.1796875, "learning_rate": 1.8923221627125335e-05, "loss": 0.3752, "step": 3982 }, { "epoch": 0.30144269429071474, "grad_norm": 0.765625, "learning_rate": 1.892268391136938e-05, "loss": 0.3193, "step": 3983 }, { "epoch": 0.30151837661416203, "grad_norm": 0.82421875, "learning_rate": 1.8922146069029843e-05, "loss": 0.3263, "step": 3984 }, { "epoch": 0.3015940589376094, "grad_norm": 0.8359375, "learning_rate": 1.8921608100114355e-05, "loss": 0.3586, "step": 3985 }, { "epoch": 0.3016697412610567, "grad_norm": 0.84765625, "learning_rate": 1.8921070004630545e-05, "loss": 0.3396, "step": 3986 }, { "epoch": 0.30174542358450407, "grad_norm": 0.84765625, "learning_rate": 1.8920531782586047e-05, "loss": 0.3364, "step": 3987 }, { "epoch": 0.30182110590795136, "grad_norm": 0.83984375, "learning_rate": 1.8919993433988497e-05, "loss": 0.3498, "step": 3988 }, { "epoch": 0.3018967882313987, "grad_norm": 0.8671875, "learning_rate": 1.8919454958845533e-05, "loss": 0.3536, "step": 3989 }, { "epoch": 0.30197247055484605, "grad_norm": 1.3046875, "learning_rate": 1.8918916357164795e-05, "loss": 0.3473, "step": 3990 }, { "epoch": 0.30204815287829334, "grad_norm": 0.81640625, "learning_rate": 1.8918377628953926e-05, "loss": 0.3301, "step": 3991 }, { "epoch": 0.3021238352017407, "grad_norm": 0.8828125, "learning_rate": 1.8917838774220563e-05, "loss": 0.3736, "step": 3992 }, { "epoch": 0.30219951752518803, "grad_norm": 0.7734375, "learning_rate": 1.8917299792972353e-05, "loss": 0.2892, "step": 3993 }, { "epoch": 0.3022751998486354, "grad_norm": 0.80859375, "learning_rate": 1.891676068521694e-05, "loss": 0.3484, "step": 3994 }, { "epoch": 0.30235088217208267, "grad_norm": 0.75, "learning_rate": 1.8916221450961982e-05, "loss": 0.2921, "step": 3995 }, { "epoch": 0.30242656449553, "grad_norm": 1.6953125, "learning_rate": 1.8915682090215118e-05, "loss": 0.2767, "step": 3996 }, { "epoch": 0.30250224681897736, "grad_norm": 0.8671875, "learning_rate": 1.8915142602984e-05, "loss": 0.3153, "step": 3997 }, { "epoch": 0.30257792914242465, "grad_norm": 0.80078125, "learning_rate": 1.8914602989276294e-05, "loss": 0.3272, "step": 3998 }, { "epoch": 0.302653611465872, "grad_norm": 0.73046875, "learning_rate": 1.891406324909964e-05, "loss": 0.2781, "step": 3999 }, { "epoch": 0.30272929378931934, "grad_norm": 0.9140625, "learning_rate": 1.8913523382461706e-05, "loss": 0.3302, "step": 4000 }, { "epoch": 0.3028049761127667, "grad_norm": 0.77734375, "learning_rate": 1.8912983389370143e-05, "loss": 0.2961, "step": 4001 }, { "epoch": 0.302880658436214, "grad_norm": 0.75, "learning_rate": 1.8912443269832618e-05, "loss": 0.2739, "step": 4002 }, { "epoch": 0.3029563407596613, "grad_norm": 0.84765625, "learning_rate": 1.8911903023856787e-05, "loss": 0.3339, "step": 4003 }, { "epoch": 0.30303202308310867, "grad_norm": 0.76171875, "learning_rate": 1.8911362651450322e-05, "loss": 0.2885, "step": 4004 }, { "epoch": 0.30310770540655596, "grad_norm": 0.75390625, "learning_rate": 1.8910822152620885e-05, "loss": 0.2829, "step": 4005 }, { "epoch": 0.3031833877300033, "grad_norm": 0.8671875, "learning_rate": 1.8910281527376146e-05, "loss": 0.3739, "step": 4006 }, { "epoch": 0.30325907005345065, "grad_norm": 0.8125, "learning_rate": 1.890974077572377e-05, "loss": 0.3079, "step": 4007 }, { "epoch": 0.303334752376898, "grad_norm": 0.78125, "learning_rate": 1.8909199897671432e-05, "loss": 0.2969, "step": 4008 }, { "epoch": 0.3034104347003453, "grad_norm": 12.75, "learning_rate": 1.8908658893226803e-05, "loss": 0.3578, "step": 4009 }, { "epoch": 0.30348611702379263, "grad_norm": 0.80078125, "learning_rate": 1.8908117762397564e-05, "loss": 0.3435, "step": 4010 }, { "epoch": 0.30356179934724, "grad_norm": 0.94921875, "learning_rate": 1.8907576505191384e-05, "loss": 0.3608, "step": 4011 }, { "epoch": 0.30363748167068727, "grad_norm": 0.8125, "learning_rate": 1.8907035121615946e-05, "loss": 0.3297, "step": 4012 }, { "epoch": 0.3037131639941346, "grad_norm": 0.80078125, "learning_rate": 1.8906493611678927e-05, "loss": 0.3158, "step": 4013 }, { "epoch": 0.30378884631758196, "grad_norm": 0.8125, "learning_rate": 1.8905951975388018e-05, "loss": 0.3156, "step": 4014 }, { "epoch": 0.30386452864102925, "grad_norm": 0.87109375, "learning_rate": 1.8905410212750896e-05, "loss": 0.3932, "step": 4015 }, { "epoch": 0.3039402109644766, "grad_norm": 1.2890625, "learning_rate": 1.8904868323775246e-05, "loss": 0.3782, "step": 4016 }, { "epoch": 0.30401589328792394, "grad_norm": 0.8359375, "learning_rate": 1.8904326308468758e-05, "loss": 0.3099, "step": 4017 }, { "epoch": 0.3040915756113713, "grad_norm": 0.80078125, "learning_rate": 1.890378416683912e-05, "loss": 0.3005, "step": 4018 }, { "epoch": 0.3041672579348186, "grad_norm": 0.90234375, "learning_rate": 1.8903241898894028e-05, "loss": 0.3806, "step": 4019 }, { "epoch": 0.3042429402582659, "grad_norm": 0.8359375, "learning_rate": 1.8902699504641168e-05, "loss": 0.3498, "step": 4020 }, { "epoch": 0.30431862258171327, "grad_norm": 0.80859375, "learning_rate": 1.8902156984088238e-05, "loss": 0.3508, "step": 4021 }, { "epoch": 0.30439430490516056, "grad_norm": 0.78125, "learning_rate": 1.8901614337242936e-05, "loss": 0.2962, "step": 4022 }, { "epoch": 0.3044699872286079, "grad_norm": 0.8515625, "learning_rate": 1.8901071564112957e-05, "loss": 0.3563, "step": 4023 }, { "epoch": 0.30454566955205525, "grad_norm": 0.85546875, "learning_rate": 1.8900528664706e-05, "loss": 0.3727, "step": 4024 }, { "epoch": 0.3046213518755026, "grad_norm": 0.7421875, "learning_rate": 1.8899985639029778e-05, "loss": 0.3, "step": 4025 }, { "epoch": 0.3046970341989499, "grad_norm": 0.7890625, "learning_rate": 1.889944248709198e-05, "loss": 0.328, "step": 4026 }, { "epoch": 0.30477271652239724, "grad_norm": 0.8828125, "learning_rate": 1.8898899208900324e-05, "loss": 0.3683, "step": 4027 }, { "epoch": 0.3048483988458446, "grad_norm": 0.88671875, "learning_rate": 1.8898355804462507e-05, "loss": 0.4048, "step": 4028 }, { "epoch": 0.30492408116929187, "grad_norm": 0.80859375, "learning_rate": 1.8897812273786246e-05, "loss": 0.3273, "step": 4029 }, { "epoch": 0.3049997634927392, "grad_norm": 0.8203125, "learning_rate": 1.8897268616879247e-05, "loss": 0.3257, "step": 4030 }, { "epoch": 0.30507544581618656, "grad_norm": 0.80078125, "learning_rate": 1.8896724833749225e-05, "loss": 0.3126, "step": 4031 }, { "epoch": 0.3051511281396339, "grad_norm": 0.76953125, "learning_rate": 1.8896180924403894e-05, "loss": 0.3209, "step": 4032 }, { "epoch": 0.3052268104630812, "grad_norm": 0.87109375, "learning_rate": 1.889563688885097e-05, "loss": 0.3687, "step": 4033 }, { "epoch": 0.30530249278652855, "grad_norm": 0.80078125, "learning_rate": 1.889509272709817e-05, "loss": 0.2909, "step": 4034 }, { "epoch": 0.3053781751099759, "grad_norm": 0.8359375, "learning_rate": 1.8894548439153218e-05, "loss": 0.3546, "step": 4035 }, { "epoch": 0.3054538574334232, "grad_norm": 1.140625, "learning_rate": 1.889400402502383e-05, "loss": 0.3132, "step": 4036 }, { "epoch": 0.3055295397568705, "grad_norm": 0.953125, "learning_rate": 1.8893459484717734e-05, "loss": 0.3803, "step": 4037 }, { "epoch": 0.3056052220803179, "grad_norm": 0.8125, "learning_rate": 1.889291481824265e-05, "loss": 0.3485, "step": 4038 }, { "epoch": 0.3056809044037652, "grad_norm": 0.84765625, "learning_rate": 1.8892370025606314e-05, "loss": 0.3755, "step": 4039 }, { "epoch": 0.3057565867272125, "grad_norm": 1.0390625, "learning_rate": 1.8891825106816444e-05, "loss": 0.3062, "step": 4040 }, { "epoch": 0.30583226905065986, "grad_norm": 0.83203125, "learning_rate": 1.889128006188078e-05, "loss": 0.3274, "step": 4041 }, { "epoch": 0.3059079513741072, "grad_norm": 0.81640625, "learning_rate": 1.8890734890807053e-05, "loss": 0.3279, "step": 4042 }, { "epoch": 0.3059836336975545, "grad_norm": 0.80859375, "learning_rate": 1.889018959360299e-05, "loss": 0.3575, "step": 4043 }, { "epoch": 0.30605931602100184, "grad_norm": 0.796875, "learning_rate": 1.888964417027633e-05, "loss": 0.3244, "step": 4044 }, { "epoch": 0.3061349983444492, "grad_norm": 0.81640625, "learning_rate": 1.8889098620834814e-05, "loss": 0.3316, "step": 4045 }, { "epoch": 0.30621068066789653, "grad_norm": 0.80859375, "learning_rate": 1.888855294528618e-05, "loss": 0.3278, "step": 4046 }, { "epoch": 0.3062863629913438, "grad_norm": 0.984375, "learning_rate": 1.8888007143638172e-05, "loss": 0.3334, "step": 4047 }, { "epoch": 0.30636204531479116, "grad_norm": 0.83203125, "learning_rate": 1.888746121589853e-05, "loss": 0.3028, "step": 4048 }, { "epoch": 0.3064377276382385, "grad_norm": 0.80078125, "learning_rate": 1.8886915162074996e-05, "loss": 0.3293, "step": 4049 }, { "epoch": 0.3065134099616858, "grad_norm": 1.71875, "learning_rate": 1.888636898217532e-05, "loss": 0.4204, "step": 4050 }, { "epoch": 0.30658909228513315, "grad_norm": 0.8515625, "learning_rate": 1.8885822676207254e-05, "loss": 0.3201, "step": 4051 }, { "epoch": 0.3066647746085805, "grad_norm": 0.921875, "learning_rate": 1.888527624417854e-05, "loss": 0.3789, "step": 4052 }, { "epoch": 0.30674045693202784, "grad_norm": 0.86328125, "learning_rate": 1.8884729686096936e-05, "loss": 0.3459, "step": 4053 }, { "epoch": 0.30681613925547513, "grad_norm": 0.83984375, "learning_rate": 1.8884183001970197e-05, "loss": 0.349, "step": 4054 }, { "epoch": 0.3068918215789225, "grad_norm": 0.7890625, "learning_rate": 1.8883636191806073e-05, "loss": 0.3415, "step": 4055 }, { "epoch": 0.3069675039023698, "grad_norm": 0.81640625, "learning_rate": 1.888308925561233e-05, "loss": 0.3315, "step": 4056 }, { "epoch": 0.3070431862258171, "grad_norm": 0.87890625, "learning_rate": 1.8882542193396716e-05, "loss": 0.3599, "step": 4057 }, { "epoch": 0.30711886854926446, "grad_norm": 0.8359375, "learning_rate": 1.8881995005167005e-05, "loss": 0.3387, "step": 4058 }, { "epoch": 0.3071945508727118, "grad_norm": 0.90234375, "learning_rate": 1.888144769093095e-05, "loss": 0.3891, "step": 4059 }, { "epoch": 0.30727023319615915, "grad_norm": 0.875, "learning_rate": 1.8880900250696315e-05, "loss": 0.3216, "step": 4060 }, { "epoch": 0.30734591551960644, "grad_norm": 0.84765625, "learning_rate": 1.8880352684470874e-05, "loss": 0.3178, "step": 4061 }, { "epoch": 0.3074215978430538, "grad_norm": 0.78515625, "learning_rate": 1.8879804992262387e-05, "loss": 0.2919, "step": 4062 }, { "epoch": 0.30749728016650113, "grad_norm": 0.8046875, "learning_rate": 1.887925717407863e-05, "loss": 0.3504, "step": 4063 }, { "epoch": 0.3075729624899484, "grad_norm": 0.73828125, "learning_rate": 1.8878709229927372e-05, "loss": 0.2839, "step": 4064 }, { "epoch": 0.30764864481339577, "grad_norm": 0.82421875, "learning_rate": 1.8878161159816386e-05, "loss": 0.3249, "step": 4065 }, { "epoch": 0.3077243271368431, "grad_norm": 0.859375, "learning_rate": 1.887761296375345e-05, "loss": 0.3532, "step": 4066 }, { "epoch": 0.30780000946029046, "grad_norm": 0.79296875, "learning_rate": 1.887706464174634e-05, "loss": 0.3295, "step": 4067 }, { "epoch": 0.30787569178373775, "grad_norm": 0.8359375, "learning_rate": 1.8876516193802838e-05, "loss": 0.3646, "step": 4068 }, { "epoch": 0.3079513741071851, "grad_norm": 0.76953125, "learning_rate": 1.8875967619930715e-05, "loss": 0.302, "step": 4069 }, { "epoch": 0.30802705643063244, "grad_norm": 0.80078125, "learning_rate": 1.8875418920137764e-05, "loss": 0.3273, "step": 4070 }, { "epoch": 0.30810273875407973, "grad_norm": 0.80078125, "learning_rate": 1.8874870094431762e-05, "loss": 0.3174, "step": 4071 }, { "epoch": 0.3081784210775271, "grad_norm": 0.84375, "learning_rate": 1.8874321142820496e-05, "loss": 0.3375, "step": 4072 }, { "epoch": 0.3082541034009744, "grad_norm": 0.7578125, "learning_rate": 1.8873772065311762e-05, "loss": 0.2792, "step": 4073 }, { "epoch": 0.3083297857244217, "grad_norm": 0.8125, "learning_rate": 1.887322286191334e-05, "loss": 0.335, "step": 4074 }, { "epoch": 0.30840546804786906, "grad_norm": 0.73828125, "learning_rate": 1.8872673532633022e-05, "loss": 0.2883, "step": 4075 }, { "epoch": 0.3084811503713164, "grad_norm": 0.8671875, "learning_rate": 1.887212407747861e-05, "loss": 0.3874, "step": 4076 }, { "epoch": 0.30855683269476375, "grad_norm": 0.86328125, "learning_rate": 1.887157449645789e-05, "loss": 0.3916, "step": 4077 }, { "epoch": 0.30863251501821104, "grad_norm": 0.78515625, "learning_rate": 1.887102478957866e-05, "loss": 0.3258, "step": 4078 }, { "epoch": 0.3087081973416584, "grad_norm": 0.7734375, "learning_rate": 1.887047495684872e-05, "loss": 0.3141, "step": 4079 }, { "epoch": 0.30878387966510573, "grad_norm": 0.8046875, "learning_rate": 1.8869924998275873e-05, "loss": 0.312, "step": 4080 }, { "epoch": 0.308859561988553, "grad_norm": 0.79296875, "learning_rate": 1.8869374913867914e-05, "loss": 0.3178, "step": 4081 }, { "epoch": 0.30893524431200037, "grad_norm": 0.87109375, "learning_rate": 1.8868824703632658e-05, "loss": 0.3436, "step": 4082 }, { "epoch": 0.3090109266354477, "grad_norm": 0.8203125, "learning_rate": 1.88682743675779e-05, "loss": 0.3439, "step": 4083 }, { "epoch": 0.30908660895889506, "grad_norm": 0.8203125, "learning_rate": 1.886772390571145e-05, "loss": 0.317, "step": 4084 }, { "epoch": 0.30916229128234235, "grad_norm": 0.76953125, "learning_rate": 1.8867173318041123e-05, "loss": 0.3096, "step": 4085 }, { "epoch": 0.3092379736057897, "grad_norm": 0.78515625, "learning_rate": 1.8866622604574723e-05, "loss": 0.2912, "step": 4086 }, { "epoch": 0.30931365592923704, "grad_norm": 0.7734375, "learning_rate": 1.886607176532007e-05, "loss": 0.3249, "step": 4087 }, { "epoch": 0.30938933825268433, "grad_norm": 0.77734375, "learning_rate": 1.886552080028497e-05, "loss": 0.2988, "step": 4088 }, { "epoch": 0.3094650205761317, "grad_norm": 0.80078125, "learning_rate": 1.8864969709477248e-05, "loss": 0.3284, "step": 4089 }, { "epoch": 0.309540702899579, "grad_norm": 0.84765625, "learning_rate": 1.8864418492904712e-05, "loss": 0.3808, "step": 4090 }, { "epoch": 0.30961638522302637, "grad_norm": 0.7578125, "learning_rate": 1.886386715057519e-05, "loss": 0.307, "step": 4091 }, { "epoch": 0.30969206754647366, "grad_norm": 0.77734375, "learning_rate": 1.8863315682496503e-05, "loss": 0.3351, "step": 4092 }, { "epoch": 0.309767749869921, "grad_norm": 0.82421875, "learning_rate": 1.8862764088676472e-05, "loss": 0.3428, "step": 4093 }, { "epoch": 0.30984343219336835, "grad_norm": 0.859375, "learning_rate": 1.8862212369122925e-05, "loss": 0.336, "step": 4094 }, { "epoch": 0.30991911451681564, "grad_norm": 0.7734375, "learning_rate": 1.8861660523843684e-05, "loss": 0.3301, "step": 4095 }, { "epoch": 0.309994796840263, "grad_norm": 0.8046875, "learning_rate": 1.8861108552846587e-05, "loss": 0.3227, "step": 4096 }, { "epoch": 0.31007047916371033, "grad_norm": 0.83984375, "learning_rate": 1.8860556456139453e-05, "loss": 0.3054, "step": 4097 }, { "epoch": 0.3101461614871577, "grad_norm": 0.8515625, "learning_rate": 1.8860004233730126e-05, "loss": 0.3796, "step": 4098 }, { "epoch": 0.31022184381060497, "grad_norm": 0.74609375, "learning_rate": 1.885945188562643e-05, "loss": 0.2525, "step": 4099 }, { "epoch": 0.3102975261340523, "grad_norm": 0.77734375, "learning_rate": 1.885889941183621e-05, "loss": 0.2972, "step": 4100 }, { "epoch": 0.31037320845749966, "grad_norm": 11.25, "learning_rate": 1.8858346812367295e-05, "loss": 0.4335, "step": 4101 }, { "epoch": 0.31044889078094695, "grad_norm": 0.75, "learning_rate": 1.885779408722753e-05, "loss": 0.2992, "step": 4102 }, { "epoch": 0.3105245731043943, "grad_norm": 0.82421875, "learning_rate": 1.8857241236424756e-05, "loss": 0.3506, "step": 4103 }, { "epoch": 0.31060025542784164, "grad_norm": 1.6953125, "learning_rate": 1.8856688259966815e-05, "loss": 0.4637, "step": 4104 }, { "epoch": 0.310675937751289, "grad_norm": 0.75, "learning_rate": 1.8856135157861554e-05, "loss": 0.2974, "step": 4105 }, { "epoch": 0.3107516200747363, "grad_norm": 0.90234375, "learning_rate": 1.8855581930116814e-05, "loss": 0.3962, "step": 4106 }, { "epoch": 0.3108273023981836, "grad_norm": 0.84375, "learning_rate": 1.8855028576740453e-05, "loss": 0.3568, "step": 4107 }, { "epoch": 0.31090298472163097, "grad_norm": 0.890625, "learning_rate": 1.8854475097740313e-05, "loss": 0.3384, "step": 4108 }, { "epoch": 0.31097866704507826, "grad_norm": 0.8125, "learning_rate": 1.885392149312425e-05, "loss": 0.3361, "step": 4109 }, { "epoch": 0.3110543493685256, "grad_norm": 0.796875, "learning_rate": 1.8853367762900117e-05, "loss": 0.3362, "step": 4110 }, { "epoch": 0.31113003169197295, "grad_norm": 0.84765625, "learning_rate": 1.8852813907075768e-05, "loss": 0.3621, "step": 4111 }, { "epoch": 0.3112057140154203, "grad_norm": 0.8671875, "learning_rate": 1.8852259925659062e-05, "loss": 0.3909, "step": 4112 }, { "epoch": 0.3112813963388676, "grad_norm": 0.7890625, "learning_rate": 1.885170581865786e-05, "loss": 0.3238, "step": 4113 }, { "epoch": 0.31135707866231493, "grad_norm": 0.85546875, "learning_rate": 1.885115158608002e-05, "loss": 0.3834, "step": 4114 }, { "epoch": 0.3114327609857623, "grad_norm": 0.796875, "learning_rate": 1.8850597227933406e-05, "loss": 0.3231, "step": 4115 }, { "epoch": 0.31150844330920957, "grad_norm": 0.7734375, "learning_rate": 1.8850042744225877e-05, "loss": 0.3128, "step": 4116 }, { "epoch": 0.3115841256326569, "grad_norm": 0.82421875, "learning_rate": 1.8849488134965312e-05, "loss": 0.3249, "step": 4117 }, { "epoch": 0.31165980795610426, "grad_norm": 0.81640625, "learning_rate": 1.884893340015957e-05, "loss": 0.3251, "step": 4118 }, { "epoch": 0.3117354902795516, "grad_norm": 0.87109375, "learning_rate": 1.884837853981652e-05, "loss": 0.3472, "step": 4119 }, { "epoch": 0.3118111726029989, "grad_norm": 3.9375, "learning_rate": 1.8847823553944032e-05, "loss": 0.4659, "step": 4120 }, { "epoch": 0.31188685492644624, "grad_norm": 0.84765625, "learning_rate": 1.884726844254999e-05, "loss": 0.3426, "step": 4121 }, { "epoch": 0.3119625372498936, "grad_norm": 0.81640625, "learning_rate": 1.8846713205642262e-05, "loss": 0.3022, "step": 4122 }, { "epoch": 0.3120382195733409, "grad_norm": 0.8671875, "learning_rate": 1.8846157843228724e-05, "loss": 0.3718, "step": 4123 }, { "epoch": 0.3121139018967882, "grad_norm": 0.84765625, "learning_rate": 1.8845602355317257e-05, "loss": 0.3092, "step": 4124 }, { "epoch": 0.31218958422023557, "grad_norm": 0.87109375, "learning_rate": 1.884504674191574e-05, "loss": 0.4013, "step": 4125 }, { "epoch": 0.3122652665436829, "grad_norm": 1.375, "learning_rate": 1.884449100303206e-05, "loss": 0.3971, "step": 4126 }, { "epoch": 0.3123409488671302, "grad_norm": 0.828125, "learning_rate": 1.8843935138674093e-05, "loss": 0.3542, "step": 4127 }, { "epoch": 0.31241663119057755, "grad_norm": 0.8046875, "learning_rate": 1.8843379148849732e-05, "loss": 0.31, "step": 4128 }, { "epoch": 0.3124923135140249, "grad_norm": 0.83984375, "learning_rate": 1.884282303356686e-05, "loss": 0.3792, "step": 4129 }, { "epoch": 0.3125679958374722, "grad_norm": 0.77734375, "learning_rate": 1.8842266792833374e-05, "loss": 0.29, "step": 4130 }, { "epoch": 0.31264367816091954, "grad_norm": 0.8203125, "learning_rate": 1.8841710426657153e-05, "loss": 0.3188, "step": 4131 }, { "epoch": 0.3127193604843669, "grad_norm": 0.82421875, "learning_rate": 1.8841153935046098e-05, "loss": 0.3348, "step": 4132 }, { "epoch": 0.3127950428078142, "grad_norm": 0.859375, "learning_rate": 1.8840597318008104e-05, "loss": 0.3305, "step": 4133 }, { "epoch": 0.3128707251312615, "grad_norm": 0.83203125, "learning_rate": 1.8840040575551066e-05, "loss": 0.3655, "step": 4134 }, { "epoch": 0.31294640745470886, "grad_norm": 0.85546875, "learning_rate": 1.8839483707682883e-05, "loss": 0.2813, "step": 4135 }, { "epoch": 0.3130220897781562, "grad_norm": 0.85546875, "learning_rate": 1.8838926714411455e-05, "loss": 0.3639, "step": 4136 }, { "epoch": 0.3130977721016035, "grad_norm": 0.89453125, "learning_rate": 1.883836959574468e-05, "loss": 0.3377, "step": 4137 }, { "epoch": 0.31317345442505085, "grad_norm": 0.77734375, "learning_rate": 1.883781235169047e-05, "loss": 0.3135, "step": 4138 }, { "epoch": 0.3132491367484982, "grad_norm": 0.78125, "learning_rate": 1.883725498225672e-05, "loss": 0.3166, "step": 4139 }, { "epoch": 0.3133248190719455, "grad_norm": 0.8203125, "learning_rate": 1.8836697487451348e-05, "loss": 0.3013, "step": 4140 }, { "epoch": 0.3134005013953928, "grad_norm": 2.0, "learning_rate": 1.8836139867282253e-05, "loss": 0.3797, "step": 4141 }, { "epoch": 0.3134761837188402, "grad_norm": 2.484375, "learning_rate": 1.8835582121757355e-05, "loss": 0.3877, "step": 4142 }, { "epoch": 0.3135518660422875, "grad_norm": 1.21875, "learning_rate": 1.883502425088456e-05, "loss": 0.3912, "step": 4143 }, { "epoch": 0.3136275483657348, "grad_norm": 0.875, "learning_rate": 1.8834466254671783e-05, "loss": 0.3353, "step": 4144 }, { "epoch": 0.31370323068918216, "grad_norm": 0.8515625, "learning_rate": 1.8833908133126946e-05, "loss": 0.3519, "step": 4145 }, { "epoch": 0.3137789130126295, "grad_norm": 0.77734375, "learning_rate": 1.8833349886257958e-05, "loss": 0.3072, "step": 4146 }, { "epoch": 0.3138545953360768, "grad_norm": 0.8125, "learning_rate": 1.8832791514072748e-05, "loss": 0.3371, "step": 4147 }, { "epoch": 0.31393027765952414, "grad_norm": 0.859375, "learning_rate": 1.883223301657923e-05, "loss": 0.3487, "step": 4148 }, { "epoch": 0.3140059599829715, "grad_norm": 0.88671875, "learning_rate": 1.883167439378533e-05, "loss": 0.3195, "step": 4149 }, { "epoch": 0.31408164230641883, "grad_norm": 0.83984375, "learning_rate": 1.8831115645698973e-05, "loss": 0.31, "step": 4150 }, { "epoch": 0.3141573246298661, "grad_norm": 2.140625, "learning_rate": 1.8830556772328086e-05, "loss": 0.3845, "step": 4151 }, { "epoch": 0.31423300695331347, "grad_norm": 0.87890625, "learning_rate": 1.8829997773680598e-05, "loss": 0.3153, "step": 4152 }, { "epoch": 0.3143086892767608, "grad_norm": 0.8125, "learning_rate": 1.882943864976444e-05, "loss": 0.3379, "step": 4153 }, { "epoch": 0.3143843716002081, "grad_norm": 0.92578125, "learning_rate": 1.882887940058754e-05, "loss": 0.377, "step": 4154 }, { "epoch": 0.31446005392365545, "grad_norm": 0.890625, "learning_rate": 1.8828320026157836e-05, "loss": 0.3662, "step": 4155 }, { "epoch": 0.3145357362471028, "grad_norm": 0.96875, "learning_rate": 1.8827760526483263e-05, "loss": 0.3782, "step": 4156 }, { "epoch": 0.31461141857055014, "grad_norm": 0.77734375, "learning_rate": 1.8827200901571757e-05, "loss": 0.3109, "step": 4157 }, { "epoch": 0.31468710089399743, "grad_norm": 0.7578125, "learning_rate": 1.882664115143126e-05, "loss": 0.3031, "step": 4158 }, { "epoch": 0.3147627832174448, "grad_norm": 0.8984375, "learning_rate": 1.882608127606971e-05, "loss": 0.3487, "step": 4159 }, { "epoch": 0.3148384655408921, "grad_norm": 0.8125, "learning_rate": 1.8825521275495052e-05, "loss": 0.3345, "step": 4160 }, { "epoch": 0.3149141478643394, "grad_norm": 0.8359375, "learning_rate": 1.882496114971523e-05, "loss": 0.3317, "step": 4161 }, { "epoch": 0.31498983018778676, "grad_norm": 0.8359375, "learning_rate": 1.8824400898738185e-05, "loss": 0.3355, "step": 4162 }, { "epoch": 0.3150655125112341, "grad_norm": 0.77734375, "learning_rate": 1.8823840522571875e-05, "loss": 0.307, "step": 4163 }, { "epoch": 0.31514119483468145, "grad_norm": 0.7890625, "learning_rate": 1.8823280021224245e-05, "loss": 0.3413, "step": 4164 }, { "epoch": 0.31521687715812874, "grad_norm": 2.640625, "learning_rate": 1.8822719394703242e-05, "loss": 0.4683, "step": 4165 }, { "epoch": 0.3152925594815761, "grad_norm": 0.8203125, "learning_rate": 1.882215864301683e-05, "loss": 0.3238, "step": 4166 }, { "epoch": 0.31536824180502343, "grad_norm": 0.76171875, "learning_rate": 1.8821597766172957e-05, "loss": 0.2995, "step": 4167 }, { "epoch": 0.3154439241284707, "grad_norm": 1.0625, "learning_rate": 1.882103676417958e-05, "loss": 0.3594, "step": 4168 }, { "epoch": 0.31551960645191807, "grad_norm": 0.78125, "learning_rate": 1.8820475637044656e-05, "loss": 0.321, "step": 4169 }, { "epoch": 0.3155952887753654, "grad_norm": 0.7734375, "learning_rate": 1.8819914384776155e-05, "loss": 0.2969, "step": 4170 }, { "epoch": 0.31567097109881276, "grad_norm": 0.828125, "learning_rate": 1.8819353007382027e-05, "loss": 0.3239, "step": 4171 }, { "epoch": 0.31574665342226005, "grad_norm": 0.79296875, "learning_rate": 1.8818791504870247e-05, "loss": 0.3041, "step": 4172 }, { "epoch": 0.3158223357457074, "grad_norm": 0.85546875, "learning_rate": 1.8818229877248772e-05, "loss": 0.3529, "step": 4173 }, { "epoch": 0.31589801806915474, "grad_norm": 1.1640625, "learning_rate": 1.8817668124525577e-05, "loss": 0.3524, "step": 4174 }, { "epoch": 0.31597370039260203, "grad_norm": 0.77734375, "learning_rate": 1.8817106246708625e-05, "loss": 0.2878, "step": 4175 }, { "epoch": 0.3160493827160494, "grad_norm": 0.81640625, "learning_rate": 1.8816544243805893e-05, "loss": 0.3314, "step": 4176 }, { "epoch": 0.3161250650394967, "grad_norm": 0.8828125, "learning_rate": 1.881598211582535e-05, "loss": 0.3593, "step": 4177 }, { "epoch": 0.31620074736294407, "grad_norm": 0.80078125, "learning_rate": 1.881541986277497e-05, "loss": 0.3182, "step": 4178 }, { "epoch": 0.31627642968639136, "grad_norm": 0.7734375, "learning_rate": 1.8814857484662733e-05, "loss": 0.2886, "step": 4179 }, { "epoch": 0.3163521120098387, "grad_norm": 0.8671875, "learning_rate": 1.8814294981496616e-05, "loss": 0.3825, "step": 4180 }, { "epoch": 0.31642779433328605, "grad_norm": 0.83984375, "learning_rate": 1.8813732353284598e-05, "loss": 0.3528, "step": 4181 }, { "epoch": 0.31650347665673334, "grad_norm": 0.84765625, "learning_rate": 1.8813169600034664e-05, "loss": 0.3452, "step": 4182 }, { "epoch": 0.3165791589801807, "grad_norm": 0.81640625, "learning_rate": 1.8812606721754792e-05, "loss": 0.3268, "step": 4183 }, { "epoch": 0.31665484130362803, "grad_norm": 0.96484375, "learning_rate": 1.8812043718452973e-05, "loss": 0.4419, "step": 4184 }, { "epoch": 0.3167305236270754, "grad_norm": 0.81640625, "learning_rate": 1.8811480590137192e-05, "loss": 0.3193, "step": 4185 }, { "epoch": 0.31680620595052267, "grad_norm": 0.859375, "learning_rate": 1.8810917336815438e-05, "loss": 0.3798, "step": 4186 }, { "epoch": 0.31688188827397, "grad_norm": 0.74609375, "learning_rate": 1.88103539584957e-05, "loss": 0.2795, "step": 4187 }, { "epoch": 0.31695757059741736, "grad_norm": 0.81640625, "learning_rate": 1.880979045518597e-05, "loss": 0.3386, "step": 4188 }, { "epoch": 0.31703325292086465, "grad_norm": 0.765625, "learning_rate": 1.880922682689425e-05, "loss": 0.2999, "step": 4189 }, { "epoch": 0.317108935244312, "grad_norm": 0.79296875, "learning_rate": 1.880866307362853e-05, "loss": 0.314, "step": 4190 }, { "epoch": 0.31718461756775934, "grad_norm": 0.7890625, "learning_rate": 1.8808099195396804e-05, "loss": 0.3222, "step": 4191 }, { "epoch": 0.3172602998912067, "grad_norm": 1.3828125, "learning_rate": 1.8807535192207075e-05, "loss": 0.4347, "step": 4192 }, { "epoch": 0.317335982214654, "grad_norm": 0.83203125, "learning_rate": 1.8806971064067348e-05, "loss": 0.3596, "step": 4193 }, { "epoch": 0.3174116645381013, "grad_norm": 0.83984375, "learning_rate": 1.8806406810985624e-05, "loss": 0.2854, "step": 4194 }, { "epoch": 0.31748734686154867, "grad_norm": 0.85546875, "learning_rate": 1.8805842432969904e-05, "loss": 0.3364, "step": 4195 }, { "epoch": 0.31756302918499596, "grad_norm": 0.83203125, "learning_rate": 1.8805277930028198e-05, "loss": 0.347, "step": 4196 }, { "epoch": 0.3176387115084433, "grad_norm": 0.84765625, "learning_rate": 1.8804713302168513e-05, "loss": 0.3824, "step": 4197 }, { "epoch": 0.31771439383189065, "grad_norm": 0.8046875, "learning_rate": 1.8804148549398864e-05, "loss": 0.3334, "step": 4198 }, { "epoch": 0.31779007615533794, "grad_norm": 0.84375, "learning_rate": 1.880358367172726e-05, "loss": 0.3216, "step": 4199 }, { "epoch": 0.3178657584787853, "grad_norm": 0.81640625, "learning_rate": 1.8803018669161712e-05, "loss": 0.3707, "step": 4200 }, { "epoch": 0.31794144080223263, "grad_norm": 0.8671875, "learning_rate": 1.8802453541710238e-05, "loss": 0.4154, "step": 4201 }, { "epoch": 0.31801712312568, "grad_norm": 0.81640625, "learning_rate": 1.8801888289380855e-05, "loss": 0.3449, "step": 4202 }, { "epoch": 0.31809280544912727, "grad_norm": 0.83203125, "learning_rate": 1.880132291218158e-05, "loss": 0.3333, "step": 4203 }, { "epoch": 0.3181684877725746, "grad_norm": 0.84765625, "learning_rate": 1.8800757410120438e-05, "loss": 0.3211, "step": 4204 }, { "epoch": 0.31824417009602196, "grad_norm": 0.875, "learning_rate": 1.880019178320545e-05, "loss": 0.3671, "step": 4205 }, { "epoch": 0.31831985241946925, "grad_norm": 0.83984375, "learning_rate": 1.8799626031444638e-05, "loss": 0.3528, "step": 4206 }, { "epoch": 0.3183955347429166, "grad_norm": 0.80078125, "learning_rate": 1.879906015484603e-05, "loss": 0.3133, "step": 4207 }, { "epoch": 0.31847121706636394, "grad_norm": 0.94921875, "learning_rate": 1.8798494153417657e-05, "loss": 0.2837, "step": 4208 }, { "epoch": 0.3185468993898113, "grad_norm": 0.77734375, "learning_rate": 1.8797928027167543e-05, "loss": 0.3078, "step": 4209 }, { "epoch": 0.3186225817132586, "grad_norm": 0.828125, "learning_rate": 1.8797361776103727e-05, "loss": 0.3153, "step": 4210 }, { "epoch": 0.3186982640367059, "grad_norm": 0.87109375, "learning_rate": 1.879679540023423e-05, "loss": 0.3697, "step": 4211 }, { "epoch": 0.31877394636015327, "grad_norm": 0.828125, "learning_rate": 1.87962288995671e-05, "loss": 0.319, "step": 4212 }, { "epoch": 0.31884962868360056, "grad_norm": 0.82421875, "learning_rate": 1.8795662274110365e-05, "loss": 0.344, "step": 4213 }, { "epoch": 0.3189253110070479, "grad_norm": 0.80859375, "learning_rate": 1.879509552387207e-05, "loss": 0.3286, "step": 4214 }, { "epoch": 0.31900099333049525, "grad_norm": 0.7421875, "learning_rate": 1.879452864886025e-05, "loss": 0.2607, "step": 4215 }, { "epoch": 0.3190766756539426, "grad_norm": 0.79296875, "learning_rate": 1.8793961649082947e-05, "loss": 0.344, "step": 4216 }, { "epoch": 0.3191523579773899, "grad_norm": 0.8046875, "learning_rate": 1.879339452454821e-05, "loss": 0.3138, "step": 4217 }, { "epoch": 0.31922804030083723, "grad_norm": 0.86328125, "learning_rate": 1.879282727526408e-05, "loss": 0.3944, "step": 4218 }, { "epoch": 0.3193037226242846, "grad_norm": 1.4921875, "learning_rate": 1.8792259901238608e-05, "loss": 0.4251, "step": 4219 }, { "epoch": 0.31937940494773187, "grad_norm": 0.77734375, "learning_rate": 1.879169240247984e-05, "loss": 0.318, "step": 4220 }, { "epoch": 0.3194550872711792, "grad_norm": 0.7578125, "learning_rate": 1.8791124778995828e-05, "loss": 0.3004, "step": 4221 }, { "epoch": 0.31953076959462656, "grad_norm": 0.7890625, "learning_rate": 1.8790557030794627e-05, "loss": 0.3188, "step": 4222 }, { "epoch": 0.3196064519180739, "grad_norm": 0.8203125, "learning_rate": 1.8789989157884286e-05, "loss": 0.3524, "step": 4223 }, { "epoch": 0.3196821342415212, "grad_norm": 0.85546875, "learning_rate": 1.8789421160272868e-05, "loss": 0.3784, "step": 4224 }, { "epoch": 0.31975781656496854, "grad_norm": 0.79296875, "learning_rate": 1.8788853037968425e-05, "loss": 0.3344, "step": 4225 }, { "epoch": 0.3198334988884159, "grad_norm": 1.09375, "learning_rate": 1.8788284790979025e-05, "loss": 0.376, "step": 4226 }, { "epoch": 0.3199091812118632, "grad_norm": 0.7734375, "learning_rate": 1.8787716419312717e-05, "loss": 0.3117, "step": 4227 }, { "epoch": 0.3199848635353105, "grad_norm": 0.7734375, "learning_rate": 1.8787147922977573e-05, "loss": 0.3078, "step": 4228 }, { "epoch": 0.32006054585875787, "grad_norm": 0.79296875, "learning_rate": 1.878657930198166e-05, "loss": 0.3386, "step": 4229 }, { "epoch": 0.3201362281822052, "grad_norm": 0.81640625, "learning_rate": 1.8786010556333036e-05, "loss": 0.3549, "step": 4230 }, { "epoch": 0.3202119105056525, "grad_norm": 0.77734375, "learning_rate": 1.8785441686039777e-05, "loss": 0.3366, "step": 4231 }, { "epoch": 0.32028759282909985, "grad_norm": 0.83203125, "learning_rate": 1.8784872691109954e-05, "loss": 0.3522, "step": 4232 }, { "epoch": 0.3203632751525472, "grad_norm": 0.89453125, "learning_rate": 1.8784303571551636e-05, "loss": 0.4243, "step": 4233 }, { "epoch": 0.3204389574759945, "grad_norm": 0.76171875, "learning_rate": 1.8783734327372895e-05, "loss": 0.3072, "step": 4234 }, { "epoch": 0.32051463979944184, "grad_norm": 0.80078125, "learning_rate": 1.878316495858181e-05, "loss": 0.3321, "step": 4235 }, { "epoch": 0.3205903221228892, "grad_norm": 0.828125, "learning_rate": 1.878259546518646e-05, "loss": 0.3273, "step": 4236 }, { "epoch": 0.32066600444633653, "grad_norm": 0.86328125, "learning_rate": 1.878202584719492e-05, "loss": 0.3541, "step": 4237 }, { "epoch": 0.3207416867697838, "grad_norm": 0.88671875, "learning_rate": 1.8781456104615273e-05, "loss": 0.3701, "step": 4238 }, { "epoch": 0.32081736909323116, "grad_norm": 0.80859375, "learning_rate": 1.8780886237455597e-05, "loss": 0.3454, "step": 4239 }, { "epoch": 0.3208930514166785, "grad_norm": 0.8984375, "learning_rate": 1.8780316245723988e-05, "loss": 0.4031, "step": 4240 }, { "epoch": 0.3209687337401258, "grad_norm": 0.82421875, "learning_rate": 1.8779746129428523e-05, "loss": 0.3673, "step": 4241 }, { "epoch": 0.32104441606357315, "grad_norm": 0.79296875, "learning_rate": 1.8779175888577294e-05, "loss": 0.3226, "step": 4242 }, { "epoch": 0.3211200983870205, "grad_norm": 0.84765625, "learning_rate": 1.8778605523178388e-05, "loss": 0.3072, "step": 4243 }, { "epoch": 0.32119578071046784, "grad_norm": 1.0, "learning_rate": 1.8778035033239895e-05, "loss": 0.3452, "step": 4244 }, { "epoch": 0.32127146303391513, "grad_norm": 0.98828125, "learning_rate": 1.8777464418769917e-05, "loss": 0.3611, "step": 4245 }, { "epoch": 0.3213471453573625, "grad_norm": 1.328125, "learning_rate": 1.877689367977654e-05, "loss": 0.407, "step": 4246 }, { "epoch": 0.3214228276808098, "grad_norm": 0.80078125, "learning_rate": 1.8776322816267866e-05, "loss": 0.3275, "step": 4247 }, { "epoch": 0.3214985100042571, "grad_norm": 0.83984375, "learning_rate": 1.877575182825199e-05, "loss": 0.3166, "step": 4248 }, { "epoch": 0.32157419232770446, "grad_norm": 0.78125, "learning_rate": 1.8775180715737018e-05, "loss": 0.3157, "step": 4249 }, { "epoch": 0.3216498746511518, "grad_norm": 0.82421875, "learning_rate": 1.8774609478731048e-05, "loss": 0.3243, "step": 4250 }, { "epoch": 0.32172555697459915, "grad_norm": 0.8125, "learning_rate": 1.8774038117242185e-05, "loss": 0.3036, "step": 4251 }, { "epoch": 0.32180123929804644, "grad_norm": 0.82421875, "learning_rate": 1.877346663127853e-05, "loss": 0.3068, "step": 4252 }, { "epoch": 0.3218769216214938, "grad_norm": 0.84375, "learning_rate": 1.87728950208482e-05, "loss": 0.3647, "step": 4253 }, { "epoch": 0.32195260394494113, "grad_norm": 0.7890625, "learning_rate": 1.8772323285959298e-05, "loss": 0.3488, "step": 4254 }, { "epoch": 0.3220282862683884, "grad_norm": 0.859375, "learning_rate": 1.8771751426619936e-05, "loss": 0.3452, "step": 4255 }, { "epoch": 0.32210396859183577, "grad_norm": 0.7734375, "learning_rate": 1.8771179442838227e-05, "loss": 0.3344, "step": 4256 }, { "epoch": 0.3221796509152831, "grad_norm": 0.73828125, "learning_rate": 1.8770607334622285e-05, "loss": 0.2938, "step": 4257 }, { "epoch": 0.3222553332387304, "grad_norm": 0.94140625, "learning_rate": 1.877003510198023e-05, "loss": 0.3574, "step": 4258 }, { "epoch": 0.32233101556217775, "grad_norm": 0.87890625, "learning_rate": 1.8769462744920174e-05, "loss": 0.343, "step": 4259 }, { "epoch": 0.3224066978856251, "grad_norm": 0.84375, "learning_rate": 1.8768890263450244e-05, "loss": 0.3439, "step": 4260 }, { "epoch": 0.32248238020907244, "grad_norm": 0.8515625, "learning_rate": 1.8768317657578553e-05, "loss": 0.3291, "step": 4261 }, { "epoch": 0.32255806253251973, "grad_norm": 0.83984375, "learning_rate": 1.8767744927313233e-05, "loss": 0.3195, "step": 4262 }, { "epoch": 0.3226337448559671, "grad_norm": 0.95703125, "learning_rate": 1.8767172072662402e-05, "loss": 0.4226, "step": 4263 }, { "epoch": 0.3227094271794144, "grad_norm": 0.87109375, "learning_rate": 1.876659909363419e-05, "loss": 0.3948, "step": 4264 }, { "epoch": 0.3227851095028617, "grad_norm": 0.87109375, "learning_rate": 1.8766025990236733e-05, "loss": 0.3432, "step": 4265 }, { "epoch": 0.32286079182630906, "grad_norm": 0.890625, "learning_rate": 1.8765452762478148e-05, "loss": 0.3691, "step": 4266 }, { "epoch": 0.3229364741497564, "grad_norm": 0.7890625, "learning_rate": 1.876487941036657e-05, "loss": 0.2959, "step": 4267 }, { "epoch": 0.32301215647320375, "grad_norm": 0.83203125, "learning_rate": 1.876430593391014e-05, "loss": 0.2932, "step": 4268 }, { "epoch": 0.32308783879665104, "grad_norm": 0.83203125, "learning_rate": 1.8763732333116996e-05, "loss": 0.3408, "step": 4269 }, { "epoch": 0.3231635211200984, "grad_norm": 0.8515625, "learning_rate": 1.8763158607995264e-05, "loss": 0.3464, "step": 4270 }, { "epoch": 0.32323920344354573, "grad_norm": 0.7578125, "learning_rate": 1.876258475855309e-05, "loss": 0.3135, "step": 4271 }, { "epoch": 0.323314885766993, "grad_norm": 0.80859375, "learning_rate": 1.8762010784798616e-05, "loss": 0.3275, "step": 4272 }, { "epoch": 0.32339056809044037, "grad_norm": 1.078125, "learning_rate": 1.8761436686739976e-05, "loss": 0.3915, "step": 4273 }, { "epoch": 0.3234662504138877, "grad_norm": 1.0, "learning_rate": 1.8760862464385327e-05, "loss": 0.285, "step": 4274 }, { "epoch": 0.32354193273733506, "grad_norm": 0.796875, "learning_rate": 1.8760288117742812e-05, "loss": 0.3377, "step": 4275 }, { "epoch": 0.32361761506078235, "grad_norm": 0.828125, "learning_rate": 1.875971364682057e-05, "loss": 0.3568, "step": 4276 }, { "epoch": 0.3236932973842297, "grad_norm": 0.859375, "learning_rate": 1.8759139051626765e-05, "loss": 0.3795, "step": 4277 }, { "epoch": 0.32376897970767704, "grad_norm": 0.82421875, "learning_rate": 1.875856433216954e-05, "loss": 0.3493, "step": 4278 }, { "epoch": 0.32384466203112433, "grad_norm": 0.875, "learning_rate": 1.8757989488457045e-05, "loss": 0.3549, "step": 4279 }, { "epoch": 0.3239203443545717, "grad_norm": 0.82421875, "learning_rate": 1.8757414520497442e-05, "loss": 0.3394, "step": 4280 }, { "epoch": 0.323996026678019, "grad_norm": 0.76171875, "learning_rate": 1.875683942829889e-05, "loss": 0.2899, "step": 4281 }, { "epoch": 0.32407170900146637, "grad_norm": 0.80859375, "learning_rate": 1.8756264211869537e-05, "loss": 0.3294, "step": 4282 }, { "epoch": 0.32414739132491366, "grad_norm": 0.828125, "learning_rate": 1.8755688871217552e-05, "loss": 0.3454, "step": 4283 }, { "epoch": 0.324223073648361, "grad_norm": 0.78515625, "learning_rate": 1.87551134063511e-05, "loss": 0.3305, "step": 4284 }, { "epoch": 0.32429875597180835, "grad_norm": 0.83984375, "learning_rate": 1.8754537817278333e-05, "loss": 0.3064, "step": 4285 }, { "epoch": 0.32437443829525564, "grad_norm": 0.83984375, "learning_rate": 1.8753962104007426e-05, "loss": 0.3549, "step": 4286 }, { "epoch": 0.324450120618703, "grad_norm": 0.87109375, "learning_rate": 1.8753386266546548e-05, "loss": 0.3635, "step": 4287 }, { "epoch": 0.32452580294215033, "grad_norm": 0.82421875, "learning_rate": 1.875281030490386e-05, "loss": 0.3321, "step": 4288 }, { "epoch": 0.3246014852655977, "grad_norm": 0.80078125, "learning_rate": 1.8752234219087538e-05, "loss": 0.3515, "step": 4289 }, { "epoch": 0.32467716758904497, "grad_norm": 0.8359375, "learning_rate": 1.8751658009105754e-05, "loss": 0.3875, "step": 4290 }, { "epoch": 0.3247528499124923, "grad_norm": 0.8046875, "learning_rate": 1.8751081674966687e-05, "loss": 0.3312, "step": 4291 }, { "epoch": 0.32482853223593966, "grad_norm": 0.7734375, "learning_rate": 1.8750505216678505e-05, "loss": 0.3067, "step": 4292 }, { "epoch": 0.32490421455938695, "grad_norm": 0.80859375, "learning_rate": 1.874992863424939e-05, "loss": 0.3316, "step": 4293 }, { "epoch": 0.3249798968828343, "grad_norm": 0.80859375, "learning_rate": 1.8749351927687526e-05, "loss": 0.3162, "step": 4294 }, { "epoch": 0.32505557920628164, "grad_norm": 0.8125, "learning_rate": 1.8748775097001086e-05, "loss": 0.3124, "step": 4295 }, { "epoch": 0.325131261529729, "grad_norm": 0.80859375, "learning_rate": 1.874819814219826e-05, "loss": 0.3278, "step": 4296 }, { "epoch": 0.3252069438531763, "grad_norm": 0.75390625, "learning_rate": 1.874762106328723e-05, "loss": 0.3062, "step": 4297 }, { "epoch": 0.3252826261766236, "grad_norm": 0.7734375, "learning_rate": 1.8747043860276185e-05, "loss": 0.2856, "step": 4298 }, { "epoch": 0.32535830850007097, "grad_norm": 0.90234375, "learning_rate": 1.874646653317331e-05, "loss": 0.3497, "step": 4299 }, { "epoch": 0.32543399082351826, "grad_norm": 1.078125, "learning_rate": 1.8745889081986802e-05, "loss": 0.3696, "step": 4300 }, { "epoch": 0.3255096731469656, "grad_norm": 0.8046875, "learning_rate": 1.8745311506724845e-05, "loss": 0.3281, "step": 4301 }, { "epoch": 0.32558535547041295, "grad_norm": 0.74609375, "learning_rate": 1.8744733807395638e-05, "loss": 0.3005, "step": 4302 }, { "epoch": 0.3256610377938603, "grad_norm": 0.8984375, "learning_rate": 1.8744155984007376e-05, "loss": 0.3552, "step": 4303 }, { "epoch": 0.3257367201173076, "grad_norm": 0.9375, "learning_rate": 1.8743578036568257e-05, "loss": 0.3316, "step": 4304 }, { "epoch": 0.32581240244075493, "grad_norm": 0.91015625, "learning_rate": 1.8742999965086474e-05, "loss": 0.3856, "step": 4305 }, { "epoch": 0.3258880847642023, "grad_norm": 0.828125, "learning_rate": 1.8742421769570235e-05, "loss": 0.3433, "step": 4306 }, { "epoch": 0.32596376708764957, "grad_norm": 0.82421875, "learning_rate": 1.8741843450027743e-05, "loss": 0.3375, "step": 4307 }, { "epoch": 0.3260394494110969, "grad_norm": 0.8203125, "learning_rate": 1.8741265006467202e-05, "loss": 0.3411, "step": 4308 }, { "epoch": 0.32611513173454426, "grad_norm": 0.84765625, "learning_rate": 1.874068643889681e-05, "loss": 0.3554, "step": 4309 }, { "epoch": 0.3261908140579916, "grad_norm": 0.86328125, "learning_rate": 1.8740107747324785e-05, "loss": 0.34, "step": 4310 }, { "epoch": 0.3262664963814389, "grad_norm": 0.78515625, "learning_rate": 1.8739528931759334e-05, "loss": 0.3085, "step": 4311 }, { "epoch": 0.32634217870488624, "grad_norm": 0.8203125, "learning_rate": 1.8738949992208667e-05, "loss": 0.3209, "step": 4312 }, { "epoch": 0.3264178610283336, "grad_norm": 0.8203125, "learning_rate": 1.8738370928681e-05, "loss": 0.3172, "step": 4313 }, { "epoch": 0.3264935433517809, "grad_norm": 0.8359375, "learning_rate": 1.873779174118454e-05, "loss": 0.352, "step": 4314 }, { "epoch": 0.3265692256752282, "grad_norm": 0.73046875, "learning_rate": 1.8737212429727512e-05, "loss": 0.2782, "step": 4315 }, { "epoch": 0.32664490799867557, "grad_norm": 5.0625, "learning_rate": 1.8736632994318133e-05, "loss": 0.4819, "step": 4316 }, { "epoch": 0.3267205903221229, "grad_norm": 0.78515625, "learning_rate": 1.8736053434964625e-05, "loss": 0.3472, "step": 4317 }, { "epoch": 0.3267962726455702, "grad_norm": 0.77734375, "learning_rate": 1.8735473751675204e-05, "loss": 0.3198, "step": 4318 }, { "epoch": 0.32687195496901755, "grad_norm": 0.8046875, "learning_rate": 1.8734893944458097e-05, "loss": 0.3164, "step": 4319 }, { "epoch": 0.3269476372924649, "grad_norm": 0.76171875, "learning_rate": 1.873431401332153e-05, "loss": 0.2858, "step": 4320 }, { "epoch": 0.3270233196159122, "grad_norm": 0.84765625, "learning_rate": 1.8733733958273734e-05, "loss": 0.3472, "step": 4321 }, { "epoch": 0.32709900193935953, "grad_norm": 0.8359375, "learning_rate": 1.873315377932293e-05, "loss": 0.3421, "step": 4322 }, { "epoch": 0.3271746842628069, "grad_norm": 1.390625, "learning_rate": 1.8732573476477355e-05, "loss": 0.3893, "step": 4323 }, { "epoch": 0.32725036658625417, "grad_norm": 1.1015625, "learning_rate": 1.8731993049745236e-05, "loss": 0.3748, "step": 4324 }, { "epoch": 0.3273260489097015, "grad_norm": 0.7578125, "learning_rate": 1.8731412499134816e-05, "loss": 0.3011, "step": 4325 }, { "epoch": 0.32740173123314886, "grad_norm": 1.0703125, "learning_rate": 1.873083182465432e-05, "loss": 0.3398, "step": 4326 }, { "epoch": 0.3274774135565962, "grad_norm": 0.8125, "learning_rate": 1.8730251026312e-05, "loss": 0.3008, "step": 4327 }, { "epoch": 0.3275530958800435, "grad_norm": 0.8046875, "learning_rate": 1.8729670104116083e-05, "loss": 0.2873, "step": 4328 }, { "epoch": 0.32762877820349084, "grad_norm": 0.8515625, "learning_rate": 1.8729089058074815e-05, "loss": 0.3568, "step": 4329 }, { "epoch": 0.3277044605269382, "grad_norm": 0.80078125, "learning_rate": 1.872850788819644e-05, "loss": 0.3286, "step": 4330 }, { "epoch": 0.3277801428503855, "grad_norm": 0.765625, "learning_rate": 1.8727926594489202e-05, "loss": 0.3136, "step": 4331 }, { "epoch": 0.3278558251738328, "grad_norm": 0.8671875, "learning_rate": 1.8727345176961343e-05, "loss": 0.3871, "step": 4332 }, { "epoch": 0.3279315074972802, "grad_norm": 0.83203125, "learning_rate": 1.8726763635621124e-05, "loss": 0.3421, "step": 4333 }, { "epoch": 0.3280071898207275, "grad_norm": 0.7890625, "learning_rate": 1.872618197047678e-05, "loss": 0.3138, "step": 4334 }, { "epoch": 0.3280828721441748, "grad_norm": 0.7734375, "learning_rate": 1.8725600181536575e-05, "loss": 0.291, "step": 4335 }, { "epoch": 0.32815855446762215, "grad_norm": 0.80078125, "learning_rate": 1.8725018268808756e-05, "loss": 0.3215, "step": 4336 }, { "epoch": 0.3282342367910695, "grad_norm": 0.8125, "learning_rate": 1.872443623230158e-05, "loss": 0.3308, "step": 4337 }, { "epoch": 0.3283099191145168, "grad_norm": 1.140625, "learning_rate": 1.8723854072023303e-05, "loss": 0.3909, "step": 4338 }, { "epoch": 0.32838560143796414, "grad_norm": 0.77734375, "learning_rate": 1.872327178798219e-05, "loss": 0.2746, "step": 4339 }, { "epoch": 0.3284612837614115, "grad_norm": 0.80859375, "learning_rate": 1.8722689380186492e-05, "loss": 0.3475, "step": 4340 }, { "epoch": 0.32853696608485883, "grad_norm": 0.875, "learning_rate": 1.872210684864448e-05, "loss": 0.3392, "step": 4341 }, { "epoch": 0.3286126484083061, "grad_norm": 0.78515625, "learning_rate": 1.872152419336441e-05, "loss": 0.306, "step": 4342 }, { "epoch": 0.32868833073175346, "grad_norm": 0.8359375, "learning_rate": 1.872094141435456e-05, "loss": 0.341, "step": 4343 }, { "epoch": 0.3287640130552008, "grad_norm": 0.734375, "learning_rate": 1.8720358511623184e-05, "loss": 0.2854, "step": 4344 }, { "epoch": 0.3288396953786481, "grad_norm": 0.8515625, "learning_rate": 1.871977548517856e-05, "loss": 0.3794, "step": 4345 }, { "epoch": 0.32891537770209545, "grad_norm": 0.8671875, "learning_rate": 1.871919233502896e-05, "loss": 0.3235, "step": 4346 }, { "epoch": 0.3289910600255428, "grad_norm": 0.8671875, "learning_rate": 1.871860906118265e-05, "loss": 0.3899, "step": 4347 }, { "epoch": 0.32906674234899014, "grad_norm": 0.75390625, "learning_rate": 1.871802566364791e-05, "loss": 0.3299, "step": 4348 }, { "epoch": 0.32914242467243743, "grad_norm": 0.828125, "learning_rate": 1.8717442142433015e-05, "loss": 0.3593, "step": 4349 }, { "epoch": 0.3292181069958848, "grad_norm": 0.81640625, "learning_rate": 1.8716858497546244e-05, "loss": 0.3093, "step": 4350 }, { "epoch": 0.3292937893193321, "grad_norm": 0.8515625, "learning_rate": 1.8716274728995875e-05, "loss": 0.3518, "step": 4351 }, { "epoch": 0.3293694716427794, "grad_norm": 0.78125, "learning_rate": 1.8715690836790195e-05, "loss": 0.3185, "step": 4352 }, { "epoch": 0.32944515396622676, "grad_norm": 0.8203125, "learning_rate": 1.871510682093748e-05, "loss": 0.3244, "step": 4353 }, { "epoch": 0.3295208362896741, "grad_norm": 0.828125, "learning_rate": 1.8714522681446024e-05, "loss": 0.3459, "step": 4354 }, { "epoch": 0.32959651861312145, "grad_norm": 0.82421875, "learning_rate": 1.8713938418324105e-05, "loss": 0.3631, "step": 4355 }, { "epoch": 0.32967220093656874, "grad_norm": 0.859375, "learning_rate": 1.8713354031580014e-05, "loss": 0.3748, "step": 4356 }, { "epoch": 0.3297478832600161, "grad_norm": 0.71875, "learning_rate": 1.8712769521222046e-05, "loss": 0.2868, "step": 4357 }, { "epoch": 0.32982356558346343, "grad_norm": 1.390625, "learning_rate": 1.8712184887258494e-05, "loss": 0.4212, "step": 4358 }, { "epoch": 0.3298992479069107, "grad_norm": 0.734375, "learning_rate": 1.8711600129697643e-05, "loss": 0.2955, "step": 4359 }, { "epoch": 0.32997493023035807, "grad_norm": 0.75390625, "learning_rate": 1.87110152485478e-05, "loss": 0.2912, "step": 4360 }, { "epoch": 0.3300506125538054, "grad_norm": 0.79296875, "learning_rate": 1.8710430243817253e-05, "loss": 0.3313, "step": 4361 }, { "epoch": 0.33012629487725276, "grad_norm": 0.8359375, "learning_rate": 1.8709845115514306e-05, "loss": 0.3785, "step": 4362 }, { "epoch": 0.33020197720070005, "grad_norm": 0.84375, "learning_rate": 1.8709259863647263e-05, "loss": 0.3308, "step": 4363 }, { "epoch": 0.3302776595241474, "grad_norm": 0.765625, "learning_rate": 1.8708674488224422e-05, "loss": 0.3165, "step": 4364 }, { "epoch": 0.33035334184759474, "grad_norm": 0.74609375, "learning_rate": 1.8708088989254084e-05, "loss": 0.2986, "step": 4365 }, { "epoch": 0.33042902417104203, "grad_norm": 0.81640625, "learning_rate": 1.8707503366744563e-05, "loss": 0.29, "step": 4366 }, { "epoch": 0.3305047064944894, "grad_norm": 0.765625, "learning_rate": 1.8706917620704167e-05, "loss": 0.2929, "step": 4367 }, { "epoch": 0.3305803888179367, "grad_norm": 0.8828125, "learning_rate": 1.87063317511412e-05, "loss": 0.3548, "step": 4368 }, { "epoch": 0.33065607114138407, "grad_norm": 0.8046875, "learning_rate": 1.8705745758063977e-05, "loss": 0.3287, "step": 4369 }, { "epoch": 0.33073175346483136, "grad_norm": 0.78515625, "learning_rate": 1.870515964148081e-05, "loss": 0.3427, "step": 4370 }, { "epoch": 0.3308074357882787, "grad_norm": 0.828125, "learning_rate": 1.8704573401400016e-05, "loss": 0.3503, "step": 4371 }, { "epoch": 0.33088311811172605, "grad_norm": 0.78515625, "learning_rate": 1.8703987037829908e-05, "loss": 0.3125, "step": 4372 }, { "epoch": 0.33095880043517334, "grad_norm": 0.7890625, "learning_rate": 1.870340055077881e-05, "loss": 0.296, "step": 4373 }, { "epoch": 0.3310344827586207, "grad_norm": 1.6484375, "learning_rate": 1.870281394025504e-05, "loss": 0.3248, "step": 4374 }, { "epoch": 0.33111016508206803, "grad_norm": 3.109375, "learning_rate": 1.870222720626692e-05, "loss": 0.5142, "step": 4375 }, { "epoch": 0.3311858474055154, "grad_norm": 0.75390625, "learning_rate": 1.870164034882277e-05, "loss": 0.3061, "step": 4376 }, { "epoch": 0.33126152972896267, "grad_norm": 0.84375, "learning_rate": 1.8701053367930923e-05, "loss": 0.3678, "step": 4377 }, { "epoch": 0.33133721205241, "grad_norm": 0.91015625, "learning_rate": 1.8700466263599704e-05, "loss": 0.3642, "step": 4378 }, { "epoch": 0.33141289437585736, "grad_norm": 1.25, "learning_rate": 1.8699879035837435e-05, "loss": 0.3755, "step": 4379 }, { "epoch": 0.33148857669930465, "grad_norm": 0.85546875, "learning_rate": 1.869929168465246e-05, "loss": 0.3555, "step": 4380 }, { "epoch": 0.331564259022752, "grad_norm": 0.703125, "learning_rate": 1.8698704210053097e-05, "loss": 0.2655, "step": 4381 }, { "epoch": 0.33163994134619934, "grad_norm": 0.84765625, "learning_rate": 1.8698116612047688e-05, "loss": 0.3597, "step": 4382 }, { "epoch": 0.33171562366964663, "grad_norm": 0.859375, "learning_rate": 1.8697528890644568e-05, "loss": 0.3534, "step": 4383 }, { "epoch": 0.331791305993094, "grad_norm": 0.87890625, "learning_rate": 1.8696941045852078e-05, "loss": 0.3615, "step": 4384 }, { "epoch": 0.3318669883165413, "grad_norm": 0.95703125, "learning_rate": 1.8696353077678557e-05, "loss": 0.36, "step": 4385 }, { "epoch": 0.33194267063998867, "grad_norm": 0.83203125, "learning_rate": 1.869576498613234e-05, "loss": 0.3426, "step": 4386 }, { "epoch": 0.33201835296343596, "grad_norm": 0.76171875, "learning_rate": 1.8695176771221775e-05, "loss": 0.2982, "step": 4387 }, { "epoch": 0.3320940352868833, "grad_norm": 0.765625, "learning_rate": 1.8694588432955205e-05, "loss": 0.2692, "step": 4388 }, { "epoch": 0.33216971761033065, "grad_norm": 0.89453125, "learning_rate": 1.869399997134098e-05, "loss": 0.3527, "step": 4389 }, { "epoch": 0.33224539993377794, "grad_norm": 0.875, "learning_rate": 1.8693411386387445e-05, "loss": 0.3769, "step": 4390 }, { "epoch": 0.3323210822572253, "grad_norm": 0.9140625, "learning_rate": 1.869282267810295e-05, "loss": 0.405, "step": 4391 }, { "epoch": 0.33239676458067263, "grad_norm": 0.81640625, "learning_rate": 1.869223384649585e-05, "loss": 0.3447, "step": 4392 }, { "epoch": 0.33247244690412, "grad_norm": 0.828125, "learning_rate": 1.8691644891574493e-05, "loss": 0.3565, "step": 4393 }, { "epoch": 0.33254812922756727, "grad_norm": 0.83984375, "learning_rate": 1.8691055813347245e-05, "loss": 0.3807, "step": 4394 }, { "epoch": 0.3326238115510146, "grad_norm": 0.7734375, "learning_rate": 1.869046661182245e-05, "loss": 0.3191, "step": 4395 }, { "epoch": 0.33269949387446196, "grad_norm": 0.9609375, "learning_rate": 1.868987728700847e-05, "loss": 0.3922, "step": 4396 }, { "epoch": 0.33277517619790925, "grad_norm": 0.76953125, "learning_rate": 1.8689287838913674e-05, "loss": 0.3295, "step": 4397 }, { "epoch": 0.3328508585213566, "grad_norm": 0.79296875, "learning_rate": 1.868869826754642e-05, "loss": 0.3267, "step": 4398 }, { "epoch": 0.33292654084480394, "grad_norm": 0.81640625, "learning_rate": 1.8688108572915068e-05, "loss": 0.345, "step": 4399 }, { "epoch": 0.3330022231682513, "grad_norm": 0.765625, "learning_rate": 1.8687518755027984e-05, "loss": 0.3313, "step": 4400 }, { "epoch": 0.3330779054916986, "grad_norm": 1.40625, "learning_rate": 1.8686928813893538e-05, "loss": 0.3832, "step": 4401 }, { "epoch": 0.3331535878151459, "grad_norm": 1.28125, "learning_rate": 1.8686338749520107e-05, "loss": 0.4082, "step": 4402 }, { "epoch": 0.33322927013859327, "grad_norm": 1.0234375, "learning_rate": 1.8685748561916047e-05, "loss": 0.3448, "step": 4403 }, { "epoch": 0.33330495246204056, "grad_norm": 0.7890625, "learning_rate": 1.868515825108974e-05, "loss": 0.3167, "step": 4404 }, { "epoch": 0.3333806347854879, "grad_norm": 0.8203125, "learning_rate": 1.868456781704956e-05, "loss": 0.3161, "step": 4405 }, { "epoch": 0.33345631710893525, "grad_norm": 0.8046875, "learning_rate": 1.8683977259803883e-05, "loss": 0.3539, "step": 4406 }, { "epoch": 0.3335319994323826, "grad_norm": 1.234375, "learning_rate": 1.8683386579361086e-05, "loss": 0.3318, "step": 4407 }, { "epoch": 0.3336076817558299, "grad_norm": 0.8125, "learning_rate": 1.8682795775729547e-05, "loss": 0.3236, "step": 4408 }, { "epoch": 0.33368336407927723, "grad_norm": 0.89453125, "learning_rate": 1.868220484891765e-05, "loss": 0.415, "step": 4409 }, { "epoch": 0.3337590464027246, "grad_norm": 0.8671875, "learning_rate": 1.868161379893378e-05, "loss": 0.3148, "step": 4410 }, { "epoch": 0.33383472872617187, "grad_norm": 0.828125, "learning_rate": 1.868102262578632e-05, "loss": 0.3273, "step": 4411 }, { "epoch": 0.3339104110496192, "grad_norm": 1.3984375, "learning_rate": 1.868043132948366e-05, "loss": 0.3939, "step": 4412 }, { "epoch": 0.33398609337306656, "grad_norm": 0.8203125, "learning_rate": 1.8679839910034177e-05, "loss": 0.3271, "step": 4413 }, { "epoch": 0.3340617756965139, "grad_norm": 0.78125, "learning_rate": 1.8679248367446278e-05, "loss": 0.3221, "step": 4414 }, { "epoch": 0.3341374580199612, "grad_norm": 0.89453125, "learning_rate": 1.8678656701728342e-05, "loss": 0.3609, "step": 4415 }, { "epoch": 0.33421314034340854, "grad_norm": 0.76953125, "learning_rate": 1.8678064912888768e-05, "loss": 0.3208, "step": 4416 }, { "epoch": 0.3342888226668559, "grad_norm": 0.890625, "learning_rate": 1.8677473000935956e-05, "loss": 0.3573, "step": 4417 }, { "epoch": 0.3343645049903032, "grad_norm": 0.7734375, "learning_rate": 1.8676880965878293e-05, "loss": 0.2952, "step": 4418 }, { "epoch": 0.3344401873137505, "grad_norm": 0.8515625, "learning_rate": 1.8676288807724184e-05, "loss": 0.3531, "step": 4419 }, { "epoch": 0.33451586963719787, "grad_norm": 0.84375, "learning_rate": 1.8675696526482033e-05, "loss": 0.3482, "step": 4420 }, { "epoch": 0.3345915519606452, "grad_norm": 0.9375, "learning_rate": 1.8675104122160235e-05, "loss": 0.3982, "step": 4421 }, { "epoch": 0.3346672342840925, "grad_norm": 0.79296875, "learning_rate": 1.86745115947672e-05, "loss": 0.3455, "step": 4422 }, { "epoch": 0.33474291660753985, "grad_norm": 0.7890625, "learning_rate": 1.867391894431133e-05, "loss": 0.3144, "step": 4423 }, { "epoch": 0.3348185989309872, "grad_norm": 0.765625, "learning_rate": 1.8673326170801033e-05, "loss": 0.328, "step": 4424 }, { "epoch": 0.3348942812544345, "grad_norm": 1.0078125, "learning_rate": 1.8672733274244727e-05, "loss": 0.3465, "step": 4425 }, { "epoch": 0.33496996357788184, "grad_norm": 0.78515625, "learning_rate": 1.8672140254650813e-05, "loss": 0.344, "step": 4426 }, { "epoch": 0.3350456459013292, "grad_norm": 0.78515625, "learning_rate": 1.8671547112027708e-05, "loss": 0.3243, "step": 4427 }, { "epoch": 0.3351213282247765, "grad_norm": 0.7734375, "learning_rate": 1.8670953846383825e-05, "loss": 0.3037, "step": 4428 }, { "epoch": 0.3351970105482238, "grad_norm": 0.8046875, "learning_rate": 1.8670360457727583e-05, "loss": 0.3397, "step": 4429 }, { "epoch": 0.33527269287167116, "grad_norm": 0.92578125, "learning_rate": 1.8669766946067398e-05, "loss": 0.375, "step": 4430 }, { "epoch": 0.3353483751951185, "grad_norm": 0.8046875, "learning_rate": 1.8669173311411694e-05, "loss": 0.3301, "step": 4431 }, { "epoch": 0.3354240575185658, "grad_norm": 1.2890625, "learning_rate": 1.8668579553768882e-05, "loss": 0.397, "step": 4432 }, { "epoch": 0.33549973984201314, "grad_norm": 0.89453125, "learning_rate": 1.86679856731474e-05, "loss": 0.3633, "step": 4433 }, { "epoch": 0.3355754221654605, "grad_norm": 0.8046875, "learning_rate": 1.8667391669555668e-05, "loss": 0.3258, "step": 4434 }, { "epoch": 0.33565110448890784, "grad_norm": 0.8203125, "learning_rate": 1.866679754300211e-05, "loss": 0.3028, "step": 4435 }, { "epoch": 0.3357267868123551, "grad_norm": 0.77734375, "learning_rate": 1.866620329349515e-05, "loss": 0.3312, "step": 4436 }, { "epoch": 0.3358024691358025, "grad_norm": 0.734375, "learning_rate": 1.8665608921043232e-05, "loss": 0.3036, "step": 4437 }, { "epoch": 0.3358781514592498, "grad_norm": 0.8125, "learning_rate": 1.8665014425654778e-05, "loss": 0.348, "step": 4438 }, { "epoch": 0.3359538337826971, "grad_norm": 0.8984375, "learning_rate": 1.8664419807338224e-05, "loss": 0.3984, "step": 4439 }, { "epoch": 0.33602951610614445, "grad_norm": 0.80859375, "learning_rate": 1.8663825066102008e-05, "loss": 0.3375, "step": 4440 }, { "epoch": 0.3361051984295918, "grad_norm": 0.7734375, "learning_rate": 1.8663230201954566e-05, "loss": 0.3355, "step": 4441 }, { "epoch": 0.3361808807530391, "grad_norm": 1.90625, "learning_rate": 1.8662635214904333e-05, "loss": 0.4336, "step": 4442 }, { "epoch": 0.33625656307648644, "grad_norm": 0.8984375, "learning_rate": 1.8662040104959757e-05, "loss": 0.3604, "step": 4443 }, { "epoch": 0.3363322453999338, "grad_norm": 0.95703125, "learning_rate": 1.8661444872129277e-05, "loss": 0.365, "step": 4444 }, { "epoch": 0.33640792772338113, "grad_norm": 0.78125, "learning_rate": 1.866084951642134e-05, "loss": 0.3031, "step": 4445 }, { "epoch": 0.3364836100468284, "grad_norm": 0.83203125, "learning_rate": 1.866025403784439e-05, "loss": 0.3385, "step": 4446 }, { "epoch": 0.33655929237027576, "grad_norm": 0.82421875, "learning_rate": 1.865965843640687e-05, "loss": 0.3427, "step": 4447 }, { "epoch": 0.3366349746937231, "grad_norm": 1.453125, "learning_rate": 1.865906271211724e-05, "loss": 0.444, "step": 4448 }, { "epoch": 0.3367106570171704, "grad_norm": 0.7890625, "learning_rate": 1.865846686498394e-05, "loss": 0.3115, "step": 4449 }, { "epoch": 0.33678633934061775, "grad_norm": 0.796875, "learning_rate": 1.8657870895015437e-05, "loss": 0.3108, "step": 4450 }, { "epoch": 0.3368620216640651, "grad_norm": 0.86328125, "learning_rate": 1.8657274802220172e-05, "loss": 0.373, "step": 4451 }, { "epoch": 0.33693770398751244, "grad_norm": 0.8203125, "learning_rate": 1.865667858660661e-05, "loss": 0.3503, "step": 4452 }, { "epoch": 0.33701338631095973, "grad_norm": 0.77734375, "learning_rate": 1.8656082248183208e-05, "loss": 0.3226, "step": 4453 }, { "epoch": 0.3370890686344071, "grad_norm": 0.83984375, "learning_rate": 1.8655485786958423e-05, "loss": 0.2886, "step": 4454 }, { "epoch": 0.3371647509578544, "grad_norm": 0.875, "learning_rate": 1.865488920294072e-05, "loss": 0.3613, "step": 4455 }, { "epoch": 0.3372404332813017, "grad_norm": 0.76953125, "learning_rate": 1.8654292496138558e-05, "loss": 0.2952, "step": 4456 }, { "epoch": 0.33731611560474906, "grad_norm": 0.8125, "learning_rate": 1.8653695666560408e-05, "loss": 0.3204, "step": 4457 }, { "epoch": 0.3373917979281964, "grad_norm": 0.734375, "learning_rate": 1.8653098714214736e-05, "loss": 0.2726, "step": 4458 }, { "epoch": 0.33746748025164375, "grad_norm": 1.8203125, "learning_rate": 1.865250163911001e-05, "loss": 0.4156, "step": 4459 }, { "epoch": 0.33754316257509104, "grad_norm": 0.80078125, "learning_rate": 1.8651904441254696e-05, "loss": 0.3484, "step": 4460 }, { "epoch": 0.3376188448985384, "grad_norm": 0.875, "learning_rate": 1.8651307120657273e-05, "loss": 0.3445, "step": 4461 }, { "epoch": 0.33769452722198573, "grad_norm": 0.83203125, "learning_rate": 1.8650709677326212e-05, "loss": 0.3475, "step": 4462 }, { "epoch": 0.337770209545433, "grad_norm": 0.8984375, "learning_rate": 1.8650112111269987e-05, "loss": 0.361, "step": 4463 }, { "epoch": 0.33784589186888037, "grad_norm": 0.796875, "learning_rate": 1.8649514422497083e-05, "loss": 0.2952, "step": 4464 }, { "epoch": 0.3379215741923277, "grad_norm": 0.81640625, "learning_rate": 1.864891661101597e-05, "loss": 0.3368, "step": 4465 }, { "epoch": 0.33799725651577506, "grad_norm": 0.82421875, "learning_rate": 1.8648318676835132e-05, "loss": 0.3414, "step": 4466 }, { "epoch": 0.33807293883922235, "grad_norm": 0.89453125, "learning_rate": 1.8647720619963054e-05, "loss": 0.3885, "step": 4467 }, { "epoch": 0.3381486211626697, "grad_norm": 0.83984375, "learning_rate": 1.8647122440408218e-05, "loss": 0.3524, "step": 4468 }, { "epoch": 0.33822430348611704, "grad_norm": 0.8671875, "learning_rate": 1.8646524138179115e-05, "loss": 0.3955, "step": 4469 }, { "epoch": 0.33829998580956433, "grad_norm": 0.83203125, "learning_rate": 1.8645925713284225e-05, "loss": 0.3428, "step": 4470 }, { "epoch": 0.3383756681330117, "grad_norm": 0.73828125, "learning_rate": 1.8645327165732045e-05, "loss": 0.3099, "step": 4471 }, { "epoch": 0.338451350456459, "grad_norm": 0.828125, "learning_rate": 1.864472849553106e-05, "loss": 0.3227, "step": 4472 }, { "epoch": 0.33852703277990637, "grad_norm": 1.15625, "learning_rate": 1.8644129702689767e-05, "loss": 0.3765, "step": 4473 }, { "epoch": 0.33860271510335366, "grad_norm": 0.84765625, "learning_rate": 1.864353078721666e-05, "loss": 0.3188, "step": 4474 }, { "epoch": 0.338678397426801, "grad_norm": 1.1171875, "learning_rate": 1.864293174912024e-05, "loss": 0.359, "step": 4475 }, { "epoch": 0.33875407975024835, "grad_norm": 0.796875, "learning_rate": 1.8642332588408998e-05, "loss": 0.3295, "step": 4476 }, { "epoch": 0.33882976207369564, "grad_norm": 0.828125, "learning_rate": 1.8641733305091437e-05, "loss": 0.3214, "step": 4477 }, { "epoch": 0.338905444397143, "grad_norm": 0.74609375, "learning_rate": 1.8641133899176062e-05, "loss": 0.2766, "step": 4478 }, { "epoch": 0.33898112672059033, "grad_norm": 0.85546875, "learning_rate": 1.864053437067137e-05, "loss": 0.3383, "step": 4479 }, { "epoch": 0.3390568090440377, "grad_norm": 0.74609375, "learning_rate": 1.8639934719585874e-05, "loss": 0.277, "step": 4480 }, { "epoch": 0.33913249136748497, "grad_norm": 0.7421875, "learning_rate": 1.8639334945928075e-05, "loss": 0.2955, "step": 4481 }, { "epoch": 0.3392081736909323, "grad_norm": 0.78125, "learning_rate": 1.8638735049706486e-05, "loss": 0.3127, "step": 4482 }, { "epoch": 0.33928385601437966, "grad_norm": 2.203125, "learning_rate": 1.8638135030929618e-05, "loss": 0.3112, "step": 4483 }, { "epoch": 0.33935953833782695, "grad_norm": 0.89453125, "learning_rate": 1.8637534889605974e-05, "loss": 0.3479, "step": 4484 }, { "epoch": 0.3394352206612743, "grad_norm": 1.546875, "learning_rate": 1.863693462574408e-05, "loss": 0.417, "step": 4485 }, { "epoch": 0.33951090298472164, "grad_norm": 0.7890625, "learning_rate": 1.8636334239352445e-05, "loss": 0.3121, "step": 4486 }, { "epoch": 0.339586585308169, "grad_norm": 0.7890625, "learning_rate": 1.863573373043959e-05, "loss": 0.3149, "step": 4487 }, { "epoch": 0.3396622676316163, "grad_norm": 1.890625, "learning_rate": 1.8635133099014032e-05, "loss": 0.3383, "step": 4488 }, { "epoch": 0.3397379499550636, "grad_norm": 5.0, "learning_rate": 1.8634532345084292e-05, "loss": 0.2517, "step": 4489 }, { "epoch": 0.33981363227851097, "grad_norm": 0.84765625, "learning_rate": 1.8633931468658894e-05, "loss": 0.3093, "step": 4490 }, { "epoch": 0.33988931460195826, "grad_norm": 0.80859375, "learning_rate": 1.863333046974636e-05, "loss": 0.3364, "step": 4491 }, { "epoch": 0.3399649969254056, "grad_norm": 0.90625, "learning_rate": 1.863272934835522e-05, "loss": 0.3774, "step": 4492 }, { "epoch": 0.34004067924885295, "grad_norm": 4.40625, "learning_rate": 1.8632128104494e-05, "loss": 0.3095, "step": 4493 }, { "epoch": 0.3401163615723003, "grad_norm": 1.1484375, "learning_rate": 1.8631526738171226e-05, "loss": 0.329, "step": 4494 }, { "epoch": 0.3401920438957476, "grad_norm": 0.8671875, "learning_rate": 1.863092524939544e-05, "loss": 0.3255, "step": 4495 }, { "epoch": 0.34026772621919493, "grad_norm": 0.8046875, "learning_rate": 1.8630323638175164e-05, "loss": 0.3119, "step": 4496 }, { "epoch": 0.3403434085426423, "grad_norm": 0.81640625, "learning_rate": 1.8629721904518935e-05, "loss": 0.3392, "step": 4497 }, { "epoch": 0.34041909086608957, "grad_norm": 0.73046875, "learning_rate": 1.8629120048435294e-05, "loss": 0.2968, "step": 4498 }, { "epoch": 0.3404947731895369, "grad_norm": 0.99609375, "learning_rate": 1.8628518069932777e-05, "loss": 0.3341, "step": 4499 }, { "epoch": 0.34057045551298426, "grad_norm": 1.5078125, "learning_rate": 1.8627915969019925e-05, "loss": 0.4053, "step": 4500 }, { "epoch": 0.3406461378364316, "grad_norm": 0.8046875, "learning_rate": 1.862731374570528e-05, "loss": 0.3064, "step": 4501 }, { "epoch": 0.3407218201598789, "grad_norm": 0.875, "learning_rate": 1.862671139999738e-05, "loss": 0.36, "step": 4502 }, { "epoch": 0.34079750248332624, "grad_norm": 0.80859375, "learning_rate": 1.8626108931904777e-05, "loss": 0.3297, "step": 4503 }, { "epoch": 0.3408731848067736, "grad_norm": 0.8359375, "learning_rate": 1.8625506341436016e-05, "loss": 0.3223, "step": 4504 }, { "epoch": 0.3409488671302209, "grad_norm": 0.8359375, "learning_rate": 1.8624903628599647e-05, "loss": 0.3725, "step": 4505 }, { "epoch": 0.3410245494536682, "grad_norm": 0.82421875, "learning_rate": 1.862430079340422e-05, "loss": 0.3484, "step": 4506 }, { "epoch": 0.34110023177711557, "grad_norm": 0.80078125, "learning_rate": 1.862369783585828e-05, "loss": 0.2912, "step": 4507 }, { "epoch": 0.34117591410056286, "grad_norm": 0.80859375, "learning_rate": 1.8623094755970395e-05, "loss": 0.3353, "step": 4508 }, { "epoch": 0.3412515964240102, "grad_norm": 0.74609375, "learning_rate": 1.8622491553749106e-05, "loss": 0.2826, "step": 4509 }, { "epoch": 0.34132727874745755, "grad_norm": 0.8359375, "learning_rate": 1.8621888229202985e-05, "loss": 0.3361, "step": 4510 }, { "epoch": 0.3414029610709049, "grad_norm": 0.80859375, "learning_rate": 1.862128478234058e-05, "loss": 0.3458, "step": 4511 }, { "epoch": 0.3414786433943522, "grad_norm": 0.8125, "learning_rate": 1.862068121317046e-05, "loss": 0.3139, "step": 4512 }, { "epoch": 0.34155432571779953, "grad_norm": 0.796875, "learning_rate": 1.8620077521701177e-05, "loss": 0.3137, "step": 4513 }, { "epoch": 0.3416300080412469, "grad_norm": 0.8515625, "learning_rate": 1.8619473707941304e-05, "loss": 0.3396, "step": 4514 }, { "epoch": 0.34170569036469417, "grad_norm": 0.82421875, "learning_rate": 1.8618869771899404e-05, "loss": 0.3314, "step": 4515 }, { "epoch": 0.3417813726881415, "grad_norm": 0.87109375, "learning_rate": 1.8618265713584047e-05, "loss": 0.3517, "step": 4516 }, { "epoch": 0.34185705501158886, "grad_norm": 0.82421875, "learning_rate": 1.8617661533003803e-05, "loss": 0.3125, "step": 4517 }, { "epoch": 0.3419327373350362, "grad_norm": 0.86328125, "learning_rate": 1.8617057230167238e-05, "loss": 0.3912, "step": 4518 }, { "epoch": 0.3420084196584835, "grad_norm": 0.79296875, "learning_rate": 1.861645280508293e-05, "loss": 0.326, "step": 4519 }, { "epoch": 0.34208410198193084, "grad_norm": 0.859375, "learning_rate": 1.8615848257759458e-05, "loss": 0.3618, "step": 4520 }, { "epoch": 0.3421597843053782, "grad_norm": 0.8046875, "learning_rate": 1.8615243588205386e-05, "loss": 0.3176, "step": 4521 }, { "epoch": 0.3422354666288255, "grad_norm": 0.76953125, "learning_rate": 1.8614638796429302e-05, "loss": 0.3103, "step": 4522 }, { "epoch": 0.3423111489522728, "grad_norm": 0.74609375, "learning_rate": 1.8614033882439787e-05, "loss": 0.3001, "step": 4523 }, { "epoch": 0.34238683127572017, "grad_norm": 0.80078125, "learning_rate": 1.8613428846245414e-05, "loss": 0.302, "step": 4524 }, { "epoch": 0.3424625135991675, "grad_norm": 0.82421875, "learning_rate": 1.8612823687854774e-05, "loss": 0.3282, "step": 4525 }, { "epoch": 0.3425381959226148, "grad_norm": 0.82421875, "learning_rate": 1.861221840727645e-05, "loss": 0.3647, "step": 4526 }, { "epoch": 0.34261387824606215, "grad_norm": 1.3984375, "learning_rate": 1.8611613004519028e-05, "loss": 0.4393, "step": 4527 }, { "epoch": 0.3426895605695095, "grad_norm": 0.796875, "learning_rate": 1.86110074795911e-05, "loss": 0.3046, "step": 4528 }, { "epoch": 0.3427652428929568, "grad_norm": 0.79296875, "learning_rate": 1.861040183250125e-05, "loss": 0.3488, "step": 4529 }, { "epoch": 0.34284092521640414, "grad_norm": 0.8359375, "learning_rate": 1.8609796063258076e-05, "loss": 0.3397, "step": 4530 }, { "epoch": 0.3429166075398515, "grad_norm": 0.80078125, "learning_rate": 1.860919017187017e-05, "loss": 0.3162, "step": 4531 }, { "epoch": 0.3429922898632988, "grad_norm": 0.85546875, "learning_rate": 1.8608584158346124e-05, "loss": 0.3503, "step": 4532 }, { "epoch": 0.3430679721867461, "grad_norm": 0.86328125, "learning_rate": 1.8607978022694545e-05, "loss": 0.3637, "step": 4533 }, { "epoch": 0.34314365451019346, "grad_norm": 0.8515625, "learning_rate": 1.860737176492402e-05, "loss": 0.3308, "step": 4534 }, { "epoch": 0.3432193368336408, "grad_norm": 0.7890625, "learning_rate": 1.860676538504316e-05, "loss": 0.3191, "step": 4535 }, { "epoch": 0.3432950191570881, "grad_norm": 0.7890625, "learning_rate": 1.8606158883060562e-05, "loss": 0.3084, "step": 4536 }, { "epoch": 0.34337070148053545, "grad_norm": 0.84765625, "learning_rate": 1.8605552258984832e-05, "loss": 0.3673, "step": 4537 }, { "epoch": 0.3434463838039828, "grad_norm": 0.79296875, "learning_rate": 1.8604945512824577e-05, "loss": 0.2909, "step": 4538 }, { "epoch": 0.34352206612743014, "grad_norm": 1.171875, "learning_rate": 1.86043386445884e-05, "loss": 0.3333, "step": 4539 }, { "epoch": 0.3435977484508774, "grad_norm": 0.7890625, "learning_rate": 1.8603731654284916e-05, "loss": 0.3326, "step": 4540 }, { "epoch": 0.3436734307743248, "grad_norm": 0.80078125, "learning_rate": 1.8603124541922732e-05, "loss": 0.3182, "step": 4541 }, { "epoch": 0.3437491130977721, "grad_norm": 0.82421875, "learning_rate": 1.8602517307510463e-05, "loss": 0.3316, "step": 4542 }, { "epoch": 0.3438247954212194, "grad_norm": 0.87109375, "learning_rate": 1.8601909951056728e-05, "loss": 0.3712, "step": 4543 }, { "epoch": 0.34390047774466675, "grad_norm": 0.8671875, "learning_rate": 1.8601302472570137e-05, "loss": 0.3416, "step": 4544 }, { "epoch": 0.3439761600681141, "grad_norm": 0.74609375, "learning_rate": 1.8600694872059305e-05, "loss": 0.2971, "step": 4545 }, { "epoch": 0.34405184239156145, "grad_norm": 0.79296875, "learning_rate": 1.860008714953286e-05, "loss": 0.2874, "step": 4546 }, { "epoch": 0.34412752471500874, "grad_norm": 0.78125, "learning_rate": 1.859947930499942e-05, "loss": 0.2965, "step": 4547 }, { "epoch": 0.3442032070384561, "grad_norm": 0.83984375, "learning_rate": 1.8598871338467613e-05, "loss": 0.3573, "step": 4548 }, { "epoch": 0.34427888936190343, "grad_norm": 1.359375, "learning_rate": 1.8598263249946056e-05, "loss": 0.4494, "step": 4549 }, { "epoch": 0.3443545716853507, "grad_norm": 0.796875, "learning_rate": 1.8597655039443384e-05, "loss": 0.3344, "step": 4550 }, { "epoch": 0.34443025400879806, "grad_norm": 0.84765625, "learning_rate": 1.8597046706968217e-05, "loss": 0.3722, "step": 4551 }, { "epoch": 0.3445059363322454, "grad_norm": 0.85546875, "learning_rate": 1.8596438252529186e-05, "loss": 0.3461, "step": 4552 }, { "epoch": 0.34458161865569276, "grad_norm": 0.875, "learning_rate": 1.8595829676134934e-05, "loss": 0.3565, "step": 4553 }, { "epoch": 0.34465730097914005, "grad_norm": 0.828125, "learning_rate": 1.8595220977794085e-05, "loss": 0.3488, "step": 4554 }, { "epoch": 0.3447329833025874, "grad_norm": 0.765625, "learning_rate": 1.8594612157515276e-05, "loss": 0.3108, "step": 4555 }, { "epoch": 0.34480866562603474, "grad_norm": 0.828125, "learning_rate": 1.8594003215307142e-05, "loss": 0.3505, "step": 4556 }, { "epoch": 0.34488434794948203, "grad_norm": 0.875, "learning_rate": 1.8593394151178326e-05, "loss": 0.3759, "step": 4557 }, { "epoch": 0.3449600302729294, "grad_norm": 0.76171875, "learning_rate": 1.859278496513747e-05, "loss": 0.3155, "step": 4558 }, { "epoch": 0.3450357125963767, "grad_norm": 0.77734375, "learning_rate": 1.859217565719321e-05, "loss": 0.3387, "step": 4559 }, { "epoch": 0.34511139491982407, "grad_norm": 0.7890625, "learning_rate": 1.8591566227354194e-05, "loss": 0.3139, "step": 4560 }, { "epoch": 0.34518707724327136, "grad_norm": 0.74609375, "learning_rate": 1.859095667562907e-05, "loss": 0.2989, "step": 4561 }, { "epoch": 0.3452627595667187, "grad_norm": 0.76953125, "learning_rate": 1.8590347002026478e-05, "loss": 0.3006, "step": 4562 }, { "epoch": 0.34533844189016605, "grad_norm": 0.8671875, "learning_rate": 1.8589737206555077e-05, "loss": 0.3632, "step": 4563 }, { "epoch": 0.34541412421361334, "grad_norm": 0.79296875, "learning_rate": 1.858912728922351e-05, "loss": 0.3292, "step": 4564 }, { "epoch": 0.3454898065370607, "grad_norm": 0.8046875, "learning_rate": 1.8588517250040436e-05, "loss": 0.3347, "step": 4565 }, { "epoch": 0.34556548886050803, "grad_norm": 1.0546875, "learning_rate": 1.8587907089014504e-05, "loss": 0.3189, "step": 4566 }, { "epoch": 0.3456411711839553, "grad_norm": 0.79296875, "learning_rate": 1.8587296806154375e-05, "loss": 0.2993, "step": 4567 }, { "epoch": 0.34571685350740267, "grad_norm": 0.79296875, "learning_rate": 1.8586686401468705e-05, "loss": 0.3194, "step": 4568 }, { "epoch": 0.34579253583085, "grad_norm": 0.7734375, "learning_rate": 1.8586075874966152e-05, "loss": 0.3061, "step": 4569 }, { "epoch": 0.34586821815429736, "grad_norm": 0.8125, "learning_rate": 1.8585465226655376e-05, "loss": 0.3436, "step": 4570 }, { "epoch": 0.34594390047774465, "grad_norm": 0.8828125, "learning_rate": 1.8584854456545046e-05, "loss": 0.2886, "step": 4571 }, { "epoch": 0.346019582801192, "grad_norm": 0.76953125, "learning_rate": 1.858424356464382e-05, "loss": 0.3195, "step": 4572 }, { "epoch": 0.34609526512463934, "grad_norm": 0.91015625, "learning_rate": 1.858363255096037e-05, "loss": 0.378, "step": 4573 }, { "epoch": 0.34617094744808663, "grad_norm": 0.81640625, "learning_rate": 1.858302141550336e-05, "loss": 0.3453, "step": 4574 }, { "epoch": 0.346246629771534, "grad_norm": 0.80078125, "learning_rate": 1.8582410158281466e-05, "loss": 0.3367, "step": 4575 }, { "epoch": 0.3463223120949813, "grad_norm": 0.81640625, "learning_rate": 1.8581798779303356e-05, "loss": 0.3528, "step": 4576 }, { "epoch": 0.34639799441842867, "grad_norm": 0.86328125, "learning_rate": 1.85811872785777e-05, "loss": 0.3793, "step": 4577 }, { "epoch": 0.34647367674187596, "grad_norm": 0.796875, "learning_rate": 1.858057565611318e-05, "loss": 0.3039, "step": 4578 }, { "epoch": 0.3465493590653233, "grad_norm": 4.40625, "learning_rate": 1.8579963911918467e-05, "loss": 0.4365, "step": 4579 }, { "epoch": 0.34662504138877065, "grad_norm": 0.8203125, "learning_rate": 1.8579352046002243e-05, "loss": 0.3438, "step": 4580 }, { "epoch": 0.34670072371221794, "grad_norm": 0.8046875, "learning_rate": 1.857874005837319e-05, "loss": 0.341, "step": 4581 }, { "epoch": 0.3467764060356653, "grad_norm": 0.81640625, "learning_rate": 1.8578127949039985e-05, "loss": 0.3486, "step": 4582 }, { "epoch": 0.34685208835911263, "grad_norm": 0.80859375, "learning_rate": 1.8577515718011314e-05, "loss": 0.354, "step": 4583 }, { "epoch": 0.34692777068256, "grad_norm": 0.7734375, "learning_rate": 1.8576903365295864e-05, "loss": 0.3128, "step": 4584 }, { "epoch": 0.34700345300600727, "grad_norm": 0.87890625, "learning_rate": 1.857629089090232e-05, "loss": 0.3519, "step": 4585 }, { "epoch": 0.3470791353294546, "grad_norm": 0.8515625, "learning_rate": 1.857567829483937e-05, "loss": 0.3905, "step": 4586 }, { "epoch": 0.34715481765290196, "grad_norm": 0.8125, "learning_rate": 1.8575065577115713e-05, "loss": 0.323, "step": 4587 }, { "epoch": 0.34723049997634925, "grad_norm": 0.7578125, "learning_rate": 1.8574452737740034e-05, "loss": 0.3161, "step": 4588 }, { "epoch": 0.3473061822997966, "grad_norm": 0.80078125, "learning_rate": 1.857383977672103e-05, "loss": 0.3182, "step": 4589 }, { "epoch": 0.34738186462324394, "grad_norm": 0.73046875, "learning_rate": 1.857322669406739e-05, "loss": 0.2838, "step": 4590 }, { "epoch": 0.3474575469466913, "grad_norm": 0.83203125, "learning_rate": 1.857261348978782e-05, "loss": 0.3635, "step": 4591 }, { "epoch": 0.3475332292701386, "grad_norm": 0.828125, "learning_rate": 1.8572000163891017e-05, "loss": 0.348, "step": 4592 }, { "epoch": 0.3476089115935859, "grad_norm": 0.82421875, "learning_rate": 1.8571386716385682e-05, "loss": 0.3112, "step": 4593 }, { "epoch": 0.34768459391703327, "grad_norm": 0.8671875, "learning_rate": 1.8570773147280518e-05, "loss": 0.3332, "step": 4594 }, { "epoch": 0.34776027624048056, "grad_norm": 0.80859375, "learning_rate": 1.8570159456584225e-05, "loss": 0.3294, "step": 4595 }, { "epoch": 0.3478359585639279, "grad_norm": 0.78515625, "learning_rate": 1.8569545644305517e-05, "loss": 0.3314, "step": 4596 }, { "epoch": 0.34791164088737525, "grad_norm": 0.8671875, "learning_rate": 1.85689317104531e-05, "loss": 0.326, "step": 4597 }, { "epoch": 0.3479873232108226, "grad_norm": 1.4921875, "learning_rate": 1.8568317655035677e-05, "loss": 0.429, "step": 4598 }, { "epoch": 0.3480630055342699, "grad_norm": 0.8671875, "learning_rate": 1.8567703478061963e-05, "loss": 0.3751, "step": 4599 }, { "epoch": 0.34813868785771723, "grad_norm": 0.9140625, "learning_rate": 1.856708917954068e-05, "loss": 0.4014, "step": 4600 }, { "epoch": 0.3482143701811646, "grad_norm": 0.87109375, "learning_rate": 1.856647475948053e-05, "loss": 0.3518, "step": 4601 }, { "epoch": 0.34829005250461187, "grad_norm": 0.82421875, "learning_rate": 1.8565860217890235e-05, "loss": 0.3563, "step": 4602 }, { "epoch": 0.3483657348280592, "grad_norm": 0.8515625, "learning_rate": 1.8565245554778516e-05, "loss": 0.3364, "step": 4603 }, { "epoch": 0.34844141715150656, "grad_norm": 0.83203125, "learning_rate": 1.856463077015409e-05, "loss": 0.3331, "step": 4604 }, { "epoch": 0.3485170994749539, "grad_norm": 0.80078125, "learning_rate": 1.8564015864025677e-05, "loss": 0.3477, "step": 4605 }, { "epoch": 0.3485927817984012, "grad_norm": 0.90234375, "learning_rate": 1.8563400836402002e-05, "loss": 0.3679, "step": 4606 }, { "epoch": 0.34866846412184854, "grad_norm": 0.79296875, "learning_rate": 1.8562785687291797e-05, "loss": 0.3024, "step": 4607 }, { "epoch": 0.3487441464452959, "grad_norm": 0.875, "learning_rate": 1.8562170416703775e-05, "loss": 0.337, "step": 4608 }, { "epoch": 0.3488198287687432, "grad_norm": 0.76953125, "learning_rate": 1.8561555024646676e-05, "loss": 0.3377, "step": 4609 }, { "epoch": 0.3488955110921905, "grad_norm": 0.82421875, "learning_rate": 1.8560939511129225e-05, "loss": 0.348, "step": 4610 }, { "epoch": 0.34897119341563787, "grad_norm": 0.7421875, "learning_rate": 1.856032387616016e-05, "loss": 0.2983, "step": 4611 }, { "epoch": 0.3490468757390852, "grad_norm": 0.77734375, "learning_rate": 1.8559708119748205e-05, "loss": 0.3257, "step": 4612 }, { "epoch": 0.3491225580625325, "grad_norm": 0.7578125, "learning_rate": 1.8559092241902105e-05, "loss": 0.2958, "step": 4613 }, { "epoch": 0.34919824038597985, "grad_norm": 0.859375, "learning_rate": 1.8558476242630595e-05, "loss": 0.3617, "step": 4614 }, { "epoch": 0.3492739227094272, "grad_norm": 0.7734375, "learning_rate": 1.8557860121942412e-05, "loss": 0.3225, "step": 4615 }, { "epoch": 0.3493496050328745, "grad_norm": 1.171875, "learning_rate": 1.8557243879846296e-05, "loss": 0.3616, "step": 4616 }, { "epoch": 0.34942528735632183, "grad_norm": 1.1796875, "learning_rate": 1.8556627516350988e-05, "loss": 0.3756, "step": 4617 }, { "epoch": 0.3495009696797692, "grad_norm": 1.171875, "learning_rate": 1.855601103146524e-05, "loss": 0.4531, "step": 4618 }, { "epoch": 0.3495766520032165, "grad_norm": 0.84765625, "learning_rate": 1.855539442519779e-05, "loss": 0.3898, "step": 4619 }, { "epoch": 0.3496523343266638, "grad_norm": 0.81640625, "learning_rate": 1.855477769755739e-05, "loss": 0.3149, "step": 4620 }, { "epoch": 0.34972801665011116, "grad_norm": 0.91796875, "learning_rate": 1.855416084855279e-05, "loss": 0.3508, "step": 4621 }, { "epoch": 0.3498036989735585, "grad_norm": 0.83203125, "learning_rate": 1.8553543878192733e-05, "loss": 0.3534, "step": 4622 }, { "epoch": 0.3498793812970058, "grad_norm": 0.87109375, "learning_rate": 1.8552926786485983e-05, "loss": 0.3762, "step": 4623 }, { "epoch": 0.34995506362045314, "grad_norm": 0.859375, "learning_rate": 1.8552309573441288e-05, "loss": 0.3138, "step": 4624 }, { "epoch": 0.3500307459439005, "grad_norm": 0.82421875, "learning_rate": 1.85516922390674e-05, "loss": 0.3064, "step": 4625 }, { "epoch": 0.3501064282673478, "grad_norm": 0.85546875, "learning_rate": 1.855107478337309e-05, "loss": 0.3638, "step": 4626 }, { "epoch": 0.3501821105907951, "grad_norm": 0.796875, "learning_rate": 1.8550457206367106e-05, "loss": 0.3321, "step": 4627 }, { "epoch": 0.35025779291424247, "grad_norm": 0.8359375, "learning_rate": 1.8549839508058215e-05, "loss": 0.3415, "step": 4628 }, { "epoch": 0.3503334752376898, "grad_norm": 0.85546875, "learning_rate": 1.854922168845518e-05, "loss": 0.3664, "step": 4629 }, { "epoch": 0.3504091575611371, "grad_norm": 0.8515625, "learning_rate": 1.854860374756676e-05, "loss": 0.3409, "step": 4630 }, { "epoch": 0.35048483988458445, "grad_norm": 0.83984375, "learning_rate": 1.854798568540173e-05, "loss": 0.3329, "step": 4631 }, { "epoch": 0.3505605222080318, "grad_norm": 0.84765625, "learning_rate": 1.8547367501968853e-05, "loss": 0.3684, "step": 4632 }, { "epoch": 0.3506362045314791, "grad_norm": 0.765625, "learning_rate": 1.8546749197276898e-05, "loss": 0.3365, "step": 4633 }, { "epoch": 0.35071188685492644, "grad_norm": 0.78125, "learning_rate": 1.854613077133464e-05, "loss": 0.3179, "step": 4634 }, { "epoch": 0.3507875691783738, "grad_norm": 0.8046875, "learning_rate": 1.8545512224150854e-05, "loss": 0.3363, "step": 4635 }, { "epoch": 0.3508632515018211, "grad_norm": 0.87109375, "learning_rate": 1.854489355573431e-05, "loss": 0.3808, "step": 4636 }, { "epoch": 0.3509389338252684, "grad_norm": 0.8203125, "learning_rate": 1.8544274766093786e-05, "loss": 0.3524, "step": 4637 }, { "epoch": 0.35101461614871576, "grad_norm": 0.77734375, "learning_rate": 1.8543655855238064e-05, "loss": 0.3335, "step": 4638 }, { "epoch": 0.3510902984721631, "grad_norm": 0.796875, "learning_rate": 1.8543036823175922e-05, "loss": 0.333, "step": 4639 }, { "epoch": 0.3511659807956104, "grad_norm": 0.80859375, "learning_rate": 1.8542417669916143e-05, "loss": 0.3373, "step": 4640 }, { "epoch": 0.35124166311905775, "grad_norm": 0.828125, "learning_rate": 1.854179839546751e-05, "loss": 0.356, "step": 4641 }, { "epoch": 0.3513173454425051, "grad_norm": 0.71875, "learning_rate": 1.854117899983881e-05, "loss": 0.2777, "step": 4642 }, { "epoch": 0.35139302776595244, "grad_norm": 1.9140625, "learning_rate": 1.8540559483038825e-05, "loss": 0.41, "step": 4643 }, { "epoch": 0.3514687100893997, "grad_norm": 0.8359375, "learning_rate": 1.853993984507635e-05, "loss": 0.3603, "step": 4644 }, { "epoch": 0.3515443924128471, "grad_norm": 0.83203125, "learning_rate": 1.853932008596017e-05, "loss": 0.3338, "step": 4645 }, { "epoch": 0.3516200747362944, "grad_norm": 0.8359375, "learning_rate": 1.8538700205699084e-05, "loss": 0.3422, "step": 4646 }, { "epoch": 0.3516957570597417, "grad_norm": 0.96484375, "learning_rate": 1.8538080204301884e-05, "loss": 0.2888, "step": 4647 }, { "epoch": 0.35177143938318906, "grad_norm": 0.79296875, "learning_rate": 1.8537460081777362e-05, "loss": 0.34, "step": 4648 }, { "epoch": 0.3518471217066364, "grad_norm": 0.8125, "learning_rate": 1.8536839838134316e-05, "loss": 0.3148, "step": 4649 }, { "epoch": 0.35192280403008375, "grad_norm": 0.8828125, "learning_rate": 1.8536219473381553e-05, "loss": 0.3402, "step": 4650 }, { "epoch": 0.35199848635353104, "grad_norm": 0.83203125, "learning_rate": 1.8535598987527863e-05, "loss": 0.3414, "step": 4651 }, { "epoch": 0.3520741686769784, "grad_norm": 0.87890625, "learning_rate": 1.8534978380582055e-05, "loss": 0.4047, "step": 4652 }, { "epoch": 0.35214985100042573, "grad_norm": 0.765625, "learning_rate": 1.853435765255293e-05, "loss": 0.3116, "step": 4653 }, { "epoch": 0.352225533323873, "grad_norm": 0.7265625, "learning_rate": 1.8533736803449304e-05, "loss": 0.2721, "step": 4654 }, { "epoch": 0.35230121564732036, "grad_norm": 0.80078125, "learning_rate": 1.853311583327997e-05, "loss": 0.3633, "step": 4655 }, { "epoch": 0.3523768979707677, "grad_norm": 0.75, "learning_rate": 1.8532494742053747e-05, "loss": 0.2961, "step": 4656 }, { "epoch": 0.35245258029421506, "grad_norm": 0.828125, "learning_rate": 1.853187352977944e-05, "loss": 0.3362, "step": 4657 }, { "epoch": 0.35252826261766235, "grad_norm": 0.8125, "learning_rate": 1.853125219646587e-05, "loss": 0.3415, "step": 4658 }, { "epoch": 0.3526039449411097, "grad_norm": 0.78515625, "learning_rate": 1.8530630742121846e-05, "loss": 0.3049, "step": 4659 }, { "epoch": 0.35267962726455704, "grad_norm": 0.94140625, "learning_rate": 1.8530009166756188e-05, "loss": 0.3202, "step": 4660 }, { "epoch": 0.35275530958800433, "grad_norm": 0.8515625, "learning_rate": 1.8529387470377708e-05, "loss": 0.3732, "step": 4661 }, { "epoch": 0.3528309919114517, "grad_norm": 0.80078125, "learning_rate": 1.8528765652995235e-05, "loss": 0.3073, "step": 4662 }, { "epoch": 0.352906674234899, "grad_norm": 0.77734375, "learning_rate": 1.852814371461758e-05, "loss": 0.3201, "step": 4663 }, { "epoch": 0.35298235655834637, "grad_norm": 0.8046875, "learning_rate": 1.8527521655253574e-05, "loss": 0.3423, "step": 4664 }, { "epoch": 0.35305803888179366, "grad_norm": 0.8125, "learning_rate": 1.8526899474912042e-05, "loss": 0.312, "step": 4665 }, { "epoch": 0.353133721205241, "grad_norm": 0.87109375, "learning_rate": 1.8526277173601804e-05, "loss": 0.3776, "step": 4666 }, { "epoch": 0.35320940352868835, "grad_norm": 1.15625, "learning_rate": 1.8525654751331694e-05, "loss": 0.3919, "step": 4667 }, { "epoch": 0.35328508585213564, "grad_norm": 0.77734375, "learning_rate": 1.8525032208110544e-05, "loss": 0.311, "step": 4668 }, { "epoch": 0.353360768175583, "grad_norm": 0.765625, "learning_rate": 1.8524409543947177e-05, "loss": 0.3309, "step": 4669 }, { "epoch": 0.35343645049903033, "grad_norm": 0.78125, "learning_rate": 1.8523786758850436e-05, "loss": 0.2959, "step": 4670 }, { "epoch": 0.3535121328224777, "grad_norm": 0.7734375, "learning_rate": 1.852316385282915e-05, "loss": 0.292, "step": 4671 }, { "epoch": 0.35358781514592497, "grad_norm": 0.83984375, "learning_rate": 1.8522540825892164e-05, "loss": 0.3238, "step": 4672 }, { "epoch": 0.3536634974693723, "grad_norm": 0.71875, "learning_rate": 1.852191767804831e-05, "loss": 0.2738, "step": 4673 }, { "epoch": 0.35373917979281966, "grad_norm": 0.828125, "learning_rate": 1.8521294409306424e-05, "loss": 0.3531, "step": 4674 }, { "epoch": 0.35381486211626695, "grad_norm": 0.828125, "learning_rate": 1.8520671019675357e-05, "loss": 0.3228, "step": 4675 }, { "epoch": 0.3538905444397143, "grad_norm": 0.83203125, "learning_rate": 1.852004750916395e-05, "loss": 0.3434, "step": 4676 }, { "epoch": 0.35396622676316164, "grad_norm": 0.765625, "learning_rate": 1.8519423877781047e-05, "loss": 0.2983, "step": 4677 }, { "epoch": 0.354041909086609, "grad_norm": 0.7734375, "learning_rate": 1.8518800125535497e-05, "loss": 0.316, "step": 4678 }, { "epoch": 0.3541175914100563, "grad_norm": 0.84765625, "learning_rate": 1.851817625243615e-05, "loss": 0.3435, "step": 4679 }, { "epoch": 0.3541932737335036, "grad_norm": 1.0625, "learning_rate": 1.8517552258491852e-05, "loss": 0.3656, "step": 4680 }, { "epoch": 0.35426895605695097, "grad_norm": 0.8203125, "learning_rate": 1.851692814371146e-05, "loss": 0.3447, "step": 4681 }, { "epoch": 0.35434463838039826, "grad_norm": 0.75, "learning_rate": 1.851630390810383e-05, "loss": 0.2777, "step": 4682 }, { "epoch": 0.3544203207038456, "grad_norm": 0.83203125, "learning_rate": 1.851567955167781e-05, "loss": 0.3264, "step": 4683 }, { "epoch": 0.35449600302729295, "grad_norm": 0.84765625, "learning_rate": 1.851505507444226e-05, "loss": 0.3799, "step": 4684 }, { "epoch": 0.3545716853507403, "grad_norm": 0.89453125, "learning_rate": 1.8514430476406047e-05, "loss": 0.3835, "step": 4685 }, { "epoch": 0.3546473676741876, "grad_norm": 0.76953125, "learning_rate": 1.8513805757578027e-05, "loss": 0.3118, "step": 4686 }, { "epoch": 0.35472304999763493, "grad_norm": 0.87109375, "learning_rate": 1.851318091796706e-05, "loss": 0.3133, "step": 4687 }, { "epoch": 0.3547987323210823, "grad_norm": 0.7265625, "learning_rate": 1.8512555957582012e-05, "loss": 0.2797, "step": 4688 }, { "epoch": 0.35487441464452957, "grad_norm": 0.83203125, "learning_rate": 1.851193087643175e-05, "loss": 0.3319, "step": 4689 }, { "epoch": 0.3549500969679769, "grad_norm": 0.7421875, "learning_rate": 1.851130567452514e-05, "loss": 0.2963, "step": 4690 }, { "epoch": 0.35502577929142426, "grad_norm": 0.8046875, "learning_rate": 1.8510680351871057e-05, "loss": 0.3507, "step": 4691 }, { "epoch": 0.35510146161487155, "grad_norm": 0.796875, "learning_rate": 1.8510054908478366e-05, "loss": 0.3129, "step": 4692 }, { "epoch": 0.3551771439383189, "grad_norm": 1.2421875, "learning_rate": 1.850942934435594e-05, "loss": 0.4334, "step": 4693 }, { "epoch": 0.35525282626176624, "grad_norm": 0.6640625, "learning_rate": 1.850880365951266e-05, "loss": 0.2371, "step": 4694 }, { "epoch": 0.3553285085852136, "grad_norm": 1.2421875, "learning_rate": 1.8508177853957398e-05, "loss": 0.468, "step": 4695 }, { "epoch": 0.3554041909086609, "grad_norm": 0.86328125, "learning_rate": 1.850755192769903e-05, "loss": 0.3231, "step": 4696 }, { "epoch": 0.3554798732321082, "grad_norm": 0.79296875, "learning_rate": 1.850692588074644e-05, "loss": 0.314, "step": 4697 }, { "epoch": 0.35555555555555557, "grad_norm": 0.76171875, "learning_rate": 1.850629971310851e-05, "loss": 0.312, "step": 4698 }, { "epoch": 0.35563123787900286, "grad_norm": 0.85546875, "learning_rate": 1.850567342479412e-05, "loss": 0.36, "step": 4699 }, { "epoch": 0.3557069202024502, "grad_norm": 0.8203125, "learning_rate": 1.8505047015812155e-05, "loss": 0.332, "step": 4700 }, { "epoch": 0.35578260252589755, "grad_norm": 0.921875, "learning_rate": 1.85044204861715e-05, "loss": 0.4041, "step": 4701 }, { "epoch": 0.3558582848493449, "grad_norm": 0.7734375, "learning_rate": 1.8503793835881052e-05, "loss": 0.295, "step": 4702 }, { "epoch": 0.3559339671727922, "grad_norm": 0.828125, "learning_rate": 1.8503167064949693e-05, "loss": 0.3247, "step": 4703 }, { "epoch": 0.35600964949623953, "grad_norm": 0.81640625, "learning_rate": 1.8502540173386317e-05, "loss": 0.3291, "step": 4704 }, { "epoch": 0.3560853318196869, "grad_norm": 0.796875, "learning_rate": 1.8501913161199818e-05, "loss": 0.3163, "step": 4705 }, { "epoch": 0.35616101414313417, "grad_norm": 0.80078125, "learning_rate": 1.8501286028399094e-05, "loss": 0.3244, "step": 4706 }, { "epoch": 0.3562366964665815, "grad_norm": 0.8359375, "learning_rate": 1.8500658774993035e-05, "loss": 0.3295, "step": 4707 }, { "epoch": 0.35631237879002886, "grad_norm": 0.8984375, "learning_rate": 1.8500031400990544e-05, "loss": 0.3839, "step": 4708 }, { "epoch": 0.3563880611134762, "grad_norm": 0.875, "learning_rate": 1.849940390640052e-05, "loss": 0.3844, "step": 4709 }, { "epoch": 0.3564637434369235, "grad_norm": 0.8984375, "learning_rate": 1.849877629123187e-05, "loss": 0.3851, "step": 4710 }, { "epoch": 0.35653942576037084, "grad_norm": 0.8125, "learning_rate": 1.849814855549349e-05, "loss": 0.3212, "step": 4711 }, { "epoch": 0.3566151080838182, "grad_norm": 0.79296875, "learning_rate": 1.8497520699194295e-05, "loss": 0.3411, "step": 4712 }, { "epoch": 0.3566907904072655, "grad_norm": 0.88671875, "learning_rate": 1.849689272234318e-05, "loss": 0.4062, "step": 4713 }, { "epoch": 0.3567664727307128, "grad_norm": 0.80078125, "learning_rate": 1.8496264624949063e-05, "loss": 0.3292, "step": 4714 }, { "epoch": 0.35684215505416017, "grad_norm": 0.8359375, "learning_rate": 1.8495636407020854e-05, "loss": 0.342, "step": 4715 }, { "epoch": 0.3569178373776075, "grad_norm": 0.73828125, "learning_rate": 1.8495008068567463e-05, "loss": 0.3128, "step": 4716 }, { "epoch": 0.3569935197010548, "grad_norm": 0.83984375, "learning_rate": 1.8494379609597804e-05, "loss": 0.3281, "step": 4717 }, { "epoch": 0.35706920202450215, "grad_norm": 0.73828125, "learning_rate": 1.849375103012079e-05, "loss": 0.3108, "step": 4718 }, { "epoch": 0.3571448843479495, "grad_norm": 0.82421875, "learning_rate": 1.8493122330145346e-05, "loss": 0.3004, "step": 4719 }, { "epoch": 0.3572205666713968, "grad_norm": 0.8046875, "learning_rate": 1.8492493509680386e-05, "loss": 0.3316, "step": 4720 }, { "epoch": 0.35729624899484413, "grad_norm": 0.8359375, "learning_rate": 1.8491864568734834e-05, "loss": 0.3491, "step": 4721 }, { "epoch": 0.3573719313182915, "grad_norm": 0.78515625, "learning_rate": 1.849123550731761e-05, "loss": 0.3381, "step": 4722 }, { "epoch": 0.3574476136417388, "grad_norm": 0.81640625, "learning_rate": 1.8490606325437636e-05, "loss": 0.3496, "step": 4723 }, { "epoch": 0.3575232959651861, "grad_norm": 0.7578125, "learning_rate": 1.8489977023103845e-05, "loss": 0.2832, "step": 4724 }, { "epoch": 0.35759897828863346, "grad_norm": 0.80859375, "learning_rate": 1.8489347600325156e-05, "loss": 0.3479, "step": 4725 }, { "epoch": 0.3576746606120808, "grad_norm": 0.73828125, "learning_rate": 1.8488718057110503e-05, "loss": 0.2828, "step": 4726 }, { "epoch": 0.3577503429355281, "grad_norm": 0.85546875, "learning_rate": 1.848808839346882e-05, "loss": 0.3094, "step": 4727 }, { "epoch": 0.35782602525897544, "grad_norm": 0.8359375, "learning_rate": 1.8487458609409035e-05, "loss": 0.3568, "step": 4728 }, { "epoch": 0.3579017075824228, "grad_norm": 0.78125, "learning_rate": 1.848682870494009e-05, "loss": 0.3111, "step": 4729 }, { "epoch": 0.35797738990587014, "grad_norm": 0.796875, "learning_rate": 1.848619868007091e-05, "loss": 0.3255, "step": 4730 }, { "epoch": 0.3580530722293174, "grad_norm": 0.7890625, "learning_rate": 1.848556853481044e-05, "loss": 0.3324, "step": 4731 }, { "epoch": 0.35812875455276477, "grad_norm": 0.77734375, "learning_rate": 1.8484938269167623e-05, "loss": 0.3157, "step": 4732 }, { "epoch": 0.3582044368762121, "grad_norm": 0.78515625, "learning_rate": 1.8484307883151392e-05, "loss": 0.3223, "step": 4733 }, { "epoch": 0.3582801191996594, "grad_norm": 0.71875, "learning_rate": 1.8483677376770695e-05, "loss": 0.289, "step": 4734 }, { "epoch": 0.35835580152310675, "grad_norm": 0.7890625, "learning_rate": 1.8483046750034476e-05, "loss": 0.3411, "step": 4735 }, { "epoch": 0.3584314838465541, "grad_norm": 0.875, "learning_rate": 1.848241600295168e-05, "loss": 0.356, "step": 4736 }, { "epoch": 0.35850716617000145, "grad_norm": 0.8359375, "learning_rate": 1.8481785135531257e-05, "loss": 0.3461, "step": 4737 }, { "epoch": 0.35858284849344874, "grad_norm": 0.74609375, "learning_rate": 1.848115414778216e-05, "loss": 0.2793, "step": 4738 }, { "epoch": 0.3586585308168961, "grad_norm": 0.765625, "learning_rate": 1.8480523039713333e-05, "loss": 0.31, "step": 4739 }, { "epoch": 0.3587342131403434, "grad_norm": 0.859375, "learning_rate": 1.8479891811333735e-05, "loss": 0.3385, "step": 4740 }, { "epoch": 0.3588098954637907, "grad_norm": 0.80859375, "learning_rate": 1.847926046265232e-05, "loss": 0.3083, "step": 4741 }, { "epoch": 0.35888557778723806, "grad_norm": 0.83203125, "learning_rate": 1.8478628993678043e-05, "loss": 0.3315, "step": 4742 }, { "epoch": 0.3589612601106854, "grad_norm": 0.86328125, "learning_rate": 1.8477997404419865e-05, "loss": 0.3282, "step": 4743 }, { "epoch": 0.35903694243413276, "grad_norm": 0.90234375, "learning_rate": 1.8477365694886742e-05, "loss": 0.3505, "step": 4744 }, { "epoch": 0.35911262475758005, "grad_norm": 0.8203125, "learning_rate": 1.8476733865087647e-05, "loss": 0.3289, "step": 4745 }, { "epoch": 0.3591883070810274, "grad_norm": 0.7890625, "learning_rate": 1.847610191503153e-05, "loss": 0.3271, "step": 4746 }, { "epoch": 0.35926398940447474, "grad_norm": 0.8515625, "learning_rate": 1.8475469844727362e-05, "loss": 0.3282, "step": 4747 }, { "epoch": 0.359339671727922, "grad_norm": 1.5390625, "learning_rate": 1.847483765418411e-05, "loss": 0.4488, "step": 4748 }, { "epoch": 0.3594153540513694, "grad_norm": 0.76953125, "learning_rate": 1.8474205343410742e-05, "loss": 0.3247, "step": 4749 }, { "epoch": 0.3594910363748167, "grad_norm": 0.796875, "learning_rate": 1.8473572912416232e-05, "loss": 0.2889, "step": 4750 }, { "epoch": 0.359566718698264, "grad_norm": 0.8046875, "learning_rate": 1.8472940361209547e-05, "loss": 0.3414, "step": 4751 }, { "epoch": 0.35964240102171136, "grad_norm": 0.88671875, "learning_rate": 1.8472307689799663e-05, "loss": 0.3728, "step": 4752 }, { "epoch": 0.3597180833451587, "grad_norm": 0.7734375, "learning_rate": 1.8471674898195554e-05, "loss": 0.3277, "step": 4753 }, { "epoch": 0.35979376566860605, "grad_norm": 0.703125, "learning_rate": 1.84710419864062e-05, "loss": 0.2761, "step": 4754 }, { "epoch": 0.35986944799205334, "grad_norm": 2.03125, "learning_rate": 1.847040895444058e-05, "loss": 0.4875, "step": 4755 }, { "epoch": 0.3599451303155007, "grad_norm": 0.93359375, "learning_rate": 1.846977580230767e-05, "loss": 0.3828, "step": 4756 }, { "epoch": 0.36002081263894803, "grad_norm": 0.8984375, "learning_rate": 1.846914253001646e-05, "loss": 0.3751, "step": 4757 }, { "epoch": 0.3600964949623953, "grad_norm": 0.76171875, "learning_rate": 1.8468509137575924e-05, "loss": 0.3005, "step": 4758 }, { "epoch": 0.36017217728584267, "grad_norm": 0.84765625, "learning_rate": 1.846787562499506e-05, "loss": 0.3363, "step": 4759 }, { "epoch": 0.36024785960929, "grad_norm": 0.87109375, "learning_rate": 1.8467241992282842e-05, "loss": 0.3636, "step": 4760 }, { "epoch": 0.36032354193273736, "grad_norm": 0.90625, "learning_rate": 1.846660823944827e-05, "loss": 0.3929, "step": 4761 }, { "epoch": 0.36039922425618465, "grad_norm": 0.73828125, "learning_rate": 1.8465974366500332e-05, "loss": 0.3014, "step": 4762 }, { "epoch": 0.360474906579632, "grad_norm": 0.796875, "learning_rate": 1.8465340373448018e-05, "loss": 0.3363, "step": 4763 }, { "epoch": 0.36055058890307934, "grad_norm": 0.8515625, "learning_rate": 1.8464706260300322e-05, "loss": 0.3358, "step": 4764 }, { "epoch": 0.36062627122652663, "grad_norm": 0.80078125, "learning_rate": 1.8464072027066245e-05, "loss": 0.317, "step": 4765 }, { "epoch": 0.360701953549974, "grad_norm": 0.83984375, "learning_rate": 1.846343767375478e-05, "loss": 0.3625, "step": 4766 }, { "epoch": 0.3607776358734213, "grad_norm": 1.6328125, "learning_rate": 1.8462803200374928e-05, "loss": 0.3942, "step": 4767 }, { "epoch": 0.36085331819686867, "grad_norm": 0.75, "learning_rate": 1.846216860693569e-05, "loss": 0.2813, "step": 4768 }, { "epoch": 0.36092900052031596, "grad_norm": 0.7734375, "learning_rate": 1.8461533893446066e-05, "loss": 0.3019, "step": 4769 }, { "epoch": 0.3610046828437633, "grad_norm": 0.98046875, "learning_rate": 1.8460899059915067e-05, "loss": 0.3776, "step": 4770 }, { "epoch": 0.36108036516721065, "grad_norm": 0.8828125, "learning_rate": 1.8460264106351693e-05, "loss": 0.3427, "step": 4771 }, { "epoch": 0.36115604749065794, "grad_norm": 0.75390625, "learning_rate": 1.8459629032764957e-05, "loss": 0.3103, "step": 4772 }, { "epoch": 0.3612317298141053, "grad_norm": 0.82421875, "learning_rate": 1.8458993839163867e-05, "loss": 0.3568, "step": 4773 }, { "epoch": 0.36130741213755263, "grad_norm": 0.79296875, "learning_rate": 1.8458358525557426e-05, "loss": 0.3087, "step": 4774 }, { "epoch": 0.361383094461, "grad_norm": 0.92578125, "learning_rate": 1.845772309195466e-05, "loss": 0.3834, "step": 4775 }, { "epoch": 0.36145877678444727, "grad_norm": 0.84765625, "learning_rate": 1.8457087538364578e-05, "loss": 0.3571, "step": 4776 }, { "epoch": 0.3615344591078946, "grad_norm": 0.80078125, "learning_rate": 1.8456451864796195e-05, "loss": 0.3266, "step": 4777 }, { "epoch": 0.36161014143134196, "grad_norm": 0.79296875, "learning_rate": 1.845581607125853e-05, "loss": 0.3148, "step": 4778 }, { "epoch": 0.36168582375478925, "grad_norm": 0.80078125, "learning_rate": 1.8455180157760602e-05, "loss": 0.3302, "step": 4779 }, { "epoch": 0.3617615060782366, "grad_norm": 0.84375, "learning_rate": 1.8454544124311438e-05, "loss": 0.3726, "step": 4780 }, { "epoch": 0.36183718840168394, "grad_norm": 0.7890625, "learning_rate": 1.8453907970920056e-05, "loss": 0.3192, "step": 4781 }, { "epoch": 0.3619128707251313, "grad_norm": 0.76171875, "learning_rate": 1.845327169759548e-05, "loss": 0.3327, "step": 4782 }, { "epoch": 0.3619885530485786, "grad_norm": 0.8359375, "learning_rate": 1.8452635304346743e-05, "loss": 0.37, "step": 4783 }, { "epoch": 0.3620642353720259, "grad_norm": 0.77734375, "learning_rate": 1.8451998791182864e-05, "loss": 0.3255, "step": 4784 }, { "epoch": 0.36213991769547327, "grad_norm": 0.83984375, "learning_rate": 1.845136215811288e-05, "loss": 0.3346, "step": 4785 }, { "epoch": 0.36221560001892056, "grad_norm": 0.828125, "learning_rate": 1.845072540514582e-05, "loss": 0.3264, "step": 4786 }, { "epoch": 0.3622912823423679, "grad_norm": 0.8828125, "learning_rate": 1.845008853229072e-05, "loss": 0.3857, "step": 4787 }, { "epoch": 0.36236696466581525, "grad_norm": 1.4296875, "learning_rate": 1.8449451539556612e-05, "loss": 0.4404, "step": 4788 }, { "epoch": 0.3624426469892626, "grad_norm": 0.8203125, "learning_rate": 1.8448814426952534e-05, "loss": 0.3053, "step": 4789 }, { "epoch": 0.3625183293127099, "grad_norm": 0.8046875, "learning_rate": 1.8448177194487524e-05, "loss": 0.3445, "step": 4790 }, { "epoch": 0.36259401163615723, "grad_norm": 0.74609375, "learning_rate": 1.8447539842170625e-05, "loss": 0.2807, "step": 4791 }, { "epoch": 0.3626696939596046, "grad_norm": 1.1328125, "learning_rate": 1.8446902370010876e-05, "loss": 0.3736, "step": 4792 }, { "epoch": 0.36274537628305187, "grad_norm": 0.80078125, "learning_rate": 1.844626477801732e-05, "loss": 0.3003, "step": 4793 }, { "epoch": 0.3628210586064992, "grad_norm": 1.0859375, "learning_rate": 1.8445627066199005e-05, "loss": 0.3907, "step": 4794 }, { "epoch": 0.36289674092994656, "grad_norm": 0.87890625, "learning_rate": 1.844498923456498e-05, "loss": 0.3722, "step": 4795 }, { "epoch": 0.3629724232533939, "grad_norm": 0.7578125, "learning_rate": 1.8444351283124288e-05, "loss": 0.2967, "step": 4796 }, { "epoch": 0.3630481055768412, "grad_norm": 0.83203125, "learning_rate": 1.844371321188598e-05, "loss": 0.342, "step": 4797 }, { "epoch": 0.36312378790028854, "grad_norm": 0.83984375, "learning_rate": 1.844307502085911e-05, "loss": 0.3536, "step": 4798 }, { "epoch": 0.3631994702237359, "grad_norm": 0.81640625, "learning_rate": 1.8442436710052734e-05, "loss": 0.344, "step": 4799 }, { "epoch": 0.3632751525471832, "grad_norm": 0.75, "learning_rate": 1.8441798279475907e-05, "loss": 0.2777, "step": 4800 }, { "epoch": 0.3633508348706305, "grad_norm": 0.85546875, "learning_rate": 1.8441159729137684e-05, "loss": 0.3295, "step": 4801 }, { "epoch": 0.36342651719407787, "grad_norm": 1.1953125, "learning_rate": 1.8440521059047122e-05, "loss": 0.3383, "step": 4802 }, { "epoch": 0.3635021995175252, "grad_norm": 0.70703125, "learning_rate": 1.8439882269213285e-05, "loss": 0.2634, "step": 4803 }, { "epoch": 0.3635778818409725, "grad_norm": 0.8203125, "learning_rate": 1.8439243359645238e-05, "loss": 0.3403, "step": 4804 }, { "epoch": 0.36365356416441985, "grad_norm": 0.84375, "learning_rate": 1.8438604330352036e-05, "loss": 0.3333, "step": 4805 }, { "epoch": 0.3637292464878672, "grad_norm": 0.81640625, "learning_rate": 1.8437965181342757e-05, "loss": 0.359, "step": 4806 }, { "epoch": 0.3638049288113145, "grad_norm": 0.8515625, "learning_rate": 1.843732591262646e-05, "loss": 0.3645, "step": 4807 }, { "epoch": 0.36388061113476183, "grad_norm": 0.82421875, "learning_rate": 1.8436686524212212e-05, "loss": 0.3371, "step": 4808 }, { "epoch": 0.3639562934582092, "grad_norm": 0.90234375, "learning_rate": 1.8436047016109092e-05, "loss": 0.3737, "step": 4809 }, { "epoch": 0.36403197578165647, "grad_norm": 0.80859375, "learning_rate": 1.8435407388326167e-05, "loss": 0.348, "step": 4810 }, { "epoch": 0.3641076581051038, "grad_norm": 0.85546875, "learning_rate": 1.8434767640872512e-05, "loss": 0.3777, "step": 4811 }, { "epoch": 0.36418334042855116, "grad_norm": 0.8203125, "learning_rate": 1.8434127773757205e-05, "loss": 0.3262, "step": 4812 }, { "epoch": 0.3642590227519985, "grad_norm": 0.859375, "learning_rate": 1.843348778698932e-05, "loss": 0.3584, "step": 4813 }, { "epoch": 0.3643347050754458, "grad_norm": 0.7734375, "learning_rate": 1.843284768057794e-05, "loss": 0.3394, "step": 4814 }, { "epoch": 0.36441038739889314, "grad_norm": 1.171875, "learning_rate": 1.8432207454532146e-05, "loss": 0.368, "step": 4815 }, { "epoch": 0.3644860697223405, "grad_norm": 0.76953125, "learning_rate": 1.8431567108861014e-05, "loss": 0.3233, "step": 4816 }, { "epoch": 0.3645617520457878, "grad_norm": 0.79296875, "learning_rate": 1.8430926643573635e-05, "loss": 0.3053, "step": 4817 }, { "epoch": 0.3646374343692351, "grad_norm": 0.78515625, "learning_rate": 1.8430286058679096e-05, "loss": 0.3316, "step": 4818 }, { "epoch": 0.36471311669268247, "grad_norm": 0.81640625, "learning_rate": 1.842964535418648e-05, "loss": 0.3076, "step": 4819 }, { "epoch": 0.3647887990161298, "grad_norm": 1.046875, "learning_rate": 1.842900453010488e-05, "loss": 0.3722, "step": 4820 }, { "epoch": 0.3648644813395771, "grad_norm": 0.79296875, "learning_rate": 1.842836358644339e-05, "loss": 0.323, "step": 4821 }, { "epoch": 0.36494016366302445, "grad_norm": 0.76171875, "learning_rate": 1.8427722523211096e-05, "loss": 0.2938, "step": 4822 }, { "epoch": 0.3650158459864718, "grad_norm": 0.7890625, "learning_rate": 1.8427081340417093e-05, "loss": 0.3449, "step": 4823 }, { "epoch": 0.3650915283099191, "grad_norm": 0.81640625, "learning_rate": 1.842644003807048e-05, "loss": 0.3582, "step": 4824 }, { "epoch": 0.36516721063336643, "grad_norm": 0.72265625, "learning_rate": 1.842579861618036e-05, "loss": 0.2902, "step": 4825 }, { "epoch": 0.3652428929568138, "grad_norm": 1.125, "learning_rate": 1.842515707475582e-05, "loss": 0.3633, "step": 4826 }, { "epoch": 0.3653185752802611, "grad_norm": 0.7890625, "learning_rate": 1.8424515413805973e-05, "loss": 0.3037, "step": 4827 }, { "epoch": 0.3653942576037084, "grad_norm": 1.15625, "learning_rate": 1.842387363333992e-05, "loss": 0.3487, "step": 4828 }, { "epoch": 0.36546993992715576, "grad_norm": 0.78125, "learning_rate": 1.8423231733366762e-05, "loss": 0.3054, "step": 4829 }, { "epoch": 0.3655456222506031, "grad_norm": 0.73046875, "learning_rate": 1.8422589713895606e-05, "loss": 0.3005, "step": 4830 }, { "epoch": 0.3656213045740504, "grad_norm": 0.828125, "learning_rate": 1.8421947574935562e-05, "loss": 0.3265, "step": 4831 }, { "epoch": 0.36569698689749774, "grad_norm": 0.765625, "learning_rate": 1.8421305316495737e-05, "loss": 0.3004, "step": 4832 }, { "epoch": 0.3657726692209451, "grad_norm": 1.21875, "learning_rate": 1.8420662938585246e-05, "loss": 0.3896, "step": 4833 }, { "epoch": 0.36584835154439244, "grad_norm": 0.8359375, "learning_rate": 1.8420020441213202e-05, "loss": 0.3219, "step": 4834 }, { "epoch": 0.3659240338678397, "grad_norm": 0.81640625, "learning_rate": 1.8419377824388716e-05, "loss": 0.3596, "step": 4835 }, { "epoch": 0.36599971619128707, "grad_norm": 0.80078125, "learning_rate": 1.8418735088120913e-05, "loss": 0.3593, "step": 4836 }, { "epoch": 0.3660753985147344, "grad_norm": 0.8515625, "learning_rate": 1.84180922324189e-05, "loss": 0.3751, "step": 4837 }, { "epoch": 0.3661510808381817, "grad_norm": 0.7890625, "learning_rate": 1.8417449257291802e-05, "loss": 0.3507, "step": 4838 }, { "epoch": 0.36622676316162905, "grad_norm": 0.83984375, "learning_rate": 1.8416806162748747e-05, "loss": 0.362, "step": 4839 }, { "epoch": 0.3663024454850764, "grad_norm": 0.7734375, "learning_rate": 1.841616294879885e-05, "loss": 0.318, "step": 4840 }, { "epoch": 0.36637812780852375, "grad_norm": 0.87890625, "learning_rate": 1.841551961545124e-05, "loss": 0.4275, "step": 4841 }, { "epoch": 0.36645381013197104, "grad_norm": 0.90625, "learning_rate": 1.841487616271504e-05, "loss": 0.3838, "step": 4842 }, { "epoch": 0.3665294924554184, "grad_norm": 0.8125, "learning_rate": 1.8414232590599382e-05, "loss": 0.3343, "step": 4843 }, { "epoch": 0.36660517477886573, "grad_norm": 0.859375, "learning_rate": 1.8413588899113398e-05, "loss": 0.3721, "step": 4844 }, { "epoch": 0.366680857102313, "grad_norm": 0.8515625, "learning_rate": 1.8412945088266218e-05, "loss": 0.3699, "step": 4845 }, { "epoch": 0.36675653942576036, "grad_norm": 0.7578125, "learning_rate": 1.8412301158066972e-05, "loss": 0.259, "step": 4846 }, { "epoch": 0.3668322217492077, "grad_norm": 0.85546875, "learning_rate": 1.84116571085248e-05, "loss": 0.359, "step": 4847 }, { "epoch": 0.36690790407265506, "grad_norm": 0.80078125, "learning_rate": 1.8411012939648834e-05, "loss": 0.3338, "step": 4848 }, { "epoch": 0.36698358639610235, "grad_norm": 0.82421875, "learning_rate": 1.8410368651448217e-05, "loss": 0.3825, "step": 4849 }, { "epoch": 0.3670592687195497, "grad_norm": 0.83203125, "learning_rate": 1.840972424393209e-05, "loss": 0.3344, "step": 4850 }, { "epoch": 0.36713495104299704, "grad_norm": 0.75390625, "learning_rate": 1.8409079717109588e-05, "loss": 0.3071, "step": 4851 }, { "epoch": 0.36721063336644433, "grad_norm": 0.890625, "learning_rate": 1.8408435070989866e-05, "loss": 0.4057, "step": 4852 }, { "epoch": 0.3672863156898917, "grad_norm": 0.80078125, "learning_rate": 1.840779030558206e-05, "loss": 0.3369, "step": 4853 }, { "epoch": 0.367361998013339, "grad_norm": 0.85546875, "learning_rate": 1.840714542089532e-05, "loss": 0.3395, "step": 4854 }, { "epoch": 0.36743768033678637, "grad_norm": 0.8203125, "learning_rate": 1.8406500416938793e-05, "loss": 0.2888, "step": 4855 }, { "epoch": 0.36751336266023366, "grad_norm": 0.796875, "learning_rate": 1.8405855293721634e-05, "loss": 0.3195, "step": 4856 }, { "epoch": 0.367589044983681, "grad_norm": 0.7890625, "learning_rate": 1.8405210051252993e-05, "loss": 0.3219, "step": 4857 }, { "epoch": 0.36766472730712835, "grad_norm": 0.859375, "learning_rate": 1.8404564689542022e-05, "loss": 0.397, "step": 4858 }, { "epoch": 0.36774040963057564, "grad_norm": 0.7734375, "learning_rate": 1.840391920859788e-05, "loss": 0.2756, "step": 4859 }, { "epoch": 0.367816091954023, "grad_norm": 0.734375, "learning_rate": 1.8403273608429722e-05, "loss": 0.2844, "step": 4860 }, { "epoch": 0.36789177427747033, "grad_norm": 0.80078125, "learning_rate": 1.8402627889046706e-05, "loss": 0.3439, "step": 4861 }, { "epoch": 0.3679674566009177, "grad_norm": 0.8515625, "learning_rate": 1.8401982050457995e-05, "loss": 0.3787, "step": 4862 }, { "epoch": 0.36804313892436497, "grad_norm": 0.83984375, "learning_rate": 1.840133609267275e-05, "loss": 0.3785, "step": 4863 }, { "epoch": 0.3681188212478123, "grad_norm": 0.88671875, "learning_rate": 1.840069001570014e-05, "loss": 0.3466, "step": 4864 }, { "epoch": 0.36819450357125966, "grad_norm": 0.8203125, "learning_rate": 1.840004381954932e-05, "loss": 0.3269, "step": 4865 }, { "epoch": 0.36827018589470695, "grad_norm": 0.7265625, "learning_rate": 1.8399397504229464e-05, "loss": 0.2617, "step": 4866 }, { "epoch": 0.3683458682181543, "grad_norm": 0.796875, "learning_rate": 1.839875106974974e-05, "loss": 0.3129, "step": 4867 }, { "epoch": 0.36842155054160164, "grad_norm": 2.28125, "learning_rate": 1.8398104516119323e-05, "loss": 0.3996, "step": 4868 }, { "epoch": 0.36849723286504893, "grad_norm": 0.8515625, "learning_rate": 1.8397457843347382e-05, "loss": 0.3413, "step": 4869 }, { "epoch": 0.3685729151884963, "grad_norm": 0.796875, "learning_rate": 1.8396811051443086e-05, "loss": 0.3541, "step": 4870 }, { "epoch": 0.3686485975119436, "grad_norm": 0.78515625, "learning_rate": 1.839616414041562e-05, "loss": 0.3128, "step": 4871 }, { "epoch": 0.36872427983539097, "grad_norm": 0.78515625, "learning_rate": 1.8395517110274156e-05, "loss": 0.3036, "step": 4872 }, { "epoch": 0.36879996215883826, "grad_norm": 0.86328125, "learning_rate": 1.8394869961027875e-05, "loss": 0.3324, "step": 4873 }, { "epoch": 0.3688756444822856, "grad_norm": 0.765625, "learning_rate": 1.8394222692685958e-05, "loss": 0.3098, "step": 4874 }, { "epoch": 0.36895132680573295, "grad_norm": 0.796875, "learning_rate": 1.8393575305257587e-05, "loss": 0.3485, "step": 4875 }, { "epoch": 0.36902700912918024, "grad_norm": 0.80078125, "learning_rate": 1.8392927798751946e-05, "loss": 0.3065, "step": 4876 }, { "epoch": 0.3691026914526276, "grad_norm": 0.80078125, "learning_rate": 1.839228017317822e-05, "loss": 0.3319, "step": 4877 }, { "epoch": 0.36917837377607493, "grad_norm": 0.8515625, "learning_rate": 1.83916324285456e-05, "loss": 0.3631, "step": 4878 }, { "epoch": 0.3692540560995223, "grad_norm": 0.859375, "learning_rate": 1.839098456486327e-05, "loss": 0.3808, "step": 4879 }, { "epoch": 0.36932973842296957, "grad_norm": 0.80859375, "learning_rate": 1.839033658214043e-05, "loss": 0.3216, "step": 4880 }, { "epoch": 0.3694054207464169, "grad_norm": 0.796875, "learning_rate": 1.8389688480386264e-05, "loss": 0.3348, "step": 4881 }, { "epoch": 0.36948110306986426, "grad_norm": 0.86328125, "learning_rate": 1.838904025960997e-05, "loss": 0.3708, "step": 4882 }, { "epoch": 0.36955678539331155, "grad_norm": 0.80078125, "learning_rate": 1.8388391919820745e-05, "loss": 0.341, "step": 4883 }, { "epoch": 0.3696324677167589, "grad_norm": 0.8046875, "learning_rate": 1.8387743461027786e-05, "loss": 0.3364, "step": 4884 }, { "epoch": 0.36970815004020624, "grad_norm": 1.296875, "learning_rate": 1.8387094883240288e-05, "loss": 0.4074, "step": 4885 }, { "epoch": 0.3697838323636536, "grad_norm": 0.8046875, "learning_rate": 1.838644618646746e-05, "loss": 0.3479, "step": 4886 }, { "epoch": 0.3698595146871009, "grad_norm": 0.82421875, "learning_rate": 1.8385797370718502e-05, "loss": 0.345, "step": 4887 }, { "epoch": 0.3699351970105482, "grad_norm": 0.8828125, "learning_rate": 1.8385148436002613e-05, "loss": 0.3521, "step": 4888 }, { "epoch": 0.37001087933399557, "grad_norm": 0.7890625, "learning_rate": 1.8384499382329006e-05, "loss": 0.3131, "step": 4889 }, { "epoch": 0.37008656165744286, "grad_norm": 0.8203125, "learning_rate": 1.838385020970689e-05, "loss": 0.3066, "step": 4890 }, { "epoch": 0.3701622439808902, "grad_norm": 0.7890625, "learning_rate": 1.838320091814547e-05, "loss": 0.2936, "step": 4891 }, { "epoch": 0.37023792630433755, "grad_norm": 0.73046875, "learning_rate": 1.838255150765396e-05, "loss": 0.2916, "step": 4892 }, { "epoch": 0.3703136086277849, "grad_norm": 0.7890625, "learning_rate": 1.8381901978241566e-05, "loss": 0.3267, "step": 4893 }, { "epoch": 0.3703892909512322, "grad_norm": 0.7578125, "learning_rate": 1.8381252329917515e-05, "loss": 0.3137, "step": 4894 }, { "epoch": 0.37046497327467953, "grad_norm": 0.8125, "learning_rate": 1.8380602562691018e-05, "loss": 0.3369, "step": 4895 }, { "epoch": 0.3705406555981269, "grad_norm": 0.8359375, "learning_rate": 1.837995267657129e-05, "loss": 0.3192, "step": 4896 }, { "epoch": 0.37061633792157417, "grad_norm": 0.80859375, "learning_rate": 1.837930267156755e-05, "loss": 0.3636, "step": 4897 }, { "epoch": 0.3706920202450215, "grad_norm": 0.77734375, "learning_rate": 1.8378652547689023e-05, "loss": 0.326, "step": 4898 }, { "epoch": 0.37076770256846886, "grad_norm": 0.86328125, "learning_rate": 1.8378002304944934e-05, "loss": 0.3613, "step": 4899 }, { "epoch": 0.3708433848919162, "grad_norm": 0.81640625, "learning_rate": 1.8377351943344504e-05, "loss": 0.3264, "step": 4900 }, { "epoch": 0.3709190672153635, "grad_norm": 0.78125, "learning_rate": 1.837670146289696e-05, "loss": 0.3026, "step": 4901 }, { "epoch": 0.37099474953881084, "grad_norm": 0.9453125, "learning_rate": 1.837605086361153e-05, "loss": 0.3424, "step": 4902 }, { "epoch": 0.3710704318622582, "grad_norm": 0.77734375, "learning_rate": 1.8375400145497445e-05, "loss": 0.3124, "step": 4903 }, { "epoch": 0.3711461141857055, "grad_norm": 0.796875, "learning_rate": 1.8374749308563938e-05, "loss": 0.3276, "step": 4904 }, { "epoch": 0.3712217965091528, "grad_norm": 0.7734375, "learning_rate": 1.8374098352820236e-05, "loss": 0.3278, "step": 4905 }, { "epoch": 0.37129747883260017, "grad_norm": 0.7421875, "learning_rate": 1.8373447278275586e-05, "loss": 0.3021, "step": 4906 }, { "epoch": 0.3713731611560475, "grad_norm": 0.84375, "learning_rate": 1.8372796084939214e-05, "loss": 0.3711, "step": 4907 }, { "epoch": 0.3714488434794948, "grad_norm": 0.87890625, "learning_rate": 1.837214477282036e-05, "loss": 0.3799, "step": 4908 }, { "epoch": 0.37152452580294215, "grad_norm": 0.78125, "learning_rate": 1.8371493341928267e-05, "loss": 0.3283, "step": 4909 }, { "epoch": 0.3716002081263895, "grad_norm": 0.84765625, "learning_rate": 1.837084179227217e-05, "loss": 0.374, "step": 4910 }, { "epoch": 0.3716758904498368, "grad_norm": 0.765625, "learning_rate": 1.837019012386132e-05, "loss": 0.2898, "step": 4911 }, { "epoch": 0.37175157277328413, "grad_norm": 0.78125, "learning_rate": 1.8369538336704963e-05, "loss": 0.3146, "step": 4912 }, { "epoch": 0.3718272550967315, "grad_norm": 0.8046875, "learning_rate": 1.836888643081234e-05, "loss": 0.3253, "step": 4913 }, { "epoch": 0.3719029374201788, "grad_norm": 0.80078125, "learning_rate": 1.83682344061927e-05, "loss": 0.3356, "step": 4914 }, { "epoch": 0.3719786197436261, "grad_norm": 0.8125, "learning_rate": 1.8367582262855297e-05, "loss": 0.3465, "step": 4915 }, { "epoch": 0.37205430206707346, "grad_norm": 0.85546875, "learning_rate": 1.8366930000809378e-05, "loss": 0.3606, "step": 4916 }, { "epoch": 0.3721299843905208, "grad_norm": 0.77734375, "learning_rate": 1.83662776200642e-05, "loss": 0.3271, "step": 4917 }, { "epoch": 0.3722056667139681, "grad_norm": 0.8046875, "learning_rate": 1.8365625120629016e-05, "loss": 0.3187, "step": 4918 }, { "epoch": 0.37228134903741544, "grad_norm": 0.87890625, "learning_rate": 1.836497250251308e-05, "loss": 0.3288, "step": 4919 }, { "epoch": 0.3723570313608628, "grad_norm": 1.0859375, "learning_rate": 1.8364319765725663e-05, "loss": 0.3359, "step": 4920 }, { "epoch": 0.37243271368431013, "grad_norm": 0.703125, "learning_rate": 1.836366691027601e-05, "loss": 0.2824, "step": 4921 }, { "epoch": 0.3725083960077574, "grad_norm": 0.7890625, "learning_rate": 1.8363013936173393e-05, "loss": 0.328, "step": 4922 }, { "epoch": 0.37258407833120477, "grad_norm": 1.0625, "learning_rate": 1.836236084342707e-05, "loss": 0.4022, "step": 4923 }, { "epoch": 0.3726597606546521, "grad_norm": 0.75390625, "learning_rate": 1.836170763204631e-05, "loss": 0.3114, "step": 4924 }, { "epoch": 0.3727354429780994, "grad_norm": 0.8671875, "learning_rate": 1.8361054302040372e-05, "loss": 0.3228, "step": 4925 }, { "epoch": 0.37281112530154675, "grad_norm": 0.78125, "learning_rate": 1.836040085341854e-05, "loss": 0.3152, "step": 4926 }, { "epoch": 0.3728868076249941, "grad_norm": 0.7421875, "learning_rate": 1.835974728619007e-05, "loss": 0.2994, "step": 4927 }, { "epoch": 0.37296248994844144, "grad_norm": 0.796875, "learning_rate": 1.8359093600364237e-05, "loss": 0.3216, "step": 4928 }, { "epoch": 0.37303817227188873, "grad_norm": 0.7890625, "learning_rate": 1.835843979595032e-05, "loss": 0.3041, "step": 4929 }, { "epoch": 0.3731138545953361, "grad_norm": 0.7890625, "learning_rate": 1.8357785872957588e-05, "loss": 0.3239, "step": 4930 }, { "epoch": 0.3731895369187834, "grad_norm": 0.84765625, "learning_rate": 1.8357131831395322e-05, "loss": 0.3919, "step": 4931 }, { "epoch": 0.3732652192422307, "grad_norm": 0.79296875, "learning_rate": 1.8356477671272797e-05, "loss": 0.2948, "step": 4932 }, { "epoch": 0.37334090156567806, "grad_norm": 0.7890625, "learning_rate": 1.83558233925993e-05, "loss": 0.3174, "step": 4933 }, { "epoch": 0.3734165838891254, "grad_norm": 0.81640625, "learning_rate": 1.8355168995384113e-05, "loss": 0.3327, "step": 4934 }, { "epoch": 0.3734922662125727, "grad_norm": 1.1796875, "learning_rate": 1.8354514479636506e-05, "loss": 0.3823, "step": 4935 }, { "epoch": 0.37356794853602004, "grad_norm": 1.1171875, "learning_rate": 1.835385984536578e-05, "loss": 0.3702, "step": 4936 }, { "epoch": 0.3736436308594674, "grad_norm": 0.88671875, "learning_rate": 1.8353205092581216e-05, "loss": 0.3783, "step": 4937 }, { "epoch": 0.37371931318291474, "grad_norm": 0.7421875, "learning_rate": 1.8352550221292102e-05, "loss": 0.2857, "step": 4938 }, { "epoch": 0.373794995506362, "grad_norm": 0.80078125, "learning_rate": 1.8351895231507734e-05, "loss": 0.3359, "step": 4939 }, { "epoch": 0.3738706778298094, "grad_norm": 0.828125, "learning_rate": 1.8351240123237396e-05, "loss": 0.3296, "step": 4940 }, { "epoch": 0.3739463601532567, "grad_norm": 0.76171875, "learning_rate": 1.8350584896490384e-05, "loss": 0.3086, "step": 4941 }, { "epoch": 0.374022042476704, "grad_norm": 0.7890625, "learning_rate": 1.8349929551275996e-05, "loss": 0.3239, "step": 4942 }, { "epoch": 0.37409772480015135, "grad_norm": 0.73828125, "learning_rate": 1.834927408760353e-05, "loss": 0.277, "step": 4943 }, { "epoch": 0.3741734071235987, "grad_norm": 0.81640625, "learning_rate": 1.834861850548228e-05, "loss": 0.3457, "step": 4944 }, { "epoch": 0.37424908944704605, "grad_norm": 0.8671875, "learning_rate": 1.8347962804921556e-05, "loss": 0.3621, "step": 4945 }, { "epoch": 0.37432477177049334, "grad_norm": 0.79296875, "learning_rate": 1.834730698593065e-05, "loss": 0.3256, "step": 4946 }, { "epoch": 0.3744004540939407, "grad_norm": 0.79296875, "learning_rate": 1.834665104851887e-05, "loss": 0.3411, "step": 4947 }, { "epoch": 0.37447613641738803, "grad_norm": 0.7890625, "learning_rate": 1.8345994992695523e-05, "loss": 0.3416, "step": 4948 }, { "epoch": 0.3745518187408353, "grad_norm": 0.84765625, "learning_rate": 1.834533881846991e-05, "loss": 0.3549, "step": 4949 }, { "epoch": 0.37462750106428266, "grad_norm": 0.8203125, "learning_rate": 1.834468252585135e-05, "loss": 0.3447, "step": 4950 }, { "epoch": 0.37470318338773, "grad_norm": 0.8671875, "learning_rate": 1.8344026114849147e-05, "loss": 0.3688, "step": 4951 }, { "epoch": 0.37477886571117736, "grad_norm": 0.83203125, "learning_rate": 1.8343369585472615e-05, "loss": 0.3316, "step": 4952 }, { "epoch": 0.37485454803462465, "grad_norm": 0.81640625, "learning_rate": 1.8342712937731066e-05, "loss": 0.308, "step": 4953 }, { "epoch": 0.374930230358072, "grad_norm": 0.765625, "learning_rate": 1.834205617163382e-05, "loss": 0.3031, "step": 4954 }, { "epoch": 0.37500591268151934, "grad_norm": 0.79296875, "learning_rate": 1.834139928719019e-05, "loss": 0.3379, "step": 4955 }, { "epoch": 0.37508159500496663, "grad_norm": 0.81640625, "learning_rate": 1.8340742284409496e-05, "loss": 0.3298, "step": 4956 }, { "epoch": 0.375157277328414, "grad_norm": 0.7578125, "learning_rate": 1.8340085163301064e-05, "loss": 0.3096, "step": 4957 }, { "epoch": 0.3752329596518613, "grad_norm": 0.796875, "learning_rate": 1.8339427923874207e-05, "loss": 0.3335, "step": 4958 }, { "epoch": 0.37530864197530867, "grad_norm": 0.796875, "learning_rate": 1.8338770566138254e-05, "loss": 0.3161, "step": 4959 }, { "epoch": 0.37538432429875596, "grad_norm": 1.015625, "learning_rate": 1.8338113090102536e-05, "loss": 0.3071, "step": 4960 }, { "epoch": 0.3754600066222033, "grad_norm": 0.8515625, "learning_rate": 1.833745549577637e-05, "loss": 0.3913, "step": 4961 }, { "epoch": 0.37553568894565065, "grad_norm": 0.78125, "learning_rate": 1.8336797783169088e-05, "loss": 0.3267, "step": 4962 }, { "epoch": 0.37561137126909794, "grad_norm": 0.75, "learning_rate": 1.8336139952290027e-05, "loss": 0.3067, "step": 4963 }, { "epoch": 0.3756870535925453, "grad_norm": 0.8359375, "learning_rate": 1.8335482003148518e-05, "loss": 0.3263, "step": 4964 }, { "epoch": 0.37576273591599263, "grad_norm": 0.88671875, "learning_rate": 1.833482393575389e-05, "loss": 0.3413, "step": 4965 }, { "epoch": 0.37583841823944, "grad_norm": 0.8125, "learning_rate": 1.833416575011548e-05, "loss": 0.3649, "step": 4966 }, { "epoch": 0.37591410056288727, "grad_norm": 0.921875, "learning_rate": 1.8333507446242628e-05, "loss": 0.3841, "step": 4967 }, { "epoch": 0.3759897828863346, "grad_norm": 0.81640625, "learning_rate": 1.8332849024144673e-05, "loss": 0.3473, "step": 4968 }, { "epoch": 0.37606546520978196, "grad_norm": 0.80859375, "learning_rate": 1.8332190483830952e-05, "loss": 0.3538, "step": 4969 }, { "epoch": 0.37614114753322925, "grad_norm": 0.76953125, "learning_rate": 1.8331531825310815e-05, "loss": 0.2941, "step": 4970 }, { "epoch": 0.3762168298566766, "grad_norm": 0.76953125, "learning_rate": 1.8330873048593596e-05, "loss": 0.3052, "step": 4971 }, { "epoch": 0.37629251218012394, "grad_norm": 0.9140625, "learning_rate": 1.833021415368865e-05, "loss": 0.3791, "step": 4972 }, { "epoch": 0.3763681945035713, "grad_norm": 0.81640625, "learning_rate": 1.8329555140605323e-05, "loss": 0.3378, "step": 4973 }, { "epoch": 0.3764438768270186, "grad_norm": 0.76171875, "learning_rate": 1.832889600935296e-05, "loss": 0.3077, "step": 4974 }, { "epoch": 0.3765195591504659, "grad_norm": 0.84375, "learning_rate": 1.8328236759940915e-05, "loss": 0.3421, "step": 4975 }, { "epoch": 0.37659524147391327, "grad_norm": 0.7890625, "learning_rate": 1.8327577392378537e-05, "loss": 0.3521, "step": 4976 }, { "epoch": 0.37667092379736056, "grad_norm": 0.79296875, "learning_rate": 1.8326917906675182e-05, "loss": 0.2818, "step": 4977 }, { "epoch": 0.3767466061208079, "grad_norm": 0.84375, "learning_rate": 1.8326258302840214e-05, "loss": 0.3729, "step": 4978 }, { "epoch": 0.37682228844425525, "grad_norm": 0.8515625, "learning_rate": 1.832559858088298e-05, "loss": 0.3521, "step": 4979 }, { "epoch": 0.3768979707677026, "grad_norm": 0.875, "learning_rate": 1.8324938740812842e-05, "loss": 0.3576, "step": 4980 }, { "epoch": 0.3769736530911499, "grad_norm": 0.8046875, "learning_rate": 1.832427878263916e-05, "loss": 0.3162, "step": 4981 }, { "epoch": 0.37704933541459723, "grad_norm": 0.78125, "learning_rate": 1.8323618706371303e-05, "loss": 0.3038, "step": 4982 }, { "epoch": 0.3771250177380446, "grad_norm": 0.796875, "learning_rate": 1.8322958512018625e-05, "loss": 0.3215, "step": 4983 }, { "epoch": 0.37720070006149187, "grad_norm": 0.8671875, "learning_rate": 1.8322298199590504e-05, "loss": 0.3576, "step": 4984 }, { "epoch": 0.3772763823849392, "grad_norm": 0.8828125, "learning_rate": 1.83216377690963e-05, "loss": 0.3935, "step": 4985 }, { "epoch": 0.37735206470838656, "grad_norm": 0.80859375, "learning_rate": 1.8320977220545384e-05, "loss": 0.3288, "step": 4986 }, { "epoch": 0.3774277470318339, "grad_norm": 0.9453125, "learning_rate": 1.8320316553947125e-05, "loss": 0.343, "step": 4987 }, { "epoch": 0.3775034293552812, "grad_norm": 0.87109375, "learning_rate": 1.83196557693109e-05, "loss": 0.388, "step": 4988 }, { "epoch": 0.37757911167872854, "grad_norm": 0.765625, "learning_rate": 1.831899486664608e-05, "loss": 0.3098, "step": 4989 }, { "epoch": 0.3776547940021759, "grad_norm": 0.81640625, "learning_rate": 1.831833384596204e-05, "loss": 0.3492, "step": 4990 }, { "epoch": 0.3777304763256232, "grad_norm": 0.82421875, "learning_rate": 1.831767270726816e-05, "loss": 0.3435, "step": 4991 }, { "epoch": 0.3778061586490705, "grad_norm": 0.83984375, "learning_rate": 1.831701145057382e-05, "loss": 0.3918, "step": 4992 }, { "epoch": 0.37788184097251787, "grad_norm": 0.7890625, "learning_rate": 1.8316350075888402e-05, "loss": 0.2975, "step": 4993 }, { "epoch": 0.37795752329596516, "grad_norm": 0.81640625, "learning_rate": 1.8315688583221282e-05, "loss": 0.2912, "step": 4994 }, { "epoch": 0.3780332056194125, "grad_norm": 0.83984375, "learning_rate": 1.8315026972581852e-05, "loss": 0.3448, "step": 4995 }, { "epoch": 0.37810888794285985, "grad_norm": 0.76171875, "learning_rate": 1.83143652439795e-05, "loss": 0.3286, "step": 4996 }, { "epoch": 0.3781845702663072, "grad_norm": 0.7578125, "learning_rate": 1.8313703397423602e-05, "loss": 0.2973, "step": 4997 }, { "epoch": 0.3782602525897545, "grad_norm": 0.8203125, "learning_rate": 1.831304143292356e-05, "loss": 0.3593, "step": 4998 }, { "epoch": 0.37833593491320183, "grad_norm": 0.859375, "learning_rate": 1.8312379350488752e-05, "loss": 0.3582, "step": 4999 }, { "epoch": 0.3784116172366492, "grad_norm": 0.8671875, "learning_rate": 1.8311717150128583e-05, "loss": 0.3225, "step": 5000 }, { "epoch": 0.37848729956009647, "grad_norm": 0.7578125, "learning_rate": 1.8311054831852446e-05, "loss": 0.3246, "step": 5001 }, { "epoch": 0.3785629818835438, "grad_norm": 0.78515625, "learning_rate": 1.8310392395669728e-05, "loss": 0.3231, "step": 5002 }, { "epoch": 0.37863866420699116, "grad_norm": 0.828125, "learning_rate": 1.8309729841589833e-05, "loss": 0.3309, "step": 5003 }, { "epoch": 0.3787143465304385, "grad_norm": 0.7890625, "learning_rate": 1.8309067169622166e-05, "loss": 0.3085, "step": 5004 }, { "epoch": 0.3787900288538858, "grad_norm": 0.7578125, "learning_rate": 1.830840437977612e-05, "loss": 0.2807, "step": 5005 }, { "epoch": 0.37886571117733314, "grad_norm": 0.75390625, "learning_rate": 1.8307741472061097e-05, "loss": 0.3, "step": 5006 }, { "epoch": 0.3789413935007805, "grad_norm": 0.87890625, "learning_rate": 1.8307078446486506e-05, "loss": 0.2915, "step": 5007 }, { "epoch": 0.3790170758242278, "grad_norm": 0.83984375, "learning_rate": 1.8306415303061754e-05, "loss": 0.3445, "step": 5008 }, { "epoch": 0.3790927581476751, "grad_norm": 0.80078125, "learning_rate": 1.8305752041796247e-05, "loss": 0.3446, "step": 5009 }, { "epoch": 0.37916844047112247, "grad_norm": 1.6171875, "learning_rate": 1.830508866269939e-05, "loss": 0.3658, "step": 5010 }, { "epoch": 0.3792441227945698, "grad_norm": 0.765625, "learning_rate": 1.8304425165780603e-05, "loss": 0.3066, "step": 5011 }, { "epoch": 0.3793198051180171, "grad_norm": 0.86328125, "learning_rate": 1.8303761551049287e-05, "loss": 0.3461, "step": 5012 }, { "epoch": 0.37939548744146445, "grad_norm": 0.8515625, "learning_rate": 1.830309781851487e-05, "loss": 0.3385, "step": 5013 }, { "epoch": 0.3794711697649118, "grad_norm": 0.80078125, "learning_rate": 1.8302433968186763e-05, "loss": 0.3348, "step": 5014 }, { "epoch": 0.3795468520883591, "grad_norm": 0.78515625, "learning_rate": 1.8301770000074376e-05, "loss": 0.3137, "step": 5015 }, { "epoch": 0.37962253441180643, "grad_norm": 0.85546875, "learning_rate": 1.830110591418714e-05, "loss": 0.3778, "step": 5016 }, { "epoch": 0.3796982167352538, "grad_norm": 0.79296875, "learning_rate": 1.8300441710534473e-05, "loss": 0.3177, "step": 5017 }, { "epoch": 0.3797738990587011, "grad_norm": 0.8203125, "learning_rate": 1.8299777389125792e-05, "loss": 0.3637, "step": 5018 }, { "epoch": 0.3798495813821484, "grad_norm": 0.79296875, "learning_rate": 1.829911294997053e-05, "loss": 0.3139, "step": 5019 }, { "epoch": 0.37992526370559576, "grad_norm": 0.8203125, "learning_rate": 1.8298448393078106e-05, "loss": 0.3751, "step": 5020 }, { "epoch": 0.3800009460290431, "grad_norm": 0.8125, "learning_rate": 1.8297783718457953e-05, "loss": 0.3406, "step": 5021 }, { "epoch": 0.3800766283524904, "grad_norm": 0.75390625, "learning_rate": 1.8297118926119496e-05, "loss": 0.2945, "step": 5022 }, { "epoch": 0.38015231067593774, "grad_norm": 0.875, "learning_rate": 1.829645401607217e-05, "loss": 0.4039, "step": 5023 }, { "epoch": 0.3802279929993851, "grad_norm": 0.78125, "learning_rate": 1.8295788988325408e-05, "loss": 0.3344, "step": 5024 }, { "epoch": 0.38030367532283244, "grad_norm": 0.796875, "learning_rate": 1.829512384288864e-05, "loss": 0.3335, "step": 5025 }, { "epoch": 0.3803793576462797, "grad_norm": 0.8359375, "learning_rate": 1.8294458579771305e-05, "loss": 0.3509, "step": 5026 }, { "epoch": 0.38045503996972707, "grad_norm": 0.8203125, "learning_rate": 1.8293793198982844e-05, "loss": 0.3223, "step": 5027 }, { "epoch": 0.3805307222931744, "grad_norm": 0.80078125, "learning_rate": 1.8293127700532693e-05, "loss": 0.3318, "step": 5028 }, { "epoch": 0.3806064046166217, "grad_norm": 0.8125, "learning_rate": 1.8292462084430297e-05, "loss": 0.3185, "step": 5029 }, { "epoch": 0.38068208694006905, "grad_norm": 0.765625, "learning_rate": 1.829179635068509e-05, "loss": 0.3055, "step": 5030 }, { "epoch": 0.3807577692635164, "grad_norm": 0.75, "learning_rate": 1.8291130499306526e-05, "loss": 0.2807, "step": 5031 }, { "epoch": 0.38083345158696374, "grad_norm": 0.86328125, "learning_rate": 1.8290464530304047e-05, "loss": 0.3565, "step": 5032 }, { "epoch": 0.38090913391041104, "grad_norm": 0.7421875, "learning_rate": 1.8289798443687103e-05, "loss": 0.2944, "step": 5033 }, { "epoch": 0.3809848162338584, "grad_norm": 0.85546875, "learning_rate": 1.828913223946514e-05, "loss": 0.2991, "step": 5034 }, { "epoch": 0.3810604985573057, "grad_norm": 0.78515625, "learning_rate": 1.8288465917647615e-05, "loss": 0.3369, "step": 5035 }, { "epoch": 0.381136180880753, "grad_norm": 0.78515625, "learning_rate": 1.8287799478243976e-05, "loss": 0.3239, "step": 5036 }, { "epoch": 0.38121186320420036, "grad_norm": 0.73046875, "learning_rate": 1.8287132921263676e-05, "loss": 0.3068, "step": 5037 }, { "epoch": 0.3812875455276477, "grad_norm": 0.7890625, "learning_rate": 1.8286466246716176e-05, "loss": 0.3465, "step": 5038 }, { "epoch": 0.38136322785109505, "grad_norm": 0.80859375, "learning_rate": 1.8285799454610935e-05, "loss": 0.3188, "step": 5039 }, { "epoch": 0.38143891017454234, "grad_norm": 0.7734375, "learning_rate": 1.828513254495741e-05, "loss": 0.2741, "step": 5040 }, { "epoch": 0.3815145924979897, "grad_norm": 0.90625, "learning_rate": 1.828446551776506e-05, "loss": 0.3641, "step": 5041 }, { "epoch": 0.38159027482143704, "grad_norm": 0.859375, "learning_rate": 1.828379837304335e-05, "loss": 0.3919, "step": 5042 }, { "epoch": 0.3816659571448843, "grad_norm": 1.296875, "learning_rate": 1.828313111080174e-05, "loss": 0.4029, "step": 5043 }, { "epoch": 0.3817416394683317, "grad_norm": 0.78515625, "learning_rate": 1.828246373104971e-05, "loss": 0.3331, "step": 5044 }, { "epoch": 0.381817321791779, "grad_norm": 0.79296875, "learning_rate": 1.828179623379671e-05, "loss": 0.3269, "step": 5045 }, { "epoch": 0.38189300411522636, "grad_norm": 0.78515625, "learning_rate": 1.8281128619052224e-05, "loss": 0.3309, "step": 5046 }, { "epoch": 0.38196868643867365, "grad_norm": 0.828125, "learning_rate": 1.8280460886825718e-05, "loss": 0.3392, "step": 5047 }, { "epoch": 0.382044368762121, "grad_norm": 0.81640625, "learning_rate": 1.8279793037126662e-05, "loss": 0.3567, "step": 5048 }, { "epoch": 0.38212005108556835, "grad_norm": 1.3515625, "learning_rate": 1.8279125069964533e-05, "loss": 0.3818, "step": 5049 }, { "epoch": 0.38219573340901564, "grad_norm": 0.7890625, "learning_rate": 1.827845698534881e-05, "loss": 0.3127, "step": 5050 }, { "epoch": 0.382271415732463, "grad_norm": 0.84375, "learning_rate": 1.8277788783288965e-05, "loss": 0.3335, "step": 5051 }, { "epoch": 0.38234709805591033, "grad_norm": 0.84375, "learning_rate": 1.8277120463794486e-05, "loss": 0.3774, "step": 5052 }, { "epoch": 0.3824227803793576, "grad_norm": 0.8046875, "learning_rate": 1.8276452026874843e-05, "loss": 0.3323, "step": 5053 }, { "epoch": 0.38249846270280496, "grad_norm": 0.78125, "learning_rate": 1.8275783472539528e-05, "loss": 0.3321, "step": 5054 }, { "epoch": 0.3825741450262523, "grad_norm": 0.8984375, "learning_rate": 1.8275114800798022e-05, "loss": 0.3492, "step": 5055 }, { "epoch": 0.38264982734969966, "grad_norm": 0.828125, "learning_rate": 1.8274446011659813e-05, "loss": 0.3509, "step": 5056 }, { "epoch": 0.38272550967314695, "grad_norm": 1.03125, "learning_rate": 1.8273777105134388e-05, "loss": 0.3425, "step": 5057 }, { "epoch": 0.3828011919965943, "grad_norm": 0.78125, "learning_rate": 1.8273108081231234e-05, "loss": 0.3349, "step": 5058 }, { "epoch": 0.38287687432004164, "grad_norm": 0.77734375, "learning_rate": 1.8272438939959846e-05, "loss": 0.3316, "step": 5059 }, { "epoch": 0.38295255664348893, "grad_norm": 0.93359375, "learning_rate": 1.827176968132972e-05, "loss": 0.3607, "step": 5060 }, { "epoch": 0.3830282389669363, "grad_norm": 0.74609375, "learning_rate": 1.827110030535034e-05, "loss": 0.28, "step": 5061 }, { "epoch": 0.3831039212903836, "grad_norm": 0.83203125, "learning_rate": 1.827043081203121e-05, "loss": 0.3658, "step": 5062 }, { "epoch": 0.38317960361383097, "grad_norm": 0.84375, "learning_rate": 1.8269761201381828e-05, "loss": 0.3471, "step": 5063 }, { "epoch": 0.38325528593727826, "grad_norm": 0.76171875, "learning_rate": 1.8269091473411694e-05, "loss": 0.3228, "step": 5064 }, { "epoch": 0.3833309682607256, "grad_norm": 0.77734375, "learning_rate": 1.8268421628130303e-05, "loss": 0.3265, "step": 5065 }, { "epoch": 0.38340665058417295, "grad_norm": 0.8046875, "learning_rate": 1.8267751665547164e-05, "loss": 0.3693, "step": 5066 }, { "epoch": 0.38348233290762024, "grad_norm": 0.80078125, "learning_rate": 1.8267081585671778e-05, "loss": 0.3518, "step": 5067 }, { "epoch": 0.3835580152310676, "grad_norm": 0.75, "learning_rate": 1.8266411388513656e-05, "loss": 0.2958, "step": 5068 }, { "epoch": 0.38363369755451493, "grad_norm": 0.8046875, "learning_rate": 1.82657410740823e-05, "loss": 0.3337, "step": 5069 }, { "epoch": 0.3837093798779623, "grad_norm": 0.76171875, "learning_rate": 1.8265070642387223e-05, "loss": 0.3189, "step": 5070 }, { "epoch": 0.38378506220140957, "grad_norm": 0.80859375, "learning_rate": 1.8264400093437937e-05, "loss": 0.3085, "step": 5071 }, { "epoch": 0.3838607445248569, "grad_norm": 0.8828125, "learning_rate": 1.8263729427243953e-05, "loss": 0.3757, "step": 5072 }, { "epoch": 0.38393642684830426, "grad_norm": 0.80078125, "learning_rate": 1.8263058643814784e-05, "loss": 0.3109, "step": 5073 }, { "epoch": 0.38401210917175155, "grad_norm": 0.77734375, "learning_rate": 1.826238774315995e-05, "loss": 0.3137, "step": 5074 }, { "epoch": 0.3840877914951989, "grad_norm": 0.69140625, "learning_rate": 1.8261716725288968e-05, "loss": 0.2301, "step": 5075 }, { "epoch": 0.38416347381864624, "grad_norm": 0.81640625, "learning_rate": 1.826104559021135e-05, "loss": 0.2957, "step": 5076 }, { "epoch": 0.3842391561420936, "grad_norm": 0.77734375, "learning_rate": 1.8260374337936633e-05, "loss": 0.2994, "step": 5077 }, { "epoch": 0.3843148384655409, "grad_norm": 0.76171875, "learning_rate": 1.825970296847433e-05, "loss": 0.3211, "step": 5078 }, { "epoch": 0.3843905207889882, "grad_norm": 0.71484375, "learning_rate": 1.825903148183396e-05, "loss": 0.2929, "step": 5079 }, { "epoch": 0.38446620311243557, "grad_norm": 0.76953125, "learning_rate": 1.8258359878025062e-05, "loss": 0.3201, "step": 5080 }, { "epoch": 0.38454188543588286, "grad_norm": 0.76953125, "learning_rate": 1.825768815705715e-05, "loss": 0.274, "step": 5081 }, { "epoch": 0.3846175677593302, "grad_norm": 0.7734375, "learning_rate": 1.825701631893977e-05, "loss": 0.3189, "step": 5082 }, { "epoch": 0.38469325008277755, "grad_norm": 2.734375, "learning_rate": 1.8256344363682436e-05, "loss": 0.4166, "step": 5083 }, { "epoch": 0.3847689324062249, "grad_norm": 0.7578125, "learning_rate": 1.8255672291294694e-05, "loss": 0.3064, "step": 5084 }, { "epoch": 0.3848446147296722, "grad_norm": 0.79296875, "learning_rate": 1.825500010178607e-05, "loss": 0.3262, "step": 5085 }, { "epoch": 0.38492029705311953, "grad_norm": 0.7734375, "learning_rate": 1.8254327795166104e-05, "loss": 0.3153, "step": 5086 }, { "epoch": 0.3849959793765669, "grad_norm": 0.74609375, "learning_rate": 1.8253655371444332e-05, "loss": 0.3118, "step": 5087 }, { "epoch": 0.38507166170001417, "grad_norm": 0.8046875, "learning_rate": 1.8252982830630297e-05, "loss": 0.3213, "step": 5088 }, { "epoch": 0.3851473440234615, "grad_norm": 0.6953125, "learning_rate": 1.8252310172733537e-05, "loss": 0.2889, "step": 5089 }, { "epoch": 0.38522302634690886, "grad_norm": 0.73828125, "learning_rate": 1.8251637397763597e-05, "loss": 0.3095, "step": 5090 }, { "epoch": 0.3852987086703562, "grad_norm": 0.7265625, "learning_rate": 1.825096450573002e-05, "loss": 0.3063, "step": 5091 }, { "epoch": 0.3853743909938035, "grad_norm": 0.75, "learning_rate": 1.8250291496642352e-05, "loss": 0.2811, "step": 5092 }, { "epoch": 0.38545007331725084, "grad_norm": 1.21875, "learning_rate": 1.824961837051014e-05, "loss": 0.3975, "step": 5093 }, { "epoch": 0.3855257556406982, "grad_norm": 0.7578125, "learning_rate": 1.8248945127342937e-05, "loss": 0.2928, "step": 5094 }, { "epoch": 0.3856014379641455, "grad_norm": 0.75, "learning_rate": 1.824827176715029e-05, "loss": 0.3025, "step": 5095 }, { "epoch": 0.3856771202875928, "grad_norm": 0.8359375, "learning_rate": 1.8247598289941755e-05, "loss": 0.3578, "step": 5096 }, { "epoch": 0.38575280261104017, "grad_norm": 0.75390625, "learning_rate": 1.824692469572688e-05, "loss": 0.3098, "step": 5097 }, { "epoch": 0.3858284849344875, "grad_norm": 0.79296875, "learning_rate": 1.8246250984515233e-05, "loss": 0.3138, "step": 5098 }, { "epoch": 0.3859041672579348, "grad_norm": 0.8125, "learning_rate": 1.824557715631636e-05, "loss": 0.3586, "step": 5099 }, { "epoch": 0.38597984958138215, "grad_norm": 0.79296875, "learning_rate": 1.8244903211139825e-05, "loss": 0.3438, "step": 5100 }, { "epoch": 0.3860555319048295, "grad_norm": 0.75390625, "learning_rate": 1.8244229148995193e-05, "loss": 0.32, "step": 5101 }, { "epoch": 0.3861312142282768, "grad_norm": 0.78125, "learning_rate": 1.824355496989202e-05, "loss": 0.302, "step": 5102 }, { "epoch": 0.38620689655172413, "grad_norm": 0.84375, "learning_rate": 1.8242880673839876e-05, "loss": 0.3841, "step": 5103 }, { "epoch": 0.3862825788751715, "grad_norm": 0.765625, "learning_rate": 1.8242206260848322e-05, "loss": 0.3184, "step": 5104 }, { "epoch": 0.3863582611986188, "grad_norm": 0.7734375, "learning_rate": 1.8241531730926925e-05, "loss": 0.3427, "step": 5105 }, { "epoch": 0.3864339435220661, "grad_norm": 0.8203125, "learning_rate": 1.8240857084085265e-05, "loss": 0.2943, "step": 5106 }, { "epoch": 0.38650962584551346, "grad_norm": 0.80859375, "learning_rate": 1.82401823203329e-05, "loss": 0.3758, "step": 5107 }, { "epoch": 0.3865853081689608, "grad_norm": 0.83203125, "learning_rate": 1.823950743967941e-05, "loss": 0.3729, "step": 5108 }, { "epoch": 0.3866609904924081, "grad_norm": 0.82421875, "learning_rate": 1.8238832442134366e-05, "loss": 0.2985, "step": 5109 }, { "epoch": 0.38673667281585544, "grad_norm": 0.74609375, "learning_rate": 1.8238157327707347e-05, "loss": 0.2804, "step": 5110 }, { "epoch": 0.3868123551393028, "grad_norm": 0.80078125, "learning_rate": 1.8237482096407928e-05, "loss": 0.341, "step": 5111 }, { "epoch": 0.38688803746275013, "grad_norm": 0.74609375, "learning_rate": 1.823680674824569e-05, "loss": 0.2843, "step": 5112 }, { "epoch": 0.3869637197861974, "grad_norm": 0.7578125, "learning_rate": 1.823613128323021e-05, "loss": 0.3065, "step": 5113 }, { "epoch": 0.38703940210964477, "grad_norm": 0.83203125, "learning_rate": 1.8235455701371082e-05, "loss": 0.369, "step": 5114 }, { "epoch": 0.3871150844330921, "grad_norm": 0.78515625, "learning_rate": 1.823478000267788e-05, "loss": 0.3135, "step": 5115 }, { "epoch": 0.3871907667565394, "grad_norm": 0.76953125, "learning_rate": 1.823410418716019e-05, "loss": 0.3276, "step": 5116 }, { "epoch": 0.38726644907998675, "grad_norm": 0.71875, "learning_rate": 1.82334282548276e-05, "loss": 0.28, "step": 5117 }, { "epoch": 0.3873421314034341, "grad_norm": 0.8125, "learning_rate": 1.82327522056897e-05, "loss": 0.3647, "step": 5118 }, { "epoch": 0.3874178137268814, "grad_norm": 0.765625, "learning_rate": 1.8232076039756086e-05, "loss": 0.3377, "step": 5119 }, { "epoch": 0.38749349605032873, "grad_norm": 0.8046875, "learning_rate": 1.8231399757036347e-05, "loss": 0.3424, "step": 5120 }, { "epoch": 0.3875691783737761, "grad_norm": 0.81640625, "learning_rate": 1.8230723357540076e-05, "loss": 0.3164, "step": 5121 }, { "epoch": 0.3876448606972234, "grad_norm": 0.82421875, "learning_rate": 1.8230046841276873e-05, "loss": 0.3356, "step": 5122 }, { "epoch": 0.3877205430206707, "grad_norm": 0.78125, "learning_rate": 1.8229370208256325e-05, "loss": 0.3481, "step": 5123 }, { "epoch": 0.38779622534411806, "grad_norm": 0.76953125, "learning_rate": 1.8228693458488043e-05, "loss": 0.278, "step": 5124 }, { "epoch": 0.3878719076675654, "grad_norm": 0.89453125, "learning_rate": 1.8228016591981623e-05, "loss": 0.3844, "step": 5125 }, { "epoch": 0.3879475899910127, "grad_norm": 0.76953125, "learning_rate": 1.822733960874667e-05, "loss": 0.313, "step": 5126 }, { "epoch": 0.38802327231446004, "grad_norm": 0.72265625, "learning_rate": 1.822666250879278e-05, "loss": 0.268, "step": 5127 }, { "epoch": 0.3880989546379074, "grad_norm": 0.78515625, "learning_rate": 1.822598529212957e-05, "loss": 0.3125, "step": 5128 }, { "epoch": 0.38817463696135474, "grad_norm": 0.79296875, "learning_rate": 1.822530795876664e-05, "loss": 0.346, "step": 5129 }, { "epoch": 0.388250319284802, "grad_norm": 0.859375, "learning_rate": 1.8224630508713606e-05, "loss": 0.3313, "step": 5130 }, { "epoch": 0.38832600160824937, "grad_norm": 0.77734375, "learning_rate": 1.8223952941980068e-05, "loss": 0.312, "step": 5131 }, { "epoch": 0.3884016839316967, "grad_norm": 0.7421875, "learning_rate": 1.822327525857565e-05, "loss": 0.2972, "step": 5132 }, { "epoch": 0.388477366255144, "grad_norm": 0.76953125, "learning_rate": 1.8222597458509955e-05, "loss": 0.3145, "step": 5133 }, { "epoch": 0.38855304857859135, "grad_norm": 0.80859375, "learning_rate": 1.8221919541792608e-05, "loss": 0.3488, "step": 5134 }, { "epoch": 0.3886287309020387, "grad_norm": 0.74609375, "learning_rate": 1.8221241508433224e-05, "loss": 0.3006, "step": 5135 }, { "epoch": 0.38870441322548605, "grad_norm": 0.765625, "learning_rate": 1.822056335844142e-05, "loss": 0.3105, "step": 5136 }, { "epoch": 0.38878009554893334, "grad_norm": 0.71484375, "learning_rate": 1.821988509182682e-05, "loss": 0.273, "step": 5137 }, { "epoch": 0.3888557778723807, "grad_norm": 0.77734375, "learning_rate": 1.821920670859904e-05, "loss": 0.3387, "step": 5138 }, { "epoch": 0.388931460195828, "grad_norm": 0.76171875, "learning_rate": 1.821852820876771e-05, "loss": 0.3164, "step": 5139 }, { "epoch": 0.3890071425192753, "grad_norm": 0.81640625, "learning_rate": 1.8217849592342457e-05, "loss": 0.3566, "step": 5140 }, { "epoch": 0.38908282484272266, "grad_norm": 0.8125, "learning_rate": 1.82171708593329e-05, "loss": 0.3678, "step": 5141 }, { "epoch": 0.38915850716617, "grad_norm": 0.7890625, "learning_rate": 1.8216492009748674e-05, "loss": 0.3473, "step": 5142 }, { "epoch": 0.38923418948961735, "grad_norm": 0.77734375, "learning_rate": 1.821581304359941e-05, "loss": 0.3127, "step": 5143 }, { "epoch": 0.38930987181306465, "grad_norm": 0.76171875, "learning_rate": 1.8215133960894743e-05, "loss": 0.3226, "step": 5144 }, { "epoch": 0.389385554136512, "grad_norm": 0.7265625, "learning_rate": 1.8214454761644295e-05, "loss": 0.2936, "step": 5145 }, { "epoch": 0.38946123645995934, "grad_norm": 0.77734375, "learning_rate": 1.8213775445857716e-05, "loss": 0.303, "step": 5146 }, { "epoch": 0.3895369187834066, "grad_norm": 0.84765625, "learning_rate": 1.8213096013544635e-05, "loss": 0.3491, "step": 5147 }, { "epoch": 0.389612601106854, "grad_norm": 0.89453125, "learning_rate": 1.8212416464714695e-05, "loss": 0.3316, "step": 5148 }, { "epoch": 0.3896882834303013, "grad_norm": 0.83984375, "learning_rate": 1.8211736799377534e-05, "loss": 0.3035, "step": 5149 }, { "epoch": 0.38976396575374866, "grad_norm": 0.7890625, "learning_rate": 1.821105701754279e-05, "loss": 0.3025, "step": 5150 }, { "epoch": 0.38983964807719595, "grad_norm": 0.78515625, "learning_rate": 1.8210377119220116e-05, "loss": 0.3114, "step": 5151 }, { "epoch": 0.3899153304006433, "grad_norm": 0.8125, "learning_rate": 1.820969710441915e-05, "loss": 0.3432, "step": 5152 }, { "epoch": 0.38999101272409065, "grad_norm": 0.78515625, "learning_rate": 1.8209016973149545e-05, "loss": 0.3223, "step": 5153 }, { "epoch": 0.39006669504753794, "grad_norm": 0.8125, "learning_rate": 1.8208336725420946e-05, "loss": 0.365, "step": 5154 }, { "epoch": 0.3901423773709853, "grad_norm": 0.765625, "learning_rate": 1.8207656361243006e-05, "loss": 0.3377, "step": 5155 }, { "epoch": 0.39021805969443263, "grad_norm": 0.76171875, "learning_rate": 1.8206975880625375e-05, "loss": 0.3201, "step": 5156 }, { "epoch": 0.39029374201788, "grad_norm": 0.81640625, "learning_rate": 1.8206295283577705e-05, "loss": 0.3399, "step": 5157 }, { "epoch": 0.39036942434132726, "grad_norm": 0.828125, "learning_rate": 1.8205614570109656e-05, "loss": 0.3531, "step": 5158 }, { "epoch": 0.3904451066647746, "grad_norm": 0.76171875, "learning_rate": 1.8204933740230884e-05, "loss": 0.3177, "step": 5159 }, { "epoch": 0.39052078898822196, "grad_norm": 0.74609375, "learning_rate": 1.8204252793951046e-05, "loss": 0.2998, "step": 5160 }, { "epoch": 0.39059647131166925, "grad_norm": 0.71484375, "learning_rate": 1.82035717312798e-05, "loss": 0.285, "step": 5161 }, { "epoch": 0.3906721536351166, "grad_norm": 0.796875, "learning_rate": 1.8202890552226814e-05, "loss": 0.3231, "step": 5162 }, { "epoch": 0.39074783595856394, "grad_norm": 0.83984375, "learning_rate": 1.820220925680175e-05, "loss": 0.3062, "step": 5163 }, { "epoch": 0.3908235182820113, "grad_norm": 0.8359375, "learning_rate": 1.8201527845014274e-05, "loss": 0.3367, "step": 5164 }, { "epoch": 0.3908992006054586, "grad_norm": 0.75390625, "learning_rate": 1.8200846316874046e-05, "loss": 0.2957, "step": 5165 }, { "epoch": 0.3909748829289059, "grad_norm": 0.72265625, "learning_rate": 1.8200164672390743e-05, "loss": 0.2931, "step": 5166 }, { "epoch": 0.39105056525235327, "grad_norm": 0.78125, "learning_rate": 1.819948291157403e-05, "loss": 0.3146, "step": 5167 }, { "epoch": 0.39112624757580056, "grad_norm": 0.86328125, "learning_rate": 1.8198801034433585e-05, "loss": 0.329, "step": 5168 }, { "epoch": 0.3912019298992479, "grad_norm": 0.78515625, "learning_rate": 1.819811904097907e-05, "loss": 0.33, "step": 5169 }, { "epoch": 0.39127761222269525, "grad_norm": 0.81640625, "learning_rate": 1.8197436931220177e-05, "loss": 0.3645, "step": 5170 }, { "epoch": 0.3913532945461426, "grad_norm": 0.7734375, "learning_rate": 1.819675470516657e-05, "loss": 0.3163, "step": 5171 }, { "epoch": 0.3914289768695899, "grad_norm": 0.75390625, "learning_rate": 1.819607236282793e-05, "loss": 0.2847, "step": 5172 }, { "epoch": 0.39150465919303723, "grad_norm": 0.765625, "learning_rate": 1.819538990421394e-05, "loss": 0.3063, "step": 5173 }, { "epoch": 0.3915803415164846, "grad_norm": 0.734375, "learning_rate": 1.8194707329334277e-05, "loss": 0.289, "step": 5174 }, { "epoch": 0.39165602383993187, "grad_norm": 0.78515625, "learning_rate": 1.8194024638198632e-05, "loss": 0.3192, "step": 5175 }, { "epoch": 0.3917317061633792, "grad_norm": 0.828125, "learning_rate": 1.8193341830816687e-05, "loss": 0.3522, "step": 5176 }, { "epoch": 0.39180738848682656, "grad_norm": 0.80859375, "learning_rate": 1.8192658907198126e-05, "loss": 0.3336, "step": 5177 }, { "epoch": 0.39188307081027385, "grad_norm": 0.8359375, "learning_rate": 1.819197586735264e-05, "loss": 0.3146, "step": 5178 }, { "epoch": 0.3919587531337212, "grad_norm": 0.84375, "learning_rate": 1.8191292711289915e-05, "loss": 0.372, "step": 5179 }, { "epoch": 0.39203443545716854, "grad_norm": 0.75, "learning_rate": 1.819060943901965e-05, "loss": 0.2993, "step": 5180 }, { "epoch": 0.3921101177806159, "grad_norm": 0.8125, "learning_rate": 1.8189926050551534e-05, "loss": 0.3199, "step": 5181 }, { "epoch": 0.3921858001040632, "grad_norm": 0.78125, "learning_rate": 1.8189242545895262e-05, "loss": 0.3362, "step": 5182 }, { "epoch": 0.3922614824275105, "grad_norm": 0.83984375, "learning_rate": 1.8188558925060534e-05, "loss": 0.3475, "step": 5183 }, { "epoch": 0.39233716475095787, "grad_norm": 0.78125, "learning_rate": 1.8187875188057043e-05, "loss": 0.308, "step": 5184 }, { "epoch": 0.39241284707440516, "grad_norm": 1.2578125, "learning_rate": 1.818719133489449e-05, "loss": 0.3763, "step": 5185 }, { "epoch": 0.3924885293978525, "grad_norm": 0.79296875, "learning_rate": 1.818650736558258e-05, "loss": 0.3054, "step": 5186 }, { "epoch": 0.39256421172129985, "grad_norm": 0.84765625, "learning_rate": 1.8185823280131015e-05, "loss": 0.3496, "step": 5187 }, { "epoch": 0.3926398940447472, "grad_norm": 0.84375, "learning_rate": 1.81851390785495e-05, "loss": 0.399, "step": 5188 }, { "epoch": 0.3927155763681945, "grad_norm": 0.95703125, "learning_rate": 1.8184454760847737e-05, "loss": 0.3029, "step": 5189 }, { "epoch": 0.39279125869164183, "grad_norm": 0.76953125, "learning_rate": 1.818377032703544e-05, "loss": 0.3128, "step": 5190 }, { "epoch": 0.3928669410150892, "grad_norm": 0.8203125, "learning_rate": 1.8183085777122322e-05, "loss": 0.3605, "step": 5191 }, { "epoch": 0.39294262333853647, "grad_norm": 0.78125, "learning_rate": 1.818240111111808e-05, "loss": 0.3238, "step": 5192 }, { "epoch": 0.3930183056619838, "grad_norm": 0.77734375, "learning_rate": 1.8181716329032445e-05, "loss": 0.3473, "step": 5193 }, { "epoch": 0.39309398798543116, "grad_norm": 0.91015625, "learning_rate": 1.818103143087512e-05, "loss": 0.3908, "step": 5194 }, { "epoch": 0.3931696703088785, "grad_norm": 0.76953125, "learning_rate": 1.8180346416655827e-05, "loss": 0.3123, "step": 5195 }, { "epoch": 0.3932453526323258, "grad_norm": 2.734375, "learning_rate": 1.8179661286384284e-05, "loss": 0.4499, "step": 5196 }, { "epoch": 0.39332103495577314, "grad_norm": 0.81640625, "learning_rate": 1.8178976040070206e-05, "loss": 0.3389, "step": 5197 }, { "epoch": 0.3933967172792205, "grad_norm": 0.80859375, "learning_rate": 1.8178290677723314e-05, "loss": 0.3613, "step": 5198 }, { "epoch": 0.3934723996026678, "grad_norm": 0.83984375, "learning_rate": 1.8177605199353338e-05, "loss": 0.3734, "step": 5199 }, { "epoch": 0.3935480819261151, "grad_norm": 0.81640625, "learning_rate": 1.8176919604969997e-05, "loss": 0.3159, "step": 5200 }, { "epoch": 0.39362376424956247, "grad_norm": 0.875, "learning_rate": 1.8176233894583023e-05, "loss": 0.3585, "step": 5201 }, { "epoch": 0.3936994465730098, "grad_norm": 0.80078125, "learning_rate": 1.8175548068202138e-05, "loss": 0.3054, "step": 5202 }, { "epoch": 0.3937751288964571, "grad_norm": 0.828125, "learning_rate": 1.8174862125837075e-05, "loss": 0.3192, "step": 5203 }, { "epoch": 0.39385081121990445, "grad_norm": 0.765625, "learning_rate": 1.8174176067497563e-05, "loss": 0.3159, "step": 5204 }, { "epoch": 0.3939264935433518, "grad_norm": 1.1796875, "learning_rate": 1.8173489893193334e-05, "loss": 0.3637, "step": 5205 }, { "epoch": 0.3940021758667991, "grad_norm": 0.80859375, "learning_rate": 1.8172803602934127e-05, "loss": 0.3613, "step": 5206 }, { "epoch": 0.39407785819024643, "grad_norm": 0.78125, "learning_rate": 1.8172117196729675e-05, "loss": 0.3235, "step": 5207 }, { "epoch": 0.3941535405136938, "grad_norm": 0.8359375, "learning_rate": 1.817143067458972e-05, "loss": 0.3694, "step": 5208 }, { "epoch": 0.3942292228371411, "grad_norm": 0.8515625, "learning_rate": 1.8170744036523995e-05, "loss": 0.371, "step": 5209 }, { "epoch": 0.3943049051605884, "grad_norm": 0.859375, "learning_rate": 1.8170057282542246e-05, "loss": 0.3526, "step": 5210 }, { "epoch": 0.39438058748403576, "grad_norm": 0.78515625, "learning_rate": 1.8169370412654212e-05, "loss": 0.3347, "step": 5211 }, { "epoch": 0.3944562698074831, "grad_norm": 0.77734375, "learning_rate": 1.816868342686964e-05, "loss": 0.3356, "step": 5212 }, { "epoch": 0.3945319521309304, "grad_norm": 0.8046875, "learning_rate": 1.8167996325198278e-05, "loss": 0.3524, "step": 5213 }, { "epoch": 0.39460763445437774, "grad_norm": 0.74609375, "learning_rate": 1.816730910764987e-05, "loss": 0.2936, "step": 5214 }, { "epoch": 0.3946833167778251, "grad_norm": 0.7734375, "learning_rate": 1.8166621774234166e-05, "loss": 0.313, "step": 5215 }, { "epoch": 0.39475899910127243, "grad_norm": 0.7890625, "learning_rate": 1.816593432496092e-05, "loss": 0.2986, "step": 5216 }, { "epoch": 0.3948346814247197, "grad_norm": 0.7734375, "learning_rate": 1.816524675983988e-05, "loss": 0.2761, "step": 5217 }, { "epoch": 0.39491036374816707, "grad_norm": 0.80078125, "learning_rate": 1.8164559078880804e-05, "loss": 0.3206, "step": 5218 }, { "epoch": 0.3949860460716144, "grad_norm": 0.69140625, "learning_rate": 1.8163871282093445e-05, "loss": 0.2684, "step": 5219 }, { "epoch": 0.3950617283950617, "grad_norm": 0.81640625, "learning_rate": 1.8163183369487565e-05, "loss": 0.3594, "step": 5220 }, { "epoch": 0.39513741071850905, "grad_norm": 0.84375, "learning_rate": 1.8162495341072918e-05, "loss": 0.367, "step": 5221 }, { "epoch": 0.3952130930419564, "grad_norm": 0.87890625, "learning_rate": 1.8161807196859268e-05, "loss": 0.3882, "step": 5222 }, { "epoch": 0.39528877536540374, "grad_norm": 0.85546875, "learning_rate": 1.8161118936856374e-05, "loss": 0.4031, "step": 5223 }, { "epoch": 0.39536445768885103, "grad_norm": 0.7421875, "learning_rate": 1.8160430561074006e-05, "loss": 0.3128, "step": 5224 }, { "epoch": 0.3954401400122984, "grad_norm": 0.78125, "learning_rate": 1.8159742069521926e-05, "loss": 0.3259, "step": 5225 }, { "epoch": 0.3955158223357457, "grad_norm": 0.78125, "learning_rate": 1.8159053462209903e-05, "loss": 0.3465, "step": 5226 }, { "epoch": 0.395591504659193, "grad_norm": 0.90625, "learning_rate": 1.8158364739147706e-05, "loss": 0.3782, "step": 5227 }, { "epoch": 0.39566718698264036, "grad_norm": 0.7890625, "learning_rate": 1.8157675900345105e-05, "loss": 0.3045, "step": 5228 }, { "epoch": 0.3957428693060877, "grad_norm": 0.8125, "learning_rate": 1.815698694581187e-05, "loss": 0.3513, "step": 5229 }, { "epoch": 0.39581855162953505, "grad_norm": 0.78515625, "learning_rate": 1.8156297875557777e-05, "loss": 0.3106, "step": 5230 }, { "epoch": 0.39589423395298234, "grad_norm": 1.0703125, "learning_rate": 1.8155608689592604e-05, "loss": 0.3497, "step": 5231 }, { "epoch": 0.3959699162764297, "grad_norm": 0.81640625, "learning_rate": 1.8154919387926124e-05, "loss": 0.3515, "step": 5232 }, { "epoch": 0.39604559859987704, "grad_norm": 0.81640625, "learning_rate": 1.815422997056812e-05, "loss": 0.3423, "step": 5233 }, { "epoch": 0.3961212809233243, "grad_norm": 0.75, "learning_rate": 1.815354043752837e-05, "loss": 0.2813, "step": 5234 }, { "epoch": 0.39619696324677167, "grad_norm": 0.796875, "learning_rate": 1.8152850788816658e-05, "loss": 0.3384, "step": 5235 }, { "epoch": 0.396272645570219, "grad_norm": 0.7109375, "learning_rate": 1.8152161024442768e-05, "loss": 0.2772, "step": 5236 }, { "epoch": 0.3963483278936663, "grad_norm": 0.78125, "learning_rate": 1.815147114441648e-05, "loss": 0.3246, "step": 5237 }, { "epoch": 0.39642401021711365, "grad_norm": 0.890625, "learning_rate": 1.815078114874759e-05, "loss": 0.3126, "step": 5238 }, { "epoch": 0.396499692540561, "grad_norm": 0.86328125, "learning_rate": 1.8150091037445876e-05, "loss": 0.3247, "step": 5239 }, { "epoch": 0.39657537486400835, "grad_norm": 0.76171875, "learning_rate": 1.814940081052114e-05, "loss": 0.2831, "step": 5240 }, { "epoch": 0.39665105718745564, "grad_norm": 0.828125, "learning_rate": 1.8148710467983168e-05, "loss": 0.3377, "step": 5241 }, { "epoch": 0.396726739510903, "grad_norm": 1.1875, "learning_rate": 1.8148020009841755e-05, "loss": 0.3389, "step": 5242 }, { "epoch": 0.3968024218343503, "grad_norm": 0.84375, "learning_rate": 1.814732943610669e-05, "loss": 0.3174, "step": 5243 }, { "epoch": 0.3968781041577976, "grad_norm": 0.84765625, "learning_rate": 1.814663874678778e-05, "loss": 0.3715, "step": 5244 }, { "epoch": 0.39695378648124496, "grad_norm": 0.86328125, "learning_rate": 1.814594794189482e-05, "loss": 0.3417, "step": 5245 }, { "epoch": 0.3970294688046923, "grad_norm": 0.7578125, "learning_rate": 1.8145257021437607e-05, "loss": 0.2988, "step": 5246 }, { "epoch": 0.39710515112813966, "grad_norm": 0.83203125, "learning_rate": 1.814456598542595e-05, "loss": 0.3652, "step": 5247 }, { "epoch": 0.39718083345158695, "grad_norm": 0.78125, "learning_rate": 1.8143874833869645e-05, "loss": 0.3062, "step": 5248 }, { "epoch": 0.3972565157750343, "grad_norm": 0.8203125, "learning_rate": 1.81431835667785e-05, "loss": 0.3762, "step": 5249 }, { "epoch": 0.39733219809848164, "grad_norm": 0.8515625, "learning_rate": 1.8142492184162323e-05, "loss": 0.3763, "step": 5250 }, { "epoch": 0.3974078804219289, "grad_norm": 0.828125, "learning_rate": 1.8141800686030917e-05, "loss": 0.368, "step": 5251 }, { "epoch": 0.3974835627453763, "grad_norm": 0.87890625, "learning_rate": 1.81411090723941e-05, "loss": 0.365, "step": 5252 }, { "epoch": 0.3975592450688236, "grad_norm": 0.8671875, "learning_rate": 1.814041734326168e-05, "loss": 0.3644, "step": 5253 }, { "epoch": 0.39763492739227096, "grad_norm": 0.75390625, "learning_rate": 1.813972549864347e-05, "loss": 0.2971, "step": 5254 }, { "epoch": 0.39771060971571826, "grad_norm": 0.80078125, "learning_rate": 1.8139033538549288e-05, "loss": 0.3497, "step": 5255 }, { "epoch": 0.3977862920391656, "grad_norm": 0.86328125, "learning_rate": 1.8138341462988945e-05, "loss": 0.3587, "step": 5256 }, { "epoch": 0.39786197436261295, "grad_norm": 0.8359375, "learning_rate": 1.8137649271972265e-05, "loss": 0.3277, "step": 5257 }, { "epoch": 0.39793765668606024, "grad_norm": 0.79296875, "learning_rate": 1.8136956965509064e-05, "loss": 0.3236, "step": 5258 }, { "epoch": 0.3980133390095076, "grad_norm": 0.828125, "learning_rate": 1.8136264543609163e-05, "loss": 0.3759, "step": 5259 }, { "epoch": 0.39808902133295493, "grad_norm": 0.80859375, "learning_rate": 1.8135572006282392e-05, "loss": 0.3388, "step": 5260 }, { "epoch": 0.3981647036564023, "grad_norm": 0.9296875, "learning_rate": 1.8134879353538566e-05, "loss": 0.3192, "step": 5261 }, { "epoch": 0.39824038597984956, "grad_norm": 0.78515625, "learning_rate": 1.813418658538752e-05, "loss": 0.3418, "step": 5262 }, { "epoch": 0.3983160683032969, "grad_norm": 0.765625, "learning_rate": 1.813349370183908e-05, "loss": 0.318, "step": 5263 }, { "epoch": 0.39839175062674426, "grad_norm": 0.875, "learning_rate": 1.8132800702903068e-05, "loss": 0.3532, "step": 5264 }, { "epoch": 0.39846743295019155, "grad_norm": 1.1875, "learning_rate": 1.8132107588589325e-05, "loss": 0.3508, "step": 5265 }, { "epoch": 0.3985431152736389, "grad_norm": 0.7109375, "learning_rate": 1.813141435890768e-05, "loss": 0.2748, "step": 5266 }, { "epoch": 0.39861879759708624, "grad_norm": 0.85546875, "learning_rate": 1.813072101386797e-05, "loss": 0.3673, "step": 5267 }, { "epoch": 0.3986944799205336, "grad_norm": 0.796875, "learning_rate": 1.813002755348003e-05, "loss": 0.3489, "step": 5268 }, { "epoch": 0.3987701622439809, "grad_norm": 0.75390625, "learning_rate": 1.8129333977753695e-05, "loss": 0.3198, "step": 5269 }, { "epoch": 0.3988458445674282, "grad_norm": 0.8359375, "learning_rate": 1.812864028669881e-05, "loss": 0.3535, "step": 5270 }, { "epoch": 0.39892152689087557, "grad_norm": 0.75390625, "learning_rate": 1.812794648032521e-05, "loss": 0.2865, "step": 5271 }, { "epoch": 0.39899720921432286, "grad_norm": 0.78125, "learning_rate": 1.8127252558642742e-05, "loss": 0.3376, "step": 5272 }, { "epoch": 0.3990728915377702, "grad_norm": 0.796875, "learning_rate": 1.812655852166125e-05, "loss": 0.3323, "step": 5273 }, { "epoch": 0.39914857386121755, "grad_norm": 0.84375, "learning_rate": 1.812586436939058e-05, "loss": 0.4025, "step": 5274 }, { "epoch": 0.3992242561846649, "grad_norm": 0.79296875, "learning_rate": 1.8125170101840582e-05, "loss": 0.3248, "step": 5275 }, { "epoch": 0.3992999385081122, "grad_norm": 0.8515625, "learning_rate": 1.81244757190211e-05, "loss": 0.3579, "step": 5276 }, { "epoch": 0.39937562083155953, "grad_norm": 0.83203125, "learning_rate": 1.8123781220941987e-05, "loss": 0.3558, "step": 5277 }, { "epoch": 0.3994513031550069, "grad_norm": 0.79296875, "learning_rate": 1.81230866076131e-05, "loss": 0.3231, "step": 5278 }, { "epoch": 0.39952698547845417, "grad_norm": 0.89453125, "learning_rate": 1.8122391879044284e-05, "loss": 0.3889, "step": 5279 }, { "epoch": 0.3996026678019015, "grad_norm": 0.79296875, "learning_rate": 1.812169703524541e-05, "loss": 0.3378, "step": 5280 }, { "epoch": 0.39967835012534886, "grad_norm": 0.78125, "learning_rate": 1.8121002076226318e-05, "loss": 0.3206, "step": 5281 }, { "epoch": 0.3997540324487962, "grad_norm": 0.80078125, "learning_rate": 1.8120307001996876e-05, "loss": 0.315, "step": 5282 }, { "epoch": 0.3998297147722435, "grad_norm": 0.82421875, "learning_rate": 1.8119611812566948e-05, "loss": 0.3658, "step": 5283 }, { "epoch": 0.39990539709569084, "grad_norm": 0.75390625, "learning_rate": 1.8118916507946387e-05, "loss": 0.3256, "step": 5284 }, { "epoch": 0.3999810794191382, "grad_norm": 0.78125, "learning_rate": 1.811822108814507e-05, "loss": 0.3228, "step": 5285 }, { "epoch": 0.4000567617425855, "grad_norm": 0.7734375, "learning_rate": 1.8117525553172853e-05, "loss": 0.3476, "step": 5286 }, { "epoch": 0.4001324440660328, "grad_norm": 0.82421875, "learning_rate": 1.8116829903039604e-05, "loss": 0.3263, "step": 5287 }, { "epoch": 0.40020812638948017, "grad_norm": 0.765625, "learning_rate": 1.8116134137755198e-05, "loss": 0.3392, "step": 5288 }, { "epoch": 0.40020812638948017, "eval_loss": 0.34545648097991943, "eval_runtime": 83.3962, "eval_samples_per_second": 58.3, "eval_steps_per_second": 58.3, "step": 5288 }, { "epoch": 0.4002838087129275, "grad_norm": 1.3203125, "learning_rate": 1.8115438257329497e-05, "loss": 0.4362, "step": 5289 }, { "epoch": 0.4003594910363748, "grad_norm": 0.8125, "learning_rate": 1.8114742261772377e-05, "loss": 0.3421, "step": 5290 }, { "epoch": 0.40043517335982215, "grad_norm": 0.6875, "learning_rate": 1.8114046151093716e-05, "loss": 0.2691, "step": 5291 }, { "epoch": 0.4005108556832695, "grad_norm": 0.7578125, "learning_rate": 1.8113349925303385e-05, "loss": 0.3077, "step": 5292 }, { "epoch": 0.4005865380067168, "grad_norm": 0.84765625, "learning_rate": 1.811265358441126e-05, "loss": 0.3154, "step": 5293 }, { "epoch": 0.40066222033016413, "grad_norm": 0.72265625, "learning_rate": 1.8111957128427228e-05, "loss": 0.2972, "step": 5294 }, { "epoch": 0.4007379026536115, "grad_norm": 0.72265625, "learning_rate": 1.811126055736116e-05, "loss": 0.2866, "step": 5295 }, { "epoch": 0.4008135849770588, "grad_norm": 0.75390625, "learning_rate": 1.8110563871222942e-05, "loss": 0.321, "step": 5296 }, { "epoch": 0.4008892673005061, "grad_norm": 0.8046875, "learning_rate": 1.8109867070022456e-05, "loss": 0.3366, "step": 5297 }, { "epoch": 0.40096494962395346, "grad_norm": 0.82421875, "learning_rate": 1.810917015376959e-05, "loss": 0.3448, "step": 5298 }, { "epoch": 0.4010406319474008, "grad_norm": 0.9765625, "learning_rate": 1.8108473122474226e-05, "loss": 0.3328, "step": 5299 }, { "epoch": 0.4011163142708481, "grad_norm": 0.75390625, "learning_rate": 1.810777597614626e-05, "loss": 0.3017, "step": 5300 }, { "epoch": 0.40119199659429544, "grad_norm": 0.74609375, "learning_rate": 1.8107078714795578e-05, "loss": 0.3126, "step": 5301 }, { "epoch": 0.4012676789177428, "grad_norm": 0.765625, "learning_rate": 1.8106381338432073e-05, "loss": 0.3254, "step": 5302 }, { "epoch": 0.4013433612411901, "grad_norm": 0.8046875, "learning_rate": 1.8105683847065635e-05, "loss": 0.3242, "step": 5303 }, { "epoch": 0.4014190435646374, "grad_norm": 0.81640625, "learning_rate": 1.8104986240706162e-05, "loss": 0.3458, "step": 5304 }, { "epoch": 0.40149472588808477, "grad_norm": 0.78515625, "learning_rate": 1.8104288519363553e-05, "loss": 0.3106, "step": 5305 }, { "epoch": 0.4015704082115321, "grad_norm": 0.8046875, "learning_rate": 1.8103590683047704e-05, "loss": 0.3195, "step": 5306 }, { "epoch": 0.4016460905349794, "grad_norm": 0.7421875, "learning_rate": 1.810289273176851e-05, "loss": 0.2983, "step": 5307 }, { "epoch": 0.40172177285842675, "grad_norm": 0.8203125, "learning_rate": 1.8102194665535885e-05, "loss": 0.3447, "step": 5308 }, { "epoch": 0.4017974551818741, "grad_norm": 0.78125, "learning_rate": 1.8101496484359716e-05, "loss": 0.325, "step": 5309 }, { "epoch": 0.4018731375053214, "grad_norm": 0.91796875, "learning_rate": 1.8100798188249924e-05, "loss": 0.3986, "step": 5310 }, { "epoch": 0.40194881982876873, "grad_norm": 0.77734375, "learning_rate": 1.8100099777216406e-05, "loss": 0.312, "step": 5311 }, { "epoch": 0.4020245021522161, "grad_norm": 0.83203125, "learning_rate": 1.8099401251269072e-05, "loss": 0.3644, "step": 5312 }, { "epoch": 0.4021001844756634, "grad_norm": 0.796875, "learning_rate": 1.8098702610417833e-05, "loss": 0.3126, "step": 5313 }, { "epoch": 0.4021758667991107, "grad_norm": 0.796875, "learning_rate": 1.8098003854672602e-05, "loss": 0.3381, "step": 5314 }, { "epoch": 0.40225154912255806, "grad_norm": 0.78125, "learning_rate": 1.8097304984043286e-05, "loss": 0.3102, "step": 5315 }, { "epoch": 0.4023272314460054, "grad_norm": 0.6953125, "learning_rate": 1.8096605998539805e-05, "loss": 0.2756, "step": 5316 }, { "epoch": 0.4024029137694527, "grad_norm": 0.7578125, "learning_rate": 1.809590689817207e-05, "loss": 0.3159, "step": 5317 }, { "epoch": 0.40247859609290004, "grad_norm": 0.75390625, "learning_rate": 1.8095207682950006e-05, "loss": 0.3213, "step": 5318 }, { "epoch": 0.4025542784163474, "grad_norm": 0.796875, "learning_rate": 1.8094508352883527e-05, "loss": 0.3322, "step": 5319 }, { "epoch": 0.40262996073979473, "grad_norm": 0.76953125, "learning_rate": 1.809380890798256e-05, "loss": 0.3229, "step": 5320 }, { "epoch": 0.402705643063242, "grad_norm": 0.75, "learning_rate": 1.8093109348257023e-05, "loss": 0.2803, "step": 5321 }, { "epoch": 0.40278132538668937, "grad_norm": 0.75390625, "learning_rate": 1.809240967371684e-05, "loss": 0.2976, "step": 5322 }, { "epoch": 0.4028570077101367, "grad_norm": 0.85546875, "learning_rate": 1.809170988437194e-05, "loss": 0.3758, "step": 5323 }, { "epoch": 0.402932690033584, "grad_norm": 0.76953125, "learning_rate": 1.8091009980232247e-05, "loss": 0.3266, "step": 5324 }, { "epoch": 0.40300837235703135, "grad_norm": 0.8046875, "learning_rate": 1.8090309961307693e-05, "loss": 0.327, "step": 5325 }, { "epoch": 0.4030840546804787, "grad_norm": 0.78125, "learning_rate": 1.8089609827608213e-05, "loss": 0.3513, "step": 5326 }, { "epoch": 0.40315973700392604, "grad_norm": 0.76171875, "learning_rate": 1.808890957914373e-05, "loss": 0.2972, "step": 5327 }, { "epoch": 0.40323541932737333, "grad_norm": 0.8203125, "learning_rate": 1.8088209215924187e-05, "loss": 0.3394, "step": 5328 }, { "epoch": 0.4033111016508207, "grad_norm": 0.8046875, "learning_rate": 1.808750873795952e-05, "loss": 0.3381, "step": 5329 }, { "epoch": 0.403386783974268, "grad_norm": 0.79296875, "learning_rate": 1.808680814525966e-05, "loss": 0.342, "step": 5330 }, { "epoch": 0.4034624662977153, "grad_norm": 0.84375, "learning_rate": 1.8086107437834544e-05, "loss": 0.3257, "step": 5331 }, { "epoch": 0.40353814862116266, "grad_norm": 0.8671875, "learning_rate": 1.8085406615694122e-05, "loss": 0.3878, "step": 5332 }, { "epoch": 0.40361383094461, "grad_norm": 0.796875, "learning_rate": 1.8084705678848332e-05, "loss": 0.3236, "step": 5333 }, { "epoch": 0.40368951326805735, "grad_norm": 0.875, "learning_rate": 1.8084004627307118e-05, "loss": 0.3448, "step": 5334 }, { "epoch": 0.40376519559150464, "grad_norm": 0.828125, "learning_rate": 1.8083303461080425e-05, "loss": 0.3189, "step": 5335 }, { "epoch": 0.403840877914952, "grad_norm": 1.5078125, "learning_rate": 1.80826021801782e-05, "loss": 0.418, "step": 5336 }, { "epoch": 0.40391656023839934, "grad_norm": 0.74609375, "learning_rate": 1.8081900784610394e-05, "loss": 0.3023, "step": 5337 }, { "epoch": 0.4039922425618466, "grad_norm": 0.8828125, "learning_rate": 1.8081199274386958e-05, "loss": 0.3383, "step": 5338 }, { "epoch": 0.40406792488529397, "grad_norm": 0.82421875, "learning_rate": 1.8080497649517843e-05, "loss": 0.3271, "step": 5339 }, { "epoch": 0.4041436072087413, "grad_norm": 0.80078125, "learning_rate": 1.8079795910013e-05, "loss": 0.3014, "step": 5340 }, { "epoch": 0.40421928953218866, "grad_norm": 0.85546875, "learning_rate": 1.8079094055882387e-05, "loss": 0.3583, "step": 5341 }, { "epoch": 0.40429497185563595, "grad_norm": 0.80859375, "learning_rate": 1.8078392087135957e-05, "loss": 0.3141, "step": 5342 }, { "epoch": 0.4043706541790833, "grad_norm": 1.8359375, "learning_rate": 1.807769000378368e-05, "loss": 0.3711, "step": 5343 }, { "epoch": 0.40444633650253065, "grad_norm": 0.765625, "learning_rate": 1.8076987805835502e-05, "loss": 0.2613, "step": 5344 }, { "epoch": 0.40452201882597794, "grad_norm": 0.796875, "learning_rate": 1.8076285493301394e-05, "loss": 0.3196, "step": 5345 }, { "epoch": 0.4045977011494253, "grad_norm": 0.78515625, "learning_rate": 1.807558306619132e-05, "loss": 0.313, "step": 5346 }, { "epoch": 0.4046733834728726, "grad_norm": 0.83984375, "learning_rate": 1.8074880524515236e-05, "loss": 0.3545, "step": 5347 }, { "epoch": 0.40474906579632, "grad_norm": 0.84375, "learning_rate": 1.8074177868283114e-05, "loss": 0.3482, "step": 5348 }, { "epoch": 0.40482474811976726, "grad_norm": 0.7890625, "learning_rate": 1.807347509750493e-05, "loss": 0.3455, "step": 5349 }, { "epoch": 0.4049004304432146, "grad_norm": 0.765625, "learning_rate": 1.8072772212190645e-05, "loss": 0.3178, "step": 5350 }, { "epoch": 0.40497611276666196, "grad_norm": 0.78515625, "learning_rate": 1.807206921235023e-05, "loss": 0.3212, "step": 5351 }, { "epoch": 0.40505179509010925, "grad_norm": 0.78515625, "learning_rate": 1.8071366097993663e-05, "loss": 0.3045, "step": 5352 }, { "epoch": 0.4051274774135566, "grad_norm": 0.7890625, "learning_rate": 1.8070662869130914e-05, "loss": 0.3288, "step": 5353 }, { "epoch": 0.40520315973700394, "grad_norm": 0.7734375, "learning_rate": 1.806995952577197e-05, "loss": 0.3117, "step": 5354 }, { "epoch": 0.4052788420604513, "grad_norm": 0.81640625, "learning_rate": 1.8069256067926794e-05, "loss": 0.3391, "step": 5355 }, { "epoch": 0.4053545243838986, "grad_norm": 0.87109375, "learning_rate": 1.8068552495605375e-05, "loss": 0.3316, "step": 5356 }, { "epoch": 0.4054302067073459, "grad_norm": 0.8359375, "learning_rate": 1.8067848808817695e-05, "loss": 0.3179, "step": 5357 }, { "epoch": 0.40550588903079327, "grad_norm": 0.84765625, "learning_rate": 1.8067145007573733e-05, "loss": 0.3404, "step": 5358 }, { "epoch": 0.40558157135424056, "grad_norm": 0.87109375, "learning_rate": 1.8066441091883477e-05, "loss": 0.3429, "step": 5359 }, { "epoch": 0.4056572536776879, "grad_norm": 0.85546875, "learning_rate": 1.806573706175691e-05, "loss": 0.3292, "step": 5360 }, { "epoch": 0.40573293600113525, "grad_norm": 0.83984375, "learning_rate": 1.8065032917204025e-05, "loss": 0.3563, "step": 5361 }, { "epoch": 0.40580861832458254, "grad_norm": 0.79296875, "learning_rate": 1.80643286582348e-05, "loss": 0.3068, "step": 5362 }, { "epoch": 0.4058843006480299, "grad_norm": 0.8046875, "learning_rate": 1.8063624284859244e-05, "loss": 0.3402, "step": 5363 }, { "epoch": 0.40595998297147723, "grad_norm": 0.82421875, "learning_rate": 1.8062919797087333e-05, "loss": 0.365, "step": 5364 }, { "epoch": 0.4060356652949246, "grad_norm": 0.78515625, "learning_rate": 1.806221519492907e-05, "loss": 0.3334, "step": 5365 }, { "epoch": 0.40611134761837187, "grad_norm": 0.78125, "learning_rate": 1.8061510478394448e-05, "loss": 0.3126, "step": 5366 }, { "epoch": 0.4061870299418192, "grad_norm": 0.8203125, "learning_rate": 1.8060805647493468e-05, "loss": 0.3367, "step": 5367 }, { "epoch": 0.40626271226526656, "grad_norm": 0.796875, "learning_rate": 1.8060100702236128e-05, "loss": 0.3064, "step": 5368 }, { "epoch": 0.40633839458871385, "grad_norm": 0.87109375, "learning_rate": 1.8059395642632423e-05, "loss": 0.3535, "step": 5369 }, { "epoch": 0.4064140769121612, "grad_norm": 0.80078125, "learning_rate": 1.8058690468692366e-05, "loss": 0.3159, "step": 5370 }, { "epoch": 0.40648975923560854, "grad_norm": 1.0859375, "learning_rate": 1.8057985180425954e-05, "loss": 0.3974, "step": 5371 }, { "epoch": 0.4065654415590559, "grad_norm": 0.78125, "learning_rate": 1.8057279777843193e-05, "loss": 0.3345, "step": 5372 }, { "epoch": 0.4066411238825032, "grad_norm": 0.8515625, "learning_rate": 1.8056574260954095e-05, "loss": 0.3478, "step": 5373 }, { "epoch": 0.4067168062059505, "grad_norm": 0.86328125, "learning_rate": 1.805586862976866e-05, "loss": 0.3462, "step": 5374 }, { "epoch": 0.40679248852939787, "grad_norm": 0.76953125, "learning_rate": 1.8055162884296907e-05, "loss": 0.3, "step": 5375 }, { "epoch": 0.40686817085284516, "grad_norm": 0.82421875, "learning_rate": 1.8054457024548845e-05, "loss": 0.3308, "step": 5376 }, { "epoch": 0.4069438531762925, "grad_norm": 0.80078125, "learning_rate": 1.805375105053449e-05, "loss": 0.3398, "step": 5377 }, { "epoch": 0.40701953549973985, "grad_norm": 0.69140625, "learning_rate": 1.805304496226385e-05, "loss": 0.2717, "step": 5378 }, { "epoch": 0.4070952178231872, "grad_norm": 0.7578125, "learning_rate": 1.8052338759746955e-05, "loss": 0.314, "step": 5379 }, { "epoch": 0.4071709001466345, "grad_norm": 0.7890625, "learning_rate": 1.8051632442993813e-05, "loss": 0.3284, "step": 5380 }, { "epoch": 0.40724658247008183, "grad_norm": 0.796875, "learning_rate": 1.8050926012014445e-05, "loss": 0.2884, "step": 5381 }, { "epoch": 0.4073222647935292, "grad_norm": 0.80859375, "learning_rate": 1.8050219466818882e-05, "loss": 0.3745, "step": 5382 }, { "epoch": 0.40739794711697647, "grad_norm": 0.8515625, "learning_rate": 1.8049512807417135e-05, "loss": 0.3659, "step": 5383 }, { "epoch": 0.4074736294404238, "grad_norm": 0.8671875, "learning_rate": 1.804880603381924e-05, "loss": 0.407, "step": 5384 }, { "epoch": 0.40754931176387116, "grad_norm": 0.921875, "learning_rate": 1.8048099146035217e-05, "loss": 0.3937, "step": 5385 }, { "epoch": 0.4076249940873185, "grad_norm": 0.76171875, "learning_rate": 1.8047392144075094e-05, "loss": 0.2999, "step": 5386 }, { "epoch": 0.4077006764107658, "grad_norm": 0.79296875, "learning_rate": 1.8046685027948906e-05, "loss": 0.3448, "step": 5387 }, { "epoch": 0.40777635873421314, "grad_norm": 0.7734375, "learning_rate": 1.8045977797666685e-05, "loss": 0.3007, "step": 5388 }, { "epoch": 0.4078520410576605, "grad_norm": 0.8125, "learning_rate": 1.804527045323846e-05, "loss": 0.3686, "step": 5389 }, { "epoch": 0.4079277233811078, "grad_norm": 0.84765625, "learning_rate": 1.8044562994674266e-05, "loss": 0.3498, "step": 5390 }, { "epoch": 0.4080034057045551, "grad_norm": 0.78515625, "learning_rate": 1.8043855421984142e-05, "loss": 0.3, "step": 5391 }, { "epoch": 0.40807908802800247, "grad_norm": 0.71484375, "learning_rate": 1.8043147735178125e-05, "loss": 0.2931, "step": 5392 }, { "epoch": 0.4081547703514498, "grad_norm": 0.8515625, "learning_rate": 1.8042439934266253e-05, "loss": 0.3526, "step": 5393 }, { "epoch": 0.4082304526748971, "grad_norm": 0.83984375, "learning_rate": 1.8041732019258573e-05, "loss": 0.3445, "step": 5394 }, { "epoch": 0.40830613499834445, "grad_norm": 0.78515625, "learning_rate": 1.8041023990165122e-05, "loss": 0.3116, "step": 5395 }, { "epoch": 0.4083818173217918, "grad_norm": 0.7421875, "learning_rate": 1.8040315846995947e-05, "loss": 0.2911, "step": 5396 }, { "epoch": 0.4084574996452391, "grad_norm": 0.7421875, "learning_rate": 1.803960758976109e-05, "loss": 0.2896, "step": 5397 }, { "epoch": 0.40853318196868643, "grad_norm": 0.77734375, "learning_rate": 1.803889921847061e-05, "loss": 0.3198, "step": 5398 }, { "epoch": 0.4086088642921338, "grad_norm": 0.81640625, "learning_rate": 1.803819073313455e-05, "loss": 0.3453, "step": 5399 }, { "epoch": 0.4086845466155811, "grad_norm": 0.80078125, "learning_rate": 1.8037482133762954e-05, "loss": 0.3503, "step": 5400 }, { "epoch": 0.4087602289390284, "grad_norm": 0.8046875, "learning_rate": 1.8036773420365886e-05, "loss": 0.3345, "step": 5401 }, { "epoch": 0.40883591126247576, "grad_norm": 0.76171875, "learning_rate": 1.803606459295339e-05, "loss": 0.2659, "step": 5402 }, { "epoch": 0.4089115935859231, "grad_norm": 0.7734375, "learning_rate": 1.803535565153553e-05, "loss": 0.3236, "step": 5403 }, { "epoch": 0.4089872759093704, "grad_norm": 1.765625, "learning_rate": 1.8034646596122365e-05, "loss": 0.34, "step": 5404 }, { "epoch": 0.40906295823281774, "grad_norm": 0.83203125, "learning_rate": 1.8033937426723945e-05, "loss": 0.3493, "step": 5405 }, { "epoch": 0.4091386405562651, "grad_norm": 0.7421875, "learning_rate": 1.803322814335034e-05, "loss": 0.3176, "step": 5406 }, { "epoch": 0.40921432287971243, "grad_norm": 0.75390625, "learning_rate": 1.8032518746011603e-05, "loss": 0.304, "step": 5407 }, { "epoch": 0.4092900052031597, "grad_norm": 0.77734375, "learning_rate": 1.8031809234717807e-05, "loss": 0.3098, "step": 5408 }, { "epoch": 0.40936568752660707, "grad_norm": 0.8046875, "learning_rate": 1.803109960947901e-05, "loss": 0.3783, "step": 5409 }, { "epoch": 0.4094413698500544, "grad_norm": 0.734375, "learning_rate": 1.803038987030529e-05, "loss": 0.2981, "step": 5410 }, { "epoch": 0.4095170521735017, "grad_norm": 1.0390625, "learning_rate": 1.8029680017206705e-05, "loss": 0.3451, "step": 5411 }, { "epoch": 0.40959273449694905, "grad_norm": 0.77734375, "learning_rate": 1.8028970050193327e-05, "loss": 0.3185, "step": 5412 }, { "epoch": 0.4096684168203964, "grad_norm": 0.81640625, "learning_rate": 1.8028259969275237e-05, "loss": 0.3413, "step": 5413 }, { "epoch": 0.40974409914384374, "grad_norm": 0.8203125, "learning_rate": 1.80275497744625e-05, "loss": 0.3418, "step": 5414 }, { "epoch": 0.40981978146729103, "grad_norm": 0.89453125, "learning_rate": 1.802683946576519e-05, "loss": 0.2789, "step": 5415 }, { "epoch": 0.4098954637907384, "grad_norm": 0.78515625, "learning_rate": 1.802612904319339e-05, "loss": 0.2973, "step": 5416 }, { "epoch": 0.4099711461141857, "grad_norm": 0.78125, "learning_rate": 1.802541850675718e-05, "loss": 0.3281, "step": 5417 }, { "epoch": 0.410046828437633, "grad_norm": 0.78125, "learning_rate": 1.802470785646663e-05, "loss": 0.3316, "step": 5418 }, { "epoch": 0.41012251076108036, "grad_norm": 0.86328125, "learning_rate": 1.8023997092331833e-05, "loss": 0.3595, "step": 5419 }, { "epoch": 0.4101981930845277, "grad_norm": 0.76171875, "learning_rate": 1.802328621436287e-05, "loss": 0.3112, "step": 5420 }, { "epoch": 0.410273875407975, "grad_norm": 0.76171875, "learning_rate": 1.8022575222569814e-05, "loss": 0.2942, "step": 5421 }, { "epoch": 0.41034955773142234, "grad_norm": 0.7265625, "learning_rate": 1.8021864116962767e-05, "loss": 0.2726, "step": 5422 }, { "epoch": 0.4104252400548697, "grad_norm": 0.7421875, "learning_rate": 1.8021152897551813e-05, "loss": 0.2647, "step": 5423 }, { "epoch": 0.41050092237831703, "grad_norm": 0.7734375, "learning_rate": 1.8020441564347038e-05, "loss": 0.3196, "step": 5424 }, { "epoch": 0.4105766047017643, "grad_norm": 0.890625, "learning_rate": 1.8019730117358538e-05, "loss": 0.3856, "step": 5425 }, { "epoch": 0.41065228702521167, "grad_norm": 0.77734375, "learning_rate": 1.8019018556596402e-05, "loss": 0.3341, "step": 5426 }, { "epoch": 0.410727969348659, "grad_norm": 0.796875, "learning_rate": 1.8018306882070726e-05, "loss": 0.338, "step": 5427 }, { "epoch": 0.4108036516721063, "grad_norm": 0.79296875, "learning_rate": 1.8017595093791607e-05, "loss": 0.3304, "step": 5428 }, { "epoch": 0.41087933399555365, "grad_norm": 0.80859375, "learning_rate": 1.8016883191769144e-05, "loss": 0.3292, "step": 5429 }, { "epoch": 0.410955016319001, "grad_norm": 0.8046875, "learning_rate": 1.8016171176013436e-05, "loss": 0.346, "step": 5430 }, { "epoch": 0.41103069864244834, "grad_norm": 0.8203125, "learning_rate": 1.8015459046534582e-05, "loss": 0.3334, "step": 5431 }, { "epoch": 0.41110638096589563, "grad_norm": 0.796875, "learning_rate": 1.8014746803342688e-05, "loss": 0.3435, "step": 5432 }, { "epoch": 0.411182063289343, "grad_norm": 0.73046875, "learning_rate": 1.8014034446447858e-05, "loss": 0.254, "step": 5433 }, { "epoch": 0.4112577456127903, "grad_norm": 0.99609375, "learning_rate": 1.8013321975860194e-05, "loss": 0.3834, "step": 5434 }, { "epoch": 0.4113334279362376, "grad_norm": 0.78125, "learning_rate": 1.8012609391589805e-05, "loss": 0.3217, "step": 5435 }, { "epoch": 0.41140911025968496, "grad_norm": 1.3828125, "learning_rate": 1.8011896693646805e-05, "loss": 0.4139, "step": 5436 }, { "epoch": 0.4114847925831323, "grad_norm": 0.7890625, "learning_rate": 1.80111838820413e-05, "loss": 0.3251, "step": 5437 }, { "epoch": 0.41156047490657965, "grad_norm": 0.79296875, "learning_rate": 1.8010470956783407e-05, "loss": 0.3143, "step": 5438 }, { "epoch": 0.41163615723002694, "grad_norm": 0.734375, "learning_rate": 1.8009757917883236e-05, "loss": 0.2666, "step": 5439 }, { "epoch": 0.4117118395534743, "grad_norm": 0.88671875, "learning_rate": 1.8009044765350904e-05, "loss": 0.3552, "step": 5440 }, { "epoch": 0.41178752187692164, "grad_norm": 0.80078125, "learning_rate": 1.8008331499196523e-05, "loss": 0.3371, "step": 5441 }, { "epoch": 0.4118632042003689, "grad_norm": 0.77734375, "learning_rate": 1.8007618119430222e-05, "loss": 0.3365, "step": 5442 }, { "epoch": 0.4119388865238163, "grad_norm": 0.7890625, "learning_rate": 1.8006904626062115e-05, "loss": 0.3274, "step": 5443 }, { "epoch": 0.4120145688472636, "grad_norm": 0.90625, "learning_rate": 1.8006191019102327e-05, "loss": 0.408, "step": 5444 }, { "epoch": 0.41209025117071096, "grad_norm": 0.78515625, "learning_rate": 1.8005477298560977e-05, "loss": 0.3031, "step": 5445 }, { "epoch": 0.41216593349415825, "grad_norm": 1.1953125, "learning_rate": 1.8004763464448195e-05, "loss": 0.2946, "step": 5446 }, { "epoch": 0.4122416158176056, "grad_norm": 0.8828125, "learning_rate": 1.8004049516774106e-05, "loss": 0.3707, "step": 5447 }, { "epoch": 0.41231729814105295, "grad_norm": 0.79296875, "learning_rate": 1.800333545554884e-05, "loss": 0.3518, "step": 5448 }, { "epoch": 0.41239298046450024, "grad_norm": 0.83203125, "learning_rate": 1.8002621280782525e-05, "loss": 0.3511, "step": 5449 }, { "epoch": 0.4124686627879476, "grad_norm": 0.71484375, "learning_rate": 1.8001906992485297e-05, "loss": 0.2846, "step": 5450 }, { "epoch": 0.41254434511139493, "grad_norm": 0.8203125, "learning_rate": 1.8001192590667287e-05, "loss": 0.3456, "step": 5451 }, { "epoch": 0.4126200274348423, "grad_norm": 0.7890625, "learning_rate": 1.800047807533863e-05, "loss": 0.3457, "step": 5452 }, { "epoch": 0.41269570975828956, "grad_norm": 0.84765625, "learning_rate": 1.7999763446509456e-05, "loss": 0.3639, "step": 5453 }, { "epoch": 0.4127713920817369, "grad_norm": 0.79296875, "learning_rate": 1.7999048704189914e-05, "loss": 0.3469, "step": 5454 }, { "epoch": 0.41284707440518426, "grad_norm": 1.171875, "learning_rate": 1.799833384839014e-05, "loss": 0.3742, "step": 5455 }, { "epoch": 0.41292275672863155, "grad_norm": 0.79296875, "learning_rate": 1.7997618879120274e-05, "loss": 0.3368, "step": 5456 }, { "epoch": 0.4129984390520789, "grad_norm": 0.75, "learning_rate": 1.7996903796390455e-05, "loss": 0.3103, "step": 5457 }, { "epoch": 0.41307412137552624, "grad_norm": 0.69140625, "learning_rate": 1.799618860021084e-05, "loss": 0.2652, "step": 5458 }, { "epoch": 0.4131498036989736, "grad_norm": 0.75390625, "learning_rate": 1.7995473290591566e-05, "loss": 0.3042, "step": 5459 }, { "epoch": 0.4132254860224209, "grad_norm": 0.765625, "learning_rate": 1.799475786754278e-05, "loss": 0.3165, "step": 5460 }, { "epoch": 0.4133011683458682, "grad_norm": 0.83984375, "learning_rate": 1.7994042331074634e-05, "loss": 0.319, "step": 5461 }, { "epoch": 0.41337685066931557, "grad_norm": 0.8359375, "learning_rate": 1.7993326681197283e-05, "loss": 0.3547, "step": 5462 }, { "epoch": 0.41345253299276286, "grad_norm": 0.8046875, "learning_rate": 1.7992610917920876e-05, "loss": 0.3363, "step": 5463 }, { "epoch": 0.4135282153162102, "grad_norm": 0.86328125, "learning_rate": 1.7991895041255562e-05, "loss": 0.3573, "step": 5464 }, { "epoch": 0.41360389763965755, "grad_norm": 0.796875, "learning_rate": 1.7991179051211504e-05, "loss": 0.3611, "step": 5465 }, { "epoch": 0.4136795799631049, "grad_norm": 0.7734375, "learning_rate": 1.7990462947798863e-05, "loss": 0.3361, "step": 5466 }, { "epoch": 0.4137552622865522, "grad_norm": 0.8203125, "learning_rate": 1.7989746731027788e-05, "loss": 0.3714, "step": 5467 }, { "epoch": 0.41383094460999953, "grad_norm": 0.80859375, "learning_rate": 1.798903040090844e-05, "loss": 0.3227, "step": 5468 }, { "epoch": 0.4139066269334469, "grad_norm": 0.9296875, "learning_rate": 1.7988313957450998e-05, "loss": 0.3081, "step": 5469 }, { "epoch": 0.41398230925689417, "grad_norm": 0.80078125, "learning_rate": 1.7987597400665607e-05, "loss": 0.3264, "step": 5470 }, { "epoch": 0.4140579915803415, "grad_norm": 0.82421875, "learning_rate": 1.798688073056244e-05, "loss": 0.3207, "step": 5471 }, { "epoch": 0.41413367390378886, "grad_norm": 0.828125, "learning_rate": 1.7986163947151665e-05, "loss": 0.3235, "step": 5472 }, { "epoch": 0.4142093562272362, "grad_norm": 0.71875, "learning_rate": 1.798544705044345e-05, "loss": 0.295, "step": 5473 }, { "epoch": 0.4142850385506835, "grad_norm": 0.7421875, "learning_rate": 1.7984730040447964e-05, "loss": 0.2929, "step": 5474 }, { "epoch": 0.41436072087413084, "grad_norm": 0.703125, "learning_rate": 1.798401291717538e-05, "loss": 0.2703, "step": 5475 }, { "epoch": 0.4144364031975782, "grad_norm": 0.796875, "learning_rate": 1.7983295680635873e-05, "loss": 0.3351, "step": 5476 }, { "epoch": 0.4145120855210255, "grad_norm": 0.859375, "learning_rate": 1.7982578330839617e-05, "loss": 0.386, "step": 5477 }, { "epoch": 0.4145877678444728, "grad_norm": 0.75, "learning_rate": 1.7981860867796786e-05, "loss": 0.3209, "step": 5478 }, { "epoch": 0.41466345016792017, "grad_norm": 1.6953125, "learning_rate": 1.7981143291517564e-05, "loss": 0.3573, "step": 5479 }, { "epoch": 0.4147391324913675, "grad_norm": 0.84765625, "learning_rate": 1.798042560201213e-05, "loss": 0.3475, "step": 5480 }, { "epoch": 0.4148148148148148, "grad_norm": 0.796875, "learning_rate": 1.797970779929066e-05, "loss": 0.3282, "step": 5481 }, { "epoch": 0.41489049713826215, "grad_norm": 0.8359375, "learning_rate": 1.7978989883363344e-05, "loss": 0.3507, "step": 5482 }, { "epoch": 0.4149661794617095, "grad_norm": 0.75, "learning_rate": 1.7978271854240362e-05, "loss": 0.3175, "step": 5483 }, { "epoch": 0.4150418617851568, "grad_norm": 1.125, "learning_rate": 1.79775537119319e-05, "loss": 0.2984, "step": 5484 }, { "epoch": 0.41511754410860413, "grad_norm": 0.765625, "learning_rate": 1.7976835456448155e-05, "loss": 0.3084, "step": 5485 }, { "epoch": 0.4151932264320515, "grad_norm": 0.90234375, "learning_rate": 1.7976117087799307e-05, "loss": 0.3955, "step": 5486 }, { "epoch": 0.41526890875549877, "grad_norm": 0.82421875, "learning_rate": 1.7975398605995548e-05, "loss": 0.3581, "step": 5487 }, { "epoch": 0.4153445910789461, "grad_norm": 0.765625, "learning_rate": 1.797468001104708e-05, "loss": 0.3257, "step": 5488 }, { "epoch": 0.41542027340239346, "grad_norm": 0.8671875, "learning_rate": 1.7973961302964086e-05, "loss": 0.3912, "step": 5489 }, { "epoch": 0.4154959557258408, "grad_norm": 0.7890625, "learning_rate": 1.797324248175677e-05, "loss": 0.326, "step": 5490 }, { "epoch": 0.4155716380492881, "grad_norm": 0.80078125, "learning_rate": 1.7972523547435324e-05, "loss": 0.3145, "step": 5491 }, { "epoch": 0.41564732037273544, "grad_norm": 0.76953125, "learning_rate": 1.7971804500009953e-05, "loss": 0.3106, "step": 5492 }, { "epoch": 0.4157230026961828, "grad_norm": 0.75390625, "learning_rate": 1.7971085339490853e-05, "loss": 0.3033, "step": 5493 }, { "epoch": 0.4157986850196301, "grad_norm": 0.76953125, "learning_rate": 1.797036606588823e-05, "loss": 0.3081, "step": 5494 }, { "epoch": 0.4158743673430774, "grad_norm": 0.80859375, "learning_rate": 1.7969646679212285e-05, "loss": 0.3314, "step": 5495 }, { "epoch": 0.41595004966652477, "grad_norm": 0.796875, "learning_rate": 1.7968927179473228e-05, "loss": 0.3291, "step": 5496 }, { "epoch": 0.4160257319899721, "grad_norm": 0.8203125, "learning_rate": 1.7968207566681265e-05, "loss": 0.3448, "step": 5497 }, { "epoch": 0.4161014143134194, "grad_norm": 0.8046875, "learning_rate": 1.79674878408466e-05, "loss": 0.3174, "step": 5498 }, { "epoch": 0.41617709663686675, "grad_norm": 0.74609375, "learning_rate": 1.7966768001979446e-05, "loss": 0.3217, "step": 5499 }, { "epoch": 0.4162527789603141, "grad_norm": 0.7421875, "learning_rate": 1.796604805009002e-05, "loss": 0.3059, "step": 5500 }, { "epoch": 0.4163284612837614, "grad_norm": 0.80859375, "learning_rate": 1.796532798518853e-05, "loss": 0.3464, "step": 5501 }, { "epoch": 0.41640414360720873, "grad_norm": 0.7578125, "learning_rate": 1.7964607807285194e-05, "loss": 0.3218, "step": 5502 }, { "epoch": 0.4164798259306561, "grad_norm": 0.7578125, "learning_rate": 1.796388751639023e-05, "loss": 0.3308, "step": 5503 }, { "epoch": 0.4165555082541034, "grad_norm": 0.8046875, "learning_rate": 1.7963167112513854e-05, "loss": 0.3477, "step": 5504 }, { "epoch": 0.4166311905775507, "grad_norm": 0.80078125, "learning_rate": 1.7962446595666286e-05, "loss": 0.3176, "step": 5505 }, { "epoch": 0.41670687290099806, "grad_norm": 0.765625, "learning_rate": 1.796172596585775e-05, "loss": 0.3083, "step": 5506 }, { "epoch": 0.4167825552244454, "grad_norm": 0.8671875, "learning_rate": 1.796100522309847e-05, "loss": 0.3561, "step": 5507 }, { "epoch": 0.4168582375478927, "grad_norm": 0.79296875, "learning_rate": 1.7960284367398668e-05, "loss": 0.3239, "step": 5508 }, { "epoch": 0.41693391987134004, "grad_norm": 7.3125, "learning_rate": 1.7959563398768575e-05, "loss": 0.663, "step": 5509 }, { "epoch": 0.4170096021947874, "grad_norm": 0.828125, "learning_rate": 1.7958842317218413e-05, "loss": 0.3366, "step": 5510 }, { "epoch": 0.41708528451823473, "grad_norm": 0.7265625, "learning_rate": 1.7958121122758412e-05, "loss": 0.2906, "step": 5511 }, { "epoch": 0.417160966841682, "grad_norm": 0.796875, "learning_rate": 1.7957399815398814e-05, "loss": 0.3525, "step": 5512 }, { "epoch": 0.41723664916512937, "grad_norm": 0.765625, "learning_rate": 1.795667839514984e-05, "loss": 0.3086, "step": 5513 }, { "epoch": 0.4173123314885767, "grad_norm": 0.76171875, "learning_rate": 1.7955956862021726e-05, "loss": 0.2909, "step": 5514 }, { "epoch": 0.417388013812024, "grad_norm": 0.8125, "learning_rate": 1.7955235216024714e-05, "loss": 0.3136, "step": 5515 }, { "epoch": 0.41746369613547135, "grad_norm": 0.82421875, "learning_rate": 1.795451345716904e-05, "loss": 0.3325, "step": 5516 }, { "epoch": 0.4175393784589187, "grad_norm": 0.79296875, "learning_rate": 1.795379158546494e-05, "loss": 0.3671, "step": 5517 }, { "epoch": 0.41761506078236604, "grad_norm": 0.76953125, "learning_rate": 1.7953069600922653e-05, "loss": 0.3084, "step": 5518 }, { "epoch": 0.41769074310581333, "grad_norm": 1.640625, "learning_rate": 1.7952347503552433e-05, "loss": 0.4064, "step": 5519 }, { "epoch": 0.4177664254292607, "grad_norm": 0.85546875, "learning_rate": 1.7951625293364516e-05, "loss": 0.3599, "step": 5520 }, { "epoch": 0.417842107752708, "grad_norm": 0.79296875, "learning_rate": 1.7950902970369147e-05, "loss": 0.3138, "step": 5521 }, { "epoch": 0.4179177900761553, "grad_norm": 0.78515625, "learning_rate": 1.7950180534576572e-05, "loss": 0.308, "step": 5522 }, { "epoch": 0.41799347239960266, "grad_norm": 0.8125, "learning_rate": 1.794945798599705e-05, "loss": 0.3662, "step": 5523 }, { "epoch": 0.41806915472305, "grad_norm": 0.75390625, "learning_rate": 1.794873532464082e-05, "loss": 0.3163, "step": 5524 }, { "epoch": 0.41814483704649735, "grad_norm": 0.76171875, "learning_rate": 1.7948012550518142e-05, "loss": 0.3113, "step": 5525 }, { "epoch": 0.41822051936994464, "grad_norm": 0.77734375, "learning_rate": 1.7947289663639265e-05, "loss": 0.3192, "step": 5526 }, { "epoch": 0.418296201693392, "grad_norm": 0.8359375, "learning_rate": 1.7946566664014446e-05, "loss": 0.3105, "step": 5527 }, { "epoch": 0.41837188401683933, "grad_norm": 0.8203125, "learning_rate": 1.7945843551653942e-05, "loss": 0.3742, "step": 5528 }, { "epoch": 0.4184475663402866, "grad_norm": 0.859375, "learning_rate": 1.794512032656801e-05, "loss": 0.3542, "step": 5529 }, { "epoch": 0.41852324866373397, "grad_norm": 0.79296875, "learning_rate": 1.7944396988766916e-05, "loss": 0.3375, "step": 5530 }, { "epoch": 0.4185989309871813, "grad_norm": 0.7890625, "learning_rate": 1.7943673538260912e-05, "loss": 0.3587, "step": 5531 }, { "epoch": 0.41867461331062866, "grad_norm": 0.7890625, "learning_rate": 1.794294997506027e-05, "loss": 0.3161, "step": 5532 }, { "epoch": 0.41875029563407595, "grad_norm": 0.90625, "learning_rate": 1.7942226299175253e-05, "loss": 0.3505, "step": 5533 }, { "epoch": 0.4188259779575233, "grad_norm": 0.7578125, "learning_rate": 1.7941502510616126e-05, "loss": 0.313, "step": 5534 }, { "epoch": 0.41890166028097064, "grad_norm": 0.78515625, "learning_rate": 1.794077860939316e-05, "loss": 0.3237, "step": 5535 }, { "epoch": 0.41897734260441793, "grad_norm": 0.72265625, "learning_rate": 1.794005459551662e-05, "loss": 0.2929, "step": 5536 }, { "epoch": 0.4190530249278653, "grad_norm": 0.8359375, "learning_rate": 1.793933046899678e-05, "loss": 0.3806, "step": 5537 }, { "epoch": 0.4191287072513126, "grad_norm": 0.81640625, "learning_rate": 1.7938606229843913e-05, "loss": 0.3788, "step": 5538 }, { "epoch": 0.41920438957476, "grad_norm": 0.76953125, "learning_rate": 1.7937881878068297e-05, "loss": 0.2863, "step": 5539 }, { "epoch": 0.41928007189820726, "grad_norm": 0.80078125, "learning_rate": 1.7937157413680196e-05, "loss": 0.3701, "step": 5540 }, { "epoch": 0.4193557542216546, "grad_norm": 0.78515625, "learning_rate": 1.7936432836689907e-05, "loss": 0.3183, "step": 5541 }, { "epoch": 0.41943143654510195, "grad_norm": 0.72265625, "learning_rate": 1.7935708147107692e-05, "loss": 0.2968, "step": 5542 }, { "epoch": 0.41950711886854924, "grad_norm": 0.81640625, "learning_rate": 1.7934983344943838e-05, "loss": 0.3203, "step": 5543 }, { "epoch": 0.4195828011919966, "grad_norm": 0.70703125, "learning_rate": 1.793425843020863e-05, "loss": 0.2537, "step": 5544 }, { "epoch": 0.41965848351544394, "grad_norm": 0.83203125, "learning_rate": 1.7933533402912354e-05, "loss": 0.3326, "step": 5545 }, { "epoch": 0.4197341658388912, "grad_norm": 0.80078125, "learning_rate": 1.793280826306529e-05, "loss": 0.3398, "step": 5546 }, { "epoch": 0.4198098481623386, "grad_norm": 0.73828125, "learning_rate": 1.7932083010677725e-05, "loss": 0.3072, "step": 5547 }, { "epoch": 0.4198855304857859, "grad_norm": 0.8359375, "learning_rate": 1.7931357645759953e-05, "loss": 0.3618, "step": 5548 }, { "epoch": 0.41996121280923326, "grad_norm": 0.76953125, "learning_rate": 1.793063216832226e-05, "loss": 0.2928, "step": 5549 }, { "epoch": 0.42003689513268055, "grad_norm": 0.796875, "learning_rate": 1.7929906578374946e-05, "loss": 0.3148, "step": 5550 }, { "epoch": 0.4201125774561279, "grad_norm": 0.75, "learning_rate": 1.7929180875928293e-05, "loss": 0.312, "step": 5551 }, { "epoch": 0.42018825977957525, "grad_norm": 0.75, "learning_rate": 1.7928455060992606e-05, "loss": 0.3247, "step": 5552 }, { "epoch": 0.42026394210302254, "grad_norm": 0.76171875, "learning_rate": 1.7927729133578177e-05, "loss": 0.3127, "step": 5553 }, { "epoch": 0.4203396244264699, "grad_norm": 0.75390625, "learning_rate": 1.7927003093695306e-05, "loss": 0.3147, "step": 5554 }, { "epoch": 0.42041530674991723, "grad_norm": 0.76171875, "learning_rate": 1.7926276941354293e-05, "loss": 0.277, "step": 5555 }, { "epoch": 0.4204909890733646, "grad_norm": 0.78125, "learning_rate": 1.792555067656544e-05, "loss": 0.3249, "step": 5556 }, { "epoch": 0.42056667139681186, "grad_norm": 0.81640625, "learning_rate": 1.792482429933905e-05, "loss": 0.3412, "step": 5557 }, { "epoch": 0.4206423537202592, "grad_norm": 0.84765625, "learning_rate": 1.7924097809685427e-05, "loss": 0.3648, "step": 5558 }, { "epoch": 0.42071803604370656, "grad_norm": 0.82421875, "learning_rate": 1.7923371207614876e-05, "loss": 0.3369, "step": 5559 }, { "epoch": 0.42079371836715385, "grad_norm": 0.765625, "learning_rate": 1.792264449313771e-05, "loss": 0.3353, "step": 5560 }, { "epoch": 0.4208694006906012, "grad_norm": 0.76171875, "learning_rate": 1.7921917666264236e-05, "loss": 0.2906, "step": 5561 }, { "epoch": 0.42094508301404854, "grad_norm": 0.77734375, "learning_rate": 1.7921190727004763e-05, "loss": 0.3288, "step": 5562 }, { "epoch": 0.4210207653374959, "grad_norm": 0.765625, "learning_rate": 1.792046367536961e-05, "loss": 0.2989, "step": 5563 }, { "epoch": 0.4210964476609432, "grad_norm": 0.859375, "learning_rate": 1.7919736511369087e-05, "loss": 0.3683, "step": 5564 }, { "epoch": 0.4211721299843905, "grad_norm": 0.9921875, "learning_rate": 1.791900923501351e-05, "loss": 0.3412, "step": 5565 }, { "epoch": 0.42124781230783787, "grad_norm": 0.703125, "learning_rate": 1.79182818463132e-05, "loss": 0.2783, "step": 5566 }, { "epoch": 0.42132349463128516, "grad_norm": 0.73828125, "learning_rate": 1.791755434527847e-05, "loss": 0.3241, "step": 5567 }, { "epoch": 0.4213991769547325, "grad_norm": 0.81640625, "learning_rate": 1.7916826731919644e-05, "loss": 0.3261, "step": 5568 }, { "epoch": 0.42147485927817985, "grad_norm": 0.7578125, "learning_rate": 1.791609900624705e-05, "loss": 0.2798, "step": 5569 }, { "epoch": 0.4215505416016272, "grad_norm": 0.859375, "learning_rate": 1.7915371168271e-05, "loss": 0.3747, "step": 5570 }, { "epoch": 0.4216262239250745, "grad_norm": 0.76953125, "learning_rate": 1.7914643218001836e-05, "loss": 0.3161, "step": 5571 }, { "epoch": 0.42170190624852183, "grad_norm": 0.69140625, "learning_rate": 1.7913915155449867e-05, "loss": 0.285, "step": 5572 }, { "epoch": 0.4217775885719692, "grad_norm": 0.91796875, "learning_rate": 1.7913186980625437e-05, "loss": 0.3263, "step": 5573 }, { "epoch": 0.42185327089541647, "grad_norm": 0.828125, "learning_rate": 1.7912458693538868e-05, "loss": 0.3663, "step": 5574 }, { "epoch": 0.4219289532188638, "grad_norm": 0.80078125, "learning_rate": 1.7911730294200494e-05, "loss": 0.3783, "step": 5575 }, { "epoch": 0.42200463554231116, "grad_norm": 0.74609375, "learning_rate": 1.791100178262065e-05, "loss": 0.3099, "step": 5576 }, { "epoch": 0.4220803178657585, "grad_norm": 0.76953125, "learning_rate": 1.791027315880967e-05, "loss": 0.3337, "step": 5577 }, { "epoch": 0.4221560001892058, "grad_norm": 0.73828125, "learning_rate": 1.7909544422777887e-05, "loss": 0.3047, "step": 5578 }, { "epoch": 0.42223168251265314, "grad_norm": 0.8125, "learning_rate": 1.7908815574535646e-05, "loss": 0.3229, "step": 5579 }, { "epoch": 0.4223073648361005, "grad_norm": 0.7109375, "learning_rate": 1.7908086614093287e-05, "loss": 0.2626, "step": 5580 }, { "epoch": 0.4223830471595478, "grad_norm": 0.73828125, "learning_rate": 1.7907357541461142e-05, "loss": 0.2869, "step": 5581 }, { "epoch": 0.4224587294829951, "grad_norm": 0.89453125, "learning_rate": 1.7906628356649564e-05, "loss": 0.3828, "step": 5582 }, { "epoch": 0.42253441180644247, "grad_norm": 0.8125, "learning_rate": 1.7905899059668895e-05, "loss": 0.3399, "step": 5583 }, { "epoch": 0.4226100941298898, "grad_norm": 1.015625, "learning_rate": 1.790516965052948e-05, "loss": 0.3373, "step": 5584 }, { "epoch": 0.4226857764533371, "grad_norm": 0.8671875, "learning_rate": 1.790444012924167e-05, "loss": 0.3728, "step": 5585 }, { "epoch": 0.42276145877678445, "grad_norm": 0.89453125, "learning_rate": 1.790371049581581e-05, "loss": 0.3613, "step": 5586 }, { "epoch": 0.4228371411002318, "grad_norm": 0.7421875, "learning_rate": 1.7902980750262252e-05, "loss": 0.3105, "step": 5587 }, { "epoch": 0.4229128234236791, "grad_norm": 1.203125, "learning_rate": 1.7902250892591356e-05, "loss": 0.3366, "step": 5588 }, { "epoch": 0.42298850574712643, "grad_norm": 0.84765625, "learning_rate": 1.7901520922813467e-05, "loss": 0.3592, "step": 5589 }, { "epoch": 0.4230641880705738, "grad_norm": 0.9140625, "learning_rate": 1.790079084093894e-05, "loss": 0.3292, "step": 5590 }, { "epoch": 0.4231398703940211, "grad_norm": 1.1171875, "learning_rate": 1.7900060646978143e-05, "loss": 0.385, "step": 5591 }, { "epoch": 0.4232155527174684, "grad_norm": 0.91796875, "learning_rate": 1.7899330340941426e-05, "loss": 0.4322, "step": 5592 }, { "epoch": 0.42329123504091576, "grad_norm": 0.796875, "learning_rate": 1.7898599922839153e-05, "loss": 0.3274, "step": 5593 }, { "epoch": 0.4233669173643631, "grad_norm": 0.75, "learning_rate": 1.7897869392681685e-05, "loss": 0.2951, "step": 5594 }, { "epoch": 0.4234425996878104, "grad_norm": 0.77734375, "learning_rate": 1.7897138750479386e-05, "loss": 0.306, "step": 5595 }, { "epoch": 0.42351828201125774, "grad_norm": 0.75, "learning_rate": 1.7896407996242625e-05, "loss": 0.3175, "step": 5596 }, { "epoch": 0.4235939643347051, "grad_norm": 0.765625, "learning_rate": 1.7895677129981762e-05, "loss": 0.3287, "step": 5597 }, { "epoch": 0.42366964665815243, "grad_norm": 0.796875, "learning_rate": 1.789494615170717e-05, "loss": 0.3198, "step": 5598 }, { "epoch": 0.4237453289815997, "grad_norm": 0.97265625, "learning_rate": 1.7894215061429218e-05, "loss": 0.37, "step": 5599 }, { "epoch": 0.42382101130504707, "grad_norm": 0.73046875, "learning_rate": 1.7893483859158282e-05, "loss": 0.3084, "step": 5600 }, { "epoch": 0.4238966936284944, "grad_norm": 0.7890625, "learning_rate": 1.789275254490473e-05, "loss": 0.3545, "step": 5601 }, { "epoch": 0.4239723759519417, "grad_norm": 0.765625, "learning_rate": 1.7892021118678936e-05, "loss": 0.3148, "step": 5602 }, { "epoch": 0.42404805827538905, "grad_norm": 0.765625, "learning_rate": 1.7891289580491285e-05, "loss": 0.3233, "step": 5603 }, { "epoch": 0.4241237405988364, "grad_norm": 0.8359375, "learning_rate": 1.7890557930352143e-05, "loss": 0.3585, "step": 5604 }, { "epoch": 0.4241994229222837, "grad_norm": 0.84375, "learning_rate": 1.78898261682719e-05, "loss": 0.3686, "step": 5605 }, { "epoch": 0.42427510524573103, "grad_norm": 0.765625, "learning_rate": 1.7889094294260935e-05, "loss": 0.3138, "step": 5606 }, { "epoch": 0.4243507875691784, "grad_norm": 0.71875, "learning_rate": 1.7888362308329624e-05, "loss": 0.2969, "step": 5607 }, { "epoch": 0.4244264698926257, "grad_norm": 0.79296875, "learning_rate": 1.788763021048836e-05, "loss": 0.3429, "step": 5608 }, { "epoch": 0.424502152216073, "grad_norm": 0.80859375, "learning_rate": 1.7886898000747525e-05, "loss": 0.3171, "step": 5609 }, { "epoch": 0.42457783453952036, "grad_norm": 0.75390625, "learning_rate": 1.7886165679117508e-05, "loss": 0.3142, "step": 5610 }, { "epoch": 0.4246535168629677, "grad_norm": 0.75390625, "learning_rate": 1.78854332456087e-05, "loss": 0.3028, "step": 5611 }, { "epoch": 0.424729199186415, "grad_norm": 0.796875, "learning_rate": 1.7884700700231487e-05, "loss": 0.2881, "step": 5612 }, { "epoch": 0.42480488150986234, "grad_norm": 0.7421875, "learning_rate": 1.7883968042996262e-05, "loss": 0.3116, "step": 5613 }, { "epoch": 0.4248805638333097, "grad_norm": 1.9140625, "learning_rate": 1.7883235273913424e-05, "loss": 0.4047, "step": 5614 }, { "epoch": 0.42495624615675703, "grad_norm": 0.73046875, "learning_rate": 1.7882502392993366e-05, "loss": 0.2846, "step": 5615 }, { "epoch": 0.4250319284802043, "grad_norm": 0.78515625, "learning_rate": 1.7881769400246484e-05, "loss": 0.3443, "step": 5616 }, { "epoch": 0.42510761080365167, "grad_norm": 0.76953125, "learning_rate": 1.7881036295683175e-05, "loss": 0.3131, "step": 5617 }, { "epoch": 0.425183293127099, "grad_norm": 0.79296875, "learning_rate": 1.7880303079313845e-05, "loss": 0.3248, "step": 5618 }, { "epoch": 0.4252589754505463, "grad_norm": 0.828125, "learning_rate": 1.787956975114889e-05, "loss": 0.3122, "step": 5619 }, { "epoch": 0.42533465777399365, "grad_norm": 0.765625, "learning_rate": 1.7878836311198716e-05, "loss": 0.3234, "step": 5620 }, { "epoch": 0.425410340097441, "grad_norm": 1.0, "learning_rate": 1.787810275947373e-05, "loss": 0.3453, "step": 5621 }, { "epoch": 0.42548602242088834, "grad_norm": 0.765625, "learning_rate": 1.7877369095984338e-05, "loss": 0.309, "step": 5622 }, { "epoch": 0.42556170474433563, "grad_norm": 0.7890625, "learning_rate": 1.7876635320740948e-05, "loss": 0.3575, "step": 5623 }, { "epoch": 0.425637387067783, "grad_norm": 0.8125, "learning_rate": 1.7875901433753967e-05, "loss": 0.3179, "step": 5624 }, { "epoch": 0.4257130693912303, "grad_norm": 0.81640625, "learning_rate": 1.787516743503381e-05, "loss": 0.3156, "step": 5625 }, { "epoch": 0.4257887517146776, "grad_norm": 0.8515625, "learning_rate": 1.787443332459089e-05, "loss": 0.3608, "step": 5626 }, { "epoch": 0.42586443403812496, "grad_norm": 0.78515625, "learning_rate": 1.7873699102435616e-05, "loss": 0.3465, "step": 5627 }, { "epoch": 0.4259401163615723, "grad_norm": 0.72265625, "learning_rate": 1.7872964768578412e-05, "loss": 0.2794, "step": 5628 }, { "epoch": 0.42601579868501965, "grad_norm": 0.85546875, "learning_rate": 1.787223032302969e-05, "loss": 0.3643, "step": 5629 }, { "epoch": 0.42609148100846694, "grad_norm": 0.76171875, "learning_rate": 1.7871495765799875e-05, "loss": 0.2769, "step": 5630 }, { "epoch": 0.4261671633319143, "grad_norm": 0.80859375, "learning_rate": 1.787076109689938e-05, "loss": 0.3304, "step": 5631 }, { "epoch": 0.42624284565536164, "grad_norm": 0.8125, "learning_rate": 1.7870026316338635e-05, "loss": 0.3416, "step": 5632 }, { "epoch": 0.4263185279788089, "grad_norm": 0.78125, "learning_rate": 1.786929142412806e-05, "loss": 0.3318, "step": 5633 }, { "epoch": 0.42639421030225627, "grad_norm": 0.78515625, "learning_rate": 1.7868556420278084e-05, "loss": 0.2971, "step": 5634 }, { "epoch": 0.4264698926257036, "grad_norm": 0.7265625, "learning_rate": 1.786782130479913e-05, "loss": 0.2975, "step": 5635 }, { "epoch": 0.42654557494915096, "grad_norm": 0.796875, "learning_rate": 1.786708607770163e-05, "loss": 0.3347, "step": 5636 }, { "epoch": 0.42662125727259825, "grad_norm": 0.796875, "learning_rate": 1.7866350738996015e-05, "loss": 0.3439, "step": 5637 }, { "epoch": 0.4266969395960456, "grad_norm": 1.1328125, "learning_rate": 1.7865615288692716e-05, "loss": 0.388, "step": 5638 }, { "epoch": 0.42677262191949294, "grad_norm": 0.8046875, "learning_rate": 1.786487972680216e-05, "loss": 0.33, "step": 5639 }, { "epoch": 0.42684830424294024, "grad_norm": 0.78125, "learning_rate": 1.7864144053334796e-05, "loss": 0.3221, "step": 5640 }, { "epoch": 0.4269239865663876, "grad_norm": 0.8046875, "learning_rate": 1.786340826830105e-05, "loss": 0.3291, "step": 5641 }, { "epoch": 0.4269996688898349, "grad_norm": 0.83203125, "learning_rate": 1.7862672371711364e-05, "loss": 0.331, "step": 5642 }, { "epoch": 0.4270753512132823, "grad_norm": 0.71875, "learning_rate": 1.7861936363576178e-05, "loss": 0.2826, "step": 5643 }, { "epoch": 0.42715103353672956, "grad_norm": 0.78515625, "learning_rate": 1.7861200243905933e-05, "loss": 0.3281, "step": 5644 }, { "epoch": 0.4272267158601769, "grad_norm": 0.76953125, "learning_rate": 1.7860464012711072e-05, "loss": 0.3245, "step": 5645 }, { "epoch": 0.42730239818362425, "grad_norm": 0.7890625, "learning_rate": 1.785972767000204e-05, "loss": 0.306, "step": 5646 }, { "epoch": 0.42737808050707154, "grad_norm": 0.77734375, "learning_rate": 1.7858991215789282e-05, "loss": 0.3229, "step": 5647 }, { "epoch": 0.4274537628305189, "grad_norm": 0.85546875, "learning_rate": 1.7858254650083247e-05, "loss": 0.3868, "step": 5648 }, { "epoch": 0.42752944515396624, "grad_norm": 0.8203125, "learning_rate": 1.785751797289439e-05, "loss": 0.3419, "step": 5649 }, { "epoch": 0.4276051274774136, "grad_norm": 0.78515625, "learning_rate": 1.7856781184233152e-05, "loss": 0.3388, "step": 5650 }, { "epoch": 0.4276808098008609, "grad_norm": 0.8125, "learning_rate": 1.785604428410999e-05, "loss": 0.3529, "step": 5651 }, { "epoch": 0.4277564921243082, "grad_norm": 0.74609375, "learning_rate": 1.785530727253536e-05, "loss": 0.3069, "step": 5652 }, { "epoch": 0.42783217444775556, "grad_norm": 0.78515625, "learning_rate": 1.7854570149519714e-05, "loss": 0.3322, "step": 5653 }, { "epoch": 0.42790785677120285, "grad_norm": 0.875, "learning_rate": 1.785383291507351e-05, "loss": 0.3356, "step": 5654 }, { "epoch": 0.4279835390946502, "grad_norm": 0.7734375, "learning_rate": 1.7853095569207214e-05, "loss": 0.2978, "step": 5655 }, { "epoch": 0.42805922141809755, "grad_norm": 0.77734375, "learning_rate": 1.785235811193128e-05, "loss": 0.2985, "step": 5656 }, { "epoch": 0.4281349037415449, "grad_norm": 0.83203125, "learning_rate": 1.7851620543256166e-05, "loss": 0.3266, "step": 5657 }, { "epoch": 0.4282105860649922, "grad_norm": 0.7734375, "learning_rate": 1.7850882863192347e-05, "loss": 0.3215, "step": 5658 }, { "epoch": 0.42828626838843953, "grad_norm": 0.859375, "learning_rate": 1.7850145071750277e-05, "loss": 0.3715, "step": 5659 }, { "epoch": 0.4283619507118869, "grad_norm": 0.8203125, "learning_rate": 1.784940716894043e-05, "loss": 0.3456, "step": 5660 }, { "epoch": 0.42843763303533416, "grad_norm": 0.77734375, "learning_rate": 1.7848669154773275e-05, "loss": 0.3222, "step": 5661 }, { "epoch": 0.4285133153587815, "grad_norm": 0.78515625, "learning_rate": 1.7847931029259276e-05, "loss": 0.3531, "step": 5662 }, { "epoch": 0.42858899768222886, "grad_norm": 0.75390625, "learning_rate": 1.7847192792408906e-05, "loss": 0.3094, "step": 5663 }, { "epoch": 0.4286646800056762, "grad_norm": 0.734375, "learning_rate": 1.7846454444232643e-05, "loss": 0.3166, "step": 5664 }, { "epoch": 0.4287403623291235, "grad_norm": 0.796875, "learning_rate": 1.784571598474096e-05, "loss": 0.3404, "step": 5665 }, { "epoch": 0.42881604465257084, "grad_norm": 0.765625, "learning_rate": 1.784497741394433e-05, "loss": 0.3237, "step": 5666 }, { "epoch": 0.4288917269760182, "grad_norm": 0.7890625, "learning_rate": 1.7844238731853233e-05, "loss": 0.3154, "step": 5667 }, { "epoch": 0.4289674092994655, "grad_norm": 0.80859375, "learning_rate": 1.7843499938478147e-05, "loss": 0.3617, "step": 5668 }, { "epoch": 0.4290430916229128, "grad_norm": 0.7890625, "learning_rate": 1.7842761033829555e-05, "loss": 0.3279, "step": 5669 }, { "epoch": 0.42911877394636017, "grad_norm": 0.88671875, "learning_rate": 1.784202201791794e-05, "loss": 0.4103, "step": 5670 }, { "epoch": 0.42919445626980746, "grad_norm": 0.7734375, "learning_rate": 1.784128289075379e-05, "loss": 0.3552, "step": 5671 }, { "epoch": 0.4292701385932548, "grad_norm": 0.765625, "learning_rate": 1.784054365234758e-05, "loss": 0.2941, "step": 5672 }, { "epoch": 0.42934582091670215, "grad_norm": 0.9140625, "learning_rate": 1.7839804302709805e-05, "loss": 0.3633, "step": 5673 }, { "epoch": 0.4294215032401495, "grad_norm": 1.2265625, "learning_rate": 1.783906484185095e-05, "loss": 0.3784, "step": 5674 }, { "epoch": 0.4294971855635968, "grad_norm": 0.8359375, "learning_rate": 1.7838325269781508e-05, "loss": 0.3619, "step": 5675 }, { "epoch": 0.42957286788704413, "grad_norm": 0.83203125, "learning_rate": 1.7837585586511975e-05, "loss": 0.347, "step": 5676 }, { "epoch": 0.4296485502104915, "grad_norm": 0.80859375, "learning_rate": 1.783684579205284e-05, "loss": 0.3405, "step": 5677 }, { "epoch": 0.42972423253393877, "grad_norm": 0.79296875, "learning_rate": 1.7836105886414596e-05, "loss": 0.3324, "step": 5678 }, { "epoch": 0.4297999148573861, "grad_norm": 0.79296875, "learning_rate": 1.7835365869607747e-05, "loss": 0.3118, "step": 5679 }, { "epoch": 0.42987559718083346, "grad_norm": 0.83203125, "learning_rate": 1.7834625741642782e-05, "loss": 0.3462, "step": 5680 }, { "epoch": 0.4299512795042808, "grad_norm": 0.7734375, "learning_rate": 1.7833885502530212e-05, "loss": 0.3216, "step": 5681 }, { "epoch": 0.4300269618277281, "grad_norm": 0.8671875, "learning_rate": 1.783314515228053e-05, "loss": 0.3386, "step": 5682 }, { "epoch": 0.43010264415117544, "grad_norm": 0.79296875, "learning_rate": 1.783240469090424e-05, "loss": 0.335, "step": 5683 }, { "epoch": 0.4301783264746228, "grad_norm": 0.8203125, "learning_rate": 1.783166411841185e-05, "loss": 0.3603, "step": 5684 }, { "epoch": 0.4302540087980701, "grad_norm": 0.7734375, "learning_rate": 1.7830923434813866e-05, "loss": 0.2968, "step": 5685 }, { "epoch": 0.4303296911215174, "grad_norm": 0.828125, "learning_rate": 1.783018264012079e-05, "loss": 0.3481, "step": 5686 }, { "epoch": 0.43040537344496477, "grad_norm": 0.8984375, "learning_rate": 1.7829441734343143e-05, "loss": 0.3977, "step": 5687 }, { "epoch": 0.4304810557684121, "grad_norm": 0.79296875, "learning_rate": 1.7828700717491426e-05, "loss": 0.2961, "step": 5688 }, { "epoch": 0.4305567380918594, "grad_norm": 0.84765625, "learning_rate": 1.7827959589576155e-05, "loss": 0.332, "step": 5689 }, { "epoch": 0.43063242041530675, "grad_norm": 0.78515625, "learning_rate": 1.7827218350607845e-05, "loss": 0.3133, "step": 5690 }, { "epoch": 0.4307081027387541, "grad_norm": 0.80859375, "learning_rate": 1.782647700059701e-05, "loss": 0.3416, "step": 5691 }, { "epoch": 0.4307837850622014, "grad_norm": 1.2734375, "learning_rate": 1.7825735539554167e-05, "loss": 0.3738, "step": 5692 }, { "epoch": 0.43085946738564873, "grad_norm": 0.8359375, "learning_rate": 1.7824993967489835e-05, "loss": 0.3388, "step": 5693 }, { "epoch": 0.4309351497090961, "grad_norm": 0.7421875, "learning_rate": 1.7824252284414538e-05, "loss": 0.2863, "step": 5694 }, { "epoch": 0.4310108320325434, "grad_norm": 0.828125, "learning_rate": 1.7823510490338795e-05, "loss": 0.3349, "step": 5695 }, { "epoch": 0.4310865143559907, "grad_norm": 0.78515625, "learning_rate": 1.782276858527313e-05, "loss": 0.3413, "step": 5696 }, { "epoch": 0.43116219667943806, "grad_norm": 0.77734375, "learning_rate": 1.782202656922807e-05, "loss": 0.3172, "step": 5697 }, { "epoch": 0.4312378790028854, "grad_norm": 0.79296875, "learning_rate": 1.7821284442214138e-05, "loss": 0.3173, "step": 5698 }, { "epoch": 0.4313135613263327, "grad_norm": 0.78125, "learning_rate": 1.782054220424186e-05, "loss": 0.3121, "step": 5699 }, { "epoch": 0.43138924364978004, "grad_norm": 0.84765625, "learning_rate": 1.7819799855321777e-05, "loss": 0.3624, "step": 5700 }, { "epoch": 0.4314649259732274, "grad_norm": 0.7734375, "learning_rate": 1.7819057395464412e-05, "loss": 0.308, "step": 5701 }, { "epoch": 0.43154060829667473, "grad_norm": 0.7578125, "learning_rate": 1.78183148246803e-05, "loss": 0.2844, "step": 5702 }, { "epoch": 0.431616290620122, "grad_norm": 0.80078125, "learning_rate": 1.7817572142979975e-05, "loss": 0.3221, "step": 5703 }, { "epoch": 0.43169197294356937, "grad_norm": 0.83203125, "learning_rate": 1.7816829350373974e-05, "loss": 0.3561, "step": 5704 }, { "epoch": 0.4317676552670167, "grad_norm": 0.8203125, "learning_rate": 1.7816086446872833e-05, "loss": 0.3145, "step": 5705 }, { "epoch": 0.431843337590464, "grad_norm": 0.83203125, "learning_rate": 1.7815343432487094e-05, "loss": 0.3596, "step": 5706 }, { "epoch": 0.43191901991391135, "grad_norm": 1.171875, "learning_rate": 1.78146003072273e-05, "loss": 0.2981, "step": 5707 }, { "epoch": 0.4319947022373587, "grad_norm": 0.7578125, "learning_rate": 1.7813857071103986e-05, "loss": 0.2815, "step": 5708 }, { "epoch": 0.43207038456080604, "grad_norm": 0.7734375, "learning_rate": 1.78131137241277e-05, "loss": 0.3021, "step": 5709 }, { "epoch": 0.43214606688425333, "grad_norm": 0.8203125, "learning_rate": 1.781237026630899e-05, "loss": 0.3191, "step": 5710 }, { "epoch": 0.4322217492077007, "grad_norm": 0.765625, "learning_rate": 1.78116266976584e-05, "loss": 0.3261, "step": 5711 }, { "epoch": 0.432297431531148, "grad_norm": 0.7734375, "learning_rate": 1.7810883018186485e-05, "loss": 0.2913, "step": 5712 }, { "epoch": 0.4323731138545953, "grad_norm": 1.0390625, "learning_rate": 1.7810139227903783e-05, "loss": 0.3642, "step": 5713 }, { "epoch": 0.43244879617804266, "grad_norm": 0.9765625, "learning_rate": 1.7809395326820858e-05, "loss": 0.3406, "step": 5714 }, { "epoch": 0.43252447850149, "grad_norm": 0.8046875, "learning_rate": 1.7808651314948256e-05, "loss": 0.3109, "step": 5715 }, { "epoch": 0.43260016082493735, "grad_norm": 0.75, "learning_rate": 1.780790719229654e-05, "loss": 0.2755, "step": 5716 }, { "epoch": 0.43267584314838464, "grad_norm": 0.77734375, "learning_rate": 1.7807162958876256e-05, "loss": 0.3019, "step": 5717 }, { "epoch": 0.432751525471832, "grad_norm": 0.82421875, "learning_rate": 1.780641861469797e-05, "loss": 0.3551, "step": 5718 }, { "epoch": 0.43282720779527933, "grad_norm": 0.82421875, "learning_rate": 1.7805674159772244e-05, "loss": 0.3735, "step": 5719 }, { "epoch": 0.4329028901187266, "grad_norm": 0.80078125, "learning_rate": 1.780492959410963e-05, "loss": 0.3229, "step": 5720 }, { "epoch": 0.43297857244217397, "grad_norm": 0.734375, "learning_rate": 1.7804184917720694e-05, "loss": 0.2734, "step": 5721 }, { "epoch": 0.4330542547656213, "grad_norm": 0.79296875, "learning_rate": 1.7803440130616007e-05, "loss": 0.321, "step": 5722 }, { "epoch": 0.43312993708906866, "grad_norm": 0.890625, "learning_rate": 1.780269523280613e-05, "loss": 0.3483, "step": 5723 }, { "epoch": 0.43320561941251595, "grad_norm": 0.796875, "learning_rate": 1.7801950224301633e-05, "loss": 0.3245, "step": 5724 }, { "epoch": 0.4332813017359633, "grad_norm": 1.1484375, "learning_rate": 1.7801205105113077e-05, "loss": 0.3835, "step": 5725 }, { "epoch": 0.43335698405941064, "grad_norm": 0.73828125, "learning_rate": 1.7800459875251045e-05, "loss": 0.3052, "step": 5726 }, { "epoch": 0.43343266638285793, "grad_norm": 0.78125, "learning_rate": 1.77997145347261e-05, "loss": 0.3049, "step": 5727 }, { "epoch": 0.4335083487063053, "grad_norm": 0.7734375, "learning_rate": 1.779896908354882e-05, "loss": 0.3086, "step": 5728 }, { "epoch": 0.4335840310297526, "grad_norm": 0.796875, "learning_rate": 1.7798223521729783e-05, "loss": 0.3363, "step": 5729 }, { "epoch": 0.4336597133531999, "grad_norm": 0.7734375, "learning_rate": 1.7797477849279564e-05, "loss": 0.324, "step": 5730 }, { "epoch": 0.43373539567664726, "grad_norm": 0.78125, "learning_rate": 1.7796732066208736e-05, "loss": 0.2897, "step": 5731 }, { "epoch": 0.4338110780000946, "grad_norm": 0.80859375, "learning_rate": 1.7795986172527888e-05, "loss": 0.3324, "step": 5732 }, { "epoch": 0.43388676032354195, "grad_norm": 0.8125, "learning_rate": 1.7795240168247592e-05, "loss": 0.3363, "step": 5733 }, { "epoch": 0.43396244264698924, "grad_norm": 0.8046875, "learning_rate": 1.7794494053378445e-05, "loss": 0.364, "step": 5734 }, { "epoch": 0.4340381249704366, "grad_norm": 0.78515625, "learning_rate": 1.7793747827931017e-05, "loss": 0.3348, "step": 5735 }, { "epoch": 0.43411380729388394, "grad_norm": 0.84375, "learning_rate": 1.7793001491915905e-05, "loss": 0.4092, "step": 5736 }, { "epoch": 0.4341894896173312, "grad_norm": 0.7421875, "learning_rate": 1.7792255045343696e-05, "loss": 0.3015, "step": 5737 }, { "epoch": 0.43426517194077857, "grad_norm": 0.765625, "learning_rate": 1.7791508488224972e-05, "loss": 0.2965, "step": 5738 }, { "epoch": 0.4343408542642259, "grad_norm": 0.78515625, "learning_rate": 1.7790761820570336e-05, "loss": 0.3166, "step": 5739 }, { "epoch": 0.43441653658767326, "grad_norm": 0.796875, "learning_rate": 1.779001504239037e-05, "loss": 0.2946, "step": 5740 }, { "epoch": 0.43449221891112055, "grad_norm": 0.8125, "learning_rate": 1.778926815369567e-05, "loss": 0.3475, "step": 5741 }, { "epoch": 0.4345679012345679, "grad_norm": 0.77734375, "learning_rate": 1.778852115449684e-05, "loss": 0.295, "step": 5742 }, { "epoch": 0.43464358355801525, "grad_norm": 0.74609375, "learning_rate": 1.7787774044804467e-05, "loss": 0.2986, "step": 5743 }, { "epoch": 0.43471926588146254, "grad_norm": 0.8046875, "learning_rate": 1.7787026824629158e-05, "loss": 0.3198, "step": 5744 }, { "epoch": 0.4347949482049099, "grad_norm": 0.796875, "learning_rate": 1.7786279493981506e-05, "loss": 0.3306, "step": 5745 }, { "epoch": 0.4348706305283572, "grad_norm": 0.7421875, "learning_rate": 1.7785532052872124e-05, "loss": 0.3047, "step": 5746 }, { "epoch": 0.4349463128518046, "grad_norm": 0.80078125, "learning_rate": 1.7784784501311604e-05, "loss": 0.3463, "step": 5747 }, { "epoch": 0.43502199517525186, "grad_norm": 0.84375, "learning_rate": 1.7784036839310558e-05, "loss": 0.3806, "step": 5748 }, { "epoch": 0.4350976774986992, "grad_norm": 0.80859375, "learning_rate": 1.7783289066879593e-05, "loss": 0.3234, "step": 5749 }, { "epoch": 0.43517335982214655, "grad_norm": 0.73828125, "learning_rate": 1.7782541184029316e-05, "loss": 0.3024, "step": 5750 }, { "epoch": 0.43524904214559385, "grad_norm": 0.671875, "learning_rate": 1.7781793190770336e-05, "loss": 0.2563, "step": 5751 }, { "epoch": 0.4353247244690412, "grad_norm": 0.79296875, "learning_rate": 1.778104508711327e-05, "loss": 0.3268, "step": 5752 }, { "epoch": 0.43540040679248854, "grad_norm": 0.7578125, "learning_rate": 1.778029687306872e-05, "loss": 0.3007, "step": 5753 }, { "epoch": 0.4354760891159359, "grad_norm": 0.71484375, "learning_rate": 1.7779548548647312e-05, "loss": 0.3001, "step": 5754 }, { "epoch": 0.4355517714393832, "grad_norm": 0.8515625, "learning_rate": 1.7778800113859656e-05, "loss": 0.3628, "step": 5755 }, { "epoch": 0.4356274537628305, "grad_norm": 0.7265625, "learning_rate": 1.777805156871637e-05, "loss": 0.298, "step": 5756 }, { "epoch": 0.43570313608627786, "grad_norm": 0.83984375, "learning_rate": 1.777730291322808e-05, "loss": 0.3479, "step": 5757 }, { "epoch": 0.43577881840972515, "grad_norm": 0.9609375, "learning_rate": 1.7776554147405396e-05, "loss": 0.3466, "step": 5758 }, { "epoch": 0.4358545007331725, "grad_norm": 0.83203125, "learning_rate": 1.7775805271258948e-05, "loss": 0.3238, "step": 5759 }, { "epoch": 0.43593018305661985, "grad_norm": 0.83203125, "learning_rate": 1.777505628479936e-05, "loss": 0.3596, "step": 5760 }, { "epoch": 0.4360058653800672, "grad_norm": 0.76171875, "learning_rate": 1.7774307188037255e-05, "loss": 0.3293, "step": 5761 }, { "epoch": 0.4360815477035145, "grad_norm": 1.2734375, "learning_rate": 1.7773557980983264e-05, "loss": 0.3983, "step": 5762 }, { "epoch": 0.43615723002696183, "grad_norm": 0.8515625, "learning_rate": 1.7772808663648006e-05, "loss": 0.3469, "step": 5763 }, { "epoch": 0.4362329123504092, "grad_norm": 0.90234375, "learning_rate": 1.7772059236042126e-05, "loss": 0.3259, "step": 5764 }, { "epoch": 0.43630859467385646, "grad_norm": 0.76171875, "learning_rate": 1.7771309698176246e-05, "loss": 0.2818, "step": 5765 }, { "epoch": 0.4363842769973038, "grad_norm": 0.72265625, "learning_rate": 1.7770560050061002e-05, "loss": 0.3098, "step": 5766 }, { "epoch": 0.43645995932075116, "grad_norm": 0.83984375, "learning_rate": 1.7769810291707025e-05, "loss": 0.3442, "step": 5767 }, { "epoch": 0.4365356416441985, "grad_norm": 0.7421875, "learning_rate": 1.776906042312496e-05, "loss": 0.3021, "step": 5768 }, { "epoch": 0.4366113239676458, "grad_norm": 0.85546875, "learning_rate": 1.7768310444325442e-05, "loss": 0.3653, "step": 5769 }, { "epoch": 0.43668700629109314, "grad_norm": 0.80078125, "learning_rate": 1.7767560355319106e-05, "loss": 0.3453, "step": 5770 }, { "epoch": 0.4367626886145405, "grad_norm": 0.82421875, "learning_rate": 1.77668101561166e-05, "loss": 0.3592, "step": 5771 }, { "epoch": 0.4368383709379878, "grad_norm": 0.7265625, "learning_rate": 1.776605984672856e-05, "loss": 0.2862, "step": 5772 }, { "epoch": 0.4369140532614351, "grad_norm": 0.86328125, "learning_rate": 1.7765309427165636e-05, "loss": 0.3577, "step": 5773 }, { "epoch": 0.43698973558488247, "grad_norm": 0.76171875, "learning_rate": 1.776455889743847e-05, "loss": 0.3005, "step": 5774 }, { "epoch": 0.4370654179083298, "grad_norm": 0.97265625, "learning_rate": 1.7763808257557714e-05, "loss": 0.3, "step": 5775 }, { "epoch": 0.4371411002317771, "grad_norm": 0.74609375, "learning_rate": 1.7763057507534016e-05, "loss": 0.3084, "step": 5776 }, { "epoch": 0.43721678255522445, "grad_norm": 0.83984375, "learning_rate": 1.7762306647378023e-05, "loss": 0.3815, "step": 5777 }, { "epoch": 0.4372924648786718, "grad_norm": 1.1640625, "learning_rate": 1.776155567710039e-05, "loss": 0.3964, "step": 5778 }, { "epoch": 0.4373681472021191, "grad_norm": 0.74609375, "learning_rate": 1.7760804596711772e-05, "loss": 0.3245, "step": 5779 }, { "epoch": 0.43744382952556643, "grad_norm": 0.72265625, "learning_rate": 1.776005340622282e-05, "loss": 0.296, "step": 5780 }, { "epoch": 0.4375195118490138, "grad_norm": 0.87109375, "learning_rate": 1.7759302105644196e-05, "loss": 0.3496, "step": 5781 }, { "epoch": 0.4375951941724611, "grad_norm": 0.76953125, "learning_rate": 1.7758550694986555e-05, "loss": 0.3055, "step": 5782 }, { "epoch": 0.4376708764959084, "grad_norm": 0.78515625, "learning_rate": 1.7757799174260558e-05, "loss": 0.314, "step": 5783 }, { "epoch": 0.43774655881935576, "grad_norm": 0.83984375, "learning_rate": 1.775704754347687e-05, "loss": 0.3214, "step": 5784 }, { "epoch": 0.4378222411428031, "grad_norm": 0.86328125, "learning_rate": 1.7756295802646145e-05, "loss": 0.3524, "step": 5785 }, { "epoch": 0.4378979234662504, "grad_norm": 0.7578125, "learning_rate": 1.7755543951779058e-05, "loss": 0.3245, "step": 5786 }, { "epoch": 0.43797360578969774, "grad_norm": 0.7890625, "learning_rate": 1.775479199088627e-05, "loss": 0.339, "step": 5787 }, { "epoch": 0.4380492881131451, "grad_norm": 0.796875, "learning_rate": 1.7754039919978452e-05, "loss": 0.3271, "step": 5788 }, { "epoch": 0.4381249704365924, "grad_norm": 0.8203125, "learning_rate": 1.7753287739066268e-05, "loss": 0.3178, "step": 5789 }, { "epoch": 0.4382006527600397, "grad_norm": 0.82421875, "learning_rate": 1.7752535448160395e-05, "loss": 0.3594, "step": 5790 }, { "epoch": 0.43827633508348707, "grad_norm": 0.78125, "learning_rate": 1.77517830472715e-05, "loss": 0.2886, "step": 5791 }, { "epoch": 0.4383520174069344, "grad_norm": 0.83984375, "learning_rate": 1.7751030536410264e-05, "loss": 0.3632, "step": 5792 }, { "epoch": 0.4384276997303817, "grad_norm": 0.7890625, "learning_rate": 1.7750277915587358e-05, "loss": 0.3324, "step": 5793 }, { "epoch": 0.43850338205382905, "grad_norm": 0.69921875, "learning_rate": 1.7749525184813456e-05, "loss": 0.2586, "step": 5794 }, { "epoch": 0.4385790643772764, "grad_norm": 0.83984375, "learning_rate": 1.7748772344099242e-05, "loss": 0.3647, "step": 5795 }, { "epoch": 0.4386547467007237, "grad_norm": 0.8046875, "learning_rate": 1.77480193934554e-05, "loss": 0.3131, "step": 5796 }, { "epoch": 0.43873042902417103, "grad_norm": 0.8359375, "learning_rate": 1.77472663328926e-05, "loss": 0.3705, "step": 5797 }, { "epoch": 0.4388061113476184, "grad_norm": 0.734375, "learning_rate": 1.7746513162421535e-05, "loss": 0.2732, "step": 5798 }, { "epoch": 0.4388817936710657, "grad_norm": 0.74609375, "learning_rate": 1.7745759882052887e-05, "loss": 0.302, "step": 5799 }, { "epoch": 0.438957475994513, "grad_norm": 0.80859375, "learning_rate": 1.7745006491797345e-05, "loss": 0.3347, "step": 5800 }, { "epoch": 0.43903315831796036, "grad_norm": 0.7890625, "learning_rate": 1.7744252991665592e-05, "loss": 0.3201, "step": 5801 }, { "epoch": 0.4391088406414077, "grad_norm": 0.7265625, "learning_rate": 1.774349938166832e-05, "loss": 0.2878, "step": 5802 }, { "epoch": 0.439184522964855, "grad_norm": 0.98828125, "learning_rate": 1.7742745661816222e-05, "loss": 0.3563, "step": 5803 }, { "epoch": 0.43926020528830234, "grad_norm": 0.71484375, "learning_rate": 1.7741991832119993e-05, "loss": 0.2853, "step": 5804 }, { "epoch": 0.4393358876117497, "grad_norm": 0.765625, "learning_rate": 1.774123789259032e-05, "loss": 0.3137, "step": 5805 }, { "epoch": 0.43941156993519703, "grad_norm": 0.71484375, "learning_rate": 1.7740483843237905e-05, "loss": 0.2924, "step": 5806 }, { "epoch": 0.4394872522586443, "grad_norm": 0.79296875, "learning_rate": 1.7739729684073442e-05, "loss": 0.3541, "step": 5807 }, { "epoch": 0.43956293458209167, "grad_norm": 0.73046875, "learning_rate": 1.7738975415107634e-05, "loss": 0.2791, "step": 5808 }, { "epoch": 0.439638616905539, "grad_norm": 0.80078125, "learning_rate": 1.773822103635118e-05, "loss": 0.3505, "step": 5809 }, { "epoch": 0.4397142992289863, "grad_norm": 2.25, "learning_rate": 1.773746654781478e-05, "loss": 0.4116, "step": 5810 }, { "epoch": 0.43978998155243365, "grad_norm": 0.78125, "learning_rate": 1.7736711949509137e-05, "loss": 0.3585, "step": 5811 }, { "epoch": 0.439865663875881, "grad_norm": 0.75390625, "learning_rate": 1.773595724144496e-05, "loss": 0.325, "step": 5812 }, { "epoch": 0.43994134619932834, "grad_norm": 0.8046875, "learning_rate": 1.7735202423632952e-05, "loss": 0.3452, "step": 5813 }, { "epoch": 0.44001702852277563, "grad_norm": 0.8359375, "learning_rate": 1.773444749608383e-05, "loss": 0.3539, "step": 5814 }, { "epoch": 0.440092710846223, "grad_norm": 0.828125, "learning_rate": 1.7733692458808295e-05, "loss": 0.3548, "step": 5815 }, { "epoch": 0.4401683931696703, "grad_norm": 0.76953125, "learning_rate": 1.7732937311817055e-05, "loss": 0.3285, "step": 5816 }, { "epoch": 0.4402440754931176, "grad_norm": 0.76953125, "learning_rate": 1.7732182055120835e-05, "loss": 0.3504, "step": 5817 }, { "epoch": 0.44031975781656496, "grad_norm": 0.76171875, "learning_rate": 1.773142668873034e-05, "loss": 0.3204, "step": 5818 }, { "epoch": 0.4403954401400123, "grad_norm": 0.75390625, "learning_rate": 1.7730671212656295e-05, "loss": 0.3338, "step": 5819 }, { "epoch": 0.44047112246345965, "grad_norm": 0.76953125, "learning_rate": 1.772991562690941e-05, "loss": 0.3323, "step": 5820 }, { "epoch": 0.44054680478690694, "grad_norm": 1.03125, "learning_rate": 1.7729159931500405e-05, "loss": 0.3896, "step": 5821 }, { "epoch": 0.4406224871103543, "grad_norm": 0.76953125, "learning_rate": 1.7728404126440005e-05, "loss": 0.3095, "step": 5822 }, { "epoch": 0.44069816943380163, "grad_norm": 0.8984375, "learning_rate": 1.7727648211738925e-05, "loss": 0.4083, "step": 5823 }, { "epoch": 0.4407738517572489, "grad_norm": 0.80859375, "learning_rate": 1.77268921874079e-05, "loss": 0.3674, "step": 5824 }, { "epoch": 0.44084953408069627, "grad_norm": 0.921875, "learning_rate": 1.7726136053457647e-05, "loss": 0.4043, "step": 5825 }, { "epoch": 0.4409252164041436, "grad_norm": 0.78515625, "learning_rate": 1.7725379809898898e-05, "loss": 0.3175, "step": 5826 }, { "epoch": 0.44100089872759096, "grad_norm": 0.78515625, "learning_rate": 1.7724623456742377e-05, "loss": 0.3302, "step": 5827 }, { "epoch": 0.44107658105103825, "grad_norm": 1.21875, "learning_rate": 1.7723866993998813e-05, "loss": 0.3642, "step": 5828 }, { "epoch": 0.4411522633744856, "grad_norm": 0.8125, "learning_rate": 1.7723110421678948e-05, "loss": 0.315, "step": 5829 }, { "epoch": 0.44122794569793294, "grad_norm": 0.82421875, "learning_rate": 1.7722353739793504e-05, "loss": 0.3409, "step": 5830 }, { "epoch": 0.44130362802138023, "grad_norm": 0.765625, "learning_rate": 1.772159694835322e-05, "loss": 0.3092, "step": 5831 }, { "epoch": 0.4413793103448276, "grad_norm": 0.74609375, "learning_rate": 1.7720840047368834e-05, "loss": 0.3209, "step": 5832 }, { "epoch": 0.4414549926682749, "grad_norm": 0.80859375, "learning_rate": 1.7720083036851083e-05, "loss": 0.3469, "step": 5833 }, { "epoch": 0.44153067499172227, "grad_norm": 0.73828125, "learning_rate": 1.7719325916810705e-05, "loss": 0.2983, "step": 5834 }, { "epoch": 0.44160635731516956, "grad_norm": 0.7890625, "learning_rate": 1.7718568687258443e-05, "loss": 0.3524, "step": 5835 }, { "epoch": 0.4416820396386169, "grad_norm": 0.76953125, "learning_rate": 1.7717811348205035e-05, "loss": 0.3118, "step": 5836 }, { "epoch": 0.44175772196206425, "grad_norm": 0.8125, "learning_rate": 1.7717053899661234e-05, "loss": 0.3383, "step": 5837 }, { "epoch": 0.44183340428551154, "grad_norm": 0.82421875, "learning_rate": 1.771629634163778e-05, "loss": 0.362, "step": 5838 }, { "epoch": 0.4419090866089589, "grad_norm": 1.21875, "learning_rate": 1.7715538674145416e-05, "loss": 0.3547, "step": 5839 }, { "epoch": 0.44198476893240624, "grad_norm": 0.80078125, "learning_rate": 1.77147808971949e-05, "loss": 0.3556, "step": 5840 }, { "epoch": 0.4420604512558536, "grad_norm": 0.8671875, "learning_rate": 1.7714023010796976e-05, "loss": 0.3722, "step": 5841 }, { "epoch": 0.44213613357930087, "grad_norm": 0.78515625, "learning_rate": 1.77132650149624e-05, "loss": 0.3262, "step": 5842 }, { "epoch": 0.4422118159027482, "grad_norm": 0.796875, "learning_rate": 1.771250690970192e-05, "loss": 0.3021, "step": 5843 }, { "epoch": 0.44228749822619556, "grad_norm": 0.80859375, "learning_rate": 1.7711748695026298e-05, "loss": 0.3651, "step": 5844 }, { "epoch": 0.44236318054964285, "grad_norm": 0.73046875, "learning_rate": 1.7710990370946285e-05, "loss": 0.319, "step": 5845 }, { "epoch": 0.4424388628730902, "grad_norm": 0.7734375, "learning_rate": 1.771023193747264e-05, "loss": 0.2739, "step": 5846 }, { "epoch": 0.44251454519653755, "grad_norm": 0.875, "learning_rate": 1.7709473394616126e-05, "loss": 0.3245, "step": 5847 }, { "epoch": 0.4425902275199849, "grad_norm": 0.82421875, "learning_rate": 1.7708714742387502e-05, "loss": 0.3729, "step": 5848 }, { "epoch": 0.4426659098434322, "grad_norm": 0.78125, "learning_rate": 1.770795598079753e-05, "loss": 0.3373, "step": 5849 }, { "epoch": 0.4427415921668795, "grad_norm": 0.7265625, "learning_rate": 1.7707197109856972e-05, "loss": 0.2986, "step": 5850 }, { "epoch": 0.4428172744903269, "grad_norm": 0.734375, "learning_rate": 1.77064381295766e-05, "loss": 0.2966, "step": 5851 }, { "epoch": 0.44289295681377416, "grad_norm": 0.76171875, "learning_rate": 1.770567903996718e-05, "loss": 0.3235, "step": 5852 }, { "epoch": 0.4429686391372215, "grad_norm": 0.79296875, "learning_rate": 1.7704919841039477e-05, "loss": 0.3277, "step": 5853 }, { "epoch": 0.44304432146066886, "grad_norm": 0.828125, "learning_rate": 1.7704160532804266e-05, "loss": 0.3474, "step": 5854 }, { "epoch": 0.44312000378411615, "grad_norm": 0.8359375, "learning_rate": 1.7703401115272317e-05, "loss": 0.3197, "step": 5855 }, { "epoch": 0.4431956861075635, "grad_norm": 0.79296875, "learning_rate": 1.77026415884544e-05, "loss": 0.3152, "step": 5856 }, { "epoch": 0.44327136843101084, "grad_norm": 0.828125, "learning_rate": 1.7701881952361296e-05, "loss": 0.343, "step": 5857 }, { "epoch": 0.4433470507544582, "grad_norm": 0.75, "learning_rate": 1.770112220700378e-05, "loss": 0.2861, "step": 5858 }, { "epoch": 0.4434227330779055, "grad_norm": 0.74609375, "learning_rate": 1.7700362352392632e-05, "loss": 0.2982, "step": 5859 }, { "epoch": 0.4434984154013528, "grad_norm": 0.7578125, "learning_rate": 1.7699602388538622e-05, "loss": 0.3397, "step": 5860 }, { "epoch": 0.44357409772480016, "grad_norm": 0.76953125, "learning_rate": 1.7698842315452548e-05, "loss": 0.2991, "step": 5861 }, { "epoch": 0.44364978004824746, "grad_norm": 0.8203125, "learning_rate": 1.769808213314518e-05, "loss": 0.3139, "step": 5862 }, { "epoch": 0.4437254623716948, "grad_norm": 0.78515625, "learning_rate": 1.769732184162731e-05, "loss": 0.3192, "step": 5863 }, { "epoch": 0.44380114469514215, "grad_norm": 0.78125, "learning_rate": 1.7696561440909715e-05, "loss": 0.3142, "step": 5864 }, { "epoch": 0.4438768270185895, "grad_norm": 1.1171875, "learning_rate": 1.769580093100319e-05, "loss": 0.3681, "step": 5865 }, { "epoch": 0.4439525093420368, "grad_norm": 0.828125, "learning_rate": 1.7695040311918526e-05, "loss": 0.3441, "step": 5866 }, { "epoch": 0.44402819166548413, "grad_norm": 0.80859375, "learning_rate": 1.7694279583666508e-05, "loss": 0.3457, "step": 5867 }, { "epoch": 0.4441038739889315, "grad_norm": 0.77734375, "learning_rate": 1.769351874625793e-05, "loss": 0.3345, "step": 5868 }, { "epoch": 0.44417955631237876, "grad_norm": 0.796875, "learning_rate": 1.7692757799703586e-05, "loss": 0.3283, "step": 5869 }, { "epoch": 0.4442552386358261, "grad_norm": 0.79296875, "learning_rate": 1.769199674401427e-05, "loss": 0.336, "step": 5870 }, { "epoch": 0.44433092095927346, "grad_norm": 0.81640625, "learning_rate": 1.7691235579200784e-05, "loss": 0.3754, "step": 5871 }, { "epoch": 0.4444066032827208, "grad_norm": 0.80859375, "learning_rate": 1.769047430527392e-05, "loss": 0.3256, "step": 5872 }, { "epoch": 0.4444822856061681, "grad_norm": 0.8359375, "learning_rate": 1.768971292224448e-05, "loss": 0.3697, "step": 5873 }, { "epoch": 0.44455796792961544, "grad_norm": 0.7578125, "learning_rate": 1.768895143012327e-05, "loss": 0.3097, "step": 5874 }, { "epoch": 0.4446336502530628, "grad_norm": 0.8125, "learning_rate": 1.7688189828921083e-05, "loss": 0.3638, "step": 5875 }, { "epoch": 0.4447093325765101, "grad_norm": 0.7578125, "learning_rate": 1.7687428118648732e-05, "loss": 0.3353, "step": 5876 }, { "epoch": 0.4447850148999574, "grad_norm": 0.71875, "learning_rate": 1.7686666299317024e-05, "loss": 0.2962, "step": 5877 }, { "epoch": 0.44486069722340477, "grad_norm": 0.74609375, "learning_rate": 1.768590437093676e-05, "loss": 0.3173, "step": 5878 }, { "epoch": 0.4449363795468521, "grad_norm": 0.76953125, "learning_rate": 1.7685142333518755e-05, "loss": 0.3409, "step": 5879 }, { "epoch": 0.4450120618702994, "grad_norm": 0.8671875, "learning_rate": 1.768438018707382e-05, "loss": 0.3659, "step": 5880 }, { "epoch": 0.44508774419374675, "grad_norm": 0.79296875, "learning_rate": 1.7683617931612763e-05, "loss": 0.3236, "step": 5881 }, { "epoch": 0.4451634265171941, "grad_norm": 0.78125, "learning_rate": 1.76828555671464e-05, "loss": 0.3015, "step": 5882 }, { "epoch": 0.4452391088406414, "grad_norm": 0.75, "learning_rate": 1.7682093093685546e-05, "loss": 0.3122, "step": 5883 }, { "epoch": 0.44531479116408873, "grad_norm": 0.8359375, "learning_rate": 1.768133051124102e-05, "loss": 0.3953, "step": 5884 }, { "epoch": 0.4453904734875361, "grad_norm": 0.74609375, "learning_rate": 1.768056781982364e-05, "loss": 0.3, "step": 5885 }, { "epoch": 0.4454661558109834, "grad_norm": 0.78125, "learning_rate": 1.7679805019444224e-05, "loss": 0.3252, "step": 5886 }, { "epoch": 0.4455418381344307, "grad_norm": 0.75, "learning_rate": 1.7679042110113595e-05, "loss": 0.3333, "step": 5887 }, { "epoch": 0.44561752045787806, "grad_norm": 0.7890625, "learning_rate": 1.7678279091842575e-05, "loss": 0.3126, "step": 5888 }, { "epoch": 0.4456932027813254, "grad_norm": 0.8515625, "learning_rate": 1.7677515964641987e-05, "loss": 0.364, "step": 5889 }, { "epoch": 0.4457688851047727, "grad_norm": 0.890625, "learning_rate": 1.7676752728522662e-05, "loss": 0.3293, "step": 5890 }, { "epoch": 0.44584456742822004, "grad_norm": 0.8203125, "learning_rate": 1.767598938349543e-05, "loss": 0.3449, "step": 5891 }, { "epoch": 0.4459202497516674, "grad_norm": 0.7890625, "learning_rate": 1.7675225929571112e-05, "loss": 0.3523, "step": 5892 }, { "epoch": 0.44599593207511473, "grad_norm": 0.76953125, "learning_rate": 1.7674462366760544e-05, "loss": 0.2997, "step": 5893 }, { "epoch": 0.446071614398562, "grad_norm": 0.7734375, "learning_rate": 1.7673698695074557e-05, "loss": 0.338, "step": 5894 }, { "epoch": 0.44614729672200937, "grad_norm": 0.84765625, "learning_rate": 1.7672934914523985e-05, "loss": 0.3803, "step": 5895 }, { "epoch": 0.4462229790454567, "grad_norm": 0.79296875, "learning_rate": 1.7672171025119667e-05, "loss": 0.3598, "step": 5896 }, { "epoch": 0.446298661368904, "grad_norm": 0.6953125, "learning_rate": 1.7671407026872436e-05, "loss": 0.2599, "step": 5897 }, { "epoch": 0.44637434369235135, "grad_norm": 0.81640625, "learning_rate": 1.767064291979313e-05, "loss": 0.3433, "step": 5898 }, { "epoch": 0.4464500260157987, "grad_norm": 0.77734375, "learning_rate": 1.766987870389259e-05, "loss": 0.3503, "step": 5899 }, { "epoch": 0.44652570833924604, "grad_norm": 0.75, "learning_rate": 1.7669114379181663e-05, "loss": 0.2969, "step": 5900 }, { "epoch": 0.44660139066269333, "grad_norm": 0.70703125, "learning_rate": 1.7668349945671183e-05, "loss": 0.2948, "step": 5901 }, { "epoch": 0.4466770729861407, "grad_norm": 0.7890625, "learning_rate": 1.7667585403372003e-05, "loss": 0.3166, "step": 5902 }, { "epoch": 0.446752755309588, "grad_norm": 0.85546875, "learning_rate": 1.7666820752294965e-05, "loss": 0.3633, "step": 5903 }, { "epoch": 0.4468284376330353, "grad_norm": 0.73828125, "learning_rate": 1.7666055992450917e-05, "loss": 0.2968, "step": 5904 }, { "epoch": 0.44690411995648266, "grad_norm": 1.359375, "learning_rate": 1.7665291123850712e-05, "loss": 0.393, "step": 5905 }, { "epoch": 0.44697980227993, "grad_norm": 1.109375, "learning_rate": 1.7664526146505195e-05, "loss": 0.3306, "step": 5906 }, { "epoch": 0.44705548460337735, "grad_norm": 0.78125, "learning_rate": 1.7663761060425225e-05, "loss": 0.3245, "step": 5907 }, { "epoch": 0.44713116692682464, "grad_norm": 1.1015625, "learning_rate": 1.7662995865621654e-05, "loss": 0.407, "step": 5908 }, { "epoch": 0.447206849250272, "grad_norm": 0.77734375, "learning_rate": 1.7662230562105328e-05, "loss": 0.3328, "step": 5909 }, { "epoch": 0.44728253157371933, "grad_norm": 0.79296875, "learning_rate": 1.7661465149887123e-05, "loss": 0.3514, "step": 5910 }, { "epoch": 0.4473582138971666, "grad_norm": 0.8359375, "learning_rate": 1.7660699628977884e-05, "loss": 0.2934, "step": 5911 }, { "epoch": 0.44743389622061397, "grad_norm": 0.765625, "learning_rate": 1.765993399938847e-05, "loss": 0.3276, "step": 5912 }, { "epoch": 0.4475095785440613, "grad_norm": 0.7890625, "learning_rate": 1.7659168261129752e-05, "loss": 0.3662, "step": 5913 }, { "epoch": 0.4475852608675086, "grad_norm": 1.734375, "learning_rate": 1.7658402414212585e-05, "loss": 0.3726, "step": 5914 }, { "epoch": 0.44766094319095595, "grad_norm": 0.765625, "learning_rate": 1.7657636458647843e-05, "loss": 0.3108, "step": 5915 }, { "epoch": 0.4477366255144033, "grad_norm": 0.84375, "learning_rate": 1.7656870394446382e-05, "loss": 0.3344, "step": 5916 }, { "epoch": 0.44781230783785064, "grad_norm": 0.87109375, "learning_rate": 1.7656104221619075e-05, "loss": 0.3046, "step": 5917 }, { "epoch": 0.44788799016129793, "grad_norm": 0.7890625, "learning_rate": 1.7655337940176795e-05, "loss": 0.3124, "step": 5918 }, { "epoch": 0.4479636724847453, "grad_norm": 0.8359375, "learning_rate": 1.7654571550130406e-05, "loss": 0.3476, "step": 5919 }, { "epoch": 0.4480393548081926, "grad_norm": 0.8125, "learning_rate": 1.7653805051490785e-05, "loss": 0.3447, "step": 5920 }, { "epoch": 0.4481150371316399, "grad_norm": 0.8359375, "learning_rate": 1.7653038444268804e-05, "loss": 0.3547, "step": 5921 }, { "epoch": 0.44819071945508726, "grad_norm": 0.8515625, "learning_rate": 1.7652271728475342e-05, "loss": 0.3521, "step": 5922 }, { "epoch": 0.4482664017785346, "grad_norm": 0.80078125, "learning_rate": 1.765150490412127e-05, "loss": 0.3451, "step": 5923 }, { "epoch": 0.44834208410198195, "grad_norm": 0.90625, "learning_rate": 1.7650737971217476e-05, "loss": 0.3414, "step": 5924 }, { "epoch": 0.44841776642542924, "grad_norm": 0.796875, "learning_rate": 1.764997092977483e-05, "loss": 0.3362, "step": 5925 }, { "epoch": 0.4484934487488766, "grad_norm": 1.0390625, "learning_rate": 1.764920377980422e-05, "loss": 0.3545, "step": 5926 }, { "epoch": 0.44856913107232393, "grad_norm": 0.8046875, "learning_rate": 1.7648436521316528e-05, "loss": 0.3521, "step": 5927 }, { "epoch": 0.4486448133957712, "grad_norm": 0.69140625, "learning_rate": 1.7647669154322636e-05, "loss": 0.2588, "step": 5928 }, { "epoch": 0.44872049571921857, "grad_norm": 0.71875, "learning_rate": 1.7646901678833435e-05, "loss": 0.2724, "step": 5929 }, { "epoch": 0.4487961780426659, "grad_norm": 0.72265625, "learning_rate": 1.7646134094859816e-05, "loss": 0.2657, "step": 5930 }, { "epoch": 0.44887186036611326, "grad_norm": 0.828125, "learning_rate": 1.764536640241266e-05, "loss": 0.341, "step": 5931 }, { "epoch": 0.44894754268956055, "grad_norm": 0.7578125, "learning_rate": 1.764459860150286e-05, "loss": 0.2932, "step": 5932 }, { "epoch": 0.4490232250130079, "grad_norm": 0.87109375, "learning_rate": 1.764383069214131e-05, "loss": 0.3515, "step": 5933 }, { "epoch": 0.44909890733645524, "grad_norm": 0.7890625, "learning_rate": 1.7643062674338908e-05, "loss": 0.3051, "step": 5934 }, { "epoch": 0.44917458965990253, "grad_norm": 0.79296875, "learning_rate": 1.7642294548106545e-05, "loss": 0.3154, "step": 5935 }, { "epoch": 0.4492502719833499, "grad_norm": 0.78125, "learning_rate": 1.7641526313455118e-05, "loss": 0.3151, "step": 5936 }, { "epoch": 0.4493259543067972, "grad_norm": 0.796875, "learning_rate": 1.764075797039553e-05, "loss": 0.3484, "step": 5937 }, { "epoch": 0.44940163663024457, "grad_norm": 0.82421875, "learning_rate": 1.7639989518938673e-05, "loss": 0.3309, "step": 5938 }, { "epoch": 0.44947731895369186, "grad_norm": 0.76171875, "learning_rate": 1.7639220959095457e-05, "loss": 0.2958, "step": 5939 }, { "epoch": 0.4495530012771392, "grad_norm": 0.8046875, "learning_rate": 1.763845229087678e-05, "loss": 0.3246, "step": 5940 }, { "epoch": 0.44962868360058655, "grad_norm": 0.796875, "learning_rate": 1.7637683514293553e-05, "loss": 0.324, "step": 5941 }, { "epoch": 0.44970436592403384, "grad_norm": 0.8515625, "learning_rate": 1.7636914629356673e-05, "loss": 0.3607, "step": 5942 }, { "epoch": 0.4497800482474812, "grad_norm": 0.828125, "learning_rate": 1.763614563607706e-05, "loss": 0.3356, "step": 5943 }, { "epoch": 0.44985573057092854, "grad_norm": 0.796875, "learning_rate": 1.7635376534465614e-05, "loss": 0.3231, "step": 5944 }, { "epoch": 0.4499314128943759, "grad_norm": 0.8359375, "learning_rate": 1.763460732453325e-05, "loss": 0.3524, "step": 5945 }, { "epoch": 0.45000709521782317, "grad_norm": 0.8125, "learning_rate": 1.7633838006290878e-05, "loss": 0.3467, "step": 5946 }, { "epoch": 0.4500827775412705, "grad_norm": 0.84375, "learning_rate": 1.7633068579749415e-05, "loss": 0.3218, "step": 5947 }, { "epoch": 0.45015845986471786, "grad_norm": 0.79296875, "learning_rate": 1.7632299044919775e-05, "loss": 0.3478, "step": 5948 }, { "epoch": 0.45023414218816515, "grad_norm": 0.7890625, "learning_rate": 1.7631529401812875e-05, "loss": 0.3224, "step": 5949 }, { "epoch": 0.4503098245116125, "grad_norm": 0.76171875, "learning_rate": 1.7630759650439638e-05, "loss": 0.287, "step": 5950 }, { "epoch": 0.45038550683505985, "grad_norm": 0.75390625, "learning_rate": 1.762998979081097e-05, "loss": 0.3077, "step": 5951 }, { "epoch": 0.4504611891585072, "grad_norm": 0.85546875, "learning_rate": 1.7629219822937817e-05, "loss": 0.3411, "step": 5952 }, { "epoch": 0.4505368714819545, "grad_norm": 0.78515625, "learning_rate": 1.762844974683108e-05, "loss": 0.2966, "step": 5953 }, { "epoch": 0.4506125538054018, "grad_norm": 0.796875, "learning_rate": 1.7627679562501695e-05, "loss": 0.3303, "step": 5954 }, { "epoch": 0.4506882361288492, "grad_norm": 0.7421875, "learning_rate": 1.7626909269960588e-05, "loss": 0.309, "step": 5955 }, { "epoch": 0.45076391845229646, "grad_norm": 0.80078125, "learning_rate": 1.762613886921868e-05, "loss": 0.2958, "step": 5956 }, { "epoch": 0.4508396007757438, "grad_norm": 0.80078125, "learning_rate": 1.762536836028691e-05, "loss": 0.3321, "step": 5957 }, { "epoch": 0.45091528309919116, "grad_norm": 0.6953125, "learning_rate": 1.76245977431762e-05, "loss": 0.2534, "step": 5958 }, { "epoch": 0.4509909654226385, "grad_norm": 0.78125, "learning_rate": 1.7623827017897494e-05, "loss": 0.3273, "step": 5959 }, { "epoch": 0.4510666477460858, "grad_norm": 0.78515625, "learning_rate": 1.762305618446171e-05, "loss": 0.2873, "step": 5960 }, { "epoch": 0.45114233006953314, "grad_norm": 0.8125, "learning_rate": 1.7622285242879797e-05, "loss": 0.3538, "step": 5961 }, { "epoch": 0.4512180123929805, "grad_norm": 0.765625, "learning_rate": 1.7621514193162688e-05, "loss": 0.3234, "step": 5962 }, { "epoch": 0.4512936947164278, "grad_norm": 1.1171875, "learning_rate": 1.7620743035321318e-05, "loss": 0.4017, "step": 5963 }, { "epoch": 0.4513693770398751, "grad_norm": 0.8046875, "learning_rate": 1.7619971769366634e-05, "loss": 0.3522, "step": 5964 }, { "epoch": 0.45144505936332247, "grad_norm": 0.76953125, "learning_rate": 1.761920039530957e-05, "loss": 0.2934, "step": 5965 }, { "epoch": 0.4515207416867698, "grad_norm": 0.80078125, "learning_rate": 1.7618428913161078e-05, "loss": 0.2924, "step": 5966 }, { "epoch": 0.4515964240102171, "grad_norm": 0.75390625, "learning_rate": 1.7617657322932092e-05, "loss": 0.3031, "step": 5967 }, { "epoch": 0.45167210633366445, "grad_norm": 0.76953125, "learning_rate": 1.7616885624633567e-05, "loss": 0.3042, "step": 5968 }, { "epoch": 0.4517477886571118, "grad_norm": 0.8203125, "learning_rate": 1.7616113818276454e-05, "loss": 0.3243, "step": 5969 }, { "epoch": 0.4518234709805591, "grad_norm": 0.76953125, "learning_rate": 1.7615341903871693e-05, "loss": 0.3169, "step": 5970 }, { "epoch": 0.45189915330400643, "grad_norm": 0.7109375, "learning_rate": 1.7614569881430238e-05, "loss": 0.2701, "step": 5971 }, { "epoch": 0.4519748356274538, "grad_norm": 0.7578125, "learning_rate": 1.761379775096304e-05, "loss": 0.3199, "step": 5972 }, { "epoch": 0.45205051795090107, "grad_norm": 0.78515625, "learning_rate": 1.761302551248106e-05, "loss": 0.3294, "step": 5973 }, { "epoch": 0.4521262002743484, "grad_norm": 0.84375, "learning_rate": 1.7612253165995244e-05, "loss": 0.3645, "step": 5974 }, { "epoch": 0.45220188259779576, "grad_norm": 0.828125, "learning_rate": 1.7611480711516557e-05, "loss": 0.379, "step": 5975 }, { "epoch": 0.4522775649212431, "grad_norm": 0.7734375, "learning_rate": 1.761070814905595e-05, "loss": 0.3001, "step": 5976 }, { "epoch": 0.4523532472446904, "grad_norm": 0.734375, "learning_rate": 1.7609935478624394e-05, "loss": 0.2964, "step": 5977 }, { "epoch": 0.45242892956813774, "grad_norm": 0.8203125, "learning_rate": 1.7609162700232844e-05, "loss": 0.3943, "step": 5978 }, { "epoch": 0.4525046118915851, "grad_norm": 0.74609375, "learning_rate": 1.7608389813892257e-05, "loss": 0.3135, "step": 5979 }, { "epoch": 0.4525802942150324, "grad_norm": 0.78515625, "learning_rate": 1.7607616819613608e-05, "loss": 0.34, "step": 5980 }, { "epoch": 0.4526559765384797, "grad_norm": 0.828125, "learning_rate": 1.760684371740786e-05, "loss": 0.3469, "step": 5981 }, { "epoch": 0.45273165886192707, "grad_norm": 0.80859375, "learning_rate": 1.7606070507285978e-05, "loss": 0.3274, "step": 5982 }, { "epoch": 0.4528073411853744, "grad_norm": 0.78515625, "learning_rate": 1.7605297189258935e-05, "loss": 0.3174, "step": 5983 }, { "epoch": 0.4528830235088217, "grad_norm": 1.1875, "learning_rate": 1.76045237633377e-05, "loss": 0.339, "step": 5984 }, { "epoch": 0.45295870583226905, "grad_norm": 0.78515625, "learning_rate": 1.7603750229533246e-05, "loss": 0.3232, "step": 5985 }, { "epoch": 0.4530343881557164, "grad_norm": 0.8125, "learning_rate": 1.7602976587856547e-05, "loss": 0.3088, "step": 5986 }, { "epoch": 0.4531100704791637, "grad_norm": 0.87109375, "learning_rate": 1.7602202838318575e-05, "loss": 0.3819, "step": 5987 }, { "epoch": 0.45318575280261103, "grad_norm": 0.76171875, "learning_rate": 1.760142898093031e-05, "loss": 0.3113, "step": 5988 }, { "epoch": 0.4532614351260584, "grad_norm": 0.76953125, "learning_rate": 1.7600655015702733e-05, "loss": 0.3017, "step": 5989 }, { "epoch": 0.4533371174495057, "grad_norm": 0.77734375, "learning_rate": 1.759988094264682e-05, "loss": 0.3172, "step": 5990 }, { "epoch": 0.453412799772953, "grad_norm": 0.87890625, "learning_rate": 1.7599106761773554e-05, "loss": 0.387, "step": 5991 }, { "epoch": 0.45348848209640036, "grad_norm": 0.72265625, "learning_rate": 1.7598332473093917e-05, "loss": 0.2795, "step": 5992 }, { "epoch": 0.4535641644198477, "grad_norm": 0.8359375, "learning_rate": 1.7597558076618898e-05, "loss": 0.3325, "step": 5993 }, { "epoch": 0.453639846743295, "grad_norm": 0.77734375, "learning_rate": 1.7596783572359476e-05, "loss": 0.3306, "step": 5994 }, { "epoch": 0.45371552906674234, "grad_norm": 1.1171875, "learning_rate": 1.7596008960326644e-05, "loss": 0.3263, "step": 5995 }, { "epoch": 0.4537912113901897, "grad_norm": 0.8046875, "learning_rate": 1.759523424053139e-05, "loss": 0.3298, "step": 5996 }, { "epoch": 0.45386689371363703, "grad_norm": 0.75390625, "learning_rate": 1.7594459412984707e-05, "loss": 0.3067, "step": 5997 }, { "epoch": 0.4539425760370843, "grad_norm": 0.67578125, "learning_rate": 1.759368447769758e-05, "loss": 0.2615, "step": 5998 }, { "epoch": 0.45401825836053167, "grad_norm": 0.78515625, "learning_rate": 1.7592909434681012e-05, "loss": 0.3247, "step": 5999 }, { "epoch": 0.454093940683979, "grad_norm": 0.7265625, "learning_rate": 1.759213428394599e-05, "loss": 0.2862, "step": 6000 }, { "epoch": 0.4541696230074263, "grad_norm": 0.82421875, "learning_rate": 1.7591359025503515e-05, "loss": 0.3837, "step": 6001 }, { "epoch": 0.45424530533087365, "grad_norm": 0.765625, "learning_rate": 1.7590583659364586e-05, "loss": 0.3112, "step": 6002 }, { "epoch": 0.454320987654321, "grad_norm": 0.76953125, "learning_rate": 1.7589808185540206e-05, "loss": 0.3294, "step": 6003 }, { "epoch": 0.45439666997776834, "grad_norm": 0.7890625, "learning_rate": 1.7589032604041368e-05, "loss": 0.3251, "step": 6004 }, { "epoch": 0.45447235230121563, "grad_norm": 0.73046875, "learning_rate": 1.758825691487908e-05, "loss": 0.2769, "step": 6005 }, { "epoch": 0.454548034624663, "grad_norm": 0.7734375, "learning_rate": 1.7587481118064343e-05, "loss": 0.3154, "step": 6006 }, { "epoch": 0.4546237169481103, "grad_norm": 0.8125, "learning_rate": 1.758670521360817e-05, "loss": 0.3491, "step": 6007 }, { "epoch": 0.4546993992715576, "grad_norm": 0.84765625, "learning_rate": 1.7585929201521562e-05, "loss": 0.3316, "step": 6008 }, { "epoch": 0.45477508159500496, "grad_norm": 0.7890625, "learning_rate": 1.758515308181553e-05, "loss": 0.3464, "step": 6009 }, { "epoch": 0.4548507639184523, "grad_norm": 0.734375, "learning_rate": 1.7584376854501083e-05, "loss": 0.2831, "step": 6010 }, { "epoch": 0.45492644624189965, "grad_norm": 0.75, "learning_rate": 1.758360051958924e-05, "loss": 0.2897, "step": 6011 }, { "epoch": 0.45500212856534694, "grad_norm": 0.8125, "learning_rate": 1.758282407709101e-05, "loss": 0.334, "step": 6012 }, { "epoch": 0.4550778108887943, "grad_norm": 0.71484375, "learning_rate": 1.7582047527017402e-05, "loss": 0.3048, "step": 6013 }, { "epoch": 0.45515349321224163, "grad_norm": 0.80078125, "learning_rate": 1.7581270869379443e-05, "loss": 0.3141, "step": 6014 }, { "epoch": 0.4552291755356889, "grad_norm": 0.80078125, "learning_rate": 1.7580494104188145e-05, "loss": 0.3224, "step": 6015 }, { "epoch": 0.45530485785913627, "grad_norm": 0.8515625, "learning_rate": 1.757971723145453e-05, "loss": 0.3686, "step": 6016 }, { "epoch": 0.4553805401825836, "grad_norm": 0.80859375, "learning_rate": 1.757894025118962e-05, "loss": 0.3454, "step": 6017 }, { "epoch": 0.45545622250603096, "grad_norm": 0.78515625, "learning_rate": 1.757816316340444e-05, "loss": 0.3279, "step": 6018 }, { "epoch": 0.45553190482947825, "grad_norm": 0.8359375, "learning_rate": 1.7577385968110003e-05, "loss": 0.3791, "step": 6019 }, { "epoch": 0.4556075871529256, "grad_norm": 0.72265625, "learning_rate": 1.757660866531735e-05, "loss": 0.2899, "step": 6020 }, { "epoch": 0.45568326947637294, "grad_norm": 0.90234375, "learning_rate": 1.7575831255037494e-05, "loss": 0.3871, "step": 6021 }, { "epoch": 0.45575895179982023, "grad_norm": 0.796875, "learning_rate": 1.7575053737281474e-05, "loss": 0.3624, "step": 6022 }, { "epoch": 0.4558346341232676, "grad_norm": 0.77734375, "learning_rate": 1.757427611206032e-05, "loss": 0.3132, "step": 6023 }, { "epoch": 0.4559103164467149, "grad_norm": 0.86328125, "learning_rate": 1.757349837938506e-05, "loss": 0.3822, "step": 6024 }, { "epoch": 0.45598599877016227, "grad_norm": 1.2734375, "learning_rate": 1.757272053926673e-05, "loss": 0.364, "step": 6025 }, { "epoch": 0.45606168109360956, "grad_norm": 0.7578125, "learning_rate": 1.757194259171636e-05, "loss": 0.3006, "step": 6026 }, { "epoch": 0.4561373634170569, "grad_norm": 0.796875, "learning_rate": 1.7571164536744992e-05, "loss": 0.343, "step": 6027 }, { "epoch": 0.45621304574050425, "grad_norm": 0.76171875, "learning_rate": 1.7570386374363665e-05, "loss": 0.3012, "step": 6028 }, { "epoch": 0.45628872806395154, "grad_norm": 0.8203125, "learning_rate": 1.7569608104583416e-05, "loss": 0.3741, "step": 6029 }, { "epoch": 0.4563644103873989, "grad_norm": 0.94921875, "learning_rate": 1.7568829727415283e-05, "loss": 0.3083, "step": 6030 }, { "epoch": 0.45644009271084623, "grad_norm": 0.765625, "learning_rate": 1.7568051242870314e-05, "loss": 0.3148, "step": 6031 }, { "epoch": 0.4565157750342936, "grad_norm": 0.8203125, "learning_rate": 1.7567272650959552e-05, "loss": 0.3579, "step": 6032 }, { "epoch": 0.45659145735774087, "grad_norm": 0.94921875, "learning_rate": 1.756649395169404e-05, "loss": 0.3893, "step": 6033 }, { "epoch": 0.4566671396811882, "grad_norm": 0.83203125, "learning_rate": 1.7565715145084826e-05, "loss": 0.3761, "step": 6034 }, { "epoch": 0.45674282200463556, "grad_norm": 0.8515625, "learning_rate": 1.7564936231142962e-05, "loss": 0.3664, "step": 6035 }, { "epoch": 0.45681850432808285, "grad_norm": 0.82421875, "learning_rate": 1.7564157209879493e-05, "loss": 0.3712, "step": 6036 }, { "epoch": 0.4568941866515302, "grad_norm": 0.78125, "learning_rate": 1.7563378081305475e-05, "loss": 0.3245, "step": 6037 }, { "epoch": 0.45696986897497754, "grad_norm": 0.7734375, "learning_rate": 1.7562598845431958e-05, "loss": 0.3054, "step": 6038 }, { "epoch": 0.45704555129842483, "grad_norm": 0.75, "learning_rate": 1.7561819502270003e-05, "loss": 0.2846, "step": 6039 }, { "epoch": 0.4571212336218722, "grad_norm": 0.765625, "learning_rate": 1.7561040051830654e-05, "loss": 0.3014, "step": 6040 }, { "epoch": 0.4571969159453195, "grad_norm": 0.83984375, "learning_rate": 1.756026049412498e-05, "loss": 0.3643, "step": 6041 }, { "epoch": 0.45727259826876687, "grad_norm": 0.8984375, "learning_rate": 1.755948082916404e-05, "loss": 0.3549, "step": 6042 }, { "epoch": 0.45734828059221416, "grad_norm": 0.7734375, "learning_rate": 1.7558701056958888e-05, "loss": 0.3278, "step": 6043 }, { "epoch": 0.4574239629156615, "grad_norm": 0.75, "learning_rate": 1.7557921177520593e-05, "loss": 0.2781, "step": 6044 }, { "epoch": 0.45749964523910885, "grad_norm": 0.78515625, "learning_rate": 1.7557141190860217e-05, "loss": 0.3095, "step": 6045 }, { "epoch": 0.45757532756255614, "grad_norm": 0.77734375, "learning_rate": 1.7556361096988823e-05, "loss": 0.2715, "step": 6046 }, { "epoch": 0.4576510098860035, "grad_norm": 0.78125, "learning_rate": 1.7555580895917482e-05, "loss": 0.3309, "step": 6047 }, { "epoch": 0.45772669220945084, "grad_norm": 0.75390625, "learning_rate": 1.7554800587657256e-05, "loss": 0.3164, "step": 6048 }, { "epoch": 0.4578023745328982, "grad_norm": 0.7421875, "learning_rate": 1.7554020172219223e-05, "loss": 0.292, "step": 6049 }, { "epoch": 0.4578780568563455, "grad_norm": 0.7890625, "learning_rate": 1.755323964961445e-05, "loss": 0.3245, "step": 6050 }, { "epoch": 0.4579537391797928, "grad_norm": 0.84765625, "learning_rate": 1.7552459019854008e-05, "loss": 0.3876, "step": 6051 }, { "epoch": 0.45802942150324016, "grad_norm": 0.80859375, "learning_rate": 1.7551678282948974e-05, "loss": 0.3125, "step": 6052 }, { "epoch": 0.45810510382668745, "grad_norm": 0.8359375, "learning_rate": 1.7550897438910427e-05, "loss": 0.3541, "step": 6053 }, { "epoch": 0.4581807861501348, "grad_norm": 0.77734375, "learning_rate": 1.7550116487749442e-05, "loss": 0.3115, "step": 6054 }, { "epoch": 0.45825646847358215, "grad_norm": 0.85546875, "learning_rate": 1.75493354294771e-05, "loss": 0.3426, "step": 6055 }, { "epoch": 0.4583321507970295, "grad_norm": 0.796875, "learning_rate": 1.7548554264104477e-05, "loss": 0.3084, "step": 6056 }, { "epoch": 0.4584078331204768, "grad_norm": 0.6796875, "learning_rate": 1.754777299164266e-05, "loss": 0.2495, "step": 6057 }, { "epoch": 0.45848351544392413, "grad_norm": 0.78125, "learning_rate": 1.7546991612102727e-05, "loss": 0.2968, "step": 6058 }, { "epoch": 0.4585591977673715, "grad_norm": 0.796875, "learning_rate": 1.754621012549577e-05, "loss": 0.321, "step": 6059 }, { "epoch": 0.45863488009081876, "grad_norm": 0.73046875, "learning_rate": 1.754542853183287e-05, "loss": 0.2714, "step": 6060 }, { "epoch": 0.4587105624142661, "grad_norm": 0.80859375, "learning_rate": 1.754464683112512e-05, "loss": 0.3208, "step": 6061 }, { "epoch": 0.45878624473771346, "grad_norm": 0.76953125, "learning_rate": 1.754386502338361e-05, "loss": 0.3168, "step": 6062 }, { "epoch": 0.4588619270611608, "grad_norm": 0.7421875, "learning_rate": 1.7543083108619424e-05, "loss": 0.3035, "step": 6063 }, { "epoch": 0.4589376093846081, "grad_norm": 0.91015625, "learning_rate": 1.754230108684366e-05, "loss": 0.3984, "step": 6064 }, { "epoch": 0.45901329170805544, "grad_norm": 0.73046875, "learning_rate": 1.7541518958067417e-05, "loss": 0.2803, "step": 6065 }, { "epoch": 0.4590889740315028, "grad_norm": 0.80859375, "learning_rate": 1.7540736722301784e-05, "loss": 0.2949, "step": 6066 }, { "epoch": 0.4591646563549501, "grad_norm": 0.7734375, "learning_rate": 1.7539954379557858e-05, "loss": 0.2929, "step": 6067 }, { "epoch": 0.4592403386783974, "grad_norm": 0.7890625, "learning_rate": 1.7539171929846742e-05, "loss": 0.3393, "step": 6068 }, { "epoch": 0.45931602100184477, "grad_norm": 0.75, "learning_rate": 1.7538389373179533e-05, "loss": 0.3063, "step": 6069 }, { "epoch": 0.4593917033252921, "grad_norm": 0.79296875, "learning_rate": 1.7537606709567336e-05, "loss": 0.3469, "step": 6070 }, { "epoch": 0.4594673856487394, "grad_norm": 0.81640625, "learning_rate": 1.753682393902125e-05, "loss": 0.3409, "step": 6071 }, { "epoch": 0.45954306797218675, "grad_norm": 0.921875, "learning_rate": 1.753604106155239e-05, "loss": 0.4333, "step": 6072 }, { "epoch": 0.4596187502956341, "grad_norm": 0.734375, "learning_rate": 1.7535258077171847e-05, "loss": 0.2874, "step": 6073 }, { "epoch": 0.4596944326190814, "grad_norm": 0.79296875, "learning_rate": 1.7534474985890743e-05, "loss": 0.3483, "step": 6074 }, { "epoch": 0.45977011494252873, "grad_norm": 0.88671875, "learning_rate": 1.7533691787720177e-05, "loss": 0.3643, "step": 6075 }, { "epoch": 0.4598457972659761, "grad_norm": 0.68359375, "learning_rate": 1.7532908482671267e-05, "loss": 0.2662, "step": 6076 }, { "epoch": 0.4599214795894234, "grad_norm": 0.84375, "learning_rate": 1.753212507075512e-05, "loss": 0.3723, "step": 6077 }, { "epoch": 0.4599971619128707, "grad_norm": 0.8515625, "learning_rate": 1.753134155198286e-05, "loss": 0.368, "step": 6078 }, { "epoch": 0.46007284423631806, "grad_norm": 0.80859375, "learning_rate": 1.753055792636559e-05, "loss": 0.329, "step": 6079 }, { "epoch": 0.4601485265597654, "grad_norm": 0.75, "learning_rate": 1.7529774193914436e-05, "loss": 0.3062, "step": 6080 }, { "epoch": 0.4602242088832127, "grad_norm": 0.828125, "learning_rate": 1.752899035464051e-05, "loss": 0.3861, "step": 6081 }, { "epoch": 0.46029989120666004, "grad_norm": 0.765625, "learning_rate": 1.752820640855494e-05, "loss": 0.3211, "step": 6082 }, { "epoch": 0.4603755735301074, "grad_norm": 0.7890625, "learning_rate": 1.7527422355668844e-05, "loss": 0.3404, "step": 6083 }, { "epoch": 0.46045125585355473, "grad_norm": 0.76171875, "learning_rate": 1.752663819599334e-05, "loss": 0.3183, "step": 6084 }, { "epoch": 0.460526938177002, "grad_norm": 0.78515625, "learning_rate": 1.752585392953956e-05, "loss": 0.341, "step": 6085 }, { "epoch": 0.46060262050044937, "grad_norm": 0.84765625, "learning_rate": 1.7525069556318627e-05, "loss": 0.3961, "step": 6086 }, { "epoch": 0.4606783028238967, "grad_norm": 0.7734375, "learning_rate": 1.7524285076341667e-05, "loss": 0.3365, "step": 6087 }, { "epoch": 0.460753985147344, "grad_norm": 0.796875, "learning_rate": 1.7523500489619812e-05, "loss": 0.3295, "step": 6088 }, { "epoch": 0.46082966747079135, "grad_norm": 0.734375, "learning_rate": 1.7522715796164192e-05, "loss": 0.2646, "step": 6089 }, { "epoch": 0.4609053497942387, "grad_norm": 0.7890625, "learning_rate": 1.752193099598594e-05, "loss": 0.3383, "step": 6090 }, { "epoch": 0.46098103211768604, "grad_norm": 0.8125, "learning_rate": 1.7521146089096187e-05, "loss": 0.3659, "step": 6091 }, { "epoch": 0.46105671444113333, "grad_norm": 0.8515625, "learning_rate": 1.752036107550607e-05, "loss": 0.3722, "step": 6092 }, { "epoch": 0.4611323967645807, "grad_norm": 0.7578125, "learning_rate": 1.7519575955226728e-05, "loss": 0.3281, "step": 6093 }, { "epoch": 0.461208079088028, "grad_norm": 0.734375, "learning_rate": 1.7518790728269294e-05, "loss": 0.2915, "step": 6094 }, { "epoch": 0.4612837614114753, "grad_norm": 0.75, "learning_rate": 1.7518005394644913e-05, "loss": 0.3065, "step": 6095 }, { "epoch": 0.46135944373492266, "grad_norm": 0.77734375, "learning_rate": 1.7517219954364725e-05, "loss": 0.3161, "step": 6096 }, { "epoch": 0.46143512605837, "grad_norm": 0.75390625, "learning_rate": 1.751643440743987e-05, "loss": 0.3241, "step": 6097 }, { "epoch": 0.4615108083818173, "grad_norm": 0.8515625, "learning_rate": 1.7515648753881495e-05, "loss": 0.3293, "step": 6098 }, { "epoch": 0.46158649070526464, "grad_norm": 0.828125, "learning_rate": 1.7514862993700744e-05, "loss": 0.3605, "step": 6099 }, { "epoch": 0.461662173028712, "grad_norm": 0.83984375, "learning_rate": 1.7514077126908767e-05, "loss": 0.3645, "step": 6100 }, { "epoch": 0.46173785535215933, "grad_norm": 0.83203125, "learning_rate": 1.7513291153516708e-05, "loss": 0.3329, "step": 6101 }, { "epoch": 0.4618135376756066, "grad_norm": 0.74609375, "learning_rate": 1.7512505073535723e-05, "loss": 0.3128, "step": 6102 }, { "epoch": 0.46188921999905397, "grad_norm": 0.796875, "learning_rate": 1.7511718886976964e-05, "loss": 0.3639, "step": 6103 }, { "epoch": 0.4619649023225013, "grad_norm": 0.71484375, "learning_rate": 1.751093259385158e-05, "loss": 0.2652, "step": 6104 }, { "epoch": 0.4620405846459486, "grad_norm": 0.69140625, "learning_rate": 1.751014619417073e-05, "loss": 0.2872, "step": 6105 }, { "epoch": 0.46211626696939595, "grad_norm": 1.171875, "learning_rate": 1.7509359687945564e-05, "loss": 0.3547, "step": 6106 }, { "epoch": 0.4621919492928433, "grad_norm": 0.8671875, "learning_rate": 1.750857307518725e-05, "loss": 0.3762, "step": 6107 }, { "epoch": 0.46226763161629064, "grad_norm": 0.78125, "learning_rate": 1.7507786355906934e-05, "loss": 0.3237, "step": 6108 }, { "epoch": 0.46234331393973793, "grad_norm": 0.7734375, "learning_rate": 1.750699953011579e-05, "loss": 0.3015, "step": 6109 }, { "epoch": 0.4624189962631853, "grad_norm": 0.85546875, "learning_rate": 1.7506212597824976e-05, "loss": 0.3582, "step": 6110 }, { "epoch": 0.4624946785866326, "grad_norm": 0.7109375, "learning_rate": 1.7505425559045654e-05, "loss": 0.2898, "step": 6111 }, { "epoch": 0.4625703609100799, "grad_norm": 0.734375, "learning_rate": 1.7504638413788987e-05, "loss": 0.287, "step": 6112 }, { "epoch": 0.46264604323352726, "grad_norm": 0.8671875, "learning_rate": 1.750385116206615e-05, "loss": 0.3893, "step": 6113 }, { "epoch": 0.4627217255569746, "grad_norm": 0.76953125, "learning_rate": 1.7503063803888308e-05, "loss": 0.3413, "step": 6114 }, { "epoch": 0.46279740788042195, "grad_norm": 0.76171875, "learning_rate": 1.7502276339266627e-05, "loss": 0.2996, "step": 6115 }, { "epoch": 0.46287309020386924, "grad_norm": 0.85546875, "learning_rate": 1.7501488768212285e-05, "loss": 0.3409, "step": 6116 }, { "epoch": 0.4629487725273166, "grad_norm": 0.83203125, "learning_rate": 1.750070109073645e-05, "loss": 0.3541, "step": 6117 }, { "epoch": 0.46302445485076393, "grad_norm": 0.80859375, "learning_rate": 1.74999133068503e-05, "loss": 0.3148, "step": 6118 }, { "epoch": 0.4631001371742112, "grad_norm": 0.76171875, "learning_rate": 1.7499125416565005e-05, "loss": 0.2768, "step": 6119 }, { "epoch": 0.46317581949765857, "grad_norm": 0.84375, "learning_rate": 1.749833741989175e-05, "loss": 0.3455, "step": 6120 }, { "epoch": 0.4632515018211059, "grad_norm": 0.7421875, "learning_rate": 1.749754931684171e-05, "loss": 0.2916, "step": 6121 }, { "epoch": 0.46332718414455326, "grad_norm": 0.79296875, "learning_rate": 1.7496761107426066e-05, "loss": 0.323, "step": 6122 }, { "epoch": 0.46340286646800055, "grad_norm": 0.8046875, "learning_rate": 1.7495972791656007e-05, "loss": 0.3372, "step": 6123 }, { "epoch": 0.4634785487914479, "grad_norm": 1.1640625, "learning_rate": 1.7495184369542705e-05, "loss": 0.3597, "step": 6124 }, { "epoch": 0.46355423111489524, "grad_norm": 0.79296875, "learning_rate": 1.749439584109735e-05, "loss": 0.3108, "step": 6125 }, { "epoch": 0.46362991343834253, "grad_norm": 0.796875, "learning_rate": 1.749360720633113e-05, "loss": 0.34, "step": 6126 }, { "epoch": 0.4637055957617899, "grad_norm": 0.83203125, "learning_rate": 1.7492818465255234e-05, "loss": 0.3621, "step": 6127 }, { "epoch": 0.4637812780852372, "grad_norm": 0.984375, "learning_rate": 1.749202961788085e-05, "loss": 0.3079, "step": 6128 }, { "epoch": 0.46385696040868457, "grad_norm": 0.8203125, "learning_rate": 1.7491240664219167e-05, "loss": 0.3457, "step": 6129 }, { "epoch": 0.46393264273213186, "grad_norm": 0.73046875, "learning_rate": 1.7490451604281383e-05, "loss": 0.3004, "step": 6130 }, { "epoch": 0.4640083250555792, "grad_norm": 0.77734375, "learning_rate": 1.7489662438078683e-05, "loss": 0.3142, "step": 6131 }, { "epoch": 0.46408400737902655, "grad_norm": 0.84765625, "learning_rate": 1.7488873165622274e-05, "loss": 0.3449, "step": 6132 }, { "epoch": 0.46415968970247384, "grad_norm": 0.80859375, "learning_rate": 1.7488083786923345e-05, "loss": 0.3358, "step": 6133 }, { "epoch": 0.4642353720259212, "grad_norm": 0.78125, "learning_rate": 1.7487294301993102e-05, "loss": 0.3325, "step": 6134 }, { "epoch": 0.46431105434936853, "grad_norm": 0.84765625, "learning_rate": 1.7486504710842735e-05, "loss": 0.3643, "step": 6135 }, { "epoch": 0.4643867366728159, "grad_norm": 0.78125, "learning_rate": 1.7485715013483452e-05, "loss": 0.3233, "step": 6136 }, { "epoch": 0.46446241899626317, "grad_norm": 0.7421875, "learning_rate": 1.748492520992646e-05, "loss": 0.3201, "step": 6137 }, { "epoch": 0.4645381013197105, "grad_norm": 0.80078125, "learning_rate": 1.7484135300182957e-05, "loss": 0.3382, "step": 6138 }, { "epoch": 0.46461378364315786, "grad_norm": 0.80078125, "learning_rate": 1.748334528426415e-05, "loss": 0.3533, "step": 6139 }, { "epoch": 0.46468946596660515, "grad_norm": 0.78125, "learning_rate": 1.748255516218125e-05, "loss": 0.3167, "step": 6140 }, { "epoch": 0.4647651482900525, "grad_norm": 0.69140625, "learning_rate": 1.7481764933945464e-05, "loss": 0.2485, "step": 6141 }, { "epoch": 0.46484083061349984, "grad_norm": 0.7734375, "learning_rate": 1.7480974599568002e-05, "loss": 0.3135, "step": 6142 }, { "epoch": 0.4649165129369472, "grad_norm": 0.734375, "learning_rate": 1.748018415906008e-05, "loss": 0.2722, "step": 6143 }, { "epoch": 0.4649921952603945, "grad_norm": 0.765625, "learning_rate": 1.747939361243291e-05, "loss": 0.3062, "step": 6144 }, { "epoch": 0.4650678775838418, "grad_norm": 0.8203125, "learning_rate": 1.7478602959697702e-05, "loss": 0.3635, "step": 6145 }, { "epoch": 0.4651435599072892, "grad_norm": 1.203125, "learning_rate": 1.747781220086568e-05, "loss": 0.3475, "step": 6146 }, { "epoch": 0.46521924223073646, "grad_norm": 0.7578125, "learning_rate": 1.747702133594806e-05, "loss": 0.3144, "step": 6147 }, { "epoch": 0.4652949245541838, "grad_norm": 0.7890625, "learning_rate": 1.747623036495606e-05, "loss": 0.3454, "step": 6148 }, { "epoch": 0.46537060687763115, "grad_norm": 0.76171875, "learning_rate": 1.7475439287900907e-05, "loss": 0.306, "step": 6149 }, { "epoch": 0.4654462892010785, "grad_norm": 0.796875, "learning_rate": 1.7474648104793813e-05, "loss": 0.3427, "step": 6150 }, { "epoch": 0.4655219715245258, "grad_norm": 0.82421875, "learning_rate": 1.7473856815646015e-05, "loss": 0.3298, "step": 6151 }, { "epoch": 0.46559765384797314, "grad_norm": 0.79296875, "learning_rate": 1.7473065420468725e-05, "loss": 0.3591, "step": 6152 }, { "epoch": 0.4656733361714205, "grad_norm": 0.7265625, "learning_rate": 1.7472273919273184e-05, "loss": 0.2925, "step": 6153 }, { "epoch": 0.4657490184948678, "grad_norm": 0.75390625, "learning_rate": 1.7471482312070612e-05, "loss": 0.3324, "step": 6154 }, { "epoch": 0.4658247008183151, "grad_norm": 0.9765625, "learning_rate": 1.747069059887224e-05, "loss": 0.3743, "step": 6155 }, { "epoch": 0.46590038314176246, "grad_norm": 0.83203125, "learning_rate": 1.7469898779689306e-05, "loss": 0.3397, "step": 6156 }, { "epoch": 0.46597606546520975, "grad_norm": 1.28125, "learning_rate": 1.746910685453304e-05, "loss": 0.393, "step": 6157 }, { "epoch": 0.4660517477886571, "grad_norm": 0.7421875, "learning_rate": 1.746831482341467e-05, "loss": 0.2998, "step": 6158 }, { "epoch": 0.46612743011210445, "grad_norm": 0.8125, "learning_rate": 1.746752268634544e-05, "loss": 0.3673, "step": 6159 }, { "epoch": 0.4662031124355518, "grad_norm": 0.703125, "learning_rate": 1.7466730443336586e-05, "loss": 0.278, "step": 6160 }, { "epoch": 0.4662787947589991, "grad_norm": 0.7734375, "learning_rate": 1.746593809439935e-05, "loss": 0.3222, "step": 6161 }, { "epoch": 0.46635447708244643, "grad_norm": 0.8125, "learning_rate": 1.746514563954497e-05, "loss": 0.3855, "step": 6162 }, { "epoch": 0.4664301594058938, "grad_norm": 0.76171875, "learning_rate": 1.7464353078784686e-05, "loss": 0.3082, "step": 6163 }, { "epoch": 0.46650584172934106, "grad_norm": 0.85546875, "learning_rate": 1.7463560412129746e-05, "loss": 0.3542, "step": 6164 }, { "epoch": 0.4665815240527884, "grad_norm": 0.734375, "learning_rate": 1.746276763959139e-05, "loss": 0.3126, "step": 6165 }, { "epoch": 0.46665720637623576, "grad_norm": 1.0546875, "learning_rate": 1.7461974761180875e-05, "loss": 0.3646, "step": 6166 }, { "epoch": 0.4667328886996831, "grad_norm": 0.8203125, "learning_rate": 1.7461181776909437e-05, "loss": 0.3608, "step": 6167 }, { "epoch": 0.4668085710231304, "grad_norm": 0.8046875, "learning_rate": 1.7460388686788334e-05, "loss": 0.3303, "step": 6168 }, { "epoch": 0.46688425334657774, "grad_norm": 0.78125, "learning_rate": 1.7459595490828814e-05, "loss": 0.3232, "step": 6169 }, { "epoch": 0.4669599356700251, "grad_norm": 0.75, "learning_rate": 1.7458802189042133e-05, "loss": 0.2964, "step": 6170 }, { "epoch": 0.4670356179934724, "grad_norm": 0.734375, "learning_rate": 1.7458008781439542e-05, "loss": 0.2548, "step": 6171 }, { "epoch": 0.4671113003169197, "grad_norm": 0.74609375, "learning_rate": 1.7457215268032298e-05, "loss": 0.3133, "step": 6172 }, { "epoch": 0.46718698264036707, "grad_norm": 0.80859375, "learning_rate": 1.7456421648831658e-05, "loss": 0.3076, "step": 6173 }, { "epoch": 0.4672626649638144, "grad_norm": 0.859375, "learning_rate": 1.745562792384888e-05, "loss": 0.3947, "step": 6174 }, { "epoch": 0.4673383472872617, "grad_norm": 0.8125, "learning_rate": 1.745483409309523e-05, "loss": 0.346, "step": 6175 }, { "epoch": 0.46741402961070905, "grad_norm": 0.84765625, "learning_rate": 1.745404015658196e-05, "loss": 0.3602, "step": 6176 }, { "epoch": 0.4674897119341564, "grad_norm": 0.75, "learning_rate": 1.745324611432034e-05, "loss": 0.2902, "step": 6177 }, { "epoch": 0.4675653942576037, "grad_norm": 0.78125, "learning_rate": 1.7452451966321636e-05, "loss": 0.2547, "step": 6178 }, { "epoch": 0.46764107658105103, "grad_norm": 0.8359375, "learning_rate": 1.7451657712597116e-05, "loss": 0.3545, "step": 6179 }, { "epoch": 0.4677167589044984, "grad_norm": 0.77734375, "learning_rate": 1.7450863353158036e-05, "loss": 0.2996, "step": 6180 }, { "epoch": 0.4677924412279457, "grad_norm": 1.203125, "learning_rate": 1.7450068888015678e-05, "loss": 0.3504, "step": 6181 }, { "epoch": 0.467868123551393, "grad_norm": 0.84765625, "learning_rate": 1.7449274317181304e-05, "loss": 0.3429, "step": 6182 }, { "epoch": 0.46794380587484036, "grad_norm": 0.8203125, "learning_rate": 1.7448479640666194e-05, "loss": 0.3534, "step": 6183 }, { "epoch": 0.4680194881982877, "grad_norm": 0.7578125, "learning_rate": 1.7447684858481616e-05, "loss": 0.2951, "step": 6184 }, { "epoch": 0.468095170521735, "grad_norm": 0.7421875, "learning_rate": 1.744688997063885e-05, "loss": 0.3105, "step": 6185 }, { "epoch": 0.46817085284518234, "grad_norm": 0.7734375, "learning_rate": 1.7446094977149167e-05, "loss": 0.3199, "step": 6186 }, { "epoch": 0.4682465351686297, "grad_norm": 0.703125, "learning_rate": 1.744529987802385e-05, "loss": 0.2705, "step": 6187 }, { "epoch": 0.46832221749207703, "grad_norm": 0.8203125, "learning_rate": 1.744450467327418e-05, "loss": 0.3616, "step": 6188 }, { "epoch": 0.4683978998155243, "grad_norm": 0.8125, "learning_rate": 1.7443709362911434e-05, "loss": 0.3333, "step": 6189 }, { "epoch": 0.46847358213897167, "grad_norm": 0.83984375, "learning_rate": 1.74429139469469e-05, "loss": 0.3638, "step": 6190 }, { "epoch": 0.468549264462419, "grad_norm": 0.75, "learning_rate": 1.7442118425391855e-05, "loss": 0.3182, "step": 6191 }, { "epoch": 0.4686249467858663, "grad_norm": 0.78125, "learning_rate": 1.744132279825759e-05, "loss": 0.3371, "step": 6192 }, { "epoch": 0.46870062910931365, "grad_norm": 1.0234375, "learning_rate": 1.7440527065555394e-05, "loss": 0.3646, "step": 6193 }, { "epoch": 0.468776311432761, "grad_norm": 0.6875, "learning_rate": 1.743973122729655e-05, "loss": 0.2756, "step": 6194 }, { "epoch": 0.46885199375620834, "grad_norm": 0.75390625, "learning_rate": 1.743893528349235e-05, "loss": 0.2935, "step": 6195 }, { "epoch": 0.46892767607965563, "grad_norm": 0.80859375, "learning_rate": 1.743813923415409e-05, "loss": 0.3255, "step": 6196 }, { "epoch": 0.469003358403103, "grad_norm": 0.7265625, "learning_rate": 1.7437343079293062e-05, "loss": 0.2829, "step": 6197 }, { "epoch": 0.4690790407265503, "grad_norm": 0.7890625, "learning_rate": 1.743654681892056e-05, "loss": 0.3204, "step": 6198 }, { "epoch": 0.4691547230499976, "grad_norm": 0.76953125, "learning_rate": 1.7435750453047877e-05, "loss": 0.3025, "step": 6199 }, { "epoch": 0.46923040537344496, "grad_norm": 0.83203125, "learning_rate": 1.7434953981686313e-05, "loss": 0.3552, "step": 6200 }, { "epoch": 0.4693060876968923, "grad_norm": 0.734375, "learning_rate": 1.7434157404847172e-05, "loss": 0.2843, "step": 6201 }, { "epoch": 0.46938177002033965, "grad_norm": 0.80078125, "learning_rate": 1.743336072254175e-05, "loss": 0.346, "step": 6202 }, { "epoch": 0.46945745234378694, "grad_norm": 0.74609375, "learning_rate": 1.7432563934781344e-05, "loss": 0.3197, "step": 6203 }, { "epoch": 0.4695331346672343, "grad_norm": 0.71875, "learning_rate": 1.743176704157727e-05, "loss": 0.2838, "step": 6204 }, { "epoch": 0.46960881699068163, "grad_norm": 0.8359375, "learning_rate": 1.7430970042940827e-05, "loss": 0.3344, "step": 6205 }, { "epoch": 0.4696844993141289, "grad_norm": 0.74609375, "learning_rate": 1.7430172938883318e-05, "loss": 0.2867, "step": 6206 }, { "epoch": 0.46976018163757627, "grad_norm": 0.77734375, "learning_rate": 1.7429375729416057e-05, "loss": 0.3131, "step": 6207 }, { "epoch": 0.4698358639610236, "grad_norm": 3.953125, "learning_rate": 1.7428578414550353e-05, "loss": 0.4277, "step": 6208 }, { "epoch": 0.46991154628447096, "grad_norm": 0.84375, "learning_rate": 1.7427780994297517e-05, "loss": 0.356, "step": 6209 }, { "epoch": 0.46998722860791825, "grad_norm": 0.703125, "learning_rate": 1.742698346866886e-05, "loss": 0.2687, "step": 6210 }, { "epoch": 0.4700629109313656, "grad_norm": 0.81640625, "learning_rate": 1.7426185837675698e-05, "loss": 0.3004, "step": 6211 }, { "epoch": 0.47013859325481294, "grad_norm": 0.80859375, "learning_rate": 1.7425388101329347e-05, "loss": 0.3428, "step": 6212 }, { "epoch": 0.47021427557826023, "grad_norm": 0.83203125, "learning_rate": 1.742459025964112e-05, "loss": 0.3507, "step": 6213 }, { "epoch": 0.4702899579017076, "grad_norm": 1.0546875, "learning_rate": 1.742379231262234e-05, "loss": 0.3749, "step": 6214 }, { "epoch": 0.4703656402251549, "grad_norm": 0.80859375, "learning_rate": 1.742299426028433e-05, "loss": 0.3578, "step": 6215 }, { "epoch": 0.47044132254860227, "grad_norm": 0.69921875, "learning_rate": 1.7422196102638404e-05, "loss": 0.2806, "step": 6216 }, { "epoch": 0.47051700487204956, "grad_norm": 0.77734375, "learning_rate": 1.7421397839695894e-05, "loss": 0.322, "step": 6217 }, { "epoch": 0.4705926871954969, "grad_norm": 0.8046875, "learning_rate": 1.7420599471468114e-05, "loss": 0.304, "step": 6218 }, { "epoch": 0.47066836951894425, "grad_norm": 0.7890625, "learning_rate": 1.7419800997966397e-05, "loss": 0.3334, "step": 6219 }, { "epoch": 0.47074405184239154, "grad_norm": 0.85546875, "learning_rate": 1.7419002419202077e-05, "loss": 0.3888, "step": 6220 }, { "epoch": 0.4708197341658389, "grad_norm": 0.80859375, "learning_rate": 1.741820373518647e-05, "loss": 0.3518, "step": 6221 }, { "epoch": 0.47089541648928623, "grad_norm": 0.875, "learning_rate": 1.7417404945930916e-05, "loss": 0.3531, "step": 6222 }, { "epoch": 0.4709710988127335, "grad_norm": 0.8125, "learning_rate": 1.741660605144674e-05, "loss": 0.3534, "step": 6223 }, { "epoch": 0.47104678113618087, "grad_norm": 0.875, "learning_rate": 1.7415807051745283e-05, "loss": 0.3412, "step": 6224 }, { "epoch": 0.4711224634596282, "grad_norm": 0.8203125, "learning_rate": 1.7415007946837875e-05, "loss": 0.368, "step": 6225 }, { "epoch": 0.47119814578307556, "grad_norm": 0.72265625, "learning_rate": 1.741420873673586e-05, "loss": 0.2836, "step": 6226 }, { "epoch": 0.47127382810652285, "grad_norm": 0.81640625, "learning_rate": 1.7413409421450563e-05, "loss": 0.3846, "step": 6227 }, { "epoch": 0.4713495104299702, "grad_norm": 0.7890625, "learning_rate": 1.7412610000993337e-05, "loss": 0.3407, "step": 6228 }, { "epoch": 0.47142519275341754, "grad_norm": 0.828125, "learning_rate": 1.741181047537552e-05, "loss": 0.3213, "step": 6229 }, { "epoch": 0.47150087507686483, "grad_norm": 0.8515625, "learning_rate": 1.7411010844608448e-05, "loss": 0.32, "step": 6230 }, { "epoch": 0.4715765574003122, "grad_norm": 0.78515625, "learning_rate": 1.7410211108703467e-05, "loss": 0.3185, "step": 6231 }, { "epoch": 0.4716522397237595, "grad_norm": 0.84765625, "learning_rate": 1.740941126767193e-05, "loss": 0.3676, "step": 6232 }, { "epoch": 0.47172792204720687, "grad_norm": 0.80078125, "learning_rate": 1.7408611321525177e-05, "loss": 0.3401, "step": 6233 }, { "epoch": 0.47180360437065416, "grad_norm": 0.8125, "learning_rate": 1.740781127027456e-05, "loss": 0.3746, "step": 6234 }, { "epoch": 0.4718792866941015, "grad_norm": 0.7734375, "learning_rate": 1.7407011113931425e-05, "loss": 0.3183, "step": 6235 }, { "epoch": 0.47195496901754885, "grad_norm": 0.734375, "learning_rate": 1.7406210852507128e-05, "loss": 0.2754, "step": 6236 }, { "epoch": 0.47203065134099614, "grad_norm": 0.77734375, "learning_rate": 1.7405410486013023e-05, "loss": 0.3155, "step": 6237 }, { "epoch": 0.4721063336644435, "grad_norm": 0.8125, "learning_rate": 1.740461001446046e-05, "loss": 0.3554, "step": 6238 }, { "epoch": 0.47218201598789084, "grad_norm": 0.8125, "learning_rate": 1.7403809437860796e-05, "loss": 0.3355, "step": 6239 }, { "epoch": 0.4722576983113382, "grad_norm": 0.81640625, "learning_rate": 1.740300875622539e-05, "loss": 0.361, "step": 6240 }, { "epoch": 0.47233338063478547, "grad_norm": 0.84375, "learning_rate": 1.7402207969565604e-05, "loss": 0.3751, "step": 6241 }, { "epoch": 0.4724090629582328, "grad_norm": 0.8359375, "learning_rate": 1.7401407077892792e-05, "loss": 0.3641, "step": 6242 }, { "epoch": 0.47248474528168016, "grad_norm": 0.90625, "learning_rate": 1.7400606081218317e-05, "loss": 0.3562, "step": 6243 }, { "epoch": 0.47256042760512745, "grad_norm": 0.8125, "learning_rate": 1.7399804979553552e-05, "loss": 0.3476, "step": 6244 }, { "epoch": 0.4726361099285748, "grad_norm": 0.78515625, "learning_rate": 1.7399003772909852e-05, "loss": 0.3325, "step": 6245 }, { "epoch": 0.47271179225202214, "grad_norm": 0.71875, "learning_rate": 1.7398202461298583e-05, "loss": 0.2857, "step": 6246 }, { "epoch": 0.4727874745754695, "grad_norm": 0.8828125, "learning_rate": 1.739740104473112e-05, "loss": 0.3491, "step": 6247 }, { "epoch": 0.4728631568989168, "grad_norm": 0.8515625, "learning_rate": 1.739659952321883e-05, "loss": 0.3583, "step": 6248 }, { "epoch": 0.4729388392223641, "grad_norm": 1.28125, "learning_rate": 1.739579789677308e-05, "loss": 0.3719, "step": 6249 }, { "epoch": 0.4730145215458115, "grad_norm": 0.796875, "learning_rate": 1.7394996165405244e-05, "loss": 0.3567, "step": 6250 }, { "epoch": 0.47309020386925876, "grad_norm": 0.9296875, "learning_rate": 1.73941943291267e-05, "loss": 0.3571, "step": 6251 }, { "epoch": 0.4731658861927061, "grad_norm": 0.73046875, "learning_rate": 1.7393392387948818e-05, "loss": 0.3025, "step": 6252 }, { "epoch": 0.47324156851615345, "grad_norm": 0.74609375, "learning_rate": 1.7392590341882978e-05, "loss": 0.2937, "step": 6253 }, { "epoch": 0.4733172508396008, "grad_norm": 0.8203125, "learning_rate": 1.739178819094056e-05, "loss": 0.3742, "step": 6254 }, { "epoch": 0.4733929331630481, "grad_norm": 0.82421875, "learning_rate": 1.739098593513294e-05, "loss": 0.3742, "step": 6255 }, { "epoch": 0.47346861548649544, "grad_norm": 0.84765625, "learning_rate": 1.7390183574471502e-05, "loss": 0.3508, "step": 6256 }, { "epoch": 0.4735442978099428, "grad_norm": 0.7265625, "learning_rate": 1.7389381108967625e-05, "loss": 0.2857, "step": 6257 }, { "epoch": 0.4736199801333901, "grad_norm": 0.73046875, "learning_rate": 1.7388578538632705e-05, "loss": 0.308, "step": 6258 }, { "epoch": 0.4736956624568374, "grad_norm": 0.77734375, "learning_rate": 1.738777586347811e-05, "loss": 0.3381, "step": 6259 }, { "epoch": 0.47377134478028476, "grad_norm": 0.78515625, "learning_rate": 1.738697308351524e-05, "loss": 0.3419, "step": 6260 }, { "epoch": 0.4738470271037321, "grad_norm": 0.734375, "learning_rate": 1.7386170198755482e-05, "loss": 0.2977, "step": 6261 }, { "epoch": 0.4739227094271794, "grad_norm": 0.7734375, "learning_rate": 1.738536720921022e-05, "loss": 0.334, "step": 6262 }, { "epoch": 0.47399839175062675, "grad_norm": 0.78125, "learning_rate": 1.7384564114890855e-05, "loss": 0.3188, "step": 6263 }, { "epoch": 0.4740740740740741, "grad_norm": 0.74609375, "learning_rate": 1.7383760915808772e-05, "loss": 0.3, "step": 6264 }, { "epoch": 0.4741497563975214, "grad_norm": 0.8203125, "learning_rate": 1.738295761197537e-05, "loss": 0.34, "step": 6265 }, { "epoch": 0.47422543872096873, "grad_norm": 0.76953125, "learning_rate": 1.738215420340205e-05, "loss": 0.2875, "step": 6266 }, { "epoch": 0.4743011210444161, "grad_norm": 0.76953125, "learning_rate": 1.7381350690100198e-05, "loss": 0.318, "step": 6267 }, { "epoch": 0.4743768033678634, "grad_norm": 0.8203125, "learning_rate": 1.7380547072081222e-05, "loss": 0.3317, "step": 6268 }, { "epoch": 0.4744524856913107, "grad_norm": 0.79296875, "learning_rate": 1.7379743349356523e-05, "loss": 0.3766, "step": 6269 }, { "epoch": 0.47452816801475806, "grad_norm": 0.80078125, "learning_rate": 1.7378939521937495e-05, "loss": 0.3139, "step": 6270 }, { "epoch": 0.4746038503382054, "grad_norm": 0.796875, "learning_rate": 1.737813558983555e-05, "loss": 0.3501, "step": 6271 }, { "epoch": 0.4746795326616527, "grad_norm": 0.79296875, "learning_rate": 1.7377331553062088e-05, "loss": 0.3192, "step": 6272 }, { "epoch": 0.47475521498510004, "grad_norm": 0.8515625, "learning_rate": 1.7376527411628525e-05, "loss": 0.3859, "step": 6273 }, { "epoch": 0.4748308973085474, "grad_norm": 0.8203125, "learning_rate": 1.7375723165546254e-05, "loss": 0.331, "step": 6274 }, { "epoch": 0.47490657963199473, "grad_norm": 0.75, "learning_rate": 1.7374918814826696e-05, "loss": 0.2927, "step": 6275 }, { "epoch": 0.474982261955442, "grad_norm": 0.71875, "learning_rate": 1.737411435948126e-05, "loss": 0.2855, "step": 6276 }, { "epoch": 0.47505794427888937, "grad_norm": 0.73046875, "learning_rate": 1.7373309799521355e-05, "loss": 0.2861, "step": 6277 }, { "epoch": 0.4751336266023367, "grad_norm": 0.78515625, "learning_rate": 1.73725051349584e-05, "loss": 0.3142, "step": 6278 }, { "epoch": 0.475209308925784, "grad_norm": 0.77734375, "learning_rate": 1.7371700365803808e-05, "loss": 0.3292, "step": 6279 }, { "epoch": 0.47528499124923135, "grad_norm": 0.7265625, "learning_rate": 1.7370895492068994e-05, "loss": 0.3074, "step": 6280 }, { "epoch": 0.4753606735726787, "grad_norm": 0.73046875, "learning_rate": 1.737009051376538e-05, "loss": 0.281, "step": 6281 }, { "epoch": 0.475436355896126, "grad_norm": 0.82421875, "learning_rate": 1.7369285430904382e-05, "loss": 0.3611, "step": 6282 }, { "epoch": 0.47551203821957333, "grad_norm": 0.76171875, "learning_rate": 1.7368480243497425e-05, "loss": 0.3395, "step": 6283 }, { "epoch": 0.4755877205430207, "grad_norm": 0.85546875, "learning_rate": 1.7367674951555934e-05, "loss": 0.3464, "step": 6284 }, { "epoch": 0.475663402866468, "grad_norm": 0.86328125, "learning_rate": 1.736686955509133e-05, "loss": 0.3591, "step": 6285 }, { "epoch": 0.4757390851899153, "grad_norm": 0.78125, "learning_rate": 1.736606405411504e-05, "loss": 0.3131, "step": 6286 }, { "epoch": 0.47581476751336266, "grad_norm": 0.85546875, "learning_rate": 1.7365258448638485e-05, "loss": 0.3532, "step": 6287 }, { "epoch": 0.47589044983681, "grad_norm": 1.1328125, "learning_rate": 1.7364452738673105e-05, "loss": 0.3581, "step": 6288 }, { "epoch": 0.4759661321602573, "grad_norm": 0.77734375, "learning_rate": 1.736364692423032e-05, "loss": 0.3374, "step": 6289 }, { "epoch": 0.47604181448370464, "grad_norm": 0.80078125, "learning_rate": 1.736284100532157e-05, "loss": 0.3402, "step": 6290 }, { "epoch": 0.476117496807152, "grad_norm": 0.7265625, "learning_rate": 1.736203498195829e-05, "loss": 0.2877, "step": 6291 }, { "epoch": 0.47619317913059933, "grad_norm": 0.734375, "learning_rate": 1.7361228854151902e-05, "loss": 0.2877, "step": 6292 }, { "epoch": 0.4762688614540466, "grad_norm": 0.8203125, "learning_rate": 1.7360422621913854e-05, "loss": 0.368, "step": 6293 }, { "epoch": 0.47634454377749397, "grad_norm": 0.82421875, "learning_rate": 1.735961628525558e-05, "loss": 0.3705, "step": 6294 }, { "epoch": 0.4764202261009413, "grad_norm": 0.76953125, "learning_rate": 1.735880984418852e-05, "loss": 0.3086, "step": 6295 }, { "epoch": 0.4764959084243886, "grad_norm": 0.75, "learning_rate": 1.7358003298724116e-05, "loss": 0.304, "step": 6296 }, { "epoch": 0.47657159074783595, "grad_norm": 0.7734375, "learning_rate": 1.7357196648873804e-05, "loss": 0.3238, "step": 6297 }, { "epoch": 0.4766472730712833, "grad_norm": 0.80859375, "learning_rate": 1.735638989464904e-05, "loss": 0.3586, "step": 6298 }, { "epoch": 0.47672295539473064, "grad_norm": 0.76171875, "learning_rate": 1.735558303606125e-05, "loss": 0.3439, "step": 6299 }, { "epoch": 0.47679863771817793, "grad_norm": 0.78125, "learning_rate": 1.73547760731219e-05, "loss": 0.325, "step": 6300 }, { "epoch": 0.4768743200416253, "grad_norm": 0.73828125, "learning_rate": 1.7353969005842425e-05, "loss": 0.3079, "step": 6301 }, { "epoch": 0.4769500023650726, "grad_norm": 0.79296875, "learning_rate": 1.7353161834234288e-05, "loss": 0.3272, "step": 6302 }, { "epoch": 0.4770256846885199, "grad_norm": 0.7265625, "learning_rate": 1.7352354558308924e-05, "loss": 0.2839, "step": 6303 }, { "epoch": 0.47710136701196726, "grad_norm": 0.8125, "learning_rate": 1.7351547178077798e-05, "loss": 0.3564, "step": 6304 }, { "epoch": 0.4771770493354146, "grad_norm": 0.765625, "learning_rate": 1.7350739693552356e-05, "loss": 0.3028, "step": 6305 }, { "epoch": 0.47725273165886195, "grad_norm": 0.81640625, "learning_rate": 1.734993210474406e-05, "loss": 0.3254, "step": 6306 }, { "epoch": 0.47732841398230924, "grad_norm": 0.796875, "learning_rate": 1.734912441166436e-05, "loss": 0.3297, "step": 6307 }, { "epoch": 0.4774040963057566, "grad_norm": 0.8046875, "learning_rate": 1.7348316614324724e-05, "loss": 0.3379, "step": 6308 }, { "epoch": 0.47747977862920393, "grad_norm": 0.7890625, "learning_rate": 1.7347508712736603e-05, "loss": 0.3378, "step": 6309 }, { "epoch": 0.4775554609526512, "grad_norm": 0.81640625, "learning_rate": 1.7346700706911466e-05, "loss": 0.3449, "step": 6310 }, { "epoch": 0.47763114327609857, "grad_norm": 0.703125, "learning_rate": 1.734589259686077e-05, "loss": 0.2494, "step": 6311 }, { "epoch": 0.4777068255995459, "grad_norm": 0.80078125, "learning_rate": 1.734508438259598e-05, "loss": 0.3303, "step": 6312 }, { "epoch": 0.47778250792299326, "grad_norm": 1.015625, "learning_rate": 1.7344276064128567e-05, "loss": 0.3643, "step": 6313 }, { "epoch": 0.47785819024644055, "grad_norm": 0.84765625, "learning_rate": 1.734346764146999e-05, "loss": 0.3282, "step": 6314 }, { "epoch": 0.4779338725698879, "grad_norm": 0.75, "learning_rate": 1.7342659114631727e-05, "loss": 0.3102, "step": 6315 }, { "epoch": 0.47800955489333524, "grad_norm": 0.76953125, "learning_rate": 1.7341850483625242e-05, "loss": 0.3277, "step": 6316 }, { "epoch": 0.47808523721678253, "grad_norm": 0.81640625, "learning_rate": 1.734104174846201e-05, "loss": 0.3569, "step": 6317 }, { "epoch": 0.4781609195402299, "grad_norm": 0.88671875, "learning_rate": 1.73402329091535e-05, "loss": 0.3122, "step": 6318 }, { "epoch": 0.4782366018636772, "grad_norm": 0.84375, "learning_rate": 1.7339423965711193e-05, "loss": 0.3601, "step": 6319 }, { "epoch": 0.47831228418712457, "grad_norm": 0.859375, "learning_rate": 1.7338614918146563e-05, "loss": 0.3696, "step": 6320 }, { "epoch": 0.47838796651057186, "grad_norm": 0.83984375, "learning_rate": 1.7337805766471084e-05, "loss": 0.3645, "step": 6321 }, { "epoch": 0.4784636488340192, "grad_norm": 0.81640625, "learning_rate": 1.7336996510696238e-05, "loss": 0.3518, "step": 6322 }, { "epoch": 0.47853933115746655, "grad_norm": 0.78125, "learning_rate": 1.7336187150833508e-05, "loss": 0.3292, "step": 6323 }, { "epoch": 0.47861501348091384, "grad_norm": 0.8046875, "learning_rate": 1.7335377686894373e-05, "loss": 0.3331, "step": 6324 }, { "epoch": 0.4786906958043612, "grad_norm": 0.89453125, "learning_rate": 1.7334568118890318e-05, "loss": 0.3443, "step": 6325 }, { "epoch": 0.47876637812780853, "grad_norm": 0.75390625, "learning_rate": 1.7333758446832826e-05, "loss": 0.3142, "step": 6326 }, { "epoch": 0.4788420604512559, "grad_norm": 0.70703125, "learning_rate": 1.7332948670733386e-05, "loss": 0.2825, "step": 6327 }, { "epoch": 0.47891774277470317, "grad_norm": 0.73046875, "learning_rate": 1.7332138790603487e-05, "loss": 0.2824, "step": 6328 }, { "epoch": 0.4789934250981505, "grad_norm": 0.78125, "learning_rate": 1.7331328806454615e-05, "loss": 0.2842, "step": 6329 }, { "epoch": 0.47906910742159786, "grad_norm": 0.77734375, "learning_rate": 1.7330518718298263e-05, "loss": 0.3489, "step": 6330 }, { "epoch": 0.47914478974504515, "grad_norm": 0.76171875, "learning_rate": 1.7329708526145926e-05, "loss": 0.3182, "step": 6331 }, { "epoch": 0.4792204720684925, "grad_norm": 0.6796875, "learning_rate": 1.7328898230009094e-05, "loss": 0.2346, "step": 6332 }, { "epoch": 0.47929615439193984, "grad_norm": 0.87109375, "learning_rate": 1.7328087829899262e-05, "loss": 0.3257, "step": 6333 }, { "epoch": 0.4793718367153872, "grad_norm": 0.7578125, "learning_rate": 1.732727732582793e-05, "loss": 0.277, "step": 6334 }, { "epoch": 0.4794475190388345, "grad_norm": 0.8203125, "learning_rate": 1.7326466717806597e-05, "loss": 0.3603, "step": 6335 }, { "epoch": 0.4795232013622818, "grad_norm": 0.75390625, "learning_rate": 1.7325656005846755e-05, "loss": 0.3087, "step": 6336 }, { "epoch": 0.47959888368572917, "grad_norm": 0.7109375, "learning_rate": 1.732484518995992e-05, "loss": 0.2599, "step": 6337 }, { "epoch": 0.47967456600917646, "grad_norm": 0.80859375, "learning_rate": 1.732403427015758e-05, "loss": 0.3508, "step": 6338 }, { "epoch": 0.4797502483326238, "grad_norm": 0.78125, "learning_rate": 1.7323223246451246e-05, "loss": 0.3269, "step": 6339 }, { "epoch": 0.47982593065607115, "grad_norm": 0.7890625, "learning_rate": 1.7322412118852422e-05, "loss": 0.3357, "step": 6340 }, { "epoch": 0.47990161297951844, "grad_norm": 0.7578125, "learning_rate": 1.732160088737262e-05, "loss": 0.3241, "step": 6341 }, { "epoch": 0.4799772953029658, "grad_norm": 0.7890625, "learning_rate": 1.7320789552023344e-05, "loss": 0.3266, "step": 6342 }, { "epoch": 0.48005297762641314, "grad_norm": 0.78515625, "learning_rate": 1.73199781128161e-05, "loss": 0.324, "step": 6343 }, { "epoch": 0.4801286599498605, "grad_norm": 0.69921875, "learning_rate": 1.7319166569762416e-05, "loss": 0.2712, "step": 6344 }, { "epoch": 0.48020434227330777, "grad_norm": 0.8046875, "learning_rate": 1.7318354922873785e-05, "loss": 0.3333, "step": 6345 }, { "epoch": 0.4802800245967551, "grad_norm": 0.86328125, "learning_rate": 1.7317543172161736e-05, "loss": 0.3139, "step": 6346 }, { "epoch": 0.48035570692020246, "grad_norm": 0.78125, "learning_rate": 1.7316731317637777e-05, "loss": 0.327, "step": 6347 }, { "epoch": 0.48043138924364975, "grad_norm": 0.7734375, "learning_rate": 1.7315919359313426e-05, "loss": 0.3198, "step": 6348 }, { "epoch": 0.4805070715670971, "grad_norm": 0.80078125, "learning_rate": 1.7315107297200207e-05, "loss": 0.3449, "step": 6349 }, { "epoch": 0.48058275389054445, "grad_norm": 0.8125, "learning_rate": 1.731429513130964e-05, "loss": 0.3664, "step": 6350 }, { "epoch": 0.4806584362139918, "grad_norm": 0.73828125, "learning_rate": 1.731348286165324e-05, "loss": 0.2962, "step": 6351 }, { "epoch": 0.4807341185374391, "grad_norm": 0.7734375, "learning_rate": 1.731267048824254e-05, "loss": 0.3142, "step": 6352 }, { "epoch": 0.4808098008608864, "grad_norm": 1.25, "learning_rate": 1.7311858011089055e-05, "loss": 0.3991, "step": 6353 }, { "epoch": 0.4808854831843338, "grad_norm": 0.85546875, "learning_rate": 1.731104543020432e-05, "loss": 0.3885, "step": 6354 }, { "epoch": 0.48096116550778106, "grad_norm": 0.75, "learning_rate": 1.731023274559986e-05, "loss": 0.2913, "step": 6355 }, { "epoch": 0.4810368478312284, "grad_norm": 0.79296875, "learning_rate": 1.73094199572872e-05, "loss": 0.3452, "step": 6356 }, { "epoch": 0.48111253015467575, "grad_norm": 0.71484375, "learning_rate": 1.730860706527788e-05, "loss": 0.2783, "step": 6357 }, { "epoch": 0.4811882124781231, "grad_norm": 0.80078125, "learning_rate": 1.7307794069583422e-05, "loss": 0.3199, "step": 6358 }, { "epoch": 0.4812638948015704, "grad_norm": 0.9296875, "learning_rate": 1.7306980970215367e-05, "loss": 0.2932, "step": 6359 }, { "epoch": 0.48133957712501774, "grad_norm": 0.84765625, "learning_rate": 1.7306167767185245e-05, "loss": 0.3714, "step": 6360 }, { "epoch": 0.4814152594484651, "grad_norm": 0.76953125, "learning_rate": 1.73053544605046e-05, "loss": 0.326, "step": 6361 }, { "epoch": 0.4814909417719124, "grad_norm": 0.80078125, "learning_rate": 1.7304541050184963e-05, "loss": 0.3593, "step": 6362 }, { "epoch": 0.4815666240953597, "grad_norm": 0.828125, "learning_rate": 1.7303727536237876e-05, "loss": 0.3613, "step": 6363 }, { "epoch": 0.48164230641880706, "grad_norm": 0.921875, "learning_rate": 1.730291391867488e-05, "loss": 0.4119, "step": 6364 }, { "epoch": 0.4817179887422544, "grad_norm": 0.78515625, "learning_rate": 1.730210019750752e-05, "loss": 0.3382, "step": 6365 }, { "epoch": 0.4817936710657017, "grad_norm": 0.8203125, "learning_rate": 1.7301286372747336e-05, "loss": 0.3296, "step": 6366 }, { "epoch": 0.48186935338914905, "grad_norm": 0.7890625, "learning_rate": 1.7300472444405873e-05, "loss": 0.3789, "step": 6367 }, { "epoch": 0.4819450357125964, "grad_norm": 0.75, "learning_rate": 1.7299658412494686e-05, "loss": 0.2982, "step": 6368 }, { "epoch": 0.4820207180360437, "grad_norm": 0.80078125, "learning_rate": 1.7298844277025315e-05, "loss": 0.3277, "step": 6369 }, { "epoch": 0.48209640035949103, "grad_norm": 0.90625, "learning_rate": 1.7298030038009314e-05, "loss": 0.4042, "step": 6370 }, { "epoch": 0.4821720826829384, "grad_norm": 0.79296875, "learning_rate": 1.7297215695458226e-05, "loss": 0.3459, "step": 6371 }, { "epoch": 0.4822477650063857, "grad_norm": 0.79296875, "learning_rate": 1.729640124938362e-05, "loss": 0.3256, "step": 6372 }, { "epoch": 0.482323447329833, "grad_norm": 0.7890625, "learning_rate": 1.7295586699797034e-05, "loss": 0.3005, "step": 6373 }, { "epoch": 0.48239912965328036, "grad_norm": 0.68359375, "learning_rate": 1.7294772046710034e-05, "loss": 0.2658, "step": 6374 }, { "epoch": 0.4824748119767277, "grad_norm": 0.78125, "learning_rate": 1.7293957290134178e-05, "loss": 0.3621, "step": 6375 }, { "epoch": 0.482550494300175, "grad_norm": 0.76953125, "learning_rate": 1.7293142430081017e-05, "loss": 0.293, "step": 6376 }, { "epoch": 0.48262617662362234, "grad_norm": 0.765625, "learning_rate": 1.7292327466562116e-05, "loss": 0.3463, "step": 6377 }, { "epoch": 0.4827018589470697, "grad_norm": 0.7421875, "learning_rate": 1.7291512399589038e-05, "loss": 0.3094, "step": 6378 }, { "epoch": 0.48277754127051703, "grad_norm": 0.796875, "learning_rate": 1.729069722917334e-05, "loss": 0.3471, "step": 6379 }, { "epoch": 0.4828532235939643, "grad_norm": 1.140625, "learning_rate": 1.7289881955326594e-05, "loss": 0.3847, "step": 6380 }, { "epoch": 0.48292890591741167, "grad_norm": 0.75390625, "learning_rate": 1.728906657806036e-05, "loss": 0.3255, "step": 6381 }, { "epoch": 0.483004588240859, "grad_norm": 0.796875, "learning_rate": 1.7288251097386208e-05, "loss": 0.3437, "step": 6382 }, { "epoch": 0.4830802705643063, "grad_norm": 0.875, "learning_rate": 1.728743551331571e-05, "loss": 0.3766, "step": 6383 }, { "epoch": 0.48315595288775365, "grad_norm": 0.80078125, "learning_rate": 1.728661982586043e-05, "loss": 0.3477, "step": 6384 }, { "epoch": 0.483231635211201, "grad_norm": 0.82421875, "learning_rate": 1.7285804035031945e-05, "loss": 0.3543, "step": 6385 }, { "epoch": 0.48330731753464834, "grad_norm": 1.109375, "learning_rate": 1.7284988140841825e-05, "loss": 0.3357, "step": 6386 }, { "epoch": 0.48338299985809563, "grad_norm": 0.71484375, "learning_rate": 1.728417214330165e-05, "loss": 0.2885, "step": 6387 }, { "epoch": 0.483458682181543, "grad_norm": 0.78515625, "learning_rate": 1.7283356042422992e-05, "loss": 0.346, "step": 6388 }, { "epoch": 0.4835343645049903, "grad_norm": 0.81640625, "learning_rate": 1.728253983821743e-05, "loss": 0.3823, "step": 6389 }, { "epoch": 0.4836100468284376, "grad_norm": 0.875, "learning_rate": 1.7281723530696544e-05, "loss": 0.3518, "step": 6390 }, { "epoch": 0.48368572915188496, "grad_norm": 0.78125, "learning_rate": 1.728090711987191e-05, "loss": 0.3197, "step": 6391 }, { "epoch": 0.4837614114753323, "grad_norm": 0.72265625, "learning_rate": 1.728009060575512e-05, "loss": 0.3181, "step": 6392 }, { "epoch": 0.48383709379877965, "grad_norm": 0.79296875, "learning_rate": 1.7279273988357747e-05, "loss": 0.3809, "step": 6393 }, { "epoch": 0.48391277612222694, "grad_norm": 0.9296875, "learning_rate": 1.7278457267691383e-05, "loss": 0.3006, "step": 6394 }, { "epoch": 0.4839884584456743, "grad_norm": 0.79296875, "learning_rate": 1.727764044376761e-05, "loss": 0.3212, "step": 6395 }, { "epoch": 0.48406414076912163, "grad_norm": 0.6875, "learning_rate": 1.7276823516598022e-05, "loss": 0.2864, "step": 6396 }, { "epoch": 0.4841398230925689, "grad_norm": 0.8203125, "learning_rate": 1.7276006486194203e-05, "loss": 0.3616, "step": 6397 }, { "epoch": 0.48421550541601627, "grad_norm": 0.8515625, "learning_rate": 1.7275189352567744e-05, "loss": 0.3566, "step": 6398 }, { "epoch": 0.4842911877394636, "grad_norm": 0.765625, "learning_rate": 1.7274372115730242e-05, "loss": 0.3185, "step": 6399 }, { "epoch": 0.4843668700629109, "grad_norm": 0.78515625, "learning_rate": 1.727355477569329e-05, "loss": 0.3423, "step": 6400 }, { "epoch": 0.48444255238635825, "grad_norm": 0.72265625, "learning_rate": 1.7272737332468478e-05, "loss": 0.2943, "step": 6401 }, { "epoch": 0.4845182347098056, "grad_norm": 0.84375, "learning_rate": 1.7271919786067407e-05, "loss": 0.3404, "step": 6402 }, { "epoch": 0.48459391703325294, "grad_norm": 0.7109375, "learning_rate": 1.7271102136501677e-05, "loss": 0.2731, "step": 6403 }, { "epoch": 0.48466959935670023, "grad_norm": 0.7421875, "learning_rate": 1.7270284383782882e-05, "loss": 0.2855, "step": 6404 }, { "epoch": 0.4847452816801476, "grad_norm": 0.7734375, "learning_rate": 1.726946652792263e-05, "loss": 0.3458, "step": 6405 }, { "epoch": 0.4848209640035949, "grad_norm": 0.76953125, "learning_rate": 1.726864856893252e-05, "loss": 0.3205, "step": 6406 }, { "epoch": 0.4848966463270422, "grad_norm": 0.76953125, "learning_rate": 1.7267830506824156e-05, "loss": 0.2986, "step": 6407 }, { "epoch": 0.48497232865048956, "grad_norm": 0.7421875, "learning_rate": 1.7267012341609145e-05, "loss": 0.3188, "step": 6408 }, { "epoch": 0.4850480109739369, "grad_norm": 0.82421875, "learning_rate": 1.726619407329909e-05, "loss": 0.3732, "step": 6409 }, { "epoch": 0.48512369329738425, "grad_norm": 0.73828125, "learning_rate": 1.7265375701905606e-05, "loss": 0.2896, "step": 6410 }, { "epoch": 0.48519937562083154, "grad_norm": 0.7890625, "learning_rate": 1.72645572274403e-05, "loss": 0.3452, "step": 6411 }, { "epoch": 0.4852750579442789, "grad_norm": 0.78125, "learning_rate": 1.7263738649914783e-05, "loss": 0.3348, "step": 6412 }, { "epoch": 0.48535074026772623, "grad_norm": 0.75, "learning_rate": 1.726291996934067e-05, "loss": 0.3047, "step": 6413 }, { "epoch": 0.4854264225911735, "grad_norm": 0.83203125, "learning_rate": 1.726210118572957e-05, "loss": 0.3604, "step": 6414 }, { "epoch": 0.48550210491462087, "grad_norm": 0.80078125, "learning_rate": 1.72612822990931e-05, "loss": 0.3444, "step": 6415 }, { "epoch": 0.4855777872380682, "grad_norm": 0.8046875, "learning_rate": 1.7260463309442888e-05, "loss": 0.3402, "step": 6416 }, { "epoch": 0.48565346956151556, "grad_norm": 0.76171875, "learning_rate": 1.7259644216790538e-05, "loss": 0.3267, "step": 6417 }, { "epoch": 0.48572915188496285, "grad_norm": 0.828125, "learning_rate": 1.7258825021147682e-05, "loss": 0.3426, "step": 6418 }, { "epoch": 0.4858048342084102, "grad_norm": 0.71875, "learning_rate": 1.725800572252593e-05, "loss": 0.3027, "step": 6419 }, { "epoch": 0.48588051653185754, "grad_norm": 0.8203125, "learning_rate": 1.7257186320936918e-05, "loss": 0.3508, "step": 6420 }, { "epoch": 0.48595619885530483, "grad_norm": 0.8359375, "learning_rate": 1.725636681639226e-05, "loss": 0.3637, "step": 6421 }, { "epoch": 0.4860318811787522, "grad_norm": 0.71484375, "learning_rate": 1.725554720890359e-05, "loss": 0.2762, "step": 6422 }, { "epoch": 0.4861075635021995, "grad_norm": 0.82421875, "learning_rate": 1.7254727498482528e-05, "loss": 0.336, "step": 6423 }, { "epoch": 0.48618324582564687, "grad_norm": 0.765625, "learning_rate": 1.725390768514071e-05, "loss": 0.3244, "step": 6424 }, { "epoch": 0.48625892814909416, "grad_norm": 0.74609375, "learning_rate": 1.725308776888976e-05, "loss": 0.2706, "step": 6425 }, { "epoch": 0.4863346104725415, "grad_norm": 0.8203125, "learning_rate": 1.7252267749741317e-05, "loss": 0.3167, "step": 6426 }, { "epoch": 0.48641029279598885, "grad_norm": 0.765625, "learning_rate": 1.7251447627707007e-05, "loss": 0.3487, "step": 6427 }, { "epoch": 0.48648597511943614, "grad_norm": 0.8046875, "learning_rate": 1.7250627402798472e-05, "loss": 0.3343, "step": 6428 }, { "epoch": 0.4865616574428835, "grad_norm": 0.7578125, "learning_rate": 1.7249807075027344e-05, "loss": 0.3056, "step": 6429 }, { "epoch": 0.48663733976633083, "grad_norm": 0.73046875, "learning_rate": 1.724898664440526e-05, "loss": 0.3047, "step": 6430 }, { "epoch": 0.4867130220897782, "grad_norm": 1.265625, "learning_rate": 1.7248166110943863e-05, "loss": 0.3427, "step": 6431 }, { "epoch": 0.48678870441322547, "grad_norm": 0.84765625, "learning_rate": 1.724734547465479e-05, "loss": 0.3589, "step": 6432 }, { "epoch": 0.4868643867366728, "grad_norm": 0.76171875, "learning_rate": 1.7246524735549687e-05, "loss": 0.3119, "step": 6433 }, { "epoch": 0.48694006906012016, "grad_norm": 0.7734375, "learning_rate": 1.7245703893640188e-05, "loss": 0.3163, "step": 6434 }, { "epoch": 0.48701575138356745, "grad_norm": 0.83203125, "learning_rate": 1.7244882948937952e-05, "loss": 0.3931, "step": 6435 }, { "epoch": 0.4870914337070148, "grad_norm": 0.859375, "learning_rate": 1.7244061901454617e-05, "loss": 0.3473, "step": 6436 }, { "epoch": 0.48716711603046214, "grad_norm": 0.79296875, "learning_rate": 1.7243240751201832e-05, "loss": 0.3296, "step": 6437 }, { "epoch": 0.4872427983539095, "grad_norm": 0.76953125, "learning_rate": 1.724241949819125e-05, "loss": 0.3204, "step": 6438 }, { "epoch": 0.4873184806773568, "grad_norm": 0.765625, "learning_rate": 1.724159814243451e-05, "loss": 0.3439, "step": 6439 }, { "epoch": 0.4873941630008041, "grad_norm": 0.8359375, "learning_rate": 1.7240776683943283e-05, "loss": 0.3431, "step": 6440 }, { "epoch": 0.48746984532425147, "grad_norm": 0.75390625, "learning_rate": 1.723995512272921e-05, "loss": 0.2976, "step": 6441 }, { "epoch": 0.48754552764769876, "grad_norm": 0.765625, "learning_rate": 1.723913345880395e-05, "loss": 0.2937, "step": 6442 }, { "epoch": 0.4876212099711461, "grad_norm": 0.80078125, "learning_rate": 1.723831169217916e-05, "loss": 0.3336, "step": 6443 }, { "epoch": 0.48769689229459345, "grad_norm": 0.89453125, "learning_rate": 1.7237489822866496e-05, "loss": 0.3601, "step": 6444 }, { "epoch": 0.4877725746180408, "grad_norm": 0.859375, "learning_rate": 1.7236667850877616e-05, "loss": 0.3721, "step": 6445 }, { "epoch": 0.4878482569414881, "grad_norm": 0.73828125, "learning_rate": 1.7235845776224186e-05, "loss": 0.2905, "step": 6446 }, { "epoch": 0.48792393926493544, "grad_norm": 0.7734375, "learning_rate": 1.723502359891787e-05, "loss": 0.3179, "step": 6447 }, { "epoch": 0.4879996215883828, "grad_norm": 0.82421875, "learning_rate": 1.7234201318970325e-05, "loss": 0.3628, "step": 6448 }, { "epoch": 0.48807530391183007, "grad_norm": 0.83203125, "learning_rate": 1.723337893639322e-05, "loss": 0.3359, "step": 6449 }, { "epoch": 0.4881509862352774, "grad_norm": 0.8203125, "learning_rate": 1.7232556451198223e-05, "loss": 0.3446, "step": 6450 }, { "epoch": 0.48822666855872476, "grad_norm": 1.015625, "learning_rate": 1.7231733863397e-05, "loss": 0.2921, "step": 6451 }, { "epoch": 0.4883023508821721, "grad_norm": 0.77734375, "learning_rate": 1.7230911173001223e-05, "loss": 0.3165, "step": 6452 }, { "epoch": 0.4883780332056194, "grad_norm": 0.81640625, "learning_rate": 1.7230088380022562e-05, "loss": 0.3615, "step": 6453 }, { "epoch": 0.48845371552906675, "grad_norm": 0.8359375, "learning_rate": 1.7229265484472694e-05, "loss": 0.3515, "step": 6454 }, { "epoch": 0.4885293978525141, "grad_norm": 0.8046875, "learning_rate": 1.7228442486363283e-05, "loss": 0.3143, "step": 6455 }, { "epoch": 0.4886050801759614, "grad_norm": 0.7578125, "learning_rate": 1.722761938570601e-05, "loss": 0.305, "step": 6456 }, { "epoch": 0.4886807624994087, "grad_norm": 0.8203125, "learning_rate": 1.722679618251256e-05, "loss": 0.3477, "step": 6457 }, { "epoch": 0.4887564448228561, "grad_norm": 0.8125, "learning_rate": 1.72259728767946e-05, "loss": 0.3423, "step": 6458 }, { "epoch": 0.4888321271463034, "grad_norm": 0.75390625, "learning_rate": 1.7225149468563816e-05, "loss": 0.3392, "step": 6459 }, { "epoch": 0.4889078094697507, "grad_norm": 0.78515625, "learning_rate": 1.7224325957831886e-05, "loss": 0.3416, "step": 6460 }, { "epoch": 0.48898349179319806, "grad_norm": 0.75390625, "learning_rate": 1.72235023446105e-05, "loss": 0.3058, "step": 6461 }, { "epoch": 0.4890591741166454, "grad_norm": 0.78125, "learning_rate": 1.7222678628911335e-05, "loss": 0.3377, "step": 6462 }, { "epoch": 0.4891348564400927, "grad_norm": 0.76171875, "learning_rate": 1.7221854810746076e-05, "loss": 0.3166, "step": 6463 }, { "epoch": 0.48921053876354004, "grad_norm": 0.70703125, "learning_rate": 1.7221030890126415e-05, "loss": 0.2713, "step": 6464 }, { "epoch": 0.4892862210869874, "grad_norm": 0.7890625, "learning_rate": 1.722020686706404e-05, "loss": 0.3479, "step": 6465 }, { "epoch": 0.4893619034104347, "grad_norm": 0.82421875, "learning_rate": 1.7219382741570637e-05, "loss": 0.3343, "step": 6466 }, { "epoch": 0.489437585733882, "grad_norm": 0.78125, "learning_rate": 1.7218558513657903e-05, "loss": 0.323, "step": 6467 }, { "epoch": 0.48951326805732936, "grad_norm": 0.7578125, "learning_rate": 1.721773418333753e-05, "loss": 0.3034, "step": 6468 }, { "epoch": 0.4895889503807767, "grad_norm": 0.7421875, "learning_rate": 1.721690975062121e-05, "loss": 0.2874, "step": 6469 }, { "epoch": 0.489664632704224, "grad_norm": 0.76953125, "learning_rate": 1.7216085215520644e-05, "loss": 0.3277, "step": 6470 }, { "epoch": 0.48974031502767135, "grad_norm": 0.8515625, "learning_rate": 1.7215260578047523e-05, "loss": 0.3659, "step": 6471 }, { "epoch": 0.4898159973511187, "grad_norm": 0.78515625, "learning_rate": 1.721443583821355e-05, "loss": 0.3227, "step": 6472 }, { "epoch": 0.489891679674566, "grad_norm": 0.84375, "learning_rate": 1.721361099603042e-05, "loss": 0.3813, "step": 6473 }, { "epoch": 0.48996736199801333, "grad_norm": 0.73046875, "learning_rate": 1.7212786051509843e-05, "loss": 0.3071, "step": 6474 }, { "epoch": 0.4900430443214607, "grad_norm": 0.74609375, "learning_rate": 1.7211961004663523e-05, "loss": 0.3098, "step": 6475 }, { "epoch": 0.490118726644908, "grad_norm": 0.72265625, "learning_rate": 1.7211135855503152e-05, "loss": 0.3115, "step": 6476 }, { "epoch": 0.4901944089683553, "grad_norm": 0.68359375, "learning_rate": 1.721031060404045e-05, "loss": 0.2642, "step": 6477 }, { "epoch": 0.49027009129180266, "grad_norm": 0.75390625, "learning_rate": 1.7209485250287117e-05, "loss": 0.3231, "step": 6478 }, { "epoch": 0.49034577361525, "grad_norm": 0.765625, "learning_rate": 1.720865979425486e-05, "loss": 0.3471, "step": 6479 }, { "epoch": 0.4904214559386973, "grad_norm": 0.75390625, "learning_rate": 1.7207834235955398e-05, "loss": 0.3217, "step": 6480 }, { "epoch": 0.49049713826214464, "grad_norm": 0.7890625, "learning_rate": 1.7207008575400433e-05, "loss": 0.3342, "step": 6481 }, { "epoch": 0.490572820585592, "grad_norm": 0.78515625, "learning_rate": 1.720618281260169e-05, "loss": 0.3221, "step": 6482 }, { "epoch": 0.49064850290903933, "grad_norm": 0.796875, "learning_rate": 1.7205356947570875e-05, "loss": 0.3251, "step": 6483 }, { "epoch": 0.4907241852324866, "grad_norm": 0.77734375, "learning_rate": 1.7204530980319707e-05, "loss": 0.3345, "step": 6484 }, { "epoch": 0.49079986755593397, "grad_norm": 0.71875, "learning_rate": 1.7203704910859902e-05, "loss": 0.2906, "step": 6485 }, { "epoch": 0.4908755498793813, "grad_norm": 0.76953125, "learning_rate": 1.720287873920318e-05, "loss": 0.2978, "step": 6486 }, { "epoch": 0.4909512322028286, "grad_norm": 0.83984375, "learning_rate": 1.7202052465361268e-05, "loss": 0.3756, "step": 6487 }, { "epoch": 0.49102691452627595, "grad_norm": 0.859375, "learning_rate": 1.7201226089345877e-05, "loss": 0.2814, "step": 6488 }, { "epoch": 0.4911025968497233, "grad_norm": 0.93359375, "learning_rate": 1.7200399611168742e-05, "loss": 0.3237, "step": 6489 }, { "epoch": 0.49117827917317064, "grad_norm": 0.76953125, "learning_rate": 1.7199573030841577e-05, "loss": 0.3078, "step": 6490 }, { "epoch": 0.49125396149661793, "grad_norm": 0.71484375, "learning_rate": 1.7198746348376114e-05, "loss": 0.2799, "step": 6491 }, { "epoch": 0.4913296438200653, "grad_norm": 0.79296875, "learning_rate": 1.719791956378408e-05, "loss": 0.3376, "step": 6492 }, { "epoch": 0.4914053261435126, "grad_norm": 0.703125, "learning_rate": 1.719709267707721e-05, "loss": 0.2616, "step": 6493 }, { "epoch": 0.4914810084669599, "grad_norm": 0.79296875, "learning_rate": 1.7196265688267227e-05, "loss": 0.3316, "step": 6494 }, { "epoch": 0.49155669079040726, "grad_norm": 0.7890625, "learning_rate": 1.7195438597365865e-05, "loss": 0.3412, "step": 6495 }, { "epoch": 0.4916323731138546, "grad_norm": 0.78515625, "learning_rate": 1.719461140438486e-05, "loss": 0.326, "step": 6496 }, { "epoch": 0.49170805543730195, "grad_norm": 0.77734375, "learning_rate": 1.7193784109335945e-05, "loss": 0.3117, "step": 6497 }, { "epoch": 0.49178373776074924, "grad_norm": 0.80859375, "learning_rate": 1.7192956712230855e-05, "loss": 0.3557, "step": 6498 }, { "epoch": 0.4918594200841966, "grad_norm": 0.69140625, "learning_rate": 1.7192129213081333e-05, "loss": 0.2688, "step": 6499 }, { "epoch": 0.49193510240764393, "grad_norm": 0.8359375, "learning_rate": 1.7191301611899117e-05, "loss": 0.387, "step": 6500 }, { "epoch": 0.4920107847310912, "grad_norm": 0.75390625, "learning_rate": 1.7190473908695947e-05, "loss": 0.2973, "step": 6501 }, { "epoch": 0.49208646705453857, "grad_norm": 0.78515625, "learning_rate": 1.7189646103483565e-05, "loss": 0.3466, "step": 6502 }, { "epoch": 0.4921621493779859, "grad_norm": 0.72265625, "learning_rate": 1.7188818196273715e-05, "loss": 0.2862, "step": 6503 }, { "epoch": 0.49223783170143326, "grad_norm": 0.7421875, "learning_rate": 1.718799018707814e-05, "loss": 0.3155, "step": 6504 }, { "epoch": 0.49231351402488055, "grad_norm": 0.75390625, "learning_rate": 1.718716207590859e-05, "loss": 0.2969, "step": 6505 }, { "epoch": 0.4923891963483279, "grad_norm": 0.796875, "learning_rate": 1.7186333862776815e-05, "loss": 0.3195, "step": 6506 }, { "epoch": 0.49246487867177524, "grad_norm": 0.7890625, "learning_rate": 1.718550554769456e-05, "loss": 0.3206, "step": 6507 }, { "epoch": 0.49254056099522253, "grad_norm": 0.703125, "learning_rate": 1.718467713067358e-05, "loss": 0.2735, "step": 6508 }, { "epoch": 0.4926162433186699, "grad_norm": 0.83203125, "learning_rate": 1.7183848611725623e-05, "loss": 0.3461, "step": 6509 }, { "epoch": 0.4926919256421172, "grad_norm": 0.70703125, "learning_rate": 1.718301999086245e-05, "loss": 0.2683, "step": 6510 }, { "epoch": 0.49276760796556457, "grad_norm": 0.73828125, "learning_rate": 1.7182191268095806e-05, "loss": 0.2842, "step": 6511 }, { "epoch": 0.49284329028901186, "grad_norm": 1.921875, "learning_rate": 1.7181362443437456e-05, "loss": 0.4517, "step": 6512 }, { "epoch": 0.4929189726124592, "grad_norm": 0.80078125, "learning_rate": 1.718053351689916e-05, "loss": 0.334, "step": 6513 }, { "epoch": 0.49299465493590655, "grad_norm": 0.8046875, "learning_rate": 1.7179704488492664e-05, "loss": 0.3454, "step": 6514 }, { "epoch": 0.49307033725935384, "grad_norm": 0.83984375, "learning_rate": 1.7178875358229746e-05, "loss": 0.3605, "step": 6515 }, { "epoch": 0.4931460195828012, "grad_norm": 0.78515625, "learning_rate": 1.7178046126122162e-05, "loss": 0.3582, "step": 6516 }, { "epoch": 0.49322170190624853, "grad_norm": 0.77734375, "learning_rate": 1.7177216792181678e-05, "loss": 0.3555, "step": 6517 }, { "epoch": 0.4932973842296959, "grad_norm": 0.765625, "learning_rate": 1.717638735642005e-05, "loss": 0.3348, "step": 6518 }, { "epoch": 0.49337306655314317, "grad_norm": 0.78515625, "learning_rate": 1.717555781884906e-05, "loss": 0.3425, "step": 6519 }, { "epoch": 0.4934487488765905, "grad_norm": 0.74609375, "learning_rate": 1.717472817948046e-05, "loss": 0.3072, "step": 6520 }, { "epoch": 0.49352443120003786, "grad_norm": 3.46875, "learning_rate": 1.7173898438326038e-05, "loss": 0.3997, "step": 6521 }, { "epoch": 0.49360011352348515, "grad_norm": 0.78125, "learning_rate": 1.717306859539755e-05, "loss": 0.3322, "step": 6522 }, { "epoch": 0.4936757958469325, "grad_norm": 0.73828125, "learning_rate": 1.7172238650706775e-05, "loss": 0.293, "step": 6523 }, { "epoch": 0.49375147817037984, "grad_norm": 0.734375, "learning_rate": 1.717140860426549e-05, "loss": 0.3107, "step": 6524 }, { "epoch": 0.49382716049382713, "grad_norm": 0.796875, "learning_rate": 1.7170578456085464e-05, "loss": 0.3667, "step": 6525 }, { "epoch": 0.4939028428172745, "grad_norm": 0.78125, "learning_rate": 1.716974820617848e-05, "loss": 0.3233, "step": 6526 }, { "epoch": 0.4939785251407218, "grad_norm": 0.73046875, "learning_rate": 1.716891785455631e-05, "loss": 0.2776, "step": 6527 }, { "epoch": 0.49405420746416917, "grad_norm": 0.74609375, "learning_rate": 1.716808740123074e-05, "loss": 0.2775, "step": 6528 }, { "epoch": 0.49412988978761646, "grad_norm": 0.765625, "learning_rate": 1.7167256846213552e-05, "loss": 0.3203, "step": 6529 }, { "epoch": 0.4942055721110638, "grad_norm": 0.80078125, "learning_rate": 1.7166426189516524e-05, "loss": 0.3394, "step": 6530 }, { "epoch": 0.49428125443451115, "grad_norm": 0.74609375, "learning_rate": 1.7165595431151442e-05, "loss": 0.3034, "step": 6531 }, { "epoch": 0.49435693675795844, "grad_norm": 0.7421875, "learning_rate": 1.716476457113009e-05, "loss": 0.2921, "step": 6532 }, { "epoch": 0.4944326190814058, "grad_norm": 0.73828125, "learning_rate": 1.716393360946426e-05, "loss": 0.2776, "step": 6533 }, { "epoch": 0.49450830140485313, "grad_norm": 0.84375, "learning_rate": 1.716310254616574e-05, "loss": 0.3532, "step": 6534 }, { "epoch": 0.4945839837283005, "grad_norm": 0.8125, "learning_rate": 1.7162271381246315e-05, "loss": 0.3397, "step": 6535 }, { "epoch": 0.49465966605174777, "grad_norm": 0.80078125, "learning_rate": 1.7161440114717783e-05, "loss": 0.3091, "step": 6536 }, { "epoch": 0.4947353483751951, "grad_norm": 0.78515625, "learning_rate": 1.716060874659193e-05, "loss": 0.3088, "step": 6537 }, { "epoch": 0.49481103069864246, "grad_norm": 0.78125, "learning_rate": 1.7159777276880557e-05, "loss": 0.305, "step": 6538 }, { "epoch": 0.49488671302208975, "grad_norm": 0.82421875, "learning_rate": 1.7158945705595453e-05, "loss": 0.3918, "step": 6539 }, { "epoch": 0.4949623953455371, "grad_norm": 0.80859375, "learning_rate": 1.715811403274842e-05, "loss": 0.3301, "step": 6540 }, { "epoch": 0.49503807766898444, "grad_norm": 0.7734375, "learning_rate": 1.715728225835126e-05, "loss": 0.2849, "step": 6541 }, { "epoch": 0.4951137599924318, "grad_norm": 0.83203125, "learning_rate": 1.7156450382415763e-05, "loss": 0.3706, "step": 6542 }, { "epoch": 0.4951894423158791, "grad_norm": 0.8125, "learning_rate": 1.715561840495374e-05, "loss": 0.3279, "step": 6543 }, { "epoch": 0.4952651246393264, "grad_norm": 0.69140625, "learning_rate": 1.715478632597699e-05, "loss": 0.2711, "step": 6544 }, { "epoch": 0.49534080696277377, "grad_norm": 0.7578125, "learning_rate": 1.7153954145497315e-05, "loss": 0.3066, "step": 6545 }, { "epoch": 0.49541648928622106, "grad_norm": 1.46875, "learning_rate": 1.7153121863526526e-05, "loss": 0.4334, "step": 6546 }, { "epoch": 0.4954921716096684, "grad_norm": 0.7734375, "learning_rate": 1.715228948007643e-05, "loss": 0.3232, "step": 6547 }, { "epoch": 0.49556785393311575, "grad_norm": 0.79296875, "learning_rate": 1.7151456995158833e-05, "loss": 0.352, "step": 6548 }, { "epoch": 0.4956435362565631, "grad_norm": 0.85546875, "learning_rate": 1.7150624408785544e-05, "loss": 0.3285, "step": 6549 }, { "epoch": 0.4957192185800104, "grad_norm": 0.859375, "learning_rate": 1.7149791720968378e-05, "loss": 0.3696, "step": 6550 }, { "epoch": 0.49579490090345774, "grad_norm": 0.828125, "learning_rate": 1.7148958931719145e-05, "loss": 0.3538, "step": 6551 }, { "epoch": 0.4958705832269051, "grad_norm": 0.734375, "learning_rate": 1.7148126041049667e-05, "loss": 0.2878, "step": 6552 }, { "epoch": 0.49594626555035237, "grad_norm": 0.80859375, "learning_rate": 1.714729304897175e-05, "loss": 0.3551, "step": 6553 }, { "epoch": 0.4960219478737997, "grad_norm": 0.7890625, "learning_rate": 1.7146459955497217e-05, "loss": 0.2963, "step": 6554 }, { "epoch": 0.49609763019724706, "grad_norm": 0.84375, "learning_rate": 1.7145626760637886e-05, "loss": 0.3272, "step": 6555 }, { "epoch": 0.4961733125206944, "grad_norm": 0.7734375, "learning_rate": 1.714479346440558e-05, "loss": 0.3324, "step": 6556 }, { "epoch": 0.4962489948441417, "grad_norm": 0.8359375, "learning_rate": 1.7143960066812117e-05, "loss": 0.3386, "step": 6557 }, { "epoch": 0.49632467716758905, "grad_norm": 0.7890625, "learning_rate": 1.7143126567869316e-05, "loss": 0.3268, "step": 6558 }, { "epoch": 0.4964003594910364, "grad_norm": 0.73046875, "learning_rate": 1.714229296758901e-05, "loss": 0.2837, "step": 6559 }, { "epoch": 0.4964760418144837, "grad_norm": 0.8203125, "learning_rate": 1.714145926598302e-05, "loss": 0.34, "step": 6560 }, { "epoch": 0.496551724137931, "grad_norm": 0.74609375, "learning_rate": 1.7140625463063176e-05, "loss": 0.3007, "step": 6561 }, { "epoch": 0.4966274064613784, "grad_norm": 0.78515625, "learning_rate": 1.7139791558841307e-05, "loss": 0.3277, "step": 6562 }, { "epoch": 0.4967030887848257, "grad_norm": 0.7265625, "learning_rate": 1.7138957553329242e-05, "loss": 0.2716, "step": 6563 }, { "epoch": 0.496778771108273, "grad_norm": 0.8359375, "learning_rate": 1.7138123446538813e-05, "loss": 0.3462, "step": 6564 }, { "epoch": 0.49685445343172036, "grad_norm": 0.7265625, "learning_rate": 1.7137289238481855e-05, "loss": 0.3081, "step": 6565 }, { "epoch": 0.4969301357551677, "grad_norm": 0.8125, "learning_rate": 1.7136454929170198e-05, "loss": 0.331, "step": 6566 }, { "epoch": 0.497005818078615, "grad_norm": 0.859375, "learning_rate": 1.7135620518615682e-05, "loss": 0.2513, "step": 6567 }, { "epoch": 0.49708150040206234, "grad_norm": 0.88671875, "learning_rate": 1.7134786006830147e-05, "loss": 0.3582, "step": 6568 }, { "epoch": 0.4971571827255097, "grad_norm": 0.7421875, "learning_rate": 1.7133951393825426e-05, "loss": 0.2875, "step": 6569 }, { "epoch": 0.49723286504895703, "grad_norm": 0.83984375, "learning_rate": 1.7133116679613362e-05, "loss": 0.3597, "step": 6570 }, { "epoch": 0.4973085473724043, "grad_norm": 0.8359375, "learning_rate": 1.7132281864205798e-05, "loss": 0.3702, "step": 6571 }, { "epoch": 0.49738422969585167, "grad_norm": 0.7734375, "learning_rate": 1.7131446947614575e-05, "loss": 0.3283, "step": 6572 }, { "epoch": 0.497459912019299, "grad_norm": 0.76171875, "learning_rate": 1.713061192985154e-05, "loss": 0.3111, "step": 6573 }, { "epoch": 0.4975355943427463, "grad_norm": 0.71484375, "learning_rate": 1.712977681092854e-05, "loss": 0.2902, "step": 6574 }, { "epoch": 0.49761127666619365, "grad_norm": 0.7265625, "learning_rate": 1.712894159085742e-05, "loss": 0.2925, "step": 6575 }, { "epoch": 0.497686958989641, "grad_norm": 0.796875, "learning_rate": 1.7128106269650028e-05, "loss": 0.341, "step": 6576 }, { "epoch": 0.49776264131308834, "grad_norm": 0.83203125, "learning_rate": 1.7127270847318217e-05, "loss": 0.3502, "step": 6577 }, { "epoch": 0.49783832363653563, "grad_norm": 0.72265625, "learning_rate": 1.712643532387384e-05, "loss": 0.2936, "step": 6578 }, { "epoch": 0.497914005959983, "grad_norm": 0.8046875, "learning_rate": 1.7125599699328746e-05, "loss": 0.3691, "step": 6579 }, { "epoch": 0.4979896882834303, "grad_norm": 0.87109375, "learning_rate": 1.7124763973694793e-05, "loss": 0.3992, "step": 6580 }, { "epoch": 0.4980653706068776, "grad_norm": 0.72265625, "learning_rate": 1.712392814698384e-05, "loss": 0.2982, "step": 6581 }, { "epoch": 0.49814105293032496, "grad_norm": 0.76171875, "learning_rate": 1.7123092219207736e-05, "loss": 0.3284, "step": 6582 }, { "epoch": 0.4982167352537723, "grad_norm": 0.73046875, "learning_rate": 1.7122256190378346e-05, "loss": 0.2936, "step": 6583 }, { "epoch": 0.4982924175772196, "grad_norm": 0.7109375, "learning_rate": 1.712142006050753e-05, "loss": 0.2827, "step": 6584 }, { "epoch": 0.49836809990066694, "grad_norm": 0.79296875, "learning_rate": 1.7120583829607152e-05, "loss": 0.3437, "step": 6585 }, { "epoch": 0.4984437822241143, "grad_norm": 0.83984375, "learning_rate": 1.7119747497689072e-05, "loss": 0.3579, "step": 6586 }, { "epoch": 0.49851946454756163, "grad_norm": 1.1796875, "learning_rate": 1.711891106476516e-05, "loss": 0.3969, "step": 6587 }, { "epoch": 0.4985951468710089, "grad_norm": 0.73828125, "learning_rate": 1.711807453084727e-05, "loss": 0.3198, "step": 6588 }, { "epoch": 0.49867082919445627, "grad_norm": 0.7578125, "learning_rate": 1.711723789594728e-05, "loss": 0.3161, "step": 6589 }, { "epoch": 0.4987465115179036, "grad_norm": 0.8515625, "learning_rate": 1.711640116007706e-05, "loss": 0.3911, "step": 6590 }, { "epoch": 0.4988221938413509, "grad_norm": 0.8359375, "learning_rate": 1.7115564323248475e-05, "loss": 0.3756, "step": 6591 }, { "epoch": 0.49889787616479825, "grad_norm": 0.82421875, "learning_rate": 1.71147273854734e-05, "loss": 0.3678, "step": 6592 }, { "epoch": 0.4989735584882456, "grad_norm": 0.76953125, "learning_rate": 1.711389034676371e-05, "loss": 0.304, "step": 6593 }, { "epoch": 0.49904924081169294, "grad_norm": 0.76953125, "learning_rate": 1.7113053207131274e-05, "loss": 0.3014, "step": 6594 }, { "epoch": 0.49912492313514023, "grad_norm": 0.78515625, "learning_rate": 1.711221596658797e-05, "loss": 0.346, "step": 6595 }, { "epoch": 0.4992006054585876, "grad_norm": 0.8203125, "learning_rate": 1.7111378625145684e-05, "loss": 0.3651, "step": 6596 }, { "epoch": 0.4992762877820349, "grad_norm": 0.8359375, "learning_rate": 1.7110541182816284e-05, "loss": 0.3332, "step": 6597 }, { "epoch": 0.4993519701054822, "grad_norm": 0.83203125, "learning_rate": 1.710970363961166e-05, "loss": 0.3792, "step": 6598 }, { "epoch": 0.49942765242892956, "grad_norm": 0.921875, "learning_rate": 1.7108865995543684e-05, "loss": 0.3715, "step": 6599 }, { "epoch": 0.4995033347523769, "grad_norm": 0.7421875, "learning_rate": 1.7108028250624245e-05, "loss": 0.3119, "step": 6600 }, { "epoch": 0.49957901707582425, "grad_norm": 0.8046875, "learning_rate": 1.7107190404865232e-05, "loss": 0.3639, "step": 6601 }, { "epoch": 0.49965469939927154, "grad_norm": 0.7734375, "learning_rate": 1.7106352458278524e-05, "loss": 0.3145, "step": 6602 }, { "epoch": 0.4997303817227189, "grad_norm": 0.78125, "learning_rate": 1.710551441087601e-05, "loss": 0.3276, "step": 6603 }, { "epoch": 0.49980606404616623, "grad_norm": 0.7890625, "learning_rate": 1.710467626266958e-05, "loss": 0.3532, "step": 6604 }, { "epoch": 0.4998817463696135, "grad_norm": 0.765625, "learning_rate": 1.7103838013671126e-05, "loss": 0.3113, "step": 6605 }, { "epoch": 0.49995742869306087, "grad_norm": 0.8046875, "learning_rate": 1.710299966389254e-05, "loss": 0.355, "step": 6606 }, { "epoch": 0.5000331110165082, "grad_norm": 1.046875, "learning_rate": 1.7102161213345716e-05, "loss": 0.3393, "step": 6607 }, { "epoch": 0.5001087933399555, "grad_norm": 0.80859375, "learning_rate": 1.7101322662042546e-05, "loss": 0.3443, "step": 6608 }, { "epoch": 0.5001844756634028, "grad_norm": 0.8203125, "learning_rate": 1.7100484009994927e-05, "loss": 0.3615, "step": 6609 }, { "epoch": 0.5002601579868502, "grad_norm": 0.7421875, "learning_rate": 1.709964525721476e-05, "loss": 0.2891, "step": 6610 }, { "epoch": 0.5002601579868502, "eval_loss": 0.3412069082260132, "eval_runtime": 83.9577, "eval_samples_per_second": 57.91, "eval_steps_per_second": 57.91, "step": 6610 }, { "epoch": 0.5003358403102975, "grad_norm": 0.828125, "learning_rate": 1.709880640371394e-05, "loss": 0.3405, "step": 6611 }, { "epoch": 0.5004115226337449, "grad_norm": 0.8984375, "learning_rate": 1.709796744950437e-05, "loss": 0.3942, "step": 6612 }, { "epoch": 0.5004872049571922, "grad_norm": 0.7109375, "learning_rate": 1.7097128394597948e-05, "loss": 0.2746, "step": 6613 }, { "epoch": 0.5005628872806395, "grad_norm": 0.7734375, "learning_rate": 1.7096289239006583e-05, "loss": 0.3085, "step": 6614 }, { "epoch": 0.5006385696040868, "grad_norm": 0.7734375, "learning_rate": 1.7095449982742177e-05, "loss": 0.3306, "step": 6615 }, { "epoch": 0.5007142519275342, "grad_norm": 0.76171875, "learning_rate": 1.7094610625816637e-05, "loss": 0.323, "step": 6616 }, { "epoch": 0.5007899342509815, "grad_norm": 0.83203125, "learning_rate": 1.709377116824187e-05, "loss": 0.371, "step": 6617 }, { "epoch": 0.5008656165744289, "grad_norm": 0.71484375, "learning_rate": 1.709293161002979e-05, "loss": 0.2974, "step": 6618 }, { "epoch": 0.5009412988978762, "grad_norm": 0.75, "learning_rate": 1.7092091951192298e-05, "loss": 0.3231, "step": 6619 }, { "epoch": 0.5010169812213235, "grad_norm": 0.75, "learning_rate": 1.7091252191741312e-05, "loss": 0.2759, "step": 6620 }, { "epoch": 0.5010926635447708, "grad_norm": 0.74609375, "learning_rate": 1.7090412331688745e-05, "loss": 0.3117, "step": 6621 }, { "epoch": 0.5011683458682181, "grad_norm": 0.7265625, "learning_rate": 1.7089572371046513e-05, "loss": 0.3055, "step": 6622 }, { "epoch": 0.5012440281916655, "grad_norm": 0.74609375, "learning_rate": 1.708873230982653e-05, "loss": 0.3032, "step": 6623 }, { "epoch": 0.5013197105151128, "grad_norm": 0.81640625, "learning_rate": 1.7087892148040713e-05, "loss": 0.3519, "step": 6624 }, { "epoch": 0.5013953928385602, "grad_norm": 0.69140625, "learning_rate": 1.7087051885700984e-05, "loss": 0.2684, "step": 6625 }, { "epoch": 0.5014710751620075, "grad_norm": 0.7421875, "learning_rate": 1.708621152281926e-05, "loss": 0.3071, "step": 6626 }, { "epoch": 0.5015467574854549, "grad_norm": 0.84375, "learning_rate": 1.708537105940747e-05, "loss": 0.3731, "step": 6627 }, { "epoch": 0.5016224398089021, "grad_norm": 0.79296875, "learning_rate": 1.708453049547753e-05, "loss": 0.354, "step": 6628 }, { "epoch": 0.5016981221323494, "grad_norm": 0.79296875, "learning_rate": 1.708368983104137e-05, "loss": 0.3624, "step": 6629 }, { "epoch": 0.5017738044557968, "grad_norm": 0.9921875, "learning_rate": 1.708284906611091e-05, "loss": 0.295, "step": 6630 }, { "epoch": 0.5018494867792441, "grad_norm": 0.765625, "learning_rate": 1.7082008200698082e-05, "loss": 0.3231, "step": 6631 }, { "epoch": 0.5019251691026915, "grad_norm": 0.84375, "learning_rate": 1.7081167234814814e-05, "loss": 0.3682, "step": 6632 }, { "epoch": 0.5020008514261388, "grad_norm": 0.80859375, "learning_rate": 1.7080326168473038e-05, "loss": 0.3327, "step": 6633 }, { "epoch": 0.5020765337495862, "grad_norm": 0.7734375, "learning_rate": 1.7079485001684685e-05, "loss": 0.327, "step": 6634 }, { "epoch": 0.5021522160730334, "grad_norm": 0.72265625, "learning_rate": 1.707864373446169e-05, "loss": 0.2963, "step": 6635 }, { "epoch": 0.5022278983964807, "grad_norm": 0.7890625, "learning_rate": 1.7077802366815986e-05, "loss": 0.3355, "step": 6636 }, { "epoch": 0.5023035807199281, "grad_norm": 0.7734375, "learning_rate": 1.7076960898759508e-05, "loss": 0.326, "step": 6637 }, { "epoch": 0.5023792630433754, "grad_norm": 0.7734375, "learning_rate": 1.7076119330304194e-05, "loss": 0.3266, "step": 6638 }, { "epoch": 0.5024549453668228, "grad_norm": 0.7734375, "learning_rate": 1.7075277661461987e-05, "loss": 0.3108, "step": 6639 }, { "epoch": 0.5025306276902701, "grad_norm": 0.78515625, "learning_rate": 1.7074435892244824e-05, "loss": 0.3375, "step": 6640 }, { "epoch": 0.5026063100137175, "grad_norm": 0.78125, "learning_rate": 1.7073594022664646e-05, "loss": 0.3356, "step": 6641 }, { "epoch": 0.5026819923371647, "grad_norm": 0.77734375, "learning_rate": 1.70727520527334e-05, "loss": 0.3529, "step": 6642 }, { "epoch": 0.502757674660612, "grad_norm": 0.765625, "learning_rate": 1.707190998246303e-05, "loss": 0.272, "step": 6643 }, { "epoch": 0.5028333569840594, "grad_norm": 0.81640625, "learning_rate": 1.7071067811865477e-05, "loss": 0.3553, "step": 6644 }, { "epoch": 0.5029090393075067, "grad_norm": 0.8046875, "learning_rate": 1.7070225540952696e-05, "loss": 0.341, "step": 6645 }, { "epoch": 0.5029847216309541, "grad_norm": 0.7265625, "learning_rate": 1.706938316973663e-05, "loss": 0.2772, "step": 6646 }, { "epoch": 0.5030604039544014, "grad_norm": 0.82421875, "learning_rate": 1.7068540698229235e-05, "loss": 0.3206, "step": 6647 }, { "epoch": 0.5031360862778488, "grad_norm": 0.77734375, "learning_rate": 1.7067698126442456e-05, "loss": 0.3203, "step": 6648 }, { "epoch": 0.503211768601296, "grad_norm": 0.79296875, "learning_rate": 1.7066855454388257e-05, "loss": 0.3247, "step": 6649 }, { "epoch": 0.5032874509247434, "grad_norm": 0.79296875, "learning_rate": 1.706601268207858e-05, "loss": 0.3172, "step": 6650 }, { "epoch": 0.5033631332481907, "grad_norm": 0.734375, "learning_rate": 1.7065169809525392e-05, "loss": 0.3106, "step": 6651 }, { "epoch": 0.503438815571638, "grad_norm": 0.78515625, "learning_rate": 1.7064326836740646e-05, "loss": 0.3287, "step": 6652 }, { "epoch": 0.5035144978950854, "grad_norm": 0.82421875, "learning_rate": 1.70634837637363e-05, "loss": 0.3078, "step": 6653 }, { "epoch": 0.5035901802185327, "grad_norm": 0.78125, "learning_rate": 1.7062640590524314e-05, "loss": 0.3509, "step": 6654 }, { "epoch": 0.5036658625419801, "grad_norm": 0.74609375, "learning_rate": 1.7061797317116652e-05, "loss": 0.2799, "step": 6655 }, { "epoch": 0.5037415448654273, "grad_norm": 0.76953125, "learning_rate": 1.7060953943525277e-05, "loss": 0.3064, "step": 6656 }, { "epoch": 0.5038172271888747, "grad_norm": 0.76953125, "learning_rate": 1.7060110469762152e-05, "loss": 0.3389, "step": 6657 }, { "epoch": 0.503892909512322, "grad_norm": 0.76953125, "learning_rate": 1.7059266895839245e-05, "loss": 0.3282, "step": 6658 }, { "epoch": 0.5039685918357694, "grad_norm": 0.7421875, "learning_rate": 1.7058423221768525e-05, "loss": 0.2913, "step": 6659 }, { "epoch": 0.5040442741592167, "grad_norm": 0.75390625, "learning_rate": 1.7057579447561957e-05, "loss": 0.3142, "step": 6660 }, { "epoch": 0.5041199564826641, "grad_norm": 0.765625, "learning_rate": 1.7056735573231513e-05, "loss": 0.3027, "step": 6661 }, { "epoch": 0.5041956388061114, "grad_norm": 0.7421875, "learning_rate": 1.705589159878917e-05, "loss": 0.2945, "step": 6662 }, { "epoch": 0.5042713211295586, "grad_norm": 0.73828125, "learning_rate": 1.705504752424689e-05, "loss": 0.2943, "step": 6663 }, { "epoch": 0.504347003453006, "grad_norm": 0.97265625, "learning_rate": 1.7054203349616652e-05, "loss": 0.3494, "step": 6664 }, { "epoch": 0.5044226857764533, "grad_norm": 0.80859375, "learning_rate": 1.705335907491044e-05, "loss": 0.3204, "step": 6665 }, { "epoch": 0.5044983680999007, "grad_norm": 0.76171875, "learning_rate": 1.7052514700140227e-05, "loss": 0.3238, "step": 6666 }, { "epoch": 0.504574050423348, "grad_norm": 0.80078125, "learning_rate": 1.7051670225317983e-05, "loss": 0.3559, "step": 6667 }, { "epoch": 0.5046497327467954, "grad_norm": 0.78125, "learning_rate": 1.7050825650455704e-05, "loss": 0.3407, "step": 6668 }, { "epoch": 0.5047254150702427, "grad_norm": 0.7734375, "learning_rate": 1.7049980975565355e-05, "loss": 0.3373, "step": 6669 }, { "epoch": 0.50480109739369, "grad_norm": 0.75, "learning_rate": 1.7049136200658935e-05, "loss": 0.2984, "step": 6670 }, { "epoch": 0.5048767797171373, "grad_norm": 1.0078125, "learning_rate": 1.7048291325748417e-05, "loss": 0.3569, "step": 6671 }, { "epoch": 0.5049524620405846, "grad_norm": 0.75390625, "learning_rate": 1.7047446350845795e-05, "loss": 0.3078, "step": 6672 }, { "epoch": 0.505028144364032, "grad_norm": 1.1328125, "learning_rate": 1.704660127596305e-05, "loss": 0.3759, "step": 6673 }, { "epoch": 0.5051038266874793, "grad_norm": 0.765625, "learning_rate": 1.7045756101112175e-05, "loss": 0.2937, "step": 6674 }, { "epoch": 0.5051795090109267, "grad_norm": 0.8046875, "learning_rate": 1.7044910826305157e-05, "loss": 0.3387, "step": 6675 }, { "epoch": 0.5052551913343739, "grad_norm": 0.7421875, "learning_rate": 1.704406545155399e-05, "loss": 0.273, "step": 6676 }, { "epoch": 0.5053308736578213, "grad_norm": 0.78515625, "learning_rate": 1.7043219976870665e-05, "loss": 0.3392, "step": 6677 }, { "epoch": 0.5054065559812686, "grad_norm": 0.8984375, "learning_rate": 1.704237440226718e-05, "loss": 0.3333, "step": 6678 }, { "epoch": 0.505482238304716, "grad_norm": 0.7421875, "learning_rate": 1.704152872775553e-05, "loss": 0.3093, "step": 6679 }, { "epoch": 0.5055579206281633, "grad_norm": 0.8125, "learning_rate": 1.704068295334771e-05, "loss": 0.3104, "step": 6680 }, { "epoch": 0.5056336029516106, "grad_norm": 0.7890625, "learning_rate": 1.703983707905572e-05, "loss": 0.2974, "step": 6681 }, { "epoch": 0.505709285275058, "grad_norm": 0.8984375, "learning_rate": 1.703899110489156e-05, "loss": 0.3434, "step": 6682 }, { "epoch": 0.5057849675985052, "grad_norm": 0.83203125, "learning_rate": 1.703814503086723e-05, "loss": 0.3653, "step": 6683 }, { "epoch": 0.5058606499219526, "grad_norm": 3.34375, "learning_rate": 1.703729885699474e-05, "loss": 0.3513, "step": 6684 }, { "epoch": 0.5059363322453999, "grad_norm": 0.7734375, "learning_rate": 1.7036452583286085e-05, "loss": 0.3073, "step": 6685 }, { "epoch": 0.5060120145688473, "grad_norm": 0.77734375, "learning_rate": 1.7035606209753276e-05, "loss": 0.3119, "step": 6686 }, { "epoch": 0.5060876968922946, "grad_norm": 0.76171875, "learning_rate": 1.703475973640832e-05, "loss": 0.3032, "step": 6687 }, { "epoch": 0.506163379215742, "grad_norm": 0.74609375, "learning_rate": 1.7033913163263224e-05, "loss": 0.3188, "step": 6688 }, { "epoch": 0.5062390615391893, "grad_norm": 0.828125, "learning_rate": 1.703306649033e-05, "loss": 0.3817, "step": 6689 }, { "epoch": 0.5063147438626365, "grad_norm": 0.78515625, "learning_rate": 1.7032219717620663e-05, "loss": 0.3303, "step": 6690 }, { "epoch": 0.5063904261860839, "grad_norm": 0.76953125, "learning_rate": 1.7031372845147212e-05, "loss": 0.3222, "step": 6691 }, { "epoch": 0.5064661085095312, "grad_norm": 0.73046875, "learning_rate": 1.703052587292168e-05, "loss": 0.264, "step": 6692 }, { "epoch": 0.5065417908329786, "grad_norm": 0.76171875, "learning_rate": 1.702967880095607e-05, "loss": 0.3297, "step": 6693 }, { "epoch": 0.5066174731564259, "grad_norm": 1.2890625, "learning_rate": 1.70288316292624e-05, "loss": 0.3611, "step": 6694 }, { "epoch": 0.5066931554798733, "grad_norm": 0.78515625, "learning_rate": 1.7027984357852697e-05, "loss": 0.3472, "step": 6695 }, { "epoch": 0.5067688378033206, "grad_norm": 0.77734375, "learning_rate": 1.7027136986738974e-05, "loss": 0.3124, "step": 6696 }, { "epoch": 0.5068445201267678, "grad_norm": 0.78125, "learning_rate": 1.702628951593325e-05, "loss": 0.3248, "step": 6697 }, { "epoch": 0.5069202024502152, "grad_norm": 0.74609375, "learning_rate": 1.7025441945447557e-05, "loss": 0.3065, "step": 6698 }, { "epoch": 0.5069958847736625, "grad_norm": 0.828125, "learning_rate": 1.7024594275293907e-05, "loss": 0.349, "step": 6699 }, { "epoch": 0.5070715670971099, "grad_norm": 0.8359375, "learning_rate": 1.7023746505484337e-05, "loss": 0.3405, "step": 6700 }, { "epoch": 0.5071472494205572, "grad_norm": 1.21875, "learning_rate": 1.7022898636030867e-05, "loss": 0.4052, "step": 6701 }, { "epoch": 0.5072229317440046, "grad_norm": 0.71484375, "learning_rate": 1.7022050666945534e-05, "loss": 0.2768, "step": 6702 }, { "epoch": 0.5072986140674519, "grad_norm": 0.90625, "learning_rate": 1.702120259824036e-05, "loss": 0.3702, "step": 6703 }, { "epoch": 0.5073742963908991, "grad_norm": 0.83203125, "learning_rate": 1.7020354429927376e-05, "loss": 0.369, "step": 6704 }, { "epoch": 0.5074499787143465, "grad_norm": 0.796875, "learning_rate": 1.7019506162018617e-05, "loss": 0.3317, "step": 6705 }, { "epoch": 0.5075256610377938, "grad_norm": 0.74609375, "learning_rate": 1.701865779452612e-05, "loss": 0.3226, "step": 6706 }, { "epoch": 0.5076013433612412, "grad_norm": 0.796875, "learning_rate": 1.7017809327461914e-05, "loss": 0.3527, "step": 6707 }, { "epoch": 0.5076770256846885, "grad_norm": 0.8359375, "learning_rate": 1.7016960760838045e-05, "loss": 0.3396, "step": 6708 }, { "epoch": 0.5077527080081359, "grad_norm": 0.79296875, "learning_rate": 1.701611209466654e-05, "loss": 0.3121, "step": 6709 }, { "epoch": 0.5078283903315832, "grad_norm": 0.7890625, "learning_rate": 1.701526332895945e-05, "loss": 0.3402, "step": 6710 }, { "epoch": 0.5079040726550305, "grad_norm": 0.73828125, "learning_rate": 1.7014414463728806e-05, "loss": 0.3145, "step": 6711 }, { "epoch": 0.5079797549784778, "grad_norm": 0.73828125, "learning_rate": 1.7013565498986662e-05, "loss": 0.3138, "step": 6712 }, { "epoch": 0.5080554373019252, "grad_norm": 0.76171875, "learning_rate": 1.7012716434745052e-05, "loss": 0.3522, "step": 6713 }, { "epoch": 0.5081311196253725, "grad_norm": 0.85546875, "learning_rate": 1.7011867271016023e-05, "loss": 0.4048, "step": 6714 }, { "epoch": 0.5082068019488198, "grad_norm": 0.8125, "learning_rate": 1.701101800781163e-05, "loss": 0.3423, "step": 6715 }, { "epoch": 0.5082824842722672, "grad_norm": 0.76953125, "learning_rate": 1.701016864514391e-05, "loss": 0.335, "step": 6716 }, { "epoch": 0.5083581665957145, "grad_norm": 36.0, "learning_rate": 1.7009319183024916e-05, "loss": 0.4992, "step": 6717 }, { "epoch": 0.5084338489191618, "grad_norm": 0.7578125, "learning_rate": 1.7008469621466708e-05, "loss": 0.3183, "step": 6718 }, { "epoch": 0.5085095312426091, "grad_norm": 0.8203125, "learning_rate": 1.7007619960481326e-05, "loss": 0.354, "step": 6719 }, { "epoch": 0.5085852135660565, "grad_norm": 0.79296875, "learning_rate": 1.700677020008083e-05, "loss": 0.3285, "step": 6720 }, { "epoch": 0.5086608958895038, "grad_norm": 0.7265625, "learning_rate": 1.7005920340277276e-05, "loss": 0.2658, "step": 6721 }, { "epoch": 0.5087365782129512, "grad_norm": 0.71875, "learning_rate": 1.7005070381082724e-05, "loss": 0.2755, "step": 6722 }, { "epoch": 0.5088122605363985, "grad_norm": 0.83203125, "learning_rate": 1.700422032250922e-05, "loss": 0.3215, "step": 6723 }, { "epoch": 0.5088879428598458, "grad_norm": 0.796875, "learning_rate": 1.7003370164568834e-05, "loss": 0.3231, "step": 6724 }, { "epoch": 0.5089636251832931, "grad_norm": 0.7890625, "learning_rate": 1.7002519907273623e-05, "loss": 0.3272, "step": 6725 }, { "epoch": 0.5090393075067404, "grad_norm": 0.8046875, "learning_rate": 1.7001669550635653e-05, "loss": 0.333, "step": 6726 }, { "epoch": 0.5091149898301878, "grad_norm": 0.81640625, "learning_rate": 1.7000819094666984e-05, "loss": 0.3644, "step": 6727 }, { "epoch": 0.5091906721536351, "grad_norm": 0.80078125, "learning_rate": 1.699996853937968e-05, "loss": 0.3472, "step": 6728 }, { "epoch": 0.5092663544770825, "grad_norm": 0.7578125, "learning_rate": 1.6999117884785813e-05, "loss": 0.2749, "step": 6729 }, { "epoch": 0.5093420368005298, "grad_norm": 0.90234375, "learning_rate": 1.6998267130897445e-05, "loss": 0.385, "step": 6730 }, { "epoch": 0.5094177191239772, "grad_norm": 0.84375, "learning_rate": 1.699741627772665e-05, "loss": 0.3757, "step": 6731 }, { "epoch": 0.5094934014474244, "grad_norm": 0.70703125, "learning_rate": 1.69965653252855e-05, "loss": 0.2822, "step": 6732 }, { "epoch": 0.5095690837708717, "grad_norm": 0.94921875, "learning_rate": 1.699571427358606e-05, "loss": 0.312, "step": 6733 }, { "epoch": 0.5096447660943191, "grad_norm": 0.7734375, "learning_rate": 1.6994863122640407e-05, "loss": 0.3312, "step": 6734 }, { "epoch": 0.5097204484177664, "grad_norm": 0.7890625, "learning_rate": 1.699401187246062e-05, "loss": 0.3557, "step": 6735 }, { "epoch": 0.5097961307412138, "grad_norm": 0.83984375, "learning_rate": 1.6993160523058775e-05, "loss": 0.33, "step": 6736 }, { "epoch": 0.5098718130646611, "grad_norm": 0.85546875, "learning_rate": 1.6992309074446943e-05, "loss": 0.3425, "step": 6737 }, { "epoch": 0.5099474953881085, "grad_norm": 40.75, "learning_rate": 1.699145752663721e-05, "loss": 0.3391, "step": 6738 }, { "epoch": 0.5100231777115557, "grad_norm": 0.8046875, "learning_rate": 1.6990605879641652e-05, "loss": 0.3045, "step": 6739 }, { "epoch": 0.510098860035003, "grad_norm": 0.703125, "learning_rate": 1.6989754133472357e-05, "loss": 0.2727, "step": 6740 }, { "epoch": 0.5101745423584504, "grad_norm": 0.79296875, "learning_rate": 1.69889022881414e-05, "loss": 0.3148, "step": 6741 }, { "epoch": 0.5102502246818977, "grad_norm": 0.79296875, "learning_rate": 1.698805034366087e-05, "loss": 0.2984, "step": 6742 }, { "epoch": 0.5103259070053451, "grad_norm": 0.73828125, "learning_rate": 1.6987198300042858e-05, "loss": 0.2968, "step": 6743 }, { "epoch": 0.5104015893287924, "grad_norm": 0.8046875, "learning_rate": 1.6986346157299446e-05, "loss": 0.3172, "step": 6744 }, { "epoch": 0.5104772716522398, "grad_norm": 0.7109375, "learning_rate": 1.6985493915442726e-05, "loss": 0.2746, "step": 6745 }, { "epoch": 0.510552953975687, "grad_norm": 0.7890625, "learning_rate": 1.6984641574484788e-05, "loss": 0.3161, "step": 6746 }, { "epoch": 0.5106286362991344, "grad_norm": 0.73828125, "learning_rate": 1.698378913443772e-05, "loss": 0.3143, "step": 6747 }, { "epoch": 0.5107043186225817, "grad_norm": 0.7734375, "learning_rate": 1.6982936595313622e-05, "loss": 0.2908, "step": 6748 }, { "epoch": 0.510780000946029, "grad_norm": 0.7890625, "learning_rate": 1.6982083957124582e-05, "loss": 0.3341, "step": 6749 }, { "epoch": 0.5108556832694764, "grad_norm": 0.87890625, "learning_rate": 1.69812312198827e-05, "loss": 0.3625, "step": 6750 }, { "epoch": 0.5109313655929237, "grad_norm": 0.7421875, "learning_rate": 1.6980378383600077e-05, "loss": 0.3292, "step": 6751 }, { "epoch": 0.5110070479163711, "grad_norm": 0.8046875, "learning_rate": 1.6979525448288803e-05, "loss": 0.3173, "step": 6752 }, { "epoch": 0.5110827302398183, "grad_norm": 0.71875, "learning_rate": 1.6978672413960984e-05, "loss": 0.3017, "step": 6753 }, { "epoch": 0.5111584125632657, "grad_norm": 0.79296875, "learning_rate": 1.697781928062872e-05, "loss": 0.3001, "step": 6754 }, { "epoch": 0.511234094886713, "grad_norm": 0.796875, "learning_rate": 1.697696604830412e-05, "loss": 0.3318, "step": 6755 }, { "epoch": 0.5113097772101604, "grad_norm": 0.828125, "learning_rate": 1.697611271699928e-05, "loss": 0.3377, "step": 6756 }, { "epoch": 0.5113854595336077, "grad_norm": 0.73828125, "learning_rate": 1.6975259286726312e-05, "loss": 0.3208, "step": 6757 }, { "epoch": 0.511461141857055, "grad_norm": 0.77734375, "learning_rate": 1.6974405757497318e-05, "loss": 0.3226, "step": 6758 }, { "epoch": 0.5115368241805024, "grad_norm": 0.76171875, "learning_rate": 1.6973552129324413e-05, "loss": 0.3206, "step": 6759 }, { "epoch": 0.5116125065039496, "grad_norm": 0.7734375, "learning_rate": 1.69726984022197e-05, "loss": 0.3205, "step": 6760 }, { "epoch": 0.511688188827397, "grad_norm": 0.8046875, "learning_rate": 1.6971844576195297e-05, "loss": 0.301, "step": 6761 }, { "epoch": 0.5117638711508443, "grad_norm": 0.7578125, "learning_rate": 1.6970990651263314e-05, "loss": 0.295, "step": 6762 }, { "epoch": 0.5118395534742917, "grad_norm": 0.78125, "learning_rate": 1.6970136627435868e-05, "loss": 0.339, "step": 6763 }, { "epoch": 0.511915235797739, "grad_norm": 0.78515625, "learning_rate": 1.696928250472507e-05, "loss": 0.3037, "step": 6764 }, { "epoch": 0.5119909181211864, "grad_norm": 0.8984375, "learning_rate": 1.696842828314304e-05, "loss": 0.3799, "step": 6765 }, { "epoch": 0.5120666004446337, "grad_norm": 0.7421875, "learning_rate": 1.69675739627019e-05, "loss": 0.2802, "step": 6766 }, { "epoch": 0.5121422827680809, "grad_norm": 0.7734375, "learning_rate": 1.6966719543413767e-05, "loss": 0.3288, "step": 6767 }, { "epoch": 0.5122179650915283, "grad_norm": 1.5625, "learning_rate": 1.6965865025290758e-05, "loss": 0.4399, "step": 6768 }, { "epoch": 0.5122936474149756, "grad_norm": 0.875, "learning_rate": 1.6965010408345e-05, "loss": 0.3997, "step": 6769 }, { "epoch": 0.512369329738423, "grad_norm": 0.734375, "learning_rate": 1.696415569258862e-05, "loss": 0.3058, "step": 6770 }, { "epoch": 0.5124450120618703, "grad_norm": 0.77734375, "learning_rate": 1.6963300878033737e-05, "loss": 0.3306, "step": 6771 }, { "epoch": 0.5125206943853177, "grad_norm": 0.74609375, "learning_rate": 1.6962445964692483e-05, "loss": 0.3056, "step": 6772 }, { "epoch": 0.512596376708765, "grad_norm": 0.8203125, "learning_rate": 1.6961590952576986e-05, "loss": 0.3247, "step": 6773 }, { "epoch": 0.5126720590322122, "grad_norm": 0.8203125, "learning_rate": 1.6960735841699375e-05, "loss": 0.3535, "step": 6774 }, { "epoch": 0.5127477413556596, "grad_norm": 5.125, "learning_rate": 1.695988063207178e-05, "loss": 0.447, "step": 6775 }, { "epoch": 0.5128234236791069, "grad_norm": 0.76953125, "learning_rate": 1.6959025323706333e-05, "loss": 0.3166, "step": 6776 }, { "epoch": 0.5128991060025543, "grad_norm": 0.7578125, "learning_rate": 1.6958169916615174e-05, "loss": 0.3134, "step": 6777 }, { "epoch": 0.5129747883260016, "grad_norm": 0.8125, "learning_rate": 1.6957314410810433e-05, "loss": 0.3131, "step": 6778 }, { "epoch": 0.513050470649449, "grad_norm": 0.81640625, "learning_rate": 1.6956458806304247e-05, "loss": 0.364, "step": 6779 }, { "epoch": 0.5131261529728963, "grad_norm": 0.7890625, "learning_rate": 1.6955603103108754e-05, "loss": 0.3423, "step": 6780 }, { "epoch": 0.5132018352963436, "grad_norm": 0.7109375, "learning_rate": 1.695474730123609e-05, "loss": 0.2872, "step": 6781 }, { "epoch": 0.5132775176197909, "grad_norm": 0.765625, "learning_rate": 1.695389140069841e-05, "loss": 0.3201, "step": 6782 }, { "epoch": 0.5133531999432382, "grad_norm": 0.72265625, "learning_rate": 1.6953035401507845e-05, "loss": 0.2808, "step": 6783 }, { "epoch": 0.5134288822666856, "grad_norm": 0.80078125, "learning_rate": 1.6952179303676537e-05, "loss": 0.345, "step": 6784 }, { "epoch": 0.5135045645901329, "grad_norm": 0.73046875, "learning_rate": 1.695132310721664e-05, "loss": 0.2985, "step": 6785 }, { "epoch": 0.5135802469135803, "grad_norm": 0.765625, "learning_rate": 1.6950466812140292e-05, "loss": 0.324, "step": 6786 }, { "epoch": 0.5136559292370276, "grad_norm": 0.703125, "learning_rate": 1.6949610418459648e-05, "loss": 0.2703, "step": 6787 }, { "epoch": 0.5137316115604749, "grad_norm": 0.7109375, "learning_rate": 1.6948753926186852e-05, "loss": 0.2919, "step": 6788 }, { "epoch": 0.5138072938839222, "grad_norm": 0.75390625, "learning_rate": 1.6947897335334058e-05, "loss": 0.31, "step": 6789 }, { "epoch": 0.5138829762073696, "grad_norm": 0.8203125, "learning_rate": 1.694704064591342e-05, "loss": 0.3675, "step": 6790 }, { "epoch": 0.5139586585308169, "grad_norm": 0.8046875, "learning_rate": 1.694618385793708e-05, "loss": 0.3479, "step": 6791 }, { "epoch": 0.5140343408542642, "grad_norm": 0.83984375, "learning_rate": 1.694532697141721e-05, "loss": 0.3654, "step": 6792 }, { "epoch": 0.5141100231777116, "grad_norm": 0.74609375, "learning_rate": 1.694446998636596e-05, "loss": 0.3091, "step": 6793 }, { "epoch": 0.5141857055011589, "grad_norm": 0.88671875, "learning_rate": 1.694361290279548e-05, "loss": 0.3221, "step": 6794 }, { "epoch": 0.5142613878246062, "grad_norm": 0.8203125, "learning_rate": 1.694275572071794e-05, "loss": 0.3538, "step": 6795 }, { "epoch": 0.5143370701480535, "grad_norm": 0.7265625, "learning_rate": 1.694189844014549e-05, "loss": 0.275, "step": 6796 }, { "epoch": 0.5144127524715009, "grad_norm": 0.72265625, "learning_rate": 1.6941041061090305e-05, "loss": 0.2879, "step": 6797 }, { "epoch": 0.5144884347949482, "grad_norm": 0.81640625, "learning_rate": 1.6940183583564536e-05, "loss": 0.3507, "step": 6798 }, { "epoch": 0.5145641171183956, "grad_norm": 0.765625, "learning_rate": 1.6939326007580355e-05, "loss": 0.3244, "step": 6799 }, { "epoch": 0.5146397994418429, "grad_norm": 0.7578125, "learning_rate": 1.6938468333149923e-05, "loss": 0.3303, "step": 6800 }, { "epoch": 0.5147154817652901, "grad_norm": 0.796875, "learning_rate": 1.693761056028542e-05, "loss": 0.3519, "step": 6801 }, { "epoch": 0.5147911640887375, "grad_norm": 0.83984375, "learning_rate": 1.6936752688998997e-05, "loss": 0.3598, "step": 6802 }, { "epoch": 0.5148668464121848, "grad_norm": 0.83203125, "learning_rate": 1.6935894719302837e-05, "loss": 0.3923, "step": 6803 }, { "epoch": 0.5149425287356322, "grad_norm": 0.83203125, "learning_rate": 1.6935036651209106e-05, "loss": 0.3287, "step": 6804 }, { "epoch": 0.5150182110590795, "grad_norm": 0.8203125, "learning_rate": 1.693417848472998e-05, "loss": 0.3614, "step": 6805 }, { "epoch": 0.5150938933825269, "grad_norm": 0.75, "learning_rate": 1.6933320219877634e-05, "loss": 0.3029, "step": 6806 }, { "epoch": 0.5151695757059742, "grad_norm": 0.80859375, "learning_rate": 1.693246185666424e-05, "loss": 0.3642, "step": 6807 }, { "epoch": 0.5152452580294214, "grad_norm": 0.7734375, "learning_rate": 1.693160339510198e-05, "loss": 0.3387, "step": 6808 }, { "epoch": 0.5153209403528688, "grad_norm": 0.78125, "learning_rate": 1.6930744835203032e-05, "loss": 0.3366, "step": 6809 }, { "epoch": 0.5153966226763161, "grad_norm": 0.76171875, "learning_rate": 1.6929886176979576e-05, "loss": 0.2829, "step": 6810 }, { "epoch": 0.5154723049997635, "grad_norm": 0.74609375, "learning_rate": 1.6929027420443788e-05, "loss": 0.32, "step": 6811 }, { "epoch": 0.5155479873232108, "grad_norm": 0.7265625, "learning_rate": 1.6928168565607857e-05, "loss": 0.2742, "step": 6812 }, { "epoch": 0.5156236696466582, "grad_norm": 1.3984375, "learning_rate": 1.6927309612483965e-05, "loss": 0.3801, "step": 6813 }, { "epoch": 0.5156993519701055, "grad_norm": 0.73046875, "learning_rate": 1.6926450561084302e-05, "loss": 0.2909, "step": 6814 }, { "epoch": 0.5157750342935528, "grad_norm": 0.70703125, "learning_rate": 1.6925591411421048e-05, "loss": 0.2753, "step": 6815 }, { "epoch": 0.5158507166170001, "grad_norm": 0.8359375, "learning_rate": 1.69247321635064e-05, "loss": 0.3279, "step": 6816 }, { "epoch": 0.5159263989404475, "grad_norm": 0.83203125, "learning_rate": 1.692387281735254e-05, "loss": 0.3548, "step": 6817 }, { "epoch": 0.5160020812638948, "grad_norm": 0.74609375, "learning_rate": 1.692301337297166e-05, "loss": 0.3038, "step": 6818 }, { "epoch": 0.5160777635873421, "grad_norm": 0.79296875, "learning_rate": 1.6922153830375956e-05, "loss": 0.3332, "step": 6819 }, { "epoch": 0.5161534459107895, "grad_norm": 0.7734375, "learning_rate": 1.6921294189577622e-05, "loss": 0.3461, "step": 6820 }, { "epoch": 0.5162291282342368, "grad_norm": 0.8046875, "learning_rate": 1.6920434450588854e-05, "loss": 0.3845, "step": 6821 }, { "epoch": 0.5163048105576841, "grad_norm": 0.79296875, "learning_rate": 1.6919574613421845e-05, "loss": 0.3094, "step": 6822 }, { "epoch": 0.5163804928811314, "grad_norm": 0.75390625, "learning_rate": 1.6918714678088797e-05, "loss": 0.3071, "step": 6823 }, { "epoch": 0.5164561752045788, "grad_norm": 0.7890625, "learning_rate": 1.691785464460191e-05, "loss": 0.3144, "step": 6824 }, { "epoch": 0.5165318575280261, "grad_norm": 0.77734375, "learning_rate": 1.691699451297338e-05, "loss": 0.3301, "step": 6825 }, { "epoch": 0.5166075398514735, "grad_norm": 0.71875, "learning_rate": 1.6916134283215412e-05, "loss": 0.2758, "step": 6826 }, { "epoch": 0.5166832221749208, "grad_norm": 0.76953125, "learning_rate": 1.6915273955340214e-05, "loss": 0.3169, "step": 6827 }, { "epoch": 0.5167589044983681, "grad_norm": 0.734375, "learning_rate": 1.6914413529359987e-05, "loss": 0.2957, "step": 6828 }, { "epoch": 0.5168345868218154, "grad_norm": 0.765625, "learning_rate": 1.6913553005286937e-05, "loss": 0.3384, "step": 6829 }, { "epoch": 0.5169102691452627, "grad_norm": 0.81640625, "learning_rate": 1.6912692383133276e-05, "loss": 0.3265, "step": 6830 }, { "epoch": 0.5169859514687101, "grad_norm": 0.734375, "learning_rate": 1.691183166291121e-05, "loss": 0.2836, "step": 6831 }, { "epoch": 0.5170616337921574, "grad_norm": 0.7578125, "learning_rate": 1.691097084463295e-05, "loss": 0.2907, "step": 6832 }, { "epoch": 0.5171373161156048, "grad_norm": 0.84765625, "learning_rate": 1.691010992831071e-05, "loss": 0.3721, "step": 6833 }, { "epoch": 0.5172129984390521, "grad_norm": 0.76171875, "learning_rate": 1.69092489139567e-05, "loss": 0.301, "step": 6834 }, { "epoch": 0.5172886807624995, "grad_norm": 0.76171875, "learning_rate": 1.690838780158314e-05, "loss": 0.2917, "step": 6835 }, { "epoch": 0.5173643630859467, "grad_norm": 0.828125, "learning_rate": 1.6907526591202246e-05, "loss": 0.2912, "step": 6836 }, { "epoch": 0.517440045409394, "grad_norm": 0.7109375, "learning_rate": 1.690666528282623e-05, "loss": 0.2777, "step": 6837 }, { "epoch": 0.5175157277328414, "grad_norm": 0.7734375, "learning_rate": 1.6905803876467317e-05, "loss": 0.3051, "step": 6838 }, { "epoch": 0.5175914100562887, "grad_norm": 0.7421875, "learning_rate": 1.6904942372137725e-05, "loss": 0.3078, "step": 6839 }, { "epoch": 0.5176670923797361, "grad_norm": 0.6953125, "learning_rate": 1.6904080769849674e-05, "loss": 0.2989, "step": 6840 }, { "epoch": 0.5177427747031834, "grad_norm": 0.734375, "learning_rate": 1.6903219069615388e-05, "loss": 0.2814, "step": 6841 }, { "epoch": 0.5178184570266308, "grad_norm": 0.7734375, "learning_rate": 1.69023572714471e-05, "loss": 0.3152, "step": 6842 }, { "epoch": 0.517894139350078, "grad_norm": 0.8203125, "learning_rate": 1.6901495375357023e-05, "loss": 0.3488, "step": 6843 }, { "epoch": 0.5179698216735253, "grad_norm": 0.71875, "learning_rate": 1.6900633381357393e-05, "loss": 0.2875, "step": 6844 }, { "epoch": 0.5180455039969727, "grad_norm": 0.80859375, "learning_rate": 1.6899771289460436e-05, "loss": 0.3534, "step": 6845 }, { "epoch": 0.51812118632042, "grad_norm": 0.765625, "learning_rate": 1.6898909099678383e-05, "loss": 0.3212, "step": 6846 }, { "epoch": 0.5181968686438674, "grad_norm": 1.265625, "learning_rate": 1.6898046812023465e-05, "loss": 0.3683, "step": 6847 }, { "epoch": 0.5182725509673147, "grad_norm": 0.7578125, "learning_rate": 1.6897184426507917e-05, "loss": 0.3088, "step": 6848 }, { "epoch": 0.5183482332907621, "grad_norm": 0.78125, "learning_rate": 1.689632194314397e-05, "loss": 0.3285, "step": 6849 }, { "epoch": 0.5184239156142093, "grad_norm": 0.8125, "learning_rate": 1.6895459361943865e-05, "loss": 0.3716, "step": 6850 }, { "epoch": 0.5184995979376567, "grad_norm": 0.7578125, "learning_rate": 1.6894596682919834e-05, "loss": 0.3155, "step": 6851 }, { "epoch": 0.518575280261104, "grad_norm": 0.78515625, "learning_rate": 1.6893733906084118e-05, "loss": 0.3417, "step": 6852 }, { "epoch": 0.5186509625845513, "grad_norm": 0.84765625, "learning_rate": 1.6892871031448957e-05, "loss": 0.3564, "step": 6853 }, { "epoch": 0.5187266449079987, "grad_norm": 0.81640625, "learning_rate": 1.6892008059026587e-05, "loss": 0.3638, "step": 6854 }, { "epoch": 0.518802327231446, "grad_norm": 0.7734375, "learning_rate": 1.689114498882926e-05, "loss": 0.3638, "step": 6855 }, { "epoch": 0.5188780095548934, "grad_norm": 0.71875, "learning_rate": 1.6890281820869215e-05, "loss": 0.2957, "step": 6856 }, { "epoch": 0.5189536918783406, "grad_norm": 0.71875, "learning_rate": 1.68894185551587e-05, "loss": 0.3024, "step": 6857 }, { "epoch": 0.519029374201788, "grad_norm": 0.76953125, "learning_rate": 1.688855519170996e-05, "loss": 0.349, "step": 6858 }, { "epoch": 0.5191050565252353, "grad_norm": 0.74609375, "learning_rate": 1.6887691730535237e-05, "loss": 0.3213, "step": 6859 }, { "epoch": 0.5191807388486827, "grad_norm": 0.828125, "learning_rate": 1.688682817164679e-05, "loss": 0.3621, "step": 6860 }, { "epoch": 0.51925642117213, "grad_norm": 0.73828125, "learning_rate": 1.688596451505687e-05, "loss": 0.3153, "step": 6861 }, { "epoch": 0.5193321034955773, "grad_norm": 0.70703125, "learning_rate": 1.688510076077773e-05, "loss": 0.2621, "step": 6862 }, { "epoch": 0.5194077858190247, "grad_norm": 0.7734375, "learning_rate": 1.6884236908821617e-05, "loss": 0.3207, "step": 6863 }, { "epoch": 0.5194834681424719, "grad_norm": 0.81640625, "learning_rate": 1.6883372959200786e-05, "loss": 0.3628, "step": 6864 }, { "epoch": 0.5195591504659193, "grad_norm": 0.7890625, "learning_rate": 1.68825089119275e-05, "loss": 0.3414, "step": 6865 }, { "epoch": 0.5196348327893666, "grad_norm": 0.6875, "learning_rate": 1.6881644767014017e-05, "loss": 0.2913, "step": 6866 }, { "epoch": 0.519710515112814, "grad_norm": 0.87890625, "learning_rate": 1.688078052447259e-05, "loss": 0.406, "step": 6867 }, { "epoch": 0.5197861974362613, "grad_norm": 0.7421875, "learning_rate": 1.6879916184315485e-05, "loss": 0.3074, "step": 6868 }, { "epoch": 0.5198618797597087, "grad_norm": 0.77734375, "learning_rate": 1.6879051746554963e-05, "loss": 0.3153, "step": 6869 }, { "epoch": 0.519937562083156, "grad_norm": 0.68359375, "learning_rate": 1.6878187211203286e-05, "loss": 0.2873, "step": 6870 }, { "epoch": 0.5200132444066032, "grad_norm": 0.80078125, "learning_rate": 1.6877322578272722e-05, "loss": 0.3491, "step": 6871 }, { "epoch": 0.5200889267300506, "grad_norm": 0.875, "learning_rate": 1.687645784777553e-05, "loss": 0.3768, "step": 6872 }, { "epoch": 0.5201646090534979, "grad_norm": 0.78515625, "learning_rate": 1.6875593019723988e-05, "loss": 0.3297, "step": 6873 }, { "epoch": 0.5202402913769453, "grad_norm": 0.8046875, "learning_rate": 1.687472809413036e-05, "loss": 0.3599, "step": 6874 }, { "epoch": 0.5203159737003926, "grad_norm": 0.8828125, "learning_rate": 1.6873863071006914e-05, "loss": 0.3552, "step": 6875 }, { "epoch": 0.52039165602384, "grad_norm": 0.7578125, "learning_rate": 1.6872997950365926e-05, "loss": 0.3167, "step": 6876 }, { "epoch": 0.5204673383472873, "grad_norm": 0.78125, "learning_rate": 1.687213273221967e-05, "loss": 0.3285, "step": 6877 }, { "epoch": 0.5205430206707345, "grad_norm": 0.82421875, "learning_rate": 1.687126741658041e-05, "loss": 0.3593, "step": 6878 }, { "epoch": 0.5206187029941819, "grad_norm": 0.7265625, "learning_rate": 1.687040200346044e-05, "loss": 0.2754, "step": 6879 }, { "epoch": 0.5206943853176292, "grad_norm": 0.75390625, "learning_rate": 1.6869536492872023e-05, "loss": 0.3092, "step": 6880 }, { "epoch": 0.5207700676410766, "grad_norm": 0.83203125, "learning_rate": 1.6868670884827443e-05, "loss": 0.387, "step": 6881 }, { "epoch": 0.5208457499645239, "grad_norm": 0.86328125, "learning_rate": 1.6867805179338977e-05, "loss": 0.3467, "step": 6882 }, { "epoch": 0.5209214322879713, "grad_norm": 0.8359375, "learning_rate": 1.686693937641891e-05, "loss": 0.3685, "step": 6883 }, { "epoch": 0.5209971146114186, "grad_norm": 0.859375, "learning_rate": 1.6866073476079524e-05, "loss": 0.355, "step": 6884 }, { "epoch": 0.5210727969348659, "grad_norm": 0.76953125, "learning_rate": 1.6865207478333107e-05, "loss": 0.3289, "step": 6885 }, { "epoch": 0.5211484792583132, "grad_norm": 0.77734375, "learning_rate": 1.6864341383191938e-05, "loss": 0.3113, "step": 6886 }, { "epoch": 0.5212241615817605, "grad_norm": 0.83984375, "learning_rate": 1.6863475190668306e-05, "loss": 0.3818, "step": 6887 }, { "epoch": 0.5212998439052079, "grad_norm": 0.8203125, "learning_rate": 1.68626089007745e-05, "loss": 0.3702, "step": 6888 }, { "epoch": 0.5213755262286552, "grad_norm": 0.80078125, "learning_rate": 1.686174251352281e-05, "loss": 0.3403, "step": 6889 }, { "epoch": 0.5214512085521026, "grad_norm": 0.765625, "learning_rate": 1.686087602892553e-05, "loss": 0.3081, "step": 6890 }, { "epoch": 0.5215268908755499, "grad_norm": 0.71875, "learning_rate": 1.686000944699495e-05, "loss": 0.2793, "step": 6891 }, { "epoch": 0.5216025731989972, "grad_norm": 0.8125, "learning_rate": 1.6859142767743363e-05, "loss": 0.329, "step": 6892 }, { "epoch": 0.5216782555224445, "grad_norm": 0.765625, "learning_rate": 1.6858275991183063e-05, "loss": 0.3283, "step": 6893 }, { "epoch": 0.5217539378458919, "grad_norm": 0.77734375, "learning_rate": 1.685740911732635e-05, "loss": 0.336, "step": 6894 }, { "epoch": 0.5218296201693392, "grad_norm": 0.74609375, "learning_rate": 1.6856542146185522e-05, "loss": 0.3314, "step": 6895 }, { "epoch": 0.5219053024927865, "grad_norm": 0.8203125, "learning_rate": 1.6855675077772875e-05, "loss": 0.3381, "step": 6896 }, { "epoch": 0.5219809848162339, "grad_norm": 0.79296875, "learning_rate": 1.6854807912100716e-05, "loss": 0.3464, "step": 6897 }, { "epoch": 0.5220566671396812, "grad_norm": 0.80859375, "learning_rate": 1.685394064918134e-05, "loss": 0.367, "step": 6898 }, { "epoch": 0.5221323494631285, "grad_norm": 0.7421875, "learning_rate": 1.6853073289027055e-05, "loss": 0.3097, "step": 6899 }, { "epoch": 0.5222080317865758, "grad_norm": 0.75390625, "learning_rate": 1.6852205831650166e-05, "loss": 0.3428, "step": 6900 }, { "epoch": 0.5222837141100232, "grad_norm": 0.84375, "learning_rate": 1.685133827706298e-05, "loss": 0.3719, "step": 6901 }, { "epoch": 0.5223593964334705, "grad_norm": 0.74609375, "learning_rate": 1.68504706252778e-05, "loss": 0.3151, "step": 6902 }, { "epoch": 0.5224350787569179, "grad_norm": 0.7890625, "learning_rate": 1.684960287630694e-05, "loss": 0.325, "step": 6903 }, { "epoch": 0.5225107610803652, "grad_norm": 0.7890625, "learning_rate": 1.6848735030162707e-05, "loss": 0.366, "step": 6904 }, { "epoch": 0.5225864434038126, "grad_norm": 0.75, "learning_rate": 1.6847867086857416e-05, "loss": 0.2924, "step": 6905 }, { "epoch": 0.5226621257272598, "grad_norm": 0.81640625, "learning_rate": 1.684699904640338e-05, "loss": 0.3516, "step": 6906 }, { "epoch": 0.5227378080507071, "grad_norm": 0.75390625, "learning_rate": 1.684613090881291e-05, "loss": 0.2948, "step": 6907 }, { "epoch": 0.5228134903741545, "grad_norm": 0.74609375, "learning_rate": 1.684526267409833e-05, "loss": 0.3247, "step": 6908 }, { "epoch": 0.5228891726976018, "grad_norm": 0.7734375, "learning_rate": 1.6844394342271944e-05, "loss": 0.3141, "step": 6909 }, { "epoch": 0.5229648550210492, "grad_norm": 0.82421875, "learning_rate": 1.6843525913346087e-05, "loss": 0.3762, "step": 6910 }, { "epoch": 0.5230405373444965, "grad_norm": 0.78125, "learning_rate": 1.6842657387333065e-05, "loss": 0.2964, "step": 6911 }, { "epoch": 0.5231162196679439, "grad_norm": 0.80859375, "learning_rate": 1.684178876424521e-05, "loss": 0.3395, "step": 6912 }, { "epoch": 0.5231919019913911, "grad_norm": 0.73828125, "learning_rate": 1.684092004409484e-05, "loss": 0.3036, "step": 6913 }, { "epoch": 0.5232675843148384, "grad_norm": 0.8125, "learning_rate": 1.684005122689428e-05, "loss": 0.3463, "step": 6914 }, { "epoch": 0.5233432666382858, "grad_norm": 0.8046875, "learning_rate": 1.6839182312655853e-05, "loss": 0.3422, "step": 6915 }, { "epoch": 0.5234189489617331, "grad_norm": 0.73046875, "learning_rate": 1.6838313301391887e-05, "loss": 0.3038, "step": 6916 }, { "epoch": 0.5234946312851805, "grad_norm": 0.78515625, "learning_rate": 1.6837444193114712e-05, "loss": 0.3269, "step": 6917 }, { "epoch": 0.5235703136086278, "grad_norm": 0.7421875, "learning_rate": 1.6836574987836663e-05, "loss": 0.2799, "step": 6918 }, { "epoch": 0.5236459959320752, "grad_norm": 0.84765625, "learning_rate": 1.6835705685570062e-05, "loss": 0.3708, "step": 6919 }, { "epoch": 0.5237216782555224, "grad_norm": 0.8359375, "learning_rate": 1.6834836286327245e-05, "loss": 0.3436, "step": 6920 }, { "epoch": 0.5237973605789698, "grad_norm": 0.83203125, "learning_rate": 1.6833966790120548e-05, "loss": 0.3864, "step": 6921 }, { "epoch": 0.5238730429024171, "grad_norm": 0.88671875, "learning_rate": 1.6833097196962304e-05, "loss": 0.4003, "step": 6922 }, { "epoch": 0.5239487252258644, "grad_norm": 0.77734375, "learning_rate": 1.6832227506864853e-05, "loss": 0.3463, "step": 6923 }, { "epoch": 0.5240244075493118, "grad_norm": 0.6875, "learning_rate": 1.6831357719840523e-05, "loss": 0.2448, "step": 6924 }, { "epoch": 0.5241000898727591, "grad_norm": 0.796875, "learning_rate": 1.683048783590167e-05, "loss": 0.3582, "step": 6925 }, { "epoch": 0.5241757721962064, "grad_norm": 0.75, "learning_rate": 1.682961785506062e-05, "loss": 0.2914, "step": 6926 }, { "epoch": 0.5242514545196537, "grad_norm": 0.77734375, "learning_rate": 1.682874777732972e-05, "loss": 0.3321, "step": 6927 }, { "epoch": 0.5243271368431011, "grad_norm": 0.7421875, "learning_rate": 1.6827877602721316e-05, "loss": 0.2925, "step": 6928 }, { "epoch": 0.5244028191665484, "grad_norm": 0.96484375, "learning_rate": 1.682700733124775e-05, "loss": 0.299, "step": 6929 }, { "epoch": 0.5244785014899958, "grad_norm": 0.7890625, "learning_rate": 1.6826136962921373e-05, "loss": 0.3325, "step": 6930 }, { "epoch": 0.5245541838134431, "grad_norm": 0.85546875, "learning_rate": 1.6825266497754524e-05, "loss": 0.3733, "step": 6931 }, { "epoch": 0.5246298661368904, "grad_norm": 0.71484375, "learning_rate": 1.682439593575956e-05, "loss": 0.2644, "step": 6932 }, { "epoch": 0.5247055484603377, "grad_norm": 0.890625, "learning_rate": 1.6823525276948827e-05, "loss": 0.3683, "step": 6933 }, { "epoch": 0.524781230783785, "grad_norm": 0.83203125, "learning_rate": 1.682265452133468e-05, "loss": 0.3687, "step": 6934 }, { "epoch": 0.5248569131072324, "grad_norm": 0.8359375, "learning_rate": 1.6821783668929473e-05, "loss": 0.3729, "step": 6935 }, { "epoch": 0.5249325954306797, "grad_norm": 0.81640625, "learning_rate": 1.6820912719745555e-05, "loss": 0.2999, "step": 6936 }, { "epoch": 0.5250082777541271, "grad_norm": 0.7265625, "learning_rate": 1.6820041673795282e-05, "loss": 0.2859, "step": 6937 }, { "epoch": 0.5250839600775744, "grad_norm": 0.79296875, "learning_rate": 1.6819170531091018e-05, "loss": 0.3228, "step": 6938 }, { "epoch": 0.5251596424010218, "grad_norm": 0.765625, "learning_rate": 1.6818299291645118e-05, "loss": 0.2904, "step": 6939 }, { "epoch": 0.525235324724469, "grad_norm": 0.83984375, "learning_rate": 1.681742795546994e-05, "loss": 0.4091, "step": 6940 }, { "epoch": 0.5253110070479163, "grad_norm": 0.80859375, "learning_rate": 1.6816556522577848e-05, "loss": 0.3488, "step": 6941 }, { "epoch": 0.5253866893713637, "grad_norm": 0.8046875, "learning_rate": 1.681568499298121e-05, "loss": 0.3388, "step": 6942 }, { "epoch": 0.525462371694811, "grad_norm": 0.78515625, "learning_rate": 1.6814813366692377e-05, "loss": 0.3437, "step": 6943 }, { "epoch": 0.5255380540182584, "grad_norm": 0.8046875, "learning_rate": 1.6813941643723722e-05, "loss": 0.3406, "step": 6944 }, { "epoch": 0.5256137363417057, "grad_norm": 0.84765625, "learning_rate": 1.6813069824087613e-05, "loss": 0.4133, "step": 6945 }, { "epoch": 0.5256894186651531, "grad_norm": 0.7265625, "learning_rate": 1.681219790779642e-05, "loss": 0.3135, "step": 6946 }, { "epoch": 0.5257651009886003, "grad_norm": 0.7265625, "learning_rate": 1.6811325894862504e-05, "loss": 0.2962, "step": 6947 }, { "epoch": 0.5258407833120476, "grad_norm": 0.7734375, "learning_rate": 1.6810453785298248e-05, "loss": 0.3253, "step": 6948 }, { "epoch": 0.525916465635495, "grad_norm": 0.76171875, "learning_rate": 1.6809581579116013e-05, "loss": 0.3105, "step": 6949 }, { "epoch": 0.5259921479589423, "grad_norm": 0.81640625, "learning_rate": 1.680870927632818e-05, "loss": 0.3606, "step": 6950 }, { "epoch": 0.5260678302823897, "grad_norm": 0.7890625, "learning_rate": 1.680783687694712e-05, "loss": 0.3378, "step": 6951 }, { "epoch": 0.526143512605837, "grad_norm": 0.76171875, "learning_rate": 1.680696438098521e-05, "loss": 0.3292, "step": 6952 }, { "epoch": 0.5262191949292844, "grad_norm": 0.79296875, "learning_rate": 1.6806091788454832e-05, "loss": 0.3207, "step": 6953 }, { "epoch": 0.5262948772527316, "grad_norm": 0.76171875, "learning_rate": 1.680521909936836e-05, "loss": 0.3187, "step": 6954 }, { "epoch": 0.526370559576179, "grad_norm": 0.703125, "learning_rate": 1.680434631373818e-05, "loss": 0.2701, "step": 6955 }, { "epoch": 0.5264462418996263, "grad_norm": 0.70703125, "learning_rate": 1.6803473431576672e-05, "loss": 0.277, "step": 6956 }, { "epoch": 0.5265219242230736, "grad_norm": 0.74609375, "learning_rate": 1.6802600452896215e-05, "loss": 0.3142, "step": 6957 }, { "epoch": 0.526597606546521, "grad_norm": 0.828125, "learning_rate": 1.6801727377709195e-05, "loss": 0.3487, "step": 6958 }, { "epoch": 0.5266732888699683, "grad_norm": 0.73046875, "learning_rate": 1.6800854206028003e-05, "loss": 0.2875, "step": 6959 }, { "epoch": 0.5267489711934157, "grad_norm": 0.80078125, "learning_rate": 1.679998093786502e-05, "loss": 0.3273, "step": 6960 }, { "epoch": 0.5268246535168629, "grad_norm": 0.79296875, "learning_rate": 1.6799107573232643e-05, "loss": 0.331, "step": 6961 }, { "epoch": 0.5269003358403103, "grad_norm": 0.765625, "learning_rate": 1.6798234112143257e-05, "loss": 0.3197, "step": 6962 }, { "epoch": 0.5269760181637576, "grad_norm": 0.796875, "learning_rate": 1.679736055460925e-05, "loss": 0.3604, "step": 6963 }, { "epoch": 0.527051700487205, "grad_norm": 0.7421875, "learning_rate": 1.6796486900643024e-05, "loss": 0.3219, "step": 6964 }, { "epoch": 0.5271273828106523, "grad_norm": 0.78125, "learning_rate": 1.6795613150256965e-05, "loss": 0.3308, "step": 6965 }, { "epoch": 0.5272030651340996, "grad_norm": 1.015625, "learning_rate": 1.679473930346347e-05, "loss": 0.3454, "step": 6966 }, { "epoch": 0.527278747457547, "grad_norm": 0.7421875, "learning_rate": 1.679386536027494e-05, "loss": 0.2977, "step": 6967 }, { "epoch": 0.5273544297809942, "grad_norm": 0.83984375, "learning_rate": 1.6792991320703772e-05, "loss": 0.3312, "step": 6968 }, { "epoch": 0.5274301121044416, "grad_norm": 1.21875, "learning_rate": 1.6792117184762363e-05, "loss": 0.3879, "step": 6969 }, { "epoch": 0.5275057944278889, "grad_norm": 0.734375, "learning_rate": 1.679124295246312e-05, "loss": 0.2854, "step": 6970 }, { "epoch": 0.5275814767513363, "grad_norm": 0.78125, "learning_rate": 1.6790368623818438e-05, "loss": 0.325, "step": 6971 }, { "epoch": 0.5276571590747836, "grad_norm": 0.77734375, "learning_rate": 1.6789494198840723e-05, "loss": 0.3202, "step": 6972 }, { "epoch": 0.527732841398231, "grad_norm": 0.7265625, "learning_rate": 1.6788619677542385e-05, "loss": 0.2869, "step": 6973 }, { "epoch": 0.5278085237216783, "grad_norm": 0.703125, "learning_rate": 1.6787745059935826e-05, "loss": 0.2707, "step": 6974 }, { "epoch": 0.5278842060451255, "grad_norm": 0.82421875, "learning_rate": 1.678687034603346e-05, "loss": 0.3409, "step": 6975 }, { "epoch": 0.5279598883685729, "grad_norm": 0.80078125, "learning_rate": 1.678599553584768e-05, "loss": 0.3727, "step": 6976 }, { "epoch": 0.5280355706920202, "grad_norm": 0.7734375, "learning_rate": 1.6785120629390916e-05, "loss": 0.3307, "step": 6977 }, { "epoch": 0.5281112530154676, "grad_norm": 0.8125, "learning_rate": 1.6784245626675572e-05, "loss": 0.3176, "step": 6978 }, { "epoch": 0.5281869353389149, "grad_norm": 0.82421875, "learning_rate": 1.678337052771406e-05, "loss": 0.3625, "step": 6979 }, { "epoch": 0.5282626176623623, "grad_norm": 0.765625, "learning_rate": 1.67824953325188e-05, "loss": 0.3163, "step": 6980 }, { "epoch": 0.5283382999858096, "grad_norm": 0.76171875, "learning_rate": 1.67816200411022e-05, "loss": 0.2785, "step": 6981 }, { "epoch": 0.5284139823092568, "grad_norm": 0.76953125, "learning_rate": 1.6780744653476683e-05, "loss": 0.3131, "step": 6982 }, { "epoch": 0.5284896646327042, "grad_norm": 0.73046875, "learning_rate": 1.677986916965467e-05, "loss": 0.3045, "step": 6983 }, { "epoch": 0.5285653469561515, "grad_norm": 0.80078125, "learning_rate": 1.677899358964857e-05, "loss": 0.3649, "step": 6984 }, { "epoch": 0.5286410292795989, "grad_norm": 0.6953125, "learning_rate": 1.677811791347082e-05, "loss": 0.2837, "step": 6985 }, { "epoch": 0.5287167116030462, "grad_norm": 0.69140625, "learning_rate": 1.6777242141133837e-05, "loss": 0.2717, "step": 6986 }, { "epoch": 0.5287923939264936, "grad_norm": 0.765625, "learning_rate": 1.6776366272650037e-05, "loss": 0.3349, "step": 6987 }, { "epoch": 0.5288680762499409, "grad_norm": 0.7890625, "learning_rate": 1.6775490308031857e-05, "loss": 0.3594, "step": 6988 }, { "epoch": 0.5289437585733882, "grad_norm": 0.80078125, "learning_rate": 1.677461424729172e-05, "loss": 0.3297, "step": 6989 }, { "epoch": 0.5290194408968355, "grad_norm": 0.75, "learning_rate": 1.6773738090442057e-05, "loss": 0.3213, "step": 6990 }, { "epoch": 0.5290951232202828, "grad_norm": 0.78125, "learning_rate": 1.6772861837495292e-05, "loss": 0.3409, "step": 6991 }, { "epoch": 0.5291708055437302, "grad_norm": 0.734375, "learning_rate": 1.677198548846386e-05, "loss": 0.3168, "step": 6992 }, { "epoch": 0.5292464878671775, "grad_norm": 0.7578125, "learning_rate": 1.6771109043360193e-05, "loss": 0.3299, "step": 6993 }, { "epoch": 0.5293221701906249, "grad_norm": 0.8125, "learning_rate": 1.6770232502196722e-05, "loss": 0.3525, "step": 6994 }, { "epoch": 0.5293978525140722, "grad_norm": 0.75, "learning_rate": 1.6769355864985888e-05, "loss": 0.3195, "step": 6995 }, { "epoch": 0.5294735348375195, "grad_norm": 0.73046875, "learning_rate": 1.676847913174012e-05, "loss": 0.2938, "step": 6996 }, { "epoch": 0.5295492171609668, "grad_norm": 0.77734375, "learning_rate": 1.6767602302471865e-05, "loss": 0.2696, "step": 6997 }, { "epoch": 0.5296248994844142, "grad_norm": 0.6796875, "learning_rate": 1.6766725377193558e-05, "loss": 0.2648, "step": 6998 }, { "epoch": 0.5297005818078615, "grad_norm": 0.81640625, "learning_rate": 1.6765848355917636e-05, "loss": 0.3308, "step": 6999 }, { "epoch": 0.5297762641313088, "grad_norm": 0.73046875, "learning_rate": 1.6764971238656548e-05, "loss": 0.2939, "step": 7000 }, { "epoch": 0.5298519464547562, "grad_norm": 0.765625, "learning_rate": 1.676409402542273e-05, "loss": 0.3226, "step": 7001 }, { "epoch": 0.5299276287782035, "grad_norm": 0.765625, "learning_rate": 1.6763216716228634e-05, "loss": 0.3345, "step": 7002 }, { "epoch": 0.5300033111016508, "grad_norm": 0.82421875, "learning_rate": 1.67623393110867e-05, "loss": 0.3512, "step": 7003 }, { "epoch": 0.5300789934250981, "grad_norm": 0.7890625, "learning_rate": 1.676146181000938e-05, "loss": 0.3377, "step": 7004 }, { "epoch": 0.5301546757485455, "grad_norm": 0.77734375, "learning_rate": 1.6760584213009118e-05, "loss": 0.3365, "step": 7005 }, { "epoch": 0.5302303580719928, "grad_norm": 1.0703125, "learning_rate": 1.6759706520098373e-05, "loss": 0.3373, "step": 7006 }, { "epoch": 0.5303060403954402, "grad_norm": 0.71484375, "learning_rate": 1.6758828731289588e-05, "loss": 0.2955, "step": 7007 }, { "epoch": 0.5303817227188875, "grad_norm": 0.828125, "learning_rate": 1.675795084659522e-05, "loss": 0.3369, "step": 7008 }, { "epoch": 0.5304574050423349, "grad_norm": 0.796875, "learning_rate": 1.6757072866027718e-05, "loss": 0.2983, "step": 7009 }, { "epoch": 0.5305330873657821, "grad_norm": 0.77734375, "learning_rate": 1.6756194789599547e-05, "loss": 0.3483, "step": 7010 }, { "epoch": 0.5306087696892294, "grad_norm": 0.76953125, "learning_rate": 1.6755316617323157e-05, "loss": 0.3188, "step": 7011 }, { "epoch": 0.5306844520126768, "grad_norm": 0.8203125, "learning_rate": 1.6754438349211007e-05, "loss": 0.3124, "step": 7012 }, { "epoch": 0.5307601343361241, "grad_norm": 0.80078125, "learning_rate": 1.675355998527556e-05, "loss": 0.3438, "step": 7013 }, { "epoch": 0.5308358166595715, "grad_norm": 0.73046875, "learning_rate": 1.6752681525529274e-05, "loss": 0.2879, "step": 7014 }, { "epoch": 0.5309114989830188, "grad_norm": 0.703125, "learning_rate": 1.675180296998461e-05, "loss": 0.2896, "step": 7015 }, { "epoch": 0.5309871813064662, "grad_norm": 0.78125, "learning_rate": 1.6750924318654044e-05, "loss": 0.3537, "step": 7016 }, { "epoch": 0.5310628636299134, "grad_norm": 0.765625, "learning_rate": 1.6750045571550023e-05, "loss": 0.34, "step": 7017 }, { "epoch": 0.5311385459533607, "grad_norm": 0.8046875, "learning_rate": 1.6749166728685025e-05, "loss": 0.34, "step": 7018 }, { "epoch": 0.5312142282768081, "grad_norm": 1.171875, "learning_rate": 1.674828779007151e-05, "loss": 0.3593, "step": 7019 }, { "epoch": 0.5312899106002554, "grad_norm": 0.8125, "learning_rate": 1.6747408755721962e-05, "loss": 0.3519, "step": 7020 }, { "epoch": 0.5313655929237028, "grad_norm": 0.7578125, "learning_rate": 1.6746529625648834e-05, "loss": 0.3229, "step": 7021 }, { "epoch": 0.5314412752471501, "grad_norm": 0.7265625, "learning_rate": 1.6745650399864608e-05, "loss": 0.3088, "step": 7022 }, { "epoch": 0.5315169575705975, "grad_norm": 0.75, "learning_rate": 1.6744771078381756e-05, "loss": 0.311, "step": 7023 }, { "epoch": 0.5315926398940447, "grad_norm": 0.82421875, "learning_rate": 1.674389166121275e-05, "loss": 0.3258, "step": 7024 }, { "epoch": 0.531668322217492, "grad_norm": 0.71484375, "learning_rate": 1.6743012148370067e-05, "loss": 0.2806, "step": 7025 }, { "epoch": 0.5317440045409394, "grad_norm": 0.7421875, "learning_rate": 1.674213253986619e-05, "loss": 0.3015, "step": 7026 }, { "epoch": 0.5318196868643867, "grad_norm": 0.80078125, "learning_rate": 1.6741252835713586e-05, "loss": 0.3197, "step": 7027 }, { "epoch": 0.5318953691878341, "grad_norm": 0.8046875, "learning_rate": 1.6740373035924747e-05, "loss": 0.3281, "step": 7028 }, { "epoch": 0.5319710515112814, "grad_norm": 0.859375, "learning_rate": 1.673949314051215e-05, "loss": 0.3562, "step": 7029 }, { "epoch": 0.5320467338347288, "grad_norm": 0.8046875, "learning_rate": 1.6738613149488276e-05, "loss": 0.3358, "step": 7030 }, { "epoch": 0.532122416158176, "grad_norm": 0.72265625, "learning_rate": 1.6737733062865607e-05, "loss": 0.3018, "step": 7031 }, { "epoch": 0.5321980984816234, "grad_norm": 1.0078125, "learning_rate": 1.673685288065664e-05, "loss": 0.3441, "step": 7032 }, { "epoch": 0.5322737808050707, "grad_norm": 0.81640625, "learning_rate": 1.6735972602873847e-05, "loss": 0.3687, "step": 7033 }, { "epoch": 0.532349463128518, "grad_norm": 0.77734375, "learning_rate": 1.6735092229529727e-05, "loss": 0.3475, "step": 7034 }, { "epoch": 0.5324251454519654, "grad_norm": 0.7890625, "learning_rate": 1.6734211760636766e-05, "loss": 0.3563, "step": 7035 }, { "epoch": 0.5325008277754127, "grad_norm": 0.8203125, "learning_rate": 1.6733331196207453e-05, "loss": 0.319, "step": 7036 }, { "epoch": 0.5325765100988601, "grad_norm": 0.84765625, "learning_rate": 1.6732450536254285e-05, "loss": 0.3693, "step": 7037 }, { "epoch": 0.5326521924223073, "grad_norm": 1.171875, "learning_rate": 1.6731569780789752e-05, "loss": 0.3644, "step": 7038 }, { "epoch": 0.5327278747457547, "grad_norm": 1.0625, "learning_rate": 1.6730688929826348e-05, "loss": 0.3949, "step": 7039 }, { "epoch": 0.532803557069202, "grad_norm": 0.8046875, "learning_rate": 1.6729807983376574e-05, "loss": 0.3297, "step": 7040 }, { "epoch": 0.5328792393926494, "grad_norm": 0.73046875, "learning_rate": 1.6728926941452922e-05, "loss": 0.283, "step": 7041 }, { "epoch": 0.5329549217160967, "grad_norm": 0.6953125, "learning_rate": 1.6728045804067895e-05, "loss": 0.2868, "step": 7042 }, { "epoch": 0.533030604039544, "grad_norm": 0.81640625, "learning_rate": 1.672716457123399e-05, "loss": 0.3352, "step": 7043 }, { "epoch": 0.5331062863629913, "grad_norm": 0.80078125, "learning_rate": 1.6726283242963716e-05, "loss": 0.3255, "step": 7044 }, { "epoch": 0.5331819686864386, "grad_norm": 0.80859375, "learning_rate": 1.6725401819269572e-05, "loss": 0.3437, "step": 7045 }, { "epoch": 0.533257651009886, "grad_norm": 0.8125, "learning_rate": 1.672452030016406e-05, "loss": 0.333, "step": 7046 }, { "epoch": 0.5333333333333333, "grad_norm": 0.75390625, "learning_rate": 1.6723638685659687e-05, "loss": 0.3444, "step": 7047 }, { "epoch": 0.5334090156567807, "grad_norm": 0.75390625, "learning_rate": 1.6722756975768963e-05, "loss": 0.3026, "step": 7048 }, { "epoch": 0.533484697980228, "grad_norm": 0.74609375, "learning_rate": 1.672187517050439e-05, "loss": 0.298, "step": 7049 }, { "epoch": 0.5335603803036754, "grad_norm": 0.79296875, "learning_rate": 1.6720993269878486e-05, "loss": 0.3527, "step": 7050 }, { "epoch": 0.5336360626271226, "grad_norm": 0.7734375, "learning_rate": 1.672011127390376e-05, "loss": 0.3238, "step": 7051 }, { "epoch": 0.5337117449505699, "grad_norm": 0.83203125, "learning_rate": 1.671922918259272e-05, "loss": 0.3658, "step": 7052 }, { "epoch": 0.5337874272740173, "grad_norm": 0.76171875, "learning_rate": 1.6718346995957892e-05, "loss": 0.3056, "step": 7053 }, { "epoch": 0.5338631095974646, "grad_norm": 0.80078125, "learning_rate": 1.6717464714011775e-05, "loss": 0.3063, "step": 7054 }, { "epoch": 0.533938791920912, "grad_norm": 0.83203125, "learning_rate": 1.6716582336766892e-05, "loss": 0.3799, "step": 7055 }, { "epoch": 0.5340144742443593, "grad_norm": 0.8046875, "learning_rate": 1.6715699864235767e-05, "loss": 0.3325, "step": 7056 }, { "epoch": 0.5340901565678067, "grad_norm": 0.76171875, "learning_rate": 1.6714817296430914e-05, "loss": 0.3369, "step": 7057 }, { "epoch": 0.5341658388912539, "grad_norm": 0.765625, "learning_rate": 1.6713934633364853e-05, "loss": 0.3071, "step": 7058 }, { "epoch": 0.5342415212147013, "grad_norm": 1.125, "learning_rate": 1.671305187505011e-05, "loss": 0.3813, "step": 7059 }, { "epoch": 0.5343172035381486, "grad_norm": 0.7578125, "learning_rate": 1.6712169021499205e-05, "loss": 0.2991, "step": 7060 }, { "epoch": 0.534392885861596, "grad_norm": 0.82421875, "learning_rate": 1.6711286072724662e-05, "loss": 0.3654, "step": 7061 }, { "epoch": 0.5344685681850433, "grad_norm": 2.46875, "learning_rate": 1.671040302873901e-05, "loss": 0.3332, "step": 7062 }, { "epoch": 0.5345442505084906, "grad_norm": 0.8046875, "learning_rate": 1.6709519889554776e-05, "loss": 0.3436, "step": 7063 }, { "epoch": 0.534619932831938, "grad_norm": 0.73828125, "learning_rate": 1.670863665518449e-05, "loss": 0.2754, "step": 7064 }, { "epoch": 0.5346956151553852, "grad_norm": 0.78515625, "learning_rate": 1.6707753325640683e-05, "loss": 0.3423, "step": 7065 }, { "epoch": 0.5347712974788326, "grad_norm": 0.6953125, "learning_rate": 1.670686990093588e-05, "loss": 0.2748, "step": 7066 }, { "epoch": 0.5348469798022799, "grad_norm": 0.76953125, "learning_rate": 1.670598638108262e-05, "loss": 0.3059, "step": 7067 }, { "epoch": 0.5349226621257273, "grad_norm": 0.85546875, "learning_rate": 1.6705102766093432e-05, "loss": 0.325, "step": 7068 }, { "epoch": 0.5349983444491746, "grad_norm": 0.75, "learning_rate": 1.670421905598086e-05, "loss": 0.2858, "step": 7069 }, { "epoch": 0.535074026772622, "grad_norm": 0.953125, "learning_rate": 1.6703335250757428e-05, "loss": 0.3817, "step": 7070 }, { "epoch": 0.5351497090960693, "grad_norm": 1.2109375, "learning_rate": 1.6702451350435688e-05, "loss": 0.4103, "step": 7071 }, { "epoch": 0.5352253914195165, "grad_norm": 0.73046875, "learning_rate": 1.6701567355028174e-05, "loss": 0.2858, "step": 7072 }, { "epoch": 0.5353010737429639, "grad_norm": 0.8359375, "learning_rate": 1.6700683264547425e-05, "loss": 0.3167, "step": 7073 }, { "epoch": 0.5353767560664112, "grad_norm": 0.7578125, "learning_rate": 1.6699799079005985e-05, "loss": 0.3196, "step": 7074 }, { "epoch": 0.5354524383898586, "grad_norm": 0.78515625, "learning_rate": 1.6698914798416402e-05, "loss": 0.3289, "step": 7075 }, { "epoch": 0.5355281207133059, "grad_norm": 0.77734375, "learning_rate": 1.669803042279121e-05, "loss": 0.2881, "step": 7076 }, { "epoch": 0.5356038030367533, "grad_norm": 0.75390625, "learning_rate": 1.669714595214297e-05, "loss": 0.308, "step": 7077 }, { "epoch": 0.5356794853602006, "grad_norm": 0.79296875, "learning_rate": 1.6696261386484215e-05, "loss": 0.3565, "step": 7078 }, { "epoch": 0.5357551676836478, "grad_norm": 0.71875, "learning_rate": 1.6695376725827503e-05, "loss": 0.2933, "step": 7079 }, { "epoch": 0.5358308500070952, "grad_norm": 0.734375, "learning_rate": 1.669449197018538e-05, "loss": 0.2813, "step": 7080 }, { "epoch": 0.5359065323305425, "grad_norm": 0.71875, "learning_rate": 1.6693607119570408e-05, "loss": 0.272, "step": 7081 }, { "epoch": 0.5359822146539899, "grad_norm": 0.83203125, "learning_rate": 1.6692722173995127e-05, "loss": 0.3746, "step": 7082 }, { "epoch": 0.5360578969774372, "grad_norm": 0.80859375, "learning_rate": 1.6691837133472097e-05, "loss": 0.3481, "step": 7083 }, { "epoch": 0.5361335793008846, "grad_norm": 0.81640625, "learning_rate": 1.6690951998013876e-05, "loss": 0.3579, "step": 7084 }, { "epoch": 0.5362092616243319, "grad_norm": 0.76953125, "learning_rate": 1.6690066767633016e-05, "loss": 0.2966, "step": 7085 }, { "epoch": 0.5362849439477791, "grad_norm": 1.0546875, "learning_rate": 1.668918144234208e-05, "loss": 0.339, "step": 7086 }, { "epoch": 0.5363606262712265, "grad_norm": 0.7578125, "learning_rate": 1.6688296022153628e-05, "loss": 0.3149, "step": 7087 }, { "epoch": 0.5364363085946738, "grad_norm": 0.84765625, "learning_rate": 1.6687410507080214e-05, "loss": 0.3453, "step": 7088 }, { "epoch": 0.5365119909181212, "grad_norm": 0.7734375, "learning_rate": 1.668652489713441e-05, "loss": 0.3252, "step": 7089 }, { "epoch": 0.5365876732415685, "grad_norm": 0.78125, "learning_rate": 1.6685639192328777e-05, "loss": 0.2966, "step": 7090 }, { "epoch": 0.5366633555650159, "grad_norm": 0.796875, "learning_rate": 1.6684753392675873e-05, "loss": 0.3425, "step": 7091 }, { "epoch": 0.5367390378884632, "grad_norm": 0.80078125, "learning_rate": 1.6683867498188277e-05, "loss": 0.3176, "step": 7092 }, { "epoch": 0.5368147202119105, "grad_norm": 0.765625, "learning_rate": 1.6682981508878547e-05, "loss": 0.3206, "step": 7093 }, { "epoch": 0.5368904025353578, "grad_norm": 0.7578125, "learning_rate": 1.6682095424759258e-05, "loss": 0.3164, "step": 7094 }, { "epoch": 0.5369660848588051, "grad_norm": 0.7421875, "learning_rate": 1.6681209245842978e-05, "loss": 0.3142, "step": 7095 }, { "epoch": 0.5370417671822525, "grad_norm": 0.7578125, "learning_rate": 1.668032297214228e-05, "loss": 0.2992, "step": 7096 }, { "epoch": 0.5371174495056998, "grad_norm": 0.78125, "learning_rate": 1.6679436603669736e-05, "loss": 0.2998, "step": 7097 }, { "epoch": 0.5371931318291472, "grad_norm": 0.73828125, "learning_rate": 1.667855014043792e-05, "loss": 0.2959, "step": 7098 }, { "epoch": 0.5372688141525945, "grad_norm": 0.87109375, "learning_rate": 1.6677663582459414e-05, "loss": 0.3348, "step": 7099 }, { "epoch": 0.5373444964760418, "grad_norm": 0.75390625, "learning_rate": 1.667677692974679e-05, "loss": 0.3007, "step": 7100 }, { "epoch": 0.5374201787994891, "grad_norm": 0.7578125, "learning_rate": 1.6675890182312624e-05, "loss": 0.2882, "step": 7101 }, { "epoch": 0.5374958611229365, "grad_norm": 0.76171875, "learning_rate": 1.66750033401695e-05, "loss": 0.3207, "step": 7102 }, { "epoch": 0.5375715434463838, "grad_norm": 0.828125, "learning_rate": 1.6674116403330002e-05, "loss": 0.3656, "step": 7103 }, { "epoch": 0.5376472257698311, "grad_norm": 0.796875, "learning_rate": 1.6673229371806706e-05, "loss": 0.3546, "step": 7104 }, { "epoch": 0.5377229080932785, "grad_norm": 0.73828125, "learning_rate": 1.66723422456122e-05, "loss": 0.3263, "step": 7105 }, { "epoch": 0.5377985904167258, "grad_norm": 0.7734375, "learning_rate": 1.667145502475907e-05, "loss": 0.3162, "step": 7106 }, { "epoch": 0.5378742727401731, "grad_norm": 0.75, "learning_rate": 1.6670567709259903e-05, "loss": 0.3193, "step": 7107 }, { "epoch": 0.5379499550636204, "grad_norm": 0.86328125, "learning_rate": 1.6669680299127286e-05, "loss": 0.3855, "step": 7108 }, { "epoch": 0.5380256373870678, "grad_norm": 0.76953125, "learning_rate": 1.6668792794373807e-05, "loss": 0.3297, "step": 7109 }, { "epoch": 0.5381013197105151, "grad_norm": 0.78125, "learning_rate": 1.666790519501206e-05, "loss": 0.3666, "step": 7110 }, { "epoch": 0.5381770020339625, "grad_norm": 0.79296875, "learning_rate": 1.6667017501054633e-05, "loss": 0.3605, "step": 7111 }, { "epoch": 0.5382526843574098, "grad_norm": 0.84765625, "learning_rate": 1.6666129712514123e-05, "loss": 0.3765, "step": 7112 }, { "epoch": 0.5383283666808572, "grad_norm": 0.734375, "learning_rate": 1.6665241829403122e-05, "loss": 0.3076, "step": 7113 }, { "epoch": 0.5384040490043044, "grad_norm": 0.8203125, "learning_rate": 1.6664353851734228e-05, "loss": 0.3632, "step": 7114 }, { "epoch": 0.5384797313277517, "grad_norm": 0.79296875, "learning_rate": 1.6663465779520042e-05, "loss": 0.3488, "step": 7115 }, { "epoch": 0.5385554136511991, "grad_norm": 0.765625, "learning_rate": 1.6662577612773154e-05, "loss": 0.3246, "step": 7116 }, { "epoch": 0.5386310959746464, "grad_norm": 0.8203125, "learning_rate": 1.6661689351506174e-05, "loss": 0.3593, "step": 7117 }, { "epoch": 0.5387067782980938, "grad_norm": 0.82421875, "learning_rate": 1.6660800995731693e-05, "loss": 0.3326, "step": 7118 }, { "epoch": 0.5387824606215411, "grad_norm": 0.78515625, "learning_rate": 1.6659912545462325e-05, "loss": 0.3365, "step": 7119 }, { "epoch": 0.5388581429449885, "grad_norm": 0.71484375, "learning_rate": 1.6659024000710667e-05, "loss": 0.2896, "step": 7120 }, { "epoch": 0.5389338252684357, "grad_norm": 0.76953125, "learning_rate": 1.6658135361489326e-05, "loss": 0.346, "step": 7121 }, { "epoch": 0.539009507591883, "grad_norm": 0.75390625, "learning_rate": 1.6657246627810906e-05, "loss": 0.3211, "step": 7122 }, { "epoch": 0.5390851899153304, "grad_norm": 0.70703125, "learning_rate": 1.6656357799688025e-05, "loss": 0.2724, "step": 7123 }, { "epoch": 0.5391608722387777, "grad_norm": 0.78125, "learning_rate": 1.6655468877133282e-05, "loss": 0.3377, "step": 7124 }, { "epoch": 0.5392365545622251, "grad_norm": 0.71484375, "learning_rate": 1.665457986015929e-05, "loss": 0.2872, "step": 7125 }, { "epoch": 0.5393122368856724, "grad_norm": 0.81640625, "learning_rate": 1.6653690748778662e-05, "loss": 0.3712, "step": 7126 }, { "epoch": 0.5393879192091198, "grad_norm": 0.7734375, "learning_rate": 1.6652801543004017e-05, "loss": 0.3314, "step": 7127 }, { "epoch": 0.539463601532567, "grad_norm": 0.79296875, "learning_rate": 1.6651912242847966e-05, "loss": 0.3539, "step": 7128 }, { "epoch": 0.5395392838560144, "grad_norm": 0.8125, "learning_rate": 1.665102284832312e-05, "loss": 0.3368, "step": 7129 }, { "epoch": 0.5396149661794617, "grad_norm": 4.03125, "learning_rate": 1.6650133359442106e-05, "loss": 0.3956, "step": 7130 }, { "epoch": 0.539690648502909, "grad_norm": 0.75390625, "learning_rate": 1.6649243776217538e-05, "loss": 0.3221, "step": 7131 }, { "epoch": 0.5397663308263564, "grad_norm": 0.80859375, "learning_rate": 1.6648354098662034e-05, "loss": 0.3564, "step": 7132 }, { "epoch": 0.5398420131498037, "grad_norm": 0.73828125, "learning_rate": 1.664746432678822e-05, "loss": 0.3329, "step": 7133 }, { "epoch": 0.5399176954732511, "grad_norm": 0.75, "learning_rate": 1.6646574460608717e-05, "loss": 0.2971, "step": 7134 }, { "epoch": 0.5399933777966983, "grad_norm": 0.80078125, "learning_rate": 1.6645684500136145e-05, "loss": 0.3469, "step": 7135 }, { "epoch": 0.5400690601201457, "grad_norm": 0.7578125, "learning_rate": 1.6644794445383138e-05, "loss": 0.326, "step": 7136 }, { "epoch": 0.540144742443593, "grad_norm": 0.78515625, "learning_rate": 1.664390429636232e-05, "loss": 0.2968, "step": 7137 }, { "epoch": 0.5402204247670404, "grad_norm": 0.77734375, "learning_rate": 1.6643014053086318e-05, "loss": 0.3616, "step": 7138 }, { "epoch": 0.5402961070904877, "grad_norm": 0.77734375, "learning_rate": 1.664212371556776e-05, "loss": 0.354, "step": 7139 }, { "epoch": 0.540371789413935, "grad_norm": 0.76953125, "learning_rate": 1.664123328381928e-05, "loss": 0.3483, "step": 7140 }, { "epoch": 0.5404474717373824, "grad_norm": 0.78515625, "learning_rate": 1.6640342757853506e-05, "loss": 0.316, "step": 7141 }, { "epoch": 0.5405231540608296, "grad_norm": 0.75390625, "learning_rate": 1.6639452137683082e-05, "loss": 0.2964, "step": 7142 }, { "epoch": 0.540598836384277, "grad_norm": 0.78515625, "learning_rate": 1.6638561423320626e-05, "loss": 0.339, "step": 7143 }, { "epoch": 0.5406745187077243, "grad_norm": 0.9375, "learning_rate": 1.6637670614778793e-05, "loss": 0.2547, "step": 7144 }, { "epoch": 0.5407502010311717, "grad_norm": 0.72265625, "learning_rate": 1.6636779712070206e-05, "loss": 0.2769, "step": 7145 }, { "epoch": 0.540825883354619, "grad_norm": 0.88671875, "learning_rate": 1.663588871520751e-05, "loss": 0.3425, "step": 7146 }, { "epoch": 0.5409015656780664, "grad_norm": 0.7890625, "learning_rate": 1.6634997624203347e-05, "loss": 0.3429, "step": 7147 }, { "epoch": 0.5409772480015137, "grad_norm": 0.8046875, "learning_rate": 1.6634106439070357e-05, "loss": 0.379, "step": 7148 }, { "epoch": 0.5410529303249609, "grad_norm": 0.78515625, "learning_rate": 1.663321515982118e-05, "loss": 0.3227, "step": 7149 }, { "epoch": 0.5411286126484083, "grad_norm": 0.73828125, "learning_rate": 1.6632323786468468e-05, "loss": 0.309, "step": 7150 }, { "epoch": 0.5412042949718556, "grad_norm": 0.82421875, "learning_rate": 1.6631432319024855e-05, "loss": 0.306, "step": 7151 }, { "epoch": 0.541279977295303, "grad_norm": 0.8125, "learning_rate": 1.6630540757503e-05, "loss": 0.3486, "step": 7152 }, { "epoch": 0.5413556596187503, "grad_norm": 0.81640625, "learning_rate": 1.6629649101915544e-05, "loss": 0.3651, "step": 7153 }, { "epoch": 0.5414313419421977, "grad_norm": 1.1640625, "learning_rate": 1.662875735227514e-05, "loss": 0.3591, "step": 7154 }, { "epoch": 0.541507024265645, "grad_norm": 0.74609375, "learning_rate": 1.6627865508594433e-05, "loss": 0.2609, "step": 7155 }, { "epoch": 0.5415827065890922, "grad_norm": 0.72265625, "learning_rate": 1.6626973570886086e-05, "loss": 0.2712, "step": 7156 }, { "epoch": 0.5416583889125396, "grad_norm": 0.82421875, "learning_rate": 1.662608153916274e-05, "loss": 0.3372, "step": 7157 }, { "epoch": 0.5417340712359869, "grad_norm": 0.7109375, "learning_rate": 1.662518941343706e-05, "loss": 0.3032, "step": 7158 }, { "epoch": 0.5418097535594343, "grad_norm": 1.1640625, "learning_rate": 1.66242971937217e-05, "loss": 0.3611, "step": 7159 }, { "epoch": 0.5418854358828816, "grad_norm": 0.7421875, "learning_rate": 1.6623404880029317e-05, "loss": 0.2772, "step": 7160 }, { "epoch": 0.541961118206329, "grad_norm": 0.8046875, "learning_rate": 1.662251247237257e-05, "loss": 0.2896, "step": 7161 }, { "epoch": 0.5420368005297763, "grad_norm": 0.796875, "learning_rate": 1.6621619970764117e-05, "loss": 0.3512, "step": 7162 }, { "epoch": 0.5421124828532236, "grad_norm": 0.859375, "learning_rate": 1.662072737521662e-05, "loss": 0.426, "step": 7163 }, { "epoch": 0.5421881651766709, "grad_norm": 0.828125, "learning_rate": 1.6619834685742745e-05, "loss": 0.3682, "step": 7164 }, { "epoch": 0.5422638475001182, "grad_norm": 0.78515625, "learning_rate": 1.6618941902355156e-05, "loss": 0.3321, "step": 7165 }, { "epoch": 0.5423395298235656, "grad_norm": 0.75390625, "learning_rate": 1.6618049025066517e-05, "loss": 0.3121, "step": 7166 }, { "epoch": 0.5424152121470129, "grad_norm": 0.734375, "learning_rate": 1.6617156053889497e-05, "loss": 0.3071, "step": 7167 }, { "epoch": 0.5424908944704603, "grad_norm": 1.0078125, "learning_rate": 1.661626298883676e-05, "loss": 0.3578, "step": 7168 }, { "epoch": 0.5425665767939075, "grad_norm": 0.74609375, "learning_rate": 1.6615369829920983e-05, "loss": 0.3267, "step": 7169 }, { "epoch": 0.5426422591173549, "grad_norm": 0.75390625, "learning_rate": 1.661447657715483e-05, "loss": 0.3139, "step": 7170 }, { "epoch": 0.5427179414408022, "grad_norm": 0.7890625, "learning_rate": 1.6613583230550977e-05, "loss": 0.3452, "step": 7171 }, { "epoch": 0.5427936237642496, "grad_norm": 0.78515625, "learning_rate": 1.6612689790122095e-05, "loss": 0.3256, "step": 7172 }, { "epoch": 0.5428693060876969, "grad_norm": 0.8046875, "learning_rate": 1.6611796255880862e-05, "loss": 0.3636, "step": 7173 }, { "epoch": 0.5429449884111442, "grad_norm": 0.73046875, "learning_rate": 1.6610902627839953e-05, "loss": 0.2928, "step": 7174 }, { "epoch": 0.5430206707345916, "grad_norm": 0.7734375, "learning_rate": 1.661000890601204e-05, "loss": 0.3187, "step": 7175 }, { "epoch": 0.5430963530580388, "grad_norm": 0.74609375, "learning_rate": 1.6609115090409814e-05, "loss": 0.3098, "step": 7176 }, { "epoch": 0.5431720353814862, "grad_norm": 0.7734375, "learning_rate": 1.6608221181045946e-05, "loss": 0.325, "step": 7177 }, { "epoch": 0.5432477177049335, "grad_norm": 0.78515625, "learning_rate": 1.6607327177933125e-05, "loss": 0.3346, "step": 7178 }, { "epoch": 0.5433234000283809, "grad_norm": 0.734375, "learning_rate": 1.6606433081084022e-05, "loss": 0.3065, "step": 7179 }, { "epoch": 0.5433990823518282, "grad_norm": 0.75390625, "learning_rate": 1.6605538890511334e-05, "loss": 0.3181, "step": 7180 }, { "epoch": 0.5434747646752756, "grad_norm": 0.78515625, "learning_rate": 1.660464460622774e-05, "loss": 0.3258, "step": 7181 }, { "epoch": 0.5435504469987229, "grad_norm": 0.79296875, "learning_rate": 1.6603750228245928e-05, "loss": 0.2732, "step": 7182 }, { "epoch": 0.5436261293221701, "grad_norm": 0.7890625, "learning_rate": 1.6602855756578587e-05, "loss": 0.3464, "step": 7183 }, { "epoch": 0.5437018116456175, "grad_norm": 0.76171875, "learning_rate": 1.6601961191238408e-05, "loss": 0.32, "step": 7184 }, { "epoch": 0.5437774939690648, "grad_norm": 0.7734375, "learning_rate": 1.6601066532238075e-05, "loss": 0.3179, "step": 7185 }, { "epoch": 0.5438531762925122, "grad_norm": 0.765625, "learning_rate": 1.6600171779590288e-05, "loss": 0.3134, "step": 7186 }, { "epoch": 0.5439288586159595, "grad_norm": 0.75390625, "learning_rate": 1.659927693330774e-05, "loss": 0.302, "step": 7187 }, { "epoch": 0.5440045409394069, "grad_norm": 0.85546875, "learning_rate": 1.6598381993403122e-05, "loss": 0.3561, "step": 7188 }, { "epoch": 0.5440802232628542, "grad_norm": 0.69140625, "learning_rate": 1.6597486959889134e-05, "loss": 0.2862, "step": 7189 }, { "epoch": 0.5441559055863014, "grad_norm": 0.76953125, "learning_rate": 1.659659183277847e-05, "loss": 0.3547, "step": 7190 }, { "epoch": 0.5442315879097488, "grad_norm": 0.7265625, "learning_rate": 1.6595696612083833e-05, "loss": 0.2708, "step": 7191 }, { "epoch": 0.5443072702331961, "grad_norm": 0.7265625, "learning_rate": 1.6594801297817914e-05, "loss": 0.2844, "step": 7192 }, { "epoch": 0.5443829525566435, "grad_norm": 0.734375, "learning_rate": 1.659390588999343e-05, "loss": 0.2835, "step": 7193 }, { "epoch": 0.5444586348800908, "grad_norm": 0.78515625, "learning_rate": 1.6593010388623073e-05, "loss": 0.3292, "step": 7194 }, { "epoch": 0.5445343172035382, "grad_norm": 0.76953125, "learning_rate": 1.6592114793719547e-05, "loss": 0.3255, "step": 7195 }, { "epoch": 0.5446099995269855, "grad_norm": 0.7890625, "learning_rate": 1.659121910529556e-05, "loss": 0.3509, "step": 7196 }, { "epoch": 0.5446856818504328, "grad_norm": 0.76953125, "learning_rate": 1.6590323323363825e-05, "loss": 0.3228, "step": 7197 }, { "epoch": 0.5447613641738801, "grad_norm": 0.7421875, "learning_rate": 1.6589427447937038e-05, "loss": 0.2977, "step": 7198 }, { "epoch": 0.5448370464973274, "grad_norm": 0.74609375, "learning_rate": 1.6588531479027917e-05, "loss": 0.3177, "step": 7199 }, { "epoch": 0.5449127288207748, "grad_norm": 0.7890625, "learning_rate": 1.658763541664917e-05, "loss": 0.3162, "step": 7200 }, { "epoch": 0.5449884111442221, "grad_norm": 0.76171875, "learning_rate": 1.658673926081351e-05, "loss": 0.303, "step": 7201 }, { "epoch": 0.5450640934676695, "grad_norm": 0.7421875, "learning_rate": 1.6585843011533652e-05, "loss": 0.3106, "step": 7202 }, { "epoch": 0.5451397757911168, "grad_norm": 0.71484375, "learning_rate": 1.6584946668822308e-05, "loss": 0.2625, "step": 7203 }, { "epoch": 0.5452154581145641, "grad_norm": 0.7890625, "learning_rate": 1.6584050232692198e-05, "loss": 0.3076, "step": 7204 }, { "epoch": 0.5452911404380114, "grad_norm": 0.7265625, "learning_rate": 1.658315370315603e-05, "loss": 0.3047, "step": 7205 }, { "epoch": 0.5453668227614588, "grad_norm": 0.7578125, "learning_rate": 1.6582257080226537e-05, "loss": 0.2993, "step": 7206 }, { "epoch": 0.5454425050849061, "grad_norm": 0.78515625, "learning_rate": 1.658136036391643e-05, "loss": 0.3386, "step": 7207 }, { "epoch": 0.5455181874083534, "grad_norm": 0.70703125, "learning_rate": 1.658046355423843e-05, "loss": 0.2995, "step": 7208 }, { "epoch": 0.5455938697318008, "grad_norm": 0.76953125, "learning_rate": 1.6579566651205262e-05, "loss": 0.3136, "step": 7209 }, { "epoch": 0.5456695520552481, "grad_norm": 0.7734375, "learning_rate": 1.657866965482965e-05, "loss": 0.306, "step": 7210 }, { "epoch": 0.5457452343786954, "grad_norm": 0.8046875, "learning_rate": 1.6577772565124322e-05, "loss": 0.3404, "step": 7211 }, { "epoch": 0.5458209167021427, "grad_norm": 0.78515625, "learning_rate": 1.6576875382102e-05, "loss": 0.3283, "step": 7212 }, { "epoch": 0.5458965990255901, "grad_norm": 0.7421875, "learning_rate": 1.6575978105775416e-05, "loss": 0.3099, "step": 7213 }, { "epoch": 0.5459722813490374, "grad_norm": 0.76953125, "learning_rate": 1.6575080736157295e-05, "loss": 0.3115, "step": 7214 }, { "epoch": 0.5460479636724848, "grad_norm": 0.72265625, "learning_rate": 1.6574183273260372e-05, "loss": 0.3043, "step": 7215 }, { "epoch": 0.5461236459959321, "grad_norm": 0.73046875, "learning_rate": 1.657328571709738e-05, "loss": 0.3311, "step": 7216 }, { "epoch": 0.5461993283193795, "grad_norm": 0.74609375, "learning_rate": 1.6572388067681047e-05, "loss": 0.3359, "step": 7217 }, { "epoch": 0.5462750106428267, "grad_norm": 0.73046875, "learning_rate": 1.6571490325024114e-05, "loss": 0.3069, "step": 7218 }, { "epoch": 0.546350692966274, "grad_norm": 0.80078125, "learning_rate": 1.6570592489139313e-05, "loss": 0.3505, "step": 7219 }, { "epoch": 0.5464263752897214, "grad_norm": 0.76953125, "learning_rate": 1.656969456003938e-05, "loss": 0.3229, "step": 7220 }, { "epoch": 0.5465020576131687, "grad_norm": 0.74609375, "learning_rate": 1.6568796537737056e-05, "loss": 0.293, "step": 7221 }, { "epoch": 0.5465777399366161, "grad_norm": 0.703125, "learning_rate": 1.6567898422245083e-05, "loss": 0.2669, "step": 7222 }, { "epoch": 0.5466534222600634, "grad_norm": 0.78515625, "learning_rate": 1.6567000213576196e-05, "loss": 0.3364, "step": 7223 }, { "epoch": 0.5467291045835108, "grad_norm": 0.78515625, "learning_rate": 1.656610191174314e-05, "loss": 0.3231, "step": 7224 }, { "epoch": 0.546804786906958, "grad_norm": 0.77734375, "learning_rate": 1.6565203516758667e-05, "loss": 0.351, "step": 7225 }, { "epoch": 0.5468804692304053, "grad_norm": 0.7734375, "learning_rate": 1.656430502863551e-05, "loss": 0.3082, "step": 7226 }, { "epoch": 0.5469561515538527, "grad_norm": 0.72265625, "learning_rate": 1.656340644738643e-05, "loss": 0.2897, "step": 7227 }, { "epoch": 0.5470318338773, "grad_norm": 0.78515625, "learning_rate": 1.656250777302416e-05, "loss": 0.3418, "step": 7228 }, { "epoch": 0.5471075162007474, "grad_norm": 0.7734375, "learning_rate": 1.656160900556145e-05, "loss": 0.3535, "step": 7229 }, { "epoch": 0.5471831985241947, "grad_norm": 0.7109375, "learning_rate": 1.6560710145011064e-05, "loss": 0.2943, "step": 7230 }, { "epoch": 0.5472588808476421, "grad_norm": 0.9609375, "learning_rate": 1.6559811191385746e-05, "loss": 0.385, "step": 7231 }, { "epoch": 0.5473345631710893, "grad_norm": 0.73828125, "learning_rate": 1.6558912144698247e-05, "loss": 0.3355, "step": 7232 }, { "epoch": 0.5474102454945367, "grad_norm": 0.81640625, "learning_rate": 1.6558013004961325e-05, "loss": 0.3659, "step": 7233 }, { "epoch": 0.547485927817984, "grad_norm": 0.78125, "learning_rate": 1.6557113772187734e-05, "loss": 0.364, "step": 7234 }, { "epoch": 0.5475616101414313, "grad_norm": 0.8046875, "learning_rate": 1.6556214446390233e-05, "loss": 0.3341, "step": 7235 }, { "epoch": 0.5476372924648787, "grad_norm": 0.734375, "learning_rate": 1.6555315027581576e-05, "loss": 0.3008, "step": 7236 }, { "epoch": 0.547712974788326, "grad_norm": 0.83203125, "learning_rate": 1.655441551577453e-05, "loss": 0.3464, "step": 7237 }, { "epoch": 0.5477886571117734, "grad_norm": 0.7890625, "learning_rate": 1.655351591098185e-05, "loss": 0.3363, "step": 7238 }, { "epoch": 0.5478643394352206, "grad_norm": 0.7109375, "learning_rate": 1.65526162132163e-05, "loss": 0.2932, "step": 7239 }, { "epoch": 0.547940021758668, "grad_norm": 0.69921875, "learning_rate": 1.6551716422490645e-05, "loss": 0.2834, "step": 7240 }, { "epoch": 0.5480157040821153, "grad_norm": 0.8828125, "learning_rate": 1.655081653881765e-05, "loss": 0.3428, "step": 7241 }, { "epoch": 0.5480913864055627, "grad_norm": 0.703125, "learning_rate": 1.654991656221008e-05, "loss": 0.2877, "step": 7242 }, { "epoch": 0.54816706872901, "grad_norm": 0.8828125, "learning_rate": 1.6549016492680702e-05, "loss": 0.266, "step": 7243 }, { "epoch": 0.5482427510524573, "grad_norm": 0.8046875, "learning_rate": 1.654811633024229e-05, "loss": 0.2952, "step": 7244 }, { "epoch": 0.5483184333759047, "grad_norm": 0.76953125, "learning_rate": 1.654721607490761e-05, "loss": 0.3422, "step": 7245 }, { "epoch": 0.5483941156993519, "grad_norm": 0.72265625, "learning_rate": 1.6546315726689432e-05, "loss": 0.301, "step": 7246 }, { "epoch": 0.5484697980227993, "grad_norm": 0.79296875, "learning_rate": 1.6545415285600533e-05, "loss": 0.3189, "step": 7247 }, { "epoch": 0.5485454803462466, "grad_norm": 0.734375, "learning_rate": 1.654451475165369e-05, "loss": 0.2912, "step": 7248 }, { "epoch": 0.548621162669694, "grad_norm": 0.78125, "learning_rate": 1.654361412486167e-05, "loss": 0.3653, "step": 7249 }, { "epoch": 0.5486968449931413, "grad_norm": 0.80078125, "learning_rate": 1.6542713405237254e-05, "loss": 0.3627, "step": 7250 }, { "epoch": 0.5487725273165887, "grad_norm": 0.76953125, "learning_rate": 1.654181259279322e-05, "loss": 0.3247, "step": 7251 }, { "epoch": 0.548848209640036, "grad_norm": 0.78125, "learning_rate": 1.6540911687542352e-05, "loss": 0.3549, "step": 7252 }, { "epoch": 0.5489238919634832, "grad_norm": 1.5625, "learning_rate": 1.6540010689497424e-05, "loss": 0.3284, "step": 7253 }, { "epoch": 0.5489995742869306, "grad_norm": 0.8515625, "learning_rate": 1.6539109598671225e-05, "loss": 0.3032, "step": 7254 }, { "epoch": 0.5490752566103779, "grad_norm": 0.8046875, "learning_rate": 1.653820841507653e-05, "loss": 0.3544, "step": 7255 }, { "epoch": 0.5491509389338253, "grad_norm": 0.7734375, "learning_rate": 1.6537307138726132e-05, "loss": 0.3504, "step": 7256 }, { "epoch": 0.5492266212572726, "grad_norm": 0.76171875, "learning_rate": 1.653640576963281e-05, "loss": 0.336, "step": 7257 }, { "epoch": 0.54930230358072, "grad_norm": 0.765625, "learning_rate": 1.653550430780936e-05, "loss": 0.3063, "step": 7258 }, { "epoch": 0.5493779859041673, "grad_norm": 0.7421875, "learning_rate": 1.6534602753268566e-05, "loss": 0.298, "step": 7259 }, { "epoch": 0.5494536682276145, "grad_norm": 0.80078125, "learning_rate": 1.6533701106023216e-05, "loss": 0.3502, "step": 7260 }, { "epoch": 0.5495293505510619, "grad_norm": 0.77734375, "learning_rate": 1.6532799366086104e-05, "loss": 0.306, "step": 7261 }, { "epoch": 0.5496050328745092, "grad_norm": 0.7734375, "learning_rate": 1.653189753347002e-05, "loss": 0.3068, "step": 7262 }, { "epoch": 0.5496807151979566, "grad_norm": 0.71875, "learning_rate": 1.6530995608187767e-05, "loss": 0.282, "step": 7263 }, { "epoch": 0.5497563975214039, "grad_norm": 0.78515625, "learning_rate": 1.6530093590252128e-05, "loss": 0.2978, "step": 7264 }, { "epoch": 0.5498320798448513, "grad_norm": 0.78515625, "learning_rate": 1.6529191479675906e-05, "loss": 0.3236, "step": 7265 }, { "epoch": 0.5499077621682986, "grad_norm": 0.8125, "learning_rate": 1.65282892764719e-05, "loss": 0.3756, "step": 7266 }, { "epoch": 0.5499834444917459, "grad_norm": 0.80078125, "learning_rate": 1.6527386980652905e-05, "loss": 0.3275, "step": 7267 }, { "epoch": 0.5500591268151932, "grad_norm": 0.70703125, "learning_rate": 1.6526484592231726e-05, "loss": 0.2912, "step": 7268 }, { "epoch": 0.5501348091386405, "grad_norm": 0.75, "learning_rate": 1.6525582111221163e-05, "loss": 0.2912, "step": 7269 }, { "epoch": 0.5502104914620879, "grad_norm": 0.77734375, "learning_rate": 1.652467953763402e-05, "loss": 0.3235, "step": 7270 }, { "epoch": 0.5502861737855352, "grad_norm": 0.75, "learning_rate": 1.6523776871483102e-05, "loss": 0.2935, "step": 7271 }, { "epoch": 0.5503618561089826, "grad_norm": 0.796875, "learning_rate": 1.6522874112781213e-05, "loss": 0.3504, "step": 7272 }, { "epoch": 0.5504375384324299, "grad_norm": 0.84375, "learning_rate": 1.6521971261541164e-05, "loss": 0.3547, "step": 7273 }, { "epoch": 0.5505132207558772, "grad_norm": 0.79296875, "learning_rate": 1.6521068317775756e-05, "loss": 0.3376, "step": 7274 }, { "epoch": 0.5505889030793245, "grad_norm": 0.71875, "learning_rate": 1.6520165281497803e-05, "loss": 0.3064, "step": 7275 }, { "epoch": 0.5506645854027719, "grad_norm": 0.73828125, "learning_rate": 1.651926215272012e-05, "loss": 0.2569, "step": 7276 }, { "epoch": 0.5507402677262192, "grad_norm": 0.828125, "learning_rate": 1.6518358931455514e-05, "loss": 0.3644, "step": 7277 }, { "epoch": 0.5508159500496665, "grad_norm": 0.78125, "learning_rate": 1.65174556177168e-05, "loss": 0.3193, "step": 7278 }, { "epoch": 0.5508916323731139, "grad_norm": 0.7890625, "learning_rate": 1.6516552211516794e-05, "loss": 0.3085, "step": 7279 }, { "epoch": 0.5509673146965612, "grad_norm": 0.82421875, "learning_rate": 1.651564871286831e-05, "loss": 0.3545, "step": 7280 }, { "epoch": 0.5510429970200085, "grad_norm": 2.609375, "learning_rate": 1.6514745121784174e-05, "loss": 0.3849, "step": 7281 }, { "epoch": 0.5511186793434558, "grad_norm": 0.828125, "learning_rate": 1.6513841438277195e-05, "loss": 0.3935, "step": 7282 }, { "epoch": 0.5511943616669032, "grad_norm": 0.73828125, "learning_rate": 1.6512937662360195e-05, "loss": 0.2998, "step": 7283 }, { "epoch": 0.5512700439903505, "grad_norm": 1.3125, "learning_rate": 1.6512033794046002e-05, "loss": 0.4143, "step": 7284 }, { "epoch": 0.5513457263137979, "grad_norm": 0.8125, "learning_rate": 1.6511129833347434e-05, "loss": 0.337, "step": 7285 }, { "epoch": 0.5514214086372452, "grad_norm": 0.7890625, "learning_rate": 1.6510225780277313e-05, "loss": 0.3291, "step": 7286 }, { "epoch": 0.5514970909606925, "grad_norm": 0.7890625, "learning_rate": 1.650932163484847e-05, "loss": 0.3709, "step": 7287 }, { "epoch": 0.5515727732841398, "grad_norm": 0.75390625, "learning_rate": 1.6508417397073727e-05, "loss": 0.3357, "step": 7288 }, { "epoch": 0.5516484556075871, "grad_norm": 0.70703125, "learning_rate": 1.6507513066965913e-05, "loss": 0.2584, "step": 7289 }, { "epoch": 0.5517241379310345, "grad_norm": 0.7578125, "learning_rate": 1.6506608644537863e-05, "loss": 0.286, "step": 7290 }, { "epoch": 0.5517998202544818, "grad_norm": 0.82421875, "learning_rate": 1.6505704129802403e-05, "loss": 0.3459, "step": 7291 }, { "epoch": 0.5518755025779292, "grad_norm": 0.75, "learning_rate": 1.6504799522772366e-05, "loss": 0.2978, "step": 7292 }, { "epoch": 0.5519511849013765, "grad_norm": 0.82421875, "learning_rate": 1.6503894823460583e-05, "loss": 0.3748, "step": 7293 }, { "epoch": 0.5520268672248237, "grad_norm": 0.72265625, "learning_rate": 1.650299003187989e-05, "loss": 0.2929, "step": 7294 }, { "epoch": 0.5521025495482711, "grad_norm": 0.77734375, "learning_rate": 1.6502085148043126e-05, "loss": 0.3391, "step": 7295 }, { "epoch": 0.5521782318717184, "grad_norm": 0.80078125, "learning_rate": 1.6501180171963124e-05, "loss": 0.307, "step": 7296 }, { "epoch": 0.5522539141951658, "grad_norm": 0.8203125, "learning_rate": 1.650027510365273e-05, "loss": 0.3034, "step": 7297 }, { "epoch": 0.5523295965186131, "grad_norm": 0.78515625, "learning_rate": 1.6499369943124775e-05, "loss": 0.333, "step": 7298 }, { "epoch": 0.5524052788420605, "grad_norm": 0.76171875, "learning_rate": 1.6498464690392103e-05, "loss": 0.3063, "step": 7299 }, { "epoch": 0.5524809611655078, "grad_norm": 0.765625, "learning_rate": 1.649755934546756e-05, "loss": 0.3156, "step": 7300 }, { "epoch": 0.5525566434889551, "grad_norm": 0.75390625, "learning_rate": 1.6496653908363986e-05, "loss": 0.3113, "step": 7301 }, { "epoch": 0.5526323258124024, "grad_norm": 0.80859375, "learning_rate": 1.6495748379094227e-05, "loss": 0.3234, "step": 7302 }, { "epoch": 0.5527080081358497, "grad_norm": 0.984375, "learning_rate": 1.6494842757671136e-05, "loss": 0.3494, "step": 7303 }, { "epoch": 0.5527836904592971, "grad_norm": 0.765625, "learning_rate": 1.6493937044107546e-05, "loss": 0.3192, "step": 7304 }, { "epoch": 0.5528593727827444, "grad_norm": 0.75390625, "learning_rate": 1.6493031238416323e-05, "loss": 0.3101, "step": 7305 }, { "epoch": 0.5529350551061918, "grad_norm": 0.88671875, "learning_rate": 1.6492125340610304e-05, "loss": 0.3882, "step": 7306 }, { "epoch": 0.5530107374296391, "grad_norm": 0.76953125, "learning_rate": 1.6491219350702348e-05, "loss": 0.3327, "step": 7307 }, { "epoch": 0.5530864197530864, "grad_norm": 0.734375, "learning_rate": 1.6490313268705308e-05, "loss": 0.2964, "step": 7308 }, { "epoch": 0.5531621020765337, "grad_norm": 0.75, "learning_rate": 1.6489407094632033e-05, "loss": 0.2998, "step": 7309 }, { "epoch": 0.5532377843999811, "grad_norm": 0.75, "learning_rate": 1.6488500828495384e-05, "loss": 0.2964, "step": 7310 }, { "epoch": 0.5533134667234284, "grad_norm": 0.91015625, "learning_rate": 1.6487594470308215e-05, "loss": 0.3786, "step": 7311 }, { "epoch": 0.5533891490468758, "grad_norm": 0.7265625, "learning_rate": 1.6486688020083388e-05, "loss": 0.2687, "step": 7312 }, { "epoch": 0.5534648313703231, "grad_norm": 0.74609375, "learning_rate": 1.6485781477833758e-05, "loss": 0.2955, "step": 7313 }, { "epoch": 0.5535405136937704, "grad_norm": 0.8359375, "learning_rate": 1.648487484357219e-05, "loss": 0.3691, "step": 7314 }, { "epoch": 0.5536161960172177, "grad_norm": 0.78125, "learning_rate": 1.6483968117311538e-05, "loss": 0.3252, "step": 7315 }, { "epoch": 0.553691878340665, "grad_norm": 0.7578125, "learning_rate": 1.648306129906468e-05, "loss": 0.2601, "step": 7316 }, { "epoch": 0.5537675606641124, "grad_norm": 0.73828125, "learning_rate": 1.648215438884446e-05, "loss": 0.3134, "step": 7317 }, { "epoch": 0.5538432429875597, "grad_norm": 0.7578125, "learning_rate": 1.6481247386663767e-05, "loss": 0.3388, "step": 7318 }, { "epoch": 0.5539189253110071, "grad_norm": 0.81640625, "learning_rate": 1.6480340292535454e-05, "loss": 0.3603, "step": 7319 }, { "epoch": 0.5539946076344544, "grad_norm": 0.8203125, "learning_rate": 1.6479433106472394e-05, "loss": 0.352, "step": 7320 }, { "epoch": 0.5540702899579018, "grad_norm": 0.75390625, "learning_rate": 1.6478525828487456e-05, "loss": 0.3169, "step": 7321 }, { "epoch": 0.554145972281349, "grad_norm": 0.73046875, "learning_rate": 1.6477618458593513e-05, "loss": 0.2911, "step": 7322 }, { "epoch": 0.5542216546047963, "grad_norm": 0.75, "learning_rate": 1.6476710996803434e-05, "loss": 0.2916, "step": 7323 }, { "epoch": 0.5542973369282437, "grad_norm": 0.8125, "learning_rate": 1.6475803443130095e-05, "loss": 0.3594, "step": 7324 }, { "epoch": 0.554373019251691, "grad_norm": 0.734375, "learning_rate": 1.6474895797586376e-05, "loss": 0.3037, "step": 7325 }, { "epoch": 0.5544487015751384, "grad_norm": 0.765625, "learning_rate": 1.6473988060185142e-05, "loss": 0.3278, "step": 7326 }, { "epoch": 0.5545243838985857, "grad_norm": 0.7734375, "learning_rate": 1.647308023093928e-05, "loss": 0.3255, "step": 7327 }, { "epoch": 0.5546000662220331, "grad_norm": 0.83203125, "learning_rate": 1.647217230986167e-05, "loss": 0.3423, "step": 7328 }, { "epoch": 0.5546757485454803, "grad_norm": 0.796875, "learning_rate": 1.6471264296965185e-05, "loss": 0.3415, "step": 7329 }, { "epoch": 0.5547514308689276, "grad_norm": 0.78515625, "learning_rate": 1.647035619226271e-05, "loss": 0.3491, "step": 7330 }, { "epoch": 0.554827113192375, "grad_norm": 0.80859375, "learning_rate": 1.6469447995767132e-05, "loss": 0.3458, "step": 7331 }, { "epoch": 0.5549027955158223, "grad_norm": 0.6953125, "learning_rate": 1.646853970749133e-05, "loss": 0.2909, "step": 7332 }, { "epoch": 0.5549784778392697, "grad_norm": 0.78515625, "learning_rate": 1.64676313274482e-05, "loss": 0.3357, "step": 7333 }, { "epoch": 0.555054160162717, "grad_norm": 0.82421875, "learning_rate": 1.646672285565061e-05, "loss": 0.3631, "step": 7334 }, { "epoch": 0.5551298424861644, "grad_norm": 0.7109375, "learning_rate": 1.646581429211146e-05, "loss": 0.2796, "step": 7335 }, { "epoch": 0.5552055248096116, "grad_norm": 0.81640625, "learning_rate": 1.646490563684364e-05, "loss": 0.3655, "step": 7336 }, { "epoch": 0.555281207133059, "grad_norm": 0.79296875, "learning_rate": 1.6463996889860047e-05, "loss": 0.3165, "step": 7337 }, { "epoch": 0.5553568894565063, "grad_norm": 0.796875, "learning_rate": 1.646308805117356e-05, "loss": 0.3503, "step": 7338 }, { "epoch": 0.5554325717799536, "grad_norm": 0.71484375, "learning_rate": 1.6462179120797075e-05, "loss": 0.3026, "step": 7339 }, { "epoch": 0.555508254103401, "grad_norm": 0.77734375, "learning_rate": 1.6461270098743495e-05, "loss": 0.3201, "step": 7340 }, { "epoch": 0.5555839364268483, "grad_norm": 0.7421875, "learning_rate": 1.6460360985025703e-05, "loss": 0.3, "step": 7341 }, { "epoch": 0.5556596187502957, "grad_norm": 0.765625, "learning_rate": 1.645945177965661e-05, "loss": 0.3187, "step": 7342 }, { "epoch": 0.5557353010737429, "grad_norm": 0.71875, "learning_rate": 1.645854248264911e-05, "loss": 0.2847, "step": 7343 }, { "epoch": 0.5558109833971903, "grad_norm": 0.71875, "learning_rate": 1.64576330940161e-05, "loss": 0.2731, "step": 7344 }, { "epoch": 0.5558866657206376, "grad_norm": 0.7890625, "learning_rate": 1.645672361377048e-05, "loss": 0.3225, "step": 7345 }, { "epoch": 0.555962348044085, "grad_norm": 0.828125, "learning_rate": 1.6455814041925157e-05, "loss": 0.3826, "step": 7346 }, { "epoch": 0.5560380303675323, "grad_norm": 0.765625, "learning_rate": 1.645490437849304e-05, "loss": 0.3297, "step": 7347 }, { "epoch": 0.5561137126909796, "grad_norm": 0.75390625, "learning_rate": 1.6453994623487016e-05, "loss": 0.3027, "step": 7348 }, { "epoch": 0.556189395014427, "grad_norm": 0.76953125, "learning_rate": 1.6453084776920008e-05, "loss": 0.331, "step": 7349 }, { "epoch": 0.5562650773378742, "grad_norm": 0.80078125, "learning_rate": 1.645217483880492e-05, "loss": 0.3556, "step": 7350 }, { "epoch": 0.5563407596613216, "grad_norm": 0.73046875, "learning_rate": 1.6451264809154658e-05, "loss": 0.2843, "step": 7351 }, { "epoch": 0.5564164419847689, "grad_norm": 0.68359375, "learning_rate": 1.6450354687982134e-05, "loss": 0.2424, "step": 7352 }, { "epoch": 0.5564921243082163, "grad_norm": 0.765625, "learning_rate": 1.644944447530026e-05, "loss": 0.3146, "step": 7353 }, { "epoch": 0.5565678066316636, "grad_norm": 0.7109375, "learning_rate": 1.6448534171121946e-05, "loss": 0.2806, "step": 7354 }, { "epoch": 0.556643488955111, "grad_norm": 0.7890625, "learning_rate": 1.644762377546011e-05, "loss": 0.3402, "step": 7355 }, { "epoch": 0.5567191712785583, "grad_norm": 0.74609375, "learning_rate": 1.644671328832767e-05, "loss": 0.3159, "step": 7356 }, { "epoch": 0.5567948536020055, "grad_norm": 0.79296875, "learning_rate": 1.6445802709737536e-05, "loss": 0.3319, "step": 7357 }, { "epoch": 0.5568705359254529, "grad_norm": 0.80078125, "learning_rate": 1.644489203970263e-05, "loss": 0.3699, "step": 7358 }, { "epoch": 0.5569462182489002, "grad_norm": 0.7890625, "learning_rate": 1.644398127823587e-05, "loss": 0.3502, "step": 7359 }, { "epoch": 0.5570219005723476, "grad_norm": 0.7578125, "learning_rate": 1.6443070425350178e-05, "loss": 0.2922, "step": 7360 }, { "epoch": 0.5570975828957949, "grad_norm": 0.8984375, "learning_rate": 1.6442159481058472e-05, "loss": 0.392, "step": 7361 }, { "epoch": 0.5571732652192423, "grad_norm": 0.8125, "learning_rate": 1.6441248445373684e-05, "loss": 0.3359, "step": 7362 }, { "epoch": 0.5572489475426896, "grad_norm": 0.75, "learning_rate": 1.644033731830873e-05, "loss": 0.3052, "step": 7363 }, { "epoch": 0.5573246298661368, "grad_norm": 0.79296875, "learning_rate": 1.643942609987654e-05, "loss": 0.3256, "step": 7364 }, { "epoch": 0.5574003121895842, "grad_norm": 0.8046875, "learning_rate": 1.643851479009004e-05, "loss": 0.302, "step": 7365 }, { "epoch": 0.5574759945130315, "grad_norm": 0.765625, "learning_rate": 1.643760338896216e-05, "loss": 0.3271, "step": 7366 }, { "epoch": 0.5575516768364789, "grad_norm": 0.7265625, "learning_rate": 1.6436691896505824e-05, "loss": 0.3058, "step": 7367 }, { "epoch": 0.5576273591599262, "grad_norm": 0.78515625, "learning_rate": 1.6435780312733974e-05, "loss": 0.3271, "step": 7368 }, { "epoch": 0.5577030414833736, "grad_norm": 0.71484375, "learning_rate": 1.6434868637659535e-05, "loss": 0.2743, "step": 7369 }, { "epoch": 0.5577787238068209, "grad_norm": 0.78125, "learning_rate": 1.6433956871295435e-05, "loss": 0.3338, "step": 7370 }, { "epoch": 0.5578544061302682, "grad_norm": 0.76171875, "learning_rate": 1.643304501365462e-05, "loss": 0.3147, "step": 7371 }, { "epoch": 0.5579300884537155, "grad_norm": 0.73828125, "learning_rate": 1.6432133064750026e-05, "loss": 0.2988, "step": 7372 }, { "epoch": 0.5580057707771628, "grad_norm": 0.734375, "learning_rate": 1.6431221024594584e-05, "loss": 0.2923, "step": 7373 }, { "epoch": 0.5580814531006102, "grad_norm": 0.79296875, "learning_rate": 1.6430308893201233e-05, "loss": 0.2919, "step": 7374 }, { "epoch": 0.5581571354240575, "grad_norm": 0.83984375, "learning_rate": 1.642939667058292e-05, "loss": 0.3736, "step": 7375 }, { "epoch": 0.5582328177475049, "grad_norm": 0.7734375, "learning_rate": 1.6428484356752578e-05, "loss": 0.3308, "step": 7376 }, { "epoch": 0.5583085000709522, "grad_norm": 0.734375, "learning_rate": 1.6427571951723152e-05, "loss": 0.294, "step": 7377 }, { "epoch": 0.5583841823943995, "grad_norm": 1.1640625, "learning_rate": 1.642665945550759e-05, "loss": 0.4286, "step": 7378 }, { "epoch": 0.5584598647178468, "grad_norm": 0.8515625, "learning_rate": 1.642574686811884e-05, "loss": 0.3582, "step": 7379 }, { "epoch": 0.5585355470412942, "grad_norm": 0.75390625, "learning_rate": 1.6424834189569838e-05, "loss": 0.3028, "step": 7380 }, { "epoch": 0.5586112293647415, "grad_norm": 0.75, "learning_rate": 1.6423921419873542e-05, "loss": 0.3157, "step": 7381 }, { "epoch": 0.5586869116881888, "grad_norm": 0.8671875, "learning_rate": 1.642300855904289e-05, "loss": 0.4058, "step": 7382 }, { "epoch": 0.5587625940116362, "grad_norm": 0.76171875, "learning_rate": 1.6422095607090844e-05, "loss": 0.289, "step": 7383 }, { "epoch": 0.5588382763350835, "grad_norm": 0.73046875, "learning_rate": 1.6421182564030355e-05, "loss": 0.3134, "step": 7384 }, { "epoch": 0.5589139586585308, "grad_norm": 0.796875, "learning_rate": 1.642026942987437e-05, "loss": 0.3265, "step": 7385 }, { "epoch": 0.5589896409819781, "grad_norm": 0.78125, "learning_rate": 1.641935620463584e-05, "loss": 0.3371, "step": 7386 }, { "epoch": 0.5590653233054255, "grad_norm": 0.80078125, "learning_rate": 1.6418442888327728e-05, "loss": 0.3681, "step": 7387 }, { "epoch": 0.5591410056288728, "grad_norm": 0.734375, "learning_rate": 1.641752948096299e-05, "loss": 0.2786, "step": 7388 }, { "epoch": 0.5592166879523202, "grad_norm": 0.765625, "learning_rate": 1.6416615982554586e-05, "loss": 0.3085, "step": 7389 }, { "epoch": 0.5592923702757675, "grad_norm": 0.7265625, "learning_rate": 1.641570239311547e-05, "loss": 0.3006, "step": 7390 }, { "epoch": 0.5593680525992148, "grad_norm": 0.80859375, "learning_rate": 1.641478871265861e-05, "loss": 0.3227, "step": 7391 }, { "epoch": 0.5594437349226621, "grad_norm": 0.8203125, "learning_rate": 1.6413874941196957e-05, "loss": 0.3317, "step": 7392 }, { "epoch": 0.5595194172461094, "grad_norm": 0.73828125, "learning_rate": 1.6412961078743486e-05, "loss": 0.3131, "step": 7393 }, { "epoch": 0.5595950995695568, "grad_norm": 0.76171875, "learning_rate": 1.6412047125311155e-05, "loss": 0.3329, "step": 7394 }, { "epoch": 0.5596707818930041, "grad_norm": 0.78125, "learning_rate": 1.6411133080912937e-05, "loss": 0.3373, "step": 7395 }, { "epoch": 0.5597464642164515, "grad_norm": 1.5078125, "learning_rate": 1.6410218945561788e-05, "loss": 0.4338, "step": 7396 }, { "epoch": 0.5598221465398988, "grad_norm": 0.77734375, "learning_rate": 1.6409304719270685e-05, "loss": 0.2903, "step": 7397 }, { "epoch": 0.5598978288633462, "grad_norm": 0.77734375, "learning_rate": 1.6408390402052597e-05, "loss": 0.3011, "step": 7398 }, { "epoch": 0.5599735111867934, "grad_norm": 1.0703125, "learning_rate": 1.6407475993920495e-05, "loss": 0.36, "step": 7399 }, { "epoch": 0.5600491935102407, "grad_norm": 0.75, "learning_rate": 1.640656149488735e-05, "loss": 0.3405, "step": 7400 }, { "epoch": 0.5601248758336881, "grad_norm": 0.7578125, "learning_rate": 1.6405646904966135e-05, "loss": 0.3092, "step": 7401 }, { "epoch": 0.5602005581571354, "grad_norm": 0.7109375, "learning_rate": 1.6404732224169827e-05, "loss": 0.2832, "step": 7402 }, { "epoch": 0.5602762404805828, "grad_norm": 0.7421875, "learning_rate": 1.64038174525114e-05, "loss": 0.2892, "step": 7403 }, { "epoch": 0.5603519228040301, "grad_norm": 0.7421875, "learning_rate": 1.6402902590003836e-05, "loss": 0.2998, "step": 7404 }, { "epoch": 0.5604276051274775, "grad_norm": 0.79296875, "learning_rate": 1.640198763666011e-05, "loss": 0.3145, "step": 7405 }, { "epoch": 0.5605032874509247, "grad_norm": 1.296875, "learning_rate": 1.64010725924932e-05, "loss": 0.4183, "step": 7406 }, { "epoch": 0.560578969774372, "grad_norm": 0.8203125, "learning_rate": 1.640015745751609e-05, "loss": 0.3502, "step": 7407 }, { "epoch": 0.5606546520978194, "grad_norm": 0.7734375, "learning_rate": 1.639924223174177e-05, "loss": 0.3471, "step": 7408 }, { "epoch": 0.5607303344212667, "grad_norm": 0.80078125, "learning_rate": 1.6398326915183212e-05, "loss": 0.3596, "step": 7409 }, { "epoch": 0.5608060167447141, "grad_norm": 0.83203125, "learning_rate": 1.639741150785341e-05, "loss": 0.3042, "step": 7410 }, { "epoch": 0.5608816990681614, "grad_norm": 0.7421875, "learning_rate": 1.6396496009765345e-05, "loss": 0.3217, "step": 7411 }, { "epoch": 0.5609573813916087, "grad_norm": 0.76953125, "learning_rate": 1.6395580420932008e-05, "loss": 0.2988, "step": 7412 }, { "epoch": 0.561033063715056, "grad_norm": 0.84765625, "learning_rate": 1.639466474136639e-05, "loss": 0.3322, "step": 7413 }, { "epoch": 0.5611087460385034, "grad_norm": 0.765625, "learning_rate": 1.6393748971081475e-05, "loss": 0.3479, "step": 7414 }, { "epoch": 0.5611844283619507, "grad_norm": 0.81640625, "learning_rate": 1.639283311009026e-05, "loss": 0.3453, "step": 7415 }, { "epoch": 0.561260110685398, "grad_norm": 0.76953125, "learning_rate": 1.639191715840574e-05, "loss": 0.3294, "step": 7416 }, { "epoch": 0.5613357930088454, "grad_norm": 0.74609375, "learning_rate": 1.63910011160409e-05, "loss": 0.3013, "step": 7417 }, { "epoch": 0.5614114753322927, "grad_norm": 0.8125, "learning_rate": 1.6390084983008746e-05, "loss": 0.361, "step": 7418 }, { "epoch": 0.56148715765574, "grad_norm": 0.78515625, "learning_rate": 1.6389168759322274e-05, "loss": 0.3427, "step": 7419 }, { "epoch": 0.5615628399791873, "grad_norm": 0.7578125, "learning_rate": 1.6388252444994474e-05, "loss": 0.2863, "step": 7420 }, { "epoch": 0.5616385223026347, "grad_norm": 0.78125, "learning_rate": 1.638733604003835e-05, "loss": 0.3573, "step": 7421 }, { "epoch": 0.561714204626082, "grad_norm": 0.78515625, "learning_rate": 1.638641954446691e-05, "loss": 0.3393, "step": 7422 }, { "epoch": 0.5617898869495294, "grad_norm": 0.7734375, "learning_rate": 1.638550295829314e-05, "loss": 0.3395, "step": 7423 }, { "epoch": 0.5618655692729767, "grad_norm": 0.73828125, "learning_rate": 1.6384586281530056e-05, "loss": 0.2972, "step": 7424 }, { "epoch": 0.561941251596424, "grad_norm": 0.703125, "learning_rate": 1.6383669514190662e-05, "loss": 0.2793, "step": 7425 }, { "epoch": 0.5620169339198713, "grad_norm": 0.73828125, "learning_rate": 1.638275265628796e-05, "loss": 0.3098, "step": 7426 }, { "epoch": 0.5620926162433186, "grad_norm": 0.8125, "learning_rate": 1.6381835707834956e-05, "loss": 0.334, "step": 7427 }, { "epoch": 0.562168298566766, "grad_norm": 0.76953125, "learning_rate": 1.6380918668844663e-05, "loss": 0.3398, "step": 7428 }, { "epoch": 0.5622439808902133, "grad_norm": 0.72265625, "learning_rate": 1.6380001539330088e-05, "loss": 0.2624, "step": 7429 }, { "epoch": 0.5623196632136607, "grad_norm": 0.78125, "learning_rate": 1.6379084319304245e-05, "loss": 0.3545, "step": 7430 }, { "epoch": 0.562395345537108, "grad_norm": 0.69921875, "learning_rate": 1.637816700878014e-05, "loss": 0.2439, "step": 7431 }, { "epoch": 0.5624710278605554, "grad_norm": 0.78125, "learning_rate": 1.6377249607770792e-05, "loss": 0.3217, "step": 7432 }, { "epoch": 0.5625467101840026, "grad_norm": 0.7421875, "learning_rate": 1.6376332116289216e-05, "loss": 0.3267, "step": 7433 }, { "epoch": 0.5626223925074499, "grad_norm": 0.71484375, "learning_rate": 1.6375414534348425e-05, "loss": 0.2926, "step": 7434 }, { "epoch": 0.5626980748308973, "grad_norm": 0.765625, "learning_rate": 1.637449686196144e-05, "loss": 0.3171, "step": 7435 }, { "epoch": 0.5627737571543446, "grad_norm": 0.84375, "learning_rate": 1.6373579099141277e-05, "loss": 0.3216, "step": 7436 }, { "epoch": 0.562849439477792, "grad_norm": 0.81640625, "learning_rate": 1.637266124590096e-05, "loss": 0.3801, "step": 7437 }, { "epoch": 0.5629251218012393, "grad_norm": 0.78125, "learning_rate": 1.63717433022535e-05, "loss": 0.3517, "step": 7438 }, { "epoch": 0.5630008041246867, "grad_norm": 0.7890625, "learning_rate": 1.6370825268211933e-05, "loss": 0.3544, "step": 7439 }, { "epoch": 0.5630764864481339, "grad_norm": 0.81640625, "learning_rate": 1.6369907143789277e-05, "loss": 0.3193, "step": 7440 }, { "epoch": 0.5631521687715813, "grad_norm": 0.796875, "learning_rate": 1.6368988928998556e-05, "loss": 0.317, "step": 7441 }, { "epoch": 0.5632278510950286, "grad_norm": 0.7734375, "learning_rate": 1.6368070623852794e-05, "loss": 0.3124, "step": 7442 }, { "epoch": 0.5633035334184759, "grad_norm": 0.734375, "learning_rate": 1.636715222836503e-05, "loss": 0.2875, "step": 7443 }, { "epoch": 0.5633792157419233, "grad_norm": 0.7578125, "learning_rate": 1.6366233742548275e-05, "loss": 0.3268, "step": 7444 }, { "epoch": 0.5634548980653706, "grad_norm": 0.76953125, "learning_rate": 1.636531516641558e-05, "loss": 0.3155, "step": 7445 }, { "epoch": 0.563530580388818, "grad_norm": 0.78125, "learning_rate": 1.636439649997996e-05, "loss": 0.3302, "step": 7446 }, { "epoch": 0.5636062627122652, "grad_norm": 0.77734375, "learning_rate": 1.6363477743254454e-05, "loss": 0.335, "step": 7447 }, { "epoch": 0.5636819450357126, "grad_norm": 0.84375, "learning_rate": 1.6362558896252097e-05, "loss": 0.2904, "step": 7448 }, { "epoch": 0.5637576273591599, "grad_norm": 0.73828125, "learning_rate": 1.6361639958985925e-05, "loss": 0.3301, "step": 7449 }, { "epoch": 0.5638333096826073, "grad_norm": 1.1484375, "learning_rate": 1.636072093146897e-05, "loss": 0.3652, "step": 7450 }, { "epoch": 0.5639089920060546, "grad_norm": 0.85546875, "learning_rate": 1.6359801813714274e-05, "loss": 0.3468, "step": 7451 }, { "epoch": 0.563984674329502, "grad_norm": 0.80078125, "learning_rate": 1.6358882605734878e-05, "loss": 0.3267, "step": 7452 }, { "epoch": 0.5640603566529493, "grad_norm": 0.8203125, "learning_rate": 1.635796330754382e-05, "loss": 0.3707, "step": 7453 }, { "epoch": 0.5641360389763965, "grad_norm": 0.7421875, "learning_rate": 1.6357043919154137e-05, "loss": 0.3218, "step": 7454 }, { "epoch": 0.5642117212998439, "grad_norm": 0.78125, "learning_rate": 1.635612444057888e-05, "loss": 0.3078, "step": 7455 }, { "epoch": 0.5642874036232912, "grad_norm": 0.83984375, "learning_rate": 1.6355204871831094e-05, "loss": 0.3796, "step": 7456 }, { "epoch": 0.5643630859467386, "grad_norm": 0.828125, "learning_rate": 1.6354285212923818e-05, "loss": 0.3348, "step": 7457 }, { "epoch": 0.5644387682701859, "grad_norm": 0.78125, "learning_rate": 1.63533654638701e-05, "loss": 0.2973, "step": 7458 }, { "epoch": 0.5645144505936333, "grad_norm": 0.7578125, "learning_rate": 1.6352445624682993e-05, "loss": 0.3006, "step": 7459 }, { "epoch": 0.5645901329170806, "grad_norm": 0.80078125, "learning_rate": 1.6351525695375543e-05, "loss": 0.3328, "step": 7460 }, { "epoch": 0.5646658152405278, "grad_norm": 0.83203125, "learning_rate": 1.63506056759608e-05, "loss": 0.3592, "step": 7461 }, { "epoch": 0.5647414975639752, "grad_norm": 0.74609375, "learning_rate": 1.634968556645182e-05, "loss": 0.295, "step": 7462 }, { "epoch": 0.5648171798874225, "grad_norm": 0.890625, "learning_rate": 1.634876536686165e-05, "loss": 0.3919, "step": 7463 }, { "epoch": 0.5648928622108699, "grad_norm": 0.828125, "learning_rate": 1.634784507720335e-05, "loss": 0.3372, "step": 7464 }, { "epoch": 0.5649685445343172, "grad_norm": 0.8046875, "learning_rate": 1.6346924697489975e-05, "loss": 0.3386, "step": 7465 }, { "epoch": 0.5650442268577646, "grad_norm": 0.78125, "learning_rate": 1.6346004227734583e-05, "loss": 0.3205, "step": 7466 }, { "epoch": 0.5651199091812119, "grad_norm": 0.75390625, "learning_rate": 1.6345083667950227e-05, "loss": 0.3091, "step": 7467 }, { "epoch": 0.5651955915046591, "grad_norm": 1.125, "learning_rate": 1.6344163018149975e-05, "loss": 0.3631, "step": 7468 }, { "epoch": 0.5652712738281065, "grad_norm": 0.796875, "learning_rate": 1.634324227834688e-05, "loss": 0.3373, "step": 7469 }, { "epoch": 0.5653469561515538, "grad_norm": 0.7890625, "learning_rate": 1.634232144855401e-05, "loss": 0.3275, "step": 7470 }, { "epoch": 0.5654226384750012, "grad_norm": 0.80859375, "learning_rate": 1.6341400528784426e-05, "loss": 0.3417, "step": 7471 }, { "epoch": 0.5654983207984485, "grad_norm": 0.8046875, "learning_rate": 1.6340479519051192e-05, "loss": 0.3367, "step": 7472 }, { "epoch": 0.5655740031218959, "grad_norm": 0.80078125, "learning_rate": 1.6339558419367378e-05, "loss": 0.3411, "step": 7473 }, { "epoch": 0.5656496854453432, "grad_norm": 0.77734375, "learning_rate": 1.6338637229746046e-05, "loss": 0.3034, "step": 7474 }, { "epoch": 0.5657253677687905, "grad_norm": 0.80078125, "learning_rate": 1.633771595020027e-05, "loss": 0.33, "step": 7475 }, { "epoch": 0.5658010500922378, "grad_norm": 0.78515625, "learning_rate": 1.6336794580743115e-05, "loss": 0.326, "step": 7476 }, { "epoch": 0.5658767324156851, "grad_norm": 0.74609375, "learning_rate": 1.6335873121387655e-05, "loss": 0.3199, "step": 7477 }, { "epoch": 0.5659524147391325, "grad_norm": 0.8203125, "learning_rate": 1.6334951572146966e-05, "loss": 0.3556, "step": 7478 }, { "epoch": 0.5660280970625798, "grad_norm": 0.734375, "learning_rate": 1.6334029933034112e-05, "loss": 0.3003, "step": 7479 }, { "epoch": 0.5661037793860272, "grad_norm": 0.796875, "learning_rate": 1.6333108204062177e-05, "loss": 0.3668, "step": 7480 }, { "epoch": 0.5661794617094745, "grad_norm": 1.3671875, "learning_rate": 1.6332186385244233e-05, "loss": 0.3967, "step": 7481 }, { "epoch": 0.5662551440329218, "grad_norm": 0.71875, "learning_rate": 1.633126447659336e-05, "loss": 0.3084, "step": 7482 }, { "epoch": 0.5663308263563691, "grad_norm": 0.734375, "learning_rate": 1.6330342478122635e-05, "loss": 0.2928, "step": 7483 }, { "epoch": 0.5664065086798165, "grad_norm": 0.7109375, "learning_rate": 1.632942038984514e-05, "loss": 0.2878, "step": 7484 }, { "epoch": 0.5664821910032638, "grad_norm": 0.71875, "learning_rate": 1.632849821177395e-05, "loss": 0.3046, "step": 7485 }, { "epoch": 0.5665578733267111, "grad_norm": 0.734375, "learning_rate": 1.6327575943922157e-05, "loss": 0.3038, "step": 7486 }, { "epoch": 0.5666335556501585, "grad_norm": 0.79296875, "learning_rate": 1.6326653586302842e-05, "loss": 0.3249, "step": 7487 }, { "epoch": 0.5667092379736058, "grad_norm": 0.80078125, "learning_rate": 1.6325731138929084e-05, "loss": 0.3135, "step": 7488 }, { "epoch": 0.5667849202970531, "grad_norm": 0.8125, "learning_rate": 1.632480860181398e-05, "loss": 0.3308, "step": 7489 }, { "epoch": 0.5668606026205004, "grad_norm": 0.75390625, "learning_rate": 1.6323885974970606e-05, "loss": 0.2997, "step": 7490 }, { "epoch": 0.5669362849439478, "grad_norm": 0.7890625, "learning_rate": 1.6322963258412064e-05, "loss": 0.3041, "step": 7491 }, { "epoch": 0.5670119672673951, "grad_norm": 0.796875, "learning_rate": 1.6322040452151435e-05, "loss": 0.3415, "step": 7492 }, { "epoch": 0.5670876495908425, "grad_norm": 0.8515625, "learning_rate": 1.632111755620181e-05, "loss": 0.3803, "step": 7493 }, { "epoch": 0.5671633319142898, "grad_norm": 0.734375, "learning_rate": 1.6320194570576288e-05, "loss": 0.2892, "step": 7494 }, { "epoch": 0.5672390142377371, "grad_norm": 0.72265625, "learning_rate": 1.6319271495287963e-05, "loss": 0.3026, "step": 7495 }, { "epoch": 0.5673146965611844, "grad_norm": 0.78125, "learning_rate": 1.631834833034992e-05, "loss": 0.316, "step": 7496 }, { "epoch": 0.5673903788846317, "grad_norm": 0.7890625, "learning_rate": 1.6317425075775272e-05, "loss": 0.3126, "step": 7497 }, { "epoch": 0.5674660612080791, "grad_norm": 0.7578125, "learning_rate": 1.6316501731577103e-05, "loss": 0.3031, "step": 7498 }, { "epoch": 0.5675417435315264, "grad_norm": 0.7734375, "learning_rate": 1.631557829776852e-05, "loss": 0.2841, "step": 7499 }, { "epoch": 0.5676174258549738, "grad_norm": 0.76953125, "learning_rate": 1.631465477436262e-05, "loss": 0.3001, "step": 7500 }, { "epoch": 0.5676931081784211, "grad_norm": 0.79296875, "learning_rate": 1.6313731161372506e-05, "loss": 0.3662, "step": 7501 }, { "epoch": 0.5677687905018685, "grad_norm": 1.203125, "learning_rate": 1.631280745881128e-05, "loss": 0.3863, "step": 7502 }, { "epoch": 0.5678444728253157, "grad_norm": 0.7578125, "learning_rate": 1.6311883666692046e-05, "loss": 0.2935, "step": 7503 }, { "epoch": 0.567920155148763, "grad_norm": 0.82421875, "learning_rate": 1.631095978502791e-05, "loss": 0.3506, "step": 7504 }, { "epoch": 0.5679958374722104, "grad_norm": 0.734375, "learning_rate": 1.631003581383198e-05, "loss": 0.2941, "step": 7505 }, { "epoch": 0.5680715197956577, "grad_norm": 0.828125, "learning_rate": 1.630911175311737e-05, "loss": 0.3729, "step": 7506 }, { "epoch": 0.5681472021191051, "grad_norm": 0.80078125, "learning_rate": 1.6308187602897173e-05, "loss": 0.3259, "step": 7507 }, { "epoch": 0.5682228844425524, "grad_norm": 0.953125, "learning_rate": 1.6307263363184517e-05, "loss": 0.3466, "step": 7508 }, { "epoch": 0.5682985667659998, "grad_norm": 0.73828125, "learning_rate": 1.6306339033992503e-05, "loss": 0.2895, "step": 7509 }, { "epoch": 0.568374249089447, "grad_norm": 0.7421875, "learning_rate": 1.6305414615334247e-05, "loss": 0.3217, "step": 7510 }, { "epoch": 0.5684499314128943, "grad_norm": 0.70703125, "learning_rate": 1.6304490107222867e-05, "loss": 0.2894, "step": 7511 }, { "epoch": 0.5685256137363417, "grad_norm": 0.76171875, "learning_rate": 1.6303565509671476e-05, "loss": 0.3215, "step": 7512 }, { "epoch": 0.568601296059789, "grad_norm": 0.80859375, "learning_rate": 1.630264082269319e-05, "loss": 0.3394, "step": 7513 }, { "epoch": 0.5686769783832364, "grad_norm": 0.71875, "learning_rate": 1.6301716046301125e-05, "loss": 0.2603, "step": 7514 }, { "epoch": 0.5687526607066837, "grad_norm": 0.75, "learning_rate": 1.630079118050841e-05, "loss": 0.3115, "step": 7515 }, { "epoch": 0.5688283430301311, "grad_norm": 0.78515625, "learning_rate": 1.6299866225328155e-05, "loss": 0.3128, "step": 7516 }, { "epoch": 0.5689040253535783, "grad_norm": 0.7421875, "learning_rate": 1.6298941180773487e-05, "loss": 0.3077, "step": 7517 }, { "epoch": 0.5689797076770257, "grad_norm": 0.84375, "learning_rate": 1.6298016046857533e-05, "loss": 0.3382, "step": 7518 }, { "epoch": 0.569055390000473, "grad_norm": 0.78125, "learning_rate": 1.6297090823593407e-05, "loss": 0.2909, "step": 7519 }, { "epoch": 0.5691310723239204, "grad_norm": 0.83203125, "learning_rate": 1.6296165510994248e-05, "loss": 0.3018, "step": 7520 }, { "epoch": 0.5692067546473677, "grad_norm": 0.76953125, "learning_rate": 1.6295240109073173e-05, "loss": 0.3242, "step": 7521 }, { "epoch": 0.569282436970815, "grad_norm": 0.828125, "learning_rate": 1.6294314617843314e-05, "loss": 0.3682, "step": 7522 }, { "epoch": 0.5693581192942624, "grad_norm": 0.76171875, "learning_rate": 1.62933890373178e-05, "loss": 0.2974, "step": 7523 }, { "epoch": 0.5694338016177096, "grad_norm": 0.74609375, "learning_rate": 1.6292463367509767e-05, "loss": 0.2984, "step": 7524 }, { "epoch": 0.569509483941157, "grad_norm": 0.7890625, "learning_rate": 1.629153760843234e-05, "loss": 0.3385, "step": 7525 }, { "epoch": 0.5695851662646043, "grad_norm": 0.76171875, "learning_rate": 1.6290611760098655e-05, "loss": 0.3246, "step": 7526 }, { "epoch": 0.5696608485880517, "grad_norm": 0.765625, "learning_rate": 1.6289685822521847e-05, "loss": 0.2987, "step": 7527 }, { "epoch": 0.569736530911499, "grad_norm": 0.76953125, "learning_rate": 1.6288759795715053e-05, "loss": 0.3397, "step": 7528 }, { "epoch": 0.5698122132349464, "grad_norm": 0.77734375, "learning_rate": 1.628783367969141e-05, "loss": 0.3269, "step": 7529 }, { "epoch": 0.5698878955583937, "grad_norm": 0.8125, "learning_rate": 1.6286907474464056e-05, "loss": 0.3851, "step": 7530 }, { "epoch": 0.5699635778818409, "grad_norm": 0.81640625, "learning_rate": 1.6285981180046127e-05, "loss": 0.3231, "step": 7531 }, { "epoch": 0.5700392602052883, "grad_norm": 0.79296875, "learning_rate": 1.628505479645077e-05, "loss": 0.3378, "step": 7532 }, { "epoch": 0.5701149425287356, "grad_norm": 0.7734375, "learning_rate": 1.628412832369113e-05, "loss": 0.3279, "step": 7533 }, { "epoch": 0.570190624852183, "grad_norm": 0.828125, "learning_rate": 1.628320176178034e-05, "loss": 0.3378, "step": 7534 }, { "epoch": 0.5702663071756303, "grad_norm": 0.7421875, "learning_rate": 1.628227511073155e-05, "loss": 0.3064, "step": 7535 }, { "epoch": 0.5703419894990777, "grad_norm": 0.765625, "learning_rate": 1.628134837055791e-05, "loss": 0.3347, "step": 7536 }, { "epoch": 0.5704176718225249, "grad_norm": 0.703125, "learning_rate": 1.628042154127256e-05, "loss": 0.2786, "step": 7537 }, { "epoch": 0.5704933541459722, "grad_norm": 0.79296875, "learning_rate": 1.6279494622888662e-05, "loss": 0.3325, "step": 7538 }, { "epoch": 0.5705690364694196, "grad_norm": 0.765625, "learning_rate": 1.627856761541935e-05, "loss": 0.2939, "step": 7539 }, { "epoch": 0.5706447187928669, "grad_norm": 0.6875, "learning_rate": 1.627764051887778e-05, "loss": 0.2827, "step": 7540 }, { "epoch": 0.5707204011163143, "grad_norm": 0.82421875, "learning_rate": 1.6276713333277112e-05, "loss": 0.3593, "step": 7541 }, { "epoch": 0.5707960834397616, "grad_norm": 0.73046875, "learning_rate": 1.6275786058630488e-05, "loss": 0.3018, "step": 7542 }, { "epoch": 0.570871765763209, "grad_norm": 0.78125, "learning_rate": 1.6274858694951076e-05, "loss": 0.3475, "step": 7543 }, { "epoch": 0.5709474480866562, "grad_norm": 0.79296875, "learning_rate": 1.6273931242252024e-05, "loss": 0.3579, "step": 7544 }, { "epoch": 0.5710231304101036, "grad_norm": 0.765625, "learning_rate": 1.6273003700546486e-05, "loss": 0.3327, "step": 7545 }, { "epoch": 0.5710988127335509, "grad_norm": 0.79296875, "learning_rate": 1.627207606984763e-05, "loss": 0.3304, "step": 7546 }, { "epoch": 0.5711744950569982, "grad_norm": 0.76953125, "learning_rate": 1.627114835016861e-05, "loss": 0.3181, "step": 7547 }, { "epoch": 0.5712501773804456, "grad_norm": 0.828125, "learning_rate": 1.627022054152259e-05, "loss": 0.3346, "step": 7548 }, { "epoch": 0.5713258597038929, "grad_norm": 0.74609375, "learning_rate": 1.6269292643922732e-05, "loss": 0.3031, "step": 7549 }, { "epoch": 0.5714015420273403, "grad_norm": 0.7890625, "learning_rate": 1.6268364657382196e-05, "loss": 0.3225, "step": 7550 }, { "epoch": 0.5714772243507875, "grad_norm": 0.78515625, "learning_rate": 1.626743658191415e-05, "loss": 0.3338, "step": 7551 }, { "epoch": 0.5715529066742349, "grad_norm": 0.82421875, "learning_rate": 1.626650841753177e-05, "loss": 0.3659, "step": 7552 }, { "epoch": 0.5716285889976822, "grad_norm": 0.75, "learning_rate": 1.6265580164248204e-05, "loss": 0.3196, "step": 7553 }, { "epoch": 0.5717042713211296, "grad_norm": 0.8046875, "learning_rate": 1.6264651822076636e-05, "loss": 0.3713, "step": 7554 }, { "epoch": 0.5717799536445769, "grad_norm": 0.82421875, "learning_rate": 1.626372339103023e-05, "loss": 0.3517, "step": 7555 }, { "epoch": 0.5718556359680242, "grad_norm": 0.71484375, "learning_rate": 1.6262794871122157e-05, "loss": 0.2839, "step": 7556 }, { "epoch": 0.5719313182914716, "grad_norm": 0.796875, "learning_rate": 1.6261866262365597e-05, "loss": 0.356, "step": 7557 }, { "epoch": 0.5720070006149188, "grad_norm": 0.75390625, "learning_rate": 1.6260937564773715e-05, "loss": 0.3285, "step": 7558 }, { "epoch": 0.5720826829383662, "grad_norm": 0.81640625, "learning_rate": 1.626000877835969e-05, "loss": 0.3638, "step": 7559 }, { "epoch": 0.5721583652618135, "grad_norm": 0.76171875, "learning_rate": 1.6259079903136697e-05, "loss": 0.2975, "step": 7560 }, { "epoch": 0.5722340475852609, "grad_norm": 0.76171875, "learning_rate": 1.6258150939117917e-05, "loss": 0.3146, "step": 7561 }, { "epoch": 0.5723097299087082, "grad_norm": 0.7578125, "learning_rate": 1.6257221886316524e-05, "loss": 0.3067, "step": 7562 }, { "epoch": 0.5723854122321556, "grad_norm": 0.7578125, "learning_rate": 1.62562927447457e-05, "loss": 0.3399, "step": 7563 }, { "epoch": 0.5724610945556029, "grad_norm": 0.79296875, "learning_rate": 1.625536351441863e-05, "loss": 0.3172, "step": 7564 }, { "epoch": 0.5725367768790501, "grad_norm": 0.7734375, "learning_rate": 1.6254434195348492e-05, "loss": 0.3181, "step": 7565 }, { "epoch": 0.5726124592024975, "grad_norm": 0.8203125, "learning_rate": 1.625350478754847e-05, "loss": 0.3209, "step": 7566 }, { "epoch": 0.5726881415259448, "grad_norm": 0.796875, "learning_rate": 1.6252575291031755e-05, "loss": 0.3264, "step": 7567 }, { "epoch": 0.5727638238493922, "grad_norm": 1.078125, "learning_rate": 1.625164570581153e-05, "loss": 0.4296, "step": 7568 }, { "epoch": 0.5728395061728395, "grad_norm": 0.73046875, "learning_rate": 1.6250716031900983e-05, "loss": 0.3067, "step": 7569 }, { "epoch": 0.5729151884962869, "grad_norm": 0.796875, "learning_rate": 1.62497862693133e-05, "loss": 0.3527, "step": 7570 }, { "epoch": 0.5729908708197342, "grad_norm": 0.9140625, "learning_rate": 1.6248856418061672e-05, "loss": 0.3872, "step": 7571 }, { "epoch": 0.5730665531431814, "grad_norm": 0.80078125, "learning_rate": 1.6247926478159296e-05, "loss": 0.3753, "step": 7572 }, { "epoch": 0.5731422354666288, "grad_norm": 0.984375, "learning_rate": 1.624699644961936e-05, "loss": 0.3505, "step": 7573 }, { "epoch": 0.5732179177900761, "grad_norm": 0.79296875, "learning_rate": 1.624606633245506e-05, "loss": 0.333, "step": 7574 }, { "epoch": 0.5732936001135235, "grad_norm": 0.8046875, "learning_rate": 1.6245136126679584e-05, "loss": 0.3612, "step": 7575 }, { "epoch": 0.5733692824369708, "grad_norm": 0.8359375, "learning_rate": 1.6244205832306144e-05, "loss": 0.3409, "step": 7576 }, { "epoch": 0.5734449647604182, "grad_norm": 0.82421875, "learning_rate": 1.6243275449347925e-05, "loss": 0.3697, "step": 7577 }, { "epoch": 0.5735206470838655, "grad_norm": 0.75390625, "learning_rate": 1.6242344977818133e-05, "loss": 0.2931, "step": 7578 }, { "epoch": 0.5735963294073128, "grad_norm": 0.7421875, "learning_rate": 1.624141441772996e-05, "loss": 0.325, "step": 7579 }, { "epoch": 0.5736720117307601, "grad_norm": 0.8125, "learning_rate": 1.6240483769096614e-05, "loss": 0.334, "step": 7580 }, { "epoch": 0.5737476940542074, "grad_norm": 0.74609375, "learning_rate": 1.62395530319313e-05, "loss": 0.2986, "step": 7581 }, { "epoch": 0.5738233763776548, "grad_norm": 0.734375, "learning_rate": 1.6238622206247218e-05, "loss": 0.2719, "step": 7582 }, { "epoch": 0.5738990587011021, "grad_norm": 0.71484375, "learning_rate": 1.623769129205757e-05, "loss": 0.2957, "step": 7583 }, { "epoch": 0.5739747410245495, "grad_norm": 0.7109375, "learning_rate": 1.623676028937557e-05, "loss": 0.2799, "step": 7584 }, { "epoch": 0.5740504233479968, "grad_norm": 0.80078125, "learning_rate": 1.6235829198214425e-05, "loss": 0.3245, "step": 7585 }, { "epoch": 0.5741261056714441, "grad_norm": 0.7265625, "learning_rate": 1.6234898018587336e-05, "loss": 0.2708, "step": 7586 }, { "epoch": 0.5742017879948914, "grad_norm": 0.71484375, "learning_rate": 1.6233966750507523e-05, "loss": 0.2749, "step": 7587 }, { "epoch": 0.5742774703183388, "grad_norm": 0.93359375, "learning_rate": 1.623303539398819e-05, "loss": 0.3281, "step": 7588 }, { "epoch": 0.5743531526417861, "grad_norm": 0.7265625, "learning_rate": 1.623210394904256e-05, "loss": 0.2977, "step": 7589 }, { "epoch": 0.5744288349652334, "grad_norm": 0.7890625, "learning_rate": 1.623117241568384e-05, "loss": 0.2938, "step": 7590 }, { "epoch": 0.5745045172886808, "grad_norm": 0.78515625, "learning_rate": 1.6230240793925244e-05, "loss": 0.2953, "step": 7591 }, { "epoch": 0.5745801996121281, "grad_norm": 0.734375, "learning_rate": 1.6229309083779987e-05, "loss": 0.2783, "step": 7592 }, { "epoch": 0.5746558819355754, "grad_norm": 0.7890625, "learning_rate": 1.6228377285261295e-05, "loss": 0.3131, "step": 7593 }, { "epoch": 0.5747315642590227, "grad_norm": 0.75390625, "learning_rate": 1.6227445398382383e-05, "loss": 0.3179, "step": 7594 }, { "epoch": 0.5748072465824701, "grad_norm": 0.67578125, "learning_rate": 1.6226513423156468e-05, "loss": 0.2506, "step": 7595 }, { "epoch": 0.5748829289059174, "grad_norm": 0.859375, "learning_rate": 1.622558135959678e-05, "loss": 0.3553, "step": 7596 }, { "epoch": 0.5749586112293648, "grad_norm": 0.76953125, "learning_rate": 1.6224649207716536e-05, "loss": 0.322, "step": 7597 }, { "epoch": 0.5750342935528121, "grad_norm": 0.7421875, "learning_rate": 1.6223716967528956e-05, "loss": 0.281, "step": 7598 }, { "epoch": 0.5751099758762594, "grad_norm": 1.078125, "learning_rate": 1.6222784639047276e-05, "loss": 0.3287, "step": 7599 }, { "epoch": 0.5751856581997067, "grad_norm": 0.77734375, "learning_rate": 1.622185222228472e-05, "loss": 0.3434, "step": 7600 }, { "epoch": 0.575261340523154, "grad_norm": 0.72265625, "learning_rate": 1.6220919717254507e-05, "loss": 0.2923, "step": 7601 }, { "epoch": 0.5753370228466014, "grad_norm": 0.8203125, "learning_rate": 1.6219987123969874e-05, "loss": 0.3695, "step": 7602 }, { "epoch": 0.5754127051700487, "grad_norm": 0.7890625, "learning_rate": 1.6219054442444048e-05, "loss": 0.3406, "step": 7603 }, { "epoch": 0.5754883874934961, "grad_norm": 1.109375, "learning_rate": 1.6218121672690267e-05, "loss": 0.406, "step": 7604 }, { "epoch": 0.5755640698169434, "grad_norm": 0.765625, "learning_rate": 1.6217188814721756e-05, "loss": 0.3299, "step": 7605 }, { "epoch": 0.5756397521403908, "grad_norm": 0.734375, "learning_rate": 1.621625586855175e-05, "loss": 0.3194, "step": 7606 }, { "epoch": 0.575715434463838, "grad_norm": 0.92578125, "learning_rate": 1.6215322834193494e-05, "loss": 0.3233, "step": 7607 }, { "epoch": 0.5757911167872853, "grad_norm": 0.73828125, "learning_rate": 1.621438971166021e-05, "loss": 0.2992, "step": 7608 }, { "epoch": 0.5758667991107327, "grad_norm": 0.74609375, "learning_rate": 1.621345650096515e-05, "loss": 0.3122, "step": 7609 }, { "epoch": 0.57594248143418, "grad_norm": 0.78515625, "learning_rate": 1.6212523202121547e-05, "loss": 0.3506, "step": 7610 }, { "epoch": 0.5760181637576274, "grad_norm": 0.7890625, "learning_rate": 1.6211589815142637e-05, "loss": 0.3342, "step": 7611 }, { "epoch": 0.5760938460810747, "grad_norm": 0.78515625, "learning_rate": 1.6210656340041668e-05, "loss": 0.347, "step": 7612 }, { "epoch": 0.5761695284045221, "grad_norm": 0.75, "learning_rate": 1.620972277683188e-05, "loss": 0.295, "step": 7613 }, { "epoch": 0.5762452107279693, "grad_norm": 1.265625, "learning_rate": 1.620878912552652e-05, "loss": 0.3795, "step": 7614 }, { "epoch": 0.5763208930514166, "grad_norm": 0.73828125, "learning_rate": 1.6207855386138832e-05, "loss": 0.2755, "step": 7615 }, { "epoch": 0.576396575374864, "grad_norm": 0.71875, "learning_rate": 1.620692155868206e-05, "loss": 0.3011, "step": 7616 }, { "epoch": 0.5764722576983113, "grad_norm": 0.796875, "learning_rate": 1.6205987643169455e-05, "loss": 0.3041, "step": 7617 }, { "epoch": 0.5765479400217587, "grad_norm": 0.76171875, "learning_rate": 1.6205053639614267e-05, "loss": 0.3166, "step": 7618 }, { "epoch": 0.576623622345206, "grad_norm": 0.88671875, "learning_rate": 1.6204119548029746e-05, "loss": 0.3575, "step": 7619 }, { "epoch": 0.5766993046686534, "grad_norm": 0.76171875, "learning_rate": 1.6203185368429137e-05, "loss": 0.3189, "step": 7620 }, { "epoch": 0.5767749869921006, "grad_norm": 0.76953125, "learning_rate": 1.6202251100825705e-05, "loss": 0.3137, "step": 7621 }, { "epoch": 0.576850669315548, "grad_norm": 0.6796875, "learning_rate": 1.6201316745232693e-05, "loss": 0.2801, "step": 7622 }, { "epoch": 0.5769263516389953, "grad_norm": 0.7734375, "learning_rate": 1.6200382301663363e-05, "loss": 0.2968, "step": 7623 }, { "epoch": 0.5770020339624427, "grad_norm": 0.78515625, "learning_rate": 1.619944777013097e-05, "loss": 0.3494, "step": 7624 }, { "epoch": 0.57707771628589, "grad_norm": 0.71875, "learning_rate": 1.619851315064877e-05, "loss": 0.2908, "step": 7625 }, { "epoch": 0.5771533986093373, "grad_norm": 0.78125, "learning_rate": 1.6197578443230026e-05, "loss": 0.3542, "step": 7626 }, { "epoch": 0.5772290809327847, "grad_norm": 0.75390625, "learning_rate": 1.6196643647887994e-05, "loss": 0.2969, "step": 7627 }, { "epoch": 0.5773047632562319, "grad_norm": 0.71875, "learning_rate": 1.619570876463594e-05, "loss": 0.2855, "step": 7628 }, { "epoch": 0.5773804455796793, "grad_norm": 0.734375, "learning_rate": 1.6194773793487125e-05, "loss": 0.2893, "step": 7629 }, { "epoch": 0.5774561279031266, "grad_norm": 0.7578125, "learning_rate": 1.6193838734454813e-05, "loss": 0.3128, "step": 7630 }, { "epoch": 0.577531810226574, "grad_norm": 0.7578125, "learning_rate": 1.6192903587552268e-05, "loss": 0.3021, "step": 7631 }, { "epoch": 0.5776074925500213, "grad_norm": 1.0625, "learning_rate": 1.6191968352792762e-05, "loss": 0.376, "step": 7632 }, { "epoch": 0.5776831748734687, "grad_norm": 0.80078125, "learning_rate": 1.6191033030189554e-05, "loss": 0.3402, "step": 7633 }, { "epoch": 0.577758857196916, "grad_norm": 0.75390625, "learning_rate": 1.6190097619755923e-05, "loss": 0.35, "step": 7634 }, { "epoch": 0.5778345395203632, "grad_norm": 0.828125, "learning_rate": 1.618916212150513e-05, "loss": 0.3743, "step": 7635 }, { "epoch": 0.5779102218438106, "grad_norm": 0.703125, "learning_rate": 1.6188226535450454e-05, "loss": 0.27, "step": 7636 }, { "epoch": 0.5779859041672579, "grad_norm": 0.7421875, "learning_rate": 1.6187290861605166e-05, "loss": 0.2945, "step": 7637 }, { "epoch": 0.5780615864907053, "grad_norm": 0.72265625, "learning_rate": 1.6186355099982537e-05, "loss": 0.2735, "step": 7638 }, { "epoch": 0.5781372688141526, "grad_norm": 0.78515625, "learning_rate": 1.6185419250595847e-05, "loss": 0.3255, "step": 7639 }, { "epoch": 0.5782129511376, "grad_norm": 0.78125, "learning_rate": 1.618448331345837e-05, "loss": 0.3267, "step": 7640 }, { "epoch": 0.5782886334610473, "grad_norm": 0.7578125, "learning_rate": 1.6183547288583384e-05, "loss": 0.3073, "step": 7641 }, { "epoch": 0.5783643157844945, "grad_norm": 0.76171875, "learning_rate": 1.618261117598417e-05, "loss": 0.3324, "step": 7642 }, { "epoch": 0.5784399981079419, "grad_norm": 0.796875, "learning_rate": 1.6181674975674003e-05, "loss": 0.3257, "step": 7643 }, { "epoch": 0.5785156804313892, "grad_norm": 0.71484375, "learning_rate": 1.618073868766617e-05, "loss": 0.2701, "step": 7644 }, { "epoch": 0.5785913627548366, "grad_norm": 0.73046875, "learning_rate": 1.6179802311973953e-05, "loss": 0.269, "step": 7645 }, { "epoch": 0.5786670450782839, "grad_norm": 0.7890625, "learning_rate": 1.6178865848610637e-05, "loss": 0.3681, "step": 7646 }, { "epoch": 0.5787427274017313, "grad_norm": 0.7890625, "learning_rate": 1.6177929297589504e-05, "loss": 0.3318, "step": 7647 }, { "epoch": 0.5788184097251786, "grad_norm": 0.76953125, "learning_rate": 1.617699265892384e-05, "loss": 0.275, "step": 7648 }, { "epoch": 0.5788940920486259, "grad_norm": 0.7734375, "learning_rate": 1.6176055932626937e-05, "loss": 0.3498, "step": 7649 }, { "epoch": 0.5789697743720732, "grad_norm": 0.7734375, "learning_rate": 1.617511911871208e-05, "loss": 0.3306, "step": 7650 }, { "epoch": 0.5790454566955205, "grad_norm": 1.453125, "learning_rate": 1.6174182217192566e-05, "loss": 0.34, "step": 7651 }, { "epoch": 0.5791211390189679, "grad_norm": 0.78125, "learning_rate": 1.617324522808168e-05, "loss": 0.3322, "step": 7652 }, { "epoch": 0.5791968213424152, "grad_norm": 0.765625, "learning_rate": 1.6172308151392716e-05, "loss": 0.2752, "step": 7653 }, { "epoch": 0.5792725036658626, "grad_norm": 0.796875, "learning_rate": 1.6171370987138967e-05, "loss": 0.3338, "step": 7654 }, { "epoch": 0.5793481859893099, "grad_norm": 0.80859375, "learning_rate": 1.6170433735333734e-05, "loss": 0.3355, "step": 7655 }, { "epoch": 0.5794238683127572, "grad_norm": 0.78125, "learning_rate": 1.6169496395990306e-05, "loss": 0.3184, "step": 7656 }, { "epoch": 0.5794995506362045, "grad_norm": 0.77734375, "learning_rate": 1.6168558969121987e-05, "loss": 0.3351, "step": 7657 }, { "epoch": 0.5795752329596519, "grad_norm": 0.83203125, "learning_rate": 1.6167621454742074e-05, "loss": 0.3551, "step": 7658 }, { "epoch": 0.5796509152830992, "grad_norm": 0.78515625, "learning_rate": 1.6166683852863864e-05, "loss": 0.3407, "step": 7659 }, { "epoch": 0.5797265976065465, "grad_norm": 0.78515625, "learning_rate": 1.616574616350066e-05, "loss": 0.2968, "step": 7660 }, { "epoch": 0.5798022799299939, "grad_norm": 0.78125, "learning_rate": 1.616480838666577e-05, "loss": 0.325, "step": 7661 }, { "epoch": 0.5798779622534411, "grad_norm": 0.765625, "learning_rate": 1.6163870522372494e-05, "loss": 0.2977, "step": 7662 }, { "epoch": 0.5799536445768885, "grad_norm": 0.734375, "learning_rate": 1.6162932570634135e-05, "loss": 0.2947, "step": 7663 }, { "epoch": 0.5800293269003358, "grad_norm": 0.71875, "learning_rate": 1.6161994531464e-05, "loss": 0.3007, "step": 7664 }, { "epoch": 0.5801050092237832, "grad_norm": 0.76953125, "learning_rate": 1.6161056404875403e-05, "loss": 0.3184, "step": 7665 }, { "epoch": 0.5801806915472305, "grad_norm": 0.78125, "learning_rate": 1.6160118190881643e-05, "loss": 0.3177, "step": 7666 }, { "epoch": 0.5802563738706779, "grad_norm": 0.828125, "learning_rate": 1.6159179889496036e-05, "loss": 0.3551, "step": 7667 }, { "epoch": 0.5803320561941252, "grad_norm": 0.765625, "learning_rate": 1.6158241500731895e-05, "loss": 0.3018, "step": 7668 }, { "epoch": 0.5804077385175724, "grad_norm": 0.80859375, "learning_rate": 1.615730302460253e-05, "loss": 0.3581, "step": 7669 }, { "epoch": 0.5804834208410198, "grad_norm": 0.6875, "learning_rate": 1.6156364461121255e-05, "loss": 0.2997, "step": 7670 }, { "epoch": 0.5805591031644671, "grad_norm": 0.765625, "learning_rate": 1.6155425810301383e-05, "loss": 0.2886, "step": 7671 }, { "epoch": 0.5806347854879145, "grad_norm": 0.7734375, "learning_rate": 1.6154487072156237e-05, "loss": 0.2964, "step": 7672 }, { "epoch": 0.5807104678113618, "grad_norm": 0.75, "learning_rate": 1.615354824669913e-05, "loss": 0.3314, "step": 7673 }, { "epoch": 0.5807861501348092, "grad_norm": 0.82421875, "learning_rate": 1.615260933394338e-05, "loss": 0.289, "step": 7674 }, { "epoch": 0.5808618324582565, "grad_norm": 0.8359375, "learning_rate": 1.6151670333902307e-05, "loss": 0.3783, "step": 7675 }, { "epoch": 0.5809375147817037, "grad_norm": 0.77734375, "learning_rate": 1.6150731246589234e-05, "loss": 0.3488, "step": 7676 }, { "epoch": 0.5810131971051511, "grad_norm": 0.7578125, "learning_rate": 1.6149792072017483e-05, "loss": 0.2878, "step": 7677 }, { "epoch": 0.5810888794285984, "grad_norm": 0.76171875, "learning_rate": 1.614885281020038e-05, "loss": 0.3206, "step": 7678 }, { "epoch": 0.5811645617520458, "grad_norm": 0.796875, "learning_rate": 1.614791346115125e-05, "loss": 0.3236, "step": 7679 }, { "epoch": 0.5812402440754931, "grad_norm": 0.91015625, "learning_rate": 1.6146974024883414e-05, "loss": 0.3892, "step": 7680 }, { "epoch": 0.5813159263989405, "grad_norm": 0.70703125, "learning_rate": 1.61460345014102e-05, "loss": 0.2732, "step": 7681 }, { "epoch": 0.5813916087223878, "grad_norm": 0.7890625, "learning_rate": 1.6145094890744942e-05, "loss": 0.3339, "step": 7682 }, { "epoch": 0.581467291045835, "grad_norm": 0.7421875, "learning_rate": 1.614415519290097e-05, "loss": 0.3204, "step": 7683 }, { "epoch": 0.5815429733692824, "grad_norm": 0.75390625, "learning_rate": 1.6143215407891615e-05, "loss": 0.3215, "step": 7684 }, { "epoch": 0.5816186556927297, "grad_norm": 0.7890625, "learning_rate": 1.61422755357302e-05, "loss": 0.3262, "step": 7685 }, { "epoch": 0.5816943380161771, "grad_norm": 0.8203125, "learning_rate": 1.6141335576430074e-05, "loss": 0.314, "step": 7686 }, { "epoch": 0.5817700203396244, "grad_norm": 0.7265625, "learning_rate": 1.6140395530004556e-05, "loss": 0.3032, "step": 7687 }, { "epoch": 0.5818457026630718, "grad_norm": 0.7265625, "learning_rate": 1.6139455396466995e-05, "loss": 0.2778, "step": 7688 }, { "epoch": 0.5819213849865191, "grad_norm": 0.74609375, "learning_rate": 1.6138515175830724e-05, "loss": 0.3383, "step": 7689 }, { "epoch": 0.5819970673099664, "grad_norm": 0.83984375, "learning_rate": 1.6137574868109077e-05, "loss": 0.3777, "step": 7690 }, { "epoch": 0.5820727496334137, "grad_norm": 1.09375, "learning_rate": 1.61366344733154e-05, "loss": 0.3788, "step": 7691 }, { "epoch": 0.5821484319568611, "grad_norm": 1.1640625, "learning_rate": 1.613569399146304e-05, "loss": 0.4059, "step": 7692 }, { "epoch": 0.5822241142803084, "grad_norm": 0.7421875, "learning_rate": 1.613475342256532e-05, "loss": 0.2922, "step": 7693 }, { "epoch": 0.5822997966037557, "grad_norm": 0.78515625, "learning_rate": 1.6133812766635603e-05, "loss": 0.3319, "step": 7694 }, { "epoch": 0.5823754789272031, "grad_norm": 0.7109375, "learning_rate": 1.613287202368722e-05, "loss": 0.3009, "step": 7695 }, { "epoch": 0.5824511612506504, "grad_norm": 0.734375, "learning_rate": 1.6131931193733527e-05, "loss": 0.3043, "step": 7696 }, { "epoch": 0.5825268435740977, "grad_norm": 0.72265625, "learning_rate": 1.6130990276787864e-05, "loss": 0.3147, "step": 7697 }, { "epoch": 0.582602525897545, "grad_norm": 0.7734375, "learning_rate": 1.6130049272863587e-05, "loss": 0.3034, "step": 7698 }, { "epoch": 0.5826782082209924, "grad_norm": 0.75, "learning_rate": 1.6129108181974037e-05, "loss": 0.318, "step": 7699 }, { "epoch": 0.5827538905444397, "grad_norm": 0.78125, "learning_rate": 1.612816700413257e-05, "loss": 0.3113, "step": 7700 }, { "epoch": 0.5828295728678871, "grad_norm": 0.80859375, "learning_rate": 1.612722573935254e-05, "loss": 0.3031, "step": 7701 }, { "epoch": 0.5829052551913344, "grad_norm": 0.7890625, "learning_rate": 1.61262843876473e-05, "loss": 0.3447, "step": 7702 }, { "epoch": 0.5829809375147817, "grad_norm": 0.765625, "learning_rate": 1.61253429490302e-05, "loss": 0.3141, "step": 7703 }, { "epoch": 0.583056619838229, "grad_norm": 0.76171875, "learning_rate": 1.61244014235146e-05, "loss": 0.3113, "step": 7704 }, { "epoch": 0.5831323021616763, "grad_norm": 0.75, "learning_rate": 1.6123459811113855e-05, "loss": 0.3229, "step": 7705 }, { "epoch": 0.5832079844851237, "grad_norm": 0.7265625, "learning_rate": 1.6122518111841322e-05, "loss": 0.2767, "step": 7706 }, { "epoch": 0.583283666808571, "grad_norm": 0.77734375, "learning_rate": 1.6121576325710368e-05, "loss": 0.3155, "step": 7707 }, { "epoch": 0.5833593491320184, "grad_norm": 0.8046875, "learning_rate": 1.6120634452734346e-05, "loss": 0.3326, "step": 7708 }, { "epoch": 0.5834350314554657, "grad_norm": 0.73828125, "learning_rate": 1.611969249292662e-05, "loss": 0.2819, "step": 7709 }, { "epoch": 0.5835107137789131, "grad_norm": 0.7109375, "learning_rate": 1.6118750446300558e-05, "loss": 0.2724, "step": 7710 }, { "epoch": 0.5835863961023603, "grad_norm": 0.7890625, "learning_rate": 1.6117808312869517e-05, "loss": 0.354, "step": 7711 }, { "epoch": 0.5836620784258076, "grad_norm": 0.7421875, "learning_rate": 1.6116866092646866e-05, "loss": 0.2959, "step": 7712 }, { "epoch": 0.583737760749255, "grad_norm": 0.73828125, "learning_rate": 1.6115923785645975e-05, "loss": 0.3118, "step": 7713 }, { "epoch": 0.5838134430727023, "grad_norm": 0.75, "learning_rate": 1.6114981391880212e-05, "loss": 0.319, "step": 7714 }, { "epoch": 0.5838891253961497, "grad_norm": 0.76171875, "learning_rate": 1.611403891136294e-05, "loss": 0.332, "step": 7715 }, { "epoch": 0.583964807719597, "grad_norm": 0.765625, "learning_rate": 1.6113096344107537e-05, "loss": 0.3199, "step": 7716 }, { "epoch": 0.5840404900430444, "grad_norm": 0.84375, "learning_rate": 1.611215369012737e-05, "loss": 0.3912, "step": 7717 }, { "epoch": 0.5841161723664916, "grad_norm": 0.78515625, "learning_rate": 1.6111210949435815e-05, "loss": 0.3347, "step": 7718 }, { "epoch": 0.584191854689939, "grad_norm": 0.7578125, "learning_rate": 1.6110268122046243e-05, "loss": 0.2954, "step": 7719 }, { "epoch": 0.5842675370133863, "grad_norm": 0.76953125, "learning_rate": 1.6109325207972037e-05, "loss": 0.321, "step": 7720 }, { "epoch": 0.5843432193368336, "grad_norm": 0.7578125, "learning_rate": 1.6108382207226564e-05, "loss": 0.2867, "step": 7721 }, { "epoch": 0.584418901660281, "grad_norm": 0.76171875, "learning_rate": 1.610743911982321e-05, "loss": 0.3432, "step": 7722 }, { "epoch": 0.5844945839837283, "grad_norm": 0.7265625, "learning_rate": 1.610649594577535e-05, "loss": 0.272, "step": 7723 }, { "epoch": 0.5845702663071757, "grad_norm": 0.71875, "learning_rate": 1.6105552685096368e-05, "loss": 0.2909, "step": 7724 }, { "epoch": 0.5846459486306229, "grad_norm": 0.765625, "learning_rate": 1.610460933779964e-05, "loss": 0.3117, "step": 7725 }, { "epoch": 0.5847216309540703, "grad_norm": 0.80859375, "learning_rate": 1.6103665903898556e-05, "loss": 0.3215, "step": 7726 }, { "epoch": 0.5847973132775176, "grad_norm": 0.8046875, "learning_rate": 1.6102722383406497e-05, "loss": 0.3625, "step": 7727 }, { "epoch": 0.584872995600965, "grad_norm": 0.796875, "learning_rate": 1.6101778776336845e-05, "loss": 0.3497, "step": 7728 }, { "epoch": 0.5849486779244123, "grad_norm": 0.75390625, "learning_rate": 1.610083508270299e-05, "loss": 0.2695, "step": 7729 }, { "epoch": 0.5850243602478596, "grad_norm": 0.79296875, "learning_rate": 1.6099891302518326e-05, "loss": 0.3326, "step": 7730 }, { "epoch": 0.585100042571307, "grad_norm": 0.74609375, "learning_rate": 1.609894743579623e-05, "loss": 0.3134, "step": 7731 }, { "epoch": 0.5851757248947542, "grad_norm": 0.83203125, "learning_rate": 1.6098003482550102e-05, "loss": 0.3658, "step": 7732 }, { "epoch": 0.5852514072182016, "grad_norm": 0.7265625, "learning_rate": 1.6097059442793327e-05, "loss": 0.293, "step": 7733 }, { "epoch": 0.5853270895416489, "grad_norm": 0.81640625, "learning_rate": 1.6096115316539303e-05, "loss": 0.3336, "step": 7734 }, { "epoch": 0.5854027718650963, "grad_norm": 1.0859375, "learning_rate": 1.6095171103801423e-05, "loss": 0.3166, "step": 7735 }, { "epoch": 0.5854784541885436, "grad_norm": 0.81640625, "learning_rate": 1.609422680459308e-05, "loss": 0.3125, "step": 7736 }, { "epoch": 0.585554136511991, "grad_norm": 0.75390625, "learning_rate": 1.609328241892767e-05, "loss": 0.3291, "step": 7737 }, { "epoch": 0.5856298188354383, "grad_norm": 0.796875, "learning_rate": 1.6092337946818595e-05, "loss": 0.3379, "step": 7738 }, { "epoch": 0.5857055011588855, "grad_norm": 0.765625, "learning_rate": 1.609139338827925e-05, "loss": 0.3123, "step": 7739 }, { "epoch": 0.5857811834823329, "grad_norm": 0.78515625, "learning_rate": 1.609044874332304e-05, "loss": 0.3396, "step": 7740 }, { "epoch": 0.5858568658057802, "grad_norm": 0.78515625, "learning_rate": 1.6089504011963358e-05, "loss": 0.3376, "step": 7741 }, { "epoch": 0.5859325481292276, "grad_norm": 0.75390625, "learning_rate": 1.6088559194213615e-05, "loss": 0.3367, "step": 7742 }, { "epoch": 0.5860082304526749, "grad_norm": 0.80859375, "learning_rate": 1.608761429008721e-05, "loss": 0.3423, "step": 7743 }, { "epoch": 0.5860839127761223, "grad_norm": 0.703125, "learning_rate": 1.6086669299597547e-05, "loss": 0.2759, "step": 7744 }, { "epoch": 0.5861595950995696, "grad_norm": 0.73828125, "learning_rate": 1.608572422275804e-05, "loss": 0.3232, "step": 7745 }, { "epoch": 0.5862352774230168, "grad_norm": 0.7265625, "learning_rate": 1.6084779059582088e-05, "loss": 0.2811, "step": 7746 }, { "epoch": 0.5863109597464642, "grad_norm": 0.76953125, "learning_rate": 1.6083833810083104e-05, "loss": 0.3351, "step": 7747 }, { "epoch": 0.5863866420699115, "grad_norm": 0.81640625, "learning_rate": 1.6082888474274495e-05, "loss": 0.3289, "step": 7748 }, { "epoch": 0.5864623243933589, "grad_norm": 0.7578125, "learning_rate": 1.608194305216968e-05, "loss": 0.3155, "step": 7749 }, { "epoch": 0.5865380067168062, "grad_norm": 1.1875, "learning_rate": 1.6080997543782063e-05, "loss": 0.346, "step": 7750 }, { "epoch": 0.5866136890402536, "grad_norm": 0.7734375, "learning_rate": 1.608005194912506e-05, "loss": 0.332, "step": 7751 }, { "epoch": 0.5866893713637009, "grad_norm": 0.76953125, "learning_rate": 1.6079106268212085e-05, "loss": 0.3293, "step": 7752 }, { "epoch": 0.5867650536871482, "grad_norm": 0.77734375, "learning_rate": 1.607816050105656e-05, "loss": 0.3653, "step": 7753 }, { "epoch": 0.5868407360105955, "grad_norm": 0.8203125, "learning_rate": 1.6077214647671892e-05, "loss": 0.3601, "step": 7754 }, { "epoch": 0.5869164183340428, "grad_norm": 0.7421875, "learning_rate": 1.6076268708071507e-05, "loss": 0.308, "step": 7755 }, { "epoch": 0.5869921006574902, "grad_norm": 0.7734375, "learning_rate": 1.6075322682268825e-05, "loss": 0.338, "step": 7756 }, { "epoch": 0.5870677829809375, "grad_norm": 0.73828125, "learning_rate": 1.6074376570277265e-05, "loss": 0.307, "step": 7757 }, { "epoch": 0.5871434653043849, "grad_norm": 0.80859375, "learning_rate": 1.607343037211025e-05, "loss": 0.3613, "step": 7758 }, { "epoch": 0.5872191476278322, "grad_norm": 0.72265625, "learning_rate": 1.60724840877812e-05, "loss": 0.2904, "step": 7759 }, { "epoch": 0.5872948299512795, "grad_norm": 0.7734375, "learning_rate": 1.6071537717303544e-05, "loss": 0.3496, "step": 7760 }, { "epoch": 0.5873705122747268, "grad_norm": 0.73046875, "learning_rate": 1.6070591260690707e-05, "loss": 0.2785, "step": 7761 }, { "epoch": 0.5874461945981742, "grad_norm": 0.765625, "learning_rate": 1.6069644717956114e-05, "loss": 0.3218, "step": 7762 }, { "epoch": 0.5875218769216215, "grad_norm": 0.82421875, "learning_rate": 1.6068698089113198e-05, "loss": 0.3462, "step": 7763 }, { "epoch": 0.5875975592450688, "grad_norm": 0.7734375, "learning_rate": 1.6067751374175383e-05, "loss": 0.278, "step": 7764 }, { "epoch": 0.5876732415685162, "grad_norm": 0.82421875, "learning_rate": 1.6066804573156104e-05, "loss": 0.3673, "step": 7765 }, { "epoch": 0.5877489238919635, "grad_norm": 0.69140625, "learning_rate": 1.6065857686068787e-05, "loss": 0.2554, "step": 7766 }, { "epoch": 0.5878246062154108, "grad_norm": 0.73046875, "learning_rate": 1.6064910712926873e-05, "loss": 0.3134, "step": 7767 }, { "epoch": 0.5879002885388581, "grad_norm": 0.828125, "learning_rate": 1.606396365374379e-05, "loss": 0.3324, "step": 7768 }, { "epoch": 0.5879759708623055, "grad_norm": 0.78125, "learning_rate": 1.606301650853298e-05, "loss": 0.2941, "step": 7769 }, { "epoch": 0.5880516531857528, "grad_norm": 0.74609375, "learning_rate": 1.6062069277307872e-05, "loss": 0.3052, "step": 7770 }, { "epoch": 0.5881273355092002, "grad_norm": 0.8203125, "learning_rate": 1.606112196008191e-05, "loss": 0.366, "step": 7771 }, { "epoch": 0.5882030178326475, "grad_norm": 0.78125, "learning_rate": 1.6060174556868534e-05, "loss": 0.3413, "step": 7772 }, { "epoch": 0.5882787001560948, "grad_norm": 0.7890625, "learning_rate": 1.605922706768118e-05, "loss": 0.3704, "step": 7773 }, { "epoch": 0.5883543824795421, "grad_norm": 0.7734375, "learning_rate": 1.6058279492533292e-05, "loss": 0.3298, "step": 7774 }, { "epoch": 0.5884300648029894, "grad_norm": 0.7109375, "learning_rate": 1.6057331831438313e-05, "loss": 0.2948, "step": 7775 }, { "epoch": 0.5885057471264368, "grad_norm": 0.98046875, "learning_rate": 1.605638408440969e-05, "loss": 0.3429, "step": 7776 }, { "epoch": 0.5885814294498841, "grad_norm": 0.79296875, "learning_rate": 1.6055436251460864e-05, "loss": 0.347, "step": 7777 }, { "epoch": 0.5886571117733315, "grad_norm": 0.76953125, "learning_rate": 1.6054488332605282e-05, "loss": 0.3253, "step": 7778 }, { "epoch": 0.5887327940967788, "grad_norm": 1.265625, "learning_rate": 1.6053540327856395e-05, "loss": 0.3066, "step": 7779 }, { "epoch": 0.588808476420226, "grad_norm": 0.7890625, "learning_rate": 1.605259223722765e-05, "loss": 0.3391, "step": 7780 }, { "epoch": 0.5888841587436734, "grad_norm": 0.77734375, "learning_rate": 1.6051644060732497e-05, "loss": 0.2869, "step": 7781 }, { "epoch": 0.5889598410671207, "grad_norm": 0.84375, "learning_rate": 1.6050695798384387e-05, "loss": 0.3389, "step": 7782 }, { "epoch": 0.5890355233905681, "grad_norm": 0.7734375, "learning_rate": 1.6049747450196777e-05, "loss": 0.3148, "step": 7783 }, { "epoch": 0.5891112057140154, "grad_norm": 0.765625, "learning_rate": 1.6048799016183112e-05, "loss": 0.3114, "step": 7784 }, { "epoch": 0.5891868880374628, "grad_norm": 0.81640625, "learning_rate": 1.6047850496356858e-05, "loss": 0.3729, "step": 7785 }, { "epoch": 0.5892625703609101, "grad_norm": 0.74609375, "learning_rate": 1.6046901890731465e-05, "loss": 0.3053, "step": 7786 }, { "epoch": 0.5893382526843574, "grad_norm": 0.70703125, "learning_rate": 1.604595319932039e-05, "loss": 0.2894, "step": 7787 }, { "epoch": 0.5894139350078047, "grad_norm": 0.8359375, "learning_rate": 1.6045004422137095e-05, "loss": 0.3435, "step": 7788 }, { "epoch": 0.589489617331252, "grad_norm": 0.76953125, "learning_rate": 1.604405555919504e-05, "loss": 0.3372, "step": 7789 }, { "epoch": 0.5895652996546994, "grad_norm": 0.734375, "learning_rate": 1.6043106610507683e-05, "loss": 0.2987, "step": 7790 }, { "epoch": 0.5896409819781467, "grad_norm": 0.80859375, "learning_rate": 1.604215757608849e-05, "loss": 0.3196, "step": 7791 }, { "epoch": 0.5897166643015941, "grad_norm": 0.69921875, "learning_rate": 1.6041208455950924e-05, "loss": 0.2624, "step": 7792 }, { "epoch": 0.5897923466250414, "grad_norm": 0.765625, "learning_rate": 1.6040259250108444e-05, "loss": 0.3198, "step": 7793 }, { "epoch": 0.5898680289484887, "grad_norm": 0.796875, "learning_rate": 1.6039309958574524e-05, "loss": 0.3364, "step": 7794 }, { "epoch": 0.589943711271936, "grad_norm": 0.78515625, "learning_rate": 1.6038360581362628e-05, "loss": 0.3146, "step": 7795 }, { "epoch": 0.5900193935953834, "grad_norm": 0.765625, "learning_rate": 1.6037411118486228e-05, "loss": 0.3284, "step": 7796 }, { "epoch": 0.5900950759188307, "grad_norm": 0.76171875, "learning_rate": 1.6036461569958785e-05, "loss": 0.3024, "step": 7797 }, { "epoch": 0.590170758242278, "grad_norm": 0.703125, "learning_rate": 1.6035511935793783e-05, "loss": 0.2613, "step": 7798 }, { "epoch": 0.5902464405657254, "grad_norm": 0.7265625, "learning_rate": 1.603456221600468e-05, "loss": 0.2747, "step": 7799 }, { "epoch": 0.5903221228891727, "grad_norm": 0.74609375, "learning_rate": 1.6033612410604962e-05, "loss": 0.3049, "step": 7800 }, { "epoch": 0.59039780521262, "grad_norm": 0.80859375, "learning_rate": 1.603266251960809e-05, "loss": 0.3579, "step": 7801 }, { "epoch": 0.5904734875360673, "grad_norm": 0.77734375, "learning_rate": 1.6031712543027554e-05, "loss": 0.3437, "step": 7802 }, { "epoch": 0.5905491698595147, "grad_norm": 1.0078125, "learning_rate": 1.603076248087683e-05, "loss": 0.3362, "step": 7803 }, { "epoch": 0.590624852182962, "grad_norm": 0.74609375, "learning_rate": 1.602981233316938e-05, "loss": 0.3138, "step": 7804 }, { "epoch": 0.5907005345064094, "grad_norm": 0.72265625, "learning_rate": 1.60288620999187e-05, "loss": 0.3038, "step": 7805 }, { "epoch": 0.5907762168298567, "grad_norm": 0.703125, "learning_rate": 1.6027911781138263e-05, "loss": 0.2841, "step": 7806 }, { "epoch": 0.590851899153304, "grad_norm": 0.76171875, "learning_rate": 1.6026961376841557e-05, "loss": 0.331, "step": 7807 }, { "epoch": 0.5909275814767513, "grad_norm": 0.76171875, "learning_rate": 1.6026010887042057e-05, "loss": 0.3334, "step": 7808 }, { "epoch": 0.5910032638001986, "grad_norm": 0.71484375, "learning_rate": 1.6025060311753252e-05, "loss": 0.2822, "step": 7809 }, { "epoch": 0.591078946123646, "grad_norm": 0.69921875, "learning_rate": 1.602410965098863e-05, "loss": 0.2793, "step": 7810 }, { "epoch": 0.5911546284470933, "grad_norm": 0.76171875, "learning_rate": 1.6023158904761676e-05, "loss": 0.3239, "step": 7811 }, { "epoch": 0.5912303107705407, "grad_norm": 0.84375, "learning_rate": 1.602220807308587e-05, "loss": 0.3728, "step": 7812 }, { "epoch": 0.591305993093988, "grad_norm": 0.765625, "learning_rate": 1.6021257155974718e-05, "loss": 0.3225, "step": 7813 }, { "epoch": 0.5913816754174354, "grad_norm": 0.78515625, "learning_rate": 1.6020306153441695e-05, "loss": 0.3172, "step": 7814 }, { "epoch": 0.5914573577408826, "grad_norm": 0.7421875, "learning_rate": 1.6019355065500293e-05, "loss": 0.3017, "step": 7815 }, { "epoch": 0.5915330400643299, "grad_norm": 0.75390625, "learning_rate": 1.6018403892164015e-05, "loss": 0.3364, "step": 7816 }, { "epoch": 0.5916087223877773, "grad_norm": 0.80078125, "learning_rate": 1.601745263344635e-05, "loss": 0.3535, "step": 7817 }, { "epoch": 0.5916844047112246, "grad_norm": 0.6640625, "learning_rate": 1.6016501289360794e-05, "loss": 0.2521, "step": 7818 }, { "epoch": 0.591760087034672, "grad_norm": 0.73046875, "learning_rate": 1.601554985992084e-05, "loss": 0.3011, "step": 7819 }, { "epoch": 0.5918357693581193, "grad_norm": 0.84375, "learning_rate": 1.6014598345139992e-05, "loss": 0.3248, "step": 7820 }, { "epoch": 0.5919114516815667, "grad_norm": 0.76953125, "learning_rate": 1.601364674503174e-05, "loss": 0.3102, "step": 7821 }, { "epoch": 0.5919871340050139, "grad_norm": 0.78515625, "learning_rate": 1.601269505960959e-05, "loss": 0.359, "step": 7822 }, { "epoch": 0.5920628163284612, "grad_norm": 0.71875, "learning_rate": 1.6011743288887047e-05, "loss": 0.3075, "step": 7823 }, { "epoch": 0.5921384986519086, "grad_norm": 0.71875, "learning_rate": 1.6010791432877605e-05, "loss": 0.2893, "step": 7824 }, { "epoch": 0.5922141809753559, "grad_norm": 0.8125, "learning_rate": 1.6009839491594772e-05, "loss": 0.3375, "step": 7825 }, { "epoch": 0.5922898632988033, "grad_norm": 0.734375, "learning_rate": 1.6008887465052052e-05, "loss": 0.314, "step": 7826 }, { "epoch": 0.5923655456222506, "grad_norm": 0.75, "learning_rate": 1.6007935353262953e-05, "loss": 0.3138, "step": 7827 }, { "epoch": 0.592441227945698, "grad_norm": 1.0234375, "learning_rate": 1.600698315624098e-05, "loss": 0.3241, "step": 7828 }, { "epoch": 0.5925169102691452, "grad_norm": 0.73828125, "learning_rate": 1.6006030873999648e-05, "loss": 0.3297, "step": 7829 }, { "epoch": 0.5925925925925926, "grad_norm": 0.79296875, "learning_rate": 1.6005078506552455e-05, "loss": 0.3545, "step": 7830 }, { "epoch": 0.5926682749160399, "grad_norm": 0.71875, "learning_rate": 1.6004126053912918e-05, "loss": 0.297, "step": 7831 }, { "epoch": 0.5927439572394873, "grad_norm": 0.703125, "learning_rate": 1.600317351609455e-05, "loss": 0.273, "step": 7832 }, { "epoch": 0.5928196395629346, "grad_norm": 0.7265625, "learning_rate": 1.6002220893110867e-05, "loss": 0.2884, "step": 7833 }, { "epoch": 0.5928953218863819, "grad_norm": 1.2421875, "learning_rate": 1.600126818497538e-05, "loss": 0.3645, "step": 7834 }, { "epoch": 0.5929710042098293, "grad_norm": 0.7578125, "learning_rate": 1.6000315391701605e-05, "loss": 0.2869, "step": 7835 }, { "epoch": 0.5930466865332765, "grad_norm": 0.84375, "learning_rate": 1.5999362513303058e-05, "loss": 0.389, "step": 7836 }, { "epoch": 0.5931223688567239, "grad_norm": 0.75390625, "learning_rate": 1.599840954979326e-05, "loss": 0.2997, "step": 7837 }, { "epoch": 0.5931980511801712, "grad_norm": 0.85546875, "learning_rate": 1.599745650118573e-05, "loss": 0.3547, "step": 7838 }, { "epoch": 0.5932737335036186, "grad_norm": 0.7578125, "learning_rate": 1.5996503367493982e-05, "loss": 0.3322, "step": 7839 }, { "epoch": 0.5933494158270659, "grad_norm": 1.046875, "learning_rate": 1.5995550148731548e-05, "loss": 0.3467, "step": 7840 }, { "epoch": 0.5934250981505133, "grad_norm": 0.7109375, "learning_rate": 1.5994596844911946e-05, "loss": 0.2903, "step": 7841 }, { "epoch": 0.5935007804739606, "grad_norm": 0.8046875, "learning_rate": 1.5993643456048698e-05, "loss": 0.3494, "step": 7842 }, { "epoch": 0.5935764627974078, "grad_norm": 0.81640625, "learning_rate": 1.5992689982155333e-05, "loss": 0.341, "step": 7843 }, { "epoch": 0.5936521451208552, "grad_norm": 0.85546875, "learning_rate": 1.599173642324538e-05, "loss": 0.3812, "step": 7844 }, { "epoch": 0.5937278274443025, "grad_norm": 0.75, "learning_rate": 1.5990782779332362e-05, "loss": 0.3014, "step": 7845 }, { "epoch": 0.5938035097677499, "grad_norm": 0.7265625, "learning_rate": 1.5989829050429807e-05, "loss": 0.2993, "step": 7846 }, { "epoch": 0.5938791920911972, "grad_norm": 1.0234375, "learning_rate": 1.598887523655125e-05, "loss": 0.3447, "step": 7847 }, { "epoch": 0.5939548744146446, "grad_norm": 0.734375, "learning_rate": 1.5987921337710224e-05, "loss": 0.2805, "step": 7848 }, { "epoch": 0.5940305567380919, "grad_norm": 0.8046875, "learning_rate": 1.5986967353920253e-05, "loss": 0.3453, "step": 7849 }, { "epoch": 0.5941062390615391, "grad_norm": 0.83203125, "learning_rate": 1.5986013285194877e-05, "loss": 0.3693, "step": 7850 }, { "epoch": 0.5941819213849865, "grad_norm": 0.796875, "learning_rate": 1.598505913154763e-05, "loss": 0.3463, "step": 7851 }, { "epoch": 0.5942576037084338, "grad_norm": 0.7734375, "learning_rate": 1.5984104892992053e-05, "loss": 0.3285, "step": 7852 }, { "epoch": 0.5943332860318812, "grad_norm": 0.76171875, "learning_rate": 1.5983150569541674e-05, "loss": 0.318, "step": 7853 }, { "epoch": 0.5944089683553285, "grad_norm": 0.734375, "learning_rate": 1.5982196161210038e-05, "loss": 0.3109, "step": 7854 }, { "epoch": 0.5944846506787759, "grad_norm": 0.83203125, "learning_rate": 1.5981241668010685e-05, "loss": 0.3591, "step": 7855 }, { "epoch": 0.5945603330022232, "grad_norm": 0.74609375, "learning_rate": 1.5980287089957153e-05, "loss": 0.3049, "step": 7856 }, { "epoch": 0.5946360153256705, "grad_norm": 0.79296875, "learning_rate": 1.597933242706299e-05, "loss": 0.33, "step": 7857 }, { "epoch": 0.5947116976491178, "grad_norm": 0.7734375, "learning_rate": 1.597837767934173e-05, "loss": 0.3264, "step": 7858 }, { "epoch": 0.5947873799725651, "grad_norm": 0.796875, "learning_rate": 1.5977422846806926e-05, "loss": 0.3265, "step": 7859 }, { "epoch": 0.5948630622960125, "grad_norm": 0.82421875, "learning_rate": 1.597646792947212e-05, "loss": 0.3706, "step": 7860 }, { "epoch": 0.5949387446194598, "grad_norm": 0.79296875, "learning_rate": 1.5975512927350864e-05, "loss": 0.3351, "step": 7861 }, { "epoch": 0.5950144269429072, "grad_norm": 1.3046875, "learning_rate": 1.59745578404567e-05, "loss": 0.388, "step": 7862 }, { "epoch": 0.5950901092663545, "grad_norm": 0.7578125, "learning_rate": 1.597360266880318e-05, "loss": 0.2993, "step": 7863 }, { "epoch": 0.5951657915898018, "grad_norm": 0.72265625, "learning_rate": 1.5972647412403857e-05, "loss": 0.2986, "step": 7864 }, { "epoch": 0.5952414739132491, "grad_norm": 0.78125, "learning_rate": 1.597169207127228e-05, "loss": 0.34, "step": 7865 }, { "epoch": 0.5953171562366965, "grad_norm": 0.76953125, "learning_rate": 1.5970736645422004e-05, "loss": 0.3178, "step": 7866 }, { "epoch": 0.5953928385601438, "grad_norm": 0.73828125, "learning_rate": 1.596978113486658e-05, "loss": 0.3087, "step": 7867 }, { "epoch": 0.5954685208835911, "grad_norm": 0.72265625, "learning_rate": 1.5968825539619568e-05, "loss": 0.2908, "step": 7868 }, { "epoch": 0.5955442032070385, "grad_norm": 0.80859375, "learning_rate": 1.5967869859694523e-05, "loss": 0.3721, "step": 7869 }, { "epoch": 0.5956198855304858, "grad_norm": 0.80859375, "learning_rate": 1.5966914095105005e-05, "loss": 0.3469, "step": 7870 }, { "epoch": 0.5956955678539331, "grad_norm": 0.765625, "learning_rate": 1.596595824586457e-05, "loss": 0.3352, "step": 7871 }, { "epoch": 0.5957712501773804, "grad_norm": 0.81640625, "learning_rate": 1.5965002311986774e-05, "loss": 0.3525, "step": 7872 }, { "epoch": 0.5958469325008278, "grad_norm": 0.75390625, "learning_rate": 1.596404629348519e-05, "loss": 0.317, "step": 7873 }, { "epoch": 0.5959226148242751, "grad_norm": 0.78125, "learning_rate": 1.5963090190373368e-05, "loss": 0.3383, "step": 7874 }, { "epoch": 0.5959982971477225, "grad_norm": 0.75390625, "learning_rate": 1.5962134002664885e-05, "loss": 0.3152, "step": 7875 }, { "epoch": 0.5960739794711698, "grad_norm": 1.0625, "learning_rate": 1.5961177730373298e-05, "loss": 0.3683, "step": 7876 }, { "epoch": 0.5961496617946171, "grad_norm": 0.7890625, "learning_rate": 1.5960221373512176e-05, "loss": 0.3402, "step": 7877 }, { "epoch": 0.5962253441180644, "grad_norm": 0.83203125, "learning_rate": 1.5959264932095085e-05, "loss": 0.3928, "step": 7878 }, { "epoch": 0.5963010264415117, "grad_norm": 0.72265625, "learning_rate": 1.5958308406135593e-05, "loss": 0.2616, "step": 7879 }, { "epoch": 0.5963767087649591, "grad_norm": 0.80078125, "learning_rate": 1.5957351795647277e-05, "loss": 0.3143, "step": 7880 }, { "epoch": 0.5964523910884064, "grad_norm": 0.828125, "learning_rate": 1.5956395100643694e-05, "loss": 0.3579, "step": 7881 }, { "epoch": 0.5965280734118538, "grad_norm": 0.73828125, "learning_rate": 1.595543832113843e-05, "loss": 0.3238, "step": 7882 }, { "epoch": 0.5966037557353011, "grad_norm": 0.7734375, "learning_rate": 1.5954481457145053e-05, "loss": 0.3085, "step": 7883 }, { "epoch": 0.5966794380587485, "grad_norm": 0.87109375, "learning_rate": 1.595352450867714e-05, "loss": 0.3829, "step": 7884 }, { "epoch": 0.5967551203821957, "grad_norm": 0.98828125, "learning_rate": 1.5952567475748262e-05, "loss": 0.3489, "step": 7885 }, { "epoch": 0.596830802705643, "grad_norm": 1.40625, "learning_rate": 1.5951610358372002e-05, "loss": 0.3756, "step": 7886 }, { "epoch": 0.5969064850290904, "grad_norm": 0.765625, "learning_rate": 1.5950653156561932e-05, "loss": 0.3151, "step": 7887 }, { "epoch": 0.5969821673525377, "grad_norm": 0.78125, "learning_rate": 1.594969587033164e-05, "loss": 0.3001, "step": 7888 }, { "epoch": 0.5970578496759851, "grad_norm": 0.765625, "learning_rate": 1.59487384996947e-05, "loss": 0.2883, "step": 7889 }, { "epoch": 0.5971335319994324, "grad_norm": 0.7890625, "learning_rate": 1.5947781044664696e-05, "loss": 0.3593, "step": 7890 }, { "epoch": 0.5972092143228798, "grad_norm": 0.7734375, "learning_rate": 1.594682350525521e-05, "loss": 0.3382, "step": 7891 }, { "epoch": 0.597284896646327, "grad_norm": 0.75, "learning_rate": 1.594586588147983e-05, "loss": 0.3021, "step": 7892 }, { "epoch": 0.5973605789697743, "grad_norm": 0.8359375, "learning_rate": 1.5944908173352132e-05, "loss": 0.3795, "step": 7893 }, { "epoch": 0.5974362612932217, "grad_norm": 0.8125, "learning_rate": 1.594395038088572e-05, "loss": 0.3429, "step": 7894 }, { "epoch": 0.597511943616669, "grad_norm": 0.734375, "learning_rate": 1.5942992504094167e-05, "loss": 0.3227, "step": 7895 }, { "epoch": 0.5975876259401164, "grad_norm": 0.71484375, "learning_rate": 1.5942034542991068e-05, "loss": 0.2759, "step": 7896 }, { "epoch": 0.5976633082635637, "grad_norm": 0.7578125, "learning_rate": 1.594107649759001e-05, "loss": 0.3235, "step": 7897 }, { "epoch": 0.5977389905870111, "grad_norm": 0.84375, "learning_rate": 1.594011836790459e-05, "loss": 0.3336, "step": 7898 }, { "epoch": 0.5978146729104583, "grad_norm": 0.7421875, "learning_rate": 1.5939160153948393e-05, "loss": 0.3385, "step": 7899 }, { "epoch": 0.5978903552339057, "grad_norm": 0.8046875, "learning_rate": 1.5938201855735017e-05, "loss": 0.3691, "step": 7900 }, { "epoch": 0.597966037557353, "grad_norm": 0.80078125, "learning_rate": 1.593724347327806e-05, "loss": 0.3453, "step": 7901 }, { "epoch": 0.5980417198808003, "grad_norm": 0.7890625, "learning_rate": 1.5936285006591113e-05, "loss": 0.3527, "step": 7902 }, { "epoch": 0.5981174022042477, "grad_norm": 0.74609375, "learning_rate": 1.5935326455687782e-05, "loss": 0.3142, "step": 7903 }, { "epoch": 0.598193084527695, "grad_norm": 0.74609375, "learning_rate": 1.5934367820581654e-05, "loss": 0.3218, "step": 7904 }, { "epoch": 0.5982687668511423, "grad_norm": 0.765625, "learning_rate": 1.593340910128634e-05, "loss": 0.337, "step": 7905 }, { "epoch": 0.5983444491745896, "grad_norm": 0.85546875, "learning_rate": 1.5932450297815433e-05, "loss": 0.3638, "step": 7906 }, { "epoch": 0.598420131498037, "grad_norm": 0.75, "learning_rate": 1.5931491410182535e-05, "loss": 0.3254, "step": 7907 }, { "epoch": 0.5984958138214843, "grad_norm": 0.7578125, "learning_rate": 1.5930532438401255e-05, "loss": 0.2852, "step": 7908 }, { "epoch": 0.5985714961449317, "grad_norm": 0.78515625, "learning_rate": 1.59295733824852e-05, "loss": 0.3312, "step": 7909 }, { "epoch": 0.598647178468379, "grad_norm": 0.78125, "learning_rate": 1.5928614242447965e-05, "loss": 0.3293, "step": 7910 }, { "epoch": 0.5987228607918264, "grad_norm": 0.75, "learning_rate": 1.5927655018303162e-05, "loss": 0.3138, "step": 7911 }, { "epoch": 0.5987985431152736, "grad_norm": 0.87890625, "learning_rate": 1.5926695710064404e-05, "loss": 0.3564, "step": 7912 }, { "epoch": 0.5988742254387209, "grad_norm": 0.78515625, "learning_rate": 1.5925736317745295e-05, "loss": 0.3221, "step": 7913 }, { "epoch": 0.5989499077621683, "grad_norm": 0.83203125, "learning_rate": 1.592477684135945e-05, "loss": 0.3907, "step": 7914 }, { "epoch": 0.5990255900856156, "grad_norm": 0.74609375, "learning_rate": 1.5923817280920477e-05, "loss": 0.288, "step": 7915 }, { "epoch": 0.599101272409063, "grad_norm": 0.7421875, "learning_rate": 1.592285763644199e-05, "loss": 0.3029, "step": 7916 }, { "epoch": 0.5991769547325103, "grad_norm": 0.80859375, "learning_rate": 1.5921897907937604e-05, "loss": 0.3684, "step": 7917 }, { "epoch": 0.5992526370559577, "grad_norm": 1.8828125, "learning_rate": 1.5920938095420934e-05, "loss": 0.4115, "step": 7918 }, { "epoch": 0.5993283193794049, "grad_norm": 0.796875, "learning_rate": 1.5919978198905593e-05, "loss": 0.3312, "step": 7919 }, { "epoch": 0.5994040017028522, "grad_norm": 0.75, "learning_rate": 1.5919018218405206e-05, "loss": 0.2818, "step": 7920 }, { "epoch": 0.5994796840262996, "grad_norm": 0.74609375, "learning_rate": 1.5918058153933386e-05, "loss": 0.2959, "step": 7921 }, { "epoch": 0.5995553663497469, "grad_norm": 0.7734375, "learning_rate": 1.5917098005503757e-05, "loss": 0.3384, "step": 7922 }, { "epoch": 0.5996310486731943, "grad_norm": 0.7890625, "learning_rate": 1.591613777312994e-05, "loss": 0.3446, "step": 7923 }, { "epoch": 0.5997067309966416, "grad_norm": 0.75390625, "learning_rate": 1.5915177456825553e-05, "loss": 0.3138, "step": 7924 }, { "epoch": 0.599782413320089, "grad_norm": 0.81640625, "learning_rate": 1.591421705660422e-05, "loss": 0.3223, "step": 7925 }, { "epoch": 0.5998580956435362, "grad_norm": 0.7890625, "learning_rate": 1.591325657247958e-05, "loss": 0.3192, "step": 7926 }, { "epoch": 0.5999337779669836, "grad_norm": 0.7421875, "learning_rate": 1.591229600446524e-05, "loss": 0.311, "step": 7927 }, { "epoch": 0.6000094602904309, "grad_norm": 0.72265625, "learning_rate": 1.5911335352574835e-05, "loss": 0.257, "step": 7928 }, { "epoch": 0.6000851426138782, "grad_norm": 0.75, "learning_rate": 1.5910374616821994e-05, "loss": 0.3007, "step": 7929 }, { "epoch": 0.6001608249373256, "grad_norm": 0.77734375, "learning_rate": 1.590941379722035e-05, "loss": 0.3066, "step": 7930 }, { "epoch": 0.6002365072607729, "grad_norm": 0.7265625, "learning_rate": 1.590845289378353e-05, "loss": 0.2896, "step": 7931 }, { "epoch": 0.6003121895842203, "grad_norm": 0.81640625, "learning_rate": 1.5907491906525162e-05, "loss": 0.3203, "step": 7932 }, { "epoch": 0.6003121895842203, "eval_loss": 0.33722907304763794, "eval_runtime": 83.3824, "eval_samples_per_second": 58.31, "eval_steps_per_second": 58.31, "step": 7932 }, { "epoch": 0.6003878719076675, "grad_norm": 1.8125, "learning_rate": 1.5906530835458885e-05, "loss": 0.3822, "step": 7933 }, { "epoch": 0.6004635542311149, "grad_norm": 0.7265625, "learning_rate": 1.5905569680598333e-05, "loss": 0.2781, "step": 7934 }, { "epoch": 0.6005392365545622, "grad_norm": 0.8046875, "learning_rate": 1.5904608441957142e-05, "loss": 0.3457, "step": 7935 }, { "epoch": 0.6006149188780096, "grad_norm": 0.84375, "learning_rate": 1.590364711954895e-05, "loss": 0.3088, "step": 7936 }, { "epoch": 0.6006906012014569, "grad_norm": 0.7890625, "learning_rate": 1.5902685713387386e-05, "loss": 0.3631, "step": 7937 }, { "epoch": 0.6007662835249042, "grad_norm": 0.7890625, "learning_rate": 1.5901724223486102e-05, "loss": 0.3528, "step": 7938 }, { "epoch": 0.6008419658483516, "grad_norm": 0.765625, "learning_rate": 1.590076264985873e-05, "loss": 0.3206, "step": 7939 }, { "epoch": 0.6009176481717988, "grad_norm": 0.8125, "learning_rate": 1.5899800992518913e-05, "loss": 0.3716, "step": 7940 }, { "epoch": 0.6009933304952462, "grad_norm": 0.79296875, "learning_rate": 1.5898839251480295e-05, "loss": 0.3778, "step": 7941 }, { "epoch": 0.6010690128186935, "grad_norm": 0.8359375, "learning_rate": 1.589787742675652e-05, "loss": 0.3754, "step": 7942 }, { "epoch": 0.6011446951421409, "grad_norm": 0.72265625, "learning_rate": 1.5896915518361227e-05, "loss": 0.288, "step": 7943 }, { "epoch": 0.6012203774655882, "grad_norm": 0.7890625, "learning_rate": 1.5895953526308075e-05, "loss": 0.3416, "step": 7944 }, { "epoch": 0.6012960597890356, "grad_norm": 0.7109375, "learning_rate": 1.5894991450610704e-05, "loss": 0.2946, "step": 7945 }, { "epoch": 0.6013717421124829, "grad_norm": 0.7109375, "learning_rate": 1.589402929128276e-05, "loss": 0.2909, "step": 7946 }, { "epoch": 0.6014474244359301, "grad_norm": 0.85546875, "learning_rate": 1.58930670483379e-05, "loss": 0.3363, "step": 7947 }, { "epoch": 0.6015231067593775, "grad_norm": 0.7421875, "learning_rate": 1.5892104721789765e-05, "loss": 0.3052, "step": 7948 }, { "epoch": 0.6015987890828248, "grad_norm": 0.8203125, "learning_rate": 1.5891142311652016e-05, "loss": 0.3183, "step": 7949 }, { "epoch": 0.6016744714062722, "grad_norm": 1.3203125, "learning_rate": 1.5890179817938306e-05, "loss": 0.3539, "step": 7950 }, { "epoch": 0.6017501537297195, "grad_norm": 0.75390625, "learning_rate": 1.5889217240662288e-05, "loss": 0.3054, "step": 7951 }, { "epoch": 0.6018258360531669, "grad_norm": 0.7109375, "learning_rate": 1.5888254579837613e-05, "loss": 0.2805, "step": 7952 }, { "epoch": 0.6019015183766142, "grad_norm": 0.8828125, "learning_rate": 1.5887291835477946e-05, "loss": 0.3041, "step": 7953 }, { "epoch": 0.6019772007000614, "grad_norm": 0.79296875, "learning_rate": 1.588632900759694e-05, "loss": 0.3727, "step": 7954 }, { "epoch": 0.6020528830235088, "grad_norm": 0.76171875, "learning_rate": 1.5885366096208254e-05, "loss": 0.3318, "step": 7955 }, { "epoch": 0.6021285653469561, "grad_norm": 0.7421875, "learning_rate": 1.5884403101325552e-05, "loss": 0.265, "step": 7956 }, { "epoch": 0.6022042476704035, "grad_norm": 0.81640625, "learning_rate": 1.5883440022962495e-05, "loss": 0.3778, "step": 7957 }, { "epoch": 0.6022799299938508, "grad_norm": 0.75390625, "learning_rate": 1.588247686113274e-05, "loss": 0.3059, "step": 7958 }, { "epoch": 0.6023556123172982, "grad_norm": 0.74609375, "learning_rate": 1.588151361584996e-05, "loss": 0.3177, "step": 7959 }, { "epoch": 0.6024312946407455, "grad_norm": 0.734375, "learning_rate": 1.588055028712782e-05, "loss": 0.2972, "step": 7960 }, { "epoch": 0.6025069769641928, "grad_norm": 0.75, "learning_rate": 1.5879586874979977e-05, "loss": 0.3133, "step": 7961 }, { "epoch": 0.6025826592876401, "grad_norm": 0.71875, "learning_rate": 1.5878623379420105e-05, "loss": 0.2864, "step": 7962 }, { "epoch": 0.6026583416110874, "grad_norm": 0.7421875, "learning_rate": 1.5877659800461873e-05, "loss": 0.2989, "step": 7963 }, { "epoch": 0.6027340239345348, "grad_norm": 0.7734375, "learning_rate": 1.5876696138118953e-05, "loss": 0.3218, "step": 7964 }, { "epoch": 0.6028097062579821, "grad_norm": 0.734375, "learning_rate": 1.587573239240501e-05, "loss": 0.3056, "step": 7965 }, { "epoch": 0.6028853885814295, "grad_norm": 0.78515625, "learning_rate": 1.5874768563333724e-05, "loss": 0.3481, "step": 7966 }, { "epoch": 0.6029610709048768, "grad_norm": 0.703125, "learning_rate": 1.587380465091876e-05, "loss": 0.2934, "step": 7967 }, { "epoch": 0.6030367532283241, "grad_norm": 0.77734375, "learning_rate": 1.58728406551738e-05, "loss": 0.3327, "step": 7968 }, { "epoch": 0.6031124355517714, "grad_norm": 0.82421875, "learning_rate": 1.5871876576112516e-05, "loss": 0.3375, "step": 7969 }, { "epoch": 0.6031881178752188, "grad_norm": 0.7578125, "learning_rate": 1.5870912413748585e-05, "loss": 0.3063, "step": 7970 }, { "epoch": 0.6032638001986661, "grad_norm": 0.77734375, "learning_rate": 1.5869948168095687e-05, "loss": 0.3108, "step": 7971 }, { "epoch": 0.6033394825221134, "grad_norm": 0.734375, "learning_rate": 1.58689838391675e-05, "loss": 0.2903, "step": 7972 }, { "epoch": 0.6034151648455608, "grad_norm": 1.140625, "learning_rate": 1.586801942697771e-05, "loss": 0.4094, "step": 7973 }, { "epoch": 0.6034908471690081, "grad_norm": 0.78515625, "learning_rate": 1.5867054931539995e-05, "loss": 0.2985, "step": 7974 }, { "epoch": 0.6035665294924554, "grad_norm": 0.8046875, "learning_rate": 1.5866090352868037e-05, "loss": 0.3565, "step": 7975 }, { "epoch": 0.6036422118159027, "grad_norm": 0.73046875, "learning_rate": 1.5865125690975512e-05, "loss": 0.3001, "step": 7976 }, { "epoch": 0.6037178941393501, "grad_norm": 0.765625, "learning_rate": 1.5864160945876122e-05, "loss": 0.3197, "step": 7977 }, { "epoch": 0.6037935764627974, "grad_norm": 0.87890625, "learning_rate": 1.586319611758355e-05, "loss": 0.3329, "step": 7978 }, { "epoch": 0.6038692587862448, "grad_norm": 0.76953125, "learning_rate": 1.586223120611147e-05, "loss": 0.3164, "step": 7979 }, { "epoch": 0.6039449411096921, "grad_norm": 0.7265625, "learning_rate": 1.5861266211473587e-05, "loss": 0.2804, "step": 7980 }, { "epoch": 0.6040206234331394, "grad_norm": 0.96875, "learning_rate": 1.586030113368358e-05, "loss": 0.3928, "step": 7981 }, { "epoch": 0.6040963057565867, "grad_norm": 0.75390625, "learning_rate": 1.585933597275515e-05, "loss": 0.3201, "step": 7982 }, { "epoch": 0.604171988080034, "grad_norm": 0.765625, "learning_rate": 1.585837072870198e-05, "loss": 0.3032, "step": 7983 }, { "epoch": 0.6042476704034814, "grad_norm": 0.70703125, "learning_rate": 1.585740540153777e-05, "loss": 0.2915, "step": 7984 }, { "epoch": 0.6043233527269287, "grad_norm": 1.3984375, "learning_rate": 1.585643999127621e-05, "loss": 0.3277, "step": 7985 }, { "epoch": 0.6043990350503761, "grad_norm": 0.69921875, "learning_rate": 1.5855474497930998e-05, "loss": 0.2739, "step": 7986 }, { "epoch": 0.6044747173738234, "grad_norm": 0.79296875, "learning_rate": 1.5854508921515838e-05, "loss": 0.3264, "step": 7987 }, { "epoch": 0.6045503996972708, "grad_norm": 0.6953125, "learning_rate": 1.585354326204442e-05, "loss": 0.2985, "step": 7988 }, { "epoch": 0.604626082020718, "grad_norm": 0.84765625, "learning_rate": 1.585257751953044e-05, "loss": 0.3854, "step": 7989 }, { "epoch": 0.6047017643441653, "grad_norm": 0.765625, "learning_rate": 1.5851611693987608e-05, "loss": 0.3134, "step": 7990 }, { "epoch": 0.6047774466676127, "grad_norm": 0.77734375, "learning_rate": 1.5850645785429627e-05, "loss": 0.2991, "step": 7991 }, { "epoch": 0.60485312899106, "grad_norm": 0.765625, "learning_rate": 1.5849679793870192e-05, "loss": 0.2965, "step": 7992 }, { "epoch": 0.6049288113145074, "grad_norm": 0.73046875, "learning_rate": 1.584871371932301e-05, "loss": 0.3307, "step": 7993 }, { "epoch": 0.6050044936379547, "grad_norm": 0.734375, "learning_rate": 1.5847747561801788e-05, "loss": 0.3202, "step": 7994 }, { "epoch": 0.6050801759614021, "grad_norm": 0.74609375, "learning_rate": 1.584678132132023e-05, "loss": 0.3159, "step": 7995 }, { "epoch": 0.6051558582848493, "grad_norm": 0.73046875, "learning_rate": 1.5845814997892048e-05, "loss": 0.312, "step": 7996 }, { "epoch": 0.6052315406082966, "grad_norm": 0.84765625, "learning_rate": 1.5844848591530947e-05, "loss": 0.332, "step": 7997 }, { "epoch": 0.605307222931744, "grad_norm": 0.8359375, "learning_rate": 1.584388210225064e-05, "loss": 0.3811, "step": 7998 }, { "epoch": 0.6053829052551913, "grad_norm": 0.78125, "learning_rate": 1.5842915530064834e-05, "loss": 0.2958, "step": 7999 }, { "epoch": 0.6054585875786387, "grad_norm": 0.765625, "learning_rate": 1.584194887498725e-05, "loss": 0.356, "step": 8000 }, { "epoch": 0.605534269902086, "grad_norm": 0.74609375, "learning_rate": 1.5840982137031595e-05, "loss": 0.3154, "step": 8001 }, { "epoch": 0.6056099522255334, "grad_norm": 0.75390625, "learning_rate": 1.584001531621158e-05, "loss": 0.3149, "step": 8002 }, { "epoch": 0.6056856345489806, "grad_norm": 0.7265625, "learning_rate": 1.5839048412540926e-05, "loss": 0.2808, "step": 8003 }, { "epoch": 0.605761316872428, "grad_norm": 0.73828125, "learning_rate": 1.5838081426033352e-05, "loss": 0.3064, "step": 8004 }, { "epoch": 0.6058369991958753, "grad_norm": 0.7734375, "learning_rate": 1.5837114356702575e-05, "loss": 0.3052, "step": 8005 }, { "epoch": 0.6059126815193226, "grad_norm": 0.83984375, "learning_rate": 1.5836147204562317e-05, "loss": 0.2982, "step": 8006 }, { "epoch": 0.60598836384277, "grad_norm": 0.765625, "learning_rate": 1.5835179969626292e-05, "loss": 0.3163, "step": 8007 }, { "epoch": 0.6060640461662173, "grad_norm": 0.69921875, "learning_rate": 1.5834212651908226e-05, "loss": 0.279, "step": 8008 }, { "epoch": 0.6061397284896647, "grad_norm": 0.9921875, "learning_rate": 1.583324525142184e-05, "loss": 0.383, "step": 8009 }, { "epoch": 0.6062154108131119, "grad_norm": 0.83203125, "learning_rate": 1.5832277768180863e-05, "loss": 0.3459, "step": 8010 }, { "epoch": 0.6062910931365593, "grad_norm": 0.70703125, "learning_rate": 1.583131020219902e-05, "loss": 0.2911, "step": 8011 }, { "epoch": 0.6063667754600066, "grad_norm": 0.796875, "learning_rate": 1.583034255349003e-05, "loss": 0.3334, "step": 8012 }, { "epoch": 0.606442457783454, "grad_norm": 0.765625, "learning_rate": 1.5829374822067624e-05, "loss": 0.3191, "step": 8013 }, { "epoch": 0.6065181401069013, "grad_norm": 0.7890625, "learning_rate": 1.5828407007945537e-05, "loss": 0.3235, "step": 8014 }, { "epoch": 0.6065938224303487, "grad_norm": 0.76171875, "learning_rate": 1.5827439111137492e-05, "loss": 0.3175, "step": 8015 }, { "epoch": 0.606669504753796, "grad_norm": 0.76171875, "learning_rate": 1.5826471131657227e-05, "loss": 0.3295, "step": 8016 }, { "epoch": 0.6067451870772432, "grad_norm": 0.75, "learning_rate": 1.5825503069518465e-05, "loss": 0.3212, "step": 8017 }, { "epoch": 0.6068208694006906, "grad_norm": 0.859375, "learning_rate": 1.5824534924734946e-05, "loss": 0.3843, "step": 8018 }, { "epoch": 0.6068965517241379, "grad_norm": 0.83984375, "learning_rate": 1.5823566697320408e-05, "loss": 0.3452, "step": 8019 }, { "epoch": 0.6069722340475853, "grad_norm": 0.7421875, "learning_rate": 1.582259838728858e-05, "loss": 0.3043, "step": 8020 }, { "epoch": 0.6070479163710326, "grad_norm": 0.76953125, "learning_rate": 1.5821629994653203e-05, "loss": 0.3222, "step": 8021 }, { "epoch": 0.60712359869448, "grad_norm": 0.76953125, "learning_rate": 1.5820661519428012e-05, "loss": 0.3201, "step": 8022 }, { "epoch": 0.6071992810179272, "grad_norm": 0.703125, "learning_rate": 1.5819692961626752e-05, "loss": 0.3, "step": 8023 }, { "epoch": 0.6072749633413745, "grad_norm": 0.8125, "learning_rate": 1.5818724321263158e-05, "loss": 0.3597, "step": 8024 }, { "epoch": 0.6073506456648219, "grad_norm": 0.734375, "learning_rate": 1.5817755598350975e-05, "loss": 0.315, "step": 8025 }, { "epoch": 0.6074263279882692, "grad_norm": 0.734375, "learning_rate": 1.5816786792903943e-05, "loss": 0.3009, "step": 8026 }, { "epoch": 0.6075020103117166, "grad_norm": 0.76171875, "learning_rate": 1.5815817904935813e-05, "loss": 0.3377, "step": 8027 }, { "epoch": 0.6075776926351639, "grad_norm": 0.77734375, "learning_rate": 1.581484893446032e-05, "loss": 0.2703, "step": 8028 }, { "epoch": 0.6076533749586113, "grad_norm": 0.80859375, "learning_rate": 1.5813879881491224e-05, "loss": 0.3426, "step": 8029 }, { "epoch": 0.6077290572820585, "grad_norm": 0.75390625, "learning_rate": 1.581291074604226e-05, "loss": 0.2977, "step": 8030 }, { "epoch": 0.6078047396055059, "grad_norm": 0.76171875, "learning_rate": 1.5811941528127183e-05, "loss": 0.3269, "step": 8031 }, { "epoch": 0.6078804219289532, "grad_norm": 0.75, "learning_rate": 1.581097222775974e-05, "loss": 0.3126, "step": 8032 }, { "epoch": 0.6079561042524005, "grad_norm": 0.796875, "learning_rate": 1.5810002844953683e-05, "loss": 0.3269, "step": 8033 }, { "epoch": 0.6080317865758479, "grad_norm": 0.7421875, "learning_rate": 1.580903337972277e-05, "loss": 0.2931, "step": 8034 }, { "epoch": 0.6081074688992952, "grad_norm": 0.81640625, "learning_rate": 1.5808063832080746e-05, "loss": 0.3631, "step": 8035 }, { "epoch": 0.6081831512227426, "grad_norm": 0.74609375, "learning_rate": 1.580709420204137e-05, "loss": 0.3111, "step": 8036 }, { "epoch": 0.6082588335461898, "grad_norm": 0.75390625, "learning_rate": 1.58061244896184e-05, "loss": 0.2729, "step": 8037 }, { "epoch": 0.6083345158696372, "grad_norm": 0.765625, "learning_rate": 1.580515469482559e-05, "loss": 0.3544, "step": 8038 }, { "epoch": 0.6084101981930845, "grad_norm": 0.94921875, "learning_rate": 1.5804184817676696e-05, "loss": 0.3601, "step": 8039 }, { "epoch": 0.6084858805165319, "grad_norm": 0.765625, "learning_rate": 1.580321485818548e-05, "loss": 0.3287, "step": 8040 }, { "epoch": 0.6085615628399792, "grad_norm": 0.7890625, "learning_rate": 1.5802244816365702e-05, "loss": 0.307, "step": 8041 }, { "epoch": 0.6086372451634265, "grad_norm": 0.80078125, "learning_rate": 1.5801274692231126e-05, "loss": 0.343, "step": 8042 }, { "epoch": 0.6087129274868739, "grad_norm": 0.8046875, "learning_rate": 1.5800304485795516e-05, "loss": 0.3151, "step": 8043 }, { "epoch": 0.6087886098103211, "grad_norm": 0.7265625, "learning_rate": 1.579933419707263e-05, "loss": 0.2924, "step": 8044 }, { "epoch": 0.6088642921337685, "grad_norm": 0.8359375, "learning_rate": 1.5798363826076237e-05, "loss": 0.4142, "step": 8045 }, { "epoch": 0.6089399744572158, "grad_norm": 0.77734375, "learning_rate": 1.57973933728201e-05, "loss": 0.3061, "step": 8046 }, { "epoch": 0.6090156567806632, "grad_norm": 0.796875, "learning_rate": 1.5796422837317995e-05, "loss": 0.3382, "step": 8047 }, { "epoch": 0.6090913391041105, "grad_norm": 0.78125, "learning_rate": 1.5795452219583683e-05, "loss": 0.3271, "step": 8048 }, { "epoch": 0.6091670214275579, "grad_norm": 0.78515625, "learning_rate": 1.5794481519630936e-05, "loss": 0.3576, "step": 8049 }, { "epoch": 0.6092427037510052, "grad_norm": 0.76171875, "learning_rate": 1.5793510737473523e-05, "loss": 0.319, "step": 8050 }, { "epoch": 0.6093183860744524, "grad_norm": 0.75390625, "learning_rate": 1.579253987312522e-05, "loss": 0.3277, "step": 8051 }, { "epoch": 0.6093940683978998, "grad_norm": 0.71875, "learning_rate": 1.5791568926599796e-05, "loss": 0.3052, "step": 8052 }, { "epoch": 0.6094697507213471, "grad_norm": 0.765625, "learning_rate": 1.579059789791103e-05, "loss": 0.3244, "step": 8053 }, { "epoch": 0.6095454330447945, "grad_norm": 0.76953125, "learning_rate": 1.5789626787072698e-05, "loss": 0.3187, "step": 8054 }, { "epoch": 0.6096211153682418, "grad_norm": 0.83203125, "learning_rate": 1.5788655594098572e-05, "loss": 0.3781, "step": 8055 }, { "epoch": 0.6096967976916892, "grad_norm": 0.80859375, "learning_rate": 1.578768431900243e-05, "loss": 0.3013, "step": 8056 }, { "epoch": 0.6097724800151365, "grad_norm": 0.75390625, "learning_rate": 1.578671296179806e-05, "loss": 0.3477, "step": 8057 }, { "epoch": 0.6098481623385837, "grad_norm": 0.765625, "learning_rate": 1.5785741522499232e-05, "loss": 0.3345, "step": 8058 }, { "epoch": 0.6099238446620311, "grad_norm": 0.7578125, "learning_rate": 1.5784770001119733e-05, "loss": 0.3184, "step": 8059 }, { "epoch": 0.6099995269854784, "grad_norm": 0.82421875, "learning_rate": 1.5783798397673343e-05, "loss": 0.3571, "step": 8060 }, { "epoch": 0.6100752093089258, "grad_norm": 0.77734375, "learning_rate": 1.5782826712173852e-05, "loss": 0.3088, "step": 8061 }, { "epoch": 0.6101508916323731, "grad_norm": 0.8125, "learning_rate": 1.5781854944635036e-05, "loss": 0.3693, "step": 8062 }, { "epoch": 0.6102265739558205, "grad_norm": 0.73828125, "learning_rate": 1.5780883095070687e-05, "loss": 0.3086, "step": 8063 }, { "epoch": 0.6103022562792678, "grad_norm": 0.70703125, "learning_rate": 1.5779911163494592e-05, "loss": 0.2717, "step": 8064 }, { "epoch": 0.610377938602715, "grad_norm": 0.69921875, "learning_rate": 1.5778939149920538e-05, "loss": 0.2753, "step": 8065 }, { "epoch": 0.6104536209261624, "grad_norm": 0.90234375, "learning_rate": 1.5777967054362314e-05, "loss": 0.2985, "step": 8066 }, { "epoch": 0.6105293032496097, "grad_norm": 0.71484375, "learning_rate": 1.5776994876833713e-05, "loss": 0.3138, "step": 8067 }, { "epoch": 0.6106049855730571, "grad_norm": 0.7421875, "learning_rate": 1.5776022617348525e-05, "loss": 0.2939, "step": 8068 }, { "epoch": 0.6106806678965044, "grad_norm": 1.015625, "learning_rate": 1.5775050275920546e-05, "loss": 0.3847, "step": 8069 }, { "epoch": 0.6107563502199518, "grad_norm": 0.75, "learning_rate": 1.5774077852563567e-05, "loss": 0.3199, "step": 8070 }, { "epoch": 0.6108320325433991, "grad_norm": 0.796875, "learning_rate": 1.5773105347291386e-05, "loss": 0.3179, "step": 8071 }, { "epoch": 0.6109077148668464, "grad_norm": 0.77734375, "learning_rate": 1.5772132760117797e-05, "loss": 0.2868, "step": 8072 }, { "epoch": 0.6109833971902937, "grad_norm": 0.75390625, "learning_rate": 1.57711600910566e-05, "loss": 0.3002, "step": 8073 }, { "epoch": 0.611059079513741, "grad_norm": 1.28125, "learning_rate": 1.5770187340121595e-05, "loss": 0.383, "step": 8074 }, { "epoch": 0.6111347618371884, "grad_norm": 0.7265625, "learning_rate": 1.576921450732658e-05, "loss": 0.2753, "step": 8075 }, { "epoch": 0.6112104441606357, "grad_norm": 0.7890625, "learning_rate": 1.5768241592685357e-05, "loss": 0.3512, "step": 8076 }, { "epoch": 0.6112861264840831, "grad_norm": 1.109375, "learning_rate": 1.576726859621173e-05, "loss": 0.3429, "step": 8077 }, { "epoch": 0.6113618088075304, "grad_norm": 0.75, "learning_rate": 1.5766295517919496e-05, "loss": 0.3218, "step": 8078 }, { "epoch": 0.6114374911309777, "grad_norm": 0.7265625, "learning_rate": 1.5765322357822465e-05, "loss": 0.2815, "step": 8079 }, { "epoch": 0.611513173454425, "grad_norm": 0.859375, "learning_rate": 1.5764349115934448e-05, "loss": 0.382, "step": 8080 }, { "epoch": 0.6115888557778724, "grad_norm": 0.73046875, "learning_rate": 1.5763375792269246e-05, "loss": 0.3087, "step": 8081 }, { "epoch": 0.6116645381013197, "grad_norm": 0.7890625, "learning_rate": 1.5762402386840664e-05, "loss": 0.3692, "step": 8082 }, { "epoch": 0.6117402204247671, "grad_norm": 0.77734375, "learning_rate": 1.5761428899662517e-05, "loss": 0.3311, "step": 8083 }, { "epoch": 0.6118159027482144, "grad_norm": 0.7421875, "learning_rate": 1.5760455330748615e-05, "loss": 0.3217, "step": 8084 }, { "epoch": 0.6118915850716617, "grad_norm": 0.796875, "learning_rate": 1.575948168011277e-05, "loss": 0.3252, "step": 8085 }, { "epoch": 0.611967267395109, "grad_norm": 0.79296875, "learning_rate": 1.5758507947768794e-05, "loss": 0.34, "step": 8086 }, { "epoch": 0.6120429497185563, "grad_norm": 0.73828125, "learning_rate": 1.57575341337305e-05, "loss": 0.3113, "step": 8087 }, { "epoch": 0.6121186320420037, "grad_norm": 0.76171875, "learning_rate": 1.57565602380117e-05, "loss": 0.3358, "step": 8088 }, { "epoch": 0.612194314365451, "grad_norm": 0.73046875, "learning_rate": 1.5755586260626217e-05, "loss": 0.2981, "step": 8089 }, { "epoch": 0.6122699966888984, "grad_norm": 0.80078125, "learning_rate": 1.5754612201587868e-05, "loss": 0.3385, "step": 8090 }, { "epoch": 0.6123456790123457, "grad_norm": 0.7421875, "learning_rate": 1.575363806091047e-05, "loss": 0.3217, "step": 8091 }, { "epoch": 0.6124213613357931, "grad_norm": 0.7578125, "learning_rate": 1.575266383860784e-05, "loss": 0.3007, "step": 8092 }, { "epoch": 0.6124970436592403, "grad_norm": 0.8046875, "learning_rate": 1.57516895346938e-05, "loss": 0.3483, "step": 8093 }, { "epoch": 0.6125727259826876, "grad_norm": 0.75390625, "learning_rate": 1.575071514918218e-05, "loss": 0.3139, "step": 8094 }, { "epoch": 0.612648408306135, "grad_norm": 0.77734375, "learning_rate": 1.5749740682086795e-05, "loss": 0.3281, "step": 8095 }, { "epoch": 0.6127240906295823, "grad_norm": 0.75390625, "learning_rate": 1.5748766133421466e-05, "loss": 0.309, "step": 8096 }, { "epoch": 0.6127997729530297, "grad_norm": 0.7265625, "learning_rate": 1.5747791503200028e-05, "loss": 0.2613, "step": 8097 }, { "epoch": 0.612875455276477, "grad_norm": 0.76171875, "learning_rate": 1.5746816791436303e-05, "loss": 0.2917, "step": 8098 }, { "epoch": 0.6129511375999244, "grad_norm": 0.76171875, "learning_rate": 1.5745841998144126e-05, "loss": 0.3109, "step": 8099 }, { "epoch": 0.6130268199233716, "grad_norm": 0.80078125, "learning_rate": 1.5744867123337313e-05, "loss": 0.3568, "step": 8100 }, { "epoch": 0.613102502246819, "grad_norm": 0.73828125, "learning_rate": 1.57438921670297e-05, "loss": 0.3233, "step": 8101 }, { "epoch": 0.6131781845702663, "grad_norm": 0.79296875, "learning_rate": 1.5742917129235125e-05, "loss": 0.3296, "step": 8102 }, { "epoch": 0.6132538668937136, "grad_norm": 0.8125, "learning_rate": 1.5741942009967414e-05, "loss": 0.3692, "step": 8103 }, { "epoch": 0.613329549217161, "grad_norm": 0.7734375, "learning_rate": 1.5740966809240405e-05, "loss": 0.3219, "step": 8104 }, { "epoch": 0.6134052315406083, "grad_norm": 0.80859375, "learning_rate": 1.5739991527067922e-05, "loss": 0.3595, "step": 8105 }, { "epoch": 0.6134809138640557, "grad_norm": 0.6953125, "learning_rate": 1.5739016163463813e-05, "loss": 0.2839, "step": 8106 }, { "epoch": 0.6135565961875029, "grad_norm": 0.70703125, "learning_rate": 1.573804071844191e-05, "loss": 0.2999, "step": 8107 }, { "epoch": 0.6136322785109503, "grad_norm": 0.7734375, "learning_rate": 1.5737065192016053e-05, "loss": 0.3402, "step": 8108 }, { "epoch": 0.6137079608343976, "grad_norm": 0.75390625, "learning_rate": 1.5736089584200084e-05, "loss": 0.3043, "step": 8109 }, { "epoch": 0.613783643157845, "grad_norm": 0.78125, "learning_rate": 1.573511389500784e-05, "loss": 0.3518, "step": 8110 }, { "epoch": 0.6138593254812923, "grad_norm": 0.76953125, "learning_rate": 1.5734138124453165e-05, "loss": 0.3142, "step": 8111 }, { "epoch": 0.6139350078047396, "grad_norm": 0.77734375, "learning_rate": 1.5733162272549896e-05, "loss": 0.3614, "step": 8112 }, { "epoch": 0.614010690128187, "grad_norm": 0.78125, "learning_rate": 1.5732186339311888e-05, "loss": 0.3297, "step": 8113 }, { "epoch": 0.6140863724516342, "grad_norm": 0.75, "learning_rate": 1.573121032475297e-05, "loss": 0.3287, "step": 8114 }, { "epoch": 0.6141620547750816, "grad_norm": 0.7578125, "learning_rate": 1.573023422888701e-05, "loss": 0.2913, "step": 8115 }, { "epoch": 0.6142377370985289, "grad_norm": 0.69921875, "learning_rate": 1.5729258051727835e-05, "loss": 0.2681, "step": 8116 }, { "epoch": 0.6143134194219763, "grad_norm": 1.15625, "learning_rate": 1.5728281793289308e-05, "loss": 0.3858, "step": 8117 }, { "epoch": 0.6143891017454236, "grad_norm": 0.8359375, "learning_rate": 1.5727305453585274e-05, "loss": 0.3986, "step": 8118 }, { "epoch": 0.614464784068871, "grad_norm": 0.80078125, "learning_rate": 1.5726329032629578e-05, "loss": 0.3181, "step": 8119 }, { "epoch": 0.6145404663923183, "grad_norm": 0.72265625, "learning_rate": 1.5725352530436083e-05, "loss": 0.2895, "step": 8120 }, { "epoch": 0.6146161487157655, "grad_norm": 0.765625, "learning_rate": 1.5724375947018637e-05, "loss": 0.3202, "step": 8121 }, { "epoch": 0.6146918310392129, "grad_norm": 0.7578125, "learning_rate": 1.572339928239109e-05, "loss": 0.3257, "step": 8122 }, { "epoch": 0.6147675133626602, "grad_norm": 0.71484375, "learning_rate": 1.572242253656731e-05, "loss": 0.3013, "step": 8123 }, { "epoch": 0.6148431956861076, "grad_norm": 0.73046875, "learning_rate": 1.572144570956114e-05, "loss": 0.2761, "step": 8124 }, { "epoch": 0.6149188780095549, "grad_norm": 0.71875, "learning_rate": 1.572046880138645e-05, "loss": 0.2872, "step": 8125 }, { "epoch": 0.6149945603330023, "grad_norm": 0.82421875, "learning_rate": 1.571949181205709e-05, "loss": 0.3552, "step": 8126 }, { "epoch": 0.6150702426564496, "grad_norm": 0.7734375, "learning_rate": 1.5718514741586924e-05, "loss": 0.3085, "step": 8127 }, { "epoch": 0.6151459249798968, "grad_norm": 0.75, "learning_rate": 1.5717537589989813e-05, "loss": 0.3036, "step": 8128 }, { "epoch": 0.6152216073033442, "grad_norm": 1.1015625, "learning_rate": 1.571656035727962e-05, "loss": 0.3768, "step": 8129 }, { "epoch": 0.6152972896267915, "grad_norm": 0.765625, "learning_rate": 1.5715583043470204e-05, "loss": 0.2995, "step": 8130 }, { "epoch": 0.6153729719502389, "grad_norm": 0.77734375, "learning_rate": 1.571460564857544e-05, "loss": 0.3155, "step": 8131 }, { "epoch": 0.6154486542736862, "grad_norm": 0.80859375, "learning_rate": 1.571362817260919e-05, "loss": 0.3378, "step": 8132 }, { "epoch": 0.6155243365971336, "grad_norm": 0.83984375, "learning_rate": 1.5712650615585314e-05, "loss": 0.3128, "step": 8133 }, { "epoch": 0.6156000189205809, "grad_norm": 0.76171875, "learning_rate": 1.571167297751769e-05, "loss": 0.3248, "step": 8134 }, { "epoch": 0.6156757012440282, "grad_norm": 0.73046875, "learning_rate": 1.571069525842018e-05, "loss": 0.283, "step": 8135 }, { "epoch": 0.6157513835674755, "grad_norm": 0.875, "learning_rate": 1.5709717458306657e-05, "loss": 0.3628, "step": 8136 }, { "epoch": 0.6158270658909228, "grad_norm": 0.78515625, "learning_rate": 1.5708739577190996e-05, "loss": 0.308, "step": 8137 }, { "epoch": 0.6159027482143702, "grad_norm": 0.9296875, "learning_rate": 1.5707761615087068e-05, "loss": 0.3543, "step": 8138 }, { "epoch": 0.6159784305378175, "grad_norm": 0.7734375, "learning_rate": 1.5706783572008746e-05, "loss": 0.3022, "step": 8139 }, { "epoch": 0.6160541128612649, "grad_norm": 0.76953125, "learning_rate": 1.5705805447969903e-05, "loss": 0.3238, "step": 8140 }, { "epoch": 0.6161297951847122, "grad_norm": 0.76953125, "learning_rate": 1.5704827242984425e-05, "loss": 0.3262, "step": 8141 }, { "epoch": 0.6162054775081595, "grad_norm": 0.765625, "learning_rate": 1.5703848957066178e-05, "loss": 0.3418, "step": 8142 }, { "epoch": 0.6162811598316068, "grad_norm": 1.0234375, "learning_rate": 1.5702870590229044e-05, "loss": 0.3551, "step": 8143 }, { "epoch": 0.6163568421550542, "grad_norm": 0.8125, "learning_rate": 1.5701892142486908e-05, "loss": 0.3383, "step": 8144 }, { "epoch": 0.6164325244785015, "grad_norm": 0.69921875, "learning_rate": 1.5700913613853647e-05, "loss": 0.2935, "step": 8145 }, { "epoch": 0.6165082068019488, "grad_norm": 0.7890625, "learning_rate": 1.569993500434314e-05, "loss": 0.3549, "step": 8146 }, { "epoch": 0.6165838891253962, "grad_norm": 0.80078125, "learning_rate": 1.5698956313969275e-05, "loss": 0.3072, "step": 8147 }, { "epoch": 0.6166595714488434, "grad_norm": 0.7734375, "learning_rate": 1.5697977542745934e-05, "loss": 0.3051, "step": 8148 }, { "epoch": 0.6167352537722908, "grad_norm": 0.6640625, "learning_rate": 1.5696998690687006e-05, "loss": 0.2607, "step": 8149 }, { "epoch": 0.6168109360957381, "grad_norm": 0.796875, "learning_rate": 1.5696019757806373e-05, "loss": 0.2882, "step": 8150 }, { "epoch": 0.6168866184191855, "grad_norm": 0.78125, "learning_rate": 1.5695040744117927e-05, "loss": 0.3094, "step": 8151 }, { "epoch": 0.6169623007426328, "grad_norm": 0.7109375, "learning_rate": 1.5694061649635554e-05, "loss": 0.2969, "step": 8152 }, { "epoch": 0.6170379830660802, "grad_norm": 0.73046875, "learning_rate": 1.5693082474373146e-05, "loss": 0.3005, "step": 8153 }, { "epoch": 0.6171136653895275, "grad_norm": 0.796875, "learning_rate": 1.569210321834459e-05, "loss": 0.3524, "step": 8154 }, { "epoch": 0.6171893477129747, "grad_norm": 0.765625, "learning_rate": 1.5691123881563786e-05, "loss": 0.3524, "step": 8155 }, { "epoch": 0.6172650300364221, "grad_norm": 0.73828125, "learning_rate": 1.569014446404462e-05, "loss": 0.2983, "step": 8156 }, { "epoch": 0.6173407123598694, "grad_norm": 0.78515625, "learning_rate": 1.5689164965800994e-05, "loss": 0.3557, "step": 8157 }, { "epoch": 0.6174163946833168, "grad_norm": 0.76171875, "learning_rate": 1.5688185386846793e-05, "loss": 0.3345, "step": 8158 }, { "epoch": 0.6174920770067641, "grad_norm": 0.80859375, "learning_rate": 1.568720572719593e-05, "loss": 0.3433, "step": 8159 }, { "epoch": 0.6175677593302115, "grad_norm": 0.79296875, "learning_rate": 1.5686225986862292e-05, "loss": 0.3159, "step": 8160 }, { "epoch": 0.6176434416536588, "grad_norm": 0.8125, "learning_rate": 1.5685246165859774e-05, "loss": 0.3477, "step": 8161 }, { "epoch": 0.617719123977106, "grad_norm": 0.734375, "learning_rate": 1.568426626420229e-05, "loss": 0.2963, "step": 8162 }, { "epoch": 0.6177948063005534, "grad_norm": 0.69921875, "learning_rate": 1.5683286281903733e-05, "loss": 0.2967, "step": 8163 }, { "epoch": 0.6178704886240007, "grad_norm": 0.73828125, "learning_rate": 1.5682306218978004e-05, "loss": 0.3198, "step": 8164 }, { "epoch": 0.6179461709474481, "grad_norm": 0.765625, "learning_rate": 1.5681326075439014e-05, "loss": 0.332, "step": 8165 }, { "epoch": 0.6180218532708954, "grad_norm": 0.734375, "learning_rate": 1.568034585130066e-05, "loss": 0.3077, "step": 8166 }, { "epoch": 0.6180975355943428, "grad_norm": 0.76171875, "learning_rate": 1.5679365546576858e-05, "loss": 0.3273, "step": 8167 }, { "epoch": 0.6181732179177901, "grad_norm": 0.72265625, "learning_rate": 1.5678385161281505e-05, "loss": 0.2723, "step": 8168 }, { "epoch": 0.6182489002412374, "grad_norm": 0.8046875, "learning_rate": 1.5677404695428518e-05, "loss": 0.3334, "step": 8169 }, { "epoch": 0.6183245825646847, "grad_norm": 0.83203125, "learning_rate": 1.5676424149031798e-05, "loss": 0.3638, "step": 8170 }, { "epoch": 0.618400264888132, "grad_norm": 0.73046875, "learning_rate": 1.5675443522105264e-05, "loss": 0.3164, "step": 8171 }, { "epoch": 0.6184759472115794, "grad_norm": 0.671875, "learning_rate": 1.5674462814662824e-05, "loss": 0.2566, "step": 8172 }, { "epoch": 0.6185516295350267, "grad_norm": 0.78125, "learning_rate": 1.5673482026718386e-05, "loss": 0.3411, "step": 8173 }, { "epoch": 0.6186273118584741, "grad_norm": 0.8359375, "learning_rate": 1.5672501158285876e-05, "loss": 0.3576, "step": 8174 }, { "epoch": 0.6187029941819214, "grad_norm": 0.83984375, "learning_rate": 1.56715202093792e-05, "loss": 0.3874, "step": 8175 }, { "epoch": 0.6187786765053687, "grad_norm": 0.7421875, "learning_rate": 1.5670539180012277e-05, "loss": 0.3006, "step": 8176 }, { "epoch": 0.618854358828816, "grad_norm": 0.796875, "learning_rate": 1.5669558070199026e-05, "loss": 0.3266, "step": 8177 }, { "epoch": 0.6189300411522634, "grad_norm": 0.765625, "learning_rate": 1.5668576879953363e-05, "loss": 0.275, "step": 8178 }, { "epoch": 0.6190057234757107, "grad_norm": 0.7734375, "learning_rate": 1.566759560928921e-05, "loss": 0.3012, "step": 8179 }, { "epoch": 0.619081405799158, "grad_norm": 0.7734375, "learning_rate": 1.5666614258220486e-05, "loss": 0.3306, "step": 8180 }, { "epoch": 0.6191570881226054, "grad_norm": 0.80859375, "learning_rate": 1.5665632826761113e-05, "loss": 0.3297, "step": 8181 }, { "epoch": 0.6192327704460527, "grad_norm": 0.765625, "learning_rate": 1.5664651314925022e-05, "loss": 0.328, "step": 8182 }, { "epoch": 0.6193084527695, "grad_norm": 0.78125, "learning_rate": 1.5663669722726128e-05, "loss": 0.3146, "step": 8183 }, { "epoch": 0.6193841350929473, "grad_norm": 0.828125, "learning_rate": 1.566268805017836e-05, "loss": 0.3762, "step": 8184 }, { "epoch": 0.6194598174163947, "grad_norm": 0.7421875, "learning_rate": 1.566170629729564e-05, "loss": 0.3052, "step": 8185 }, { "epoch": 0.619535499739842, "grad_norm": 0.68359375, "learning_rate": 1.5660724464091906e-05, "loss": 0.2529, "step": 8186 }, { "epoch": 0.6196111820632894, "grad_norm": 0.7265625, "learning_rate": 1.5659742550581082e-05, "loss": 0.295, "step": 8187 }, { "epoch": 0.6196868643867367, "grad_norm": 0.76171875, "learning_rate": 1.5658760556777095e-05, "loss": 0.3382, "step": 8188 }, { "epoch": 0.619762546710184, "grad_norm": 0.75, "learning_rate": 1.565777848269388e-05, "loss": 0.2976, "step": 8189 }, { "epoch": 0.6198382290336313, "grad_norm": 0.70703125, "learning_rate": 1.5656796328345368e-05, "loss": 0.2657, "step": 8190 }, { "epoch": 0.6199139113570786, "grad_norm": 0.73828125, "learning_rate": 1.565581409374549e-05, "loss": 0.3033, "step": 8191 }, { "epoch": 0.619989593680526, "grad_norm": 0.734375, "learning_rate": 1.565483177890818e-05, "loss": 0.3048, "step": 8192 }, { "epoch": 0.6200652760039733, "grad_norm": 0.765625, "learning_rate": 1.565384938384739e-05, "loss": 0.3337, "step": 8193 }, { "epoch": 0.6201409583274207, "grad_norm": 0.7734375, "learning_rate": 1.5652866908577034e-05, "loss": 0.3446, "step": 8194 }, { "epoch": 0.620216640650868, "grad_norm": 0.796875, "learning_rate": 1.565188435311106e-05, "loss": 0.3657, "step": 8195 }, { "epoch": 0.6202923229743154, "grad_norm": 0.78515625, "learning_rate": 1.565090171746341e-05, "loss": 0.3161, "step": 8196 }, { "epoch": 0.6203680052977626, "grad_norm": 0.84375, "learning_rate": 1.5649919001648024e-05, "loss": 0.3658, "step": 8197 }, { "epoch": 0.6204436876212099, "grad_norm": 0.78515625, "learning_rate": 1.564893620567884e-05, "loss": 0.3418, "step": 8198 }, { "epoch": 0.6205193699446573, "grad_norm": 0.75390625, "learning_rate": 1.56479533295698e-05, "loss": 0.2622, "step": 8199 }, { "epoch": 0.6205950522681046, "grad_norm": 0.73046875, "learning_rate": 1.564697037333485e-05, "loss": 0.3031, "step": 8200 }, { "epoch": 0.620670734591552, "grad_norm": 0.76953125, "learning_rate": 1.5645987336987938e-05, "loss": 0.2941, "step": 8201 }, { "epoch": 0.6207464169149993, "grad_norm": 0.7890625, "learning_rate": 1.5645004220543006e-05, "loss": 0.3525, "step": 8202 }, { "epoch": 0.6208220992384467, "grad_norm": 0.74609375, "learning_rate": 1.5644021024014e-05, "loss": 0.3229, "step": 8203 }, { "epoch": 0.6208977815618939, "grad_norm": 0.78515625, "learning_rate": 1.564303774741487e-05, "loss": 0.3295, "step": 8204 }, { "epoch": 0.6209734638853412, "grad_norm": 0.7734375, "learning_rate": 1.564205439075957e-05, "loss": 0.3279, "step": 8205 }, { "epoch": 0.6210491462087886, "grad_norm": 0.79296875, "learning_rate": 1.5641070954062038e-05, "loss": 0.3398, "step": 8206 }, { "epoch": 0.6211248285322359, "grad_norm": 0.84375, "learning_rate": 1.5640087437336244e-05, "loss": 0.3922, "step": 8207 }, { "epoch": 0.6212005108556833, "grad_norm": 0.7578125, "learning_rate": 1.5639103840596123e-05, "loss": 0.2885, "step": 8208 }, { "epoch": 0.6212761931791306, "grad_norm": 0.74609375, "learning_rate": 1.563812016385564e-05, "loss": 0.3065, "step": 8209 }, { "epoch": 0.621351875502578, "grad_norm": 0.796875, "learning_rate": 1.563713640712875e-05, "loss": 0.3499, "step": 8210 }, { "epoch": 0.6214275578260252, "grad_norm": 0.74609375, "learning_rate": 1.5636152570429405e-05, "loss": 0.2935, "step": 8211 }, { "epoch": 0.6215032401494726, "grad_norm": 0.859375, "learning_rate": 1.5635168653771564e-05, "loss": 0.3636, "step": 8212 }, { "epoch": 0.6215789224729199, "grad_norm": 0.73046875, "learning_rate": 1.563418465716918e-05, "loss": 0.295, "step": 8213 }, { "epoch": 0.6216546047963672, "grad_norm": 0.75390625, "learning_rate": 1.563320058063622e-05, "loss": 0.3386, "step": 8214 }, { "epoch": 0.6217302871198146, "grad_norm": 0.80859375, "learning_rate": 1.5632216424186646e-05, "loss": 0.3605, "step": 8215 }, { "epoch": 0.6218059694432619, "grad_norm": 0.7890625, "learning_rate": 1.5631232187834416e-05, "loss": 0.3226, "step": 8216 }, { "epoch": 0.6218816517667093, "grad_norm": 0.734375, "learning_rate": 1.5630247871593494e-05, "loss": 0.3294, "step": 8217 }, { "epoch": 0.6219573340901565, "grad_norm": 1.1328125, "learning_rate": 1.5629263475477844e-05, "loss": 0.2937, "step": 8218 }, { "epoch": 0.6220330164136039, "grad_norm": 0.71875, "learning_rate": 1.562827899950143e-05, "loss": 0.3, "step": 8219 }, { "epoch": 0.6221086987370512, "grad_norm": 0.8125, "learning_rate": 1.562729444367822e-05, "loss": 0.3279, "step": 8220 }, { "epoch": 0.6221843810604986, "grad_norm": 0.78515625, "learning_rate": 1.5626309808022185e-05, "loss": 0.3242, "step": 8221 }, { "epoch": 0.6222600633839459, "grad_norm": 0.72265625, "learning_rate": 1.5625325092547287e-05, "loss": 0.246, "step": 8222 }, { "epoch": 0.6223357457073933, "grad_norm": 0.78125, "learning_rate": 1.56243402972675e-05, "loss": 0.3591, "step": 8223 }, { "epoch": 0.6224114280308406, "grad_norm": 0.78515625, "learning_rate": 1.5623355422196797e-05, "loss": 0.288, "step": 8224 }, { "epoch": 0.6224871103542878, "grad_norm": 0.79296875, "learning_rate": 1.5622370467349144e-05, "loss": 0.2996, "step": 8225 }, { "epoch": 0.6225627926777352, "grad_norm": 1.0859375, "learning_rate": 1.5621385432738516e-05, "loss": 0.3212, "step": 8226 }, { "epoch": 0.6226384750011825, "grad_norm": 0.76953125, "learning_rate": 1.562040031837889e-05, "loss": 0.3182, "step": 8227 }, { "epoch": 0.6227141573246299, "grad_norm": 0.81640625, "learning_rate": 1.5619415124284242e-05, "loss": 0.3489, "step": 8228 }, { "epoch": 0.6227898396480772, "grad_norm": 0.75390625, "learning_rate": 1.561842985046855e-05, "loss": 0.2976, "step": 8229 }, { "epoch": 0.6228655219715246, "grad_norm": 0.75390625, "learning_rate": 1.5617444496945785e-05, "loss": 0.3105, "step": 8230 }, { "epoch": 0.6229412042949719, "grad_norm": 0.82421875, "learning_rate": 1.5616459063729932e-05, "loss": 0.3693, "step": 8231 }, { "epoch": 0.6230168866184191, "grad_norm": 0.81640625, "learning_rate": 1.5615473550834964e-05, "loss": 0.2889, "step": 8232 }, { "epoch": 0.6230925689418665, "grad_norm": 1.5703125, "learning_rate": 1.561448795827487e-05, "loss": 0.3866, "step": 8233 }, { "epoch": 0.6231682512653138, "grad_norm": 0.7734375, "learning_rate": 1.5613502286063634e-05, "loss": 0.3408, "step": 8234 }, { "epoch": 0.6232439335887612, "grad_norm": 0.84765625, "learning_rate": 1.561251653421523e-05, "loss": 0.3713, "step": 8235 }, { "epoch": 0.6233196159122085, "grad_norm": 0.78515625, "learning_rate": 1.5611530702743653e-05, "loss": 0.3308, "step": 8236 }, { "epoch": 0.6233952982356559, "grad_norm": 0.828125, "learning_rate": 1.5610544791662877e-05, "loss": 0.3291, "step": 8237 }, { "epoch": 0.6234709805591032, "grad_norm": 0.7890625, "learning_rate": 1.5609558800986893e-05, "loss": 0.3366, "step": 8238 }, { "epoch": 0.6235466628825505, "grad_norm": 0.71484375, "learning_rate": 1.56085727307297e-05, "loss": 0.2936, "step": 8239 }, { "epoch": 0.6236223452059978, "grad_norm": 0.828125, "learning_rate": 1.5607586580905273e-05, "loss": 0.338, "step": 8240 }, { "epoch": 0.6236980275294451, "grad_norm": 0.75390625, "learning_rate": 1.5606600351527608e-05, "loss": 0.3164, "step": 8241 }, { "epoch": 0.6237737098528925, "grad_norm": 0.765625, "learning_rate": 1.5605614042610694e-05, "loss": 0.3118, "step": 8242 }, { "epoch": 0.6238493921763398, "grad_norm": 0.76953125, "learning_rate": 1.560462765416853e-05, "loss": 0.3149, "step": 8243 }, { "epoch": 0.6239250744997872, "grad_norm": 0.80859375, "learning_rate": 1.5603641186215104e-05, "loss": 0.3296, "step": 8244 }, { "epoch": 0.6240007568232345, "grad_norm": 0.796875, "learning_rate": 1.5602654638764408e-05, "loss": 0.3548, "step": 8245 }, { "epoch": 0.6240764391466818, "grad_norm": 0.78125, "learning_rate": 1.560166801183044e-05, "loss": 0.3159, "step": 8246 }, { "epoch": 0.6241521214701291, "grad_norm": 0.75390625, "learning_rate": 1.5600681305427202e-05, "loss": 0.3279, "step": 8247 }, { "epoch": 0.6242278037935765, "grad_norm": 1.2265625, "learning_rate": 1.5599694519568688e-05, "loss": 0.3516, "step": 8248 }, { "epoch": 0.6243034861170238, "grad_norm": 0.765625, "learning_rate": 1.5598707654268902e-05, "loss": 0.2953, "step": 8249 }, { "epoch": 0.6243791684404711, "grad_norm": 0.87109375, "learning_rate": 1.5597720709541834e-05, "loss": 0.3678, "step": 8250 }, { "epoch": 0.6244548507639185, "grad_norm": 0.828125, "learning_rate": 1.5596733685401493e-05, "loss": 0.3401, "step": 8251 }, { "epoch": 0.6245305330873658, "grad_norm": 0.7421875, "learning_rate": 1.5595746581861883e-05, "loss": 0.3187, "step": 8252 }, { "epoch": 0.6246062154108131, "grad_norm": 0.7890625, "learning_rate": 1.5594759398937004e-05, "loss": 0.3181, "step": 8253 }, { "epoch": 0.6246818977342604, "grad_norm": 0.7890625, "learning_rate": 1.559377213664086e-05, "loss": 0.3266, "step": 8254 }, { "epoch": 0.6247575800577078, "grad_norm": 0.8828125, "learning_rate": 1.559278479498746e-05, "loss": 0.3698, "step": 8255 }, { "epoch": 0.6248332623811551, "grad_norm": 0.71875, "learning_rate": 1.559179737399081e-05, "loss": 0.2833, "step": 8256 }, { "epoch": 0.6249089447046025, "grad_norm": 0.79296875, "learning_rate": 1.5590809873664918e-05, "loss": 0.3372, "step": 8257 }, { "epoch": 0.6249846270280498, "grad_norm": 0.85546875, "learning_rate": 1.55898222940238e-05, "loss": 0.3842, "step": 8258 }, { "epoch": 0.6250603093514971, "grad_norm": 0.79296875, "learning_rate": 1.558883463508145e-05, "loss": 0.3402, "step": 8259 }, { "epoch": 0.6251359916749444, "grad_norm": 0.67578125, "learning_rate": 1.5587846896851894e-05, "loss": 0.2715, "step": 8260 }, { "epoch": 0.6252116739983917, "grad_norm": 0.734375, "learning_rate": 1.558685907934914e-05, "loss": 0.3133, "step": 8261 }, { "epoch": 0.6252873563218391, "grad_norm": 0.7421875, "learning_rate": 1.5585871182587208e-05, "loss": 0.3023, "step": 8262 }, { "epoch": 0.6253630386452864, "grad_norm": 0.78515625, "learning_rate": 1.5584883206580105e-05, "loss": 0.3638, "step": 8263 }, { "epoch": 0.6254387209687338, "grad_norm": 0.734375, "learning_rate": 1.5583895151341845e-05, "loss": 0.305, "step": 8264 }, { "epoch": 0.6255144032921811, "grad_norm": 0.890625, "learning_rate": 1.5582907016886454e-05, "loss": 0.3739, "step": 8265 }, { "epoch": 0.6255900856156285, "grad_norm": 0.71875, "learning_rate": 1.5581918803227947e-05, "loss": 0.2932, "step": 8266 }, { "epoch": 0.6256657679390757, "grad_norm": 0.79296875, "learning_rate": 1.5580930510380343e-05, "loss": 0.3487, "step": 8267 }, { "epoch": 0.625741450262523, "grad_norm": 0.79296875, "learning_rate": 1.557994213835766e-05, "loss": 0.3343, "step": 8268 }, { "epoch": 0.6258171325859704, "grad_norm": 0.7421875, "learning_rate": 1.5578953687173923e-05, "loss": 0.3301, "step": 8269 }, { "epoch": 0.6258928149094177, "grad_norm": 0.78125, "learning_rate": 1.5577965156843153e-05, "loss": 0.3337, "step": 8270 }, { "epoch": 0.6259684972328651, "grad_norm": 0.75390625, "learning_rate": 1.557697654737938e-05, "loss": 0.3198, "step": 8271 }, { "epoch": 0.6260441795563124, "grad_norm": 0.7734375, "learning_rate": 1.557598785879662e-05, "loss": 0.3272, "step": 8272 }, { "epoch": 0.6261198618797597, "grad_norm": 0.84375, "learning_rate": 1.5574999091108903e-05, "loss": 0.384, "step": 8273 }, { "epoch": 0.626195544203207, "grad_norm": 0.6796875, "learning_rate": 1.557401024433026e-05, "loss": 0.2747, "step": 8274 }, { "epoch": 0.6262712265266543, "grad_norm": 0.80859375, "learning_rate": 1.5573021318474714e-05, "loss": 0.3603, "step": 8275 }, { "epoch": 0.6263469088501017, "grad_norm": 0.828125, "learning_rate": 1.5572032313556296e-05, "loss": 0.3819, "step": 8276 }, { "epoch": 0.626422591173549, "grad_norm": 0.70703125, "learning_rate": 1.557104322958904e-05, "loss": 0.2913, "step": 8277 }, { "epoch": 0.6264982734969964, "grad_norm": 0.84375, "learning_rate": 1.5570054066586974e-05, "loss": 0.3358, "step": 8278 }, { "epoch": 0.6265739558204437, "grad_norm": 0.77734375, "learning_rate": 1.5569064824564132e-05, "loss": 0.3232, "step": 8279 }, { "epoch": 0.626649638143891, "grad_norm": 0.76953125, "learning_rate": 1.556807550353455e-05, "loss": 0.3091, "step": 8280 }, { "epoch": 0.6267253204673383, "grad_norm": 0.77734375, "learning_rate": 1.5567086103512265e-05, "loss": 0.2939, "step": 8281 }, { "epoch": 0.6268010027907857, "grad_norm": 0.8046875, "learning_rate": 1.5566096624511306e-05, "loss": 0.3652, "step": 8282 }, { "epoch": 0.626876685114233, "grad_norm": 0.78125, "learning_rate": 1.5565107066545717e-05, "loss": 0.3446, "step": 8283 }, { "epoch": 0.6269523674376803, "grad_norm": 0.6796875, "learning_rate": 1.5564117429629533e-05, "loss": 0.277, "step": 8284 }, { "epoch": 0.6270280497611277, "grad_norm": 1.046875, "learning_rate": 1.5563127713776793e-05, "loss": 0.3093, "step": 8285 }, { "epoch": 0.627103732084575, "grad_norm": 0.796875, "learning_rate": 1.556213791900154e-05, "loss": 0.3469, "step": 8286 }, { "epoch": 0.6271794144080223, "grad_norm": 0.7265625, "learning_rate": 1.5561148045317818e-05, "loss": 0.2803, "step": 8287 }, { "epoch": 0.6272550967314696, "grad_norm": 0.76953125, "learning_rate": 1.5560158092739668e-05, "loss": 0.3469, "step": 8288 }, { "epoch": 0.627330779054917, "grad_norm": 0.765625, "learning_rate": 1.555916806128113e-05, "loss": 0.3345, "step": 8289 }, { "epoch": 0.6274064613783643, "grad_norm": 0.75, "learning_rate": 1.5558177950956256e-05, "loss": 0.3287, "step": 8290 }, { "epoch": 0.6274821437018117, "grad_norm": 0.7265625, "learning_rate": 1.5557187761779093e-05, "loss": 0.2852, "step": 8291 }, { "epoch": 0.627557826025259, "grad_norm": 0.78125, "learning_rate": 1.555619749376368e-05, "loss": 0.3483, "step": 8292 }, { "epoch": 0.6276335083487063, "grad_norm": 0.82421875, "learning_rate": 1.5555207146924074e-05, "loss": 0.3937, "step": 8293 }, { "epoch": 0.6277091906721536, "grad_norm": 0.7421875, "learning_rate": 1.555421672127432e-05, "loss": 0.3053, "step": 8294 }, { "epoch": 0.6277848729956009, "grad_norm": 0.74609375, "learning_rate": 1.555322621682847e-05, "loss": 0.3199, "step": 8295 }, { "epoch": 0.6278605553190483, "grad_norm": 0.82421875, "learning_rate": 1.5552235633600576e-05, "loss": 0.3839, "step": 8296 }, { "epoch": 0.6279362376424956, "grad_norm": 0.77734375, "learning_rate": 1.5551244971604695e-05, "loss": 0.3189, "step": 8297 }, { "epoch": 0.628011919965943, "grad_norm": 0.89453125, "learning_rate": 1.5550254230854872e-05, "loss": 0.3898, "step": 8298 }, { "epoch": 0.6280876022893903, "grad_norm": 0.75390625, "learning_rate": 1.5549263411365175e-05, "loss": 0.3011, "step": 8299 }, { "epoch": 0.6281632846128377, "grad_norm": 0.72265625, "learning_rate": 1.554827251314965e-05, "loss": 0.284, "step": 8300 }, { "epoch": 0.6282389669362849, "grad_norm": 0.75, "learning_rate": 1.554728153622236e-05, "loss": 0.304, "step": 8301 }, { "epoch": 0.6283146492597322, "grad_norm": 0.76171875, "learning_rate": 1.5546290480597356e-05, "loss": 0.3267, "step": 8302 }, { "epoch": 0.6283903315831796, "grad_norm": 0.77734375, "learning_rate": 1.5545299346288708e-05, "loss": 0.336, "step": 8303 }, { "epoch": 0.6284660139066269, "grad_norm": 0.796875, "learning_rate": 1.554430813331047e-05, "loss": 0.3491, "step": 8304 }, { "epoch": 0.6285416962300743, "grad_norm": 0.73828125, "learning_rate": 1.5543316841676715e-05, "loss": 0.314, "step": 8305 }, { "epoch": 0.6286173785535216, "grad_norm": 0.6875, "learning_rate": 1.5542325471401493e-05, "loss": 0.2607, "step": 8306 }, { "epoch": 0.628693060876969, "grad_norm": 0.82421875, "learning_rate": 1.554133402249887e-05, "loss": 0.3185, "step": 8307 }, { "epoch": 0.6287687432004162, "grad_norm": 0.78125, "learning_rate": 1.554034249498292e-05, "loss": 0.3291, "step": 8308 }, { "epoch": 0.6288444255238635, "grad_norm": 0.77734375, "learning_rate": 1.55393508888677e-05, "loss": 0.3505, "step": 8309 }, { "epoch": 0.6289201078473109, "grad_norm": 0.79296875, "learning_rate": 1.5538359204167285e-05, "loss": 0.3653, "step": 8310 }, { "epoch": 0.6289957901707582, "grad_norm": 0.75, "learning_rate": 1.5537367440895737e-05, "loss": 0.2883, "step": 8311 }, { "epoch": 0.6290714724942056, "grad_norm": 0.71484375, "learning_rate": 1.5536375599067133e-05, "loss": 0.2897, "step": 8312 }, { "epoch": 0.6291471548176529, "grad_norm": 0.76171875, "learning_rate": 1.5535383678695538e-05, "loss": 0.3153, "step": 8313 }, { "epoch": 0.6292228371411003, "grad_norm": 0.765625, "learning_rate": 1.5534391679795027e-05, "loss": 0.3464, "step": 8314 }, { "epoch": 0.6292985194645475, "grad_norm": 0.7578125, "learning_rate": 1.5533399602379674e-05, "loss": 0.3358, "step": 8315 }, { "epoch": 0.6293742017879949, "grad_norm": 0.80859375, "learning_rate": 1.5532407446463548e-05, "loss": 0.3363, "step": 8316 }, { "epoch": 0.6294498841114422, "grad_norm": 0.78125, "learning_rate": 1.5531415212060732e-05, "loss": 0.3164, "step": 8317 }, { "epoch": 0.6295255664348895, "grad_norm": 0.6953125, "learning_rate": 1.5530422899185298e-05, "loss": 0.2826, "step": 8318 }, { "epoch": 0.6296012487583369, "grad_norm": 0.78515625, "learning_rate": 1.5529430507851325e-05, "loss": 0.3548, "step": 8319 }, { "epoch": 0.6296769310817842, "grad_norm": 0.72265625, "learning_rate": 1.552843803807289e-05, "loss": 0.3027, "step": 8320 }, { "epoch": 0.6297526134052316, "grad_norm": 0.81640625, "learning_rate": 1.5527445489864076e-05, "loss": 0.3672, "step": 8321 }, { "epoch": 0.6298282957286788, "grad_norm": 0.8046875, "learning_rate": 1.5526452863238964e-05, "loss": 0.3535, "step": 8322 }, { "epoch": 0.6299039780521262, "grad_norm": 0.734375, "learning_rate": 1.5525460158211632e-05, "loss": 0.281, "step": 8323 }, { "epoch": 0.6299796603755735, "grad_norm": 0.765625, "learning_rate": 1.5524467374796165e-05, "loss": 0.3116, "step": 8324 }, { "epoch": 0.6300553426990209, "grad_norm": 0.79296875, "learning_rate": 1.552347451300665e-05, "loss": 0.3279, "step": 8325 }, { "epoch": 0.6301310250224682, "grad_norm": 0.73046875, "learning_rate": 1.5522481572857168e-05, "loss": 0.3102, "step": 8326 }, { "epoch": 0.6302067073459156, "grad_norm": 0.796875, "learning_rate": 1.552148855436181e-05, "loss": 0.3253, "step": 8327 }, { "epoch": 0.6302823896693629, "grad_norm": 0.77734375, "learning_rate": 1.5520495457534663e-05, "loss": 0.3189, "step": 8328 }, { "epoch": 0.6303580719928101, "grad_norm": 0.76953125, "learning_rate": 1.5519502282389812e-05, "loss": 0.3181, "step": 8329 }, { "epoch": 0.6304337543162575, "grad_norm": 0.81640625, "learning_rate": 1.551850902894135e-05, "loss": 0.3594, "step": 8330 }, { "epoch": 0.6305094366397048, "grad_norm": 0.7734375, "learning_rate": 1.5517515697203366e-05, "loss": 0.3499, "step": 8331 }, { "epoch": 0.6305851189631522, "grad_norm": 0.7421875, "learning_rate": 1.5516522287189953e-05, "loss": 0.3097, "step": 8332 }, { "epoch": 0.6306608012865995, "grad_norm": 0.81640625, "learning_rate": 1.551552879891521e-05, "loss": 0.3422, "step": 8333 }, { "epoch": 0.6307364836100469, "grad_norm": 0.765625, "learning_rate": 1.551453523239322e-05, "loss": 0.284, "step": 8334 }, { "epoch": 0.6308121659334942, "grad_norm": 0.75, "learning_rate": 1.5513541587638084e-05, "loss": 0.3147, "step": 8335 }, { "epoch": 0.6308878482569414, "grad_norm": 0.75390625, "learning_rate": 1.5512547864663904e-05, "loss": 0.3176, "step": 8336 }, { "epoch": 0.6309635305803888, "grad_norm": 0.7421875, "learning_rate": 1.5511554063484774e-05, "loss": 0.2795, "step": 8337 }, { "epoch": 0.6310392129038361, "grad_norm": 0.76171875, "learning_rate": 1.5510560184114785e-05, "loss": 0.3186, "step": 8338 }, { "epoch": 0.6311148952272835, "grad_norm": 0.79296875, "learning_rate": 1.5509566226568044e-05, "loss": 0.3794, "step": 8339 }, { "epoch": 0.6311905775507308, "grad_norm": 0.75390625, "learning_rate": 1.5508572190858654e-05, "loss": 0.3235, "step": 8340 }, { "epoch": 0.6312662598741782, "grad_norm": 0.73046875, "learning_rate": 1.5507578077000713e-05, "loss": 0.3134, "step": 8341 }, { "epoch": 0.6313419421976255, "grad_norm": 0.78125, "learning_rate": 1.550658388500833e-05, "loss": 0.3083, "step": 8342 }, { "epoch": 0.6314176245210728, "grad_norm": 0.75, "learning_rate": 1.5505589614895602e-05, "loss": 0.3311, "step": 8343 }, { "epoch": 0.6314933068445201, "grad_norm": 0.734375, "learning_rate": 1.5504595266676635e-05, "loss": 0.2913, "step": 8344 }, { "epoch": 0.6315689891679674, "grad_norm": 0.73046875, "learning_rate": 1.550360084036554e-05, "loss": 0.3109, "step": 8345 }, { "epoch": 0.6316446714914148, "grad_norm": 0.734375, "learning_rate": 1.550260633597642e-05, "loss": 0.3018, "step": 8346 }, { "epoch": 0.6317203538148621, "grad_norm": 0.71875, "learning_rate": 1.550161175352339e-05, "loss": 0.3077, "step": 8347 }, { "epoch": 0.6317960361383095, "grad_norm": 0.765625, "learning_rate": 1.5500617093020557e-05, "loss": 0.3082, "step": 8348 }, { "epoch": 0.6318717184617568, "grad_norm": 0.73046875, "learning_rate": 1.5499622354482028e-05, "loss": 0.3097, "step": 8349 }, { "epoch": 0.6319474007852041, "grad_norm": 0.77734375, "learning_rate": 1.549862753792192e-05, "loss": 0.337, "step": 8350 }, { "epoch": 0.6320230831086514, "grad_norm": 0.75, "learning_rate": 1.5497632643354344e-05, "loss": 0.3323, "step": 8351 }, { "epoch": 0.6320987654320988, "grad_norm": 0.8046875, "learning_rate": 1.5496637670793415e-05, "loss": 0.3418, "step": 8352 }, { "epoch": 0.6321744477555461, "grad_norm": 1.0078125, "learning_rate": 1.5495642620253248e-05, "loss": 0.3725, "step": 8353 }, { "epoch": 0.6322501300789934, "grad_norm": 0.77734375, "learning_rate": 1.5494647491747957e-05, "loss": 0.2894, "step": 8354 }, { "epoch": 0.6323258124024408, "grad_norm": 0.765625, "learning_rate": 1.5493652285291666e-05, "loss": 0.2996, "step": 8355 }, { "epoch": 0.6324014947258881, "grad_norm": 0.765625, "learning_rate": 1.549265700089849e-05, "loss": 0.3256, "step": 8356 }, { "epoch": 0.6324771770493354, "grad_norm": 0.72265625, "learning_rate": 1.5491661638582546e-05, "loss": 0.2727, "step": 8357 }, { "epoch": 0.6325528593727827, "grad_norm": 0.73828125, "learning_rate": 1.5490666198357956e-05, "loss": 0.2916, "step": 8358 }, { "epoch": 0.6326285416962301, "grad_norm": 0.7734375, "learning_rate": 1.5489670680238848e-05, "loss": 0.3159, "step": 8359 }, { "epoch": 0.6327042240196774, "grad_norm": 0.7734375, "learning_rate": 1.548867508423934e-05, "loss": 0.3186, "step": 8360 }, { "epoch": 0.6327799063431248, "grad_norm": 0.74609375, "learning_rate": 1.5487679410373555e-05, "loss": 0.317, "step": 8361 }, { "epoch": 0.6328555886665721, "grad_norm": 0.7578125, "learning_rate": 1.548668365865562e-05, "loss": 0.2982, "step": 8362 }, { "epoch": 0.6329312709900194, "grad_norm": 0.765625, "learning_rate": 1.5485687829099665e-05, "loss": 0.3287, "step": 8363 }, { "epoch": 0.6330069533134667, "grad_norm": 0.75390625, "learning_rate": 1.5484691921719813e-05, "loss": 0.3091, "step": 8364 }, { "epoch": 0.633082635636914, "grad_norm": 0.80859375, "learning_rate": 1.5483695936530193e-05, "loss": 0.3334, "step": 8365 }, { "epoch": 0.6331583179603614, "grad_norm": 0.7890625, "learning_rate": 1.5482699873544937e-05, "loss": 0.3327, "step": 8366 }, { "epoch": 0.6332340002838087, "grad_norm": 0.7734375, "learning_rate": 1.5481703732778173e-05, "loss": 0.3589, "step": 8367 }, { "epoch": 0.6333096826072561, "grad_norm": 0.80859375, "learning_rate": 1.5480707514244038e-05, "loss": 0.294, "step": 8368 }, { "epoch": 0.6333853649307034, "grad_norm": 0.79296875, "learning_rate": 1.5479711217956658e-05, "loss": 0.3519, "step": 8369 }, { "epoch": 0.6334610472541508, "grad_norm": 0.7578125, "learning_rate": 1.547871484393017e-05, "loss": 0.3091, "step": 8370 }, { "epoch": 0.633536729577598, "grad_norm": 0.76171875, "learning_rate": 1.5477718392178716e-05, "loss": 0.2901, "step": 8371 }, { "epoch": 0.6336124119010453, "grad_norm": 0.92578125, "learning_rate": 1.547672186271642e-05, "loss": 0.309, "step": 8372 }, { "epoch": 0.6336880942244927, "grad_norm": 0.83203125, "learning_rate": 1.547572525555743e-05, "loss": 0.3395, "step": 8373 }, { "epoch": 0.63376377654794, "grad_norm": 0.73828125, "learning_rate": 1.547472857071588e-05, "loss": 0.2912, "step": 8374 }, { "epoch": 0.6338394588713874, "grad_norm": 0.7109375, "learning_rate": 1.547373180820591e-05, "loss": 0.2585, "step": 8375 }, { "epoch": 0.6339151411948347, "grad_norm": 0.77734375, "learning_rate": 1.547273496804166e-05, "loss": 0.3394, "step": 8376 }, { "epoch": 0.6339908235182821, "grad_norm": 0.7421875, "learning_rate": 1.5471738050237277e-05, "loss": 0.3211, "step": 8377 }, { "epoch": 0.6340665058417293, "grad_norm": 0.7734375, "learning_rate": 1.5470741054806894e-05, "loss": 0.3093, "step": 8378 }, { "epoch": 0.6341421881651766, "grad_norm": 0.71484375, "learning_rate": 1.5469743981764667e-05, "loss": 0.2902, "step": 8379 }, { "epoch": 0.634217870488624, "grad_norm": 0.7734375, "learning_rate": 1.546874683112473e-05, "loss": 0.3479, "step": 8380 }, { "epoch": 0.6342935528120713, "grad_norm": 0.828125, "learning_rate": 1.5467749602901235e-05, "loss": 0.3731, "step": 8381 }, { "epoch": 0.6343692351355187, "grad_norm": 0.7890625, "learning_rate": 1.546675229710833e-05, "loss": 0.3367, "step": 8382 }, { "epoch": 0.634444917458966, "grad_norm": 0.765625, "learning_rate": 1.5465754913760164e-05, "loss": 0.3263, "step": 8383 }, { "epoch": 0.6345205997824134, "grad_norm": 0.71875, "learning_rate": 1.5464757452870885e-05, "loss": 0.2863, "step": 8384 }, { "epoch": 0.6345962821058606, "grad_norm": 0.71875, "learning_rate": 1.5463759914454643e-05, "loss": 0.2933, "step": 8385 }, { "epoch": 0.634671964429308, "grad_norm": 0.890625, "learning_rate": 1.546276229852559e-05, "loss": 0.3545, "step": 8386 }, { "epoch": 0.6347476467527553, "grad_norm": 0.671875, "learning_rate": 1.5461764605097877e-05, "loss": 0.2559, "step": 8387 }, { "epoch": 0.6348233290762026, "grad_norm": 0.76171875, "learning_rate": 1.5460766834185666e-05, "loss": 0.3089, "step": 8388 }, { "epoch": 0.63489901139965, "grad_norm": 0.75390625, "learning_rate": 1.5459768985803104e-05, "loss": 0.3, "step": 8389 }, { "epoch": 0.6349746937230973, "grad_norm": 0.7890625, "learning_rate": 1.5458771059964348e-05, "loss": 0.3371, "step": 8390 }, { "epoch": 0.6350503760465446, "grad_norm": 0.796875, "learning_rate": 1.545777305668356e-05, "loss": 0.3444, "step": 8391 }, { "epoch": 0.6351260583699919, "grad_norm": 0.796875, "learning_rate": 1.5456774975974892e-05, "loss": 0.3523, "step": 8392 }, { "epoch": 0.6352017406934393, "grad_norm": 0.74609375, "learning_rate": 1.545577681785251e-05, "loss": 0.306, "step": 8393 }, { "epoch": 0.6352774230168866, "grad_norm": 0.78125, "learning_rate": 1.5454778582330567e-05, "loss": 0.3354, "step": 8394 }, { "epoch": 0.635353105340334, "grad_norm": 0.77734375, "learning_rate": 1.545378026942323e-05, "loss": 0.309, "step": 8395 }, { "epoch": 0.6354287876637813, "grad_norm": 0.7734375, "learning_rate": 1.545278187914466e-05, "loss": 0.352, "step": 8396 }, { "epoch": 0.6355044699872286, "grad_norm": 0.765625, "learning_rate": 1.5451783411509026e-05, "loss": 0.3224, "step": 8397 }, { "epoch": 0.6355801523106759, "grad_norm": 0.78515625, "learning_rate": 1.5450784866530488e-05, "loss": 0.3527, "step": 8398 }, { "epoch": 0.6356558346341232, "grad_norm": 1.5234375, "learning_rate": 1.544978624422321e-05, "loss": 0.4038, "step": 8399 }, { "epoch": 0.6357315169575706, "grad_norm": 0.7734375, "learning_rate": 1.5448787544601366e-05, "loss": 0.3431, "step": 8400 }, { "epoch": 0.6358071992810179, "grad_norm": 0.79296875, "learning_rate": 1.5447788767679115e-05, "loss": 0.3469, "step": 8401 }, { "epoch": 0.6358828816044653, "grad_norm": 0.80859375, "learning_rate": 1.5446789913470633e-05, "loss": 0.3787, "step": 8402 }, { "epoch": 0.6359585639279126, "grad_norm": 0.8828125, "learning_rate": 1.5445790981990093e-05, "loss": 0.3405, "step": 8403 }, { "epoch": 0.63603424625136, "grad_norm": 0.73046875, "learning_rate": 1.544479197325166e-05, "loss": 0.2994, "step": 8404 }, { "epoch": 0.6361099285748072, "grad_norm": 0.72265625, "learning_rate": 1.544379288726951e-05, "loss": 0.3122, "step": 8405 }, { "epoch": 0.6361856108982545, "grad_norm": 0.7421875, "learning_rate": 1.5442793724057815e-05, "loss": 0.3118, "step": 8406 }, { "epoch": 0.6362612932217019, "grad_norm": 0.73828125, "learning_rate": 1.5441794483630753e-05, "loss": 0.3202, "step": 8407 }, { "epoch": 0.6363369755451492, "grad_norm": 0.7109375, "learning_rate": 1.5440795166002492e-05, "loss": 0.3046, "step": 8408 }, { "epoch": 0.6364126578685966, "grad_norm": 0.78125, "learning_rate": 1.543979577118722e-05, "loss": 0.2938, "step": 8409 }, { "epoch": 0.6364883401920439, "grad_norm": 0.7890625, "learning_rate": 1.543879629919911e-05, "loss": 0.3064, "step": 8410 }, { "epoch": 0.6365640225154913, "grad_norm": 0.75390625, "learning_rate": 1.543779675005234e-05, "loss": 0.3317, "step": 8411 }, { "epoch": 0.6366397048389385, "grad_norm": 0.73828125, "learning_rate": 1.5436797123761095e-05, "loss": 0.3107, "step": 8412 }, { "epoch": 0.6367153871623858, "grad_norm": 0.83203125, "learning_rate": 1.5435797420339545e-05, "loss": 0.3133, "step": 8413 }, { "epoch": 0.6367910694858332, "grad_norm": 0.71484375, "learning_rate": 1.5434797639801887e-05, "loss": 0.2811, "step": 8414 }, { "epoch": 0.6368667518092805, "grad_norm": 0.80859375, "learning_rate": 1.5433797782162295e-05, "loss": 0.3224, "step": 8415 }, { "epoch": 0.6369424341327279, "grad_norm": 0.77734375, "learning_rate": 1.5432797847434957e-05, "loss": 0.2974, "step": 8416 }, { "epoch": 0.6370181164561752, "grad_norm": 0.71484375, "learning_rate": 1.543179783563406e-05, "loss": 0.2809, "step": 8417 }, { "epoch": 0.6370937987796226, "grad_norm": 0.765625, "learning_rate": 1.543079774677379e-05, "loss": 0.3025, "step": 8418 }, { "epoch": 0.6371694811030698, "grad_norm": 0.73046875, "learning_rate": 1.5429797580868332e-05, "loss": 0.2958, "step": 8419 }, { "epoch": 0.6372451634265172, "grad_norm": 0.78515625, "learning_rate": 1.5428797337931877e-05, "loss": 0.3471, "step": 8420 }, { "epoch": 0.6373208457499645, "grad_norm": 0.79296875, "learning_rate": 1.542779701797862e-05, "loss": 0.3608, "step": 8421 }, { "epoch": 0.6373965280734118, "grad_norm": 0.7109375, "learning_rate": 1.5426796621022746e-05, "loss": 0.2837, "step": 8422 }, { "epoch": 0.6374722103968592, "grad_norm": 0.75, "learning_rate": 1.5425796147078448e-05, "loss": 0.3209, "step": 8423 }, { "epoch": 0.6375478927203065, "grad_norm": 0.77734375, "learning_rate": 1.542479559615992e-05, "loss": 0.3118, "step": 8424 }, { "epoch": 0.6376235750437539, "grad_norm": 0.734375, "learning_rate": 1.5423794968281356e-05, "loss": 0.2859, "step": 8425 }, { "epoch": 0.6376992573672011, "grad_norm": 0.75, "learning_rate": 1.5422794263456958e-05, "loss": 0.2992, "step": 8426 }, { "epoch": 0.6377749396906485, "grad_norm": 0.734375, "learning_rate": 1.5421793481700915e-05, "loss": 0.3072, "step": 8427 }, { "epoch": 0.6378506220140958, "grad_norm": 0.75390625, "learning_rate": 1.5420792623027425e-05, "loss": 0.3343, "step": 8428 }, { "epoch": 0.6379263043375432, "grad_norm": 0.75, "learning_rate": 1.5419791687450693e-05, "loss": 0.2774, "step": 8429 }, { "epoch": 0.6380019866609905, "grad_norm": 0.69140625, "learning_rate": 1.541879067498491e-05, "loss": 0.2709, "step": 8430 }, { "epoch": 0.6380776689844379, "grad_norm": 0.734375, "learning_rate": 1.541778958564429e-05, "loss": 0.3107, "step": 8431 }, { "epoch": 0.6381533513078852, "grad_norm": 0.78125, "learning_rate": 1.541678841944302e-05, "loss": 0.3463, "step": 8432 }, { "epoch": 0.6382290336313324, "grad_norm": 0.84765625, "learning_rate": 1.5415787176395313e-05, "loss": 0.3962, "step": 8433 }, { "epoch": 0.6383047159547798, "grad_norm": 0.85546875, "learning_rate": 1.5414785856515372e-05, "loss": 0.3609, "step": 8434 }, { "epoch": 0.6383803982782271, "grad_norm": 0.76953125, "learning_rate": 1.5413784459817403e-05, "loss": 0.3214, "step": 8435 }, { "epoch": 0.6384560806016745, "grad_norm": 0.734375, "learning_rate": 1.541278298631561e-05, "loss": 0.3273, "step": 8436 }, { "epoch": 0.6385317629251218, "grad_norm": 0.7734375, "learning_rate": 1.54117814360242e-05, "loss": 0.3633, "step": 8437 }, { "epoch": 0.6386074452485692, "grad_norm": 0.734375, "learning_rate": 1.5410779808957387e-05, "loss": 0.3195, "step": 8438 }, { "epoch": 0.6386831275720165, "grad_norm": 0.79296875, "learning_rate": 1.5409778105129375e-05, "loss": 0.3091, "step": 8439 }, { "epoch": 0.6387588098954637, "grad_norm": 0.8046875, "learning_rate": 1.5408776324554377e-05, "loss": 0.3228, "step": 8440 }, { "epoch": 0.6388344922189111, "grad_norm": 0.78125, "learning_rate": 1.5407774467246603e-05, "loss": 0.3393, "step": 8441 }, { "epoch": 0.6389101745423584, "grad_norm": 0.765625, "learning_rate": 1.540677253322027e-05, "loss": 0.2996, "step": 8442 }, { "epoch": 0.6389858568658058, "grad_norm": 0.72265625, "learning_rate": 1.5405770522489592e-05, "loss": 0.3094, "step": 8443 }, { "epoch": 0.6390615391892531, "grad_norm": 0.75, "learning_rate": 1.5404768435068778e-05, "loss": 0.3056, "step": 8444 }, { "epoch": 0.6391372215127005, "grad_norm": 0.7265625, "learning_rate": 1.5403766270972054e-05, "loss": 0.2835, "step": 8445 }, { "epoch": 0.6392129038361478, "grad_norm": 0.78515625, "learning_rate": 1.540276403021363e-05, "loss": 0.34, "step": 8446 }, { "epoch": 0.639288586159595, "grad_norm": 0.83984375, "learning_rate": 1.5401761712807724e-05, "loss": 0.3996, "step": 8447 }, { "epoch": 0.6393642684830424, "grad_norm": 0.78515625, "learning_rate": 1.5400759318768566e-05, "loss": 0.3622, "step": 8448 }, { "epoch": 0.6394399508064897, "grad_norm": 0.78125, "learning_rate": 1.539975684811036e-05, "loss": 0.3486, "step": 8449 }, { "epoch": 0.6395156331299371, "grad_norm": 1.21875, "learning_rate": 1.5398754300847346e-05, "loss": 0.3889, "step": 8450 }, { "epoch": 0.6395913154533844, "grad_norm": 0.75, "learning_rate": 1.5397751676993732e-05, "loss": 0.2958, "step": 8451 }, { "epoch": 0.6396669977768318, "grad_norm": 0.79296875, "learning_rate": 1.5396748976563745e-05, "loss": 0.3556, "step": 8452 }, { "epoch": 0.6397426801002791, "grad_norm": 0.7109375, "learning_rate": 1.5395746199571616e-05, "loss": 0.2707, "step": 8453 }, { "epoch": 0.6398183624237264, "grad_norm": 0.734375, "learning_rate": 1.539474334603157e-05, "loss": 0.3273, "step": 8454 }, { "epoch": 0.6398940447471737, "grad_norm": 0.82421875, "learning_rate": 1.539374041595783e-05, "loss": 0.3845, "step": 8455 }, { "epoch": 0.639969727070621, "grad_norm": 0.80078125, "learning_rate": 1.5392737409364624e-05, "loss": 0.3156, "step": 8456 }, { "epoch": 0.6400454093940684, "grad_norm": 0.83984375, "learning_rate": 1.5391734326266185e-05, "loss": 0.358, "step": 8457 }, { "epoch": 0.6401210917175157, "grad_norm": 0.80078125, "learning_rate": 1.539073116667674e-05, "loss": 0.315, "step": 8458 }, { "epoch": 0.6401967740409631, "grad_norm": 0.7578125, "learning_rate": 1.538972793061053e-05, "loss": 0.3156, "step": 8459 }, { "epoch": 0.6402724563644104, "grad_norm": 0.6953125, "learning_rate": 1.5388724618081774e-05, "loss": 0.281, "step": 8460 }, { "epoch": 0.6403481386878577, "grad_norm": 0.7890625, "learning_rate": 1.5387721229104712e-05, "loss": 0.3366, "step": 8461 }, { "epoch": 0.640423821011305, "grad_norm": 0.6953125, "learning_rate": 1.5386717763693578e-05, "loss": 0.2615, "step": 8462 }, { "epoch": 0.6404995033347524, "grad_norm": 0.73828125, "learning_rate": 1.5385714221862614e-05, "loss": 0.3184, "step": 8463 }, { "epoch": 0.6405751856581997, "grad_norm": 0.71875, "learning_rate": 1.538471060362605e-05, "loss": 0.2749, "step": 8464 }, { "epoch": 0.640650867981647, "grad_norm": 0.7265625, "learning_rate": 1.5383706908998123e-05, "loss": 0.2815, "step": 8465 }, { "epoch": 0.6407265503050944, "grad_norm": 0.8203125, "learning_rate": 1.5382703137993074e-05, "loss": 0.3579, "step": 8466 }, { "epoch": 0.6408022326285417, "grad_norm": 0.7578125, "learning_rate": 1.5381699290625142e-05, "loss": 0.3164, "step": 8467 }, { "epoch": 0.640877914951989, "grad_norm": 0.79296875, "learning_rate": 1.5380695366908577e-05, "loss": 0.3227, "step": 8468 }, { "epoch": 0.6409535972754363, "grad_norm": 0.83984375, "learning_rate": 1.537969136685761e-05, "loss": 0.3713, "step": 8469 }, { "epoch": 0.6410292795988837, "grad_norm": 0.73046875, "learning_rate": 1.537868729048649e-05, "loss": 0.2861, "step": 8470 }, { "epoch": 0.641104961922331, "grad_norm": 0.78515625, "learning_rate": 1.537768313780946e-05, "loss": 0.318, "step": 8471 }, { "epoch": 0.6411806442457784, "grad_norm": 0.76171875, "learning_rate": 1.5376678908840766e-05, "loss": 0.3353, "step": 8472 }, { "epoch": 0.6412563265692257, "grad_norm": 0.96484375, "learning_rate": 1.537567460359466e-05, "loss": 0.3307, "step": 8473 }, { "epoch": 0.6413320088926731, "grad_norm": 0.7421875, "learning_rate": 1.5374670222085383e-05, "loss": 0.2968, "step": 8474 }, { "epoch": 0.6414076912161203, "grad_norm": 0.74609375, "learning_rate": 1.5373665764327182e-05, "loss": 0.3062, "step": 8475 }, { "epoch": 0.6414833735395676, "grad_norm": 0.78515625, "learning_rate": 1.537266123033431e-05, "loss": 0.3513, "step": 8476 }, { "epoch": 0.641559055863015, "grad_norm": 0.73828125, "learning_rate": 1.5371656620121024e-05, "loss": 0.337, "step": 8477 }, { "epoch": 0.6416347381864623, "grad_norm": 0.73828125, "learning_rate": 1.5370651933701566e-05, "loss": 0.2761, "step": 8478 }, { "epoch": 0.6417104205099097, "grad_norm": 0.7265625, "learning_rate": 1.5369647171090197e-05, "loss": 0.2974, "step": 8479 }, { "epoch": 0.641786102833357, "grad_norm": 0.8046875, "learning_rate": 1.5368642332301165e-05, "loss": 0.3534, "step": 8480 }, { "epoch": 0.6418617851568044, "grad_norm": 0.9453125, "learning_rate": 1.536763741734873e-05, "loss": 0.371, "step": 8481 }, { "epoch": 0.6419374674802516, "grad_norm": 0.80078125, "learning_rate": 1.536663242624715e-05, "loss": 0.3155, "step": 8482 }, { "epoch": 0.642013149803699, "grad_norm": 0.75390625, "learning_rate": 1.5365627359010674e-05, "loss": 0.2965, "step": 8483 }, { "epoch": 0.6420888321271463, "grad_norm": 0.74609375, "learning_rate": 1.536462221565357e-05, "loss": 0.3246, "step": 8484 }, { "epoch": 0.6421645144505936, "grad_norm": 0.69140625, "learning_rate": 1.5363616996190093e-05, "loss": 0.2667, "step": 8485 }, { "epoch": 0.642240196774041, "grad_norm": 0.796875, "learning_rate": 1.5362611700634505e-05, "loss": 0.3603, "step": 8486 }, { "epoch": 0.6423158790974883, "grad_norm": 0.75, "learning_rate": 1.5361606329001066e-05, "loss": 0.3067, "step": 8487 }, { "epoch": 0.6423915614209357, "grad_norm": 0.87109375, "learning_rate": 1.5360600881304043e-05, "loss": 0.3822, "step": 8488 }, { "epoch": 0.6424672437443829, "grad_norm": 0.7421875, "learning_rate": 1.5359595357557697e-05, "loss": 0.2886, "step": 8489 }, { "epoch": 0.6425429260678303, "grad_norm": 0.7421875, "learning_rate": 1.5358589757776294e-05, "loss": 0.2989, "step": 8490 }, { "epoch": 0.6426186083912776, "grad_norm": 0.75, "learning_rate": 1.5357584081974097e-05, "loss": 0.3319, "step": 8491 }, { "epoch": 0.642694290714725, "grad_norm": 0.78125, "learning_rate": 1.5356578330165377e-05, "loss": 0.3107, "step": 8492 }, { "epoch": 0.6427699730381723, "grad_norm": 0.71875, "learning_rate": 1.5355572502364398e-05, "loss": 0.2961, "step": 8493 }, { "epoch": 0.6428456553616196, "grad_norm": 0.8203125, "learning_rate": 1.5354566598585435e-05, "loss": 0.3368, "step": 8494 }, { "epoch": 0.642921337685067, "grad_norm": 0.734375, "learning_rate": 1.5353560618842758e-05, "loss": 0.301, "step": 8495 }, { "epoch": 0.6429970200085142, "grad_norm": 0.7734375, "learning_rate": 1.5352554563150638e-05, "loss": 0.3111, "step": 8496 }, { "epoch": 0.6430727023319616, "grad_norm": 0.8046875, "learning_rate": 1.5351548431523342e-05, "loss": 0.3297, "step": 8497 }, { "epoch": 0.6431483846554089, "grad_norm": 1.1796875, "learning_rate": 1.535054222397515e-05, "loss": 0.3846, "step": 8498 }, { "epoch": 0.6432240669788563, "grad_norm": 0.7734375, "learning_rate": 1.534953594052033e-05, "loss": 0.3384, "step": 8499 }, { "epoch": 0.6432997493023036, "grad_norm": 0.70703125, "learning_rate": 1.534852958117317e-05, "loss": 0.2845, "step": 8500 }, { "epoch": 0.643375431625751, "grad_norm": 0.72265625, "learning_rate": 1.534752314594794e-05, "loss": 0.3035, "step": 8501 }, { "epoch": 0.6434511139491983, "grad_norm": 0.75, "learning_rate": 1.534651663485891e-05, "loss": 0.3031, "step": 8502 }, { "epoch": 0.6435267962726455, "grad_norm": 1.09375, "learning_rate": 1.5345510047920372e-05, "loss": 0.309, "step": 8503 }, { "epoch": 0.6436024785960929, "grad_norm": 0.73828125, "learning_rate": 1.53445033851466e-05, "loss": 0.2588, "step": 8504 }, { "epoch": 0.6436781609195402, "grad_norm": 0.828125, "learning_rate": 1.5343496646551875e-05, "loss": 0.3641, "step": 8505 }, { "epoch": 0.6437538432429876, "grad_norm": 0.84375, "learning_rate": 1.534248983215048e-05, "loss": 0.369, "step": 8506 }, { "epoch": 0.6438295255664349, "grad_norm": 0.78125, "learning_rate": 1.5341482941956697e-05, "loss": 0.3284, "step": 8507 }, { "epoch": 0.6439052078898823, "grad_norm": 0.7109375, "learning_rate": 1.5340475975984814e-05, "loss": 0.2879, "step": 8508 }, { "epoch": 0.6439808902133296, "grad_norm": 0.77734375, "learning_rate": 1.5339468934249115e-05, "loss": 0.3094, "step": 8509 }, { "epoch": 0.6440565725367768, "grad_norm": 0.79296875, "learning_rate": 1.533846181676389e-05, "loss": 0.3159, "step": 8510 }, { "epoch": 0.6441322548602242, "grad_norm": 1.0390625, "learning_rate": 1.5337454623543417e-05, "loss": 0.368, "step": 8511 }, { "epoch": 0.6442079371836715, "grad_norm": 1.3203125, "learning_rate": 1.5336447354601995e-05, "loss": 0.4046, "step": 8512 }, { "epoch": 0.6442836195071189, "grad_norm": 0.75, "learning_rate": 1.5335440009953906e-05, "loss": 0.323, "step": 8513 }, { "epoch": 0.6443593018305662, "grad_norm": 0.8046875, "learning_rate": 1.5334432589613445e-05, "loss": 0.3514, "step": 8514 }, { "epoch": 0.6444349841540136, "grad_norm": 0.82421875, "learning_rate": 1.5333425093594908e-05, "loss": 0.3863, "step": 8515 }, { "epoch": 0.6445106664774608, "grad_norm": 0.8359375, "learning_rate": 1.533241752191258e-05, "loss": 0.3463, "step": 8516 }, { "epoch": 0.6445863488009081, "grad_norm": 0.7578125, "learning_rate": 1.5331409874580757e-05, "loss": 0.3023, "step": 8517 }, { "epoch": 0.6446620311243555, "grad_norm": 0.76171875, "learning_rate": 1.5330402151613738e-05, "loss": 0.3052, "step": 8518 }, { "epoch": 0.6447377134478028, "grad_norm": 0.78125, "learning_rate": 1.532939435302582e-05, "loss": 0.3338, "step": 8519 }, { "epoch": 0.6448133957712502, "grad_norm": 0.72265625, "learning_rate": 1.5328386478831295e-05, "loss": 0.2742, "step": 8520 }, { "epoch": 0.6448890780946975, "grad_norm": 0.7578125, "learning_rate": 1.5327378529044464e-05, "loss": 0.3042, "step": 8521 }, { "epoch": 0.6449647604181449, "grad_norm": 0.80078125, "learning_rate": 1.5326370503679625e-05, "loss": 0.3242, "step": 8522 }, { "epoch": 0.6450404427415921, "grad_norm": 0.859375, "learning_rate": 1.5325362402751078e-05, "loss": 0.2636, "step": 8523 }, { "epoch": 0.6451161250650395, "grad_norm": 0.8203125, "learning_rate": 1.532435422627313e-05, "loss": 0.3718, "step": 8524 }, { "epoch": 0.6451918073884868, "grad_norm": 0.765625, "learning_rate": 1.5323345974260084e-05, "loss": 0.2868, "step": 8525 }, { "epoch": 0.6452674897119342, "grad_norm": 0.7890625, "learning_rate": 1.5322337646726232e-05, "loss": 0.2942, "step": 8526 }, { "epoch": 0.6453431720353815, "grad_norm": 0.76171875, "learning_rate": 1.5321329243685895e-05, "loss": 0.3045, "step": 8527 }, { "epoch": 0.6454188543588288, "grad_norm": 0.7734375, "learning_rate": 1.5320320765153367e-05, "loss": 0.2814, "step": 8528 }, { "epoch": 0.6454945366822762, "grad_norm": 0.9375, "learning_rate": 1.531931221114296e-05, "loss": 0.3494, "step": 8529 }, { "epoch": 0.6455702190057234, "grad_norm": 0.70703125, "learning_rate": 1.531830358166898e-05, "loss": 0.2773, "step": 8530 }, { "epoch": 0.6456459013291708, "grad_norm": 0.7265625, "learning_rate": 1.531729487674574e-05, "loss": 0.3075, "step": 8531 }, { "epoch": 0.6457215836526181, "grad_norm": 0.69921875, "learning_rate": 1.531628609638754e-05, "loss": 0.2766, "step": 8532 }, { "epoch": 0.6457972659760655, "grad_norm": 0.75390625, "learning_rate": 1.5315277240608707e-05, "loss": 0.3186, "step": 8533 }, { "epoch": 0.6458729482995128, "grad_norm": 0.89453125, "learning_rate": 1.531426830942354e-05, "loss": 0.3237, "step": 8534 }, { "epoch": 0.6459486306229602, "grad_norm": 0.6953125, "learning_rate": 1.531325930284636e-05, "loss": 0.2676, "step": 8535 }, { "epoch": 0.6460243129464075, "grad_norm": 0.7890625, "learning_rate": 1.531225022089148e-05, "loss": 0.3311, "step": 8536 }, { "epoch": 0.6460999952698547, "grad_norm": 0.75390625, "learning_rate": 1.531124106357321e-05, "loss": 0.3119, "step": 8537 }, { "epoch": 0.6461756775933021, "grad_norm": 0.72265625, "learning_rate": 1.5310231830905876e-05, "loss": 0.29, "step": 8538 }, { "epoch": 0.6462513599167494, "grad_norm": 0.81640625, "learning_rate": 1.530922252290379e-05, "loss": 0.381, "step": 8539 }, { "epoch": 0.6463270422401968, "grad_norm": 0.7265625, "learning_rate": 1.5308213139581268e-05, "loss": 0.3152, "step": 8540 }, { "epoch": 0.6464027245636441, "grad_norm": 0.76953125, "learning_rate": 1.5307203680952632e-05, "loss": 0.3304, "step": 8541 }, { "epoch": 0.6464784068870915, "grad_norm": 0.75390625, "learning_rate": 1.530619414703221e-05, "loss": 0.2726, "step": 8542 }, { "epoch": 0.6465540892105388, "grad_norm": 0.76171875, "learning_rate": 1.5305184537834317e-05, "loss": 0.3025, "step": 8543 }, { "epoch": 0.646629771533986, "grad_norm": 0.78125, "learning_rate": 1.5304174853373277e-05, "loss": 0.3098, "step": 8544 }, { "epoch": 0.6467054538574334, "grad_norm": 0.76953125, "learning_rate": 1.5303165093663413e-05, "loss": 0.3413, "step": 8545 }, { "epoch": 0.6467811361808807, "grad_norm": 0.76171875, "learning_rate": 1.5302155258719052e-05, "loss": 0.315, "step": 8546 }, { "epoch": 0.6468568185043281, "grad_norm": 0.71484375, "learning_rate": 1.5301145348554517e-05, "loss": 0.314, "step": 8547 }, { "epoch": 0.6469325008277754, "grad_norm": 0.765625, "learning_rate": 1.5300135363184145e-05, "loss": 0.2948, "step": 8548 }, { "epoch": 0.6470081831512228, "grad_norm": 0.734375, "learning_rate": 1.5299125302622252e-05, "loss": 0.282, "step": 8549 }, { "epoch": 0.6470838654746701, "grad_norm": 0.97265625, "learning_rate": 1.5298115166883176e-05, "loss": 0.3286, "step": 8550 }, { "epoch": 0.6471595477981174, "grad_norm": 0.72265625, "learning_rate": 1.5297104955981246e-05, "loss": 0.2958, "step": 8551 }, { "epoch": 0.6472352301215647, "grad_norm": 0.74609375, "learning_rate": 1.5296094669930787e-05, "loss": 0.3367, "step": 8552 }, { "epoch": 0.647310912445012, "grad_norm": 0.734375, "learning_rate": 1.529508430874614e-05, "loss": 0.2948, "step": 8553 }, { "epoch": 0.6473865947684594, "grad_norm": 0.796875, "learning_rate": 1.5294073872441634e-05, "loss": 0.3498, "step": 8554 }, { "epoch": 0.6474622770919067, "grad_norm": 0.74609375, "learning_rate": 1.5293063361031605e-05, "loss": 0.2865, "step": 8555 }, { "epoch": 0.6475379594153541, "grad_norm": 0.7578125, "learning_rate": 1.529205277453039e-05, "loss": 0.3286, "step": 8556 }, { "epoch": 0.6476136417388014, "grad_norm": 0.7265625, "learning_rate": 1.529104211295233e-05, "loss": 0.2873, "step": 8557 }, { "epoch": 0.6476893240622487, "grad_norm": 0.703125, "learning_rate": 1.529003137631175e-05, "loss": 0.2793, "step": 8558 }, { "epoch": 0.647765006385696, "grad_norm": 0.7265625, "learning_rate": 1.5289020564622998e-05, "loss": 0.3256, "step": 8559 }, { "epoch": 0.6478406887091434, "grad_norm": 0.79296875, "learning_rate": 1.5288009677900415e-05, "loss": 0.2851, "step": 8560 }, { "epoch": 0.6479163710325907, "grad_norm": 0.76953125, "learning_rate": 1.528699871615834e-05, "loss": 0.3123, "step": 8561 }, { "epoch": 0.647992053356038, "grad_norm": 0.7734375, "learning_rate": 1.5285987679411114e-05, "loss": 0.3107, "step": 8562 }, { "epoch": 0.6480677356794854, "grad_norm": 0.75, "learning_rate": 1.5284976567673083e-05, "loss": 0.3199, "step": 8563 }, { "epoch": 0.6481434180029327, "grad_norm": 0.74609375, "learning_rate": 1.528396538095859e-05, "loss": 0.3047, "step": 8564 }, { "epoch": 0.64821910032638, "grad_norm": 0.78515625, "learning_rate": 1.5282954119281978e-05, "loss": 0.3715, "step": 8565 }, { "epoch": 0.6482947826498273, "grad_norm": 0.75, "learning_rate": 1.52819427826576e-05, "loss": 0.3172, "step": 8566 }, { "epoch": 0.6483704649732747, "grad_norm": 0.76953125, "learning_rate": 1.5280931371099797e-05, "loss": 0.2939, "step": 8567 }, { "epoch": 0.648446147296722, "grad_norm": 0.78515625, "learning_rate": 1.5279919884622922e-05, "loss": 0.3241, "step": 8568 }, { "epoch": 0.6485218296201694, "grad_norm": 0.80078125, "learning_rate": 1.527890832324132e-05, "loss": 0.3227, "step": 8569 }, { "epoch": 0.6485975119436167, "grad_norm": 0.78515625, "learning_rate": 1.5277896686969345e-05, "loss": 0.3353, "step": 8570 }, { "epoch": 0.648673194267064, "grad_norm": 0.75390625, "learning_rate": 1.5276884975821355e-05, "loss": 0.3177, "step": 8571 }, { "epoch": 0.6487488765905113, "grad_norm": 0.78125, "learning_rate": 1.5275873189811688e-05, "loss": 0.3193, "step": 8572 }, { "epoch": 0.6488245589139586, "grad_norm": 0.75390625, "learning_rate": 1.527486132895471e-05, "loss": 0.2876, "step": 8573 }, { "epoch": 0.648900241237406, "grad_norm": 0.7265625, "learning_rate": 1.5273849393264767e-05, "loss": 0.3017, "step": 8574 }, { "epoch": 0.6489759235608533, "grad_norm": 0.76171875, "learning_rate": 1.5272837382756227e-05, "loss": 0.3235, "step": 8575 }, { "epoch": 0.6490516058843007, "grad_norm": 0.7734375, "learning_rate": 1.5271825297443436e-05, "loss": 0.3152, "step": 8576 }, { "epoch": 0.649127288207748, "grad_norm": 0.74609375, "learning_rate": 1.527081313734076e-05, "loss": 0.3253, "step": 8577 }, { "epoch": 0.6492029705311954, "grad_norm": 0.76953125, "learning_rate": 1.526980090246255e-05, "loss": 0.3419, "step": 8578 }, { "epoch": 0.6492786528546426, "grad_norm": 1.1171875, "learning_rate": 1.5268788592823173e-05, "loss": 0.35, "step": 8579 }, { "epoch": 0.6493543351780899, "grad_norm": 0.8203125, "learning_rate": 1.5267776208436988e-05, "loss": 0.3945, "step": 8580 }, { "epoch": 0.6494300175015373, "grad_norm": 0.71875, "learning_rate": 1.5266763749318362e-05, "loss": 0.2731, "step": 8581 }, { "epoch": 0.6495056998249846, "grad_norm": 0.796875, "learning_rate": 1.526575121548165e-05, "loss": 0.3233, "step": 8582 }, { "epoch": 0.649581382148432, "grad_norm": 0.765625, "learning_rate": 1.5264738606941215e-05, "loss": 0.3213, "step": 8583 }, { "epoch": 0.6496570644718793, "grad_norm": 0.72265625, "learning_rate": 1.5263725923711438e-05, "loss": 0.3062, "step": 8584 }, { "epoch": 0.6497327467953267, "grad_norm": 0.75390625, "learning_rate": 1.526271316580667e-05, "loss": 0.3073, "step": 8585 }, { "epoch": 0.6498084291187739, "grad_norm": 0.83203125, "learning_rate": 1.526170033324129e-05, "loss": 0.3274, "step": 8586 }, { "epoch": 0.6498841114422212, "grad_norm": 0.8125, "learning_rate": 1.5260687426029657e-05, "loss": 0.3448, "step": 8587 }, { "epoch": 0.6499597937656686, "grad_norm": 0.98046875, "learning_rate": 1.5259674444186145e-05, "loss": 0.3673, "step": 8588 }, { "epoch": 0.6500354760891159, "grad_norm": 0.8046875, "learning_rate": 1.5258661387725124e-05, "loss": 0.3332, "step": 8589 }, { "epoch": 0.6501111584125633, "grad_norm": 0.75390625, "learning_rate": 1.525764825666097e-05, "loss": 0.3263, "step": 8590 }, { "epoch": 0.6501868407360106, "grad_norm": 0.73828125, "learning_rate": 1.5256635051008054e-05, "loss": 0.3022, "step": 8591 }, { "epoch": 0.650262523059458, "grad_norm": 0.76171875, "learning_rate": 1.5255621770780748e-05, "loss": 0.3051, "step": 8592 }, { "epoch": 0.6503382053829052, "grad_norm": 0.76953125, "learning_rate": 1.5254608415993427e-05, "loss": 0.3347, "step": 8593 }, { "epoch": 0.6504138877063526, "grad_norm": 0.8203125, "learning_rate": 1.525359498666047e-05, "loss": 0.3553, "step": 8594 }, { "epoch": 0.6504895700297999, "grad_norm": 0.6796875, "learning_rate": 1.5252581482796252e-05, "loss": 0.2859, "step": 8595 }, { "epoch": 0.6505652523532472, "grad_norm": 0.7109375, "learning_rate": 1.525156790441515e-05, "loss": 0.2891, "step": 8596 }, { "epoch": 0.6506409346766946, "grad_norm": 0.73828125, "learning_rate": 1.5250554251531547e-05, "loss": 0.3139, "step": 8597 }, { "epoch": 0.6507166170001419, "grad_norm": 0.73046875, "learning_rate": 1.524954052415982e-05, "loss": 0.3017, "step": 8598 }, { "epoch": 0.6507922993235893, "grad_norm": 0.74609375, "learning_rate": 1.5248526722314357e-05, "loss": 0.3068, "step": 8599 }, { "epoch": 0.6508679816470365, "grad_norm": 0.73046875, "learning_rate": 1.5247512846009532e-05, "loss": 0.3091, "step": 8600 }, { "epoch": 0.6509436639704839, "grad_norm": 1.0, "learning_rate": 1.5246498895259732e-05, "loss": 0.353, "step": 8601 }, { "epoch": 0.6510193462939312, "grad_norm": 0.79296875, "learning_rate": 1.5245484870079343e-05, "loss": 0.3491, "step": 8602 }, { "epoch": 0.6510950286173786, "grad_norm": 0.69921875, "learning_rate": 1.5244470770482749e-05, "loss": 0.2797, "step": 8603 }, { "epoch": 0.6511707109408259, "grad_norm": 0.78515625, "learning_rate": 1.5243456596484339e-05, "loss": 0.3423, "step": 8604 }, { "epoch": 0.6512463932642732, "grad_norm": 0.75, "learning_rate": 1.5242442348098495e-05, "loss": 0.3285, "step": 8605 }, { "epoch": 0.6513220755877206, "grad_norm": 0.7734375, "learning_rate": 1.5241428025339613e-05, "loss": 0.3343, "step": 8606 }, { "epoch": 0.6513977579111678, "grad_norm": 0.7265625, "learning_rate": 1.524041362822208e-05, "loss": 0.2832, "step": 8607 }, { "epoch": 0.6514734402346152, "grad_norm": 0.796875, "learning_rate": 1.5239399156760287e-05, "loss": 0.3497, "step": 8608 }, { "epoch": 0.6515491225580625, "grad_norm": 0.7890625, "learning_rate": 1.5238384610968623e-05, "loss": 0.3508, "step": 8609 }, { "epoch": 0.6516248048815099, "grad_norm": 0.8046875, "learning_rate": 1.5237369990861486e-05, "loss": 0.3629, "step": 8610 }, { "epoch": 0.6517004872049572, "grad_norm": 0.8046875, "learning_rate": 1.5236355296453268e-05, "loss": 0.3374, "step": 8611 }, { "epoch": 0.6517761695284046, "grad_norm": 0.75, "learning_rate": 1.5235340527758364e-05, "loss": 0.2794, "step": 8612 }, { "epoch": 0.6518518518518519, "grad_norm": 0.765625, "learning_rate": 1.5234325684791173e-05, "loss": 0.2804, "step": 8613 }, { "epoch": 0.6519275341752991, "grad_norm": 0.76953125, "learning_rate": 1.5233310767566086e-05, "loss": 0.3067, "step": 8614 }, { "epoch": 0.6520032164987465, "grad_norm": 0.71484375, "learning_rate": 1.5232295776097505e-05, "loss": 0.2752, "step": 8615 }, { "epoch": 0.6520788988221938, "grad_norm": 0.76953125, "learning_rate": 1.5231280710399832e-05, "loss": 0.3227, "step": 8616 }, { "epoch": 0.6521545811456412, "grad_norm": 0.76171875, "learning_rate": 1.523026557048746e-05, "loss": 0.2915, "step": 8617 }, { "epoch": 0.6522302634690885, "grad_norm": 0.82421875, "learning_rate": 1.5229250356374804e-05, "loss": 0.3339, "step": 8618 }, { "epoch": 0.6523059457925359, "grad_norm": 0.765625, "learning_rate": 1.5228235068076253e-05, "loss": 0.3381, "step": 8619 }, { "epoch": 0.6523816281159832, "grad_norm": 0.7734375, "learning_rate": 1.5227219705606215e-05, "loss": 0.3289, "step": 8620 }, { "epoch": 0.6524573104394304, "grad_norm": 0.72265625, "learning_rate": 1.5226204268979096e-05, "loss": 0.3022, "step": 8621 }, { "epoch": 0.6525329927628778, "grad_norm": 0.75, "learning_rate": 1.5225188758209303e-05, "loss": 0.319, "step": 8622 }, { "epoch": 0.6526086750863251, "grad_norm": 0.77734375, "learning_rate": 1.5224173173311239e-05, "loss": 0.3477, "step": 8623 }, { "epoch": 0.6526843574097725, "grad_norm": 0.7578125, "learning_rate": 1.5223157514299311e-05, "loss": 0.344, "step": 8624 }, { "epoch": 0.6527600397332198, "grad_norm": 0.76171875, "learning_rate": 1.5222141781187936e-05, "loss": 0.3271, "step": 8625 }, { "epoch": 0.6528357220566672, "grad_norm": 0.8359375, "learning_rate": 1.5221125973991513e-05, "loss": 0.3646, "step": 8626 }, { "epoch": 0.6529114043801145, "grad_norm": 0.79296875, "learning_rate": 1.5220110092724462e-05, "loss": 0.3291, "step": 8627 }, { "epoch": 0.6529870867035618, "grad_norm": 1.015625, "learning_rate": 1.5219094137401192e-05, "loss": 0.3551, "step": 8628 }, { "epoch": 0.6530627690270091, "grad_norm": 0.7578125, "learning_rate": 1.5218078108036109e-05, "loss": 0.3413, "step": 8629 }, { "epoch": 0.6531384513504565, "grad_norm": 0.83203125, "learning_rate": 1.521706200464364e-05, "loss": 0.356, "step": 8630 }, { "epoch": 0.6532141336739038, "grad_norm": 0.7890625, "learning_rate": 1.521604582723819e-05, "loss": 0.3353, "step": 8631 }, { "epoch": 0.6532898159973511, "grad_norm": 0.76953125, "learning_rate": 1.5215029575834182e-05, "loss": 0.3198, "step": 8632 }, { "epoch": 0.6533654983207985, "grad_norm": 0.83203125, "learning_rate": 1.5214013250446029e-05, "loss": 0.3576, "step": 8633 }, { "epoch": 0.6534411806442458, "grad_norm": 0.71484375, "learning_rate": 1.5212996851088149e-05, "loss": 0.2675, "step": 8634 }, { "epoch": 0.6535168629676931, "grad_norm": 0.7890625, "learning_rate": 1.5211980377774961e-05, "loss": 0.33, "step": 8635 }, { "epoch": 0.6535925452911404, "grad_norm": 0.76953125, "learning_rate": 1.5210963830520893e-05, "loss": 0.3266, "step": 8636 }, { "epoch": 0.6536682276145878, "grad_norm": 0.875, "learning_rate": 1.5209947209340356e-05, "loss": 0.3746, "step": 8637 }, { "epoch": 0.6537439099380351, "grad_norm": 0.82421875, "learning_rate": 1.520893051424778e-05, "loss": 0.3701, "step": 8638 }, { "epoch": 0.6538195922614825, "grad_norm": 0.71875, "learning_rate": 1.5207913745257586e-05, "loss": 0.2951, "step": 8639 }, { "epoch": 0.6538952745849298, "grad_norm": 0.7421875, "learning_rate": 1.5206896902384196e-05, "loss": 0.2875, "step": 8640 }, { "epoch": 0.653970956908377, "grad_norm": 0.74609375, "learning_rate": 1.5205879985642041e-05, "loss": 0.2936, "step": 8641 }, { "epoch": 0.6540466392318244, "grad_norm": 0.8515625, "learning_rate": 1.5204862995045541e-05, "loss": 0.4014, "step": 8642 }, { "epoch": 0.6541223215552717, "grad_norm": 0.75390625, "learning_rate": 1.520384593060913e-05, "loss": 0.3136, "step": 8643 }, { "epoch": 0.6541980038787191, "grad_norm": 0.78515625, "learning_rate": 1.5202828792347232e-05, "loss": 0.3495, "step": 8644 }, { "epoch": 0.6542736862021664, "grad_norm": 0.8125, "learning_rate": 1.5201811580274284e-05, "loss": 0.3361, "step": 8645 }, { "epoch": 0.6543493685256138, "grad_norm": 0.80078125, "learning_rate": 1.5200794294404709e-05, "loss": 0.3442, "step": 8646 }, { "epoch": 0.6544250508490611, "grad_norm": 0.7265625, "learning_rate": 1.5199776934752944e-05, "loss": 0.2882, "step": 8647 }, { "epoch": 0.6545007331725083, "grad_norm": 0.69140625, "learning_rate": 1.5198759501333418e-05, "loss": 0.2784, "step": 8648 }, { "epoch": 0.6545764154959557, "grad_norm": 0.72265625, "learning_rate": 1.5197741994160569e-05, "loss": 0.3189, "step": 8649 }, { "epoch": 0.654652097819403, "grad_norm": 0.80078125, "learning_rate": 1.5196724413248829e-05, "loss": 0.3631, "step": 8650 }, { "epoch": 0.6547277801428504, "grad_norm": 0.7109375, "learning_rate": 1.5195706758612634e-05, "loss": 0.3053, "step": 8651 }, { "epoch": 0.6548034624662977, "grad_norm": 0.69921875, "learning_rate": 1.5194689030266422e-05, "loss": 0.2893, "step": 8652 }, { "epoch": 0.6548791447897451, "grad_norm": 0.83203125, "learning_rate": 1.5193671228224634e-05, "loss": 0.3768, "step": 8653 }, { "epoch": 0.6549548271131924, "grad_norm": 0.828125, "learning_rate": 1.5192653352501707e-05, "loss": 0.3397, "step": 8654 }, { "epoch": 0.6550305094366397, "grad_norm": 0.7578125, "learning_rate": 1.5191635403112081e-05, "loss": 0.3478, "step": 8655 }, { "epoch": 0.655106191760087, "grad_norm": 0.8359375, "learning_rate": 1.5190617380070199e-05, "loss": 0.3399, "step": 8656 }, { "epoch": 0.6551818740835343, "grad_norm": 0.76171875, "learning_rate": 1.5189599283390499e-05, "loss": 0.3113, "step": 8657 }, { "epoch": 0.6552575564069817, "grad_norm": 0.75, "learning_rate": 1.518858111308743e-05, "loss": 0.3121, "step": 8658 }, { "epoch": 0.655333238730429, "grad_norm": 0.7578125, "learning_rate": 1.518756286917543e-05, "loss": 0.3368, "step": 8659 }, { "epoch": 0.6554089210538764, "grad_norm": 0.79296875, "learning_rate": 1.5186544551668955e-05, "loss": 0.3514, "step": 8660 }, { "epoch": 0.6554846033773237, "grad_norm": 0.76953125, "learning_rate": 1.5185526160582437e-05, "loss": 0.2902, "step": 8661 }, { "epoch": 0.655560285700771, "grad_norm": 0.78515625, "learning_rate": 1.518450769593034e-05, "loss": 0.3281, "step": 8662 }, { "epoch": 0.6556359680242183, "grad_norm": 0.80078125, "learning_rate": 1.5183489157727099e-05, "loss": 0.3365, "step": 8663 }, { "epoch": 0.6557116503476657, "grad_norm": 0.7734375, "learning_rate": 1.5182470545987174e-05, "loss": 0.3203, "step": 8664 }, { "epoch": 0.655787332671113, "grad_norm": 0.703125, "learning_rate": 1.5181451860725007e-05, "loss": 0.282, "step": 8665 }, { "epoch": 0.6558630149945603, "grad_norm": 0.75, "learning_rate": 1.5180433101955055e-05, "loss": 0.2983, "step": 8666 }, { "epoch": 0.6559386973180077, "grad_norm": 0.81640625, "learning_rate": 1.5179414269691766e-05, "loss": 0.3716, "step": 8667 }, { "epoch": 0.656014379641455, "grad_norm": 0.70703125, "learning_rate": 1.5178395363949601e-05, "loss": 0.3059, "step": 8668 }, { "epoch": 0.6560900619649023, "grad_norm": 0.74609375, "learning_rate": 1.5177376384743013e-05, "loss": 0.3382, "step": 8669 }, { "epoch": 0.6561657442883496, "grad_norm": 0.8203125, "learning_rate": 1.5176357332086451e-05, "loss": 0.3403, "step": 8670 }, { "epoch": 0.656241426611797, "grad_norm": 0.7578125, "learning_rate": 1.5175338205994382e-05, "loss": 0.3135, "step": 8671 }, { "epoch": 0.6563171089352443, "grad_norm": 0.78515625, "learning_rate": 1.5174319006481257e-05, "loss": 0.3502, "step": 8672 }, { "epoch": 0.6563927912586917, "grad_norm": 0.7578125, "learning_rate": 1.5173299733561537e-05, "loss": 0.3235, "step": 8673 }, { "epoch": 0.656468473582139, "grad_norm": 0.703125, "learning_rate": 1.5172280387249685e-05, "loss": 0.2852, "step": 8674 }, { "epoch": 0.6565441559055863, "grad_norm": 0.76953125, "learning_rate": 1.517126096756016e-05, "loss": 0.33, "step": 8675 }, { "epoch": 0.6566198382290336, "grad_norm": 0.78125, "learning_rate": 1.5170241474507421e-05, "loss": 0.3349, "step": 8676 }, { "epoch": 0.6566955205524809, "grad_norm": 0.80078125, "learning_rate": 1.5169221908105933e-05, "loss": 0.3764, "step": 8677 }, { "epoch": 0.6567712028759283, "grad_norm": 0.7421875, "learning_rate": 1.5168202268370169e-05, "loss": 0.3227, "step": 8678 }, { "epoch": 0.6568468851993756, "grad_norm": 0.859375, "learning_rate": 1.5167182555314578e-05, "loss": 0.405, "step": 8679 }, { "epoch": 0.656922567522823, "grad_norm": 0.73828125, "learning_rate": 1.516616276895364e-05, "loss": 0.2942, "step": 8680 }, { "epoch": 0.6569982498462703, "grad_norm": 0.7421875, "learning_rate": 1.5165142909301817e-05, "loss": 0.294, "step": 8681 }, { "epoch": 0.6570739321697177, "grad_norm": 0.7578125, "learning_rate": 1.5164122976373579e-05, "loss": 0.3076, "step": 8682 }, { "epoch": 0.6571496144931649, "grad_norm": 0.80859375, "learning_rate": 1.5163102970183397e-05, "loss": 0.3506, "step": 8683 }, { "epoch": 0.6572252968166122, "grad_norm": 0.828125, "learning_rate": 1.5162082890745735e-05, "loss": 0.3619, "step": 8684 }, { "epoch": 0.6573009791400596, "grad_norm": 0.765625, "learning_rate": 1.5161062738075068e-05, "loss": 0.316, "step": 8685 }, { "epoch": 0.6573766614635069, "grad_norm": 0.78125, "learning_rate": 1.516004251218587e-05, "loss": 0.335, "step": 8686 }, { "epoch": 0.6574523437869543, "grad_norm": 0.78125, "learning_rate": 1.5159022213092615e-05, "loss": 0.3528, "step": 8687 }, { "epoch": 0.6575280261104016, "grad_norm": 0.76171875, "learning_rate": 1.5158001840809776e-05, "loss": 0.3279, "step": 8688 }, { "epoch": 0.657603708433849, "grad_norm": 0.71484375, "learning_rate": 1.5156981395351832e-05, "loss": 0.2753, "step": 8689 }, { "epoch": 0.6576793907572962, "grad_norm": 0.74609375, "learning_rate": 1.5155960876733255e-05, "loss": 0.2701, "step": 8690 }, { "epoch": 0.6577550730807435, "grad_norm": 0.828125, "learning_rate": 1.5154940284968525e-05, "loss": 0.3818, "step": 8691 }, { "epoch": 0.6578307554041909, "grad_norm": 0.76171875, "learning_rate": 1.5153919620072123e-05, "loss": 0.3242, "step": 8692 }, { "epoch": 0.6579064377276382, "grad_norm": 1.125, "learning_rate": 1.5152898882058527e-05, "loss": 0.3773, "step": 8693 }, { "epoch": 0.6579821200510856, "grad_norm": 0.7265625, "learning_rate": 1.5151878070942214e-05, "loss": 0.3019, "step": 8694 }, { "epoch": 0.6580578023745329, "grad_norm": 0.71875, "learning_rate": 1.5150857186737673e-05, "loss": 0.2904, "step": 8695 }, { "epoch": 0.6581334846979803, "grad_norm": 0.75390625, "learning_rate": 1.5149836229459381e-05, "loss": 0.3245, "step": 8696 }, { "epoch": 0.6582091670214275, "grad_norm": 0.734375, "learning_rate": 1.5148815199121827e-05, "loss": 0.3332, "step": 8697 }, { "epoch": 0.6582848493448749, "grad_norm": 0.68359375, "learning_rate": 1.5147794095739491e-05, "loss": 0.2875, "step": 8698 }, { "epoch": 0.6583605316683222, "grad_norm": 0.76953125, "learning_rate": 1.5146772919326864e-05, "loss": 0.2946, "step": 8699 }, { "epoch": 0.6584362139917695, "grad_norm": 0.75, "learning_rate": 1.514575166989843e-05, "loss": 0.3079, "step": 8700 }, { "epoch": 0.6585118963152169, "grad_norm": 0.7421875, "learning_rate": 1.514473034746868e-05, "loss": 0.3216, "step": 8701 }, { "epoch": 0.6585875786386642, "grad_norm": 0.78125, "learning_rate": 1.5143708952052099e-05, "loss": 0.3358, "step": 8702 }, { "epoch": 0.6586632609621116, "grad_norm": 0.765625, "learning_rate": 1.5142687483663178e-05, "loss": 0.3059, "step": 8703 }, { "epoch": 0.6587389432855588, "grad_norm": 0.74609375, "learning_rate": 1.5141665942316414e-05, "loss": 0.3083, "step": 8704 }, { "epoch": 0.6588146256090062, "grad_norm": 0.7734375, "learning_rate": 1.5140644328026295e-05, "loss": 0.3384, "step": 8705 }, { "epoch": 0.6588903079324535, "grad_norm": 0.7890625, "learning_rate": 1.5139622640807315e-05, "loss": 0.3267, "step": 8706 }, { "epoch": 0.6589659902559009, "grad_norm": 0.77734375, "learning_rate": 1.5138600880673963e-05, "loss": 0.2958, "step": 8707 }, { "epoch": 0.6590416725793482, "grad_norm": 0.765625, "learning_rate": 1.5137579047640743e-05, "loss": 0.3097, "step": 8708 }, { "epoch": 0.6591173549027955, "grad_norm": 0.765625, "learning_rate": 1.5136557141722148e-05, "loss": 0.3365, "step": 8709 }, { "epoch": 0.6591930372262429, "grad_norm": 0.80078125, "learning_rate": 1.5135535162932673e-05, "loss": 0.3321, "step": 8710 }, { "epoch": 0.6592687195496901, "grad_norm": 0.7421875, "learning_rate": 1.5134513111286822e-05, "loss": 0.314, "step": 8711 }, { "epoch": 0.6593444018731375, "grad_norm": 0.78515625, "learning_rate": 1.513349098679909e-05, "loss": 0.3334, "step": 8712 }, { "epoch": 0.6594200841965848, "grad_norm": 0.765625, "learning_rate": 1.5132468789483979e-05, "loss": 0.3292, "step": 8713 }, { "epoch": 0.6594957665200322, "grad_norm": 0.8125, "learning_rate": 1.5131446519355988e-05, "loss": 0.3062, "step": 8714 }, { "epoch": 0.6595714488434795, "grad_norm": 0.7734375, "learning_rate": 1.5130424176429625e-05, "loss": 0.3464, "step": 8715 }, { "epoch": 0.6596471311669269, "grad_norm": 0.80859375, "learning_rate": 1.5129401760719393e-05, "loss": 0.3325, "step": 8716 }, { "epoch": 0.6597228134903742, "grad_norm": 0.7890625, "learning_rate": 1.5128379272239791e-05, "loss": 0.3521, "step": 8717 }, { "epoch": 0.6597984958138214, "grad_norm": 0.8046875, "learning_rate": 1.5127356711005327e-05, "loss": 0.3433, "step": 8718 }, { "epoch": 0.6598741781372688, "grad_norm": 0.890625, "learning_rate": 1.5126334077030515e-05, "loss": 0.3261, "step": 8719 }, { "epoch": 0.6599498604607161, "grad_norm": 0.72265625, "learning_rate": 1.5125311370329854e-05, "loss": 0.3002, "step": 8720 }, { "epoch": 0.6600255427841635, "grad_norm": 0.76953125, "learning_rate": 1.5124288590917857e-05, "loss": 0.274, "step": 8721 }, { "epoch": 0.6601012251076108, "grad_norm": 0.77734375, "learning_rate": 1.5123265738809032e-05, "loss": 0.3118, "step": 8722 }, { "epoch": 0.6601769074310582, "grad_norm": 0.74609375, "learning_rate": 1.5122242814017887e-05, "loss": 0.3059, "step": 8723 }, { "epoch": 0.6602525897545055, "grad_norm": 0.7578125, "learning_rate": 1.512121981655894e-05, "loss": 0.3246, "step": 8724 }, { "epoch": 0.6603282720779527, "grad_norm": 0.76953125, "learning_rate": 1.5120196746446704e-05, "loss": 0.3061, "step": 8725 }, { "epoch": 0.6604039544014001, "grad_norm": 0.765625, "learning_rate": 1.511917360369569e-05, "loss": 0.3292, "step": 8726 }, { "epoch": 0.6604796367248474, "grad_norm": 0.7421875, "learning_rate": 1.5118150388320412e-05, "loss": 0.2917, "step": 8727 }, { "epoch": 0.6605553190482948, "grad_norm": 0.8125, "learning_rate": 1.511712710033539e-05, "loss": 0.3323, "step": 8728 }, { "epoch": 0.6606310013717421, "grad_norm": 1.0546875, "learning_rate": 1.5116103739755134e-05, "loss": 0.3264, "step": 8729 }, { "epoch": 0.6607066836951895, "grad_norm": 0.75390625, "learning_rate": 1.5115080306594172e-05, "loss": 0.3178, "step": 8730 }, { "epoch": 0.6607823660186368, "grad_norm": 0.703125, "learning_rate": 1.5114056800867017e-05, "loss": 0.281, "step": 8731 }, { "epoch": 0.6608580483420841, "grad_norm": 0.69921875, "learning_rate": 1.5113033222588188e-05, "loss": 0.2846, "step": 8732 }, { "epoch": 0.6609337306655314, "grad_norm": 0.78515625, "learning_rate": 1.5112009571772208e-05, "loss": 0.3669, "step": 8733 }, { "epoch": 0.6610094129889788, "grad_norm": 0.8515625, "learning_rate": 1.5110985848433606e-05, "loss": 0.3583, "step": 8734 }, { "epoch": 0.6610850953124261, "grad_norm": 0.7734375, "learning_rate": 1.5109962052586892e-05, "loss": 0.3153, "step": 8735 }, { "epoch": 0.6611607776358734, "grad_norm": 0.84375, "learning_rate": 1.5108938184246599e-05, "loss": 0.3768, "step": 8736 }, { "epoch": 0.6612364599593208, "grad_norm": 0.7890625, "learning_rate": 1.5107914243427253e-05, "loss": 0.343, "step": 8737 }, { "epoch": 0.6613121422827681, "grad_norm": 0.765625, "learning_rate": 1.5106890230143374e-05, "loss": 0.3253, "step": 8738 }, { "epoch": 0.6613878246062154, "grad_norm": 1.2734375, "learning_rate": 1.5105866144409498e-05, "loss": 0.3839, "step": 8739 }, { "epoch": 0.6614635069296627, "grad_norm": 0.76953125, "learning_rate": 1.5104841986240146e-05, "loss": 0.31, "step": 8740 }, { "epoch": 0.6615391892531101, "grad_norm": 0.7109375, "learning_rate": 1.510381775564985e-05, "loss": 0.2873, "step": 8741 }, { "epoch": 0.6616148715765574, "grad_norm": 0.76953125, "learning_rate": 1.5102793452653141e-05, "loss": 0.3344, "step": 8742 }, { "epoch": 0.6616905539000048, "grad_norm": 0.94921875, "learning_rate": 1.5101769077264552e-05, "loss": 0.3168, "step": 8743 }, { "epoch": 0.6617662362234521, "grad_norm": 0.75390625, "learning_rate": 1.5100744629498611e-05, "loss": 0.3063, "step": 8744 }, { "epoch": 0.6618419185468994, "grad_norm": 0.73828125, "learning_rate": 1.5099720109369858e-05, "loss": 0.3212, "step": 8745 }, { "epoch": 0.6619176008703467, "grad_norm": 0.7421875, "learning_rate": 1.5098695516892821e-05, "loss": 0.3061, "step": 8746 }, { "epoch": 0.661993283193794, "grad_norm": 1.09375, "learning_rate": 1.5097670852082039e-05, "loss": 0.3684, "step": 8747 }, { "epoch": 0.6620689655172414, "grad_norm": 0.81640625, "learning_rate": 1.5096646114952047e-05, "loss": 0.33, "step": 8748 }, { "epoch": 0.6621446478406887, "grad_norm": 0.671875, "learning_rate": 1.5095621305517387e-05, "loss": 0.273, "step": 8749 }, { "epoch": 0.6622203301641361, "grad_norm": 0.80859375, "learning_rate": 1.509459642379259e-05, "loss": 0.3253, "step": 8750 }, { "epoch": 0.6622960124875834, "grad_norm": 0.83984375, "learning_rate": 1.50935714697922e-05, "loss": 0.3624, "step": 8751 }, { "epoch": 0.6623716948110308, "grad_norm": 0.71875, "learning_rate": 1.5092546443530762e-05, "loss": 0.2687, "step": 8752 }, { "epoch": 0.662447377134478, "grad_norm": 0.76171875, "learning_rate": 1.5091521345022813e-05, "loss": 0.3394, "step": 8753 }, { "epoch": 0.6625230594579253, "grad_norm": 0.76953125, "learning_rate": 1.5090496174282893e-05, "loss": 0.3197, "step": 8754 }, { "epoch": 0.6625987417813727, "grad_norm": 0.80859375, "learning_rate": 1.5089470931325551e-05, "loss": 0.3518, "step": 8755 }, { "epoch": 0.66267442410482, "grad_norm": 0.78515625, "learning_rate": 1.508844561616533e-05, "loss": 0.31, "step": 8756 }, { "epoch": 0.6627501064282674, "grad_norm": 0.75, "learning_rate": 1.5087420228816776e-05, "loss": 0.2984, "step": 8757 }, { "epoch": 0.6628257887517147, "grad_norm": 0.765625, "learning_rate": 1.5086394769294437e-05, "loss": 0.3256, "step": 8758 }, { "epoch": 0.662901471075162, "grad_norm": 0.69921875, "learning_rate": 1.5085369237612857e-05, "loss": 0.2869, "step": 8759 }, { "epoch": 0.6629771533986093, "grad_norm": 0.8203125, "learning_rate": 1.5084343633786592e-05, "loss": 0.3492, "step": 8760 }, { "epoch": 0.6630528357220566, "grad_norm": 0.734375, "learning_rate": 1.5083317957830183e-05, "loss": 0.2951, "step": 8761 }, { "epoch": 0.663128518045504, "grad_norm": 0.75, "learning_rate": 1.5082292209758186e-05, "loss": 0.33, "step": 8762 }, { "epoch": 0.6632042003689513, "grad_norm": 0.75, "learning_rate": 1.5081266389585155e-05, "loss": 0.2982, "step": 8763 }, { "epoch": 0.6632798826923987, "grad_norm": 1.0390625, "learning_rate": 1.5080240497325638e-05, "loss": 0.3924, "step": 8764 }, { "epoch": 0.663355565015846, "grad_norm": 0.765625, "learning_rate": 1.507921453299419e-05, "loss": 0.2954, "step": 8765 }, { "epoch": 0.6634312473392933, "grad_norm": 0.80078125, "learning_rate": 1.507818849660537e-05, "loss": 0.3249, "step": 8766 }, { "epoch": 0.6635069296627406, "grad_norm": 0.80859375, "learning_rate": 1.5077162388173734e-05, "loss": 0.3522, "step": 8767 }, { "epoch": 0.663582611986188, "grad_norm": 0.84375, "learning_rate": 1.5076136207713834e-05, "loss": 0.3411, "step": 8768 }, { "epoch": 0.6636582943096353, "grad_norm": 0.74609375, "learning_rate": 1.5075109955240233e-05, "loss": 0.3046, "step": 8769 }, { "epoch": 0.6637339766330826, "grad_norm": 0.7109375, "learning_rate": 1.5074083630767484e-05, "loss": 0.3137, "step": 8770 }, { "epoch": 0.66380965895653, "grad_norm": 0.76171875, "learning_rate": 1.5073057234310154e-05, "loss": 0.3302, "step": 8771 }, { "epoch": 0.6638853412799773, "grad_norm": 0.78515625, "learning_rate": 1.5072030765882804e-05, "loss": 0.337, "step": 8772 }, { "epoch": 0.6639610236034246, "grad_norm": 0.80859375, "learning_rate": 1.507100422549999e-05, "loss": 0.325, "step": 8773 }, { "epoch": 0.6640367059268719, "grad_norm": 0.6953125, "learning_rate": 1.5069977613176278e-05, "loss": 0.27, "step": 8774 }, { "epoch": 0.6641123882503193, "grad_norm": 0.7578125, "learning_rate": 1.5068950928926236e-05, "loss": 0.331, "step": 8775 }, { "epoch": 0.6641880705737666, "grad_norm": 0.828125, "learning_rate": 1.5067924172764427e-05, "loss": 0.3698, "step": 8776 }, { "epoch": 0.664263752897214, "grad_norm": 0.7578125, "learning_rate": 1.5066897344705416e-05, "loss": 0.3131, "step": 8777 }, { "epoch": 0.6643394352206613, "grad_norm": 0.828125, "learning_rate": 1.506587044476377e-05, "loss": 0.3604, "step": 8778 }, { "epoch": 0.6644151175441086, "grad_norm": 0.80859375, "learning_rate": 1.5064843472954058e-05, "loss": 0.3765, "step": 8779 }, { "epoch": 0.6644907998675559, "grad_norm": 0.76171875, "learning_rate": 1.506381642929085e-05, "loss": 0.3473, "step": 8780 }, { "epoch": 0.6645664821910032, "grad_norm": 0.7265625, "learning_rate": 1.506278931378872e-05, "loss": 0.283, "step": 8781 }, { "epoch": 0.6646421645144506, "grad_norm": 0.73828125, "learning_rate": 1.5061762126462229e-05, "loss": 0.2904, "step": 8782 }, { "epoch": 0.6647178468378979, "grad_norm": 0.75390625, "learning_rate": 1.5060734867325959e-05, "loss": 0.3004, "step": 8783 }, { "epoch": 0.6647935291613453, "grad_norm": 0.7578125, "learning_rate": 1.5059707536394483e-05, "loss": 0.3254, "step": 8784 }, { "epoch": 0.6648692114847926, "grad_norm": 0.796875, "learning_rate": 1.5058680133682369e-05, "loss": 0.3472, "step": 8785 }, { "epoch": 0.66494489380824, "grad_norm": 0.78515625, "learning_rate": 1.5057652659204198e-05, "loss": 0.3572, "step": 8786 }, { "epoch": 0.6650205761316872, "grad_norm": 0.76953125, "learning_rate": 1.5056625112974545e-05, "loss": 0.3213, "step": 8787 }, { "epoch": 0.6650962584551345, "grad_norm": 0.69140625, "learning_rate": 1.5055597495007988e-05, "loss": 0.253, "step": 8788 }, { "epoch": 0.6651719407785819, "grad_norm": 0.72265625, "learning_rate": 1.5054569805319102e-05, "loss": 0.2993, "step": 8789 }, { "epoch": 0.6652476231020292, "grad_norm": 0.7265625, "learning_rate": 1.5053542043922474e-05, "loss": 0.2983, "step": 8790 }, { "epoch": 0.6653233054254766, "grad_norm": 0.76953125, "learning_rate": 1.5052514210832674e-05, "loss": 0.3395, "step": 8791 }, { "epoch": 0.6653989877489239, "grad_norm": 0.7734375, "learning_rate": 1.5051486306064293e-05, "loss": 0.3351, "step": 8792 }, { "epoch": 0.6654746700723713, "grad_norm": 0.83984375, "learning_rate": 1.505045832963191e-05, "loss": 0.3768, "step": 8793 }, { "epoch": 0.6655503523958185, "grad_norm": 0.828125, "learning_rate": 1.5049430281550108e-05, "loss": 0.3791, "step": 8794 }, { "epoch": 0.6656260347192658, "grad_norm": 0.73046875, "learning_rate": 1.5048402161833475e-05, "loss": 0.2861, "step": 8795 }, { "epoch": 0.6657017170427132, "grad_norm": 0.69921875, "learning_rate": 1.504737397049659e-05, "loss": 0.2589, "step": 8796 }, { "epoch": 0.6657773993661605, "grad_norm": 0.77734375, "learning_rate": 1.5046345707554044e-05, "loss": 0.3327, "step": 8797 }, { "epoch": 0.6658530816896079, "grad_norm": 0.76953125, "learning_rate": 1.5045317373020428e-05, "loss": 0.3547, "step": 8798 }, { "epoch": 0.6659287640130552, "grad_norm": 0.78125, "learning_rate": 1.5044288966910323e-05, "loss": 0.3293, "step": 8799 }, { "epoch": 0.6660044463365026, "grad_norm": 0.734375, "learning_rate": 1.5043260489238325e-05, "loss": 0.2974, "step": 8800 }, { "epoch": 0.6660801286599498, "grad_norm": 0.77734375, "learning_rate": 1.5042231940019023e-05, "loss": 0.3408, "step": 8801 }, { "epoch": 0.6661558109833972, "grad_norm": 0.734375, "learning_rate": 1.5041203319267007e-05, "loss": 0.3112, "step": 8802 }, { "epoch": 0.6662314933068445, "grad_norm": 0.80078125, "learning_rate": 1.5040174626996868e-05, "loss": 0.3467, "step": 8803 }, { "epoch": 0.6663071756302918, "grad_norm": 0.99609375, "learning_rate": 1.5039145863223207e-05, "loss": 0.3562, "step": 8804 }, { "epoch": 0.6663828579537392, "grad_norm": 0.7421875, "learning_rate": 1.5038117027960613e-05, "loss": 0.3134, "step": 8805 }, { "epoch": 0.6664585402771865, "grad_norm": 0.78515625, "learning_rate": 1.5037088121223682e-05, "loss": 0.3165, "step": 8806 }, { "epoch": 0.6665342226006339, "grad_norm": 0.75390625, "learning_rate": 1.5036059143027015e-05, "loss": 0.3325, "step": 8807 }, { "epoch": 0.6666099049240811, "grad_norm": 0.68359375, "learning_rate": 1.5035030093385206e-05, "loss": 0.2381, "step": 8808 }, { "epoch": 0.6666855872475285, "grad_norm": 0.7421875, "learning_rate": 1.5034000972312854e-05, "loss": 0.3142, "step": 8809 }, { "epoch": 0.6667612695709758, "grad_norm": 0.7265625, "learning_rate": 1.503297177982456e-05, "loss": 0.2879, "step": 8810 }, { "epoch": 0.6668369518944232, "grad_norm": 0.78125, "learning_rate": 1.5031942515934923e-05, "loss": 0.3352, "step": 8811 }, { "epoch": 0.6669126342178705, "grad_norm": 0.78515625, "learning_rate": 1.5030913180658546e-05, "loss": 0.3299, "step": 8812 }, { "epoch": 0.6669883165413178, "grad_norm": 0.88671875, "learning_rate": 1.5029883774010032e-05, "loss": 0.3803, "step": 8813 }, { "epoch": 0.6670639988647652, "grad_norm": 0.77734375, "learning_rate": 1.5028854296003987e-05, "loss": 0.3444, "step": 8814 }, { "epoch": 0.6671396811882124, "grad_norm": 0.796875, "learning_rate": 1.5027824746655016e-05, "loss": 0.3343, "step": 8815 }, { "epoch": 0.6672153635116598, "grad_norm": 0.8046875, "learning_rate": 1.502679512597772e-05, "loss": 0.3587, "step": 8816 }, { "epoch": 0.6672910458351071, "grad_norm": 0.77734375, "learning_rate": 1.502576543398671e-05, "loss": 0.3556, "step": 8817 }, { "epoch": 0.6673667281585545, "grad_norm": 0.78125, "learning_rate": 1.5024735670696591e-05, "loss": 0.3096, "step": 8818 }, { "epoch": 0.6674424104820018, "grad_norm": 0.77734375, "learning_rate": 1.5023705836121974e-05, "loss": 0.3013, "step": 8819 }, { "epoch": 0.6675180928054492, "grad_norm": 0.76953125, "learning_rate": 1.5022675930277468e-05, "loss": 0.3192, "step": 8820 }, { "epoch": 0.6675937751288965, "grad_norm": 0.7734375, "learning_rate": 1.5021645953177685e-05, "loss": 0.3097, "step": 8821 }, { "epoch": 0.6676694574523437, "grad_norm": 0.765625, "learning_rate": 1.5020615904837241e-05, "loss": 0.3207, "step": 8822 }, { "epoch": 0.6677451397757911, "grad_norm": 0.7109375, "learning_rate": 1.501958578527074e-05, "loss": 0.2821, "step": 8823 }, { "epoch": 0.6678208220992384, "grad_norm": 0.7265625, "learning_rate": 1.5018555594492802e-05, "loss": 0.3093, "step": 8824 }, { "epoch": 0.6678965044226858, "grad_norm": 0.77734375, "learning_rate": 1.5017525332518043e-05, "loss": 0.3315, "step": 8825 }, { "epoch": 0.6679721867461331, "grad_norm": 0.74609375, "learning_rate": 1.5016494999361074e-05, "loss": 0.3114, "step": 8826 }, { "epoch": 0.6680478690695805, "grad_norm": 0.78125, "learning_rate": 1.5015464595036516e-05, "loss": 0.3149, "step": 8827 }, { "epoch": 0.6681235513930278, "grad_norm": 0.80078125, "learning_rate": 1.5014434119558988e-05, "loss": 0.3441, "step": 8828 }, { "epoch": 0.668199233716475, "grad_norm": 0.7578125, "learning_rate": 1.5013403572943102e-05, "loss": 0.3334, "step": 8829 }, { "epoch": 0.6682749160399224, "grad_norm": 0.76171875, "learning_rate": 1.5012372955203487e-05, "loss": 0.3126, "step": 8830 }, { "epoch": 0.6683505983633697, "grad_norm": 0.78125, "learning_rate": 1.5011342266354758e-05, "loss": 0.352, "step": 8831 }, { "epoch": 0.6684262806868171, "grad_norm": 0.73046875, "learning_rate": 1.501031150641154e-05, "loss": 0.3077, "step": 8832 }, { "epoch": 0.6685019630102644, "grad_norm": 0.734375, "learning_rate": 1.5009280675388455e-05, "loss": 0.3036, "step": 8833 }, { "epoch": 0.6685776453337118, "grad_norm": 0.76171875, "learning_rate": 1.5008249773300127e-05, "loss": 0.3134, "step": 8834 }, { "epoch": 0.6686533276571591, "grad_norm": 0.78125, "learning_rate": 1.5007218800161183e-05, "loss": 0.2865, "step": 8835 }, { "epoch": 0.6687290099806064, "grad_norm": 0.796875, "learning_rate": 1.5006187755986248e-05, "loss": 0.3204, "step": 8836 }, { "epoch": 0.6688046923040537, "grad_norm": 0.8125, "learning_rate": 1.5005156640789948e-05, "loss": 0.3503, "step": 8837 }, { "epoch": 0.668880374627501, "grad_norm": 0.7890625, "learning_rate": 1.500412545458691e-05, "loss": 0.3468, "step": 8838 }, { "epoch": 0.6689560569509484, "grad_norm": 0.78515625, "learning_rate": 1.5003094197391766e-05, "loss": 0.3654, "step": 8839 }, { "epoch": 0.6690317392743957, "grad_norm": 0.71875, "learning_rate": 1.5002062869219147e-05, "loss": 0.2717, "step": 8840 }, { "epoch": 0.6691074215978431, "grad_norm": 0.828125, "learning_rate": 1.500103147008368e-05, "loss": 0.3263, "step": 8841 }, { "epoch": 0.6691831039212904, "grad_norm": 0.75, "learning_rate": 1.5000000000000002e-05, "loss": 0.3162, "step": 8842 }, { "epoch": 0.6692587862447377, "grad_norm": 0.7265625, "learning_rate": 1.4998968458982743e-05, "loss": 0.298, "step": 8843 }, { "epoch": 0.669334468568185, "grad_norm": 0.7578125, "learning_rate": 1.4997936847046535e-05, "loss": 0.3153, "step": 8844 }, { "epoch": 0.6694101508916324, "grad_norm": 0.8359375, "learning_rate": 1.499690516420602e-05, "loss": 0.3661, "step": 8845 }, { "epoch": 0.6694858332150797, "grad_norm": 0.86328125, "learning_rate": 1.499587341047583e-05, "loss": 0.3615, "step": 8846 }, { "epoch": 0.669561515538527, "grad_norm": 0.76171875, "learning_rate": 1.4994841585870599e-05, "loss": 0.3312, "step": 8847 }, { "epoch": 0.6696371978619744, "grad_norm": 0.73046875, "learning_rate": 1.499380969040497e-05, "loss": 0.3069, "step": 8848 }, { "epoch": 0.6697128801854217, "grad_norm": 0.73828125, "learning_rate": 1.4992777724093583e-05, "loss": 0.3107, "step": 8849 }, { "epoch": 0.669788562508869, "grad_norm": 0.7265625, "learning_rate": 1.4991745686951071e-05, "loss": 0.3136, "step": 8850 }, { "epoch": 0.6698642448323163, "grad_norm": 0.7421875, "learning_rate": 1.499071357899209e-05, "loss": 0.2779, "step": 8851 }, { "epoch": 0.6699399271557637, "grad_norm": 0.8203125, "learning_rate": 1.4989681400231265e-05, "loss": 0.359, "step": 8852 }, { "epoch": 0.670015609479211, "grad_norm": 0.7109375, "learning_rate": 1.4988649150683246e-05, "loss": 0.2905, "step": 8853 }, { "epoch": 0.6700912918026584, "grad_norm": 0.76953125, "learning_rate": 1.498761683036268e-05, "loss": 0.3269, "step": 8854 }, { "epoch": 0.6701669741261057, "grad_norm": 0.7265625, "learning_rate": 1.4986584439284212e-05, "loss": 0.3009, "step": 8855 }, { "epoch": 0.670242656449553, "grad_norm": 0.73046875, "learning_rate": 1.4985551977462484e-05, "loss": 0.2988, "step": 8856 }, { "epoch": 0.6703183387730003, "grad_norm": 0.65625, "learning_rate": 1.4984519444912147e-05, "loss": 0.2453, "step": 8857 }, { "epoch": 0.6703940210964476, "grad_norm": 0.74609375, "learning_rate": 1.4983486841647847e-05, "loss": 0.3273, "step": 8858 }, { "epoch": 0.670469703419895, "grad_norm": 0.78515625, "learning_rate": 1.4982454167684233e-05, "loss": 0.3419, "step": 8859 }, { "epoch": 0.6705453857433423, "grad_norm": 0.8125, "learning_rate": 1.4981421423035963e-05, "loss": 0.3785, "step": 8860 }, { "epoch": 0.6706210680667897, "grad_norm": 0.8359375, "learning_rate": 1.4980388607717673e-05, "loss": 0.3793, "step": 8861 }, { "epoch": 0.670696750390237, "grad_norm": 0.83984375, "learning_rate": 1.497935572174403e-05, "loss": 0.3771, "step": 8862 }, { "epoch": 0.6707724327136844, "grad_norm": 0.76953125, "learning_rate": 1.4978322765129678e-05, "loss": 0.3142, "step": 8863 }, { "epoch": 0.6708481150371316, "grad_norm": 0.73046875, "learning_rate": 1.4977289737889275e-05, "loss": 0.2922, "step": 8864 }, { "epoch": 0.6709237973605789, "grad_norm": 0.828125, "learning_rate": 1.4976256640037477e-05, "loss": 0.3774, "step": 8865 }, { "epoch": 0.6709994796840263, "grad_norm": 0.8203125, "learning_rate": 1.4975223471588937e-05, "loss": 0.3199, "step": 8866 }, { "epoch": 0.6710751620074736, "grad_norm": 0.765625, "learning_rate": 1.4974190232558317e-05, "loss": 0.3293, "step": 8867 }, { "epoch": 0.671150844330921, "grad_norm": 0.71875, "learning_rate": 1.497315692296027e-05, "loss": 0.2964, "step": 8868 }, { "epoch": 0.6712265266543683, "grad_norm": 0.82421875, "learning_rate": 1.4972123542809458e-05, "loss": 0.3577, "step": 8869 }, { "epoch": 0.6713022089778157, "grad_norm": 0.74609375, "learning_rate": 1.4971090092120544e-05, "loss": 0.279, "step": 8870 }, { "epoch": 0.6713778913012629, "grad_norm": 0.76953125, "learning_rate": 1.4970056570908184e-05, "loss": 0.307, "step": 8871 }, { "epoch": 0.6714535736247103, "grad_norm": 0.765625, "learning_rate": 1.4969022979187042e-05, "loss": 0.3323, "step": 8872 }, { "epoch": 0.6715292559481576, "grad_norm": 0.6796875, "learning_rate": 1.4967989316971783e-05, "loss": 0.2521, "step": 8873 }, { "epoch": 0.671604938271605, "grad_norm": 0.70703125, "learning_rate": 1.4966955584277072e-05, "loss": 0.3012, "step": 8874 }, { "epoch": 0.6716806205950523, "grad_norm": 0.69140625, "learning_rate": 1.496592178111757e-05, "loss": 0.2755, "step": 8875 }, { "epoch": 0.6717563029184996, "grad_norm": 0.796875, "learning_rate": 1.4964887907507946e-05, "loss": 0.383, "step": 8876 }, { "epoch": 0.671831985241947, "grad_norm": 0.78515625, "learning_rate": 1.4963853963462866e-05, "loss": 0.3373, "step": 8877 }, { "epoch": 0.6719076675653942, "grad_norm": 0.8671875, "learning_rate": 1.4962819948997e-05, "loss": 0.3832, "step": 8878 }, { "epoch": 0.6719833498888416, "grad_norm": 1.234375, "learning_rate": 1.4961785864125019e-05, "loss": 0.3807, "step": 8879 }, { "epoch": 0.6720590322122889, "grad_norm": 0.828125, "learning_rate": 1.4960751708861587e-05, "loss": 0.3716, "step": 8880 }, { "epoch": 0.6721347145357363, "grad_norm": 0.6953125, "learning_rate": 1.4959717483221381e-05, "loss": 0.2553, "step": 8881 }, { "epoch": 0.6722103968591836, "grad_norm": 0.75390625, "learning_rate": 1.495868318721907e-05, "loss": 0.3193, "step": 8882 }, { "epoch": 0.672286079182631, "grad_norm": 0.75390625, "learning_rate": 1.495764882086933e-05, "loss": 0.3057, "step": 8883 }, { "epoch": 0.6723617615060782, "grad_norm": 0.75390625, "learning_rate": 1.4956614384186837e-05, "loss": 0.3248, "step": 8884 }, { "epoch": 0.6724374438295255, "grad_norm": 0.7734375, "learning_rate": 1.4955579877186257e-05, "loss": 0.3419, "step": 8885 }, { "epoch": 0.6725131261529729, "grad_norm": 0.7109375, "learning_rate": 1.4954545299882278e-05, "loss": 0.2945, "step": 8886 }, { "epoch": 0.6725888084764202, "grad_norm": 0.78515625, "learning_rate": 1.4953510652289567e-05, "loss": 0.3386, "step": 8887 }, { "epoch": 0.6726644907998676, "grad_norm": 0.828125, "learning_rate": 1.495247593442281e-05, "loss": 0.3447, "step": 8888 }, { "epoch": 0.6727401731233149, "grad_norm": 0.77734375, "learning_rate": 1.495144114629668e-05, "loss": 0.2899, "step": 8889 }, { "epoch": 0.6728158554467623, "grad_norm": 0.71875, "learning_rate": 1.4950406287925865e-05, "loss": 0.3039, "step": 8890 }, { "epoch": 0.6728915377702095, "grad_norm": 0.73828125, "learning_rate": 1.4949371359325036e-05, "loss": 0.3077, "step": 8891 }, { "epoch": 0.6729672200936568, "grad_norm": 0.80078125, "learning_rate": 1.4948336360508883e-05, "loss": 0.3408, "step": 8892 }, { "epoch": 0.6730429024171042, "grad_norm": 0.67578125, "learning_rate": 1.4947301291492092e-05, "loss": 0.26, "step": 8893 }, { "epoch": 0.6731185847405515, "grad_norm": 0.796875, "learning_rate": 1.4946266152289334e-05, "loss": 0.3714, "step": 8894 }, { "epoch": 0.6731942670639989, "grad_norm": 0.79296875, "learning_rate": 1.4945230942915307e-05, "loss": 0.3739, "step": 8895 }, { "epoch": 0.6732699493874462, "grad_norm": 1.0234375, "learning_rate": 1.4944195663384693e-05, "loss": 0.3936, "step": 8896 }, { "epoch": 0.6733456317108936, "grad_norm": 0.78515625, "learning_rate": 1.4943160313712178e-05, "loss": 0.3417, "step": 8897 }, { "epoch": 0.6734213140343408, "grad_norm": 0.703125, "learning_rate": 1.4942124893912453e-05, "loss": 0.2766, "step": 8898 }, { "epoch": 0.6734969963577881, "grad_norm": 0.76171875, "learning_rate": 1.4941089404000204e-05, "loss": 0.3214, "step": 8899 }, { "epoch": 0.6735726786812355, "grad_norm": 0.80078125, "learning_rate": 1.4940053843990121e-05, "loss": 0.3609, "step": 8900 }, { "epoch": 0.6736483610046828, "grad_norm": 0.77734375, "learning_rate": 1.4939018213896898e-05, "loss": 0.325, "step": 8901 }, { "epoch": 0.6737240433281302, "grad_norm": 0.7265625, "learning_rate": 1.493798251373523e-05, "loss": 0.2752, "step": 8902 }, { "epoch": 0.6737997256515775, "grad_norm": 0.68359375, "learning_rate": 1.49369467435198e-05, "loss": 0.2754, "step": 8903 }, { "epoch": 0.6738754079750249, "grad_norm": 0.79296875, "learning_rate": 1.4935910903265311e-05, "loss": 0.3359, "step": 8904 }, { "epoch": 0.6739510902984721, "grad_norm": 0.79296875, "learning_rate": 1.4934874992986457e-05, "loss": 0.3608, "step": 8905 }, { "epoch": 0.6740267726219195, "grad_norm": 0.76953125, "learning_rate": 1.4933839012697928e-05, "loss": 0.3291, "step": 8906 }, { "epoch": 0.6741024549453668, "grad_norm": 0.76171875, "learning_rate": 1.493280296241443e-05, "loss": 0.3326, "step": 8907 }, { "epoch": 0.6741781372688141, "grad_norm": 0.7734375, "learning_rate": 1.4931766842150656e-05, "loss": 0.3228, "step": 8908 }, { "epoch": 0.6742538195922615, "grad_norm": 0.71484375, "learning_rate": 1.4930730651921306e-05, "loss": 0.2677, "step": 8909 }, { "epoch": 0.6743295019157088, "grad_norm": 0.77734375, "learning_rate": 1.492969439174108e-05, "loss": 0.3592, "step": 8910 }, { "epoch": 0.6744051842391562, "grad_norm": 0.734375, "learning_rate": 1.4928658061624681e-05, "loss": 0.2867, "step": 8911 }, { "epoch": 0.6744808665626034, "grad_norm": 0.78125, "learning_rate": 1.4927621661586806e-05, "loss": 0.3494, "step": 8912 }, { "epoch": 0.6745565488860508, "grad_norm": 0.76953125, "learning_rate": 1.4926585191642166e-05, "loss": 0.3309, "step": 8913 }, { "epoch": 0.6746322312094981, "grad_norm": 0.71484375, "learning_rate": 1.4925548651805459e-05, "loss": 0.2806, "step": 8914 }, { "epoch": 0.6747079135329455, "grad_norm": 0.73046875, "learning_rate": 1.492451204209139e-05, "loss": 0.3131, "step": 8915 }, { "epoch": 0.6747835958563928, "grad_norm": 0.703125, "learning_rate": 1.492347536251467e-05, "loss": 0.2662, "step": 8916 }, { "epoch": 0.6748592781798401, "grad_norm": 0.73046875, "learning_rate": 1.492243861309e-05, "loss": 0.313, "step": 8917 }, { "epoch": 0.6749349605032875, "grad_norm": 0.71875, "learning_rate": 1.4921401793832094e-05, "loss": 0.2841, "step": 8918 }, { "epoch": 0.6750106428267347, "grad_norm": 0.7265625, "learning_rate": 1.4920364904755656e-05, "loss": 0.3048, "step": 8919 }, { "epoch": 0.6750863251501821, "grad_norm": 0.8046875, "learning_rate": 1.4919327945875399e-05, "loss": 0.3217, "step": 8920 }, { "epoch": 0.6751620074736294, "grad_norm": 0.7265625, "learning_rate": 1.4918290917206032e-05, "loss": 0.3144, "step": 8921 }, { "epoch": 0.6752376897970768, "grad_norm": 0.6953125, "learning_rate": 1.4917253818762269e-05, "loss": 0.2819, "step": 8922 }, { "epoch": 0.6753133721205241, "grad_norm": 0.76171875, "learning_rate": 1.4916216650558824e-05, "loss": 0.3116, "step": 8923 }, { "epoch": 0.6753890544439715, "grad_norm": 1.09375, "learning_rate": 1.4915179412610408e-05, "loss": 0.3921, "step": 8924 }, { "epoch": 0.6754647367674188, "grad_norm": 0.75, "learning_rate": 1.4914142104931737e-05, "loss": 0.2928, "step": 8925 }, { "epoch": 0.675540419090866, "grad_norm": 0.765625, "learning_rate": 1.4913104727537526e-05, "loss": 0.3256, "step": 8926 }, { "epoch": 0.6756161014143134, "grad_norm": 0.75, "learning_rate": 1.4912067280442494e-05, "loss": 0.3194, "step": 8927 }, { "epoch": 0.6756917837377607, "grad_norm": 0.69921875, "learning_rate": 1.4911029763661362e-05, "loss": 0.2761, "step": 8928 }, { "epoch": 0.6757674660612081, "grad_norm": 0.859375, "learning_rate": 1.4909992177208841e-05, "loss": 0.3541, "step": 8929 }, { "epoch": 0.6758431483846554, "grad_norm": 0.70703125, "learning_rate": 1.4908954521099656e-05, "loss": 0.2839, "step": 8930 }, { "epoch": 0.6759188307081028, "grad_norm": 0.890625, "learning_rate": 1.4907916795348529e-05, "loss": 0.3158, "step": 8931 }, { "epoch": 0.6759945130315501, "grad_norm": 0.765625, "learning_rate": 1.4906878999970176e-05, "loss": 0.2994, "step": 8932 }, { "epoch": 0.6760701953549973, "grad_norm": 0.80859375, "learning_rate": 1.4905841134979326e-05, "loss": 0.3804, "step": 8933 }, { "epoch": 0.6761458776784447, "grad_norm": 0.7265625, "learning_rate": 1.4904803200390702e-05, "loss": 0.3251, "step": 8934 }, { "epoch": 0.676221560001892, "grad_norm": 0.76953125, "learning_rate": 1.4903765196219027e-05, "loss": 0.2955, "step": 8935 }, { "epoch": 0.6762972423253394, "grad_norm": 0.72265625, "learning_rate": 1.4902727122479029e-05, "loss": 0.3209, "step": 8936 }, { "epoch": 0.6763729246487867, "grad_norm": 0.74609375, "learning_rate": 1.490168897918543e-05, "loss": 0.3354, "step": 8937 }, { "epoch": 0.6764486069722341, "grad_norm": 0.8046875, "learning_rate": 1.4900650766352962e-05, "loss": 0.3298, "step": 8938 }, { "epoch": 0.6765242892956814, "grad_norm": 0.80078125, "learning_rate": 1.4899612483996352e-05, "loss": 0.337, "step": 8939 }, { "epoch": 0.6765999716191287, "grad_norm": 0.70703125, "learning_rate": 1.4898574132130337e-05, "loss": 0.2917, "step": 8940 }, { "epoch": 0.676675653942576, "grad_norm": 0.76953125, "learning_rate": 1.4897535710769636e-05, "loss": 0.3348, "step": 8941 }, { "epoch": 0.6767513362660234, "grad_norm": 0.73828125, "learning_rate": 1.489649721992899e-05, "loss": 0.3058, "step": 8942 }, { "epoch": 0.6768270185894707, "grad_norm": 0.796875, "learning_rate": 1.4895458659623125e-05, "loss": 0.3483, "step": 8943 }, { "epoch": 0.676902700912918, "grad_norm": 0.7109375, "learning_rate": 1.489442002986678e-05, "loss": 0.2992, "step": 8944 }, { "epoch": 0.6769783832363654, "grad_norm": 0.76171875, "learning_rate": 1.4893381330674688e-05, "loss": 0.3145, "step": 8945 }, { "epoch": 0.6770540655598127, "grad_norm": 0.78125, "learning_rate": 1.4892342562061583e-05, "loss": 0.3278, "step": 8946 }, { "epoch": 0.67712974788326, "grad_norm": 0.74609375, "learning_rate": 1.48913037240422e-05, "loss": 0.2792, "step": 8947 }, { "epoch": 0.6772054302067073, "grad_norm": 0.75390625, "learning_rate": 1.4890264816631285e-05, "loss": 0.3125, "step": 8948 }, { "epoch": 0.6772811125301547, "grad_norm": 0.7421875, "learning_rate": 1.4889225839843574e-05, "loss": 0.294, "step": 8949 }, { "epoch": 0.677356794853602, "grad_norm": 0.75, "learning_rate": 1.4888186793693798e-05, "loss": 0.2999, "step": 8950 }, { "epoch": 0.6774324771770494, "grad_norm": 0.79296875, "learning_rate": 1.4887147678196708e-05, "loss": 0.337, "step": 8951 }, { "epoch": 0.6775081595004967, "grad_norm": 0.76953125, "learning_rate": 1.4886108493367037e-05, "loss": 0.3168, "step": 8952 }, { "epoch": 0.677583841823944, "grad_norm": 0.796875, "learning_rate": 1.4885069239219534e-05, "loss": 0.3317, "step": 8953 }, { "epoch": 0.6776595241473913, "grad_norm": 0.796875, "learning_rate": 1.4884029915768945e-05, "loss": 0.3382, "step": 8954 }, { "epoch": 0.6777352064708386, "grad_norm": 0.78515625, "learning_rate": 1.4882990523030008e-05, "loss": 0.3327, "step": 8955 }, { "epoch": 0.677810888794286, "grad_norm": 0.7578125, "learning_rate": 1.4881951061017469e-05, "loss": 0.3371, "step": 8956 }, { "epoch": 0.6778865711177333, "grad_norm": 0.734375, "learning_rate": 1.4880911529746076e-05, "loss": 0.2895, "step": 8957 }, { "epoch": 0.6779622534411807, "grad_norm": 0.7578125, "learning_rate": 1.4879871929230583e-05, "loss": 0.3272, "step": 8958 }, { "epoch": 0.678037935764628, "grad_norm": 0.7578125, "learning_rate": 1.4878832259485724e-05, "loss": 0.2817, "step": 8959 }, { "epoch": 0.6781136180880754, "grad_norm": 0.75390625, "learning_rate": 1.4877792520526264e-05, "loss": 0.3191, "step": 8960 }, { "epoch": 0.6781893004115226, "grad_norm": 0.6953125, "learning_rate": 1.4876752712366945e-05, "loss": 0.2634, "step": 8961 }, { "epoch": 0.6782649827349699, "grad_norm": 0.68359375, "learning_rate": 1.4875712835022515e-05, "loss": 0.2573, "step": 8962 }, { "epoch": 0.6783406650584173, "grad_norm": 0.8203125, "learning_rate": 1.4874672888507738e-05, "loss": 0.3586, "step": 8963 }, { "epoch": 0.6784163473818646, "grad_norm": 0.8125, "learning_rate": 1.4873632872837357e-05, "loss": 0.3558, "step": 8964 }, { "epoch": 0.678492029705312, "grad_norm": 0.7734375, "learning_rate": 1.487259278802613e-05, "loss": 0.3529, "step": 8965 }, { "epoch": 0.6785677120287593, "grad_norm": 0.75, "learning_rate": 1.4871552634088816e-05, "loss": 0.315, "step": 8966 }, { "epoch": 0.6786433943522067, "grad_norm": 0.9765625, "learning_rate": 1.4870512411040165e-05, "loss": 0.3777, "step": 8967 }, { "epoch": 0.6787190766756539, "grad_norm": 0.8203125, "learning_rate": 1.4869472118894939e-05, "loss": 0.332, "step": 8968 }, { "epoch": 0.6787947589991012, "grad_norm": 0.7890625, "learning_rate": 1.4868431757667895e-05, "loss": 0.3542, "step": 8969 }, { "epoch": 0.6788704413225486, "grad_norm": 0.7734375, "learning_rate": 1.486739132737379e-05, "loss": 0.3041, "step": 8970 }, { "epoch": 0.6789461236459959, "grad_norm": 0.75, "learning_rate": 1.4866350828027387e-05, "loss": 0.3154, "step": 8971 }, { "epoch": 0.6790218059694433, "grad_norm": 0.765625, "learning_rate": 1.486531025964345e-05, "loss": 0.3408, "step": 8972 }, { "epoch": 0.6790974882928906, "grad_norm": 1.0390625, "learning_rate": 1.4864269622236732e-05, "loss": 0.3177, "step": 8973 }, { "epoch": 0.679173170616338, "grad_norm": 0.7890625, "learning_rate": 1.4863228915822006e-05, "loss": 0.3116, "step": 8974 }, { "epoch": 0.6792488529397852, "grad_norm": 0.8046875, "learning_rate": 1.486218814041403e-05, "loss": 0.3615, "step": 8975 }, { "epoch": 0.6793245352632326, "grad_norm": 0.91015625, "learning_rate": 1.4861147296027572e-05, "loss": 0.3345, "step": 8976 }, { "epoch": 0.6794002175866799, "grad_norm": 0.79296875, "learning_rate": 1.48601063826774e-05, "loss": 0.3482, "step": 8977 }, { "epoch": 0.6794758999101272, "grad_norm": 0.73828125, "learning_rate": 1.4859065400378277e-05, "loss": 0.3161, "step": 8978 }, { "epoch": 0.6795515822335746, "grad_norm": 0.78515625, "learning_rate": 1.485802434914497e-05, "loss": 0.3329, "step": 8979 }, { "epoch": 0.6796272645570219, "grad_norm": 0.80859375, "learning_rate": 1.4856983228992254e-05, "loss": 0.3462, "step": 8980 }, { "epoch": 0.6797029468804693, "grad_norm": 0.765625, "learning_rate": 1.4855942039934896e-05, "loss": 0.3429, "step": 8981 }, { "epoch": 0.6797786292039165, "grad_norm": 0.8515625, "learning_rate": 1.4854900781987667e-05, "loss": 0.3004, "step": 8982 }, { "epoch": 0.6798543115273639, "grad_norm": 0.671875, "learning_rate": 1.4853859455165338e-05, "loss": 0.264, "step": 8983 }, { "epoch": 0.6799299938508112, "grad_norm": 0.73828125, "learning_rate": 1.4852818059482682e-05, "loss": 0.2759, "step": 8984 }, { "epoch": 0.6800056761742586, "grad_norm": 0.78515625, "learning_rate": 1.4851776594954478e-05, "loss": 0.3234, "step": 8985 }, { "epoch": 0.6800813584977059, "grad_norm": 0.83203125, "learning_rate": 1.4850735061595496e-05, "loss": 0.3216, "step": 8986 }, { "epoch": 0.6801570408211532, "grad_norm": 0.73046875, "learning_rate": 1.4849693459420512e-05, "loss": 0.3126, "step": 8987 }, { "epoch": 0.6802327231446006, "grad_norm": 0.78125, "learning_rate": 1.4848651788444302e-05, "loss": 0.3483, "step": 8988 }, { "epoch": 0.6803084054680478, "grad_norm": 0.76171875, "learning_rate": 1.4847610048681649e-05, "loss": 0.3409, "step": 8989 }, { "epoch": 0.6803840877914952, "grad_norm": 0.73828125, "learning_rate": 1.4846568240147327e-05, "loss": 0.287, "step": 8990 }, { "epoch": 0.6804597701149425, "grad_norm": 0.7421875, "learning_rate": 1.4845526362856117e-05, "loss": 0.3285, "step": 8991 }, { "epoch": 0.6805354524383899, "grad_norm": 0.67578125, "learning_rate": 1.4844484416822802e-05, "loss": 0.269, "step": 8992 }, { "epoch": 0.6806111347618372, "grad_norm": 0.82421875, "learning_rate": 1.4843442402062163e-05, "loss": 0.3367, "step": 8993 }, { "epoch": 0.6806868170852846, "grad_norm": 0.75390625, "learning_rate": 1.4842400318588979e-05, "loss": 0.3179, "step": 8994 }, { "epoch": 0.6807624994087319, "grad_norm": 0.7890625, "learning_rate": 1.4841358166418039e-05, "loss": 0.322, "step": 8995 }, { "epoch": 0.6808381817321791, "grad_norm": 0.83203125, "learning_rate": 1.4840315945564126e-05, "loss": 0.3721, "step": 8996 }, { "epoch": 0.6809138640556265, "grad_norm": 0.80859375, "learning_rate": 1.4839273656042022e-05, "loss": 0.336, "step": 8997 }, { "epoch": 0.6809895463790738, "grad_norm": 0.7578125, "learning_rate": 1.4838231297866516e-05, "loss": 0.3041, "step": 8998 }, { "epoch": 0.6810652287025212, "grad_norm": 0.765625, "learning_rate": 1.4837188871052399e-05, "loss": 0.3026, "step": 8999 }, { "epoch": 0.6811409110259685, "grad_norm": 0.75, "learning_rate": 1.4836146375614456e-05, "loss": 0.2963, "step": 9000 }, { "epoch": 0.6812165933494159, "grad_norm": 0.73046875, "learning_rate": 1.4835103811567479e-05, "loss": 0.3086, "step": 9001 }, { "epoch": 0.6812922756728632, "grad_norm": 0.78515625, "learning_rate": 1.4834061178926256e-05, "loss": 0.3263, "step": 9002 }, { "epoch": 0.6813679579963104, "grad_norm": 0.76171875, "learning_rate": 1.4833018477705579e-05, "loss": 0.3181, "step": 9003 }, { "epoch": 0.6814436403197578, "grad_norm": 0.76171875, "learning_rate": 1.4831975707920242e-05, "loss": 0.3097, "step": 9004 }, { "epoch": 0.6815193226432051, "grad_norm": 0.77734375, "learning_rate": 1.483093286958504e-05, "loss": 0.3516, "step": 9005 }, { "epoch": 0.6815950049666525, "grad_norm": 0.65625, "learning_rate": 1.4829889962714761e-05, "loss": 0.2538, "step": 9006 }, { "epoch": 0.6816706872900998, "grad_norm": 0.7890625, "learning_rate": 1.4828846987324206e-05, "loss": 0.3185, "step": 9007 }, { "epoch": 0.6817463696135472, "grad_norm": 0.734375, "learning_rate": 1.482780394342817e-05, "loss": 0.312, "step": 9008 }, { "epoch": 0.6818220519369944, "grad_norm": 0.77734375, "learning_rate": 1.4826760831041447e-05, "loss": 0.3223, "step": 9009 }, { "epoch": 0.6818977342604418, "grad_norm": 0.89453125, "learning_rate": 1.4825717650178846e-05, "loss": 0.334, "step": 9010 }, { "epoch": 0.6819734165838891, "grad_norm": 0.765625, "learning_rate": 1.4824674400855155e-05, "loss": 0.3009, "step": 9011 }, { "epoch": 0.6820490989073364, "grad_norm": 0.734375, "learning_rate": 1.4823631083085175e-05, "loss": 0.2974, "step": 9012 }, { "epoch": 0.6821247812307838, "grad_norm": 1.1328125, "learning_rate": 1.4822587696883715e-05, "loss": 0.3784, "step": 9013 }, { "epoch": 0.6822004635542311, "grad_norm": 0.79296875, "learning_rate": 1.482154424226557e-05, "loss": 0.3392, "step": 9014 }, { "epoch": 0.6822761458776785, "grad_norm": 0.78125, "learning_rate": 1.4820500719245547e-05, "loss": 0.3021, "step": 9015 }, { "epoch": 0.6823518282011257, "grad_norm": 0.734375, "learning_rate": 1.4819457127838451e-05, "loss": 0.2722, "step": 9016 }, { "epoch": 0.6824275105245731, "grad_norm": 0.79296875, "learning_rate": 1.4818413468059083e-05, "loss": 0.3405, "step": 9017 }, { "epoch": 0.6825031928480204, "grad_norm": 0.79296875, "learning_rate": 1.481736973992225e-05, "loss": 0.2971, "step": 9018 }, { "epoch": 0.6825788751714678, "grad_norm": 0.83203125, "learning_rate": 1.4816325943442762e-05, "loss": 0.3845, "step": 9019 }, { "epoch": 0.6826545574949151, "grad_norm": 0.6953125, "learning_rate": 1.4815282078635426e-05, "loss": 0.2453, "step": 9020 }, { "epoch": 0.6827302398183624, "grad_norm": 0.83203125, "learning_rate": 1.4814238145515048e-05, "loss": 0.3379, "step": 9021 }, { "epoch": 0.6828059221418098, "grad_norm": 0.7734375, "learning_rate": 1.4813194144096441e-05, "loss": 0.33, "step": 9022 }, { "epoch": 0.682881604465257, "grad_norm": 0.74609375, "learning_rate": 1.4812150074394418e-05, "loss": 0.2789, "step": 9023 }, { "epoch": 0.6829572867887044, "grad_norm": 0.734375, "learning_rate": 1.4811105936423787e-05, "loss": 0.3068, "step": 9024 }, { "epoch": 0.6830329691121517, "grad_norm": 0.75, "learning_rate": 1.481006173019936e-05, "loss": 0.3022, "step": 9025 }, { "epoch": 0.6831086514355991, "grad_norm": 0.75390625, "learning_rate": 1.4809017455735957e-05, "loss": 0.3297, "step": 9026 }, { "epoch": 0.6831843337590464, "grad_norm": 0.7421875, "learning_rate": 1.4807973113048384e-05, "loss": 0.3069, "step": 9027 }, { "epoch": 0.6832600160824938, "grad_norm": 0.703125, "learning_rate": 1.4806928702151464e-05, "loss": 0.2958, "step": 9028 }, { "epoch": 0.6833356984059411, "grad_norm": 0.734375, "learning_rate": 1.4805884223060012e-05, "loss": 0.2968, "step": 9029 }, { "epoch": 0.6834113807293883, "grad_norm": 0.765625, "learning_rate": 1.4804839675788844e-05, "loss": 0.3227, "step": 9030 }, { "epoch": 0.6834870630528357, "grad_norm": 0.7578125, "learning_rate": 1.4803795060352782e-05, "loss": 0.3378, "step": 9031 }, { "epoch": 0.683562745376283, "grad_norm": 0.828125, "learning_rate": 1.4802750376766641e-05, "loss": 0.3493, "step": 9032 }, { "epoch": 0.6836384276997304, "grad_norm": 0.69921875, "learning_rate": 1.4801705625045246e-05, "loss": 0.2735, "step": 9033 }, { "epoch": 0.6837141100231777, "grad_norm": 0.75390625, "learning_rate": 1.4800660805203417e-05, "loss": 0.3399, "step": 9034 }, { "epoch": 0.6837897923466251, "grad_norm": 0.8125, "learning_rate": 1.4799615917255974e-05, "loss": 0.3415, "step": 9035 }, { "epoch": 0.6838654746700724, "grad_norm": 0.734375, "learning_rate": 1.4798570961217743e-05, "loss": 0.3056, "step": 9036 }, { "epoch": 0.6839411569935196, "grad_norm": 0.76953125, "learning_rate": 1.479752593710355e-05, "loss": 0.3395, "step": 9037 }, { "epoch": 0.684016839316967, "grad_norm": 0.76953125, "learning_rate": 1.4796480844928218e-05, "loss": 0.33, "step": 9038 }, { "epoch": 0.6840925216404143, "grad_norm": 0.75, "learning_rate": 1.4795435684706574e-05, "loss": 0.3226, "step": 9039 }, { "epoch": 0.6841682039638617, "grad_norm": 0.75, "learning_rate": 1.4794390456453444e-05, "loss": 0.3099, "step": 9040 }, { "epoch": 0.684243886287309, "grad_norm": 0.765625, "learning_rate": 1.479334516018366e-05, "loss": 0.3325, "step": 9041 }, { "epoch": 0.6843195686107564, "grad_norm": 0.76953125, "learning_rate": 1.4792299795912049e-05, "loss": 0.3133, "step": 9042 }, { "epoch": 0.6843952509342037, "grad_norm": 0.76953125, "learning_rate": 1.4791254363653443e-05, "loss": 0.3109, "step": 9043 }, { "epoch": 0.684470933257651, "grad_norm": 0.8359375, "learning_rate": 1.4790208863422669e-05, "loss": 0.3444, "step": 9044 }, { "epoch": 0.6845466155810983, "grad_norm": 0.7421875, "learning_rate": 1.4789163295234561e-05, "loss": 0.3217, "step": 9045 }, { "epoch": 0.6846222979045457, "grad_norm": 0.76953125, "learning_rate": 1.4788117659103956e-05, "loss": 0.3092, "step": 9046 }, { "epoch": 0.684697980227993, "grad_norm": 0.76171875, "learning_rate": 1.4787071955045686e-05, "loss": 0.3059, "step": 9047 }, { "epoch": 0.6847736625514403, "grad_norm": 0.74609375, "learning_rate": 1.4786026183074582e-05, "loss": 0.3164, "step": 9048 }, { "epoch": 0.6848493448748877, "grad_norm": 0.796875, "learning_rate": 1.4784980343205489e-05, "loss": 0.3478, "step": 9049 }, { "epoch": 0.684925027198335, "grad_norm": 0.72265625, "learning_rate": 1.4783934435453232e-05, "loss": 0.2925, "step": 9050 }, { "epoch": 0.6850007095217823, "grad_norm": 0.77734375, "learning_rate": 1.4782888459832658e-05, "loss": 0.341, "step": 9051 }, { "epoch": 0.6850763918452296, "grad_norm": 0.7734375, "learning_rate": 1.4781842416358604e-05, "loss": 0.3144, "step": 9052 }, { "epoch": 0.685152074168677, "grad_norm": 0.73828125, "learning_rate": 1.478079630504591e-05, "loss": 0.3293, "step": 9053 }, { "epoch": 0.6852277564921243, "grad_norm": 0.7890625, "learning_rate": 1.4779750125909415e-05, "loss": 0.3422, "step": 9054 }, { "epoch": 0.6853034388155717, "grad_norm": 0.73828125, "learning_rate": 1.4778703878963963e-05, "loss": 0.2945, "step": 9055 }, { "epoch": 0.685379121139019, "grad_norm": 0.765625, "learning_rate": 1.4777657564224394e-05, "loss": 0.3136, "step": 9056 }, { "epoch": 0.6854548034624663, "grad_norm": 0.73828125, "learning_rate": 1.4776611181705557e-05, "loss": 0.3161, "step": 9057 }, { "epoch": 0.6855304857859136, "grad_norm": 0.77734375, "learning_rate": 1.4775564731422294e-05, "loss": 0.3207, "step": 9058 }, { "epoch": 0.6856061681093609, "grad_norm": 0.76171875, "learning_rate": 1.4774518213389446e-05, "loss": 0.3263, "step": 9059 }, { "epoch": 0.6856818504328083, "grad_norm": 0.796875, "learning_rate": 1.4773471627621865e-05, "loss": 0.3212, "step": 9060 }, { "epoch": 0.6857575327562556, "grad_norm": 0.76953125, "learning_rate": 1.47724249741344e-05, "loss": 0.3279, "step": 9061 }, { "epoch": 0.685833215079703, "grad_norm": 0.828125, "learning_rate": 1.4771378252941896e-05, "loss": 0.3422, "step": 9062 }, { "epoch": 0.6859088974031503, "grad_norm": 0.74609375, "learning_rate": 1.47703314640592e-05, "loss": 0.2994, "step": 9063 }, { "epoch": 0.6859845797265977, "grad_norm": 0.74609375, "learning_rate": 1.4769284607501172e-05, "loss": 0.3092, "step": 9064 }, { "epoch": 0.6860602620500449, "grad_norm": 0.69140625, "learning_rate": 1.476823768328265e-05, "loss": 0.2565, "step": 9065 }, { "epoch": 0.6861359443734922, "grad_norm": 0.796875, "learning_rate": 1.47671906914185e-05, "loss": 0.3055, "step": 9066 }, { "epoch": 0.6862116266969396, "grad_norm": 0.80078125, "learning_rate": 1.4766143631923569e-05, "loss": 0.3306, "step": 9067 }, { "epoch": 0.6862873090203869, "grad_norm": 0.765625, "learning_rate": 1.4765096504812708e-05, "loss": 0.3196, "step": 9068 }, { "epoch": 0.6863629913438343, "grad_norm": 0.765625, "learning_rate": 1.4764049310100777e-05, "loss": 0.3112, "step": 9069 }, { "epoch": 0.6864386736672816, "grad_norm": 0.80078125, "learning_rate": 1.4763002047802633e-05, "loss": 0.3244, "step": 9070 }, { "epoch": 0.686514355990729, "grad_norm": 0.76953125, "learning_rate": 1.4761954717933132e-05, "loss": 0.3414, "step": 9071 }, { "epoch": 0.6865900383141762, "grad_norm": 0.73046875, "learning_rate": 1.4760907320507127e-05, "loss": 0.3093, "step": 9072 }, { "epoch": 0.6866657206376235, "grad_norm": 0.82421875, "learning_rate": 1.4759859855539484e-05, "loss": 0.3357, "step": 9073 }, { "epoch": 0.6867414029610709, "grad_norm": 0.79296875, "learning_rate": 1.4758812323045062e-05, "loss": 0.3283, "step": 9074 }, { "epoch": 0.6868170852845182, "grad_norm": 0.75, "learning_rate": 1.4757764723038721e-05, "loss": 0.3239, "step": 9075 }, { "epoch": 0.6868927676079656, "grad_norm": 0.7421875, "learning_rate": 1.4756717055535324e-05, "loss": 0.3039, "step": 9076 }, { "epoch": 0.6869684499314129, "grad_norm": 0.81640625, "learning_rate": 1.4755669320549727e-05, "loss": 0.3031, "step": 9077 }, { "epoch": 0.6870441322548603, "grad_norm": 0.72265625, "learning_rate": 1.4754621518096803e-05, "loss": 0.2887, "step": 9078 }, { "epoch": 0.6871198145783075, "grad_norm": 0.7578125, "learning_rate": 1.4753573648191416e-05, "loss": 0.3406, "step": 9079 }, { "epoch": 0.6871954969017549, "grad_norm": 0.75390625, "learning_rate": 1.4752525710848426e-05, "loss": 0.3194, "step": 9080 }, { "epoch": 0.6872711792252022, "grad_norm": 0.77734375, "learning_rate": 1.4751477706082704e-05, "loss": 0.3433, "step": 9081 }, { "epoch": 0.6873468615486495, "grad_norm": 0.7421875, "learning_rate": 1.4750429633909118e-05, "loss": 0.2885, "step": 9082 }, { "epoch": 0.6874225438720969, "grad_norm": 0.77734375, "learning_rate": 1.4749381494342534e-05, "loss": 0.3134, "step": 9083 }, { "epoch": 0.6874982261955442, "grad_norm": 0.78515625, "learning_rate": 1.4748333287397824e-05, "loss": 0.3176, "step": 9084 }, { "epoch": 0.6875739085189916, "grad_norm": 1.0390625, "learning_rate": 1.4747285013089858e-05, "loss": 0.3539, "step": 9085 }, { "epoch": 0.6876495908424388, "grad_norm": 0.73828125, "learning_rate": 1.474623667143351e-05, "loss": 0.2871, "step": 9086 }, { "epoch": 0.6877252731658862, "grad_norm": 0.8046875, "learning_rate": 1.4745188262443646e-05, "loss": 0.3397, "step": 9087 }, { "epoch": 0.6878009554893335, "grad_norm": 0.80078125, "learning_rate": 1.4744139786135146e-05, "loss": 0.3661, "step": 9088 }, { "epoch": 0.6878766378127809, "grad_norm": 0.72265625, "learning_rate": 1.474309124252288e-05, "loss": 0.271, "step": 9089 }, { "epoch": 0.6879523201362282, "grad_norm": 0.6875, "learning_rate": 1.4742042631621727e-05, "loss": 0.2512, "step": 9090 }, { "epoch": 0.6880280024596755, "grad_norm": 0.76171875, "learning_rate": 1.4740993953446561e-05, "loss": 0.294, "step": 9091 }, { "epoch": 0.6881036847831229, "grad_norm": 0.7890625, "learning_rate": 1.4739945208012258e-05, "loss": 0.3591, "step": 9092 }, { "epoch": 0.6881793671065701, "grad_norm": 0.7265625, "learning_rate": 1.4738896395333702e-05, "loss": 0.2783, "step": 9093 }, { "epoch": 0.6882550494300175, "grad_norm": 0.74609375, "learning_rate": 1.4737847515425766e-05, "loss": 0.3135, "step": 9094 }, { "epoch": 0.6883307317534648, "grad_norm": 1.0390625, "learning_rate": 1.4736798568303333e-05, "loss": 0.3536, "step": 9095 }, { "epoch": 0.6884064140769122, "grad_norm": 0.796875, "learning_rate": 1.4735749553981285e-05, "loss": 0.3537, "step": 9096 }, { "epoch": 0.6884820964003595, "grad_norm": 0.85546875, "learning_rate": 1.4734700472474502e-05, "loss": 0.3911, "step": 9097 }, { "epoch": 0.6885577787238069, "grad_norm": 0.81640625, "learning_rate": 1.4733651323797863e-05, "loss": 0.3574, "step": 9098 }, { "epoch": 0.6886334610472542, "grad_norm": 0.7421875, "learning_rate": 1.4732602107966262e-05, "loss": 0.3031, "step": 9099 }, { "epoch": 0.6887091433707014, "grad_norm": 0.7265625, "learning_rate": 1.473155282499458e-05, "loss": 0.3134, "step": 9100 }, { "epoch": 0.6887848256941488, "grad_norm": 0.796875, "learning_rate": 1.4730503474897698e-05, "loss": 0.3269, "step": 9101 }, { "epoch": 0.6888605080175961, "grad_norm": 0.76953125, "learning_rate": 1.4729454057690507e-05, "loss": 0.3102, "step": 9102 }, { "epoch": 0.6889361903410435, "grad_norm": 0.765625, "learning_rate": 1.4728404573387895e-05, "loss": 0.311, "step": 9103 }, { "epoch": 0.6890118726644908, "grad_norm": 0.78125, "learning_rate": 1.472735502200475e-05, "loss": 0.3394, "step": 9104 }, { "epoch": 0.6890875549879382, "grad_norm": 0.7265625, "learning_rate": 1.4726305403555961e-05, "loss": 0.2973, "step": 9105 }, { "epoch": 0.6891632373113855, "grad_norm": 0.73828125, "learning_rate": 1.4725255718056419e-05, "loss": 0.3109, "step": 9106 }, { "epoch": 0.6892389196348327, "grad_norm": 0.73046875, "learning_rate": 1.4724205965521018e-05, "loss": 0.2715, "step": 9107 }, { "epoch": 0.6893146019582801, "grad_norm": 0.77734375, "learning_rate": 1.4723156145964647e-05, "loss": 0.3342, "step": 9108 }, { "epoch": 0.6893902842817274, "grad_norm": 0.84765625, "learning_rate": 1.4722106259402197e-05, "loss": 0.3811, "step": 9109 }, { "epoch": 0.6894659666051748, "grad_norm": 0.7890625, "learning_rate": 1.4721056305848571e-05, "loss": 0.3272, "step": 9110 }, { "epoch": 0.6895416489286221, "grad_norm": 0.73046875, "learning_rate": 1.472000628531866e-05, "loss": 0.2898, "step": 9111 }, { "epoch": 0.6896173312520695, "grad_norm": 0.76953125, "learning_rate": 1.4718956197827356e-05, "loss": 0.3371, "step": 9112 }, { "epoch": 0.6896930135755168, "grad_norm": 0.7578125, "learning_rate": 1.4717906043389564e-05, "loss": 0.3036, "step": 9113 }, { "epoch": 0.6897686958989641, "grad_norm": 0.80078125, "learning_rate": 1.471685582202018e-05, "loss": 0.3379, "step": 9114 }, { "epoch": 0.6898443782224114, "grad_norm": 0.8046875, "learning_rate": 1.4715805533734097e-05, "loss": 0.3292, "step": 9115 }, { "epoch": 0.6899200605458587, "grad_norm": 0.734375, "learning_rate": 1.4714755178546223e-05, "loss": 0.3007, "step": 9116 }, { "epoch": 0.6899957428693061, "grad_norm": 0.79296875, "learning_rate": 1.4713704756471456e-05, "loss": 0.3157, "step": 9117 }, { "epoch": 0.6900714251927534, "grad_norm": 0.7734375, "learning_rate": 1.4712654267524695e-05, "loss": 0.3028, "step": 9118 }, { "epoch": 0.6901471075162008, "grad_norm": 0.734375, "learning_rate": 1.471160371172085e-05, "loss": 0.2737, "step": 9119 }, { "epoch": 0.6902227898396481, "grad_norm": 0.765625, "learning_rate": 1.4710553089074817e-05, "loss": 0.3131, "step": 9120 }, { "epoch": 0.6902984721630954, "grad_norm": 0.71875, "learning_rate": 1.4709502399601507e-05, "loss": 0.3028, "step": 9121 }, { "epoch": 0.6903741544865427, "grad_norm": 0.765625, "learning_rate": 1.4708451643315827e-05, "loss": 0.3288, "step": 9122 }, { "epoch": 0.6904498368099901, "grad_norm": 0.7578125, "learning_rate": 1.4707400820232675e-05, "loss": 0.298, "step": 9123 }, { "epoch": 0.6905255191334374, "grad_norm": 0.7890625, "learning_rate": 1.4706349930366965e-05, "loss": 0.328, "step": 9124 }, { "epoch": 0.6906012014568848, "grad_norm": 1.609375, "learning_rate": 1.4705298973733607e-05, "loss": 0.3488, "step": 9125 }, { "epoch": 0.6906768837803321, "grad_norm": 0.9296875, "learning_rate": 1.4704247950347508e-05, "loss": 0.3325, "step": 9126 }, { "epoch": 0.6907525661037793, "grad_norm": 0.8125, "learning_rate": 1.4703196860223579e-05, "loss": 0.3362, "step": 9127 }, { "epoch": 0.6908282484272267, "grad_norm": 0.78125, "learning_rate": 1.4702145703376731e-05, "loss": 0.3023, "step": 9128 }, { "epoch": 0.690903930750674, "grad_norm": 0.75390625, "learning_rate": 1.4701094479821878e-05, "loss": 0.3316, "step": 9129 }, { "epoch": 0.6909796130741214, "grad_norm": 0.7421875, "learning_rate": 1.470004318957393e-05, "loss": 0.3138, "step": 9130 }, { "epoch": 0.6910552953975687, "grad_norm": 0.8046875, "learning_rate": 1.4698991832647807e-05, "loss": 0.3418, "step": 9131 }, { "epoch": 0.6911309777210161, "grad_norm": 0.72265625, "learning_rate": 1.469794040905842e-05, "loss": 0.2723, "step": 9132 }, { "epoch": 0.6912066600444634, "grad_norm": 0.79296875, "learning_rate": 1.4696888918820684e-05, "loss": 0.3405, "step": 9133 }, { "epoch": 0.6912823423679106, "grad_norm": 0.7578125, "learning_rate": 1.4695837361949522e-05, "loss": 0.3202, "step": 9134 }, { "epoch": 0.691358024691358, "grad_norm": 0.70703125, "learning_rate": 1.4694785738459847e-05, "loss": 0.2841, "step": 9135 }, { "epoch": 0.6914337070148053, "grad_norm": 0.73828125, "learning_rate": 1.4693734048366582e-05, "loss": 0.3142, "step": 9136 }, { "epoch": 0.6915093893382527, "grad_norm": 1.0, "learning_rate": 1.4692682291684642e-05, "loss": 0.3409, "step": 9137 }, { "epoch": 0.6915850716617, "grad_norm": 0.84765625, "learning_rate": 1.469163046842895e-05, "loss": 0.3273, "step": 9138 }, { "epoch": 0.6916607539851474, "grad_norm": 0.734375, "learning_rate": 1.469057857861443e-05, "loss": 0.3057, "step": 9139 }, { "epoch": 0.6917364363085947, "grad_norm": 0.74609375, "learning_rate": 1.4689526622256005e-05, "loss": 0.3203, "step": 9140 }, { "epoch": 0.691812118632042, "grad_norm": 0.7421875, "learning_rate": 1.4688474599368596e-05, "loss": 0.3128, "step": 9141 }, { "epoch": 0.6918878009554893, "grad_norm": 0.6796875, "learning_rate": 1.4687422509967129e-05, "loss": 0.2675, "step": 9142 }, { "epoch": 0.6919634832789366, "grad_norm": 0.71484375, "learning_rate": 1.468637035406653e-05, "loss": 0.2734, "step": 9143 }, { "epoch": 0.692039165602384, "grad_norm": 0.80078125, "learning_rate": 1.4685318131681725e-05, "loss": 0.328, "step": 9144 }, { "epoch": 0.6921148479258313, "grad_norm": 0.7890625, "learning_rate": 1.4684265842827646e-05, "loss": 0.3539, "step": 9145 }, { "epoch": 0.6921905302492787, "grad_norm": 0.73046875, "learning_rate": 1.4683213487519212e-05, "loss": 0.3156, "step": 9146 }, { "epoch": 0.692266212572726, "grad_norm": 0.765625, "learning_rate": 1.468216106577136e-05, "loss": 0.3163, "step": 9147 }, { "epoch": 0.6923418948961733, "grad_norm": 0.6875, "learning_rate": 1.4681108577599017e-05, "loss": 0.2593, "step": 9148 }, { "epoch": 0.6924175772196206, "grad_norm": 0.83203125, "learning_rate": 1.4680056023017117e-05, "loss": 0.371, "step": 9149 }, { "epoch": 0.692493259543068, "grad_norm": 0.76953125, "learning_rate": 1.4679003402040593e-05, "loss": 0.3394, "step": 9150 }, { "epoch": 0.6925689418665153, "grad_norm": 0.7578125, "learning_rate": 1.4677950714684375e-05, "loss": 0.3177, "step": 9151 }, { "epoch": 0.6926446241899626, "grad_norm": 0.71875, "learning_rate": 1.4676897960963399e-05, "loss": 0.315, "step": 9152 }, { "epoch": 0.69272030651341, "grad_norm": 0.71875, "learning_rate": 1.4675845140892597e-05, "loss": 0.2601, "step": 9153 }, { "epoch": 0.6927959888368573, "grad_norm": 0.73046875, "learning_rate": 1.4674792254486909e-05, "loss": 0.3133, "step": 9154 }, { "epoch": 0.6928716711603046, "grad_norm": 0.7734375, "learning_rate": 1.4673739301761273e-05, "loss": 0.3193, "step": 9155 }, { "epoch": 0.6929473534837519, "grad_norm": 0.7578125, "learning_rate": 1.4672686282730622e-05, "loss": 0.3085, "step": 9156 }, { "epoch": 0.6930230358071993, "grad_norm": 0.77734375, "learning_rate": 1.4671633197409896e-05, "loss": 0.324, "step": 9157 }, { "epoch": 0.6930987181306466, "grad_norm": 0.765625, "learning_rate": 1.467058004581404e-05, "loss": 0.3346, "step": 9158 }, { "epoch": 0.693174400454094, "grad_norm": 0.8046875, "learning_rate": 1.4669526827957989e-05, "loss": 0.3332, "step": 9159 }, { "epoch": 0.6932500827775413, "grad_norm": 0.8203125, "learning_rate": 1.4668473543856687e-05, "loss": 0.3877, "step": 9160 }, { "epoch": 0.6933257651009886, "grad_norm": 0.80859375, "learning_rate": 1.4667420193525078e-05, "loss": 0.3768, "step": 9161 }, { "epoch": 0.6934014474244359, "grad_norm": 0.875, "learning_rate": 1.46663667769781e-05, "loss": 0.356, "step": 9162 }, { "epoch": 0.6934771297478832, "grad_norm": 0.76171875, "learning_rate": 1.4665313294230705e-05, "loss": 0.3078, "step": 9163 }, { "epoch": 0.6935528120713306, "grad_norm": 0.796875, "learning_rate": 1.4664259745297838e-05, "loss": 0.3264, "step": 9164 }, { "epoch": 0.6936284943947779, "grad_norm": 0.80078125, "learning_rate": 1.4663206130194434e-05, "loss": 0.3563, "step": 9165 }, { "epoch": 0.6937041767182253, "grad_norm": 0.81640625, "learning_rate": 1.4662152448935453e-05, "loss": 0.3336, "step": 9166 }, { "epoch": 0.6937798590416726, "grad_norm": 0.85546875, "learning_rate": 1.466109870153584e-05, "loss": 0.3297, "step": 9167 }, { "epoch": 0.69385554136512, "grad_norm": 1.0234375, "learning_rate": 1.4660044888010543e-05, "loss": 0.3733, "step": 9168 }, { "epoch": 0.6939312236885672, "grad_norm": 0.796875, "learning_rate": 1.4658991008374512e-05, "loss": 0.3595, "step": 9169 }, { "epoch": 0.6940069060120145, "grad_norm": 0.78515625, "learning_rate": 1.46579370626427e-05, "loss": 0.3116, "step": 9170 }, { "epoch": 0.6940825883354619, "grad_norm": 0.7890625, "learning_rate": 1.4656883050830053e-05, "loss": 0.3483, "step": 9171 }, { "epoch": 0.6941582706589092, "grad_norm": 0.765625, "learning_rate": 1.4655828972951531e-05, "loss": 0.3279, "step": 9172 }, { "epoch": 0.6942339529823566, "grad_norm": 0.8203125, "learning_rate": 1.4654774829022087e-05, "loss": 0.3426, "step": 9173 }, { "epoch": 0.6943096353058039, "grad_norm": 0.78125, "learning_rate": 1.4653720619056672e-05, "loss": 0.3402, "step": 9174 }, { "epoch": 0.6943853176292513, "grad_norm": 0.765625, "learning_rate": 1.4652666343070246e-05, "loss": 0.3101, "step": 9175 }, { "epoch": 0.6944609999526985, "grad_norm": 0.77734375, "learning_rate": 1.465161200107776e-05, "loss": 0.3531, "step": 9176 }, { "epoch": 0.6945366822761458, "grad_norm": 0.734375, "learning_rate": 1.4650557593094176e-05, "loss": 0.2907, "step": 9177 }, { "epoch": 0.6946123645995932, "grad_norm": 0.80859375, "learning_rate": 1.4649503119134456e-05, "loss": 0.3603, "step": 9178 }, { "epoch": 0.6946880469230405, "grad_norm": 0.78515625, "learning_rate": 1.4648448579213552e-05, "loss": 0.3333, "step": 9179 }, { "epoch": 0.6947637292464879, "grad_norm": 0.7890625, "learning_rate": 1.464739397334643e-05, "loss": 0.3528, "step": 9180 }, { "epoch": 0.6948394115699352, "grad_norm": 0.82421875, "learning_rate": 1.4646339301548046e-05, "loss": 0.3472, "step": 9181 }, { "epoch": 0.6949150938933826, "grad_norm": 0.8203125, "learning_rate": 1.4645284563833368e-05, "loss": 0.3433, "step": 9182 }, { "epoch": 0.6949907762168298, "grad_norm": 0.73828125, "learning_rate": 1.4644229760217356e-05, "loss": 0.305, "step": 9183 }, { "epoch": 0.6950664585402772, "grad_norm": 0.73828125, "learning_rate": 1.4643174890714978e-05, "loss": 0.2848, "step": 9184 }, { "epoch": 0.6951421408637245, "grad_norm": 0.94140625, "learning_rate": 1.4642119955341194e-05, "loss": 0.3256, "step": 9185 }, { "epoch": 0.6952178231871718, "grad_norm": 0.74609375, "learning_rate": 1.4641064954110969e-05, "loss": 0.3162, "step": 9186 }, { "epoch": 0.6952935055106192, "grad_norm": 0.796875, "learning_rate": 1.4640009887039279e-05, "loss": 0.358, "step": 9187 }, { "epoch": 0.6953691878340665, "grad_norm": 0.8203125, "learning_rate": 1.4638954754141084e-05, "loss": 0.353, "step": 9188 }, { "epoch": 0.6954448701575139, "grad_norm": 1.078125, "learning_rate": 1.4637899555431352e-05, "loss": 0.3647, "step": 9189 }, { "epoch": 0.6955205524809611, "grad_norm": 0.75390625, "learning_rate": 1.463684429092506e-05, "loss": 0.3207, "step": 9190 }, { "epoch": 0.6955962348044085, "grad_norm": 0.7265625, "learning_rate": 1.4635788960637174e-05, "loss": 0.29, "step": 9191 }, { "epoch": 0.6956719171278558, "grad_norm": 0.8125, "learning_rate": 1.4634733564582666e-05, "loss": 0.3391, "step": 9192 }, { "epoch": 0.6957475994513032, "grad_norm": 0.79296875, "learning_rate": 1.4633678102776508e-05, "loss": 0.3282, "step": 9193 }, { "epoch": 0.6958232817747505, "grad_norm": 0.83984375, "learning_rate": 1.4632622575233674e-05, "loss": 0.3348, "step": 9194 }, { "epoch": 0.6958989640981978, "grad_norm": 1.125, "learning_rate": 1.4631566981969139e-05, "loss": 0.3091, "step": 9195 }, { "epoch": 0.6959746464216452, "grad_norm": 0.76953125, "learning_rate": 1.463051132299788e-05, "loss": 0.3056, "step": 9196 }, { "epoch": 0.6960503287450924, "grad_norm": 0.78125, "learning_rate": 1.4629455598334871e-05, "loss": 0.3227, "step": 9197 }, { "epoch": 0.6961260110685398, "grad_norm": 1.453125, "learning_rate": 1.462839980799509e-05, "loss": 0.3846, "step": 9198 }, { "epoch": 0.6962016933919871, "grad_norm": 0.78515625, "learning_rate": 1.4627343951993513e-05, "loss": 0.3255, "step": 9199 }, { "epoch": 0.6962773757154345, "grad_norm": 0.76953125, "learning_rate": 1.4626288030345124e-05, "loss": 0.3094, "step": 9200 }, { "epoch": 0.6963530580388818, "grad_norm": 0.84765625, "learning_rate": 1.4625232043064901e-05, "loss": 0.3794, "step": 9201 }, { "epoch": 0.6964287403623292, "grad_norm": 0.77734375, "learning_rate": 1.4624175990167824e-05, "loss": 0.3384, "step": 9202 }, { "epoch": 0.6965044226857765, "grad_norm": 0.8125, "learning_rate": 1.4623119871668871e-05, "loss": 0.3483, "step": 9203 }, { "epoch": 0.6965801050092237, "grad_norm": 0.73828125, "learning_rate": 1.4622063687583033e-05, "loss": 0.2987, "step": 9204 }, { "epoch": 0.6966557873326711, "grad_norm": 0.71875, "learning_rate": 1.4621007437925287e-05, "loss": 0.2768, "step": 9205 }, { "epoch": 0.6967314696561184, "grad_norm": 0.7578125, "learning_rate": 1.4619951122710624e-05, "loss": 0.3454, "step": 9206 }, { "epoch": 0.6968071519795658, "grad_norm": 0.828125, "learning_rate": 1.4618894741954024e-05, "loss": 0.3351, "step": 9207 }, { "epoch": 0.6968828343030131, "grad_norm": 0.9375, "learning_rate": 1.4617838295670477e-05, "loss": 0.322, "step": 9208 }, { "epoch": 0.6969585166264605, "grad_norm": 0.71875, "learning_rate": 1.4616781783874967e-05, "loss": 0.3006, "step": 9209 }, { "epoch": 0.6970341989499078, "grad_norm": 0.76171875, "learning_rate": 1.4615725206582487e-05, "loss": 0.3237, "step": 9210 }, { "epoch": 0.697109881273355, "grad_norm": 1.078125, "learning_rate": 1.4614668563808025e-05, "loss": 0.3684, "step": 9211 }, { "epoch": 0.6971855635968024, "grad_norm": 0.7578125, "learning_rate": 1.4613611855566567e-05, "loss": 0.3274, "step": 9212 }, { "epoch": 0.6972612459202497, "grad_norm": 0.74609375, "learning_rate": 1.461255508187311e-05, "loss": 0.3078, "step": 9213 }, { "epoch": 0.6973369282436971, "grad_norm": 0.96484375, "learning_rate": 1.4611498242742643e-05, "loss": 0.3336, "step": 9214 }, { "epoch": 0.6974126105671444, "grad_norm": 0.7265625, "learning_rate": 1.4610441338190163e-05, "loss": 0.3045, "step": 9215 }, { "epoch": 0.6974882928905918, "grad_norm": 0.75, "learning_rate": 1.4609384368230658e-05, "loss": 0.328, "step": 9216 }, { "epoch": 0.6975639752140391, "grad_norm": 0.796875, "learning_rate": 1.4608327332879124e-05, "loss": 0.3659, "step": 9217 }, { "epoch": 0.6976396575374864, "grad_norm": 0.75, "learning_rate": 1.460727023215056e-05, "loss": 0.3228, "step": 9218 }, { "epoch": 0.6977153398609337, "grad_norm": 0.76953125, "learning_rate": 1.460621306605996e-05, "loss": 0.3182, "step": 9219 }, { "epoch": 0.697791022184381, "grad_norm": 0.765625, "learning_rate": 1.460515583462233e-05, "loss": 0.3141, "step": 9220 }, { "epoch": 0.6978667045078284, "grad_norm": 0.8203125, "learning_rate": 1.4604098537852653e-05, "loss": 0.3462, "step": 9221 }, { "epoch": 0.6979423868312757, "grad_norm": 0.75, "learning_rate": 1.4603041175765941e-05, "loss": 0.2978, "step": 9222 }, { "epoch": 0.6980180691547231, "grad_norm": 0.7265625, "learning_rate": 1.460198374837719e-05, "loss": 0.2814, "step": 9223 }, { "epoch": 0.6980937514781704, "grad_norm": 0.76171875, "learning_rate": 1.4600926255701402e-05, "loss": 0.2952, "step": 9224 }, { "epoch": 0.6981694338016177, "grad_norm": 0.7578125, "learning_rate": 1.4599868697753583e-05, "loss": 0.3185, "step": 9225 }, { "epoch": 0.698245116125065, "grad_norm": 0.7421875, "learning_rate": 1.4598811074548732e-05, "loss": 0.2977, "step": 9226 }, { "epoch": 0.6983207984485124, "grad_norm": 0.8203125, "learning_rate": 1.459775338610185e-05, "loss": 0.3568, "step": 9227 }, { "epoch": 0.6983964807719597, "grad_norm": 0.80859375, "learning_rate": 1.4596695632427948e-05, "loss": 0.3565, "step": 9228 }, { "epoch": 0.698472163095407, "grad_norm": 1.1484375, "learning_rate": 1.4595637813542035e-05, "loss": 0.3844, "step": 9229 }, { "epoch": 0.6985478454188544, "grad_norm": 0.7890625, "learning_rate": 1.4594579929459107e-05, "loss": 0.3171, "step": 9230 }, { "epoch": 0.6986235277423017, "grad_norm": 0.78515625, "learning_rate": 1.4593521980194181e-05, "loss": 0.302, "step": 9231 }, { "epoch": 0.698699210065749, "grad_norm": 0.7734375, "learning_rate": 1.4592463965762262e-05, "loss": 0.3061, "step": 9232 }, { "epoch": 0.6987748923891963, "grad_norm": 0.84765625, "learning_rate": 1.459140588617836e-05, "loss": 0.3662, "step": 9233 }, { "epoch": 0.6988505747126437, "grad_norm": 0.7734375, "learning_rate": 1.4590347741457489e-05, "loss": 0.3269, "step": 9234 }, { "epoch": 0.698926257036091, "grad_norm": 0.74609375, "learning_rate": 1.4589289531614657e-05, "loss": 0.3003, "step": 9235 }, { "epoch": 0.6990019393595384, "grad_norm": 0.734375, "learning_rate": 1.4588231256664876e-05, "loss": 0.3049, "step": 9236 }, { "epoch": 0.6990776216829857, "grad_norm": 0.78515625, "learning_rate": 1.458717291662316e-05, "loss": 0.3272, "step": 9237 }, { "epoch": 0.699153304006433, "grad_norm": 0.73828125, "learning_rate": 1.4586114511504527e-05, "loss": 0.3205, "step": 9238 }, { "epoch": 0.6992289863298803, "grad_norm": 0.65625, "learning_rate": 1.4585056041323991e-05, "loss": 0.2552, "step": 9239 }, { "epoch": 0.6993046686533276, "grad_norm": 0.765625, "learning_rate": 1.4583997506096563e-05, "loss": 0.3199, "step": 9240 }, { "epoch": 0.699380350976775, "grad_norm": 0.80078125, "learning_rate": 1.4582938905837267e-05, "loss": 0.3501, "step": 9241 }, { "epoch": 0.6994560333002223, "grad_norm": 0.80078125, "learning_rate": 1.4581880240561113e-05, "loss": 0.3725, "step": 9242 }, { "epoch": 0.6995317156236697, "grad_norm": 0.74609375, "learning_rate": 1.4580821510283132e-05, "loss": 0.3165, "step": 9243 }, { "epoch": 0.699607397947117, "grad_norm": 0.73828125, "learning_rate": 1.4579762715018332e-05, "loss": 0.3064, "step": 9244 }, { "epoch": 0.6996830802705644, "grad_norm": 0.7578125, "learning_rate": 1.457870385478174e-05, "loss": 0.2957, "step": 9245 }, { "epoch": 0.6997587625940116, "grad_norm": 0.81640625, "learning_rate": 1.4577644929588378e-05, "loss": 0.3337, "step": 9246 }, { "epoch": 0.6998344449174589, "grad_norm": 0.7421875, "learning_rate": 1.4576585939453265e-05, "loss": 0.3029, "step": 9247 }, { "epoch": 0.6999101272409063, "grad_norm": 0.83984375, "learning_rate": 1.457552688439143e-05, "loss": 0.3007, "step": 9248 }, { "epoch": 0.6999858095643536, "grad_norm": 0.76953125, "learning_rate": 1.457446776441789e-05, "loss": 0.327, "step": 9249 }, { "epoch": 0.700061491887801, "grad_norm": 0.6796875, "learning_rate": 1.4573408579547676e-05, "loss": 0.2446, "step": 9250 }, { "epoch": 0.7001371742112483, "grad_norm": 0.82421875, "learning_rate": 1.4572349329795814e-05, "loss": 0.3611, "step": 9251 }, { "epoch": 0.7002128565346956, "grad_norm": 0.703125, "learning_rate": 1.4571290015177328e-05, "loss": 0.2884, "step": 9252 }, { "epoch": 0.7002885388581429, "grad_norm": 0.71875, "learning_rate": 1.4570230635707251e-05, "loss": 0.2919, "step": 9253 }, { "epoch": 0.7003642211815903, "grad_norm": 0.78125, "learning_rate": 1.4569171191400611e-05, "loss": 0.3352, "step": 9254 }, { "epoch": 0.7003642211815903, "eval_loss": 0.33362916111946106, "eval_runtime": 83.3144, "eval_samples_per_second": 58.357, "eval_steps_per_second": 58.357, "step": 9254 }, { "epoch": 0.7004399035050376, "grad_norm": 0.77734375, "learning_rate": 1.4568111682272434e-05, "loss": 0.3506, "step": 9255 }, { "epoch": 0.7005155858284849, "grad_norm": 0.77734375, "learning_rate": 1.4567052108337755e-05, "loss": 0.3164, "step": 9256 }, { "epoch": 0.7005912681519323, "grad_norm": 0.78515625, "learning_rate": 1.4565992469611606e-05, "loss": 0.3276, "step": 9257 }, { "epoch": 0.7006669504753796, "grad_norm": 0.7734375, "learning_rate": 1.4564932766109015e-05, "loss": 0.3345, "step": 9258 }, { "epoch": 0.7007426327988269, "grad_norm": 0.7421875, "learning_rate": 1.4563872997845018e-05, "loss": 0.2941, "step": 9259 }, { "epoch": 0.7008183151222742, "grad_norm": 0.796875, "learning_rate": 1.4562813164834656e-05, "loss": 0.3316, "step": 9260 }, { "epoch": 0.7008939974457216, "grad_norm": 0.8203125, "learning_rate": 1.4561753267092954e-05, "loss": 0.3901, "step": 9261 }, { "epoch": 0.7009696797691689, "grad_norm": 0.7890625, "learning_rate": 1.4560693304634956e-05, "loss": 0.3239, "step": 9262 }, { "epoch": 0.7010453620926163, "grad_norm": 0.7734375, "learning_rate": 1.4559633277475697e-05, "loss": 0.318, "step": 9263 }, { "epoch": 0.7011210444160636, "grad_norm": 0.77734375, "learning_rate": 1.4558573185630217e-05, "loss": 0.3008, "step": 9264 }, { "epoch": 0.701196726739511, "grad_norm": 0.80859375, "learning_rate": 1.455751302911355e-05, "loss": 0.3633, "step": 9265 }, { "epoch": 0.7012724090629582, "grad_norm": 0.77734375, "learning_rate": 1.4556452807940743e-05, "loss": 0.3349, "step": 9266 }, { "epoch": 0.7013480913864055, "grad_norm": 0.6875, "learning_rate": 1.4555392522126834e-05, "loss": 0.2496, "step": 9267 }, { "epoch": 0.7014237737098529, "grad_norm": 0.76171875, "learning_rate": 1.455433217168686e-05, "loss": 0.3172, "step": 9268 }, { "epoch": 0.7014994560333002, "grad_norm": 0.734375, "learning_rate": 1.4553271756635874e-05, "loss": 0.3085, "step": 9269 }, { "epoch": 0.7015751383567476, "grad_norm": 0.734375, "learning_rate": 1.4552211276988913e-05, "loss": 0.3133, "step": 9270 }, { "epoch": 0.7016508206801949, "grad_norm": 0.65234375, "learning_rate": 1.4551150732761022e-05, "loss": 0.2469, "step": 9271 }, { "epoch": 0.7017265030036423, "grad_norm": 0.85546875, "learning_rate": 1.4550090123967249e-05, "loss": 0.3936, "step": 9272 }, { "epoch": 0.7018021853270895, "grad_norm": 0.85546875, "learning_rate": 1.4549029450622638e-05, "loss": 0.3796, "step": 9273 }, { "epoch": 0.7018778676505368, "grad_norm": 0.7734375, "learning_rate": 1.4547968712742236e-05, "loss": 0.3411, "step": 9274 }, { "epoch": 0.7019535499739842, "grad_norm": 0.7890625, "learning_rate": 1.4546907910341096e-05, "loss": 0.3586, "step": 9275 }, { "epoch": 0.7020292322974315, "grad_norm": 0.7890625, "learning_rate": 1.4545847043434266e-05, "loss": 0.3123, "step": 9276 }, { "epoch": 0.7021049146208789, "grad_norm": 0.72265625, "learning_rate": 1.4544786112036792e-05, "loss": 0.2934, "step": 9277 }, { "epoch": 0.7021805969443262, "grad_norm": 0.80078125, "learning_rate": 1.454372511616373e-05, "loss": 0.361, "step": 9278 }, { "epoch": 0.7022562792677736, "grad_norm": 0.7109375, "learning_rate": 1.4542664055830129e-05, "loss": 0.2675, "step": 9279 }, { "epoch": 0.7023319615912208, "grad_norm": 0.796875, "learning_rate": 1.454160293105104e-05, "loss": 0.3468, "step": 9280 }, { "epoch": 0.7024076439146681, "grad_norm": 0.84765625, "learning_rate": 1.4540541741841524e-05, "loss": 0.3279, "step": 9281 }, { "epoch": 0.7024833262381155, "grad_norm": 0.78125, "learning_rate": 1.453948048821663e-05, "loss": 0.3077, "step": 9282 }, { "epoch": 0.7025590085615628, "grad_norm": 0.7421875, "learning_rate": 1.4538419170191413e-05, "loss": 0.2959, "step": 9283 }, { "epoch": 0.7026346908850102, "grad_norm": 0.70703125, "learning_rate": 1.4537357787780937e-05, "loss": 0.3062, "step": 9284 }, { "epoch": 0.7027103732084575, "grad_norm": 0.71484375, "learning_rate": 1.4536296341000253e-05, "loss": 0.2817, "step": 9285 }, { "epoch": 0.7027860555319049, "grad_norm": 0.75390625, "learning_rate": 1.4535234829864417e-05, "loss": 0.3094, "step": 9286 }, { "epoch": 0.7028617378553521, "grad_norm": 0.7890625, "learning_rate": 1.4534173254388495e-05, "loss": 0.3309, "step": 9287 }, { "epoch": 0.7029374201787995, "grad_norm": 0.71484375, "learning_rate": 1.4533111614587545e-05, "loss": 0.27, "step": 9288 }, { "epoch": 0.7030131025022468, "grad_norm": 0.71875, "learning_rate": 1.4532049910476625e-05, "loss": 0.2844, "step": 9289 }, { "epoch": 0.7030887848256941, "grad_norm": 0.78125, "learning_rate": 1.4530988142070802e-05, "loss": 0.3485, "step": 9290 }, { "epoch": 0.7031644671491415, "grad_norm": 1.0546875, "learning_rate": 1.4529926309385139e-05, "loss": 0.3307, "step": 9291 }, { "epoch": 0.7032401494725888, "grad_norm": 0.734375, "learning_rate": 1.4528864412434691e-05, "loss": 0.282, "step": 9292 }, { "epoch": 0.7033158317960362, "grad_norm": 0.75390625, "learning_rate": 1.4527802451234535e-05, "loss": 0.2918, "step": 9293 }, { "epoch": 0.7033915141194834, "grad_norm": 0.84375, "learning_rate": 1.4526740425799731e-05, "loss": 0.3615, "step": 9294 }, { "epoch": 0.7034671964429308, "grad_norm": 0.75, "learning_rate": 1.4525678336145344e-05, "loss": 0.3068, "step": 9295 }, { "epoch": 0.7035428787663781, "grad_norm": 0.76953125, "learning_rate": 1.4524616182286444e-05, "loss": 0.312, "step": 9296 }, { "epoch": 0.7036185610898255, "grad_norm": 0.8046875, "learning_rate": 1.4523553964238098e-05, "loss": 0.3211, "step": 9297 }, { "epoch": 0.7036942434132728, "grad_norm": 0.7109375, "learning_rate": 1.4522491682015377e-05, "loss": 0.2636, "step": 9298 }, { "epoch": 0.7037699257367201, "grad_norm": 0.828125, "learning_rate": 1.4521429335633352e-05, "loss": 0.3486, "step": 9299 }, { "epoch": 0.7038456080601675, "grad_norm": 0.78125, "learning_rate": 1.452036692510709e-05, "loss": 0.3276, "step": 9300 }, { "epoch": 0.7039212903836147, "grad_norm": 0.796875, "learning_rate": 1.4519304450451668e-05, "loss": 0.351, "step": 9301 }, { "epoch": 0.7039969727070621, "grad_norm": 0.734375, "learning_rate": 1.4518241911682157e-05, "loss": 0.3052, "step": 9302 }, { "epoch": 0.7040726550305094, "grad_norm": 0.76171875, "learning_rate": 1.4517179308813631e-05, "loss": 0.331, "step": 9303 }, { "epoch": 0.7041483373539568, "grad_norm": 0.94140625, "learning_rate": 1.4516116641861164e-05, "loss": 0.3361, "step": 9304 }, { "epoch": 0.7042240196774041, "grad_norm": 0.8203125, "learning_rate": 1.4515053910839833e-05, "loss": 0.3415, "step": 9305 }, { "epoch": 0.7042997020008515, "grad_norm": 0.828125, "learning_rate": 1.4513991115764717e-05, "loss": 0.36, "step": 9306 }, { "epoch": 0.7043753843242988, "grad_norm": 0.7734375, "learning_rate": 1.4512928256650884e-05, "loss": 0.3308, "step": 9307 }, { "epoch": 0.704451066647746, "grad_norm": 0.72265625, "learning_rate": 1.4511865333513424e-05, "loss": 0.2899, "step": 9308 }, { "epoch": 0.7045267489711934, "grad_norm": 1.1796875, "learning_rate": 1.4510802346367409e-05, "loss": 0.3904, "step": 9309 }, { "epoch": 0.7046024312946407, "grad_norm": 0.7265625, "learning_rate": 1.4509739295227924e-05, "loss": 0.2988, "step": 9310 }, { "epoch": 0.7046781136180881, "grad_norm": 0.8203125, "learning_rate": 1.4508676180110048e-05, "loss": 0.3621, "step": 9311 }, { "epoch": 0.7047537959415354, "grad_norm": 0.70703125, "learning_rate": 1.4507613001028864e-05, "loss": 0.2726, "step": 9312 }, { "epoch": 0.7048294782649828, "grad_norm": 0.796875, "learning_rate": 1.4506549757999456e-05, "loss": 0.3405, "step": 9313 }, { "epoch": 0.7049051605884301, "grad_norm": 0.7890625, "learning_rate": 1.4505486451036901e-05, "loss": 0.3336, "step": 9314 }, { "epoch": 0.7049808429118773, "grad_norm": 0.80859375, "learning_rate": 1.450442308015629e-05, "loss": 0.3795, "step": 9315 }, { "epoch": 0.7050565252353247, "grad_norm": 0.74609375, "learning_rate": 1.450335964537271e-05, "loss": 0.3085, "step": 9316 }, { "epoch": 0.705132207558772, "grad_norm": 0.67578125, "learning_rate": 1.4502296146701245e-05, "loss": 0.2713, "step": 9317 }, { "epoch": 0.7052078898822194, "grad_norm": 0.85546875, "learning_rate": 1.4501232584156984e-05, "loss": 0.3499, "step": 9318 }, { "epoch": 0.7052835722056667, "grad_norm": 0.7890625, "learning_rate": 1.4500168957755012e-05, "loss": 0.33, "step": 9319 }, { "epoch": 0.7053592545291141, "grad_norm": 0.80859375, "learning_rate": 1.449910526751042e-05, "loss": 0.3205, "step": 9320 }, { "epoch": 0.7054349368525614, "grad_norm": 0.81640625, "learning_rate": 1.44980415134383e-05, "loss": 0.2836, "step": 9321 }, { "epoch": 0.7055106191760087, "grad_norm": 0.7890625, "learning_rate": 1.4496977695553743e-05, "loss": 0.3448, "step": 9322 }, { "epoch": 0.705586301499456, "grad_norm": 0.7421875, "learning_rate": 1.4495913813871843e-05, "loss": 0.289, "step": 9323 }, { "epoch": 0.7056619838229033, "grad_norm": 0.7421875, "learning_rate": 1.4494849868407686e-05, "loss": 0.3353, "step": 9324 }, { "epoch": 0.7057376661463507, "grad_norm": 0.79296875, "learning_rate": 1.4493785859176373e-05, "loss": 0.3521, "step": 9325 }, { "epoch": 0.705813348469798, "grad_norm": 0.77734375, "learning_rate": 1.4492721786192993e-05, "loss": 0.3343, "step": 9326 }, { "epoch": 0.7058890307932454, "grad_norm": 0.734375, "learning_rate": 1.4491657649472647e-05, "loss": 0.3072, "step": 9327 }, { "epoch": 0.7059647131166927, "grad_norm": 0.90234375, "learning_rate": 1.449059344903043e-05, "loss": 0.3825, "step": 9328 }, { "epoch": 0.70604039544014, "grad_norm": 0.78515625, "learning_rate": 1.4489529184881436e-05, "loss": 0.3367, "step": 9329 }, { "epoch": 0.7061160777635873, "grad_norm": 0.7734375, "learning_rate": 1.4488464857040767e-05, "loss": 0.2835, "step": 9330 }, { "epoch": 0.7061917600870347, "grad_norm": 0.6796875, "learning_rate": 1.4487400465523522e-05, "loss": 0.2558, "step": 9331 }, { "epoch": 0.706267442410482, "grad_norm": 0.7734375, "learning_rate": 1.4486336010344804e-05, "loss": 0.3501, "step": 9332 }, { "epoch": 0.7063431247339294, "grad_norm": 0.7265625, "learning_rate": 1.4485271491519706e-05, "loss": 0.304, "step": 9333 }, { "epoch": 0.7064188070573767, "grad_norm": 1.125, "learning_rate": 1.4484206909063336e-05, "loss": 0.3262, "step": 9334 }, { "epoch": 0.706494489380824, "grad_norm": 0.7421875, "learning_rate": 1.44831422629908e-05, "loss": 0.3109, "step": 9335 }, { "epoch": 0.7065701717042713, "grad_norm": 0.76953125, "learning_rate": 1.448207755331719e-05, "loss": 0.3192, "step": 9336 }, { "epoch": 0.7066458540277186, "grad_norm": 0.84375, "learning_rate": 1.4481012780057625e-05, "loss": 0.3583, "step": 9337 }, { "epoch": 0.706721536351166, "grad_norm": 0.78515625, "learning_rate": 1.4479947943227201e-05, "loss": 0.3233, "step": 9338 }, { "epoch": 0.7067972186746133, "grad_norm": 1.8515625, "learning_rate": 1.4478883042841027e-05, "loss": 0.3682, "step": 9339 }, { "epoch": 0.7068729009980607, "grad_norm": 0.75390625, "learning_rate": 1.4477818078914214e-05, "loss": 0.2958, "step": 9340 }, { "epoch": 0.706948583321508, "grad_norm": 0.734375, "learning_rate": 1.4476753051461868e-05, "loss": 0.309, "step": 9341 }, { "epoch": 0.7070242656449554, "grad_norm": 0.7109375, "learning_rate": 1.4475687960499093e-05, "loss": 0.2734, "step": 9342 }, { "epoch": 0.7070999479684026, "grad_norm": 0.75390625, "learning_rate": 1.4474622806041005e-05, "loss": 0.3137, "step": 9343 }, { "epoch": 0.7071756302918499, "grad_norm": 0.90625, "learning_rate": 1.4473557588102716e-05, "loss": 0.3624, "step": 9344 }, { "epoch": 0.7072513126152973, "grad_norm": 0.75390625, "learning_rate": 1.4472492306699333e-05, "loss": 0.3026, "step": 9345 }, { "epoch": 0.7073269949387446, "grad_norm": 0.71484375, "learning_rate": 1.4471426961845976e-05, "loss": 0.2851, "step": 9346 }, { "epoch": 0.707402677262192, "grad_norm": 0.73828125, "learning_rate": 1.447036155355775e-05, "loss": 0.3325, "step": 9347 }, { "epoch": 0.7074783595856393, "grad_norm": 0.72265625, "learning_rate": 1.4469296081849776e-05, "loss": 0.3054, "step": 9348 }, { "epoch": 0.7075540419090867, "grad_norm": 0.7734375, "learning_rate": 1.4468230546737165e-05, "loss": 0.3421, "step": 9349 }, { "epoch": 0.7076297242325339, "grad_norm": 0.84765625, "learning_rate": 1.446716494823504e-05, "loss": 0.3746, "step": 9350 }, { "epoch": 0.7077054065559812, "grad_norm": 0.75390625, "learning_rate": 1.446609928635851e-05, "loss": 0.3192, "step": 9351 }, { "epoch": 0.7077810888794286, "grad_norm": 0.7265625, "learning_rate": 1.4465033561122702e-05, "loss": 0.3139, "step": 9352 }, { "epoch": 0.7078567712028759, "grad_norm": 0.73828125, "learning_rate": 1.4463967772542726e-05, "loss": 0.3016, "step": 9353 }, { "epoch": 0.7079324535263233, "grad_norm": 1.0390625, "learning_rate": 1.446290192063371e-05, "loss": 0.3441, "step": 9354 }, { "epoch": 0.7080081358497706, "grad_norm": 0.80859375, "learning_rate": 1.4461836005410771e-05, "loss": 0.3578, "step": 9355 }, { "epoch": 0.708083818173218, "grad_norm": 0.74609375, "learning_rate": 1.446077002688903e-05, "loss": 0.3342, "step": 9356 }, { "epoch": 0.7081595004966652, "grad_norm": 0.828125, "learning_rate": 1.4459703985083613e-05, "loss": 0.3562, "step": 9357 }, { "epoch": 0.7082351828201126, "grad_norm": 0.78125, "learning_rate": 1.4458637880009641e-05, "loss": 0.3415, "step": 9358 }, { "epoch": 0.7083108651435599, "grad_norm": 0.75390625, "learning_rate": 1.4457571711682238e-05, "loss": 0.324, "step": 9359 }, { "epoch": 0.7083865474670072, "grad_norm": 0.80859375, "learning_rate": 1.4456505480116533e-05, "loss": 0.3209, "step": 9360 }, { "epoch": 0.7084622297904546, "grad_norm": 0.765625, "learning_rate": 1.4455439185327652e-05, "loss": 0.3243, "step": 9361 }, { "epoch": 0.7085379121139019, "grad_norm": 0.75, "learning_rate": 1.4454372827330719e-05, "loss": 0.3006, "step": 9362 }, { "epoch": 0.7086135944373493, "grad_norm": 0.76171875, "learning_rate": 1.4453306406140859e-05, "loss": 0.3325, "step": 9363 }, { "epoch": 0.7086892767607965, "grad_norm": 0.95703125, "learning_rate": 1.4452239921773209e-05, "loss": 0.3001, "step": 9364 }, { "epoch": 0.7087649590842439, "grad_norm": 0.80078125, "learning_rate": 1.4451173374242894e-05, "loss": 0.3638, "step": 9365 }, { "epoch": 0.7088406414076912, "grad_norm": 0.7890625, "learning_rate": 1.445010676356505e-05, "loss": 0.3537, "step": 9366 }, { "epoch": 0.7089163237311386, "grad_norm": 0.7578125, "learning_rate": 1.4449040089754802e-05, "loss": 0.3322, "step": 9367 }, { "epoch": 0.7089920060545859, "grad_norm": 0.75390625, "learning_rate": 1.4447973352827287e-05, "loss": 0.3398, "step": 9368 }, { "epoch": 0.7090676883780332, "grad_norm": 0.7421875, "learning_rate": 1.4446906552797635e-05, "loss": 0.3139, "step": 9369 }, { "epoch": 0.7091433707014806, "grad_norm": 0.81640625, "learning_rate": 1.4445839689680985e-05, "loss": 0.3407, "step": 9370 }, { "epoch": 0.7092190530249278, "grad_norm": 0.8046875, "learning_rate": 1.4444772763492466e-05, "loss": 0.3539, "step": 9371 }, { "epoch": 0.7092947353483752, "grad_norm": 0.74609375, "learning_rate": 1.4443705774247219e-05, "loss": 0.326, "step": 9372 }, { "epoch": 0.7093704176718225, "grad_norm": 0.7890625, "learning_rate": 1.4442638721960381e-05, "loss": 0.34, "step": 9373 }, { "epoch": 0.7094460999952699, "grad_norm": 0.78515625, "learning_rate": 1.4441571606647089e-05, "loss": 0.3397, "step": 9374 }, { "epoch": 0.7095217823187172, "grad_norm": 0.80859375, "learning_rate": 1.4440504428322482e-05, "loss": 0.3248, "step": 9375 }, { "epoch": 0.7095974646421646, "grad_norm": 0.72265625, "learning_rate": 1.4439437187001699e-05, "loss": 0.2781, "step": 9376 }, { "epoch": 0.7096731469656118, "grad_norm": 0.765625, "learning_rate": 1.4438369882699879e-05, "loss": 0.3304, "step": 9377 }, { "epoch": 0.7097488292890591, "grad_norm": 0.75390625, "learning_rate": 1.4437302515432168e-05, "loss": 0.2893, "step": 9378 }, { "epoch": 0.7098245116125065, "grad_norm": 0.77734375, "learning_rate": 1.443623508521371e-05, "loss": 0.3357, "step": 9379 }, { "epoch": 0.7099001939359538, "grad_norm": 0.75, "learning_rate": 1.4435167592059638e-05, "loss": 0.326, "step": 9380 }, { "epoch": 0.7099758762594012, "grad_norm": 0.6953125, "learning_rate": 1.4434100035985105e-05, "loss": 0.2847, "step": 9381 }, { "epoch": 0.7100515585828485, "grad_norm": 0.7421875, "learning_rate": 1.4433032417005255e-05, "loss": 0.3063, "step": 9382 }, { "epoch": 0.7101272409062959, "grad_norm": 0.76171875, "learning_rate": 1.4431964735135231e-05, "loss": 0.3186, "step": 9383 }, { "epoch": 0.7102029232297431, "grad_norm": 0.75, "learning_rate": 1.4430896990390184e-05, "loss": 0.3045, "step": 9384 }, { "epoch": 0.7102786055531904, "grad_norm": 0.77734375, "learning_rate": 1.4429829182785258e-05, "loss": 0.3328, "step": 9385 }, { "epoch": 0.7103542878766378, "grad_norm": 0.76171875, "learning_rate": 1.4428761312335601e-05, "loss": 0.2961, "step": 9386 }, { "epoch": 0.7104299702000851, "grad_norm": 0.80859375, "learning_rate": 1.4427693379056366e-05, "loss": 0.3227, "step": 9387 }, { "epoch": 0.7105056525235325, "grad_norm": 0.76953125, "learning_rate": 1.4426625382962706e-05, "loss": 0.3313, "step": 9388 }, { "epoch": 0.7105813348469798, "grad_norm": 0.7734375, "learning_rate": 1.4425557324069766e-05, "loss": 0.3278, "step": 9389 }, { "epoch": 0.7106570171704272, "grad_norm": 0.7578125, "learning_rate": 1.4424489202392699e-05, "loss": 0.3102, "step": 9390 }, { "epoch": 0.7107326994938744, "grad_norm": 0.765625, "learning_rate": 1.4423421017946662e-05, "loss": 0.3245, "step": 9391 }, { "epoch": 0.7108083818173218, "grad_norm": 0.72265625, "learning_rate": 1.4422352770746807e-05, "loss": 0.2737, "step": 9392 }, { "epoch": 0.7108840641407691, "grad_norm": 0.8125, "learning_rate": 1.442128446080829e-05, "loss": 0.3373, "step": 9393 }, { "epoch": 0.7109597464642164, "grad_norm": 1.0546875, "learning_rate": 1.4420216088146263e-05, "loss": 0.3658, "step": 9394 }, { "epoch": 0.7110354287876638, "grad_norm": 0.76171875, "learning_rate": 1.4419147652775887e-05, "loss": 0.3055, "step": 9395 }, { "epoch": 0.7111111111111111, "grad_norm": 0.84375, "learning_rate": 1.4418079154712317e-05, "loss": 0.3502, "step": 9396 }, { "epoch": 0.7111867934345585, "grad_norm": 0.7734375, "learning_rate": 1.4417010593970716e-05, "loss": 0.3305, "step": 9397 }, { "epoch": 0.7112624757580057, "grad_norm": 0.8046875, "learning_rate": 1.4415941970566234e-05, "loss": 0.323, "step": 9398 }, { "epoch": 0.7113381580814531, "grad_norm": 0.7890625, "learning_rate": 1.4414873284514041e-05, "loss": 0.3398, "step": 9399 }, { "epoch": 0.7114138404049004, "grad_norm": 0.80078125, "learning_rate": 1.4413804535829295e-05, "loss": 0.346, "step": 9400 }, { "epoch": 0.7114895227283478, "grad_norm": 0.6328125, "learning_rate": 1.4412735724527154e-05, "loss": 0.2317, "step": 9401 }, { "epoch": 0.7115652050517951, "grad_norm": 0.81640625, "learning_rate": 1.4411666850622788e-05, "loss": 0.3365, "step": 9402 }, { "epoch": 0.7116408873752424, "grad_norm": 0.73828125, "learning_rate": 1.4410597914131354e-05, "loss": 0.3088, "step": 9403 }, { "epoch": 0.7117165696986898, "grad_norm": 0.78125, "learning_rate": 1.440952891506802e-05, "loss": 0.318, "step": 9404 }, { "epoch": 0.711792252022137, "grad_norm": 0.73828125, "learning_rate": 1.4408459853447952e-05, "loss": 0.2996, "step": 9405 }, { "epoch": 0.7118679343455844, "grad_norm": 0.828125, "learning_rate": 1.4407390729286318e-05, "loss": 0.3319, "step": 9406 }, { "epoch": 0.7119436166690317, "grad_norm": 0.7890625, "learning_rate": 1.440632154259828e-05, "loss": 0.3537, "step": 9407 }, { "epoch": 0.7120192989924791, "grad_norm": 0.76953125, "learning_rate": 1.440525229339901e-05, "loss": 0.2996, "step": 9408 }, { "epoch": 0.7120949813159264, "grad_norm": 0.7734375, "learning_rate": 1.4404182981703679e-05, "loss": 0.3389, "step": 9409 }, { "epoch": 0.7121706636393738, "grad_norm": 0.734375, "learning_rate": 1.4403113607527451e-05, "loss": 0.3242, "step": 9410 }, { "epoch": 0.7122463459628211, "grad_norm": 0.7578125, "learning_rate": 1.4402044170885503e-05, "loss": 0.3175, "step": 9411 }, { "epoch": 0.7123220282862683, "grad_norm": 0.83203125, "learning_rate": 1.4400974671793002e-05, "loss": 0.3808, "step": 9412 }, { "epoch": 0.7123977106097157, "grad_norm": 0.76953125, "learning_rate": 1.4399905110265124e-05, "loss": 0.3254, "step": 9413 }, { "epoch": 0.712473392933163, "grad_norm": 0.7265625, "learning_rate": 1.4398835486317042e-05, "loss": 0.2928, "step": 9414 }, { "epoch": 0.7125490752566104, "grad_norm": 0.7578125, "learning_rate": 1.439776579996393e-05, "loss": 0.3197, "step": 9415 }, { "epoch": 0.7126247575800577, "grad_norm": 0.8125, "learning_rate": 1.4396696051220965e-05, "loss": 0.3718, "step": 9416 }, { "epoch": 0.7127004399035051, "grad_norm": 0.73046875, "learning_rate": 1.439562624010332e-05, "loss": 0.2885, "step": 9417 }, { "epoch": 0.7127761222269524, "grad_norm": 0.76953125, "learning_rate": 1.4394556366626174e-05, "loss": 0.3199, "step": 9418 }, { "epoch": 0.7128518045503996, "grad_norm": 0.79296875, "learning_rate": 1.4393486430804701e-05, "loss": 0.3339, "step": 9419 }, { "epoch": 0.712927486873847, "grad_norm": 0.69921875, "learning_rate": 1.4392416432654086e-05, "loss": 0.2858, "step": 9420 }, { "epoch": 0.7130031691972943, "grad_norm": 0.73046875, "learning_rate": 1.4391346372189507e-05, "loss": 0.2819, "step": 9421 }, { "epoch": 0.7130788515207417, "grad_norm": 0.78125, "learning_rate": 1.4390276249426145e-05, "loss": 0.3188, "step": 9422 }, { "epoch": 0.713154533844189, "grad_norm": 0.78125, "learning_rate": 1.438920606437918e-05, "loss": 0.2939, "step": 9423 }, { "epoch": 0.7132302161676364, "grad_norm": 0.73046875, "learning_rate": 1.4388135817063793e-05, "loss": 0.2726, "step": 9424 }, { "epoch": 0.7133058984910837, "grad_norm": 0.73828125, "learning_rate": 1.438706550749517e-05, "loss": 0.3124, "step": 9425 }, { "epoch": 0.713381580814531, "grad_norm": 0.7265625, "learning_rate": 1.4385995135688495e-05, "loss": 0.31, "step": 9426 }, { "epoch": 0.7134572631379783, "grad_norm": 0.78515625, "learning_rate": 1.4384924701658948e-05, "loss": 0.3547, "step": 9427 }, { "epoch": 0.7135329454614256, "grad_norm": 0.78125, "learning_rate": 1.4383854205421726e-05, "loss": 0.3468, "step": 9428 }, { "epoch": 0.713608627784873, "grad_norm": 0.77734375, "learning_rate": 1.4382783646992006e-05, "loss": 0.3132, "step": 9429 }, { "epoch": 0.7136843101083203, "grad_norm": 0.77734375, "learning_rate": 1.438171302638498e-05, "loss": 0.3131, "step": 9430 }, { "epoch": 0.7137599924317677, "grad_norm": 0.734375, "learning_rate": 1.4380642343615834e-05, "loss": 0.2923, "step": 9431 }, { "epoch": 0.713835674755215, "grad_norm": 0.69921875, "learning_rate": 1.437957159869976e-05, "loss": 0.2918, "step": 9432 }, { "epoch": 0.7139113570786623, "grad_norm": 0.71484375, "learning_rate": 1.4378500791651944e-05, "loss": 0.2975, "step": 9433 }, { "epoch": 0.7139870394021096, "grad_norm": 1.0546875, "learning_rate": 1.4377429922487584e-05, "loss": 0.3315, "step": 9434 }, { "epoch": 0.714062721725557, "grad_norm": 0.8203125, "learning_rate": 1.4376358991221869e-05, "loss": 0.3436, "step": 9435 }, { "epoch": 0.7141384040490043, "grad_norm": 0.74609375, "learning_rate": 1.4375287997869988e-05, "loss": 0.317, "step": 9436 }, { "epoch": 0.7142140863724517, "grad_norm": 0.69921875, "learning_rate": 1.4374216942447144e-05, "loss": 0.2791, "step": 9437 }, { "epoch": 0.714289768695899, "grad_norm": 0.7421875, "learning_rate": 1.4373145824968521e-05, "loss": 0.3259, "step": 9438 }, { "epoch": 0.7143654510193463, "grad_norm": 1.1484375, "learning_rate": 1.4372074645449322e-05, "loss": 0.353, "step": 9439 }, { "epoch": 0.7144411333427936, "grad_norm": 0.91015625, "learning_rate": 1.4371003403904742e-05, "loss": 0.3557, "step": 9440 }, { "epoch": 0.7145168156662409, "grad_norm": 0.78515625, "learning_rate": 1.4369932100349977e-05, "loss": 0.3499, "step": 9441 }, { "epoch": 0.7145924979896883, "grad_norm": 0.80078125, "learning_rate": 1.4368860734800226e-05, "loss": 0.3251, "step": 9442 }, { "epoch": 0.7146681803131356, "grad_norm": 0.71875, "learning_rate": 1.4367789307270688e-05, "loss": 0.3152, "step": 9443 }, { "epoch": 0.714743862636583, "grad_norm": 0.76953125, "learning_rate": 1.4366717817776567e-05, "loss": 0.3595, "step": 9444 }, { "epoch": 0.7148195449600303, "grad_norm": 0.765625, "learning_rate": 1.4365646266333055e-05, "loss": 0.3254, "step": 9445 }, { "epoch": 0.7148952272834777, "grad_norm": 0.76171875, "learning_rate": 1.4364574652955361e-05, "loss": 0.3353, "step": 9446 }, { "epoch": 0.7149709096069249, "grad_norm": 0.7421875, "learning_rate": 1.4363502977658685e-05, "loss": 0.3085, "step": 9447 }, { "epoch": 0.7150465919303722, "grad_norm": 0.734375, "learning_rate": 1.4362431240458232e-05, "loss": 0.2974, "step": 9448 }, { "epoch": 0.7151222742538196, "grad_norm": 0.76953125, "learning_rate": 1.4361359441369206e-05, "loss": 0.3319, "step": 9449 }, { "epoch": 0.7151979565772669, "grad_norm": 0.80859375, "learning_rate": 1.4360287580406813e-05, "loss": 0.3619, "step": 9450 }, { "epoch": 0.7152736389007143, "grad_norm": 0.76953125, "learning_rate": 1.4359215657586257e-05, "loss": 0.3162, "step": 9451 }, { "epoch": 0.7153493212241616, "grad_norm": 0.74609375, "learning_rate": 1.4358143672922747e-05, "loss": 0.3028, "step": 9452 }, { "epoch": 0.715425003547609, "grad_norm": 0.7109375, "learning_rate": 1.4357071626431489e-05, "loss": 0.2855, "step": 9453 }, { "epoch": 0.7155006858710562, "grad_norm": 0.7421875, "learning_rate": 1.4355999518127692e-05, "loss": 0.3339, "step": 9454 }, { "epoch": 0.7155763681945035, "grad_norm": 0.75, "learning_rate": 1.4354927348026569e-05, "loss": 0.2984, "step": 9455 }, { "epoch": 0.7156520505179509, "grad_norm": 0.79296875, "learning_rate": 1.4353855116143327e-05, "loss": 0.3374, "step": 9456 }, { "epoch": 0.7157277328413982, "grad_norm": 0.77734375, "learning_rate": 1.4352782822493176e-05, "loss": 0.3621, "step": 9457 }, { "epoch": 0.7158034151648456, "grad_norm": 0.7265625, "learning_rate": 1.4351710467091337e-05, "loss": 0.3221, "step": 9458 }, { "epoch": 0.7158790974882929, "grad_norm": 0.765625, "learning_rate": 1.4350638049953014e-05, "loss": 0.3483, "step": 9459 }, { "epoch": 0.7159547798117403, "grad_norm": 0.7578125, "learning_rate": 1.4349565571093423e-05, "loss": 0.3013, "step": 9460 }, { "epoch": 0.7160304621351875, "grad_norm": 0.76171875, "learning_rate": 1.4348493030527781e-05, "loss": 0.3217, "step": 9461 }, { "epoch": 0.7161061444586349, "grad_norm": 0.765625, "learning_rate": 1.4347420428271304e-05, "loss": 0.3301, "step": 9462 }, { "epoch": 0.7161818267820822, "grad_norm": 1.1953125, "learning_rate": 1.4346347764339208e-05, "loss": 0.3712, "step": 9463 }, { "epoch": 0.7162575091055295, "grad_norm": 0.75, "learning_rate": 1.434527503874671e-05, "loss": 0.2814, "step": 9464 }, { "epoch": 0.7163331914289769, "grad_norm": 0.765625, "learning_rate": 1.434420225150903e-05, "loss": 0.3286, "step": 9465 }, { "epoch": 0.7164088737524242, "grad_norm": 0.7734375, "learning_rate": 1.4343129402641383e-05, "loss": 0.3107, "step": 9466 }, { "epoch": 0.7164845560758716, "grad_norm": 0.703125, "learning_rate": 1.4342056492158998e-05, "loss": 0.2827, "step": 9467 }, { "epoch": 0.7165602383993188, "grad_norm": 0.71875, "learning_rate": 1.4340983520077083e-05, "loss": 0.3014, "step": 9468 }, { "epoch": 0.7166359207227662, "grad_norm": 0.796875, "learning_rate": 1.4339910486410873e-05, "loss": 0.3097, "step": 9469 }, { "epoch": 0.7167116030462135, "grad_norm": 0.76953125, "learning_rate": 1.4338837391175582e-05, "loss": 0.3198, "step": 9470 }, { "epoch": 0.7167872853696609, "grad_norm": 0.76171875, "learning_rate": 1.4337764234386439e-05, "loss": 0.3137, "step": 9471 }, { "epoch": 0.7168629676931082, "grad_norm": 0.765625, "learning_rate": 1.4336691016058665e-05, "loss": 0.308, "step": 9472 }, { "epoch": 0.7169386500165555, "grad_norm": 1.0234375, "learning_rate": 1.433561773620749e-05, "loss": 0.3335, "step": 9473 }, { "epoch": 0.7170143323400029, "grad_norm": 0.6953125, "learning_rate": 1.4334544394848134e-05, "loss": 0.2748, "step": 9474 }, { "epoch": 0.7170900146634501, "grad_norm": 0.76171875, "learning_rate": 1.4333470991995827e-05, "loss": 0.3069, "step": 9475 }, { "epoch": 0.7171656969868975, "grad_norm": 0.76953125, "learning_rate": 1.4332397527665799e-05, "loss": 0.3326, "step": 9476 }, { "epoch": 0.7172413793103448, "grad_norm": 0.8828125, "learning_rate": 1.4331324001873276e-05, "loss": 0.3977, "step": 9477 }, { "epoch": 0.7173170616337922, "grad_norm": 0.75, "learning_rate": 1.433025041463349e-05, "loss": 0.3234, "step": 9478 }, { "epoch": 0.7173927439572395, "grad_norm": 0.734375, "learning_rate": 1.4329176765961675e-05, "loss": 0.2785, "step": 9479 }, { "epoch": 0.7174684262806869, "grad_norm": 0.765625, "learning_rate": 1.4328103055873052e-05, "loss": 0.3109, "step": 9480 }, { "epoch": 0.7175441086041342, "grad_norm": 0.7265625, "learning_rate": 1.4327029284382863e-05, "loss": 0.3066, "step": 9481 }, { "epoch": 0.7176197909275814, "grad_norm": 0.7578125, "learning_rate": 1.4325955451506339e-05, "loss": 0.3252, "step": 9482 }, { "epoch": 0.7176954732510288, "grad_norm": 1.28125, "learning_rate": 1.4324881557258712e-05, "loss": 0.3777, "step": 9483 }, { "epoch": 0.7177711555744761, "grad_norm": 0.8203125, "learning_rate": 1.4323807601655218e-05, "loss": 0.3494, "step": 9484 }, { "epoch": 0.7178468378979235, "grad_norm": 0.7578125, "learning_rate": 1.4322733584711093e-05, "loss": 0.3055, "step": 9485 }, { "epoch": 0.7179225202213708, "grad_norm": 0.71875, "learning_rate": 1.4321659506441577e-05, "loss": 0.2921, "step": 9486 }, { "epoch": 0.7179982025448182, "grad_norm": 0.71875, "learning_rate": 1.4320585366861901e-05, "loss": 0.2825, "step": 9487 }, { "epoch": 0.7180738848682655, "grad_norm": 0.703125, "learning_rate": 1.431951116598731e-05, "loss": 0.2632, "step": 9488 }, { "epoch": 0.7181495671917127, "grad_norm": 0.7890625, "learning_rate": 1.4318436903833037e-05, "loss": 0.3335, "step": 9489 }, { "epoch": 0.7182252495151601, "grad_norm": 0.78515625, "learning_rate": 1.4317362580414327e-05, "loss": 0.3087, "step": 9490 }, { "epoch": 0.7183009318386074, "grad_norm": 0.8046875, "learning_rate": 1.4316288195746423e-05, "loss": 0.3271, "step": 9491 }, { "epoch": 0.7183766141620548, "grad_norm": 0.77734375, "learning_rate": 1.4315213749844562e-05, "loss": 0.3183, "step": 9492 }, { "epoch": 0.7184522964855021, "grad_norm": 0.7421875, "learning_rate": 1.4314139242723988e-05, "loss": 0.2982, "step": 9493 }, { "epoch": 0.7185279788089495, "grad_norm": 0.796875, "learning_rate": 1.4313064674399945e-05, "loss": 0.3392, "step": 9494 }, { "epoch": 0.7186036611323967, "grad_norm": 0.6796875, "learning_rate": 1.431199004488768e-05, "loss": 0.2586, "step": 9495 }, { "epoch": 0.718679343455844, "grad_norm": 0.76171875, "learning_rate": 1.4310915354202438e-05, "loss": 0.3064, "step": 9496 }, { "epoch": 0.7187550257792914, "grad_norm": 0.77734375, "learning_rate": 1.4309840602359462e-05, "loss": 0.3147, "step": 9497 }, { "epoch": 0.7188307081027387, "grad_norm": 0.7265625, "learning_rate": 1.4308765789374e-05, "loss": 0.3008, "step": 9498 }, { "epoch": 0.7189063904261861, "grad_norm": 0.76171875, "learning_rate": 1.4307690915261302e-05, "loss": 0.3112, "step": 9499 }, { "epoch": 0.7189820727496334, "grad_norm": 0.79296875, "learning_rate": 1.4306615980036618e-05, "loss": 0.3586, "step": 9500 }, { "epoch": 0.7190577550730808, "grad_norm": 0.7578125, "learning_rate": 1.4305540983715193e-05, "loss": 0.3426, "step": 9501 }, { "epoch": 0.719133437396528, "grad_norm": 0.73046875, "learning_rate": 1.4304465926312281e-05, "loss": 0.3338, "step": 9502 }, { "epoch": 0.7192091197199754, "grad_norm": 0.7578125, "learning_rate": 1.4303390807843135e-05, "loss": 0.3041, "step": 9503 }, { "epoch": 0.7192848020434227, "grad_norm": 0.73828125, "learning_rate": 1.4302315628323002e-05, "loss": 0.3025, "step": 9504 }, { "epoch": 0.7193604843668701, "grad_norm": 0.77734375, "learning_rate": 1.4301240387767144e-05, "loss": 0.3448, "step": 9505 }, { "epoch": 0.7194361666903174, "grad_norm": 0.765625, "learning_rate": 1.4300165086190807e-05, "loss": 0.3397, "step": 9506 }, { "epoch": 0.7195118490137647, "grad_norm": 0.7578125, "learning_rate": 1.4299089723609247e-05, "loss": 0.3017, "step": 9507 }, { "epoch": 0.7195875313372121, "grad_norm": 0.73046875, "learning_rate": 1.4298014300037726e-05, "loss": 0.3152, "step": 9508 }, { "epoch": 0.7196632136606593, "grad_norm": 0.74609375, "learning_rate": 1.4296938815491493e-05, "loss": 0.3021, "step": 9509 }, { "epoch": 0.7197388959841067, "grad_norm": 0.73828125, "learning_rate": 1.4295863269985813e-05, "loss": 0.2793, "step": 9510 }, { "epoch": 0.719814578307554, "grad_norm": 0.70703125, "learning_rate": 1.4294787663535936e-05, "loss": 0.2744, "step": 9511 }, { "epoch": 0.7198902606310014, "grad_norm": 0.8125, "learning_rate": 1.429371199615713e-05, "loss": 0.35, "step": 9512 }, { "epoch": 0.7199659429544487, "grad_norm": 0.72265625, "learning_rate": 1.4292636267864645e-05, "loss": 0.302, "step": 9513 }, { "epoch": 0.7200416252778961, "grad_norm": 0.7265625, "learning_rate": 1.4291560478673756e-05, "loss": 0.312, "step": 9514 }, { "epoch": 0.7201173076013434, "grad_norm": 0.69921875, "learning_rate": 1.4290484628599712e-05, "loss": 0.2946, "step": 9515 }, { "epoch": 0.7201929899247906, "grad_norm": 0.75, "learning_rate": 1.428940871765778e-05, "loss": 0.3065, "step": 9516 }, { "epoch": 0.720268672248238, "grad_norm": 0.74609375, "learning_rate": 1.4288332745863226e-05, "loss": 0.329, "step": 9517 }, { "epoch": 0.7203443545716853, "grad_norm": 0.80859375, "learning_rate": 1.4287256713231314e-05, "loss": 0.337, "step": 9518 }, { "epoch": 0.7204200368951327, "grad_norm": 0.80859375, "learning_rate": 1.4286180619777308e-05, "loss": 0.3242, "step": 9519 }, { "epoch": 0.72049571921858, "grad_norm": 0.7890625, "learning_rate": 1.4285104465516474e-05, "loss": 0.3181, "step": 9520 }, { "epoch": 0.7205714015420274, "grad_norm": 0.7734375, "learning_rate": 1.4284028250464082e-05, "loss": 0.3458, "step": 9521 }, { "epoch": 0.7206470838654747, "grad_norm": 0.7109375, "learning_rate": 1.4282951974635392e-05, "loss": 0.2676, "step": 9522 }, { "epoch": 0.720722766188922, "grad_norm": 0.71875, "learning_rate": 1.4281875638045683e-05, "loss": 0.2973, "step": 9523 }, { "epoch": 0.7207984485123693, "grad_norm": 0.76171875, "learning_rate": 1.4280799240710219e-05, "loss": 0.3411, "step": 9524 }, { "epoch": 0.7208741308358166, "grad_norm": 0.703125, "learning_rate": 1.4279722782644274e-05, "loss": 0.2784, "step": 9525 }, { "epoch": 0.720949813159264, "grad_norm": 0.7265625, "learning_rate": 1.4278646263863114e-05, "loss": 0.3126, "step": 9526 }, { "epoch": 0.7210254954827113, "grad_norm": 0.78515625, "learning_rate": 1.4277569684382015e-05, "loss": 0.3103, "step": 9527 }, { "epoch": 0.7211011778061587, "grad_norm": 0.890625, "learning_rate": 1.427649304421625e-05, "loss": 0.2772, "step": 9528 }, { "epoch": 0.721176860129606, "grad_norm": 0.7578125, "learning_rate": 1.4275416343381095e-05, "loss": 0.3243, "step": 9529 }, { "epoch": 0.7212525424530533, "grad_norm": 0.7578125, "learning_rate": 1.4274339581891816e-05, "loss": 0.3016, "step": 9530 }, { "epoch": 0.7213282247765006, "grad_norm": 0.73828125, "learning_rate": 1.4273262759763697e-05, "loss": 0.3304, "step": 9531 }, { "epoch": 0.721403907099948, "grad_norm": 0.7265625, "learning_rate": 1.4272185877012014e-05, "loss": 0.2933, "step": 9532 }, { "epoch": 0.7214795894233953, "grad_norm": 0.81640625, "learning_rate": 1.4271108933652044e-05, "loss": 0.3486, "step": 9533 }, { "epoch": 0.7215552717468426, "grad_norm": 0.75390625, "learning_rate": 1.4270031929699064e-05, "loss": 0.3067, "step": 9534 }, { "epoch": 0.72163095407029, "grad_norm": 1.15625, "learning_rate": 1.4268954865168354e-05, "loss": 0.4152, "step": 9535 }, { "epoch": 0.7217066363937373, "grad_norm": 0.74609375, "learning_rate": 1.4267877740075191e-05, "loss": 0.2947, "step": 9536 }, { "epoch": 0.7217823187171846, "grad_norm": 0.73828125, "learning_rate": 1.4266800554434859e-05, "loss": 0.2696, "step": 9537 }, { "epoch": 0.7218580010406319, "grad_norm": 0.81640625, "learning_rate": 1.4265723308262643e-05, "loss": 0.3252, "step": 9538 }, { "epoch": 0.7219336833640793, "grad_norm": 0.86328125, "learning_rate": 1.4264646001573814e-05, "loss": 0.3768, "step": 9539 }, { "epoch": 0.7220093656875266, "grad_norm": 0.82421875, "learning_rate": 1.4263568634383668e-05, "loss": 0.3176, "step": 9540 }, { "epoch": 0.722085048010974, "grad_norm": 0.7109375, "learning_rate": 1.4262491206707484e-05, "loss": 0.2997, "step": 9541 }, { "epoch": 0.7221607303344213, "grad_norm": 0.84375, "learning_rate": 1.4261413718560549e-05, "loss": 0.3489, "step": 9542 }, { "epoch": 0.7222364126578686, "grad_norm": 3.21875, "learning_rate": 1.4260336169958145e-05, "loss": 0.3674, "step": 9543 }, { "epoch": 0.7223120949813159, "grad_norm": 1.1171875, "learning_rate": 1.4259258560915563e-05, "loss": 0.3884, "step": 9544 }, { "epoch": 0.7223877773047632, "grad_norm": 0.66015625, "learning_rate": 1.4258180891448086e-05, "loss": 0.2616, "step": 9545 }, { "epoch": 0.7224634596282106, "grad_norm": 0.78515625, "learning_rate": 1.4257103161571009e-05, "loss": 0.3032, "step": 9546 }, { "epoch": 0.7225391419516579, "grad_norm": 0.71875, "learning_rate": 1.425602537129962e-05, "loss": 0.2923, "step": 9547 }, { "epoch": 0.7226148242751053, "grad_norm": 0.6796875, "learning_rate": 1.4254947520649203e-05, "loss": 0.2803, "step": 9548 }, { "epoch": 0.7226905065985526, "grad_norm": 0.7734375, "learning_rate": 1.4253869609635056e-05, "loss": 0.3296, "step": 9549 }, { "epoch": 0.722766188922, "grad_norm": 0.734375, "learning_rate": 1.4252791638272469e-05, "loss": 0.3003, "step": 9550 }, { "epoch": 0.7228418712454472, "grad_norm": 0.76171875, "learning_rate": 1.4251713606576731e-05, "loss": 0.3214, "step": 9551 }, { "epoch": 0.7229175535688945, "grad_norm": 0.734375, "learning_rate": 1.4250635514563145e-05, "loss": 0.3083, "step": 9552 }, { "epoch": 0.7229932358923419, "grad_norm": 0.8125, "learning_rate": 1.4249557362246997e-05, "loss": 0.3281, "step": 9553 }, { "epoch": 0.7230689182157892, "grad_norm": 1.234375, "learning_rate": 1.4248479149643583e-05, "loss": 0.395, "step": 9554 }, { "epoch": 0.7231446005392366, "grad_norm": 0.70703125, "learning_rate": 1.4247400876768205e-05, "loss": 0.2673, "step": 9555 }, { "epoch": 0.7232202828626839, "grad_norm": 0.828125, "learning_rate": 1.424632254363616e-05, "loss": 0.36, "step": 9556 }, { "epoch": 0.7232959651861313, "grad_norm": 0.7734375, "learning_rate": 1.4245244150262736e-05, "loss": 0.3034, "step": 9557 }, { "epoch": 0.7233716475095785, "grad_norm": 0.765625, "learning_rate": 1.4244165696663241e-05, "loss": 0.3231, "step": 9558 }, { "epoch": 0.7234473298330258, "grad_norm": 0.71875, "learning_rate": 1.4243087182852972e-05, "loss": 0.2905, "step": 9559 }, { "epoch": 0.7235230121564732, "grad_norm": 1.1328125, "learning_rate": 1.424200860884723e-05, "loss": 0.3246, "step": 9560 }, { "epoch": 0.7235986944799205, "grad_norm": 0.76953125, "learning_rate": 1.424092997466132e-05, "loss": 0.3345, "step": 9561 }, { "epoch": 0.7236743768033679, "grad_norm": 0.68359375, "learning_rate": 1.4239851280310538e-05, "loss": 0.2505, "step": 9562 }, { "epoch": 0.7237500591268152, "grad_norm": 0.90234375, "learning_rate": 1.4238772525810185e-05, "loss": 0.3473, "step": 9563 }, { "epoch": 0.7238257414502626, "grad_norm": 1.078125, "learning_rate": 1.4237693711175574e-05, "loss": 0.3415, "step": 9564 }, { "epoch": 0.7239014237737098, "grad_norm": 0.78515625, "learning_rate": 1.4236614836422006e-05, "loss": 0.3127, "step": 9565 }, { "epoch": 0.7239771060971572, "grad_norm": 0.8125, "learning_rate": 1.4235535901564788e-05, "loss": 0.3788, "step": 9566 }, { "epoch": 0.7240527884206045, "grad_norm": 0.6796875, "learning_rate": 1.423445690661922e-05, "loss": 0.2797, "step": 9567 }, { "epoch": 0.7241284707440518, "grad_norm": 0.73046875, "learning_rate": 1.4233377851600619e-05, "loss": 0.2998, "step": 9568 }, { "epoch": 0.7242041530674992, "grad_norm": 0.78125, "learning_rate": 1.4232298736524284e-05, "loss": 0.3298, "step": 9569 }, { "epoch": 0.7242798353909465, "grad_norm": 0.7734375, "learning_rate": 1.4231219561405533e-05, "loss": 0.3771, "step": 9570 }, { "epoch": 0.7243555177143939, "grad_norm": 0.78515625, "learning_rate": 1.4230140326259672e-05, "loss": 0.3433, "step": 9571 }, { "epoch": 0.7244312000378411, "grad_norm": 0.75390625, "learning_rate": 1.4229061031102005e-05, "loss": 0.3025, "step": 9572 }, { "epoch": 0.7245068823612885, "grad_norm": 0.77734375, "learning_rate": 1.4227981675947855e-05, "loss": 0.3174, "step": 9573 }, { "epoch": 0.7245825646847358, "grad_norm": 0.7890625, "learning_rate": 1.4226902260812527e-05, "loss": 0.3368, "step": 9574 }, { "epoch": 0.7246582470081832, "grad_norm": 0.796875, "learning_rate": 1.4225822785711338e-05, "loss": 0.3418, "step": 9575 }, { "epoch": 0.7247339293316305, "grad_norm": 0.6953125, "learning_rate": 1.4224743250659601e-05, "loss": 0.2896, "step": 9576 }, { "epoch": 0.7248096116550778, "grad_norm": 0.73046875, "learning_rate": 1.422366365567263e-05, "loss": 0.2882, "step": 9577 }, { "epoch": 0.7248852939785252, "grad_norm": 0.7890625, "learning_rate": 1.4222584000765743e-05, "loss": 0.2945, "step": 9578 }, { "epoch": 0.7249609763019724, "grad_norm": 0.76953125, "learning_rate": 1.4221504285954256e-05, "loss": 0.2714, "step": 9579 }, { "epoch": 0.7250366586254198, "grad_norm": 0.7734375, "learning_rate": 1.4220424511253486e-05, "loss": 0.3393, "step": 9580 }, { "epoch": 0.7251123409488671, "grad_norm": 0.734375, "learning_rate": 1.4219344676678753e-05, "loss": 0.2773, "step": 9581 }, { "epoch": 0.7251880232723145, "grad_norm": 0.8515625, "learning_rate": 1.4218264782245373e-05, "loss": 0.3354, "step": 9582 }, { "epoch": 0.7252637055957618, "grad_norm": 0.85546875, "learning_rate": 1.4217184827968668e-05, "loss": 0.3656, "step": 9583 }, { "epoch": 0.7253393879192092, "grad_norm": 0.7421875, "learning_rate": 1.4216104813863962e-05, "loss": 0.297, "step": 9584 }, { "epoch": 0.7254150702426565, "grad_norm": 0.83984375, "learning_rate": 1.4215024739946572e-05, "loss": 0.3563, "step": 9585 }, { "epoch": 0.7254907525661037, "grad_norm": 0.8203125, "learning_rate": 1.4213944606231823e-05, "loss": 0.3193, "step": 9586 }, { "epoch": 0.7255664348895511, "grad_norm": 0.77734375, "learning_rate": 1.4212864412735039e-05, "loss": 0.3247, "step": 9587 }, { "epoch": 0.7256421172129984, "grad_norm": 0.80859375, "learning_rate": 1.4211784159471545e-05, "loss": 0.3388, "step": 9588 }, { "epoch": 0.7257177995364458, "grad_norm": 0.7890625, "learning_rate": 1.4210703846456664e-05, "loss": 0.3451, "step": 9589 }, { "epoch": 0.7257934818598931, "grad_norm": 0.75, "learning_rate": 1.4209623473705722e-05, "loss": 0.3165, "step": 9590 }, { "epoch": 0.7258691641833405, "grad_norm": 0.765625, "learning_rate": 1.4208543041234049e-05, "loss": 0.314, "step": 9591 }, { "epoch": 0.7259448465067878, "grad_norm": 0.765625, "learning_rate": 1.420746254905697e-05, "loss": 0.3119, "step": 9592 }, { "epoch": 0.726020528830235, "grad_norm": 0.72265625, "learning_rate": 1.4206381997189811e-05, "loss": 0.2735, "step": 9593 }, { "epoch": 0.7260962111536824, "grad_norm": 0.796875, "learning_rate": 1.4205301385647911e-05, "loss": 0.315, "step": 9594 }, { "epoch": 0.7261718934771297, "grad_norm": 0.7421875, "learning_rate": 1.420422071444659e-05, "loss": 0.3237, "step": 9595 }, { "epoch": 0.7262475758005771, "grad_norm": 0.6875, "learning_rate": 1.4203139983601184e-05, "loss": 0.3036, "step": 9596 }, { "epoch": 0.7263232581240244, "grad_norm": 1.1015625, "learning_rate": 1.4202059193127027e-05, "loss": 0.3802, "step": 9597 }, { "epoch": 0.7263989404474718, "grad_norm": 0.7578125, "learning_rate": 1.4200978343039448e-05, "loss": 0.329, "step": 9598 }, { "epoch": 0.7264746227709191, "grad_norm": 0.78125, "learning_rate": 1.4199897433353782e-05, "loss": 0.345, "step": 9599 }, { "epoch": 0.7265503050943664, "grad_norm": 0.76171875, "learning_rate": 1.4198816464085363e-05, "loss": 0.3262, "step": 9600 }, { "epoch": 0.7266259874178137, "grad_norm": 0.703125, "learning_rate": 1.4197735435249527e-05, "loss": 0.2925, "step": 9601 }, { "epoch": 0.726701669741261, "grad_norm": 0.79296875, "learning_rate": 1.4196654346861611e-05, "loss": 0.2697, "step": 9602 }, { "epoch": 0.7267773520647084, "grad_norm": 0.8203125, "learning_rate": 1.4195573198936954e-05, "loss": 0.3649, "step": 9603 }, { "epoch": 0.7268530343881557, "grad_norm": 0.765625, "learning_rate": 1.4194491991490886e-05, "loss": 0.3236, "step": 9604 }, { "epoch": 0.7269287167116031, "grad_norm": 0.8046875, "learning_rate": 1.4193410724538753e-05, "loss": 0.359, "step": 9605 }, { "epoch": 0.7270043990350504, "grad_norm": 0.765625, "learning_rate": 1.4192329398095894e-05, "loss": 0.3439, "step": 9606 }, { "epoch": 0.7270800813584977, "grad_norm": 0.7265625, "learning_rate": 1.4191248012177647e-05, "loss": 0.3024, "step": 9607 }, { "epoch": 0.727155763681945, "grad_norm": 0.78125, "learning_rate": 1.4190166566799358e-05, "loss": 0.3209, "step": 9608 }, { "epoch": 0.7272314460053924, "grad_norm": 0.7421875, "learning_rate": 1.4189085061976362e-05, "loss": 0.2992, "step": 9609 }, { "epoch": 0.7273071283288397, "grad_norm": 0.93359375, "learning_rate": 1.4188003497724005e-05, "loss": 0.3837, "step": 9610 }, { "epoch": 0.727382810652287, "grad_norm": 0.77734375, "learning_rate": 1.4186921874057634e-05, "loss": 0.3177, "step": 9611 }, { "epoch": 0.7274584929757344, "grad_norm": 0.734375, "learning_rate": 1.418584019099259e-05, "loss": 0.3061, "step": 9612 }, { "epoch": 0.7275341752991817, "grad_norm": 0.75390625, "learning_rate": 1.418475844854422e-05, "loss": 0.2844, "step": 9613 }, { "epoch": 0.727609857622629, "grad_norm": 0.7265625, "learning_rate": 1.418367664672787e-05, "loss": 0.2845, "step": 9614 }, { "epoch": 0.7276855399460763, "grad_norm": 0.7421875, "learning_rate": 1.4182594785558888e-05, "loss": 0.3185, "step": 9615 }, { "epoch": 0.7277612222695237, "grad_norm": 0.7890625, "learning_rate": 1.4181512865052619e-05, "loss": 0.3234, "step": 9616 }, { "epoch": 0.727836904592971, "grad_norm": 0.796875, "learning_rate": 1.4180430885224419e-05, "loss": 0.3379, "step": 9617 }, { "epoch": 0.7279125869164184, "grad_norm": 0.74609375, "learning_rate": 1.4179348846089628e-05, "loss": 0.2907, "step": 9618 }, { "epoch": 0.7279882692398657, "grad_norm": 0.77734375, "learning_rate": 1.4178266747663602e-05, "loss": 0.35, "step": 9619 }, { "epoch": 0.7280639515633129, "grad_norm": 0.7734375, "learning_rate": 1.4177184589961695e-05, "loss": 0.3286, "step": 9620 }, { "epoch": 0.7281396338867603, "grad_norm": 0.77734375, "learning_rate": 1.4176102372999254e-05, "loss": 0.3177, "step": 9621 }, { "epoch": 0.7282153162102076, "grad_norm": 0.9296875, "learning_rate": 1.4175020096791636e-05, "loss": 0.3947, "step": 9622 }, { "epoch": 0.728290998533655, "grad_norm": 0.734375, "learning_rate": 1.4173937761354194e-05, "loss": 0.3302, "step": 9623 }, { "epoch": 0.7283666808571023, "grad_norm": 0.734375, "learning_rate": 1.417285536670228e-05, "loss": 0.3002, "step": 9624 }, { "epoch": 0.7284423631805497, "grad_norm": 0.80078125, "learning_rate": 1.4171772912851252e-05, "loss": 0.3306, "step": 9625 }, { "epoch": 0.728518045503997, "grad_norm": 0.7578125, "learning_rate": 1.4170690399816469e-05, "loss": 0.2949, "step": 9626 }, { "epoch": 0.7285937278274442, "grad_norm": 0.7890625, "learning_rate": 1.4169607827613284e-05, "loss": 0.3361, "step": 9627 }, { "epoch": 0.7286694101508916, "grad_norm": 0.72265625, "learning_rate": 1.4168525196257054e-05, "loss": 0.3065, "step": 9628 }, { "epoch": 0.7287450924743389, "grad_norm": 0.7734375, "learning_rate": 1.4167442505763143e-05, "loss": 0.3047, "step": 9629 }, { "epoch": 0.7288207747977863, "grad_norm": 0.8203125, "learning_rate": 1.4166359756146908e-05, "loss": 0.3543, "step": 9630 }, { "epoch": 0.7288964571212336, "grad_norm": 0.703125, "learning_rate": 1.4165276947423711e-05, "loss": 0.2683, "step": 9631 }, { "epoch": 0.728972139444681, "grad_norm": 0.81640625, "learning_rate": 1.4164194079608913e-05, "loss": 0.3535, "step": 9632 }, { "epoch": 0.7290478217681283, "grad_norm": 0.73828125, "learning_rate": 1.4163111152717874e-05, "loss": 0.3134, "step": 9633 }, { "epoch": 0.7291235040915756, "grad_norm": 0.7734375, "learning_rate": 1.416202816676596e-05, "loss": 0.3282, "step": 9634 }, { "epoch": 0.7291991864150229, "grad_norm": 0.875, "learning_rate": 1.4160945121768532e-05, "loss": 0.2954, "step": 9635 }, { "epoch": 0.7292748687384702, "grad_norm": 0.8359375, "learning_rate": 1.4159862017740962e-05, "loss": 0.3477, "step": 9636 }, { "epoch": 0.7293505510619176, "grad_norm": 0.8203125, "learning_rate": 1.4158778854698608e-05, "loss": 0.372, "step": 9637 }, { "epoch": 0.7294262333853649, "grad_norm": 0.7734375, "learning_rate": 1.4157695632656838e-05, "loss": 0.3215, "step": 9638 }, { "epoch": 0.7295019157088123, "grad_norm": 0.8125, "learning_rate": 1.4156612351631022e-05, "loss": 0.3248, "step": 9639 }, { "epoch": 0.7295775980322596, "grad_norm": 0.80859375, "learning_rate": 1.4155529011636528e-05, "loss": 0.3202, "step": 9640 }, { "epoch": 0.7296532803557069, "grad_norm": 0.8203125, "learning_rate": 1.4154445612688724e-05, "loss": 0.3262, "step": 9641 }, { "epoch": 0.7297289626791542, "grad_norm": 0.7734375, "learning_rate": 1.4153362154802976e-05, "loss": 0.3258, "step": 9642 }, { "epoch": 0.7298046450026016, "grad_norm": 0.80859375, "learning_rate": 1.415227863799466e-05, "loss": 0.3375, "step": 9643 }, { "epoch": 0.7298803273260489, "grad_norm": 0.7578125, "learning_rate": 1.4151195062279147e-05, "loss": 0.2939, "step": 9644 }, { "epoch": 0.7299560096494963, "grad_norm": 0.8046875, "learning_rate": 1.415011142767181e-05, "loss": 0.3443, "step": 9645 }, { "epoch": 0.7300316919729436, "grad_norm": 0.71484375, "learning_rate": 1.4149027734188016e-05, "loss": 0.305, "step": 9646 }, { "epoch": 0.7301073742963909, "grad_norm": 0.76953125, "learning_rate": 1.4147943981843147e-05, "loss": 0.341, "step": 9647 }, { "epoch": 0.7301830566198382, "grad_norm": 0.73828125, "learning_rate": 1.4146860170652572e-05, "loss": 0.3048, "step": 9648 }, { "epoch": 0.7302587389432855, "grad_norm": 0.75390625, "learning_rate": 1.414577630063167e-05, "loss": 0.3316, "step": 9649 }, { "epoch": 0.7303344212667329, "grad_norm": 0.703125, "learning_rate": 1.414469237179582e-05, "loss": 0.2508, "step": 9650 }, { "epoch": 0.7304101035901802, "grad_norm": 0.80078125, "learning_rate": 1.4143608384160392e-05, "loss": 0.3729, "step": 9651 }, { "epoch": 0.7304857859136276, "grad_norm": 0.7421875, "learning_rate": 1.414252433774077e-05, "loss": 0.3207, "step": 9652 }, { "epoch": 0.7305614682370749, "grad_norm": 0.7734375, "learning_rate": 1.4141440232552332e-05, "loss": 0.3163, "step": 9653 }, { "epoch": 0.7306371505605223, "grad_norm": 0.75, "learning_rate": 1.4140356068610459e-05, "loss": 0.3002, "step": 9654 }, { "epoch": 0.7307128328839695, "grad_norm": 0.73828125, "learning_rate": 1.4139271845930526e-05, "loss": 0.3208, "step": 9655 }, { "epoch": 0.7307885152074168, "grad_norm": 0.8046875, "learning_rate": 1.4138187564527922e-05, "loss": 0.3603, "step": 9656 }, { "epoch": 0.7308641975308642, "grad_norm": 0.78125, "learning_rate": 1.4137103224418022e-05, "loss": 0.3282, "step": 9657 }, { "epoch": 0.7309398798543115, "grad_norm": 0.69921875, "learning_rate": 1.4136018825616218e-05, "loss": 0.2819, "step": 9658 }, { "epoch": 0.7310155621777589, "grad_norm": 0.74609375, "learning_rate": 1.4134934368137889e-05, "loss": 0.2855, "step": 9659 }, { "epoch": 0.7310912445012062, "grad_norm": 0.75, "learning_rate": 1.4133849851998418e-05, "loss": 0.284, "step": 9660 }, { "epoch": 0.7311669268246536, "grad_norm": 0.6484375, "learning_rate": 1.4132765277213195e-05, "loss": 0.2371, "step": 9661 }, { "epoch": 0.7312426091481008, "grad_norm": 0.80078125, "learning_rate": 1.4131680643797601e-05, "loss": 0.3349, "step": 9662 }, { "epoch": 0.7313182914715481, "grad_norm": 0.8046875, "learning_rate": 1.4130595951767027e-05, "loss": 0.314, "step": 9663 }, { "epoch": 0.7313939737949955, "grad_norm": 0.765625, "learning_rate": 1.4129511201136865e-05, "loss": 0.333, "step": 9664 }, { "epoch": 0.7314696561184428, "grad_norm": 0.75390625, "learning_rate": 1.4128426391922498e-05, "loss": 0.3027, "step": 9665 }, { "epoch": 0.7315453384418902, "grad_norm": 0.7890625, "learning_rate": 1.4127341524139317e-05, "loss": 0.3693, "step": 9666 }, { "epoch": 0.7316210207653375, "grad_norm": 0.76953125, "learning_rate": 1.4126256597802715e-05, "loss": 0.3283, "step": 9667 }, { "epoch": 0.7316967030887849, "grad_norm": 0.7890625, "learning_rate": 1.4125171612928083e-05, "loss": 0.3454, "step": 9668 }, { "epoch": 0.7317723854122321, "grad_norm": 0.77734375, "learning_rate": 1.4124086569530811e-05, "loss": 0.3264, "step": 9669 }, { "epoch": 0.7318480677356795, "grad_norm": 0.80859375, "learning_rate": 1.4123001467626293e-05, "loss": 0.3894, "step": 9670 }, { "epoch": 0.7319237500591268, "grad_norm": 0.71875, "learning_rate": 1.4121916307229926e-05, "loss": 0.2895, "step": 9671 }, { "epoch": 0.7319994323825741, "grad_norm": 0.80859375, "learning_rate": 1.41208310883571e-05, "loss": 0.3731, "step": 9672 }, { "epoch": 0.7320751147060215, "grad_norm": 0.734375, "learning_rate": 1.4119745811023218e-05, "loss": 0.3125, "step": 9673 }, { "epoch": 0.7321507970294688, "grad_norm": 0.76171875, "learning_rate": 1.4118660475243671e-05, "loss": 0.35, "step": 9674 }, { "epoch": 0.7322264793529162, "grad_norm": 0.72265625, "learning_rate": 1.4117575081033855e-05, "loss": 0.2988, "step": 9675 }, { "epoch": 0.7323021616763634, "grad_norm": 0.79296875, "learning_rate": 1.4116489628409169e-05, "loss": 0.3117, "step": 9676 }, { "epoch": 0.7323778439998108, "grad_norm": 0.7734375, "learning_rate": 1.4115404117385017e-05, "loss": 0.3038, "step": 9677 }, { "epoch": 0.7324535263232581, "grad_norm": 0.78515625, "learning_rate": 1.4114318547976793e-05, "loss": 0.3125, "step": 9678 }, { "epoch": 0.7325292086467055, "grad_norm": 0.75390625, "learning_rate": 1.4113232920199902e-05, "loss": 0.31, "step": 9679 }, { "epoch": 0.7326048909701528, "grad_norm": 0.7890625, "learning_rate": 1.4112147234069742e-05, "loss": 0.3521, "step": 9680 }, { "epoch": 0.7326805732936001, "grad_norm": 0.7421875, "learning_rate": 1.4111061489601716e-05, "loss": 0.3134, "step": 9681 }, { "epoch": 0.7327562556170475, "grad_norm": 0.765625, "learning_rate": 1.4109975686811231e-05, "loss": 0.3299, "step": 9682 }, { "epoch": 0.7328319379404947, "grad_norm": 0.78125, "learning_rate": 1.4108889825713688e-05, "loss": 0.3335, "step": 9683 }, { "epoch": 0.7329076202639421, "grad_norm": 0.9609375, "learning_rate": 1.4107803906324487e-05, "loss": 0.3272, "step": 9684 }, { "epoch": 0.7329833025873894, "grad_norm": 0.75390625, "learning_rate": 1.4106717928659039e-05, "loss": 0.3072, "step": 9685 }, { "epoch": 0.7330589849108368, "grad_norm": 0.734375, "learning_rate": 1.4105631892732754e-05, "loss": 0.2777, "step": 9686 }, { "epoch": 0.7331346672342841, "grad_norm": 0.8203125, "learning_rate": 1.4104545798561033e-05, "loss": 0.3576, "step": 9687 }, { "epoch": 0.7332103495577315, "grad_norm": 1.09375, "learning_rate": 1.4103459646159285e-05, "loss": 0.4036, "step": 9688 }, { "epoch": 0.7332860318811788, "grad_norm": 0.8046875, "learning_rate": 1.4102373435542922e-05, "loss": 0.3718, "step": 9689 }, { "epoch": 0.733361714204626, "grad_norm": 0.6953125, "learning_rate": 1.410128716672735e-05, "loss": 0.2736, "step": 9690 }, { "epoch": 0.7334373965280734, "grad_norm": 0.76953125, "learning_rate": 1.4100200839727984e-05, "loss": 0.3348, "step": 9691 }, { "epoch": 0.7335130788515207, "grad_norm": 0.79296875, "learning_rate": 1.4099114454560231e-05, "loss": 0.3427, "step": 9692 }, { "epoch": 0.7335887611749681, "grad_norm": 1.0703125, "learning_rate": 1.4098028011239507e-05, "loss": 0.3827, "step": 9693 }, { "epoch": 0.7336644434984154, "grad_norm": 0.75390625, "learning_rate": 1.409694150978122e-05, "loss": 0.3041, "step": 9694 }, { "epoch": 0.7337401258218628, "grad_norm": 0.75, "learning_rate": 1.4095854950200792e-05, "loss": 0.3056, "step": 9695 }, { "epoch": 0.7338158081453101, "grad_norm": 0.71875, "learning_rate": 1.4094768332513629e-05, "loss": 0.301, "step": 9696 }, { "epoch": 0.7338914904687573, "grad_norm": 0.7421875, "learning_rate": 1.4093681656735152e-05, "loss": 0.2975, "step": 9697 }, { "epoch": 0.7339671727922047, "grad_norm": 0.74609375, "learning_rate": 1.4092594922880773e-05, "loss": 0.3165, "step": 9698 }, { "epoch": 0.734042855115652, "grad_norm": 0.72265625, "learning_rate": 1.4091508130965914e-05, "loss": 0.291, "step": 9699 }, { "epoch": 0.7341185374390994, "grad_norm": 0.7265625, "learning_rate": 1.409042128100599e-05, "loss": 0.3236, "step": 9700 }, { "epoch": 0.7341942197625467, "grad_norm": 0.73046875, "learning_rate": 1.4089334373016424e-05, "loss": 0.2931, "step": 9701 }, { "epoch": 0.7342699020859941, "grad_norm": 0.83203125, "learning_rate": 1.408824740701263e-05, "loss": 0.3637, "step": 9702 }, { "epoch": 0.7343455844094414, "grad_norm": 0.78515625, "learning_rate": 1.408716038301003e-05, "loss": 0.3117, "step": 9703 }, { "epoch": 0.7344212667328887, "grad_norm": 0.796875, "learning_rate": 1.4086073301024046e-05, "loss": 0.3373, "step": 9704 }, { "epoch": 0.734496949056336, "grad_norm": 0.765625, "learning_rate": 1.40849861610701e-05, "loss": 0.3167, "step": 9705 }, { "epoch": 0.7345726313797833, "grad_norm": 0.76953125, "learning_rate": 1.4083898963163618e-05, "loss": 0.3109, "step": 9706 }, { "epoch": 0.7346483137032307, "grad_norm": 0.8203125, "learning_rate": 1.408281170732002e-05, "loss": 0.3375, "step": 9707 }, { "epoch": 0.734723996026678, "grad_norm": 0.80859375, "learning_rate": 1.408172439355473e-05, "loss": 0.3486, "step": 9708 }, { "epoch": 0.7347996783501254, "grad_norm": 0.78515625, "learning_rate": 1.4080637021883177e-05, "loss": 0.3488, "step": 9709 }, { "epoch": 0.7348753606735727, "grad_norm": 0.82421875, "learning_rate": 1.4079549592320782e-05, "loss": 0.3671, "step": 9710 }, { "epoch": 0.73495104299702, "grad_norm": 0.74609375, "learning_rate": 1.4078462104882979e-05, "loss": 0.3044, "step": 9711 }, { "epoch": 0.7350267253204673, "grad_norm": 0.7109375, "learning_rate": 1.407737455958519e-05, "loss": 0.2772, "step": 9712 }, { "epoch": 0.7351024076439147, "grad_norm": 0.78515625, "learning_rate": 1.4076286956442846e-05, "loss": 0.3502, "step": 9713 }, { "epoch": 0.735178089967362, "grad_norm": 0.73828125, "learning_rate": 1.4075199295471378e-05, "loss": 0.2847, "step": 9714 }, { "epoch": 0.7352537722908093, "grad_norm": 0.72265625, "learning_rate": 1.4074111576686215e-05, "loss": 0.3004, "step": 9715 }, { "epoch": 0.7353294546142567, "grad_norm": 0.76953125, "learning_rate": 1.4073023800102785e-05, "loss": 0.3285, "step": 9716 }, { "epoch": 0.735405136937704, "grad_norm": 0.7421875, "learning_rate": 1.4071935965736523e-05, "loss": 0.298, "step": 9717 }, { "epoch": 0.7354808192611513, "grad_norm": 0.78125, "learning_rate": 1.4070848073602862e-05, "loss": 0.335, "step": 9718 }, { "epoch": 0.7355565015845986, "grad_norm": 0.73046875, "learning_rate": 1.4069760123717235e-05, "loss": 0.293, "step": 9719 }, { "epoch": 0.735632183908046, "grad_norm": 0.7109375, "learning_rate": 1.4068672116095081e-05, "loss": 0.2923, "step": 9720 }, { "epoch": 0.7357078662314933, "grad_norm": 0.671875, "learning_rate": 1.4067584050751827e-05, "loss": 0.2821, "step": 9721 }, { "epoch": 0.7357835485549407, "grad_norm": 0.78515625, "learning_rate": 1.4066495927702915e-05, "loss": 0.3492, "step": 9722 }, { "epoch": 0.735859230878388, "grad_norm": 0.80078125, "learning_rate": 1.4065407746963777e-05, "loss": 0.3478, "step": 9723 }, { "epoch": 0.7359349132018354, "grad_norm": 0.734375, "learning_rate": 1.4064319508549858e-05, "loss": 0.2865, "step": 9724 }, { "epoch": 0.7360105955252826, "grad_norm": 0.76171875, "learning_rate": 1.4063231212476586e-05, "loss": 0.3181, "step": 9725 }, { "epoch": 0.7360862778487299, "grad_norm": 0.7734375, "learning_rate": 1.406214285875941e-05, "loss": 0.3289, "step": 9726 }, { "epoch": 0.7361619601721773, "grad_norm": 0.76171875, "learning_rate": 1.406105444741377e-05, "loss": 0.3098, "step": 9727 }, { "epoch": 0.7362376424956246, "grad_norm": 0.71484375, "learning_rate": 1.4059965978455097e-05, "loss": 0.2859, "step": 9728 }, { "epoch": 0.736313324819072, "grad_norm": 0.78515625, "learning_rate": 1.4058877451898844e-05, "loss": 0.3282, "step": 9729 }, { "epoch": 0.7363890071425193, "grad_norm": 0.765625, "learning_rate": 1.4057788867760448e-05, "loss": 0.3208, "step": 9730 }, { "epoch": 0.7364646894659667, "grad_norm": 0.73828125, "learning_rate": 1.4056700226055354e-05, "loss": 0.3116, "step": 9731 }, { "epoch": 0.7365403717894139, "grad_norm": 0.7578125, "learning_rate": 1.4055611526799004e-05, "loss": 0.311, "step": 9732 }, { "epoch": 0.7366160541128612, "grad_norm": 0.796875, "learning_rate": 1.4054522770006847e-05, "loss": 0.337, "step": 9733 }, { "epoch": 0.7366917364363086, "grad_norm": 0.71484375, "learning_rate": 1.4053433955694327e-05, "loss": 0.2925, "step": 9734 }, { "epoch": 0.7367674187597559, "grad_norm": 0.76171875, "learning_rate": 1.4052345083876888e-05, "loss": 0.3288, "step": 9735 }, { "epoch": 0.7368431010832033, "grad_norm": 0.73828125, "learning_rate": 1.4051256154569982e-05, "loss": 0.2964, "step": 9736 }, { "epoch": 0.7369187834066506, "grad_norm": 0.78125, "learning_rate": 1.4050167167789052e-05, "loss": 0.2989, "step": 9737 }, { "epoch": 0.7369944657300979, "grad_norm": 0.78125, "learning_rate": 1.4049078123549556e-05, "loss": 0.3248, "step": 9738 }, { "epoch": 0.7370701480535452, "grad_norm": 0.7265625, "learning_rate": 1.4047989021866935e-05, "loss": 0.2807, "step": 9739 }, { "epoch": 0.7371458303769926, "grad_norm": 0.78125, "learning_rate": 1.4046899862756642e-05, "loss": 0.3304, "step": 9740 }, { "epoch": 0.7372215127004399, "grad_norm": 0.80859375, "learning_rate": 1.404581064623413e-05, "loss": 0.3625, "step": 9741 }, { "epoch": 0.7372971950238872, "grad_norm": 0.74609375, "learning_rate": 1.4044721372314854e-05, "loss": 0.3147, "step": 9742 }, { "epoch": 0.7373728773473346, "grad_norm": 0.7109375, "learning_rate": 1.4043632041014262e-05, "loss": 0.3075, "step": 9743 }, { "epoch": 0.7374485596707819, "grad_norm": 0.72265625, "learning_rate": 1.4042542652347811e-05, "loss": 0.2756, "step": 9744 }, { "epoch": 0.7375242419942292, "grad_norm": 0.7890625, "learning_rate": 1.4041453206330955e-05, "loss": 0.3143, "step": 9745 }, { "epoch": 0.7375999243176765, "grad_norm": 0.90625, "learning_rate": 1.4040363702979147e-05, "loss": 0.3382, "step": 9746 }, { "epoch": 0.7376756066411239, "grad_norm": 0.79296875, "learning_rate": 1.403927414230785e-05, "loss": 0.3339, "step": 9747 }, { "epoch": 0.7377512889645712, "grad_norm": 0.73046875, "learning_rate": 1.4038184524332517e-05, "loss": 0.3025, "step": 9748 }, { "epoch": 0.7378269712880186, "grad_norm": 0.72265625, "learning_rate": 1.4037094849068606e-05, "loss": 0.3218, "step": 9749 }, { "epoch": 0.7379026536114659, "grad_norm": 0.78125, "learning_rate": 1.4036005116531579e-05, "loss": 0.3356, "step": 9750 }, { "epoch": 0.7379783359349132, "grad_norm": 0.765625, "learning_rate": 1.403491532673689e-05, "loss": 0.3228, "step": 9751 }, { "epoch": 0.7380540182583605, "grad_norm": 0.703125, "learning_rate": 1.4033825479700008e-05, "loss": 0.249, "step": 9752 }, { "epoch": 0.7381297005818078, "grad_norm": 0.75390625, "learning_rate": 1.4032735575436384e-05, "loss": 0.256, "step": 9753 }, { "epoch": 0.7382053829052552, "grad_norm": 0.765625, "learning_rate": 1.4031645613961487e-05, "loss": 0.3154, "step": 9754 }, { "epoch": 0.7382810652287025, "grad_norm": 0.82421875, "learning_rate": 1.403055559529078e-05, "loss": 0.3764, "step": 9755 }, { "epoch": 0.7383567475521499, "grad_norm": 0.73828125, "learning_rate": 1.4029465519439724e-05, "loss": 0.2895, "step": 9756 }, { "epoch": 0.7384324298755972, "grad_norm": 0.73046875, "learning_rate": 1.4028375386423785e-05, "loss": 0.2734, "step": 9757 }, { "epoch": 0.7385081121990446, "grad_norm": 0.7265625, "learning_rate": 1.4027285196258426e-05, "loss": 0.2793, "step": 9758 }, { "epoch": 0.7385837945224918, "grad_norm": 0.77734375, "learning_rate": 1.402619494895912e-05, "loss": 0.3283, "step": 9759 }, { "epoch": 0.7386594768459391, "grad_norm": 0.8046875, "learning_rate": 1.4025104644541325e-05, "loss": 0.3407, "step": 9760 }, { "epoch": 0.7387351591693865, "grad_norm": 0.73046875, "learning_rate": 1.4024014283020514e-05, "loss": 0.3057, "step": 9761 }, { "epoch": 0.7388108414928338, "grad_norm": 0.7890625, "learning_rate": 1.4022923864412161e-05, "loss": 0.3153, "step": 9762 }, { "epoch": 0.7388865238162812, "grad_norm": 0.7265625, "learning_rate": 1.4021833388731723e-05, "loss": 0.2902, "step": 9763 }, { "epoch": 0.7389622061397285, "grad_norm": 0.79296875, "learning_rate": 1.4020742855994677e-05, "loss": 0.3313, "step": 9764 }, { "epoch": 0.7390378884631759, "grad_norm": 0.72265625, "learning_rate": 1.4019652266216497e-05, "loss": 0.3058, "step": 9765 }, { "epoch": 0.7391135707866231, "grad_norm": 0.8125, "learning_rate": 1.401856161941265e-05, "loss": 0.3272, "step": 9766 }, { "epoch": 0.7391892531100704, "grad_norm": 0.71875, "learning_rate": 1.401747091559861e-05, "loss": 0.2673, "step": 9767 }, { "epoch": 0.7392649354335178, "grad_norm": 0.73046875, "learning_rate": 1.401638015478985e-05, "loss": 0.3094, "step": 9768 }, { "epoch": 0.7393406177569651, "grad_norm": 0.7421875, "learning_rate": 1.4015289337001846e-05, "loss": 0.3153, "step": 9769 }, { "epoch": 0.7394163000804125, "grad_norm": 0.765625, "learning_rate": 1.4014198462250072e-05, "loss": 0.337, "step": 9770 }, { "epoch": 0.7394919824038598, "grad_norm": 0.765625, "learning_rate": 1.4013107530550007e-05, "loss": 0.318, "step": 9771 }, { "epoch": 0.7395676647273072, "grad_norm": 0.8359375, "learning_rate": 1.401201654191712e-05, "loss": 0.3643, "step": 9772 }, { "epoch": 0.7396433470507544, "grad_norm": 0.7421875, "learning_rate": 1.4010925496366896e-05, "loss": 0.2939, "step": 9773 }, { "epoch": 0.7397190293742018, "grad_norm": 0.78515625, "learning_rate": 1.4009834393914812e-05, "loss": 0.3471, "step": 9774 }, { "epoch": 0.7397947116976491, "grad_norm": 0.7578125, "learning_rate": 1.4008743234576343e-05, "loss": 0.3396, "step": 9775 }, { "epoch": 0.7398703940210964, "grad_norm": 0.7421875, "learning_rate": 1.4007652018366974e-05, "loss": 0.2875, "step": 9776 }, { "epoch": 0.7399460763445438, "grad_norm": 0.7578125, "learning_rate": 1.4006560745302188e-05, "loss": 0.3195, "step": 9777 }, { "epoch": 0.7400217586679911, "grad_norm": 0.7890625, "learning_rate": 1.4005469415397453e-05, "loss": 0.3294, "step": 9778 }, { "epoch": 0.7400974409914385, "grad_norm": 1.0859375, "learning_rate": 1.4004378028668266e-05, "loss": 0.3805, "step": 9779 }, { "epoch": 0.7401731233148857, "grad_norm": 0.75, "learning_rate": 1.4003286585130106e-05, "loss": 0.3139, "step": 9780 }, { "epoch": 0.7402488056383331, "grad_norm": 0.78125, "learning_rate": 1.4002195084798453e-05, "loss": 0.3448, "step": 9781 }, { "epoch": 0.7403244879617804, "grad_norm": 0.77734375, "learning_rate": 1.4001103527688797e-05, "loss": 0.3219, "step": 9782 }, { "epoch": 0.7404001702852278, "grad_norm": 0.703125, "learning_rate": 1.4000011913816623e-05, "loss": 0.2817, "step": 9783 }, { "epoch": 0.7404758526086751, "grad_norm": 0.85546875, "learning_rate": 1.3998920243197408e-05, "loss": 0.3565, "step": 9784 }, { "epoch": 0.7405515349321224, "grad_norm": 0.796875, "learning_rate": 1.3997828515846656e-05, "loss": 0.3408, "step": 9785 }, { "epoch": 0.7406272172555698, "grad_norm": 0.76953125, "learning_rate": 1.3996736731779843e-05, "loss": 0.317, "step": 9786 }, { "epoch": 0.740702899579017, "grad_norm": 0.9140625, "learning_rate": 1.3995644891012459e-05, "loss": 0.3498, "step": 9787 }, { "epoch": 0.7407785819024644, "grad_norm": 0.74609375, "learning_rate": 1.3994552993559996e-05, "loss": 0.3053, "step": 9788 }, { "epoch": 0.7408542642259117, "grad_norm": 1.015625, "learning_rate": 1.3993461039437946e-05, "loss": 0.3651, "step": 9789 }, { "epoch": 0.7409299465493591, "grad_norm": 0.76171875, "learning_rate": 1.3992369028661796e-05, "loss": 0.33, "step": 9790 }, { "epoch": 0.7410056288728064, "grad_norm": 0.7890625, "learning_rate": 1.399127696124704e-05, "loss": 0.3688, "step": 9791 }, { "epoch": 0.7410813111962538, "grad_norm": 0.7421875, "learning_rate": 1.3990184837209173e-05, "loss": 0.2895, "step": 9792 }, { "epoch": 0.7411569935197011, "grad_norm": 0.734375, "learning_rate": 1.3989092656563684e-05, "loss": 0.2794, "step": 9793 }, { "epoch": 0.7412326758431483, "grad_norm": 0.75, "learning_rate": 1.3988000419326073e-05, "loss": 0.2881, "step": 9794 }, { "epoch": 0.7413083581665957, "grad_norm": 0.82421875, "learning_rate": 1.398690812551183e-05, "loss": 0.3453, "step": 9795 }, { "epoch": 0.741384040490043, "grad_norm": 0.75, "learning_rate": 1.3985815775136455e-05, "loss": 0.2979, "step": 9796 }, { "epoch": 0.7414597228134904, "grad_norm": 0.7890625, "learning_rate": 1.3984723368215442e-05, "loss": 0.349, "step": 9797 }, { "epoch": 0.7415354051369377, "grad_norm": 0.73828125, "learning_rate": 1.3983630904764292e-05, "loss": 0.2823, "step": 9798 }, { "epoch": 0.7416110874603851, "grad_norm": 0.81640625, "learning_rate": 1.39825383847985e-05, "loss": 0.3643, "step": 9799 }, { "epoch": 0.7416867697838324, "grad_norm": 0.69921875, "learning_rate": 1.3981445808333566e-05, "loss": 0.282, "step": 9800 }, { "epoch": 0.7417624521072796, "grad_norm": 0.765625, "learning_rate": 1.3980353175384993e-05, "loss": 0.3153, "step": 9801 }, { "epoch": 0.741838134430727, "grad_norm": 0.7734375, "learning_rate": 1.3979260485968278e-05, "loss": 0.3148, "step": 9802 }, { "epoch": 0.7419138167541743, "grad_norm": 0.703125, "learning_rate": 1.3978167740098925e-05, "loss": 0.2798, "step": 9803 }, { "epoch": 0.7419894990776217, "grad_norm": 0.6953125, "learning_rate": 1.3977074937792434e-05, "loss": 0.3028, "step": 9804 }, { "epoch": 0.742065181401069, "grad_norm": 0.80078125, "learning_rate": 1.3975982079064314e-05, "loss": 0.3498, "step": 9805 }, { "epoch": 0.7421408637245164, "grad_norm": 0.80859375, "learning_rate": 1.3974889163930063e-05, "loss": 0.3603, "step": 9806 }, { "epoch": 0.7422165460479637, "grad_norm": 0.8203125, "learning_rate": 1.397379619240519e-05, "loss": 0.3621, "step": 9807 }, { "epoch": 0.742292228371411, "grad_norm": 0.6875, "learning_rate": 1.3972703164505195e-05, "loss": 0.2772, "step": 9808 }, { "epoch": 0.7423679106948583, "grad_norm": 0.671875, "learning_rate": 1.397161008024559e-05, "loss": 0.2724, "step": 9809 }, { "epoch": 0.7424435930183056, "grad_norm": 0.80859375, "learning_rate": 1.3970516939641878e-05, "loss": 0.3823, "step": 9810 }, { "epoch": 0.742519275341753, "grad_norm": 0.71875, "learning_rate": 1.3969423742709574e-05, "loss": 0.3004, "step": 9811 }, { "epoch": 0.7425949576652003, "grad_norm": 0.80859375, "learning_rate": 1.396833048946418e-05, "loss": 0.3614, "step": 9812 }, { "epoch": 0.7426706399886477, "grad_norm": 0.7890625, "learning_rate": 1.396723717992121e-05, "loss": 0.3768, "step": 9813 }, { "epoch": 0.742746322312095, "grad_norm": 0.76953125, "learning_rate": 1.3966143814096172e-05, "loss": 0.3239, "step": 9814 }, { "epoch": 0.7428220046355423, "grad_norm": 0.9140625, "learning_rate": 1.3965050392004576e-05, "loss": 0.2983, "step": 9815 }, { "epoch": 0.7428976869589896, "grad_norm": 0.765625, "learning_rate": 1.3963956913661936e-05, "loss": 0.3226, "step": 9816 }, { "epoch": 0.742973369282437, "grad_norm": 0.7578125, "learning_rate": 1.3962863379083766e-05, "loss": 0.338, "step": 9817 }, { "epoch": 0.7430490516058843, "grad_norm": 0.81640625, "learning_rate": 1.3961769788285579e-05, "loss": 0.3616, "step": 9818 }, { "epoch": 0.7431247339293316, "grad_norm": 0.80859375, "learning_rate": 1.3960676141282888e-05, "loss": 0.3475, "step": 9819 }, { "epoch": 0.743200416252779, "grad_norm": 0.8125, "learning_rate": 1.395958243809121e-05, "loss": 0.3262, "step": 9820 }, { "epoch": 0.7432760985762263, "grad_norm": 0.7421875, "learning_rate": 1.3958488678726058e-05, "loss": 0.3052, "step": 9821 }, { "epoch": 0.7433517808996736, "grad_norm": 0.75, "learning_rate": 1.3957394863202955e-05, "loss": 0.2887, "step": 9822 }, { "epoch": 0.7434274632231209, "grad_norm": 0.8046875, "learning_rate": 1.3956300991537412e-05, "loss": 0.3227, "step": 9823 }, { "epoch": 0.7435031455465683, "grad_norm": 0.73828125, "learning_rate": 1.395520706374495e-05, "loss": 0.3209, "step": 9824 }, { "epoch": 0.7435788278700156, "grad_norm": 0.75390625, "learning_rate": 1.395411307984109e-05, "loss": 0.3193, "step": 9825 }, { "epoch": 0.743654510193463, "grad_norm": 0.7578125, "learning_rate": 1.3953019039841349e-05, "loss": 0.2829, "step": 9826 }, { "epoch": 0.7437301925169103, "grad_norm": 0.74609375, "learning_rate": 1.3951924943761252e-05, "loss": 0.3052, "step": 9827 }, { "epoch": 0.7438058748403577, "grad_norm": 0.77734375, "learning_rate": 1.3950830791616314e-05, "loss": 0.3477, "step": 9828 }, { "epoch": 0.7438815571638049, "grad_norm": 0.7265625, "learning_rate": 1.3949736583422064e-05, "loss": 0.2879, "step": 9829 }, { "epoch": 0.7439572394872522, "grad_norm": 0.78515625, "learning_rate": 1.3948642319194021e-05, "loss": 0.3423, "step": 9830 }, { "epoch": 0.7440329218106996, "grad_norm": 0.78125, "learning_rate": 1.394754799894771e-05, "loss": 0.3111, "step": 9831 }, { "epoch": 0.7441086041341469, "grad_norm": 0.83203125, "learning_rate": 1.394645362269866e-05, "loss": 0.3482, "step": 9832 }, { "epoch": 0.7441842864575943, "grad_norm": 0.7734375, "learning_rate": 1.394535919046239e-05, "loss": 0.2913, "step": 9833 }, { "epoch": 0.7442599687810416, "grad_norm": 0.76171875, "learning_rate": 1.3944264702254431e-05, "loss": 0.3364, "step": 9834 }, { "epoch": 0.744335651104489, "grad_norm": 0.72265625, "learning_rate": 1.3943170158090307e-05, "loss": 0.3073, "step": 9835 }, { "epoch": 0.7444113334279362, "grad_norm": 0.76953125, "learning_rate": 1.394207555798555e-05, "loss": 0.326, "step": 9836 }, { "epoch": 0.7444870157513835, "grad_norm": 0.72265625, "learning_rate": 1.3940980901955684e-05, "loss": 0.3096, "step": 9837 }, { "epoch": 0.7445626980748309, "grad_norm": 0.7734375, "learning_rate": 1.3939886190016241e-05, "loss": 0.3218, "step": 9838 }, { "epoch": 0.7446383803982782, "grad_norm": 0.7421875, "learning_rate": 1.3938791422182754e-05, "loss": 0.2889, "step": 9839 }, { "epoch": 0.7447140627217256, "grad_norm": 1.09375, "learning_rate": 1.3937696598470747e-05, "loss": 0.3807, "step": 9840 }, { "epoch": 0.7447897450451729, "grad_norm": 0.80078125, "learning_rate": 1.3936601718895762e-05, "loss": 0.3318, "step": 9841 }, { "epoch": 0.7448654273686203, "grad_norm": 1.0234375, "learning_rate": 1.3935506783473323e-05, "loss": 0.3683, "step": 9842 }, { "epoch": 0.7449411096920675, "grad_norm": 0.859375, "learning_rate": 1.3934411792218965e-05, "loss": 0.3248, "step": 9843 }, { "epoch": 0.7450167920155149, "grad_norm": 0.74609375, "learning_rate": 1.3933316745148226e-05, "loss": 0.292, "step": 9844 }, { "epoch": 0.7450924743389622, "grad_norm": 0.7421875, "learning_rate": 1.393222164227664e-05, "loss": 0.2967, "step": 9845 }, { "epoch": 0.7451681566624095, "grad_norm": 0.75390625, "learning_rate": 1.3931126483619742e-05, "loss": 0.3067, "step": 9846 }, { "epoch": 0.7452438389858569, "grad_norm": 0.85546875, "learning_rate": 1.3930031269193068e-05, "loss": 0.3549, "step": 9847 }, { "epoch": 0.7453195213093042, "grad_norm": 0.70703125, "learning_rate": 1.3928935999012156e-05, "loss": 0.2883, "step": 9848 }, { "epoch": 0.7453952036327516, "grad_norm": 0.7265625, "learning_rate": 1.3927840673092543e-05, "loss": 0.2957, "step": 9849 }, { "epoch": 0.7454708859561988, "grad_norm": 0.8359375, "learning_rate": 1.3926745291449773e-05, "loss": 0.3718, "step": 9850 }, { "epoch": 0.7455465682796462, "grad_norm": 0.765625, "learning_rate": 1.3925649854099379e-05, "loss": 0.3438, "step": 9851 }, { "epoch": 0.7456222506030935, "grad_norm": 0.71875, "learning_rate": 1.3924554361056907e-05, "loss": 0.3084, "step": 9852 }, { "epoch": 0.7456979329265409, "grad_norm": 0.7421875, "learning_rate": 1.3923458812337898e-05, "loss": 0.2976, "step": 9853 }, { "epoch": 0.7457736152499882, "grad_norm": 0.7578125, "learning_rate": 1.3922363207957893e-05, "loss": 0.3352, "step": 9854 }, { "epoch": 0.7458492975734355, "grad_norm": 0.78515625, "learning_rate": 1.3921267547932433e-05, "loss": 0.3381, "step": 9855 }, { "epoch": 0.7459249798968829, "grad_norm": 0.765625, "learning_rate": 1.3920171832277066e-05, "loss": 0.3282, "step": 9856 }, { "epoch": 0.7460006622203301, "grad_norm": 0.80078125, "learning_rate": 1.391907606100733e-05, "loss": 0.3167, "step": 9857 }, { "epoch": 0.7460763445437775, "grad_norm": 0.8125, "learning_rate": 1.3917980234138777e-05, "loss": 0.3965, "step": 9858 }, { "epoch": 0.7461520268672248, "grad_norm": 0.76171875, "learning_rate": 1.391688435168695e-05, "loss": 0.323, "step": 9859 }, { "epoch": 0.7462277091906722, "grad_norm": 0.7421875, "learning_rate": 1.3915788413667398e-05, "loss": 0.3098, "step": 9860 }, { "epoch": 0.7463033915141195, "grad_norm": 0.7734375, "learning_rate": 1.391469242009567e-05, "loss": 0.3199, "step": 9861 }, { "epoch": 0.7463790738375669, "grad_norm": 1.4296875, "learning_rate": 1.3913596370987308e-05, "loss": 0.4255, "step": 9862 }, { "epoch": 0.7464547561610141, "grad_norm": 0.73046875, "learning_rate": 1.3912500266357868e-05, "loss": 0.2937, "step": 9863 }, { "epoch": 0.7465304384844614, "grad_norm": 0.78515625, "learning_rate": 1.3911404106222896e-05, "loss": 0.3185, "step": 9864 }, { "epoch": 0.7466061208079088, "grad_norm": 0.82421875, "learning_rate": 1.3910307890597948e-05, "loss": 0.3422, "step": 9865 }, { "epoch": 0.7466818031313561, "grad_norm": 0.73828125, "learning_rate": 1.3909211619498568e-05, "loss": 0.3209, "step": 9866 }, { "epoch": 0.7467574854548035, "grad_norm": 0.75390625, "learning_rate": 1.3908115292940315e-05, "loss": 0.3203, "step": 9867 }, { "epoch": 0.7468331677782508, "grad_norm": 0.79296875, "learning_rate": 1.3907018910938739e-05, "loss": 0.3319, "step": 9868 }, { "epoch": 0.7469088501016982, "grad_norm": 0.7578125, "learning_rate": 1.3905922473509397e-05, "loss": 0.3151, "step": 9869 }, { "epoch": 0.7469845324251454, "grad_norm": 0.7265625, "learning_rate": 1.390482598066784e-05, "loss": 0.2849, "step": 9870 }, { "epoch": 0.7470602147485927, "grad_norm": 0.765625, "learning_rate": 1.3903729432429627e-05, "loss": 0.327, "step": 9871 }, { "epoch": 0.7471358970720401, "grad_norm": 0.765625, "learning_rate": 1.390263282881031e-05, "loss": 0.281, "step": 9872 }, { "epoch": 0.7472115793954874, "grad_norm": 0.80078125, "learning_rate": 1.3901536169825451e-05, "loss": 0.3457, "step": 9873 }, { "epoch": 0.7472872617189348, "grad_norm": 0.70703125, "learning_rate": 1.3900439455490609e-05, "loss": 0.2945, "step": 9874 }, { "epoch": 0.7473629440423821, "grad_norm": 0.7109375, "learning_rate": 1.3899342685821335e-05, "loss": 0.288, "step": 9875 }, { "epoch": 0.7474386263658295, "grad_norm": 0.765625, "learning_rate": 1.3898245860833198e-05, "loss": 0.3454, "step": 9876 }, { "epoch": 0.7475143086892767, "grad_norm": 0.7734375, "learning_rate": 1.3897148980541751e-05, "loss": 0.3405, "step": 9877 }, { "epoch": 0.747589991012724, "grad_norm": 0.75, "learning_rate": 1.3896052044962558e-05, "loss": 0.322, "step": 9878 }, { "epoch": 0.7476656733361714, "grad_norm": 0.72265625, "learning_rate": 1.389495505411118e-05, "loss": 0.2908, "step": 9879 }, { "epoch": 0.7477413556596187, "grad_norm": 0.6953125, "learning_rate": 1.3893858008003185e-05, "loss": 0.2769, "step": 9880 }, { "epoch": 0.7478170379830661, "grad_norm": 0.79296875, "learning_rate": 1.3892760906654125e-05, "loss": 0.3269, "step": 9881 }, { "epoch": 0.7478927203065134, "grad_norm": 0.80078125, "learning_rate": 1.3891663750079577e-05, "loss": 0.3025, "step": 9882 }, { "epoch": 0.7479684026299608, "grad_norm": 0.71484375, "learning_rate": 1.3890566538295102e-05, "loss": 0.3009, "step": 9883 }, { "epoch": 0.748044084953408, "grad_norm": 0.765625, "learning_rate": 1.3889469271316259e-05, "loss": 0.3185, "step": 9884 }, { "epoch": 0.7481197672768554, "grad_norm": 0.7734375, "learning_rate": 1.3888371949158624e-05, "loss": 0.3157, "step": 9885 }, { "epoch": 0.7481954496003027, "grad_norm": 0.77734375, "learning_rate": 1.3887274571837757e-05, "loss": 0.319, "step": 9886 }, { "epoch": 0.74827113192375, "grad_norm": 0.734375, "learning_rate": 1.3886177139369229e-05, "loss": 0.3057, "step": 9887 }, { "epoch": 0.7483468142471974, "grad_norm": 0.7265625, "learning_rate": 1.3885079651768613e-05, "loss": 0.2685, "step": 9888 }, { "epoch": 0.7484224965706447, "grad_norm": 0.73046875, "learning_rate": 1.3883982109051472e-05, "loss": 0.2939, "step": 9889 }, { "epoch": 0.7484981788940921, "grad_norm": 0.71484375, "learning_rate": 1.3882884511233381e-05, "loss": 0.2896, "step": 9890 }, { "epoch": 0.7485738612175393, "grad_norm": 0.75390625, "learning_rate": 1.3881786858329909e-05, "loss": 0.2951, "step": 9891 }, { "epoch": 0.7486495435409867, "grad_norm": 0.72265625, "learning_rate": 1.3880689150356633e-05, "loss": 0.3136, "step": 9892 }, { "epoch": 0.748725225864434, "grad_norm": 0.796875, "learning_rate": 1.3879591387329114e-05, "loss": 0.3602, "step": 9893 }, { "epoch": 0.7488009081878814, "grad_norm": 0.7421875, "learning_rate": 1.3878493569262941e-05, "loss": 0.3332, "step": 9894 }, { "epoch": 0.7488765905113287, "grad_norm": 0.7890625, "learning_rate": 1.3877395696173677e-05, "loss": 0.3179, "step": 9895 }, { "epoch": 0.7489522728347761, "grad_norm": 0.734375, "learning_rate": 1.3876297768076901e-05, "loss": 0.3302, "step": 9896 }, { "epoch": 0.7490279551582234, "grad_norm": 0.76953125, "learning_rate": 1.3875199784988192e-05, "loss": 0.3354, "step": 9897 }, { "epoch": 0.7491036374816706, "grad_norm": 0.82421875, "learning_rate": 1.3874101746923124e-05, "loss": 0.3748, "step": 9898 }, { "epoch": 0.749179319805118, "grad_norm": 0.77734375, "learning_rate": 1.3873003653897271e-05, "loss": 0.3334, "step": 9899 }, { "epoch": 0.7492550021285653, "grad_norm": 0.7265625, "learning_rate": 1.3871905505926217e-05, "loss": 0.2979, "step": 9900 }, { "epoch": 0.7493306844520127, "grad_norm": 0.68359375, "learning_rate": 1.3870807303025539e-05, "loss": 0.2593, "step": 9901 }, { "epoch": 0.74940636677546, "grad_norm": 0.7890625, "learning_rate": 1.3869709045210817e-05, "loss": 0.3461, "step": 9902 }, { "epoch": 0.7494820490989074, "grad_norm": 0.71875, "learning_rate": 1.3868610732497632e-05, "loss": 0.2668, "step": 9903 }, { "epoch": 0.7495577314223547, "grad_norm": 0.828125, "learning_rate": 1.3867512364901563e-05, "loss": 0.3403, "step": 9904 }, { "epoch": 0.749633413745802, "grad_norm": 0.796875, "learning_rate": 1.3866413942438195e-05, "loss": 0.3199, "step": 9905 }, { "epoch": 0.7497090960692493, "grad_norm": 0.74609375, "learning_rate": 1.3865315465123112e-05, "loss": 0.3158, "step": 9906 }, { "epoch": 0.7497847783926966, "grad_norm": 0.82421875, "learning_rate": 1.3864216932971894e-05, "loss": 0.3071, "step": 9907 }, { "epoch": 0.749860460716144, "grad_norm": 0.7421875, "learning_rate": 1.3863118346000128e-05, "loss": 0.3086, "step": 9908 }, { "epoch": 0.7499361430395913, "grad_norm": 0.8359375, "learning_rate": 1.3862019704223401e-05, "loss": 0.3803, "step": 9909 }, { "epoch": 0.7500118253630387, "grad_norm": 0.77734375, "learning_rate": 1.3860921007657295e-05, "loss": 0.356, "step": 9910 }, { "epoch": 0.750087507686486, "grad_norm": 0.74609375, "learning_rate": 1.3859822256317399e-05, "loss": 0.3118, "step": 9911 }, { "epoch": 0.7501631900099333, "grad_norm": 0.7734375, "learning_rate": 1.3858723450219302e-05, "loss": 0.3436, "step": 9912 }, { "epoch": 0.7502388723333806, "grad_norm": 0.75390625, "learning_rate": 1.3857624589378591e-05, "loss": 0.3059, "step": 9913 }, { "epoch": 0.750314554656828, "grad_norm": 0.71484375, "learning_rate": 1.3856525673810852e-05, "loss": 0.2882, "step": 9914 }, { "epoch": 0.7503902369802753, "grad_norm": 0.80078125, "learning_rate": 1.385542670353168e-05, "loss": 0.3616, "step": 9915 }, { "epoch": 0.7504659193037226, "grad_norm": 0.859375, "learning_rate": 1.3854327678556665e-05, "loss": 0.3729, "step": 9916 }, { "epoch": 0.75054160162717, "grad_norm": 0.79296875, "learning_rate": 1.3853228598901398e-05, "loss": 0.3166, "step": 9917 }, { "epoch": 0.7506172839506173, "grad_norm": 0.8046875, "learning_rate": 1.385212946458147e-05, "loss": 0.2882, "step": 9918 }, { "epoch": 0.7506929662740646, "grad_norm": 0.78125, "learning_rate": 1.3851030275612477e-05, "loss": 0.3203, "step": 9919 }, { "epoch": 0.7507686485975119, "grad_norm": 0.8046875, "learning_rate": 1.384993103201001e-05, "loss": 0.365, "step": 9920 }, { "epoch": 0.7508443309209593, "grad_norm": 0.78125, "learning_rate": 1.3848831733789664e-05, "loss": 0.3232, "step": 9921 }, { "epoch": 0.7509200132444066, "grad_norm": 0.73046875, "learning_rate": 1.3847732380967031e-05, "loss": 0.3095, "step": 9922 }, { "epoch": 0.750995695567854, "grad_norm": 0.7578125, "learning_rate": 1.3846632973557714e-05, "loss": 0.3183, "step": 9923 }, { "epoch": 0.7510713778913013, "grad_norm": 0.71875, "learning_rate": 1.3845533511577309e-05, "loss": 0.2984, "step": 9924 }, { "epoch": 0.7511470602147486, "grad_norm": 0.734375, "learning_rate": 1.3844433995041414e-05, "loss": 0.3262, "step": 9925 }, { "epoch": 0.7512227425381959, "grad_norm": 0.734375, "learning_rate": 1.3843334423965623e-05, "loss": 0.2927, "step": 9926 }, { "epoch": 0.7512984248616432, "grad_norm": 0.73046875, "learning_rate": 1.3842234798365537e-05, "loss": 0.3079, "step": 9927 }, { "epoch": 0.7513741071850906, "grad_norm": 0.7734375, "learning_rate": 1.3841135118256754e-05, "loss": 0.3375, "step": 9928 }, { "epoch": 0.7514497895085379, "grad_norm": 0.75390625, "learning_rate": 1.3840035383654882e-05, "loss": 0.3275, "step": 9929 }, { "epoch": 0.7515254718319853, "grad_norm": 0.76953125, "learning_rate": 1.383893559457552e-05, "loss": 0.3264, "step": 9930 }, { "epoch": 0.7516011541554326, "grad_norm": 0.828125, "learning_rate": 1.3837835751034266e-05, "loss": 0.341, "step": 9931 }, { "epoch": 0.75167683647888, "grad_norm": 0.7265625, "learning_rate": 1.3836735853046725e-05, "loss": 0.302, "step": 9932 }, { "epoch": 0.7517525188023272, "grad_norm": 0.76171875, "learning_rate": 1.3835635900628504e-05, "loss": 0.2848, "step": 9933 }, { "epoch": 0.7518282011257745, "grad_norm": 0.765625, "learning_rate": 1.3834535893795204e-05, "loss": 0.3231, "step": 9934 }, { "epoch": 0.7519038834492219, "grad_norm": 0.77734375, "learning_rate": 1.3833435832562434e-05, "loss": 0.3462, "step": 9935 }, { "epoch": 0.7519795657726692, "grad_norm": 0.74609375, "learning_rate": 1.3832335716945797e-05, "loss": 0.3109, "step": 9936 }, { "epoch": 0.7520552480961166, "grad_norm": 0.81640625, "learning_rate": 1.38312355469609e-05, "loss": 0.3325, "step": 9937 }, { "epoch": 0.7521309304195639, "grad_norm": 0.8515625, "learning_rate": 1.3830135322623353e-05, "loss": 0.3003, "step": 9938 }, { "epoch": 0.7522066127430113, "grad_norm": 0.70703125, "learning_rate": 1.3829035043948767e-05, "loss": 0.2869, "step": 9939 }, { "epoch": 0.7522822950664585, "grad_norm": 0.73828125, "learning_rate": 1.3827934710952744e-05, "loss": 0.3333, "step": 9940 }, { "epoch": 0.7523579773899058, "grad_norm": 0.79296875, "learning_rate": 1.3826834323650899e-05, "loss": 0.3302, "step": 9941 }, { "epoch": 0.7524336597133532, "grad_norm": 0.79296875, "learning_rate": 1.3825733882058843e-05, "loss": 0.3627, "step": 9942 }, { "epoch": 0.7525093420368005, "grad_norm": 0.7578125, "learning_rate": 1.3824633386192185e-05, "loss": 0.3263, "step": 9943 }, { "epoch": 0.7525850243602479, "grad_norm": 0.7578125, "learning_rate": 1.3823532836066542e-05, "loss": 0.298, "step": 9944 }, { "epoch": 0.7526607066836952, "grad_norm": 0.89453125, "learning_rate": 1.3822432231697522e-05, "loss": 0.3182, "step": 9945 }, { "epoch": 0.7527363890071426, "grad_norm": 0.75390625, "learning_rate": 1.382133157310074e-05, "loss": 0.3038, "step": 9946 }, { "epoch": 0.7528120713305898, "grad_norm": 0.75, "learning_rate": 1.3820230860291814e-05, "loss": 0.2869, "step": 9947 }, { "epoch": 0.7528877536540372, "grad_norm": 0.80078125, "learning_rate": 1.3819130093286359e-05, "loss": 0.362, "step": 9948 }, { "epoch": 0.7529634359774845, "grad_norm": 0.8125, "learning_rate": 1.3818029272099988e-05, "loss": 0.3272, "step": 9949 }, { "epoch": 0.7530391183009318, "grad_norm": 0.75, "learning_rate": 1.3816928396748321e-05, "loss": 0.3039, "step": 9950 }, { "epoch": 0.7531148006243792, "grad_norm": 0.671875, "learning_rate": 1.3815827467246973e-05, "loss": 0.2564, "step": 9951 }, { "epoch": 0.7531904829478265, "grad_norm": 1.0078125, "learning_rate": 1.3814726483611565e-05, "loss": 0.3847, "step": 9952 }, { "epoch": 0.7532661652712739, "grad_norm": 0.765625, "learning_rate": 1.3813625445857717e-05, "loss": 0.3212, "step": 9953 }, { "epoch": 0.7533418475947211, "grad_norm": 0.75, "learning_rate": 1.3812524354001045e-05, "loss": 0.3383, "step": 9954 }, { "epoch": 0.7534175299181685, "grad_norm": 0.8046875, "learning_rate": 1.3811423208057173e-05, "loss": 0.3302, "step": 9955 }, { "epoch": 0.7534932122416158, "grad_norm": 0.78515625, "learning_rate": 1.3810322008041723e-05, "loss": 0.3286, "step": 9956 }, { "epoch": 0.7535688945650632, "grad_norm": 0.765625, "learning_rate": 1.3809220753970316e-05, "loss": 0.3403, "step": 9957 }, { "epoch": 0.7536445768885105, "grad_norm": 0.78515625, "learning_rate": 1.3808119445858576e-05, "loss": 0.3276, "step": 9958 }, { "epoch": 0.7537202592119578, "grad_norm": 0.71484375, "learning_rate": 1.3807018083722125e-05, "loss": 0.2986, "step": 9959 }, { "epoch": 0.7537959415354052, "grad_norm": 0.78125, "learning_rate": 1.3805916667576593e-05, "loss": 0.3024, "step": 9960 }, { "epoch": 0.7538716238588524, "grad_norm": 0.765625, "learning_rate": 1.3804815197437598e-05, "loss": 0.3055, "step": 9961 }, { "epoch": 0.7539473061822998, "grad_norm": 0.76171875, "learning_rate": 1.3803713673320773e-05, "loss": 0.3052, "step": 9962 }, { "epoch": 0.7540229885057471, "grad_norm": 0.73828125, "learning_rate": 1.3802612095241742e-05, "loss": 0.3123, "step": 9963 }, { "epoch": 0.7540986708291945, "grad_norm": 0.78125, "learning_rate": 1.3801510463216133e-05, "loss": 0.3202, "step": 9964 }, { "epoch": 0.7541743531526418, "grad_norm": 0.78125, "learning_rate": 1.3800408777259573e-05, "loss": 0.3217, "step": 9965 }, { "epoch": 0.7542500354760892, "grad_norm": 0.83203125, "learning_rate": 1.3799307037387695e-05, "loss": 0.3217, "step": 9966 }, { "epoch": 0.7543257177995365, "grad_norm": 0.7734375, "learning_rate": 1.3798205243616125e-05, "loss": 0.3263, "step": 9967 }, { "epoch": 0.7544014001229837, "grad_norm": 0.69921875, "learning_rate": 1.3797103395960493e-05, "loss": 0.2773, "step": 9968 }, { "epoch": 0.7544770824464311, "grad_norm": 0.74609375, "learning_rate": 1.3796001494436437e-05, "loss": 0.3376, "step": 9969 }, { "epoch": 0.7545527647698784, "grad_norm": 0.85546875, "learning_rate": 1.3794899539059585e-05, "loss": 0.374, "step": 9970 }, { "epoch": 0.7546284470933258, "grad_norm": 0.73828125, "learning_rate": 1.3793797529845572e-05, "loss": 0.2838, "step": 9971 }, { "epoch": 0.7547041294167731, "grad_norm": 0.71484375, "learning_rate": 1.3792695466810029e-05, "loss": 0.2869, "step": 9972 }, { "epoch": 0.7547798117402205, "grad_norm": 0.73828125, "learning_rate": 1.3791593349968592e-05, "loss": 0.317, "step": 9973 }, { "epoch": 0.7548554940636678, "grad_norm": 0.75, "learning_rate": 1.3790491179336899e-05, "loss": 0.3034, "step": 9974 }, { "epoch": 0.754931176387115, "grad_norm": 0.7890625, "learning_rate": 1.3789388954930583e-05, "loss": 0.3128, "step": 9975 }, { "epoch": 0.7550068587105624, "grad_norm": 0.75, "learning_rate": 1.378828667676528e-05, "loss": 0.3101, "step": 9976 }, { "epoch": 0.7550825410340097, "grad_norm": 0.72265625, "learning_rate": 1.378718434485663e-05, "loss": 0.3205, "step": 9977 }, { "epoch": 0.7551582233574571, "grad_norm": 0.74609375, "learning_rate": 1.378608195922027e-05, "loss": 0.3245, "step": 9978 }, { "epoch": 0.7552339056809044, "grad_norm": 0.8046875, "learning_rate": 1.3784979519871842e-05, "loss": 0.3626, "step": 9979 }, { "epoch": 0.7553095880043518, "grad_norm": 0.796875, "learning_rate": 1.3783877026826986e-05, "loss": 0.3439, "step": 9980 }, { "epoch": 0.7553852703277991, "grad_norm": 0.80078125, "learning_rate": 1.378277448010134e-05, "loss": 0.318, "step": 9981 }, { "epoch": 0.7554609526512464, "grad_norm": 0.765625, "learning_rate": 1.3781671879710546e-05, "loss": 0.2841, "step": 9982 }, { "epoch": 0.7555366349746937, "grad_norm": 0.76953125, "learning_rate": 1.3780569225670246e-05, "loss": 0.3163, "step": 9983 }, { "epoch": 0.755612317298141, "grad_norm": 0.87109375, "learning_rate": 1.3779466517996083e-05, "loss": 0.3598, "step": 9984 }, { "epoch": 0.7556879996215884, "grad_norm": 0.703125, "learning_rate": 1.3778363756703703e-05, "loss": 0.3001, "step": 9985 }, { "epoch": 0.7557636819450357, "grad_norm": 0.703125, "learning_rate": 1.3777260941808753e-05, "loss": 0.2673, "step": 9986 }, { "epoch": 0.7558393642684831, "grad_norm": 0.72265625, "learning_rate": 1.3776158073326869e-05, "loss": 0.3047, "step": 9987 }, { "epoch": 0.7559150465919303, "grad_norm": 0.75390625, "learning_rate": 1.3775055151273704e-05, "loss": 0.3014, "step": 9988 }, { "epoch": 0.7559907289153777, "grad_norm": 0.80078125, "learning_rate": 1.3773952175664904e-05, "loss": 0.3485, "step": 9989 }, { "epoch": 0.756066411238825, "grad_norm": 0.76171875, "learning_rate": 1.3772849146516114e-05, "loss": 0.2936, "step": 9990 }, { "epoch": 0.7561420935622724, "grad_norm": 0.73046875, "learning_rate": 1.377174606384299e-05, "loss": 0.3126, "step": 9991 }, { "epoch": 0.7562177758857197, "grad_norm": 0.75, "learning_rate": 1.3770642927661171e-05, "loss": 0.3345, "step": 9992 }, { "epoch": 0.756293458209167, "grad_norm": 0.76171875, "learning_rate": 1.3769539737986311e-05, "loss": 0.2995, "step": 9993 }, { "epoch": 0.7563691405326144, "grad_norm": 0.76953125, "learning_rate": 1.3768436494834063e-05, "loss": 0.3302, "step": 9994 }, { "epoch": 0.7564448228560616, "grad_norm": 0.671875, "learning_rate": 1.3767333198220078e-05, "loss": 0.2863, "step": 9995 }, { "epoch": 0.756520505179509, "grad_norm": 0.8203125, "learning_rate": 1.3766229848160002e-05, "loss": 0.3795, "step": 9996 }, { "epoch": 0.7565961875029563, "grad_norm": 0.8515625, "learning_rate": 1.3765126444669496e-05, "loss": 0.37, "step": 9997 }, { "epoch": 0.7566718698264037, "grad_norm": 0.80078125, "learning_rate": 1.376402298776421e-05, "loss": 0.3615, "step": 9998 }, { "epoch": 0.756747552149851, "grad_norm": 0.73828125, "learning_rate": 1.3762919477459796e-05, "loss": 0.312, "step": 9999 }, { "epoch": 0.7568232344732984, "grad_norm": 0.78515625, "learning_rate": 1.3761815913771917e-05, "loss": 0.3526, "step": 10000 }, { "epoch": 0.7568989167967457, "grad_norm": 0.7109375, "learning_rate": 1.3760712296716219e-05, "loss": 0.2906, "step": 10001 }, { "epoch": 0.7569745991201929, "grad_norm": 0.73046875, "learning_rate": 1.3759608626308363e-05, "loss": 0.2873, "step": 10002 }, { "epoch": 0.7570502814436403, "grad_norm": 0.69921875, "learning_rate": 1.3758504902564009e-05, "loss": 0.2774, "step": 10003 }, { "epoch": 0.7571259637670876, "grad_norm": 0.7265625, "learning_rate": 1.3757401125498815e-05, "loss": 0.3024, "step": 10004 }, { "epoch": 0.757201646090535, "grad_norm": 0.85546875, "learning_rate": 1.3756297295128436e-05, "loss": 0.3276, "step": 10005 }, { "epoch": 0.7572773284139823, "grad_norm": 0.73046875, "learning_rate": 1.3755193411468532e-05, "loss": 0.3045, "step": 10006 }, { "epoch": 0.7573530107374297, "grad_norm": 0.73046875, "learning_rate": 1.3754089474534768e-05, "loss": 0.2975, "step": 10007 }, { "epoch": 0.757428693060877, "grad_norm": 0.74609375, "learning_rate": 1.3752985484342802e-05, "loss": 0.2864, "step": 10008 }, { "epoch": 0.7575043753843242, "grad_norm": 0.70703125, "learning_rate": 1.3751881440908299e-05, "loss": 0.2797, "step": 10009 }, { "epoch": 0.7575800577077716, "grad_norm": 0.77734375, "learning_rate": 1.3750777344246917e-05, "loss": 0.3509, "step": 10010 }, { "epoch": 0.7576557400312189, "grad_norm": 0.921875, "learning_rate": 1.374967319437432e-05, "loss": 0.2722, "step": 10011 }, { "epoch": 0.7577314223546663, "grad_norm": 0.8203125, "learning_rate": 1.3748568991306175e-05, "loss": 0.3244, "step": 10012 }, { "epoch": 0.7578071046781136, "grad_norm": 0.7734375, "learning_rate": 1.3747464735058148e-05, "loss": 0.3181, "step": 10013 }, { "epoch": 0.757882787001561, "grad_norm": 0.69921875, "learning_rate": 1.3746360425645901e-05, "loss": 0.2661, "step": 10014 }, { "epoch": 0.7579584693250083, "grad_norm": 0.71875, "learning_rate": 1.3745256063085105e-05, "loss": 0.3092, "step": 10015 }, { "epoch": 0.7580341516484556, "grad_norm": 0.7578125, "learning_rate": 1.374415164739142e-05, "loss": 0.3326, "step": 10016 }, { "epoch": 0.7581098339719029, "grad_norm": 0.73828125, "learning_rate": 1.3743047178580523e-05, "loss": 0.3072, "step": 10017 }, { "epoch": 0.7581855162953502, "grad_norm": 0.7421875, "learning_rate": 1.3741942656668075e-05, "loss": 0.3097, "step": 10018 }, { "epoch": 0.7582611986187976, "grad_norm": 0.75, "learning_rate": 1.3740838081669753e-05, "loss": 0.3092, "step": 10019 }, { "epoch": 0.7583368809422449, "grad_norm": 0.76171875, "learning_rate": 1.3739733453601219e-05, "loss": 0.3057, "step": 10020 }, { "epoch": 0.7584125632656923, "grad_norm": 0.7421875, "learning_rate": 1.3738628772478152e-05, "loss": 0.3358, "step": 10021 }, { "epoch": 0.7584882455891396, "grad_norm": 0.78515625, "learning_rate": 1.3737524038316219e-05, "loss": 0.3313, "step": 10022 }, { "epoch": 0.7585639279125869, "grad_norm": 0.7890625, "learning_rate": 1.3736419251131094e-05, "loss": 0.3492, "step": 10023 }, { "epoch": 0.7586396102360342, "grad_norm": 0.8984375, "learning_rate": 1.373531441093845e-05, "loss": 0.3779, "step": 10024 }, { "epoch": 0.7587152925594816, "grad_norm": 0.7265625, "learning_rate": 1.3734209517753959e-05, "loss": 0.294, "step": 10025 }, { "epoch": 0.7587909748829289, "grad_norm": 0.71484375, "learning_rate": 1.3733104571593298e-05, "loss": 0.2895, "step": 10026 }, { "epoch": 0.7588666572063762, "grad_norm": 0.8125, "learning_rate": 1.3731999572472146e-05, "loss": 0.3402, "step": 10027 }, { "epoch": 0.7589423395298236, "grad_norm": 0.8125, "learning_rate": 1.3730894520406173e-05, "loss": 0.3322, "step": 10028 }, { "epoch": 0.7590180218532709, "grad_norm": 0.68359375, "learning_rate": 1.372978941541106e-05, "loss": 0.2463, "step": 10029 }, { "epoch": 0.7590937041767182, "grad_norm": 0.7421875, "learning_rate": 1.372868425750248e-05, "loss": 0.3332, "step": 10030 }, { "epoch": 0.7591693865001655, "grad_norm": 0.76171875, "learning_rate": 1.372757904669612e-05, "loss": 0.3121, "step": 10031 }, { "epoch": 0.7592450688236129, "grad_norm": 0.74609375, "learning_rate": 1.372647378300765e-05, "loss": 0.2842, "step": 10032 }, { "epoch": 0.7593207511470602, "grad_norm": 0.75, "learning_rate": 1.372536846645276e-05, "loss": 0.3283, "step": 10033 }, { "epoch": 0.7593964334705076, "grad_norm": 0.68359375, "learning_rate": 1.3724263097047122e-05, "loss": 0.2858, "step": 10034 }, { "epoch": 0.7594721157939549, "grad_norm": 0.76171875, "learning_rate": 1.3723157674806419e-05, "loss": 0.3026, "step": 10035 }, { "epoch": 0.7595477981174023, "grad_norm": 0.8046875, "learning_rate": 1.3722052199746338e-05, "loss": 0.3338, "step": 10036 }, { "epoch": 0.7596234804408495, "grad_norm": 0.78515625, "learning_rate": 1.372094667188256e-05, "loss": 0.3287, "step": 10037 }, { "epoch": 0.7596991627642968, "grad_norm": 0.73046875, "learning_rate": 1.3719841091230769e-05, "loss": 0.3049, "step": 10038 }, { "epoch": 0.7597748450877442, "grad_norm": 0.67578125, "learning_rate": 1.3718735457806646e-05, "loss": 0.2652, "step": 10039 }, { "epoch": 0.7598505274111915, "grad_norm": 0.81640625, "learning_rate": 1.3717629771625878e-05, "loss": 0.347, "step": 10040 }, { "epoch": 0.7599262097346389, "grad_norm": 0.75390625, "learning_rate": 1.3716524032704153e-05, "loss": 0.2965, "step": 10041 }, { "epoch": 0.7600018920580862, "grad_norm": 0.796875, "learning_rate": 1.3715418241057159e-05, "loss": 0.3476, "step": 10042 }, { "epoch": 0.7600775743815336, "grad_norm": 0.7578125, "learning_rate": 1.3714312396700579e-05, "loss": 0.331, "step": 10043 }, { "epoch": 0.7601532567049808, "grad_norm": 0.8046875, "learning_rate": 1.3713206499650105e-05, "loss": 0.309, "step": 10044 }, { "epoch": 0.7602289390284281, "grad_norm": 0.73828125, "learning_rate": 1.3712100549921423e-05, "loss": 0.3146, "step": 10045 }, { "epoch": 0.7603046213518755, "grad_norm": 0.74609375, "learning_rate": 1.3710994547530225e-05, "loss": 0.3462, "step": 10046 }, { "epoch": 0.7603803036753228, "grad_norm": 0.78515625, "learning_rate": 1.3709888492492204e-05, "loss": 0.341, "step": 10047 }, { "epoch": 0.7604559859987702, "grad_norm": 0.734375, "learning_rate": 1.3708782384823045e-05, "loss": 0.3116, "step": 10048 }, { "epoch": 0.7605316683222175, "grad_norm": 0.7578125, "learning_rate": 1.3707676224538445e-05, "loss": 0.3218, "step": 10049 }, { "epoch": 0.7606073506456649, "grad_norm": 0.71875, "learning_rate": 1.3706570011654094e-05, "loss": 0.2948, "step": 10050 }, { "epoch": 0.7606830329691121, "grad_norm": 0.6953125, "learning_rate": 1.370546374618569e-05, "loss": 0.2933, "step": 10051 }, { "epoch": 0.7607587152925595, "grad_norm": 0.72265625, "learning_rate": 1.370435742814892e-05, "loss": 0.2811, "step": 10052 }, { "epoch": 0.7608343976160068, "grad_norm": 0.7734375, "learning_rate": 1.3703251057559484e-05, "loss": 0.3482, "step": 10053 }, { "epoch": 0.7609100799394541, "grad_norm": 0.734375, "learning_rate": 1.3702144634433077e-05, "loss": 0.3045, "step": 10054 }, { "epoch": 0.7609857622629015, "grad_norm": 0.85546875, "learning_rate": 1.3701038158785393e-05, "loss": 0.3503, "step": 10055 }, { "epoch": 0.7610614445863488, "grad_norm": 0.76953125, "learning_rate": 1.3699931630632137e-05, "loss": 0.3534, "step": 10056 }, { "epoch": 0.7611371269097962, "grad_norm": 0.75390625, "learning_rate": 1.3698825049988998e-05, "loss": 0.323, "step": 10057 }, { "epoch": 0.7612128092332434, "grad_norm": 0.7421875, "learning_rate": 1.3697718416871677e-05, "loss": 0.328, "step": 10058 }, { "epoch": 0.7612884915566908, "grad_norm": 0.75, "learning_rate": 1.3696611731295877e-05, "loss": 0.3019, "step": 10059 }, { "epoch": 0.7613641738801381, "grad_norm": 0.78515625, "learning_rate": 1.3695504993277296e-05, "loss": 0.3412, "step": 10060 }, { "epoch": 0.7614398562035855, "grad_norm": 0.7421875, "learning_rate": 1.3694398202831633e-05, "loss": 0.3155, "step": 10061 }, { "epoch": 0.7615155385270328, "grad_norm": 0.8125, "learning_rate": 1.3693291359974595e-05, "loss": 0.3506, "step": 10062 }, { "epoch": 0.7615912208504801, "grad_norm": 1.40625, "learning_rate": 1.3692184464721877e-05, "loss": 0.3705, "step": 10063 }, { "epoch": 0.7616669031739275, "grad_norm": 0.69140625, "learning_rate": 1.3691077517089188e-05, "loss": 0.285, "step": 10064 }, { "epoch": 0.7617425854973747, "grad_norm": 0.72265625, "learning_rate": 1.3689970517092232e-05, "loss": 0.2967, "step": 10065 }, { "epoch": 0.7618182678208221, "grad_norm": 0.77734375, "learning_rate": 1.3688863464746711e-05, "loss": 0.3455, "step": 10066 }, { "epoch": 0.7618939501442694, "grad_norm": 0.94140625, "learning_rate": 1.3687756360068332e-05, "loss": 0.3893, "step": 10067 }, { "epoch": 0.7619696324677168, "grad_norm": 0.7578125, "learning_rate": 1.36866492030728e-05, "loss": 0.3373, "step": 10068 }, { "epoch": 0.7620453147911641, "grad_norm": 0.75, "learning_rate": 1.3685541993775822e-05, "loss": 0.2972, "step": 10069 }, { "epoch": 0.7621209971146115, "grad_norm": 0.8125, "learning_rate": 1.3684434732193107e-05, "loss": 0.3221, "step": 10070 }, { "epoch": 0.7621966794380588, "grad_norm": 0.7265625, "learning_rate": 1.3683327418340362e-05, "loss": 0.2951, "step": 10071 }, { "epoch": 0.762272361761506, "grad_norm": 0.765625, "learning_rate": 1.3682220052233297e-05, "loss": 0.3394, "step": 10072 }, { "epoch": 0.7623480440849534, "grad_norm": 0.75, "learning_rate": 1.368111263388762e-05, "loss": 0.3322, "step": 10073 }, { "epoch": 0.7624237264084007, "grad_norm": 0.80078125, "learning_rate": 1.3680005163319045e-05, "loss": 0.3088, "step": 10074 }, { "epoch": 0.7624994087318481, "grad_norm": 0.7265625, "learning_rate": 1.3678897640543283e-05, "loss": 0.2795, "step": 10075 }, { "epoch": 0.7625750910552954, "grad_norm": 0.7734375, "learning_rate": 1.3677790065576045e-05, "loss": 0.3068, "step": 10076 }, { "epoch": 0.7626507733787428, "grad_norm": 0.734375, "learning_rate": 1.367668243843304e-05, "loss": 0.3098, "step": 10077 }, { "epoch": 0.7627264557021901, "grad_norm": 0.72265625, "learning_rate": 1.3675574759129988e-05, "loss": 0.3055, "step": 10078 }, { "epoch": 0.7628021380256373, "grad_norm": 0.73828125, "learning_rate": 1.36744670276826e-05, "loss": 0.2893, "step": 10079 }, { "epoch": 0.7628778203490847, "grad_norm": 0.80859375, "learning_rate": 1.3673359244106593e-05, "loss": 0.329, "step": 10080 }, { "epoch": 0.762953502672532, "grad_norm": 0.70703125, "learning_rate": 1.3672251408417676e-05, "loss": 0.2628, "step": 10081 }, { "epoch": 0.7630291849959794, "grad_norm": 0.7890625, "learning_rate": 1.3671143520631575e-05, "loss": 0.3616, "step": 10082 }, { "epoch": 0.7631048673194267, "grad_norm": 0.74609375, "learning_rate": 1.3670035580764002e-05, "loss": 0.3347, "step": 10083 }, { "epoch": 0.7631805496428741, "grad_norm": 0.69140625, "learning_rate": 1.366892758883068e-05, "loss": 0.2872, "step": 10084 }, { "epoch": 0.7632562319663214, "grad_norm": 0.74609375, "learning_rate": 1.3667819544847321e-05, "loss": 0.3394, "step": 10085 }, { "epoch": 0.7633319142897687, "grad_norm": 0.75390625, "learning_rate": 1.3666711448829648e-05, "loss": 0.3094, "step": 10086 }, { "epoch": 0.763407596613216, "grad_norm": 0.73046875, "learning_rate": 1.3665603300793382e-05, "loss": 0.3419, "step": 10087 }, { "epoch": 0.7634832789366633, "grad_norm": 0.68359375, "learning_rate": 1.3664495100754238e-05, "loss": 0.2729, "step": 10088 }, { "epoch": 0.7635589612601107, "grad_norm": 0.76953125, "learning_rate": 1.3663386848727949e-05, "loss": 0.3237, "step": 10089 }, { "epoch": 0.763634643583558, "grad_norm": 0.71484375, "learning_rate": 1.3662278544730225e-05, "loss": 0.3037, "step": 10090 }, { "epoch": 0.7637103259070054, "grad_norm": 0.71484375, "learning_rate": 1.36611701887768e-05, "loss": 0.2967, "step": 10091 }, { "epoch": 0.7637860082304527, "grad_norm": 0.77734375, "learning_rate": 1.366006178088339e-05, "loss": 0.3201, "step": 10092 }, { "epoch": 0.7638616905539, "grad_norm": 0.7421875, "learning_rate": 1.3658953321065727e-05, "loss": 0.3101, "step": 10093 }, { "epoch": 0.7639373728773473, "grad_norm": 0.75, "learning_rate": 1.3657844809339529e-05, "loss": 0.3288, "step": 10094 }, { "epoch": 0.7640130552007947, "grad_norm": 0.7890625, "learning_rate": 1.3656736245720525e-05, "loss": 0.3478, "step": 10095 }, { "epoch": 0.764088737524242, "grad_norm": 0.8359375, "learning_rate": 1.3655627630224442e-05, "loss": 0.3482, "step": 10096 }, { "epoch": 0.7641644198476893, "grad_norm": 0.7265625, "learning_rate": 1.3654518962867007e-05, "loss": 0.309, "step": 10097 }, { "epoch": 0.7642401021711367, "grad_norm": 0.83984375, "learning_rate": 1.3653410243663953e-05, "loss": 0.3756, "step": 10098 }, { "epoch": 0.764315784494584, "grad_norm": 0.77734375, "learning_rate": 1.3652301472631e-05, "loss": 0.299, "step": 10099 }, { "epoch": 0.7643914668180313, "grad_norm": 0.6484375, "learning_rate": 1.3651192649783887e-05, "loss": 0.2253, "step": 10100 }, { "epoch": 0.7644671491414786, "grad_norm": 0.73046875, "learning_rate": 1.3650083775138341e-05, "loss": 0.2931, "step": 10101 }, { "epoch": 0.764542831464926, "grad_norm": 0.8203125, "learning_rate": 1.3648974848710088e-05, "loss": 0.3634, "step": 10102 }, { "epoch": 0.7646185137883733, "grad_norm": 0.77734375, "learning_rate": 1.3647865870514872e-05, "loss": 0.3367, "step": 10103 }, { "epoch": 0.7646941961118207, "grad_norm": 1.109375, "learning_rate": 1.3646756840568413e-05, "loss": 0.3345, "step": 10104 }, { "epoch": 0.764769878435268, "grad_norm": 0.71875, "learning_rate": 1.364564775888645e-05, "loss": 0.2788, "step": 10105 }, { "epoch": 0.7648455607587152, "grad_norm": 0.77734375, "learning_rate": 1.364453862548472e-05, "loss": 0.3419, "step": 10106 }, { "epoch": 0.7649212430821626, "grad_norm": 0.84375, "learning_rate": 1.364342944037896e-05, "loss": 0.3239, "step": 10107 }, { "epoch": 0.7649969254056099, "grad_norm": 0.7265625, "learning_rate": 1.3642320203584892e-05, "loss": 0.2981, "step": 10108 }, { "epoch": 0.7650726077290573, "grad_norm": 0.74609375, "learning_rate": 1.3641210915118265e-05, "loss": 0.317, "step": 10109 }, { "epoch": 0.7651482900525046, "grad_norm": 0.74609375, "learning_rate": 1.3640101574994816e-05, "loss": 0.3201, "step": 10110 }, { "epoch": 0.765223972375952, "grad_norm": 0.76171875, "learning_rate": 1.3638992183230273e-05, "loss": 0.3349, "step": 10111 }, { "epoch": 0.7652996546993993, "grad_norm": 0.81640625, "learning_rate": 1.3637882739840387e-05, "loss": 0.3322, "step": 10112 }, { "epoch": 0.7653753370228465, "grad_norm": 0.76171875, "learning_rate": 1.3636773244840891e-05, "loss": 0.3279, "step": 10113 }, { "epoch": 0.7654510193462939, "grad_norm": 0.8515625, "learning_rate": 1.3635663698247522e-05, "loss": 0.3697, "step": 10114 }, { "epoch": 0.7655267016697412, "grad_norm": 0.79296875, "learning_rate": 1.3634554100076025e-05, "loss": 0.3312, "step": 10115 }, { "epoch": 0.7656023839931886, "grad_norm": 0.73046875, "learning_rate": 1.3633444450342142e-05, "loss": 0.2634, "step": 10116 }, { "epoch": 0.7656780663166359, "grad_norm": 0.75, "learning_rate": 1.3632334749061615e-05, "loss": 0.3034, "step": 10117 }, { "epoch": 0.7657537486400833, "grad_norm": 0.77734375, "learning_rate": 1.3631224996250186e-05, "loss": 0.3541, "step": 10118 }, { "epoch": 0.7658294309635306, "grad_norm": 0.7734375, "learning_rate": 1.3630115191923598e-05, "loss": 0.3403, "step": 10119 }, { "epoch": 0.7659051132869779, "grad_norm": 0.72265625, "learning_rate": 1.3629005336097596e-05, "loss": 0.3082, "step": 10120 }, { "epoch": 0.7659807956104252, "grad_norm": 0.73046875, "learning_rate": 1.3627895428787929e-05, "loss": 0.2752, "step": 10121 }, { "epoch": 0.7660564779338725, "grad_norm": 0.765625, "learning_rate": 1.362678547001034e-05, "loss": 0.3231, "step": 10122 }, { "epoch": 0.7661321602573199, "grad_norm": 0.77734375, "learning_rate": 1.362567545978057e-05, "loss": 0.342, "step": 10123 }, { "epoch": 0.7662078425807672, "grad_norm": 0.7734375, "learning_rate": 1.3624565398114375e-05, "loss": 0.3169, "step": 10124 }, { "epoch": 0.7662835249042146, "grad_norm": 0.78125, "learning_rate": 1.3623455285027501e-05, "loss": 0.3655, "step": 10125 }, { "epoch": 0.7663592072276619, "grad_norm": 0.80078125, "learning_rate": 1.3622345120535693e-05, "loss": 0.3254, "step": 10126 }, { "epoch": 0.7664348895511092, "grad_norm": 0.77734375, "learning_rate": 1.3621234904654705e-05, "loss": 0.3166, "step": 10127 }, { "epoch": 0.7665105718745565, "grad_norm": 0.73828125, "learning_rate": 1.3620124637400286e-05, "loss": 0.2992, "step": 10128 }, { "epoch": 0.7665862541980039, "grad_norm": 0.7578125, "learning_rate": 1.3619014318788185e-05, "loss": 0.3311, "step": 10129 }, { "epoch": 0.7666619365214512, "grad_norm": 0.78125, "learning_rate": 1.3617903948834155e-05, "loss": 0.3473, "step": 10130 }, { "epoch": 0.7667376188448985, "grad_norm": 0.84375, "learning_rate": 1.3616793527553952e-05, "loss": 0.3937, "step": 10131 }, { "epoch": 0.7668133011683459, "grad_norm": 0.703125, "learning_rate": 1.3615683054963325e-05, "loss": 0.2598, "step": 10132 }, { "epoch": 0.7668889834917932, "grad_norm": 0.74609375, "learning_rate": 1.3614572531078028e-05, "loss": 0.3125, "step": 10133 }, { "epoch": 0.7669646658152405, "grad_norm": 0.70703125, "learning_rate": 1.3613461955913818e-05, "loss": 0.2911, "step": 10134 }, { "epoch": 0.7670403481386878, "grad_norm": 0.73046875, "learning_rate": 1.3612351329486448e-05, "loss": 0.2832, "step": 10135 }, { "epoch": 0.7671160304621352, "grad_norm": 0.74609375, "learning_rate": 1.3611240651811677e-05, "loss": 0.3425, "step": 10136 }, { "epoch": 0.7671917127855825, "grad_norm": 0.71484375, "learning_rate": 1.3610129922905257e-05, "loss": 0.2837, "step": 10137 }, { "epoch": 0.7672673951090299, "grad_norm": 0.7578125, "learning_rate": 1.3609019142782951e-05, "loss": 0.3116, "step": 10138 }, { "epoch": 0.7673430774324772, "grad_norm": 0.80078125, "learning_rate": 1.3607908311460516e-05, "loss": 0.3565, "step": 10139 }, { "epoch": 0.7674187597559246, "grad_norm": 0.67578125, "learning_rate": 1.360679742895371e-05, "loss": 0.2551, "step": 10140 }, { "epoch": 0.7674944420793718, "grad_norm": 0.734375, "learning_rate": 1.3605686495278293e-05, "loss": 0.3249, "step": 10141 }, { "epoch": 0.7675701244028191, "grad_norm": 0.94921875, "learning_rate": 1.3604575510450025e-05, "loss": 0.3525, "step": 10142 }, { "epoch": 0.7676458067262665, "grad_norm": 0.82421875, "learning_rate": 1.3603464474484666e-05, "loss": 0.3355, "step": 10143 }, { "epoch": 0.7677214890497138, "grad_norm": 0.71875, "learning_rate": 1.3602353387397983e-05, "loss": 0.2993, "step": 10144 }, { "epoch": 0.7677971713731612, "grad_norm": 0.734375, "learning_rate": 1.3601242249205736e-05, "loss": 0.315, "step": 10145 }, { "epoch": 0.7678728536966085, "grad_norm": 0.71875, "learning_rate": 1.3600131059923683e-05, "loss": 0.2836, "step": 10146 }, { "epoch": 0.7679485360200559, "grad_norm": 0.7109375, "learning_rate": 1.3599019819567596e-05, "loss": 0.2779, "step": 10147 }, { "epoch": 0.7680242183435031, "grad_norm": 0.75390625, "learning_rate": 1.3597908528153236e-05, "loss": 0.3163, "step": 10148 }, { "epoch": 0.7680999006669504, "grad_norm": 0.72265625, "learning_rate": 1.3596797185696371e-05, "loss": 0.2985, "step": 10149 }, { "epoch": 0.7681755829903978, "grad_norm": 0.7109375, "learning_rate": 1.3595685792212763e-05, "loss": 0.2762, "step": 10150 }, { "epoch": 0.7682512653138451, "grad_norm": 0.796875, "learning_rate": 1.3594574347718183e-05, "loss": 0.3254, "step": 10151 }, { "epoch": 0.7683269476372925, "grad_norm": 0.75390625, "learning_rate": 1.3593462852228396e-05, "loss": 0.3091, "step": 10152 }, { "epoch": 0.7684026299607398, "grad_norm": 0.7421875, "learning_rate": 1.3592351305759173e-05, "loss": 0.3099, "step": 10153 }, { "epoch": 0.7684783122841872, "grad_norm": 0.97265625, "learning_rate": 1.3591239708326286e-05, "loss": 0.3451, "step": 10154 }, { "epoch": 0.7685539946076344, "grad_norm": 0.765625, "learning_rate": 1.3590128059945493e-05, "loss": 0.3442, "step": 10155 }, { "epoch": 0.7686296769310818, "grad_norm": 0.71875, "learning_rate": 1.3589016360632577e-05, "loss": 0.2834, "step": 10156 }, { "epoch": 0.7687053592545291, "grad_norm": 0.69140625, "learning_rate": 1.3587904610403304e-05, "loss": 0.2728, "step": 10157 }, { "epoch": 0.7687810415779764, "grad_norm": 0.7109375, "learning_rate": 1.3586792809273447e-05, "loss": 0.2796, "step": 10158 }, { "epoch": 0.7688567239014238, "grad_norm": 0.76171875, "learning_rate": 1.358568095725878e-05, "loss": 0.3081, "step": 10159 }, { "epoch": 0.7689324062248711, "grad_norm": 0.80859375, "learning_rate": 1.3584569054375076e-05, "loss": 0.3391, "step": 10160 }, { "epoch": 0.7690080885483185, "grad_norm": 0.78125, "learning_rate": 1.3583457100638104e-05, "loss": 0.3122, "step": 10161 }, { "epoch": 0.7690837708717657, "grad_norm": 0.7890625, "learning_rate": 1.358234509606365e-05, "loss": 0.3518, "step": 10162 }, { "epoch": 0.7691594531952131, "grad_norm": 0.71484375, "learning_rate": 1.3581233040667482e-05, "loss": 0.2936, "step": 10163 }, { "epoch": 0.7692351355186604, "grad_norm": 0.7578125, "learning_rate": 1.3580120934465372e-05, "loss": 0.3386, "step": 10164 }, { "epoch": 0.7693108178421078, "grad_norm": 0.7734375, "learning_rate": 1.3579008777473108e-05, "loss": 0.3178, "step": 10165 }, { "epoch": 0.7693865001655551, "grad_norm": 0.75, "learning_rate": 1.3577896569706464e-05, "loss": 0.3029, "step": 10166 }, { "epoch": 0.7694621824890024, "grad_norm": 0.69921875, "learning_rate": 1.3576784311181212e-05, "loss": 0.2636, "step": 10167 }, { "epoch": 0.7695378648124498, "grad_norm": 0.72265625, "learning_rate": 1.3575672001913142e-05, "loss": 0.248, "step": 10168 }, { "epoch": 0.769613547135897, "grad_norm": 0.71875, "learning_rate": 1.3574559641918025e-05, "loss": 0.291, "step": 10169 }, { "epoch": 0.7696892294593444, "grad_norm": 0.95703125, "learning_rate": 1.3573447231211644e-05, "loss": 0.3213, "step": 10170 }, { "epoch": 0.7697649117827917, "grad_norm": 0.7578125, "learning_rate": 1.3572334769809785e-05, "loss": 0.3343, "step": 10171 }, { "epoch": 0.7698405941062391, "grad_norm": 1.015625, "learning_rate": 1.3571222257728228e-05, "loss": 0.3235, "step": 10172 }, { "epoch": 0.7699162764296864, "grad_norm": 0.73046875, "learning_rate": 1.3570109694982751e-05, "loss": 0.2945, "step": 10173 }, { "epoch": 0.7699919587531338, "grad_norm": 0.78125, "learning_rate": 1.3568997081589146e-05, "loss": 0.3738, "step": 10174 }, { "epoch": 0.7700676410765811, "grad_norm": 0.765625, "learning_rate": 1.3567884417563188e-05, "loss": 0.3176, "step": 10175 }, { "epoch": 0.7701433234000283, "grad_norm": 0.765625, "learning_rate": 1.3566771702920667e-05, "loss": 0.3217, "step": 10176 }, { "epoch": 0.7702190057234757, "grad_norm": 0.71484375, "learning_rate": 1.3565658937677375e-05, "loss": 0.3217, "step": 10177 }, { "epoch": 0.770294688046923, "grad_norm": 0.7890625, "learning_rate": 1.3564546121849087e-05, "loss": 0.3519, "step": 10178 }, { "epoch": 0.7703703703703704, "grad_norm": 0.73046875, "learning_rate": 1.3563433255451594e-05, "loss": 0.3094, "step": 10179 }, { "epoch": 0.7704460526938177, "grad_norm": 0.70703125, "learning_rate": 1.3562320338500687e-05, "loss": 0.277, "step": 10180 }, { "epoch": 0.7705217350172651, "grad_norm": 0.69921875, "learning_rate": 1.3561207371012155e-05, "loss": 0.2694, "step": 10181 }, { "epoch": 0.7705974173407124, "grad_norm": 0.734375, "learning_rate": 1.3560094353001783e-05, "loss": 0.2804, "step": 10182 }, { "epoch": 0.7706730996641596, "grad_norm": 0.80078125, "learning_rate": 1.3558981284485361e-05, "loss": 0.3235, "step": 10183 }, { "epoch": 0.770748781987607, "grad_norm": 0.8125, "learning_rate": 1.3557868165478688e-05, "loss": 0.3529, "step": 10184 }, { "epoch": 0.7708244643110543, "grad_norm": 0.8125, "learning_rate": 1.3556754995997543e-05, "loss": 0.3469, "step": 10185 }, { "epoch": 0.7709001466345017, "grad_norm": 0.74609375, "learning_rate": 1.3555641776057729e-05, "loss": 0.3094, "step": 10186 }, { "epoch": 0.770975828957949, "grad_norm": 0.79296875, "learning_rate": 1.3554528505675033e-05, "loss": 0.3238, "step": 10187 }, { "epoch": 0.7710515112813964, "grad_norm": 0.80078125, "learning_rate": 1.3553415184865251e-05, "loss": 0.3093, "step": 10188 }, { "epoch": 0.7711271936048437, "grad_norm": 0.78125, "learning_rate": 1.3552301813644177e-05, "loss": 0.3095, "step": 10189 }, { "epoch": 0.771202875928291, "grad_norm": 0.734375, "learning_rate": 1.3551188392027606e-05, "loss": 0.3148, "step": 10190 }, { "epoch": 0.7712785582517383, "grad_norm": 0.7578125, "learning_rate": 1.3550074920031332e-05, "loss": 0.3346, "step": 10191 }, { "epoch": 0.7713542405751856, "grad_norm": 0.8828125, "learning_rate": 1.3548961397671156e-05, "loss": 0.3623, "step": 10192 }, { "epoch": 0.771429922898633, "grad_norm": 0.74609375, "learning_rate": 1.354784782496287e-05, "loss": 0.3263, "step": 10193 }, { "epoch": 0.7715056052220803, "grad_norm": 0.76953125, "learning_rate": 1.3546734201922273e-05, "loss": 0.3269, "step": 10194 }, { "epoch": 0.7715812875455277, "grad_norm": 0.76953125, "learning_rate": 1.3545620528565169e-05, "loss": 0.3221, "step": 10195 }, { "epoch": 0.771656969868975, "grad_norm": 0.71875, "learning_rate": 1.3544506804907351e-05, "loss": 0.2917, "step": 10196 }, { "epoch": 0.7717326521924223, "grad_norm": 0.74609375, "learning_rate": 1.3543393030964622e-05, "loss": 0.3285, "step": 10197 }, { "epoch": 0.7718083345158696, "grad_norm": 0.9921875, "learning_rate": 1.3542279206752782e-05, "loss": 0.3506, "step": 10198 }, { "epoch": 0.771884016839317, "grad_norm": 0.7890625, "learning_rate": 1.354116533228763e-05, "loss": 0.3132, "step": 10199 }, { "epoch": 0.7719596991627643, "grad_norm": 0.8203125, "learning_rate": 1.3540051407584973e-05, "loss": 0.3631, "step": 10200 }, { "epoch": 0.7720353814862116, "grad_norm": 0.68359375, "learning_rate": 1.3538937432660614e-05, "loss": 0.2723, "step": 10201 }, { "epoch": 0.772111063809659, "grad_norm": 0.68359375, "learning_rate": 1.353782340753035e-05, "loss": 0.2789, "step": 10202 }, { "epoch": 0.7721867461331063, "grad_norm": 0.73828125, "learning_rate": 1.3536709332209995e-05, "loss": 0.2841, "step": 10203 }, { "epoch": 0.7722624284565536, "grad_norm": 0.71875, "learning_rate": 1.3535595206715346e-05, "loss": 0.3088, "step": 10204 }, { "epoch": 0.7723381107800009, "grad_norm": 0.77734375, "learning_rate": 1.3534481031062213e-05, "loss": 0.3177, "step": 10205 }, { "epoch": 0.7724137931034483, "grad_norm": 0.7734375, "learning_rate": 1.3533366805266402e-05, "loss": 0.3305, "step": 10206 }, { "epoch": 0.7724894754268956, "grad_norm": 0.82421875, "learning_rate": 1.3532252529343718e-05, "loss": 0.3494, "step": 10207 }, { "epoch": 0.772565157750343, "grad_norm": 0.7578125, "learning_rate": 1.3531138203309966e-05, "loss": 0.3245, "step": 10208 }, { "epoch": 0.7726408400737903, "grad_norm": 0.69140625, "learning_rate": 1.3530023827180965e-05, "loss": 0.2643, "step": 10209 }, { "epoch": 0.7727165223972376, "grad_norm": 0.7421875, "learning_rate": 1.3528909400972517e-05, "loss": 0.2875, "step": 10210 }, { "epoch": 0.7727922047206849, "grad_norm": 0.7578125, "learning_rate": 1.352779492470043e-05, "loss": 0.3322, "step": 10211 }, { "epoch": 0.7728678870441322, "grad_norm": 0.74609375, "learning_rate": 1.3526680398380523e-05, "loss": 0.2988, "step": 10212 }, { "epoch": 0.7729435693675796, "grad_norm": 0.703125, "learning_rate": 1.35255658220286e-05, "loss": 0.2914, "step": 10213 }, { "epoch": 0.7730192516910269, "grad_norm": 0.83984375, "learning_rate": 1.3524451195660472e-05, "loss": 0.343, "step": 10214 }, { "epoch": 0.7730949340144743, "grad_norm": 0.7890625, "learning_rate": 1.3523336519291963e-05, "loss": 0.3164, "step": 10215 }, { "epoch": 0.7731706163379216, "grad_norm": 0.78515625, "learning_rate": 1.3522221792938877e-05, "loss": 0.3327, "step": 10216 }, { "epoch": 0.773246298661369, "grad_norm": 0.72265625, "learning_rate": 1.3521107016617027e-05, "loss": 0.2972, "step": 10217 }, { "epoch": 0.7733219809848162, "grad_norm": 0.74609375, "learning_rate": 1.3519992190342234e-05, "loss": 0.3025, "step": 10218 }, { "epoch": 0.7733976633082635, "grad_norm": 0.76171875, "learning_rate": 1.3518877314130314e-05, "loss": 0.3355, "step": 10219 }, { "epoch": 0.7734733456317109, "grad_norm": 0.76953125, "learning_rate": 1.3517762387997076e-05, "loss": 0.332, "step": 10220 }, { "epoch": 0.7735490279551582, "grad_norm": 0.75390625, "learning_rate": 1.3516647411958344e-05, "loss": 0.3313, "step": 10221 }, { "epoch": 0.7736247102786056, "grad_norm": 0.70703125, "learning_rate": 1.3515532386029935e-05, "loss": 0.3055, "step": 10222 }, { "epoch": 0.7737003926020529, "grad_norm": 0.69140625, "learning_rate": 1.351441731022766e-05, "loss": 0.281, "step": 10223 }, { "epoch": 0.7737760749255003, "grad_norm": 0.73828125, "learning_rate": 1.3513302184567355e-05, "loss": 0.2879, "step": 10224 }, { "epoch": 0.7738517572489475, "grad_norm": 0.76171875, "learning_rate": 1.3512187009064822e-05, "loss": 0.3405, "step": 10225 }, { "epoch": 0.7739274395723948, "grad_norm": 0.984375, "learning_rate": 1.3511071783735887e-05, "loss": 0.3381, "step": 10226 }, { "epoch": 0.7740031218958422, "grad_norm": 0.73046875, "learning_rate": 1.350995650859638e-05, "loss": 0.2777, "step": 10227 }, { "epoch": 0.7740788042192895, "grad_norm": 0.9296875, "learning_rate": 1.3508841183662114e-05, "loss": 0.3151, "step": 10228 }, { "epoch": 0.7741544865427369, "grad_norm": 0.79296875, "learning_rate": 1.3507725808948916e-05, "loss": 0.341, "step": 10229 }, { "epoch": 0.7742301688661842, "grad_norm": 0.75390625, "learning_rate": 1.3506610384472605e-05, "loss": 0.2846, "step": 10230 }, { "epoch": 0.7743058511896315, "grad_norm": 0.7265625, "learning_rate": 1.3505494910249008e-05, "loss": 0.2932, "step": 10231 }, { "epoch": 0.7743815335130788, "grad_norm": 0.703125, "learning_rate": 1.3504379386293951e-05, "loss": 0.282, "step": 10232 }, { "epoch": 0.7744572158365262, "grad_norm": 0.73046875, "learning_rate": 1.3503263812623262e-05, "loss": 0.2855, "step": 10233 }, { "epoch": 0.7745328981599735, "grad_norm": 0.80078125, "learning_rate": 1.3502148189252761e-05, "loss": 0.3527, "step": 10234 }, { "epoch": 0.7746085804834208, "grad_norm": 0.96875, "learning_rate": 1.3501032516198276e-05, "loss": 0.3386, "step": 10235 }, { "epoch": 0.7746842628068682, "grad_norm": 0.74609375, "learning_rate": 1.349991679347564e-05, "loss": 0.3064, "step": 10236 }, { "epoch": 0.7747599451303155, "grad_norm": 0.71484375, "learning_rate": 1.3498801021100678e-05, "loss": 0.2906, "step": 10237 }, { "epoch": 0.7748356274537628, "grad_norm": 0.765625, "learning_rate": 1.349768519908922e-05, "loss": 0.3219, "step": 10238 }, { "epoch": 0.7749113097772101, "grad_norm": 1.1640625, "learning_rate": 1.3496569327457092e-05, "loss": 0.3275, "step": 10239 }, { "epoch": 0.7749869921006575, "grad_norm": 0.6875, "learning_rate": 1.349545340622013e-05, "loss": 0.2514, "step": 10240 }, { "epoch": 0.7750626744241048, "grad_norm": 0.75390625, "learning_rate": 1.3494337435394161e-05, "loss": 0.3103, "step": 10241 }, { "epoch": 0.7751383567475522, "grad_norm": 0.75390625, "learning_rate": 1.3493221414995021e-05, "loss": 0.3292, "step": 10242 }, { "epoch": 0.7752140390709995, "grad_norm": 0.734375, "learning_rate": 1.349210534503854e-05, "loss": 0.3095, "step": 10243 }, { "epoch": 0.7752897213944469, "grad_norm": 0.7734375, "learning_rate": 1.3490989225540552e-05, "loss": 0.3678, "step": 10244 }, { "epoch": 0.7753654037178941, "grad_norm": 0.70703125, "learning_rate": 1.3489873056516892e-05, "loss": 0.2756, "step": 10245 }, { "epoch": 0.7754410860413414, "grad_norm": 0.74609375, "learning_rate": 1.3488756837983395e-05, "loss": 0.3126, "step": 10246 }, { "epoch": 0.7755167683647888, "grad_norm": 0.75390625, "learning_rate": 1.3487640569955894e-05, "loss": 0.3244, "step": 10247 }, { "epoch": 0.7755924506882361, "grad_norm": 0.8984375, "learning_rate": 1.3486524252450226e-05, "loss": 0.4057, "step": 10248 }, { "epoch": 0.7756681330116835, "grad_norm": 0.78125, "learning_rate": 1.3485407885482226e-05, "loss": 0.3123, "step": 10249 }, { "epoch": 0.7757438153351308, "grad_norm": 0.79296875, "learning_rate": 1.3484291469067736e-05, "loss": 0.3224, "step": 10250 }, { "epoch": 0.7758194976585782, "grad_norm": 0.7578125, "learning_rate": 1.3483175003222594e-05, "loss": 0.3437, "step": 10251 }, { "epoch": 0.7758951799820254, "grad_norm": 0.71484375, "learning_rate": 1.3482058487962638e-05, "loss": 0.2663, "step": 10252 }, { "epoch": 0.7759708623054727, "grad_norm": 0.73828125, "learning_rate": 1.3480941923303702e-05, "loss": 0.3087, "step": 10253 }, { "epoch": 0.7760465446289201, "grad_norm": 0.765625, "learning_rate": 1.3479825309261635e-05, "loss": 0.3411, "step": 10254 }, { "epoch": 0.7761222269523674, "grad_norm": 0.76953125, "learning_rate": 1.3478708645852272e-05, "loss": 0.3246, "step": 10255 }, { "epoch": 0.7761979092758148, "grad_norm": 0.75, "learning_rate": 1.347759193309146e-05, "loss": 0.3167, "step": 10256 }, { "epoch": 0.7762735915992621, "grad_norm": 0.70703125, "learning_rate": 1.3476475170995041e-05, "loss": 0.2941, "step": 10257 }, { "epoch": 0.7763492739227095, "grad_norm": 0.76171875, "learning_rate": 1.3475358359578849e-05, "loss": 0.3349, "step": 10258 }, { "epoch": 0.7764249562461567, "grad_norm": 0.7265625, "learning_rate": 1.3474241498858737e-05, "loss": 0.3055, "step": 10259 }, { "epoch": 0.776500638569604, "grad_norm": 0.95703125, "learning_rate": 1.347312458885055e-05, "loss": 0.3539, "step": 10260 }, { "epoch": 0.7765763208930514, "grad_norm": 1.0234375, "learning_rate": 1.347200762957013e-05, "loss": 0.3186, "step": 10261 }, { "epoch": 0.7766520032164987, "grad_norm": 0.76171875, "learning_rate": 1.3470890621033323e-05, "loss": 0.3165, "step": 10262 }, { "epoch": 0.7767276855399461, "grad_norm": 0.734375, "learning_rate": 1.3469773563255975e-05, "loss": 0.315, "step": 10263 }, { "epoch": 0.7768033678633934, "grad_norm": 0.73828125, "learning_rate": 1.3468656456253934e-05, "loss": 0.3276, "step": 10264 }, { "epoch": 0.7768790501868408, "grad_norm": 0.77734375, "learning_rate": 1.346753930004305e-05, "loss": 0.3353, "step": 10265 }, { "epoch": 0.776954732510288, "grad_norm": 0.765625, "learning_rate": 1.3466422094639175e-05, "loss": 0.3181, "step": 10266 }, { "epoch": 0.7770304148337354, "grad_norm": 0.78125, "learning_rate": 1.3465304840058147e-05, "loss": 0.3406, "step": 10267 }, { "epoch": 0.7771060971571827, "grad_norm": 0.80078125, "learning_rate": 1.3464187536315828e-05, "loss": 0.3193, "step": 10268 }, { "epoch": 0.77718177948063, "grad_norm": 0.71875, "learning_rate": 1.3463070183428062e-05, "loss": 0.3049, "step": 10269 }, { "epoch": 0.7772574618040774, "grad_norm": 0.6953125, "learning_rate": 1.34619527814107e-05, "loss": 0.2826, "step": 10270 }, { "epoch": 0.7773331441275247, "grad_norm": 0.76953125, "learning_rate": 1.3460835330279602e-05, "loss": 0.3128, "step": 10271 }, { "epoch": 0.7774088264509721, "grad_norm": 0.765625, "learning_rate": 1.3459717830050613e-05, "loss": 0.3529, "step": 10272 }, { "epoch": 0.7774845087744193, "grad_norm": 0.80078125, "learning_rate": 1.3458600280739589e-05, "loss": 0.3384, "step": 10273 }, { "epoch": 0.7775601910978667, "grad_norm": 0.83984375, "learning_rate": 1.3457482682362387e-05, "loss": 0.3765, "step": 10274 }, { "epoch": 0.777635873421314, "grad_norm": 0.765625, "learning_rate": 1.3456365034934859e-05, "loss": 0.3142, "step": 10275 }, { "epoch": 0.7777115557447614, "grad_norm": 0.79296875, "learning_rate": 1.3455247338472861e-05, "loss": 0.3555, "step": 10276 }, { "epoch": 0.7777872380682087, "grad_norm": 0.7421875, "learning_rate": 1.345412959299225e-05, "loss": 0.3088, "step": 10277 }, { "epoch": 0.777862920391656, "grad_norm": 0.94140625, "learning_rate": 1.3453011798508886e-05, "loss": 0.3256, "step": 10278 }, { "epoch": 0.7779386027151034, "grad_norm": 0.7578125, "learning_rate": 1.3451893955038618e-05, "loss": 0.3076, "step": 10279 }, { "epoch": 0.7780142850385506, "grad_norm": 0.859375, "learning_rate": 1.3450776062597316e-05, "loss": 0.369, "step": 10280 }, { "epoch": 0.778089967361998, "grad_norm": 1.140625, "learning_rate": 1.3449658121200834e-05, "loss": 0.3798, "step": 10281 }, { "epoch": 0.7781656496854453, "grad_norm": 0.69921875, "learning_rate": 1.3448540130865028e-05, "loss": 0.26, "step": 10282 }, { "epoch": 0.7782413320088927, "grad_norm": 0.76171875, "learning_rate": 1.3447422091605764e-05, "loss": 0.3369, "step": 10283 }, { "epoch": 0.77831701433234, "grad_norm": 0.80859375, "learning_rate": 1.3446304003438904e-05, "loss": 0.3401, "step": 10284 }, { "epoch": 0.7783926966557874, "grad_norm": 0.87109375, "learning_rate": 1.3445185866380307e-05, "loss": 0.278, "step": 10285 }, { "epoch": 0.7784683789792347, "grad_norm": 0.80859375, "learning_rate": 1.3444067680445834e-05, "loss": 0.3473, "step": 10286 }, { "epoch": 0.778544061302682, "grad_norm": 0.78125, "learning_rate": 1.3442949445651352e-05, "loss": 0.3051, "step": 10287 }, { "epoch": 0.7786197436261293, "grad_norm": 0.79296875, "learning_rate": 1.3441831162012719e-05, "loss": 0.3279, "step": 10288 }, { "epoch": 0.7786954259495766, "grad_norm": 0.7578125, "learning_rate": 1.3440712829545813e-05, "loss": 0.3271, "step": 10289 }, { "epoch": 0.778771108273024, "grad_norm": 0.82421875, "learning_rate": 1.3439594448266484e-05, "loss": 0.3429, "step": 10290 }, { "epoch": 0.7788467905964713, "grad_norm": 0.6875, "learning_rate": 1.343847601819061e-05, "loss": 0.2722, "step": 10291 }, { "epoch": 0.7789224729199187, "grad_norm": 0.76171875, "learning_rate": 1.3437357539334053e-05, "loss": 0.3328, "step": 10292 }, { "epoch": 0.778998155243366, "grad_norm": 0.73046875, "learning_rate": 1.3436239011712677e-05, "loss": 0.3117, "step": 10293 }, { "epoch": 0.7790738375668133, "grad_norm": 0.76171875, "learning_rate": 1.3435120435342357e-05, "loss": 0.3082, "step": 10294 }, { "epoch": 0.7791495198902606, "grad_norm": 0.7421875, "learning_rate": 1.3434001810238957e-05, "loss": 0.3198, "step": 10295 }, { "epoch": 0.779225202213708, "grad_norm": 0.83203125, "learning_rate": 1.343288313641835e-05, "loss": 0.3436, "step": 10296 }, { "epoch": 0.7793008845371553, "grad_norm": 0.7421875, "learning_rate": 1.3431764413896403e-05, "loss": 0.2896, "step": 10297 }, { "epoch": 0.7793765668606026, "grad_norm": 0.75390625, "learning_rate": 1.343064564268899e-05, "loss": 0.329, "step": 10298 }, { "epoch": 0.77945224918405, "grad_norm": 0.7265625, "learning_rate": 1.3429526822811978e-05, "loss": 0.3216, "step": 10299 }, { "epoch": 0.7795279315074973, "grad_norm": 0.78515625, "learning_rate": 1.3428407954281246e-05, "loss": 0.3279, "step": 10300 }, { "epoch": 0.7796036138309446, "grad_norm": 0.9921875, "learning_rate": 1.3427289037112665e-05, "loss": 0.3085, "step": 10301 }, { "epoch": 0.7796792961543919, "grad_norm": 0.72265625, "learning_rate": 1.3426170071322104e-05, "loss": 0.2971, "step": 10302 }, { "epoch": 0.7797549784778393, "grad_norm": 0.77734375, "learning_rate": 1.3425051056925444e-05, "loss": 0.3411, "step": 10303 }, { "epoch": 0.7798306608012866, "grad_norm": 0.76953125, "learning_rate": 1.3423931993938557e-05, "loss": 0.3267, "step": 10304 }, { "epoch": 0.779906343124734, "grad_norm": 1.0234375, "learning_rate": 1.3422812882377315e-05, "loss": 0.3105, "step": 10305 }, { "epoch": 0.7799820254481813, "grad_norm": 0.75390625, "learning_rate": 1.3421693722257603e-05, "loss": 0.2925, "step": 10306 }, { "epoch": 0.7800577077716286, "grad_norm": 0.7109375, "learning_rate": 1.3420574513595292e-05, "loss": 0.29, "step": 10307 }, { "epoch": 0.7801333900950759, "grad_norm": 0.72265625, "learning_rate": 1.3419455256406263e-05, "loss": 0.3176, "step": 10308 }, { "epoch": 0.7802090724185232, "grad_norm": 0.734375, "learning_rate": 1.3418335950706391e-05, "loss": 0.3229, "step": 10309 }, { "epoch": 0.7802847547419706, "grad_norm": 0.75390625, "learning_rate": 1.3417216596511557e-05, "loss": 0.3503, "step": 10310 }, { "epoch": 0.7803604370654179, "grad_norm": 0.77734375, "learning_rate": 1.3416097193837642e-05, "loss": 0.2937, "step": 10311 }, { "epoch": 0.7804361193888653, "grad_norm": 0.75390625, "learning_rate": 1.3414977742700528e-05, "loss": 0.3139, "step": 10312 }, { "epoch": 0.7805118017123126, "grad_norm": 0.76953125, "learning_rate": 1.3413858243116095e-05, "loss": 0.3207, "step": 10313 }, { "epoch": 0.78058748403576, "grad_norm": 0.92578125, "learning_rate": 1.3412738695100222e-05, "loss": 0.315, "step": 10314 }, { "epoch": 0.7806631663592072, "grad_norm": 0.796875, "learning_rate": 1.3411619098668795e-05, "loss": 0.3291, "step": 10315 }, { "epoch": 0.7807388486826545, "grad_norm": 0.74609375, "learning_rate": 1.3410499453837697e-05, "loss": 0.2816, "step": 10316 }, { "epoch": 0.7808145310061019, "grad_norm": 0.734375, "learning_rate": 1.3409379760622812e-05, "loss": 0.2842, "step": 10317 }, { "epoch": 0.7808902133295492, "grad_norm": 0.74609375, "learning_rate": 1.3408260019040022e-05, "loss": 0.2973, "step": 10318 }, { "epoch": 0.7809658956529966, "grad_norm": 0.80859375, "learning_rate": 1.3407140229105218e-05, "loss": 0.3316, "step": 10319 }, { "epoch": 0.7810415779764439, "grad_norm": 0.70703125, "learning_rate": 1.340602039083428e-05, "loss": 0.2724, "step": 10320 }, { "epoch": 0.7811172602998913, "grad_norm": 0.75, "learning_rate": 1.3404900504243099e-05, "loss": 0.3046, "step": 10321 }, { "epoch": 0.7811929426233385, "grad_norm": 0.75390625, "learning_rate": 1.3403780569347563e-05, "loss": 0.3319, "step": 10322 }, { "epoch": 0.7812686249467858, "grad_norm": 0.78125, "learning_rate": 1.3402660586163556e-05, "loss": 0.3311, "step": 10323 }, { "epoch": 0.7813443072702332, "grad_norm": 0.76953125, "learning_rate": 1.340154055470697e-05, "loss": 0.3298, "step": 10324 }, { "epoch": 0.7814199895936805, "grad_norm": 0.828125, "learning_rate": 1.3400420474993696e-05, "loss": 0.3591, "step": 10325 }, { "epoch": 0.7814956719171279, "grad_norm": 0.7109375, "learning_rate": 1.339930034703962e-05, "loss": 0.2645, "step": 10326 }, { "epoch": 0.7815713542405752, "grad_norm": 0.83203125, "learning_rate": 1.3398180170860637e-05, "loss": 0.3536, "step": 10327 }, { "epoch": 0.7816470365640226, "grad_norm": 0.75, "learning_rate": 1.3397059946472637e-05, "loss": 0.3104, "step": 10328 }, { "epoch": 0.7817227188874698, "grad_norm": 0.734375, "learning_rate": 1.339593967389151e-05, "loss": 0.3127, "step": 10329 }, { "epoch": 0.7817984012109171, "grad_norm": 0.80859375, "learning_rate": 1.3394819353133151e-05, "loss": 0.3309, "step": 10330 }, { "epoch": 0.7818740835343645, "grad_norm": 0.73046875, "learning_rate": 1.3393698984213456e-05, "loss": 0.3125, "step": 10331 }, { "epoch": 0.7819497658578118, "grad_norm": 0.71875, "learning_rate": 1.3392578567148316e-05, "loss": 0.2936, "step": 10332 }, { "epoch": 0.7820254481812592, "grad_norm": 0.75, "learning_rate": 1.3391458101953628e-05, "loss": 0.3452, "step": 10333 }, { "epoch": 0.7821011305047065, "grad_norm": 0.7109375, "learning_rate": 1.3390337588645283e-05, "loss": 0.3029, "step": 10334 }, { "epoch": 0.7821768128281539, "grad_norm": 0.75390625, "learning_rate": 1.3389217027239182e-05, "loss": 0.3026, "step": 10335 }, { "epoch": 0.7822524951516011, "grad_norm": 0.72265625, "learning_rate": 1.3388096417751229e-05, "loss": 0.2928, "step": 10336 }, { "epoch": 0.7823281774750485, "grad_norm": 0.73046875, "learning_rate": 1.3386975760197307e-05, "loss": 0.299, "step": 10337 }, { "epoch": 0.7824038597984958, "grad_norm": 0.87890625, "learning_rate": 1.3385855054593322e-05, "loss": 0.3601, "step": 10338 }, { "epoch": 0.7824795421219432, "grad_norm": 0.75390625, "learning_rate": 1.3384734300955176e-05, "loss": 0.338, "step": 10339 }, { "epoch": 0.7825552244453905, "grad_norm": 0.76953125, "learning_rate": 1.3383613499298762e-05, "loss": 0.2981, "step": 10340 }, { "epoch": 0.7826309067688378, "grad_norm": 0.7265625, "learning_rate": 1.3382492649639987e-05, "loss": 0.2937, "step": 10341 }, { "epoch": 0.7827065890922852, "grad_norm": 0.77734375, "learning_rate": 1.3381371751994747e-05, "loss": 0.3079, "step": 10342 }, { "epoch": 0.7827822714157324, "grad_norm": 0.8046875, "learning_rate": 1.3380250806378947e-05, "loss": 0.3222, "step": 10343 }, { "epoch": 0.7828579537391798, "grad_norm": 0.78125, "learning_rate": 1.3379129812808488e-05, "loss": 0.3256, "step": 10344 }, { "epoch": 0.7829336360626271, "grad_norm": 0.75, "learning_rate": 1.3378008771299276e-05, "loss": 0.3059, "step": 10345 }, { "epoch": 0.7830093183860745, "grad_norm": 0.70703125, "learning_rate": 1.337688768186721e-05, "loss": 0.2729, "step": 10346 }, { "epoch": 0.7830850007095218, "grad_norm": 0.72265625, "learning_rate": 1.3375766544528197e-05, "loss": 0.3232, "step": 10347 }, { "epoch": 0.7831606830329692, "grad_norm": 0.79296875, "learning_rate": 1.3374645359298144e-05, "loss": 0.3489, "step": 10348 }, { "epoch": 0.7832363653564165, "grad_norm": 0.75390625, "learning_rate": 1.3373524126192956e-05, "loss": 0.3038, "step": 10349 }, { "epoch": 0.7833120476798637, "grad_norm": 0.72265625, "learning_rate": 1.3372402845228538e-05, "loss": 0.2917, "step": 10350 }, { "epoch": 0.7833877300033111, "grad_norm": 0.73828125, "learning_rate": 1.33712815164208e-05, "loss": 0.3002, "step": 10351 }, { "epoch": 0.7834634123267584, "grad_norm": 0.76171875, "learning_rate": 1.337016013978565e-05, "loss": 0.3234, "step": 10352 }, { "epoch": 0.7835390946502058, "grad_norm": 0.70703125, "learning_rate": 1.336903871533899e-05, "loss": 0.266, "step": 10353 }, { "epoch": 0.7836147769736531, "grad_norm": 0.83203125, "learning_rate": 1.3367917243096736e-05, "loss": 0.312, "step": 10354 }, { "epoch": 0.7836904592971005, "grad_norm": 0.703125, "learning_rate": 1.3366795723074795e-05, "loss": 0.306, "step": 10355 }, { "epoch": 0.7837661416205477, "grad_norm": 0.7109375, "learning_rate": 1.3365674155289081e-05, "loss": 0.3108, "step": 10356 }, { "epoch": 0.783841823943995, "grad_norm": 0.76953125, "learning_rate": 1.3364552539755504e-05, "loss": 0.3446, "step": 10357 }, { "epoch": 0.7839175062674424, "grad_norm": 0.7734375, "learning_rate": 1.3363430876489975e-05, "loss": 0.3246, "step": 10358 }, { "epoch": 0.7839931885908897, "grad_norm": 0.78125, "learning_rate": 1.3362309165508407e-05, "loss": 0.3152, "step": 10359 }, { "epoch": 0.7840688709143371, "grad_norm": 0.734375, "learning_rate": 1.3361187406826714e-05, "loss": 0.3023, "step": 10360 }, { "epoch": 0.7841445532377844, "grad_norm": 0.75390625, "learning_rate": 1.3360065600460808e-05, "loss": 0.3161, "step": 10361 }, { "epoch": 0.7842202355612318, "grad_norm": 0.79296875, "learning_rate": 1.3358943746426607e-05, "loss": 0.3501, "step": 10362 }, { "epoch": 0.784295917884679, "grad_norm": 0.8359375, "learning_rate": 1.3357821844740024e-05, "loss": 0.3712, "step": 10363 }, { "epoch": 0.7843716002081264, "grad_norm": 0.76953125, "learning_rate": 1.3356699895416978e-05, "loss": 0.3299, "step": 10364 }, { "epoch": 0.7844472825315737, "grad_norm": 0.7734375, "learning_rate": 1.3355577898473382e-05, "loss": 0.3194, "step": 10365 }, { "epoch": 0.784522964855021, "grad_norm": 0.72265625, "learning_rate": 1.3354455853925155e-05, "loss": 0.3024, "step": 10366 }, { "epoch": 0.7845986471784684, "grad_norm": 0.76953125, "learning_rate": 1.3353333761788214e-05, "loss": 0.3322, "step": 10367 }, { "epoch": 0.7846743295019157, "grad_norm": 0.7421875, "learning_rate": 1.335221162207848e-05, "loss": 0.3131, "step": 10368 }, { "epoch": 0.7847500118253631, "grad_norm": 0.7578125, "learning_rate": 1.3351089434811875e-05, "loss": 0.3283, "step": 10369 }, { "epoch": 0.7848256941488103, "grad_norm": 0.796875, "learning_rate": 1.334996720000431e-05, "loss": 0.3273, "step": 10370 }, { "epoch": 0.7849013764722577, "grad_norm": 0.72265625, "learning_rate": 1.3348844917671714e-05, "loss": 0.2957, "step": 10371 }, { "epoch": 0.784977058795705, "grad_norm": 0.7578125, "learning_rate": 1.3347722587830008e-05, "loss": 0.3191, "step": 10372 }, { "epoch": 0.7850527411191524, "grad_norm": 0.734375, "learning_rate": 1.3346600210495111e-05, "loss": 0.3085, "step": 10373 }, { "epoch": 0.7851284234425997, "grad_norm": 0.76953125, "learning_rate": 1.3345477785682945e-05, "loss": 0.3489, "step": 10374 }, { "epoch": 0.785204105766047, "grad_norm": 0.703125, "learning_rate": 1.3344355313409438e-05, "loss": 0.2837, "step": 10375 }, { "epoch": 0.7852797880894944, "grad_norm": 0.734375, "learning_rate": 1.334323279369051e-05, "loss": 0.3165, "step": 10376 }, { "epoch": 0.7853554704129416, "grad_norm": 0.8046875, "learning_rate": 1.3342110226542089e-05, "loss": 0.3588, "step": 10377 }, { "epoch": 0.785431152736389, "grad_norm": 0.734375, "learning_rate": 1.3340987611980101e-05, "loss": 0.3013, "step": 10378 }, { "epoch": 0.7855068350598363, "grad_norm": 0.7421875, "learning_rate": 1.3339864950020465e-05, "loss": 0.3078, "step": 10379 }, { "epoch": 0.7855825173832837, "grad_norm": 0.78125, "learning_rate": 1.3338742240679116e-05, "loss": 0.3421, "step": 10380 }, { "epoch": 0.785658199706731, "grad_norm": 0.7734375, "learning_rate": 1.3337619483971983e-05, "loss": 0.3398, "step": 10381 }, { "epoch": 0.7857338820301784, "grad_norm": 0.7734375, "learning_rate": 1.3336496679914982e-05, "loss": 0.3152, "step": 10382 }, { "epoch": 0.7858095643536257, "grad_norm": 0.6875, "learning_rate": 1.3335373828524057e-05, "loss": 0.2719, "step": 10383 }, { "epoch": 0.7858852466770729, "grad_norm": 0.75, "learning_rate": 1.333425092981513e-05, "loss": 0.312, "step": 10384 }, { "epoch": 0.7859609290005203, "grad_norm": 0.79296875, "learning_rate": 1.3333127983804126e-05, "loss": 0.3432, "step": 10385 }, { "epoch": 0.7860366113239676, "grad_norm": 0.73828125, "learning_rate": 1.3332004990506986e-05, "loss": 0.3204, "step": 10386 }, { "epoch": 0.786112293647415, "grad_norm": 0.7421875, "learning_rate": 1.3330881949939638e-05, "loss": 0.2894, "step": 10387 }, { "epoch": 0.7861879759708623, "grad_norm": 0.84765625, "learning_rate": 1.3329758862118012e-05, "loss": 0.3827, "step": 10388 }, { "epoch": 0.7862636582943097, "grad_norm": 0.74609375, "learning_rate": 1.332863572705804e-05, "loss": 0.3357, "step": 10389 }, { "epoch": 0.786339340617757, "grad_norm": 0.765625, "learning_rate": 1.3327512544775662e-05, "loss": 0.3124, "step": 10390 }, { "epoch": 0.7864150229412042, "grad_norm": 0.77734375, "learning_rate": 1.3326389315286807e-05, "loss": 0.3204, "step": 10391 }, { "epoch": 0.7864907052646516, "grad_norm": 0.78125, "learning_rate": 1.3325266038607412e-05, "loss": 0.3142, "step": 10392 }, { "epoch": 0.7865663875880989, "grad_norm": 0.7734375, "learning_rate": 1.3324142714753413e-05, "loss": 0.3193, "step": 10393 }, { "epoch": 0.7866420699115463, "grad_norm": 0.77734375, "learning_rate": 1.332301934374074e-05, "loss": 0.3185, "step": 10394 }, { "epoch": 0.7867177522349936, "grad_norm": 0.734375, "learning_rate": 1.332189592558534e-05, "loss": 0.3013, "step": 10395 }, { "epoch": 0.786793434558441, "grad_norm": 0.75, "learning_rate": 1.3320772460303145e-05, "loss": 0.3174, "step": 10396 }, { "epoch": 0.7868691168818883, "grad_norm": 0.71484375, "learning_rate": 1.3319648947910094e-05, "loss": 0.2681, "step": 10397 }, { "epoch": 0.7869447992053356, "grad_norm": 0.71875, "learning_rate": 1.3318525388422123e-05, "loss": 0.286, "step": 10398 }, { "epoch": 0.7870204815287829, "grad_norm": 0.7734375, "learning_rate": 1.331740178185518e-05, "loss": 0.3175, "step": 10399 }, { "epoch": 0.7870961638522302, "grad_norm": 0.73046875, "learning_rate": 1.3316278128225192e-05, "loss": 0.3005, "step": 10400 }, { "epoch": 0.7871718461756776, "grad_norm": 0.8125, "learning_rate": 1.3315154427548117e-05, "loss": 0.3374, "step": 10401 }, { "epoch": 0.7872475284991249, "grad_norm": 0.765625, "learning_rate": 1.331403067983988e-05, "loss": 0.3065, "step": 10402 }, { "epoch": 0.7873232108225723, "grad_norm": 0.73828125, "learning_rate": 1.3312906885116435e-05, "loss": 0.3069, "step": 10403 }, { "epoch": 0.7873988931460196, "grad_norm": 0.73046875, "learning_rate": 1.3311783043393718e-05, "loss": 0.2975, "step": 10404 }, { "epoch": 0.7874745754694669, "grad_norm": 0.7109375, "learning_rate": 1.3310659154687677e-05, "loss": 0.3056, "step": 10405 }, { "epoch": 0.7875502577929142, "grad_norm": 0.78515625, "learning_rate": 1.3309535219014254e-05, "loss": 0.3651, "step": 10406 }, { "epoch": 0.7876259401163616, "grad_norm": 0.74609375, "learning_rate": 1.3308411236389395e-05, "loss": 0.3032, "step": 10407 }, { "epoch": 0.7877016224398089, "grad_norm": 0.74609375, "learning_rate": 1.3307287206829047e-05, "loss": 0.2805, "step": 10408 }, { "epoch": 0.7877773047632562, "grad_norm": 0.78515625, "learning_rate": 1.330616313034915e-05, "loss": 0.3388, "step": 10409 }, { "epoch": 0.7878529870867036, "grad_norm": 0.80078125, "learning_rate": 1.3305039006965657e-05, "loss": 0.3683, "step": 10410 }, { "epoch": 0.7879286694101509, "grad_norm": 0.72265625, "learning_rate": 1.3303914836694515e-05, "loss": 0.3213, "step": 10411 }, { "epoch": 0.7880043517335982, "grad_norm": 0.83203125, "learning_rate": 1.3302790619551673e-05, "loss": 0.3445, "step": 10412 }, { "epoch": 0.7880800340570455, "grad_norm": 0.76171875, "learning_rate": 1.3301666355553076e-05, "loss": 0.3178, "step": 10413 }, { "epoch": 0.7881557163804929, "grad_norm": 0.9140625, "learning_rate": 1.3300542044714677e-05, "loss": 0.3323, "step": 10414 }, { "epoch": 0.7882313987039402, "grad_norm": 0.7734375, "learning_rate": 1.3299417687052423e-05, "loss": 0.3437, "step": 10415 }, { "epoch": 0.7883070810273876, "grad_norm": 0.78125, "learning_rate": 1.329829328258227e-05, "loss": 0.315, "step": 10416 }, { "epoch": 0.7883827633508349, "grad_norm": 0.796875, "learning_rate": 1.3297168831320164e-05, "loss": 0.3475, "step": 10417 }, { "epoch": 0.7884584456742822, "grad_norm": 0.78125, "learning_rate": 1.329604433328206e-05, "loss": 0.333, "step": 10418 }, { "epoch": 0.7885341279977295, "grad_norm": 0.7109375, "learning_rate": 1.3294919788483915e-05, "loss": 0.2977, "step": 10419 }, { "epoch": 0.7886098103211768, "grad_norm": 0.796875, "learning_rate": 1.3293795196941677e-05, "loss": 0.3675, "step": 10420 }, { "epoch": 0.7886854926446242, "grad_norm": 0.671875, "learning_rate": 1.3292670558671299e-05, "loss": 0.2587, "step": 10421 }, { "epoch": 0.7887611749680715, "grad_norm": 0.8515625, "learning_rate": 1.329154587368874e-05, "loss": 0.3612, "step": 10422 }, { "epoch": 0.7888368572915189, "grad_norm": 0.76171875, "learning_rate": 1.3290421142009956e-05, "loss": 0.3279, "step": 10423 }, { "epoch": 0.7889125396149662, "grad_norm": 0.73046875, "learning_rate": 1.32892963636509e-05, "loss": 0.2922, "step": 10424 }, { "epoch": 0.7889882219384136, "grad_norm": 0.765625, "learning_rate": 1.3288171538627533e-05, "loss": 0.3232, "step": 10425 }, { "epoch": 0.7890639042618608, "grad_norm": 0.71484375, "learning_rate": 1.3287046666955805e-05, "loss": 0.2965, "step": 10426 }, { "epoch": 0.7891395865853081, "grad_norm": 0.74609375, "learning_rate": 1.3285921748651682e-05, "loss": 0.306, "step": 10427 }, { "epoch": 0.7892152689087555, "grad_norm": 0.69140625, "learning_rate": 1.328479678373112e-05, "loss": 0.2897, "step": 10428 }, { "epoch": 0.7892909512322028, "grad_norm": 0.7734375, "learning_rate": 1.3283671772210079e-05, "loss": 0.3171, "step": 10429 }, { "epoch": 0.7893666335556502, "grad_norm": 0.7265625, "learning_rate": 1.328254671410452e-05, "loss": 0.288, "step": 10430 }, { "epoch": 0.7894423158790975, "grad_norm": 0.72265625, "learning_rate": 1.32814216094304e-05, "loss": 0.2936, "step": 10431 }, { "epoch": 0.7895179982025449, "grad_norm": 0.796875, "learning_rate": 1.3280296458203684e-05, "loss": 0.3673, "step": 10432 }, { "epoch": 0.7895936805259921, "grad_norm": 0.73828125, "learning_rate": 1.3279171260440336e-05, "loss": 0.2988, "step": 10433 }, { "epoch": 0.7896693628494394, "grad_norm": 0.6953125, "learning_rate": 1.3278046016156316e-05, "loss": 0.2845, "step": 10434 }, { "epoch": 0.7897450451728868, "grad_norm": 0.94140625, "learning_rate": 1.3276920725367586e-05, "loss": 0.3247, "step": 10435 }, { "epoch": 0.7898207274963341, "grad_norm": 0.7109375, "learning_rate": 1.3275795388090114e-05, "loss": 0.2835, "step": 10436 }, { "epoch": 0.7898964098197815, "grad_norm": 0.74609375, "learning_rate": 1.327467000433986e-05, "loss": 0.325, "step": 10437 }, { "epoch": 0.7899720921432288, "grad_norm": 0.75, "learning_rate": 1.3273544574132792e-05, "loss": 0.2881, "step": 10438 }, { "epoch": 0.7900477744666762, "grad_norm": 0.7578125, "learning_rate": 1.3272419097484883e-05, "loss": 0.3073, "step": 10439 }, { "epoch": 0.7901234567901234, "grad_norm": 0.7265625, "learning_rate": 1.327129357441209e-05, "loss": 0.2973, "step": 10440 }, { "epoch": 0.7901991391135708, "grad_norm": 0.80078125, "learning_rate": 1.3270168004930382e-05, "loss": 0.3099, "step": 10441 }, { "epoch": 0.7902748214370181, "grad_norm": 0.88671875, "learning_rate": 1.326904238905573e-05, "loss": 0.3438, "step": 10442 }, { "epoch": 0.7903505037604655, "grad_norm": 0.73828125, "learning_rate": 1.3267916726804102e-05, "loss": 0.3219, "step": 10443 }, { "epoch": 0.7904261860839128, "grad_norm": 0.73828125, "learning_rate": 1.326679101819147e-05, "loss": 0.3101, "step": 10444 }, { "epoch": 0.7905018684073601, "grad_norm": 0.77734375, "learning_rate": 1.3265665263233798e-05, "loss": 0.2841, "step": 10445 }, { "epoch": 0.7905775507308075, "grad_norm": 0.77734375, "learning_rate": 1.3264539461947061e-05, "loss": 0.2983, "step": 10446 }, { "epoch": 0.7906532330542547, "grad_norm": 0.78125, "learning_rate": 1.3263413614347226e-05, "loss": 0.329, "step": 10447 }, { "epoch": 0.7907289153777021, "grad_norm": 0.6875, "learning_rate": 1.3262287720450275e-05, "loss": 0.2704, "step": 10448 }, { "epoch": 0.7908045977011494, "grad_norm": 0.69921875, "learning_rate": 1.3261161780272173e-05, "loss": 0.27, "step": 10449 }, { "epoch": 0.7908802800245968, "grad_norm": 0.77734375, "learning_rate": 1.3260035793828891e-05, "loss": 0.328, "step": 10450 }, { "epoch": 0.7909559623480441, "grad_norm": 0.80859375, "learning_rate": 1.325890976113641e-05, "loss": 0.3243, "step": 10451 }, { "epoch": 0.7910316446714915, "grad_norm": 0.66796875, "learning_rate": 1.3257783682210704e-05, "loss": 0.2561, "step": 10452 }, { "epoch": 0.7911073269949388, "grad_norm": 1.1484375, "learning_rate": 1.325665755706774e-05, "loss": 0.3951, "step": 10453 }, { "epoch": 0.791183009318386, "grad_norm": 0.6796875, "learning_rate": 1.3255531385723504e-05, "loss": 0.2579, "step": 10454 }, { "epoch": 0.7912586916418334, "grad_norm": 0.7265625, "learning_rate": 1.3254405168193969e-05, "loss": 0.2703, "step": 10455 }, { "epoch": 0.7913343739652807, "grad_norm": 0.7734375, "learning_rate": 1.3253278904495109e-05, "loss": 0.3552, "step": 10456 }, { "epoch": 0.7914100562887281, "grad_norm": 0.703125, "learning_rate": 1.3252152594642906e-05, "loss": 0.2577, "step": 10457 }, { "epoch": 0.7914857386121754, "grad_norm": 0.765625, "learning_rate": 1.3251026238653337e-05, "loss": 0.2802, "step": 10458 }, { "epoch": 0.7915614209356228, "grad_norm": 0.78125, "learning_rate": 1.3249899836542384e-05, "loss": 0.3168, "step": 10459 }, { "epoch": 0.7916371032590701, "grad_norm": 0.77734375, "learning_rate": 1.3248773388326024e-05, "loss": 0.3194, "step": 10460 }, { "epoch": 0.7917127855825173, "grad_norm": 0.76171875, "learning_rate": 1.3247646894020236e-05, "loss": 0.3294, "step": 10461 }, { "epoch": 0.7917884679059647, "grad_norm": 0.73046875, "learning_rate": 1.3246520353641008e-05, "loss": 0.2913, "step": 10462 }, { "epoch": 0.791864150229412, "grad_norm": 0.76171875, "learning_rate": 1.3245393767204315e-05, "loss": 0.3286, "step": 10463 }, { "epoch": 0.7919398325528594, "grad_norm": 0.7578125, "learning_rate": 1.3244267134726141e-05, "loss": 0.2746, "step": 10464 }, { "epoch": 0.7920155148763067, "grad_norm": 0.71484375, "learning_rate": 1.3243140456222472e-05, "loss": 0.2852, "step": 10465 }, { "epoch": 0.7920911971997541, "grad_norm": 0.7265625, "learning_rate": 1.324201373170929e-05, "loss": 0.3056, "step": 10466 }, { "epoch": 0.7921668795232014, "grad_norm": 0.7421875, "learning_rate": 1.3240886961202579e-05, "loss": 0.3282, "step": 10467 }, { "epoch": 0.7922425618466487, "grad_norm": 0.69140625, "learning_rate": 1.3239760144718326e-05, "loss": 0.2578, "step": 10468 }, { "epoch": 0.792318244170096, "grad_norm": 0.73828125, "learning_rate": 1.3238633282272514e-05, "loss": 0.2856, "step": 10469 }, { "epoch": 0.7923939264935433, "grad_norm": 0.75390625, "learning_rate": 1.3237506373881135e-05, "loss": 0.2548, "step": 10470 }, { "epoch": 0.7924696088169907, "grad_norm": 0.75390625, "learning_rate": 1.3236379419560166e-05, "loss": 0.3324, "step": 10471 }, { "epoch": 0.792545291140438, "grad_norm": 0.7734375, "learning_rate": 1.3235252419325608e-05, "loss": 0.3274, "step": 10472 }, { "epoch": 0.7926209734638854, "grad_norm": 0.69921875, "learning_rate": 1.3234125373193437e-05, "loss": 0.2998, "step": 10473 }, { "epoch": 0.7926966557873326, "grad_norm": 0.80859375, "learning_rate": 1.3232998281179649e-05, "loss": 0.3461, "step": 10474 }, { "epoch": 0.79277233811078, "grad_norm": 0.76953125, "learning_rate": 1.3231871143300236e-05, "loss": 0.3108, "step": 10475 }, { "epoch": 0.7928480204342273, "grad_norm": 0.73828125, "learning_rate": 1.3230743959571182e-05, "loss": 0.2959, "step": 10476 }, { "epoch": 0.7929237027576747, "grad_norm": 0.76171875, "learning_rate": 1.3229616730008481e-05, "loss": 0.2682, "step": 10477 }, { "epoch": 0.792999385081122, "grad_norm": 0.74609375, "learning_rate": 1.3228489454628126e-05, "loss": 0.3277, "step": 10478 }, { "epoch": 0.7930750674045693, "grad_norm": 0.734375, "learning_rate": 1.3227362133446105e-05, "loss": 0.299, "step": 10479 }, { "epoch": 0.7931507497280167, "grad_norm": 0.71484375, "learning_rate": 1.3226234766478413e-05, "loss": 0.2964, "step": 10480 }, { "epoch": 0.7932264320514639, "grad_norm": 0.73046875, "learning_rate": 1.322510735374105e-05, "loss": 0.3215, "step": 10481 }, { "epoch": 0.7933021143749113, "grad_norm": 0.76171875, "learning_rate": 1.3223979895249998e-05, "loss": 0.2989, "step": 10482 }, { "epoch": 0.7933777966983586, "grad_norm": 0.7265625, "learning_rate": 1.3222852391021261e-05, "loss": 0.3318, "step": 10483 }, { "epoch": 0.793453479021806, "grad_norm": 0.734375, "learning_rate": 1.3221724841070835e-05, "loss": 0.3098, "step": 10484 }, { "epoch": 0.7935291613452533, "grad_norm": 0.7734375, "learning_rate": 1.322059724541471e-05, "loss": 0.3277, "step": 10485 }, { "epoch": 0.7936048436687007, "grad_norm": 0.73828125, "learning_rate": 1.3219469604068888e-05, "loss": 0.325, "step": 10486 }, { "epoch": 0.793680525992148, "grad_norm": 0.81640625, "learning_rate": 1.3218341917049367e-05, "loss": 0.3423, "step": 10487 }, { "epoch": 0.7937562083155952, "grad_norm": 0.78515625, "learning_rate": 1.3217214184372138e-05, "loss": 0.3408, "step": 10488 }, { "epoch": 0.7938318906390426, "grad_norm": 0.8125, "learning_rate": 1.3216086406053208e-05, "loss": 0.2876, "step": 10489 }, { "epoch": 0.7939075729624899, "grad_norm": 0.6875, "learning_rate": 1.3214958582108576e-05, "loss": 0.2642, "step": 10490 }, { "epoch": 0.7939832552859373, "grad_norm": 0.73828125, "learning_rate": 1.3213830712554235e-05, "loss": 0.3041, "step": 10491 }, { "epoch": 0.7940589376093846, "grad_norm": 0.7890625, "learning_rate": 1.3212702797406192e-05, "loss": 0.3501, "step": 10492 }, { "epoch": 0.794134619932832, "grad_norm": 0.73046875, "learning_rate": 1.3211574836680446e-05, "loss": 0.3002, "step": 10493 }, { "epoch": 0.7942103022562793, "grad_norm": 0.7578125, "learning_rate": 1.3210446830392997e-05, "loss": 0.3126, "step": 10494 }, { "epoch": 0.7942859845797265, "grad_norm": 0.7734375, "learning_rate": 1.3209318778559856e-05, "loss": 0.3076, "step": 10495 }, { "epoch": 0.7943616669031739, "grad_norm": 0.76953125, "learning_rate": 1.320819068119702e-05, "loss": 0.3034, "step": 10496 }, { "epoch": 0.7944373492266212, "grad_norm": 0.78125, "learning_rate": 1.3207062538320487e-05, "loss": 0.2931, "step": 10497 }, { "epoch": 0.7945130315500686, "grad_norm": 0.7578125, "learning_rate": 1.3205934349946275e-05, "loss": 0.3134, "step": 10498 }, { "epoch": 0.7945887138735159, "grad_norm": 0.76171875, "learning_rate": 1.3204806116090381e-05, "loss": 0.3332, "step": 10499 }, { "epoch": 0.7946643961969633, "grad_norm": 0.75390625, "learning_rate": 1.3203677836768813e-05, "loss": 0.3301, "step": 10500 }, { "epoch": 0.7947400785204106, "grad_norm": 0.75, "learning_rate": 1.3202549511997578e-05, "loss": 0.329, "step": 10501 }, { "epoch": 0.7948157608438579, "grad_norm": 0.71875, "learning_rate": 1.3201421141792681e-05, "loss": 0.3128, "step": 10502 }, { "epoch": 0.7948914431673052, "grad_norm": 0.71875, "learning_rate": 1.320029272617013e-05, "loss": 0.2793, "step": 10503 }, { "epoch": 0.7949671254907525, "grad_norm": 0.83984375, "learning_rate": 1.3199164265145936e-05, "loss": 0.3658, "step": 10504 }, { "epoch": 0.7950428078141999, "grad_norm": 0.75, "learning_rate": 1.3198035758736109e-05, "loss": 0.3304, "step": 10505 }, { "epoch": 0.7951184901376472, "grad_norm": 0.80078125, "learning_rate": 1.3196907206956652e-05, "loss": 0.3187, "step": 10506 }, { "epoch": 0.7951941724610946, "grad_norm": 0.71484375, "learning_rate": 1.3195778609823584e-05, "loss": 0.2678, "step": 10507 }, { "epoch": 0.7952698547845419, "grad_norm": 0.7734375, "learning_rate": 1.3194649967352913e-05, "loss": 0.3036, "step": 10508 }, { "epoch": 0.7953455371079892, "grad_norm": 0.734375, "learning_rate": 1.3193521279560648e-05, "loss": 0.3061, "step": 10509 }, { "epoch": 0.7954212194314365, "grad_norm": 0.72265625, "learning_rate": 1.3192392546462802e-05, "loss": 0.288, "step": 10510 }, { "epoch": 0.7954969017548839, "grad_norm": 0.7265625, "learning_rate": 1.3191263768075393e-05, "loss": 0.2935, "step": 10511 }, { "epoch": 0.7955725840783312, "grad_norm": 0.72265625, "learning_rate": 1.3190134944414425e-05, "loss": 0.2982, "step": 10512 }, { "epoch": 0.7956482664017785, "grad_norm": 0.7578125, "learning_rate": 1.3189006075495925e-05, "loss": 0.3314, "step": 10513 }, { "epoch": 0.7957239487252259, "grad_norm": 0.734375, "learning_rate": 1.3187877161335899e-05, "loss": 0.3144, "step": 10514 }, { "epoch": 0.7957996310486732, "grad_norm": 0.76171875, "learning_rate": 1.3186748201950363e-05, "loss": 0.3402, "step": 10515 }, { "epoch": 0.7958753133721205, "grad_norm": 0.765625, "learning_rate": 1.318561919735534e-05, "loss": 0.3135, "step": 10516 }, { "epoch": 0.7959509956955678, "grad_norm": 0.8125, "learning_rate": 1.3184490147566837e-05, "loss": 0.352, "step": 10517 }, { "epoch": 0.7960266780190152, "grad_norm": 0.7421875, "learning_rate": 1.3183361052600878e-05, "loss": 0.3055, "step": 10518 }, { "epoch": 0.7961023603424625, "grad_norm": 0.75390625, "learning_rate": 1.318223191247348e-05, "loss": 0.2992, "step": 10519 }, { "epoch": 0.7961780426659099, "grad_norm": 0.79296875, "learning_rate": 1.318110272720066e-05, "loss": 0.3439, "step": 10520 }, { "epoch": 0.7962537249893572, "grad_norm": 0.76953125, "learning_rate": 1.3179973496798439e-05, "loss": 0.3016, "step": 10521 }, { "epoch": 0.7963294073128045, "grad_norm": 0.78515625, "learning_rate": 1.317884422128284e-05, "loss": 0.3452, "step": 10522 }, { "epoch": 0.7964050896362518, "grad_norm": 0.7265625, "learning_rate": 1.317771490066988e-05, "loss": 0.3184, "step": 10523 }, { "epoch": 0.7964807719596991, "grad_norm": 0.75, "learning_rate": 1.3176585534975578e-05, "loss": 0.2899, "step": 10524 }, { "epoch": 0.7965564542831465, "grad_norm": 0.79296875, "learning_rate": 1.3175456124215961e-05, "loss": 0.3061, "step": 10525 }, { "epoch": 0.7966321366065938, "grad_norm": 0.80078125, "learning_rate": 1.3174326668407051e-05, "loss": 0.3194, "step": 10526 }, { "epoch": 0.7967078189300412, "grad_norm": 0.78125, "learning_rate": 1.3173197167564867e-05, "loss": 0.3278, "step": 10527 }, { "epoch": 0.7967835012534885, "grad_norm": 0.796875, "learning_rate": 1.3172067621705438e-05, "loss": 0.3533, "step": 10528 }, { "epoch": 0.7968591835769359, "grad_norm": 0.71875, "learning_rate": 1.3170938030844785e-05, "loss": 0.2844, "step": 10529 }, { "epoch": 0.7969348659003831, "grad_norm": 0.7109375, "learning_rate": 1.3169808394998933e-05, "loss": 0.2957, "step": 10530 }, { "epoch": 0.7970105482238304, "grad_norm": 0.73828125, "learning_rate": 1.3168678714183911e-05, "loss": 0.2999, "step": 10531 }, { "epoch": 0.7970862305472778, "grad_norm": 0.7265625, "learning_rate": 1.3167548988415747e-05, "loss": 0.3274, "step": 10532 }, { "epoch": 0.7971619128707251, "grad_norm": 0.9921875, "learning_rate": 1.316641921771046e-05, "loss": 0.3363, "step": 10533 }, { "epoch": 0.7972375951941725, "grad_norm": 0.7578125, "learning_rate": 1.3165289402084084e-05, "loss": 0.3208, "step": 10534 }, { "epoch": 0.7973132775176198, "grad_norm": 0.7265625, "learning_rate": 1.3164159541552646e-05, "loss": 0.3042, "step": 10535 }, { "epoch": 0.7973889598410672, "grad_norm": 0.734375, "learning_rate": 1.3163029636132176e-05, "loss": 0.2759, "step": 10536 }, { "epoch": 0.7974646421645144, "grad_norm": 0.7734375, "learning_rate": 1.3161899685838704e-05, "loss": 0.292, "step": 10537 }, { "epoch": 0.7975403244879617, "grad_norm": 0.7265625, "learning_rate": 1.3160769690688256e-05, "loss": 0.2867, "step": 10538 }, { "epoch": 0.7976160068114091, "grad_norm": 0.91796875, "learning_rate": 1.3159639650696865e-05, "loss": 0.3022, "step": 10539 }, { "epoch": 0.7976916891348564, "grad_norm": 0.70703125, "learning_rate": 1.3158509565880566e-05, "loss": 0.2772, "step": 10540 }, { "epoch": 0.7977673714583038, "grad_norm": 0.72265625, "learning_rate": 1.3157379436255386e-05, "loss": 0.272, "step": 10541 }, { "epoch": 0.7978430537817511, "grad_norm": 0.83203125, "learning_rate": 1.3156249261837366e-05, "loss": 0.311, "step": 10542 }, { "epoch": 0.7979187361051985, "grad_norm": 0.75, "learning_rate": 1.315511904264253e-05, "loss": 0.3048, "step": 10543 }, { "epoch": 0.7979944184286457, "grad_norm": 0.74609375, "learning_rate": 1.3153988778686914e-05, "loss": 0.317, "step": 10544 }, { "epoch": 0.7980701007520931, "grad_norm": 0.6953125, "learning_rate": 1.3152858469986559e-05, "loss": 0.243, "step": 10545 }, { "epoch": 0.7981457830755404, "grad_norm": 0.72265625, "learning_rate": 1.3151728116557496e-05, "loss": 0.3054, "step": 10546 }, { "epoch": 0.7982214653989878, "grad_norm": 0.8125, "learning_rate": 1.3150597718415759e-05, "loss": 0.3351, "step": 10547 }, { "epoch": 0.7982971477224351, "grad_norm": 0.73828125, "learning_rate": 1.3149467275577385e-05, "loss": 0.3011, "step": 10548 }, { "epoch": 0.7983728300458824, "grad_norm": 0.86328125, "learning_rate": 1.3148336788058415e-05, "loss": 0.3548, "step": 10549 }, { "epoch": 0.7984485123693298, "grad_norm": 0.8125, "learning_rate": 1.3147206255874886e-05, "loss": 0.3516, "step": 10550 }, { "epoch": 0.798524194692777, "grad_norm": 0.7890625, "learning_rate": 1.3146075679042835e-05, "loss": 0.3615, "step": 10551 }, { "epoch": 0.7985998770162244, "grad_norm": 0.79296875, "learning_rate": 1.3144945057578303e-05, "loss": 0.3351, "step": 10552 }, { "epoch": 0.7986755593396717, "grad_norm": 0.7265625, "learning_rate": 1.3143814391497323e-05, "loss": 0.3059, "step": 10553 }, { "epoch": 0.7987512416631191, "grad_norm": 0.7578125, "learning_rate": 1.3142683680815944e-05, "loss": 0.2827, "step": 10554 }, { "epoch": 0.7988269239865664, "grad_norm": 0.78515625, "learning_rate": 1.3141552925550205e-05, "loss": 0.3541, "step": 10555 }, { "epoch": 0.7989026063100138, "grad_norm": 0.8046875, "learning_rate": 1.3140422125716148e-05, "loss": 0.3405, "step": 10556 }, { "epoch": 0.7989782886334611, "grad_norm": 0.76953125, "learning_rate": 1.3139291281329812e-05, "loss": 0.3306, "step": 10557 }, { "epoch": 0.7990539709569083, "grad_norm": 0.7109375, "learning_rate": 1.3138160392407242e-05, "loss": 0.2854, "step": 10558 }, { "epoch": 0.7991296532803557, "grad_norm": 0.71875, "learning_rate": 1.3137029458964481e-05, "loss": 0.2952, "step": 10559 }, { "epoch": 0.799205335603803, "grad_norm": 0.859375, "learning_rate": 1.3135898481017575e-05, "loss": 0.4094, "step": 10560 }, { "epoch": 0.7992810179272504, "grad_norm": 0.8046875, "learning_rate": 1.3134767458582567e-05, "loss": 0.3478, "step": 10561 }, { "epoch": 0.7993567002506977, "grad_norm": 0.75390625, "learning_rate": 1.3133636391675501e-05, "loss": 0.3164, "step": 10562 }, { "epoch": 0.7994323825741451, "grad_norm": 0.765625, "learning_rate": 1.3132505280312429e-05, "loss": 0.3102, "step": 10563 }, { "epoch": 0.7995080648975924, "grad_norm": 0.8671875, "learning_rate": 1.3131374124509393e-05, "loss": 0.3689, "step": 10564 }, { "epoch": 0.7995837472210396, "grad_norm": 0.76953125, "learning_rate": 1.3130242924282442e-05, "loss": 0.3247, "step": 10565 }, { "epoch": 0.799659429544487, "grad_norm": 0.7421875, "learning_rate": 1.3129111679647622e-05, "loss": 0.3192, "step": 10566 }, { "epoch": 0.7997351118679343, "grad_norm": 0.80859375, "learning_rate": 1.3127980390620984e-05, "loss": 0.3767, "step": 10567 }, { "epoch": 0.7998107941913817, "grad_norm": 0.7890625, "learning_rate": 1.3126849057218573e-05, "loss": 0.3212, "step": 10568 }, { "epoch": 0.799886476514829, "grad_norm": 0.7578125, "learning_rate": 1.3125717679456447e-05, "loss": 0.311, "step": 10569 }, { "epoch": 0.7999621588382764, "grad_norm": 0.75390625, "learning_rate": 1.3124586257350649e-05, "loss": 0.3137, "step": 10570 }, { "epoch": 0.8000378411617237, "grad_norm": 0.765625, "learning_rate": 1.3123454790917235e-05, "loss": 0.3128, "step": 10571 }, { "epoch": 0.800113523485171, "grad_norm": 0.75390625, "learning_rate": 1.3122323280172254e-05, "loss": 0.3081, "step": 10572 }, { "epoch": 0.8001892058086183, "grad_norm": 0.80859375, "learning_rate": 1.312119172513176e-05, "loss": 0.3598, "step": 10573 }, { "epoch": 0.8002648881320656, "grad_norm": 0.90625, "learning_rate": 1.3120060125811802e-05, "loss": 0.3386, "step": 10574 }, { "epoch": 0.800340570455513, "grad_norm": 0.68359375, "learning_rate": 1.311892848222844e-05, "loss": 0.2798, "step": 10575 }, { "epoch": 0.8004162527789603, "grad_norm": 0.703125, "learning_rate": 1.3117796794397721e-05, "loss": 0.2808, "step": 10576 }, { "epoch": 0.8004162527789603, "eval_loss": 0.33043158054351807, "eval_runtime": 83.4562, "eval_samples_per_second": 58.258, "eval_steps_per_second": 58.258, "step": 10576 }, { "epoch": 0.8004919351024077, "grad_norm": 0.7265625, "learning_rate": 1.311666506233571e-05, "loss": 0.3104, "step": 10577 }, { "epoch": 0.800567617425855, "grad_norm": 0.7578125, "learning_rate": 1.3115533286058453e-05, "loss": 0.324, "step": 10578 }, { "epoch": 0.8006432997493023, "grad_norm": 0.8203125, "learning_rate": 1.3114401465582012e-05, "loss": 0.3713, "step": 10579 }, { "epoch": 0.8007189820727496, "grad_norm": 0.7890625, "learning_rate": 1.3113269600922441e-05, "loss": 0.3304, "step": 10580 }, { "epoch": 0.800794664396197, "grad_norm": 1.0546875, "learning_rate": 1.3112137692095797e-05, "loss": 0.3934, "step": 10581 }, { "epoch": 0.8008703467196443, "grad_norm": 0.75390625, "learning_rate": 1.3111005739118142e-05, "loss": 0.3232, "step": 10582 }, { "epoch": 0.8009460290430916, "grad_norm": 0.703125, "learning_rate": 1.3109873742005528e-05, "loss": 0.2872, "step": 10583 }, { "epoch": 0.801021711366539, "grad_norm": 0.765625, "learning_rate": 1.3108741700774021e-05, "loss": 0.3194, "step": 10584 }, { "epoch": 0.8010973936899863, "grad_norm": 0.73828125, "learning_rate": 1.3107609615439677e-05, "loss": 0.3012, "step": 10585 }, { "epoch": 0.8011730760134336, "grad_norm": 0.88671875, "learning_rate": 1.3106477486018556e-05, "loss": 0.3104, "step": 10586 }, { "epoch": 0.8012487583368809, "grad_norm": 0.76953125, "learning_rate": 1.3105345312526725e-05, "loss": 0.3093, "step": 10587 }, { "epoch": 0.8013244406603283, "grad_norm": 0.796875, "learning_rate": 1.310421309498024e-05, "loss": 0.3335, "step": 10588 }, { "epoch": 0.8014001229837756, "grad_norm": 0.7578125, "learning_rate": 1.3103080833395165e-05, "loss": 0.3158, "step": 10589 }, { "epoch": 0.801475805307223, "grad_norm": 0.7578125, "learning_rate": 1.3101948527787565e-05, "loss": 0.3132, "step": 10590 }, { "epoch": 0.8015514876306703, "grad_norm": 0.8203125, "learning_rate": 1.3100816178173496e-05, "loss": 0.2856, "step": 10591 }, { "epoch": 0.8016271699541176, "grad_norm": 0.734375, "learning_rate": 1.3099683784569036e-05, "loss": 0.2727, "step": 10592 }, { "epoch": 0.8017028522775649, "grad_norm": 0.734375, "learning_rate": 1.309855134699024e-05, "loss": 0.3016, "step": 10593 }, { "epoch": 0.8017785346010122, "grad_norm": 0.8203125, "learning_rate": 1.3097418865453173e-05, "loss": 0.2891, "step": 10594 }, { "epoch": 0.8018542169244596, "grad_norm": 0.84765625, "learning_rate": 1.3096286339973907e-05, "loss": 0.3656, "step": 10595 }, { "epoch": 0.8019298992479069, "grad_norm": 0.76953125, "learning_rate": 1.3095153770568504e-05, "loss": 0.329, "step": 10596 }, { "epoch": 0.8020055815713543, "grad_norm": 0.75, "learning_rate": 1.3094021157253032e-05, "loss": 0.3012, "step": 10597 }, { "epoch": 0.8020812638948016, "grad_norm": 0.84375, "learning_rate": 1.3092888500043566e-05, "loss": 0.3796, "step": 10598 }, { "epoch": 0.8021569462182488, "grad_norm": 0.76953125, "learning_rate": 1.3091755798956164e-05, "loss": 0.314, "step": 10599 }, { "epoch": 0.8022326285416962, "grad_norm": 1.1015625, "learning_rate": 1.3090623054006901e-05, "loss": 0.3432, "step": 10600 }, { "epoch": 0.8023083108651435, "grad_norm": 0.68359375, "learning_rate": 1.3089490265211848e-05, "loss": 0.2847, "step": 10601 }, { "epoch": 0.8023839931885909, "grad_norm": 0.78125, "learning_rate": 1.3088357432587076e-05, "loss": 0.324, "step": 10602 }, { "epoch": 0.8024596755120382, "grad_norm": 0.8984375, "learning_rate": 1.3087224556148648e-05, "loss": 0.3243, "step": 10603 }, { "epoch": 0.8025353578354856, "grad_norm": 0.73828125, "learning_rate": 1.3086091635912645e-05, "loss": 0.2865, "step": 10604 }, { "epoch": 0.8026110401589329, "grad_norm": 0.7421875, "learning_rate": 1.3084958671895136e-05, "loss": 0.3168, "step": 10605 }, { "epoch": 0.8026867224823802, "grad_norm": 0.73828125, "learning_rate": 1.3083825664112193e-05, "loss": 0.306, "step": 10606 }, { "epoch": 0.8027624048058275, "grad_norm": 0.71484375, "learning_rate": 1.3082692612579895e-05, "loss": 0.2768, "step": 10607 }, { "epoch": 0.8028380871292748, "grad_norm": 0.78125, "learning_rate": 1.3081559517314308e-05, "loss": 0.3306, "step": 10608 }, { "epoch": 0.8029137694527222, "grad_norm": 0.7265625, "learning_rate": 1.3080426378331512e-05, "loss": 0.2788, "step": 10609 }, { "epoch": 0.8029894517761695, "grad_norm": 0.79296875, "learning_rate": 1.3079293195647582e-05, "loss": 0.3605, "step": 10610 }, { "epoch": 0.8030651340996169, "grad_norm": 0.77734375, "learning_rate": 1.3078159969278595e-05, "loss": 0.2779, "step": 10611 }, { "epoch": 0.8031408164230642, "grad_norm": 3.25, "learning_rate": 1.3077026699240624e-05, "loss": 0.3627, "step": 10612 }, { "epoch": 0.8032164987465115, "grad_norm": 0.7890625, "learning_rate": 1.3075893385549752e-05, "loss": 0.3761, "step": 10613 }, { "epoch": 0.8032921810699588, "grad_norm": 0.74609375, "learning_rate": 1.3074760028222053e-05, "loss": 0.3134, "step": 10614 }, { "epoch": 0.8033678633934062, "grad_norm": 0.828125, "learning_rate": 1.3073626627273604e-05, "loss": 0.3655, "step": 10615 }, { "epoch": 0.8034435457168535, "grad_norm": 0.73046875, "learning_rate": 1.3072493182720492e-05, "loss": 0.3056, "step": 10616 }, { "epoch": 0.8035192280403008, "grad_norm": 0.8671875, "learning_rate": 1.3071359694578787e-05, "loss": 0.3313, "step": 10617 }, { "epoch": 0.8035949103637482, "grad_norm": 0.671875, "learning_rate": 1.3070226162864575e-05, "loss": 0.2484, "step": 10618 }, { "epoch": 0.8036705926871955, "grad_norm": 0.6953125, "learning_rate": 1.3069092587593936e-05, "loss": 0.2841, "step": 10619 }, { "epoch": 0.8037462750106428, "grad_norm": 0.81640625, "learning_rate": 1.3067958968782952e-05, "loss": 0.3448, "step": 10620 }, { "epoch": 0.8038219573340901, "grad_norm": 0.8125, "learning_rate": 1.3066825306447706e-05, "loss": 0.3343, "step": 10621 }, { "epoch": 0.8038976396575375, "grad_norm": 0.76171875, "learning_rate": 1.306569160060428e-05, "loss": 0.332, "step": 10622 }, { "epoch": 0.8039733219809848, "grad_norm": 0.765625, "learning_rate": 1.3064557851268758e-05, "loss": 0.3316, "step": 10623 }, { "epoch": 0.8040490043044322, "grad_norm": 0.7421875, "learning_rate": 1.3063424058457222e-05, "loss": 0.2999, "step": 10624 }, { "epoch": 0.8041246866278795, "grad_norm": 0.734375, "learning_rate": 1.3062290222185761e-05, "loss": 0.3178, "step": 10625 }, { "epoch": 0.8042003689513268, "grad_norm": 0.7734375, "learning_rate": 1.3061156342470458e-05, "loss": 0.3188, "step": 10626 }, { "epoch": 0.8042760512747741, "grad_norm": 0.73046875, "learning_rate": 1.3060022419327398e-05, "loss": 0.3159, "step": 10627 }, { "epoch": 0.8043517335982214, "grad_norm": 0.69921875, "learning_rate": 1.3058888452772668e-05, "loss": 0.2919, "step": 10628 }, { "epoch": 0.8044274159216688, "grad_norm": 0.73828125, "learning_rate": 1.3057754442822358e-05, "loss": 0.2918, "step": 10629 }, { "epoch": 0.8045030982451161, "grad_norm": 0.76953125, "learning_rate": 1.3056620389492554e-05, "loss": 0.3411, "step": 10630 }, { "epoch": 0.8045787805685635, "grad_norm": 0.765625, "learning_rate": 1.3055486292799345e-05, "loss": 0.2734, "step": 10631 }, { "epoch": 0.8046544628920108, "grad_norm": 0.74609375, "learning_rate": 1.3054352152758816e-05, "loss": 0.2881, "step": 10632 }, { "epoch": 0.8047301452154582, "grad_norm": 0.75, "learning_rate": 1.3053217969387064e-05, "loss": 0.3155, "step": 10633 }, { "epoch": 0.8048058275389054, "grad_norm": 0.7734375, "learning_rate": 1.3052083742700172e-05, "loss": 0.3241, "step": 10634 }, { "epoch": 0.8048815098623527, "grad_norm": 0.75, "learning_rate": 1.3050949472714239e-05, "loss": 0.3052, "step": 10635 }, { "epoch": 0.8049571921858001, "grad_norm": 0.7265625, "learning_rate": 1.304981515944535e-05, "loss": 0.3028, "step": 10636 }, { "epoch": 0.8050328745092474, "grad_norm": 0.734375, "learning_rate": 1.30486808029096e-05, "loss": 0.2938, "step": 10637 }, { "epoch": 0.8051085568326948, "grad_norm": 0.72265625, "learning_rate": 1.3047546403123077e-05, "loss": 0.2914, "step": 10638 }, { "epoch": 0.8051842391561421, "grad_norm": 0.7578125, "learning_rate": 1.304641196010188e-05, "loss": 0.293, "step": 10639 }, { "epoch": 0.8052599214795895, "grad_norm": 0.78515625, "learning_rate": 1.3045277473862109e-05, "loss": 0.3356, "step": 10640 }, { "epoch": 0.8053356038030367, "grad_norm": 0.7578125, "learning_rate": 1.3044142944419841e-05, "loss": 0.3313, "step": 10641 }, { "epoch": 0.805411286126484, "grad_norm": 1.1328125, "learning_rate": 1.3043008371791186e-05, "loss": 0.3356, "step": 10642 }, { "epoch": 0.8054869684499314, "grad_norm": 0.91015625, "learning_rate": 1.3041873755992236e-05, "loss": 0.3529, "step": 10643 }, { "epoch": 0.8055626507733787, "grad_norm": 0.7421875, "learning_rate": 1.3040739097039087e-05, "loss": 0.2817, "step": 10644 }, { "epoch": 0.8056383330968261, "grad_norm": 0.8203125, "learning_rate": 1.3039604394947833e-05, "loss": 0.3738, "step": 10645 }, { "epoch": 0.8057140154202734, "grad_norm": 0.80859375, "learning_rate": 1.3038469649734577e-05, "loss": 0.3362, "step": 10646 }, { "epoch": 0.8057896977437208, "grad_norm": 0.734375, "learning_rate": 1.3037334861415414e-05, "loss": 0.3166, "step": 10647 }, { "epoch": 0.805865380067168, "grad_norm": 0.75390625, "learning_rate": 1.3036200030006443e-05, "loss": 0.3475, "step": 10648 }, { "epoch": 0.8059410623906154, "grad_norm": 0.71875, "learning_rate": 1.3035065155523768e-05, "loss": 0.2669, "step": 10649 }, { "epoch": 0.8060167447140627, "grad_norm": 0.75390625, "learning_rate": 1.303393023798348e-05, "loss": 0.3549, "step": 10650 }, { "epoch": 0.80609242703751, "grad_norm": 0.7734375, "learning_rate": 1.3032795277401689e-05, "loss": 0.333, "step": 10651 }, { "epoch": 0.8061681093609574, "grad_norm": 0.72265625, "learning_rate": 1.3031660273794491e-05, "loss": 0.2821, "step": 10652 }, { "epoch": 0.8062437916844047, "grad_norm": 0.78515625, "learning_rate": 1.3030525227177985e-05, "loss": 0.3405, "step": 10653 }, { "epoch": 0.8063194740078521, "grad_norm": 0.73046875, "learning_rate": 1.3029390137568288e-05, "loss": 0.2855, "step": 10654 }, { "epoch": 0.8063951563312993, "grad_norm": 0.80078125, "learning_rate": 1.3028255004981488e-05, "loss": 0.3484, "step": 10655 }, { "epoch": 0.8064708386547467, "grad_norm": 0.78125, "learning_rate": 1.3027119829433693e-05, "loss": 0.3036, "step": 10656 }, { "epoch": 0.806546520978194, "grad_norm": 0.7734375, "learning_rate": 1.3025984610941009e-05, "loss": 0.3264, "step": 10657 }, { "epoch": 0.8066222033016414, "grad_norm": 0.74609375, "learning_rate": 1.3024849349519543e-05, "loss": 0.2975, "step": 10658 }, { "epoch": 0.8066978856250887, "grad_norm": 0.8203125, "learning_rate": 1.3023714045185393e-05, "loss": 0.367, "step": 10659 }, { "epoch": 0.806773567948536, "grad_norm": 0.80078125, "learning_rate": 1.3022578697954675e-05, "loss": 0.341, "step": 10660 }, { "epoch": 0.8068492502719834, "grad_norm": 0.71875, "learning_rate": 1.302144330784349e-05, "loss": 0.2874, "step": 10661 }, { "epoch": 0.8069249325954306, "grad_norm": 0.76171875, "learning_rate": 1.3020307874867946e-05, "loss": 0.2967, "step": 10662 }, { "epoch": 0.807000614918878, "grad_norm": 0.71484375, "learning_rate": 1.3019172399044156e-05, "loss": 0.2962, "step": 10663 }, { "epoch": 0.8070762972423253, "grad_norm": 0.84765625, "learning_rate": 1.3018036880388219e-05, "loss": 0.3734, "step": 10664 }, { "epoch": 0.8071519795657727, "grad_norm": 0.7421875, "learning_rate": 1.3016901318916251e-05, "loss": 0.3157, "step": 10665 }, { "epoch": 0.80722766188922, "grad_norm": 0.7421875, "learning_rate": 1.3015765714644362e-05, "loss": 0.3068, "step": 10666 }, { "epoch": 0.8073033442126674, "grad_norm": 0.7421875, "learning_rate": 1.3014630067588658e-05, "loss": 0.3029, "step": 10667 }, { "epoch": 0.8073790265361147, "grad_norm": 0.734375, "learning_rate": 1.3013494377765256e-05, "loss": 0.3129, "step": 10668 }, { "epoch": 0.8074547088595619, "grad_norm": 0.74609375, "learning_rate": 1.3012358645190264e-05, "loss": 0.3001, "step": 10669 }, { "epoch": 0.8075303911830093, "grad_norm": 0.703125, "learning_rate": 1.3011222869879796e-05, "loss": 0.2714, "step": 10670 }, { "epoch": 0.8076060735064566, "grad_norm": 0.7890625, "learning_rate": 1.3010087051849962e-05, "loss": 0.3367, "step": 10671 }, { "epoch": 0.807681755829904, "grad_norm": 0.6796875, "learning_rate": 1.3008951191116884e-05, "loss": 0.2595, "step": 10672 }, { "epoch": 0.8077574381533513, "grad_norm": 0.73828125, "learning_rate": 1.3007815287696665e-05, "loss": 0.288, "step": 10673 }, { "epoch": 0.8078331204767987, "grad_norm": 0.73046875, "learning_rate": 1.3006679341605421e-05, "loss": 0.2896, "step": 10674 }, { "epoch": 0.807908802800246, "grad_norm": 0.70703125, "learning_rate": 1.3005543352859276e-05, "loss": 0.2853, "step": 10675 }, { "epoch": 0.8079844851236933, "grad_norm": 0.77734375, "learning_rate": 1.300440732147434e-05, "loss": 0.3177, "step": 10676 }, { "epoch": 0.8080601674471406, "grad_norm": 0.8203125, "learning_rate": 1.300327124746673e-05, "loss": 0.3435, "step": 10677 }, { "epoch": 0.8081358497705879, "grad_norm": 0.76953125, "learning_rate": 1.3002135130852565e-05, "loss": 0.3372, "step": 10678 }, { "epoch": 0.8082115320940353, "grad_norm": 0.75, "learning_rate": 1.300099897164796e-05, "loss": 0.3087, "step": 10679 }, { "epoch": 0.8082872144174826, "grad_norm": 0.796875, "learning_rate": 1.2999862769869032e-05, "loss": 0.3301, "step": 10680 }, { "epoch": 0.80836289674093, "grad_norm": 0.76953125, "learning_rate": 1.2998726525531906e-05, "loss": 0.2935, "step": 10681 }, { "epoch": 0.8084385790643773, "grad_norm": 0.71875, "learning_rate": 1.2997590238652697e-05, "loss": 0.298, "step": 10682 }, { "epoch": 0.8085142613878246, "grad_norm": 0.78125, "learning_rate": 1.2996453909247529e-05, "loss": 0.3547, "step": 10683 }, { "epoch": 0.8085899437112719, "grad_norm": 0.71875, "learning_rate": 1.2995317537332517e-05, "loss": 0.2798, "step": 10684 }, { "epoch": 0.8086656260347193, "grad_norm": 0.73828125, "learning_rate": 1.2994181122923788e-05, "loss": 0.3243, "step": 10685 }, { "epoch": 0.8087413083581666, "grad_norm": 0.75390625, "learning_rate": 1.299304466603746e-05, "loss": 0.3183, "step": 10686 }, { "epoch": 0.808816990681614, "grad_norm": 0.80078125, "learning_rate": 1.2991908166689657e-05, "loss": 0.3497, "step": 10687 }, { "epoch": 0.8088926730050613, "grad_norm": 0.70703125, "learning_rate": 1.29907716248965e-05, "loss": 0.2989, "step": 10688 }, { "epoch": 0.8089683553285086, "grad_norm": 0.75390625, "learning_rate": 1.298963504067412e-05, "loss": 0.3172, "step": 10689 }, { "epoch": 0.8090440376519559, "grad_norm": 0.671875, "learning_rate": 1.2988498414038635e-05, "loss": 0.258, "step": 10690 }, { "epoch": 0.8091197199754032, "grad_norm": 0.76171875, "learning_rate": 1.298736174500617e-05, "loss": 0.2859, "step": 10691 }, { "epoch": 0.8091954022988506, "grad_norm": 0.78515625, "learning_rate": 1.2986225033592854e-05, "loss": 0.3361, "step": 10692 }, { "epoch": 0.8092710846222979, "grad_norm": 0.80078125, "learning_rate": 1.2985088279814813e-05, "loss": 0.3337, "step": 10693 }, { "epoch": 0.8093467669457453, "grad_norm": 0.75390625, "learning_rate": 1.2983951483688167e-05, "loss": 0.2881, "step": 10694 }, { "epoch": 0.8094224492691926, "grad_norm": 0.75390625, "learning_rate": 1.2982814645229052e-05, "loss": 0.3387, "step": 10695 }, { "epoch": 0.80949813159264, "grad_norm": 0.70703125, "learning_rate": 1.2981677764453593e-05, "loss": 0.2838, "step": 10696 }, { "epoch": 0.8095738139160872, "grad_norm": 0.80078125, "learning_rate": 1.2980540841377915e-05, "loss": 0.2897, "step": 10697 }, { "epoch": 0.8096494962395345, "grad_norm": 0.75390625, "learning_rate": 1.2979403876018153e-05, "loss": 0.2974, "step": 10698 }, { "epoch": 0.8097251785629819, "grad_norm": 0.765625, "learning_rate": 1.2978266868390435e-05, "loss": 0.3307, "step": 10699 }, { "epoch": 0.8098008608864292, "grad_norm": 0.8671875, "learning_rate": 1.2977129818510887e-05, "loss": 0.3538, "step": 10700 }, { "epoch": 0.8098765432098766, "grad_norm": 0.79296875, "learning_rate": 1.2975992726395647e-05, "loss": 0.353, "step": 10701 }, { "epoch": 0.8099522255333239, "grad_norm": 0.71484375, "learning_rate": 1.2974855592060841e-05, "loss": 0.3084, "step": 10702 }, { "epoch": 0.8100279078567713, "grad_norm": 0.734375, "learning_rate": 1.2973718415522605e-05, "loss": 0.2927, "step": 10703 }, { "epoch": 0.8101035901802185, "grad_norm": 0.75, "learning_rate": 1.297258119679707e-05, "loss": 0.296, "step": 10704 }, { "epoch": 0.8101792725036658, "grad_norm": 0.78125, "learning_rate": 1.2971443935900371e-05, "loss": 0.3243, "step": 10705 }, { "epoch": 0.8102549548271132, "grad_norm": 0.671875, "learning_rate": 1.2970306632848637e-05, "loss": 0.2782, "step": 10706 }, { "epoch": 0.8103306371505605, "grad_norm": 0.75, "learning_rate": 1.2969169287658008e-05, "loss": 0.304, "step": 10707 }, { "epoch": 0.8104063194740079, "grad_norm": 1.140625, "learning_rate": 1.2968031900344618e-05, "loss": 0.3618, "step": 10708 }, { "epoch": 0.8104820017974552, "grad_norm": 0.71875, "learning_rate": 1.29668944709246e-05, "loss": 0.2969, "step": 10709 }, { "epoch": 0.8105576841209026, "grad_norm": 0.75390625, "learning_rate": 1.2965756999414097e-05, "loss": 0.309, "step": 10710 }, { "epoch": 0.8106333664443498, "grad_norm": 0.78125, "learning_rate": 1.2964619485829237e-05, "loss": 0.3406, "step": 10711 }, { "epoch": 0.8107090487677971, "grad_norm": 1.203125, "learning_rate": 1.2963481930186163e-05, "loss": 0.3431, "step": 10712 }, { "epoch": 0.8107847310912445, "grad_norm": 0.73828125, "learning_rate": 1.2962344332501013e-05, "loss": 0.3051, "step": 10713 }, { "epoch": 0.8108604134146918, "grad_norm": 0.70703125, "learning_rate": 1.296120669278993e-05, "loss": 0.3031, "step": 10714 }, { "epoch": 0.8109360957381392, "grad_norm": 0.70703125, "learning_rate": 1.296006901106904e-05, "loss": 0.285, "step": 10715 }, { "epoch": 0.8110117780615865, "grad_norm": 0.734375, "learning_rate": 1.2958931287354495e-05, "loss": 0.3019, "step": 10716 }, { "epoch": 0.8110874603850339, "grad_norm": 0.73046875, "learning_rate": 1.2957793521662434e-05, "loss": 0.3266, "step": 10717 }, { "epoch": 0.8111631427084811, "grad_norm": 0.734375, "learning_rate": 1.2956655714008992e-05, "loss": 0.3136, "step": 10718 }, { "epoch": 0.8112388250319285, "grad_norm": 0.78125, "learning_rate": 1.295551786441032e-05, "loss": 0.3211, "step": 10719 }, { "epoch": 0.8113145073553758, "grad_norm": 0.73046875, "learning_rate": 1.2954379972882551e-05, "loss": 0.3018, "step": 10720 }, { "epoch": 0.8113901896788231, "grad_norm": 0.7578125, "learning_rate": 1.2953242039441833e-05, "loss": 0.3063, "step": 10721 }, { "epoch": 0.8114658720022705, "grad_norm": 0.7578125, "learning_rate": 1.2952104064104309e-05, "loss": 0.3035, "step": 10722 }, { "epoch": 0.8115415543257178, "grad_norm": 0.796875, "learning_rate": 1.2950966046886124e-05, "loss": 0.3491, "step": 10723 }, { "epoch": 0.8116172366491651, "grad_norm": 0.7890625, "learning_rate": 1.2949827987803421e-05, "loss": 0.3316, "step": 10724 }, { "epoch": 0.8116929189726124, "grad_norm": 0.76953125, "learning_rate": 1.2948689886872346e-05, "loss": 0.3272, "step": 10725 }, { "epoch": 0.8117686012960598, "grad_norm": 0.7265625, "learning_rate": 1.2947551744109044e-05, "loss": 0.3193, "step": 10726 }, { "epoch": 0.8118442836195071, "grad_norm": 0.76953125, "learning_rate": 1.2946413559529662e-05, "loss": 0.3296, "step": 10727 }, { "epoch": 0.8119199659429545, "grad_norm": 0.76171875, "learning_rate": 1.2945275333150348e-05, "loss": 0.3198, "step": 10728 }, { "epoch": 0.8119956482664018, "grad_norm": 0.77734375, "learning_rate": 1.2944137064987249e-05, "loss": 0.3166, "step": 10729 }, { "epoch": 0.8120713305898491, "grad_norm": 0.8046875, "learning_rate": 1.2942998755056515e-05, "loss": 0.3155, "step": 10730 }, { "epoch": 0.8121470129132964, "grad_norm": 0.75, "learning_rate": 1.2941860403374294e-05, "loss": 0.3078, "step": 10731 }, { "epoch": 0.8122226952367437, "grad_norm": 0.7109375, "learning_rate": 1.2940722009956731e-05, "loss": 0.2635, "step": 10732 }, { "epoch": 0.8122983775601911, "grad_norm": 0.78515625, "learning_rate": 1.2939583574819983e-05, "loss": 0.3071, "step": 10733 }, { "epoch": 0.8123740598836384, "grad_norm": 1.1796875, "learning_rate": 1.2938445097980197e-05, "loss": 0.3631, "step": 10734 }, { "epoch": 0.8124497422070858, "grad_norm": 0.81640625, "learning_rate": 1.2937306579453524e-05, "loss": 0.3538, "step": 10735 }, { "epoch": 0.8125254245305331, "grad_norm": 0.8046875, "learning_rate": 1.2936168019256114e-05, "loss": 0.3442, "step": 10736 }, { "epoch": 0.8126011068539805, "grad_norm": 0.75, "learning_rate": 1.2935029417404126e-05, "loss": 0.3055, "step": 10737 }, { "epoch": 0.8126767891774277, "grad_norm": 0.7421875, "learning_rate": 1.2933890773913707e-05, "loss": 0.3135, "step": 10738 }, { "epoch": 0.812752471500875, "grad_norm": 0.76953125, "learning_rate": 1.2932752088801013e-05, "loss": 0.3402, "step": 10739 }, { "epoch": 0.8128281538243224, "grad_norm": 0.7734375, "learning_rate": 1.2931613362082198e-05, "loss": 0.3002, "step": 10740 }, { "epoch": 0.8129038361477697, "grad_norm": 0.80078125, "learning_rate": 1.2930474593773416e-05, "loss": 0.3174, "step": 10741 }, { "epoch": 0.8129795184712171, "grad_norm": 0.71875, "learning_rate": 1.2929335783890826e-05, "loss": 0.2893, "step": 10742 }, { "epoch": 0.8130552007946644, "grad_norm": 0.7734375, "learning_rate": 1.2928196932450579e-05, "loss": 0.3354, "step": 10743 }, { "epoch": 0.8131308831181118, "grad_norm": 0.82421875, "learning_rate": 1.2927058039468832e-05, "loss": 0.3428, "step": 10744 }, { "epoch": 0.813206565441559, "grad_norm": 0.71484375, "learning_rate": 1.2925919104961744e-05, "loss": 0.287, "step": 10745 }, { "epoch": 0.8132822477650063, "grad_norm": 0.67578125, "learning_rate": 1.2924780128945473e-05, "loss": 0.2731, "step": 10746 }, { "epoch": 0.8133579300884537, "grad_norm": 0.7421875, "learning_rate": 1.2923641111436178e-05, "loss": 0.3007, "step": 10747 }, { "epoch": 0.813433612411901, "grad_norm": 0.80859375, "learning_rate": 1.2922502052450017e-05, "loss": 0.3322, "step": 10748 }, { "epoch": 0.8135092947353484, "grad_norm": 0.703125, "learning_rate": 1.292136295200315e-05, "loss": 0.2895, "step": 10749 }, { "epoch": 0.8135849770587957, "grad_norm": 0.76171875, "learning_rate": 1.2920223810111731e-05, "loss": 0.3075, "step": 10750 }, { "epoch": 0.8136606593822431, "grad_norm": 0.703125, "learning_rate": 1.2919084626791929e-05, "loss": 0.2759, "step": 10751 }, { "epoch": 0.8137363417056903, "grad_norm": 0.76171875, "learning_rate": 1.2917945402059906e-05, "loss": 0.3078, "step": 10752 }, { "epoch": 0.8138120240291377, "grad_norm": 0.7109375, "learning_rate": 1.2916806135931815e-05, "loss": 0.284, "step": 10753 }, { "epoch": 0.813887706352585, "grad_norm": 0.79296875, "learning_rate": 1.2915666828423826e-05, "loss": 0.3369, "step": 10754 }, { "epoch": 0.8139633886760324, "grad_norm": 0.73828125, "learning_rate": 1.29145274795521e-05, "loss": 0.3081, "step": 10755 }, { "epoch": 0.8140390709994797, "grad_norm": 0.77734375, "learning_rate": 1.2913388089332797e-05, "loss": 0.3175, "step": 10756 }, { "epoch": 0.814114753322927, "grad_norm": 0.78515625, "learning_rate": 1.2912248657782086e-05, "loss": 0.3059, "step": 10757 }, { "epoch": 0.8141904356463744, "grad_norm": 0.80078125, "learning_rate": 1.2911109184916132e-05, "loss": 0.3113, "step": 10758 }, { "epoch": 0.8142661179698216, "grad_norm": 0.76953125, "learning_rate": 1.2909969670751097e-05, "loss": 0.3113, "step": 10759 }, { "epoch": 0.814341800293269, "grad_norm": 0.7578125, "learning_rate": 1.2908830115303149e-05, "loss": 0.3281, "step": 10760 }, { "epoch": 0.8144174826167163, "grad_norm": 0.74609375, "learning_rate": 1.2907690518588456e-05, "loss": 0.3193, "step": 10761 }, { "epoch": 0.8144931649401637, "grad_norm": 0.7421875, "learning_rate": 1.2906550880623178e-05, "loss": 0.3121, "step": 10762 }, { "epoch": 0.814568847263611, "grad_norm": 0.7578125, "learning_rate": 1.2905411201423491e-05, "loss": 0.2978, "step": 10763 }, { "epoch": 0.8146445295870584, "grad_norm": 0.75, "learning_rate": 1.2904271481005562e-05, "loss": 0.3181, "step": 10764 }, { "epoch": 0.8147202119105057, "grad_norm": 1.1015625, "learning_rate": 1.2903131719385553e-05, "loss": 0.3747, "step": 10765 }, { "epoch": 0.8147958942339529, "grad_norm": 0.734375, "learning_rate": 1.2901991916579646e-05, "loss": 0.3079, "step": 10766 }, { "epoch": 0.8148715765574003, "grad_norm": 0.734375, "learning_rate": 1.2900852072603999e-05, "loss": 0.2925, "step": 10767 }, { "epoch": 0.8149472588808476, "grad_norm": 0.7578125, "learning_rate": 1.2899712187474787e-05, "loss": 0.3342, "step": 10768 }, { "epoch": 0.815022941204295, "grad_norm": 0.8046875, "learning_rate": 1.2898572261208182e-05, "loss": 0.3637, "step": 10769 }, { "epoch": 0.8150986235277423, "grad_norm": 0.734375, "learning_rate": 1.289743229382036e-05, "loss": 0.2805, "step": 10770 }, { "epoch": 0.8151743058511897, "grad_norm": 0.70703125, "learning_rate": 1.289629228532748e-05, "loss": 0.2894, "step": 10771 }, { "epoch": 0.815249988174637, "grad_norm": 0.8046875, "learning_rate": 1.2895152235745732e-05, "loss": 0.3633, "step": 10772 }, { "epoch": 0.8153256704980842, "grad_norm": 0.78515625, "learning_rate": 1.2894012145091277e-05, "loss": 0.324, "step": 10773 }, { "epoch": 0.8154013528215316, "grad_norm": 0.98046875, "learning_rate": 1.2892872013380293e-05, "loss": 0.3843, "step": 10774 }, { "epoch": 0.8154770351449789, "grad_norm": 0.7734375, "learning_rate": 1.2891731840628958e-05, "loss": 0.3095, "step": 10775 }, { "epoch": 0.8155527174684263, "grad_norm": 0.734375, "learning_rate": 1.2890591626853443e-05, "loss": 0.2913, "step": 10776 }, { "epoch": 0.8156283997918736, "grad_norm": 0.7578125, "learning_rate": 1.2889451372069924e-05, "loss": 0.3255, "step": 10777 }, { "epoch": 0.815704082115321, "grad_norm": 0.796875, "learning_rate": 1.288831107629458e-05, "loss": 0.3362, "step": 10778 }, { "epoch": 0.8157797644387683, "grad_norm": 0.796875, "learning_rate": 1.2887170739543586e-05, "loss": 0.3453, "step": 10779 }, { "epoch": 0.8158554467622156, "grad_norm": 0.78515625, "learning_rate": 1.2886030361833122e-05, "loss": 0.3421, "step": 10780 }, { "epoch": 0.8159311290856629, "grad_norm": 0.7578125, "learning_rate": 1.2884889943179363e-05, "loss": 0.3393, "step": 10781 }, { "epoch": 0.8160068114091102, "grad_norm": 0.7734375, "learning_rate": 1.2883749483598491e-05, "loss": 0.3206, "step": 10782 }, { "epoch": 0.8160824937325576, "grad_norm": 0.7734375, "learning_rate": 1.2882608983106684e-05, "loss": 0.3342, "step": 10783 }, { "epoch": 0.8161581760560049, "grad_norm": 0.80859375, "learning_rate": 1.2881468441720122e-05, "loss": 0.369, "step": 10784 }, { "epoch": 0.8162338583794523, "grad_norm": 0.67578125, "learning_rate": 1.2880327859454983e-05, "loss": 0.2277, "step": 10785 }, { "epoch": 0.8163095407028996, "grad_norm": 0.71875, "learning_rate": 1.2879187236327452e-05, "loss": 0.2834, "step": 10786 }, { "epoch": 0.8163852230263469, "grad_norm": 0.76171875, "learning_rate": 1.287804657235371e-05, "loss": 0.33, "step": 10787 }, { "epoch": 0.8164609053497942, "grad_norm": 0.73828125, "learning_rate": 1.2876905867549938e-05, "loss": 0.3147, "step": 10788 }, { "epoch": 0.8165365876732416, "grad_norm": 0.8125, "learning_rate": 1.287576512193232e-05, "loss": 0.3401, "step": 10789 }, { "epoch": 0.8166122699966889, "grad_norm": 0.859375, "learning_rate": 1.287462433551704e-05, "loss": 0.3448, "step": 10790 }, { "epoch": 0.8166879523201362, "grad_norm": 0.76171875, "learning_rate": 1.2873483508320277e-05, "loss": 0.324, "step": 10791 }, { "epoch": 0.8167636346435836, "grad_norm": 0.69140625, "learning_rate": 1.287234264035822e-05, "loss": 0.2698, "step": 10792 }, { "epoch": 0.8168393169670309, "grad_norm": 0.6796875, "learning_rate": 1.2871201731647054e-05, "loss": 0.2727, "step": 10793 }, { "epoch": 0.8169149992904782, "grad_norm": 0.79296875, "learning_rate": 1.2870060782202964e-05, "loss": 0.3283, "step": 10794 }, { "epoch": 0.8169906816139255, "grad_norm": 0.69921875, "learning_rate": 1.286891979204214e-05, "loss": 0.2969, "step": 10795 }, { "epoch": 0.8170663639373729, "grad_norm": 0.72265625, "learning_rate": 1.2867778761180763e-05, "loss": 0.2826, "step": 10796 }, { "epoch": 0.8171420462608202, "grad_norm": 0.8046875, "learning_rate": 1.2866637689635023e-05, "loss": 0.341, "step": 10797 }, { "epoch": 0.8172177285842676, "grad_norm": 0.76171875, "learning_rate": 1.2865496577421108e-05, "loss": 0.3466, "step": 10798 }, { "epoch": 0.8172934109077149, "grad_norm": 0.67578125, "learning_rate": 1.2864355424555206e-05, "loss": 0.2571, "step": 10799 }, { "epoch": 0.8173690932311622, "grad_norm": 0.84375, "learning_rate": 1.2863214231053507e-05, "loss": 0.3378, "step": 10800 }, { "epoch": 0.8174447755546095, "grad_norm": 0.75, "learning_rate": 1.2862072996932202e-05, "loss": 0.3061, "step": 10801 }, { "epoch": 0.8175204578780568, "grad_norm": 0.75390625, "learning_rate": 1.286093172220748e-05, "loss": 0.3327, "step": 10802 }, { "epoch": 0.8175961402015042, "grad_norm": 0.77734375, "learning_rate": 1.2859790406895532e-05, "loss": 0.3339, "step": 10803 }, { "epoch": 0.8176718225249515, "grad_norm": 0.7890625, "learning_rate": 1.2858649051012549e-05, "loss": 0.3491, "step": 10804 }, { "epoch": 0.8177475048483989, "grad_norm": 0.7265625, "learning_rate": 1.2857507654574723e-05, "loss": 0.3038, "step": 10805 }, { "epoch": 0.8178231871718462, "grad_norm": 0.78515625, "learning_rate": 1.2856366217598246e-05, "loss": 0.3448, "step": 10806 }, { "epoch": 0.8178988694952936, "grad_norm": 0.78515625, "learning_rate": 1.2855224740099314e-05, "loss": 0.3291, "step": 10807 }, { "epoch": 0.8179745518187408, "grad_norm": 0.74609375, "learning_rate": 1.2854083222094122e-05, "loss": 0.2936, "step": 10808 }, { "epoch": 0.8180502341421881, "grad_norm": 0.74609375, "learning_rate": 1.285294166359886e-05, "loss": 0.2894, "step": 10809 }, { "epoch": 0.8181259164656355, "grad_norm": 0.76171875, "learning_rate": 1.2851800064629723e-05, "loss": 0.3179, "step": 10810 }, { "epoch": 0.8182015987890828, "grad_norm": 0.80078125, "learning_rate": 1.2850658425202908e-05, "loss": 0.341, "step": 10811 }, { "epoch": 0.8182772811125302, "grad_norm": 0.7890625, "learning_rate": 1.2849516745334614e-05, "loss": 0.3113, "step": 10812 }, { "epoch": 0.8183529634359775, "grad_norm": 0.734375, "learning_rate": 1.2848375025041034e-05, "loss": 0.3097, "step": 10813 }, { "epoch": 0.8184286457594249, "grad_norm": 0.75, "learning_rate": 1.2847233264338366e-05, "loss": 0.3053, "step": 10814 }, { "epoch": 0.8185043280828721, "grad_norm": 0.73828125, "learning_rate": 1.2846091463242804e-05, "loss": 0.3234, "step": 10815 }, { "epoch": 0.8185800104063194, "grad_norm": 0.8046875, "learning_rate": 1.2844949621770558e-05, "loss": 0.347, "step": 10816 }, { "epoch": 0.8186556927297668, "grad_norm": 0.73046875, "learning_rate": 1.2843807739937816e-05, "loss": 0.3139, "step": 10817 }, { "epoch": 0.8187313750532141, "grad_norm": 0.69140625, "learning_rate": 1.284266581776078e-05, "loss": 0.2748, "step": 10818 }, { "epoch": 0.8188070573766615, "grad_norm": 0.77734375, "learning_rate": 1.2841523855255654e-05, "loss": 0.3392, "step": 10819 }, { "epoch": 0.8188827397001088, "grad_norm": 0.765625, "learning_rate": 1.2840381852438633e-05, "loss": 0.3142, "step": 10820 }, { "epoch": 0.8189584220235562, "grad_norm": 1.0390625, "learning_rate": 1.2839239809325921e-05, "loss": 0.308, "step": 10821 }, { "epoch": 0.8190341043470034, "grad_norm": 0.734375, "learning_rate": 1.2838097725933723e-05, "loss": 0.3134, "step": 10822 }, { "epoch": 0.8191097866704508, "grad_norm": 0.796875, "learning_rate": 1.2836955602278238e-05, "loss": 0.3234, "step": 10823 }, { "epoch": 0.8191854689938981, "grad_norm": 0.7421875, "learning_rate": 1.2835813438375667e-05, "loss": 0.2919, "step": 10824 }, { "epoch": 0.8192611513173454, "grad_norm": 0.6875, "learning_rate": 1.2834671234242215e-05, "loss": 0.2517, "step": 10825 }, { "epoch": 0.8193368336407928, "grad_norm": 0.98046875, "learning_rate": 1.2833528989894093e-05, "loss": 0.3597, "step": 10826 }, { "epoch": 0.8194125159642401, "grad_norm": 0.8125, "learning_rate": 1.2832386705347494e-05, "loss": 0.354, "step": 10827 }, { "epoch": 0.8194881982876875, "grad_norm": 0.75390625, "learning_rate": 1.2831244380618633e-05, "loss": 0.314, "step": 10828 }, { "epoch": 0.8195638806111347, "grad_norm": 0.765625, "learning_rate": 1.283010201572371e-05, "loss": 0.3503, "step": 10829 }, { "epoch": 0.8196395629345821, "grad_norm": 0.72265625, "learning_rate": 1.282895961067893e-05, "loss": 0.2774, "step": 10830 }, { "epoch": 0.8197152452580294, "grad_norm": 0.8046875, "learning_rate": 1.282781716550051e-05, "loss": 0.3236, "step": 10831 }, { "epoch": 0.8197909275814768, "grad_norm": 0.74609375, "learning_rate": 1.2826674680204647e-05, "loss": 0.3169, "step": 10832 }, { "epoch": 0.8198666099049241, "grad_norm": 0.76953125, "learning_rate": 1.2825532154807552e-05, "loss": 0.2916, "step": 10833 }, { "epoch": 0.8199422922283714, "grad_norm": 0.7734375, "learning_rate": 1.2824389589325435e-05, "loss": 0.3448, "step": 10834 }, { "epoch": 0.8200179745518188, "grad_norm": 0.79296875, "learning_rate": 1.2823246983774506e-05, "loss": 0.2739, "step": 10835 }, { "epoch": 0.820093656875266, "grad_norm": 0.73828125, "learning_rate": 1.2822104338170973e-05, "loss": 0.2955, "step": 10836 }, { "epoch": 0.8201693391987134, "grad_norm": 0.796875, "learning_rate": 1.2820961652531048e-05, "loss": 0.3524, "step": 10837 }, { "epoch": 0.8202450215221607, "grad_norm": 0.734375, "learning_rate": 1.2819818926870942e-05, "loss": 0.2826, "step": 10838 }, { "epoch": 0.8203207038456081, "grad_norm": 0.75, "learning_rate": 1.2818676161206862e-05, "loss": 0.329, "step": 10839 }, { "epoch": 0.8203963861690554, "grad_norm": 0.765625, "learning_rate": 1.2817533355555028e-05, "loss": 0.3425, "step": 10840 }, { "epoch": 0.8204720684925028, "grad_norm": 0.83203125, "learning_rate": 1.2816390509931641e-05, "loss": 0.3701, "step": 10841 }, { "epoch": 0.82054775081595, "grad_norm": 0.82421875, "learning_rate": 1.281524762435293e-05, "loss": 0.3331, "step": 10842 }, { "epoch": 0.8206234331393973, "grad_norm": 0.7265625, "learning_rate": 1.2814104698835096e-05, "loss": 0.322, "step": 10843 }, { "epoch": 0.8206991154628447, "grad_norm": 0.90234375, "learning_rate": 1.2812961733394357e-05, "loss": 0.3177, "step": 10844 }, { "epoch": 0.820774797786292, "grad_norm": 0.7734375, "learning_rate": 1.2811818728046932e-05, "loss": 0.3429, "step": 10845 }, { "epoch": 0.8208504801097394, "grad_norm": 1.015625, "learning_rate": 1.2810675682809031e-05, "loss": 0.2918, "step": 10846 }, { "epoch": 0.8209261624331867, "grad_norm": 0.7890625, "learning_rate": 1.2809532597696872e-05, "loss": 0.3453, "step": 10847 }, { "epoch": 0.8210018447566341, "grad_norm": 0.71875, "learning_rate": 1.280838947272667e-05, "loss": 0.2965, "step": 10848 }, { "epoch": 0.8210775270800813, "grad_norm": 0.76953125, "learning_rate": 1.2807246307914648e-05, "loss": 0.3352, "step": 10849 }, { "epoch": 0.8211532094035286, "grad_norm": 0.68359375, "learning_rate": 1.2806103103277017e-05, "loss": 0.2768, "step": 10850 }, { "epoch": 0.821228891726976, "grad_norm": 0.80859375, "learning_rate": 1.280495985883e-05, "loss": 0.3339, "step": 10851 }, { "epoch": 0.8213045740504233, "grad_norm": 0.78125, "learning_rate": 1.2803816574589814e-05, "loss": 0.2909, "step": 10852 }, { "epoch": 0.8213802563738707, "grad_norm": 0.83203125, "learning_rate": 1.2802673250572674e-05, "loss": 0.3436, "step": 10853 }, { "epoch": 0.821455938697318, "grad_norm": 0.83984375, "learning_rate": 1.280152988679481e-05, "loss": 0.3771, "step": 10854 }, { "epoch": 0.8215316210207654, "grad_norm": 0.76171875, "learning_rate": 1.2800386483272433e-05, "loss": 0.3188, "step": 10855 }, { "epoch": 0.8216073033442126, "grad_norm": 0.75390625, "learning_rate": 1.279924304002177e-05, "loss": 0.2892, "step": 10856 }, { "epoch": 0.82168298566766, "grad_norm": 0.8203125, "learning_rate": 1.279809955705904e-05, "loss": 0.3326, "step": 10857 }, { "epoch": 0.8217586679911073, "grad_norm": 0.80078125, "learning_rate": 1.2796956034400467e-05, "loss": 0.3105, "step": 10858 }, { "epoch": 0.8218343503145547, "grad_norm": 0.75, "learning_rate": 1.2795812472062273e-05, "loss": 0.3055, "step": 10859 }, { "epoch": 0.821910032638002, "grad_norm": 0.7265625, "learning_rate": 1.279466887006068e-05, "loss": 0.28, "step": 10860 }, { "epoch": 0.8219857149614493, "grad_norm": 0.703125, "learning_rate": 1.2793525228411913e-05, "loss": 0.28, "step": 10861 }, { "epoch": 0.8220613972848967, "grad_norm": 0.8828125, "learning_rate": 1.2792381547132195e-05, "loss": 0.2976, "step": 10862 }, { "epoch": 0.8221370796083439, "grad_norm": 0.80859375, "learning_rate": 1.2791237826237756e-05, "loss": 0.3334, "step": 10863 }, { "epoch": 0.8222127619317913, "grad_norm": 0.73046875, "learning_rate": 1.2790094065744822e-05, "loss": 0.2707, "step": 10864 }, { "epoch": 0.8222884442552386, "grad_norm": 0.73828125, "learning_rate": 1.2788950265669611e-05, "loss": 0.3064, "step": 10865 }, { "epoch": 0.822364126578686, "grad_norm": 0.75, "learning_rate": 1.2787806426028354e-05, "loss": 0.2878, "step": 10866 }, { "epoch": 0.8224398089021333, "grad_norm": 0.69921875, "learning_rate": 1.2786662546837281e-05, "loss": 0.2716, "step": 10867 }, { "epoch": 0.8225154912255807, "grad_norm": 0.734375, "learning_rate": 1.2785518628112616e-05, "loss": 0.2989, "step": 10868 }, { "epoch": 0.822591173549028, "grad_norm": 0.7890625, "learning_rate": 1.2784374669870592e-05, "loss": 0.3179, "step": 10869 }, { "epoch": 0.8226668558724752, "grad_norm": 0.8203125, "learning_rate": 1.2783230672127434e-05, "loss": 0.3567, "step": 10870 }, { "epoch": 0.8227425381959226, "grad_norm": 0.73046875, "learning_rate": 1.2782086634899369e-05, "loss": 0.3105, "step": 10871 }, { "epoch": 0.8228182205193699, "grad_norm": 0.7421875, "learning_rate": 1.2780942558202633e-05, "loss": 0.3011, "step": 10872 }, { "epoch": 0.8228939028428173, "grad_norm": 0.75390625, "learning_rate": 1.2779798442053456e-05, "loss": 0.3041, "step": 10873 }, { "epoch": 0.8229695851662646, "grad_norm": 0.83984375, "learning_rate": 1.2778654286468067e-05, "loss": 0.3566, "step": 10874 }, { "epoch": 0.823045267489712, "grad_norm": 0.7109375, "learning_rate": 1.2777510091462699e-05, "loss": 0.299, "step": 10875 }, { "epoch": 0.8231209498131593, "grad_norm": 0.8515625, "learning_rate": 1.2776365857053584e-05, "loss": 0.3563, "step": 10876 }, { "epoch": 0.8231966321366065, "grad_norm": 0.71875, "learning_rate": 1.2775221583256952e-05, "loss": 0.2785, "step": 10877 }, { "epoch": 0.8232723144600539, "grad_norm": 0.73046875, "learning_rate": 1.2774077270089044e-05, "loss": 0.2959, "step": 10878 }, { "epoch": 0.8233479967835012, "grad_norm": 0.75390625, "learning_rate": 1.2772932917566087e-05, "loss": 0.3075, "step": 10879 }, { "epoch": 0.8234236791069486, "grad_norm": 0.87890625, "learning_rate": 1.2771788525704318e-05, "loss": 0.3776, "step": 10880 }, { "epoch": 0.8234993614303959, "grad_norm": 0.7734375, "learning_rate": 1.2770644094519973e-05, "loss": 0.3213, "step": 10881 }, { "epoch": 0.8235750437538433, "grad_norm": 0.671875, "learning_rate": 1.2769499624029288e-05, "loss": 0.2651, "step": 10882 }, { "epoch": 0.8236507260772906, "grad_norm": 0.796875, "learning_rate": 1.2768355114248493e-05, "loss": 0.3141, "step": 10883 }, { "epoch": 0.8237264084007379, "grad_norm": 0.74609375, "learning_rate": 1.2767210565193838e-05, "loss": 0.2994, "step": 10884 }, { "epoch": 0.8238020907241852, "grad_norm": 0.75, "learning_rate": 1.2766065976881547e-05, "loss": 0.299, "step": 10885 }, { "epoch": 0.8238777730476325, "grad_norm": 0.73046875, "learning_rate": 1.2764921349327864e-05, "loss": 0.2884, "step": 10886 }, { "epoch": 0.8239534553710799, "grad_norm": 0.94921875, "learning_rate": 1.2763776682549028e-05, "loss": 0.3389, "step": 10887 }, { "epoch": 0.8240291376945272, "grad_norm": 0.71875, "learning_rate": 1.2762631976561277e-05, "loss": 0.289, "step": 10888 }, { "epoch": 0.8241048200179746, "grad_norm": 0.74609375, "learning_rate": 1.2761487231380849e-05, "loss": 0.3033, "step": 10889 }, { "epoch": 0.8241805023414219, "grad_norm": 0.73046875, "learning_rate": 1.2760342447023986e-05, "loss": 0.3054, "step": 10890 }, { "epoch": 0.8242561846648692, "grad_norm": 0.6953125, "learning_rate": 1.2759197623506929e-05, "loss": 0.2757, "step": 10891 }, { "epoch": 0.8243318669883165, "grad_norm": 0.75390625, "learning_rate": 1.275805276084592e-05, "loss": 0.3311, "step": 10892 }, { "epoch": 0.8244075493117639, "grad_norm": 0.76953125, "learning_rate": 1.2756907859057199e-05, "loss": 0.3279, "step": 10893 }, { "epoch": 0.8244832316352112, "grad_norm": 0.76953125, "learning_rate": 1.2755762918157008e-05, "loss": 0.3508, "step": 10894 }, { "epoch": 0.8245589139586585, "grad_norm": 0.7578125, "learning_rate": 1.2754617938161591e-05, "loss": 0.3229, "step": 10895 }, { "epoch": 0.8246345962821059, "grad_norm": 0.7109375, "learning_rate": 1.2753472919087192e-05, "loss": 0.2622, "step": 10896 }, { "epoch": 0.8247102786055532, "grad_norm": 0.69140625, "learning_rate": 1.2752327860950056e-05, "loss": 0.2583, "step": 10897 }, { "epoch": 0.8247859609290005, "grad_norm": 0.8046875, "learning_rate": 1.2751182763766425e-05, "loss": 0.3206, "step": 10898 }, { "epoch": 0.8248616432524478, "grad_norm": 0.71875, "learning_rate": 1.2750037627552544e-05, "loss": 0.2646, "step": 10899 }, { "epoch": 0.8249373255758952, "grad_norm": 0.7734375, "learning_rate": 1.2748892452324661e-05, "loss": 0.3338, "step": 10900 }, { "epoch": 0.8250130078993425, "grad_norm": 0.6953125, "learning_rate": 1.2747747238099022e-05, "loss": 0.2929, "step": 10901 }, { "epoch": 0.8250886902227899, "grad_norm": 0.7578125, "learning_rate": 1.2746601984891873e-05, "loss": 0.3307, "step": 10902 }, { "epoch": 0.8251643725462372, "grad_norm": 0.79296875, "learning_rate": 1.2745456692719458e-05, "loss": 0.3204, "step": 10903 }, { "epoch": 0.8252400548696845, "grad_norm": 0.76953125, "learning_rate": 1.2744311361598034e-05, "loss": 0.3469, "step": 10904 }, { "epoch": 0.8253157371931318, "grad_norm": 0.76171875, "learning_rate": 1.274316599154384e-05, "loss": 0.3128, "step": 10905 }, { "epoch": 0.8253914195165791, "grad_norm": 0.84375, "learning_rate": 1.2742020582573131e-05, "loss": 0.3833, "step": 10906 }, { "epoch": 0.8254671018400265, "grad_norm": 0.76171875, "learning_rate": 1.2740875134702156e-05, "loss": 0.3134, "step": 10907 }, { "epoch": 0.8255427841634738, "grad_norm": 0.671875, "learning_rate": 1.2739729647947159e-05, "loss": 0.2493, "step": 10908 }, { "epoch": 0.8256184664869212, "grad_norm": 0.74609375, "learning_rate": 1.27385841223244e-05, "loss": 0.3137, "step": 10909 }, { "epoch": 0.8256941488103685, "grad_norm": 0.80859375, "learning_rate": 1.273743855785012e-05, "loss": 0.3644, "step": 10910 }, { "epoch": 0.8257698311338159, "grad_norm": 0.765625, "learning_rate": 1.2736292954540583e-05, "loss": 0.3163, "step": 10911 }, { "epoch": 0.8258455134572631, "grad_norm": 0.71875, "learning_rate": 1.273514731241203e-05, "loss": 0.2711, "step": 10912 }, { "epoch": 0.8259211957807104, "grad_norm": 0.734375, "learning_rate": 1.2734001631480721e-05, "loss": 0.3059, "step": 10913 }, { "epoch": 0.8259968781041578, "grad_norm": 0.73828125, "learning_rate": 1.2732855911762907e-05, "loss": 0.3318, "step": 10914 }, { "epoch": 0.8260725604276051, "grad_norm": 0.796875, "learning_rate": 1.2731710153274843e-05, "loss": 0.3295, "step": 10915 }, { "epoch": 0.8261482427510525, "grad_norm": 0.7109375, "learning_rate": 1.2730564356032782e-05, "loss": 0.2801, "step": 10916 }, { "epoch": 0.8262239250744998, "grad_norm": 0.78515625, "learning_rate": 1.272941852005298e-05, "loss": 0.3482, "step": 10917 }, { "epoch": 0.8262996073979472, "grad_norm": 0.75390625, "learning_rate": 1.2728272645351688e-05, "loss": 0.3255, "step": 10918 }, { "epoch": 0.8263752897213944, "grad_norm": 0.734375, "learning_rate": 1.2727126731945171e-05, "loss": 0.3031, "step": 10919 }, { "epoch": 0.8264509720448417, "grad_norm": 0.7734375, "learning_rate": 1.2725980779849685e-05, "loss": 0.305, "step": 10920 }, { "epoch": 0.8265266543682891, "grad_norm": 0.69140625, "learning_rate": 1.2724834789081477e-05, "loss": 0.2673, "step": 10921 }, { "epoch": 0.8266023366917364, "grad_norm": 0.71484375, "learning_rate": 1.2723688759656817e-05, "loss": 0.3052, "step": 10922 }, { "epoch": 0.8266780190151838, "grad_norm": 0.765625, "learning_rate": 1.2722542691591953e-05, "loss": 0.3261, "step": 10923 }, { "epoch": 0.8267537013386311, "grad_norm": 0.71484375, "learning_rate": 1.2721396584903149e-05, "loss": 0.3354, "step": 10924 }, { "epoch": 0.8268293836620785, "grad_norm": 0.8125, "learning_rate": 1.272025043960667e-05, "loss": 0.331, "step": 10925 }, { "epoch": 0.8269050659855257, "grad_norm": 0.79296875, "learning_rate": 1.2719104255718767e-05, "loss": 0.3358, "step": 10926 }, { "epoch": 0.8269807483089731, "grad_norm": 0.796875, "learning_rate": 1.2717958033255702e-05, "loss": 0.311, "step": 10927 }, { "epoch": 0.8270564306324204, "grad_norm": 0.76953125, "learning_rate": 1.271681177223374e-05, "loss": 0.3503, "step": 10928 }, { "epoch": 0.8271321129558677, "grad_norm": 0.80859375, "learning_rate": 1.271566547266914e-05, "loss": 0.3336, "step": 10929 }, { "epoch": 0.8272077952793151, "grad_norm": 0.7578125, "learning_rate": 1.2714519134578165e-05, "loss": 0.3129, "step": 10930 }, { "epoch": 0.8272834776027624, "grad_norm": 0.7578125, "learning_rate": 1.2713372757977078e-05, "loss": 0.3071, "step": 10931 }, { "epoch": 0.8273591599262098, "grad_norm": 0.80078125, "learning_rate": 1.2712226342882143e-05, "loss": 0.3394, "step": 10932 }, { "epoch": 0.827434842249657, "grad_norm": 0.765625, "learning_rate": 1.2711079889309624e-05, "loss": 0.2937, "step": 10933 }, { "epoch": 0.8275105245731044, "grad_norm": 0.74609375, "learning_rate": 1.2709933397275785e-05, "loss": 0.3237, "step": 10934 }, { "epoch": 0.8275862068965517, "grad_norm": 0.7109375, "learning_rate": 1.2708786866796887e-05, "loss": 0.2952, "step": 10935 }, { "epoch": 0.8276618892199991, "grad_norm": 0.74609375, "learning_rate": 1.2707640297889198e-05, "loss": 0.3418, "step": 10936 }, { "epoch": 0.8277375715434464, "grad_norm": 0.72265625, "learning_rate": 1.2706493690568988e-05, "loss": 0.3087, "step": 10937 }, { "epoch": 0.8278132538668938, "grad_norm": 0.7890625, "learning_rate": 1.2705347044852521e-05, "loss": 0.3704, "step": 10938 }, { "epoch": 0.8278889361903411, "grad_norm": 0.7109375, "learning_rate": 1.270420036075606e-05, "loss": 0.2831, "step": 10939 }, { "epoch": 0.8279646185137883, "grad_norm": 0.703125, "learning_rate": 1.2703053638295879e-05, "loss": 0.2531, "step": 10940 }, { "epoch": 0.8280403008372357, "grad_norm": 0.80859375, "learning_rate": 1.2701906877488243e-05, "loss": 0.3572, "step": 10941 }, { "epoch": 0.828115983160683, "grad_norm": 0.7734375, "learning_rate": 1.270076007834942e-05, "loss": 0.3074, "step": 10942 }, { "epoch": 0.8281916654841304, "grad_norm": 0.7265625, "learning_rate": 1.2699613240895686e-05, "loss": 0.3177, "step": 10943 }, { "epoch": 0.8282673478075777, "grad_norm": 0.76171875, "learning_rate": 1.26984663651433e-05, "loss": 0.2972, "step": 10944 }, { "epoch": 0.8283430301310251, "grad_norm": 0.83984375, "learning_rate": 1.2697319451108538e-05, "loss": 0.3791, "step": 10945 }, { "epoch": 0.8284187124544724, "grad_norm": 0.75390625, "learning_rate": 1.2696172498807673e-05, "loss": 0.3263, "step": 10946 }, { "epoch": 0.8284943947779196, "grad_norm": 0.7265625, "learning_rate": 1.2695025508256973e-05, "loss": 0.2942, "step": 10947 }, { "epoch": 0.828570077101367, "grad_norm": 0.80859375, "learning_rate": 1.2693878479472712e-05, "loss": 0.356, "step": 10948 }, { "epoch": 0.8286457594248143, "grad_norm": 0.7421875, "learning_rate": 1.2692731412471163e-05, "loss": 0.3022, "step": 10949 }, { "epoch": 0.8287214417482617, "grad_norm": 0.703125, "learning_rate": 1.2691584307268596e-05, "loss": 0.3038, "step": 10950 }, { "epoch": 0.828797124071709, "grad_norm": 0.84375, "learning_rate": 1.2690437163881286e-05, "loss": 0.3326, "step": 10951 }, { "epoch": 0.8288728063951564, "grad_norm": 0.75, "learning_rate": 1.268928998232551e-05, "loss": 0.3065, "step": 10952 }, { "epoch": 0.8289484887186037, "grad_norm": 0.71875, "learning_rate": 1.2688142762617542e-05, "loss": 0.2905, "step": 10953 }, { "epoch": 0.829024171042051, "grad_norm": 0.7109375, "learning_rate": 1.2686995504773654e-05, "loss": 0.2851, "step": 10954 }, { "epoch": 0.8290998533654983, "grad_norm": 0.73046875, "learning_rate": 1.2685848208810123e-05, "loss": 0.2672, "step": 10955 }, { "epoch": 0.8291755356889456, "grad_norm": 0.8203125, "learning_rate": 1.2684700874743228e-05, "loss": 0.3556, "step": 10956 }, { "epoch": 0.829251218012393, "grad_norm": 0.73046875, "learning_rate": 1.268355350258924e-05, "loss": 0.3058, "step": 10957 }, { "epoch": 0.8293269003358403, "grad_norm": 0.79296875, "learning_rate": 1.2682406092364446e-05, "loss": 0.3408, "step": 10958 }, { "epoch": 0.8294025826592877, "grad_norm": 0.78515625, "learning_rate": 1.2681258644085112e-05, "loss": 0.3341, "step": 10959 }, { "epoch": 0.829478264982735, "grad_norm": 0.7109375, "learning_rate": 1.2680111157767528e-05, "loss": 0.2773, "step": 10960 }, { "epoch": 0.8295539473061823, "grad_norm": 0.8125, "learning_rate": 1.2678963633427966e-05, "loss": 0.3601, "step": 10961 }, { "epoch": 0.8296296296296296, "grad_norm": 0.75390625, "learning_rate": 1.267781607108271e-05, "loss": 0.29, "step": 10962 }, { "epoch": 0.829705311953077, "grad_norm": 0.75390625, "learning_rate": 1.2676668470748035e-05, "loss": 0.3076, "step": 10963 }, { "epoch": 0.8297809942765243, "grad_norm": 0.77734375, "learning_rate": 1.2675520832440225e-05, "loss": 0.3381, "step": 10964 }, { "epoch": 0.8298566765999716, "grad_norm": 0.7890625, "learning_rate": 1.2674373156175562e-05, "loss": 0.3339, "step": 10965 }, { "epoch": 0.829932358923419, "grad_norm": 0.7421875, "learning_rate": 1.2673225441970322e-05, "loss": 0.3344, "step": 10966 }, { "epoch": 0.8300080412468662, "grad_norm": 0.68359375, "learning_rate": 1.2672077689840799e-05, "loss": 0.2753, "step": 10967 }, { "epoch": 0.8300837235703136, "grad_norm": 0.76171875, "learning_rate": 1.2670929899803261e-05, "loss": 0.3212, "step": 10968 }, { "epoch": 0.8301594058937609, "grad_norm": 0.7890625, "learning_rate": 1.2669782071874004e-05, "loss": 0.3166, "step": 10969 }, { "epoch": 0.8302350882172083, "grad_norm": 0.83984375, "learning_rate": 1.2668634206069305e-05, "loss": 0.3597, "step": 10970 }, { "epoch": 0.8303107705406556, "grad_norm": 0.7421875, "learning_rate": 1.266748630240545e-05, "loss": 0.2888, "step": 10971 }, { "epoch": 0.830386452864103, "grad_norm": 0.76171875, "learning_rate": 1.2666338360898724e-05, "loss": 0.3285, "step": 10972 }, { "epoch": 0.8304621351875503, "grad_norm": 0.78125, "learning_rate": 1.2665190381565415e-05, "loss": 0.3154, "step": 10973 }, { "epoch": 0.8305378175109975, "grad_norm": 0.75, "learning_rate": 1.2664042364421803e-05, "loss": 0.331, "step": 10974 }, { "epoch": 0.8306134998344449, "grad_norm": 0.765625, "learning_rate": 1.2662894309484181e-05, "loss": 0.3018, "step": 10975 }, { "epoch": 0.8306891821578922, "grad_norm": 0.73046875, "learning_rate": 1.2661746216768835e-05, "loss": 0.2972, "step": 10976 }, { "epoch": 0.8307648644813396, "grad_norm": 0.8046875, "learning_rate": 1.2660598086292049e-05, "loss": 0.3014, "step": 10977 }, { "epoch": 0.8308405468047869, "grad_norm": 0.9375, "learning_rate": 1.2659449918070114e-05, "loss": 0.3331, "step": 10978 }, { "epoch": 0.8309162291282343, "grad_norm": 0.7265625, "learning_rate": 1.2658301712119316e-05, "loss": 0.2878, "step": 10979 }, { "epoch": 0.8309919114516816, "grad_norm": 0.73046875, "learning_rate": 1.2657153468455947e-05, "loss": 0.3216, "step": 10980 }, { "epoch": 0.8310675937751288, "grad_norm": 0.72265625, "learning_rate": 1.26560051870963e-05, "loss": 0.3075, "step": 10981 }, { "epoch": 0.8311432760985762, "grad_norm": 0.78515625, "learning_rate": 1.2654856868056662e-05, "loss": 0.3285, "step": 10982 }, { "epoch": 0.8312189584220235, "grad_norm": 0.74609375, "learning_rate": 1.2653708511353318e-05, "loss": 0.3026, "step": 10983 }, { "epoch": 0.8312946407454709, "grad_norm": 0.7421875, "learning_rate": 1.2652560117002567e-05, "loss": 0.3188, "step": 10984 }, { "epoch": 0.8313703230689182, "grad_norm": 0.79296875, "learning_rate": 1.26514116850207e-05, "loss": 0.3329, "step": 10985 }, { "epoch": 0.8314460053923656, "grad_norm": 0.84765625, "learning_rate": 1.2650263215424008e-05, "loss": 0.285, "step": 10986 }, { "epoch": 0.8315216877158129, "grad_norm": 0.73046875, "learning_rate": 1.2649114708228783e-05, "loss": 0.2734, "step": 10987 }, { "epoch": 0.8315973700392602, "grad_norm": 1.0546875, "learning_rate": 1.2647966163451321e-05, "loss": 0.3843, "step": 10988 }, { "epoch": 0.8316730523627075, "grad_norm": 0.71875, "learning_rate": 1.2646817581107913e-05, "loss": 0.3214, "step": 10989 }, { "epoch": 0.8317487346861548, "grad_norm": 0.73046875, "learning_rate": 1.2645668961214858e-05, "loss": 0.282, "step": 10990 }, { "epoch": 0.8318244170096022, "grad_norm": 0.765625, "learning_rate": 1.2644520303788448e-05, "loss": 0.323, "step": 10991 }, { "epoch": 0.8319000993330495, "grad_norm": 0.78125, "learning_rate": 1.264337160884498e-05, "loss": 0.3102, "step": 10992 }, { "epoch": 0.8319757816564969, "grad_norm": 1.125, "learning_rate": 1.2642222876400748e-05, "loss": 0.3456, "step": 10993 }, { "epoch": 0.8320514639799442, "grad_norm": 0.76953125, "learning_rate": 1.2641074106472049e-05, "loss": 0.3231, "step": 10994 }, { "epoch": 0.8321271463033915, "grad_norm": 0.82421875, "learning_rate": 1.2639925299075185e-05, "loss": 0.3734, "step": 10995 }, { "epoch": 0.8322028286268388, "grad_norm": 0.83203125, "learning_rate": 1.2638776454226449e-05, "loss": 0.3897, "step": 10996 }, { "epoch": 0.8322785109502862, "grad_norm": 0.76171875, "learning_rate": 1.263762757194214e-05, "loss": 0.3219, "step": 10997 }, { "epoch": 0.8323541932737335, "grad_norm": 0.8515625, "learning_rate": 1.2636478652238556e-05, "loss": 0.349, "step": 10998 }, { "epoch": 0.8324298755971808, "grad_norm": 0.75, "learning_rate": 1.2635329695132003e-05, "loss": 0.338, "step": 10999 }, { "epoch": 0.8325055579206282, "grad_norm": 0.84765625, "learning_rate": 1.2634180700638771e-05, "loss": 0.3104, "step": 11000 }, { "epoch": 0.8325812402440755, "grad_norm": 0.671875, "learning_rate": 1.2633031668775169e-05, "loss": 0.2478, "step": 11001 }, { "epoch": 0.8326569225675228, "grad_norm": 0.75390625, "learning_rate": 1.263188259955749e-05, "loss": 0.3185, "step": 11002 }, { "epoch": 0.8327326048909701, "grad_norm": 0.75, "learning_rate": 1.2630733493002043e-05, "loss": 0.3199, "step": 11003 }, { "epoch": 0.8328082872144175, "grad_norm": 0.7109375, "learning_rate": 1.2629584349125129e-05, "loss": 0.3107, "step": 11004 }, { "epoch": 0.8328839695378648, "grad_norm": 0.76953125, "learning_rate": 1.2628435167943045e-05, "loss": 0.3175, "step": 11005 }, { "epoch": 0.8329596518613122, "grad_norm": 0.79296875, "learning_rate": 1.26272859494721e-05, "loss": 0.3809, "step": 11006 }, { "epoch": 0.8330353341847595, "grad_norm": 0.796875, "learning_rate": 1.262613669372859e-05, "loss": 0.3378, "step": 11007 }, { "epoch": 0.8331110165082068, "grad_norm": 0.73046875, "learning_rate": 1.2624987400728829e-05, "loss": 0.3084, "step": 11008 }, { "epoch": 0.8331866988316541, "grad_norm": 0.72265625, "learning_rate": 1.2623838070489116e-05, "loss": 0.3139, "step": 11009 }, { "epoch": 0.8332623811551014, "grad_norm": 0.73046875, "learning_rate": 1.2622688703025758e-05, "loss": 0.297, "step": 11010 }, { "epoch": 0.8333380634785488, "grad_norm": 0.73046875, "learning_rate": 1.262153929835506e-05, "loss": 0.3079, "step": 11011 }, { "epoch": 0.8334137458019961, "grad_norm": 0.8046875, "learning_rate": 1.2620389856493326e-05, "loss": 0.3295, "step": 11012 }, { "epoch": 0.8334894281254435, "grad_norm": 0.7890625, "learning_rate": 1.2619240377456869e-05, "loss": 0.3259, "step": 11013 }, { "epoch": 0.8335651104488908, "grad_norm": 0.80859375, "learning_rate": 1.261809086126199e-05, "loss": 0.3726, "step": 11014 }, { "epoch": 0.8336407927723382, "grad_norm": 1.078125, "learning_rate": 1.2616941307925e-05, "loss": 0.3871, "step": 11015 }, { "epoch": 0.8337164750957854, "grad_norm": 0.71484375, "learning_rate": 1.2615791717462205e-05, "loss": 0.2643, "step": 11016 }, { "epoch": 0.8337921574192327, "grad_norm": 0.78515625, "learning_rate": 1.2614642089889917e-05, "loss": 0.3491, "step": 11017 }, { "epoch": 0.8338678397426801, "grad_norm": 0.71484375, "learning_rate": 1.2613492425224445e-05, "loss": 0.3049, "step": 11018 }, { "epoch": 0.8339435220661274, "grad_norm": 0.77734375, "learning_rate": 1.2612342723482099e-05, "loss": 0.3485, "step": 11019 }, { "epoch": 0.8340192043895748, "grad_norm": 0.765625, "learning_rate": 1.2611192984679186e-05, "loss": 0.3209, "step": 11020 }, { "epoch": 0.8340948867130221, "grad_norm": 0.79296875, "learning_rate": 1.2610043208832021e-05, "loss": 0.3193, "step": 11021 }, { "epoch": 0.8341705690364695, "grad_norm": 0.7421875, "learning_rate": 1.2608893395956911e-05, "loss": 0.3104, "step": 11022 }, { "epoch": 0.8342462513599167, "grad_norm": 0.72265625, "learning_rate": 1.2607743546070176e-05, "loss": 0.3074, "step": 11023 }, { "epoch": 0.834321933683364, "grad_norm": 0.7265625, "learning_rate": 1.260659365918812e-05, "loss": 0.285, "step": 11024 }, { "epoch": 0.8343976160068114, "grad_norm": 0.75, "learning_rate": 1.2605443735327061e-05, "loss": 0.337, "step": 11025 }, { "epoch": 0.8344732983302587, "grad_norm": 0.76953125, "learning_rate": 1.2604293774503311e-05, "loss": 0.322, "step": 11026 }, { "epoch": 0.8345489806537061, "grad_norm": 0.8203125, "learning_rate": 1.2603143776733186e-05, "loss": 0.3388, "step": 11027 }, { "epoch": 0.8346246629771534, "grad_norm": 0.828125, "learning_rate": 1.2601993742033001e-05, "loss": 0.3635, "step": 11028 }, { "epoch": 0.8347003453006008, "grad_norm": 0.78515625, "learning_rate": 1.2600843670419067e-05, "loss": 0.3269, "step": 11029 }, { "epoch": 0.834776027624048, "grad_norm": 0.91015625, "learning_rate": 1.25996935619077e-05, "loss": 0.3114, "step": 11030 }, { "epoch": 0.8348517099474954, "grad_norm": 0.75, "learning_rate": 1.259854341651522e-05, "loss": 0.2922, "step": 11031 }, { "epoch": 0.8349273922709427, "grad_norm": 0.71875, "learning_rate": 1.259739323425795e-05, "loss": 0.291, "step": 11032 }, { "epoch": 0.83500307459439, "grad_norm": 0.765625, "learning_rate": 1.259624301515219e-05, "loss": 0.3285, "step": 11033 }, { "epoch": 0.8350787569178374, "grad_norm": 0.78125, "learning_rate": 1.2595092759214271e-05, "loss": 0.321, "step": 11034 }, { "epoch": 0.8351544392412847, "grad_norm": 0.75390625, "learning_rate": 1.2593942466460507e-05, "loss": 0.3156, "step": 11035 }, { "epoch": 0.8352301215647321, "grad_norm": 0.83984375, "learning_rate": 1.2592792136907217e-05, "loss": 0.3776, "step": 11036 }, { "epoch": 0.8353058038881793, "grad_norm": 0.76953125, "learning_rate": 1.2591641770570726e-05, "loss": 0.3243, "step": 11037 }, { "epoch": 0.8353814862116267, "grad_norm": 0.859375, "learning_rate": 1.2590491367467346e-05, "loss": 0.3897, "step": 11038 }, { "epoch": 0.835457168535074, "grad_norm": 0.79296875, "learning_rate": 1.2589340927613398e-05, "loss": 0.3634, "step": 11039 }, { "epoch": 0.8355328508585214, "grad_norm": 0.859375, "learning_rate": 1.2588190451025209e-05, "loss": 0.3687, "step": 11040 }, { "epoch": 0.8356085331819687, "grad_norm": 0.73828125, "learning_rate": 1.2587039937719098e-05, "loss": 0.3191, "step": 11041 }, { "epoch": 0.835684215505416, "grad_norm": 0.7265625, "learning_rate": 1.258588938771138e-05, "loss": 0.2961, "step": 11042 }, { "epoch": 0.8357598978288634, "grad_norm": 0.69140625, "learning_rate": 1.258473880101839e-05, "loss": 0.2849, "step": 11043 }, { "epoch": 0.8358355801523106, "grad_norm": 0.8203125, "learning_rate": 1.2583588177656441e-05, "loss": 0.3674, "step": 11044 }, { "epoch": 0.835911262475758, "grad_norm": 0.76953125, "learning_rate": 1.258243751764186e-05, "loss": 0.3207, "step": 11045 }, { "epoch": 0.8359869447992053, "grad_norm": 0.8046875, "learning_rate": 1.2581286820990975e-05, "loss": 0.3377, "step": 11046 }, { "epoch": 0.8360626271226527, "grad_norm": 0.74609375, "learning_rate": 1.2580136087720103e-05, "loss": 0.3214, "step": 11047 }, { "epoch": 0.8361383094461, "grad_norm": 0.76953125, "learning_rate": 1.2578985317845571e-05, "loss": 0.3587, "step": 11048 }, { "epoch": 0.8362139917695474, "grad_norm": 0.76953125, "learning_rate": 1.257783451138371e-05, "loss": 0.3206, "step": 11049 }, { "epoch": 0.8362896740929947, "grad_norm": 0.71484375, "learning_rate": 1.2576683668350842e-05, "loss": 0.3076, "step": 11050 }, { "epoch": 0.8363653564164419, "grad_norm": 0.78125, "learning_rate": 1.2575532788763295e-05, "loss": 0.3663, "step": 11051 }, { "epoch": 0.8364410387398893, "grad_norm": 0.73046875, "learning_rate": 1.2574381872637395e-05, "loss": 0.2816, "step": 11052 }, { "epoch": 0.8365167210633366, "grad_norm": 0.734375, "learning_rate": 1.2573230919989473e-05, "loss": 0.2988, "step": 11053 }, { "epoch": 0.836592403386784, "grad_norm": 0.7890625, "learning_rate": 1.257207993083585e-05, "loss": 0.3194, "step": 11054 }, { "epoch": 0.8366680857102313, "grad_norm": 0.74609375, "learning_rate": 1.2570928905192862e-05, "loss": 0.2936, "step": 11055 }, { "epoch": 0.8367437680336787, "grad_norm": 0.81640625, "learning_rate": 1.2569777843076836e-05, "loss": 0.3398, "step": 11056 }, { "epoch": 0.836819450357126, "grad_norm": 0.796875, "learning_rate": 1.2568626744504098e-05, "loss": 0.3389, "step": 11057 }, { "epoch": 0.8368951326805733, "grad_norm": 0.74609375, "learning_rate": 1.2567475609490986e-05, "loss": 0.2842, "step": 11058 }, { "epoch": 0.8369708150040206, "grad_norm": 0.7421875, "learning_rate": 1.2566324438053825e-05, "loss": 0.304, "step": 11059 }, { "epoch": 0.8370464973274679, "grad_norm": 0.74609375, "learning_rate": 1.2565173230208946e-05, "loss": 0.2981, "step": 11060 }, { "epoch": 0.8371221796509153, "grad_norm": 0.78125, "learning_rate": 1.2564021985972686e-05, "loss": 0.3409, "step": 11061 }, { "epoch": 0.8371978619743626, "grad_norm": 0.671875, "learning_rate": 1.2562870705361372e-05, "loss": 0.242, "step": 11062 }, { "epoch": 0.83727354429781, "grad_norm": 0.71875, "learning_rate": 1.2561719388391338e-05, "loss": 0.316, "step": 11063 }, { "epoch": 0.8373492266212573, "grad_norm": 0.78515625, "learning_rate": 1.256056803507892e-05, "loss": 0.3518, "step": 11064 }, { "epoch": 0.8374249089447046, "grad_norm": 0.69921875, "learning_rate": 1.2559416645440452e-05, "loss": 0.292, "step": 11065 }, { "epoch": 0.8375005912681519, "grad_norm": 0.77734375, "learning_rate": 1.2558265219492265e-05, "loss": 0.3576, "step": 11066 }, { "epoch": 0.8375762735915993, "grad_norm": 0.765625, "learning_rate": 1.2557113757250697e-05, "loss": 0.3362, "step": 11067 }, { "epoch": 0.8376519559150466, "grad_norm": 0.73828125, "learning_rate": 1.255596225873208e-05, "loss": 0.3179, "step": 11068 }, { "epoch": 0.8377276382384939, "grad_norm": 0.70703125, "learning_rate": 1.2554810723952752e-05, "loss": 0.3084, "step": 11069 }, { "epoch": 0.8378033205619413, "grad_norm": 0.82421875, "learning_rate": 1.2553659152929052e-05, "loss": 0.3561, "step": 11070 }, { "epoch": 0.8378790028853886, "grad_norm": 0.85546875, "learning_rate": 1.2552507545677313e-05, "loss": 0.3872, "step": 11071 }, { "epoch": 0.8379546852088359, "grad_norm": 0.77734375, "learning_rate": 1.2551355902213873e-05, "loss": 0.3453, "step": 11072 }, { "epoch": 0.8380303675322832, "grad_norm": 0.75390625, "learning_rate": 1.2550204222555074e-05, "loss": 0.3039, "step": 11073 }, { "epoch": 0.8381060498557306, "grad_norm": 0.83203125, "learning_rate": 1.2549052506717249e-05, "loss": 0.3744, "step": 11074 }, { "epoch": 0.8381817321791779, "grad_norm": 0.75, "learning_rate": 1.2547900754716745e-05, "loss": 0.3328, "step": 11075 }, { "epoch": 0.8382574145026253, "grad_norm": 0.78515625, "learning_rate": 1.2546748966569894e-05, "loss": 0.317, "step": 11076 }, { "epoch": 0.8383330968260726, "grad_norm": 0.75, "learning_rate": 1.2545597142293033e-05, "loss": 0.2962, "step": 11077 }, { "epoch": 0.83840877914952, "grad_norm": 0.76171875, "learning_rate": 1.2544445281902514e-05, "loss": 0.3388, "step": 11078 }, { "epoch": 0.8384844614729672, "grad_norm": 0.7421875, "learning_rate": 1.2543293385414672e-05, "loss": 0.3106, "step": 11079 }, { "epoch": 0.8385601437964145, "grad_norm": 0.7734375, "learning_rate": 1.2542141452845847e-05, "loss": 0.3231, "step": 11080 }, { "epoch": 0.8386358261198619, "grad_norm": 0.76953125, "learning_rate": 1.2540989484212385e-05, "loss": 0.3493, "step": 11081 }, { "epoch": 0.8387115084433092, "grad_norm": 0.65234375, "learning_rate": 1.2539837479530626e-05, "loss": 0.2255, "step": 11082 }, { "epoch": 0.8387871907667566, "grad_norm": 0.77734375, "learning_rate": 1.2538685438816914e-05, "loss": 0.3089, "step": 11083 }, { "epoch": 0.8388628730902039, "grad_norm": 0.7578125, "learning_rate": 1.2537533362087591e-05, "loss": 0.3265, "step": 11084 }, { "epoch": 0.8389385554136511, "grad_norm": 0.82421875, "learning_rate": 1.2536381249359004e-05, "loss": 0.3584, "step": 11085 }, { "epoch": 0.8390142377370985, "grad_norm": 0.8125, "learning_rate": 1.2535229100647493e-05, "loss": 0.3838, "step": 11086 }, { "epoch": 0.8390899200605458, "grad_norm": 0.72265625, "learning_rate": 1.2534076915969412e-05, "loss": 0.2831, "step": 11087 }, { "epoch": 0.8391656023839932, "grad_norm": 0.73828125, "learning_rate": 1.25329246953411e-05, "loss": 0.2943, "step": 11088 }, { "epoch": 0.8392412847074405, "grad_norm": 0.9296875, "learning_rate": 1.2531772438778902e-05, "loss": 0.2902, "step": 11089 }, { "epoch": 0.8393169670308879, "grad_norm": 0.74609375, "learning_rate": 1.2530620146299168e-05, "loss": 0.3018, "step": 11090 }, { "epoch": 0.8393926493543352, "grad_norm": 0.6875, "learning_rate": 1.2529467817918245e-05, "loss": 0.2444, "step": 11091 }, { "epoch": 0.8394683316777825, "grad_norm": 0.7578125, "learning_rate": 1.252831545365248e-05, "loss": 0.3341, "step": 11092 }, { "epoch": 0.8395440140012298, "grad_norm": 0.77734375, "learning_rate": 1.2527163053518225e-05, "loss": 0.2922, "step": 11093 }, { "epoch": 0.8396196963246771, "grad_norm": 0.8125, "learning_rate": 1.2526010617531822e-05, "loss": 0.3447, "step": 11094 }, { "epoch": 0.8396953786481245, "grad_norm": 0.80859375, "learning_rate": 1.2524858145709622e-05, "loss": 0.3351, "step": 11095 }, { "epoch": 0.8397710609715718, "grad_norm": 0.7578125, "learning_rate": 1.252370563806798e-05, "loss": 0.3268, "step": 11096 }, { "epoch": 0.8398467432950192, "grad_norm": 0.76953125, "learning_rate": 1.2522553094623242e-05, "loss": 0.3455, "step": 11097 }, { "epoch": 0.8399224256184665, "grad_norm": 0.82421875, "learning_rate": 1.2521400515391757e-05, "loss": 0.3299, "step": 11098 }, { "epoch": 0.8399981079419138, "grad_norm": 0.79296875, "learning_rate": 1.2520247900389881e-05, "loss": 0.323, "step": 11099 }, { "epoch": 0.8400737902653611, "grad_norm": 0.76953125, "learning_rate": 1.2519095249633964e-05, "loss": 0.3437, "step": 11100 }, { "epoch": 0.8401494725888085, "grad_norm": 0.78515625, "learning_rate": 1.2517942563140355e-05, "loss": 0.3316, "step": 11101 }, { "epoch": 0.8402251549122558, "grad_norm": 0.7109375, "learning_rate": 1.2516789840925415e-05, "loss": 0.2603, "step": 11102 }, { "epoch": 0.8403008372357031, "grad_norm": 0.7734375, "learning_rate": 1.2515637083005489e-05, "loss": 0.3279, "step": 11103 }, { "epoch": 0.8403765195591505, "grad_norm": 0.83984375, "learning_rate": 1.2514484289396931e-05, "loss": 0.3428, "step": 11104 }, { "epoch": 0.8404522018825978, "grad_norm": 0.796875, "learning_rate": 1.2513331460116102e-05, "loss": 0.3651, "step": 11105 }, { "epoch": 0.8405278842060451, "grad_norm": 0.703125, "learning_rate": 1.2512178595179354e-05, "loss": 0.2753, "step": 11106 }, { "epoch": 0.8406035665294924, "grad_norm": 0.75, "learning_rate": 1.251102569460304e-05, "loss": 0.2931, "step": 11107 }, { "epoch": 0.8406792488529398, "grad_norm": 0.73828125, "learning_rate": 1.2509872758403518e-05, "loss": 0.2854, "step": 11108 }, { "epoch": 0.8407549311763871, "grad_norm": 0.80859375, "learning_rate": 1.2508719786597143e-05, "loss": 0.3389, "step": 11109 }, { "epoch": 0.8408306134998345, "grad_norm": 0.71875, "learning_rate": 1.2507566779200273e-05, "loss": 0.2842, "step": 11110 }, { "epoch": 0.8409062958232818, "grad_norm": 0.79296875, "learning_rate": 1.2506413736229267e-05, "loss": 0.3324, "step": 11111 }, { "epoch": 0.8409819781467291, "grad_norm": 0.72265625, "learning_rate": 1.250526065770048e-05, "loss": 0.2917, "step": 11112 }, { "epoch": 0.8410576604701764, "grad_norm": 0.765625, "learning_rate": 1.2504107543630269e-05, "loss": 0.321, "step": 11113 }, { "epoch": 0.8411333427936237, "grad_norm": 0.69140625, "learning_rate": 1.2502954394034997e-05, "loss": 0.2673, "step": 11114 }, { "epoch": 0.8412090251170711, "grad_norm": 0.73828125, "learning_rate": 1.2501801208931021e-05, "loss": 0.3101, "step": 11115 }, { "epoch": 0.8412847074405184, "grad_norm": 0.8515625, "learning_rate": 1.2500647988334703e-05, "loss": 0.3623, "step": 11116 }, { "epoch": 0.8413603897639658, "grad_norm": 0.796875, "learning_rate": 1.24994947322624e-05, "loss": 0.3275, "step": 11117 }, { "epoch": 0.8414360720874131, "grad_norm": 0.7265625, "learning_rate": 1.2498341440730478e-05, "loss": 0.2923, "step": 11118 }, { "epoch": 0.8415117544108605, "grad_norm": 0.9375, "learning_rate": 1.2497188113755292e-05, "loss": 0.3132, "step": 11119 }, { "epoch": 0.8415874367343077, "grad_norm": 0.76953125, "learning_rate": 1.249603475135321e-05, "loss": 0.3279, "step": 11120 }, { "epoch": 0.841663119057755, "grad_norm": 0.73046875, "learning_rate": 1.249488135354059e-05, "loss": 0.2861, "step": 11121 }, { "epoch": 0.8417388013812024, "grad_norm": 0.71875, "learning_rate": 1.2493727920333799e-05, "loss": 0.2852, "step": 11122 }, { "epoch": 0.8418144837046497, "grad_norm": 0.8359375, "learning_rate": 1.2492574451749195e-05, "loss": 0.3019, "step": 11123 }, { "epoch": 0.8418901660280971, "grad_norm": 0.71484375, "learning_rate": 1.2491420947803148e-05, "loss": 0.2909, "step": 11124 }, { "epoch": 0.8419658483515444, "grad_norm": 0.68359375, "learning_rate": 1.2490267408512016e-05, "loss": 0.2611, "step": 11125 }, { "epoch": 0.8420415306749918, "grad_norm": 0.68359375, "learning_rate": 1.248911383389217e-05, "loss": 0.2705, "step": 11126 }, { "epoch": 0.842117212998439, "grad_norm": 0.765625, "learning_rate": 1.248796022395997e-05, "loss": 0.313, "step": 11127 }, { "epoch": 0.8421928953218863, "grad_norm": 0.75390625, "learning_rate": 1.2486806578731787e-05, "loss": 0.3181, "step": 11128 }, { "epoch": 0.8422685776453337, "grad_norm": 0.7421875, "learning_rate": 1.2485652898223985e-05, "loss": 0.3067, "step": 11129 }, { "epoch": 0.842344259968781, "grad_norm": 0.8046875, "learning_rate": 1.248449918245293e-05, "loss": 0.3289, "step": 11130 }, { "epoch": 0.8424199422922284, "grad_norm": 0.70703125, "learning_rate": 1.2483345431434994e-05, "loss": 0.2801, "step": 11131 }, { "epoch": 0.8424956246156757, "grad_norm": 0.80859375, "learning_rate": 1.2482191645186538e-05, "loss": 0.3422, "step": 11132 }, { "epoch": 0.8425713069391231, "grad_norm": 0.7578125, "learning_rate": 1.2481037823723935e-05, "loss": 0.3416, "step": 11133 }, { "epoch": 0.8426469892625703, "grad_norm": 0.66796875, "learning_rate": 1.2479883967063553e-05, "loss": 0.2344, "step": 11134 }, { "epoch": 0.8427226715860177, "grad_norm": 1.0, "learning_rate": 1.247873007522176e-05, "loss": 0.3637, "step": 11135 }, { "epoch": 0.842798353909465, "grad_norm": 0.79296875, "learning_rate": 1.247757614821493e-05, "loss": 0.3386, "step": 11136 }, { "epoch": 0.8428740362329123, "grad_norm": 0.75390625, "learning_rate": 1.2476422186059426e-05, "loss": 0.3093, "step": 11137 }, { "epoch": 0.8429497185563597, "grad_norm": 0.80078125, "learning_rate": 1.2475268188771628e-05, "loss": 0.2759, "step": 11138 }, { "epoch": 0.843025400879807, "grad_norm": 0.76953125, "learning_rate": 1.2474114156367903e-05, "loss": 0.3268, "step": 11139 }, { "epoch": 0.8431010832032544, "grad_norm": 0.77734375, "learning_rate": 1.2472960088864621e-05, "loss": 0.3311, "step": 11140 }, { "epoch": 0.8431767655267016, "grad_norm": 0.78125, "learning_rate": 1.2471805986278159e-05, "loss": 0.3347, "step": 11141 }, { "epoch": 0.843252447850149, "grad_norm": 0.77734375, "learning_rate": 1.2470651848624881e-05, "loss": 0.3034, "step": 11142 }, { "epoch": 0.8433281301735963, "grad_norm": 0.75390625, "learning_rate": 1.2469497675921172e-05, "loss": 0.3176, "step": 11143 }, { "epoch": 0.8434038124970437, "grad_norm": 0.8515625, "learning_rate": 1.2468343468183404e-05, "loss": 0.3644, "step": 11144 }, { "epoch": 0.843479494820491, "grad_norm": 0.73046875, "learning_rate": 1.246718922542794e-05, "loss": 0.2941, "step": 11145 }, { "epoch": 0.8435551771439384, "grad_norm": 0.75, "learning_rate": 1.2466034947671168e-05, "loss": 0.3096, "step": 11146 }, { "epoch": 0.8436308594673857, "grad_norm": 0.75390625, "learning_rate": 1.2464880634929457e-05, "loss": 0.3154, "step": 11147 }, { "epoch": 0.8437065417908329, "grad_norm": 0.72265625, "learning_rate": 1.2463726287219182e-05, "loss": 0.3004, "step": 11148 }, { "epoch": 0.8437822241142803, "grad_norm": 0.69921875, "learning_rate": 1.2462571904556729e-05, "loss": 0.2823, "step": 11149 }, { "epoch": 0.8438579064377276, "grad_norm": 0.734375, "learning_rate": 1.2461417486958463e-05, "loss": 0.3003, "step": 11150 }, { "epoch": 0.843933588761175, "grad_norm": 0.77734375, "learning_rate": 1.2460263034440763e-05, "loss": 0.3353, "step": 11151 }, { "epoch": 0.8440092710846223, "grad_norm": 0.78515625, "learning_rate": 1.2459108547020014e-05, "loss": 0.3318, "step": 11152 }, { "epoch": 0.8440849534080697, "grad_norm": 0.875, "learning_rate": 1.245795402471259e-05, "loss": 0.3591, "step": 11153 }, { "epoch": 0.844160635731517, "grad_norm": 0.81640625, "learning_rate": 1.2456799467534868e-05, "loss": 0.3228, "step": 11154 }, { "epoch": 0.8442363180549642, "grad_norm": 0.72265625, "learning_rate": 1.245564487550323e-05, "loss": 0.2875, "step": 11155 }, { "epoch": 0.8443120003784116, "grad_norm": 0.7421875, "learning_rate": 1.2454490248634057e-05, "loss": 0.3005, "step": 11156 }, { "epoch": 0.8443876827018589, "grad_norm": 0.78125, "learning_rate": 1.2453335586943724e-05, "loss": 0.3233, "step": 11157 }, { "epoch": 0.8444633650253063, "grad_norm": 0.73828125, "learning_rate": 1.2452180890448622e-05, "loss": 0.2802, "step": 11158 }, { "epoch": 0.8445390473487536, "grad_norm": 0.83984375, "learning_rate": 1.2451026159165122e-05, "loss": 0.3481, "step": 11159 }, { "epoch": 0.844614729672201, "grad_norm": 0.74609375, "learning_rate": 1.244987139310961e-05, "loss": 0.2936, "step": 11160 }, { "epoch": 0.8446904119956483, "grad_norm": 0.7734375, "learning_rate": 1.2448716592298468e-05, "loss": 0.3254, "step": 11161 }, { "epoch": 0.8447660943190956, "grad_norm": 0.859375, "learning_rate": 1.2447561756748079e-05, "loss": 0.3706, "step": 11162 }, { "epoch": 0.8448417766425429, "grad_norm": 0.72265625, "learning_rate": 1.244640688647483e-05, "loss": 0.2907, "step": 11163 }, { "epoch": 0.8449174589659902, "grad_norm": 0.79296875, "learning_rate": 1.2445251981495096e-05, "loss": 0.3399, "step": 11164 }, { "epoch": 0.8449931412894376, "grad_norm": 0.75, "learning_rate": 1.2444097041825269e-05, "loss": 0.3008, "step": 11165 }, { "epoch": 0.8450688236128849, "grad_norm": 0.75, "learning_rate": 1.244294206748173e-05, "loss": 0.3038, "step": 11166 }, { "epoch": 0.8451445059363323, "grad_norm": 0.75, "learning_rate": 1.2441787058480867e-05, "loss": 0.3175, "step": 11167 }, { "epoch": 0.8452201882597796, "grad_norm": 0.77734375, "learning_rate": 1.2440632014839065e-05, "loss": 0.3634, "step": 11168 }, { "epoch": 0.8452958705832269, "grad_norm": 0.796875, "learning_rate": 1.2439476936572705e-05, "loss": 0.346, "step": 11169 }, { "epoch": 0.8453715529066742, "grad_norm": 0.72265625, "learning_rate": 1.2438321823698182e-05, "loss": 0.3084, "step": 11170 }, { "epoch": 0.8454472352301216, "grad_norm": 0.9765625, "learning_rate": 1.243716667623188e-05, "loss": 0.3252, "step": 11171 }, { "epoch": 0.8455229175535689, "grad_norm": 0.85546875, "learning_rate": 1.2436011494190185e-05, "loss": 0.3241, "step": 11172 }, { "epoch": 0.8455985998770162, "grad_norm": 1.1328125, "learning_rate": 1.2434856277589486e-05, "loss": 0.3401, "step": 11173 }, { "epoch": 0.8456742822004636, "grad_norm": 0.7890625, "learning_rate": 1.2433701026446175e-05, "loss": 0.3428, "step": 11174 }, { "epoch": 0.8457499645239109, "grad_norm": 0.7578125, "learning_rate": 1.2432545740776636e-05, "loss": 0.3115, "step": 11175 }, { "epoch": 0.8458256468473582, "grad_norm": 0.7890625, "learning_rate": 1.2431390420597263e-05, "loss": 0.365, "step": 11176 }, { "epoch": 0.8459013291708055, "grad_norm": 0.71484375, "learning_rate": 1.2430235065924443e-05, "loss": 0.3042, "step": 11177 }, { "epoch": 0.8459770114942529, "grad_norm": 0.7578125, "learning_rate": 1.242907967677457e-05, "loss": 0.3008, "step": 11178 }, { "epoch": 0.8460526938177002, "grad_norm": 0.7265625, "learning_rate": 1.2427924253164032e-05, "loss": 0.3197, "step": 11179 }, { "epoch": 0.8461283761411476, "grad_norm": 0.74609375, "learning_rate": 1.2426768795109223e-05, "loss": 0.2936, "step": 11180 }, { "epoch": 0.8462040584645949, "grad_norm": 0.71484375, "learning_rate": 1.2425613302626533e-05, "loss": 0.3085, "step": 11181 }, { "epoch": 0.8462797407880422, "grad_norm": 0.7265625, "learning_rate": 1.2424457775732357e-05, "loss": 0.2895, "step": 11182 }, { "epoch": 0.8463554231114895, "grad_norm": 0.7578125, "learning_rate": 1.2423302214443085e-05, "loss": 0.3314, "step": 11183 }, { "epoch": 0.8464311054349368, "grad_norm": 0.75390625, "learning_rate": 1.2422146618775113e-05, "loss": 0.3635, "step": 11184 }, { "epoch": 0.8465067877583842, "grad_norm": 0.75, "learning_rate": 1.2420990988744837e-05, "loss": 0.3259, "step": 11185 }, { "epoch": 0.8465824700818315, "grad_norm": 0.74609375, "learning_rate": 1.2419835324368648e-05, "loss": 0.3221, "step": 11186 }, { "epoch": 0.8466581524052789, "grad_norm": 0.859375, "learning_rate": 1.241867962566294e-05, "loss": 0.3886, "step": 11187 }, { "epoch": 0.8467338347287262, "grad_norm": 0.74609375, "learning_rate": 1.2417523892644115e-05, "loss": 0.304, "step": 11188 }, { "epoch": 0.8468095170521736, "grad_norm": 0.73046875, "learning_rate": 1.241636812532856e-05, "loss": 0.2951, "step": 11189 }, { "epoch": 0.8468851993756208, "grad_norm": 0.75, "learning_rate": 1.241521232373268e-05, "loss": 0.2954, "step": 11190 }, { "epoch": 0.8469608816990681, "grad_norm": 0.71484375, "learning_rate": 1.241405648787287e-05, "loss": 0.2795, "step": 11191 }, { "epoch": 0.8470365640225155, "grad_norm": 0.71484375, "learning_rate": 1.2412900617765522e-05, "loss": 0.2604, "step": 11192 }, { "epoch": 0.8471122463459628, "grad_norm": 0.77734375, "learning_rate": 1.2411744713427041e-05, "loss": 0.3327, "step": 11193 }, { "epoch": 0.8471879286694102, "grad_norm": 0.7578125, "learning_rate": 1.2410588774873822e-05, "loss": 0.2934, "step": 11194 }, { "epoch": 0.8472636109928575, "grad_norm": 0.7734375, "learning_rate": 1.2409432802122265e-05, "loss": 0.3058, "step": 11195 }, { "epoch": 0.8473392933163049, "grad_norm": 0.7578125, "learning_rate": 1.2408276795188766e-05, "loss": 0.2795, "step": 11196 }, { "epoch": 0.8474149756397521, "grad_norm": 0.69921875, "learning_rate": 1.2407120754089733e-05, "loss": 0.2973, "step": 11197 }, { "epoch": 0.8474906579631994, "grad_norm": 0.78125, "learning_rate": 1.2405964678841556e-05, "loss": 0.3436, "step": 11198 }, { "epoch": 0.8475663402866468, "grad_norm": 0.71875, "learning_rate": 1.2404808569460644e-05, "loss": 0.274, "step": 11199 }, { "epoch": 0.8476420226100941, "grad_norm": 0.765625, "learning_rate": 1.2403652425963398e-05, "loss": 0.3469, "step": 11200 }, { "epoch": 0.8477177049335415, "grad_norm": 0.75, "learning_rate": 1.240249624836621e-05, "loss": 0.2866, "step": 11201 }, { "epoch": 0.8477933872569888, "grad_norm": 0.78515625, "learning_rate": 1.2401340036685497e-05, "loss": 0.3191, "step": 11202 }, { "epoch": 0.8478690695804362, "grad_norm": 0.7734375, "learning_rate": 1.2400183790937652e-05, "loss": 0.3256, "step": 11203 }, { "epoch": 0.8479447519038834, "grad_norm": 0.76171875, "learning_rate": 1.2399027511139081e-05, "loss": 0.3231, "step": 11204 }, { "epoch": 0.8480204342273308, "grad_norm": 0.80859375, "learning_rate": 1.239787119730619e-05, "loss": 0.36, "step": 11205 }, { "epoch": 0.8480961165507781, "grad_norm": 0.71484375, "learning_rate": 1.239671484945538e-05, "loss": 0.3217, "step": 11206 }, { "epoch": 0.8481717988742254, "grad_norm": 0.74609375, "learning_rate": 1.2395558467603055e-05, "loss": 0.3085, "step": 11207 }, { "epoch": 0.8482474811976728, "grad_norm": 0.69140625, "learning_rate": 1.2394402051765624e-05, "loss": 0.2994, "step": 11208 }, { "epoch": 0.8483231635211201, "grad_norm": 0.8046875, "learning_rate": 1.2393245601959493e-05, "loss": 0.348, "step": 11209 }, { "epoch": 0.8483988458445674, "grad_norm": 0.8046875, "learning_rate": 1.2392089118201062e-05, "loss": 0.3351, "step": 11210 }, { "epoch": 0.8484745281680147, "grad_norm": 0.69921875, "learning_rate": 1.2390932600506745e-05, "loss": 0.2811, "step": 11211 }, { "epoch": 0.8485502104914621, "grad_norm": 0.7734375, "learning_rate": 1.2389776048892945e-05, "loss": 0.2816, "step": 11212 }, { "epoch": 0.8486258928149094, "grad_norm": 0.65234375, "learning_rate": 1.2388619463376068e-05, "loss": 0.2452, "step": 11213 }, { "epoch": 0.8487015751383568, "grad_norm": 0.75390625, "learning_rate": 1.2387462843972531e-05, "loss": 0.3371, "step": 11214 }, { "epoch": 0.8487772574618041, "grad_norm": 0.765625, "learning_rate": 1.2386306190698731e-05, "loss": 0.3259, "step": 11215 }, { "epoch": 0.8488529397852514, "grad_norm": 0.74609375, "learning_rate": 1.2385149503571084e-05, "loss": 0.3219, "step": 11216 }, { "epoch": 0.8489286221086987, "grad_norm": 0.69921875, "learning_rate": 1.2383992782605999e-05, "loss": 0.2937, "step": 11217 }, { "epoch": 0.849004304432146, "grad_norm": 0.70703125, "learning_rate": 1.2382836027819887e-05, "loss": 0.2919, "step": 11218 }, { "epoch": 0.8490799867555934, "grad_norm": 0.75, "learning_rate": 1.2381679239229155e-05, "loss": 0.3118, "step": 11219 }, { "epoch": 0.8491556690790407, "grad_norm": 0.7421875, "learning_rate": 1.2380522416850216e-05, "loss": 0.2992, "step": 11220 }, { "epoch": 0.8492313514024881, "grad_norm": 0.734375, "learning_rate": 1.2379365560699482e-05, "loss": 0.3138, "step": 11221 }, { "epoch": 0.8493070337259354, "grad_norm": 0.8671875, "learning_rate": 1.2378208670793361e-05, "loss": 0.3026, "step": 11222 }, { "epoch": 0.8493827160493828, "grad_norm": 0.7421875, "learning_rate": 1.2377051747148274e-05, "loss": 0.2852, "step": 11223 }, { "epoch": 0.84945839837283, "grad_norm": 0.7734375, "learning_rate": 1.2375894789780625e-05, "loss": 0.3026, "step": 11224 }, { "epoch": 0.8495340806962773, "grad_norm": 0.7265625, "learning_rate": 1.2374737798706832e-05, "loss": 0.2989, "step": 11225 }, { "epoch": 0.8496097630197247, "grad_norm": 0.69921875, "learning_rate": 1.237358077394331e-05, "loss": 0.2879, "step": 11226 }, { "epoch": 0.849685445343172, "grad_norm": 0.7890625, "learning_rate": 1.2372423715506469e-05, "loss": 0.3267, "step": 11227 }, { "epoch": 0.8497611276666194, "grad_norm": 0.73046875, "learning_rate": 1.237126662341273e-05, "loss": 0.3165, "step": 11228 }, { "epoch": 0.8498368099900667, "grad_norm": 0.71484375, "learning_rate": 1.2370109497678503e-05, "loss": 0.3003, "step": 11229 }, { "epoch": 0.8499124923135141, "grad_norm": 0.95703125, "learning_rate": 1.2368952338320204e-05, "loss": 0.3222, "step": 11230 }, { "epoch": 0.8499881746369613, "grad_norm": 0.7890625, "learning_rate": 1.2367795145354249e-05, "loss": 0.3382, "step": 11231 }, { "epoch": 0.8500638569604086, "grad_norm": 0.74609375, "learning_rate": 1.236663791879706e-05, "loss": 0.3154, "step": 11232 }, { "epoch": 0.850139539283856, "grad_norm": 0.7578125, "learning_rate": 1.236548065866505e-05, "loss": 0.3325, "step": 11233 }, { "epoch": 0.8502152216073033, "grad_norm": 0.734375, "learning_rate": 1.236432336497464e-05, "loss": 0.3237, "step": 11234 }, { "epoch": 0.8502909039307507, "grad_norm": 0.75390625, "learning_rate": 1.2363166037742242e-05, "loss": 0.343, "step": 11235 }, { "epoch": 0.850366586254198, "grad_norm": 0.76953125, "learning_rate": 1.2362008676984281e-05, "loss": 0.3287, "step": 11236 }, { "epoch": 0.8504422685776454, "grad_norm": 0.7578125, "learning_rate": 1.2360851282717172e-05, "loss": 0.333, "step": 11237 }, { "epoch": 0.8505179509010926, "grad_norm": 0.7578125, "learning_rate": 1.2359693854957337e-05, "loss": 0.3251, "step": 11238 }, { "epoch": 0.85059363322454, "grad_norm": 0.68359375, "learning_rate": 1.2358536393721194e-05, "loss": 0.2599, "step": 11239 }, { "epoch": 0.8506693155479873, "grad_norm": 0.7734375, "learning_rate": 1.2357378899025166e-05, "loss": 0.3384, "step": 11240 }, { "epoch": 0.8507449978714346, "grad_norm": 0.75, "learning_rate": 1.2356221370885673e-05, "loss": 0.2897, "step": 11241 }, { "epoch": 0.850820680194882, "grad_norm": 0.75390625, "learning_rate": 1.2355063809319137e-05, "loss": 0.3198, "step": 11242 }, { "epoch": 0.8508963625183293, "grad_norm": 0.671875, "learning_rate": 1.2353906214341977e-05, "loss": 0.2741, "step": 11243 }, { "epoch": 0.8509720448417767, "grad_norm": 0.78125, "learning_rate": 1.2352748585970619e-05, "loss": 0.3281, "step": 11244 }, { "epoch": 0.8510477271652239, "grad_norm": 0.8203125, "learning_rate": 1.2351590924221486e-05, "loss": 0.3703, "step": 11245 }, { "epoch": 0.8511234094886713, "grad_norm": 0.7734375, "learning_rate": 1.2350433229110999e-05, "loss": 0.3242, "step": 11246 }, { "epoch": 0.8511990918121186, "grad_norm": 0.75390625, "learning_rate": 1.2349275500655587e-05, "loss": 0.3192, "step": 11247 }, { "epoch": 0.851274774135566, "grad_norm": 0.68359375, "learning_rate": 1.2348117738871666e-05, "loss": 0.276, "step": 11248 }, { "epoch": 0.8513504564590133, "grad_norm": 0.68359375, "learning_rate": 1.2346959943775665e-05, "loss": 0.2418, "step": 11249 }, { "epoch": 0.8514261387824607, "grad_norm": 0.7109375, "learning_rate": 1.2345802115384014e-05, "loss": 0.2864, "step": 11250 }, { "epoch": 0.851501821105908, "grad_norm": 0.76953125, "learning_rate": 1.234464425371313e-05, "loss": 0.3159, "step": 11251 }, { "epoch": 0.8515775034293552, "grad_norm": 0.80078125, "learning_rate": 1.2343486358779446e-05, "loss": 0.3366, "step": 11252 }, { "epoch": 0.8516531857528026, "grad_norm": 0.73828125, "learning_rate": 1.2342328430599385e-05, "loss": 0.3214, "step": 11253 }, { "epoch": 0.8517288680762499, "grad_norm": 1.3671875, "learning_rate": 1.2341170469189377e-05, "loss": 0.3637, "step": 11254 }, { "epoch": 0.8518045503996973, "grad_norm": 0.71875, "learning_rate": 1.2340012474565848e-05, "loss": 0.3013, "step": 11255 }, { "epoch": 0.8518802327231446, "grad_norm": 0.7578125, "learning_rate": 1.2338854446745228e-05, "loss": 0.3096, "step": 11256 }, { "epoch": 0.851955915046592, "grad_norm": 0.74609375, "learning_rate": 1.233769638574394e-05, "loss": 0.3086, "step": 11257 }, { "epoch": 0.8520315973700393, "grad_norm": 0.75, "learning_rate": 1.2336538291578422e-05, "loss": 0.3191, "step": 11258 }, { "epoch": 0.8521072796934865, "grad_norm": 0.74609375, "learning_rate": 1.2335380164265097e-05, "loss": 0.3171, "step": 11259 }, { "epoch": 0.8521829620169339, "grad_norm": 0.68359375, "learning_rate": 1.2334222003820393e-05, "loss": 0.2844, "step": 11260 }, { "epoch": 0.8522586443403812, "grad_norm": 0.75, "learning_rate": 1.2333063810260751e-05, "loss": 0.3135, "step": 11261 }, { "epoch": 0.8523343266638286, "grad_norm": 0.72265625, "learning_rate": 1.233190558360259e-05, "loss": 0.264, "step": 11262 }, { "epoch": 0.8524100089872759, "grad_norm": 0.72265625, "learning_rate": 1.2330747323862347e-05, "loss": 0.2961, "step": 11263 }, { "epoch": 0.8524856913107233, "grad_norm": 0.7421875, "learning_rate": 1.2329589031056454e-05, "loss": 0.3044, "step": 11264 }, { "epoch": 0.8525613736341706, "grad_norm": 0.76953125, "learning_rate": 1.2328430705201347e-05, "loss": 0.3354, "step": 11265 }, { "epoch": 0.8526370559576179, "grad_norm": 0.7578125, "learning_rate": 1.2327272346313449e-05, "loss": 0.3253, "step": 11266 }, { "epoch": 0.8527127382810652, "grad_norm": 0.8203125, "learning_rate": 1.23261139544092e-05, "loss": 0.3607, "step": 11267 }, { "epoch": 0.8527884206045125, "grad_norm": 0.734375, "learning_rate": 1.2324955529505034e-05, "loss": 0.3145, "step": 11268 }, { "epoch": 0.8528641029279599, "grad_norm": 0.7265625, "learning_rate": 1.2323797071617383e-05, "loss": 0.3043, "step": 11269 }, { "epoch": 0.8529397852514072, "grad_norm": 0.82421875, "learning_rate": 1.2322638580762684e-05, "loss": 0.3661, "step": 11270 }, { "epoch": 0.8530154675748546, "grad_norm": 0.79296875, "learning_rate": 1.2321480056957373e-05, "loss": 0.3543, "step": 11271 }, { "epoch": 0.8530911498983019, "grad_norm": 0.765625, "learning_rate": 1.232032150021788e-05, "loss": 0.3251, "step": 11272 }, { "epoch": 0.8531668322217492, "grad_norm": 0.6875, "learning_rate": 1.2319162910560645e-05, "loss": 0.263, "step": 11273 }, { "epoch": 0.8532425145451965, "grad_norm": 0.80078125, "learning_rate": 1.2318004288002106e-05, "loss": 0.3761, "step": 11274 }, { "epoch": 0.8533181968686439, "grad_norm": 0.7265625, "learning_rate": 1.2316845632558698e-05, "loss": 0.3108, "step": 11275 }, { "epoch": 0.8533938791920912, "grad_norm": 0.7109375, "learning_rate": 1.2315686944246859e-05, "loss": 0.2948, "step": 11276 }, { "epoch": 0.8534695615155385, "grad_norm": 0.73046875, "learning_rate": 1.2314528223083027e-05, "loss": 0.3005, "step": 11277 }, { "epoch": 0.8535452438389859, "grad_norm": 0.69921875, "learning_rate": 1.2313369469083636e-05, "loss": 0.2729, "step": 11278 }, { "epoch": 0.8536209261624332, "grad_norm": 0.734375, "learning_rate": 1.2312210682265135e-05, "loss": 0.3067, "step": 11279 }, { "epoch": 0.8536966084858805, "grad_norm": 0.76953125, "learning_rate": 1.2311051862643955e-05, "loss": 0.3333, "step": 11280 }, { "epoch": 0.8537722908093278, "grad_norm": 0.80078125, "learning_rate": 1.230989301023654e-05, "loss": 0.3032, "step": 11281 }, { "epoch": 0.8538479731327752, "grad_norm": 0.74609375, "learning_rate": 1.2308734125059327e-05, "loss": 0.3234, "step": 11282 }, { "epoch": 0.8539236554562225, "grad_norm": 0.7421875, "learning_rate": 1.2307575207128758e-05, "loss": 0.3154, "step": 11283 }, { "epoch": 0.8539993377796699, "grad_norm": 0.75, "learning_rate": 1.2306416256461278e-05, "loss": 0.3294, "step": 11284 }, { "epoch": 0.8540750201031172, "grad_norm": 1.0078125, "learning_rate": 1.2305257273073323e-05, "loss": 0.376, "step": 11285 }, { "epoch": 0.8541507024265645, "grad_norm": 0.765625, "learning_rate": 1.2304098256981339e-05, "loss": 0.305, "step": 11286 }, { "epoch": 0.8542263847500118, "grad_norm": 0.8359375, "learning_rate": 1.2302939208201763e-05, "loss": 0.3888, "step": 11287 }, { "epoch": 0.8543020670734591, "grad_norm": 0.796875, "learning_rate": 1.2301780126751047e-05, "loss": 0.3441, "step": 11288 }, { "epoch": 0.8543777493969065, "grad_norm": 0.78125, "learning_rate": 1.2300621012645629e-05, "loss": 0.3499, "step": 11289 }, { "epoch": 0.8544534317203538, "grad_norm": 0.734375, "learning_rate": 1.2299461865901954e-05, "loss": 0.3147, "step": 11290 }, { "epoch": 0.8545291140438012, "grad_norm": 0.76171875, "learning_rate": 1.2298302686536464e-05, "loss": 0.2942, "step": 11291 }, { "epoch": 0.8546047963672485, "grad_norm": 0.72265625, "learning_rate": 1.229714347456561e-05, "loss": 0.2973, "step": 11292 }, { "epoch": 0.8546804786906959, "grad_norm": 1.046875, "learning_rate": 1.229598423000583e-05, "loss": 0.3324, "step": 11293 }, { "epoch": 0.8547561610141431, "grad_norm": 0.76171875, "learning_rate": 1.2294824952873578e-05, "loss": 0.3295, "step": 11294 }, { "epoch": 0.8548318433375904, "grad_norm": 0.77734375, "learning_rate": 1.2293665643185292e-05, "loss": 0.3431, "step": 11295 }, { "epoch": 0.8549075256610378, "grad_norm": 0.76953125, "learning_rate": 1.2292506300957423e-05, "loss": 0.2856, "step": 11296 }, { "epoch": 0.8549832079844851, "grad_norm": 0.70703125, "learning_rate": 1.229134692620642e-05, "loss": 0.2809, "step": 11297 }, { "epoch": 0.8550588903079325, "grad_norm": 0.80859375, "learning_rate": 1.2290187518948729e-05, "loss": 0.3539, "step": 11298 }, { "epoch": 0.8551345726313798, "grad_norm": 0.76953125, "learning_rate": 1.2289028079200798e-05, "loss": 0.3393, "step": 11299 }, { "epoch": 0.8552102549548272, "grad_norm": 0.7734375, "learning_rate": 1.2287868606979074e-05, "loss": 0.3485, "step": 11300 }, { "epoch": 0.8552859372782744, "grad_norm": 0.7734375, "learning_rate": 1.2286709102300008e-05, "loss": 0.2972, "step": 11301 }, { "epoch": 0.8553616196017217, "grad_norm": 0.85546875, "learning_rate": 1.2285549565180048e-05, "loss": 0.3234, "step": 11302 }, { "epoch": 0.8554373019251691, "grad_norm": 0.67578125, "learning_rate": 1.228438999563565e-05, "loss": 0.2736, "step": 11303 }, { "epoch": 0.8555129842486164, "grad_norm": 0.74609375, "learning_rate": 1.2283230393683257e-05, "loss": 0.2895, "step": 11304 }, { "epoch": 0.8555886665720638, "grad_norm": 0.78515625, "learning_rate": 1.2282070759339321e-05, "loss": 0.3261, "step": 11305 }, { "epoch": 0.8556643488955111, "grad_norm": 0.77734375, "learning_rate": 1.2280911092620298e-05, "loss": 0.3067, "step": 11306 }, { "epoch": 0.8557400312189585, "grad_norm": 0.75, "learning_rate": 1.2279751393542634e-05, "loss": 0.3002, "step": 11307 }, { "epoch": 0.8558157135424057, "grad_norm": 0.734375, "learning_rate": 1.2278591662122787e-05, "loss": 0.3197, "step": 11308 }, { "epoch": 0.855891395865853, "grad_norm": 0.71875, "learning_rate": 1.2277431898377205e-05, "loss": 0.2838, "step": 11309 }, { "epoch": 0.8559670781893004, "grad_norm": 0.828125, "learning_rate": 1.2276272102322344e-05, "loss": 0.3413, "step": 11310 }, { "epoch": 0.8560427605127477, "grad_norm": 0.859375, "learning_rate": 1.2275112273974657e-05, "loss": 0.3657, "step": 11311 }, { "epoch": 0.8561184428361951, "grad_norm": 0.77734375, "learning_rate": 1.2273952413350601e-05, "loss": 0.3377, "step": 11312 }, { "epoch": 0.8561941251596424, "grad_norm": 0.69921875, "learning_rate": 1.2272792520466623e-05, "loss": 0.2537, "step": 11313 }, { "epoch": 0.8562698074830898, "grad_norm": 0.72265625, "learning_rate": 1.2271632595339184e-05, "loss": 0.31, "step": 11314 }, { "epoch": 0.856345489806537, "grad_norm": 0.78515625, "learning_rate": 1.227047263798474e-05, "loss": 0.3318, "step": 11315 }, { "epoch": 0.8564211721299844, "grad_norm": 0.79296875, "learning_rate": 1.2269312648419744e-05, "loss": 0.3536, "step": 11316 }, { "epoch": 0.8564968544534317, "grad_norm": 0.74609375, "learning_rate": 1.2268152626660657e-05, "loss": 0.3101, "step": 11317 }, { "epoch": 0.8565725367768791, "grad_norm": 0.7734375, "learning_rate": 1.226699257272393e-05, "loss": 0.3462, "step": 11318 }, { "epoch": 0.8566482191003264, "grad_norm": 0.77734375, "learning_rate": 1.2265832486626022e-05, "loss": 0.3165, "step": 11319 }, { "epoch": 0.8567239014237737, "grad_norm": 0.765625, "learning_rate": 1.2264672368383392e-05, "loss": 0.318, "step": 11320 }, { "epoch": 0.8567995837472211, "grad_norm": 0.74609375, "learning_rate": 1.2263512218012503e-05, "loss": 0.3098, "step": 11321 }, { "epoch": 0.8568752660706683, "grad_norm": 0.7109375, "learning_rate": 1.2262352035529803e-05, "loss": 0.297, "step": 11322 }, { "epoch": 0.8569509483941157, "grad_norm": 0.703125, "learning_rate": 1.2261191820951758e-05, "loss": 0.2856, "step": 11323 }, { "epoch": 0.857026630717563, "grad_norm": 0.76953125, "learning_rate": 1.2260031574294831e-05, "loss": 0.3417, "step": 11324 }, { "epoch": 0.8571023130410104, "grad_norm": 0.69921875, "learning_rate": 1.2258871295575472e-05, "loss": 0.2527, "step": 11325 }, { "epoch": 0.8571779953644577, "grad_norm": 0.7109375, "learning_rate": 1.2257710984810151e-05, "loss": 0.28, "step": 11326 }, { "epoch": 0.8572536776879051, "grad_norm": 0.70703125, "learning_rate": 1.2256550642015326e-05, "loss": 0.289, "step": 11327 }, { "epoch": 0.8573293600113524, "grad_norm": 0.76171875, "learning_rate": 1.2255390267207454e-05, "loss": 0.2989, "step": 11328 }, { "epoch": 0.8574050423347996, "grad_norm": 0.7578125, "learning_rate": 1.2254229860403005e-05, "loss": 0.3299, "step": 11329 }, { "epoch": 0.857480724658247, "grad_norm": 0.765625, "learning_rate": 1.2253069421618434e-05, "loss": 0.3308, "step": 11330 }, { "epoch": 0.8575564069816943, "grad_norm": 0.77734375, "learning_rate": 1.2251908950870209e-05, "loss": 0.3337, "step": 11331 }, { "epoch": 0.8576320893051417, "grad_norm": 0.70703125, "learning_rate": 1.225074844817479e-05, "loss": 0.2931, "step": 11332 }, { "epoch": 0.857707771628589, "grad_norm": 0.82421875, "learning_rate": 1.224958791354864e-05, "loss": 0.3697, "step": 11333 }, { "epoch": 0.8577834539520364, "grad_norm": 0.80078125, "learning_rate": 1.2248427347008224e-05, "loss": 0.3704, "step": 11334 }, { "epoch": 0.8578591362754836, "grad_norm": 0.71875, "learning_rate": 1.2247266748570015e-05, "loss": 0.3091, "step": 11335 }, { "epoch": 0.857934818598931, "grad_norm": 0.7890625, "learning_rate": 1.2246106118250462e-05, "loss": 0.3693, "step": 11336 }, { "epoch": 0.8580105009223783, "grad_norm": 0.75, "learning_rate": 1.2244945456066044e-05, "loss": 0.3097, "step": 11337 }, { "epoch": 0.8580861832458256, "grad_norm": 0.77734375, "learning_rate": 1.2243784762033222e-05, "loss": 0.3106, "step": 11338 }, { "epoch": 0.858161865569273, "grad_norm": 0.734375, "learning_rate": 1.2242624036168462e-05, "loss": 0.3134, "step": 11339 }, { "epoch": 0.8582375478927203, "grad_norm": 0.6640625, "learning_rate": 1.2241463278488233e-05, "loss": 0.2537, "step": 11340 }, { "epoch": 0.8583132302161677, "grad_norm": 0.94921875, "learning_rate": 1.2240302489009e-05, "loss": 0.3519, "step": 11341 }, { "epoch": 0.8583889125396149, "grad_norm": 0.69140625, "learning_rate": 1.2239141667747231e-05, "loss": 0.2891, "step": 11342 }, { "epoch": 0.8584645948630623, "grad_norm": 0.76953125, "learning_rate": 1.2237980814719395e-05, "loss": 0.3105, "step": 11343 }, { "epoch": 0.8585402771865096, "grad_norm": 0.6953125, "learning_rate": 1.2236819929941962e-05, "loss": 0.2897, "step": 11344 }, { "epoch": 0.858615959509957, "grad_norm": 0.74609375, "learning_rate": 1.2235659013431398e-05, "loss": 0.3051, "step": 11345 }, { "epoch": 0.8586916418334043, "grad_norm": 0.72265625, "learning_rate": 1.2234498065204175e-05, "loss": 0.2969, "step": 11346 }, { "epoch": 0.8587673241568516, "grad_norm": 0.71875, "learning_rate": 1.2233337085276765e-05, "loss": 0.2707, "step": 11347 }, { "epoch": 0.858843006480299, "grad_norm": 0.78125, "learning_rate": 1.2232176073665631e-05, "loss": 0.3188, "step": 11348 }, { "epoch": 0.8589186888037462, "grad_norm": 0.73828125, "learning_rate": 1.2231015030387252e-05, "loss": 0.313, "step": 11349 }, { "epoch": 0.8589943711271936, "grad_norm": 0.77734375, "learning_rate": 1.2229853955458095e-05, "loss": 0.3339, "step": 11350 }, { "epoch": 0.8590700534506409, "grad_norm": 0.6875, "learning_rate": 1.2228692848894633e-05, "loss": 0.2849, "step": 11351 }, { "epoch": 0.8591457357740883, "grad_norm": 0.7421875, "learning_rate": 1.2227531710713341e-05, "loss": 0.3225, "step": 11352 }, { "epoch": 0.8592214180975356, "grad_norm": 0.73046875, "learning_rate": 1.2226370540930685e-05, "loss": 0.3086, "step": 11353 }, { "epoch": 0.859297100420983, "grad_norm": 0.8359375, "learning_rate": 1.2225209339563144e-05, "loss": 0.3087, "step": 11354 }, { "epoch": 0.8593727827444303, "grad_norm": 0.734375, "learning_rate": 1.2224048106627194e-05, "loss": 0.3131, "step": 11355 }, { "epoch": 0.8594484650678775, "grad_norm": 0.90234375, "learning_rate": 1.2222886842139299e-05, "loss": 0.3631, "step": 11356 }, { "epoch": 0.8595241473913249, "grad_norm": 0.74609375, "learning_rate": 1.2221725546115941e-05, "loss": 0.3196, "step": 11357 }, { "epoch": 0.8595998297147722, "grad_norm": 0.80859375, "learning_rate": 1.2220564218573594e-05, "loss": 0.3303, "step": 11358 }, { "epoch": 0.8596755120382196, "grad_norm": 0.7421875, "learning_rate": 1.2219402859528737e-05, "loss": 0.3092, "step": 11359 }, { "epoch": 0.8597511943616669, "grad_norm": 0.7734375, "learning_rate": 1.2218241468997836e-05, "loss": 0.2954, "step": 11360 }, { "epoch": 0.8598268766851143, "grad_norm": 0.80859375, "learning_rate": 1.2217080046997376e-05, "loss": 0.3383, "step": 11361 }, { "epoch": 0.8599025590085616, "grad_norm": 0.69921875, "learning_rate": 1.221591859354383e-05, "loss": 0.3006, "step": 11362 }, { "epoch": 0.8599782413320088, "grad_norm": 0.96484375, "learning_rate": 1.2214757108653674e-05, "loss": 0.2985, "step": 11363 }, { "epoch": 0.8600539236554562, "grad_norm": 0.91015625, "learning_rate": 1.2213595592343392e-05, "loss": 0.317, "step": 11364 }, { "epoch": 0.8601296059789035, "grad_norm": 0.7109375, "learning_rate": 1.2212434044629458e-05, "loss": 0.2655, "step": 11365 }, { "epoch": 0.8602052883023509, "grad_norm": 0.7265625, "learning_rate": 1.2211272465528345e-05, "loss": 0.2936, "step": 11366 }, { "epoch": 0.8602809706257982, "grad_norm": 0.81640625, "learning_rate": 1.221011085505654e-05, "loss": 0.3349, "step": 11367 }, { "epoch": 0.8603566529492456, "grad_norm": 0.7109375, "learning_rate": 1.2208949213230526e-05, "loss": 0.2517, "step": 11368 }, { "epoch": 0.8604323352726929, "grad_norm": 0.7109375, "learning_rate": 1.2207787540066769e-05, "loss": 0.2896, "step": 11369 }, { "epoch": 0.8605080175961402, "grad_norm": 0.78125, "learning_rate": 1.2206625835581757e-05, "loss": 0.3295, "step": 11370 }, { "epoch": 0.8605836999195875, "grad_norm": 0.8046875, "learning_rate": 1.2205464099791976e-05, "loss": 0.3749, "step": 11371 }, { "epoch": 0.8606593822430348, "grad_norm": 0.76171875, "learning_rate": 1.2204302332713896e-05, "loss": 0.3183, "step": 11372 }, { "epoch": 0.8607350645664822, "grad_norm": 0.75390625, "learning_rate": 1.220314053436401e-05, "loss": 0.3125, "step": 11373 }, { "epoch": 0.8608107468899295, "grad_norm": 0.79296875, "learning_rate": 1.2201978704758793e-05, "loss": 0.331, "step": 11374 }, { "epoch": 0.8608864292133769, "grad_norm": 0.74609375, "learning_rate": 1.2200816843914726e-05, "loss": 0.3051, "step": 11375 }, { "epoch": 0.8609621115368242, "grad_norm": 0.765625, "learning_rate": 1.2199654951848301e-05, "loss": 0.3234, "step": 11376 }, { "epoch": 0.8610377938602715, "grad_norm": 0.74609375, "learning_rate": 1.2198493028575992e-05, "loss": 0.2983, "step": 11377 }, { "epoch": 0.8611134761837188, "grad_norm": 0.74609375, "learning_rate": 1.219733107411429e-05, "loss": 0.2925, "step": 11378 }, { "epoch": 0.8611891585071662, "grad_norm": 0.73828125, "learning_rate": 1.2196169088479673e-05, "loss": 0.3003, "step": 11379 }, { "epoch": 0.8612648408306135, "grad_norm": 0.76953125, "learning_rate": 1.2195007071688629e-05, "loss": 0.3187, "step": 11380 }, { "epoch": 0.8613405231540608, "grad_norm": 0.77734375, "learning_rate": 1.2193845023757643e-05, "loss": 0.3519, "step": 11381 }, { "epoch": 0.8614162054775082, "grad_norm": 0.7734375, "learning_rate": 1.2192682944703202e-05, "loss": 0.3294, "step": 11382 }, { "epoch": 0.8614918878009555, "grad_norm": 0.73828125, "learning_rate": 1.219152083454179e-05, "loss": 0.312, "step": 11383 }, { "epoch": 0.8615675701244028, "grad_norm": 0.8203125, "learning_rate": 1.2190358693289892e-05, "loss": 0.3803, "step": 11384 }, { "epoch": 0.8616432524478501, "grad_norm": 0.78515625, "learning_rate": 1.2189196520964e-05, "loss": 0.3205, "step": 11385 }, { "epoch": 0.8617189347712975, "grad_norm": 0.765625, "learning_rate": 1.2188034317580598e-05, "loss": 0.3161, "step": 11386 }, { "epoch": 0.8617946170947448, "grad_norm": 0.74609375, "learning_rate": 1.2186872083156174e-05, "loss": 0.3147, "step": 11387 }, { "epoch": 0.8618702994181922, "grad_norm": 0.71875, "learning_rate": 1.2185709817707217e-05, "loss": 0.304, "step": 11388 }, { "epoch": 0.8619459817416395, "grad_norm": 0.8125, "learning_rate": 1.2184547521250213e-05, "loss": 0.3927, "step": 11389 }, { "epoch": 0.8620216640650868, "grad_norm": 0.75, "learning_rate": 1.2183385193801655e-05, "loss": 0.308, "step": 11390 }, { "epoch": 0.8620973463885341, "grad_norm": 0.765625, "learning_rate": 1.2182222835378033e-05, "loss": 0.2769, "step": 11391 }, { "epoch": 0.8621730287119814, "grad_norm": 1.015625, "learning_rate": 1.2181060445995833e-05, "loss": 0.3336, "step": 11392 }, { "epoch": 0.8622487110354288, "grad_norm": 0.74609375, "learning_rate": 1.217989802567155e-05, "loss": 0.2921, "step": 11393 }, { "epoch": 0.8623243933588761, "grad_norm": 0.73046875, "learning_rate": 1.2178735574421671e-05, "loss": 0.2999, "step": 11394 }, { "epoch": 0.8624000756823235, "grad_norm": 0.79296875, "learning_rate": 1.217757309226269e-05, "loss": 0.3432, "step": 11395 }, { "epoch": 0.8624757580057708, "grad_norm": 0.82421875, "learning_rate": 1.21764105792111e-05, "loss": 0.2616, "step": 11396 }, { "epoch": 0.8625514403292182, "grad_norm": 0.71484375, "learning_rate": 1.2175248035283387e-05, "loss": 0.2784, "step": 11397 }, { "epoch": 0.8626271226526654, "grad_norm": 0.7421875, "learning_rate": 1.2174085460496048e-05, "loss": 0.3108, "step": 11398 }, { "epoch": 0.8627028049761127, "grad_norm": 0.64453125, "learning_rate": 1.2172922854865578e-05, "loss": 0.2242, "step": 11399 }, { "epoch": 0.8627784872995601, "grad_norm": 0.765625, "learning_rate": 1.2171760218408468e-05, "loss": 0.3249, "step": 11400 }, { "epoch": 0.8628541696230074, "grad_norm": 0.84375, "learning_rate": 1.2170597551141212e-05, "loss": 0.3676, "step": 11401 }, { "epoch": 0.8629298519464548, "grad_norm": 0.75390625, "learning_rate": 1.2169434853080304e-05, "loss": 0.2908, "step": 11402 }, { "epoch": 0.8630055342699021, "grad_norm": 0.73046875, "learning_rate": 1.2168272124242242e-05, "loss": 0.2833, "step": 11403 }, { "epoch": 0.8630812165933495, "grad_norm": 0.76171875, "learning_rate": 1.2167109364643518e-05, "loss": 0.3003, "step": 11404 }, { "epoch": 0.8631568989167967, "grad_norm": 0.79296875, "learning_rate": 1.2165946574300626e-05, "loss": 0.343, "step": 11405 }, { "epoch": 0.863232581240244, "grad_norm": 0.71875, "learning_rate": 1.216478375323007e-05, "loss": 0.2987, "step": 11406 }, { "epoch": 0.8633082635636914, "grad_norm": 0.75390625, "learning_rate": 1.2163620901448339e-05, "loss": 0.3032, "step": 11407 }, { "epoch": 0.8633839458871387, "grad_norm": 0.7109375, "learning_rate": 1.2162458018971931e-05, "loss": 0.3152, "step": 11408 }, { "epoch": 0.8634596282105861, "grad_norm": 0.7109375, "learning_rate": 1.2161295105817347e-05, "loss": 0.291, "step": 11409 }, { "epoch": 0.8635353105340334, "grad_norm": 0.95703125, "learning_rate": 1.2160132162001084e-05, "loss": 0.3933, "step": 11410 }, { "epoch": 0.8636109928574808, "grad_norm": 0.734375, "learning_rate": 1.2158969187539637e-05, "loss": 0.2971, "step": 11411 }, { "epoch": 0.863686675180928, "grad_norm": 0.73046875, "learning_rate": 1.215780618244951e-05, "loss": 0.2938, "step": 11412 }, { "epoch": 0.8637623575043754, "grad_norm": 0.8515625, "learning_rate": 1.2156643146747195e-05, "loss": 0.3912, "step": 11413 }, { "epoch": 0.8638380398278227, "grad_norm": 0.734375, "learning_rate": 1.21554800804492e-05, "loss": 0.3051, "step": 11414 }, { "epoch": 0.86391372215127, "grad_norm": 0.7421875, "learning_rate": 1.2154316983572021e-05, "loss": 0.3217, "step": 11415 }, { "epoch": 0.8639894044747174, "grad_norm": 0.75, "learning_rate": 1.2153153856132155e-05, "loss": 0.2957, "step": 11416 }, { "epoch": 0.8640650867981647, "grad_norm": 0.7109375, "learning_rate": 1.215199069814611e-05, "loss": 0.28, "step": 11417 }, { "epoch": 0.8641407691216121, "grad_norm": 0.7734375, "learning_rate": 1.2150827509630383e-05, "loss": 0.3421, "step": 11418 }, { "epoch": 0.8642164514450593, "grad_norm": 0.78125, "learning_rate": 1.2149664290601474e-05, "loss": 0.359, "step": 11419 }, { "epoch": 0.8642921337685067, "grad_norm": 0.69921875, "learning_rate": 1.2148501041075896e-05, "loss": 0.2603, "step": 11420 }, { "epoch": 0.864367816091954, "grad_norm": 0.82421875, "learning_rate": 1.2147337761070138e-05, "loss": 0.3617, "step": 11421 }, { "epoch": 0.8644434984154014, "grad_norm": 0.78515625, "learning_rate": 1.2146174450600707e-05, "loss": 0.3224, "step": 11422 }, { "epoch": 0.8645191807388487, "grad_norm": 0.75390625, "learning_rate": 1.2145011109684112e-05, "loss": 0.3082, "step": 11423 }, { "epoch": 0.864594863062296, "grad_norm": 0.74609375, "learning_rate": 1.2143847738336856e-05, "loss": 0.3191, "step": 11424 }, { "epoch": 0.8646705453857434, "grad_norm": 0.75, "learning_rate": 1.2142684336575434e-05, "loss": 0.3187, "step": 11425 }, { "epoch": 0.8647462277091906, "grad_norm": 0.75390625, "learning_rate": 1.2141520904416362e-05, "loss": 0.3108, "step": 11426 }, { "epoch": 0.864821910032638, "grad_norm": 0.78515625, "learning_rate": 1.214035744187614e-05, "loss": 0.3263, "step": 11427 }, { "epoch": 0.8648975923560853, "grad_norm": 0.84765625, "learning_rate": 1.2139193948971273e-05, "loss": 0.3503, "step": 11428 }, { "epoch": 0.8649732746795327, "grad_norm": 0.7265625, "learning_rate": 1.2138030425718273e-05, "loss": 0.2968, "step": 11429 }, { "epoch": 0.86504895700298, "grad_norm": 0.78125, "learning_rate": 1.2136866872133641e-05, "loss": 0.3161, "step": 11430 }, { "epoch": 0.8651246393264274, "grad_norm": 0.73046875, "learning_rate": 1.2135703288233882e-05, "loss": 0.3171, "step": 11431 }, { "epoch": 0.8652003216498747, "grad_norm": 0.74609375, "learning_rate": 1.2134539674035511e-05, "loss": 0.3165, "step": 11432 }, { "epoch": 0.8652760039733219, "grad_norm": 0.7421875, "learning_rate": 1.2133376029555028e-05, "loss": 0.3105, "step": 11433 }, { "epoch": 0.8653516862967693, "grad_norm": 0.71875, "learning_rate": 1.2132212354808947e-05, "loss": 0.2956, "step": 11434 }, { "epoch": 0.8654273686202166, "grad_norm": 0.7734375, "learning_rate": 1.2131048649813778e-05, "loss": 0.3399, "step": 11435 }, { "epoch": 0.865503050943664, "grad_norm": 0.77734375, "learning_rate": 1.2129884914586024e-05, "loss": 0.3209, "step": 11436 }, { "epoch": 0.8655787332671113, "grad_norm": 0.75, "learning_rate": 1.2128721149142192e-05, "loss": 0.3068, "step": 11437 }, { "epoch": 0.8656544155905587, "grad_norm": 0.73828125, "learning_rate": 1.2127557353498807e-05, "loss": 0.3227, "step": 11438 }, { "epoch": 0.865730097914006, "grad_norm": 0.72265625, "learning_rate": 1.2126393527672367e-05, "loss": 0.298, "step": 11439 }, { "epoch": 0.8658057802374532, "grad_norm": 0.7421875, "learning_rate": 1.2125229671679382e-05, "loss": 0.3161, "step": 11440 }, { "epoch": 0.8658814625609006, "grad_norm": 0.703125, "learning_rate": 1.212406578553637e-05, "loss": 0.2725, "step": 11441 }, { "epoch": 0.8659571448843479, "grad_norm": 0.7578125, "learning_rate": 1.212290186925984e-05, "loss": 0.3391, "step": 11442 }, { "epoch": 0.8660328272077953, "grad_norm": 0.79296875, "learning_rate": 1.2121737922866303e-05, "loss": 0.3559, "step": 11443 }, { "epoch": 0.8661085095312426, "grad_norm": 0.75, "learning_rate": 1.2120573946372272e-05, "loss": 0.3428, "step": 11444 }, { "epoch": 0.86618419185469, "grad_norm": 0.66796875, "learning_rate": 1.2119409939794262e-05, "loss": 0.2655, "step": 11445 }, { "epoch": 0.8662598741781373, "grad_norm": 0.77734375, "learning_rate": 1.2118245903148782e-05, "loss": 0.3328, "step": 11446 }, { "epoch": 0.8663355565015846, "grad_norm": 0.72265625, "learning_rate": 1.2117081836452353e-05, "loss": 0.2724, "step": 11447 }, { "epoch": 0.8664112388250319, "grad_norm": 0.73046875, "learning_rate": 1.2115917739721483e-05, "loss": 0.3146, "step": 11448 }, { "epoch": 0.8664869211484792, "grad_norm": 0.75, "learning_rate": 1.211475361297269e-05, "loss": 0.3174, "step": 11449 }, { "epoch": 0.8665626034719266, "grad_norm": 0.79296875, "learning_rate": 1.2113589456222488e-05, "loss": 0.3419, "step": 11450 }, { "epoch": 0.8666382857953739, "grad_norm": 0.7265625, "learning_rate": 1.211242526948739e-05, "loss": 0.2907, "step": 11451 }, { "epoch": 0.8667139681188213, "grad_norm": 0.71484375, "learning_rate": 1.2111261052783916e-05, "loss": 0.2827, "step": 11452 }, { "epoch": 0.8667896504422685, "grad_norm": 0.7421875, "learning_rate": 1.2110096806128581e-05, "loss": 0.2879, "step": 11453 }, { "epoch": 0.8668653327657159, "grad_norm": 0.73046875, "learning_rate": 1.21089325295379e-05, "loss": 0.2617, "step": 11454 }, { "epoch": 0.8669410150891632, "grad_norm": 0.70703125, "learning_rate": 1.2107768223028395e-05, "loss": 0.2757, "step": 11455 }, { "epoch": 0.8670166974126106, "grad_norm": 0.75390625, "learning_rate": 1.210660388661658e-05, "loss": 0.3199, "step": 11456 }, { "epoch": 0.8670923797360579, "grad_norm": 0.765625, "learning_rate": 1.2105439520318973e-05, "loss": 0.2985, "step": 11457 }, { "epoch": 0.8671680620595053, "grad_norm": 0.703125, "learning_rate": 1.2104275124152094e-05, "loss": 0.261, "step": 11458 }, { "epoch": 0.8672437443829526, "grad_norm": 0.90625, "learning_rate": 1.2103110698132461e-05, "loss": 0.3939, "step": 11459 }, { "epoch": 0.8673194267063998, "grad_norm": 0.69921875, "learning_rate": 1.2101946242276594e-05, "loss": 0.2777, "step": 11460 }, { "epoch": 0.8673951090298472, "grad_norm": 0.75, "learning_rate": 1.210078175660101e-05, "loss": 0.2964, "step": 11461 }, { "epoch": 0.8674707913532945, "grad_norm": 1.0625, "learning_rate": 1.2099617241122237e-05, "loss": 0.3682, "step": 11462 }, { "epoch": 0.8675464736767419, "grad_norm": 0.76953125, "learning_rate": 1.2098452695856787e-05, "loss": 0.3353, "step": 11463 }, { "epoch": 0.8676221560001892, "grad_norm": 0.8203125, "learning_rate": 1.2097288120821182e-05, "loss": 0.3043, "step": 11464 }, { "epoch": 0.8676978383236366, "grad_norm": 0.73828125, "learning_rate": 1.209612351603195e-05, "loss": 0.3124, "step": 11465 }, { "epoch": 0.8677735206470839, "grad_norm": 0.73828125, "learning_rate": 1.2094958881505608e-05, "loss": 0.3173, "step": 11466 }, { "epoch": 0.8678492029705311, "grad_norm": 0.734375, "learning_rate": 1.2093794217258679e-05, "loss": 0.2964, "step": 11467 }, { "epoch": 0.8679248852939785, "grad_norm": 0.7265625, "learning_rate": 1.2092629523307686e-05, "loss": 0.2816, "step": 11468 }, { "epoch": 0.8680005676174258, "grad_norm": 0.8203125, "learning_rate": 1.2091464799669151e-05, "loss": 0.3738, "step": 11469 }, { "epoch": 0.8680762499408732, "grad_norm": 0.79296875, "learning_rate": 1.2090300046359597e-05, "loss": 0.3428, "step": 11470 }, { "epoch": 0.8681519322643205, "grad_norm": 0.70703125, "learning_rate": 1.2089135263395556e-05, "loss": 0.2888, "step": 11471 }, { "epoch": 0.8682276145877679, "grad_norm": 0.921875, "learning_rate": 1.208797045079354e-05, "loss": 0.3555, "step": 11472 }, { "epoch": 0.8683032969112152, "grad_norm": 0.828125, "learning_rate": 1.2086805608570082e-05, "loss": 0.3795, "step": 11473 }, { "epoch": 0.8683789792346625, "grad_norm": 0.71875, "learning_rate": 1.2085640736741708e-05, "loss": 0.2741, "step": 11474 }, { "epoch": 0.8684546615581098, "grad_norm": 0.79296875, "learning_rate": 1.2084475835324935e-05, "loss": 0.3335, "step": 11475 }, { "epoch": 0.8685303438815571, "grad_norm": 0.74609375, "learning_rate": 1.2083310904336302e-05, "loss": 0.2865, "step": 11476 }, { "epoch": 0.8686060262050045, "grad_norm": 0.70703125, "learning_rate": 1.2082145943792327e-05, "loss": 0.2755, "step": 11477 }, { "epoch": 0.8686817085284518, "grad_norm": 0.75, "learning_rate": 1.2080980953709536e-05, "loss": 0.3211, "step": 11478 }, { "epoch": 0.8687573908518992, "grad_norm": 0.71875, "learning_rate": 1.2079815934104462e-05, "loss": 0.2817, "step": 11479 }, { "epoch": 0.8688330731753465, "grad_norm": 0.72265625, "learning_rate": 1.207865088499363e-05, "loss": 0.2869, "step": 11480 }, { "epoch": 0.8689087554987938, "grad_norm": 0.69921875, "learning_rate": 1.2077485806393565e-05, "loss": 0.2755, "step": 11481 }, { "epoch": 0.8689844378222411, "grad_norm": 0.78515625, "learning_rate": 1.20763206983208e-05, "loss": 0.3138, "step": 11482 }, { "epoch": 0.8690601201456885, "grad_norm": 0.75390625, "learning_rate": 1.2075155560791865e-05, "loss": 0.3244, "step": 11483 }, { "epoch": 0.8691358024691358, "grad_norm": 0.73046875, "learning_rate": 1.2073990393823282e-05, "loss": 0.3053, "step": 11484 }, { "epoch": 0.8692114847925831, "grad_norm": 0.71484375, "learning_rate": 1.2072825197431592e-05, "loss": 0.2911, "step": 11485 }, { "epoch": 0.8692871671160305, "grad_norm": 0.80078125, "learning_rate": 1.2071659971633318e-05, "loss": 0.3259, "step": 11486 }, { "epoch": 0.8693628494394778, "grad_norm": 0.76953125, "learning_rate": 1.207049471644499e-05, "loss": 0.3248, "step": 11487 }, { "epoch": 0.8694385317629251, "grad_norm": 0.75390625, "learning_rate": 1.2069329431883144e-05, "loss": 0.292, "step": 11488 }, { "epoch": 0.8695142140863724, "grad_norm": 0.828125, "learning_rate": 1.2068164117964307e-05, "loss": 0.3883, "step": 11489 }, { "epoch": 0.8695898964098198, "grad_norm": 0.734375, "learning_rate": 1.2066998774705012e-05, "loss": 0.2996, "step": 11490 }, { "epoch": 0.8696655787332671, "grad_norm": 0.86328125, "learning_rate": 1.2065833402121796e-05, "loss": 0.319, "step": 11491 }, { "epoch": 0.8697412610567145, "grad_norm": 0.6953125, "learning_rate": 1.2064668000231185e-05, "loss": 0.2696, "step": 11492 }, { "epoch": 0.8698169433801618, "grad_norm": 0.75, "learning_rate": 1.2063502569049715e-05, "loss": 0.3164, "step": 11493 }, { "epoch": 0.8698926257036091, "grad_norm": 0.7578125, "learning_rate": 1.2062337108593924e-05, "loss": 0.2956, "step": 11494 }, { "epoch": 0.8699683080270564, "grad_norm": 0.79296875, "learning_rate": 1.2061171618880338e-05, "loss": 0.3068, "step": 11495 }, { "epoch": 0.8700439903505037, "grad_norm": 0.75390625, "learning_rate": 1.2060006099925496e-05, "loss": 0.3138, "step": 11496 }, { "epoch": 0.8701196726739511, "grad_norm": 0.80078125, "learning_rate": 1.2058840551745934e-05, "loss": 0.3368, "step": 11497 }, { "epoch": 0.8701953549973984, "grad_norm": 0.7578125, "learning_rate": 1.2057674974358185e-05, "loss": 0.2995, "step": 11498 }, { "epoch": 0.8702710373208458, "grad_norm": 0.734375, "learning_rate": 1.2056509367778784e-05, "loss": 0.2736, "step": 11499 }, { "epoch": 0.8703467196442931, "grad_norm": 0.78515625, "learning_rate": 1.2055343732024271e-05, "loss": 0.3285, "step": 11500 }, { "epoch": 0.8704224019677405, "grad_norm": 0.7109375, "learning_rate": 1.205417806711118e-05, "loss": 0.2782, "step": 11501 }, { "epoch": 0.8704980842911877, "grad_norm": 0.73046875, "learning_rate": 1.2053012373056043e-05, "loss": 0.2844, "step": 11502 }, { "epoch": 0.870573766614635, "grad_norm": 0.71484375, "learning_rate": 1.2051846649875407e-05, "loss": 0.2978, "step": 11503 }, { "epoch": 0.8706494489380824, "grad_norm": 0.7734375, "learning_rate": 1.2050680897585805e-05, "loss": 0.3539, "step": 11504 }, { "epoch": 0.8707251312615297, "grad_norm": 0.76953125, "learning_rate": 1.2049515116203776e-05, "loss": 0.308, "step": 11505 }, { "epoch": 0.8708008135849771, "grad_norm": 0.69921875, "learning_rate": 1.2048349305745856e-05, "loss": 0.2737, "step": 11506 }, { "epoch": 0.8708764959084244, "grad_norm": 0.73828125, "learning_rate": 1.2047183466228587e-05, "loss": 0.3233, "step": 11507 }, { "epoch": 0.8709521782318718, "grad_norm": 0.765625, "learning_rate": 1.2046017597668507e-05, "loss": 0.3225, "step": 11508 }, { "epoch": 0.871027860555319, "grad_norm": 0.78125, "learning_rate": 1.2044851700082156e-05, "loss": 0.3112, "step": 11509 }, { "epoch": 0.8711035428787663, "grad_norm": 0.796875, "learning_rate": 1.2043685773486073e-05, "loss": 0.3386, "step": 11510 }, { "epoch": 0.8711792252022137, "grad_norm": 0.7890625, "learning_rate": 1.2042519817896805e-05, "loss": 0.3152, "step": 11511 }, { "epoch": 0.871254907525661, "grad_norm": 0.75390625, "learning_rate": 1.2041353833330884e-05, "loss": 0.3264, "step": 11512 }, { "epoch": 0.8713305898491084, "grad_norm": 0.87109375, "learning_rate": 1.2040187819804858e-05, "loss": 0.2754, "step": 11513 }, { "epoch": 0.8714062721725557, "grad_norm": 0.7421875, "learning_rate": 1.2039021777335267e-05, "loss": 0.3258, "step": 11514 }, { "epoch": 0.8714819544960031, "grad_norm": 0.703125, "learning_rate": 1.2037855705938653e-05, "loss": 0.2894, "step": 11515 }, { "epoch": 0.8715576368194503, "grad_norm": 0.9296875, "learning_rate": 1.2036689605631557e-05, "loss": 0.3066, "step": 11516 }, { "epoch": 0.8716333191428977, "grad_norm": 0.82421875, "learning_rate": 1.2035523476430523e-05, "loss": 0.3082, "step": 11517 }, { "epoch": 0.871709001466345, "grad_norm": 0.7265625, "learning_rate": 1.2034357318352099e-05, "loss": 0.2822, "step": 11518 }, { "epoch": 0.8717846837897923, "grad_norm": 0.81640625, "learning_rate": 1.2033191131412822e-05, "loss": 0.3629, "step": 11519 }, { "epoch": 0.8718603661132397, "grad_norm": 0.70703125, "learning_rate": 1.2032024915629241e-05, "loss": 0.2766, "step": 11520 }, { "epoch": 0.871936048436687, "grad_norm": 0.7890625, "learning_rate": 1.20308586710179e-05, "loss": 0.3164, "step": 11521 }, { "epoch": 0.8720117307601344, "grad_norm": 0.7421875, "learning_rate": 1.2029692397595345e-05, "loss": 0.3038, "step": 11522 }, { "epoch": 0.8720874130835816, "grad_norm": 0.703125, "learning_rate": 1.2028526095378118e-05, "loss": 0.2831, "step": 11523 }, { "epoch": 0.872163095407029, "grad_norm": 0.69921875, "learning_rate": 1.202735976438277e-05, "loss": 0.2945, "step": 11524 }, { "epoch": 0.8722387777304763, "grad_norm": 0.72265625, "learning_rate": 1.2026193404625839e-05, "loss": 0.2884, "step": 11525 }, { "epoch": 0.8723144600539237, "grad_norm": 0.75390625, "learning_rate": 1.202502701612388e-05, "loss": 0.3209, "step": 11526 }, { "epoch": 0.872390142377371, "grad_norm": 0.74609375, "learning_rate": 1.2023860598893442e-05, "loss": 0.2949, "step": 11527 }, { "epoch": 0.8724658247008183, "grad_norm": 0.8203125, "learning_rate": 1.2022694152951065e-05, "loss": 0.3448, "step": 11528 }, { "epoch": 0.8725415070242657, "grad_norm": 0.73046875, "learning_rate": 1.2021527678313299e-05, "loss": 0.2927, "step": 11529 }, { "epoch": 0.8726171893477129, "grad_norm": 0.78515625, "learning_rate": 1.2020361174996694e-05, "loss": 0.3485, "step": 11530 }, { "epoch": 0.8726928716711603, "grad_norm": 0.75390625, "learning_rate": 1.20191946430178e-05, "loss": 0.2883, "step": 11531 }, { "epoch": 0.8727685539946076, "grad_norm": 0.765625, "learning_rate": 1.2018028082393167e-05, "loss": 0.3265, "step": 11532 }, { "epoch": 0.872844236318055, "grad_norm": 0.73046875, "learning_rate": 1.201686149313934e-05, "loss": 0.2869, "step": 11533 }, { "epoch": 0.8729199186415023, "grad_norm": 0.796875, "learning_rate": 1.2015694875272873e-05, "loss": 0.3117, "step": 11534 }, { "epoch": 0.8729956009649497, "grad_norm": 0.76171875, "learning_rate": 1.2014528228810311e-05, "loss": 0.334, "step": 11535 }, { "epoch": 0.873071283288397, "grad_norm": 0.81640625, "learning_rate": 1.2013361553768217e-05, "loss": 0.3825, "step": 11536 }, { "epoch": 0.8731469656118442, "grad_norm": 0.765625, "learning_rate": 1.2012194850163127e-05, "loss": 0.3346, "step": 11537 }, { "epoch": 0.8732226479352916, "grad_norm": 0.8203125, "learning_rate": 1.2011028118011602e-05, "loss": 0.3106, "step": 11538 }, { "epoch": 0.8732983302587389, "grad_norm": 0.8046875, "learning_rate": 1.2009861357330195e-05, "loss": 0.3524, "step": 11539 }, { "epoch": 0.8733740125821863, "grad_norm": 0.7265625, "learning_rate": 1.2008694568135451e-05, "loss": 0.2882, "step": 11540 }, { "epoch": 0.8734496949056336, "grad_norm": 0.7578125, "learning_rate": 1.2007527750443932e-05, "loss": 0.3054, "step": 11541 }, { "epoch": 0.873525377229081, "grad_norm": 0.71875, "learning_rate": 1.2006360904272186e-05, "loss": 0.3018, "step": 11542 }, { "epoch": 0.8736010595525283, "grad_norm": 0.703125, "learning_rate": 1.2005194029636761e-05, "loss": 0.2661, "step": 11543 }, { "epoch": 0.8736767418759755, "grad_norm": 0.80078125, "learning_rate": 1.2004027126554225e-05, "loss": 0.3354, "step": 11544 }, { "epoch": 0.8737524241994229, "grad_norm": 0.76171875, "learning_rate": 1.2002860195041124e-05, "loss": 0.3189, "step": 11545 }, { "epoch": 0.8738281065228702, "grad_norm": 0.73828125, "learning_rate": 1.2001693235114013e-05, "loss": 0.2836, "step": 11546 }, { "epoch": 0.8739037888463176, "grad_norm": 0.78515625, "learning_rate": 1.2000526246789449e-05, "loss": 0.321, "step": 11547 }, { "epoch": 0.8739794711697649, "grad_norm": 0.71875, "learning_rate": 1.1999359230083986e-05, "loss": 0.3046, "step": 11548 }, { "epoch": 0.8740551534932123, "grad_norm": 0.76953125, "learning_rate": 1.199819218501418e-05, "loss": 0.3125, "step": 11549 }, { "epoch": 0.8741308358166596, "grad_norm": 1.296875, "learning_rate": 1.1997025111596595e-05, "loss": 0.361, "step": 11550 }, { "epoch": 0.8742065181401069, "grad_norm": 0.75390625, "learning_rate": 1.1995858009847776e-05, "loss": 0.3187, "step": 11551 }, { "epoch": 0.8742822004635542, "grad_norm": 0.78125, "learning_rate": 1.1994690879784287e-05, "loss": 0.3446, "step": 11552 }, { "epoch": 0.8743578827870016, "grad_norm": 0.69140625, "learning_rate": 1.1993523721422687e-05, "loss": 0.2672, "step": 11553 }, { "epoch": 0.8744335651104489, "grad_norm": 0.7109375, "learning_rate": 1.199235653477953e-05, "loss": 0.2735, "step": 11554 }, { "epoch": 0.8745092474338962, "grad_norm": 0.76171875, "learning_rate": 1.1991189319871377e-05, "loss": 0.3278, "step": 11555 }, { "epoch": 0.8745849297573436, "grad_norm": 1.125, "learning_rate": 1.1990022076714787e-05, "loss": 0.3892, "step": 11556 }, { "epoch": 0.8746606120807909, "grad_norm": 0.77734375, "learning_rate": 1.198885480532632e-05, "loss": 0.3528, "step": 11557 }, { "epoch": 0.8747362944042382, "grad_norm": 0.7109375, "learning_rate": 1.1987687505722532e-05, "loss": 0.2785, "step": 11558 }, { "epoch": 0.8748119767276855, "grad_norm": 0.7734375, "learning_rate": 1.1986520177919986e-05, "loss": 0.3449, "step": 11559 }, { "epoch": 0.8748876590511329, "grad_norm": 0.75390625, "learning_rate": 1.1985352821935243e-05, "loss": 0.3199, "step": 11560 }, { "epoch": 0.8749633413745802, "grad_norm": 0.7578125, "learning_rate": 1.1984185437784862e-05, "loss": 0.2964, "step": 11561 }, { "epoch": 0.8750390236980276, "grad_norm": 0.7265625, "learning_rate": 1.1983018025485408e-05, "loss": 0.3088, "step": 11562 }, { "epoch": 0.8751147060214749, "grad_norm": 0.765625, "learning_rate": 1.198185058505344e-05, "loss": 0.3264, "step": 11563 }, { "epoch": 0.8751903883449222, "grad_norm": 0.74609375, "learning_rate": 1.198068311650552e-05, "loss": 0.3209, "step": 11564 }, { "epoch": 0.8752660706683695, "grad_norm": 0.89453125, "learning_rate": 1.1979515619858213e-05, "loss": 0.3115, "step": 11565 }, { "epoch": 0.8753417529918168, "grad_norm": 0.734375, "learning_rate": 1.1978348095128076e-05, "loss": 0.3148, "step": 11566 }, { "epoch": 0.8754174353152642, "grad_norm": 0.7734375, "learning_rate": 1.1977180542331677e-05, "loss": 0.2999, "step": 11567 }, { "epoch": 0.8754931176387115, "grad_norm": 0.76953125, "learning_rate": 1.1976012961485582e-05, "loss": 0.3173, "step": 11568 }, { "epoch": 0.8755687999621589, "grad_norm": 0.73828125, "learning_rate": 1.197484535260635e-05, "loss": 0.2989, "step": 11569 }, { "epoch": 0.8756444822856062, "grad_norm": 0.73828125, "learning_rate": 1.1973677715710547e-05, "loss": 0.3194, "step": 11570 }, { "epoch": 0.8757201646090536, "grad_norm": 0.78515625, "learning_rate": 1.1972510050814741e-05, "loss": 0.3282, "step": 11571 }, { "epoch": 0.8757958469325008, "grad_norm": 0.734375, "learning_rate": 1.1971342357935491e-05, "loss": 0.3035, "step": 11572 }, { "epoch": 0.8758715292559481, "grad_norm": 0.73828125, "learning_rate": 1.1970174637089372e-05, "loss": 0.2786, "step": 11573 }, { "epoch": 0.8759472115793955, "grad_norm": 0.7890625, "learning_rate": 1.1969006888292945e-05, "loss": 0.3602, "step": 11574 }, { "epoch": 0.8760228939028428, "grad_norm": 0.82421875, "learning_rate": 1.1967839111562771e-05, "loss": 0.366, "step": 11575 }, { "epoch": 0.8760985762262902, "grad_norm": 0.79296875, "learning_rate": 1.1966671306915425e-05, "loss": 0.3214, "step": 11576 }, { "epoch": 0.8761742585497375, "grad_norm": 0.828125, "learning_rate": 1.196550347436747e-05, "loss": 0.3593, "step": 11577 }, { "epoch": 0.8762499408731848, "grad_norm": 0.71875, "learning_rate": 1.1964335613935478e-05, "loss": 0.2804, "step": 11578 }, { "epoch": 0.8763256231966321, "grad_norm": 0.734375, "learning_rate": 1.1963167725636013e-05, "loss": 0.2964, "step": 11579 }, { "epoch": 0.8764013055200794, "grad_norm": 0.75, "learning_rate": 1.1961999809485648e-05, "loss": 0.3307, "step": 11580 }, { "epoch": 0.8764769878435268, "grad_norm": 1.0234375, "learning_rate": 1.1960831865500941e-05, "loss": 0.3556, "step": 11581 }, { "epoch": 0.8765526701669741, "grad_norm": 0.79296875, "learning_rate": 1.1959663893698475e-05, "loss": 0.3621, "step": 11582 }, { "epoch": 0.8766283524904215, "grad_norm": 0.70703125, "learning_rate": 1.1958495894094815e-05, "loss": 0.2739, "step": 11583 }, { "epoch": 0.8767040348138688, "grad_norm": 0.7578125, "learning_rate": 1.1957327866706528e-05, "loss": 0.3176, "step": 11584 }, { "epoch": 0.8767797171373161, "grad_norm": 0.80078125, "learning_rate": 1.1956159811550185e-05, "loss": 0.3248, "step": 11585 }, { "epoch": 0.8768553994607634, "grad_norm": 0.765625, "learning_rate": 1.1954991728642358e-05, "loss": 0.3249, "step": 11586 }, { "epoch": 0.8769310817842108, "grad_norm": 0.75390625, "learning_rate": 1.1953823617999618e-05, "loss": 0.3291, "step": 11587 }, { "epoch": 0.8770067641076581, "grad_norm": 0.81640625, "learning_rate": 1.1952655479638542e-05, "loss": 0.343, "step": 11588 }, { "epoch": 0.8770824464311054, "grad_norm": 0.796875, "learning_rate": 1.1951487313575693e-05, "loss": 0.3515, "step": 11589 }, { "epoch": 0.8771581287545528, "grad_norm": 0.9765625, "learning_rate": 1.1950319119827647e-05, "loss": 0.3089, "step": 11590 }, { "epoch": 0.8772338110780001, "grad_norm": 0.8046875, "learning_rate": 1.194915089841098e-05, "loss": 0.3385, "step": 11591 }, { "epoch": 0.8773094934014474, "grad_norm": 0.78125, "learning_rate": 1.1947982649342263e-05, "loss": 0.3275, "step": 11592 }, { "epoch": 0.8773851757248947, "grad_norm": 0.8046875, "learning_rate": 1.1946814372638066e-05, "loss": 0.3608, "step": 11593 }, { "epoch": 0.8774608580483421, "grad_norm": 0.81640625, "learning_rate": 1.194564606831497e-05, "loss": 0.3698, "step": 11594 }, { "epoch": 0.8775365403717894, "grad_norm": 0.66015625, "learning_rate": 1.1944477736389542e-05, "loss": 0.253, "step": 11595 }, { "epoch": 0.8776122226952368, "grad_norm": 0.72265625, "learning_rate": 1.1943309376878361e-05, "loss": 0.2883, "step": 11596 }, { "epoch": 0.8776879050186841, "grad_norm": 0.7578125, "learning_rate": 1.1942140989798006e-05, "loss": 0.307, "step": 11597 }, { "epoch": 0.8777635873421314, "grad_norm": 0.74609375, "learning_rate": 1.1940972575165044e-05, "loss": 0.3118, "step": 11598 }, { "epoch": 0.8778392696655787, "grad_norm": 0.79296875, "learning_rate": 1.1939804132996056e-05, "loss": 0.3284, "step": 11599 }, { "epoch": 0.877914951989026, "grad_norm": 0.703125, "learning_rate": 1.1938635663307617e-05, "loss": 0.3078, "step": 11600 }, { "epoch": 0.8779906343124734, "grad_norm": 0.81640625, "learning_rate": 1.1937467166116305e-05, "loss": 0.3446, "step": 11601 }, { "epoch": 0.8780663166359207, "grad_norm": 0.8046875, "learning_rate": 1.1936298641438697e-05, "loss": 0.3359, "step": 11602 }, { "epoch": 0.8781419989593681, "grad_norm": 0.76953125, "learning_rate": 1.193513008929137e-05, "loss": 0.3488, "step": 11603 }, { "epoch": 0.8782176812828154, "grad_norm": 1.0390625, "learning_rate": 1.1933961509690902e-05, "loss": 0.329, "step": 11604 }, { "epoch": 0.8782933636062628, "grad_norm": 1.0625, "learning_rate": 1.1932792902653867e-05, "loss": 0.366, "step": 11605 }, { "epoch": 0.87836904592971, "grad_norm": 0.79296875, "learning_rate": 1.1931624268196855e-05, "loss": 0.3306, "step": 11606 }, { "epoch": 0.8784447282531573, "grad_norm": 0.828125, "learning_rate": 1.1930455606336435e-05, "loss": 0.319, "step": 11607 }, { "epoch": 0.8785204105766047, "grad_norm": 0.80078125, "learning_rate": 1.1929286917089187e-05, "loss": 0.3472, "step": 11608 }, { "epoch": 0.878596092900052, "grad_norm": 0.74609375, "learning_rate": 1.1928118200471695e-05, "loss": 0.3088, "step": 11609 }, { "epoch": 0.8786717752234994, "grad_norm": 0.765625, "learning_rate": 1.1926949456500538e-05, "loss": 0.33, "step": 11610 }, { "epoch": 0.8787474575469467, "grad_norm": 1.0390625, "learning_rate": 1.1925780685192297e-05, "loss": 0.346, "step": 11611 }, { "epoch": 0.8788231398703941, "grad_norm": 0.74609375, "learning_rate": 1.192461188656355e-05, "loss": 0.3103, "step": 11612 }, { "epoch": 0.8788988221938413, "grad_norm": 0.86328125, "learning_rate": 1.1923443060630884e-05, "loss": 0.356, "step": 11613 }, { "epoch": 0.8789745045172886, "grad_norm": 0.734375, "learning_rate": 1.1922274207410874e-05, "loss": 0.2776, "step": 11614 }, { "epoch": 0.879050186840736, "grad_norm": 0.734375, "learning_rate": 1.1921105326920107e-05, "loss": 0.2688, "step": 11615 }, { "epoch": 0.8791258691641833, "grad_norm": 0.7734375, "learning_rate": 1.1919936419175161e-05, "loss": 0.3204, "step": 11616 }, { "epoch": 0.8792015514876307, "grad_norm": 0.828125, "learning_rate": 1.1918767484192628e-05, "loss": 0.3588, "step": 11617 }, { "epoch": 0.879277233811078, "grad_norm": 0.72265625, "learning_rate": 1.1917598521989082e-05, "loss": 0.2896, "step": 11618 }, { "epoch": 0.8793529161345254, "grad_norm": 0.77734375, "learning_rate": 1.1916429532581111e-05, "loss": 0.3258, "step": 11619 }, { "epoch": 0.8794285984579726, "grad_norm": 1.0390625, "learning_rate": 1.19152605159853e-05, "loss": 0.4109, "step": 11620 }, { "epoch": 0.87950428078142, "grad_norm": 0.75390625, "learning_rate": 1.1914091472218229e-05, "loss": 0.3105, "step": 11621 }, { "epoch": 0.8795799631048673, "grad_norm": 0.6953125, "learning_rate": 1.1912922401296488e-05, "loss": 0.2712, "step": 11622 }, { "epoch": 0.8796556454283146, "grad_norm": 0.7578125, "learning_rate": 1.1911753303236656e-05, "loss": 0.3519, "step": 11623 }, { "epoch": 0.879731327751762, "grad_norm": 0.76953125, "learning_rate": 1.1910584178055328e-05, "loss": 0.33, "step": 11624 }, { "epoch": 0.8798070100752093, "grad_norm": 0.71875, "learning_rate": 1.1909415025769083e-05, "loss": 0.2916, "step": 11625 }, { "epoch": 0.8798826923986567, "grad_norm": 0.81640625, "learning_rate": 1.1908245846394508e-05, "loss": 0.3328, "step": 11626 }, { "epoch": 0.8799583747221039, "grad_norm": 0.95703125, "learning_rate": 1.1907076639948194e-05, "loss": 0.3165, "step": 11627 }, { "epoch": 0.8800340570455513, "grad_norm": 0.7109375, "learning_rate": 1.190590740644672e-05, "loss": 0.2911, "step": 11628 }, { "epoch": 0.8801097393689986, "grad_norm": 0.81640625, "learning_rate": 1.1904738145906682e-05, "loss": 0.3639, "step": 11629 }, { "epoch": 0.880185421692446, "grad_norm": 0.8046875, "learning_rate": 1.1903568858344667e-05, "loss": 0.3035, "step": 11630 }, { "epoch": 0.8802611040158933, "grad_norm": 0.734375, "learning_rate": 1.1902399543777256e-05, "loss": 0.309, "step": 11631 }, { "epoch": 0.8803367863393406, "grad_norm": 0.765625, "learning_rate": 1.1901230202221048e-05, "loss": 0.3292, "step": 11632 }, { "epoch": 0.880412468662788, "grad_norm": 0.87890625, "learning_rate": 1.1900060833692624e-05, "loss": 0.3489, "step": 11633 }, { "epoch": 0.8804881509862352, "grad_norm": 0.75390625, "learning_rate": 1.1898891438208578e-05, "loss": 0.3217, "step": 11634 }, { "epoch": 0.8805638333096826, "grad_norm": 0.71875, "learning_rate": 1.1897722015785496e-05, "loss": 0.2784, "step": 11635 }, { "epoch": 0.8806395156331299, "grad_norm": 0.78125, "learning_rate": 1.1896552566439972e-05, "loss": 0.3214, "step": 11636 }, { "epoch": 0.8807151979565773, "grad_norm": 0.7890625, "learning_rate": 1.1895383090188592e-05, "loss": 0.3588, "step": 11637 }, { "epoch": 0.8807908802800246, "grad_norm": 0.78125, "learning_rate": 1.1894213587047955e-05, "loss": 0.3266, "step": 11638 }, { "epoch": 0.880866562603472, "grad_norm": 0.73828125, "learning_rate": 1.1893044057034648e-05, "loss": 0.3033, "step": 11639 }, { "epoch": 0.8809422449269193, "grad_norm": 1.0546875, "learning_rate": 1.1891874500165259e-05, "loss": 0.3381, "step": 11640 }, { "epoch": 0.8810179272503665, "grad_norm": 0.7109375, "learning_rate": 1.1890704916456386e-05, "loss": 0.2914, "step": 11641 }, { "epoch": 0.8810936095738139, "grad_norm": 0.79296875, "learning_rate": 1.1889535305924619e-05, "loss": 0.3535, "step": 11642 }, { "epoch": 0.8811692918972612, "grad_norm": 0.796875, "learning_rate": 1.1888365668586548e-05, "loss": 0.3098, "step": 11643 }, { "epoch": 0.8812449742207086, "grad_norm": 0.78125, "learning_rate": 1.1887196004458774e-05, "loss": 0.3177, "step": 11644 }, { "epoch": 0.8813206565441559, "grad_norm": 0.76171875, "learning_rate": 1.1886026313557886e-05, "loss": 0.3237, "step": 11645 }, { "epoch": 0.8813963388676033, "grad_norm": 0.7578125, "learning_rate": 1.1884856595900474e-05, "loss": 0.31, "step": 11646 }, { "epoch": 0.8814720211910506, "grad_norm": 0.765625, "learning_rate": 1.188368685150314e-05, "loss": 0.3159, "step": 11647 }, { "epoch": 0.8815477035144978, "grad_norm": 0.6875, "learning_rate": 1.1882517080382476e-05, "loss": 0.2586, "step": 11648 }, { "epoch": 0.8816233858379452, "grad_norm": 0.7890625, "learning_rate": 1.1881347282555073e-05, "loss": 0.3506, "step": 11649 }, { "epoch": 0.8816990681613925, "grad_norm": 0.74609375, "learning_rate": 1.1880177458037534e-05, "loss": 0.3075, "step": 11650 }, { "epoch": 0.8817747504848399, "grad_norm": 0.7734375, "learning_rate": 1.1879007606846448e-05, "loss": 0.3289, "step": 11651 }, { "epoch": 0.8818504328082872, "grad_norm": 0.69921875, "learning_rate": 1.1877837728998416e-05, "loss": 0.2622, "step": 11652 }, { "epoch": 0.8819261151317346, "grad_norm": 0.73046875, "learning_rate": 1.1876667824510035e-05, "loss": 0.2909, "step": 11653 }, { "epoch": 0.8820017974551819, "grad_norm": 0.78515625, "learning_rate": 1.18754978933979e-05, "loss": 0.3288, "step": 11654 }, { "epoch": 0.8820774797786292, "grad_norm": 0.7890625, "learning_rate": 1.1874327935678607e-05, "loss": 0.3255, "step": 11655 }, { "epoch": 0.8821531621020765, "grad_norm": 0.6796875, "learning_rate": 1.1873157951368757e-05, "loss": 0.2301, "step": 11656 }, { "epoch": 0.8822288444255239, "grad_norm": 0.9453125, "learning_rate": 1.1871987940484946e-05, "loss": 0.3478, "step": 11657 }, { "epoch": 0.8823045267489712, "grad_norm": 0.71875, "learning_rate": 1.1870817903043775e-05, "loss": 0.2792, "step": 11658 }, { "epoch": 0.8823802090724185, "grad_norm": 0.73046875, "learning_rate": 1.1869647839061841e-05, "loss": 0.2794, "step": 11659 }, { "epoch": 0.8824558913958659, "grad_norm": 0.73828125, "learning_rate": 1.1868477748555745e-05, "loss": 0.2848, "step": 11660 }, { "epoch": 0.8825315737193132, "grad_norm": 0.7578125, "learning_rate": 1.1867307631542082e-05, "loss": 0.3338, "step": 11661 }, { "epoch": 0.8826072560427605, "grad_norm": 0.74609375, "learning_rate": 1.1866137488037461e-05, "loss": 0.307, "step": 11662 }, { "epoch": 0.8826829383662078, "grad_norm": 0.7578125, "learning_rate": 1.1864967318058473e-05, "loss": 0.3151, "step": 11663 }, { "epoch": 0.8827586206896552, "grad_norm": 0.98828125, "learning_rate": 1.1863797121621725e-05, "loss": 0.3644, "step": 11664 }, { "epoch": 0.8828343030131025, "grad_norm": 0.7578125, "learning_rate": 1.1862626898743819e-05, "loss": 0.307, "step": 11665 }, { "epoch": 0.8829099853365499, "grad_norm": 0.78125, "learning_rate": 1.1861456649441351e-05, "loss": 0.3185, "step": 11666 }, { "epoch": 0.8829856676599972, "grad_norm": 0.74609375, "learning_rate": 1.1860286373730927e-05, "loss": 0.2911, "step": 11667 }, { "epoch": 0.8830613499834445, "grad_norm": 0.7578125, "learning_rate": 1.1859116071629148e-05, "loss": 0.341, "step": 11668 }, { "epoch": 0.8831370323068918, "grad_norm": 0.7109375, "learning_rate": 1.1857945743152617e-05, "loss": 0.2748, "step": 11669 }, { "epoch": 0.8832127146303391, "grad_norm": 0.76953125, "learning_rate": 1.1856775388317936e-05, "loss": 0.3063, "step": 11670 }, { "epoch": 0.8832883969537865, "grad_norm": 0.82421875, "learning_rate": 1.1855605007141713e-05, "loss": 0.3798, "step": 11671 }, { "epoch": 0.8833640792772338, "grad_norm": 0.76171875, "learning_rate": 1.1854434599640547e-05, "loss": 0.3258, "step": 11672 }, { "epoch": 0.8834397616006812, "grad_norm": 0.71484375, "learning_rate": 1.1853264165831044e-05, "loss": 0.2915, "step": 11673 }, { "epoch": 0.8835154439241285, "grad_norm": 0.7578125, "learning_rate": 1.1852093705729809e-05, "loss": 0.3276, "step": 11674 }, { "epoch": 0.8835911262475759, "grad_norm": 0.765625, "learning_rate": 1.1850923219353446e-05, "loss": 0.3444, "step": 11675 }, { "epoch": 0.8836668085710231, "grad_norm": 0.7421875, "learning_rate": 1.184975270671856e-05, "loss": 0.2929, "step": 11676 }, { "epoch": 0.8837424908944704, "grad_norm": 0.7109375, "learning_rate": 1.1848582167841758e-05, "loss": 0.2967, "step": 11677 }, { "epoch": 0.8838181732179178, "grad_norm": 0.69921875, "learning_rate": 1.1847411602739644e-05, "loss": 0.2745, "step": 11678 }, { "epoch": 0.8838938555413651, "grad_norm": 0.75, "learning_rate": 1.184624101142883e-05, "loss": 0.2864, "step": 11679 }, { "epoch": 0.8839695378648125, "grad_norm": 0.90234375, "learning_rate": 1.1845070393925917e-05, "loss": 0.3895, "step": 11680 }, { "epoch": 0.8840452201882598, "grad_norm": 0.796875, "learning_rate": 1.1843899750247514e-05, "loss": 0.327, "step": 11681 }, { "epoch": 0.8841209025117072, "grad_norm": 0.8359375, "learning_rate": 1.1842729080410229e-05, "loss": 0.3541, "step": 11682 }, { "epoch": 0.8841965848351544, "grad_norm": 0.85546875, "learning_rate": 1.184155838443067e-05, "loss": 0.3619, "step": 11683 }, { "epoch": 0.8842722671586017, "grad_norm": 0.7421875, "learning_rate": 1.1840387662325443e-05, "loss": 0.3165, "step": 11684 }, { "epoch": 0.8843479494820491, "grad_norm": 0.79296875, "learning_rate": 1.1839216914111162e-05, "loss": 0.359, "step": 11685 }, { "epoch": 0.8844236318054964, "grad_norm": 0.76171875, "learning_rate": 1.1838046139804433e-05, "loss": 0.3356, "step": 11686 }, { "epoch": 0.8844993141289438, "grad_norm": 0.73828125, "learning_rate": 1.1836875339421862e-05, "loss": 0.2935, "step": 11687 }, { "epoch": 0.8845749964523911, "grad_norm": 0.6953125, "learning_rate": 1.1835704512980067e-05, "loss": 0.2882, "step": 11688 }, { "epoch": 0.8846506787758385, "grad_norm": 0.734375, "learning_rate": 1.1834533660495649e-05, "loss": 0.3083, "step": 11689 }, { "epoch": 0.8847263610992857, "grad_norm": 0.78515625, "learning_rate": 1.1833362781985225e-05, "loss": 0.3205, "step": 11690 }, { "epoch": 0.884802043422733, "grad_norm": 0.796875, "learning_rate": 1.1832191877465401e-05, "loss": 0.3357, "step": 11691 }, { "epoch": 0.8848777257461804, "grad_norm": 0.8203125, "learning_rate": 1.1831020946952796e-05, "loss": 0.3363, "step": 11692 }, { "epoch": 0.8849534080696277, "grad_norm": 0.73828125, "learning_rate": 1.1829849990464012e-05, "loss": 0.274, "step": 11693 }, { "epoch": 0.8850290903930751, "grad_norm": 0.73828125, "learning_rate": 1.1828679008015666e-05, "loss": 0.3013, "step": 11694 }, { "epoch": 0.8851047727165224, "grad_norm": 0.77734375, "learning_rate": 1.1827507999624375e-05, "loss": 0.3299, "step": 11695 }, { "epoch": 0.8851804550399698, "grad_norm": 0.76953125, "learning_rate": 1.1826336965306742e-05, "loss": 0.3198, "step": 11696 }, { "epoch": 0.885256137363417, "grad_norm": 0.81640625, "learning_rate": 1.1825165905079387e-05, "loss": 0.3513, "step": 11697 }, { "epoch": 0.8853318196868644, "grad_norm": 0.74609375, "learning_rate": 1.1823994818958922e-05, "loss": 0.3138, "step": 11698 }, { "epoch": 0.8854075020103117, "grad_norm": 0.75390625, "learning_rate": 1.1822823706961957e-05, "loss": 0.2867, "step": 11699 }, { "epoch": 0.885483184333759, "grad_norm": 0.75, "learning_rate": 1.1821652569105116e-05, "loss": 0.2937, "step": 11700 }, { "epoch": 0.8855588666572064, "grad_norm": 0.68359375, "learning_rate": 1.1820481405405003e-05, "loss": 0.2588, "step": 11701 }, { "epoch": 0.8856345489806537, "grad_norm": 0.77734375, "learning_rate": 1.1819310215878235e-05, "loss": 0.3261, "step": 11702 }, { "epoch": 0.885710231304101, "grad_norm": 0.75, "learning_rate": 1.1818139000541433e-05, "loss": 0.3133, "step": 11703 }, { "epoch": 0.8857859136275483, "grad_norm": 0.796875, "learning_rate": 1.1816967759411211e-05, "loss": 0.3383, "step": 11704 }, { "epoch": 0.8858615959509957, "grad_norm": 0.74609375, "learning_rate": 1.1815796492504179e-05, "loss": 0.3097, "step": 11705 }, { "epoch": 0.885937278274443, "grad_norm": 0.71875, "learning_rate": 1.181462519983696e-05, "loss": 0.2951, "step": 11706 }, { "epoch": 0.8860129605978904, "grad_norm": 1.2421875, "learning_rate": 1.1813453881426167e-05, "loss": 0.3691, "step": 11707 }, { "epoch": 0.8860886429213377, "grad_norm": 0.71875, "learning_rate": 1.1812282537288419e-05, "loss": 0.2811, "step": 11708 }, { "epoch": 0.8861643252447851, "grad_norm": 0.7734375, "learning_rate": 1.1811111167440338e-05, "loss": 0.2424, "step": 11709 }, { "epoch": 0.8862400075682323, "grad_norm": 1.1328125, "learning_rate": 1.1809939771898531e-05, "loss": 0.3255, "step": 11710 }, { "epoch": 0.8863156898916796, "grad_norm": 0.859375, "learning_rate": 1.1808768350679623e-05, "loss": 0.3366, "step": 11711 }, { "epoch": 0.886391372215127, "grad_norm": 0.8828125, "learning_rate": 1.1807596903800234e-05, "loss": 0.336, "step": 11712 }, { "epoch": 0.8864670545385743, "grad_norm": 0.890625, "learning_rate": 1.180642543127698e-05, "loss": 0.3248, "step": 11713 }, { "epoch": 0.8865427368620217, "grad_norm": 0.6875, "learning_rate": 1.1805253933126482e-05, "loss": 0.2347, "step": 11714 }, { "epoch": 0.886618419185469, "grad_norm": 0.765625, "learning_rate": 1.180408240936536e-05, "loss": 0.303, "step": 11715 }, { "epoch": 0.8866941015089164, "grad_norm": 0.80078125, "learning_rate": 1.1802910860010233e-05, "loss": 0.3214, "step": 11716 }, { "epoch": 0.8867697838323636, "grad_norm": 0.76171875, "learning_rate": 1.1801739285077718e-05, "loss": 0.2993, "step": 11717 }, { "epoch": 0.886845466155811, "grad_norm": 0.9140625, "learning_rate": 1.1800567684584444e-05, "loss": 0.315, "step": 11718 }, { "epoch": 0.8869211484792583, "grad_norm": 0.73828125, "learning_rate": 1.1799396058547024e-05, "loss": 0.2956, "step": 11719 }, { "epoch": 0.8869968308027056, "grad_norm": 0.77734375, "learning_rate": 1.1798224406982084e-05, "loss": 0.3165, "step": 11720 }, { "epoch": 0.887072513126153, "grad_norm": 0.74609375, "learning_rate": 1.1797052729906246e-05, "loss": 0.2895, "step": 11721 }, { "epoch": 0.8871481954496003, "grad_norm": 0.76171875, "learning_rate": 1.1795881027336131e-05, "loss": 0.3207, "step": 11722 }, { "epoch": 0.8872238777730477, "grad_norm": 0.75390625, "learning_rate": 1.179470929928836e-05, "loss": 0.309, "step": 11723 }, { "epoch": 0.8872995600964949, "grad_norm": 0.74609375, "learning_rate": 1.1793537545779562e-05, "loss": 0.3175, "step": 11724 }, { "epoch": 0.8873752424199423, "grad_norm": 0.7421875, "learning_rate": 1.1792365766826352e-05, "loss": 0.2991, "step": 11725 }, { "epoch": 0.8874509247433896, "grad_norm": 0.7109375, "learning_rate": 1.1791193962445358e-05, "loss": 0.2845, "step": 11726 }, { "epoch": 0.887526607066837, "grad_norm": 0.74609375, "learning_rate": 1.1790022132653202e-05, "loss": 0.2955, "step": 11727 }, { "epoch": 0.8876022893902843, "grad_norm": 0.75390625, "learning_rate": 1.1788850277466514e-05, "loss": 0.3258, "step": 11728 }, { "epoch": 0.8876779717137316, "grad_norm": 0.76171875, "learning_rate": 1.1787678396901916e-05, "loss": 0.3362, "step": 11729 }, { "epoch": 0.887753654037179, "grad_norm": 0.70703125, "learning_rate": 1.178650649097603e-05, "loss": 0.2537, "step": 11730 }, { "epoch": 0.8878293363606262, "grad_norm": 0.984375, "learning_rate": 1.1785334559705484e-05, "loss": 0.3548, "step": 11731 }, { "epoch": 0.8879050186840736, "grad_norm": 0.79296875, "learning_rate": 1.1784162603106904e-05, "loss": 0.3615, "step": 11732 }, { "epoch": 0.8879807010075209, "grad_norm": 0.71484375, "learning_rate": 1.1782990621196918e-05, "loss": 0.2779, "step": 11733 }, { "epoch": 0.8880563833309683, "grad_norm": 0.73046875, "learning_rate": 1.1781818613992144e-05, "loss": 0.2849, "step": 11734 }, { "epoch": 0.8881320656544156, "grad_norm": 0.7265625, "learning_rate": 1.1780646581509219e-05, "loss": 0.3058, "step": 11735 }, { "epoch": 0.888207747977863, "grad_norm": 0.74609375, "learning_rate": 1.1779474523764766e-05, "loss": 0.2969, "step": 11736 }, { "epoch": 0.8882834303013103, "grad_norm": 0.6953125, "learning_rate": 1.1778302440775413e-05, "loss": 0.2713, "step": 11737 }, { "epoch": 0.8883591126247575, "grad_norm": 0.78125, "learning_rate": 1.177713033255779e-05, "loss": 0.3211, "step": 11738 }, { "epoch": 0.8884347949482049, "grad_norm": 0.75, "learning_rate": 1.1775958199128524e-05, "loss": 0.3191, "step": 11739 }, { "epoch": 0.8885104772716522, "grad_norm": 0.73828125, "learning_rate": 1.1774786040504238e-05, "loss": 0.3025, "step": 11740 }, { "epoch": 0.8885861595950996, "grad_norm": 0.859375, "learning_rate": 1.1773613856701572e-05, "loss": 0.4146, "step": 11741 }, { "epoch": 0.8886618419185469, "grad_norm": 0.80859375, "learning_rate": 1.177244164773715e-05, "loss": 0.3545, "step": 11742 }, { "epoch": 0.8887375242419943, "grad_norm": 0.765625, "learning_rate": 1.1771269413627597e-05, "loss": 0.3191, "step": 11743 }, { "epoch": 0.8888132065654416, "grad_norm": 0.78125, "learning_rate": 1.177009715438955e-05, "loss": 0.3366, "step": 11744 }, { "epoch": 0.8888888888888888, "grad_norm": 0.76953125, "learning_rate": 1.1768924870039637e-05, "loss": 0.3115, "step": 11745 }, { "epoch": 0.8889645712123362, "grad_norm": 0.79296875, "learning_rate": 1.176775256059449e-05, "loss": 0.3476, "step": 11746 }, { "epoch": 0.8890402535357835, "grad_norm": 0.77734375, "learning_rate": 1.1766580226070741e-05, "loss": 0.3316, "step": 11747 }, { "epoch": 0.8891159358592309, "grad_norm": 0.734375, "learning_rate": 1.1765407866485017e-05, "loss": 0.3042, "step": 11748 }, { "epoch": 0.8891916181826782, "grad_norm": 0.8125, "learning_rate": 1.1764235481853951e-05, "loss": 0.3371, "step": 11749 }, { "epoch": 0.8892673005061256, "grad_norm": 0.765625, "learning_rate": 1.1763063072194181e-05, "loss": 0.3234, "step": 11750 }, { "epoch": 0.8893429828295729, "grad_norm": 0.78125, "learning_rate": 1.1761890637522337e-05, "loss": 0.3287, "step": 11751 }, { "epoch": 0.8894186651530201, "grad_norm": 0.73046875, "learning_rate": 1.1760718177855047e-05, "loss": 0.2982, "step": 11752 }, { "epoch": 0.8894943474764675, "grad_norm": 0.80078125, "learning_rate": 1.175954569320895e-05, "loss": 0.3548, "step": 11753 }, { "epoch": 0.8895700297999148, "grad_norm": 0.7421875, "learning_rate": 1.1758373183600678e-05, "loss": 0.3113, "step": 11754 }, { "epoch": 0.8896457121233622, "grad_norm": 0.78125, "learning_rate": 1.1757200649046863e-05, "loss": 0.2971, "step": 11755 }, { "epoch": 0.8897213944468095, "grad_norm": 0.7421875, "learning_rate": 1.1756028089564145e-05, "loss": 0.3092, "step": 11756 }, { "epoch": 0.8897970767702569, "grad_norm": 0.8046875, "learning_rate": 1.1754855505169154e-05, "loss": 0.3407, "step": 11757 }, { "epoch": 0.8898727590937042, "grad_norm": 0.74609375, "learning_rate": 1.1753682895878522e-05, "loss": 0.3175, "step": 11758 }, { "epoch": 0.8899484414171515, "grad_norm": 0.7890625, "learning_rate": 1.1752510261708892e-05, "loss": 0.3279, "step": 11759 }, { "epoch": 0.8900241237405988, "grad_norm": 0.68359375, "learning_rate": 1.17513376026769e-05, "loss": 0.2789, "step": 11760 }, { "epoch": 0.8900998060640462, "grad_norm": 0.7734375, "learning_rate": 1.1750164918799172e-05, "loss": 0.3214, "step": 11761 }, { "epoch": 0.8901754883874935, "grad_norm": 0.79296875, "learning_rate": 1.1748992210092354e-05, "loss": 0.3624, "step": 11762 }, { "epoch": 0.8902511707109408, "grad_norm": 0.72265625, "learning_rate": 1.174781947657308e-05, "loss": 0.3041, "step": 11763 }, { "epoch": 0.8903268530343882, "grad_norm": 0.765625, "learning_rate": 1.1746646718257987e-05, "loss": 0.3133, "step": 11764 }, { "epoch": 0.8904025353578355, "grad_norm": 0.80078125, "learning_rate": 1.1745473935163716e-05, "loss": 0.3278, "step": 11765 }, { "epoch": 0.8904782176812828, "grad_norm": 0.72265625, "learning_rate": 1.17443011273069e-05, "loss": 0.3069, "step": 11766 }, { "epoch": 0.8905539000047301, "grad_norm": 0.76953125, "learning_rate": 1.1743128294704178e-05, "loss": 0.3136, "step": 11767 }, { "epoch": 0.8906295823281775, "grad_norm": 0.8203125, "learning_rate": 1.174195543737219e-05, "loss": 0.3099, "step": 11768 }, { "epoch": 0.8907052646516248, "grad_norm": 0.76953125, "learning_rate": 1.1740782555327577e-05, "loss": 0.3043, "step": 11769 }, { "epoch": 0.8907809469750722, "grad_norm": 0.99609375, "learning_rate": 1.1739609648586973e-05, "loss": 0.3837, "step": 11770 }, { "epoch": 0.8908566292985195, "grad_norm": 0.77734375, "learning_rate": 1.1738436717167022e-05, "loss": 0.3378, "step": 11771 }, { "epoch": 0.8909323116219668, "grad_norm": 0.8984375, "learning_rate": 1.1737263761084366e-05, "loss": 0.3079, "step": 11772 }, { "epoch": 0.8910079939454141, "grad_norm": 0.75, "learning_rate": 1.1736090780355637e-05, "loss": 0.3318, "step": 11773 }, { "epoch": 0.8910836762688614, "grad_norm": 0.74609375, "learning_rate": 1.1734917774997486e-05, "loss": 0.307, "step": 11774 }, { "epoch": 0.8911593585923088, "grad_norm": 0.71484375, "learning_rate": 1.1733744745026544e-05, "loss": 0.2966, "step": 11775 }, { "epoch": 0.8912350409157561, "grad_norm": 0.828125, "learning_rate": 1.1732571690459462e-05, "loss": 0.3687, "step": 11776 }, { "epoch": 0.8913107232392035, "grad_norm": 0.76171875, "learning_rate": 1.1731398611312877e-05, "loss": 0.2989, "step": 11777 }, { "epoch": 0.8913864055626508, "grad_norm": 0.7421875, "learning_rate": 1.173022550760343e-05, "loss": 0.3019, "step": 11778 }, { "epoch": 0.8914620878860982, "grad_norm": 0.71484375, "learning_rate": 1.1729052379347766e-05, "loss": 0.3106, "step": 11779 }, { "epoch": 0.8915377702095454, "grad_norm": 0.73046875, "learning_rate": 1.1727879226562527e-05, "loss": 0.2943, "step": 11780 }, { "epoch": 0.8916134525329927, "grad_norm": 0.80078125, "learning_rate": 1.1726706049264353e-05, "loss": 0.3518, "step": 11781 }, { "epoch": 0.8916891348564401, "grad_norm": 0.77734375, "learning_rate": 1.1725532847469892e-05, "loss": 0.3316, "step": 11782 }, { "epoch": 0.8917648171798874, "grad_norm": 0.97265625, "learning_rate": 1.1724359621195788e-05, "loss": 0.3369, "step": 11783 }, { "epoch": 0.8918404995033348, "grad_norm": 0.73046875, "learning_rate": 1.1723186370458684e-05, "loss": 0.3144, "step": 11784 }, { "epoch": 0.8919161818267821, "grad_norm": 0.65234375, "learning_rate": 1.1722013095275223e-05, "loss": 0.2582, "step": 11785 }, { "epoch": 0.8919918641502295, "grad_norm": 0.7890625, "learning_rate": 1.172083979566205e-05, "loss": 0.3375, "step": 11786 }, { "epoch": 0.8920675464736767, "grad_norm": 0.76171875, "learning_rate": 1.1719666471635813e-05, "loss": 0.3359, "step": 11787 }, { "epoch": 0.892143228797124, "grad_norm": 1.140625, "learning_rate": 1.1718493123213157e-05, "loss": 0.3896, "step": 11788 }, { "epoch": 0.8922189111205714, "grad_norm": 0.75, "learning_rate": 1.1717319750410727e-05, "loss": 0.3065, "step": 11789 }, { "epoch": 0.8922945934440187, "grad_norm": 0.7265625, "learning_rate": 1.1716146353245165e-05, "loss": 0.3168, "step": 11790 }, { "epoch": 0.8923702757674661, "grad_norm": 0.70703125, "learning_rate": 1.1714972931733125e-05, "loss": 0.291, "step": 11791 }, { "epoch": 0.8924459580909134, "grad_norm": 0.76171875, "learning_rate": 1.1713799485891253e-05, "loss": 0.318, "step": 11792 }, { "epoch": 0.8925216404143608, "grad_norm": 0.75, "learning_rate": 1.1712626015736193e-05, "loss": 0.3312, "step": 11793 }, { "epoch": 0.892597322737808, "grad_norm": 0.75, "learning_rate": 1.1711452521284593e-05, "loss": 0.3062, "step": 11794 }, { "epoch": 0.8926730050612554, "grad_norm": 0.69140625, "learning_rate": 1.1710279002553103e-05, "loss": 0.2817, "step": 11795 }, { "epoch": 0.8927486873847027, "grad_norm": 0.78125, "learning_rate": 1.1709105459558367e-05, "loss": 0.3284, "step": 11796 }, { "epoch": 0.89282436970815, "grad_norm": 0.81640625, "learning_rate": 1.1707931892317041e-05, "loss": 0.3662, "step": 11797 }, { "epoch": 0.8929000520315974, "grad_norm": 0.71875, "learning_rate": 1.170675830084577e-05, "loss": 0.2811, "step": 11798 }, { "epoch": 0.8929757343550447, "grad_norm": 0.953125, "learning_rate": 1.1705584685161202e-05, "loss": 0.3497, "step": 11799 }, { "epoch": 0.8930514166784921, "grad_norm": 0.75, "learning_rate": 1.1704411045279989e-05, "loss": 0.2957, "step": 11800 }, { "epoch": 0.8931270990019393, "grad_norm": 0.8359375, "learning_rate": 1.1703237381218782e-05, "loss": 0.3534, "step": 11801 }, { "epoch": 0.8932027813253867, "grad_norm": 0.765625, "learning_rate": 1.1702063692994227e-05, "loss": 0.2993, "step": 11802 }, { "epoch": 0.893278463648834, "grad_norm": 0.7890625, "learning_rate": 1.170088998062298e-05, "loss": 0.3532, "step": 11803 }, { "epoch": 0.8933541459722814, "grad_norm": 0.78515625, "learning_rate": 1.169971624412169e-05, "loss": 0.3254, "step": 11804 }, { "epoch": 0.8934298282957287, "grad_norm": 0.78125, "learning_rate": 1.1698542483507004e-05, "loss": 0.3194, "step": 11805 }, { "epoch": 0.893505510619176, "grad_norm": 0.82421875, "learning_rate": 1.1697368698795581e-05, "loss": 0.3031, "step": 11806 }, { "epoch": 0.8935811929426234, "grad_norm": 0.77734375, "learning_rate": 1.1696194890004076e-05, "loss": 0.2701, "step": 11807 }, { "epoch": 0.8936568752660706, "grad_norm": 0.7890625, "learning_rate": 1.1695021057149128e-05, "loss": 0.3234, "step": 11808 }, { "epoch": 0.893732557589518, "grad_norm": 0.78515625, "learning_rate": 1.16938472002474e-05, "loss": 0.3195, "step": 11809 }, { "epoch": 0.8938082399129653, "grad_norm": 0.8125, "learning_rate": 1.1692673319315541e-05, "loss": 0.344, "step": 11810 }, { "epoch": 0.8938839222364127, "grad_norm": 0.8125, "learning_rate": 1.1691499414370207e-05, "loss": 0.356, "step": 11811 }, { "epoch": 0.89395960455986, "grad_norm": 0.78515625, "learning_rate": 1.1690325485428054e-05, "loss": 0.3548, "step": 11812 }, { "epoch": 0.8940352868833074, "grad_norm": 0.72265625, "learning_rate": 1.1689151532505732e-05, "loss": 0.3001, "step": 11813 }, { "epoch": 0.8941109692067547, "grad_norm": 0.72265625, "learning_rate": 1.1687977555619894e-05, "loss": 0.2952, "step": 11814 }, { "epoch": 0.8941866515302019, "grad_norm": 0.671875, "learning_rate": 1.1686803554787198e-05, "loss": 0.2438, "step": 11815 }, { "epoch": 0.8942623338536493, "grad_norm": 0.80078125, "learning_rate": 1.1685629530024305e-05, "loss": 0.3546, "step": 11816 }, { "epoch": 0.8943380161770966, "grad_norm": 0.75390625, "learning_rate": 1.1684455481347858e-05, "loss": 0.3434, "step": 11817 }, { "epoch": 0.894413698500544, "grad_norm": 0.74609375, "learning_rate": 1.1683281408774521e-05, "loss": 0.3092, "step": 11818 }, { "epoch": 0.8944893808239913, "grad_norm": 0.80078125, "learning_rate": 1.168210731232095e-05, "loss": 0.3384, "step": 11819 }, { "epoch": 0.8945650631474387, "grad_norm": 0.78125, "learning_rate": 1.1680933192003796e-05, "loss": 0.3234, "step": 11820 }, { "epoch": 0.8946407454708859, "grad_norm": 1.015625, "learning_rate": 1.1679759047839727e-05, "loss": 0.3472, "step": 11821 }, { "epoch": 0.8947164277943332, "grad_norm": 0.765625, "learning_rate": 1.1678584879845389e-05, "loss": 0.2808, "step": 11822 }, { "epoch": 0.8947921101177806, "grad_norm": 0.765625, "learning_rate": 1.1677410688037443e-05, "loss": 0.3256, "step": 11823 }, { "epoch": 0.8948677924412279, "grad_norm": 0.6953125, "learning_rate": 1.1676236472432549e-05, "loss": 0.2897, "step": 11824 }, { "epoch": 0.8949434747646753, "grad_norm": 0.73828125, "learning_rate": 1.1675062233047365e-05, "loss": 0.3018, "step": 11825 }, { "epoch": 0.8950191570881226, "grad_norm": 0.69921875, "learning_rate": 1.167388796989855e-05, "loss": 0.2665, "step": 11826 }, { "epoch": 0.89509483941157, "grad_norm": 0.7421875, "learning_rate": 1.1672713683002759e-05, "loss": 0.299, "step": 11827 }, { "epoch": 0.8951705217350172, "grad_norm": 0.75, "learning_rate": 1.1671539372376654e-05, "loss": 0.2846, "step": 11828 }, { "epoch": 0.8952462040584646, "grad_norm": 0.8203125, "learning_rate": 1.1670365038036896e-05, "loss": 0.3509, "step": 11829 }, { "epoch": 0.8953218863819119, "grad_norm": 0.75390625, "learning_rate": 1.1669190680000142e-05, "loss": 0.3302, "step": 11830 }, { "epoch": 0.8953975687053592, "grad_norm": 0.76171875, "learning_rate": 1.1668016298283055e-05, "loss": 0.3052, "step": 11831 }, { "epoch": 0.8954732510288066, "grad_norm": 0.78125, "learning_rate": 1.1666841892902294e-05, "loss": 0.3512, "step": 11832 }, { "epoch": 0.8955489333522539, "grad_norm": 0.7265625, "learning_rate": 1.1665667463874521e-05, "loss": 0.2933, "step": 11833 }, { "epoch": 0.8956246156757013, "grad_norm": 0.80078125, "learning_rate": 1.1664493011216396e-05, "loss": 0.335, "step": 11834 }, { "epoch": 0.8957002979991485, "grad_norm": 0.78515625, "learning_rate": 1.1663318534944581e-05, "loss": 0.331, "step": 11835 }, { "epoch": 0.8957759803225959, "grad_norm": 0.7578125, "learning_rate": 1.166214403507574e-05, "loss": 0.3151, "step": 11836 }, { "epoch": 0.8958516626460432, "grad_norm": 0.734375, "learning_rate": 1.1660969511626529e-05, "loss": 0.3034, "step": 11837 }, { "epoch": 0.8959273449694906, "grad_norm": 0.67578125, "learning_rate": 1.1659794964613619e-05, "loss": 0.2583, "step": 11838 }, { "epoch": 0.8960030272929379, "grad_norm": 0.8359375, "learning_rate": 1.1658620394053668e-05, "loss": 0.3803, "step": 11839 }, { "epoch": 0.8960787096163852, "grad_norm": 0.75, "learning_rate": 1.1657445799963343e-05, "loss": 0.3151, "step": 11840 }, { "epoch": 0.8961543919398326, "grad_norm": 0.7890625, "learning_rate": 1.1656271182359302e-05, "loss": 0.2925, "step": 11841 }, { "epoch": 0.8962300742632798, "grad_norm": 0.765625, "learning_rate": 1.1655096541258213e-05, "loss": 0.3071, "step": 11842 }, { "epoch": 0.8963057565867272, "grad_norm": 0.76171875, "learning_rate": 1.165392187667674e-05, "loss": 0.3188, "step": 11843 }, { "epoch": 0.8963814389101745, "grad_norm": 0.8515625, "learning_rate": 1.1652747188631543e-05, "loss": 0.3658, "step": 11844 }, { "epoch": 0.8964571212336219, "grad_norm": 0.828125, "learning_rate": 1.1651572477139298e-05, "loss": 0.3509, "step": 11845 }, { "epoch": 0.8965328035570692, "grad_norm": 0.78125, "learning_rate": 1.1650397742216657e-05, "loss": 0.3471, "step": 11846 }, { "epoch": 0.8966084858805166, "grad_norm": 0.76171875, "learning_rate": 1.1649222983880294e-05, "loss": 0.3357, "step": 11847 }, { "epoch": 0.8966841682039639, "grad_norm": 0.6875, "learning_rate": 1.1648048202146873e-05, "loss": 0.2446, "step": 11848 }, { "epoch": 0.8967598505274111, "grad_norm": 0.73828125, "learning_rate": 1.164687339703306e-05, "loss": 0.2751, "step": 11849 }, { "epoch": 0.8968355328508585, "grad_norm": 0.7578125, "learning_rate": 1.1645698568555522e-05, "loss": 0.3381, "step": 11850 }, { "epoch": 0.8969112151743058, "grad_norm": 0.69921875, "learning_rate": 1.1644523716730926e-05, "loss": 0.2668, "step": 11851 }, { "epoch": 0.8969868974977532, "grad_norm": 0.78515625, "learning_rate": 1.1643348841575936e-05, "loss": 0.3419, "step": 11852 }, { "epoch": 0.8970625798212005, "grad_norm": 0.796875, "learning_rate": 1.1642173943107227e-05, "loss": 0.3358, "step": 11853 }, { "epoch": 0.8971382621446479, "grad_norm": 0.72265625, "learning_rate": 1.164099902134146e-05, "loss": 0.2916, "step": 11854 }, { "epoch": 0.8972139444680952, "grad_norm": 0.77734375, "learning_rate": 1.1639824076295306e-05, "loss": 0.3546, "step": 11855 }, { "epoch": 0.8972896267915424, "grad_norm": 0.79296875, "learning_rate": 1.1638649107985433e-05, "loss": 0.3377, "step": 11856 }, { "epoch": 0.8973653091149898, "grad_norm": 0.7734375, "learning_rate": 1.1637474116428513e-05, "loss": 0.3239, "step": 11857 }, { "epoch": 0.8974409914384371, "grad_norm": 0.71875, "learning_rate": 1.1636299101641209e-05, "loss": 0.3107, "step": 11858 }, { "epoch": 0.8975166737618845, "grad_norm": 0.76953125, "learning_rate": 1.1635124063640198e-05, "loss": 0.2976, "step": 11859 }, { "epoch": 0.8975923560853318, "grad_norm": 0.6953125, "learning_rate": 1.1633949002442145e-05, "loss": 0.2635, "step": 11860 }, { "epoch": 0.8976680384087792, "grad_norm": 0.7109375, "learning_rate": 1.1632773918063718e-05, "loss": 0.2982, "step": 11861 }, { "epoch": 0.8977437207322265, "grad_norm": 0.78515625, "learning_rate": 1.1631598810521596e-05, "loss": 0.339, "step": 11862 }, { "epoch": 0.8978194030556738, "grad_norm": 0.73828125, "learning_rate": 1.1630423679832442e-05, "loss": 0.3104, "step": 11863 }, { "epoch": 0.8978950853791211, "grad_norm": 0.73828125, "learning_rate": 1.162924852601293e-05, "loss": 0.3137, "step": 11864 }, { "epoch": 0.8979707677025685, "grad_norm": 0.7578125, "learning_rate": 1.1628073349079734e-05, "loss": 0.3234, "step": 11865 }, { "epoch": 0.8980464500260158, "grad_norm": 0.71875, "learning_rate": 1.1626898149049523e-05, "loss": 0.2804, "step": 11866 }, { "epoch": 0.8981221323494631, "grad_norm": 0.72265625, "learning_rate": 1.1625722925938967e-05, "loss": 0.2851, "step": 11867 }, { "epoch": 0.8981978146729105, "grad_norm": 0.76171875, "learning_rate": 1.1624547679764745e-05, "loss": 0.3275, "step": 11868 }, { "epoch": 0.8982734969963578, "grad_norm": 0.7734375, "learning_rate": 1.1623372410543527e-05, "loss": 0.3216, "step": 11869 }, { "epoch": 0.8983491793198051, "grad_norm": 0.734375, "learning_rate": 1.1622197118291982e-05, "loss": 0.2972, "step": 11870 }, { "epoch": 0.8984248616432524, "grad_norm": 0.75, "learning_rate": 1.1621021803026789e-05, "loss": 0.3069, "step": 11871 }, { "epoch": 0.8985005439666998, "grad_norm": 0.7421875, "learning_rate": 1.161984646476462e-05, "loss": 0.2903, "step": 11872 }, { "epoch": 0.8985762262901471, "grad_norm": 0.84765625, "learning_rate": 1.1618671103522148e-05, "loss": 0.3561, "step": 11873 }, { "epoch": 0.8986519086135945, "grad_norm": 0.73828125, "learning_rate": 1.161749571931605e-05, "loss": 0.3122, "step": 11874 }, { "epoch": 0.8987275909370418, "grad_norm": 1.046875, "learning_rate": 1.1616320312162999e-05, "loss": 0.3642, "step": 11875 }, { "epoch": 0.8988032732604891, "grad_norm": 0.765625, "learning_rate": 1.161514488207967e-05, "loss": 0.3193, "step": 11876 }, { "epoch": 0.8988789555839364, "grad_norm": 0.71484375, "learning_rate": 1.1613969429082742e-05, "loss": 0.2709, "step": 11877 }, { "epoch": 0.8989546379073837, "grad_norm": 0.6953125, "learning_rate": 1.1612793953188885e-05, "loss": 0.2878, "step": 11878 }, { "epoch": 0.8990303202308311, "grad_norm": 0.7265625, "learning_rate": 1.1611618454414777e-05, "loss": 0.2661, "step": 11879 }, { "epoch": 0.8991060025542784, "grad_norm": 0.75, "learning_rate": 1.1610442932777098e-05, "loss": 0.3247, "step": 11880 }, { "epoch": 0.8991816848777258, "grad_norm": 0.75390625, "learning_rate": 1.1609267388292523e-05, "loss": 0.3287, "step": 11881 }, { "epoch": 0.8992573672011731, "grad_norm": 0.72265625, "learning_rate": 1.1608091820977728e-05, "loss": 0.2798, "step": 11882 }, { "epoch": 0.8993330495246205, "grad_norm": 0.76171875, "learning_rate": 1.160691623084939e-05, "loss": 0.3133, "step": 11883 }, { "epoch": 0.8994087318480677, "grad_norm": 0.9609375, "learning_rate": 1.1605740617924188e-05, "loss": 0.3349, "step": 11884 }, { "epoch": 0.899484414171515, "grad_norm": 0.76953125, "learning_rate": 1.1604564982218798e-05, "loss": 0.31, "step": 11885 }, { "epoch": 0.8995600964949624, "grad_norm": 0.7578125, "learning_rate": 1.1603389323749902e-05, "loss": 0.3279, "step": 11886 }, { "epoch": 0.8996357788184097, "grad_norm": 0.8046875, "learning_rate": 1.1602213642534177e-05, "loss": 0.3377, "step": 11887 }, { "epoch": 0.8997114611418571, "grad_norm": 0.73828125, "learning_rate": 1.1601037938588302e-05, "loss": 0.3014, "step": 11888 }, { "epoch": 0.8997871434653044, "grad_norm": 0.7734375, "learning_rate": 1.1599862211928958e-05, "loss": 0.315, "step": 11889 }, { "epoch": 0.8998628257887518, "grad_norm": 0.734375, "learning_rate": 1.1598686462572822e-05, "loss": 0.3135, "step": 11890 }, { "epoch": 0.899938508112199, "grad_norm": 0.80859375, "learning_rate": 1.1597510690536576e-05, "loss": 0.3523, "step": 11891 }, { "epoch": 0.9000141904356463, "grad_norm": 0.70703125, "learning_rate": 1.1596334895836896e-05, "loss": 0.2731, "step": 11892 }, { "epoch": 0.9000898727590937, "grad_norm": 0.78125, "learning_rate": 1.1595159078490465e-05, "loss": 0.308, "step": 11893 }, { "epoch": 0.900165555082541, "grad_norm": 0.71484375, "learning_rate": 1.1593983238513971e-05, "loss": 0.2638, "step": 11894 }, { "epoch": 0.9002412374059884, "grad_norm": 0.7421875, "learning_rate": 1.1592807375924084e-05, "loss": 0.2833, "step": 11895 }, { "epoch": 0.9003169197294357, "grad_norm": 0.8046875, "learning_rate": 1.1591631490737495e-05, "loss": 0.3632, "step": 11896 }, { "epoch": 0.9003926020528831, "grad_norm": 0.75, "learning_rate": 1.159045558297088e-05, "loss": 0.3026, "step": 11897 }, { "epoch": 0.9004682843763303, "grad_norm": 0.79296875, "learning_rate": 1.1589279652640926e-05, "loss": 0.3138, "step": 11898 }, { "epoch": 0.9004682843763303, "eval_loss": 0.32722821831703186, "eval_runtime": 83.5552, "eval_samples_per_second": 58.189, "eval_steps_per_second": 58.189, "step": 11898 }, { "epoch": 0.9005439666997777, "grad_norm": 0.7421875, "learning_rate": 1.1588103699764311e-05, "loss": 0.3179, "step": 11899 }, { "epoch": 0.900619649023225, "grad_norm": 0.7421875, "learning_rate": 1.1586927724357717e-05, "loss": 0.2809, "step": 11900 }, { "epoch": 0.9006953313466723, "grad_norm": 0.74609375, "learning_rate": 1.1585751726437835e-05, "loss": 0.295, "step": 11901 }, { "epoch": 0.9007710136701197, "grad_norm": 0.74609375, "learning_rate": 1.1584575706021337e-05, "loss": 0.3259, "step": 11902 }, { "epoch": 0.900846695993567, "grad_norm": 0.76171875, "learning_rate": 1.1583399663124919e-05, "loss": 0.3287, "step": 11903 }, { "epoch": 0.9009223783170144, "grad_norm": 0.734375, "learning_rate": 1.158222359776526e-05, "loss": 0.3282, "step": 11904 }, { "epoch": 0.9009980606404616, "grad_norm": 0.80078125, "learning_rate": 1.158104750995904e-05, "loss": 0.3412, "step": 11905 }, { "epoch": 0.901073742963909, "grad_norm": 0.79296875, "learning_rate": 1.157987139972295e-05, "loss": 0.3641, "step": 11906 }, { "epoch": 0.9011494252873563, "grad_norm": 0.75, "learning_rate": 1.1578695267073672e-05, "loss": 0.3342, "step": 11907 }, { "epoch": 0.9012251076108037, "grad_norm": 0.76171875, "learning_rate": 1.157751911202789e-05, "loss": 0.3236, "step": 11908 }, { "epoch": 0.901300789934251, "grad_norm": 0.76171875, "learning_rate": 1.1576342934602298e-05, "loss": 0.3435, "step": 11909 }, { "epoch": 0.9013764722576983, "grad_norm": 0.734375, "learning_rate": 1.1575166734813576e-05, "loss": 0.3145, "step": 11910 }, { "epoch": 0.9014521545811457, "grad_norm": 0.76171875, "learning_rate": 1.1573990512678406e-05, "loss": 0.3099, "step": 11911 }, { "epoch": 0.9015278369045929, "grad_norm": 0.75, "learning_rate": 1.1572814268213483e-05, "loss": 0.3026, "step": 11912 }, { "epoch": 0.9016035192280403, "grad_norm": 0.72265625, "learning_rate": 1.157163800143549e-05, "loss": 0.2905, "step": 11913 }, { "epoch": 0.9016792015514876, "grad_norm": 0.74609375, "learning_rate": 1.1570461712361114e-05, "loss": 0.3041, "step": 11914 }, { "epoch": 0.901754883874935, "grad_norm": 0.84765625, "learning_rate": 1.1569285401007046e-05, "loss": 0.3781, "step": 11915 }, { "epoch": 0.9018305661983823, "grad_norm": 0.80859375, "learning_rate": 1.156810906738997e-05, "loss": 0.3234, "step": 11916 }, { "epoch": 0.9019062485218297, "grad_norm": 0.90625, "learning_rate": 1.1566932711526575e-05, "loss": 0.264, "step": 11917 }, { "epoch": 0.901981930845277, "grad_norm": 0.7734375, "learning_rate": 1.156575633343355e-05, "loss": 0.3021, "step": 11918 }, { "epoch": 0.9020576131687242, "grad_norm": 0.76953125, "learning_rate": 1.1564579933127589e-05, "loss": 0.3086, "step": 11919 }, { "epoch": 0.9021332954921716, "grad_norm": 0.73828125, "learning_rate": 1.156340351062537e-05, "loss": 0.2888, "step": 11920 }, { "epoch": 0.9022089778156189, "grad_norm": 0.80078125, "learning_rate": 1.1562227065943595e-05, "loss": 0.3669, "step": 11921 }, { "epoch": 0.9022846601390663, "grad_norm": 0.75, "learning_rate": 1.1561050599098946e-05, "loss": 0.2954, "step": 11922 }, { "epoch": 0.9023603424625136, "grad_norm": 0.71484375, "learning_rate": 1.1559874110108113e-05, "loss": 0.2994, "step": 11923 }, { "epoch": 0.902436024785961, "grad_norm": 0.8046875, "learning_rate": 1.1558697598987794e-05, "loss": 0.3599, "step": 11924 }, { "epoch": 0.9025117071094083, "grad_norm": 0.74609375, "learning_rate": 1.1557521065754673e-05, "loss": 0.2936, "step": 11925 }, { "epoch": 0.9025873894328555, "grad_norm": 0.69921875, "learning_rate": 1.1556344510425439e-05, "loss": 0.2916, "step": 11926 }, { "epoch": 0.9026630717563029, "grad_norm": 0.7421875, "learning_rate": 1.155516793301679e-05, "loss": 0.3111, "step": 11927 }, { "epoch": 0.9027387540797502, "grad_norm": 1.0078125, "learning_rate": 1.1553991333545419e-05, "loss": 0.3302, "step": 11928 }, { "epoch": 0.9028144364031976, "grad_norm": 0.7265625, "learning_rate": 1.1552814712028009e-05, "loss": 0.3049, "step": 11929 }, { "epoch": 0.9028901187266449, "grad_norm": 0.8046875, "learning_rate": 1.1551638068481259e-05, "loss": 0.3356, "step": 11930 }, { "epoch": 0.9029658010500923, "grad_norm": 0.7734375, "learning_rate": 1.1550461402921863e-05, "loss": 0.3144, "step": 11931 }, { "epoch": 0.9030414833735396, "grad_norm": 0.7109375, "learning_rate": 1.1549284715366506e-05, "loss": 0.3028, "step": 11932 }, { "epoch": 0.9031171656969869, "grad_norm": 0.765625, "learning_rate": 1.154810800583189e-05, "loss": 0.3215, "step": 11933 }, { "epoch": 0.9031928480204342, "grad_norm": 0.6875, "learning_rate": 1.1546931274334708e-05, "loss": 0.2895, "step": 11934 }, { "epoch": 0.9032685303438815, "grad_norm": 0.7890625, "learning_rate": 1.1545754520891645e-05, "loss": 0.2933, "step": 11935 }, { "epoch": 0.9033442126673289, "grad_norm": 0.703125, "learning_rate": 1.1544577745519404e-05, "loss": 0.2903, "step": 11936 }, { "epoch": 0.9034198949907762, "grad_norm": 1.078125, "learning_rate": 1.1543400948234677e-05, "loss": 0.3696, "step": 11937 }, { "epoch": 0.9034955773142236, "grad_norm": 0.76953125, "learning_rate": 1.154222412905416e-05, "loss": 0.3188, "step": 11938 }, { "epoch": 0.9035712596376709, "grad_norm": 0.78515625, "learning_rate": 1.1541047287994548e-05, "loss": 0.3462, "step": 11939 }, { "epoch": 0.9036469419611182, "grad_norm": 0.7890625, "learning_rate": 1.1539870425072535e-05, "loss": 0.354, "step": 11940 }, { "epoch": 0.9037226242845655, "grad_norm": 0.79296875, "learning_rate": 1.1538693540304815e-05, "loss": 0.3287, "step": 11941 }, { "epoch": 0.9037983066080129, "grad_norm": 0.7578125, "learning_rate": 1.153751663370809e-05, "loss": 0.3345, "step": 11942 }, { "epoch": 0.9038739889314602, "grad_norm": 0.8046875, "learning_rate": 1.1536339705299051e-05, "loss": 0.3342, "step": 11943 }, { "epoch": 0.9039496712549075, "grad_norm": 0.7421875, "learning_rate": 1.1535162755094397e-05, "loss": 0.3119, "step": 11944 }, { "epoch": 0.9040253535783549, "grad_norm": 0.71484375, "learning_rate": 1.1533985783110829e-05, "loss": 0.3122, "step": 11945 }, { "epoch": 0.9041010359018021, "grad_norm": 0.7578125, "learning_rate": 1.1532808789365036e-05, "loss": 0.3159, "step": 11946 }, { "epoch": 0.9041767182252495, "grad_norm": 0.8515625, "learning_rate": 1.1531631773873719e-05, "loss": 0.3109, "step": 11947 }, { "epoch": 0.9042524005486968, "grad_norm": 0.76953125, "learning_rate": 1.153045473665358e-05, "loss": 0.2646, "step": 11948 }, { "epoch": 0.9043280828721442, "grad_norm": 0.76171875, "learning_rate": 1.152927767772131e-05, "loss": 0.3146, "step": 11949 }, { "epoch": 0.9044037651955915, "grad_norm": 0.9453125, "learning_rate": 1.1528100597093617e-05, "loss": 0.3738, "step": 11950 }, { "epoch": 0.9044794475190389, "grad_norm": 0.73046875, "learning_rate": 1.152692349478719e-05, "loss": 0.3084, "step": 11951 }, { "epoch": 0.9045551298424862, "grad_norm": 0.765625, "learning_rate": 1.1525746370818735e-05, "loss": 0.3333, "step": 11952 }, { "epoch": 0.9046308121659334, "grad_norm": 0.78515625, "learning_rate": 1.1524569225204951e-05, "loss": 0.3001, "step": 11953 }, { "epoch": 0.9047064944893808, "grad_norm": 0.7734375, "learning_rate": 1.1523392057962538e-05, "loss": 0.321, "step": 11954 }, { "epoch": 0.9047821768128281, "grad_norm": 0.8828125, "learning_rate": 1.152221486910819e-05, "loss": 0.3187, "step": 11955 }, { "epoch": 0.9048578591362755, "grad_norm": 0.7890625, "learning_rate": 1.1521037658658612e-05, "loss": 0.3459, "step": 11956 }, { "epoch": 0.9049335414597228, "grad_norm": 0.74609375, "learning_rate": 1.1519860426630507e-05, "loss": 0.2758, "step": 11957 }, { "epoch": 0.9050092237831702, "grad_norm": 0.94921875, "learning_rate": 1.1518683173040573e-05, "loss": 0.3112, "step": 11958 }, { "epoch": 0.9050849061066175, "grad_norm": 0.78515625, "learning_rate": 1.151750589790551e-05, "loss": 0.3226, "step": 11959 }, { "epoch": 0.9051605884300647, "grad_norm": 0.765625, "learning_rate": 1.1516328601242024e-05, "loss": 0.3199, "step": 11960 }, { "epoch": 0.9052362707535121, "grad_norm": 0.80078125, "learning_rate": 1.1515151283066814e-05, "loss": 0.332, "step": 11961 }, { "epoch": 0.9053119530769594, "grad_norm": 0.7578125, "learning_rate": 1.1513973943396584e-05, "loss": 0.3142, "step": 11962 }, { "epoch": 0.9053876354004068, "grad_norm": 0.79296875, "learning_rate": 1.1512796582248034e-05, "loss": 0.3689, "step": 11963 }, { "epoch": 0.9054633177238541, "grad_norm": 0.70703125, "learning_rate": 1.1511619199637867e-05, "loss": 0.2724, "step": 11964 }, { "epoch": 0.9055390000473015, "grad_norm": 0.70703125, "learning_rate": 1.151044179558279e-05, "loss": 0.3063, "step": 11965 }, { "epoch": 0.9056146823707488, "grad_norm": 0.75390625, "learning_rate": 1.1509264370099506e-05, "loss": 0.306, "step": 11966 }, { "epoch": 0.9056903646941961, "grad_norm": 0.7578125, "learning_rate": 1.1508086923204714e-05, "loss": 0.33, "step": 11967 }, { "epoch": 0.9057660470176434, "grad_norm": 0.69921875, "learning_rate": 1.150690945491512e-05, "loss": 0.2931, "step": 11968 }, { "epoch": 0.9058417293410908, "grad_norm": 0.73828125, "learning_rate": 1.1505731965247432e-05, "loss": 0.3194, "step": 11969 }, { "epoch": 0.9059174116645381, "grad_norm": 0.7890625, "learning_rate": 1.150455445421835e-05, "loss": 0.3356, "step": 11970 }, { "epoch": 0.9059930939879854, "grad_norm": 0.734375, "learning_rate": 1.1503376921844584e-05, "loss": 0.3082, "step": 11971 }, { "epoch": 0.9060687763114328, "grad_norm": 0.7578125, "learning_rate": 1.1502199368142835e-05, "loss": 0.3097, "step": 11972 }, { "epoch": 0.9061444586348801, "grad_norm": 0.76953125, "learning_rate": 1.150102179312981e-05, "loss": 0.3099, "step": 11973 }, { "epoch": 0.9062201409583274, "grad_norm": 0.7890625, "learning_rate": 1.1499844196822213e-05, "loss": 0.3168, "step": 11974 }, { "epoch": 0.9062958232817747, "grad_norm": 0.703125, "learning_rate": 1.1498666579236756e-05, "loss": 0.311, "step": 11975 }, { "epoch": 0.9063715056052221, "grad_norm": 0.75, "learning_rate": 1.149748894039014e-05, "loss": 0.3048, "step": 11976 }, { "epoch": 0.9064471879286694, "grad_norm": 0.7109375, "learning_rate": 1.1496311280299073e-05, "loss": 0.2794, "step": 11977 }, { "epoch": 0.9065228702521168, "grad_norm": 0.83203125, "learning_rate": 1.1495133598980263e-05, "loss": 0.377, "step": 11978 }, { "epoch": 0.9065985525755641, "grad_norm": 0.78515625, "learning_rate": 1.1493955896450417e-05, "loss": 0.3269, "step": 11979 }, { "epoch": 0.9066742348990114, "grad_norm": 0.75390625, "learning_rate": 1.1492778172726246e-05, "loss": 0.3209, "step": 11980 }, { "epoch": 0.9067499172224587, "grad_norm": 0.79296875, "learning_rate": 1.1491600427824451e-05, "loss": 0.3352, "step": 11981 }, { "epoch": 0.906825599545906, "grad_norm": 0.73828125, "learning_rate": 1.1490422661761744e-05, "loss": 0.2996, "step": 11982 }, { "epoch": 0.9069012818693534, "grad_norm": 0.7578125, "learning_rate": 1.1489244874554837e-05, "loss": 0.3068, "step": 11983 }, { "epoch": 0.9069769641928007, "grad_norm": 0.72265625, "learning_rate": 1.1488067066220433e-05, "loss": 0.2907, "step": 11984 }, { "epoch": 0.9070526465162481, "grad_norm": 0.78125, "learning_rate": 1.1486889236775246e-05, "loss": 0.2889, "step": 11985 }, { "epoch": 0.9071283288396954, "grad_norm": 0.98046875, "learning_rate": 1.1485711386235985e-05, "loss": 0.3293, "step": 11986 }, { "epoch": 0.9072040111631428, "grad_norm": 0.9140625, "learning_rate": 1.1484533514619357e-05, "loss": 0.309, "step": 11987 }, { "epoch": 0.90727969348659, "grad_norm": 0.7734375, "learning_rate": 1.1483355621942069e-05, "loss": 0.3016, "step": 11988 }, { "epoch": 0.9073553758100373, "grad_norm": 0.72265625, "learning_rate": 1.1482177708220843e-05, "loss": 0.3078, "step": 11989 }, { "epoch": 0.9074310581334847, "grad_norm": 0.67578125, "learning_rate": 1.148099977347238e-05, "loss": 0.2543, "step": 11990 }, { "epoch": 0.907506740456932, "grad_norm": 0.921875, "learning_rate": 1.1479821817713392e-05, "loss": 0.3178, "step": 11991 }, { "epoch": 0.9075824227803794, "grad_norm": 0.78125, "learning_rate": 1.1478643840960593e-05, "loss": 0.3113, "step": 11992 }, { "epoch": 0.9076581051038267, "grad_norm": 0.7734375, "learning_rate": 1.1477465843230695e-05, "loss": 0.3162, "step": 11993 }, { "epoch": 0.9077337874272741, "grad_norm": 0.76171875, "learning_rate": 1.1476287824540409e-05, "loss": 0.3115, "step": 11994 }, { "epoch": 0.9078094697507213, "grad_norm": 0.75390625, "learning_rate": 1.1475109784906444e-05, "loss": 0.3332, "step": 11995 }, { "epoch": 0.9078851520741686, "grad_norm": 0.76953125, "learning_rate": 1.1473931724345517e-05, "loss": 0.327, "step": 11996 }, { "epoch": 0.907960834397616, "grad_norm": 0.88671875, "learning_rate": 1.1472753642874337e-05, "loss": 0.3785, "step": 11997 }, { "epoch": 0.9080365167210633, "grad_norm": 0.7109375, "learning_rate": 1.1471575540509624e-05, "loss": 0.2919, "step": 11998 }, { "epoch": 0.9081121990445107, "grad_norm": 0.796875, "learning_rate": 1.1470397417268082e-05, "loss": 0.31, "step": 11999 }, { "epoch": 0.908187881367958, "grad_norm": 0.8203125, "learning_rate": 1.1469219273166432e-05, "loss": 0.3588, "step": 12000 }, { "epoch": 0.9082635636914054, "grad_norm": 0.78125, "learning_rate": 1.1468041108221385e-05, "loss": 0.3423, "step": 12001 }, { "epoch": 0.9083392460148526, "grad_norm": 0.76171875, "learning_rate": 1.1466862922449655e-05, "loss": 0.3165, "step": 12002 }, { "epoch": 0.9084149283383, "grad_norm": 0.828125, "learning_rate": 1.1465684715867958e-05, "loss": 0.348, "step": 12003 }, { "epoch": 0.9084906106617473, "grad_norm": 0.69921875, "learning_rate": 1.1464506488493008e-05, "loss": 0.2864, "step": 12004 }, { "epoch": 0.9085662929851946, "grad_norm": 0.71484375, "learning_rate": 1.1463328240341519e-05, "loss": 0.2852, "step": 12005 }, { "epoch": 0.908641975308642, "grad_norm": 0.765625, "learning_rate": 1.1462149971430207e-05, "loss": 0.3133, "step": 12006 }, { "epoch": 0.9087176576320893, "grad_norm": 0.74609375, "learning_rate": 1.1460971681775789e-05, "loss": 0.295, "step": 12007 }, { "epoch": 0.9087933399555367, "grad_norm": 0.74609375, "learning_rate": 1.1459793371394983e-05, "loss": 0.2945, "step": 12008 }, { "epoch": 0.9088690222789839, "grad_norm": 0.71875, "learning_rate": 1.1458615040304501e-05, "loss": 0.2928, "step": 12009 }, { "epoch": 0.9089447046024313, "grad_norm": 0.69921875, "learning_rate": 1.1457436688521065e-05, "loss": 0.2634, "step": 12010 }, { "epoch": 0.9090203869258786, "grad_norm": 0.73046875, "learning_rate": 1.1456258316061382e-05, "loss": 0.2882, "step": 12011 }, { "epoch": 0.909096069249326, "grad_norm": 0.72265625, "learning_rate": 1.145507992294218e-05, "loss": 0.2943, "step": 12012 }, { "epoch": 0.9091717515727733, "grad_norm": 0.73046875, "learning_rate": 1.1453901509180175e-05, "loss": 0.2826, "step": 12013 }, { "epoch": 0.9092474338962206, "grad_norm": 0.734375, "learning_rate": 1.1452723074792077e-05, "loss": 0.316, "step": 12014 }, { "epoch": 0.909323116219668, "grad_norm": 0.7890625, "learning_rate": 1.145154461979461e-05, "loss": 0.326, "step": 12015 }, { "epoch": 0.9093987985431152, "grad_norm": 0.76171875, "learning_rate": 1.1450366144204493e-05, "loss": 0.3285, "step": 12016 }, { "epoch": 0.9094744808665626, "grad_norm": 0.73046875, "learning_rate": 1.1449187648038445e-05, "loss": 0.3037, "step": 12017 }, { "epoch": 0.9095501631900099, "grad_norm": 0.71484375, "learning_rate": 1.144800913131318e-05, "loss": 0.3065, "step": 12018 }, { "epoch": 0.9096258455134573, "grad_norm": 0.83984375, "learning_rate": 1.1446830594045421e-05, "loss": 0.3898, "step": 12019 }, { "epoch": 0.9097015278369046, "grad_norm": 0.73828125, "learning_rate": 1.1445652036251889e-05, "loss": 0.2994, "step": 12020 }, { "epoch": 0.909777210160352, "grad_norm": 0.66796875, "learning_rate": 1.14444734579493e-05, "loss": 0.2567, "step": 12021 }, { "epoch": 0.9098528924837993, "grad_norm": 0.7734375, "learning_rate": 1.144329485915438e-05, "loss": 0.3287, "step": 12022 }, { "epoch": 0.9099285748072465, "grad_norm": 0.71875, "learning_rate": 1.1442116239883841e-05, "loss": 0.3116, "step": 12023 }, { "epoch": 0.9100042571306939, "grad_norm": 0.734375, "learning_rate": 1.144093760015441e-05, "loss": 0.2813, "step": 12024 }, { "epoch": 0.9100799394541412, "grad_norm": 0.70703125, "learning_rate": 1.1439758939982808e-05, "loss": 0.3009, "step": 12025 }, { "epoch": 0.9101556217775886, "grad_norm": 0.7578125, "learning_rate": 1.1438580259385752e-05, "loss": 0.301, "step": 12026 }, { "epoch": 0.9102313041010359, "grad_norm": 0.81640625, "learning_rate": 1.143740155837997e-05, "loss": 0.3795, "step": 12027 }, { "epoch": 0.9103069864244833, "grad_norm": 0.81640625, "learning_rate": 1.1436222836982179e-05, "loss": 0.3333, "step": 12028 }, { "epoch": 0.9103826687479306, "grad_norm": 1.0, "learning_rate": 1.1435044095209099e-05, "loss": 0.3306, "step": 12029 }, { "epoch": 0.9104583510713778, "grad_norm": 0.87109375, "learning_rate": 1.1433865333077458e-05, "loss": 0.3205, "step": 12030 }, { "epoch": 0.9105340333948252, "grad_norm": 0.7578125, "learning_rate": 1.1432686550603979e-05, "loss": 0.3474, "step": 12031 }, { "epoch": 0.9106097157182725, "grad_norm": 0.6953125, "learning_rate": 1.143150774780538e-05, "loss": 0.2746, "step": 12032 }, { "epoch": 0.9106853980417199, "grad_norm": 0.8046875, "learning_rate": 1.1430328924698389e-05, "loss": 0.3403, "step": 12033 }, { "epoch": 0.9107610803651672, "grad_norm": 0.875, "learning_rate": 1.1429150081299725e-05, "loss": 0.3828, "step": 12034 }, { "epoch": 0.9108367626886146, "grad_norm": 0.73046875, "learning_rate": 1.1427971217626116e-05, "loss": 0.2983, "step": 12035 }, { "epoch": 0.9109124450120619, "grad_norm": 0.7421875, "learning_rate": 1.1426792333694285e-05, "loss": 0.3008, "step": 12036 }, { "epoch": 0.9109881273355092, "grad_norm": 0.75390625, "learning_rate": 1.1425613429520956e-05, "loss": 0.3038, "step": 12037 }, { "epoch": 0.9110638096589565, "grad_norm": 0.71484375, "learning_rate": 1.1424434505122851e-05, "loss": 0.2863, "step": 12038 }, { "epoch": 0.9111394919824038, "grad_norm": 0.765625, "learning_rate": 1.1423255560516704e-05, "loss": 0.3239, "step": 12039 }, { "epoch": 0.9112151743058512, "grad_norm": 0.72265625, "learning_rate": 1.1422076595719232e-05, "loss": 0.2921, "step": 12040 }, { "epoch": 0.9112908566292985, "grad_norm": 0.76171875, "learning_rate": 1.1420897610747163e-05, "loss": 0.3266, "step": 12041 }, { "epoch": 0.9113665389527459, "grad_norm": 0.72265625, "learning_rate": 1.1419718605617223e-05, "loss": 0.2995, "step": 12042 }, { "epoch": 0.9114422212761932, "grad_norm": 0.71484375, "learning_rate": 1.141853958034614e-05, "loss": 0.2909, "step": 12043 }, { "epoch": 0.9115179035996405, "grad_norm": 0.7109375, "learning_rate": 1.1417360534950635e-05, "loss": 0.2972, "step": 12044 }, { "epoch": 0.9115935859230878, "grad_norm": 0.8046875, "learning_rate": 1.1416181469447442e-05, "loss": 0.3827, "step": 12045 }, { "epoch": 0.9116692682465352, "grad_norm": 0.7265625, "learning_rate": 1.1415002383853282e-05, "loss": 0.2903, "step": 12046 }, { "epoch": 0.9117449505699825, "grad_norm": 0.7890625, "learning_rate": 1.1413823278184885e-05, "loss": 0.3538, "step": 12047 }, { "epoch": 0.9118206328934298, "grad_norm": 0.73828125, "learning_rate": 1.141264415245898e-05, "loss": 0.2779, "step": 12048 }, { "epoch": 0.9118963152168772, "grad_norm": 0.734375, "learning_rate": 1.1411465006692292e-05, "loss": 0.2967, "step": 12049 }, { "epoch": 0.9119719975403245, "grad_norm": 0.7265625, "learning_rate": 1.1410285840901554e-05, "loss": 0.2925, "step": 12050 }, { "epoch": 0.9120476798637718, "grad_norm": 0.71484375, "learning_rate": 1.140910665510349e-05, "loss": 0.2832, "step": 12051 }, { "epoch": 0.9121233621872191, "grad_norm": 0.7890625, "learning_rate": 1.1407927449314829e-05, "loss": 0.3465, "step": 12052 }, { "epoch": 0.9121990445106665, "grad_norm": 0.734375, "learning_rate": 1.14067482235523e-05, "loss": 0.2889, "step": 12053 }, { "epoch": 0.9122747268341138, "grad_norm": 0.71484375, "learning_rate": 1.1405568977832633e-05, "loss": 0.2914, "step": 12054 }, { "epoch": 0.9123504091575612, "grad_norm": 0.72265625, "learning_rate": 1.1404389712172561e-05, "loss": 0.3028, "step": 12055 }, { "epoch": 0.9124260914810085, "grad_norm": 0.73828125, "learning_rate": 1.1403210426588808e-05, "loss": 0.3099, "step": 12056 }, { "epoch": 0.9125017738044559, "grad_norm": 0.66796875, "learning_rate": 1.140203112109811e-05, "loss": 0.2388, "step": 12057 }, { "epoch": 0.9125774561279031, "grad_norm": 0.83203125, "learning_rate": 1.1400851795717193e-05, "loss": 0.3804, "step": 12058 }, { "epoch": 0.9126531384513504, "grad_norm": 0.7890625, "learning_rate": 1.1399672450462791e-05, "loss": 0.3527, "step": 12059 }, { "epoch": 0.9127288207747978, "grad_norm": 0.8046875, "learning_rate": 1.1398493085351633e-05, "loss": 0.3614, "step": 12060 }, { "epoch": 0.9128045030982451, "grad_norm": 0.765625, "learning_rate": 1.1397313700400447e-05, "loss": 0.306, "step": 12061 }, { "epoch": 0.9128801854216925, "grad_norm": 0.7890625, "learning_rate": 1.1396134295625971e-05, "loss": 0.336, "step": 12062 }, { "epoch": 0.9129558677451398, "grad_norm": 0.7109375, "learning_rate": 1.1394954871044935e-05, "loss": 0.3044, "step": 12063 }, { "epoch": 0.9130315500685872, "grad_norm": 0.73046875, "learning_rate": 1.1393775426674068e-05, "loss": 0.3007, "step": 12064 }, { "epoch": 0.9131072323920344, "grad_norm": 0.671875, "learning_rate": 1.1392595962530106e-05, "loss": 0.2425, "step": 12065 }, { "epoch": 0.9131829147154817, "grad_norm": 0.7265625, "learning_rate": 1.1391416478629781e-05, "loss": 0.3026, "step": 12066 }, { "epoch": 0.9132585970389291, "grad_norm": 0.76953125, "learning_rate": 1.1390236974989824e-05, "loss": 0.3166, "step": 12067 }, { "epoch": 0.9133342793623764, "grad_norm": 0.74609375, "learning_rate": 1.1389057451626971e-05, "loss": 0.2841, "step": 12068 }, { "epoch": 0.9134099616858238, "grad_norm": 0.765625, "learning_rate": 1.1387877908557956e-05, "loss": 0.2966, "step": 12069 }, { "epoch": 0.9134856440092711, "grad_norm": 0.7421875, "learning_rate": 1.1386698345799507e-05, "loss": 0.2943, "step": 12070 }, { "epoch": 0.9135613263327184, "grad_norm": 0.68359375, "learning_rate": 1.1385518763368366e-05, "loss": 0.251, "step": 12071 }, { "epoch": 0.9136370086561657, "grad_norm": 0.7421875, "learning_rate": 1.1384339161281264e-05, "loss": 0.2834, "step": 12072 }, { "epoch": 0.913712690979613, "grad_norm": 0.76953125, "learning_rate": 1.1383159539554933e-05, "loss": 0.3191, "step": 12073 }, { "epoch": 0.9137883733030604, "grad_norm": 0.76953125, "learning_rate": 1.1381979898206112e-05, "loss": 0.3259, "step": 12074 }, { "epoch": 0.9138640556265077, "grad_norm": 0.66796875, "learning_rate": 1.1380800237251533e-05, "loss": 0.2464, "step": 12075 }, { "epoch": 0.9139397379499551, "grad_norm": 0.7890625, "learning_rate": 1.1379620556707934e-05, "loss": 0.3303, "step": 12076 }, { "epoch": 0.9140154202734024, "grad_norm": 0.75390625, "learning_rate": 1.1378440856592048e-05, "loss": 0.334, "step": 12077 }, { "epoch": 0.9140911025968497, "grad_norm": 0.7734375, "learning_rate": 1.1377261136920618e-05, "loss": 0.3569, "step": 12078 }, { "epoch": 0.914166784920297, "grad_norm": 0.765625, "learning_rate": 1.137608139771037e-05, "loss": 0.3428, "step": 12079 }, { "epoch": 0.9142424672437444, "grad_norm": 0.7421875, "learning_rate": 1.1374901638978048e-05, "loss": 0.3108, "step": 12080 }, { "epoch": 0.9143181495671917, "grad_norm": 0.77734375, "learning_rate": 1.1373721860740388e-05, "loss": 0.3391, "step": 12081 }, { "epoch": 0.914393831890639, "grad_norm": 0.796875, "learning_rate": 1.1372542063014123e-05, "loss": 0.3711, "step": 12082 }, { "epoch": 0.9144695142140864, "grad_norm": 0.75390625, "learning_rate": 1.1371362245815995e-05, "loss": 0.3024, "step": 12083 }, { "epoch": 0.9145451965375337, "grad_norm": 0.640625, "learning_rate": 1.1370182409162742e-05, "loss": 0.2202, "step": 12084 }, { "epoch": 0.914620878860981, "grad_norm": 0.7890625, "learning_rate": 1.1369002553071098e-05, "loss": 0.3466, "step": 12085 }, { "epoch": 0.9146965611844283, "grad_norm": 0.74609375, "learning_rate": 1.1367822677557804e-05, "loss": 0.2995, "step": 12086 }, { "epoch": 0.9147722435078757, "grad_norm": 0.74609375, "learning_rate": 1.13666427826396e-05, "loss": 0.2954, "step": 12087 }, { "epoch": 0.914847925831323, "grad_norm": 0.7890625, "learning_rate": 1.136546286833322e-05, "loss": 0.3356, "step": 12088 }, { "epoch": 0.9149236081547704, "grad_norm": 0.7890625, "learning_rate": 1.1364282934655407e-05, "loss": 0.3407, "step": 12089 }, { "epoch": 0.9149992904782177, "grad_norm": 0.703125, "learning_rate": 1.13631029816229e-05, "loss": 0.2782, "step": 12090 }, { "epoch": 0.915074972801665, "grad_norm": 0.76953125, "learning_rate": 1.1361923009252436e-05, "loss": 0.3138, "step": 12091 }, { "epoch": 0.9151506551251123, "grad_norm": 0.76953125, "learning_rate": 1.136074301756076e-05, "loss": 0.2988, "step": 12092 }, { "epoch": 0.9152263374485596, "grad_norm": 0.76953125, "learning_rate": 1.135956300656461e-05, "loss": 0.2754, "step": 12093 }, { "epoch": 0.915302019772007, "grad_norm": 0.79296875, "learning_rate": 1.1358382976280719e-05, "loss": 0.3527, "step": 12094 }, { "epoch": 0.9153777020954543, "grad_norm": 0.78125, "learning_rate": 1.1357202926725841e-05, "loss": 0.2975, "step": 12095 }, { "epoch": 0.9154533844189017, "grad_norm": 0.7890625, "learning_rate": 1.1356022857916709e-05, "loss": 0.3229, "step": 12096 }, { "epoch": 0.915529066742349, "grad_norm": 0.734375, "learning_rate": 1.1354842769870065e-05, "loss": 0.285, "step": 12097 }, { "epoch": 0.9156047490657964, "grad_norm": 0.75390625, "learning_rate": 1.1353662662602652e-05, "loss": 0.3056, "step": 12098 }, { "epoch": 0.9156804313892436, "grad_norm": 0.75, "learning_rate": 1.1352482536131209e-05, "loss": 0.3102, "step": 12099 }, { "epoch": 0.915756113712691, "grad_norm": 0.78515625, "learning_rate": 1.1351302390472481e-05, "loss": 0.3301, "step": 12100 }, { "epoch": 0.9158317960361383, "grad_norm": 0.78515625, "learning_rate": 1.1350122225643213e-05, "loss": 0.3321, "step": 12101 }, { "epoch": 0.9159074783595856, "grad_norm": 1.7265625, "learning_rate": 1.134894204166014e-05, "loss": 0.4015, "step": 12102 }, { "epoch": 0.915983160683033, "grad_norm": 0.75390625, "learning_rate": 1.134776183854001e-05, "loss": 0.315, "step": 12103 }, { "epoch": 0.9160588430064803, "grad_norm": 0.78515625, "learning_rate": 1.1346581616299565e-05, "loss": 0.3383, "step": 12104 }, { "epoch": 0.9161345253299277, "grad_norm": 0.7734375, "learning_rate": 1.1345401374955554e-05, "loss": 0.3249, "step": 12105 }, { "epoch": 0.9162102076533749, "grad_norm": 0.77734375, "learning_rate": 1.1344221114524712e-05, "loss": 0.3333, "step": 12106 }, { "epoch": 0.9162858899768223, "grad_norm": 0.91796875, "learning_rate": 1.1343040835023786e-05, "loss": 0.2849, "step": 12107 }, { "epoch": 0.9163615723002696, "grad_norm": 0.70703125, "learning_rate": 1.1341860536469523e-05, "loss": 0.2888, "step": 12108 }, { "epoch": 0.916437254623717, "grad_norm": 0.734375, "learning_rate": 1.1340680218878662e-05, "loss": 0.2746, "step": 12109 }, { "epoch": 0.9165129369471643, "grad_norm": 0.78515625, "learning_rate": 1.1339499882267955e-05, "loss": 0.3639, "step": 12110 }, { "epoch": 0.9165886192706116, "grad_norm": 0.76953125, "learning_rate": 1.1338319526654143e-05, "loss": 0.3315, "step": 12111 }, { "epoch": 0.916664301594059, "grad_norm": 0.76953125, "learning_rate": 1.1337139152053971e-05, "loss": 0.344, "step": 12112 }, { "epoch": 0.9167399839175062, "grad_norm": 0.75, "learning_rate": 1.1335958758484185e-05, "loss": 0.3191, "step": 12113 }, { "epoch": 0.9168156662409536, "grad_norm": 0.75390625, "learning_rate": 1.1334778345961532e-05, "loss": 0.3161, "step": 12114 }, { "epoch": 0.9168913485644009, "grad_norm": 1.0078125, "learning_rate": 1.1333597914502758e-05, "loss": 0.3512, "step": 12115 }, { "epoch": 0.9169670308878483, "grad_norm": 0.80859375, "learning_rate": 1.133241746412461e-05, "loss": 0.3511, "step": 12116 }, { "epoch": 0.9170427132112956, "grad_norm": 0.82421875, "learning_rate": 1.1331236994843831e-05, "loss": 0.3412, "step": 12117 }, { "epoch": 0.917118395534743, "grad_norm": 0.76171875, "learning_rate": 1.1330056506677174e-05, "loss": 0.3216, "step": 12118 }, { "epoch": 0.9171940778581903, "grad_norm": 0.78125, "learning_rate": 1.132887599964138e-05, "loss": 0.3487, "step": 12119 }, { "epoch": 0.9172697601816375, "grad_norm": 0.8046875, "learning_rate": 1.1327695473753201e-05, "loss": 0.3372, "step": 12120 }, { "epoch": 0.9173454425050849, "grad_norm": 0.72265625, "learning_rate": 1.1326514929029385e-05, "loss": 0.2848, "step": 12121 }, { "epoch": 0.9174211248285322, "grad_norm": 0.7265625, "learning_rate": 1.1325334365486676e-05, "loss": 0.3333, "step": 12122 }, { "epoch": 0.9174968071519796, "grad_norm": 0.65234375, "learning_rate": 1.1324153783141825e-05, "loss": 0.2714, "step": 12123 }, { "epoch": 0.9175724894754269, "grad_norm": 0.7578125, "learning_rate": 1.1322973182011579e-05, "loss": 0.309, "step": 12124 }, { "epoch": 0.9176481717988743, "grad_norm": 0.7734375, "learning_rate": 1.1321792562112692e-05, "loss": 0.3258, "step": 12125 }, { "epoch": 0.9177238541223216, "grad_norm": 0.76171875, "learning_rate": 1.1320611923461906e-05, "loss": 0.3371, "step": 12126 }, { "epoch": 0.9177995364457688, "grad_norm": 0.79296875, "learning_rate": 1.1319431266075975e-05, "loss": 0.3405, "step": 12127 }, { "epoch": 0.9178752187692162, "grad_norm": 0.76171875, "learning_rate": 1.1318250589971646e-05, "loss": 0.3406, "step": 12128 }, { "epoch": 0.9179509010926635, "grad_norm": 0.734375, "learning_rate": 1.1317069895165671e-05, "loss": 0.3295, "step": 12129 }, { "epoch": 0.9180265834161109, "grad_norm": 0.66796875, "learning_rate": 1.1315889181674799e-05, "loss": 0.2483, "step": 12130 }, { "epoch": 0.9181022657395582, "grad_norm": 0.72265625, "learning_rate": 1.1314708449515783e-05, "loss": 0.2733, "step": 12131 }, { "epoch": 0.9181779480630056, "grad_norm": 0.7734375, "learning_rate": 1.1313527698705367e-05, "loss": 0.3395, "step": 12132 }, { "epoch": 0.9182536303864529, "grad_norm": 0.78125, "learning_rate": 1.131234692926031e-05, "loss": 0.3302, "step": 12133 }, { "epoch": 0.9183293127099001, "grad_norm": 0.76953125, "learning_rate": 1.1311166141197361e-05, "loss": 0.3346, "step": 12134 }, { "epoch": 0.9184049950333475, "grad_norm": 0.7421875, "learning_rate": 1.1309985334533266e-05, "loss": 0.3232, "step": 12135 }, { "epoch": 0.9184806773567948, "grad_norm": 0.69921875, "learning_rate": 1.1308804509284784e-05, "loss": 0.249, "step": 12136 }, { "epoch": 0.9185563596802422, "grad_norm": 0.73828125, "learning_rate": 1.1307623665468663e-05, "loss": 0.3157, "step": 12137 }, { "epoch": 0.9186320420036895, "grad_norm": 0.73046875, "learning_rate": 1.1306442803101655e-05, "loss": 0.3106, "step": 12138 }, { "epoch": 0.9187077243271369, "grad_norm": 0.828125, "learning_rate": 1.130526192220052e-05, "loss": 0.351, "step": 12139 }, { "epoch": 0.9187834066505842, "grad_norm": 0.75, "learning_rate": 1.1304081022781998e-05, "loss": 0.3099, "step": 12140 }, { "epoch": 0.9188590889740315, "grad_norm": 0.78515625, "learning_rate": 1.130290010486285e-05, "loss": 0.3377, "step": 12141 }, { "epoch": 0.9189347712974788, "grad_norm": 0.94140625, "learning_rate": 1.1301719168459828e-05, "loss": 0.2887, "step": 12142 }, { "epoch": 0.9190104536209261, "grad_norm": 0.703125, "learning_rate": 1.1300538213589692e-05, "loss": 0.2874, "step": 12143 }, { "epoch": 0.9190861359443735, "grad_norm": 0.7421875, "learning_rate": 1.1299357240269182e-05, "loss": 0.3217, "step": 12144 }, { "epoch": 0.9191618182678208, "grad_norm": 0.76953125, "learning_rate": 1.1298176248515063e-05, "loss": 0.3407, "step": 12145 }, { "epoch": 0.9192375005912682, "grad_norm": 0.77734375, "learning_rate": 1.1296995238344084e-05, "loss": 0.3418, "step": 12146 }, { "epoch": 0.9193131829147155, "grad_norm": 0.7890625, "learning_rate": 1.1295814209773001e-05, "loss": 0.3513, "step": 12147 }, { "epoch": 0.9193888652381628, "grad_norm": 0.73828125, "learning_rate": 1.1294633162818576e-05, "loss": 0.3095, "step": 12148 }, { "epoch": 0.9194645475616101, "grad_norm": 0.828125, "learning_rate": 1.1293452097497554e-05, "loss": 0.3309, "step": 12149 }, { "epoch": 0.9195402298850575, "grad_norm": 1.03125, "learning_rate": 1.1292271013826692e-05, "loss": 0.3666, "step": 12150 }, { "epoch": 0.9196159122085048, "grad_norm": 0.7734375, "learning_rate": 1.129108991182275e-05, "loss": 0.3309, "step": 12151 }, { "epoch": 0.9196915945319522, "grad_norm": 0.78515625, "learning_rate": 1.1289908791502484e-05, "loss": 0.3364, "step": 12152 }, { "epoch": 0.9197672768553995, "grad_norm": 0.75, "learning_rate": 1.1288727652882648e-05, "loss": 0.3229, "step": 12153 }, { "epoch": 0.9198429591788468, "grad_norm": 0.76171875, "learning_rate": 1.1287546495979997e-05, "loss": 0.2982, "step": 12154 }, { "epoch": 0.9199186415022941, "grad_norm": 0.71484375, "learning_rate": 1.128636532081129e-05, "loss": 0.296, "step": 12155 }, { "epoch": 0.9199943238257414, "grad_norm": 0.81640625, "learning_rate": 1.128518412739328e-05, "loss": 0.3298, "step": 12156 }, { "epoch": 0.9200700061491888, "grad_norm": 0.76171875, "learning_rate": 1.1284002915742733e-05, "loss": 0.3263, "step": 12157 }, { "epoch": 0.9201456884726361, "grad_norm": 0.72265625, "learning_rate": 1.12828216858764e-05, "loss": 0.269, "step": 12158 }, { "epoch": 0.9202213707960835, "grad_norm": 0.71875, "learning_rate": 1.1281640437811039e-05, "loss": 0.2924, "step": 12159 }, { "epoch": 0.9202970531195308, "grad_norm": 0.6796875, "learning_rate": 1.1280459171563409e-05, "loss": 0.268, "step": 12160 }, { "epoch": 0.9203727354429782, "grad_norm": 0.76953125, "learning_rate": 1.1279277887150268e-05, "loss": 0.3317, "step": 12161 }, { "epoch": 0.9204484177664254, "grad_norm": 0.72265625, "learning_rate": 1.1278096584588376e-05, "loss": 0.2939, "step": 12162 }, { "epoch": 0.9205241000898727, "grad_norm": 0.76953125, "learning_rate": 1.127691526389449e-05, "loss": 0.341, "step": 12163 }, { "epoch": 0.9205997824133201, "grad_norm": 0.7890625, "learning_rate": 1.127573392508537e-05, "loss": 0.3376, "step": 12164 }, { "epoch": 0.9206754647367674, "grad_norm": 0.8359375, "learning_rate": 1.1274552568177772e-05, "loss": 0.3271, "step": 12165 }, { "epoch": 0.9207511470602148, "grad_norm": 0.77734375, "learning_rate": 1.1273371193188463e-05, "loss": 0.3069, "step": 12166 }, { "epoch": 0.9208268293836621, "grad_norm": 0.81640625, "learning_rate": 1.1272189800134197e-05, "loss": 0.354, "step": 12167 }, { "epoch": 0.9209025117071095, "grad_norm": 0.7734375, "learning_rate": 1.1271008389031734e-05, "loss": 0.3183, "step": 12168 }, { "epoch": 0.9209781940305567, "grad_norm": 0.7734375, "learning_rate": 1.1269826959897839e-05, "loss": 0.3251, "step": 12169 }, { "epoch": 0.921053876354004, "grad_norm": 0.75390625, "learning_rate": 1.1268645512749267e-05, "loss": 0.2981, "step": 12170 }, { "epoch": 0.9211295586774514, "grad_norm": 0.7890625, "learning_rate": 1.1267464047602782e-05, "loss": 0.3242, "step": 12171 }, { "epoch": 0.9212052410008987, "grad_norm": 0.72265625, "learning_rate": 1.1266282564475146e-05, "loss": 0.28, "step": 12172 }, { "epoch": 0.9212809233243461, "grad_norm": 0.76953125, "learning_rate": 1.1265101063383116e-05, "loss": 0.3355, "step": 12173 }, { "epoch": 0.9213566056477934, "grad_norm": 0.73828125, "learning_rate": 1.1263919544343458e-05, "loss": 0.3149, "step": 12174 }, { "epoch": 0.9214322879712408, "grad_norm": 0.75, "learning_rate": 1.1262738007372931e-05, "loss": 0.3312, "step": 12175 }, { "epoch": 0.921507970294688, "grad_norm": 0.74609375, "learning_rate": 1.12615564524883e-05, "loss": 0.3235, "step": 12176 }, { "epoch": 0.9215836526181354, "grad_norm": 0.7890625, "learning_rate": 1.1260374879706325e-05, "loss": 0.3395, "step": 12177 }, { "epoch": 0.9216593349415827, "grad_norm": 0.7578125, "learning_rate": 1.125919328904377e-05, "loss": 0.2909, "step": 12178 }, { "epoch": 0.92173501726503, "grad_norm": 0.76953125, "learning_rate": 1.1258011680517396e-05, "loss": 0.3228, "step": 12179 }, { "epoch": 0.9218106995884774, "grad_norm": 0.79296875, "learning_rate": 1.125683005414397e-05, "loss": 0.3465, "step": 12180 }, { "epoch": 0.9218863819119247, "grad_norm": 0.76171875, "learning_rate": 1.1255648409940254e-05, "loss": 0.3248, "step": 12181 }, { "epoch": 0.9219620642353721, "grad_norm": 0.77734375, "learning_rate": 1.1254466747923005e-05, "loss": 0.3449, "step": 12182 }, { "epoch": 0.9220377465588193, "grad_norm": 0.7421875, "learning_rate": 1.1253285068108996e-05, "loss": 0.3238, "step": 12183 }, { "epoch": 0.9221134288822667, "grad_norm": 0.72265625, "learning_rate": 1.1252103370514986e-05, "loss": 0.3054, "step": 12184 }, { "epoch": 0.922189111205714, "grad_norm": 0.78125, "learning_rate": 1.1250921655157743e-05, "loss": 0.3622, "step": 12185 }, { "epoch": 0.9222647935291614, "grad_norm": 0.7265625, "learning_rate": 1.1249739922054029e-05, "loss": 0.3161, "step": 12186 }, { "epoch": 0.9223404758526087, "grad_norm": 1.0546875, "learning_rate": 1.124855817122061e-05, "loss": 0.3907, "step": 12187 }, { "epoch": 0.922416158176056, "grad_norm": 0.671875, "learning_rate": 1.1247376402674248e-05, "loss": 0.2559, "step": 12188 }, { "epoch": 0.9224918404995033, "grad_norm": 0.6875, "learning_rate": 1.1246194616431713e-05, "loss": 0.2654, "step": 12189 }, { "epoch": 0.9225675228229506, "grad_norm": 0.8046875, "learning_rate": 1.1245012812509773e-05, "loss": 0.3583, "step": 12190 }, { "epoch": 0.922643205146398, "grad_norm": 0.73828125, "learning_rate": 1.1243830990925183e-05, "loss": 0.3197, "step": 12191 }, { "epoch": 0.9227188874698453, "grad_norm": 0.78125, "learning_rate": 1.1242649151694718e-05, "loss": 0.3355, "step": 12192 }, { "epoch": 0.9227945697932927, "grad_norm": 0.76953125, "learning_rate": 1.1241467294835144e-05, "loss": 0.321, "step": 12193 }, { "epoch": 0.92287025211674, "grad_norm": 0.7734375, "learning_rate": 1.1240285420363221e-05, "loss": 0.3181, "step": 12194 }, { "epoch": 0.9229459344401874, "grad_norm": 0.90234375, "learning_rate": 1.123910352829573e-05, "loss": 0.3425, "step": 12195 }, { "epoch": 0.9230216167636346, "grad_norm": 1.015625, "learning_rate": 1.1237921618649423e-05, "loss": 0.3654, "step": 12196 }, { "epoch": 0.9230972990870819, "grad_norm": 0.765625, "learning_rate": 1.1236739691441073e-05, "loss": 0.335, "step": 12197 }, { "epoch": 0.9231729814105293, "grad_norm": 0.7109375, "learning_rate": 1.1235557746687448e-05, "loss": 0.283, "step": 12198 }, { "epoch": 0.9232486637339766, "grad_norm": 0.765625, "learning_rate": 1.1234375784405322e-05, "loss": 0.317, "step": 12199 }, { "epoch": 0.923324346057424, "grad_norm": 0.74609375, "learning_rate": 1.123319380461145e-05, "loss": 0.3326, "step": 12200 }, { "epoch": 0.9234000283808713, "grad_norm": 0.75, "learning_rate": 1.1232011807322611e-05, "loss": 0.3328, "step": 12201 }, { "epoch": 0.9234757107043187, "grad_norm": 0.70703125, "learning_rate": 1.1230829792555571e-05, "loss": 0.2753, "step": 12202 }, { "epoch": 0.9235513930277659, "grad_norm": 0.75390625, "learning_rate": 1.1229647760327096e-05, "loss": 0.3079, "step": 12203 }, { "epoch": 0.9236270753512132, "grad_norm": 0.7109375, "learning_rate": 1.122846571065396e-05, "loss": 0.2961, "step": 12204 }, { "epoch": 0.9237027576746606, "grad_norm": 0.765625, "learning_rate": 1.1227283643552928e-05, "loss": 0.3439, "step": 12205 }, { "epoch": 0.9237784399981079, "grad_norm": 1.046875, "learning_rate": 1.122610155904077e-05, "loss": 0.3687, "step": 12206 }, { "epoch": 0.9238541223215553, "grad_norm": 0.69140625, "learning_rate": 1.122491945713426e-05, "loss": 0.2651, "step": 12207 }, { "epoch": 0.9239298046450026, "grad_norm": 0.6953125, "learning_rate": 1.1223737337850165e-05, "loss": 0.2674, "step": 12208 }, { "epoch": 0.92400548696845, "grad_norm": 0.72265625, "learning_rate": 1.1222555201205253e-05, "loss": 0.2657, "step": 12209 }, { "epoch": 0.9240811692918972, "grad_norm": 0.7578125, "learning_rate": 1.1221373047216302e-05, "loss": 0.3114, "step": 12210 }, { "epoch": 0.9241568516153446, "grad_norm": 0.7109375, "learning_rate": 1.1220190875900076e-05, "loss": 0.2774, "step": 12211 }, { "epoch": 0.9242325339387919, "grad_norm": 0.73828125, "learning_rate": 1.1219008687273347e-05, "loss": 0.3158, "step": 12212 }, { "epoch": 0.9243082162622392, "grad_norm": 0.68359375, "learning_rate": 1.1217826481352892e-05, "loss": 0.2637, "step": 12213 }, { "epoch": 0.9243838985856866, "grad_norm": 0.70703125, "learning_rate": 1.1216644258155473e-05, "loss": 0.2947, "step": 12214 }, { "epoch": 0.9244595809091339, "grad_norm": 0.77734375, "learning_rate": 1.1215462017697871e-05, "loss": 0.328, "step": 12215 }, { "epoch": 0.9245352632325813, "grad_norm": 0.765625, "learning_rate": 1.1214279759996856e-05, "loss": 0.3417, "step": 12216 }, { "epoch": 0.9246109455560285, "grad_norm": 0.75, "learning_rate": 1.1213097485069195e-05, "loss": 0.3039, "step": 12217 }, { "epoch": 0.9246866278794759, "grad_norm": 0.7109375, "learning_rate": 1.1211915192931669e-05, "loss": 0.2925, "step": 12218 }, { "epoch": 0.9247623102029232, "grad_norm": 0.76171875, "learning_rate": 1.1210732883601044e-05, "loss": 0.3174, "step": 12219 }, { "epoch": 0.9248379925263706, "grad_norm": 0.765625, "learning_rate": 1.1209550557094093e-05, "loss": 0.3406, "step": 12220 }, { "epoch": 0.9249136748498179, "grad_norm": 0.83203125, "learning_rate": 1.1208368213427591e-05, "loss": 0.3049, "step": 12221 }, { "epoch": 0.9249893571732652, "grad_norm": 0.71484375, "learning_rate": 1.1207185852618317e-05, "loss": 0.2741, "step": 12222 }, { "epoch": 0.9250650394967126, "grad_norm": 0.74609375, "learning_rate": 1.1206003474683036e-05, "loss": 0.2905, "step": 12223 }, { "epoch": 0.9251407218201598, "grad_norm": 0.71875, "learning_rate": 1.1204821079638528e-05, "loss": 0.3016, "step": 12224 }, { "epoch": 0.9252164041436072, "grad_norm": 0.7578125, "learning_rate": 1.1203638667501566e-05, "loss": 0.3092, "step": 12225 }, { "epoch": 0.9252920864670545, "grad_norm": 0.6796875, "learning_rate": 1.1202456238288923e-05, "loss": 0.2461, "step": 12226 }, { "epoch": 0.9253677687905019, "grad_norm": 0.7265625, "learning_rate": 1.1201273792017378e-05, "loss": 0.2805, "step": 12227 }, { "epoch": 0.9254434511139492, "grad_norm": 0.75390625, "learning_rate": 1.12000913287037e-05, "loss": 0.3073, "step": 12228 }, { "epoch": 0.9255191334373966, "grad_norm": 0.77734375, "learning_rate": 1.1198908848364665e-05, "loss": 0.3379, "step": 12229 }, { "epoch": 0.9255948157608439, "grad_norm": 0.80078125, "learning_rate": 1.1197726351017052e-05, "loss": 0.2983, "step": 12230 }, { "epoch": 0.9256704980842911, "grad_norm": 0.63671875, "learning_rate": 1.1196543836677635e-05, "loss": 0.2183, "step": 12231 }, { "epoch": 0.9257461804077385, "grad_norm": 0.72265625, "learning_rate": 1.1195361305363193e-05, "loss": 0.2999, "step": 12232 }, { "epoch": 0.9258218627311858, "grad_norm": 0.75390625, "learning_rate": 1.1194178757090496e-05, "loss": 0.305, "step": 12233 }, { "epoch": 0.9258975450546332, "grad_norm": 0.69921875, "learning_rate": 1.1192996191876329e-05, "loss": 0.2846, "step": 12234 }, { "epoch": 0.9259732273780805, "grad_norm": 0.78515625, "learning_rate": 1.1191813609737459e-05, "loss": 0.3287, "step": 12235 }, { "epoch": 0.9260489097015279, "grad_norm": 0.69921875, "learning_rate": 1.119063101069067e-05, "loss": 0.2807, "step": 12236 }, { "epoch": 0.9261245920249752, "grad_norm": 0.94921875, "learning_rate": 1.1189448394752737e-05, "loss": 0.3308, "step": 12237 }, { "epoch": 0.9262002743484224, "grad_norm": 0.74609375, "learning_rate": 1.1188265761940436e-05, "loss": 0.2924, "step": 12238 }, { "epoch": 0.9262759566718698, "grad_norm": 0.80078125, "learning_rate": 1.1187083112270546e-05, "loss": 0.3567, "step": 12239 }, { "epoch": 0.9263516389953171, "grad_norm": 0.734375, "learning_rate": 1.1185900445759846e-05, "loss": 0.272, "step": 12240 }, { "epoch": 0.9264273213187645, "grad_norm": 0.7734375, "learning_rate": 1.1184717762425113e-05, "loss": 0.3151, "step": 12241 }, { "epoch": 0.9265030036422118, "grad_norm": 1.015625, "learning_rate": 1.1183535062283127e-05, "loss": 0.3467, "step": 12242 }, { "epoch": 0.9265786859656592, "grad_norm": 0.75390625, "learning_rate": 1.1182352345350662e-05, "loss": 0.3151, "step": 12243 }, { "epoch": 0.9266543682891065, "grad_norm": 0.76171875, "learning_rate": 1.11811696116445e-05, "loss": 0.3031, "step": 12244 }, { "epoch": 0.9267300506125538, "grad_norm": 0.84375, "learning_rate": 1.1179986861181423e-05, "loss": 0.3346, "step": 12245 }, { "epoch": 0.9268057329360011, "grad_norm": 0.78125, "learning_rate": 1.1178804093978205e-05, "loss": 0.3371, "step": 12246 }, { "epoch": 0.9268814152594484, "grad_norm": 0.796875, "learning_rate": 1.117762131005163e-05, "loss": 0.3349, "step": 12247 }, { "epoch": 0.9269570975828958, "grad_norm": 0.7578125, "learning_rate": 1.1176438509418475e-05, "loss": 0.329, "step": 12248 }, { "epoch": 0.9270327799063431, "grad_norm": 0.73828125, "learning_rate": 1.1175255692095518e-05, "loss": 0.3051, "step": 12249 }, { "epoch": 0.9271084622297905, "grad_norm": 0.75390625, "learning_rate": 1.1174072858099545e-05, "loss": 0.2868, "step": 12250 }, { "epoch": 0.9271841445532378, "grad_norm": 0.7890625, "learning_rate": 1.1172890007447336e-05, "loss": 0.2829, "step": 12251 }, { "epoch": 0.9272598268766851, "grad_norm": 0.76171875, "learning_rate": 1.1171707140155668e-05, "loss": 0.3327, "step": 12252 }, { "epoch": 0.9273355092001324, "grad_norm": 0.7109375, "learning_rate": 1.117052425624132e-05, "loss": 0.2862, "step": 12253 }, { "epoch": 0.9274111915235798, "grad_norm": 0.78125, "learning_rate": 1.116934135572108e-05, "loss": 0.3097, "step": 12254 }, { "epoch": 0.9274868738470271, "grad_norm": 0.765625, "learning_rate": 1.1168158438611727e-05, "loss": 0.3461, "step": 12255 }, { "epoch": 0.9275625561704745, "grad_norm": 0.80078125, "learning_rate": 1.116697550493004e-05, "loss": 0.3446, "step": 12256 }, { "epoch": 0.9276382384939218, "grad_norm": 0.78515625, "learning_rate": 1.1165792554692802e-05, "loss": 0.3471, "step": 12257 }, { "epoch": 0.9277139208173691, "grad_norm": 0.7421875, "learning_rate": 1.11646095879168e-05, "loss": 0.2857, "step": 12258 }, { "epoch": 0.9277896031408164, "grad_norm": 0.76171875, "learning_rate": 1.1163426604618808e-05, "loss": 0.3278, "step": 12259 }, { "epoch": 0.9278652854642637, "grad_norm": 0.74609375, "learning_rate": 1.1162243604815616e-05, "loss": 0.319, "step": 12260 }, { "epoch": 0.9279409677877111, "grad_norm": 0.73828125, "learning_rate": 1.1161060588524004e-05, "loss": 0.3229, "step": 12261 }, { "epoch": 0.9280166501111584, "grad_norm": 0.78125, "learning_rate": 1.1159877555760751e-05, "loss": 0.3325, "step": 12262 }, { "epoch": 0.9280923324346058, "grad_norm": 0.75390625, "learning_rate": 1.1158694506542649e-05, "loss": 0.2965, "step": 12263 }, { "epoch": 0.9281680147580531, "grad_norm": 0.79296875, "learning_rate": 1.1157511440886475e-05, "loss": 0.3254, "step": 12264 }, { "epoch": 0.9282436970815005, "grad_norm": 0.75390625, "learning_rate": 1.1156328358809016e-05, "loss": 0.3083, "step": 12265 }, { "epoch": 0.9283193794049477, "grad_norm": 0.9453125, "learning_rate": 1.1155145260327054e-05, "loss": 0.2921, "step": 12266 }, { "epoch": 0.928395061728395, "grad_norm": 0.828125, "learning_rate": 1.1153962145457376e-05, "loss": 0.3684, "step": 12267 }, { "epoch": 0.9284707440518424, "grad_norm": 0.7734375, "learning_rate": 1.1152779014216762e-05, "loss": 0.3141, "step": 12268 }, { "epoch": 0.9285464263752897, "grad_norm": 0.734375, "learning_rate": 1.1151595866622002e-05, "loss": 0.3146, "step": 12269 }, { "epoch": 0.9286221086987371, "grad_norm": 0.71875, "learning_rate": 1.1150412702689875e-05, "loss": 0.2816, "step": 12270 }, { "epoch": 0.9286977910221844, "grad_norm": 0.765625, "learning_rate": 1.1149229522437173e-05, "loss": 0.326, "step": 12271 }, { "epoch": 0.9287734733456318, "grad_norm": 0.76171875, "learning_rate": 1.1148046325880675e-05, "loss": 0.3016, "step": 12272 }, { "epoch": 0.928849155669079, "grad_norm": 0.734375, "learning_rate": 1.1146863113037172e-05, "loss": 0.3258, "step": 12273 }, { "epoch": 0.9289248379925263, "grad_norm": 0.7265625, "learning_rate": 1.1145679883923448e-05, "loss": 0.2705, "step": 12274 }, { "epoch": 0.9290005203159737, "grad_norm": 0.734375, "learning_rate": 1.1144496638556288e-05, "loss": 0.311, "step": 12275 }, { "epoch": 0.929076202639421, "grad_norm": 0.75390625, "learning_rate": 1.114331337695248e-05, "loss": 0.3276, "step": 12276 }, { "epoch": 0.9291518849628684, "grad_norm": 0.76953125, "learning_rate": 1.1142130099128808e-05, "loss": 0.3243, "step": 12277 }, { "epoch": 0.9292275672863157, "grad_norm": 0.8984375, "learning_rate": 1.114094680510206e-05, "loss": 0.2966, "step": 12278 }, { "epoch": 0.9293032496097631, "grad_norm": 0.69921875, "learning_rate": 1.1139763494889027e-05, "loss": 0.2746, "step": 12279 }, { "epoch": 0.9293789319332103, "grad_norm": 0.78125, "learning_rate": 1.113858016850649e-05, "loss": 0.3216, "step": 12280 }, { "epoch": 0.9294546142566577, "grad_norm": 0.73046875, "learning_rate": 1.1137396825971241e-05, "loss": 0.2968, "step": 12281 }, { "epoch": 0.929530296580105, "grad_norm": 0.72265625, "learning_rate": 1.1136213467300065e-05, "loss": 0.2762, "step": 12282 }, { "epoch": 0.9296059789035523, "grad_norm": 0.765625, "learning_rate": 1.1135030092509753e-05, "loss": 0.307, "step": 12283 }, { "epoch": 0.9296816612269997, "grad_norm": 0.77734375, "learning_rate": 1.113384670161709e-05, "loss": 0.3369, "step": 12284 }, { "epoch": 0.929757343550447, "grad_norm": 0.8359375, "learning_rate": 1.1132663294638864e-05, "loss": 0.3074, "step": 12285 }, { "epoch": 0.9298330258738944, "grad_norm": 0.73828125, "learning_rate": 1.1131479871591869e-05, "loss": 0.3059, "step": 12286 }, { "epoch": 0.9299087081973416, "grad_norm": 0.76953125, "learning_rate": 1.1130296432492888e-05, "loss": 0.3262, "step": 12287 }, { "epoch": 0.929984390520789, "grad_norm": 0.71484375, "learning_rate": 1.1129112977358713e-05, "loss": 0.2614, "step": 12288 }, { "epoch": 0.9300600728442363, "grad_norm": 0.765625, "learning_rate": 1.1127929506206134e-05, "loss": 0.2901, "step": 12289 }, { "epoch": 0.9301357551676837, "grad_norm": 0.78515625, "learning_rate": 1.112674601905194e-05, "loss": 0.3321, "step": 12290 }, { "epoch": 0.930211437491131, "grad_norm": 0.74609375, "learning_rate": 1.1125562515912915e-05, "loss": 0.3307, "step": 12291 }, { "epoch": 0.9302871198145783, "grad_norm": 0.82421875, "learning_rate": 1.1124378996805858e-05, "loss": 0.3516, "step": 12292 }, { "epoch": 0.9303628021380257, "grad_norm": 0.77734375, "learning_rate": 1.1123195461747558e-05, "loss": 0.3147, "step": 12293 }, { "epoch": 0.9304384844614729, "grad_norm": 0.75390625, "learning_rate": 1.1122011910754797e-05, "loss": 0.2973, "step": 12294 }, { "epoch": 0.9305141667849203, "grad_norm": 0.65234375, "learning_rate": 1.1120828343844377e-05, "loss": 0.2612, "step": 12295 }, { "epoch": 0.9305898491083676, "grad_norm": 0.7890625, "learning_rate": 1.1119644761033079e-05, "loss": 0.3467, "step": 12296 }, { "epoch": 0.930665531431815, "grad_norm": 1.0078125, "learning_rate": 1.11184611623377e-05, "loss": 0.347, "step": 12297 }, { "epoch": 0.9307412137552623, "grad_norm": 0.76171875, "learning_rate": 1.1117277547775035e-05, "loss": 0.333, "step": 12298 }, { "epoch": 0.9308168960787097, "grad_norm": 0.78515625, "learning_rate": 1.1116093917361865e-05, "loss": 0.3111, "step": 12299 }, { "epoch": 0.930892578402157, "grad_norm": 0.70703125, "learning_rate": 1.1114910271114987e-05, "loss": 0.2776, "step": 12300 }, { "epoch": 0.9309682607256042, "grad_norm": 0.73046875, "learning_rate": 1.1113726609051197e-05, "loss": 0.3169, "step": 12301 }, { "epoch": 0.9310439430490516, "grad_norm": 0.79296875, "learning_rate": 1.1112542931187284e-05, "loss": 0.3208, "step": 12302 }, { "epoch": 0.9311196253724989, "grad_norm": 0.72265625, "learning_rate": 1.1111359237540039e-05, "loss": 0.2713, "step": 12303 }, { "epoch": 0.9311953076959463, "grad_norm": 0.78125, "learning_rate": 1.1110175528126255e-05, "loss": 0.3105, "step": 12304 }, { "epoch": 0.9312709900193936, "grad_norm": 0.75390625, "learning_rate": 1.1108991802962724e-05, "loss": 0.3085, "step": 12305 }, { "epoch": 0.931346672342841, "grad_norm": 0.72265625, "learning_rate": 1.1107808062066245e-05, "loss": 0.2964, "step": 12306 }, { "epoch": 0.9314223546662883, "grad_norm": 0.7109375, "learning_rate": 1.1106624305453608e-05, "loss": 0.2895, "step": 12307 }, { "epoch": 0.9314980369897355, "grad_norm": 0.7265625, "learning_rate": 1.1105440533141605e-05, "loss": 0.2866, "step": 12308 }, { "epoch": 0.9315737193131829, "grad_norm": 0.75390625, "learning_rate": 1.1104256745147031e-05, "loss": 0.2799, "step": 12309 }, { "epoch": 0.9316494016366302, "grad_norm": 0.80078125, "learning_rate": 1.110307294148668e-05, "loss": 0.3637, "step": 12310 }, { "epoch": 0.9317250839600776, "grad_norm": 0.73828125, "learning_rate": 1.1101889122177345e-05, "loss": 0.2844, "step": 12311 }, { "epoch": 0.9318007662835249, "grad_norm": 0.734375, "learning_rate": 1.1100705287235824e-05, "loss": 0.2953, "step": 12312 }, { "epoch": 0.9318764486069723, "grad_norm": 0.765625, "learning_rate": 1.1099521436678908e-05, "loss": 0.3299, "step": 12313 }, { "epoch": 0.9319521309304195, "grad_norm": 0.796875, "learning_rate": 1.1098337570523397e-05, "loss": 0.3633, "step": 12314 }, { "epoch": 0.9320278132538669, "grad_norm": 0.75, "learning_rate": 1.1097153688786077e-05, "loss": 0.323, "step": 12315 }, { "epoch": 0.9321034955773142, "grad_norm": 0.7421875, "learning_rate": 1.1095969791483756e-05, "loss": 0.3271, "step": 12316 }, { "epoch": 0.9321791779007615, "grad_norm": 0.76953125, "learning_rate": 1.1094785878633219e-05, "loss": 0.3063, "step": 12317 }, { "epoch": 0.9322548602242089, "grad_norm": 0.78515625, "learning_rate": 1.1093601950251262e-05, "loss": 0.3507, "step": 12318 }, { "epoch": 0.9323305425476562, "grad_norm": 0.765625, "learning_rate": 1.109241800635469e-05, "loss": 0.3295, "step": 12319 }, { "epoch": 0.9324062248711036, "grad_norm": 0.7890625, "learning_rate": 1.1091234046960294e-05, "loss": 0.2953, "step": 12320 }, { "epoch": 0.9324819071945508, "grad_norm": 0.78125, "learning_rate": 1.1090050072084868e-05, "loss": 0.3307, "step": 12321 }, { "epoch": 0.9325575895179982, "grad_norm": 0.69140625, "learning_rate": 1.108886608174521e-05, "loss": 0.2655, "step": 12322 }, { "epoch": 0.9326332718414455, "grad_norm": 0.76953125, "learning_rate": 1.1087682075958121e-05, "loss": 0.3266, "step": 12323 }, { "epoch": 0.9327089541648929, "grad_norm": 0.734375, "learning_rate": 1.1086498054740393e-05, "loss": 0.3067, "step": 12324 }, { "epoch": 0.9327846364883402, "grad_norm": 0.953125, "learning_rate": 1.1085314018108826e-05, "loss": 0.3323, "step": 12325 }, { "epoch": 0.9328603188117875, "grad_norm": 0.765625, "learning_rate": 1.108412996608022e-05, "loss": 0.3088, "step": 12326 }, { "epoch": 0.9329360011352349, "grad_norm": 0.76171875, "learning_rate": 1.1082945898671367e-05, "loss": 0.316, "step": 12327 }, { "epoch": 0.9330116834586821, "grad_norm": 0.703125, "learning_rate": 1.108176181589907e-05, "loss": 0.2805, "step": 12328 }, { "epoch": 0.9330873657821295, "grad_norm": 0.7578125, "learning_rate": 1.1080577717780123e-05, "loss": 0.3047, "step": 12329 }, { "epoch": 0.9331630481055768, "grad_norm": 0.7890625, "learning_rate": 1.107939360433133e-05, "loss": 0.3704, "step": 12330 }, { "epoch": 0.9332387304290242, "grad_norm": 0.6953125, "learning_rate": 1.1078209475569484e-05, "loss": 0.2651, "step": 12331 }, { "epoch": 0.9333144127524715, "grad_norm": 0.7734375, "learning_rate": 1.1077025331511385e-05, "loss": 0.3126, "step": 12332 }, { "epoch": 0.9333900950759189, "grad_norm": 0.85546875, "learning_rate": 1.1075841172173836e-05, "loss": 0.3148, "step": 12333 }, { "epoch": 0.9334657773993662, "grad_norm": 0.7265625, "learning_rate": 1.1074656997573632e-05, "loss": 0.2878, "step": 12334 }, { "epoch": 0.9335414597228134, "grad_norm": 0.75, "learning_rate": 1.1073472807727574e-05, "loss": 0.3182, "step": 12335 }, { "epoch": 0.9336171420462608, "grad_norm": 0.734375, "learning_rate": 1.1072288602652466e-05, "loss": 0.3137, "step": 12336 }, { "epoch": 0.9336928243697081, "grad_norm": 0.6953125, "learning_rate": 1.1071104382365101e-05, "loss": 0.2827, "step": 12337 }, { "epoch": 0.9337685066931555, "grad_norm": 0.7890625, "learning_rate": 1.1069920146882283e-05, "loss": 0.3338, "step": 12338 }, { "epoch": 0.9338441890166028, "grad_norm": 0.71484375, "learning_rate": 1.1068735896220808e-05, "loss": 0.2819, "step": 12339 }, { "epoch": 0.9339198713400502, "grad_norm": 0.73828125, "learning_rate": 1.1067551630397486e-05, "loss": 0.2992, "step": 12340 }, { "epoch": 0.9339955536634975, "grad_norm": 0.7734375, "learning_rate": 1.1066367349429108e-05, "loss": 0.298, "step": 12341 }, { "epoch": 0.9340712359869447, "grad_norm": 0.66796875, "learning_rate": 1.1065183053332481e-05, "loss": 0.2551, "step": 12342 }, { "epoch": 0.9341469183103921, "grad_norm": 0.765625, "learning_rate": 1.1063998742124404e-05, "loss": 0.2994, "step": 12343 }, { "epoch": 0.9342226006338394, "grad_norm": 0.71875, "learning_rate": 1.1062814415821681e-05, "loss": 0.2909, "step": 12344 }, { "epoch": 0.9342982829572868, "grad_norm": 0.73046875, "learning_rate": 1.1061630074441109e-05, "loss": 0.2863, "step": 12345 }, { "epoch": 0.9343739652807341, "grad_norm": 0.65234375, "learning_rate": 1.1060445717999495e-05, "loss": 0.2455, "step": 12346 }, { "epoch": 0.9344496476041815, "grad_norm": 0.7578125, "learning_rate": 1.1059261346513636e-05, "loss": 0.3126, "step": 12347 }, { "epoch": 0.9345253299276288, "grad_norm": 0.74609375, "learning_rate": 1.1058076960000338e-05, "loss": 0.3152, "step": 12348 }, { "epoch": 0.9346010122510761, "grad_norm": 0.73828125, "learning_rate": 1.1056892558476407e-05, "loss": 0.3124, "step": 12349 }, { "epoch": 0.9346766945745234, "grad_norm": 0.75390625, "learning_rate": 1.1055708141958634e-05, "loss": 0.2994, "step": 12350 }, { "epoch": 0.9347523768979707, "grad_norm": 0.7421875, "learning_rate": 1.1054523710463833e-05, "loss": 0.3018, "step": 12351 }, { "epoch": 0.9348280592214181, "grad_norm": 0.796875, "learning_rate": 1.1053339264008804e-05, "loss": 0.2684, "step": 12352 }, { "epoch": 0.9349037415448654, "grad_norm": 0.73828125, "learning_rate": 1.1052154802610347e-05, "loss": 0.3151, "step": 12353 }, { "epoch": 0.9349794238683128, "grad_norm": 0.81640625, "learning_rate": 1.1050970326285273e-05, "loss": 0.3357, "step": 12354 }, { "epoch": 0.9350551061917601, "grad_norm": 0.73046875, "learning_rate": 1.104978583505038e-05, "loss": 0.304, "step": 12355 }, { "epoch": 0.9351307885152074, "grad_norm": 0.81640625, "learning_rate": 1.104860132892247e-05, "loss": 0.3681, "step": 12356 }, { "epoch": 0.9352064708386547, "grad_norm": 0.68359375, "learning_rate": 1.1047416807918354e-05, "loss": 0.2619, "step": 12357 }, { "epoch": 0.9352821531621021, "grad_norm": 0.875, "learning_rate": 1.1046232272054833e-05, "loss": 0.3459, "step": 12358 }, { "epoch": 0.9353578354855494, "grad_norm": 0.78125, "learning_rate": 1.104504772134871e-05, "loss": 0.3345, "step": 12359 }, { "epoch": 0.9354335178089968, "grad_norm": 0.74609375, "learning_rate": 1.1043863155816794e-05, "loss": 0.3164, "step": 12360 }, { "epoch": 0.9355092001324441, "grad_norm": 0.76171875, "learning_rate": 1.1042678575475887e-05, "loss": 0.3189, "step": 12361 }, { "epoch": 0.9355848824558914, "grad_norm": 0.8046875, "learning_rate": 1.1041493980342792e-05, "loss": 0.33, "step": 12362 }, { "epoch": 0.9356605647793387, "grad_norm": 0.6875, "learning_rate": 1.1040309370434323e-05, "loss": 0.2872, "step": 12363 }, { "epoch": 0.935736247102786, "grad_norm": 0.75, "learning_rate": 1.1039124745767278e-05, "loss": 0.3146, "step": 12364 }, { "epoch": 0.9358119294262334, "grad_norm": 0.7890625, "learning_rate": 1.1037940106358462e-05, "loss": 0.3343, "step": 12365 }, { "epoch": 0.9358876117496807, "grad_norm": 0.7734375, "learning_rate": 1.103675545222469e-05, "loss": 0.3076, "step": 12366 }, { "epoch": 0.9359632940731281, "grad_norm": 0.7578125, "learning_rate": 1.103557078338276e-05, "loss": 0.3119, "step": 12367 }, { "epoch": 0.9360389763965754, "grad_norm": 0.7265625, "learning_rate": 1.1034386099849481e-05, "loss": 0.276, "step": 12368 }, { "epoch": 0.9361146587200228, "grad_norm": 0.6953125, "learning_rate": 1.1033201401641662e-05, "loss": 0.2742, "step": 12369 }, { "epoch": 0.93619034104347, "grad_norm": 0.75, "learning_rate": 1.1032016688776106e-05, "loss": 0.3144, "step": 12370 }, { "epoch": 0.9362660233669173, "grad_norm": 0.75390625, "learning_rate": 1.1030831961269622e-05, "loss": 0.2998, "step": 12371 }, { "epoch": 0.9363417056903647, "grad_norm": 0.79296875, "learning_rate": 1.1029647219139022e-05, "loss": 0.3437, "step": 12372 }, { "epoch": 0.936417388013812, "grad_norm": 0.7578125, "learning_rate": 1.1028462462401106e-05, "loss": 0.3161, "step": 12373 }, { "epoch": 0.9364930703372594, "grad_norm": 0.82421875, "learning_rate": 1.1027277691072681e-05, "loss": 0.3614, "step": 12374 }, { "epoch": 0.9365687526607067, "grad_norm": 0.73046875, "learning_rate": 1.1026092905170564e-05, "loss": 0.2906, "step": 12375 }, { "epoch": 0.9366444349841541, "grad_norm": 0.69921875, "learning_rate": 1.1024908104711558e-05, "loss": 0.2766, "step": 12376 }, { "epoch": 0.9367201173076013, "grad_norm": 0.75, "learning_rate": 1.1023723289712472e-05, "loss": 0.3212, "step": 12377 }, { "epoch": 0.9367957996310486, "grad_norm": 1.03125, "learning_rate": 1.1022538460190112e-05, "loss": 0.3173, "step": 12378 }, { "epoch": 0.936871481954496, "grad_norm": 0.76953125, "learning_rate": 1.1021353616161292e-05, "loss": 0.3322, "step": 12379 }, { "epoch": 0.9369471642779433, "grad_norm": 0.75, "learning_rate": 1.1020168757642815e-05, "loss": 0.3209, "step": 12380 }, { "epoch": 0.9370228466013907, "grad_norm": 0.82421875, "learning_rate": 1.1018983884651497e-05, "loss": 0.3782, "step": 12381 }, { "epoch": 0.937098528924838, "grad_norm": 0.7265625, "learning_rate": 1.1017798997204143e-05, "loss": 0.2978, "step": 12382 }, { "epoch": 0.9371742112482854, "grad_norm": 0.78515625, "learning_rate": 1.1016614095317562e-05, "loss": 0.3308, "step": 12383 }, { "epoch": 0.9372498935717326, "grad_norm": 0.6796875, "learning_rate": 1.1015429179008567e-05, "loss": 0.2442, "step": 12384 }, { "epoch": 0.93732557589518, "grad_norm": 0.6953125, "learning_rate": 1.1014244248293965e-05, "loss": 0.2804, "step": 12385 }, { "epoch": 0.9374012582186273, "grad_norm": 0.72265625, "learning_rate": 1.1013059303190572e-05, "loss": 0.2929, "step": 12386 }, { "epoch": 0.9374769405420746, "grad_norm": 0.70703125, "learning_rate": 1.101187434371519e-05, "loss": 0.2789, "step": 12387 }, { "epoch": 0.937552622865522, "grad_norm": 0.97265625, "learning_rate": 1.1010689369884632e-05, "loss": 0.3916, "step": 12388 }, { "epoch": 0.9376283051889693, "grad_norm": 0.7734375, "learning_rate": 1.1009504381715717e-05, "loss": 0.3146, "step": 12389 }, { "epoch": 0.9377039875124167, "grad_norm": 0.7890625, "learning_rate": 1.1008319379225247e-05, "loss": 0.3477, "step": 12390 }, { "epoch": 0.9377796698358639, "grad_norm": 0.77734375, "learning_rate": 1.1007134362430036e-05, "loss": 0.3365, "step": 12391 }, { "epoch": 0.9378553521593113, "grad_norm": 0.70703125, "learning_rate": 1.1005949331346897e-05, "loss": 0.2553, "step": 12392 }, { "epoch": 0.9379310344827586, "grad_norm": 0.83984375, "learning_rate": 1.100476428599264e-05, "loss": 0.2847, "step": 12393 }, { "epoch": 0.938006716806206, "grad_norm": 0.75, "learning_rate": 1.1003579226384078e-05, "loss": 0.3139, "step": 12394 }, { "epoch": 0.9380823991296533, "grad_norm": 0.8125, "learning_rate": 1.1002394152538018e-05, "loss": 0.3564, "step": 12395 }, { "epoch": 0.9381580814531006, "grad_norm": 0.86328125, "learning_rate": 1.1001209064471283e-05, "loss": 0.3195, "step": 12396 }, { "epoch": 0.938233763776548, "grad_norm": 0.78125, "learning_rate": 1.1000023962200674e-05, "loss": 0.3475, "step": 12397 }, { "epoch": 0.9383094460999952, "grad_norm": 0.80859375, "learning_rate": 1.0998838845743012e-05, "loss": 0.3562, "step": 12398 }, { "epoch": 0.9383851284234426, "grad_norm": 0.734375, "learning_rate": 1.0997653715115107e-05, "loss": 0.3073, "step": 12399 }, { "epoch": 0.9384608107468899, "grad_norm": 0.7109375, "learning_rate": 1.0996468570333771e-05, "loss": 0.2856, "step": 12400 }, { "epoch": 0.9385364930703373, "grad_norm": 0.77734375, "learning_rate": 1.099528341141582e-05, "loss": 0.3618, "step": 12401 }, { "epoch": 0.9386121753937846, "grad_norm": 0.7421875, "learning_rate": 1.0994098238378066e-05, "loss": 0.3002, "step": 12402 }, { "epoch": 0.938687857717232, "grad_norm": 0.7265625, "learning_rate": 1.099291305123732e-05, "loss": 0.2461, "step": 12403 }, { "epoch": 0.9387635400406793, "grad_norm": 0.6953125, "learning_rate": 1.09917278500104e-05, "loss": 0.2727, "step": 12404 }, { "epoch": 0.9388392223641265, "grad_norm": 0.75, "learning_rate": 1.099054263471412e-05, "loss": 0.308, "step": 12405 }, { "epoch": 0.9389149046875739, "grad_norm": 0.7578125, "learning_rate": 1.0989357405365289e-05, "loss": 0.3099, "step": 12406 }, { "epoch": 0.9389905870110212, "grad_norm": 0.6953125, "learning_rate": 1.0988172161980727e-05, "loss": 0.2793, "step": 12407 }, { "epoch": 0.9390662693344686, "grad_norm": 0.76953125, "learning_rate": 1.0986986904577249e-05, "loss": 0.3285, "step": 12408 }, { "epoch": 0.9391419516579159, "grad_norm": 0.73046875, "learning_rate": 1.0985801633171665e-05, "loss": 0.2851, "step": 12409 }, { "epoch": 0.9392176339813633, "grad_norm": 0.76953125, "learning_rate": 1.0984616347780797e-05, "loss": 0.3398, "step": 12410 }, { "epoch": 0.9392933163048106, "grad_norm": 0.83203125, "learning_rate": 1.0983431048421454e-05, "loss": 0.3483, "step": 12411 }, { "epoch": 0.9393689986282578, "grad_norm": 0.74609375, "learning_rate": 1.0982245735110453e-05, "loss": 0.3062, "step": 12412 }, { "epoch": 0.9394446809517052, "grad_norm": 0.79296875, "learning_rate": 1.0981060407864612e-05, "loss": 0.3337, "step": 12413 }, { "epoch": 0.9395203632751525, "grad_norm": 0.7734375, "learning_rate": 1.0979875066700747e-05, "loss": 0.3225, "step": 12414 }, { "epoch": 0.9395960455985999, "grad_norm": 0.7421875, "learning_rate": 1.0978689711635671e-05, "loss": 0.3274, "step": 12415 }, { "epoch": 0.9396717279220472, "grad_norm": 0.78515625, "learning_rate": 1.09775043426862e-05, "loss": 0.3468, "step": 12416 }, { "epoch": 0.9397474102454946, "grad_norm": 1.1015625, "learning_rate": 1.0976318959869157e-05, "loss": 0.3722, "step": 12417 }, { "epoch": 0.9398230925689419, "grad_norm": 0.73046875, "learning_rate": 1.0975133563201347e-05, "loss": 0.3036, "step": 12418 }, { "epoch": 0.9398987748923892, "grad_norm": 0.76171875, "learning_rate": 1.0973948152699603e-05, "loss": 0.3042, "step": 12419 }, { "epoch": 0.9399744572158365, "grad_norm": 0.765625, "learning_rate": 1.0972762728380728e-05, "loss": 0.283, "step": 12420 }, { "epoch": 0.9400501395392838, "grad_norm": 0.71484375, "learning_rate": 1.0971577290261544e-05, "loss": 0.2908, "step": 12421 }, { "epoch": 0.9401258218627312, "grad_norm": 0.7109375, "learning_rate": 1.0970391838358869e-05, "loss": 0.2861, "step": 12422 }, { "epoch": 0.9402015041861785, "grad_norm": 0.76171875, "learning_rate": 1.0969206372689523e-05, "loss": 0.3169, "step": 12423 }, { "epoch": 0.9402771865096259, "grad_norm": 0.71484375, "learning_rate": 1.096802089327032e-05, "loss": 0.303, "step": 12424 }, { "epoch": 0.9403528688330732, "grad_norm": 0.7734375, "learning_rate": 1.0966835400118078e-05, "loss": 0.3252, "step": 12425 }, { "epoch": 0.9404285511565205, "grad_norm": 0.90234375, "learning_rate": 1.0965649893249619e-05, "loss": 0.302, "step": 12426 }, { "epoch": 0.9405042334799678, "grad_norm": 0.78125, "learning_rate": 1.0964464372681757e-05, "loss": 0.3543, "step": 12427 }, { "epoch": 0.9405799158034152, "grad_norm": 0.77734375, "learning_rate": 1.0963278838431318e-05, "loss": 0.3253, "step": 12428 }, { "epoch": 0.9406555981268625, "grad_norm": 0.75, "learning_rate": 1.096209329051511e-05, "loss": 0.3009, "step": 12429 }, { "epoch": 0.9407312804503098, "grad_norm": 0.78515625, "learning_rate": 1.0960907728949959e-05, "loss": 0.3418, "step": 12430 }, { "epoch": 0.9408069627737572, "grad_norm": 0.7890625, "learning_rate": 1.0959722153752683e-05, "loss": 0.3453, "step": 12431 }, { "epoch": 0.9408826450972045, "grad_norm": 0.83203125, "learning_rate": 1.0958536564940102e-05, "loss": 0.3479, "step": 12432 }, { "epoch": 0.9409583274206518, "grad_norm": 0.796875, "learning_rate": 1.0957350962529035e-05, "loss": 0.3489, "step": 12433 }, { "epoch": 0.9410340097440991, "grad_norm": 0.7421875, "learning_rate": 1.0956165346536301e-05, "loss": 0.3113, "step": 12434 }, { "epoch": 0.9411096920675465, "grad_norm": 0.79296875, "learning_rate": 1.095497971697872e-05, "loss": 0.3742, "step": 12435 }, { "epoch": 0.9411853743909938, "grad_norm": 0.7890625, "learning_rate": 1.0953794073873111e-05, "loss": 0.3121, "step": 12436 }, { "epoch": 0.9412610567144412, "grad_norm": 0.796875, "learning_rate": 1.0952608417236301e-05, "loss": 0.3339, "step": 12437 }, { "epoch": 0.9413367390378885, "grad_norm": 0.7421875, "learning_rate": 1.0951422747085103e-05, "loss": 0.2893, "step": 12438 }, { "epoch": 0.9414124213613357, "grad_norm": 0.72265625, "learning_rate": 1.0950237063436342e-05, "loss": 0.2838, "step": 12439 }, { "epoch": 0.9414881036847831, "grad_norm": 0.80859375, "learning_rate": 1.0949051366306838e-05, "loss": 0.3627, "step": 12440 }, { "epoch": 0.9415637860082304, "grad_norm": 0.71484375, "learning_rate": 1.0947865655713412e-05, "loss": 0.2881, "step": 12441 }, { "epoch": 0.9416394683316778, "grad_norm": 0.72265625, "learning_rate": 1.0946679931672883e-05, "loss": 0.3135, "step": 12442 }, { "epoch": 0.9417151506551251, "grad_norm": 0.74609375, "learning_rate": 1.0945494194202077e-05, "loss": 0.2812, "step": 12443 }, { "epoch": 0.9417908329785725, "grad_norm": 0.76171875, "learning_rate": 1.094430844331781e-05, "loss": 0.3209, "step": 12444 }, { "epoch": 0.9418665153020198, "grad_norm": 0.83203125, "learning_rate": 1.0943122679036909e-05, "loss": 0.3309, "step": 12445 }, { "epoch": 0.941942197625467, "grad_norm": 0.765625, "learning_rate": 1.0941936901376195e-05, "loss": 0.3058, "step": 12446 }, { "epoch": 0.9420178799489144, "grad_norm": 0.71875, "learning_rate": 1.094075111035249e-05, "loss": 0.3028, "step": 12447 }, { "epoch": 0.9420935622723617, "grad_norm": 0.7734375, "learning_rate": 1.0939565305982616e-05, "loss": 0.3351, "step": 12448 }, { "epoch": 0.9421692445958091, "grad_norm": 0.734375, "learning_rate": 1.0938379488283396e-05, "loss": 0.3238, "step": 12449 }, { "epoch": 0.9422449269192564, "grad_norm": 3.390625, "learning_rate": 1.093719365727165e-05, "loss": 0.3376, "step": 12450 }, { "epoch": 0.9423206092427038, "grad_norm": 0.765625, "learning_rate": 1.0936007812964204e-05, "loss": 0.3194, "step": 12451 }, { "epoch": 0.9423962915661511, "grad_norm": 0.7734375, "learning_rate": 1.0934821955377885e-05, "loss": 0.3005, "step": 12452 }, { "epoch": 0.9424719738895984, "grad_norm": 0.796875, "learning_rate": 1.0933636084529507e-05, "loss": 0.3521, "step": 12453 }, { "epoch": 0.9425476562130457, "grad_norm": 0.671875, "learning_rate": 1.0932450200435902e-05, "loss": 0.2742, "step": 12454 }, { "epoch": 0.942623338536493, "grad_norm": 0.7578125, "learning_rate": 1.093126430311389e-05, "loss": 0.3163, "step": 12455 }, { "epoch": 0.9426990208599404, "grad_norm": 0.77734375, "learning_rate": 1.0930078392580296e-05, "loss": 0.3276, "step": 12456 }, { "epoch": 0.9427747031833877, "grad_norm": 0.73046875, "learning_rate": 1.0928892468851946e-05, "loss": 0.3064, "step": 12457 }, { "epoch": 0.9428503855068351, "grad_norm": 0.703125, "learning_rate": 1.0927706531945661e-05, "loss": 0.2883, "step": 12458 }, { "epoch": 0.9429260678302824, "grad_norm": 0.79296875, "learning_rate": 1.0926520581878264e-05, "loss": 0.3238, "step": 12459 }, { "epoch": 0.9430017501537297, "grad_norm": 0.7109375, "learning_rate": 1.0925334618666585e-05, "loss": 0.2889, "step": 12460 }, { "epoch": 0.943077432477177, "grad_norm": 0.72265625, "learning_rate": 1.0924148642327448e-05, "loss": 0.3171, "step": 12461 }, { "epoch": 0.9431531148006244, "grad_norm": 0.69921875, "learning_rate": 1.0922962652877673e-05, "loss": 0.2822, "step": 12462 }, { "epoch": 0.9432287971240717, "grad_norm": 0.73828125, "learning_rate": 1.0921776650334093e-05, "loss": 0.2862, "step": 12463 }, { "epoch": 0.943304479447519, "grad_norm": 0.75, "learning_rate": 1.0920590634713527e-05, "loss": 0.2932, "step": 12464 }, { "epoch": 0.9433801617709664, "grad_norm": 0.72265625, "learning_rate": 1.0919404606032802e-05, "loss": 0.3045, "step": 12465 }, { "epoch": 0.9434558440944137, "grad_norm": 0.7421875, "learning_rate": 1.0918218564308748e-05, "loss": 0.3016, "step": 12466 }, { "epoch": 0.943531526417861, "grad_norm": 1.2890625, "learning_rate": 1.0917032509558185e-05, "loss": 0.3579, "step": 12467 }, { "epoch": 0.9436072087413083, "grad_norm": 0.7890625, "learning_rate": 1.0915846441797942e-05, "loss": 0.3383, "step": 12468 }, { "epoch": 0.9436828910647557, "grad_norm": 0.82421875, "learning_rate": 1.0914660361044849e-05, "loss": 0.3613, "step": 12469 }, { "epoch": 0.943758573388203, "grad_norm": 0.69140625, "learning_rate": 1.091347426731573e-05, "loss": 0.2818, "step": 12470 }, { "epoch": 0.9438342557116504, "grad_norm": 0.78125, "learning_rate": 1.0912288160627404e-05, "loss": 0.351, "step": 12471 }, { "epoch": 0.9439099380350977, "grad_norm": 0.734375, "learning_rate": 1.091110204099671e-05, "loss": 0.3208, "step": 12472 }, { "epoch": 0.943985620358545, "grad_norm": 0.68359375, "learning_rate": 1.0909915908440469e-05, "loss": 0.259, "step": 12473 }, { "epoch": 0.9440613026819923, "grad_norm": 0.7890625, "learning_rate": 1.0908729762975507e-05, "loss": 0.3412, "step": 12474 }, { "epoch": 0.9441369850054396, "grad_norm": 1.140625, "learning_rate": 1.0907543604618658e-05, "loss": 0.4005, "step": 12475 }, { "epoch": 0.944212667328887, "grad_norm": 0.71484375, "learning_rate": 1.0906357433386745e-05, "loss": 0.3161, "step": 12476 }, { "epoch": 0.9442883496523343, "grad_norm": 0.71875, "learning_rate": 1.0905171249296593e-05, "loss": 0.3042, "step": 12477 }, { "epoch": 0.9443640319757817, "grad_norm": 0.71875, "learning_rate": 1.0903985052365035e-05, "loss": 0.2895, "step": 12478 }, { "epoch": 0.944439714299229, "grad_norm": 0.7421875, "learning_rate": 1.0902798842608899e-05, "loss": 0.3202, "step": 12479 }, { "epoch": 0.9445153966226764, "grad_norm": 0.75390625, "learning_rate": 1.0901612620045013e-05, "loss": 0.3119, "step": 12480 }, { "epoch": 0.9445910789461236, "grad_norm": 0.7890625, "learning_rate": 1.0900426384690204e-05, "loss": 0.3356, "step": 12481 }, { "epoch": 0.9446667612695709, "grad_norm": 0.734375, "learning_rate": 1.08992401365613e-05, "loss": 0.2866, "step": 12482 }, { "epoch": 0.9447424435930183, "grad_norm": 0.78515625, "learning_rate": 1.0898053875675132e-05, "loss": 0.3379, "step": 12483 }, { "epoch": 0.9448181259164656, "grad_norm": 0.75, "learning_rate": 1.089686760204853e-05, "loss": 0.3136, "step": 12484 }, { "epoch": 0.944893808239913, "grad_norm": 0.796875, "learning_rate": 1.0895681315698323e-05, "loss": 0.3584, "step": 12485 }, { "epoch": 0.9449694905633603, "grad_norm": 0.734375, "learning_rate": 1.0894495016641336e-05, "loss": 0.2835, "step": 12486 }, { "epoch": 0.9450451728868077, "grad_norm": 0.7421875, "learning_rate": 1.0893308704894403e-05, "loss": 0.3012, "step": 12487 }, { "epoch": 0.9451208552102549, "grad_norm": 0.83984375, "learning_rate": 1.0892122380474354e-05, "loss": 0.3673, "step": 12488 }, { "epoch": 0.9451965375337023, "grad_norm": 0.7734375, "learning_rate": 1.0890936043398019e-05, "loss": 0.3154, "step": 12489 }, { "epoch": 0.9452722198571496, "grad_norm": 0.75390625, "learning_rate": 1.0889749693682226e-05, "loss": 0.3332, "step": 12490 }, { "epoch": 0.9453479021805969, "grad_norm": 0.78125, "learning_rate": 1.0888563331343807e-05, "loss": 0.3522, "step": 12491 }, { "epoch": 0.9454235845040443, "grad_norm": 0.76953125, "learning_rate": 1.0887376956399591e-05, "loss": 0.3442, "step": 12492 }, { "epoch": 0.9454992668274916, "grad_norm": 0.78515625, "learning_rate": 1.0886190568866411e-05, "loss": 0.354, "step": 12493 }, { "epoch": 0.945574949150939, "grad_norm": 0.76953125, "learning_rate": 1.0885004168761097e-05, "loss": 0.3194, "step": 12494 }, { "epoch": 0.9456506314743862, "grad_norm": 0.85546875, "learning_rate": 1.0883817756100481e-05, "loss": 0.3369, "step": 12495 }, { "epoch": 0.9457263137978336, "grad_norm": 0.73828125, "learning_rate": 1.0882631330901394e-05, "loss": 0.2659, "step": 12496 }, { "epoch": 0.9458019961212809, "grad_norm": 0.80078125, "learning_rate": 1.0881444893180665e-05, "loss": 0.3813, "step": 12497 }, { "epoch": 0.9458776784447283, "grad_norm": 0.74609375, "learning_rate": 1.088025844295513e-05, "loss": 0.3007, "step": 12498 }, { "epoch": 0.9459533607681756, "grad_norm": 0.765625, "learning_rate": 1.0879071980241617e-05, "loss": 0.3175, "step": 12499 }, { "epoch": 0.946029043091623, "grad_norm": 0.7421875, "learning_rate": 1.0877885505056958e-05, "loss": 0.3094, "step": 12500 }, { "epoch": 0.9461047254150703, "grad_norm": 0.7734375, "learning_rate": 1.0876699017417988e-05, "loss": 0.3089, "step": 12501 }, { "epoch": 0.9461804077385175, "grad_norm": 0.7265625, "learning_rate": 1.0875512517341538e-05, "loss": 0.2862, "step": 12502 }, { "epoch": 0.9462560900619649, "grad_norm": 0.79296875, "learning_rate": 1.087432600484444e-05, "loss": 0.3541, "step": 12503 }, { "epoch": 0.9463317723854122, "grad_norm": 0.7421875, "learning_rate": 1.087313947994353e-05, "loss": 0.3073, "step": 12504 }, { "epoch": 0.9464074547088596, "grad_norm": 0.69921875, "learning_rate": 1.0871952942655634e-05, "loss": 0.2767, "step": 12505 }, { "epoch": 0.9464831370323069, "grad_norm": 0.66015625, "learning_rate": 1.087076639299759e-05, "loss": 0.2618, "step": 12506 }, { "epoch": 0.9465588193557543, "grad_norm": 0.6953125, "learning_rate": 1.0869579830986233e-05, "loss": 0.2751, "step": 12507 }, { "epoch": 0.9466345016792016, "grad_norm": 0.75390625, "learning_rate": 1.0868393256638394e-05, "loss": 0.3111, "step": 12508 }, { "epoch": 0.9467101840026488, "grad_norm": 0.828125, "learning_rate": 1.0867206669970902e-05, "loss": 0.3296, "step": 12509 }, { "epoch": 0.9467858663260962, "grad_norm": 0.8125, "learning_rate": 1.0866020071000597e-05, "loss": 0.3756, "step": 12510 }, { "epoch": 0.9468615486495435, "grad_norm": 0.7265625, "learning_rate": 1.0864833459744313e-05, "loss": 0.3086, "step": 12511 }, { "epoch": 0.9469372309729909, "grad_norm": 0.828125, "learning_rate": 1.0863646836218882e-05, "loss": 0.3303, "step": 12512 }, { "epoch": 0.9470129132964382, "grad_norm": 0.75, "learning_rate": 1.0862460200441138e-05, "loss": 0.3026, "step": 12513 }, { "epoch": 0.9470885956198856, "grad_norm": 0.93359375, "learning_rate": 1.0861273552427915e-05, "loss": 0.3155, "step": 12514 }, { "epoch": 0.9471642779433329, "grad_norm": 0.8125, "learning_rate": 1.0860086892196049e-05, "loss": 0.3607, "step": 12515 }, { "epoch": 0.9472399602667801, "grad_norm": 0.68359375, "learning_rate": 1.0858900219762377e-05, "loss": 0.2804, "step": 12516 }, { "epoch": 0.9473156425902275, "grad_norm": 0.796875, "learning_rate": 1.085771353514373e-05, "loss": 0.3529, "step": 12517 }, { "epoch": 0.9473913249136748, "grad_norm": 0.875, "learning_rate": 1.0856526838356943e-05, "loss": 0.3056, "step": 12518 }, { "epoch": 0.9474670072371222, "grad_norm": 0.7265625, "learning_rate": 1.0855340129418854e-05, "loss": 0.2943, "step": 12519 }, { "epoch": 0.9475426895605695, "grad_norm": 0.7109375, "learning_rate": 1.0854153408346298e-05, "loss": 0.29, "step": 12520 }, { "epoch": 0.9476183718840169, "grad_norm": 0.76953125, "learning_rate": 1.0852966675156106e-05, "loss": 0.3354, "step": 12521 }, { "epoch": 0.9476940542074642, "grad_norm": 0.7109375, "learning_rate": 1.0851779929865125e-05, "loss": 0.2862, "step": 12522 }, { "epoch": 0.9477697365309115, "grad_norm": 0.8203125, "learning_rate": 1.085059317249018e-05, "loss": 0.3565, "step": 12523 }, { "epoch": 0.9478454188543588, "grad_norm": 0.98828125, "learning_rate": 1.084940640304811e-05, "loss": 0.3361, "step": 12524 }, { "epoch": 0.9479211011778061, "grad_norm": 0.97265625, "learning_rate": 1.0848219621555756e-05, "loss": 0.344, "step": 12525 }, { "epoch": 0.9479967835012535, "grad_norm": 0.74609375, "learning_rate": 1.0847032828029952e-05, "loss": 0.2929, "step": 12526 }, { "epoch": 0.9480724658247008, "grad_norm": 0.71875, "learning_rate": 1.084584602248753e-05, "loss": 0.276, "step": 12527 }, { "epoch": 0.9481481481481482, "grad_norm": 0.83984375, "learning_rate": 1.0844659204945331e-05, "loss": 0.3338, "step": 12528 }, { "epoch": 0.9482238304715955, "grad_norm": 0.7734375, "learning_rate": 1.0843472375420192e-05, "loss": 0.3216, "step": 12529 }, { "epoch": 0.9482995127950428, "grad_norm": 0.77734375, "learning_rate": 1.084228553392895e-05, "loss": 0.3448, "step": 12530 }, { "epoch": 0.9483751951184901, "grad_norm": 0.7734375, "learning_rate": 1.0841098680488445e-05, "loss": 0.3323, "step": 12531 }, { "epoch": 0.9484508774419375, "grad_norm": 0.74609375, "learning_rate": 1.0839911815115509e-05, "loss": 0.3162, "step": 12532 }, { "epoch": 0.9485265597653848, "grad_norm": 0.77734375, "learning_rate": 1.083872493782698e-05, "loss": 0.3223, "step": 12533 }, { "epoch": 0.9486022420888321, "grad_norm": 0.71484375, "learning_rate": 1.0837538048639705e-05, "loss": 0.2835, "step": 12534 }, { "epoch": 0.9486779244122795, "grad_norm": 0.72265625, "learning_rate": 1.083635114757051e-05, "loss": 0.2876, "step": 12535 }, { "epoch": 0.9487536067357268, "grad_norm": 0.76953125, "learning_rate": 1.0835164234636242e-05, "loss": 0.3168, "step": 12536 }, { "epoch": 0.9488292890591741, "grad_norm": 0.77734375, "learning_rate": 1.0833977309853736e-05, "loss": 0.3606, "step": 12537 }, { "epoch": 0.9489049713826214, "grad_norm": 0.78515625, "learning_rate": 1.083279037323983e-05, "loss": 0.355, "step": 12538 }, { "epoch": 0.9489806537060688, "grad_norm": 0.7734375, "learning_rate": 1.0831603424811361e-05, "loss": 0.3302, "step": 12539 }, { "epoch": 0.9490563360295161, "grad_norm": 0.72265625, "learning_rate": 1.0830416464585174e-05, "loss": 0.2979, "step": 12540 }, { "epoch": 0.9491320183529635, "grad_norm": 0.7890625, "learning_rate": 1.0829229492578104e-05, "loss": 0.3225, "step": 12541 }, { "epoch": 0.9492077006764108, "grad_norm": 0.7421875, "learning_rate": 1.0828042508806986e-05, "loss": 0.2932, "step": 12542 }, { "epoch": 0.9492833829998581, "grad_norm": 0.66796875, "learning_rate": 1.0826855513288667e-05, "loss": 0.2672, "step": 12543 }, { "epoch": 0.9493590653233054, "grad_norm": 0.78515625, "learning_rate": 1.0825668506039986e-05, "loss": 0.3475, "step": 12544 }, { "epoch": 0.9494347476467527, "grad_norm": 0.72265625, "learning_rate": 1.0824481487077778e-05, "loss": 0.3077, "step": 12545 }, { "epoch": 0.9495104299702001, "grad_norm": 1.9140625, "learning_rate": 1.0823294456418886e-05, "loss": 0.3992, "step": 12546 }, { "epoch": 0.9495861122936474, "grad_norm": 0.703125, "learning_rate": 1.0822107414080149e-05, "loss": 0.2888, "step": 12547 }, { "epoch": 0.9496617946170948, "grad_norm": 0.76171875, "learning_rate": 1.0820920360078407e-05, "loss": 0.3328, "step": 12548 }, { "epoch": 0.9497374769405421, "grad_norm": 0.74609375, "learning_rate": 1.0819733294430502e-05, "loss": 0.3035, "step": 12549 }, { "epoch": 0.9498131592639895, "grad_norm": 0.73046875, "learning_rate": 1.0818546217153272e-05, "loss": 0.2993, "step": 12550 }, { "epoch": 0.9498888415874367, "grad_norm": 0.82421875, "learning_rate": 1.081735912826356e-05, "loss": 0.3828, "step": 12551 }, { "epoch": 0.949964523910884, "grad_norm": 0.7890625, "learning_rate": 1.0816172027778208e-05, "loss": 0.3184, "step": 12552 }, { "epoch": 0.9500402062343314, "grad_norm": 0.71484375, "learning_rate": 1.0814984915714053e-05, "loss": 0.2934, "step": 12553 }, { "epoch": 0.9501158885577787, "grad_norm": 0.7109375, "learning_rate": 1.081379779208794e-05, "loss": 0.2816, "step": 12554 }, { "epoch": 0.9501915708812261, "grad_norm": 0.734375, "learning_rate": 1.0812610656916708e-05, "loss": 0.3045, "step": 12555 }, { "epoch": 0.9502672532046734, "grad_norm": 0.79296875, "learning_rate": 1.0811423510217198e-05, "loss": 0.3191, "step": 12556 }, { "epoch": 0.9503429355281207, "grad_norm": 0.7109375, "learning_rate": 1.0810236352006254e-05, "loss": 0.2762, "step": 12557 }, { "epoch": 0.950418617851568, "grad_norm": 1.1796875, "learning_rate": 1.0809049182300716e-05, "loss": 0.4041, "step": 12558 }, { "epoch": 0.9504943001750153, "grad_norm": 0.76953125, "learning_rate": 1.0807862001117431e-05, "loss": 0.3224, "step": 12559 }, { "epoch": 0.9505699824984627, "grad_norm": 0.76953125, "learning_rate": 1.0806674808473234e-05, "loss": 0.3007, "step": 12560 }, { "epoch": 0.95064566482191, "grad_norm": 0.73828125, "learning_rate": 1.080548760438497e-05, "loss": 0.2946, "step": 12561 }, { "epoch": 0.9507213471453574, "grad_norm": 0.76953125, "learning_rate": 1.0804300388869481e-05, "loss": 0.3197, "step": 12562 }, { "epoch": 0.9507970294688047, "grad_norm": 0.69921875, "learning_rate": 1.0803113161943613e-05, "loss": 0.2622, "step": 12563 }, { "epoch": 0.950872711792252, "grad_norm": 0.7421875, "learning_rate": 1.080192592362421e-05, "loss": 0.2934, "step": 12564 }, { "epoch": 0.9509483941156993, "grad_norm": 0.734375, "learning_rate": 1.0800738673928106e-05, "loss": 0.3054, "step": 12565 }, { "epoch": 0.9510240764391467, "grad_norm": 0.70703125, "learning_rate": 1.0799551412872151e-05, "loss": 0.2784, "step": 12566 }, { "epoch": 0.951099758762594, "grad_norm": 0.703125, "learning_rate": 1.0798364140473187e-05, "loss": 0.2455, "step": 12567 }, { "epoch": 0.9511754410860414, "grad_norm": 0.765625, "learning_rate": 1.079717685674806e-05, "loss": 0.3301, "step": 12568 }, { "epoch": 0.9512511234094887, "grad_norm": 0.7578125, "learning_rate": 1.0795989561713609e-05, "loss": 0.3465, "step": 12569 }, { "epoch": 0.951326805732936, "grad_norm": 0.75390625, "learning_rate": 1.0794802255386682e-05, "loss": 0.3377, "step": 12570 }, { "epoch": 0.9514024880563833, "grad_norm": 0.8359375, "learning_rate": 1.0793614937784118e-05, "loss": 0.3629, "step": 12571 }, { "epoch": 0.9514781703798306, "grad_norm": 0.765625, "learning_rate": 1.0792427608922766e-05, "loss": 0.3433, "step": 12572 }, { "epoch": 0.951553852703278, "grad_norm": 0.75390625, "learning_rate": 1.0791240268819472e-05, "loss": 0.3302, "step": 12573 }, { "epoch": 0.9516295350267253, "grad_norm": 0.80859375, "learning_rate": 1.0790052917491071e-05, "loss": 0.3691, "step": 12574 }, { "epoch": 0.9517052173501727, "grad_norm": 0.7578125, "learning_rate": 1.0788865554954417e-05, "loss": 0.3304, "step": 12575 }, { "epoch": 0.95178089967362, "grad_norm": 0.76171875, "learning_rate": 1.0787678181226351e-05, "loss": 0.3133, "step": 12576 }, { "epoch": 0.9518565819970674, "grad_norm": 0.79296875, "learning_rate": 1.0786490796323716e-05, "loss": 0.3457, "step": 12577 }, { "epoch": 0.9519322643205146, "grad_norm": 0.73828125, "learning_rate": 1.0785303400263362e-05, "loss": 0.3123, "step": 12578 }, { "epoch": 0.9520079466439619, "grad_norm": 0.71484375, "learning_rate": 1.0784115993062132e-05, "loss": 0.2734, "step": 12579 }, { "epoch": 0.9520836289674093, "grad_norm": 0.78125, "learning_rate": 1.0782928574736869e-05, "loss": 0.3533, "step": 12580 }, { "epoch": 0.9521593112908566, "grad_norm": 0.796875, "learning_rate": 1.078174114530442e-05, "loss": 0.3008, "step": 12581 }, { "epoch": 0.952234993614304, "grad_norm": 1.1796875, "learning_rate": 1.0780553704781637e-05, "loss": 0.32, "step": 12582 }, { "epoch": 0.9523106759377513, "grad_norm": 0.71875, "learning_rate": 1.0779366253185355e-05, "loss": 0.2903, "step": 12583 }, { "epoch": 0.9523863582611987, "grad_norm": 0.79296875, "learning_rate": 1.0778178790532425e-05, "loss": 0.3454, "step": 12584 }, { "epoch": 0.9524620405846459, "grad_norm": 0.734375, "learning_rate": 1.0776991316839695e-05, "loss": 0.3107, "step": 12585 }, { "epoch": 0.9525377229080932, "grad_norm": 0.7890625, "learning_rate": 1.0775803832124008e-05, "loss": 0.3433, "step": 12586 }, { "epoch": 0.9526134052315406, "grad_norm": 0.80859375, "learning_rate": 1.0774616336402217e-05, "loss": 0.3684, "step": 12587 }, { "epoch": 0.9526890875549879, "grad_norm": 0.83984375, "learning_rate": 1.0773428829691161e-05, "loss": 0.3727, "step": 12588 }, { "epoch": 0.9527647698784353, "grad_norm": 0.73046875, "learning_rate": 1.0772241312007687e-05, "loss": 0.2842, "step": 12589 }, { "epoch": 0.9528404522018826, "grad_norm": 0.7109375, "learning_rate": 1.0771053783368647e-05, "loss": 0.2988, "step": 12590 }, { "epoch": 0.95291613452533, "grad_norm": 0.7421875, "learning_rate": 1.0769866243790887e-05, "loss": 0.3005, "step": 12591 }, { "epoch": 0.9529918168487772, "grad_norm": 0.7109375, "learning_rate": 1.0768678693291255e-05, "loss": 0.2565, "step": 12592 }, { "epoch": 0.9530674991722246, "grad_norm": 0.79296875, "learning_rate": 1.0767491131886594e-05, "loss": 0.3192, "step": 12593 }, { "epoch": 0.9531431814956719, "grad_norm": 0.78515625, "learning_rate": 1.0766303559593754e-05, "loss": 0.3148, "step": 12594 }, { "epoch": 0.9532188638191192, "grad_norm": 0.7734375, "learning_rate": 1.076511597642958e-05, "loss": 0.3317, "step": 12595 }, { "epoch": 0.9532945461425666, "grad_norm": 0.7734375, "learning_rate": 1.076392838241093e-05, "loss": 0.3261, "step": 12596 }, { "epoch": 0.9533702284660139, "grad_norm": 0.84765625, "learning_rate": 1.0762740777554642e-05, "loss": 0.3539, "step": 12597 }, { "epoch": 0.9534459107894613, "grad_norm": 0.71875, "learning_rate": 1.0761553161877565e-05, "loss": 0.2746, "step": 12598 }, { "epoch": 0.9535215931129085, "grad_norm": 0.74609375, "learning_rate": 1.0760365535396552e-05, "loss": 0.3204, "step": 12599 }, { "epoch": 0.9535972754363559, "grad_norm": 0.91015625, "learning_rate": 1.0759177898128449e-05, "loss": 0.3205, "step": 12600 }, { "epoch": 0.9536729577598032, "grad_norm": 0.74609375, "learning_rate": 1.0757990250090102e-05, "loss": 0.3045, "step": 12601 }, { "epoch": 0.9537486400832506, "grad_norm": 0.76171875, "learning_rate": 1.0756802591298365e-05, "loss": 0.3136, "step": 12602 }, { "epoch": 0.9538243224066979, "grad_norm": 0.734375, "learning_rate": 1.0755614921770084e-05, "loss": 0.3228, "step": 12603 }, { "epoch": 0.9539000047301452, "grad_norm": 0.6953125, "learning_rate": 1.0754427241522108e-05, "loss": 0.2576, "step": 12604 }, { "epoch": 0.9539756870535926, "grad_norm": 0.73828125, "learning_rate": 1.0753239550571286e-05, "loss": 0.2681, "step": 12605 }, { "epoch": 0.9540513693770398, "grad_norm": 0.765625, "learning_rate": 1.0752051848934471e-05, "loss": 0.3145, "step": 12606 }, { "epoch": 0.9541270517004872, "grad_norm": 0.703125, "learning_rate": 1.0750864136628509e-05, "loss": 0.2955, "step": 12607 }, { "epoch": 0.9542027340239345, "grad_norm": 0.87890625, "learning_rate": 1.0749676413670252e-05, "loss": 0.316, "step": 12608 }, { "epoch": 0.9542784163473819, "grad_norm": 0.765625, "learning_rate": 1.0748488680076546e-05, "loss": 0.2904, "step": 12609 }, { "epoch": 0.9543540986708292, "grad_norm": 0.73828125, "learning_rate": 1.0747300935864245e-05, "loss": 0.3218, "step": 12610 }, { "epoch": 0.9544297809942766, "grad_norm": 0.765625, "learning_rate": 1.0746113181050197e-05, "loss": 0.3043, "step": 12611 }, { "epoch": 0.9545054633177239, "grad_norm": 0.78515625, "learning_rate": 1.0744925415651252e-05, "loss": 0.3444, "step": 12612 }, { "epoch": 0.9545811456411711, "grad_norm": 0.77734375, "learning_rate": 1.0743737639684264e-05, "loss": 0.3233, "step": 12613 }, { "epoch": 0.9546568279646185, "grad_norm": 0.7734375, "learning_rate": 1.074254985316608e-05, "loss": 0.3145, "step": 12614 }, { "epoch": 0.9547325102880658, "grad_norm": 0.74609375, "learning_rate": 1.0741362056113554e-05, "loss": 0.2719, "step": 12615 }, { "epoch": 0.9548081926115132, "grad_norm": 0.74609375, "learning_rate": 1.0740174248543534e-05, "loss": 0.3026, "step": 12616 }, { "epoch": 0.9548838749349605, "grad_norm": 0.8515625, "learning_rate": 1.073898643047287e-05, "loss": 0.3291, "step": 12617 }, { "epoch": 0.9549595572584079, "grad_norm": 0.7578125, "learning_rate": 1.0737798601918416e-05, "loss": 0.3012, "step": 12618 }, { "epoch": 0.9550352395818552, "grad_norm": 0.7421875, "learning_rate": 1.0736610762897025e-05, "loss": 0.2977, "step": 12619 }, { "epoch": 0.9551109219053024, "grad_norm": 0.73828125, "learning_rate": 1.0735422913425549e-05, "loss": 0.2733, "step": 12620 }, { "epoch": 0.9551866042287498, "grad_norm": 0.79296875, "learning_rate": 1.073423505352083e-05, "loss": 0.3312, "step": 12621 }, { "epoch": 0.9552622865521971, "grad_norm": 0.75, "learning_rate": 1.0733047183199729e-05, "loss": 0.2985, "step": 12622 }, { "epoch": 0.9553379688756445, "grad_norm": 0.77734375, "learning_rate": 1.0731859302479097e-05, "loss": 0.3455, "step": 12623 }, { "epoch": 0.9554136511990918, "grad_norm": 0.71875, "learning_rate": 1.0730671411375786e-05, "loss": 0.2735, "step": 12624 }, { "epoch": 0.9554893335225392, "grad_norm": 1.03125, "learning_rate": 1.0729483509906644e-05, "loss": 0.3626, "step": 12625 }, { "epoch": 0.9555650158459865, "grad_norm": 0.74609375, "learning_rate": 1.0728295598088529e-05, "loss": 0.3157, "step": 12626 }, { "epoch": 0.9556406981694338, "grad_norm": 0.81640625, "learning_rate": 1.0727107675938289e-05, "loss": 0.3422, "step": 12627 }, { "epoch": 0.9557163804928811, "grad_norm": 0.8125, "learning_rate": 1.0725919743472778e-05, "loss": 0.3626, "step": 12628 }, { "epoch": 0.9557920628163284, "grad_norm": 0.70703125, "learning_rate": 1.0724731800708857e-05, "loss": 0.2946, "step": 12629 }, { "epoch": 0.9558677451397758, "grad_norm": 0.8828125, "learning_rate": 1.0723543847663364e-05, "loss": 0.3855, "step": 12630 }, { "epoch": 0.9559434274632231, "grad_norm": 1.078125, "learning_rate": 1.0722355884353162e-05, "loss": 0.2852, "step": 12631 }, { "epoch": 0.9560191097866705, "grad_norm": 0.8046875, "learning_rate": 1.0721167910795102e-05, "loss": 0.2714, "step": 12632 }, { "epoch": 0.9560947921101178, "grad_norm": 0.8046875, "learning_rate": 1.0719979927006039e-05, "loss": 0.3239, "step": 12633 }, { "epoch": 0.9561704744335651, "grad_norm": 0.8046875, "learning_rate": 1.0718791933002826e-05, "loss": 0.3752, "step": 12634 }, { "epoch": 0.9562461567570124, "grad_norm": 0.71875, "learning_rate": 1.0717603928802313e-05, "loss": 0.2978, "step": 12635 }, { "epoch": 0.9563218390804598, "grad_norm": 0.74609375, "learning_rate": 1.071641591442136e-05, "loss": 0.3153, "step": 12636 }, { "epoch": 0.9563975214039071, "grad_norm": 0.73828125, "learning_rate": 1.0715227889876813e-05, "loss": 0.3161, "step": 12637 }, { "epoch": 0.9564732037273544, "grad_norm": 0.76171875, "learning_rate": 1.0714039855185539e-05, "loss": 0.3399, "step": 12638 }, { "epoch": 0.9565488860508018, "grad_norm": 0.6875, "learning_rate": 1.0712851810364376e-05, "loss": 0.2534, "step": 12639 }, { "epoch": 0.9566245683742491, "grad_norm": 0.77734375, "learning_rate": 1.0711663755430191e-05, "loss": 0.3316, "step": 12640 }, { "epoch": 0.9567002506976964, "grad_norm": 0.7421875, "learning_rate": 1.0710475690399833e-05, "loss": 0.3185, "step": 12641 }, { "epoch": 0.9567759330211437, "grad_norm": 0.66796875, "learning_rate": 1.0709287615290158e-05, "loss": 0.2641, "step": 12642 }, { "epoch": 0.9568516153445911, "grad_norm": 0.7421875, "learning_rate": 1.0708099530118024e-05, "loss": 0.3358, "step": 12643 }, { "epoch": 0.9569272976680384, "grad_norm": 0.6953125, "learning_rate": 1.070691143490028e-05, "loss": 0.275, "step": 12644 }, { "epoch": 0.9570029799914858, "grad_norm": 0.71484375, "learning_rate": 1.0705723329653782e-05, "loss": 0.3006, "step": 12645 }, { "epoch": 0.9570786623149331, "grad_norm": 0.7890625, "learning_rate": 1.0704535214395392e-05, "loss": 0.3627, "step": 12646 }, { "epoch": 0.9571543446383804, "grad_norm": 0.7421875, "learning_rate": 1.070334708914196e-05, "loss": 0.2954, "step": 12647 }, { "epoch": 0.9572300269618277, "grad_norm": 0.7578125, "learning_rate": 1.0702158953910342e-05, "loss": 0.342, "step": 12648 }, { "epoch": 0.957305709285275, "grad_norm": 0.72265625, "learning_rate": 1.0700970808717394e-05, "loss": 0.272, "step": 12649 }, { "epoch": 0.9573813916087224, "grad_norm": 0.69921875, "learning_rate": 1.0699782653579973e-05, "loss": 0.2678, "step": 12650 }, { "epoch": 0.9574570739321697, "grad_norm": 0.6875, "learning_rate": 1.069859448851493e-05, "loss": 0.2857, "step": 12651 }, { "epoch": 0.9575327562556171, "grad_norm": 0.7578125, "learning_rate": 1.0697406313539134e-05, "loss": 0.2967, "step": 12652 }, { "epoch": 0.9576084385790644, "grad_norm": 0.70703125, "learning_rate": 1.0696218128669424e-05, "loss": 0.272, "step": 12653 }, { "epoch": 0.9576841209025118, "grad_norm": 0.734375, "learning_rate": 1.0695029933922669e-05, "loss": 0.2963, "step": 12654 }, { "epoch": 0.957759803225959, "grad_norm": 0.78125, "learning_rate": 1.0693841729315721e-05, "loss": 0.3175, "step": 12655 }, { "epoch": 0.9578354855494063, "grad_norm": 0.76171875, "learning_rate": 1.0692653514865438e-05, "loss": 0.3158, "step": 12656 }, { "epoch": 0.9579111678728537, "grad_norm": 0.80078125, "learning_rate": 1.0691465290588678e-05, "loss": 0.3538, "step": 12657 }, { "epoch": 0.957986850196301, "grad_norm": 0.7890625, "learning_rate": 1.0690277056502294e-05, "loss": 0.3312, "step": 12658 }, { "epoch": 0.9580625325197484, "grad_norm": 0.8203125, "learning_rate": 1.0689088812623145e-05, "loss": 0.3889, "step": 12659 }, { "epoch": 0.9581382148431957, "grad_norm": 0.7578125, "learning_rate": 1.068790055896809e-05, "loss": 0.321, "step": 12660 }, { "epoch": 0.9582138971666431, "grad_norm": 0.79296875, "learning_rate": 1.0686712295553983e-05, "loss": 0.3332, "step": 12661 }, { "epoch": 0.9582895794900903, "grad_norm": 0.8203125, "learning_rate": 1.0685524022397686e-05, "loss": 0.3437, "step": 12662 }, { "epoch": 0.9583652618135376, "grad_norm": 0.80859375, "learning_rate": 1.0684335739516054e-05, "loss": 0.3232, "step": 12663 }, { "epoch": 0.958440944136985, "grad_norm": 0.71484375, "learning_rate": 1.0683147446925943e-05, "loss": 0.2979, "step": 12664 }, { "epoch": 0.9585166264604323, "grad_norm": 0.75390625, "learning_rate": 1.0681959144644216e-05, "loss": 0.3243, "step": 12665 }, { "epoch": 0.9585923087838797, "grad_norm": 0.71484375, "learning_rate": 1.0680770832687728e-05, "loss": 0.2758, "step": 12666 }, { "epoch": 0.958667991107327, "grad_norm": 0.7890625, "learning_rate": 1.0679582511073337e-05, "loss": 0.345, "step": 12667 }, { "epoch": 0.9587436734307744, "grad_norm": 0.74609375, "learning_rate": 1.06783941798179e-05, "loss": 0.3129, "step": 12668 }, { "epoch": 0.9588193557542216, "grad_norm": 0.75, "learning_rate": 1.0677205838938279e-05, "loss": 0.2952, "step": 12669 }, { "epoch": 0.958895038077669, "grad_norm": 0.75390625, "learning_rate": 1.067601748845133e-05, "loss": 0.325, "step": 12670 }, { "epoch": 0.9589707204011163, "grad_norm": 0.7734375, "learning_rate": 1.0674829128373914e-05, "loss": 0.3233, "step": 12671 }, { "epoch": 0.9590464027245637, "grad_norm": 0.7421875, "learning_rate": 1.0673640758722888e-05, "loss": 0.2998, "step": 12672 }, { "epoch": 0.959122085048011, "grad_norm": 0.80078125, "learning_rate": 1.0672452379515113e-05, "loss": 0.3625, "step": 12673 }, { "epoch": 0.9591977673714583, "grad_norm": 0.86328125, "learning_rate": 1.0671263990767445e-05, "loss": 0.3594, "step": 12674 }, { "epoch": 0.9592734496949057, "grad_norm": 0.76953125, "learning_rate": 1.0670075592496744e-05, "loss": 0.337, "step": 12675 }, { "epoch": 0.9593491320183529, "grad_norm": 0.7890625, "learning_rate": 1.0668887184719878e-05, "loss": 0.3235, "step": 12676 }, { "epoch": 0.9594248143418003, "grad_norm": 0.67578125, "learning_rate": 1.0667698767453694e-05, "loss": 0.2386, "step": 12677 }, { "epoch": 0.9595004966652476, "grad_norm": 0.75, "learning_rate": 1.0666510340715057e-05, "loss": 0.2932, "step": 12678 }, { "epoch": 0.959576178988695, "grad_norm": 0.62109375, "learning_rate": 1.066532190452083e-05, "loss": 0.2183, "step": 12679 }, { "epoch": 0.9596518613121423, "grad_norm": 0.72265625, "learning_rate": 1.0664133458887867e-05, "loss": 0.3059, "step": 12680 }, { "epoch": 0.9597275436355897, "grad_norm": 0.69921875, "learning_rate": 1.0662945003833031e-05, "loss": 0.2664, "step": 12681 }, { "epoch": 0.9598032259590369, "grad_norm": 0.7890625, "learning_rate": 1.0661756539373184e-05, "loss": 0.3378, "step": 12682 }, { "epoch": 0.9598789082824842, "grad_norm": 0.78125, "learning_rate": 1.0660568065525185e-05, "loss": 0.3315, "step": 12683 }, { "epoch": 0.9599545906059316, "grad_norm": 0.7734375, "learning_rate": 1.0659379582305894e-05, "loss": 0.3042, "step": 12684 }, { "epoch": 0.9600302729293789, "grad_norm": 0.7578125, "learning_rate": 1.0658191089732174e-05, "loss": 0.306, "step": 12685 }, { "epoch": 0.9601059552528263, "grad_norm": 0.69921875, "learning_rate": 1.0657002587820879e-05, "loss": 0.2707, "step": 12686 }, { "epoch": 0.9601816375762736, "grad_norm": 0.77734375, "learning_rate": 1.0655814076588879e-05, "loss": 0.3185, "step": 12687 }, { "epoch": 0.960257319899721, "grad_norm": 0.74609375, "learning_rate": 1.0654625556053031e-05, "loss": 0.3042, "step": 12688 }, { "epoch": 0.9603330022231682, "grad_norm": 0.76171875, "learning_rate": 1.0653437026230193e-05, "loss": 0.322, "step": 12689 }, { "epoch": 0.9604086845466155, "grad_norm": 0.703125, "learning_rate": 1.0652248487137233e-05, "loss": 0.2588, "step": 12690 }, { "epoch": 0.9604843668700629, "grad_norm": 0.76171875, "learning_rate": 1.0651059938791008e-05, "loss": 0.3177, "step": 12691 }, { "epoch": 0.9605600491935102, "grad_norm": 0.7421875, "learning_rate": 1.0649871381208379e-05, "loss": 0.3125, "step": 12692 }, { "epoch": 0.9606357315169576, "grad_norm": 0.76171875, "learning_rate": 1.064868281440621e-05, "loss": 0.294, "step": 12693 }, { "epoch": 0.9607114138404049, "grad_norm": 1.0234375, "learning_rate": 1.0647494238401363e-05, "loss": 0.2925, "step": 12694 }, { "epoch": 0.9607870961638523, "grad_norm": 0.7578125, "learning_rate": 1.0646305653210698e-05, "loss": 0.3127, "step": 12695 }, { "epoch": 0.9608627784872995, "grad_norm": 0.79296875, "learning_rate": 1.0645117058851077e-05, "loss": 0.3322, "step": 12696 }, { "epoch": 0.9609384608107469, "grad_norm": 0.80859375, "learning_rate": 1.0643928455339367e-05, "loss": 0.3366, "step": 12697 }, { "epoch": 0.9610141431341942, "grad_norm": 0.76953125, "learning_rate": 1.0642739842692422e-05, "loss": 0.311, "step": 12698 }, { "epoch": 0.9610898254576415, "grad_norm": 0.72265625, "learning_rate": 1.0641551220927115e-05, "loss": 0.2968, "step": 12699 }, { "epoch": 0.9611655077810889, "grad_norm": 0.8046875, "learning_rate": 1.06403625900603e-05, "loss": 0.3466, "step": 12700 }, { "epoch": 0.9612411901045362, "grad_norm": 0.703125, "learning_rate": 1.0639173950108841e-05, "loss": 0.2871, "step": 12701 }, { "epoch": 0.9613168724279836, "grad_norm": 0.73046875, "learning_rate": 1.0637985301089605e-05, "loss": 0.2969, "step": 12702 }, { "epoch": 0.9613925547514308, "grad_norm": 0.72265625, "learning_rate": 1.0636796643019453e-05, "loss": 0.2963, "step": 12703 }, { "epoch": 0.9614682370748782, "grad_norm": 0.7265625, "learning_rate": 1.0635607975915247e-05, "loss": 0.276, "step": 12704 }, { "epoch": 0.9615439193983255, "grad_norm": 0.6796875, "learning_rate": 1.0634419299793852e-05, "loss": 0.2638, "step": 12705 }, { "epoch": 0.9616196017217729, "grad_norm": 0.80859375, "learning_rate": 1.063323061467213e-05, "loss": 0.3514, "step": 12706 }, { "epoch": 0.9616952840452202, "grad_norm": 0.7421875, "learning_rate": 1.0632041920566942e-05, "loss": 0.2618, "step": 12707 }, { "epoch": 0.9617709663686675, "grad_norm": 0.69140625, "learning_rate": 1.063085321749516e-05, "loss": 0.2726, "step": 12708 }, { "epoch": 0.9618466486921149, "grad_norm": 0.79296875, "learning_rate": 1.0629664505473637e-05, "loss": 0.3494, "step": 12709 }, { "epoch": 0.9619223310155621, "grad_norm": 0.7109375, "learning_rate": 1.0628475784519248e-05, "loss": 0.2992, "step": 12710 }, { "epoch": 0.9619980133390095, "grad_norm": 0.75390625, "learning_rate": 1.0627287054648846e-05, "loss": 0.3271, "step": 12711 }, { "epoch": 0.9620736956624568, "grad_norm": 0.75, "learning_rate": 1.0626098315879305e-05, "loss": 0.3117, "step": 12712 }, { "epoch": 0.9621493779859042, "grad_norm": 0.81640625, "learning_rate": 1.0624909568227486e-05, "loss": 0.3207, "step": 12713 }, { "epoch": 0.9622250603093515, "grad_norm": 0.79296875, "learning_rate": 1.062372081171025e-05, "loss": 0.3449, "step": 12714 }, { "epoch": 0.9623007426327989, "grad_norm": 0.7578125, "learning_rate": 1.0622532046344462e-05, "loss": 0.3021, "step": 12715 }, { "epoch": 0.9623764249562462, "grad_norm": 0.84765625, "learning_rate": 1.062134327214699e-05, "loss": 0.3555, "step": 12716 }, { "epoch": 0.9624521072796934, "grad_norm": 0.7265625, "learning_rate": 1.0620154489134698e-05, "loss": 0.2706, "step": 12717 }, { "epoch": 0.9625277896031408, "grad_norm": 0.75390625, "learning_rate": 1.061896569732445e-05, "loss": 0.3268, "step": 12718 }, { "epoch": 0.9626034719265881, "grad_norm": 0.77734375, "learning_rate": 1.0617776896733115e-05, "loss": 0.3194, "step": 12719 }, { "epoch": 0.9626791542500355, "grad_norm": 0.78515625, "learning_rate": 1.061658808737755e-05, "loss": 0.3373, "step": 12720 }, { "epoch": 0.9627548365734828, "grad_norm": 0.80859375, "learning_rate": 1.0615399269274627e-05, "loss": 0.3654, "step": 12721 }, { "epoch": 0.9628305188969302, "grad_norm": 3.625, "learning_rate": 1.0614210442441209e-05, "loss": 0.2968, "step": 12722 }, { "epoch": 0.9629062012203775, "grad_norm": 0.78125, "learning_rate": 1.061302160689416e-05, "loss": 0.3203, "step": 12723 }, { "epoch": 0.9629818835438247, "grad_norm": 0.765625, "learning_rate": 1.0611832762650347e-05, "loss": 0.2915, "step": 12724 }, { "epoch": 0.9630575658672721, "grad_norm": 0.72265625, "learning_rate": 1.0610643909726639e-05, "loss": 0.2905, "step": 12725 }, { "epoch": 0.9631332481907194, "grad_norm": 0.77734375, "learning_rate": 1.0609455048139899e-05, "loss": 0.2946, "step": 12726 }, { "epoch": 0.9632089305141668, "grad_norm": 0.734375, "learning_rate": 1.0608266177906992e-05, "loss": 0.2892, "step": 12727 }, { "epoch": 0.9632846128376141, "grad_norm": 0.73828125, "learning_rate": 1.0607077299044788e-05, "loss": 0.3018, "step": 12728 }, { "epoch": 0.9633602951610615, "grad_norm": 0.74609375, "learning_rate": 1.0605888411570148e-05, "loss": 0.3212, "step": 12729 }, { "epoch": 0.9634359774845088, "grad_norm": 0.703125, "learning_rate": 1.0604699515499941e-05, "loss": 0.2967, "step": 12730 }, { "epoch": 0.9635116598079561, "grad_norm": 0.6796875, "learning_rate": 1.0603510610851036e-05, "loss": 0.2677, "step": 12731 }, { "epoch": 0.9635873421314034, "grad_norm": 0.7890625, "learning_rate": 1.0602321697640297e-05, "loss": 0.3203, "step": 12732 }, { "epoch": 0.9636630244548507, "grad_norm": 0.7890625, "learning_rate": 1.0601132775884589e-05, "loss": 0.3123, "step": 12733 }, { "epoch": 0.9637387067782981, "grad_norm": 0.73828125, "learning_rate": 1.0599943845600781e-05, "loss": 0.2687, "step": 12734 }, { "epoch": 0.9638143891017454, "grad_norm": 0.76953125, "learning_rate": 1.0598754906805742e-05, "loss": 0.3259, "step": 12735 }, { "epoch": 0.9638900714251928, "grad_norm": 0.703125, "learning_rate": 1.0597565959516337e-05, "loss": 0.2876, "step": 12736 }, { "epoch": 0.9639657537486401, "grad_norm": 0.80859375, "learning_rate": 1.0596377003749431e-05, "loss": 0.3565, "step": 12737 }, { "epoch": 0.9640414360720874, "grad_norm": 0.80859375, "learning_rate": 1.0595188039521897e-05, "loss": 0.3636, "step": 12738 }, { "epoch": 0.9641171183955347, "grad_norm": 0.765625, "learning_rate": 1.0593999066850596e-05, "loss": 0.2826, "step": 12739 }, { "epoch": 0.9641928007189821, "grad_norm": 0.77734375, "learning_rate": 1.0592810085752401e-05, "loss": 0.3433, "step": 12740 }, { "epoch": 0.9642684830424294, "grad_norm": 0.7109375, "learning_rate": 1.059162109624418e-05, "loss": 0.2947, "step": 12741 }, { "epoch": 0.9643441653658767, "grad_norm": 0.76171875, "learning_rate": 1.0590432098342794e-05, "loss": 0.3207, "step": 12742 }, { "epoch": 0.9644198476893241, "grad_norm": 0.76171875, "learning_rate": 1.0589243092065118e-05, "loss": 0.3247, "step": 12743 }, { "epoch": 0.9644955300127714, "grad_norm": 0.8203125, "learning_rate": 1.0588054077428016e-05, "loss": 0.3457, "step": 12744 }, { "epoch": 0.9645712123362187, "grad_norm": 0.7578125, "learning_rate": 1.0586865054448356e-05, "loss": 0.2294, "step": 12745 }, { "epoch": 0.964646894659666, "grad_norm": 0.73828125, "learning_rate": 1.0585676023143014e-05, "loss": 0.305, "step": 12746 }, { "epoch": 0.9647225769831134, "grad_norm": 0.734375, "learning_rate": 1.058448698352885e-05, "loss": 0.3322, "step": 12747 }, { "epoch": 0.9647982593065607, "grad_norm": 0.74609375, "learning_rate": 1.0583297935622732e-05, "loss": 0.3181, "step": 12748 }, { "epoch": 0.9648739416300081, "grad_norm": 0.6953125, "learning_rate": 1.0582108879441534e-05, "loss": 0.2807, "step": 12749 }, { "epoch": 0.9649496239534554, "grad_norm": 0.6953125, "learning_rate": 1.0580919815002126e-05, "loss": 0.2664, "step": 12750 }, { "epoch": 0.9650253062769028, "grad_norm": 0.703125, "learning_rate": 1.0579730742321367e-05, "loss": 0.2831, "step": 12751 }, { "epoch": 0.96510098860035, "grad_norm": 1.2421875, "learning_rate": 1.0578541661416137e-05, "loss": 0.3193, "step": 12752 }, { "epoch": 0.9651766709237973, "grad_norm": 0.7109375, "learning_rate": 1.0577352572303301e-05, "loss": 0.2955, "step": 12753 }, { "epoch": 0.9652523532472447, "grad_norm": 0.765625, "learning_rate": 1.0576163474999725e-05, "loss": 0.3182, "step": 12754 }, { "epoch": 0.965328035570692, "grad_norm": 0.75390625, "learning_rate": 1.0574974369522284e-05, "loss": 0.304, "step": 12755 }, { "epoch": 0.9654037178941394, "grad_norm": 0.67578125, "learning_rate": 1.0573785255887845e-05, "loss": 0.2545, "step": 12756 }, { "epoch": 0.9654794002175867, "grad_norm": 0.77734375, "learning_rate": 1.0572596134113276e-05, "loss": 0.326, "step": 12757 }, { "epoch": 0.9655550825410341, "grad_norm": 0.703125, "learning_rate": 1.0571407004215448e-05, "loss": 0.278, "step": 12758 }, { "epoch": 0.9656307648644813, "grad_norm": 1.109375, "learning_rate": 1.0570217866211232e-05, "loss": 0.3624, "step": 12759 }, { "epoch": 0.9657064471879286, "grad_norm": 0.7734375, "learning_rate": 1.0569028720117496e-05, "loss": 0.3149, "step": 12760 }, { "epoch": 0.965782129511376, "grad_norm": 0.76171875, "learning_rate": 1.0567839565951112e-05, "loss": 0.3216, "step": 12761 }, { "epoch": 0.9658578118348233, "grad_norm": 0.8046875, "learning_rate": 1.056665040372895e-05, "loss": 0.3299, "step": 12762 }, { "epoch": 0.9659334941582707, "grad_norm": 0.828125, "learning_rate": 1.0565461233467877e-05, "loss": 0.3495, "step": 12763 }, { "epoch": 0.966009176481718, "grad_norm": 0.75390625, "learning_rate": 1.056427205518477e-05, "loss": 0.3092, "step": 12764 }, { "epoch": 0.9660848588051654, "grad_norm": 0.71875, "learning_rate": 1.0563082868896492e-05, "loss": 0.2901, "step": 12765 }, { "epoch": 0.9661605411286126, "grad_norm": 0.72265625, "learning_rate": 1.0561893674619918e-05, "loss": 0.2825, "step": 12766 }, { "epoch": 0.96623622345206, "grad_norm": 0.73046875, "learning_rate": 1.0560704472371919e-05, "loss": 0.2968, "step": 12767 }, { "epoch": 0.9663119057755073, "grad_norm": 0.76171875, "learning_rate": 1.0559515262169364e-05, "loss": 0.3092, "step": 12768 }, { "epoch": 0.9663875880989546, "grad_norm": 0.73828125, "learning_rate": 1.0558326044029126e-05, "loss": 0.3106, "step": 12769 }, { "epoch": 0.966463270422402, "grad_norm": 0.80078125, "learning_rate": 1.0557136817968075e-05, "loss": 0.3524, "step": 12770 }, { "epoch": 0.9665389527458493, "grad_norm": 0.75390625, "learning_rate": 1.055594758400308e-05, "loss": 0.3119, "step": 12771 }, { "epoch": 0.9666146350692967, "grad_norm": 0.8984375, "learning_rate": 1.0554758342151015e-05, "loss": 0.2884, "step": 12772 }, { "epoch": 0.9666903173927439, "grad_norm": 0.76953125, "learning_rate": 1.0553569092428752e-05, "loss": 0.3311, "step": 12773 }, { "epoch": 0.9667659997161913, "grad_norm": 0.68359375, "learning_rate": 1.055237983485316e-05, "loss": 0.2672, "step": 12774 }, { "epoch": 0.9668416820396386, "grad_norm": 0.78515625, "learning_rate": 1.0551190569441117e-05, "loss": 0.3251, "step": 12775 }, { "epoch": 0.966917364363086, "grad_norm": 0.7890625, "learning_rate": 1.0550001296209487e-05, "loss": 0.3431, "step": 12776 }, { "epoch": 0.9669930466865333, "grad_norm": 0.7578125, "learning_rate": 1.0548812015175143e-05, "loss": 0.3012, "step": 12777 }, { "epoch": 0.9670687290099806, "grad_norm": 0.76171875, "learning_rate": 1.0547622726354958e-05, "loss": 0.2986, "step": 12778 }, { "epoch": 0.967144411333428, "grad_norm": 0.7578125, "learning_rate": 1.054643342976581e-05, "loss": 0.2952, "step": 12779 }, { "epoch": 0.9672200936568752, "grad_norm": 0.7578125, "learning_rate": 1.054524412542456e-05, "loss": 0.303, "step": 12780 }, { "epoch": 0.9672957759803226, "grad_norm": 0.83203125, "learning_rate": 1.054405481334809e-05, "loss": 0.3438, "step": 12781 }, { "epoch": 0.9673714583037699, "grad_norm": 0.71875, "learning_rate": 1.0542865493553267e-05, "loss": 0.2874, "step": 12782 }, { "epoch": 0.9674471406272173, "grad_norm": 0.68359375, "learning_rate": 1.0541676166056966e-05, "loss": 0.2679, "step": 12783 }, { "epoch": 0.9675228229506646, "grad_norm": 0.71484375, "learning_rate": 1.0540486830876062e-05, "loss": 0.2887, "step": 12784 }, { "epoch": 0.967598505274112, "grad_norm": 0.72265625, "learning_rate": 1.0539297488027422e-05, "loss": 0.288, "step": 12785 }, { "epoch": 0.9676741875975593, "grad_norm": 0.76171875, "learning_rate": 1.053810813752792e-05, "loss": 0.3167, "step": 12786 }, { "epoch": 0.9677498699210065, "grad_norm": 0.7734375, "learning_rate": 1.0536918779394432e-05, "loss": 0.3389, "step": 12787 }, { "epoch": 0.9678255522444539, "grad_norm": 0.6875, "learning_rate": 1.0535729413643834e-05, "loss": 0.2537, "step": 12788 }, { "epoch": 0.9679012345679012, "grad_norm": 0.74609375, "learning_rate": 1.0534540040292988e-05, "loss": 0.3176, "step": 12789 }, { "epoch": 0.9679769168913486, "grad_norm": 0.75, "learning_rate": 1.0533350659358779e-05, "loss": 0.3024, "step": 12790 }, { "epoch": 0.9680525992147959, "grad_norm": 0.765625, "learning_rate": 1.0532161270858074e-05, "loss": 0.3302, "step": 12791 }, { "epoch": 0.9681282815382433, "grad_norm": 0.71484375, "learning_rate": 1.0530971874807746e-05, "loss": 0.2915, "step": 12792 }, { "epoch": 0.9682039638616906, "grad_norm": 0.8046875, "learning_rate": 1.0529782471224677e-05, "loss": 0.3352, "step": 12793 }, { "epoch": 0.9682796461851378, "grad_norm": 0.8046875, "learning_rate": 1.052859306012573e-05, "loss": 0.3526, "step": 12794 }, { "epoch": 0.9683553285085852, "grad_norm": 0.734375, "learning_rate": 1.052740364152778e-05, "loss": 0.2947, "step": 12795 }, { "epoch": 0.9684310108320325, "grad_norm": 0.75, "learning_rate": 1.052621421544771e-05, "loss": 0.2756, "step": 12796 }, { "epoch": 0.9685066931554799, "grad_norm": 0.75, "learning_rate": 1.0525024781902389e-05, "loss": 0.3227, "step": 12797 }, { "epoch": 0.9685823754789272, "grad_norm": 0.75390625, "learning_rate": 1.0523835340908686e-05, "loss": 0.3085, "step": 12798 }, { "epoch": 0.9686580578023746, "grad_norm": 0.828125, "learning_rate": 1.0522645892483483e-05, "loss": 0.3366, "step": 12799 }, { "epoch": 0.9687337401258218, "grad_norm": 0.79296875, "learning_rate": 1.0521456436643648e-05, "loss": 0.3238, "step": 12800 }, { "epoch": 0.9688094224492692, "grad_norm": 0.74609375, "learning_rate": 1.0520266973406059e-05, "loss": 0.3069, "step": 12801 }, { "epoch": 0.9688851047727165, "grad_norm": 0.71484375, "learning_rate": 1.0519077502787596e-05, "loss": 0.2814, "step": 12802 }, { "epoch": 0.9689607870961638, "grad_norm": 0.76953125, "learning_rate": 1.0517888024805123e-05, "loss": 0.3317, "step": 12803 }, { "epoch": 0.9690364694196112, "grad_norm": 0.6875, "learning_rate": 1.0516698539475518e-05, "loss": 0.254, "step": 12804 }, { "epoch": 0.9691121517430585, "grad_norm": 0.859375, "learning_rate": 1.051550904681566e-05, "loss": 0.3308, "step": 12805 }, { "epoch": 0.9691878340665059, "grad_norm": 0.75390625, "learning_rate": 1.0514319546842422e-05, "loss": 0.3027, "step": 12806 }, { "epoch": 0.9692635163899531, "grad_norm": 0.78515625, "learning_rate": 1.0513130039572676e-05, "loss": 0.3156, "step": 12807 }, { "epoch": 0.9693391987134005, "grad_norm": 0.7734375, "learning_rate": 1.0511940525023302e-05, "loss": 0.3316, "step": 12808 }, { "epoch": 0.9694148810368478, "grad_norm": 0.734375, "learning_rate": 1.0510751003211175e-05, "loss": 0.3229, "step": 12809 }, { "epoch": 0.9694905633602952, "grad_norm": 0.71875, "learning_rate": 1.0509561474153162e-05, "loss": 0.2972, "step": 12810 }, { "epoch": 0.9695662456837425, "grad_norm": 0.734375, "learning_rate": 1.0508371937866153e-05, "loss": 0.3042, "step": 12811 }, { "epoch": 0.9696419280071898, "grad_norm": 0.7265625, "learning_rate": 1.0507182394367013e-05, "loss": 0.2618, "step": 12812 }, { "epoch": 0.9697176103306372, "grad_norm": 0.79296875, "learning_rate": 1.0505992843672617e-05, "loss": 0.3349, "step": 12813 }, { "epoch": 0.9697932926540844, "grad_norm": 0.78515625, "learning_rate": 1.0504803285799848e-05, "loss": 0.3218, "step": 12814 }, { "epoch": 0.9698689749775318, "grad_norm": 0.76171875, "learning_rate": 1.0503613720765576e-05, "loss": 0.2883, "step": 12815 }, { "epoch": 0.9699446573009791, "grad_norm": 0.79296875, "learning_rate": 1.050242414858668e-05, "loss": 0.3627, "step": 12816 }, { "epoch": 0.9700203396244265, "grad_norm": 0.7265625, "learning_rate": 1.0501234569280037e-05, "loss": 0.2987, "step": 12817 }, { "epoch": 0.9700960219478738, "grad_norm": 0.82421875, "learning_rate": 1.0500044982862519e-05, "loss": 0.3774, "step": 12818 }, { "epoch": 0.9701717042713212, "grad_norm": 0.75, "learning_rate": 1.0498855389351004e-05, "loss": 0.308, "step": 12819 }, { "epoch": 0.9702473865947685, "grad_norm": 0.703125, "learning_rate": 1.049766578876237e-05, "loss": 0.2784, "step": 12820 }, { "epoch": 0.9703230689182157, "grad_norm": 0.77734375, "learning_rate": 1.0496476181113492e-05, "loss": 0.328, "step": 12821 }, { "epoch": 0.9703987512416631, "grad_norm": 0.82421875, "learning_rate": 1.049528656642125e-05, "loss": 0.3385, "step": 12822 }, { "epoch": 0.9704744335651104, "grad_norm": 0.734375, "learning_rate": 1.0494096944702518e-05, "loss": 0.2964, "step": 12823 }, { "epoch": 0.9705501158885578, "grad_norm": 0.80078125, "learning_rate": 1.0492907315974172e-05, "loss": 0.3458, "step": 12824 }, { "epoch": 0.9706257982120051, "grad_norm": 0.7890625, "learning_rate": 1.0491717680253091e-05, "loss": 0.3264, "step": 12825 }, { "epoch": 0.9707014805354525, "grad_norm": 0.73828125, "learning_rate": 1.0490528037556149e-05, "loss": 0.2922, "step": 12826 }, { "epoch": 0.9707771628588998, "grad_norm": 0.76953125, "learning_rate": 1.0489338387900222e-05, "loss": 0.3046, "step": 12827 }, { "epoch": 0.970852845182347, "grad_norm": 0.74609375, "learning_rate": 1.0488148731302197e-05, "loss": 0.3006, "step": 12828 }, { "epoch": 0.9709285275057944, "grad_norm": 0.7265625, "learning_rate": 1.0486959067778942e-05, "loss": 0.2989, "step": 12829 }, { "epoch": 0.9710042098292417, "grad_norm": 0.7421875, "learning_rate": 1.0485769397347335e-05, "loss": 0.3063, "step": 12830 }, { "epoch": 0.9710798921526891, "grad_norm": 0.71875, "learning_rate": 1.0484579720024257e-05, "loss": 0.2592, "step": 12831 }, { "epoch": 0.9711555744761364, "grad_norm": 0.7265625, "learning_rate": 1.0483390035826586e-05, "loss": 0.2663, "step": 12832 }, { "epoch": 0.9712312567995838, "grad_norm": 0.734375, "learning_rate": 1.0482200344771196e-05, "loss": 0.2835, "step": 12833 }, { "epoch": 0.9713069391230311, "grad_norm": 0.80078125, "learning_rate": 1.0481010646874963e-05, "loss": 0.349, "step": 12834 }, { "epoch": 0.9713826214464784, "grad_norm": 0.75390625, "learning_rate": 1.0479820942154775e-05, "loss": 0.3024, "step": 12835 }, { "epoch": 0.9714583037699257, "grad_norm": 0.6953125, "learning_rate": 1.0478631230627498e-05, "loss": 0.2633, "step": 12836 }, { "epoch": 0.971533986093373, "grad_norm": 0.70703125, "learning_rate": 1.0477441512310019e-05, "loss": 0.299, "step": 12837 }, { "epoch": 0.9716096684168204, "grad_norm": 0.6640625, "learning_rate": 1.0476251787219212e-05, "loss": 0.2612, "step": 12838 }, { "epoch": 0.9716853507402677, "grad_norm": 0.71875, "learning_rate": 1.0475062055371957e-05, "loss": 0.2713, "step": 12839 }, { "epoch": 0.9717610330637151, "grad_norm": 0.76171875, "learning_rate": 1.047387231678513e-05, "loss": 0.283, "step": 12840 }, { "epoch": 0.9718367153871624, "grad_norm": 0.7734375, "learning_rate": 1.0472682571475612e-05, "loss": 0.3366, "step": 12841 }, { "epoch": 0.9719123977106097, "grad_norm": 0.8359375, "learning_rate": 1.0471492819460278e-05, "loss": 0.3825, "step": 12842 }, { "epoch": 0.971988080034057, "grad_norm": 0.7109375, "learning_rate": 1.0470303060756013e-05, "loss": 0.2799, "step": 12843 }, { "epoch": 0.9720637623575044, "grad_norm": 0.76953125, "learning_rate": 1.0469113295379691e-05, "loss": 0.3072, "step": 12844 }, { "epoch": 0.9721394446809517, "grad_norm": 0.74609375, "learning_rate": 1.0467923523348192e-05, "loss": 0.2984, "step": 12845 }, { "epoch": 0.972215127004399, "grad_norm": 0.78125, "learning_rate": 1.0466733744678392e-05, "loss": 0.3461, "step": 12846 }, { "epoch": 0.9722908093278464, "grad_norm": 0.796875, "learning_rate": 1.0465543959387176e-05, "loss": 0.3444, "step": 12847 }, { "epoch": 0.9723664916512937, "grad_norm": 0.75, "learning_rate": 1.0464354167491418e-05, "loss": 0.2955, "step": 12848 }, { "epoch": 0.972442173974741, "grad_norm": 0.7734375, "learning_rate": 1.0463164369008003e-05, "loss": 0.3429, "step": 12849 }, { "epoch": 0.9725178562981883, "grad_norm": 0.765625, "learning_rate": 1.0461974563953802e-05, "loss": 0.3107, "step": 12850 }, { "epoch": 0.9725935386216357, "grad_norm": 0.7578125, "learning_rate": 1.04607847523457e-05, "loss": 0.3098, "step": 12851 }, { "epoch": 0.972669220945083, "grad_norm": 0.765625, "learning_rate": 1.0459594934200578e-05, "loss": 0.3086, "step": 12852 }, { "epoch": 0.9727449032685304, "grad_norm": 0.7265625, "learning_rate": 1.0458405109535314e-05, "loss": 0.3056, "step": 12853 }, { "epoch": 0.9728205855919777, "grad_norm": 0.75390625, "learning_rate": 1.0457215278366783e-05, "loss": 0.3202, "step": 12854 }, { "epoch": 0.972896267915425, "grad_norm": 0.7421875, "learning_rate": 1.045602544071187e-05, "loss": 0.304, "step": 12855 }, { "epoch": 0.9729719502388723, "grad_norm": 0.75, "learning_rate": 1.0454835596587454e-05, "loss": 0.3252, "step": 12856 }, { "epoch": 0.9730476325623196, "grad_norm": 0.7265625, "learning_rate": 1.0453645746010412e-05, "loss": 0.2597, "step": 12857 }, { "epoch": 0.973123314885767, "grad_norm": 0.75390625, "learning_rate": 1.0452455888997633e-05, "loss": 0.2862, "step": 12858 }, { "epoch": 0.9731989972092143, "grad_norm": 0.80078125, "learning_rate": 1.0451266025565987e-05, "loss": 0.3646, "step": 12859 }, { "epoch": 0.9732746795326617, "grad_norm": 0.73828125, "learning_rate": 1.0450076155732357e-05, "loss": 0.2974, "step": 12860 }, { "epoch": 0.973350361856109, "grad_norm": 0.8046875, "learning_rate": 1.0448886279513623e-05, "loss": 0.3376, "step": 12861 }, { "epoch": 0.9734260441795564, "grad_norm": 0.7421875, "learning_rate": 1.044769639692667e-05, "loss": 0.3044, "step": 12862 }, { "epoch": 0.9735017265030036, "grad_norm": 0.73046875, "learning_rate": 1.0446506507988375e-05, "loss": 0.2987, "step": 12863 }, { "epoch": 0.9735774088264509, "grad_norm": 0.828125, "learning_rate": 1.0445316612715616e-05, "loss": 0.3817, "step": 12864 }, { "epoch": 0.9736530911498983, "grad_norm": 0.71484375, "learning_rate": 1.0444126711125283e-05, "loss": 0.2774, "step": 12865 }, { "epoch": 0.9737287734733456, "grad_norm": 1.0703125, "learning_rate": 1.0442936803234242e-05, "loss": 0.3147, "step": 12866 }, { "epoch": 0.973804455796793, "grad_norm": 0.70703125, "learning_rate": 1.044174688905939e-05, "loss": 0.2769, "step": 12867 }, { "epoch": 0.9738801381202403, "grad_norm": 0.796875, "learning_rate": 1.0440556968617597e-05, "loss": 0.3578, "step": 12868 }, { "epoch": 0.9739558204436877, "grad_norm": 0.765625, "learning_rate": 1.0439367041925746e-05, "loss": 0.2992, "step": 12869 }, { "epoch": 0.9740315027671349, "grad_norm": 0.77734375, "learning_rate": 1.043817710900072e-05, "loss": 0.3426, "step": 12870 }, { "epoch": 0.9741071850905823, "grad_norm": 0.765625, "learning_rate": 1.0436987169859401e-05, "loss": 0.3117, "step": 12871 }, { "epoch": 0.9741828674140296, "grad_norm": 0.78125, "learning_rate": 1.0435797224518669e-05, "loss": 0.3442, "step": 12872 }, { "epoch": 0.9742585497374769, "grad_norm": 0.76953125, "learning_rate": 1.0434607272995404e-05, "loss": 0.2998, "step": 12873 }, { "epoch": 0.9743342320609243, "grad_norm": 0.828125, "learning_rate": 1.0433417315306494e-05, "loss": 0.3622, "step": 12874 }, { "epoch": 0.9744099143843716, "grad_norm": 0.76171875, "learning_rate": 1.0432227351468809e-05, "loss": 0.3134, "step": 12875 }, { "epoch": 0.974485596707819, "grad_norm": 0.7734375, "learning_rate": 1.043103738149924e-05, "loss": 0.3327, "step": 12876 }, { "epoch": 0.9745612790312662, "grad_norm": 0.8046875, "learning_rate": 1.0429847405414666e-05, "loss": 0.3316, "step": 12877 }, { "epoch": 0.9746369613547136, "grad_norm": 0.78515625, "learning_rate": 1.0428657423231968e-05, "loss": 0.3057, "step": 12878 }, { "epoch": 0.9747126436781609, "grad_norm": 0.75390625, "learning_rate": 1.0427467434968033e-05, "loss": 0.285, "step": 12879 }, { "epoch": 0.9747883260016083, "grad_norm": 0.734375, "learning_rate": 1.0426277440639737e-05, "loss": 0.3015, "step": 12880 }, { "epoch": 0.9748640083250556, "grad_norm": 0.73046875, "learning_rate": 1.042508744026396e-05, "loss": 0.2754, "step": 12881 }, { "epoch": 0.9749396906485029, "grad_norm": 0.78515625, "learning_rate": 1.0423897433857593e-05, "loss": 0.3247, "step": 12882 }, { "epoch": 0.9750153729719503, "grad_norm": 0.7578125, "learning_rate": 1.042270742143751e-05, "loss": 0.3285, "step": 12883 }, { "epoch": 0.9750910552953975, "grad_norm": 0.7578125, "learning_rate": 1.0421517403020597e-05, "loss": 0.3169, "step": 12884 }, { "epoch": 0.9751667376188449, "grad_norm": 0.7421875, "learning_rate": 1.042032737862374e-05, "loss": 0.3124, "step": 12885 }, { "epoch": 0.9752424199422922, "grad_norm": 0.8359375, "learning_rate": 1.0419137348263815e-05, "loss": 0.3428, "step": 12886 }, { "epoch": 0.9753181022657396, "grad_norm": 0.81640625, "learning_rate": 1.0417947311957707e-05, "loss": 0.3613, "step": 12887 }, { "epoch": 0.9753937845891869, "grad_norm": 0.734375, "learning_rate": 1.0416757269722301e-05, "loss": 0.3009, "step": 12888 }, { "epoch": 0.9754694669126343, "grad_norm": 0.875, "learning_rate": 1.0415567221574478e-05, "loss": 0.4172, "step": 12889 }, { "epoch": 0.9755451492360816, "grad_norm": 0.79296875, "learning_rate": 1.0414377167531118e-05, "loss": 0.3362, "step": 12890 }, { "epoch": 0.9756208315595288, "grad_norm": 0.7421875, "learning_rate": 1.041318710760911e-05, "loss": 0.3093, "step": 12891 }, { "epoch": 0.9756965138829762, "grad_norm": 0.7265625, "learning_rate": 1.0411997041825333e-05, "loss": 0.3018, "step": 12892 }, { "epoch": 0.9757721962064235, "grad_norm": 0.765625, "learning_rate": 1.041080697019667e-05, "loss": 0.318, "step": 12893 }, { "epoch": 0.9758478785298709, "grad_norm": 0.8046875, "learning_rate": 1.0409616892740007e-05, "loss": 0.3261, "step": 12894 }, { "epoch": 0.9759235608533182, "grad_norm": 0.77734375, "learning_rate": 1.0408426809472223e-05, "loss": 0.3063, "step": 12895 }, { "epoch": 0.9759992431767656, "grad_norm": 0.7421875, "learning_rate": 1.0407236720410205e-05, "loss": 0.3209, "step": 12896 }, { "epoch": 0.9760749255002129, "grad_norm": 0.76953125, "learning_rate": 1.0406046625570836e-05, "loss": 0.2897, "step": 12897 }, { "epoch": 0.9761506078236601, "grad_norm": 0.75, "learning_rate": 1.0404856524970996e-05, "loss": 0.3239, "step": 12898 }, { "epoch": 0.9762262901471075, "grad_norm": 0.75390625, "learning_rate": 1.0403666418627574e-05, "loss": 0.3233, "step": 12899 }, { "epoch": 0.9763019724705548, "grad_norm": 0.75390625, "learning_rate": 1.0402476306557451e-05, "loss": 0.3491, "step": 12900 }, { "epoch": 0.9763776547940022, "grad_norm": 0.72265625, "learning_rate": 1.0401286188777511e-05, "loss": 0.2697, "step": 12901 }, { "epoch": 0.9764533371174495, "grad_norm": 0.80859375, "learning_rate": 1.0400096065304637e-05, "loss": 0.3761, "step": 12902 }, { "epoch": 0.9765290194408969, "grad_norm": 0.75, "learning_rate": 1.0398905936155714e-05, "loss": 0.3137, "step": 12903 }, { "epoch": 0.9766047017643442, "grad_norm": 0.76953125, "learning_rate": 1.0397715801347624e-05, "loss": 0.3154, "step": 12904 }, { "epoch": 0.9766803840877915, "grad_norm": 0.7265625, "learning_rate": 1.0396525660897257e-05, "loss": 0.31, "step": 12905 }, { "epoch": 0.9767560664112388, "grad_norm": 0.765625, "learning_rate": 1.039533551482149e-05, "loss": 0.311, "step": 12906 }, { "epoch": 0.9768317487346861, "grad_norm": 0.78125, "learning_rate": 1.039414536313721e-05, "loss": 0.3413, "step": 12907 }, { "epoch": 0.9769074310581335, "grad_norm": 0.765625, "learning_rate": 1.03929552058613e-05, "loss": 0.3316, "step": 12908 }, { "epoch": 0.9769831133815808, "grad_norm": 0.68359375, "learning_rate": 1.0391765043010653e-05, "loss": 0.2555, "step": 12909 }, { "epoch": 0.9770587957050282, "grad_norm": 0.73046875, "learning_rate": 1.0390574874602142e-05, "loss": 0.3098, "step": 12910 }, { "epoch": 0.9771344780284755, "grad_norm": 0.796875, "learning_rate": 1.0389384700652656e-05, "loss": 0.3237, "step": 12911 }, { "epoch": 0.9772101603519228, "grad_norm": 0.78125, "learning_rate": 1.038819452117908e-05, "loss": 0.3383, "step": 12912 }, { "epoch": 0.9772858426753701, "grad_norm": 0.7421875, "learning_rate": 1.0387004336198297e-05, "loss": 0.2952, "step": 12913 }, { "epoch": 0.9773615249988175, "grad_norm": 0.74609375, "learning_rate": 1.0385814145727197e-05, "loss": 0.3011, "step": 12914 }, { "epoch": 0.9774372073222648, "grad_norm": 0.76171875, "learning_rate": 1.038462394978266e-05, "loss": 0.3133, "step": 12915 }, { "epoch": 0.9775128896457121, "grad_norm": 0.75390625, "learning_rate": 1.0383433748381571e-05, "loss": 0.3422, "step": 12916 }, { "epoch": 0.9775885719691595, "grad_norm": 0.74609375, "learning_rate": 1.0382243541540816e-05, "loss": 0.3046, "step": 12917 }, { "epoch": 0.9776642542926068, "grad_norm": 0.71484375, "learning_rate": 1.0381053329277281e-05, "loss": 0.2712, "step": 12918 }, { "epoch": 0.9777399366160541, "grad_norm": 0.7421875, "learning_rate": 1.037986311160785e-05, "loss": 0.3119, "step": 12919 }, { "epoch": 0.9778156189395014, "grad_norm": 1.1328125, "learning_rate": 1.0378672888549413e-05, "loss": 0.3272, "step": 12920 }, { "epoch": 0.9778913012629488, "grad_norm": 0.8125, "learning_rate": 1.0377482660118847e-05, "loss": 0.393, "step": 12921 }, { "epoch": 0.9779669835863961, "grad_norm": 0.7734375, "learning_rate": 1.0376292426333042e-05, "loss": 0.3266, "step": 12922 }, { "epoch": 0.9780426659098435, "grad_norm": 0.83984375, "learning_rate": 1.0375102187208885e-05, "loss": 0.3601, "step": 12923 }, { "epoch": 0.9781183482332908, "grad_norm": 0.71484375, "learning_rate": 1.037391194276326e-05, "loss": 0.2796, "step": 12924 }, { "epoch": 0.978194030556738, "grad_norm": 0.91015625, "learning_rate": 1.0372721693013047e-05, "loss": 0.356, "step": 12925 }, { "epoch": 0.9782697128801854, "grad_norm": 0.70703125, "learning_rate": 1.0371531437975142e-05, "loss": 0.2941, "step": 12926 }, { "epoch": 0.9783453952036327, "grad_norm": 0.73828125, "learning_rate": 1.0370341177666428e-05, "loss": 0.2957, "step": 12927 }, { "epoch": 0.9784210775270801, "grad_norm": 0.71484375, "learning_rate": 1.0369150912103784e-05, "loss": 0.3042, "step": 12928 }, { "epoch": 0.9784967598505274, "grad_norm": 0.76953125, "learning_rate": 1.0367960641304105e-05, "loss": 0.3262, "step": 12929 }, { "epoch": 0.9785724421739748, "grad_norm": 0.70703125, "learning_rate": 1.0366770365284271e-05, "loss": 0.2793, "step": 12930 }, { "epoch": 0.9786481244974221, "grad_norm": 0.78515625, "learning_rate": 1.0365580084061166e-05, "loss": 0.3262, "step": 12931 }, { "epoch": 0.9787238068208693, "grad_norm": 0.77734375, "learning_rate": 1.0364389797651686e-05, "loss": 0.3107, "step": 12932 }, { "epoch": 0.9787994891443167, "grad_norm": 0.6953125, "learning_rate": 1.0363199506072709e-05, "loss": 0.2696, "step": 12933 }, { "epoch": 0.978875171467764, "grad_norm": 0.8125, "learning_rate": 1.0362009209341125e-05, "loss": 0.3487, "step": 12934 }, { "epoch": 0.9789508537912114, "grad_norm": 1.0390625, "learning_rate": 1.0360818907473817e-05, "loss": 0.3875, "step": 12935 }, { "epoch": 0.9790265361146587, "grad_norm": 0.65625, "learning_rate": 1.0359628600487677e-05, "loss": 0.2513, "step": 12936 }, { "epoch": 0.9791022184381061, "grad_norm": 0.7109375, "learning_rate": 1.0358438288399587e-05, "loss": 0.2553, "step": 12937 }, { "epoch": 0.9791779007615534, "grad_norm": 0.703125, "learning_rate": 1.0357247971226432e-05, "loss": 0.2601, "step": 12938 }, { "epoch": 0.9792535830850007, "grad_norm": 0.71875, "learning_rate": 1.0356057648985102e-05, "loss": 0.3096, "step": 12939 }, { "epoch": 0.979329265408448, "grad_norm": 0.6953125, "learning_rate": 1.0354867321692486e-05, "loss": 0.2446, "step": 12940 }, { "epoch": 0.9794049477318953, "grad_norm": 0.7578125, "learning_rate": 1.0353676989365467e-05, "loss": 0.3112, "step": 12941 }, { "epoch": 0.9794806300553427, "grad_norm": 0.78515625, "learning_rate": 1.0352486652020933e-05, "loss": 0.3431, "step": 12942 }, { "epoch": 0.97955631237879, "grad_norm": 0.84375, "learning_rate": 1.0351296309675768e-05, "loss": 0.3722, "step": 12943 }, { "epoch": 0.9796319947022374, "grad_norm": 0.9296875, "learning_rate": 1.0350105962346866e-05, "loss": 0.3156, "step": 12944 }, { "epoch": 0.9797076770256847, "grad_norm": 0.73828125, "learning_rate": 1.0348915610051107e-05, "loss": 0.3041, "step": 12945 }, { "epoch": 0.979783359349132, "grad_norm": 0.73828125, "learning_rate": 1.0347725252805383e-05, "loss": 0.2989, "step": 12946 }, { "epoch": 0.9798590416725793, "grad_norm": 0.7109375, "learning_rate": 1.0346534890626583e-05, "loss": 0.3258, "step": 12947 }, { "epoch": 0.9799347239960267, "grad_norm": 0.71875, "learning_rate": 1.0345344523531584e-05, "loss": 0.2859, "step": 12948 }, { "epoch": 0.980010406319474, "grad_norm": 0.76953125, "learning_rate": 1.0344154151537285e-05, "loss": 0.3094, "step": 12949 }, { "epoch": 0.9800860886429213, "grad_norm": 0.75, "learning_rate": 1.0342963774660566e-05, "loss": 0.2891, "step": 12950 }, { "epoch": 0.9801617709663687, "grad_norm": 0.80078125, "learning_rate": 1.0341773392918319e-05, "loss": 0.3483, "step": 12951 }, { "epoch": 0.980237453289816, "grad_norm": 0.76953125, "learning_rate": 1.0340583006327432e-05, "loss": 0.3259, "step": 12952 }, { "epoch": 0.9803131356132633, "grad_norm": 1.0234375, "learning_rate": 1.0339392614904789e-05, "loss": 0.3634, "step": 12953 }, { "epoch": 0.9803888179367106, "grad_norm": 0.8046875, "learning_rate": 1.0338202218667275e-05, "loss": 0.3297, "step": 12954 }, { "epoch": 0.980464500260158, "grad_norm": 0.76953125, "learning_rate": 1.0337011817631786e-05, "loss": 0.2881, "step": 12955 }, { "epoch": 0.9805401825836053, "grad_norm": 0.703125, "learning_rate": 1.0335821411815207e-05, "loss": 0.2962, "step": 12956 }, { "epoch": 0.9806158649070527, "grad_norm": 0.72265625, "learning_rate": 1.0334631001234423e-05, "loss": 0.3017, "step": 12957 }, { "epoch": 0.9806915472305, "grad_norm": 0.71875, "learning_rate": 1.0333440585906326e-05, "loss": 0.313, "step": 12958 }, { "epoch": 0.9807672295539474, "grad_norm": 0.7265625, "learning_rate": 1.0332250165847802e-05, "loss": 0.3047, "step": 12959 }, { "epoch": 0.9808429118773946, "grad_norm": 0.6953125, "learning_rate": 1.0331059741075735e-05, "loss": 0.2728, "step": 12960 }, { "epoch": 0.9809185942008419, "grad_norm": 0.7265625, "learning_rate": 1.0329869311607024e-05, "loss": 0.2944, "step": 12961 }, { "epoch": 0.9809942765242893, "grad_norm": 0.84375, "learning_rate": 1.0328678877458547e-05, "loss": 0.3402, "step": 12962 }, { "epoch": 0.9810699588477366, "grad_norm": 0.77734375, "learning_rate": 1.0327488438647195e-05, "loss": 0.3062, "step": 12963 }, { "epoch": 0.981145641171184, "grad_norm": 0.76171875, "learning_rate": 1.0326297995189858e-05, "loss": 0.3226, "step": 12964 }, { "epoch": 0.9812213234946313, "grad_norm": 0.75390625, "learning_rate": 1.0325107547103429e-05, "loss": 0.3139, "step": 12965 }, { "epoch": 0.9812970058180787, "grad_norm": 0.77734375, "learning_rate": 1.0323917094404784e-05, "loss": 0.3133, "step": 12966 }, { "epoch": 0.9813726881415259, "grad_norm": 0.76171875, "learning_rate": 1.0322726637110822e-05, "loss": 0.3473, "step": 12967 }, { "epoch": 0.9814483704649732, "grad_norm": 0.765625, "learning_rate": 1.0321536175238431e-05, "loss": 0.3293, "step": 12968 }, { "epoch": 0.9815240527884206, "grad_norm": 0.7421875, "learning_rate": 1.0320345708804494e-05, "loss": 0.3132, "step": 12969 }, { "epoch": 0.9815997351118679, "grad_norm": 0.81640625, "learning_rate": 1.031915523782591e-05, "loss": 0.3531, "step": 12970 }, { "epoch": 0.9816754174353153, "grad_norm": 0.7421875, "learning_rate": 1.0317964762319554e-05, "loss": 0.3126, "step": 12971 }, { "epoch": 0.9817510997587626, "grad_norm": 0.68359375, "learning_rate": 1.0316774282302324e-05, "loss": 0.2806, "step": 12972 }, { "epoch": 0.98182678208221, "grad_norm": 0.70703125, "learning_rate": 1.0315583797791109e-05, "loss": 0.2953, "step": 12973 }, { "epoch": 0.9819024644056572, "grad_norm": 0.734375, "learning_rate": 1.0314393308802795e-05, "loss": 0.3079, "step": 12974 }, { "epoch": 0.9819781467291046, "grad_norm": 0.72265625, "learning_rate": 1.0313202815354273e-05, "loss": 0.2894, "step": 12975 }, { "epoch": 0.9820538290525519, "grad_norm": 0.72265625, "learning_rate": 1.0312012317462433e-05, "loss": 0.2717, "step": 12976 }, { "epoch": 0.9821295113759992, "grad_norm": 0.7578125, "learning_rate": 1.0310821815144161e-05, "loss": 0.32, "step": 12977 }, { "epoch": 0.9822051936994466, "grad_norm": 0.65234375, "learning_rate": 1.0309631308416346e-05, "loss": 0.2574, "step": 12978 }, { "epoch": 0.9822808760228939, "grad_norm": 0.703125, "learning_rate": 1.0308440797295885e-05, "loss": 0.2698, "step": 12979 }, { "epoch": 0.9823565583463413, "grad_norm": 0.703125, "learning_rate": 1.030725028179966e-05, "loss": 0.2702, "step": 12980 }, { "epoch": 0.9824322406697885, "grad_norm": 0.76171875, "learning_rate": 1.0306059761944558e-05, "loss": 0.315, "step": 12981 }, { "epoch": 0.9825079229932359, "grad_norm": 0.78125, "learning_rate": 1.0304869237747476e-05, "loss": 0.3288, "step": 12982 }, { "epoch": 0.9825836053166832, "grad_norm": 0.79296875, "learning_rate": 1.0303678709225302e-05, "loss": 0.3392, "step": 12983 }, { "epoch": 0.9826592876401306, "grad_norm": 0.69921875, "learning_rate": 1.0302488176394925e-05, "loss": 0.2967, "step": 12984 }, { "epoch": 0.9827349699635779, "grad_norm": 0.75390625, "learning_rate": 1.030129763927323e-05, "loss": 0.2618, "step": 12985 }, { "epoch": 0.9828106522870252, "grad_norm": 0.72265625, "learning_rate": 1.0300107097877114e-05, "loss": 0.2904, "step": 12986 }, { "epoch": 0.9828863346104726, "grad_norm": 0.73046875, "learning_rate": 1.0298916552223463e-05, "loss": 0.2872, "step": 12987 }, { "epoch": 0.9829620169339198, "grad_norm": 0.77734375, "learning_rate": 1.0297726002329169e-05, "loss": 0.3187, "step": 12988 }, { "epoch": 0.9830376992573672, "grad_norm": 0.75, "learning_rate": 1.029653544821112e-05, "loss": 0.2949, "step": 12989 }, { "epoch": 0.9831133815808145, "grad_norm": 0.75, "learning_rate": 1.0295344889886207e-05, "loss": 0.3114, "step": 12990 }, { "epoch": 0.9831890639042619, "grad_norm": 0.796875, "learning_rate": 1.029415432737132e-05, "loss": 0.3435, "step": 12991 }, { "epoch": 0.9832647462277092, "grad_norm": 0.78515625, "learning_rate": 1.0292963760683347e-05, "loss": 0.3375, "step": 12992 }, { "epoch": 0.9833404285511566, "grad_norm": 0.75, "learning_rate": 1.0291773189839182e-05, "loss": 0.3272, "step": 12993 }, { "epoch": 0.9834161108746039, "grad_norm": 0.69140625, "learning_rate": 1.0290582614855716e-05, "loss": 0.2548, "step": 12994 }, { "epoch": 0.9834917931980511, "grad_norm": 0.75, "learning_rate": 1.0289392035749832e-05, "loss": 0.309, "step": 12995 }, { "epoch": 0.9835674755214985, "grad_norm": 0.7734375, "learning_rate": 1.0288201452538428e-05, "loss": 0.3447, "step": 12996 }, { "epoch": 0.9836431578449458, "grad_norm": 0.76171875, "learning_rate": 1.0287010865238391e-05, "loss": 0.3151, "step": 12997 }, { "epoch": 0.9837188401683932, "grad_norm": 0.7265625, "learning_rate": 1.0285820273866613e-05, "loss": 0.2857, "step": 12998 }, { "epoch": 0.9837945224918405, "grad_norm": 0.83203125, "learning_rate": 1.0284629678439986e-05, "loss": 0.3544, "step": 12999 }, { "epoch": 0.9838702048152879, "grad_norm": 0.81640625, "learning_rate": 1.0283439078975398e-05, "loss": 0.3308, "step": 13000 }, { "epoch": 0.9839458871387352, "grad_norm": 0.765625, "learning_rate": 1.0282248475489735e-05, "loss": 0.3194, "step": 13001 }, { "epoch": 0.9840215694621824, "grad_norm": 0.73828125, "learning_rate": 1.0281057867999897e-05, "loss": 0.2865, "step": 13002 }, { "epoch": 0.9840972517856298, "grad_norm": 0.765625, "learning_rate": 1.0279867256522774e-05, "loss": 0.3162, "step": 13003 }, { "epoch": 0.9841729341090771, "grad_norm": 0.828125, "learning_rate": 1.027867664107525e-05, "loss": 0.3318, "step": 13004 }, { "epoch": 0.9842486164325245, "grad_norm": 0.7578125, "learning_rate": 1.0277486021674219e-05, "loss": 0.3297, "step": 13005 }, { "epoch": 0.9843242987559718, "grad_norm": 0.69921875, "learning_rate": 1.0276295398336573e-05, "loss": 0.2588, "step": 13006 }, { "epoch": 0.9843999810794192, "grad_norm": 0.7890625, "learning_rate": 1.0275104771079203e-05, "loss": 0.3316, "step": 13007 }, { "epoch": 0.9844756634028665, "grad_norm": 0.7578125, "learning_rate": 1.0273914139919e-05, "loss": 0.3402, "step": 13008 }, { "epoch": 0.9845513457263138, "grad_norm": 0.78125, "learning_rate": 1.0272723504872857e-05, "loss": 0.3523, "step": 13009 }, { "epoch": 0.9846270280497611, "grad_norm": 0.69921875, "learning_rate": 1.0271532865957658e-05, "loss": 0.3003, "step": 13010 }, { "epoch": 0.9847027103732084, "grad_norm": 0.7578125, "learning_rate": 1.0270342223190304e-05, "loss": 0.33, "step": 13011 }, { "epoch": 0.9847783926966558, "grad_norm": 0.75390625, "learning_rate": 1.0269151576587681e-05, "loss": 0.2866, "step": 13012 }, { "epoch": 0.9848540750201031, "grad_norm": 0.7109375, "learning_rate": 1.0267960926166678e-05, "loss": 0.3058, "step": 13013 }, { "epoch": 0.9849297573435505, "grad_norm": 0.734375, "learning_rate": 1.0266770271944191e-05, "loss": 0.3096, "step": 13014 }, { "epoch": 0.9850054396669978, "grad_norm": 0.75390625, "learning_rate": 1.0265579613937112e-05, "loss": 0.3085, "step": 13015 }, { "epoch": 0.9850811219904451, "grad_norm": 0.8125, "learning_rate": 1.0264388952162325e-05, "loss": 0.3361, "step": 13016 }, { "epoch": 0.9851568043138924, "grad_norm": 0.75, "learning_rate": 1.0263198286636733e-05, "loss": 0.3331, "step": 13017 }, { "epoch": 0.9852324866373398, "grad_norm": 0.74609375, "learning_rate": 1.0262007617377221e-05, "loss": 0.282, "step": 13018 }, { "epoch": 0.9853081689607871, "grad_norm": 0.73046875, "learning_rate": 1.0260816944400678e-05, "loss": 0.3233, "step": 13019 }, { "epoch": 0.9853838512842344, "grad_norm": 0.77734375, "learning_rate": 1.0259626267724001e-05, "loss": 0.3376, "step": 13020 }, { "epoch": 0.9854595336076818, "grad_norm": 0.75, "learning_rate": 1.0258435587364081e-05, "loss": 0.3295, "step": 13021 }, { "epoch": 0.9855352159311291, "grad_norm": 0.7109375, "learning_rate": 1.0257244903337807e-05, "loss": 0.3077, "step": 13022 }, { "epoch": 0.9856108982545764, "grad_norm": 0.7265625, "learning_rate": 1.0256054215662072e-05, "loss": 0.3195, "step": 13023 }, { "epoch": 0.9856865805780237, "grad_norm": 0.81640625, "learning_rate": 1.0254863524353768e-05, "loss": 0.3767, "step": 13024 }, { "epoch": 0.9857622629014711, "grad_norm": 0.78515625, "learning_rate": 1.0253672829429788e-05, "loss": 0.3498, "step": 13025 }, { "epoch": 0.9858379452249184, "grad_norm": 0.73046875, "learning_rate": 1.0252482130907027e-05, "loss": 0.291, "step": 13026 }, { "epoch": 0.9859136275483658, "grad_norm": 1.3046875, "learning_rate": 1.0251291428802373e-05, "loss": 0.3428, "step": 13027 }, { "epoch": 0.9859893098718131, "grad_norm": 0.80078125, "learning_rate": 1.0250100723132714e-05, "loss": 0.2736, "step": 13028 }, { "epoch": 0.9860649921952604, "grad_norm": 0.69140625, "learning_rate": 1.024891001391495e-05, "loss": 0.2779, "step": 13029 }, { "epoch": 0.9861406745187077, "grad_norm": 0.71484375, "learning_rate": 1.024771930116597e-05, "loss": 0.2909, "step": 13030 }, { "epoch": 0.986216356842155, "grad_norm": 0.78515625, "learning_rate": 1.0246528584902668e-05, "loss": 0.3405, "step": 13031 }, { "epoch": 0.9862920391656024, "grad_norm": 0.69140625, "learning_rate": 1.0245337865141935e-05, "loss": 0.2689, "step": 13032 }, { "epoch": 0.9863677214890497, "grad_norm": 0.72265625, "learning_rate": 1.0244147141900663e-05, "loss": 0.3189, "step": 13033 }, { "epoch": 0.9864434038124971, "grad_norm": 0.7734375, "learning_rate": 1.0242956415195742e-05, "loss": 0.3229, "step": 13034 }, { "epoch": 0.9865190861359444, "grad_norm": 1.0390625, "learning_rate": 1.0241765685044071e-05, "loss": 0.3152, "step": 13035 }, { "epoch": 0.9865947684593918, "grad_norm": 0.70703125, "learning_rate": 1.0240574951462539e-05, "loss": 0.2643, "step": 13036 }, { "epoch": 0.986670450782839, "grad_norm": 0.75, "learning_rate": 1.0239384214468033e-05, "loss": 0.3248, "step": 13037 }, { "epoch": 0.9867461331062863, "grad_norm": 0.8046875, "learning_rate": 1.0238193474077456e-05, "loss": 0.3657, "step": 13038 }, { "epoch": 0.9868218154297337, "grad_norm": 0.6953125, "learning_rate": 1.0237002730307695e-05, "loss": 0.2689, "step": 13039 }, { "epoch": 0.986897497753181, "grad_norm": 0.765625, "learning_rate": 1.0235811983175646e-05, "loss": 0.3568, "step": 13040 }, { "epoch": 0.9869731800766284, "grad_norm": 0.703125, "learning_rate": 1.0234621232698197e-05, "loss": 0.2796, "step": 13041 }, { "epoch": 0.9870488624000757, "grad_norm": 0.765625, "learning_rate": 1.0233430478892242e-05, "loss": 0.2838, "step": 13042 }, { "epoch": 0.9871245447235231, "grad_norm": 0.74609375, "learning_rate": 1.0232239721774672e-05, "loss": 0.308, "step": 13043 }, { "epoch": 0.9872002270469703, "grad_norm": 0.75, "learning_rate": 1.0231048961362389e-05, "loss": 0.2959, "step": 13044 }, { "epoch": 0.9872759093704176, "grad_norm": 0.765625, "learning_rate": 1.0229858197672277e-05, "loss": 0.3363, "step": 13045 }, { "epoch": 0.987351591693865, "grad_norm": 0.69921875, "learning_rate": 1.0228667430721235e-05, "loss": 0.2629, "step": 13046 }, { "epoch": 0.9874272740173123, "grad_norm": 0.75, "learning_rate": 1.022747666052615e-05, "loss": 0.3049, "step": 13047 }, { "epoch": 0.9875029563407597, "grad_norm": 0.76953125, "learning_rate": 1.0226285887103919e-05, "loss": 0.3094, "step": 13048 }, { "epoch": 0.987578638664207, "grad_norm": 0.7734375, "learning_rate": 1.0225095110471437e-05, "loss": 0.3009, "step": 13049 }, { "epoch": 0.9876543209876543, "grad_norm": 0.7734375, "learning_rate": 1.022390433064559e-05, "loss": 0.3118, "step": 13050 }, { "epoch": 0.9877300033111016, "grad_norm": 0.72265625, "learning_rate": 1.0222713547643277e-05, "loss": 0.288, "step": 13051 }, { "epoch": 0.987805685634549, "grad_norm": 0.73828125, "learning_rate": 1.0221522761481394e-05, "loss": 0.2987, "step": 13052 }, { "epoch": 0.9878813679579963, "grad_norm": 0.76953125, "learning_rate": 1.0220331972176826e-05, "loss": 0.3206, "step": 13053 }, { "epoch": 0.9879570502814436, "grad_norm": 0.79296875, "learning_rate": 1.0219141179746472e-05, "loss": 0.3149, "step": 13054 }, { "epoch": 0.988032732604891, "grad_norm": 0.74609375, "learning_rate": 1.0217950384207227e-05, "loss": 0.3151, "step": 13055 }, { "epoch": 0.9881084149283383, "grad_norm": 0.73828125, "learning_rate": 1.0216759585575979e-05, "loss": 0.3197, "step": 13056 }, { "epoch": 0.9881840972517856, "grad_norm": 0.89453125, "learning_rate": 1.0215568783869622e-05, "loss": 0.2767, "step": 13057 }, { "epoch": 0.9882597795752329, "grad_norm": 0.77734375, "learning_rate": 1.0214377979105057e-05, "loss": 0.3385, "step": 13058 }, { "epoch": 0.9883354618986803, "grad_norm": 0.7265625, "learning_rate": 1.0213187171299173e-05, "loss": 0.2803, "step": 13059 }, { "epoch": 0.9884111442221276, "grad_norm": 1.0390625, "learning_rate": 1.0211996360468859e-05, "loss": 0.401, "step": 13060 }, { "epoch": 0.988486826545575, "grad_norm": 0.74609375, "learning_rate": 1.0210805546631013e-05, "loss": 0.3157, "step": 13061 }, { "epoch": 0.9885625088690223, "grad_norm": 0.66796875, "learning_rate": 1.020961472980253e-05, "loss": 0.2659, "step": 13062 }, { "epoch": 0.9886381911924697, "grad_norm": 0.75, "learning_rate": 1.0208423910000302e-05, "loss": 0.3115, "step": 13063 }, { "epoch": 0.9887138735159169, "grad_norm": 0.71484375, "learning_rate": 1.0207233087241224e-05, "loss": 0.2896, "step": 13064 }, { "epoch": 0.9887895558393642, "grad_norm": 0.8125, "learning_rate": 1.020604226154219e-05, "loss": 0.3529, "step": 13065 }, { "epoch": 0.9888652381628116, "grad_norm": 0.734375, "learning_rate": 1.0204851432920088e-05, "loss": 0.2979, "step": 13066 }, { "epoch": 0.9889409204862589, "grad_norm": 0.78125, "learning_rate": 1.0203660601391821e-05, "loss": 0.3455, "step": 13067 }, { "epoch": 0.9890166028097063, "grad_norm": 0.96484375, "learning_rate": 1.0202469766974281e-05, "loss": 0.3677, "step": 13068 }, { "epoch": 0.9890922851331536, "grad_norm": 0.8359375, "learning_rate": 1.0201278929684355e-05, "loss": 0.3287, "step": 13069 }, { "epoch": 0.989167967456601, "grad_norm": 0.82421875, "learning_rate": 1.0200088089538944e-05, "loss": 0.3572, "step": 13070 }, { "epoch": 0.9892436497800482, "grad_norm": 0.73828125, "learning_rate": 1.0198897246554942e-05, "loss": 0.2641, "step": 13071 }, { "epoch": 0.9893193321034955, "grad_norm": 0.703125, "learning_rate": 1.0197706400749236e-05, "loss": 0.2964, "step": 13072 }, { "epoch": 0.9893950144269429, "grad_norm": 0.74609375, "learning_rate": 1.019651555213873e-05, "loss": 0.3077, "step": 13073 }, { "epoch": 0.9894706967503902, "grad_norm": 0.74609375, "learning_rate": 1.0195324700740314e-05, "loss": 0.3076, "step": 13074 }, { "epoch": 0.9895463790738376, "grad_norm": 0.7578125, "learning_rate": 1.0194133846570879e-05, "loss": 0.3098, "step": 13075 }, { "epoch": 0.9896220613972849, "grad_norm": 0.734375, "learning_rate": 1.0192942989647322e-05, "loss": 0.2678, "step": 13076 }, { "epoch": 0.9896977437207323, "grad_norm": 0.671875, "learning_rate": 1.0191752129986543e-05, "loss": 0.2622, "step": 13077 }, { "epoch": 0.9897734260441795, "grad_norm": 0.79296875, "learning_rate": 1.0190561267605425e-05, "loss": 0.3331, "step": 13078 }, { "epoch": 0.9898491083676269, "grad_norm": 0.73046875, "learning_rate": 1.018937040252087e-05, "loss": 0.2783, "step": 13079 }, { "epoch": 0.9899247906910742, "grad_norm": 1.15625, "learning_rate": 1.0188179534749773e-05, "loss": 0.3553, "step": 13080 }, { "epoch": 0.9900004730145215, "grad_norm": 0.765625, "learning_rate": 1.0186988664309023e-05, "loss": 0.3207, "step": 13081 }, { "epoch": 0.9900761553379689, "grad_norm": 0.7890625, "learning_rate": 1.0185797791215521e-05, "loss": 0.3414, "step": 13082 }, { "epoch": 0.9901518376614162, "grad_norm": 0.71484375, "learning_rate": 1.018460691548616e-05, "loss": 0.3028, "step": 13083 }, { "epoch": 0.9902275199848636, "grad_norm": 0.76171875, "learning_rate": 1.0183416037137828e-05, "loss": 0.3459, "step": 13084 }, { "epoch": 0.9903032023083108, "grad_norm": 0.7265625, "learning_rate": 1.0182225156187426e-05, "loss": 0.2796, "step": 13085 }, { "epoch": 0.9903788846317582, "grad_norm": 0.7421875, "learning_rate": 1.0181034272651848e-05, "loss": 0.3177, "step": 13086 }, { "epoch": 0.9904545669552055, "grad_norm": 0.87109375, "learning_rate": 1.017984338654799e-05, "loss": 0.3698, "step": 13087 }, { "epoch": 0.9905302492786529, "grad_norm": 0.796875, "learning_rate": 1.0178652497892744e-05, "loss": 0.3288, "step": 13088 }, { "epoch": 0.9906059316021002, "grad_norm": 0.77734375, "learning_rate": 1.0177461606703005e-05, "loss": 0.3247, "step": 13089 }, { "epoch": 0.9906816139255475, "grad_norm": 0.7734375, "learning_rate": 1.0176270712995668e-05, "loss": 0.3076, "step": 13090 }, { "epoch": 0.9907572962489949, "grad_norm": 0.6953125, "learning_rate": 1.0175079816787631e-05, "loss": 0.2568, "step": 13091 }, { "epoch": 0.9908329785724421, "grad_norm": 0.74609375, "learning_rate": 1.0173888918095782e-05, "loss": 0.2973, "step": 13092 }, { "epoch": 0.9909086608958895, "grad_norm": 0.7265625, "learning_rate": 1.0172698016937023e-05, "loss": 0.2969, "step": 13093 }, { "epoch": 0.9909843432193368, "grad_norm": 0.77734375, "learning_rate": 1.0171507113328248e-05, "loss": 0.3467, "step": 13094 }, { "epoch": 0.9910600255427842, "grad_norm": 0.67578125, "learning_rate": 1.0170316207286349e-05, "loss": 0.2565, "step": 13095 }, { "epoch": 0.9911357078662315, "grad_norm": 0.76953125, "learning_rate": 1.0169125298828222e-05, "loss": 0.3115, "step": 13096 }, { "epoch": 0.9912113901896789, "grad_norm": 0.75390625, "learning_rate": 1.0167934387970764e-05, "loss": 0.3294, "step": 13097 }, { "epoch": 0.9912870725131262, "grad_norm": 0.74609375, "learning_rate": 1.0166743474730865e-05, "loss": 0.3032, "step": 13098 }, { "epoch": 0.9913627548365734, "grad_norm": 0.73046875, "learning_rate": 1.0165552559125424e-05, "loss": 0.3344, "step": 13099 }, { "epoch": 0.9914384371600208, "grad_norm": 0.6796875, "learning_rate": 1.016436164117134e-05, "loss": 0.2513, "step": 13100 }, { "epoch": 0.9915141194834681, "grad_norm": 0.83203125, "learning_rate": 1.01631707208855e-05, "loss": 0.3315, "step": 13101 }, { "epoch": 0.9915898018069155, "grad_norm": 0.7109375, "learning_rate": 1.0161979798284805e-05, "loss": 0.2759, "step": 13102 }, { "epoch": 0.9916654841303628, "grad_norm": 0.8203125, "learning_rate": 1.016078887338615e-05, "loss": 0.3433, "step": 13103 }, { "epoch": 0.9917411664538102, "grad_norm": 0.70703125, "learning_rate": 1.0159597946206425e-05, "loss": 0.2904, "step": 13104 }, { "epoch": 0.9918168487772575, "grad_norm": 0.75390625, "learning_rate": 1.0158407016762533e-05, "loss": 0.3152, "step": 13105 }, { "epoch": 0.9918925311007047, "grad_norm": 0.73046875, "learning_rate": 1.0157216085071365e-05, "loss": 0.2872, "step": 13106 }, { "epoch": 0.9919682134241521, "grad_norm": 0.7421875, "learning_rate": 1.0156025151149813e-05, "loss": 0.2691, "step": 13107 }, { "epoch": 0.9920438957475994, "grad_norm": 0.75390625, "learning_rate": 1.015483421501478e-05, "loss": 0.323, "step": 13108 }, { "epoch": 0.9921195780710468, "grad_norm": 0.78515625, "learning_rate": 1.0153643276683158e-05, "loss": 0.3241, "step": 13109 }, { "epoch": 0.9921952603944941, "grad_norm": 0.78125, "learning_rate": 1.0152452336171842e-05, "loss": 0.3133, "step": 13110 }, { "epoch": 0.9922709427179415, "grad_norm": 0.75, "learning_rate": 1.0151261393497728e-05, "loss": 0.2865, "step": 13111 }, { "epoch": 0.9923466250413888, "grad_norm": 0.75390625, "learning_rate": 1.0150070448677714e-05, "loss": 0.3271, "step": 13112 }, { "epoch": 0.992422307364836, "grad_norm": 0.76171875, "learning_rate": 1.0148879501728688e-05, "loss": 0.3295, "step": 13113 }, { "epoch": 0.9924979896882834, "grad_norm": 0.7265625, "learning_rate": 1.0147688552667555e-05, "loss": 0.2885, "step": 13114 }, { "epoch": 0.9925736720117307, "grad_norm": 0.8203125, "learning_rate": 1.0146497601511205e-05, "loss": 0.3448, "step": 13115 }, { "epoch": 0.9926493543351781, "grad_norm": 0.80859375, "learning_rate": 1.0145306648276535e-05, "loss": 0.3526, "step": 13116 }, { "epoch": 0.9927250366586254, "grad_norm": 0.79296875, "learning_rate": 1.014411569298044e-05, "loss": 0.3507, "step": 13117 }, { "epoch": 0.9928007189820728, "grad_norm": 0.75390625, "learning_rate": 1.0142924735639819e-05, "loss": 0.3037, "step": 13118 }, { "epoch": 0.9928764013055201, "grad_norm": 0.70703125, "learning_rate": 1.0141733776271563e-05, "loss": 0.2773, "step": 13119 }, { "epoch": 0.9929520836289674, "grad_norm": 0.70703125, "learning_rate": 1.014054281489257e-05, "loss": 0.251, "step": 13120 }, { "epoch": 0.9930277659524147, "grad_norm": 0.76171875, "learning_rate": 1.013935185151974e-05, "loss": 0.2742, "step": 13121 }, { "epoch": 0.993103448275862, "grad_norm": 0.69921875, "learning_rate": 1.0138160886169958e-05, "loss": 0.2863, "step": 13122 }, { "epoch": 0.9931791305993094, "grad_norm": 0.74609375, "learning_rate": 1.0136969918860131e-05, "loss": 0.3204, "step": 13123 }, { "epoch": 0.9932548129227567, "grad_norm": 0.8046875, "learning_rate": 1.0135778949607153e-05, "loss": 0.3343, "step": 13124 }, { "epoch": 0.9933304952462041, "grad_norm": 0.77734375, "learning_rate": 1.0134587978427914e-05, "loss": 0.3278, "step": 13125 }, { "epoch": 0.9934061775696514, "grad_norm": 0.734375, "learning_rate": 1.0133397005339313e-05, "loss": 0.3047, "step": 13126 }, { "epoch": 0.9934818598930987, "grad_norm": 0.765625, "learning_rate": 1.013220603035825e-05, "loss": 0.3153, "step": 13127 }, { "epoch": 0.993557542216546, "grad_norm": 0.8203125, "learning_rate": 1.0131015053501614e-05, "loss": 0.35, "step": 13128 }, { "epoch": 0.9936332245399934, "grad_norm": 0.7578125, "learning_rate": 1.0129824074786307e-05, "loss": 0.3139, "step": 13129 }, { "epoch": 0.9937089068634407, "grad_norm": 0.7421875, "learning_rate": 1.0128633094229224e-05, "loss": 0.2959, "step": 13130 }, { "epoch": 0.9937845891868881, "grad_norm": 0.703125, "learning_rate": 1.0127442111847254e-05, "loss": 0.2899, "step": 13131 }, { "epoch": 0.9938602715103354, "grad_norm": 0.6875, "learning_rate": 1.0126251127657305e-05, "loss": 0.2592, "step": 13132 }, { "epoch": 0.9939359538337827, "grad_norm": 0.76953125, "learning_rate": 1.0125060141676267e-05, "loss": 0.3211, "step": 13133 }, { "epoch": 0.99401163615723, "grad_norm": 0.84375, "learning_rate": 1.0123869153921031e-05, "loss": 0.3649, "step": 13134 }, { "epoch": 0.9940873184806773, "grad_norm": 0.7109375, "learning_rate": 1.01226781644085e-05, "loss": 0.2733, "step": 13135 }, { "epoch": 0.9941630008041247, "grad_norm": 0.77734375, "learning_rate": 1.0121487173155573e-05, "loss": 0.3187, "step": 13136 }, { "epoch": 0.994238683127572, "grad_norm": 0.74609375, "learning_rate": 1.0120296180179137e-05, "loss": 0.2731, "step": 13137 }, { "epoch": 0.9943143654510194, "grad_norm": 0.7890625, "learning_rate": 1.0119105185496099e-05, "loss": 0.3425, "step": 13138 }, { "epoch": 0.9943900477744667, "grad_norm": 0.7421875, "learning_rate": 1.0117914189123346e-05, "loss": 0.3083, "step": 13139 }, { "epoch": 0.9944657300979141, "grad_norm": 0.73046875, "learning_rate": 1.0116723191077775e-05, "loss": 0.295, "step": 13140 }, { "epoch": 0.9945414124213613, "grad_norm": 0.7578125, "learning_rate": 1.0115532191376289e-05, "loss": 0.311, "step": 13141 }, { "epoch": 0.9946170947448086, "grad_norm": 0.74609375, "learning_rate": 1.0114341190035782e-05, "loss": 0.3313, "step": 13142 }, { "epoch": 0.994692777068256, "grad_norm": 0.73828125, "learning_rate": 1.0113150187073147e-05, "loss": 0.3043, "step": 13143 }, { "epoch": 0.9947684593917033, "grad_norm": 0.65234375, "learning_rate": 1.011195918250528e-05, "loss": 0.2359, "step": 13144 }, { "epoch": 0.9948441417151507, "grad_norm": 0.73828125, "learning_rate": 1.0110768176349084e-05, "loss": 0.3198, "step": 13145 }, { "epoch": 0.994919824038598, "grad_norm": 0.69140625, "learning_rate": 1.0109577168621447e-05, "loss": 0.2581, "step": 13146 }, { "epoch": 0.9949955063620454, "grad_norm": 0.72265625, "learning_rate": 1.0108386159339274e-05, "loss": 0.288, "step": 13147 }, { "epoch": 0.9950711886854926, "grad_norm": 0.71484375, "learning_rate": 1.0107195148519453e-05, "loss": 0.295, "step": 13148 }, { "epoch": 0.99514687100894, "grad_norm": 0.82421875, "learning_rate": 1.0106004136178888e-05, "loss": 0.3316, "step": 13149 }, { "epoch": 0.9952225533323873, "grad_norm": 0.75390625, "learning_rate": 1.0104813122334474e-05, "loss": 0.3184, "step": 13150 }, { "epoch": 0.9952982356558346, "grad_norm": 0.71875, "learning_rate": 1.0103622107003103e-05, "loss": 0.2958, "step": 13151 }, { "epoch": 0.995373917979282, "grad_norm": 0.75, "learning_rate": 1.0102431090201677e-05, "loss": 0.3153, "step": 13152 }, { "epoch": 0.9954496003027293, "grad_norm": 0.73046875, "learning_rate": 1.0101240071947087e-05, "loss": 0.2838, "step": 13153 }, { "epoch": 0.9955252826261767, "grad_norm": 0.79296875, "learning_rate": 1.0100049052256236e-05, "loss": 0.3121, "step": 13154 }, { "epoch": 0.9956009649496239, "grad_norm": 0.76953125, "learning_rate": 1.0098858031146012e-05, "loss": 0.2859, "step": 13155 }, { "epoch": 0.9956766472730713, "grad_norm": 0.7265625, "learning_rate": 1.0097667008633324e-05, "loss": 0.2706, "step": 13156 }, { "epoch": 0.9957523295965186, "grad_norm": 0.7109375, "learning_rate": 1.0096475984735059e-05, "loss": 0.2734, "step": 13157 }, { "epoch": 0.995828011919966, "grad_norm": 0.69140625, "learning_rate": 1.0095284959468117e-05, "loss": 0.274, "step": 13158 }, { "epoch": 0.9959036942434133, "grad_norm": 0.796875, "learning_rate": 1.0094093932849394e-05, "loss": 0.3686, "step": 13159 }, { "epoch": 0.9959793765668606, "grad_norm": 0.7890625, "learning_rate": 1.0092902904895787e-05, "loss": 0.3449, "step": 13160 }, { "epoch": 0.996055058890308, "grad_norm": 0.79296875, "learning_rate": 1.0091711875624192e-05, "loss": 0.3066, "step": 13161 }, { "epoch": 0.9961307412137552, "grad_norm": 0.8203125, "learning_rate": 1.0090520845051508e-05, "loss": 0.333, "step": 13162 }, { "epoch": 0.9962064235372026, "grad_norm": 0.68359375, "learning_rate": 1.0089329813194628e-05, "loss": 0.2549, "step": 13163 }, { "epoch": 0.9962821058606499, "grad_norm": 0.78515625, "learning_rate": 1.0088138780070456e-05, "loss": 0.3208, "step": 13164 }, { "epoch": 0.9963577881840973, "grad_norm": 0.796875, "learning_rate": 1.008694774569588e-05, "loss": 0.3509, "step": 13165 }, { "epoch": 0.9964334705075446, "grad_norm": 0.76171875, "learning_rate": 1.0085756710087803e-05, "loss": 0.3137, "step": 13166 }, { "epoch": 0.996509152830992, "grad_norm": 0.6875, "learning_rate": 1.0084565673263121e-05, "loss": 0.2831, "step": 13167 }, { "epoch": 0.9965848351544392, "grad_norm": 0.69140625, "learning_rate": 1.0083374635238728e-05, "loss": 0.2637, "step": 13168 }, { "epoch": 0.9966605174778865, "grad_norm": 0.74609375, "learning_rate": 1.008218359603152e-05, "loss": 0.283, "step": 13169 }, { "epoch": 0.9967361998013339, "grad_norm": 0.75, "learning_rate": 1.00809925556584e-05, "loss": 0.3031, "step": 13170 }, { "epoch": 0.9968118821247812, "grad_norm": 0.7421875, "learning_rate": 1.0079801514136265e-05, "loss": 0.321, "step": 13171 }, { "epoch": 0.9968875644482286, "grad_norm": 0.80859375, "learning_rate": 1.0078610471482002e-05, "loss": 0.355, "step": 13172 }, { "epoch": 0.9969632467716759, "grad_norm": 0.75, "learning_rate": 1.0077419427712519e-05, "loss": 0.3279, "step": 13173 }, { "epoch": 0.9970389290951233, "grad_norm": 0.70703125, "learning_rate": 1.0076228382844706e-05, "loss": 0.3004, "step": 13174 }, { "epoch": 0.9971146114185705, "grad_norm": 0.69921875, "learning_rate": 1.0075037336895463e-05, "loss": 0.244, "step": 13175 }, { "epoch": 0.9971902937420178, "grad_norm": 0.69921875, "learning_rate": 1.0073846289881687e-05, "loss": 0.2801, "step": 13176 }, { "epoch": 0.9972659760654652, "grad_norm": 0.80078125, "learning_rate": 1.0072655241820278e-05, "loss": 0.356, "step": 13177 }, { "epoch": 0.9973416583889125, "grad_norm": 0.7109375, "learning_rate": 1.0071464192728122e-05, "loss": 0.293, "step": 13178 }, { "epoch": 0.9974173407123599, "grad_norm": 0.7890625, "learning_rate": 1.0070273142622129e-05, "loss": 0.3525, "step": 13179 }, { "epoch": 0.9974930230358072, "grad_norm": 0.7734375, "learning_rate": 1.0069082091519193e-05, "loss": 0.3498, "step": 13180 }, { "epoch": 0.9975687053592546, "grad_norm": 0.78515625, "learning_rate": 1.0067891039436203e-05, "loss": 0.3377, "step": 13181 }, { "epoch": 0.9976443876827018, "grad_norm": 0.76953125, "learning_rate": 1.0066699986390067e-05, "loss": 0.3151, "step": 13182 }, { "epoch": 0.9977200700061492, "grad_norm": 0.73828125, "learning_rate": 1.0065508932397675e-05, "loss": 0.309, "step": 13183 }, { "epoch": 0.9977957523295965, "grad_norm": 0.8203125, "learning_rate": 1.0064317877475926e-05, "loss": 0.3464, "step": 13184 }, { "epoch": 0.9978714346530438, "grad_norm": 0.69921875, "learning_rate": 1.006312682164172e-05, "loss": 0.2533, "step": 13185 }, { "epoch": 0.9979471169764912, "grad_norm": 0.75390625, "learning_rate": 1.0061935764911953e-05, "loss": 0.3411, "step": 13186 }, { "epoch": 0.9980227992999385, "grad_norm": 0.82421875, "learning_rate": 1.0060744707303513e-05, "loss": 0.3017, "step": 13187 }, { "epoch": 0.9980984816233859, "grad_norm": 0.69140625, "learning_rate": 1.0059553648833312e-05, "loss": 0.2428, "step": 13188 }, { "epoch": 0.9981741639468331, "grad_norm": 0.66796875, "learning_rate": 1.0058362589518241e-05, "loss": 0.2564, "step": 13189 }, { "epoch": 0.9982498462702805, "grad_norm": 0.78515625, "learning_rate": 1.0057171529375192e-05, "loss": 0.3088, "step": 13190 }, { "epoch": 0.9983255285937278, "grad_norm": 0.7890625, "learning_rate": 1.005598046842107e-05, "loss": 0.3417, "step": 13191 }, { "epoch": 0.9984012109171752, "grad_norm": 0.72265625, "learning_rate": 1.0054789406672767e-05, "loss": 0.27, "step": 13192 }, { "epoch": 0.9984768932406225, "grad_norm": 0.73046875, "learning_rate": 1.005359834414718e-05, "loss": 0.2642, "step": 13193 }, { "epoch": 0.9985525755640698, "grad_norm": 0.76171875, "learning_rate": 1.0052407280861214e-05, "loss": 0.3164, "step": 13194 }, { "epoch": 0.9986282578875172, "grad_norm": 0.76171875, "learning_rate": 1.005121621683176e-05, "loss": 0.3242, "step": 13195 }, { "epoch": 0.9987039402109644, "grad_norm": 0.76953125, "learning_rate": 1.0050025152075711e-05, "loss": 0.3214, "step": 13196 }, { "epoch": 0.9987796225344118, "grad_norm": 0.80859375, "learning_rate": 1.0048834086609973e-05, "loss": 0.3524, "step": 13197 }, { "epoch": 0.9988553048578591, "grad_norm": 0.84765625, "learning_rate": 1.0047643020451439e-05, "loss": 0.3819, "step": 13198 }, { "epoch": 0.9989309871813065, "grad_norm": 0.80078125, "learning_rate": 1.0046451953617008e-05, "loss": 0.361, "step": 13199 }, { "epoch": 0.9990066695047538, "grad_norm": 0.765625, "learning_rate": 1.0045260886123576e-05, "loss": 0.3095, "step": 13200 }, { "epoch": 0.9990823518282012, "grad_norm": 0.703125, "learning_rate": 1.004406981798804e-05, "loss": 0.2856, "step": 13201 }, { "epoch": 0.9991580341516485, "grad_norm": 0.7734375, "learning_rate": 1.0042878749227296e-05, "loss": 0.2885, "step": 13202 }, { "epoch": 0.9992337164750957, "grad_norm": 0.82421875, "learning_rate": 1.004168767985825e-05, "loss": 0.351, "step": 13203 }, { "epoch": 0.9993093987985431, "grad_norm": 0.69921875, "learning_rate": 1.0040496609897785e-05, "loss": 0.2948, "step": 13204 }, { "epoch": 0.9993850811219904, "grad_norm": 0.7421875, "learning_rate": 1.003930553936281e-05, "loss": 0.3158, "step": 13205 }, { "epoch": 0.9994607634454378, "grad_norm": 0.734375, "learning_rate": 1.003811446827022e-05, "loss": 0.2831, "step": 13206 }, { "epoch": 0.9995364457688851, "grad_norm": 0.73046875, "learning_rate": 1.0036923396636906e-05, "loss": 0.3073, "step": 13207 }, { "epoch": 0.9996121280923325, "grad_norm": 0.765625, "learning_rate": 1.0035732324479774e-05, "loss": 0.3321, "step": 13208 }, { "epoch": 0.9996878104157798, "grad_norm": 0.8125, "learning_rate": 1.0034541251815716e-05, "loss": 0.303, "step": 13209 }, { "epoch": 0.999763492739227, "grad_norm": 0.80078125, "learning_rate": 1.0033350178661633e-05, "loss": 0.3667, "step": 13210 }, { "epoch": 0.9998391750626744, "grad_norm": 0.7734375, "learning_rate": 1.0032159105034418e-05, "loss": 0.3296, "step": 13211 }, { "epoch": 0.9999148573861217, "grad_norm": 0.734375, "learning_rate": 1.0030968030950972e-05, "loss": 0.3005, "step": 13212 }, { "epoch": 0.9999905397095691, "grad_norm": 0.73828125, "learning_rate": 1.0029776956428192e-05, "loss": 0.2956, "step": 13213 }, { "epoch": 1.0000662220330163, "grad_norm": 0.71484375, "learning_rate": 1.0028585881482975e-05, "loss": 0.2994, "step": 13214 } ], "logging_steps": 1, "max_steps": 26426, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 6607, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.8995302625402946e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }