{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 31359, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003188877196339169, "grad_norm": 3.2215495109558105, "learning_rate": 3.1888771963391685e-05, "loss": 9.4793, "step": 10 }, { "epoch": 0.0006377754392678338, "grad_norm": 1.5021514892578125, "learning_rate": 6.377754392678337e-05, "loss": 8.1296, "step": 20 }, { "epoch": 0.0009566631589017507, "grad_norm": 0.8992422819137573, "learning_rate": 9.566631589017505e-05, "loss": 7.1242, "step": 30 }, { "epoch": 0.0012755508785356677, "grad_norm": 0.7726672887802124, "learning_rate": 0.00012755508785356674, "loss": 6.4833, "step": 40 }, { "epoch": 0.0015944385981695845, "grad_norm": 0.9172486066818237, "learning_rate": 0.00015944385981695843, "loss": 6.1522, "step": 50 }, { "epoch": 0.0019133263178035013, "grad_norm": 0.9134557247161865, "learning_rate": 0.0001913326317803501, "loss": 5.9715, "step": 60 }, { "epoch": 0.0022322140374374183, "grad_norm": 0.7963905334472656, "learning_rate": 0.00022322140374374182, "loss": 5.792, "step": 70 }, { "epoch": 0.0025511017570713354, "grad_norm": 1.257215142250061, "learning_rate": 0.0002551101757071335, "loss": 5.6415, "step": 80 }, { "epoch": 0.002869989476705252, "grad_norm": 0.7022666931152344, "learning_rate": 0.0002869989476705252, "loss": 5.4826, "step": 90 }, { "epoch": 0.003188877196339169, "grad_norm": 0.8765685558319092, "learning_rate": 0.00031888771963391687, "loss": 5.2978, "step": 100 }, { "epoch": 0.003507764915973086, "grad_norm": 0.8249574303627014, "learning_rate": 0.00035077649159730856, "loss": 5.126, "step": 110 }, { "epoch": 0.0038266526356070026, "grad_norm": 1.0378992557525635, "learning_rate": 0.0003826652635607002, "loss": 4.9622, "step": 120 }, { "epoch": 0.00414554035524092, "grad_norm": 0.8625181913375854, "learning_rate": 0.00041455403552409194, "loss": 4.8241, "step": 130 }, { "epoch": 0.004464428074874837, "grad_norm": 0.7947406768798828, "learning_rate": 0.00044644280748748364, "loss": 4.65, "step": 140 }, { "epoch": 0.004783315794508754, "grad_norm": 0.7589473128318787, "learning_rate": 0.0004783315794508753, "loss": 4.5083, "step": 150 }, { "epoch": 0.005102203514142671, "grad_norm": 0.8085631728172302, "learning_rate": 0.000510220351414267, "loss": 4.3782, "step": 160 }, { "epoch": 0.005421091233776587, "grad_norm": 0.7931132912635803, "learning_rate": 0.0005421091233776587, "loss": 4.3206, "step": 170 }, { "epoch": 0.005739978953410504, "grad_norm": 0.9039464592933655, "learning_rate": 0.0005739978953410504, "loss": 4.2164, "step": 180 }, { "epoch": 0.006058866673044421, "grad_norm": 0.9057785272598267, "learning_rate": 0.0006058866673044421, "loss": 4.1272, "step": 190 }, { "epoch": 0.006377754392678338, "grad_norm": 0.9266021847724915, "learning_rate": 0.0006377754392678337, "loss": 4.0183, "step": 200 }, { "epoch": 0.006696642112312255, "grad_norm": 0.724240243434906, "learning_rate": 0.0006696642112312254, "loss": 3.9509, "step": 210 }, { "epoch": 0.007015529831946172, "grad_norm": 0.7587215900421143, "learning_rate": 0.0007015529831946171, "loss": 3.8491, "step": 220 }, { "epoch": 0.007334417551580088, "grad_norm": 0.8444303870201111, "learning_rate": 0.0007334417551580089, "loss": 3.7938, "step": 230 }, { "epoch": 0.007653305271214005, "grad_norm": 0.7100092172622681, "learning_rate": 0.0007653305271214004, "loss": 3.7144, "step": 240 }, { "epoch": 0.007972192990847922, "grad_norm": 0.7530874609947205, "learning_rate": 0.0007972192990847921, "loss": 3.6929, "step": 250 }, { "epoch": 0.00829108071048184, "grad_norm": 0.8656921982765198, "learning_rate": 0.0008291080710481839, "loss": 3.6039, "step": 260 }, { "epoch": 0.008609968430115756, "grad_norm": 0.6925029158592224, "learning_rate": 0.0008609968430115756, "loss": 3.5808, "step": 270 }, { "epoch": 0.008928856149749673, "grad_norm": 0.6730265021324158, "learning_rate": 0.0008928856149749673, "loss": 3.5033, "step": 280 }, { "epoch": 0.00924774386938359, "grad_norm": 0.8135679960250854, "learning_rate": 0.0009247743869383588, "loss": 3.4702, "step": 290 }, { "epoch": 0.009566631589017507, "grad_norm": 0.7290470600128174, "learning_rate": 0.0009566631589017505, "loss": 3.4341, "step": 300 }, { "epoch": 0.009885519308651424, "grad_norm": 0.7947729229927063, "learning_rate": 0.0009885519308651422, "loss": 3.3841, "step": 310 }, { "epoch": 0.010204407028285341, "grad_norm": 0.8279941082000732, "learning_rate": 0.001, "loss": 3.3531, "step": 320 }, { "epoch": 0.010523294747919257, "grad_norm": 0.8101890683174133, "learning_rate": 0.001, "loss": 3.2957, "step": 330 }, { "epoch": 0.010842182467553174, "grad_norm": 0.6927778720855713, "learning_rate": 0.001, "loss": 3.272, "step": 340 }, { "epoch": 0.01116107018718709, "grad_norm": 0.6906934976577759, "learning_rate": 0.001, "loss": 3.2044, "step": 350 }, { "epoch": 0.011479957906821008, "grad_norm": 0.8388308882713318, "learning_rate": 0.001, "loss": 3.1869, "step": 360 }, { "epoch": 0.011798845626454925, "grad_norm": 0.719241201877594, "learning_rate": 0.001, "loss": 3.1254, "step": 370 }, { "epoch": 0.012117733346088842, "grad_norm": 0.7356446981430054, "learning_rate": 0.001, "loss": 3.107, "step": 380 }, { "epoch": 0.012436621065722759, "grad_norm": 0.6843870878219604, "learning_rate": 0.001, "loss": 3.079, "step": 390 }, { "epoch": 0.012755508785356676, "grad_norm": 0.6850771307945251, "learning_rate": 0.001, "loss": 3.04, "step": 400 }, { "epoch": 0.013074396504990593, "grad_norm": 0.7865222096443176, "learning_rate": 0.001, "loss": 2.9971, "step": 410 }, { "epoch": 0.01339328422462451, "grad_norm": 0.6876953840255737, "learning_rate": 0.001, "loss": 2.973, "step": 420 }, { "epoch": 0.013712171944258427, "grad_norm": 0.7146596908569336, "learning_rate": 0.001, "loss": 2.9296, "step": 430 }, { "epoch": 0.014031059663892344, "grad_norm": 0.6826304197311401, "learning_rate": 0.001, "loss": 2.8941, "step": 440 }, { "epoch": 0.014349947383526261, "grad_norm": 0.8065870404243469, "learning_rate": 0.001, "loss": 2.8534, "step": 450 }, { "epoch": 0.014668835103160176, "grad_norm": 0.6614107489585876, "learning_rate": 0.001, "loss": 2.8442, "step": 460 }, { "epoch": 0.014987722822794093, "grad_norm": 0.6620937585830688, "learning_rate": 0.001, "loss": 2.8236, "step": 470 }, { "epoch": 0.01530661054242801, "grad_norm": 0.7093802094459534, "learning_rate": 0.001, "loss": 2.8044, "step": 480 }, { "epoch": 0.01562549826206193, "grad_norm": 0.6854017376899719, "learning_rate": 0.001, "loss": 2.7472, "step": 490 }, { "epoch": 0.015944385981695845, "grad_norm": 0.6393734216690063, "learning_rate": 0.001, "loss": 2.7519, "step": 500 }, { "epoch": 0.016263273701329763, "grad_norm": 0.6611132025718689, "learning_rate": 0.001, "loss": 2.7141, "step": 510 }, { "epoch": 0.01658216142096368, "grad_norm": 0.68845534324646, "learning_rate": 0.001, "loss": 2.6999, "step": 520 }, { "epoch": 0.016901049140597594, "grad_norm": 0.7871103882789612, "learning_rate": 0.001, "loss": 2.7055, "step": 530 }, { "epoch": 0.017219936860231513, "grad_norm": 0.6550216674804688, "learning_rate": 0.001, "loss": 2.6843, "step": 540 }, { "epoch": 0.017538824579865428, "grad_norm": 0.7184970378875732, "learning_rate": 0.001, "loss": 2.638, "step": 550 }, { "epoch": 0.017857712299499347, "grad_norm": 0.6269710659980774, "learning_rate": 0.001, "loss": 2.6259, "step": 560 }, { "epoch": 0.018176600019133262, "grad_norm": 0.622220516204834, "learning_rate": 0.001, "loss": 2.6048, "step": 570 }, { "epoch": 0.01849548773876718, "grad_norm": 0.6675232648849487, "learning_rate": 0.001, "loss": 2.6026, "step": 580 }, { "epoch": 0.018814375458401096, "grad_norm": 0.6323223114013672, "learning_rate": 0.001, "loss": 2.5598, "step": 590 }, { "epoch": 0.019133263178035015, "grad_norm": 0.6446636915206909, "learning_rate": 0.001, "loss": 2.5686, "step": 600 }, { "epoch": 0.01945215089766893, "grad_norm": 0.6598224639892578, "learning_rate": 0.001, "loss": 2.5535, "step": 610 }, { "epoch": 0.01977103861730285, "grad_norm": 0.6299172639846802, "learning_rate": 0.001, "loss": 2.4965, "step": 620 }, { "epoch": 0.020089926336936764, "grad_norm": 0.6265530586242676, "learning_rate": 0.001, "loss": 2.5023, "step": 630 }, { "epoch": 0.020408814056570683, "grad_norm": 0.6296660304069519, "learning_rate": 0.001, "loss": 2.5133, "step": 640 }, { "epoch": 0.0207277017762046, "grad_norm": 0.6240696907043457, "learning_rate": 0.001, "loss": 2.4724, "step": 650 }, { "epoch": 0.021046589495838514, "grad_norm": 0.6711713671684265, "learning_rate": 0.001, "loss": 2.4841, "step": 660 }, { "epoch": 0.021365477215472432, "grad_norm": 0.6260756254196167, "learning_rate": 0.001, "loss": 2.4608, "step": 670 }, { "epoch": 0.021684364935106348, "grad_norm": 0.6255728602409363, "learning_rate": 0.001, "loss": 2.4568, "step": 680 }, { "epoch": 0.022003252654740266, "grad_norm": 0.6062188148498535, "learning_rate": 0.001, "loss": 2.4329, "step": 690 }, { "epoch": 0.02232214037437418, "grad_norm": 0.6365958452224731, "learning_rate": 0.001, "loss": 2.4095, "step": 700 }, { "epoch": 0.0226410280940081, "grad_norm": 0.6736142635345459, "learning_rate": 0.001, "loss": 2.4445, "step": 710 }, { "epoch": 0.022959915813642016, "grad_norm": 0.6197222471237183, "learning_rate": 0.001, "loss": 2.3984, "step": 720 }, { "epoch": 0.023278803533275935, "grad_norm": 0.6317213177680969, "learning_rate": 0.001, "loss": 2.403, "step": 730 }, { "epoch": 0.02359769125290985, "grad_norm": 0.6519914865493774, "learning_rate": 0.001, "loss": 2.3768, "step": 740 }, { "epoch": 0.02391657897254377, "grad_norm": 0.749313473701477, "learning_rate": 0.001, "loss": 2.4057, "step": 750 }, { "epoch": 0.024235466692177684, "grad_norm": 0.6350657939910889, "learning_rate": 0.001, "loss": 2.4037, "step": 760 }, { "epoch": 0.024554354411811603, "grad_norm": 0.6400778889656067, "learning_rate": 0.001, "loss": 2.3994, "step": 770 }, { "epoch": 0.024873242131445518, "grad_norm": 0.621253252029419, "learning_rate": 0.001, "loss": 2.3554, "step": 780 }, { "epoch": 0.025192129851079433, "grad_norm": 0.6236314177513123, "learning_rate": 0.001, "loss": 2.3783, "step": 790 }, { "epoch": 0.025511017570713352, "grad_norm": 0.6254419684410095, "learning_rate": 0.001, "loss": 2.3943, "step": 800 }, { "epoch": 0.025829905290347267, "grad_norm": 0.6776648759841919, "learning_rate": 0.001, "loss": 2.3347, "step": 810 }, { "epoch": 0.026148793009981186, "grad_norm": 0.6007510423660278, "learning_rate": 0.001, "loss": 2.372, "step": 820 }, { "epoch": 0.0264676807296151, "grad_norm": 0.5976624488830566, "learning_rate": 0.001, "loss": 2.33, "step": 830 }, { "epoch": 0.02678656844924902, "grad_norm": 0.6124452352523804, "learning_rate": 0.001, "loss": 2.327, "step": 840 }, { "epoch": 0.027105456168882935, "grad_norm": 0.5857394337654114, "learning_rate": 0.001, "loss": 2.3208, "step": 850 }, { "epoch": 0.027424343888516854, "grad_norm": 0.6345723271369934, "learning_rate": 0.001, "loss": 2.317, "step": 860 }, { "epoch": 0.02774323160815077, "grad_norm": 0.6067624688148499, "learning_rate": 0.001, "loss": 2.2959, "step": 870 }, { "epoch": 0.028062119327784688, "grad_norm": 0.5995084047317505, "learning_rate": 0.001, "loss": 2.3031, "step": 880 }, { "epoch": 0.028381007047418604, "grad_norm": 0.6133849024772644, "learning_rate": 0.001, "loss": 2.283, "step": 890 }, { "epoch": 0.028699894767052522, "grad_norm": 0.6345779895782471, "learning_rate": 0.001, "loss": 2.2874, "step": 900 }, { "epoch": 0.029018782486686438, "grad_norm": 0.5908248424530029, "learning_rate": 0.001, "loss": 2.2586, "step": 910 }, { "epoch": 0.029337670206320353, "grad_norm": 0.5945861339569092, "learning_rate": 0.001, "loss": 2.2623, "step": 920 }, { "epoch": 0.02965655792595427, "grad_norm": 0.6086012721061707, "learning_rate": 0.001, "loss": 2.2776, "step": 930 }, { "epoch": 0.029975445645588187, "grad_norm": 0.596208930015564, "learning_rate": 0.001, "loss": 2.2784, "step": 940 }, { "epoch": 0.030294333365222106, "grad_norm": 0.618234395980835, "learning_rate": 0.001, "loss": 2.2536, "step": 950 }, { "epoch": 0.03061322108485602, "grad_norm": 0.6146029829978943, "learning_rate": 0.001, "loss": 2.2574, "step": 960 }, { "epoch": 0.03093210880448994, "grad_norm": 0.6115110516548157, "learning_rate": 0.001, "loss": 2.2441, "step": 970 }, { "epoch": 0.03125099652412386, "grad_norm": 0.6049351692199707, "learning_rate": 0.001, "loss": 2.2694, "step": 980 }, { "epoch": 0.03156988424375777, "grad_norm": 0.5909845232963562, "learning_rate": 0.001, "loss": 2.2532, "step": 990 }, { "epoch": 0.03188877196339169, "grad_norm": 0.5601927638053894, "learning_rate": 0.001, "loss": 2.2379, "step": 1000 }, { "epoch": 0.03220765968302561, "grad_norm": 0.6240832209587097, "learning_rate": 0.001, "loss": 2.2355, "step": 1010 }, { "epoch": 0.03252654740265953, "grad_norm": 0.6171403527259827, "learning_rate": 0.001, "loss": 2.252, "step": 1020 }, { "epoch": 0.03284543512229344, "grad_norm": 0.587386429309845, "learning_rate": 0.001, "loss": 2.2236, "step": 1030 }, { "epoch": 0.03316432284192736, "grad_norm": 0.5910850167274475, "learning_rate": 0.001, "loss": 2.1752, "step": 1040 }, { "epoch": 0.033483210561561276, "grad_norm": 0.5837855339050293, "learning_rate": 0.001, "loss": 2.2151, "step": 1050 }, { "epoch": 0.03380209828119519, "grad_norm": 0.5770293474197388, "learning_rate": 0.001, "loss": 2.1735, "step": 1060 }, { "epoch": 0.03412098600082911, "grad_norm": 0.6157304644584656, "learning_rate": 0.001, "loss": 2.2111, "step": 1070 }, { "epoch": 0.034439873720463025, "grad_norm": 0.6692126989364624, "learning_rate": 0.001, "loss": 2.2099, "step": 1080 }, { "epoch": 0.034758761440096944, "grad_norm": 0.6117860078811646, "learning_rate": 0.001, "loss": 2.2038, "step": 1090 }, { "epoch": 0.035077649159730856, "grad_norm": 0.6830983757972717, "learning_rate": 0.001, "loss": 2.2173, "step": 1100 }, { "epoch": 0.035396536879364775, "grad_norm": 0.5630941987037659, "learning_rate": 0.001, "loss": 2.1761, "step": 1110 }, { "epoch": 0.035715424598998693, "grad_norm": 0.5978312492370605, "learning_rate": 0.001, "loss": 2.1767, "step": 1120 }, { "epoch": 0.03603431231863261, "grad_norm": 0.6004764437675476, "learning_rate": 0.001, "loss": 2.1733, "step": 1130 }, { "epoch": 0.036353200038266524, "grad_norm": 0.619663655757904, "learning_rate": 0.001, "loss": 2.1967, "step": 1140 }, { "epoch": 0.03667208775790044, "grad_norm": 0.653950035572052, "learning_rate": 0.001, "loss": 2.1843, "step": 1150 }, { "epoch": 0.03699097547753436, "grad_norm": 0.6021509766578674, "learning_rate": 0.001, "loss": 2.1771, "step": 1160 }, { "epoch": 0.03730986319716828, "grad_norm": 0.5993155837059021, "learning_rate": 0.001, "loss": 2.136, "step": 1170 }, { "epoch": 0.03762875091680219, "grad_norm": 0.5950784683227539, "learning_rate": 0.001, "loss": 2.1163, "step": 1180 }, { "epoch": 0.03794763863643611, "grad_norm": 0.5831239819526672, "learning_rate": 0.001, "loss": 2.1326, "step": 1190 }, { "epoch": 0.03826652635607003, "grad_norm": 0.5934585332870483, "learning_rate": 0.001, "loss": 2.1463, "step": 1200 }, { "epoch": 0.03858541407570394, "grad_norm": 0.5983244776725769, "learning_rate": 0.001, "loss": 2.1268, "step": 1210 }, { "epoch": 0.03890430179533786, "grad_norm": 0.6157175302505493, "learning_rate": 0.001, "loss": 2.145, "step": 1220 }, { "epoch": 0.03922318951497178, "grad_norm": 0.5686699151992798, "learning_rate": 0.001, "loss": 2.1292, "step": 1230 }, { "epoch": 0.0395420772346057, "grad_norm": 0.6384000182151794, "learning_rate": 0.001, "loss": 2.1362, "step": 1240 }, { "epoch": 0.03986096495423961, "grad_norm": 0.5988278985023499, "learning_rate": 0.001, "loss": 2.1187, "step": 1250 }, { "epoch": 0.04017985267387353, "grad_norm": 0.5760336518287659, "learning_rate": 0.001, "loss": 2.1132, "step": 1260 }, { "epoch": 0.04049874039350745, "grad_norm": 0.5890830159187317, "learning_rate": 0.001, "loss": 2.1298, "step": 1270 }, { "epoch": 0.040817628113141366, "grad_norm": 0.5832691788673401, "learning_rate": 0.001, "loss": 2.1087, "step": 1280 }, { "epoch": 0.04113651583277528, "grad_norm": 0.5678040981292725, "learning_rate": 0.001, "loss": 2.1192, "step": 1290 }, { "epoch": 0.0414554035524092, "grad_norm": 0.5775576829910278, "learning_rate": 0.001, "loss": 2.0936, "step": 1300 }, { "epoch": 0.041774291272043115, "grad_norm": 0.6111060380935669, "learning_rate": 0.001, "loss": 2.0832, "step": 1310 }, { "epoch": 0.04209317899167703, "grad_norm": 0.5604788661003113, "learning_rate": 0.001, "loss": 2.1251, "step": 1320 }, { "epoch": 0.042412066711310946, "grad_norm": 0.6098892688751221, "learning_rate": 0.001, "loss": 2.1046, "step": 1330 }, { "epoch": 0.042730954430944865, "grad_norm": 0.5982756614685059, "learning_rate": 0.001, "loss": 2.0743, "step": 1340 }, { "epoch": 0.04304984215057878, "grad_norm": 0.5638964176177979, "learning_rate": 0.001, "loss": 2.1103, "step": 1350 }, { "epoch": 0.043368729870212695, "grad_norm": 0.5656088590621948, "learning_rate": 0.001, "loss": 2.0873, "step": 1360 }, { "epoch": 0.043687617589846614, "grad_norm": 0.5705325603485107, "learning_rate": 0.001, "loss": 2.0958, "step": 1370 }, { "epoch": 0.04400650530948053, "grad_norm": 0.5603035688400269, "learning_rate": 0.001, "loss": 2.1022, "step": 1380 }, { "epoch": 0.04432539302911445, "grad_norm": 0.56059330701828, "learning_rate": 0.001, "loss": 2.0511, "step": 1390 }, { "epoch": 0.04464428074874836, "grad_norm": 0.5691202878952026, "learning_rate": 0.001, "loss": 2.0845, "step": 1400 }, { "epoch": 0.04496316846838228, "grad_norm": 0.5622802376747131, "learning_rate": 0.001, "loss": 2.0817, "step": 1410 }, { "epoch": 0.0452820561880162, "grad_norm": 0.5683780908584595, "learning_rate": 0.001, "loss": 2.05, "step": 1420 }, { "epoch": 0.04560094390765012, "grad_norm": 0.5761978626251221, "learning_rate": 0.001, "loss": 2.0766, "step": 1430 }, { "epoch": 0.04591983162728403, "grad_norm": 0.5588021278381348, "learning_rate": 0.001, "loss": 2.0496, "step": 1440 }, { "epoch": 0.04623871934691795, "grad_norm": 0.5767732858657837, "learning_rate": 0.001, "loss": 2.0992, "step": 1450 }, { "epoch": 0.04655760706655187, "grad_norm": 0.5720701217651367, "learning_rate": 0.001, "loss": 2.0471, "step": 1460 }, { "epoch": 0.04687649478618578, "grad_norm": 0.6206384301185608, "learning_rate": 0.001, "loss": 2.0519, "step": 1470 }, { "epoch": 0.0471953825058197, "grad_norm": 0.5901126861572266, "learning_rate": 0.001, "loss": 2.0665, "step": 1480 }, { "epoch": 0.04751427022545362, "grad_norm": 0.5666248202323914, "learning_rate": 0.001, "loss": 2.0524, "step": 1490 }, { "epoch": 0.04783315794508754, "grad_norm": 0.5244306325912476, "learning_rate": 0.001, "loss": 2.0374, "step": 1500 }, { "epoch": 0.04815204566472145, "grad_norm": 0.5552513599395752, "learning_rate": 0.001, "loss": 2.0222, "step": 1510 }, { "epoch": 0.04847093338435537, "grad_norm": 0.5625154376029968, "learning_rate": 0.001, "loss": 2.0465, "step": 1520 }, { "epoch": 0.048789821103989287, "grad_norm": 0.5688402056694031, "learning_rate": 0.001, "loss": 2.0549, "step": 1530 }, { "epoch": 0.049108708823623205, "grad_norm": 0.5560811758041382, "learning_rate": 0.001, "loss": 2.0122, "step": 1540 }, { "epoch": 0.04942759654325712, "grad_norm": 0.5479153394699097, "learning_rate": 0.001, "loss": 2.0345, "step": 1550 }, { "epoch": 0.049746484262891036, "grad_norm": 0.5255228877067566, "learning_rate": 0.001, "loss": 2.0217, "step": 1560 }, { "epoch": 0.050065371982524955, "grad_norm": 0.582529604434967, "learning_rate": 0.001, "loss": 2.0241, "step": 1570 }, { "epoch": 0.050384259702158866, "grad_norm": 0.53663569688797, "learning_rate": 0.001, "loss": 2.0001, "step": 1580 }, { "epoch": 0.050703147421792785, "grad_norm": 0.550887405872345, "learning_rate": 0.001, "loss": 2.0114, "step": 1590 }, { "epoch": 0.051022035141426704, "grad_norm": 0.5510720610618591, "learning_rate": 0.001, "loss": 2.0124, "step": 1600 }, { "epoch": 0.05134092286106062, "grad_norm": 0.5833602547645569, "learning_rate": 0.001, "loss": 2.0178, "step": 1610 }, { "epoch": 0.051659810580694535, "grad_norm": 0.5617836713790894, "learning_rate": 0.001, "loss": 2.0381, "step": 1620 }, { "epoch": 0.05197869830032845, "grad_norm": 0.5440929532051086, "learning_rate": 0.001, "loss": 1.9956, "step": 1630 }, { "epoch": 0.05229758601996237, "grad_norm": 0.5375956892967224, "learning_rate": 0.001, "loss": 2.0045, "step": 1640 }, { "epoch": 0.05261647373959629, "grad_norm": 0.5563559532165527, "learning_rate": 0.001, "loss": 2.0267, "step": 1650 }, { "epoch": 0.0529353614592302, "grad_norm": 0.5130823850631714, "learning_rate": 0.001, "loss": 2.007, "step": 1660 }, { "epoch": 0.05325424917886412, "grad_norm": 0.5466340780258179, "learning_rate": 0.001, "loss": 2.0031, "step": 1670 }, { "epoch": 0.05357313689849804, "grad_norm": 0.5090089440345764, "learning_rate": 0.001, "loss": 1.964, "step": 1680 }, { "epoch": 0.05389202461813196, "grad_norm": 0.5429556369781494, "learning_rate": 0.001, "loss": 1.9897, "step": 1690 }, { "epoch": 0.05421091233776587, "grad_norm": 0.5265209078788757, "learning_rate": 0.001, "loss": 1.9896, "step": 1700 }, { "epoch": 0.05452980005739979, "grad_norm": 0.5308935046195984, "learning_rate": 0.001, "loss": 1.9797, "step": 1710 }, { "epoch": 0.05484868777703371, "grad_norm": 0.52045077085495, "learning_rate": 0.001, "loss": 1.9848, "step": 1720 }, { "epoch": 0.05516757549666762, "grad_norm": 0.5526434183120728, "learning_rate": 0.001, "loss": 1.9823, "step": 1730 }, { "epoch": 0.05548646321630154, "grad_norm": 0.5421900749206543, "learning_rate": 0.001, "loss": 1.9867, "step": 1740 }, { "epoch": 0.05580535093593546, "grad_norm": 0.5268239974975586, "learning_rate": 0.001, "loss": 1.973, "step": 1750 }, { "epoch": 0.056124238655569376, "grad_norm": 0.5331604480743408, "learning_rate": 0.001, "loss": 1.969, "step": 1760 }, { "epoch": 0.05644312637520329, "grad_norm": 0.5327712297439575, "learning_rate": 0.001, "loss": 1.966, "step": 1770 }, { "epoch": 0.05676201409483721, "grad_norm": 0.526186466217041, "learning_rate": 0.001, "loss": 1.9931, "step": 1780 }, { "epoch": 0.057080901814471126, "grad_norm": 0.5733531713485718, "learning_rate": 0.001, "loss": 1.9699, "step": 1790 }, { "epoch": 0.057399789534105045, "grad_norm": 0.5519174933433533, "learning_rate": 0.001, "loss": 1.9623, "step": 1800 }, { "epoch": 0.057718677253738956, "grad_norm": 0.6039563417434692, "learning_rate": 0.001, "loss": 1.9908, "step": 1810 }, { "epoch": 0.058037564973372875, "grad_norm": 0.5261481404304504, "learning_rate": 0.001, "loss": 1.9698, "step": 1820 }, { "epoch": 0.058356452693006794, "grad_norm": 0.5257373452186584, "learning_rate": 0.001, "loss": 1.9554, "step": 1830 }, { "epoch": 0.058675340412640706, "grad_norm": 0.5442422032356262, "learning_rate": 0.001, "loss": 1.956, "step": 1840 }, { "epoch": 0.058994228132274625, "grad_norm": 0.5888342261314392, "learning_rate": 0.001, "loss": 1.9254, "step": 1850 }, { "epoch": 0.05931311585190854, "grad_norm": 0.5307484865188599, "learning_rate": 0.001, "loss": 1.9524, "step": 1860 }, { "epoch": 0.05963200357154246, "grad_norm": 0.5235530734062195, "learning_rate": 0.001, "loss": 1.9542, "step": 1870 }, { "epoch": 0.059950891291176374, "grad_norm": 0.4977993071079254, "learning_rate": 0.001, "loss": 1.9636, "step": 1880 }, { "epoch": 0.06026977901081029, "grad_norm": 0.5077757239341736, "learning_rate": 0.001, "loss": 1.9524, "step": 1890 }, { "epoch": 0.06058866673044421, "grad_norm": 0.5321727395057678, "learning_rate": 0.001, "loss": 1.9548, "step": 1900 }, { "epoch": 0.06090755445007813, "grad_norm": 0.49839040637016296, "learning_rate": 0.001, "loss": 1.9353, "step": 1910 }, { "epoch": 0.06122644216971204, "grad_norm": 0.5668623447418213, "learning_rate": 0.001, "loss": 1.9226, "step": 1920 }, { "epoch": 0.06154532988934596, "grad_norm": 0.4942353665828705, "learning_rate": 0.001, "loss": 1.9532, "step": 1930 }, { "epoch": 0.06186421760897988, "grad_norm": 0.4908944070339203, "learning_rate": 0.001, "loss": 1.9236, "step": 1940 }, { "epoch": 0.0621831053286138, "grad_norm": 0.5142762660980225, "learning_rate": 0.001, "loss": 1.9319, "step": 1950 }, { "epoch": 0.06250199304824772, "grad_norm": 0.5228368043899536, "learning_rate": 0.001, "loss": 1.9281, "step": 1960 }, { "epoch": 0.06282088076788163, "grad_norm": 0.5202351212501526, "learning_rate": 0.001, "loss": 1.9435, "step": 1970 }, { "epoch": 0.06313976848751554, "grad_norm": 0.5125612616539001, "learning_rate": 0.001, "loss": 1.9153, "step": 1980 }, { "epoch": 0.06345865620714947, "grad_norm": 0.4858326315879822, "learning_rate": 0.001, "loss": 1.9081, "step": 1990 }, { "epoch": 0.06377754392678338, "grad_norm": 0.48831015825271606, "learning_rate": 0.001, "loss": 1.9123, "step": 2000 }, { "epoch": 0.06409643164641729, "grad_norm": 0.5106445550918579, "learning_rate": 0.001, "loss": 1.9144, "step": 2010 }, { "epoch": 0.06441531936605122, "grad_norm": 0.5125754475593567, "learning_rate": 0.001, "loss": 1.9336, "step": 2020 }, { "epoch": 0.06473420708568513, "grad_norm": 0.5162148475646973, "learning_rate": 0.001, "loss": 1.9143, "step": 2030 }, { "epoch": 0.06505309480531905, "grad_norm": 0.5132099986076355, "learning_rate": 0.001, "loss": 1.9116, "step": 2040 }, { "epoch": 0.06537198252495297, "grad_norm": 0.4962635338306427, "learning_rate": 0.001, "loss": 1.8872, "step": 2050 }, { "epoch": 0.06569087024458688, "grad_norm": 0.5262729525566101, "learning_rate": 0.001, "loss": 1.8783, "step": 2060 }, { "epoch": 0.0660097579642208, "grad_norm": 0.49188458919525146, "learning_rate": 0.001, "loss": 1.91, "step": 2070 }, { "epoch": 0.06632864568385471, "grad_norm": 0.4812409579753876, "learning_rate": 0.001, "loss": 1.8985, "step": 2080 }, { "epoch": 0.06664753340348863, "grad_norm": 0.48991620540618896, "learning_rate": 0.001, "loss": 1.9143, "step": 2090 }, { "epoch": 0.06696642112312255, "grad_norm": 0.481096088886261, "learning_rate": 0.001, "loss": 1.9079, "step": 2100 }, { "epoch": 0.06728530884275646, "grad_norm": 0.4740917384624481, "learning_rate": 0.001, "loss": 1.8818, "step": 2110 }, { "epoch": 0.06760419656239038, "grad_norm": 0.5008795261383057, "learning_rate": 0.001, "loss": 1.8831, "step": 2120 }, { "epoch": 0.0679230842820243, "grad_norm": 0.49072882533073425, "learning_rate": 0.001, "loss": 1.8938, "step": 2130 }, { "epoch": 0.06824197200165821, "grad_norm": 0.47333481907844543, "learning_rate": 0.001, "loss": 1.8753, "step": 2140 }, { "epoch": 0.06856085972129214, "grad_norm": 0.4727560579776764, "learning_rate": 0.001, "loss": 1.8894, "step": 2150 }, { "epoch": 0.06887974744092605, "grad_norm": 0.4693233072757721, "learning_rate": 0.001, "loss": 1.8974, "step": 2160 }, { "epoch": 0.06919863516055996, "grad_norm": 0.4827735722064972, "learning_rate": 0.001, "loss": 1.88, "step": 2170 }, { "epoch": 0.06951752288019389, "grad_norm": 0.5058079361915588, "learning_rate": 0.001, "loss": 1.8769, "step": 2180 }, { "epoch": 0.0698364105998278, "grad_norm": 0.4859229624271393, "learning_rate": 0.001, "loss": 1.8586, "step": 2190 }, { "epoch": 0.07015529831946171, "grad_norm": 0.49526458978652954, "learning_rate": 0.001, "loss": 1.8846, "step": 2200 }, { "epoch": 0.07047418603909564, "grad_norm": 0.4747912585735321, "learning_rate": 0.001, "loss": 1.8608, "step": 2210 }, { "epoch": 0.07079307375872955, "grad_norm": 0.5108914375305176, "learning_rate": 0.001, "loss": 1.8728, "step": 2220 }, { "epoch": 0.07111196147836348, "grad_norm": 0.44781816005706787, "learning_rate": 0.001, "loss": 1.8597, "step": 2230 }, { "epoch": 0.07143084919799739, "grad_norm": 0.4641876518726349, "learning_rate": 0.001, "loss": 1.8608, "step": 2240 }, { "epoch": 0.0717497369176313, "grad_norm": 0.47702521085739136, "learning_rate": 0.001, "loss": 1.838, "step": 2250 }, { "epoch": 0.07206862463726522, "grad_norm": 0.46187707781791687, "learning_rate": 0.001, "loss": 1.8257, "step": 2260 }, { "epoch": 0.07238751235689914, "grad_norm": 0.4767541289329529, "learning_rate": 0.001, "loss": 1.8606, "step": 2270 }, { "epoch": 0.07270640007653305, "grad_norm": 0.47086700797080994, "learning_rate": 0.001, "loss": 1.8422, "step": 2280 }, { "epoch": 0.07302528779616697, "grad_norm": 0.4712027311325073, "learning_rate": 0.001, "loss": 1.8214, "step": 2290 }, { "epoch": 0.07334417551580089, "grad_norm": 0.4686843156814575, "learning_rate": 0.001, "loss": 1.8307, "step": 2300 }, { "epoch": 0.0736630632354348, "grad_norm": 0.4426477253437042, "learning_rate": 0.001, "loss": 1.8205, "step": 2310 }, { "epoch": 0.07398195095506872, "grad_norm": 0.465758353471756, "learning_rate": 0.001, "loss": 1.8373, "step": 2320 }, { "epoch": 0.07430083867470264, "grad_norm": 0.45455947518348694, "learning_rate": 0.001, "loss": 1.837, "step": 2330 }, { "epoch": 0.07461972639433656, "grad_norm": 0.4629955291748047, "learning_rate": 0.001, "loss": 1.8196, "step": 2340 }, { "epoch": 0.07493861411397047, "grad_norm": 0.4526189863681793, "learning_rate": 0.001, "loss": 1.8441, "step": 2350 }, { "epoch": 0.07525750183360438, "grad_norm": 0.4585621953010559, "learning_rate": 0.001, "loss": 1.8329, "step": 2360 }, { "epoch": 0.07557638955323831, "grad_norm": 0.4587751626968384, "learning_rate": 0.001, "loss": 1.7881, "step": 2370 }, { "epoch": 0.07589527727287222, "grad_norm": 0.4355972111225128, "learning_rate": 0.001, "loss": 1.796, "step": 2380 }, { "epoch": 0.07621416499250613, "grad_norm": 0.4731498956680298, "learning_rate": 0.001, "loss": 1.8371, "step": 2390 }, { "epoch": 0.07653305271214006, "grad_norm": 0.45605430006980896, "learning_rate": 0.001, "loss": 1.8212, "step": 2400 }, { "epoch": 0.07685194043177397, "grad_norm": 0.4283037483692169, "learning_rate": 0.001, "loss": 1.8333, "step": 2410 }, { "epoch": 0.07717082815140788, "grad_norm": 0.4527207911014557, "learning_rate": 0.001, "loss": 1.8028, "step": 2420 }, { "epoch": 0.07748971587104181, "grad_norm": 0.44645947217941284, "learning_rate": 0.001, "loss": 1.8036, "step": 2430 }, { "epoch": 0.07780860359067572, "grad_norm": 0.43729937076568604, "learning_rate": 0.001, "loss": 1.7946, "step": 2440 }, { "epoch": 0.07812749131030965, "grad_norm": 0.4428195655345917, "learning_rate": 0.001, "loss": 1.7917, "step": 2450 }, { "epoch": 0.07844637902994356, "grad_norm": 0.45311400294303894, "learning_rate": 0.001, "loss": 1.8078, "step": 2460 }, { "epoch": 0.07876526674957747, "grad_norm": 0.43074876070022583, "learning_rate": 0.001, "loss": 1.8011, "step": 2470 }, { "epoch": 0.0790841544692114, "grad_norm": 0.43784239888191223, "learning_rate": 0.001, "loss": 1.7801, "step": 2480 }, { "epoch": 0.07940304218884531, "grad_norm": 0.42712098360061646, "learning_rate": 0.001, "loss": 1.8244, "step": 2490 }, { "epoch": 0.07972192990847922, "grad_norm": 0.45883429050445557, "learning_rate": 0.001, "loss": 1.8038, "step": 2500 }, { "epoch": 0.08004081762811315, "grad_norm": 0.4601421654224396, "learning_rate": 0.001, "loss": 1.7772, "step": 2510 }, { "epoch": 0.08035970534774706, "grad_norm": 0.47060567140579224, "learning_rate": 0.001, "loss": 1.8024, "step": 2520 }, { "epoch": 0.08067859306738097, "grad_norm": 0.42333802580833435, "learning_rate": 0.001, "loss": 1.7989, "step": 2530 }, { "epoch": 0.0809974807870149, "grad_norm": 0.414415180683136, "learning_rate": 0.001, "loss": 1.792, "step": 2540 }, { "epoch": 0.0813163685066488, "grad_norm": 0.41989174485206604, "learning_rate": 0.001, "loss": 1.7762, "step": 2550 }, { "epoch": 0.08163525622628273, "grad_norm": 0.4492782950401306, "learning_rate": 0.001, "loss": 1.7852, "step": 2560 }, { "epoch": 0.08195414394591664, "grad_norm": 0.43293607234954834, "learning_rate": 0.001, "loss": 1.7578, "step": 2570 }, { "epoch": 0.08227303166555056, "grad_norm": 0.4404749572277069, "learning_rate": 0.001, "loss": 1.7949, "step": 2580 }, { "epoch": 0.08259191938518448, "grad_norm": 0.482251912355423, "learning_rate": 0.001, "loss": 1.7615, "step": 2590 }, { "epoch": 0.0829108071048184, "grad_norm": 0.441723495721817, "learning_rate": 0.001, "loss": 1.7755, "step": 2600 }, { "epoch": 0.0832296948244523, "grad_norm": 0.4477382302284241, "learning_rate": 0.001, "loss": 1.7761, "step": 2610 }, { "epoch": 0.08354858254408623, "grad_norm": 0.4355809688568115, "learning_rate": 0.001, "loss": 1.7598, "step": 2620 }, { "epoch": 0.08386747026372014, "grad_norm": 0.4181293547153473, "learning_rate": 0.001, "loss": 1.7601, "step": 2630 }, { "epoch": 0.08418635798335405, "grad_norm": 0.4101440906524658, "learning_rate": 0.001, "loss": 1.7626, "step": 2640 }, { "epoch": 0.08450524570298798, "grad_norm": 0.41453298926353455, "learning_rate": 0.001, "loss": 1.7624, "step": 2650 }, { "epoch": 0.08482413342262189, "grad_norm": 0.4500748813152313, "learning_rate": 0.001, "loss": 1.7717, "step": 2660 }, { "epoch": 0.08514302114225582, "grad_norm": 0.45811501145362854, "learning_rate": 0.001, "loss": 1.7451, "step": 2670 }, { "epoch": 0.08546190886188973, "grad_norm": 0.40631115436553955, "learning_rate": 0.001, "loss": 1.7792, "step": 2680 }, { "epoch": 0.08578079658152364, "grad_norm": 0.4175332188606262, "learning_rate": 0.001, "loss": 1.762, "step": 2690 }, { "epoch": 0.08609968430115757, "grad_norm": 0.4147797226905823, "learning_rate": 0.001, "loss": 1.758, "step": 2700 }, { "epoch": 0.08641857202079148, "grad_norm": 0.4185612201690674, "learning_rate": 0.001, "loss": 1.7508, "step": 2710 }, { "epoch": 0.08673745974042539, "grad_norm": 0.4215739965438843, "learning_rate": 0.001, "loss": 1.7676, "step": 2720 }, { "epoch": 0.08705634746005932, "grad_norm": 0.4120407998561859, "learning_rate": 0.001, "loss": 1.7702, "step": 2730 }, { "epoch": 0.08737523517969323, "grad_norm": 0.4168885350227356, "learning_rate": 0.001, "loss": 1.7794, "step": 2740 }, { "epoch": 0.08769412289932715, "grad_norm": 0.420948326587677, "learning_rate": 0.001, "loss": 1.7321, "step": 2750 }, { "epoch": 0.08801301061896107, "grad_norm": 0.4142162501811981, "learning_rate": 0.001, "loss": 1.7386, "step": 2760 }, { "epoch": 0.08833189833859498, "grad_norm": 0.4113253951072693, "learning_rate": 0.001, "loss": 1.7362, "step": 2770 }, { "epoch": 0.0886507860582289, "grad_norm": 0.41032281517982483, "learning_rate": 0.001, "loss": 1.76, "step": 2780 }, { "epoch": 0.08896967377786281, "grad_norm": 0.3968314230442047, "learning_rate": 0.001, "loss": 1.7167, "step": 2790 }, { "epoch": 0.08928856149749673, "grad_norm": 0.4045749306678772, "learning_rate": 0.001, "loss": 1.7521, "step": 2800 }, { "epoch": 0.08960744921713065, "grad_norm": 0.38197824358940125, "learning_rate": 0.001, "loss": 1.71, "step": 2810 }, { "epoch": 0.08992633693676456, "grad_norm": 0.40112364292144775, "learning_rate": 0.001, "loss": 1.7327, "step": 2820 }, { "epoch": 0.09024522465639848, "grad_norm": 0.40666356682777405, "learning_rate": 0.001, "loss": 1.7358, "step": 2830 }, { "epoch": 0.0905641123760324, "grad_norm": 0.40545156598091125, "learning_rate": 0.001, "loss": 1.7315, "step": 2840 }, { "epoch": 0.09088300009566631, "grad_norm": 0.4269545376300812, "learning_rate": 0.001, "loss": 1.7073, "step": 2850 }, { "epoch": 0.09120188781530024, "grad_norm": 0.4080691635608673, "learning_rate": 0.001, "loss": 1.7141, "step": 2860 }, { "epoch": 0.09152077553493415, "grad_norm": 0.41239744424819946, "learning_rate": 0.001, "loss": 1.7126, "step": 2870 }, { "epoch": 0.09183966325456806, "grad_norm": 0.3845699727535248, "learning_rate": 0.001, "loss": 1.7263, "step": 2880 }, { "epoch": 0.09215855097420199, "grad_norm": 0.38979536294937134, "learning_rate": 0.001, "loss": 1.7173, "step": 2890 }, { "epoch": 0.0924774386938359, "grad_norm": 0.3904477655887604, "learning_rate": 0.001, "loss": 1.7167, "step": 2900 }, { "epoch": 0.09279632641346981, "grad_norm": 0.4068198502063751, "learning_rate": 0.001, "loss": 1.723, "step": 2910 }, { "epoch": 0.09311521413310374, "grad_norm": 0.37980249524116516, "learning_rate": 0.001, "loss": 1.724, "step": 2920 }, { "epoch": 0.09343410185273765, "grad_norm": 0.3883666396141052, "learning_rate": 0.001, "loss": 1.7292, "step": 2930 }, { "epoch": 0.09375298957237156, "grad_norm": 0.38029372692108154, "learning_rate": 0.001, "loss": 1.7312, "step": 2940 }, { "epoch": 0.09407187729200549, "grad_norm": 0.40660858154296875, "learning_rate": 0.001, "loss": 1.7042, "step": 2950 }, { "epoch": 0.0943907650116394, "grad_norm": 0.40074729919433594, "learning_rate": 0.001, "loss": 1.6959, "step": 2960 }, { "epoch": 0.09470965273127332, "grad_norm": 0.38984328508377075, "learning_rate": 0.001, "loss": 1.7042, "step": 2970 }, { "epoch": 0.09502854045090724, "grad_norm": 0.36554253101348877, "learning_rate": 0.001, "loss": 1.6994, "step": 2980 }, { "epoch": 0.09534742817054115, "grad_norm": 0.3839636445045471, "learning_rate": 0.001, "loss": 1.7045, "step": 2990 }, { "epoch": 0.09566631589017507, "grad_norm": 0.3920130729675293, "learning_rate": 0.001, "loss": 1.7151, "step": 3000 }, { "epoch": 0.09598520360980899, "grad_norm": 0.37574824690818787, "learning_rate": 0.001, "loss": 1.6945, "step": 3010 }, { "epoch": 0.0963040913294429, "grad_norm": 0.37784722447395325, "learning_rate": 0.001, "loss": 1.6825, "step": 3020 }, { "epoch": 0.09662297904907682, "grad_norm": 0.37999963760375977, "learning_rate": 0.001, "loss": 1.6928, "step": 3030 }, { "epoch": 0.09694186676871074, "grad_norm": 0.3637866675853729, "learning_rate": 0.001, "loss": 1.6893, "step": 3040 }, { "epoch": 0.09726075448834465, "grad_norm": 0.37975192070007324, "learning_rate": 0.001, "loss": 1.7077, "step": 3050 }, { "epoch": 0.09757964220797857, "grad_norm": 0.3667464852333069, "learning_rate": 0.001, "loss": 1.6983, "step": 3060 }, { "epoch": 0.09789852992761248, "grad_norm": 0.37685081362724304, "learning_rate": 0.001, "loss": 1.676, "step": 3070 }, { "epoch": 0.09821741764724641, "grad_norm": 0.35984039306640625, "learning_rate": 0.001, "loss": 1.6843, "step": 3080 }, { "epoch": 0.09853630536688032, "grad_norm": 0.3787494897842407, "learning_rate": 0.001, "loss": 1.6745, "step": 3090 }, { "epoch": 0.09885519308651423, "grad_norm": 0.3618012070655823, "learning_rate": 0.001, "loss": 1.666, "step": 3100 }, { "epoch": 0.09917408080614816, "grad_norm": 0.37266436219215393, "learning_rate": 0.001, "loss": 1.6618, "step": 3110 }, { "epoch": 0.09949296852578207, "grad_norm": 0.36171668767929077, "learning_rate": 0.001, "loss": 1.6866, "step": 3120 }, { "epoch": 0.09981185624541598, "grad_norm": 0.3792877495288849, "learning_rate": 0.001, "loss": 1.6932, "step": 3130 }, { "epoch": 0.10013074396504991, "grad_norm": 0.3717607259750366, "learning_rate": 0.001, "loss": 1.6773, "step": 3140 }, { "epoch": 0.10044963168468382, "grad_norm": 0.3575504422187805, "learning_rate": 0.001, "loss": 1.6799, "step": 3150 }, { "epoch": 0.10076851940431773, "grad_norm": 0.37053510546684265, "learning_rate": 0.001, "loss": 1.6593, "step": 3160 }, { "epoch": 0.10108740712395166, "grad_norm": 0.35849571228027344, "learning_rate": 0.001, "loss": 1.6635, "step": 3170 }, { "epoch": 0.10140629484358557, "grad_norm": 0.3712310194969177, "learning_rate": 0.001, "loss": 1.6642, "step": 3180 }, { "epoch": 0.1017251825632195, "grad_norm": 0.36214131116867065, "learning_rate": 0.001, "loss": 1.6422, "step": 3190 }, { "epoch": 0.10204407028285341, "grad_norm": 0.3629904091358185, "learning_rate": 0.001, "loss": 1.6492, "step": 3200 }, { "epoch": 0.10236295800248732, "grad_norm": 0.34454378485679626, "learning_rate": 0.001, "loss": 1.68, "step": 3210 }, { "epoch": 0.10268184572212125, "grad_norm": 0.356357604265213, "learning_rate": 0.001, "loss": 1.6717, "step": 3220 }, { "epoch": 0.10300073344175516, "grad_norm": 0.35266950726509094, "learning_rate": 0.001, "loss": 1.6358, "step": 3230 }, { "epoch": 0.10331962116138907, "grad_norm": 0.3726760149002075, "learning_rate": 0.001, "loss": 1.6207, "step": 3240 }, { "epoch": 0.103638508881023, "grad_norm": 0.35605040192604065, "learning_rate": 0.001, "loss": 1.6467, "step": 3250 }, { "epoch": 0.1039573966006569, "grad_norm": 0.36946994066238403, "learning_rate": 0.001, "loss": 1.663, "step": 3260 }, { "epoch": 0.10427628432029083, "grad_norm": 0.3503727614879608, "learning_rate": 0.001, "loss": 1.6157, "step": 3270 }, { "epoch": 0.10459517203992474, "grad_norm": 0.3665166199207306, "learning_rate": 0.001, "loss": 1.6582, "step": 3280 }, { "epoch": 0.10491405975955866, "grad_norm": 0.3531331717967987, "learning_rate": 0.001, "loss": 1.6184, "step": 3290 }, { "epoch": 0.10523294747919258, "grad_norm": 0.3589783012866974, "learning_rate": 0.001, "loss": 1.6514, "step": 3300 }, { "epoch": 0.1055518351988265, "grad_norm": 0.3521636724472046, "learning_rate": 0.001, "loss": 1.6394, "step": 3310 }, { "epoch": 0.1058707229184604, "grad_norm": 0.3591137230396271, "learning_rate": 0.001, "loss": 1.6222, "step": 3320 }, { "epoch": 0.10618961063809433, "grad_norm": 0.34527188539505005, "learning_rate": 0.001, "loss": 1.6849, "step": 3330 }, { "epoch": 0.10650849835772824, "grad_norm": 0.33738335967063904, "learning_rate": 0.001, "loss": 1.6509, "step": 3340 }, { "epoch": 0.10682738607736215, "grad_norm": 0.342528373003006, "learning_rate": 0.001, "loss": 1.6568, "step": 3350 }, { "epoch": 0.10714627379699608, "grad_norm": 0.3343750536441803, "learning_rate": 0.001, "loss": 1.6315, "step": 3360 }, { "epoch": 0.10746516151662999, "grad_norm": 0.33615440130233765, "learning_rate": 0.001, "loss": 1.6399, "step": 3370 }, { "epoch": 0.10778404923626392, "grad_norm": 0.3438841700553894, "learning_rate": 0.001, "loss": 1.6302, "step": 3380 }, { "epoch": 0.10810293695589783, "grad_norm": 0.3580395579338074, "learning_rate": 0.001, "loss": 1.5996, "step": 3390 }, { "epoch": 0.10842182467553174, "grad_norm": 0.34119513630867004, "learning_rate": 0.001, "loss": 1.6179, "step": 3400 }, { "epoch": 0.10874071239516567, "grad_norm": 0.33958378434181213, "learning_rate": 0.001, "loss": 1.6254, "step": 3410 }, { "epoch": 0.10905960011479958, "grad_norm": 0.3339976668357849, "learning_rate": 0.001, "loss": 1.6334, "step": 3420 }, { "epoch": 0.10937848783443349, "grad_norm": 0.3434707522392273, "learning_rate": 0.001, "loss": 1.6307, "step": 3430 }, { "epoch": 0.10969737555406742, "grad_norm": 0.33472973108291626, "learning_rate": 0.001, "loss": 1.6306, "step": 3440 }, { "epoch": 0.11001626327370133, "grad_norm": 0.3320460915565491, "learning_rate": 0.001, "loss": 1.6366, "step": 3450 }, { "epoch": 0.11033515099333524, "grad_norm": 0.3379921317100525, "learning_rate": 0.001, "loss": 1.5962, "step": 3460 }, { "epoch": 0.11065403871296917, "grad_norm": 0.3337703049182892, "learning_rate": 0.001, "loss": 1.624, "step": 3470 }, { "epoch": 0.11097292643260308, "grad_norm": 0.3293701410293579, "learning_rate": 0.001, "loss": 1.6056, "step": 3480 }, { "epoch": 0.111291814152237, "grad_norm": 0.3398142158985138, "learning_rate": 0.001, "loss": 1.593, "step": 3490 }, { "epoch": 0.11161070187187092, "grad_norm": 0.33788856863975525, "learning_rate": 0.001, "loss": 1.6093, "step": 3500 }, { "epoch": 0.11192958959150483, "grad_norm": 0.3267028033733368, "learning_rate": 0.001, "loss": 1.6069, "step": 3510 }, { "epoch": 0.11224847731113875, "grad_norm": 0.32580670714378357, "learning_rate": 0.001, "loss": 1.5999, "step": 3520 }, { "epoch": 0.11256736503077266, "grad_norm": 0.31678780913352966, "learning_rate": 0.001, "loss": 1.5919, "step": 3530 }, { "epoch": 0.11288625275040658, "grad_norm": 0.31694918870925903, "learning_rate": 0.001, "loss": 1.6112, "step": 3540 }, { "epoch": 0.1132051404700405, "grad_norm": 0.31398412585258484, "learning_rate": 0.001, "loss": 1.6043, "step": 3550 }, { "epoch": 0.11352402818967441, "grad_norm": 0.3309810757637024, "learning_rate": 0.001, "loss": 1.5972, "step": 3560 }, { "epoch": 0.11384291590930833, "grad_norm": 0.33123892545700073, "learning_rate": 0.001, "loss": 1.5764, "step": 3570 }, { "epoch": 0.11416180362894225, "grad_norm": 0.3245631158351898, "learning_rate": 0.001, "loss": 1.5751, "step": 3580 }, { "epoch": 0.11448069134857616, "grad_norm": 0.32239916920661926, "learning_rate": 0.001, "loss": 1.6094, "step": 3590 }, { "epoch": 0.11479957906821009, "grad_norm": 0.3406887948513031, "learning_rate": 0.001, "loss": 1.6002, "step": 3600 }, { "epoch": 0.115118466787844, "grad_norm": 0.3163871467113495, "learning_rate": 0.001, "loss": 1.5643, "step": 3610 }, { "epoch": 0.11543735450747791, "grad_norm": 0.3176119327545166, "learning_rate": 0.001, "loss": 1.5611, "step": 3620 }, { "epoch": 0.11575624222711184, "grad_norm": 0.32567960023880005, "learning_rate": 0.001, "loss": 1.5995, "step": 3630 }, { "epoch": 0.11607512994674575, "grad_norm": 0.3347359895706177, "learning_rate": 0.001, "loss": 1.6058, "step": 3640 }, { "epoch": 0.11639401766637966, "grad_norm": 0.3217657506465912, "learning_rate": 0.001, "loss": 1.5745, "step": 3650 }, { "epoch": 0.11671290538601359, "grad_norm": 0.31946665048599243, "learning_rate": 0.001, "loss": 1.559, "step": 3660 }, { "epoch": 0.1170317931056475, "grad_norm": 0.3089522421360016, "learning_rate": 0.001, "loss": 1.5884, "step": 3670 }, { "epoch": 0.11735068082528141, "grad_norm": 0.31650635600090027, "learning_rate": 0.001, "loss": 1.6132, "step": 3680 }, { "epoch": 0.11766956854491534, "grad_norm": 0.3170093595981598, "learning_rate": 0.001, "loss": 1.5718, "step": 3690 }, { "epoch": 0.11798845626454925, "grad_norm": 0.30170735716819763, "learning_rate": 0.001, "loss": 1.5786, "step": 3700 }, { "epoch": 0.11830734398418317, "grad_norm": 0.3147510290145874, "learning_rate": 0.001, "loss": 1.5805, "step": 3710 }, { "epoch": 0.11862623170381709, "grad_norm": 0.30160295963287354, "learning_rate": 0.001, "loss": 1.5567, "step": 3720 }, { "epoch": 0.118945119423451, "grad_norm": 0.32837751507759094, "learning_rate": 0.001, "loss": 1.5771, "step": 3730 }, { "epoch": 0.11926400714308492, "grad_norm": 0.31761300563812256, "learning_rate": 0.001, "loss": 1.5816, "step": 3740 }, { "epoch": 0.11958289486271884, "grad_norm": 0.3200889527797699, "learning_rate": 0.001, "loss": 1.5536, "step": 3750 }, { "epoch": 0.11990178258235275, "grad_norm": 0.30934789776802063, "learning_rate": 0.001, "loss": 1.5577, "step": 3760 }, { "epoch": 0.12022067030198667, "grad_norm": 0.30753806233406067, "learning_rate": 0.001, "loss": 1.5784, "step": 3770 }, { "epoch": 0.12053955802162059, "grad_norm": 0.3071571886539459, "learning_rate": 0.001, "loss": 1.5894, "step": 3780 }, { "epoch": 0.12085844574125451, "grad_norm": 0.31278368830680847, "learning_rate": 0.001, "loss": 1.5458, "step": 3790 }, { "epoch": 0.12117733346088842, "grad_norm": 0.3064911365509033, "learning_rate": 0.001, "loss": 1.5434, "step": 3800 }, { "epoch": 0.12149622118052233, "grad_norm": 0.3021445572376251, "learning_rate": 0.001, "loss": 1.5439, "step": 3810 }, { "epoch": 0.12181510890015626, "grad_norm": 0.30052146315574646, "learning_rate": 0.001, "loss": 1.5531, "step": 3820 }, { "epoch": 0.12213399661979017, "grad_norm": 0.3149283826351166, "learning_rate": 0.001, "loss": 1.5463, "step": 3830 }, { "epoch": 0.12245288433942408, "grad_norm": 0.30645984411239624, "learning_rate": 0.001, "loss": 1.561, "step": 3840 }, { "epoch": 0.12277177205905801, "grad_norm": 0.3132129907608032, "learning_rate": 0.001, "loss": 1.5622, "step": 3850 }, { "epoch": 0.12309065977869192, "grad_norm": 0.31083235144615173, "learning_rate": 0.001, "loss": 1.5573, "step": 3860 }, { "epoch": 0.12340954749832583, "grad_norm": 0.3180761933326721, "learning_rate": 0.001, "loss": 1.5747, "step": 3870 }, { "epoch": 0.12372843521795976, "grad_norm": 0.3081989288330078, "learning_rate": 0.001, "loss": 1.536, "step": 3880 }, { "epoch": 0.12404732293759367, "grad_norm": 0.30181053280830383, "learning_rate": 0.001, "loss": 1.5566, "step": 3890 }, { "epoch": 0.1243662106572276, "grad_norm": 0.3038397431373596, "learning_rate": 0.001, "loss": 1.5501, "step": 3900 }, { "epoch": 0.12468509837686151, "grad_norm": 0.2994053363800049, "learning_rate": 0.001, "loss": 1.5416, "step": 3910 }, { "epoch": 0.12500398609649543, "grad_norm": 0.29628464579582214, "learning_rate": 0.001, "loss": 1.5511, "step": 3920 }, { "epoch": 0.12532287381612933, "grad_norm": 0.2959844768047333, "learning_rate": 0.001, "loss": 1.5207, "step": 3930 }, { "epoch": 0.12564176153576326, "grad_norm": 0.294956773519516, "learning_rate": 0.001, "loss": 1.5164, "step": 3940 }, { "epoch": 0.12596064925539718, "grad_norm": 0.28598493337631226, "learning_rate": 0.001, "loss": 1.5311, "step": 3950 }, { "epoch": 0.12627953697503108, "grad_norm": 0.2978171706199646, "learning_rate": 0.001, "loss": 1.5241, "step": 3960 }, { "epoch": 0.126598424694665, "grad_norm": 0.2851006090641022, "learning_rate": 0.001, "loss": 1.5414, "step": 3970 }, { "epoch": 0.12691731241429893, "grad_norm": 0.2982429265975952, "learning_rate": 0.001, "loss": 1.5179, "step": 3980 }, { "epoch": 0.12723620013393283, "grad_norm": 0.28805026412010193, "learning_rate": 0.001, "loss": 1.5252, "step": 3990 }, { "epoch": 0.12755508785356676, "grad_norm": 0.3060665726661682, "learning_rate": 0.001, "loss": 1.5211, "step": 4000 }, { "epoch": 0.12787397557320068, "grad_norm": 0.2944425642490387, "learning_rate": 0.001, "loss": 1.5364, "step": 4010 }, { "epoch": 0.12819286329283458, "grad_norm": 0.29006776213645935, "learning_rate": 0.001, "loss": 1.5241, "step": 4020 }, { "epoch": 0.1285117510124685, "grad_norm": 0.29584676027297974, "learning_rate": 0.001, "loss": 1.5295, "step": 4030 }, { "epoch": 0.12883063873210243, "grad_norm": 0.28195831179618835, "learning_rate": 0.001, "loss": 1.525, "step": 4040 }, { "epoch": 0.12914952645173636, "grad_norm": 0.2912329137325287, "learning_rate": 0.001, "loss": 1.5306, "step": 4050 }, { "epoch": 0.12946841417137026, "grad_norm": 0.28421998023986816, "learning_rate": 0.001, "loss": 1.5337, "step": 4060 }, { "epoch": 0.12978730189100418, "grad_norm": 0.2915787994861603, "learning_rate": 0.001, "loss": 1.5267, "step": 4070 }, { "epoch": 0.1301061896106381, "grad_norm": 0.28192031383514404, "learning_rate": 0.001, "loss": 1.5003, "step": 4080 }, { "epoch": 0.130425077330272, "grad_norm": 0.28582581877708435, "learning_rate": 0.001, "loss": 1.518, "step": 4090 }, { "epoch": 0.13074396504990593, "grad_norm": 0.28264012932777405, "learning_rate": 0.001, "loss": 1.5238, "step": 4100 }, { "epoch": 0.13106285276953986, "grad_norm": 0.2747161388397217, "learning_rate": 0.001, "loss": 1.5093, "step": 4110 }, { "epoch": 0.13138174048917375, "grad_norm": 0.2909848690032959, "learning_rate": 0.001, "loss": 1.4931, "step": 4120 }, { "epoch": 0.13170062820880768, "grad_norm": 0.2762302756309509, "learning_rate": 0.001, "loss": 1.5131, "step": 4130 }, { "epoch": 0.1320195159284416, "grad_norm": 0.29010942578315735, "learning_rate": 0.001, "loss": 1.5016, "step": 4140 }, { "epoch": 0.1323384036480755, "grad_norm": 0.28639352321624756, "learning_rate": 0.001, "loss": 1.4924, "step": 4150 }, { "epoch": 0.13265729136770943, "grad_norm": 0.27551382780075073, "learning_rate": 0.001, "loss": 1.4986, "step": 4160 }, { "epoch": 0.13297617908734335, "grad_norm": 0.2695043087005615, "learning_rate": 0.001, "loss": 1.5002, "step": 4170 }, { "epoch": 0.13329506680697725, "grad_norm": 0.287056028842926, "learning_rate": 0.001, "loss": 1.4837, "step": 4180 }, { "epoch": 0.13361395452661118, "grad_norm": 0.27863118052482605, "learning_rate": 0.001, "loss": 1.4895, "step": 4190 }, { "epoch": 0.1339328422462451, "grad_norm": 0.2812846004962921, "learning_rate": 0.001, "loss": 1.4992, "step": 4200 }, { "epoch": 0.134251729965879, "grad_norm": 0.27025651931762695, "learning_rate": 0.001, "loss": 1.4919, "step": 4210 }, { "epoch": 0.13457061768551293, "grad_norm": 0.26951390504837036, "learning_rate": 0.001, "loss": 1.4928, "step": 4220 }, { "epoch": 0.13488950540514685, "grad_norm": 0.27416202425956726, "learning_rate": 0.001, "loss": 1.4852, "step": 4230 }, { "epoch": 0.13520839312478075, "grad_norm": 0.27375853061676025, "learning_rate": 0.001, "loss": 1.5213, "step": 4240 }, { "epoch": 0.13552728084441468, "grad_norm": 0.26884832978248596, "learning_rate": 0.001, "loss": 1.4888, "step": 4250 }, { "epoch": 0.1358461685640486, "grad_norm": 0.27149176597595215, "learning_rate": 0.001, "loss": 1.5023, "step": 4260 }, { "epoch": 0.13616505628368253, "grad_norm": 0.27007535099983215, "learning_rate": 0.001, "loss": 1.4405, "step": 4270 }, { "epoch": 0.13648394400331643, "grad_norm": 0.26930558681488037, "learning_rate": 0.001, "loss": 1.4861, "step": 4280 }, { "epoch": 0.13680283172295035, "grad_norm": 0.2684692144393921, "learning_rate": 0.001, "loss": 1.4629, "step": 4290 }, { "epoch": 0.13712171944258428, "grad_norm": 0.2631552815437317, "learning_rate": 0.001, "loss": 1.4726, "step": 4300 }, { "epoch": 0.13744060716221818, "grad_norm": 0.2739587426185608, "learning_rate": 0.001, "loss": 1.488, "step": 4310 }, { "epoch": 0.1377594948818521, "grad_norm": 0.2854085862636566, "learning_rate": 0.001, "loss": 1.4637, "step": 4320 }, { "epoch": 0.13807838260148603, "grad_norm": 0.2662985622882843, "learning_rate": 0.001, "loss": 1.4772, "step": 4330 }, { "epoch": 0.13839727032111993, "grad_norm": 0.27411964535713196, "learning_rate": 0.001, "loss": 1.4772, "step": 4340 }, { "epoch": 0.13871615804075385, "grad_norm": 0.2799747884273529, "learning_rate": 0.001, "loss": 1.4814, "step": 4350 }, { "epoch": 0.13903504576038778, "grad_norm": 0.266664057970047, "learning_rate": 0.001, "loss": 1.4773, "step": 4360 }, { "epoch": 0.13935393348002167, "grad_norm": 0.26244714856147766, "learning_rate": 0.001, "loss": 1.4922, "step": 4370 }, { "epoch": 0.1396728211996556, "grad_norm": 0.268928200006485, "learning_rate": 0.001, "loss": 1.4656, "step": 4380 }, { "epoch": 0.13999170891928953, "grad_norm": 0.26305675506591797, "learning_rate": 0.001, "loss": 1.4728, "step": 4390 }, { "epoch": 0.14031059663892342, "grad_norm": 0.26536253094673157, "learning_rate": 0.001, "loss": 1.4829, "step": 4400 }, { "epoch": 0.14062948435855735, "grad_norm": 0.2680259048938751, "learning_rate": 0.001, "loss": 1.4879, "step": 4410 }, { "epoch": 0.14094837207819128, "grad_norm": 0.2655434012413025, "learning_rate": 0.001, "loss": 1.4493, "step": 4420 }, { "epoch": 0.14126725979782517, "grad_norm": 0.2688651978969574, "learning_rate": 0.001, "loss": 1.4648, "step": 4430 }, { "epoch": 0.1415861475174591, "grad_norm": 0.2561871409416199, "learning_rate": 0.001, "loss": 1.4479, "step": 4440 }, { "epoch": 0.14190503523709302, "grad_norm": 0.2676304876804352, "learning_rate": 0.001, "loss": 1.4353, "step": 4450 }, { "epoch": 0.14222392295672695, "grad_norm": 0.26640716195106506, "learning_rate": 0.001, "loss": 1.4368, "step": 4460 }, { "epoch": 0.14254281067636085, "grad_norm": 0.26717209815979004, "learning_rate": 0.001, "loss": 1.4525, "step": 4470 }, { "epoch": 0.14286169839599477, "grad_norm": 0.253572016954422, "learning_rate": 0.001, "loss": 1.4607, "step": 4480 }, { "epoch": 0.1431805861156287, "grad_norm": 0.2572033107280731, "learning_rate": 0.001, "loss": 1.4488, "step": 4490 }, { "epoch": 0.1434994738352626, "grad_norm": 0.2625178098678589, "learning_rate": 0.001, "loss": 1.4813, "step": 4500 }, { "epoch": 0.14381836155489652, "grad_norm": 0.26461777091026306, "learning_rate": 0.001, "loss": 1.4848, "step": 4510 }, { "epoch": 0.14413724927453045, "grad_norm": 0.2646142840385437, "learning_rate": 0.001, "loss": 1.4555, "step": 4520 }, { "epoch": 0.14445613699416435, "grad_norm": 0.2517695128917694, "learning_rate": 0.001, "loss": 1.4482, "step": 4530 }, { "epoch": 0.14477502471379827, "grad_norm": 0.26152729988098145, "learning_rate": 0.001, "loss": 1.4518, "step": 4540 }, { "epoch": 0.1450939124334322, "grad_norm": 0.25882112979888916, "learning_rate": 0.001, "loss": 1.45, "step": 4550 }, { "epoch": 0.1454128001530661, "grad_norm": 0.25552093982696533, "learning_rate": 0.001, "loss": 1.4729, "step": 4560 }, { "epoch": 0.14573168787270002, "grad_norm": 0.25193673372268677, "learning_rate": 0.001, "loss": 1.4483, "step": 4570 }, { "epoch": 0.14605057559233395, "grad_norm": 0.24570788443088531, "learning_rate": 0.001, "loss": 1.435, "step": 4580 }, { "epoch": 0.14636946331196785, "grad_norm": 0.2533837854862213, "learning_rate": 0.001, "loss": 1.4343, "step": 4590 }, { "epoch": 0.14668835103160177, "grad_norm": 0.25800344347953796, "learning_rate": 0.001, "loss": 1.4592, "step": 4600 }, { "epoch": 0.1470072387512357, "grad_norm": 0.2562967836856842, "learning_rate": 0.001, "loss": 1.4477, "step": 4610 }, { "epoch": 0.1473261264708696, "grad_norm": 0.2644691467285156, "learning_rate": 0.001, "loss": 1.452, "step": 4620 }, { "epoch": 0.14764501419050352, "grad_norm": 0.2588248550891876, "learning_rate": 0.001, "loss": 1.4492, "step": 4630 }, { "epoch": 0.14796390191013745, "grad_norm": 0.24891693890094757, "learning_rate": 0.001, "loss": 1.4514, "step": 4640 }, { "epoch": 0.14828278962977134, "grad_norm": 0.2602750360965729, "learning_rate": 0.001, "loss": 1.4502, "step": 4650 }, { "epoch": 0.14860167734940527, "grad_norm": 0.2690906226634979, "learning_rate": 0.001, "loss": 1.447, "step": 4660 }, { "epoch": 0.1489205650690392, "grad_norm": 0.2503431439399719, "learning_rate": 0.001, "loss": 1.4279, "step": 4670 }, { "epoch": 0.14923945278867312, "grad_norm": 0.2497878074645996, "learning_rate": 0.001, "loss": 1.4413, "step": 4680 }, { "epoch": 0.14955834050830702, "grad_norm": 0.25570768117904663, "learning_rate": 0.001, "loss": 1.4402, "step": 4690 }, { "epoch": 0.14987722822794095, "grad_norm": 0.2501462697982788, "learning_rate": 0.001, "loss": 1.4119, "step": 4700 }, { "epoch": 0.15019611594757487, "grad_norm": 0.2574600577354431, "learning_rate": 0.001, "loss": 1.4297, "step": 4710 }, { "epoch": 0.15051500366720877, "grad_norm": 0.26934248208999634, "learning_rate": 0.001, "loss": 1.4183, "step": 4720 }, { "epoch": 0.1508338913868427, "grad_norm": 0.24917541444301605, "learning_rate": 0.001, "loss": 1.4301, "step": 4730 }, { "epoch": 0.15115277910647662, "grad_norm": 0.25259947776794434, "learning_rate": 0.001, "loss": 1.4254, "step": 4740 }, { "epoch": 0.15147166682611052, "grad_norm": 0.2534835636615753, "learning_rate": 0.001, "loss": 1.413, "step": 4750 }, { "epoch": 0.15179055454574444, "grad_norm": 0.237448051571846, "learning_rate": 0.001, "loss": 1.4096, "step": 4760 }, { "epoch": 0.15210944226537837, "grad_norm": 0.2515486478805542, "learning_rate": 0.001, "loss": 1.4226, "step": 4770 }, { "epoch": 0.15242832998501227, "grad_norm": 0.24898935854434967, "learning_rate": 0.001, "loss": 1.4077, "step": 4780 }, { "epoch": 0.1527472177046462, "grad_norm": 0.2587830424308777, "learning_rate": 0.001, "loss": 1.4354, "step": 4790 }, { "epoch": 0.15306610542428012, "grad_norm": 0.2498152256011963, "learning_rate": 0.001, "loss": 1.4091, "step": 4800 }, { "epoch": 0.15338499314391402, "grad_norm": 0.2435469776391983, "learning_rate": 0.001, "loss": 1.4354, "step": 4810 }, { "epoch": 0.15370388086354794, "grad_norm": 0.2446700930595398, "learning_rate": 0.001, "loss": 1.4227, "step": 4820 }, { "epoch": 0.15402276858318187, "grad_norm": 0.23855316638946533, "learning_rate": 0.001, "loss": 1.4085, "step": 4830 }, { "epoch": 0.15434165630281577, "grad_norm": 0.23332339525222778, "learning_rate": 0.001, "loss": 1.425, "step": 4840 }, { "epoch": 0.1546605440224497, "grad_norm": 0.25121748447418213, "learning_rate": 0.001, "loss": 1.4188, "step": 4850 }, { "epoch": 0.15497943174208362, "grad_norm": 0.23875190317630768, "learning_rate": 0.001, "loss": 1.4166, "step": 4860 }, { "epoch": 0.15529831946171752, "grad_norm": 0.2450498342514038, "learning_rate": 0.001, "loss": 1.4002, "step": 4870 }, { "epoch": 0.15561720718135144, "grad_norm": 0.24624621868133545, "learning_rate": 0.001, "loss": 1.3945, "step": 4880 }, { "epoch": 0.15593609490098537, "grad_norm": 0.24817205965518951, "learning_rate": 0.001, "loss": 1.4173, "step": 4890 }, { "epoch": 0.1562549826206193, "grad_norm": 0.2515687346458435, "learning_rate": 0.001, "loss": 1.4208, "step": 4900 }, { "epoch": 0.1565738703402532, "grad_norm": 0.2485036849975586, "learning_rate": 0.001, "loss": 1.4264, "step": 4910 }, { "epoch": 0.15689275805988712, "grad_norm": 0.24037910997867584, "learning_rate": 0.001, "loss": 1.411, "step": 4920 }, { "epoch": 0.15721164577952104, "grad_norm": 0.2485659420490265, "learning_rate": 0.001, "loss": 1.4192, "step": 4930 }, { "epoch": 0.15753053349915494, "grad_norm": 0.23820529878139496, "learning_rate": 0.001, "loss": 1.3934, "step": 4940 }, { "epoch": 0.15784942121878887, "grad_norm": 0.2456134855747223, "learning_rate": 0.001, "loss": 1.3899, "step": 4950 }, { "epoch": 0.1581683089384228, "grad_norm": 0.24716608226299286, "learning_rate": 0.001, "loss": 1.4188, "step": 4960 }, { "epoch": 0.1584871966580567, "grad_norm": 0.23966428637504578, "learning_rate": 0.001, "loss": 1.413, "step": 4970 }, { "epoch": 0.15880608437769062, "grad_norm": 0.2461225539445877, "learning_rate": 0.001, "loss": 1.3964, "step": 4980 }, { "epoch": 0.15912497209732454, "grad_norm": 0.2454925775527954, "learning_rate": 0.001, "loss": 1.3956, "step": 4990 }, { "epoch": 0.15944385981695844, "grad_norm": 0.24597546458244324, "learning_rate": 0.001, "loss": 1.3761, "step": 5000 }, { "epoch": 0.15976274753659236, "grad_norm": 0.24570181965827942, "learning_rate": 0.001, "loss": 1.4079, "step": 5010 }, { "epoch": 0.1600816352562263, "grad_norm": 0.23773568868637085, "learning_rate": 0.001, "loss": 1.3889, "step": 5020 }, { "epoch": 0.1604005229758602, "grad_norm": 0.2613687217235565, "learning_rate": 0.001, "loss": 1.428, "step": 5030 }, { "epoch": 0.1607194106954941, "grad_norm": 0.24893403053283691, "learning_rate": 0.001, "loss": 1.4064, "step": 5040 }, { "epoch": 0.16103829841512804, "grad_norm": 0.24659830331802368, "learning_rate": 0.001, "loss": 1.3935, "step": 5050 }, { "epoch": 0.16135718613476194, "grad_norm": 0.25039970874786377, "learning_rate": 0.001, "loss": 1.4005, "step": 5060 }, { "epoch": 0.16167607385439586, "grad_norm": 0.23986603319644928, "learning_rate": 0.001, "loss": 1.3882, "step": 5070 }, { "epoch": 0.1619949615740298, "grad_norm": 0.2418118715286255, "learning_rate": 0.001, "loss": 1.3963, "step": 5080 }, { "epoch": 0.16231384929366371, "grad_norm": 0.23808790743350983, "learning_rate": 0.001, "loss": 1.4192, "step": 5090 }, { "epoch": 0.1626327370132976, "grad_norm": 0.2416272610425949, "learning_rate": 0.001, "loss": 1.4061, "step": 5100 }, { "epoch": 0.16295162473293154, "grad_norm": 0.24399322271347046, "learning_rate": 0.001, "loss": 1.3958, "step": 5110 }, { "epoch": 0.16327051245256546, "grad_norm": 0.23533989489078522, "learning_rate": 0.001, "loss": 1.3766, "step": 5120 }, { "epoch": 0.16358940017219936, "grad_norm": 0.23567648231983185, "learning_rate": 0.001, "loss": 1.394, "step": 5130 }, { "epoch": 0.1639082878918333, "grad_norm": 0.23541750013828278, "learning_rate": 0.001, "loss": 1.3877, "step": 5140 }, { "epoch": 0.1642271756114672, "grad_norm": 0.2420421689748764, "learning_rate": 0.001, "loss": 1.3941, "step": 5150 }, { "epoch": 0.1645460633311011, "grad_norm": 0.24234576523303986, "learning_rate": 0.001, "loss": 1.3893, "step": 5160 }, { "epoch": 0.16486495105073504, "grad_norm": 0.23756305873394012, "learning_rate": 0.001, "loss": 1.3961, "step": 5170 }, { "epoch": 0.16518383877036896, "grad_norm": 0.23691706359386444, "learning_rate": 0.001, "loss": 1.4036, "step": 5180 }, { "epoch": 0.16550272649000286, "grad_norm": 0.2374681830406189, "learning_rate": 0.001, "loss": 1.4124, "step": 5190 }, { "epoch": 0.1658216142096368, "grad_norm": 0.2333029806613922, "learning_rate": 0.001, "loss": 1.373, "step": 5200 }, { "epoch": 0.1661405019292707, "grad_norm": 0.23446527123451233, "learning_rate": 0.001, "loss": 1.3737, "step": 5210 }, { "epoch": 0.1664593896489046, "grad_norm": 0.24269339442253113, "learning_rate": 0.001, "loss": 1.3741, "step": 5220 }, { "epoch": 0.16677827736853854, "grad_norm": 0.25331172347068787, "learning_rate": 0.001, "loss": 1.3733, "step": 5230 }, { "epoch": 0.16709716508817246, "grad_norm": 0.24028655886650085, "learning_rate": 0.001, "loss": 1.3879, "step": 5240 }, { "epoch": 0.16741605280780636, "grad_norm": 0.2346467673778534, "learning_rate": 0.001, "loss": 1.3834, "step": 5250 }, { "epoch": 0.16773494052744028, "grad_norm": 0.2357403039932251, "learning_rate": 0.001, "loss": 1.3681, "step": 5260 }, { "epoch": 0.1680538282470742, "grad_norm": 0.2432994395494461, "learning_rate": 0.001, "loss": 1.3637, "step": 5270 }, { "epoch": 0.1683727159667081, "grad_norm": 0.23171168565750122, "learning_rate": 0.001, "loss": 1.3785, "step": 5280 }, { "epoch": 0.16869160368634203, "grad_norm": 0.2409990429878235, "learning_rate": 0.001, "loss": 1.3765, "step": 5290 }, { "epoch": 0.16901049140597596, "grad_norm": 0.23982109129428864, "learning_rate": 0.001, "loss": 1.3738, "step": 5300 }, { "epoch": 0.16932937912560989, "grad_norm": 0.23728245496749878, "learning_rate": 0.001, "loss": 1.3802, "step": 5310 }, { "epoch": 0.16964826684524378, "grad_norm": 0.23985517024993896, "learning_rate": 0.001, "loss": 1.3708, "step": 5320 }, { "epoch": 0.1699671545648777, "grad_norm": 0.2339295744895935, "learning_rate": 0.001, "loss": 1.3887, "step": 5330 }, { "epoch": 0.17028604228451164, "grad_norm": 0.23314034938812256, "learning_rate": 0.001, "loss": 1.3959, "step": 5340 }, { "epoch": 0.17060493000414553, "grad_norm": 0.2369535267353058, "learning_rate": 0.001, "loss": 1.3737, "step": 5350 }, { "epoch": 0.17092381772377946, "grad_norm": 0.2344839721918106, "learning_rate": 0.001, "loss": 1.3763, "step": 5360 }, { "epoch": 0.17124270544341338, "grad_norm": 0.23449836671352386, "learning_rate": 0.001, "loss": 1.3603, "step": 5370 }, { "epoch": 0.17156159316304728, "grad_norm": 0.2421630322933197, "learning_rate": 0.001, "loss": 1.3503, "step": 5380 }, { "epoch": 0.1718804808826812, "grad_norm": 0.23793990910053253, "learning_rate": 0.001, "loss": 1.3711, "step": 5390 }, { "epoch": 0.17219936860231513, "grad_norm": 0.24048961699008942, "learning_rate": 0.001, "loss": 1.398, "step": 5400 }, { "epoch": 0.17251825632194903, "grad_norm": 0.22494950890541077, "learning_rate": 0.001, "loss": 1.3571, "step": 5410 }, { "epoch": 0.17283714404158296, "grad_norm": 0.22547166049480438, "learning_rate": 0.001, "loss": 1.3788, "step": 5420 }, { "epoch": 0.17315603176121688, "grad_norm": 0.23615238070487976, "learning_rate": 0.001, "loss": 1.37, "step": 5430 }, { "epoch": 0.17347491948085078, "grad_norm": 0.23652249574661255, "learning_rate": 0.001, "loss": 1.3757, "step": 5440 }, { "epoch": 0.1737938072004847, "grad_norm": 0.24254925549030304, "learning_rate": 0.001, "loss": 1.3745, "step": 5450 }, { "epoch": 0.17411269492011863, "grad_norm": 0.2400273233652115, "learning_rate": 0.001, "loss": 1.3562, "step": 5460 }, { "epoch": 0.17443158263975253, "grad_norm": 0.23082616925239563, "learning_rate": 0.001, "loss": 1.3822, "step": 5470 }, { "epoch": 0.17475047035938646, "grad_norm": 0.2507484257221222, "learning_rate": 0.001, "loss": 1.3813, "step": 5480 }, { "epoch": 0.17506935807902038, "grad_norm": 0.23675039410591125, "learning_rate": 0.001, "loss": 1.3426, "step": 5490 }, { "epoch": 0.1753882457986543, "grad_norm": 0.2322949916124344, "learning_rate": 0.001, "loss": 1.3429, "step": 5500 }, { "epoch": 0.1757071335182882, "grad_norm": 0.23295463621616364, "learning_rate": 0.001, "loss": 1.3746, "step": 5510 }, { "epoch": 0.17602602123792213, "grad_norm": 0.2300776094198227, "learning_rate": 0.001, "loss": 1.3767, "step": 5520 }, { "epoch": 0.17634490895755606, "grad_norm": 0.23304656147956848, "learning_rate": 0.001, "loss": 1.3554, "step": 5530 }, { "epoch": 0.17666379667718995, "grad_norm": 0.23406460881233215, "learning_rate": 0.001, "loss": 1.3605, "step": 5540 }, { "epoch": 0.17698268439682388, "grad_norm": 0.2351730912923813, "learning_rate": 0.001, "loss": 1.3747, "step": 5550 }, { "epoch": 0.1773015721164578, "grad_norm": 0.2326095998287201, "learning_rate": 0.001, "loss": 1.3662, "step": 5560 }, { "epoch": 0.1776204598360917, "grad_norm": 0.23386625945568085, "learning_rate": 0.001, "loss": 1.3551, "step": 5570 }, { "epoch": 0.17793934755572563, "grad_norm": 0.2271582931280136, "learning_rate": 0.001, "loss": 1.3441, "step": 5580 }, { "epoch": 0.17825823527535956, "grad_norm": 0.23613621294498444, "learning_rate": 0.001, "loss": 1.3816, "step": 5590 }, { "epoch": 0.17857712299499345, "grad_norm": 0.23003219068050385, "learning_rate": 0.001, "loss": 1.3676, "step": 5600 }, { "epoch": 0.17889601071462738, "grad_norm": 0.23304754495620728, "learning_rate": 0.001, "loss": 1.365, "step": 5610 }, { "epoch": 0.1792148984342613, "grad_norm": 0.23864856362342834, "learning_rate": 0.001, "loss": 1.3803, "step": 5620 }, { "epoch": 0.1795337861538952, "grad_norm": 0.23164138197898865, "learning_rate": 0.001, "loss": 1.3666, "step": 5630 }, { "epoch": 0.17985267387352913, "grad_norm": 0.22914890944957733, "learning_rate": 0.001, "loss": 1.3537, "step": 5640 }, { "epoch": 0.18017156159316305, "grad_norm": 0.23517832159996033, "learning_rate": 0.001, "loss": 1.3742, "step": 5650 }, { "epoch": 0.18049044931279695, "grad_norm": 0.23220737278461456, "learning_rate": 0.001, "loss": 1.3631, "step": 5660 }, { "epoch": 0.18080933703243088, "grad_norm": 0.23347978293895721, "learning_rate": 0.001, "loss": 1.3663, "step": 5670 }, { "epoch": 0.1811282247520648, "grad_norm": 0.2269410789012909, "learning_rate": 0.001, "loss": 1.3452, "step": 5680 }, { "epoch": 0.1814471124716987, "grad_norm": 0.23430763185024261, "learning_rate": 0.001, "loss": 1.3517, "step": 5690 }, { "epoch": 0.18176600019133263, "grad_norm": 0.22661398351192474, "learning_rate": 0.001, "loss": 1.357, "step": 5700 }, { "epoch": 0.18208488791096655, "grad_norm": 0.23189307749271393, "learning_rate": 0.001, "loss": 1.3619, "step": 5710 }, { "epoch": 0.18240377563060048, "grad_norm": 0.22509999573230743, "learning_rate": 0.001, "loss": 1.3737, "step": 5720 }, { "epoch": 0.18272266335023438, "grad_norm": 0.23490670323371887, "learning_rate": 0.001, "loss": 1.3397, "step": 5730 }, { "epoch": 0.1830415510698683, "grad_norm": 0.22804059088230133, "learning_rate": 0.001, "loss": 1.3344, "step": 5740 }, { "epoch": 0.18336043878950223, "grad_norm": 0.22229887545108795, "learning_rate": 0.001, "loss": 1.3693, "step": 5750 }, { "epoch": 0.18367932650913613, "grad_norm": 0.23096612095832825, "learning_rate": 0.001, "loss": 1.3536, "step": 5760 }, { "epoch": 0.18399821422877005, "grad_norm": 0.23599880933761597, "learning_rate": 0.001, "loss": 1.3496, "step": 5770 }, { "epoch": 0.18431710194840398, "grad_norm": 0.22269390523433685, "learning_rate": 0.001, "loss": 1.3378, "step": 5780 }, { "epoch": 0.18463598966803788, "grad_norm": 0.22350311279296875, "learning_rate": 0.001, "loss": 1.34, "step": 5790 }, { "epoch": 0.1849548773876718, "grad_norm": 0.23092792928218842, "learning_rate": 0.001, "loss": 1.3526, "step": 5800 }, { "epoch": 0.18527376510730573, "grad_norm": 0.22100470960140228, "learning_rate": 0.001, "loss": 1.3526, "step": 5810 }, { "epoch": 0.18559265282693962, "grad_norm": 0.2348584532737732, "learning_rate": 0.001, "loss": 1.3433, "step": 5820 }, { "epoch": 0.18591154054657355, "grad_norm": 0.22047287225723267, "learning_rate": 0.001, "loss": 1.3441, "step": 5830 }, { "epoch": 0.18623042826620748, "grad_norm": 0.22145791351795197, "learning_rate": 0.001, "loss": 1.3441, "step": 5840 }, { "epoch": 0.18654931598584137, "grad_norm": 0.22203807532787323, "learning_rate": 0.001, "loss": 1.3493, "step": 5850 }, { "epoch": 0.1868682037054753, "grad_norm": 0.22726795077323914, "learning_rate": 0.001, "loss": 1.3482, "step": 5860 }, { "epoch": 0.18718709142510923, "grad_norm": 0.22857330739498138, "learning_rate": 0.001, "loss": 1.34, "step": 5870 }, { "epoch": 0.18750597914474312, "grad_norm": 0.22861173748970032, "learning_rate": 0.001, "loss": 1.3598, "step": 5880 }, { "epoch": 0.18782486686437705, "grad_norm": 0.22939689457416534, "learning_rate": 0.001, "loss": 1.3443, "step": 5890 }, { "epoch": 0.18814375458401097, "grad_norm": 0.23213569819927216, "learning_rate": 0.001, "loss": 1.3412, "step": 5900 }, { "epoch": 0.18846264230364487, "grad_norm": 0.2435399442911148, "learning_rate": 0.001, "loss": 1.3379, "step": 5910 }, { "epoch": 0.1887815300232788, "grad_norm": 0.22397400438785553, "learning_rate": 0.001, "loss": 1.3425, "step": 5920 }, { "epoch": 0.18910041774291272, "grad_norm": 0.23101122677326202, "learning_rate": 0.001, "loss": 1.341, "step": 5930 }, { "epoch": 0.18941930546254665, "grad_norm": 0.22613437473773956, "learning_rate": 0.001, "loss": 1.3503, "step": 5940 }, { "epoch": 0.18973819318218055, "grad_norm": 0.23326362669467926, "learning_rate": 0.001, "loss": 1.3355, "step": 5950 }, { "epoch": 0.19005708090181447, "grad_norm": 0.22755029797554016, "learning_rate": 0.001, "loss": 1.3639, "step": 5960 }, { "epoch": 0.1903759686214484, "grad_norm": 0.23369061946868896, "learning_rate": 0.001, "loss": 1.3495, "step": 5970 }, { "epoch": 0.1906948563410823, "grad_norm": 0.2289803922176361, "learning_rate": 0.001, "loss": 1.3372, "step": 5980 }, { "epoch": 0.19101374406071622, "grad_norm": 0.22431151568889618, "learning_rate": 0.001, "loss": 1.3442, "step": 5990 }, { "epoch": 0.19133263178035015, "grad_norm": 0.22803178429603577, "learning_rate": 0.001, "loss": 1.325, "step": 6000 }, { "epoch": 0.19165151949998405, "grad_norm": 0.2185739278793335, "learning_rate": 0.001, "loss": 1.3482, "step": 6010 }, { "epoch": 0.19197040721961797, "grad_norm": 0.2274511605501175, "learning_rate": 0.001, "loss": 1.3283, "step": 6020 }, { "epoch": 0.1922892949392519, "grad_norm": 0.22346645593643188, "learning_rate": 0.001, "loss": 1.3133, "step": 6030 }, { "epoch": 0.1926081826588858, "grad_norm": 0.21699689328670502, "learning_rate": 0.001, "loss": 1.3313, "step": 6040 }, { "epoch": 0.19292707037851972, "grad_norm": 0.22448177635669708, "learning_rate": 0.001, "loss": 1.3327, "step": 6050 }, { "epoch": 0.19324595809815365, "grad_norm": 0.2269693911075592, "learning_rate": 0.001, "loss": 1.325, "step": 6060 }, { "epoch": 0.19356484581778755, "grad_norm": 0.22963395714759827, "learning_rate": 0.001, "loss": 1.3337, "step": 6070 }, { "epoch": 0.19388373353742147, "grad_norm": 0.22270570695400238, "learning_rate": 0.001, "loss": 1.3263, "step": 6080 }, { "epoch": 0.1942026212570554, "grad_norm": 0.22263237833976746, "learning_rate": 0.001, "loss": 1.3291, "step": 6090 }, { "epoch": 0.1945215089766893, "grad_norm": 0.21834489703178406, "learning_rate": 0.001, "loss": 1.3361, "step": 6100 }, { "epoch": 0.19484039669632322, "grad_norm": 0.22711575031280518, "learning_rate": 0.001, "loss": 1.3428, "step": 6110 }, { "epoch": 0.19515928441595715, "grad_norm": 0.23172850906848907, "learning_rate": 0.001, "loss": 1.3353, "step": 6120 }, { "epoch": 0.19547817213559107, "grad_norm": 0.22978724539279938, "learning_rate": 0.001, "loss": 1.3139, "step": 6130 }, { "epoch": 0.19579705985522497, "grad_norm": 0.22300496697425842, "learning_rate": 0.001, "loss": 1.3414, "step": 6140 }, { "epoch": 0.1961159475748589, "grad_norm": 0.22236841917037964, "learning_rate": 0.001, "loss": 1.3035, "step": 6150 }, { "epoch": 0.19643483529449282, "grad_norm": 0.22211438417434692, "learning_rate": 0.001, "loss": 1.3141, "step": 6160 }, { "epoch": 0.19675372301412672, "grad_norm": 0.21687041223049164, "learning_rate": 0.001, "loss": 1.3444, "step": 6170 }, { "epoch": 0.19707261073376064, "grad_norm": 0.2247399538755417, "learning_rate": 0.001, "loss": 1.3274, "step": 6180 }, { "epoch": 0.19739149845339457, "grad_norm": 0.22150497138500214, "learning_rate": 0.001, "loss": 1.3467, "step": 6190 }, { "epoch": 0.19771038617302847, "grad_norm": 0.22610719501972198, "learning_rate": 0.001, "loss": 1.331, "step": 6200 }, { "epoch": 0.1980292738926624, "grad_norm": 0.21927721798419952, "learning_rate": 0.001, "loss": 1.3341, "step": 6210 }, { "epoch": 0.19834816161229632, "grad_norm": 0.22373303771018982, "learning_rate": 0.001, "loss": 1.3066, "step": 6220 }, { "epoch": 0.19866704933193022, "grad_norm": 0.22470548748970032, "learning_rate": 0.001, "loss": 1.3102, "step": 6230 }, { "epoch": 0.19898593705156414, "grad_norm": 0.22980283200740814, "learning_rate": 0.001, "loss": 1.326, "step": 6240 }, { "epoch": 0.19930482477119807, "grad_norm": 0.2177285999059677, "learning_rate": 0.001, "loss": 1.3367, "step": 6250 }, { "epoch": 0.19962371249083197, "grad_norm": 0.23009584844112396, "learning_rate": 0.001, "loss": 1.3157, "step": 6260 }, { "epoch": 0.1999426002104659, "grad_norm": 0.22571367025375366, "learning_rate": 0.001, "loss": 1.3271, "step": 6270 }, { "epoch": 0.20026148793009982, "grad_norm": 0.22114919126033783, "learning_rate": 0.001, "loss": 1.3243, "step": 6280 }, { "epoch": 0.20058037564973372, "grad_norm": 0.22167930006980896, "learning_rate": 0.001, "loss": 1.3355, "step": 6290 }, { "epoch": 0.20089926336936764, "grad_norm": 0.22725333273410797, "learning_rate": 0.001, "loss": 1.3277, "step": 6300 }, { "epoch": 0.20121815108900157, "grad_norm": 0.22026440501213074, "learning_rate": 0.001, "loss": 1.3198, "step": 6310 }, { "epoch": 0.20153703880863547, "grad_norm": 0.22313068807125092, "learning_rate": 0.001, "loss": 1.321, "step": 6320 }, { "epoch": 0.2018559265282694, "grad_norm": 0.22668123245239258, "learning_rate": 0.001, "loss": 1.3187, "step": 6330 }, { "epoch": 0.20217481424790332, "grad_norm": 0.22383753955364227, "learning_rate": 0.001, "loss": 1.3243, "step": 6340 }, { "epoch": 0.20249370196753724, "grad_norm": 0.22911064326763153, "learning_rate": 0.001, "loss": 1.3146, "step": 6350 }, { "epoch": 0.20281258968717114, "grad_norm": 0.21768306195735931, "learning_rate": 0.001, "loss": 1.3181, "step": 6360 }, { "epoch": 0.20313147740680507, "grad_norm": 0.21696394681930542, "learning_rate": 0.001, "loss": 1.3349, "step": 6370 }, { "epoch": 0.203450365126439, "grad_norm": 0.21784517168998718, "learning_rate": 0.001, "loss": 1.3312, "step": 6380 }, { "epoch": 0.2037692528460729, "grad_norm": 0.22547577321529388, "learning_rate": 0.001, "loss": 1.3171, "step": 6390 }, { "epoch": 0.20408814056570682, "grad_norm": 0.22969777882099152, "learning_rate": 0.001, "loss": 1.3022, "step": 6400 }, { "epoch": 0.20440702828534074, "grad_norm": 0.23056884109973907, "learning_rate": 0.001, "loss": 1.3302, "step": 6410 }, { "epoch": 0.20472591600497464, "grad_norm": 0.21913301944732666, "learning_rate": 0.001, "loss": 1.3091, "step": 6420 }, { "epoch": 0.20504480372460857, "grad_norm": 0.2276025414466858, "learning_rate": 0.001, "loss": 1.3374, "step": 6430 }, { "epoch": 0.2053636914442425, "grad_norm": 0.21844200789928436, "learning_rate": 0.001, "loss": 1.3138, "step": 6440 }, { "epoch": 0.2056825791638764, "grad_norm": 0.21239309012889862, "learning_rate": 0.001, "loss": 1.3079, "step": 6450 }, { "epoch": 0.20600146688351031, "grad_norm": 0.21648620069026947, "learning_rate": 0.001, "loss": 1.3069, "step": 6460 }, { "epoch": 0.20632035460314424, "grad_norm": 0.2209746241569519, "learning_rate": 0.001, "loss": 1.3235, "step": 6470 }, { "epoch": 0.20663924232277814, "grad_norm": 0.22533218562602997, "learning_rate": 0.001, "loss": 1.3189, "step": 6480 }, { "epoch": 0.20695813004241206, "grad_norm": 0.22373533248901367, "learning_rate": 0.001, "loss": 1.321, "step": 6490 }, { "epoch": 0.207277017762046, "grad_norm": 0.22977867722511292, "learning_rate": 0.001, "loss": 1.3111, "step": 6500 }, { "epoch": 0.2075959054816799, "grad_norm": 0.2225407063961029, "learning_rate": 0.001, "loss": 1.3111, "step": 6510 }, { "epoch": 0.2079147932013138, "grad_norm": 0.22135379910469055, "learning_rate": 0.001, "loss": 1.3111, "step": 6520 }, { "epoch": 0.20823368092094774, "grad_norm": 0.2236153930425644, "learning_rate": 0.001, "loss": 1.3216, "step": 6530 }, { "epoch": 0.20855256864058166, "grad_norm": 0.21490934491157532, "learning_rate": 0.001, "loss": 1.3331, "step": 6540 }, { "epoch": 0.20887145636021556, "grad_norm": 0.22031015157699585, "learning_rate": 0.001, "loss": 1.3187, "step": 6550 }, { "epoch": 0.2091903440798495, "grad_norm": 0.21435341238975525, "learning_rate": 0.001, "loss": 1.3103, "step": 6560 }, { "epoch": 0.20950923179948341, "grad_norm": 0.21968773007392883, "learning_rate": 0.001, "loss": 1.316, "step": 6570 }, { "epoch": 0.2098281195191173, "grad_norm": 0.21836096048355103, "learning_rate": 0.001, "loss": 1.316, "step": 6580 }, { "epoch": 0.21014700723875124, "grad_norm": 0.21528609097003937, "learning_rate": 0.001, "loss": 1.3201, "step": 6590 }, { "epoch": 0.21046589495838516, "grad_norm": 0.22226691246032715, "learning_rate": 0.001, "loss": 1.2988, "step": 6600 }, { "epoch": 0.21078478267801906, "grad_norm": 0.21093668043613434, "learning_rate": 0.001, "loss": 1.3211, "step": 6610 }, { "epoch": 0.211103670397653, "grad_norm": 0.21473108232021332, "learning_rate": 0.001, "loss": 1.307, "step": 6620 }, { "epoch": 0.2114225581172869, "grad_norm": 0.21987295150756836, "learning_rate": 0.001, "loss": 1.3161, "step": 6630 }, { "epoch": 0.2117414458369208, "grad_norm": 0.2141866534948349, "learning_rate": 0.001, "loss": 1.3273, "step": 6640 }, { "epoch": 0.21206033355655474, "grad_norm": 0.211161270737648, "learning_rate": 0.001, "loss": 1.3012, "step": 6650 }, { "epoch": 0.21237922127618866, "grad_norm": 0.22050651907920837, "learning_rate": 0.001, "loss": 1.3133, "step": 6660 }, { "epoch": 0.21269810899582256, "grad_norm": 0.22973541915416718, "learning_rate": 0.001, "loss": 1.3085, "step": 6670 }, { "epoch": 0.21301699671545649, "grad_norm": 0.21897663176059723, "learning_rate": 0.001, "loss": 1.3053, "step": 6680 }, { "epoch": 0.2133358844350904, "grad_norm": 0.22126558423042297, "learning_rate": 0.001, "loss": 1.3155, "step": 6690 }, { "epoch": 0.2136547721547243, "grad_norm": 0.2211180031299591, "learning_rate": 0.001, "loss": 1.3106, "step": 6700 }, { "epoch": 0.21397365987435824, "grad_norm": 0.2270718365907669, "learning_rate": 0.001, "loss": 1.2997, "step": 6710 }, { "epoch": 0.21429254759399216, "grad_norm": 0.22596475481987, "learning_rate": 0.001, "loss": 1.3124, "step": 6720 }, { "epoch": 0.21461143531362606, "grad_norm": 0.21958962082862854, "learning_rate": 0.001, "loss": 1.3033, "step": 6730 }, { "epoch": 0.21493032303325998, "grad_norm": 0.21273846924304962, "learning_rate": 0.001, "loss": 1.3066, "step": 6740 }, { "epoch": 0.2152492107528939, "grad_norm": 0.21829353272914886, "learning_rate": 0.001, "loss": 1.321, "step": 6750 }, { "epoch": 0.21556809847252784, "grad_norm": 0.21527628600597382, "learning_rate": 0.001, "loss": 1.3192, "step": 6760 }, { "epoch": 0.21588698619216173, "grad_norm": 0.2100159376859665, "learning_rate": 0.001, "loss": 1.301, "step": 6770 }, { "epoch": 0.21620587391179566, "grad_norm": 0.2193724364042282, "learning_rate": 0.001, "loss": 1.2945, "step": 6780 }, { "epoch": 0.21652476163142959, "grad_norm": 0.22077114880084991, "learning_rate": 0.001, "loss": 1.2939, "step": 6790 }, { "epoch": 0.21684364935106348, "grad_norm": 0.2293231189250946, "learning_rate": 0.001, "loss": 1.2818, "step": 6800 }, { "epoch": 0.2171625370706974, "grad_norm": 0.21325518190860748, "learning_rate": 0.001, "loss": 1.3176, "step": 6810 }, { "epoch": 0.21748142479033133, "grad_norm": 0.2264716476202011, "learning_rate": 0.001, "loss": 1.3017, "step": 6820 }, { "epoch": 0.21780031250996523, "grad_norm": 0.21110965311527252, "learning_rate": 0.001, "loss": 1.3018, "step": 6830 }, { "epoch": 0.21811920022959916, "grad_norm": 0.21436037123203278, "learning_rate": 0.001, "loss": 1.2995, "step": 6840 }, { "epoch": 0.21843808794923308, "grad_norm": 0.2117929756641388, "learning_rate": 0.001, "loss": 1.282, "step": 6850 }, { "epoch": 0.21875697566886698, "grad_norm": 0.21568013727664948, "learning_rate": 0.001, "loss": 1.2839, "step": 6860 }, { "epoch": 0.2190758633885009, "grad_norm": 0.21337729692459106, "learning_rate": 0.001, "loss": 1.2981, "step": 6870 }, { "epoch": 0.21939475110813483, "grad_norm": 0.21572987735271454, "learning_rate": 0.001, "loss": 1.3014, "step": 6880 }, { "epoch": 0.21971363882776873, "grad_norm": 0.2122887820005417, "learning_rate": 0.001, "loss": 1.2972, "step": 6890 }, { "epoch": 0.22003252654740266, "grad_norm": 0.21946419775485992, "learning_rate": 0.001, "loss": 1.3096, "step": 6900 }, { "epoch": 0.22035141426703658, "grad_norm": 0.21935173869132996, "learning_rate": 0.001, "loss": 1.3027, "step": 6910 }, { "epoch": 0.22067030198667048, "grad_norm": 0.2146780639886856, "learning_rate": 0.001, "loss": 1.3165, "step": 6920 }, { "epoch": 0.2209891897063044, "grad_norm": 0.21132074296474457, "learning_rate": 0.001, "loss": 1.3073, "step": 6930 }, { "epoch": 0.22130807742593833, "grad_norm": 0.21391667425632477, "learning_rate": 0.001, "loss": 1.2909, "step": 6940 }, { "epoch": 0.22162696514557226, "grad_norm": 0.21686720848083496, "learning_rate": 0.001, "loss": 1.2877, "step": 6950 }, { "epoch": 0.22194585286520616, "grad_norm": 0.2125672698020935, "learning_rate": 0.001, "loss": 1.2829, "step": 6960 }, { "epoch": 0.22226474058484008, "grad_norm": 0.20903590321540833, "learning_rate": 0.001, "loss": 1.2921, "step": 6970 }, { "epoch": 0.222583628304474, "grad_norm": 0.21638572216033936, "learning_rate": 0.001, "loss": 1.2918, "step": 6980 }, { "epoch": 0.2229025160241079, "grad_norm": 0.2114180624485016, "learning_rate": 0.001, "loss": 1.3036, "step": 6990 }, { "epoch": 0.22322140374374183, "grad_norm": 0.21430033445358276, "learning_rate": 0.001, "loss": 1.3194, "step": 7000 }, { "epoch": 0.22354029146337576, "grad_norm": 0.21226227283477783, "learning_rate": 0.001, "loss": 1.2825, "step": 7010 }, { "epoch": 0.22385917918300965, "grad_norm": 0.21256470680236816, "learning_rate": 0.001, "loss": 1.3053, "step": 7020 }, { "epoch": 0.22417806690264358, "grad_norm": 0.22182166576385498, "learning_rate": 0.001, "loss": 1.2713, "step": 7030 }, { "epoch": 0.2244969546222775, "grad_norm": 0.21688029170036316, "learning_rate": 0.001, "loss": 1.2743, "step": 7040 }, { "epoch": 0.2248158423419114, "grad_norm": 0.2168598622083664, "learning_rate": 0.001, "loss": 1.2936, "step": 7050 }, { "epoch": 0.22513473006154533, "grad_norm": 0.2037907838821411, "learning_rate": 0.001, "loss": 1.2862, "step": 7060 }, { "epoch": 0.22545361778117926, "grad_norm": 0.21954897046089172, "learning_rate": 0.001, "loss": 1.2856, "step": 7070 }, { "epoch": 0.22577250550081315, "grad_norm": 0.21183601021766663, "learning_rate": 0.001, "loss": 1.3106, "step": 7080 }, { "epoch": 0.22609139322044708, "grad_norm": 0.21412093937397003, "learning_rate": 0.001, "loss": 1.3166, "step": 7090 }, { "epoch": 0.226410280940081, "grad_norm": 0.2191409319639206, "learning_rate": 0.001, "loss": 1.2924, "step": 7100 }, { "epoch": 0.2267291686597149, "grad_norm": 0.21817272901535034, "learning_rate": 0.001, "loss": 1.3071, "step": 7110 }, { "epoch": 0.22704805637934883, "grad_norm": 0.20995283126831055, "learning_rate": 0.001, "loss": 1.2837, "step": 7120 }, { "epoch": 0.22736694409898275, "grad_norm": 0.21756869554519653, "learning_rate": 0.001, "loss": 1.3008, "step": 7130 }, { "epoch": 0.22768583181861665, "grad_norm": 0.21312449872493744, "learning_rate": 0.001, "loss": 1.2956, "step": 7140 }, { "epoch": 0.22800471953825058, "grad_norm": 0.2182151824235916, "learning_rate": 0.001, "loss": 1.2939, "step": 7150 }, { "epoch": 0.2283236072578845, "grad_norm": 0.2198774218559265, "learning_rate": 0.001, "loss": 1.2938, "step": 7160 }, { "epoch": 0.22864249497751843, "grad_norm": 0.2158440351486206, "learning_rate": 0.001, "loss": 1.3113, "step": 7170 }, { "epoch": 0.22896138269715233, "grad_norm": 0.21088720858097076, "learning_rate": 0.001, "loss": 1.2764, "step": 7180 }, { "epoch": 0.22928027041678625, "grad_norm": 0.20957180857658386, "learning_rate": 0.001, "loss": 1.2852, "step": 7190 }, { "epoch": 0.22959915813642018, "grad_norm": 0.2130306363105774, "learning_rate": 0.001, "loss": 1.2853, "step": 7200 }, { "epoch": 0.22991804585605408, "grad_norm": 0.21509139239788055, "learning_rate": 0.001, "loss": 1.2817, "step": 7210 }, { "epoch": 0.230236933575688, "grad_norm": 0.2113594263792038, "learning_rate": 0.001, "loss": 1.261, "step": 7220 }, { "epoch": 0.23055582129532193, "grad_norm": 0.20813211798667908, "learning_rate": 0.001, "loss": 1.2936, "step": 7230 }, { "epoch": 0.23087470901495583, "grad_norm": 0.21231849491596222, "learning_rate": 0.001, "loss": 1.2991, "step": 7240 }, { "epoch": 0.23119359673458975, "grad_norm": 0.21319466829299927, "learning_rate": 0.001, "loss": 1.2865, "step": 7250 }, { "epoch": 0.23151248445422368, "grad_norm": 0.2093285769224167, "learning_rate": 0.001, "loss": 1.2739, "step": 7260 }, { "epoch": 0.23183137217385758, "grad_norm": 0.21632854640483856, "learning_rate": 0.001, "loss": 1.274, "step": 7270 }, { "epoch": 0.2321502598934915, "grad_norm": 0.2091766893863678, "learning_rate": 0.001, "loss": 1.3008, "step": 7280 }, { "epoch": 0.23246914761312543, "grad_norm": 0.20856739580631256, "learning_rate": 0.001, "loss": 1.2885, "step": 7290 }, { "epoch": 0.23278803533275932, "grad_norm": 0.2127932757139206, "learning_rate": 0.001, "loss": 1.2793, "step": 7300 }, { "epoch": 0.23310692305239325, "grad_norm": 0.2079467624425888, "learning_rate": 0.001, "loss": 1.2964, "step": 7310 }, { "epoch": 0.23342581077202718, "grad_norm": 0.21427300572395325, "learning_rate": 0.001, "loss": 1.2781, "step": 7320 }, { "epoch": 0.23374469849166107, "grad_norm": 0.20755432546138763, "learning_rate": 0.001, "loss": 1.2683, "step": 7330 }, { "epoch": 0.234063586211295, "grad_norm": 0.20725442469120026, "learning_rate": 0.001, "loss": 1.2912, "step": 7340 }, { "epoch": 0.23438247393092893, "grad_norm": 0.20349465310573578, "learning_rate": 0.001, "loss": 1.2808, "step": 7350 }, { "epoch": 0.23470136165056282, "grad_norm": 0.20813782513141632, "learning_rate": 0.001, "loss": 1.2753, "step": 7360 }, { "epoch": 0.23502024937019675, "grad_norm": 0.2123478800058365, "learning_rate": 0.001, "loss": 1.2779, "step": 7370 }, { "epoch": 0.23533913708983067, "grad_norm": 0.21471963822841644, "learning_rate": 0.001, "loss": 1.2853, "step": 7380 }, { "epoch": 0.2356580248094646, "grad_norm": 0.2094661146402359, "learning_rate": 0.001, "loss": 1.2864, "step": 7390 }, { "epoch": 0.2359769125290985, "grad_norm": 0.2113139033317566, "learning_rate": 0.001, "loss": 1.2727, "step": 7400 }, { "epoch": 0.23629580024873242, "grad_norm": 0.2110980600118637, "learning_rate": 0.001, "loss": 1.2748, "step": 7410 }, { "epoch": 0.23661468796836635, "grad_norm": 0.22186870872974396, "learning_rate": 0.001, "loss": 1.2975, "step": 7420 }, { "epoch": 0.23693357568800025, "grad_norm": 0.2114681750535965, "learning_rate": 0.001, "loss": 1.2569, "step": 7430 }, { "epoch": 0.23725246340763417, "grad_norm": 0.21068887412548065, "learning_rate": 0.001, "loss": 1.2703, "step": 7440 }, { "epoch": 0.2375713511272681, "grad_norm": 0.21688032150268555, "learning_rate": 0.001, "loss": 1.294, "step": 7450 }, { "epoch": 0.237890238846902, "grad_norm": 0.20603680610656738, "learning_rate": 0.001, "loss": 1.2963, "step": 7460 }, { "epoch": 0.23820912656653592, "grad_norm": 0.2126203328371048, "learning_rate": 0.001, "loss": 1.2791, "step": 7470 }, { "epoch": 0.23852801428616985, "grad_norm": 0.20721890032291412, "learning_rate": 0.001, "loss": 1.2941, "step": 7480 }, { "epoch": 0.23884690200580375, "grad_norm": 0.20675908029079437, "learning_rate": 0.001, "loss": 1.2644, "step": 7490 }, { "epoch": 0.23916578972543767, "grad_norm": 0.2099614143371582, "learning_rate": 0.001, "loss": 1.2948, "step": 7500 }, { "epoch": 0.2394846774450716, "grad_norm": 0.2123858779668808, "learning_rate": 0.001, "loss": 1.2781, "step": 7510 }, { "epoch": 0.2398035651647055, "grad_norm": 0.20939020812511444, "learning_rate": 0.001, "loss": 1.2867, "step": 7520 }, { "epoch": 0.24012245288433942, "grad_norm": 0.1992987096309662, "learning_rate": 0.001, "loss": 1.2857, "step": 7530 }, { "epoch": 0.24044134060397335, "grad_norm": 0.20575982332229614, "learning_rate": 0.001, "loss": 1.2829, "step": 7540 }, { "epoch": 0.24076022832360724, "grad_norm": 0.2120957225561142, "learning_rate": 0.001, "loss": 1.2595, "step": 7550 }, { "epoch": 0.24107911604324117, "grad_norm": 0.2008555829524994, "learning_rate": 0.001, "loss": 1.2722, "step": 7560 }, { "epoch": 0.2413980037628751, "grad_norm": 0.2073550969362259, "learning_rate": 0.001, "loss": 1.2973, "step": 7570 }, { "epoch": 0.24171689148250902, "grad_norm": 0.21187391877174377, "learning_rate": 0.001, "loss": 1.2726, "step": 7580 }, { "epoch": 0.24203577920214292, "grad_norm": 0.20674507319927216, "learning_rate": 0.001, "loss": 1.2773, "step": 7590 }, { "epoch": 0.24235466692177685, "grad_norm": 0.20273102819919586, "learning_rate": 0.001, "loss": 1.2873, "step": 7600 }, { "epoch": 0.24267355464141077, "grad_norm": 0.2100614309310913, "learning_rate": 0.001, "loss": 1.253, "step": 7610 }, { "epoch": 0.24299244236104467, "grad_norm": 0.22176362574100494, "learning_rate": 0.001, "loss": 1.2605, "step": 7620 }, { "epoch": 0.2433113300806786, "grad_norm": 0.2098623365163803, "learning_rate": 0.001, "loss": 1.2823, "step": 7630 }, { "epoch": 0.24363021780031252, "grad_norm": 0.2050837129354477, "learning_rate": 0.001, "loss": 1.3001, "step": 7640 }, { "epoch": 0.24394910551994642, "grad_norm": 0.20297372341156006, "learning_rate": 0.001, "loss": 1.2672, "step": 7650 }, { "epoch": 0.24426799323958034, "grad_norm": 0.19953450560569763, "learning_rate": 0.001, "loss": 1.2662, "step": 7660 }, { "epoch": 0.24458688095921427, "grad_norm": 0.20888438820838928, "learning_rate": 0.001, "loss": 1.2693, "step": 7670 }, { "epoch": 0.24490576867884817, "grad_norm": 0.20096510648727417, "learning_rate": 0.001, "loss": 1.2601, "step": 7680 }, { "epoch": 0.2452246563984821, "grad_norm": 0.21057695150375366, "learning_rate": 0.001, "loss": 1.2813, "step": 7690 }, { "epoch": 0.24554354411811602, "grad_norm": 0.2141493558883667, "learning_rate": 0.001, "loss": 1.2634, "step": 7700 }, { "epoch": 0.24586243183774992, "grad_norm": 0.20686107873916626, "learning_rate": 0.001, "loss": 1.2446, "step": 7710 }, { "epoch": 0.24618131955738384, "grad_norm": 0.20684966444969177, "learning_rate": 0.001, "loss": 1.2648, "step": 7720 }, { "epoch": 0.24650020727701777, "grad_norm": 0.2081177532672882, "learning_rate": 0.001, "loss": 1.2761, "step": 7730 }, { "epoch": 0.24681909499665167, "grad_norm": 0.2092471718788147, "learning_rate": 0.001, "loss": 1.2679, "step": 7740 }, { "epoch": 0.2471379827162856, "grad_norm": 0.20629584789276123, "learning_rate": 0.001, "loss": 1.2924, "step": 7750 }, { "epoch": 0.24745687043591952, "grad_norm": 0.20295438170433044, "learning_rate": 0.001, "loss": 1.262, "step": 7760 }, { "epoch": 0.24777575815555342, "grad_norm": 0.1990422010421753, "learning_rate": 0.001, "loss": 1.2745, "step": 7770 }, { "epoch": 0.24809464587518734, "grad_norm": 0.2118285447359085, "learning_rate": 0.001, "loss": 1.2472, "step": 7780 }, { "epoch": 0.24841353359482127, "grad_norm": 0.2059514820575714, "learning_rate": 0.001, "loss": 1.281, "step": 7790 }, { "epoch": 0.2487324213144552, "grad_norm": 0.20811574161052704, "learning_rate": 0.001, "loss": 1.2628, "step": 7800 }, { "epoch": 0.2490513090340891, "grad_norm": 0.21515397727489471, "learning_rate": 0.001, "loss": 1.2752, "step": 7810 }, { "epoch": 0.24937019675372302, "grad_norm": 0.20975211262702942, "learning_rate": 0.001, "loss": 1.2606, "step": 7820 }, { "epoch": 0.24968908447335694, "grad_norm": 0.19748273491859436, "learning_rate": 0.001, "loss": 1.2481, "step": 7830 }, { "epoch": 0.25000797219299087, "grad_norm": 0.20883125066757202, "learning_rate": 0.001, "loss": 1.252, "step": 7840 }, { "epoch": 0.25032685991262477, "grad_norm": 0.1997639238834381, "learning_rate": 0.001, "loss": 1.2382, "step": 7850 }, { "epoch": 0.25064574763225866, "grad_norm": 0.20731328427791595, "learning_rate": 0.001, "loss": 1.2407, "step": 7860 }, { "epoch": 0.2509646353518926, "grad_norm": 0.20873534679412842, "learning_rate": 0.001, "loss": 1.2475, "step": 7870 }, { "epoch": 0.2512835230715265, "grad_norm": 0.20659437775611877, "learning_rate": 0.001, "loss": 1.2505, "step": 7880 }, { "epoch": 0.2516024107911604, "grad_norm": 0.20546947419643402, "learning_rate": 0.001, "loss": 1.2891, "step": 7890 }, { "epoch": 0.25192129851079437, "grad_norm": 0.20524942874908447, "learning_rate": 0.001, "loss": 1.2644, "step": 7900 }, { "epoch": 0.25224018623042826, "grad_norm": 0.2121630758047104, "learning_rate": 0.001, "loss": 1.2607, "step": 7910 }, { "epoch": 0.25255907395006216, "grad_norm": 0.20716558396816254, "learning_rate": 0.001, "loss": 1.2671, "step": 7920 }, { "epoch": 0.2528779616696961, "grad_norm": 0.21428154408931732, "learning_rate": 0.001, "loss": 1.2506, "step": 7930 }, { "epoch": 0.25319684938933, "grad_norm": 0.202268585562706, "learning_rate": 0.001, "loss": 1.2561, "step": 7940 }, { "epoch": 0.2535157371089639, "grad_norm": 0.20032866299152374, "learning_rate": 0.001, "loss": 1.2575, "step": 7950 }, { "epoch": 0.25383462482859787, "grad_norm": 0.2026292085647583, "learning_rate": 0.001, "loss": 1.2714, "step": 7960 }, { "epoch": 0.25415351254823176, "grad_norm": 0.20408962666988373, "learning_rate": 0.001, "loss": 1.2591, "step": 7970 }, { "epoch": 0.25447240026786566, "grad_norm": 0.2172502875328064, "learning_rate": 0.001, "loss": 1.2891, "step": 7980 }, { "epoch": 0.2547912879874996, "grad_norm": 0.20545944571495056, "learning_rate": 0.001, "loss": 1.2786, "step": 7990 }, { "epoch": 0.2551101757071335, "grad_norm": 0.20460359752178192, "learning_rate": 0.001, "loss": 1.2502, "step": 8000 }, { "epoch": 0.2554290634267674, "grad_norm": 0.20056135952472687, "learning_rate": 0.001, "loss": 1.2456, "step": 8010 }, { "epoch": 0.25574795114640136, "grad_norm": 0.20083419978618622, "learning_rate": 0.001, "loss": 1.2616, "step": 8020 }, { "epoch": 0.25606683886603526, "grad_norm": 0.19361791014671326, "learning_rate": 0.001, "loss": 1.2442, "step": 8030 }, { "epoch": 0.25638572658566916, "grad_norm": 0.21003025770187378, "learning_rate": 0.001, "loss": 1.2615, "step": 8040 }, { "epoch": 0.2567046143053031, "grad_norm": 0.20616671442985535, "learning_rate": 0.001, "loss": 1.2771, "step": 8050 }, { "epoch": 0.257023502024937, "grad_norm": 0.20462070405483246, "learning_rate": 0.001, "loss": 1.2612, "step": 8060 }, { "epoch": 0.2573423897445709, "grad_norm": 0.2028227299451828, "learning_rate": 0.001, "loss": 1.2701, "step": 8070 }, { "epoch": 0.25766127746420486, "grad_norm": 0.20757098495960236, "learning_rate": 0.001, "loss": 1.2567, "step": 8080 }, { "epoch": 0.25798016518383876, "grad_norm": 0.20266075432300568, "learning_rate": 0.001, "loss": 1.2876, "step": 8090 }, { "epoch": 0.2582990529034727, "grad_norm": 0.20029297471046448, "learning_rate": 0.001, "loss": 1.2747, "step": 8100 }, { "epoch": 0.2586179406231066, "grad_norm": 0.2076231986284256, "learning_rate": 0.001, "loss": 1.2667, "step": 8110 }, { "epoch": 0.2589368283427405, "grad_norm": 0.20260873436927795, "learning_rate": 0.001, "loss": 1.2404, "step": 8120 }, { "epoch": 0.25925571606237446, "grad_norm": 0.20188632607460022, "learning_rate": 0.001, "loss": 1.2461, "step": 8130 }, { "epoch": 0.25957460378200836, "grad_norm": 0.20284780859947205, "learning_rate": 0.001, "loss": 1.2333, "step": 8140 }, { "epoch": 0.25989349150164226, "grad_norm": 0.20577070116996765, "learning_rate": 0.001, "loss": 1.2544, "step": 8150 }, { "epoch": 0.2602123792212762, "grad_norm": 0.20342661440372467, "learning_rate": 0.001, "loss": 1.2508, "step": 8160 }, { "epoch": 0.2605312669409101, "grad_norm": 0.20312359929084778, "learning_rate": 0.001, "loss": 1.2402, "step": 8170 }, { "epoch": 0.260850154660544, "grad_norm": 0.20663923025131226, "learning_rate": 0.001, "loss": 1.2609, "step": 8180 }, { "epoch": 0.26116904238017796, "grad_norm": 0.20582140982151031, "learning_rate": 0.001, "loss": 1.2448, "step": 8190 }, { "epoch": 0.26148793009981186, "grad_norm": 0.20321714878082275, "learning_rate": 0.001, "loss": 1.2531, "step": 8200 }, { "epoch": 0.26180681781944576, "grad_norm": 0.1960032731294632, "learning_rate": 0.001, "loss": 1.273, "step": 8210 }, { "epoch": 0.2621257055390797, "grad_norm": 0.20806966722011566, "learning_rate": 0.001, "loss": 1.2685, "step": 8220 }, { "epoch": 0.2624445932587136, "grad_norm": 0.1973854899406433, "learning_rate": 0.001, "loss": 1.2501, "step": 8230 }, { "epoch": 0.2627634809783475, "grad_norm": 0.19955401122570038, "learning_rate": 0.001, "loss": 1.2412, "step": 8240 }, { "epoch": 0.26308236869798146, "grad_norm": 0.19847828149795532, "learning_rate": 0.001, "loss": 1.2345, "step": 8250 }, { "epoch": 0.26340125641761536, "grad_norm": 0.20189815759658813, "learning_rate": 0.001, "loss": 1.2618, "step": 8260 }, { "epoch": 0.26372014413724926, "grad_norm": 0.20722807943820953, "learning_rate": 0.001, "loss": 1.2596, "step": 8270 }, { "epoch": 0.2640390318568832, "grad_norm": 0.20148128271102905, "learning_rate": 0.001, "loss": 1.2566, "step": 8280 }, { "epoch": 0.2643579195765171, "grad_norm": 0.21168474853038788, "learning_rate": 0.001, "loss": 1.2536, "step": 8290 }, { "epoch": 0.264676807296151, "grad_norm": 0.2093587964773178, "learning_rate": 0.001, "loss": 1.2445, "step": 8300 }, { "epoch": 0.26499569501578496, "grad_norm": 0.19885730743408203, "learning_rate": 0.001, "loss": 1.2429, "step": 8310 }, { "epoch": 0.26531458273541886, "grad_norm": 0.18978652358055115, "learning_rate": 0.001, "loss": 1.2362, "step": 8320 }, { "epoch": 0.26563347045505276, "grad_norm": 0.20467509329319, "learning_rate": 0.001, "loss": 1.2345, "step": 8330 }, { "epoch": 0.2659523581746867, "grad_norm": 0.20757165551185608, "learning_rate": 0.001, "loss": 1.2598, "step": 8340 }, { "epoch": 0.2662712458943206, "grad_norm": 0.21299956738948822, "learning_rate": 0.001, "loss": 1.2489, "step": 8350 }, { "epoch": 0.2665901336139545, "grad_norm": 0.19811475276947021, "learning_rate": 0.001, "loss": 1.2773, "step": 8360 }, { "epoch": 0.26690902133358846, "grad_norm": 0.2071559578180313, "learning_rate": 0.001, "loss": 1.2648, "step": 8370 }, { "epoch": 0.26722790905322236, "grad_norm": 0.19420473277568817, "learning_rate": 0.001, "loss": 1.2542, "step": 8380 }, { "epoch": 0.26754679677285625, "grad_norm": 0.20581373572349548, "learning_rate": 0.001, "loss": 1.2541, "step": 8390 }, { "epoch": 0.2678656844924902, "grad_norm": 0.20274776220321655, "learning_rate": 0.001, "loss": 1.2484, "step": 8400 }, { "epoch": 0.2681845722121241, "grad_norm": 0.19483987987041473, "learning_rate": 0.001, "loss": 1.2419, "step": 8410 }, { "epoch": 0.268503459931758, "grad_norm": 0.19840216636657715, "learning_rate": 0.001, "loss": 1.2419, "step": 8420 }, { "epoch": 0.26882234765139196, "grad_norm": 0.19197873771190643, "learning_rate": 0.001, "loss": 1.2376, "step": 8430 }, { "epoch": 0.26914123537102586, "grad_norm": 0.21160933375358582, "learning_rate": 0.001, "loss": 1.237, "step": 8440 }, { "epoch": 0.26946012309065975, "grad_norm": 0.1986304521560669, "learning_rate": 0.001, "loss": 1.2527, "step": 8450 }, { "epoch": 0.2697790108102937, "grad_norm": 0.19879919290542603, "learning_rate": 0.001, "loss": 1.2396, "step": 8460 }, { "epoch": 0.2700978985299276, "grad_norm": 0.20429867506027222, "learning_rate": 0.001, "loss": 1.2508, "step": 8470 }, { "epoch": 0.2704167862495615, "grad_norm": 0.19134077429771423, "learning_rate": 0.001, "loss": 1.2362, "step": 8480 }, { "epoch": 0.27073567396919546, "grad_norm": 0.20987531542778015, "learning_rate": 0.001, "loss": 1.2356, "step": 8490 }, { "epoch": 0.27105456168882935, "grad_norm": 0.19630587100982666, "learning_rate": 0.001, "loss": 1.2669, "step": 8500 }, { "epoch": 0.2713734494084633, "grad_norm": 0.19786277413368225, "learning_rate": 0.001, "loss": 1.2348, "step": 8510 }, { "epoch": 0.2716923371280972, "grad_norm": 0.19404275715351105, "learning_rate": 0.001, "loss": 1.2329, "step": 8520 }, { "epoch": 0.2720112248477311, "grad_norm": 0.19640901684761047, "learning_rate": 0.001, "loss": 1.2343, "step": 8530 }, { "epoch": 0.27233011256736506, "grad_norm": 0.19867222011089325, "learning_rate": 0.001, "loss": 1.2477, "step": 8540 }, { "epoch": 0.27264900028699895, "grad_norm": 0.2037269026041031, "learning_rate": 0.001, "loss": 1.2681, "step": 8550 }, { "epoch": 0.27296788800663285, "grad_norm": 0.20036159455776215, "learning_rate": 0.001, "loss": 1.253, "step": 8560 }, { "epoch": 0.2732867757262668, "grad_norm": 0.19691398739814758, "learning_rate": 0.001, "loss": 1.2617, "step": 8570 }, { "epoch": 0.2736056634459007, "grad_norm": 0.19771870970726013, "learning_rate": 0.001, "loss": 1.2303, "step": 8580 }, { "epoch": 0.2739245511655346, "grad_norm": 0.19885499775409698, "learning_rate": 0.001, "loss": 1.2328, "step": 8590 }, { "epoch": 0.27424343888516856, "grad_norm": 0.19315415620803833, "learning_rate": 0.001, "loss": 1.2546, "step": 8600 }, { "epoch": 0.27456232660480245, "grad_norm": 0.1995774805545807, "learning_rate": 0.001, "loss": 1.2558, "step": 8610 }, { "epoch": 0.27488121432443635, "grad_norm": 0.1985071748495102, "learning_rate": 0.001, "loss": 1.2273, "step": 8620 }, { "epoch": 0.2752001020440703, "grad_norm": 0.1915522962808609, "learning_rate": 0.001, "loss": 1.234, "step": 8630 }, { "epoch": 0.2755189897637042, "grad_norm": 0.19988441467285156, "learning_rate": 0.001, "loss": 1.2308, "step": 8640 }, { "epoch": 0.2758378774833381, "grad_norm": 0.19376257061958313, "learning_rate": 0.001, "loss": 1.2494, "step": 8650 }, { "epoch": 0.27615676520297205, "grad_norm": 0.19480600953102112, "learning_rate": 0.001, "loss": 1.2374, "step": 8660 }, { "epoch": 0.27647565292260595, "grad_norm": 0.2003336101770401, "learning_rate": 0.001, "loss": 1.24, "step": 8670 }, { "epoch": 0.27679454064223985, "grad_norm": 0.19479194283485413, "learning_rate": 0.001, "loss": 1.2266, "step": 8680 }, { "epoch": 0.2771134283618738, "grad_norm": 0.1989787369966507, "learning_rate": 0.001, "loss": 1.2187, "step": 8690 }, { "epoch": 0.2774323160815077, "grad_norm": 0.19739316403865814, "learning_rate": 0.001, "loss": 1.2395, "step": 8700 }, { "epoch": 0.2777512038011416, "grad_norm": 0.19161522388458252, "learning_rate": 0.001, "loss": 1.243, "step": 8710 }, { "epoch": 0.27807009152077555, "grad_norm": 0.1980825811624527, "learning_rate": 0.001, "loss": 1.2383, "step": 8720 }, { "epoch": 0.27838897924040945, "grad_norm": 0.19861190021038055, "learning_rate": 0.001, "loss": 1.2142, "step": 8730 }, { "epoch": 0.27870786696004335, "grad_norm": 0.19875994324684143, "learning_rate": 0.001, "loss": 1.2325, "step": 8740 }, { "epoch": 0.2790267546796773, "grad_norm": 0.2007092535495758, "learning_rate": 0.001, "loss": 1.2412, "step": 8750 }, { "epoch": 0.2793456423993112, "grad_norm": 0.19943733513355255, "learning_rate": 0.001, "loss": 1.2317, "step": 8760 }, { "epoch": 0.2796645301189451, "grad_norm": 0.19409018754959106, "learning_rate": 0.001, "loss": 1.2405, "step": 8770 }, { "epoch": 0.27998341783857905, "grad_norm": 0.20169158279895782, "learning_rate": 0.001, "loss": 1.2454, "step": 8780 }, { "epoch": 0.28030230555821295, "grad_norm": 0.19915443658828735, "learning_rate": 0.001, "loss": 1.2327, "step": 8790 }, { "epoch": 0.28062119327784685, "grad_norm": 0.19803552329540253, "learning_rate": 0.001, "loss": 1.2259, "step": 8800 }, { "epoch": 0.2809400809974808, "grad_norm": 0.19060640037059784, "learning_rate": 0.001, "loss": 1.2495, "step": 8810 }, { "epoch": 0.2812589687171147, "grad_norm": 0.19586661458015442, "learning_rate": 0.001, "loss": 1.2341, "step": 8820 }, { "epoch": 0.2815778564367486, "grad_norm": 0.2034222036600113, "learning_rate": 0.001, "loss": 1.2595, "step": 8830 }, { "epoch": 0.28189674415638255, "grad_norm": 0.1960514336824417, "learning_rate": 0.001, "loss": 1.2333, "step": 8840 }, { "epoch": 0.28221563187601645, "grad_norm": 0.2002130150794983, "learning_rate": 0.001, "loss": 1.2387, "step": 8850 }, { "epoch": 0.28253451959565035, "grad_norm": 0.1940252184867859, "learning_rate": 0.001, "loss": 1.2403, "step": 8860 }, { "epoch": 0.2828534073152843, "grad_norm": 0.1985214501619339, "learning_rate": 0.001, "loss": 1.2211, "step": 8870 }, { "epoch": 0.2831722950349182, "grad_norm": 0.1938486248254776, "learning_rate": 0.001, "loss": 1.2421, "step": 8880 }, { "epoch": 0.2834911827545521, "grad_norm": 0.1903192102909088, "learning_rate": 0.001, "loss": 1.2221, "step": 8890 }, { "epoch": 0.28381007047418605, "grad_norm": 0.19713927805423737, "learning_rate": 0.001, "loss": 1.245, "step": 8900 }, { "epoch": 0.28412895819381995, "grad_norm": 0.2070610523223877, "learning_rate": 0.001, "loss": 1.2296, "step": 8910 }, { "epoch": 0.2844478459134539, "grad_norm": 0.19895747303962708, "learning_rate": 0.001, "loss": 1.2276, "step": 8920 }, { "epoch": 0.2847667336330878, "grad_norm": 0.1950913965702057, "learning_rate": 0.001, "loss": 1.239, "step": 8930 }, { "epoch": 0.2850856213527217, "grad_norm": 0.20076586306095123, "learning_rate": 0.001, "loss": 1.2338, "step": 8940 }, { "epoch": 0.28540450907235565, "grad_norm": 0.1983807235956192, "learning_rate": 0.001, "loss": 1.2431, "step": 8950 }, { "epoch": 0.28572339679198955, "grad_norm": 0.1993577629327774, "learning_rate": 0.001, "loss": 1.2283, "step": 8960 }, { "epoch": 0.28604228451162345, "grad_norm": 0.19628196954727173, "learning_rate": 0.001, "loss": 1.2281, "step": 8970 }, { "epoch": 0.2863611722312574, "grad_norm": 0.19559252262115479, "learning_rate": 0.001, "loss": 1.2316, "step": 8980 }, { "epoch": 0.2866800599508913, "grad_norm": 0.1995292752981186, "learning_rate": 0.001, "loss": 1.2321, "step": 8990 }, { "epoch": 0.2869989476705252, "grad_norm": 0.19853276014328003, "learning_rate": 0.001, "loss": 1.2576, "step": 9000 }, { "epoch": 0.28731783539015915, "grad_norm": 0.19437505304813385, "learning_rate": 0.001, "loss": 1.2182, "step": 9010 }, { "epoch": 0.28763672310979305, "grad_norm": 0.19659703969955444, "learning_rate": 0.001, "loss": 1.2313, "step": 9020 }, { "epoch": 0.28795561082942694, "grad_norm": 0.19425907731056213, "learning_rate": 0.001, "loss": 1.2316, "step": 9030 }, { "epoch": 0.2882744985490609, "grad_norm": 0.19886291027069092, "learning_rate": 0.001, "loss": 1.2373, "step": 9040 }, { "epoch": 0.2885933862686948, "grad_norm": 0.18857437372207642, "learning_rate": 0.001, "loss": 1.2226, "step": 9050 }, { "epoch": 0.2889122739883287, "grad_norm": 0.1934089958667755, "learning_rate": 0.001, "loss": 1.2191, "step": 9060 }, { "epoch": 0.28923116170796265, "grad_norm": 0.19871141016483307, "learning_rate": 0.001, "loss": 1.2246, "step": 9070 }, { "epoch": 0.28955004942759655, "grad_norm": 0.19774867594242096, "learning_rate": 0.001, "loss": 1.2335, "step": 9080 }, { "epoch": 0.28986893714723044, "grad_norm": 0.19685259461402893, "learning_rate": 0.001, "loss": 1.2537, "step": 9090 }, { "epoch": 0.2901878248668644, "grad_norm": 0.19641003012657166, "learning_rate": 0.001, "loss": 1.2202, "step": 9100 }, { "epoch": 0.2905067125864983, "grad_norm": 0.19249022006988525, "learning_rate": 0.001, "loss": 1.2282, "step": 9110 }, { "epoch": 0.2908256003061322, "grad_norm": 0.20958197116851807, "learning_rate": 0.001, "loss": 1.2346, "step": 9120 }, { "epoch": 0.29114448802576615, "grad_norm": 0.1979653686285019, "learning_rate": 0.001, "loss": 1.2356, "step": 9130 }, { "epoch": 0.29146337574540004, "grad_norm": 0.20007383823394775, "learning_rate": 0.001, "loss": 1.2219, "step": 9140 }, { "epoch": 0.29178226346503394, "grad_norm": 0.1912803053855896, "learning_rate": 0.001, "loss": 1.2164, "step": 9150 }, { "epoch": 0.2921011511846679, "grad_norm": 0.2007441520690918, "learning_rate": 0.001, "loss": 1.234, "step": 9160 }, { "epoch": 0.2924200389043018, "grad_norm": 0.1962765008211136, "learning_rate": 0.001, "loss": 1.2397, "step": 9170 }, { "epoch": 0.2927389266239357, "grad_norm": 0.19007021188735962, "learning_rate": 0.001, "loss": 1.2328, "step": 9180 }, { "epoch": 0.29305781434356964, "grad_norm": 0.19233642518520355, "learning_rate": 0.001, "loss": 1.2173, "step": 9190 }, { "epoch": 0.29337670206320354, "grad_norm": 0.19066978991031647, "learning_rate": 0.001, "loss": 1.2226, "step": 9200 }, { "epoch": 0.29369558978283744, "grad_norm": 0.19215360283851624, "learning_rate": 0.001, "loss": 1.2153, "step": 9210 }, { "epoch": 0.2940144775024714, "grad_norm": 0.1964012086391449, "learning_rate": 0.001, "loss": 1.2221, "step": 9220 }, { "epoch": 0.2943333652221053, "grad_norm": 0.196467325091362, "learning_rate": 0.001, "loss": 1.2316, "step": 9230 }, { "epoch": 0.2946522529417392, "grad_norm": 0.19289752840995789, "learning_rate": 0.001, "loss": 1.21, "step": 9240 }, { "epoch": 0.29497114066137314, "grad_norm": 0.19339169561862946, "learning_rate": 0.001, "loss": 1.2243, "step": 9250 }, { "epoch": 0.29529002838100704, "grad_norm": 0.19633044302463531, "learning_rate": 0.001, "loss": 1.2165, "step": 9260 }, { "epoch": 0.29560891610064094, "grad_norm": 0.1902557909488678, "learning_rate": 0.001, "loss": 1.2105, "step": 9270 }, { "epoch": 0.2959278038202749, "grad_norm": 0.1978464275598526, "learning_rate": 0.001, "loss": 1.221, "step": 9280 }, { "epoch": 0.2962466915399088, "grad_norm": 0.1890365481376648, "learning_rate": 0.001, "loss": 1.2059, "step": 9290 }, { "epoch": 0.2965655792595427, "grad_norm": 0.1897841989994049, "learning_rate": 0.001, "loss": 1.2232, "step": 9300 }, { "epoch": 0.29688446697917664, "grad_norm": 0.19626909494400024, "learning_rate": 0.001, "loss": 1.2306, "step": 9310 }, { "epoch": 0.29720335469881054, "grad_norm": 0.19002099335193634, "learning_rate": 0.001, "loss": 1.2019, "step": 9320 }, { "epoch": 0.29752224241844444, "grad_norm": 0.18954624235630035, "learning_rate": 0.001, "loss": 1.2302, "step": 9330 }, { "epoch": 0.2978411301380784, "grad_norm": 0.1903507560491562, "learning_rate": 0.001, "loss": 1.2088, "step": 9340 }, { "epoch": 0.2981600178577123, "grad_norm": 0.1922921985387802, "learning_rate": 0.001, "loss": 1.215, "step": 9350 }, { "epoch": 0.29847890557734624, "grad_norm": 0.20056477189064026, "learning_rate": 0.001, "loss": 1.2221, "step": 9360 }, { "epoch": 0.29879779329698014, "grad_norm": 0.18921448290348053, "learning_rate": 0.001, "loss": 1.2233, "step": 9370 }, { "epoch": 0.29911668101661404, "grad_norm": 0.1903228610754013, "learning_rate": 0.001, "loss": 1.2194, "step": 9380 }, { "epoch": 0.299435568736248, "grad_norm": 0.19145965576171875, "learning_rate": 0.001, "loss": 1.2065, "step": 9390 }, { "epoch": 0.2997544564558819, "grad_norm": 0.1889374703168869, "learning_rate": 0.001, "loss": 1.219, "step": 9400 }, { "epoch": 0.3000733441755158, "grad_norm": 0.18882133066654205, "learning_rate": 0.001, "loss": 1.2229, "step": 9410 }, { "epoch": 0.30039223189514974, "grad_norm": 0.19275881350040436, "learning_rate": 0.001, "loss": 1.2212, "step": 9420 }, { "epoch": 0.30071111961478364, "grad_norm": 0.19594255089759827, "learning_rate": 0.001, "loss": 1.211, "step": 9430 }, { "epoch": 0.30103000733441754, "grad_norm": 0.19568689167499542, "learning_rate": 0.001, "loss": 1.2228, "step": 9440 }, { "epoch": 0.3013488950540515, "grad_norm": 0.18956463038921356, "learning_rate": 0.001, "loss": 1.2271, "step": 9450 }, { "epoch": 0.3016677827736854, "grad_norm": 0.19660969078540802, "learning_rate": 0.001, "loss": 1.2256, "step": 9460 }, { "epoch": 0.3019866704933193, "grad_norm": 0.1920958012342453, "learning_rate": 0.001, "loss": 1.2402, "step": 9470 }, { "epoch": 0.30230555821295324, "grad_norm": 0.19480574131011963, "learning_rate": 0.001, "loss": 1.2192, "step": 9480 }, { "epoch": 0.30262444593258714, "grad_norm": 0.1968194842338562, "learning_rate": 0.001, "loss": 1.2208, "step": 9490 }, { "epoch": 0.30294333365222104, "grad_norm": 0.19631661474704742, "learning_rate": 0.001, "loss": 1.2202, "step": 9500 }, { "epoch": 0.303262221371855, "grad_norm": 0.19253483414649963, "learning_rate": 0.001, "loss": 1.208, "step": 9510 }, { "epoch": 0.3035811090914889, "grad_norm": 0.19759908318519592, "learning_rate": 0.001, "loss": 1.2177, "step": 9520 }, { "epoch": 0.3038999968111228, "grad_norm": 0.1910729557275772, "learning_rate": 0.001, "loss": 1.1982, "step": 9530 }, { "epoch": 0.30421888453075674, "grad_norm": 0.19392292201519012, "learning_rate": 0.001, "loss": 1.2137, "step": 9540 }, { "epoch": 0.30453777225039064, "grad_norm": 0.19059373438358307, "learning_rate": 0.001, "loss": 1.1914, "step": 9550 }, { "epoch": 0.30485665997002453, "grad_norm": 0.18858584761619568, "learning_rate": 0.001, "loss": 1.2147, "step": 9560 }, { "epoch": 0.3051755476896585, "grad_norm": 0.19062539935112, "learning_rate": 0.001, "loss": 1.2092, "step": 9570 }, { "epoch": 0.3054944354092924, "grad_norm": 0.19087275862693787, "learning_rate": 0.001, "loss": 1.2288, "step": 9580 }, { "epoch": 0.3058133231289263, "grad_norm": 0.18619580566883087, "learning_rate": 0.001, "loss": 1.2138, "step": 9590 }, { "epoch": 0.30613221084856024, "grad_norm": 0.1980937123298645, "learning_rate": 0.001, "loss": 1.2036, "step": 9600 }, { "epoch": 0.30645109856819414, "grad_norm": 0.1878875494003296, "learning_rate": 0.001, "loss": 1.2163, "step": 9610 }, { "epoch": 0.30676998628782803, "grad_norm": 0.1916397362947464, "learning_rate": 0.001, "loss": 1.2165, "step": 9620 }, { "epoch": 0.307088874007462, "grad_norm": 0.19258850812911987, "learning_rate": 0.001, "loss": 1.1858, "step": 9630 }, { "epoch": 0.3074077617270959, "grad_norm": 0.18846213817596436, "learning_rate": 0.001, "loss": 1.2184, "step": 9640 }, { "epoch": 0.3077266494467298, "grad_norm": 0.18937738239765167, "learning_rate": 0.001, "loss": 1.2119, "step": 9650 }, { "epoch": 0.30804553716636374, "grad_norm": 0.18987585604190826, "learning_rate": 0.001, "loss": 1.2016, "step": 9660 }, { "epoch": 0.30836442488599763, "grad_norm": 0.19264313578605652, "learning_rate": 0.001, "loss": 1.2025, "step": 9670 }, { "epoch": 0.30868331260563153, "grad_norm": 0.1826832890510559, "learning_rate": 0.001, "loss": 1.2143, "step": 9680 }, { "epoch": 0.3090022003252655, "grad_norm": 0.1982695609331131, "learning_rate": 0.001, "loss": 1.2236, "step": 9690 }, { "epoch": 0.3093210880448994, "grad_norm": 0.1873609572649002, "learning_rate": 0.001, "loss": 1.2129, "step": 9700 }, { "epoch": 0.3096399757645333, "grad_norm": 0.1930837631225586, "learning_rate": 0.001, "loss": 1.2252, "step": 9710 }, { "epoch": 0.30995886348416724, "grad_norm": 0.19828754663467407, "learning_rate": 0.001, "loss": 1.2227, "step": 9720 }, { "epoch": 0.31027775120380113, "grad_norm": 0.19810664653778076, "learning_rate": 0.001, "loss": 1.2245, "step": 9730 }, { "epoch": 0.31059663892343503, "grad_norm": 0.19223099946975708, "learning_rate": 0.001, "loss": 1.2154, "step": 9740 }, { "epoch": 0.310915526643069, "grad_norm": 0.19165821373462677, "learning_rate": 0.001, "loss": 1.2004, "step": 9750 }, { "epoch": 0.3112344143627029, "grad_norm": 0.1895214170217514, "learning_rate": 0.001, "loss": 1.2117, "step": 9760 }, { "epoch": 0.31155330208233684, "grad_norm": 0.19103483855724335, "learning_rate": 0.001, "loss": 1.2319, "step": 9770 }, { "epoch": 0.31187218980197073, "grad_norm": 0.18547318875789642, "learning_rate": 0.001, "loss": 1.1935, "step": 9780 }, { "epoch": 0.31219107752160463, "grad_norm": 0.18510322272777557, "learning_rate": 0.001, "loss": 1.2051, "step": 9790 }, { "epoch": 0.3125099652412386, "grad_norm": 0.18715843558311462, "learning_rate": 0.001, "loss": 1.2166, "step": 9800 }, { "epoch": 0.3128288529608725, "grad_norm": 0.18825583159923553, "learning_rate": 0.001, "loss": 1.2168, "step": 9810 }, { "epoch": 0.3131477406805064, "grad_norm": 0.1959000676870346, "learning_rate": 0.001, "loss": 1.2143, "step": 9820 }, { "epoch": 0.31346662840014033, "grad_norm": 0.1898072212934494, "learning_rate": 0.001, "loss": 1.2043, "step": 9830 }, { "epoch": 0.31378551611977423, "grad_norm": 0.18969126045703888, "learning_rate": 0.001, "loss": 1.2081, "step": 9840 }, { "epoch": 0.31410440383940813, "grad_norm": 0.1923128217458725, "learning_rate": 0.001, "loss": 1.1894, "step": 9850 }, { "epoch": 0.3144232915590421, "grad_norm": 0.18702685832977295, "learning_rate": 0.001, "loss": 1.2191, "step": 9860 }, { "epoch": 0.314742179278676, "grad_norm": 0.18185706436634064, "learning_rate": 0.001, "loss": 1.2183, "step": 9870 }, { "epoch": 0.3150610669983099, "grad_norm": 0.1877908557653427, "learning_rate": 0.001, "loss": 1.2166, "step": 9880 }, { "epoch": 0.31537995471794383, "grad_norm": 0.19115397334098816, "learning_rate": 0.001, "loss": 1.1977, "step": 9890 }, { "epoch": 0.31569884243757773, "grad_norm": 0.18303260207176208, "learning_rate": 0.001, "loss": 1.2028, "step": 9900 }, { "epoch": 0.31601773015721163, "grad_norm": 0.18337474763393402, "learning_rate": 0.001, "loss": 1.1928, "step": 9910 }, { "epoch": 0.3163366178768456, "grad_norm": 0.19313004612922668, "learning_rate": 0.001, "loss": 1.1989, "step": 9920 }, { "epoch": 0.3166555055964795, "grad_norm": 0.19004032015800476, "learning_rate": 0.001, "loss": 1.2133, "step": 9930 }, { "epoch": 0.3169743933161134, "grad_norm": 0.1894432008266449, "learning_rate": 0.001, "loss": 1.2166, "step": 9940 }, { "epoch": 0.31729328103574733, "grad_norm": 0.19445645809173584, "learning_rate": 0.001, "loss": 1.2175, "step": 9950 }, { "epoch": 0.31761216875538123, "grad_norm": 0.18664146959781647, "learning_rate": 0.001, "loss": 1.2049, "step": 9960 }, { "epoch": 0.31793105647501513, "grad_norm": 0.1890193223953247, "learning_rate": 0.001, "loss": 1.223, "step": 9970 }, { "epoch": 0.3182499441946491, "grad_norm": 0.18643978238105774, "learning_rate": 0.001, "loss": 1.2026, "step": 9980 }, { "epoch": 0.318568831914283, "grad_norm": 0.18719391524791718, "learning_rate": 0.001, "loss": 1.2092, "step": 9990 }, { "epoch": 0.3188877196339169, "grad_norm": 0.18894146382808685, "learning_rate": 0.001, "loss": 1.211, "step": 10000 }, { "epoch": 0.31920660735355083, "grad_norm": 0.19064651429653168, "learning_rate": 0.001, "loss": 1.2126, "step": 10010 }, { "epoch": 0.31952549507318473, "grad_norm": 0.19338785111904144, "learning_rate": 0.001, "loss": 1.1982, "step": 10020 }, { "epoch": 0.3198443827928186, "grad_norm": 0.19196036458015442, "learning_rate": 0.001, "loss": 1.2046, "step": 10030 }, { "epoch": 0.3201632705124526, "grad_norm": 0.1986386626958847, "learning_rate": 0.001, "loss": 1.2066, "step": 10040 }, { "epoch": 0.3204821582320865, "grad_norm": 0.20869721472263336, "learning_rate": 0.001, "loss": 1.226, "step": 10050 }, { "epoch": 0.3208010459517204, "grad_norm": 0.18814514577388763, "learning_rate": 0.001, "loss": 1.209, "step": 10060 }, { "epoch": 0.32111993367135433, "grad_norm": 0.19529877603054047, "learning_rate": 0.001, "loss": 1.1918, "step": 10070 }, { "epoch": 0.3214388213909882, "grad_norm": 0.1876852661371231, "learning_rate": 0.001, "loss": 1.2105, "step": 10080 }, { "epoch": 0.3217577091106221, "grad_norm": 0.18959972262382507, "learning_rate": 0.001, "loss": 1.205, "step": 10090 }, { "epoch": 0.3220765968302561, "grad_norm": 0.19066168367862701, "learning_rate": 0.001, "loss": 1.2123, "step": 10100 }, { "epoch": 0.32239548454989, "grad_norm": 0.18713495135307312, "learning_rate": 0.001, "loss": 1.2129, "step": 10110 }, { "epoch": 0.3227143722695239, "grad_norm": 0.18978673219680786, "learning_rate": 0.001, "loss": 1.185, "step": 10120 }, { "epoch": 0.32303325998915783, "grad_norm": 0.19576674699783325, "learning_rate": 0.001, "loss": 1.1905, "step": 10130 }, { "epoch": 0.3233521477087917, "grad_norm": 0.18363942205905914, "learning_rate": 0.001, "loss": 1.1981, "step": 10140 }, { "epoch": 0.3236710354284256, "grad_norm": 0.19066651165485382, "learning_rate": 0.001, "loss": 1.2113, "step": 10150 }, { "epoch": 0.3239899231480596, "grad_norm": 0.1898638755083084, "learning_rate": 0.001, "loss": 1.1983, "step": 10160 }, { "epoch": 0.3243088108676935, "grad_norm": 0.18936438858509064, "learning_rate": 0.001, "loss": 1.1957, "step": 10170 }, { "epoch": 0.32462769858732743, "grad_norm": 0.18695956468582153, "learning_rate": 0.001, "loss": 1.1932, "step": 10180 }, { "epoch": 0.3249465863069613, "grad_norm": 0.19916784763336182, "learning_rate": 0.001, "loss": 1.188, "step": 10190 }, { "epoch": 0.3252654740265952, "grad_norm": 0.1864575296640396, "learning_rate": 0.001, "loss": 1.1994, "step": 10200 }, { "epoch": 0.3255843617462292, "grad_norm": 0.18736214935779572, "learning_rate": 0.001, "loss": 1.197, "step": 10210 }, { "epoch": 0.3259032494658631, "grad_norm": 0.18586505949497223, "learning_rate": 0.001, "loss": 1.2005, "step": 10220 }, { "epoch": 0.326222137185497, "grad_norm": 0.18664199113845825, "learning_rate": 0.001, "loss": 1.2323, "step": 10230 }, { "epoch": 0.3265410249051309, "grad_norm": 0.18509739637374878, "learning_rate": 0.001, "loss": 1.2228, "step": 10240 }, { "epoch": 0.3268599126247648, "grad_norm": 0.18879742920398712, "learning_rate": 0.001, "loss": 1.18, "step": 10250 }, { "epoch": 0.3271788003443987, "grad_norm": 0.19517406821250916, "learning_rate": 0.001, "loss": 1.2042, "step": 10260 }, { "epoch": 0.3274976880640327, "grad_norm": 0.18386130034923553, "learning_rate": 0.001, "loss": 1.2029, "step": 10270 }, { "epoch": 0.3278165757836666, "grad_norm": 0.19367419183254242, "learning_rate": 0.001, "loss": 1.2048, "step": 10280 }, { "epoch": 0.3281354635033005, "grad_norm": 0.18701566755771637, "learning_rate": 0.001, "loss": 1.2081, "step": 10290 }, { "epoch": 0.3284543512229344, "grad_norm": 0.18644075095653534, "learning_rate": 0.001, "loss": 1.1982, "step": 10300 }, { "epoch": 0.3287732389425683, "grad_norm": 0.1888340711593628, "learning_rate": 0.001, "loss": 1.2174, "step": 10310 }, { "epoch": 0.3290921266622022, "grad_norm": 0.1851629763841629, "learning_rate": 0.001, "loss": 1.1892, "step": 10320 }, { "epoch": 0.3294110143818362, "grad_norm": 0.18563096225261688, "learning_rate": 0.001, "loss": 1.225, "step": 10330 }, { "epoch": 0.3297299021014701, "grad_norm": 0.18963338434696198, "learning_rate": 0.001, "loss": 1.2111, "step": 10340 }, { "epoch": 0.33004878982110397, "grad_norm": 0.19024492800235748, "learning_rate": 0.001, "loss": 1.1871, "step": 10350 }, { "epoch": 0.3303676775407379, "grad_norm": 0.19344636797904968, "learning_rate": 0.001, "loss": 1.1991, "step": 10360 }, { "epoch": 0.3306865652603718, "grad_norm": 0.18379245698451996, "learning_rate": 0.001, "loss": 1.2176, "step": 10370 }, { "epoch": 0.3310054529800057, "grad_norm": 0.18924225866794586, "learning_rate": 0.001, "loss": 1.1919, "step": 10380 }, { "epoch": 0.3313243406996397, "grad_norm": 0.18927711248397827, "learning_rate": 0.001, "loss": 1.1853, "step": 10390 }, { "epoch": 0.3316432284192736, "grad_norm": 0.18071341514587402, "learning_rate": 0.001, "loss": 1.2049, "step": 10400 }, { "epoch": 0.33196211613890747, "grad_norm": 0.18121443688869476, "learning_rate": 0.001, "loss": 1.185, "step": 10410 }, { "epoch": 0.3322810038585414, "grad_norm": 0.18824678659439087, "learning_rate": 0.001, "loss": 1.196, "step": 10420 }, { "epoch": 0.3325998915781753, "grad_norm": 0.181623637676239, "learning_rate": 0.001, "loss": 1.1843, "step": 10430 }, { "epoch": 0.3329187792978092, "grad_norm": 0.18745820224285126, "learning_rate": 0.001, "loss": 1.2181, "step": 10440 }, { "epoch": 0.3332376670174432, "grad_norm": 0.18517442047595978, "learning_rate": 0.001, "loss": 1.1942, "step": 10450 }, { "epoch": 0.33355655473707707, "grad_norm": 0.1849382370710373, "learning_rate": 0.001, "loss": 1.1822, "step": 10460 }, { "epoch": 0.33387544245671097, "grad_norm": 0.18831832706928253, "learning_rate": 0.001, "loss": 1.2021, "step": 10470 }, { "epoch": 0.3341943301763449, "grad_norm": 0.18308256566524506, "learning_rate": 0.001, "loss": 1.1916, "step": 10480 }, { "epoch": 0.3345132178959788, "grad_norm": 0.17930716276168823, "learning_rate": 0.001, "loss": 1.1848, "step": 10490 }, { "epoch": 0.3348321056156127, "grad_norm": 0.19042780995368958, "learning_rate": 0.001, "loss": 1.1918, "step": 10500 }, { "epoch": 0.33515099333524667, "grad_norm": 0.18928125500679016, "learning_rate": 0.001, "loss": 1.2009, "step": 10510 }, { "epoch": 0.33546988105488057, "grad_norm": 0.18939156830310822, "learning_rate": 0.001, "loss": 1.2123, "step": 10520 }, { "epoch": 0.33578876877451447, "grad_norm": 0.18722973763942719, "learning_rate": 0.001, "loss": 1.1999, "step": 10530 }, { "epoch": 0.3361076564941484, "grad_norm": 0.18385377526283264, "learning_rate": 0.001, "loss": 1.1748, "step": 10540 }, { "epoch": 0.3364265442137823, "grad_norm": 0.18305833637714386, "learning_rate": 0.001, "loss": 1.1826, "step": 10550 }, { "epoch": 0.3367454319334162, "grad_norm": 0.18611466884613037, "learning_rate": 0.001, "loss": 1.1928, "step": 10560 }, { "epoch": 0.33706431965305017, "grad_norm": 0.19067902863025665, "learning_rate": 0.001, "loss": 1.1842, "step": 10570 }, { "epoch": 0.33738320737268407, "grad_norm": 0.1923217922449112, "learning_rate": 0.001, "loss": 1.1901, "step": 10580 }, { "epoch": 0.337702095092318, "grad_norm": 0.18926678597927094, "learning_rate": 0.001, "loss": 1.1637, "step": 10590 }, { "epoch": 0.3380209828119519, "grad_norm": 0.19210688769817352, "learning_rate": 0.001, "loss": 1.1846, "step": 10600 }, { "epoch": 0.3383398705315858, "grad_norm": 0.18888874351978302, "learning_rate": 0.001, "loss": 1.1892, "step": 10610 }, { "epoch": 0.33865875825121977, "grad_norm": 0.1817292869091034, "learning_rate": 0.001, "loss": 1.2155, "step": 10620 }, { "epoch": 0.33897764597085367, "grad_norm": 0.1859046071767807, "learning_rate": 0.001, "loss": 1.1924, "step": 10630 }, { "epoch": 0.33929653369048757, "grad_norm": 0.19133812189102173, "learning_rate": 0.001, "loss": 1.1985, "step": 10640 }, { "epoch": 0.3396154214101215, "grad_norm": 0.18890315294265747, "learning_rate": 0.001, "loss": 1.1963, "step": 10650 }, { "epoch": 0.3399343091297554, "grad_norm": 0.1901807337999344, "learning_rate": 0.001, "loss": 1.1926, "step": 10660 }, { "epoch": 0.3402531968493893, "grad_norm": 0.17766164243221283, "learning_rate": 0.001, "loss": 1.2001, "step": 10670 }, { "epoch": 0.34057208456902327, "grad_norm": 0.18734650313854218, "learning_rate": 0.001, "loss": 1.2038, "step": 10680 }, { "epoch": 0.34089097228865717, "grad_norm": 0.1862167865037918, "learning_rate": 0.001, "loss": 1.1942, "step": 10690 }, { "epoch": 0.34120986000829107, "grad_norm": 0.18553335964679718, "learning_rate": 0.001, "loss": 1.1926, "step": 10700 }, { "epoch": 0.341528747727925, "grad_norm": 0.18530946969985962, "learning_rate": 0.001, "loss": 1.1901, "step": 10710 }, { "epoch": 0.3418476354475589, "grad_norm": 0.18436305224895477, "learning_rate": 0.001, "loss": 1.1963, "step": 10720 }, { "epoch": 0.3421665231671928, "grad_norm": 0.1863793581724167, "learning_rate": 0.001, "loss": 1.1721, "step": 10730 }, { "epoch": 0.34248541088682677, "grad_norm": 0.19209203124046326, "learning_rate": 0.001, "loss": 1.1999, "step": 10740 }, { "epoch": 0.34280429860646067, "grad_norm": 0.182088702917099, "learning_rate": 0.001, "loss": 1.1859, "step": 10750 }, { "epoch": 0.34312318632609456, "grad_norm": 0.18213383853435516, "learning_rate": 0.001, "loss": 1.1793, "step": 10760 }, { "epoch": 0.3434420740457285, "grad_norm": 0.18192435801029205, "learning_rate": 0.001, "loss": 1.1703, "step": 10770 }, { "epoch": 0.3437609617653624, "grad_norm": 0.1949847936630249, "learning_rate": 0.001, "loss": 1.1938, "step": 10780 }, { "epoch": 0.3440798494849963, "grad_norm": 0.18721050024032593, "learning_rate": 0.001, "loss": 1.1833, "step": 10790 }, { "epoch": 0.34439873720463027, "grad_norm": 0.18720798194408417, "learning_rate": 0.001, "loss": 1.194, "step": 10800 }, { "epoch": 0.34471762492426417, "grad_norm": 0.18595951795578003, "learning_rate": 0.001, "loss": 1.2108, "step": 10810 }, { "epoch": 0.34503651264389806, "grad_norm": 0.1828535795211792, "learning_rate": 0.001, "loss": 1.1993, "step": 10820 }, { "epoch": 0.345355400363532, "grad_norm": 0.181146040558815, "learning_rate": 0.001, "loss": 1.1879, "step": 10830 }, { "epoch": 0.3456742880831659, "grad_norm": 0.18184798955917358, "learning_rate": 0.001, "loss": 1.1761, "step": 10840 }, { "epoch": 0.3459931758027998, "grad_norm": 0.18391500413417816, "learning_rate": 0.001, "loss": 1.1768, "step": 10850 }, { "epoch": 0.34631206352243377, "grad_norm": 0.18120771646499634, "learning_rate": 0.001, "loss": 1.1782, "step": 10860 }, { "epoch": 0.34663095124206766, "grad_norm": 0.18713876605033875, "learning_rate": 0.001, "loss": 1.194, "step": 10870 }, { "epoch": 0.34694983896170156, "grad_norm": 0.18283255398273468, "learning_rate": 0.001, "loss": 1.2069, "step": 10880 }, { "epoch": 0.3472687266813355, "grad_norm": 0.1826285868883133, "learning_rate": 0.001, "loss": 1.1933, "step": 10890 }, { "epoch": 0.3475876144009694, "grad_norm": 0.19150082767009735, "learning_rate": 0.001, "loss": 1.199, "step": 10900 }, { "epoch": 0.3479065021206033, "grad_norm": 0.18925228714942932, "learning_rate": 0.001, "loss": 1.1748, "step": 10910 }, { "epoch": 0.34822538984023726, "grad_norm": 0.18681123852729797, "learning_rate": 0.001, "loss": 1.1938, "step": 10920 }, { "epoch": 0.34854427755987116, "grad_norm": 0.19334517419338226, "learning_rate": 0.001, "loss": 1.1997, "step": 10930 }, { "epoch": 0.34886316527950506, "grad_norm": 0.19396010041236877, "learning_rate": 0.001, "loss": 1.1772, "step": 10940 }, { "epoch": 0.349182052999139, "grad_norm": 0.18436571955680847, "learning_rate": 0.001, "loss": 1.2072, "step": 10950 }, { "epoch": 0.3495009407187729, "grad_norm": 0.18500304222106934, "learning_rate": 0.001, "loss": 1.1882, "step": 10960 }, { "epoch": 0.3498198284384068, "grad_norm": 0.18106575310230255, "learning_rate": 0.001, "loss": 1.1967, "step": 10970 }, { "epoch": 0.35013871615804076, "grad_norm": 0.18210440874099731, "learning_rate": 0.001, "loss": 1.1908, "step": 10980 }, { "epoch": 0.35045760387767466, "grad_norm": 0.1854126900434494, "learning_rate": 0.001, "loss": 1.1698, "step": 10990 }, { "epoch": 0.3507764915973086, "grad_norm": 0.18545810878276825, "learning_rate": 0.001, "loss": 1.185, "step": 11000 }, { "epoch": 0.3510953793169425, "grad_norm": 0.18830567598342896, "learning_rate": 0.001, "loss": 1.2, "step": 11010 }, { "epoch": 0.3514142670365764, "grad_norm": 0.18633617460727692, "learning_rate": 0.001, "loss": 1.1788, "step": 11020 }, { "epoch": 0.35173315475621036, "grad_norm": 0.1790749579668045, "learning_rate": 0.001, "loss": 1.1621, "step": 11030 }, { "epoch": 0.35205204247584426, "grad_norm": 0.18545784056186676, "learning_rate": 0.001, "loss": 1.1776, "step": 11040 }, { "epoch": 0.35237093019547816, "grad_norm": 0.18230289220809937, "learning_rate": 0.001, "loss": 1.1611, "step": 11050 }, { "epoch": 0.3526898179151121, "grad_norm": 0.1841791570186615, "learning_rate": 0.001, "loss": 1.1863, "step": 11060 }, { "epoch": 0.353008705634746, "grad_norm": 0.18114890158176422, "learning_rate": 0.001, "loss": 1.1791, "step": 11070 }, { "epoch": 0.3533275933543799, "grad_norm": 0.18278636038303375, "learning_rate": 0.001, "loss": 1.1774, "step": 11080 }, { "epoch": 0.35364648107401386, "grad_norm": 0.18547603487968445, "learning_rate": 0.001, "loss": 1.1663, "step": 11090 }, { "epoch": 0.35396536879364776, "grad_norm": 0.18347465991973877, "learning_rate": 0.001, "loss": 1.1656, "step": 11100 }, { "epoch": 0.35428425651328166, "grad_norm": 0.1870725154876709, "learning_rate": 0.001, "loss": 1.1898, "step": 11110 }, { "epoch": 0.3546031442329156, "grad_norm": 0.18504945933818817, "learning_rate": 0.001, "loss": 1.193, "step": 11120 }, { "epoch": 0.3549220319525495, "grad_norm": 0.185967355966568, "learning_rate": 0.001, "loss": 1.191, "step": 11130 }, { "epoch": 0.3552409196721834, "grad_norm": 0.18489550054073334, "learning_rate": 0.001, "loss": 1.1814, "step": 11140 }, { "epoch": 0.35555980739181736, "grad_norm": 0.18694624304771423, "learning_rate": 0.001, "loss": 1.1817, "step": 11150 }, { "epoch": 0.35587869511145126, "grad_norm": 0.1808748096227646, "learning_rate": 0.001, "loss": 1.1878, "step": 11160 }, { "epoch": 0.35619758283108516, "grad_norm": 0.17861339449882507, "learning_rate": 0.001, "loss": 1.1849, "step": 11170 }, { "epoch": 0.3565164705507191, "grad_norm": 0.19131644070148468, "learning_rate": 0.001, "loss": 1.1769, "step": 11180 }, { "epoch": 0.356835358270353, "grad_norm": 0.18798422813415527, "learning_rate": 0.001, "loss": 1.1763, "step": 11190 }, { "epoch": 0.3571542459899869, "grad_norm": 0.1764545887708664, "learning_rate": 0.001, "loss": 1.1528, "step": 11200 }, { "epoch": 0.35747313370962086, "grad_norm": 0.18498189747333527, "learning_rate": 0.001, "loss": 1.1927, "step": 11210 }, { "epoch": 0.35779202142925476, "grad_norm": 0.17672859132289886, "learning_rate": 0.001, "loss": 1.1782, "step": 11220 }, { "epoch": 0.35811090914888866, "grad_norm": 0.18820945918560028, "learning_rate": 0.001, "loss": 1.2013, "step": 11230 }, { "epoch": 0.3584297968685226, "grad_norm": 0.18688707053661346, "learning_rate": 0.001, "loss": 1.1976, "step": 11240 }, { "epoch": 0.3587486845881565, "grad_norm": 0.18575413525104523, "learning_rate": 0.001, "loss": 1.176, "step": 11250 }, { "epoch": 0.3590675723077904, "grad_norm": 0.18457651138305664, "learning_rate": 0.001, "loss": 1.1669, "step": 11260 }, { "epoch": 0.35938646002742436, "grad_norm": 0.1832694411277771, "learning_rate": 0.001, "loss": 1.192, "step": 11270 }, { "epoch": 0.35970534774705826, "grad_norm": 0.1787375658750534, "learning_rate": 0.001, "loss": 1.16, "step": 11280 }, { "epoch": 0.36002423546669216, "grad_norm": 0.19141420722007751, "learning_rate": 0.001, "loss": 1.171, "step": 11290 }, { "epoch": 0.3603431231863261, "grad_norm": 0.18656058609485626, "learning_rate": 0.001, "loss": 1.1834, "step": 11300 }, { "epoch": 0.36066201090596, "grad_norm": 0.17995838820934296, "learning_rate": 0.001, "loss": 1.1693, "step": 11310 }, { "epoch": 0.3609808986255939, "grad_norm": 0.1849241405725479, "learning_rate": 0.001, "loss": 1.1671, "step": 11320 }, { "epoch": 0.36129978634522786, "grad_norm": 0.18031972646713257, "learning_rate": 0.001, "loss": 1.1673, "step": 11330 }, { "epoch": 0.36161867406486176, "grad_norm": 0.17960664629936218, "learning_rate": 0.001, "loss": 1.1894, "step": 11340 }, { "epoch": 0.36193756178449565, "grad_norm": 0.18871818482875824, "learning_rate": 0.001, "loss": 1.1815, "step": 11350 }, { "epoch": 0.3622564495041296, "grad_norm": 0.1836932748556137, "learning_rate": 0.001, "loss": 1.1872, "step": 11360 }, { "epoch": 0.3625753372237635, "grad_norm": 0.18293382227420807, "learning_rate": 0.001, "loss": 1.1752, "step": 11370 }, { "epoch": 0.3628942249433974, "grad_norm": 0.1888611763715744, "learning_rate": 0.001, "loss": 1.1745, "step": 11380 }, { "epoch": 0.36321311266303136, "grad_norm": 0.18882402777671814, "learning_rate": 0.001, "loss": 1.1753, "step": 11390 }, { "epoch": 0.36353200038266525, "grad_norm": 0.18266329169273376, "learning_rate": 0.001, "loss": 1.1863, "step": 11400 }, { "epoch": 0.3638508881022992, "grad_norm": 0.18598082661628723, "learning_rate": 0.001, "loss": 1.1694, "step": 11410 }, { "epoch": 0.3641697758219331, "grad_norm": 0.18703070282936096, "learning_rate": 0.001, "loss": 1.1781, "step": 11420 }, { "epoch": 0.364488663541567, "grad_norm": 0.18162168562412262, "learning_rate": 0.001, "loss": 1.1567, "step": 11430 }, { "epoch": 0.36480755126120096, "grad_norm": 0.1802840381860733, "learning_rate": 0.001, "loss": 1.1776, "step": 11440 }, { "epoch": 0.36512643898083486, "grad_norm": 0.18248935043811798, "learning_rate": 0.001, "loss": 1.1772, "step": 11450 }, { "epoch": 0.36544532670046875, "grad_norm": 0.17799849808216095, "learning_rate": 0.001, "loss": 1.1591, "step": 11460 }, { "epoch": 0.3657642144201027, "grad_norm": 0.18336179852485657, "learning_rate": 0.001, "loss": 1.1777, "step": 11470 }, { "epoch": 0.3660831021397366, "grad_norm": 0.18439556658267975, "learning_rate": 0.001, "loss": 1.1685, "step": 11480 }, { "epoch": 0.3664019898593705, "grad_norm": 0.1861710548400879, "learning_rate": 0.001, "loss": 1.1785, "step": 11490 }, { "epoch": 0.36672087757900446, "grad_norm": 0.1906169354915619, "learning_rate": 0.001, "loss": 1.1917, "step": 11500 }, { "epoch": 0.36703976529863835, "grad_norm": 0.17864377796649933, "learning_rate": 0.001, "loss": 1.1622, "step": 11510 }, { "epoch": 0.36735865301827225, "grad_norm": 0.18871931731700897, "learning_rate": 0.001, "loss": 1.189, "step": 11520 }, { "epoch": 0.3676775407379062, "grad_norm": 0.1843557208776474, "learning_rate": 0.001, "loss": 1.1632, "step": 11530 }, { "epoch": 0.3679964284575401, "grad_norm": 0.1822815090417862, "learning_rate": 0.001, "loss": 1.1522, "step": 11540 }, { "epoch": 0.368315316177174, "grad_norm": 0.18551307916641235, "learning_rate": 0.001, "loss": 1.1796, "step": 11550 }, { "epoch": 0.36863420389680795, "grad_norm": 0.1817987561225891, "learning_rate": 0.001, "loss": 1.1682, "step": 11560 }, { "epoch": 0.36895309161644185, "grad_norm": 0.1884773075580597, "learning_rate": 0.001, "loss": 1.1818, "step": 11570 }, { "epoch": 0.36927197933607575, "grad_norm": 0.18173812329769135, "learning_rate": 0.001, "loss": 1.1507, "step": 11580 }, { "epoch": 0.3695908670557097, "grad_norm": 0.1824948489665985, "learning_rate": 0.001, "loss": 1.1586, "step": 11590 }, { "epoch": 0.3699097547753436, "grad_norm": 0.1829361766576767, "learning_rate": 0.001, "loss": 1.1953, "step": 11600 }, { "epoch": 0.3702286424949775, "grad_norm": 0.18169468641281128, "learning_rate": 0.001, "loss": 1.1786, "step": 11610 }, { "epoch": 0.37054753021461145, "grad_norm": 0.18068274855613708, "learning_rate": 0.001, "loss": 1.1862, "step": 11620 }, { "epoch": 0.37086641793424535, "grad_norm": 0.1841064840555191, "learning_rate": 0.001, "loss": 1.1768, "step": 11630 }, { "epoch": 0.37118530565387925, "grad_norm": 0.18492387235164642, "learning_rate": 0.001, "loss": 1.198, "step": 11640 }, { "epoch": 0.3715041933735132, "grad_norm": 0.17407332360744476, "learning_rate": 0.001, "loss": 1.1688, "step": 11650 }, { "epoch": 0.3718230810931471, "grad_norm": 0.18511362373828888, "learning_rate": 0.001, "loss": 1.1905, "step": 11660 }, { "epoch": 0.372141968812781, "grad_norm": 0.18631397187709808, "learning_rate": 0.001, "loss": 1.1934, "step": 11670 }, { "epoch": 0.37246085653241495, "grad_norm": 0.18116828799247742, "learning_rate": 0.001, "loss": 1.1727, "step": 11680 }, { "epoch": 0.37277974425204885, "grad_norm": 0.18609020113945007, "learning_rate": 0.001, "loss": 1.1718, "step": 11690 }, { "epoch": 0.37309863197168275, "grad_norm": 0.1801086664199829, "learning_rate": 0.001, "loss": 1.1578, "step": 11700 }, { "epoch": 0.3734175196913167, "grad_norm": 0.17963343858718872, "learning_rate": 0.001, "loss": 1.1743, "step": 11710 }, { "epoch": 0.3737364074109506, "grad_norm": 0.17758981883525848, "learning_rate": 0.001, "loss": 1.1653, "step": 11720 }, { "epoch": 0.3740552951305845, "grad_norm": 0.18511025607585907, "learning_rate": 0.001, "loss": 1.1803, "step": 11730 }, { "epoch": 0.37437418285021845, "grad_norm": 0.18526363372802734, "learning_rate": 0.001, "loss": 1.1813, "step": 11740 }, { "epoch": 0.37469307056985235, "grad_norm": 0.17796054482460022, "learning_rate": 0.001, "loss": 1.1909, "step": 11750 }, { "epoch": 0.37501195828948625, "grad_norm": 0.18078628182411194, "learning_rate": 0.001, "loss": 1.1823, "step": 11760 }, { "epoch": 0.3753308460091202, "grad_norm": 0.18209408223628998, "learning_rate": 0.001, "loss": 1.1798, "step": 11770 }, { "epoch": 0.3756497337287541, "grad_norm": 0.18052178621292114, "learning_rate": 0.001, "loss": 1.1704, "step": 11780 }, { "epoch": 0.375968621448388, "grad_norm": 0.18084560334682465, "learning_rate": 0.001, "loss": 1.1969, "step": 11790 }, { "epoch": 0.37628750916802195, "grad_norm": 0.17806032299995422, "learning_rate": 0.001, "loss": 1.1739, "step": 11800 }, { "epoch": 0.37660639688765585, "grad_norm": 0.1794821321964264, "learning_rate": 0.001, "loss": 1.1806, "step": 11810 }, { "epoch": 0.37692528460728975, "grad_norm": 0.1821807473897934, "learning_rate": 0.001, "loss": 1.1794, "step": 11820 }, { "epoch": 0.3772441723269237, "grad_norm": 0.18536563217639923, "learning_rate": 0.001, "loss": 1.1574, "step": 11830 }, { "epoch": 0.3775630600465576, "grad_norm": 0.17974741756916046, "learning_rate": 0.001, "loss": 1.1795, "step": 11840 }, { "epoch": 0.37788194776619155, "grad_norm": 0.1832747459411621, "learning_rate": 0.001, "loss": 1.175, "step": 11850 }, { "epoch": 0.37820083548582545, "grad_norm": 0.17949554324150085, "learning_rate": 0.001, "loss": 1.1824, "step": 11860 }, { "epoch": 0.37851972320545935, "grad_norm": 0.17655141651630402, "learning_rate": 0.001, "loss": 1.178, "step": 11870 }, { "epoch": 0.3788386109250933, "grad_norm": 0.17882749438285828, "learning_rate": 0.001, "loss": 1.1756, "step": 11880 }, { "epoch": 0.3791574986447272, "grad_norm": 0.17046712338924408, "learning_rate": 0.001, "loss": 1.1677, "step": 11890 }, { "epoch": 0.3794763863643611, "grad_norm": 0.18203288316726685, "learning_rate": 0.001, "loss": 1.1608, "step": 11900 }, { "epoch": 0.37979527408399505, "grad_norm": 0.18050289154052734, "learning_rate": 0.001, "loss": 1.1797, "step": 11910 }, { "epoch": 0.38011416180362895, "grad_norm": 0.17975153028964996, "learning_rate": 0.001, "loss": 1.1846, "step": 11920 }, { "epoch": 0.38043304952326285, "grad_norm": 0.17785479128360748, "learning_rate": 0.001, "loss": 1.1494, "step": 11930 }, { "epoch": 0.3807519372428968, "grad_norm": 0.18440751731395721, "learning_rate": 0.001, "loss": 1.1433, "step": 11940 }, { "epoch": 0.3810708249625307, "grad_norm": 0.17766568064689636, "learning_rate": 0.001, "loss": 1.1682, "step": 11950 }, { "epoch": 0.3813897126821646, "grad_norm": 0.17816171050071716, "learning_rate": 0.001, "loss": 1.1793, "step": 11960 }, { "epoch": 0.38170860040179855, "grad_norm": 0.1773807853460312, "learning_rate": 0.001, "loss": 1.1751, "step": 11970 }, { "epoch": 0.38202748812143245, "grad_norm": 0.17774201929569244, "learning_rate": 0.001, "loss": 1.1522, "step": 11980 }, { "epoch": 0.38234637584106634, "grad_norm": 0.184428870677948, "learning_rate": 0.001, "loss": 1.1644, "step": 11990 }, { "epoch": 0.3826652635607003, "grad_norm": 0.18758635222911835, "learning_rate": 0.001, "loss": 1.1649, "step": 12000 }, { "epoch": 0.3829841512803342, "grad_norm": 0.18008406460285187, "learning_rate": 0.001, "loss": 1.1769, "step": 12010 }, { "epoch": 0.3833030389999681, "grad_norm": 0.18150904774665833, "learning_rate": 0.001, "loss": 1.1926, "step": 12020 }, { "epoch": 0.38362192671960205, "grad_norm": 0.18088509142398834, "learning_rate": 0.001, "loss": 1.1672, "step": 12030 }, { "epoch": 0.38394081443923594, "grad_norm": 0.17663919925689697, "learning_rate": 0.001, "loss": 1.1621, "step": 12040 }, { "epoch": 0.38425970215886984, "grad_norm": 0.17578722536563873, "learning_rate": 0.001, "loss": 1.1701, "step": 12050 }, { "epoch": 0.3845785898785038, "grad_norm": 0.17906726896762848, "learning_rate": 0.001, "loss": 1.1585, "step": 12060 }, { "epoch": 0.3848974775981377, "grad_norm": 0.17861928045749664, "learning_rate": 0.001, "loss": 1.1749, "step": 12070 }, { "epoch": 0.3852163653177716, "grad_norm": 0.18296191096305847, "learning_rate": 0.001, "loss": 1.1684, "step": 12080 }, { "epoch": 0.38553525303740555, "grad_norm": 0.18413016200065613, "learning_rate": 0.001, "loss": 1.1622, "step": 12090 }, { "epoch": 0.38585414075703944, "grad_norm": 0.18453866243362427, "learning_rate": 0.001, "loss": 1.169, "step": 12100 }, { "epoch": 0.38617302847667334, "grad_norm": 0.1800256073474884, "learning_rate": 0.001, "loss": 1.1711, "step": 12110 }, { "epoch": 0.3864919161963073, "grad_norm": 0.18271325528621674, "learning_rate": 0.001, "loss": 1.1779, "step": 12120 }, { "epoch": 0.3868108039159412, "grad_norm": 0.18513017892837524, "learning_rate": 0.001, "loss": 1.1789, "step": 12130 }, { "epoch": 0.3871296916355751, "grad_norm": 0.17271831631660461, "learning_rate": 0.001, "loss": 1.1441, "step": 12140 }, { "epoch": 0.38744857935520904, "grad_norm": 0.17758046090602875, "learning_rate": 0.001, "loss": 1.1834, "step": 12150 }, { "epoch": 0.38776746707484294, "grad_norm": 0.17706969380378723, "learning_rate": 0.001, "loss": 1.1809, "step": 12160 }, { "epoch": 0.38808635479447684, "grad_norm": 0.18004131317138672, "learning_rate": 0.001, "loss": 1.1454, "step": 12170 }, { "epoch": 0.3884052425141108, "grad_norm": 0.17989245057106018, "learning_rate": 0.001, "loss": 1.1868, "step": 12180 }, { "epoch": 0.3887241302337447, "grad_norm": 0.18094083666801453, "learning_rate": 0.001, "loss": 1.1606, "step": 12190 }, { "epoch": 0.3890430179533786, "grad_norm": 0.1783522516489029, "learning_rate": 0.001, "loss": 1.1639, "step": 12200 }, { "epoch": 0.38936190567301254, "grad_norm": 0.18109023571014404, "learning_rate": 0.001, "loss": 1.168, "step": 12210 }, { "epoch": 0.38968079339264644, "grad_norm": 0.17977750301361084, "learning_rate": 0.001, "loss": 1.1695, "step": 12220 }, { "epoch": 0.38999968111228034, "grad_norm": 0.17573317885398865, "learning_rate": 0.001, "loss": 1.1569, "step": 12230 }, { "epoch": 0.3903185688319143, "grad_norm": 0.17866761982440948, "learning_rate": 0.001, "loss": 1.1573, "step": 12240 }, { "epoch": 0.3906374565515482, "grad_norm": 0.1950611174106598, "learning_rate": 0.001, "loss": 1.1459, "step": 12250 }, { "epoch": 0.39095634427118214, "grad_norm": 0.18050596117973328, "learning_rate": 0.001, "loss": 1.1614, "step": 12260 }, { "epoch": 0.39127523199081604, "grad_norm": 0.17666298151016235, "learning_rate": 0.001, "loss": 1.1762, "step": 12270 }, { "epoch": 0.39159411971044994, "grad_norm": 0.18661339581012726, "learning_rate": 0.001, "loss": 1.1631, "step": 12280 }, { "epoch": 0.3919130074300839, "grad_norm": 0.18043366074562073, "learning_rate": 0.001, "loss": 1.1614, "step": 12290 }, { "epoch": 0.3922318951497178, "grad_norm": 0.1795511692762375, "learning_rate": 0.001, "loss": 1.18, "step": 12300 }, { "epoch": 0.3925507828693517, "grad_norm": 0.18567945063114166, "learning_rate": 0.001, "loss": 1.1677, "step": 12310 }, { "epoch": 0.39286967058898564, "grad_norm": 0.18031778931617737, "learning_rate": 0.001, "loss": 1.1733, "step": 12320 }, { "epoch": 0.39318855830861954, "grad_norm": 0.1776386946439743, "learning_rate": 0.001, "loss": 1.1709, "step": 12330 }, { "epoch": 0.39350744602825344, "grad_norm": 0.18277223408222198, "learning_rate": 0.001, "loss": 1.1522, "step": 12340 }, { "epoch": 0.3938263337478874, "grad_norm": 0.1731247454881668, "learning_rate": 0.001, "loss": 1.1509, "step": 12350 }, { "epoch": 0.3941452214675213, "grad_norm": 0.18343017995357513, "learning_rate": 0.001, "loss": 1.1909, "step": 12360 }, { "epoch": 0.3944641091871552, "grad_norm": 0.1750156581401825, "learning_rate": 0.001, "loss": 1.1576, "step": 12370 }, { "epoch": 0.39478299690678914, "grad_norm": 0.18162328004837036, "learning_rate": 0.001, "loss": 1.1551, "step": 12380 }, { "epoch": 0.39510188462642304, "grad_norm": 0.18877561390399933, "learning_rate": 0.001, "loss": 1.1558, "step": 12390 }, { "epoch": 0.39542077234605694, "grad_norm": 0.18337975442409515, "learning_rate": 0.001, "loss": 1.1701, "step": 12400 }, { "epoch": 0.3957396600656909, "grad_norm": 0.18324777483940125, "learning_rate": 0.001, "loss": 1.155, "step": 12410 }, { "epoch": 0.3960585477853248, "grad_norm": 0.18144458532333374, "learning_rate": 0.001, "loss": 1.1681, "step": 12420 }, { "epoch": 0.3963774355049587, "grad_norm": 0.18112270534038544, "learning_rate": 0.001, "loss": 1.171, "step": 12430 }, { "epoch": 0.39669632322459264, "grad_norm": 0.18132545053958893, "learning_rate": 0.001, "loss": 1.1454, "step": 12440 }, { "epoch": 0.39701521094422654, "grad_norm": 0.18609032034873962, "learning_rate": 0.001, "loss": 1.169, "step": 12450 }, { "epoch": 0.39733409866386044, "grad_norm": 0.17792826890945435, "learning_rate": 0.001, "loss": 1.1535, "step": 12460 }, { "epoch": 0.3976529863834944, "grad_norm": 0.17896825075149536, "learning_rate": 0.001, "loss": 1.1752, "step": 12470 }, { "epoch": 0.3979718741031283, "grad_norm": 0.18214313685894012, "learning_rate": 0.001, "loss": 1.1752, "step": 12480 }, { "epoch": 0.3982907618227622, "grad_norm": 0.17771407961845398, "learning_rate": 0.001, "loss": 1.1647, "step": 12490 }, { "epoch": 0.39860964954239614, "grad_norm": 0.17843423783779144, "learning_rate": 0.001, "loss": 1.1795, "step": 12500 }, { "epoch": 0.39892853726203004, "grad_norm": 0.17646920680999756, "learning_rate": 0.001, "loss": 1.1538, "step": 12510 }, { "epoch": 0.39924742498166393, "grad_norm": 0.1750120222568512, "learning_rate": 0.001, "loss": 1.157, "step": 12520 }, { "epoch": 0.3995663127012979, "grad_norm": 0.17426247894763947, "learning_rate": 0.001, "loss": 1.1755, "step": 12530 }, { "epoch": 0.3998852004209318, "grad_norm": 0.17977719008922577, "learning_rate": 0.001, "loss": 1.162, "step": 12540 }, { "epoch": 0.4002040881405657, "grad_norm": 0.1777566522359848, "learning_rate": 0.001, "loss": 1.1392, "step": 12550 }, { "epoch": 0.40052297586019964, "grad_norm": 0.17475956678390503, "learning_rate": 0.001, "loss": 1.1584, "step": 12560 }, { "epoch": 0.40084186357983353, "grad_norm": 0.1797400861978531, "learning_rate": 0.001, "loss": 1.1841, "step": 12570 }, { "epoch": 0.40116075129946743, "grad_norm": 0.18256771564483643, "learning_rate": 0.001, "loss": 1.1384, "step": 12580 }, { "epoch": 0.4014796390191014, "grad_norm": 0.1778700202703476, "learning_rate": 0.001, "loss": 1.1823, "step": 12590 }, { "epoch": 0.4017985267387353, "grad_norm": 0.17812106013298035, "learning_rate": 0.001, "loss": 1.1546, "step": 12600 }, { "epoch": 0.4021174144583692, "grad_norm": 0.17732104659080505, "learning_rate": 0.001, "loss": 1.1618, "step": 12610 }, { "epoch": 0.40243630217800314, "grad_norm": 0.1758512556552887, "learning_rate": 0.001, "loss": 1.1683, "step": 12620 }, { "epoch": 0.40275518989763703, "grad_norm": 0.17862358689308167, "learning_rate": 0.001, "loss": 1.1444, "step": 12630 }, { "epoch": 0.40307407761727093, "grad_norm": 0.17952404916286469, "learning_rate": 0.001, "loss": 1.1415, "step": 12640 }, { "epoch": 0.4033929653369049, "grad_norm": 0.17920245230197906, "learning_rate": 0.001, "loss": 1.1672, "step": 12650 }, { "epoch": 0.4037118530565388, "grad_norm": 0.18261930346488953, "learning_rate": 0.001, "loss": 1.1579, "step": 12660 }, { "epoch": 0.40403074077617274, "grad_norm": 0.17810742557048798, "learning_rate": 0.001, "loss": 1.1572, "step": 12670 }, { "epoch": 0.40434962849580663, "grad_norm": 0.18469861149787903, "learning_rate": 0.001, "loss": 1.1627, "step": 12680 }, { "epoch": 0.40466851621544053, "grad_norm": 0.1746484488248825, "learning_rate": 0.001, "loss": 1.15, "step": 12690 }, { "epoch": 0.4049874039350745, "grad_norm": 0.1788916438817978, "learning_rate": 0.001, "loss": 1.1631, "step": 12700 }, { "epoch": 0.4053062916547084, "grad_norm": 0.17759430408477783, "learning_rate": 0.001, "loss": 1.148, "step": 12710 }, { "epoch": 0.4056251793743423, "grad_norm": 0.18186287581920624, "learning_rate": 0.001, "loss": 1.1633, "step": 12720 }, { "epoch": 0.40594406709397624, "grad_norm": 0.18196360766887665, "learning_rate": 0.001, "loss": 1.1485, "step": 12730 }, { "epoch": 0.40626295481361013, "grad_norm": 0.1794370859861374, "learning_rate": 0.001, "loss": 1.1638, "step": 12740 }, { "epoch": 0.40658184253324403, "grad_norm": 0.17594540119171143, "learning_rate": 0.001, "loss": 1.1521, "step": 12750 }, { "epoch": 0.406900730252878, "grad_norm": 0.1787782609462738, "learning_rate": 0.001, "loss": 1.1568, "step": 12760 }, { "epoch": 0.4072196179725119, "grad_norm": 0.1763012856245041, "learning_rate": 0.001, "loss": 1.1754, "step": 12770 }, { "epoch": 0.4075385056921458, "grad_norm": 0.18189765512943268, "learning_rate": 0.001, "loss": 1.1529, "step": 12780 }, { "epoch": 0.40785739341177973, "grad_norm": 0.18166020512580872, "learning_rate": 0.001, "loss": 1.1541, "step": 12790 }, { "epoch": 0.40817628113141363, "grad_norm": 0.1810879111289978, "learning_rate": 0.001, "loss": 1.1717, "step": 12800 }, { "epoch": 0.40849516885104753, "grad_norm": 0.17724236845970154, "learning_rate": 0.001, "loss": 1.1492, "step": 12810 }, { "epoch": 0.4088140565706815, "grad_norm": 0.18092194199562073, "learning_rate": 0.001, "loss": 1.1522, "step": 12820 }, { "epoch": 0.4091329442903154, "grad_norm": 0.17586788535118103, "learning_rate": 0.001, "loss": 1.1487, "step": 12830 }, { "epoch": 0.4094518320099493, "grad_norm": 0.17758262157440186, "learning_rate": 0.001, "loss": 1.1283, "step": 12840 }, { "epoch": 0.40977071972958323, "grad_norm": 0.1868557333946228, "learning_rate": 0.001, "loss": 1.1606, "step": 12850 }, { "epoch": 0.41008960744921713, "grad_norm": 0.1756649762392044, "learning_rate": 0.001, "loss": 1.1413, "step": 12860 }, { "epoch": 0.41040849516885103, "grad_norm": 0.1724313199520111, "learning_rate": 0.001, "loss": 1.1465, "step": 12870 }, { "epoch": 0.410727382888485, "grad_norm": 0.17643587291240692, "learning_rate": 0.001, "loss": 1.1592, "step": 12880 }, { "epoch": 0.4110462706081189, "grad_norm": 0.18537591397762299, "learning_rate": 0.001, "loss": 1.1638, "step": 12890 }, { "epoch": 0.4113651583277528, "grad_norm": 0.1723945140838623, "learning_rate": 0.001, "loss": 1.1469, "step": 12900 }, { "epoch": 0.41168404604738673, "grad_norm": 0.1771482527256012, "learning_rate": 0.001, "loss": 1.1432, "step": 12910 }, { "epoch": 0.41200293376702063, "grad_norm": 0.1836037039756775, "learning_rate": 0.001, "loss": 1.1522, "step": 12920 }, { "epoch": 0.4123218214866545, "grad_norm": 0.1788443922996521, "learning_rate": 0.001, "loss": 1.1467, "step": 12930 }, { "epoch": 0.4126407092062885, "grad_norm": 0.18017710745334625, "learning_rate": 0.001, "loss": 1.1655, "step": 12940 }, { "epoch": 0.4129595969259224, "grad_norm": 0.18141914904117584, "learning_rate": 0.001, "loss": 1.1614, "step": 12950 }, { "epoch": 0.4132784846455563, "grad_norm": 0.1714852750301361, "learning_rate": 0.001, "loss": 1.1592, "step": 12960 }, { "epoch": 0.41359737236519023, "grad_norm": 0.18428003787994385, "learning_rate": 0.001, "loss": 1.1645, "step": 12970 }, { "epoch": 0.41391626008482413, "grad_norm": 0.18629293143749237, "learning_rate": 0.001, "loss": 1.1676, "step": 12980 }, { "epoch": 0.414235147804458, "grad_norm": 0.18700791895389557, "learning_rate": 0.001, "loss": 1.1527, "step": 12990 }, { "epoch": 0.414554035524092, "grad_norm": 0.17667566239833832, "learning_rate": 0.001, "loss": 1.154, "step": 13000 }, { "epoch": 0.4148729232437259, "grad_norm": 0.17916272580623627, "learning_rate": 0.001, "loss": 1.1339, "step": 13010 }, { "epoch": 0.4151918109633598, "grad_norm": 0.18255506455898285, "learning_rate": 0.001, "loss": 1.171, "step": 13020 }, { "epoch": 0.41551069868299373, "grad_norm": 0.17822222411632538, "learning_rate": 0.001, "loss": 1.1656, "step": 13030 }, { "epoch": 0.4158295864026276, "grad_norm": 0.1775846928358078, "learning_rate": 0.001, "loss": 1.1466, "step": 13040 }, { "epoch": 0.4161484741222615, "grad_norm": 0.17505867779254913, "learning_rate": 0.001, "loss": 1.1528, "step": 13050 }, { "epoch": 0.4164673618418955, "grad_norm": 0.18390125036239624, "learning_rate": 0.001, "loss": 1.1696, "step": 13060 }, { "epoch": 0.4167862495615294, "grad_norm": 0.17525775730609894, "learning_rate": 0.001, "loss": 1.153, "step": 13070 }, { "epoch": 0.41710513728116333, "grad_norm": 0.17498883605003357, "learning_rate": 0.001, "loss": 1.1583, "step": 13080 }, { "epoch": 0.4174240250007972, "grad_norm": 0.18329474329948425, "learning_rate": 0.001, "loss": 1.1583, "step": 13090 }, { "epoch": 0.4177429127204311, "grad_norm": 0.17313571274280548, "learning_rate": 0.001, "loss": 1.1477, "step": 13100 }, { "epoch": 0.4180618004400651, "grad_norm": 0.1746223270893097, "learning_rate": 0.001, "loss": 1.1566, "step": 13110 }, { "epoch": 0.418380688159699, "grad_norm": 0.18266509473323822, "learning_rate": 0.001, "loss": 1.1692, "step": 13120 }, { "epoch": 0.4186995758793329, "grad_norm": 0.17773644626140594, "learning_rate": 0.001, "loss": 1.1571, "step": 13130 }, { "epoch": 0.41901846359896683, "grad_norm": 0.1756133735179901, "learning_rate": 0.001, "loss": 1.1497, "step": 13140 }, { "epoch": 0.4193373513186007, "grad_norm": 0.17332470417022705, "learning_rate": 0.001, "loss": 1.163, "step": 13150 }, { "epoch": 0.4196562390382346, "grad_norm": 0.17600354552268982, "learning_rate": 0.001, "loss": 1.1471, "step": 13160 }, { "epoch": 0.4199751267578686, "grad_norm": 0.17582380771636963, "learning_rate": 0.001, "loss": 1.1549, "step": 13170 }, { "epoch": 0.4202940144775025, "grad_norm": 0.1780974119901657, "learning_rate": 0.001, "loss": 1.1389, "step": 13180 }, { "epoch": 0.4206129021971364, "grad_norm": 0.18114261329174042, "learning_rate": 0.001, "loss": 1.1524, "step": 13190 }, { "epoch": 0.4209317899167703, "grad_norm": 0.1800422966480255, "learning_rate": 0.001, "loss": 1.1694, "step": 13200 }, { "epoch": 0.4212506776364042, "grad_norm": 0.17944826185703278, "learning_rate": 0.001, "loss": 1.1596, "step": 13210 }, { "epoch": 0.4215695653560381, "grad_norm": 0.17792369425296783, "learning_rate": 0.001, "loss": 1.1504, "step": 13220 }, { "epoch": 0.4218884530756721, "grad_norm": 0.17227941751480103, "learning_rate": 0.001, "loss": 1.1581, "step": 13230 }, { "epoch": 0.422207340795306, "grad_norm": 0.18069201707839966, "learning_rate": 0.001, "loss": 1.1401, "step": 13240 }, { "epoch": 0.42252622851493987, "grad_norm": 0.17398959398269653, "learning_rate": 0.001, "loss": 1.1329, "step": 13250 }, { "epoch": 0.4228451162345738, "grad_norm": 0.17893588542938232, "learning_rate": 0.001, "loss": 1.1442, "step": 13260 }, { "epoch": 0.4231640039542077, "grad_norm": 0.18032392859458923, "learning_rate": 0.001, "loss": 1.1578, "step": 13270 }, { "epoch": 0.4234828916738416, "grad_norm": 0.17575973272323608, "learning_rate": 0.001, "loss": 1.1347, "step": 13280 }, { "epoch": 0.4238017793934756, "grad_norm": 0.1713789403438568, "learning_rate": 0.001, "loss": 1.1405, "step": 13290 }, { "epoch": 0.4241206671131095, "grad_norm": 0.17849451303482056, "learning_rate": 0.001, "loss": 1.1668, "step": 13300 }, { "epoch": 0.42443955483274337, "grad_norm": 0.17444877326488495, "learning_rate": 0.001, "loss": 1.1665, "step": 13310 }, { "epoch": 0.4247584425523773, "grad_norm": 0.1770418882369995, "learning_rate": 0.001, "loss": 1.1431, "step": 13320 }, { "epoch": 0.4250773302720112, "grad_norm": 0.17722870409488678, "learning_rate": 0.001, "loss": 1.152, "step": 13330 }, { "epoch": 0.4253962179916451, "grad_norm": 0.17850705981254578, "learning_rate": 0.001, "loss": 1.1465, "step": 13340 }, { "epoch": 0.4257151057112791, "grad_norm": 0.18088005483150482, "learning_rate": 0.001, "loss": 1.1511, "step": 13350 }, { "epoch": 0.42603399343091297, "grad_norm": 0.18208032846450806, "learning_rate": 0.001, "loss": 1.1367, "step": 13360 }, { "epoch": 0.42635288115054687, "grad_norm": 0.18329893052577972, "learning_rate": 0.001, "loss": 1.1551, "step": 13370 }, { "epoch": 0.4266717688701808, "grad_norm": 0.17308764159679413, "learning_rate": 0.001, "loss": 1.1335, "step": 13380 }, { "epoch": 0.4269906565898147, "grad_norm": 0.17596715688705444, "learning_rate": 0.001, "loss": 1.1395, "step": 13390 }, { "epoch": 0.4273095443094486, "grad_norm": 0.17568935453891754, "learning_rate": 0.001, "loss": 1.1381, "step": 13400 }, { "epoch": 0.4276284320290826, "grad_norm": 0.1743965744972229, "learning_rate": 0.001, "loss": 1.1283, "step": 13410 }, { "epoch": 0.42794731974871647, "grad_norm": 0.18175481259822845, "learning_rate": 0.001, "loss": 1.1598, "step": 13420 }, { "epoch": 0.42826620746835037, "grad_norm": 0.17291270196437836, "learning_rate": 0.001, "loss": 1.1515, "step": 13430 }, { "epoch": 0.4285850951879843, "grad_norm": 0.18205703794956207, "learning_rate": 0.001, "loss": 1.1602, "step": 13440 }, { "epoch": 0.4289039829076182, "grad_norm": 0.1734030842781067, "learning_rate": 0.001, "loss": 1.1451, "step": 13450 }, { "epoch": 0.4292228706272521, "grad_norm": 0.17287306487560272, "learning_rate": 0.001, "loss": 1.148, "step": 13460 }, { "epoch": 0.42954175834688607, "grad_norm": 0.1736667901277542, "learning_rate": 0.001, "loss": 1.15, "step": 13470 }, { "epoch": 0.42986064606651997, "grad_norm": 0.17618460953235626, "learning_rate": 0.001, "loss": 1.165, "step": 13480 }, { "epoch": 0.4301795337861539, "grad_norm": 0.16635005176067352, "learning_rate": 0.001, "loss": 1.1414, "step": 13490 }, { "epoch": 0.4304984215057878, "grad_norm": 0.17537757754325867, "learning_rate": 0.001, "loss": 1.1448, "step": 13500 }, { "epoch": 0.4308173092254217, "grad_norm": 0.1771899312734604, "learning_rate": 0.001, "loss": 1.1423, "step": 13510 }, { "epoch": 0.43113619694505567, "grad_norm": 0.17241476476192474, "learning_rate": 0.001, "loss": 1.1694, "step": 13520 }, { "epoch": 0.43145508466468957, "grad_norm": 0.17704904079437256, "learning_rate": 0.001, "loss": 1.1405, "step": 13530 }, { "epoch": 0.43177397238432347, "grad_norm": 0.17959144711494446, "learning_rate": 0.001, "loss": 1.15, "step": 13540 }, { "epoch": 0.4320928601039574, "grad_norm": 0.17902454733848572, "learning_rate": 0.001, "loss": 1.1579, "step": 13550 }, { "epoch": 0.4324117478235913, "grad_norm": 0.1775130033493042, "learning_rate": 0.001, "loss": 1.1479, "step": 13560 }, { "epoch": 0.4327306355432252, "grad_norm": 0.17502056062221527, "learning_rate": 0.001, "loss": 1.1513, "step": 13570 }, { "epoch": 0.43304952326285917, "grad_norm": 0.18220333755016327, "learning_rate": 0.001, "loss": 1.1607, "step": 13580 }, { "epoch": 0.43336841098249307, "grad_norm": 0.1754351109266281, "learning_rate": 0.001, "loss": 1.1423, "step": 13590 }, { "epoch": 0.43368729870212697, "grad_norm": 0.17384397983551025, "learning_rate": 0.001, "loss": 1.1395, "step": 13600 }, { "epoch": 0.4340061864217609, "grad_norm": 0.17870260775089264, "learning_rate": 0.001, "loss": 1.1468, "step": 13610 }, { "epoch": 0.4343250741413948, "grad_norm": 0.1795383095741272, "learning_rate": 0.001, "loss": 1.1653, "step": 13620 }, { "epoch": 0.4346439618610287, "grad_norm": 0.1721075177192688, "learning_rate": 0.001, "loss": 1.1291, "step": 13630 }, { "epoch": 0.43496284958066267, "grad_norm": 0.17633718252182007, "learning_rate": 0.001, "loss": 1.154, "step": 13640 }, { "epoch": 0.43528173730029657, "grad_norm": 0.17714627087116241, "learning_rate": 0.001, "loss": 1.147, "step": 13650 }, { "epoch": 0.43560062501993047, "grad_norm": 0.17243249714374542, "learning_rate": 0.001, "loss": 1.1351, "step": 13660 }, { "epoch": 0.4359195127395644, "grad_norm": 0.17836323380470276, "learning_rate": 0.001, "loss": 1.1531, "step": 13670 }, { "epoch": 0.4362384004591983, "grad_norm": 0.1793064922094345, "learning_rate": 0.001, "loss": 1.1409, "step": 13680 }, { "epoch": 0.4365572881788322, "grad_norm": 0.17111380398273468, "learning_rate": 0.001, "loss": 1.1363, "step": 13690 }, { "epoch": 0.43687617589846617, "grad_norm": 0.18050673604011536, "learning_rate": 0.001, "loss": 1.1549, "step": 13700 }, { "epoch": 0.43719506361810007, "grad_norm": 0.17576226592063904, "learning_rate": 0.001, "loss": 1.1389, "step": 13710 }, { "epoch": 0.43751395133773396, "grad_norm": 0.16893474757671356, "learning_rate": 0.001, "loss": 1.1222, "step": 13720 }, { "epoch": 0.4378328390573679, "grad_norm": 0.17337141931056976, "learning_rate": 0.001, "loss": 1.1638, "step": 13730 }, { "epoch": 0.4381517267770018, "grad_norm": 0.17256872355937958, "learning_rate": 0.001, "loss": 1.1492, "step": 13740 }, { "epoch": 0.4384706144966357, "grad_norm": 0.17770156264305115, "learning_rate": 0.001, "loss": 1.1438, "step": 13750 }, { "epoch": 0.43878950221626967, "grad_norm": 0.17507579922676086, "learning_rate": 0.001, "loss": 1.1391, "step": 13760 }, { "epoch": 0.43910838993590356, "grad_norm": 0.18140773475170135, "learning_rate": 0.001, "loss": 1.1493, "step": 13770 }, { "epoch": 0.43942727765553746, "grad_norm": 0.18639299273490906, "learning_rate": 0.001, "loss": 1.1545, "step": 13780 }, { "epoch": 0.4397461653751714, "grad_norm": 0.176975280046463, "learning_rate": 0.001, "loss": 1.1519, "step": 13790 }, { "epoch": 0.4400650530948053, "grad_norm": 0.17814040184020996, "learning_rate": 0.001, "loss": 1.1446, "step": 13800 }, { "epoch": 0.4403839408144392, "grad_norm": 0.1774367243051529, "learning_rate": 0.001, "loss": 1.1355, "step": 13810 }, { "epoch": 0.44070282853407317, "grad_norm": 0.18180528283119202, "learning_rate": 0.001, "loss": 1.1372, "step": 13820 }, { "epoch": 0.44102171625370706, "grad_norm": 0.1772371381521225, "learning_rate": 0.001, "loss": 1.1409, "step": 13830 }, { "epoch": 0.44134060397334096, "grad_norm": 0.18037815392017365, "learning_rate": 0.001, "loss": 1.1297, "step": 13840 }, { "epoch": 0.4416594916929749, "grad_norm": 0.17408838868141174, "learning_rate": 0.001, "loss": 1.1401, "step": 13850 }, { "epoch": 0.4419783794126088, "grad_norm": 0.1728648841381073, "learning_rate": 0.001, "loss": 1.1397, "step": 13860 }, { "epoch": 0.4422972671322427, "grad_norm": 0.17422670125961304, "learning_rate": 0.001, "loss": 1.1305, "step": 13870 }, { "epoch": 0.44261615485187666, "grad_norm": 0.17485372722148895, "learning_rate": 0.001, "loss": 1.1217, "step": 13880 }, { "epoch": 0.44293504257151056, "grad_norm": 0.17043618857860565, "learning_rate": 0.001, "loss": 1.1286, "step": 13890 }, { "epoch": 0.4432539302911445, "grad_norm": 0.17486435174942017, "learning_rate": 0.001, "loss": 1.1489, "step": 13900 }, { "epoch": 0.4435728180107784, "grad_norm": 0.1703348606824875, "learning_rate": 0.001, "loss": 1.1316, "step": 13910 }, { "epoch": 0.4438917057304123, "grad_norm": 0.17141640186309814, "learning_rate": 0.001, "loss": 1.1498, "step": 13920 }, { "epoch": 0.44421059345004626, "grad_norm": 0.17244957387447357, "learning_rate": 0.001, "loss": 1.1553, "step": 13930 }, { "epoch": 0.44452948116968016, "grad_norm": 0.17857715487480164, "learning_rate": 0.001, "loss": 1.1266, "step": 13940 }, { "epoch": 0.44484836888931406, "grad_norm": 0.17840982973575592, "learning_rate": 0.001, "loss": 1.1417, "step": 13950 }, { "epoch": 0.445167256608948, "grad_norm": 0.17459362745285034, "learning_rate": 0.001, "loss": 1.1621, "step": 13960 }, { "epoch": 0.4454861443285819, "grad_norm": 0.17860184609889984, "learning_rate": 0.001, "loss": 1.1337, "step": 13970 }, { "epoch": 0.4458050320482158, "grad_norm": 0.1751028597354889, "learning_rate": 0.001, "loss": 1.143, "step": 13980 }, { "epoch": 0.44612391976784976, "grad_norm": 0.1695946753025055, "learning_rate": 0.001, "loss": 1.1387, "step": 13990 }, { "epoch": 0.44644280748748366, "grad_norm": 0.18408192694187164, "learning_rate": 0.001, "loss": 1.1318, "step": 14000 }, { "epoch": 0.44676169520711756, "grad_norm": 0.17113225162029266, "learning_rate": 0.001, "loss": 1.1492, "step": 14010 }, { "epoch": 0.4470805829267515, "grad_norm": 0.17978601157665253, "learning_rate": 0.001, "loss": 1.1497, "step": 14020 }, { "epoch": 0.4473994706463854, "grad_norm": 0.18019011616706848, "learning_rate": 0.001, "loss": 1.147, "step": 14030 }, { "epoch": 0.4477183583660193, "grad_norm": 0.17102354764938354, "learning_rate": 0.001, "loss": 1.1339, "step": 14040 }, { "epoch": 0.44803724608565326, "grad_norm": 0.17504438757896423, "learning_rate": 0.001, "loss": 1.1222, "step": 14050 }, { "epoch": 0.44835613380528716, "grad_norm": 0.18110573291778564, "learning_rate": 0.001, "loss": 1.1286, "step": 14060 }, { "epoch": 0.44867502152492106, "grad_norm": 0.17824122309684753, "learning_rate": 0.001, "loss": 1.1247, "step": 14070 }, { "epoch": 0.448993909244555, "grad_norm": 0.17802254855632782, "learning_rate": 0.001, "loss": 1.1375, "step": 14080 }, { "epoch": 0.4493127969641889, "grad_norm": 0.1781211942434311, "learning_rate": 0.001, "loss": 1.1347, "step": 14090 }, { "epoch": 0.4496316846838228, "grad_norm": 0.17185848951339722, "learning_rate": 0.001, "loss": 1.1378, "step": 14100 }, { "epoch": 0.44995057240345676, "grad_norm": 0.1789436936378479, "learning_rate": 0.001, "loss": 1.1426, "step": 14110 }, { "epoch": 0.45026946012309066, "grad_norm": 0.17003895342350006, "learning_rate": 0.001, "loss": 1.1282, "step": 14120 }, { "epoch": 0.45058834784272456, "grad_norm": 0.17219972610473633, "learning_rate": 0.001, "loss": 1.1324, "step": 14130 }, { "epoch": 0.4509072355623585, "grad_norm": 0.1720394790172577, "learning_rate": 0.001, "loss": 1.1298, "step": 14140 }, { "epoch": 0.4512261232819924, "grad_norm": 0.17621329426765442, "learning_rate": 0.001, "loss": 1.1602, "step": 14150 }, { "epoch": 0.4515450110016263, "grad_norm": 0.17632067203521729, "learning_rate": 0.001, "loss": 1.1334, "step": 14160 }, { "epoch": 0.45186389872126026, "grad_norm": 0.16688014566898346, "learning_rate": 0.001, "loss": 1.1238, "step": 14170 }, { "epoch": 0.45218278644089416, "grad_norm": 0.17131859064102173, "learning_rate": 0.001, "loss": 1.1355, "step": 14180 }, { "epoch": 0.45250167416052806, "grad_norm": 0.16830040514469147, "learning_rate": 0.001, "loss": 1.1219, "step": 14190 }, { "epoch": 0.452820561880162, "grad_norm": 0.17178581655025482, "learning_rate": 0.001, "loss": 1.1489, "step": 14200 }, { "epoch": 0.4531394495997959, "grad_norm": 0.17017489671707153, "learning_rate": 0.001, "loss": 1.1487, "step": 14210 }, { "epoch": 0.4534583373194298, "grad_norm": 0.17112135887145996, "learning_rate": 0.001, "loss": 1.1372, "step": 14220 }, { "epoch": 0.45377722503906376, "grad_norm": 0.1778554469347, "learning_rate": 0.001, "loss": 1.1248, "step": 14230 }, { "epoch": 0.45409611275869766, "grad_norm": 0.17707771062850952, "learning_rate": 0.001, "loss": 1.1243, "step": 14240 }, { "epoch": 0.45441500047833155, "grad_norm": 0.17464545369148254, "learning_rate": 0.001, "loss": 1.1392, "step": 14250 }, { "epoch": 0.4547338881979655, "grad_norm": 0.17195813357830048, "learning_rate": 0.001, "loss": 1.1511, "step": 14260 }, { "epoch": 0.4550527759175994, "grad_norm": 0.1783856600522995, "learning_rate": 0.001, "loss": 1.1345, "step": 14270 }, { "epoch": 0.4553716636372333, "grad_norm": 0.1746705174446106, "learning_rate": 0.001, "loss": 1.1424, "step": 14280 }, { "epoch": 0.45569055135686726, "grad_norm": 0.18295544385910034, "learning_rate": 0.001, "loss": 1.1479, "step": 14290 }, { "epoch": 0.45600943907650116, "grad_norm": 0.16954278945922852, "learning_rate": 0.001, "loss": 1.1473, "step": 14300 }, { "epoch": 0.45632832679613505, "grad_norm": 0.17534016072750092, "learning_rate": 0.001, "loss": 1.148, "step": 14310 }, { "epoch": 0.456647214515769, "grad_norm": 0.17889931797981262, "learning_rate": 0.001, "loss": 1.1374, "step": 14320 }, { "epoch": 0.4569661022354029, "grad_norm": 0.17737379670143127, "learning_rate": 0.001, "loss": 1.1311, "step": 14330 }, { "epoch": 0.45728498995503686, "grad_norm": 0.17604157328605652, "learning_rate": 0.001, "loss": 1.1262, "step": 14340 }, { "epoch": 0.45760387767467076, "grad_norm": 0.17472808063030243, "learning_rate": 0.001, "loss": 1.1366, "step": 14350 }, { "epoch": 0.45792276539430465, "grad_norm": 0.1687580943107605, "learning_rate": 0.001, "loss": 1.1317, "step": 14360 }, { "epoch": 0.4582416531139386, "grad_norm": 0.1738176941871643, "learning_rate": 0.001, "loss": 1.1597, "step": 14370 }, { "epoch": 0.4585605408335725, "grad_norm": 0.17493782937526703, "learning_rate": 0.001, "loss": 1.1475, "step": 14380 }, { "epoch": 0.4588794285532064, "grad_norm": 0.17473384737968445, "learning_rate": 0.001, "loss": 1.1285, "step": 14390 }, { "epoch": 0.45919831627284036, "grad_norm": 0.17252366244792938, "learning_rate": 0.001, "loss": 1.1527, "step": 14400 }, { "epoch": 0.45951720399247425, "grad_norm": 0.17336592078208923, "learning_rate": 0.001, "loss": 1.1074, "step": 14410 }, { "epoch": 0.45983609171210815, "grad_norm": 0.1781364381313324, "learning_rate": 0.001, "loss": 1.1315, "step": 14420 }, { "epoch": 0.4601549794317421, "grad_norm": 0.1762470006942749, "learning_rate": 0.001, "loss": 1.1317, "step": 14430 }, { "epoch": 0.460473867151376, "grad_norm": 0.17392881214618683, "learning_rate": 0.001, "loss": 1.1483, "step": 14440 }, { "epoch": 0.4607927548710099, "grad_norm": 0.17682306468486786, "learning_rate": 0.001, "loss": 1.1258, "step": 14450 }, { "epoch": 0.46111164259064386, "grad_norm": 0.1736285388469696, "learning_rate": 0.001, "loss": 1.1405, "step": 14460 }, { "epoch": 0.46143053031027775, "grad_norm": 0.1780506670475006, "learning_rate": 0.001, "loss": 1.1301, "step": 14470 }, { "epoch": 0.46174941802991165, "grad_norm": 0.17654019594192505, "learning_rate": 0.001, "loss": 1.119, "step": 14480 }, { "epoch": 0.4620683057495456, "grad_norm": 0.17297044396400452, "learning_rate": 0.001, "loss": 1.131, "step": 14490 }, { "epoch": 0.4623871934691795, "grad_norm": 0.17622877657413483, "learning_rate": 0.001, "loss": 1.1345, "step": 14500 }, { "epoch": 0.4627060811888134, "grad_norm": 0.18307894468307495, "learning_rate": 0.001, "loss": 1.1295, "step": 14510 }, { "epoch": 0.46302496890844735, "grad_norm": 0.17374959588050842, "learning_rate": 0.001, "loss": 1.1271, "step": 14520 }, { "epoch": 0.46334385662808125, "grad_norm": 0.18093709647655487, "learning_rate": 0.001, "loss": 1.156, "step": 14530 }, { "epoch": 0.46366274434771515, "grad_norm": 0.16554683446884155, "learning_rate": 0.001, "loss": 1.1208, "step": 14540 }, { "epoch": 0.4639816320673491, "grad_norm": 0.1772506684064865, "learning_rate": 0.001, "loss": 1.1486, "step": 14550 }, { "epoch": 0.464300519786983, "grad_norm": 0.1779564768075943, "learning_rate": 0.001, "loss": 1.135, "step": 14560 }, { "epoch": 0.4646194075066169, "grad_norm": 0.17387722432613373, "learning_rate": 0.001, "loss": 1.1349, "step": 14570 }, { "epoch": 0.46493829522625085, "grad_norm": 0.1813112050294876, "learning_rate": 0.001, "loss": 1.1294, "step": 14580 }, { "epoch": 0.46525718294588475, "grad_norm": 0.17628876864910126, "learning_rate": 0.001, "loss": 1.1273, "step": 14590 }, { "epoch": 0.46557607066551865, "grad_norm": 0.17620135843753815, "learning_rate": 0.001, "loss": 1.1346, "step": 14600 }, { "epoch": 0.4658949583851526, "grad_norm": 0.17300722002983093, "learning_rate": 0.001, "loss": 1.1397, "step": 14610 }, { "epoch": 0.4662138461047865, "grad_norm": 0.1759226769208908, "learning_rate": 0.001, "loss": 1.1327, "step": 14620 }, { "epoch": 0.4665327338244204, "grad_norm": 0.17027615010738373, "learning_rate": 0.001, "loss": 1.1353, "step": 14630 }, { "epoch": 0.46685162154405435, "grad_norm": 0.17422300577163696, "learning_rate": 0.001, "loss": 1.1139, "step": 14640 }, { "epoch": 0.46717050926368825, "grad_norm": 0.1712965965270996, "learning_rate": 0.001, "loss": 1.1373, "step": 14650 }, { "epoch": 0.46748939698332215, "grad_norm": 0.17639024555683136, "learning_rate": 0.001, "loss": 1.1278, "step": 14660 }, { "epoch": 0.4678082847029561, "grad_norm": 0.17248843610286713, "learning_rate": 0.001, "loss": 1.1298, "step": 14670 }, { "epoch": 0.46812717242259, "grad_norm": 0.1724853813648224, "learning_rate": 0.001, "loss": 1.1185, "step": 14680 }, { "epoch": 0.4684460601422239, "grad_norm": 0.1779608428478241, "learning_rate": 0.001, "loss": 1.1258, "step": 14690 }, { "epoch": 0.46876494786185785, "grad_norm": 0.17226381599903107, "learning_rate": 0.001, "loss": 1.1276, "step": 14700 }, { "epoch": 0.46908383558149175, "grad_norm": 0.1835961490869522, "learning_rate": 0.001, "loss": 1.1435, "step": 14710 }, { "epoch": 0.46940272330112565, "grad_norm": 0.17311832308769226, "learning_rate": 0.001, "loss": 1.1353, "step": 14720 }, { "epoch": 0.4697216110207596, "grad_norm": 0.1746002435684204, "learning_rate": 0.001, "loss": 1.1284, "step": 14730 }, { "epoch": 0.4700404987403935, "grad_norm": 0.1797454059123993, "learning_rate": 0.001, "loss": 1.1441, "step": 14740 }, { "epoch": 0.47035938646002745, "grad_norm": 0.1763915866613388, "learning_rate": 0.001, "loss": 1.1451, "step": 14750 }, { "epoch": 0.47067827417966135, "grad_norm": 0.16806401312351227, "learning_rate": 0.001, "loss": 1.1159, "step": 14760 }, { "epoch": 0.47099716189929525, "grad_norm": 0.17495940625667572, "learning_rate": 0.001, "loss": 1.1063, "step": 14770 }, { "epoch": 0.4713160496189292, "grad_norm": 0.16822218894958496, "learning_rate": 0.001, "loss": 1.1362, "step": 14780 }, { "epoch": 0.4716349373385631, "grad_norm": 0.18633721768856049, "learning_rate": 0.001, "loss": 1.1432, "step": 14790 }, { "epoch": 0.471953825058197, "grad_norm": 0.17266841232776642, "learning_rate": 0.001, "loss": 1.1402, "step": 14800 }, { "epoch": 0.47227271277783095, "grad_norm": 0.1750502586364746, "learning_rate": 0.001, "loss": 1.1287, "step": 14810 }, { "epoch": 0.47259160049746485, "grad_norm": 0.17796385288238525, "learning_rate": 0.001, "loss": 1.1362, "step": 14820 }, { "epoch": 0.47291048821709875, "grad_norm": 0.17094792425632477, "learning_rate": 0.001, "loss": 1.1424, "step": 14830 }, { "epoch": 0.4732293759367327, "grad_norm": 0.1744205206632614, "learning_rate": 0.001, "loss": 1.143, "step": 14840 }, { "epoch": 0.4735482636563666, "grad_norm": 0.16905353963375092, "learning_rate": 0.001, "loss": 1.1341, "step": 14850 }, { "epoch": 0.4738671513760005, "grad_norm": 0.17296987771987915, "learning_rate": 0.001, "loss": 1.121, "step": 14860 }, { "epoch": 0.47418603909563445, "grad_norm": 0.17449218034744263, "learning_rate": 0.001, "loss": 1.1205, "step": 14870 }, { "epoch": 0.47450492681526835, "grad_norm": 0.17594942450523376, "learning_rate": 0.001, "loss": 1.1289, "step": 14880 }, { "epoch": 0.47482381453490224, "grad_norm": 0.17545631527900696, "learning_rate": 0.001, "loss": 1.1348, "step": 14890 }, { "epoch": 0.4751427022545362, "grad_norm": 0.17231062054634094, "learning_rate": 0.001, "loss": 1.1314, "step": 14900 }, { "epoch": 0.4754615899741701, "grad_norm": 0.17671747505664825, "learning_rate": 0.001, "loss": 1.1464, "step": 14910 }, { "epoch": 0.475780477693804, "grad_norm": 0.17096923291683197, "learning_rate": 0.001, "loss": 1.1296, "step": 14920 }, { "epoch": 0.47609936541343795, "grad_norm": 0.1766541302204132, "learning_rate": 0.001, "loss": 1.138, "step": 14930 }, { "epoch": 0.47641825313307185, "grad_norm": 0.172738716006279, "learning_rate": 0.001, "loss": 1.1477, "step": 14940 }, { "epoch": 0.47673714085270574, "grad_norm": 0.17469772696495056, "learning_rate": 0.001, "loss": 1.1195, "step": 14950 }, { "epoch": 0.4770560285723397, "grad_norm": 0.1674947440624237, "learning_rate": 0.001, "loss": 1.1367, "step": 14960 }, { "epoch": 0.4773749162919736, "grad_norm": 0.1782265305519104, "learning_rate": 0.001, "loss": 1.1374, "step": 14970 }, { "epoch": 0.4776938040116075, "grad_norm": 0.17530570924282074, "learning_rate": 0.001, "loss": 1.122, "step": 14980 }, { "epoch": 0.47801269173124145, "grad_norm": 0.16821470856666565, "learning_rate": 0.001, "loss": 1.116, "step": 14990 }, { "epoch": 0.47833157945087534, "grad_norm": 0.17344245314598083, "learning_rate": 0.001, "loss": 1.1444, "step": 15000 }, { "epoch": 0.47865046717050924, "grad_norm": 0.17519459128379822, "learning_rate": 0.001, "loss": 1.1424, "step": 15010 }, { "epoch": 0.4789693548901432, "grad_norm": 0.17834389209747314, "learning_rate": 0.001, "loss": 1.1181, "step": 15020 }, { "epoch": 0.4792882426097771, "grad_norm": 0.1712467074394226, "learning_rate": 0.001, "loss": 1.1416, "step": 15030 }, { "epoch": 0.479607130329411, "grad_norm": 0.16900737583637238, "learning_rate": 0.001, "loss": 1.1191, "step": 15040 }, { "epoch": 0.47992601804904494, "grad_norm": 0.17559660971164703, "learning_rate": 0.001, "loss": 1.1333, "step": 15050 }, { "epoch": 0.48024490576867884, "grad_norm": 0.17230333387851715, "learning_rate": 0.001, "loss": 1.1261, "step": 15060 }, { "epoch": 0.48056379348831274, "grad_norm": 0.1761026233434677, "learning_rate": 0.001, "loss": 1.1384, "step": 15070 }, { "epoch": 0.4808826812079467, "grad_norm": 0.17489396035671234, "learning_rate": 0.001, "loss": 1.1341, "step": 15080 }, { "epoch": 0.4812015689275806, "grad_norm": 0.1864793449640274, "learning_rate": 0.001, "loss": 1.1252, "step": 15090 }, { "epoch": 0.4815204566472145, "grad_norm": 0.1766740083694458, "learning_rate": 0.001, "loss": 1.1406, "step": 15100 }, { "epoch": 0.48183934436684844, "grad_norm": 0.17469963431358337, "learning_rate": 0.001, "loss": 1.1129, "step": 15110 }, { "epoch": 0.48215823208648234, "grad_norm": 0.17255161702632904, "learning_rate": 0.001, "loss": 1.1078, "step": 15120 }, { "epoch": 0.48247711980611624, "grad_norm": 0.17505139112472534, "learning_rate": 0.001, "loss": 1.1173, "step": 15130 }, { "epoch": 0.4827960075257502, "grad_norm": 0.17324578762054443, "learning_rate": 0.001, "loss": 1.126, "step": 15140 }, { "epoch": 0.4831148952453841, "grad_norm": 0.1717669516801834, "learning_rate": 0.001, "loss": 1.1094, "step": 15150 }, { "epoch": 0.48343378296501804, "grad_norm": 0.1693733185529709, "learning_rate": 0.001, "loss": 1.1206, "step": 15160 }, { "epoch": 0.48375267068465194, "grad_norm": 0.1819078028202057, "learning_rate": 0.001, "loss": 1.1259, "step": 15170 }, { "epoch": 0.48407155840428584, "grad_norm": 0.18116608262062073, "learning_rate": 0.001, "loss": 1.1226, "step": 15180 }, { "epoch": 0.4843904461239198, "grad_norm": 0.1702233999967575, "learning_rate": 0.001, "loss": 1.1141, "step": 15190 }, { "epoch": 0.4847093338435537, "grad_norm": 0.16998600959777832, "learning_rate": 0.001, "loss": 1.1519, "step": 15200 }, { "epoch": 0.4850282215631876, "grad_norm": 0.17265725135803223, "learning_rate": 0.001, "loss": 1.1135, "step": 15210 }, { "epoch": 0.48534710928282154, "grad_norm": 0.1677086502313614, "learning_rate": 0.001, "loss": 1.1275, "step": 15220 }, { "epoch": 0.48566599700245544, "grad_norm": 0.1745789349079132, "learning_rate": 0.001, "loss": 1.1331, "step": 15230 }, { "epoch": 0.48598488472208934, "grad_norm": 0.1719202697277069, "learning_rate": 0.001, "loss": 1.1365, "step": 15240 }, { "epoch": 0.4863037724417233, "grad_norm": 0.17276684939861298, "learning_rate": 0.001, "loss": 1.1176, "step": 15250 }, { "epoch": 0.4866226601613572, "grad_norm": 0.17195747792720795, "learning_rate": 0.001, "loss": 1.1238, "step": 15260 }, { "epoch": 0.4869415478809911, "grad_norm": 0.16910310089588165, "learning_rate": 0.001, "loss": 1.1206, "step": 15270 }, { "epoch": 0.48726043560062504, "grad_norm": 0.1696513593196869, "learning_rate": 0.001, "loss": 1.1058, "step": 15280 }, { "epoch": 0.48757932332025894, "grad_norm": 0.170016348361969, "learning_rate": 0.001, "loss": 1.1272, "step": 15290 }, { "epoch": 0.48789821103989284, "grad_norm": 0.16925400495529175, "learning_rate": 0.001, "loss": 1.1151, "step": 15300 }, { "epoch": 0.4882170987595268, "grad_norm": 0.17601697146892548, "learning_rate": 0.001, "loss": 1.1072, "step": 15310 }, { "epoch": 0.4885359864791607, "grad_norm": 0.17617474496364594, "learning_rate": 0.001, "loss": 1.1257, "step": 15320 }, { "epoch": 0.4888548741987946, "grad_norm": 0.17635928094387054, "learning_rate": 0.001, "loss": 1.126, "step": 15330 }, { "epoch": 0.48917376191842854, "grad_norm": 0.17766179144382477, "learning_rate": 0.001, "loss": 1.1111, "step": 15340 }, { "epoch": 0.48949264963806244, "grad_norm": 0.16769689321517944, "learning_rate": 0.001, "loss": 1.1188, "step": 15350 }, { "epoch": 0.48981153735769634, "grad_norm": 0.17344802618026733, "learning_rate": 0.001, "loss": 1.1076, "step": 15360 }, { "epoch": 0.4901304250773303, "grad_norm": 0.17091096937656403, "learning_rate": 0.001, "loss": 1.1342, "step": 15370 }, { "epoch": 0.4904493127969642, "grad_norm": 0.1739739030599594, "learning_rate": 0.001, "loss": 1.1275, "step": 15380 }, { "epoch": 0.4907682005165981, "grad_norm": 0.17534139752388, "learning_rate": 0.001, "loss": 1.1352, "step": 15390 }, { "epoch": 0.49108708823623204, "grad_norm": 0.17085367441177368, "learning_rate": 0.001, "loss": 1.1117, "step": 15400 }, { "epoch": 0.49140597595586594, "grad_norm": 0.17168210446834564, "learning_rate": 0.001, "loss": 1.1283, "step": 15410 }, { "epoch": 0.49172486367549983, "grad_norm": 0.1765969693660736, "learning_rate": 0.001, "loss": 1.1319, "step": 15420 }, { "epoch": 0.4920437513951338, "grad_norm": 0.1726970672607422, "learning_rate": 0.001, "loss": 1.1264, "step": 15430 }, { "epoch": 0.4923626391147677, "grad_norm": 0.16879424452781677, "learning_rate": 0.001, "loss": 1.1417, "step": 15440 }, { "epoch": 0.4926815268344016, "grad_norm": 0.17404453456401825, "learning_rate": 0.001, "loss": 1.1529, "step": 15450 }, { "epoch": 0.49300041455403554, "grad_norm": 0.16755877435207367, "learning_rate": 0.001, "loss": 1.14, "step": 15460 }, { "epoch": 0.49331930227366944, "grad_norm": 0.17902107536792755, "learning_rate": 0.001, "loss": 1.1458, "step": 15470 }, { "epoch": 0.49363818999330333, "grad_norm": 0.16922469437122345, "learning_rate": 0.001, "loss": 1.114, "step": 15480 }, { "epoch": 0.4939570777129373, "grad_norm": 0.1739080250263214, "learning_rate": 0.001, "loss": 1.1414, "step": 15490 }, { "epoch": 0.4942759654325712, "grad_norm": 0.17348088324069977, "learning_rate": 0.001, "loss": 1.1178, "step": 15500 }, { "epoch": 0.4945948531522051, "grad_norm": 0.1783573478460312, "learning_rate": 0.001, "loss": 1.1251, "step": 15510 }, { "epoch": 0.49491374087183904, "grad_norm": 0.17203998565673828, "learning_rate": 0.001, "loss": 1.1375, "step": 15520 }, { "epoch": 0.49523262859147293, "grad_norm": 0.17022830247879028, "learning_rate": 0.001, "loss": 1.1246, "step": 15530 }, { "epoch": 0.49555151631110683, "grad_norm": 0.16698627173900604, "learning_rate": 0.001, "loss": 1.1226, "step": 15540 }, { "epoch": 0.4958704040307408, "grad_norm": 0.17425251007080078, "learning_rate": 0.001, "loss": 1.1304, "step": 15550 }, { "epoch": 0.4961892917503747, "grad_norm": 0.17078517377376556, "learning_rate": 0.001, "loss": 1.1178, "step": 15560 }, { "epoch": 0.49650817947000864, "grad_norm": 0.16847193241119385, "learning_rate": 0.001, "loss": 1.1301, "step": 15570 }, { "epoch": 0.49682706718964254, "grad_norm": 0.17440183460712433, "learning_rate": 0.001, "loss": 1.133, "step": 15580 }, { "epoch": 0.49714595490927643, "grad_norm": 0.17285966873168945, "learning_rate": 0.001, "loss": 1.1362, "step": 15590 }, { "epoch": 0.4974648426289104, "grad_norm": 0.1749267727136612, "learning_rate": 0.001, "loss": 1.1195, "step": 15600 }, { "epoch": 0.4977837303485443, "grad_norm": 0.17195169627666473, "learning_rate": 0.001, "loss": 1.1165, "step": 15610 }, { "epoch": 0.4981026180681782, "grad_norm": 0.1727495789527893, "learning_rate": 0.001, "loss": 1.1264, "step": 15620 }, { "epoch": 0.49842150578781214, "grad_norm": 0.17486001551151276, "learning_rate": 0.001, "loss": 1.1146, "step": 15630 }, { "epoch": 0.49874039350744603, "grad_norm": 0.17329901456832886, "learning_rate": 0.001, "loss": 1.1258, "step": 15640 }, { "epoch": 0.49905928122707993, "grad_norm": 0.17345182597637177, "learning_rate": 0.001, "loss": 1.1229, "step": 15650 }, { "epoch": 0.4993781689467139, "grad_norm": 0.17703242599964142, "learning_rate": 0.001, "loss": 1.1452, "step": 15660 }, { "epoch": 0.4996970566663478, "grad_norm": 0.17290346324443817, "learning_rate": 0.001, "loss": 1.1236, "step": 15670 }, { "epoch": 0.5000159443859817, "grad_norm": 0.17458419501781464, "learning_rate": 0.001, "loss": 1.1279, "step": 15680 }, { "epoch": 0.5003348321056156, "grad_norm": 0.17450138926506042, "learning_rate": 0.001, "loss": 1.1207, "step": 15690 }, { "epoch": 0.5006537198252495, "grad_norm": 0.17006513476371765, "learning_rate": 0.001, "loss": 1.1216, "step": 15700 }, { "epoch": 0.5009726075448835, "grad_norm": 0.1693337857723236, "learning_rate": 0.001, "loss": 1.1245, "step": 15710 }, { "epoch": 0.5012914952645173, "grad_norm": 0.16920679807662964, "learning_rate": 0.001, "loss": 1.1386, "step": 15720 }, { "epoch": 0.5016103829841513, "grad_norm": 0.17201140522956848, "learning_rate": 0.001, "loss": 1.1135, "step": 15730 }, { "epoch": 0.5019292707037852, "grad_norm": 0.1762687861919403, "learning_rate": 0.001, "loss": 1.1462, "step": 15740 }, { "epoch": 0.5022481584234191, "grad_norm": 0.17598646879196167, "learning_rate": 0.001, "loss": 1.1259, "step": 15750 }, { "epoch": 0.502567046143053, "grad_norm": 0.17038299143314362, "learning_rate": 0.001, "loss": 1.109, "step": 15760 }, { "epoch": 0.502885933862687, "grad_norm": 0.18112069368362427, "learning_rate": 0.001, "loss": 1.1322, "step": 15770 }, { "epoch": 0.5032048215823208, "grad_norm": 0.1772948056459427, "learning_rate": 0.001, "loss": 1.1253, "step": 15780 }, { "epoch": 0.5035237093019548, "grad_norm": 0.16839788854122162, "learning_rate": 0.001, "loss": 1.1199, "step": 15790 }, { "epoch": 0.5038425970215887, "grad_norm": 0.17266593873500824, "learning_rate": 0.001, "loss": 1.1166, "step": 15800 }, { "epoch": 0.5041614847412226, "grad_norm": 0.1770016849040985, "learning_rate": 0.001, "loss": 1.1319, "step": 15810 }, { "epoch": 0.5044803724608565, "grad_norm": 0.16987302899360657, "learning_rate": 0.001, "loss": 1.1176, "step": 15820 }, { "epoch": 0.5047992601804905, "grad_norm": 0.16873012483119965, "learning_rate": 0.001, "loss": 1.1114, "step": 15830 }, { "epoch": 0.5051181479001243, "grad_norm": 0.17353899776935577, "learning_rate": 0.001, "loss": 1.1298, "step": 15840 }, { "epoch": 0.5054370356197583, "grad_norm": 0.17694109678268433, "learning_rate": 0.001, "loss": 1.1314, "step": 15850 }, { "epoch": 0.5057559233393922, "grad_norm": 0.17422708868980408, "learning_rate": 0.001, "loss": 1.1295, "step": 15860 }, { "epoch": 0.5060748110590261, "grad_norm": 0.17685523629188538, "learning_rate": 0.001, "loss": 1.1136, "step": 15870 }, { "epoch": 0.50639369877866, "grad_norm": 0.16311325132846832, "learning_rate": 0.001, "loss": 1.1278, "step": 15880 }, { "epoch": 0.506712586498294, "grad_norm": 0.1693374216556549, "learning_rate": 0.001, "loss": 1.1117, "step": 15890 }, { "epoch": 0.5070314742179278, "grad_norm": 0.1808386892080307, "learning_rate": 0.001, "loss": 1.1157, "step": 15900 }, { "epoch": 0.5073503619375618, "grad_norm": 0.17110781371593475, "learning_rate": 0.001, "loss": 1.1191, "step": 15910 }, { "epoch": 0.5076692496571957, "grad_norm": 0.1752341389656067, "learning_rate": 0.001, "loss": 1.1173, "step": 15920 }, { "epoch": 0.5079881373768296, "grad_norm": 0.17590519785881042, "learning_rate": 0.001, "loss": 1.123, "step": 15930 }, { "epoch": 0.5083070250964635, "grad_norm": 0.16483011841773987, "learning_rate": 0.001, "loss": 1.114, "step": 15940 }, { "epoch": 0.5086259128160975, "grad_norm": 0.1718084067106247, "learning_rate": 0.001, "loss": 1.1205, "step": 15950 }, { "epoch": 0.5089448005357313, "grad_norm": 0.17069004476070404, "learning_rate": 0.001, "loss": 1.1278, "step": 15960 }, { "epoch": 0.5092636882553653, "grad_norm": 0.1754763126373291, "learning_rate": 0.001, "loss": 1.1312, "step": 15970 }, { "epoch": 0.5095825759749992, "grad_norm": 0.1660955846309662, "learning_rate": 0.001, "loss": 1.1153, "step": 15980 }, { "epoch": 0.5099014636946331, "grad_norm": 0.16823166608810425, "learning_rate": 0.001, "loss": 1.1049, "step": 15990 }, { "epoch": 0.510220351414267, "grad_norm": 0.17043384909629822, "learning_rate": 0.001, "loss": 1.1104, "step": 16000 }, { "epoch": 0.510539239133901, "grad_norm": 0.17292456328868866, "learning_rate": 0.001, "loss": 1.119, "step": 16010 }, { "epoch": 0.5108581268535348, "grad_norm": 0.17111191153526306, "learning_rate": 0.001, "loss": 1.1254, "step": 16020 }, { "epoch": 0.5111770145731688, "grad_norm": 0.16945838928222656, "learning_rate": 0.001, "loss": 1.099, "step": 16030 }, { "epoch": 0.5114959022928027, "grad_norm": 0.17108163237571716, "learning_rate": 0.001, "loss": 1.1369, "step": 16040 }, { "epoch": 0.5118147900124366, "grad_norm": 0.17080144584178925, "learning_rate": 0.001, "loss": 1.1138, "step": 16050 }, { "epoch": 0.5121336777320705, "grad_norm": 0.17457976937294006, "learning_rate": 0.001, "loss": 1.1342, "step": 16060 }, { "epoch": 0.5124525654517045, "grad_norm": 0.17557039856910706, "learning_rate": 0.001, "loss": 1.0928, "step": 16070 }, { "epoch": 0.5127714531713383, "grad_norm": 0.17394962906837463, "learning_rate": 0.001, "loss": 1.113, "step": 16080 }, { "epoch": 0.5130903408909723, "grad_norm": 0.17363347113132477, "learning_rate": 0.001, "loss": 1.1287, "step": 16090 }, { "epoch": 0.5134092286106062, "grad_norm": 0.17071598768234253, "learning_rate": 0.001, "loss": 1.1229, "step": 16100 }, { "epoch": 0.5137281163302401, "grad_norm": 0.17206592857837677, "learning_rate": 0.001, "loss": 1.1182, "step": 16110 }, { "epoch": 0.514047004049874, "grad_norm": 0.16682256758213043, "learning_rate": 0.001, "loss": 1.1171, "step": 16120 }, { "epoch": 0.514365891769508, "grad_norm": 0.16921396553516388, "learning_rate": 0.001, "loss": 1.1136, "step": 16130 }, { "epoch": 0.5146847794891418, "grad_norm": 0.17058788239955902, "learning_rate": 0.001, "loss": 1.1172, "step": 16140 }, { "epoch": 0.5150036672087758, "grad_norm": 0.17264387011528015, "learning_rate": 0.001, "loss": 1.1265, "step": 16150 }, { "epoch": 0.5153225549284097, "grad_norm": 0.1710854172706604, "learning_rate": 0.001, "loss": 1.1195, "step": 16160 }, { "epoch": 0.5156414426480436, "grad_norm": 0.18194888532161713, "learning_rate": 0.001, "loss": 1.1026, "step": 16170 }, { "epoch": 0.5159603303676775, "grad_norm": 0.17689311504364014, "learning_rate": 0.001, "loss": 1.106, "step": 16180 }, { "epoch": 0.5162792180873115, "grad_norm": 0.16910506784915924, "learning_rate": 0.001, "loss": 1.1127, "step": 16190 }, { "epoch": 0.5165981058069454, "grad_norm": 0.16860614717006683, "learning_rate": 0.001, "loss": 1.1228, "step": 16200 }, { "epoch": 0.5169169935265793, "grad_norm": 0.17780789732933044, "learning_rate": 0.001, "loss": 1.1163, "step": 16210 }, { "epoch": 0.5172358812462132, "grad_norm": 0.1708582043647766, "learning_rate": 0.001, "loss": 1.1167, "step": 16220 }, { "epoch": 0.5175547689658472, "grad_norm": 0.16993635892868042, "learning_rate": 0.001, "loss": 1.1147, "step": 16230 }, { "epoch": 0.517873656685481, "grad_norm": 0.17231173813343048, "learning_rate": 0.001, "loss": 1.1149, "step": 16240 }, { "epoch": 0.518192544405115, "grad_norm": 0.17344434559345245, "learning_rate": 0.001, "loss": 1.1283, "step": 16250 }, { "epoch": 0.5185114321247489, "grad_norm": 0.17418132722377777, "learning_rate": 0.001, "loss": 1.1177, "step": 16260 }, { "epoch": 0.5188303198443828, "grad_norm": 0.1698276549577713, "learning_rate": 0.001, "loss": 1.1043, "step": 16270 }, { "epoch": 0.5191492075640167, "grad_norm": 0.16663305461406708, "learning_rate": 0.001, "loss": 1.1038, "step": 16280 }, { "epoch": 0.5194680952836507, "grad_norm": 0.1778172254562378, "learning_rate": 0.001, "loss": 1.1117, "step": 16290 }, { "epoch": 0.5197869830032845, "grad_norm": 0.1671564280986786, "learning_rate": 0.001, "loss": 1.1325, "step": 16300 }, { "epoch": 0.5201058707229185, "grad_norm": 0.17214436829090118, "learning_rate": 0.001, "loss": 1.1304, "step": 16310 }, { "epoch": 0.5204247584425524, "grad_norm": 0.1681654304265976, "learning_rate": 0.001, "loss": 1.113, "step": 16320 }, { "epoch": 0.5207436461621863, "grad_norm": 0.1728064864873886, "learning_rate": 0.001, "loss": 1.1161, "step": 16330 }, { "epoch": 0.5210625338818202, "grad_norm": 0.17555077373981476, "learning_rate": 0.001, "loss": 1.1304, "step": 16340 }, { "epoch": 0.5213814216014542, "grad_norm": 0.17947399616241455, "learning_rate": 0.001, "loss": 1.1088, "step": 16350 }, { "epoch": 0.521700309321088, "grad_norm": 0.16473524272441864, "learning_rate": 0.001, "loss": 1.0931, "step": 16360 }, { "epoch": 0.522019197040722, "grad_norm": 0.17586977779865265, "learning_rate": 0.001, "loss": 1.1067, "step": 16370 }, { "epoch": 0.5223380847603559, "grad_norm": 0.16663765907287598, "learning_rate": 0.001, "loss": 1.1028, "step": 16380 }, { "epoch": 0.5226569724799898, "grad_norm": 0.17395268380641937, "learning_rate": 0.001, "loss": 1.1454, "step": 16390 }, { "epoch": 0.5229758601996237, "grad_norm": 0.1764364093542099, "learning_rate": 0.001, "loss": 1.1085, "step": 16400 }, { "epoch": 0.5232947479192577, "grad_norm": 0.17778083682060242, "learning_rate": 0.001, "loss": 1.1096, "step": 16410 }, { "epoch": 0.5236136356388915, "grad_norm": 0.1722065955400467, "learning_rate": 0.001, "loss": 1.1066, "step": 16420 }, { "epoch": 0.5239325233585255, "grad_norm": 0.17221935093402863, "learning_rate": 0.001, "loss": 1.1341, "step": 16430 }, { "epoch": 0.5242514110781594, "grad_norm": 0.1714521199464798, "learning_rate": 0.001, "loss": 1.1257, "step": 16440 }, { "epoch": 0.5245702987977933, "grad_norm": 0.17682762444019318, "learning_rate": 0.001, "loss": 1.1281, "step": 16450 }, { "epoch": 0.5248891865174272, "grad_norm": 0.17583733797073364, "learning_rate": 0.001, "loss": 1.118, "step": 16460 }, { "epoch": 0.5252080742370612, "grad_norm": 0.172967791557312, "learning_rate": 0.001, "loss": 1.1237, "step": 16470 }, { "epoch": 0.525526961956695, "grad_norm": 0.16899310052394867, "learning_rate": 0.001, "loss": 1.1354, "step": 16480 }, { "epoch": 0.525845849676329, "grad_norm": 0.1808301955461502, "learning_rate": 0.001, "loss": 1.1272, "step": 16490 }, { "epoch": 0.5261647373959629, "grad_norm": 0.17048123478889465, "learning_rate": 0.001, "loss": 1.0992, "step": 16500 }, { "epoch": 0.5264836251155968, "grad_norm": 0.16553248465061188, "learning_rate": 0.001, "loss": 1.104, "step": 16510 }, { "epoch": 0.5268025128352307, "grad_norm": 0.1753700077533722, "learning_rate": 0.001, "loss": 1.119, "step": 16520 }, { "epoch": 0.5271214005548647, "grad_norm": 0.16744454205036163, "learning_rate": 0.001, "loss": 1.1107, "step": 16530 }, { "epoch": 0.5274402882744985, "grad_norm": 0.17314577102661133, "learning_rate": 0.001, "loss": 1.1442, "step": 16540 }, { "epoch": 0.5277591759941325, "grad_norm": 0.16656264662742615, "learning_rate": 0.001, "loss": 1.0986, "step": 16550 }, { "epoch": 0.5280780637137664, "grad_norm": 0.1720547080039978, "learning_rate": 0.001, "loss": 1.1202, "step": 16560 }, { "epoch": 0.5283969514334003, "grad_norm": 0.17319488525390625, "learning_rate": 0.001, "loss": 1.1054, "step": 16570 }, { "epoch": 0.5287158391530342, "grad_norm": 0.1704757660627365, "learning_rate": 0.001, "loss": 1.0962, "step": 16580 }, { "epoch": 0.5290347268726682, "grad_norm": 0.17540287971496582, "learning_rate": 0.001, "loss": 1.1066, "step": 16590 }, { "epoch": 0.529353614592302, "grad_norm": 0.16792285442352295, "learning_rate": 0.001, "loss": 1.1111, "step": 16600 }, { "epoch": 0.529672502311936, "grad_norm": 0.1729079931974411, "learning_rate": 0.001, "loss": 1.1015, "step": 16610 }, { "epoch": 0.5299913900315699, "grad_norm": 0.16947157680988312, "learning_rate": 0.001, "loss": 1.1383, "step": 16620 }, { "epoch": 0.5303102777512038, "grad_norm": 0.17262917757034302, "learning_rate": 0.001, "loss": 1.1178, "step": 16630 }, { "epoch": 0.5306291654708377, "grad_norm": 0.1714227795600891, "learning_rate": 0.001, "loss": 1.1154, "step": 16640 }, { "epoch": 0.5309480531904717, "grad_norm": 0.16897515952587128, "learning_rate": 0.001, "loss": 1.1197, "step": 16650 }, { "epoch": 0.5312669409101055, "grad_norm": 0.167450949549675, "learning_rate": 0.001, "loss": 1.118, "step": 16660 }, { "epoch": 0.5315858286297395, "grad_norm": 0.16560305655002594, "learning_rate": 0.001, "loss": 1.0995, "step": 16670 }, { "epoch": 0.5319047163493734, "grad_norm": 0.16930148005485535, "learning_rate": 0.001, "loss": 1.1458, "step": 16680 }, { "epoch": 0.5322236040690073, "grad_norm": 0.16743934154510498, "learning_rate": 0.001, "loss": 1.1063, "step": 16690 }, { "epoch": 0.5325424917886412, "grad_norm": 0.17878761887550354, "learning_rate": 0.001, "loss": 1.1078, "step": 16700 }, { "epoch": 0.5328613795082752, "grad_norm": 0.166554257273674, "learning_rate": 0.001, "loss": 1.1031, "step": 16710 }, { "epoch": 0.533180267227909, "grad_norm": 0.17427927255630493, "learning_rate": 0.001, "loss": 1.1299, "step": 16720 }, { "epoch": 0.533499154947543, "grad_norm": 0.16897770762443542, "learning_rate": 0.001, "loss": 1.1172, "step": 16730 }, { "epoch": 0.5338180426671769, "grad_norm": 0.16430383920669556, "learning_rate": 0.001, "loss": 1.1102, "step": 16740 }, { "epoch": 0.5341369303868108, "grad_norm": 0.16883912682533264, "learning_rate": 0.001, "loss": 1.1037, "step": 16750 }, { "epoch": 0.5344558181064447, "grad_norm": 0.1662365049123764, "learning_rate": 0.001, "loss": 1.1239, "step": 16760 }, { "epoch": 0.5347747058260787, "grad_norm": 0.16690349578857422, "learning_rate": 0.001, "loss": 1.1167, "step": 16770 }, { "epoch": 0.5350935935457125, "grad_norm": 0.17271411418914795, "learning_rate": 0.001, "loss": 1.1211, "step": 16780 }, { "epoch": 0.5354124812653465, "grad_norm": 0.17028991878032684, "learning_rate": 0.001, "loss": 1.1196, "step": 16790 }, { "epoch": 0.5357313689849804, "grad_norm": 0.17466682195663452, "learning_rate": 0.001, "loss": 1.1097, "step": 16800 }, { "epoch": 0.5360502567046143, "grad_norm": 0.17412136495113373, "learning_rate": 0.001, "loss": 1.1349, "step": 16810 }, { "epoch": 0.5363691444242482, "grad_norm": 0.17085425555706024, "learning_rate": 0.001, "loss": 1.1139, "step": 16820 }, { "epoch": 0.5366880321438822, "grad_norm": 0.1674584150314331, "learning_rate": 0.001, "loss": 1.111, "step": 16830 }, { "epoch": 0.537006919863516, "grad_norm": 0.17785713076591492, "learning_rate": 0.001, "loss": 1.1393, "step": 16840 }, { "epoch": 0.53732580758315, "grad_norm": 0.17034348845481873, "learning_rate": 0.001, "loss": 1.1224, "step": 16850 }, { "epoch": 0.5376446953027839, "grad_norm": 0.1680379956960678, "learning_rate": 0.001, "loss": 1.1181, "step": 16860 }, { "epoch": 0.5379635830224178, "grad_norm": 0.17434479296207428, "learning_rate": 0.001, "loss": 1.108, "step": 16870 }, { "epoch": 0.5382824707420517, "grad_norm": 0.1710536777973175, "learning_rate": 0.001, "loss": 1.0958, "step": 16880 }, { "epoch": 0.5386013584616857, "grad_norm": 0.1662193238735199, "learning_rate": 0.001, "loss": 1.1238, "step": 16890 }, { "epoch": 0.5389202461813195, "grad_norm": 0.16588041186332703, "learning_rate": 0.001, "loss": 1.1196, "step": 16900 }, { "epoch": 0.5392391339009535, "grad_norm": 0.16906344890594482, "learning_rate": 0.001, "loss": 1.1005, "step": 16910 }, { "epoch": 0.5395580216205874, "grad_norm": 0.16948343813419342, "learning_rate": 0.001, "loss": 1.1265, "step": 16920 }, { "epoch": 0.5398769093402213, "grad_norm": 0.17260994017124176, "learning_rate": 0.001, "loss": 1.1098, "step": 16930 }, { "epoch": 0.5401957970598552, "grad_norm": 0.16991999745368958, "learning_rate": 0.001, "loss": 1.1175, "step": 16940 }, { "epoch": 0.5405146847794892, "grad_norm": 0.17070095241069794, "learning_rate": 0.001, "loss": 1.1061, "step": 16950 }, { "epoch": 0.540833572499123, "grad_norm": 0.17698924243450165, "learning_rate": 0.001, "loss": 1.1124, "step": 16960 }, { "epoch": 0.541152460218757, "grad_norm": 0.1651303917169571, "learning_rate": 0.001, "loss": 1.1122, "step": 16970 }, { "epoch": 0.5414713479383909, "grad_norm": 0.17084306478500366, "learning_rate": 0.001, "loss": 1.1069, "step": 16980 }, { "epoch": 0.5417902356580248, "grad_norm": 0.1684514433145523, "learning_rate": 0.001, "loss": 1.1282, "step": 16990 }, { "epoch": 0.5421091233776587, "grad_norm": 0.17040887475013733, "learning_rate": 0.001, "loss": 1.094, "step": 17000 }, { "epoch": 0.5424280110972927, "grad_norm": 0.16266708076000214, "learning_rate": 0.001, "loss": 1.0929, "step": 17010 }, { "epoch": 0.5427468988169266, "grad_norm": 0.1671130657196045, "learning_rate": 0.001, "loss": 1.0842, "step": 17020 }, { "epoch": 0.5430657865365605, "grad_norm": 0.17035824060440063, "learning_rate": 0.001, "loss": 1.1192, "step": 17030 }, { "epoch": 0.5433846742561944, "grad_norm": 0.16444888710975647, "learning_rate": 0.001, "loss": 1.094, "step": 17040 }, { "epoch": 0.5437035619758284, "grad_norm": 0.16725726425647736, "learning_rate": 0.001, "loss": 1.0976, "step": 17050 }, { "epoch": 0.5440224496954622, "grad_norm": 0.16788360476493835, "learning_rate": 0.001, "loss": 1.1231, "step": 17060 }, { "epoch": 0.5443413374150962, "grad_norm": 0.1691126525402069, "learning_rate": 0.001, "loss": 1.1243, "step": 17070 }, { "epoch": 0.5446602251347301, "grad_norm": 0.17056216299533844, "learning_rate": 0.001, "loss": 1.1253, "step": 17080 }, { "epoch": 0.544979112854364, "grad_norm": 0.17879615724086761, "learning_rate": 0.001, "loss": 1.1165, "step": 17090 }, { "epoch": 0.5452980005739979, "grad_norm": 0.17294946312904358, "learning_rate": 0.001, "loss": 1.1075, "step": 17100 }, { "epoch": 0.5456168882936319, "grad_norm": 0.16905255615711212, "learning_rate": 0.001, "loss": 1.1197, "step": 17110 }, { "epoch": 0.5459357760132657, "grad_norm": 0.17166034877300262, "learning_rate": 0.001, "loss": 1.1225, "step": 17120 }, { "epoch": 0.5462546637328997, "grad_norm": 0.1762578934431076, "learning_rate": 0.001, "loss": 1.1156, "step": 17130 }, { "epoch": 0.5465735514525336, "grad_norm": 0.17545616626739502, "learning_rate": 0.001, "loss": 1.1052, "step": 17140 }, { "epoch": 0.5468924391721675, "grad_norm": 0.17133742570877075, "learning_rate": 0.001, "loss": 1.1096, "step": 17150 }, { "epoch": 0.5472113268918014, "grad_norm": 0.16983424127101898, "learning_rate": 0.001, "loss": 1.1073, "step": 17160 }, { "epoch": 0.5475302146114354, "grad_norm": 0.16490906476974487, "learning_rate": 0.001, "loss": 1.1016, "step": 17170 }, { "epoch": 0.5478491023310692, "grad_norm": 0.17924512922763824, "learning_rate": 0.001, "loss": 1.1254, "step": 17180 }, { "epoch": 0.5481679900507032, "grad_norm": 0.17216329276561737, "learning_rate": 0.001, "loss": 1.1011, "step": 17190 }, { "epoch": 0.5484868777703371, "grad_norm": 0.1712387204170227, "learning_rate": 0.001, "loss": 1.1206, "step": 17200 }, { "epoch": 0.548805765489971, "grad_norm": 0.1688283532857895, "learning_rate": 0.001, "loss": 1.1118, "step": 17210 }, { "epoch": 0.5491246532096049, "grad_norm": 0.17269356548786163, "learning_rate": 0.001, "loss": 1.1031, "step": 17220 }, { "epoch": 0.5494435409292389, "grad_norm": 0.167024627327919, "learning_rate": 0.001, "loss": 1.0996, "step": 17230 }, { "epoch": 0.5497624286488727, "grad_norm": 0.17057278752326965, "learning_rate": 0.001, "loss": 1.1167, "step": 17240 }, { "epoch": 0.5500813163685067, "grad_norm": 0.1737520545721054, "learning_rate": 0.001, "loss": 1.1163, "step": 17250 }, { "epoch": 0.5504002040881406, "grad_norm": 0.1737147867679596, "learning_rate": 0.001, "loss": 1.1085, "step": 17260 }, { "epoch": 0.5507190918077745, "grad_norm": 0.17490079998970032, "learning_rate": 0.001, "loss": 1.1343, "step": 17270 }, { "epoch": 0.5510379795274084, "grad_norm": 0.16712409257888794, "learning_rate": 0.001, "loss": 1.0932, "step": 17280 }, { "epoch": 0.5513568672470424, "grad_norm": 0.1729220300912857, "learning_rate": 0.001, "loss": 1.1298, "step": 17290 }, { "epoch": 0.5516757549666762, "grad_norm": 0.17015966773033142, "learning_rate": 0.001, "loss": 1.1075, "step": 17300 }, { "epoch": 0.5519946426863102, "grad_norm": 0.1695127785205841, "learning_rate": 0.001, "loss": 1.1123, "step": 17310 }, { "epoch": 0.5523135304059441, "grad_norm": 0.16665105521678925, "learning_rate": 0.001, "loss": 1.1105, "step": 17320 }, { "epoch": 0.552632418125578, "grad_norm": 0.16556957364082336, "learning_rate": 0.001, "loss": 1.0983, "step": 17330 }, { "epoch": 0.5529513058452119, "grad_norm": 0.1715242713689804, "learning_rate": 0.001, "loss": 1.1118, "step": 17340 }, { "epoch": 0.5532701935648459, "grad_norm": 0.166187584400177, "learning_rate": 0.001, "loss": 1.1107, "step": 17350 }, { "epoch": 0.5535890812844797, "grad_norm": 0.1719885617494583, "learning_rate": 0.001, "loss": 1.0866, "step": 17360 }, { "epoch": 0.5539079690041137, "grad_norm": 0.16900859773159027, "learning_rate": 0.001, "loss": 1.1024, "step": 17370 }, { "epoch": 0.5542268567237476, "grad_norm": 0.17110826075077057, "learning_rate": 0.001, "loss": 1.1181, "step": 17380 }, { "epoch": 0.5545457444433814, "grad_norm": 0.1805061548948288, "learning_rate": 0.001, "loss": 1.1095, "step": 17390 }, { "epoch": 0.5548646321630154, "grad_norm": 0.16934873163700104, "learning_rate": 0.001, "loss": 1.1336, "step": 17400 }, { "epoch": 0.5551835198826494, "grad_norm": 0.17308500409126282, "learning_rate": 0.001, "loss": 1.1165, "step": 17410 }, { "epoch": 0.5555024076022832, "grad_norm": 0.17419050633907318, "learning_rate": 0.001, "loss": 1.0959, "step": 17420 }, { "epoch": 0.5558212953219172, "grad_norm": 0.17350651323795319, "learning_rate": 0.001, "loss": 1.1082, "step": 17430 }, { "epoch": 0.5561401830415511, "grad_norm": 0.1707797795534134, "learning_rate": 0.001, "loss": 1.1107, "step": 17440 }, { "epoch": 0.556459070761185, "grad_norm": 0.16700954735279083, "learning_rate": 0.001, "loss": 1.1141, "step": 17450 }, { "epoch": 0.5567779584808189, "grad_norm": 0.1669778972864151, "learning_rate": 0.001, "loss": 1.113, "step": 17460 }, { "epoch": 0.5570968462004529, "grad_norm": 0.16680364310741425, "learning_rate": 0.001, "loss": 1.1146, "step": 17470 }, { "epoch": 0.5574157339200867, "grad_norm": 0.16998562216758728, "learning_rate": 0.001, "loss": 1.0902, "step": 17480 }, { "epoch": 0.5577346216397207, "grad_norm": 0.16862469911575317, "learning_rate": 0.001, "loss": 1.1176, "step": 17490 }, { "epoch": 0.5580535093593546, "grad_norm": 0.16966623067855835, "learning_rate": 0.001, "loss": 1.1113, "step": 17500 }, { "epoch": 0.5583723970789884, "grad_norm": 0.1724562793970108, "learning_rate": 0.001, "loss": 1.1101, "step": 17510 }, { "epoch": 0.5586912847986224, "grad_norm": 0.17634367942810059, "learning_rate": 0.001, "loss": 1.1112, "step": 17520 }, { "epoch": 0.5590101725182564, "grad_norm": 0.17674317955970764, "learning_rate": 0.001, "loss": 1.0956, "step": 17530 }, { "epoch": 0.5593290602378902, "grad_norm": 0.16610750555992126, "learning_rate": 0.001, "loss": 1.1033, "step": 17540 }, { "epoch": 0.5596479479575242, "grad_norm": 0.17032527923583984, "learning_rate": 0.001, "loss": 1.1341, "step": 17550 }, { "epoch": 0.5599668356771581, "grad_norm": 0.17055188119411469, "learning_rate": 0.001, "loss": 1.0943, "step": 17560 }, { "epoch": 0.560285723396792, "grad_norm": 0.18244749307632446, "learning_rate": 0.001, "loss": 1.1105, "step": 17570 }, { "epoch": 0.5606046111164259, "grad_norm": 0.17542274296283722, "learning_rate": 0.001, "loss": 1.1116, "step": 17580 }, { "epoch": 0.5609234988360599, "grad_norm": 0.168791264295578, "learning_rate": 0.001, "loss": 1.1051, "step": 17590 }, { "epoch": 0.5612423865556937, "grad_norm": 0.16718518733978271, "learning_rate": 0.001, "loss": 1.1019, "step": 17600 }, { "epoch": 0.5615612742753276, "grad_norm": 0.16974017024040222, "learning_rate": 0.001, "loss": 1.1123, "step": 17610 }, { "epoch": 0.5618801619949616, "grad_norm": 0.16427423059940338, "learning_rate": 0.001, "loss": 1.1102, "step": 17620 }, { "epoch": 0.5621990497145954, "grad_norm": 0.1736641824245453, "learning_rate": 0.001, "loss": 1.1146, "step": 17630 }, { "epoch": 0.5625179374342294, "grad_norm": 0.1725444495677948, "learning_rate": 0.001, "loss": 1.0983, "step": 17640 }, { "epoch": 0.5628368251538634, "grad_norm": 0.16529996693134308, "learning_rate": 0.001, "loss": 1.0841, "step": 17650 }, { "epoch": 0.5631557128734972, "grad_norm": 0.17500054836273193, "learning_rate": 0.001, "loss": 1.1078, "step": 17660 }, { "epoch": 0.5634746005931311, "grad_norm": 0.17414040863513947, "learning_rate": 0.001, "loss": 1.106, "step": 17670 }, { "epoch": 0.5637934883127651, "grad_norm": 0.17169852554798126, "learning_rate": 0.001, "loss": 1.1011, "step": 17680 }, { "epoch": 0.5641123760323989, "grad_norm": 0.1719132363796234, "learning_rate": 0.001, "loss": 1.1165, "step": 17690 }, { "epoch": 0.5644312637520329, "grad_norm": 0.17163269221782684, "learning_rate": 0.001, "loss": 1.1082, "step": 17700 }, { "epoch": 0.5647501514716669, "grad_norm": 0.16895899176597595, "learning_rate": 0.001, "loss": 1.1132, "step": 17710 }, { "epoch": 0.5650690391913007, "grad_norm": 0.1689912974834442, "learning_rate": 0.001, "loss": 1.1078, "step": 17720 }, { "epoch": 0.5653879269109346, "grad_norm": 0.17176558077335358, "learning_rate": 0.001, "loss": 1.0839, "step": 17730 }, { "epoch": 0.5657068146305686, "grad_norm": 0.16738194227218628, "learning_rate": 0.001, "loss": 1.102, "step": 17740 }, { "epoch": 0.5660257023502024, "grad_norm": 0.17044048011302948, "learning_rate": 0.001, "loss": 1.0937, "step": 17750 }, { "epoch": 0.5663445900698364, "grad_norm": 0.16983206570148468, "learning_rate": 0.001, "loss": 1.1066, "step": 17760 }, { "epoch": 0.5666634777894703, "grad_norm": 0.17275819182395935, "learning_rate": 0.001, "loss": 1.0966, "step": 17770 }, { "epoch": 0.5669823655091042, "grad_norm": 0.1728706955909729, "learning_rate": 0.001, "loss": 1.1057, "step": 17780 }, { "epoch": 0.5673012532287381, "grad_norm": 0.17265573143959045, "learning_rate": 0.001, "loss": 1.1164, "step": 17790 }, { "epoch": 0.5676201409483721, "grad_norm": 0.16493971645832062, "learning_rate": 0.001, "loss": 1.0871, "step": 17800 }, { "epoch": 0.5679390286680059, "grad_norm": 0.17371971905231476, "learning_rate": 0.001, "loss": 1.1067, "step": 17810 }, { "epoch": 0.5682579163876399, "grad_norm": 0.17413072288036346, "learning_rate": 0.001, "loss": 1.1055, "step": 17820 }, { "epoch": 0.5685768041072738, "grad_norm": 0.16485251486301422, "learning_rate": 0.001, "loss": 1.097, "step": 17830 }, { "epoch": 0.5688956918269078, "grad_norm": 0.17777450382709503, "learning_rate": 0.001, "loss": 1.095, "step": 17840 }, { "epoch": 0.5692145795465416, "grad_norm": 0.16648739576339722, "learning_rate": 0.001, "loss": 1.1036, "step": 17850 }, { "epoch": 0.5695334672661756, "grad_norm": 0.16905073821544647, "learning_rate": 0.001, "loss": 1.0986, "step": 17860 }, { "epoch": 0.5698523549858096, "grad_norm": 0.16770754754543304, "learning_rate": 0.001, "loss": 1.0964, "step": 17870 }, { "epoch": 0.5701712427054434, "grad_norm": 0.17330941557884216, "learning_rate": 0.001, "loss": 1.1084, "step": 17880 }, { "epoch": 0.5704901304250773, "grad_norm": 0.16497695446014404, "learning_rate": 0.001, "loss": 1.0997, "step": 17890 }, { "epoch": 0.5708090181447113, "grad_norm": 0.1721220165491104, "learning_rate": 0.001, "loss": 1.1014, "step": 17900 }, { "epoch": 0.5711279058643451, "grad_norm": 0.16339078545570374, "learning_rate": 0.001, "loss": 1.1042, "step": 17910 }, { "epoch": 0.5714467935839791, "grad_norm": 0.16810251772403717, "learning_rate": 0.001, "loss": 1.1007, "step": 17920 }, { "epoch": 0.571765681303613, "grad_norm": 0.1646348237991333, "learning_rate": 0.001, "loss": 1.1022, "step": 17930 }, { "epoch": 0.5720845690232469, "grad_norm": 0.16776010394096375, "learning_rate": 0.001, "loss": 1.118, "step": 17940 }, { "epoch": 0.5724034567428808, "grad_norm": 0.1724771410226822, "learning_rate": 0.001, "loss": 1.0982, "step": 17950 }, { "epoch": 0.5727223444625148, "grad_norm": 0.1659904420375824, "learning_rate": 0.001, "loss": 1.0859, "step": 17960 }, { "epoch": 0.5730412321821486, "grad_norm": 0.16321036219596863, "learning_rate": 0.001, "loss": 1.0966, "step": 17970 }, { "epoch": 0.5733601199017826, "grad_norm": 0.17762115597724915, "learning_rate": 0.001, "loss": 1.1058, "step": 17980 }, { "epoch": 0.5736790076214165, "grad_norm": 0.16604764759540558, "learning_rate": 0.001, "loss": 1.1161, "step": 17990 }, { "epoch": 0.5739978953410504, "grad_norm": 0.17151857912540436, "learning_rate": 0.001, "loss": 1.0966, "step": 18000 }, { "epoch": 0.5743167830606843, "grad_norm": 0.17127293348312378, "learning_rate": 0.001, "loss": 1.0915, "step": 18010 }, { "epoch": 0.5746356707803183, "grad_norm": 0.17190389335155487, "learning_rate": 0.001, "loss": 1.1058, "step": 18020 }, { "epoch": 0.5749545584999521, "grad_norm": 0.16845406591892242, "learning_rate": 0.001, "loss": 1.1093, "step": 18030 }, { "epoch": 0.5752734462195861, "grad_norm": 0.1646239012479782, "learning_rate": 0.001, "loss": 1.091, "step": 18040 }, { "epoch": 0.57559233393922, "grad_norm": 0.1710473597049713, "learning_rate": 0.001, "loss": 1.0982, "step": 18050 }, { "epoch": 0.5759112216588539, "grad_norm": 0.17428575456142426, "learning_rate": 0.001, "loss": 1.096, "step": 18060 }, { "epoch": 0.5762301093784878, "grad_norm": 0.16330930590629578, "learning_rate": 0.001, "loss": 1.0996, "step": 18070 }, { "epoch": 0.5765489970981218, "grad_norm": 0.16600897908210754, "learning_rate": 0.001, "loss": 1.1, "step": 18080 }, { "epoch": 0.5768678848177556, "grad_norm": 0.17178833484649658, "learning_rate": 0.001, "loss": 1.1241, "step": 18090 }, { "epoch": 0.5771867725373896, "grad_norm": 0.16852764785289764, "learning_rate": 0.001, "loss": 1.1157, "step": 18100 }, { "epoch": 0.5775056602570235, "grad_norm": 0.16611634194850922, "learning_rate": 0.001, "loss": 1.0974, "step": 18110 }, { "epoch": 0.5778245479766574, "grad_norm": 0.17456525564193726, "learning_rate": 0.001, "loss": 1.1091, "step": 18120 }, { "epoch": 0.5781434356962913, "grad_norm": 0.16914406418800354, "learning_rate": 0.001, "loss": 1.0874, "step": 18130 }, { "epoch": 0.5784623234159253, "grad_norm": 0.17011883854866028, "learning_rate": 0.001, "loss": 1.0998, "step": 18140 }, { "epoch": 0.5787812111355591, "grad_norm": 0.16776974499225616, "learning_rate": 0.001, "loss": 1.0735, "step": 18150 }, { "epoch": 0.5791000988551931, "grad_norm": 0.16967648267745972, "learning_rate": 0.001, "loss": 1.0962, "step": 18160 }, { "epoch": 0.579418986574827, "grad_norm": 0.16465380787849426, "learning_rate": 0.001, "loss": 1.0846, "step": 18170 }, { "epoch": 0.5797378742944609, "grad_norm": 0.16524037718772888, "learning_rate": 0.001, "loss": 1.1098, "step": 18180 }, { "epoch": 0.5800567620140948, "grad_norm": 0.1675986647605896, "learning_rate": 0.001, "loss": 1.1059, "step": 18190 }, { "epoch": 0.5803756497337288, "grad_norm": 0.17379865050315857, "learning_rate": 0.001, "loss": 1.1106, "step": 18200 }, { "epoch": 0.5806945374533626, "grad_norm": 0.1684434860944748, "learning_rate": 0.001, "loss": 1.0985, "step": 18210 }, { "epoch": 0.5810134251729966, "grad_norm": 0.16824005544185638, "learning_rate": 0.001, "loss": 1.1036, "step": 18220 }, { "epoch": 0.5813323128926305, "grad_norm": 0.17007438838481903, "learning_rate": 0.001, "loss": 1.1254, "step": 18230 }, { "epoch": 0.5816512006122644, "grad_norm": 0.16796430945396423, "learning_rate": 0.001, "loss": 1.0957, "step": 18240 }, { "epoch": 0.5819700883318983, "grad_norm": 0.16562458872795105, "learning_rate": 0.001, "loss": 1.1029, "step": 18250 }, { "epoch": 0.5822889760515323, "grad_norm": 0.17029453814029694, "learning_rate": 0.001, "loss": 1.0963, "step": 18260 }, { "epoch": 0.5826078637711661, "grad_norm": 0.17549550533294678, "learning_rate": 0.001, "loss": 1.1081, "step": 18270 }, { "epoch": 0.5829267514908001, "grad_norm": 0.17263686656951904, "learning_rate": 0.001, "loss": 1.0888, "step": 18280 }, { "epoch": 0.583245639210434, "grad_norm": 0.16540630161762238, "learning_rate": 0.001, "loss": 1.1016, "step": 18290 }, { "epoch": 0.5835645269300679, "grad_norm": 0.17288976907730103, "learning_rate": 0.001, "loss": 1.0994, "step": 18300 }, { "epoch": 0.5838834146497018, "grad_norm": 0.16975054144859314, "learning_rate": 0.001, "loss": 1.0882, "step": 18310 }, { "epoch": 0.5842023023693358, "grad_norm": 0.16850727796554565, "learning_rate": 0.001, "loss": 1.102, "step": 18320 }, { "epoch": 0.5845211900889696, "grad_norm": 0.17005418241024017, "learning_rate": 0.001, "loss": 1.1047, "step": 18330 }, { "epoch": 0.5848400778086036, "grad_norm": 0.17504164576530457, "learning_rate": 0.001, "loss": 1.0929, "step": 18340 }, { "epoch": 0.5851589655282375, "grad_norm": 0.16485849022865295, "learning_rate": 0.001, "loss": 1.1031, "step": 18350 }, { "epoch": 0.5854778532478714, "grad_norm": 0.16495245695114136, "learning_rate": 0.001, "loss": 1.1203, "step": 18360 }, { "epoch": 0.5857967409675053, "grad_norm": 0.1715477854013443, "learning_rate": 0.001, "loss": 1.116, "step": 18370 }, { "epoch": 0.5861156286871393, "grad_norm": 0.18126294016838074, "learning_rate": 0.001, "loss": 1.1229, "step": 18380 }, { "epoch": 0.5864345164067731, "grad_norm": 0.16917794942855835, "learning_rate": 0.001, "loss": 1.0919, "step": 18390 }, { "epoch": 0.5867534041264071, "grad_norm": 0.17057420313358307, "learning_rate": 0.001, "loss": 1.0914, "step": 18400 }, { "epoch": 0.587072291846041, "grad_norm": 0.16627562046051025, "learning_rate": 0.001, "loss": 1.0981, "step": 18410 }, { "epoch": 0.5873911795656749, "grad_norm": 0.17235489189624786, "learning_rate": 0.001, "loss": 1.1172, "step": 18420 }, { "epoch": 0.5877100672853088, "grad_norm": 0.16684970259666443, "learning_rate": 0.001, "loss": 1.1117, "step": 18430 }, { "epoch": 0.5880289550049428, "grad_norm": 0.1660860925912857, "learning_rate": 0.001, "loss": 1.1038, "step": 18440 }, { "epoch": 0.5883478427245766, "grad_norm": 0.16902343928813934, "learning_rate": 0.001, "loss": 1.1023, "step": 18450 }, { "epoch": 0.5886667304442106, "grad_norm": 0.16275769472122192, "learning_rate": 0.001, "loss": 1.0823, "step": 18460 }, { "epoch": 0.5889856181638445, "grad_norm": 0.16918061673641205, "learning_rate": 0.001, "loss": 1.0965, "step": 18470 }, { "epoch": 0.5893045058834784, "grad_norm": 0.16743110120296478, "learning_rate": 0.001, "loss": 1.1065, "step": 18480 }, { "epoch": 0.5896233936031123, "grad_norm": 0.16440755128860474, "learning_rate": 0.001, "loss": 1.0934, "step": 18490 }, { "epoch": 0.5899422813227463, "grad_norm": 0.16429467499256134, "learning_rate": 0.001, "loss": 1.1044, "step": 18500 }, { "epoch": 0.5902611690423801, "grad_norm": 0.17269568145275116, "learning_rate": 0.001, "loss": 1.1021, "step": 18510 }, { "epoch": 0.5905800567620141, "grad_norm": 0.17350298166275024, "learning_rate": 0.001, "loss": 1.1106, "step": 18520 }, { "epoch": 0.590898944481648, "grad_norm": 0.17228271067142487, "learning_rate": 0.001, "loss": 1.0906, "step": 18530 }, { "epoch": 0.5912178322012819, "grad_norm": 0.17125701904296875, "learning_rate": 0.001, "loss": 1.1031, "step": 18540 }, { "epoch": 0.5915367199209158, "grad_norm": 0.17287059128284454, "learning_rate": 0.001, "loss": 1.1035, "step": 18550 }, { "epoch": 0.5918556076405498, "grad_norm": 0.17267583310604095, "learning_rate": 0.001, "loss": 1.122, "step": 18560 }, { "epoch": 0.5921744953601836, "grad_norm": 0.1665111482143402, "learning_rate": 0.001, "loss": 1.0852, "step": 18570 }, { "epoch": 0.5924933830798176, "grad_norm": 0.18036475777626038, "learning_rate": 0.001, "loss": 1.0897, "step": 18580 }, { "epoch": 0.5928122707994515, "grad_norm": 0.16732090711593628, "learning_rate": 0.001, "loss": 1.1005, "step": 18590 }, { "epoch": 0.5931311585190854, "grad_norm": 0.16416814923286438, "learning_rate": 0.001, "loss": 1.0994, "step": 18600 }, { "epoch": 0.5934500462387193, "grad_norm": 0.1659676730632782, "learning_rate": 0.001, "loss": 1.1058, "step": 18610 }, { "epoch": 0.5937689339583533, "grad_norm": 0.17046456038951874, "learning_rate": 0.001, "loss": 1.1051, "step": 18620 }, { "epoch": 0.5940878216779871, "grad_norm": 0.17397566139698029, "learning_rate": 0.001, "loss": 1.1013, "step": 18630 }, { "epoch": 0.5944067093976211, "grad_norm": 0.1660611480474472, "learning_rate": 0.001, "loss": 1.1004, "step": 18640 }, { "epoch": 0.594725597117255, "grad_norm": 0.1655200570821762, "learning_rate": 0.001, "loss": 1.1037, "step": 18650 }, { "epoch": 0.5950444848368889, "grad_norm": 0.16909074783325195, "learning_rate": 0.001, "loss": 1.0759, "step": 18660 }, { "epoch": 0.5953633725565228, "grad_norm": 0.17524762451648712, "learning_rate": 0.001, "loss": 1.092, "step": 18670 }, { "epoch": 0.5956822602761568, "grad_norm": 0.1684812307357788, "learning_rate": 0.001, "loss": 1.1036, "step": 18680 }, { "epoch": 0.5960011479957907, "grad_norm": 0.1623716950416565, "learning_rate": 0.001, "loss": 1.0907, "step": 18690 }, { "epoch": 0.5963200357154246, "grad_norm": 0.16788646578788757, "learning_rate": 0.001, "loss": 1.0807, "step": 18700 }, { "epoch": 0.5966389234350585, "grad_norm": 0.1718616485595703, "learning_rate": 0.001, "loss": 1.1057, "step": 18710 }, { "epoch": 0.5969578111546925, "grad_norm": 0.17129045724868774, "learning_rate": 0.001, "loss": 1.0989, "step": 18720 }, { "epoch": 0.5972766988743263, "grad_norm": 0.16963504254817963, "learning_rate": 0.001, "loss": 1.0987, "step": 18730 }, { "epoch": 0.5975955865939603, "grad_norm": 0.16359004378318787, "learning_rate": 0.001, "loss": 1.1128, "step": 18740 }, { "epoch": 0.5979144743135942, "grad_norm": 0.16450457274913788, "learning_rate": 0.001, "loss": 1.084, "step": 18750 }, { "epoch": 0.5982333620332281, "grad_norm": 0.16531401872634888, "learning_rate": 0.001, "loss": 1.105, "step": 18760 }, { "epoch": 0.598552249752862, "grad_norm": 0.16693365573883057, "learning_rate": 0.001, "loss": 1.0991, "step": 18770 }, { "epoch": 0.598871137472496, "grad_norm": 0.1713932305574417, "learning_rate": 0.001, "loss": 1.0912, "step": 18780 }, { "epoch": 0.5991900251921298, "grad_norm": 0.16845330595970154, "learning_rate": 0.001, "loss": 1.1299, "step": 18790 }, { "epoch": 0.5995089129117638, "grad_norm": 0.1709652543067932, "learning_rate": 0.001, "loss": 1.0666, "step": 18800 }, { "epoch": 0.5998278006313977, "grad_norm": 0.16776952147483826, "learning_rate": 0.001, "loss": 1.0857, "step": 18810 }, { "epoch": 0.6001466883510316, "grad_norm": 0.17375540733337402, "learning_rate": 0.001, "loss": 1.1064, "step": 18820 }, { "epoch": 0.6004655760706655, "grad_norm": 0.16971836984157562, "learning_rate": 0.001, "loss": 1.0992, "step": 18830 }, { "epoch": 0.6007844637902995, "grad_norm": 0.17225322127342224, "learning_rate": 0.001, "loss": 1.0926, "step": 18840 }, { "epoch": 0.6011033515099333, "grad_norm": 0.172017440199852, "learning_rate": 0.001, "loss": 1.1328, "step": 18850 }, { "epoch": 0.6014222392295673, "grad_norm": 0.16906633973121643, "learning_rate": 0.001, "loss": 1.0944, "step": 18860 }, { "epoch": 0.6017411269492012, "grad_norm": 0.17072793841362, "learning_rate": 0.001, "loss": 1.1012, "step": 18870 }, { "epoch": 0.6020600146688351, "grad_norm": 0.16955476999282837, "learning_rate": 0.001, "loss": 1.0825, "step": 18880 }, { "epoch": 0.602378902388469, "grad_norm": 0.16245858371257782, "learning_rate": 0.001, "loss": 1.0948, "step": 18890 }, { "epoch": 0.602697790108103, "grad_norm": 0.1670025736093521, "learning_rate": 0.001, "loss": 1.0995, "step": 18900 }, { "epoch": 0.6030166778277368, "grad_norm": 0.1683637946844101, "learning_rate": 0.001, "loss": 1.1024, "step": 18910 }, { "epoch": 0.6033355655473708, "grad_norm": 0.17176498472690582, "learning_rate": 0.001, "loss": 1.1075, "step": 18920 }, { "epoch": 0.6036544532670047, "grad_norm": 0.16669496893882751, "learning_rate": 0.001, "loss": 1.0736, "step": 18930 }, { "epoch": 0.6039733409866386, "grad_norm": 0.17387279868125916, "learning_rate": 0.001, "loss": 1.0768, "step": 18940 }, { "epoch": 0.6042922287062725, "grad_norm": 0.170260950922966, "learning_rate": 0.001, "loss": 1.1012, "step": 18950 }, { "epoch": 0.6046111164259065, "grad_norm": 0.17083097994327545, "learning_rate": 0.001, "loss": 1.1049, "step": 18960 }, { "epoch": 0.6049300041455403, "grad_norm": 0.17165276408195496, "learning_rate": 0.001, "loss": 1.101, "step": 18970 }, { "epoch": 0.6052488918651743, "grad_norm": 0.17530037462711334, "learning_rate": 0.001, "loss": 1.1065, "step": 18980 }, { "epoch": 0.6055677795848082, "grad_norm": 0.16949541866779327, "learning_rate": 0.001, "loss": 1.1038, "step": 18990 }, { "epoch": 0.6058866673044421, "grad_norm": 0.17218689620494843, "learning_rate": 0.001, "loss": 1.098, "step": 19000 }, { "epoch": 0.606205555024076, "grad_norm": 0.18472091853618622, "learning_rate": 0.001, "loss": 1.0901, "step": 19010 }, { "epoch": 0.60652444274371, "grad_norm": 0.1716701090335846, "learning_rate": 0.001, "loss": 1.1091, "step": 19020 }, { "epoch": 0.6068433304633438, "grad_norm": 0.1648435741662979, "learning_rate": 0.001, "loss": 1.094, "step": 19030 }, { "epoch": 0.6071622181829778, "grad_norm": 0.16991214454174042, "learning_rate": 0.001, "loss": 1.0827, "step": 19040 }, { "epoch": 0.6074811059026117, "grad_norm": 0.17173060774803162, "learning_rate": 0.001, "loss": 1.0895, "step": 19050 }, { "epoch": 0.6077999936222456, "grad_norm": 0.16547681391239166, "learning_rate": 0.001, "loss": 1.0994, "step": 19060 }, { "epoch": 0.6081188813418795, "grad_norm": 0.16734836995601654, "learning_rate": 0.001, "loss": 1.0989, "step": 19070 }, { "epoch": 0.6084377690615135, "grad_norm": 0.17032268643379211, "learning_rate": 0.001, "loss": 1.0928, "step": 19080 }, { "epoch": 0.6087566567811473, "grad_norm": 0.17411461472511292, "learning_rate": 0.001, "loss": 1.0737, "step": 19090 }, { "epoch": 0.6090755445007813, "grad_norm": 0.1729476898908615, "learning_rate": 0.001, "loss": 1.0995, "step": 19100 }, { "epoch": 0.6093944322204152, "grad_norm": 0.17751654982566833, "learning_rate": 0.001, "loss": 1.0767, "step": 19110 }, { "epoch": 0.6097133199400491, "grad_norm": 0.1721947342157364, "learning_rate": 0.001, "loss": 1.0965, "step": 19120 }, { "epoch": 0.610032207659683, "grad_norm": 0.1693287491798401, "learning_rate": 0.001, "loss": 1.1138, "step": 19130 }, { "epoch": 0.610351095379317, "grad_norm": 0.16716571152210236, "learning_rate": 0.001, "loss": 1.1116, "step": 19140 }, { "epoch": 0.6106699830989508, "grad_norm": 0.16647756099700928, "learning_rate": 0.001, "loss": 1.1066, "step": 19150 }, { "epoch": 0.6109888708185848, "grad_norm": 0.1653558611869812, "learning_rate": 0.001, "loss": 1.0802, "step": 19160 }, { "epoch": 0.6113077585382187, "grad_norm": 0.16676802933216095, "learning_rate": 0.001, "loss": 1.1088, "step": 19170 }, { "epoch": 0.6116266462578526, "grad_norm": 0.1702679842710495, "learning_rate": 0.001, "loss": 1.1055, "step": 19180 }, { "epoch": 0.6119455339774865, "grad_norm": 0.16800037026405334, "learning_rate": 0.001, "loss": 1.104, "step": 19190 }, { "epoch": 0.6122644216971205, "grad_norm": 0.17141634225845337, "learning_rate": 0.001, "loss": 1.0885, "step": 19200 }, { "epoch": 0.6125833094167543, "grad_norm": 0.16200610995292664, "learning_rate": 0.001, "loss": 1.0737, "step": 19210 }, { "epoch": 0.6129021971363883, "grad_norm": 0.1708095520734787, "learning_rate": 0.001, "loss": 1.1009, "step": 19220 }, { "epoch": 0.6132210848560222, "grad_norm": 0.16531571745872498, "learning_rate": 0.001, "loss": 1.0825, "step": 19230 }, { "epoch": 0.6135399725756561, "grad_norm": 0.1678553968667984, "learning_rate": 0.001, "loss": 1.0928, "step": 19240 }, { "epoch": 0.61385886029529, "grad_norm": 0.16929244995117188, "learning_rate": 0.001, "loss": 1.0762, "step": 19250 }, { "epoch": 0.614177748014924, "grad_norm": 0.1687399297952652, "learning_rate": 0.001, "loss": 1.1068, "step": 19260 }, { "epoch": 0.6144966357345578, "grad_norm": 0.1622915267944336, "learning_rate": 0.001, "loss": 1.1062, "step": 19270 }, { "epoch": 0.6148155234541918, "grad_norm": 0.16613072156906128, "learning_rate": 0.001, "loss": 1.0977, "step": 19280 }, { "epoch": 0.6151344111738257, "grad_norm": 0.16244791448116302, "learning_rate": 0.001, "loss": 1.0794, "step": 19290 }, { "epoch": 0.6154532988934596, "grad_norm": 0.17199251055717468, "learning_rate": 0.001, "loss": 1.1048, "step": 19300 }, { "epoch": 0.6157721866130935, "grad_norm": 0.1752566248178482, "learning_rate": 0.001, "loss": 1.1059, "step": 19310 }, { "epoch": 0.6160910743327275, "grad_norm": 0.17897622287273407, "learning_rate": 0.001, "loss": 1.0919, "step": 19320 }, { "epoch": 0.6164099620523613, "grad_norm": 0.1731637865304947, "learning_rate": 0.001, "loss": 1.1154, "step": 19330 }, { "epoch": 0.6167288497719953, "grad_norm": 0.16791284084320068, "learning_rate": 0.001, "loss": 1.0889, "step": 19340 }, { "epoch": 0.6170477374916292, "grad_norm": 0.16614709794521332, "learning_rate": 0.001, "loss": 1.0938, "step": 19350 }, { "epoch": 0.6173666252112631, "grad_norm": 0.1709696352481842, "learning_rate": 0.001, "loss": 1.0956, "step": 19360 }, { "epoch": 0.617685512930897, "grad_norm": 0.16510511934757233, "learning_rate": 0.001, "loss": 1.0989, "step": 19370 }, { "epoch": 0.618004400650531, "grad_norm": 0.16931302845478058, "learning_rate": 0.001, "loss": 1.1062, "step": 19380 }, { "epoch": 0.6183232883701648, "grad_norm": 0.17815431952476501, "learning_rate": 0.001, "loss": 1.1119, "step": 19390 }, { "epoch": 0.6186421760897988, "grad_norm": 0.1690226048231125, "learning_rate": 0.001, "loss": 1.0876, "step": 19400 }, { "epoch": 0.6189610638094327, "grad_norm": 0.16781018674373627, "learning_rate": 0.001, "loss": 1.0896, "step": 19410 }, { "epoch": 0.6192799515290666, "grad_norm": 0.16898411512374878, "learning_rate": 0.001, "loss": 1.1129, "step": 19420 }, { "epoch": 0.6195988392487005, "grad_norm": 0.17247016727924347, "learning_rate": 0.001, "loss": 1.0907, "step": 19430 }, { "epoch": 0.6199177269683345, "grad_norm": 0.1620088517665863, "learning_rate": 0.001, "loss": 1.0984, "step": 19440 }, { "epoch": 0.6202366146879683, "grad_norm": 0.16331516206264496, "learning_rate": 0.001, "loss": 1.0794, "step": 19450 }, { "epoch": 0.6205555024076023, "grad_norm": 0.1672041267156601, "learning_rate": 0.001, "loss": 1.114, "step": 19460 }, { "epoch": 0.6208743901272362, "grad_norm": 0.16793550550937653, "learning_rate": 0.001, "loss": 1.0828, "step": 19470 }, { "epoch": 0.6211932778468701, "grad_norm": 0.16448058187961578, "learning_rate": 0.001, "loss": 1.094, "step": 19480 }, { "epoch": 0.621512165566504, "grad_norm": 0.1713237166404724, "learning_rate": 0.001, "loss": 1.098, "step": 19490 }, { "epoch": 0.621831053286138, "grad_norm": 0.17198623716831207, "learning_rate": 0.001, "loss": 1.1004, "step": 19500 }, { "epoch": 0.6221499410057719, "grad_norm": 0.16575346887111664, "learning_rate": 0.001, "loss": 1.1014, "step": 19510 }, { "epoch": 0.6224688287254058, "grad_norm": 0.1719786524772644, "learning_rate": 0.001, "loss": 1.094, "step": 19520 }, { "epoch": 0.6227877164450397, "grad_norm": 0.16646794974803925, "learning_rate": 0.001, "loss": 1.1097, "step": 19530 }, { "epoch": 0.6231066041646737, "grad_norm": 0.16815558075904846, "learning_rate": 0.001, "loss": 1.0888, "step": 19540 }, { "epoch": 0.6234254918843075, "grad_norm": 0.1675843745470047, "learning_rate": 0.001, "loss": 1.0896, "step": 19550 }, { "epoch": 0.6237443796039415, "grad_norm": 0.1759590357542038, "learning_rate": 0.001, "loss": 1.1131, "step": 19560 }, { "epoch": 0.6240632673235754, "grad_norm": 0.17111027240753174, "learning_rate": 0.001, "loss": 1.1067, "step": 19570 }, { "epoch": 0.6243821550432093, "grad_norm": 0.17272379994392395, "learning_rate": 0.001, "loss": 1.101, "step": 19580 }, { "epoch": 0.6247010427628432, "grad_norm": 0.16584420204162598, "learning_rate": 0.001, "loss": 1.0948, "step": 19590 }, { "epoch": 0.6250199304824772, "grad_norm": 0.16466067731380463, "learning_rate": 0.001, "loss": 1.0887, "step": 19600 }, { "epoch": 0.625338818202111, "grad_norm": 0.1696598380804062, "learning_rate": 0.001, "loss": 1.0999, "step": 19610 }, { "epoch": 0.625657705921745, "grad_norm": 0.1659633368253708, "learning_rate": 0.001, "loss": 1.1112, "step": 19620 }, { "epoch": 0.6259765936413789, "grad_norm": 0.17163695394992828, "learning_rate": 0.001, "loss": 1.086, "step": 19630 }, { "epoch": 0.6262954813610128, "grad_norm": 0.17734724283218384, "learning_rate": 0.001, "loss": 1.099, "step": 19640 }, { "epoch": 0.6266143690806467, "grad_norm": 0.16526159644126892, "learning_rate": 0.001, "loss": 1.0978, "step": 19650 }, { "epoch": 0.6269332568002807, "grad_norm": 0.1740102618932724, "learning_rate": 0.001, "loss": 1.0921, "step": 19660 }, { "epoch": 0.6272521445199145, "grad_norm": 0.17085231840610504, "learning_rate": 0.001, "loss": 1.091, "step": 19670 }, { "epoch": 0.6275710322395485, "grad_norm": 0.17608655989170074, "learning_rate": 0.001, "loss": 1.1068, "step": 19680 }, { "epoch": 0.6278899199591824, "grad_norm": 0.16677728295326233, "learning_rate": 0.001, "loss": 1.094, "step": 19690 }, { "epoch": 0.6282088076788163, "grad_norm": 0.17247577011585236, "learning_rate": 0.001, "loss": 1.0922, "step": 19700 }, { "epoch": 0.6285276953984502, "grad_norm": 0.16981416940689087, "learning_rate": 0.001, "loss": 1.0772, "step": 19710 }, { "epoch": 0.6288465831180842, "grad_norm": 0.16509251296520233, "learning_rate": 0.001, "loss": 1.0939, "step": 19720 }, { "epoch": 0.629165470837718, "grad_norm": 0.16891643404960632, "learning_rate": 0.001, "loss": 1.0857, "step": 19730 }, { "epoch": 0.629484358557352, "grad_norm": 0.16418832540512085, "learning_rate": 0.001, "loss": 1.0903, "step": 19740 }, { "epoch": 0.6298032462769859, "grad_norm": 0.16804587841033936, "learning_rate": 0.001, "loss": 1.0838, "step": 19750 }, { "epoch": 0.6301221339966198, "grad_norm": 0.1670849472284317, "learning_rate": 0.001, "loss": 1.0839, "step": 19760 }, { "epoch": 0.6304410217162537, "grad_norm": 0.17061035335063934, "learning_rate": 0.001, "loss": 1.0819, "step": 19770 }, { "epoch": 0.6307599094358877, "grad_norm": 0.16851983964443207, "learning_rate": 0.001, "loss": 1.085, "step": 19780 }, { "epoch": 0.6310787971555215, "grad_norm": 0.16248421370983124, "learning_rate": 0.001, "loss": 1.0959, "step": 19790 }, { "epoch": 0.6313976848751555, "grad_norm": 0.17037761211395264, "learning_rate": 0.001, "loss": 1.0885, "step": 19800 }, { "epoch": 0.6317165725947894, "grad_norm": 0.1663527637720108, "learning_rate": 0.001, "loss": 1.0947, "step": 19810 }, { "epoch": 0.6320354603144233, "grad_norm": 0.17086707055568695, "learning_rate": 0.001, "loss": 1.0895, "step": 19820 }, { "epoch": 0.6323543480340572, "grad_norm": 0.17373676598072052, "learning_rate": 0.001, "loss": 1.0898, "step": 19830 }, { "epoch": 0.6326732357536912, "grad_norm": 0.1695992350578308, "learning_rate": 0.001, "loss": 1.105, "step": 19840 }, { "epoch": 0.632992123473325, "grad_norm": 0.1636921763420105, "learning_rate": 0.001, "loss": 1.0857, "step": 19850 }, { "epoch": 0.633311011192959, "grad_norm": 0.17723794281482697, "learning_rate": 0.001, "loss": 1.0957, "step": 19860 }, { "epoch": 0.6336298989125929, "grad_norm": 0.16286857426166534, "learning_rate": 0.001, "loss": 1.0859, "step": 19870 }, { "epoch": 0.6339487866322268, "grad_norm": 0.17155247926712036, "learning_rate": 0.001, "loss": 1.0921, "step": 19880 }, { "epoch": 0.6342676743518607, "grad_norm": 0.16349942982196808, "learning_rate": 0.001, "loss": 1.0939, "step": 19890 }, { "epoch": 0.6345865620714947, "grad_norm": 0.1695716679096222, "learning_rate": 0.001, "loss": 1.095, "step": 19900 }, { "epoch": 0.6349054497911285, "grad_norm": 0.17381371557712555, "learning_rate": 0.001, "loss": 1.1043, "step": 19910 }, { "epoch": 0.6352243375107625, "grad_norm": 0.1696106344461441, "learning_rate": 0.001, "loss": 1.099, "step": 19920 }, { "epoch": 0.6355432252303964, "grad_norm": 0.17153136432170868, "learning_rate": 0.001, "loss": 1.0776, "step": 19930 }, { "epoch": 0.6358621129500303, "grad_norm": 0.17578768730163574, "learning_rate": 0.001, "loss": 1.0836, "step": 19940 }, { "epoch": 0.6361810006696642, "grad_norm": 0.16676296293735504, "learning_rate": 0.001, "loss": 1.0752, "step": 19950 }, { "epoch": 0.6364998883892982, "grad_norm": 0.17143648862838745, "learning_rate": 0.001, "loss": 1.0936, "step": 19960 }, { "epoch": 0.636818776108932, "grad_norm": 0.172052264213562, "learning_rate": 0.001, "loss": 1.1046, "step": 19970 }, { "epoch": 0.637137663828566, "grad_norm": 0.16690969467163086, "learning_rate": 0.001, "loss": 1.1054, "step": 19980 }, { "epoch": 0.6374565515481999, "grad_norm": 0.16814520955085754, "learning_rate": 0.001, "loss": 1.0981, "step": 19990 }, { "epoch": 0.6377754392678338, "grad_norm": 0.16802173852920532, "learning_rate": 0.001, "loss": 1.0911, "step": 20000 }, { "epoch": 0.6380943269874677, "grad_norm": 0.16470474004745483, "learning_rate": 0.001, "loss": 1.1015, "step": 20010 }, { "epoch": 0.6384132147071017, "grad_norm": 0.1644125133752823, "learning_rate": 0.001, "loss": 1.1007, "step": 20020 }, { "epoch": 0.6387321024267355, "grad_norm": 0.17056648433208466, "learning_rate": 0.001, "loss": 1.1168, "step": 20030 }, { "epoch": 0.6390509901463695, "grad_norm": 0.1640109121799469, "learning_rate": 0.001, "loss": 1.0898, "step": 20040 }, { "epoch": 0.6393698778660034, "grad_norm": 0.16083675622940063, "learning_rate": 0.001, "loss": 1.0826, "step": 20050 }, { "epoch": 0.6396887655856373, "grad_norm": 0.1644384115934372, "learning_rate": 0.001, "loss": 1.0919, "step": 20060 }, { "epoch": 0.6400076533052712, "grad_norm": 0.1658620685338974, "learning_rate": 0.001, "loss": 1.0918, "step": 20070 }, { "epoch": 0.6403265410249052, "grad_norm": 0.17355003952980042, "learning_rate": 0.001, "loss": 1.087, "step": 20080 }, { "epoch": 0.640645428744539, "grad_norm": 0.15890207886695862, "learning_rate": 0.001, "loss": 1.0992, "step": 20090 }, { "epoch": 0.640964316464173, "grad_norm": 0.17783460021018982, "learning_rate": 0.001, "loss": 1.1057, "step": 20100 }, { "epoch": 0.6412832041838069, "grad_norm": 0.16919948160648346, "learning_rate": 0.001, "loss": 1.079, "step": 20110 }, { "epoch": 0.6416020919034408, "grad_norm": 0.16504615545272827, "learning_rate": 0.001, "loss": 1.1018, "step": 20120 }, { "epoch": 0.6419209796230747, "grad_norm": 0.1690061092376709, "learning_rate": 0.001, "loss": 1.0985, "step": 20130 }, { "epoch": 0.6422398673427087, "grad_norm": 0.16797204315662384, "learning_rate": 0.001, "loss": 1.0923, "step": 20140 }, { "epoch": 0.6425587550623425, "grad_norm": 0.1663273572921753, "learning_rate": 0.001, "loss": 1.0958, "step": 20150 }, { "epoch": 0.6428776427819765, "grad_norm": 0.16364677250385284, "learning_rate": 0.001, "loss": 1.0996, "step": 20160 }, { "epoch": 0.6431965305016104, "grad_norm": 0.17317451536655426, "learning_rate": 0.001, "loss": 1.0985, "step": 20170 }, { "epoch": 0.6435154182212443, "grad_norm": 0.1685129553079605, "learning_rate": 0.001, "loss": 1.0944, "step": 20180 }, { "epoch": 0.6438343059408782, "grad_norm": 0.16640588641166687, "learning_rate": 0.001, "loss": 1.064, "step": 20190 }, { "epoch": 0.6441531936605122, "grad_norm": 0.16327440738677979, "learning_rate": 0.001, "loss": 1.0887, "step": 20200 }, { "epoch": 0.644472081380146, "grad_norm": 0.16816017031669617, "learning_rate": 0.001, "loss": 1.1073, "step": 20210 }, { "epoch": 0.64479096909978, "grad_norm": 0.16961325705051422, "learning_rate": 0.001, "loss": 1.0847, "step": 20220 }, { "epoch": 0.6451098568194139, "grad_norm": 0.17201103270053864, "learning_rate": 0.001, "loss": 1.0926, "step": 20230 }, { "epoch": 0.6454287445390477, "grad_norm": 0.16639626026153564, "learning_rate": 0.001, "loss": 1.0798, "step": 20240 }, { "epoch": 0.6457476322586817, "grad_norm": 0.16544729471206665, "learning_rate": 0.001, "loss": 1.0707, "step": 20250 }, { "epoch": 0.6460665199783157, "grad_norm": 0.1765151172876358, "learning_rate": 0.001, "loss": 1.0868, "step": 20260 }, { "epoch": 0.6463854076979495, "grad_norm": 0.1677905023097992, "learning_rate": 0.001, "loss": 1.0962, "step": 20270 }, { "epoch": 0.6467042954175835, "grad_norm": 0.16907256841659546, "learning_rate": 0.001, "loss": 1.1083, "step": 20280 }, { "epoch": 0.6470231831372174, "grad_norm": 0.16835346817970276, "learning_rate": 0.001, "loss": 1.0787, "step": 20290 }, { "epoch": 0.6473420708568512, "grad_norm": 0.17407572269439697, "learning_rate": 0.001, "loss": 1.0952, "step": 20300 }, { "epoch": 0.6476609585764852, "grad_norm": 0.17516972124576569, "learning_rate": 0.001, "loss": 1.0838, "step": 20310 }, { "epoch": 0.6479798462961192, "grad_norm": 0.16547511518001556, "learning_rate": 0.001, "loss": 1.0819, "step": 20320 }, { "epoch": 0.6482987340157531, "grad_norm": 0.16972166299819946, "learning_rate": 0.001, "loss": 1.0617, "step": 20330 }, { "epoch": 0.648617621735387, "grad_norm": 0.17067791521549225, "learning_rate": 0.001, "loss": 1.0962, "step": 20340 }, { "epoch": 0.6489365094550209, "grad_norm": 0.16942836344242096, "learning_rate": 0.001, "loss": 1.1061, "step": 20350 }, { "epoch": 0.6492553971746549, "grad_norm": 0.16373704373836517, "learning_rate": 0.001, "loss": 1.094, "step": 20360 }, { "epoch": 0.6495742848942887, "grad_norm": 0.1656084656715393, "learning_rate": 0.001, "loss": 1.0858, "step": 20370 }, { "epoch": 0.6498931726139227, "grad_norm": 0.1641538143157959, "learning_rate": 0.001, "loss": 1.1047, "step": 20380 }, { "epoch": 0.6502120603335566, "grad_norm": 0.16529102623462677, "learning_rate": 0.0009972136778050526, "loss": 1.0647, "step": 20390 }, { "epoch": 0.6505309480531904, "grad_norm": 0.17615461349487305, "learning_rate": 0.0009930383279382303, "loss": 1.0964, "step": 20400 }, { "epoch": 0.6508498357728244, "grad_norm": 0.17260606586933136, "learning_rate": 0.000988880460329121, "loss": 1.0724, "step": 20410 }, { "epoch": 0.6511687234924584, "grad_norm": 0.16743755340576172, "learning_rate": 0.000984740001779228, "loss": 1.0932, "step": 20420 }, { "epoch": 0.6514876112120922, "grad_norm": 0.1746867150068283, "learning_rate": 0.000980616879396537, "loss": 1.0988, "step": 20430 }, { "epoch": 0.6518064989317262, "grad_norm": 0.1718806028366089, "learning_rate": 0.0009765110205942349, "loss": 1.1176, "step": 20440 }, { "epoch": 0.6521253866513601, "grad_norm": 0.1670590043067932, "learning_rate": 0.0009724223530894298, "loss": 1.0917, "step": 20450 }, { "epoch": 0.652444274370994, "grad_norm": 0.1681852489709854, "learning_rate": 0.00096835080490188, "loss": 1.0839, "step": 20460 }, { "epoch": 0.6527631620906279, "grad_norm": 0.17223966121673584, "learning_rate": 0.0009642963043527262, "loss": 1.0833, "step": 20470 }, { "epoch": 0.6530820498102619, "grad_norm": 0.16178889572620392, "learning_rate": 0.0009602587800632295, "loss": 1.0536, "step": 20480 }, { "epoch": 0.6534009375298957, "grad_norm": 0.1683199554681778, "learning_rate": 0.0009562381609535146, "loss": 1.0837, "step": 20490 }, { "epoch": 0.6537198252495297, "grad_norm": 0.17109110951423645, "learning_rate": 0.0009522343762413196, "loss": 1.0786, "step": 20500 }, { "epoch": 0.6540387129691636, "grad_norm": 0.1645098775625229, "learning_rate": 0.0009482473554407485, "loss": 1.0779, "step": 20510 }, { "epoch": 0.6543576006887974, "grad_norm": 0.1678561270236969, "learning_rate": 0.000944277028361031, "loss": 1.0829, "step": 20520 }, { "epoch": 0.6546764884084314, "grad_norm": 0.16512475907802582, "learning_rate": 0.0009403233251052866, "loss": 1.068, "step": 20530 }, { "epoch": 0.6549953761280654, "grad_norm": 0.1729077249765396, "learning_rate": 0.0009363861760692945, "loss": 1.0914, "step": 20540 }, { "epoch": 0.6553142638476992, "grad_norm": 0.16553378105163574, "learning_rate": 0.0009324655119402678, "loss": 1.0798, "step": 20550 }, { "epoch": 0.6556331515673332, "grad_norm": 0.16073313355445862, "learning_rate": 0.0009285612636956329, "loss": 1.0699, "step": 20560 }, { "epoch": 0.6559520392869671, "grad_norm": 0.16968417167663574, "learning_rate": 0.0009246733626018155, "loss": 1.0918, "step": 20570 }, { "epoch": 0.656270927006601, "grad_norm": 0.1629115641117096, "learning_rate": 0.0009208017402130296, "loss": 1.0878, "step": 20580 }, { "epoch": 0.6565898147262349, "grad_norm": 0.16741321980953217, "learning_rate": 0.0009169463283700727, "loss": 1.0725, "step": 20590 }, { "epoch": 0.6569087024458689, "grad_norm": 0.16493017971515656, "learning_rate": 0.0009131070591991262, "loss": 1.0786, "step": 20600 }, { "epoch": 0.6572275901655027, "grad_norm": 0.1635984629392624, "learning_rate": 0.00090928386511056, "loss": 1.0826, "step": 20610 }, { "epoch": 0.6575464778851366, "grad_norm": 0.16892851889133453, "learning_rate": 0.0009054766787977433, "loss": 1.0711, "step": 20620 }, { "epoch": 0.6578653656047706, "grad_norm": 0.1606256067752838, "learning_rate": 0.0009016854332358588, "loss": 1.056, "step": 20630 }, { "epoch": 0.6581842533244044, "grad_norm": 0.16923518478870392, "learning_rate": 0.0008979100616807236, "loss": 1.1132, "step": 20640 }, { "epoch": 0.6585031410440384, "grad_norm": 0.17301279306411743, "learning_rate": 0.0008941504976676136, "loss": 1.0764, "step": 20650 }, { "epoch": 0.6588220287636724, "grad_norm": 0.1645253598690033, "learning_rate": 0.0008904066750100934, "loss": 1.0599, "step": 20660 }, { "epoch": 0.6591409164833062, "grad_norm": 0.16737417876720428, "learning_rate": 0.0008866785277988516, "loss": 1.0728, "step": 20670 }, { "epoch": 0.6594598042029401, "grad_norm": 0.15950274467468262, "learning_rate": 0.00088296599040054, "loss": 1.0716, "step": 20680 }, { "epoch": 0.6597786919225741, "grad_norm": 0.1676434576511383, "learning_rate": 0.0008792689974566183, "loss": 1.0655, "step": 20690 }, { "epoch": 0.6600975796422079, "grad_norm": 0.16000451147556305, "learning_rate": 0.0008755874838822034, "loss": 1.0722, "step": 20700 }, { "epoch": 0.6604164673618419, "grad_norm": 0.1731007993221283, "learning_rate": 0.0008719213848649239, "loss": 1.0753, "step": 20710 }, { "epoch": 0.6607353550814759, "grad_norm": 0.16616320610046387, "learning_rate": 0.0008682706358637785, "loss": 1.0395, "step": 20720 }, { "epoch": 0.6610542428011097, "grad_norm": 0.1657589226961136, "learning_rate": 0.0008646351726080005, "loss": 1.0694, "step": 20730 }, { "epoch": 0.6613731305207436, "grad_norm": 0.1682083159685135, "learning_rate": 0.0008610149310959256, "loss": 1.0667, "step": 20740 }, { "epoch": 0.6616920182403776, "grad_norm": 0.16832314431667328, "learning_rate": 0.0008574098475938659, "loss": 1.0754, "step": 20750 }, { "epoch": 0.6620109059600114, "grad_norm": 0.1608540415763855, "learning_rate": 0.000853819858634987, "loss": 1.0568, "step": 20760 }, { "epoch": 0.6623297936796454, "grad_norm": 0.1716543585062027, "learning_rate": 0.0008502449010181915, "loss": 1.0788, "step": 20770 }, { "epoch": 0.6626486813992793, "grad_norm": 0.16817323863506317, "learning_rate": 0.0008466849118070059, "loss": 1.0665, "step": 20780 }, { "epoch": 0.6629675691189132, "grad_norm": 0.16492323577404022, "learning_rate": 0.0008431398283284729, "loss": 1.0679, "step": 20790 }, { "epoch": 0.6632864568385471, "grad_norm": 0.16486430168151855, "learning_rate": 0.0008396095881720477, "loss": 1.0807, "step": 20800 }, { "epoch": 0.6636053445581811, "grad_norm": 0.16902168095111847, "learning_rate": 0.0008360941291884995, "loss": 1.0478, "step": 20810 }, { "epoch": 0.6639242322778149, "grad_norm": 0.1692483276128769, "learning_rate": 0.0008325933894888175, "loss": 1.0753, "step": 20820 }, { "epoch": 0.6642431199974489, "grad_norm": 0.16759838163852692, "learning_rate": 0.0008291073074431209, "loss": 1.0563, "step": 20830 }, { "epoch": 0.6645620077170828, "grad_norm": 0.16476279497146606, "learning_rate": 0.0008256358216795744, "loss": 1.0563, "step": 20840 }, { "epoch": 0.6648808954367167, "grad_norm": 0.16580504179000854, "learning_rate": 0.0008221788710833077, "loss": 1.0632, "step": 20850 }, { "epoch": 0.6651997831563506, "grad_norm": 0.16249163448810577, "learning_rate": 0.0008187363947953391, "loss": 1.0545, "step": 20860 }, { "epoch": 0.6655186708759846, "grad_norm": 0.16070739924907684, "learning_rate": 0.0008153083322115049, "loss": 1.0685, "step": 20870 }, { "epoch": 0.6658375585956184, "grad_norm": 0.1714041382074356, "learning_rate": 0.0008118946229813915, "loss": 1.0571, "step": 20880 }, { "epoch": 0.6661564463152524, "grad_norm": 0.1640019714832306, "learning_rate": 0.0008084952070072738, "loss": 1.0558, "step": 20890 }, { "epoch": 0.6664753340348863, "grad_norm": 0.16367119550704956, "learning_rate": 0.0008051100244430569, "loss": 1.0645, "step": 20900 }, { "epoch": 0.6667942217545202, "grad_norm": 0.16876378655433655, "learning_rate": 0.0008017390156932223, "loss": 1.0564, "step": 20910 }, { "epoch": 0.6671131094741541, "grad_norm": 0.1563488394021988, "learning_rate": 0.0007983821214117789, "loss": 1.0362, "step": 20920 }, { "epoch": 0.6674319971937881, "grad_norm": 0.1577063649892807, "learning_rate": 0.0007950392825012183, "loss": 1.0588, "step": 20930 }, { "epoch": 0.6677508849134219, "grad_norm": 0.16953368484973907, "learning_rate": 0.0007917104401114743, "loss": 1.0446, "step": 20940 }, { "epoch": 0.6680697726330559, "grad_norm": 0.17253628373146057, "learning_rate": 0.000788395535638887, "loss": 1.0621, "step": 20950 }, { "epoch": 0.6683886603526898, "grad_norm": 0.1640058308839798, "learning_rate": 0.0007850945107251707, "loss": 1.0624, "step": 20960 }, { "epoch": 0.6687075480723237, "grad_norm": 0.16152934730052948, "learning_rate": 0.000781807307256387, "loss": 1.0424, "step": 20970 }, { "epoch": 0.6690264357919576, "grad_norm": 0.15980294346809387, "learning_rate": 0.0007785338673619216, "loss": 1.0482, "step": 20980 }, { "epoch": 0.6693453235115916, "grad_norm": 0.16612927615642548, "learning_rate": 0.0007752741334134652, "loss": 1.0475, "step": 20990 }, { "epoch": 0.6696642112312254, "grad_norm": 0.16465890407562256, "learning_rate": 0.0007720280480239992, "loss": 1.0668, "step": 21000 }, { "epoch": 0.6699830989508594, "grad_norm": 0.1595165729522705, "learning_rate": 0.0007687955540467853, "loss": 1.0507, "step": 21010 }, { "epoch": 0.6703019866704933, "grad_norm": 0.16686223447322845, "learning_rate": 0.0007655765945743598, "loss": 1.0484, "step": 21020 }, { "epoch": 0.6706208743901272, "grad_norm": 0.1746356338262558, "learning_rate": 0.0007623711129375314, "loss": 1.0527, "step": 21030 }, { "epoch": 0.6709397621097611, "grad_norm": 0.16428135335445404, "learning_rate": 0.0007591790527043832, "loss": 1.0506, "step": 21040 }, { "epoch": 0.6712586498293951, "grad_norm": 0.15915681421756744, "learning_rate": 0.0007560003576792802, "loss": 1.0527, "step": 21050 }, { "epoch": 0.6715775375490289, "grad_norm": 0.1672055721282959, "learning_rate": 0.0007528349719018794, "loss": 1.0528, "step": 21060 }, { "epoch": 0.6718964252686629, "grad_norm": 0.1689584106206894, "learning_rate": 0.0007496828396461442, "loss": 1.0495, "step": 21070 }, { "epoch": 0.6722153129882968, "grad_norm": 0.16179822385311127, "learning_rate": 0.0007465439054193641, "loss": 1.0713, "step": 21080 }, { "epoch": 0.6725342007079307, "grad_norm": 0.1632169634103775, "learning_rate": 0.0007434181139611777, "loss": 1.0444, "step": 21090 }, { "epoch": 0.6728530884275646, "grad_norm": 0.16350291669368744, "learning_rate": 0.0007403054102425991, "loss": 1.0191, "step": 21100 }, { "epoch": 0.6731719761471986, "grad_norm": 0.16606953740119934, "learning_rate": 0.0007372057394650503, "loss": 1.0697, "step": 21110 }, { "epoch": 0.6734908638668324, "grad_norm": 0.16665737330913544, "learning_rate": 0.0007341190470593954, "loss": 1.045, "step": 21120 }, { "epoch": 0.6738097515864664, "grad_norm": 0.1595230996608734, "learning_rate": 0.0007310452786849806, "loss": 1.0286, "step": 21130 }, { "epoch": 0.6741286393061003, "grad_norm": 0.1614699512720108, "learning_rate": 0.0007279843802286769, "loss": 1.045, "step": 21140 }, { "epoch": 0.6744475270257342, "grad_norm": 0.16494181752204895, "learning_rate": 0.0007249362978039282, "loss": 1.0395, "step": 21150 }, { "epoch": 0.6747664147453681, "grad_norm": 0.164317786693573, "learning_rate": 0.0007219009777498024, "loss": 1.0667, "step": 21160 }, { "epoch": 0.6750853024650021, "grad_norm": 0.16172905266284943, "learning_rate": 0.0007188783666300463, "loss": 1.027, "step": 21170 }, { "epoch": 0.675404190184636, "grad_norm": 0.16476504504680634, "learning_rate": 0.000715868411232145, "loss": 1.0615, "step": 21180 }, { "epoch": 0.6757230779042699, "grad_norm": 0.1657302975654602, "learning_rate": 0.0007128710585663859, "loss": 1.0207, "step": 21190 }, { "epoch": 0.6760419656239038, "grad_norm": 0.1605929583311081, "learning_rate": 0.0007098862558649246, "loss": 1.0271, "step": 21200 }, { "epoch": 0.6763608533435378, "grad_norm": 0.16407832503318787, "learning_rate": 0.000706913950580857, "loss": 1.0121, "step": 21210 }, { "epoch": 0.6766797410631716, "grad_norm": 0.15979787707328796, "learning_rate": 0.000703954090387293, "loss": 1.034, "step": 21220 }, { "epoch": 0.6769986287828056, "grad_norm": 0.1594686061143875, "learning_rate": 0.0007010066231764369, "loss": 1.0199, "step": 21230 }, { "epoch": 0.6773175165024395, "grad_norm": 0.16522930562496185, "learning_rate": 0.0006980714970586688, "loss": 1.0329, "step": 21240 }, { "epoch": 0.6776364042220734, "grad_norm": 0.16075100004673004, "learning_rate": 0.0006951486603616313, "loss": 1.0463, "step": 21250 }, { "epoch": 0.6779552919417073, "grad_norm": 0.1620459407567978, "learning_rate": 0.0006922380616293202, "loss": 1.0279, "step": 21260 }, { "epoch": 0.6782741796613413, "grad_norm": 0.17030924558639526, "learning_rate": 0.0006893396496211784, "loss": 1.0406, "step": 21270 }, { "epoch": 0.6785930673809751, "grad_norm": 0.16657640039920807, "learning_rate": 0.0006864533733111942, "loss": 1.0515, "step": 21280 }, { "epoch": 0.6789119551006091, "grad_norm": 0.16453786194324493, "learning_rate": 0.0006835791818870018, "loss": 1.0203, "step": 21290 }, { "epoch": 0.679230842820243, "grad_norm": 0.1666298508644104, "learning_rate": 0.0006807170247489883, "loss": 1.0369, "step": 21300 }, { "epoch": 0.6795497305398769, "grad_norm": 0.16200481355190277, "learning_rate": 0.0006778668515094021, "loss": 1.0376, "step": 21310 }, { "epoch": 0.6798686182595108, "grad_norm": 0.16168266534805298, "learning_rate": 0.0006750286119914656, "loss": 1.0258, "step": 21320 }, { "epoch": 0.6801875059791448, "grad_norm": 0.16025297343730927, "learning_rate": 0.0006722022562284926, "loss": 1.0484, "step": 21330 }, { "epoch": 0.6805063936987786, "grad_norm": 0.16829195618629456, "learning_rate": 0.000669387734463008, "loss": 1.0187, "step": 21340 }, { "epoch": 0.6808252814184126, "grad_norm": 0.16115964949131012, "learning_rate": 0.0006665849971458721, "loss": 1.046, "step": 21350 }, { "epoch": 0.6811441691380465, "grad_norm": 0.1624351292848587, "learning_rate": 0.0006637939949354081, "loss": 1.0218, "step": 21360 }, { "epoch": 0.6814630568576804, "grad_norm": 0.1657162606716156, "learning_rate": 0.000661014678696534, "loss": 1.0321, "step": 21370 }, { "epoch": 0.6817819445773143, "grad_norm": 0.16228361427783966, "learning_rate": 0.0006582469994998967, "loss": 1.0553, "step": 21380 }, { "epoch": 0.6821008322969483, "grad_norm": 0.16864708065986633, "learning_rate": 0.0006554909086210115, "loss": 1.0299, "step": 21390 }, { "epoch": 0.6824197200165821, "grad_norm": 0.16412338614463806, "learning_rate": 0.0006527463575394037, "loss": 1.0311, "step": 21400 }, { "epoch": 0.6827386077362161, "grad_norm": 0.15431377291679382, "learning_rate": 0.0006500132979377546, "loss": 1.0314, "step": 21410 }, { "epoch": 0.68305749545585, "grad_norm": 0.16200804710388184, "learning_rate": 0.0006472916817010511, "loss": 1.0223, "step": 21420 }, { "epoch": 0.6833763831754839, "grad_norm": 0.16413038969039917, "learning_rate": 0.0006445814609157381, "loss": 1.0238, "step": 21430 }, { "epoch": 0.6836952708951178, "grad_norm": 0.1650371551513672, "learning_rate": 0.0006418825878688756, "loss": 1.0315, "step": 21440 }, { "epoch": 0.6840141586147518, "grad_norm": 0.16407425701618195, "learning_rate": 0.0006391950150472985, "loss": 1.0199, "step": 21450 }, { "epoch": 0.6843330463343856, "grad_norm": 0.1609756350517273, "learning_rate": 0.0006365186951367798, "loss": 1.0235, "step": 21460 }, { "epoch": 0.6846519340540196, "grad_norm": 0.16075292229652405, "learning_rate": 0.0006338535810211983, "loss": 1.0267, "step": 21470 }, { "epoch": 0.6849708217736535, "grad_norm": 0.15738439559936523, "learning_rate": 0.0006311996257817083, "loss": 1.0226, "step": 21480 }, { "epoch": 0.6852897094932874, "grad_norm": 0.16439034044742584, "learning_rate": 0.0006285567826959148, "loss": 1.0295, "step": 21490 }, { "epoch": 0.6856085972129213, "grad_norm": 0.16584716737270355, "learning_rate": 0.0006259250052370493, "loss": 1.0455, "step": 21500 }, { "epoch": 0.6859274849325553, "grad_norm": 0.15936855971813202, "learning_rate": 0.0006233042470731524, "loss": 1.0172, "step": 21510 }, { "epoch": 0.6862463726521891, "grad_norm": 0.15984030067920685, "learning_rate": 0.0006206944620662569, "loss": 1.0297, "step": 21520 }, { "epoch": 0.6865652603718231, "grad_norm": 0.15906420350074768, "learning_rate": 0.0006180956042715764, "loss": 1.016, "step": 21530 }, { "epoch": 0.686884148091457, "grad_norm": 0.16585637629032135, "learning_rate": 0.0006155076279366958, "loss": 1.0237, "step": 21540 }, { "epoch": 0.6872030358110909, "grad_norm": 0.1607387810945511, "learning_rate": 0.0006129304875007661, "loss": 1.0156, "step": 21550 }, { "epoch": 0.6875219235307248, "grad_norm": 0.16103488206863403, "learning_rate": 0.0006103641375937023, "loss": 1.0243, "step": 21560 }, { "epoch": 0.6878408112503588, "grad_norm": 0.16695347428321838, "learning_rate": 0.0006078085330353851, "loss": 1.0086, "step": 21570 }, { "epoch": 0.6881596989699926, "grad_norm": 0.16508355736732483, "learning_rate": 0.0006052636288348644, "loss": 0.9974, "step": 21580 }, { "epoch": 0.6884785866896266, "grad_norm": 0.16593703627586365, "learning_rate": 0.0006027293801895685, "loss": 1.0211, "step": 21590 }, { "epoch": 0.6887974744092605, "grad_norm": 0.16203592717647552, "learning_rate": 0.0006002057424845144, "loss": 1.0126, "step": 21600 }, { "epoch": 0.6891163621288944, "grad_norm": 0.16561388969421387, "learning_rate": 0.0005976926712915233, "loss": 1.0207, "step": 21610 }, { "epoch": 0.6894352498485283, "grad_norm": 0.1665153205394745, "learning_rate": 0.0005951901223684372, "loss": 1.0224, "step": 21620 }, { "epoch": 0.6897541375681623, "grad_norm": 0.1631123572587967, "learning_rate": 0.0005926980516583412, "loss": 1.007, "step": 21630 }, { "epoch": 0.6900730252877961, "grad_norm": 0.16275134682655334, "learning_rate": 0.0005902164152887875, "loss": 1.0144, "step": 21640 }, { "epoch": 0.6903919130074301, "grad_norm": 0.16036659479141235, "learning_rate": 0.0005877451695710226, "loss": 1.0222, "step": 21650 }, { "epoch": 0.690710800727064, "grad_norm": 0.15921048820018768, "learning_rate": 0.0005852842709992187, "loss": 1.0201, "step": 21660 }, { "epoch": 0.6910296884466979, "grad_norm": 0.1643412709236145, "learning_rate": 0.0005828336762497074, "loss": 1.0071, "step": 21670 }, { "epoch": 0.6913485761663318, "grad_norm": 0.16244398057460785, "learning_rate": 0.0005803933421802178, "loss": 1.0255, "step": 21680 }, { "epoch": 0.6916674638859658, "grad_norm": 0.16339954733848572, "learning_rate": 0.0005779632258291156, "loss": 1.0173, "step": 21690 }, { "epoch": 0.6919863516055996, "grad_norm": 0.16126830875873566, "learning_rate": 0.0005755432844146483, "loss": 1.018, "step": 21700 }, { "epoch": 0.6923052393252336, "grad_norm": 0.1622725874185562, "learning_rate": 0.0005731334753341907, "loss": 1.0112, "step": 21710 }, { "epoch": 0.6926241270448675, "grad_norm": 0.1686084270477295, "learning_rate": 0.0005707337561634957, "loss": 1.0075, "step": 21720 }, { "epoch": 0.6929430147645014, "grad_norm": 0.16552889347076416, "learning_rate": 0.0005683440846559473, "loss": 1.0229, "step": 21730 }, { "epoch": 0.6932619024841353, "grad_norm": 0.1642754077911377, "learning_rate": 0.0005659644187418168, "loss": 1.0056, "step": 21740 }, { "epoch": 0.6935807902037693, "grad_norm": 0.1628817319869995, "learning_rate": 0.0005635947165275219, "loss": 1.0098, "step": 21750 }, { "epoch": 0.6938996779234031, "grad_norm": 0.16525153815746307, "learning_rate": 0.0005612349362948896, "loss": 1.0185, "step": 21760 }, { "epoch": 0.6942185656430371, "grad_norm": 0.1632060408592224, "learning_rate": 0.0005588850365004215, "loss": 1.0148, "step": 21770 }, { "epoch": 0.694537453362671, "grad_norm": 0.1638498157262802, "learning_rate": 0.0005565449757745625, "loss": 1.0189, "step": 21780 }, { "epoch": 0.6948563410823049, "grad_norm": 0.15921951830387115, "learning_rate": 0.0005542147129209725, "loss": 1.012, "step": 21790 }, { "epoch": 0.6951752288019388, "grad_norm": 0.1655687838792801, "learning_rate": 0.0005518942069158012, "loss": 1.0176, "step": 21800 }, { "epoch": 0.6954941165215728, "grad_norm": 0.16503483057022095, "learning_rate": 0.0005495834169069658, "loss": 1.0059, "step": 21810 }, { "epoch": 0.6958130042412066, "grad_norm": 0.1610029637813568, "learning_rate": 0.0005472823022134319, "loss": 1.0158, "step": 21820 }, { "epoch": 0.6961318919608406, "grad_norm": 0.1622898131608963, "learning_rate": 0.000544990822324497, "loss": 0.997, "step": 21830 }, { "epoch": 0.6964507796804745, "grad_norm": 0.16136029362678528, "learning_rate": 0.0005427089368990779, "loss": 1.0004, "step": 21840 }, { "epoch": 0.6967696674001084, "grad_norm": 0.16808313131332397, "learning_rate": 0.0005404366057649998, "loss": 1.0151, "step": 21850 }, { "epoch": 0.6970885551197423, "grad_norm": 0.16658589243888855, "learning_rate": 0.00053817378891829, "loss": 1.0045, "step": 21860 }, { "epoch": 0.6974074428393763, "grad_norm": 0.16027787327766418, "learning_rate": 0.0005359204465224725, "loss": 1.0188, "step": 21870 }, { "epoch": 0.6977263305590101, "grad_norm": 0.1679868847131729, "learning_rate": 0.0005336765389078676, "loss": 1.0053, "step": 21880 }, { "epoch": 0.6980452182786441, "grad_norm": 0.16373640298843384, "learning_rate": 0.000531442026570893, "loss": 1.0139, "step": 21890 }, { "epoch": 0.698364105998278, "grad_norm": 0.16460253298282623, "learning_rate": 0.0005292168701733688, "loss": 1.0089, "step": 21900 }, { "epoch": 0.6986829937179119, "grad_norm": 0.16029703617095947, "learning_rate": 0.0005270010305418245, "loss": 0.9898, "step": 21910 }, { "epoch": 0.6990018814375458, "grad_norm": 0.16033555567264557, "learning_rate": 0.0005247944686668097, "loss": 0.9906, "step": 21920 }, { "epoch": 0.6993207691571798, "grad_norm": 0.16450348496437073, "learning_rate": 0.0005225971457022069, "loss": 1.0056, "step": 21930 }, { "epoch": 0.6996396568768136, "grad_norm": 0.16469576954841614, "learning_rate": 0.0005204090229645483, "loss": 0.9883, "step": 21940 }, { "epoch": 0.6999585445964476, "grad_norm": 0.15795905888080597, "learning_rate": 0.0005182300619323341, "loss": 0.9956, "step": 21950 }, { "epoch": 0.7002774323160815, "grad_norm": 0.1648419350385666, "learning_rate": 0.0005160602242453551, "loss": 1.0061, "step": 21960 }, { "epoch": 0.7005963200357154, "grad_norm": 0.16443631052970886, "learning_rate": 0.0005138994717040161, "loss": 0.9819, "step": 21970 }, { "epoch": 0.7009152077553493, "grad_norm": 0.16426272690296173, "learning_rate": 0.0005117477662686652, "loss": 1.0071, "step": 21980 }, { "epoch": 0.7012340954749833, "grad_norm": 0.1597410887479782, "learning_rate": 0.0005096050700589225, "loss": 1.0085, "step": 21990 }, { "epoch": 0.7015529831946172, "grad_norm": 0.16440628468990326, "learning_rate": 0.0005074713453530141, "loss": 0.9989, "step": 22000 }, { "epoch": 0.7018718709142511, "grad_norm": 0.15793122351169586, "learning_rate": 0.0005053465545871075, "loss": 1.0088, "step": 22010 }, { "epoch": 0.702190758633885, "grad_norm": 0.17422626912593842, "learning_rate": 0.0005032306603546511, "loss": 0.9918, "step": 22020 }, { "epoch": 0.702509646353519, "grad_norm": 0.1663317233324051, "learning_rate": 0.0005011236254057146, "loss": 1.0069, "step": 22030 }, { "epoch": 0.7028285340731528, "grad_norm": 0.1605159342288971, "learning_rate": 0.0004990254126463343, "loss": 0.9984, "step": 22040 }, { "epoch": 0.7031474217927868, "grad_norm": 0.166510671377182, "learning_rate": 0.0004969359851378588, "loss": 1.0014, "step": 22050 }, { "epoch": 0.7034663095124207, "grad_norm": 0.16222979128360748, "learning_rate": 0.0004948553060963001, "loss": 0.9773, "step": 22060 }, { "epoch": 0.7037851972320546, "grad_norm": 0.17086096107959747, "learning_rate": 0.0004927833388916852, "loss": 1.0113, "step": 22070 }, { "epoch": 0.7041040849516885, "grad_norm": 0.16001038253307343, "learning_rate": 0.0004907200470474113, "loss": 0.9999, "step": 22080 }, { "epoch": 0.7044229726713225, "grad_norm": 0.1599177122116089, "learning_rate": 0.0004886653942396035, "loss": 0.9879, "step": 22090 }, { "epoch": 0.7047418603909563, "grad_norm": 0.15955601632595062, "learning_rate": 0.00048661934429647597, "loss": 1.0056, "step": 22100 }, { "epoch": 0.7050607481105903, "grad_norm": 0.168484166264534, "learning_rate": 0.0004845818611976946, "loss": 0.9948, "step": 22110 }, { "epoch": 0.7053796358302242, "grad_norm": 0.1610783189535141, "learning_rate": 0.0004825529090737429, "loss": 0.9842, "step": 22120 }, { "epoch": 0.7056985235498581, "grad_norm": 0.1566549837589264, "learning_rate": 0.0004805324522052906, "loss": 1.0026, "step": 22130 }, { "epoch": 0.706017411269492, "grad_norm": 0.15728874504566193, "learning_rate": 0.000478520455022565, "loss": 0.9897, "step": 22140 }, { "epoch": 0.706336298989126, "grad_norm": 0.1586431860923767, "learning_rate": 0.0004765168821047247, "loss": 0.9911, "step": 22150 }, { "epoch": 0.7066551867087598, "grad_norm": 0.1617947518825531, "learning_rate": 0.0004745216981792355, "loss": 1.0057, "step": 22160 }, { "epoch": 0.7069740744283938, "grad_norm": 0.15989385545253754, "learning_rate": 0.00047253486812125044, "loss": 0.9716, "step": 22170 }, { "epoch": 0.7072929621480277, "grad_norm": 0.16351810097694397, "learning_rate": 0.0004705563569529904, "loss": 0.9917, "step": 22180 }, { "epoch": 0.7076118498676616, "grad_norm": 0.1647985577583313, "learning_rate": 0.00046858612984312904, "loss": 1.0022, "step": 22190 }, { "epoch": 0.7079307375872955, "grad_norm": 0.16196519136428833, "learning_rate": 0.00046662415210617933, "loss": 0.9898, "step": 22200 }, { "epoch": 0.7082496253069295, "grad_norm": 0.15809550881385803, "learning_rate": 0.00046467038920188283, "loss": 0.991, "step": 22210 }, { "epoch": 0.7085685130265633, "grad_norm": 0.1606694906949997, "learning_rate": 0.0004627248067346017, "loss": 0.9884, "step": 22220 }, { "epoch": 0.7088874007461973, "grad_norm": 0.15880940854549408, "learning_rate": 0.0004607873704527135, "loss": 1.0084, "step": 22230 }, { "epoch": 0.7092062884658312, "grad_norm": 0.1667298674583435, "learning_rate": 0.00045885804624800757, "loss": 1.0052, "step": 22240 }, { "epoch": 0.7095251761854651, "grad_norm": 0.15849944949150085, "learning_rate": 0.000456936800155085, "loss": 0.9801, "step": 22250 }, { "epoch": 0.709844063905099, "grad_norm": 0.16091540455818176, "learning_rate": 0.00045502359835076047, "loss": 0.9847, "step": 22260 }, { "epoch": 0.710162951624733, "grad_norm": 0.165780708193779, "learning_rate": 0.00045311840715346694, "loss": 0.9868, "step": 22270 }, { "epoch": 0.7104818393443668, "grad_norm": 0.16307060420513153, "learning_rate": 0.0004512211930226627, "loss": 0.9873, "step": 22280 }, { "epoch": 0.7108007270640008, "grad_norm": 0.16260948777198792, "learning_rate": 0.0004493319225582409, "loss": 0.9976, "step": 22290 }, { "epoch": 0.7111196147836347, "grad_norm": 0.16536939144134521, "learning_rate": 0.00044745056249994127, "loss": 0.9958, "step": 22300 }, { "epoch": 0.7114385025032686, "grad_norm": 0.16163595020771027, "learning_rate": 0.00044557707972676475, "loss": 0.9889, "step": 22310 }, { "epoch": 0.7117573902229025, "grad_norm": 0.1603289395570755, "learning_rate": 0.0004437114412563908, "loss": 0.9947, "step": 22320 }, { "epoch": 0.7120762779425365, "grad_norm": 0.16332769393920898, "learning_rate": 0.0004418536142445961, "loss": 0.9886, "step": 22330 }, { "epoch": 0.7123951656621703, "grad_norm": 0.16538774967193604, "learning_rate": 0.0004400035659846766, "loss": 0.9816, "step": 22340 }, { "epoch": 0.7127140533818043, "grad_norm": 0.16004317998886108, "learning_rate": 0.00043816126390687195, "loss": 0.9704, "step": 22350 }, { "epoch": 0.7130329411014382, "grad_norm": 0.1620021015405655, "learning_rate": 0.0004363266755777918, "loss": 0.9873, "step": 22360 }, { "epoch": 0.7133518288210721, "grad_norm": 0.1575515866279602, "learning_rate": 0.00043449976869984496, "loss": 0.9837, "step": 22370 }, { "epoch": 0.713670716540706, "grad_norm": 0.15856903791427612, "learning_rate": 0.00043268051111067067, "loss": 0.9711, "step": 22380 }, { "epoch": 0.71398960426034, "grad_norm": 0.161849707365036, "learning_rate": 0.00043086887078257267, "loss": 0.9754, "step": 22390 }, { "epoch": 0.7143084919799738, "grad_norm": 0.16092804074287415, "learning_rate": 0.00042906481582195513, "loss": 0.9665, "step": 22400 }, { "epoch": 0.7146273796996078, "grad_norm": 0.16372103989124298, "learning_rate": 0.0004272683144687611, "loss": 0.997, "step": 22410 }, { "epoch": 0.7149462674192417, "grad_norm": 0.1614946871995926, "learning_rate": 0.0004254793350959138, "loss": 0.9884, "step": 22420 }, { "epoch": 0.7152651551388756, "grad_norm": 0.15983077883720398, "learning_rate": 0.00042369784620875905, "loss": 0.9708, "step": 22430 }, { "epoch": 0.7155840428585095, "grad_norm": 0.16505417227745056, "learning_rate": 0.00042192381644451176, "loss": 0.9783, "step": 22440 }, { "epoch": 0.7159029305781435, "grad_norm": 0.1651798039674759, "learning_rate": 0.0004201572145717032, "loss": 0.9792, "step": 22450 }, { "epoch": 0.7162218182977773, "grad_norm": 0.16053563356399536, "learning_rate": 0.0004183980094896312, "loss": 0.9881, "step": 22460 }, { "epoch": 0.7165407060174113, "grad_norm": 0.1612672060728073, "learning_rate": 0.0004166461702278128, "loss": 0.9749, "step": 22470 }, { "epoch": 0.7168595937370452, "grad_norm": 0.1621503382921219, "learning_rate": 0.0004149016659454389, "loss": 0.9765, "step": 22480 }, { "epoch": 0.7171784814566791, "grad_norm": 0.16153399646282196, "learning_rate": 0.00041316446593083145, "loss": 0.9799, "step": 22490 }, { "epoch": 0.717497369176313, "grad_norm": 0.16426432132720947, "learning_rate": 0.00041143453960090277, "loss": 0.9826, "step": 22500 }, { "epoch": 0.717816256895947, "grad_norm": 0.16212931275367737, "learning_rate": 0.0004097118565006169, "loss": 0.9799, "step": 22510 }, { "epoch": 0.7181351446155808, "grad_norm": 0.16201704740524292, "learning_rate": 0.00040799638630245356, "loss": 0.9711, "step": 22520 }, { "epoch": 0.7184540323352148, "grad_norm": 0.1674143671989441, "learning_rate": 0.0004062880988058746, "loss": 0.9731, "step": 22530 }, { "epoch": 0.7187729200548487, "grad_norm": 0.1614939272403717, "learning_rate": 0.0004045869639367917, "loss": 0.9775, "step": 22540 }, { "epoch": 0.7190918077744826, "grad_norm": 0.1606423556804657, "learning_rate": 0.0004028929517470373, "loss": 0.9737, "step": 22550 }, { "epoch": 0.7194106954941165, "grad_norm": 0.15913470089435577, "learning_rate": 0.00040120603241383754, "loss": 0.9726, "step": 22560 }, { "epoch": 0.7197295832137505, "grad_norm": 0.16026009619235992, "learning_rate": 0.0003995261762392866, "loss": 0.9765, "step": 22570 }, { "epoch": 0.7200484709333843, "grad_norm": 0.1610075980424881, "learning_rate": 0.0003978533536498247, "loss": 0.9736, "step": 22580 }, { "epoch": 0.7203673586530183, "grad_norm": 0.1646539866924286, "learning_rate": 0.0003961875351957167, "loss": 0.97, "step": 22590 }, { "epoch": 0.7206862463726522, "grad_norm": 0.16013289988040924, "learning_rate": 0.000394528691550534, "loss": 0.9765, "step": 22600 }, { "epoch": 0.7210051340922861, "grad_norm": 0.15997812151908875, "learning_rate": 0.0003928767935106386, "loss": 0.9622, "step": 22610 }, { "epoch": 0.72132402181192, "grad_norm": 0.15702947974205017, "learning_rate": 0.0003912318119946682, "loss": 0.9789, "step": 22620 }, { "epoch": 0.721642909531554, "grad_norm": 0.16323722898960114, "learning_rate": 0.0003895937180430247, "loss": 0.9719, "step": 22630 }, { "epoch": 0.7219617972511878, "grad_norm": 0.15763328969478607, "learning_rate": 0.0003879624828173645, "loss": 0.9613, "step": 22640 }, { "epoch": 0.7222806849708218, "grad_norm": 0.16371284425258636, "learning_rate": 0.0003863380776000905, "loss": 0.974, "step": 22650 }, { "epoch": 0.7225995726904557, "grad_norm": 0.168775275349617, "learning_rate": 0.0003847204737938466, "loss": 0.9674, "step": 22660 }, { "epoch": 0.7229184604100896, "grad_norm": 0.16056294739246368, "learning_rate": 0.0003831096429210144, "loss": 0.9665, "step": 22670 }, { "epoch": 0.7232373481297235, "grad_norm": 0.16246573626995087, "learning_rate": 0.00038150555662321163, "loss": 0.9709, "step": 22680 }, { "epoch": 0.7235562358493575, "grad_norm": 0.16221703588962555, "learning_rate": 0.0003799081866607931, "loss": 0.9827, "step": 22690 }, { "epoch": 0.7238751235689913, "grad_norm": 0.15954597294330597, "learning_rate": 0.00037831750491235344, "loss": 0.9786, "step": 22700 }, { "epoch": 0.7241940112886253, "grad_norm": 0.16421189904212952, "learning_rate": 0.0003767334833742322, "loss": 0.9726, "step": 22710 }, { "epoch": 0.7245128990082592, "grad_norm": 0.1675567775964737, "learning_rate": 0.0003751560941600205, "loss": 0.9591, "step": 22720 }, { "epoch": 0.7248317867278931, "grad_norm": 0.16072584688663483, "learning_rate": 0.0003735853095000704, "loss": 0.9649, "step": 22730 }, { "epoch": 0.725150674447527, "grad_norm": 0.16682232916355133, "learning_rate": 0.0003720211017410059, "loss": 0.9567, "step": 22740 }, { "epoch": 0.725469562167161, "grad_norm": 0.16807231307029724, "learning_rate": 0.0003704634433452362, "loss": 0.9624, "step": 22750 }, { "epoch": 0.7257884498867948, "grad_norm": 0.1716778427362442, "learning_rate": 0.0003689123068904708, "loss": 0.969, "step": 22760 }, { "epoch": 0.7261073376064288, "grad_norm": 0.17154444754123688, "learning_rate": 0.00036736766506923683, "loss": 0.9727, "step": 22770 }, { "epoch": 0.7264262253260627, "grad_norm": 0.16088175773620605, "learning_rate": 0.00036582949068839814, "loss": 0.9604, "step": 22780 }, { "epoch": 0.7267451130456966, "grad_norm": 0.16620905697345734, "learning_rate": 0.0003642977566686768, "loss": 0.9853, "step": 22790 }, { "epoch": 0.7270640007653305, "grad_norm": 0.15978989005088806, "learning_rate": 0.00036277243604417616, "loss": 0.9629, "step": 22800 }, { "epoch": 0.7273828884849645, "grad_norm": 0.16269829869270325, "learning_rate": 0.00036125350196190614, "loss": 0.975, "step": 22810 }, { "epoch": 0.7277017762045984, "grad_norm": 0.16348029673099518, "learning_rate": 0.0003597409276813109, "loss": 0.9833, "step": 22820 }, { "epoch": 0.7280206639242323, "grad_norm": 0.16305918991565704, "learning_rate": 0.0003582346865737974, "loss": 0.9713, "step": 22830 }, { "epoch": 0.7283395516438662, "grad_norm": 0.16581085324287415, "learning_rate": 0.0003567347521222671, "loss": 0.983, "step": 22840 }, { "epoch": 0.7286584393635002, "grad_norm": 0.1627841591835022, "learning_rate": 0.00035524109792064903, "loss": 0.9846, "step": 22850 }, { "epoch": 0.728977327083134, "grad_norm": 0.16312730312347412, "learning_rate": 0.00035375369767343465, "loss": 0.9612, "step": 22860 }, { "epoch": 0.729296214802768, "grad_norm": 0.16436518728733063, "learning_rate": 0.0003522725251952154, "loss": 0.9677, "step": 22870 }, { "epoch": 0.7296151025224019, "grad_norm": 0.1663922518491745, "learning_rate": 0.0003507975544102212, "loss": 0.9509, "step": 22880 }, { "epoch": 0.7299339902420358, "grad_norm": 0.16606448590755463, "learning_rate": 0.000349328759351862, "loss": 0.9618, "step": 22890 }, { "epoch": 0.7302528779616697, "grad_norm": 0.1651606410741806, "learning_rate": 0.00034786611416226987, "loss": 0.9774, "step": 22900 }, { "epoch": 0.7305717656813037, "grad_norm": 0.15751025080680847, "learning_rate": 0.0003464095930918445, "loss": 0.9614, "step": 22910 }, { "epoch": 0.7308906534009375, "grad_norm": 0.16441288590431213, "learning_rate": 0.0003449591704987995, "loss": 0.9568, "step": 22920 }, { "epoch": 0.7312095411205715, "grad_norm": 0.16570603847503662, "learning_rate": 0.0003435148208487109, "loss": 0.9729, "step": 22930 }, { "epoch": 0.7315284288402054, "grad_norm": 0.16226355731487274, "learning_rate": 0.0003420765187140679, "loss": 0.9594, "step": 22940 }, { "epoch": 0.7318473165598393, "grad_norm": 0.16443803906440735, "learning_rate": 0.00034064423877382523, "loss": 0.9567, "step": 22950 }, { "epoch": 0.7321662042794732, "grad_norm": 0.16529656946659088, "learning_rate": 0.000339217955812957, "loss": 0.9604, "step": 22960 }, { "epoch": 0.7324850919991072, "grad_norm": 0.160089910030365, "learning_rate": 0.0003377976447220132, "loss": 0.949, "step": 22970 }, { "epoch": 0.732803979718741, "grad_norm": 0.1677403301000595, "learning_rate": 0.0003363832804966775, "loss": 0.958, "step": 22980 }, { "epoch": 0.733122867438375, "grad_norm": 0.16066356003284454, "learning_rate": 0.00033497483823732686, "loss": 0.948, "step": 22990 }, { "epoch": 0.7334417551580089, "grad_norm": 0.1701483428478241, "learning_rate": 0.0003335722931485937, "loss": 0.97, "step": 23000 }, { "epoch": 0.7337606428776428, "grad_norm": 0.16166579723358154, "learning_rate": 0.00033217562053892876, "loss": 0.9516, "step": 23010 }, { "epoch": 0.7340795305972767, "grad_norm": 0.16624568402767181, "learning_rate": 0.00033078479582016675, "loss": 0.9675, "step": 23020 }, { "epoch": 0.7343984183169107, "grad_norm": 0.1629163771867752, "learning_rate": 0.0003293997945070935, "loss": 0.9766, "step": 23030 }, { "epoch": 0.7347173060365445, "grad_norm": 0.16117964684963226, "learning_rate": 0.00032802059221701484, "loss": 0.949, "step": 23040 }, { "epoch": 0.7350361937561785, "grad_norm": 0.1631103754043579, "learning_rate": 0.00032664716466932733, "loss": 0.9792, "step": 23050 }, { "epoch": 0.7353550814758124, "grad_norm": 0.1656711995601654, "learning_rate": 0.0003252794876850907, "loss": 0.9605, "step": 23060 }, { "epoch": 0.7356739691954463, "grad_norm": 0.16462354362010956, "learning_rate": 0.00032391753718660234, "loss": 0.955, "step": 23070 }, { "epoch": 0.7359928569150802, "grad_norm": 0.15977884829044342, "learning_rate": 0.00032256128919697346, "loss": 0.9651, "step": 23080 }, { "epoch": 0.7363117446347142, "grad_norm": 0.16425421833992004, "learning_rate": 0.00032121071983970694, "loss": 0.9403, "step": 23090 }, { "epoch": 0.736630632354348, "grad_norm": 0.16412289440631866, "learning_rate": 0.0003198658053382767, "loss": 0.9581, "step": 23100 }, { "epoch": 0.736949520073982, "grad_norm": 0.16336853802204132, "learning_rate": 0.0003185265220157095, "loss": 0.9439, "step": 23110 }, { "epoch": 0.7372684077936159, "grad_norm": 0.1620175540447235, "learning_rate": 0.00031719284629416804, "loss": 0.9593, "step": 23120 }, { "epoch": 0.7375872955132498, "grad_norm": 0.16276094317436218, "learning_rate": 0.0003158647546945357, "loss": 0.9502, "step": 23130 }, { "epoch": 0.7379061832328837, "grad_norm": 0.17003417015075684, "learning_rate": 0.0003145422238360034, "loss": 0.9791, "step": 23140 }, { "epoch": 0.7382250709525177, "grad_norm": 0.16041214764118195, "learning_rate": 0.0003132252304356578, "loss": 0.9757, "step": 23150 }, { "epoch": 0.7385439586721515, "grad_norm": 0.16528736054897308, "learning_rate": 0.00031191375130807145, "loss": 0.9597, "step": 23160 }, { "epoch": 0.7388628463917855, "grad_norm": 0.16135884821414948, "learning_rate": 0.0003106077633648948, "loss": 0.9646, "step": 23170 }, { "epoch": 0.7391817341114194, "grad_norm": 0.1672159880399704, "learning_rate": 0.0003093072436144496, "loss": 0.9544, "step": 23180 }, { "epoch": 0.7395006218310533, "grad_norm": 0.1631564199924469, "learning_rate": 0.00030801216916132403, "loss": 0.9518, "step": 23190 }, { "epoch": 0.7398195095506872, "grad_norm": 0.15956902503967285, "learning_rate": 0.00030672251720596967, "loss": 0.9487, "step": 23200 }, { "epoch": 0.7401383972703212, "grad_norm": 0.16405710577964783, "learning_rate": 0.0003054382650443004, "loss": 0.962, "step": 23210 }, { "epoch": 0.740457284989955, "grad_norm": 0.16590505838394165, "learning_rate": 0.0003041593900672922, "loss": 0.9454, "step": 23220 }, { "epoch": 0.740776172709589, "grad_norm": 0.16362358629703522, "learning_rate": 0.00030288586976058574, "loss": 0.9398, "step": 23230 }, { "epoch": 0.7410950604292229, "grad_norm": 0.17358466982841492, "learning_rate": 0.00030161768170408935, "loss": 0.9452, "step": 23240 }, { "epoch": 0.7414139481488567, "grad_norm": 0.1667748987674713, "learning_rate": 0.0003003548035715848, "loss": 0.9467, "step": 23250 }, { "epoch": 0.7417328358684907, "grad_norm": 0.16409194469451904, "learning_rate": 0.0002990972131303341, "loss": 0.9482, "step": 23260 }, { "epoch": 0.7420517235881247, "grad_norm": 0.16616195440292358, "learning_rate": 0.0002978448882406881, "loss": 0.9478, "step": 23270 }, { "epoch": 0.7423706113077585, "grad_norm": 0.16837824881076813, "learning_rate": 0.00029659780685569674, "loss": 0.9413, "step": 23280 }, { "epoch": 0.7426894990273925, "grad_norm": 0.16764934360980988, "learning_rate": 0.00029535594702072087, "loss": 0.942, "step": 23290 }, { "epoch": 0.7430083867470264, "grad_norm": 0.1687283217906952, "learning_rate": 0.0002941192868730457, "loss": 0.9677, "step": 23300 }, { "epoch": 0.7433272744666602, "grad_norm": 0.16668742895126343, "learning_rate": 0.00029288780464149593, "loss": 0.9544, "step": 23310 }, { "epoch": 0.7436461621862942, "grad_norm": 0.16436542570590973, "learning_rate": 0.00029166147864605294, "loss": 0.9604, "step": 23320 }, { "epoch": 0.7439650499059282, "grad_norm": 0.16353577375411987, "learning_rate": 0.0002904402872974721, "loss": 0.9582, "step": 23330 }, { "epoch": 0.744283937625562, "grad_norm": 0.16399306058883667, "learning_rate": 0.00028922420909690367, "loss": 0.9537, "step": 23340 }, { "epoch": 0.744602825345196, "grad_norm": 0.16380132734775543, "learning_rate": 0.00028801322263551397, "loss": 0.9527, "step": 23350 }, { "epoch": 0.7449217130648299, "grad_norm": 0.16486787796020508, "learning_rate": 0.0002868073065941083, "loss": 0.9344, "step": 23360 }, { "epoch": 0.7452406007844637, "grad_norm": 0.1594451665878296, "learning_rate": 0.00028560643974275587, "loss": 0.9459, "step": 23370 }, { "epoch": 0.7455594885040977, "grad_norm": 0.16545240581035614, "learning_rate": 0.00028441060094041583, "loss": 0.9464, "step": 23380 }, { "epoch": 0.7458783762237317, "grad_norm": 0.170954629778862, "learning_rate": 0.0002832197691345653, "loss": 0.9508, "step": 23390 }, { "epoch": 0.7461972639433655, "grad_norm": 0.1691250205039978, "learning_rate": 0.0002820339233608287, "loss": 0.9528, "step": 23400 }, { "epoch": 0.7465161516629994, "grad_norm": 0.1623115986585617, "learning_rate": 0.00028085304274260857, "loss": 0.9598, "step": 23410 }, { "epoch": 0.7468350393826334, "grad_norm": 0.1692652702331543, "learning_rate": 0.0002796771064907181, "loss": 0.952, "step": 23420 }, { "epoch": 0.7471539271022672, "grad_norm": 0.16102375090122223, "learning_rate": 0.0002785060939030151, "loss": 0.9432, "step": 23430 }, { "epoch": 0.7474728148219012, "grad_norm": 0.1656680405139923, "learning_rate": 0.0002773399843640378, "loss": 0.9331, "step": 23440 }, { "epoch": 0.7477917025415352, "grad_norm": 0.16000087559223175, "learning_rate": 0.00027617875734464145, "loss": 0.9358, "step": 23450 }, { "epoch": 0.748110590261169, "grad_norm": 0.17037495970726013, "learning_rate": 0.00027502239240163715, "loss": 0.9616, "step": 23460 }, { "epoch": 0.748429477980803, "grad_norm": 0.16514143347740173, "learning_rate": 0.00027387086917743224, "loss": 0.9491, "step": 23470 }, { "epoch": 0.7487483657004369, "grad_norm": 0.1671529859304428, "learning_rate": 0.0002727241673996714, "loss": 0.9391, "step": 23480 }, { "epoch": 0.7490672534200707, "grad_norm": 0.16395649313926697, "learning_rate": 0.00027158226688088006, "loss": 0.9431, "step": 23490 }, { "epoch": 0.7493861411397047, "grad_norm": 0.17046767473220825, "learning_rate": 0.0002704451475181089, "loss": 0.9499, "step": 23500 }, { "epoch": 0.7497050288593387, "grad_norm": 0.15811336040496826, "learning_rate": 0.00026931278929257993, "loss": 0.9359, "step": 23510 }, { "epoch": 0.7500239165789725, "grad_norm": 0.16410349309444427, "learning_rate": 0.00026818517226933437, "loss": 0.9351, "step": 23520 }, { "epoch": 0.7503428042986064, "grad_norm": 0.16768516600131989, "learning_rate": 0.00026706227659688107, "loss": 0.9462, "step": 23530 }, { "epoch": 0.7506616920182404, "grad_norm": 0.16026252508163452, "learning_rate": 0.00026594408250684776, "loss": 0.9439, "step": 23540 }, { "epoch": 0.7509805797378742, "grad_norm": 0.16729193925857544, "learning_rate": 0.00026483057031363234, "loss": 0.9493, "step": 23550 }, { "epoch": 0.7512994674575082, "grad_norm": 0.16243338584899902, "learning_rate": 0.00026372172041405677, "loss": 0.9404, "step": 23560 }, { "epoch": 0.7516183551771422, "grad_norm": 0.169617161154747, "learning_rate": 0.0002626175132870219, "loss": 0.9504, "step": 23570 }, { "epoch": 0.751937242896776, "grad_norm": 0.1605086624622345, "learning_rate": 0.0002615179294931637, "loss": 0.9399, "step": 23580 }, { "epoch": 0.75225613061641, "grad_norm": 0.16321012377738953, "learning_rate": 0.000260422949674511, "loss": 0.9362, "step": 23590 }, { "epoch": 0.7525750183360439, "grad_norm": 0.16131918132305145, "learning_rate": 0.00025933255455414493, "loss": 0.9342, "step": 23600 }, { "epoch": 0.7528939060556777, "grad_norm": 0.16513609886169434, "learning_rate": 0.0002582467249358593, "loss": 0.95, "step": 23610 }, { "epoch": 0.7532127937753117, "grad_norm": 0.1651507318019867, "learning_rate": 0.0002571654417038226, "loss": 0.9507, "step": 23620 }, { "epoch": 0.7535316814949456, "grad_norm": 0.1660146713256836, "learning_rate": 0.0002560886858222419, "loss": 0.9526, "step": 23630 }, { "epoch": 0.7538505692145795, "grad_norm": 0.15882445871829987, "learning_rate": 0.0002550164383350272, "loss": 0.9243, "step": 23640 }, { "epoch": 0.7541694569342134, "grad_norm": 0.17136244475841522, "learning_rate": 0.0002539486803654581, "loss": 0.9448, "step": 23650 }, { "epoch": 0.7544883446538474, "grad_norm": 0.1678006798028946, "learning_rate": 0.0002528853931158513, "loss": 0.9429, "step": 23660 }, { "epoch": 0.7548072323734814, "grad_norm": 0.16127173602581024, "learning_rate": 0.0002518265578672297, "loss": 0.9366, "step": 23670 }, { "epoch": 0.7551261200931152, "grad_norm": 0.16299548745155334, "learning_rate": 0.00025077215597899276, "loss": 0.9378, "step": 23680 }, { "epoch": 0.7554450078127491, "grad_norm": 0.1708274483680725, "learning_rate": 0.0002497221688885888, "loss": 0.9601, "step": 23690 }, { "epoch": 0.7557638955323831, "grad_norm": 0.16434860229492188, "learning_rate": 0.0002486765781111873, "loss": 0.9401, "step": 23700 }, { "epoch": 0.7560827832520169, "grad_norm": 0.1635703295469284, "learning_rate": 0.0002476353652393545, "loss": 0.9476, "step": 23710 }, { "epoch": 0.7564016709716509, "grad_norm": 0.17095215618610382, "learning_rate": 0.0002465985119427286, "loss": 0.9291, "step": 23720 }, { "epoch": 0.7567205586912849, "grad_norm": 0.1732560396194458, "learning_rate": 0.0002455659999676974, "loss": 0.9374, "step": 23730 }, { "epoch": 0.7570394464109187, "grad_norm": 0.16333815455436707, "learning_rate": 0.0002445378111370768, "loss": 0.9406, "step": 23740 }, { "epoch": 0.7573583341305526, "grad_norm": 0.16199623048305511, "learning_rate": 0.00024351392734979106, "loss": 0.934, "step": 23750 }, { "epoch": 0.7576772218501866, "grad_norm": 0.16906283795833588, "learning_rate": 0.00024249433058055368, "loss": 0.9252, "step": 23760 }, { "epoch": 0.7579961095698204, "grad_norm": 0.17197982966899872, "learning_rate": 0.00024147900287955056, "loss": 0.9489, "step": 23770 }, { "epoch": 0.7583149972894544, "grad_norm": 0.16192105412483215, "learning_rate": 0.0002404679263721236, "loss": 0.9261, "step": 23780 }, { "epoch": 0.7586338850090883, "grad_norm": 0.16491258144378662, "learning_rate": 0.00023946108325845628, "loss": 0.9431, "step": 23790 }, { "epoch": 0.7589527727287222, "grad_norm": 0.16750048100948334, "learning_rate": 0.00023845845581326017, "loss": 0.9591, "step": 23800 }, { "epoch": 0.7592716604483561, "grad_norm": 0.17201946675777435, "learning_rate": 0.00023746002638546287, "loss": 0.926, "step": 23810 }, { "epoch": 0.7595905481679901, "grad_norm": 0.16682586073875427, "learning_rate": 0.00023646577739789735, "loss": 0.9395, "step": 23820 }, { "epoch": 0.7599094358876239, "grad_norm": 0.16354051232337952, "learning_rate": 0.00023547569134699248, "loss": 0.9419, "step": 23830 }, { "epoch": 0.7602283236072579, "grad_norm": 0.16591399908065796, "learning_rate": 0.00023448975080246477, "loss": 0.938, "step": 23840 }, { "epoch": 0.7605472113268918, "grad_norm": 0.16553214192390442, "learning_rate": 0.00023350793840701175, "loss": 0.9359, "step": 23850 }, { "epoch": 0.7608660990465257, "grad_norm": 0.16791701316833496, "learning_rate": 0.00023253023687600625, "loss": 0.928, "step": 23860 }, { "epoch": 0.7611849867661596, "grad_norm": 0.16159778833389282, "learning_rate": 0.00023155662899719193, "loss": 0.9554, "step": 23870 }, { "epoch": 0.7615038744857936, "grad_norm": 0.1624593585729599, "learning_rate": 0.00023058709763038065, "loss": 0.9385, "step": 23880 }, { "epoch": 0.7618227622054274, "grad_norm": 0.16371050477027893, "learning_rate": 0.00022962162570715043, "loss": 0.943, "step": 23890 }, { "epoch": 0.7621416499250614, "grad_norm": 0.17248646914958954, "learning_rate": 0.00022866019623054502, "loss": 0.9253, "step": 23900 }, { "epoch": 0.7624605376446953, "grad_norm": 0.16547873616218567, "learning_rate": 0.00022770279227477477, "loss": 0.9403, "step": 23910 }, { "epoch": 0.7627794253643292, "grad_norm": 0.16172504425048828, "learning_rate": 0.00022674939698491854, "loss": 0.9285, "step": 23920 }, { "epoch": 0.7630983130839631, "grad_norm": 0.16042064130306244, "learning_rate": 0.000225799993576627, "loss": 0.9295, "step": 23930 }, { "epoch": 0.7634172008035971, "grad_norm": 0.17115207016468048, "learning_rate": 0.0002248545653358273, "loss": 0.943, "step": 23940 }, { "epoch": 0.7637360885232309, "grad_norm": 0.1645117849111557, "learning_rate": 0.00022391309561842852, "loss": 0.9374, "step": 23950 }, { "epoch": 0.7640549762428649, "grad_norm": 0.16595900058746338, "learning_rate": 0.00022297556785002887, "loss": 0.936, "step": 23960 }, { "epoch": 0.7643738639624988, "grad_norm": 0.16360650956630707, "learning_rate": 0.0002220419655256239, "loss": 0.9239, "step": 23970 }, { "epoch": 0.7646927516821327, "grad_norm": 0.1633913815021515, "learning_rate": 0.00022111227220931578, "loss": 0.9372, "step": 23980 }, { "epoch": 0.7650116394017666, "grad_norm": 0.16824324429035187, "learning_rate": 0.00022018647153402418, "loss": 0.9355, "step": 23990 }, { "epoch": 0.7653305271214006, "grad_norm": 0.16444505751132965, "learning_rate": 0.00021926454720119792, "loss": 0.9482, "step": 24000 }, { "epoch": 0.7656494148410344, "grad_norm": 0.16452354192733765, "learning_rate": 0.00021834648298052806, "loss": 0.9165, "step": 24010 }, { "epoch": 0.7659683025606684, "grad_norm": 0.16021281480789185, "learning_rate": 0.00021743226270966244, "loss": 0.9326, "step": 24020 }, { "epoch": 0.7662871902803023, "grad_norm": 0.17205902934074402, "learning_rate": 0.00021652187029392068, "loss": 0.9492, "step": 24030 }, { "epoch": 0.7666060779999362, "grad_norm": 0.16032502055168152, "learning_rate": 0.00021561528970601124, "loss": 0.939, "step": 24040 }, { "epoch": 0.7669249657195701, "grad_norm": 0.1687769591808319, "learning_rate": 0.00021471250498574909, "loss": 0.936, "step": 24050 }, { "epoch": 0.7672438534392041, "grad_norm": 0.163095161318779, "learning_rate": 0.0002138135002397747, "loss": 0.9395, "step": 24060 }, { "epoch": 0.7675627411588379, "grad_norm": 0.16366122663021088, "learning_rate": 0.0002129182596412743, "loss": 0.9528, "step": 24070 }, { "epoch": 0.7678816288784719, "grad_norm": 0.16610878705978394, "learning_rate": 0.00021202676742970136, "loss": 0.9416, "step": 24080 }, { "epoch": 0.7682005165981058, "grad_norm": 0.16417580842971802, "learning_rate": 0.0002111390079104989, "loss": 0.9317, "step": 24090 }, { "epoch": 0.7685194043177397, "grad_norm": 0.16696830093860626, "learning_rate": 0.00021025496545482323, "loss": 0.9359, "step": 24100 }, { "epoch": 0.7688382920373736, "grad_norm": 0.1716535985469818, "learning_rate": 0.0002093746244992691, "loss": 0.9396, "step": 24110 }, { "epoch": 0.7691571797570076, "grad_norm": 0.1632964015007019, "learning_rate": 0.0002084979695455954, "loss": 0.9292, "step": 24120 }, { "epoch": 0.7694760674766414, "grad_norm": 0.16584451496601105, "learning_rate": 0.00020762498516045232, "loss": 0.9326, "step": 24130 }, { "epoch": 0.7697949551962754, "grad_norm": 0.16385923326015472, "learning_rate": 0.00020675565597510982, "loss": 0.9329, "step": 24140 }, { "epoch": 0.7701138429159093, "grad_norm": 0.1649266630411148, "learning_rate": 0.00020588996668518696, "loss": 0.9371, "step": 24150 }, { "epoch": 0.7704327306355432, "grad_norm": 0.16053041815757751, "learning_rate": 0.00020502790205038248, "loss": 0.92, "step": 24160 }, { "epoch": 0.7707516183551771, "grad_norm": 0.16492949426174164, "learning_rate": 0.00020416944689420654, "loss": 0.9235, "step": 24170 }, { "epoch": 0.7710705060748111, "grad_norm": 0.16709771752357483, "learning_rate": 0.00020331458610371345, "loss": 0.9385, "step": 24180 }, { "epoch": 0.7713893937944449, "grad_norm": 0.16922813653945923, "learning_rate": 0.00020246330462923582, "loss": 0.9301, "step": 24190 }, { "epoch": 0.7717082815140789, "grad_norm": 0.1640113890171051, "learning_rate": 0.00020161558748411925, "loss": 0.9211, "step": 24200 }, { "epoch": 0.7720271692337128, "grad_norm": 0.16404199600219727, "learning_rate": 0.00020077141974445887, "loss": 0.9278, "step": 24210 }, { "epoch": 0.7723460569533467, "grad_norm": 0.16532927751541138, "learning_rate": 0.00019993078654883636, "loss": 0.9219, "step": 24220 }, { "epoch": 0.7726649446729806, "grad_norm": 0.1662260890007019, "learning_rate": 0.00019909367309805842, "loss": 0.9054, "step": 24230 }, { "epoch": 0.7729838323926146, "grad_norm": 0.16583295166492462, "learning_rate": 0.0001982600646548962, "loss": 0.9262, "step": 24240 }, { "epoch": 0.7733027201122484, "grad_norm": 0.16515842080116272, "learning_rate": 0.0001974299465438259, "loss": 0.9179, "step": 24250 }, { "epoch": 0.7736216078318824, "grad_norm": 0.17167966067790985, "learning_rate": 0.00019660330415077035, "loss": 0.9172, "step": 24260 }, { "epoch": 0.7739404955515163, "grad_norm": 0.1663963943719864, "learning_rate": 0.00019578012292284172, "loss": 0.933, "step": 24270 }, { "epoch": 0.7742593832711502, "grad_norm": 0.16700772941112518, "learning_rate": 0.0001949603883680855, "loss": 0.9381, "step": 24280 }, { "epoch": 0.7745782709907841, "grad_norm": 0.1680431216955185, "learning_rate": 0.0001941440860552251, "loss": 0.9357, "step": 24290 }, { "epoch": 0.7748971587104181, "grad_norm": 0.16372446715831757, "learning_rate": 0.00019333120161340792, "loss": 0.9299, "step": 24300 }, { "epoch": 0.7752160464300519, "grad_norm": 0.16472508013248444, "learning_rate": 0.00019252172073195239, "loss": 0.931, "step": 24310 }, { "epoch": 0.7755349341496859, "grad_norm": 0.17051909863948822, "learning_rate": 0.00019171562916009603, "loss": 0.9378, "step": 24320 }, { "epoch": 0.7758538218693198, "grad_norm": 0.16289319097995758, "learning_rate": 0.00019091291270674447, "loss": 0.9212, "step": 24330 }, { "epoch": 0.7761727095889537, "grad_norm": 0.1658150851726532, "learning_rate": 0.00019011355724022166, "loss": 0.9321, "step": 24340 }, { "epoch": 0.7764915973085876, "grad_norm": 0.16301952302455902, "learning_rate": 0.0001893175486880212, "loss": 0.9341, "step": 24350 }, { "epoch": 0.7768104850282216, "grad_norm": 0.1663392335176468, "learning_rate": 0.0001885248730365585, "loss": 0.9267, "step": 24360 }, { "epoch": 0.7771293727478554, "grad_norm": 0.16546833515167236, "learning_rate": 0.00018773551633092397, "loss": 0.9157, "step": 24370 }, { "epoch": 0.7774482604674894, "grad_norm": 0.16583992540836334, "learning_rate": 0.00018694946467463756, "loss": 0.9084, "step": 24380 }, { "epoch": 0.7777671481871233, "grad_norm": 0.1641332507133484, "learning_rate": 0.00018616670422940394, "loss": 0.9277, "step": 24390 }, { "epoch": 0.7780860359067572, "grad_norm": 0.171635240316391, "learning_rate": 0.00018538722121486895, "loss": 0.929, "step": 24400 }, { "epoch": 0.7784049236263911, "grad_norm": 0.16188062727451324, "learning_rate": 0.00018461100190837707, "loss": 0.9308, "step": 24410 }, { "epoch": 0.7787238113460251, "grad_norm": 0.16226285696029663, "learning_rate": 0.0001838380326447297, "loss": 0.9362, "step": 24420 }, { "epoch": 0.7790426990656589, "grad_norm": 0.16537430882453918, "learning_rate": 0.00018306829981594458, "loss": 0.9271, "step": 24430 }, { "epoch": 0.7793615867852929, "grad_norm": 0.163283571600914, "learning_rate": 0.0001823017898710165, "loss": 0.9274, "step": 24440 }, { "epoch": 0.7796804745049268, "grad_norm": 0.161969393491745, "learning_rate": 0.00018153848931567836, "loss": 0.9175, "step": 24450 }, { "epoch": 0.7799993622245607, "grad_norm": 0.16395604610443115, "learning_rate": 0.00018077838471216377, "loss": 0.928, "step": 24460 }, { "epoch": 0.7803182499441946, "grad_norm": 0.1620090901851654, "learning_rate": 0.00018002146267897054, "loss": 0.92, "step": 24470 }, { "epoch": 0.7806371376638286, "grad_norm": 0.16308417916297913, "learning_rate": 0.00017926770989062511, "loss": 0.9098, "step": 24480 }, { "epoch": 0.7809560253834625, "grad_norm": 0.1651722639799118, "learning_rate": 0.0001785171130774477, "loss": 0.9244, "step": 24490 }, { "epoch": 0.7812749131030964, "grad_norm": 0.1706753969192505, "learning_rate": 0.00017776965902531916, "loss": 0.9302, "step": 24500 }, { "epoch": 0.7815938008227303, "grad_norm": 0.16880400478839874, "learning_rate": 0.00017702533457544784, "loss": 0.9094, "step": 24510 }, { "epoch": 0.7819126885423643, "grad_norm": 0.16843248903751373, "learning_rate": 0.00017628412662413823, "loss": 0.9308, "step": 24520 }, { "epoch": 0.7822315762619981, "grad_norm": 0.16641728579998016, "learning_rate": 0.0001755460221225603, "loss": 0.9351, "step": 24530 }, { "epoch": 0.7825504639816321, "grad_norm": 0.16158147156238556, "learning_rate": 0.00017481100807651963, "loss": 0.9155, "step": 24540 }, { "epoch": 0.782869351701266, "grad_norm": 0.16641920804977417, "learning_rate": 0.00017407907154622863, "loss": 0.9249, "step": 24550 }, { "epoch": 0.7831882394208999, "grad_norm": 0.16956974565982819, "learning_rate": 0.00017335019964607887, "loss": 0.9187, "step": 24560 }, { "epoch": 0.7835071271405338, "grad_norm": 0.16971714794635773, "learning_rate": 0.00017262437954441417, "loss": 0.9259, "step": 24570 }, { "epoch": 0.7838260148601678, "grad_norm": 0.16633319854736328, "learning_rate": 0.00017190159846330476, "loss": 0.9217, "step": 24580 }, { "epoch": 0.7841449025798016, "grad_norm": 0.16065815091133118, "learning_rate": 0.00017118184367832215, "loss": 0.927, "step": 24590 }, { "epoch": 0.7844637902994356, "grad_norm": 0.1626022458076477, "learning_rate": 0.00017046510251831525, "loss": 0.9235, "step": 24600 }, { "epoch": 0.7847826780190695, "grad_norm": 0.1678556501865387, "learning_rate": 0.0001697513623651875, "loss": 0.9168, "step": 24610 }, { "epoch": 0.7851015657387034, "grad_norm": 0.16368116438388824, "learning_rate": 0.00016904061065367424, "loss": 0.907, "step": 24620 }, { "epoch": 0.7854204534583373, "grad_norm": 0.16854064166545868, "learning_rate": 0.00016833283487112187, "loss": 0.9207, "step": 24630 }, { "epoch": 0.7857393411779713, "grad_norm": 0.16941261291503906, "learning_rate": 0.00016762802255726757, "loss": 0.9172, "step": 24640 }, { "epoch": 0.7860582288976051, "grad_norm": 0.1684810221195221, "learning_rate": 0.0001669261613040197, "loss": 0.9164, "step": 24650 }, { "epoch": 0.7863771166172391, "grad_norm": 0.16645093262195587, "learning_rate": 0.00016622723875523958, "loss": 0.9237, "step": 24660 }, { "epoch": 0.786696004336873, "grad_norm": 0.16230951249599457, "learning_rate": 0.0001655312426065239, "loss": 0.9133, "step": 24670 }, { "epoch": 0.7870148920565069, "grad_norm": 0.1660119891166687, "learning_rate": 0.00016483816060498802, "loss": 0.9321, "step": 24680 }, { "epoch": 0.7873337797761408, "grad_norm": 0.16883163154125214, "learning_rate": 0.00016414798054905036, "loss": 0.9268, "step": 24690 }, { "epoch": 0.7876526674957748, "grad_norm": 0.16361361742019653, "learning_rate": 0.0001634606902882176, "loss": 0.9082, "step": 24700 }, { "epoch": 0.7879715552154086, "grad_norm": 0.16677947342395782, "learning_rate": 0.00016277627772287072, "loss": 0.926, "step": 24710 }, { "epoch": 0.7882904429350426, "grad_norm": 0.16485854983329773, "learning_rate": 0.00016209473080405187, "loss": 0.9066, "step": 24720 }, { "epoch": 0.7886093306546765, "grad_norm": 0.16697336733341217, "learning_rate": 0.0001614160375332526, "loss": 0.9246, "step": 24730 }, { "epoch": 0.7889282183743104, "grad_norm": 0.17086774110794067, "learning_rate": 0.00016074018596220224, "loss": 0.9235, "step": 24740 }, { "epoch": 0.7892471060939443, "grad_norm": 0.1602909117937088, "learning_rate": 0.00016006716419265783, "loss": 0.9371, "step": 24750 }, { "epoch": 0.7895659938135783, "grad_norm": 0.16252830624580383, "learning_rate": 0.00015939696037619444, "loss": 0.9139, "step": 24760 }, { "epoch": 0.7898848815332121, "grad_norm": 0.1664387583732605, "learning_rate": 0.00015872956271399674, "loss": 0.9048, "step": 24770 }, { "epoch": 0.7902037692528461, "grad_norm": 0.162310391664505, "learning_rate": 0.00015806495945665133, "loss": 0.9271, "step": 24780 }, { "epoch": 0.79052265697248, "grad_norm": 0.16639670729637146, "learning_rate": 0.00015740313890393964, "loss": 0.9401, "step": 24790 }, { "epoch": 0.7908415446921139, "grad_norm": 0.16429823637008667, "learning_rate": 0.00015674408940463216, "loss": 0.9052, "step": 24800 }, { "epoch": 0.7911604324117478, "grad_norm": 0.16192969679832458, "learning_rate": 0.00015608779935628333, "loss": 0.9241, "step": 24810 }, { "epoch": 0.7914793201313818, "grad_norm": 0.17283402383327484, "learning_rate": 0.0001554342572050271, "loss": 0.9164, "step": 24820 }, { "epoch": 0.7917982078510156, "grad_norm": 0.17254789173603058, "learning_rate": 0.00015478345144537376, "loss": 0.9205, "step": 24830 }, { "epoch": 0.7921170955706496, "grad_norm": 0.16515520215034485, "learning_rate": 0.0001541353706200072, "loss": 0.9109, "step": 24840 }, { "epoch": 0.7924359832902835, "grad_norm": 0.1679113805294037, "learning_rate": 0.0001534900033195833, "loss": 0.9232, "step": 24850 }, { "epoch": 0.7927548710099174, "grad_norm": 0.17153725028038025, "learning_rate": 0.00015284733818252897, "loss": 0.9125, "step": 24860 }, { "epoch": 0.7930737587295513, "grad_norm": 0.16140176355838776, "learning_rate": 0.00015220736389484244, "loss": 0.9065, "step": 24870 }, { "epoch": 0.7933926464491853, "grad_norm": 0.16564328968524933, "learning_rate": 0.00015157006918989363, "loss": 0.9144, "step": 24880 }, { "epoch": 0.7937115341688191, "grad_norm": 0.16771960258483887, "learning_rate": 0.00015093544284822607, "loss": 0.909, "step": 24890 }, { "epoch": 0.7940304218884531, "grad_norm": 0.1626899540424347, "learning_rate": 0.0001503034736973594, "loss": 0.9096, "step": 24900 }, { "epoch": 0.794349309608087, "grad_norm": 0.1682693064212799, "learning_rate": 0.00014967415061159254, "loss": 0.9203, "step": 24910 }, { "epoch": 0.7946681973277209, "grad_norm": 0.16387136280536652, "learning_rate": 0.00014904746251180796, "loss": 0.9158, "step": 24920 }, { "epoch": 0.7949870850473548, "grad_norm": 0.16813243925571442, "learning_rate": 0.00014842339836527651, "loss": 0.907, "step": 24930 }, { "epoch": 0.7953059727669888, "grad_norm": 0.16818828880786896, "learning_rate": 0.00014780194718546334, "loss": 0.9139, "step": 24940 }, { "epoch": 0.7956248604866226, "grad_norm": 0.17012420296669006, "learning_rate": 0.00014718309803183436, "loss": 0.9245, "step": 24950 }, { "epoch": 0.7959437482062566, "grad_norm": 0.16938473284244537, "learning_rate": 0.00014656684000966363, "loss": 0.911, "step": 24960 }, { "epoch": 0.7962626359258905, "grad_norm": 0.16450780630111694, "learning_rate": 0.00014595316226984173, "loss": 0.9091, "step": 24970 }, { "epoch": 0.7965815236455244, "grad_norm": 0.16333532333374023, "learning_rate": 0.00014534205400868448, "loss": 0.9147, "step": 24980 }, { "epoch": 0.7969004113651583, "grad_norm": 0.16380712389945984, "learning_rate": 0.000144733504467743, "loss": 0.8932, "step": 24990 }, { "epoch": 0.7972192990847923, "grad_norm": 0.166630357503891, "learning_rate": 0.00014412750293361419, "loss": 0.9106, "step": 25000 }, { "epoch": 0.7975381868044261, "grad_norm": 0.16908857226371765, "learning_rate": 0.00014352403873775206, "loss": 0.9279, "step": 25010 }, { "epoch": 0.7978570745240601, "grad_norm": 0.17311139404773712, "learning_rate": 0.0001429231012562802, "loss": 0.9077, "step": 25020 }, { "epoch": 0.798175962243694, "grad_norm": 0.17072586715221405, "learning_rate": 0.00014232467990980434, "loss": 0.904, "step": 25030 }, { "epoch": 0.7984948499633279, "grad_norm": 0.17837849259376526, "learning_rate": 0.0001417287641632264, "loss": 0.9207, "step": 25040 }, { "epoch": 0.7988137376829618, "grad_norm": 0.16402915120124817, "learning_rate": 0.00014113534352555893, "loss": 0.9072, "step": 25050 }, { "epoch": 0.7991326254025958, "grad_norm": 0.16501149535179138, "learning_rate": 0.00014054440754974036, "loss": 0.9053, "step": 25060 }, { "epoch": 0.7994515131222296, "grad_norm": 0.16770212352275848, "learning_rate": 0.00013995594583245116, "loss": 0.9241, "step": 25070 }, { "epoch": 0.7997704008418636, "grad_norm": 0.17260852456092834, "learning_rate": 0.0001393699480139307, "loss": 0.9161, "step": 25080 }, { "epoch": 0.8000892885614975, "grad_norm": 0.16932493448257446, "learning_rate": 0.00013878640377779487, "loss": 0.9239, "step": 25090 }, { "epoch": 0.8004081762811314, "grad_norm": 0.16582724452018738, "learning_rate": 0.00013820530285085425, "loss": 0.9012, "step": 25100 }, { "epoch": 0.8007270640007653, "grad_norm": 0.16309213638305664, "learning_rate": 0.0001376266350029336, "loss": 0.9143, "step": 25110 }, { "epoch": 0.8010459517203993, "grad_norm": 0.16737112402915955, "learning_rate": 0.0001370503900466916, "loss": 0.9116, "step": 25120 }, { "epoch": 0.8013648394400331, "grad_norm": 0.16866783797740936, "learning_rate": 0.00013647655783744143, "loss": 0.9052, "step": 25130 }, { "epoch": 0.8016837271596671, "grad_norm": 0.16859377920627594, "learning_rate": 0.00013590512827297215, "loss": 0.9124, "step": 25140 }, { "epoch": 0.802002614879301, "grad_norm": 0.1713414490222931, "learning_rate": 0.00013533609129337112, "loss": 0.9035, "step": 25150 }, { "epoch": 0.8023215025989349, "grad_norm": 0.16575460135936737, "learning_rate": 0.00013476943688084665, "loss": 0.926, "step": 25160 }, { "epoch": 0.8026403903185688, "grad_norm": 0.17065243422985077, "learning_rate": 0.00013420515505955158, "loss": 0.8947, "step": 25170 }, { "epoch": 0.8029592780382028, "grad_norm": 0.16338184475898743, "learning_rate": 0.000133643235895408, "loss": 0.9037, "step": 25180 }, { "epoch": 0.8032781657578366, "grad_norm": 0.17414377629756927, "learning_rate": 0.00013308366949593199, "loss": 0.9007, "step": 25190 }, { "epoch": 0.8035970534774706, "grad_norm": 0.1691446453332901, "learning_rate": 0.0001325264460100597, "loss": 0.91, "step": 25200 }, { "epoch": 0.8039159411971045, "grad_norm": 0.17124170064926147, "learning_rate": 0.0001319715556279738, "loss": 0.9199, "step": 25210 }, { "epoch": 0.8042348289167384, "grad_norm": 0.16459155082702637, "learning_rate": 0.00013141898858093086, "loss": 0.9072, "step": 25220 }, { "epoch": 0.8045537166363723, "grad_norm": 0.1644795536994934, "learning_rate": 0.00013086873514108925, "loss": 0.8996, "step": 25230 }, { "epoch": 0.8048726043560063, "grad_norm": 0.16637784242630005, "learning_rate": 0.00013032078562133812, "loss": 0.9232, "step": 25240 }, { "epoch": 0.8051914920756401, "grad_norm": 0.165706068277359, "learning_rate": 0.0001297751303751266, "loss": 0.9191, "step": 25250 }, { "epoch": 0.8055103797952741, "grad_norm": 0.1714121401309967, "learning_rate": 0.00012923175979629407, "loss": 0.891, "step": 25260 }, { "epoch": 0.805829267514908, "grad_norm": 0.1578255593776703, "learning_rate": 0.00012869066431890117, "loss": 0.8875, "step": 25270 }, { "epoch": 0.8061481552345419, "grad_norm": 0.16436845064163208, "learning_rate": 0.00012815183441706112, "loss": 0.9124, "step": 25280 }, { "epoch": 0.8064670429541758, "grad_norm": 0.165314182639122, "learning_rate": 0.0001276152606047724, "loss": 0.9131, "step": 25290 }, { "epoch": 0.8067859306738098, "grad_norm": 0.1633354127407074, "learning_rate": 0.0001270809334357514, "loss": 0.9082, "step": 25300 }, { "epoch": 0.8071048183934437, "grad_norm": 0.1653420776128769, "learning_rate": 0.00012654884350326619, "loss": 0.8937, "step": 25310 }, { "epoch": 0.8074237061130776, "grad_norm": 0.16683243215084076, "learning_rate": 0.00012601898143997108, "loss": 0.9267, "step": 25320 }, { "epoch": 0.8077425938327115, "grad_norm": 0.16653725504875183, "learning_rate": 0.00012549133791774162, "loss": 0.9094, "step": 25330 }, { "epoch": 0.8080614815523455, "grad_norm": 0.16630671918392181, "learning_rate": 0.0001249659036475103, "loss": 0.9072, "step": 25340 }, { "epoch": 0.8083803692719793, "grad_norm": 0.16600552201271057, "learning_rate": 0.00012444266937910312, "loss": 0.9107, "step": 25350 }, { "epoch": 0.8086992569916133, "grad_norm": 0.17081421613693237, "learning_rate": 0.0001239216259010767, "loss": 0.9019, "step": 25360 }, { "epoch": 0.8090181447112472, "grad_norm": 0.16658015549182892, "learning_rate": 0.00012340276404055616, "loss": 0.8995, "step": 25370 }, { "epoch": 0.8093370324308811, "grad_norm": 0.16746315360069275, "learning_rate": 0.00012288607466307355, "loss": 0.915, "step": 25380 }, { "epoch": 0.809655920150515, "grad_norm": 0.1671326905488968, "learning_rate": 0.0001223715486724071, "loss": 0.9013, "step": 25390 }, { "epoch": 0.809974807870149, "grad_norm": 0.1631750613451004, "learning_rate": 0.00012185917701042106, "loss": 0.9014, "step": 25400 }, { "epoch": 0.8102936955897828, "grad_norm": 0.16608569025993347, "learning_rate": 0.0001213489506569063, "loss": 0.9044, "step": 25410 }, { "epoch": 0.8106125833094168, "grad_norm": 0.17144687473773956, "learning_rate": 0.00012084086062942134, "loss": 0.9071, "step": 25420 }, { "epoch": 0.8109314710290507, "grad_norm": 0.17095890641212463, "learning_rate": 0.00012033489798313444, "loss": 0.9074, "step": 25430 }, { "epoch": 0.8112503587486846, "grad_norm": 0.16820640861988068, "learning_rate": 0.00011983105381066592, "loss": 0.9162, "step": 25440 }, { "epoch": 0.8115692464683185, "grad_norm": 0.16968011856079102, "learning_rate": 0.00011932931924193155, "loss": 0.9058, "step": 25450 }, { "epoch": 0.8118881341879525, "grad_norm": 0.16316497325897217, "learning_rate": 0.0001188296854439862, "loss": 0.8939, "step": 25460 }, { "epoch": 0.8122070219075863, "grad_norm": 0.17123737931251526, "learning_rate": 0.00011833214362086844, "loss": 0.9167, "step": 25470 }, { "epoch": 0.8125259096272203, "grad_norm": 0.16038616001605988, "learning_rate": 0.00011783668501344572, "loss": 0.9195, "step": 25480 }, { "epoch": 0.8128447973468542, "grad_norm": 0.17204549908638, "learning_rate": 0.00011734330089926006, "loss": 0.9142, "step": 25490 }, { "epoch": 0.8131636850664881, "grad_norm": 0.17099328339099884, "learning_rate": 0.0001168519825923746, "loss": 0.9152, "step": 25500 }, { "epoch": 0.813482572786122, "grad_norm": 0.16785970330238342, "learning_rate": 0.00011636272144322059, "loss": 0.9089, "step": 25510 }, { "epoch": 0.813801460505756, "grad_norm": 0.1722290962934494, "learning_rate": 0.00011587550883844523, "loss": 0.9049, "step": 25520 }, { "epoch": 0.8141203482253898, "grad_norm": 0.17191378772258759, "learning_rate": 0.00011539033620075986, "loss": 0.9104, "step": 25530 }, { "epoch": 0.8144392359450238, "grad_norm": 0.16544301807880402, "learning_rate": 0.00011490719498878924, "loss": 0.9116, "step": 25540 }, { "epoch": 0.8147581236646577, "grad_norm": 0.1681593805551529, "learning_rate": 0.00011442607669692085, "loss": 0.9086, "step": 25550 }, { "epoch": 0.8150770113842916, "grad_norm": 0.16554562747478485, "learning_rate": 0.00011394697285515537, "loss": 0.9004, "step": 25560 }, { "epoch": 0.8153958991039255, "grad_norm": 0.16348989307880402, "learning_rate": 0.0001134698750289575, "loss": 0.9104, "step": 25570 }, { "epoch": 0.8157147868235595, "grad_norm": 0.16586485505104065, "learning_rate": 0.00011299477481910747, "loss": 0.9067, "step": 25580 }, { "epoch": 0.8160336745431933, "grad_norm": 0.17197948694229126, "learning_rate": 0.00011252166386155319, "loss": 0.9176, "step": 25590 }, { "epoch": 0.8163525622628273, "grad_norm": 0.16917942464351654, "learning_rate": 0.00011205053382726299, "loss": 0.9088, "step": 25600 }, { "epoch": 0.8166714499824612, "grad_norm": 0.16606974601745605, "learning_rate": 0.00011158137642207893, "loss": 0.9034, "step": 25610 }, { "epoch": 0.8169903377020951, "grad_norm": 0.16527631878852844, "learning_rate": 0.00011111418338657102, "loss": 0.908, "step": 25620 }, { "epoch": 0.817309225421729, "grad_norm": 0.16847580671310425, "learning_rate": 0.00011064894649589143, "loss": 0.9171, "step": 25630 }, { "epoch": 0.817628113141363, "grad_norm": 0.16356876492500305, "learning_rate": 0.00011018565755962999, "loss": 0.912, "step": 25640 }, { "epoch": 0.8179470008609968, "grad_norm": 0.16655093431472778, "learning_rate": 0.00010972430842166995, "loss": 0.8906, "step": 25650 }, { "epoch": 0.8182658885806308, "grad_norm": 0.1708131730556488, "learning_rate": 0.0001092648909600443, "loss": 0.9081, "step": 25660 }, { "epoch": 0.8185847763002647, "grad_norm": 0.16948804259300232, "learning_rate": 0.00010880739708679283, "loss": 0.9052, "step": 25670 }, { "epoch": 0.8189036640198986, "grad_norm": 0.17297212779521942, "learning_rate": 0.00010835181874781979, "loss": 0.9129, "step": 25680 }, { "epoch": 0.8192225517395325, "grad_norm": 0.17286381125450134, "learning_rate": 0.00010789814792275205, "loss": 0.9137, "step": 25690 }, { "epoch": 0.8195414394591665, "grad_norm": 0.17813879251480103, "learning_rate": 0.00010744637662479786, "loss": 0.9146, "step": 25700 }, { "epoch": 0.8198603271788003, "grad_norm": 0.16722270846366882, "learning_rate": 0.00010699649690060641, "loss": 0.9048, "step": 25710 }, { "epoch": 0.8201792148984343, "grad_norm": 0.1656230092048645, "learning_rate": 0.00010654850083012758, "loss": 0.889, "step": 25720 }, { "epoch": 0.8204981026180682, "grad_norm": 0.16876403987407684, "learning_rate": 0.00010610238052647271, "loss": 0.9064, "step": 25730 }, { "epoch": 0.8208169903377021, "grad_norm": 0.17110751569271088, "learning_rate": 0.00010565812813577561, "loss": 0.9029, "step": 25740 }, { "epoch": 0.821135878057336, "grad_norm": 0.16737671196460724, "learning_rate": 0.00010521573583705442, "loss": 0.9041, "step": 25750 }, { "epoch": 0.82145476577697, "grad_norm": 0.16565321385860443, "learning_rate": 0.00010477519584207379, "loss": 0.8823, "step": 25760 }, { "epoch": 0.8217736534966038, "grad_norm": 0.17009054124355316, "learning_rate": 0.00010433650039520788, "loss": 0.9021, "step": 25770 }, { "epoch": 0.8220925412162378, "grad_norm": 0.1713910698890686, "learning_rate": 0.00010389964177330376, "loss": 0.9013, "step": 25780 }, { "epoch": 0.8224114289358717, "grad_norm": 0.171029731631279, "learning_rate": 0.00010346461228554552, "loss": 0.9077, "step": 25790 }, { "epoch": 0.8227303166555056, "grad_norm": 0.1669735312461853, "learning_rate": 0.00010303140427331876, "loss": 0.8842, "step": 25800 }, { "epoch": 0.8230492043751395, "grad_norm": 0.16688182950019836, "learning_rate": 0.00010260001011007584, "loss": 0.9053, "step": 25810 }, { "epoch": 0.8233680920947735, "grad_norm": 0.16837717592716217, "learning_rate": 0.00010217042220120158, "loss": 0.9078, "step": 25820 }, { "epoch": 0.8236869798144073, "grad_norm": 0.1698017567396164, "learning_rate": 0.00010174263298387963, "loss": 0.8976, "step": 25830 }, { "epoch": 0.8240058675340413, "grad_norm": 0.16503947973251343, "learning_rate": 0.00010131663492695925, "loss": 0.8984, "step": 25840 }, { "epoch": 0.8243247552536752, "grad_norm": 0.1713607758283615, "learning_rate": 0.00010089242053082271, "loss": 0.9078, "step": 25850 }, { "epoch": 0.824643642973309, "grad_norm": 0.1704990118741989, "learning_rate": 0.00010046998232725337, "loss": 0.895, "step": 25860 }, { "epoch": 0.824962530692943, "grad_norm": 0.16618500649929047, "learning_rate": 0.00010004931287930405, "loss": 0.8866, "step": 25870 }, { "epoch": 0.825281418412577, "grad_norm": 0.16616980731487274, "learning_rate": 9.96304047811663e-05, "loss": 0.8938, "step": 25880 }, { "epoch": 0.8256003061322108, "grad_norm": 0.1703374981880188, "learning_rate": 9.921325065803983e-05, "loss": 0.9068, "step": 25890 }, { "epoch": 0.8259191938518448, "grad_norm": 0.17246967554092407, "learning_rate": 9.879784316600278e-05, "loss": 0.8894, "step": 25900 }, { "epoch": 0.8262380815714787, "grad_norm": 0.17710429430007935, "learning_rate": 9.838417499188239e-05, "loss": 0.9046, "step": 25910 }, { "epoch": 0.8265569692911126, "grad_norm": 0.1626577228307724, "learning_rate": 9.79722388531263e-05, "loss": 0.9032, "step": 25920 }, { "epoch": 0.8268758570107465, "grad_norm": 0.16949373483657837, "learning_rate": 9.756202749767429e-05, "loss": 0.896, "step": 25930 }, { "epoch": 0.8271947447303805, "grad_norm": 0.16658318042755127, "learning_rate": 9.715353370383069e-05, "loss": 0.9044, "step": 25940 }, { "epoch": 0.8275136324500143, "grad_norm": 0.16725113987922668, "learning_rate": 9.674675028013712e-05, "loss": 0.8774, "step": 25950 }, { "epoch": 0.8278325201696483, "grad_norm": 0.17133763432502747, "learning_rate": 9.634167006524597e-05, "loss": 0.905, "step": 25960 }, { "epoch": 0.8281514078892822, "grad_norm": 0.17014974355697632, "learning_rate": 9.593828592779434e-05, "loss": 0.9, "step": 25970 }, { "epoch": 0.828470295608916, "grad_norm": 0.17587527632713318, "learning_rate": 9.553659076627844e-05, "loss": 0.9114, "step": 25980 }, { "epoch": 0.82878918332855, "grad_norm": 0.17234353721141815, "learning_rate": 9.513657750892853e-05, "loss": 0.8956, "step": 25990 }, { "epoch": 0.829108071048184, "grad_norm": 0.17450740933418274, "learning_rate": 9.473823911358465e-05, "loss": 0.8936, "step": 26000 }, { "epoch": 0.8294269587678178, "grad_norm": 0.16306303441524506, "learning_rate": 9.434156856757232e-05, "loss": 0.8906, "step": 26010 }, { "epoch": 0.8297458464874518, "grad_norm": 0.16995976865291595, "learning_rate": 9.394655888757933e-05, "loss": 0.891, "step": 26020 }, { "epoch": 0.8300647342070857, "grad_norm": 0.16628731787204742, "learning_rate": 9.355320311953263e-05, "loss": 0.9054, "step": 26030 }, { "epoch": 0.8303836219267196, "grad_norm": 0.17312276363372803, "learning_rate": 9.316149433847609e-05, "loss": 0.8978, "step": 26040 }, { "epoch": 0.8307025096463535, "grad_norm": 0.16996102035045624, "learning_rate": 9.27714256484484e-05, "loss": 0.9096, "step": 26050 }, { "epoch": 0.8310213973659875, "grad_norm": 0.1690961867570877, "learning_rate": 9.238299018236176e-05, "loss": 0.8966, "step": 26060 }, { "epoch": 0.8313402850856213, "grad_norm": 0.16808012127876282, "learning_rate": 9.199618110188106e-05, "loss": 0.8875, "step": 26070 }, { "epoch": 0.8316591728052553, "grad_norm": 0.16871201992034912, "learning_rate": 9.161099159730329e-05, "loss": 0.898, "step": 26080 }, { "epoch": 0.8319780605248892, "grad_norm": 0.16951216757297516, "learning_rate": 9.122741488743787e-05, "loss": 0.8942, "step": 26090 }, { "epoch": 0.832296948244523, "grad_norm": 0.17077015340328217, "learning_rate": 9.084544421948714e-05, "loss": 0.9001, "step": 26100 }, { "epoch": 0.832615835964157, "grad_norm": 0.16758744418621063, "learning_rate": 9.046507286892751e-05, "loss": 0.93, "step": 26110 }, { "epoch": 0.832934723683791, "grad_norm": 0.1701851636171341, "learning_rate": 9.008629413939108e-05, "loss": 0.9008, "step": 26120 }, { "epoch": 0.8332536114034248, "grad_norm": 0.16817468404769897, "learning_rate": 8.970910136254777e-05, "loss": 0.8946, "step": 26130 }, { "epoch": 0.8335724991230588, "grad_norm": 0.16986632347106934, "learning_rate": 8.93334878979879e-05, "loss": 0.8882, "step": 26140 }, { "epoch": 0.8338913868426927, "grad_norm": 0.16999934613704681, "learning_rate": 8.895944713310525e-05, "loss": 0.8989, "step": 26150 }, { "epoch": 0.8342102745623267, "grad_norm": 0.1713329702615738, "learning_rate": 8.858697248298071e-05, "loss": 0.9081, "step": 26160 }, { "epoch": 0.8345291622819605, "grad_norm": 0.1673143059015274, "learning_rate": 8.821605739026645e-05, "loss": 0.8731, "step": 26170 }, { "epoch": 0.8348480500015945, "grad_norm": 0.1767398566007614, "learning_rate": 8.784669532507018e-05, "loss": 0.9198, "step": 26180 }, { "epoch": 0.8351669377212284, "grad_norm": 0.1685178577899933, "learning_rate": 8.747887978484048e-05, "loss": 0.8949, "step": 26190 }, { "epoch": 0.8354858254408623, "grad_norm": 0.1668165922164917, "learning_rate": 8.71126042942522e-05, "loss": 0.8824, "step": 26200 }, { "epoch": 0.8358047131604962, "grad_norm": 0.16967454552650452, "learning_rate": 8.674786240509246e-05, "loss": 0.8866, "step": 26210 }, { "epoch": 0.8361236008801302, "grad_norm": 0.16545966267585754, "learning_rate": 8.638464769614718e-05, "loss": 0.8944, "step": 26220 }, { "epoch": 0.836442488599764, "grad_norm": 0.1696653962135315, "learning_rate": 8.602295377308798e-05, "loss": 0.8903, "step": 26230 }, { "epoch": 0.836761376319398, "grad_norm": 0.1657346487045288, "learning_rate": 8.566277426835967e-05, "loss": 0.8936, "step": 26240 }, { "epoch": 0.8370802640390319, "grad_norm": 0.17045268416404724, "learning_rate": 8.53041028410681e-05, "loss": 0.8925, "step": 26250 }, { "epoch": 0.8373991517586657, "grad_norm": 0.16796588897705078, "learning_rate": 8.494693317686852e-05, "loss": 0.8732, "step": 26260 }, { "epoch": 0.8377180394782997, "grad_norm": 0.16764356195926666, "learning_rate": 8.459125898785451e-05, "loss": 0.9136, "step": 26270 }, { "epoch": 0.8380369271979337, "grad_norm": 0.17094580829143524, "learning_rate": 8.423707401244714e-05, "loss": 0.9105, "step": 26280 }, { "epoch": 0.8383558149175675, "grad_norm": 0.16578027606010437, "learning_rate": 8.388437201528488e-05, "loss": 0.8921, "step": 26290 }, { "epoch": 0.8386747026372015, "grad_norm": 0.16390374302864075, "learning_rate": 8.353314678711372e-05, "loss": 0.9015, "step": 26300 }, { "epoch": 0.8389935903568354, "grad_norm": 0.16992898285388947, "learning_rate": 8.318339214467789e-05, "loss": 0.8958, "step": 26310 }, { "epoch": 0.8393124780764692, "grad_norm": 0.16880562901496887, "learning_rate": 8.283510193061105e-05, "loss": 0.8929, "step": 26320 }, { "epoch": 0.8396313657961032, "grad_norm": 0.17104868590831757, "learning_rate": 8.248827001332778e-05, "loss": 0.8978, "step": 26330 }, { "epoch": 0.8399502535157372, "grad_norm": 0.1668756604194641, "learning_rate": 8.214289028691584e-05, "loss": 0.893, "step": 26340 }, { "epoch": 0.840269141235371, "grad_norm": 0.164683997631073, "learning_rate": 8.179895667102844e-05, "loss": 0.8812, "step": 26350 }, { "epoch": 0.840588028955005, "grad_norm": 0.1663207858800888, "learning_rate": 8.145646311077731e-05, "loss": 0.9042, "step": 26360 }, { "epoch": 0.8409069166746389, "grad_norm": 0.17519667744636536, "learning_rate": 8.11154035766261e-05, "loss": 0.8994, "step": 26370 }, { "epoch": 0.8412258043942727, "grad_norm": 0.17041118443012238, "learning_rate": 8.077577206428427e-05, "loss": 0.894, "step": 26380 }, { "epoch": 0.8415446921139067, "grad_norm": 0.16244420409202576, "learning_rate": 8.043756259460127e-05, "loss": 0.8898, "step": 26390 }, { "epoch": 0.8418635798335407, "grad_norm": 0.17468824982643127, "learning_rate": 8.010076921346141e-05, "loss": 0.911, "step": 26400 }, { "epoch": 0.8421824675531745, "grad_norm": 0.16985976696014404, "learning_rate": 7.976538599167896e-05, "loss": 0.9037, "step": 26410 }, { "epoch": 0.8425013552728084, "grad_norm": 0.16972625255584717, "learning_rate": 7.943140702489378e-05, "loss": 0.8783, "step": 26420 }, { "epoch": 0.8428202429924424, "grad_norm": 0.1677105873823166, "learning_rate": 7.909882643346739e-05, "loss": 0.8946, "step": 26430 }, { "epoch": 0.8431391307120762, "grad_norm": 0.16455358266830444, "learning_rate": 7.876763836237944e-05, "loss": 0.8986, "step": 26440 }, { "epoch": 0.8434580184317102, "grad_norm": 0.17376911640167236, "learning_rate": 7.843783698112465e-05, "loss": 0.9102, "step": 26450 }, { "epoch": 0.8437769061513442, "grad_norm": 0.16827256977558136, "learning_rate": 7.810941648361018e-05, "loss": 0.9022, "step": 26460 }, { "epoch": 0.844095793870978, "grad_norm": 0.17023880779743195, "learning_rate": 7.778237108805339e-05, "loss": 0.8833, "step": 26470 }, { "epoch": 0.844414681590612, "grad_norm": 0.16073589026927948, "learning_rate": 7.745669503688002e-05, "loss": 0.8917, "step": 26480 }, { "epoch": 0.8447335693102459, "grad_norm": 0.1697019338607788, "learning_rate": 7.71323825966229e-05, "loss": 0.8898, "step": 26490 }, { "epoch": 0.8450524570298797, "grad_norm": 0.1616843193769455, "learning_rate": 7.680942805782095e-05, "loss": 0.8913, "step": 26500 }, { "epoch": 0.8453713447495137, "grad_norm": 0.16631615161895752, "learning_rate": 7.648782573491877e-05, "loss": 0.8904, "step": 26510 }, { "epoch": 0.8456902324691477, "grad_norm": 0.16899070143699646, "learning_rate": 7.616756996616643e-05, "loss": 0.8829, "step": 26520 }, { "epoch": 0.8460091201887815, "grad_norm": 0.17207767069339752, "learning_rate": 7.58486551135198e-05, "loss": 0.8887, "step": 26530 }, { "epoch": 0.8463280079084154, "grad_norm": 0.1661059707403183, "learning_rate": 7.553107556254135e-05, "loss": 0.886, "step": 26540 }, { "epoch": 0.8466468956280494, "grad_norm": 0.1708707958459854, "learning_rate": 7.521482572230134e-05, "loss": 0.8922, "step": 26550 }, { "epoch": 0.8469657833476832, "grad_norm": 0.17209689319133759, "learning_rate": 7.48999000252793e-05, "loss": 0.8907, "step": 26560 }, { "epoch": 0.8472846710673172, "grad_norm": 0.16356642544269562, "learning_rate": 7.458629292726607e-05, "loss": 0.9037, "step": 26570 }, { "epoch": 0.8476035587869512, "grad_norm": 0.1679961085319519, "learning_rate": 7.427399890726616e-05, "loss": 0.8862, "step": 26580 }, { "epoch": 0.847922446506585, "grad_norm": 0.16855265200138092, "learning_rate": 7.396301246740063e-05, "loss": 0.8844, "step": 26590 }, { "epoch": 0.848241334226219, "grad_norm": 0.1739872395992279, "learning_rate": 7.36533281328102e-05, "loss": 0.898, "step": 26600 }, { "epoch": 0.8485602219458529, "grad_norm": 0.17627912759780884, "learning_rate": 7.334494045155892e-05, "loss": 0.8887, "step": 26610 }, { "epoch": 0.8488791096654867, "grad_norm": 0.1743604987859726, "learning_rate": 7.303784399453824e-05, "loss": 0.8938, "step": 26620 }, { "epoch": 0.8491979973851207, "grad_norm": 0.17387619614601135, "learning_rate": 7.273203335537129e-05, "loss": 0.8926, "step": 26630 }, { "epoch": 0.8495168851047546, "grad_norm": 0.16775065660476685, "learning_rate": 7.242750315031787e-05, "loss": 0.9088, "step": 26640 }, { "epoch": 0.8498357728243885, "grad_norm": 0.16406485438346863, "learning_rate": 7.21242480181795e-05, "loss": 0.8927, "step": 26650 }, { "epoch": 0.8501546605440224, "grad_norm": 0.16974839568138123, "learning_rate": 7.182226262020522e-05, "loss": 0.884, "step": 26660 }, { "epoch": 0.8504735482636564, "grad_norm": 0.1713690608739853, "learning_rate": 7.15215416399974e-05, "loss": 0.8959, "step": 26670 }, { "epoch": 0.8507924359832902, "grad_norm": 0.16833071410655975, "learning_rate": 7.122207978341839e-05, "loss": 0.8971, "step": 26680 }, { "epoch": 0.8511113237029242, "grad_norm": 0.16310231387615204, "learning_rate": 7.092387177849706e-05, "loss": 0.8876, "step": 26690 }, { "epoch": 0.8514302114225581, "grad_norm": 0.16681894659996033, "learning_rate": 7.062691237533617e-05, "loss": 0.8831, "step": 26700 }, { "epoch": 0.851749099142192, "grad_norm": 0.17143681645393372, "learning_rate": 7.033119634601985e-05, "loss": 0.8951, "step": 26710 }, { "epoch": 0.8520679868618259, "grad_norm": 0.17085224390029907, "learning_rate": 7.003671848452163e-05, "loss": 0.8917, "step": 26720 }, { "epoch": 0.8523868745814599, "grad_norm": 0.17123526334762573, "learning_rate": 6.974347360661275e-05, "loss": 0.8968, "step": 26730 }, { "epoch": 0.8527057623010937, "grad_norm": 0.16923531889915466, "learning_rate": 6.945145654977087e-05, "loss": 0.903, "step": 26740 }, { "epoch": 0.8530246500207277, "grad_norm": 0.16685211658477783, "learning_rate": 6.916066217308926e-05, "loss": 0.9017, "step": 26750 }, { "epoch": 0.8533435377403616, "grad_norm": 0.17287422716617584, "learning_rate": 6.887108535718623e-05, "loss": 0.8875, "step": 26760 }, { "epoch": 0.8536624254599955, "grad_norm": 0.1733153909444809, "learning_rate": 6.858272100411499e-05, "loss": 0.882, "step": 26770 }, { "epoch": 0.8539813131796294, "grad_norm": 0.17159032821655273, "learning_rate": 6.829556403727401e-05, "loss": 0.9034, "step": 26780 }, { "epoch": 0.8543002008992634, "grad_norm": 0.17427712678909302, "learning_rate": 6.800960940131751e-05, "loss": 0.8958, "step": 26790 }, { "epoch": 0.8546190886188972, "grad_norm": 0.17055897414684296, "learning_rate": 6.772485206206656e-05, "loss": 0.8752, "step": 26800 }, { "epoch": 0.8549379763385312, "grad_norm": 0.16725292801856995, "learning_rate": 6.74412870064204e-05, "loss": 0.8948, "step": 26810 }, { "epoch": 0.8552568640581651, "grad_norm": 0.17402830719947815, "learning_rate": 6.71589092422682e-05, "loss": 0.9024, "step": 26820 }, { "epoch": 0.855575751777799, "grad_norm": 0.17112645506858826, "learning_rate": 6.687771379840115e-05, "loss": 0.9049, "step": 26830 }, { "epoch": 0.8558946394974329, "grad_norm": 0.16811510920524597, "learning_rate": 6.659769572442513e-05, "loss": 0.8944, "step": 26840 }, { "epoch": 0.8562135272170669, "grad_norm": 0.17514300346374512, "learning_rate": 6.631885009067319e-05, "loss": 0.8874, "step": 26850 }, { "epoch": 0.8565324149367007, "grad_norm": 0.17014634609222412, "learning_rate": 6.60411719881191e-05, "loss": 0.8843, "step": 26860 }, { "epoch": 0.8568513026563347, "grad_norm": 0.1698083132505417, "learning_rate": 6.576465652829075e-05, "loss": 0.8958, "step": 26870 }, { "epoch": 0.8571701903759686, "grad_norm": 0.17299139499664307, "learning_rate": 6.548929884318418e-05, "loss": 0.8875, "step": 26880 }, { "epoch": 0.8574890780956025, "grad_norm": 0.1718268096446991, "learning_rate": 6.521509408517782e-05, "loss": 0.8985, "step": 26890 }, { "epoch": 0.8578079658152364, "grad_norm": 0.17264573276042938, "learning_rate": 6.494203742694715e-05, "loss": 0.901, "step": 26900 }, { "epoch": 0.8581268535348704, "grad_norm": 0.17113256454467773, "learning_rate": 6.46701240613798e-05, "loss": 0.8991, "step": 26910 }, { "epoch": 0.8584457412545042, "grad_norm": 0.1725773960351944, "learning_rate": 6.439934920149081e-05, "loss": 0.8862, "step": 26920 }, { "epoch": 0.8587646289741382, "grad_norm": 0.16512632369995117, "learning_rate": 6.412970808033838e-05, "loss": 0.9074, "step": 26930 }, { "epoch": 0.8590835166937721, "grad_norm": 0.16803482174873352, "learning_rate": 6.386119595094003e-05, "loss": 0.8861, "step": 26940 }, { "epoch": 0.859402404413406, "grad_norm": 0.17119905352592468, "learning_rate": 6.359380808618895e-05, "loss": 0.8833, "step": 26950 }, { "epoch": 0.8597212921330399, "grad_norm": 0.17140822112560272, "learning_rate": 6.332753977877079e-05, "loss": 0.8966, "step": 26960 }, { "epoch": 0.8600401798526739, "grad_norm": 0.16653090715408325, "learning_rate": 6.306238634108082e-05, "loss": 0.8692, "step": 26970 }, { "epoch": 0.8603590675723078, "grad_norm": 0.17240291833877563, "learning_rate": 6.279834310514136e-05, "loss": 0.8787, "step": 26980 }, { "epoch": 0.8606779552919417, "grad_norm": 0.1726362258195877, "learning_rate": 6.253540542251968e-05, "loss": 0.8907, "step": 26990 }, { "epoch": 0.8609968430115756, "grad_norm": 0.16658638417720795, "learning_rate": 6.227356866424601e-05, "loss": 0.8746, "step": 27000 }, { "epoch": 0.8613157307312096, "grad_norm": 0.1688665747642517, "learning_rate": 6.201282822073233e-05, "loss": 0.8718, "step": 27010 }, { "epoch": 0.8616346184508434, "grad_norm": 0.16762253642082214, "learning_rate": 6.175317950169087e-05, "loss": 0.8908, "step": 27020 }, { "epoch": 0.8619535061704774, "grad_norm": 0.1679273396730423, "learning_rate": 6.149461793605354e-05, "loss": 0.896, "step": 27030 }, { "epoch": 0.8622723938901113, "grad_norm": 0.16647253930568695, "learning_rate": 6.123713897189136e-05, "loss": 0.9041, "step": 27040 }, { "epoch": 0.8625912816097452, "grad_norm": 0.17113405466079712, "learning_rate": 6.0980738076334414e-05, "loss": 0.9049, "step": 27050 }, { "epoch": 0.8629101693293791, "grad_norm": 0.16743695735931396, "learning_rate": 6.0725410735491895e-05, "loss": 0.8993, "step": 27060 }, { "epoch": 0.8632290570490131, "grad_norm": 0.1676100492477417, "learning_rate": 6.0471152454372806e-05, "loss": 0.8842, "step": 27070 }, { "epoch": 0.8635479447686469, "grad_norm": 0.16340462863445282, "learning_rate": 6.02179587568067e-05, "loss": 0.8992, "step": 27080 }, { "epoch": 0.8638668324882809, "grad_norm": 0.17687806487083435, "learning_rate": 5.9965825185364964e-05, "loss": 0.9011, "step": 27090 }, { "epoch": 0.8641857202079148, "grad_norm": 0.16743621230125427, "learning_rate": 5.971474730128228e-05, "loss": 0.8925, "step": 27100 }, { "epoch": 0.8645046079275487, "grad_norm": 0.16600142419338226, "learning_rate": 5.946472068437854e-05, "loss": 0.8834, "step": 27110 }, { "epoch": 0.8648234956471826, "grad_norm": 0.16654567420482635, "learning_rate": 5.9215740932980986e-05, "loss": 0.8979, "step": 27120 }, { "epoch": 0.8651423833668166, "grad_norm": 0.16914024949073792, "learning_rate": 5.8967803663846734e-05, "loss": 0.8792, "step": 27130 }, { "epoch": 0.8654612710864504, "grad_norm": 0.1687769889831543, "learning_rate": 5.8720904512085626e-05, "loss": 0.8741, "step": 27140 }, { "epoch": 0.8657801588060844, "grad_norm": 0.16539452970027924, "learning_rate": 5.8475039131083355e-05, "loss": 0.8891, "step": 27150 }, { "epoch": 0.8660990465257183, "grad_norm": 0.1684424728155136, "learning_rate": 5.823020319242495e-05, "loss": 0.8984, "step": 27160 }, { "epoch": 0.8664179342453522, "grad_norm": 0.17129109799861908, "learning_rate": 5.7986392385818584e-05, "loss": 0.8829, "step": 27170 }, { "epoch": 0.8667368219649861, "grad_norm": 0.16870862245559692, "learning_rate": 5.774360241901975e-05, "loss": 0.8918, "step": 27180 }, { "epoch": 0.8670557096846201, "grad_norm": 0.1671636551618576, "learning_rate": 5.7501829017755564e-05, "loss": 0.8901, "step": 27190 }, { "epoch": 0.8673745974042539, "grad_norm": 0.1682850569486618, "learning_rate": 5.7261067925649635e-05, "loss": 0.8859, "step": 27200 }, { "epoch": 0.8676934851238879, "grad_norm": 0.1720380336046219, "learning_rate": 5.7021314904147045e-05, "loss": 0.8873, "step": 27210 }, { "epoch": 0.8680123728435218, "grad_norm": 0.16618755459785461, "learning_rate": 5.678256573243984e-05, "loss": 0.8861, "step": 27220 }, { "epoch": 0.8683312605631557, "grad_norm": 0.16924364864826202, "learning_rate": 5.6544816207392587e-05, "loss": 0.8817, "step": 27230 }, { "epoch": 0.8686501482827896, "grad_norm": 0.17504048347473145, "learning_rate": 5.630806214346851e-05, "loss": 0.8845, "step": 27240 }, { "epoch": 0.8689690360024236, "grad_norm": 0.17469248175621033, "learning_rate": 5.6072299372655695e-05, "loss": 0.8939, "step": 27250 }, { "epoch": 0.8692879237220574, "grad_norm": 0.17130273580551147, "learning_rate": 5.58375237443938e-05, "loss": 0.8864, "step": 27260 }, { "epoch": 0.8696068114416914, "grad_norm": 0.16825518012046814, "learning_rate": 5.5603731125500924e-05, "loss": 0.8862, "step": 27270 }, { "epoch": 0.8699256991613253, "grad_norm": 0.16963137686252594, "learning_rate": 5.537091740010087e-05, "loss": 0.8836, "step": 27280 }, { "epoch": 0.8702445868809592, "grad_norm": 0.16663995385169983, "learning_rate": 5.513907846955069e-05, "loss": 0.889, "step": 27290 }, { "epoch": 0.8705634746005931, "grad_norm": 0.17626075446605682, "learning_rate": 5.490821025236851e-05, "loss": 0.8988, "step": 27300 }, { "epoch": 0.8708823623202271, "grad_norm": 0.17636112868785858, "learning_rate": 5.467830868416169e-05, "loss": 0.9077, "step": 27310 }, { "epoch": 0.8712012500398609, "grad_norm": 0.16531747579574585, "learning_rate": 5.444936971755526e-05, "loss": 0.8831, "step": 27320 }, { "epoch": 0.8715201377594949, "grad_norm": 0.16421984136104584, "learning_rate": 5.42213893221207e-05, "loss": 0.8964, "step": 27330 }, { "epoch": 0.8718390254791288, "grad_norm": 0.16629725694656372, "learning_rate": 5.399436348430491e-05, "loss": 0.898, "step": 27340 }, { "epoch": 0.8721579131987627, "grad_norm": 0.16994720697402954, "learning_rate": 5.3768288207359675e-05, "loss": 0.8807, "step": 27350 }, { "epoch": 0.8724768009183966, "grad_norm": 0.16791296005249023, "learning_rate": 5.354315951127119e-05, "loss": 0.8975, "step": 27360 }, { "epoch": 0.8727956886380306, "grad_norm": 0.16906745731830597, "learning_rate": 5.331897343269001e-05, "loss": 0.8791, "step": 27370 }, { "epoch": 0.8731145763576644, "grad_norm": 0.16941961646080017, "learning_rate": 5.309572602486132e-05, "loss": 0.8884, "step": 27380 }, { "epoch": 0.8734334640772984, "grad_norm": 0.1706288754940033, "learning_rate": 5.287341335755546e-05, "loss": 0.8908, "step": 27390 }, { "epoch": 0.8737523517969323, "grad_norm": 0.17212943732738495, "learning_rate": 5.265203151699865e-05, "loss": 0.8844, "step": 27400 }, { "epoch": 0.8740712395165662, "grad_norm": 0.169947549700737, "learning_rate": 5.243157660580418e-05, "loss": 0.8755, "step": 27410 }, { "epoch": 0.8743901272362001, "grad_norm": 0.16829125583171844, "learning_rate": 5.221204474290376e-05, "loss": 0.8802, "step": 27420 }, { "epoch": 0.8747090149558341, "grad_norm": 0.16881629824638367, "learning_rate": 5.199343206347918e-05, "loss": 0.8895, "step": 27430 }, { "epoch": 0.8750279026754679, "grad_norm": 0.17132100462913513, "learning_rate": 5.1775734718894304e-05, "loss": 0.8841, "step": 27440 }, { "epoch": 0.8753467903951019, "grad_norm": 0.16899465024471283, "learning_rate": 5.155894887662728e-05, "loss": 0.8875, "step": 27450 }, { "epoch": 0.8756656781147358, "grad_norm": 0.16898700594902039, "learning_rate": 5.134307072020311e-05, "loss": 0.8845, "step": 27460 }, { "epoch": 0.8759845658343697, "grad_norm": 0.16705811023712158, "learning_rate": 5.1128096449126425e-05, "loss": 0.8938, "step": 27470 }, { "epoch": 0.8763034535540036, "grad_norm": 0.1732359230518341, "learning_rate": 5.091402227881458e-05, "loss": 0.8851, "step": 27480 }, { "epoch": 0.8766223412736376, "grad_norm": 0.16625288128852844, "learning_rate": 5.070084444053106e-05, "loss": 0.8773, "step": 27490 }, { "epoch": 0.8769412289932714, "grad_norm": 0.1704675406217575, "learning_rate": 5.0488559181319116e-05, "loss": 0.8947, "step": 27500 }, { "epoch": 0.8772601167129054, "grad_norm": 0.1694217324256897, "learning_rate": 5.027716276393563e-05, "loss": 0.8796, "step": 27510 }, { "epoch": 0.8775790044325393, "grad_norm": 0.1724761426448822, "learning_rate": 5.00666514667855e-05, "loss": 0.8954, "step": 27520 }, { "epoch": 0.8778978921521732, "grad_norm": 0.17092296481132507, "learning_rate": 4.9857021583855885e-05, "loss": 0.8679, "step": 27530 }, { "epoch": 0.8782167798718071, "grad_norm": 0.16874085366725922, "learning_rate": 4.9648269424651144e-05, "loss": 0.8892, "step": 27540 }, { "epoch": 0.8785356675914411, "grad_norm": 0.1716938316822052, "learning_rate": 4.9440391314127776e-05, "loss": 0.8908, "step": 27550 }, { "epoch": 0.8788545553110749, "grad_norm": 0.16745954751968384, "learning_rate": 4.9233383592629755e-05, "loss": 0.8868, "step": 27560 }, { "epoch": 0.8791734430307089, "grad_norm": 0.16559866070747375, "learning_rate": 4.90272426158241e-05, "loss": 0.8863, "step": 27570 }, { "epoch": 0.8794923307503428, "grad_norm": 0.17054715752601624, "learning_rate": 4.88219647546367e-05, "loss": 0.8916, "step": 27580 }, { "epoch": 0.8798112184699767, "grad_norm": 0.16883596777915955, "learning_rate": 4.861754639518844e-05, "loss": 0.8855, "step": 27590 }, { "epoch": 0.8801301061896106, "grad_norm": 0.17326515913009644, "learning_rate": 4.841398393873158e-05, "loss": 0.8869, "step": 27600 }, { "epoch": 0.8804489939092446, "grad_norm": 0.16945582628250122, "learning_rate": 4.821127380158639e-05, "loss": 0.8828, "step": 27610 }, { "epoch": 0.8807678816288784, "grad_norm": 0.17122694849967957, "learning_rate": 4.800941241507807e-05, "loss": 0.8858, "step": 27620 }, { "epoch": 0.8810867693485124, "grad_norm": 0.1702810525894165, "learning_rate": 4.7808396225473926e-05, "loss": 0.888, "step": 27630 }, { "epoch": 0.8814056570681463, "grad_norm": 0.1752856969833374, "learning_rate": 4.760822169392078e-05, "loss": 0.8836, "step": 27640 }, { "epoch": 0.8817245447877802, "grad_norm": 0.17062492668628693, "learning_rate": 4.74088852963827e-05, "loss": 0.8794, "step": 27650 }, { "epoch": 0.8820434325074141, "grad_norm": 0.17067866027355194, "learning_rate": 4.7210383523578954e-05, "loss": 0.8948, "step": 27660 }, { "epoch": 0.8823623202270481, "grad_norm": 0.17074808478355408, "learning_rate": 4.701271288092223e-05, "loss": 0.8772, "step": 27670 }, { "epoch": 0.8826812079466819, "grad_norm": 0.17069657146930695, "learning_rate": 4.681586988845707e-05, "loss": 0.8873, "step": 27680 }, { "epoch": 0.8830000956663159, "grad_norm": 0.17014001309871674, "learning_rate": 4.661985108079873e-05, "loss": 0.906, "step": 27690 }, { "epoch": 0.8833189833859498, "grad_norm": 0.1751696914434433, "learning_rate": 4.6424653007072e-05, "loss": 0.8868, "step": 27700 }, { "epoch": 0.8836378711055837, "grad_norm": 0.17144422233104706, "learning_rate": 4.62302722308506e-05, "loss": 0.8726, "step": 27710 }, { "epoch": 0.8839567588252176, "grad_norm": 0.16718114912509918, "learning_rate": 4.603670533009658e-05, "loss": 0.88, "step": 27720 }, { "epoch": 0.8842756465448516, "grad_norm": 0.16777946054935455, "learning_rate": 4.5843948897100144e-05, "loss": 0.8843, "step": 27730 }, { "epoch": 0.8845945342644854, "grad_norm": 0.1685672551393509, "learning_rate": 4.565199953841964e-05, "loss": 0.8636, "step": 27740 }, { "epoch": 0.8849134219841194, "grad_norm": 0.16482628881931305, "learning_rate": 4.5460853874821776e-05, "loss": 0.8987, "step": 27750 }, { "epoch": 0.8852323097037533, "grad_norm": 0.1724732220172882, "learning_rate": 4.52705085412222e-05, "loss": 0.8861, "step": 27760 }, { "epoch": 0.8855511974233872, "grad_norm": 0.16705913841724396, "learning_rate": 4.50809601866262e-05, "loss": 0.8876, "step": 27770 }, { "epoch": 0.8858700851430211, "grad_norm": 0.1683725118637085, "learning_rate": 4.4892205474069755e-05, "loss": 0.8732, "step": 27780 }, { "epoch": 0.8861889728626551, "grad_norm": 0.16657312214374542, "learning_rate": 4.4704241080560745e-05, "loss": 0.879, "step": 27790 }, { "epoch": 0.886507860582289, "grad_norm": 0.17320038378238678, "learning_rate": 4.4517063697020485e-05, "loss": 0.8927, "step": 27800 }, { "epoch": 0.8868267483019229, "grad_norm": 0.168224036693573, "learning_rate": 4.433067002822545e-05, "loss": 0.8843, "step": 27810 }, { "epoch": 0.8871456360215568, "grad_norm": 0.17128513753414154, "learning_rate": 4.414505679274928e-05, "loss": 0.8674, "step": 27820 }, { "epoch": 0.8874645237411908, "grad_norm": 0.1696639060974121, "learning_rate": 4.396022072290497e-05, "loss": 0.8786, "step": 27830 }, { "epoch": 0.8877834114608246, "grad_norm": 0.16941528022289276, "learning_rate": 4.3776158564687404e-05, "loss": 0.883, "step": 27840 }, { "epoch": 0.8881022991804586, "grad_norm": 0.17108596861362457, "learning_rate": 4.359286707771605e-05, "loss": 0.8974, "step": 27850 }, { "epoch": 0.8884211869000925, "grad_norm": 0.17025145888328552, "learning_rate": 4.341034303517787e-05, "loss": 0.8835, "step": 27860 }, { "epoch": 0.8887400746197264, "grad_norm": 0.1636083573102951, "learning_rate": 4.322858322377056e-05, "loss": 0.8814, "step": 27870 }, { "epoch": 0.8890589623393603, "grad_norm": 0.17729361355304718, "learning_rate": 4.3047584443645955e-05, "loss": 0.889, "step": 27880 }, { "epoch": 0.8893778500589943, "grad_norm": 0.17000319063663483, "learning_rate": 4.2867343508353714e-05, "loss": 0.8673, "step": 27890 }, { "epoch": 0.8896967377786281, "grad_norm": 0.16710565984249115, "learning_rate": 4.2687857244785214e-05, "loss": 0.8925, "step": 27900 }, { "epoch": 0.8900156254982621, "grad_norm": 0.16732816398143768, "learning_rate": 4.2509122493117696e-05, "loss": 0.891, "step": 27910 }, { "epoch": 0.890334513217896, "grad_norm": 0.1728038638830185, "learning_rate": 4.2331136106758614e-05, "loss": 0.8982, "step": 27920 }, { "epoch": 0.8906534009375299, "grad_norm": 0.17565219104290009, "learning_rate": 4.2153894952290276e-05, "loss": 0.8811, "step": 27930 }, { "epoch": 0.8909722886571638, "grad_norm": 0.16937175393104553, "learning_rate": 4.1977395909414635e-05, "loss": 0.8812, "step": 27940 }, { "epoch": 0.8912911763767978, "grad_norm": 0.17110460996627808, "learning_rate": 4.180163587089841e-05, "loss": 0.8875, "step": 27950 }, { "epoch": 0.8916100640964316, "grad_norm": 0.17079512774944305, "learning_rate": 4.162661174251835e-05, "loss": 0.893, "step": 27960 }, { "epoch": 0.8919289518160656, "grad_norm": 0.17112374305725098, "learning_rate": 4.1452320443006753e-05, "loss": 0.8969, "step": 27970 }, { "epoch": 0.8922478395356995, "grad_norm": 0.17021846771240234, "learning_rate": 4.1278758903997235e-05, "loss": 0.9055, "step": 27980 }, { "epoch": 0.8925667272553334, "grad_norm": 0.1693822592496872, "learning_rate": 4.110592406997074e-05, "loss": 0.881, "step": 27990 }, { "epoch": 0.8928856149749673, "grad_norm": 0.1631503701210022, "learning_rate": 4.0933812898201676e-05, "loss": 0.8829, "step": 28000 }, { "epoch": 0.8932045026946013, "grad_norm": 0.17165639996528625, "learning_rate": 4.0762422358704414e-05, "loss": 0.8823, "step": 28010 }, { "epoch": 0.8935233904142351, "grad_norm": 0.16423417627811432, "learning_rate": 4.059174943417997e-05, "loss": 0.8846, "step": 28020 }, { "epoch": 0.8938422781338691, "grad_norm": 0.1652715504169464, "learning_rate": 4.0421791119962764e-05, "loss": 0.8891, "step": 28030 }, { "epoch": 0.894161165853503, "grad_norm": 0.16647374629974365, "learning_rate": 4.0252544423967835e-05, "loss": 0.8773, "step": 28040 }, { "epoch": 0.8944800535731369, "grad_norm": 0.1734417974948883, "learning_rate": 4.008400636663814e-05, "loss": 0.8669, "step": 28050 }, { "epoch": 0.8947989412927708, "grad_norm": 0.17314991354942322, "learning_rate": 3.991617398089205e-05, "loss": 0.8803, "step": 28060 }, { "epoch": 0.8951178290124048, "grad_norm": 0.17280423641204834, "learning_rate": 3.974904431207121e-05, "loss": 0.8789, "step": 28070 }, { "epoch": 0.8954367167320386, "grad_norm": 0.1620936095714569, "learning_rate": 3.958261441788843e-05, "loss": 0.8818, "step": 28080 }, { "epoch": 0.8957556044516726, "grad_norm": 0.17496642470359802, "learning_rate": 3.941688136837593e-05, "loss": 0.8794, "step": 28090 }, { "epoch": 0.8960744921713065, "grad_norm": 0.1700027585029602, "learning_rate": 3.925184224583376e-05, "loss": 0.8868, "step": 28100 }, { "epoch": 0.8963933798909404, "grad_norm": 0.17198674380779266, "learning_rate": 3.908749414477842e-05, "loss": 0.875, "step": 28110 }, { "epoch": 0.8967122676105743, "grad_norm": 0.17227719724178314, "learning_rate": 3.8923834171891724e-05, "loss": 0.8971, "step": 28120 }, { "epoch": 0.8970311553302083, "grad_norm": 0.1696770191192627, "learning_rate": 3.876085944596985e-05, "loss": 0.8776, "step": 28130 }, { "epoch": 0.8973500430498421, "grad_norm": 0.17306533455848694, "learning_rate": 3.8598567097872634e-05, "loss": 0.9021, "step": 28140 }, { "epoch": 0.8976689307694761, "grad_norm": 0.1734350621700287, "learning_rate": 3.843695427047303e-05, "loss": 0.8853, "step": 28150 }, { "epoch": 0.89798781848911, "grad_norm": 0.1695108264684677, "learning_rate": 3.8276018118606846e-05, "loss": 0.8854, "step": 28160 }, { "epoch": 0.8983067062087439, "grad_norm": 0.17414987087249756, "learning_rate": 3.8115755809022624e-05, "loss": 0.8736, "step": 28170 }, { "epoch": 0.8986255939283778, "grad_norm": 0.17102502286434174, "learning_rate": 3.7956164520331766e-05, "loss": 0.8808, "step": 28180 }, { "epoch": 0.8989444816480118, "grad_norm": 0.1671244353055954, "learning_rate": 3.7797241442958946e-05, "loss": 0.8792, "step": 28190 }, { "epoch": 0.8992633693676456, "grad_norm": 0.16684648394584656, "learning_rate": 3.7638983779092486e-05, "loss": 0.8803, "step": 28200 }, { "epoch": 0.8995822570872796, "grad_norm": 0.16940031945705414, "learning_rate": 3.748138874263523e-05, "loss": 0.8861, "step": 28210 }, { "epoch": 0.8999011448069135, "grad_norm": 0.17539231479167938, "learning_rate": 3.732445355915546e-05, "loss": 0.8952, "step": 28220 }, { "epoch": 0.9002200325265474, "grad_norm": 0.1667565107345581, "learning_rate": 3.716817546583803e-05, "loss": 0.8709, "step": 28230 }, { "epoch": 0.9005389202461813, "grad_norm": 0.17119689285755157, "learning_rate": 3.7012551711435764e-05, "loss": 0.8894, "step": 28240 }, { "epoch": 0.9008578079658153, "grad_norm": 0.16709868609905243, "learning_rate": 3.6857579556221e-05, "loss": 0.8902, "step": 28250 }, { "epoch": 0.9011766956854491, "grad_norm": 0.16595764458179474, "learning_rate": 3.670325627193734e-05, "loss": 0.8678, "step": 28260 }, { "epoch": 0.9014955834050831, "grad_norm": 0.1742243766784668, "learning_rate": 3.654957914175167e-05, "loss": 0.889, "step": 28270 }, { "epoch": 0.901814471124717, "grad_norm": 0.16573461890220642, "learning_rate": 3.639654546020629e-05, "loss": 0.8755, "step": 28280 }, { "epoch": 0.9021333588443509, "grad_norm": 0.16762784123420715, "learning_rate": 3.6244152533171275e-05, "loss": 0.8882, "step": 28290 }, { "epoch": 0.9024522465639848, "grad_norm": 0.17437873780727386, "learning_rate": 3.609239767779709e-05, "loss": 0.9081, "step": 28300 }, { "epoch": 0.9027711342836188, "grad_norm": 0.1660221666097641, "learning_rate": 3.594127822246733e-05, "loss": 0.8893, "step": 28310 }, { "epoch": 0.9030900220032526, "grad_norm": 0.1724633127450943, "learning_rate": 3.579079150675168e-05, "loss": 0.8775, "step": 28320 }, { "epoch": 0.9034089097228866, "grad_norm": 0.16792918741703033, "learning_rate": 3.564093488135911e-05, "loss": 0.8816, "step": 28330 }, { "epoch": 0.9037277974425205, "grad_norm": 0.16467350721359253, "learning_rate": 3.54917057080912e-05, "loss": 0.8676, "step": 28340 }, { "epoch": 0.9040466851621544, "grad_norm": 0.17283858358860016, "learning_rate": 3.534310135979569e-05, "loss": 0.8891, "step": 28350 }, { "epoch": 0.9043655728817883, "grad_norm": 0.16493968665599823, "learning_rate": 3.51951192203203e-05, "loss": 0.886, "step": 28360 }, { "epoch": 0.9046844606014223, "grad_norm": 0.16881853342056274, "learning_rate": 3.504775668446659e-05, "loss": 0.8881, "step": 28370 }, { "epoch": 0.9050033483210561, "grad_norm": 0.17092576622962952, "learning_rate": 3.4901011157944126e-05, "loss": 0.8742, "step": 28380 }, { "epoch": 0.9053222360406901, "grad_norm": 0.16808760166168213, "learning_rate": 3.475488005732481e-05, "loss": 0.8764, "step": 28390 }, { "epoch": 0.905641123760324, "grad_norm": 0.17145703732967377, "learning_rate": 3.460936080999741e-05, "loss": 0.8797, "step": 28400 }, { "epoch": 0.9059600114799579, "grad_norm": 0.17452701926231384, "learning_rate": 3.4464450854122266e-05, "loss": 0.8841, "step": 28410 }, { "epoch": 0.9062788991995918, "grad_norm": 0.17020602524280548, "learning_rate": 3.4320147638586154e-05, "loss": 0.8796, "step": 28420 }, { "epoch": 0.9065977869192258, "grad_norm": 0.167311429977417, "learning_rate": 3.4176448622957427e-05, "loss": 0.9003, "step": 28430 }, { "epoch": 0.9069166746388596, "grad_norm": 0.17277421057224274, "learning_rate": 3.4033351277441255e-05, "loss": 0.8943, "step": 28440 }, { "epoch": 0.9072355623584936, "grad_norm": 0.16550636291503906, "learning_rate": 3.38908530828351e-05, "loss": 0.891, "step": 28450 }, { "epoch": 0.9075544500781275, "grad_norm": 0.17455977201461792, "learning_rate": 3.374895153048438e-05, "loss": 0.8742, "step": 28460 }, { "epoch": 0.9078733377977614, "grad_norm": 0.1689627766609192, "learning_rate": 3.360764412223827e-05, "loss": 0.8864, "step": 28470 }, { "epoch": 0.9081922255173953, "grad_norm": 0.16884039342403412, "learning_rate": 3.3466928370405766e-05, "loss": 0.8854, "step": 28480 }, { "epoch": 0.9085111132370293, "grad_norm": 0.17706649005413055, "learning_rate": 3.332680179771184e-05, "loss": 0.8888, "step": 28490 }, { "epoch": 0.9088300009566631, "grad_norm": 0.17109733819961548, "learning_rate": 3.318726193725388e-05, "loss": 0.8859, "step": 28500 }, { "epoch": 0.9091488886762971, "grad_norm": 0.16820432245731354, "learning_rate": 3.304830633245822e-05, "loss": 0.8813, "step": 28510 }, { "epoch": 0.909467776395931, "grad_norm": 0.16424791514873505, "learning_rate": 3.290993253703689e-05, "loss": 0.8874, "step": 28520 }, { "epoch": 0.9097866641155649, "grad_norm": 0.1689070761203766, "learning_rate": 3.277213811494463e-05, "loss": 0.8583, "step": 28530 }, { "epoch": 0.9101055518351988, "grad_norm": 0.1675632745027542, "learning_rate": 3.263492064033587e-05, "loss": 0.8841, "step": 28540 }, { "epoch": 0.9104244395548328, "grad_norm": 0.16971516609191895, "learning_rate": 3.249827769752215e-05, "loss": 0.8744, "step": 28550 }, { "epoch": 0.9107433272744666, "grad_norm": 0.17137958109378815, "learning_rate": 3.2362206880929494e-05, "loss": 0.8926, "step": 28560 }, { "epoch": 0.9110622149941006, "grad_norm": 0.16915251314640045, "learning_rate": 3.222670579505612e-05, "loss": 0.8867, "step": 28570 }, { "epoch": 0.9113811027137345, "grad_norm": 0.1734655350446701, "learning_rate": 3.2091772054430256e-05, "loss": 0.8892, "step": 28580 }, { "epoch": 0.9116999904333684, "grad_norm": 0.17015081644058228, "learning_rate": 3.195740328356815e-05, "loss": 0.893, "step": 28590 }, { "epoch": 0.9120188781530023, "grad_norm": 0.17893768846988678, "learning_rate": 3.182359711693219e-05, "loss": 0.899, "step": 28600 }, { "epoch": 0.9123377658726363, "grad_norm": 0.17800496518611908, "learning_rate": 3.169035119888938e-05, "loss": 0.8932, "step": 28610 }, { "epoch": 0.9126566535922701, "grad_norm": 0.167316734790802, "learning_rate": 3.155766318366973e-05, "loss": 0.8821, "step": 28620 }, { "epoch": 0.9129755413119041, "grad_norm": 0.16948860883712769, "learning_rate": 3.142553073532508e-05, "loss": 0.8715, "step": 28630 }, { "epoch": 0.913294429031538, "grad_norm": 0.16441291570663452, "learning_rate": 3.129395152768789e-05, "loss": 0.8813, "step": 28640 }, { "epoch": 0.913613316751172, "grad_norm": 0.17731501162052155, "learning_rate": 3.116292324433033e-05, "loss": 0.8748, "step": 28650 }, { "epoch": 0.9139322044708058, "grad_norm": 0.1751125305891037, "learning_rate": 3.103244357852349e-05, "loss": 0.8809, "step": 28660 }, { "epoch": 0.9142510921904398, "grad_norm": 0.17110154032707214, "learning_rate": 3.090251023319679e-05, "loss": 0.8851, "step": 28670 }, { "epoch": 0.9145699799100737, "grad_norm": 0.17188861966133118, "learning_rate": 3.0773120920897486e-05, "loss": 0.8927, "step": 28680 }, { "epoch": 0.9148888676297076, "grad_norm": 0.16816741228103638, "learning_rate": 3.064427336375045e-05, "loss": 0.8914, "step": 28690 }, { "epoch": 0.9152077553493415, "grad_norm": 0.1723993867635727, "learning_rate": 3.0515965293418096e-05, "loss": 0.8826, "step": 28700 }, { "epoch": 0.9155266430689755, "grad_norm": 0.17036281526088715, "learning_rate": 3.038819445106034e-05, "loss": 0.8818, "step": 28710 }, { "epoch": 0.9158455307886093, "grad_norm": 0.17272977530956268, "learning_rate": 3.0260958587294935e-05, "loss": 0.8868, "step": 28720 }, { "epoch": 0.9161644185082433, "grad_norm": 0.16877420246601105, "learning_rate": 3.013425546215782e-05, "loss": 0.899, "step": 28730 }, { "epoch": 0.9164833062278772, "grad_norm": 0.17606976628303528, "learning_rate": 3.0008082845063706e-05, "loss": 0.9043, "step": 28740 }, { "epoch": 0.9168021939475111, "grad_norm": 0.1669413298368454, "learning_rate": 2.9882438514766802e-05, "loss": 0.8902, "step": 28750 }, { "epoch": 0.917121081667145, "grad_norm": 0.1702776402235031, "learning_rate": 2.9757320259321707e-05, "loss": 0.8857, "step": 28760 }, { "epoch": 0.917439969386779, "grad_norm": 0.17731963098049164, "learning_rate": 2.963272587604447e-05, "loss": 0.8812, "step": 28770 }, { "epoch": 0.9177588571064128, "grad_norm": 0.17650321125984192, "learning_rate": 2.9508653171473813e-05, "loss": 0.8824, "step": 28780 }, { "epoch": 0.9180777448260468, "grad_norm": 0.1683824211359024, "learning_rate": 2.9385099961332523e-05, "loss": 0.8816, "step": 28790 }, { "epoch": 0.9183966325456807, "grad_norm": 0.16492652893066406, "learning_rate": 2.926206407048898e-05, "loss": 0.8801, "step": 28800 }, { "epoch": 0.9187155202653146, "grad_norm": 0.171652689576149, "learning_rate": 2.913954333291889e-05, "loss": 0.878, "step": 28810 }, { "epoch": 0.9190344079849485, "grad_norm": 0.17132626473903656, "learning_rate": 2.901753559166712e-05, "loss": 0.8644, "step": 28820 }, { "epoch": 0.9193532957045825, "grad_norm": 0.16985774040222168, "learning_rate": 2.8896038698809757e-05, "loss": 0.876, "step": 28830 }, { "epoch": 0.9196721834242163, "grad_norm": 0.16665631532669067, "learning_rate": 2.8775050515416274e-05, "loss": 0.8824, "step": 28840 }, { "epoch": 0.9199910711438503, "grad_norm": 0.16637495160102844, "learning_rate": 2.865456891151188e-05, "loss": 0.8828, "step": 28850 }, { "epoch": 0.9203099588634842, "grad_norm": 0.17059068381786346, "learning_rate": 2.8534591766040024e-05, "loss": 0.8874, "step": 28860 }, { "epoch": 0.920628846583118, "grad_norm": 0.17028462886810303, "learning_rate": 2.8415116966825072e-05, "loss": 0.8691, "step": 28870 }, { "epoch": 0.920947734302752, "grad_norm": 0.17274853587150574, "learning_rate": 2.8296142410535074e-05, "loss": 0.8799, "step": 28880 }, { "epoch": 0.921266622022386, "grad_norm": 0.17280860245227814, "learning_rate": 2.8177666002644782e-05, "loss": 0.8886, "step": 28890 }, { "epoch": 0.9215855097420198, "grad_norm": 0.17315393686294556, "learning_rate": 2.8059685657398748e-05, "loss": 0.8912, "step": 28900 }, { "epoch": 0.9219043974616538, "grad_norm": 0.1712070256471634, "learning_rate": 2.7942199297774628e-05, "loss": 0.8801, "step": 28910 }, { "epoch": 0.9222232851812877, "grad_norm": 0.1668015420436859, "learning_rate": 2.7825204855446604e-05, "loss": 0.8856, "step": 28920 }, { "epoch": 0.9225421729009216, "grad_norm": 0.17411622405052185, "learning_rate": 2.7708700270748953e-05, "loss": 0.8919, "step": 28930 }, { "epoch": 0.9228610606205555, "grad_norm": 0.1741609424352646, "learning_rate": 2.759268349263983e-05, "loss": 0.8824, "step": 28940 }, { "epoch": 0.9231799483401895, "grad_norm": 0.16620464622974396, "learning_rate": 2.7477152478665126e-05, "loss": 0.8814, "step": 28950 }, { "epoch": 0.9234988360598233, "grad_norm": 0.1636183261871338, "learning_rate": 2.736210519492252e-05, "loss": 0.8813, "step": 28960 }, { "epoch": 0.9238177237794573, "grad_norm": 0.17570005357265472, "learning_rate": 2.7247539616025663e-05, "loss": 0.8769, "step": 28970 }, { "epoch": 0.9241366114990912, "grad_norm": 0.17470939457416534, "learning_rate": 2.7133453725068553e-05, "loss": 0.8717, "step": 28980 }, { "epoch": 0.924455499218725, "grad_norm": 0.1725778877735138, "learning_rate": 2.701984551358999e-05, "loss": 0.8794, "step": 28990 }, { "epoch": 0.924774386938359, "grad_norm": 0.17439262568950653, "learning_rate": 2.6906712981538236e-05, "loss": 0.871, "step": 29000 }, { "epoch": 0.925093274657993, "grad_norm": 0.17004485428333282, "learning_rate": 2.6794054137235806e-05, "loss": 0.8829, "step": 29010 }, { "epoch": 0.9254121623776268, "grad_norm": 0.16694343090057373, "learning_rate": 2.668186699734439e-05, "loss": 0.8817, "step": 29020 }, { "epoch": 0.9257310500972608, "grad_norm": 0.16977915167808533, "learning_rate": 2.657014958682998e-05, "loss": 0.8775, "step": 29030 }, { "epoch": 0.9260499378168947, "grad_norm": 0.16832669079303741, "learning_rate": 2.6458899938928035e-05, "loss": 0.8709, "step": 29040 }, { "epoch": 0.9263688255365286, "grad_norm": 0.17108400166034698, "learning_rate": 2.634811609510889e-05, "loss": 0.8769, "step": 29050 }, { "epoch": 0.9266877132561625, "grad_norm": 0.16579735279083252, "learning_rate": 2.6237796105043294e-05, "loss": 0.8902, "step": 29060 }, { "epoch": 0.9270066009757965, "grad_norm": 0.17090383172035217, "learning_rate": 2.6127938026568044e-05, "loss": 0.8801, "step": 29070 }, { "epoch": 0.9273254886954303, "grad_norm": 0.16820332407951355, "learning_rate": 2.6018539925651806e-05, "loss": 0.8622, "step": 29080 }, { "epoch": 0.9276443764150643, "grad_norm": 0.16625086963176727, "learning_rate": 2.5909599876361067e-05, "loss": 0.8704, "step": 29090 }, { "epoch": 0.9279632641346982, "grad_norm": 0.16523200273513794, "learning_rate": 2.5801115960826234e-05, "loss": 0.8886, "step": 29100 }, { "epoch": 0.928282151854332, "grad_norm": 0.17791950702667236, "learning_rate": 2.5693086269207865e-05, "loss": 0.9059, "step": 29110 }, { "epoch": 0.928601039573966, "grad_norm": 0.17191381752490997, "learning_rate": 2.5585508899663037e-05, "loss": 0.8945, "step": 29120 }, { "epoch": 0.9289199272936, "grad_norm": 0.16965806484222412, "learning_rate": 2.5478381958311885e-05, "loss": 0.8772, "step": 29130 }, { "epoch": 0.9292388150132338, "grad_norm": 0.17314006388187408, "learning_rate": 2.5371703559204248e-05, "loss": 0.8806, "step": 29140 }, { "epoch": 0.9295577027328678, "grad_norm": 0.16447074711322784, "learning_rate": 2.526547182428646e-05, "loss": 0.8664, "step": 29150 }, { "epoch": 0.9298765904525017, "grad_norm": 0.1670355200767517, "learning_rate": 2.5159684883368308e-05, "loss": 0.8647, "step": 29160 }, { "epoch": 0.9301954781721355, "grad_norm": 0.17165569961071014, "learning_rate": 2.505434087409009e-05, "loss": 0.887, "step": 29170 }, { "epoch": 0.9305143658917695, "grad_norm": 0.1715584695339203, "learning_rate": 2.4949437941889832e-05, "loss": 0.8798, "step": 29180 }, { "epoch": 0.9308332536114035, "grad_norm": 0.16674351692199707, "learning_rate": 2.4844974239970637e-05, "loss": 0.8571, "step": 29190 }, { "epoch": 0.9311521413310373, "grad_norm": 0.17173631489276886, "learning_rate": 2.4740947929268202e-05, "loss": 0.8847, "step": 29200 }, { "epoch": 0.9314710290506713, "grad_norm": 0.16858477890491486, "learning_rate": 2.4637357178418395e-05, "loss": 0.8984, "step": 29210 }, { "epoch": 0.9317899167703052, "grad_norm": 0.1788715422153473, "learning_rate": 2.4534200163725028e-05, "loss": 0.8812, "step": 29220 }, { "epoch": 0.932108804489939, "grad_norm": 0.16826432943344116, "learning_rate": 2.443147506912777e-05, "loss": 0.8722, "step": 29230 }, { "epoch": 0.932427692209573, "grad_norm": 0.17240197956562042, "learning_rate": 2.4329180086170172e-05, "loss": 0.873, "step": 29240 }, { "epoch": 0.932746579929207, "grad_norm": 0.17108172178268433, "learning_rate": 2.4227313413967803e-05, "loss": 0.8774, "step": 29250 }, { "epoch": 0.9330654676488408, "grad_norm": 0.17302483320236206, "learning_rate": 2.412587325917658e-05, "loss": 0.8797, "step": 29260 }, { "epoch": 0.9333843553684747, "grad_norm": 0.17489032447338104, "learning_rate": 2.4024857835961166e-05, "loss": 0.8738, "step": 29270 }, { "epoch": 0.9337032430881087, "grad_norm": 0.1717793345451355, "learning_rate": 2.392426536596356e-05, "loss": 0.8737, "step": 29280 }, { "epoch": 0.9340221308077425, "grad_norm": 0.1737363338470459, "learning_rate": 2.3824094078271775e-05, "loss": 0.8741, "step": 29290 }, { "epoch": 0.9343410185273765, "grad_norm": 0.167171448469162, "learning_rate": 2.3724342209388646e-05, "loss": 0.8704, "step": 29300 }, { "epoch": 0.9346599062470105, "grad_norm": 0.17019569873809814, "learning_rate": 2.3625008003200812e-05, "loss": 0.8873, "step": 29310 }, { "epoch": 0.9349787939666443, "grad_norm": 0.16639681160449982, "learning_rate": 2.352608971094778e-05, "loss": 0.8801, "step": 29320 }, { "epoch": 0.9352976816862782, "grad_norm": 0.16961655020713806, "learning_rate": 2.3427585591191153e-05, "loss": 0.8958, "step": 29330 }, { "epoch": 0.9356165694059122, "grad_norm": 0.16953252255916595, "learning_rate": 2.3329493909783962e-05, "loss": 0.8683, "step": 29340 }, { "epoch": 0.935935457125546, "grad_norm": 0.17069625854492188, "learning_rate": 2.3231812939840138e-05, "loss": 0.8698, "step": 29350 }, { "epoch": 0.93625434484518, "grad_norm": 0.17173562943935394, "learning_rate": 2.3134540961704098e-05, "loss": 0.8596, "step": 29360 }, { "epoch": 0.936573232564814, "grad_norm": 0.17173653841018677, "learning_rate": 2.3037676262920523e-05, "loss": 0.8856, "step": 29370 }, { "epoch": 0.9368921202844478, "grad_norm": 0.16764011979103088, "learning_rate": 2.2941217138204138e-05, "loss": 0.8833, "step": 29380 }, { "epoch": 0.9372110080040817, "grad_norm": 0.17342248558998108, "learning_rate": 2.2845161889409744e-05, "loss": 0.8802, "step": 29390 }, { "epoch": 0.9375298957237157, "grad_norm": 0.16736873984336853, "learning_rate": 2.2749508825502283e-05, "loss": 0.8853, "step": 29400 }, { "epoch": 0.9378487834433495, "grad_norm": 0.16693644225597382, "learning_rate": 2.2654256262527105e-05, "loss": 0.8825, "step": 29410 }, { "epoch": 0.9381676711629835, "grad_norm": 0.17182965576648712, "learning_rate": 2.2559402523580303e-05, "loss": 0.8868, "step": 29420 }, { "epoch": 0.9384865588826174, "grad_norm": 0.17486432194709778, "learning_rate": 2.2464945938779194e-05, "loss": 0.8742, "step": 29430 }, { "epoch": 0.9388054466022513, "grad_norm": 0.17658363282680511, "learning_rate": 2.2370884845232913e-05, "loss": 0.8701, "step": 29440 }, { "epoch": 0.9391243343218852, "grad_norm": 0.16929444670677185, "learning_rate": 2.2277217587013156e-05, "loss": 0.8716, "step": 29450 }, { "epoch": 0.9394432220415192, "grad_norm": 0.1668825000524521, "learning_rate": 2.2183942515125016e-05, "loss": 0.8824, "step": 29460 }, { "epoch": 0.9397621097611532, "grad_norm": 0.16626104712486267, "learning_rate": 2.2091057987477944e-05, "loss": 0.8784, "step": 29470 }, { "epoch": 0.940080997480787, "grad_norm": 0.16839087009429932, "learning_rate": 2.1998562368856864e-05, "loss": 0.8781, "step": 29480 }, { "epoch": 0.940399885200421, "grad_norm": 0.1720423549413681, "learning_rate": 2.190645403089337e-05, "loss": 0.8778, "step": 29490 }, { "epoch": 0.9407187729200549, "grad_norm": 0.17319394648075104, "learning_rate": 2.181473135203705e-05, "loss": 0.8832, "step": 29500 }, { "epoch": 0.9410376606396887, "grad_norm": 0.16802969574928284, "learning_rate": 2.172339271752697e-05, "loss": 0.8736, "step": 29510 }, { "epoch": 0.9413565483593227, "grad_norm": 0.17481163144111633, "learning_rate": 2.1632436519363203e-05, "loss": 0.8721, "step": 29520 }, { "epoch": 0.9416754360789567, "grad_norm": 0.1747370958328247, "learning_rate": 2.1541861156278552e-05, "loss": 0.8597, "step": 29530 }, { "epoch": 0.9419943237985905, "grad_norm": 0.17296218872070312, "learning_rate": 2.145166503371037e-05, "loss": 0.8858, "step": 29540 }, { "epoch": 0.9423132115182244, "grad_norm": 0.1767471730709076, "learning_rate": 2.1361846563772446e-05, "loss": 0.8844, "step": 29550 }, { "epoch": 0.9426320992378584, "grad_norm": 0.16958440840244293, "learning_rate": 2.127240416522709e-05, "loss": 0.8722, "step": 29560 }, { "epoch": 0.9429509869574922, "grad_norm": 0.17238831520080566, "learning_rate": 2.1183336263457258e-05, "loss": 0.8849, "step": 29570 }, { "epoch": 0.9432698746771262, "grad_norm": 0.17094658315181732, "learning_rate": 2.109464129043888e-05, "loss": 0.8672, "step": 29580 }, { "epoch": 0.9435887623967602, "grad_norm": 0.17316597700119019, "learning_rate": 2.100631768471321e-05, "loss": 0.8698, "step": 29590 }, { "epoch": 0.943907650116394, "grad_norm": 0.17091426253318787, "learning_rate": 2.0918363891359365e-05, "loss": 0.8863, "step": 29600 }, { "epoch": 0.944226537836028, "grad_norm": 0.17296843230724335, "learning_rate": 2.0830778361966933e-05, "loss": 0.8859, "step": 29610 }, { "epoch": 0.9445454255556619, "grad_norm": 0.16335801780223846, "learning_rate": 2.0743559554608734e-05, "loss": 0.8889, "step": 29620 }, { "epoch": 0.9448643132752957, "grad_norm": 0.17202697694301605, "learning_rate": 2.0656705933813646e-05, "loss": 0.8884, "step": 29630 }, { "epoch": 0.9451832009949297, "grad_norm": 0.1737438142299652, "learning_rate": 2.05702159705396e-05, "loss": 0.8747, "step": 29640 }, { "epoch": 0.9455020887145636, "grad_norm": 0.16886982321739197, "learning_rate": 2.0484088142146646e-05, "loss": 0.8689, "step": 29650 }, { "epoch": 0.9458209764341975, "grad_norm": 0.1703483760356903, "learning_rate": 2.039832093237016e-05, "loss": 0.8752, "step": 29660 }, { "epoch": 0.9461398641538314, "grad_norm": 0.17107826471328735, "learning_rate": 2.0312912831294133e-05, "loss": 0.8907, "step": 29670 }, { "epoch": 0.9464587518734654, "grad_norm": 0.17108580470085144, "learning_rate": 2.022786233532461e-05, "loss": 0.8825, "step": 29680 }, { "epoch": 0.9467776395930992, "grad_norm": 0.17031945288181305, "learning_rate": 2.014316794716319e-05, "loss": 0.8757, "step": 29690 }, { "epoch": 0.9470965273127332, "grad_norm": 0.1722126007080078, "learning_rate": 2.0058828175780694e-05, "loss": 0.8735, "step": 29700 }, { "epoch": 0.9474154150323671, "grad_norm": 0.16917821764945984, "learning_rate": 1.9974841536390925e-05, "loss": 0.8595, "step": 29710 }, { "epoch": 0.947734302752001, "grad_norm": 0.1698334962129593, "learning_rate": 1.989120655042449e-05, "loss": 0.8678, "step": 29720 }, { "epoch": 0.9480531904716349, "grad_norm": 0.17627465724945068, "learning_rate": 1.9807921745502785e-05, "loss": 0.8899, "step": 29730 }, { "epoch": 0.9483720781912689, "grad_norm": 0.17068272829055786, "learning_rate": 1.972498565541209e-05, "loss": 0.8645, "step": 29740 }, { "epoch": 0.9486909659109027, "grad_norm": 0.17030967772006989, "learning_rate": 1.964239682007775e-05, "loss": 0.8811, "step": 29750 }, { "epoch": 0.9490098536305367, "grad_norm": 0.1698002964258194, "learning_rate": 1.956015378553845e-05, "loss": 0.8813, "step": 29760 }, { "epoch": 0.9493287413501706, "grad_norm": 0.17307348549365997, "learning_rate": 1.947825510392065e-05, "loss": 0.8724, "step": 29770 }, { "epoch": 0.9496476290698045, "grad_norm": 0.1735437661409378, "learning_rate": 1.939669933341307e-05, "loss": 0.8746, "step": 29780 }, { "epoch": 0.9499665167894384, "grad_norm": 0.1740484982728958, "learning_rate": 1.9315485038241333e-05, "loss": 0.8795, "step": 29790 }, { "epoch": 0.9502854045090724, "grad_norm": 0.1707371026277542, "learning_rate": 1.9234610788642647e-05, "loss": 0.8771, "step": 29800 }, { "epoch": 0.9506042922287062, "grad_norm": 0.1706804782152176, "learning_rate": 1.9154075160840683e-05, "loss": 0.8634, "step": 29810 }, { "epoch": 0.9509231799483402, "grad_norm": 0.17211967706680298, "learning_rate": 1.907387673702047e-05, "loss": 0.8853, "step": 29820 }, { "epoch": 0.9512420676679741, "grad_norm": 0.16938042640686035, "learning_rate": 1.8994014105303468e-05, "loss": 0.8897, "step": 29830 }, { "epoch": 0.951560955387608, "grad_norm": 0.17498505115509033, "learning_rate": 1.8914485859722682e-05, "loss": 0.8851, "step": 29840 }, { "epoch": 0.9518798431072419, "grad_norm": 0.1684788018465042, "learning_rate": 1.8835290600197926e-05, "loss": 0.8822, "step": 29850 }, { "epoch": 0.9521987308268759, "grad_norm": 0.1729750782251358, "learning_rate": 1.8756426932511182e-05, "loss": 0.8712, "step": 29860 }, { "epoch": 0.9525176185465097, "grad_norm": 0.1740708351135254, "learning_rate": 1.8677893468282023e-05, "loss": 0.8855, "step": 29870 }, { "epoch": 0.9528365062661437, "grad_norm": 0.1664377897977829, "learning_rate": 1.8599688824943227e-05, "loss": 0.8728, "step": 29880 }, { "epoch": 0.9531553939857776, "grad_norm": 0.17212225496768951, "learning_rate": 1.852181162571638e-05, "loss": 0.8684, "step": 29890 }, { "epoch": 0.9534742817054115, "grad_norm": 0.17010776698589325, "learning_rate": 1.8444260499587665e-05, "loss": 0.8701, "step": 29900 }, { "epoch": 0.9537931694250454, "grad_norm": 0.1705661565065384, "learning_rate": 1.8367034081283716e-05, "loss": 0.8809, "step": 29910 }, { "epoch": 0.9541120571446794, "grad_norm": 0.1687018722295761, "learning_rate": 1.829013101124761e-05, "loss": 0.8812, "step": 29920 }, { "epoch": 0.9544309448643132, "grad_norm": 0.16721801459789276, "learning_rate": 1.8213549935614886e-05, "loss": 0.8739, "step": 29930 }, { "epoch": 0.9547498325839472, "grad_norm": 0.1781189888715744, "learning_rate": 1.8137289506189752e-05, "loss": 0.8778, "step": 29940 }, { "epoch": 0.9550687203035811, "grad_norm": 0.17269571125507355, "learning_rate": 1.806134838042133e-05, "loss": 0.9006, "step": 29950 }, { "epoch": 0.955387608023215, "grad_norm": 0.17672085762023926, "learning_rate": 1.7985725221380017e-05, "loss": 0.8752, "step": 29960 }, { "epoch": 0.9557064957428489, "grad_norm": 0.1679825484752655, "learning_rate": 1.7910418697733964e-05, "loss": 0.8853, "step": 29970 }, { "epoch": 0.9560253834624829, "grad_norm": 0.16887645423412323, "learning_rate": 1.7835427483725635e-05, "loss": 0.8723, "step": 29980 }, { "epoch": 0.9563442711821167, "grad_norm": 0.17646433413028717, "learning_rate": 1.7760750259148444e-05, "loss": 0.8949, "step": 29990 }, { "epoch": 0.9566631589017507, "grad_norm": 0.17680183053016663, "learning_rate": 1.7686385709323553e-05, "loss": 0.8886, "step": 30000 }, { "epoch": 0.9569820466213846, "grad_norm": 0.1752249002456665, "learning_rate": 1.7612332525076688e-05, "loss": 0.8928, "step": 30010 }, { "epoch": 0.9573009343410185, "grad_norm": 0.17468760907649994, "learning_rate": 1.7538589402715118e-05, "loss": 0.8648, "step": 30020 }, { "epoch": 0.9576198220606524, "grad_norm": 0.16802415251731873, "learning_rate": 1.7465155044004687e-05, "loss": 0.8745, "step": 30030 }, { "epoch": 0.9579387097802864, "grad_norm": 0.1705239713191986, "learning_rate": 1.7392028156146985e-05, "loss": 0.8756, "step": 30040 }, { "epoch": 0.9582575974999202, "grad_norm": 0.17179515957832336, "learning_rate": 1.7319207451756553e-05, "loss": 0.8656, "step": 30050 }, { "epoch": 0.9585764852195542, "grad_norm": 0.17518065869808197, "learning_rate": 1.724669164883824e-05, "loss": 0.8944, "step": 30060 }, { "epoch": 0.9588953729391881, "grad_norm": 0.1713200956583023, "learning_rate": 1.7174479470764624e-05, "loss": 0.8925, "step": 30070 }, { "epoch": 0.959214260658822, "grad_norm": 0.17048819363117218, "learning_rate": 1.7102569646253555e-05, "loss": 0.874, "step": 30080 }, { "epoch": 0.9595331483784559, "grad_norm": 0.1709250658750534, "learning_rate": 1.7030960909345757e-05, "loss": 0.8777, "step": 30090 }, { "epoch": 0.9598520360980899, "grad_norm": 0.1756034642457962, "learning_rate": 1.6959651999382535e-05, "loss": 0.8848, "step": 30100 }, { "epoch": 0.9601709238177237, "grad_norm": 0.171064093708992, "learning_rate": 1.688864166098361e-05, "loss": 0.8829, "step": 30110 }, { "epoch": 0.9604898115373577, "grad_norm": 0.1718042641878128, "learning_rate": 1.6817928644024988e-05, "loss": 0.8746, "step": 30120 }, { "epoch": 0.9608086992569916, "grad_norm": 0.16927538812160492, "learning_rate": 1.6747511703616973e-05, "loss": 0.8656, "step": 30130 }, { "epoch": 0.9611275869766255, "grad_norm": 0.17567414045333862, "learning_rate": 1.6677389600082237e-05, "loss": 0.8831, "step": 30140 }, { "epoch": 0.9614464746962594, "grad_norm": 0.16869021952152252, "learning_rate": 1.6607561098934e-05, "loss": 0.8792, "step": 30150 }, { "epoch": 0.9617653624158934, "grad_norm": 0.17464403808116913, "learning_rate": 1.6538024970854306e-05, "loss": 0.8719, "step": 30160 }, { "epoch": 0.9620842501355272, "grad_norm": 0.1718774139881134, "learning_rate": 1.646877999167236e-05, "loss": 0.8693, "step": 30170 }, { "epoch": 0.9624031378551612, "grad_norm": 0.16808542609214783, "learning_rate": 1.639982494234301e-05, "loss": 0.8805, "step": 30180 }, { "epoch": 0.9627220255747951, "grad_norm": 0.17283496260643005, "learning_rate": 1.633115860892524e-05, "loss": 0.8868, "step": 30190 }, { "epoch": 0.963040913294429, "grad_norm": 0.17048844695091248, "learning_rate": 1.6262779782560837e-05, "loss": 0.8721, "step": 30200 }, { "epoch": 0.9633598010140629, "grad_norm": 0.17138278484344482, "learning_rate": 1.6194687259453118e-05, "loss": 0.8829, "step": 30210 }, { "epoch": 0.9636786887336969, "grad_norm": 0.1714802086353302, "learning_rate": 1.6126879840845686e-05, "loss": 0.8881, "step": 30220 }, { "epoch": 0.9639975764533307, "grad_norm": 0.17534974217414856, "learning_rate": 1.6059356333001364e-05, "loss": 0.8867, "step": 30230 }, { "epoch": 0.9643164641729647, "grad_norm": 0.17431078851222992, "learning_rate": 1.599211554718118e-05, "loss": 0.8848, "step": 30240 }, { "epoch": 0.9646353518925986, "grad_norm": 0.16717074811458588, "learning_rate": 1.5925156299623422e-05, "loss": 0.8569, "step": 30250 }, { "epoch": 0.9649542396122325, "grad_norm": 0.17699192464351654, "learning_rate": 1.5858477411522817e-05, "loss": 0.8696, "step": 30260 }, { "epoch": 0.9652731273318664, "grad_norm": 0.1727645993232727, "learning_rate": 1.5792077709009763e-05, "loss": 0.8755, "step": 30270 }, { "epoch": 0.9655920150515004, "grad_norm": 0.1745349019765854, "learning_rate": 1.5725956023129666e-05, "loss": 0.8632, "step": 30280 }, { "epoch": 0.9659109027711343, "grad_norm": 0.17108358442783356, "learning_rate": 1.566011118982237e-05, "loss": 0.8732, "step": 30290 }, { "epoch": 0.9662297904907682, "grad_norm": 0.17055270075798035, "learning_rate": 1.559454204990166e-05, "loss": 0.887, "step": 30300 }, { "epoch": 0.9665486782104021, "grad_norm": 0.17470329999923706, "learning_rate": 1.5529247449034844e-05, "loss": 0.8752, "step": 30310 }, { "epoch": 0.9668675659300361, "grad_norm": 0.17025306820869446, "learning_rate": 1.5464226237722444e-05, "loss": 0.878, "step": 30320 }, { "epoch": 0.9671864536496699, "grad_norm": 0.1691192239522934, "learning_rate": 1.5399477271277955e-05, "loss": 0.8757, "step": 30330 }, { "epoch": 0.9675053413693039, "grad_norm": 0.17150309681892395, "learning_rate": 1.5334999409807695e-05, "loss": 0.8994, "step": 30340 }, { "epoch": 0.9678242290889378, "grad_norm": 0.1817106157541275, "learning_rate": 1.5270791518190734e-05, "loss": 0.8946, "step": 30350 }, { "epoch": 0.9681431168085717, "grad_norm": 0.17197778820991516, "learning_rate": 1.5206852466058905e-05, "loss": 0.881, "step": 30360 }, { "epoch": 0.9684620045282056, "grad_norm": 0.17225344479084015, "learning_rate": 1.5143181127776916e-05, "loss": 0.8959, "step": 30370 }, { "epoch": 0.9687808922478396, "grad_norm": 0.1728435456752777, "learning_rate": 1.5079776382422542e-05, "loss": 0.8694, "step": 30380 }, { "epoch": 0.9690997799674734, "grad_norm": 0.17408286035060883, "learning_rate": 1.5016637113766854e-05, "loss": 0.8748, "step": 30390 }, { "epoch": 0.9694186676871074, "grad_norm": 0.16729173064231873, "learning_rate": 1.495376221025461e-05, "loss": 0.888, "step": 30400 }, { "epoch": 0.9697375554067413, "grad_norm": 0.17172005772590637, "learning_rate": 1.4891150564984654e-05, "loss": 0.8711, "step": 30410 }, { "epoch": 0.9700564431263752, "grad_norm": 0.17462344467639923, "learning_rate": 1.482880107569045e-05, "loss": 0.8854, "step": 30420 }, { "epoch": 0.9703753308460091, "grad_norm": 0.17063672840595245, "learning_rate": 1.4766712644720671e-05, "loss": 0.8837, "step": 30430 }, { "epoch": 0.9706942185656431, "grad_norm": 0.170479878783226, "learning_rate": 1.4704884179019873e-05, "loss": 0.8683, "step": 30440 }, { "epoch": 0.9710131062852769, "grad_norm": 0.165556862950325, "learning_rate": 1.464331459010925e-05, "loss": 0.8834, "step": 30450 }, { "epoch": 0.9713319940049109, "grad_norm": 0.16775330901145935, "learning_rate": 1.4582002794067476e-05, "loss": 0.8865, "step": 30460 }, { "epoch": 0.9716508817245448, "grad_norm": 0.16928617656230927, "learning_rate": 1.4520947711511627e-05, "loss": 0.8902, "step": 30470 }, { "epoch": 0.9719697694441787, "grad_norm": 0.17188507318496704, "learning_rate": 1.446014826757816e-05, "loss": 0.8896, "step": 30480 }, { "epoch": 0.9722886571638126, "grad_norm": 0.17368733882904053, "learning_rate": 1.4399603391904017e-05, "loss": 0.8858, "step": 30490 }, { "epoch": 0.9726075448834466, "grad_norm": 0.1663048416376114, "learning_rate": 1.4339312018607758e-05, "loss": 0.881, "step": 30500 }, { "epoch": 0.9729264326030804, "grad_norm": 0.17066152393817902, "learning_rate": 1.4279273086270809e-05, "loss": 0.8813, "step": 30510 }, { "epoch": 0.9732453203227144, "grad_norm": 0.17235307395458221, "learning_rate": 1.4219485537918775e-05, "loss": 0.8846, "step": 30520 }, { "epoch": 0.9735642080423483, "grad_norm": 0.17622828483581543, "learning_rate": 1.4159948321002827e-05, "loss": 0.8801, "step": 30530 }, { "epoch": 0.9738830957619822, "grad_norm": 0.17217357456684113, "learning_rate": 1.4100660387381168e-05, "loss": 0.8739, "step": 30540 }, { "epoch": 0.9742019834816161, "grad_norm": 0.1750701367855072, "learning_rate": 1.4041620693300609e-05, "loss": 0.887, "step": 30550 }, { "epoch": 0.9745208712012501, "grad_norm": 0.17289851605892181, "learning_rate": 1.3982828199378157e-05, "loss": 0.8878, "step": 30560 }, { "epoch": 0.9748397589208839, "grad_norm": 0.1716613620519638, "learning_rate": 1.3924281870582728e-05, "loss": 0.8808, "step": 30570 }, { "epoch": 0.9751586466405179, "grad_norm": 0.17046386003494263, "learning_rate": 1.3865980676216942e-05, "loss": 0.8796, "step": 30580 }, { "epoch": 0.9754775343601518, "grad_norm": 0.16988524794578552, "learning_rate": 1.3807923589898957e-05, "loss": 0.8868, "step": 30590 }, { "epoch": 0.9757964220797857, "grad_norm": 0.1751394271850586, "learning_rate": 1.3750109589544415e-05, "loss": 0.9025, "step": 30600 }, { "epoch": 0.9761153097994196, "grad_norm": 0.16379600763320923, "learning_rate": 1.3692537657348436e-05, "loss": 0.8785, "step": 30610 }, { "epoch": 0.9764341975190536, "grad_norm": 0.1647310107946396, "learning_rate": 1.3635206779767707e-05, "loss": 0.8775, "step": 30620 }, { "epoch": 0.9767530852386874, "grad_norm": 0.1679423302412033, "learning_rate": 1.3578115947502643e-05, "loss": 0.8741, "step": 30630 }, { "epoch": 0.9770719729583214, "grad_norm": 0.17554236948490143, "learning_rate": 1.3521264155479603e-05, "loss": 0.8844, "step": 30640 }, { "epoch": 0.9773908606779553, "grad_norm": 0.17117299139499664, "learning_rate": 1.3464650402833215e-05, "loss": 0.8864, "step": 30650 }, { "epoch": 0.9777097483975892, "grad_norm": 0.17434369027614594, "learning_rate": 1.3408273692888739e-05, "loss": 0.8729, "step": 30660 }, { "epoch": 0.9780286361172231, "grad_norm": 0.17379888892173767, "learning_rate": 1.3352133033144537e-05, "loss": 0.8756, "step": 30670 }, { "epoch": 0.9783475238368571, "grad_norm": 0.17442287504673004, "learning_rate": 1.3296227435254582e-05, "loss": 0.8775, "step": 30680 }, { "epoch": 0.9786664115564909, "grad_norm": 0.17200325429439545, "learning_rate": 1.3240555915011072e-05, "loss": 0.8863, "step": 30690 }, { "epoch": 0.9789852992761249, "grad_norm": 0.16841526329517365, "learning_rate": 1.31851174923271e-05, "loss": 0.8805, "step": 30700 }, { "epoch": 0.9793041869957588, "grad_norm": 0.16977955400943756, "learning_rate": 1.3129911191219392e-05, "loss": 0.889, "step": 30710 }, { "epoch": 0.9796230747153927, "grad_norm": 0.1685485690832138, "learning_rate": 1.307493603979115e-05, "loss": 0.8745, "step": 30720 }, { "epoch": 0.9799419624350266, "grad_norm": 0.16957922279834747, "learning_rate": 1.3020191070214908e-05, "loss": 0.8651, "step": 30730 }, { "epoch": 0.9802608501546606, "grad_norm": 0.16795560717582703, "learning_rate": 1.2965675318715508e-05, "loss": 0.8608, "step": 30740 }, { "epoch": 0.9805797378742944, "grad_norm": 0.17047218978405, "learning_rate": 1.2911387825553141e-05, "loss": 0.8793, "step": 30750 }, { "epoch": 0.9808986255939284, "grad_norm": 0.1741809844970703, "learning_rate": 1.2857327635006445e-05, "loss": 0.8855, "step": 30760 }, { "epoch": 0.9812175133135623, "grad_norm": 0.17339977622032166, "learning_rate": 1.2803493795355673e-05, "loss": 0.8727, "step": 30770 }, { "epoch": 0.9815364010331962, "grad_norm": 0.16938899457454681, "learning_rate": 1.2749885358865947e-05, "loss": 0.8811, "step": 30780 }, { "epoch": 0.9818552887528301, "grad_norm": 0.17674767971038818, "learning_rate": 1.269650138177057e-05, "loss": 0.8734, "step": 30790 }, { "epoch": 0.9821741764724641, "grad_norm": 0.17623662948608398, "learning_rate": 1.2643340924254416e-05, "loss": 0.869, "step": 30800 }, { "epoch": 0.9824930641920979, "grad_norm": 0.16907668113708496, "learning_rate": 1.2590403050437372e-05, "loss": 0.8666, "step": 30810 }, { "epoch": 0.9828119519117319, "grad_norm": 0.17110703885555267, "learning_rate": 1.2537686828357875e-05, "loss": 0.8691, "step": 30820 }, { "epoch": 0.9831308396313658, "grad_norm": 0.16781097650527954, "learning_rate": 1.2485191329956501e-05, "loss": 0.871, "step": 30830 }, { "epoch": 0.9834497273509997, "grad_norm": 0.16678723692893982, "learning_rate": 1.2432915631059624e-05, "loss": 0.8652, "step": 30840 }, { "epoch": 0.9837686150706336, "grad_norm": 0.17238172888755798, "learning_rate": 1.2380858811363149e-05, "loss": 0.8761, "step": 30850 }, { "epoch": 0.9840875027902676, "grad_norm": 0.1715608835220337, "learning_rate": 1.2329019954416307e-05, "loss": 0.8715, "step": 30860 }, { "epoch": 0.9844063905099014, "grad_norm": 0.16758796572685242, "learning_rate": 1.2277398147605525e-05, "loss": 0.8706, "step": 30870 }, { "epoch": 0.9847252782295354, "grad_norm": 0.17507082223892212, "learning_rate": 1.2225992482138354e-05, "loss": 0.8715, "step": 30880 }, { "epoch": 0.9850441659491693, "grad_norm": 0.17221605777740479, "learning_rate": 1.2174802053027486e-05, "loss": 0.8773, "step": 30890 }, { "epoch": 0.9853630536688032, "grad_norm": 0.1673084944486618, "learning_rate": 1.2123825959074799e-05, "loss": 0.8777, "step": 30900 }, { "epoch": 0.9856819413884371, "grad_norm": 0.17324191331863403, "learning_rate": 1.2073063302855502e-05, "loss": 0.8656, "step": 30910 }, { "epoch": 0.9860008291080711, "grad_norm": 0.1690828800201416, "learning_rate": 1.2022513190702337e-05, "loss": 0.879, "step": 30920 }, { "epoch": 0.9863197168277049, "grad_norm": 0.17445652186870575, "learning_rate": 1.1972174732689848e-05, "loss": 0.8715, "step": 30930 }, { "epoch": 0.9866386045473389, "grad_norm": 0.1705475151538849, "learning_rate": 1.1922047042618712e-05, "loss": 0.8645, "step": 30940 }, { "epoch": 0.9869574922669728, "grad_norm": 0.17571979761123657, "learning_rate": 1.1872129238000134e-05, "loss": 0.8752, "step": 30950 }, { "epoch": 0.9872763799866067, "grad_norm": 0.17660577595233917, "learning_rate": 1.1822420440040315e-05, "loss": 0.8826, "step": 30960 }, { "epoch": 0.9875952677062406, "grad_norm": 0.1759461760520935, "learning_rate": 1.1772919773624978e-05, "loss": 0.8774, "step": 30970 }, { "epoch": 0.9879141554258746, "grad_norm": 0.17794868350028992, "learning_rate": 1.1723626367303966e-05, "loss": 0.8807, "step": 30980 }, { "epoch": 0.9882330431455084, "grad_norm": 0.1735115349292755, "learning_rate": 1.1674539353275899e-05, "loss": 0.8959, "step": 30990 }, { "epoch": 0.9885519308651424, "grad_norm": 0.16572922468185425, "learning_rate": 1.1625657867372895e-05, "loss": 0.8712, "step": 31000 }, { "epoch": 0.9888708185847763, "grad_norm": 0.17459288239479065, "learning_rate": 1.1576981049045353e-05, "loss": 0.9071, "step": 31010 }, { "epoch": 0.9891897063044102, "grad_norm": 0.17562910914421082, "learning_rate": 1.1528508041346812e-05, "loss": 0.875, "step": 31020 }, { "epoch": 0.9895085940240441, "grad_norm": 0.16303937137126923, "learning_rate": 1.1480237990918854e-05, "loss": 0.8776, "step": 31030 }, { "epoch": 0.9898274817436781, "grad_norm": 0.17524918913841248, "learning_rate": 1.1432170047976097e-05, "loss": 0.8894, "step": 31040 }, { "epoch": 0.9901463694633119, "grad_norm": 0.1762991100549698, "learning_rate": 1.1384303366291208e-05, "loss": 0.8783, "step": 31050 }, { "epoch": 0.9904652571829459, "grad_norm": 0.1695641428232193, "learning_rate": 1.1336637103180043e-05, "loss": 0.8692, "step": 31060 }, { "epoch": 0.9907841449025798, "grad_norm": 0.17893773317337036, "learning_rate": 1.1289170419486774e-05, "loss": 0.8776, "step": 31070 }, { "epoch": 0.9911030326222137, "grad_norm": 0.17522774636745453, "learning_rate": 1.1241902479569133e-05, "loss": 0.871, "step": 31080 }, { "epoch": 0.9914219203418476, "grad_norm": 0.17226657271385193, "learning_rate": 1.1194832451283707e-05, "loss": 0.8735, "step": 31090 }, { "epoch": 0.9917408080614816, "grad_norm": 0.17155729234218597, "learning_rate": 1.1147959505971274e-05, "loss": 0.877, "step": 31100 }, { "epoch": 0.9920596957811154, "grad_norm": 0.16582393646240234, "learning_rate": 1.110128281844223e-05, "loss": 0.859, "step": 31110 }, { "epoch": 0.9923785835007494, "grad_norm": 0.1682422012090683, "learning_rate": 1.1054801566962041e-05, "loss": 0.8771, "step": 31120 }, { "epoch": 0.9926974712203833, "grad_norm": 0.17062459886074066, "learning_rate": 1.1008514933236803e-05, "loss": 0.8733, "step": 31130 }, { "epoch": 0.9930163589400173, "grad_norm": 0.17315512895584106, "learning_rate": 1.096242210239881e-05, "loss": 0.8878, "step": 31140 }, { "epoch": 0.9933352466596511, "grad_norm": 0.1766100823879242, "learning_rate": 1.0916522262992225e-05, "loss": 0.8758, "step": 31150 }, { "epoch": 0.9936541343792851, "grad_norm": 0.17147080600261688, "learning_rate": 1.0870814606958793e-05, "loss": 0.8713, "step": 31160 }, { "epoch": 0.993973022098919, "grad_norm": 0.1682249754667282, "learning_rate": 1.0825298329623609e-05, "loss": 0.8689, "step": 31170 }, { "epoch": 0.9942919098185529, "grad_norm": 0.16981013119220734, "learning_rate": 1.0779972629680952e-05, "loss": 0.8667, "step": 31180 }, { "epoch": 0.9946107975381868, "grad_norm": 0.17520365118980408, "learning_rate": 1.0734836709180184e-05, "loss": 0.869, "step": 31190 }, { "epoch": 0.9949296852578208, "grad_norm": 0.17918860912322998, "learning_rate": 1.0689889773511703e-05, "loss": 0.8717, "step": 31200 }, { "epoch": 0.9952485729774546, "grad_norm": 0.17659881711006165, "learning_rate": 1.0645131031392937e-05, "loss": 0.894, "step": 31210 }, { "epoch": 0.9955674606970886, "grad_norm": 0.16800250113010406, "learning_rate": 1.060055969485445e-05, "loss": 0.8641, "step": 31220 }, { "epoch": 0.9958863484167225, "grad_norm": 0.16605263948440552, "learning_rate": 1.0556174979226025e-05, "loss": 0.8679, "step": 31230 }, { "epoch": 0.9962052361363564, "grad_norm": 0.1731094866991043, "learning_rate": 1.0511976103122883e-05, "loss": 0.8639, "step": 31240 }, { "epoch": 0.9965241238559903, "grad_norm": 0.17676420509815216, "learning_rate": 1.0467962288431913e-05, "loss": 0.8796, "step": 31250 }, { "epoch": 0.9968430115756243, "grad_norm": 0.1687876135110855, "learning_rate": 1.0424132760297974e-05, "loss": 0.8637, "step": 31260 }, { "epoch": 0.9971618992952581, "grad_norm": 0.17514613270759583, "learning_rate": 1.0380486747110261e-05, "loss": 0.8864, "step": 31270 }, { "epoch": 0.9974807870148921, "grad_norm": 0.17455033957958221, "learning_rate": 1.033702348048871e-05, "loss": 0.8689, "step": 31280 }, { "epoch": 0.997799674734526, "grad_norm": 0.16740122437477112, "learning_rate": 1.0293742195270484e-05, "loss": 0.8631, "step": 31290 }, { "epoch": 0.9981185624541599, "grad_norm": 0.17466361820697784, "learning_rate": 1.0250642129496489e-05, "loss": 0.8783, "step": 31300 }, { "epoch": 0.9984374501737938, "grad_norm": 0.16998808085918427, "learning_rate": 1.0207722524397968e-05, "loss": 0.8581, "step": 31310 }, { "epoch": 0.9987563378934278, "grad_norm": 0.16853420436382294, "learning_rate": 1.0164982624383143e-05, "loss": 0.8747, "step": 31320 }, { "epoch": 0.9990752256130616, "grad_norm": 0.17115938663482666, "learning_rate": 1.0122421677023911e-05, "loss": 0.8842, "step": 31330 }, { "epoch": 0.9993941133326956, "grad_norm": 0.16913767158985138, "learning_rate": 1.0080038933042593e-05, "loss": 0.8605, "step": 31340 }, { "epoch": 0.9997130010523295, "grad_norm": 0.17436672747135162, "learning_rate": 1.003783364629875e-05, "loss": 0.8836, "step": 31350 } ], "logging_steps": 10, "max_steps": 31359, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1294004204869434e+18, "train_batch_size": 512, "trial_name": null, "trial_params": null }