{ "best_metric": null, "best_model_checkpoint": null, "epoch": 28.999985968653974, "eval_steps": 500, "global_step": 1033400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 7.5e-05, "loss": 44.5033, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.00015, "loss": 6.0308, "step": 1000 }, { "epoch": 0.04, "learning_rate": 0.000225, "loss": 5.869, "step": 1500 }, { "epoch": 0.06, "learning_rate": 0.0003, "loss": 5.7816, "step": 2000 }, { "epoch": 0.07, "learning_rate": 0.00029985942156660604, "loss": 5.7394, "step": 2500 }, { "epoch": 0.08, "learning_rate": 0.0002997188431332121, "loss": 5.4199, "step": 3000 }, { "epoch": 0.1, "learning_rate": 0.00029957826469981816, "loss": 4.4523, "step": 3500 }, { "epoch": 0.11, "learning_rate": 0.0002994376862664242, "loss": 3.9274, "step": 4000 }, { "epoch": 0.13, "learning_rate": 0.0002992971078330303, "loss": 3.6766, "step": 4500 }, { "epoch": 0.14, "learning_rate": 0.00029915652939963635, "loss": 3.4911, "step": 5000 }, { "epoch": 0.15, "learning_rate": 0.0002990159509662424, "loss": 3.3588, "step": 5500 }, { "epoch": 0.17, "learning_rate": 0.00029887537253284847, "loss": 3.288, "step": 6000 }, { "epoch": 0.18, "learning_rate": 0.00029873479409945453, "loss": 3.2493, "step": 6500 }, { "epoch": 0.2, "learning_rate": 0.0002985942156660606, "loss": 3.2086, "step": 7000 }, { "epoch": 0.21, "learning_rate": 0.00029845363723266665, "loss": 3.0969, "step": 7500 }, { "epoch": 0.22, "learning_rate": 0.0002983130587992727, "loss": 3.0552, "step": 8000 }, { "epoch": 0.24, "learning_rate": 0.0002981724803658788, "loss": 3.0411, "step": 8500 }, { "epoch": 0.25, "learning_rate": 0.00029803190193248484, "loss": 2.9908, "step": 9000 }, { "epoch": 0.27, "learning_rate": 0.0002978913234990909, "loss": 2.9927, "step": 9500 }, { "epoch": 0.28, "learning_rate": 0.00029775074506569696, "loss": 2.9911, "step": 10000 }, { "epoch": 0.29, "learning_rate": 0.000297610166632303, "loss": 2.9854, "step": 10500 }, { "epoch": 0.31, "learning_rate": 0.0002974695881989091, "loss": 2.9732, "step": 11000 }, { "epoch": 0.32, "learning_rate": 0.00029732900976551515, "loss": 2.9407, "step": 11500 }, { "epoch": 0.34, "learning_rate": 0.0002971884313321212, "loss": 2.9384, "step": 12000 }, { "epoch": 0.35, "learning_rate": 0.0002970478528987273, "loss": 2.9073, "step": 12500 }, { "epoch": 0.36, "learning_rate": 0.00029690727446533334, "loss": 2.9253, "step": 13000 }, { "epoch": 0.38, "learning_rate": 0.0002967666960319394, "loss": 2.8914, "step": 13500 }, { "epoch": 0.39, "learning_rate": 0.00029662611759854546, "loss": 2.8701, "step": 14000 }, { "epoch": 0.41, "learning_rate": 0.0002964855391651515, "loss": 2.8428, "step": 14500 }, { "epoch": 0.42, "learning_rate": 0.0002963449607317576, "loss": 2.8333, "step": 15000 }, { "epoch": 0.43, "learning_rate": 0.00029620438229836365, "loss": 2.8261, "step": 15500 }, { "epoch": 0.45, "learning_rate": 0.0002960638038649697, "loss": 2.8314, "step": 16000 }, { "epoch": 0.46, "learning_rate": 0.00029592322543157577, "loss": 2.8064, "step": 16500 }, { "epoch": 0.48, "learning_rate": 0.00029578264699818183, "loss": 2.8008, "step": 17000 }, { "epoch": 0.49, "learning_rate": 0.0002956420685647879, "loss": 2.7798, "step": 17500 }, { "epoch": 0.51, "learning_rate": 0.00029550149013139396, "loss": 2.7898, "step": 18000 }, { "epoch": 0.52, "learning_rate": 0.000295360911698, "loss": 2.7996, "step": 18500 }, { "epoch": 0.53, "learning_rate": 0.0002952203332646061, "loss": 2.7596, "step": 19000 }, { "epoch": 0.55, "learning_rate": 0.00029507975483121214, "loss": 2.7718, "step": 19500 }, { "epoch": 0.56, "learning_rate": 0.0002949391763978182, "loss": 2.774, "step": 20000 }, { "epoch": 0.58, "learning_rate": 0.00029479859796442427, "loss": 2.751, "step": 20500 }, { "epoch": 0.59, "learning_rate": 0.0002946580195310303, "loss": 2.7666, "step": 21000 }, { "epoch": 0.6, "learning_rate": 0.0002945174410976364, "loss": 2.7769, "step": 21500 }, { "epoch": 0.62, "learning_rate": 0.00029437686266424245, "loss": 2.7581, "step": 22000 }, { "epoch": 0.63, "learning_rate": 0.0002942362842308485, "loss": 2.7098, "step": 22500 }, { "epoch": 0.65, "learning_rate": 0.0002940957057974546, "loss": 2.7282, "step": 23000 }, { "epoch": 0.66, "learning_rate": 0.00029395512736406064, "loss": 2.7095, "step": 23500 }, { "epoch": 0.67, "learning_rate": 0.0002938145489306667, "loss": 2.7139, "step": 24000 }, { "epoch": 0.69, "learning_rate": 0.00029367397049727276, "loss": 2.6969, "step": 24500 }, { "epoch": 0.7, "learning_rate": 0.0002935333920638788, "loss": 2.705, "step": 25000 }, { "epoch": 0.72, "learning_rate": 0.0002933928136304849, "loss": 2.7324, "step": 25500 }, { "epoch": 0.73, "learning_rate": 0.00029325223519709095, "loss": 2.6955, "step": 26000 }, { "epoch": 0.74, "learning_rate": 0.000293111656763697, "loss": 2.6963, "step": 26500 }, { "epoch": 0.76, "learning_rate": 0.00029297107833030307, "loss": 2.678, "step": 27000 }, { "epoch": 0.77, "learning_rate": 0.00029283049989690913, "loss": 2.7222, "step": 27500 }, { "epoch": 0.79, "learning_rate": 0.0002926899214635152, "loss": 2.7119, "step": 28000 }, { "epoch": 0.8, "learning_rate": 0.00029254934303012126, "loss": 2.7088, "step": 28500 }, { "epoch": 0.81, "learning_rate": 0.0002924087645967273, "loss": 2.6917, "step": 29000 }, { "epoch": 0.83, "learning_rate": 0.0002922681861633334, "loss": 2.6737, "step": 29500 }, { "epoch": 0.84, "learning_rate": 0.00029212760772993944, "loss": 2.6373, "step": 30000 }, { "epoch": 0.86, "learning_rate": 0.0002919870292965455, "loss": 2.6861, "step": 30500 }, { "epoch": 0.87, "learning_rate": 0.00029184645086315157, "loss": 2.658, "step": 31000 }, { "epoch": 0.88, "learning_rate": 0.00029170587242975763, "loss": 2.6517, "step": 31500 }, { "epoch": 0.9, "learning_rate": 0.0002915652939963637, "loss": 2.6492, "step": 32000 }, { "epoch": 0.91, "learning_rate": 0.00029142471556296975, "loss": 2.6422, "step": 32500 }, { "epoch": 0.93, "learning_rate": 0.0002912841371295758, "loss": 2.6518, "step": 33000 }, { "epoch": 0.94, "learning_rate": 0.0002911435586961819, "loss": 2.6342, "step": 33500 }, { "epoch": 0.95, "learning_rate": 0.00029100298026278794, "loss": 2.6301, "step": 34000 }, { "epoch": 0.97, "learning_rate": 0.000290862401829394, "loss": 2.6546, "step": 34500 }, { "epoch": 0.98, "learning_rate": 0.00029072182339600006, "loss": 2.6329, "step": 35000 }, { "epoch": 1.0, "learning_rate": 0.0002905812449626061, "loss": 2.6626, "step": 35500 }, { "epoch": 1.01, "learning_rate": 0.0002904406665292122, "loss": 2.5699, "step": 36000 }, { "epoch": 1.02, "learning_rate": 0.00029030008809581825, "loss": 2.5145, "step": 36500 }, { "epoch": 1.04, "learning_rate": 0.0002901595096624243, "loss": 2.5691, "step": 37000 }, { "epoch": 1.05, "learning_rate": 0.00029001893122903037, "loss": 2.5582, "step": 37500 }, { "epoch": 1.07, "learning_rate": 0.00028987835279563643, "loss": 2.6044, "step": 38000 }, { "epoch": 1.08, "learning_rate": 0.0002897377743622425, "loss": 2.5418, "step": 38500 }, { "epoch": 1.09, "learning_rate": 0.00028959719592884856, "loss": 2.5534, "step": 39000 }, { "epoch": 1.11, "learning_rate": 0.0002894566174954546, "loss": 2.5921, "step": 39500 }, { "epoch": 1.12, "learning_rate": 0.0002893160390620607, "loss": 2.5474, "step": 40000 }, { "epoch": 1.14, "learning_rate": 0.00028917546062866674, "loss": 2.5525, "step": 40500 }, { "epoch": 1.15, "learning_rate": 0.00028903488219527275, "loss": 2.5519, "step": 41000 }, { "epoch": 1.16, "learning_rate": 0.00028889430376187887, "loss": 2.5802, "step": 41500 }, { "epoch": 1.18, "learning_rate": 0.00028875372532848493, "loss": 2.4925, "step": 42000 }, { "epoch": 1.19, "learning_rate": 0.000288613146895091, "loss": 2.5546, "step": 42500 }, { "epoch": 1.21, "learning_rate": 0.00028847256846169705, "loss": 2.513, "step": 43000 }, { "epoch": 1.22, "learning_rate": 0.0002883319900283031, "loss": 2.5301, "step": 43500 }, { "epoch": 1.23, "learning_rate": 0.0002881914115949092, "loss": 2.514, "step": 44000 }, { "epoch": 1.25, "learning_rate": 0.00028805083316151524, "loss": 2.5389, "step": 44500 }, { "epoch": 1.26, "learning_rate": 0.00028791025472812125, "loss": 2.5552, "step": 45000 }, { "epoch": 1.28, "learning_rate": 0.00028776967629472736, "loss": 2.5773, "step": 45500 }, { "epoch": 1.29, "learning_rate": 0.0002876290978613334, "loss": 2.5215, "step": 46000 }, { "epoch": 1.3, "learning_rate": 0.0002874885194279395, "loss": 2.5633, "step": 46500 }, { "epoch": 1.32, "learning_rate": 0.00028734794099454555, "loss": 2.5442, "step": 47000 }, { "epoch": 1.33, "learning_rate": 0.0002872073625611516, "loss": 2.5673, "step": 47500 }, { "epoch": 1.35, "learning_rate": 0.00028706678412775767, "loss": 2.5251, "step": 48000 }, { "epoch": 1.36, "learning_rate": 0.00028692620569436373, "loss": 2.4993, "step": 48500 }, { "epoch": 1.38, "learning_rate": 0.00028678562726096974, "loss": 2.5582, "step": 49000 }, { "epoch": 1.39, "learning_rate": 0.0002866450488275758, "loss": 2.5427, "step": 49500 }, { "epoch": 1.4, "learning_rate": 0.0002865044703941819, "loss": 2.5727, "step": 50000 }, { "epoch": 1.42, "learning_rate": 0.000286363891960788, "loss": 2.5369, "step": 50500 }, { "epoch": 1.43, "learning_rate": 0.00028622331352739404, "loss": 2.5114, "step": 51000 }, { "epoch": 1.45, "learning_rate": 0.0002860827350940001, "loss": 2.5438, "step": 51500 }, { "epoch": 1.46, "learning_rate": 0.00028594215666060617, "loss": 2.5043, "step": 52000 }, { "epoch": 1.47, "learning_rate": 0.00028580157822721223, "loss": 2.5161, "step": 52500 }, { "epoch": 1.49, "learning_rate": 0.00028566099979381824, "loss": 2.5474, "step": 53000 }, { "epoch": 1.5, "learning_rate": 0.0002855204213604243, "loss": 2.5154, "step": 53500 }, { "epoch": 1.52, "learning_rate": 0.0002853798429270304, "loss": 2.4809, "step": 54000 }, { "epoch": 1.53, "learning_rate": 0.0002852392644936365, "loss": 2.5301, "step": 54500 }, { "epoch": 1.54, "learning_rate": 0.00028509868606024254, "loss": 2.504, "step": 55000 }, { "epoch": 1.56, "learning_rate": 0.0002849581076268486, "loss": 2.482, "step": 55500 }, { "epoch": 1.57, "learning_rate": 0.00028481752919345466, "loss": 2.5116, "step": 56000 }, { "epoch": 1.59, "learning_rate": 0.0002846769507600607, "loss": 2.4682, "step": 56500 }, { "epoch": 1.6, "learning_rate": 0.00028453637232666673, "loss": 2.5098, "step": 57000 }, { "epoch": 1.61, "learning_rate": 0.0002843957938932728, "loss": 2.4829, "step": 57500 }, { "epoch": 1.63, "learning_rate": 0.00028425521545987886, "loss": 2.498, "step": 58000 }, { "epoch": 1.64, "learning_rate": 0.00028411463702648497, "loss": 2.4492, "step": 58500 }, { "epoch": 1.66, "learning_rate": 0.00028397405859309103, "loss": 2.4253, "step": 59000 }, { "epoch": 1.67, "learning_rate": 0.0002838334801596971, "loss": 2.4648, "step": 59500 }, { "epoch": 1.68, "learning_rate": 0.00028369290172630316, "loss": 2.4785, "step": 60000 }, { "epoch": 1.7, "learning_rate": 0.0002835523232929092, "loss": 2.5128, "step": 60500 }, { "epoch": 1.71, "learning_rate": 0.0002834117448595153, "loss": 2.4789, "step": 61000 }, { "epoch": 1.73, "learning_rate": 0.0002832711664261213, "loss": 2.4908, "step": 61500 }, { "epoch": 1.74, "learning_rate": 0.00028313058799272735, "loss": 2.4679, "step": 62000 }, { "epoch": 1.75, "learning_rate": 0.00028299000955933347, "loss": 2.4972, "step": 62500 }, { "epoch": 1.77, "learning_rate": 0.00028284943112593953, "loss": 2.4933, "step": 63000 }, { "epoch": 1.78, "learning_rate": 0.0002827088526925456, "loss": 2.427, "step": 63500 }, { "epoch": 1.8, "learning_rate": 0.00028256827425915165, "loss": 2.4344, "step": 64000 }, { "epoch": 1.81, "learning_rate": 0.0002824276958257577, "loss": 2.4604, "step": 64500 }, { "epoch": 1.82, "learning_rate": 0.0002822871173923638, "loss": 2.4777, "step": 65000 }, { "epoch": 1.84, "learning_rate": 0.0002821465389589698, "loss": 2.4681, "step": 65500 }, { "epoch": 1.85, "learning_rate": 0.00028200596052557585, "loss": 2.4243, "step": 66000 }, { "epoch": 1.87, "learning_rate": 0.0002818653820921819, "loss": 2.4423, "step": 66500 }, { "epoch": 1.88, "learning_rate": 0.000281724803658788, "loss": 2.4525, "step": 67000 }, { "epoch": 1.89, "learning_rate": 0.0002815842252253941, "loss": 2.4513, "step": 67500 }, { "epoch": 1.91, "learning_rate": 0.00028144364679200015, "loss": 2.4303, "step": 68000 }, { "epoch": 1.92, "learning_rate": 0.0002813030683586062, "loss": 2.4732, "step": 68500 }, { "epoch": 1.94, "learning_rate": 0.00028116248992521227, "loss": 2.4074, "step": 69000 }, { "epoch": 1.95, "learning_rate": 0.0002810219114918183, "loss": 2.4309, "step": 69500 }, { "epoch": 1.96, "learning_rate": 0.00028088133305842434, "loss": 2.4698, "step": 70000 }, { "epoch": 1.98, "learning_rate": 0.0002807407546250304, "loss": 2.4569, "step": 70500 }, { "epoch": 1.99, "learning_rate": 0.0002806001761916365, "loss": 2.4364, "step": 71000 }, { "epoch": 2.01, "learning_rate": 0.0002804595977582426, "loss": 2.4081, "step": 71500 }, { "epoch": 2.02, "learning_rate": 0.00028031901932484864, "loss": 2.3468, "step": 72000 }, { "epoch": 2.03, "learning_rate": 0.0002801784408914547, "loss": 2.3954, "step": 72500 }, { "epoch": 2.05, "learning_rate": 0.00028003786245806077, "loss": 2.3745, "step": 73000 }, { "epoch": 2.06, "learning_rate": 0.0002798972840246668, "loss": 2.399, "step": 73500 }, { "epoch": 2.08, "learning_rate": 0.00027975670559127284, "loss": 2.3572, "step": 74000 }, { "epoch": 2.09, "learning_rate": 0.0002796161271578789, "loss": 2.3931, "step": 74500 }, { "epoch": 2.1, "learning_rate": 0.00027947554872448496, "loss": 2.3707, "step": 75000 }, { "epoch": 2.12, "learning_rate": 0.0002793349702910911, "loss": 2.425, "step": 75500 }, { "epoch": 2.13, "learning_rate": 0.00027919439185769714, "loss": 2.3634, "step": 76000 }, { "epoch": 2.15, "learning_rate": 0.0002790538134243032, "loss": 2.3933, "step": 76500 }, { "epoch": 2.16, "learning_rate": 0.00027891323499090926, "loss": 2.356, "step": 77000 }, { "epoch": 2.17, "learning_rate": 0.00027877265655751527, "loss": 2.3819, "step": 77500 }, { "epoch": 2.19, "learning_rate": 0.00027863207812412133, "loss": 2.3687, "step": 78000 }, { "epoch": 2.2, "learning_rate": 0.0002784914996907274, "loss": 2.3515, "step": 78500 }, { "epoch": 2.22, "learning_rate": 0.00027835092125733346, "loss": 2.3663, "step": 79000 }, { "epoch": 2.23, "learning_rate": 0.0002782103428239396, "loss": 2.3916, "step": 79500 }, { "epoch": 2.25, "learning_rate": 0.00027806976439054563, "loss": 2.3404, "step": 80000 }, { "epoch": 2.26, "learning_rate": 0.0002779291859571517, "loss": 2.3482, "step": 80500 }, { "epoch": 2.27, "learning_rate": 0.00027778860752375776, "loss": 2.3524, "step": 81000 }, { "epoch": 2.29, "learning_rate": 0.00027764802909036377, "loss": 2.3759, "step": 81500 }, { "epoch": 2.3, "learning_rate": 0.00027750745065696983, "loss": 2.3498, "step": 82000 }, { "epoch": 2.32, "learning_rate": 0.0002773668722235759, "loss": 2.3667, "step": 82500 }, { "epoch": 2.33, "learning_rate": 0.00027722629379018195, "loss": 2.3777, "step": 83000 }, { "epoch": 2.34, "learning_rate": 0.000277085715356788, "loss": 2.3955, "step": 83500 }, { "epoch": 2.36, "learning_rate": 0.00027694513692339413, "loss": 2.3583, "step": 84000 }, { "epoch": 2.37, "learning_rate": 0.0002768045584900002, "loss": 2.376, "step": 84500 }, { "epoch": 2.39, "learning_rate": 0.00027666398005660625, "loss": 2.3855, "step": 85000 }, { "epoch": 2.4, "learning_rate": 0.00027652340162321226, "loss": 2.3843, "step": 85500 }, { "epoch": 2.41, "learning_rate": 0.0002763828231898183, "loss": 2.3652, "step": 86000 }, { "epoch": 2.43, "learning_rate": 0.0002762422447564244, "loss": 2.3697, "step": 86500 }, { "epoch": 2.44, "learning_rate": 0.00027610166632303045, "loss": 2.3478, "step": 87000 }, { "epoch": 2.46, "learning_rate": 0.0002759610878896365, "loss": 2.3332, "step": 87500 }, { "epoch": 2.47, "learning_rate": 0.0002758205094562426, "loss": 2.381, "step": 88000 }, { "epoch": 2.48, "learning_rate": 0.0002756799310228487, "loss": 2.3139, "step": 88500 }, { "epoch": 2.5, "learning_rate": 0.00027553935258945475, "loss": 2.3374, "step": 89000 }, { "epoch": 2.51, "learning_rate": 0.00027539877415606076, "loss": 2.3663, "step": 89500 }, { "epoch": 2.53, "learning_rate": 0.0002752581957226668, "loss": 2.3629, "step": 90000 }, { "epoch": 2.54, "learning_rate": 0.0002751176172892729, "loss": 2.3424, "step": 90500 }, { "epoch": 2.55, "learning_rate": 0.00027497703885587894, "loss": 2.3227, "step": 91000 }, { "epoch": 2.57, "learning_rate": 0.000274836460422485, "loss": 2.331, "step": 91500 }, { "epoch": 2.58, "learning_rate": 0.00027469588198909107, "loss": 2.3293, "step": 92000 }, { "epoch": 2.6, "learning_rate": 0.0002745553035556972, "loss": 2.343, "step": 92500 }, { "epoch": 2.61, "learning_rate": 0.00027441472512230325, "loss": 2.3556, "step": 93000 }, { "epoch": 2.62, "learning_rate": 0.0002742741466889093, "loss": 2.3014, "step": 93500 }, { "epoch": 2.64, "learning_rate": 0.0002741335682555153, "loss": 2.3458, "step": 94000 }, { "epoch": 2.65, "learning_rate": 0.0002739929898221214, "loss": 2.3415, "step": 94500 }, { "epoch": 2.67, "learning_rate": 0.00027385241138872744, "loss": 2.3327, "step": 95000 }, { "epoch": 2.68, "learning_rate": 0.0002737118329553335, "loss": 2.3377, "step": 95500 }, { "epoch": 2.69, "learning_rate": 0.00027357125452193956, "loss": 2.3452, "step": 96000 }, { "epoch": 2.71, "learning_rate": 0.0002734306760885457, "loss": 2.3191, "step": 96500 }, { "epoch": 2.72, "learning_rate": 0.00027329009765515174, "loss": 2.3126, "step": 97000 }, { "epoch": 2.74, "learning_rate": 0.0002731495192217578, "loss": 2.3646, "step": 97500 }, { "epoch": 2.75, "learning_rate": 0.0002730089407883638, "loss": 2.3326, "step": 98000 }, { "epoch": 2.76, "learning_rate": 0.00027286836235496987, "loss": 2.3596, "step": 98500 }, { "epoch": 2.78, "learning_rate": 0.00027272778392157593, "loss": 2.3274, "step": 99000 }, { "epoch": 2.79, "learning_rate": 0.000272587205488182, "loss": 2.2924, "step": 99500 }, { "epoch": 2.81, "learning_rate": 0.00027244662705478806, "loss": 2.3603, "step": 100000 }, { "epoch": 2.82, "learning_rate": 0.0002723060486213941, "loss": 2.354, "step": 100500 }, { "epoch": 2.83, "learning_rate": 0.00027216547018800024, "loss": 2.3229, "step": 101000 }, { "epoch": 2.85, "learning_rate": 0.0002720248917546063, "loss": 2.3265, "step": 101500 }, { "epoch": 2.86, "learning_rate": 0.0002718843133212123, "loss": 2.3141, "step": 102000 }, { "epoch": 2.88, "learning_rate": 0.00027174373488781837, "loss": 2.3071, "step": 102500 }, { "epoch": 2.89, "learning_rate": 0.00027160315645442443, "loss": 2.3051, "step": 103000 }, { "epoch": 2.9, "learning_rate": 0.0002714625780210305, "loss": 2.3332, "step": 103500 }, { "epoch": 2.92, "learning_rate": 0.00027132199958763655, "loss": 2.3254, "step": 104000 }, { "epoch": 2.93, "learning_rate": 0.0002711814211542426, "loss": 2.2945, "step": 104500 }, { "epoch": 2.95, "learning_rate": 0.00027104084272084873, "loss": 2.3055, "step": 105000 }, { "epoch": 2.96, "learning_rate": 0.0002709002642874548, "loss": 2.3189, "step": 105500 }, { "epoch": 2.97, "learning_rate": 0.0002707596858540608, "loss": 2.2937, "step": 106000 }, { "epoch": 2.99, "learning_rate": 0.00027061910742066686, "loss": 2.283, "step": 106500 }, { "epoch": 3.0, "learning_rate": 0.0002704785289872729, "loss": 2.3291, "step": 107000 }, { "epoch": 3.02, "learning_rate": 0.000270337950553879, "loss": 2.2292, "step": 107500 }, { "epoch": 3.03, "learning_rate": 0.00027019737212048505, "loss": 2.2265, "step": 108000 }, { "epoch": 3.04, "learning_rate": 0.0002700567936870911, "loss": 2.2228, "step": 108500 }, { "epoch": 3.06, "learning_rate": 0.00026991621525369717, "loss": 2.2762, "step": 109000 }, { "epoch": 3.07, "learning_rate": 0.0002697756368203033, "loss": 2.2504, "step": 109500 }, { "epoch": 3.09, "learning_rate": 0.0002696350583869093, "loss": 2.2539, "step": 110000 }, { "epoch": 3.1, "learning_rate": 0.00026949447995351536, "loss": 2.2537, "step": 110500 }, { "epoch": 3.11, "learning_rate": 0.0002693539015201214, "loss": 2.2348, "step": 111000 }, { "epoch": 3.13, "learning_rate": 0.0002692133230867275, "loss": 2.2565, "step": 111500 }, { "epoch": 3.14, "learning_rate": 0.00026907274465333354, "loss": 2.2512, "step": 112000 }, { "epoch": 3.16, "learning_rate": 0.0002689321662199396, "loss": 2.2837, "step": 112500 }, { "epoch": 3.17, "learning_rate": 0.00026879158778654567, "loss": 2.2672, "step": 113000 }, { "epoch": 3.19, "learning_rate": 0.0002686510093531518, "loss": 2.27, "step": 113500 }, { "epoch": 3.2, "learning_rate": 0.0002685104309197578, "loss": 2.2441, "step": 114000 }, { "epoch": 3.21, "learning_rate": 0.00026836985248636385, "loss": 2.2211, "step": 114500 }, { "epoch": 3.23, "learning_rate": 0.0002682292740529699, "loss": 2.2713, "step": 115000 }, { "epoch": 3.24, "learning_rate": 0.000268088695619576, "loss": 2.2502, "step": 115500 }, { "epoch": 3.26, "learning_rate": 0.00026794811718618204, "loss": 2.26, "step": 116000 }, { "epoch": 3.27, "learning_rate": 0.0002678075387527881, "loss": 2.2297, "step": 116500 }, { "epoch": 3.28, "learning_rate": 0.00026766696031939416, "loss": 2.2647, "step": 117000 }, { "epoch": 3.3, "learning_rate": 0.0002675263818860002, "loss": 2.2684, "step": 117500 }, { "epoch": 3.31, "learning_rate": 0.0002673858034526063, "loss": 2.2555, "step": 118000 }, { "epoch": 3.33, "learning_rate": 0.00026724522501921235, "loss": 2.2513, "step": 118500 }, { "epoch": 3.34, "learning_rate": 0.0002671046465858184, "loss": 2.2653, "step": 119000 }, { "epoch": 3.35, "learning_rate": 0.0002669640681524245, "loss": 2.2498, "step": 119500 }, { "epoch": 3.37, "learning_rate": 0.00026682348971903054, "loss": 2.2688, "step": 120000 }, { "epoch": 3.38, "learning_rate": 0.0002666829112856366, "loss": 2.2662, "step": 120500 }, { "epoch": 3.4, "learning_rate": 0.00026654233285224266, "loss": 2.2642, "step": 121000 }, { "epoch": 3.41, "learning_rate": 0.0002664017544188487, "loss": 2.2346, "step": 121500 }, { "epoch": 3.42, "learning_rate": 0.00026626117598545484, "loss": 2.2195, "step": 122000 }, { "epoch": 3.44, "learning_rate": 0.00026612059755206084, "loss": 2.2449, "step": 122500 }, { "epoch": 3.45, "learning_rate": 0.0002659800191186669, "loss": 2.2538, "step": 123000 }, { "epoch": 3.47, "learning_rate": 0.00026583944068527297, "loss": 2.2572, "step": 123500 }, { "epoch": 3.48, "learning_rate": 0.00026569886225187903, "loss": 2.2885, "step": 124000 }, { "epoch": 3.49, "learning_rate": 0.0002655582838184851, "loss": 2.2428, "step": 124500 }, { "epoch": 3.51, "learning_rate": 0.00026541770538509115, "loss": 2.2527, "step": 125000 }, { "epoch": 3.52, "learning_rate": 0.0002652771269516972, "loss": 2.2884, "step": 125500 }, { "epoch": 3.54, "learning_rate": 0.0002651365485183033, "loss": 2.2668, "step": 126000 }, { "epoch": 3.55, "learning_rate": 0.00026499597008490934, "loss": 2.2687, "step": 126500 }, { "epoch": 3.56, "learning_rate": 0.0002648553916515154, "loss": 2.2512, "step": 127000 }, { "epoch": 3.58, "learning_rate": 0.00026471481321812146, "loss": 2.2464, "step": 127500 }, { "epoch": 3.59, "learning_rate": 0.0002645742347847275, "loss": 2.2876, "step": 128000 }, { "epoch": 3.61, "learning_rate": 0.0002644336563513336, "loss": 2.232, "step": 128500 }, { "epoch": 3.62, "learning_rate": 0.00026429307791793965, "loss": 2.2532, "step": 129000 }, { "epoch": 3.63, "learning_rate": 0.0002641524994845457, "loss": 2.2571, "step": 129500 }, { "epoch": 3.65, "learning_rate": 0.0002640119210511518, "loss": 2.272, "step": 130000 }, { "epoch": 3.66, "learning_rate": 0.00026387134261775784, "loss": 2.2685, "step": 130500 }, { "epoch": 3.68, "learning_rate": 0.0002637307641843639, "loss": 2.2876, "step": 131000 }, { "epoch": 3.69, "learning_rate": 0.00026359018575096996, "loss": 2.2326, "step": 131500 }, { "epoch": 3.7, "learning_rate": 0.000263449607317576, "loss": 2.2249, "step": 132000 }, { "epoch": 3.72, "learning_rate": 0.0002633090288841821, "loss": 2.2476, "step": 132500 }, { "epoch": 3.73, "learning_rate": 0.00026316845045078815, "loss": 2.2363, "step": 133000 }, { "epoch": 3.75, "learning_rate": 0.0002630278720173942, "loss": 2.2405, "step": 133500 }, { "epoch": 3.76, "learning_rate": 0.00026288729358400027, "loss": 2.2412, "step": 134000 }, { "epoch": 3.77, "learning_rate": 0.00026274671515060633, "loss": 2.2385, "step": 134500 }, { "epoch": 3.79, "learning_rate": 0.0002626061367172124, "loss": 2.2333, "step": 135000 }, { "epoch": 3.8, "learning_rate": 0.00026246555828381846, "loss": 2.2556, "step": 135500 }, { "epoch": 3.82, "learning_rate": 0.0002623249798504245, "loss": 2.2539, "step": 136000 }, { "epoch": 3.83, "learning_rate": 0.0002621844014170306, "loss": 2.2738, "step": 136500 }, { "epoch": 3.84, "learning_rate": 0.00026204382298363664, "loss": 2.2561, "step": 137000 }, { "epoch": 3.86, "learning_rate": 0.0002619032445502427, "loss": 2.22, "step": 137500 }, { "epoch": 3.87, "learning_rate": 0.00026176266611684876, "loss": 2.2397, "step": 138000 }, { "epoch": 3.89, "learning_rate": 0.0002616220876834548, "loss": 2.2599, "step": 138500 }, { "epoch": 3.9, "learning_rate": 0.0002614815092500609, "loss": 2.2284, "step": 139000 }, { "epoch": 3.91, "learning_rate": 0.00026134093081666695, "loss": 2.248, "step": 139500 }, { "epoch": 3.93, "learning_rate": 0.000261200352383273, "loss": 2.2485, "step": 140000 }, { "epoch": 3.94, "learning_rate": 0.0002610597739498791, "loss": 2.28, "step": 140500 }, { "epoch": 3.96, "learning_rate": 0.00026091919551648514, "loss": 2.209, "step": 141000 }, { "epoch": 3.97, "learning_rate": 0.0002607786170830912, "loss": 2.2213, "step": 141500 }, { "epoch": 3.98, "learning_rate": 0.00026063803864969726, "loss": 2.2504, "step": 142000 }, { "epoch": 4.0, "learning_rate": 0.0002604974602163033, "loss": 2.2513, "step": 142500 }, { "epoch": 4.01, "learning_rate": 0.0002603568817829094, "loss": 2.1913, "step": 143000 }, { "epoch": 4.03, "learning_rate": 0.00026021630334951545, "loss": 2.1501, "step": 143500 }, { "epoch": 4.04, "learning_rate": 0.0002600757249161215, "loss": 2.1712, "step": 144000 }, { "epoch": 4.06, "learning_rate": 0.00025993514648272757, "loss": 2.1857, "step": 144500 }, { "epoch": 4.07, "learning_rate": 0.00025979456804933363, "loss": 2.173, "step": 145000 }, { "epoch": 4.08, "learning_rate": 0.0002596539896159397, "loss": 2.192, "step": 145500 }, { "epoch": 4.1, "learning_rate": 0.00025951341118254576, "loss": 2.1625, "step": 146000 }, { "epoch": 4.11, "learning_rate": 0.0002593728327491518, "loss": 2.1566, "step": 146500 }, { "epoch": 4.13, "learning_rate": 0.0002592322543157579, "loss": 2.2201, "step": 147000 }, { "epoch": 4.14, "learning_rate": 0.00025909167588236394, "loss": 2.1807, "step": 147500 }, { "epoch": 4.15, "learning_rate": 0.00025895109744897, "loss": 2.1862, "step": 148000 }, { "epoch": 4.17, "learning_rate": 0.00025881051901557607, "loss": 2.1699, "step": 148500 }, { "epoch": 4.18, "learning_rate": 0.00025866994058218213, "loss": 2.1755, "step": 149000 }, { "epoch": 4.2, "learning_rate": 0.0002585293621487882, "loss": 2.1475, "step": 149500 }, { "epoch": 4.21, "learning_rate": 0.00025838878371539425, "loss": 2.1974, "step": 150000 }, { "epoch": 4.22, "learning_rate": 0.0002582482052820003, "loss": 2.1115, "step": 150500 }, { "epoch": 4.24, "learning_rate": 0.0002581076268486064, "loss": 2.1923, "step": 151000 }, { "epoch": 4.25, "learning_rate": 0.00025796704841521244, "loss": 2.1547, "step": 151500 }, { "epoch": 4.27, "learning_rate": 0.0002578264699818185, "loss": 2.1803, "step": 152000 }, { "epoch": 4.28, "learning_rate": 0.00025768589154842456, "loss": 2.1682, "step": 152500 }, { "epoch": 4.29, "learning_rate": 0.0002575453131150306, "loss": 2.1428, "step": 153000 }, { "epoch": 4.31, "learning_rate": 0.0002574047346816367, "loss": 2.1664, "step": 153500 }, { "epoch": 4.32, "learning_rate": 0.00025726415624824275, "loss": 2.1683, "step": 154000 }, { "epoch": 4.34, "learning_rate": 0.0002571235778148488, "loss": 2.1786, "step": 154500 }, { "epoch": 4.35, "learning_rate": 0.00025698299938145487, "loss": 2.1604, "step": 155000 }, { "epoch": 4.36, "learning_rate": 0.00025684242094806093, "loss": 2.1749, "step": 155500 }, { "epoch": 4.38, "learning_rate": 0.000256701842514667, "loss": 2.162, "step": 156000 }, { "epoch": 4.39, "learning_rate": 0.00025656126408127306, "loss": 2.1696, "step": 156500 }, { "epoch": 4.41, "learning_rate": 0.0002564206856478791, "loss": 2.1784, "step": 157000 }, { "epoch": 4.42, "learning_rate": 0.0002562801072144852, "loss": 2.1277, "step": 157500 }, { "epoch": 4.43, "learning_rate": 0.00025613952878109124, "loss": 2.1739, "step": 158000 }, { "epoch": 4.45, "learning_rate": 0.0002559989503476973, "loss": 2.1694, "step": 158500 }, { "epoch": 4.46, "learning_rate": 0.00025585837191430337, "loss": 2.2062, "step": 159000 }, { "epoch": 4.48, "learning_rate": 0.00025571779348090943, "loss": 2.1801, "step": 159500 }, { "epoch": 4.49, "learning_rate": 0.0002555772150475155, "loss": 2.1527, "step": 160000 }, { "epoch": 4.5, "learning_rate": 0.00025543663661412155, "loss": 2.1805, "step": 160500 }, { "epoch": 4.52, "learning_rate": 0.0002552960581807276, "loss": 2.1429, "step": 161000 }, { "epoch": 4.53, "learning_rate": 0.0002551554797473337, "loss": 2.181, "step": 161500 }, { "epoch": 4.55, "learning_rate": 0.00025501490131393974, "loss": 2.2047, "step": 162000 }, { "epoch": 4.56, "learning_rate": 0.0002548743228805458, "loss": 2.1467, "step": 162500 }, { "epoch": 4.57, "learning_rate": 0.00025473374444715186, "loss": 2.1619, "step": 163000 }, { "epoch": 4.59, "learning_rate": 0.0002545931660137579, "loss": 2.2002, "step": 163500 }, { "epoch": 4.6, "learning_rate": 0.000254452587580364, "loss": 2.1779, "step": 164000 }, { "epoch": 4.62, "learning_rate": 0.00025431200914697005, "loss": 2.175, "step": 164500 }, { "epoch": 4.63, "learning_rate": 0.0002541714307135761, "loss": 2.1816, "step": 165000 }, { "epoch": 4.64, "learning_rate": 0.00025403085228018217, "loss": 2.1696, "step": 165500 }, { "epoch": 4.66, "learning_rate": 0.00025389027384678823, "loss": 2.2045, "step": 166000 }, { "epoch": 4.67, "learning_rate": 0.0002537496954133943, "loss": 2.1508, "step": 166500 }, { "epoch": 4.69, "learning_rate": 0.00025360911698000036, "loss": 2.1918, "step": 167000 }, { "epoch": 4.7, "learning_rate": 0.0002534685385466064, "loss": 2.1761, "step": 167500 }, { "epoch": 4.71, "learning_rate": 0.0002533279601132125, "loss": 2.155, "step": 168000 }, { "epoch": 4.73, "learning_rate": 0.00025318738167981854, "loss": 2.1483, "step": 168500 }, { "epoch": 4.74, "learning_rate": 0.0002530468032464246, "loss": 2.1618, "step": 169000 }, { "epoch": 4.76, "learning_rate": 0.00025290622481303067, "loss": 2.147, "step": 169500 }, { "epoch": 4.77, "learning_rate": 0.00025276564637963673, "loss": 2.1718, "step": 170000 }, { "epoch": 4.78, "learning_rate": 0.0002526250679462428, "loss": 2.1329, "step": 170500 }, { "epoch": 4.8, "learning_rate": 0.00025248448951284885, "loss": 2.1677, "step": 171000 }, { "epoch": 4.81, "learning_rate": 0.0002523439110794549, "loss": 2.1127, "step": 171500 }, { "epoch": 4.83, "learning_rate": 0.000252203332646061, "loss": 2.1832, "step": 172000 }, { "epoch": 4.84, "learning_rate": 0.00025206275421266704, "loss": 2.1843, "step": 172500 }, { "epoch": 4.85, "learning_rate": 0.0002519221757792731, "loss": 2.2132, "step": 173000 }, { "epoch": 4.87, "learning_rate": 0.00025178159734587916, "loss": 2.1789, "step": 173500 }, { "epoch": 4.88, "learning_rate": 0.0002516410189124852, "loss": 2.1333, "step": 174000 }, { "epoch": 4.9, "learning_rate": 0.0002515004404790913, "loss": 2.1702, "step": 174500 }, { "epoch": 4.91, "learning_rate": 0.00025135986204569735, "loss": 2.1817, "step": 175000 }, { "epoch": 4.93, "learning_rate": 0.0002512192836123034, "loss": 2.1101, "step": 175500 }, { "epoch": 4.94, "learning_rate": 0.00025107870517890947, "loss": 2.1573, "step": 176000 }, { "epoch": 4.95, "learning_rate": 0.00025093812674551553, "loss": 2.1691, "step": 176500 }, { "epoch": 4.97, "learning_rate": 0.0002507975483121216, "loss": 2.1822, "step": 177000 }, { "epoch": 4.98, "learning_rate": 0.00025065696987872766, "loss": 2.1869, "step": 177500 }, { "epoch": 5.0, "learning_rate": 0.0002505163914453337, "loss": 2.1731, "step": 178000 }, { "epoch": 5.01, "learning_rate": 0.0002503758130119398, "loss": 2.1219, "step": 178500 }, { "epoch": 5.02, "learning_rate": 0.00025023523457854584, "loss": 2.0762, "step": 179000 }, { "epoch": 5.04, "learning_rate": 0.0002500946561451519, "loss": 2.1061, "step": 179500 }, { "epoch": 5.05, "learning_rate": 0.00024995407771175797, "loss": 2.0847, "step": 180000 }, { "epoch": 5.07, "learning_rate": 0.00024981349927836403, "loss": 2.1121, "step": 180500 }, { "epoch": 5.08, "learning_rate": 0.0002496729208449701, "loss": 2.099, "step": 181000 }, { "epoch": 5.09, "learning_rate": 0.00024953234241157615, "loss": 2.1179, "step": 181500 }, { "epoch": 5.11, "learning_rate": 0.0002493917639781822, "loss": 2.0985, "step": 182000 }, { "epoch": 5.12, "learning_rate": 0.0002492511855447883, "loss": 2.1106, "step": 182500 }, { "epoch": 5.14, "learning_rate": 0.00024911060711139434, "loss": 2.0744, "step": 183000 }, { "epoch": 5.15, "learning_rate": 0.0002489700286780004, "loss": 2.0858, "step": 183500 }, { "epoch": 5.16, "learning_rate": 0.00024882945024460646, "loss": 2.0899, "step": 184000 }, { "epoch": 5.18, "learning_rate": 0.0002486888718112125, "loss": 2.0637, "step": 184500 }, { "epoch": 5.19, "learning_rate": 0.0002485482933778186, "loss": 2.1075, "step": 185000 }, { "epoch": 5.21, "learning_rate": 0.00024840771494442465, "loss": 2.1119, "step": 185500 }, { "epoch": 5.22, "learning_rate": 0.0002482671365110307, "loss": 2.0968, "step": 186000 }, { "epoch": 5.23, "learning_rate": 0.00024812655807763677, "loss": 2.1092, "step": 186500 }, { "epoch": 5.25, "learning_rate": 0.00024798597964424283, "loss": 2.0675, "step": 187000 }, { "epoch": 5.26, "learning_rate": 0.0002478454012108489, "loss": 2.0575, "step": 187500 }, { "epoch": 5.28, "learning_rate": 0.00024770482277745496, "loss": 2.0931, "step": 188000 }, { "epoch": 5.29, "learning_rate": 0.000247564244344061, "loss": 2.0929, "step": 188500 }, { "epoch": 5.3, "learning_rate": 0.0002474236659106671, "loss": 2.0896, "step": 189000 }, { "epoch": 5.32, "learning_rate": 0.00024728308747727314, "loss": 2.1169, "step": 189500 }, { "epoch": 5.33, "learning_rate": 0.0002471425090438792, "loss": 2.1009, "step": 190000 }, { "epoch": 5.35, "learning_rate": 0.00024700193061048527, "loss": 2.1346, "step": 190500 }, { "epoch": 5.36, "learning_rate": 0.00024686135217709133, "loss": 2.0952, "step": 191000 }, { "epoch": 5.37, "learning_rate": 0.00024672077374369734, "loss": 2.1042, "step": 191500 }, { "epoch": 5.39, "learning_rate": 0.00024658019531030345, "loss": 2.1056, "step": 192000 }, { "epoch": 5.4, "learning_rate": 0.0002464396168769095, "loss": 2.0961, "step": 192500 }, { "epoch": 5.42, "learning_rate": 0.0002462990384435156, "loss": 2.1284, "step": 193000 }, { "epoch": 5.43, "learning_rate": 0.00024615846001012164, "loss": 2.0954, "step": 193500 }, { "epoch": 5.44, "learning_rate": 0.0002460178815767277, "loss": 2.1349, "step": 194000 }, { "epoch": 5.46, "learning_rate": 0.00024587730314333376, "loss": 2.0941, "step": 194500 }, { "epoch": 5.47, "learning_rate": 0.0002457367247099398, "loss": 2.1228, "step": 195000 }, { "epoch": 5.49, "learning_rate": 0.00024559614627654583, "loss": 2.0905, "step": 195500 }, { "epoch": 5.5, "learning_rate": 0.00024545556784315195, "loss": 2.0905, "step": 196000 }, { "epoch": 5.51, "learning_rate": 0.000245314989409758, "loss": 2.1323, "step": 196500 }, { "epoch": 5.53, "learning_rate": 0.00024517441097636407, "loss": 2.1154, "step": 197000 }, { "epoch": 5.54, "learning_rate": 0.00024503383254297013, "loss": 2.1118, "step": 197500 }, { "epoch": 5.56, "learning_rate": 0.0002448932541095762, "loss": 2.1114, "step": 198000 }, { "epoch": 5.57, "learning_rate": 0.00024475267567618226, "loss": 2.1129, "step": 198500 }, { "epoch": 5.58, "learning_rate": 0.0002446120972427883, "loss": 2.1318, "step": 199000 }, { "epoch": 5.6, "learning_rate": 0.00024447151880939433, "loss": 2.081, "step": 199500 }, { "epoch": 5.61, "learning_rate": 0.0002443309403760004, "loss": 2.087, "step": 200000 }, { "epoch": 5.63, "learning_rate": 0.0002441903619426065, "loss": 2.1043, "step": 200500 }, { "epoch": 5.64, "learning_rate": 0.00024404978350921254, "loss": 2.1188, "step": 201000 }, { "epoch": 5.65, "learning_rate": 0.0002439092050758186, "loss": 2.1295, "step": 201500 }, { "epoch": 5.67, "learning_rate": 0.0002437686266424247, "loss": 2.0991, "step": 202000 }, { "epoch": 5.68, "learning_rate": 0.00024362804820903075, "loss": 2.0925, "step": 202500 }, { "epoch": 5.7, "learning_rate": 0.00024348746977563682, "loss": 2.0488, "step": 203000 }, { "epoch": 5.71, "learning_rate": 0.00024334689134224285, "loss": 2.0945, "step": 203500 }, { "epoch": 5.72, "learning_rate": 0.0002432063129088489, "loss": 2.1083, "step": 204000 }, { "epoch": 5.74, "learning_rate": 0.00024306573447545497, "loss": 2.1211, "step": 204500 }, { "epoch": 5.75, "learning_rate": 0.00024292515604206104, "loss": 2.1377, "step": 205000 }, { "epoch": 5.77, "learning_rate": 0.0002427845776086671, "loss": 2.1321, "step": 205500 }, { "epoch": 5.78, "learning_rate": 0.00024264399917527316, "loss": 2.1156, "step": 206000 }, { "epoch": 5.79, "learning_rate": 0.00024250342074187925, "loss": 2.0931, "step": 206500 }, { "epoch": 5.81, "learning_rate": 0.0002423628423084853, "loss": 2.1073, "step": 207000 }, { "epoch": 5.82, "learning_rate": 0.00024222226387509137, "loss": 2.1215, "step": 207500 }, { "epoch": 5.84, "learning_rate": 0.0002420816854416974, "loss": 2.1354, "step": 208000 }, { "epoch": 5.85, "learning_rate": 0.00024194110700830347, "loss": 2.0763, "step": 208500 }, { "epoch": 5.87, "learning_rate": 0.00024180052857490953, "loss": 2.1315, "step": 209000 }, { "epoch": 5.88, "learning_rate": 0.0002416599501415156, "loss": 2.1101, "step": 209500 }, { "epoch": 5.89, "learning_rate": 0.00024151937170812166, "loss": 2.0826, "step": 210000 }, { "epoch": 5.91, "learning_rate": 0.00024137879327472774, "loss": 2.1161, "step": 210500 }, { "epoch": 5.92, "learning_rate": 0.0002412382148413338, "loss": 2.1167, "step": 211000 }, { "epoch": 5.94, "learning_rate": 0.00024109763640793987, "loss": 2.1309, "step": 211500 }, { "epoch": 5.95, "learning_rate": 0.0002409570579745459, "loss": 2.0976, "step": 212000 }, { "epoch": 5.96, "learning_rate": 0.00024081647954115197, "loss": 2.1108, "step": 212500 }, { "epoch": 5.98, "learning_rate": 0.00024067590110775803, "loss": 2.1444, "step": 213000 }, { "epoch": 5.99, "learning_rate": 0.0002405353226743641, "loss": 2.0976, "step": 213500 }, { "epoch": 6.01, "learning_rate": 0.00024039474424097015, "loss": 2.0973, "step": 214000 }, { "epoch": 6.02, "learning_rate": 0.0002402541658075762, "loss": 2.0388, "step": 214500 }, { "epoch": 6.03, "learning_rate": 0.0002401135873741823, "loss": 2.0271, "step": 215000 }, { "epoch": 6.05, "learning_rate": 0.00023997300894078836, "loss": 2.0473, "step": 215500 }, { "epoch": 6.06, "learning_rate": 0.0002398324305073944, "loss": 2.0443, "step": 216000 }, { "epoch": 6.08, "learning_rate": 0.00023969185207400046, "loss": 2.0398, "step": 216500 }, { "epoch": 6.09, "learning_rate": 0.00023955127364060652, "loss": 2.049, "step": 217000 }, { "epoch": 6.1, "learning_rate": 0.00023941069520721258, "loss": 2.0455, "step": 217500 }, { "epoch": 6.12, "learning_rate": 0.00023927011677381865, "loss": 2.0773, "step": 218000 }, { "epoch": 6.13, "learning_rate": 0.0002391295383404247, "loss": 2.025, "step": 218500 }, { "epoch": 6.15, "learning_rate": 0.0002389889599070308, "loss": 2.0532, "step": 219000 }, { "epoch": 6.16, "learning_rate": 0.00023884838147363686, "loss": 2.0117, "step": 219500 }, { "epoch": 6.17, "learning_rate": 0.0002387078030402429, "loss": 2.0746, "step": 220000 }, { "epoch": 6.19, "learning_rate": 0.00023856722460684896, "loss": 2.1025, "step": 220500 }, { "epoch": 6.2, "learning_rate": 0.00023842664617345502, "loss": 2.0361, "step": 221000 }, { "epoch": 6.22, "learning_rate": 0.00023828606774006108, "loss": 2.0572, "step": 221500 }, { "epoch": 6.23, "learning_rate": 0.00023814548930666714, "loss": 2.0498, "step": 222000 }, { "epoch": 6.24, "learning_rate": 0.0002380049108732732, "loss": 2.0354, "step": 222500 }, { "epoch": 6.26, "learning_rate": 0.00023786433243987927, "loss": 2.0455, "step": 223000 }, { "epoch": 6.27, "learning_rate": 0.00023772375400648536, "loss": 2.0328, "step": 223500 }, { "epoch": 6.29, "learning_rate": 0.0002375831755730914, "loss": 2.0704, "step": 224000 }, { "epoch": 6.3, "learning_rate": 0.00023744259713969745, "loss": 2.065, "step": 224500 }, { "epoch": 6.31, "learning_rate": 0.00023730201870630351, "loss": 2.05, "step": 225000 }, { "epoch": 6.33, "learning_rate": 0.00023716144027290958, "loss": 2.0312, "step": 225500 }, { "epoch": 6.34, "learning_rate": 0.00023702086183951564, "loss": 2.0403, "step": 226000 }, { "epoch": 6.36, "learning_rate": 0.0002368802834061217, "loss": 2.0701, "step": 226500 }, { "epoch": 6.37, "learning_rate": 0.00023673970497272776, "loss": 2.0486, "step": 227000 }, { "epoch": 6.38, "learning_rate": 0.00023659912653933385, "loss": 2.049, "step": 227500 }, { "epoch": 6.4, "learning_rate": 0.00023645854810593986, "loss": 2.0493, "step": 228000 }, { "epoch": 6.41, "learning_rate": 0.00023631796967254595, "loss": 2.0892, "step": 228500 }, { "epoch": 6.43, "learning_rate": 0.000236177391239152, "loss": 2.0607, "step": 229000 }, { "epoch": 6.44, "learning_rate": 0.00023603681280575807, "loss": 2.0407, "step": 229500 }, { "epoch": 6.45, "learning_rate": 0.00023589623437236413, "loss": 2.0438, "step": 230000 }, { "epoch": 6.47, "learning_rate": 0.0002357556559389702, "loss": 2.0424, "step": 230500 }, { "epoch": 6.48, "learning_rate": 0.00023561507750557626, "loss": 2.0456, "step": 231000 }, { "epoch": 6.5, "learning_rate": 0.00023547449907218232, "loss": 2.0296, "step": 231500 }, { "epoch": 6.51, "learning_rate": 0.00023533392063878835, "loss": 2.0823, "step": 232000 }, { "epoch": 6.52, "learning_rate": 0.00023519334220539444, "loss": 2.0428, "step": 232500 }, { "epoch": 6.54, "learning_rate": 0.0002350527637720005, "loss": 2.0489, "step": 233000 }, { "epoch": 6.55, "learning_rate": 0.00023491218533860657, "loss": 2.0478, "step": 233500 }, { "epoch": 6.57, "learning_rate": 0.00023477160690521263, "loss": 2.0622, "step": 234000 }, { "epoch": 6.58, "learning_rate": 0.0002346310284718187, "loss": 2.0566, "step": 234500 }, { "epoch": 6.59, "learning_rate": 0.00023449045003842475, "loss": 2.0431, "step": 235000 }, { "epoch": 6.61, "learning_rate": 0.00023434987160503081, "loss": 2.0194, "step": 235500 }, { "epoch": 6.62, "learning_rate": 0.00023420929317163685, "loss": 1.9937, "step": 236000 }, { "epoch": 6.64, "learning_rate": 0.0002340687147382429, "loss": 2.0064, "step": 236500 }, { "epoch": 6.65, "learning_rate": 0.000233928136304849, "loss": 2.0274, "step": 237000 }, { "epoch": 6.66, "learning_rate": 0.00023378755787145506, "loss": 2.0512, "step": 237500 }, { "epoch": 6.68, "learning_rate": 0.00023364697943806112, "loss": 2.0267, "step": 238000 }, { "epoch": 6.69, "learning_rate": 0.00023350640100466719, "loss": 2.0159, "step": 238500 }, { "epoch": 6.71, "learning_rate": 0.00023336582257127325, "loss": 2.0458, "step": 239000 }, { "epoch": 6.72, "learning_rate": 0.0002332252441378793, "loss": 2.037, "step": 239500 }, { "epoch": 6.74, "learning_rate": 0.00023308466570448537, "loss": 2.0356, "step": 240000 }, { "epoch": 6.75, "learning_rate": 0.0002329440872710914, "loss": 2.0204, "step": 240500 }, { "epoch": 6.76, "learning_rate": 0.0002328035088376975, "loss": 2.0731, "step": 241000 }, { "epoch": 6.78, "learning_rate": 0.00023266293040430356, "loss": 2.0473, "step": 241500 }, { "epoch": 6.79, "learning_rate": 0.00023252235197090962, "loss": 2.0236, "step": 242000 }, { "epoch": 6.81, "learning_rate": 0.00023238177353751568, "loss": 2.0754, "step": 242500 }, { "epoch": 6.82, "learning_rate": 0.00023224119510412174, "loss": 2.0216, "step": 243000 }, { "epoch": 6.83, "learning_rate": 0.0002321006166707278, "loss": 2.0726, "step": 243500 }, { "epoch": 6.85, "learning_rate": 0.00023196003823733387, "loss": 2.037, "step": 244000 }, { "epoch": 6.86, "learning_rate": 0.0002318194598039399, "loss": 2.0778, "step": 244500 }, { "epoch": 6.88, "learning_rate": 0.00023167888137054596, "loss": 2.0366, "step": 245000 }, { "epoch": 6.89, "learning_rate": 0.00023153830293715205, "loss": 2.0554, "step": 245500 }, { "epoch": 6.9, "learning_rate": 0.00023139772450375811, "loss": 2.036, "step": 246000 }, { "epoch": 6.92, "learning_rate": 0.00023125714607036418, "loss": 2.0575, "step": 246500 }, { "epoch": 6.93, "learning_rate": 0.00023111656763697024, "loss": 2.0008, "step": 247000 }, { "epoch": 6.95, "learning_rate": 0.0002309759892035763, "loss": 2.0589, "step": 247500 }, { "epoch": 6.96, "learning_rate": 0.00023083541077018236, "loss": 2.0505, "step": 248000 }, { "epoch": 6.97, "learning_rate": 0.0002306948323367884, "loss": 2.0034, "step": 248500 }, { "epoch": 6.99, "learning_rate": 0.00023055425390339446, "loss": 2.0229, "step": 249000 }, { "epoch": 7.0, "learning_rate": 0.00023041367547000055, "loss": 2.0329, "step": 249500 }, { "epoch": 7.02, "learning_rate": 0.0002302730970366066, "loss": 1.9613, "step": 250000 }, { "epoch": 7.03, "learning_rate": 0.00023013251860321267, "loss": 1.9882, "step": 250500 }, { "epoch": 7.04, "learning_rate": 0.00022999194016981873, "loss": 1.9655, "step": 251000 }, { "epoch": 7.06, "learning_rate": 0.0002298513617364248, "loss": 1.9578, "step": 251500 }, { "epoch": 7.07, "learning_rate": 0.00022971078330303086, "loss": 2.033, "step": 252000 }, { "epoch": 7.09, "learning_rate": 0.0002295702048696369, "loss": 1.9843, "step": 252500 }, { "epoch": 7.1, "learning_rate": 0.00022942962643624295, "loss": 1.9682, "step": 253000 }, { "epoch": 7.11, "learning_rate": 0.00022928904800284902, "loss": 1.9537, "step": 253500 }, { "epoch": 7.13, "learning_rate": 0.0002291484695694551, "loss": 2.0058, "step": 254000 }, { "epoch": 7.14, "learning_rate": 0.00022900789113606117, "loss": 1.95, "step": 254500 }, { "epoch": 7.16, "learning_rate": 0.00022886731270266723, "loss": 1.9726, "step": 255000 }, { "epoch": 7.17, "learning_rate": 0.0002287267342692733, "loss": 1.9936, "step": 255500 }, { "epoch": 7.18, "learning_rate": 0.00022858615583587935, "loss": 1.9598, "step": 256000 }, { "epoch": 7.2, "learning_rate": 0.0002284455774024854, "loss": 1.9492, "step": 256500 }, { "epoch": 7.21, "learning_rate": 0.00022830499896909145, "loss": 1.9778, "step": 257000 }, { "epoch": 7.23, "learning_rate": 0.0002281644205356975, "loss": 1.9714, "step": 257500 }, { "epoch": 7.24, "learning_rate": 0.0002280238421023036, "loss": 1.9862, "step": 258000 }, { "epoch": 7.25, "learning_rate": 0.00022788326366890966, "loss": 1.997, "step": 258500 }, { "epoch": 7.27, "learning_rate": 0.00022774268523551573, "loss": 1.9797, "step": 259000 }, { "epoch": 7.28, "learning_rate": 0.0002276021068021218, "loss": 2.0386, "step": 259500 }, { "epoch": 7.3, "learning_rate": 0.00022746152836872785, "loss": 1.9809, "step": 260000 }, { "epoch": 7.31, "learning_rate": 0.00022732094993533388, "loss": 1.9879, "step": 260500 }, { "epoch": 7.32, "learning_rate": 0.00022718037150193995, "loss": 2.0213, "step": 261000 }, { "epoch": 7.34, "learning_rate": 0.000227039793068546, "loss": 1.9486, "step": 261500 }, { "epoch": 7.35, "learning_rate": 0.00022689921463515207, "loss": 1.977, "step": 262000 }, { "epoch": 7.37, "learning_rate": 0.00022675863620175816, "loss": 1.9849, "step": 262500 }, { "epoch": 7.38, "learning_rate": 0.00022661805776836422, "loss": 1.9433, "step": 263000 }, { "epoch": 7.39, "learning_rate": 0.00022647747933497028, "loss": 2.0007, "step": 263500 }, { "epoch": 7.41, "learning_rate": 0.00022633690090157634, "loss": 1.9975, "step": 264000 }, { "epoch": 7.42, "learning_rate": 0.00022619632246818238, "loss": 1.9288, "step": 264500 }, { "epoch": 7.44, "learning_rate": 0.00022605574403478844, "loss": 1.996, "step": 265000 }, { "epoch": 7.45, "learning_rate": 0.0002259151656013945, "loss": 1.9691, "step": 265500 }, { "epoch": 7.46, "learning_rate": 0.00022577458716800056, "loss": 1.9859, "step": 266000 }, { "epoch": 7.48, "learning_rate": 0.00022563400873460665, "loss": 1.9902, "step": 266500 }, { "epoch": 7.49, "learning_rate": 0.00022549343030121272, "loss": 1.9626, "step": 267000 }, { "epoch": 7.51, "learning_rate": 0.00022535285186781878, "loss": 1.9749, "step": 267500 }, { "epoch": 7.52, "learning_rate": 0.00022521227343442484, "loss": 1.9832, "step": 268000 }, { "epoch": 7.53, "learning_rate": 0.0002250716950010309, "loss": 1.9943, "step": 268500 }, { "epoch": 7.55, "learning_rate": 0.00022493111656763694, "loss": 1.9564, "step": 269000 }, { "epoch": 7.56, "learning_rate": 0.000224790538134243, "loss": 1.9827, "step": 269500 }, { "epoch": 7.58, "learning_rate": 0.00022464995970084906, "loss": 1.9721, "step": 270000 }, { "epoch": 7.59, "learning_rate": 0.00022450938126745512, "loss": 1.9993, "step": 270500 }, { "epoch": 7.6, "learning_rate": 0.0002243688028340612, "loss": 1.9978, "step": 271000 }, { "epoch": 7.62, "learning_rate": 0.00022422822440066727, "loss": 1.9862, "step": 271500 }, { "epoch": 7.63, "learning_rate": 0.00022408764596727334, "loss": 1.9931, "step": 272000 }, { "epoch": 7.65, "learning_rate": 0.0002239470675338794, "loss": 1.9609, "step": 272500 }, { "epoch": 7.66, "learning_rate": 0.00022380648910048543, "loss": 1.9802, "step": 273000 }, { "epoch": 7.68, "learning_rate": 0.0002236659106670915, "loss": 1.9919, "step": 273500 }, { "epoch": 7.69, "learning_rate": 0.00022352533223369756, "loss": 1.9749, "step": 274000 }, { "epoch": 7.7, "learning_rate": 0.00022338475380030362, "loss": 2.0121, "step": 274500 }, { "epoch": 7.72, "learning_rate": 0.0002232441753669097, "loss": 1.9941, "step": 275000 }, { "epoch": 7.73, "learning_rate": 0.00022310359693351577, "loss": 1.9829, "step": 275500 }, { "epoch": 7.75, "learning_rate": 0.00022296301850012183, "loss": 2.0042, "step": 276000 }, { "epoch": 7.76, "learning_rate": 0.0002228224400667279, "loss": 1.9994, "step": 276500 }, { "epoch": 7.77, "learning_rate": 0.00022268186163333393, "loss": 2.0085, "step": 277000 }, { "epoch": 7.79, "learning_rate": 0.00022254128319994, "loss": 1.9972, "step": 277500 }, { "epoch": 7.8, "learning_rate": 0.00022240070476654605, "loss": 2.0229, "step": 278000 }, { "epoch": 7.82, "learning_rate": 0.0002222601263331521, "loss": 1.9405, "step": 278500 }, { "epoch": 7.83, "learning_rate": 0.00022211954789975818, "loss": 1.9859, "step": 279000 }, { "epoch": 7.84, "learning_rate": 0.00022197896946636426, "loss": 1.9755, "step": 279500 }, { "epoch": 7.86, "learning_rate": 0.00022183839103297033, "loss": 1.9853, "step": 280000 }, { "epoch": 7.87, "learning_rate": 0.0002216978125995764, "loss": 2.0005, "step": 280500 }, { "epoch": 7.89, "learning_rate": 0.00022155723416618242, "loss": 1.9459, "step": 281000 }, { "epoch": 7.9, "learning_rate": 0.00022141665573278848, "loss": 1.9943, "step": 281500 }, { "epoch": 7.91, "learning_rate": 0.00022127607729939455, "loss": 1.9501, "step": 282000 }, { "epoch": 7.93, "learning_rate": 0.0002211354988660006, "loss": 1.9885, "step": 282500 }, { "epoch": 7.94, "learning_rate": 0.00022099492043260667, "loss": 2.0035, "step": 283000 }, { "epoch": 7.96, "learning_rate": 0.00022085434199921276, "loss": 2.002, "step": 283500 }, { "epoch": 7.97, "learning_rate": 0.00022071376356581882, "loss": 1.992, "step": 284000 }, { "epoch": 7.98, "learning_rate": 0.00022057318513242488, "loss": 2.0223, "step": 284500 }, { "epoch": 8.0, "learning_rate": 0.00022043260669903092, "loss": 2.0528, "step": 285000 }, { "epoch": 8.01, "learning_rate": 0.00022029202826563698, "loss": 1.9177, "step": 285500 }, { "epoch": 8.03, "learning_rate": 0.00022015144983224304, "loss": 1.9261, "step": 286000 }, { "epoch": 8.04, "learning_rate": 0.0002200108713988491, "loss": 1.9438, "step": 286500 }, { "epoch": 8.05, "learning_rate": 0.00021987029296545517, "loss": 1.9014, "step": 287000 }, { "epoch": 8.07, "learning_rate": 0.00021972971453206123, "loss": 1.9587, "step": 287500 }, { "epoch": 8.08, "learning_rate": 0.00021958913609866732, "loss": 1.9076, "step": 288000 }, { "epoch": 8.1, "learning_rate": 0.00021944855766527338, "loss": 1.9177, "step": 288500 }, { "epoch": 8.11, "learning_rate": 0.00021930797923187941, "loss": 1.9116, "step": 289000 }, { "epoch": 8.12, "learning_rate": 0.00021916740079848548, "loss": 1.9303, "step": 289500 }, { "epoch": 8.14, "learning_rate": 0.00021902682236509154, "loss": 1.9255, "step": 290000 }, { "epoch": 8.15, "learning_rate": 0.0002188862439316976, "loss": 1.9152, "step": 290500 }, { "epoch": 8.17, "learning_rate": 0.00021874566549830366, "loss": 1.9149, "step": 291000 }, { "epoch": 8.18, "learning_rate": 0.00021860508706490972, "loss": 1.9298, "step": 291500 }, { "epoch": 8.19, "learning_rate": 0.0002184645086315158, "loss": 1.9279, "step": 292000 }, { "epoch": 8.21, "learning_rate": 0.00021832393019812187, "loss": 1.9413, "step": 292500 }, { "epoch": 8.22, "learning_rate": 0.0002181833517647279, "loss": 1.9344, "step": 293000 }, { "epoch": 8.24, "learning_rate": 0.00021804277333133397, "loss": 1.9529, "step": 293500 }, { "epoch": 8.25, "learning_rate": 0.00021790219489794003, "loss": 1.9493, "step": 294000 }, { "epoch": 8.26, "learning_rate": 0.0002177616164645461, "loss": 1.9435, "step": 294500 }, { "epoch": 8.28, "learning_rate": 0.00021762103803115216, "loss": 1.9373, "step": 295000 }, { "epoch": 8.29, "learning_rate": 0.00021748045959775822, "loss": 1.9651, "step": 295500 }, { "epoch": 8.31, "learning_rate": 0.00021733988116436428, "loss": 1.9283, "step": 296000 }, { "epoch": 8.32, "learning_rate": 0.00021719930273097037, "loss": 1.9202, "step": 296500 }, { "epoch": 8.33, "learning_rate": 0.00021705872429757643, "loss": 1.9429, "step": 297000 }, { "epoch": 8.35, "learning_rate": 0.00021691814586418247, "loss": 1.9262, "step": 297500 }, { "epoch": 8.36, "learning_rate": 0.00021677756743078853, "loss": 1.9686, "step": 298000 }, { "epoch": 8.38, "learning_rate": 0.0002166369889973946, "loss": 1.9541, "step": 298500 }, { "epoch": 8.39, "learning_rate": 0.00021649641056400065, "loss": 1.9397, "step": 299000 }, { "epoch": 8.4, "learning_rate": 0.00021635583213060671, "loss": 1.9521, "step": 299500 }, { "epoch": 8.42, "learning_rate": 0.00021621525369721278, "loss": 1.9354, "step": 300000 }, { "epoch": 8.43, "learning_rate": 0.00021607467526381887, "loss": 1.933, "step": 300500 }, { "epoch": 8.45, "learning_rate": 0.00021593409683042493, "loss": 1.9874, "step": 301000 }, { "epoch": 8.46, "learning_rate": 0.00021579351839703096, "loss": 1.9077, "step": 301500 }, { "epoch": 8.47, "learning_rate": 0.00021565293996363702, "loss": 1.9458, "step": 302000 }, { "epoch": 8.49, "learning_rate": 0.00021551236153024309, "loss": 1.9521, "step": 302500 }, { "epoch": 8.5, "learning_rate": 0.00021537178309684915, "loss": 1.9498, "step": 303000 }, { "epoch": 8.52, "learning_rate": 0.0002152312046634552, "loss": 1.918, "step": 303500 }, { "epoch": 8.53, "learning_rate": 0.00021509062623006127, "loss": 1.9309, "step": 304000 }, { "epoch": 8.55, "learning_rate": 0.00021495004779666733, "loss": 1.9431, "step": 304500 }, { "epoch": 8.56, "learning_rate": 0.00021480946936327342, "loss": 1.9449, "step": 305000 }, { "epoch": 8.57, "learning_rate": 0.00021466889092987946, "loss": 1.9312, "step": 305500 }, { "epoch": 8.59, "learning_rate": 0.00021452831249648552, "loss": 1.9315, "step": 306000 }, { "epoch": 8.6, "learning_rate": 0.00021438773406309158, "loss": 1.9769, "step": 306500 }, { "epoch": 8.62, "learning_rate": 0.00021424715562969764, "loss": 1.9534, "step": 307000 }, { "epoch": 8.63, "learning_rate": 0.0002141065771963037, "loss": 1.9379, "step": 307500 }, { "epoch": 8.64, "learning_rate": 0.00021396599876290977, "loss": 1.9297, "step": 308000 }, { "epoch": 8.66, "learning_rate": 0.00021382542032951583, "loss": 1.9221, "step": 308500 }, { "epoch": 8.67, "learning_rate": 0.00021368484189612192, "loss": 1.9232, "step": 309000 }, { "epoch": 8.69, "learning_rate": 0.00021354426346272793, "loss": 1.9292, "step": 309500 }, { "epoch": 8.7, "learning_rate": 0.00021340368502933401, "loss": 1.9457, "step": 310000 }, { "epoch": 8.71, "learning_rate": 0.00021326310659594008, "loss": 1.9498, "step": 310500 }, { "epoch": 8.73, "learning_rate": 0.00021312252816254614, "loss": 1.9103, "step": 311000 }, { "epoch": 8.74, "learning_rate": 0.0002129819497291522, "loss": 1.9209, "step": 311500 }, { "epoch": 8.76, "learning_rate": 0.00021284137129575826, "loss": 1.9357, "step": 312000 }, { "epoch": 8.77, "learning_rate": 0.00021270079286236432, "loss": 1.9045, "step": 312500 }, { "epoch": 8.78, "learning_rate": 0.0002125602144289704, "loss": 1.9402, "step": 313000 }, { "epoch": 8.8, "learning_rate": 0.00021241963599557642, "loss": 1.8733, "step": 313500 }, { "epoch": 8.81, "learning_rate": 0.0002122790575621825, "loss": 1.9188, "step": 314000 }, { "epoch": 8.83, "learning_rate": 0.00021213847912878857, "loss": 1.94, "step": 314500 }, { "epoch": 8.84, "learning_rate": 0.00021199790069539463, "loss": 1.969, "step": 315000 }, { "epoch": 8.85, "learning_rate": 0.0002118573222620007, "loss": 1.9897, "step": 315500 }, { "epoch": 8.87, "learning_rate": 0.00021171674382860676, "loss": 1.9422, "step": 316000 }, { "epoch": 8.88, "learning_rate": 0.00021157616539521282, "loss": 1.9329, "step": 316500 }, { "epoch": 8.9, "learning_rate": 0.00021143558696181888, "loss": 1.9291, "step": 317000 }, { "epoch": 8.91, "learning_rate": 0.00021129500852842492, "loss": 1.9177, "step": 317500 }, { "epoch": 8.92, "learning_rate": 0.00021115443009503098, "loss": 1.9471, "step": 318000 }, { "epoch": 8.94, "learning_rate": 0.00021101385166163707, "loss": 1.8992, "step": 318500 }, { "epoch": 8.95, "learning_rate": 0.00021087327322824313, "loss": 1.9003, "step": 319000 }, { "epoch": 8.97, "learning_rate": 0.0002107326947948492, "loss": 1.9663, "step": 319500 }, { "epoch": 8.98, "learning_rate": 0.00021059211636145525, "loss": 1.963, "step": 320000 }, { "epoch": 8.99, "learning_rate": 0.00021045153792806132, "loss": 1.9454, "step": 320500 }, { "epoch": 9.01, "learning_rate": 0.00021031095949466738, "loss": 1.8856, "step": 321000 }, { "epoch": 9.02, "learning_rate": 0.0002101703810612734, "loss": 1.8773, "step": 321500 }, { "epoch": 9.04, "learning_rate": 0.00021002980262787947, "loss": 1.8712, "step": 322000 }, { "epoch": 9.05, "learning_rate": 0.00020988922419448556, "loss": 1.8868, "step": 322500 }, { "epoch": 9.06, "learning_rate": 0.00020974864576109163, "loss": 1.8753, "step": 323000 }, { "epoch": 9.08, "learning_rate": 0.0002096080673276977, "loss": 1.9066, "step": 323500 }, { "epoch": 9.09, "learning_rate": 0.00020946748889430375, "loss": 1.8513, "step": 324000 }, { "epoch": 9.11, "learning_rate": 0.0002093269104609098, "loss": 1.8249, "step": 324500 }, { "epoch": 9.12, "learning_rate": 0.00020918633202751587, "loss": 1.8887, "step": 325000 }, { "epoch": 9.13, "learning_rate": 0.00020904575359412193, "loss": 1.8882, "step": 325500 }, { "epoch": 9.15, "learning_rate": 0.00020890517516072797, "loss": 1.8697, "step": 326000 }, { "epoch": 9.16, "learning_rate": 0.00020876459672733403, "loss": 1.8819, "step": 326500 }, { "epoch": 9.18, "learning_rate": 0.00020862401829394012, "loss": 1.8538, "step": 327000 }, { "epoch": 9.19, "learning_rate": 0.00020848343986054618, "loss": 1.882, "step": 327500 }, { "epoch": 9.2, "learning_rate": 0.00020834286142715224, "loss": 1.8733, "step": 328000 }, { "epoch": 9.22, "learning_rate": 0.0002082022829937583, "loss": 1.8481, "step": 328500 }, { "epoch": 9.23, "learning_rate": 0.00020806170456036437, "loss": 1.886, "step": 329000 }, { "epoch": 9.25, "learning_rate": 0.00020792112612697043, "loss": 1.9102, "step": 329500 }, { "epoch": 9.26, "learning_rate": 0.00020778054769357647, "loss": 1.8617, "step": 330000 }, { "epoch": 9.27, "learning_rate": 0.00020763996926018253, "loss": 1.8809, "step": 330500 }, { "epoch": 9.29, "learning_rate": 0.00020749939082678862, "loss": 1.8844, "step": 331000 }, { "epoch": 9.3, "learning_rate": 0.00020735881239339468, "loss": 1.8758, "step": 331500 }, { "epoch": 9.32, "learning_rate": 0.00020721823396000074, "loss": 1.9132, "step": 332000 }, { "epoch": 9.33, "learning_rate": 0.0002070776555266068, "loss": 1.8819, "step": 332500 }, { "epoch": 9.34, "learning_rate": 0.00020693707709321286, "loss": 1.8851, "step": 333000 }, { "epoch": 9.36, "learning_rate": 0.00020679649865981893, "loss": 1.8757, "step": 333500 }, { "epoch": 9.37, "learning_rate": 0.00020665592022642496, "loss": 1.8982, "step": 334000 }, { "epoch": 9.39, "learning_rate": 0.00020651534179303102, "loss": 1.8726, "step": 334500 }, { "epoch": 9.4, "learning_rate": 0.00020637476335963708, "loss": 1.871, "step": 335000 }, { "epoch": 9.42, "learning_rate": 0.00020623418492624317, "loss": 1.8679, "step": 335500 }, { "epoch": 9.43, "learning_rate": 0.00020609360649284924, "loss": 1.846, "step": 336000 }, { "epoch": 9.44, "learning_rate": 0.0002059530280594553, "loss": 1.8835, "step": 336500 }, { "epoch": 9.46, "learning_rate": 0.00020581244962606136, "loss": 1.8742, "step": 337000 }, { "epoch": 9.47, "learning_rate": 0.00020567187119266742, "loss": 1.9074, "step": 337500 }, { "epoch": 9.49, "learning_rate": 0.00020553129275927346, "loss": 1.8882, "step": 338000 }, { "epoch": 9.5, "learning_rate": 0.00020539071432587952, "loss": 1.8665, "step": 338500 }, { "epoch": 9.51, "learning_rate": 0.00020525013589248558, "loss": 1.8989, "step": 339000 }, { "epoch": 9.53, "learning_rate": 0.00020510955745909167, "loss": 1.892, "step": 339500 }, { "epoch": 9.54, "learning_rate": 0.00020496897902569773, "loss": 1.8602, "step": 340000 }, { "epoch": 9.56, "learning_rate": 0.0002048284005923038, "loss": 1.8711, "step": 340500 }, { "epoch": 9.57, "learning_rate": 0.00020468782215890985, "loss": 1.8755, "step": 341000 }, { "epoch": 9.58, "learning_rate": 0.00020454724372551592, "loss": 1.9111, "step": 341500 }, { "epoch": 9.6, "learning_rate": 0.00020440666529212195, "loss": 1.8911, "step": 342000 }, { "epoch": 9.61, "learning_rate": 0.000204266086858728, "loss": 1.8861, "step": 342500 }, { "epoch": 9.63, "learning_rate": 0.00020412550842533408, "loss": 1.9273, "step": 343000 }, { "epoch": 9.64, "learning_rate": 0.00020398492999194014, "loss": 1.8982, "step": 343500 }, { "epoch": 9.65, "learning_rate": 0.00020384435155854623, "loss": 1.9151, "step": 344000 }, { "epoch": 9.67, "learning_rate": 0.0002037037731251523, "loss": 1.9015, "step": 344500 }, { "epoch": 9.68, "learning_rate": 0.00020356319469175835, "loss": 1.888, "step": 345000 }, { "epoch": 9.7, "learning_rate": 0.0002034226162583644, "loss": 1.8713, "step": 345500 }, { "epoch": 9.71, "learning_rate": 0.00020328203782497045, "loss": 1.8942, "step": 346000 }, { "epoch": 9.72, "learning_rate": 0.0002031414593915765, "loss": 1.8574, "step": 346500 }, { "epoch": 9.74, "learning_rate": 0.00020300088095818257, "loss": 1.9104, "step": 347000 }, { "epoch": 9.75, "learning_rate": 0.00020286030252478863, "loss": 1.8821, "step": 347500 }, { "epoch": 9.77, "learning_rate": 0.00020271972409139472, "loss": 1.8594, "step": 348000 }, { "epoch": 9.78, "learning_rate": 0.00020257914565800078, "loss": 1.854, "step": 348500 }, { "epoch": 9.79, "learning_rate": 0.00020243856722460685, "loss": 1.901, "step": 349000 }, { "epoch": 9.81, "learning_rate": 0.0002022979887912129, "loss": 1.9013, "step": 349500 }, { "epoch": 9.82, "learning_rate": 0.00020215741035781894, "loss": 1.8955, "step": 350000 }, { "epoch": 9.84, "learning_rate": 0.000202016831924425, "loss": 1.8922, "step": 350500 }, { "epoch": 9.85, "learning_rate": 0.00020187625349103107, "loss": 1.8906, "step": 351000 }, { "epoch": 9.86, "learning_rate": 0.00020173567505763713, "loss": 1.8719, "step": 351500 }, { "epoch": 9.88, "learning_rate": 0.0002015950966242432, "loss": 1.9044, "step": 352000 }, { "epoch": 9.89, "learning_rate": 0.00020145451819084928, "loss": 1.8842, "step": 352500 }, { "epoch": 9.91, "learning_rate": 0.00020131393975745534, "loss": 1.8419, "step": 353000 }, { "epoch": 9.92, "learning_rate": 0.0002011733613240614, "loss": 1.9004, "step": 353500 }, { "epoch": 9.93, "learning_rate": 0.00020103278289066746, "loss": 1.8882, "step": 354000 }, { "epoch": 9.95, "learning_rate": 0.0002008922044572735, "loss": 1.9269, "step": 354500 }, { "epoch": 9.96, "learning_rate": 0.00020075162602387956, "loss": 1.8875, "step": 355000 }, { "epoch": 9.98, "learning_rate": 0.00020061104759048562, "loss": 1.8902, "step": 355500 }, { "epoch": 9.99, "learning_rate": 0.00020047046915709169, "loss": 1.8823, "step": 356000 }, { "epoch": 10.0, "learning_rate": 0.00020032989072369777, "loss": 1.8574, "step": 356500 }, { "epoch": 10.02, "learning_rate": 0.00020018931229030384, "loss": 1.7962, "step": 357000 }, { "epoch": 10.03, "learning_rate": 0.0002000487338569099, "loss": 1.8085, "step": 357500 }, { "epoch": 10.05, "learning_rate": 0.00019990815542351596, "loss": 1.8401, "step": 358000 }, { "epoch": 10.06, "learning_rate": 0.000199767576990122, "loss": 1.8218, "step": 358500 }, { "epoch": 10.07, "learning_rate": 0.00019962699855672806, "loss": 1.8065, "step": 359000 }, { "epoch": 10.09, "learning_rate": 0.00019948642012333412, "loss": 1.8245, "step": 359500 }, { "epoch": 10.1, "learning_rate": 0.00019934584168994018, "loss": 1.8177, "step": 360000 }, { "epoch": 10.12, "learning_rate": 0.00019920526325654624, "loss": 1.8626, "step": 360500 }, { "epoch": 10.13, "learning_rate": 0.00019906468482315233, "loss": 1.8341, "step": 361000 }, { "epoch": 10.14, "learning_rate": 0.0001989241063897584, "loss": 1.8424, "step": 361500 }, { "epoch": 10.16, "learning_rate": 0.00019878352795636446, "loss": 1.8221, "step": 362000 }, { "epoch": 10.17, "learning_rate": 0.0001986429495229705, "loss": 1.8554, "step": 362500 }, { "epoch": 10.19, "learning_rate": 0.00019850237108957655, "loss": 1.8426, "step": 363000 }, { "epoch": 10.2, "learning_rate": 0.00019836179265618261, "loss": 1.8355, "step": 363500 }, { "epoch": 10.21, "learning_rate": 0.00019822121422278868, "loss": 1.8147, "step": 364000 }, { "epoch": 10.23, "learning_rate": 0.00019808063578939474, "loss": 1.8212, "step": 364500 }, { "epoch": 10.24, "learning_rate": 0.00019794005735600083, "loss": 1.8188, "step": 365000 }, { "epoch": 10.26, "learning_rate": 0.0001977994789226069, "loss": 1.8313, "step": 365500 }, { "epoch": 10.27, "learning_rate": 0.00019765890048921295, "loss": 1.8165, "step": 366000 }, { "epoch": 10.28, "learning_rate": 0.00019751832205581899, "loss": 1.8183, "step": 366500 }, { "epoch": 10.3, "learning_rate": 0.00019737774362242505, "loss": 1.8196, "step": 367000 }, { "epoch": 10.31, "learning_rate": 0.0001972371651890311, "loss": 1.8276, "step": 367500 }, { "epoch": 10.33, "learning_rate": 0.00019709658675563717, "loss": 1.8811, "step": 368000 }, { "epoch": 10.34, "learning_rate": 0.00019695600832224323, "loss": 1.8258, "step": 368500 }, { "epoch": 10.36, "learning_rate": 0.0001968154298888493, "loss": 1.8061, "step": 369000 }, { "epoch": 10.37, "learning_rate": 0.00019667485145545538, "loss": 1.8099, "step": 369500 }, { "epoch": 10.38, "learning_rate": 0.00019653427302206145, "loss": 1.8491, "step": 370000 }, { "epoch": 10.4, "learning_rate": 0.00019639369458866748, "loss": 1.8424, "step": 370500 }, { "epoch": 10.41, "learning_rate": 0.00019625311615527354, "loss": 1.8419, "step": 371000 }, { "epoch": 10.43, "learning_rate": 0.0001961125377218796, "loss": 1.828, "step": 371500 }, { "epoch": 10.44, "learning_rate": 0.00019597195928848567, "loss": 1.8339, "step": 372000 }, { "epoch": 10.45, "learning_rate": 0.00019583138085509173, "loss": 1.8416, "step": 372500 }, { "epoch": 10.47, "learning_rate": 0.0001956908024216978, "loss": 1.8231, "step": 373000 }, { "epoch": 10.48, "learning_rate": 0.00019555022398830388, "loss": 1.8288, "step": 373500 }, { "epoch": 10.5, "learning_rate": 0.00019540964555490994, "loss": 1.8484, "step": 374000 }, { "epoch": 10.51, "learning_rate": 0.00019526906712151598, "loss": 1.8453, "step": 374500 }, { "epoch": 10.52, "learning_rate": 0.00019512848868812204, "loss": 1.8539, "step": 375000 }, { "epoch": 10.54, "learning_rate": 0.0001949879102547281, "loss": 1.869, "step": 375500 }, { "epoch": 10.55, "learning_rate": 0.00019484733182133416, "loss": 1.8507, "step": 376000 }, { "epoch": 10.57, "learning_rate": 0.00019470675338794022, "loss": 1.8167, "step": 376500 }, { "epoch": 10.58, "learning_rate": 0.0001945661749545463, "loss": 1.823, "step": 377000 }, { "epoch": 10.59, "learning_rate": 0.00019442559652115235, "loss": 1.8547, "step": 377500 }, { "epoch": 10.61, "learning_rate": 0.00019428501808775844, "loss": 1.8602, "step": 378000 }, { "epoch": 10.62, "learning_rate": 0.00019414443965436445, "loss": 1.8406, "step": 378500 }, { "epoch": 10.64, "learning_rate": 0.00019400386122097053, "loss": 1.8683, "step": 379000 }, { "epoch": 10.65, "learning_rate": 0.0001938632827875766, "loss": 1.8577, "step": 379500 }, { "epoch": 10.66, "learning_rate": 0.00019372270435418266, "loss": 1.8518, "step": 380000 }, { "epoch": 10.68, "learning_rate": 0.00019358212592078872, "loss": 1.8134, "step": 380500 }, { "epoch": 10.69, "learning_rate": 0.00019344154748739478, "loss": 1.8355, "step": 381000 }, { "epoch": 10.71, "learning_rate": 0.00019330096905400084, "loss": 1.8328, "step": 381500 }, { "epoch": 10.72, "learning_rate": 0.00019316039062060693, "loss": 1.8329, "step": 382000 }, { "epoch": 10.73, "learning_rate": 0.00019301981218721294, "loss": 1.8755, "step": 382500 }, { "epoch": 10.75, "learning_rate": 0.00019287923375381903, "loss": 1.84, "step": 383000 }, { "epoch": 10.76, "learning_rate": 0.0001927386553204251, "loss": 1.8026, "step": 383500 }, { "epoch": 10.78, "learning_rate": 0.00019259807688703115, "loss": 1.8435, "step": 384000 }, { "epoch": 10.79, "learning_rate": 0.00019245749845363722, "loss": 1.8323, "step": 384500 }, { "epoch": 10.8, "learning_rate": 0.00019231692002024328, "loss": 1.8357, "step": 385000 }, { "epoch": 10.82, "learning_rate": 0.00019217634158684934, "loss": 1.8223, "step": 385500 }, { "epoch": 10.83, "learning_rate": 0.0001920357631534554, "loss": 1.8558, "step": 386000 }, { "epoch": 10.85, "learning_rate": 0.0001918951847200615, "loss": 1.8361, "step": 386500 }, { "epoch": 10.86, "learning_rate": 0.0001917546062866675, "loss": 1.8391, "step": 387000 }, { "epoch": 10.87, "learning_rate": 0.0001916140278532736, "loss": 1.9066, "step": 387500 }, { "epoch": 10.89, "learning_rate": 0.00019147344941987965, "loss": 1.8619, "step": 388000 }, { "epoch": 10.9, "learning_rate": 0.0001913328709864857, "loss": 1.8718, "step": 388500 }, { "epoch": 10.92, "learning_rate": 0.00019119229255309177, "loss": 1.8475, "step": 389000 }, { "epoch": 10.93, "learning_rate": 0.00019105171411969783, "loss": 1.8525, "step": 389500 }, { "epoch": 10.94, "learning_rate": 0.0001909111356863039, "loss": 1.8086, "step": 390000 }, { "epoch": 10.96, "learning_rate": 0.00019077055725290999, "loss": 1.8516, "step": 390500 }, { "epoch": 10.97, "learning_rate": 0.000190629978819516, "loss": 1.8374, "step": 391000 }, { "epoch": 10.99, "learning_rate": 0.00019048940038612208, "loss": 1.8353, "step": 391500 }, { "epoch": 11.0, "learning_rate": 0.00019034882195272814, "loss": 1.8845, "step": 392000 }, { "epoch": 11.01, "learning_rate": 0.0001902082435193342, "loss": 1.8034, "step": 392500 }, { "epoch": 11.03, "learning_rate": 0.00019006766508594027, "loss": 1.7912, "step": 393000 }, { "epoch": 11.04, "learning_rate": 0.00018992708665254633, "loss": 1.7747, "step": 393500 }, { "epoch": 11.06, "learning_rate": 0.0001897865082191524, "loss": 1.7816, "step": 394000 }, { "epoch": 11.07, "learning_rate": 0.00018964592978575845, "loss": 1.7557, "step": 394500 }, { "epoch": 11.08, "learning_rate": 0.0001895053513523645, "loss": 1.7835, "step": 395000 }, { "epoch": 11.1, "learning_rate": 0.00018936477291897055, "loss": 1.7824, "step": 395500 }, { "epoch": 11.11, "learning_rate": 0.00018922419448557664, "loss": 1.7959, "step": 396000 }, { "epoch": 11.13, "learning_rate": 0.0001890836160521827, "loss": 1.7989, "step": 396500 }, { "epoch": 11.14, "learning_rate": 0.00018894303761878876, "loss": 1.761, "step": 397000 }, { "epoch": 11.15, "learning_rate": 0.00018880245918539483, "loss": 1.7773, "step": 397500 }, { "epoch": 11.17, "learning_rate": 0.0001886618807520009, "loss": 1.7956, "step": 398000 }, { "epoch": 11.18, "learning_rate": 0.00018852130231860695, "loss": 1.7832, "step": 398500 }, { "epoch": 11.2, "learning_rate": 0.00018838072388521298, "loss": 1.82, "step": 399000 }, { "epoch": 11.21, "learning_rate": 0.00018824014545181905, "loss": 1.7774, "step": 399500 }, { "epoch": 11.23, "learning_rate": 0.00018809956701842514, "loss": 1.8108, "step": 400000 }, { "epoch": 11.24, "learning_rate": 0.0001879589885850312, "loss": 1.8072, "step": 400500 }, { "epoch": 11.25, "learning_rate": 0.00018781841015163726, "loss": 1.7976, "step": 401000 }, { "epoch": 11.27, "learning_rate": 0.00018767783171824332, "loss": 1.7671, "step": 401500 }, { "epoch": 11.28, "learning_rate": 0.00018753725328484938, "loss": 1.8202, "step": 402000 }, { "epoch": 11.3, "learning_rate": 0.00018739667485145545, "loss": 1.7835, "step": 402500 }, { "epoch": 11.31, "learning_rate": 0.00018725609641806148, "loss": 1.79, "step": 403000 }, { "epoch": 11.32, "learning_rate": 0.00018711551798466754, "loss": 1.8187, "step": 403500 }, { "epoch": 11.34, "learning_rate": 0.0001869749395512736, "loss": 1.8261, "step": 404000 }, { "epoch": 11.35, "learning_rate": 0.0001868343611178797, "loss": 1.7671, "step": 404500 }, { "epoch": 11.37, "learning_rate": 0.00018669378268448575, "loss": 1.797, "step": 405000 }, { "epoch": 11.38, "learning_rate": 0.00018655320425109182, "loss": 1.7898, "step": 405500 }, { "epoch": 11.39, "learning_rate": 0.00018641262581769788, "loss": 1.7916, "step": 406000 }, { "epoch": 11.41, "learning_rate": 0.00018627204738430394, "loss": 1.852, "step": 406500 }, { "epoch": 11.42, "learning_rate": 0.00018613146895090998, "loss": 1.7775, "step": 407000 }, { "epoch": 11.44, "learning_rate": 0.00018599089051751604, "loss": 1.8036, "step": 407500 }, { "epoch": 11.45, "learning_rate": 0.0001858503120841221, "loss": 1.8111, "step": 408000 }, { "epoch": 11.46, "learning_rate": 0.0001857097336507282, "loss": 1.7921, "step": 408500 }, { "epoch": 11.48, "learning_rate": 0.00018556915521733425, "loss": 1.8084, "step": 409000 }, { "epoch": 11.49, "learning_rate": 0.0001854285767839403, "loss": 1.7578, "step": 409500 }, { "epoch": 11.51, "learning_rate": 0.00018528799835054637, "loss": 1.7714, "step": 410000 }, { "epoch": 11.52, "learning_rate": 0.00018514741991715244, "loss": 1.8062, "step": 410500 }, { "epoch": 11.53, "learning_rate": 0.00018500684148375847, "loss": 1.8261, "step": 411000 }, { "epoch": 11.55, "learning_rate": 0.00018486626305036453, "loss": 1.7864, "step": 411500 }, { "epoch": 11.56, "learning_rate": 0.0001847256846169706, "loss": 1.8151, "step": 412000 }, { "epoch": 11.58, "learning_rate": 0.00018458510618357666, "loss": 1.8115, "step": 412500 }, { "epoch": 11.59, "learning_rate": 0.00018444452775018275, "loss": 1.8313, "step": 413000 }, { "epoch": 11.6, "learning_rate": 0.0001843039493167888, "loss": 1.8024, "step": 413500 }, { "epoch": 11.62, "learning_rate": 0.00018416337088339487, "loss": 1.793, "step": 414000 }, { "epoch": 11.63, "learning_rate": 0.00018402279245000093, "loss": 1.7891, "step": 414500 }, { "epoch": 11.65, "learning_rate": 0.000183882214016607, "loss": 1.7916, "step": 415000 }, { "epoch": 11.66, "learning_rate": 0.00018374163558321303, "loss": 1.7597, "step": 415500 }, { "epoch": 11.67, "learning_rate": 0.0001836010571498191, "loss": 1.809, "step": 416000 }, { "epoch": 11.69, "learning_rate": 0.00018346047871642515, "loss": 1.7811, "step": 416500 }, { "epoch": 11.7, "learning_rate": 0.00018331990028303124, "loss": 1.8246, "step": 417000 }, { "epoch": 11.72, "learning_rate": 0.0001831793218496373, "loss": 1.8383, "step": 417500 }, { "epoch": 11.73, "learning_rate": 0.00018303874341624336, "loss": 1.7673, "step": 418000 }, { "epoch": 11.74, "learning_rate": 0.00018289816498284943, "loss": 1.7972, "step": 418500 }, { "epoch": 11.76, "learning_rate": 0.0001827575865494555, "loss": 1.8094, "step": 419000 }, { "epoch": 11.77, "learning_rate": 0.00018261700811606152, "loss": 1.7746, "step": 419500 }, { "epoch": 11.79, "learning_rate": 0.00018247642968266759, "loss": 1.8126, "step": 420000 }, { "epoch": 11.8, "learning_rate": 0.00018233585124927365, "loss": 1.8074, "step": 420500 }, { "epoch": 11.81, "learning_rate": 0.0001821952728158797, "loss": 1.8232, "step": 421000 }, { "epoch": 11.83, "learning_rate": 0.0001820546943824858, "loss": 1.8191, "step": 421500 }, { "epoch": 11.84, "learning_rate": 0.00018191411594909186, "loss": 1.7828, "step": 422000 }, { "epoch": 11.86, "learning_rate": 0.00018177353751569792, "loss": 1.7953, "step": 422500 }, { "epoch": 11.87, "learning_rate": 0.00018163295908230398, "loss": 1.8094, "step": 423000 }, { "epoch": 11.88, "learning_rate": 0.00018149238064891002, "loss": 1.8087, "step": 423500 }, { "epoch": 11.9, "learning_rate": 0.00018135180221551608, "loss": 1.819, "step": 424000 }, { "epoch": 11.91, "learning_rate": 0.00018121122378212214, "loss": 1.7811, "step": 424500 }, { "epoch": 11.93, "learning_rate": 0.0001810706453487282, "loss": 1.7685, "step": 425000 }, { "epoch": 11.94, "learning_rate": 0.0001809300669153343, "loss": 1.7985, "step": 425500 }, { "epoch": 11.95, "learning_rate": 0.00018078948848194036, "loss": 1.7791, "step": 426000 }, { "epoch": 11.97, "learning_rate": 0.00018064891004854642, "loss": 1.8049, "step": 426500 }, { "epoch": 11.98, "learning_rate": 0.00018050833161515248, "loss": 1.8249, "step": 427000 }, { "epoch": 12.0, "learning_rate": 0.00018036775318175851, "loss": 1.7993, "step": 427500 }, { "epoch": 12.01, "learning_rate": 0.00018022717474836458, "loss": 1.7924, "step": 428000 }, { "epoch": 12.02, "learning_rate": 0.00018008659631497064, "loss": 1.7594, "step": 428500 }, { "epoch": 12.04, "learning_rate": 0.0001799460178815767, "loss": 1.7235, "step": 429000 }, { "epoch": 12.05, "learning_rate": 0.00017980543944818276, "loss": 1.7484, "step": 429500 }, { "epoch": 12.07, "learning_rate": 0.00017966486101478885, "loss": 1.7384, "step": 430000 }, { "epoch": 12.08, "learning_rate": 0.0001795242825813949, "loss": 1.7548, "step": 430500 }, { "epoch": 12.1, "learning_rate": 0.00017938370414800098, "loss": 1.7523, "step": 431000 }, { "epoch": 12.11, "learning_rate": 0.000179243125714607, "loss": 1.7387, "step": 431500 }, { "epoch": 12.12, "learning_rate": 0.00017910254728121307, "loss": 1.7238, "step": 432000 }, { "epoch": 12.14, "learning_rate": 0.00017896196884781913, "loss": 1.7568, "step": 432500 }, { "epoch": 12.15, "learning_rate": 0.0001788213904144252, "loss": 1.728, "step": 433000 }, { "epoch": 12.17, "learning_rate": 0.00017868081198103126, "loss": 1.7263, "step": 433500 }, { "epoch": 12.18, "learning_rate": 0.00017854023354763735, "loss": 1.7219, "step": 434000 }, { "epoch": 12.19, "learning_rate": 0.0001783996551142434, "loss": 1.7415, "step": 434500 }, { "epoch": 12.21, "learning_rate": 0.00017825907668084947, "loss": 1.7571, "step": 435000 }, { "epoch": 12.22, "learning_rate": 0.0001781184982474555, "loss": 1.7429, "step": 435500 }, { "epoch": 12.24, "learning_rate": 0.00017797791981406157, "loss": 1.763, "step": 436000 }, { "epoch": 12.25, "learning_rate": 0.00017783734138066763, "loss": 1.7558, "step": 436500 }, { "epoch": 12.26, "learning_rate": 0.0001776967629472737, "loss": 1.7665, "step": 437000 }, { "epoch": 12.28, "learning_rate": 0.00017755618451387975, "loss": 1.7411, "step": 437500 }, { "epoch": 12.29, "learning_rate": 0.00017741560608048582, "loss": 1.7478, "step": 438000 }, { "epoch": 12.31, "learning_rate": 0.0001772750276470919, "loss": 1.7673, "step": 438500 }, { "epoch": 12.32, "learning_rate": 0.00017713444921369797, "loss": 1.752, "step": 439000 }, { "epoch": 12.33, "learning_rate": 0.000176993870780304, "loss": 1.7297, "step": 439500 }, { "epoch": 12.35, "learning_rate": 0.00017685329234691006, "loss": 1.7654, "step": 440000 }, { "epoch": 12.36, "learning_rate": 0.00017671271391351612, "loss": 1.7312, "step": 440500 }, { "epoch": 12.38, "learning_rate": 0.0001765721354801222, "loss": 1.773, "step": 441000 }, { "epoch": 12.39, "learning_rate": 0.00017643155704672825, "loss": 1.7771, "step": 441500 }, { "epoch": 12.4, "learning_rate": 0.0001762909786133343, "loss": 1.7174, "step": 442000 }, { "epoch": 12.42, "learning_rate": 0.0001761504001799404, "loss": 1.7056, "step": 442500 }, { "epoch": 12.43, "learning_rate": 0.00017600982174654646, "loss": 1.7896, "step": 443000 }, { "epoch": 12.45, "learning_rate": 0.00017586924331315252, "loss": 1.7391, "step": 443500 }, { "epoch": 12.46, "learning_rate": 0.00017572866487975856, "loss": 1.7606, "step": 444000 }, { "epoch": 12.47, "learning_rate": 0.00017558808644636462, "loss": 1.7515, "step": 444500 }, { "epoch": 12.49, "learning_rate": 0.00017544750801297068, "loss": 1.7781, "step": 445000 }, { "epoch": 12.5, "learning_rate": 0.00017530692957957674, "loss": 1.7423, "step": 445500 }, { "epoch": 12.52, "learning_rate": 0.0001751663511461828, "loss": 1.7643, "step": 446000 }, { "epoch": 12.53, "learning_rate": 0.00017502577271278887, "loss": 1.7443, "step": 446500 }, { "epoch": 12.54, "learning_rate": 0.00017488519427939496, "loss": 1.7836, "step": 447000 }, { "epoch": 12.56, "learning_rate": 0.00017474461584600102, "loss": 1.7667, "step": 447500 }, { "epoch": 12.57, "learning_rate": 0.00017460403741260705, "loss": 1.7431, "step": 448000 }, { "epoch": 12.59, "learning_rate": 0.00017446345897921312, "loss": 1.7748, "step": 448500 }, { "epoch": 12.6, "learning_rate": 0.00017432288054581918, "loss": 1.7667, "step": 449000 }, { "epoch": 12.61, "learning_rate": 0.00017418230211242524, "loss": 1.7635, "step": 449500 }, { "epoch": 12.63, "learning_rate": 0.0001740417236790313, "loss": 1.77, "step": 450000 }, { "epoch": 12.64, "learning_rate": 0.00017390114524563736, "loss": 1.7338, "step": 450500 }, { "epoch": 12.66, "learning_rate": 0.00017376056681224345, "loss": 1.761, "step": 451000 }, { "epoch": 12.67, "learning_rate": 0.00017361998837884951, "loss": 1.7557, "step": 451500 }, { "epoch": 12.68, "learning_rate": 0.00017347940994545555, "loss": 1.7942, "step": 452000 }, { "epoch": 12.7, "learning_rate": 0.0001733388315120616, "loss": 1.7452, "step": 452500 }, { "epoch": 12.71, "learning_rate": 0.00017319825307866767, "loss": 1.7466, "step": 453000 }, { "epoch": 12.73, "learning_rate": 0.00017305767464527373, "loss": 1.7551, "step": 453500 }, { "epoch": 12.74, "learning_rate": 0.0001729170962118798, "loss": 1.7765, "step": 454000 }, { "epoch": 12.75, "learning_rate": 0.00017277651777848586, "loss": 1.7686, "step": 454500 }, { "epoch": 12.77, "learning_rate": 0.00017263593934509192, "loss": 1.7711, "step": 455000 }, { "epoch": 12.78, "learning_rate": 0.000172495360911698, "loss": 1.7608, "step": 455500 }, { "epoch": 12.8, "learning_rate": 0.00017235478247830404, "loss": 1.7719, "step": 456000 }, { "epoch": 12.81, "learning_rate": 0.0001722142040449101, "loss": 1.7815, "step": 456500 }, { "epoch": 12.82, "learning_rate": 0.00017207362561151617, "loss": 1.7713, "step": 457000 }, { "epoch": 12.84, "learning_rate": 0.00017193304717812223, "loss": 1.7486, "step": 457500 }, { "epoch": 12.85, "learning_rate": 0.0001717924687447283, "loss": 1.7744, "step": 458000 }, { "epoch": 12.87, "learning_rate": 0.00017165189031133435, "loss": 1.7748, "step": 458500 }, { "epoch": 12.88, "learning_rate": 0.00017151131187794042, "loss": 1.7558, "step": 459000 }, { "epoch": 12.89, "learning_rate": 0.0001713707334445465, "loss": 1.765, "step": 459500 }, { "epoch": 12.91, "learning_rate": 0.0001712301550111525, "loss": 1.7724, "step": 460000 }, { "epoch": 12.92, "learning_rate": 0.0001710895765777586, "loss": 1.7484, "step": 460500 }, { "epoch": 12.94, "learning_rate": 0.00017094899814436466, "loss": 1.7719, "step": 461000 }, { "epoch": 12.95, "learning_rate": 0.00017080841971097073, "loss": 1.7918, "step": 461500 }, { "epoch": 12.96, "learning_rate": 0.0001706678412775768, "loss": 1.7792, "step": 462000 }, { "epoch": 12.98, "learning_rate": 0.00017052726284418285, "loss": 1.7368, "step": 462500 }, { "epoch": 12.99, "learning_rate": 0.0001703866844107889, "loss": 1.7598, "step": 463000 }, { "epoch": 13.01, "learning_rate": 0.00017024610597739497, "loss": 1.7528, "step": 463500 }, { "epoch": 13.02, "learning_rate": 0.000170105527544001, "loss": 1.6807, "step": 464000 }, { "epoch": 13.04, "learning_rate": 0.0001699649491106071, "loss": 1.7284, "step": 464500 }, { "epoch": 13.05, "learning_rate": 0.00016982437067721316, "loss": 1.6896, "step": 465000 }, { "epoch": 13.06, "learning_rate": 0.00016968379224381922, "loss": 1.7024, "step": 465500 }, { "epoch": 13.08, "learning_rate": 0.00016954321381042528, "loss": 1.7025, "step": 466000 }, { "epoch": 13.09, "learning_rate": 0.00016940263537703135, "loss": 1.6848, "step": 466500 }, { "epoch": 13.11, "learning_rate": 0.0001692620569436374, "loss": 1.707, "step": 467000 }, { "epoch": 13.12, "learning_rate": 0.00016912147851024347, "loss": 1.7138, "step": 467500 }, { "epoch": 13.13, "learning_rate": 0.0001689809000768495, "loss": 1.6969, "step": 468000 }, { "epoch": 13.15, "learning_rate": 0.00016884032164345557, "loss": 1.6782, "step": 468500 }, { "epoch": 13.16, "learning_rate": 0.00016869974321006165, "loss": 1.6959, "step": 469000 }, { "epoch": 13.18, "learning_rate": 0.00016855916477666772, "loss": 1.7567, "step": 469500 }, { "epoch": 13.19, "learning_rate": 0.00016841858634327378, "loss": 1.7163, "step": 470000 }, { "epoch": 13.2, "learning_rate": 0.00016827800790987984, "loss": 1.7081, "step": 470500 }, { "epoch": 13.22, "learning_rate": 0.0001681374294764859, "loss": 1.703, "step": 471000 }, { "epoch": 13.23, "learning_rate": 0.00016799685104309196, "loss": 1.7047, "step": 471500 }, { "epoch": 13.25, "learning_rate": 0.00016785627260969803, "loss": 1.7161, "step": 472000 }, { "epoch": 13.26, "learning_rate": 0.00016771569417630406, "loss": 1.7038, "step": 472500 }, { "epoch": 13.27, "learning_rate": 0.00016757511574291015, "loss": 1.7404, "step": 473000 }, { "epoch": 13.29, "learning_rate": 0.0001674345373095162, "loss": 1.7217, "step": 473500 }, { "epoch": 13.3, "learning_rate": 0.00016729395887612227, "loss": 1.702, "step": 474000 }, { "epoch": 13.32, "learning_rate": 0.00016715338044272834, "loss": 1.7199, "step": 474500 }, { "epoch": 13.33, "learning_rate": 0.0001670128020093344, "loss": 1.7062, "step": 475000 }, { "epoch": 13.34, "learning_rate": 0.00016687222357594046, "loss": 1.6744, "step": 475500 }, { "epoch": 13.36, "learning_rate": 0.00016673164514254652, "loss": 1.673, "step": 476000 }, { "epoch": 13.37, "learning_rate": 0.00016659106670915256, "loss": 1.7313, "step": 476500 }, { "epoch": 13.39, "learning_rate": 0.00016645048827575862, "loss": 1.7032, "step": 477000 }, { "epoch": 13.4, "learning_rate": 0.0001663099098423647, "loss": 1.7284, "step": 477500 }, { "epoch": 13.41, "learning_rate": 0.00016616933140897077, "loss": 1.7071, "step": 478000 }, { "epoch": 13.43, "learning_rate": 0.00016602875297557683, "loss": 1.7076, "step": 478500 }, { "epoch": 13.44, "learning_rate": 0.0001658881745421829, "loss": 1.7219, "step": 479000 }, { "epoch": 13.46, "learning_rate": 0.00016574759610878896, "loss": 1.7143, "step": 479500 }, { "epoch": 13.47, "learning_rate": 0.00016560701767539502, "loss": 1.7173, "step": 480000 }, { "epoch": 13.48, "learning_rate": 0.00016546643924200105, "loss": 1.7236, "step": 480500 }, { "epoch": 13.5, "learning_rate": 0.00016532586080860711, "loss": 1.7243, "step": 481000 }, { "epoch": 13.51, "learning_rate": 0.0001651852823752132, "loss": 1.7027, "step": 481500 }, { "epoch": 13.53, "learning_rate": 0.00016504470394181927, "loss": 1.7147, "step": 482000 }, { "epoch": 13.54, "learning_rate": 0.00016490412550842533, "loss": 1.705, "step": 482500 }, { "epoch": 13.55, "learning_rate": 0.0001647635470750314, "loss": 1.6933, "step": 483000 }, { "epoch": 13.57, "learning_rate": 0.00016462296864163745, "loss": 1.7353, "step": 483500 }, { "epoch": 13.58, "learning_rate": 0.0001644823902082435, "loss": 1.6976, "step": 484000 }, { "epoch": 13.6, "learning_rate": 0.00016434181177484955, "loss": 1.7118, "step": 484500 }, { "epoch": 13.61, "learning_rate": 0.0001642012333414556, "loss": 1.7086, "step": 485000 }, { "epoch": 13.62, "learning_rate": 0.00016406065490806167, "loss": 1.7019, "step": 485500 }, { "epoch": 13.64, "learning_rate": 0.00016392007647466776, "loss": 1.7094, "step": 486000 }, { "epoch": 13.65, "learning_rate": 0.00016377949804127382, "loss": 1.7013, "step": 486500 }, { "epoch": 13.67, "learning_rate": 0.00016363891960787988, "loss": 1.7514, "step": 487000 }, { "epoch": 13.68, "learning_rate": 0.00016349834117448595, "loss": 1.6936, "step": 487500 }, { "epoch": 13.69, "learning_rate": 0.000163357762741092, "loss": 1.7091, "step": 488000 }, { "epoch": 13.71, "learning_rate": 0.00016321718430769804, "loss": 1.7251, "step": 488500 }, { "epoch": 13.72, "learning_rate": 0.0001630766058743041, "loss": 1.6919, "step": 489000 }, { "epoch": 13.74, "learning_rate": 0.00016293602744091017, "loss": 1.7196, "step": 489500 }, { "epoch": 13.75, "learning_rate": 0.00016279544900751626, "loss": 1.7089, "step": 490000 }, { "epoch": 13.76, "learning_rate": 0.00016265487057412232, "loss": 1.727, "step": 490500 }, { "epoch": 13.78, "learning_rate": 0.00016251429214072838, "loss": 1.7141, "step": 491000 }, { "epoch": 13.79, "learning_rate": 0.00016237371370733444, "loss": 1.7047, "step": 491500 }, { "epoch": 13.81, "learning_rate": 0.0001622331352739405, "loss": 1.6898, "step": 492000 }, { "epoch": 13.82, "learning_rate": 0.00016209255684054654, "loss": 1.6999, "step": 492500 }, { "epoch": 13.83, "learning_rate": 0.0001619519784071526, "loss": 1.7309, "step": 493000 }, { "epoch": 13.85, "learning_rate": 0.00016181139997375866, "loss": 1.6896, "step": 493500 }, { "epoch": 13.86, "learning_rate": 0.00016167082154036472, "loss": 1.7286, "step": 494000 }, { "epoch": 13.88, "learning_rate": 0.0001615302431069708, "loss": 1.7278, "step": 494500 }, { "epoch": 13.89, "learning_rate": 0.00016138966467357688, "loss": 1.725, "step": 495000 }, { "epoch": 13.91, "learning_rate": 0.00016124908624018294, "loss": 1.7102, "step": 495500 }, { "epoch": 13.92, "learning_rate": 0.000161108507806789, "loss": 1.6975, "step": 496000 }, { "epoch": 13.93, "learning_rate": 0.00016096792937339503, "loss": 1.72, "step": 496500 }, { "epoch": 13.95, "learning_rate": 0.0001608273509400011, "loss": 1.7225, "step": 497000 }, { "epoch": 13.96, "learning_rate": 0.00016068677250660716, "loss": 1.6941, "step": 497500 }, { "epoch": 13.98, "learning_rate": 0.00016054619407321322, "loss": 1.7246, "step": 498000 }, { "epoch": 13.99, "learning_rate": 0.0001604056156398193, "loss": 1.7029, "step": 498500 }, { "epoch": 14.0, "learning_rate": 0.00016026503720642537, "loss": 1.7228, "step": 499000 }, { "epoch": 14.02, "learning_rate": 0.00016012445877303143, "loss": 1.657, "step": 499500 }, { "epoch": 14.03, "learning_rate": 0.0001599838803396375, "loss": 1.6552, "step": 500000 }, { "epoch": 14.05, "learning_rate": 0.00015984330190624356, "loss": 1.6641, "step": 500500 }, { "epoch": 14.06, "learning_rate": 0.0001597027234728496, "loss": 1.6517, "step": 501000 }, { "epoch": 14.07, "learning_rate": 0.00015956214503945565, "loss": 1.6249, "step": 501500 }, { "epoch": 14.09, "learning_rate": 0.00015942156660606172, "loss": 1.6939, "step": 502000 }, { "epoch": 14.1, "learning_rate": 0.00015928098817266778, "loss": 1.6659, "step": 502500 }, { "epoch": 14.12, "learning_rate": 0.00015914040973927387, "loss": 1.6344, "step": 503000 }, { "epoch": 14.13, "learning_rate": 0.00015899983130587993, "loss": 1.6471, "step": 503500 }, { "epoch": 14.14, "learning_rate": 0.000158859252872486, "loss": 1.6678, "step": 504000 }, { "epoch": 14.16, "learning_rate": 0.00015871867443909205, "loss": 1.645, "step": 504500 }, { "epoch": 14.17, "learning_rate": 0.0001585780960056981, "loss": 1.6122, "step": 505000 }, { "epoch": 14.19, "learning_rate": 0.00015843751757230415, "loss": 1.6648, "step": 505500 }, { "epoch": 14.2, "learning_rate": 0.0001582969391389102, "loss": 1.634, "step": 506000 }, { "epoch": 14.21, "learning_rate": 0.00015815636070551627, "loss": 1.6539, "step": 506500 }, { "epoch": 14.23, "learning_rate": 0.00015801578227212236, "loss": 1.6509, "step": 507000 }, { "epoch": 14.24, "learning_rate": 0.00015787520383872842, "loss": 1.6445, "step": 507500 }, { "epoch": 14.26, "learning_rate": 0.00015773462540533449, "loss": 1.6699, "step": 508000 }, { "epoch": 14.27, "learning_rate": 0.00015759404697194055, "loss": 1.6545, "step": 508500 }, { "epoch": 14.28, "learning_rate": 0.00015745346853854658, "loss": 1.6798, "step": 509000 }, { "epoch": 14.3, "learning_rate": 0.00015731289010515264, "loss": 1.6795, "step": 509500 }, { "epoch": 14.31, "learning_rate": 0.0001571723116717587, "loss": 1.6626, "step": 510000 }, { "epoch": 14.33, "learning_rate": 0.00015703173323836477, "loss": 1.675, "step": 510500 }, { "epoch": 14.34, "learning_rate": 0.00015689115480497083, "loss": 1.6708, "step": 511000 }, { "epoch": 14.35, "learning_rate": 0.00015675057637157692, "loss": 1.6486, "step": 511500 }, { "epoch": 14.37, "learning_rate": 0.00015660999793818298, "loss": 1.6444, "step": 512000 }, { "epoch": 14.38, "learning_rate": 0.00015646941950478904, "loss": 1.6837, "step": 512500 }, { "epoch": 14.4, "learning_rate": 0.00015632884107139508, "loss": 1.6924, "step": 513000 }, { "epoch": 14.41, "learning_rate": 0.00015618826263800114, "loss": 1.6624, "step": 513500 }, { "epoch": 14.42, "learning_rate": 0.0001560476842046072, "loss": 1.6541, "step": 514000 }, { "epoch": 14.44, "learning_rate": 0.00015590710577121326, "loss": 1.6492, "step": 514500 }, { "epoch": 14.45, "learning_rate": 0.00015576652733781933, "loss": 1.6661, "step": 515000 }, { "epoch": 14.47, "learning_rate": 0.00015562594890442541, "loss": 1.6671, "step": 515500 }, { "epoch": 14.48, "learning_rate": 0.00015548537047103148, "loss": 1.6869, "step": 516000 }, { "epoch": 14.49, "learning_rate": 0.00015534479203763754, "loss": 1.6793, "step": 516500 }, { "epoch": 14.51, "learning_rate": 0.00015520421360424357, "loss": 1.635, "step": 517000 }, { "epoch": 14.52, "learning_rate": 0.00015506363517084964, "loss": 1.6433, "step": 517500 }, { "epoch": 14.54, "learning_rate": 0.0001549230567374557, "loss": 1.6893, "step": 518000 }, { "epoch": 14.55, "learning_rate": 0.00015478247830406176, "loss": 1.6515, "step": 518500 }, { "epoch": 14.56, "learning_rate": 0.00015464189987066782, "loss": 1.6721, "step": 519000 }, { "epoch": 14.58, "learning_rate": 0.00015450132143727388, "loss": 1.6938, "step": 519500 }, { "epoch": 14.59, "learning_rate": 0.00015436074300387997, "loss": 1.6508, "step": 520000 }, { "epoch": 14.61, "learning_rate": 0.00015422016457048603, "loss": 1.6625, "step": 520500 }, { "epoch": 14.62, "learning_rate": 0.00015407958613709207, "loss": 1.6956, "step": 521000 }, { "epoch": 14.63, "learning_rate": 0.00015393900770369813, "loss": 1.6799, "step": 521500 }, { "epoch": 14.65, "learning_rate": 0.0001537984292703042, "loss": 1.6834, "step": 522000 }, { "epoch": 14.66, "learning_rate": 0.00015365785083691025, "loss": 1.6736, "step": 522500 }, { "epoch": 14.68, "learning_rate": 0.00015351727240351632, "loss": 1.6651, "step": 523000 }, { "epoch": 14.69, "learning_rate": 0.00015337669397012238, "loss": 1.6767, "step": 523500 }, { "epoch": 14.7, "learning_rate": 0.00015323611553672847, "loss": 1.6678, "step": 524000 }, { "epoch": 14.72, "learning_rate": 0.00015309553710333453, "loss": 1.6838, "step": 524500 }, { "epoch": 14.73, "learning_rate": 0.00015295495866994056, "loss": 1.6628, "step": 525000 }, { "epoch": 14.75, "learning_rate": 0.00015281438023654663, "loss": 1.6682, "step": 525500 }, { "epoch": 14.76, "learning_rate": 0.0001526738018031527, "loss": 1.6808, "step": 526000 }, { "epoch": 14.78, "learning_rate": 0.00015253322336975875, "loss": 1.6699, "step": 526500 }, { "epoch": 14.79, "learning_rate": 0.0001523926449363648, "loss": 1.6419, "step": 527000 }, { "epoch": 14.8, "learning_rate": 0.00015225206650297087, "loss": 1.6573, "step": 527500 }, { "epoch": 14.82, "learning_rate": 0.00015211148806957694, "loss": 1.6837, "step": 528000 }, { "epoch": 14.83, "learning_rate": 0.00015197090963618302, "loss": 1.6669, "step": 528500 }, { "epoch": 14.85, "learning_rate": 0.00015183033120278906, "loss": 1.6609, "step": 529000 }, { "epoch": 14.86, "learning_rate": 0.00015168975276939512, "loss": 1.7295, "step": 529500 }, { "epoch": 14.87, "learning_rate": 0.00015154917433600118, "loss": 1.6872, "step": 530000 }, { "epoch": 14.89, "learning_rate": 0.00015140859590260725, "loss": 1.6727, "step": 530500 }, { "epoch": 14.9, "learning_rate": 0.0001512680174692133, "loss": 1.6716, "step": 531000 }, { "epoch": 14.92, "learning_rate": 0.00015112743903581937, "loss": 1.6782, "step": 531500 }, { "epoch": 14.93, "learning_rate": 0.00015098686060242543, "loss": 1.6883, "step": 532000 }, { "epoch": 14.94, "learning_rate": 0.00015084628216903152, "loss": 1.6422, "step": 532500 }, { "epoch": 14.96, "learning_rate": 0.00015070570373563758, "loss": 1.6991, "step": 533000 }, { "epoch": 14.97, "learning_rate": 0.00015056512530224362, "loss": 1.6951, "step": 533500 }, { "epoch": 14.99, "learning_rate": 0.00015042454686884968, "loss": 1.6697, "step": 534000 }, { "epoch": 15.0, "learning_rate": 0.00015028396843545574, "loss": 1.7002, "step": 534500 }, { "epoch": 15.01, "learning_rate": 0.0001501433900020618, "loss": 1.6082, "step": 535000 }, { "epoch": 15.03, "learning_rate": 0.00015000281156866786, "loss": 1.6386, "step": 535500 }, { "epoch": 15.04, "learning_rate": 0.00014986223313527393, "loss": 1.6434, "step": 536000 }, { "epoch": 15.06, "learning_rate": 0.00014972165470188, "loss": 1.6171, "step": 536500 }, { "epoch": 15.07, "learning_rate": 0.00014958107626848605, "loss": 1.5879, "step": 537000 }, { "epoch": 15.08, "learning_rate": 0.0001494404978350921, "loss": 1.6285, "step": 537500 }, { "epoch": 15.1, "learning_rate": 0.00014929991940169817, "loss": 1.6323, "step": 538000 }, { "epoch": 15.11, "learning_rate": 0.00014915934096830424, "loss": 1.6375, "step": 538500 }, { "epoch": 15.13, "learning_rate": 0.0001490187625349103, "loss": 1.6198, "step": 539000 }, { "epoch": 15.14, "learning_rate": 0.00014887818410151636, "loss": 1.6497, "step": 539500 }, { "epoch": 15.15, "learning_rate": 0.00014873760566812242, "loss": 1.6025, "step": 540000 }, { "epoch": 15.17, "learning_rate": 0.00014859702723472848, "loss": 1.642, "step": 540500 }, { "epoch": 15.18, "learning_rate": 0.00014845644880133455, "loss": 1.6113, "step": 541000 }, { "epoch": 15.2, "learning_rate": 0.0001483158703679406, "loss": 1.6269, "step": 541500 }, { "epoch": 15.21, "learning_rate": 0.00014817529193454667, "loss": 1.6071, "step": 542000 }, { "epoch": 15.22, "learning_rate": 0.00014803471350115273, "loss": 1.6044, "step": 542500 }, { "epoch": 15.24, "learning_rate": 0.0001478941350677588, "loss": 1.6428, "step": 543000 }, { "epoch": 15.25, "learning_rate": 0.00014775355663436486, "loss": 1.5916, "step": 543500 }, { "epoch": 15.27, "learning_rate": 0.00014761297820097092, "loss": 1.6241, "step": 544000 }, { "epoch": 15.28, "learning_rate": 0.00014747239976757698, "loss": 1.6256, "step": 544500 }, { "epoch": 15.29, "learning_rate": 0.00014733182133418304, "loss": 1.6019, "step": 545000 }, { "epoch": 15.31, "learning_rate": 0.0001471912429007891, "loss": 1.6239, "step": 545500 }, { "epoch": 15.32, "learning_rate": 0.00014705066446739517, "loss": 1.6393, "step": 546000 }, { "epoch": 15.34, "learning_rate": 0.00014691008603400123, "loss": 1.6319, "step": 546500 }, { "epoch": 15.35, "learning_rate": 0.0001467695076006073, "loss": 1.6056, "step": 547000 }, { "epoch": 15.36, "learning_rate": 0.00014662892916721335, "loss": 1.6102, "step": 547500 }, { "epoch": 15.38, "learning_rate": 0.0001464883507338194, "loss": 1.6085, "step": 548000 }, { "epoch": 15.39, "learning_rate": 0.00014634777230042547, "loss": 1.6607, "step": 548500 }, { "epoch": 15.41, "learning_rate": 0.00014620719386703154, "loss": 1.6512, "step": 549000 }, { "epoch": 15.42, "learning_rate": 0.0001460666154336376, "loss": 1.6552, "step": 549500 }, { "epoch": 15.43, "learning_rate": 0.00014592603700024366, "loss": 1.618, "step": 550000 }, { "epoch": 15.45, "learning_rate": 0.00014578545856684972, "loss": 1.6183, "step": 550500 }, { "epoch": 15.46, "learning_rate": 0.00014564488013345578, "loss": 1.6328, "step": 551000 }, { "epoch": 15.48, "learning_rate": 0.00014550430170006185, "loss": 1.6187, "step": 551500 }, { "epoch": 15.49, "learning_rate": 0.0001453637232666679, "loss": 1.6064, "step": 552000 }, { "epoch": 15.5, "learning_rate": 0.00014522314483327397, "loss": 1.6181, "step": 552500 }, { "epoch": 15.52, "learning_rate": 0.00014508256639988003, "loss": 1.6118, "step": 553000 }, { "epoch": 15.53, "learning_rate": 0.0001449419879664861, "loss": 1.6387, "step": 553500 }, { "epoch": 15.55, "learning_rate": 0.00014480140953309216, "loss": 1.6194, "step": 554000 }, { "epoch": 15.56, "learning_rate": 0.00014466083109969822, "loss": 1.6419, "step": 554500 }, { "epoch": 15.57, "learning_rate": 0.00014452025266630428, "loss": 1.6443, "step": 555000 }, { "epoch": 15.59, "learning_rate": 0.00014437967423291034, "loss": 1.6379, "step": 555500 }, { "epoch": 15.6, "learning_rate": 0.0001442390957995164, "loss": 1.617, "step": 556000 }, { "epoch": 15.62, "learning_rate": 0.00014409851736612244, "loss": 1.6325, "step": 556500 }, { "epoch": 15.63, "learning_rate": 0.00014395793893272853, "loss": 1.6373, "step": 557000 }, { "epoch": 15.64, "learning_rate": 0.0001438173604993346, "loss": 1.6221, "step": 557500 }, { "epoch": 15.66, "learning_rate": 0.00014367678206594065, "loss": 1.612, "step": 558000 }, { "epoch": 15.67, "learning_rate": 0.0001435362036325467, "loss": 1.6379, "step": 558500 }, { "epoch": 15.69, "learning_rate": 0.00014339562519915278, "loss": 1.6326, "step": 559000 }, { "epoch": 15.7, "learning_rate": 0.00014325504676575884, "loss": 1.6009, "step": 559500 }, { "epoch": 15.72, "learning_rate": 0.0001431144683323649, "loss": 1.6199, "step": 560000 }, { "epoch": 15.73, "learning_rate": 0.00014297388989897096, "loss": 1.6541, "step": 560500 }, { "epoch": 15.74, "learning_rate": 0.00014283331146557702, "loss": 1.6647, "step": 561000 }, { "epoch": 15.76, "learning_rate": 0.00014269273303218308, "loss": 1.6486, "step": 561500 }, { "epoch": 15.77, "learning_rate": 0.00014255215459878915, "loss": 1.6654, "step": 562000 }, { "epoch": 15.79, "learning_rate": 0.0001424115761653952, "loss": 1.6393, "step": 562500 }, { "epoch": 15.8, "learning_rate": 0.00014227099773200127, "loss": 1.6223, "step": 563000 }, { "epoch": 15.81, "learning_rate": 0.00014213041929860733, "loss": 1.677, "step": 563500 }, { "epoch": 15.83, "learning_rate": 0.0001419898408652134, "loss": 1.5877, "step": 564000 }, { "epoch": 15.84, "learning_rate": 0.00014184926243181946, "loss": 1.6096, "step": 564500 }, { "epoch": 15.86, "learning_rate": 0.0001417086839984255, "loss": 1.6199, "step": 565000 }, { "epoch": 15.87, "learning_rate": 0.00014156810556503158, "loss": 1.609, "step": 565500 }, { "epoch": 15.88, "learning_rate": 0.00014142752713163764, "loss": 1.6239, "step": 566000 }, { "epoch": 15.9, "learning_rate": 0.0001412869486982437, "loss": 1.6299, "step": 566500 }, { "epoch": 15.91, "learning_rate": 0.00014114637026484974, "loss": 1.6673, "step": 567000 }, { "epoch": 15.93, "learning_rate": 0.00014100579183145583, "loss": 1.6265, "step": 567500 }, { "epoch": 15.94, "learning_rate": 0.0001408652133980619, "loss": 1.6383, "step": 568000 }, { "epoch": 15.95, "learning_rate": 0.00014072463496466795, "loss": 1.6294, "step": 568500 }, { "epoch": 15.97, "learning_rate": 0.000140584056531274, "loss": 1.6214, "step": 569000 }, { "epoch": 15.98, "learning_rate": 0.00014044347809788008, "loss": 1.6521, "step": 569500 }, { "epoch": 16.0, "learning_rate": 0.00014030289966448614, "loss": 1.6504, "step": 570000 }, { "epoch": 16.01, "learning_rate": 0.0001401623212310922, "loss": 1.5651, "step": 570500 }, { "epoch": 16.02, "learning_rate": 0.00014002174279769823, "loss": 1.5787, "step": 571000 }, { "epoch": 16.04, "learning_rate": 0.00013988116436430432, "loss": 1.5698, "step": 571500 }, { "epoch": 16.05, "learning_rate": 0.00013974058593091039, "loss": 1.5477, "step": 572000 }, { "epoch": 16.07, "learning_rate": 0.00013960000749751645, "loss": 1.5727, "step": 572500 }, { "epoch": 16.08, "learning_rate": 0.00013945942906412248, "loss": 1.5762, "step": 573000 }, { "epoch": 16.09, "learning_rate": 0.00013931885063072854, "loss": 1.5924, "step": 573500 }, { "epoch": 16.11, "learning_rate": 0.00013917827219733463, "loss": 1.5913, "step": 574000 }, { "epoch": 16.12, "learning_rate": 0.0001390376937639407, "loss": 1.581, "step": 574500 }, { "epoch": 16.14, "learning_rate": 0.00013889711533054673, "loss": 1.5518, "step": 575000 }, { "epoch": 16.15, "learning_rate": 0.0001387565368971528, "loss": 1.5782, "step": 575500 }, { "epoch": 16.16, "learning_rate": 0.00013861595846375888, "loss": 1.5733, "step": 576000 }, { "epoch": 16.18, "learning_rate": 0.00013847538003036494, "loss": 1.5869, "step": 576500 }, { "epoch": 16.19, "learning_rate": 0.00013833480159697098, "loss": 1.5828, "step": 577000 }, { "epoch": 16.21, "learning_rate": 0.00013819422316357704, "loss": 1.5645, "step": 577500 }, { "epoch": 16.22, "learning_rate": 0.00013805364473018313, "loss": 1.5653, "step": 578000 }, { "epoch": 16.23, "learning_rate": 0.0001379130662967892, "loss": 1.601, "step": 578500 }, { "epoch": 16.25, "learning_rate": 0.00013777248786339523, "loss": 1.5955, "step": 579000 }, { "epoch": 16.26, "learning_rate": 0.0001376319094300013, "loss": 1.5793, "step": 579500 }, { "epoch": 16.28, "learning_rate": 0.00013749133099660735, "loss": 1.5702, "step": 580000 }, { "epoch": 16.29, "learning_rate": 0.00013735075256321344, "loss": 1.5841, "step": 580500 }, { "epoch": 16.3, "learning_rate": 0.00013721017412981947, "loss": 1.5379, "step": 581000 }, { "epoch": 16.32, "learning_rate": 0.00013706959569642554, "loss": 1.5676, "step": 581500 }, { "epoch": 16.33, "learning_rate": 0.0001369290172630316, "loss": 1.5905, "step": 582000 }, { "epoch": 16.35, "learning_rate": 0.00013678843882963769, "loss": 1.6139, "step": 582500 }, { "epoch": 16.36, "learning_rate": 0.00013664786039624372, "loss": 1.608, "step": 583000 }, { "epoch": 16.37, "learning_rate": 0.00013650728196284978, "loss": 1.5832, "step": 583500 }, { "epoch": 16.39, "learning_rate": 0.00013636670352945584, "loss": 1.6174, "step": 584000 }, { "epoch": 16.4, "learning_rate": 0.00013622612509606193, "loss": 1.5895, "step": 584500 }, { "epoch": 16.42, "learning_rate": 0.00013608554666266797, "loss": 1.5861, "step": 585000 }, { "epoch": 16.43, "learning_rate": 0.00013594496822927403, "loss": 1.5796, "step": 585500 }, { "epoch": 16.44, "learning_rate": 0.0001358043897958801, "loss": 1.5854, "step": 586000 }, { "epoch": 16.46, "learning_rate": 0.00013566381136248618, "loss": 1.5782, "step": 586500 }, { "epoch": 16.47, "learning_rate": 0.00013552323292909222, "loss": 1.5978, "step": 587000 }, { "epoch": 16.49, "learning_rate": 0.00013538265449569828, "loss": 1.6099, "step": 587500 }, { "epoch": 16.5, "learning_rate": 0.00013524207606230434, "loss": 1.6093, "step": 588000 }, { "epoch": 16.51, "learning_rate": 0.0001351014976289104, "loss": 1.6228, "step": 588500 }, { "epoch": 16.53, "learning_rate": 0.0001349609191955165, "loss": 1.5772, "step": 589000 }, { "epoch": 16.54, "learning_rate": 0.00013482034076212253, "loss": 1.5884, "step": 589500 }, { "epoch": 16.56, "learning_rate": 0.0001346797623287286, "loss": 1.5894, "step": 590000 }, { "epoch": 16.57, "learning_rate": 0.00013453918389533465, "loss": 1.5949, "step": 590500 }, { "epoch": 16.59, "learning_rate": 0.00013439860546194074, "loss": 1.5949, "step": 591000 }, { "epoch": 16.6, "learning_rate": 0.00013425802702854677, "loss": 1.6063, "step": 591500 }, { "epoch": 16.61, "learning_rate": 0.00013411744859515284, "loss": 1.5743, "step": 592000 }, { "epoch": 16.63, "learning_rate": 0.0001339768701617589, "loss": 1.586, "step": 592500 }, { "epoch": 16.64, "learning_rate": 0.000133836291728365, "loss": 1.5523, "step": 593000 }, { "epoch": 16.66, "learning_rate": 0.00013369571329497102, "loss": 1.5871, "step": 593500 }, { "epoch": 16.67, "learning_rate": 0.00013355513486157708, "loss": 1.5904, "step": 594000 }, { "epoch": 16.68, "learning_rate": 0.00013341455642818315, "loss": 1.5936, "step": 594500 }, { "epoch": 16.7, "learning_rate": 0.00013327397799478923, "loss": 1.6254, "step": 595000 }, { "epoch": 16.71, "learning_rate": 0.00013313339956139527, "loss": 1.5805, "step": 595500 }, { "epoch": 16.73, "learning_rate": 0.00013299282112800133, "loss": 1.6077, "step": 596000 }, { "epoch": 16.74, "learning_rate": 0.0001328522426946074, "loss": 1.5784, "step": 596500 }, { "epoch": 16.75, "learning_rate": 0.00013271166426121345, "loss": 1.585, "step": 597000 }, { "epoch": 16.77, "learning_rate": 0.00013257108582781952, "loss": 1.6174, "step": 597500 }, { "epoch": 16.78, "learning_rate": 0.00013243050739442558, "loss": 1.5657, "step": 598000 }, { "epoch": 16.8, "learning_rate": 0.00013228992896103164, "loss": 1.6194, "step": 598500 }, { "epoch": 16.81, "learning_rate": 0.0001321493505276377, "loss": 1.5821, "step": 599000 }, { "epoch": 16.82, "learning_rate": 0.00013200877209424376, "loss": 1.5954, "step": 599500 }, { "epoch": 16.84, "learning_rate": 0.00013186819366084983, "loss": 1.6197, "step": 600000 }, { "epoch": 16.85, "learning_rate": 0.0001317276152274559, "loss": 1.5936, "step": 600500 }, { "epoch": 16.87, "learning_rate": 0.00013158703679406195, "loss": 1.6081, "step": 601000 }, { "epoch": 16.88, "learning_rate": 0.000131446458360668, "loss": 1.6012, "step": 601500 }, { "epoch": 16.89, "learning_rate": 0.00013130587992727407, "loss": 1.6169, "step": 602000 }, { "epoch": 16.91, "learning_rate": 0.00013116530149388014, "loss": 1.6046, "step": 602500 }, { "epoch": 16.92, "learning_rate": 0.0001310247230604862, "loss": 1.6007, "step": 603000 }, { "epoch": 16.94, "learning_rate": 0.00013088414462709226, "loss": 1.5971, "step": 603500 }, { "epoch": 16.95, "learning_rate": 0.00013074356619369832, "loss": 1.5821, "step": 604000 }, { "epoch": 16.96, "learning_rate": 0.00013060298776030438, "loss": 1.585, "step": 604500 }, { "epoch": 16.98, "learning_rate": 0.00013046240932691045, "loss": 1.617, "step": 605000 }, { "epoch": 16.99, "learning_rate": 0.0001303218308935165, "loss": 1.6341, "step": 605500 }, { "epoch": 17.01, "learning_rate": 0.00013018125246012257, "loss": 1.5642, "step": 606000 }, { "epoch": 17.02, "learning_rate": 0.00013004067402672863, "loss": 1.5107, "step": 606500 }, { "epoch": 17.03, "learning_rate": 0.0001299000955933347, "loss": 1.5292, "step": 607000 }, { "epoch": 17.05, "learning_rate": 0.00012975951715994076, "loss": 1.5316, "step": 607500 }, { "epoch": 17.06, "learning_rate": 0.00012961893872654682, "loss": 1.5349, "step": 608000 }, { "epoch": 17.08, "learning_rate": 0.00012947836029315288, "loss": 1.5068, "step": 608500 }, { "epoch": 17.09, "learning_rate": 0.00012933778185975894, "loss": 1.5212, "step": 609000 }, { "epoch": 17.1, "learning_rate": 0.000129197203426365, "loss": 1.5319, "step": 609500 }, { "epoch": 17.12, "learning_rate": 0.00012905662499297107, "loss": 1.5505, "step": 610000 }, { "epoch": 17.13, "learning_rate": 0.00012891604655957713, "loss": 1.4921, "step": 610500 }, { "epoch": 17.15, "learning_rate": 0.0001287754681261832, "loss": 1.5233, "step": 611000 }, { "epoch": 17.16, "learning_rate": 0.00012863488969278925, "loss": 1.5579, "step": 611500 }, { "epoch": 17.17, "learning_rate": 0.0001284943112593953, "loss": 1.5588, "step": 612000 }, { "epoch": 17.19, "learning_rate": 0.00012835373282600137, "loss": 1.5452, "step": 612500 }, { "epoch": 17.2, "learning_rate": 0.00012821315439260744, "loss": 1.5415, "step": 613000 }, { "epoch": 17.22, "learning_rate": 0.0001280725759592135, "loss": 1.5208, "step": 613500 }, { "epoch": 17.23, "learning_rate": 0.00012793199752581956, "loss": 1.5582, "step": 614000 }, { "epoch": 17.24, "learning_rate": 0.00012779141909242562, "loss": 1.5683, "step": 614500 }, { "epoch": 17.26, "learning_rate": 0.00012765084065903168, "loss": 1.534, "step": 615000 }, { "epoch": 17.27, "learning_rate": 0.00012751026222563775, "loss": 1.5108, "step": 615500 }, { "epoch": 17.29, "learning_rate": 0.0001273696837922438, "loss": 1.5363, "step": 616000 }, { "epoch": 17.3, "learning_rate": 0.00012722910535884987, "loss": 1.5752, "step": 616500 }, { "epoch": 17.31, "learning_rate": 0.00012708852692545593, "loss": 1.5703, "step": 617000 }, { "epoch": 17.33, "learning_rate": 0.000126947948492062, "loss": 1.5406, "step": 617500 }, { "epoch": 17.34, "learning_rate": 0.00012680737005866806, "loss": 1.5417, "step": 618000 }, { "epoch": 17.36, "learning_rate": 0.00012666679162527412, "loss": 1.5452, "step": 618500 }, { "epoch": 17.37, "learning_rate": 0.00012652621319188018, "loss": 1.5571, "step": 619000 }, { "epoch": 17.38, "learning_rate": 0.00012638563475848624, "loss": 1.5484, "step": 619500 }, { "epoch": 17.4, "learning_rate": 0.0001262450563250923, "loss": 1.5979, "step": 620000 }, { "epoch": 17.41, "learning_rate": 0.00012610447789169837, "loss": 1.5486, "step": 620500 }, { "epoch": 17.43, "learning_rate": 0.00012596389945830443, "loss": 1.4974, "step": 621000 }, { "epoch": 17.44, "learning_rate": 0.0001258233210249105, "loss": 1.5237, "step": 621500 }, { "epoch": 17.45, "learning_rate": 0.00012568274259151655, "loss": 1.5502, "step": 622000 }, { "epoch": 17.47, "learning_rate": 0.0001255421641581226, "loss": 1.5373, "step": 622500 }, { "epoch": 17.48, "learning_rate": 0.00012540158572472868, "loss": 1.5406, "step": 623000 }, { "epoch": 17.5, "learning_rate": 0.00012526100729133474, "loss": 1.5559, "step": 623500 }, { "epoch": 17.51, "learning_rate": 0.0001251204288579408, "loss": 1.5506, "step": 624000 }, { "epoch": 17.53, "learning_rate": 0.00012497985042454686, "loss": 1.5421, "step": 624500 }, { "epoch": 17.54, "learning_rate": 0.00012483927199115292, "loss": 1.5631, "step": 625000 }, { "epoch": 17.55, "learning_rate": 0.00012469869355775899, "loss": 1.5374, "step": 625500 }, { "epoch": 17.57, "learning_rate": 0.00012455811512436505, "loss": 1.5681, "step": 626000 }, { "epoch": 17.58, "learning_rate": 0.0001244175366909711, "loss": 1.541, "step": 626500 }, { "epoch": 17.6, "learning_rate": 0.00012427695825757717, "loss": 1.5325, "step": 627000 }, { "epoch": 17.61, "learning_rate": 0.00012413637982418323, "loss": 1.5226, "step": 627500 }, { "epoch": 17.62, "learning_rate": 0.0001239958013907893, "loss": 1.5524, "step": 628000 }, { "epoch": 17.64, "learning_rate": 0.00012385522295739536, "loss": 1.6071, "step": 628500 }, { "epoch": 17.65, "learning_rate": 0.00012371464452400142, "loss": 1.5639, "step": 629000 }, { "epoch": 17.67, "learning_rate": 0.00012357406609060748, "loss": 1.5428, "step": 629500 }, { "epoch": 17.68, "learning_rate": 0.00012343348765721354, "loss": 1.5646, "step": 630000 }, { "epoch": 17.69, "learning_rate": 0.0001232929092238196, "loss": 1.5568, "step": 630500 }, { "epoch": 17.71, "learning_rate": 0.00012315233079042567, "loss": 1.5319, "step": 631000 }, { "epoch": 17.72, "learning_rate": 0.00012301175235703173, "loss": 1.5503, "step": 631500 }, { "epoch": 17.74, "learning_rate": 0.0001228711739236378, "loss": 1.5291, "step": 632000 }, { "epoch": 17.75, "learning_rate": 0.00012273059549024385, "loss": 1.5911, "step": 632500 }, { "epoch": 17.76, "learning_rate": 0.00012259001705684991, "loss": 1.5696, "step": 633000 }, { "epoch": 17.78, "learning_rate": 0.00012244943862345598, "loss": 1.5498, "step": 633500 }, { "epoch": 17.79, "learning_rate": 0.00012230886019006204, "loss": 1.5292, "step": 634000 }, { "epoch": 17.81, "learning_rate": 0.0001221682817566681, "loss": 1.5606, "step": 634500 }, { "epoch": 17.82, "learning_rate": 0.00012202770332327416, "loss": 1.5626, "step": 635000 }, { "epoch": 17.83, "learning_rate": 0.00012188712488988022, "loss": 1.5849, "step": 635500 }, { "epoch": 17.85, "learning_rate": 0.00012174654645648627, "loss": 1.5534, "step": 636000 }, { "epoch": 17.86, "learning_rate": 0.00012160596802309233, "loss": 1.5346, "step": 636500 }, { "epoch": 17.88, "learning_rate": 0.00012146538958969841, "loss": 1.5747, "step": 637000 }, { "epoch": 17.89, "learning_rate": 0.00012132481115630447, "loss": 1.5572, "step": 637500 }, { "epoch": 17.9, "learning_rate": 0.00012118423272291052, "loss": 1.5445, "step": 638000 }, { "epoch": 17.92, "learning_rate": 0.00012104365428951658, "loss": 1.5555, "step": 638500 }, { "epoch": 17.93, "learning_rate": 0.00012090307585612264, "loss": 1.5335, "step": 639000 }, { "epoch": 17.95, "learning_rate": 0.00012076249742272872, "loss": 1.5836, "step": 639500 }, { "epoch": 17.96, "learning_rate": 0.00012062191898933477, "loss": 1.5538, "step": 640000 }, { "epoch": 17.97, "learning_rate": 0.00012048134055594083, "loss": 1.5627, "step": 640500 }, { "epoch": 17.99, "learning_rate": 0.00012034076212254689, "loss": 1.5717, "step": 641000 }, { "epoch": 18.0, "learning_rate": 0.00012020018368915297, "loss": 1.5152, "step": 641500 }, { "epoch": 18.02, "learning_rate": 0.00012005960525575902, "loss": 1.4885, "step": 642000 }, { "epoch": 18.03, "learning_rate": 0.00011991902682236508, "loss": 1.4731, "step": 642500 }, { "epoch": 18.04, "learning_rate": 0.00011977844838897114, "loss": 1.5073, "step": 643000 }, { "epoch": 18.06, "learning_rate": 0.00011963786995557721, "loss": 1.4917, "step": 643500 }, { "epoch": 18.07, "learning_rate": 0.00011949729152218326, "loss": 1.4906, "step": 644000 }, { "epoch": 18.09, "learning_rate": 0.00011935671308878932, "loss": 1.5095, "step": 644500 }, { "epoch": 18.1, "learning_rate": 0.00011921613465539539, "loss": 1.4915, "step": 645000 }, { "epoch": 18.11, "learning_rate": 0.00011907555622200146, "loss": 1.5024, "step": 645500 }, { "epoch": 18.13, "learning_rate": 0.00011893497778860751, "loss": 1.5251, "step": 646000 }, { "epoch": 18.14, "learning_rate": 0.00011879439935521357, "loss": 1.4836, "step": 646500 }, { "epoch": 18.16, "learning_rate": 0.00011865382092181963, "loss": 1.5216, "step": 647000 }, { "epoch": 18.17, "learning_rate": 0.0001185132424884257, "loss": 1.4953, "step": 647500 }, { "epoch": 18.18, "learning_rate": 0.00011837266405503177, "loss": 1.528, "step": 648000 }, { "epoch": 18.2, "learning_rate": 0.00011823208562163782, "loss": 1.5212, "step": 648500 }, { "epoch": 18.21, "learning_rate": 0.00011809150718824388, "loss": 1.5065, "step": 649000 }, { "epoch": 18.23, "learning_rate": 0.00011795092875484994, "loss": 1.5281, "step": 649500 }, { "epoch": 18.24, "learning_rate": 0.00011781035032145602, "loss": 1.5104, "step": 650000 }, { "epoch": 18.25, "learning_rate": 0.00011766977188806207, "loss": 1.4646, "step": 650500 }, { "epoch": 18.27, "learning_rate": 0.00011752919345466813, "loss": 1.4751, "step": 651000 }, { "epoch": 18.28, "learning_rate": 0.00011738861502127419, "loss": 1.5132, "step": 651500 }, { "epoch": 18.3, "learning_rate": 0.00011724803658788027, "loss": 1.4984, "step": 652000 }, { "epoch": 18.31, "learning_rate": 0.00011710745815448632, "loss": 1.4835, "step": 652500 }, { "epoch": 18.32, "learning_rate": 0.00011696687972109238, "loss": 1.5207, "step": 653000 }, { "epoch": 18.34, "learning_rate": 0.00011682630128769844, "loss": 1.4979, "step": 653500 }, { "epoch": 18.35, "learning_rate": 0.00011668572285430452, "loss": 1.5274, "step": 654000 }, { "epoch": 18.37, "learning_rate": 0.00011654514442091056, "loss": 1.5101, "step": 654500 }, { "epoch": 18.38, "learning_rate": 0.00011640456598751663, "loss": 1.5121, "step": 655000 }, { "epoch": 18.4, "learning_rate": 0.00011626398755412269, "loss": 1.5177, "step": 655500 }, { "epoch": 18.41, "learning_rate": 0.00011612340912072875, "loss": 1.4977, "step": 656000 }, { "epoch": 18.42, "learning_rate": 0.00011598283068733481, "loss": 1.5221, "step": 656500 }, { "epoch": 18.44, "learning_rate": 0.00011584225225394087, "loss": 1.5066, "step": 657000 }, { "epoch": 18.45, "learning_rate": 0.00011570167382054694, "loss": 1.4997, "step": 657500 }, { "epoch": 18.47, "learning_rate": 0.000115561095387153, "loss": 1.5211, "step": 658000 }, { "epoch": 18.48, "learning_rate": 0.00011542051695375905, "loss": 1.5144, "step": 658500 }, { "epoch": 18.49, "learning_rate": 0.00011527993852036512, "loss": 1.4994, "step": 659000 }, { "epoch": 18.51, "learning_rate": 0.00011513936008697118, "loss": 1.5215, "step": 659500 }, { "epoch": 18.52, "learning_rate": 0.00011499878165357724, "loss": 1.5035, "step": 660000 }, { "epoch": 18.54, "learning_rate": 0.00011485820322018329, "loss": 1.5063, "step": 660500 }, { "epoch": 18.55, "learning_rate": 0.00011471762478678937, "loss": 1.4783, "step": 661000 }, { "epoch": 18.56, "learning_rate": 0.00011457704635339543, "loss": 1.4942, "step": 661500 }, { "epoch": 18.58, "learning_rate": 0.00011443646792000149, "loss": 1.528, "step": 662000 }, { "epoch": 18.59, "learning_rate": 0.00011429588948660754, "loss": 1.5122, "step": 662500 }, { "epoch": 18.61, "learning_rate": 0.00011415531105321362, "loss": 1.5361, "step": 663000 }, { "epoch": 18.62, "learning_rate": 0.00011401473261981968, "loss": 1.5162, "step": 663500 }, { "epoch": 18.63, "learning_rate": 0.00011387415418642574, "loss": 1.4806, "step": 664000 }, { "epoch": 18.65, "learning_rate": 0.00011373357575303179, "loss": 1.496, "step": 664500 }, { "epoch": 18.66, "learning_rate": 0.00011359299731963786, "loss": 1.5381, "step": 665000 }, { "epoch": 18.68, "learning_rate": 0.00011345241888624393, "loss": 1.5123, "step": 665500 }, { "epoch": 18.69, "learning_rate": 0.00011331184045284999, "loss": 1.5367, "step": 666000 }, { "epoch": 18.7, "learning_rate": 0.00011317126201945604, "loss": 1.5061, "step": 666500 }, { "epoch": 18.72, "learning_rate": 0.0001130306835860621, "loss": 1.4939, "step": 667000 }, { "epoch": 18.73, "learning_rate": 0.00011289010515266817, "loss": 1.5094, "step": 667500 }, { "epoch": 18.75, "learning_rate": 0.00011274952671927424, "loss": 1.5351, "step": 668000 }, { "epoch": 18.76, "learning_rate": 0.00011260894828588028, "loss": 1.5246, "step": 668500 }, { "epoch": 18.77, "learning_rate": 0.00011246836985248635, "loss": 1.5233, "step": 669000 }, { "epoch": 18.79, "learning_rate": 0.00011232779141909242, "loss": 1.5041, "step": 669500 }, { "epoch": 18.8, "learning_rate": 0.00011218721298569848, "loss": 1.5076, "step": 670000 }, { "epoch": 18.82, "learning_rate": 0.00011204663455230453, "loss": 1.5281, "step": 670500 }, { "epoch": 18.83, "learning_rate": 0.0001119060561189106, "loss": 1.5359, "step": 671000 }, { "epoch": 18.84, "learning_rate": 0.00011176547768551667, "loss": 1.5241, "step": 671500 }, { "epoch": 18.86, "learning_rate": 0.00011162489925212273, "loss": 1.5, "step": 672000 }, { "epoch": 18.87, "learning_rate": 0.00011148432081872878, "loss": 1.4874, "step": 672500 }, { "epoch": 18.89, "learning_rate": 0.00011134374238533484, "loss": 1.5024, "step": 673000 }, { "epoch": 18.9, "learning_rate": 0.00011120316395194092, "loss": 1.5587, "step": 673500 }, { "epoch": 18.91, "learning_rate": 0.00011106258551854698, "loss": 1.5569, "step": 674000 }, { "epoch": 18.93, "learning_rate": 0.00011092200708515303, "loss": 1.5419, "step": 674500 }, { "epoch": 18.94, "learning_rate": 0.00011078142865175909, "loss": 1.4949, "step": 675000 }, { "epoch": 18.96, "learning_rate": 0.00011064085021836515, "loss": 1.5332, "step": 675500 }, { "epoch": 18.97, "learning_rate": 0.00011050027178497123, "loss": 1.5137, "step": 676000 }, { "epoch": 18.98, "learning_rate": 0.00011035969335157729, "loss": 1.537, "step": 676500 }, { "epoch": 19.0, "learning_rate": 0.00011021911491818334, "loss": 1.5142, "step": 677000 }, { "epoch": 19.01, "learning_rate": 0.0001100785364847894, "loss": 1.4425, "step": 677500 }, { "epoch": 19.03, "learning_rate": 0.00010993795805139547, "loss": 1.4679, "step": 678000 }, { "epoch": 19.04, "learning_rate": 0.00010979737961800154, "loss": 1.441, "step": 678500 }, { "epoch": 19.05, "learning_rate": 0.00010965680118460758, "loss": 1.4367, "step": 679000 }, { "epoch": 19.07, "learning_rate": 0.00010951622275121365, "loss": 1.4158, "step": 679500 }, { "epoch": 19.08, "learning_rate": 0.00010937564431781972, "loss": 1.467, "step": 680000 }, { "epoch": 19.1, "learning_rate": 0.00010923506588442578, "loss": 1.4936, "step": 680500 }, { "epoch": 19.11, "learning_rate": 0.00010909448745103183, "loss": 1.4517, "step": 681000 }, { "epoch": 19.12, "learning_rate": 0.0001089539090176379, "loss": 1.4532, "step": 681500 }, { "epoch": 19.14, "learning_rate": 0.00010881333058424396, "loss": 1.4788, "step": 682000 }, { "epoch": 19.15, "learning_rate": 0.00010867275215085003, "loss": 1.4477, "step": 682500 }, { "epoch": 19.17, "learning_rate": 0.00010853217371745608, "loss": 1.4405, "step": 683000 }, { "epoch": 19.18, "learning_rate": 0.00010839159528406214, "loss": 1.4977, "step": 683500 }, { "epoch": 19.19, "learning_rate": 0.0001082510168506682, "loss": 1.4801, "step": 684000 }, { "epoch": 19.21, "learning_rate": 0.00010811043841727428, "loss": 1.4558, "step": 684500 }, { "epoch": 19.22, "learning_rate": 0.00010796985998388033, "loss": 1.4634, "step": 685000 }, { "epoch": 19.24, "learning_rate": 0.00010782928155048639, "loss": 1.451, "step": 685500 }, { "epoch": 19.25, "learning_rate": 0.00010768870311709245, "loss": 1.4647, "step": 686000 }, { "epoch": 19.27, "learning_rate": 0.00010754812468369853, "loss": 1.458, "step": 686500 }, { "epoch": 19.28, "learning_rate": 0.00010740754625030458, "loss": 1.4842, "step": 687000 }, { "epoch": 19.29, "learning_rate": 0.00010726696781691064, "loss": 1.4794, "step": 687500 }, { "epoch": 19.31, "learning_rate": 0.0001071263893835167, "loss": 1.4627, "step": 688000 }, { "epoch": 19.32, "learning_rate": 0.00010698581095012277, "loss": 1.4567, "step": 688500 }, { "epoch": 19.34, "learning_rate": 0.00010684523251672882, "loss": 1.4916, "step": 689000 }, { "epoch": 19.35, "learning_rate": 0.00010670465408333489, "loss": 1.4865, "step": 689500 }, { "epoch": 19.36, "learning_rate": 0.00010656407564994095, "loss": 1.4926, "step": 690000 }, { "epoch": 19.38, "learning_rate": 0.00010642349721654701, "loss": 1.4923, "step": 690500 }, { "epoch": 19.39, "learning_rate": 0.00010628291878315307, "loss": 1.4813, "step": 691000 }, { "epoch": 19.41, "learning_rate": 0.00010614234034975913, "loss": 1.4633, "step": 691500 }, { "epoch": 19.42, "learning_rate": 0.0001060017619163652, "loss": 1.4653, "step": 692000 }, { "epoch": 19.43, "learning_rate": 0.00010586118348297126, "loss": 1.4518, "step": 692500 }, { "epoch": 19.45, "learning_rate": 0.0001057206050495773, "loss": 1.4856, "step": 693000 }, { "epoch": 19.46, "learning_rate": 0.00010558002661618338, "loss": 1.4646, "step": 693500 }, { "epoch": 19.48, "learning_rate": 0.00010543944818278944, "loss": 1.4714, "step": 694000 }, { "epoch": 19.49, "learning_rate": 0.0001052988697493955, "loss": 1.4602, "step": 694500 }, { "epoch": 19.5, "learning_rate": 0.00010515829131600155, "loss": 1.4555, "step": 695000 }, { "epoch": 19.52, "learning_rate": 0.00010501771288260763, "loss": 1.4647, "step": 695500 }, { "epoch": 19.53, "learning_rate": 0.00010487713444921369, "loss": 1.4686, "step": 696000 }, { "epoch": 19.55, "learning_rate": 0.00010473655601581975, "loss": 1.4484, "step": 696500 }, { "epoch": 19.56, "learning_rate": 0.0001045959775824258, "loss": 1.488, "step": 697000 }, { "epoch": 19.57, "learning_rate": 0.00010445539914903188, "loss": 1.4916, "step": 697500 }, { "epoch": 19.59, "learning_rate": 0.00010431482071563794, "loss": 1.4915, "step": 698000 }, { "epoch": 19.6, "learning_rate": 0.000104174242282244, "loss": 1.4803, "step": 698500 }, { "epoch": 19.62, "learning_rate": 0.00010403366384885005, "loss": 1.5062, "step": 699000 }, { "epoch": 19.63, "learning_rate": 0.00010389308541545612, "loss": 1.4809, "step": 699500 }, { "epoch": 19.64, "learning_rate": 0.00010375250698206219, "loss": 1.4766, "step": 700000 }, { "epoch": 19.66, "learning_rate": 0.00010361192854866825, "loss": 1.4997, "step": 700500 }, { "epoch": 19.67, "learning_rate": 0.0001034713501152743, "loss": 1.4345, "step": 701000 }, { "epoch": 19.69, "learning_rate": 0.00010333077168188036, "loss": 1.4837, "step": 701500 }, { "epoch": 19.7, "learning_rate": 0.00010319019324848643, "loss": 1.5083, "step": 702000 }, { "epoch": 19.71, "learning_rate": 0.0001030496148150925, "loss": 1.4603, "step": 702500 }, { "epoch": 19.73, "learning_rate": 0.00010290903638169854, "loss": 1.52, "step": 703000 }, { "epoch": 19.74, "learning_rate": 0.0001027684579483046, "loss": 1.4754, "step": 703500 }, { "epoch": 19.76, "learning_rate": 0.00010262787951491068, "loss": 1.4833, "step": 704000 }, { "epoch": 19.77, "learning_rate": 0.00010248730108151674, "loss": 1.484, "step": 704500 }, { "epoch": 19.78, "learning_rate": 0.0001023467226481228, "loss": 1.4821, "step": 705000 }, { "epoch": 19.8, "learning_rate": 0.00010220614421472885, "loss": 1.504, "step": 705500 }, { "epoch": 19.81, "learning_rate": 0.00010206556578133493, "loss": 1.4986, "step": 706000 }, { "epoch": 19.83, "learning_rate": 0.00010192498734794099, "loss": 1.5064, "step": 706500 }, { "epoch": 19.84, "learning_rate": 0.00010178440891454705, "loss": 1.4506, "step": 707000 }, { "epoch": 19.85, "learning_rate": 0.0001016438304811531, "loss": 1.4618, "step": 707500 }, { "epoch": 19.87, "learning_rate": 0.00010150325204775918, "loss": 1.4656, "step": 708000 }, { "epoch": 19.88, "learning_rate": 0.00010136267361436524, "loss": 1.477, "step": 708500 }, { "epoch": 19.9, "learning_rate": 0.0001012220951809713, "loss": 1.4651, "step": 709000 }, { "epoch": 19.91, "learning_rate": 0.00010108151674757735, "loss": 1.484, "step": 709500 }, { "epoch": 19.92, "learning_rate": 0.00010094093831418341, "loss": 1.4566, "step": 710000 }, { "epoch": 19.94, "learning_rate": 0.00010080035988078949, "loss": 1.48, "step": 710500 }, { "epoch": 19.95, "learning_rate": 0.00010065978144739555, "loss": 1.484, "step": 711000 }, { "epoch": 19.97, "learning_rate": 0.0001005192030140016, "loss": 1.4938, "step": 711500 }, { "epoch": 19.98, "learning_rate": 0.00010037862458060766, "loss": 1.4896, "step": 712000 }, { "epoch": 19.99, "learning_rate": 0.00010023804614721373, "loss": 1.4827, "step": 712500 }, { "epoch": 20.01, "learning_rate": 0.0001000974677138198, "loss": 1.4364, "step": 713000 }, { "epoch": 20.02, "learning_rate": 9.995688928042584e-05, "loss": 1.4643, "step": 713500 }, { "epoch": 20.04, "learning_rate": 9.98163108470319e-05, "loss": 1.4105, "step": 714000 }, { "epoch": 20.05, "learning_rate": 9.967573241363798e-05, "loss": 1.4383, "step": 714500 }, { "epoch": 20.06, "learning_rate": 9.953515398024404e-05, "loss": 1.4317, "step": 715000 }, { "epoch": 20.08, "learning_rate": 9.939457554685009e-05, "loss": 1.4217, "step": 715500 }, { "epoch": 20.09, "learning_rate": 9.925399711345615e-05, "loss": 1.4363, "step": 716000 }, { "epoch": 20.11, "learning_rate": 9.911341868006223e-05, "loss": 1.4337, "step": 716500 }, { "epoch": 20.12, "learning_rate": 9.897284024666829e-05, "loss": 1.4153, "step": 717000 }, { "epoch": 20.13, "learning_rate": 9.883226181327434e-05, "loss": 1.4394, "step": 717500 }, { "epoch": 20.15, "learning_rate": 9.86916833798804e-05, "loss": 1.4216, "step": 718000 }, { "epoch": 20.16, "learning_rate": 9.855110494648646e-05, "loss": 1.4515, "step": 718500 }, { "epoch": 20.18, "learning_rate": 9.841052651309254e-05, "loss": 1.4232, "step": 719000 }, { "epoch": 20.19, "learning_rate": 9.826994807969859e-05, "loss": 1.4117, "step": 719500 }, { "epoch": 20.21, "learning_rate": 9.812936964630465e-05, "loss": 1.4224, "step": 720000 }, { "epoch": 20.22, "learning_rate": 9.798879121291071e-05, "loss": 1.426, "step": 720500 }, { "epoch": 20.23, "learning_rate": 9.784821277951679e-05, "loss": 1.4179, "step": 721000 }, { "epoch": 20.25, "learning_rate": 9.770763434612284e-05, "loss": 1.4248, "step": 721500 }, { "epoch": 20.26, "learning_rate": 9.75670559127289e-05, "loss": 1.4219, "step": 722000 }, { "epoch": 20.28, "learning_rate": 9.742647747933496e-05, "loss": 1.4071, "step": 722500 }, { "epoch": 20.29, "learning_rate": 9.728589904594103e-05, "loss": 1.4151, "step": 723000 }, { "epoch": 20.3, "learning_rate": 9.714532061254708e-05, "loss": 1.4235, "step": 723500 }, { "epoch": 20.32, "learning_rate": 9.700474217915314e-05, "loss": 1.4523, "step": 724000 }, { "epoch": 20.33, "learning_rate": 9.68641637457592e-05, "loss": 1.4305, "step": 724500 }, { "epoch": 20.35, "learning_rate": 9.672358531236528e-05, "loss": 1.4417, "step": 725000 }, { "epoch": 20.36, "learning_rate": 9.658300687897133e-05, "loss": 1.4372, "step": 725500 }, { "epoch": 20.37, "learning_rate": 9.644242844557739e-05, "loss": 1.4423, "step": 726000 }, { "epoch": 20.39, "learning_rate": 9.630185001218345e-05, "loss": 1.428, "step": 726500 }, { "epoch": 20.4, "learning_rate": 9.616127157878952e-05, "loss": 1.4481, "step": 727000 }, { "epoch": 20.42, "learning_rate": 9.602069314539558e-05, "loss": 1.4533, "step": 727500 }, { "epoch": 20.43, "learning_rate": 9.588011471200164e-05, "loss": 1.4555, "step": 728000 }, { "epoch": 20.44, "learning_rate": 9.57395362786077e-05, "loss": 1.4279, "step": 728500 }, { "epoch": 20.46, "learning_rate": 9.559895784521376e-05, "loss": 1.4715, "step": 729000 }, { "epoch": 20.47, "learning_rate": 9.545837941181981e-05, "loss": 1.4165, "step": 729500 }, { "epoch": 20.49, "learning_rate": 9.531780097842589e-05, "loss": 1.4176, "step": 730000 }, { "epoch": 20.5, "learning_rate": 9.517722254503195e-05, "loss": 1.4379, "step": 730500 }, { "epoch": 20.51, "learning_rate": 9.503664411163801e-05, "loss": 1.4531, "step": 731000 }, { "epoch": 20.53, "learning_rate": 9.489606567824406e-05, "loss": 1.4247, "step": 731500 }, { "epoch": 20.54, "learning_rate": 9.475548724485014e-05, "loss": 1.4398, "step": 732000 }, { "epoch": 20.56, "learning_rate": 9.46149088114562e-05, "loss": 1.4422, "step": 732500 }, { "epoch": 20.57, "learning_rate": 9.447433037806226e-05, "loss": 1.4273, "step": 733000 }, { "epoch": 20.58, "learning_rate": 9.433375194466831e-05, "loss": 1.4482, "step": 733500 }, { "epoch": 20.6, "learning_rate": 9.419317351127438e-05, "loss": 1.445, "step": 734000 }, { "epoch": 20.61, "learning_rate": 9.405259507788045e-05, "loss": 1.4507, "step": 734500 }, { "epoch": 20.63, "learning_rate": 9.391201664448651e-05, "loss": 1.4147, "step": 735000 }, { "epoch": 20.64, "learning_rate": 9.377143821109257e-05, "loss": 1.4329, "step": 735500 }, { "epoch": 20.65, "learning_rate": 9.363085977769863e-05, "loss": 1.4602, "step": 736000 }, { "epoch": 20.67, "learning_rate": 9.349028134430469e-05, "loss": 1.4706, "step": 736500 }, { "epoch": 20.68, "learning_rate": 9.334970291091076e-05, "loss": 1.4183, "step": 737000 }, { "epoch": 20.7, "learning_rate": 9.320912447751682e-05, "loss": 1.4342, "step": 737500 }, { "epoch": 20.71, "learning_rate": 9.306854604412287e-05, "loss": 1.4291, "step": 738000 }, { "epoch": 20.72, "learning_rate": 9.292796761072894e-05, "loss": 1.4312, "step": 738500 }, { "epoch": 20.74, "learning_rate": 9.2787389177335e-05, "loss": 1.4287, "step": 739000 }, { "epoch": 20.75, "learning_rate": 9.264681074394106e-05, "loss": 1.4421, "step": 739500 }, { "epoch": 20.77, "learning_rate": 9.250623231054711e-05, "loss": 1.4204, "step": 740000 }, { "epoch": 20.78, "learning_rate": 9.236565387715319e-05, "loss": 1.4279, "step": 740500 }, { "epoch": 20.79, "learning_rate": 9.222507544375925e-05, "loss": 1.4129, "step": 741000 }, { "epoch": 20.81, "learning_rate": 9.208449701036531e-05, "loss": 1.4563, "step": 741500 }, { "epoch": 20.82, "learning_rate": 9.194391857697136e-05, "loss": 1.437, "step": 742000 }, { "epoch": 20.84, "learning_rate": 9.180334014357744e-05, "loss": 1.4574, "step": 742500 }, { "epoch": 20.85, "learning_rate": 9.16627617101835e-05, "loss": 1.4312, "step": 743000 }, { "epoch": 20.86, "learning_rate": 9.152218327678956e-05, "loss": 1.4421, "step": 743500 }, { "epoch": 20.88, "learning_rate": 9.138160484339561e-05, "loss": 1.4347, "step": 744000 }, { "epoch": 20.89, "learning_rate": 9.124102641000168e-05, "loss": 1.4635, "step": 744500 }, { "epoch": 20.91, "learning_rate": 9.110044797660775e-05, "loss": 1.4754, "step": 745000 }, { "epoch": 20.92, "learning_rate": 9.095986954321381e-05, "loss": 1.4191, "step": 745500 }, { "epoch": 20.93, "learning_rate": 9.081929110981986e-05, "loss": 1.4343, "step": 746000 }, { "epoch": 20.95, "learning_rate": 9.067871267642592e-05, "loss": 1.4699, "step": 746500 }, { "epoch": 20.96, "learning_rate": 9.0538134243032e-05, "loss": 1.4495, "step": 747000 }, { "epoch": 20.98, "learning_rate": 9.039755580963806e-05, "loss": 1.4427, "step": 747500 }, { "epoch": 20.99, "learning_rate": 9.02569773762441e-05, "loss": 1.4523, "step": 748000 }, { "epoch": 21.0, "learning_rate": 9.011639894285017e-05, "loss": 1.4197, "step": 748500 }, { "epoch": 21.02, "learning_rate": 8.997582050945624e-05, "loss": 1.4132, "step": 749000 }, { "epoch": 21.03, "learning_rate": 8.98352420760623e-05, "loss": 1.3953, "step": 749500 }, { "epoch": 21.05, "learning_rate": 8.969466364266835e-05, "loss": 1.4112, "step": 750000 }, { "epoch": 21.06, "learning_rate": 8.955408520927441e-05, "loss": 1.3864, "step": 750500 }, { "epoch": 21.08, "learning_rate": 8.941350677588049e-05, "loss": 1.3912, "step": 751000 }, { "epoch": 21.09, "learning_rate": 8.927292834248655e-05, "loss": 1.3867, "step": 751500 }, { "epoch": 21.1, "learning_rate": 8.91323499090926e-05, "loss": 1.4091, "step": 752000 }, { "epoch": 21.12, "learning_rate": 8.899177147569866e-05, "loss": 1.4045, "step": 752500 }, { "epoch": 21.13, "learning_rate": 8.885119304230474e-05, "loss": 1.4034, "step": 753000 }, { "epoch": 21.15, "learning_rate": 8.87106146089108e-05, "loss": 1.3951, "step": 753500 }, { "epoch": 21.16, "learning_rate": 8.857003617551685e-05, "loss": 1.397, "step": 754000 }, { "epoch": 21.17, "learning_rate": 8.842945774212291e-05, "loss": 1.383, "step": 754500 }, { "epoch": 21.19, "learning_rate": 8.828887930872897e-05, "loss": 1.4052, "step": 755000 }, { "epoch": 21.2, "learning_rate": 8.814830087533505e-05, "loss": 1.405, "step": 755500 }, { "epoch": 21.22, "learning_rate": 8.80077224419411e-05, "loss": 1.4064, "step": 756000 }, { "epoch": 21.23, "learning_rate": 8.786714400854716e-05, "loss": 1.413, "step": 756500 }, { "epoch": 21.24, "learning_rate": 8.772656557515322e-05, "loss": 1.3881, "step": 757000 }, { "epoch": 21.26, "learning_rate": 8.75859871417593e-05, "loss": 1.377, "step": 757500 }, { "epoch": 21.27, "learning_rate": 8.744540870836534e-05, "loss": 1.3939, "step": 758000 }, { "epoch": 21.29, "learning_rate": 8.73048302749714e-05, "loss": 1.4041, "step": 758500 }, { "epoch": 21.3, "learning_rate": 8.716425184157747e-05, "loss": 1.3923, "step": 759000 }, { "epoch": 21.31, "learning_rate": 8.702367340818354e-05, "loss": 1.3887, "step": 759500 }, { "epoch": 21.33, "learning_rate": 8.688309497478959e-05, "loss": 1.4045, "step": 760000 }, { "epoch": 21.34, "learning_rate": 8.674251654139565e-05, "loss": 1.3886, "step": 760500 }, { "epoch": 21.36, "learning_rate": 8.660193810800171e-05, "loss": 1.4225, "step": 761000 }, { "epoch": 21.37, "learning_rate": 8.646135967460779e-05, "loss": 1.3489, "step": 761500 }, { "epoch": 21.38, "learning_rate": 8.632078124121384e-05, "loss": 1.4059, "step": 762000 }, { "epoch": 21.4, "learning_rate": 8.61802028078199e-05, "loss": 1.3909, "step": 762500 }, { "epoch": 21.41, "learning_rate": 8.603962437442596e-05, "loss": 1.3911, "step": 763000 }, { "epoch": 21.43, "learning_rate": 8.589904594103202e-05, "loss": 1.3751, "step": 763500 }, { "epoch": 21.44, "learning_rate": 8.57584675076381e-05, "loss": 1.3928, "step": 764000 }, { "epoch": 21.45, "learning_rate": 8.561788907424415e-05, "loss": 1.4066, "step": 764500 }, { "epoch": 21.47, "learning_rate": 8.547731064085021e-05, "loss": 1.4075, "step": 765000 }, { "epoch": 21.48, "learning_rate": 8.533673220745627e-05, "loss": 1.4234, "step": 765500 }, { "epoch": 21.5, "learning_rate": 8.519615377406235e-05, "loss": 1.4039, "step": 766000 }, { "epoch": 21.51, "learning_rate": 8.50555753406684e-05, "loss": 1.3861, "step": 766500 }, { "epoch": 21.52, "learning_rate": 8.491499690727446e-05, "loss": 1.3816, "step": 767000 }, { "epoch": 21.54, "learning_rate": 8.477441847388052e-05, "loss": 1.4111, "step": 767500 }, { "epoch": 21.55, "learning_rate": 8.46338400404866e-05, "loss": 1.3977, "step": 768000 }, { "epoch": 21.57, "learning_rate": 8.449326160709264e-05, "loss": 1.3999, "step": 768500 }, { "epoch": 21.58, "learning_rate": 8.43526831736987e-05, "loss": 1.3902, "step": 769000 }, { "epoch": 21.59, "learning_rate": 8.421210474030477e-05, "loss": 1.4278, "step": 769500 }, { "epoch": 21.61, "learning_rate": 8.407152630691084e-05, "loss": 1.4353, "step": 770000 }, { "epoch": 21.62, "learning_rate": 8.393094787351689e-05, "loss": 1.4072, "step": 770500 }, { "epoch": 21.64, "learning_rate": 8.379036944012295e-05, "loss": 1.4325, "step": 771000 }, { "epoch": 21.65, "learning_rate": 8.364979100672901e-05, "loss": 1.3832, "step": 771500 }, { "epoch": 21.66, "learning_rate": 8.350921257333508e-05, "loss": 1.3876, "step": 772000 }, { "epoch": 21.68, "learning_rate": 8.336863413994113e-05, "loss": 1.3998, "step": 772500 }, { "epoch": 21.69, "learning_rate": 8.32280557065472e-05, "loss": 1.4081, "step": 773000 }, { "epoch": 21.71, "learning_rate": 8.308747727315326e-05, "loss": 1.4361, "step": 773500 }, { "epoch": 21.72, "learning_rate": 8.294689883975932e-05, "loss": 1.3762, "step": 774000 }, { "epoch": 21.73, "learning_rate": 8.280632040636537e-05, "loss": 1.4322, "step": 774500 }, { "epoch": 21.75, "learning_rate": 8.266574197297145e-05, "loss": 1.3878, "step": 775000 }, { "epoch": 21.76, "learning_rate": 8.252516353957751e-05, "loss": 1.3681, "step": 775500 }, { "epoch": 21.78, "learning_rate": 8.238458510618357e-05, "loss": 1.4005, "step": 776000 }, { "epoch": 21.79, "learning_rate": 8.224400667278962e-05, "loss": 1.424, "step": 776500 }, { "epoch": 21.8, "learning_rate": 8.21034282393957e-05, "loss": 1.4016, "step": 777000 }, { "epoch": 21.82, "learning_rate": 8.196284980600176e-05, "loss": 1.3916, "step": 777500 }, { "epoch": 21.83, "learning_rate": 8.182227137260782e-05, "loss": 1.449, "step": 778000 }, { "epoch": 21.85, "learning_rate": 8.168169293921387e-05, "loss": 1.4048, "step": 778500 }, { "epoch": 21.86, "learning_rate": 8.154111450581994e-05, "loss": 1.4209, "step": 779000 }, { "epoch": 21.87, "learning_rate": 8.1400536072426e-05, "loss": 1.4021, "step": 779500 }, { "epoch": 21.89, "learning_rate": 8.125995763903207e-05, "loss": 1.3813, "step": 780000 }, { "epoch": 21.9, "learning_rate": 8.111937920563812e-05, "loss": 1.3991, "step": 780500 }, { "epoch": 21.92, "learning_rate": 8.097880077224418e-05, "loss": 1.3895, "step": 781000 }, { "epoch": 21.93, "learning_rate": 8.083822233885025e-05, "loss": 1.383, "step": 781500 }, { "epoch": 21.95, "learning_rate": 8.069764390545632e-05, "loss": 1.3922, "step": 782000 }, { "epoch": 21.96, "learning_rate": 8.055706547206236e-05, "loss": 1.3866, "step": 782500 }, { "epoch": 21.97, "learning_rate": 8.041648703866843e-05, "loss": 1.404, "step": 783000 }, { "epoch": 21.99, "learning_rate": 8.02759086052745e-05, "loss": 1.3895, "step": 783500 }, { "epoch": 22.0, "learning_rate": 8.013533017188056e-05, "loss": 1.4153, "step": 784000 }, { "epoch": 22.02, "learning_rate": 7.999475173848661e-05, "loss": 1.339, "step": 784500 }, { "epoch": 22.03, "learning_rate": 7.985417330509267e-05, "loss": 1.3512, "step": 785000 }, { "epoch": 22.04, "learning_rate": 7.971359487169875e-05, "loss": 1.3543, "step": 785500 }, { "epoch": 22.06, "learning_rate": 7.957301643830481e-05, "loss": 1.3823, "step": 786000 }, { "epoch": 22.07, "learning_rate": 7.943243800491086e-05, "loss": 1.3589, "step": 786500 }, { "epoch": 22.09, "learning_rate": 7.929185957151692e-05, "loss": 1.3661, "step": 787000 }, { "epoch": 22.1, "learning_rate": 7.9151281138123e-05, "loss": 1.3565, "step": 787500 }, { "epoch": 22.11, "learning_rate": 7.901070270472906e-05, "loss": 1.3667, "step": 788000 }, { "epoch": 22.13, "learning_rate": 7.887012427133511e-05, "loss": 1.3758, "step": 788500 }, { "epoch": 22.14, "learning_rate": 7.872954583794117e-05, "loss": 1.3565, "step": 789000 }, { "epoch": 22.16, "learning_rate": 7.858896740454723e-05, "loss": 1.366, "step": 789500 }, { "epoch": 22.17, "learning_rate": 7.84483889711533e-05, "loss": 1.3763, "step": 790000 }, { "epoch": 22.18, "learning_rate": 7.830781053775935e-05, "loss": 1.3381, "step": 790500 }, { "epoch": 22.2, "learning_rate": 7.816723210436542e-05, "loss": 1.3546, "step": 791000 }, { "epoch": 22.21, "learning_rate": 7.802665367097148e-05, "loss": 1.3326, "step": 791500 }, { "epoch": 22.23, "learning_rate": 7.788607523757755e-05, "loss": 1.3528, "step": 792000 }, { "epoch": 22.24, "learning_rate": 7.77454968041836e-05, "loss": 1.3642, "step": 792500 }, { "epoch": 22.25, "learning_rate": 7.760491837078966e-05, "loss": 1.3849, "step": 793000 }, { "epoch": 22.27, "learning_rate": 7.746433993739573e-05, "loss": 1.3709, "step": 793500 }, { "epoch": 22.28, "learning_rate": 7.73237615040018e-05, "loss": 1.3701, "step": 794000 }, { "epoch": 22.3, "learning_rate": 7.718318307060786e-05, "loss": 1.3805, "step": 794500 }, { "epoch": 22.31, "learning_rate": 7.704260463721391e-05, "loss": 1.3518, "step": 795000 }, { "epoch": 22.32, "learning_rate": 7.690202620381997e-05, "loss": 1.3907, "step": 795500 }, { "epoch": 22.34, "learning_rate": 7.676144777042605e-05, "loss": 1.3648, "step": 796000 }, { "epoch": 22.35, "learning_rate": 7.662086933703211e-05, "loss": 1.3384, "step": 796500 }, { "epoch": 22.37, "learning_rate": 7.648029090363816e-05, "loss": 1.3522, "step": 797000 }, { "epoch": 22.38, "learning_rate": 7.633971247024422e-05, "loss": 1.3454, "step": 797500 }, { "epoch": 22.39, "learning_rate": 7.619913403685028e-05, "loss": 1.3731, "step": 798000 }, { "epoch": 22.41, "learning_rate": 7.605855560345636e-05, "loss": 1.3833, "step": 798500 }, { "epoch": 22.42, "learning_rate": 7.591797717006241e-05, "loss": 1.3655, "step": 799000 }, { "epoch": 22.44, "learning_rate": 7.577739873666847e-05, "loss": 1.3855, "step": 799500 }, { "epoch": 22.45, "learning_rate": 7.563682030327453e-05, "loss": 1.3427, "step": 800000 }, { "epoch": 22.46, "learning_rate": 7.54962418698806e-05, "loss": 1.3713, "step": 800500 }, { "epoch": 22.48, "learning_rate": 7.535566343648666e-05, "loss": 1.3618, "step": 801000 }, { "epoch": 22.49, "learning_rate": 7.521508500309272e-05, "loss": 1.3967, "step": 801500 }, { "epoch": 22.51, "learning_rate": 7.507450656969878e-05, "loss": 1.3566, "step": 802000 }, { "epoch": 22.52, "learning_rate": 7.493392813630484e-05, "loss": 1.3438, "step": 802500 }, { "epoch": 22.53, "learning_rate": 7.47933497029109e-05, "loss": 1.37, "step": 803000 }, { "epoch": 22.55, "learning_rate": 7.465277126951696e-05, "loss": 1.3862, "step": 803500 }, { "epoch": 22.56, "learning_rate": 7.451219283612303e-05, "loss": 1.3876, "step": 804000 }, { "epoch": 22.58, "learning_rate": 7.437161440272909e-05, "loss": 1.3405, "step": 804500 }, { "epoch": 22.59, "learning_rate": 7.423103596933515e-05, "loss": 1.3513, "step": 805000 }, { "epoch": 22.6, "learning_rate": 7.409045753594121e-05, "loss": 1.3928, "step": 805500 }, { "epoch": 22.62, "learning_rate": 7.394987910254727e-05, "loss": 1.3646, "step": 806000 }, { "epoch": 22.63, "learning_rate": 7.380930066915334e-05, "loss": 1.3238, "step": 806500 }, { "epoch": 22.65, "learning_rate": 7.36687222357594e-05, "loss": 1.361, "step": 807000 }, { "epoch": 22.66, "learning_rate": 7.352814380236546e-05, "loss": 1.3775, "step": 807500 }, { "epoch": 22.67, "learning_rate": 7.338756536897152e-05, "loss": 1.3854, "step": 808000 }, { "epoch": 22.69, "learning_rate": 7.324698693557758e-05, "loss": 1.367, "step": 808500 }, { "epoch": 22.7, "learning_rate": 7.310640850218365e-05, "loss": 1.3572, "step": 809000 }, { "epoch": 22.72, "learning_rate": 7.296583006878971e-05, "loss": 1.3752, "step": 809500 }, { "epoch": 22.73, "learning_rate": 7.282525163539577e-05, "loss": 1.3821, "step": 810000 }, { "epoch": 22.74, "learning_rate": 7.268467320200183e-05, "loss": 1.3616, "step": 810500 }, { "epoch": 22.76, "learning_rate": 7.25440947686079e-05, "loss": 1.3904, "step": 811000 }, { "epoch": 22.77, "learning_rate": 7.240351633521396e-05, "loss": 1.3583, "step": 811500 }, { "epoch": 22.79, "learning_rate": 7.226293790182002e-05, "loss": 1.4081, "step": 812000 }, { "epoch": 22.8, "learning_rate": 7.212235946842608e-05, "loss": 1.3632, "step": 812500 }, { "epoch": 22.81, "learning_rate": 7.198178103503214e-05, "loss": 1.362, "step": 813000 }, { "epoch": 22.83, "learning_rate": 7.18412026016382e-05, "loss": 1.3824, "step": 813500 }, { "epoch": 22.84, "learning_rate": 7.170062416824427e-05, "loss": 1.3701, "step": 814000 }, { "epoch": 22.86, "learning_rate": 7.156004573485033e-05, "loss": 1.3528, "step": 814500 }, { "epoch": 22.87, "learning_rate": 7.141946730145639e-05, "loss": 1.3577, "step": 815000 }, { "epoch": 22.89, "learning_rate": 7.127888886806245e-05, "loss": 1.3679, "step": 815500 }, { "epoch": 22.9, "learning_rate": 7.113831043466851e-05, "loss": 1.3678, "step": 816000 }, { "epoch": 22.91, "learning_rate": 7.099773200127456e-05, "loss": 1.3471, "step": 816500 }, { "epoch": 22.93, "learning_rate": 7.085715356788064e-05, "loss": 1.377, "step": 817000 }, { "epoch": 22.94, "learning_rate": 7.071657513448669e-05, "loss": 1.36, "step": 817500 }, { "epoch": 22.96, "learning_rate": 7.057599670109276e-05, "loss": 1.3942, "step": 818000 }, { "epoch": 22.97, "learning_rate": 7.043541826769881e-05, "loss": 1.369, "step": 818500 }, { "epoch": 22.98, "learning_rate": 7.029483983430488e-05, "loss": 1.367, "step": 819000 }, { "epoch": 23.0, "learning_rate": 7.015426140091093e-05, "loss": 1.3907, "step": 819500 }, { "epoch": 23.01, "learning_rate": 7.001368296751701e-05, "loss": 1.3379, "step": 820000 }, { "epoch": 23.03, "learning_rate": 6.987310453412306e-05, "loss": 1.335, "step": 820500 }, { "epoch": 23.04, "learning_rate": 6.973252610072913e-05, "loss": 1.3061, "step": 821000 }, { "epoch": 23.05, "learning_rate": 6.959194766733518e-05, "loss": 1.2759, "step": 821500 }, { "epoch": 23.07, "learning_rate": 6.945136923394126e-05, "loss": 1.3039, "step": 822000 }, { "epoch": 23.08, "learning_rate": 6.931079080054732e-05, "loss": 1.329, "step": 822500 }, { "epoch": 23.1, "learning_rate": 6.917021236715338e-05, "loss": 1.3405, "step": 823000 }, { "epoch": 23.11, "learning_rate": 6.902963393375944e-05, "loss": 1.3211, "step": 823500 }, { "epoch": 23.12, "learning_rate": 6.88890555003655e-05, "loss": 1.3352, "step": 824000 }, { "epoch": 23.14, "learning_rate": 6.874847706697157e-05, "loss": 1.338, "step": 824500 }, { "epoch": 23.15, "learning_rate": 6.860789863357761e-05, "loss": 1.3533, "step": 825000 }, { "epoch": 23.17, "learning_rate": 6.846732020018369e-05, "loss": 1.324, "step": 825500 }, { "epoch": 23.18, "learning_rate": 6.832674176678974e-05, "loss": 1.3076, "step": 826000 }, { "epoch": 23.19, "learning_rate": 6.818616333339581e-05, "loss": 1.3694, "step": 826500 }, { "epoch": 23.21, "learning_rate": 6.804558490000186e-05, "loss": 1.3091, "step": 827000 }, { "epoch": 23.22, "learning_rate": 6.790500646660794e-05, "loss": 1.3354, "step": 827500 }, { "epoch": 23.24, "learning_rate": 6.776442803321399e-05, "loss": 1.3473, "step": 828000 }, { "epoch": 23.25, "learning_rate": 6.762384959982006e-05, "loss": 1.3365, "step": 828500 }, { "epoch": 23.26, "learning_rate": 6.748327116642611e-05, "loss": 1.3606, "step": 829000 }, { "epoch": 23.28, "learning_rate": 6.734269273303219e-05, "loss": 1.3394, "step": 829500 }, { "epoch": 23.29, "learning_rate": 6.720211429963823e-05, "loss": 1.3267, "step": 830000 }, { "epoch": 23.31, "learning_rate": 6.706153586624431e-05, "loss": 1.3294, "step": 830500 }, { "epoch": 23.32, "learning_rate": 6.692095743285036e-05, "loss": 1.3472, "step": 831000 }, { "epoch": 23.33, "learning_rate": 6.678037899945643e-05, "loss": 1.303, "step": 831500 }, { "epoch": 23.35, "learning_rate": 6.663980056606248e-05, "loss": 1.3304, "step": 832000 }, { "epoch": 23.36, "learning_rate": 6.649922213266856e-05, "loss": 1.3474, "step": 832500 }, { "epoch": 23.38, "learning_rate": 6.63586436992746e-05, "loss": 1.3662, "step": 833000 }, { "epoch": 23.39, "learning_rate": 6.621806526588067e-05, "loss": 1.3298, "step": 833500 }, { "epoch": 23.4, "learning_rate": 6.607748683248673e-05, "loss": 1.3309, "step": 834000 }, { "epoch": 23.42, "learning_rate": 6.593690839909279e-05, "loss": 1.3172, "step": 834500 }, { "epoch": 23.43, "learning_rate": 6.579632996569885e-05, "loss": 1.3243, "step": 835000 }, { "epoch": 23.45, "learning_rate": 6.565575153230491e-05, "loss": 1.3123, "step": 835500 }, { "epoch": 23.46, "learning_rate": 6.551517309891098e-05, "loss": 1.3471, "step": 836000 }, { "epoch": 23.47, "learning_rate": 6.537459466551704e-05, "loss": 1.3276, "step": 836500 }, { "epoch": 23.49, "learning_rate": 6.52340162321231e-05, "loss": 1.3227, "step": 837000 }, { "epoch": 23.5, "learning_rate": 6.509343779872916e-05, "loss": 1.3373, "step": 837500 }, { "epoch": 23.52, "learning_rate": 6.495285936533522e-05, "loss": 1.3258, "step": 838000 }, { "epoch": 23.53, "learning_rate": 6.481228093194129e-05, "loss": 1.3358, "step": 838500 }, { "epoch": 23.54, "learning_rate": 6.467170249854735e-05, "loss": 1.3238, "step": 839000 }, { "epoch": 23.56, "learning_rate": 6.453112406515341e-05, "loss": 1.3125, "step": 839500 }, { "epoch": 23.57, "learning_rate": 6.439054563175947e-05, "loss": 1.3413, "step": 840000 }, { "epoch": 23.59, "learning_rate": 6.424996719836553e-05, "loss": 1.331, "step": 840500 }, { "epoch": 23.6, "learning_rate": 6.41093887649716e-05, "loss": 1.346, "step": 841000 }, { "epoch": 23.61, "learning_rate": 6.396881033157766e-05, "loss": 1.3422, "step": 841500 }, { "epoch": 23.63, "learning_rate": 6.382823189818372e-05, "loss": 1.3346, "step": 842000 }, { "epoch": 23.64, "learning_rate": 6.368765346478978e-05, "loss": 1.3375, "step": 842500 }, { "epoch": 23.66, "learning_rate": 6.354707503139584e-05, "loss": 1.334, "step": 843000 }, { "epoch": 23.67, "learning_rate": 6.34064965980019e-05, "loss": 1.311, "step": 843500 }, { "epoch": 23.68, "learning_rate": 6.326591816460797e-05, "loss": 1.3061, "step": 844000 }, { "epoch": 23.7, "learning_rate": 6.312533973121403e-05, "loss": 1.3373, "step": 844500 }, { "epoch": 23.71, "learning_rate": 6.298476129782009e-05, "loss": 1.3494, "step": 845000 }, { "epoch": 23.73, "learning_rate": 6.284418286442615e-05, "loss": 1.326, "step": 845500 }, { "epoch": 23.74, "learning_rate": 6.270360443103222e-05, "loss": 1.348, "step": 846000 }, { "epoch": 23.76, "learning_rate": 6.256302599763828e-05, "loss": 1.3473, "step": 846500 }, { "epoch": 23.77, "learning_rate": 6.242244756424434e-05, "loss": 1.3397, "step": 847000 }, { "epoch": 23.78, "learning_rate": 6.22818691308504e-05, "loss": 1.3583, "step": 847500 }, { "epoch": 23.8, "learning_rate": 6.214129069745646e-05, "loss": 1.3292, "step": 848000 }, { "epoch": 23.81, "learning_rate": 6.200071226406253e-05, "loss": 1.3455, "step": 848500 }, { "epoch": 23.83, "learning_rate": 6.186013383066859e-05, "loss": 1.3458, "step": 849000 }, { "epoch": 23.84, "learning_rate": 6.171955539727465e-05, "loss": 1.3477, "step": 849500 }, { "epoch": 23.85, "learning_rate": 6.157897696388071e-05, "loss": 1.3462, "step": 850000 }, { "epoch": 23.87, "learning_rate": 6.143839853048677e-05, "loss": 1.328, "step": 850500 }, { "epoch": 23.88, "learning_rate": 6.129782009709283e-05, "loss": 1.3453, "step": 851000 }, { "epoch": 23.9, "learning_rate": 6.11572416636989e-05, "loss": 1.3241, "step": 851500 }, { "epoch": 23.91, "learning_rate": 6.101666323030496e-05, "loss": 1.3564, "step": 852000 }, { "epoch": 23.92, "learning_rate": 6.087608479691102e-05, "loss": 1.3582, "step": 852500 }, { "epoch": 23.94, "learning_rate": 6.073550636351708e-05, "loss": 1.3355, "step": 853000 }, { "epoch": 23.95, "learning_rate": 6.0594927930123144e-05, "loss": 1.3438, "step": 853500 }, { "epoch": 23.97, "learning_rate": 6.0454349496729206e-05, "loss": 1.3057, "step": 854000 }, { "epoch": 23.98, "learning_rate": 6.031377106333526e-05, "loss": 1.3214, "step": 854500 }, { "epoch": 23.99, "learning_rate": 6.017319262994133e-05, "loss": 1.3221, "step": 855000 }, { "epoch": 24.01, "learning_rate": 6.0032614196547385e-05, "loss": 1.2929, "step": 855500 }, { "epoch": 24.02, "learning_rate": 5.9892035763153454e-05, "loss": 1.3036, "step": 856000 }, { "epoch": 24.04, "learning_rate": 5.975145732975951e-05, "loss": 1.3002, "step": 856500 }, { "epoch": 24.05, "learning_rate": 5.961087889636558e-05, "loss": 1.2967, "step": 857000 }, { "epoch": 24.06, "learning_rate": 5.947030046297163e-05, "loss": 1.2968, "step": 857500 }, { "epoch": 24.08, "learning_rate": 5.93297220295777e-05, "loss": 1.298, "step": 858000 }, { "epoch": 24.09, "learning_rate": 5.918914359618376e-05, "loss": 1.2798, "step": 858500 }, { "epoch": 24.11, "learning_rate": 5.9048565162789826e-05, "loss": 1.2808, "step": 859000 }, { "epoch": 24.12, "learning_rate": 5.890798672939588e-05, "loss": 1.3283, "step": 859500 }, { "epoch": 24.13, "learning_rate": 5.876740829600195e-05, "loss": 1.3048, "step": 860000 }, { "epoch": 24.15, "learning_rate": 5.8626829862608005e-05, "loss": 1.2911, "step": 860500 }, { "epoch": 24.16, "learning_rate": 5.848625142921407e-05, "loss": 1.2803, "step": 861000 }, { "epoch": 24.18, "learning_rate": 5.834567299582013e-05, "loss": 1.3098, "step": 861500 }, { "epoch": 24.19, "learning_rate": 5.82050945624262e-05, "loss": 1.3068, "step": 862000 }, { "epoch": 24.2, "learning_rate": 5.806451612903225e-05, "loss": 1.2908, "step": 862500 }, { "epoch": 24.22, "learning_rate": 5.7923937695638314e-05, "loss": 1.2579, "step": 863000 }, { "epoch": 24.23, "learning_rate": 5.7783359262244376e-05, "loss": 1.3015, "step": 863500 }, { "epoch": 24.25, "learning_rate": 5.764278082885044e-05, "loss": 1.2958, "step": 864000 }, { "epoch": 24.26, "learning_rate": 5.75022023954565e-05, "loss": 1.3135, "step": 864500 }, { "epoch": 24.27, "learning_rate": 5.736162396206256e-05, "loss": 1.2794, "step": 865000 }, { "epoch": 24.29, "learning_rate": 5.7221045528668624e-05, "loss": 1.2913, "step": 865500 }, { "epoch": 24.3, "learning_rate": 5.7080467095274686e-05, "loss": 1.2561, "step": 866000 }, { "epoch": 24.32, "learning_rate": 5.693988866188075e-05, "loss": 1.3083, "step": 866500 }, { "epoch": 24.33, "learning_rate": 5.679931022848681e-05, "loss": 1.282, "step": 867000 }, { "epoch": 24.34, "learning_rate": 5.6658731795092865e-05, "loss": 1.2826, "step": 867500 }, { "epoch": 24.36, "learning_rate": 5.6518153361698934e-05, "loss": 1.2866, "step": 868000 }, { "epoch": 24.37, "learning_rate": 5.637757492830499e-05, "loss": 1.3105, "step": 868500 }, { "epoch": 24.39, "learning_rate": 5.623699649491106e-05, "loss": 1.3, "step": 869000 }, { "epoch": 24.4, "learning_rate": 5.609641806151711e-05, "loss": 1.2891, "step": 869500 }, { "epoch": 24.41, "learning_rate": 5.595583962812318e-05, "loss": 1.3011, "step": 870000 }, { "epoch": 24.43, "learning_rate": 5.5815261194729236e-05, "loss": 1.3177, "step": 870500 }, { "epoch": 24.44, "learning_rate": 5.5674682761335305e-05, "loss": 1.3146, "step": 871000 }, { "epoch": 24.46, "learning_rate": 5.553410432794136e-05, "loss": 1.3113, "step": 871500 }, { "epoch": 24.47, "learning_rate": 5.539352589454743e-05, "loss": 1.29, "step": 872000 }, { "epoch": 24.48, "learning_rate": 5.5252947461153484e-05, "loss": 1.3293, "step": 872500 }, { "epoch": 24.5, "learning_rate": 5.511236902775955e-05, "loss": 1.3065, "step": 873000 }, { "epoch": 24.51, "learning_rate": 5.497179059436561e-05, "loss": 1.2612, "step": 873500 }, { "epoch": 24.53, "learning_rate": 5.483121216097168e-05, "loss": 1.3006, "step": 874000 }, { "epoch": 24.54, "learning_rate": 5.469063372757773e-05, "loss": 1.2845, "step": 874500 }, { "epoch": 24.55, "learning_rate": 5.45500552941838e-05, "loss": 1.3217, "step": 875000 }, { "epoch": 24.57, "learning_rate": 5.4409476860789856e-05, "loss": 1.3276, "step": 875500 }, { "epoch": 24.58, "learning_rate": 5.426889842739592e-05, "loss": 1.2993, "step": 876000 }, { "epoch": 24.6, "learning_rate": 5.412831999400198e-05, "loss": 1.2916, "step": 876500 }, { "epoch": 24.61, "learning_rate": 5.398774156060804e-05, "loss": 1.3397, "step": 877000 }, { "epoch": 24.63, "learning_rate": 5.3847163127214103e-05, "loss": 1.3067, "step": 877500 }, { "epoch": 24.64, "learning_rate": 5.3706584693820165e-05, "loss": 1.3235, "step": 878000 }, { "epoch": 24.65, "learning_rate": 5.356600626042623e-05, "loss": 1.2989, "step": 878500 }, { "epoch": 24.67, "learning_rate": 5.342542782703229e-05, "loss": 1.2858, "step": 879000 }, { "epoch": 24.68, "learning_rate": 5.328484939363835e-05, "loss": 1.3062, "step": 879500 }, { "epoch": 24.7, "learning_rate": 5.314427096024441e-05, "loss": 1.2865, "step": 880000 }, { "epoch": 24.71, "learning_rate": 5.300369252685048e-05, "loss": 1.2966, "step": 880500 }, { "epoch": 24.72, "learning_rate": 5.286311409345654e-05, "loss": 1.2927, "step": 881000 }, { "epoch": 24.74, "learning_rate": 5.2722535660062606e-05, "loss": 1.2981, "step": 881500 }, { "epoch": 24.75, "learning_rate": 5.258195722666866e-05, "loss": 1.3123, "step": 882000 }, { "epoch": 24.77, "learning_rate": 5.244137879327473e-05, "loss": 1.3186, "step": 882500 }, { "epoch": 24.78, "learning_rate": 5.2300800359880785e-05, "loss": 1.2962, "step": 883000 }, { "epoch": 24.79, "learning_rate": 5.2160221926486853e-05, "loss": 1.2899, "step": 883500 }, { "epoch": 24.81, "learning_rate": 5.201964349309291e-05, "loss": 1.3057, "step": 884000 }, { "epoch": 24.82, "learning_rate": 5.187906505969897e-05, "loss": 1.2949, "step": 884500 }, { "epoch": 24.84, "learning_rate": 5.173848662630503e-05, "loss": 1.2874, "step": 885000 }, { "epoch": 24.85, "learning_rate": 5.1597908192911094e-05, "loss": 1.3297, "step": 885500 }, { "epoch": 24.86, "learning_rate": 5.1457329759517156e-05, "loss": 1.3324, "step": 886000 }, { "epoch": 24.88, "learning_rate": 5.131675132612322e-05, "loss": 1.3237, "step": 886500 }, { "epoch": 24.89, "learning_rate": 5.117617289272928e-05, "loss": 1.3252, "step": 887000 }, { "epoch": 24.91, "learning_rate": 5.103559445933534e-05, "loss": 1.2741, "step": 887500 }, { "epoch": 24.92, "learning_rate": 5.0895016025941404e-05, "loss": 1.3208, "step": 888000 }, { "epoch": 24.93, "learning_rate": 5.0754437592547466e-05, "loss": 1.2998, "step": 888500 }, { "epoch": 24.95, "learning_rate": 5.061385915915353e-05, "loss": 1.2971, "step": 889000 }, { "epoch": 24.96, "learning_rate": 5.047328072575959e-05, "loss": 1.3066, "step": 889500 }, { "epoch": 24.98, "learning_rate": 5.0332702292365645e-05, "loss": 1.3289, "step": 890000 }, { "epoch": 24.99, "learning_rate": 5.0192123858971714e-05, "loss": 1.3317, "step": 890500 }, { "epoch": 25.0, "learning_rate": 5.005154542557777e-05, "loss": 1.3158, "step": 891000 }, { "epoch": 25.02, "learning_rate": 4.991096699218384e-05, "loss": 1.2461, "step": 891500 }, { "epoch": 25.03, "learning_rate": 4.977038855878989e-05, "loss": 1.2825, "step": 892000 }, { "epoch": 25.05, "learning_rate": 4.962981012539596e-05, "loss": 1.2938, "step": 892500 }, { "epoch": 25.06, "learning_rate": 4.9489231692002017e-05, "loss": 1.2464, "step": 893000 }, { "epoch": 25.07, "learning_rate": 4.9348653258608085e-05, "loss": 1.2535, "step": 893500 }, { "epoch": 25.09, "learning_rate": 4.920807482521414e-05, "loss": 1.2678, "step": 894000 }, { "epoch": 25.1, "learning_rate": 4.906749639182021e-05, "loss": 1.2809, "step": 894500 }, { "epoch": 25.12, "learning_rate": 4.8926917958426264e-05, "loss": 1.2612, "step": 895000 }, { "epoch": 25.13, "learning_rate": 4.878633952503233e-05, "loss": 1.2808, "step": 895500 }, { "epoch": 25.14, "learning_rate": 4.864576109163839e-05, "loss": 1.2758, "step": 896000 }, { "epoch": 25.16, "learning_rate": 4.850518265824446e-05, "loss": 1.267, "step": 896500 }, { "epoch": 25.17, "learning_rate": 4.836460422485051e-05, "loss": 1.2714, "step": 897000 }, { "epoch": 25.19, "learning_rate": 4.822402579145658e-05, "loss": 1.2657, "step": 897500 }, { "epoch": 25.2, "learning_rate": 4.8083447358062636e-05, "loss": 1.2706, "step": 898000 }, { "epoch": 25.21, "learning_rate": 4.79428689246687e-05, "loss": 1.2595, "step": 898500 }, { "epoch": 25.23, "learning_rate": 4.780229049127476e-05, "loss": 1.2448, "step": 899000 }, { "epoch": 25.24, "learning_rate": 4.766171205788082e-05, "loss": 1.2639, "step": 899500 }, { "epoch": 25.26, "learning_rate": 4.7521133624486884e-05, "loss": 1.2664, "step": 900000 }, { "epoch": 25.27, "learning_rate": 4.7380555191092946e-05, "loss": 1.2512, "step": 900500 }, { "epoch": 25.28, "learning_rate": 4.723997675769901e-05, "loss": 1.2682, "step": 901000 }, { "epoch": 25.3, "learning_rate": 4.709939832430507e-05, "loss": 1.2866, "step": 901500 }, { "epoch": 25.31, "learning_rate": 4.695881989091113e-05, "loss": 1.2619, "step": 902000 }, { "epoch": 25.33, "learning_rate": 4.681824145751719e-05, "loss": 1.2451, "step": 902500 }, { "epoch": 25.34, "learning_rate": 4.6677663024123255e-05, "loss": 1.2759, "step": 903000 }, { "epoch": 25.35, "learning_rate": 4.653708459072932e-05, "loss": 1.2691, "step": 903500 }, { "epoch": 25.37, "learning_rate": 4.639650615733537e-05, "loss": 1.2742, "step": 904000 }, { "epoch": 25.38, "learning_rate": 4.625592772394144e-05, "loss": 1.263, "step": 904500 }, { "epoch": 25.4, "learning_rate": 4.6115349290547496e-05, "loss": 1.2801, "step": 905000 }, { "epoch": 25.41, "learning_rate": 4.5974770857153565e-05, "loss": 1.281, "step": 905500 }, { "epoch": 25.42, "learning_rate": 4.583419242375962e-05, "loss": 1.2552, "step": 906000 }, { "epoch": 25.44, "learning_rate": 4.569361399036569e-05, "loss": 1.2866, "step": 906500 }, { "epoch": 25.45, "learning_rate": 4.5553035556971744e-05, "loss": 1.2792, "step": 907000 }, { "epoch": 25.47, "learning_rate": 4.541245712357781e-05, "loss": 1.2564, "step": 907500 }, { "epoch": 25.48, "learning_rate": 4.527187869018387e-05, "loss": 1.2652, "step": 908000 }, { "epoch": 25.49, "learning_rate": 4.5131300256789936e-05, "loss": 1.2889, "step": 908500 }, { "epoch": 25.51, "learning_rate": 4.499072182339599e-05, "loss": 1.2696, "step": 909000 }, { "epoch": 25.52, "learning_rate": 4.485014339000206e-05, "loss": 1.2568, "step": 909500 }, { "epoch": 25.54, "learning_rate": 4.470956495660812e-05, "loss": 1.2891, "step": 910000 }, { "epoch": 25.55, "learning_rate": 4.4568986523214184e-05, "loss": 1.2697, "step": 910500 }, { "epoch": 25.57, "learning_rate": 4.4428408089820246e-05, "loss": 1.2713, "step": 911000 }, { "epoch": 25.58, "learning_rate": 4.428782965642631e-05, "loss": 1.2878, "step": 911500 }, { "epoch": 25.59, "learning_rate": 4.414725122303237e-05, "loss": 1.2697, "step": 912000 }, { "epoch": 25.61, "learning_rate": 4.4006672789638425e-05, "loss": 1.2866, "step": 912500 }, { "epoch": 25.62, "learning_rate": 4.3866094356244494e-05, "loss": 1.2519, "step": 913000 }, { "epoch": 25.64, "learning_rate": 4.372551592285055e-05, "loss": 1.2736, "step": 913500 }, { "epoch": 25.65, "learning_rate": 4.358493748945662e-05, "loss": 1.2847, "step": 914000 }, { "epoch": 25.66, "learning_rate": 4.344435905606267e-05, "loss": 1.2802, "step": 914500 }, { "epoch": 25.68, "learning_rate": 4.330378062266874e-05, "loss": 1.2639, "step": 915000 }, { "epoch": 25.69, "learning_rate": 4.31632021892748e-05, "loss": 1.2613, "step": 915500 }, { "epoch": 25.71, "learning_rate": 4.3022623755880865e-05, "loss": 1.2673, "step": 916000 }, { "epoch": 25.72, "learning_rate": 4.288204532248692e-05, "loss": 1.2737, "step": 916500 }, { "epoch": 25.73, "learning_rate": 4.274146688909299e-05, "loss": 1.2877, "step": 917000 }, { "epoch": 25.75, "learning_rate": 4.2600888455699044e-05, "loss": 1.2975, "step": 917500 }, { "epoch": 25.76, "learning_rate": 4.246031002230511e-05, "loss": 1.2643, "step": 918000 }, { "epoch": 25.78, "learning_rate": 4.231973158891117e-05, "loss": 1.3068, "step": 918500 }, { "epoch": 25.79, "learning_rate": 4.217915315551724e-05, "loss": 1.2768, "step": 919000 }, { "epoch": 25.8, "learning_rate": 4.203857472212329e-05, "loss": 1.2809, "step": 919500 }, { "epoch": 25.82, "learning_rate": 4.1897996288729354e-05, "loss": 1.2895, "step": 920000 }, { "epoch": 25.83, "learning_rate": 4.1757417855335416e-05, "loss": 1.289, "step": 920500 }, { "epoch": 25.85, "learning_rate": 4.161683942194148e-05, "loss": 1.2685, "step": 921000 }, { "epoch": 25.86, "learning_rate": 4.147626098854754e-05, "loss": 1.2986, "step": 921500 }, { "epoch": 25.87, "learning_rate": 4.13356825551536e-05, "loss": 1.2887, "step": 922000 }, { "epoch": 25.89, "learning_rate": 4.1195104121759664e-05, "loss": 1.2693, "step": 922500 }, { "epoch": 25.9, "learning_rate": 4.1054525688365726e-05, "loss": 1.2494, "step": 923000 }, { "epoch": 25.92, "learning_rate": 4.091394725497179e-05, "loss": 1.2642, "step": 923500 }, { "epoch": 25.93, "learning_rate": 4.077336882157785e-05, "loss": 1.2777, "step": 924000 }, { "epoch": 25.94, "learning_rate": 4.063279038818391e-05, "loss": 1.2755, "step": 924500 }, { "epoch": 25.96, "learning_rate": 4.0492211954789973e-05, "loss": 1.2565, "step": 925000 }, { "epoch": 25.97, "learning_rate": 4.035163352139603e-05, "loss": 1.2715, "step": 925500 }, { "epoch": 25.99, "learning_rate": 4.02110550880021e-05, "loss": 1.2794, "step": 926000 }, { "epoch": 26.0, "learning_rate": 4.007047665460815e-05, "loss": 1.2859, "step": 926500 }, { "epoch": 26.01, "learning_rate": 3.992989822121422e-05, "loss": 1.2242, "step": 927000 }, { "epoch": 26.03, "learning_rate": 3.9789319787820276e-05, "loss": 1.244, "step": 927500 }, { "epoch": 26.04, "learning_rate": 3.9648741354426345e-05, "loss": 1.2497, "step": 928000 }, { "epoch": 26.06, "learning_rate": 3.95081629210324e-05, "loss": 1.2126, "step": 928500 }, { "epoch": 26.07, "learning_rate": 3.936758448763847e-05, "loss": 1.2374, "step": 929000 }, { "epoch": 26.08, "learning_rate": 3.9227006054244524e-05, "loss": 1.2321, "step": 929500 }, { "epoch": 26.1, "learning_rate": 3.908642762085059e-05, "loss": 1.2191, "step": 930000 }, { "epoch": 26.11, "learning_rate": 3.894584918745665e-05, "loss": 1.2377, "step": 930500 }, { "epoch": 26.13, "learning_rate": 3.8805270754062717e-05, "loss": 1.2306, "step": 931000 }, { "epoch": 26.14, "learning_rate": 3.866469232066877e-05, "loss": 1.2093, "step": 931500 }, { "epoch": 26.15, "learning_rate": 3.852411388727484e-05, "loss": 1.2353, "step": 932000 }, { "epoch": 26.17, "learning_rate": 3.8383535453880896e-05, "loss": 1.2532, "step": 932500 }, { "epoch": 26.18, "learning_rate": 3.8242957020486964e-05, "loss": 1.2356, "step": 933000 }, { "epoch": 26.2, "learning_rate": 3.810237858709302e-05, "loss": 1.2562, "step": 933500 }, { "epoch": 26.21, "learning_rate": 3.796180015369908e-05, "loss": 1.2434, "step": 934000 }, { "epoch": 26.22, "learning_rate": 3.782122172030514e-05, "loss": 1.2458, "step": 934500 }, { "epoch": 26.24, "learning_rate": 3.7680643286911205e-05, "loss": 1.2291, "step": 935000 }, { "epoch": 26.25, "learning_rate": 3.754006485351727e-05, "loss": 1.2355, "step": 935500 }, { "epoch": 26.27, "learning_rate": 3.739948642012333e-05, "loss": 1.2213, "step": 936000 }, { "epoch": 26.28, "learning_rate": 3.725890798672939e-05, "loss": 1.2332, "step": 936500 }, { "epoch": 26.29, "learning_rate": 3.711832955333545e-05, "loss": 1.2243, "step": 937000 }, { "epoch": 26.31, "learning_rate": 3.6977751119941515e-05, "loss": 1.2342, "step": 937500 }, { "epoch": 26.32, "learning_rate": 3.683717268654758e-05, "loss": 1.276, "step": 938000 }, { "epoch": 26.34, "learning_rate": 3.669659425315364e-05, "loss": 1.2475, "step": 938500 }, { "epoch": 26.35, "learning_rate": 3.65560158197597e-05, "loss": 1.2522, "step": 939000 }, { "epoch": 26.36, "learning_rate": 3.641543738636576e-05, "loss": 1.2453, "step": 939500 }, { "epoch": 26.38, "learning_rate": 3.6274858952971825e-05, "loss": 1.2416, "step": 940000 }, { "epoch": 26.39, "learning_rate": 3.6134280519577886e-05, "loss": 1.2485, "step": 940500 }, { "epoch": 26.41, "learning_rate": 3.599370208618395e-05, "loss": 1.2451, "step": 941000 }, { "epoch": 26.42, "learning_rate": 3.585312365279001e-05, "loss": 1.2666, "step": 941500 }, { "epoch": 26.44, "learning_rate": 3.571254521939607e-05, "loss": 1.2476, "step": 942000 }, { "epoch": 26.45, "learning_rate": 3.5571966786002134e-05, "loss": 1.2161, "step": 942500 }, { "epoch": 26.46, "learning_rate": 3.5431388352608196e-05, "loss": 1.2345, "step": 943000 }, { "epoch": 26.48, "learning_rate": 3.529080991921426e-05, "loss": 1.247, "step": 943500 }, { "epoch": 26.49, "learning_rate": 3.515023148582032e-05, "loss": 1.2497, "step": 944000 }, { "epoch": 26.51, "learning_rate": 3.500965305242638e-05, "loss": 1.2481, "step": 944500 }, { "epoch": 26.52, "learning_rate": 3.4869074619032444e-05, "loss": 1.2761, "step": 945000 }, { "epoch": 26.53, "learning_rate": 3.4728496185638506e-05, "loss": 1.2396, "step": 945500 }, { "epoch": 26.55, "learning_rate": 3.458791775224457e-05, "loss": 1.2603, "step": 946000 }, { "epoch": 26.56, "learning_rate": 3.444733931885063e-05, "loss": 1.2455, "step": 946500 }, { "epoch": 26.58, "learning_rate": 3.430676088545669e-05, "loss": 1.2233, "step": 947000 }, { "epoch": 26.59, "learning_rate": 3.416618245206275e-05, "loss": 1.2498, "step": 947500 }, { "epoch": 26.6, "learning_rate": 3.402560401866881e-05, "loss": 1.2853, "step": 948000 }, { "epoch": 26.62, "learning_rate": 3.388502558527487e-05, "loss": 1.2309, "step": 948500 }, { "epoch": 26.63, "learning_rate": 3.374444715188093e-05, "loss": 1.2246, "step": 949000 }, { "epoch": 26.65, "learning_rate": 3.3603868718486994e-05, "loss": 1.2179, "step": 949500 }, { "epoch": 26.66, "learning_rate": 3.3463290285093056e-05, "loss": 1.2463, "step": 950000 }, { "epoch": 26.67, "learning_rate": 3.332271185169912e-05, "loss": 1.2507, "step": 950500 }, { "epoch": 26.69, "learning_rate": 3.318213341830518e-05, "loss": 1.242, "step": 951000 }, { "epoch": 26.7, "learning_rate": 3.304155498491124e-05, "loss": 1.2434, "step": 951500 }, { "epoch": 26.72, "learning_rate": 3.2900976551517304e-05, "loss": 1.244, "step": 952000 }, { "epoch": 26.73, "learning_rate": 3.2760398118123366e-05, "loss": 1.2368, "step": 952500 }, { "epoch": 26.74, "learning_rate": 3.261981968472943e-05, "loss": 1.2472, "step": 953000 }, { "epoch": 26.76, "learning_rate": 3.247924125133549e-05, "loss": 1.251, "step": 953500 }, { "epoch": 26.77, "learning_rate": 3.233866281794156e-05, "loss": 1.2534, "step": 954000 }, { "epoch": 26.79, "learning_rate": 3.219808438454762e-05, "loss": 1.2408, "step": 954500 }, { "epoch": 26.8, "learning_rate": 3.205750595115368e-05, "loss": 1.2493, "step": 955000 }, { "epoch": 26.81, "learning_rate": 3.1916927517759744e-05, "loss": 1.2163, "step": 955500 }, { "epoch": 26.83, "learning_rate": 3.17763490843658e-05, "loss": 1.2495, "step": 956000 }, { "epoch": 26.84, "learning_rate": 3.163577065097186e-05, "loss": 1.2494, "step": 956500 }, { "epoch": 26.86, "learning_rate": 3.1495192217577923e-05, "loss": 1.229, "step": 957000 }, { "epoch": 26.87, "learning_rate": 3.1354613784183985e-05, "loss": 1.257, "step": 957500 }, { "epoch": 26.88, "learning_rate": 3.121403535079005e-05, "loss": 1.2336, "step": 958000 }, { "epoch": 26.9, "learning_rate": 3.107345691739611e-05, "loss": 1.2409, "step": 958500 }, { "epoch": 26.91, "learning_rate": 3.093287848400217e-05, "loss": 1.2139, "step": 959000 }, { "epoch": 26.93, "learning_rate": 3.079230005060823e-05, "loss": 1.2512, "step": 959500 }, { "epoch": 26.94, "learning_rate": 3.0651721617214295e-05, "loss": 1.2627, "step": 960000 }, { "epoch": 26.95, "learning_rate": 3.0511143183820357e-05, "loss": 1.251, "step": 960500 }, { "epoch": 26.97, "learning_rate": 3.037056475042642e-05, "loss": 1.2024, "step": 961000 }, { "epoch": 26.98, "learning_rate": 3.022998631703248e-05, "loss": 1.2204, "step": 961500 }, { "epoch": 27.0, "learning_rate": 3.0089407883638543e-05, "loss": 1.2593, "step": 962000 }, { "epoch": 27.01, "learning_rate": 2.9948829450244605e-05, "loss": 1.2286, "step": 962500 }, { "epoch": 27.02, "learning_rate": 2.9808251016850667e-05, "loss": 1.2079, "step": 963000 }, { "epoch": 27.04, "learning_rate": 2.966767258345673e-05, "loss": 1.2022, "step": 963500 }, { "epoch": 27.05, "learning_rate": 2.952709415006279e-05, "loss": 1.1943, "step": 964000 }, { "epoch": 27.07, "learning_rate": 2.9386515716668852e-05, "loss": 1.1952, "step": 964500 }, { "epoch": 27.08, "learning_rate": 2.9245937283274914e-05, "loss": 1.2296, "step": 965000 }, { "epoch": 27.09, "learning_rate": 2.9105358849880976e-05, "loss": 1.2143, "step": 965500 }, { "epoch": 27.11, "learning_rate": 2.8964780416487035e-05, "loss": 1.2432, "step": 966000 }, { "epoch": 27.12, "learning_rate": 2.8824201983093097e-05, "loss": 1.2106, "step": 966500 }, { "epoch": 27.14, "learning_rate": 2.868362354969916e-05, "loss": 1.2221, "step": 967000 }, { "epoch": 27.15, "learning_rate": 2.854304511630522e-05, "loss": 1.2327, "step": 967500 }, { "epoch": 27.16, "learning_rate": 2.8402466682911283e-05, "loss": 1.2119, "step": 968000 }, { "epoch": 27.18, "learning_rate": 2.8261888249517344e-05, "loss": 1.2094, "step": 968500 }, { "epoch": 27.19, "learning_rate": 2.8121309816123406e-05, "loss": 1.1851, "step": 969000 }, { "epoch": 27.21, "learning_rate": 2.798073138272947e-05, "loss": 1.2177, "step": 969500 }, { "epoch": 27.22, "learning_rate": 2.784015294933553e-05, "loss": 1.253, "step": 970000 }, { "epoch": 27.23, "learning_rate": 2.7699574515941592e-05, "loss": 1.2167, "step": 970500 }, { "epoch": 27.25, "learning_rate": 2.7558996082547654e-05, "loss": 1.2125, "step": 971000 }, { "epoch": 27.26, "learning_rate": 2.7418417649153716e-05, "loss": 1.1955, "step": 971500 }, { "epoch": 27.28, "learning_rate": 2.7277839215759778e-05, "loss": 1.1958, "step": 972000 }, { "epoch": 27.29, "learning_rate": 2.713726078236584e-05, "loss": 1.229, "step": 972500 }, { "epoch": 27.3, "learning_rate": 2.69966823489719e-05, "loss": 1.2004, "step": 973000 }, { "epoch": 27.32, "learning_rate": 2.685610391557796e-05, "loss": 1.2221, "step": 973500 }, { "epoch": 27.33, "learning_rate": 2.6715525482184022e-05, "loss": 1.2211, "step": 974000 }, { "epoch": 27.35, "learning_rate": 2.6574947048790084e-05, "loss": 1.2279, "step": 974500 }, { "epoch": 27.36, "learning_rate": 2.6434368615396146e-05, "loss": 1.229, "step": 975000 }, { "epoch": 27.38, "learning_rate": 2.6293790182002208e-05, "loss": 1.2155, "step": 975500 }, { "epoch": 27.39, "learning_rate": 2.615321174860827e-05, "loss": 1.2055, "step": 976000 }, { "epoch": 27.4, "learning_rate": 2.6012633315214332e-05, "loss": 1.2284, "step": 976500 }, { "epoch": 27.42, "learning_rate": 2.5872054881820394e-05, "loss": 1.2137, "step": 977000 }, { "epoch": 27.43, "learning_rate": 2.5731476448426456e-05, "loss": 1.2012, "step": 977500 }, { "epoch": 27.45, "learning_rate": 2.5590898015032518e-05, "loss": 1.2012, "step": 978000 }, { "epoch": 27.46, "learning_rate": 2.545031958163858e-05, "loss": 1.2168, "step": 978500 }, { "epoch": 27.47, "learning_rate": 2.530974114824464e-05, "loss": 1.2162, "step": 979000 }, { "epoch": 27.49, "learning_rate": 2.5169162714850704e-05, "loss": 1.2203, "step": 979500 }, { "epoch": 27.5, "learning_rate": 2.5028584281456762e-05, "loss": 1.1994, "step": 980000 }, { "epoch": 27.52, "learning_rate": 2.4888005848062824e-05, "loss": 1.2206, "step": 980500 }, { "epoch": 27.53, "learning_rate": 2.4747427414668886e-05, "loss": 1.2047, "step": 981000 }, { "epoch": 27.54, "learning_rate": 2.4606848981274948e-05, "loss": 1.1918, "step": 981500 }, { "epoch": 27.56, "learning_rate": 2.446627054788101e-05, "loss": 1.2074, "step": 982000 }, { "epoch": 27.57, "learning_rate": 2.4325692114487072e-05, "loss": 1.2206, "step": 982500 }, { "epoch": 27.59, "learning_rate": 2.4185113681093137e-05, "loss": 1.2193, "step": 983000 }, { "epoch": 27.6, "learning_rate": 2.40445352476992e-05, "loss": 1.2269, "step": 983500 }, { "epoch": 27.61, "learning_rate": 2.390395681430526e-05, "loss": 1.2168, "step": 984000 }, { "epoch": 27.63, "learning_rate": 2.3763378380911323e-05, "loss": 1.2136, "step": 984500 }, { "epoch": 27.64, "learning_rate": 2.3622799947517385e-05, "loss": 1.1938, "step": 985000 }, { "epoch": 27.66, "learning_rate": 2.3482221514123447e-05, "loss": 1.2355, "step": 985500 }, { "epoch": 27.67, "learning_rate": 2.334164308072951e-05, "loss": 1.2056, "step": 986000 }, { "epoch": 27.68, "learning_rate": 2.320106464733557e-05, "loss": 1.2273, "step": 986500 }, { "epoch": 27.7, "learning_rate": 2.3060486213941633e-05, "loss": 1.2103, "step": 987000 }, { "epoch": 27.71, "learning_rate": 2.2919907780547694e-05, "loss": 1.1975, "step": 987500 }, { "epoch": 27.73, "learning_rate": 2.2779329347153753e-05, "loss": 1.2034, "step": 988000 }, { "epoch": 27.74, "learning_rate": 2.2638750913759815e-05, "loss": 1.2128, "step": 988500 }, { "epoch": 27.75, "learning_rate": 2.2498172480365877e-05, "loss": 1.2214, "step": 989000 }, { "epoch": 27.77, "learning_rate": 2.235759404697194e-05, "loss": 1.2121, "step": 989500 }, { "epoch": 27.78, "learning_rate": 2.2217015613578e-05, "loss": 1.2008, "step": 990000 }, { "epoch": 27.8, "learning_rate": 2.2076437180184063e-05, "loss": 1.2235, "step": 990500 }, { "epoch": 27.81, "learning_rate": 2.1935858746790125e-05, "loss": 1.224, "step": 991000 }, { "epoch": 27.82, "learning_rate": 2.1795280313396187e-05, "loss": 1.2283, "step": 991500 }, { "epoch": 27.84, "learning_rate": 2.165470188000225e-05, "loss": 1.1691, "step": 992000 }, { "epoch": 27.85, "learning_rate": 2.151412344660831e-05, "loss": 1.2283, "step": 992500 }, { "epoch": 27.87, "learning_rate": 2.1373545013214372e-05, "loss": 1.2298, "step": 993000 }, { "epoch": 27.88, "learning_rate": 2.1232966579820434e-05, "loss": 1.2071, "step": 993500 }, { "epoch": 27.89, "learning_rate": 2.1092388146426496e-05, "loss": 1.201, "step": 994000 }, { "epoch": 27.91, "learning_rate": 2.0951809713032558e-05, "loss": 1.2401, "step": 994500 }, { "epoch": 27.92, "learning_rate": 2.0811231279638617e-05, "loss": 1.2178, "step": 995000 }, { "epoch": 27.94, "learning_rate": 2.067065284624468e-05, "loss": 1.2387, "step": 995500 }, { "epoch": 27.95, "learning_rate": 2.053007441285074e-05, "loss": 1.2397, "step": 996000 }, { "epoch": 27.96, "learning_rate": 2.0389495979456802e-05, "loss": 1.2406, "step": 996500 }, { "epoch": 27.98, "learning_rate": 2.0248917546062864e-05, "loss": 1.2341, "step": 997000 }, { "epoch": 27.99, "learning_rate": 2.0108339112668926e-05, "loss": 1.2314, "step": 997500 }, { "epoch": 28.01, "learning_rate": 1.9967760679274988e-05, "loss": 1.206, "step": 998000 }, { "epoch": 28.02, "learning_rate": 1.982718224588105e-05, "loss": 1.1898, "step": 998500 }, { "epoch": 28.03, "learning_rate": 1.9686603812487112e-05, "loss": 1.2027, "step": 999000 }, { "epoch": 28.05, "learning_rate": 1.9546025379093174e-05, "loss": 1.2206, "step": 999500 }, { "epoch": 28.06, "learning_rate": 1.9405446945699236e-05, "loss": 1.1764, "step": 1000000 }, { "epoch": 28.08, "learning_rate": 1.9264868512305298e-05, "loss": 1.1983, "step": 1000500 }, { "epoch": 28.09, "learning_rate": 1.912429007891136e-05, "loss": 1.2146, "step": 1001000 }, { "epoch": 28.1, "learning_rate": 1.8983711645517422e-05, "loss": 1.2142, "step": 1001500 }, { "epoch": 28.12, "learning_rate": 1.884313321212348e-05, "loss": 1.212, "step": 1002000 }, { "epoch": 28.13, "learning_rate": 1.8702554778729542e-05, "loss": 1.1655, "step": 1002500 }, { "epoch": 28.15, "learning_rate": 1.8561976345335604e-05, "loss": 1.2233, "step": 1003000 }, { "epoch": 28.16, "learning_rate": 1.8421397911941666e-05, "loss": 1.2042, "step": 1003500 }, { "epoch": 28.17, "learning_rate": 1.8280819478547728e-05, "loss": 1.1557, "step": 1004000 }, { "epoch": 28.19, "learning_rate": 1.814024104515379e-05, "loss": 1.2044, "step": 1004500 }, { "epoch": 28.2, "learning_rate": 1.7999662611759855e-05, "loss": 1.2152, "step": 1005000 }, { "epoch": 28.22, "learning_rate": 1.7859084178365917e-05, "loss": 1.1963, "step": 1005500 }, { "epoch": 28.23, "learning_rate": 1.7718505744971976e-05, "loss": 1.1855, "step": 1006000 }, { "epoch": 28.25, "learning_rate": 1.7577927311578038e-05, "loss": 1.2021, "step": 1006500 }, { "epoch": 28.26, "learning_rate": 1.74373488781841e-05, "loss": 1.2276, "step": 1007000 }, { "epoch": 28.27, "learning_rate": 1.729677044479016e-05, "loss": 1.2031, "step": 1007500 }, { "epoch": 28.29, "learning_rate": 1.7156192011396223e-05, "loss": 1.197, "step": 1008000 }, { "epoch": 28.3, "learning_rate": 1.7015613578002285e-05, "loss": 1.1679, "step": 1008500 }, { "epoch": 28.32, "learning_rate": 1.6875035144608347e-05, "loss": 1.189, "step": 1009000 }, { "epoch": 28.33, "learning_rate": 1.673445671121441e-05, "loss": 1.2135, "step": 1009500 }, { "epoch": 28.34, "learning_rate": 1.659387827782047e-05, "loss": 1.1794, "step": 1010000 }, { "epoch": 28.36, "learning_rate": 1.6453299844426533e-05, "loss": 1.1916, "step": 1010500 }, { "epoch": 28.37, "learning_rate": 1.6312721411032595e-05, "loss": 1.1758, "step": 1011000 }, { "epoch": 28.39, "learning_rate": 1.6172142977638657e-05, "loss": 1.1733, "step": 1011500 }, { "epoch": 28.4, "learning_rate": 1.603156454424472e-05, "loss": 1.2269, "step": 1012000 }, { "epoch": 28.41, "learning_rate": 1.589098611085078e-05, "loss": 1.2067, "step": 1012500 }, { "epoch": 28.43, "learning_rate": 1.575040767745684e-05, "loss": 1.1773, "step": 1013000 }, { "epoch": 28.44, "learning_rate": 1.56098292440629e-05, "loss": 1.2163, "step": 1013500 }, { "epoch": 28.46, "learning_rate": 1.5469250810668963e-05, "loss": 1.1957, "step": 1014000 }, { "epoch": 28.47, "learning_rate": 1.5328672377275025e-05, "loss": 1.2178, "step": 1014500 }, { "epoch": 28.48, "learning_rate": 1.5188093943881087e-05, "loss": 1.1912, "step": 1015000 }, { "epoch": 28.5, "learning_rate": 1.5047515510487149e-05, "loss": 1.1908, "step": 1015500 }, { "epoch": 28.51, "learning_rate": 1.4906937077093211e-05, "loss": 1.2142, "step": 1016000 }, { "epoch": 28.53, "learning_rate": 1.4766358643699273e-05, "loss": 1.1873, "step": 1016500 }, { "epoch": 28.54, "learning_rate": 1.4625780210305335e-05, "loss": 1.171, "step": 1017000 }, { "epoch": 28.55, "learning_rate": 1.4485201776911397e-05, "loss": 1.1901, "step": 1017500 }, { "epoch": 28.57, "learning_rate": 1.4344623343517457e-05, "loss": 1.1877, "step": 1018000 }, { "epoch": 28.58, "learning_rate": 1.4204044910123519e-05, "loss": 1.2195, "step": 1018500 }, { "epoch": 28.6, "learning_rate": 1.4063466476729581e-05, "loss": 1.1885, "step": 1019000 }, { "epoch": 28.61, "learning_rate": 1.3922888043335643e-05, "loss": 1.2032, "step": 1019500 }, { "epoch": 28.62, "learning_rate": 1.3782309609941706e-05, "loss": 1.201, "step": 1020000 }, { "epoch": 28.64, "learning_rate": 1.3641731176547768e-05, "loss": 1.1836, "step": 1020500 }, { "epoch": 28.65, "learning_rate": 1.350115274315383e-05, "loss": 1.1866, "step": 1021000 }, { "epoch": 28.67, "learning_rate": 1.3360574309759892e-05, "loss": 1.2105, "step": 1021500 }, { "epoch": 28.68, "learning_rate": 1.3219995876365954e-05, "loss": 1.1799, "step": 1022000 }, { "epoch": 28.69, "learning_rate": 1.3079417442972014e-05, "loss": 1.1936, "step": 1022500 }, { "epoch": 28.71, "learning_rate": 1.2938839009578076e-05, "loss": 1.1856, "step": 1023000 }, { "epoch": 28.72, "learning_rate": 1.2798260576184138e-05, "loss": 1.1831, "step": 1023500 }, { "epoch": 28.74, "learning_rate": 1.26576821427902e-05, "loss": 1.1974, "step": 1024000 }, { "epoch": 28.75, "learning_rate": 1.2517103709396262e-05, "loss": 1.2063, "step": 1024500 }, { "epoch": 28.76, "learning_rate": 1.2376525276002324e-05, "loss": 1.1803, "step": 1025000 }, { "epoch": 28.78, "learning_rate": 1.2235946842608384e-05, "loss": 1.2299, "step": 1025500 }, { "epoch": 28.79, "learning_rate": 1.2095368409214446e-05, "loss": 1.185, "step": 1026000 }, { "epoch": 28.81, "learning_rate": 1.1954789975820508e-05, "loss": 1.2083, "step": 1026500 }, { "epoch": 28.82, "learning_rate": 1.181421154242657e-05, "loss": 1.1777, "step": 1027000 }, { "epoch": 28.83, "learning_rate": 1.1673633109032632e-05, "loss": 1.2018, "step": 1027500 }, { "epoch": 28.85, "learning_rate": 1.1533054675638694e-05, "loss": 1.1975, "step": 1028000 }, { "epoch": 28.86, "learning_rate": 1.1392476242244756e-05, "loss": 1.1666, "step": 1028500 }, { "epoch": 28.88, "learning_rate": 1.1251897808850816e-05, "loss": 1.1997, "step": 1029000 }, { "epoch": 28.89, "learning_rate": 1.1111319375456878e-05, "loss": 1.2238, "step": 1029500 }, { "epoch": 28.9, "learning_rate": 1.097074094206294e-05, "loss": 1.1982, "step": 1030000 }, { "epoch": 28.92, "learning_rate": 1.0830162508669002e-05, "loss": 1.1779, "step": 1030500 }, { "epoch": 28.93, "learning_rate": 1.0689584075275064e-05, "loss": 1.2023, "step": 1031000 }, { "epoch": 28.95, "learning_rate": 1.0549005641881126e-05, "loss": 1.1825, "step": 1031500 }, { "epoch": 28.96, "learning_rate": 1.0408427208487188e-05, "loss": 1.1994, "step": 1032000 }, { "epoch": 28.97, "learning_rate": 1.0267848775093248e-05, "loss": 1.2162, "step": 1032500 }, { "epoch": 28.99, "learning_rate": 1.012727034169931e-05, "loss": 1.2024, "step": 1033000 } ], "logging_steps": 500, "max_steps": 1069020, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 1.7996577873665385e+21, "train_batch_size": 2, "trial_name": null, "trial_params": null }