diff --git "a/checkpoint-1033400/trainer_state.json" "b/checkpoint-1033400/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1033400/trainer_state.json" @@ -0,0 +1,12417 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 28.999985968653974, + "eval_steps": 500, + "global_step": 1033400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 7.5e-05, + "loss": 44.5033, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00015, + "loss": 6.0308, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 0.000225, + "loss": 5.869, + "step": 1500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003, + "loss": 5.7816, + "step": 2000 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029985942156660604, + "loss": 5.7394, + "step": 2500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002997188431332121, + "loss": 5.4199, + "step": 3000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029957826469981816, + "loss": 4.4523, + "step": 3500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002994376862664242, + "loss": 3.9274, + "step": 4000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002992971078330303, + "loss": 3.6766, + "step": 4500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029915652939963635, + "loss": 3.4911, + "step": 5000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002990159509662424, + "loss": 3.3588, + "step": 5500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029887537253284847, + "loss": 3.288, + "step": 6000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00029873479409945453, + "loss": 3.2493, + "step": 6500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002985942156660606, + "loss": 3.2086, + "step": 7000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029845363723266665, + "loss": 3.0969, + "step": 7500 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002983130587992727, + "loss": 3.0552, + "step": 8000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002981724803658788, + "loss": 3.0411, + "step": 8500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029803190193248484, + "loss": 2.9908, + "step": 9000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002978913234990909, + "loss": 2.9927, + "step": 9500 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029775074506569696, + "loss": 2.9911, + "step": 10000 + }, + { + "epoch": 0.29, + "learning_rate": 0.000297610166632303, + "loss": 2.9854, + "step": 10500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002974695881989091, + "loss": 2.9732, + "step": 11000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029732900976551515, + "loss": 2.9407, + "step": 11500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002971884313321212, + "loss": 2.9384, + "step": 12000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002970478528987273, + "loss": 2.9073, + "step": 12500 + }, + { + "epoch": 0.36, + "learning_rate": 0.00029690727446533334, + "loss": 2.9253, + "step": 13000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002967666960319394, + "loss": 2.8914, + "step": 13500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00029662611759854546, + "loss": 2.8701, + "step": 14000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002964855391651515, + "loss": 2.8428, + "step": 14500 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002963449607317576, + "loss": 2.8333, + "step": 15000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029620438229836365, + "loss": 2.8261, + "step": 15500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002960638038649697, + "loss": 2.8314, + "step": 16000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029592322543157577, + "loss": 2.8064, + "step": 16500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029578264699818183, + "loss": 2.8008, + "step": 17000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002956420685647879, + "loss": 2.7798, + "step": 17500 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029550149013139396, + "loss": 2.7898, + "step": 18000 + }, + { + "epoch": 0.52, + "learning_rate": 0.000295360911698, + "loss": 2.7996, + "step": 18500 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002952203332646061, + "loss": 2.7596, + "step": 19000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00029507975483121214, + "loss": 2.7718, + "step": 19500 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002949391763978182, + "loss": 2.774, + "step": 20000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00029479859796442427, + "loss": 2.751, + "step": 20500 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002946580195310303, + "loss": 2.7666, + "step": 21000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002945174410976364, + "loss": 2.7769, + "step": 21500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029437686266424245, + "loss": 2.7581, + "step": 22000 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002942362842308485, + "loss": 2.7098, + "step": 22500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002940957057974546, + "loss": 2.7282, + "step": 23000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00029395512736406064, + "loss": 2.7095, + "step": 23500 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002938145489306667, + "loss": 2.7139, + "step": 24000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00029367397049727276, + "loss": 2.6969, + "step": 24500 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002935333920638788, + "loss": 2.705, + "step": 25000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002933928136304849, + "loss": 2.7324, + "step": 25500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00029325223519709095, + "loss": 2.6955, + "step": 26000 + }, + { + "epoch": 0.74, + "learning_rate": 0.000293111656763697, + "loss": 2.6963, + "step": 26500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029297107833030307, + "loss": 2.678, + "step": 27000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00029283049989690913, + "loss": 2.7222, + "step": 27500 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002926899214635152, + "loss": 2.7119, + "step": 28000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00029254934303012126, + "loss": 2.7088, + "step": 28500 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002924087645967273, + "loss": 2.6917, + "step": 29000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002922681861633334, + "loss": 2.6737, + "step": 29500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00029212760772993944, + "loss": 2.6373, + "step": 30000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002919870292965455, + "loss": 2.6861, + "step": 30500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00029184645086315157, + "loss": 2.658, + "step": 31000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00029170587242975763, + "loss": 2.6517, + "step": 31500 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002915652939963637, + "loss": 2.6492, + "step": 32000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00029142471556296975, + "loss": 2.6422, + "step": 32500 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002912841371295758, + "loss": 2.6518, + "step": 33000 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002911435586961819, + "loss": 2.6342, + "step": 33500 + }, + { + "epoch": 0.95, + "learning_rate": 0.00029100298026278794, + "loss": 2.6301, + "step": 34000 + }, + { + "epoch": 0.97, + "learning_rate": 0.000290862401829394, + "loss": 2.6546, + "step": 34500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00029072182339600006, + "loss": 2.6329, + "step": 35000 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002905812449626061, + "loss": 2.6626, + "step": 35500 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002904406665292122, + "loss": 2.5699, + "step": 36000 + }, + { + "epoch": 1.02, + "learning_rate": 0.00029030008809581825, + "loss": 2.5145, + "step": 36500 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002901595096624243, + "loss": 2.5691, + "step": 37000 + }, + { + "epoch": 1.05, + "learning_rate": 0.00029001893122903037, + "loss": 2.5582, + "step": 37500 + }, + { + "epoch": 1.07, + "learning_rate": 0.00028987835279563643, + "loss": 2.6044, + "step": 38000 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002897377743622425, + "loss": 2.5418, + "step": 38500 + }, + { + "epoch": 1.09, + "learning_rate": 0.00028959719592884856, + "loss": 2.5534, + "step": 39000 + }, + { + "epoch": 1.11, + "learning_rate": 0.0002894566174954546, + "loss": 2.5921, + "step": 39500 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002893160390620607, + "loss": 2.5474, + "step": 40000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00028917546062866674, + "loss": 2.5525, + "step": 40500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00028903488219527275, + "loss": 2.5519, + "step": 41000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00028889430376187887, + "loss": 2.5802, + "step": 41500 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028875372532848493, + "loss": 2.4925, + "step": 42000 + }, + { + "epoch": 1.19, + "learning_rate": 0.000288613146895091, + "loss": 2.5546, + "step": 42500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028847256846169705, + "loss": 2.513, + "step": 43000 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002883319900283031, + "loss": 2.5301, + "step": 43500 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002881914115949092, + "loss": 2.514, + "step": 44000 + }, + { + "epoch": 1.25, + "learning_rate": 0.00028805083316151524, + "loss": 2.5389, + "step": 44500 + }, + { + "epoch": 1.26, + "learning_rate": 0.00028791025472812125, + "loss": 2.5552, + "step": 45000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00028776967629472736, + "loss": 2.5773, + "step": 45500 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002876290978613334, + "loss": 2.5215, + "step": 46000 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002874885194279395, + "loss": 2.5633, + "step": 46500 + }, + { + "epoch": 1.32, + "learning_rate": 0.00028734794099454555, + "loss": 2.5442, + "step": 47000 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002872073625611516, + "loss": 2.5673, + "step": 47500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00028706678412775767, + "loss": 2.5251, + "step": 48000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00028692620569436373, + "loss": 2.4993, + "step": 48500 + }, + { + "epoch": 1.38, + "learning_rate": 0.00028678562726096974, + "loss": 2.5582, + "step": 49000 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002866450488275758, + "loss": 2.5427, + "step": 49500 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002865044703941819, + "loss": 2.5727, + "step": 50000 + }, + { + "epoch": 1.42, + "learning_rate": 0.000286363891960788, + "loss": 2.5369, + "step": 50500 + }, + { + "epoch": 1.43, + "learning_rate": 0.00028622331352739404, + "loss": 2.5114, + "step": 51000 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002860827350940001, + "loss": 2.5438, + "step": 51500 + }, + { + "epoch": 1.46, + "learning_rate": 0.00028594215666060617, + "loss": 2.5043, + "step": 52000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00028580157822721223, + "loss": 2.5161, + "step": 52500 + }, + { + "epoch": 1.49, + "learning_rate": 0.00028566099979381824, + "loss": 2.5474, + "step": 53000 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002855204213604243, + "loss": 2.5154, + "step": 53500 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002853798429270304, + "loss": 2.4809, + "step": 54000 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002852392644936365, + "loss": 2.5301, + "step": 54500 + }, + { + "epoch": 1.54, + "learning_rate": 0.00028509868606024254, + "loss": 2.504, + "step": 55000 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002849581076268486, + "loss": 2.482, + "step": 55500 + }, + { + "epoch": 1.57, + "learning_rate": 0.00028481752919345466, + "loss": 2.5116, + "step": 56000 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002846769507600607, + "loss": 2.4682, + "step": 56500 + }, + { + "epoch": 1.6, + "learning_rate": 0.00028453637232666673, + "loss": 2.5098, + "step": 57000 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002843957938932728, + "loss": 2.4829, + "step": 57500 + }, + { + "epoch": 1.63, + "learning_rate": 0.00028425521545987886, + "loss": 2.498, + "step": 58000 + }, + { + "epoch": 1.64, + "learning_rate": 0.00028411463702648497, + "loss": 2.4492, + "step": 58500 + }, + { + "epoch": 1.66, + "learning_rate": 0.00028397405859309103, + "loss": 2.4253, + "step": 59000 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002838334801596971, + "loss": 2.4648, + "step": 59500 + }, + { + "epoch": 1.68, + "learning_rate": 0.00028369290172630316, + "loss": 2.4785, + "step": 60000 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002835523232929092, + "loss": 2.5128, + "step": 60500 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002834117448595153, + "loss": 2.4789, + "step": 61000 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002832711664261213, + "loss": 2.4908, + "step": 61500 + }, + { + "epoch": 1.74, + "learning_rate": 0.00028313058799272735, + "loss": 2.4679, + "step": 62000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00028299000955933347, + "loss": 2.4972, + "step": 62500 + }, + { + "epoch": 1.77, + "learning_rate": 0.00028284943112593953, + "loss": 2.4933, + "step": 63000 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002827088526925456, + "loss": 2.427, + "step": 63500 + }, + { + "epoch": 1.8, + "learning_rate": 0.00028256827425915165, + "loss": 2.4344, + "step": 64000 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002824276958257577, + "loss": 2.4604, + "step": 64500 + }, + { + "epoch": 1.82, + "learning_rate": 0.0002822871173923638, + "loss": 2.4777, + "step": 65000 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002821465389589698, + "loss": 2.4681, + "step": 65500 + }, + { + "epoch": 1.85, + "learning_rate": 0.00028200596052557585, + "loss": 2.4243, + "step": 66000 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002818653820921819, + "loss": 2.4423, + "step": 66500 + }, + { + "epoch": 1.88, + "learning_rate": 0.000281724803658788, + "loss": 2.4525, + "step": 67000 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002815842252253941, + "loss": 2.4513, + "step": 67500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00028144364679200015, + "loss": 2.4303, + "step": 68000 + }, + { + "epoch": 1.92, + "learning_rate": 0.0002813030683586062, + "loss": 2.4732, + "step": 68500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00028116248992521227, + "loss": 2.4074, + "step": 69000 + }, + { + "epoch": 1.95, + "learning_rate": 0.0002810219114918183, + "loss": 2.4309, + "step": 69500 + }, + { + "epoch": 1.96, + "learning_rate": 0.00028088133305842434, + "loss": 2.4698, + "step": 70000 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002807407546250304, + "loss": 2.4569, + "step": 70500 + }, + { + "epoch": 1.99, + "learning_rate": 0.0002806001761916365, + "loss": 2.4364, + "step": 71000 + }, + { + "epoch": 2.01, + "learning_rate": 0.0002804595977582426, + "loss": 2.4081, + "step": 71500 + }, + { + "epoch": 2.02, + "learning_rate": 0.00028031901932484864, + "loss": 2.3468, + "step": 72000 + }, + { + "epoch": 2.03, + "learning_rate": 0.0002801784408914547, + "loss": 2.3954, + "step": 72500 + }, + { + "epoch": 2.05, + "learning_rate": 0.00028003786245806077, + "loss": 2.3745, + "step": 73000 + }, + { + "epoch": 2.06, + "learning_rate": 0.0002798972840246668, + "loss": 2.399, + "step": 73500 + }, + { + "epoch": 2.08, + "learning_rate": 0.00027975670559127284, + "loss": 2.3572, + "step": 74000 + }, + { + "epoch": 2.09, + "learning_rate": 0.0002796161271578789, + "loss": 2.3931, + "step": 74500 + }, + { + "epoch": 2.1, + "learning_rate": 0.00027947554872448496, + "loss": 2.3707, + "step": 75000 + }, + { + "epoch": 2.12, + "learning_rate": 0.0002793349702910911, + "loss": 2.425, + "step": 75500 + }, + { + "epoch": 2.13, + "learning_rate": 0.00027919439185769714, + "loss": 2.3634, + "step": 76000 + }, + { + "epoch": 2.15, + "learning_rate": 0.0002790538134243032, + "loss": 2.3933, + "step": 76500 + }, + { + "epoch": 2.16, + "learning_rate": 0.00027891323499090926, + "loss": 2.356, + "step": 77000 + }, + { + "epoch": 2.17, + "learning_rate": 0.00027877265655751527, + "loss": 2.3819, + "step": 77500 + }, + { + "epoch": 2.19, + "learning_rate": 0.00027863207812412133, + "loss": 2.3687, + "step": 78000 + }, + { + "epoch": 2.2, + "learning_rate": 0.0002784914996907274, + "loss": 2.3515, + "step": 78500 + }, + { + "epoch": 2.22, + "learning_rate": 0.00027835092125733346, + "loss": 2.3663, + "step": 79000 + }, + { + "epoch": 2.23, + "learning_rate": 0.0002782103428239396, + "loss": 2.3916, + "step": 79500 + }, + { + "epoch": 2.25, + "learning_rate": 0.00027806976439054563, + "loss": 2.3404, + "step": 80000 + }, + { + "epoch": 2.26, + "learning_rate": 0.0002779291859571517, + "loss": 2.3482, + "step": 80500 + }, + { + "epoch": 2.27, + "learning_rate": 0.00027778860752375776, + "loss": 2.3524, + "step": 81000 + }, + { + "epoch": 2.29, + "learning_rate": 0.00027764802909036377, + "loss": 2.3759, + "step": 81500 + }, + { + "epoch": 2.3, + "learning_rate": 0.00027750745065696983, + "loss": 2.3498, + "step": 82000 + }, + { + "epoch": 2.32, + "learning_rate": 0.0002773668722235759, + "loss": 2.3667, + "step": 82500 + }, + { + "epoch": 2.33, + "learning_rate": 0.00027722629379018195, + "loss": 2.3777, + "step": 83000 + }, + { + "epoch": 2.34, + "learning_rate": 0.000277085715356788, + "loss": 2.3955, + "step": 83500 + }, + { + "epoch": 2.36, + "learning_rate": 0.00027694513692339413, + "loss": 2.3583, + "step": 84000 + }, + { + "epoch": 2.37, + "learning_rate": 0.0002768045584900002, + "loss": 2.376, + "step": 84500 + }, + { + "epoch": 2.39, + "learning_rate": 0.00027666398005660625, + "loss": 2.3855, + "step": 85000 + }, + { + "epoch": 2.4, + "learning_rate": 0.00027652340162321226, + "loss": 2.3843, + "step": 85500 + }, + { + "epoch": 2.41, + "learning_rate": 0.0002763828231898183, + "loss": 2.3652, + "step": 86000 + }, + { + "epoch": 2.43, + "learning_rate": 0.0002762422447564244, + "loss": 2.3697, + "step": 86500 + }, + { + "epoch": 2.44, + "learning_rate": 0.00027610166632303045, + "loss": 2.3478, + "step": 87000 + }, + { + "epoch": 2.46, + "learning_rate": 0.0002759610878896365, + "loss": 2.3332, + "step": 87500 + }, + { + "epoch": 2.47, + "learning_rate": 0.0002758205094562426, + "loss": 2.381, + "step": 88000 + }, + { + "epoch": 2.48, + "learning_rate": 0.0002756799310228487, + "loss": 2.3139, + "step": 88500 + }, + { + "epoch": 2.5, + "learning_rate": 0.00027553935258945475, + "loss": 2.3374, + "step": 89000 + }, + { + "epoch": 2.51, + "learning_rate": 0.00027539877415606076, + "loss": 2.3663, + "step": 89500 + }, + { + "epoch": 2.53, + "learning_rate": 0.0002752581957226668, + "loss": 2.3629, + "step": 90000 + }, + { + "epoch": 2.54, + "learning_rate": 0.0002751176172892729, + "loss": 2.3424, + "step": 90500 + }, + { + "epoch": 2.55, + "learning_rate": 0.00027497703885587894, + "loss": 2.3227, + "step": 91000 + }, + { + "epoch": 2.57, + "learning_rate": 0.000274836460422485, + "loss": 2.331, + "step": 91500 + }, + { + "epoch": 2.58, + "learning_rate": 0.00027469588198909107, + "loss": 2.3293, + "step": 92000 + }, + { + "epoch": 2.6, + "learning_rate": 0.0002745553035556972, + "loss": 2.343, + "step": 92500 + }, + { + "epoch": 2.61, + "learning_rate": 0.00027441472512230325, + "loss": 2.3556, + "step": 93000 + }, + { + "epoch": 2.62, + "learning_rate": 0.0002742741466889093, + "loss": 2.3014, + "step": 93500 + }, + { + "epoch": 2.64, + "learning_rate": 0.0002741335682555153, + "loss": 2.3458, + "step": 94000 + }, + { + "epoch": 2.65, + "learning_rate": 0.0002739929898221214, + "loss": 2.3415, + "step": 94500 + }, + { + "epoch": 2.67, + "learning_rate": 0.00027385241138872744, + "loss": 2.3327, + "step": 95000 + }, + { + "epoch": 2.68, + "learning_rate": 0.0002737118329553335, + "loss": 2.3377, + "step": 95500 + }, + { + "epoch": 2.69, + "learning_rate": 0.00027357125452193956, + "loss": 2.3452, + "step": 96000 + }, + { + "epoch": 2.71, + "learning_rate": 0.0002734306760885457, + "loss": 2.3191, + "step": 96500 + }, + { + "epoch": 2.72, + "learning_rate": 0.00027329009765515174, + "loss": 2.3126, + "step": 97000 + }, + { + "epoch": 2.74, + "learning_rate": 0.0002731495192217578, + "loss": 2.3646, + "step": 97500 + }, + { + "epoch": 2.75, + "learning_rate": 0.0002730089407883638, + "loss": 2.3326, + "step": 98000 + }, + { + "epoch": 2.76, + "learning_rate": 0.00027286836235496987, + "loss": 2.3596, + "step": 98500 + }, + { + "epoch": 2.78, + "learning_rate": 0.00027272778392157593, + "loss": 2.3274, + "step": 99000 + }, + { + "epoch": 2.79, + "learning_rate": 0.000272587205488182, + "loss": 2.2924, + "step": 99500 + }, + { + "epoch": 2.81, + "learning_rate": 0.00027244662705478806, + "loss": 2.3603, + "step": 100000 + }, + { + "epoch": 2.82, + "learning_rate": 0.0002723060486213941, + "loss": 2.354, + "step": 100500 + }, + { + "epoch": 2.83, + "learning_rate": 0.00027216547018800024, + "loss": 2.3229, + "step": 101000 + }, + { + "epoch": 2.85, + "learning_rate": 0.0002720248917546063, + "loss": 2.3265, + "step": 101500 + }, + { + "epoch": 2.86, + "learning_rate": 0.0002718843133212123, + "loss": 2.3141, + "step": 102000 + }, + { + "epoch": 2.88, + "learning_rate": 0.00027174373488781837, + "loss": 2.3071, + "step": 102500 + }, + { + "epoch": 2.89, + "learning_rate": 0.00027160315645442443, + "loss": 2.3051, + "step": 103000 + }, + { + "epoch": 2.9, + "learning_rate": 0.0002714625780210305, + "loss": 2.3332, + "step": 103500 + }, + { + "epoch": 2.92, + "learning_rate": 0.00027132199958763655, + "loss": 2.3254, + "step": 104000 + }, + { + "epoch": 2.93, + "learning_rate": 0.0002711814211542426, + "loss": 2.2945, + "step": 104500 + }, + { + "epoch": 2.95, + "learning_rate": 0.00027104084272084873, + "loss": 2.3055, + "step": 105000 + }, + { + "epoch": 2.96, + "learning_rate": 0.0002709002642874548, + "loss": 2.3189, + "step": 105500 + }, + { + "epoch": 2.97, + "learning_rate": 0.0002707596858540608, + "loss": 2.2937, + "step": 106000 + }, + { + "epoch": 2.99, + "learning_rate": 0.00027061910742066686, + "loss": 2.283, + "step": 106500 + }, + { + "epoch": 3.0, + "learning_rate": 0.0002704785289872729, + "loss": 2.3291, + "step": 107000 + }, + { + "epoch": 3.02, + "learning_rate": 0.000270337950553879, + "loss": 2.2292, + "step": 107500 + }, + { + "epoch": 3.03, + "learning_rate": 0.00027019737212048505, + "loss": 2.2265, + "step": 108000 + }, + { + "epoch": 3.04, + "learning_rate": 0.0002700567936870911, + "loss": 2.2228, + "step": 108500 + }, + { + "epoch": 3.06, + "learning_rate": 0.00026991621525369717, + "loss": 2.2762, + "step": 109000 + }, + { + "epoch": 3.07, + "learning_rate": 0.0002697756368203033, + "loss": 2.2504, + "step": 109500 + }, + { + "epoch": 3.09, + "learning_rate": 0.0002696350583869093, + "loss": 2.2539, + "step": 110000 + }, + { + "epoch": 3.1, + "learning_rate": 0.00026949447995351536, + "loss": 2.2537, + "step": 110500 + }, + { + "epoch": 3.11, + "learning_rate": 0.0002693539015201214, + "loss": 2.2348, + "step": 111000 + }, + { + "epoch": 3.13, + "learning_rate": 0.0002692133230867275, + "loss": 2.2565, + "step": 111500 + }, + { + "epoch": 3.14, + "learning_rate": 0.00026907274465333354, + "loss": 2.2512, + "step": 112000 + }, + { + "epoch": 3.16, + "learning_rate": 0.0002689321662199396, + "loss": 2.2837, + "step": 112500 + }, + { + "epoch": 3.17, + "learning_rate": 0.00026879158778654567, + "loss": 2.2672, + "step": 113000 + }, + { + "epoch": 3.19, + "learning_rate": 0.0002686510093531518, + "loss": 2.27, + "step": 113500 + }, + { + "epoch": 3.2, + "learning_rate": 0.0002685104309197578, + "loss": 2.2441, + "step": 114000 + }, + { + "epoch": 3.21, + "learning_rate": 0.00026836985248636385, + "loss": 2.2211, + "step": 114500 + }, + { + "epoch": 3.23, + "learning_rate": 0.0002682292740529699, + "loss": 2.2713, + "step": 115000 + }, + { + "epoch": 3.24, + "learning_rate": 0.000268088695619576, + "loss": 2.2502, + "step": 115500 + }, + { + "epoch": 3.26, + "learning_rate": 0.00026794811718618204, + "loss": 2.26, + "step": 116000 + }, + { + "epoch": 3.27, + "learning_rate": 0.0002678075387527881, + "loss": 2.2297, + "step": 116500 + }, + { + "epoch": 3.28, + "learning_rate": 0.00026766696031939416, + "loss": 2.2647, + "step": 117000 + }, + { + "epoch": 3.3, + "learning_rate": 0.0002675263818860002, + "loss": 2.2684, + "step": 117500 + }, + { + "epoch": 3.31, + "learning_rate": 0.0002673858034526063, + "loss": 2.2555, + "step": 118000 + }, + { + "epoch": 3.33, + "learning_rate": 0.00026724522501921235, + "loss": 2.2513, + "step": 118500 + }, + { + "epoch": 3.34, + "learning_rate": 0.0002671046465858184, + "loss": 2.2653, + "step": 119000 + }, + { + "epoch": 3.35, + "learning_rate": 0.0002669640681524245, + "loss": 2.2498, + "step": 119500 + }, + { + "epoch": 3.37, + "learning_rate": 0.00026682348971903054, + "loss": 2.2688, + "step": 120000 + }, + { + "epoch": 3.38, + "learning_rate": 0.0002666829112856366, + "loss": 2.2662, + "step": 120500 + }, + { + "epoch": 3.4, + "learning_rate": 0.00026654233285224266, + "loss": 2.2642, + "step": 121000 + }, + { + "epoch": 3.41, + "learning_rate": 0.0002664017544188487, + "loss": 2.2346, + "step": 121500 + }, + { + "epoch": 3.42, + "learning_rate": 0.00026626117598545484, + "loss": 2.2195, + "step": 122000 + }, + { + "epoch": 3.44, + "learning_rate": 0.00026612059755206084, + "loss": 2.2449, + "step": 122500 + }, + { + "epoch": 3.45, + "learning_rate": 0.0002659800191186669, + "loss": 2.2538, + "step": 123000 + }, + { + "epoch": 3.47, + "learning_rate": 0.00026583944068527297, + "loss": 2.2572, + "step": 123500 + }, + { + "epoch": 3.48, + "learning_rate": 0.00026569886225187903, + "loss": 2.2885, + "step": 124000 + }, + { + "epoch": 3.49, + "learning_rate": 0.0002655582838184851, + "loss": 2.2428, + "step": 124500 + }, + { + "epoch": 3.51, + "learning_rate": 0.00026541770538509115, + "loss": 2.2527, + "step": 125000 + }, + { + "epoch": 3.52, + "learning_rate": 0.0002652771269516972, + "loss": 2.2884, + "step": 125500 + }, + { + "epoch": 3.54, + "learning_rate": 0.0002651365485183033, + "loss": 2.2668, + "step": 126000 + }, + { + "epoch": 3.55, + "learning_rate": 0.00026499597008490934, + "loss": 2.2687, + "step": 126500 + }, + { + "epoch": 3.56, + "learning_rate": 0.0002648553916515154, + "loss": 2.2512, + "step": 127000 + }, + { + "epoch": 3.58, + "learning_rate": 0.00026471481321812146, + "loss": 2.2464, + "step": 127500 + }, + { + "epoch": 3.59, + "learning_rate": 0.0002645742347847275, + "loss": 2.2876, + "step": 128000 + }, + { + "epoch": 3.61, + "learning_rate": 0.0002644336563513336, + "loss": 2.232, + "step": 128500 + }, + { + "epoch": 3.62, + "learning_rate": 0.00026429307791793965, + "loss": 2.2532, + "step": 129000 + }, + { + "epoch": 3.63, + "learning_rate": 0.0002641524994845457, + "loss": 2.2571, + "step": 129500 + }, + { + "epoch": 3.65, + "learning_rate": 0.0002640119210511518, + "loss": 2.272, + "step": 130000 + }, + { + "epoch": 3.66, + "learning_rate": 0.00026387134261775784, + "loss": 2.2685, + "step": 130500 + }, + { + "epoch": 3.68, + "learning_rate": 0.0002637307641843639, + "loss": 2.2876, + "step": 131000 + }, + { + "epoch": 3.69, + "learning_rate": 0.00026359018575096996, + "loss": 2.2326, + "step": 131500 + }, + { + "epoch": 3.7, + "learning_rate": 0.000263449607317576, + "loss": 2.2249, + "step": 132000 + }, + { + "epoch": 3.72, + "learning_rate": 0.0002633090288841821, + "loss": 2.2476, + "step": 132500 + }, + { + "epoch": 3.73, + "learning_rate": 0.00026316845045078815, + "loss": 2.2363, + "step": 133000 + }, + { + "epoch": 3.75, + "learning_rate": 0.0002630278720173942, + "loss": 2.2405, + "step": 133500 + }, + { + "epoch": 3.76, + "learning_rate": 0.00026288729358400027, + "loss": 2.2412, + "step": 134000 + }, + { + "epoch": 3.77, + "learning_rate": 0.00026274671515060633, + "loss": 2.2385, + "step": 134500 + }, + { + "epoch": 3.79, + "learning_rate": 0.0002626061367172124, + "loss": 2.2333, + "step": 135000 + }, + { + "epoch": 3.8, + "learning_rate": 0.00026246555828381846, + "loss": 2.2556, + "step": 135500 + }, + { + "epoch": 3.82, + "learning_rate": 0.0002623249798504245, + "loss": 2.2539, + "step": 136000 + }, + { + "epoch": 3.83, + "learning_rate": 0.0002621844014170306, + "loss": 2.2738, + "step": 136500 + }, + { + "epoch": 3.84, + "learning_rate": 0.00026204382298363664, + "loss": 2.2561, + "step": 137000 + }, + { + "epoch": 3.86, + "learning_rate": 0.0002619032445502427, + "loss": 2.22, + "step": 137500 + }, + { + "epoch": 3.87, + "learning_rate": 0.00026176266611684876, + "loss": 2.2397, + "step": 138000 + }, + { + "epoch": 3.89, + "learning_rate": 0.0002616220876834548, + "loss": 2.2599, + "step": 138500 + }, + { + "epoch": 3.9, + "learning_rate": 0.0002614815092500609, + "loss": 2.2284, + "step": 139000 + }, + { + "epoch": 3.91, + "learning_rate": 0.00026134093081666695, + "loss": 2.248, + "step": 139500 + }, + { + "epoch": 3.93, + "learning_rate": 0.000261200352383273, + "loss": 2.2485, + "step": 140000 + }, + { + "epoch": 3.94, + "learning_rate": 0.0002610597739498791, + "loss": 2.28, + "step": 140500 + }, + { + "epoch": 3.96, + "learning_rate": 0.00026091919551648514, + "loss": 2.209, + "step": 141000 + }, + { + "epoch": 3.97, + "learning_rate": 0.0002607786170830912, + "loss": 2.2213, + "step": 141500 + }, + { + "epoch": 3.98, + "learning_rate": 0.00026063803864969726, + "loss": 2.2504, + "step": 142000 + }, + { + "epoch": 4.0, + "learning_rate": 0.0002604974602163033, + "loss": 2.2513, + "step": 142500 + }, + { + "epoch": 4.01, + "learning_rate": 0.0002603568817829094, + "loss": 2.1913, + "step": 143000 + }, + { + "epoch": 4.03, + "learning_rate": 0.00026021630334951545, + "loss": 2.1501, + "step": 143500 + }, + { + "epoch": 4.04, + "learning_rate": 0.0002600757249161215, + "loss": 2.1712, + "step": 144000 + }, + { + "epoch": 4.06, + "learning_rate": 0.00025993514648272757, + "loss": 2.1857, + "step": 144500 + }, + { + "epoch": 4.07, + "learning_rate": 0.00025979456804933363, + "loss": 2.173, + "step": 145000 + }, + { + "epoch": 4.08, + "learning_rate": 0.0002596539896159397, + "loss": 2.192, + "step": 145500 + }, + { + "epoch": 4.1, + "learning_rate": 0.00025951341118254576, + "loss": 2.1625, + "step": 146000 + }, + { + "epoch": 4.11, + "learning_rate": 0.0002593728327491518, + "loss": 2.1566, + "step": 146500 + }, + { + "epoch": 4.13, + "learning_rate": 0.0002592322543157579, + "loss": 2.2201, + "step": 147000 + }, + { + "epoch": 4.14, + "learning_rate": 0.00025909167588236394, + "loss": 2.1807, + "step": 147500 + }, + { + "epoch": 4.15, + "learning_rate": 0.00025895109744897, + "loss": 2.1862, + "step": 148000 + }, + { + "epoch": 4.17, + "learning_rate": 0.00025881051901557607, + "loss": 2.1699, + "step": 148500 + }, + { + "epoch": 4.18, + "learning_rate": 0.00025866994058218213, + "loss": 2.1755, + "step": 149000 + }, + { + "epoch": 4.2, + "learning_rate": 0.0002585293621487882, + "loss": 2.1475, + "step": 149500 + }, + { + "epoch": 4.21, + "learning_rate": 0.00025838878371539425, + "loss": 2.1974, + "step": 150000 + }, + { + "epoch": 4.22, + "learning_rate": 0.0002582482052820003, + "loss": 2.1115, + "step": 150500 + }, + { + "epoch": 4.24, + "learning_rate": 0.0002581076268486064, + "loss": 2.1923, + "step": 151000 + }, + { + "epoch": 4.25, + "learning_rate": 0.00025796704841521244, + "loss": 2.1547, + "step": 151500 + }, + { + "epoch": 4.27, + "learning_rate": 0.0002578264699818185, + "loss": 2.1803, + "step": 152000 + }, + { + "epoch": 4.28, + "learning_rate": 0.00025768589154842456, + "loss": 2.1682, + "step": 152500 + }, + { + "epoch": 4.29, + "learning_rate": 0.0002575453131150306, + "loss": 2.1428, + "step": 153000 + }, + { + "epoch": 4.31, + "learning_rate": 0.0002574047346816367, + "loss": 2.1664, + "step": 153500 + }, + { + "epoch": 4.32, + "learning_rate": 0.00025726415624824275, + "loss": 2.1683, + "step": 154000 + }, + { + "epoch": 4.34, + "learning_rate": 0.0002571235778148488, + "loss": 2.1786, + "step": 154500 + }, + { + "epoch": 4.35, + "learning_rate": 0.00025698299938145487, + "loss": 2.1604, + "step": 155000 + }, + { + "epoch": 4.36, + "learning_rate": 0.00025684242094806093, + "loss": 2.1749, + "step": 155500 + }, + { + "epoch": 4.38, + "learning_rate": 0.000256701842514667, + "loss": 2.162, + "step": 156000 + }, + { + "epoch": 4.39, + "learning_rate": 0.00025656126408127306, + "loss": 2.1696, + "step": 156500 + }, + { + "epoch": 4.41, + "learning_rate": 0.0002564206856478791, + "loss": 2.1784, + "step": 157000 + }, + { + "epoch": 4.42, + "learning_rate": 0.0002562801072144852, + "loss": 2.1277, + "step": 157500 + }, + { + "epoch": 4.43, + "learning_rate": 0.00025613952878109124, + "loss": 2.1739, + "step": 158000 + }, + { + "epoch": 4.45, + "learning_rate": 0.0002559989503476973, + "loss": 2.1694, + "step": 158500 + }, + { + "epoch": 4.46, + "learning_rate": 0.00025585837191430337, + "loss": 2.2062, + "step": 159000 + }, + { + "epoch": 4.48, + "learning_rate": 0.00025571779348090943, + "loss": 2.1801, + "step": 159500 + }, + { + "epoch": 4.49, + "learning_rate": 0.0002555772150475155, + "loss": 2.1527, + "step": 160000 + }, + { + "epoch": 4.5, + "learning_rate": 0.00025543663661412155, + "loss": 2.1805, + "step": 160500 + }, + { + "epoch": 4.52, + "learning_rate": 0.0002552960581807276, + "loss": 2.1429, + "step": 161000 + }, + { + "epoch": 4.53, + "learning_rate": 0.0002551554797473337, + "loss": 2.181, + "step": 161500 + }, + { + "epoch": 4.55, + "learning_rate": 0.00025501490131393974, + "loss": 2.2047, + "step": 162000 + }, + { + "epoch": 4.56, + "learning_rate": 0.0002548743228805458, + "loss": 2.1467, + "step": 162500 + }, + { + "epoch": 4.57, + "learning_rate": 0.00025473374444715186, + "loss": 2.1619, + "step": 163000 + }, + { + "epoch": 4.59, + "learning_rate": 0.0002545931660137579, + "loss": 2.2002, + "step": 163500 + }, + { + "epoch": 4.6, + "learning_rate": 0.000254452587580364, + "loss": 2.1779, + "step": 164000 + }, + { + "epoch": 4.62, + "learning_rate": 0.00025431200914697005, + "loss": 2.175, + "step": 164500 + }, + { + "epoch": 4.63, + "learning_rate": 0.0002541714307135761, + "loss": 2.1816, + "step": 165000 + }, + { + "epoch": 4.64, + "learning_rate": 0.00025403085228018217, + "loss": 2.1696, + "step": 165500 + }, + { + "epoch": 4.66, + "learning_rate": 0.00025389027384678823, + "loss": 2.2045, + "step": 166000 + }, + { + "epoch": 4.67, + "learning_rate": 0.0002537496954133943, + "loss": 2.1508, + "step": 166500 + }, + { + "epoch": 4.69, + "learning_rate": 0.00025360911698000036, + "loss": 2.1918, + "step": 167000 + }, + { + "epoch": 4.7, + "learning_rate": 0.0002534685385466064, + "loss": 2.1761, + "step": 167500 + }, + { + "epoch": 4.71, + "learning_rate": 0.0002533279601132125, + "loss": 2.155, + "step": 168000 + }, + { + "epoch": 4.73, + "learning_rate": 0.00025318738167981854, + "loss": 2.1483, + "step": 168500 + }, + { + "epoch": 4.74, + "learning_rate": 0.0002530468032464246, + "loss": 2.1618, + "step": 169000 + }, + { + "epoch": 4.76, + "learning_rate": 0.00025290622481303067, + "loss": 2.147, + "step": 169500 + }, + { + "epoch": 4.77, + "learning_rate": 0.00025276564637963673, + "loss": 2.1718, + "step": 170000 + }, + { + "epoch": 4.78, + "learning_rate": 0.0002526250679462428, + "loss": 2.1329, + "step": 170500 + }, + { + "epoch": 4.8, + "learning_rate": 0.00025248448951284885, + "loss": 2.1677, + "step": 171000 + }, + { + "epoch": 4.81, + "learning_rate": 0.0002523439110794549, + "loss": 2.1127, + "step": 171500 + }, + { + "epoch": 4.83, + "learning_rate": 0.000252203332646061, + "loss": 2.1832, + "step": 172000 + }, + { + "epoch": 4.84, + "learning_rate": 0.00025206275421266704, + "loss": 2.1843, + "step": 172500 + }, + { + "epoch": 4.85, + "learning_rate": 0.0002519221757792731, + "loss": 2.2132, + "step": 173000 + }, + { + "epoch": 4.87, + "learning_rate": 0.00025178159734587916, + "loss": 2.1789, + "step": 173500 + }, + { + "epoch": 4.88, + "learning_rate": 0.0002516410189124852, + "loss": 2.1333, + "step": 174000 + }, + { + "epoch": 4.9, + "learning_rate": 0.0002515004404790913, + "loss": 2.1702, + "step": 174500 + }, + { + "epoch": 4.91, + "learning_rate": 0.00025135986204569735, + "loss": 2.1817, + "step": 175000 + }, + { + "epoch": 4.93, + "learning_rate": 0.0002512192836123034, + "loss": 2.1101, + "step": 175500 + }, + { + "epoch": 4.94, + "learning_rate": 0.00025107870517890947, + "loss": 2.1573, + "step": 176000 + }, + { + "epoch": 4.95, + "learning_rate": 0.00025093812674551553, + "loss": 2.1691, + "step": 176500 + }, + { + "epoch": 4.97, + "learning_rate": 0.0002507975483121216, + "loss": 2.1822, + "step": 177000 + }, + { + "epoch": 4.98, + "learning_rate": 0.00025065696987872766, + "loss": 2.1869, + "step": 177500 + }, + { + "epoch": 5.0, + "learning_rate": 0.0002505163914453337, + "loss": 2.1731, + "step": 178000 + }, + { + "epoch": 5.01, + "learning_rate": 0.0002503758130119398, + "loss": 2.1219, + "step": 178500 + }, + { + "epoch": 5.02, + "learning_rate": 0.00025023523457854584, + "loss": 2.0762, + "step": 179000 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002500946561451519, + "loss": 2.1061, + "step": 179500 + }, + { + "epoch": 5.05, + "learning_rate": 0.00024995407771175797, + "loss": 2.0847, + "step": 180000 + }, + { + "epoch": 5.07, + "learning_rate": 0.00024981349927836403, + "loss": 2.1121, + "step": 180500 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002496729208449701, + "loss": 2.099, + "step": 181000 + }, + { + "epoch": 5.09, + "learning_rate": 0.00024953234241157615, + "loss": 2.1179, + "step": 181500 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002493917639781822, + "loss": 2.0985, + "step": 182000 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002492511855447883, + "loss": 2.1106, + "step": 182500 + }, + { + "epoch": 5.14, + "learning_rate": 0.00024911060711139434, + "loss": 2.0744, + "step": 183000 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002489700286780004, + "loss": 2.0858, + "step": 183500 + }, + { + "epoch": 5.16, + "learning_rate": 0.00024882945024460646, + "loss": 2.0899, + "step": 184000 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002486888718112125, + "loss": 2.0637, + "step": 184500 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002485482933778186, + "loss": 2.1075, + "step": 185000 + }, + { + "epoch": 5.21, + "learning_rate": 0.00024840771494442465, + "loss": 2.1119, + "step": 185500 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002482671365110307, + "loss": 2.0968, + "step": 186000 + }, + { + "epoch": 5.23, + "learning_rate": 0.00024812655807763677, + "loss": 2.1092, + "step": 186500 + }, + { + "epoch": 5.25, + "learning_rate": 0.00024798597964424283, + "loss": 2.0675, + "step": 187000 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002478454012108489, + "loss": 2.0575, + "step": 187500 + }, + { + "epoch": 5.28, + "learning_rate": 0.00024770482277745496, + "loss": 2.0931, + "step": 188000 + }, + { + "epoch": 5.29, + "learning_rate": 0.000247564244344061, + "loss": 2.0929, + "step": 188500 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002474236659106671, + "loss": 2.0896, + "step": 189000 + }, + { + "epoch": 5.32, + "learning_rate": 0.00024728308747727314, + "loss": 2.1169, + "step": 189500 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002471425090438792, + "loss": 2.1009, + "step": 190000 + }, + { + "epoch": 5.35, + "learning_rate": 0.00024700193061048527, + "loss": 2.1346, + "step": 190500 + }, + { + "epoch": 5.36, + "learning_rate": 0.00024686135217709133, + "loss": 2.0952, + "step": 191000 + }, + { + "epoch": 5.37, + "learning_rate": 0.00024672077374369734, + "loss": 2.1042, + "step": 191500 + }, + { + "epoch": 5.39, + "learning_rate": 0.00024658019531030345, + "loss": 2.1056, + "step": 192000 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002464396168769095, + "loss": 2.0961, + "step": 192500 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002462990384435156, + "loss": 2.1284, + "step": 193000 + }, + { + "epoch": 5.43, + "learning_rate": 0.00024615846001012164, + "loss": 2.0954, + "step": 193500 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002460178815767277, + "loss": 2.1349, + "step": 194000 + }, + { + "epoch": 5.46, + "learning_rate": 0.00024587730314333376, + "loss": 2.0941, + "step": 194500 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002457367247099398, + "loss": 2.1228, + "step": 195000 + }, + { + "epoch": 5.49, + "learning_rate": 0.00024559614627654583, + "loss": 2.0905, + "step": 195500 + }, + { + "epoch": 5.5, + "learning_rate": 0.00024545556784315195, + "loss": 2.0905, + "step": 196000 + }, + { + "epoch": 5.51, + "learning_rate": 0.000245314989409758, + "loss": 2.1323, + "step": 196500 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024517441097636407, + "loss": 2.1154, + "step": 197000 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024503383254297013, + "loss": 2.1118, + "step": 197500 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002448932541095762, + "loss": 2.1114, + "step": 198000 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024475267567618226, + "loss": 2.1129, + "step": 198500 + }, + { + "epoch": 5.58, + "learning_rate": 0.0002446120972427883, + "loss": 2.1318, + "step": 199000 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024447151880939433, + "loss": 2.081, + "step": 199500 + }, + { + "epoch": 5.61, + "learning_rate": 0.0002443309403760004, + "loss": 2.087, + "step": 200000 + }, + { + "epoch": 5.63, + "learning_rate": 0.0002441903619426065, + "loss": 2.1043, + "step": 200500 + }, + { + "epoch": 5.64, + "learning_rate": 0.00024404978350921254, + "loss": 2.1188, + "step": 201000 + }, + { + "epoch": 5.65, + "learning_rate": 0.0002439092050758186, + "loss": 2.1295, + "step": 201500 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002437686266424247, + "loss": 2.0991, + "step": 202000 + }, + { + "epoch": 5.68, + "learning_rate": 0.00024362804820903075, + "loss": 2.0925, + "step": 202500 + }, + { + "epoch": 5.7, + "learning_rate": 0.00024348746977563682, + "loss": 2.0488, + "step": 203000 + }, + { + "epoch": 5.71, + "learning_rate": 0.00024334689134224285, + "loss": 2.0945, + "step": 203500 + }, + { + "epoch": 5.72, + "learning_rate": 0.0002432063129088489, + "loss": 2.1083, + "step": 204000 + }, + { + "epoch": 5.74, + "learning_rate": 0.00024306573447545497, + "loss": 2.1211, + "step": 204500 + }, + { + "epoch": 5.75, + "learning_rate": 0.00024292515604206104, + "loss": 2.1377, + "step": 205000 + }, + { + "epoch": 5.77, + "learning_rate": 0.0002427845776086671, + "loss": 2.1321, + "step": 205500 + }, + { + "epoch": 5.78, + "learning_rate": 0.00024264399917527316, + "loss": 2.1156, + "step": 206000 + }, + { + "epoch": 5.79, + "learning_rate": 0.00024250342074187925, + "loss": 2.0931, + "step": 206500 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002423628423084853, + "loss": 2.1073, + "step": 207000 + }, + { + "epoch": 5.82, + "learning_rate": 0.00024222226387509137, + "loss": 2.1215, + "step": 207500 + }, + { + "epoch": 5.84, + "learning_rate": 0.0002420816854416974, + "loss": 2.1354, + "step": 208000 + }, + { + "epoch": 5.85, + "learning_rate": 0.00024194110700830347, + "loss": 2.0763, + "step": 208500 + }, + { + "epoch": 5.87, + "learning_rate": 0.00024180052857490953, + "loss": 2.1315, + "step": 209000 + }, + { + "epoch": 5.88, + "learning_rate": 0.0002416599501415156, + "loss": 2.1101, + "step": 209500 + }, + { + "epoch": 5.89, + "learning_rate": 0.00024151937170812166, + "loss": 2.0826, + "step": 210000 + }, + { + "epoch": 5.91, + "learning_rate": 0.00024137879327472774, + "loss": 2.1161, + "step": 210500 + }, + { + "epoch": 5.92, + "learning_rate": 0.0002412382148413338, + "loss": 2.1167, + "step": 211000 + }, + { + "epoch": 5.94, + "learning_rate": 0.00024109763640793987, + "loss": 2.1309, + "step": 211500 + }, + { + "epoch": 5.95, + "learning_rate": 0.0002409570579745459, + "loss": 2.0976, + "step": 212000 + }, + { + "epoch": 5.96, + "learning_rate": 0.00024081647954115197, + "loss": 2.1108, + "step": 212500 + }, + { + "epoch": 5.98, + "learning_rate": 0.00024067590110775803, + "loss": 2.1444, + "step": 213000 + }, + { + "epoch": 5.99, + "learning_rate": 0.0002405353226743641, + "loss": 2.0976, + "step": 213500 + }, + { + "epoch": 6.01, + "learning_rate": 0.00024039474424097015, + "loss": 2.0973, + "step": 214000 + }, + { + "epoch": 6.02, + "learning_rate": 0.0002402541658075762, + "loss": 2.0388, + "step": 214500 + }, + { + "epoch": 6.03, + "learning_rate": 0.0002401135873741823, + "loss": 2.0271, + "step": 215000 + }, + { + "epoch": 6.05, + "learning_rate": 0.00023997300894078836, + "loss": 2.0473, + "step": 215500 + }, + { + "epoch": 6.06, + "learning_rate": 0.0002398324305073944, + "loss": 2.0443, + "step": 216000 + }, + { + "epoch": 6.08, + "learning_rate": 0.00023969185207400046, + "loss": 2.0398, + "step": 216500 + }, + { + "epoch": 6.09, + "learning_rate": 0.00023955127364060652, + "loss": 2.049, + "step": 217000 + }, + { + "epoch": 6.1, + "learning_rate": 0.00023941069520721258, + "loss": 2.0455, + "step": 217500 + }, + { + "epoch": 6.12, + "learning_rate": 0.00023927011677381865, + "loss": 2.0773, + "step": 218000 + }, + { + "epoch": 6.13, + "learning_rate": 0.0002391295383404247, + "loss": 2.025, + "step": 218500 + }, + { + "epoch": 6.15, + "learning_rate": 0.0002389889599070308, + "loss": 2.0532, + "step": 219000 + }, + { + "epoch": 6.16, + "learning_rate": 0.00023884838147363686, + "loss": 2.0117, + "step": 219500 + }, + { + "epoch": 6.17, + "learning_rate": 0.0002387078030402429, + "loss": 2.0746, + "step": 220000 + }, + { + "epoch": 6.19, + "learning_rate": 0.00023856722460684896, + "loss": 2.1025, + "step": 220500 + }, + { + "epoch": 6.2, + "learning_rate": 0.00023842664617345502, + "loss": 2.0361, + "step": 221000 + }, + { + "epoch": 6.22, + "learning_rate": 0.00023828606774006108, + "loss": 2.0572, + "step": 221500 + }, + { + "epoch": 6.23, + "learning_rate": 0.00023814548930666714, + "loss": 2.0498, + "step": 222000 + }, + { + "epoch": 6.24, + "learning_rate": 0.0002380049108732732, + "loss": 2.0354, + "step": 222500 + }, + { + "epoch": 6.26, + "learning_rate": 0.00023786433243987927, + "loss": 2.0455, + "step": 223000 + }, + { + "epoch": 6.27, + "learning_rate": 0.00023772375400648536, + "loss": 2.0328, + "step": 223500 + }, + { + "epoch": 6.29, + "learning_rate": 0.0002375831755730914, + "loss": 2.0704, + "step": 224000 + }, + { + "epoch": 6.3, + "learning_rate": 0.00023744259713969745, + "loss": 2.065, + "step": 224500 + }, + { + "epoch": 6.31, + "learning_rate": 0.00023730201870630351, + "loss": 2.05, + "step": 225000 + }, + { + "epoch": 6.33, + "learning_rate": 0.00023716144027290958, + "loss": 2.0312, + "step": 225500 + }, + { + "epoch": 6.34, + "learning_rate": 0.00023702086183951564, + "loss": 2.0403, + "step": 226000 + }, + { + "epoch": 6.36, + "learning_rate": 0.0002368802834061217, + "loss": 2.0701, + "step": 226500 + }, + { + "epoch": 6.37, + "learning_rate": 0.00023673970497272776, + "loss": 2.0486, + "step": 227000 + }, + { + "epoch": 6.38, + "learning_rate": 0.00023659912653933385, + "loss": 2.049, + "step": 227500 + }, + { + "epoch": 6.4, + "learning_rate": 0.00023645854810593986, + "loss": 2.0493, + "step": 228000 + }, + { + "epoch": 6.41, + "learning_rate": 0.00023631796967254595, + "loss": 2.0892, + "step": 228500 + }, + { + "epoch": 6.43, + "learning_rate": 0.000236177391239152, + "loss": 2.0607, + "step": 229000 + }, + { + "epoch": 6.44, + "learning_rate": 0.00023603681280575807, + "loss": 2.0407, + "step": 229500 + }, + { + "epoch": 6.45, + "learning_rate": 0.00023589623437236413, + "loss": 2.0438, + "step": 230000 + }, + { + "epoch": 6.47, + "learning_rate": 0.0002357556559389702, + "loss": 2.0424, + "step": 230500 + }, + { + "epoch": 6.48, + "learning_rate": 0.00023561507750557626, + "loss": 2.0456, + "step": 231000 + }, + { + "epoch": 6.5, + "learning_rate": 0.00023547449907218232, + "loss": 2.0296, + "step": 231500 + }, + { + "epoch": 6.51, + "learning_rate": 0.00023533392063878835, + "loss": 2.0823, + "step": 232000 + }, + { + "epoch": 6.52, + "learning_rate": 0.00023519334220539444, + "loss": 2.0428, + "step": 232500 + }, + { + "epoch": 6.54, + "learning_rate": 0.0002350527637720005, + "loss": 2.0489, + "step": 233000 + }, + { + "epoch": 6.55, + "learning_rate": 0.00023491218533860657, + "loss": 2.0478, + "step": 233500 + }, + { + "epoch": 6.57, + "learning_rate": 0.00023477160690521263, + "loss": 2.0622, + "step": 234000 + }, + { + "epoch": 6.58, + "learning_rate": 0.0002346310284718187, + "loss": 2.0566, + "step": 234500 + }, + { + "epoch": 6.59, + "learning_rate": 0.00023449045003842475, + "loss": 2.0431, + "step": 235000 + }, + { + "epoch": 6.61, + "learning_rate": 0.00023434987160503081, + "loss": 2.0194, + "step": 235500 + }, + { + "epoch": 6.62, + "learning_rate": 0.00023420929317163685, + "loss": 1.9937, + "step": 236000 + }, + { + "epoch": 6.64, + "learning_rate": 0.0002340687147382429, + "loss": 2.0064, + "step": 236500 + }, + { + "epoch": 6.65, + "learning_rate": 0.000233928136304849, + "loss": 2.0274, + "step": 237000 + }, + { + "epoch": 6.66, + "learning_rate": 0.00023378755787145506, + "loss": 2.0512, + "step": 237500 + }, + { + "epoch": 6.68, + "learning_rate": 0.00023364697943806112, + "loss": 2.0267, + "step": 238000 + }, + { + "epoch": 6.69, + "learning_rate": 0.00023350640100466719, + "loss": 2.0159, + "step": 238500 + }, + { + "epoch": 6.71, + "learning_rate": 0.00023336582257127325, + "loss": 2.0458, + "step": 239000 + }, + { + "epoch": 6.72, + "learning_rate": 0.0002332252441378793, + "loss": 2.037, + "step": 239500 + }, + { + "epoch": 6.74, + "learning_rate": 0.00023308466570448537, + "loss": 2.0356, + "step": 240000 + }, + { + "epoch": 6.75, + "learning_rate": 0.0002329440872710914, + "loss": 2.0204, + "step": 240500 + }, + { + "epoch": 6.76, + "learning_rate": 0.0002328035088376975, + "loss": 2.0731, + "step": 241000 + }, + { + "epoch": 6.78, + "learning_rate": 0.00023266293040430356, + "loss": 2.0473, + "step": 241500 + }, + { + "epoch": 6.79, + "learning_rate": 0.00023252235197090962, + "loss": 2.0236, + "step": 242000 + }, + { + "epoch": 6.81, + "learning_rate": 0.00023238177353751568, + "loss": 2.0754, + "step": 242500 + }, + { + "epoch": 6.82, + "learning_rate": 0.00023224119510412174, + "loss": 2.0216, + "step": 243000 + }, + { + "epoch": 6.83, + "learning_rate": 0.0002321006166707278, + "loss": 2.0726, + "step": 243500 + }, + { + "epoch": 6.85, + "learning_rate": 0.00023196003823733387, + "loss": 2.037, + "step": 244000 + }, + { + "epoch": 6.86, + "learning_rate": 0.0002318194598039399, + "loss": 2.0778, + "step": 244500 + }, + { + "epoch": 6.88, + "learning_rate": 0.00023167888137054596, + "loss": 2.0366, + "step": 245000 + }, + { + "epoch": 6.89, + "learning_rate": 0.00023153830293715205, + "loss": 2.0554, + "step": 245500 + }, + { + "epoch": 6.9, + "learning_rate": 0.00023139772450375811, + "loss": 2.036, + "step": 246000 + }, + { + "epoch": 6.92, + "learning_rate": 0.00023125714607036418, + "loss": 2.0575, + "step": 246500 + }, + { + "epoch": 6.93, + "learning_rate": 0.00023111656763697024, + "loss": 2.0008, + "step": 247000 + }, + { + "epoch": 6.95, + "learning_rate": 0.0002309759892035763, + "loss": 2.0589, + "step": 247500 + }, + { + "epoch": 6.96, + "learning_rate": 0.00023083541077018236, + "loss": 2.0505, + "step": 248000 + }, + { + "epoch": 6.97, + "learning_rate": 0.0002306948323367884, + "loss": 2.0034, + "step": 248500 + }, + { + "epoch": 6.99, + "learning_rate": 0.00023055425390339446, + "loss": 2.0229, + "step": 249000 + }, + { + "epoch": 7.0, + "learning_rate": 0.00023041367547000055, + "loss": 2.0329, + "step": 249500 + }, + { + "epoch": 7.02, + "learning_rate": 0.0002302730970366066, + "loss": 1.9613, + "step": 250000 + }, + { + "epoch": 7.03, + "learning_rate": 0.00023013251860321267, + "loss": 1.9882, + "step": 250500 + }, + { + "epoch": 7.04, + "learning_rate": 0.00022999194016981873, + "loss": 1.9655, + "step": 251000 + }, + { + "epoch": 7.06, + "learning_rate": 0.0002298513617364248, + "loss": 1.9578, + "step": 251500 + }, + { + "epoch": 7.07, + "learning_rate": 0.00022971078330303086, + "loss": 2.033, + "step": 252000 + }, + { + "epoch": 7.09, + "learning_rate": 0.0002295702048696369, + "loss": 1.9843, + "step": 252500 + }, + { + "epoch": 7.1, + "learning_rate": 0.00022942962643624295, + "loss": 1.9682, + "step": 253000 + }, + { + "epoch": 7.11, + "learning_rate": 0.00022928904800284902, + "loss": 1.9537, + "step": 253500 + }, + { + "epoch": 7.13, + "learning_rate": 0.0002291484695694551, + "loss": 2.0058, + "step": 254000 + }, + { + "epoch": 7.14, + "learning_rate": 0.00022900789113606117, + "loss": 1.95, + "step": 254500 + }, + { + "epoch": 7.16, + "learning_rate": 0.00022886731270266723, + "loss": 1.9726, + "step": 255000 + }, + { + "epoch": 7.17, + "learning_rate": 0.0002287267342692733, + "loss": 1.9936, + "step": 255500 + }, + { + "epoch": 7.18, + "learning_rate": 0.00022858615583587935, + "loss": 1.9598, + "step": 256000 + }, + { + "epoch": 7.2, + "learning_rate": 0.0002284455774024854, + "loss": 1.9492, + "step": 256500 + }, + { + "epoch": 7.21, + "learning_rate": 0.00022830499896909145, + "loss": 1.9778, + "step": 257000 + }, + { + "epoch": 7.23, + "learning_rate": 0.0002281644205356975, + "loss": 1.9714, + "step": 257500 + }, + { + "epoch": 7.24, + "learning_rate": 0.0002280238421023036, + "loss": 1.9862, + "step": 258000 + }, + { + "epoch": 7.25, + "learning_rate": 0.00022788326366890966, + "loss": 1.997, + "step": 258500 + }, + { + "epoch": 7.27, + "learning_rate": 0.00022774268523551573, + "loss": 1.9797, + "step": 259000 + }, + { + "epoch": 7.28, + "learning_rate": 0.0002276021068021218, + "loss": 2.0386, + "step": 259500 + }, + { + "epoch": 7.3, + "learning_rate": 0.00022746152836872785, + "loss": 1.9809, + "step": 260000 + }, + { + "epoch": 7.31, + "learning_rate": 0.00022732094993533388, + "loss": 1.9879, + "step": 260500 + }, + { + "epoch": 7.32, + "learning_rate": 0.00022718037150193995, + "loss": 2.0213, + "step": 261000 + }, + { + "epoch": 7.34, + "learning_rate": 0.000227039793068546, + "loss": 1.9486, + "step": 261500 + }, + { + "epoch": 7.35, + "learning_rate": 0.00022689921463515207, + "loss": 1.977, + "step": 262000 + }, + { + "epoch": 7.37, + "learning_rate": 0.00022675863620175816, + "loss": 1.9849, + "step": 262500 + }, + { + "epoch": 7.38, + "learning_rate": 0.00022661805776836422, + "loss": 1.9433, + "step": 263000 + }, + { + "epoch": 7.39, + "learning_rate": 0.00022647747933497028, + "loss": 2.0007, + "step": 263500 + }, + { + "epoch": 7.41, + "learning_rate": 0.00022633690090157634, + "loss": 1.9975, + "step": 264000 + }, + { + "epoch": 7.42, + "learning_rate": 0.00022619632246818238, + "loss": 1.9288, + "step": 264500 + }, + { + "epoch": 7.44, + "learning_rate": 0.00022605574403478844, + "loss": 1.996, + "step": 265000 + }, + { + "epoch": 7.45, + "learning_rate": 0.0002259151656013945, + "loss": 1.9691, + "step": 265500 + }, + { + "epoch": 7.46, + "learning_rate": 0.00022577458716800056, + "loss": 1.9859, + "step": 266000 + }, + { + "epoch": 7.48, + "learning_rate": 0.00022563400873460665, + "loss": 1.9902, + "step": 266500 + }, + { + "epoch": 7.49, + "learning_rate": 0.00022549343030121272, + "loss": 1.9626, + "step": 267000 + }, + { + "epoch": 7.51, + "learning_rate": 0.00022535285186781878, + "loss": 1.9749, + "step": 267500 + }, + { + "epoch": 7.52, + "learning_rate": 0.00022521227343442484, + "loss": 1.9832, + "step": 268000 + }, + { + "epoch": 7.53, + "learning_rate": 0.0002250716950010309, + "loss": 1.9943, + "step": 268500 + }, + { + "epoch": 7.55, + "learning_rate": 0.00022493111656763694, + "loss": 1.9564, + "step": 269000 + }, + { + "epoch": 7.56, + "learning_rate": 0.000224790538134243, + "loss": 1.9827, + "step": 269500 + }, + { + "epoch": 7.58, + "learning_rate": 0.00022464995970084906, + "loss": 1.9721, + "step": 270000 + }, + { + "epoch": 7.59, + "learning_rate": 0.00022450938126745512, + "loss": 1.9993, + "step": 270500 + }, + { + "epoch": 7.6, + "learning_rate": 0.0002243688028340612, + "loss": 1.9978, + "step": 271000 + }, + { + "epoch": 7.62, + "learning_rate": 0.00022422822440066727, + "loss": 1.9862, + "step": 271500 + }, + { + "epoch": 7.63, + "learning_rate": 0.00022408764596727334, + "loss": 1.9931, + "step": 272000 + }, + { + "epoch": 7.65, + "learning_rate": 0.0002239470675338794, + "loss": 1.9609, + "step": 272500 + }, + { + "epoch": 7.66, + "learning_rate": 0.00022380648910048543, + "loss": 1.9802, + "step": 273000 + }, + { + "epoch": 7.68, + "learning_rate": 0.0002236659106670915, + "loss": 1.9919, + "step": 273500 + }, + { + "epoch": 7.69, + "learning_rate": 0.00022352533223369756, + "loss": 1.9749, + "step": 274000 + }, + { + "epoch": 7.7, + "learning_rate": 0.00022338475380030362, + "loss": 2.0121, + "step": 274500 + }, + { + "epoch": 7.72, + "learning_rate": 0.0002232441753669097, + "loss": 1.9941, + "step": 275000 + }, + { + "epoch": 7.73, + "learning_rate": 0.00022310359693351577, + "loss": 1.9829, + "step": 275500 + }, + { + "epoch": 7.75, + "learning_rate": 0.00022296301850012183, + "loss": 2.0042, + "step": 276000 + }, + { + "epoch": 7.76, + "learning_rate": 0.0002228224400667279, + "loss": 1.9994, + "step": 276500 + }, + { + "epoch": 7.77, + "learning_rate": 0.00022268186163333393, + "loss": 2.0085, + "step": 277000 + }, + { + "epoch": 7.79, + "learning_rate": 0.00022254128319994, + "loss": 1.9972, + "step": 277500 + }, + { + "epoch": 7.8, + "learning_rate": 0.00022240070476654605, + "loss": 2.0229, + "step": 278000 + }, + { + "epoch": 7.82, + "learning_rate": 0.0002222601263331521, + "loss": 1.9405, + "step": 278500 + }, + { + "epoch": 7.83, + "learning_rate": 0.00022211954789975818, + "loss": 1.9859, + "step": 279000 + }, + { + "epoch": 7.84, + "learning_rate": 0.00022197896946636426, + "loss": 1.9755, + "step": 279500 + }, + { + "epoch": 7.86, + "learning_rate": 0.00022183839103297033, + "loss": 1.9853, + "step": 280000 + }, + { + "epoch": 7.87, + "learning_rate": 0.0002216978125995764, + "loss": 2.0005, + "step": 280500 + }, + { + "epoch": 7.89, + "learning_rate": 0.00022155723416618242, + "loss": 1.9459, + "step": 281000 + }, + { + "epoch": 7.9, + "learning_rate": 0.00022141665573278848, + "loss": 1.9943, + "step": 281500 + }, + { + "epoch": 7.91, + "learning_rate": 0.00022127607729939455, + "loss": 1.9501, + "step": 282000 + }, + { + "epoch": 7.93, + "learning_rate": 0.0002211354988660006, + "loss": 1.9885, + "step": 282500 + }, + { + "epoch": 7.94, + "learning_rate": 0.00022099492043260667, + "loss": 2.0035, + "step": 283000 + }, + { + "epoch": 7.96, + "learning_rate": 0.00022085434199921276, + "loss": 2.002, + "step": 283500 + }, + { + "epoch": 7.97, + "learning_rate": 0.00022071376356581882, + "loss": 1.992, + "step": 284000 + }, + { + "epoch": 7.98, + "learning_rate": 0.00022057318513242488, + "loss": 2.0223, + "step": 284500 + }, + { + "epoch": 8.0, + "learning_rate": 0.00022043260669903092, + "loss": 2.0528, + "step": 285000 + }, + { + "epoch": 8.01, + "learning_rate": 0.00022029202826563698, + "loss": 1.9177, + "step": 285500 + }, + { + "epoch": 8.03, + "learning_rate": 0.00022015144983224304, + "loss": 1.9261, + "step": 286000 + }, + { + "epoch": 8.04, + "learning_rate": 0.0002200108713988491, + "loss": 1.9438, + "step": 286500 + }, + { + "epoch": 8.05, + "learning_rate": 0.00021987029296545517, + "loss": 1.9014, + "step": 287000 + }, + { + "epoch": 8.07, + "learning_rate": 0.00021972971453206123, + "loss": 1.9587, + "step": 287500 + }, + { + "epoch": 8.08, + "learning_rate": 0.00021958913609866732, + "loss": 1.9076, + "step": 288000 + }, + { + "epoch": 8.1, + "learning_rate": 0.00021944855766527338, + "loss": 1.9177, + "step": 288500 + }, + { + "epoch": 8.11, + "learning_rate": 0.00021930797923187941, + "loss": 1.9116, + "step": 289000 + }, + { + "epoch": 8.12, + "learning_rate": 0.00021916740079848548, + "loss": 1.9303, + "step": 289500 + }, + { + "epoch": 8.14, + "learning_rate": 0.00021902682236509154, + "loss": 1.9255, + "step": 290000 + }, + { + "epoch": 8.15, + "learning_rate": 0.0002188862439316976, + "loss": 1.9152, + "step": 290500 + }, + { + "epoch": 8.17, + "learning_rate": 0.00021874566549830366, + "loss": 1.9149, + "step": 291000 + }, + { + "epoch": 8.18, + "learning_rate": 0.00021860508706490972, + "loss": 1.9298, + "step": 291500 + }, + { + "epoch": 8.19, + "learning_rate": 0.0002184645086315158, + "loss": 1.9279, + "step": 292000 + }, + { + "epoch": 8.21, + "learning_rate": 0.00021832393019812187, + "loss": 1.9413, + "step": 292500 + }, + { + "epoch": 8.22, + "learning_rate": 0.0002181833517647279, + "loss": 1.9344, + "step": 293000 + }, + { + "epoch": 8.24, + "learning_rate": 0.00021804277333133397, + "loss": 1.9529, + "step": 293500 + }, + { + "epoch": 8.25, + "learning_rate": 0.00021790219489794003, + "loss": 1.9493, + "step": 294000 + }, + { + "epoch": 8.26, + "learning_rate": 0.0002177616164645461, + "loss": 1.9435, + "step": 294500 + }, + { + "epoch": 8.28, + "learning_rate": 0.00021762103803115216, + "loss": 1.9373, + "step": 295000 + }, + { + "epoch": 8.29, + "learning_rate": 0.00021748045959775822, + "loss": 1.9651, + "step": 295500 + }, + { + "epoch": 8.31, + "learning_rate": 0.00021733988116436428, + "loss": 1.9283, + "step": 296000 + }, + { + "epoch": 8.32, + "learning_rate": 0.00021719930273097037, + "loss": 1.9202, + "step": 296500 + }, + { + "epoch": 8.33, + "learning_rate": 0.00021705872429757643, + "loss": 1.9429, + "step": 297000 + }, + { + "epoch": 8.35, + "learning_rate": 0.00021691814586418247, + "loss": 1.9262, + "step": 297500 + }, + { + "epoch": 8.36, + "learning_rate": 0.00021677756743078853, + "loss": 1.9686, + "step": 298000 + }, + { + "epoch": 8.38, + "learning_rate": 0.0002166369889973946, + "loss": 1.9541, + "step": 298500 + }, + { + "epoch": 8.39, + "learning_rate": 0.00021649641056400065, + "loss": 1.9397, + "step": 299000 + }, + { + "epoch": 8.4, + "learning_rate": 0.00021635583213060671, + "loss": 1.9521, + "step": 299500 + }, + { + "epoch": 8.42, + "learning_rate": 0.00021621525369721278, + "loss": 1.9354, + "step": 300000 + }, + { + "epoch": 8.43, + "learning_rate": 0.00021607467526381887, + "loss": 1.933, + "step": 300500 + }, + { + "epoch": 8.45, + "learning_rate": 0.00021593409683042493, + "loss": 1.9874, + "step": 301000 + }, + { + "epoch": 8.46, + "learning_rate": 0.00021579351839703096, + "loss": 1.9077, + "step": 301500 + }, + { + "epoch": 8.47, + "learning_rate": 0.00021565293996363702, + "loss": 1.9458, + "step": 302000 + }, + { + "epoch": 8.49, + "learning_rate": 0.00021551236153024309, + "loss": 1.9521, + "step": 302500 + }, + { + "epoch": 8.5, + "learning_rate": 0.00021537178309684915, + "loss": 1.9498, + "step": 303000 + }, + { + "epoch": 8.52, + "learning_rate": 0.0002152312046634552, + "loss": 1.918, + "step": 303500 + }, + { + "epoch": 8.53, + "learning_rate": 0.00021509062623006127, + "loss": 1.9309, + "step": 304000 + }, + { + "epoch": 8.55, + "learning_rate": 0.00021495004779666733, + "loss": 1.9431, + "step": 304500 + }, + { + "epoch": 8.56, + "learning_rate": 0.00021480946936327342, + "loss": 1.9449, + "step": 305000 + }, + { + "epoch": 8.57, + "learning_rate": 0.00021466889092987946, + "loss": 1.9312, + "step": 305500 + }, + { + "epoch": 8.59, + "learning_rate": 0.00021452831249648552, + "loss": 1.9315, + "step": 306000 + }, + { + "epoch": 8.6, + "learning_rate": 0.00021438773406309158, + "loss": 1.9769, + "step": 306500 + }, + { + "epoch": 8.62, + "learning_rate": 0.00021424715562969764, + "loss": 1.9534, + "step": 307000 + }, + { + "epoch": 8.63, + "learning_rate": 0.0002141065771963037, + "loss": 1.9379, + "step": 307500 + }, + { + "epoch": 8.64, + "learning_rate": 0.00021396599876290977, + "loss": 1.9297, + "step": 308000 + }, + { + "epoch": 8.66, + "learning_rate": 0.00021382542032951583, + "loss": 1.9221, + "step": 308500 + }, + { + "epoch": 8.67, + "learning_rate": 0.00021368484189612192, + "loss": 1.9232, + "step": 309000 + }, + { + "epoch": 8.69, + "learning_rate": 0.00021354426346272793, + "loss": 1.9292, + "step": 309500 + }, + { + "epoch": 8.7, + "learning_rate": 0.00021340368502933401, + "loss": 1.9457, + "step": 310000 + }, + { + "epoch": 8.71, + "learning_rate": 0.00021326310659594008, + "loss": 1.9498, + "step": 310500 + }, + { + "epoch": 8.73, + "learning_rate": 0.00021312252816254614, + "loss": 1.9103, + "step": 311000 + }, + { + "epoch": 8.74, + "learning_rate": 0.0002129819497291522, + "loss": 1.9209, + "step": 311500 + }, + { + "epoch": 8.76, + "learning_rate": 0.00021284137129575826, + "loss": 1.9357, + "step": 312000 + }, + { + "epoch": 8.77, + "learning_rate": 0.00021270079286236432, + "loss": 1.9045, + "step": 312500 + }, + { + "epoch": 8.78, + "learning_rate": 0.0002125602144289704, + "loss": 1.9402, + "step": 313000 + }, + { + "epoch": 8.8, + "learning_rate": 0.00021241963599557642, + "loss": 1.8733, + "step": 313500 + }, + { + "epoch": 8.81, + "learning_rate": 0.0002122790575621825, + "loss": 1.9188, + "step": 314000 + }, + { + "epoch": 8.83, + "learning_rate": 0.00021213847912878857, + "loss": 1.94, + "step": 314500 + }, + { + "epoch": 8.84, + "learning_rate": 0.00021199790069539463, + "loss": 1.969, + "step": 315000 + }, + { + "epoch": 8.85, + "learning_rate": 0.0002118573222620007, + "loss": 1.9897, + "step": 315500 + }, + { + "epoch": 8.87, + "learning_rate": 0.00021171674382860676, + "loss": 1.9422, + "step": 316000 + }, + { + "epoch": 8.88, + "learning_rate": 0.00021157616539521282, + "loss": 1.9329, + "step": 316500 + }, + { + "epoch": 8.9, + "learning_rate": 0.00021143558696181888, + "loss": 1.9291, + "step": 317000 + }, + { + "epoch": 8.91, + "learning_rate": 0.00021129500852842492, + "loss": 1.9177, + "step": 317500 + }, + { + "epoch": 8.92, + "learning_rate": 0.00021115443009503098, + "loss": 1.9471, + "step": 318000 + }, + { + "epoch": 8.94, + "learning_rate": 0.00021101385166163707, + "loss": 1.8992, + "step": 318500 + }, + { + "epoch": 8.95, + "learning_rate": 0.00021087327322824313, + "loss": 1.9003, + "step": 319000 + }, + { + "epoch": 8.97, + "learning_rate": 0.0002107326947948492, + "loss": 1.9663, + "step": 319500 + }, + { + "epoch": 8.98, + "learning_rate": 0.00021059211636145525, + "loss": 1.963, + "step": 320000 + }, + { + "epoch": 8.99, + "learning_rate": 0.00021045153792806132, + "loss": 1.9454, + "step": 320500 + }, + { + "epoch": 9.01, + "learning_rate": 0.00021031095949466738, + "loss": 1.8856, + "step": 321000 + }, + { + "epoch": 9.02, + "learning_rate": 0.0002101703810612734, + "loss": 1.8773, + "step": 321500 + }, + { + "epoch": 9.04, + "learning_rate": 0.00021002980262787947, + "loss": 1.8712, + "step": 322000 + }, + { + "epoch": 9.05, + "learning_rate": 0.00020988922419448556, + "loss": 1.8868, + "step": 322500 + }, + { + "epoch": 9.06, + "learning_rate": 0.00020974864576109163, + "loss": 1.8753, + "step": 323000 + }, + { + "epoch": 9.08, + "learning_rate": 0.0002096080673276977, + "loss": 1.9066, + "step": 323500 + }, + { + "epoch": 9.09, + "learning_rate": 0.00020946748889430375, + "loss": 1.8513, + "step": 324000 + }, + { + "epoch": 9.11, + "learning_rate": 0.0002093269104609098, + "loss": 1.8249, + "step": 324500 + }, + { + "epoch": 9.12, + "learning_rate": 0.00020918633202751587, + "loss": 1.8887, + "step": 325000 + }, + { + "epoch": 9.13, + "learning_rate": 0.00020904575359412193, + "loss": 1.8882, + "step": 325500 + }, + { + "epoch": 9.15, + "learning_rate": 0.00020890517516072797, + "loss": 1.8697, + "step": 326000 + }, + { + "epoch": 9.16, + "learning_rate": 0.00020876459672733403, + "loss": 1.8819, + "step": 326500 + }, + { + "epoch": 9.18, + "learning_rate": 0.00020862401829394012, + "loss": 1.8538, + "step": 327000 + }, + { + "epoch": 9.19, + "learning_rate": 0.00020848343986054618, + "loss": 1.882, + "step": 327500 + }, + { + "epoch": 9.2, + "learning_rate": 0.00020834286142715224, + "loss": 1.8733, + "step": 328000 + }, + { + "epoch": 9.22, + "learning_rate": 0.0002082022829937583, + "loss": 1.8481, + "step": 328500 + }, + { + "epoch": 9.23, + "learning_rate": 0.00020806170456036437, + "loss": 1.886, + "step": 329000 + }, + { + "epoch": 9.25, + "learning_rate": 0.00020792112612697043, + "loss": 1.9102, + "step": 329500 + }, + { + "epoch": 9.26, + "learning_rate": 0.00020778054769357647, + "loss": 1.8617, + "step": 330000 + }, + { + "epoch": 9.27, + "learning_rate": 0.00020763996926018253, + "loss": 1.8809, + "step": 330500 + }, + { + "epoch": 9.29, + "learning_rate": 0.00020749939082678862, + "loss": 1.8844, + "step": 331000 + }, + { + "epoch": 9.3, + "learning_rate": 0.00020735881239339468, + "loss": 1.8758, + "step": 331500 + }, + { + "epoch": 9.32, + "learning_rate": 0.00020721823396000074, + "loss": 1.9132, + "step": 332000 + }, + { + "epoch": 9.33, + "learning_rate": 0.0002070776555266068, + "loss": 1.8819, + "step": 332500 + }, + { + "epoch": 9.34, + "learning_rate": 0.00020693707709321286, + "loss": 1.8851, + "step": 333000 + }, + { + "epoch": 9.36, + "learning_rate": 0.00020679649865981893, + "loss": 1.8757, + "step": 333500 + }, + { + "epoch": 9.37, + "learning_rate": 0.00020665592022642496, + "loss": 1.8982, + "step": 334000 + }, + { + "epoch": 9.39, + "learning_rate": 0.00020651534179303102, + "loss": 1.8726, + "step": 334500 + }, + { + "epoch": 9.4, + "learning_rate": 0.00020637476335963708, + "loss": 1.871, + "step": 335000 + }, + { + "epoch": 9.42, + "learning_rate": 0.00020623418492624317, + "loss": 1.8679, + "step": 335500 + }, + { + "epoch": 9.43, + "learning_rate": 0.00020609360649284924, + "loss": 1.846, + "step": 336000 + }, + { + "epoch": 9.44, + "learning_rate": 0.0002059530280594553, + "loss": 1.8835, + "step": 336500 + }, + { + "epoch": 9.46, + "learning_rate": 0.00020581244962606136, + "loss": 1.8742, + "step": 337000 + }, + { + "epoch": 9.47, + "learning_rate": 0.00020567187119266742, + "loss": 1.9074, + "step": 337500 + }, + { + "epoch": 9.49, + "learning_rate": 0.00020553129275927346, + "loss": 1.8882, + "step": 338000 + }, + { + "epoch": 9.5, + "learning_rate": 0.00020539071432587952, + "loss": 1.8665, + "step": 338500 + }, + { + "epoch": 9.51, + "learning_rate": 0.00020525013589248558, + "loss": 1.8989, + "step": 339000 + }, + { + "epoch": 9.53, + "learning_rate": 0.00020510955745909167, + "loss": 1.892, + "step": 339500 + }, + { + "epoch": 9.54, + "learning_rate": 0.00020496897902569773, + "loss": 1.8602, + "step": 340000 + }, + { + "epoch": 9.56, + "learning_rate": 0.0002048284005923038, + "loss": 1.8711, + "step": 340500 + }, + { + "epoch": 9.57, + "learning_rate": 0.00020468782215890985, + "loss": 1.8755, + "step": 341000 + }, + { + "epoch": 9.58, + "learning_rate": 0.00020454724372551592, + "loss": 1.9111, + "step": 341500 + }, + { + "epoch": 9.6, + "learning_rate": 0.00020440666529212195, + "loss": 1.8911, + "step": 342000 + }, + { + "epoch": 9.61, + "learning_rate": 0.000204266086858728, + "loss": 1.8861, + "step": 342500 + }, + { + "epoch": 9.63, + "learning_rate": 0.00020412550842533408, + "loss": 1.9273, + "step": 343000 + }, + { + "epoch": 9.64, + "learning_rate": 0.00020398492999194014, + "loss": 1.8982, + "step": 343500 + }, + { + "epoch": 9.65, + "learning_rate": 0.00020384435155854623, + "loss": 1.9151, + "step": 344000 + }, + { + "epoch": 9.67, + "learning_rate": 0.0002037037731251523, + "loss": 1.9015, + "step": 344500 + }, + { + "epoch": 9.68, + "learning_rate": 0.00020356319469175835, + "loss": 1.888, + "step": 345000 + }, + { + "epoch": 9.7, + "learning_rate": 0.0002034226162583644, + "loss": 1.8713, + "step": 345500 + }, + { + "epoch": 9.71, + "learning_rate": 0.00020328203782497045, + "loss": 1.8942, + "step": 346000 + }, + { + "epoch": 9.72, + "learning_rate": 0.0002031414593915765, + "loss": 1.8574, + "step": 346500 + }, + { + "epoch": 9.74, + "learning_rate": 0.00020300088095818257, + "loss": 1.9104, + "step": 347000 + }, + { + "epoch": 9.75, + "learning_rate": 0.00020286030252478863, + "loss": 1.8821, + "step": 347500 + }, + { + "epoch": 9.77, + "learning_rate": 0.00020271972409139472, + "loss": 1.8594, + "step": 348000 + }, + { + "epoch": 9.78, + "learning_rate": 0.00020257914565800078, + "loss": 1.854, + "step": 348500 + }, + { + "epoch": 9.79, + "learning_rate": 0.00020243856722460685, + "loss": 1.901, + "step": 349000 + }, + { + "epoch": 9.81, + "learning_rate": 0.0002022979887912129, + "loss": 1.9013, + "step": 349500 + }, + { + "epoch": 9.82, + "learning_rate": 0.00020215741035781894, + "loss": 1.8955, + "step": 350000 + }, + { + "epoch": 9.84, + "learning_rate": 0.000202016831924425, + "loss": 1.8922, + "step": 350500 + }, + { + "epoch": 9.85, + "learning_rate": 0.00020187625349103107, + "loss": 1.8906, + "step": 351000 + }, + { + "epoch": 9.86, + "learning_rate": 0.00020173567505763713, + "loss": 1.8719, + "step": 351500 + }, + { + "epoch": 9.88, + "learning_rate": 0.0002015950966242432, + "loss": 1.9044, + "step": 352000 + }, + { + "epoch": 9.89, + "learning_rate": 0.00020145451819084928, + "loss": 1.8842, + "step": 352500 + }, + { + "epoch": 9.91, + "learning_rate": 0.00020131393975745534, + "loss": 1.8419, + "step": 353000 + }, + { + "epoch": 9.92, + "learning_rate": 0.0002011733613240614, + "loss": 1.9004, + "step": 353500 + }, + { + "epoch": 9.93, + "learning_rate": 0.00020103278289066746, + "loss": 1.8882, + "step": 354000 + }, + { + "epoch": 9.95, + "learning_rate": 0.0002008922044572735, + "loss": 1.9269, + "step": 354500 + }, + { + "epoch": 9.96, + "learning_rate": 0.00020075162602387956, + "loss": 1.8875, + "step": 355000 + }, + { + "epoch": 9.98, + "learning_rate": 0.00020061104759048562, + "loss": 1.8902, + "step": 355500 + }, + { + "epoch": 9.99, + "learning_rate": 0.00020047046915709169, + "loss": 1.8823, + "step": 356000 + }, + { + "epoch": 10.0, + "learning_rate": 0.00020032989072369777, + "loss": 1.8574, + "step": 356500 + }, + { + "epoch": 10.02, + "learning_rate": 0.00020018931229030384, + "loss": 1.7962, + "step": 357000 + }, + { + "epoch": 10.03, + "learning_rate": 0.0002000487338569099, + "loss": 1.8085, + "step": 357500 + }, + { + "epoch": 10.05, + "learning_rate": 0.00019990815542351596, + "loss": 1.8401, + "step": 358000 + }, + { + "epoch": 10.06, + "learning_rate": 0.000199767576990122, + "loss": 1.8218, + "step": 358500 + }, + { + "epoch": 10.07, + "learning_rate": 0.00019962699855672806, + "loss": 1.8065, + "step": 359000 + }, + { + "epoch": 10.09, + "learning_rate": 0.00019948642012333412, + "loss": 1.8245, + "step": 359500 + }, + { + "epoch": 10.1, + "learning_rate": 0.00019934584168994018, + "loss": 1.8177, + "step": 360000 + }, + { + "epoch": 10.12, + "learning_rate": 0.00019920526325654624, + "loss": 1.8626, + "step": 360500 + }, + { + "epoch": 10.13, + "learning_rate": 0.00019906468482315233, + "loss": 1.8341, + "step": 361000 + }, + { + "epoch": 10.14, + "learning_rate": 0.0001989241063897584, + "loss": 1.8424, + "step": 361500 + }, + { + "epoch": 10.16, + "learning_rate": 0.00019878352795636446, + "loss": 1.8221, + "step": 362000 + }, + { + "epoch": 10.17, + "learning_rate": 0.0001986429495229705, + "loss": 1.8554, + "step": 362500 + }, + { + "epoch": 10.19, + "learning_rate": 0.00019850237108957655, + "loss": 1.8426, + "step": 363000 + }, + { + "epoch": 10.2, + "learning_rate": 0.00019836179265618261, + "loss": 1.8355, + "step": 363500 + }, + { + "epoch": 10.21, + "learning_rate": 0.00019822121422278868, + "loss": 1.8147, + "step": 364000 + }, + { + "epoch": 10.23, + "learning_rate": 0.00019808063578939474, + "loss": 1.8212, + "step": 364500 + }, + { + "epoch": 10.24, + "learning_rate": 0.00019794005735600083, + "loss": 1.8188, + "step": 365000 + }, + { + "epoch": 10.26, + "learning_rate": 0.0001977994789226069, + "loss": 1.8313, + "step": 365500 + }, + { + "epoch": 10.27, + "learning_rate": 0.00019765890048921295, + "loss": 1.8165, + "step": 366000 + }, + { + "epoch": 10.28, + "learning_rate": 0.00019751832205581899, + "loss": 1.8183, + "step": 366500 + }, + { + "epoch": 10.3, + "learning_rate": 0.00019737774362242505, + "loss": 1.8196, + "step": 367000 + }, + { + "epoch": 10.31, + "learning_rate": 0.0001972371651890311, + "loss": 1.8276, + "step": 367500 + }, + { + "epoch": 10.33, + "learning_rate": 0.00019709658675563717, + "loss": 1.8811, + "step": 368000 + }, + { + "epoch": 10.34, + "learning_rate": 0.00019695600832224323, + "loss": 1.8258, + "step": 368500 + }, + { + "epoch": 10.36, + "learning_rate": 0.0001968154298888493, + "loss": 1.8061, + "step": 369000 + }, + { + "epoch": 10.37, + "learning_rate": 0.00019667485145545538, + "loss": 1.8099, + "step": 369500 + }, + { + "epoch": 10.38, + "learning_rate": 0.00019653427302206145, + "loss": 1.8491, + "step": 370000 + }, + { + "epoch": 10.4, + "learning_rate": 0.00019639369458866748, + "loss": 1.8424, + "step": 370500 + }, + { + "epoch": 10.41, + "learning_rate": 0.00019625311615527354, + "loss": 1.8419, + "step": 371000 + }, + { + "epoch": 10.43, + "learning_rate": 0.0001961125377218796, + "loss": 1.828, + "step": 371500 + }, + { + "epoch": 10.44, + "learning_rate": 0.00019597195928848567, + "loss": 1.8339, + "step": 372000 + }, + { + "epoch": 10.45, + "learning_rate": 0.00019583138085509173, + "loss": 1.8416, + "step": 372500 + }, + { + "epoch": 10.47, + "learning_rate": 0.0001956908024216978, + "loss": 1.8231, + "step": 373000 + }, + { + "epoch": 10.48, + "learning_rate": 0.00019555022398830388, + "loss": 1.8288, + "step": 373500 + }, + { + "epoch": 10.5, + "learning_rate": 0.00019540964555490994, + "loss": 1.8484, + "step": 374000 + }, + { + "epoch": 10.51, + "learning_rate": 0.00019526906712151598, + "loss": 1.8453, + "step": 374500 + }, + { + "epoch": 10.52, + "learning_rate": 0.00019512848868812204, + "loss": 1.8539, + "step": 375000 + }, + { + "epoch": 10.54, + "learning_rate": 0.0001949879102547281, + "loss": 1.869, + "step": 375500 + }, + { + "epoch": 10.55, + "learning_rate": 0.00019484733182133416, + "loss": 1.8507, + "step": 376000 + }, + { + "epoch": 10.57, + "learning_rate": 0.00019470675338794022, + "loss": 1.8167, + "step": 376500 + }, + { + "epoch": 10.58, + "learning_rate": 0.0001945661749545463, + "loss": 1.823, + "step": 377000 + }, + { + "epoch": 10.59, + "learning_rate": 0.00019442559652115235, + "loss": 1.8547, + "step": 377500 + }, + { + "epoch": 10.61, + "learning_rate": 0.00019428501808775844, + "loss": 1.8602, + "step": 378000 + }, + { + "epoch": 10.62, + "learning_rate": 0.00019414443965436445, + "loss": 1.8406, + "step": 378500 + }, + { + "epoch": 10.64, + "learning_rate": 0.00019400386122097053, + "loss": 1.8683, + "step": 379000 + }, + { + "epoch": 10.65, + "learning_rate": 0.0001938632827875766, + "loss": 1.8577, + "step": 379500 + }, + { + "epoch": 10.66, + "learning_rate": 0.00019372270435418266, + "loss": 1.8518, + "step": 380000 + }, + { + "epoch": 10.68, + "learning_rate": 0.00019358212592078872, + "loss": 1.8134, + "step": 380500 + }, + { + "epoch": 10.69, + "learning_rate": 0.00019344154748739478, + "loss": 1.8355, + "step": 381000 + }, + { + "epoch": 10.71, + "learning_rate": 0.00019330096905400084, + "loss": 1.8328, + "step": 381500 + }, + { + "epoch": 10.72, + "learning_rate": 0.00019316039062060693, + "loss": 1.8329, + "step": 382000 + }, + { + "epoch": 10.73, + "learning_rate": 0.00019301981218721294, + "loss": 1.8755, + "step": 382500 + }, + { + "epoch": 10.75, + "learning_rate": 0.00019287923375381903, + "loss": 1.84, + "step": 383000 + }, + { + "epoch": 10.76, + "learning_rate": 0.0001927386553204251, + "loss": 1.8026, + "step": 383500 + }, + { + "epoch": 10.78, + "learning_rate": 0.00019259807688703115, + "loss": 1.8435, + "step": 384000 + }, + { + "epoch": 10.79, + "learning_rate": 0.00019245749845363722, + "loss": 1.8323, + "step": 384500 + }, + { + "epoch": 10.8, + "learning_rate": 0.00019231692002024328, + "loss": 1.8357, + "step": 385000 + }, + { + "epoch": 10.82, + "learning_rate": 0.00019217634158684934, + "loss": 1.8223, + "step": 385500 + }, + { + "epoch": 10.83, + "learning_rate": 0.0001920357631534554, + "loss": 1.8558, + "step": 386000 + }, + { + "epoch": 10.85, + "learning_rate": 0.0001918951847200615, + "loss": 1.8361, + "step": 386500 + }, + { + "epoch": 10.86, + "learning_rate": 0.0001917546062866675, + "loss": 1.8391, + "step": 387000 + }, + { + "epoch": 10.87, + "learning_rate": 0.0001916140278532736, + "loss": 1.9066, + "step": 387500 + }, + { + "epoch": 10.89, + "learning_rate": 0.00019147344941987965, + "loss": 1.8619, + "step": 388000 + }, + { + "epoch": 10.9, + "learning_rate": 0.0001913328709864857, + "loss": 1.8718, + "step": 388500 + }, + { + "epoch": 10.92, + "learning_rate": 0.00019119229255309177, + "loss": 1.8475, + "step": 389000 + }, + { + "epoch": 10.93, + "learning_rate": 0.00019105171411969783, + "loss": 1.8525, + "step": 389500 + }, + { + "epoch": 10.94, + "learning_rate": 0.0001909111356863039, + "loss": 1.8086, + "step": 390000 + }, + { + "epoch": 10.96, + "learning_rate": 0.00019077055725290999, + "loss": 1.8516, + "step": 390500 + }, + { + "epoch": 10.97, + "learning_rate": 0.000190629978819516, + "loss": 1.8374, + "step": 391000 + }, + { + "epoch": 10.99, + "learning_rate": 0.00019048940038612208, + "loss": 1.8353, + "step": 391500 + }, + { + "epoch": 11.0, + "learning_rate": 0.00019034882195272814, + "loss": 1.8845, + "step": 392000 + }, + { + "epoch": 11.01, + "learning_rate": 0.0001902082435193342, + "loss": 1.8034, + "step": 392500 + }, + { + "epoch": 11.03, + "learning_rate": 0.00019006766508594027, + "loss": 1.7912, + "step": 393000 + }, + { + "epoch": 11.04, + "learning_rate": 0.00018992708665254633, + "loss": 1.7747, + "step": 393500 + }, + { + "epoch": 11.06, + "learning_rate": 0.0001897865082191524, + "loss": 1.7816, + "step": 394000 + }, + { + "epoch": 11.07, + "learning_rate": 0.00018964592978575845, + "loss": 1.7557, + "step": 394500 + }, + { + "epoch": 11.08, + "learning_rate": 0.0001895053513523645, + "loss": 1.7835, + "step": 395000 + }, + { + "epoch": 11.1, + "learning_rate": 0.00018936477291897055, + "loss": 1.7824, + "step": 395500 + }, + { + "epoch": 11.11, + "learning_rate": 0.00018922419448557664, + "loss": 1.7959, + "step": 396000 + }, + { + "epoch": 11.13, + "learning_rate": 0.0001890836160521827, + "loss": 1.7989, + "step": 396500 + }, + { + "epoch": 11.14, + "learning_rate": 0.00018894303761878876, + "loss": 1.761, + "step": 397000 + }, + { + "epoch": 11.15, + "learning_rate": 0.00018880245918539483, + "loss": 1.7773, + "step": 397500 + }, + { + "epoch": 11.17, + "learning_rate": 0.0001886618807520009, + "loss": 1.7956, + "step": 398000 + }, + { + "epoch": 11.18, + "learning_rate": 0.00018852130231860695, + "loss": 1.7832, + "step": 398500 + }, + { + "epoch": 11.2, + "learning_rate": 0.00018838072388521298, + "loss": 1.82, + "step": 399000 + }, + { + "epoch": 11.21, + "learning_rate": 0.00018824014545181905, + "loss": 1.7774, + "step": 399500 + }, + { + "epoch": 11.23, + "learning_rate": 0.00018809956701842514, + "loss": 1.8108, + "step": 400000 + }, + { + "epoch": 11.24, + "learning_rate": 0.0001879589885850312, + "loss": 1.8072, + "step": 400500 + }, + { + "epoch": 11.25, + "learning_rate": 0.00018781841015163726, + "loss": 1.7976, + "step": 401000 + }, + { + "epoch": 11.27, + "learning_rate": 0.00018767783171824332, + "loss": 1.7671, + "step": 401500 + }, + { + "epoch": 11.28, + "learning_rate": 0.00018753725328484938, + "loss": 1.8202, + "step": 402000 + }, + { + "epoch": 11.3, + "learning_rate": 0.00018739667485145545, + "loss": 1.7835, + "step": 402500 + }, + { + "epoch": 11.31, + "learning_rate": 0.00018725609641806148, + "loss": 1.79, + "step": 403000 + }, + { + "epoch": 11.32, + "learning_rate": 0.00018711551798466754, + "loss": 1.8187, + "step": 403500 + }, + { + "epoch": 11.34, + "learning_rate": 0.0001869749395512736, + "loss": 1.8261, + "step": 404000 + }, + { + "epoch": 11.35, + "learning_rate": 0.0001868343611178797, + "loss": 1.7671, + "step": 404500 + }, + { + "epoch": 11.37, + "learning_rate": 0.00018669378268448575, + "loss": 1.797, + "step": 405000 + }, + { + "epoch": 11.38, + "learning_rate": 0.00018655320425109182, + "loss": 1.7898, + "step": 405500 + }, + { + "epoch": 11.39, + "learning_rate": 0.00018641262581769788, + "loss": 1.7916, + "step": 406000 + }, + { + "epoch": 11.41, + "learning_rate": 0.00018627204738430394, + "loss": 1.852, + "step": 406500 + }, + { + "epoch": 11.42, + "learning_rate": 0.00018613146895090998, + "loss": 1.7775, + "step": 407000 + }, + { + "epoch": 11.44, + "learning_rate": 0.00018599089051751604, + "loss": 1.8036, + "step": 407500 + }, + { + "epoch": 11.45, + "learning_rate": 0.0001858503120841221, + "loss": 1.8111, + "step": 408000 + }, + { + "epoch": 11.46, + "learning_rate": 0.0001857097336507282, + "loss": 1.7921, + "step": 408500 + }, + { + "epoch": 11.48, + "learning_rate": 0.00018556915521733425, + "loss": 1.8084, + "step": 409000 + }, + { + "epoch": 11.49, + "learning_rate": 0.0001854285767839403, + "loss": 1.7578, + "step": 409500 + }, + { + "epoch": 11.51, + "learning_rate": 0.00018528799835054637, + "loss": 1.7714, + "step": 410000 + }, + { + "epoch": 11.52, + "learning_rate": 0.00018514741991715244, + "loss": 1.8062, + "step": 410500 + }, + { + "epoch": 11.53, + "learning_rate": 0.00018500684148375847, + "loss": 1.8261, + "step": 411000 + }, + { + "epoch": 11.55, + "learning_rate": 0.00018486626305036453, + "loss": 1.7864, + "step": 411500 + }, + { + "epoch": 11.56, + "learning_rate": 0.0001847256846169706, + "loss": 1.8151, + "step": 412000 + }, + { + "epoch": 11.58, + "learning_rate": 0.00018458510618357666, + "loss": 1.8115, + "step": 412500 + }, + { + "epoch": 11.59, + "learning_rate": 0.00018444452775018275, + "loss": 1.8313, + "step": 413000 + }, + { + "epoch": 11.6, + "learning_rate": 0.0001843039493167888, + "loss": 1.8024, + "step": 413500 + }, + { + "epoch": 11.62, + "learning_rate": 0.00018416337088339487, + "loss": 1.793, + "step": 414000 + }, + { + "epoch": 11.63, + "learning_rate": 0.00018402279245000093, + "loss": 1.7891, + "step": 414500 + }, + { + "epoch": 11.65, + "learning_rate": 0.000183882214016607, + "loss": 1.7916, + "step": 415000 + }, + { + "epoch": 11.66, + "learning_rate": 0.00018374163558321303, + "loss": 1.7597, + "step": 415500 + }, + { + "epoch": 11.67, + "learning_rate": 0.0001836010571498191, + "loss": 1.809, + "step": 416000 + }, + { + "epoch": 11.69, + "learning_rate": 0.00018346047871642515, + "loss": 1.7811, + "step": 416500 + }, + { + "epoch": 11.7, + "learning_rate": 0.00018331990028303124, + "loss": 1.8246, + "step": 417000 + }, + { + "epoch": 11.72, + "learning_rate": 0.0001831793218496373, + "loss": 1.8383, + "step": 417500 + }, + { + "epoch": 11.73, + "learning_rate": 0.00018303874341624336, + "loss": 1.7673, + "step": 418000 + }, + { + "epoch": 11.74, + "learning_rate": 0.00018289816498284943, + "loss": 1.7972, + "step": 418500 + }, + { + "epoch": 11.76, + "learning_rate": 0.0001827575865494555, + "loss": 1.8094, + "step": 419000 + }, + { + "epoch": 11.77, + "learning_rate": 0.00018261700811606152, + "loss": 1.7746, + "step": 419500 + }, + { + "epoch": 11.79, + "learning_rate": 0.00018247642968266759, + "loss": 1.8126, + "step": 420000 + }, + { + "epoch": 11.8, + "learning_rate": 0.00018233585124927365, + "loss": 1.8074, + "step": 420500 + }, + { + "epoch": 11.81, + "learning_rate": 0.0001821952728158797, + "loss": 1.8232, + "step": 421000 + }, + { + "epoch": 11.83, + "learning_rate": 0.0001820546943824858, + "loss": 1.8191, + "step": 421500 + }, + { + "epoch": 11.84, + "learning_rate": 0.00018191411594909186, + "loss": 1.7828, + "step": 422000 + }, + { + "epoch": 11.86, + "learning_rate": 0.00018177353751569792, + "loss": 1.7953, + "step": 422500 + }, + { + "epoch": 11.87, + "learning_rate": 0.00018163295908230398, + "loss": 1.8094, + "step": 423000 + }, + { + "epoch": 11.88, + "learning_rate": 0.00018149238064891002, + "loss": 1.8087, + "step": 423500 + }, + { + "epoch": 11.9, + "learning_rate": 0.00018135180221551608, + "loss": 1.819, + "step": 424000 + }, + { + "epoch": 11.91, + "learning_rate": 0.00018121122378212214, + "loss": 1.7811, + "step": 424500 + }, + { + "epoch": 11.93, + "learning_rate": 0.0001810706453487282, + "loss": 1.7685, + "step": 425000 + }, + { + "epoch": 11.94, + "learning_rate": 0.0001809300669153343, + "loss": 1.7985, + "step": 425500 + }, + { + "epoch": 11.95, + "learning_rate": 0.00018078948848194036, + "loss": 1.7791, + "step": 426000 + }, + { + "epoch": 11.97, + "learning_rate": 0.00018064891004854642, + "loss": 1.8049, + "step": 426500 + }, + { + "epoch": 11.98, + "learning_rate": 0.00018050833161515248, + "loss": 1.8249, + "step": 427000 + }, + { + "epoch": 12.0, + "learning_rate": 0.00018036775318175851, + "loss": 1.7993, + "step": 427500 + }, + { + "epoch": 12.01, + "learning_rate": 0.00018022717474836458, + "loss": 1.7924, + "step": 428000 + }, + { + "epoch": 12.02, + "learning_rate": 0.00018008659631497064, + "loss": 1.7594, + "step": 428500 + }, + { + "epoch": 12.04, + "learning_rate": 0.0001799460178815767, + "loss": 1.7235, + "step": 429000 + }, + { + "epoch": 12.05, + "learning_rate": 0.00017980543944818276, + "loss": 1.7484, + "step": 429500 + }, + { + "epoch": 12.07, + "learning_rate": 0.00017966486101478885, + "loss": 1.7384, + "step": 430000 + }, + { + "epoch": 12.08, + "learning_rate": 0.0001795242825813949, + "loss": 1.7548, + "step": 430500 + }, + { + "epoch": 12.1, + "learning_rate": 0.00017938370414800098, + "loss": 1.7523, + "step": 431000 + }, + { + "epoch": 12.11, + "learning_rate": 0.000179243125714607, + "loss": 1.7387, + "step": 431500 + }, + { + "epoch": 12.12, + "learning_rate": 0.00017910254728121307, + "loss": 1.7238, + "step": 432000 + }, + { + "epoch": 12.14, + "learning_rate": 0.00017896196884781913, + "loss": 1.7568, + "step": 432500 + }, + { + "epoch": 12.15, + "learning_rate": 0.0001788213904144252, + "loss": 1.728, + "step": 433000 + }, + { + "epoch": 12.17, + "learning_rate": 0.00017868081198103126, + "loss": 1.7263, + "step": 433500 + }, + { + "epoch": 12.18, + "learning_rate": 0.00017854023354763735, + "loss": 1.7219, + "step": 434000 + }, + { + "epoch": 12.19, + "learning_rate": 0.0001783996551142434, + "loss": 1.7415, + "step": 434500 + }, + { + "epoch": 12.21, + "learning_rate": 0.00017825907668084947, + "loss": 1.7571, + "step": 435000 + }, + { + "epoch": 12.22, + "learning_rate": 0.0001781184982474555, + "loss": 1.7429, + "step": 435500 + }, + { + "epoch": 12.24, + "learning_rate": 0.00017797791981406157, + "loss": 1.763, + "step": 436000 + }, + { + "epoch": 12.25, + "learning_rate": 0.00017783734138066763, + "loss": 1.7558, + "step": 436500 + }, + { + "epoch": 12.26, + "learning_rate": 0.0001776967629472737, + "loss": 1.7665, + "step": 437000 + }, + { + "epoch": 12.28, + "learning_rate": 0.00017755618451387975, + "loss": 1.7411, + "step": 437500 + }, + { + "epoch": 12.29, + "learning_rate": 0.00017741560608048582, + "loss": 1.7478, + "step": 438000 + }, + { + "epoch": 12.31, + "learning_rate": 0.0001772750276470919, + "loss": 1.7673, + "step": 438500 + }, + { + "epoch": 12.32, + "learning_rate": 0.00017713444921369797, + "loss": 1.752, + "step": 439000 + }, + { + "epoch": 12.33, + "learning_rate": 0.000176993870780304, + "loss": 1.7297, + "step": 439500 + }, + { + "epoch": 12.35, + "learning_rate": 0.00017685329234691006, + "loss": 1.7654, + "step": 440000 + }, + { + "epoch": 12.36, + "learning_rate": 0.00017671271391351612, + "loss": 1.7312, + "step": 440500 + }, + { + "epoch": 12.38, + "learning_rate": 0.0001765721354801222, + "loss": 1.773, + "step": 441000 + }, + { + "epoch": 12.39, + "learning_rate": 0.00017643155704672825, + "loss": 1.7771, + "step": 441500 + }, + { + "epoch": 12.4, + "learning_rate": 0.0001762909786133343, + "loss": 1.7174, + "step": 442000 + }, + { + "epoch": 12.42, + "learning_rate": 0.0001761504001799404, + "loss": 1.7056, + "step": 442500 + }, + { + "epoch": 12.43, + "learning_rate": 0.00017600982174654646, + "loss": 1.7896, + "step": 443000 + }, + { + "epoch": 12.45, + "learning_rate": 0.00017586924331315252, + "loss": 1.7391, + "step": 443500 + }, + { + "epoch": 12.46, + "learning_rate": 0.00017572866487975856, + "loss": 1.7606, + "step": 444000 + }, + { + "epoch": 12.47, + "learning_rate": 0.00017558808644636462, + "loss": 1.7515, + "step": 444500 + }, + { + "epoch": 12.49, + "learning_rate": 0.00017544750801297068, + "loss": 1.7781, + "step": 445000 + }, + { + "epoch": 12.5, + "learning_rate": 0.00017530692957957674, + "loss": 1.7423, + "step": 445500 + }, + { + "epoch": 12.52, + "learning_rate": 0.0001751663511461828, + "loss": 1.7643, + "step": 446000 + }, + { + "epoch": 12.53, + "learning_rate": 0.00017502577271278887, + "loss": 1.7443, + "step": 446500 + }, + { + "epoch": 12.54, + "learning_rate": 0.00017488519427939496, + "loss": 1.7836, + "step": 447000 + }, + { + "epoch": 12.56, + "learning_rate": 0.00017474461584600102, + "loss": 1.7667, + "step": 447500 + }, + { + "epoch": 12.57, + "learning_rate": 0.00017460403741260705, + "loss": 1.7431, + "step": 448000 + }, + { + "epoch": 12.59, + "learning_rate": 0.00017446345897921312, + "loss": 1.7748, + "step": 448500 + }, + { + "epoch": 12.6, + "learning_rate": 0.00017432288054581918, + "loss": 1.7667, + "step": 449000 + }, + { + "epoch": 12.61, + "learning_rate": 0.00017418230211242524, + "loss": 1.7635, + "step": 449500 + }, + { + "epoch": 12.63, + "learning_rate": 0.0001740417236790313, + "loss": 1.77, + "step": 450000 + }, + { + "epoch": 12.64, + "learning_rate": 0.00017390114524563736, + "loss": 1.7338, + "step": 450500 + }, + { + "epoch": 12.66, + "learning_rate": 0.00017376056681224345, + "loss": 1.761, + "step": 451000 + }, + { + "epoch": 12.67, + "learning_rate": 0.00017361998837884951, + "loss": 1.7557, + "step": 451500 + }, + { + "epoch": 12.68, + "learning_rate": 0.00017347940994545555, + "loss": 1.7942, + "step": 452000 + }, + { + "epoch": 12.7, + "learning_rate": 0.0001733388315120616, + "loss": 1.7452, + "step": 452500 + }, + { + "epoch": 12.71, + "learning_rate": 0.00017319825307866767, + "loss": 1.7466, + "step": 453000 + }, + { + "epoch": 12.73, + "learning_rate": 0.00017305767464527373, + "loss": 1.7551, + "step": 453500 + }, + { + "epoch": 12.74, + "learning_rate": 0.0001729170962118798, + "loss": 1.7765, + "step": 454000 + }, + { + "epoch": 12.75, + "learning_rate": 0.00017277651777848586, + "loss": 1.7686, + "step": 454500 + }, + { + "epoch": 12.77, + "learning_rate": 0.00017263593934509192, + "loss": 1.7711, + "step": 455000 + }, + { + "epoch": 12.78, + "learning_rate": 0.000172495360911698, + "loss": 1.7608, + "step": 455500 + }, + { + "epoch": 12.8, + "learning_rate": 0.00017235478247830404, + "loss": 1.7719, + "step": 456000 + }, + { + "epoch": 12.81, + "learning_rate": 0.0001722142040449101, + "loss": 1.7815, + "step": 456500 + }, + { + "epoch": 12.82, + "learning_rate": 0.00017207362561151617, + "loss": 1.7713, + "step": 457000 + }, + { + "epoch": 12.84, + "learning_rate": 0.00017193304717812223, + "loss": 1.7486, + "step": 457500 + }, + { + "epoch": 12.85, + "learning_rate": 0.0001717924687447283, + "loss": 1.7744, + "step": 458000 + }, + { + "epoch": 12.87, + "learning_rate": 0.00017165189031133435, + "loss": 1.7748, + "step": 458500 + }, + { + "epoch": 12.88, + "learning_rate": 0.00017151131187794042, + "loss": 1.7558, + "step": 459000 + }, + { + "epoch": 12.89, + "learning_rate": 0.0001713707334445465, + "loss": 1.765, + "step": 459500 + }, + { + "epoch": 12.91, + "learning_rate": 0.0001712301550111525, + "loss": 1.7724, + "step": 460000 + }, + { + "epoch": 12.92, + "learning_rate": 0.0001710895765777586, + "loss": 1.7484, + "step": 460500 + }, + { + "epoch": 12.94, + "learning_rate": 0.00017094899814436466, + "loss": 1.7719, + "step": 461000 + }, + { + "epoch": 12.95, + "learning_rate": 0.00017080841971097073, + "loss": 1.7918, + "step": 461500 + }, + { + "epoch": 12.96, + "learning_rate": 0.0001706678412775768, + "loss": 1.7792, + "step": 462000 + }, + { + "epoch": 12.98, + "learning_rate": 0.00017052726284418285, + "loss": 1.7368, + "step": 462500 + }, + { + "epoch": 12.99, + "learning_rate": 0.0001703866844107889, + "loss": 1.7598, + "step": 463000 + }, + { + "epoch": 13.01, + "learning_rate": 0.00017024610597739497, + "loss": 1.7528, + "step": 463500 + }, + { + "epoch": 13.02, + "learning_rate": 0.000170105527544001, + "loss": 1.6807, + "step": 464000 + }, + { + "epoch": 13.04, + "learning_rate": 0.0001699649491106071, + "loss": 1.7284, + "step": 464500 + }, + { + "epoch": 13.05, + "learning_rate": 0.00016982437067721316, + "loss": 1.6896, + "step": 465000 + }, + { + "epoch": 13.06, + "learning_rate": 0.00016968379224381922, + "loss": 1.7024, + "step": 465500 + }, + { + "epoch": 13.08, + "learning_rate": 0.00016954321381042528, + "loss": 1.7025, + "step": 466000 + }, + { + "epoch": 13.09, + "learning_rate": 0.00016940263537703135, + "loss": 1.6848, + "step": 466500 + }, + { + "epoch": 13.11, + "learning_rate": 0.0001692620569436374, + "loss": 1.707, + "step": 467000 + }, + { + "epoch": 13.12, + "learning_rate": 0.00016912147851024347, + "loss": 1.7138, + "step": 467500 + }, + { + "epoch": 13.13, + "learning_rate": 0.0001689809000768495, + "loss": 1.6969, + "step": 468000 + }, + { + "epoch": 13.15, + "learning_rate": 0.00016884032164345557, + "loss": 1.6782, + "step": 468500 + }, + { + "epoch": 13.16, + "learning_rate": 0.00016869974321006165, + "loss": 1.6959, + "step": 469000 + }, + { + "epoch": 13.18, + "learning_rate": 0.00016855916477666772, + "loss": 1.7567, + "step": 469500 + }, + { + "epoch": 13.19, + "learning_rate": 0.00016841858634327378, + "loss": 1.7163, + "step": 470000 + }, + { + "epoch": 13.2, + "learning_rate": 0.00016827800790987984, + "loss": 1.7081, + "step": 470500 + }, + { + "epoch": 13.22, + "learning_rate": 0.0001681374294764859, + "loss": 1.703, + "step": 471000 + }, + { + "epoch": 13.23, + "learning_rate": 0.00016799685104309196, + "loss": 1.7047, + "step": 471500 + }, + { + "epoch": 13.25, + "learning_rate": 0.00016785627260969803, + "loss": 1.7161, + "step": 472000 + }, + { + "epoch": 13.26, + "learning_rate": 0.00016771569417630406, + "loss": 1.7038, + "step": 472500 + }, + { + "epoch": 13.27, + "learning_rate": 0.00016757511574291015, + "loss": 1.7404, + "step": 473000 + }, + { + "epoch": 13.29, + "learning_rate": 0.0001674345373095162, + "loss": 1.7217, + "step": 473500 + }, + { + "epoch": 13.3, + "learning_rate": 0.00016729395887612227, + "loss": 1.702, + "step": 474000 + }, + { + "epoch": 13.32, + "learning_rate": 0.00016715338044272834, + "loss": 1.7199, + "step": 474500 + }, + { + "epoch": 13.33, + "learning_rate": 0.0001670128020093344, + "loss": 1.7062, + "step": 475000 + }, + { + "epoch": 13.34, + "learning_rate": 0.00016687222357594046, + "loss": 1.6744, + "step": 475500 + }, + { + "epoch": 13.36, + "learning_rate": 0.00016673164514254652, + "loss": 1.673, + "step": 476000 + }, + { + "epoch": 13.37, + "learning_rate": 0.00016659106670915256, + "loss": 1.7313, + "step": 476500 + }, + { + "epoch": 13.39, + "learning_rate": 0.00016645048827575862, + "loss": 1.7032, + "step": 477000 + }, + { + "epoch": 13.4, + "learning_rate": 0.0001663099098423647, + "loss": 1.7284, + "step": 477500 + }, + { + "epoch": 13.41, + "learning_rate": 0.00016616933140897077, + "loss": 1.7071, + "step": 478000 + }, + { + "epoch": 13.43, + "learning_rate": 0.00016602875297557683, + "loss": 1.7076, + "step": 478500 + }, + { + "epoch": 13.44, + "learning_rate": 0.0001658881745421829, + "loss": 1.7219, + "step": 479000 + }, + { + "epoch": 13.46, + "learning_rate": 0.00016574759610878896, + "loss": 1.7143, + "step": 479500 + }, + { + "epoch": 13.47, + "learning_rate": 0.00016560701767539502, + "loss": 1.7173, + "step": 480000 + }, + { + "epoch": 13.48, + "learning_rate": 0.00016546643924200105, + "loss": 1.7236, + "step": 480500 + }, + { + "epoch": 13.5, + "learning_rate": 0.00016532586080860711, + "loss": 1.7243, + "step": 481000 + }, + { + "epoch": 13.51, + "learning_rate": 0.0001651852823752132, + "loss": 1.7027, + "step": 481500 + }, + { + "epoch": 13.53, + "learning_rate": 0.00016504470394181927, + "loss": 1.7147, + "step": 482000 + }, + { + "epoch": 13.54, + "learning_rate": 0.00016490412550842533, + "loss": 1.705, + "step": 482500 + }, + { + "epoch": 13.55, + "learning_rate": 0.0001647635470750314, + "loss": 1.6933, + "step": 483000 + }, + { + "epoch": 13.57, + "learning_rate": 0.00016462296864163745, + "loss": 1.7353, + "step": 483500 + }, + { + "epoch": 13.58, + "learning_rate": 0.0001644823902082435, + "loss": 1.6976, + "step": 484000 + }, + { + "epoch": 13.6, + "learning_rate": 0.00016434181177484955, + "loss": 1.7118, + "step": 484500 + }, + { + "epoch": 13.61, + "learning_rate": 0.0001642012333414556, + "loss": 1.7086, + "step": 485000 + }, + { + "epoch": 13.62, + "learning_rate": 0.00016406065490806167, + "loss": 1.7019, + "step": 485500 + }, + { + "epoch": 13.64, + "learning_rate": 0.00016392007647466776, + "loss": 1.7094, + "step": 486000 + }, + { + "epoch": 13.65, + "learning_rate": 0.00016377949804127382, + "loss": 1.7013, + "step": 486500 + }, + { + "epoch": 13.67, + "learning_rate": 0.00016363891960787988, + "loss": 1.7514, + "step": 487000 + }, + { + "epoch": 13.68, + "learning_rate": 0.00016349834117448595, + "loss": 1.6936, + "step": 487500 + }, + { + "epoch": 13.69, + "learning_rate": 0.000163357762741092, + "loss": 1.7091, + "step": 488000 + }, + { + "epoch": 13.71, + "learning_rate": 0.00016321718430769804, + "loss": 1.7251, + "step": 488500 + }, + { + "epoch": 13.72, + "learning_rate": 0.0001630766058743041, + "loss": 1.6919, + "step": 489000 + }, + { + "epoch": 13.74, + "learning_rate": 0.00016293602744091017, + "loss": 1.7196, + "step": 489500 + }, + { + "epoch": 13.75, + "learning_rate": 0.00016279544900751626, + "loss": 1.7089, + "step": 490000 + }, + { + "epoch": 13.76, + "learning_rate": 0.00016265487057412232, + "loss": 1.727, + "step": 490500 + }, + { + "epoch": 13.78, + "learning_rate": 0.00016251429214072838, + "loss": 1.7141, + "step": 491000 + }, + { + "epoch": 13.79, + "learning_rate": 0.00016237371370733444, + "loss": 1.7047, + "step": 491500 + }, + { + "epoch": 13.81, + "learning_rate": 0.0001622331352739405, + "loss": 1.6898, + "step": 492000 + }, + { + "epoch": 13.82, + "learning_rate": 0.00016209255684054654, + "loss": 1.6999, + "step": 492500 + }, + { + "epoch": 13.83, + "learning_rate": 0.0001619519784071526, + "loss": 1.7309, + "step": 493000 + }, + { + "epoch": 13.85, + "learning_rate": 0.00016181139997375866, + "loss": 1.6896, + "step": 493500 + }, + { + "epoch": 13.86, + "learning_rate": 0.00016167082154036472, + "loss": 1.7286, + "step": 494000 + }, + { + "epoch": 13.88, + "learning_rate": 0.0001615302431069708, + "loss": 1.7278, + "step": 494500 + }, + { + "epoch": 13.89, + "learning_rate": 0.00016138966467357688, + "loss": 1.725, + "step": 495000 + }, + { + "epoch": 13.91, + "learning_rate": 0.00016124908624018294, + "loss": 1.7102, + "step": 495500 + }, + { + "epoch": 13.92, + "learning_rate": 0.000161108507806789, + "loss": 1.6975, + "step": 496000 + }, + { + "epoch": 13.93, + "learning_rate": 0.00016096792937339503, + "loss": 1.72, + "step": 496500 + }, + { + "epoch": 13.95, + "learning_rate": 0.0001608273509400011, + "loss": 1.7225, + "step": 497000 + }, + { + "epoch": 13.96, + "learning_rate": 0.00016068677250660716, + "loss": 1.6941, + "step": 497500 + }, + { + "epoch": 13.98, + "learning_rate": 0.00016054619407321322, + "loss": 1.7246, + "step": 498000 + }, + { + "epoch": 13.99, + "learning_rate": 0.0001604056156398193, + "loss": 1.7029, + "step": 498500 + }, + { + "epoch": 14.0, + "learning_rate": 0.00016026503720642537, + "loss": 1.7228, + "step": 499000 + }, + { + "epoch": 14.02, + "learning_rate": 0.00016012445877303143, + "loss": 1.657, + "step": 499500 + }, + { + "epoch": 14.03, + "learning_rate": 0.0001599838803396375, + "loss": 1.6552, + "step": 500000 + }, + { + "epoch": 14.05, + "learning_rate": 0.00015984330190624356, + "loss": 1.6641, + "step": 500500 + }, + { + "epoch": 14.06, + "learning_rate": 0.0001597027234728496, + "loss": 1.6517, + "step": 501000 + }, + { + "epoch": 14.07, + "learning_rate": 0.00015956214503945565, + "loss": 1.6249, + "step": 501500 + }, + { + "epoch": 14.09, + "learning_rate": 0.00015942156660606172, + "loss": 1.6939, + "step": 502000 + }, + { + "epoch": 14.1, + "learning_rate": 0.00015928098817266778, + "loss": 1.6659, + "step": 502500 + }, + { + "epoch": 14.12, + "learning_rate": 0.00015914040973927387, + "loss": 1.6344, + "step": 503000 + }, + { + "epoch": 14.13, + "learning_rate": 0.00015899983130587993, + "loss": 1.6471, + "step": 503500 + }, + { + "epoch": 14.14, + "learning_rate": 0.000158859252872486, + "loss": 1.6678, + "step": 504000 + }, + { + "epoch": 14.16, + "learning_rate": 0.00015871867443909205, + "loss": 1.645, + "step": 504500 + }, + { + "epoch": 14.17, + "learning_rate": 0.0001585780960056981, + "loss": 1.6122, + "step": 505000 + }, + { + "epoch": 14.19, + "learning_rate": 0.00015843751757230415, + "loss": 1.6648, + "step": 505500 + }, + { + "epoch": 14.2, + "learning_rate": 0.0001582969391389102, + "loss": 1.634, + "step": 506000 + }, + { + "epoch": 14.21, + "learning_rate": 0.00015815636070551627, + "loss": 1.6539, + "step": 506500 + }, + { + "epoch": 14.23, + "learning_rate": 0.00015801578227212236, + "loss": 1.6509, + "step": 507000 + }, + { + "epoch": 14.24, + "learning_rate": 0.00015787520383872842, + "loss": 1.6445, + "step": 507500 + }, + { + "epoch": 14.26, + "learning_rate": 0.00015773462540533449, + "loss": 1.6699, + "step": 508000 + }, + { + "epoch": 14.27, + "learning_rate": 0.00015759404697194055, + "loss": 1.6545, + "step": 508500 + }, + { + "epoch": 14.28, + "learning_rate": 0.00015745346853854658, + "loss": 1.6798, + "step": 509000 + }, + { + "epoch": 14.3, + "learning_rate": 0.00015731289010515264, + "loss": 1.6795, + "step": 509500 + }, + { + "epoch": 14.31, + "learning_rate": 0.0001571723116717587, + "loss": 1.6626, + "step": 510000 + }, + { + "epoch": 14.33, + "learning_rate": 0.00015703173323836477, + "loss": 1.675, + "step": 510500 + }, + { + "epoch": 14.34, + "learning_rate": 0.00015689115480497083, + "loss": 1.6708, + "step": 511000 + }, + { + "epoch": 14.35, + "learning_rate": 0.00015675057637157692, + "loss": 1.6486, + "step": 511500 + }, + { + "epoch": 14.37, + "learning_rate": 0.00015660999793818298, + "loss": 1.6444, + "step": 512000 + }, + { + "epoch": 14.38, + "learning_rate": 0.00015646941950478904, + "loss": 1.6837, + "step": 512500 + }, + { + "epoch": 14.4, + "learning_rate": 0.00015632884107139508, + "loss": 1.6924, + "step": 513000 + }, + { + "epoch": 14.41, + "learning_rate": 0.00015618826263800114, + "loss": 1.6624, + "step": 513500 + }, + { + "epoch": 14.42, + "learning_rate": 0.0001560476842046072, + "loss": 1.6541, + "step": 514000 + }, + { + "epoch": 14.44, + "learning_rate": 0.00015590710577121326, + "loss": 1.6492, + "step": 514500 + }, + { + "epoch": 14.45, + "learning_rate": 0.00015576652733781933, + "loss": 1.6661, + "step": 515000 + }, + { + "epoch": 14.47, + "learning_rate": 0.00015562594890442541, + "loss": 1.6671, + "step": 515500 + }, + { + "epoch": 14.48, + "learning_rate": 0.00015548537047103148, + "loss": 1.6869, + "step": 516000 + }, + { + "epoch": 14.49, + "learning_rate": 0.00015534479203763754, + "loss": 1.6793, + "step": 516500 + }, + { + "epoch": 14.51, + "learning_rate": 0.00015520421360424357, + "loss": 1.635, + "step": 517000 + }, + { + "epoch": 14.52, + "learning_rate": 0.00015506363517084964, + "loss": 1.6433, + "step": 517500 + }, + { + "epoch": 14.54, + "learning_rate": 0.0001549230567374557, + "loss": 1.6893, + "step": 518000 + }, + { + "epoch": 14.55, + "learning_rate": 0.00015478247830406176, + "loss": 1.6515, + "step": 518500 + }, + { + "epoch": 14.56, + "learning_rate": 0.00015464189987066782, + "loss": 1.6721, + "step": 519000 + }, + { + "epoch": 14.58, + "learning_rate": 0.00015450132143727388, + "loss": 1.6938, + "step": 519500 + }, + { + "epoch": 14.59, + "learning_rate": 0.00015436074300387997, + "loss": 1.6508, + "step": 520000 + }, + { + "epoch": 14.61, + "learning_rate": 0.00015422016457048603, + "loss": 1.6625, + "step": 520500 + }, + { + "epoch": 14.62, + "learning_rate": 0.00015407958613709207, + "loss": 1.6956, + "step": 521000 + }, + { + "epoch": 14.63, + "learning_rate": 0.00015393900770369813, + "loss": 1.6799, + "step": 521500 + }, + { + "epoch": 14.65, + "learning_rate": 0.0001537984292703042, + "loss": 1.6834, + "step": 522000 + }, + { + "epoch": 14.66, + "learning_rate": 0.00015365785083691025, + "loss": 1.6736, + "step": 522500 + }, + { + "epoch": 14.68, + "learning_rate": 0.00015351727240351632, + "loss": 1.6651, + "step": 523000 + }, + { + "epoch": 14.69, + "learning_rate": 0.00015337669397012238, + "loss": 1.6767, + "step": 523500 + }, + { + "epoch": 14.7, + "learning_rate": 0.00015323611553672847, + "loss": 1.6678, + "step": 524000 + }, + { + "epoch": 14.72, + "learning_rate": 0.00015309553710333453, + "loss": 1.6838, + "step": 524500 + }, + { + "epoch": 14.73, + "learning_rate": 0.00015295495866994056, + "loss": 1.6628, + "step": 525000 + }, + { + "epoch": 14.75, + "learning_rate": 0.00015281438023654663, + "loss": 1.6682, + "step": 525500 + }, + { + "epoch": 14.76, + "learning_rate": 0.0001526738018031527, + "loss": 1.6808, + "step": 526000 + }, + { + "epoch": 14.78, + "learning_rate": 0.00015253322336975875, + "loss": 1.6699, + "step": 526500 + }, + { + "epoch": 14.79, + "learning_rate": 0.0001523926449363648, + "loss": 1.6419, + "step": 527000 + }, + { + "epoch": 14.8, + "learning_rate": 0.00015225206650297087, + "loss": 1.6573, + "step": 527500 + }, + { + "epoch": 14.82, + "learning_rate": 0.00015211148806957694, + "loss": 1.6837, + "step": 528000 + }, + { + "epoch": 14.83, + "learning_rate": 0.00015197090963618302, + "loss": 1.6669, + "step": 528500 + }, + { + "epoch": 14.85, + "learning_rate": 0.00015183033120278906, + "loss": 1.6609, + "step": 529000 + }, + { + "epoch": 14.86, + "learning_rate": 0.00015168975276939512, + "loss": 1.7295, + "step": 529500 + }, + { + "epoch": 14.87, + "learning_rate": 0.00015154917433600118, + "loss": 1.6872, + "step": 530000 + }, + { + "epoch": 14.89, + "learning_rate": 0.00015140859590260725, + "loss": 1.6727, + "step": 530500 + }, + { + "epoch": 14.9, + "learning_rate": 0.0001512680174692133, + "loss": 1.6716, + "step": 531000 + }, + { + "epoch": 14.92, + "learning_rate": 0.00015112743903581937, + "loss": 1.6782, + "step": 531500 + }, + { + "epoch": 14.93, + "learning_rate": 0.00015098686060242543, + "loss": 1.6883, + "step": 532000 + }, + { + "epoch": 14.94, + "learning_rate": 0.00015084628216903152, + "loss": 1.6422, + "step": 532500 + }, + { + "epoch": 14.96, + "learning_rate": 0.00015070570373563758, + "loss": 1.6991, + "step": 533000 + }, + { + "epoch": 14.97, + "learning_rate": 0.00015056512530224362, + "loss": 1.6951, + "step": 533500 + }, + { + "epoch": 14.99, + "learning_rate": 0.00015042454686884968, + "loss": 1.6697, + "step": 534000 + }, + { + "epoch": 15.0, + "learning_rate": 0.00015028396843545574, + "loss": 1.7002, + "step": 534500 + }, + { + "epoch": 15.01, + "learning_rate": 0.0001501433900020618, + "loss": 1.6082, + "step": 535000 + }, + { + "epoch": 15.03, + "learning_rate": 0.00015000281156866786, + "loss": 1.6386, + "step": 535500 + }, + { + "epoch": 15.04, + "learning_rate": 0.00014986223313527393, + "loss": 1.6434, + "step": 536000 + }, + { + "epoch": 15.06, + "learning_rate": 0.00014972165470188, + "loss": 1.6171, + "step": 536500 + }, + { + "epoch": 15.07, + "learning_rate": 0.00014958107626848605, + "loss": 1.5879, + "step": 537000 + }, + { + "epoch": 15.08, + "learning_rate": 0.0001494404978350921, + "loss": 1.6285, + "step": 537500 + }, + { + "epoch": 15.1, + "learning_rate": 0.00014929991940169817, + "loss": 1.6323, + "step": 538000 + }, + { + "epoch": 15.11, + "learning_rate": 0.00014915934096830424, + "loss": 1.6375, + "step": 538500 + }, + { + "epoch": 15.13, + "learning_rate": 0.0001490187625349103, + "loss": 1.6198, + "step": 539000 + }, + { + "epoch": 15.14, + "learning_rate": 0.00014887818410151636, + "loss": 1.6497, + "step": 539500 + }, + { + "epoch": 15.15, + "learning_rate": 0.00014873760566812242, + "loss": 1.6025, + "step": 540000 + }, + { + "epoch": 15.17, + "learning_rate": 0.00014859702723472848, + "loss": 1.642, + "step": 540500 + }, + { + "epoch": 15.18, + "learning_rate": 0.00014845644880133455, + "loss": 1.6113, + "step": 541000 + }, + { + "epoch": 15.2, + "learning_rate": 0.0001483158703679406, + "loss": 1.6269, + "step": 541500 + }, + { + "epoch": 15.21, + "learning_rate": 0.00014817529193454667, + "loss": 1.6071, + "step": 542000 + }, + { + "epoch": 15.22, + "learning_rate": 0.00014803471350115273, + "loss": 1.6044, + "step": 542500 + }, + { + "epoch": 15.24, + "learning_rate": 0.0001478941350677588, + "loss": 1.6428, + "step": 543000 + }, + { + "epoch": 15.25, + "learning_rate": 0.00014775355663436486, + "loss": 1.5916, + "step": 543500 + }, + { + "epoch": 15.27, + "learning_rate": 0.00014761297820097092, + "loss": 1.6241, + "step": 544000 + }, + { + "epoch": 15.28, + "learning_rate": 0.00014747239976757698, + "loss": 1.6256, + "step": 544500 + }, + { + "epoch": 15.29, + "learning_rate": 0.00014733182133418304, + "loss": 1.6019, + "step": 545000 + }, + { + "epoch": 15.31, + "learning_rate": 0.0001471912429007891, + "loss": 1.6239, + "step": 545500 + }, + { + "epoch": 15.32, + "learning_rate": 0.00014705066446739517, + "loss": 1.6393, + "step": 546000 + }, + { + "epoch": 15.34, + "learning_rate": 0.00014691008603400123, + "loss": 1.6319, + "step": 546500 + }, + { + "epoch": 15.35, + "learning_rate": 0.0001467695076006073, + "loss": 1.6056, + "step": 547000 + }, + { + "epoch": 15.36, + "learning_rate": 0.00014662892916721335, + "loss": 1.6102, + "step": 547500 + }, + { + "epoch": 15.38, + "learning_rate": 0.0001464883507338194, + "loss": 1.6085, + "step": 548000 + }, + { + "epoch": 15.39, + "learning_rate": 0.00014634777230042547, + "loss": 1.6607, + "step": 548500 + }, + { + "epoch": 15.41, + "learning_rate": 0.00014620719386703154, + "loss": 1.6512, + "step": 549000 + }, + { + "epoch": 15.42, + "learning_rate": 0.0001460666154336376, + "loss": 1.6552, + "step": 549500 + }, + { + "epoch": 15.43, + "learning_rate": 0.00014592603700024366, + "loss": 1.618, + "step": 550000 + }, + { + "epoch": 15.45, + "learning_rate": 0.00014578545856684972, + "loss": 1.6183, + "step": 550500 + }, + { + "epoch": 15.46, + "learning_rate": 0.00014564488013345578, + "loss": 1.6328, + "step": 551000 + }, + { + "epoch": 15.48, + "learning_rate": 0.00014550430170006185, + "loss": 1.6187, + "step": 551500 + }, + { + "epoch": 15.49, + "learning_rate": 0.0001453637232666679, + "loss": 1.6064, + "step": 552000 + }, + { + "epoch": 15.5, + "learning_rate": 0.00014522314483327397, + "loss": 1.6181, + "step": 552500 + }, + { + "epoch": 15.52, + "learning_rate": 0.00014508256639988003, + "loss": 1.6118, + "step": 553000 + }, + { + "epoch": 15.53, + "learning_rate": 0.0001449419879664861, + "loss": 1.6387, + "step": 553500 + }, + { + "epoch": 15.55, + "learning_rate": 0.00014480140953309216, + "loss": 1.6194, + "step": 554000 + }, + { + "epoch": 15.56, + "learning_rate": 0.00014466083109969822, + "loss": 1.6419, + "step": 554500 + }, + { + "epoch": 15.57, + "learning_rate": 0.00014452025266630428, + "loss": 1.6443, + "step": 555000 + }, + { + "epoch": 15.59, + "learning_rate": 0.00014437967423291034, + "loss": 1.6379, + "step": 555500 + }, + { + "epoch": 15.6, + "learning_rate": 0.0001442390957995164, + "loss": 1.617, + "step": 556000 + }, + { + "epoch": 15.62, + "learning_rate": 0.00014409851736612244, + "loss": 1.6325, + "step": 556500 + }, + { + "epoch": 15.63, + "learning_rate": 0.00014395793893272853, + "loss": 1.6373, + "step": 557000 + }, + { + "epoch": 15.64, + "learning_rate": 0.0001438173604993346, + "loss": 1.6221, + "step": 557500 + }, + { + "epoch": 15.66, + "learning_rate": 0.00014367678206594065, + "loss": 1.612, + "step": 558000 + }, + { + "epoch": 15.67, + "learning_rate": 0.0001435362036325467, + "loss": 1.6379, + "step": 558500 + }, + { + "epoch": 15.69, + "learning_rate": 0.00014339562519915278, + "loss": 1.6326, + "step": 559000 + }, + { + "epoch": 15.7, + "learning_rate": 0.00014325504676575884, + "loss": 1.6009, + "step": 559500 + }, + { + "epoch": 15.72, + "learning_rate": 0.0001431144683323649, + "loss": 1.6199, + "step": 560000 + }, + { + "epoch": 15.73, + "learning_rate": 0.00014297388989897096, + "loss": 1.6541, + "step": 560500 + }, + { + "epoch": 15.74, + "learning_rate": 0.00014283331146557702, + "loss": 1.6647, + "step": 561000 + }, + { + "epoch": 15.76, + "learning_rate": 0.00014269273303218308, + "loss": 1.6486, + "step": 561500 + }, + { + "epoch": 15.77, + "learning_rate": 0.00014255215459878915, + "loss": 1.6654, + "step": 562000 + }, + { + "epoch": 15.79, + "learning_rate": 0.0001424115761653952, + "loss": 1.6393, + "step": 562500 + }, + { + "epoch": 15.8, + "learning_rate": 0.00014227099773200127, + "loss": 1.6223, + "step": 563000 + }, + { + "epoch": 15.81, + "learning_rate": 0.00014213041929860733, + "loss": 1.677, + "step": 563500 + }, + { + "epoch": 15.83, + "learning_rate": 0.0001419898408652134, + "loss": 1.5877, + "step": 564000 + }, + { + "epoch": 15.84, + "learning_rate": 0.00014184926243181946, + "loss": 1.6096, + "step": 564500 + }, + { + "epoch": 15.86, + "learning_rate": 0.0001417086839984255, + "loss": 1.6199, + "step": 565000 + }, + { + "epoch": 15.87, + "learning_rate": 0.00014156810556503158, + "loss": 1.609, + "step": 565500 + }, + { + "epoch": 15.88, + "learning_rate": 0.00014142752713163764, + "loss": 1.6239, + "step": 566000 + }, + { + "epoch": 15.9, + "learning_rate": 0.0001412869486982437, + "loss": 1.6299, + "step": 566500 + }, + { + "epoch": 15.91, + "learning_rate": 0.00014114637026484974, + "loss": 1.6673, + "step": 567000 + }, + { + "epoch": 15.93, + "learning_rate": 0.00014100579183145583, + "loss": 1.6265, + "step": 567500 + }, + { + "epoch": 15.94, + "learning_rate": 0.0001408652133980619, + "loss": 1.6383, + "step": 568000 + }, + { + "epoch": 15.95, + "learning_rate": 0.00014072463496466795, + "loss": 1.6294, + "step": 568500 + }, + { + "epoch": 15.97, + "learning_rate": 0.000140584056531274, + "loss": 1.6214, + "step": 569000 + }, + { + "epoch": 15.98, + "learning_rate": 0.00014044347809788008, + "loss": 1.6521, + "step": 569500 + }, + { + "epoch": 16.0, + "learning_rate": 0.00014030289966448614, + "loss": 1.6504, + "step": 570000 + }, + { + "epoch": 16.01, + "learning_rate": 0.0001401623212310922, + "loss": 1.5651, + "step": 570500 + }, + { + "epoch": 16.02, + "learning_rate": 0.00014002174279769823, + "loss": 1.5787, + "step": 571000 + }, + { + "epoch": 16.04, + "learning_rate": 0.00013988116436430432, + "loss": 1.5698, + "step": 571500 + }, + { + "epoch": 16.05, + "learning_rate": 0.00013974058593091039, + "loss": 1.5477, + "step": 572000 + }, + { + "epoch": 16.07, + "learning_rate": 0.00013960000749751645, + "loss": 1.5727, + "step": 572500 + }, + { + "epoch": 16.08, + "learning_rate": 0.00013945942906412248, + "loss": 1.5762, + "step": 573000 + }, + { + "epoch": 16.09, + "learning_rate": 0.00013931885063072854, + "loss": 1.5924, + "step": 573500 + }, + { + "epoch": 16.11, + "learning_rate": 0.00013917827219733463, + "loss": 1.5913, + "step": 574000 + }, + { + "epoch": 16.12, + "learning_rate": 0.0001390376937639407, + "loss": 1.581, + "step": 574500 + }, + { + "epoch": 16.14, + "learning_rate": 0.00013889711533054673, + "loss": 1.5518, + "step": 575000 + }, + { + "epoch": 16.15, + "learning_rate": 0.0001387565368971528, + "loss": 1.5782, + "step": 575500 + }, + { + "epoch": 16.16, + "learning_rate": 0.00013861595846375888, + "loss": 1.5733, + "step": 576000 + }, + { + "epoch": 16.18, + "learning_rate": 0.00013847538003036494, + "loss": 1.5869, + "step": 576500 + }, + { + "epoch": 16.19, + "learning_rate": 0.00013833480159697098, + "loss": 1.5828, + "step": 577000 + }, + { + "epoch": 16.21, + "learning_rate": 0.00013819422316357704, + "loss": 1.5645, + "step": 577500 + }, + { + "epoch": 16.22, + "learning_rate": 0.00013805364473018313, + "loss": 1.5653, + "step": 578000 + }, + { + "epoch": 16.23, + "learning_rate": 0.0001379130662967892, + "loss": 1.601, + "step": 578500 + }, + { + "epoch": 16.25, + "learning_rate": 0.00013777248786339523, + "loss": 1.5955, + "step": 579000 + }, + { + "epoch": 16.26, + "learning_rate": 0.0001376319094300013, + "loss": 1.5793, + "step": 579500 + }, + { + "epoch": 16.28, + "learning_rate": 0.00013749133099660735, + "loss": 1.5702, + "step": 580000 + }, + { + "epoch": 16.29, + "learning_rate": 0.00013735075256321344, + "loss": 1.5841, + "step": 580500 + }, + { + "epoch": 16.3, + "learning_rate": 0.00013721017412981947, + "loss": 1.5379, + "step": 581000 + }, + { + "epoch": 16.32, + "learning_rate": 0.00013706959569642554, + "loss": 1.5676, + "step": 581500 + }, + { + "epoch": 16.33, + "learning_rate": 0.0001369290172630316, + "loss": 1.5905, + "step": 582000 + }, + { + "epoch": 16.35, + "learning_rate": 0.00013678843882963769, + "loss": 1.6139, + "step": 582500 + }, + { + "epoch": 16.36, + "learning_rate": 0.00013664786039624372, + "loss": 1.608, + "step": 583000 + }, + { + "epoch": 16.37, + "learning_rate": 0.00013650728196284978, + "loss": 1.5832, + "step": 583500 + }, + { + "epoch": 16.39, + "learning_rate": 0.00013636670352945584, + "loss": 1.6174, + "step": 584000 + }, + { + "epoch": 16.4, + "learning_rate": 0.00013622612509606193, + "loss": 1.5895, + "step": 584500 + }, + { + "epoch": 16.42, + "learning_rate": 0.00013608554666266797, + "loss": 1.5861, + "step": 585000 + }, + { + "epoch": 16.43, + "learning_rate": 0.00013594496822927403, + "loss": 1.5796, + "step": 585500 + }, + { + "epoch": 16.44, + "learning_rate": 0.0001358043897958801, + "loss": 1.5854, + "step": 586000 + }, + { + "epoch": 16.46, + "learning_rate": 0.00013566381136248618, + "loss": 1.5782, + "step": 586500 + }, + { + "epoch": 16.47, + "learning_rate": 0.00013552323292909222, + "loss": 1.5978, + "step": 587000 + }, + { + "epoch": 16.49, + "learning_rate": 0.00013538265449569828, + "loss": 1.6099, + "step": 587500 + }, + { + "epoch": 16.5, + "learning_rate": 0.00013524207606230434, + "loss": 1.6093, + "step": 588000 + }, + { + "epoch": 16.51, + "learning_rate": 0.0001351014976289104, + "loss": 1.6228, + "step": 588500 + }, + { + "epoch": 16.53, + "learning_rate": 0.0001349609191955165, + "loss": 1.5772, + "step": 589000 + }, + { + "epoch": 16.54, + "learning_rate": 0.00013482034076212253, + "loss": 1.5884, + "step": 589500 + }, + { + "epoch": 16.56, + "learning_rate": 0.0001346797623287286, + "loss": 1.5894, + "step": 590000 + }, + { + "epoch": 16.57, + "learning_rate": 0.00013453918389533465, + "loss": 1.5949, + "step": 590500 + }, + { + "epoch": 16.59, + "learning_rate": 0.00013439860546194074, + "loss": 1.5949, + "step": 591000 + }, + { + "epoch": 16.6, + "learning_rate": 0.00013425802702854677, + "loss": 1.6063, + "step": 591500 + }, + { + "epoch": 16.61, + "learning_rate": 0.00013411744859515284, + "loss": 1.5743, + "step": 592000 + }, + { + "epoch": 16.63, + "learning_rate": 0.0001339768701617589, + "loss": 1.586, + "step": 592500 + }, + { + "epoch": 16.64, + "learning_rate": 0.000133836291728365, + "loss": 1.5523, + "step": 593000 + }, + { + "epoch": 16.66, + "learning_rate": 0.00013369571329497102, + "loss": 1.5871, + "step": 593500 + }, + { + "epoch": 16.67, + "learning_rate": 0.00013355513486157708, + "loss": 1.5904, + "step": 594000 + }, + { + "epoch": 16.68, + "learning_rate": 0.00013341455642818315, + "loss": 1.5936, + "step": 594500 + }, + { + "epoch": 16.7, + "learning_rate": 0.00013327397799478923, + "loss": 1.6254, + "step": 595000 + }, + { + "epoch": 16.71, + "learning_rate": 0.00013313339956139527, + "loss": 1.5805, + "step": 595500 + }, + { + "epoch": 16.73, + "learning_rate": 0.00013299282112800133, + "loss": 1.6077, + "step": 596000 + }, + { + "epoch": 16.74, + "learning_rate": 0.0001328522426946074, + "loss": 1.5784, + "step": 596500 + }, + { + "epoch": 16.75, + "learning_rate": 0.00013271166426121345, + "loss": 1.585, + "step": 597000 + }, + { + "epoch": 16.77, + "learning_rate": 0.00013257108582781952, + "loss": 1.6174, + "step": 597500 + }, + { + "epoch": 16.78, + "learning_rate": 0.00013243050739442558, + "loss": 1.5657, + "step": 598000 + }, + { + "epoch": 16.8, + "learning_rate": 0.00013228992896103164, + "loss": 1.6194, + "step": 598500 + }, + { + "epoch": 16.81, + "learning_rate": 0.0001321493505276377, + "loss": 1.5821, + "step": 599000 + }, + { + "epoch": 16.82, + "learning_rate": 0.00013200877209424376, + "loss": 1.5954, + "step": 599500 + }, + { + "epoch": 16.84, + "learning_rate": 0.00013186819366084983, + "loss": 1.6197, + "step": 600000 + }, + { + "epoch": 16.85, + "learning_rate": 0.0001317276152274559, + "loss": 1.5936, + "step": 600500 + }, + { + "epoch": 16.87, + "learning_rate": 0.00013158703679406195, + "loss": 1.6081, + "step": 601000 + }, + { + "epoch": 16.88, + "learning_rate": 0.000131446458360668, + "loss": 1.6012, + "step": 601500 + }, + { + "epoch": 16.89, + "learning_rate": 0.00013130587992727407, + "loss": 1.6169, + "step": 602000 + }, + { + "epoch": 16.91, + "learning_rate": 0.00013116530149388014, + "loss": 1.6046, + "step": 602500 + }, + { + "epoch": 16.92, + "learning_rate": 0.0001310247230604862, + "loss": 1.6007, + "step": 603000 + }, + { + "epoch": 16.94, + "learning_rate": 0.00013088414462709226, + "loss": 1.5971, + "step": 603500 + }, + { + "epoch": 16.95, + "learning_rate": 0.00013074356619369832, + "loss": 1.5821, + "step": 604000 + }, + { + "epoch": 16.96, + "learning_rate": 0.00013060298776030438, + "loss": 1.585, + "step": 604500 + }, + { + "epoch": 16.98, + "learning_rate": 0.00013046240932691045, + "loss": 1.617, + "step": 605000 + }, + { + "epoch": 16.99, + "learning_rate": 0.0001303218308935165, + "loss": 1.6341, + "step": 605500 + }, + { + "epoch": 17.01, + "learning_rate": 0.00013018125246012257, + "loss": 1.5642, + "step": 606000 + }, + { + "epoch": 17.02, + "learning_rate": 0.00013004067402672863, + "loss": 1.5107, + "step": 606500 + }, + { + "epoch": 17.03, + "learning_rate": 0.0001299000955933347, + "loss": 1.5292, + "step": 607000 + }, + { + "epoch": 17.05, + "learning_rate": 0.00012975951715994076, + "loss": 1.5316, + "step": 607500 + }, + { + "epoch": 17.06, + "learning_rate": 0.00012961893872654682, + "loss": 1.5349, + "step": 608000 + }, + { + "epoch": 17.08, + "learning_rate": 0.00012947836029315288, + "loss": 1.5068, + "step": 608500 + }, + { + "epoch": 17.09, + "learning_rate": 0.00012933778185975894, + "loss": 1.5212, + "step": 609000 + }, + { + "epoch": 17.1, + "learning_rate": 0.000129197203426365, + "loss": 1.5319, + "step": 609500 + }, + { + "epoch": 17.12, + "learning_rate": 0.00012905662499297107, + "loss": 1.5505, + "step": 610000 + }, + { + "epoch": 17.13, + "learning_rate": 0.00012891604655957713, + "loss": 1.4921, + "step": 610500 + }, + { + "epoch": 17.15, + "learning_rate": 0.0001287754681261832, + "loss": 1.5233, + "step": 611000 + }, + { + "epoch": 17.16, + "learning_rate": 0.00012863488969278925, + "loss": 1.5579, + "step": 611500 + }, + { + "epoch": 17.17, + "learning_rate": 0.0001284943112593953, + "loss": 1.5588, + "step": 612000 + }, + { + "epoch": 17.19, + "learning_rate": 0.00012835373282600137, + "loss": 1.5452, + "step": 612500 + }, + { + "epoch": 17.2, + "learning_rate": 0.00012821315439260744, + "loss": 1.5415, + "step": 613000 + }, + { + "epoch": 17.22, + "learning_rate": 0.0001280725759592135, + "loss": 1.5208, + "step": 613500 + }, + { + "epoch": 17.23, + "learning_rate": 0.00012793199752581956, + "loss": 1.5582, + "step": 614000 + }, + { + "epoch": 17.24, + "learning_rate": 0.00012779141909242562, + "loss": 1.5683, + "step": 614500 + }, + { + "epoch": 17.26, + "learning_rate": 0.00012765084065903168, + "loss": 1.534, + "step": 615000 + }, + { + "epoch": 17.27, + "learning_rate": 0.00012751026222563775, + "loss": 1.5108, + "step": 615500 + }, + { + "epoch": 17.29, + "learning_rate": 0.0001273696837922438, + "loss": 1.5363, + "step": 616000 + }, + { + "epoch": 17.3, + "learning_rate": 0.00012722910535884987, + "loss": 1.5752, + "step": 616500 + }, + { + "epoch": 17.31, + "learning_rate": 0.00012708852692545593, + "loss": 1.5703, + "step": 617000 + }, + { + "epoch": 17.33, + "learning_rate": 0.000126947948492062, + "loss": 1.5406, + "step": 617500 + }, + { + "epoch": 17.34, + "learning_rate": 0.00012680737005866806, + "loss": 1.5417, + "step": 618000 + }, + { + "epoch": 17.36, + "learning_rate": 0.00012666679162527412, + "loss": 1.5452, + "step": 618500 + }, + { + "epoch": 17.37, + "learning_rate": 0.00012652621319188018, + "loss": 1.5571, + "step": 619000 + }, + { + "epoch": 17.38, + "learning_rate": 0.00012638563475848624, + "loss": 1.5484, + "step": 619500 + }, + { + "epoch": 17.4, + "learning_rate": 0.0001262450563250923, + "loss": 1.5979, + "step": 620000 + }, + { + "epoch": 17.41, + "learning_rate": 0.00012610447789169837, + "loss": 1.5486, + "step": 620500 + }, + { + "epoch": 17.43, + "learning_rate": 0.00012596389945830443, + "loss": 1.4974, + "step": 621000 + }, + { + "epoch": 17.44, + "learning_rate": 0.0001258233210249105, + "loss": 1.5237, + "step": 621500 + }, + { + "epoch": 17.45, + "learning_rate": 0.00012568274259151655, + "loss": 1.5502, + "step": 622000 + }, + { + "epoch": 17.47, + "learning_rate": 0.0001255421641581226, + "loss": 1.5373, + "step": 622500 + }, + { + "epoch": 17.48, + "learning_rate": 0.00012540158572472868, + "loss": 1.5406, + "step": 623000 + }, + { + "epoch": 17.5, + "learning_rate": 0.00012526100729133474, + "loss": 1.5559, + "step": 623500 + }, + { + "epoch": 17.51, + "learning_rate": 0.0001251204288579408, + "loss": 1.5506, + "step": 624000 + }, + { + "epoch": 17.53, + "learning_rate": 0.00012497985042454686, + "loss": 1.5421, + "step": 624500 + }, + { + "epoch": 17.54, + "learning_rate": 0.00012483927199115292, + "loss": 1.5631, + "step": 625000 + }, + { + "epoch": 17.55, + "learning_rate": 0.00012469869355775899, + "loss": 1.5374, + "step": 625500 + }, + { + "epoch": 17.57, + "learning_rate": 0.00012455811512436505, + "loss": 1.5681, + "step": 626000 + }, + { + "epoch": 17.58, + "learning_rate": 0.0001244175366909711, + "loss": 1.541, + "step": 626500 + }, + { + "epoch": 17.6, + "learning_rate": 0.00012427695825757717, + "loss": 1.5325, + "step": 627000 + }, + { + "epoch": 17.61, + "learning_rate": 0.00012413637982418323, + "loss": 1.5226, + "step": 627500 + }, + { + "epoch": 17.62, + "learning_rate": 0.0001239958013907893, + "loss": 1.5524, + "step": 628000 + }, + { + "epoch": 17.64, + "learning_rate": 0.00012385522295739536, + "loss": 1.6071, + "step": 628500 + }, + { + "epoch": 17.65, + "learning_rate": 0.00012371464452400142, + "loss": 1.5639, + "step": 629000 + }, + { + "epoch": 17.67, + "learning_rate": 0.00012357406609060748, + "loss": 1.5428, + "step": 629500 + }, + { + "epoch": 17.68, + "learning_rate": 0.00012343348765721354, + "loss": 1.5646, + "step": 630000 + }, + { + "epoch": 17.69, + "learning_rate": 0.0001232929092238196, + "loss": 1.5568, + "step": 630500 + }, + { + "epoch": 17.71, + "learning_rate": 0.00012315233079042567, + "loss": 1.5319, + "step": 631000 + }, + { + "epoch": 17.72, + "learning_rate": 0.00012301175235703173, + "loss": 1.5503, + "step": 631500 + }, + { + "epoch": 17.74, + "learning_rate": 0.0001228711739236378, + "loss": 1.5291, + "step": 632000 + }, + { + "epoch": 17.75, + "learning_rate": 0.00012273059549024385, + "loss": 1.5911, + "step": 632500 + }, + { + "epoch": 17.76, + "learning_rate": 0.00012259001705684991, + "loss": 1.5696, + "step": 633000 + }, + { + "epoch": 17.78, + "learning_rate": 0.00012244943862345598, + "loss": 1.5498, + "step": 633500 + }, + { + "epoch": 17.79, + "learning_rate": 0.00012230886019006204, + "loss": 1.5292, + "step": 634000 + }, + { + "epoch": 17.81, + "learning_rate": 0.0001221682817566681, + "loss": 1.5606, + "step": 634500 + }, + { + "epoch": 17.82, + "learning_rate": 0.00012202770332327416, + "loss": 1.5626, + "step": 635000 + }, + { + "epoch": 17.83, + "learning_rate": 0.00012188712488988022, + "loss": 1.5849, + "step": 635500 + }, + { + "epoch": 17.85, + "learning_rate": 0.00012174654645648627, + "loss": 1.5534, + "step": 636000 + }, + { + "epoch": 17.86, + "learning_rate": 0.00012160596802309233, + "loss": 1.5346, + "step": 636500 + }, + { + "epoch": 17.88, + "learning_rate": 0.00012146538958969841, + "loss": 1.5747, + "step": 637000 + }, + { + "epoch": 17.89, + "learning_rate": 0.00012132481115630447, + "loss": 1.5572, + "step": 637500 + }, + { + "epoch": 17.9, + "learning_rate": 0.00012118423272291052, + "loss": 1.5445, + "step": 638000 + }, + { + "epoch": 17.92, + "learning_rate": 0.00012104365428951658, + "loss": 1.5555, + "step": 638500 + }, + { + "epoch": 17.93, + "learning_rate": 0.00012090307585612264, + "loss": 1.5335, + "step": 639000 + }, + { + "epoch": 17.95, + "learning_rate": 0.00012076249742272872, + "loss": 1.5836, + "step": 639500 + }, + { + "epoch": 17.96, + "learning_rate": 0.00012062191898933477, + "loss": 1.5538, + "step": 640000 + }, + { + "epoch": 17.97, + "learning_rate": 0.00012048134055594083, + "loss": 1.5627, + "step": 640500 + }, + { + "epoch": 17.99, + "learning_rate": 0.00012034076212254689, + "loss": 1.5717, + "step": 641000 + }, + { + "epoch": 18.0, + "learning_rate": 0.00012020018368915297, + "loss": 1.5152, + "step": 641500 + }, + { + "epoch": 18.02, + "learning_rate": 0.00012005960525575902, + "loss": 1.4885, + "step": 642000 + }, + { + "epoch": 18.03, + "learning_rate": 0.00011991902682236508, + "loss": 1.4731, + "step": 642500 + }, + { + "epoch": 18.04, + "learning_rate": 0.00011977844838897114, + "loss": 1.5073, + "step": 643000 + }, + { + "epoch": 18.06, + "learning_rate": 0.00011963786995557721, + "loss": 1.4917, + "step": 643500 + }, + { + "epoch": 18.07, + "learning_rate": 0.00011949729152218326, + "loss": 1.4906, + "step": 644000 + }, + { + "epoch": 18.09, + "learning_rate": 0.00011935671308878932, + "loss": 1.5095, + "step": 644500 + }, + { + "epoch": 18.1, + "learning_rate": 0.00011921613465539539, + "loss": 1.4915, + "step": 645000 + }, + { + "epoch": 18.11, + "learning_rate": 0.00011907555622200146, + "loss": 1.5024, + "step": 645500 + }, + { + "epoch": 18.13, + "learning_rate": 0.00011893497778860751, + "loss": 1.5251, + "step": 646000 + }, + { + "epoch": 18.14, + "learning_rate": 0.00011879439935521357, + "loss": 1.4836, + "step": 646500 + }, + { + "epoch": 18.16, + "learning_rate": 0.00011865382092181963, + "loss": 1.5216, + "step": 647000 + }, + { + "epoch": 18.17, + "learning_rate": 0.0001185132424884257, + "loss": 1.4953, + "step": 647500 + }, + { + "epoch": 18.18, + "learning_rate": 0.00011837266405503177, + "loss": 1.528, + "step": 648000 + }, + { + "epoch": 18.2, + "learning_rate": 0.00011823208562163782, + "loss": 1.5212, + "step": 648500 + }, + { + "epoch": 18.21, + "learning_rate": 0.00011809150718824388, + "loss": 1.5065, + "step": 649000 + }, + { + "epoch": 18.23, + "learning_rate": 0.00011795092875484994, + "loss": 1.5281, + "step": 649500 + }, + { + "epoch": 18.24, + "learning_rate": 0.00011781035032145602, + "loss": 1.5104, + "step": 650000 + }, + { + "epoch": 18.25, + "learning_rate": 0.00011766977188806207, + "loss": 1.4646, + "step": 650500 + }, + { + "epoch": 18.27, + "learning_rate": 0.00011752919345466813, + "loss": 1.4751, + "step": 651000 + }, + { + "epoch": 18.28, + "learning_rate": 0.00011738861502127419, + "loss": 1.5132, + "step": 651500 + }, + { + "epoch": 18.3, + "learning_rate": 0.00011724803658788027, + "loss": 1.4984, + "step": 652000 + }, + { + "epoch": 18.31, + "learning_rate": 0.00011710745815448632, + "loss": 1.4835, + "step": 652500 + }, + { + "epoch": 18.32, + "learning_rate": 0.00011696687972109238, + "loss": 1.5207, + "step": 653000 + }, + { + "epoch": 18.34, + "learning_rate": 0.00011682630128769844, + "loss": 1.4979, + "step": 653500 + }, + { + "epoch": 18.35, + "learning_rate": 0.00011668572285430452, + "loss": 1.5274, + "step": 654000 + }, + { + "epoch": 18.37, + "learning_rate": 0.00011654514442091056, + "loss": 1.5101, + "step": 654500 + }, + { + "epoch": 18.38, + "learning_rate": 0.00011640456598751663, + "loss": 1.5121, + "step": 655000 + }, + { + "epoch": 18.4, + "learning_rate": 0.00011626398755412269, + "loss": 1.5177, + "step": 655500 + }, + { + "epoch": 18.41, + "learning_rate": 0.00011612340912072875, + "loss": 1.4977, + "step": 656000 + }, + { + "epoch": 18.42, + "learning_rate": 0.00011598283068733481, + "loss": 1.5221, + "step": 656500 + }, + { + "epoch": 18.44, + "learning_rate": 0.00011584225225394087, + "loss": 1.5066, + "step": 657000 + }, + { + "epoch": 18.45, + "learning_rate": 0.00011570167382054694, + "loss": 1.4997, + "step": 657500 + }, + { + "epoch": 18.47, + "learning_rate": 0.000115561095387153, + "loss": 1.5211, + "step": 658000 + }, + { + "epoch": 18.48, + "learning_rate": 0.00011542051695375905, + "loss": 1.5144, + "step": 658500 + }, + { + "epoch": 18.49, + "learning_rate": 0.00011527993852036512, + "loss": 1.4994, + "step": 659000 + }, + { + "epoch": 18.51, + "learning_rate": 0.00011513936008697118, + "loss": 1.5215, + "step": 659500 + }, + { + "epoch": 18.52, + "learning_rate": 0.00011499878165357724, + "loss": 1.5035, + "step": 660000 + }, + { + "epoch": 18.54, + "learning_rate": 0.00011485820322018329, + "loss": 1.5063, + "step": 660500 + }, + { + "epoch": 18.55, + "learning_rate": 0.00011471762478678937, + "loss": 1.4783, + "step": 661000 + }, + { + "epoch": 18.56, + "learning_rate": 0.00011457704635339543, + "loss": 1.4942, + "step": 661500 + }, + { + "epoch": 18.58, + "learning_rate": 0.00011443646792000149, + "loss": 1.528, + "step": 662000 + }, + { + "epoch": 18.59, + "learning_rate": 0.00011429588948660754, + "loss": 1.5122, + "step": 662500 + }, + { + "epoch": 18.61, + "learning_rate": 0.00011415531105321362, + "loss": 1.5361, + "step": 663000 + }, + { + "epoch": 18.62, + "learning_rate": 0.00011401473261981968, + "loss": 1.5162, + "step": 663500 + }, + { + "epoch": 18.63, + "learning_rate": 0.00011387415418642574, + "loss": 1.4806, + "step": 664000 + }, + { + "epoch": 18.65, + "learning_rate": 0.00011373357575303179, + "loss": 1.496, + "step": 664500 + }, + { + "epoch": 18.66, + "learning_rate": 0.00011359299731963786, + "loss": 1.5381, + "step": 665000 + }, + { + "epoch": 18.68, + "learning_rate": 0.00011345241888624393, + "loss": 1.5123, + "step": 665500 + }, + { + "epoch": 18.69, + "learning_rate": 0.00011331184045284999, + "loss": 1.5367, + "step": 666000 + }, + { + "epoch": 18.7, + "learning_rate": 0.00011317126201945604, + "loss": 1.5061, + "step": 666500 + }, + { + "epoch": 18.72, + "learning_rate": 0.0001130306835860621, + "loss": 1.4939, + "step": 667000 + }, + { + "epoch": 18.73, + "learning_rate": 0.00011289010515266817, + "loss": 1.5094, + "step": 667500 + }, + { + "epoch": 18.75, + "learning_rate": 0.00011274952671927424, + "loss": 1.5351, + "step": 668000 + }, + { + "epoch": 18.76, + "learning_rate": 0.00011260894828588028, + "loss": 1.5246, + "step": 668500 + }, + { + "epoch": 18.77, + "learning_rate": 0.00011246836985248635, + "loss": 1.5233, + "step": 669000 + }, + { + "epoch": 18.79, + "learning_rate": 0.00011232779141909242, + "loss": 1.5041, + "step": 669500 + }, + { + "epoch": 18.8, + "learning_rate": 0.00011218721298569848, + "loss": 1.5076, + "step": 670000 + }, + { + "epoch": 18.82, + "learning_rate": 0.00011204663455230453, + "loss": 1.5281, + "step": 670500 + }, + { + "epoch": 18.83, + "learning_rate": 0.0001119060561189106, + "loss": 1.5359, + "step": 671000 + }, + { + "epoch": 18.84, + "learning_rate": 0.00011176547768551667, + "loss": 1.5241, + "step": 671500 + }, + { + "epoch": 18.86, + "learning_rate": 0.00011162489925212273, + "loss": 1.5, + "step": 672000 + }, + { + "epoch": 18.87, + "learning_rate": 0.00011148432081872878, + "loss": 1.4874, + "step": 672500 + }, + { + "epoch": 18.89, + "learning_rate": 0.00011134374238533484, + "loss": 1.5024, + "step": 673000 + }, + { + "epoch": 18.9, + "learning_rate": 0.00011120316395194092, + "loss": 1.5587, + "step": 673500 + }, + { + "epoch": 18.91, + "learning_rate": 0.00011106258551854698, + "loss": 1.5569, + "step": 674000 + }, + { + "epoch": 18.93, + "learning_rate": 0.00011092200708515303, + "loss": 1.5419, + "step": 674500 + }, + { + "epoch": 18.94, + "learning_rate": 0.00011078142865175909, + "loss": 1.4949, + "step": 675000 + }, + { + "epoch": 18.96, + "learning_rate": 0.00011064085021836515, + "loss": 1.5332, + "step": 675500 + }, + { + "epoch": 18.97, + "learning_rate": 0.00011050027178497123, + "loss": 1.5137, + "step": 676000 + }, + { + "epoch": 18.98, + "learning_rate": 0.00011035969335157729, + "loss": 1.537, + "step": 676500 + }, + { + "epoch": 19.0, + "learning_rate": 0.00011021911491818334, + "loss": 1.5142, + "step": 677000 + }, + { + "epoch": 19.01, + "learning_rate": 0.0001100785364847894, + "loss": 1.4425, + "step": 677500 + }, + { + "epoch": 19.03, + "learning_rate": 0.00010993795805139547, + "loss": 1.4679, + "step": 678000 + }, + { + "epoch": 19.04, + "learning_rate": 0.00010979737961800154, + "loss": 1.441, + "step": 678500 + }, + { + "epoch": 19.05, + "learning_rate": 0.00010965680118460758, + "loss": 1.4367, + "step": 679000 + }, + { + "epoch": 19.07, + "learning_rate": 0.00010951622275121365, + "loss": 1.4158, + "step": 679500 + }, + { + "epoch": 19.08, + "learning_rate": 0.00010937564431781972, + "loss": 1.467, + "step": 680000 + }, + { + "epoch": 19.1, + "learning_rate": 0.00010923506588442578, + "loss": 1.4936, + "step": 680500 + }, + { + "epoch": 19.11, + "learning_rate": 0.00010909448745103183, + "loss": 1.4517, + "step": 681000 + }, + { + "epoch": 19.12, + "learning_rate": 0.0001089539090176379, + "loss": 1.4532, + "step": 681500 + }, + { + "epoch": 19.14, + "learning_rate": 0.00010881333058424396, + "loss": 1.4788, + "step": 682000 + }, + { + "epoch": 19.15, + "learning_rate": 0.00010867275215085003, + "loss": 1.4477, + "step": 682500 + }, + { + "epoch": 19.17, + "learning_rate": 0.00010853217371745608, + "loss": 1.4405, + "step": 683000 + }, + { + "epoch": 19.18, + "learning_rate": 0.00010839159528406214, + "loss": 1.4977, + "step": 683500 + }, + { + "epoch": 19.19, + "learning_rate": 0.0001082510168506682, + "loss": 1.4801, + "step": 684000 + }, + { + "epoch": 19.21, + "learning_rate": 0.00010811043841727428, + "loss": 1.4558, + "step": 684500 + }, + { + "epoch": 19.22, + "learning_rate": 0.00010796985998388033, + "loss": 1.4634, + "step": 685000 + }, + { + "epoch": 19.24, + "learning_rate": 0.00010782928155048639, + "loss": 1.451, + "step": 685500 + }, + { + "epoch": 19.25, + "learning_rate": 0.00010768870311709245, + "loss": 1.4647, + "step": 686000 + }, + { + "epoch": 19.27, + "learning_rate": 0.00010754812468369853, + "loss": 1.458, + "step": 686500 + }, + { + "epoch": 19.28, + "learning_rate": 0.00010740754625030458, + "loss": 1.4842, + "step": 687000 + }, + { + "epoch": 19.29, + "learning_rate": 0.00010726696781691064, + "loss": 1.4794, + "step": 687500 + }, + { + "epoch": 19.31, + "learning_rate": 0.0001071263893835167, + "loss": 1.4627, + "step": 688000 + }, + { + "epoch": 19.32, + "learning_rate": 0.00010698581095012277, + "loss": 1.4567, + "step": 688500 + }, + { + "epoch": 19.34, + "learning_rate": 0.00010684523251672882, + "loss": 1.4916, + "step": 689000 + }, + { + "epoch": 19.35, + "learning_rate": 0.00010670465408333489, + "loss": 1.4865, + "step": 689500 + }, + { + "epoch": 19.36, + "learning_rate": 0.00010656407564994095, + "loss": 1.4926, + "step": 690000 + }, + { + "epoch": 19.38, + "learning_rate": 0.00010642349721654701, + "loss": 1.4923, + "step": 690500 + }, + { + "epoch": 19.39, + "learning_rate": 0.00010628291878315307, + "loss": 1.4813, + "step": 691000 + }, + { + "epoch": 19.41, + "learning_rate": 0.00010614234034975913, + "loss": 1.4633, + "step": 691500 + }, + { + "epoch": 19.42, + "learning_rate": 0.0001060017619163652, + "loss": 1.4653, + "step": 692000 + }, + { + "epoch": 19.43, + "learning_rate": 0.00010586118348297126, + "loss": 1.4518, + "step": 692500 + }, + { + "epoch": 19.45, + "learning_rate": 0.0001057206050495773, + "loss": 1.4856, + "step": 693000 + }, + { + "epoch": 19.46, + "learning_rate": 0.00010558002661618338, + "loss": 1.4646, + "step": 693500 + }, + { + "epoch": 19.48, + "learning_rate": 0.00010543944818278944, + "loss": 1.4714, + "step": 694000 + }, + { + "epoch": 19.49, + "learning_rate": 0.0001052988697493955, + "loss": 1.4602, + "step": 694500 + }, + { + "epoch": 19.5, + "learning_rate": 0.00010515829131600155, + "loss": 1.4555, + "step": 695000 + }, + { + "epoch": 19.52, + "learning_rate": 0.00010501771288260763, + "loss": 1.4647, + "step": 695500 + }, + { + "epoch": 19.53, + "learning_rate": 0.00010487713444921369, + "loss": 1.4686, + "step": 696000 + }, + { + "epoch": 19.55, + "learning_rate": 0.00010473655601581975, + "loss": 1.4484, + "step": 696500 + }, + { + "epoch": 19.56, + "learning_rate": 0.0001045959775824258, + "loss": 1.488, + "step": 697000 + }, + { + "epoch": 19.57, + "learning_rate": 0.00010445539914903188, + "loss": 1.4916, + "step": 697500 + }, + { + "epoch": 19.59, + "learning_rate": 0.00010431482071563794, + "loss": 1.4915, + "step": 698000 + }, + { + "epoch": 19.6, + "learning_rate": 0.000104174242282244, + "loss": 1.4803, + "step": 698500 + }, + { + "epoch": 19.62, + "learning_rate": 0.00010403366384885005, + "loss": 1.5062, + "step": 699000 + }, + { + "epoch": 19.63, + "learning_rate": 0.00010389308541545612, + "loss": 1.4809, + "step": 699500 + }, + { + "epoch": 19.64, + "learning_rate": 0.00010375250698206219, + "loss": 1.4766, + "step": 700000 + }, + { + "epoch": 19.66, + "learning_rate": 0.00010361192854866825, + "loss": 1.4997, + "step": 700500 + }, + { + "epoch": 19.67, + "learning_rate": 0.0001034713501152743, + "loss": 1.4345, + "step": 701000 + }, + { + "epoch": 19.69, + "learning_rate": 0.00010333077168188036, + "loss": 1.4837, + "step": 701500 + }, + { + "epoch": 19.7, + "learning_rate": 0.00010319019324848643, + "loss": 1.5083, + "step": 702000 + }, + { + "epoch": 19.71, + "learning_rate": 0.0001030496148150925, + "loss": 1.4603, + "step": 702500 + }, + { + "epoch": 19.73, + "learning_rate": 0.00010290903638169854, + "loss": 1.52, + "step": 703000 + }, + { + "epoch": 19.74, + "learning_rate": 0.0001027684579483046, + "loss": 1.4754, + "step": 703500 + }, + { + "epoch": 19.76, + "learning_rate": 0.00010262787951491068, + "loss": 1.4833, + "step": 704000 + }, + { + "epoch": 19.77, + "learning_rate": 0.00010248730108151674, + "loss": 1.484, + "step": 704500 + }, + { + "epoch": 19.78, + "learning_rate": 0.0001023467226481228, + "loss": 1.4821, + "step": 705000 + }, + { + "epoch": 19.8, + "learning_rate": 0.00010220614421472885, + "loss": 1.504, + "step": 705500 + }, + { + "epoch": 19.81, + "learning_rate": 0.00010206556578133493, + "loss": 1.4986, + "step": 706000 + }, + { + "epoch": 19.83, + "learning_rate": 0.00010192498734794099, + "loss": 1.5064, + "step": 706500 + }, + { + "epoch": 19.84, + "learning_rate": 0.00010178440891454705, + "loss": 1.4506, + "step": 707000 + }, + { + "epoch": 19.85, + "learning_rate": 0.0001016438304811531, + "loss": 1.4618, + "step": 707500 + }, + { + "epoch": 19.87, + "learning_rate": 0.00010150325204775918, + "loss": 1.4656, + "step": 708000 + }, + { + "epoch": 19.88, + "learning_rate": 0.00010136267361436524, + "loss": 1.477, + "step": 708500 + }, + { + "epoch": 19.9, + "learning_rate": 0.0001012220951809713, + "loss": 1.4651, + "step": 709000 + }, + { + "epoch": 19.91, + "learning_rate": 0.00010108151674757735, + "loss": 1.484, + "step": 709500 + }, + { + "epoch": 19.92, + "learning_rate": 0.00010094093831418341, + "loss": 1.4566, + "step": 710000 + }, + { + "epoch": 19.94, + "learning_rate": 0.00010080035988078949, + "loss": 1.48, + "step": 710500 + }, + { + "epoch": 19.95, + "learning_rate": 0.00010065978144739555, + "loss": 1.484, + "step": 711000 + }, + { + "epoch": 19.97, + "learning_rate": 0.0001005192030140016, + "loss": 1.4938, + "step": 711500 + }, + { + "epoch": 19.98, + "learning_rate": 0.00010037862458060766, + "loss": 1.4896, + "step": 712000 + }, + { + "epoch": 19.99, + "learning_rate": 0.00010023804614721373, + "loss": 1.4827, + "step": 712500 + }, + { + "epoch": 20.01, + "learning_rate": 0.0001000974677138198, + "loss": 1.4364, + "step": 713000 + }, + { + "epoch": 20.02, + "learning_rate": 9.995688928042584e-05, + "loss": 1.4643, + "step": 713500 + }, + { + "epoch": 20.04, + "learning_rate": 9.98163108470319e-05, + "loss": 1.4105, + "step": 714000 + }, + { + "epoch": 20.05, + "learning_rate": 9.967573241363798e-05, + "loss": 1.4383, + "step": 714500 + }, + { + "epoch": 20.06, + "learning_rate": 9.953515398024404e-05, + "loss": 1.4317, + "step": 715000 + }, + { + "epoch": 20.08, + "learning_rate": 9.939457554685009e-05, + "loss": 1.4217, + "step": 715500 + }, + { + "epoch": 20.09, + "learning_rate": 9.925399711345615e-05, + "loss": 1.4363, + "step": 716000 + }, + { + "epoch": 20.11, + "learning_rate": 9.911341868006223e-05, + "loss": 1.4337, + "step": 716500 + }, + { + "epoch": 20.12, + "learning_rate": 9.897284024666829e-05, + "loss": 1.4153, + "step": 717000 + }, + { + "epoch": 20.13, + "learning_rate": 9.883226181327434e-05, + "loss": 1.4394, + "step": 717500 + }, + { + "epoch": 20.15, + "learning_rate": 9.86916833798804e-05, + "loss": 1.4216, + "step": 718000 + }, + { + "epoch": 20.16, + "learning_rate": 9.855110494648646e-05, + "loss": 1.4515, + "step": 718500 + }, + { + "epoch": 20.18, + "learning_rate": 9.841052651309254e-05, + "loss": 1.4232, + "step": 719000 + }, + { + "epoch": 20.19, + "learning_rate": 9.826994807969859e-05, + "loss": 1.4117, + "step": 719500 + }, + { + "epoch": 20.21, + "learning_rate": 9.812936964630465e-05, + "loss": 1.4224, + "step": 720000 + }, + { + "epoch": 20.22, + "learning_rate": 9.798879121291071e-05, + "loss": 1.426, + "step": 720500 + }, + { + "epoch": 20.23, + "learning_rate": 9.784821277951679e-05, + "loss": 1.4179, + "step": 721000 + }, + { + "epoch": 20.25, + "learning_rate": 9.770763434612284e-05, + "loss": 1.4248, + "step": 721500 + }, + { + "epoch": 20.26, + "learning_rate": 9.75670559127289e-05, + "loss": 1.4219, + "step": 722000 + }, + { + "epoch": 20.28, + "learning_rate": 9.742647747933496e-05, + "loss": 1.4071, + "step": 722500 + }, + { + "epoch": 20.29, + "learning_rate": 9.728589904594103e-05, + "loss": 1.4151, + "step": 723000 + }, + { + "epoch": 20.3, + "learning_rate": 9.714532061254708e-05, + "loss": 1.4235, + "step": 723500 + }, + { + "epoch": 20.32, + "learning_rate": 9.700474217915314e-05, + "loss": 1.4523, + "step": 724000 + }, + { + "epoch": 20.33, + "learning_rate": 9.68641637457592e-05, + "loss": 1.4305, + "step": 724500 + }, + { + "epoch": 20.35, + "learning_rate": 9.672358531236528e-05, + "loss": 1.4417, + "step": 725000 + }, + { + "epoch": 20.36, + "learning_rate": 9.658300687897133e-05, + "loss": 1.4372, + "step": 725500 + }, + { + "epoch": 20.37, + "learning_rate": 9.644242844557739e-05, + "loss": 1.4423, + "step": 726000 + }, + { + "epoch": 20.39, + "learning_rate": 9.630185001218345e-05, + "loss": 1.428, + "step": 726500 + }, + { + "epoch": 20.4, + "learning_rate": 9.616127157878952e-05, + "loss": 1.4481, + "step": 727000 + }, + { + "epoch": 20.42, + "learning_rate": 9.602069314539558e-05, + "loss": 1.4533, + "step": 727500 + }, + { + "epoch": 20.43, + "learning_rate": 9.588011471200164e-05, + "loss": 1.4555, + "step": 728000 + }, + { + "epoch": 20.44, + "learning_rate": 9.57395362786077e-05, + "loss": 1.4279, + "step": 728500 + }, + { + "epoch": 20.46, + "learning_rate": 9.559895784521376e-05, + "loss": 1.4715, + "step": 729000 + }, + { + "epoch": 20.47, + "learning_rate": 9.545837941181981e-05, + "loss": 1.4165, + "step": 729500 + }, + { + "epoch": 20.49, + "learning_rate": 9.531780097842589e-05, + "loss": 1.4176, + "step": 730000 + }, + { + "epoch": 20.5, + "learning_rate": 9.517722254503195e-05, + "loss": 1.4379, + "step": 730500 + }, + { + "epoch": 20.51, + "learning_rate": 9.503664411163801e-05, + "loss": 1.4531, + "step": 731000 + }, + { + "epoch": 20.53, + "learning_rate": 9.489606567824406e-05, + "loss": 1.4247, + "step": 731500 + }, + { + "epoch": 20.54, + "learning_rate": 9.475548724485014e-05, + "loss": 1.4398, + "step": 732000 + }, + { + "epoch": 20.56, + "learning_rate": 9.46149088114562e-05, + "loss": 1.4422, + "step": 732500 + }, + { + "epoch": 20.57, + "learning_rate": 9.447433037806226e-05, + "loss": 1.4273, + "step": 733000 + }, + { + "epoch": 20.58, + "learning_rate": 9.433375194466831e-05, + "loss": 1.4482, + "step": 733500 + }, + { + "epoch": 20.6, + "learning_rate": 9.419317351127438e-05, + "loss": 1.445, + "step": 734000 + }, + { + "epoch": 20.61, + "learning_rate": 9.405259507788045e-05, + "loss": 1.4507, + "step": 734500 + }, + { + "epoch": 20.63, + "learning_rate": 9.391201664448651e-05, + "loss": 1.4147, + "step": 735000 + }, + { + "epoch": 20.64, + "learning_rate": 9.377143821109257e-05, + "loss": 1.4329, + "step": 735500 + }, + { + "epoch": 20.65, + "learning_rate": 9.363085977769863e-05, + "loss": 1.4602, + "step": 736000 + }, + { + "epoch": 20.67, + "learning_rate": 9.349028134430469e-05, + "loss": 1.4706, + "step": 736500 + }, + { + "epoch": 20.68, + "learning_rate": 9.334970291091076e-05, + "loss": 1.4183, + "step": 737000 + }, + { + "epoch": 20.7, + "learning_rate": 9.320912447751682e-05, + "loss": 1.4342, + "step": 737500 + }, + { + "epoch": 20.71, + "learning_rate": 9.306854604412287e-05, + "loss": 1.4291, + "step": 738000 + }, + { + "epoch": 20.72, + "learning_rate": 9.292796761072894e-05, + "loss": 1.4312, + "step": 738500 + }, + { + "epoch": 20.74, + "learning_rate": 9.2787389177335e-05, + "loss": 1.4287, + "step": 739000 + }, + { + "epoch": 20.75, + "learning_rate": 9.264681074394106e-05, + "loss": 1.4421, + "step": 739500 + }, + { + "epoch": 20.77, + "learning_rate": 9.250623231054711e-05, + "loss": 1.4204, + "step": 740000 + }, + { + "epoch": 20.78, + "learning_rate": 9.236565387715319e-05, + "loss": 1.4279, + "step": 740500 + }, + { + "epoch": 20.79, + "learning_rate": 9.222507544375925e-05, + "loss": 1.4129, + "step": 741000 + }, + { + "epoch": 20.81, + "learning_rate": 9.208449701036531e-05, + "loss": 1.4563, + "step": 741500 + }, + { + "epoch": 20.82, + "learning_rate": 9.194391857697136e-05, + "loss": 1.437, + "step": 742000 + }, + { + "epoch": 20.84, + "learning_rate": 9.180334014357744e-05, + "loss": 1.4574, + "step": 742500 + }, + { + "epoch": 20.85, + "learning_rate": 9.16627617101835e-05, + "loss": 1.4312, + "step": 743000 + }, + { + "epoch": 20.86, + "learning_rate": 9.152218327678956e-05, + "loss": 1.4421, + "step": 743500 + }, + { + "epoch": 20.88, + "learning_rate": 9.138160484339561e-05, + "loss": 1.4347, + "step": 744000 + }, + { + "epoch": 20.89, + "learning_rate": 9.124102641000168e-05, + "loss": 1.4635, + "step": 744500 + }, + { + "epoch": 20.91, + "learning_rate": 9.110044797660775e-05, + "loss": 1.4754, + "step": 745000 + }, + { + "epoch": 20.92, + "learning_rate": 9.095986954321381e-05, + "loss": 1.4191, + "step": 745500 + }, + { + "epoch": 20.93, + "learning_rate": 9.081929110981986e-05, + "loss": 1.4343, + "step": 746000 + }, + { + "epoch": 20.95, + "learning_rate": 9.067871267642592e-05, + "loss": 1.4699, + "step": 746500 + }, + { + "epoch": 20.96, + "learning_rate": 9.0538134243032e-05, + "loss": 1.4495, + "step": 747000 + }, + { + "epoch": 20.98, + "learning_rate": 9.039755580963806e-05, + "loss": 1.4427, + "step": 747500 + }, + { + "epoch": 20.99, + "learning_rate": 9.02569773762441e-05, + "loss": 1.4523, + "step": 748000 + }, + { + "epoch": 21.0, + "learning_rate": 9.011639894285017e-05, + "loss": 1.4197, + "step": 748500 + }, + { + "epoch": 21.02, + "learning_rate": 8.997582050945624e-05, + "loss": 1.4132, + "step": 749000 + }, + { + "epoch": 21.03, + "learning_rate": 8.98352420760623e-05, + "loss": 1.3953, + "step": 749500 + }, + { + "epoch": 21.05, + "learning_rate": 8.969466364266835e-05, + "loss": 1.4112, + "step": 750000 + }, + { + "epoch": 21.06, + "learning_rate": 8.955408520927441e-05, + "loss": 1.3864, + "step": 750500 + }, + { + "epoch": 21.08, + "learning_rate": 8.941350677588049e-05, + "loss": 1.3912, + "step": 751000 + }, + { + "epoch": 21.09, + "learning_rate": 8.927292834248655e-05, + "loss": 1.3867, + "step": 751500 + }, + { + "epoch": 21.1, + "learning_rate": 8.91323499090926e-05, + "loss": 1.4091, + "step": 752000 + }, + { + "epoch": 21.12, + "learning_rate": 8.899177147569866e-05, + "loss": 1.4045, + "step": 752500 + }, + { + "epoch": 21.13, + "learning_rate": 8.885119304230474e-05, + "loss": 1.4034, + "step": 753000 + }, + { + "epoch": 21.15, + "learning_rate": 8.87106146089108e-05, + "loss": 1.3951, + "step": 753500 + }, + { + "epoch": 21.16, + "learning_rate": 8.857003617551685e-05, + "loss": 1.397, + "step": 754000 + }, + { + "epoch": 21.17, + "learning_rate": 8.842945774212291e-05, + "loss": 1.383, + "step": 754500 + }, + { + "epoch": 21.19, + "learning_rate": 8.828887930872897e-05, + "loss": 1.4052, + "step": 755000 + }, + { + "epoch": 21.2, + "learning_rate": 8.814830087533505e-05, + "loss": 1.405, + "step": 755500 + }, + { + "epoch": 21.22, + "learning_rate": 8.80077224419411e-05, + "loss": 1.4064, + "step": 756000 + }, + { + "epoch": 21.23, + "learning_rate": 8.786714400854716e-05, + "loss": 1.413, + "step": 756500 + }, + { + "epoch": 21.24, + "learning_rate": 8.772656557515322e-05, + "loss": 1.3881, + "step": 757000 + }, + { + "epoch": 21.26, + "learning_rate": 8.75859871417593e-05, + "loss": 1.377, + "step": 757500 + }, + { + "epoch": 21.27, + "learning_rate": 8.744540870836534e-05, + "loss": 1.3939, + "step": 758000 + }, + { + "epoch": 21.29, + "learning_rate": 8.73048302749714e-05, + "loss": 1.4041, + "step": 758500 + }, + { + "epoch": 21.3, + "learning_rate": 8.716425184157747e-05, + "loss": 1.3923, + "step": 759000 + }, + { + "epoch": 21.31, + "learning_rate": 8.702367340818354e-05, + "loss": 1.3887, + "step": 759500 + }, + { + "epoch": 21.33, + "learning_rate": 8.688309497478959e-05, + "loss": 1.4045, + "step": 760000 + }, + { + "epoch": 21.34, + "learning_rate": 8.674251654139565e-05, + "loss": 1.3886, + "step": 760500 + }, + { + "epoch": 21.36, + "learning_rate": 8.660193810800171e-05, + "loss": 1.4225, + "step": 761000 + }, + { + "epoch": 21.37, + "learning_rate": 8.646135967460779e-05, + "loss": 1.3489, + "step": 761500 + }, + { + "epoch": 21.38, + "learning_rate": 8.632078124121384e-05, + "loss": 1.4059, + "step": 762000 + }, + { + "epoch": 21.4, + "learning_rate": 8.61802028078199e-05, + "loss": 1.3909, + "step": 762500 + }, + { + "epoch": 21.41, + "learning_rate": 8.603962437442596e-05, + "loss": 1.3911, + "step": 763000 + }, + { + "epoch": 21.43, + "learning_rate": 8.589904594103202e-05, + "loss": 1.3751, + "step": 763500 + }, + { + "epoch": 21.44, + "learning_rate": 8.57584675076381e-05, + "loss": 1.3928, + "step": 764000 + }, + { + "epoch": 21.45, + "learning_rate": 8.561788907424415e-05, + "loss": 1.4066, + "step": 764500 + }, + { + "epoch": 21.47, + "learning_rate": 8.547731064085021e-05, + "loss": 1.4075, + "step": 765000 + }, + { + "epoch": 21.48, + "learning_rate": 8.533673220745627e-05, + "loss": 1.4234, + "step": 765500 + }, + { + "epoch": 21.5, + "learning_rate": 8.519615377406235e-05, + "loss": 1.4039, + "step": 766000 + }, + { + "epoch": 21.51, + "learning_rate": 8.50555753406684e-05, + "loss": 1.3861, + "step": 766500 + }, + { + "epoch": 21.52, + "learning_rate": 8.491499690727446e-05, + "loss": 1.3816, + "step": 767000 + }, + { + "epoch": 21.54, + "learning_rate": 8.477441847388052e-05, + "loss": 1.4111, + "step": 767500 + }, + { + "epoch": 21.55, + "learning_rate": 8.46338400404866e-05, + "loss": 1.3977, + "step": 768000 + }, + { + "epoch": 21.57, + "learning_rate": 8.449326160709264e-05, + "loss": 1.3999, + "step": 768500 + }, + { + "epoch": 21.58, + "learning_rate": 8.43526831736987e-05, + "loss": 1.3902, + "step": 769000 + }, + { + "epoch": 21.59, + "learning_rate": 8.421210474030477e-05, + "loss": 1.4278, + "step": 769500 + }, + { + "epoch": 21.61, + "learning_rate": 8.407152630691084e-05, + "loss": 1.4353, + "step": 770000 + }, + { + "epoch": 21.62, + "learning_rate": 8.393094787351689e-05, + "loss": 1.4072, + "step": 770500 + }, + { + "epoch": 21.64, + "learning_rate": 8.379036944012295e-05, + "loss": 1.4325, + "step": 771000 + }, + { + "epoch": 21.65, + "learning_rate": 8.364979100672901e-05, + "loss": 1.3832, + "step": 771500 + }, + { + "epoch": 21.66, + "learning_rate": 8.350921257333508e-05, + "loss": 1.3876, + "step": 772000 + }, + { + "epoch": 21.68, + "learning_rate": 8.336863413994113e-05, + "loss": 1.3998, + "step": 772500 + }, + { + "epoch": 21.69, + "learning_rate": 8.32280557065472e-05, + "loss": 1.4081, + "step": 773000 + }, + { + "epoch": 21.71, + "learning_rate": 8.308747727315326e-05, + "loss": 1.4361, + "step": 773500 + }, + { + "epoch": 21.72, + "learning_rate": 8.294689883975932e-05, + "loss": 1.3762, + "step": 774000 + }, + { + "epoch": 21.73, + "learning_rate": 8.280632040636537e-05, + "loss": 1.4322, + "step": 774500 + }, + { + "epoch": 21.75, + "learning_rate": 8.266574197297145e-05, + "loss": 1.3878, + "step": 775000 + }, + { + "epoch": 21.76, + "learning_rate": 8.252516353957751e-05, + "loss": 1.3681, + "step": 775500 + }, + { + "epoch": 21.78, + "learning_rate": 8.238458510618357e-05, + "loss": 1.4005, + "step": 776000 + }, + { + "epoch": 21.79, + "learning_rate": 8.224400667278962e-05, + "loss": 1.424, + "step": 776500 + }, + { + "epoch": 21.8, + "learning_rate": 8.21034282393957e-05, + "loss": 1.4016, + "step": 777000 + }, + { + "epoch": 21.82, + "learning_rate": 8.196284980600176e-05, + "loss": 1.3916, + "step": 777500 + }, + { + "epoch": 21.83, + "learning_rate": 8.182227137260782e-05, + "loss": 1.449, + "step": 778000 + }, + { + "epoch": 21.85, + "learning_rate": 8.168169293921387e-05, + "loss": 1.4048, + "step": 778500 + }, + { + "epoch": 21.86, + "learning_rate": 8.154111450581994e-05, + "loss": 1.4209, + "step": 779000 + }, + { + "epoch": 21.87, + "learning_rate": 8.1400536072426e-05, + "loss": 1.4021, + "step": 779500 + }, + { + "epoch": 21.89, + "learning_rate": 8.125995763903207e-05, + "loss": 1.3813, + "step": 780000 + }, + { + "epoch": 21.9, + "learning_rate": 8.111937920563812e-05, + "loss": 1.3991, + "step": 780500 + }, + { + "epoch": 21.92, + "learning_rate": 8.097880077224418e-05, + "loss": 1.3895, + "step": 781000 + }, + { + "epoch": 21.93, + "learning_rate": 8.083822233885025e-05, + "loss": 1.383, + "step": 781500 + }, + { + "epoch": 21.95, + "learning_rate": 8.069764390545632e-05, + "loss": 1.3922, + "step": 782000 + }, + { + "epoch": 21.96, + "learning_rate": 8.055706547206236e-05, + "loss": 1.3866, + "step": 782500 + }, + { + "epoch": 21.97, + "learning_rate": 8.041648703866843e-05, + "loss": 1.404, + "step": 783000 + }, + { + "epoch": 21.99, + "learning_rate": 8.02759086052745e-05, + "loss": 1.3895, + "step": 783500 + }, + { + "epoch": 22.0, + "learning_rate": 8.013533017188056e-05, + "loss": 1.4153, + "step": 784000 + }, + { + "epoch": 22.02, + "learning_rate": 7.999475173848661e-05, + "loss": 1.339, + "step": 784500 + }, + { + "epoch": 22.03, + "learning_rate": 7.985417330509267e-05, + "loss": 1.3512, + "step": 785000 + }, + { + "epoch": 22.04, + "learning_rate": 7.971359487169875e-05, + "loss": 1.3543, + "step": 785500 + }, + { + "epoch": 22.06, + "learning_rate": 7.957301643830481e-05, + "loss": 1.3823, + "step": 786000 + }, + { + "epoch": 22.07, + "learning_rate": 7.943243800491086e-05, + "loss": 1.3589, + "step": 786500 + }, + { + "epoch": 22.09, + "learning_rate": 7.929185957151692e-05, + "loss": 1.3661, + "step": 787000 + }, + { + "epoch": 22.1, + "learning_rate": 7.9151281138123e-05, + "loss": 1.3565, + "step": 787500 + }, + { + "epoch": 22.11, + "learning_rate": 7.901070270472906e-05, + "loss": 1.3667, + "step": 788000 + }, + { + "epoch": 22.13, + "learning_rate": 7.887012427133511e-05, + "loss": 1.3758, + "step": 788500 + }, + { + "epoch": 22.14, + "learning_rate": 7.872954583794117e-05, + "loss": 1.3565, + "step": 789000 + }, + { + "epoch": 22.16, + "learning_rate": 7.858896740454723e-05, + "loss": 1.366, + "step": 789500 + }, + { + "epoch": 22.17, + "learning_rate": 7.84483889711533e-05, + "loss": 1.3763, + "step": 790000 + }, + { + "epoch": 22.18, + "learning_rate": 7.830781053775935e-05, + "loss": 1.3381, + "step": 790500 + }, + { + "epoch": 22.2, + "learning_rate": 7.816723210436542e-05, + "loss": 1.3546, + "step": 791000 + }, + { + "epoch": 22.21, + "learning_rate": 7.802665367097148e-05, + "loss": 1.3326, + "step": 791500 + }, + { + "epoch": 22.23, + "learning_rate": 7.788607523757755e-05, + "loss": 1.3528, + "step": 792000 + }, + { + "epoch": 22.24, + "learning_rate": 7.77454968041836e-05, + "loss": 1.3642, + "step": 792500 + }, + { + "epoch": 22.25, + "learning_rate": 7.760491837078966e-05, + "loss": 1.3849, + "step": 793000 + }, + { + "epoch": 22.27, + "learning_rate": 7.746433993739573e-05, + "loss": 1.3709, + "step": 793500 + }, + { + "epoch": 22.28, + "learning_rate": 7.73237615040018e-05, + "loss": 1.3701, + "step": 794000 + }, + { + "epoch": 22.3, + "learning_rate": 7.718318307060786e-05, + "loss": 1.3805, + "step": 794500 + }, + { + "epoch": 22.31, + "learning_rate": 7.704260463721391e-05, + "loss": 1.3518, + "step": 795000 + }, + { + "epoch": 22.32, + "learning_rate": 7.690202620381997e-05, + "loss": 1.3907, + "step": 795500 + }, + { + "epoch": 22.34, + "learning_rate": 7.676144777042605e-05, + "loss": 1.3648, + "step": 796000 + }, + { + "epoch": 22.35, + "learning_rate": 7.662086933703211e-05, + "loss": 1.3384, + "step": 796500 + }, + { + "epoch": 22.37, + "learning_rate": 7.648029090363816e-05, + "loss": 1.3522, + "step": 797000 + }, + { + "epoch": 22.38, + "learning_rate": 7.633971247024422e-05, + "loss": 1.3454, + "step": 797500 + }, + { + "epoch": 22.39, + "learning_rate": 7.619913403685028e-05, + "loss": 1.3731, + "step": 798000 + }, + { + "epoch": 22.41, + "learning_rate": 7.605855560345636e-05, + "loss": 1.3833, + "step": 798500 + }, + { + "epoch": 22.42, + "learning_rate": 7.591797717006241e-05, + "loss": 1.3655, + "step": 799000 + }, + { + "epoch": 22.44, + "learning_rate": 7.577739873666847e-05, + "loss": 1.3855, + "step": 799500 + }, + { + "epoch": 22.45, + "learning_rate": 7.563682030327453e-05, + "loss": 1.3427, + "step": 800000 + }, + { + "epoch": 22.46, + "learning_rate": 7.54962418698806e-05, + "loss": 1.3713, + "step": 800500 + }, + { + "epoch": 22.48, + "learning_rate": 7.535566343648666e-05, + "loss": 1.3618, + "step": 801000 + }, + { + "epoch": 22.49, + "learning_rate": 7.521508500309272e-05, + "loss": 1.3967, + "step": 801500 + }, + { + "epoch": 22.51, + "learning_rate": 7.507450656969878e-05, + "loss": 1.3566, + "step": 802000 + }, + { + "epoch": 22.52, + "learning_rate": 7.493392813630484e-05, + "loss": 1.3438, + "step": 802500 + }, + { + "epoch": 22.53, + "learning_rate": 7.47933497029109e-05, + "loss": 1.37, + "step": 803000 + }, + { + "epoch": 22.55, + "learning_rate": 7.465277126951696e-05, + "loss": 1.3862, + "step": 803500 + }, + { + "epoch": 22.56, + "learning_rate": 7.451219283612303e-05, + "loss": 1.3876, + "step": 804000 + }, + { + "epoch": 22.58, + "learning_rate": 7.437161440272909e-05, + "loss": 1.3405, + "step": 804500 + }, + { + "epoch": 22.59, + "learning_rate": 7.423103596933515e-05, + "loss": 1.3513, + "step": 805000 + }, + { + "epoch": 22.6, + "learning_rate": 7.409045753594121e-05, + "loss": 1.3928, + "step": 805500 + }, + { + "epoch": 22.62, + "learning_rate": 7.394987910254727e-05, + "loss": 1.3646, + "step": 806000 + }, + { + "epoch": 22.63, + "learning_rate": 7.380930066915334e-05, + "loss": 1.3238, + "step": 806500 + }, + { + "epoch": 22.65, + "learning_rate": 7.36687222357594e-05, + "loss": 1.361, + "step": 807000 + }, + { + "epoch": 22.66, + "learning_rate": 7.352814380236546e-05, + "loss": 1.3775, + "step": 807500 + }, + { + "epoch": 22.67, + "learning_rate": 7.338756536897152e-05, + "loss": 1.3854, + "step": 808000 + }, + { + "epoch": 22.69, + "learning_rate": 7.324698693557758e-05, + "loss": 1.367, + "step": 808500 + }, + { + "epoch": 22.7, + "learning_rate": 7.310640850218365e-05, + "loss": 1.3572, + "step": 809000 + }, + { + "epoch": 22.72, + "learning_rate": 7.296583006878971e-05, + "loss": 1.3752, + "step": 809500 + }, + { + "epoch": 22.73, + "learning_rate": 7.282525163539577e-05, + "loss": 1.3821, + "step": 810000 + }, + { + "epoch": 22.74, + "learning_rate": 7.268467320200183e-05, + "loss": 1.3616, + "step": 810500 + }, + { + "epoch": 22.76, + "learning_rate": 7.25440947686079e-05, + "loss": 1.3904, + "step": 811000 + }, + { + "epoch": 22.77, + "learning_rate": 7.240351633521396e-05, + "loss": 1.3583, + "step": 811500 + }, + { + "epoch": 22.79, + "learning_rate": 7.226293790182002e-05, + "loss": 1.4081, + "step": 812000 + }, + { + "epoch": 22.8, + "learning_rate": 7.212235946842608e-05, + "loss": 1.3632, + "step": 812500 + }, + { + "epoch": 22.81, + "learning_rate": 7.198178103503214e-05, + "loss": 1.362, + "step": 813000 + }, + { + "epoch": 22.83, + "learning_rate": 7.18412026016382e-05, + "loss": 1.3824, + "step": 813500 + }, + { + "epoch": 22.84, + "learning_rate": 7.170062416824427e-05, + "loss": 1.3701, + "step": 814000 + }, + { + "epoch": 22.86, + "learning_rate": 7.156004573485033e-05, + "loss": 1.3528, + "step": 814500 + }, + { + "epoch": 22.87, + "learning_rate": 7.141946730145639e-05, + "loss": 1.3577, + "step": 815000 + }, + { + "epoch": 22.89, + "learning_rate": 7.127888886806245e-05, + "loss": 1.3679, + "step": 815500 + }, + { + "epoch": 22.9, + "learning_rate": 7.113831043466851e-05, + "loss": 1.3678, + "step": 816000 + }, + { + "epoch": 22.91, + "learning_rate": 7.099773200127456e-05, + "loss": 1.3471, + "step": 816500 + }, + { + "epoch": 22.93, + "learning_rate": 7.085715356788064e-05, + "loss": 1.377, + "step": 817000 + }, + { + "epoch": 22.94, + "learning_rate": 7.071657513448669e-05, + "loss": 1.36, + "step": 817500 + }, + { + "epoch": 22.96, + "learning_rate": 7.057599670109276e-05, + "loss": 1.3942, + "step": 818000 + }, + { + "epoch": 22.97, + "learning_rate": 7.043541826769881e-05, + "loss": 1.369, + "step": 818500 + }, + { + "epoch": 22.98, + "learning_rate": 7.029483983430488e-05, + "loss": 1.367, + "step": 819000 + }, + { + "epoch": 23.0, + "learning_rate": 7.015426140091093e-05, + "loss": 1.3907, + "step": 819500 + }, + { + "epoch": 23.01, + "learning_rate": 7.001368296751701e-05, + "loss": 1.3379, + "step": 820000 + }, + { + "epoch": 23.03, + "learning_rate": 6.987310453412306e-05, + "loss": 1.335, + "step": 820500 + }, + { + "epoch": 23.04, + "learning_rate": 6.973252610072913e-05, + "loss": 1.3061, + "step": 821000 + }, + { + "epoch": 23.05, + "learning_rate": 6.959194766733518e-05, + "loss": 1.2759, + "step": 821500 + }, + { + "epoch": 23.07, + "learning_rate": 6.945136923394126e-05, + "loss": 1.3039, + "step": 822000 + }, + { + "epoch": 23.08, + "learning_rate": 6.931079080054732e-05, + "loss": 1.329, + "step": 822500 + }, + { + "epoch": 23.1, + "learning_rate": 6.917021236715338e-05, + "loss": 1.3405, + "step": 823000 + }, + { + "epoch": 23.11, + "learning_rate": 6.902963393375944e-05, + "loss": 1.3211, + "step": 823500 + }, + { + "epoch": 23.12, + "learning_rate": 6.88890555003655e-05, + "loss": 1.3352, + "step": 824000 + }, + { + "epoch": 23.14, + "learning_rate": 6.874847706697157e-05, + "loss": 1.338, + "step": 824500 + }, + { + "epoch": 23.15, + "learning_rate": 6.860789863357761e-05, + "loss": 1.3533, + "step": 825000 + }, + { + "epoch": 23.17, + "learning_rate": 6.846732020018369e-05, + "loss": 1.324, + "step": 825500 + }, + { + "epoch": 23.18, + "learning_rate": 6.832674176678974e-05, + "loss": 1.3076, + "step": 826000 + }, + { + "epoch": 23.19, + "learning_rate": 6.818616333339581e-05, + "loss": 1.3694, + "step": 826500 + }, + { + "epoch": 23.21, + "learning_rate": 6.804558490000186e-05, + "loss": 1.3091, + "step": 827000 + }, + { + "epoch": 23.22, + "learning_rate": 6.790500646660794e-05, + "loss": 1.3354, + "step": 827500 + }, + { + "epoch": 23.24, + "learning_rate": 6.776442803321399e-05, + "loss": 1.3473, + "step": 828000 + }, + { + "epoch": 23.25, + "learning_rate": 6.762384959982006e-05, + "loss": 1.3365, + "step": 828500 + }, + { + "epoch": 23.26, + "learning_rate": 6.748327116642611e-05, + "loss": 1.3606, + "step": 829000 + }, + { + "epoch": 23.28, + "learning_rate": 6.734269273303219e-05, + "loss": 1.3394, + "step": 829500 + }, + { + "epoch": 23.29, + "learning_rate": 6.720211429963823e-05, + "loss": 1.3267, + "step": 830000 + }, + { + "epoch": 23.31, + "learning_rate": 6.706153586624431e-05, + "loss": 1.3294, + "step": 830500 + }, + { + "epoch": 23.32, + "learning_rate": 6.692095743285036e-05, + "loss": 1.3472, + "step": 831000 + }, + { + "epoch": 23.33, + "learning_rate": 6.678037899945643e-05, + "loss": 1.303, + "step": 831500 + }, + { + "epoch": 23.35, + "learning_rate": 6.663980056606248e-05, + "loss": 1.3304, + "step": 832000 + }, + { + "epoch": 23.36, + "learning_rate": 6.649922213266856e-05, + "loss": 1.3474, + "step": 832500 + }, + { + "epoch": 23.38, + "learning_rate": 6.63586436992746e-05, + "loss": 1.3662, + "step": 833000 + }, + { + "epoch": 23.39, + "learning_rate": 6.621806526588067e-05, + "loss": 1.3298, + "step": 833500 + }, + { + "epoch": 23.4, + "learning_rate": 6.607748683248673e-05, + "loss": 1.3309, + "step": 834000 + }, + { + "epoch": 23.42, + "learning_rate": 6.593690839909279e-05, + "loss": 1.3172, + "step": 834500 + }, + { + "epoch": 23.43, + "learning_rate": 6.579632996569885e-05, + "loss": 1.3243, + "step": 835000 + }, + { + "epoch": 23.45, + "learning_rate": 6.565575153230491e-05, + "loss": 1.3123, + "step": 835500 + }, + { + "epoch": 23.46, + "learning_rate": 6.551517309891098e-05, + "loss": 1.3471, + "step": 836000 + }, + { + "epoch": 23.47, + "learning_rate": 6.537459466551704e-05, + "loss": 1.3276, + "step": 836500 + }, + { + "epoch": 23.49, + "learning_rate": 6.52340162321231e-05, + "loss": 1.3227, + "step": 837000 + }, + { + "epoch": 23.5, + "learning_rate": 6.509343779872916e-05, + "loss": 1.3373, + "step": 837500 + }, + { + "epoch": 23.52, + "learning_rate": 6.495285936533522e-05, + "loss": 1.3258, + "step": 838000 + }, + { + "epoch": 23.53, + "learning_rate": 6.481228093194129e-05, + "loss": 1.3358, + "step": 838500 + }, + { + "epoch": 23.54, + "learning_rate": 6.467170249854735e-05, + "loss": 1.3238, + "step": 839000 + }, + { + "epoch": 23.56, + "learning_rate": 6.453112406515341e-05, + "loss": 1.3125, + "step": 839500 + }, + { + "epoch": 23.57, + "learning_rate": 6.439054563175947e-05, + "loss": 1.3413, + "step": 840000 + }, + { + "epoch": 23.59, + "learning_rate": 6.424996719836553e-05, + "loss": 1.331, + "step": 840500 + }, + { + "epoch": 23.6, + "learning_rate": 6.41093887649716e-05, + "loss": 1.346, + "step": 841000 + }, + { + "epoch": 23.61, + "learning_rate": 6.396881033157766e-05, + "loss": 1.3422, + "step": 841500 + }, + { + "epoch": 23.63, + "learning_rate": 6.382823189818372e-05, + "loss": 1.3346, + "step": 842000 + }, + { + "epoch": 23.64, + "learning_rate": 6.368765346478978e-05, + "loss": 1.3375, + "step": 842500 + }, + { + "epoch": 23.66, + "learning_rate": 6.354707503139584e-05, + "loss": 1.334, + "step": 843000 + }, + { + "epoch": 23.67, + "learning_rate": 6.34064965980019e-05, + "loss": 1.311, + "step": 843500 + }, + { + "epoch": 23.68, + "learning_rate": 6.326591816460797e-05, + "loss": 1.3061, + "step": 844000 + }, + { + "epoch": 23.7, + "learning_rate": 6.312533973121403e-05, + "loss": 1.3373, + "step": 844500 + }, + { + "epoch": 23.71, + "learning_rate": 6.298476129782009e-05, + "loss": 1.3494, + "step": 845000 + }, + { + "epoch": 23.73, + "learning_rate": 6.284418286442615e-05, + "loss": 1.326, + "step": 845500 + }, + { + "epoch": 23.74, + "learning_rate": 6.270360443103222e-05, + "loss": 1.348, + "step": 846000 + }, + { + "epoch": 23.76, + "learning_rate": 6.256302599763828e-05, + "loss": 1.3473, + "step": 846500 + }, + { + "epoch": 23.77, + "learning_rate": 6.242244756424434e-05, + "loss": 1.3397, + "step": 847000 + }, + { + "epoch": 23.78, + "learning_rate": 6.22818691308504e-05, + "loss": 1.3583, + "step": 847500 + }, + { + "epoch": 23.8, + "learning_rate": 6.214129069745646e-05, + "loss": 1.3292, + "step": 848000 + }, + { + "epoch": 23.81, + "learning_rate": 6.200071226406253e-05, + "loss": 1.3455, + "step": 848500 + }, + { + "epoch": 23.83, + "learning_rate": 6.186013383066859e-05, + "loss": 1.3458, + "step": 849000 + }, + { + "epoch": 23.84, + "learning_rate": 6.171955539727465e-05, + "loss": 1.3477, + "step": 849500 + }, + { + "epoch": 23.85, + "learning_rate": 6.157897696388071e-05, + "loss": 1.3462, + "step": 850000 + }, + { + "epoch": 23.87, + "learning_rate": 6.143839853048677e-05, + "loss": 1.328, + "step": 850500 + }, + { + "epoch": 23.88, + "learning_rate": 6.129782009709283e-05, + "loss": 1.3453, + "step": 851000 + }, + { + "epoch": 23.9, + "learning_rate": 6.11572416636989e-05, + "loss": 1.3241, + "step": 851500 + }, + { + "epoch": 23.91, + "learning_rate": 6.101666323030496e-05, + "loss": 1.3564, + "step": 852000 + }, + { + "epoch": 23.92, + "learning_rate": 6.087608479691102e-05, + "loss": 1.3582, + "step": 852500 + }, + { + "epoch": 23.94, + "learning_rate": 6.073550636351708e-05, + "loss": 1.3355, + "step": 853000 + }, + { + "epoch": 23.95, + "learning_rate": 6.0594927930123144e-05, + "loss": 1.3438, + "step": 853500 + }, + { + "epoch": 23.97, + "learning_rate": 6.0454349496729206e-05, + "loss": 1.3057, + "step": 854000 + }, + { + "epoch": 23.98, + "learning_rate": 6.031377106333526e-05, + "loss": 1.3214, + "step": 854500 + }, + { + "epoch": 23.99, + "learning_rate": 6.017319262994133e-05, + "loss": 1.3221, + "step": 855000 + }, + { + "epoch": 24.01, + "learning_rate": 6.0032614196547385e-05, + "loss": 1.2929, + "step": 855500 + }, + { + "epoch": 24.02, + "learning_rate": 5.9892035763153454e-05, + "loss": 1.3036, + "step": 856000 + }, + { + "epoch": 24.04, + "learning_rate": 5.975145732975951e-05, + "loss": 1.3002, + "step": 856500 + }, + { + "epoch": 24.05, + "learning_rate": 5.961087889636558e-05, + "loss": 1.2967, + "step": 857000 + }, + { + "epoch": 24.06, + "learning_rate": 5.947030046297163e-05, + "loss": 1.2968, + "step": 857500 + }, + { + "epoch": 24.08, + "learning_rate": 5.93297220295777e-05, + "loss": 1.298, + "step": 858000 + }, + { + "epoch": 24.09, + "learning_rate": 5.918914359618376e-05, + "loss": 1.2798, + "step": 858500 + }, + { + "epoch": 24.11, + "learning_rate": 5.9048565162789826e-05, + "loss": 1.2808, + "step": 859000 + }, + { + "epoch": 24.12, + "learning_rate": 5.890798672939588e-05, + "loss": 1.3283, + "step": 859500 + }, + { + "epoch": 24.13, + "learning_rate": 5.876740829600195e-05, + "loss": 1.3048, + "step": 860000 + }, + { + "epoch": 24.15, + "learning_rate": 5.8626829862608005e-05, + "loss": 1.2911, + "step": 860500 + }, + { + "epoch": 24.16, + "learning_rate": 5.848625142921407e-05, + "loss": 1.2803, + "step": 861000 + }, + { + "epoch": 24.18, + "learning_rate": 5.834567299582013e-05, + "loss": 1.3098, + "step": 861500 + }, + { + "epoch": 24.19, + "learning_rate": 5.82050945624262e-05, + "loss": 1.3068, + "step": 862000 + }, + { + "epoch": 24.2, + "learning_rate": 5.806451612903225e-05, + "loss": 1.2908, + "step": 862500 + }, + { + "epoch": 24.22, + "learning_rate": 5.7923937695638314e-05, + "loss": 1.2579, + "step": 863000 + }, + { + "epoch": 24.23, + "learning_rate": 5.7783359262244376e-05, + "loss": 1.3015, + "step": 863500 + }, + { + "epoch": 24.25, + "learning_rate": 5.764278082885044e-05, + "loss": 1.2958, + "step": 864000 + }, + { + "epoch": 24.26, + "learning_rate": 5.75022023954565e-05, + "loss": 1.3135, + "step": 864500 + }, + { + "epoch": 24.27, + "learning_rate": 5.736162396206256e-05, + "loss": 1.2794, + "step": 865000 + }, + { + "epoch": 24.29, + "learning_rate": 5.7221045528668624e-05, + "loss": 1.2913, + "step": 865500 + }, + { + "epoch": 24.3, + "learning_rate": 5.7080467095274686e-05, + "loss": 1.2561, + "step": 866000 + }, + { + "epoch": 24.32, + "learning_rate": 5.693988866188075e-05, + "loss": 1.3083, + "step": 866500 + }, + { + "epoch": 24.33, + "learning_rate": 5.679931022848681e-05, + "loss": 1.282, + "step": 867000 + }, + { + "epoch": 24.34, + "learning_rate": 5.6658731795092865e-05, + "loss": 1.2826, + "step": 867500 + }, + { + "epoch": 24.36, + "learning_rate": 5.6518153361698934e-05, + "loss": 1.2866, + "step": 868000 + }, + { + "epoch": 24.37, + "learning_rate": 5.637757492830499e-05, + "loss": 1.3105, + "step": 868500 + }, + { + "epoch": 24.39, + "learning_rate": 5.623699649491106e-05, + "loss": 1.3, + "step": 869000 + }, + { + "epoch": 24.4, + "learning_rate": 5.609641806151711e-05, + "loss": 1.2891, + "step": 869500 + }, + { + "epoch": 24.41, + "learning_rate": 5.595583962812318e-05, + "loss": 1.3011, + "step": 870000 + }, + { + "epoch": 24.43, + "learning_rate": 5.5815261194729236e-05, + "loss": 1.3177, + "step": 870500 + }, + { + "epoch": 24.44, + "learning_rate": 5.5674682761335305e-05, + "loss": 1.3146, + "step": 871000 + }, + { + "epoch": 24.46, + "learning_rate": 5.553410432794136e-05, + "loss": 1.3113, + "step": 871500 + }, + { + "epoch": 24.47, + "learning_rate": 5.539352589454743e-05, + "loss": 1.29, + "step": 872000 + }, + { + "epoch": 24.48, + "learning_rate": 5.5252947461153484e-05, + "loss": 1.3293, + "step": 872500 + }, + { + "epoch": 24.5, + "learning_rate": 5.511236902775955e-05, + "loss": 1.3065, + "step": 873000 + }, + { + "epoch": 24.51, + "learning_rate": 5.497179059436561e-05, + "loss": 1.2612, + "step": 873500 + }, + { + "epoch": 24.53, + "learning_rate": 5.483121216097168e-05, + "loss": 1.3006, + "step": 874000 + }, + { + "epoch": 24.54, + "learning_rate": 5.469063372757773e-05, + "loss": 1.2845, + "step": 874500 + }, + { + "epoch": 24.55, + "learning_rate": 5.45500552941838e-05, + "loss": 1.3217, + "step": 875000 + }, + { + "epoch": 24.57, + "learning_rate": 5.4409476860789856e-05, + "loss": 1.3276, + "step": 875500 + }, + { + "epoch": 24.58, + "learning_rate": 5.426889842739592e-05, + "loss": 1.2993, + "step": 876000 + }, + { + "epoch": 24.6, + "learning_rate": 5.412831999400198e-05, + "loss": 1.2916, + "step": 876500 + }, + { + "epoch": 24.61, + "learning_rate": 5.398774156060804e-05, + "loss": 1.3397, + "step": 877000 + }, + { + "epoch": 24.63, + "learning_rate": 5.3847163127214103e-05, + "loss": 1.3067, + "step": 877500 + }, + { + "epoch": 24.64, + "learning_rate": 5.3706584693820165e-05, + "loss": 1.3235, + "step": 878000 + }, + { + "epoch": 24.65, + "learning_rate": 5.356600626042623e-05, + "loss": 1.2989, + "step": 878500 + }, + { + "epoch": 24.67, + "learning_rate": 5.342542782703229e-05, + "loss": 1.2858, + "step": 879000 + }, + { + "epoch": 24.68, + "learning_rate": 5.328484939363835e-05, + "loss": 1.3062, + "step": 879500 + }, + { + "epoch": 24.7, + "learning_rate": 5.314427096024441e-05, + "loss": 1.2865, + "step": 880000 + }, + { + "epoch": 24.71, + "learning_rate": 5.300369252685048e-05, + "loss": 1.2966, + "step": 880500 + }, + { + "epoch": 24.72, + "learning_rate": 5.286311409345654e-05, + "loss": 1.2927, + "step": 881000 + }, + { + "epoch": 24.74, + "learning_rate": 5.2722535660062606e-05, + "loss": 1.2981, + "step": 881500 + }, + { + "epoch": 24.75, + "learning_rate": 5.258195722666866e-05, + "loss": 1.3123, + "step": 882000 + }, + { + "epoch": 24.77, + "learning_rate": 5.244137879327473e-05, + "loss": 1.3186, + "step": 882500 + }, + { + "epoch": 24.78, + "learning_rate": 5.2300800359880785e-05, + "loss": 1.2962, + "step": 883000 + }, + { + "epoch": 24.79, + "learning_rate": 5.2160221926486853e-05, + "loss": 1.2899, + "step": 883500 + }, + { + "epoch": 24.81, + "learning_rate": 5.201964349309291e-05, + "loss": 1.3057, + "step": 884000 + }, + { + "epoch": 24.82, + "learning_rate": 5.187906505969897e-05, + "loss": 1.2949, + "step": 884500 + }, + { + "epoch": 24.84, + "learning_rate": 5.173848662630503e-05, + "loss": 1.2874, + "step": 885000 + }, + { + "epoch": 24.85, + "learning_rate": 5.1597908192911094e-05, + "loss": 1.3297, + "step": 885500 + }, + { + "epoch": 24.86, + "learning_rate": 5.1457329759517156e-05, + "loss": 1.3324, + "step": 886000 + }, + { + "epoch": 24.88, + "learning_rate": 5.131675132612322e-05, + "loss": 1.3237, + "step": 886500 + }, + { + "epoch": 24.89, + "learning_rate": 5.117617289272928e-05, + "loss": 1.3252, + "step": 887000 + }, + { + "epoch": 24.91, + "learning_rate": 5.103559445933534e-05, + "loss": 1.2741, + "step": 887500 + }, + { + "epoch": 24.92, + "learning_rate": 5.0895016025941404e-05, + "loss": 1.3208, + "step": 888000 + }, + { + "epoch": 24.93, + "learning_rate": 5.0754437592547466e-05, + "loss": 1.2998, + "step": 888500 + }, + { + "epoch": 24.95, + "learning_rate": 5.061385915915353e-05, + "loss": 1.2971, + "step": 889000 + }, + { + "epoch": 24.96, + "learning_rate": 5.047328072575959e-05, + "loss": 1.3066, + "step": 889500 + }, + { + "epoch": 24.98, + "learning_rate": 5.0332702292365645e-05, + "loss": 1.3289, + "step": 890000 + }, + { + "epoch": 24.99, + "learning_rate": 5.0192123858971714e-05, + "loss": 1.3317, + "step": 890500 + }, + { + "epoch": 25.0, + "learning_rate": 5.005154542557777e-05, + "loss": 1.3158, + "step": 891000 + }, + { + "epoch": 25.02, + "learning_rate": 4.991096699218384e-05, + "loss": 1.2461, + "step": 891500 + }, + { + "epoch": 25.03, + "learning_rate": 4.977038855878989e-05, + "loss": 1.2825, + "step": 892000 + }, + { + "epoch": 25.05, + "learning_rate": 4.962981012539596e-05, + "loss": 1.2938, + "step": 892500 + }, + { + "epoch": 25.06, + "learning_rate": 4.9489231692002017e-05, + "loss": 1.2464, + "step": 893000 + }, + { + "epoch": 25.07, + "learning_rate": 4.9348653258608085e-05, + "loss": 1.2535, + "step": 893500 + }, + { + "epoch": 25.09, + "learning_rate": 4.920807482521414e-05, + "loss": 1.2678, + "step": 894000 + }, + { + "epoch": 25.1, + "learning_rate": 4.906749639182021e-05, + "loss": 1.2809, + "step": 894500 + }, + { + "epoch": 25.12, + "learning_rate": 4.8926917958426264e-05, + "loss": 1.2612, + "step": 895000 + }, + { + "epoch": 25.13, + "learning_rate": 4.878633952503233e-05, + "loss": 1.2808, + "step": 895500 + }, + { + "epoch": 25.14, + "learning_rate": 4.864576109163839e-05, + "loss": 1.2758, + "step": 896000 + }, + { + "epoch": 25.16, + "learning_rate": 4.850518265824446e-05, + "loss": 1.267, + "step": 896500 + }, + { + "epoch": 25.17, + "learning_rate": 4.836460422485051e-05, + "loss": 1.2714, + "step": 897000 + }, + { + "epoch": 25.19, + "learning_rate": 4.822402579145658e-05, + "loss": 1.2657, + "step": 897500 + }, + { + "epoch": 25.2, + "learning_rate": 4.8083447358062636e-05, + "loss": 1.2706, + "step": 898000 + }, + { + "epoch": 25.21, + "learning_rate": 4.79428689246687e-05, + "loss": 1.2595, + "step": 898500 + }, + { + "epoch": 25.23, + "learning_rate": 4.780229049127476e-05, + "loss": 1.2448, + "step": 899000 + }, + { + "epoch": 25.24, + "learning_rate": 4.766171205788082e-05, + "loss": 1.2639, + "step": 899500 + }, + { + "epoch": 25.26, + "learning_rate": 4.7521133624486884e-05, + "loss": 1.2664, + "step": 900000 + }, + { + "epoch": 25.27, + "learning_rate": 4.7380555191092946e-05, + "loss": 1.2512, + "step": 900500 + }, + { + "epoch": 25.28, + "learning_rate": 4.723997675769901e-05, + "loss": 1.2682, + "step": 901000 + }, + { + "epoch": 25.3, + "learning_rate": 4.709939832430507e-05, + "loss": 1.2866, + "step": 901500 + }, + { + "epoch": 25.31, + "learning_rate": 4.695881989091113e-05, + "loss": 1.2619, + "step": 902000 + }, + { + "epoch": 25.33, + "learning_rate": 4.681824145751719e-05, + "loss": 1.2451, + "step": 902500 + }, + { + "epoch": 25.34, + "learning_rate": 4.6677663024123255e-05, + "loss": 1.2759, + "step": 903000 + }, + { + "epoch": 25.35, + "learning_rate": 4.653708459072932e-05, + "loss": 1.2691, + "step": 903500 + }, + { + "epoch": 25.37, + "learning_rate": 4.639650615733537e-05, + "loss": 1.2742, + "step": 904000 + }, + { + "epoch": 25.38, + "learning_rate": 4.625592772394144e-05, + "loss": 1.263, + "step": 904500 + }, + { + "epoch": 25.4, + "learning_rate": 4.6115349290547496e-05, + "loss": 1.2801, + "step": 905000 + }, + { + "epoch": 25.41, + "learning_rate": 4.5974770857153565e-05, + "loss": 1.281, + "step": 905500 + }, + { + "epoch": 25.42, + "learning_rate": 4.583419242375962e-05, + "loss": 1.2552, + "step": 906000 + }, + { + "epoch": 25.44, + "learning_rate": 4.569361399036569e-05, + "loss": 1.2866, + "step": 906500 + }, + { + "epoch": 25.45, + "learning_rate": 4.5553035556971744e-05, + "loss": 1.2792, + "step": 907000 + }, + { + "epoch": 25.47, + "learning_rate": 4.541245712357781e-05, + "loss": 1.2564, + "step": 907500 + }, + { + "epoch": 25.48, + "learning_rate": 4.527187869018387e-05, + "loss": 1.2652, + "step": 908000 + }, + { + "epoch": 25.49, + "learning_rate": 4.5131300256789936e-05, + "loss": 1.2889, + "step": 908500 + }, + { + "epoch": 25.51, + "learning_rate": 4.499072182339599e-05, + "loss": 1.2696, + "step": 909000 + }, + { + "epoch": 25.52, + "learning_rate": 4.485014339000206e-05, + "loss": 1.2568, + "step": 909500 + }, + { + "epoch": 25.54, + "learning_rate": 4.470956495660812e-05, + "loss": 1.2891, + "step": 910000 + }, + { + "epoch": 25.55, + "learning_rate": 4.4568986523214184e-05, + "loss": 1.2697, + "step": 910500 + }, + { + "epoch": 25.57, + "learning_rate": 4.4428408089820246e-05, + "loss": 1.2713, + "step": 911000 + }, + { + "epoch": 25.58, + "learning_rate": 4.428782965642631e-05, + "loss": 1.2878, + "step": 911500 + }, + { + "epoch": 25.59, + "learning_rate": 4.414725122303237e-05, + "loss": 1.2697, + "step": 912000 + }, + { + "epoch": 25.61, + "learning_rate": 4.4006672789638425e-05, + "loss": 1.2866, + "step": 912500 + }, + { + "epoch": 25.62, + "learning_rate": 4.3866094356244494e-05, + "loss": 1.2519, + "step": 913000 + }, + { + "epoch": 25.64, + "learning_rate": 4.372551592285055e-05, + "loss": 1.2736, + "step": 913500 + }, + { + "epoch": 25.65, + "learning_rate": 4.358493748945662e-05, + "loss": 1.2847, + "step": 914000 + }, + { + "epoch": 25.66, + "learning_rate": 4.344435905606267e-05, + "loss": 1.2802, + "step": 914500 + }, + { + "epoch": 25.68, + "learning_rate": 4.330378062266874e-05, + "loss": 1.2639, + "step": 915000 + }, + { + "epoch": 25.69, + "learning_rate": 4.31632021892748e-05, + "loss": 1.2613, + "step": 915500 + }, + { + "epoch": 25.71, + "learning_rate": 4.3022623755880865e-05, + "loss": 1.2673, + "step": 916000 + }, + { + "epoch": 25.72, + "learning_rate": 4.288204532248692e-05, + "loss": 1.2737, + "step": 916500 + }, + { + "epoch": 25.73, + "learning_rate": 4.274146688909299e-05, + "loss": 1.2877, + "step": 917000 + }, + { + "epoch": 25.75, + "learning_rate": 4.2600888455699044e-05, + "loss": 1.2975, + "step": 917500 + }, + { + "epoch": 25.76, + "learning_rate": 4.246031002230511e-05, + "loss": 1.2643, + "step": 918000 + }, + { + "epoch": 25.78, + "learning_rate": 4.231973158891117e-05, + "loss": 1.3068, + "step": 918500 + }, + { + "epoch": 25.79, + "learning_rate": 4.217915315551724e-05, + "loss": 1.2768, + "step": 919000 + }, + { + "epoch": 25.8, + "learning_rate": 4.203857472212329e-05, + "loss": 1.2809, + "step": 919500 + }, + { + "epoch": 25.82, + "learning_rate": 4.1897996288729354e-05, + "loss": 1.2895, + "step": 920000 + }, + { + "epoch": 25.83, + "learning_rate": 4.1757417855335416e-05, + "loss": 1.289, + "step": 920500 + }, + { + "epoch": 25.85, + "learning_rate": 4.161683942194148e-05, + "loss": 1.2685, + "step": 921000 + }, + { + "epoch": 25.86, + "learning_rate": 4.147626098854754e-05, + "loss": 1.2986, + "step": 921500 + }, + { + "epoch": 25.87, + "learning_rate": 4.13356825551536e-05, + "loss": 1.2887, + "step": 922000 + }, + { + "epoch": 25.89, + "learning_rate": 4.1195104121759664e-05, + "loss": 1.2693, + "step": 922500 + }, + { + "epoch": 25.9, + "learning_rate": 4.1054525688365726e-05, + "loss": 1.2494, + "step": 923000 + }, + { + "epoch": 25.92, + "learning_rate": 4.091394725497179e-05, + "loss": 1.2642, + "step": 923500 + }, + { + "epoch": 25.93, + "learning_rate": 4.077336882157785e-05, + "loss": 1.2777, + "step": 924000 + }, + { + "epoch": 25.94, + "learning_rate": 4.063279038818391e-05, + "loss": 1.2755, + "step": 924500 + }, + { + "epoch": 25.96, + "learning_rate": 4.0492211954789973e-05, + "loss": 1.2565, + "step": 925000 + }, + { + "epoch": 25.97, + "learning_rate": 4.035163352139603e-05, + "loss": 1.2715, + "step": 925500 + }, + { + "epoch": 25.99, + "learning_rate": 4.02110550880021e-05, + "loss": 1.2794, + "step": 926000 + }, + { + "epoch": 26.0, + "learning_rate": 4.007047665460815e-05, + "loss": 1.2859, + "step": 926500 + }, + { + "epoch": 26.01, + "learning_rate": 3.992989822121422e-05, + "loss": 1.2242, + "step": 927000 + }, + { + "epoch": 26.03, + "learning_rate": 3.9789319787820276e-05, + "loss": 1.244, + "step": 927500 + }, + { + "epoch": 26.04, + "learning_rate": 3.9648741354426345e-05, + "loss": 1.2497, + "step": 928000 + }, + { + "epoch": 26.06, + "learning_rate": 3.95081629210324e-05, + "loss": 1.2126, + "step": 928500 + }, + { + "epoch": 26.07, + "learning_rate": 3.936758448763847e-05, + "loss": 1.2374, + "step": 929000 + }, + { + "epoch": 26.08, + "learning_rate": 3.9227006054244524e-05, + "loss": 1.2321, + "step": 929500 + }, + { + "epoch": 26.1, + "learning_rate": 3.908642762085059e-05, + "loss": 1.2191, + "step": 930000 + }, + { + "epoch": 26.11, + "learning_rate": 3.894584918745665e-05, + "loss": 1.2377, + "step": 930500 + }, + { + "epoch": 26.13, + "learning_rate": 3.8805270754062717e-05, + "loss": 1.2306, + "step": 931000 + }, + { + "epoch": 26.14, + "learning_rate": 3.866469232066877e-05, + "loss": 1.2093, + "step": 931500 + }, + { + "epoch": 26.15, + "learning_rate": 3.852411388727484e-05, + "loss": 1.2353, + "step": 932000 + }, + { + "epoch": 26.17, + "learning_rate": 3.8383535453880896e-05, + "loss": 1.2532, + "step": 932500 + }, + { + "epoch": 26.18, + "learning_rate": 3.8242957020486964e-05, + "loss": 1.2356, + "step": 933000 + }, + { + "epoch": 26.2, + "learning_rate": 3.810237858709302e-05, + "loss": 1.2562, + "step": 933500 + }, + { + "epoch": 26.21, + "learning_rate": 3.796180015369908e-05, + "loss": 1.2434, + "step": 934000 + }, + { + "epoch": 26.22, + "learning_rate": 3.782122172030514e-05, + "loss": 1.2458, + "step": 934500 + }, + { + "epoch": 26.24, + "learning_rate": 3.7680643286911205e-05, + "loss": 1.2291, + "step": 935000 + }, + { + "epoch": 26.25, + "learning_rate": 3.754006485351727e-05, + "loss": 1.2355, + "step": 935500 + }, + { + "epoch": 26.27, + "learning_rate": 3.739948642012333e-05, + "loss": 1.2213, + "step": 936000 + }, + { + "epoch": 26.28, + "learning_rate": 3.725890798672939e-05, + "loss": 1.2332, + "step": 936500 + }, + { + "epoch": 26.29, + "learning_rate": 3.711832955333545e-05, + "loss": 1.2243, + "step": 937000 + }, + { + "epoch": 26.31, + "learning_rate": 3.6977751119941515e-05, + "loss": 1.2342, + "step": 937500 + }, + { + "epoch": 26.32, + "learning_rate": 3.683717268654758e-05, + "loss": 1.276, + "step": 938000 + }, + { + "epoch": 26.34, + "learning_rate": 3.669659425315364e-05, + "loss": 1.2475, + "step": 938500 + }, + { + "epoch": 26.35, + "learning_rate": 3.65560158197597e-05, + "loss": 1.2522, + "step": 939000 + }, + { + "epoch": 26.36, + "learning_rate": 3.641543738636576e-05, + "loss": 1.2453, + "step": 939500 + }, + { + "epoch": 26.38, + "learning_rate": 3.6274858952971825e-05, + "loss": 1.2416, + "step": 940000 + }, + { + "epoch": 26.39, + "learning_rate": 3.6134280519577886e-05, + "loss": 1.2485, + "step": 940500 + }, + { + "epoch": 26.41, + "learning_rate": 3.599370208618395e-05, + "loss": 1.2451, + "step": 941000 + }, + { + "epoch": 26.42, + "learning_rate": 3.585312365279001e-05, + "loss": 1.2666, + "step": 941500 + }, + { + "epoch": 26.44, + "learning_rate": 3.571254521939607e-05, + "loss": 1.2476, + "step": 942000 + }, + { + "epoch": 26.45, + "learning_rate": 3.5571966786002134e-05, + "loss": 1.2161, + "step": 942500 + }, + { + "epoch": 26.46, + "learning_rate": 3.5431388352608196e-05, + "loss": 1.2345, + "step": 943000 + }, + { + "epoch": 26.48, + "learning_rate": 3.529080991921426e-05, + "loss": 1.247, + "step": 943500 + }, + { + "epoch": 26.49, + "learning_rate": 3.515023148582032e-05, + "loss": 1.2497, + "step": 944000 + }, + { + "epoch": 26.51, + "learning_rate": 3.500965305242638e-05, + "loss": 1.2481, + "step": 944500 + }, + { + "epoch": 26.52, + "learning_rate": 3.4869074619032444e-05, + "loss": 1.2761, + "step": 945000 + }, + { + "epoch": 26.53, + "learning_rate": 3.4728496185638506e-05, + "loss": 1.2396, + "step": 945500 + }, + { + "epoch": 26.55, + "learning_rate": 3.458791775224457e-05, + "loss": 1.2603, + "step": 946000 + }, + { + "epoch": 26.56, + "learning_rate": 3.444733931885063e-05, + "loss": 1.2455, + "step": 946500 + }, + { + "epoch": 26.58, + "learning_rate": 3.430676088545669e-05, + "loss": 1.2233, + "step": 947000 + }, + { + "epoch": 26.59, + "learning_rate": 3.416618245206275e-05, + "loss": 1.2498, + "step": 947500 + }, + { + "epoch": 26.6, + "learning_rate": 3.402560401866881e-05, + "loss": 1.2853, + "step": 948000 + }, + { + "epoch": 26.62, + "learning_rate": 3.388502558527487e-05, + "loss": 1.2309, + "step": 948500 + }, + { + "epoch": 26.63, + "learning_rate": 3.374444715188093e-05, + "loss": 1.2246, + "step": 949000 + }, + { + "epoch": 26.65, + "learning_rate": 3.3603868718486994e-05, + "loss": 1.2179, + "step": 949500 + }, + { + "epoch": 26.66, + "learning_rate": 3.3463290285093056e-05, + "loss": 1.2463, + "step": 950000 + }, + { + "epoch": 26.67, + "learning_rate": 3.332271185169912e-05, + "loss": 1.2507, + "step": 950500 + }, + { + "epoch": 26.69, + "learning_rate": 3.318213341830518e-05, + "loss": 1.242, + "step": 951000 + }, + { + "epoch": 26.7, + "learning_rate": 3.304155498491124e-05, + "loss": 1.2434, + "step": 951500 + }, + { + "epoch": 26.72, + "learning_rate": 3.2900976551517304e-05, + "loss": 1.244, + "step": 952000 + }, + { + "epoch": 26.73, + "learning_rate": 3.2760398118123366e-05, + "loss": 1.2368, + "step": 952500 + }, + { + "epoch": 26.74, + "learning_rate": 3.261981968472943e-05, + "loss": 1.2472, + "step": 953000 + }, + { + "epoch": 26.76, + "learning_rate": 3.247924125133549e-05, + "loss": 1.251, + "step": 953500 + }, + { + "epoch": 26.77, + "learning_rate": 3.233866281794156e-05, + "loss": 1.2534, + "step": 954000 + }, + { + "epoch": 26.79, + "learning_rate": 3.219808438454762e-05, + "loss": 1.2408, + "step": 954500 + }, + { + "epoch": 26.8, + "learning_rate": 3.205750595115368e-05, + "loss": 1.2493, + "step": 955000 + }, + { + "epoch": 26.81, + "learning_rate": 3.1916927517759744e-05, + "loss": 1.2163, + "step": 955500 + }, + { + "epoch": 26.83, + "learning_rate": 3.17763490843658e-05, + "loss": 1.2495, + "step": 956000 + }, + { + "epoch": 26.84, + "learning_rate": 3.163577065097186e-05, + "loss": 1.2494, + "step": 956500 + }, + { + "epoch": 26.86, + "learning_rate": 3.1495192217577923e-05, + "loss": 1.229, + "step": 957000 + }, + { + "epoch": 26.87, + "learning_rate": 3.1354613784183985e-05, + "loss": 1.257, + "step": 957500 + }, + { + "epoch": 26.88, + "learning_rate": 3.121403535079005e-05, + "loss": 1.2336, + "step": 958000 + }, + { + "epoch": 26.9, + "learning_rate": 3.107345691739611e-05, + "loss": 1.2409, + "step": 958500 + }, + { + "epoch": 26.91, + "learning_rate": 3.093287848400217e-05, + "loss": 1.2139, + "step": 959000 + }, + { + "epoch": 26.93, + "learning_rate": 3.079230005060823e-05, + "loss": 1.2512, + "step": 959500 + }, + { + "epoch": 26.94, + "learning_rate": 3.0651721617214295e-05, + "loss": 1.2627, + "step": 960000 + }, + { + "epoch": 26.95, + "learning_rate": 3.0511143183820357e-05, + "loss": 1.251, + "step": 960500 + }, + { + "epoch": 26.97, + "learning_rate": 3.037056475042642e-05, + "loss": 1.2024, + "step": 961000 + }, + { + "epoch": 26.98, + "learning_rate": 3.022998631703248e-05, + "loss": 1.2204, + "step": 961500 + }, + { + "epoch": 27.0, + "learning_rate": 3.0089407883638543e-05, + "loss": 1.2593, + "step": 962000 + }, + { + "epoch": 27.01, + "learning_rate": 2.9948829450244605e-05, + "loss": 1.2286, + "step": 962500 + }, + { + "epoch": 27.02, + "learning_rate": 2.9808251016850667e-05, + "loss": 1.2079, + "step": 963000 + }, + { + "epoch": 27.04, + "learning_rate": 2.966767258345673e-05, + "loss": 1.2022, + "step": 963500 + }, + { + "epoch": 27.05, + "learning_rate": 2.952709415006279e-05, + "loss": 1.1943, + "step": 964000 + }, + { + "epoch": 27.07, + "learning_rate": 2.9386515716668852e-05, + "loss": 1.1952, + "step": 964500 + }, + { + "epoch": 27.08, + "learning_rate": 2.9245937283274914e-05, + "loss": 1.2296, + "step": 965000 + }, + { + "epoch": 27.09, + "learning_rate": 2.9105358849880976e-05, + "loss": 1.2143, + "step": 965500 + }, + { + "epoch": 27.11, + "learning_rate": 2.8964780416487035e-05, + "loss": 1.2432, + "step": 966000 + }, + { + "epoch": 27.12, + "learning_rate": 2.8824201983093097e-05, + "loss": 1.2106, + "step": 966500 + }, + { + "epoch": 27.14, + "learning_rate": 2.868362354969916e-05, + "loss": 1.2221, + "step": 967000 + }, + { + "epoch": 27.15, + "learning_rate": 2.854304511630522e-05, + "loss": 1.2327, + "step": 967500 + }, + { + "epoch": 27.16, + "learning_rate": 2.8402466682911283e-05, + "loss": 1.2119, + "step": 968000 + }, + { + "epoch": 27.18, + "learning_rate": 2.8261888249517344e-05, + "loss": 1.2094, + "step": 968500 + }, + { + "epoch": 27.19, + "learning_rate": 2.8121309816123406e-05, + "loss": 1.1851, + "step": 969000 + }, + { + "epoch": 27.21, + "learning_rate": 2.798073138272947e-05, + "loss": 1.2177, + "step": 969500 + }, + { + "epoch": 27.22, + "learning_rate": 2.784015294933553e-05, + "loss": 1.253, + "step": 970000 + }, + { + "epoch": 27.23, + "learning_rate": 2.7699574515941592e-05, + "loss": 1.2167, + "step": 970500 + }, + { + "epoch": 27.25, + "learning_rate": 2.7558996082547654e-05, + "loss": 1.2125, + "step": 971000 + }, + { + "epoch": 27.26, + "learning_rate": 2.7418417649153716e-05, + "loss": 1.1955, + "step": 971500 + }, + { + "epoch": 27.28, + "learning_rate": 2.7277839215759778e-05, + "loss": 1.1958, + "step": 972000 + }, + { + "epoch": 27.29, + "learning_rate": 2.713726078236584e-05, + "loss": 1.229, + "step": 972500 + }, + { + "epoch": 27.3, + "learning_rate": 2.69966823489719e-05, + "loss": 1.2004, + "step": 973000 + }, + { + "epoch": 27.32, + "learning_rate": 2.685610391557796e-05, + "loss": 1.2221, + "step": 973500 + }, + { + "epoch": 27.33, + "learning_rate": 2.6715525482184022e-05, + "loss": 1.2211, + "step": 974000 + }, + { + "epoch": 27.35, + "learning_rate": 2.6574947048790084e-05, + "loss": 1.2279, + "step": 974500 + }, + { + "epoch": 27.36, + "learning_rate": 2.6434368615396146e-05, + "loss": 1.229, + "step": 975000 + }, + { + "epoch": 27.38, + "learning_rate": 2.6293790182002208e-05, + "loss": 1.2155, + "step": 975500 + }, + { + "epoch": 27.39, + "learning_rate": 2.615321174860827e-05, + "loss": 1.2055, + "step": 976000 + }, + { + "epoch": 27.4, + "learning_rate": 2.6012633315214332e-05, + "loss": 1.2284, + "step": 976500 + }, + { + "epoch": 27.42, + "learning_rate": 2.5872054881820394e-05, + "loss": 1.2137, + "step": 977000 + }, + { + "epoch": 27.43, + "learning_rate": 2.5731476448426456e-05, + "loss": 1.2012, + "step": 977500 + }, + { + "epoch": 27.45, + "learning_rate": 2.5590898015032518e-05, + "loss": 1.2012, + "step": 978000 + }, + { + "epoch": 27.46, + "learning_rate": 2.545031958163858e-05, + "loss": 1.2168, + "step": 978500 + }, + { + "epoch": 27.47, + "learning_rate": 2.530974114824464e-05, + "loss": 1.2162, + "step": 979000 + }, + { + "epoch": 27.49, + "learning_rate": 2.5169162714850704e-05, + "loss": 1.2203, + "step": 979500 + }, + { + "epoch": 27.5, + "learning_rate": 2.5028584281456762e-05, + "loss": 1.1994, + "step": 980000 + }, + { + "epoch": 27.52, + "learning_rate": 2.4888005848062824e-05, + "loss": 1.2206, + "step": 980500 + }, + { + "epoch": 27.53, + "learning_rate": 2.4747427414668886e-05, + "loss": 1.2047, + "step": 981000 + }, + { + "epoch": 27.54, + "learning_rate": 2.4606848981274948e-05, + "loss": 1.1918, + "step": 981500 + }, + { + "epoch": 27.56, + "learning_rate": 2.446627054788101e-05, + "loss": 1.2074, + "step": 982000 + }, + { + "epoch": 27.57, + "learning_rate": 2.4325692114487072e-05, + "loss": 1.2206, + "step": 982500 + }, + { + "epoch": 27.59, + "learning_rate": 2.4185113681093137e-05, + "loss": 1.2193, + "step": 983000 + }, + { + "epoch": 27.6, + "learning_rate": 2.40445352476992e-05, + "loss": 1.2269, + "step": 983500 + }, + { + "epoch": 27.61, + "learning_rate": 2.390395681430526e-05, + "loss": 1.2168, + "step": 984000 + }, + { + "epoch": 27.63, + "learning_rate": 2.3763378380911323e-05, + "loss": 1.2136, + "step": 984500 + }, + { + "epoch": 27.64, + "learning_rate": 2.3622799947517385e-05, + "loss": 1.1938, + "step": 985000 + }, + { + "epoch": 27.66, + "learning_rate": 2.3482221514123447e-05, + "loss": 1.2355, + "step": 985500 + }, + { + "epoch": 27.67, + "learning_rate": 2.334164308072951e-05, + "loss": 1.2056, + "step": 986000 + }, + { + "epoch": 27.68, + "learning_rate": 2.320106464733557e-05, + "loss": 1.2273, + "step": 986500 + }, + { + "epoch": 27.7, + "learning_rate": 2.3060486213941633e-05, + "loss": 1.2103, + "step": 987000 + }, + { + "epoch": 27.71, + "learning_rate": 2.2919907780547694e-05, + "loss": 1.1975, + "step": 987500 + }, + { + "epoch": 27.73, + "learning_rate": 2.2779329347153753e-05, + "loss": 1.2034, + "step": 988000 + }, + { + "epoch": 27.74, + "learning_rate": 2.2638750913759815e-05, + "loss": 1.2128, + "step": 988500 + }, + { + "epoch": 27.75, + "learning_rate": 2.2498172480365877e-05, + "loss": 1.2214, + "step": 989000 + }, + { + "epoch": 27.77, + "learning_rate": 2.235759404697194e-05, + "loss": 1.2121, + "step": 989500 + }, + { + "epoch": 27.78, + "learning_rate": 2.2217015613578e-05, + "loss": 1.2008, + "step": 990000 + }, + { + "epoch": 27.8, + "learning_rate": 2.2076437180184063e-05, + "loss": 1.2235, + "step": 990500 + }, + { + "epoch": 27.81, + "learning_rate": 2.1935858746790125e-05, + "loss": 1.224, + "step": 991000 + }, + { + "epoch": 27.82, + "learning_rate": 2.1795280313396187e-05, + "loss": 1.2283, + "step": 991500 + }, + { + "epoch": 27.84, + "learning_rate": 2.165470188000225e-05, + "loss": 1.1691, + "step": 992000 + }, + { + "epoch": 27.85, + "learning_rate": 2.151412344660831e-05, + "loss": 1.2283, + "step": 992500 + }, + { + "epoch": 27.87, + "learning_rate": 2.1373545013214372e-05, + "loss": 1.2298, + "step": 993000 + }, + { + "epoch": 27.88, + "learning_rate": 2.1232966579820434e-05, + "loss": 1.2071, + "step": 993500 + }, + { + "epoch": 27.89, + "learning_rate": 2.1092388146426496e-05, + "loss": 1.201, + "step": 994000 + }, + { + "epoch": 27.91, + "learning_rate": 2.0951809713032558e-05, + "loss": 1.2401, + "step": 994500 + }, + { + "epoch": 27.92, + "learning_rate": 2.0811231279638617e-05, + "loss": 1.2178, + "step": 995000 + }, + { + "epoch": 27.94, + "learning_rate": 2.067065284624468e-05, + "loss": 1.2387, + "step": 995500 + }, + { + "epoch": 27.95, + "learning_rate": 2.053007441285074e-05, + "loss": 1.2397, + "step": 996000 + }, + { + "epoch": 27.96, + "learning_rate": 2.0389495979456802e-05, + "loss": 1.2406, + "step": 996500 + }, + { + "epoch": 27.98, + "learning_rate": 2.0248917546062864e-05, + "loss": 1.2341, + "step": 997000 + }, + { + "epoch": 27.99, + "learning_rate": 2.0108339112668926e-05, + "loss": 1.2314, + "step": 997500 + }, + { + "epoch": 28.01, + "learning_rate": 1.9967760679274988e-05, + "loss": 1.206, + "step": 998000 + }, + { + "epoch": 28.02, + "learning_rate": 1.982718224588105e-05, + "loss": 1.1898, + "step": 998500 + }, + { + "epoch": 28.03, + "learning_rate": 1.9686603812487112e-05, + "loss": 1.2027, + "step": 999000 + }, + { + "epoch": 28.05, + "learning_rate": 1.9546025379093174e-05, + "loss": 1.2206, + "step": 999500 + }, + { + "epoch": 28.06, + "learning_rate": 1.9405446945699236e-05, + "loss": 1.1764, + "step": 1000000 + }, + { + "epoch": 28.08, + "learning_rate": 1.9264868512305298e-05, + "loss": 1.1983, + "step": 1000500 + }, + { + "epoch": 28.09, + "learning_rate": 1.912429007891136e-05, + "loss": 1.2146, + "step": 1001000 + }, + { + "epoch": 28.1, + "learning_rate": 1.8983711645517422e-05, + "loss": 1.2142, + "step": 1001500 + }, + { + "epoch": 28.12, + "learning_rate": 1.884313321212348e-05, + "loss": 1.212, + "step": 1002000 + }, + { + "epoch": 28.13, + "learning_rate": 1.8702554778729542e-05, + "loss": 1.1655, + "step": 1002500 + }, + { + "epoch": 28.15, + "learning_rate": 1.8561976345335604e-05, + "loss": 1.2233, + "step": 1003000 + }, + { + "epoch": 28.16, + "learning_rate": 1.8421397911941666e-05, + "loss": 1.2042, + "step": 1003500 + }, + { + "epoch": 28.17, + "learning_rate": 1.8280819478547728e-05, + "loss": 1.1557, + "step": 1004000 + }, + { + "epoch": 28.19, + "learning_rate": 1.814024104515379e-05, + "loss": 1.2044, + "step": 1004500 + }, + { + "epoch": 28.2, + "learning_rate": 1.7999662611759855e-05, + "loss": 1.2152, + "step": 1005000 + }, + { + "epoch": 28.22, + "learning_rate": 1.7859084178365917e-05, + "loss": 1.1963, + "step": 1005500 + }, + { + "epoch": 28.23, + "learning_rate": 1.7718505744971976e-05, + "loss": 1.1855, + "step": 1006000 + }, + { + "epoch": 28.25, + "learning_rate": 1.7577927311578038e-05, + "loss": 1.2021, + "step": 1006500 + }, + { + "epoch": 28.26, + "learning_rate": 1.74373488781841e-05, + "loss": 1.2276, + "step": 1007000 + }, + { + "epoch": 28.27, + "learning_rate": 1.729677044479016e-05, + "loss": 1.2031, + "step": 1007500 + }, + { + "epoch": 28.29, + "learning_rate": 1.7156192011396223e-05, + "loss": 1.197, + "step": 1008000 + }, + { + "epoch": 28.3, + "learning_rate": 1.7015613578002285e-05, + "loss": 1.1679, + "step": 1008500 + }, + { + "epoch": 28.32, + "learning_rate": 1.6875035144608347e-05, + "loss": 1.189, + "step": 1009000 + }, + { + "epoch": 28.33, + "learning_rate": 1.673445671121441e-05, + "loss": 1.2135, + "step": 1009500 + }, + { + "epoch": 28.34, + "learning_rate": 1.659387827782047e-05, + "loss": 1.1794, + "step": 1010000 + }, + { + "epoch": 28.36, + "learning_rate": 1.6453299844426533e-05, + "loss": 1.1916, + "step": 1010500 + }, + { + "epoch": 28.37, + "learning_rate": 1.6312721411032595e-05, + "loss": 1.1758, + "step": 1011000 + }, + { + "epoch": 28.39, + "learning_rate": 1.6172142977638657e-05, + "loss": 1.1733, + "step": 1011500 + }, + { + "epoch": 28.4, + "learning_rate": 1.603156454424472e-05, + "loss": 1.2269, + "step": 1012000 + }, + { + "epoch": 28.41, + "learning_rate": 1.589098611085078e-05, + "loss": 1.2067, + "step": 1012500 + }, + { + "epoch": 28.43, + "learning_rate": 1.575040767745684e-05, + "loss": 1.1773, + "step": 1013000 + }, + { + "epoch": 28.44, + "learning_rate": 1.56098292440629e-05, + "loss": 1.2163, + "step": 1013500 + }, + { + "epoch": 28.46, + "learning_rate": 1.5469250810668963e-05, + "loss": 1.1957, + "step": 1014000 + }, + { + "epoch": 28.47, + "learning_rate": 1.5328672377275025e-05, + "loss": 1.2178, + "step": 1014500 + }, + { + "epoch": 28.48, + "learning_rate": 1.5188093943881087e-05, + "loss": 1.1912, + "step": 1015000 + }, + { + "epoch": 28.5, + "learning_rate": 1.5047515510487149e-05, + "loss": 1.1908, + "step": 1015500 + }, + { + "epoch": 28.51, + "learning_rate": 1.4906937077093211e-05, + "loss": 1.2142, + "step": 1016000 + }, + { + "epoch": 28.53, + "learning_rate": 1.4766358643699273e-05, + "loss": 1.1873, + "step": 1016500 + }, + { + "epoch": 28.54, + "learning_rate": 1.4625780210305335e-05, + "loss": 1.171, + "step": 1017000 + }, + { + "epoch": 28.55, + "learning_rate": 1.4485201776911397e-05, + "loss": 1.1901, + "step": 1017500 + }, + { + "epoch": 28.57, + "learning_rate": 1.4344623343517457e-05, + "loss": 1.1877, + "step": 1018000 + }, + { + "epoch": 28.58, + "learning_rate": 1.4204044910123519e-05, + "loss": 1.2195, + "step": 1018500 + }, + { + "epoch": 28.6, + "learning_rate": 1.4063466476729581e-05, + "loss": 1.1885, + "step": 1019000 + }, + { + "epoch": 28.61, + "learning_rate": 1.3922888043335643e-05, + "loss": 1.2032, + "step": 1019500 + }, + { + "epoch": 28.62, + "learning_rate": 1.3782309609941706e-05, + "loss": 1.201, + "step": 1020000 + }, + { + "epoch": 28.64, + "learning_rate": 1.3641731176547768e-05, + "loss": 1.1836, + "step": 1020500 + }, + { + "epoch": 28.65, + "learning_rate": 1.350115274315383e-05, + "loss": 1.1866, + "step": 1021000 + }, + { + "epoch": 28.67, + "learning_rate": 1.3360574309759892e-05, + "loss": 1.2105, + "step": 1021500 + }, + { + "epoch": 28.68, + "learning_rate": 1.3219995876365954e-05, + "loss": 1.1799, + "step": 1022000 + }, + { + "epoch": 28.69, + "learning_rate": 1.3079417442972014e-05, + "loss": 1.1936, + "step": 1022500 + }, + { + "epoch": 28.71, + "learning_rate": 1.2938839009578076e-05, + "loss": 1.1856, + "step": 1023000 + }, + { + "epoch": 28.72, + "learning_rate": 1.2798260576184138e-05, + "loss": 1.1831, + "step": 1023500 + }, + { + "epoch": 28.74, + "learning_rate": 1.26576821427902e-05, + "loss": 1.1974, + "step": 1024000 + }, + { + "epoch": 28.75, + "learning_rate": 1.2517103709396262e-05, + "loss": 1.2063, + "step": 1024500 + }, + { + "epoch": 28.76, + "learning_rate": 1.2376525276002324e-05, + "loss": 1.1803, + "step": 1025000 + }, + { + "epoch": 28.78, + "learning_rate": 1.2235946842608384e-05, + "loss": 1.2299, + "step": 1025500 + }, + { + "epoch": 28.79, + "learning_rate": 1.2095368409214446e-05, + "loss": 1.185, + "step": 1026000 + }, + { + "epoch": 28.81, + "learning_rate": 1.1954789975820508e-05, + "loss": 1.2083, + "step": 1026500 + }, + { + "epoch": 28.82, + "learning_rate": 1.181421154242657e-05, + "loss": 1.1777, + "step": 1027000 + }, + { + "epoch": 28.83, + "learning_rate": 1.1673633109032632e-05, + "loss": 1.2018, + "step": 1027500 + }, + { + "epoch": 28.85, + "learning_rate": 1.1533054675638694e-05, + "loss": 1.1975, + "step": 1028000 + }, + { + "epoch": 28.86, + "learning_rate": 1.1392476242244756e-05, + "loss": 1.1666, + "step": 1028500 + }, + { + "epoch": 28.88, + "learning_rate": 1.1251897808850816e-05, + "loss": 1.1997, + "step": 1029000 + }, + { + "epoch": 28.89, + "learning_rate": 1.1111319375456878e-05, + "loss": 1.2238, + "step": 1029500 + }, + { + "epoch": 28.9, + "learning_rate": 1.097074094206294e-05, + "loss": 1.1982, + "step": 1030000 + }, + { + "epoch": 28.92, + "learning_rate": 1.0830162508669002e-05, + "loss": 1.1779, + "step": 1030500 + }, + { + "epoch": 28.93, + "learning_rate": 1.0689584075275064e-05, + "loss": 1.2023, + "step": 1031000 + }, + { + "epoch": 28.95, + "learning_rate": 1.0549005641881126e-05, + "loss": 1.1825, + "step": 1031500 + }, + { + "epoch": 28.96, + "learning_rate": 1.0408427208487188e-05, + "loss": 1.1994, + "step": 1032000 + }, + { + "epoch": 28.97, + "learning_rate": 1.0267848775093248e-05, + "loss": 1.2162, + "step": 1032500 + }, + { + "epoch": 28.99, + "learning_rate": 1.012727034169931e-05, + "loss": 1.2024, + "step": 1033000 + } + ], + "logging_steps": 500, + "max_steps": 1069020, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 1.7996577873665385e+21, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}