diff --git "a/checkpoint-1069020/trainer_state.json" "b/checkpoint-1069020/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1069020/trainer_state.json" @@ -0,0 +1,12849 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.99957905961919, + "eval_steps": 500, + "global_step": 1069020, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 0.00015, + "loss": 33.6336, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003, + "loss": 5.981, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002998595531918878, + "loss": 5.8563, + "step": 1500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029971910638377554, + "loss": 5.7975, + "step": 2000 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029957865957566334, + "loss": 5.7589, + "step": 2500 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029943821276755115, + "loss": 5.7376, + "step": 3000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029929776595943896, + "loss": 5.7336, + "step": 3500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002991573191513267, + "loss": 5.6985, + "step": 4000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002990168723432145, + "loss": 5.6467, + "step": 4500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00029887642553510233, + "loss": 5.5431, + "step": 5000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002987359787269901, + "loss": 5.2658, + "step": 5500 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002985955319188779, + "loss": 4.7901, + "step": 6000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002984550851107657, + "loss": 4.3833, + "step": 6500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002983146383026535, + "loss": 4.1403, + "step": 7000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029817419149454126, + "loss": 3.8533, + "step": 7500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00029803374468642907, + "loss": 3.7172, + "step": 8000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002978932978783169, + "loss": 3.6096, + "step": 8500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00029775285107020463, + "loss": 3.4788, + "step": 9000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00029761240426209244, + "loss": 3.3928, + "step": 9500 + }, + { + "epoch": 0.28, + "learning_rate": 0.00029747195745398025, + "loss": 3.2847, + "step": 10000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00029733151064586806, + "loss": 3.2708, + "step": 10500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002971910638377558, + "loss": 3.2045, + "step": 11000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002970506170296436, + "loss": 3.1776, + "step": 11500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002969101702215314, + "loss": 3.1367, + "step": 12000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002967697234134192, + "loss": 3.0834, + "step": 12500 + }, + { + "epoch": 0.36, + "learning_rate": 0.000296629276605307, + "loss": 3.0597, + "step": 13000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002964888297971948, + "loss": 3.047, + "step": 13500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002963483829890826, + "loss": 3.0075, + "step": 14000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002962079361809704, + "loss": 3.0121, + "step": 14500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00029606748937285816, + "loss": 2.9503, + "step": 15000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00029592704256474597, + "loss": 2.9327, + "step": 15500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002957865957566337, + "loss": 2.9738, + "step": 16000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029564614894852153, + "loss": 2.9376, + "step": 16500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00029550570214040934, + "loss": 2.8891, + "step": 17000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00029536525533229715, + "loss": 2.9003, + "step": 17500 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002952248085241849, + "loss": 2.8601, + "step": 18000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002950843617160727, + "loss": 2.8786, + "step": 18500 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002949439149079605, + "loss": 2.8824, + "step": 19000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002948034680998483, + "loss": 2.8358, + "step": 19500 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002946630212917361, + "loss": 2.8757, + "step": 20000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002945225744836239, + "loss": 2.8083, + "step": 20500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00029438212767551164, + "loss": 2.7675, + "step": 21000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00029424168086739945, + "loss": 2.8155, + "step": 21500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029410123405928726, + "loss": 2.7924, + "step": 22000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029396078725117507, + "loss": 2.7935, + "step": 22500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002938203404430628, + "loss": 2.7332, + "step": 23000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00029367989363495063, + "loss": 2.7284, + "step": 23500 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002935394468268384, + "loss": 2.757, + "step": 24000 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002933990000187262, + "loss": 2.7514, + "step": 24500 + }, + { + "epoch": 0.7, + "learning_rate": 0.000293258553210614, + "loss": 2.7135, + "step": 25000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002931181064025018, + "loss": 2.7346, + "step": 25500 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002929776595943896, + "loss": 2.7471, + "step": 26000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00029283721278627737, + "loss": 2.7027, + "step": 26500 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002926967659781652, + "loss": 2.7163, + "step": 27000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00029255631917005293, + "loss": 2.7118, + "step": 27500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00029241587236194074, + "loss": 2.668, + "step": 28000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00029227542555382855, + "loss": 2.6531, + "step": 28500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00029213497874571636, + "loss": 2.6297, + "step": 29000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00029199453193760416, + "loss": 2.6589, + "step": 29500 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002918540851294919, + "loss": 2.6096, + "step": 30000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002917136383213797, + "loss": 2.643, + "step": 30500 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002915731915132675, + "loss": 2.621, + "step": 31000 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002914327447051553, + "loss": 2.6194, + "step": 31500 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002912922978970431, + "loss": 2.649, + "step": 32000 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002911518510889309, + "loss": 2.6221, + "step": 32500 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002910114042808187, + "loss": 2.5794, + "step": 33000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00029087095747270646, + "loss": 2.6582, + "step": 33500 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002907305106645943, + "loss": 2.6264, + "step": 34000 + }, + { + "epoch": 0.97, + "learning_rate": 0.000290590063856482, + "loss": 2.6119, + "step": 34500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00029044961704836983, + "loss": 2.5911, + "step": 35000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00029030917024025764, + "loss": 2.5688, + "step": 35500 + }, + { + "epoch": 1.01, + "learning_rate": 0.00029016872343214545, + "loss": 2.5316, + "step": 36000 + }, + { + "epoch": 1.02, + "learning_rate": 0.00029002827662403326, + "loss": 2.5186, + "step": 36500 + }, + { + "epoch": 1.04, + "learning_rate": 0.000289887829815921, + "loss": 2.4959, + "step": 37000 + }, + { + "epoch": 1.05, + "learning_rate": 0.0002897473830078088, + "loss": 2.5466, + "step": 37500 + }, + { + "epoch": 1.07, + "learning_rate": 0.0002896069361996966, + "loss": 2.5292, + "step": 38000 + }, + { + "epoch": 1.08, + "learning_rate": 0.0002894664893915844, + "loss": 2.5283, + "step": 38500 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002893260425834722, + "loss": 2.5334, + "step": 39000 + }, + { + "epoch": 1.11, + "learning_rate": 0.00028918559577536, + "loss": 2.5344, + "step": 39500 + }, + { + "epoch": 1.12, + "learning_rate": 0.0002890451489672478, + "loss": 2.5227, + "step": 40000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00028890470215913556, + "loss": 2.5043, + "step": 40500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00028876425535102337, + "loss": 2.4737, + "step": 41000 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002886238085429112, + "loss": 2.5168, + "step": 41500 + }, + { + "epoch": 1.18, + "learning_rate": 0.00028848336173479893, + "loss": 2.5557, + "step": 42000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00028834291492668674, + "loss": 2.4842, + "step": 42500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00028820246811857455, + "loss": 2.5212, + "step": 43000 + }, + { + "epoch": 1.22, + "learning_rate": 0.00028806202131046235, + "loss": 2.5265, + "step": 43500 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002879215745023501, + "loss": 2.5013, + "step": 44000 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002877811276942379, + "loss": 2.5131, + "step": 44500 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002876406808861257, + "loss": 2.4899, + "step": 45000 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002875002340780135, + "loss": 2.5271, + "step": 45500 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002873597872699013, + "loss": 2.4909, + "step": 46000 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002872193404617891, + "loss": 2.4901, + "step": 46500 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002870788936536769, + "loss": 2.4997, + "step": 47000 + }, + { + "epoch": 1.33, + "learning_rate": 0.00028693844684556466, + "loss": 2.4705, + "step": 47500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00028679800003745246, + "loss": 2.453, + "step": 48000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00028665755322934027, + "loss": 2.4742, + "step": 48500 + }, + { + "epoch": 1.38, + "learning_rate": 0.000286517106421228, + "loss": 2.4825, + "step": 49000 + }, + { + "epoch": 1.39, + "learning_rate": 0.00028637665961311583, + "loss": 2.4236, + "step": 49500 + }, + { + "epoch": 1.4, + "learning_rate": 0.00028623621280500364, + "loss": 2.4295, + "step": 50000 + }, + { + "epoch": 1.42, + "learning_rate": 0.00028609576599689145, + "loss": 2.4636, + "step": 50500 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002859553191887792, + "loss": 2.4772, + "step": 51000 + }, + { + "epoch": 1.45, + "learning_rate": 0.000285814872380667, + "loss": 2.4243, + "step": 51500 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002856744255725548, + "loss": 2.4275, + "step": 52000 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002855339787644426, + "loss": 2.4304, + "step": 52500 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002853935319563304, + "loss": 2.4375, + "step": 53000 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002852530851482182, + "loss": 2.4304, + "step": 53500 + }, + { + "epoch": 1.52, + "learning_rate": 0.00028511263834010594, + "loss": 2.4101, + "step": 54000 + }, + { + "epoch": 1.53, + "learning_rate": 0.00028497219153199375, + "loss": 2.4256, + "step": 54500 + }, + { + "epoch": 1.54, + "learning_rate": 0.00028483174472388156, + "loss": 2.4022, + "step": 55000 + }, + { + "epoch": 1.56, + "learning_rate": 0.00028469129791576937, + "loss": 2.405, + "step": 55500 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002845508511076571, + "loss": 2.4053, + "step": 56000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00028441040429954493, + "loss": 2.4268, + "step": 56500 + }, + { + "epoch": 1.6, + "learning_rate": 0.0002842699574914327, + "loss": 2.3901, + "step": 57000 + }, + { + "epoch": 1.61, + "learning_rate": 0.0002841295106833205, + "loss": 2.4176, + "step": 57500 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002839890638752083, + "loss": 2.42, + "step": 58000 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002838486170670961, + "loss": 2.3913, + "step": 58500 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002837081702589839, + "loss": 2.382, + "step": 59000 + }, + { + "epoch": 1.67, + "learning_rate": 0.00028356772345087167, + "loss": 2.3878, + "step": 59500 + }, + { + "epoch": 1.68, + "learning_rate": 0.0002834272766427595, + "loss": 2.4147, + "step": 60000 + }, + { + "epoch": 1.7, + "learning_rate": 0.00028328682983464723, + "loss": 2.38, + "step": 60500 + }, + { + "epoch": 1.71, + "learning_rate": 0.00028314638302653504, + "loss": 2.3794, + "step": 61000 + }, + { + "epoch": 1.73, + "learning_rate": 0.00028300593621842285, + "loss": 2.3822, + "step": 61500 + }, + { + "epoch": 1.74, + "learning_rate": 0.00028286548941031065, + "loss": 2.3941, + "step": 62000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00028272504260219846, + "loss": 2.3859, + "step": 62500 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002825845957940862, + "loss": 2.387, + "step": 63000 + }, + { + "epoch": 1.78, + "learning_rate": 0.000282444148985974, + "loss": 2.3683, + "step": 63500 + }, + { + "epoch": 1.8, + "learning_rate": 0.0002823037021778618, + "loss": 2.3984, + "step": 64000 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002821632553697496, + "loss": 2.3278, + "step": 64500 + }, + { + "epoch": 1.82, + "learning_rate": 0.0002820228085616374, + "loss": 2.3648, + "step": 65000 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002818823617535252, + "loss": 2.3494, + "step": 65500 + }, + { + "epoch": 1.85, + "learning_rate": 0.000281741914945413, + "loss": 2.3724, + "step": 66000 + }, + { + "epoch": 1.87, + "learning_rate": 0.00028160146813730076, + "loss": 2.3507, + "step": 66500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00028146102132918857, + "loss": 2.3449, + "step": 67000 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002813205745210763, + "loss": 2.371, + "step": 67500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00028118012771296413, + "loss": 2.3572, + "step": 68000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00028103968090485194, + "loss": 2.3779, + "step": 68500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00028089923409673975, + "loss": 2.3376, + "step": 69000 + }, + { + "epoch": 1.95, + "learning_rate": 0.00028075878728862756, + "loss": 2.3384, + "step": 69500 + }, + { + "epoch": 1.96, + "learning_rate": 0.0002806183404805153, + "loss": 2.3622, + "step": 70000 + }, + { + "epoch": 1.98, + "learning_rate": 0.0002804778936724031, + "loss": 2.3338, + "step": 70500 + }, + { + "epoch": 1.99, + "learning_rate": 0.0002803374468642909, + "loss": 2.3579, + "step": 71000 + }, + { + "epoch": 2.01, + "learning_rate": 0.0002801970000561787, + "loss": 2.3414, + "step": 71500 + }, + { + "epoch": 2.02, + "learning_rate": 0.0002800565532480665, + "loss": 2.2625, + "step": 72000 + }, + { + "epoch": 2.03, + "learning_rate": 0.0002799161064399543, + "loss": 2.2404, + "step": 72500 + }, + { + "epoch": 2.05, + "learning_rate": 0.0002797756596318421, + "loss": 2.2747, + "step": 73000 + }, + { + "epoch": 2.06, + "learning_rate": 0.00027963521282372986, + "loss": 2.2791, + "step": 73500 + }, + { + "epoch": 2.08, + "learning_rate": 0.00027949476601561767, + "loss": 2.3042, + "step": 74000 + }, + { + "epoch": 2.09, + "learning_rate": 0.0002793543192075054, + "loss": 2.2652, + "step": 74500 + }, + { + "epoch": 2.1, + "learning_rate": 0.00027921387239939323, + "loss": 2.236, + "step": 75000 + }, + { + "epoch": 2.12, + "learning_rate": 0.00027907342559128104, + "loss": 2.2508, + "step": 75500 + }, + { + "epoch": 2.13, + "learning_rate": 0.00027893297878316885, + "loss": 2.2145, + "step": 76000 + }, + { + "epoch": 2.15, + "learning_rate": 0.00027879253197505665, + "loss": 2.2401, + "step": 76500 + }, + { + "epoch": 2.16, + "learning_rate": 0.0002786520851669444, + "loss": 2.2362, + "step": 77000 + }, + { + "epoch": 2.17, + "learning_rate": 0.0002785116383588322, + "loss": 2.237, + "step": 77500 + }, + { + "epoch": 2.19, + "learning_rate": 0.00027837119155071997, + "loss": 2.2818, + "step": 78000 + }, + { + "epoch": 2.2, + "learning_rate": 0.0002782307447426078, + "loss": 2.2515, + "step": 78500 + }, + { + "epoch": 2.22, + "learning_rate": 0.0002780902979344956, + "loss": 2.2559, + "step": 79000 + }, + { + "epoch": 2.23, + "learning_rate": 0.0002779498511263834, + "loss": 2.2788, + "step": 79500 + }, + { + "epoch": 2.25, + "learning_rate": 0.0002778094043182712, + "loss": 2.2364, + "step": 80000 + }, + { + "epoch": 2.26, + "learning_rate": 0.00027766895751015896, + "loss": 2.2632, + "step": 80500 + }, + { + "epoch": 2.27, + "learning_rate": 0.00027752851070204676, + "loss": 2.2095, + "step": 81000 + }, + { + "epoch": 2.29, + "learning_rate": 0.0002773880638939345, + "loss": 2.256, + "step": 81500 + }, + { + "epoch": 2.3, + "learning_rate": 0.0002772476170858223, + "loss": 2.2417, + "step": 82000 + }, + { + "epoch": 2.32, + "learning_rate": 0.00027710717027771013, + "loss": 2.2227, + "step": 82500 + }, + { + "epoch": 2.33, + "learning_rate": 0.00027696672346959794, + "loss": 2.2372, + "step": 83000 + }, + { + "epoch": 2.34, + "learning_rate": 0.00027682627666148575, + "loss": 2.2767, + "step": 83500 + }, + { + "epoch": 2.36, + "learning_rate": 0.0002766858298533735, + "loss": 2.2501, + "step": 84000 + }, + { + "epoch": 2.37, + "learning_rate": 0.0002765453830452613, + "loss": 2.2325, + "step": 84500 + }, + { + "epoch": 2.39, + "learning_rate": 0.00027640493623714906, + "loss": 2.2462, + "step": 85000 + }, + { + "epoch": 2.4, + "learning_rate": 0.00027626448942903687, + "loss": 2.2773, + "step": 85500 + }, + { + "epoch": 2.41, + "learning_rate": 0.0002761240426209247, + "loss": 2.2363, + "step": 86000 + }, + { + "epoch": 2.43, + "learning_rate": 0.0002759835958128125, + "loss": 2.2692, + "step": 86500 + }, + { + "epoch": 2.44, + "learning_rate": 0.0002758431490047003, + "loss": 2.2882, + "step": 87000 + }, + { + "epoch": 2.46, + "learning_rate": 0.00027570270219658805, + "loss": 2.2471, + "step": 87500 + }, + { + "epoch": 2.47, + "learning_rate": 0.00027556225538847586, + "loss": 2.2483, + "step": 88000 + }, + { + "epoch": 2.48, + "learning_rate": 0.0002754218085803636, + "loss": 2.2677, + "step": 88500 + }, + { + "epoch": 2.5, + "learning_rate": 0.0002752813617722514, + "loss": 2.2497, + "step": 89000 + }, + { + "epoch": 2.51, + "learning_rate": 0.00027514091496413923, + "loss": 2.2268, + "step": 89500 + }, + { + "epoch": 2.53, + "learning_rate": 0.00027500046815602704, + "loss": 2.2396, + "step": 90000 + }, + { + "epoch": 2.54, + "learning_rate": 0.0002748600213479148, + "loss": 2.2197, + "step": 90500 + }, + { + "epoch": 2.55, + "learning_rate": 0.0002747195745398026, + "loss": 2.216, + "step": 91000 + }, + { + "epoch": 2.57, + "learning_rate": 0.0002745791277316904, + "loss": 2.2479, + "step": 91500 + }, + { + "epoch": 2.58, + "learning_rate": 0.0002744386809235782, + "loss": 2.238, + "step": 92000 + }, + { + "epoch": 2.6, + "learning_rate": 0.00027429823411546597, + "loss": 2.2426, + "step": 92500 + }, + { + "epoch": 2.61, + "learning_rate": 0.0002741577873073538, + "loss": 2.2297, + "step": 93000 + }, + { + "epoch": 2.62, + "learning_rate": 0.00027401734049924153, + "loss": 2.183, + "step": 93500 + }, + { + "epoch": 2.64, + "learning_rate": 0.00027387689369112934, + "loss": 2.2415, + "step": 94000 + }, + { + "epoch": 2.65, + "learning_rate": 0.00027373644688301715, + "loss": 2.2219, + "step": 94500 + }, + { + "epoch": 2.67, + "learning_rate": 0.00027359600007490495, + "loss": 2.2549, + "step": 95000 + }, + { + "epoch": 2.68, + "learning_rate": 0.00027345555326679276, + "loss": 2.2516, + "step": 95500 + }, + { + "epoch": 2.69, + "learning_rate": 0.0002733151064586805, + "loss": 2.2183, + "step": 96000 + }, + { + "epoch": 2.71, + "learning_rate": 0.0002731746596505683, + "loss": 2.1943, + "step": 96500 + }, + { + "epoch": 2.72, + "learning_rate": 0.0002730342128424561, + "loss": 2.2435, + "step": 97000 + }, + { + "epoch": 2.74, + "learning_rate": 0.0002728937660343439, + "loss": 2.2291, + "step": 97500 + }, + { + "epoch": 2.75, + "learning_rate": 0.0002727533192262317, + "loss": 2.2371, + "step": 98000 + }, + { + "epoch": 2.76, + "learning_rate": 0.0002726128724181195, + "loss": 2.235, + "step": 98500 + }, + { + "epoch": 2.78, + "learning_rate": 0.0002724724256100073, + "loss": 2.2207, + "step": 99000 + }, + { + "epoch": 2.79, + "learning_rate": 0.00027233197880189506, + "loss": 2.26, + "step": 99500 + }, + { + "epoch": 2.81, + "learning_rate": 0.00027219153199378287, + "loss": 2.2456, + "step": 100000 + }, + { + "epoch": 2.82, + "learning_rate": 0.0002720510851856706, + "loss": 2.251, + "step": 100500 + }, + { + "epoch": 2.83, + "learning_rate": 0.00027191063837755843, + "loss": 2.2112, + "step": 101000 + }, + { + "epoch": 2.85, + "learning_rate": 0.00027177019156944624, + "loss": 2.2283, + "step": 101500 + }, + { + "epoch": 2.86, + "learning_rate": 0.00027162974476133405, + "loss": 2.2472, + "step": 102000 + }, + { + "epoch": 2.88, + "learning_rate": 0.00027148929795322186, + "loss": 2.2258, + "step": 102500 + }, + { + "epoch": 2.89, + "learning_rate": 0.0002713488511451096, + "loss": 2.225, + "step": 103000 + }, + { + "epoch": 2.9, + "learning_rate": 0.0002712084043369974, + "loss": 2.2183, + "step": 103500 + }, + { + "epoch": 2.92, + "learning_rate": 0.0002710679575288852, + "loss": 2.2316, + "step": 104000 + }, + { + "epoch": 2.93, + "learning_rate": 0.000270927510720773, + "loss": 2.2095, + "step": 104500 + }, + { + "epoch": 2.95, + "learning_rate": 0.0002707870639126608, + "loss": 2.2091, + "step": 105000 + }, + { + "epoch": 2.96, + "learning_rate": 0.0002706466171045486, + "loss": 2.2138, + "step": 105500 + }, + { + "epoch": 2.97, + "learning_rate": 0.0002705061702964364, + "loss": 2.2082, + "step": 106000 + }, + { + "epoch": 2.99, + "learning_rate": 0.00027036572348832416, + "loss": 2.2378, + "step": 106500 + }, + { + "epoch": 3.0, + "learning_rate": 0.00027022527668021197, + "loss": 2.211, + "step": 107000 + }, + { + "epoch": 3.02, + "learning_rate": 0.0002700848298720997, + "loss": 2.1808, + "step": 107500 + }, + { + "epoch": 3.03, + "learning_rate": 0.00026994438306398753, + "loss": 2.1297, + "step": 108000 + }, + { + "epoch": 3.04, + "learning_rate": 0.00026980393625587534, + "loss": 2.1099, + "step": 108500 + }, + { + "epoch": 3.06, + "learning_rate": 0.00026966348944776314, + "loss": 2.1498, + "step": 109000 + }, + { + "epoch": 3.07, + "learning_rate": 0.00026952304263965095, + "loss": 2.1412, + "step": 109500 + }, + { + "epoch": 3.09, + "learning_rate": 0.0002693825958315387, + "loss": 2.1386, + "step": 110000 + }, + { + "epoch": 3.1, + "learning_rate": 0.0002692421490234265, + "loss": 2.1541, + "step": 110500 + }, + { + "epoch": 3.11, + "learning_rate": 0.00026910170221531427, + "loss": 2.1623, + "step": 111000 + }, + { + "epoch": 3.13, + "learning_rate": 0.0002689612554072021, + "loss": 2.1315, + "step": 111500 + }, + { + "epoch": 3.14, + "learning_rate": 0.0002688208085990899, + "loss": 2.1556, + "step": 112000 + }, + { + "epoch": 3.16, + "learning_rate": 0.0002686803617909777, + "loss": 2.1764, + "step": 112500 + }, + { + "epoch": 3.17, + "learning_rate": 0.0002685399149828655, + "loss": 2.1254, + "step": 113000 + }, + { + "epoch": 3.19, + "learning_rate": 0.00026839946817475325, + "loss": 2.1201, + "step": 113500 + }, + { + "epoch": 3.2, + "learning_rate": 0.00026825902136664106, + "loss": 2.1488, + "step": 114000 + }, + { + "epoch": 3.21, + "learning_rate": 0.0002681185745585288, + "loss": 2.1311, + "step": 114500 + }, + { + "epoch": 3.23, + "learning_rate": 0.0002679781277504166, + "loss": 2.1196, + "step": 115000 + }, + { + "epoch": 3.24, + "learning_rate": 0.00026783768094230443, + "loss": 2.1513, + "step": 115500 + }, + { + "epoch": 3.26, + "learning_rate": 0.00026769723413419224, + "loss": 2.1543, + "step": 116000 + }, + { + "epoch": 3.27, + "learning_rate": 0.00026755678732608005, + "loss": 2.1223, + "step": 116500 + }, + { + "epoch": 3.28, + "learning_rate": 0.0002674163405179678, + "loss": 2.1536, + "step": 117000 + }, + { + "epoch": 3.3, + "learning_rate": 0.0002672758937098556, + "loss": 2.1219, + "step": 117500 + }, + { + "epoch": 3.31, + "learning_rate": 0.00026713544690174336, + "loss": 2.1435, + "step": 118000 + }, + { + "epoch": 3.33, + "learning_rate": 0.00026699500009363117, + "loss": 2.1299, + "step": 118500 + }, + { + "epoch": 3.34, + "learning_rate": 0.000266854553285519, + "loss": 2.1715, + "step": 119000 + }, + { + "epoch": 3.35, + "learning_rate": 0.0002667141064774068, + "loss": 2.1372, + "step": 119500 + }, + { + "epoch": 3.37, + "learning_rate": 0.0002665736596692946, + "loss": 2.1533, + "step": 120000 + }, + { + "epoch": 3.38, + "learning_rate": 0.00026643321286118235, + "loss": 2.1325, + "step": 120500 + }, + { + "epoch": 3.4, + "learning_rate": 0.00026629276605307016, + "loss": 2.1112, + "step": 121000 + }, + { + "epoch": 3.41, + "learning_rate": 0.0002661523192449579, + "loss": 2.1307, + "step": 121500 + }, + { + "epoch": 3.42, + "learning_rate": 0.0002660118724368457, + "loss": 2.1162, + "step": 122000 + }, + { + "epoch": 3.44, + "learning_rate": 0.00026587142562873353, + "loss": 2.1148, + "step": 122500 + }, + { + "epoch": 3.45, + "learning_rate": 0.00026573097882062134, + "loss": 2.1122, + "step": 123000 + }, + { + "epoch": 3.47, + "learning_rate": 0.0002655905320125091, + "loss": 2.1272, + "step": 123500 + }, + { + "epoch": 3.48, + "learning_rate": 0.0002654500852043969, + "loss": 2.1124, + "step": 124000 + }, + { + "epoch": 3.49, + "learning_rate": 0.0002653096383962847, + "loss": 2.1317, + "step": 124500 + }, + { + "epoch": 3.51, + "learning_rate": 0.00026516919158817246, + "loss": 2.0704, + "step": 125000 + }, + { + "epoch": 3.52, + "learning_rate": 0.00026502874478006027, + "loss": 2.1439, + "step": 125500 + }, + { + "epoch": 3.54, + "learning_rate": 0.0002648882979719481, + "loss": 2.1099, + "step": 126000 + }, + { + "epoch": 3.55, + "learning_rate": 0.00026474785116383583, + "loss": 2.1256, + "step": 126500 + }, + { + "epoch": 3.56, + "learning_rate": 0.00026460740435572364, + "loss": 2.1312, + "step": 127000 + }, + { + "epoch": 3.58, + "learning_rate": 0.00026446695754761145, + "loss": 2.1406, + "step": 127500 + }, + { + "epoch": 3.59, + "learning_rate": 0.00026432651073949925, + "loss": 2.0957, + "step": 128000 + }, + { + "epoch": 3.61, + "learning_rate": 0.000264186063931387, + "loss": 2.1054, + "step": 128500 + }, + { + "epoch": 3.62, + "learning_rate": 0.0002640456171232748, + "loss": 2.1584, + "step": 129000 + }, + { + "epoch": 3.63, + "learning_rate": 0.0002639051703151626, + "loss": 2.119, + "step": 129500 + }, + { + "epoch": 3.65, + "learning_rate": 0.0002637647235070504, + "loss": 2.1035, + "step": 130000 + }, + { + "epoch": 3.66, + "learning_rate": 0.0002636242766989382, + "loss": 2.1215, + "step": 130500 + }, + { + "epoch": 3.68, + "learning_rate": 0.000263483829890826, + "loss": 2.1211, + "step": 131000 + }, + { + "epoch": 3.69, + "learning_rate": 0.0002633433830827138, + "loss": 2.1282, + "step": 131500 + }, + { + "epoch": 3.7, + "learning_rate": 0.00026320293627460155, + "loss": 2.1391, + "step": 132000 + }, + { + "epoch": 3.72, + "learning_rate": 0.00026306248946648936, + "loss": 2.1249, + "step": 132500 + }, + { + "epoch": 3.73, + "learning_rate": 0.00026292204265837717, + "loss": 2.1576, + "step": 133000 + }, + { + "epoch": 3.75, + "learning_rate": 0.0002627815958502649, + "loss": 2.1529, + "step": 133500 + }, + { + "epoch": 3.76, + "learning_rate": 0.00026264114904215273, + "loss": 2.119, + "step": 134000 + }, + { + "epoch": 3.77, + "learning_rate": 0.00026250070223404054, + "loss": 2.1246, + "step": 134500 + }, + { + "epoch": 3.79, + "learning_rate": 0.00026236025542592835, + "loss": 2.1074, + "step": 135000 + }, + { + "epoch": 3.8, + "learning_rate": 0.0002622198086178161, + "loss": 2.1399, + "step": 135500 + }, + { + "epoch": 3.82, + "learning_rate": 0.0002620793618097039, + "loss": 2.1434, + "step": 136000 + }, + { + "epoch": 3.83, + "learning_rate": 0.0002619389150015917, + "loss": 2.1012, + "step": 136500 + }, + { + "epoch": 3.84, + "learning_rate": 0.00026179846819347947, + "loss": 2.103, + "step": 137000 + }, + { + "epoch": 3.86, + "learning_rate": 0.0002616580213853673, + "loss": 2.1803, + "step": 137500 + }, + { + "epoch": 3.87, + "learning_rate": 0.0002615175745772551, + "loss": 2.0987, + "step": 138000 + }, + { + "epoch": 3.89, + "learning_rate": 0.0002613771277691429, + "loss": 2.1171, + "step": 138500 + }, + { + "epoch": 3.9, + "learning_rate": 0.00026123668096103065, + "loss": 2.1175, + "step": 139000 + }, + { + "epoch": 3.91, + "learning_rate": 0.00026109623415291846, + "loss": 2.1562, + "step": 139500 + }, + { + "epoch": 3.93, + "learning_rate": 0.00026095578734480627, + "loss": 2.1321, + "step": 140000 + }, + { + "epoch": 3.94, + "learning_rate": 0.000260815340536694, + "loss": 2.1195, + "step": 140500 + }, + { + "epoch": 3.96, + "learning_rate": 0.00026067489372858183, + "loss": 2.1, + "step": 141000 + }, + { + "epoch": 3.97, + "learning_rate": 0.00026053444692046964, + "loss": 2.1052, + "step": 141500 + }, + { + "epoch": 3.98, + "learning_rate": 0.00026039400011235744, + "loss": 2.0947, + "step": 142000 + }, + { + "epoch": 4.0, + "learning_rate": 0.00026025355330424525, + "loss": 2.1094, + "step": 142500 + }, + { + "epoch": 4.01, + "learning_rate": 0.000260113106496133, + "loss": 2.0432, + "step": 143000 + }, + { + "epoch": 4.03, + "learning_rate": 0.0002599726596880208, + "loss": 2.0555, + "step": 143500 + }, + { + "epoch": 4.04, + "learning_rate": 0.00025983221287990857, + "loss": 2.007, + "step": 144000 + }, + { + "epoch": 4.06, + "learning_rate": 0.0002596917660717964, + "loss": 2.0206, + "step": 144500 + }, + { + "epoch": 4.07, + "learning_rate": 0.0002595513192636842, + "loss": 2.0597, + "step": 145000 + }, + { + "epoch": 4.08, + "learning_rate": 0.000259410872455572, + "loss": 2.0644, + "step": 145500 + }, + { + "epoch": 4.1, + "learning_rate": 0.0002592704256474598, + "loss": 2.0271, + "step": 146000 + }, + { + "epoch": 4.11, + "learning_rate": 0.00025912997883934755, + "loss": 2.0555, + "step": 146500 + }, + { + "epoch": 4.13, + "learning_rate": 0.00025898953203123536, + "loss": 2.0494, + "step": 147000 + }, + { + "epoch": 4.14, + "learning_rate": 0.0002588490852231231, + "loss": 2.0369, + "step": 147500 + }, + { + "epoch": 4.15, + "learning_rate": 0.0002587086384150109, + "loss": 2.0388, + "step": 148000 + }, + { + "epoch": 4.17, + "learning_rate": 0.00025856819160689873, + "loss": 2.0225, + "step": 148500 + }, + { + "epoch": 4.18, + "learning_rate": 0.00025842774479878654, + "loss": 2.0506, + "step": 149000 + }, + { + "epoch": 4.2, + "learning_rate": 0.00025828729799067435, + "loss": 2.028, + "step": 149500 + }, + { + "epoch": 4.21, + "learning_rate": 0.0002581468511825621, + "loss": 2.0405, + "step": 150000 + }, + { + "epoch": 4.22, + "learning_rate": 0.0002580064043744499, + "loss": 2.0818, + "step": 150500 + }, + { + "epoch": 4.24, + "learning_rate": 0.00025786595756633766, + "loss": 2.0557, + "step": 151000 + }, + { + "epoch": 4.25, + "learning_rate": 0.00025772551075822547, + "loss": 2.0426, + "step": 151500 + }, + { + "epoch": 4.27, + "learning_rate": 0.0002575850639501133, + "loss": 2.0414, + "step": 152000 + }, + { + "epoch": 4.28, + "learning_rate": 0.0002574446171420011, + "loss": 2.0591, + "step": 152500 + }, + { + "epoch": 4.29, + "learning_rate": 0.0002573041703338889, + "loss": 2.0769, + "step": 153000 + }, + { + "epoch": 4.31, + "learning_rate": 0.00025716372352577665, + "loss": 2.087, + "step": 153500 + }, + { + "epoch": 4.32, + "learning_rate": 0.00025702327671766446, + "loss": 2.0396, + "step": 154000 + }, + { + "epoch": 4.34, + "learning_rate": 0.0002568828299095522, + "loss": 2.0451, + "step": 154500 + }, + { + "epoch": 4.35, + "learning_rate": 0.00025674238310144, + "loss": 2.0115, + "step": 155000 + }, + { + "epoch": 4.36, + "learning_rate": 0.0002566019362933278, + "loss": 2.0424, + "step": 155500 + }, + { + "epoch": 4.38, + "learning_rate": 0.00025646148948521563, + "loss": 2.0356, + "step": 156000 + }, + { + "epoch": 4.39, + "learning_rate": 0.00025632104267710344, + "loss": 2.0349, + "step": 156500 + }, + { + "epoch": 4.41, + "learning_rate": 0.0002561805958689912, + "loss": 2.0458, + "step": 157000 + }, + { + "epoch": 4.42, + "learning_rate": 0.000256040149060879, + "loss": 2.0379, + "step": 157500 + }, + { + "epoch": 4.43, + "learning_rate": 0.00025589970225276676, + "loss": 2.0478, + "step": 158000 + }, + { + "epoch": 4.45, + "learning_rate": 0.00025575925544465457, + "loss": 2.042, + "step": 158500 + }, + { + "epoch": 4.46, + "learning_rate": 0.0002556188086365424, + "loss": 2.0609, + "step": 159000 + }, + { + "epoch": 4.48, + "learning_rate": 0.0002554783618284302, + "loss": 2.0513, + "step": 159500 + }, + { + "epoch": 4.49, + "learning_rate": 0.00025533791502031794, + "loss": 2.0489, + "step": 160000 + }, + { + "epoch": 4.5, + "learning_rate": 0.00025519746821220574, + "loss": 2.049, + "step": 160500 + }, + { + "epoch": 4.52, + "learning_rate": 0.00025505702140409355, + "loss": 2.0444, + "step": 161000 + }, + { + "epoch": 4.53, + "learning_rate": 0.0002549165745959813, + "loss": 2.0658, + "step": 161500 + }, + { + "epoch": 4.55, + "learning_rate": 0.0002547761277878691, + "loss": 2.0258, + "step": 162000 + }, + { + "epoch": 4.56, + "learning_rate": 0.0002546356809797569, + "loss": 2.0551, + "step": 162500 + }, + { + "epoch": 4.57, + "learning_rate": 0.0002544952341716447, + "loss": 2.0555, + "step": 163000 + }, + { + "epoch": 4.59, + "learning_rate": 0.0002543547873635325, + "loss": 2.0466, + "step": 163500 + }, + { + "epoch": 4.6, + "learning_rate": 0.0002542143405554203, + "loss": 2.0336, + "step": 164000 + }, + { + "epoch": 4.62, + "learning_rate": 0.0002540738937473081, + "loss": 2.0412, + "step": 164500 + }, + { + "epoch": 4.63, + "learning_rate": 0.00025393344693919585, + "loss": 2.0094, + "step": 165000 + }, + { + "epoch": 4.64, + "learning_rate": 0.00025379300013108366, + "loss": 2.0189, + "step": 165500 + }, + { + "epoch": 4.66, + "learning_rate": 0.0002536525533229714, + "loss": 2.0634, + "step": 166000 + }, + { + "epoch": 4.67, + "learning_rate": 0.0002535121065148592, + "loss": 2.0543, + "step": 166500 + }, + { + "epoch": 4.69, + "learning_rate": 0.00025337165970674703, + "loss": 2.0689, + "step": 167000 + }, + { + "epoch": 4.7, + "learning_rate": 0.00025323121289863484, + "loss": 2.0068, + "step": 167500 + }, + { + "epoch": 4.71, + "learning_rate": 0.00025309076609052265, + "loss": 2.0627, + "step": 168000 + }, + { + "epoch": 4.73, + "learning_rate": 0.0002529503192824104, + "loss": 2.0485, + "step": 168500 + }, + { + "epoch": 4.74, + "learning_rate": 0.0002528098724742982, + "loss": 2.0589, + "step": 169000 + }, + { + "epoch": 4.76, + "learning_rate": 0.000252669425666186, + "loss": 2.0407, + "step": 169500 + }, + { + "epoch": 4.77, + "learning_rate": 0.00025252897885807377, + "loss": 2.0687, + "step": 170000 + }, + { + "epoch": 4.78, + "learning_rate": 0.0002523885320499616, + "loss": 2.0451, + "step": 170500 + }, + { + "epoch": 4.8, + "learning_rate": 0.0002522480852418494, + "loss": 2.0376, + "step": 171000 + }, + { + "epoch": 4.81, + "learning_rate": 0.0002521076384337372, + "loss": 2.0302, + "step": 171500 + }, + { + "epoch": 4.83, + "learning_rate": 0.00025196719162562495, + "loss": 2.0364, + "step": 172000 + }, + { + "epoch": 4.84, + "learning_rate": 0.00025182674481751276, + "loss": 2.0172, + "step": 172500 + }, + { + "epoch": 4.85, + "learning_rate": 0.00025168629800940057, + "loss": 2.0511, + "step": 173000 + }, + { + "epoch": 4.87, + "learning_rate": 0.0002515458512012883, + "loss": 2.0463, + "step": 173500 + }, + { + "epoch": 4.88, + "learning_rate": 0.00025140540439317613, + "loss": 2.0661, + "step": 174000 + }, + { + "epoch": 4.9, + "learning_rate": 0.00025126495758506394, + "loss": 2.0165, + "step": 174500 + }, + { + "epoch": 4.91, + "learning_rate": 0.00025112451077695174, + "loss": 2.031, + "step": 175000 + }, + { + "epoch": 4.93, + "learning_rate": 0.0002509840639688395, + "loss": 2.0353, + "step": 175500 + }, + { + "epoch": 4.94, + "learning_rate": 0.0002508436171607273, + "loss": 2.0225, + "step": 176000 + }, + { + "epoch": 4.95, + "learning_rate": 0.0002507031703526151, + "loss": 2.0854, + "step": 176500 + }, + { + "epoch": 4.97, + "learning_rate": 0.00025056272354450287, + "loss": 2.0487, + "step": 177000 + }, + { + "epoch": 4.98, + "learning_rate": 0.0002504222767363907, + "loss": 2.0354, + "step": 177500 + }, + { + "epoch": 5.0, + "learning_rate": 0.0002502818299282785, + "loss": 2.04, + "step": 178000 + }, + { + "epoch": 5.01, + "learning_rate": 0.0002501413831201663, + "loss": 2.0023, + "step": 178500 + }, + { + "epoch": 5.02, + "learning_rate": 0.00025000093631205404, + "loss": 1.9267, + "step": 179000 + }, + { + "epoch": 5.04, + "learning_rate": 0.00024986048950394185, + "loss": 1.9619, + "step": 179500 + }, + { + "epoch": 5.05, + "learning_rate": 0.00024972004269582966, + "loss": 1.9616, + "step": 180000 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002495795958877174, + "loss": 1.9733, + "step": 180500 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002494391490796052, + "loss": 1.9445, + "step": 181000 + }, + { + "epoch": 5.09, + "learning_rate": 0.00024929870227149303, + "loss": 1.9845, + "step": 181500 + }, + { + "epoch": 5.11, + "learning_rate": 0.00024915825546338084, + "loss": 1.9633, + "step": 182000 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002490178086552686, + "loss": 1.9904, + "step": 182500 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002488773618471564, + "loss": 1.9854, + "step": 183000 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002487369150390442, + "loss": 1.952, + "step": 183500 + }, + { + "epoch": 5.16, + "learning_rate": 0.00024859646823093196, + "loss": 2.003, + "step": 184000 + }, + { + "epoch": 5.18, + "learning_rate": 0.00024845602142281977, + "loss": 1.9374, + "step": 184500 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002483155746147076, + "loss": 1.9553, + "step": 185000 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002481751278065954, + "loss": 1.9394, + "step": 185500 + }, + { + "epoch": 5.22, + "learning_rate": 0.00024803468099848314, + "loss": 1.9391, + "step": 186000 + }, + { + "epoch": 5.23, + "learning_rate": 0.00024789423419037095, + "loss": 1.9604, + "step": 186500 + }, + { + "epoch": 5.25, + "learning_rate": 0.00024775378738225876, + "loss": 1.9474, + "step": 187000 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002476133405741465, + "loss": 1.9424, + "step": 187500 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002474728937660343, + "loss": 1.9742, + "step": 188000 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002473324469579221, + "loss": 1.9361, + "step": 188500 + }, + { + "epoch": 5.3, + "learning_rate": 0.00024719200014980993, + "loss": 1.9689, + "step": 189000 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002470515533416977, + "loss": 1.9674, + "step": 189500 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002469111065335855, + "loss": 1.9602, + "step": 190000 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002467706597254733, + "loss": 1.9455, + "step": 190500 + }, + { + "epoch": 5.36, + "learning_rate": 0.00024663021291736106, + "loss": 1.9798, + "step": 191000 + }, + { + "epoch": 5.37, + "learning_rate": 0.00024648976610924887, + "loss": 1.9809, + "step": 191500 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002463493193011367, + "loss": 1.9677, + "step": 192000 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002462088724930245, + "loss": 1.981, + "step": 192500 + }, + { + "epoch": 5.42, + "learning_rate": 0.00024606842568491224, + "loss": 1.9866, + "step": 193000 + }, + { + "epoch": 5.43, + "learning_rate": 0.00024592797887680004, + "loss": 1.9806, + "step": 193500 + }, + { + "epoch": 5.44, + "learning_rate": 0.00024578753206868785, + "loss": 1.9721, + "step": 194000 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002456470852605756, + "loss": 1.9762, + "step": 194500 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002455066384524634, + "loss": 2.0143, + "step": 195000 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002453661916443512, + "loss": 1.9659, + "step": 195500 + }, + { + "epoch": 5.5, + "learning_rate": 0.000245225744836239, + "loss": 1.9384, + "step": 196000 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002450852980281268, + "loss": 1.9937, + "step": 196500 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002449448512200146, + "loss": 1.9358, + "step": 197000 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002448044044119024, + "loss": 1.9805, + "step": 197500 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024466395760379015, + "loss": 1.952, + "step": 198000 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024452351079567796, + "loss": 2.03, + "step": 198500 + }, + { + "epoch": 5.58, + "learning_rate": 0.0002443830639875657, + "loss": 2.0039, + "step": 199000 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002442426171794535, + "loss": 1.9566, + "step": 199500 + }, + { + "epoch": 5.61, + "learning_rate": 0.00024410217037134136, + "loss": 1.9538, + "step": 200000 + }, + { + "epoch": 5.63, + "learning_rate": 0.00024396172356322914, + "loss": 1.9754, + "step": 200500 + }, + { + "epoch": 5.64, + "learning_rate": 0.00024382127675511695, + "loss": 1.977, + "step": 201000 + }, + { + "epoch": 5.65, + "learning_rate": 0.0002436808299470047, + "loss": 1.9565, + "step": 201500 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002435403831388925, + "loss": 1.9479, + "step": 202000 + }, + { + "epoch": 5.68, + "learning_rate": 0.0002433999363307803, + "loss": 2.0019, + "step": 202500 + }, + { + "epoch": 5.7, + "learning_rate": 0.0002432594895226681, + "loss": 1.9587, + "step": 203000 + }, + { + "epoch": 5.71, + "learning_rate": 0.0002431190427145559, + "loss": 1.9554, + "step": 203500 + }, + { + "epoch": 5.72, + "learning_rate": 0.0002429785959064437, + "loss": 1.9694, + "step": 204000 + }, + { + "epoch": 5.74, + "learning_rate": 0.00024283814909833147, + "loss": 1.9901, + "step": 204500 + }, + { + "epoch": 5.75, + "learning_rate": 0.00024269770229021925, + "loss": 1.9503, + "step": 205000 + }, + { + "epoch": 5.77, + "learning_rate": 0.00024255725548210706, + "loss": 1.9554, + "step": 205500 + }, + { + "epoch": 5.78, + "learning_rate": 0.00024241680867399484, + "loss": 1.9525, + "step": 206000 + }, + { + "epoch": 5.79, + "learning_rate": 0.00024227636186588265, + "loss": 1.981, + "step": 206500 + }, + { + "epoch": 5.81, + "learning_rate": 0.00024213591505777045, + "loss": 1.9451, + "step": 207000 + }, + { + "epoch": 5.82, + "learning_rate": 0.0002419954682496582, + "loss": 1.9553, + "step": 207500 + }, + { + "epoch": 5.84, + "learning_rate": 0.00024185502144154602, + "loss": 1.9474, + "step": 208000 + }, + { + "epoch": 5.85, + "learning_rate": 0.0002417145746334338, + "loss": 1.9642, + "step": 208500 + }, + { + "epoch": 5.87, + "learning_rate": 0.0002415741278253216, + "loss": 1.9603, + "step": 209000 + }, + { + "epoch": 5.88, + "learning_rate": 0.00024143368101720939, + "loss": 1.9093, + "step": 209500 + }, + { + "epoch": 5.89, + "learning_rate": 0.0002412932342090972, + "loss": 1.9364, + "step": 210000 + }, + { + "epoch": 5.91, + "learning_rate": 0.000241152787400985, + "loss": 1.9891, + "step": 210500 + }, + { + "epoch": 5.92, + "learning_rate": 0.00024101234059287276, + "loss": 1.9717, + "step": 211000 + }, + { + "epoch": 5.94, + "learning_rate": 0.00024087189378476056, + "loss": 1.9513, + "step": 211500 + }, + { + "epoch": 5.95, + "learning_rate": 0.00024073144697664834, + "loss": 1.9571, + "step": 212000 + }, + { + "epoch": 5.96, + "learning_rate": 0.00024059100016853615, + "loss": 1.9442, + "step": 212500 + }, + { + "epoch": 5.98, + "learning_rate": 0.00024045055336042393, + "loss": 1.9624, + "step": 213000 + }, + { + "epoch": 5.99, + "learning_rate": 0.00024031010655231174, + "loss": 1.9352, + "step": 213500 + }, + { + "epoch": 6.01, + "learning_rate": 0.00024016965974419955, + "loss": 1.9092, + "step": 214000 + }, + { + "epoch": 6.02, + "learning_rate": 0.0002400292129360873, + "loss": 1.8456, + "step": 214500 + }, + { + "epoch": 6.03, + "learning_rate": 0.0002398887661279751, + "loss": 1.8846, + "step": 215000 + }, + { + "epoch": 6.05, + "learning_rate": 0.0002397483193198629, + "loss": 1.8422, + "step": 215500 + }, + { + "epoch": 6.06, + "learning_rate": 0.0002396078725117507, + "loss": 1.8811, + "step": 216000 + }, + { + "epoch": 6.08, + "learning_rate": 0.00023946742570363848, + "loss": 1.8762, + "step": 216500 + }, + { + "epoch": 6.09, + "learning_rate": 0.0002393269788955263, + "loss": 1.8813, + "step": 217000 + }, + { + "epoch": 6.1, + "learning_rate": 0.0002391865320874141, + "loss": 1.8543, + "step": 217500 + }, + { + "epoch": 6.12, + "learning_rate": 0.00023904608527930185, + "loss": 1.8821, + "step": 218000 + }, + { + "epoch": 6.13, + "learning_rate": 0.00023890563847118966, + "loss": 1.8509, + "step": 218500 + }, + { + "epoch": 6.15, + "learning_rate": 0.00023876519166307744, + "loss": 1.8799, + "step": 219000 + }, + { + "epoch": 6.16, + "learning_rate": 0.00023862474485496525, + "loss": 1.8657, + "step": 219500 + }, + { + "epoch": 6.17, + "learning_rate": 0.00023848429804685306, + "loss": 1.8988, + "step": 220000 + }, + { + "epoch": 6.19, + "learning_rate": 0.00023834385123874084, + "loss": 1.8873, + "step": 220500 + }, + { + "epoch": 6.2, + "learning_rate": 0.00023820340443062862, + "loss": 1.8665, + "step": 221000 + }, + { + "epoch": 6.22, + "learning_rate": 0.0002380629576225164, + "loss": 1.8889, + "step": 221500 + }, + { + "epoch": 6.23, + "learning_rate": 0.0002379225108144042, + "loss": 1.8704, + "step": 222000 + }, + { + "epoch": 6.24, + "learning_rate": 0.000237782064006292, + "loss": 1.8989, + "step": 222500 + }, + { + "epoch": 6.26, + "learning_rate": 0.0002376416171981798, + "loss": 1.8646, + "step": 223000 + }, + { + "epoch": 6.27, + "learning_rate": 0.0002375011703900676, + "loss": 1.8908, + "step": 223500 + }, + { + "epoch": 6.29, + "learning_rate": 0.00023736072358195536, + "loss": 1.9154, + "step": 224000 + }, + { + "epoch": 6.3, + "learning_rate": 0.00023722027677384317, + "loss": 1.8759, + "step": 224500 + }, + { + "epoch": 6.31, + "learning_rate": 0.00023707982996573095, + "loss": 1.8696, + "step": 225000 + }, + { + "epoch": 6.33, + "learning_rate": 0.00023693938315761875, + "loss": 1.8735, + "step": 225500 + }, + { + "epoch": 6.34, + "learning_rate": 0.00023679893634950654, + "loss": 1.8781, + "step": 226000 + }, + { + "epoch": 6.36, + "learning_rate": 0.00023665848954139434, + "loss": 1.8739, + "step": 226500 + }, + { + "epoch": 6.37, + "learning_rate": 0.00023651804273328215, + "loss": 1.8942, + "step": 227000 + }, + { + "epoch": 6.38, + "learning_rate": 0.0002363775959251699, + "loss": 1.8897, + "step": 227500 + }, + { + "epoch": 6.4, + "learning_rate": 0.0002362371491170577, + "loss": 1.8956, + "step": 228000 + }, + { + "epoch": 6.41, + "learning_rate": 0.0002360967023089455, + "loss": 1.8849, + "step": 228500 + }, + { + "epoch": 6.43, + "learning_rate": 0.0002359562555008333, + "loss": 1.8823, + "step": 229000 + }, + { + "epoch": 6.44, + "learning_rate": 0.00023581580869272108, + "loss": 1.8674, + "step": 229500 + }, + { + "epoch": 6.45, + "learning_rate": 0.0002356753618846089, + "loss": 1.896, + "step": 230000 + }, + { + "epoch": 6.47, + "learning_rate": 0.0002355349150764967, + "loss": 1.9278, + "step": 230500 + }, + { + "epoch": 6.48, + "learning_rate": 0.00023539446826838445, + "loss": 1.8707, + "step": 231000 + }, + { + "epoch": 6.5, + "learning_rate": 0.00023525402146027226, + "loss": 1.9087, + "step": 231500 + }, + { + "epoch": 6.51, + "learning_rate": 0.00023511357465216004, + "loss": 1.8762, + "step": 232000 + }, + { + "epoch": 6.52, + "learning_rate": 0.00023497312784404785, + "loss": 1.9027, + "step": 232500 + }, + { + "epoch": 6.54, + "learning_rate": 0.00023483268103593563, + "loss": 1.938, + "step": 233000 + }, + { + "epoch": 6.55, + "learning_rate": 0.00023469223422782344, + "loss": 1.9021, + "step": 233500 + }, + { + "epoch": 6.57, + "learning_rate": 0.00023455178741971125, + "loss": 1.9112, + "step": 234000 + }, + { + "epoch": 6.58, + "learning_rate": 0.000234411340611599, + "loss": 1.904, + "step": 234500 + }, + { + "epoch": 6.59, + "learning_rate": 0.0002342708938034868, + "loss": 1.9256, + "step": 235000 + }, + { + "epoch": 6.61, + "learning_rate": 0.0002341304469953746, + "loss": 1.9016, + "step": 235500 + }, + { + "epoch": 6.62, + "learning_rate": 0.0002339900001872624, + "loss": 1.8928, + "step": 236000 + }, + { + "epoch": 6.64, + "learning_rate": 0.00023384955337915018, + "loss": 1.8887, + "step": 236500 + }, + { + "epoch": 6.65, + "learning_rate": 0.00023370910657103799, + "loss": 1.8664, + "step": 237000 + }, + { + "epoch": 6.66, + "learning_rate": 0.00023356865976292577, + "loss": 1.9367, + "step": 237500 + }, + { + "epoch": 6.68, + "learning_rate": 0.00023342821295481355, + "loss": 1.9405, + "step": 238000 + }, + { + "epoch": 6.69, + "learning_rate": 0.00023328776614670136, + "loss": 1.8594, + "step": 238500 + }, + { + "epoch": 6.71, + "learning_rate": 0.00023314731933858914, + "loss": 1.895, + "step": 239000 + }, + { + "epoch": 6.72, + "learning_rate": 0.00023300687253047695, + "loss": 1.9265, + "step": 239500 + }, + { + "epoch": 6.74, + "learning_rate": 0.00023286642572236473, + "loss": 1.8762, + "step": 240000 + }, + { + "epoch": 6.75, + "learning_rate": 0.0002327259789142525, + "loss": 1.9104, + "step": 240500 + }, + { + "epoch": 6.76, + "learning_rate": 0.00023258553210614031, + "loss": 1.8946, + "step": 241000 + }, + { + "epoch": 6.78, + "learning_rate": 0.0002324450852980281, + "loss": 1.9149, + "step": 241500 + }, + { + "epoch": 6.79, + "learning_rate": 0.0002323046384899159, + "loss": 1.9136, + "step": 242000 + }, + { + "epoch": 6.81, + "learning_rate": 0.00023216419168180368, + "loss": 1.88, + "step": 242500 + }, + { + "epoch": 6.82, + "learning_rate": 0.0002320237448736915, + "loss": 1.8988, + "step": 243000 + }, + { + "epoch": 6.83, + "learning_rate": 0.00023188329806557925, + "loss": 1.9184, + "step": 243500 + }, + { + "epoch": 6.85, + "learning_rate": 0.00023174285125746705, + "loss": 1.8672, + "step": 244000 + }, + { + "epoch": 6.86, + "learning_rate": 0.00023160240444935486, + "loss": 1.9284, + "step": 244500 + }, + { + "epoch": 6.88, + "learning_rate": 0.00023146195764124264, + "loss": 1.9108, + "step": 245000 + }, + { + "epoch": 6.89, + "learning_rate": 0.00023132151083313045, + "loss": 1.8983, + "step": 245500 + }, + { + "epoch": 6.9, + "learning_rate": 0.00023118106402501823, + "loss": 1.8783, + "step": 246000 + }, + { + "epoch": 6.92, + "learning_rate": 0.00023104061721690604, + "loss": 1.9412, + "step": 246500 + }, + { + "epoch": 6.93, + "learning_rate": 0.00023090017040879385, + "loss": 1.9064, + "step": 247000 + }, + { + "epoch": 6.95, + "learning_rate": 0.0002307597236006816, + "loss": 1.9071, + "step": 247500 + }, + { + "epoch": 6.96, + "learning_rate": 0.0002306192767925694, + "loss": 1.9568, + "step": 248000 + }, + { + "epoch": 6.97, + "learning_rate": 0.0002304788299844572, + "loss": 1.9261, + "step": 248500 + }, + { + "epoch": 6.99, + "learning_rate": 0.000230338383176345, + "loss": 1.8863, + "step": 249000 + }, + { + "epoch": 7.0, + "learning_rate": 0.00023019793636823278, + "loss": 1.8946, + "step": 249500 + }, + { + "epoch": 7.02, + "learning_rate": 0.0002300574895601206, + "loss": 1.7976, + "step": 250000 + }, + { + "epoch": 7.03, + "learning_rate": 0.0002299170427520084, + "loss": 1.8215, + "step": 250500 + }, + { + "epoch": 7.04, + "learning_rate": 0.00022977659594389615, + "loss": 1.8041, + "step": 251000 + }, + { + "epoch": 7.06, + "learning_rate": 0.00022963614913578396, + "loss": 1.8455, + "step": 251500 + }, + { + "epoch": 7.07, + "learning_rate": 0.00022949570232767174, + "loss": 1.83, + "step": 252000 + }, + { + "epoch": 7.09, + "learning_rate": 0.00022935525551955955, + "loss": 1.8258, + "step": 252500 + }, + { + "epoch": 7.1, + "learning_rate": 0.00022921480871144733, + "loss": 1.7908, + "step": 253000 + }, + { + "epoch": 7.11, + "learning_rate": 0.00022907436190333514, + "loss": 1.8099, + "step": 253500 + }, + { + "epoch": 7.13, + "learning_rate": 0.00022893391509522294, + "loss": 1.8051, + "step": 254000 + }, + { + "epoch": 7.14, + "learning_rate": 0.0002287934682871107, + "loss": 1.8227, + "step": 254500 + }, + { + "epoch": 7.16, + "learning_rate": 0.0002286530214789985, + "loss": 1.8217, + "step": 255000 + }, + { + "epoch": 7.17, + "learning_rate": 0.0002285125746708863, + "loss": 1.818, + "step": 255500 + }, + { + "epoch": 7.18, + "learning_rate": 0.0002283721278627741, + "loss": 1.8631, + "step": 256000 + }, + { + "epoch": 7.2, + "learning_rate": 0.00022823168105466188, + "loss": 1.8189, + "step": 256500 + }, + { + "epoch": 7.21, + "learning_rate": 0.00022809123424654968, + "loss": 1.8172, + "step": 257000 + }, + { + "epoch": 7.23, + "learning_rate": 0.00022795078743843746, + "loss": 1.8375, + "step": 257500 + }, + { + "epoch": 7.24, + "learning_rate": 0.00022781034063032525, + "loss": 1.841, + "step": 258000 + }, + { + "epoch": 7.25, + "learning_rate": 0.00022766989382221305, + "loss": 1.8825, + "step": 258500 + }, + { + "epoch": 7.27, + "learning_rate": 0.00022752944701410083, + "loss": 1.8211, + "step": 259000 + }, + { + "epoch": 7.28, + "learning_rate": 0.00022738900020598864, + "loss": 1.835, + "step": 259500 + }, + { + "epoch": 7.3, + "learning_rate": 0.0002272485533978764, + "loss": 1.809, + "step": 260000 + }, + { + "epoch": 7.31, + "learning_rate": 0.0002271081065897642, + "loss": 1.833, + "step": 260500 + }, + { + "epoch": 7.32, + "learning_rate": 0.000226967659781652, + "loss": 1.8221, + "step": 261000 + }, + { + "epoch": 7.34, + "learning_rate": 0.0002268272129735398, + "loss": 1.842, + "step": 261500 + }, + { + "epoch": 7.35, + "learning_rate": 0.0002266867661654276, + "loss": 1.8132, + "step": 262000 + }, + { + "epoch": 7.37, + "learning_rate": 0.00022654631935731538, + "loss": 1.8774, + "step": 262500 + }, + { + "epoch": 7.38, + "learning_rate": 0.0002264058725492032, + "loss": 1.8435, + "step": 263000 + }, + { + "epoch": 7.39, + "learning_rate": 0.00022626542574109094, + "loss": 1.8275, + "step": 263500 + }, + { + "epoch": 7.41, + "learning_rate": 0.00022612497893297875, + "loss": 1.8354, + "step": 264000 + }, + { + "epoch": 7.42, + "learning_rate": 0.00022598453212486656, + "loss": 1.8332, + "step": 264500 + }, + { + "epoch": 7.44, + "learning_rate": 0.00022584408531675434, + "loss": 1.8744, + "step": 265000 + }, + { + "epoch": 7.45, + "learning_rate": 0.00022570363850864215, + "loss": 1.8174, + "step": 265500 + }, + { + "epoch": 7.46, + "learning_rate": 0.00022556319170052993, + "loss": 1.819, + "step": 266000 + }, + { + "epoch": 7.48, + "learning_rate": 0.00022542274489241774, + "loss": 1.8235, + "step": 266500 + }, + { + "epoch": 7.49, + "learning_rate": 0.0002252822980843055, + "loss": 1.8256, + "step": 267000 + }, + { + "epoch": 7.51, + "learning_rate": 0.0002251418512761933, + "loss": 1.8291, + "step": 267500 + }, + { + "epoch": 7.52, + "learning_rate": 0.0002250014044680811, + "loss": 1.8595, + "step": 268000 + }, + { + "epoch": 7.53, + "learning_rate": 0.0002248609576599689, + "loss": 1.839, + "step": 268500 + }, + { + "epoch": 7.55, + "learning_rate": 0.0002247205108518567, + "loss": 1.8658, + "step": 269000 + }, + { + "epoch": 7.56, + "learning_rate": 0.00022458006404374448, + "loss": 1.8508, + "step": 269500 + }, + { + "epoch": 7.58, + "learning_rate": 0.00022443961723563229, + "loss": 1.8332, + "step": 270000 + }, + { + "epoch": 7.59, + "learning_rate": 0.0002242991704275201, + "loss": 1.8551, + "step": 270500 + }, + { + "epoch": 7.6, + "learning_rate": 0.00022415872361940785, + "loss": 1.8145, + "step": 271000 + }, + { + "epoch": 7.62, + "learning_rate": 0.00022401827681129566, + "loss": 1.8381, + "step": 271500 + }, + { + "epoch": 7.63, + "learning_rate": 0.00022387783000318344, + "loss": 1.8425, + "step": 272000 + }, + { + "epoch": 7.65, + "learning_rate": 0.00022373738319507124, + "loss": 1.8686, + "step": 272500 + }, + { + "epoch": 7.66, + "learning_rate": 0.00022359693638695903, + "loss": 1.8304, + "step": 273000 + }, + { + "epoch": 7.68, + "learning_rate": 0.00022345648957884683, + "loss": 1.8242, + "step": 273500 + }, + { + "epoch": 7.69, + "learning_rate": 0.00022331604277073461, + "loss": 1.8065, + "step": 274000 + }, + { + "epoch": 7.7, + "learning_rate": 0.0002231755959626224, + "loss": 1.847, + "step": 274500 + }, + { + "epoch": 7.72, + "learning_rate": 0.0002230351491545102, + "loss": 1.8359, + "step": 275000 + }, + { + "epoch": 7.73, + "learning_rate": 0.00022289470234639798, + "loss": 1.8148, + "step": 275500 + }, + { + "epoch": 7.75, + "learning_rate": 0.0002227542555382858, + "loss": 1.8743, + "step": 276000 + }, + { + "epoch": 7.76, + "learning_rate": 0.00022261380873017357, + "loss": 1.8471, + "step": 276500 + }, + { + "epoch": 7.77, + "learning_rate": 0.00022247336192206135, + "loss": 1.8661, + "step": 277000 + }, + { + "epoch": 7.79, + "learning_rate": 0.00022233291511394916, + "loss": 1.8514, + "step": 277500 + }, + { + "epoch": 7.8, + "learning_rate": 0.00022219246830583694, + "loss": 1.8565, + "step": 278000 + }, + { + "epoch": 7.82, + "learning_rate": 0.00022205202149772475, + "loss": 1.8542, + "step": 278500 + }, + { + "epoch": 7.83, + "learning_rate": 0.00022191157468961253, + "loss": 1.8716, + "step": 279000 + }, + { + "epoch": 7.84, + "learning_rate": 0.00022177112788150034, + "loss": 1.8332, + "step": 279500 + }, + { + "epoch": 7.86, + "learning_rate": 0.0002216306810733881, + "loss": 1.8361, + "step": 280000 + }, + { + "epoch": 7.87, + "learning_rate": 0.0002214902342652759, + "loss": 1.8131, + "step": 280500 + }, + { + "epoch": 7.89, + "learning_rate": 0.0002213497874571637, + "loss": 1.8443, + "step": 281000 + }, + { + "epoch": 7.9, + "learning_rate": 0.0002212093406490515, + "loss": 1.8881, + "step": 281500 + }, + { + "epoch": 7.91, + "learning_rate": 0.0002210688938409393, + "loss": 1.8587, + "step": 282000 + }, + { + "epoch": 7.93, + "learning_rate": 0.00022092844703282708, + "loss": 1.846, + "step": 282500 + }, + { + "epoch": 7.94, + "learning_rate": 0.0002207880002247149, + "loss": 1.8216, + "step": 283000 + }, + { + "epoch": 7.96, + "learning_rate": 0.00022064755341660264, + "loss": 1.8285, + "step": 283500 + }, + { + "epoch": 7.97, + "learning_rate": 0.00022050710660849045, + "loss": 1.8622, + "step": 284000 + }, + { + "epoch": 7.98, + "learning_rate": 0.00022036665980037826, + "loss": 1.8458, + "step": 284500 + }, + { + "epoch": 8.0, + "learning_rate": 0.00022022621299226604, + "loss": 1.8345, + "step": 285000 + }, + { + "epoch": 8.01, + "learning_rate": 0.00022008576618415385, + "loss": 1.7663, + "step": 285500 + }, + { + "epoch": 8.03, + "learning_rate": 0.00021994531937604163, + "loss": 1.7715, + "step": 286000 + }, + { + "epoch": 8.04, + "learning_rate": 0.00021980487256792944, + "loss": 1.7414, + "step": 286500 + }, + { + "epoch": 8.05, + "learning_rate": 0.0002196644257598172, + "loss": 1.7456, + "step": 287000 + }, + { + "epoch": 8.07, + "learning_rate": 0.000219523978951705, + "loss": 1.7446, + "step": 287500 + }, + { + "epoch": 8.08, + "learning_rate": 0.0002193835321435928, + "loss": 1.76, + "step": 288000 + }, + { + "epoch": 8.1, + "learning_rate": 0.00021924308533548059, + "loss": 1.7493, + "step": 288500 + }, + { + "epoch": 8.11, + "learning_rate": 0.0002191026385273684, + "loss": 1.761, + "step": 289000 + }, + { + "epoch": 8.12, + "learning_rate": 0.00021896219171925617, + "loss": 1.7456, + "step": 289500 + }, + { + "epoch": 8.14, + "learning_rate": 0.00021882174491114398, + "loss": 1.7712, + "step": 290000 + }, + { + "epoch": 8.15, + "learning_rate": 0.00021868129810303174, + "loss": 1.7744, + "step": 290500 + }, + { + "epoch": 8.17, + "learning_rate": 0.00021854085129491954, + "loss": 1.7679, + "step": 291000 + }, + { + "epoch": 8.18, + "learning_rate": 0.00021840040448680735, + "loss": 1.7527, + "step": 291500 + }, + { + "epoch": 8.19, + "learning_rate": 0.00021825995767869513, + "loss": 1.7428, + "step": 292000 + }, + { + "epoch": 8.21, + "learning_rate": 0.00021811951087058294, + "loss": 1.7666, + "step": 292500 + }, + { + "epoch": 8.22, + "learning_rate": 0.00021797906406247072, + "loss": 1.7579, + "step": 293000 + }, + { + "epoch": 8.24, + "learning_rate": 0.0002178386172543585, + "loss": 1.7789, + "step": 293500 + }, + { + "epoch": 8.25, + "learning_rate": 0.00021769817044624628, + "loss": 1.7492, + "step": 294000 + }, + { + "epoch": 8.26, + "learning_rate": 0.0002175577236381341, + "loss": 1.7532, + "step": 294500 + }, + { + "epoch": 8.28, + "learning_rate": 0.0002174172768300219, + "loss": 1.7642, + "step": 295000 + }, + { + "epoch": 8.29, + "learning_rate": 0.00021727683002190968, + "loss": 1.7626, + "step": 295500 + }, + { + "epoch": 8.31, + "learning_rate": 0.0002171363832137975, + "loss": 1.7749, + "step": 296000 + }, + { + "epoch": 8.32, + "learning_rate": 0.00021699593640568524, + "loss": 1.7803, + "step": 296500 + }, + { + "epoch": 8.33, + "learning_rate": 0.00021685548959757305, + "loss": 1.7891, + "step": 297000 + }, + { + "epoch": 8.35, + "learning_rate": 0.00021671504278946086, + "loss": 1.7569, + "step": 297500 + }, + { + "epoch": 8.36, + "learning_rate": 0.00021657459598134864, + "loss": 1.7607, + "step": 298000 + }, + { + "epoch": 8.38, + "learning_rate": 0.00021643414917323645, + "loss": 1.7525, + "step": 298500 + }, + { + "epoch": 8.39, + "learning_rate": 0.00021629370236512423, + "loss": 1.8031, + "step": 299000 + }, + { + "epoch": 8.4, + "learning_rate": 0.00021615325555701204, + "loss": 1.768, + "step": 299500 + }, + { + "epoch": 8.42, + "learning_rate": 0.0002160128087488998, + "loss": 1.7674, + "step": 300000 + }, + { + "epoch": 8.43, + "learning_rate": 0.0002158723619407876, + "loss": 1.7714, + "step": 300500 + }, + { + "epoch": 8.45, + "learning_rate": 0.0002157319151326754, + "loss": 1.7616, + "step": 301000 + }, + { + "epoch": 8.46, + "learning_rate": 0.0002155914683245632, + "loss": 1.8042, + "step": 301500 + }, + { + "epoch": 8.47, + "learning_rate": 0.000215451021516451, + "loss": 1.7405, + "step": 302000 + }, + { + "epoch": 8.49, + "learning_rate": 0.00021531057470833878, + "loss": 1.793, + "step": 302500 + }, + { + "epoch": 8.5, + "learning_rate": 0.00021517012790022658, + "loss": 1.7702, + "step": 303000 + }, + { + "epoch": 8.52, + "learning_rate": 0.00021502968109211434, + "loss": 1.7444, + "step": 303500 + }, + { + "epoch": 8.53, + "learning_rate": 0.00021488923428400215, + "loss": 1.7789, + "step": 304000 + }, + { + "epoch": 8.55, + "learning_rate": 0.00021474878747588995, + "loss": 1.775, + "step": 304500 + }, + { + "epoch": 8.56, + "learning_rate": 0.00021460834066777774, + "loss": 1.7749, + "step": 305000 + }, + { + "epoch": 8.57, + "learning_rate": 0.00021446789385966554, + "loss": 1.7767, + "step": 305500 + }, + { + "epoch": 8.59, + "learning_rate": 0.00021432744705155332, + "loss": 1.7808, + "step": 306000 + }, + { + "epoch": 8.6, + "learning_rate": 0.00021418700024344113, + "loss": 1.7688, + "step": 306500 + }, + { + "epoch": 8.62, + "learning_rate": 0.00021404655343532889, + "loss": 1.7917, + "step": 307000 + }, + { + "epoch": 8.63, + "learning_rate": 0.0002139061066272167, + "loss": 1.7924, + "step": 307500 + }, + { + "epoch": 8.64, + "learning_rate": 0.0002137656598191045, + "loss": 1.7847, + "step": 308000 + }, + { + "epoch": 8.66, + "learning_rate": 0.00021362521301099228, + "loss": 1.7892, + "step": 308500 + }, + { + "epoch": 8.67, + "learning_rate": 0.0002134847662028801, + "loss": 1.7915, + "step": 309000 + }, + { + "epoch": 8.69, + "learning_rate": 0.00021334431939476787, + "loss": 1.7895, + "step": 309500 + }, + { + "epoch": 8.7, + "learning_rate": 0.00021320387258665565, + "loss": 1.7896, + "step": 310000 + }, + { + "epoch": 8.71, + "learning_rate": 0.00021306342577854343, + "loss": 1.785, + "step": 310500 + }, + { + "epoch": 8.73, + "learning_rate": 0.00021292297897043124, + "loss": 1.8127, + "step": 311000 + }, + { + "epoch": 8.74, + "learning_rate": 0.00021278253216231905, + "loss": 1.7889, + "step": 311500 + }, + { + "epoch": 8.76, + "learning_rate": 0.00021264208535420683, + "loss": 1.7924, + "step": 312000 + }, + { + "epoch": 8.77, + "learning_rate": 0.00021250163854609464, + "loss": 1.8108, + "step": 312500 + }, + { + "epoch": 8.78, + "learning_rate": 0.0002123611917379824, + "loss": 1.8106, + "step": 313000 + }, + { + "epoch": 8.8, + "learning_rate": 0.0002122207449298702, + "loss": 1.7852, + "step": 313500 + }, + { + "epoch": 8.81, + "learning_rate": 0.00021208029812175798, + "loss": 1.7817, + "step": 314000 + }, + { + "epoch": 8.83, + "learning_rate": 0.0002119398513136458, + "loss": 1.7985, + "step": 314500 + }, + { + "epoch": 8.84, + "learning_rate": 0.0002117994045055336, + "loss": 1.7904, + "step": 315000 + }, + { + "epoch": 8.85, + "learning_rate": 0.00021165895769742138, + "loss": 1.8134, + "step": 315500 + }, + { + "epoch": 8.87, + "learning_rate": 0.0002115185108893092, + "loss": 1.812, + "step": 316000 + }, + { + "epoch": 8.88, + "learning_rate": 0.00021137806408119694, + "loss": 1.7939, + "step": 316500 + }, + { + "epoch": 8.9, + "learning_rate": 0.00021123761727308475, + "loss": 1.7814, + "step": 317000 + }, + { + "epoch": 8.91, + "learning_rate": 0.00021109717046497253, + "loss": 1.7592, + "step": 317500 + }, + { + "epoch": 8.92, + "learning_rate": 0.00021095672365686034, + "loss": 1.7919, + "step": 318000 + }, + { + "epoch": 8.94, + "learning_rate": 0.00021081627684874815, + "loss": 1.7763, + "step": 318500 + }, + { + "epoch": 8.95, + "learning_rate": 0.00021067583004063593, + "loss": 1.8105, + "step": 319000 + }, + { + "epoch": 8.97, + "learning_rate": 0.00021053538323252373, + "loss": 1.7793, + "step": 319500 + }, + { + "epoch": 8.98, + "learning_rate": 0.0002103949364244115, + "loss": 1.8149, + "step": 320000 + }, + { + "epoch": 8.99, + "learning_rate": 0.0002102544896162993, + "loss": 1.7625, + "step": 320500 + }, + { + "epoch": 9.01, + "learning_rate": 0.0002101140428081871, + "loss": 1.6988, + "step": 321000 + }, + { + "epoch": 9.02, + "learning_rate": 0.00020997359600007489, + "loss": 1.6651, + "step": 321500 + }, + { + "epoch": 9.04, + "learning_rate": 0.0002098331491919627, + "loss": 1.6747, + "step": 322000 + }, + { + "epoch": 9.05, + "learning_rate": 0.00020969270238385047, + "loss": 1.7048, + "step": 322500 + }, + { + "epoch": 9.06, + "learning_rate": 0.00020955225557573828, + "loss": 1.6829, + "step": 323000 + }, + { + "epoch": 9.08, + "learning_rate": 0.00020941180876762604, + "loss": 1.7218, + "step": 323500 + }, + { + "epoch": 9.09, + "learning_rate": 0.00020927136195951384, + "loss": 1.7071, + "step": 324000 + }, + { + "epoch": 9.11, + "learning_rate": 0.00020913091515140165, + "loss": 1.7048, + "step": 324500 + }, + { + "epoch": 9.12, + "learning_rate": 0.00020899046834328943, + "loss": 1.7058, + "step": 325000 + }, + { + "epoch": 9.13, + "learning_rate": 0.00020885002153517724, + "loss": 1.7024, + "step": 325500 + }, + { + "epoch": 9.15, + "learning_rate": 0.00020870957472706502, + "loss": 1.6729, + "step": 326000 + }, + { + "epoch": 9.16, + "learning_rate": 0.00020856912791895283, + "loss": 1.7195, + "step": 326500 + }, + { + "epoch": 9.18, + "learning_rate": 0.00020842868111084058, + "loss": 1.723, + "step": 327000 + }, + { + "epoch": 9.19, + "learning_rate": 0.0002082882343027284, + "loss": 1.7097, + "step": 327500 + }, + { + "epoch": 9.2, + "learning_rate": 0.0002081477874946162, + "loss": 1.7152, + "step": 328000 + }, + { + "epoch": 9.22, + "learning_rate": 0.00020800734068650398, + "loss": 1.7013, + "step": 328500 + }, + { + "epoch": 9.23, + "learning_rate": 0.0002078668938783918, + "loss": 1.6876, + "step": 329000 + }, + { + "epoch": 9.25, + "learning_rate": 0.00020772644707027954, + "loss": 1.6929, + "step": 329500 + }, + { + "epoch": 9.26, + "learning_rate": 0.00020758600026216735, + "loss": 1.7123, + "step": 330000 + }, + { + "epoch": 9.27, + "learning_rate": 0.00020744555345405513, + "loss": 1.6763, + "step": 330500 + }, + { + "epoch": 9.29, + "learning_rate": 0.00020730510664594294, + "loss": 1.7054, + "step": 331000 + }, + { + "epoch": 9.3, + "learning_rate": 0.00020716465983783075, + "loss": 1.7192, + "step": 331500 + }, + { + "epoch": 9.32, + "learning_rate": 0.00020702421302971853, + "loss": 1.7188, + "step": 332000 + }, + { + "epoch": 9.33, + "learning_rate": 0.00020688376622160634, + "loss": 1.7263, + "step": 332500 + }, + { + "epoch": 9.34, + "learning_rate": 0.0002067433194134941, + "loss": 1.7191, + "step": 333000 + }, + { + "epoch": 9.36, + "learning_rate": 0.0002066028726053819, + "loss": 1.7121, + "step": 333500 + }, + { + "epoch": 9.37, + "learning_rate": 0.00020646242579726968, + "loss": 1.7201, + "step": 334000 + }, + { + "epoch": 9.39, + "learning_rate": 0.0002063219789891575, + "loss": 1.7105, + "step": 334500 + }, + { + "epoch": 9.4, + "learning_rate": 0.0002061815321810453, + "loss": 1.6985, + "step": 335000 + }, + { + "epoch": 9.42, + "learning_rate": 0.00020604108537293308, + "loss": 1.7075, + "step": 335500 + }, + { + "epoch": 9.43, + "learning_rate": 0.00020590063856482088, + "loss": 1.6683, + "step": 336000 + }, + { + "epoch": 9.44, + "learning_rate": 0.00020576019175670864, + "loss": 1.7711, + "step": 336500 + }, + { + "epoch": 9.46, + "learning_rate": 0.00020561974494859645, + "loss": 1.7415, + "step": 337000 + }, + { + "epoch": 9.47, + "learning_rate": 0.00020547929814048423, + "loss": 1.7425, + "step": 337500 + }, + { + "epoch": 9.49, + "learning_rate": 0.00020533885133237203, + "loss": 1.7103, + "step": 338000 + }, + { + "epoch": 9.5, + "learning_rate": 0.00020519840452425984, + "loss": 1.7212, + "step": 338500 + }, + { + "epoch": 9.51, + "learning_rate": 0.00020505795771614762, + "loss": 1.7515, + "step": 339000 + }, + { + "epoch": 9.53, + "learning_rate": 0.00020491751090803543, + "loss": 1.7124, + "step": 339500 + }, + { + "epoch": 9.54, + "learning_rate": 0.00020477706409992319, + "loss": 1.7298, + "step": 340000 + }, + { + "epoch": 9.56, + "learning_rate": 0.000204636617291811, + "loss": 1.6947, + "step": 340500 + }, + { + "epoch": 9.57, + "learning_rate": 0.00020449617048369877, + "loss": 1.7412, + "step": 341000 + }, + { + "epoch": 9.58, + "learning_rate": 0.00020435572367558658, + "loss": 1.7487, + "step": 341500 + }, + { + "epoch": 9.6, + "learning_rate": 0.0002042152768674744, + "loss": 1.7391, + "step": 342000 + }, + { + "epoch": 9.61, + "learning_rate": 0.00020407483005936217, + "loss": 1.7039, + "step": 342500 + }, + { + "epoch": 9.63, + "learning_rate": 0.00020393438325124998, + "loss": 1.7487, + "step": 343000 + }, + { + "epoch": 9.64, + "learning_rate": 0.00020379393644313773, + "loss": 1.7047, + "step": 343500 + }, + { + "epoch": 9.65, + "learning_rate": 0.00020365348963502554, + "loss": 1.7137, + "step": 344000 + }, + { + "epoch": 9.67, + "learning_rate": 0.00020351304282691332, + "loss": 1.7259, + "step": 344500 + }, + { + "epoch": 9.68, + "learning_rate": 0.00020337259601880113, + "loss": 1.7112, + "step": 345000 + }, + { + "epoch": 9.7, + "learning_rate": 0.00020323214921068894, + "loss": 1.7499, + "step": 345500 + }, + { + "epoch": 9.71, + "learning_rate": 0.00020309170240257672, + "loss": 1.741, + "step": 346000 + }, + { + "epoch": 9.72, + "learning_rate": 0.0002029512555944645, + "loss": 1.6921, + "step": 346500 + }, + { + "epoch": 9.74, + "learning_rate": 0.00020281080878635228, + "loss": 1.7582, + "step": 347000 + }, + { + "epoch": 9.75, + "learning_rate": 0.0002026703619782401, + "loss": 1.7167, + "step": 347500 + }, + { + "epoch": 9.77, + "learning_rate": 0.0002025299151701279, + "loss": 1.7155, + "step": 348000 + }, + { + "epoch": 9.78, + "learning_rate": 0.00020238946836201568, + "loss": 1.7296, + "step": 348500 + }, + { + "epoch": 9.79, + "learning_rate": 0.00020224902155390349, + "loss": 1.7333, + "step": 349000 + }, + { + "epoch": 9.81, + "learning_rate": 0.00020210857474579124, + "loss": 1.6987, + "step": 349500 + }, + { + "epoch": 9.82, + "learning_rate": 0.00020196812793767905, + "loss": 1.7412, + "step": 350000 + }, + { + "epoch": 9.84, + "learning_rate": 0.00020182768112956683, + "loss": 1.7516, + "step": 350500 + }, + { + "epoch": 9.85, + "learning_rate": 0.00020168723432145464, + "loss": 1.7316, + "step": 351000 + }, + { + "epoch": 9.86, + "learning_rate": 0.00020154678751334244, + "loss": 1.7409, + "step": 351500 + }, + { + "epoch": 9.88, + "learning_rate": 0.00020140634070523023, + "loss": 1.7477, + "step": 352000 + }, + { + "epoch": 9.89, + "learning_rate": 0.00020126589389711803, + "loss": 1.7584, + "step": 352500 + }, + { + "epoch": 9.91, + "learning_rate": 0.0002011254470890058, + "loss": 1.7328, + "step": 353000 + }, + { + "epoch": 9.92, + "learning_rate": 0.0002009850002808936, + "loss": 1.7421, + "step": 353500 + }, + { + "epoch": 9.93, + "learning_rate": 0.00020084455347278138, + "loss": 1.719, + "step": 354000 + }, + { + "epoch": 9.95, + "learning_rate": 0.00020070410666466918, + "loss": 1.7224, + "step": 354500 + }, + { + "epoch": 9.96, + "learning_rate": 0.000200563659856557, + "loss": 1.7464, + "step": 355000 + }, + { + "epoch": 9.98, + "learning_rate": 0.00020042321304844477, + "loss": 1.73, + "step": 355500 + }, + { + "epoch": 9.99, + "learning_rate": 0.00020028276624033258, + "loss": 1.7005, + "step": 356000 + }, + { + "epoch": 10.0, + "learning_rate": 0.00020014231943222034, + "loss": 1.7133, + "step": 356500 + }, + { + "epoch": 10.02, + "learning_rate": 0.00020000187262410814, + "loss": 1.6669, + "step": 357000 + }, + { + "epoch": 10.03, + "learning_rate": 0.00019986142581599592, + "loss": 1.6335, + "step": 357500 + }, + { + "epoch": 10.05, + "learning_rate": 0.00019972097900788373, + "loss": 1.641, + "step": 358000 + }, + { + "epoch": 10.06, + "learning_rate": 0.00019958053219977154, + "loss": 1.6476, + "step": 358500 + }, + { + "epoch": 10.07, + "learning_rate": 0.00019944008539165932, + "loss": 1.6382, + "step": 359000 + }, + { + "epoch": 10.09, + "learning_rate": 0.00019929963858354713, + "loss": 1.6289, + "step": 359500 + }, + { + "epoch": 10.1, + "learning_rate": 0.00019915919177543488, + "loss": 1.6553, + "step": 360000 + }, + { + "epoch": 10.12, + "learning_rate": 0.0001990187449673227, + "loss": 1.6524, + "step": 360500 + }, + { + "epoch": 10.13, + "learning_rate": 0.00019887829815921047, + "loss": 1.6656, + "step": 361000 + }, + { + "epoch": 10.14, + "learning_rate": 0.00019873785135109828, + "loss": 1.6646, + "step": 361500 + }, + { + "epoch": 10.16, + "learning_rate": 0.0001985974045429861, + "loss": 1.6444, + "step": 362000 + }, + { + "epoch": 10.17, + "learning_rate": 0.00019845695773487387, + "loss": 1.6786, + "step": 362500 + }, + { + "epoch": 10.19, + "learning_rate": 0.00019831651092676165, + "loss": 1.6439, + "step": 363000 + }, + { + "epoch": 10.2, + "learning_rate": 0.00019817606411864943, + "loss": 1.6175, + "step": 363500 + }, + { + "epoch": 10.21, + "learning_rate": 0.00019803561731053724, + "loss": 1.6708, + "step": 364000 + }, + { + "epoch": 10.23, + "learning_rate": 0.00019789517050242502, + "loss": 1.6821, + "step": 364500 + }, + { + "epoch": 10.24, + "learning_rate": 0.00019775472369431283, + "loss": 1.6408, + "step": 365000 + }, + { + "epoch": 10.26, + "learning_rate": 0.00019761427688620064, + "loss": 1.6807, + "step": 365500 + }, + { + "epoch": 10.27, + "learning_rate": 0.0001974738300780884, + "loss": 1.6527, + "step": 366000 + }, + { + "epoch": 10.28, + "learning_rate": 0.0001973333832699762, + "loss": 1.6521, + "step": 366500 + }, + { + "epoch": 10.3, + "learning_rate": 0.00019719293646186398, + "loss": 1.6517, + "step": 367000 + }, + { + "epoch": 10.31, + "learning_rate": 0.00019705248965375179, + "loss": 1.6752, + "step": 367500 + }, + { + "epoch": 10.33, + "learning_rate": 0.00019691204284563957, + "loss": 1.6419, + "step": 368000 + }, + { + "epoch": 10.34, + "learning_rate": 0.00019677159603752738, + "loss": 1.6271, + "step": 368500 + }, + { + "epoch": 10.36, + "learning_rate": 0.00019663114922941518, + "loss": 1.6934, + "step": 369000 + }, + { + "epoch": 10.37, + "learning_rate": 0.00019649070242130294, + "loss": 1.6474, + "step": 369500 + }, + { + "epoch": 10.38, + "learning_rate": 0.00019635025561319075, + "loss": 1.6374, + "step": 370000 + }, + { + "epoch": 10.4, + "learning_rate": 0.00019620980880507853, + "loss": 1.6623, + "step": 370500 + }, + { + "epoch": 10.41, + "learning_rate": 0.00019606936199696633, + "loss": 1.6459, + "step": 371000 + }, + { + "epoch": 10.43, + "learning_rate": 0.00019592891518885412, + "loss": 1.6437, + "step": 371500 + }, + { + "epoch": 10.44, + "learning_rate": 0.00019578846838074192, + "loss": 1.6553, + "step": 372000 + }, + { + "epoch": 10.45, + "learning_rate": 0.00019564802157262973, + "loss": 1.655, + "step": 372500 + }, + { + "epoch": 10.47, + "learning_rate": 0.00019550757476451748, + "loss": 1.6601, + "step": 373000 + }, + { + "epoch": 10.48, + "learning_rate": 0.0001953671279564053, + "loss": 1.6885, + "step": 373500 + }, + { + "epoch": 10.5, + "learning_rate": 0.00019522668114829307, + "loss": 1.6711, + "step": 374000 + }, + { + "epoch": 10.51, + "learning_rate": 0.00019508623434018088, + "loss": 1.6831, + "step": 374500 + }, + { + "epoch": 10.52, + "learning_rate": 0.0001949457875320687, + "loss": 1.6198, + "step": 375000 + }, + { + "epoch": 10.54, + "learning_rate": 0.00019480534072395647, + "loss": 1.6693, + "step": 375500 + }, + { + "epoch": 10.55, + "learning_rate": 0.00019466489391584428, + "loss": 1.651, + "step": 376000 + }, + { + "epoch": 10.57, + "learning_rate": 0.00019452444710773203, + "loss": 1.6937, + "step": 376500 + }, + { + "epoch": 10.58, + "learning_rate": 0.00019438400029961984, + "loss": 1.6631, + "step": 377000 + }, + { + "epoch": 10.59, + "learning_rate": 0.00019424355349150762, + "loss": 1.6963, + "step": 377500 + }, + { + "epoch": 10.61, + "learning_rate": 0.00019410310668339543, + "loss": 1.6647, + "step": 378000 + }, + { + "epoch": 10.62, + "learning_rate": 0.00019396265987528324, + "loss": 1.6662, + "step": 378500 + }, + { + "epoch": 10.64, + "learning_rate": 0.00019382221306717102, + "loss": 1.68, + "step": 379000 + }, + { + "epoch": 10.65, + "learning_rate": 0.0001936817662590588, + "loss": 1.6921, + "step": 379500 + }, + { + "epoch": 10.66, + "learning_rate": 0.00019354131945094658, + "loss": 1.6737, + "step": 380000 + }, + { + "epoch": 10.68, + "learning_rate": 0.0001934008726428344, + "loss": 1.6782, + "step": 380500 + }, + { + "epoch": 10.69, + "learning_rate": 0.00019326042583472217, + "loss": 1.667, + "step": 381000 + }, + { + "epoch": 10.71, + "learning_rate": 0.00019311997902660998, + "loss": 1.6799, + "step": 381500 + }, + { + "epoch": 10.72, + "learning_rate": 0.00019297953221849779, + "loss": 1.6438, + "step": 382000 + }, + { + "epoch": 10.73, + "learning_rate": 0.00019283908541038554, + "loss": 1.6626, + "step": 382500 + }, + { + "epoch": 10.75, + "learning_rate": 0.00019269863860227335, + "loss": 1.6698, + "step": 383000 + }, + { + "epoch": 10.76, + "learning_rate": 0.00019255819179416113, + "loss": 1.6847, + "step": 383500 + }, + { + "epoch": 10.78, + "learning_rate": 0.00019241774498604894, + "loss": 1.725, + "step": 384000 + }, + { + "epoch": 10.79, + "learning_rate": 0.00019227729817793672, + "loss": 1.6462, + "step": 384500 + }, + { + "epoch": 10.8, + "learning_rate": 0.00019213685136982452, + "loss": 1.6951, + "step": 385000 + }, + { + "epoch": 10.82, + "learning_rate": 0.00019199640456171233, + "loss": 1.6711, + "step": 385500 + }, + { + "epoch": 10.83, + "learning_rate": 0.0001918559577536001, + "loss": 1.6783, + "step": 386000 + }, + { + "epoch": 10.85, + "learning_rate": 0.0001917155109454879, + "loss": 1.6566, + "step": 386500 + }, + { + "epoch": 10.86, + "learning_rate": 0.00019157506413737568, + "loss": 1.6606, + "step": 387000 + }, + { + "epoch": 10.87, + "learning_rate": 0.00019143461732926348, + "loss": 1.6585, + "step": 387500 + }, + { + "epoch": 10.89, + "learning_rate": 0.00019129417052115126, + "loss": 1.7094, + "step": 388000 + }, + { + "epoch": 10.9, + "learning_rate": 0.00019115372371303907, + "loss": 1.638, + "step": 388500 + }, + { + "epoch": 10.92, + "learning_rate": 0.00019101327690492688, + "loss": 1.6686, + "step": 389000 + }, + { + "epoch": 10.93, + "learning_rate": 0.00019087283009681463, + "loss": 1.6696, + "step": 389500 + }, + { + "epoch": 10.94, + "learning_rate": 0.00019073238328870244, + "loss": 1.6516, + "step": 390000 + }, + { + "epoch": 10.96, + "learning_rate": 0.00019059193648059022, + "loss": 1.6625, + "step": 390500 + }, + { + "epoch": 10.97, + "learning_rate": 0.00019045148967247803, + "loss": 1.6865, + "step": 391000 + }, + { + "epoch": 10.99, + "learning_rate": 0.0001903110428643658, + "loss": 1.6903, + "step": 391500 + }, + { + "epoch": 11.0, + "learning_rate": 0.00019017059605625362, + "loss": 1.6686, + "step": 392000 + }, + { + "epoch": 11.01, + "learning_rate": 0.00019003014924814143, + "loss": 1.5473, + "step": 392500 + }, + { + "epoch": 11.03, + "learning_rate": 0.00018988970244002918, + "loss": 1.5807, + "step": 393000 + }, + { + "epoch": 11.04, + "learning_rate": 0.000189749255631917, + "loss": 1.5696, + "step": 393500 + }, + { + "epoch": 11.06, + "learning_rate": 0.00018960880882380477, + "loss": 1.5851, + "step": 394000 + }, + { + "epoch": 11.07, + "learning_rate": 0.00018946836201569258, + "loss": 1.5706, + "step": 394500 + }, + { + "epoch": 11.08, + "learning_rate": 0.00018932791520758036, + "loss": 1.5829, + "step": 395000 + }, + { + "epoch": 11.1, + "learning_rate": 0.00018918746839946817, + "loss": 1.6146, + "step": 395500 + }, + { + "epoch": 11.11, + "learning_rate": 0.00018904702159135598, + "loss": 1.6072, + "step": 396000 + }, + { + "epoch": 11.13, + "learning_rate": 0.00018890657478324373, + "loss": 1.587, + "step": 396500 + }, + { + "epoch": 11.14, + "learning_rate": 0.00018876612797513154, + "loss": 1.6045, + "step": 397000 + }, + { + "epoch": 11.15, + "learning_rate": 0.00018862568116701932, + "loss": 1.5912, + "step": 397500 + }, + { + "epoch": 11.17, + "learning_rate": 0.00018848523435890713, + "loss": 1.6025, + "step": 398000 + }, + { + "epoch": 11.18, + "learning_rate": 0.00018834478755079493, + "loss": 1.568, + "step": 398500 + }, + { + "epoch": 11.2, + "learning_rate": 0.0001882043407426827, + "loss": 1.5908, + "step": 399000 + }, + { + "epoch": 11.21, + "learning_rate": 0.0001880638939345705, + "loss": 1.5785, + "step": 399500 + }, + { + "epoch": 11.23, + "learning_rate": 0.00018792344712645828, + "loss": 1.5831, + "step": 400000 + }, + { + "epoch": 11.24, + "learning_rate": 0.00018778300031834609, + "loss": 1.6117, + "step": 400500 + }, + { + "epoch": 11.25, + "learning_rate": 0.00018764255351023387, + "loss": 1.5902, + "step": 401000 + }, + { + "epoch": 11.27, + "learning_rate": 0.00018750210670212167, + "loss": 1.6121, + "step": 401500 + }, + { + "epoch": 11.28, + "learning_rate": 0.00018736165989400948, + "loss": 1.619, + "step": 402000 + }, + { + "epoch": 11.3, + "learning_rate": 0.00018722121308589724, + "loss": 1.5974, + "step": 402500 + }, + { + "epoch": 11.31, + "learning_rate": 0.00018708076627778504, + "loss": 1.6051, + "step": 403000 + }, + { + "epoch": 11.32, + "learning_rate": 0.00018694031946967283, + "loss": 1.6172, + "step": 403500 + }, + { + "epoch": 11.34, + "learning_rate": 0.00018679987266156063, + "loss": 1.603, + "step": 404000 + }, + { + "epoch": 11.35, + "learning_rate": 0.00018665942585344841, + "loss": 1.6111, + "step": 404500 + }, + { + "epoch": 11.37, + "learning_rate": 0.00018651897904533622, + "loss": 1.5969, + "step": 405000 + }, + { + "epoch": 11.38, + "learning_rate": 0.00018637853223722403, + "loss": 1.6005, + "step": 405500 + }, + { + "epoch": 11.39, + "learning_rate": 0.00018623808542911178, + "loss": 1.6083, + "step": 406000 + }, + { + "epoch": 11.41, + "learning_rate": 0.0001860976386209996, + "loss": 1.6204, + "step": 406500 + }, + { + "epoch": 11.42, + "learning_rate": 0.00018595719181288737, + "loss": 1.6001, + "step": 407000 + }, + { + "epoch": 11.44, + "learning_rate": 0.00018581674500477518, + "loss": 1.5974, + "step": 407500 + }, + { + "epoch": 11.45, + "learning_rate": 0.00018567629819666296, + "loss": 1.6192, + "step": 408000 + }, + { + "epoch": 11.46, + "learning_rate": 0.00018553585138855077, + "loss": 1.6349, + "step": 408500 + }, + { + "epoch": 11.48, + "learning_rate": 0.00018539540458043858, + "loss": 1.6468, + "step": 409000 + }, + { + "epoch": 11.49, + "learning_rate": 0.00018525495777232633, + "loss": 1.5827, + "step": 409500 + }, + { + "epoch": 11.51, + "learning_rate": 0.00018511451096421414, + "loss": 1.5988, + "step": 410000 + }, + { + "epoch": 11.52, + "learning_rate": 0.00018497406415610192, + "loss": 1.6065, + "step": 410500 + }, + { + "epoch": 11.53, + "learning_rate": 0.00018483361734798973, + "loss": 1.628, + "step": 411000 + }, + { + "epoch": 11.55, + "learning_rate": 0.0001846931705398775, + "loss": 1.6265, + "step": 411500 + }, + { + "epoch": 11.56, + "learning_rate": 0.00018455272373176532, + "loss": 1.5977, + "step": 412000 + }, + { + "epoch": 11.58, + "learning_rate": 0.00018441227692365313, + "loss": 1.5836, + "step": 412500 + }, + { + "epoch": 11.59, + "learning_rate": 0.00018427183011554088, + "loss": 1.5906, + "step": 413000 + }, + { + "epoch": 11.6, + "learning_rate": 0.0001841313833074287, + "loss": 1.6158, + "step": 413500 + }, + { + "epoch": 11.62, + "learning_rate": 0.00018399093649931647, + "loss": 1.6166, + "step": 414000 + }, + { + "epoch": 11.63, + "learning_rate": 0.00018385048969120428, + "loss": 1.6381, + "step": 414500 + }, + { + "epoch": 11.65, + "learning_rate": 0.00018371004288309206, + "loss": 1.646, + "step": 415000 + }, + { + "epoch": 11.66, + "learning_rate": 0.00018356959607497987, + "loss": 1.6012, + "step": 415500 + }, + { + "epoch": 11.67, + "learning_rate": 0.00018342914926686765, + "loss": 1.6277, + "step": 416000 + }, + { + "epoch": 11.69, + "learning_rate": 0.00018328870245875543, + "loss": 1.6731, + "step": 416500 + }, + { + "epoch": 11.7, + "learning_rate": 0.00018314825565064324, + "loss": 1.6306, + "step": 417000 + }, + { + "epoch": 11.72, + "learning_rate": 0.00018300780884253102, + "loss": 1.6024, + "step": 417500 + }, + { + "epoch": 11.73, + "learning_rate": 0.00018286736203441882, + "loss": 1.6096, + "step": 418000 + }, + { + "epoch": 11.74, + "learning_rate": 0.0001827269152263066, + "loss": 1.6349, + "step": 418500 + }, + { + "epoch": 11.76, + "learning_rate": 0.00018258646841819439, + "loss": 1.611, + "step": 419000 + }, + { + "epoch": 11.77, + "learning_rate": 0.0001824460216100822, + "loss": 1.6175, + "step": 419500 + }, + { + "epoch": 11.79, + "learning_rate": 0.00018230557480196997, + "loss": 1.6383, + "step": 420000 + }, + { + "epoch": 11.8, + "learning_rate": 0.00018216512799385778, + "loss": 1.6082, + "step": 420500 + }, + { + "epoch": 11.81, + "learning_rate": 0.00018202468118574556, + "loss": 1.6501, + "step": 421000 + }, + { + "epoch": 11.83, + "learning_rate": 0.00018188423437763337, + "loss": 1.6195, + "step": 421500 + }, + { + "epoch": 11.84, + "learning_rate": 0.00018174378756952113, + "loss": 1.6106, + "step": 422000 + }, + { + "epoch": 11.86, + "learning_rate": 0.00018160334076140893, + "loss": 1.5999, + "step": 422500 + }, + { + "epoch": 11.87, + "learning_rate": 0.00018146289395329674, + "loss": 1.6126, + "step": 423000 + }, + { + "epoch": 11.88, + "learning_rate": 0.00018132244714518452, + "loss": 1.6388, + "step": 423500 + }, + { + "epoch": 11.9, + "learning_rate": 0.00018118200033707233, + "loss": 1.586, + "step": 424000 + }, + { + "epoch": 11.91, + "learning_rate": 0.0001810415535289601, + "loss": 1.6462, + "step": 424500 + }, + { + "epoch": 11.93, + "learning_rate": 0.00018090110672084792, + "loss": 1.6112, + "step": 425000 + }, + { + "epoch": 11.94, + "learning_rate": 0.00018076065991273573, + "loss": 1.5967, + "step": 425500 + }, + { + "epoch": 11.95, + "learning_rate": 0.00018062021310462348, + "loss": 1.6225, + "step": 426000 + }, + { + "epoch": 11.97, + "learning_rate": 0.0001804797662965113, + "loss": 1.6086, + "step": 426500 + }, + { + "epoch": 11.98, + "learning_rate": 0.00018033931948839907, + "loss": 1.6326, + "step": 427000 + }, + { + "epoch": 12.0, + "learning_rate": 0.00018019887268028688, + "loss": 1.6163, + "step": 427500 + }, + { + "epoch": 12.01, + "learning_rate": 0.00018005842587217466, + "loss": 1.5382, + "step": 428000 + }, + { + "epoch": 12.02, + "learning_rate": 0.00017991797906406247, + "loss": 1.5531, + "step": 428500 + }, + { + "epoch": 12.04, + "learning_rate": 0.00017977753225595028, + "loss": 1.5373, + "step": 429000 + }, + { + "epoch": 12.05, + "learning_rate": 0.00017963708544783803, + "loss": 1.5424, + "step": 429500 + }, + { + "epoch": 12.07, + "learning_rate": 0.00017949663863972584, + "loss": 1.5394, + "step": 430000 + }, + { + "epoch": 12.08, + "learning_rate": 0.00017935619183161362, + "loss": 1.5331, + "step": 430500 + }, + { + "epoch": 12.1, + "learning_rate": 0.00017921574502350143, + "loss": 1.5342, + "step": 431000 + }, + { + "epoch": 12.11, + "learning_rate": 0.0001790752982153892, + "loss": 1.524, + "step": 431500 + }, + { + "epoch": 12.12, + "learning_rate": 0.00017893485140727702, + "loss": 1.5285, + "step": 432000 + }, + { + "epoch": 12.14, + "learning_rate": 0.0001787944045991648, + "loss": 1.5422, + "step": 432500 + }, + { + "epoch": 12.15, + "learning_rate": 0.00017865395779105258, + "loss": 1.5073, + "step": 433000 + }, + { + "epoch": 12.17, + "learning_rate": 0.00017851351098294038, + "loss": 1.5426, + "step": 433500 + }, + { + "epoch": 12.18, + "learning_rate": 0.00017837306417482817, + "loss": 1.5532, + "step": 434000 + }, + { + "epoch": 12.19, + "learning_rate": 0.00017823261736671597, + "loss": 1.5142, + "step": 434500 + }, + { + "epoch": 12.21, + "learning_rate": 0.00017809217055860375, + "loss": 1.5332, + "step": 435000 + }, + { + "epoch": 12.22, + "learning_rate": 0.00017795172375049154, + "loss": 1.5548, + "step": 435500 + }, + { + "epoch": 12.24, + "learning_rate": 0.00017781127694237934, + "loss": 1.5717, + "step": 436000 + }, + { + "epoch": 12.25, + "learning_rate": 0.00017767083013426712, + "loss": 1.5633, + "step": 436500 + }, + { + "epoch": 12.26, + "learning_rate": 0.00017753038332615493, + "loss": 1.5532, + "step": 437000 + }, + { + "epoch": 12.28, + "learning_rate": 0.00017738993651804271, + "loss": 1.562, + "step": 437500 + }, + { + "epoch": 12.29, + "learning_rate": 0.00017724948970993052, + "loss": 1.5514, + "step": 438000 + }, + { + "epoch": 12.31, + "learning_rate": 0.00017710904290181828, + "loss": 1.5566, + "step": 438500 + }, + { + "epoch": 12.32, + "learning_rate": 0.00017696859609370608, + "loss": 1.5624, + "step": 439000 + }, + { + "epoch": 12.33, + "learning_rate": 0.0001768281492855939, + "loss": 1.51, + "step": 439500 + }, + { + "epoch": 12.35, + "learning_rate": 0.00017668770247748167, + "loss": 1.5532, + "step": 440000 + }, + { + "epoch": 12.36, + "learning_rate": 0.00017654725566936948, + "loss": 1.5621, + "step": 440500 + }, + { + "epoch": 12.38, + "learning_rate": 0.00017640680886125726, + "loss": 1.5415, + "step": 441000 + }, + { + "epoch": 12.39, + "learning_rate": 0.00017626636205314507, + "loss": 1.5547, + "step": 441500 + }, + { + "epoch": 12.4, + "learning_rate": 0.00017612591524503282, + "loss": 1.5488, + "step": 442000 + }, + { + "epoch": 12.42, + "learning_rate": 0.00017598546843692063, + "loss": 1.5488, + "step": 442500 + }, + { + "epoch": 12.43, + "learning_rate": 0.00017584502162880844, + "loss": 1.5959, + "step": 443000 + }, + { + "epoch": 12.45, + "learning_rate": 0.00017570457482069622, + "loss": 1.5253, + "step": 443500 + }, + { + "epoch": 12.46, + "learning_rate": 0.00017556412801258403, + "loss": 1.5767, + "step": 444000 + }, + { + "epoch": 12.47, + "learning_rate": 0.0001754236812044718, + "loss": 1.557, + "step": 444500 + }, + { + "epoch": 12.49, + "learning_rate": 0.00017528323439635962, + "loss": 1.5658, + "step": 445000 + }, + { + "epoch": 12.5, + "learning_rate": 0.00017514278758824737, + "loss": 1.5549, + "step": 445500 + }, + { + "epoch": 12.52, + "learning_rate": 0.00017500234078013518, + "loss": 1.555, + "step": 446000 + }, + { + "epoch": 12.53, + "learning_rate": 0.000174861893972023, + "loss": 1.5539, + "step": 446500 + }, + { + "epoch": 12.54, + "learning_rate": 0.00017472144716391077, + "loss": 1.5777, + "step": 447000 + }, + { + "epoch": 12.56, + "learning_rate": 0.00017458100035579858, + "loss": 1.543, + "step": 447500 + }, + { + "epoch": 12.57, + "learning_rate": 0.00017444055354768636, + "loss": 1.5948, + "step": 448000 + }, + { + "epoch": 12.59, + "learning_rate": 0.00017430010673957416, + "loss": 1.5819, + "step": 448500 + }, + { + "epoch": 12.6, + "learning_rate": 0.00017415965993146195, + "loss": 1.5539, + "step": 449000 + }, + { + "epoch": 12.61, + "learning_rate": 0.00017401921312334973, + "loss": 1.566, + "step": 449500 + }, + { + "epoch": 12.63, + "learning_rate": 0.00017387876631523753, + "loss": 1.5644, + "step": 450000 + }, + { + "epoch": 12.64, + "learning_rate": 0.00017373831950712532, + "loss": 1.556, + "step": 450500 + }, + { + "epoch": 12.66, + "learning_rate": 0.00017359787269901312, + "loss": 1.5502, + "step": 451000 + }, + { + "epoch": 12.67, + "learning_rate": 0.0001734574258909009, + "loss": 1.581, + "step": 451500 + }, + { + "epoch": 12.68, + "learning_rate": 0.00017331697908278869, + "loss": 1.5689, + "step": 452000 + }, + { + "epoch": 12.7, + "learning_rate": 0.0001731765322746765, + "loss": 1.594, + "step": 452500 + }, + { + "epoch": 12.71, + "learning_rate": 0.00017303608546656427, + "loss": 1.5912, + "step": 453000 + }, + { + "epoch": 12.73, + "learning_rate": 0.00017289563865845208, + "loss": 1.5534, + "step": 453500 + }, + { + "epoch": 12.74, + "learning_rate": 0.00017275519185033986, + "loss": 1.5707, + "step": 454000 + }, + { + "epoch": 12.75, + "learning_rate": 0.00017261474504222767, + "loss": 1.5652, + "step": 454500 + }, + { + "epoch": 12.77, + "learning_rate": 0.00017247429823411543, + "loss": 1.5836, + "step": 455000 + }, + { + "epoch": 12.78, + "learning_rate": 0.00017233385142600323, + "loss": 1.5521, + "step": 455500 + }, + { + "epoch": 12.8, + "learning_rate": 0.00017219340461789104, + "loss": 1.5621, + "step": 456000 + }, + { + "epoch": 12.81, + "learning_rate": 0.00017205295780977882, + "loss": 1.5712, + "step": 456500 + }, + { + "epoch": 12.82, + "learning_rate": 0.00017191251100166663, + "loss": 1.5442, + "step": 457000 + }, + { + "epoch": 12.84, + "learning_rate": 0.0001717720641935544, + "loss": 1.5823, + "step": 457500 + }, + { + "epoch": 12.85, + "learning_rate": 0.00017163161738544222, + "loss": 1.5797, + "step": 458000 + }, + { + "epoch": 12.87, + "learning_rate": 0.00017149117057732997, + "loss": 1.5101, + "step": 458500 + }, + { + "epoch": 12.88, + "learning_rate": 0.00017135072376921778, + "loss": 1.5733, + "step": 459000 + }, + { + "epoch": 12.89, + "learning_rate": 0.0001712102769611056, + "loss": 1.5874, + "step": 459500 + }, + { + "epoch": 12.91, + "learning_rate": 0.00017106983015299337, + "loss": 1.5539, + "step": 460000 + }, + { + "epoch": 12.92, + "learning_rate": 0.00017092938334488118, + "loss": 1.5474, + "step": 460500 + }, + { + "epoch": 12.94, + "learning_rate": 0.00017078893653676896, + "loss": 1.5657, + "step": 461000 + }, + { + "epoch": 12.95, + "learning_rate": 0.00017064848972865677, + "loss": 1.5742, + "step": 461500 + }, + { + "epoch": 12.96, + "learning_rate": 0.00017050804292054452, + "loss": 1.5759, + "step": 462000 + }, + { + "epoch": 12.98, + "learning_rate": 0.00017036759611243233, + "loss": 1.5867, + "step": 462500 + }, + { + "epoch": 12.99, + "learning_rate": 0.00017022714930432014, + "loss": 1.5741, + "step": 463000 + }, + { + "epoch": 13.01, + "learning_rate": 0.00017008670249620792, + "loss": 1.5301, + "step": 463500 + }, + { + "epoch": 13.02, + "learning_rate": 0.00016994625568809573, + "loss": 1.4812, + "step": 464000 + }, + { + "epoch": 13.04, + "learning_rate": 0.0001698058088799835, + "loss": 1.4959, + "step": 464500 + }, + { + "epoch": 13.05, + "learning_rate": 0.00016966536207187131, + "loss": 1.4608, + "step": 465000 + }, + { + "epoch": 13.06, + "learning_rate": 0.00016952491526375907, + "loss": 1.5079, + "step": 465500 + }, + { + "epoch": 13.08, + "learning_rate": 0.00016938446845564688, + "loss": 1.4575, + "step": 466000 + }, + { + "epoch": 13.09, + "learning_rate": 0.00016924402164753468, + "loss": 1.4994, + "step": 466500 + }, + { + "epoch": 13.11, + "learning_rate": 0.00016910357483942247, + "loss": 1.5024, + "step": 467000 + }, + { + "epoch": 13.12, + "learning_rate": 0.00016896312803131027, + "loss": 1.4771, + "step": 467500 + }, + { + "epoch": 13.13, + "learning_rate": 0.00016882268122319805, + "loss": 1.4988, + "step": 468000 + }, + { + "epoch": 13.15, + "learning_rate": 0.00016868223441508583, + "loss": 1.495, + "step": 468500 + }, + { + "epoch": 13.16, + "learning_rate": 0.00016854178760697362, + "loss": 1.4725, + "step": 469000 + }, + { + "epoch": 13.18, + "learning_rate": 0.00016840134079886142, + "loss": 1.4715, + "step": 469500 + }, + { + "epoch": 13.19, + "learning_rate": 0.00016826089399074923, + "loss": 1.5111, + "step": 470000 + }, + { + "epoch": 13.2, + "learning_rate": 0.000168120447182637, + "loss": 1.5205, + "step": 470500 + }, + { + "epoch": 13.22, + "learning_rate": 0.00016798000037452482, + "loss": 1.5102, + "step": 471000 + }, + { + "epoch": 13.23, + "learning_rate": 0.00016783955356641257, + "loss": 1.4871, + "step": 471500 + }, + { + "epoch": 13.25, + "learning_rate": 0.00016769910675830038, + "loss": 1.4828, + "step": 472000 + }, + { + "epoch": 13.26, + "learning_rate": 0.00016755865995018816, + "loss": 1.4935, + "step": 472500 + }, + { + "epoch": 13.27, + "learning_rate": 0.00016741821314207597, + "loss": 1.4811, + "step": 473000 + }, + { + "epoch": 13.29, + "learning_rate": 0.00016727776633396378, + "loss": 1.4903, + "step": 473500 + }, + { + "epoch": 13.3, + "learning_rate": 0.00016713731952585156, + "loss": 1.5206, + "step": 474000 + }, + { + "epoch": 13.32, + "learning_rate": 0.00016699687271773937, + "loss": 1.4896, + "step": 474500 + }, + { + "epoch": 13.33, + "learning_rate": 0.00016685642590962712, + "loss": 1.5221, + "step": 475000 + }, + { + "epoch": 13.34, + "learning_rate": 0.00016671597910151493, + "loss": 1.5004, + "step": 475500 + }, + { + "epoch": 13.36, + "learning_rate": 0.00016657553229340274, + "loss": 1.5036, + "step": 476000 + }, + { + "epoch": 13.37, + "learning_rate": 0.00016643508548529052, + "loss": 1.5516, + "step": 476500 + }, + { + "epoch": 13.39, + "learning_rate": 0.00016629463867717833, + "loss": 1.5031, + "step": 477000 + }, + { + "epoch": 13.4, + "learning_rate": 0.0001661541918690661, + "loss": 1.516, + "step": 477500 + }, + { + "epoch": 13.41, + "learning_rate": 0.00016601374506095392, + "loss": 1.5192, + "step": 478000 + }, + { + "epoch": 13.43, + "learning_rate": 0.00016587329825284167, + "loss": 1.5089, + "step": 478500 + }, + { + "epoch": 13.44, + "learning_rate": 0.00016573285144472948, + "loss": 1.5197, + "step": 479000 + }, + { + "epoch": 13.46, + "learning_rate": 0.00016559240463661729, + "loss": 1.5161, + "step": 479500 + }, + { + "epoch": 13.47, + "learning_rate": 0.00016545195782850507, + "loss": 1.5045, + "step": 480000 + }, + { + "epoch": 13.48, + "learning_rate": 0.00016531151102039288, + "loss": 1.4987, + "step": 480500 + }, + { + "epoch": 13.5, + "learning_rate": 0.00016517106421228066, + "loss": 1.5233, + "step": 481000 + }, + { + "epoch": 13.51, + "learning_rate": 0.00016503061740416846, + "loss": 1.5311, + "step": 481500 + }, + { + "epoch": 13.53, + "learning_rate": 0.00016489017059605622, + "loss": 1.507, + "step": 482000 + }, + { + "epoch": 13.54, + "learning_rate": 0.00016474972378794403, + "loss": 1.5194, + "step": 482500 + }, + { + "epoch": 13.55, + "learning_rate": 0.00016460927697983183, + "loss": 1.5057, + "step": 483000 + }, + { + "epoch": 13.57, + "learning_rate": 0.00016446883017171961, + "loss": 1.5187, + "step": 483500 + }, + { + "epoch": 13.58, + "learning_rate": 0.00016432838336360742, + "loss": 1.4982, + "step": 484000 + }, + { + "epoch": 13.6, + "learning_rate": 0.0001641879365554952, + "loss": 1.5115, + "step": 484500 + }, + { + "epoch": 13.61, + "learning_rate": 0.000164047489747383, + "loss": 1.4948, + "step": 485000 + }, + { + "epoch": 13.62, + "learning_rate": 0.00016390704293927077, + "loss": 1.5035, + "step": 485500 + }, + { + "epoch": 13.64, + "learning_rate": 0.00016376659613115857, + "loss": 1.4884, + "step": 486000 + }, + { + "epoch": 13.65, + "learning_rate": 0.00016362614932304638, + "loss": 1.518, + "step": 486500 + }, + { + "epoch": 13.67, + "learning_rate": 0.00016348570251493416, + "loss": 1.494, + "step": 487000 + }, + { + "epoch": 13.68, + "learning_rate": 0.00016334525570682197, + "loss": 1.4975, + "step": 487500 + }, + { + "epoch": 13.69, + "learning_rate": 0.00016320480889870975, + "loss": 1.5047, + "step": 488000 + }, + { + "epoch": 13.71, + "learning_rate": 0.00016306436209059753, + "loss": 1.5242, + "step": 488500 + }, + { + "epoch": 13.72, + "learning_rate": 0.0001629239152824853, + "loss": 1.5266, + "step": 489000 + }, + { + "epoch": 13.74, + "learning_rate": 0.00016278346847437312, + "loss": 1.5154, + "step": 489500 + }, + { + "epoch": 13.75, + "learning_rate": 0.00016264302166626093, + "loss": 1.5013, + "step": 490000 + }, + { + "epoch": 13.76, + "learning_rate": 0.0001625025748581487, + "loss": 1.5039, + "step": 490500 + }, + { + "epoch": 13.78, + "learning_rate": 0.00016236212805003652, + "loss": 1.4944, + "step": 491000 + }, + { + "epoch": 13.79, + "learning_rate": 0.00016222168124192427, + "loss": 1.5096, + "step": 491500 + }, + { + "epoch": 13.81, + "learning_rate": 0.00016208123443381208, + "loss": 1.5127, + "step": 492000 + }, + { + "epoch": 13.82, + "learning_rate": 0.00016194078762569986, + "loss": 1.4777, + "step": 492500 + }, + { + "epoch": 13.83, + "learning_rate": 0.00016180034081758767, + "loss": 1.5085, + "step": 493000 + }, + { + "epoch": 13.85, + "learning_rate": 0.00016165989400947548, + "loss": 1.5391, + "step": 493500 + }, + { + "epoch": 13.86, + "learning_rate": 0.00016151944720136326, + "loss": 1.5089, + "step": 494000 + }, + { + "epoch": 13.88, + "learning_rate": 0.00016137900039325107, + "loss": 1.5116, + "step": 494500 + }, + { + "epoch": 13.89, + "learning_rate": 0.00016123855358513882, + "loss": 1.5482, + "step": 495000 + }, + { + "epoch": 13.91, + "learning_rate": 0.00016109810677702663, + "loss": 1.5358, + "step": 495500 + }, + { + "epoch": 13.92, + "learning_rate": 0.0001609576599689144, + "loss": 1.507, + "step": 496000 + }, + { + "epoch": 13.93, + "learning_rate": 0.00016081721316080222, + "loss": 1.5343, + "step": 496500 + }, + { + "epoch": 13.95, + "learning_rate": 0.00016067676635269002, + "loss": 1.5384, + "step": 497000 + }, + { + "epoch": 13.96, + "learning_rate": 0.0001605363195445778, + "loss": 1.4906, + "step": 497500 + }, + { + "epoch": 13.98, + "learning_rate": 0.00016039587273646561, + "loss": 1.5225, + "step": 498000 + }, + { + "epoch": 13.99, + "learning_rate": 0.00016025542592835337, + "loss": 1.5258, + "step": 498500 + }, + { + "epoch": 14.0, + "learning_rate": 0.00016011497912024118, + "loss": 1.5214, + "step": 499000 + }, + { + "epoch": 14.02, + "learning_rate": 0.00015997453231212896, + "loss": 1.44, + "step": 499500 + }, + { + "epoch": 14.03, + "learning_rate": 0.00015983408550401676, + "loss": 1.4353, + "step": 500000 + }, + { + "epoch": 14.05, + "learning_rate": 0.00015969363869590457, + "loss": 1.4431, + "step": 500500 + }, + { + "epoch": 14.06, + "learning_rate": 0.00015955319188779235, + "loss": 1.4284, + "step": 501000 + }, + { + "epoch": 14.07, + "learning_rate": 0.00015941274507968016, + "loss": 1.4001, + "step": 501500 + }, + { + "epoch": 14.09, + "learning_rate": 0.00015927229827156792, + "loss": 1.453, + "step": 502000 + }, + { + "epoch": 14.1, + "learning_rate": 0.00015913185146345572, + "loss": 1.4384, + "step": 502500 + }, + { + "epoch": 14.12, + "learning_rate": 0.00015899140465534353, + "loss": 1.4264, + "step": 503000 + }, + { + "epoch": 14.13, + "learning_rate": 0.0001588509578472313, + "loss": 1.4246, + "step": 503500 + }, + { + "epoch": 14.14, + "learning_rate": 0.00015871051103911912, + "loss": 1.4032, + "step": 504000 + }, + { + "epoch": 14.16, + "learning_rate": 0.0001585700642310069, + "loss": 1.4715, + "step": 504500 + }, + { + "epoch": 14.17, + "learning_rate": 0.00015842961742289468, + "loss": 1.4753, + "step": 505000 + }, + { + "epoch": 14.19, + "learning_rate": 0.00015828917061478246, + "loss": 1.4413, + "step": 505500 + }, + { + "epoch": 14.2, + "learning_rate": 0.00015814872380667027, + "loss": 1.4542, + "step": 506000 + }, + { + "epoch": 14.21, + "learning_rate": 0.00015800827699855808, + "loss": 1.4645, + "step": 506500 + }, + { + "epoch": 14.23, + "learning_rate": 0.00015786783019044586, + "loss": 1.4672, + "step": 507000 + }, + { + "epoch": 14.24, + "learning_rate": 0.00015772738338233367, + "loss": 1.4443, + "step": 507500 + }, + { + "epoch": 14.26, + "learning_rate": 0.00015758693657422142, + "loss": 1.4625, + "step": 508000 + }, + { + "epoch": 14.27, + "learning_rate": 0.00015744648976610923, + "loss": 1.4585, + "step": 508500 + }, + { + "epoch": 14.28, + "learning_rate": 0.000157306042957997, + "loss": 1.4591, + "step": 509000 + }, + { + "epoch": 14.3, + "learning_rate": 0.00015716559614988482, + "loss": 1.4824, + "step": 509500 + }, + { + "epoch": 14.31, + "learning_rate": 0.00015702514934177263, + "loss": 1.4764, + "step": 510000 + }, + { + "epoch": 14.33, + "learning_rate": 0.0001568847025336604, + "loss": 1.476, + "step": 510500 + }, + { + "epoch": 14.34, + "learning_rate": 0.00015674425572554822, + "loss": 1.4494, + "step": 511000 + }, + { + "epoch": 14.35, + "learning_rate": 0.00015660380891743597, + "loss": 1.4923, + "step": 511500 + }, + { + "epoch": 14.37, + "learning_rate": 0.00015646336210932378, + "loss": 1.4689, + "step": 512000 + }, + { + "epoch": 14.38, + "learning_rate": 0.00015632291530121156, + "loss": 1.4711, + "step": 512500 + }, + { + "epoch": 14.4, + "learning_rate": 0.00015618246849309937, + "loss": 1.4899, + "step": 513000 + }, + { + "epoch": 14.41, + "learning_rate": 0.00015604202168498717, + "loss": 1.4408, + "step": 513500 + }, + { + "epoch": 14.42, + "learning_rate": 0.00015590157487687496, + "loss": 1.4241, + "step": 514000 + }, + { + "epoch": 14.44, + "learning_rate": 0.00015576112806876276, + "loss": 1.4586, + "step": 514500 + }, + { + "epoch": 14.45, + "learning_rate": 0.00015562068126065052, + "loss": 1.4789, + "step": 515000 + }, + { + "epoch": 14.47, + "learning_rate": 0.00015548023445253833, + "loss": 1.4828, + "step": 515500 + }, + { + "epoch": 14.48, + "learning_rate": 0.0001553397876444261, + "loss": 1.4457, + "step": 516000 + }, + { + "epoch": 14.49, + "learning_rate": 0.00015519934083631391, + "loss": 1.4695, + "step": 516500 + }, + { + "epoch": 14.51, + "learning_rate": 0.00015505889402820172, + "loss": 1.4421, + "step": 517000 + }, + { + "epoch": 14.52, + "learning_rate": 0.0001549184472200895, + "loss": 1.4464, + "step": 517500 + }, + { + "epoch": 14.54, + "learning_rate": 0.0001547780004119773, + "loss": 1.439, + "step": 518000 + }, + { + "epoch": 14.55, + "learning_rate": 0.00015463755360386506, + "loss": 1.4784, + "step": 518500 + }, + { + "epoch": 14.56, + "learning_rate": 0.00015449710679575287, + "loss": 1.423, + "step": 519000 + }, + { + "epoch": 14.58, + "learning_rate": 0.00015435665998764065, + "loss": 1.4751, + "step": 519500 + }, + { + "epoch": 14.59, + "learning_rate": 0.00015421621317952846, + "loss": 1.4603, + "step": 520000 + }, + { + "epoch": 14.61, + "learning_rate": 0.00015407576637141627, + "loss": 1.4713, + "step": 520500 + }, + { + "epoch": 14.62, + "learning_rate": 0.00015393531956330405, + "loss": 1.4577, + "step": 521000 + }, + { + "epoch": 14.63, + "learning_rate": 0.00015379487275519183, + "loss": 1.4449, + "step": 521500 + }, + { + "epoch": 14.65, + "learning_rate": 0.0001536544259470796, + "loss": 1.4836, + "step": 522000 + }, + { + "epoch": 14.66, + "learning_rate": 0.00015351397913896742, + "loss": 1.4734, + "step": 522500 + }, + { + "epoch": 14.68, + "learning_rate": 0.0001533735323308552, + "loss": 1.4549, + "step": 523000 + }, + { + "epoch": 14.69, + "learning_rate": 0.000153233085522743, + "loss": 1.4572, + "step": 523500 + }, + { + "epoch": 14.7, + "learning_rate": 0.00015309263871463082, + "loss": 1.4503, + "step": 524000 + }, + { + "epoch": 14.72, + "learning_rate": 0.00015295219190651857, + "loss": 1.4661, + "step": 524500 + }, + { + "epoch": 14.73, + "learning_rate": 0.00015281174509840638, + "loss": 1.4359, + "step": 525000 + }, + { + "epoch": 14.75, + "learning_rate": 0.00015267129829029416, + "loss": 1.4545, + "step": 525500 + }, + { + "epoch": 14.76, + "learning_rate": 0.00015253085148218197, + "loss": 1.4696, + "step": 526000 + }, + { + "epoch": 14.78, + "learning_rate": 0.00015239040467406978, + "loss": 1.4677, + "step": 526500 + }, + { + "epoch": 14.79, + "learning_rate": 0.00015224995786595756, + "loss": 1.4685, + "step": 527000 + }, + { + "epoch": 14.8, + "learning_rate": 0.00015210951105784537, + "loss": 1.4631, + "step": 527500 + }, + { + "epoch": 14.82, + "learning_rate": 0.00015196906424973312, + "loss": 1.4639, + "step": 528000 + }, + { + "epoch": 14.83, + "learning_rate": 0.00015182861744162093, + "loss": 1.4791, + "step": 528500 + }, + { + "epoch": 14.85, + "learning_rate": 0.0001516881706335087, + "loss": 1.4634, + "step": 529000 + }, + { + "epoch": 14.86, + "learning_rate": 0.00015154772382539652, + "loss": 1.4521, + "step": 529500 + }, + { + "epoch": 14.87, + "learning_rate": 0.00015140727701728432, + "loss": 1.4717, + "step": 530000 + }, + { + "epoch": 14.89, + "learning_rate": 0.0001512668302091721, + "loss": 1.4843, + "step": 530500 + }, + { + "epoch": 14.9, + "learning_rate": 0.0001511263834010599, + "loss": 1.4988, + "step": 531000 + }, + { + "epoch": 14.92, + "learning_rate": 0.00015098593659294767, + "loss": 1.4741, + "step": 531500 + }, + { + "epoch": 14.93, + "learning_rate": 0.00015084548978483547, + "loss": 1.471, + "step": 532000 + }, + { + "epoch": 14.94, + "learning_rate": 0.00015070504297672326, + "loss": 1.4618, + "step": 532500 + }, + { + "epoch": 14.96, + "learning_rate": 0.00015056459616861106, + "loss": 1.4727, + "step": 533000 + }, + { + "epoch": 14.97, + "learning_rate": 0.00015042414936049887, + "loss": 1.441, + "step": 533500 + }, + { + "epoch": 14.99, + "learning_rate": 0.00015028370255238665, + "loss": 1.513, + "step": 534000 + }, + { + "epoch": 15.0, + "learning_rate": 0.00015014325574427446, + "loss": 1.4718, + "step": 534500 + }, + { + "epoch": 15.01, + "learning_rate": 0.00015000280893616221, + "loss": 1.4022, + "step": 535000 + }, + { + "epoch": 15.03, + "learning_rate": 0.00014986236212805002, + "loss": 1.3613, + "step": 535500 + }, + { + "epoch": 15.04, + "learning_rate": 0.00014972191531993783, + "loss": 1.3951, + "step": 536000 + }, + { + "epoch": 15.06, + "learning_rate": 0.0001495814685118256, + "loss": 1.4099, + "step": 536500 + }, + { + "epoch": 15.07, + "learning_rate": 0.0001494410217037134, + "loss": 1.4049, + "step": 537000 + }, + { + "epoch": 15.08, + "learning_rate": 0.0001493005748956012, + "loss": 1.3861, + "step": 537500 + }, + { + "epoch": 15.1, + "learning_rate": 0.00014916012808748898, + "loss": 1.4068, + "step": 538000 + }, + { + "epoch": 15.11, + "learning_rate": 0.0001490196812793768, + "loss": 1.3857, + "step": 538500 + }, + { + "epoch": 15.13, + "learning_rate": 0.00014887923447126457, + "loss": 1.3978, + "step": 539000 + }, + { + "epoch": 15.14, + "learning_rate": 0.00014873878766315235, + "loss": 1.4054, + "step": 539500 + }, + { + "epoch": 15.15, + "learning_rate": 0.00014859834085504016, + "loss": 1.4441, + "step": 540000 + }, + { + "epoch": 15.17, + "learning_rate": 0.00014845789404692794, + "loss": 1.4118, + "step": 540500 + }, + { + "epoch": 15.18, + "learning_rate": 0.00014831744723881572, + "loss": 1.395, + "step": 541000 + }, + { + "epoch": 15.2, + "learning_rate": 0.00014817700043070353, + "loss": 1.3928, + "step": 541500 + }, + { + "epoch": 15.21, + "learning_rate": 0.00014803655362259134, + "loss": 1.4088, + "step": 542000 + }, + { + "epoch": 15.22, + "learning_rate": 0.00014789610681447912, + "loss": 1.4245, + "step": 542500 + }, + { + "epoch": 15.24, + "learning_rate": 0.0001477556600063669, + "loss": 1.3877, + "step": 543000 + }, + { + "epoch": 15.25, + "learning_rate": 0.0001476152131982547, + "loss": 1.4311, + "step": 543500 + }, + { + "epoch": 15.27, + "learning_rate": 0.0001474747663901425, + "loss": 1.4027, + "step": 544000 + }, + { + "epoch": 15.28, + "learning_rate": 0.00014733431958203027, + "loss": 1.4169, + "step": 544500 + }, + { + "epoch": 15.29, + "learning_rate": 0.00014719387277391808, + "loss": 1.3918, + "step": 545000 + }, + { + "epoch": 15.31, + "learning_rate": 0.00014705342596580588, + "loss": 1.4059, + "step": 545500 + }, + { + "epoch": 15.32, + "learning_rate": 0.00014691297915769367, + "loss": 1.4136, + "step": 546000 + }, + { + "epoch": 15.34, + "learning_rate": 0.00014677253234958145, + "loss": 1.3865, + "step": 546500 + }, + { + "epoch": 15.35, + "learning_rate": 0.00014663208554146925, + "loss": 1.3966, + "step": 547000 + }, + { + "epoch": 15.36, + "learning_rate": 0.00014649163873335704, + "loss": 1.4183, + "step": 547500 + }, + { + "epoch": 15.38, + "learning_rate": 0.00014635119192524482, + "loss": 1.4159, + "step": 548000 + }, + { + "epoch": 15.39, + "learning_rate": 0.00014621074511713262, + "loss": 1.4426, + "step": 548500 + }, + { + "epoch": 15.41, + "learning_rate": 0.00014607029830902043, + "loss": 1.4249, + "step": 549000 + }, + { + "epoch": 15.42, + "learning_rate": 0.0001459298515009082, + "loss": 1.4265, + "step": 549500 + }, + { + "epoch": 15.43, + "learning_rate": 0.000145789404692796, + "loss": 1.4052, + "step": 550000 + }, + { + "epoch": 15.45, + "learning_rate": 0.0001456489578846838, + "loss": 1.3974, + "step": 550500 + }, + { + "epoch": 15.46, + "learning_rate": 0.00014550851107657158, + "loss": 1.4252, + "step": 551000 + }, + { + "epoch": 15.48, + "learning_rate": 0.00014536806426845936, + "loss": 1.4379, + "step": 551500 + }, + { + "epoch": 15.49, + "learning_rate": 0.00014522761746034717, + "loss": 1.448, + "step": 552000 + }, + { + "epoch": 15.5, + "learning_rate": 0.00014508717065223498, + "loss": 1.3834, + "step": 552500 + }, + { + "epoch": 15.52, + "learning_rate": 0.00014494672384412276, + "loss": 1.44, + "step": 553000 + }, + { + "epoch": 15.53, + "learning_rate": 0.00014480627703601054, + "loss": 1.4171, + "step": 553500 + }, + { + "epoch": 15.55, + "learning_rate": 0.00014466583022789835, + "loss": 1.3753, + "step": 554000 + }, + { + "epoch": 15.56, + "learning_rate": 0.00014452538341978613, + "loss": 1.4184, + "step": 554500 + }, + { + "epoch": 15.57, + "learning_rate": 0.0001443849366116739, + "loss": 1.4294, + "step": 555000 + }, + { + "epoch": 15.59, + "learning_rate": 0.00014424448980356172, + "loss": 1.4299, + "step": 555500 + }, + { + "epoch": 15.6, + "learning_rate": 0.00014410404299544953, + "loss": 1.4341, + "step": 556000 + }, + { + "epoch": 15.62, + "learning_rate": 0.0001439635961873373, + "loss": 1.4158, + "step": 556500 + }, + { + "epoch": 15.63, + "learning_rate": 0.0001438231493792251, + "loss": 1.3985, + "step": 557000 + }, + { + "epoch": 15.64, + "learning_rate": 0.0001436827025711129, + "loss": 1.4194, + "step": 557500 + }, + { + "epoch": 15.66, + "learning_rate": 0.00014354225576300068, + "loss": 1.4301, + "step": 558000 + }, + { + "epoch": 15.67, + "learning_rate": 0.0001434018089548885, + "loss": 1.4088, + "step": 558500 + }, + { + "epoch": 15.69, + "learning_rate": 0.00014326136214677627, + "loss": 1.4206, + "step": 559000 + }, + { + "epoch": 15.7, + "learning_rate": 0.00014312091533866405, + "loss": 1.4305, + "step": 559500 + }, + { + "epoch": 15.72, + "learning_rate": 0.00014298046853055186, + "loss": 1.4144, + "step": 560000 + }, + { + "epoch": 15.73, + "learning_rate": 0.00014284002172243964, + "loss": 1.3909, + "step": 560500 + }, + { + "epoch": 15.74, + "learning_rate": 0.00014269957491432742, + "loss": 1.3907, + "step": 561000 + }, + { + "epoch": 15.76, + "learning_rate": 0.00014255912810621523, + "loss": 1.42, + "step": 561500 + }, + { + "epoch": 15.77, + "learning_rate": 0.00014241868129810303, + "loss": 1.4351, + "step": 562000 + }, + { + "epoch": 15.79, + "learning_rate": 0.00014227823448999082, + "loss": 1.4395, + "step": 562500 + }, + { + "epoch": 15.8, + "learning_rate": 0.0001421377876818786, + "loss": 1.4346, + "step": 563000 + }, + { + "epoch": 15.81, + "learning_rate": 0.0001419973408737664, + "loss": 1.4151, + "step": 563500 + }, + { + "epoch": 15.83, + "learning_rate": 0.00014185689406565419, + "loss": 1.411, + "step": 564000 + }, + { + "epoch": 15.84, + "learning_rate": 0.00014171644725754197, + "loss": 1.4364, + "step": 564500 + }, + { + "epoch": 15.86, + "learning_rate": 0.00014157600044942977, + "loss": 1.4227, + "step": 565000 + }, + { + "epoch": 15.87, + "learning_rate": 0.00014143555364131758, + "loss": 1.4068, + "step": 565500 + }, + { + "epoch": 15.88, + "learning_rate": 0.00014129510683320536, + "loss": 1.4267, + "step": 566000 + }, + { + "epoch": 15.9, + "learning_rate": 0.00014115466002509314, + "loss": 1.4291, + "step": 566500 + }, + { + "epoch": 15.91, + "learning_rate": 0.00014101421321698095, + "loss": 1.4357, + "step": 567000 + }, + { + "epoch": 15.93, + "learning_rate": 0.00014087376640886873, + "loss": 1.3937, + "step": 567500 + }, + { + "epoch": 15.94, + "learning_rate": 0.00014073331960075651, + "loss": 1.4021, + "step": 568000 + }, + { + "epoch": 15.95, + "learning_rate": 0.00014059287279264432, + "loss": 1.4269, + "step": 568500 + }, + { + "epoch": 15.97, + "learning_rate": 0.00014045242598453213, + "loss": 1.4335, + "step": 569000 + }, + { + "epoch": 15.98, + "learning_rate": 0.0001403119791764199, + "loss": 1.4595, + "step": 569500 + }, + { + "epoch": 16.0, + "learning_rate": 0.0001401715323683077, + "loss": 1.4421, + "step": 570000 + }, + { + "epoch": 16.01, + "learning_rate": 0.0001400310855601955, + "loss": 1.3778, + "step": 570500 + }, + { + "epoch": 16.02, + "learning_rate": 0.00013989063875208328, + "loss": 1.3223, + "step": 571000 + }, + { + "epoch": 16.04, + "learning_rate": 0.00013975019194397106, + "loss": 1.3708, + "step": 571500 + }, + { + "epoch": 16.05, + "learning_rate": 0.00013960974513585887, + "loss": 1.3409, + "step": 572000 + }, + { + "epoch": 16.07, + "learning_rate": 0.00013946929832774668, + "loss": 1.3737, + "step": 572500 + }, + { + "epoch": 16.08, + "learning_rate": 0.00013932885151963446, + "loss": 1.3662, + "step": 573000 + }, + { + "epoch": 16.09, + "learning_rate": 0.00013918840471152224, + "loss": 1.3599, + "step": 573500 + }, + { + "epoch": 16.11, + "learning_rate": 0.00013904795790341005, + "loss": 1.3449, + "step": 574000 + }, + { + "epoch": 16.12, + "learning_rate": 0.00013890751109529783, + "loss": 1.3371, + "step": 574500 + }, + { + "epoch": 16.14, + "learning_rate": 0.0001387670642871856, + "loss": 1.3623, + "step": 575000 + }, + { + "epoch": 16.15, + "learning_rate": 0.00013862661747907342, + "loss": 1.3336, + "step": 575500 + }, + { + "epoch": 16.16, + "learning_rate": 0.0001384861706709612, + "loss": 1.3426, + "step": 576000 + }, + { + "epoch": 16.18, + "learning_rate": 0.000138345723862849, + "loss": 1.389, + "step": 576500 + }, + { + "epoch": 16.19, + "learning_rate": 0.0001382052770547368, + "loss": 1.3399, + "step": 577000 + }, + { + "epoch": 16.21, + "learning_rate": 0.00013806483024662457, + "loss": 1.3722, + "step": 577500 + }, + { + "epoch": 16.22, + "learning_rate": 0.00013792438343851238, + "loss": 1.3475, + "step": 578000 + }, + { + "epoch": 16.23, + "learning_rate": 0.00013778393663040016, + "loss": 1.3582, + "step": 578500 + }, + { + "epoch": 16.25, + "learning_rate": 0.00013764348982228796, + "loss": 1.3363, + "step": 579000 + }, + { + "epoch": 16.26, + "learning_rate": 0.00013750304301417575, + "loss": 1.3506, + "step": 579500 + }, + { + "epoch": 16.28, + "learning_rate": 0.00013736259620606355, + "loss": 1.3908, + "step": 580000 + }, + { + "epoch": 16.29, + "learning_rate": 0.00013722214939795133, + "loss": 1.3727, + "step": 580500 + }, + { + "epoch": 16.3, + "learning_rate": 0.00013708170258983912, + "loss": 1.3497, + "step": 581000 + }, + { + "epoch": 16.32, + "learning_rate": 0.00013694125578172692, + "loss": 1.357, + "step": 581500 + }, + { + "epoch": 16.33, + "learning_rate": 0.0001368008089736147, + "loss": 1.3516, + "step": 582000 + }, + { + "epoch": 16.35, + "learning_rate": 0.0001366603621655025, + "loss": 1.3448, + "step": 582500 + }, + { + "epoch": 16.36, + "learning_rate": 0.0001365199153573903, + "loss": 1.3756, + "step": 583000 + }, + { + "epoch": 16.37, + "learning_rate": 0.0001363794685492781, + "loss": 1.3671, + "step": 583500 + }, + { + "epoch": 16.39, + "learning_rate": 0.00013623902174116588, + "loss": 1.3616, + "step": 584000 + }, + { + "epoch": 16.4, + "learning_rate": 0.00013609857493305366, + "loss": 1.3418, + "step": 584500 + }, + { + "epoch": 16.42, + "learning_rate": 0.00013595812812494147, + "loss": 1.3471, + "step": 585000 + }, + { + "epoch": 16.43, + "learning_rate": 0.00013581768131682928, + "loss": 1.358, + "step": 585500 + }, + { + "epoch": 16.44, + "learning_rate": 0.00013567723450871706, + "loss": 1.357, + "step": 586000 + }, + { + "epoch": 16.46, + "learning_rate": 0.00013553678770060484, + "loss": 1.3636, + "step": 586500 + }, + { + "epoch": 16.47, + "learning_rate": 0.00013539634089249265, + "loss": 1.3472, + "step": 587000 + }, + { + "epoch": 16.49, + "learning_rate": 0.00013525589408438043, + "loss": 1.3594, + "step": 587500 + }, + { + "epoch": 16.5, + "learning_rate": 0.0001351154472762682, + "loss": 1.3551, + "step": 588000 + }, + { + "epoch": 16.51, + "learning_rate": 0.00013497500046815602, + "loss": 1.3657, + "step": 588500 + }, + { + "epoch": 16.53, + "learning_rate": 0.00013483455366004383, + "loss": 1.3729, + "step": 589000 + }, + { + "epoch": 16.54, + "learning_rate": 0.0001346941068519316, + "loss": 1.3573, + "step": 589500 + }, + { + "epoch": 16.56, + "learning_rate": 0.0001345536600438194, + "loss": 1.336, + "step": 590000 + }, + { + "epoch": 16.57, + "learning_rate": 0.0001344132132357072, + "loss": 1.3719, + "step": 590500 + }, + { + "epoch": 16.59, + "learning_rate": 0.00013427276642759498, + "loss": 1.3597, + "step": 591000 + }, + { + "epoch": 16.6, + "learning_rate": 0.00013413231961948276, + "loss": 1.3644, + "step": 591500 + }, + { + "epoch": 16.61, + "learning_rate": 0.00013399187281137057, + "loss": 1.3769, + "step": 592000 + }, + { + "epoch": 16.63, + "learning_rate": 0.00013385142600325835, + "loss": 1.372, + "step": 592500 + }, + { + "epoch": 16.64, + "learning_rate": 0.00013371097919514616, + "loss": 1.3404, + "step": 593000 + }, + { + "epoch": 16.66, + "learning_rate": 0.00013357053238703394, + "loss": 1.3573, + "step": 593500 + }, + { + "epoch": 16.67, + "learning_rate": 0.00013343008557892172, + "loss": 1.3646, + "step": 594000 + }, + { + "epoch": 16.68, + "learning_rate": 0.00013328963877080953, + "loss": 1.3726, + "step": 594500 + }, + { + "epoch": 16.7, + "learning_rate": 0.0001331491919626973, + "loss": 1.347, + "step": 595000 + }, + { + "epoch": 16.71, + "learning_rate": 0.0001330087451545851, + "loss": 1.3938, + "step": 595500 + }, + { + "epoch": 16.73, + "learning_rate": 0.0001328682983464729, + "loss": 1.3703, + "step": 596000 + }, + { + "epoch": 16.74, + "learning_rate": 0.0001327278515383607, + "loss": 1.3553, + "step": 596500 + }, + { + "epoch": 16.75, + "learning_rate": 0.00013258740473024848, + "loss": 1.3985, + "step": 597000 + }, + { + "epoch": 16.77, + "learning_rate": 0.00013244695792213627, + "loss": 1.3638, + "step": 597500 + }, + { + "epoch": 16.78, + "learning_rate": 0.00013230651111402407, + "loss": 1.3921, + "step": 598000 + }, + { + "epoch": 16.8, + "learning_rate": 0.00013216606430591185, + "loss": 1.3586, + "step": 598500 + }, + { + "epoch": 16.81, + "learning_rate": 0.00013202561749779966, + "loss": 1.3723, + "step": 599000 + }, + { + "epoch": 16.82, + "learning_rate": 0.00013188517068968744, + "loss": 1.3877, + "step": 599500 + }, + { + "epoch": 16.84, + "learning_rate": 0.00013174472388157525, + "loss": 1.396, + "step": 600000 + }, + { + "epoch": 16.85, + "learning_rate": 0.00013160427707346303, + "loss": 1.3886, + "step": 600500 + }, + { + "epoch": 16.87, + "learning_rate": 0.0001314638302653508, + "loss": 1.3702, + "step": 601000 + }, + { + "epoch": 16.88, + "learning_rate": 0.00013132338345723862, + "loss": 1.3806, + "step": 601500 + }, + { + "epoch": 16.89, + "learning_rate": 0.0001311829366491264, + "loss": 1.3646, + "step": 602000 + }, + { + "epoch": 16.91, + "learning_rate": 0.0001310424898410142, + "loss": 1.3682, + "step": 602500 + }, + { + "epoch": 16.92, + "learning_rate": 0.000130902043032902, + "loss": 1.3821, + "step": 603000 + }, + { + "epoch": 16.94, + "learning_rate": 0.0001307615962247898, + "loss": 1.3607, + "step": 603500 + }, + { + "epoch": 16.95, + "learning_rate": 0.00013062114941667758, + "loss": 1.3596, + "step": 604000 + }, + { + "epoch": 16.96, + "learning_rate": 0.00013048070260856536, + "loss": 1.3993, + "step": 604500 + }, + { + "epoch": 16.98, + "learning_rate": 0.00013034025580045317, + "loss": 1.3803, + "step": 605000 + }, + { + "epoch": 16.99, + "learning_rate": 0.00013019980899234095, + "loss": 1.3975, + "step": 605500 + }, + { + "epoch": 17.01, + "learning_rate": 0.00013005936218422876, + "loss": 1.3645, + "step": 606000 + }, + { + "epoch": 17.02, + "learning_rate": 0.00012991891537611654, + "loss": 1.2937, + "step": 606500 + }, + { + "epoch": 17.03, + "learning_rate": 0.00012977846856800435, + "loss": 1.3025, + "step": 607000 + }, + { + "epoch": 17.05, + "learning_rate": 0.00012963802175989213, + "loss": 1.2941, + "step": 607500 + }, + { + "epoch": 17.06, + "learning_rate": 0.0001294975749517799, + "loss": 1.3042, + "step": 608000 + }, + { + "epoch": 17.08, + "learning_rate": 0.00012935712814366772, + "loss": 1.3002, + "step": 608500 + }, + { + "epoch": 17.09, + "learning_rate": 0.0001292166813355555, + "loss": 1.3202, + "step": 609000 + }, + { + "epoch": 17.1, + "learning_rate": 0.0001290762345274433, + "loss": 1.2977, + "step": 609500 + }, + { + "epoch": 17.12, + "learning_rate": 0.00012893578771933109, + "loss": 1.2959, + "step": 610000 + }, + { + "epoch": 17.13, + "learning_rate": 0.00012879534091121887, + "loss": 1.3038, + "step": 610500 + }, + { + "epoch": 17.15, + "learning_rate": 0.00012865489410310668, + "loss": 1.3174, + "step": 611000 + }, + { + "epoch": 17.16, + "learning_rate": 0.00012851444729499446, + "loss": 1.2808, + "step": 611500 + }, + { + "epoch": 17.17, + "learning_rate": 0.00012837400048688224, + "loss": 1.3028, + "step": 612000 + }, + { + "epoch": 17.19, + "learning_rate": 0.00012823355367877005, + "loss": 1.3325, + "step": 612500 + }, + { + "epoch": 17.2, + "learning_rate": 0.00012809310687065785, + "loss": 1.3061, + "step": 613000 + }, + { + "epoch": 17.22, + "learning_rate": 0.00012795266006254563, + "loss": 1.3116, + "step": 613500 + }, + { + "epoch": 17.23, + "learning_rate": 0.00012781221325443341, + "loss": 1.3119, + "step": 614000 + }, + { + "epoch": 17.24, + "learning_rate": 0.00012767176644632122, + "loss": 1.303, + "step": 614500 + }, + { + "epoch": 17.26, + "learning_rate": 0.000127531319638209, + "loss": 1.276, + "step": 615000 + }, + { + "epoch": 17.27, + "learning_rate": 0.00012739087283009678, + "loss": 1.3066, + "step": 615500 + }, + { + "epoch": 17.29, + "learning_rate": 0.0001272504260219846, + "loss": 1.3132, + "step": 616000 + }, + { + "epoch": 17.3, + "learning_rate": 0.0001271099792138724, + "loss": 1.2656, + "step": 616500 + }, + { + "epoch": 17.31, + "learning_rate": 0.00012696953240576018, + "loss": 1.3112, + "step": 617000 + }, + { + "epoch": 17.33, + "learning_rate": 0.00012682908559764796, + "loss": 1.337, + "step": 617500 + }, + { + "epoch": 17.34, + "learning_rate": 0.00012668863878953577, + "loss": 1.3286, + "step": 618000 + }, + { + "epoch": 17.36, + "learning_rate": 0.00012654819198142355, + "loss": 1.2837, + "step": 618500 + }, + { + "epoch": 17.37, + "learning_rate": 0.00012640774517331133, + "loss": 1.3011, + "step": 619000 + }, + { + "epoch": 17.38, + "learning_rate": 0.00012626729836519914, + "loss": 1.3076, + "step": 619500 + }, + { + "epoch": 17.4, + "learning_rate": 0.00012612685155708695, + "loss": 1.3343, + "step": 620000 + }, + { + "epoch": 17.41, + "learning_rate": 0.00012598640474897473, + "loss": 1.2962, + "step": 620500 + }, + { + "epoch": 17.43, + "learning_rate": 0.0001258459579408625, + "loss": 1.3236, + "step": 621000 + }, + { + "epoch": 17.44, + "learning_rate": 0.00012570551113275032, + "loss": 1.3082, + "step": 621500 + }, + { + "epoch": 17.45, + "learning_rate": 0.0001255650643246381, + "loss": 1.3086, + "step": 622000 + }, + { + "epoch": 17.47, + "learning_rate": 0.0001254246175165259, + "loss": 1.3151, + "step": 622500 + }, + { + "epoch": 17.48, + "learning_rate": 0.0001252841707084137, + "loss": 1.3091, + "step": 623000 + }, + { + "epoch": 17.5, + "learning_rate": 0.0001251437239003015, + "loss": 1.3105, + "step": 623500 + }, + { + "epoch": 17.51, + "learning_rate": 0.00012500327709218928, + "loss": 1.3166, + "step": 624000 + }, + { + "epoch": 17.53, + "learning_rate": 0.00012486283028407706, + "loss": 1.3014, + "step": 624500 + }, + { + "epoch": 17.54, + "learning_rate": 0.00012472238347596487, + "loss": 1.3021, + "step": 625000 + }, + { + "epoch": 17.55, + "learning_rate": 0.00012458193666785265, + "loss": 1.3102, + "step": 625500 + }, + { + "epoch": 17.57, + "learning_rate": 0.00012444148985974046, + "loss": 1.3234, + "step": 626000 + }, + { + "epoch": 17.58, + "learning_rate": 0.00012430104305162824, + "loss": 1.336, + "step": 626500 + }, + { + "epoch": 17.6, + "learning_rate": 0.00012416059624351604, + "loss": 1.315, + "step": 627000 + }, + { + "epoch": 17.61, + "learning_rate": 0.00012402014943540382, + "loss": 1.3038, + "step": 627500 + }, + { + "epoch": 17.62, + "learning_rate": 0.0001238797026272916, + "loss": 1.3132, + "step": 628000 + }, + { + "epoch": 17.64, + "learning_rate": 0.00012373925581917941, + "loss": 1.3037, + "step": 628500 + }, + { + "epoch": 17.65, + "learning_rate": 0.0001235988090110672, + "loss": 1.3109, + "step": 629000 + }, + { + "epoch": 17.67, + "learning_rate": 0.000123458362202955, + "loss": 1.3006, + "step": 629500 + }, + { + "epoch": 17.68, + "learning_rate": 0.00012331791539484278, + "loss": 1.3587, + "step": 630000 + }, + { + "epoch": 17.69, + "learning_rate": 0.00012317746858673056, + "loss": 1.3146, + "step": 630500 + }, + { + "epoch": 17.71, + "learning_rate": 0.00012303702177861837, + "loss": 1.3138, + "step": 631000 + }, + { + "epoch": 17.72, + "learning_rate": 0.00012289657497050615, + "loss": 1.2895, + "step": 631500 + }, + { + "epoch": 17.74, + "learning_rate": 0.00012275612816239393, + "loss": 1.3425, + "step": 632000 + }, + { + "epoch": 17.75, + "learning_rate": 0.00012261568135428174, + "loss": 1.3281, + "step": 632500 + }, + { + "epoch": 17.76, + "learning_rate": 0.00012247523454616955, + "loss": 1.326, + "step": 633000 + }, + { + "epoch": 17.78, + "learning_rate": 0.00012233478773805733, + "loss": 1.3228, + "step": 633500 + }, + { + "epoch": 17.79, + "learning_rate": 0.0001221943409299451, + "loss": 1.3185, + "step": 634000 + }, + { + "epoch": 17.81, + "learning_rate": 0.0001220538941218329, + "loss": 1.349, + "step": 634500 + }, + { + "epoch": 17.82, + "learning_rate": 0.0001219134473137207, + "loss": 1.3335, + "step": 635000 + }, + { + "epoch": 17.83, + "learning_rate": 0.0001217730005056085, + "loss": 1.3331, + "step": 635500 + }, + { + "epoch": 17.85, + "learning_rate": 0.0001216325536974963, + "loss": 1.3346, + "step": 636000 + }, + { + "epoch": 17.86, + "learning_rate": 0.00012149210688938408, + "loss": 1.3286, + "step": 636500 + }, + { + "epoch": 17.88, + "learning_rate": 0.00012135166008127188, + "loss": 1.3264, + "step": 637000 + }, + { + "epoch": 17.89, + "learning_rate": 0.00012121121327315967, + "loss": 1.3262, + "step": 637500 + }, + { + "epoch": 17.9, + "learning_rate": 0.00012107076646504745, + "loss": 1.333, + "step": 638000 + }, + { + "epoch": 17.92, + "learning_rate": 0.00012093031965693525, + "loss": 1.3542, + "step": 638500 + }, + { + "epoch": 17.93, + "learning_rate": 0.00012078987284882304, + "loss": 1.3194, + "step": 639000 + }, + { + "epoch": 17.95, + "learning_rate": 0.00012064942604071085, + "loss": 1.3137, + "step": 639500 + }, + { + "epoch": 17.96, + "learning_rate": 0.00012050897923259863, + "loss": 1.3317, + "step": 640000 + }, + { + "epoch": 17.97, + "learning_rate": 0.00012036853242448643, + "loss": 1.3195, + "step": 640500 + }, + { + "epoch": 17.99, + "learning_rate": 0.00012022808561637422, + "loss": 1.3313, + "step": 641000 + }, + { + "epoch": 18.0, + "learning_rate": 0.000120087638808262, + "loss": 1.3185, + "step": 641500 + }, + { + "epoch": 18.02, + "learning_rate": 0.0001199471920001498, + "loss": 1.2436, + "step": 642000 + }, + { + "epoch": 18.03, + "learning_rate": 0.00011980674519203758, + "loss": 1.2658, + "step": 642500 + }, + { + "epoch": 18.04, + "learning_rate": 0.00011966629838392539, + "loss": 1.2675, + "step": 643000 + }, + { + "epoch": 18.06, + "learning_rate": 0.00011952585157581318, + "loss": 1.2203, + "step": 643500 + }, + { + "epoch": 18.07, + "learning_rate": 0.00011938540476770097, + "loss": 1.2593, + "step": 644000 + }, + { + "epoch": 18.09, + "learning_rate": 0.00011924495795958876, + "loss": 1.2784, + "step": 644500 + }, + { + "epoch": 18.1, + "learning_rate": 0.00011910451115147655, + "loss": 1.2633, + "step": 645000 + }, + { + "epoch": 18.11, + "learning_rate": 0.00011896406434336434, + "loss": 1.2395, + "step": 645500 + }, + { + "epoch": 18.13, + "learning_rate": 0.00011882361753525213, + "loss": 1.2544, + "step": 646000 + }, + { + "epoch": 18.14, + "learning_rate": 0.00011868317072713993, + "loss": 1.2599, + "step": 646500 + }, + { + "epoch": 18.16, + "learning_rate": 0.00011854272391902773, + "loss": 1.2445, + "step": 647000 + }, + { + "epoch": 18.17, + "learning_rate": 0.00011840227711091552, + "loss": 1.2267, + "step": 647500 + }, + { + "epoch": 18.18, + "learning_rate": 0.0001182618303028033, + "loss": 1.2634, + "step": 648000 + }, + { + "epoch": 18.2, + "learning_rate": 0.0001181213834946911, + "loss": 1.2239, + "step": 648500 + }, + { + "epoch": 18.21, + "learning_rate": 0.00011798093668657889, + "loss": 1.2746, + "step": 649000 + }, + { + "epoch": 18.23, + "learning_rate": 0.00011784048987846669, + "loss": 1.2392, + "step": 649500 + }, + { + "epoch": 18.24, + "learning_rate": 0.00011770004307035448, + "loss": 1.2407, + "step": 650000 + }, + { + "epoch": 18.25, + "learning_rate": 0.00011755959626224228, + "loss": 1.2554, + "step": 650500 + }, + { + "epoch": 18.27, + "learning_rate": 0.00011741914945413006, + "loss": 1.2624, + "step": 651000 + }, + { + "epoch": 18.28, + "learning_rate": 0.00011727870264601785, + "loss": 1.2498, + "step": 651500 + }, + { + "epoch": 18.3, + "learning_rate": 0.00011713825583790565, + "loss": 1.2597, + "step": 652000 + }, + { + "epoch": 18.31, + "learning_rate": 0.00011699780902979343, + "loss": 1.278, + "step": 652500 + }, + { + "epoch": 18.32, + "learning_rate": 0.00011685736222168123, + "loss": 1.2565, + "step": 653000 + }, + { + "epoch": 18.34, + "learning_rate": 0.00011671691541356903, + "loss": 1.2496, + "step": 653500 + }, + { + "epoch": 18.35, + "learning_rate": 0.00011657646860545682, + "loss": 1.2582, + "step": 654000 + }, + { + "epoch": 18.37, + "learning_rate": 0.0001164360217973446, + "loss": 1.2733, + "step": 654500 + }, + { + "epoch": 18.38, + "learning_rate": 0.0001162955749892324, + "loss": 1.2515, + "step": 655000 + }, + { + "epoch": 18.4, + "learning_rate": 0.00011615512818112019, + "loss": 1.2596, + "step": 655500 + }, + { + "epoch": 18.41, + "learning_rate": 0.00011601468137300797, + "loss": 1.2828, + "step": 656000 + }, + { + "epoch": 18.42, + "learning_rate": 0.00011587423456489578, + "loss": 1.2634, + "step": 656500 + }, + { + "epoch": 18.44, + "learning_rate": 0.00011573378775678358, + "loss": 1.2501, + "step": 657000 + }, + { + "epoch": 18.45, + "learning_rate": 0.00011559334094867137, + "loss": 1.2474, + "step": 657500 + }, + { + "epoch": 18.47, + "learning_rate": 0.00011545289414055915, + "loss": 1.2746, + "step": 658000 + }, + { + "epoch": 18.48, + "learning_rate": 0.00011531244733244695, + "loss": 1.2862, + "step": 658500 + }, + { + "epoch": 18.49, + "learning_rate": 0.00011517200052433474, + "loss": 1.2696, + "step": 659000 + }, + { + "epoch": 18.51, + "learning_rate": 0.00011503155371622252, + "loss": 1.2729, + "step": 659500 + }, + { + "epoch": 18.52, + "learning_rate": 0.00011489110690811033, + "loss": 1.255, + "step": 660000 + }, + { + "epoch": 18.54, + "learning_rate": 0.00011475066009999812, + "loss": 1.2736, + "step": 660500 + }, + { + "epoch": 18.55, + "learning_rate": 0.0001146102132918859, + "loss": 1.2411, + "step": 661000 + }, + { + "epoch": 18.56, + "learning_rate": 0.0001144697664837737, + "loss": 1.2661, + "step": 661500 + }, + { + "epoch": 18.58, + "learning_rate": 0.0001143293196756615, + "loss": 1.2637, + "step": 662000 + }, + { + "epoch": 18.59, + "learning_rate": 0.00011418887286754927, + "loss": 1.2766, + "step": 662500 + }, + { + "epoch": 18.61, + "learning_rate": 0.00011404842605943708, + "loss": 1.2677, + "step": 663000 + }, + { + "epoch": 18.62, + "learning_rate": 0.00011390797925132488, + "loss": 1.2798, + "step": 663500 + }, + { + "epoch": 18.63, + "learning_rate": 0.00011376753244321267, + "loss": 1.2858, + "step": 664000 + }, + { + "epoch": 18.65, + "learning_rate": 0.00011362708563510045, + "loss": 1.2779, + "step": 664500 + }, + { + "epoch": 18.66, + "learning_rate": 0.00011348663882698825, + "loss": 1.2734, + "step": 665000 + }, + { + "epoch": 18.68, + "learning_rate": 0.00011334619201887604, + "loss": 1.2503, + "step": 665500 + }, + { + "epoch": 18.69, + "learning_rate": 0.00011320574521076382, + "loss": 1.2903, + "step": 666000 + }, + { + "epoch": 18.7, + "learning_rate": 0.00011306529840265163, + "loss": 1.2798, + "step": 666500 + }, + { + "epoch": 18.72, + "learning_rate": 0.00011292485159453943, + "loss": 1.2993, + "step": 667000 + }, + { + "epoch": 18.73, + "learning_rate": 0.0001127844047864272, + "loss": 1.259, + "step": 667500 + }, + { + "epoch": 18.75, + "learning_rate": 0.000112643957978315, + "loss": 1.2531, + "step": 668000 + }, + { + "epoch": 18.76, + "learning_rate": 0.0001125035111702028, + "loss": 1.2713, + "step": 668500 + }, + { + "epoch": 18.77, + "learning_rate": 0.00011236306436209058, + "loss": 1.2919, + "step": 669000 + }, + { + "epoch": 18.79, + "learning_rate": 0.00011222261755397837, + "loss": 1.2812, + "step": 669500 + }, + { + "epoch": 18.8, + "learning_rate": 0.00011208217074586618, + "loss": 1.2909, + "step": 670000 + }, + { + "epoch": 18.82, + "learning_rate": 0.00011194172393775397, + "loss": 1.2807, + "step": 670500 + }, + { + "epoch": 18.83, + "learning_rate": 0.00011180127712964175, + "loss": 1.2726, + "step": 671000 + }, + { + "epoch": 18.84, + "learning_rate": 0.00011166083032152955, + "loss": 1.2782, + "step": 671500 + }, + { + "epoch": 18.86, + "learning_rate": 0.00011152038351341734, + "loss": 1.2554, + "step": 672000 + }, + { + "epoch": 18.87, + "learning_rate": 0.00011137993670530512, + "loss": 1.2748, + "step": 672500 + }, + { + "epoch": 18.89, + "learning_rate": 0.00011123948989719293, + "loss": 1.2875, + "step": 673000 + }, + { + "epoch": 18.9, + "learning_rate": 0.00011109904308908073, + "loss": 1.2593, + "step": 673500 + }, + { + "epoch": 18.91, + "learning_rate": 0.00011095859628096852, + "loss": 1.3059, + "step": 674000 + }, + { + "epoch": 18.93, + "learning_rate": 0.0001108181494728563, + "loss": 1.2666, + "step": 674500 + }, + { + "epoch": 18.94, + "learning_rate": 0.0001106777026647441, + "loss": 1.2965, + "step": 675000 + }, + { + "epoch": 18.96, + "learning_rate": 0.00011053725585663189, + "loss": 1.2656, + "step": 675500 + }, + { + "epoch": 18.97, + "learning_rate": 0.00011039680904851967, + "loss": 1.2737, + "step": 676000 + }, + { + "epoch": 18.98, + "learning_rate": 0.00011025636224040748, + "loss": 1.3124, + "step": 676500 + }, + { + "epoch": 19.0, + "learning_rate": 0.00011011591543229527, + "loss": 1.2881, + "step": 677000 + }, + { + "epoch": 19.01, + "learning_rate": 0.00010997546862418305, + "loss": 1.2242, + "step": 677500 + }, + { + "epoch": 19.03, + "learning_rate": 0.00010983502181607085, + "loss": 1.2085, + "step": 678000 + }, + { + "epoch": 19.04, + "learning_rate": 0.00010969457500795864, + "loss": 1.2096, + "step": 678500 + }, + { + "epoch": 19.05, + "learning_rate": 0.00010955412819984642, + "loss": 1.175, + "step": 679000 + }, + { + "epoch": 19.07, + "learning_rate": 0.00010941368139173422, + "loss": 1.2158, + "step": 679500 + }, + { + "epoch": 19.08, + "learning_rate": 0.00010927323458362203, + "loss": 1.192, + "step": 680000 + }, + { + "epoch": 19.1, + "learning_rate": 0.00010913278777550982, + "loss": 1.211, + "step": 680500 + }, + { + "epoch": 19.11, + "learning_rate": 0.0001089923409673976, + "loss": 1.1971, + "step": 681000 + }, + { + "epoch": 19.12, + "learning_rate": 0.0001088518941592854, + "loss": 1.2256, + "step": 681500 + }, + { + "epoch": 19.14, + "learning_rate": 0.00010871144735117319, + "loss": 1.2046, + "step": 682000 + }, + { + "epoch": 19.15, + "learning_rate": 0.00010857100054306097, + "loss": 1.1963, + "step": 682500 + }, + { + "epoch": 19.17, + "learning_rate": 0.00010843055373494877, + "loss": 1.1934, + "step": 683000 + }, + { + "epoch": 19.18, + "learning_rate": 0.00010829010692683657, + "loss": 1.2209, + "step": 683500 + }, + { + "epoch": 19.19, + "learning_rate": 0.00010814966011872437, + "loss": 1.1987, + "step": 684000 + }, + { + "epoch": 19.21, + "learning_rate": 0.00010800921331061215, + "loss": 1.1989, + "step": 684500 + }, + { + "epoch": 19.22, + "learning_rate": 0.00010786876650249994, + "loss": 1.2023, + "step": 685000 + }, + { + "epoch": 19.24, + "learning_rate": 0.00010772831969438774, + "loss": 1.2116, + "step": 685500 + }, + { + "epoch": 19.25, + "learning_rate": 0.00010758787288627552, + "loss": 1.2047, + "step": 686000 + }, + { + "epoch": 19.27, + "learning_rate": 0.00010744742607816333, + "loss": 1.2377, + "step": 686500 + }, + { + "epoch": 19.28, + "learning_rate": 0.00010730697927005112, + "loss": 1.189, + "step": 687000 + }, + { + "epoch": 19.29, + "learning_rate": 0.0001071665324619389, + "loss": 1.2157, + "step": 687500 + }, + { + "epoch": 19.31, + "learning_rate": 0.0001070260856538267, + "loss": 1.2216, + "step": 688000 + }, + { + "epoch": 19.32, + "learning_rate": 0.00010688563884571449, + "loss": 1.177, + "step": 688500 + }, + { + "epoch": 19.34, + "learning_rate": 0.00010674519203760227, + "loss": 1.2182, + "step": 689000 + }, + { + "epoch": 19.35, + "learning_rate": 0.00010660474522949007, + "loss": 1.1988, + "step": 689500 + }, + { + "epoch": 19.36, + "learning_rate": 0.00010646429842137788, + "loss": 1.2144, + "step": 690000 + }, + { + "epoch": 19.38, + "learning_rate": 0.00010632385161326567, + "loss": 1.2286, + "step": 690500 + }, + { + "epoch": 19.39, + "learning_rate": 0.00010618340480515345, + "loss": 1.2298, + "step": 691000 + }, + { + "epoch": 19.41, + "learning_rate": 0.00010604295799704125, + "loss": 1.1789, + "step": 691500 + }, + { + "epoch": 19.42, + "learning_rate": 0.00010590251118892904, + "loss": 1.2358, + "step": 692000 + }, + { + "epoch": 19.43, + "learning_rate": 0.00010576206438081682, + "loss": 1.2112, + "step": 692500 + }, + { + "epoch": 19.45, + "learning_rate": 0.00010562161757270462, + "loss": 1.2054, + "step": 693000 + }, + { + "epoch": 19.46, + "learning_rate": 0.00010548117076459242, + "loss": 1.2202, + "step": 693500 + }, + { + "epoch": 19.48, + "learning_rate": 0.0001053407239564802, + "loss": 1.2374, + "step": 694000 + }, + { + "epoch": 19.49, + "learning_rate": 0.000105200277148368, + "loss": 1.2075, + "step": 694500 + }, + { + "epoch": 19.5, + "learning_rate": 0.0001050598303402558, + "loss": 1.2234, + "step": 695000 + }, + { + "epoch": 19.52, + "learning_rate": 0.00010491938353214357, + "loss": 1.2231, + "step": 695500 + }, + { + "epoch": 19.53, + "learning_rate": 0.00010477893672403137, + "loss": 1.2446, + "step": 696000 + }, + { + "epoch": 19.55, + "learning_rate": 0.00010463848991591916, + "loss": 1.2135, + "step": 696500 + }, + { + "epoch": 19.56, + "learning_rate": 0.00010449804310780697, + "loss": 1.2248, + "step": 697000 + }, + { + "epoch": 19.57, + "learning_rate": 0.00010435759629969475, + "loss": 1.2104, + "step": 697500 + }, + { + "epoch": 19.59, + "learning_rate": 0.00010421714949158255, + "loss": 1.2013, + "step": 698000 + }, + { + "epoch": 19.6, + "learning_rate": 0.00010407670268347034, + "loss": 1.2264, + "step": 698500 + }, + { + "epoch": 19.62, + "learning_rate": 0.00010393625587535812, + "loss": 1.2195, + "step": 699000 + }, + { + "epoch": 19.63, + "learning_rate": 0.00010379580906724592, + "loss": 1.2195, + "step": 699500 + }, + { + "epoch": 19.64, + "learning_rate": 0.00010365536225913372, + "loss": 1.2198, + "step": 700000 + }, + { + "epoch": 19.66, + "learning_rate": 0.00010351491545102152, + "loss": 1.1913, + "step": 700500 + }, + { + "epoch": 19.67, + "learning_rate": 0.0001033744686429093, + "loss": 1.2231, + "step": 701000 + }, + { + "epoch": 19.69, + "learning_rate": 0.0001032340218347971, + "loss": 1.2456, + "step": 701500 + }, + { + "epoch": 19.7, + "learning_rate": 0.00010309357502668489, + "loss": 1.2426, + "step": 702000 + }, + { + "epoch": 19.71, + "learning_rate": 0.00010295312821857267, + "loss": 1.2245, + "step": 702500 + }, + { + "epoch": 19.73, + "learning_rate": 0.00010281268141046046, + "loss": 1.28, + "step": 703000 + }, + { + "epoch": 19.74, + "learning_rate": 0.00010267223460234827, + "loss": 1.253, + "step": 703500 + }, + { + "epoch": 19.76, + "learning_rate": 0.00010253178779423605, + "loss": 1.2213, + "step": 704000 + }, + { + "epoch": 19.77, + "learning_rate": 0.00010239134098612385, + "loss": 1.2145, + "step": 704500 + }, + { + "epoch": 19.78, + "learning_rate": 0.00010225089417801164, + "loss": 1.2306, + "step": 705000 + }, + { + "epoch": 19.8, + "learning_rate": 0.00010211044736989942, + "loss": 1.241, + "step": 705500 + }, + { + "epoch": 19.81, + "learning_rate": 0.00010197000056178722, + "loss": 1.2447, + "step": 706000 + }, + { + "epoch": 19.83, + "learning_rate": 0.00010182955375367501, + "loss": 1.2606, + "step": 706500 + }, + { + "epoch": 19.84, + "learning_rate": 0.00010168910694556282, + "loss": 1.2593, + "step": 707000 + }, + { + "epoch": 19.85, + "learning_rate": 0.0001015486601374506, + "loss": 1.2424, + "step": 707500 + }, + { + "epoch": 19.87, + "learning_rate": 0.0001014082133293384, + "loss": 1.2421, + "step": 708000 + }, + { + "epoch": 19.88, + "learning_rate": 0.00010126776652122619, + "loss": 1.2328, + "step": 708500 + }, + { + "epoch": 19.9, + "learning_rate": 0.00010112731971311397, + "loss": 1.2323, + "step": 709000 + }, + { + "epoch": 19.91, + "learning_rate": 0.00010098687290500177, + "loss": 1.2401, + "step": 709500 + }, + { + "epoch": 19.92, + "learning_rate": 0.00010084642609688956, + "loss": 1.2343, + "step": 710000 + }, + { + "epoch": 19.94, + "learning_rate": 0.00010070597928877737, + "loss": 1.1958, + "step": 710500 + }, + { + "epoch": 19.95, + "learning_rate": 0.00010056553248066515, + "loss": 1.2349, + "step": 711000 + }, + { + "epoch": 19.97, + "learning_rate": 0.00010042508567255294, + "loss": 1.2477, + "step": 711500 + }, + { + "epoch": 19.98, + "learning_rate": 0.00010028463886444074, + "loss": 1.2248, + "step": 712000 + }, + { + "epoch": 19.99, + "learning_rate": 0.00010014419205632852, + "loss": 1.2459, + "step": 712500 + }, + { + "epoch": 20.01, + "learning_rate": 0.00010000374524821631, + "loss": 1.1723, + "step": 713000 + }, + { + "epoch": 20.02, + "learning_rate": 9.986329844010412e-05, + "loss": 1.1634, + "step": 713500 + }, + { + "epoch": 20.04, + "learning_rate": 9.97228516319919e-05, + "loss": 1.1365, + "step": 714000 + }, + { + "epoch": 20.05, + "learning_rate": 9.95824048238797e-05, + "loss": 1.1556, + "step": 714500 + }, + { + "epoch": 20.06, + "learning_rate": 9.944195801576749e-05, + "loss": 1.179, + "step": 715000 + }, + { + "epoch": 20.08, + "learning_rate": 9.930151120765527e-05, + "loss": 1.1533, + "step": 715500 + }, + { + "epoch": 20.09, + "learning_rate": 9.916106439954307e-05, + "loss": 1.1557, + "step": 716000 + }, + { + "epoch": 20.11, + "learning_rate": 9.902061759143086e-05, + "loss": 1.1516, + "step": 716500 + }, + { + "epoch": 20.12, + "learning_rate": 9.888017078331867e-05, + "loss": 1.1769, + "step": 717000 + }, + { + "epoch": 20.13, + "learning_rate": 9.873972397520645e-05, + "loss": 1.1663, + "step": 717500 + }, + { + "epoch": 20.15, + "learning_rate": 9.859927716709424e-05, + "loss": 1.1744, + "step": 718000 + }, + { + "epoch": 20.16, + "learning_rate": 9.845883035898204e-05, + "loss": 1.1447, + "step": 718500 + }, + { + "epoch": 20.18, + "learning_rate": 9.831838355086982e-05, + "loss": 1.1439, + "step": 719000 + }, + { + "epoch": 20.19, + "learning_rate": 9.817793674275761e-05, + "loss": 1.1868, + "step": 719500 + }, + { + "epoch": 20.21, + "learning_rate": 9.803748993464541e-05, + "loss": 1.181, + "step": 720000 + }, + { + "epoch": 20.22, + "learning_rate": 9.78970431265332e-05, + "loss": 1.1764, + "step": 720500 + }, + { + "epoch": 20.23, + "learning_rate": 9.7756596318421e-05, + "loss": 1.1794, + "step": 721000 + }, + { + "epoch": 20.25, + "learning_rate": 9.761614951030879e-05, + "loss": 1.1583, + "step": 721500 + }, + { + "epoch": 20.26, + "learning_rate": 9.747570270219657e-05, + "loss": 1.174, + "step": 722000 + }, + { + "epoch": 20.28, + "learning_rate": 9.733525589408437e-05, + "loss": 1.1712, + "step": 722500 + }, + { + "epoch": 20.29, + "learning_rate": 9.719480908597216e-05, + "loss": 1.1656, + "step": 723000 + }, + { + "epoch": 20.3, + "learning_rate": 9.705436227785997e-05, + "loss": 1.1593, + "step": 723500 + }, + { + "epoch": 20.32, + "learning_rate": 9.691391546974775e-05, + "loss": 1.1696, + "step": 724000 + }, + { + "epoch": 20.33, + "learning_rate": 9.677346866163554e-05, + "loss": 1.1739, + "step": 724500 + }, + { + "epoch": 20.35, + "learning_rate": 9.663302185352334e-05, + "loss": 1.1932, + "step": 725000 + }, + { + "epoch": 20.36, + "learning_rate": 9.649257504541112e-05, + "loss": 1.1506, + "step": 725500 + }, + { + "epoch": 20.37, + "learning_rate": 9.635212823729891e-05, + "loss": 1.1521, + "step": 726000 + }, + { + "epoch": 20.39, + "learning_rate": 9.621168142918671e-05, + "loss": 1.177, + "step": 726500 + }, + { + "epoch": 20.4, + "learning_rate": 9.607123462107452e-05, + "loss": 1.1874, + "step": 727000 + }, + { + "epoch": 20.42, + "learning_rate": 9.59307878129623e-05, + "loss": 1.1543, + "step": 727500 + }, + { + "epoch": 20.43, + "learning_rate": 9.579034100485009e-05, + "loss": 1.1798, + "step": 728000 + }, + { + "epoch": 20.44, + "learning_rate": 9.564989419673789e-05, + "loss": 1.1665, + "step": 728500 + }, + { + "epoch": 20.46, + "learning_rate": 9.550944738862567e-05, + "loss": 1.1723, + "step": 729000 + }, + { + "epoch": 20.47, + "learning_rate": 9.536900058051346e-05, + "loss": 1.1711, + "step": 729500 + }, + { + "epoch": 20.49, + "learning_rate": 9.522855377240126e-05, + "loss": 1.1574, + "step": 730000 + }, + { + "epoch": 20.5, + "learning_rate": 9.508810696428905e-05, + "loss": 1.2109, + "step": 730500 + }, + { + "epoch": 20.51, + "learning_rate": 9.494766015617685e-05, + "loss": 1.1871, + "step": 731000 + }, + { + "epoch": 20.53, + "learning_rate": 9.480721334806464e-05, + "loss": 1.1752, + "step": 731500 + }, + { + "epoch": 20.54, + "learning_rate": 9.466676653995242e-05, + "loss": 1.1446, + "step": 732000 + }, + { + "epoch": 20.56, + "learning_rate": 9.452631973184022e-05, + "loss": 1.2237, + "step": 732500 + }, + { + "epoch": 20.57, + "learning_rate": 9.438587292372801e-05, + "loss": 1.1726, + "step": 733000 + }, + { + "epoch": 20.58, + "learning_rate": 9.424542611561579e-05, + "loss": 1.1836, + "step": 733500 + }, + { + "epoch": 20.6, + "learning_rate": 9.41049793075036e-05, + "loss": 1.1865, + "step": 734000 + }, + { + "epoch": 20.61, + "learning_rate": 9.39645324993914e-05, + "loss": 1.1597, + "step": 734500 + }, + { + "epoch": 20.63, + "learning_rate": 9.382408569127919e-05, + "loss": 1.1935, + "step": 735000 + }, + { + "epoch": 20.64, + "learning_rate": 9.368363888316697e-05, + "loss": 1.1654, + "step": 735500 + }, + { + "epoch": 20.65, + "learning_rate": 9.354319207505476e-05, + "loss": 1.1634, + "step": 736000 + }, + { + "epoch": 20.67, + "learning_rate": 9.340274526694256e-05, + "loss": 1.1966, + "step": 736500 + }, + { + "epoch": 20.68, + "learning_rate": 9.326229845883035e-05, + "loss": 1.1762, + "step": 737000 + }, + { + "epoch": 20.7, + "learning_rate": 9.312185165071815e-05, + "loss": 1.1922, + "step": 737500 + }, + { + "epoch": 20.71, + "learning_rate": 9.298140484260594e-05, + "loss": 1.183, + "step": 738000 + }, + { + "epoch": 20.72, + "learning_rate": 9.284095803449372e-05, + "loss": 1.171, + "step": 738500 + }, + { + "epoch": 20.74, + "learning_rate": 9.270051122638152e-05, + "loss": 1.1857, + "step": 739000 + }, + { + "epoch": 20.75, + "learning_rate": 9.256006441826931e-05, + "loss": 1.1882, + "step": 739500 + }, + { + "epoch": 20.77, + "learning_rate": 9.241961761015709e-05, + "loss": 1.1782, + "step": 740000 + }, + { + "epoch": 20.78, + "learning_rate": 9.22791708020449e-05, + "loss": 1.1772, + "step": 740500 + }, + { + "epoch": 20.79, + "learning_rate": 9.21387239939327e-05, + "loss": 1.1686, + "step": 741000 + }, + { + "epoch": 20.81, + "learning_rate": 9.199827718582049e-05, + "loss": 1.1965, + "step": 741500 + }, + { + "epoch": 20.82, + "learning_rate": 9.185783037770827e-05, + "loss": 1.1715, + "step": 742000 + }, + { + "epoch": 20.84, + "learning_rate": 9.171738356959606e-05, + "loss": 1.1701, + "step": 742500 + }, + { + "epoch": 20.85, + "learning_rate": 9.157693676148386e-05, + "loss": 1.1616, + "step": 743000 + }, + { + "epoch": 20.86, + "learning_rate": 9.143648995337164e-05, + "loss": 1.2022, + "step": 743500 + }, + { + "epoch": 20.88, + "learning_rate": 9.129604314525945e-05, + "loss": 1.1882, + "step": 744000 + }, + { + "epoch": 20.89, + "learning_rate": 9.115559633714724e-05, + "loss": 1.1873, + "step": 744500 + }, + { + "epoch": 20.91, + "learning_rate": 9.101514952903504e-05, + "loss": 1.1965, + "step": 745000 + }, + { + "epoch": 20.92, + "learning_rate": 9.087470272092282e-05, + "loss": 1.2093, + "step": 745500 + }, + { + "epoch": 20.93, + "learning_rate": 9.073425591281061e-05, + "loss": 1.1754, + "step": 746000 + }, + { + "epoch": 20.95, + "learning_rate": 9.05938091046984e-05, + "loss": 1.1968, + "step": 746500 + }, + { + "epoch": 20.96, + "learning_rate": 9.045336229658619e-05, + "loss": 1.1894, + "step": 747000 + }, + { + "epoch": 20.98, + "learning_rate": 9.0312915488474e-05, + "loss": 1.1938, + "step": 747500 + }, + { + "epoch": 20.99, + "learning_rate": 9.017246868036179e-05, + "loss": 1.1966, + "step": 748000 + }, + { + "epoch": 21.0, + "learning_rate": 9.003202187224957e-05, + "loss": 1.175, + "step": 748500 + }, + { + "epoch": 21.02, + "learning_rate": 8.989157506413737e-05, + "loss": 1.1007, + "step": 749000 + }, + { + "epoch": 21.03, + "learning_rate": 8.975112825602516e-05, + "loss": 1.1129, + "step": 749500 + }, + { + "epoch": 21.05, + "learning_rate": 8.961068144791294e-05, + "loss": 1.1079, + "step": 750000 + }, + { + "epoch": 21.06, + "learning_rate": 8.947023463980075e-05, + "loss": 1.1114, + "step": 750500 + }, + { + "epoch": 21.08, + "learning_rate": 8.932978783168854e-05, + "loss": 1.1245, + "step": 751000 + }, + { + "epoch": 21.09, + "learning_rate": 8.918934102357634e-05, + "loss": 1.1261, + "step": 751500 + }, + { + "epoch": 21.1, + "learning_rate": 8.904889421546412e-05, + "loss": 1.1244, + "step": 752000 + }, + { + "epoch": 21.12, + "learning_rate": 8.890844740735191e-05, + "loss": 1.1137, + "step": 752500 + }, + { + "epoch": 21.13, + "learning_rate": 8.876800059923971e-05, + "loss": 1.1279, + "step": 753000 + }, + { + "epoch": 21.15, + "learning_rate": 8.862755379112749e-05, + "loss": 1.1199, + "step": 753500 + }, + { + "epoch": 21.16, + "learning_rate": 8.84871069830153e-05, + "loss": 1.124, + "step": 754000 + }, + { + "epoch": 21.17, + "learning_rate": 8.834666017490309e-05, + "loss": 1.0937, + "step": 754500 + }, + { + "epoch": 21.19, + "learning_rate": 8.820621336679089e-05, + "loss": 1.134, + "step": 755000 + }, + { + "epoch": 21.2, + "learning_rate": 8.806576655867867e-05, + "loss": 1.1194, + "step": 755500 + }, + { + "epoch": 21.22, + "learning_rate": 8.792531975056646e-05, + "loss": 1.1378, + "step": 756000 + }, + { + "epoch": 21.23, + "learning_rate": 8.778487294245426e-05, + "loss": 1.1242, + "step": 756500 + }, + { + "epoch": 21.24, + "learning_rate": 8.764442613434204e-05, + "loss": 1.1631, + "step": 757000 + }, + { + "epoch": 21.26, + "learning_rate": 8.750397932622984e-05, + "loss": 1.103, + "step": 757500 + }, + { + "epoch": 21.27, + "learning_rate": 8.736353251811764e-05, + "loss": 1.1169, + "step": 758000 + }, + { + "epoch": 21.29, + "learning_rate": 8.722308571000542e-05, + "loss": 1.1532, + "step": 758500 + }, + { + "epoch": 21.3, + "learning_rate": 8.708263890189321e-05, + "loss": 1.1296, + "step": 759000 + }, + { + "epoch": 21.31, + "learning_rate": 8.694219209378101e-05, + "loss": 1.1365, + "step": 759500 + }, + { + "epoch": 21.33, + "learning_rate": 8.680174528566879e-05, + "loss": 1.1171, + "step": 760000 + }, + { + "epoch": 21.34, + "learning_rate": 8.666129847755658e-05, + "loss": 1.1402, + "step": 760500 + }, + { + "epoch": 21.36, + "learning_rate": 8.652085166944439e-05, + "loss": 1.1393, + "step": 761000 + }, + { + "epoch": 21.37, + "learning_rate": 8.638040486133219e-05, + "loss": 1.1442, + "step": 761500 + }, + { + "epoch": 21.38, + "learning_rate": 8.623995805321997e-05, + "loss": 1.143, + "step": 762000 + }, + { + "epoch": 21.4, + "learning_rate": 8.609951124510776e-05, + "loss": 1.1264, + "step": 762500 + }, + { + "epoch": 21.41, + "learning_rate": 8.595906443699556e-05, + "loss": 1.1292, + "step": 763000 + }, + { + "epoch": 21.43, + "learning_rate": 8.581861762888334e-05, + "loss": 1.1087, + "step": 763500 + }, + { + "epoch": 21.44, + "learning_rate": 8.567817082077115e-05, + "loss": 1.1085, + "step": 764000 + }, + { + "epoch": 21.45, + "learning_rate": 8.553772401265894e-05, + "loss": 1.1242, + "step": 764500 + }, + { + "epoch": 21.47, + "learning_rate": 8.539727720454672e-05, + "loss": 1.1303, + "step": 765000 + }, + { + "epoch": 21.48, + "learning_rate": 8.525683039643451e-05, + "loss": 1.099, + "step": 765500 + }, + { + "epoch": 21.5, + "learning_rate": 8.511638358832231e-05, + "loss": 1.1538, + "step": 766000 + }, + { + "epoch": 21.51, + "learning_rate": 8.497593678021009e-05, + "loss": 1.1517, + "step": 766500 + }, + { + "epoch": 21.52, + "learning_rate": 8.483548997209788e-05, + "loss": 1.124, + "step": 767000 + }, + { + "epoch": 21.54, + "learning_rate": 8.469504316398569e-05, + "loss": 1.1305, + "step": 767500 + }, + { + "epoch": 21.55, + "learning_rate": 8.455459635587349e-05, + "loss": 1.1466, + "step": 768000 + }, + { + "epoch": 21.57, + "learning_rate": 8.441414954776127e-05, + "loss": 1.1146, + "step": 768500 + }, + { + "epoch": 21.58, + "learning_rate": 8.427370273964906e-05, + "loss": 1.122, + "step": 769000 + }, + { + "epoch": 21.59, + "learning_rate": 8.413325593153686e-05, + "loss": 1.1454, + "step": 769500 + }, + { + "epoch": 21.61, + "learning_rate": 8.399280912342464e-05, + "loss": 1.16, + "step": 770000 + }, + { + "epoch": 21.62, + "learning_rate": 8.385236231531243e-05, + "loss": 1.1593, + "step": 770500 + }, + { + "epoch": 21.64, + "learning_rate": 8.371191550720024e-05, + "loss": 1.1463, + "step": 771000 + }, + { + "epoch": 21.65, + "learning_rate": 8.357146869908803e-05, + "loss": 1.1309, + "step": 771500 + }, + { + "epoch": 21.66, + "learning_rate": 8.343102189097582e-05, + "loss": 1.1496, + "step": 772000 + }, + { + "epoch": 21.68, + "learning_rate": 8.329057508286361e-05, + "loss": 1.1591, + "step": 772500 + }, + { + "epoch": 21.69, + "learning_rate": 8.31501282747514e-05, + "loss": 1.105, + "step": 773000 + }, + { + "epoch": 21.71, + "learning_rate": 8.300968146663919e-05, + "loss": 1.1542, + "step": 773500 + }, + { + "epoch": 21.72, + "learning_rate": 8.286923465852698e-05, + "loss": 1.17, + "step": 774000 + }, + { + "epoch": 21.73, + "learning_rate": 8.272878785041479e-05, + "loss": 1.1318, + "step": 774500 + }, + { + "epoch": 21.75, + "learning_rate": 8.258834104230257e-05, + "loss": 1.1279, + "step": 775000 + }, + { + "epoch": 21.76, + "learning_rate": 8.244789423419036e-05, + "loss": 1.136, + "step": 775500 + }, + { + "epoch": 21.78, + "learning_rate": 8.230744742607816e-05, + "loss": 1.1214, + "step": 776000 + }, + { + "epoch": 21.79, + "learning_rate": 8.216700061796594e-05, + "loss": 1.1429, + "step": 776500 + }, + { + "epoch": 21.8, + "learning_rate": 8.202655380985373e-05, + "loss": 1.1407, + "step": 777000 + }, + { + "epoch": 21.82, + "learning_rate": 8.188610700174154e-05, + "loss": 1.1617, + "step": 777500 + }, + { + "epoch": 21.83, + "learning_rate": 8.174566019362934e-05, + "loss": 1.1193, + "step": 778000 + }, + { + "epoch": 21.85, + "learning_rate": 8.160521338551712e-05, + "loss": 1.1362, + "step": 778500 + }, + { + "epoch": 21.86, + "learning_rate": 8.146476657740491e-05, + "loss": 1.1325, + "step": 779000 + }, + { + "epoch": 21.87, + "learning_rate": 8.13243197692927e-05, + "loss": 1.1055, + "step": 779500 + }, + { + "epoch": 21.89, + "learning_rate": 8.118387296118049e-05, + "loss": 1.1304, + "step": 780000 + }, + { + "epoch": 21.9, + "learning_rate": 8.104342615306828e-05, + "loss": 1.1545, + "step": 780500 + }, + { + "epoch": 21.92, + "learning_rate": 8.090297934495609e-05, + "loss": 1.1518, + "step": 781000 + }, + { + "epoch": 21.93, + "learning_rate": 8.076253253684388e-05, + "loss": 1.1682, + "step": 781500 + }, + { + "epoch": 21.95, + "learning_rate": 8.062208572873166e-05, + "loss": 1.1433, + "step": 782000 + }, + { + "epoch": 21.96, + "learning_rate": 8.048163892061946e-05, + "loss": 1.1372, + "step": 782500 + }, + { + "epoch": 21.97, + "learning_rate": 8.034119211250724e-05, + "loss": 1.1321, + "step": 783000 + }, + { + "epoch": 21.99, + "learning_rate": 8.020074530439503e-05, + "loss": 1.1505, + "step": 783500 + }, + { + "epoch": 22.0, + "learning_rate": 8.006029849628283e-05, + "loss": 1.1292, + "step": 784000 + }, + { + "epoch": 22.02, + "learning_rate": 7.991985168817064e-05, + "loss": 1.0754, + "step": 784500 + }, + { + "epoch": 22.03, + "learning_rate": 7.977940488005842e-05, + "loss": 1.0455, + "step": 785000 + }, + { + "epoch": 22.04, + "learning_rate": 7.963895807194621e-05, + "loss": 1.0852, + "step": 785500 + }, + { + "epoch": 22.06, + "learning_rate": 7.9498511263834e-05, + "loss": 1.0497, + "step": 786000 + }, + { + "epoch": 22.07, + "learning_rate": 7.935806445572179e-05, + "loss": 1.1157, + "step": 786500 + }, + { + "epoch": 22.09, + "learning_rate": 7.921761764760958e-05, + "loss": 1.085, + "step": 787000 + }, + { + "epoch": 22.1, + "learning_rate": 7.907717083949739e-05, + "loss": 1.0713, + "step": 787500 + }, + { + "epoch": 22.11, + "learning_rate": 7.893672403138518e-05, + "loss": 1.1042, + "step": 788000 + }, + { + "epoch": 22.13, + "learning_rate": 7.879627722327297e-05, + "loss": 1.1009, + "step": 788500 + }, + { + "epoch": 22.14, + "learning_rate": 7.865583041516076e-05, + "loss": 1.0994, + "step": 789000 + }, + { + "epoch": 22.16, + "learning_rate": 7.851538360704855e-05, + "loss": 1.081, + "step": 789500 + }, + { + "epoch": 22.17, + "learning_rate": 7.837493679893634e-05, + "loss": 1.0888, + "step": 790000 + }, + { + "epoch": 22.18, + "learning_rate": 7.823448999082413e-05, + "loss": 1.1001, + "step": 790500 + }, + { + "epoch": 22.2, + "learning_rate": 7.809404318271194e-05, + "loss": 1.0861, + "step": 791000 + }, + { + "epoch": 22.21, + "learning_rate": 7.795359637459972e-05, + "loss": 1.0728, + "step": 791500 + }, + { + "epoch": 22.23, + "learning_rate": 7.781314956648751e-05, + "loss": 1.0877, + "step": 792000 + }, + { + "epoch": 22.24, + "learning_rate": 7.767270275837531e-05, + "loss": 1.1109, + "step": 792500 + }, + { + "epoch": 22.25, + "learning_rate": 7.753225595026309e-05, + "loss": 1.0825, + "step": 793000 + }, + { + "epoch": 22.27, + "learning_rate": 7.739180914215088e-05, + "loss": 1.0772, + "step": 793500 + }, + { + "epoch": 22.28, + "learning_rate": 7.725136233403868e-05, + "loss": 1.114, + "step": 794000 + }, + { + "epoch": 22.3, + "learning_rate": 7.711091552592649e-05, + "loss": 1.0956, + "step": 794500 + }, + { + "epoch": 22.31, + "learning_rate": 7.697046871781427e-05, + "loss": 1.0918, + "step": 795000 + }, + { + "epoch": 22.32, + "learning_rate": 7.683002190970206e-05, + "loss": 1.099, + "step": 795500 + }, + { + "epoch": 22.34, + "learning_rate": 7.668957510158986e-05, + "loss": 1.0841, + "step": 796000 + }, + { + "epoch": 22.35, + "learning_rate": 7.654912829347764e-05, + "loss": 1.0937, + "step": 796500 + }, + { + "epoch": 22.37, + "learning_rate": 7.640868148536543e-05, + "loss": 1.0962, + "step": 797000 + }, + { + "epoch": 22.38, + "learning_rate": 7.626823467725323e-05, + "loss": 1.1158, + "step": 797500 + }, + { + "epoch": 22.39, + "learning_rate": 7.612778786914103e-05, + "loss": 1.1044, + "step": 798000 + }, + { + "epoch": 22.41, + "learning_rate": 7.598734106102881e-05, + "loss": 1.0872, + "step": 798500 + }, + { + "epoch": 22.42, + "learning_rate": 7.584689425291661e-05, + "loss": 1.1139, + "step": 799000 + }, + { + "epoch": 22.44, + "learning_rate": 7.57064474448044e-05, + "loss": 1.0982, + "step": 799500 + }, + { + "epoch": 22.45, + "learning_rate": 7.556600063669218e-05, + "loss": 1.0748, + "step": 800000 + }, + { + "epoch": 22.46, + "learning_rate": 7.542555382857998e-05, + "loss": 1.0869, + "step": 800500 + }, + { + "epoch": 22.48, + "learning_rate": 7.528510702046779e-05, + "loss": 1.1141, + "step": 801000 + }, + { + "epoch": 22.49, + "learning_rate": 7.514466021235557e-05, + "loss": 1.0838, + "step": 801500 + }, + { + "epoch": 22.51, + "learning_rate": 7.500421340424336e-05, + "loss": 1.0729, + "step": 802000 + }, + { + "epoch": 22.52, + "learning_rate": 7.486376659613116e-05, + "loss": 1.0518, + "step": 802500 + }, + { + "epoch": 22.53, + "learning_rate": 7.472331978801894e-05, + "loss": 1.0645, + "step": 803000 + }, + { + "epoch": 22.55, + "learning_rate": 7.458287297990675e-05, + "loss": 1.0993, + "step": 803500 + }, + { + "epoch": 22.56, + "learning_rate": 7.444242617179453e-05, + "loss": 1.0817, + "step": 804000 + }, + { + "epoch": 22.58, + "learning_rate": 7.430197936368232e-05, + "loss": 1.1001, + "step": 804500 + }, + { + "epoch": 22.59, + "learning_rate": 7.416153255557012e-05, + "loss": 1.0691, + "step": 805000 + }, + { + "epoch": 22.6, + "learning_rate": 7.402108574745791e-05, + "loss": 1.0951, + "step": 805500 + }, + { + "epoch": 22.62, + "learning_rate": 7.38806389393457e-05, + "loss": 1.1136, + "step": 806000 + }, + { + "epoch": 22.63, + "learning_rate": 7.374019213123348e-05, + "loss": 1.0895, + "step": 806500 + }, + { + "epoch": 22.65, + "learning_rate": 7.359974532312129e-05, + "loss": 1.0736, + "step": 807000 + }, + { + "epoch": 22.66, + "learning_rate": 7.345929851500907e-05, + "loss": 1.1013, + "step": 807500 + }, + { + "epoch": 22.67, + "learning_rate": 7.331885170689687e-05, + "loss": 1.07, + "step": 808000 + }, + { + "epoch": 22.69, + "learning_rate": 7.317840489878466e-05, + "loss": 1.0795, + "step": 808500 + }, + { + "epoch": 22.7, + "learning_rate": 7.303795809067246e-05, + "loss": 1.0871, + "step": 809000 + }, + { + "epoch": 22.72, + "learning_rate": 7.289751128256024e-05, + "loss": 1.074, + "step": 809500 + }, + { + "epoch": 22.73, + "learning_rate": 7.275706447444803e-05, + "loss": 1.0948, + "step": 810000 + }, + { + "epoch": 22.74, + "learning_rate": 7.261661766633583e-05, + "loss": 1.1008, + "step": 810500 + }, + { + "epoch": 22.76, + "learning_rate": 7.247617085822362e-05, + "loss": 1.1233, + "step": 811000 + }, + { + "epoch": 22.77, + "learning_rate": 7.233572405011142e-05, + "loss": 1.0846, + "step": 811500 + }, + { + "epoch": 22.79, + "learning_rate": 7.219527724199921e-05, + "loss": 1.1133, + "step": 812000 + }, + { + "epoch": 22.8, + "learning_rate": 7.2054830433887e-05, + "loss": 1.1052, + "step": 812500 + }, + { + "epoch": 22.81, + "learning_rate": 7.191438362577479e-05, + "loss": 1.0933, + "step": 813000 + }, + { + "epoch": 22.83, + "learning_rate": 7.177393681766258e-05, + "loss": 1.0879, + "step": 813500 + }, + { + "epoch": 22.84, + "learning_rate": 7.163349000955037e-05, + "loss": 1.0886, + "step": 814000 + }, + { + "epoch": 22.86, + "learning_rate": 7.149304320143817e-05, + "loss": 1.0757, + "step": 814500 + }, + { + "epoch": 22.87, + "learning_rate": 7.135259639332596e-05, + "loss": 1.0979, + "step": 815000 + }, + { + "epoch": 22.89, + "learning_rate": 7.121214958521376e-05, + "loss": 1.1325, + "step": 815500 + }, + { + "epoch": 22.9, + "learning_rate": 7.107170277710155e-05, + "loss": 1.1131, + "step": 816000 + }, + { + "epoch": 22.91, + "learning_rate": 7.093125596898933e-05, + "loss": 1.107, + "step": 816500 + }, + { + "epoch": 22.93, + "learning_rate": 7.079080916087714e-05, + "loss": 1.0754, + "step": 817000 + }, + { + "epoch": 22.94, + "learning_rate": 7.065036235276492e-05, + "loss": 1.1159, + "step": 817500 + }, + { + "epoch": 22.96, + "learning_rate": 7.050991554465272e-05, + "loss": 1.1028, + "step": 818000 + }, + { + "epoch": 22.97, + "learning_rate": 7.03694687365405e-05, + "loss": 1.1204, + "step": 818500 + }, + { + "epoch": 22.98, + "learning_rate": 7.02290219284283e-05, + "loss": 1.0885, + "step": 819000 + }, + { + "epoch": 23.0, + "learning_rate": 7.008857512031609e-05, + "loss": 1.0953, + "step": 819500 + }, + { + "epoch": 23.01, + "learning_rate": 6.994812831220388e-05, + "loss": 1.056, + "step": 820000 + }, + { + "epoch": 23.03, + "learning_rate": 6.980768150409168e-05, + "loss": 1.031, + "step": 820500 + }, + { + "epoch": 23.04, + "learning_rate": 6.966723469597947e-05, + "loss": 1.0594, + "step": 821000 + }, + { + "epoch": 23.05, + "learning_rate": 6.952678788786726e-05, + "loss": 0.9976, + "step": 821500 + }, + { + "epoch": 23.07, + "learning_rate": 6.938634107975505e-05, + "loss": 1.0471, + "step": 822000 + }, + { + "epoch": 23.08, + "learning_rate": 6.924589427164285e-05, + "loss": 1.0296, + "step": 822500 + }, + { + "epoch": 23.1, + "learning_rate": 6.910544746353063e-05, + "loss": 1.0404, + "step": 823000 + }, + { + "epoch": 23.11, + "learning_rate": 6.896500065541843e-05, + "loss": 1.0662, + "step": 823500 + }, + { + "epoch": 23.12, + "learning_rate": 6.882455384730622e-05, + "loss": 1.0507, + "step": 824000 + }, + { + "epoch": 23.14, + "learning_rate": 6.868410703919402e-05, + "loss": 1.0617, + "step": 824500 + }, + { + "epoch": 23.15, + "learning_rate": 6.854366023108181e-05, + "loss": 1.0324, + "step": 825000 + }, + { + "epoch": 23.17, + "learning_rate": 6.840321342296961e-05, + "loss": 1.0515, + "step": 825500 + }, + { + "epoch": 23.18, + "learning_rate": 6.82627666148574e-05, + "loss": 1.051, + "step": 826000 + }, + { + "epoch": 23.19, + "learning_rate": 6.812231980674518e-05, + "loss": 1.0416, + "step": 826500 + }, + { + "epoch": 23.21, + "learning_rate": 6.798187299863298e-05, + "loss": 1.0278, + "step": 827000 + }, + { + "epoch": 23.22, + "learning_rate": 6.784142619052077e-05, + "loss": 1.0625, + "step": 827500 + }, + { + "epoch": 23.24, + "learning_rate": 6.770097938240857e-05, + "loss": 1.0301, + "step": 828000 + }, + { + "epoch": 23.25, + "learning_rate": 6.756053257429635e-05, + "loss": 1.0468, + "step": 828500 + }, + { + "epoch": 23.26, + "learning_rate": 6.742008576618415e-05, + "loss": 1.0368, + "step": 829000 + }, + { + "epoch": 23.28, + "learning_rate": 6.727963895807194e-05, + "loss": 1.0442, + "step": 829500 + }, + { + "epoch": 23.29, + "learning_rate": 6.713919214995973e-05, + "loss": 1.039, + "step": 830000 + }, + { + "epoch": 23.31, + "learning_rate": 6.699874534184752e-05, + "loss": 1.0589, + "step": 830500 + }, + { + "epoch": 23.32, + "learning_rate": 6.685829853373532e-05, + "loss": 1.0398, + "step": 831000 + }, + { + "epoch": 23.33, + "learning_rate": 6.671785172562311e-05, + "loss": 1.0442, + "step": 831500 + }, + { + "epoch": 23.35, + "learning_rate": 6.65774049175109e-05, + "loss": 1.0707, + "step": 832000 + }, + { + "epoch": 23.36, + "learning_rate": 6.64369581093987e-05, + "loss": 1.0625, + "step": 832500 + }, + { + "epoch": 23.38, + "learning_rate": 6.629651130128648e-05, + "loss": 1.0501, + "step": 833000 + }, + { + "epoch": 23.39, + "learning_rate": 6.615606449317428e-05, + "loss": 1.084, + "step": 833500 + }, + { + "epoch": 23.4, + "learning_rate": 6.601561768506207e-05, + "loss": 1.0412, + "step": 834000 + }, + { + "epoch": 23.42, + "learning_rate": 6.587517087694987e-05, + "loss": 1.0433, + "step": 834500 + }, + { + "epoch": 23.43, + "learning_rate": 6.573472406883766e-05, + "loss": 1.0554, + "step": 835000 + }, + { + "epoch": 23.45, + "learning_rate": 6.559427726072546e-05, + "loss": 1.0543, + "step": 835500 + }, + { + "epoch": 23.46, + "learning_rate": 6.545383045261324e-05, + "loss": 1.0653, + "step": 836000 + }, + { + "epoch": 23.47, + "learning_rate": 6.531338364450103e-05, + "loss": 1.0521, + "step": 836500 + }, + { + "epoch": 23.49, + "learning_rate": 6.517293683638883e-05, + "loss": 1.0521, + "step": 837000 + }, + { + "epoch": 23.5, + "learning_rate": 6.503249002827662e-05, + "loss": 1.0654, + "step": 837500 + }, + { + "epoch": 23.52, + "learning_rate": 6.489204322016441e-05, + "loss": 1.0661, + "step": 838000 + }, + { + "epoch": 23.53, + "learning_rate": 6.47515964120522e-05, + "loss": 1.0238, + "step": 838500 + }, + { + "epoch": 23.54, + "learning_rate": 6.461114960394e-05, + "loss": 1.0643, + "step": 839000 + }, + { + "epoch": 23.56, + "learning_rate": 6.447070279582778e-05, + "loss": 1.0537, + "step": 839500 + }, + { + "epoch": 23.57, + "learning_rate": 6.433025598771558e-05, + "loss": 1.0452, + "step": 840000 + }, + { + "epoch": 23.59, + "learning_rate": 6.418980917960337e-05, + "loss": 1.0785, + "step": 840500 + }, + { + "epoch": 23.6, + "learning_rate": 6.404936237149117e-05, + "loss": 1.0739, + "step": 841000 + }, + { + "epoch": 23.61, + "learning_rate": 6.390891556337896e-05, + "loss": 1.0694, + "step": 841500 + }, + { + "epoch": 23.63, + "learning_rate": 6.376846875526674e-05, + "loss": 1.0649, + "step": 842000 + }, + { + "epoch": 23.64, + "learning_rate": 6.362802194715455e-05, + "loss": 1.0585, + "step": 842500 + }, + { + "epoch": 23.66, + "learning_rate": 6.348757513904233e-05, + "loss": 1.0764, + "step": 843000 + }, + { + "epoch": 23.67, + "learning_rate": 6.334712833093013e-05, + "loss": 1.0815, + "step": 843500 + }, + { + "epoch": 23.68, + "learning_rate": 6.320668152281792e-05, + "loss": 1.0748, + "step": 844000 + }, + { + "epoch": 23.7, + "learning_rate": 6.306623471470572e-05, + "loss": 1.0502, + "step": 844500 + }, + { + "epoch": 23.71, + "learning_rate": 6.29257879065935e-05, + "loss": 1.053, + "step": 845000 + }, + { + "epoch": 23.73, + "learning_rate": 6.278534109848129e-05, + "loss": 1.0697, + "step": 845500 + }, + { + "epoch": 23.74, + "learning_rate": 6.264489429036909e-05, + "loss": 1.0811, + "step": 846000 + }, + { + "epoch": 23.76, + "learning_rate": 6.250444748225688e-05, + "loss": 1.0412, + "step": 846500 + }, + { + "epoch": 23.77, + "learning_rate": 6.236400067414467e-05, + "loss": 1.0698, + "step": 847000 + }, + { + "epoch": 23.78, + "learning_rate": 6.222355386603247e-05, + "loss": 1.0591, + "step": 847500 + }, + { + "epoch": 23.8, + "learning_rate": 6.208310705792026e-05, + "loss": 1.0589, + "step": 848000 + }, + { + "epoch": 23.81, + "learning_rate": 6.194266024980804e-05, + "loss": 1.0547, + "step": 848500 + }, + { + "epoch": 23.83, + "learning_rate": 6.180221344169585e-05, + "loss": 1.0766, + "step": 849000 + }, + { + "epoch": 23.84, + "learning_rate": 6.166176663358363e-05, + "loss": 1.0689, + "step": 849500 + }, + { + "epoch": 23.85, + "learning_rate": 6.152131982547143e-05, + "loss": 1.0286, + "step": 850000 + }, + { + "epoch": 23.87, + "learning_rate": 6.138087301735922e-05, + "loss": 1.0654, + "step": 850500 + }, + { + "epoch": 23.88, + "learning_rate": 6.124042620924702e-05, + "loss": 1.0763, + "step": 851000 + }, + { + "epoch": 23.9, + "learning_rate": 6.109997940113481e-05, + "loss": 1.0731, + "step": 851500 + }, + { + "epoch": 23.91, + "learning_rate": 6.095953259302259e-05, + "loss": 1.0472, + "step": 852000 + }, + { + "epoch": 23.92, + "learning_rate": 6.081908578491039e-05, + "loss": 1.0806, + "step": 852500 + }, + { + "epoch": 23.94, + "learning_rate": 6.067863897679818e-05, + "loss": 1.0616, + "step": 853000 + }, + { + "epoch": 23.95, + "learning_rate": 6.0538192168685975e-05, + "loss": 1.0509, + "step": 853500 + }, + { + "epoch": 23.97, + "learning_rate": 6.039774536057376e-05, + "loss": 1.0808, + "step": 854000 + }, + { + "epoch": 23.98, + "learning_rate": 6.0257298552461564e-05, + "loss": 1.0587, + "step": 854500 + }, + { + "epoch": 23.99, + "learning_rate": 6.011685174434935e-05, + "loss": 1.0787, + "step": 855000 + }, + { + "epoch": 24.01, + "learning_rate": 5.997640493623714e-05, + "loss": 1.0337, + "step": 855500 + }, + { + "epoch": 24.02, + "learning_rate": 5.983595812812494e-05, + "loss": 1.0066, + "step": 856000 + }, + { + "epoch": 24.04, + "learning_rate": 5.969551132001273e-05, + "loss": 1.0121, + "step": 856500 + }, + { + "epoch": 24.05, + "learning_rate": 5.9555064511900516e-05, + "loss": 1.0268, + "step": 857000 + }, + { + "epoch": 24.06, + "learning_rate": 5.941461770378832e-05, + "loss": 1.0055, + "step": 857500 + }, + { + "epoch": 24.08, + "learning_rate": 5.9274170895676105e-05, + "loss": 1.0119, + "step": 858000 + }, + { + "epoch": 24.09, + "learning_rate": 5.91337240875639e-05, + "loss": 1.0203, + "step": 858500 + }, + { + "epoch": 24.11, + "learning_rate": 5.899327727945169e-05, + "loss": 1.0067, + "step": 859000 + }, + { + "epoch": 24.12, + "learning_rate": 5.885283047133948e-05, + "loss": 1.0304, + "step": 859500 + }, + { + "epoch": 24.13, + "learning_rate": 5.8712383663227276e-05, + "loss": 1.0314, + "step": 860000 + }, + { + "epoch": 24.15, + "learning_rate": 5.8571936855115064e-05, + "loss": 1.002, + "step": 860500 + }, + { + "epoch": 24.16, + "learning_rate": 5.8431490047002865e-05, + "loss": 0.9908, + "step": 861000 + }, + { + "epoch": 24.18, + "learning_rate": 5.829104323889065e-05, + "loss": 1.0277, + "step": 861500 + }, + { + "epoch": 24.19, + "learning_rate": 5.815059643077844e-05, + "loss": 1.0328, + "step": 862000 + }, + { + "epoch": 24.2, + "learning_rate": 5.801014962266624e-05, + "loss": 1.0052, + "step": 862500 + }, + { + "epoch": 24.22, + "learning_rate": 5.786970281455403e-05, + "loss": 0.9812, + "step": 863000 + }, + { + "epoch": 24.23, + "learning_rate": 5.7729256006441824e-05, + "loss": 0.9925, + "step": 863500 + }, + { + "epoch": 24.25, + "learning_rate": 5.758880919832961e-05, + "loss": 1.0073, + "step": 864000 + }, + { + "epoch": 24.26, + "learning_rate": 5.7448362390217406e-05, + "loss": 0.9962, + "step": 864500 + }, + { + "epoch": 24.27, + "learning_rate": 5.73079155821052e-05, + "loss": 0.9965, + "step": 865000 + }, + { + "epoch": 24.29, + "learning_rate": 5.716746877399299e-05, + "loss": 1.0207, + "step": 865500 + }, + { + "epoch": 24.3, + "learning_rate": 5.702702196588079e-05, + "loss": 1.037, + "step": 866000 + }, + { + "epoch": 24.32, + "learning_rate": 5.688657515776858e-05, + "loss": 1.031, + "step": 866500 + }, + { + "epoch": 24.33, + "learning_rate": 5.6746128349656365e-05, + "loss": 1.0097, + "step": 867000 + }, + { + "epoch": 24.34, + "learning_rate": 5.6605681541544166e-05, + "loss": 1.0315, + "step": 867500 + }, + { + "epoch": 24.36, + "learning_rate": 5.6465234733431954e-05, + "loss": 1.0114, + "step": 868000 + }, + { + "epoch": 24.37, + "learning_rate": 5.632478792531975e-05, + "loss": 1.0141, + "step": 868500 + }, + { + "epoch": 24.39, + "learning_rate": 5.6184341117207536e-05, + "loss": 1.003, + "step": 869000 + }, + { + "epoch": 24.4, + "learning_rate": 5.604389430909533e-05, + "loss": 0.999, + "step": 869500 + }, + { + "epoch": 24.41, + "learning_rate": 5.5903447500983125e-05, + "loss": 1.026, + "step": 870000 + }, + { + "epoch": 24.43, + "learning_rate": 5.576300069287091e-05, + "loss": 0.9967, + "step": 870500 + }, + { + "epoch": 24.44, + "learning_rate": 5.5622553884758714e-05, + "loss": 1.0358, + "step": 871000 + }, + { + "epoch": 24.46, + "learning_rate": 5.54821070766465e-05, + "loss": 1.0213, + "step": 871500 + }, + { + "epoch": 24.47, + "learning_rate": 5.534166026853429e-05, + "loss": 1.0055, + "step": 872000 + }, + { + "epoch": 24.48, + "learning_rate": 5.5201213460422084e-05, + "loss": 1.0138, + "step": 872500 + }, + { + "epoch": 24.5, + "learning_rate": 5.506076665230988e-05, + "loss": 0.9862, + "step": 873000 + }, + { + "epoch": 24.51, + "learning_rate": 5.4920319844197666e-05, + "loss": 1.0179, + "step": 873500 + }, + { + "epoch": 24.53, + "learning_rate": 5.477987303608546e-05, + "loss": 1.0121, + "step": 874000 + }, + { + "epoch": 24.54, + "learning_rate": 5.4639426227973255e-05, + "loss": 0.9966, + "step": 874500 + }, + { + "epoch": 24.55, + "learning_rate": 5.449897941986105e-05, + "loss": 1.0033, + "step": 875000 + }, + { + "epoch": 24.57, + "learning_rate": 5.435853261174884e-05, + "loss": 1.0233, + "step": 875500 + }, + { + "epoch": 24.58, + "learning_rate": 5.421808580363664e-05, + "loss": 1.0081, + "step": 876000 + }, + { + "epoch": 24.6, + "learning_rate": 5.4077638995524426e-05, + "loss": 1.0259, + "step": 876500 + }, + { + "epoch": 24.61, + "learning_rate": 5.3937192187412213e-05, + "loss": 1.0035, + "step": 877000 + }, + { + "epoch": 24.63, + "learning_rate": 5.379674537930001e-05, + "loss": 1.0148, + "step": 877500 + }, + { + "epoch": 24.64, + "learning_rate": 5.36562985711878e-05, + "loss": 1.0296, + "step": 878000 + }, + { + "epoch": 24.65, + "learning_rate": 5.351585176307559e-05, + "loss": 1.0188, + "step": 878500 + }, + { + "epoch": 24.67, + "learning_rate": 5.3375404954963385e-05, + "loss": 1.035, + "step": 879000 + }, + { + "epoch": 24.68, + "learning_rate": 5.323495814685118e-05, + "loss": 1.0216, + "step": 879500 + }, + { + "epoch": 24.7, + "learning_rate": 5.3094511338738973e-05, + "loss": 1.0217, + "step": 880000 + }, + { + "epoch": 24.71, + "learning_rate": 5.295406453062676e-05, + "loss": 1.0079, + "step": 880500 + }, + { + "epoch": 24.72, + "learning_rate": 5.281361772251456e-05, + "loss": 0.9966, + "step": 881000 + }, + { + "epoch": 24.74, + "learning_rate": 5.267317091440235e-05, + "loss": 1.0, + "step": 881500 + }, + { + "epoch": 24.75, + "learning_rate": 5.253272410629014e-05, + "loss": 1.0185, + "step": 882000 + }, + { + "epoch": 24.77, + "learning_rate": 5.2392277298177925e-05, + "loss": 1.0232, + "step": 882500 + }, + { + "epoch": 24.78, + "learning_rate": 5.225183049006573e-05, + "loss": 1.0223, + "step": 883000 + }, + { + "epoch": 24.79, + "learning_rate": 5.2111383681953514e-05, + "loss": 1.0066, + "step": 883500 + }, + { + "epoch": 24.81, + "learning_rate": 5.197093687384131e-05, + "loss": 1.0333, + "step": 884000 + }, + { + "epoch": 24.82, + "learning_rate": 5.18304900657291e-05, + "loss": 1.0348, + "step": 884500 + }, + { + "epoch": 24.84, + "learning_rate": 5.16900432576169e-05, + "loss": 1.0362, + "step": 885000 + }, + { + "epoch": 24.85, + "learning_rate": 5.1549596449504685e-05, + "loss": 1.0099, + "step": 885500 + }, + { + "epoch": 24.86, + "learning_rate": 5.140914964139247e-05, + "loss": 1.0146, + "step": 886000 + }, + { + "epoch": 24.88, + "learning_rate": 5.1268702833280274e-05, + "loss": 1.0462, + "step": 886500 + }, + { + "epoch": 24.89, + "learning_rate": 5.112825602516806e-05, + "loss": 1.0164, + "step": 887000 + }, + { + "epoch": 24.91, + "learning_rate": 5.098780921705585e-05, + "loss": 1.0112, + "step": 887500 + }, + { + "epoch": 24.92, + "learning_rate": 5.084736240894365e-05, + "loss": 1.0161, + "step": 888000 + }, + { + "epoch": 24.93, + "learning_rate": 5.070691560083144e-05, + "loss": 1.0426, + "step": 888500 + }, + { + "epoch": 24.95, + "learning_rate": 5.056646879271923e-05, + "loss": 0.9918, + "step": 889000 + }, + { + "epoch": 24.96, + "learning_rate": 5.042602198460703e-05, + "loss": 1.023, + "step": 889500 + }, + { + "epoch": 24.98, + "learning_rate": 5.028557517649482e-05, + "loss": 1.0051, + "step": 890000 + }, + { + "epoch": 24.99, + "learning_rate": 5.014512836838261e-05, + "loss": 1.0501, + "step": 890500 + }, + { + "epoch": 25.0, + "learning_rate": 5.00046815602704e-05, + "loss": 1.0258, + "step": 891000 + }, + { + "epoch": 25.02, + "learning_rate": 4.98642347521582e-05, + "loss": 0.9781, + "step": 891500 + }, + { + "epoch": 25.03, + "learning_rate": 4.9723787944045986e-05, + "loss": 0.9605, + "step": 892000 + }, + { + "epoch": 25.05, + "learning_rate": 4.9583341135933774e-05, + "loss": 0.9655, + "step": 892500 + }, + { + "epoch": 25.06, + "learning_rate": 4.9442894327821575e-05, + "loss": 0.964, + "step": 893000 + }, + { + "epoch": 25.07, + "learning_rate": 4.930244751970936e-05, + "loss": 0.9666, + "step": 893500 + }, + { + "epoch": 25.09, + "learning_rate": 4.916200071159716e-05, + "loss": 0.9809, + "step": 894000 + }, + { + "epoch": 25.1, + "learning_rate": 4.902155390348495e-05, + "loss": 0.9729, + "step": 894500 + }, + { + "epoch": 25.12, + "learning_rate": 4.888110709537274e-05, + "loss": 0.9895, + "step": 895000 + }, + { + "epoch": 25.13, + "learning_rate": 4.8740660287260534e-05, + "loss": 0.9598, + "step": 895500 + }, + { + "epoch": 25.14, + "learning_rate": 4.860021347914832e-05, + "loss": 0.95, + "step": 896000 + }, + { + "epoch": 25.16, + "learning_rate": 4.845976667103612e-05, + "loss": 0.9933, + "step": 896500 + }, + { + "epoch": 25.17, + "learning_rate": 4.831931986292391e-05, + "loss": 0.9686, + "step": 897000 + }, + { + "epoch": 25.19, + "learning_rate": 4.81788730548117e-05, + "loss": 0.9934, + "step": 897500 + }, + { + "epoch": 25.2, + "learning_rate": 4.80384262466995e-05, + "loss": 0.9792, + "step": 898000 + }, + { + "epoch": 25.21, + "learning_rate": 4.789797943858729e-05, + "loss": 0.9899, + "step": 898500 + }, + { + "epoch": 25.23, + "learning_rate": 4.775753263047508e-05, + "loss": 0.9809, + "step": 899000 + }, + { + "epoch": 25.24, + "learning_rate": 4.7617085822362876e-05, + "loss": 0.9835, + "step": 899500 + }, + { + "epoch": 25.26, + "learning_rate": 4.7476639014250664e-05, + "loss": 0.9338, + "step": 900000 + }, + { + "epoch": 25.27, + "learning_rate": 4.733619220613846e-05, + "loss": 0.9526, + "step": 900500 + }, + { + "epoch": 25.28, + "learning_rate": 4.7195745398026246e-05, + "loss": 0.9714, + "step": 901000 + }, + { + "epoch": 25.3, + "learning_rate": 4.705529858991405e-05, + "loss": 0.9842, + "step": 901500 + }, + { + "epoch": 25.31, + "learning_rate": 4.6914851781801835e-05, + "loss": 0.979, + "step": 902000 + }, + { + "epoch": 25.33, + "learning_rate": 4.677440497368962e-05, + "loss": 0.9746, + "step": 902500 + }, + { + "epoch": 25.34, + "learning_rate": 4.6633958165577424e-05, + "loss": 0.9716, + "step": 903000 + }, + { + "epoch": 25.35, + "learning_rate": 4.649351135746521e-05, + "loss": 0.9827, + "step": 903500 + }, + { + "epoch": 25.37, + "learning_rate": 4.6353064549353006e-05, + "loss": 0.994, + "step": 904000 + }, + { + "epoch": 25.38, + "learning_rate": 4.6212617741240794e-05, + "loss": 0.981, + "step": 904500 + }, + { + "epoch": 25.4, + "learning_rate": 4.607217093312859e-05, + "loss": 0.9842, + "step": 905000 + }, + { + "epoch": 25.41, + "learning_rate": 4.593172412501638e-05, + "loss": 0.979, + "step": 905500 + }, + { + "epoch": 25.42, + "learning_rate": 4.579127731690417e-05, + "loss": 0.9942, + "step": 906000 + }, + { + "epoch": 25.44, + "learning_rate": 4.565083050879197e-05, + "loss": 0.9978, + "step": 906500 + }, + { + "epoch": 25.45, + "learning_rate": 4.551038370067976e-05, + "loss": 1.0006, + "step": 907000 + }, + { + "epoch": 25.47, + "learning_rate": 4.536993689256755e-05, + "loss": 1.0112, + "step": 907500 + }, + { + "epoch": 25.48, + "learning_rate": 4.522949008445535e-05, + "loss": 0.9642, + "step": 908000 + }, + { + "epoch": 25.49, + "learning_rate": 4.5089043276343136e-05, + "loss": 0.9905, + "step": 908500 + }, + { + "epoch": 25.51, + "learning_rate": 4.4948596468230924e-05, + "loss": 0.986, + "step": 909000 + }, + { + "epoch": 25.52, + "learning_rate": 4.480814966011872e-05, + "loss": 0.9829, + "step": 909500 + }, + { + "epoch": 25.54, + "learning_rate": 4.466770285200651e-05, + "loss": 0.9783, + "step": 910000 + }, + { + "epoch": 25.55, + "learning_rate": 4.452725604389431e-05, + "loss": 0.983, + "step": 910500 + }, + { + "epoch": 25.57, + "learning_rate": 4.4386809235782095e-05, + "loss": 0.972, + "step": 911000 + }, + { + "epoch": 25.58, + "learning_rate": 4.4246362427669896e-05, + "loss": 0.9854, + "step": 911500 + }, + { + "epoch": 25.59, + "learning_rate": 4.4105915619557684e-05, + "loss": 0.981, + "step": 912000 + }, + { + "epoch": 25.61, + "learning_rate": 4.396546881144547e-05, + "loss": 0.9816, + "step": 912500 + }, + { + "epoch": 25.62, + "learning_rate": 4.382502200333327e-05, + "loss": 0.9745, + "step": 913000 + }, + { + "epoch": 25.64, + "learning_rate": 4.368457519522106e-05, + "loss": 0.9589, + "step": 913500 + }, + { + "epoch": 25.65, + "learning_rate": 4.354412838710885e-05, + "loss": 0.9854, + "step": 914000 + }, + { + "epoch": 25.66, + "learning_rate": 4.340368157899664e-05, + "loss": 0.9811, + "step": 914500 + }, + { + "epoch": 25.68, + "learning_rate": 4.326323477088444e-05, + "loss": 0.9934, + "step": 915000 + }, + { + "epoch": 25.69, + "learning_rate": 4.312278796277223e-05, + "loss": 0.9691, + "step": 915500 + }, + { + "epoch": 25.71, + "learning_rate": 4.298234115466002e-05, + "loss": 0.9757, + "step": 916000 + }, + { + "epoch": 25.72, + "learning_rate": 4.284189434654782e-05, + "loss": 0.98, + "step": 916500 + }, + { + "epoch": 25.73, + "learning_rate": 4.270144753843561e-05, + "loss": 0.9761, + "step": 917000 + }, + { + "epoch": 25.75, + "learning_rate": 4.2561000730323396e-05, + "loss": 0.9808, + "step": 917500 + }, + { + "epoch": 25.76, + "learning_rate": 4.2420553922211184e-05, + "loss": 0.9752, + "step": 918000 + }, + { + "epoch": 25.78, + "learning_rate": 4.2280107114098985e-05, + "loss": 1.0108, + "step": 918500 + }, + { + "epoch": 25.79, + "learning_rate": 4.213966030598677e-05, + "loss": 0.9959, + "step": 919000 + }, + { + "epoch": 25.8, + "learning_rate": 4.199921349787457e-05, + "loss": 0.9715, + "step": 919500 + }, + { + "epoch": 25.82, + "learning_rate": 4.185876668976236e-05, + "loss": 0.9801, + "step": 920000 + }, + { + "epoch": 25.83, + "learning_rate": 4.1718319881650156e-05, + "loss": 0.9798, + "step": 920500 + }, + { + "epoch": 25.85, + "learning_rate": 4.1577873073537944e-05, + "loss": 1.0103, + "step": 921000 + }, + { + "epoch": 25.86, + "learning_rate": 4.143742626542574e-05, + "loss": 0.982, + "step": 921500 + }, + { + "epoch": 25.87, + "learning_rate": 4.129697945731353e-05, + "loss": 0.9832, + "step": 922000 + }, + { + "epoch": 25.89, + "learning_rate": 4.115653264920132e-05, + "loss": 0.9876, + "step": 922500 + }, + { + "epoch": 25.9, + "learning_rate": 4.101608584108911e-05, + "loss": 0.9796, + "step": 923000 + }, + { + "epoch": 25.92, + "learning_rate": 4.087563903297691e-05, + "loss": 0.988, + "step": 923500 + }, + { + "epoch": 25.93, + "learning_rate": 4.07351922248647e-05, + "loss": 0.9728, + "step": 924000 + }, + { + "epoch": 25.94, + "learning_rate": 4.059474541675249e-05, + "loss": 0.9854, + "step": 924500 + }, + { + "epoch": 25.96, + "learning_rate": 4.0454298608640286e-05, + "loss": 0.9798, + "step": 925000 + }, + { + "epoch": 25.97, + "learning_rate": 4.031385180052808e-05, + "loss": 0.9965, + "step": 925500 + }, + { + "epoch": 25.99, + "learning_rate": 4.017340499241587e-05, + "loss": 0.978, + "step": 926000 + }, + { + "epoch": 26.0, + "learning_rate": 4.003295818430366e-05, + "loss": 0.978, + "step": 926500 + }, + { + "epoch": 26.01, + "learning_rate": 3.989251137619146e-05, + "loss": 0.9435, + "step": 927000 + }, + { + "epoch": 26.03, + "learning_rate": 3.9752064568079244e-05, + "loss": 0.9353, + "step": 927500 + }, + { + "epoch": 26.04, + "learning_rate": 3.961161775996703e-05, + "loss": 0.9308, + "step": 928000 + }, + { + "epoch": 26.06, + "learning_rate": 3.9471170951854833e-05, + "loss": 0.9444, + "step": 928500 + }, + { + "epoch": 26.07, + "learning_rate": 3.933072414374262e-05, + "loss": 0.9623, + "step": 929000 + }, + { + "epoch": 26.08, + "learning_rate": 3.9190277335630416e-05, + "loss": 0.9253, + "step": 929500 + }, + { + "epoch": 26.1, + "learning_rate": 3.904983052751821e-05, + "loss": 0.9352, + "step": 930000 + }, + { + "epoch": 26.11, + "learning_rate": 3.8909383719406e-05, + "loss": 0.9643, + "step": 930500 + }, + { + "epoch": 26.13, + "learning_rate": 3.876893691129379e-05, + "loss": 0.9441, + "step": 931000 + }, + { + "epoch": 26.14, + "learning_rate": 3.862849010318159e-05, + "loss": 0.9533, + "step": 931500 + }, + { + "epoch": 26.15, + "learning_rate": 3.848804329506938e-05, + "loss": 0.9373, + "step": 932000 + }, + { + "epoch": 26.17, + "learning_rate": 3.834759648695717e-05, + "loss": 0.9354, + "step": 932500 + }, + { + "epoch": 26.18, + "learning_rate": 3.8207149678844957e-05, + "loss": 0.9522, + "step": 933000 + }, + { + "epoch": 26.2, + "learning_rate": 3.806670287073276e-05, + "loss": 0.9477, + "step": 933500 + }, + { + "epoch": 26.21, + "learning_rate": 3.7926256062620545e-05, + "loss": 0.9361, + "step": 934000 + }, + { + "epoch": 26.22, + "learning_rate": 3.778580925450834e-05, + "loss": 0.9419, + "step": 934500 + }, + { + "epoch": 26.24, + "learning_rate": 3.7645362446396134e-05, + "loss": 0.968, + "step": 935000 + }, + { + "epoch": 26.25, + "learning_rate": 3.750491563828392e-05, + "loss": 0.9233, + "step": 935500 + }, + { + "epoch": 26.27, + "learning_rate": 3.7364468830171717e-05, + "loss": 0.9514, + "step": 936000 + }, + { + "epoch": 26.28, + "learning_rate": 3.722402202205951e-05, + "loss": 0.9475, + "step": 936500 + }, + { + "epoch": 26.29, + "learning_rate": 3.7083575213947305e-05, + "loss": 0.9377, + "step": 937000 + }, + { + "epoch": 26.31, + "learning_rate": 3.694312840583509e-05, + "loss": 0.9704, + "step": 937500 + }, + { + "epoch": 26.32, + "learning_rate": 3.680268159772289e-05, + "loss": 0.925, + "step": 938000 + }, + { + "epoch": 26.34, + "learning_rate": 3.6662234789610675e-05, + "loss": 0.9333, + "step": 938500 + }, + { + "epoch": 26.35, + "learning_rate": 3.652178798149847e-05, + "loss": 0.9438, + "step": 939000 + }, + { + "epoch": 26.36, + "learning_rate": 3.6381341173386264e-05, + "loss": 0.9884, + "step": 939500 + }, + { + "epoch": 26.38, + "learning_rate": 3.624089436527405e-05, + "loss": 0.9539, + "step": 940000 + }, + { + "epoch": 26.39, + "learning_rate": 3.6100447557161846e-05, + "loss": 0.9565, + "step": 940500 + }, + { + "epoch": 26.41, + "learning_rate": 3.596000074904964e-05, + "loss": 0.9604, + "step": 941000 + }, + { + "epoch": 26.42, + "learning_rate": 3.5819553940937435e-05, + "loss": 0.9424, + "step": 941500 + }, + { + "epoch": 26.44, + "learning_rate": 3.567910713282522e-05, + "loss": 0.9411, + "step": 942000 + }, + { + "epoch": 26.45, + "learning_rate": 3.553866032471302e-05, + "loss": 0.963, + "step": 942500 + }, + { + "epoch": 26.46, + "learning_rate": 3.539821351660081e-05, + "loss": 0.9385, + "step": 943000 + }, + { + "epoch": 26.48, + "learning_rate": 3.52577667084886e-05, + "loss": 0.9585, + "step": 943500 + }, + { + "epoch": 26.49, + "learning_rate": 3.5117319900376394e-05, + "loss": 0.9489, + "step": 944000 + }, + { + "epoch": 26.51, + "learning_rate": 3.497687309226418e-05, + "loss": 0.9572, + "step": 944500 + }, + { + "epoch": 26.52, + "learning_rate": 3.4836426284151976e-05, + "loss": 0.9138, + "step": 945000 + }, + { + "epoch": 26.53, + "learning_rate": 3.469597947603977e-05, + "loss": 0.9485, + "step": 945500 + }, + { + "epoch": 26.55, + "learning_rate": 3.4555532667927565e-05, + "loss": 0.9519, + "step": 946000 + }, + { + "epoch": 26.56, + "learning_rate": 3.441508585981536e-05, + "loss": 0.9356, + "step": 946500 + }, + { + "epoch": 26.58, + "learning_rate": 3.427463905170315e-05, + "loss": 0.9257, + "step": 947000 + }, + { + "epoch": 26.59, + "learning_rate": 3.413419224359094e-05, + "loss": 0.9313, + "step": 947500 + }, + { + "epoch": 26.6, + "learning_rate": 3.3993745435478736e-05, + "loss": 0.9591, + "step": 948000 + }, + { + "epoch": 26.62, + "learning_rate": 3.3853298627366524e-05, + "loss": 0.9424, + "step": 948500 + }, + { + "epoch": 26.63, + "learning_rate": 3.371285181925432e-05, + "loss": 0.9564, + "step": 949000 + }, + { + "epoch": 26.65, + "learning_rate": 3.3572405011142106e-05, + "loss": 0.9548, + "step": 949500 + }, + { + "epoch": 26.66, + "learning_rate": 3.34319582030299e-05, + "loss": 0.9363, + "step": 950000 + }, + { + "epoch": 26.67, + "learning_rate": 3.3291511394917695e-05, + "loss": 0.9518, + "step": 950500 + }, + { + "epoch": 26.69, + "learning_rate": 3.315106458680549e-05, + "loss": 0.9612, + "step": 951000 + }, + { + "epoch": 26.7, + "learning_rate": 3.3010617778693284e-05, + "loss": 0.953, + "step": 951500 + }, + { + "epoch": 26.72, + "learning_rate": 3.287017097058107e-05, + "loss": 0.9675, + "step": 952000 + }, + { + "epoch": 26.73, + "learning_rate": 3.2729724162468866e-05, + "loss": 0.9442, + "step": 952500 + }, + { + "epoch": 26.74, + "learning_rate": 3.258927735435666e-05, + "loss": 0.946, + "step": 953000 + }, + { + "epoch": 26.76, + "learning_rate": 3.244883054624445e-05, + "loss": 0.9465, + "step": 953500 + }, + { + "epoch": 26.77, + "learning_rate": 3.230838373813224e-05, + "loss": 0.949, + "step": 954000 + }, + { + "epoch": 26.79, + "learning_rate": 3.216793693002003e-05, + "loss": 0.9622, + "step": 954500 + }, + { + "epoch": 26.8, + "learning_rate": 3.2027490121907825e-05, + "loss": 0.9415, + "step": 955000 + }, + { + "epoch": 26.81, + "learning_rate": 3.188704331379562e-05, + "loss": 0.9294, + "step": 955500 + }, + { + "epoch": 26.83, + "learning_rate": 3.1746596505683414e-05, + "loss": 0.9346, + "step": 956000 + }, + { + "epoch": 26.84, + "learning_rate": 3.160614969757121e-05, + "loss": 0.9471, + "step": 956500 + }, + { + "epoch": 26.86, + "learning_rate": 3.1465702889458996e-05, + "loss": 0.9375, + "step": 957000 + }, + { + "epoch": 26.87, + "learning_rate": 3.132525608134679e-05, + "loss": 0.9421, + "step": 957500 + }, + { + "epoch": 26.88, + "learning_rate": 3.118480927323458e-05, + "loss": 0.9358, + "step": 958000 + }, + { + "epoch": 26.9, + "learning_rate": 3.104436246512237e-05, + "loss": 0.9515, + "step": 958500 + }, + { + "epoch": 26.91, + "learning_rate": 3.090391565701017e-05, + "loss": 0.9672, + "step": 959000 + }, + { + "epoch": 26.93, + "learning_rate": 3.0763468848897955e-05, + "loss": 0.9548, + "step": 959500 + }, + { + "epoch": 26.94, + "learning_rate": 3.062302204078575e-05, + "loss": 0.9608, + "step": 960000 + }, + { + "epoch": 26.95, + "learning_rate": 3.048257523267354e-05, + "loss": 0.938, + "step": 960500 + }, + { + "epoch": 26.97, + "learning_rate": 3.0342128424561335e-05, + "loss": 0.9375, + "step": 961000 + }, + { + "epoch": 26.98, + "learning_rate": 3.020168161644913e-05, + "loss": 0.926, + "step": 961500 + }, + { + "epoch": 27.0, + "learning_rate": 3.006123480833692e-05, + "loss": 0.9601, + "step": 962000 + }, + { + "epoch": 27.01, + "learning_rate": 2.992078800022471e-05, + "loss": 0.906, + "step": 962500 + }, + { + "epoch": 27.02, + "learning_rate": 2.9780341192112503e-05, + "loss": 0.9176, + "step": 963000 + }, + { + "epoch": 27.04, + "learning_rate": 2.9639894384000297e-05, + "loss": 0.914, + "step": 963500 + }, + { + "epoch": 27.05, + "learning_rate": 2.949944757588809e-05, + "loss": 0.8998, + "step": 964000 + }, + { + "epoch": 27.07, + "learning_rate": 2.9359000767775883e-05, + "loss": 0.9115, + "step": 964500 + }, + { + "epoch": 27.08, + "learning_rate": 2.9218553959663674e-05, + "loss": 0.9167, + "step": 965000 + }, + { + "epoch": 27.09, + "learning_rate": 2.9078107151551465e-05, + "loss": 0.9245, + "step": 965500 + }, + { + "epoch": 27.11, + "learning_rate": 2.893766034343926e-05, + "loss": 0.9306, + "step": 966000 + }, + { + "epoch": 27.12, + "learning_rate": 2.8797213535327054e-05, + "loss": 0.8957, + "step": 966500 + }, + { + "epoch": 27.14, + "learning_rate": 2.865676672721484e-05, + "loss": 0.907, + "step": 967000 + }, + { + "epoch": 27.15, + "learning_rate": 2.8516319919102636e-05, + "loss": 0.9172, + "step": 967500 + }, + { + "epoch": 27.16, + "learning_rate": 2.8375873110990427e-05, + "loss": 0.9171, + "step": 968000 + }, + { + "epoch": 27.18, + "learning_rate": 2.823542630287822e-05, + "loss": 0.9068, + "step": 968500 + }, + { + "epoch": 27.19, + "learning_rate": 2.8094979494766016e-05, + "loss": 0.8993, + "step": 969000 + }, + { + "epoch": 27.21, + "learning_rate": 2.7954532686653804e-05, + "loss": 0.9355, + "step": 969500 + }, + { + "epoch": 27.22, + "learning_rate": 2.7814085878541598e-05, + "loss": 0.9208, + "step": 970000 + }, + { + "epoch": 27.23, + "learning_rate": 2.767363907042939e-05, + "loss": 0.9365, + "step": 970500 + }, + { + "epoch": 27.25, + "learning_rate": 2.7533192262317184e-05, + "loss": 0.9097, + "step": 971000 + }, + { + "epoch": 27.26, + "learning_rate": 2.7392745454204978e-05, + "loss": 0.9277, + "step": 971500 + }, + { + "epoch": 27.28, + "learning_rate": 2.7252298646092766e-05, + "loss": 0.9186, + "step": 972000 + }, + { + "epoch": 27.29, + "learning_rate": 2.711185183798056e-05, + "loss": 0.9201, + "step": 972500 + }, + { + "epoch": 27.3, + "learning_rate": 2.697140502986835e-05, + "loss": 0.9255, + "step": 973000 + }, + { + "epoch": 27.32, + "learning_rate": 2.6830958221756146e-05, + "loss": 0.9401, + "step": 973500 + }, + { + "epoch": 27.33, + "learning_rate": 2.6690511413643933e-05, + "loss": 0.9133, + "step": 974000 + }, + { + "epoch": 27.35, + "learning_rate": 2.6550064605531728e-05, + "loss": 0.9175, + "step": 974500 + }, + { + "epoch": 27.36, + "learning_rate": 2.6409617797419522e-05, + "loss": 0.9265, + "step": 975000 + }, + { + "epoch": 27.38, + "learning_rate": 2.6269170989307313e-05, + "loss": 0.938, + "step": 975500 + }, + { + "epoch": 27.39, + "learning_rate": 2.6128724181195108e-05, + "loss": 0.9301, + "step": 976000 + }, + { + "epoch": 27.4, + "learning_rate": 2.5988277373082896e-05, + "loss": 0.8971, + "step": 976500 + }, + { + "epoch": 27.42, + "learning_rate": 2.584783056497069e-05, + "loss": 0.9441, + "step": 977000 + }, + { + "epoch": 27.43, + "learning_rate": 2.5707383756858484e-05, + "loss": 0.9362, + "step": 977500 + }, + { + "epoch": 27.45, + "learning_rate": 2.5566936948746276e-05, + "loss": 0.9083, + "step": 978000 + }, + { + "epoch": 27.46, + "learning_rate": 2.542649014063407e-05, + "loss": 0.9082, + "step": 978500 + }, + { + "epoch": 27.47, + "learning_rate": 2.5286043332521858e-05, + "loss": 0.9421, + "step": 979000 + }, + { + "epoch": 27.49, + "learning_rate": 2.5145596524409652e-05, + "loss": 0.8992, + "step": 979500 + }, + { + "epoch": 27.5, + "learning_rate": 2.5005149716297447e-05, + "loss": 0.9328, + "step": 980000 + }, + { + "epoch": 27.52, + "learning_rate": 2.4864702908185238e-05, + "loss": 0.9284, + "step": 980500 + }, + { + "epoch": 27.53, + "learning_rate": 2.4724256100073032e-05, + "loss": 0.9122, + "step": 981000 + }, + { + "epoch": 27.54, + "learning_rate": 2.458380929196082e-05, + "loss": 0.8997, + "step": 981500 + }, + { + "epoch": 27.56, + "learning_rate": 2.4443362483848614e-05, + "loss": 0.9328, + "step": 982000 + }, + { + "epoch": 27.57, + "learning_rate": 2.430291567573641e-05, + "loss": 0.9093, + "step": 982500 + }, + { + "epoch": 27.59, + "learning_rate": 2.41624688676242e-05, + "loss": 0.9023, + "step": 983000 + }, + { + "epoch": 27.6, + "learning_rate": 2.4022022059511994e-05, + "loss": 0.9187, + "step": 983500 + }, + { + "epoch": 27.61, + "learning_rate": 2.3881575251399782e-05, + "loss": 0.9128, + "step": 984000 + }, + { + "epoch": 27.63, + "learning_rate": 2.3741128443287577e-05, + "loss": 0.9219, + "step": 984500 + }, + { + "epoch": 27.64, + "learning_rate": 2.360068163517537e-05, + "loss": 0.9225, + "step": 985000 + }, + { + "epoch": 27.66, + "learning_rate": 2.3460234827063162e-05, + "loss": 0.9106, + "step": 985500 + }, + { + "epoch": 27.67, + "learning_rate": 2.3319788018950957e-05, + "loss": 0.9105, + "step": 986000 + }, + { + "epoch": 27.68, + "learning_rate": 2.3179341210838744e-05, + "loss": 0.9277, + "step": 986500 + }, + { + "epoch": 27.7, + "learning_rate": 2.303889440272654e-05, + "loss": 0.9037, + "step": 987000 + }, + { + "epoch": 27.71, + "learning_rate": 2.2898447594614333e-05, + "loss": 0.9158, + "step": 987500 + }, + { + "epoch": 27.73, + "learning_rate": 2.2758000786502124e-05, + "loss": 0.9387, + "step": 988000 + }, + { + "epoch": 27.74, + "learning_rate": 2.261755397838992e-05, + "loss": 0.9273, + "step": 988500 + }, + { + "epoch": 27.75, + "learning_rate": 2.2477107170277706e-05, + "loss": 0.9185, + "step": 989000 + }, + { + "epoch": 27.77, + "learning_rate": 2.23366603621655e-05, + "loss": 0.91, + "step": 989500 + }, + { + "epoch": 27.78, + "learning_rate": 2.2196213554053292e-05, + "loss": 0.9192, + "step": 990000 + }, + { + "epoch": 27.8, + "learning_rate": 2.2055766745941086e-05, + "loss": 0.901, + "step": 990500 + }, + { + "epoch": 27.81, + "learning_rate": 2.1915319937828877e-05, + "loss": 0.9072, + "step": 991000 + }, + { + "epoch": 27.82, + "learning_rate": 2.177487312971667e-05, + "loss": 0.9137, + "step": 991500 + }, + { + "epoch": 27.84, + "learning_rate": 2.1634426321604463e-05, + "loss": 0.9501, + "step": 992000 + }, + { + "epoch": 27.85, + "learning_rate": 2.1493979513492254e-05, + "loss": 0.9243, + "step": 992500 + }, + { + "epoch": 27.87, + "learning_rate": 2.135353270538005e-05, + "loss": 0.9278, + "step": 993000 + }, + { + "epoch": 27.88, + "learning_rate": 2.121308589726784e-05, + "loss": 0.8987, + "step": 993500 + }, + { + "epoch": 27.89, + "learning_rate": 2.107263908915563e-05, + "loss": 0.9121, + "step": 994000 + }, + { + "epoch": 27.91, + "learning_rate": 2.0932192281043425e-05, + "loss": 0.9219, + "step": 994500 + }, + { + "epoch": 27.92, + "learning_rate": 2.0791745472931216e-05, + "loss": 0.921, + "step": 995000 + }, + { + "epoch": 27.94, + "learning_rate": 2.0651298664819007e-05, + "loss": 0.9194, + "step": 995500 + }, + { + "epoch": 27.95, + "learning_rate": 2.0510851856706802e-05, + "loss": 0.9205, + "step": 996000 + }, + { + "epoch": 27.96, + "learning_rate": 2.0370405048594593e-05, + "loss": 0.9412, + "step": 996500 + }, + { + "epoch": 27.98, + "learning_rate": 2.0229958240482387e-05, + "loss": 0.9151, + "step": 997000 + }, + { + "epoch": 27.99, + "learning_rate": 2.008951143237018e-05, + "loss": 0.8942, + "step": 997500 + }, + { + "epoch": 28.01, + "learning_rate": 1.994906462425797e-05, + "loss": 0.8877, + "step": 998000 + }, + { + "epoch": 28.02, + "learning_rate": 1.9808617816145764e-05, + "loss": 0.9026, + "step": 998500 + }, + { + "epoch": 28.03, + "learning_rate": 1.9668171008033555e-05, + "loss": 0.8988, + "step": 999000 + }, + { + "epoch": 28.05, + "learning_rate": 1.952772419992135e-05, + "loss": 0.8773, + "step": 999500 + }, + { + "epoch": 28.06, + "learning_rate": 1.938727739180914e-05, + "loss": 0.8875, + "step": 1000000 + }, + { + "epoch": 28.08, + "learning_rate": 1.9246830583696932e-05, + "loss": 0.8946, + "step": 1000500 + }, + { + "epoch": 28.09, + "learning_rate": 1.9106383775584726e-05, + "loss": 0.8824, + "step": 1001000 + }, + { + "epoch": 28.1, + "learning_rate": 1.8965936967472517e-05, + "loss": 0.914, + "step": 1001500 + }, + { + "epoch": 28.12, + "learning_rate": 1.8825490159360312e-05, + "loss": 0.9184, + "step": 1002000 + }, + { + "epoch": 28.13, + "learning_rate": 1.8685043351248103e-05, + "loss": 0.9002, + "step": 1002500 + }, + { + "epoch": 28.15, + "learning_rate": 1.8544596543135894e-05, + "loss": 0.8737, + "step": 1003000 + }, + { + "epoch": 28.16, + "learning_rate": 1.840414973502369e-05, + "loss": 0.8976, + "step": 1003500 + }, + { + "epoch": 28.17, + "learning_rate": 1.826370292691148e-05, + "loss": 0.8865, + "step": 1004000 + }, + { + "epoch": 28.19, + "learning_rate": 1.812325611879927e-05, + "loss": 0.8947, + "step": 1004500 + }, + { + "epoch": 28.2, + "learning_rate": 1.7982809310687065e-05, + "loss": 0.8757, + "step": 1005000 + }, + { + "epoch": 28.22, + "learning_rate": 1.7842362502574856e-05, + "loss": 0.8997, + "step": 1005500 + }, + { + "epoch": 28.23, + "learning_rate": 1.770191569446265e-05, + "loss": 0.9052, + "step": 1006000 + }, + { + "epoch": 28.25, + "learning_rate": 1.756146888635044e-05, + "loss": 0.8833, + "step": 1006500 + }, + { + "epoch": 28.26, + "learning_rate": 1.7421022078238233e-05, + "loss": 0.9094, + "step": 1007000 + }, + { + "epoch": 28.27, + "learning_rate": 1.7280575270126027e-05, + "loss": 0.888, + "step": 1007500 + }, + { + "epoch": 28.29, + "learning_rate": 1.7140128462013818e-05, + "loss": 0.8907, + "step": 1008000 + }, + { + "epoch": 28.3, + "learning_rate": 1.6999681653901613e-05, + "loss": 0.8982, + "step": 1008500 + }, + { + "epoch": 28.32, + "learning_rate": 1.6859234845789404e-05, + "loss": 0.8964, + "step": 1009000 + }, + { + "epoch": 28.33, + "learning_rate": 1.6718788037677195e-05, + "loss": 0.8914, + "step": 1009500 + }, + { + "epoch": 28.34, + "learning_rate": 1.6578341229564986e-05, + "loss": 0.8984, + "step": 1010000 + }, + { + "epoch": 28.36, + "learning_rate": 1.643789442145278e-05, + "loss": 0.916, + "step": 1010500 + }, + { + "epoch": 28.37, + "learning_rate": 1.629744761334057e-05, + "loss": 0.8848, + "step": 1011000 + }, + { + "epoch": 28.39, + "learning_rate": 1.6157000805228366e-05, + "loss": 0.9012, + "step": 1011500 + }, + { + "epoch": 28.4, + "learning_rate": 1.6016553997116157e-05, + "loss": 0.8999, + "step": 1012000 + }, + { + "epoch": 28.41, + "learning_rate": 1.5876107189003948e-05, + "loss": 0.8859, + "step": 1012500 + }, + { + "epoch": 28.43, + "learning_rate": 1.5735660380891743e-05, + "loss": 0.8975, + "step": 1013000 + }, + { + "epoch": 28.44, + "learning_rate": 1.5595213572779534e-05, + "loss": 0.8983, + "step": 1013500 + }, + { + "epoch": 28.46, + "learning_rate": 1.5454766764667328e-05, + "loss": 0.9061, + "step": 1014000 + }, + { + "epoch": 28.47, + "learning_rate": 1.531431995655512e-05, + "loss": 0.8942, + "step": 1014500 + }, + { + "epoch": 28.48, + "learning_rate": 1.517387314844291e-05, + "loss": 0.917, + "step": 1015000 + }, + { + "epoch": 28.5, + "learning_rate": 1.5033426340330705e-05, + "loss": 0.8868, + "step": 1015500 + }, + { + "epoch": 28.51, + "learning_rate": 1.4892979532218497e-05, + "loss": 0.8692, + "step": 1016000 + }, + { + "epoch": 28.53, + "learning_rate": 1.4752532724106289e-05, + "loss": 0.8927, + "step": 1016500 + }, + { + "epoch": 28.54, + "learning_rate": 1.4612085915994081e-05, + "loss": 0.9105, + "step": 1017000 + }, + { + "epoch": 28.55, + "learning_rate": 1.4471639107881872e-05, + "loss": 0.8974, + "step": 1017500 + }, + { + "epoch": 28.57, + "learning_rate": 1.4331192299769665e-05, + "loss": 0.9013, + "step": 1018000 + }, + { + "epoch": 28.58, + "learning_rate": 1.419074549165746e-05, + "loss": 0.8981, + "step": 1018500 + }, + { + "epoch": 28.6, + "learning_rate": 1.405029868354525e-05, + "loss": 0.871, + "step": 1019000 + }, + { + "epoch": 28.61, + "learning_rate": 1.3909851875433044e-05, + "loss": 0.8917, + "step": 1019500 + }, + { + "epoch": 28.62, + "learning_rate": 1.3769405067320835e-05, + "loss": 0.8872, + "step": 1020000 + }, + { + "epoch": 28.64, + "learning_rate": 1.3628958259208627e-05, + "loss": 0.8958, + "step": 1020500 + }, + { + "epoch": 28.65, + "learning_rate": 1.3488511451096422e-05, + "loss": 0.8783, + "step": 1021000 + }, + { + "epoch": 28.67, + "learning_rate": 1.3348064642984213e-05, + "loss": 0.8722, + "step": 1021500 + }, + { + "epoch": 28.68, + "learning_rate": 1.3207617834872006e-05, + "loss": 0.9058, + "step": 1022000 + }, + { + "epoch": 28.69, + "learning_rate": 1.3067171026759797e-05, + "loss": 0.9316, + "step": 1022500 + }, + { + "epoch": 28.71, + "learning_rate": 1.292672421864759e-05, + "loss": 0.8852, + "step": 1023000 + }, + { + "epoch": 28.72, + "learning_rate": 1.2786277410535382e-05, + "loss": 0.8857, + "step": 1023500 + }, + { + "epoch": 28.74, + "learning_rate": 1.2645830602423175e-05, + "loss": 0.893, + "step": 1024000 + }, + { + "epoch": 28.75, + "learning_rate": 1.2505383794310968e-05, + "loss": 0.8962, + "step": 1024500 + }, + { + "epoch": 28.76, + "learning_rate": 1.2364936986198759e-05, + "loss": 0.8671, + "step": 1025000 + }, + { + "epoch": 28.78, + "learning_rate": 1.2224490178086552e-05, + "loss": 0.8975, + "step": 1025500 + }, + { + "epoch": 28.79, + "learning_rate": 1.2084043369974343e-05, + "loss": 0.904, + "step": 1026000 + }, + { + "epoch": 28.81, + "learning_rate": 1.1943596561862137e-05, + "loss": 0.9048, + "step": 1026500 + }, + { + "epoch": 28.82, + "learning_rate": 1.1803149753749928e-05, + "loss": 0.9158, + "step": 1027000 + }, + { + "epoch": 28.83, + "learning_rate": 1.1662702945637721e-05, + "loss": 0.8749, + "step": 1027500 + }, + { + "epoch": 28.85, + "learning_rate": 1.1522256137525514e-05, + "loss": 0.8801, + "step": 1028000 + }, + { + "epoch": 28.86, + "learning_rate": 1.1381809329413305e-05, + "loss": 0.9151, + "step": 1028500 + }, + { + "epoch": 28.88, + "learning_rate": 1.12413625213011e-05, + "loss": 0.8973, + "step": 1029000 + }, + { + "epoch": 28.89, + "learning_rate": 1.110091571318889e-05, + "loss": 0.8863, + "step": 1029500 + }, + { + "epoch": 28.9, + "learning_rate": 1.0960468905076683e-05, + "loss": 0.8966, + "step": 1030000 + }, + { + "epoch": 28.92, + "learning_rate": 1.0820022096964474e-05, + "loss": 0.8778, + "step": 1030500 + }, + { + "epoch": 28.93, + "learning_rate": 1.0679575288852267e-05, + "loss": 0.8899, + "step": 1031000 + }, + { + "epoch": 28.95, + "learning_rate": 1.0539128480740062e-05, + "loss": 0.8851, + "step": 1031500 + }, + { + "epoch": 28.96, + "learning_rate": 1.0398681672627853e-05, + "loss": 0.8972, + "step": 1032000 + }, + { + "epoch": 28.97, + "learning_rate": 1.0258234864515645e-05, + "loss": 0.8887, + "step": 1032500 + }, + { + "epoch": 28.99, + "learning_rate": 1.0117788056403437e-05, + "loss": 0.9048, + "step": 1033000 + }, + { + "epoch": 29.0, + "learning_rate": 9.97734124829123e-06, + "loss": 0.8814, + "step": 1033500 + }, + { + "epoch": 29.02, + "learning_rate": 9.83689444017902e-06, + "loss": 0.878, + "step": 1034000 + }, + { + "epoch": 29.03, + "learning_rate": 9.696447632066815e-06, + "loss": 0.8769, + "step": 1034500 + }, + { + "epoch": 29.04, + "learning_rate": 9.556000823954608e-06, + "loss": 0.8792, + "step": 1035000 + }, + { + "epoch": 29.06, + "learning_rate": 9.415554015842399e-06, + "loss": 0.8968, + "step": 1035500 + }, + { + "epoch": 29.07, + "learning_rate": 9.275107207730191e-06, + "loss": 0.8679, + "step": 1036000 + }, + { + "epoch": 29.09, + "learning_rate": 9.134660399617984e-06, + "loss": 0.8861, + "step": 1036500 + }, + { + "epoch": 29.1, + "learning_rate": 8.994213591505775e-06, + "loss": 0.8787, + "step": 1037000 + }, + { + "epoch": 29.12, + "learning_rate": 8.85376678339357e-06, + "loss": 0.8827, + "step": 1037500 + }, + { + "epoch": 29.13, + "learning_rate": 8.713319975281361e-06, + "loss": 0.8814, + "step": 1038000 + }, + { + "epoch": 29.14, + "learning_rate": 8.572873167169154e-06, + "loss": 0.8912, + "step": 1038500 + }, + { + "epoch": 29.16, + "learning_rate": 8.432426359056946e-06, + "loss": 0.879, + "step": 1039000 + }, + { + "epoch": 29.17, + "learning_rate": 8.291979550944737e-06, + "loss": 0.8579, + "step": 1039500 + }, + { + "epoch": 29.19, + "learning_rate": 8.15153274283253e-06, + "loss": 0.8612, + "step": 1040000 + }, + { + "epoch": 29.2, + "learning_rate": 8.011085934720323e-06, + "loss": 0.8734, + "step": 1040500 + }, + { + "epoch": 29.21, + "learning_rate": 7.870639126608116e-06, + "loss": 0.8676, + "step": 1041000 + }, + { + "epoch": 29.23, + "learning_rate": 7.730192318495909e-06, + "loss": 0.8894, + "step": 1041500 + }, + { + "epoch": 29.24, + "learning_rate": 7.5897455103837005e-06, + "loss": 0.9045, + "step": 1042000 + }, + { + "epoch": 29.26, + "learning_rate": 7.449298702271492e-06, + "loss": 0.872, + "step": 1042500 + }, + { + "epoch": 29.27, + "learning_rate": 7.308851894159285e-06, + "loss": 0.8897, + "step": 1043000 + }, + { + "epoch": 29.28, + "learning_rate": 7.168405086047077e-06, + "loss": 0.867, + "step": 1043500 + }, + { + "epoch": 29.3, + "learning_rate": 7.027958277934869e-06, + "loss": 0.8667, + "step": 1044000 + }, + { + "epoch": 29.31, + "learning_rate": 6.887511469822662e-06, + "loss": 0.8703, + "step": 1044500 + }, + { + "epoch": 29.33, + "learning_rate": 6.747064661710454e-06, + "loss": 0.8975, + "step": 1045000 + }, + { + "epoch": 29.34, + "learning_rate": 6.606617853598247e-06, + "loss": 0.8692, + "step": 1045500 + }, + { + "epoch": 29.35, + "learning_rate": 6.466171045486039e-06, + "loss": 0.8732, + "step": 1046000 + }, + { + "epoch": 29.37, + "learning_rate": 6.325724237373831e-06, + "loss": 0.8776, + "step": 1046500 + }, + { + "epoch": 29.38, + "learning_rate": 6.185277429261624e-06, + "loss": 0.8845, + "step": 1047000 + }, + { + "epoch": 29.4, + "learning_rate": 6.044830621149416e-06, + "loss": 0.8785, + "step": 1047500 + }, + { + "epoch": 29.41, + "learning_rate": 5.904383813037208e-06, + "loss": 0.8749, + "step": 1048000 + }, + { + "epoch": 29.42, + "learning_rate": 5.7639370049250015e-06, + "loss": 0.8644, + "step": 1048500 + }, + { + "epoch": 29.44, + "learning_rate": 5.623490196812793e-06, + "loss": 0.8678, + "step": 1049000 + }, + { + "epoch": 29.45, + "learning_rate": 5.483043388700586e-06, + "loss": 0.8888, + "step": 1049500 + }, + { + "epoch": 29.47, + "learning_rate": 5.342596580588378e-06, + "loss": 0.8788, + "step": 1050000 + }, + { + "epoch": 29.48, + "learning_rate": 5.20214977247617e-06, + "loss": 0.8708, + "step": 1050500 + }, + { + "epoch": 29.49, + "learning_rate": 5.061702964363963e-06, + "loss": 0.8732, + "step": 1051000 + }, + { + "epoch": 29.51, + "learning_rate": 4.921256156251755e-06, + "loss": 0.8776, + "step": 1051500 + }, + { + "epoch": 29.52, + "learning_rate": 4.7808093481395475e-06, + "loss": 0.8775, + "step": 1052000 + }, + { + "epoch": 29.54, + "learning_rate": 4.64036254002734e-06, + "loss": 0.8747, + "step": 1052500 + }, + { + "epoch": 29.55, + "learning_rate": 4.499915731915132e-06, + "loss": 0.8606, + "step": 1053000 + }, + { + "epoch": 29.56, + "learning_rate": 4.359468923802925e-06, + "loss": 0.8943, + "step": 1053500 + }, + { + "epoch": 29.58, + "learning_rate": 4.219022115690717e-06, + "loss": 0.8663, + "step": 1054000 + }, + { + "epoch": 29.59, + "learning_rate": 4.07857530757851e-06, + "loss": 0.882, + "step": 1054500 + }, + { + "epoch": 29.61, + "learning_rate": 3.9381284994663016e-06, + "loss": 0.8743, + "step": 1055000 + }, + { + "epoch": 29.62, + "learning_rate": 3.797681691354094e-06, + "loss": 0.8854, + "step": 1055500 + }, + { + "epoch": 29.63, + "learning_rate": 3.6572348832418867e-06, + "loss": 0.862, + "step": 1056000 + }, + { + "epoch": 29.65, + "learning_rate": 3.516788075129679e-06, + "loss": 0.8515, + "step": 1056500 + }, + { + "epoch": 29.66, + "learning_rate": 3.376341267017471e-06, + "loss": 0.8682, + "step": 1057000 + }, + { + "epoch": 29.68, + "learning_rate": 3.2358944589052637e-06, + "loss": 0.8791, + "step": 1057500 + }, + { + "epoch": 29.69, + "learning_rate": 3.095447650793056e-06, + "loss": 0.8518, + "step": 1058000 + }, + { + "epoch": 29.7, + "learning_rate": 2.9550008426808484e-06, + "loss": 0.8799, + "step": 1058500 + }, + { + "epoch": 29.72, + "learning_rate": 2.8145540345686408e-06, + "loss": 0.8911, + "step": 1059000 + }, + { + "epoch": 29.73, + "learning_rate": 2.674107226456433e-06, + "loss": 0.8737, + "step": 1059500 + }, + { + "epoch": 29.75, + "learning_rate": 2.5336604183442255e-06, + "loss": 0.8882, + "step": 1060000 + }, + { + "epoch": 29.76, + "learning_rate": 2.3932136102320183e-06, + "loss": 0.8904, + "step": 1060500 + }, + { + "epoch": 29.77, + "learning_rate": 2.25276680211981e-06, + "loss": 0.881, + "step": 1061000 + }, + { + "epoch": 29.79, + "learning_rate": 2.1123199940076025e-06, + "loss": 0.9008, + "step": 1061500 + }, + { + "epoch": 29.8, + "learning_rate": 1.9718731858953953e-06, + "loss": 0.8672, + "step": 1062000 + }, + { + "epoch": 29.82, + "learning_rate": 1.8314263777831874e-06, + "loss": 0.898, + "step": 1062500 + }, + { + "epoch": 29.83, + "learning_rate": 1.6909795696709798e-06, + "loss": 0.8588, + "step": 1063000 + }, + { + "epoch": 29.84, + "learning_rate": 1.5505327615587721e-06, + "loss": 0.8846, + "step": 1063500 + }, + { + "epoch": 29.86, + "learning_rate": 1.4100859534465647e-06, + "loss": 0.8712, + "step": 1064000 + }, + { + "epoch": 29.87, + "learning_rate": 1.2696391453343568e-06, + "loss": 0.872, + "step": 1064500 + }, + { + "epoch": 29.89, + "learning_rate": 1.1291923372221492e-06, + "loss": 0.87, + "step": 1065000 + }, + { + "epoch": 29.9, + "learning_rate": 9.887455291099415e-07, + "loss": 0.8602, + "step": 1065500 + }, + { + "epoch": 29.91, + "learning_rate": 8.48298720997734e-07, + "loss": 0.8788, + "step": 1066000 + }, + { + "epoch": 29.93, + "learning_rate": 7.078519128855263e-07, + "loss": 0.8837, + "step": 1066500 + }, + { + "epoch": 29.94, + "learning_rate": 5.674051047733188e-07, + "loss": 0.8723, + "step": 1067000 + }, + { + "epoch": 29.96, + "learning_rate": 4.269582966611112e-07, + "loss": 0.8793, + "step": 1067500 + }, + { + "epoch": 29.97, + "learning_rate": 2.8651148854890354e-07, + "loss": 0.8865, + "step": 1068000 + }, + { + "epoch": 29.98, + "learning_rate": 1.4606468043669594e-07, + "loss": 0.8533, + "step": 1068500 + }, + { + "epoch": 30.0, + "learning_rate": 5.617872324488305e-09, + "loss": 0.8603, + "step": 1069000 + } + ], + "logging_steps": 500, + "max_steps": 1069020, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "total_flos": 1.8687680352154907e+21, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}