{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.97789367628756, "eval_steps": 500, "global_step": 33000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.998518342914716e-05, "loss": 1.1436, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.9970366858294324e-05, "loss": 1.0946, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.9955550287441474e-05, "loss": 1.0475, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.994073371658863e-05, "loss": 1.2085, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.9925917145735795e-05, "loss": 1.0201, "step": 50 }, { "epoch": 0.0, "learning_rate": 4.991110057488295e-05, "loss": 1.1401, "step": 60 }, { "epoch": 0.0, "learning_rate": 4.989628400403011e-05, "loss": 1.2025, "step": 70 }, { "epoch": 0.0, "learning_rate": 4.988146743317727e-05, "loss": 1.0663, "step": 80 }, { "epoch": 0.0, "learning_rate": 4.9866650862324424e-05, "loss": 1.0889, "step": 90 }, { "epoch": 0.0, "learning_rate": 4.985183429147158e-05, "loss": 1.1078, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.9837017720618746e-05, "loss": 1.0007, "step": 110 }, { "epoch": 0.0, "learning_rate": 4.98222011497659e-05, "loss": 1.1888, "step": 120 }, { "epoch": 0.0, "learning_rate": 4.980738457891306e-05, "loss": 1.1755, "step": 130 }, { "epoch": 0.0, "learning_rate": 4.979256800806022e-05, "loss": 1.1285, "step": 140 }, { "epoch": 0.0, "learning_rate": 4.9777751437207374e-05, "loss": 1.0683, "step": 150 }, { "epoch": 0.0, "learning_rate": 4.976293486635453e-05, "loss": 1.0682, "step": 160 }, { "epoch": 0.01, "learning_rate": 4.974811829550169e-05, "loss": 1.0443, "step": 170 }, { "epoch": 0.01, "learning_rate": 4.973330172464885e-05, "loss": 1.0094, "step": 180 }, { "epoch": 0.01, "learning_rate": 4.971848515379601e-05, "loss": 1.0373, "step": 190 }, { "epoch": 0.01, "learning_rate": 4.970366858294316e-05, "loss": 1.0986, "step": 200 }, { "epoch": 0.01, "learning_rate": 4.9688852012090325e-05, "loss": 1.0402, "step": 210 }, { "epoch": 0.01, "learning_rate": 4.967403544123748e-05, "loss": 1.1408, "step": 220 }, { "epoch": 0.01, "learning_rate": 4.965921887038464e-05, "loss": 1.1502, "step": 230 }, { "epoch": 0.01, "learning_rate": 4.96444022995318e-05, "loss": 1.1159, "step": 240 }, { "epoch": 0.01, "learning_rate": 4.9629585728678954e-05, "loss": 1.0239, "step": 250 }, { "epoch": 0.01, "learning_rate": 4.961476915782611e-05, "loss": 1.1654, "step": 260 }, { "epoch": 0.01, "learning_rate": 4.9599952586973275e-05, "loss": 1.1863, "step": 270 }, { "epoch": 0.01, "learning_rate": 4.958513601612043e-05, "loss": 1.0276, "step": 280 }, { "epoch": 0.01, "learning_rate": 4.957031944526759e-05, "loss": 1.1198, "step": 290 }, { "epoch": 0.01, "learning_rate": 4.9555502874414746e-05, "loss": 1.0463, "step": 300 }, { "epoch": 0.01, "learning_rate": 4.9540686303561904e-05, "loss": 0.966, "step": 310 }, { "epoch": 0.01, "learning_rate": 4.952586973270906e-05, "loss": 0.9627, "step": 320 }, { "epoch": 0.01, "learning_rate": 4.9511053161856225e-05, "loss": 1.1312, "step": 330 }, { "epoch": 0.01, "learning_rate": 4.949623659100338e-05, "loss": 0.9223, "step": 340 }, { "epoch": 0.01, "learning_rate": 4.948142002015054e-05, "loss": 1.1389, "step": 350 }, { "epoch": 0.01, "learning_rate": 4.94666034492977e-05, "loss": 0.8973, "step": 360 }, { "epoch": 0.01, "learning_rate": 4.9451786878444854e-05, "loss": 1.0691, "step": 370 }, { "epoch": 0.01, "learning_rate": 4.943697030759201e-05, "loss": 1.099, "step": 380 }, { "epoch": 0.01, "learning_rate": 4.9422153736739175e-05, "loss": 1.0559, "step": 390 }, { "epoch": 0.01, "learning_rate": 4.940733716588633e-05, "loss": 0.9345, "step": 400 }, { "epoch": 0.01, "learning_rate": 4.939252059503349e-05, "loss": 1.1638, "step": 410 }, { "epoch": 0.01, "learning_rate": 4.937770402418064e-05, "loss": 1.0137, "step": 420 }, { "epoch": 0.01, "learning_rate": 4.9362887453327804e-05, "loss": 1.1298, "step": 430 }, { "epoch": 0.01, "learning_rate": 4.934807088247496e-05, "loss": 1.297, "step": 440 }, { "epoch": 0.01, "learning_rate": 4.933325431162212e-05, "loss": 1.1458, "step": 450 }, { "epoch": 0.01, "learning_rate": 4.931843774076928e-05, "loss": 1.2104, "step": 460 }, { "epoch": 0.01, "learning_rate": 4.930362116991643e-05, "loss": 1.0494, "step": 470 }, { "epoch": 0.01, "learning_rate": 4.928880459906359e-05, "loss": 1.0527, "step": 480 }, { "epoch": 0.01, "learning_rate": 4.9273988028210754e-05, "loss": 0.9439, "step": 490 }, { "epoch": 0.01, "learning_rate": 4.925917145735791e-05, "loss": 1.1118, "step": 500 }, { "epoch": 0.02, "learning_rate": 4.924435488650507e-05, "loss": 1.1182, "step": 510 }, { "epoch": 0.02, "learning_rate": 4.922953831565223e-05, "loss": 1.0818, "step": 520 }, { "epoch": 0.02, "learning_rate": 4.921472174479938e-05, "loss": 1.0487, "step": 530 }, { "epoch": 0.02, "learning_rate": 4.919990517394654e-05, "loss": 1.1353, "step": 540 }, { "epoch": 0.02, "learning_rate": 4.9185088603093704e-05, "loss": 0.9229, "step": 550 }, { "epoch": 0.02, "learning_rate": 4.917027203224086e-05, "loss": 0.9945, "step": 560 }, { "epoch": 0.02, "learning_rate": 4.915545546138802e-05, "loss": 1.0377, "step": 570 }, { "epoch": 0.02, "learning_rate": 4.9140638890535176e-05, "loss": 0.9988, "step": 580 }, { "epoch": 0.02, "learning_rate": 4.912582231968233e-05, "loss": 1.0964, "step": 590 }, { "epoch": 0.02, "learning_rate": 4.911100574882949e-05, "loss": 1.0607, "step": 600 }, { "epoch": 0.02, "learning_rate": 4.9096189177976655e-05, "loss": 1.2273, "step": 610 }, { "epoch": 0.02, "learning_rate": 4.908137260712381e-05, "loss": 1.2177, "step": 620 }, { "epoch": 0.02, "learning_rate": 4.906655603627097e-05, "loss": 0.9447, "step": 630 }, { "epoch": 0.02, "learning_rate": 4.9051739465418126e-05, "loss": 1.0235, "step": 640 }, { "epoch": 0.02, "learning_rate": 4.9036922894565283e-05, "loss": 0.8942, "step": 650 }, { "epoch": 0.02, "learning_rate": 4.902210632371244e-05, "loss": 1.1545, "step": 660 }, { "epoch": 0.02, "learning_rate": 4.9007289752859605e-05, "loss": 0.9827, "step": 670 }, { "epoch": 0.02, "learning_rate": 4.899247318200676e-05, "loss": 1.004, "step": 680 }, { "epoch": 0.02, "learning_rate": 4.897765661115392e-05, "loss": 1.1165, "step": 690 }, { "epoch": 0.02, "learning_rate": 4.896284004030107e-05, "loss": 0.9904, "step": 700 }, { "epoch": 0.02, "learning_rate": 4.8948023469448234e-05, "loss": 1.0665, "step": 710 }, { "epoch": 0.02, "learning_rate": 4.893320689859539e-05, "loss": 0.9563, "step": 720 }, { "epoch": 0.02, "learning_rate": 4.891839032774255e-05, "loss": 1.2268, "step": 730 }, { "epoch": 0.02, "learning_rate": 4.890357375688971e-05, "loss": 1.0352, "step": 740 }, { "epoch": 0.02, "learning_rate": 4.888875718603686e-05, "loss": 1.0372, "step": 750 }, { "epoch": 0.02, "learning_rate": 4.887394061518402e-05, "loss": 0.9848, "step": 760 }, { "epoch": 0.02, "learning_rate": 4.8859124044331184e-05, "loss": 1.0746, "step": 770 }, { "epoch": 0.02, "learning_rate": 4.884430747347834e-05, "loss": 1.1633, "step": 780 }, { "epoch": 0.02, "learning_rate": 4.88294909026255e-05, "loss": 1.1765, "step": 790 }, { "epoch": 0.02, "learning_rate": 4.8814674331772656e-05, "loss": 0.9531, "step": 800 }, { "epoch": 0.02, "learning_rate": 4.879985776091981e-05, "loss": 0.9954, "step": 810 }, { "epoch": 0.02, "learning_rate": 4.878504119006697e-05, "loss": 0.9633, "step": 820 }, { "epoch": 0.02, "learning_rate": 4.8770224619214134e-05, "loss": 1.2382, "step": 830 }, { "epoch": 0.02, "learning_rate": 4.875540804836129e-05, "loss": 1.005, "step": 840 }, { "epoch": 0.03, "learning_rate": 4.874059147750845e-05, "loss": 1.0923, "step": 850 }, { "epoch": 0.03, "learning_rate": 4.8725774906655606e-05, "loss": 0.9299, "step": 860 }, { "epoch": 0.03, "learning_rate": 4.871095833580276e-05, "loss": 1.0834, "step": 870 }, { "epoch": 0.03, "learning_rate": 4.869614176494992e-05, "loss": 0.9882, "step": 880 }, { "epoch": 0.03, "learning_rate": 4.8681325194097084e-05, "loss": 1.0145, "step": 890 }, { "epoch": 0.03, "learning_rate": 4.866650862324424e-05, "loss": 1.0234, "step": 900 }, { "epoch": 0.03, "learning_rate": 4.86516920523914e-05, "loss": 1.1596, "step": 910 }, { "epoch": 0.03, "learning_rate": 4.8636875481538556e-05, "loss": 1.1069, "step": 920 }, { "epoch": 0.03, "learning_rate": 4.862205891068571e-05, "loss": 1.012, "step": 930 }, { "epoch": 0.03, "learning_rate": 4.860724233983287e-05, "loss": 1.0147, "step": 940 }, { "epoch": 0.03, "learning_rate": 4.8592425768980034e-05, "loss": 1.0027, "step": 950 }, { "epoch": 0.03, "learning_rate": 4.857760919812719e-05, "loss": 1.0529, "step": 960 }, { "epoch": 0.03, "learning_rate": 4.856279262727434e-05, "loss": 1.0556, "step": 970 }, { "epoch": 0.03, "learning_rate": 4.85479760564215e-05, "loss": 0.8887, "step": 980 }, { "epoch": 0.03, "learning_rate": 4.853315948556866e-05, "loss": 1.1216, "step": 990 }, { "epoch": 0.03, "learning_rate": 4.851834291471582e-05, "loss": 1.0372, "step": 1000 }, { "epoch": 0.03, "learning_rate": 4.850352634386298e-05, "loss": 1.0452, "step": 1010 }, { "epoch": 0.03, "learning_rate": 4.8488709773010135e-05, "loss": 1.0786, "step": 1020 }, { "epoch": 0.03, "learning_rate": 4.847389320215729e-05, "loss": 1.0466, "step": 1030 }, { "epoch": 0.03, "learning_rate": 4.845907663130445e-05, "loss": 1.161, "step": 1040 }, { "epoch": 0.03, "learning_rate": 4.8444260060451613e-05, "loss": 1.0792, "step": 1050 }, { "epoch": 0.03, "learning_rate": 4.842944348959877e-05, "loss": 1.1396, "step": 1060 }, { "epoch": 0.03, "learning_rate": 4.841462691874593e-05, "loss": 1.0736, "step": 1070 }, { "epoch": 0.03, "learning_rate": 4.8399810347893085e-05, "loss": 1.104, "step": 1080 }, { "epoch": 0.03, "learning_rate": 4.838499377704024e-05, "loss": 0.948, "step": 1090 }, { "epoch": 0.03, "learning_rate": 4.83701772061874e-05, "loss": 0.965, "step": 1100 }, { "epoch": 0.03, "learning_rate": 4.8355360635334564e-05, "loss": 1.1474, "step": 1110 }, { "epoch": 0.03, "learning_rate": 4.834054406448172e-05, "loss": 1.0499, "step": 1120 }, { "epoch": 0.03, "learning_rate": 4.832572749362888e-05, "loss": 1.0691, "step": 1130 }, { "epoch": 0.03, "learning_rate": 4.8310910922776035e-05, "loss": 1.1725, "step": 1140 }, { "epoch": 0.03, "learning_rate": 4.829609435192319e-05, "loss": 1.0933, "step": 1150 }, { "epoch": 0.03, "learning_rate": 4.828127778107035e-05, "loss": 1.1244, "step": 1160 }, { "epoch": 0.03, "learning_rate": 4.8266461210217514e-05, "loss": 1.0848, "step": 1170 }, { "epoch": 0.03, "learning_rate": 4.825164463936467e-05, "loss": 1.2447, "step": 1180 }, { "epoch": 0.04, "learning_rate": 4.823682806851182e-05, "loss": 1.0453, "step": 1190 }, { "epoch": 0.04, "learning_rate": 4.8222011497658985e-05, "loss": 1.0077, "step": 1200 }, { "epoch": 0.04, "learning_rate": 4.820719492680614e-05, "loss": 1.1749, "step": 1210 }, { "epoch": 0.04, "learning_rate": 4.81923783559533e-05, "loss": 1.021, "step": 1220 }, { "epoch": 0.04, "learning_rate": 4.8177561785100464e-05, "loss": 1.1465, "step": 1230 }, { "epoch": 0.04, "learning_rate": 4.8162745214247614e-05, "loss": 1.0847, "step": 1240 }, { "epoch": 0.04, "learning_rate": 4.814792864339477e-05, "loss": 1.0417, "step": 1250 }, { "epoch": 0.04, "learning_rate": 4.813311207254193e-05, "loss": 0.9796, "step": 1260 }, { "epoch": 0.04, "learning_rate": 4.811829550168909e-05, "loss": 1.148, "step": 1270 }, { "epoch": 0.04, "learning_rate": 4.810347893083625e-05, "loss": 1.0537, "step": 1280 }, { "epoch": 0.04, "learning_rate": 4.808866235998341e-05, "loss": 1.0994, "step": 1290 }, { "epoch": 0.04, "learning_rate": 4.8073845789130565e-05, "loss": 0.9678, "step": 1300 }, { "epoch": 0.04, "learning_rate": 4.805902921827772e-05, "loss": 1.0571, "step": 1310 }, { "epoch": 0.04, "learning_rate": 4.804421264742488e-05, "loss": 1.0449, "step": 1320 }, { "epoch": 0.04, "learning_rate": 4.802939607657204e-05, "loss": 1.0071, "step": 1330 }, { "epoch": 0.04, "learning_rate": 4.80145795057192e-05, "loss": 0.8959, "step": 1340 }, { "epoch": 0.04, "learning_rate": 4.799976293486636e-05, "loss": 1.0767, "step": 1350 }, { "epoch": 0.04, "learning_rate": 4.7984946364013515e-05, "loss": 0.9891, "step": 1360 }, { "epoch": 0.04, "learning_rate": 4.797012979316067e-05, "loss": 1.0674, "step": 1370 }, { "epoch": 0.04, "learning_rate": 4.795531322230783e-05, "loss": 1.0322, "step": 1380 }, { "epoch": 0.04, "learning_rate": 4.794049665145499e-05, "loss": 1.0522, "step": 1390 }, { "epoch": 0.04, "learning_rate": 4.792568008060215e-05, "loss": 1.2132, "step": 1400 }, { "epoch": 0.04, "learning_rate": 4.79108635097493e-05, "loss": 1.1291, "step": 1410 }, { "epoch": 0.04, "learning_rate": 4.7896046938896465e-05, "loss": 1.0099, "step": 1420 }, { "epoch": 0.04, "learning_rate": 4.788123036804362e-05, "loss": 1.029, "step": 1430 }, { "epoch": 0.04, "learning_rate": 4.786641379719078e-05, "loss": 0.9722, "step": 1440 }, { "epoch": 0.04, "learning_rate": 4.7851597226337943e-05, "loss": 1.023, "step": 1450 }, { "epoch": 0.04, "learning_rate": 4.7836780655485094e-05, "loss": 1.0579, "step": 1460 }, { "epoch": 0.04, "learning_rate": 4.782196408463225e-05, "loss": 1.0452, "step": 1470 }, { "epoch": 0.04, "learning_rate": 4.7807147513779415e-05, "loss": 1.2105, "step": 1480 }, { "epoch": 0.04, "learning_rate": 4.779233094292657e-05, "loss": 1.0491, "step": 1490 }, { "epoch": 0.04, "learning_rate": 4.777751437207373e-05, "loss": 0.9678, "step": 1500 }, { "epoch": 0.04, "learning_rate": 4.7762697801220894e-05, "loss": 0.9197, "step": 1510 }, { "epoch": 0.05, "learning_rate": 4.7747881230368044e-05, "loss": 0.9753, "step": 1520 }, { "epoch": 0.05, "learning_rate": 4.77330646595152e-05, "loss": 1.0212, "step": 1530 }, { "epoch": 0.05, "learning_rate": 4.771824808866236e-05, "loss": 1.1102, "step": 1540 }, { "epoch": 0.05, "learning_rate": 4.770343151780952e-05, "loss": 1.0254, "step": 1550 }, { "epoch": 0.05, "learning_rate": 4.768861494695668e-05, "loss": 1.1006, "step": 1560 }, { "epoch": 0.05, "learning_rate": 4.767379837610384e-05, "loss": 0.9692, "step": 1570 }, { "epoch": 0.05, "learning_rate": 4.7658981805250994e-05, "loss": 1.076, "step": 1580 }, { "epoch": 0.05, "learning_rate": 4.764416523439815e-05, "loss": 0.986, "step": 1590 }, { "epoch": 0.05, "learning_rate": 4.762934866354531e-05, "loss": 0.9922, "step": 1600 }, { "epoch": 0.05, "learning_rate": 4.761453209269247e-05, "loss": 1.1154, "step": 1610 }, { "epoch": 0.05, "learning_rate": 4.759971552183963e-05, "loss": 1.1606, "step": 1620 }, { "epoch": 0.05, "learning_rate": 4.758489895098678e-05, "loss": 1.0098, "step": 1630 }, { "epoch": 0.05, "learning_rate": 4.7570082380133944e-05, "loss": 1.0313, "step": 1640 }, { "epoch": 0.05, "learning_rate": 4.75552658092811e-05, "loss": 1.0075, "step": 1650 }, { "epoch": 0.05, "learning_rate": 4.754044923842826e-05, "loss": 1.1056, "step": 1660 }, { "epoch": 0.05, "learning_rate": 4.752563266757542e-05, "loss": 1.0331, "step": 1670 }, { "epoch": 0.05, "learning_rate": 4.751081609672258e-05, "loss": 0.9307, "step": 1680 }, { "epoch": 0.05, "learning_rate": 4.749599952586973e-05, "loss": 0.9668, "step": 1690 }, { "epoch": 0.05, "learning_rate": 4.7481182955016895e-05, "loss": 0.9923, "step": 1700 }, { "epoch": 0.05, "learning_rate": 4.746636638416405e-05, "loss": 0.9852, "step": 1710 }, { "epoch": 0.05, "learning_rate": 4.745154981331121e-05, "loss": 0.959, "step": 1720 }, { "epoch": 0.05, "learning_rate": 4.743673324245837e-05, "loss": 1.0467, "step": 1730 }, { "epoch": 0.05, "learning_rate": 4.7421916671605523e-05, "loss": 1.0341, "step": 1740 }, { "epoch": 0.05, "learning_rate": 4.740710010075268e-05, "loss": 1.0286, "step": 1750 }, { "epoch": 0.05, "learning_rate": 4.7392283529899845e-05, "loss": 1.0069, "step": 1760 }, { "epoch": 0.05, "learning_rate": 4.7377466959047e-05, "loss": 0.9959, "step": 1770 }, { "epoch": 0.05, "learning_rate": 4.736265038819416e-05, "loss": 1.032, "step": 1780 }, { "epoch": 0.05, "learning_rate": 4.7347833817341316e-05, "loss": 1.0334, "step": 1790 }, { "epoch": 0.05, "learning_rate": 4.7333017246488474e-05, "loss": 0.9271, "step": 1800 }, { "epoch": 0.05, "learning_rate": 4.731820067563563e-05, "loss": 1.0898, "step": 1810 }, { "epoch": 0.05, "learning_rate": 4.730338410478279e-05, "loss": 0.8218, "step": 1820 }, { "epoch": 0.05, "learning_rate": 4.728856753392995e-05, "loss": 0.9664, "step": 1830 }, { "epoch": 0.05, "learning_rate": 4.727375096307711e-05, "loss": 1.1085, "step": 1840 }, { "epoch": 0.05, "learning_rate": 4.7258934392224267e-05, "loss": 1.0249, "step": 1850 }, { "epoch": 0.06, "learning_rate": 4.7244117821371424e-05, "loss": 0.9883, "step": 1860 }, { "epoch": 0.06, "learning_rate": 4.722930125051858e-05, "loss": 1.0239, "step": 1870 }, { "epoch": 0.06, "learning_rate": 4.721448467966574e-05, "loss": 0.9759, "step": 1880 }, { "epoch": 0.06, "learning_rate": 4.71996681088129e-05, "loss": 0.9996, "step": 1890 }, { "epoch": 0.06, "learning_rate": 4.718485153796006e-05, "loss": 0.8492, "step": 1900 }, { "epoch": 0.06, "learning_rate": 4.717003496710721e-05, "loss": 0.9642, "step": 1910 }, { "epoch": 0.06, "learning_rate": 4.7155218396254374e-05, "loss": 0.9916, "step": 1920 }, { "epoch": 0.06, "learning_rate": 4.714040182540153e-05, "loss": 0.902, "step": 1930 }, { "epoch": 0.06, "learning_rate": 4.712558525454869e-05, "loss": 1.1098, "step": 1940 }, { "epoch": 0.06, "learning_rate": 4.711076868369585e-05, "loss": 1.0721, "step": 1950 }, { "epoch": 0.06, "learning_rate": 4.7095952112843e-05, "loss": 1.0584, "step": 1960 }, { "epoch": 0.06, "learning_rate": 4.708113554199016e-05, "loss": 1.0544, "step": 1970 }, { "epoch": 0.06, "learning_rate": 4.7066318971137324e-05, "loss": 1.1046, "step": 1980 }, { "epoch": 0.06, "learning_rate": 4.705150240028448e-05, "loss": 0.93, "step": 1990 }, { "epoch": 0.06, "learning_rate": 4.703668582943164e-05, "loss": 1.1538, "step": 2000 }, { "epoch": 0.06, "learning_rate": 4.7021869258578796e-05, "loss": 1.1012, "step": 2010 }, { "epoch": 0.06, "learning_rate": 4.700705268772595e-05, "loss": 1.056, "step": 2020 }, { "epoch": 0.06, "learning_rate": 4.699223611687311e-05, "loss": 1.0762, "step": 2030 }, { "epoch": 0.06, "learning_rate": 4.6977419546020274e-05, "loss": 1.0884, "step": 2040 }, { "epoch": 0.06, "learning_rate": 4.696260297516743e-05, "loss": 1.1416, "step": 2050 }, { "epoch": 0.06, "learning_rate": 4.694778640431459e-05, "loss": 1.0391, "step": 2060 }, { "epoch": 0.06, "learning_rate": 4.6932969833461746e-05, "loss": 1.1021, "step": 2070 }, { "epoch": 0.06, "learning_rate": 4.69181532626089e-05, "loss": 1.1112, "step": 2080 }, { "epoch": 0.06, "learning_rate": 4.690333669175606e-05, "loss": 1.1211, "step": 2090 }, { "epoch": 0.06, "learning_rate": 4.688852012090322e-05, "loss": 0.9748, "step": 2100 }, { "epoch": 0.06, "learning_rate": 4.687370355005038e-05, "loss": 0.945, "step": 2110 }, { "epoch": 0.06, "learning_rate": 4.685888697919754e-05, "loss": 1.1073, "step": 2120 }, { "epoch": 0.06, "learning_rate": 4.684407040834469e-05, "loss": 1.0968, "step": 2130 }, { "epoch": 0.06, "learning_rate": 4.6829253837491853e-05, "loss": 1.0562, "step": 2140 }, { "epoch": 0.06, "learning_rate": 4.681443726663901e-05, "loss": 1.0573, "step": 2150 }, { "epoch": 0.06, "learning_rate": 4.679962069578617e-05, "loss": 1.0862, "step": 2160 }, { "epoch": 0.06, "learning_rate": 4.678480412493333e-05, "loss": 1.0493, "step": 2170 }, { "epoch": 0.06, "learning_rate": 4.676998755408048e-05, "loss": 1.019, "step": 2180 }, { "epoch": 0.06, "learning_rate": 4.675517098322764e-05, "loss": 0.8864, "step": 2190 }, { "epoch": 0.07, "learning_rate": 4.6740354412374804e-05, "loss": 0.9383, "step": 2200 }, { "epoch": 0.07, "learning_rate": 4.672553784152196e-05, "loss": 1.0499, "step": 2210 }, { "epoch": 0.07, "learning_rate": 4.671072127066912e-05, "loss": 0.9938, "step": 2220 }, { "epoch": 0.07, "learning_rate": 4.6695904699816275e-05, "loss": 1.0305, "step": 2230 }, { "epoch": 0.07, "learning_rate": 4.668108812896343e-05, "loss": 0.9835, "step": 2240 }, { "epoch": 0.07, "learning_rate": 4.666627155811059e-05, "loss": 0.8994, "step": 2250 }, { "epoch": 0.07, "learning_rate": 4.6651454987257754e-05, "loss": 1.0598, "step": 2260 }, { "epoch": 0.07, "learning_rate": 4.663663841640491e-05, "loss": 0.9745, "step": 2270 }, { "epoch": 0.07, "learning_rate": 4.662182184555207e-05, "loss": 0.9789, "step": 2280 }, { "epoch": 0.07, "learning_rate": 4.6607005274699225e-05, "loss": 1.0458, "step": 2290 }, { "epoch": 0.07, "learning_rate": 4.659218870384638e-05, "loss": 0.9545, "step": 2300 }, { "epoch": 0.07, "learning_rate": 4.657737213299354e-05, "loss": 0.9021, "step": 2310 }, { "epoch": 0.07, "learning_rate": 4.6562555562140704e-05, "loss": 0.971, "step": 2320 }, { "epoch": 0.07, "learning_rate": 4.654773899128786e-05, "loss": 1.0396, "step": 2330 }, { "epoch": 0.07, "learning_rate": 4.653292242043502e-05, "loss": 0.8743, "step": 2340 }, { "epoch": 0.07, "learning_rate": 4.6518105849582176e-05, "loss": 0.967, "step": 2350 }, { "epoch": 0.07, "learning_rate": 4.650328927872933e-05, "loss": 1.1139, "step": 2360 }, { "epoch": 0.07, "learning_rate": 4.648847270787649e-05, "loss": 0.9882, "step": 2370 }, { "epoch": 0.07, "learning_rate": 4.647365613702365e-05, "loss": 1.0006, "step": 2380 }, { "epoch": 0.07, "learning_rate": 4.645883956617081e-05, "loss": 0.9794, "step": 2390 }, { "epoch": 0.07, "learning_rate": 4.644402299531796e-05, "loss": 0.9471, "step": 2400 }, { "epoch": 0.07, "learning_rate": 4.642920642446512e-05, "loss": 0.8593, "step": 2410 }, { "epoch": 0.07, "learning_rate": 4.641438985361228e-05, "loss": 1.0272, "step": 2420 }, { "epoch": 0.07, "learning_rate": 4.639957328275944e-05, "loss": 0.9176, "step": 2430 }, { "epoch": 0.07, "learning_rate": 4.63847567119066e-05, "loss": 0.9669, "step": 2440 }, { "epoch": 0.07, "learning_rate": 4.636994014105376e-05, "loss": 1.014, "step": 2450 }, { "epoch": 0.07, "learning_rate": 4.635512357020091e-05, "loss": 0.7286, "step": 2460 }, { "epoch": 0.07, "learning_rate": 4.634030699934807e-05, "loss": 1.114, "step": 2470 }, { "epoch": 0.07, "learning_rate": 4.632549042849523e-05, "loss": 0.9132, "step": 2480 }, { "epoch": 0.07, "learning_rate": 4.631067385764239e-05, "loss": 1.0984, "step": 2490 }, { "epoch": 0.07, "learning_rate": 4.629585728678955e-05, "loss": 1.028, "step": 2500 }, { "epoch": 0.07, "learning_rate": 4.6281040715936705e-05, "loss": 1.1322, "step": 2510 }, { "epoch": 0.07, "learning_rate": 4.626622414508386e-05, "loss": 0.9703, "step": 2520 }, { "epoch": 0.07, "learning_rate": 4.625140757423102e-05, "loss": 1.0486, "step": 2530 }, { "epoch": 0.08, "learning_rate": 4.623659100337818e-05, "loss": 1.1308, "step": 2540 }, { "epoch": 0.08, "learning_rate": 4.622177443252534e-05, "loss": 0.9167, "step": 2550 }, { "epoch": 0.08, "learning_rate": 4.62069578616725e-05, "loss": 0.9322, "step": 2560 }, { "epoch": 0.08, "learning_rate": 4.6192141290819655e-05, "loss": 1.1608, "step": 2570 }, { "epoch": 0.08, "learning_rate": 4.617732471996681e-05, "loss": 1.0079, "step": 2580 }, { "epoch": 0.08, "learning_rate": 4.616250814911397e-05, "loss": 0.9097, "step": 2590 }, { "epoch": 0.08, "learning_rate": 4.6147691578261134e-05, "loss": 1.0079, "step": 2600 }, { "epoch": 0.08, "learning_rate": 4.613287500740829e-05, "loss": 1.0918, "step": 2610 }, { "epoch": 0.08, "learning_rate": 4.611805843655545e-05, "loss": 1.0862, "step": 2620 }, { "epoch": 0.08, "learning_rate": 4.6103241865702605e-05, "loss": 1.0602, "step": 2630 }, { "epoch": 0.08, "learning_rate": 4.608842529484976e-05, "loss": 1.1089, "step": 2640 }, { "epoch": 0.08, "learning_rate": 4.607360872399692e-05, "loss": 0.9527, "step": 2650 }, { "epoch": 0.08, "learning_rate": 4.605879215314408e-05, "loss": 1.0643, "step": 2660 }, { "epoch": 0.08, "learning_rate": 4.604397558229124e-05, "loss": 0.9497, "step": 2670 }, { "epoch": 0.08, "learning_rate": 4.602915901143839e-05, "loss": 1.048, "step": 2680 }, { "epoch": 0.08, "learning_rate": 4.601434244058555e-05, "loss": 1.1039, "step": 2690 }, { "epoch": 0.08, "learning_rate": 4.599952586973271e-05, "loss": 0.9535, "step": 2700 }, { "epoch": 0.08, "learning_rate": 4.598470929887987e-05, "loss": 1.1689, "step": 2710 }, { "epoch": 0.08, "learning_rate": 4.596989272802703e-05, "loss": 0.9185, "step": 2720 }, { "epoch": 0.08, "learning_rate": 4.5955076157174184e-05, "loss": 0.9434, "step": 2730 }, { "epoch": 0.08, "learning_rate": 4.594025958632134e-05, "loss": 1.085, "step": 2740 }, { "epoch": 0.08, "learning_rate": 4.59254430154685e-05, "loss": 0.9472, "step": 2750 }, { "epoch": 0.08, "learning_rate": 4.591062644461566e-05, "loss": 1.1535, "step": 2760 }, { "epoch": 0.08, "learning_rate": 4.589580987376282e-05, "loss": 0.9407, "step": 2770 }, { "epoch": 0.08, "learning_rate": 4.588099330290998e-05, "loss": 1.0422, "step": 2780 }, { "epoch": 0.08, "learning_rate": 4.5866176732057135e-05, "loss": 0.9648, "step": 2790 }, { "epoch": 0.08, "learning_rate": 4.585136016120429e-05, "loss": 1.0928, "step": 2800 }, { "epoch": 0.08, "learning_rate": 4.583654359035145e-05, "loss": 0.9872, "step": 2810 }, { "epoch": 0.08, "learning_rate": 4.582172701949861e-05, "loss": 0.9209, "step": 2820 }, { "epoch": 0.08, "learning_rate": 4.580691044864577e-05, "loss": 1.114, "step": 2830 }, { "epoch": 0.08, "learning_rate": 4.579209387779293e-05, "loss": 1.1243, "step": 2840 }, { "epoch": 0.08, "learning_rate": 4.5777277306940085e-05, "loss": 0.9924, "step": 2850 }, { "epoch": 0.08, "learning_rate": 4.576246073608724e-05, "loss": 0.9567, "step": 2860 }, { "epoch": 0.09, "learning_rate": 4.57476441652344e-05, "loss": 0.8971, "step": 2870 }, { "epoch": 0.09, "learning_rate": 4.573282759438156e-05, "loss": 0.9983, "step": 2880 }, { "epoch": 0.09, "learning_rate": 4.571801102352872e-05, "loss": 1.0459, "step": 2890 }, { "epoch": 0.09, "learning_rate": 4.570319445267587e-05, "loss": 1.0651, "step": 2900 }, { "epoch": 0.09, "learning_rate": 4.5688377881823035e-05, "loss": 1.1088, "step": 2910 }, { "epoch": 0.09, "learning_rate": 4.567356131097019e-05, "loss": 1.1004, "step": 2920 }, { "epoch": 0.09, "learning_rate": 4.565874474011735e-05, "loss": 1.0982, "step": 2930 }, { "epoch": 0.09, "learning_rate": 4.5643928169264507e-05, "loss": 1.1044, "step": 2940 }, { "epoch": 0.09, "learning_rate": 4.5629111598411664e-05, "loss": 0.9959, "step": 2950 }, { "epoch": 0.09, "learning_rate": 4.561429502755882e-05, "loss": 0.9248, "step": 2960 }, { "epoch": 0.09, "learning_rate": 4.559947845670598e-05, "loss": 1.0083, "step": 2970 }, { "epoch": 0.09, "learning_rate": 4.558466188585314e-05, "loss": 1.0696, "step": 2980 }, { "epoch": 0.09, "learning_rate": 4.55698453150003e-05, "loss": 1.0809, "step": 2990 }, { "epoch": 0.09, "learning_rate": 4.555502874414746e-05, "loss": 0.9692, "step": 3000 }, { "epoch": 0.09, "learning_rate": 4.5540212173294614e-05, "loss": 0.8689, "step": 3010 }, { "epoch": 0.09, "learning_rate": 4.552539560244177e-05, "loss": 1.0184, "step": 3020 }, { "epoch": 0.09, "learning_rate": 4.551057903158893e-05, "loss": 0.9662, "step": 3030 }, { "epoch": 0.09, "learning_rate": 4.549576246073609e-05, "loss": 0.9472, "step": 3040 }, { "epoch": 0.09, "learning_rate": 4.548094588988325e-05, "loss": 1.0923, "step": 3050 }, { "epoch": 0.09, "learning_rate": 4.546612931903041e-05, "loss": 0.9392, "step": 3060 }, { "epoch": 0.09, "learning_rate": 4.5451312748177564e-05, "loss": 1.0266, "step": 3070 }, { "epoch": 0.09, "learning_rate": 4.543649617732472e-05, "loss": 1.0079, "step": 3080 }, { "epoch": 0.09, "learning_rate": 4.542167960647188e-05, "loss": 1.0063, "step": 3090 }, { "epoch": 0.09, "learning_rate": 4.540686303561904e-05, "loss": 0.9931, "step": 3100 }, { "epoch": 0.09, "learning_rate": 4.53920464647662e-05, "loss": 0.8938, "step": 3110 }, { "epoch": 0.09, "learning_rate": 4.537722989391335e-05, "loss": 1.0389, "step": 3120 }, { "epoch": 0.09, "learning_rate": 4.5362413323060514e-05, "loss": 1.0111, "step": 3130 }, { "epoch": 0.09, "learning_rate": 4.534759675220767e-05, "loss": 0.9162, "step": 3140 }, { "epoch": 0.09, "learning_rate": 4.533278018135483e-05, "loss": 1.0123, "step": 3150 }, { "epoch": 0.09, "learning_rate": 4.531796361050199e-05, "loss": 1.007, "step": 3160 }, { "epoch": 0.09, "learning_rate": 4.530314703964914e-05, "loss": 1.0215, "step": 3170 }, { "epoch": 0.09, "learning_rate": 4.52883304687963e-05, "loss": 0.9833, "step": 3180 }, { "epoch": 0.09, "learning_rate": 4.5273513897943464e-05, "loss": 1.0657, "step": 3190 }, { "epoch": 0.09, "learning_rate": 4.525869732709062e-05, "loss": 1.0104, "step": 3200 }, { "epoch": 0.1, "learning_rate": 4.524388075623778e-05, "loss": 1.136, "step": 3210 }, { "epoch": 0.1, "learning_rate": 4.5229064185384936e-05, "loss": 1.0972, "step": 3220 }, { "epoch": 0.1, "learning_rate": 4.5214247614532093e-05, "loss": 0.9414, "step": 3230 }, { "epoch": 0.1, "learning_rate": 4.519943104367925e-05, "loss": 1.0983, "step": 3240 }, { "epoch": 0.1, "learning_rate": 4.518461447282641e-05, "loss": 0.9094, "step": 3250 }, { "epoch": 0.1, "learning_rate": 4.516979790197357e-05, "loss": 0.9771, "step": 3260 }, { "epoch": 0.1, "learning_rate": 4.515498133112073e-05, "loss": 1.0007, "step": 3270 }, { "epoch": 0.1, "learning_rate": 4.5140164760267886e-05, "loss": 1.0244, "step": 3280 }, { "epoch": 0.1, "learning_rate": 4.5125348189415044e-05, "loss": 1.0073, "step": 3290 }, { "epoch": 0.1, "learning_rate": 4.51105316185622e-05, "loss": 0.9975, "step": 3300 }, { "epoch": 0.1, "learning_rate": 4.509571504770936e-05, "loss": 1.1689, "step": 3310 }, { "epoch": 0.1, "learning_rate": 4.508089847685652e-05, "loss": 0.8902, "step": 3320 }, { "epoch": 0.1, "learning_rate": 4.506608190600368e-05, "loss": 0.9794, "step": 3330 }, { "epoch": 0.1, "learning_rate": 4.505126533515083e-05, "loss": 0.8807, "step": 3340 }, { "epoch": 0.1, "learning_rate": 4.5036448764297994e-05, "loss": 0.9263, "step": 3350 }, { "epoch": 0.1, "learning_rate": 4.502163219344515e-05, "loss": 0.8875, "step": 3360 }, { "epoch": 0.1, "learning_rate": 4.500681562259231e-05, "loss": 0.9376, "step": 3370 }, { "epoch": 0.1, "learning_rate": 4.499199905173947e-05, "loss": 1.0126, "step": 3380 }, { "epoch": 0.1, "learning_rate": 4.497718248088662e-05, "loss": 1.0124, "step": 3390 }, { "epoch": 0.1, "learning_rate": 4.496236591003378e-05, "loss": 0.7539, "step": 3400 }, { "epoch": 0.1, "learning_rate": 4.4947549339180944e-05, "loss": 0.8332, "step": 3410 }, { "epoch": 0.1, "learning_rate": 4.49327327683281e-05, "loss": 1.027, "step": 3420 }, { "epoch": 0.1, "learning_rate": 4.491791619747526e-05, "loss": 0.8988, "step": 3430 }, { "epoch": 0.1, "learning_rate": 4.490309962662242e-05, "loss": 1.1417, "step": 3440 }, { "epoch": 0.1, "learning_rate": 4.488828305576957e-05, "loss": 1.0029, "step": 3450 }, { "epoch": 0.1, "learning_rate": 4.487346648491673e-05, "loss": 1.0582, "step": 3460 }, { "epoch": 0.1, "learning_rate": 4.4858649914063894e-05, "loss": 1.0007, "step": 3470 }, { "epoch": 0.1, "learning_rate": 4.484383334321105e-05, "loss": 0.8922, "step": 3480 }, { "epoch": 0.1, "learning_rate": 4.482901677235821e-05, "loss": 1.0126, "step": 3490 }, { "epoch": 0.1, "learning_rate": 4.4814200201505366e-05, "loss": 0.8675, "step": 3500 }, { "epoch": 0.1, "learning_rate": 4.479938363065252e-05, "loss": 0.9286, "step": 3510 }, { "epoch": 0.1, "learning_rate": 4.478456705979968e-05, "loss": 1.0148, "step": 3520 }, { "epoch": 0.1, "learning_rate": 4.476975048894684e-05, "loss": 1.0747, "step": 3530 }, { "epoch": 0.1, "learning_rate": 4.4754933918094e-05, "loss": 1.0847, "step": 3540 }, { "epoch": 0.11, "learning_rate": 4.474011734724116e-05, "loss": 1.0646, "step": 3550 }, { "epoch": 0.11, "learning_rate": 4.472530077638831e-05, "loss": 1.0878, "step": 3560 }, { "epoch": 0.11, "learning_rate": 4.471048420553547e-05, "loss": 0.9359, "step": 3570 }, { "epoch": 0.11, "learning_rate": 4.469566763468263e-05, "loss": 1.1106, "step": 3580 }, { "epoch": 0.11, "learning_rate": 4.468085106382979e-05, "loss": 0.949, "step": 3590 }, { "epoch": 0.11, "learning_rate": 4.466603449297695e-05, "loss": 0.9367, "step": 3600 }, { "epoch": 0.11, "learning_rate": 4.465121792212411e-05, "loss": 0.9948, "step": 3610 }, { "epoch": 0.11, "learning_rate": 4.463640135127126e-05, "loss": 1.0947, "step": 3620 }, { "epoch": 0.11, "learning_rate": 4.462158478041842e-05, "loss": 0.9545, "step": 3630 }, { "epoch": 0.11, "learning_rate": 4.460676820956558e-05, "loss": 1.0944, "step": 3640 }, { "epoch": 0.11, "learning_rate": 4.459195163871274e-05, "loss": 1.0387, "step": 3650 }, { "epoch": 0.11, "learning_rate": 4.45771350678599e-05, "loss": 0.9061, "step": 3660 }, { "epoch": 0.11, "learning_rate": 4.456231849700705e-05, "loss": 1.0306, "step": 3670 }, { "epoch": 0.11, "learning_rate": 4.454750192615421e-05, "loss": 1.0257, "step": 3680 }, { "epoch": 0.11, "learning_rate": 4.4532685355301374e-05, "loss": 0.9344, "step": 3690 }, { "epoch": 0.11, "learning_rate": 4.451786878444853e-05, "loss": 0.9438, "step": 3700 }, { "epoch": 0.11, "learning_rate": 4.450305221359569e-05, "loss": 1.0341, "step": 3710 }, { "epoch": 0.11, "learning_rate": 4.4488235642742845e-05, "loss": 1.0352, "step": 3720 }, { "epoch": 0.11, "learning_rate": 4.447341907189e-05, "loss": 1.1207, "step": 3730 }, { "epoch": 0.11, "learning_rate": 4.445860250103716e-05, "loss": 1.0834, "step": 3740 }, { "epoch": 0.11, "learning_rate": 4.4443785930184324e-05, "loss": 0.8907, "step": 3750 }, { "epoch": 0.11, "learning_rate": 4.442896935933148e-05, "loss": 1.1111, "step": 3760 }, { "epoch": 0.11, "learning_rate": 4.441415278847864e-05, "loss": 0.9999, "step": 3770 }, { "epoch": 0.11, "learning_rate": 4.4399336217625795e-05, "loss": 1.0504, "step": 3780 }, { "epoch": 0.11, "learning_rate": 4.438451964677295e-05, "loss": 0.9121, "step": 3790 }, { "epoch": 0.11, "learning_rate": 4.436970307592011e-05, "loss": 0.9015, "step": 3800 }, { "epoch": 0.11, "learning_rate": 4.435488650506727e-05, "loss": 1.0593, "step": 3810 }, { "epoch": 0.11, "learning_rate": 4.434006993421443e-05, "loss": 1.0612, "step": 3820 }, { "epoch": 0.11, "learning_rate": 4.432525336336159e-05, "loss": 0.9286, "step": 3830 }, { "epoch": 0.11, "learning_rate": 4.431043679250874e-05, "loss": 1.0359, "step": 3840 }, { "epoch": 0.11, "learning_rate": 4.42956202216559e-05, "loss": 1.0434, "step": 3850 }, { "epoch": 0.11, "learning_rate": 4.428080365080306e-05, "loss": 0.9933, "step": 3860 }, { "epoch": 0.11, "learning_rate": 4.426598707995022e-05, "loss": 0.9287, "step": 3870 }, { "epoch": 0.11, "learning_rate": 4.425117050909738e-05, "loss": 1.0855, "step": 3880 }, { "epoch": 0.12, "learning_rate": 4.423635393824453e-05, "loss": 1.0433, "step": 3890 }, { "epoch": 0.12, "learning_rate": 4.422153736739169e-05, "loss": 1.072, "step": 3900 }, { "epoch": 0.12, "learning_rate": 4.420672079653885e-05, "loss": 0.9849, "step": 3910 }, { "epoch": 0.12, "learning_rate": 4.419190422568601e-05, "loss": 0.999, "step": 3920 }, { "epoch": 0.12, "learning_rate": 4.417708765483317e-05, "loss": 0.8054, "step": 3930 }, { "epoch": 0.12, "learning_rate": 4.4162271083980325e-05, "loss": 0.9708, "step": 3940 }, { "epoch": 0.12, "learning_rate": 4.414745451312748e-05, "loss": 1.0625, "step": 3950 }, { "epoch": 0.12, "learning_rate": 4.413263794227464e-05, "loss": 0.9717, "step": 3960 }, { "epoch": 0.12, "learning_rate": 4.41178213714218e-05, "loss": 0.9269, "step": 3970 }, { "epoch": 0.12, "learning_rate": 4.410300480056896e-05, "loss": 1.0109, "step": 3980 }, { "epoch": 0.12, "learning_rate": 4.408818822971612e-05, "loss": 1.1448, "step": 3990 }, { "epoch": 0.12, "learning_rate": 4.4073371658863275e-05, "loss": 1.0363, "step": 4000 }, { "epoch": 0.12, "learning_rate": 4.405855508801043e-05, "loss": 1.0063, "step": 4010 }, { "epoch": 0.12, "learning_rate": 4.404373851715759e-05, "loss": 0.8822, "step": 4020 }, { "epoch": 0.12, "learning_rate": 4.402892194630475e-05, "loss": 1.0641, "step": 4030 }, { "epoch": 0.12, "learning_rate": 4.401410537545191e-05, "loss": 1.0352, "step": 4040 }, { "epoch": 0.12, "learning_rate": 4.399928880459907e-05, "loss": 1.073, "step": 4050 }, { "epoch": 0.12, "learning_rate": 4.398447223374622e-05, "loss": 1.0877, "step": 4060 }, { "epoch": 0.12, "learning_rate": 4.396965566289338e-05, "loss": 0.9055, "step": 4070 }, { "epoch": 0.12, "learning_rate": 4.395483909204054e-05, "loss": 1.0689, "step": 4080 }, { "epoch": 0.12, "learning_rate": 4.39400225211877e-05, "loss": 1.0445, "step": 4090 }, { "epoch": 0.12, "learning_rate": 4.392520595033486e-05, "loss": 0.9072, "step": 4100 }, { "epoch": 0.12, "learning_rate": 4.391038937948201e-05, "loss": 0.9371, "step": 4110 }, { "epoch": 0.12, "learning_rate": 4.389557280862917e-05, "loss": 0.8655, "step": 4120 }, { "epoch": 0.12, "learning_rate": 4.388075623777633e-05, "loss": 1.1038, "step": 4130 }, { "epoch": 0.12, "learning_rate": 4.386593966692349e-05, "loss": 1.0431, "step": 4140 }, { "epoch": 0.12, "learning_rate": 4.385112309607065e-05, "loss": 0.9737, "step": 4150 }, { "epoch": 0.12, "learning_rate": 4.3836306525217804e-05, "loss": 1.0146, "step": 4160 }, { "epoch": 0.12, "learning_rate": 4.382148995436496e-05, "loss": 0.9463, "step": 4170 }, { "epoch": 0.12, "learning_rate": 4.380667338351212e-05, "loss": 1.048, "step": 4180 }, { "epoch": 0.12, "learning_rate": 4.379185681265928e-05, "loss": 0.9281, "step": 4190 }, { "epoch": 0.12, "learning_rate": 4.377704024180644e-05, "loss": 1.0834, "step": 4200 }, { "epoch": 0.12, "learning_rate": 4.37622236709536e-05, "loss": 1.007, "step": 4210 }, { "epoch": 0.13, "learning_rate": 4.3747407100100754e-05, "loss": 0.9686, "step": 4220 }, { "epoch": 0.13, "learning_rate": 4.373259052924791e-05, "loss": 1.0291, "step": 4230 }, { "epoch": 0.13, "learning_rate": 4.371777395839507e-05, "loss": 1.0465, "step": 4240 }, { "epoch": 0.13, "learning_rate": 4.370295738754223e-05, "loss": 0.953, "step": 4250 }, { "epoch": 0.13, "learning_rate": 4.368814081668939e-05, "loss": 1.0508, "step": 4260 }, { "epoch": 0.13, "learning_rate": 4.367332424583655e-05, "loss": 1.0006, "step": 4270 }, { "epoch": 0.13, "learning_rate": 4.3658507674983704e-05, "loss": 1.1245, "step": 4280 }, { "epoch": 0.13, "learning_rate": 4.364369110413086e-05, "loss": 0.9801, "step": 4290 }, { "epoch": 0.13, "learning_rate": 4.362887453327802e-05, "loss": 0.9444, "step": 4300 }, { "epoch": 0.13, "learning_rate": 4.361405796242518e-05, "loss": 1.0968, "step": 4310 }, { "epoch": 0.13, "learning_rate": 4.359924139157234e-05, "loss": 0.9368, "step": 4320 }, { "epoch": 0.13, "learning_rate": 4.358442482071949e-05, "loss": 0.9025, "step": 4330 }, { "epoch": 0.13, "learning_rate": 4.356960824986665e-05, "loss": 0.8605, "step": 4340 }, { "epoch": 0.13, "learning_rate": 4.355479167901381e-05, "loss": 0.9366, "step": 4350 }, { "epoch": 0.13, "learning_rate": 4.353997510816097e-05, "loss": 0.8986, "step": 4360 }, { "epoch": 0.13, "learning_rate": 4.3525158537308126e-05, "loss": 0.9287, "step": 4370 }, { "epoch": 0.13, "learning_rate": 4.3510341966455284e-05, "loss": 1.0341, "step": 4380 }, { "epoch": 0.13, "learning_rate": 4.349552539560244e-05, "loss": 1.0212, "step": 4390 }, { "epoch": 0.13, "learning_rate": 4.34807088247496e-05, "loss": 0.8867, "step": 4400 }, { "epoch": 0.13, "learning_rate": 4.346589225389676e-05, "loss": 0.9691, "step": 4410 }, { "epoch": 0.13, "learning_rate": 4.345107568304392e-05, "loss": 0.9443, "step": 4420 }, { "epoch": 0.13, "learning_rate": 4.3436259112191076e-05, "loss": 1.0488, "step": 4430 }, { "epoch": 0.13, "learning_rate": 4.3421442541338234e-05, "loss": 1.1335, "step": 4440 }, { "epoch": 0.13, "learning_rate": 4.340662597048539e-05, "loss": 0.9009, "step": 4450 }, { "epoch": 0.13, "learning_rate": 4.339180939963255e-05, "loss": 1.0512, "step": 4460 }, { "epoch": 0.13, "learning_rate": 4.337699282877971e-05, "loss": 0.89, "step": 4470 }, { "epoch": 0.13, "learning_rate": 4.336217625792687e-05, "loss": 0.9175, "step": 4480 }, { "epoch": 0.13, "learning_rate": 4.334735968707403e-05, "loss": 1.0487, "step": 4490 }, { "epoch": 0.13, "learning_rate": 4.3332543116221184e-05, "loss": 0.9249, "step": 4500 }, { "epoch": 0.13, "learning_rate": 4.331772654536834e-05, "loss": 0.9176, "step": 4510 }, { "epoch": 0.13, "learning_rate": 4.33029099745155e-05, "loss": 1.0861, "step": 4520 }, { "epoch": 0.13, "learning_rate": 4.328809340366266e-05, "loss": 1.0183, "step": 4530 }, { "epoch": 0.13, "learning_rate": 4.327327683280982e-05, "loss": 0.9382, "step": 4540 }, { "epoch": 0.13, "learning_rate": 4.325846026195697e-05, "loss": 1.011, "step": 4550 }, { "epoch": 0.14, "learning_rate": 4.3243643691104134e-05, "loss": 1.1504, "step": 4560 }, { "epoch": 0.14, "learning_rate": 4.322882712025129e-05, "loss": 0.9311, "step": 4570 }, { "epoch": 0.14, "learning_rate": 4.321401054939845e-05, "loss": 1.0303, "step": 4580 }, { "epoch": 0.14, "learning_rate": 4.319919397854561e-05, "loss": 0.9909, "step": 4590 }, { "epoch": 0.14, "learning_rate": 4.318437740769277e-05, "loss": 0.9847, "step": 4600 }, { "epoch": 0.14, "learning_rate": 4.316956083683992e-05, "loss": 0.8849, "step": 4610 }, { "epoch": 0.14, "learning_rate": 4.315474426598708e-05, "loss": 1.0116, "step": 4620 }, { "epoch": 0.14, "learning_rate": 4.313992769513424e-05, "loss": 1.1569, "step": 4630 }, { "epoch": 0.14, "learning_rate": 4.31251111242814e-05, "loss": 0.8751, "step": 4640 }, { "epoch": 0.14, "learning_rate": 4.3110294553428556e-05, "loss": 0.9307, "step": 4650 }, { "epoch": 0.14, "learning_rate": 4.309547798257571e-05, "loss": 1.0062, "step": 4660 }, { "epoch": 0.14, "learning_rate": 4.308066141172287e-05, "loss": 0.9799, "step": 4670 }, { "epoch": 0.14, "learning_rate": 4.306584484087003e-05, "loss": 0.9787, "step": 4680 }, { "epoch": 0.14, "learning_rate": 4.305102827001719e-05, "loss": 0.9239, "step": 4690 }, { "epoch": 0.14, "learning_rate": 4.303621169916435e-05, "loss": 0.9535, "step": 4700 }, { "epoch": 0.14, "learning_rate": 4.3021395128311506e-05, "loss": 1.0515, "step": 4710 }, { "epoch": 0.14, "learning_rate": 4.300657855745866e-05, "loss": 1.037, "step": 4720 }, { "epoch": 0.14, "learning_rate": 4.299176198660582e-05, "loss": 1.1239, "step": 4730 }, { "epoch": 0.14, "learning_rate": 4.297694541575298e-05, "loss": 1.0813, "step": 4740 }, { "epoch": 0.14, "learning_rate": 4.296212884490014e-05, "loss": 1.2004, "step": 4750 }, { "epoch": 0.14, "learning_rate": 4.29473122740473e-05, "loss": 0.9914, "step": 4760 }, { "epoch": 0.14, "learning_rate": 4.2932495703194456e-05, "loss": 0.9044, "step": 4770 }, { "epoch": 0.14, "learning_rate": 4.2917679132341613e-05, "loss": 1.0464, "step": 4780 }, { "epoch": 0.14, "learning_rate": 4.290286256148877e-05, "loss": 0.9826, "step": 4790 }, { "epoch": 0.14, "learning_rate": 4.288804599063593e-05, "loss": 0.9495, "step": 4800 }, { "epoch": 0.14, "learning_rate": 4.287322941978309e-05, "loss": 1.0386, "step": 4810 }, { "epoch": 0.14, "learning_rate": 4.285841284893025e-05, "loss": 0.9099, "step": 4820 }, { "epoch": 0.14, "learning_rate": 4.28435962780774e-05, "loss": 0.9316, "step": 4830 }, { "epoch": 0.14, "learning_rate": 4.2828779707224564e-05, "loss": 1.0725, "step": 4840 }, { "epoch": 0.14, "learning_rate": 4.281396313637172e-05, "loss": 0.8463, "step": 4850 }, { "epoch": 0.14, "learning_rate": 4.279914656551888e-05, "loss": 1.1508, "step": 4860 }, { "epoch": 0.14, "learning_rate": 4.278432999466604e-05, "loss": 0.9393, "step": 4870 }, { "epoch": 0.14, "learning_rate": 4.276951342381319e-05, "loss": 1.0294, "step": 4880 }, { "epoch": 0.14, "learning_rate": 4.275469685296035e-05, "loss": 1.1232, "step": 4890 }, { "epoch": 0.15, "learning_rate": 4.273988028210751e-05, "loss": 0.9461, "step": 4900 }, { "epoch": 0.15, "learning_rate": 4.272506371125467e-05, "loss": 0.8287, "step": 4910 }, { "epoch": 0.15, "learning_rate": 4.271024714040183e-05, "loss": 1.0617, "step": 4920 }, { "epoch": 0.15, "learning_rate": 4.2695430569548986e-05, "loss": 0.9236, "step": 4930 }, { "epoch": 0.15, "learning_rate": 4.268061399869614e-05, "loss": 0.9911, "step": 4940 }, { "epoch": 0.15, "learning_rate": 4.26657974278433e-05, "loss": 1.1111, "step": 4950 }, { "epoch": 0.15, "learning_rate": 4.265098085699046e-05, "loss": 0.9179, "step": 4960 }, { "epoch": 0.15, "learning_rate": 4.263616428613762e-05, "loss": 0.8431, "step": 4970 }, { "epoch": 0.15, "learning_rate": 4.262134771528478e-05, "loss": 0.9731, "step": 4980 }, { "epoch": 0.15, "learning_rate": 4.2606531144431936e-05, "loss": 0.9344, "step": 4990 }, { "epoch": 0.15, "learning_rate": 4.259171457357909e-05, "loss": 1.0364, "step": 5000 }, { "epoch": 0.15, "learning_rate": 4.257689800272625e-05, "loss": 0.9216, "step": 5010 }, { "epoch": 0.15, "learning_rate": 4.256208143187341e-05, "loss": 0.9568, "step": 5020 }, { "epoch": 0.15, "learning_rate": 4.254726486102057e-05, "loss": 0.9962, "step": 5030 }, { "epoch": 0.15, "learning_rate": 4.253244829016773e-05, "loss": 1.1549, "step": 5040 }, { "epoch": 0.15, "learning_rate": 4.251763171931488e-05, "loss": 0.9061, "step": 5050 }, { "epoch": 0.15, "learning_rate": 4.250281514846204e-05, "loss": 0.9633, "step": 5060 }, { "epoch": 0.15, "learning_rate": 4.24879985776092e-05, "loss": 0.9426, "step": 5070 }, { "epoch": 0.15, "learning_rate": 4.247318200675636e-05, "loss": 1.0316, "step": 5080 }, { "epoch": 0.15, "learning_rate": 4.245836543590352e-05, "loss": 0.879, "step": 5090 }, { "epoch": 0.15, "learning_rate": 4.244354886505067e-05, "loss": 0.8395, "step": 5100 }, { "epoch": 0.15, "learning_rate": 4.242873229419783e-05, "loss": 1.022, "step": 5110 }, { "epoch": 0.15, "learning_rate": 4.241391572334499e-05, "loss": 0.9806, "step": 5120 }, { "epoch": 0.15, "learning_rate": 4.239909915249215e-05, "loss": 0.9603, "step": 5130 }, { "epoch": 0.15, "learning_rate": 4.238428258163931e-05, "loss": 0.8212, "step": 5140 }, { "epoch": 0.15, "learning_rate": 4.2369466010786465e-05, "loss": 0.9402, "step": 5150 }, { "epoch": 0.15, "learning_rate": 4.235464943993362e-05, "loss": 0.9632, "step": 5160 }, { "epoch": 0.15, "learning_rate": 4.233983286908078e-05, "loss": 0.857, "step": 5170 }, { "epoch": 0.15, "learning_rate": 4.232501629822794e-05, "loss": 1.0024, "step": 5180 }, { "epoch": 0.15, "learning_rate": 4.23101997273751e-05, "loss": 0.9256, "step": 5190 }, { "epoch": 0.15, "learning_rate": 4.229538315652226e-05, "loss": 0.9668, "step": 5200 }, { "epoch": 0.15, "learning_rate": 4.2280566585669415e-05, "loss": 0.9422, "step": 5210 }, { "epoch": 0.15, "learning_rate": 4.226575001481657e-05, "loss": 0.9228, "step": 5220 }, { "epoch": 0.15, "learning_rate": 4.225093344396373e-05, "loss": 0.9124, "step": 5230 }, { "epoch": 0.16, "learning_rate": 4.223611687311089e-05, "loss": 1.0929, "step": 5240 }, { "epoch": 0.16, "learning_rate": 4.222130030225805e-05, "loss": 0.9791, "step": 5250 }, { "epoch": 0.16, "learning_rate": 4.220648373140521e-05, "loss": 1.0369, "step": 5260 }, { "epoch": 0.16, "learning_rate": 4.219166716055236e-05, "loss": 1.0614, "step": 5270 }, { "epoch": 0.16, "learning_rate": 4.217685058969952e-05, "loss": 1.0704, "step": 5280 }, { "epoch": 0.16, "learning_rate": 4.216203401884668e-05, "loss": 0.977, "step": 5290 }, { "epoch": 0.16, "learning_rate": 4.214721744799384e-05, "loss": 1.0282, "step": 5300 }, { "epoch": 0.16, "learning_rate": 4.2132400877141e-05, "loss": 0.9825, "step": 5310 }, { "epoch": 0.16, "learning_rate": 4.211758430628815e-05, "loss": 1.0764, "step": 5320 }, { "epoch": 0.16, "learning_rate": 4.210276773543531e-05, "loss": 0.9701, "step": 5330 }, { "epoch": 0.16, "learning_rate": 4.208795116458247e-05, "loss": 1.0994, "step": 5340 }, { "epoch": 0.16, "learning_rate": 4.207313459372963e-05, "loss": 0.8733, "step": 5350 }, { "epoch": 0.16, "learning_rate": 4.205831802287679e-05, "loss": 1.0822, "step": 5360 }, { "epoch": 0.16, "learning_rate": 4.204350145202395e-05, "loss": 0.9434, "step": 5370 }, { "epoch": 0.16, "learning_rate": 4.20286848811711e-05, "loss": 0.9525, "step": 5380 }, { "epoch": 0.16, "learning_rate": 4.201386831031826e-05, "loss": 0.9808, "step": 5390 }, { "epoch": 0.16, "learning_rate": 4.199905173946542e-05, "loss": 0.9915, "step": 5400 }, { "epoch": 0.16, "learning_rate": 4.198423516861258e-05, "loss": 0.9708, "step": 5410 }, { "epoch": 0.16, "learning_rate": 4.196941859775974e-05, "loss": 1.0643, "step": 5420 }, { "epoch": 0.16, "learning_rate": 4.1954602026906895e-05, "loss": 1.0476, "step": 5430 }, { "epoch": 0.16, "learning_rate": 4.193978545605405e-05, "loss": 1.0005, "step": 5440 }, { "epoch": 0.16, "learning_rate": 4.192496888520121e-05, "loss": 1.0663, "step": 5450 }, { "epoch": 0.16, "learning_rate": 4.1910152314348366e-05, "loss": 1.0395, "step": 5460 }, { "epoch": 0.16, "learning_rate": 4.189533574349553e-05, "loss": 1.0204, "step": 5470 }, { "epoch": 0.16, "learning_rate": 4.188051917264269e-05, "loss": 0.9644, "step": 5480 }, { "epoch": 0.16, "learning_rate": 4.186570260178984e-05, "loss": 0.983, "step": 5490 }, { "epoch": 0.16, "learning_rate": 4.1850886030937e-05, "loss": 1.0131, "step": 5500 }, { "epoch": 0.16, "learning_rate": 4.183606946008416e-05, "loss": 1.0005, "step": 5510 }, { "epoch": 0.16, "learning_rate": 4.1821252889231316e-05, "loss": 0.9998, "step": 5520 }, { "epoch": 0.16, "learning_rate": 4.180643631837848e-05, "loss": 0.9927, "step": 5530 }, { "epoch": 0.16, "learning_rate": 4.179161974752564e-05, "loss": 0.8264, "step": 5540 }, { "epoch": 0.16, "learning_rate": 4.177680317667279e-05, "loss": 1.0266, "step": 5550 }, { "epoch": 0.16, "learning_rate": 4.176198660581995e-05, "loss": 0.8833, "step": 5560 }, { "epoch": 0.17, "learning_rate": 4.174717003496711e-05, "loss": 0.9666, "step": 5570 }, { "epoch": 0.17, "learning_rate": 4.173235346411427e-05, "loss": 1.0156, "step": 5580 }, { "epoch": 0.17, "learning_rate": 4.171753689326143e-05, "loss": 1.0261, "step": 5590 }, { "epoch": 0.17, "learning_rate": 4.170272032240858e-05, "loss": 1.0929, "step": 5600 }, { "epoch": 0.17, "learning_rate": 4.168790375155574e-05, "loss": 0.9007, "step": 5610 }, { "epoch": 0.17, "learning_rate": 4.16730871807029e-05, "loss": 0.9849, "step": 5620 }, { "epoch": 0.17, "learning_rate": 4.165827060985006e-05, "loss": 0.9266, "step": 5630 }, { "epoch": 0.17, "learning_rate": 4.164345403899722e-05, "loss": 0.8687, "step": 5640 }, { "epoch": 0.17, "learning_rate": 4.1628637468144374e-05, "loss": 1.0976, "step": 5650 }, { "epoch": 0.17, "learning_rate": 4.161382089729153e-05, "loss": 1.001, "step": 5660 }, { "epoch": 0.17, "learning_rate": 4.159900432643869e-05, "loss": 0.922, "step": 5670 }, { "epoch": 0.17, "learning_rate": 4.158418775558585e-05, "loss": 1.0987, "step": 5680 }, { "epoch": 0.17, "learning_rate": 4.156937118473301e-05, "loss": 1.035, "step": 5690 }, { "epoch": 0.17, "learning_rate": 4.155455461388017e-05, "loss": 0.9958, "step": 5700 }, { "epoch": 0.17, "learning_rate": 4.1539738043027324e-05, "loss": 0.8683, "step": 5710 }, { "epoch": 0.17, "learning_rate": 4.152492147217448e-05, "loss": 0.8124, "step": 5720 }, { "epoch": 0.17, "learning_rate": 4.151010490132164e-05, "loss": 1.0338, "step": 5730 }, { "epoch": 0.17, "learning_rate": 4.1495288330468796e-05, "loss": 0.9689, "step": 5740 }, { "epoch": 0.17, "learning_rate": 4.148047175961596e-05, "loss": 1.0404, "step": 5750 }, { "epoch": 0.17, "learning_rate": 4.146565518876312e-05, "loss": 0.9264, "step": 5760 }, { "epoch": 0.17, "learning_rate": 4.145083861791027e-05, "loss": 1.0461, "step": 5770 }, { "epoch": 0.17, "learning_rate": 4.143602204705743e-05, "loss": 0.8862, "step": 5780 }, { "epoch": 0.17, "learning_rate": 4.142120547620459e-05, "loss": 0.912, "step": 5790 }, { "epoch": 0.17, "learning_rate": 4.1406388905351746e-05, "loss": 0.8659, "step": 5800 }, { "epoch": 0.17, "learning_rate": 4.139157233449891e-05, "loss": 0.9026, "step": 5810 }, { "epoch": 0.17, "learning_rate": 4.137675576364606e-05, "loss": 1.0321, "step": 5820 }, { "epoch": 0.17, "learning_rate": 4.136193919279322e-05, "loss": 0.9552, "step": 5830 }, { "epoch": 0.17, "learning_rate": 4.134712262194038e-05, "loss": 1.0345, "step": 5840 }, { "epoch": 0.17, "learning_rate": 4.133230605108754e-05, "loss": 0.9723, "step": 5850 }, { "epoch": 0.17, "learning_rate": 4.1317489480234696e-05, "loss": 0.9258, "step": 5860 }, { "epoch": 0.17, "learning_rate": 4.1302672909381853e-05, "loss": 0.9752, "step": 5870 }, { "epoch": 0.17, "learning_rate": 4.128785633852901e-05, "loss": 1.0206, "step": 5880 }, { "epoch": 0.17, "learning_rate": 4.127303976767617e-05, "loss": 0.9742, "step": 5890 }, { "epoch": 0.17, "learning_rate": 4.125822319682333e-05, "loss": 0.9583, "step": 5900 }, { "epoch": 0.18, "learning_rate": 4.124340662597049e-05, "loss": 0.8648, "step": 5910 }, { "epoch": 0.18, "learning_rate": 4.1228590055117646e-05, "loss": 1.0787, "step": 5920 }, { "epoch": 0.18, "learning_rate": 4.1213773484264804e-05, "loss": 0.9813, "step": 5930 }, { "epoch": 0.18, "learning_rate": 4.119895691341196e-05, "loss": 0.944, "step": 5940 }, { "epoch": 0.18, "learning_rate": 4.118414034255912e-05, "loss": 1.0075, "step": 5950 }, { "epoch": 0.18, "learning_rate": 4.116932377170628e-05, "loss": 0.8044, "step": 5960 }, { "epoch": 0.18, "learning_rate": 4.115450720085344e-05, "loss": 0.9448, "step": 5970 }, { "epoch": 0.18, "learning_rate": 4.1139690630000597e-05, "loss": 0.7978, "step": 5980 }, { "epoch": 0.18, "learning_rate": 4.1124874059147754e-05, "loss": 0.8473, "step": 5990 }, { "epoch": 0.18, "learning_rate": 4.111005748829491e-05, "loss": 1.087, "step": 6000 }, { "epoch": 0.18, "learning_rate": 4.109524091744207e-05, "loss": 1.024, "step": 6010 }, { "epoch": 0.18, "learning_rate": 4.1080424346589226e-05, "loss": 1.0458, "step": 6020 }, { "epoch": 0.18, "learning_rate": 4.106560777573639e-05, "loss": 1.0961, "step": 6030 }, { "epoch": 0.18, "learning_rate": 4.105079120488354e-05, "loss": 0.9593, "step": 6040 }, { "epoch": 0.18, "learning_rate": 4.10359746340307e-05, "loss": 1.1049, "step": 6050 }, { "epoch": 0.18, "learning_rate": 4.102115806317786e-05, "loss": 1.0515, "step": 6060 }, { "epoch": 0.18, "learning_rate": 4.100634149232502e-05, "loss": 0.9561, "step": 6070 }, { "epoch": 0.18, "learning_rate": 4.0991524921472176e-05, "loss": 0.9915, "step": 6080 }, { "epoch": 0.18, "learning_rate": 4.097670835061933e-05, "loss": 0.942, "step": 6090 }, { "epoch": 0.18, "learning_rate": 4.096189177976649e-05, "loss": 0.9765, "step": 6100 }, { "epoch": 0.18, "learning_rate": 4.094707520891365e-05, "loss": 1.0278, "step": 6110 }, { "epoch": 0.18, "learning_rate": 4.093225863806081e-05, "loss": 0.9689, "step": 6120 }, { "epoch": 0.18, "learning_rate": 4.091744206720797e-05, "loss": 1.0525, "step": 6130 }, { "epoch": 0.18, "learning_rate": 4.0902625496355126e-05, "loss": 0.923, "step": 6140 }, { "epoch": 0.18, "learning_rate": 4.088780892550228e-05, "loss": 0.9831, "step": 6150 }, { "epoch": 0.18, "learning_rate": 4.087299235464944e-05, "loss": 0.919, "step": 6160 }, { "epoch": 0.18, "learning_rate": 4.08581757837966e-05, "loss": 1.0675, "step": 6170 }, { "epoch": 0.18, "learning_rate": 4.084335921294376e-05, "loss": 1.0158, "step": 6180 }, { "epoch": 0.18, "learning_rate": 4.082854264209092e-05, "loss": 0.837, "step": 6190 }, { "epoch": 0.18, "learning_rate": 4.0813726071238076e-05, "loss": 1.0987, "step": 6200 }, { "epoch": 0.18, "learning_rate": 4.079890950038523e-05, "loss": 1.0349, "step": 6210 }, { "epoch": 0.18, "learning_rate": 4.078409292953239e-05, "loss": 0.8741, "step": 6220 }, { "epoch": 0.18, "learning_rate": 4.076927635867955e-05, "loss": 0.7855, "step": 6230 }, { "epoch": 0.18, "learning_rate": 4.075445978782671e-05, "loss": 0.8909, "step": 6240 }, { "epoch": 0.19, "learning_rate": 4.073964321697387e-05, "loss": 1.0483, "step": 6250 }, { "epoch": 0.19, "learning_rate": 4.072482664612102e-05, "loss": 1.0883, "step": 6260 }, { "epoch": 0.19, "learning_rate": 4.0710010075268183e-05, "loss": 0.9039, "step": 6270 }, { "epoch": 0.19, "learning_rate": 4.069519350441534e-05, "loss": 0.9711, "step": 6280 }, { "epoch": 0.19, "learning_rate": 4.06803769335625e-05, "loss": 1.0803, "step": 6290 }, { "epoch": 0.19, "learning_rate": 4.0665560362709655e-05, "loss": 1.0707, "step": 6300 }, { "epoch": 0.19, "learning_rate": 4.065074379185681e-05, "loss": 0.9018, "step": 6310 }, { "epoch": 0.19, "learning_rate": 4.063592722100397e-05, "loss": 0.866, "step": 6320 }, { "epoch": 0.19, "learning_rate": 4.062111065015113e-05, "loss": 0.9473, "step": 6330 }, { "epoch": 0.19, "learning_rate": 4.060629407929829e-05, "loss": 0.9391, "step": 6340 }, { "epoch": 0.19, "learning_rate": 4.059147750844545e-05, "loss": 0.9851, "step": 6350 }, { "epoch": 0.19, "learning_rate": 4.0576660937592605e-05, "loss": 0.9714, "step": 6360 }, { "epoch": 0.19, "learning_rate": 4.056184436673976e-05, "loss": 0.9525, "step": 6370 }, { "epoch": 0.19, "learning_rate": 4.054702779588692e-05, "loss": 0.9674, "step": 6380 }, { "epoch": 0.19, "learning_rate": 4.053221122503408e-05, "loss": 0.9294, "step": 6390 }, { "epoch": 0.19, "learning_rate": 4.051739465418124e-05, "loss": 0.9353, "step": 6400 }, { "epoch": 0.19, "learning_rate": 4.05025780833284e-05, "loss": 0.9975, "step": 6410 }, { "epoch": 0.19, "learning_rate": 4.0487761512475555e-05, "loss": 1.066, "step": 6420 }, { "epoch": 0.19, "learning_rate": 4.047294494162271e-05, "loss": 0.8707, "step": 6430 }, { "epoch": 0.19, "learning_rate": 4.045812837076987e-05, "loss": 0.957, "step": 6440 }, { "epoch": 0.19, "learning_rate": 4.044331179991703e-05, "loss": 1.0121, "step": 6450 }, { "epoch": 0.19, "learning_rate": 4.042849522906419e-05, "loss": 1.0795, "step": 6460 }, { "epoch": 0.19, "learning_rate": 4.041367865821135e-05, "loss": 0.9982, "step": 6470 }, { "epoch": 0.19, "learning_rate": 4.03988620873585e-05, "loss": 0.934, "step": 6480 }, { "epoch": 0.19, "learning_rate": 4.038404551650566e-05, "loss": 0.9686, "step": 6490 }, { "epoch": 0.19, "learning_rate": 4.036922894565282e-05, "loss": 1.0467, "step": 6500 }, { "epoch": 0.19, "learning_rate": 4.035441237479998e-05, "loss": 0.9294, "step": 6510 }, { "epoch": 0.19, "learning_rate": 4.033959580394714e-05, "loss": 1.1935, "step": 6520 }, { "epoch": 0.19, "learning_rate": 4.03247792330943e-05, "loss": 0.9153, "step": 6530 }, { "epoch": 0.19, "learning_rate": 4.030996266224145e-05, "loss": 0.9231, "step": 6540 }, { "epoch": 0.19, "learning_rate": 4.029514609138861e-05, "loss": 0.9228, "step": 6550 }, { "epoch": 0.19, "learning_rate": 4.028032952053577e-05, "loss": 0.9463, "step": 6560 }, { "epoch": 0.19, "learning_rate": 4.026551294968293e-05, "loss": 0.9007, "step": 6570 }, { "epoch": 0.19, "learning_rate": 4.0250696378830085e-05, "loss": 0.8671, "step": 6580 }, { "epoch": 0.2, "learning_rate": 4.023587980797724e-05, "loss": 0.9874, "step": 6590 }, { "epoch": 0.2, "learning_rate": 4.02210632371244e-05, "loss": 1.0277, "step": 6600 }, { "epoch": 0.2, "learning_rate": 4.0206246666271556e-05, "loss": 1.1138, "step": 6610 }, { "epoch": 0.2, "learning_rate": 4.019143009541872e-05, "loss": 0.9372, "step": 6620 }, { "epoch": 0.2, "learning_rate": 4.017661352456588e-05, "loss": 1.0057, "step": 6630 }, { "epoch": 0.2, "learning_rate": 4.0161796953713035e-05, "loss": 0.8845, "step": 6640 }, { "epoch": 0.2, "learning_rate": 4.014698038286019e-05, "loss": 0.8917, "step": 6650 }, { "epoch": 0.2, "learning_rate": 4.013216381200735e-05, "loss": 1.1626, "step": 6660 }, { "epoch": 0.2, "learning_rate": 4.0117347241154507e-05, "loss": 0.8921, "step": 6670 }, { "epoch": 0.2, "learning_rate": 4.010253067030167e-05, "loss": 1.0248, "step": 6680 }, { "epoch": 0.2, "learning_rate": 4.008771409944883e-05, "loss": 1.1001, "step": 6690 }, { "epoch": 0.2, "learning_rate": 4.0072897528595985e-05, "loss": 0.969, "step": 6700 }, { "epoch": 0.2, "learning_rate": 4.005808095774314e-05, "loss": 0.8646, "step": 6710 }, { "epoch": 0.2, "learning_rate": 4.00432643868903e-05, "loss": 1.0792, "step": 6720 }, { "epoch": 0.2, "learning_rate": 4.002844781603746e-05, "loss": 0.9803, "step": 6730 }, { "epoch": 0.2, "learning_rate": 4.001363124518462e-05, "loss": 0.9235, "step": 6740 }, { "epoch": 0.2, "learning_rate": 3.999881467433178e-05, "loss": 0.9415, "step": 6750 }, { "epoch": 0.2, "learning_rate": 3.998399810347893e-05, "loss": 0.7717, "step": 6760 }, { "epoch": 0.2, "learning_rate": 3.996918153262609e-05, "loss": 0.9583, "step": 6770 }, { "epoch": 0.2, "learning_rate": 3.995436496177325e-05, "loss": 1.0054, "step": 6780 }, { "epoch": 0.2, "learning_rate": 3.993954839092041e-05, "loss": 1.0008, "step": 6790 }, { "epoch": 0.2, "learning_rate": 3.992473182006757e-05, "loss": 0.9701, "step": 6800 }, { "epoch": 0.2, "learning_rate": 3.990991524921472e-05, "loss": 0.8991, "step": 6810 }, { "epoch": 0.2, "learning_rate": 3.989509867836188e-05, "loss": 0.8607, "step": 6820 }, { "epoch": 0.2, "learning_rate": 3.988028210750904e-05, "loss": 0.8719, "step": 6830 }, { "epoch": 0.2, "learning_rate": 3.98654655366562e-05, "loss": 0.9115, "step": 6840 }, { "epoch": 0.2, "learning_rate": 3.985064896580336e-05, "loss": 1.0135, "step": 6850 }, { "epoch": 0.2, "learning_rate": 3.9835832394950514e-05, "loss": 0.9497, "step": 6860 }, { "epoch": 0.2, "learning_rate": 3.982101582409767e-05, "loss": 0.9448, "step": 6870 }, { "epoch": 0.2, "learning_rate": 3.980619925324483e-05, "loss": 0.9963, "step": 6880 }, { "epoch": 0.2, "learning_rate": 3.9791382682391986e-05, "loss": 0.8949, "step": 6890 }, { "epoch": 0.2, "learning_rate": 3.977656611153915e-05, "loss": 0.9696, "step": 6900 }, { "epoch": 0.2, "learning_rate": 3.976174954068631e-05, "loss": 1.01, "step": 6910 }, { "epoch": 0.21, "learning_rate": 3.9746932969833465e-05, "loss": 0.8498, "step": 6920 }, { "epoch": 0.21, "learning_rate": 3.973211639898062e-05, "loss": 1.0444, "step": 6930 }, { "epoch": 0.21, "learning_rate": 3.971729982812778e-05, "loss": 0.9505, "step": 6940 }, { "epoch": 0.21, "learning_rate": 3.9702483257274936e-05, "loss": 1.0077, "step": 6950 }, { "epoch": 0.21, "learning_rate": 3.96876666864221e-05, "loss": 0.9244, "step": 6960 }, { "epoch": 0.21, "learning_rate": 3.967285011556926e-05, "loss": 0.9844, "step": 6970 }, { "epoch": 0.21, "learning_rate": 3.965803354471641e-05, "loss": 1.0796, "step": 6980 }, { "epoch": 0.21, "learning_rate": 3.964321697386357e-05, "loss": 1.012, "step": 6990 }, { "epoch": 0.21, "learning_rate": 3.962840040301073e-05, "loss": 0.9539, "step": 7000 }, { "epoch": 0.21, "learning_rate": 3.9613583832157886e-05, "loss": 0.9021, "step": 7010 }, { "epoch": 0.21, "learning_rate": 3.959876726130505e-05, "loss": 0.945, "step": 7020 }, { "epoch": 0.21, "learning_rate": 3.95839506904522e-05, "loss": 0.8773, "step": 7030 }, { "epoch": 0.21, "learning_rate": 3.956913411959936e-05, "loss": 0.8579, "step": 7040 }, { "epoch": 0.21, "learning_rate": 3.955431754874652e-05, "loss": 1.0599, "step": 7050 }, { "epoch": 0.21, "learning_rate": 3.953950097789368e-05, "loss": 0.9359, "step": 7060 }, { "epoch": 0.21, "learning_rate": 3.9524684407040837e-05, "loss": 0.9445, "step": 7070 }, { "epoch": 0.21, "learning_rate": 3.9509867836187994e-05, "loss": 1.0743, "step": 7080 }, { "epoch": 0.21, "learning_rate": 3.949505126533515e-05, "loss": 1.0425, "step": 7090 }, { "epoch": 0.21, "learning_rate": 3.948023469448231e-05, "loss": 0.861, "step": 7100 }, { "epoch": 0.21, "learning_rate": 3.946541812362947e-05, "loss": 1.0149, "step": 7110 }, { "epoch": 0.21, "learning_rate": 3.945060155277663e-05, "loss": 0.8354, "step": 7120 }, { "epoch": 0.21, "learning_rate": 3.943578498192379e-05, "loss": 1.02, "step": 7130 }, { "epoch": 0.21, "learning_rate": 3.9420968411070944e-05, "loss": 1.0401, "step": 7140 }, { "epoch": 0.21, "learning_rate": 3.94061518402181e-05, "loss": 0.9268, "step": 7150 }, { "epoch": 0.21, "learning_rate": 3.939133526936526e-05, "loss": 0.9768, "step": 7160 }, { "epoch": 0.21, "learning_rate": 3.9376518698512416e-05, "loss": 0.8638, "step": 7170 }, { "epoch": 0.21, "learning_rate": 3.936170212765958e-05, "loss": 1.0255, "step": 7180 }, { "epoch": 0.21, "learning_rate": 3.934688555680674e-05, "loss": 0.9134, "step": 7190 }, { "epoch": 0.21, "learning_rate": 3.933206898595389e-05, "loss": 0.8389, "step": 7200 }, { "epoch": 0.21, "learning_rate": 3.931725241510105e-05, "loss": 0.8027, "step": 7210 }, { "epoch": 0.21, "learning_rate": 3.930243584424821e-05, "loss": 0.8722, "step": 7220 }, { "epoch": 0.21, "learning_rate": 3.9287619273395366e-05, "loss": 1.066, "step": 7230 }, { "epoch": 0.21, "learning_rate": 3.927280270254253e-05, "loss": 0.8792, "step": 7240 }, { "epoch": 0.21, "learning_rate": 3.925798613168968e-05, "loss": 0.9674, "step": 7250 }, { "epoch": 0.22, "learning_rate": 3.924316956083684e-05, "loss": 1.0199, "step": 7260 }, { "epoch": 0.22, "learning_rate": 3.9228352989984e-05, "loss": 0.9654, "step": 7270 }, { "epoch": 0.22, "learning_rate": 3.921353641913116e-05, "loss": 0.8794, "step": 7280 }, { "epoch": 0.22, "learning_rate": 3.9198719848278316e-05, "loss": 0.9812, "step": 7290 }, { "epoch": 0.22, "learning_rate": 3.918390327742547e-05, "loss": 0.979, "step": 7300 }, { "epoch": 0.22, "learning_rate": 3.916908670657263e-05, "loss": 0.9721, "step": 7310 }, { "epoch": 0.22, "learning_rate": 3.915427013571979e-05, "loss": 0.8746, "step": 7320 }, { "epoch": 0.22, "learning_rate": 3.913945356486695e-05, "loss": 0.9368, "step": 7330 }, { "epoch": 0.22, "learning_rate": 3.912463699401411e-05, "loss": 0.918, "step": 7340 }, { "epoch": 0.22, "learning_rate": 3.9109820423161266e-05, "loss": 0.9911, "step": 7350 }, { "epoch": 0.22, "learning_rate": 3.9095003852308423e-05, "loss": 0.8009, "step": 7360 }, { "epoch": 0.22, "learning_rate": 3.908018728145558e-05, "loss": 0.9735, "step": 7370 }, { "epoch": 0.22, "learning_rate": 3.906537071060274e-05, "loss": 0.9262, "step": 7380 }, { "epoch": 0.22, "learning_rate": 3.90505541397499e-05, "loss": 0.9529, "step": 7390 }, { "epoch": 0.22, "learning_rate": 3.903573756889706e-05, "loss": 0.8379, "step": 7400 }, { "epoch": 0.22, "learning_rate": 3.9020920998044216e-05, "loss": 0.981, "step": 7410 }, { "epoch": 0.22, "learning_rate": 3.900610442719137e-05, "loss": 1.0949, "step": 7420 }, { "epoch": 0.22, "learning_rate": 3.899128785633853e-05, "loss": 1.0619, "step": 7430 }, { "epoch": 0.22, "learning_rate": 3.897647128548569e-05, "loss": 0.9317, "step": 7440 }, { "epoch": 0.22, "learning_rate": 3.8961654714632845e-05, "loss": 1.0149, "step": 7450 }, { "epoch": 0.22, "learning_rate": 3.894683814378001e-05, "loss": 0.9187, "step": 7460 }, { "epoch": 0.22, "learning_rate": 3.893202157292716e-05, "loss": 0.8749, "step": 7470 }, { "epoch": 0.22, "learning_rate": 3.891720500207432e-05, "loss": 0.9824, "step": 7480 }, { "epoch": 0.22, "learning_rate": 3.890238843122148e-05, "loss": 0.8977, "step": 7490 }, { "epoch": 0.22, "learning_rate": 3.888757186036864e-05, "loss": 0.9572, "step": 7500 }, { "epoch": 0.22, "learning_rate": 3.8872755289515795e-05, "loss": 0.8585, "step": 7510 }, { "epoch": 0.22, "learning_rate": 3.885793871866296e-05, "loss": 0.919, "step": 7520 }, { "epoch": 0.22, "learning_rate": 3.884312214781011e-05, "loss": 1.094, "step": 7530 }, { "epoch": 0.22, "learning_rate": 3.882830557695727e-05, "loss": 0.8935, "step": 7540 }, { "epoch": 0.22, "learning_rate": 3.881348900610443e-05, "loss": 1.0104, "step": 7550 }, { "epoch": 0.22, "learning_rate": 3.879867243525159e-05, "loss": 0.9139, "step": 7560 }, { "epoch": 0.22, "learning_rate": 3.8783855864398746e-05, "loss": 0.901, "step": 7570 }, { "epoch": 0.22, "learning_rate": 3.87690392935459e-05, "loss": 0.9488, "step": 7580 }, { "epoch": 0.22, "learning_rate": 3.875422272269306e-05, "loss": 1.0754, "step": 7590 }, { "epoch": 0.23, "learning_rate": 3.873940615184022e-05, "loss": 0.9621, "step": 7600 }, { "epoch": 0.23, "learning_rate": 3.872458958098738e-05, "loss": 0.849, "step": 7610 }, { "epoch": 0.23, "learning_rate": 3.870977301013454e-05, "loss": 1.0123, "step": 7620 }, { "epoch": 0.23, "learning_rate": 3.8694956439281696e-05, "loss": 1.0595, "step": 7630 }, { "epoch": 0.23, "learning_rate": 3.868013986842885e-05, "loss": 1.0262, "step": 7640 }, { "epoch": 0.23, "learning_rate": 3.866532329757601e-05, "loss": 1.1201, "step": 7650 }, { "epoch": 0.23, "learning_rate": 3.865050672672317e-05, "loss": 0.8926, "step": 7660 }, { "epoch": 0.23, "learning_rate": 3.863569015587033e-05, "loss": 0.8442, "step": 7670 }, { "epoch": 0.23, "learning_rate": 3.862087358501749e-05, "loss": 0.961, "step": 7680 }, { "epoch": 0.23, "learning_rate": 3.8606057014164646e-05, "loss": 0.9387, "step": 7690 }, { "epoch": 0.23, "learning_rate": 3.8591240443311796e-05, "loss": 0.8551, "step": 7700 }, { "epoch": 0.23, "learning_rate": 3.857642387245896e-05, "loss": 0.8485, "step": 7710 }, { "epoch": 0.23, "learning_rate": 3.856160730160612e-05, "loss": 1.1136, "step": 7720 }, { "epoch": 0.23, "learning_rate": 3.8546790730753275e-05, "loss": 0.9127, "step": 7730 }, { "epoch": 0.23, "learning_rate": 3.853197415990044e-05, "loss": 0.9789, "step": 7740 }, { "epoch": 0.23, "learning_rate": 3.851715758904759e-05, "loss": 1.0526, "step": 7750 }, { "epoch": 0.23, "learning_rate": 3.8502341018194747e-05, "loss": 0.8497, "step": 7760 }, { "epoch": 0.23, "learning_rate": 3.848752444734191e-05, "loss": 0.839, "step": 7770 }, { "epoch": 0.23, "learning_rate": 3.847270787648907e-05, "loss": 0.9596, "step": 7780 }, { "epoch": 0.23, "learning_rate": 3.8457891305636225e-05, "loss": 0.8706, "step": 7790 }, { "epoch": 0.23, "learning_rate": 3.844307473478338e-05, "loss": 1.0107, "step": 7800 }, { "epoch": 0.23, "learning_rate": 3.842825816393054e-05, "loss": 0.9414, "step": 7810 }, { "epoch": 0.23, "learning_rate": 3.84134415930777e-05, "loss": 0.8719, "step": 7820 }, { "epoch": 0.23, "learning_rate": 3.839862502222486e-05, "loss": 0.8495, "step": 7830 }, { "epoch": 0.23, "learning_rate": 3.838380845137202e-05, "loss": 0.9224, "step": 7840 }, { "epoch": 0.23, "learning_rate": 3.8368991880519175e-05, "loss": 0.9091, "step": 7850 }, { "epoch": 0.23, "learning_rate": 3.835417530966633e-05, "loss": 0.9843, "step": 7860 }, { "epoch": 0.23, "learning_rate": 3.833935873881349e-05, "loss": 1.1209, "step": 7870 }, { "epoch": 0.23, "learning_rate": 3.832454216796065e-05, "loss": 1.0426, "step": 7880 }, { "epoch": 0.23, "learning_rate": 3.830972559710781e-05, "loss": 0.8406, "step": 7890 }, { "epoch": 0.23, "learning_rate": 3.829490902625497e-05, "loss": 0.861, "step": 7900 }, { "epoch": 0.23, "learning_rate": 3.8280092455402125e-05, "loss": 1.0723, "step": 7910 }, { "epoch": 0.23, "learning_rate": 3.826527588454928e-05, "loss": 0.9372, "step": 7920 }, { "epoch": 0.23, "learning_rate": 3.825045931369644e-05, "loss": 1.0065, "step": 7930 }, { "epoch": 0.24, "learning_rate": 3.82356427428436e-05, "loss": 0.7633, "step": 7940 }, { "epoch": 0.24, "learning_rate": 3.822082617199076e-05, "loss": 0.9963, "step": 7950 }, { "epoch": 0.24, "learning_rate": 3.820600960113792e-05, "loss": 0.8949, "step": 7960 }, { "epoch": 0.24, "learning_rate": 3.819119303028507e-05, "loss": 1.0342, "step": 7970 }, { "epoch": 0.24, "learning_rate": 3.8176376459432226e-05, "loss": 1.0523, "step": 7980 }, { "epoch": 0.24, "learning_rate": 3.816155988857939e-05, "loss": 0.9841, "step": 7990 }, { "epoch": 0.24, "learning_rate": 3.814674331772655e-05, "loss": 1.0401, "step": 8000 }, { "epoch": 0.24, "learning_rate": 3.8131926746873704e-05, "loss": 0.8284, "step": 8010 }, { "epoch": 0.24, "learning_rate": 3.811711017602086e-05, "loss": 0.9808, "step": 8020 }, { "epoch": 0.24, "learning_rate": 3.810229360516802e-05, "loss": 0.8408, "step": 8030 }, { "epoch": 0.24, "learning_rate": 3.8087477034315176e-05, "loss": 0.9917, "step": 8040 }, { "epoch": 0.24, "learning_rate": 3.807266046346234e-05, "loss": 0.904, "step": 8050 }, { "epoch": 0.24, "learning_rate": 3.80578438926095e-05, "loss": 0.9815, "step": 8060 }, { "epoch": 0.24, "learning_rate": 3.8043027321756655e-05, "loss": 0.9306, "step": 8070 }, { "epoch": 0.24, "learning_rate": 3.802821075090381e-05, "loss": 0.9941, "step": 8080 }, { "epoch": 0.24, "learning_rate": 3.801339418005097e-05, "loss": 0.8978, "step": 8090 }, { "epoch": 0.24, "learning_rate": 3.7998577609198126e-05, "loss": 0.941, "step": 8100 }, { "epoch": 0.24, "learning_rate": 3.798376103834529e-05, "loss": 0.918, "step": 8110 }, { "epoch": 0.24, "learning_rate": 3.796894446749245e-05, "loss": 0.7802, "step": 8120 }, { "epoch": 0.24, "learning_rate": 3.7954127896639605e-05, "loss": 0.9219, "step": 8130 }, { "epoch": 0.24, "learning_rate": 3.793931132578676e-05, "loss": 1.0816, "step": 8140 }, { "epoch": 0.24, "learning_rate": 3.792449475493392e-05, "loss": 1.0741, "step": 8150 }, { "epoch": 0.24, "learning_rate": 3.7909678184081077e-05, "loss": 1.0095, "step": 8160 }, { "epoch": 0.24, "learning_rate": 3.789486161322824e-05, "loss": 0.962, "step": 8170 }, { "epoch": 0.24, "learning_rate": 3.78800450423754e-05, "loss": 0.8198, "step": 8180 }, { "epoch": 0.24, "learning_rate": 3.786522847152255e-05, "loss": 0.9874, "step": 8190 }, { "epoch": 0.24, "learning_rate": 3.785041190066971e-05, "loss": 0.997, "step": 8200 }, { "epoch": 0.24, "learning_rate": 3.783559532981687e-05, "loss": 1.0218, "step": 8210 }, { "epoch": 0.24, "learning_rate": 3.782077875896403e-05, "loss": 0.7353, "step": 8220 }, { "epoch": 0.24, "learning_rate": 3.780596218811119e-05, "loss": 0.9453, "step": 8230 }, { "epoch": 0.24, "learning_rate": 3.779114561725834e-05, "loss": 0.9067, "step": 8240 }, { "epoch": 0.24, "learning_rate": 3.77763290464055e-05, "loss": 0.9107, "step": 8250 }, { "epoch": 0.24, "learning_rate": 3.7761512475552656e-05, "loss": 0.8818, "step": 8260 }, { "epoch": 0.25, "learning_rate": 3.774669590469982e-05, "loss": 1.065, "step": 8270 }, { "epoch": 0.25, "learning_rate": 3.773187933384698e-05, "loss": 0.9564, "step": 8280 }, { "epoch": 0.25, "learning_rate": 3.7717062762994134e-05, "loss": 1.0434, "step": 8290 }, { "epoch": 0.25, "learning_rate": 3.770224619214129e-05, "loss": 1.0674, "step": 8300 }, { "epoch": 0.25, "learning_rate": 3.768742962128845e-05, "loss": 1.0128, "step": 8310 }, { "epoch": 0.25, "learning_rate": 3.7672613050435606e-05, "loss": 0.9528, "step": 8320 }, { "epoch": 0.25, "learning_rate": 3.765779647958277e-05, "loss": 1.088, "step": 8330 }, { "epoch": 0.25, "learning_rate": 3.764297990872993e-05, "loss": 1.0491, "step": 8340 }, { "epoch": 0.25, "learning_rate": 3.7628163337877084e-05, "loss": 1.0906, "step": 8350 }, { "epoch": 0.25, "learning_rate": 3.761334676702424e-05, "loss": 0.9301, "step": 8360 }, { "epoch": 0.25, "learning_rate": 3.75985301961714e-05, "loss": 0.8296, "step": 8370 }, { "epoch": 0.25, "learning_rate": 3.7583713625318556e-05, "loss": 0.9707, "step": 8380 }, { "epoch": 0.25, "learning_rate": 3.756889705446572e-05, "loss": 0.9587, "step": 8390 }, { "epoch": 0.25, "learning_rate": 3.755408048361288e-05, "loss": 1.0983, "step": 8400 }, { "epoch": 0.25, "learning_rate": 3.753926391276003e-05, "loss": 0.7814, "step": 8410 }, { "epoch": 0.25, "learning_rate": 3.752444734190719e-05, "loss": 0.9767, "step": 8420 }, { "epoch": 0.25, "learning_rate": 3.750963077105435e-05, "loss": 0.9191, "step": 8430 }, { "epoch": 0.25, "learning_rate": 3.7494814200201506e-05, "loss": 1.133, "step": 8440 }, { "epoch": 0.25, "learning_rate": 3.747999762934867e-05, "loss": 0.7876, "step": 8450 }, { "epoch": 0.25, "learning_rate": 3.746518105849583e-05, "loss": 1.0342, "step": 8460 }, { "epoch": 0.25, "learning_rate": 3.745036448764298e-05, "loss": 0.9359, "step": 8470 }, { "epoch": 0.25, "learning_rate": 3.743554791679014e-05, "loss": 1.0841, "step": 8480 }, { "epoch": 0.25, "learning_rate": 3.74207313459373e-05, "loss": 0.9303, "step": 8490 }, { "epoch": 0.25, "learning_rate": 3.7405914775084456e-05, "loss": 0.947, "step": 8500 }, { "epoch": 0.25, "learning_rate": 3.739109820423162e-05, "loss": 0.9999, "step": 8510 }, { "epoch": 0.25, "learning_rate": 3.737628163337877e-05, "loss": 0.9578, "step": 8520 }, { "epoch": 0.25, "learning_rate": 3.736146506252593e-05, "loss": 0.9773, "step": 8530 }, { "epoch": 0.25, "learning_rate": 3.7346648491673085e-05, "loss": 0.9933, "step": 8540 }, { "epoch": 0.25, "learning_rate": 3.733183192082025e-05, "loss": 0.8242, "step": 8550 }, { "epoch": 0.25, "learning_rate": 3.7317015349967406e-05, "loss": 1.1116, "step": 8560 }, { "epoch": 0.25, "learning_rate": 3.7302198779114564e-05, "loss": 1.1778, "step": 8570 }, { "epoch": 0.25, "learning_rate": 3.728738220826172e-05, "loss": 0.9228, "step": 8580 }, { "epoch": 0.25, "learning_rate": 3.727256563740888e-05, "loss": 1.0696, "step": 8590 }, { "epoch": 0.25, "learning_rate": 3.7257749066556035e-05, "loss": 1.104, "step": 8600 }, { "epoch": 0.26, "learning_rate": 3.72429324957032e-05, "loss": 0.7433, "step": 8610 }, { "epoch": 0.26, "learning_rate": 3.722811592485036e-05, "loss": 0.9487, "step": 8620 }, { "epoch": 0.26, "learning_rate": 3.7213299353997514e-05, "loss": 0.9236, "step": 8630 }, { "epoch": 0.26, "learning_rate": 3.719848278314467e-05, "loss": 0.9623, "step": 8640 }, { "epoch": 0.26, "learning_rate": 3.718366621229183e-05, "loss": 1.021, "step": 8650 }, { "epoch": 0.26, "learning_rate": 3.7168849641438986e-05, "loss": 0.749, "step": 8660 }, { "epoch": 0.26, "learning_rate": 3.715403307058615e-05, "loss": 0.8871, "step": 8670 }, { "epoch": 0.26, "learning_rate": 3.713921649973331e-05, "loss": 0.8794, "step": 8680 }, { "epoch": 0.26, "learning_rate": 3.712439992888046e-05, "loss": 1.0036, "step": 8690 }, { "epoch": 0.26, "learning_rate": 3.710958335802762e-05, "loss": 0.9535, "step": 8700 }, { "epoch": 0.26, "learning_rate": 3.709476678717478e-05, "loss": 0.9561, "step": 8710 }, { "epoch": 0.26, "learning_rate": 3.7079950216321936e-05, "loss": 1.0284, "step": 8720 }, { "epoch": 0.26, "learning_rate": 3.70651336454691e-05, "loss": 0.9994, "step": 8730 }, { "epoch": 0.26, "learning_rate": 3.705031707461625e-05, "loss": 0.8966, "step": 8740 }, { "epoch": 0.26, "learning_rate": 3.703550050376341e-05, "loss": 0.8629, "step": 8750 }, { "epoch": 0.26, "learning_rate": 3.702068393291057e-05, "loss": 0.9028, "step": 8760 }, { "epoch": 0.26, "learning_rate": 3.700586736205773e-05, "loss": 0.949, "step": 8770 }, { "epoch": 0.26, "learning_rate": 3.6991050791204886e-05, "loss": 0.9702, "step": 8780 }, { "epoch": 0.26, "learning_rate": 3.697623422035204e-05, "loss": 0.8712, "step": 8790 }, { "epoch": 0.26, "learning_rate": 3.69614176494992e-05, "loss": 0.8101, "step": 8800 }, { "epoch": 0.26, "learning_rate": 3.694660107864636e-05, "loss": 0.9015, "step": 8810 }, { "epoch": 0.26, "learning_rate": 3.6931784507793515e-05, "loss": 1.0166, "step": 8820 }, { "epoch": 0.26, "learning_rate": 3.691696793694068e-05, "loss": 1.0416, "step": 8830 }, { "epoch": 0.26, "learning_rate": 3.6902151366087836e-05, "loss": 0.9694, "step": 8840 }, { "epoch": 0.26, "learning_rate": 3.688733479523499e-05, "loss": 0.9738, "step": 8850 }, { "epoch": 0.26, "learning_rate": 3.687251822438215e-05, "loss": 1.0464, "step": 8860 }, { "epoch": 0.26, "learning_rate": 3.685770165352931e-05, "loss": 0.9831, "step": 8870 }, { "epoch": 0.26, "learning_rate": 3.6842885082676465e-05, "loss": 0.8784, "step": 8880 }, { "epoch": 0.26, "learning_rate": 3.682806851182363e-05, "loss": 0.9262, "step": 8890 }, { "epoch": 0.26, "learning_rate": 3.6813251940970786e-05, "loss": 0.9408, "step": 8900 }, { "epoch": 0.26, "learning_rate": 3.679843537011794e-05, "loss": 1.0725, "step": 8910 }, { "epoch": 0.26, "learning_rate": 3.67836187992651e-05, "loss": 0.9675, "step": 8920 }, { "epoch": 0.26, "learning_rate": 3.676880222841226e-05, "loss": 0.9087, "step": 8930 }, { "epoch": 0.26, "learning_rate": 3.6753985657559415e-05, "loss": 0.9308, "step": 8940 }, { "epoch": 0.27, "learning_rate": 3.673916908670658e-05, "loss": 1.0129, "step": 8950 }, { "epoch": 0.27, "learning_rate": 3.672435251585373e-05, "loss": 0.7679, "step": 8960 }, { "epoch": 0.27, "learning_rate": 3.670953594500089e-05, "loss": 0.865, "step": 8970 }, { "epoch": 0.27, "learning_rate": 3.669471937414805e-05, "loss": 0.9606, "step": 8980 }, { "epoch": 0.27, "learning_rate": 3.667990280329521e-05, "loss": 0.9638, "step": 8990 }, { "epoch": 0.27, "learning_rate": 3.6665086232442365e-05, "loss": 0.9136, "step": 9000 }, { "epoch": 0.27, "learning_rate": 3.665026966158952e-05, "loss": 1.0182, "step": 9010 }, { "epoch": 0.27, "learning_rate": 3.663545309073668e-05, "loss": 0.7749, "step": 9020 }, { "epoch": 0.27, "learning_rate": 3.662063651988384e-05, "loss": 0.9811, "step": 9030 }, { "epoch": 0.27, "learning_rate": 3.6605819949031e-05, "loss": 0.9869, "step": 9040 }, { "epoch": 0.27, "learning_rate": 3.659100337817816e-05, "loss": 1.0581, "step": 9050 }, { "epoch": 0.27, "learning_rate": 3.6576186807325316e-05, "loss": 0.8655, "step": 9060 }, { "epoch": 0.27, "learning_rate": 3.656137023647247e-05, "loss": 0.9068, "step": 9070 }, { "epoch": 0.27, "learning_rate": 3.654655366561963e-05, "loss": 0.8552, "step": 9080 }, { "epoch": 0.27, "learning_rate": 3.653173709476679e-05, "loss": 0.948, "step": 9090 }, { "epoch": 0.27, "learning_rate": 3.6516920523913944e-05, "loss": 0.8968, "step": 9100 }, { "epoch": 0.27, "learning_rate": 3.650210395306111e-05, "loss": 0.9175, "step": 9110 }, { "epoch": 0.27, "learning_rate": 3.6487287382208266e-05, "loss": 1.0115, "step": 9120 }, { "epoch": 0.27, "learning_rate": 3.6472470811355416e-05, "loss": 0.8408, "step": 9130 }, { "epoch": 0.27, "learning_rate": 3.645765424050258e-05, "loss": 0.9666, "step": 9140 }, { "epoch": 0.27, "learning_rate": 3.644283766964974e-05, "loss": 0.861, "step": 9150 }, { "epoch": 0.27, "learning_rate": 3.6428021098796895e-05, "loss": 0.8259, "step": 9160 }, { "epoch": 0.27, "learning_rate": 3.641320452794406e-05, "loss": 0.8378, "step": 9170 }, { "epoch": 0.27, "learning_rate": 3.639838795709121e-05, "loss": 0.9386, "step": 9180 }, { "epoch": 0.27, "learning_rate": 3.6383571386238366e-05, "loss": 0.8758, "step": 9190 }, { "epoch": 0.27, "learning_rate": 3.636875481538553e-05, "loss": 0.9089, "step": 9200 }, { "epoch": 0.27, "learning_rate": 3.635393824453269e-05, "loss": 0.8981, "step": 9210 }, { "epoch": 0.27, "learning_rate": 3.6339121673679845e-05, "loss": 1.0558, "step": 9220 }, { "epoch": 0.27, "learning_rate": 3.6324305102827e-05, "loss": 0.9055, "step": 9230 }, { "epoch": 0.27, "learning_rate": 3.630948853197416e-05, "loss": 0.9934, "step": 9240 }, { "epoch": 0.27, "learning_rate": 3.6294671961121317e-05, "loss": 0.9359, "step": 9250 }, { "epoch": 0.27, "learning_rate": 3.627985539026848e-05, "loss": 0.8586, "step": 9260 }, { "epoch": 0.27, "learning_rate": 3.626503881941564e-05, "loss": 0.9114, "step": 9270 }, { "epoch": 0.27, "learning_rate": 3.6250222248562795e-05, "loss": 0.8345, "step": 9280 }, { "epoch": 0.28, "learning_rate": 3.623540567770995e-05, "loss": 0.9753, "step": 9290 }, { "epoch": 0.28, "learning_rate": 3.622058910685711e-05, "loss": 0.8734, "step": 9300 }, { "epoch": 0.28, "learning_rate": 3.620577253600427e-05, "loss": 0.9912, "step": 9310 }, { "epoch": 0.28, "learning_rate": 3.619095596515143e-05, "loss": 0.9848, "step": 9320 }, { "epoch": 0.28, "learning_rate": 3.617613939429859e-05, "loss": 0.9692, "step": 9330 }, { "epoch": 0.28, "learning_rate": 3.6161322823445745e-05, "loss": 0.9421, "step": 9340 }, { "epoch": 0.28, "learning_rate": 3.61465062525929e-05, "loss": 0.9079, "step": 9350 }, { "epoch": 0.28, "learning_rate": 3.613168968174006e-05, "loss": 1.0107, "step": 9360 }, { "epoch": 0.28, "learning_rate": 3.611687311088722e-05, "loss": 0.902, "step": 9370 }, { "epoch": 0.28, "learning_rate": 3.6102056540034374e-05, "loss": 0.9594, "step": 9380 }, { "epoch": 0.28, "learning_rate": 3.608723996918154e-05, "loss": 0.7266, "step": 9390 }, { "epoch": 0.28, "learning_rate": 3.607242339832869e-05, "loss": 1.0683, "step": 9400 }, { "epoch": 0.28, "learning_rate": 3.6057606827475846e-05, "loss": 0.9873, "step": 9410 }, { "epoch": 0.28, "learning_rate": 3.604279025662301e-05, "loss": 0.9732, "step": 9420 }, { "epoch": 0.28, "learning_rate": 3.602797368577017e-05, "loss": 0.8969, "step": 9430 }, { "epoch": 0.28, "learning_rate": 3.6013157114917324e-05, "loss": 0.9062, "step": 9440 }, { "epoch": 0.28, "learning_rate": 3.599834054406449e-05, "loss": 1.0581, "step": 9450 }, { "epoch": 0.28, "learning_rate": 3.598352397321164e-05, "loss": 0.882, "step": 9460 }, { "epoch": 0.28, "learning_rate": 3.5968707402358796e-05, "loss": 0.755, "step": 9470 }, { "epoch": 0.28, "learning_rate": 3.595389083150596e-05, "loss": 0.8925, "step": 9480 }, { "epoch": 0.28, "learning_rate": 3.593907426065312e-05, "loss": 0.9063, "step": 9490 }, { "epoch": 0.28, "learning_rate": 3.5924257689800274e-05, "loss": 1.0206, "step": 9500 }, { "epoch": 0.28, "learning_rate": 3.590944111894743e-05, "loss": 0.8922, "step": 9510 }, { "epoch": 0.28, "learning_rate": 3.589462454809459e-05, "loss": 0.9306, "step": 9520 }, { "epoch": 0.28, "learning_rate": 3.5879807977241746e-05, "loss": 1.1392, "step": 9530 }, { "epoch": 0.28, "learning_rate": 3.586499140638891e-05, "loss": 0.9836, "step": 9540 }, { "epoch": 0.28, "learning_rate": 3.585017483553607e-05, "loss": 0.8159, "step": 9550 }, { "epoch": 0.28, "learning_rate": 3.5835358264683225e-05, "loss": 0.7914, "step": 9560 }, { "epoch": 0.28, "learning_rate": 3.582054169383038e-05, "loss": 0.8551, "step": 9570 }, { "epoch": 0.28, "learning_rate": 3.580572512297754e-05, "loss": 0.8242, "step": 9580 }, { "epoch": 0.28, "learning_rate": 3.5790908552124696e-05, "loss": 1.0202, "step": 9590 }, { "epoch": 0.28, "learning_rate": 3.577609198127186e-05, "loss": 0.9322, "step": 9600 }, { "epoch": 0.28, "learning_rate": 3.576127541041902e-05, "loss": 0.9435, "step": 9610 }, { "epoch": 0.29, "learning_rate": 3.5746458839566175e-05, "loss": 0.7511, "step": 9620 }, { "epoch": 0.29, "learning_rate": 3.573164226871333e-05, "loss": 0.9171, "step": 9630 }, { "epoch": 0.29, "learning_rate": 3.571682569786049e-05, "loss": 0.8806, "step": 9640 }, { "epoch": 0.29, "learning_rate": 3.5702009127007646e-05, "loss": 0.935, "step": 9650 }, { "epoch": 0.29, "learning_rate": 3.5687192556154804e-05, "loss": 1.114, "step": 9660 }, { "epoch": 0.29, "learning_rate": 3.567237598530197e-05, "loss": 0.8805, "step": 9670 }, { "epoch": 0.29, "learning_rate": 3.565755941444912e-05, "loss": 1.0953, "step": 9680 }, { "epoch": 0.29, "learning_rate": 3.5642742843596275e-05, "loss": 0.9546, "step": 9690 }, { "epoch": 0.29, "learning_rate": 3.562792627274344e-05, "loss": 1.0753, "step": 9700 }, { "epoch": 0.29, "learning_rate": 3.56131097018906e-05, "loss": 0.9717, "step": 9710 }, { "epoch": 0.29, "learning_rate": 3.5598293131037754e-05, "loss": 0.8966, "step": 9720 }, { "epoch": 0.29, "learning_rate": 3.558347656018491e-05, "loss": 1.0347, "step": 9730 }, { "epoch": 0.29, "learning_rate": 3.556865998933207e-05, "loss": 0.9045, "step": 9740 }, { "epoch": 0.29, "learning_rate": 3.5553843418479226e-05, "loss": 0.984, "step": 9750 }, { "epoch": 0.29, "learning_rate": 3.553902684762639e-05, "loss": 0.8778, "step": 9760 }, { "epoch": 0.29, "learning_rate": 3.552421027677355e-05, "loss": 0.9551, "step": 9770 }, { "epoch": 0.29, "learning_rate": 3.5509393705920704e-05, "loss": 0.9584, "step": 9780 }, { "epoch": 0.29, "learning_rate": 3.549457713506786e-05, "loss": 0.948, "step": 9790 }, { "epoch": 0.29, "learning_rate": 3.547976056421502e-05, "loss": 1.0751, "step": 9800 }, { "epoch": 0.29, "learning_rate": 3.5464943993362176e-05, "loss": 0.9411, "step": 9810 }, { "epoch": 0.29, "learning_rate": 3.545012742250934e-05, "loss": 0.8559, "step": 9820 }, { "epoch": 0.29, "learning_rate": 3.54353108516565e-05, "loss": 1.0811, "step": 9830 }, { "epoch": 0.29, "learning_rate": 3.5420494280803654e-05, "loss": 0.7798, "step": 9840 }, { "epoch": 0.29, "learning_rate": 3.540567770995081e-05, "loss": 0.9944, "step": 9850 }, { "epoch": 0.29, "learning_rate": 3.539086113909797e-05, "loss": 0.8336, "step": 9860 }, { "epoch": 0.29, "learning_rate": 3.5376044568245126e-05, "loss": 1.0403, "step": 9870 }, { "epoch": 0.29, "learning_rate": 3.536122799739229e-05, "loss": 0.8135, "step": 9880 }, { "epoch": 0.29, "learning_rate": 3.534641142653945e-05, "loss": 0.8156, "step": 9890 }, { "epoch": 0.29, "learning_rate": 3.53315948556866e-05, "loss": 0.95, "step": 9900 }, { "epoch": 0.29, "learning_rate": 3.531677828483376e-05, "loss": 0.8798, "step": 9910 }, { "epoch": 0.29, "learning_rate": 3.530196171398092e-05, "loss": 0.9948, "step": 9920 }, { "epoch": 0.29, "learning_rate": 3.5287145143128076e-05, "loss": 0.9372, "step": 9930 }, { "epoch": 0.29, "learning_rate": 3.527232857227523e-05, "loss": 0.9383, "step": 9940 }, { "epoch": 0.29, "learning_rate": 3.525751200142239e-05, "loss": 1.0386, "step": 9950 }, { "epoch": 0.3, "learning_rate": 3.524269543056955e-05, "loss": 0.9028, "step": 9960 }, { "epoch": 0.3, "learning_rate": 3.5227878859716705e-05, "loss": 1.0153, "step": 9970 }, { "epoch": 0.3, "learning_rate": 3.521306228886387e-05, "loss": 0.8812, "step": 9980 }, { "epoch": 0.3, "learning_rate": 3.5198245718011026e-05, "loss": 1.0119, "step": 9990 }, { "epoch": 0.3, "learning_rate": 3.5183429147158183e-05, "loss": 0.9994, "step": 10000 }, { "epoch": 0.3, "learning_rate": 3.516861257630534e-05, "loss": 0.869, "step": 10010 }, { "epoch": 0.3, "learning_rate": 3.51537960054525e-05, "loss": 0.8866, "step": 10020 }, { "epoch": 0.3, "learning_rate": 3.5138979434599655e-05, "loss": 0.8339, "step": 10030 }, { "epoch": 0.3, "learning_rate": 3.512416286374682e-05, "loss": 0.7662, "step": 10040 }, { "epoch": 0.3, "learning_rate": 3.5109346292893976e-05, "loss": 0.808, "step": 10050 }, { "epoch": 0.3, "learning_rate": 3.5094529722041134e-05, "loss": 0.8775, "step": 10060 }, { "epoch": 0.3, "learning_rate": 3.507971315118829e-05, "loss": 0.8669, "step": 10070 }, { "epoch": 0.3, "learning_rate": 3.506489658033545e-05, "loss": 0.8483, "step": 10080 }, { "epoch": 0.3, "learning_rate": 3.5050080009482605e-05, "loss": 1.1318, "step": 10090 }, { "epoch": 0.3, "learning_rate": 3.503526343862977e-05, "loss": 0.9721, "step": 10100 }, { "epoch": 0.3, "learning_rate": 3.5020446867776927e-05, "loss": 0.9693, "step": 10110 }, { "epoch": 0.3, "learning_rate": 3.500563029692408e-05, "loss": 0.9194, "step": 10120 }, { "epoch": 0.3, "learning_rate": 3.499081372607124e-05, "loss": 1.0312, "step": 10130 }, { "epoch": 0.3, "learning_rate": 3.49759971552184e-05, "loss": 0.9082, "step": 10140 }, { "epoch": 0.3, "learning_rate": 3.4961180584365556e-05, "loss": 0.969, "step": 10150 }, { "epoch": 0.3, "learning_rate": 3.494636401351272e-05, "loss": 0.9248, "step": 10160 }, { "epoch": 0.3, "learning_rate": 3.493154744265987e-05, "loss": 1.0493, "step": 10170 }, { "epoch": 0.3, "learning_rate": 3.491673087180703e-05, "loss": 0.8598, "step": 10180 }, { "epoch": 0.3, "learning_rate": 3.490191430095419e-05, "loss": 1.0603, "step": 10190 }, { "epoch": 0.3, "learning_rate": 3.488709773010135e-05, "loss": 1.0821, "step": 10200 }, { "epoch": 0.3, "learning_rate": 3.4872281159248506e-05, "loss": 0.9239, "step": 10210 }, { "epoch": 0.3, "learning_rate": 3.485746458839566e-05, "loss": 0.9464, "step": 10220 }, { "epoch": 0.3, "learning_rate": 3.484264801754282e-05, "loss": 0.9686, "step": 10230 }, { "epoch": 0.3, "learning_rate": 3.482783144668998e-05, "loss": 0.9362, "step": 10240 }, { "epoch": 0.3, "learning_rate": 3.4813014875837135e-05, "loss": 0.9344, "step": 10250 }, { "epoch": 0.3, "learning_rate": 3.47981983049843e-05, "loss": 1.0437, "step": 10260 }, { "epoch": 0.3, "learning_rate": 3.4783381734131456e-05, "loss": 0.9584, "step": 10270 }, { "epoch": 0.3, "learning_rate": 3.476856516327861e-05, "loss": 0.9738, "step": 10280 }, { "epoch": 0.3, "learning_rate": 3.475374859242577e-05, "loss": 0.8704, "step": 10290 }, { "epoch": 0.31, "learning_rate": 3.473893202157293e-05, "loss": 0.8944, "step": 10300 }, { "epoch": 0.31, "learning_rate": 3.4724115450720085e-05, "loss": 0.794, "step": 10310 }, { "epoch": 0.31, "learning_rate": 3.470929887986725e-05, "loss": 1.0281, "step": 10320 }, { "epoch": 0.31, "learning_rate": 3.4694482309014406e-05, "loss": 1.0935, "step": 10330 }, { "epoch": 0.31, "learning_rate": 3.4679665738161556e-05, "loss": 1.0274, "step": 10340 }, { "epoch": 0.31, "learning_rate": 3.466484916730872e-05, "loss": 1.0199, "step": 10350 }, { "epoch": 0.31, "learning_rate": 3.465003259645588e-05, "loss": 1.0275, "step": 10360 }, { "epoch": 0.31, "learning_rate": 3.4635216025603035e-05, "loss": 0.9856, "step": 10370 }, { "epoch": 0.31, "learning_rate": 3.46203994547502e-05, "loss": 1.0552, "step": 10380 }, { "epoch": 0.31, "learning_rate": 3.460558288389735e-05, "loss": 1.0258, "step": 10390 }, { "epoch": 0.31, "learning_rate": 3.459076631304451e-05, "loss": 0.7794, "step": 10400 }, { "epoch": 0.31, "learning_rate": 3.457594974219167e-05, "loss": 0.8032, "step": 10410 }, { "epoch": 0.31, "learning_rate": 3.456113317133883e-05, "loss": 0.9867, "step": 10420 }, { "epoch": 0.31, "learning_rate": 3.4546316600485985e-05, "loss": 0.939, "step": 10430 }, { "epoch": 0.31, "learning_rate": 3.453150002963315e-05, "loss": 0.9754, "step": 10440 }, { "epoch": 0.31, "learning_rate": 3.45166834587803e-05, "loss": 0.9524, "step": 10450 }, { "epoch": 0.31, "learning_rate": 3.450186688792746e-05, "loss": 0.8299, "step": 10460 }, { "epoch": 0.31, "learning_rate": 3.448705031707462e-05, "loss": 0.8595, "step": 10470 }, { "epoch": 0.31, "learning_rate": 3.447223374622178e-05, "loss": 0.7695, "step": 10480 }, { "epoch": 0.31, "learning_rate": 3.4457417175368935e-05, "loss": 0.774, "step": 10490 }, { "epoch": 0.31, "learning_rate": 3.444260060451609e-05, "loss": 0.8111, "step": 10500 }, { "epoch": 0.31, "learning_rate": 3.442778403366325e-05, "loss": 0.92, "step": 10510 }, { "epoch": 0.31, "learning_rate": 3.441296746281041e-05, "loss": 0.8905, "step": 10520 }, { "epoch": 0.31, "learning_rate": 3.4398150891957564e-05, "loss": 0.7788, "step": 10530 }, { "epoch": 0.31, "learning_rate": 3.438333432110473e-05, "loss": 0.7675, "step": 10540 }, { "epoch": 0.31, "learning_rate": 3.4368517750251885e-05, "loss": 0.9089, "step": 10550 }, { "epoch": 0.31, "learning_rate": 3.4353701179399036e-05, "loss": 0.984, "step": 10560 }, { "epoch": 0.31, "learning_rate": 3.43388846085462e-05, "loss": 0.9988, "step": 10570 }, { "epoch": 0.31, "learning_rate": 3.432406803769336e-05, "loss": 0.9656, "step": 10580 }, { "epoch": 0.31, "learning_rate": 3.4309251466840514e-05, "loss": 1.0129, "step": 10590 }, { "epoch": 0.31, "learning_rate": 3.429443489598768e-05, "loss": 0.9738, "step": 10600 }, { "epoch": 0.31, "learning_rate": 3.4279618325134836e-05, "loss": 0.9136, "step": 10610 }, { "epoch": 0.31, "learning_rate": 3.4264801754281986e-05, "loss": 0.9943, "step": 10620 }, { "epoch": 0.32, "learning_rate": 3.424998518342915e-05, "loss": 0.8487, "step": 10630 }, { "epoch": 0.32, "learning_rate": 3.423516861257631e-05, "loss": 0.8095, "step": 10640 }, { "epoch": 0.32, "learning_rate": 3.4220352041723465e-05, "loss": 1.0404, "step": 10650 }, { "epoch": 0.32, "learning_rate": 3.420553547087063e-05, "loss": 0.886, "step": 10660 }, { "epoch": 0.32, "learning_rate": 3.419071890001778e-05, "loss": 1.0033, "step": 10670 }, { "epoch": 0.32, "learning_rate": 3.4175902329164936e-05, "loss": 0.8785, "step": 10680 }, { "epoch": 0.32, "learning_rate": 3.41610857583121e-05, "loss": 0.9679, "step": 10690 }, { "epoch": 0.32, "learning_rate": 3.414626918745926e-05, "loss": 0.9523, "step": 10700 }, { "epoch": 0.32, "learning_rate": 3.4131452616606415e-05, "loss": 0.8169, "step": 10710 }, { "epoch": 0.32, "learning_rate": 3.411663604575357e-05, "loss": 1.0053, "step": 10720 }, { "epoch": 0.32, "learning_rate": 3.410181947490073e-05, "loss": 0.8736, "step": 10730 }, { "epoch": 0.32, "learning_rate": 3.4087002904047886e-05, "loss": 0.9577, "step": 10740 }, { "epoch": 0.32, "learning_rate": 3.407218633319505e-05, "loss": 0.8155, "step": 10750 }, { "epoch": 0.32, "learning_rate": 3.405736976234221e-05, "loss": 0.8046, "step": 10760 }, { "epoch": 0.32, "learning_rate": 3.4042553191489365e-05, "loss": 0.8739, "step": 10770 }, { "epoch": 0.32, "learning_rate": 3.402773662063652e-05, "loss": 0.8769, "step": 10780 }, { "epoch": 0.32, "learning_rate": 3.401292004978368e-05, "loss": 0.9993, "step": 10790 }, { "epoch": 0.32, "learning_rate": 3.399810347893084e-05, "loss": 0.9002, "step": 10800 }, { "epoch": 0.32, "learning_rate": 3.3983286908077994e-05, "loss": 1.0381, "step": 10810 }, { "epoch": 0.32, "learning_rate": 3.396847033722516e-05, "loss": 0.8421, "step": 10820 }, { "epoch": 0.32, "learning_rate": 3.3953653766372315e-05, "loss": 0.8218, "step": 10830 }, { "epoch": 0.32, "learning_rate": 3.3938837195519466e-05, "loss": 0.9304, "step": 10840 }, { "epoch": 0.32, "learning_rate": 3.392402062466663e-05, "loss": 0.8721, "step": 10850 }, { "epoch": 0.32, "learning_rate": 3.390920405381379e-05, "loss": 0.9466, "step": 10860 }, { "epoch": 0.32, "learning_rate": 3.3894387482960944e-05, "loss": 0.8224, "step": 10870 }, { "epoch": 0.32, "learning_rate": 3.387957091210811e-05, "loss": 0.8728, "step": 10880 }, { "epoch": 0.32, "learning_rate": 3.386475434125526e-05, "loss": 1.0159, "step": 10890 }, { "epoch": 0.32, "learning_rate": 3.3849937770402416e-05, "loss": 0.9289, "step": 10900 }, { "epoch": 0.32, "learning_rate": 3.383512119954958e-05, "loss": 0.8679, "step": 10910 }, { "epoch": 0.32, "learning_rate": 3.382030462869674e-05, "loss": 1.0493, "step": 10920 }, { "epoch": 0.32, "learning_rate": 3.3805488057843894e-05, "loss": 0.8017, "step": 10930 }, { "epoch": 0.32, "learning_rate": 3.379067148699105e-05, "loss": 0.7417, "step": 10940 }, { "epoch": 0.32, "learning_rate": 3.377585491613821e-05, "loss": 0.8948, "step": 10950 }, { "epoch": 0.32, "learning_rate": 3.3761038345285366e-05, "loss": 0.9638, "step": 10960 }, { "epoch": 0.33, "learning_rate": 3.374622177443253e-05, "loss": 0.9754, "step": 10970 }, { "epoch": 0.33, "learning_rate": 3.373140520357969e-05, "loss": 0.8849, "step": 10980 }, { "epoch": 0.33, "learning_rate": 3.3716588632726844e-05, "loss": 0.9619, "step": 10990 }, { "epoch": 0.33, "learning_rate": 3.3701772061874e-05, "loss": 1.0277, "step": 11000 }, { "epoch": 0.33, "learning_rate": 3.368695549102116e-05, "loss": 0.9002, "step": 11010 }, { "epoch": 0.33, "learning_rate": 3.3672138920168316e-05, "loss": 1.0167, "step": 11020 }, { "epoch": 0.33, "learning_rate": 3.365732234931548e-05, "loss": 1.1125, "step": 11030 }, { "epoch": 0.33, "learning_rate": 3.364250577846264e-05, "loss": 0.9814, "step": 11040 }, { "epoch": 0.33, "learning_rate": 3.3627689207609795e-05, "loss": 0.8165, "step": 11050 }, { "epoch": 0.33, "learning_rate": 3.3612872636756945e-05, "loss": 1.0137, "step": 11060 }, { "epoch": 0.33, "learning_rate": 3.359805606590411e-05, "loss": 0.9903, "step": 11070 }, { "epoch": 0.33, "learning_rate": 3.3583239495051266e-05, "loss": 0.9677, "step": 11080 }, { "epoch": 0.33, "learning_rate": 3.3568422924198423e-05, "loss": 0.9325, "step": 11090 }, { "epoch": 0.33, "learning_rate": 3.355360635334559e-05, "loss": 0.9825, "step": 11100 }, { "epoch": 0.33, "learning_rate": 3.353878978249274e-05, "loss": 0.8533, "step": 11110 }, { "epoch": 0.33, "learning_rate": 3.3523973211639895e-05, "loss": 1.0349, "step": 11120 }, { "epoch": 0.33, "learning_rate": 3.350915664078706e-05, "loss": 0.8865, "step": 11130 }, { "epoch": 0.33, "learning_rate": 3.3494340069934216e-05, "loss": 1.0031, "step": 11140 }, { "epoch": 0.33, "learning_rate": 3.3479523499081374e-05, "loss": 0.8539, "step": 11150 }, { "epoch": 0.33, "learning_rate": 3.346470692822853e-05, "loss": 0.8381, "step": 11160 }, { "epoch": 0.33, "learning_rate": 3.344989035737569e-05, "loss": 0.9522, "step": 11170 }, { "epoch": 0.33, "learning_rate": 3.3435073786522845e-05, "loss": 0.9666, "step": 11180 }, { "epoch": 0.33, "learning_rate": 3.342025721567001e-05, "loss": 0.819, "step": 11190 }, { "epoch": 0.33, "learning_rate": 3.3405440644817167e-05, "loss": 0.9633, "step": 11200 }, { "epoch": 0.33, "learning_rate": 3.3390624073964324e-05, "loss": 1.0502, "step": 11210 }, { "epoch": 0.33, "learning_rate": 3.337580750311148e-05, "loss": 0.909, "step": 11220 }, { "epoch": 0.33, "learning_rate": 3.336099093225864e-05, "loss": 1.0321, "step": 11230 }, { "epoch": 0.33, "learning_rate": 3.3346174361405796e-05, "loss": 1.063, "step": 11240 }, { "epoch": 0.33, "learning_rate": 3.333135779055296e-05, "loss": 0.9974, "step": 11250 }, { "epoch": 0.33, "learning_rate": 3.331654121970012e-05, "loss": 0.9965, "step": 11260 }, { "epoch": 0.33, "learning_rate": 3.3301724648847274e-05, "loss": 0.7699, "step": 11270 }, { "epoch": 0.33, "learning_rate": 3.328690807799443e-05, "loss": 0.9671, "step": 11280 }, { "epoch": 0.33, "learning_rate": 3.327209150714159e-05, "loss": 0.9856, "step": 11290 }, { "epoch": 0.33, "learning_rate": 3.3257274936288746e-05, "loss": 0.8764, "step": 11300 }, { "epoch": 0.34, "learning_rate": 3.324245836543591e-05, "loss": 0.9155, "step": 11310 }, { "epoch": 0.34, "learning_rate": 3.322764179458307e-05, "loss": 1.0399, "step": 11320 }, { "epoch": 0.34, "learning_rate": 3.321282522373022e-05, "loss": 0.8548, "step": 11330 }, { "epoch": 0.34, "learning_rate": 3.3198008652877375e-05, "loss": 1.0151, "step": 11340 }, { "epoch": 0.34, "learning_rate": 3.318319208202454e-05, "loss": 0.7865, "step": 11350 }, { "epoch": 0.34, "learning_rate": 3.3168375511171696e-05, "loss": 0.8713, "step": 11360 }, { "epoch": 0.34, "learning_rate": 3.315355894031885e-05, "loss": 1.0597, "step": 11370 }, { "epoch": 0.34, "learning_rate": 3.313874236946602e-05, "loss": 0.8394, "step": 11380 }, { "epoch": 0.34, "learning_rate": 3.312392579861317e-05, "loss": 0.9528, "step": 11390 }, { "epoch": 0.34, "learning_rate": 3.3109109227760325e-05, "loss": 0.8585, "step": 11400 }, { "epoch": 0.34, "learning_rate": 3.309429265690749e-05, "loss": 0.8442, "step": 11410 }, { "epoch": 0.34, "learning_rate": 3.3079476086054646e-05, "loss": 1.0275, "step": 11420 }, { "epoch": 0.34, "learning_rate": 3.30646595152018e-05, "loss": 1.0349, "step": 11430 }, { "epoch": 0.34, "learning_rate": 3.304984294434896e-05, "loss": 0.7873, "step": 11440 }, { "epoch": 0.34, "learning_rate": 3.303502637349612e-05, "loss": 0.9402, "step": 11450 }, { "epoch": 0.34, "learning_rate": 3.3020209802643275e-05, "loss": 0.8423, "step": 11460 }, { "epoch": 0.34, "learning_rate": 3.300539323179044e-05, "loss": 0.846, "step": 11470 }, { "epoch": 0.34, "learning_rate": 3.2990576660937596e-05, "loss": 0.9767, "step": 11480 }, { "epoch": 0.34, "learning_rate": 3.2975760090084753e-05, "loss": 0.8893, "step": 11490 }, { "epoch": 0.34, "learning_rate": 3.296094351923191e-05, "loss": 1.0934, "step": 11500 }, { "epoch": 0.34, "learning_rate": 3.294612694837907e-05, "loss": 0.8449, "step": 11510 }, { "epoch": 0.34, "learning_rate": 3.2931310377526225e-05, "loss": 0.9332, "step": 11520 }, { "epoch": 0.34, "learning_rate": 3.291649380667339e-05, "loss": 0.9488, "step": 11530 }, { "epoch": 0.34, "learning_rate": 3.2901677235820546e-05, "loss": 0.7982, "step": 11540 }, { "epoch": 0.34, "learning_rate": 3.2886860664967704e-05, "loss": 0.9872, "step": 11550 }, { "epoch": 0.34, "learning_rate": 3.287204409411486e-05, "loss": 0.8299, "step": 11560 }, { "epoch": 0.34, "learning_rate": 3.285722752326202e-05, "loss": 0.8706, "step": 11570 }, { "epoch": 0.34, "learning_rate": 3.2842410952409175e-05, "loss": 1.0442, "step": 11580 }, { "epoch": 0.34, "learning_rate": 3.282759438155634e-05, "loss": 0.9559, "step": 11590 }, { "epoch": 0.34, "learning_rate": 3.2812777810703497e-05, "loss": 0.8583, "step": 11600 }, { "epoch": 0.34, "learning_rate": 3.279796123985065e-05, "loss": 0.9606, "step": 11610 }, { "epoch": 0.34, "learning_rate": 3.2783144668997804e-05, "loss": 0.9162, "step": 11620 }, { "epoch": 0.34, "learning_rate": 3.276832809814497e-05, "loss": 1.0091, "step": 11630 }, { "epoch": 0.34, "learning_rate": 3.2753511527292125e-05, "loss": 0.9522, "step": 11640 }, { "epoch": 0.35, "learning_rate": 3.273869495643928e-05, "loss": 0.9861, "step": 11650 }, { "epoch": 0.35, "learning_rate": 3.272387838558644e-05, "loss": 0.9244, "step": 11660 }, { "epoch": 0.35, "learning_rate": 3.27090618147336e-05, "loss": 0.9683, "step": 11670 }, { "epoch": 0.35, "learning_rate": 3.2694245243880754e-05, "loss": 0.8495, "step": 11680 }, { "epoch": 0.35, "learning_rate": 3.267942867302792e-05, "loss": 0.876, "step": 11690 }, { "epoch": 0.35, "learning_rate": 3.2664612102175076e-05, "loss": 0.8485, "step": 11700 }, { "epoch": 0.35, "learning_rate": 3.264979553132223e-05, "loss": 0.9864, "step": 11710 }, { "epoch": 0.35, "learning_rate": 3.263497896046939e-05, "loss": 0.9987, "step": 11720 }, { "epoch": 0.35, "learning_rate": 3.262016238961655e-05, "loss": 0.8928, "step": 11730 }, { "epoch": 0.35, "learning_rate": 3.2605345818763705e-05, "loss": 0.8925, "step": 11740 }, { "epoch": 0.35, "learning_rate": 3.259052924791087e-05, "loss": 0.9792, "step": 11750 }, { "epoch": 0.35, "learning_rate": 3.2575712677058026e-05, "loss": 0.953, "step": 11760 }, { "epoch": 0.35, "learning_rate": 3.256089610620518e-05, "loss": 0.8147, "step": 11770 }, { "epoch": 0.35, "learning_rate": 3.254607953535234e-05, "loss": 0.8235, "step": 11780 }, { "epoch": 0.35, "learning_rate": 3.25312629644995e-05, "loss": 0.8287, "step": 11790 }, { "epoch": 0.35, "learning_rate": 3.2516446393646655e-05, "loss": 0.993, "step": 11800 }, { "epoch": 0.35, "learning_rate": 3.250162982279382e-05, "loss": 0.9838, "step": 11810 }, { "epoch": 0.35, "learning_rate": 3.2486813251940976e-05, "loss": 1.0423, "step": 11820 }, { "epoch": 0.35, "learning_rate": 3.2471996681088126e-05, "loss": 0.9387, "step": 11830 }, { "epoch": 0.35, "learning_rate": 3.245718011023529e-05, "loss": 0.893, "step": 11840 }, { "epoch": 0.35, "learning_rate": 3.244236353938245e-05, "loss": 0.9076, "step": 11850 }, { "epoch": 0.35, "learning_rate": 3.2427546968529605e-05, "loss": 0.8877, "step": 11860 }, { "epoch": 0.35, "learning_rate": 3.241273039767677e-05, "loss": 0.8991, "step": 11870 }, { "epoch": 0.35, "learning_rate": 3.239791382682392e-05, "loss": 0.9059, "step": 11880 }, { "epoch": 0.35, "learning_rate": 3.2383097255971077e-05, "loss": 0.9591, "step": 11890 }, { "epoch": 0.35, "learning_rate": 3.2368280685118234e-05, "loss": 1.0124, "step": 11900 }, { "epoch": 0.35, "learning_rate": 3.23534641142654e-05, "loss": 0.9362, "step": 11910 }, { "epoch": 0.35, "learning_rate": 3.2338647543412555e-05, "loss": 0.87, "step": 11920 }, { "epoch": 0.35, "learning_rate": 3.232383097255971e-05, "loss": 0.925, "step": 11930 }, { "epoch": 0.35, "learning_rate": 3.230901440170687e-05, "loss": 0.9574, "step": 11940 }, { "epoch": 0.35, "learning_rate": 3.229419783085403e-05, "loss": 0.936, "step": 11950 }, { "epoch": 0.35, "learning_rate": 3.2279381260001184e-05, "loss": 0.7628, "step": 11960 }, { "epoch": 0.35, "learning_rate": 3.226456468914835e-05, "loss": 0.9247, "step": 11970 }, { "epoch": 0.36, "learning_rate": 3.2249748118295505e-05, "loss": 1.0546, "step": 11980 }, { "epoch": 0.36, "learning_rate": 3.223493154744266e-05, "loss": 0.7548, "step": 11990 }, { "epoch": 0.36, "learning_rate": 3.222011497658982e-05, "loss": 0.8808, "step": 12000 }, { "epoch": 0.36, "learning_rate": 3.220529840573698e-05, "loss": 0.875, "step": 12010 }, { "epoch": 0.36, "learning_rate": 3.2190481834884134e-05, "loss": 0.9583, "step": 12020 }, { "epoch": 0.36, "learning_rate": 3.21756652640313e-05, "loss": 0.9905, "step": 12030 }, { "epoch": 0.36, "learning_rate": 3.2160848693178455e-05, "loss": 0.9819, "step": 12040 }, { "epoch": 0.36, "learning_rate": 3.2146032122325606e-05, "loss": 0.8693, "step": 12050 }, { "epoch": 0.36, "learning_rate": 3.213121555147277e-05, "loss": 0.851, "step": 12060 }, { "epoch": 0.36, "learning_rate": 3.211639898061993e-05, "loss": 0.9138, "step": 12070 }, { "epoch": 0.36, "learning_rate": 3.2101582409767084e-05, "loss": 0.95, "step": 12080 }, { "epoch": 0.36, "learning_rate": 3.208676583891425e-05, "loss": 1.0171, "step": 12090 }, { "epoch": 0.36, "learning_rate": 3.20719492680614e-05, "loss": 0.7627, "step": 12100 }, { "epoch": 0.36, "learning_rate": 3.2057132697208556e-05, "loss": 1.0109, "step": 12110 }, { "epoch": 0.36, "learning_rate": 3.204231612635572e-05, "loss": 0.9426, "step": 12120 }, { "epoch": 0.36, "learning_rate": 3.202749955550288e-05, "loss": 0.9074, "step": 12130 }, { "epoch": 0.36, "learning_rate": 3.2012682984650035e-05, "loss": 0.8692, "step": 12140 }, { "epoch": 0.36, "learning_rate": 3.199786641379719e-05, "loss": 0.8587, "step": 12150 }, { "epoch": 0.36, "learning_rate": 3.198304984294435e-05, "loss": 0.8806, "step": 12160 }, { "epoch": 0.36, "learning_rate": 3.1968233272091506e-05, "loss": 0.8254, "step": 12170 }, { "epoch": 0.36, "learning_rate": 3.1953416701238663e-05, "loss": 0.9484, "step": 12180 }, { "epoch": 0.36, "learning_rate": 3.193860013038583e-05, "loss": 1.0171, "step": 12190 }, { "epoch": 0.36, "learning_rate": 3.1923783559532985e-05, "loss": 0.8648, "step": 12200 }, { "epoch": 0.36, "learning_rate": 3.190896698868014e-05, "loss": 0.9771, "step": 12210 }, { "epoch": 0.36, "learning_rate": 3.18941504178273e-05, "loss": 0.9566, "step": 12220 }, { "epoch": 0.36, "learning_rate": 3.1879333846974456e-05, "loss": 1.0407, "step": 12230 }, { "epoch": 0.36, "learning_rate": 3.1864517276121614e-05, "loss": 0.8478, "step": 12240 }, { "epoch": 0.36, "learning_rate": 3.184970070526878e-05, "loss": 0.9864, "step": 12250 }, { "epoch": 0.36, "learning_rate": 3.1834884134415935e-05, "loss": 0.9417, "step": 12260 }, { "epoch": 0.36, "learning_rate": 3.1820067563563085e-05, "loss": 0.8376, "step": 12270 }, { "epoch": 0.36, "learning_rate": 3.180525099271025e-05, "loss": 0.8979, "step": 12280 }, { "epoch": 0.36, "learning_rate": 3.1790434421857407e-05, "loss": 0.9631, "step": 12290 }, { "epoch": 0.36, "learning_rate": 3.1775617851004564e-05, "loss": 0.9826, "step": 12300 }, { "epoch": 0.36, "learning_rate": 3.176080128015173e-05, "loss": 0.8654, "step": 12310 }, { "epoch": 0.37, "learning_rate": 3.174598470929888e-05, "loss": 0.8159, "step": 12320 }, { "epoch": 0.37, "learning_rate": 3.1731168138446035e-05, "loss": 1.0192, "step": 12330 }, { "epoch": 0.37, "learning_rate": 3.17163515675932e-05, "loss": 1.0343, "step": 12340 }, { "epoch": 0.37, "learning_rate": 3.170153499674036e-05, "loss": 0.8398, "step": 12350 }, { "epoch": 0.37, "learning_rate": 3.1686718425887514e-05, "loss": 0.8501, "step": 12360 }, { "epoch": 0.37, "learning_rate": 3.167190185503468e-05, "loss": 0.8306, "step": 12370 }, { "epoch": 0.37, "learning_rate": 3.165708528418183e-05, "loss": 0.8984, "step": 12380 }, { "epoch": 0.37, "learning_rate": 3.1642268713328986e-05, "loss": 0.8919, "step": 12390 }, { "epoch": 0.37, "learning_rate": 3.162745214247615e-05, "loss": 0.8088, "step": 12400 }, { "epoch": 0.37, "learning_rate": 3.161263557162331e-05, "loss": 0.9202, "step": 12410 }, { "epoch": 0.37, "learning_rate": 3.1597819000770464e-05, "loss": 0.9711, "step": 12420 }, { "epoch": 0.37, "learning_rate": 3.158300242991762e-05, "loss": 0.8329, "step": 12430 }, { "epoch": 0.37, "learning_rate": 3.156818585906478e-05, "loss": 1.0967, "step": 12440 }, { "epoch": 0.37, "learning_rate": 3.1553369288211936e-05, "loss": 0.9618, "step": 12450 }, { "epoch": 0.37, "learning_rate": 3.153855271735909e-05, "loss": 0.9283, "step": 12460 }, { "epoch": 0.37, "learning_rate": 3.152373614650626e-05, "loss": 1.1064, "step": 12470 }, { "epoch": 0.37, "learning_rate": 3.1508919575653414e-05, "loss": 1.0143, "step": 12480 }, { "epoch": 0.37, "learning_rate": 3.1494103004800565e-05, "loss": 0.8655, "step": 12490 }, { "epoch": 0.37, "learning_rate": 3.147928643394773e-05, "loss": 0.965, "step": 12500 }, { "epoch": 0.37, "learning_rate": 3.1464469863094886e-05, "loss": 0.8912, "step": 12510 }, { "epoch": 0.37, "learning_rate": 3.144965329224204e-05, "loss": 0.8465, "step": 12520 }, { "epoch": 0.37, "learning_rate": 3.143483672138921e-05, "loss": 0.9123, "step": 12530 }, { "epoch": 0.37, "learning_rate": 3.1420020150536364e-05, "loss": 0.8008, "step": 12540 }, { "epoch": 0.37, "learning_rate": 3.1405203579683515e-05, "loss": 0.9382, "step": 12550 }, { "epoch": 0.37, "learning_rate": 3.139038700883068e-05, "loss": 0.8803, "step": 12560 }, { "epoch": 0.37, "learning_rate": 3.1375570437977836e-05, "loss": 1.0119, "step": 12570 }, { "epoch": 0.37, "learning_rate": 3.1360753867124993e-05, "loss": 0.9621, "step": 12580 }, { "epoch": 0.37, "learning_rate": 3.134593729627216e-05, "loss": 0.8976, "step": 12590 }, { "epoch": 0.37, "learning_rate": 3.133112072541931e-05, "loss": 0.9723, "step": 12600 }, { "epoch": 0.37, "learning_rate": 3.1316304154566465e-05, "loss": 0.9753, "step": 12610 }, { "epoch": 0.37, "learning_rate": 3.130148758371363e-05, "loss": 0.8936, "step": 12620 }, { "epoch": 0.37, "learning_rate": 3.1286671012860786e-05, "loss": 1.0737, "step": 12630 }, { "epoch": 0.37, "learning_rate": 3.1271854442007944e-05, "loss": 0.8781, "step": 12640 }, { "epoch": 0.37, "learning_rate": 3.12570378711551e-05, "loss": 0.8997, "step": 12650 }, { "epoch": 0.38, "learning_rate": 3.124222130030226e-05, "loss": 0.9192, "step": 12660 }, { "epoch": 0.38, "learning_rate": 3.1227404729449415e-05, "loss": 0.8524, "step": 12670 }, { "epoch": 0.38, "learning_rate": 3.121258815859658e-05, "loss": 1.0335, "step": 12680 }, { "epoch": 0.38, "learning_rate": 3.1197771587743737e-05, "loss": 0.9639, "step": 12690 }, { "epoch": 0.38, "learning_rate": 3.1182955016890894e-05, "loss": 0.818, "step": 12700 }, { "epoch": 0.38, "learning_rate": 3.116813844603805e-05, "loss": 0.9447, "step": 12710 }, { "epoch": 0.38, "learning_rate": 3.115332187518521e-05, "loss": 0.9139, "step": 12720 }, { "epoch": 0.38, "learning_rate": 3.1138505304332365e-05, "loss": 1.004, "step": 12730 }, { "epoch": 0.38, "learning_rate": 3.112368873347952e-05, "loss": 0.8301, "step": 12740 }, { "epoch": 0.38, "learning_rate": 3.110887216262669e-05, "loss": 1.0328, "step": 12750 }, { "epoch": 0.38, "learning_rate": 3.1094055591773844e-05, "loss": 0.9522, "step": 12760 }, { "epoch": 0.38, "learning_rate": 3.1079239020920994e-05, "loss": 0.8784, "step": 12770 }, { "epoch": 0.38, "learning_rate": 3.106442245006816e-05, "loss": 0.9544, "step": 12780 }, { "epoch": 0.38, "learning_rate": 3.1049605879215316e-05, "loss": 0.9545, "step": 12790 }, { "epoch": 0.38, "learning_rate": 3.103478930836247e-05, "loss": 0.8227, "step": 12800 }, { "epoch": 0.38, "learning_rate": 3.101997273750964e-05, "loss": 1.0182, "step": 12810 }, { "epoch": 0.38, "learning_rate": 3.100515616665679e-05, "loss": 0.9353, "step": 12820 }, { "epoch": 0.38, "learning_rate": 3.0990339595803945e-05, "loss": 1.0409, "step": 12830 }, { "epoch": 0.38, "learning_rate": 3.097552302495111e-05, "loss": 0.9231, "step": 12840 }, { "epoch": 0.38, "learning_rate": 3.0960706454098266e-05, "loss": 1.0812, "step": 12850 }, { "epoch": 0.38, "learning_rate": 3.094588988324542e-05, "loss": 0.8666, "step": 12860 }, { "epoch": 0.38, "learning_rate": 3.093107331239258e-05, "loss": 0.8488, "step": 12870 }, { "epoch": 0.38, "learning_rate": 3.091625674153974e-05, "loss": 0.8775, "step": 12880 }, { "epoch": 0.38, "learning_rate": 3.0901440170686895e-05, "loss": 0.8833, "step": 12890 }, { "epoch": 0.38, "learning_rate": 3.088662359983406e-05, "loss": 0.8949, "step": 12900 }, { "epoch": 0.38, "learning_rate": 3.0871807028981216e-05, "loss": 0.9946, "step": 12910 }, { "epoch": 0.38, "learning_rate": 3.085699045812837e-05, "loss": 0.9376, "step": 12920 }, { "epoch": 0.38, "learning_rate": 3.084217388727553e-05, "loss": 0.8217, "step": 12930 }, { "epoch": 0.38, "learning_rate": 3.082735731642269e-05, "loss": 0.8726, "step": 12940 }, { "epoch": 0.38, "learning_rate": 3.0812540745569845e-05, "loss": 0.912, "step": 12950 }, { "epoch": 0.38, "learning_rate": 3.079772417471701e-05, "loss": 0.985, "step": 12960 }, { "epoch": 0.38, "learning_rate": 3.0782907603864166e-05, "loss": 0.8384, "step": 12970 }, { "epoch": 0.38, "learning_rate": 3.076809103301132e-05, "loss": 0.8627, "step": 12980 }, { "epoch": 0.38, "learning_rate": 3.0753274462158474e-05, "loss": 0.8514, "step": 12990 }, { "epoch": 0.39, "learning_rate": 3.073845789130564e-05, "loss": 0.8775, "step": 13000 }, { "epoch": 0.39, "learning_rate": 3.0723641320452795e-05, "loss": 0.8567, "step": 13010 }, { "epoch": 0.39, "learning_rate": 3.070882474959995e-05, "loss": 0.7999, "step": 13020 }, { "epoch": 0.39, "learning_rate": 3.0694008178747116e-05, "loss": 0.8365, "step": 13030 }, { "epoch": 0.39, "learning_rate": 3.067919160789427e-05, "loss": 0.83, "step": 13040 }, { "epoch": 0.39, "learning_rate": 3.0664375037041424e-05, "loss": 0.9431, "step": 13050 }, { "epoch": 0.39, "learning_rate": 3.064955846618859e-05, "loss": 0.838, "step": 13060 }, { "epoch": 0.39, "learning_rate": 3.0634741895335745e-05, "loss": 1.0418, "step": 13070 }, { "epoch": 0.39, "learning_rate": 3.06199253244829e-05, "loss": 0.7926, "step": 13080 }, { "epoch": 0.39, "learning_rate": 3.060510875363006e-05, "loss": 0.8476, "step": 13090 }, { "epoch": 0.39, "learning_rate": 3.059029218277722e-05, "loss": 0.9605, "step": 13100 }, { "epoch": 0.39, "learning_rate": 3.0575475611924374e-05, "loss": 0.8607, "step": 13110 }, { "epoch": 0.39, "learning_rate": 3.056065904107154e-05, "loss": 1.0471, "step": 13120 }, { "epoch": 0.39, "learning_rate": 3.0545842470218695e-05, "loss": 0.8444, "step": 13130 }, { "epoch": 0.39, "learning_rate": 3.053102589936585e-05, "loss": 0.9976, "step": 13140 }, { "epoch": 0.39, "learning_rate": 3.0516209328513013e-05, "loss": 0.9886, "step": 13150 }, { "epoch": 0.39, "learning_rate": 3.050139275766017e-05, "loss": 0.8636, "step": 13160 }, { "epoch": 0.39, "learning_rate": 3.0486576186807324e-05, "loss": 0.8408, "step": 13170 }, { "epoch": 0.39, "learning_rate": 3.047175961595449e-05, "loss": 0.8352, "step": 13180 }, { "epoch": 0.39, "learning_rate": 3.0456943045101642e-05, "loss": 1.0622, "step": 13190 }, { "epoch": 0.39, "learning_rate": 3.04421264742488e-05, "loss": 0.9629, "step": 13200 }, { "epoch": 0.39, "learning_rate": 3.0427309903395963e-05, "loss": 0.9512, "step": 13210 }, { "epoch": 0.39, "learning_rate": 3.0412493332543117e-05, "loss": 0.8721, "step": 13220 }, { "epoch": 0.39, "learning_rate": 3.0397676761690274e-05, "loss": 0.9748, "step": 13230 }, { "epoch": 0.39, "learning_rate": 3.0382860190837435e-05, "loss": 1.0791, "step": 13240 }, { "epoch": 0.39, "learning_rate": 3.0368043619984592e-05, "loss": 0.8108, "step": 13250 }, { "epoch": 0.39, "learning_rate": 3.035322704913175e-05, "loss": 0.9094, "step": 13260 }, { "epoch": 0.39, "learning_rate": 3.0338410478278907e-05, "loss": 1.0127, "step": 13270 }, { "epoch": 0.39, "learning_rate": 3.0323593907426067e-05, "loss": 0.8709, "step": 13280 }, { "epoch": 0.39, "learning_rate": 3.0308777336573225e-05, "loss": 0.9389, "step": 13290 }, { "epoch": 0.39, "learning_rate": 3.0293960765720382e-05, "loss": 0.9801, "step": 13300 }, { "epoch": 0.39, "learning_rate": 3.0279144194867543e-05, "loss": 1.082, "step": 13310 }, { "epoch": 0.39, "learning_rate": 3.02643276240147e-05, "loss": 0.8681, "step": 13320 }, { "epoch": 0.4, "learning_rate": 3.0249511053161857e-05, "loss": 0.8893, "step": 13330 }, { "epoch": 0.4, "learning_rate": 3.0234694482309018e-05, "loss": 0.9865, "step": 13340 }, { "epoch": 0.4, "learning_rate": 3.0219877911456175e-05, "loss": 0.8258, "step": 13350 }, { "epoch": 0.4, "learning_rate": 3.020506134060333e-05, "loss": 0.9767, "step": 13360 }, { "epoch": 0.4, "learning_rate": 3.0190244769750493e-05, "loss": 0.8932, "step": 13370 }, { "epoch": 0.4, "learning_rate": 3.017542819889765e-05, "loss": 0.8748, "step": 13380 }, { "epoch": 0.4, "learning_rate": 3.0160611628044804e-05, "loss": 0.9297, "step": 13390 }, { "epoch": 0.4, "learning_rate": 3.0145795057191968e-05, "loss": 0.7237, "step": 13400 }, { "epoch": 0.4, "learning_rate": 3.013097848633912e-05, "loss": 1.0156, "step": 13410 }, { "epoch": 0.4, "learning_rate": 3.011616191548628e-05, "loss": 0.8948, "step": 13420 }, { "epoch": 0.4, "learning_rate": 3.0101345344633443e-05, "loss": 0.9977, "step": 13430 }, { "epoch": 0.4, "learning_rate": 3.0086528773780597e-05, "loss": 0.9071, "step": 13440 }, { "epoch": 0.4, "learning_rate": 3.0071712202927754e-05, "loss": 0.9769, "step": 13450 }, { "epoch": 0.4, "learning_rate": 3.0056895632074915e-05, "loss": 0.9851, "step": 13460 }, { "epoch": 0.4, "learning_rate": 3.0042079061222072e-05, "loss": 0.9729, "step": 13470 }, { "epoch": 0.4, "learning_rate": 3.002726249036923e-05, "loss": 0.8864, "step": 13480 }, { "epoch": 0.4, "learning_rate": 3.001244591951639e-05, "loss": 1.0173, "step": 13490 }, { "epoch": 0.4, "learning_rate": 2.9997629348663547e-05, "loss": 0.8762, "step": 13500 }, { "epoch": 0.4, "learning_rate": 2.9982812777810704e-05, "loss": 1.0338, "step": 13510 }, { "epoch": 0.4, "learning_rate": 2.9967996206957865e-05, "loss": 0.869, "step": 13520 }, { "epoch": 0.4, "learning_rate": 2.9953179636105022e-05, "loss": 0.978, "step": 13530 }, { "epoch": 0.4, "learning_rate": 2.993836306525218e-05, "loss": 0.8195, "step": 13540 }, { "epoch": 0.4, "learning_rate": 2.9923546494399336e-05, "loss": 1.0383, "step": 13550 }, { "epoch": 0.4, "learning_rate": 2.9908729923546497e-05, "loss": 0.8594, "step": 13560 }, { "epoch": 0.4, "learning_rate": 2.9893913352693654e-05, "loss": 0.9188, "step": 13570 }, { "epoch": 0.4, "learning_rate": 2.9879096781840808e-05, "loss": 0.9374, "step": 13580 }, { "epoch": 0.4, "learning_rate": 2.9864280210987972e-05, "loss": 0.7712, "step": 13590 }, { "epoch": 0.4, "learning_rate": 2.984946364013513e-05, "loss": 1.0269, "step": 13600 }, { "epoch": 0.4, "learning_rate": 2.9834647069282283e-05, "loss": 0.9598, "step": 13610 }, { "epoch": 0.4, "learning_rate": 2.9819830498429447e-05, "loss": 0.908, "step": 13620 }, { "epoch": 0.4, "learning_rate": 2.98050139275766e-05, "loss": 0.8749, "step": 13630 }, { "epoch": 0.4, "learning_rate": 2.9790197356723758e-05, "loss": 0.9237, "step": 13640 }, { "epoch": 0.4, "learning_rate": 2.9775380785870922e-05, "loss": 0.9706, "step": 13650 }, { "epoch": 0.4, "learning_rate": 2.9760564215018076e-05, "loss": 0.9432, "step": 13660 }, { "epoch": 0.41, "learning_rate": 2.9745747644165233e-05, "loss": 0.994, "step": 13670 }, { "epoch": 0.41, "learning_rate": 2.9730931073312397e-05, "loss": 0.981, "step": 13680 }, { "epoch": 0.41, "learning_rate": 2.971611450245955e-05, "loss": 0.8332, "step": 13690 }, { "epoch": 0.41, "learning_rate": 2.970129793160671e-05, "loss": 0.8669, "step": 13700 }, { "epoch": 0.41, "learning_rate": 2.968648136075387e-05, "loss": 0.8138, "step": 13710 }, { "epoch": 0.41, "learning_rate": 2.9671664789901026e-05, "loss": 0.9631, "step": 13720 }, { "epoch": 0.41, "learning_rate": 2.9656848219048184e-05, "loss": 0.8868, "step": 13730 }, { "epoch": 0.41, "learning_rate": 2.9642031648195344e-05, "loss": 0.8991, "step": 13740 }, { "epoch": 0.41, "learning_rate": 2.96272150773425e-05, "loss": 0.8355, "step": 13750 }, { "epoch": 0.41, "learning_rate": 2.961239850648966e-05, "loss": 0.8919, "step": 13760 }, { "epoch": 0.41, "learning_rate": 2.959758193563682e-05, "loss": 0.8202, "step": 13770 }, { "epoch": 0.41, "learning_rate": 2.9582765364783976e-05, "loss": 1.0218, "step": 13780 }, { "epoch": 0.41, "learning_rate": 2.9567948793931134e-05, "loss": 0.8098, "step": 13790 }, { "epoch": 0.41, "learning_rate": 2.9553132223078294e-05, "loss": 0.9244, "step": 13800 }, { "epoch": 0.41, "learning_rate": 2.953831565222545e-05, "loss": 0.9319, "step": 13810 }, { "epoch": 0.41, "learning_rate": 2.952349908137261e-05, "loss": 0.8676, "step": 13820 }, { "epoch": 0.41, "learning_rate": 2.9508682510519763e-05, "loss": 1.0167, "step": 13830 }, { "epoch": 0.41, "learning_rate": 2.9493865939666927e-05, "loss": 0.8974, "step": 13840 }, { "epoch": 0.41, "learning_rate": 2.9479049368814084e-05, "loss": 0.9078, "step": 13850 }, { "epoch": 0.41, "learning_rate": 2.9464232797961238e-05, "loss": 0.8083, "step": 13860 }, { "epoch": 0.41, "learning_rate": 2.9449416227108402e-05, "loss": 0.83, "step": 13870 }, { "epoch": 0.41, "learning_rate": 2.9434599656255556e-05, "loss": 0.9206, "step": 13880 }, { "epoch": 0.41, "learning_rate": 2.9419783085402713e-05, "loss": 0.945, "step": 13890 }, { "epoch": 0.41, "learning_rate": 2.9404966514549877e-05, "loss": 0.9216, "step": 13900 }, { "epoch": 0.41, "learning_rate": 2.939014994369703e-05, "loss": 0.7943, "step": 13910 }, { "epoch": 0.41, "learning_rate": 2.9375333372844188e-05, "loss": 0.9959, "step": 13920 }, { "epoch": 0.41, "learning_rate": 2.936051680199135e-05, "loss": 0.971, "step": 13930 }, { "epoch": 0.41, "learning_rate": 2.9345700231138506e-05, "loss": 0.9838, "step": 13940 }, { "epoch": 0.41, "learning_rate": 2.9330883660285663e-05, "loss": 0.8578, "step": 13950 }, { "epoch": 0.41, "learning_rate": 2.9316067089432824e-05, "loss": 0.9311, "step": 13960 }, { "epoch": 0.41, "learning_rate": 2.930125051857998e-05, "loss": 0.8394, "step": 13970 }, { "epoch": 0.41, "learning_rate": 2.9286433947727138e-05, "loss": 0.88, "step": 13980 }, { "epoch": 0.41, "learning_rate": 2.92716173768743e-05, "loss": 0.8746, "step": 13990 }, { "epoch": 0.41, "learning_rate": 2.9256800806021456e-05, "loss": 0.8464, "step": 14000 }, { "epoch": 0.42, "learning_rate": 2.9241984235168613e-05, "loss": 0.8344, "step": 14010 }, { "epoch": 0.42, "learning_rate": 2.9227167664315774e-05, "loss": 0.8508, "step": 14020 }, { "epoch": 0.42, "learning_rate": 2.921235109346293e-05, "loss": 0.9861, "step": 14030 }, { "epoch": 0.42, "learning_rate": 2.9197534522610088e-05, "loss": 0.9741, "step": 14040 }, { "epoch": 0.42, "learning_rate": 2.918271795175725e-05, "loss": 0.8541, "step": 14050 }, { "epoch": 0.42, "learning_rate": 2.9167901380904406e-05, "loss": 1.1122, "step": 14060 }, { "epoch": 0.42, "learning_rate": 2.9153084810051563e-05, "loss": 0.8881, "step": 14070 }, { "epoch": 0.42, "learning_rate": 2.9138268239198724e-05, "loss": 0.8445, "step": 14080 }, { "epoch": 0.42, "learning_rate": 2.912345166834588e-05, "loss": 0.7931, "step": 14090 }, { "epoch": 0.42, "learning_rate": 2.9108635097493035e-05, "loss": 0.8219, "step": 14100 }, { "epoch": 0.42, "learning_rate": 2.9093818526640192e-05, "loss": 0.9491, "step": 14110 }, { "epoch": 0.42, "learning_rate": 2.9079001955787356e-05, "loss": 1.0196, "step": 14120 }, { "epoch": 0.42, "learning_rate": 2.906418538493451e-05, "loss": 1.0039, "step": 14130 }, { "epoch": 0.42, "learning_rate": 2.9049368814081667e-05, "loss": 0.9614, "step": 14140 }, { "epoch": 0.42, "learning_rate": 2.903455224322883e-05, "loss": 1.0026, "step": 14150 }, { "epoch": 0.42, "learning_rate": 2.9019735672375985e-05, "loss": 0.9555, "step": 14160 }, { "epoch": 0.42, "learning_rate": 2.9004919101523142e-05, "loss": 0.9415, "step": 14170 }, { "epoch": 0.42, "learning_rate": 2.8990102530670303e-05, "loss": 0.6998, "step": 14180 }, { "epoch": 0.42, "learning_rate": 2.897528595981746e-05, "loss": 0.9666, "step": 14190 }, { "epoch": 0.42, "learning_rate": 2.8960469388964618e-05, "loss": 0.884, "step": 14200 }, { "epoch": 0.42, "learning_rate": 2.8945652818111778e-05, "loss": 0.8459, "step": 14210 }, { "epoch": 0.42, "learning_rate": 2.8930836247258935e-05, "loss": 0.8909, "step": 14220 }, { "epoch": 0.42, "learning_rate": 2.8916019676406093e-05, "loss": 0.9377, "step": 14230 }, { "epoch": 0.42, "learning_rate": 2.8901203105553253e-05, "loss": 0.9647, "step": 14240 }, { "epoch": 0.42, "learning_rate": 2.888638653470041e-05, "loss": 0.9309, "step": 14250 }, { "epoch": 0.42, "learning_rate": 2.8871569963847568e-05, "loss": 1.0228, "step": 14260 }, { "epoch": 0.42, "learning_rate": 2.8856753392994728e-05, "loss": 0.8874, "step": 14270 }, { "epoch": 0.42, "learning_rate": 2.8841936822141886e-05, "loss": 0.845, "step": 14280 }, { "epoch": 0.42, "learning_rate": 2.8827120251289043e-05, "loss": 0.8833, "step": 14290 }, { "epoch": 0.42, "learning_rate": 2.8812303680436203e-05, "loss": 1.0485, "step": 14300 }, { "epoch": 0.42, "learning_rate": 2.879748710958336e-05, "loss": 0.859, "step": 14310 }, { "epoch": 0.42, "learning_rate": 2.8782670538730518e-05, "loss": 0.8155, "step": 14320 }, { "epoch": 0.42, "learning_rate": 2.876785396787768e-05, "loss": 0.9114, "step": 14330 }, { "epoch": 0.42, "learning_rate": 2.8753037397024836e-05, "loss": 0.8955, "step": 14340 }, { "epoch": 0.43, "learning_rate": 2.873822082617199e-05, "loss": 0.8879, "step": 14350 }, { "epoch": 0.43, "learning_rate": 2.8723404255319154e-05, "loss": 0.9131, "step": 14360 }, { "epoch": 0.43, "learning_rate": 2.870858768446631e-05, "loss": 0.925, "step": 14370 }, { "epoch": 0.43, "learning_rate": 2.8693771113613465e-05, "loss": 0.8615, "step": 14380 }, { "epoch": 0.43, "learning_rate": 2.8678954542760622e-05, "loss": 0.9005, "step": 14390 }, { "epoch": 0.43, "learning_rate": 2.8664137971907782e-05, "loss": 1.0335, "step": 14400 }, { "epoch": 0.43, "learning_rate": 2.864932140105494e-05, "loss": 0.8189, "step": 14410 }, { "epoch": 0.43, "learning_rate": 2.8634504830202097e-05, "loss": 0.8096, "step": 14420 }, { "epoch": 0.43, "learning_rate": 2.8619688259349258e-05, "loss": 0.8743, "step": 14430 }, { "epoch": 0.43, "learning_rate": 2.8604871688496415e-05, "loss": 0.7303, "step": 14440 }, { "epoch": 0.43, "learning_rate": 2.8590055117643572e-05, "loss": 0.9973, "step": 14450 }, { "epoch": 0.43, "learning_rate": 2.8575238546790733e-05, "loss": 0.7832, "step": 14460 }, { "epoch": 0.43, "learning_rate": 2.856042197593789e-05, "loss": 0.9041, "step": 14470 }, { "epoch": 0.43, "learning_rate": 2.8545605405085047e-05, "loss": 0.8215, "step": 14480 }, { "epoch": 0.43, "learning_rate": 2.8530788834232208e-05, "loss": 0.9953, "step": 14490 }, { "epoch": 0.43, "learning_rate": 2.8515972263379365e-05, "loss": 0.9047, "step": 14500 }, { "epoch": 0.43, "learning_rate": 2.8501155692526522e-05, "loss": 0.9655, "step": 14510 }, { "epoch": 0.43, "learning_rate": 2.8486339121673683e-05, "loss": 0.8112, "step": 14520 }, { "epoch": 0.43, "learning_rate": 2.847152255082084e-05, "loss": 0.9085, "step": 14530 }, { "epoch": 0.43, "learning_rate": 2.8456705979967997e-05, "loss": 0.8962, "step": 14540 }, { "epoch": 0.43, "learning_rate": 2.8441889409115158e-05, "loss": 0.8624, "step": 14550 }, { "epoch": 0.43, "learning_rate": 2.8427072838262315e-05, "loss": 0.8384, "step": 14560 }, { "epoch": 0.43, "learning_rate": 2.841225626740947e-05, "loss": 0.8974, "step": 14570 }, { "epoch": 0.43, "learning_rate": 2.8397439696556633e-05, "loss": 0.9387, "step": 14580 }, { "epoch": 0.43, "learning_rate": 2.838262312570379e-05, "loss": 0.9768, "step": 14590 }, { "epoch": 0.43, "learning_rate": 2.8367806554850944e-05, "loss": 0.9538, "step": 14600 }, { "epoch": 0.43, "learning_rate": 2.8352989983998108e-05, "loss": 0.8618, "step": 14610 }, { "epoch": 0.43, "learning_rate": 2.8338173413145265e-05, "loss": 0.9872, "step": 14620 }, { "epoch": 0.43, "learning_rate": 2.832335684229242e-05, "loss": 0.9197, "step": 14630 }, { "epoch": 0.43, "learning_rate": 2.8308540271439583e-05, "loss": 0.9187, "step": 14640 }, { "epoch": 0.43, "learning_rate": 2.8293723700586737e-05, "loss": 0.9396, "step": 14650 }, { "epoch": 0.43, "learning_rate": 2.8278907129733894e-05, "loss": 0.8117, "step": 14660 }, { "epoch": 0.43, "learning_rate": 2.826409055888105e-05, "loss": 0.834, "step": 14670 }, { "epoch": 0.44, "learning_rate": 2.8249273988028212e-05, "loss": 0.8044, "step": 14680 }, { "epoch": 0.44, "learning_rate": 2.823445741717537e-05, "loss": 0.9714, "step": 14690 }, { "epoch": 0.44, "learning_rate": 2.8219640846322527e-05, "loss": 0.7972, "step": 14700 }, { "epoch": 0.44, "learning_rate": 2.8204824275469687e-05, "loss": 0.935, "step": 14710 }, { "epoch": 0.44, "learning_rate": 2.8190007704616844e-05, "loss": 1.164, "step": 14720 }, { "epoch": 0.44, "learning_rate": 2.8175191133764e-05, "loss": 0.8783, "step": 14730 }, { "epoch": 0.44, "learning_rate": 2.8160374562911162e-05, "loss": 0.8075, "step": 14740 }, { "epoch": 0.44, "learning_rate": 2.814555799205832e-05, "loss": 0.8762, "step": 14750 }, { "epoch": 0.44, "learning_rate": 2.8130741421205477e-05, "loss": 0.9275, "step": 14760 }, { "epoch": 0.44, "learning_rate": 2.8115924850352637e-05, "loss": 0.8824, "step": 14770 }, { "epoch": 0.44, "learning_rate": 2.8101108279499795e-05, "loss": 1.0268, "step": 14780 }, { "epoch": 0.44, "learning_rate": 2.8086291708646952e-05, "loss": 1.143, "step": 14790 }, { "epoch": 0.44, "learning_rate": 2.8071475137794112e-05, "loss": 0.8914, "step": 14800 }, { "epoch": 0.44, "learning_rate": 2.805665856694127e-05, "loss": 0.8631, "step": 14810 }, { "epoch": 0.44, "learning_rate": 2.8041841996088424e-05, "loss": 0.8337, "step": 14820 }, { "epoch": 0.44, "learning_rate": 2.8027025425235588e-05, "loss": 0.9228, "step": 14830 }, { "epoch": 0.44, "learning_rate": 2.8012208854382745e-05, "loss": 0.8923, "step": 14840 }, { "epoch": 0.44, "learning_rate": 2.79973922835299e-05, "loss": 0.8587, "step": 14850 }, { "epoch": 0.44, "learning_rate": 2.7982575712677063e-05, "loss": 0.9243, "step": 14860 }, { "epoch": 0.44, "learning_rate": 2.7967759141824216e-05, "loss": 0.9459, "step": 14870 }, { "epoch": 0.44, "learning_rate": 2.7952942570971374e-05, "loss": 0.9452, "step": 14880 }, { "epoch": 0.44, "learning_rate": 2.7938126000118538e-05, "loss": 0.9123, "step": 14890 }, { "epoch": 0.44, "learning_rate": 2.792330942926569e-05, "loss": 0.8826, "step": 14900 }, { "epoch": 0.44, "learning_rate": 2.790849285841285e-05, "loss": 0.8847, "step": 14910 }, { "epoch": 0.44, "learning_rate": 2.789367628756001e-05, "loss": 1.0471, "step": 14920 }, { "epoch": 0.44, "learning_rate": 2.7878859716707167e-05, "loss": 0.8778, "step": 14930 }, { "epoch": 0.44, "learning_rate": 2.7864043145854324e-05, "loss": 0.9198, "step": 14940 }, { "epoch": 0.44, "learning_rate": 2.784922657500148e-05, "loss": 0.8459, "step": 14950 }, { "epoch": 0.44, "learning_rate": 2.7834410004148642e-05, "loss": 0.9726, "step": 14960 }, { "epoch": 0.44, "learning_rate": 2.78195934332958e-05, "loss": 0.8508, "step": 14970 }, { "epoch": 0.44, "learning_rate": 2.7804776862442956e-05, "loss": 0.9304, "step": 14980 }, { "epoch": 0.44, "learning_rate": 2.7789960291590117e-05, "loss": 0.8876, "step": 14990 }, { "epoch": 0.44, "learning_rate": 2.7775143720737274e-05, "loss": 0.9692, "step": 15000 }, { "epoch": 0.44, "learning_rate": 2.776032714988443e-05, "loss": 0.8446, "step": 15010 }, { "epoch": 0.45, "learning_rate": 2.7745510579031592e-05, "loss": 0.8281, "step": 15020 }, { "epoch": 0.45, "learning_rate": 2.773069400817875e-05, "loss": 0.9253, "step": 15030 }, { "epoch": 0.45, "learning_rate": 2.7715877437325903e-05, "loss": 0.9492, "step": 15040 }, { "epoch": 0.45, "learning_rate": 2.7701060866473067e-05, "loss": 0.8651, "step": 15050 }, { "epoch": 0.45, "learning_rate": 2.7686244295620224e-05, "loss": 0.863, "step": 15060 }, { "epoch": 0.45, "learning_rate": 2.7671427724767378e-05, "loss": 0.8056, "step": 15070 }, { "epoch": 0.45, "learning_rate": 2.7656611153914542e-05, "loss": 0.8961, "step": 15080 }, { "epoch": 0.45, "learning_rate": 2.7641794583061696e-05, "loss": 0.9401, "step": 15090 }, { "epoch": 0.45, "learning_rate": 2.7626978012208853e-05, "loss": 0.8435, "step": 15100 }, { "epoch": 0.45, "learning_rate": 2.7612161441356017e-05, "loss": 0.9163, "step": 15110 }, { "epoch": 0.45, "learning_rate": 2.759734487050317e-05, "loss": 0.9933, "step": 15120 }, { "epoch": 0.45, "learning_rate": 2.7582528299650328e-05, "loss": 0.7188, "step": 15130 }, { "epoch": 0.45, "learning_rate": 2.7567711728797492e-05, "loss": 0.8124, "step": 15140 }, { "epoch": 0.45, "learning_rate": 2.7552895157944646e-05, "loss": 0.9366, "step": 15150 }, { "epoch": 0.45, "learning_rate": 2.7538078587091803e-05, "loss": 0.9292, "step": 15160 }, { "epoch": 0.45, "learning_rate": 2.7523262016238964e-05, "loss": 0.8255, "step": 15170 }, { "epoch": 0.45, "learning_rate": 2.750844544538612e-05, "loss": 0.8795, "step": 15180 }, { "epoch": 0.45, "learning_rate": 2.749362887453328e-05, "loss": 0.8525, "step": 15190 }, { "epoch": 0.45, "learning_rate": 2.747881230368044e-05, "loss": 0.8968, "step": 15200 }, { "epoch": 0.45, "learning_rate": 2.7463995732827596e-05, "loss": 0.8813, "step": 15210 }, { "epoch": 0.45, "learning_rate": 2.7449179161974753e-05, "loss": 0.9808, "step": 15220 }, { "epoch": 0.45, "learning_rate": 2.743436259112191e-05, "loss": 0.9589, "step": 15230 }, { "epoch": 0.45, "learning_rate": 2.741954602026907e-05, "loss": 0.8174, "step": 15240 }, { "epoch": 0.45, "learning_rate": 2.740472944941623e-05, "loss": 0.9346, "step": 15250 }, { "epoch": 0.45, "learning_rate": 2.7389912878563382e-05, "loss": 1.0015, "step": 15260 }, { "epoch": 0.45, "learning_rate": 2.7375096307710546e-05, "loss": 0.8213, "step": 15270 }, { "epoch": 0.45, "learning_rate": 2.7360279736857704e-05, "loss": 0.9302, "step": 15280 }, { "epoch": 0.45, "learning_rate": 2.7345463166004857e-05, "loss": 0.8862, "step": 15290 }, { "epoch": 0.45, "learning_rate": 2.733064659515202e-05, "loss": 0.9485, "step": 15300 }, { "epoch": 0.45, "learning_rate": 2.731583002429918e-05, "loss": 0.8125, "step": 15310 }, { "epoch": 0.45, "learning_rate": 2.7301013453446333e-05, "loss": 0.9412, "step": 15320 }, { "epoch": 0.45, "learning_rate": 2.7286196882593497e-05, "loss": 0.9581, "step": 15330 }, { "epoch": 0.45, "learning_rate": 2.727138031174065e-05, "loss": 0.8735, "step": 15340 }, { "epoch": 0.45, "learning_rate": 2.7256563740887808e-05, "loss": 0.9793, "step": 15350 }, { "epoch": 0.46, "learning_rate": 2.724174717003497e-05, "loss": 1.0512, "step": 15360 }, { "epoch": 0.46, "learning_rate": 2.7226930599182126e-05, "loss": 0.907, "step": 15370 }, { "epoch": 0.46, "learning_rate": 2.7212114028329283e-05, "loss": 0.9575, "step": 15380 }, { "epoch": 0.46, "learning_rate": 2.7197297457476443e-05, "loss": 0.8579, "step": 15390 }, { "epoch": 0.46, "learning_rate": 2.71824808866236e-05, "loss": 0.9854, "step": 15400 }, { "epoch": 0.46, "learning_rate": 2.7167664315770758e-05, "loss": 0.7837, "step": 15410 }, { "epoch": 0.46, "learning_rate": 2.715284774491792e-05, "loss": 0.858, "step": 15420 }, { "epoch": 0.46, "learning_rate": 2.7138031174065076e-05, "loss": 0.891, "step": 15430 }, { "epoch": 0.46, "learning_rate": 2.7123214603212233e-05, "loss": 0.8212, "step": 15440 }, { "epoch": 0.46, "learning_rate": 2.7108398032359394e-05, "loss": 0.8824, "step": 15450 }, { "epoch": 0.46, "learning_rate": 2.709358146150655e-05, "loss": 0.8595, "step": 15460 }, { "epoch": 0.46, "learning_rate": 2.7078764890653708e-05, "loss": 0.8807, "step": 15470 }, { "epoch": 0.46, "learning_rate": 2.706394831980087e-05, "loss": 0.9255, "step": 15480 }, { "epoch": 0.46, "learning_rate": 2.7049131748948026e-05, "loss": 0.8835, "step": 15490 }, { "epoch": 0.46, "learning_rate": 2.7034315178095183e-05, "loss": 0.8357, "step": 15500 }, { "epoch": 0.46, "learning_rate": 2.7019498607242337e-05, "loss": 0.8606, "step": 15510 }, { "epoch": 0.46, "learning_rate": 2.70046820363895e-05, "loss": 0.8943, "step": 15520 }, { "epoch": 0.46, "learning_rate": 2.6989865465536658e-05, "loss": 0.9866, "step": 15530 }, { "epoch": 0.46, "learning_rate": 2.6975048894683812e-05, "loss": 0.9325, "step": 15540 }, { "epoch": 0.46, "learning_rate": 2.6960232323830976e-05, "loss": 0.851, "step": 15550 }, { "epoch": 0.46, "learning_rate": 2.694541575297813e-05, "loss": 0.9898, "step": 15560 }, { "epoch": 0.46, "learning_rate": 2.6930599182125287e-05, "loss": 0.9487, "step": 15570 }, { "epoch": 0.46, "learning_rate": 2.691578261127245e-05, "loss": 0.8888, "step": 15580 }, { "epoch": 0.46, "learning_rate": 2.6900966040419605e-05, "loss": 0.9397, "step": 15590 }, { "epoch": 0.46, "learning_rate": 2.6886149469566762e-05, "loss": 0.8789, "step": 15600 }, { "epoch": 0.46, "learning_rate": 2.6871332898713926e-05, "loss": 0.8987, "step": 15610 }, { "epoch": 0.46, "learning_rate": 2.685651632786108e-05, "loss": 1.0085, "step": 15620 }, { "epoch": 0.46, "learning_rate": 2.6841699757008237e-05, "loss": 0.9579, "step": 15630 }, { "epoch": 0.46, "learning_rate": 2.6826883186155398e-05, "loss": 0.8685, "step": 15640 }, { "epoch": 0.46, "learning_rate": 2.6812066615302555e-05, "loss": 0.9063, "step": 15650 }, { "epoch": 0.46, "learning_rate": 2.6797250044449712e-05, "loss": 1.0567, "step": 15660 }, { "epoch": 0.46, "learning_rate": 2.6782433473596873e-05, "loss": 0.8241, "step": 15670 }, { "epoch": 0.46, "learning_rate": 2.676761690274403e-05, "loss": 0.8085, "step": 15680 }, { "epoch": 0.46, "learning_rate": 2.6752800331891187e-05, "loss": 0.7809, "step": 15690 }, { "epoch": 0.47, "learning_rate": 2.6737983761038348e-05, "loss": 0.8839, "step": 15700 }, { "epoch": 0.47, "learning_rate": 2.6723167190185505e-05, "loss": 0.8962, "step": 15710 }, { "epoch": 0.47, "learning_rate": 2.6708350619332663e-05, "loss": 0.9903, "step": 15720 }, { "epoch": 0.47, "learning_rate": 2.6693534048479823e-05, "loss": 0.769, "step": 15730 }, { "epoch": 0.47, "learning_rate": 2.667871747762698e-05, "loss": 0.9137, "step": 15740 }, { "epoch": 0.47, "learning_rate": 2.6663900906774138e-05, "loss": 0.8526, "step": 15750 }, { "epoch": 0.47, "learning_rate": 2.6649084335921298e-05, "loss": 0.9874, "step": 15760 }, { "epoch": 0.47, "learning_rate": 2.6634267765068455e-05, "loss": 0.9854, "step": 15770 }, { "epoch": 0.47, "learning_rate": 2.6619451194215613e-05, "loss": 1.0488, "step": 15780 }, { "epoch": 0.47, "learning_rate": 2.6604634623362767e-05, "loss": 0.8769, "step": 15790 }, { "epoch": 0.47, "learning_rate": 2.658981805250993e-05, "loss": 0.9002, "step": 15800 }, { "epoch": 0.47, "learning_rate": 2.6575001481657084e-05, "loss": 0.7225, "step": 15810 }, { "epoch": 0.47, "learning_rate": 2.656018491080424e-05, "loss": 0.9178, "step": 15820 }, { "epoch": 0.47, "learning_rate": 2.6545368339951406e-05, "loss": 0.8703, "step": 15830 }, { "epoch": 0.47, "learning_rate": 2.653055176909856e-05, "loss": 0.9041, "step": 15840 }, { "epoch": 0.47, "learning_rate": 2.6515735198245717e-05, "loss": 0.9494, "step": 15850 }, { "epoch": 0.47, "learning_rate": 2.6500918627392877e-05, "loss": 0.8609, "step": 15860 }, { "epoch": 0.47, "learning_rate": 2.6486102056540035e-05, "loss": 1.0311, "step": 15870 }, { "epoch": 0.47, "learning_rate": 2.6471285485687192e-05, "loss": 0.8774, "step": 15880 }, { "epoch": 0.47, "learning_rate": 2.6456468914834352e-05, "loss": 0.8898, "step": 15890 }, { "epoch": 0.47, "learning_rate": 2.644165234398151e-05, "loss": 0.9353, "step": 15900 }, { "epoch": 0.47, "learning_rate": 2.6426835773128667e-05, "loss": 0.8315, "step": 15910 }, { "epoch": 0.47, "learning_rate": 2.6412019202275828e-05, "loss": 1.0566, "step": 15920 }, { "epoch": 0.47, "learning_rate": 2.6397202631422985e-05, "loss": 0.9669, "step": 15930 }, { "epoch": 0.47, "learning_rate": 2.6382386060570142e-05, "loss": 0.9258, "step": 15940 }, { "epoch": 0.47, "learning_rate": 2.6367569489717303e-05, "loss": 0.7841, "step": 15950 }, { "epoch": 0.47, "learning_rate": 2.635275291886446e-05, "loss": 1.0183, "step": 15960 }, { "epoch": 0.47, "learning_rate": 2.6337936348011617e-05, "loss": 0.9442, "step": 15970 }, { "epoch": 0.47, "learning_rate": 2.6323119777158778e-05, "loss": 0.9813, "step": 15980 }, { "epoch": 0.47, "learning_rate": 2.6308303206305935e-05, "loss": 0.9232, "step": 15990 }, { "epoch": 0.47, "learning_rate": 2.6293486635453092e-05, "loss": 0.9389, "step": 16000 }, { "epoch": 0.47, "learning_rate": 2.6278670064600253e-05, "loss": 0.8465, "step": 16010 }, { "epoch": 0.47, "learning_rate": 2.626385349374741e-05, "loss": 0.8455, "step": 16020 }, { "epoch": 0.48, "learning_rate": 2.6249036922894564e-05, "loss": 0.9775, "step": 16030 }, { "epoch": 0.48, "learning_rate": 2.6234220352041728e-05, "loss": 0.9338, "step": 16040 }, { "epoch": 0.48, "learning_rate": 2.6219403781188885e-05, "loss": 0.7642, "step": 16050 }, { "epoch": 0.48, "learning_rate": 2.620458721033604e-05, "loss": 0.9005, "step": 16060 }, { "epoch": 0.48, "learning_rate": 2.6189770639483196e-05, "loss": 0.8258, "step": 16070 }, { "epoch": 0.48, "learning_rate": 2.617495406863036e-05, "loss": 0.9772, "step": 16080 }, { "epoch": 0.48, "learning_rate": 2.6160137497777514e-05, "loss": 0.9044, "step": 16090 }, { "epoch": 0.48, "learning_rate": 2.614532092692467e-05, "loss": 0.9335, "step": 16100 }, { "epoch": 0.48, "learning_rate": 2.6130504356071832e-05, "loss": 0.8672, "step": 16110 }, { "epoch": 0.48, "learning_rate": 2.611568778521899e-05, "loss": 1.017, "step": 16120 }, { "epoch": 0.48, "learning_rate": 2.6100871214366146e-05, "loss": 0.9566, "step": 16130 }, { "epoch": 0.48, "learning_rate": 2.6086054643513307e-05, "loss": 0.7851, "step": 16140 }, { "epoch": 0.48, "learning_rate": 2.6071238072660464e-05, "loss": 0.8653, "step": 16150 }, { "epoch": 0.48, "learning_rate": 2.605642150180762e-05, "loss": 0.9346, "step": 16160 }, { "epoch": 0.48, "learning_rate": 2.6041604930954782e-05, "loss": 0.8218, "step": 16170 }, { "epoch": 0.48, "learning_rate": 2.602678836010194e-05, "loss": 0.8127, "step": 16180 }, { "epoch": 0.48, "learning_rate": 2.6011971789249096e-05, "loss": 1.0239, "step": 16190 }, { "epoch": 0.48, "learning_rate": 2.5997155218396257e-05, "loss": 0.9663, "step": 16200 }, { "epoch": 0.48, "learning_rate": 2.5982338647543414e-05, "loss": 0.9385, "step": 16210 }, { "epoch": 0.48, "learning_rate": 2.596752207669057e-05, "loss": 0.8683, "step": 16220 }, { "epoch": 0.48, "learning_rate": 2.5952705505837732e-05, "loss": 0.8967, "step": 16230 }, { "epoch": 0.48, "learning_rate": 2.593788893498489e-05, "loss": 0.6821, "step": 16240 }, { "epoch": 0.48, "learning_rate": 2.5923072364132047e-05, "loss": 0.8757, "step": 16250 }, { "epoch": 0.48, "learning_rate": 2.5908255793279207e-05, "loss": 0.8384, "step": 16260 }, { "epoch": 0.48, "learning_rate": 2.5893439222426365e-05, "loss": 0.9881, "step": 16270 }, { "epoch": 0.48, "learning_rate": 2.587862265157352e-05, "loss": 1.0065, "step": 16280 }, { "epoch": 0.48, "learning_rate": 2.5863806080720682e-05, "loss": 0.8451, "step": 16290 }, { "epoch": 0.48, "learning_rate": 2.584898950986784e-05, "loss": 0.8693, "step": 16300 }, { "epoch": 0.48, "learning_rate": 2.5834172939014993e-05, "loss": 0.8972, "step": 16310 }, { "epoch": 0.48, "learning_rate": 2.5819356368162157e-05, "loss": 0.9758, "step": 16320 }, { "epoch": 0.48, "learning_rate": 2.580453979730931e-05, "loss": 0.8606, "step": 16330 }, { "epoch": 0.48, "learning_rate": 2.578972322645647e-05, "loss": 0.9072, "step": 16340 }, { "epoch": 0.48, "learning_rate": 2.5774906655603626e-05, "loss": 0.9198, "step": 16350 }, { "epoch": 0.48, "learning_rate": 2.5760090084750786e-05, "loss": 0.847, "step": 16360 }, { "epoch": 0.49, "learning_rate": 2.5745273513897944e-05, "loss": 0.9478, "step": 16370 }, { "epoch": 0.49, "learning_rate": 2.57304569430451e-05, "loss": 0.7333, "step": 16380 }, { "epoch": 0.49, "learning_rate": 2.571564037219226e-05, "loss": 1.0021, "step": 16390 }, { "epoch": 0.49, "learning_rate": 2.570082380133942e-05, "loss": 0.9394, "step": 16400 }, { "epoch": 0.49, "learning_rate": 2.5686007230486576e-05, "loss": 0.8365, "step": 16410 }, { "epoch": 0.49, "learning_rate": 2.5671190659633737e-05, "loss": 0.781, "step": 16420 }, { "epoch": 0.49, "learning_rate": 2.5656374088780894e-05, "loss": 0.9276, "step": 16430 }, { "epoch": 0.49, "learning_rate": 2.564155751792805e-05, "loss": 0.9678, "step": 16440 }, { "epoch": 0.49, "learning_rate": 2.562674094707521e-05, "loss": 0.93, "step": 16450 }, { "epoch": 0.49, "learning_rate": 2.561192437622237e-05, "loss": 0.9031, "step": 16460 }, { "epoch": 0.49, "learning_rate": 2.5597107805369526e-05, "loss": 1.0367, "step": 16470 }, { "epoch": 0.49, "learning_rate": 2.5582291234516687e-05, "loss": 0.8493, "step": 16480 }, { "epoch": 0.49, "learning_rate": 2.5567474663663844e-05, "loss": 0.7942, "step": 16490 }, { "epoch": 0.49, "learning_rate": 2.5552658092810998e-05, "loss": 0.9409, "step": 16500 }, { "epoch": 0.49, "learning_rate": 2.5537841521958162e-05, "loss": 0.7449, "step": 16510 }, { "epoch": 0.49, "learning_rate": 2.552302495110532e-05, "loss": 0.9053, "step": 16520 }, { "epoch": 0.49, "learning_rate": 2.5508208380252473e-05, "loss": 0.8583, "step": 16530 }, { "epoch": 0.49, "learning_rate": 2.5493391809399637e-05, "loss": 0.8682, "step": 16540 }, { "epoch": 0.49, "learning_rate": 2.547857523854679e-05, "loss": 0.9189, "step": 16550 }, { "epoch": 0.49, "learning_rate": 2.5463758667693948e-05, "loss": 0.7976, "step": 16560 }, { "epoch": 0.49, "learning_rate": 2.5448942096841112e-05, "loss": 0.8626, "step": 16570 }, { "epoch": 0.49, "learning_rate": 2.5434125525988266e-05, "loss": 0.7991, "step": 16580 }, { "epoch": 0.49, "learning_rate": 2.5419308955135423e-05, "loss": 0.9152, "step": 16590 }, { "epoch": 0.49, "learning_rate": 2.5404492384282587e-05, "loss": 1.0176, "step": 16600 }, { "epoch": 0.49, "learning_rate": 2.538967581342974e-05, "loss": 0.8972, "step": 16610 }, { "epoch": 0.49, "learning_rate": 2.5374859242576898e-05, "loss": 0.8773, "step": 16620 }, { "epoch": 0.49, "learning_rate": 2.5360042671724055e-05, "loss": 0.9817, "step": 16630 }, { "epoch": 0.49, "learning_rate": 2.5345226100871216e-05, "loss": 0.8889, "step": 16640 }, { "epoch": 0.49, "learning_rate": 2.5330409530018373e-05, "loss": 0.8452, "step": 16650 }, { "epoch": 0.49, "learning_rate": 2.531559295916553e-05, "loss": 0.8898, "step": 16660 }, { "epoch": 0.49, "learning_rate": 2.530077638831269e-05, "loss": 0.8766, "step": 16670 }, { "epoch": 0.49, "learning_rate": 2.528595981745985e-05, "loss": 0.9172, "step": 16680 }, { "epoch": 0.49, "learning_rate": 2.5271143246607006e-05, "loss": 0.8526, "step": 16690 }, { "epoch": 0.49, "learning_rate": 2.5256326675754166e-05, "loss": 0.9177, "step": 16700 }, { "epoch": 0.5, "learning_rate": 2.5241510104901323e-05, "loss": 0.8059, "step": 16710 }, { "epoch": 0.5, "learning_rate": 2.5226693534048477e-05, "loss": 0.8278, "step": 16720 }, { "epoch": 0.5, "learning_rate": 2.521187696319564e-05, "loss": 0.9526, "step": 16730 }, { "epoch": 0.5, "learning_rate": 2.51970603923428e-05, "loss": 0.9538, "step": 16740 }, { "epoch": 0.5, "learning_rate": 2.5182243821489952e-05, "loss": 0.9277, "step": 16750 }, { "epoch": 0.5, "learning_rate": 2.5167427250637116e-05, "loss": 0.865, "step": 16760 }, { "epoch": 0.5, "learning_rate": 2.5152610679784274e-05, "loss": 0.9414, "step": 16770 }, { "epoch": 0.5, "learning_rate": 2.5137794108931427e-05, "loss": 0.7853, "step": 16780 }, { "epoch": 0.5, "learning_rate": 2.512297753807859e-05, "loss": 0.9213, "step": 16790 }, { "epoch": 0.5, "learning_rate": 2.5108160967225745e-05, "loss": 0.9364, "step": 16800 }, { "epoch": 0.5, "learning_rate": 2.5093344396372903e-05, "loss": 0.8423, "step": 16810 }, { "epoch": 0.5, "learning_rate": 2.5078527825520067e-05, "loss": 0.9448, "step": 16820 }, { "epoch": 0.5, "learning_rate": 2.506371125466722e-05, "loss": 0.9168, "step": 16830 }, { "epoch": 0.5, "learning_rate": 2.5048894683814378e-05, "loss": 0.9517, "step": 16840 }, { "epoch": 0.5, "learning_rate": 2.5034078112961538e-05, "loss": 0.8643, "step": 16850 }, { "epoch": 0.5, "learning_rate": 2.5019261542108695e-05, "loss": 0.916, "step": 16860 }, { "epoch": 0.5, "learning_rate": 2.5004444971255853e-05, "loss": 1.0083, "step": 16870 }, { "epoch": 0.5, "learning_rate": 2.4989628400403013e-05, "loss": 0.892, "step": 16880 }, { "epoch": 0.5, "learning_rate": 2.497481182955017e-05, "loss": 1.0555, "step": 16890 }, { "epoch": 0.5, "learning_rate": 2.4959995258697328e-05, "loss": 0.8717, "step": 16900 }, { "epoch": 0.5, "learning_rate": 2.4945178687844485e-05, "loss": 0.8993, "step": 16910 }, { "epoch": 0.5, "learning_rate": 2.4930362116991646e-05, "loss": 0.8272, "step": 16920 }, { "epoch": 0.5, "learning_rate": 2.4915545546138803e-05, "loss": 0.8452, "step": 16930 }, { "epoch": 0.5, "learning_rate": 2.490072897528596e-05, "loss": 0.9096, "step": 16940 }, { "epoch": 0.5, "learning_rate": 2.488591240443312e-05, "loss": 0.754, "step": 16950 }, { "epoch": 0.5, "learning_rate": 2.4871095833580278e-05, "loss": 0.9685, "step": 16960 }, { "epoch": 0.5, "learning_rate": 2.4856279262727435e-05, "loss": 0.8254, "step": 16970 }, { "epoch": 0.5, "learning_rate": 2.4841462691874592e-05, "loss": 1.0172, "step": 16980 }, { "epoch": 0.5, "learning_rate": 2.4826646121021753e-05, "loss": 0.8977, "step": 16990 }, { "epoch": 0.5, "learning_rate": 2.481182955016891e-05, "loss": 0.9804, "step": 17000 }, { "epoch": 0.5, "learning_rate": 2.4797012979316067e-05, "loss": 0.9287, "step": 17010 }, { "epoch": 0.5, "learning_rate": 2.4782196408463225e-05, "loss": 0.7927, "step": 17020 }, { "epoch": 0.5, "learning_rate": 2.4767379837610385e-05, "loss": 0.9531, "step": 17030 }, { "epoch": 0.5, "learning_rate": 2.4752563266757543e-05, "loss": 0.8374, "step": 17040 }, { "epoch": 0.51, "learning_rate": 2.47377466959047e-05, "loss": 0.8209, "step": 17050 }, { "epoch": 0.51, "learning_rate": 2.472293012505186e-05, "loss": 0.8736, "step": 17060 }, { "epoch": 0.51, "learning_rate": 2.4708113554199018e-05, "loss": 0.8482, "step": 17070 }, { "epoch": 0.51, "learning_rate": 2.4693296983346175e-05, "loss": 1.0184, "step": 17080 }, { "epoch": 0.51, "learning_rate": 2.4678480412493336e-05, "loss": 0.9015, "step": 17090 }, { "epoch": 0.51, "learning_rate": 2.4663663841640493e-05, "loss": 0.891, "step": 17100 }, { "epoch": 0.51, "learning_rate": 2.464884727078765e-05, "loss": 0.84, "step": 17110 }, { "epoch": 0.51, "learning_rate": 2.4634030699934807e-05, "loss": 0.9163, "step": 17120 }, { "epoch": 0.51, "learning_rate": 2.4619214129081964e-05, "loss": 0.8362, "step": 17130 }, { "epoch": 0.51, "learning_rate": 2.4604397558229125e-05, "loss": 0.7383, "step": 17140 }, { "epoch": 0.51, "learning_rate": 2.4589580987376282e-05, "loss": 0.8876, "step": 17150 }, { "epoch": 0.51, "learning_rate": 2.457476441652344e-05, "loss": 0.9511, "step": 17160 }, { "epoch": 0.51, "learning_rate": 2.45599478456706e-05, "loss": 0.9941, "step": 17170 }, { "epoch": 0.51, "learning_rate": 2.4545131274817757e-05, "loss": 0.7721, "step": 17180 }, { "epoch": 0.51, "learning_rate": 2.4530314703964915e-05, "loss": 0.7751, "step": 17190 }, { "epoch": 0.51, "learning_rate": 2.4515498133112075e-05, "loss": 0.7806, "step": 17200 }, { "epoch": 0.51, "learning_rate": 2.4500681562259232e-05, "loss": 0.926, "step": 17210 }, { "epoch": 0.51, "learning_rate": 2.448586499140639e-05, "loss": 0.9392, "step": 17220 }, { "epoch": 0.51, "learning_rate": 2.447104842055355e-05, "loss": 0.8768, "step": 17230 }, { "epoch": 0.51, "learning_rate": 2.4456231849700708e-05, "loss": 0.7338, "step": 17240 }, { "epoch": 0.51, "learning_rate": 2.4441415278847865e-05, "loss": 0.9202, "step": 17250 }, { "epoch": 0.51, "learning_rate": 2.4426598707995022e-05, "loss": 0.9434, "step": 17260 }, { "epoch": 0.51, "learning_rate": 2.441178213714218e-05, "loss": 0.7249, "step": 17270 }, { "epoch": 0.51, "learning_rate": 2.439696556628934e-05, "loss": 0.8375, "step": 17280 }, { "epoch": 0.51, "learning_rate": 2.4382148995436497e-05, "loss": 0.7855, "step": 17290 }, { "epoch": 0.51, "learning_rate": 2.4367332424583654e-05, "loss": 0.9012, "step": 17300 }, { "epoch": 0.51, "learning_rate": 2.4352515853730815e-05, "loss": 0.9847, "step": 17310 }, { "epoch": 0.51, "learning_rate": 2.4337699282877972e-05, "loss": 0.8869, "step": 17320 }, { "epoch": 0.51, "learning_rate": 2.432288271202513e-05, "loss": 0.8767, "step": 17330 }, { "epoch": 0.51, "learning_rate": 2.430806614117229e-05, "loss": 0.815, "step": 17340 }, { "epoch": 0.51, "learning_rate": 2.4293249570319447e-05, "loss": 0.8581, "step": 17350 }, { "epoch": 0.51, "learning_rate": 2.4278432999466604e-05, "loss": 0.8126, "step": 17360 }, { "epoch": 0.51, "learning_rate": 2.4263616428613765e-05, "loss": 0.8067, "step": 17370 }, { "epoch": 0.52, "learning_rate": 2.424879985776092e-05, "loss": 0.8889, "step": 17380 }, { "epoch": 0.52, "learning_rate": 2.423398328690808e-05, "loss": 0.8773, "step": 17390 }, { "epoch": 0.52, "learning_rate": 2.4219166716055237e-05, "loss": 0.8586, "step": 17400 }, { "epoch": 0.52, "learning_rate": 2.4204350145202394e-05, "loss": 0.8651, "step": 17410 }, { "epoch": 0.52, "learning_rate": 2.4189533574349555e-05, "loss": 0.9208, "step": 17420 }, { "epoch": 0.52, "learning_rate": 2.4174717003496712e-05, "loss": 1.0027, "step": 17430 }, { "epoch": 0.52, "learning_rate": 2.415990043264387e-05, "loss": 0.8523, "step": 17440 }, { "epoch": 0.52, "learning_rate": 2.414508386179103e-05, "loss": 0.9177, "step": 17450 }, { "epoch": 0.52, "learning_rate": 2.4130267290938187e-05, "loss": 0.9654, "step": 17460 }, { "epoch": 0.52, "learning_rate": 2.4115450720085344e-05, "loss": 0.9022, "step": 17470 }, { "epoch": 0.52, "learning_rate": 2.4100634149232505e-05, "loss": 0.999, "step": 17480 }, { "epoch": 0.52, "learning_rate": 2.408581757837966e-05, "loss": 0.7946, "step": 17490 }, { "epoch": 0.52, "learning_rate": 2.407100100752682e-05, "loss": 0.806, "step": 17500 }, { "epoch": 0.52, "learning_rate": 2.405618443667398e-05, "loss": 0.9306, "step": 17510 }, { "epoch": 0.52, "learning_rate": 2.4041367865821134e-05, "loss": 0.8505, "step": 17520 }, { "epoch": 0.52, "learning_rate": 2.4026551294968294e-05, "loss": 0.7275, "step": 17530 }, { "epoch": 0.52, "learning_rate": 2.401173472411545e-05, "loss": 0.85, "step": 17540 }, { "epoch": 0.52, "learning_rate": 2.399691815326261e-05, "loss": 0.9039, "step": 17550 }, { "epoch": 0.52, "learning_rate": 2.398210158240977e-05, "loss": 0.908, "step": 17560 }, { "epoch": 0.52, "learning_rate": 2.3967285011556927e-05, "loss": 0.774, "step": 17570 }, { "epoch": 0.52, "learning_rate": 2.3952468440704084e-05, "loss": 0.9109, "step": 17580 }, { "epoch": 0.52, "learning_rate": 2.3937651869851245e-05, "loss": 0.9756, "step": 17590 }, { "epoch": 0.52, "learning_rate": 2.39228352989984e-05, "loss": 0.8371, "step": 17600 }, { "epoch": 0.52, "learning_rate": 2.390801872814556e-05, "loss": 0.8419, "step": 17610 }, { "epoch": 0.52, "learning_rate": 2.389320215729272e-05, "loss": 0.9165, "step": 17620 }, { "epoch": 0.52, "learning_rate": 2.3878385586439873e-05, "loss": 0.9916, "step": 17630 }, { "epoch": 0.52, "learning_rate": 2.3863569015587034e-05, "loss": 1.0279, "step": 17640 }, { "epoch": 0.52, "learning_rate": 2.3848752444734195e-05, "loss": 0.8075, "step": 17650 }, { "epoch": 0.52, "learning_rate": 2.383393587388135e-05, "loss": 0.7937, "step": 17660 }, { "epoch": 0.52, "learning_rate": 2.381911930302851e-05, "loss": 0.9099, "step": 17670 }, { "epoch": 0.52, "learning_rate": 2.3804302732175666e-05, "loss": 0.8146, "step": 17680 }, { "epoch": 0.52, "learning_rate": 2.3789486161322824e-05, "loss": 0.8404, "step": 17690 }, { "epoch": 0.52, "learning_rate": 2.3774669590469984e-05, "loss": 0.8843, "step": 17700 }, { "epoch": 0.52, "learning_rate": 2.375985301961714e-05, "loss": 0.9091, "step": 17710 }, { "epoch": 0.53, "learning_rate": 2.37450364487643e-05, "loss": 0.7976, "step": 17720 }, { "epoch": 0.53, "learning_rate": 2.373021987791146e-05, "loss": 0.8443, "step": 17730 }, { "epoch": 0.53, "learning_rate": 2.3715403307058613e-05, "loss": 0.8589, "step": 17740 }, { "epoch": 0.53, "learning_rate": 2.3700586736205774e-05, "loss": 0.9244, "step": 17750 }, { "epoch": 0.53, "learning_rate": 2.3685770165352934e-05, "loss": 0.6903, "step": 17760 }, { "epoch": 0.53, "learning_rate": 2.3670953594500088e-05, "loss": 0.8427, "step": 17770 }, { "epoch": 0.53, "learning_rate": 2.365613702364725e-05, "loss": 1.0646, "step": 17780 }, { "epoch": 0.53, "learning_rate": 2.3641320452794406e-05, "loss": 0.7852, "step": 17790 }, { "epoch": 0.53, "learning_rate": 2.3626503881941563e-05, "loss": 0.7886, "step": 17800 }, { "epoch": 0.53, "learning_rate": 2.3611687311088724e-05, "loss": 0.8728, "step": 17810 }, { "epoch": 0.53, "learning_rate": 2.359687074023588e-05, "loss": 0.8495, "step": 17820 }, { "epoch": 0.53, "learning_rate": 2.358205416938304e-05, "loss": 1.0353, "step": 17830 }, { "epoch": 0.53, "learning_rate": 2.35672375985302e-05, "loss": 0.7727, "step": 17840 }, { "epoch": 0.53, "learning_rate": 2.3552421027677353e-05, "loss": 0.7972, "step": 17850 }, { "epoch": 0.53, "learning_rate": 2.3537604456824514e-05, "loss": 0.8583, "step": 17860 }, { "epoch": 0.53, "learning_rate": 2.3522787885971674e-05, "loss": 0.7446, "step": 17870 }, { "epoch": 0.53, "learning_rate": 2.3507971315118828e-05, "loss": 0.7902, "step": 17880 }, { "epoch": 0.53, "learning_rate": 2.349315474426599e-05, "loss": 0.9152, "step": 17890 }, { "epoch": 0.53, "learning_rate": 2.3478338173413146e-05, "loss": 0.7742, "step": 17900 }, { "epoch": 0.53, "learning_rate": 2.3463521602560303e-05, "loss": 0.8286, "step": 17910 }, { "epoch": 0.53, "learning_rate": 2.3448705031707464e-05, "loss": 0.9388, "step": 17920 }, { "epoch": 0.53, "learning_rate": 2.343388846085462e-05, "loss": 0.942, "step": 17930 }, { "epoch": 0.53, "learning_rate": 2.3419071890001778e-05, "loss": 0.8437, "step": 17940 }, { "epoch": 0.53, "learning_rate": 2.340425531914894e-05, "loss": 0.8614, "step": 17950 }, { "epoch": 0.53, "learning_rate": 2.3389438748296093e-05, "loss": 0.8642, "step": 17960 }, { "epoch": 0.53, "learning_rate": 2.3374622177443253e-05, "loss": 0.7555, "step": 17970 }, { "epoch": 0.53, "learning_rate": 2.3359805606590414e-05, "loss": 1.0767, "step": 17980 }, { "epoch": 0.53, "learning_rate": 2.3344989035737568e-05, "loss": 0.8096, "step": 17990 }, { "epoch": 0.53, "learning_rate": 2.333017246488473e-05, "loss": 0.8931, "step": 18000 }, { "epoch": 0.53, "learning_rate": 2.3315355894031886e-05, "loss": 0.8635, "step": 18010 }, { "epoch": 0.53, "learning_rate": 2.3300539323179043e-05, "loss": 0.9228, "step": 18020 }, { "epoch": 0.53, "learning_rate": 2.3285722752326203e-05, "loss": 0.9533, "step": 18030 }, { "epoch": 0.53, "learning_rate": 2.327090618147336e-05, "loss": 0.7538, "step": 18040 }, { "epoch": 0.53, "learning_rate": 2.3256089610620518e-05, "loss": 0.8418, "step": 18050 }, { "epoch": 0.54, "learning_rate": 2.324127303976768e-05, "loss": 0.7138, "step": 18060 }, { "epoch": 0.54, "learning_rate": 2.3226456468914836e-05, "loss": 0.8289, "step": 18070 }, { "epoch": 0.54, "learning_rate": 2.3211639898061993e-05, "loss": 0.8393, "step": 18080 }, { "epoch": 0.54, "learning_rate": 2.3196823327209154e-05, "loss": 0.8786, "step": 18090 }, { "epoch": 0.54, "learning_rate": 2.3182006756356307e-05, "loss": 0.8824, "step": 18100 }, { "epoch": 0.54, "learning_rate": 2.3167190185503468e-05, "loss": 0.8648, "step": 18110 }, { "epoch": 0.54, "learning_rate": 2.315237361465063e-05, "loss": 0.9172, "step": 18120 }, { "epoch": 0.54, "learning_rate": 2.3137557043797783e-05, "loss": 0.7848, "step": 18130 }, { "epoch": 0.54, "learning_rate": 2.3122740472944943e-05, "loss": 0.7298, "step": 18140 }, { "epoch": 0.54, "learning_rate": 2.31079239020921e-05, "loss": 0.867, "step": 18150 }, { "epoch": 0.54, "learning_rate": 2.3093107331239258e-05, "loss": 0.7374, "step": 18160 }, { "epoch": 0.54, "learning_rate": 2.3078290760386418e-05, "loss": 0.8786, "step": 18170 }, { "epoch": 0.54, "learning_rate": 2.3063474189533575e-05, "loss": 1.009, "step": 18180 }, { "epoch": 0.54, "learning_rate": 2.3048657618680733e-05, "loss": 0.8278, "step": 18190 }, { "epoch": 0.54, "learning_rate": 2.3033841047827893e-05, "loss": 0.8863, "step": 18200 }, { "epoch": 0.54, "learning_rate": 2.301902447697505e-05, "loss": 1.0623, "step": 18210 }, { "epoch": 0.54, "learning_rate": 2.3004207906122208e-05, "loss": 0.7678, "step": 18220 }, { "epoch": 0.54, "learning_rate": 2.298939133526937e-05, "loss": 0.9686, "step": 18230 }, { "epoch": 0.54, "learning_rate": 2.2974574764416522e-05, "loss": 0.8615, "step": 18240 }, { "epoch": 0.54, "learning_rate": 2.2959758193563683e-05, "loss": 0.8721, "step": 18250 }, { "epoch": 0.54, "learning_rate": 2.294494162271084e-05, "loss": 0.8698, "step": 18260 }, { "epoch": 0.54, "learning_rate": 2.2930125051857997e-05, "loss": 0.8678, "step": 18270 }, { "epoch": 0.54, "learning_rate": 2.2915308481005158e-05, "loss": 0.7442, "step": 18280 }, { "epoch": 0.54, "learning_rate": 2.2900491910152315e-05, "loss": 0.9643, "step": 18290 }, { "epoch": 0.54, "learning_rate": 2.2885675339299472e-05, "loss": 0.8196, "step": 18300 }, { "epoch": 0.54, "learning_rate": 2.2870858768446633e-05, "loss": 0.8376, "step": 18310 }, { "epoch": 0.54, "learning_rate": 2.285604219759379e-05, "loss": 0.7705, "step": 18320 }, { "epoch": 0.54, "learning_rate": 2.2841225626740948e-05, "loss": 0.88, "step": 18330 }, { "epoch": 0.54, "learning_rate": 2.2826409055888108e-05, "loss": 0.7661, "step": 18340 }, { "epoch": 0.54, "learning_rate": 2.2811592485035265e-05, "loss": 0.8063, "step": 18350 }, { "epoch": 0.54, "learning_rate": 2.2796775914182423e-05, "loss": 0.8005, "step": 18360 }, { "epoch": 0.54, "learning_rate": 2.278195934332958e-05, "loss": 0.7737, "step": 18370 }, { "epoch": 0.54, "learning_rate": 2.2767142772476737e-05, "loss": 0.8472, "step": 18380 }, { "epoch": 0.54, "learning_rate": 2.2752326201623898e-05, "loss": 0.782, "step": 18390 }, { "epoch": 0.55, "learning_rate": 2.2737509630771055e-05, "loss": 0.7141, "step": 18400 }, { "epoch": 0.55, "learning_rate": 2.2722693059918212e-05, "loss": 0.9833, "step": 18410 }, { "epoch": 0.55, "learning_rate": 2.2707876489065373e-05, "loss": 0.7897, "step": 18420 }, { "epoch": 0.55, "learning_rate": 2.269305991821253e-05, "loss": 0.8591, "step": 18430 }, { "epoch": 0.55, "learning_rate": 2.2678243347359687e-05, "loss": 0.9508, "step": 18440 }, { "epoch": 0.55, "learning_rate": 2.2663426776506848e-05, "loss": 0.8874, "step": 18450 }, { "epoch": 0.55, "learning_rate": 2.2648610205654005e-05, "loss": 0.925, "step": 18460 }, { "epoch": 0.55, "learning_rate": 2.2633793634801162e-05, "loss": 0.7757, "step": 18470 }, { "epoch": 0.55, "learning_rate": 2.261897706394832e-05, "loss": 0.7419, "step": 18480 }, { "epoch": 0.55, "learning_rate": 2.260416049309548e-05, "loss": 0.9423, "step": 18490 }, { "epoch": 0.55, "learning_rate": 2.2589343922242637e-05, "loss": 0.8271, "step": 18500 }, { "epoch": 0.55, "learning_rate": 2.2574527351389795e-05, "loss": 0.8333, "step": 18510 }, { "epoch": 0.55, "learning_rate": 2.2559710780536952e-05, "loss": 0.9858, "step": 18520 }, { "epoch": 0.55, "learning_rate": 2.2544894209684112e-05, "loss": 0.8814, "step": 18530 }, { "epoch": 0.55, "learning_rate": 2.253007763883127e-05, "loss": 1.0032, "step": 18540 }, { "epoch": 0.55, "learning_rate": 2.2515261067978427e-05, "loss": 0.7925, "step": 18550 }, { "epoch": 0.55, "learning_rate": 2.2500444497125588e-05, "loss": 0.7896, "step": 18560 }, { "epoch": 0.55, "learning_rate": 2.2485627926272745e-05, "loss": 0.9071, "step": 18570 }, { "epoch": 0.55, "learning_rate": 2.2470811355419902e-05, "loss": 0.999, "step": 18580 }, { "epoch": 0.55, "learning_rate": 2.245599478456706e-05, "loss": 0.8147, "step": 18590 }, { "epoch": 0.55, "learning_rate": 2.244117821371422e-05, "loss": 0.8457, "step": 18600 }, { "epoch": 0.55, "learning_rate": 2.2426361642861377e-05, "loss": 0.9476, "step": 18610 }, { "epoch": 0.55, "learning_rate": 2.2411545072008534e-05, "loss": 0.7915, "step": 18620 }, { "epoch": 0.55, "learning_rate": 2.2396728501155695e-05, "loss": 0.8055, "step": 18630 }, { "epoch": 0.55, "learning_rate": 2.2381911930302852e-05, "loss": 0.9684, "step": 18640 }, { "epoch": 0.55, "learning_rate": 2.236709535945001e-05, "loss": 0.8395, "step": 18650 }, { "epoch": 0.55, "learning_rate": 2.2352278788597167e-05, "loss": 0.8448, "step": 18660 }, { "epoch": 0.55, "learning_rate": 2.2337462217744327e-05, "loss": 0.8679, "step": 18670 }, { "epoch": 0.55, "learning_rate": 2.2322645646891485e-05, "loss": 0.7937, "step": 18680 }, { "epoch": 0.55, "learning_rate": 2.2307829076038642e-05, "loss": 0.9354, "step": 18690 }, { "epoch": 0.55, "learning_rate": 2.2293012505185802e-05, "loss": 0.9462, "step": 18700 }, { "epoch": 0.55, "learning_rate": 2.227819593433296e-05, "loss": 0.7613, "step": 18710 }, { "epoch": 0.55, "learning_rate": 2.2263379363480117e-05, "loss": 0.7807, "step": 18720 }, { "epoch": 0.56, "learning_rate": 2.2248562792627274e-05, "loss": 0.8408, "step": 18730 }, { "epoch": 0.56, "learning_rate": 2.2233746221774435e-05, "loss": 0.9313, "step": 18740 }, { "epoch": 0.56, "learning_rate": 2.2218929650921592e-05, "loss": 0.8566, "step": 18750 }, { "epoch": 0.56, "learning_rate": 2.220411308006875e-05, "loss": 0.7942, "step": 18760 }, { "epoch": 0.56, "learning_rate": 2.218929650921591e-05, "loss": 0.9131, "step": 18770 }, { "epoch": 0.56, "learning_rate": 2.2174479938363067e-05, "loss": 0.8862, "step": 18780 }, { "epoch": 0.56, "learning_rate": 2.2159663367510224e-05, "loss": 0.8691, "step": 18790 }, { "epoch": 0.56, "learning_rate": 2.214484679665738e-05, "loss": 0.9793, "step": 18800 }, { "epoch": 0.56, "learning_rate": 2.2130030225804542e-05, "loss": 0.9441, "step": 18810 }, { "epoch": 0.56, "learning_rate": 2.21152136549517e-05, "loss": 0.842, "step": 18820 }, { "epoch": 0.56, "learning_rate": 2.2100397084098857e-05, "loss": 0.8368, "step": 18830 }, { "epoch": 0.56, "learning_rate": 2.2085580513246014e-05, "loss": 0.9774, "step": 18840 }, { "epoch": 0.56, "learning_rate": 2.2070763942393174e-05, "loss": 0.8356, "step": 18850 }, { "epoch": 0.56, "learning_rate": 2.205594737154033e-05, "loss": 0.7599, "step": 18860 }, { "epoch": 0.56, "learning_rate": 2.204113080068749e-05, "loss": 0.8256, "step": 18870 }, { "epoch": 0.56, "learning_rate": 2.202631422983465e-05, "loss": 0.9039, "step": 18880 }, { "epoch": 0.56, "learning_rate": 2.2011497658981807e-05, "loss": 0.9825, "step": 18890 }, { "epoch": 0.56, "learning_rate": 2.1996681088128964e-05, "loss": 0.8668, "step": 18900 }, { "epoch": 0.56, "learning_rate": 2.1981864517276125e-05, "loss": 0.8308, "step": 18910 }, { "epoch": 0.56, "learning_rate": 2.1967047946423282e-05, "loss": 0.9404, "step": 18920 }, { "epoch": 0.56, "learning_rate": 2.195223137557044e-05, "loss": 0.8151, "step": 18930 }, { "epoch": 0.56, "learning_rate": 2.1937414804717596e-05, "loss": 0.9006, "step": 18940 }, { "epoch": 0.56, "learning_rate": 2.1922598233864754e-05, "loss": 0.8403, "step": 18950 }, { "epoch": 0.56, "learning_rate": 2.1907781663011914e-05, "loss": 0.8765, "step": 18960 }, { "epoch": 0.56, "learning_rate": 2.189296509215907e-05, "loss": 0.8613, "step": 18970 }, { "epoch": 0.56, "learning_rate": 2.187814852130623e-05, "loss": 0.7064, "step": 18980 }, { "epoch": 0.56, "learning_rate": 2.186333195045339e-05, "loss": 0.8803, "step": 18990 }, { "epoch": 0.56, "learning_rate": 2.1848515379600546e-05, "loss": 0.8661, "step": 19000 }, { "epoch": 0.56, "learning_rate": 2.1833698808747704e-05, "loss": 0.8495, "step": 19010 }, { "epoch": 0.56, "learning_rate": 2.1818882237894864e-05, "loss": 0.8133, "step": 19020 }, { "epoch": 0.56, "learning_rate": 2.180406566704202e-05, "loss": 0.7198, "step": 19030 }, { "epoch": 0.56, "learning_rate": 2.178924909618918e-05, "loss": 0.8265, "step": 19040 }, { "epoch": 0.56, "learning_rate": 2.177443252533634e-05, "loss": 0.7314, "step": 19050 }, { "epoch": 0.56, "learning_rate": 2.1759615954483493e-05, "loss": 0.8324, "step": 19060 }, { "epoch": 0.57, "learning_rate": 2.1744799383630654e-05, "loss": 0.8576, "step": 19070 }, { "epoch": 0.57, "learning_rate": 2.172998281277781e-05, "loss": 0.7959, "step": 19080 }, { "epoch": 0.57, "learning_rate": 2.171516624192497e-05, "loss": 0.8669, "step": 19090 }, { "epoch": 0.57, "learning_rate": 2.170034967107213e-05, "loss": 0.9387, "step": 19100 }, { "epoch": 0.57, "learning_rate": 2.1685533100219286e-05, "loss": 0.867, "step": 19110 }, { "epoch": 0.57, "learning_rate": 2.1670716529366443e-05, "loss": 0.7616, "step": 19120 }, { "epoch": 0.57, "learning_rate": 2.1655899958513604e-05, "loss": 0.9017, "step": 19130 }, { "epoch": 0.57, "learning_rate": 2.164108338766076e-05, "loss": 0.9809, "step": 19140 }, { "epoch": 0.57, "learning_rate": 2.162626681680792e-05, "loss": 0.911, "step": 19150 }, { "epoch": 0.57, "learning_rate": 2.161145024595508e-05, "loss": 0.9344, "step": 19160 }, { "epoch": 0.57, "learning_rate": 2.1596633675102236e-05, "loss": 0.8076, "step": 19170 }, { "epoch": 0.57, "learning_rate": 2.1581817104249394e-05, "loss": 0.8818, "step": 19180 }, { "epoch": 0.57, "learning_rate": 2.1567000533396554e-05, "loss": 0.9721, "step": 19190 }, { "epoch": 0.57, "learning_rate": 2.1552183962543708e-05, "loss": 0.8332, "step": 19200 }, { "epoch": 0.57, "learning_rate": 2.153736739169087e-05, "loss": 0.898, "step": 19210 }, { "epoch": 0.57, "learning_rate": 2.1522550820838026e-05, "loss": 0.9453, "step": 19220 }, { "epoch": 0.57, "learning_rate": 2.1507734249985183e-05, "loss": 0.7826, "step": 19230 }, { "epoch": 0.57, "learning_rate": 2.1492917679132344e-05, "loss": 0.9795, "step": 19240 }, { "epoch": 0.57, "learning_rate": 2.14781011082795e-05, "loss": 0.9173, "step": 19250 }, { "epoch": 0.57, "learning_rate": 2.1463284537426658e-05, "loss": 0.9476, "step": 19260 }, { "epoch": 0.57, "learning_rate": 2.144846796657382e-05, "loss": 0.964, "step": 19270 }, { "epoch": 0.57, "learning_rate": 2.1433651395720976e-05, "loss": 0.9072, "step": 19280 }, { "epoch": 0.57, "learning_rate": 2.1418834824868133e-05, "loss": 0.8462, "step": 19290 }, { "epoch": 0.57, "learning_rate": 2.1404018254015294e-05, "loss": 0.874, "step": 19300 }, { "epoch": 0.57, "learning_rate": 2.1389201683162448e-05, "loss": 1.0157, "step": 19310 }, { "epoch": 0.57, "learning_rate": 2.137438511230961e-05, "loss": 0.8508, "step": 19320 }, { "epoch": 0.57, "learning_rate": 2.135956854145677e-05, "loss": 0.9037, "step": 19330 }, { "epoch": 0.57, "learning_rate": 2.1344751970603923e-05, "loss": 0.9253, "step": 19340 }, { "epoch": 0.57, "learning_rate": 2.1329935399751083e-05, "loss": 0.7522, "step": 19350 }, { "epoch": 0.57, "learning_rate": 2.131511882889824e-05, "loss": 0.9502, "step": 19360 }, { "epoch": 0.57, "learning_rate": 2.1300302258045398e-05, "loss": 0.7547, "step": 19370 }, { "epoch": 0.57, "learning_rate": 2.128548568719256e-05, "loss": 0.9124, "step": 19380 }, { "epoch": 0.57, "learning_rate": 2.1270669116339716e-05, "loss": 0.8023, "step": 19390 }, { "epoch": 0.57, "learning_rate": 2.1255852545486873e-05, "loss": 0.8538, "step": 19400 }, { "epoch": 0.58, "learning_rate": 2.1241035974634034e-05, "loss": 0.7735, "step": 19410 }, { "epoch": 0.58, "learning_rate": 2.1226219403781187e-05, "loss": 0.8246, "step": 19420 }, { "epoch": 0.58, "learning_rate": 2.1211402832928348e-05, "loss": 0.755, "step": 19430 }, { "epoch": 0.58, "learning_rate": 2.119658626207551e-05, "loss": 0.7665, "step": 19440 }, { "epoch": 0.58, "learning_rate": 2.1181769691222663e-05, "loss": 1.0283, "step": 19450 }, { "epoch": 0.58, "learning_rate": 2.1166953120369823e-05, "loss": 0.8871, "step": 19460 }, { "epoch": 0.58, "learning_rate": 2.115213654951698e-05, "loss": 0.9079, "step": 19470 }, { "epoch": 0.58, "learning_rate": 2.1137319978664138e-05, "loss": 0.7618, "step": 19480 }, { "epoch": 0.58, "learning_rate": 2.1122503407811298e-05, "loss": 0.8599, "step": 19490 }, { "epoch": 0.58, "learning_rate": 2.1107686836958456e-05, "loss": 0.842, "step": 19500 }, { "epoch": 0.58, "learning_rate": 2.1092870266105613e-05, "loss": 0.8677, "step": 19510 }, { "epoch": 0.58, "learning_rate": 2.1078053695252773e-05, "loss": 0.9027, "step": 19520 }, { "epoch": 0.58, "learning_rate": 2.1063237124399927e-05, "loss": 0.8858, "step": 19530 }, { "epoch": 0.58, "learning_rate": 2.1048420553547088e-05, "loss": 0.9126, "step": 19540 }, { "epoch": 0.58, "learning_rate": 2.103360398269425e-05, "loss": 0.9025, "step": 19550 }, { "epoch": 0.58, "learning_rate": 2.1018787411841402e-05, "loss": 0.8532, "step": 19560 }, { "epoch": 0.58, "learning_rate": 2.1003970840988563e-05, "loss": 0.8272, "step": 19570 }, { "epoch": 0.58, "learning_rate": 2.0989154270135724e-05, "loss": 0.8753, "step": 19580 }, { "epoch": 0.58, "learning_rate": 2.0974337699282877e-05, "loss": 0.9098, "step": 19590 }, { "epoch": 0.58, "learning_rate": 2.0959521128430038e-05, "loss": 0.9761, "step": 19600 }, { "epoch": 0.58, "learning_rate": 2.0944704557577195e-05, "loss": 0.8511, "step": 19610 }, { "epoch": 0.58, "learning_rate": 2.0929887986724352e-05, "loss": 0.9308, "step": 19620 }, { "epoch": 0.58, "learning_rate": 2.0915071415871513e-05, "loss": 0.9555, "step": 19630 }, { "epoch": 0.58, "learning_rate": 2.0900254845018667e-05, "loss": 0.9046, "step": 19640 }, { "epoch": 0.58, "learning_rate": 2.0885438274165828e-05, "loss": 0.7832, "step": 19650 }, { "epoch": 0.58, "learning_rate": 2.0870621703312988e-05, "loss": 0.8923, "step": 19660 }, { "epoch": 0.58, "learning_rate": 2.0855805132460142e-05, "loss": 0.8718, "step": 19670 }, { "epoch": 0.58, "learning_rate": 2.0840988561607303e-05, "loss": 0.8448, "step": 19680 }, { "epoch": 0.58, "learning_rate": 2.0826171990754463e-05, "loss": 1.1105, "step": 19690 }, { "epoch": 0.58, "learning_rate": 2.0811355419901617e-05, "loss": 0.8333, "step": 19700 }, { "epoch": 0.58, "learning_rate": 2.0796538849048778e-05, "loss": 0.8141, "step": 19710 }, { "epoch": 0.58, "learning_rate": 2.0781722278195935e-05, "loss": 0.9571, "step": 19720 }, { "epoch": 0.58, "learning_rate": 2.0766905707343092e-05, "loss": 0.7987, "step": 19730 }, { "epoch": 0.58, "learning_rate": 2.0752089136490253e-05, "loss": 0.9294, "step": 19740 }, { "epoch": 0.59, "learning_rate": 2.073727256563741e-05, "loss": 0.9727, "step": 19750 }, { "epoch": 0.59, "learning_rate": 2.0722455994784567e-05, "loss": 1.01, "step": 19760 }, { "epoch": 0.59, "learning_rate": 2.0707639423931728e-05, "loss": 0.8694, "step": 19770 }, { "epoch": 0.59, "learning_rate": 2.0692822853078882e-05, "loss": 0.8795, "step": 19780 }, { "epoch": 0.59, "learning_rate": 2.0678006282226042e-05, "loss": 0.8247, "step": 19790 }, { "epoch": 0.59, "learning_rate": 2.0663189711373203e-05, "loss": 0.9454, "step": 19800 }, { "epoch": 0.59, "learning_rate": 2.0648373140520357e-05, "loss": 0.9298, "step": 19810 }, { "epoch": 0.59, "learning_rate": 2.0633556569667517e-05, "loss": 1.0117, "step": 19820 }, { "epoch": 0.59, "learning_rate": 2.0618739998814675e-05, "loss": 0.7869, "step": 19830 }, { "epoch": 0.59, "learning_rate": 2.0603923427961832e-05, "loss": 0.9181, "step": 19840 }, { "epoch": 0.59, "learning_rate": 2.0589106857108993e-05, "loss": 1.0305, "step": 19850 }, { "epoch": 0.59, "learning_rate": 2.057429028625615e-05, "loss": 0.814, "step": 19860 }, { "epoch": 0.59, "learning_rate": 2.0559473715403307e-05, "loss": 0.7786, "step": 19870 }, { "epoch": 0.59, "learning_rate": 2.0544657144550468e-05, "loss": 0.706, "step": 19880 }, { "epoch": 0.59, "learning_rate": 2.0529840573697625e-05, "loss": 0.9476, "step": 19890 }, { "epoch": 0.59, "learning_rate": 2.0515024002844782e-05, "loss": 0.7293, "step": 19900 }, { "epoch": 0.59, "learning_rate": 2.0500207431991943e-05, "loss": 0.8174, "step": 19910 }, { "epoch": 0.59, "learning_rate": 2.0485390861139097e-05, "loss": 0.9313, "step": 19920 }, { "epoch": 0.59, "learning_rate": 2.0470574290286257e-05, "loss": 0.8605, "step": 19930 }, { "epoch": 0.59, "learning_rate": 2.0455757719433414e-05, "loss": 0.9768, "step": 19940 }, { "epoch": 0.59, "learning_rate": 2.044094114858057e-05, "loss": 0.8925, "step": 19950 }, { "epoch": 0.59, "learning_rate": 2.0426124577727732e-05, "loss": 0.8249, "step": 19960 }, { "epoch": 0.59, "learning_rate": 2.041130800687489e-05, "loss": 0.9222, "step": 19970 }, { "epoch": 0.59, "learning_rate": 2.0396491436022047e-05, "loss": 0.8031, "step": 19980 }, { "epoch": 0.59, "learning_rate": 2.0381674865169207e-05, "loss": 0.8355, "step": 19990 }, { "epoch": 0.59, "learning_rate": 2.0366858294316365e-05, "loss": 0.7415, "step": 20000 }, { "epoch": 0.59, "learning_rate": 2.0352041723463522e-05, "loss": 0.8848, "step": 20010 }, { "epoch": 0.59, "learning_rate": 2.0337225152610682e-05, "loss": 0.8046, "step": 20020 }, { "epoch": 0.59, "learning_rate": 2.032240858175784e-05, "loss": 0.858, "step": 20030 }, { "epoch": 0.59, "learning_rate": 2.0307592010904997e-05, "loss": 0.8371, "step": 20040 }, { "epoch": 0.59, "learning_rate": 2.0292775440052154e-05, "loss": 0.9214, "step": 20050 }, { "epoch": 0.59, "learning_rate": 2.027795886919931e-05, "loss": 0.8854, "step": 20060 }, { "epoch": 0.59, "learning_rate": 2.0263142298346472e-05, "loss": 0.8968, "step": 20070 }, { "epoch": 0.6, "learning_rate": 2.024832572749363e-05, "loss": 0.8748, "step": 20080 }, { "epoch": 0.6, "learning_rate": 2.0233509156640786e-05, "loss": 1.0004, "step": 20090 }, { "epoch": 0.6, "learning_rate": 2.0218692585787947e-05, "loss": 0.8403, "step": 20100 }, { "epoch": 0.6, "learning_rate": 2.0203876014935104e-05, "loss": 0.8263, "step": 20110 }, { "epoch": 0.6, "learning_rate": 2.018905944408226e-05, "loss": 0.8777, "step": 20120 }, { "epoch": 0.6, "learning_rate": 2.0174242873229422e-05, "loss": 0.9249, "step": 20130 }, { "epoch": 0.6, "learning_rate": 2.015942630237658e-05, "loss": 0.7976, "step": 20140 }, { "epoch": 0.6, "learning_rate": 2.0144609731523737e-05, "loss": 0.8771, "step": 20150 }, { "epoch": 0.6, "learning_rate": 2.0129793160670897e-05, "loss": 0.786, "step": 20160 }, { "epoch": 0.6, "learning_rate": 2.0114976589818054e-05, "loss": 0.882, "step": 20170 }, { "epoch": 0.6, "learning_rate": 2.0100160018965212e-05, "loss": 0.9749, "step": 20180 }, { "epoch": 0.6, "learning_rate": 2.008534344811237e-05, "loss": 0.9407, "step": 20190 }, { "epoch": 0.6, "learning_rate": 2.0070526877259526e-05, "loss": 0.8734, "step": 20200 }, { "epoch": 0.6, "learning_rate": 2.0055710306406687e-05, "loss": 0.824, "step": 20210 }, { "epoch": 0.6, "learning_rate": 2.0040893735553844e-05, "loss": 0.8913, "step": 20220 }, { "epoch": 0.6, "learning_rate": 2.0026077164701e-05, "loss": 0.9268, "step": 20230 }, { "epoch": 0.6, "learning_rate": 2.0011260593848162e-05, "loss": 0.9078, "step": 20240 }, { "epoch": 0.6, "learning_rate": 1.999644402299532e-05, "loss": 0.8387, "step": 20250 }, { "epoch": 0.6, "learning_rate": 1.9981627452142476e-05, "loss": 0.851, "step": 20260 }, { "epoch": 0.6, "learning_rate": 1.9966810881289637e-05, "loss": 0.8193, "step": 20270 }, { "epoch": 0.6, "learning_rate": 1.9951994310436794e-05, "loss": 0.8916, "step": 20280 }, { "epoch": 0.6, "learning_rate": 1.993717773958395e-05, "loss": 0.8234, "step": 20290 }, { "epoch": 0.6, "learning_rate": 1.992236116873111e-05, "loss": 0.903, "step": 20300 }, { "epoch": 0.6, "learning_rate": 1.990754459787827e-05, "loss": 1.025, "step": 20310 }, { "epoch": 0.6, "learning_rate": 1.9892728027025427e-05, "loss": 1.0031, "step": 20320 }, { "epoch": 0.6, "learning_rate": 1.9877911456172584e-05, "loss": 0.9271, "step": 20330 }, { "epoch": 0.6, "learning_rate": 1.986309488531974e-05, "loss": 0.8388, "step": 20340 }, { "epoch": 0.6, "learning_rate": 1.98482783144669e-05, "loss": 0.8144, "step": 20350 }, { "epoch": 0.6, "learning_rate": 1.983346174361406e-05, "loss": 1.049, "step": 20360 }, { "epoch": 0.6, "learning_rate": 1.9818645172761216e-05, "loss": 0.8028, "step": 20370 }, { "epoch": 0.6, "learning_rate": 1.9803828601908377e-05, "loss": 0.9394, "step": 20380 }, { "epoch": 0.6, "learning_rate": 1.9789012031055534e-05, "loss": 0.9326, "step": 20390 }, { "epoch": 0.6, "learning_rate": 1.977419546020269e-05, "loss": 0.9962, "step": 20400 }, { "epoch": 0.6, "learning_rate": 1.975937888934985e-05, "loss": 0.8867, "step": 20410 }, { "epoch": 0.61, "learning_rate": 1.974456231849701e-05, "loss": 0.7435, "step": 20420 }, { "epoch": 0.61, "learning_rate": 1.9729745747644166e-05, "loss": 0.7816, "step": 20430 }, { "epoch": 0.61, "learning_rate": 1.9714929176791323e-05, "loss": 0.9469, "step": 20440 }, { "epoch": 0.61, "learning_rate": 1.9700112605938484e-05, "loss": 0.8169, "step": 20450 }, { "epoch": 0.61, "learning_rate": 1.968529603508564e-05, "loss": 0.8735, "step": 20460 }, { "epoch": 0.61, "learning_rate": 1.96704794642328e-05, "loss": 1.0336, "step": 20470 }, { "epoch": 0.61, "learning_rate": 1.9655662893379956e-05, "loss": 0.8156, "step": 20480 }, { "epoch": 0.61, "learning_rate": 1.9640846322527116e-05, "loss": 0.9808, "step": 20490 }, { "epoch": 0.61, "learning_rate": 1.9626029751674274e-05, "loss": 0.79, "step": 20500 }, { "epoch": 0.61, "learning_rate": 1.961121318082143e-05, "loss": 0.8405, "step": 20510 }, { "epoch": 0.61, "learning_rate": 1.9596396609968588e-05, "loss": 0.953, "step": 20520 }, { "epoch": 0.61, "learning_rate": 1.958158003911575e-05, "loss": 0.7861, "step": 20530 }, { "epoch": 0.61, "learning_rate": 1.9566763468262906e-05, "loss": 0.7984, "step": 20540 }, { "epoch": 0.61, "learning_rate": 1.9551946897410063e-05, "loss": 0.9889, "step": 20550 }, { "epoch": 0.61, "learning_rate": 1.9537130326557224e-05, "loss": 0.841, "step": 20560 }, { "epoch": 0.61, "learning_rate": 1.952231375570438e-05, "loss": 0.8406, "step": 20570 }, { "epoch": 0.61, "learning_rate": 1.9507497184851538e-05, "loss": 0.9461, "step": 20580 }, { "epoch": 0.61, "learning_rate": 1.94926806139987e-05, "loss": 0.8538, "step": 20590 }, { "epoch": 0.61, "learning_rate": 1.9477864043145856e-05, "loss": 0.854, "step": 20600 }, { "epoch": 0.61, "learning_rate": 1.9463047472293013e-05, "loss": 0.8276, "step": 20610 }, { "epoch": 0.61, "learning_rate": 1.944823090144017e-05, "loss": 0.955, "step": 20620 }, { "epoch": 0.61, "learning_rate": 1.943341433058733e-05, "loss": 0.8104, "step": 20630 }, { "epoch": 0.61, "learning_rate": 1.941859775973449e-05, "loss": 0.9592, "step": 20640 }, { "epoch": 0.61, "learning_rate": 1.9403781188881646e-05, "loss": 0.8689, "step": 20650 }, { "epoch": 0.61, "learning_rate": 1.9388964618028803e-05, "loss": 0.9637, "step": 20660 }, { "epoch": 0.61, "learning_rate": 1.9374148047175964e-05, "loss": 0.7728, "step": 20670 }, { "epoch": 0.61, "learning_rate": 1.935933147632312e-05, "loss": 0.7657, "step": 20680 }, { "epoch": 0.61, "learning_rate": 1.9344514905470278e-05, "loss": 0.8607, "step": 20690 }, { "epoch": 0.61, "learning_rate": 1.932969833461744e-05, "loss": 0.8825, "step": 20700 }, { "epoch": 0.61, "learning_rate": 1.9314881763764596e-05, "loss": 0.924, "step": 20710 }, { "epoch": 0.61, "learning_rate": 1.9300065192911753e-05, "loss": 0.855, "step": 20720 }, { "epoch": 0.61, "learning_rate": 1.9285248622058914e-05, "loss": 0.7599, "step": 20730 }, { "epoch": 0.61, "learning_rate": 1.927043205120607e-05, "loss": 0.8022, "step": 20740 }, { "epoch": 0.61, "learning_rate": 1.9255615480353228e-05, "loss": 0.7713, "step": 20750 }, { "epoch": 0.62, "learning_rate": 1.9240798909500385e-05, "loss": 0.9565, "step": 20760 }, { "epoch": 0.62, "learning_rate": 1.9225982338647543e-05, "loss": 0.9293, "step": 20770 }, { "epoch": 0.62, "learning_rate": 1.9211165767794703e-05, "loss": 0.8294, "step": 20780 }, { "epoch": 0.62, "learning_rate": 1.919634919694186e-05, "loss": 0.8606, "step": 20790 }, { "epoch": 0.62, "learning_rate": 1.9181532626089018e-05, "loss": 0.8515, "step": 20800 }, { "epoch": 0.62, "learning_rate": 1.916671605523618e-05, "loss": 0.853, "step": 20810 }, { "epoch": 0.62, "learning_rate": 1.9151899484383336e-05, "loss": 0.7633, "step": 20820 }, { "epoch": 0.62, "learning_rate": 1.9137082913530493e-05, "loss": 0.8862, "step": 20830 }, { "epoch": 0.62, "learning_rate": 1.9122266342677653e-05, "loss": 0.9932, "step": 20840 }, { "epoch": 0.62, "learning_rate": 1.910744977182481e-05, "loss": 0.8002, "step": 20850 }, { "epoch": 0.62, "learning_rate": 1.9092633200971968e-05, "loss": 0.9051, "step": 20860 }, { "epoch": 0.62, "learning_rate": 1.907781663011913e-05, "loss": 0.9279, "step": 20870 }, { "epoch": 0.62, "learning_rate": 1.9063000059266282e-05, "loss": 0.8137, "step": 20880 }, { "epoch": 0.62, "learning_rate": 1.9048183488413443e-05, "loss": 0.8661, "step": 20890 }, { "epoch": 0.62, "learning_rate": 1.90333669175606e-05, "loss": 0.9539, "step": 20900 }, { "epoch": 0.62, "learning_rate": 1.9018550346707757e-05, "loss": 0.8177, "step": 20910 }, { "epoch": 0.62, "learning_rate": 1.9003733775854918e-05, "loss": 0.7602, "step": 20920 }, { "epoch": 0.62, "learning_rate": 1.8988917205002075e-05, "loss": 0.7922, "step": 20930 }, { "epoch": 0.62, "learning_rate": 1.8974100634149233e-05, "loss": 0.8677, "step": 20940 }, { "epoch": 0.62, "learning_rate": 1.8959284063296393e-05, "loss": 0.7631, "step": 20950 }, { "epoch": 0.62, "learning_rate": 1.894446749244355e-05, "loss": 0.8979, "step": 20960 }, { "epoch": 0.62, "learning_rate": 1.8929650921590708e-05, "loss": 0.8185, "step": 20970 }, { "epoch": 0.62, "learning_rate": 1.8914834350737868e-05, "loss": 0.892, "step": 20980 }, { "epoch": 0.62, "learning_rate": 1.8900017779885022e-05, "loss": 0.9548, "step": 20990 }, { "epoch": 0.62, "learning_rate": 1.8885201209032183e-05, "loss": 0.8945, "step": 21000 }, { "epoch": 0.62, "learning_rate": 1.8870384638179343e-05, "loss": 0.8548, "step": 21010 }, { "epoch": 0.62, "learning_rate": 1.8855568067326497e-05, "loss": 0.9585, "step": 21020 }, { "epoch": 0.62, "learning_rate": 1.8840751496473658e-05, "loss": 0.8668, "step": 21030 }, { "epoch": 0.62, "learning_rate": 1.8825934925620815e-05, "loss": 0.9046, "step": 21040 }, { "epoch": 0.62, "learning_rate": 1.8811118354767972e-05, "loss": 0.8567, "step": 21050 }, { "epoch": 0.62, "learning_rate": 1.8796301783915133e-05, "loss": 0.791, "step": 21060 }, { "epoch": 0.62, "learning_rate": 1.878148521306229e-05, "loss": 0.8112, "step": 21070 }, { "epoch": 0.62, "learning_rate": 1.8766668642209447e-05, "loss": 0.8138, "step": 21080 }, { "epoch": 0.62, "learning_rate": 1.8751852071356608e-05, "loss": 0.9992, "step": 21090 }, { "epoch": 0.63, "learning_rate": 1.8737035500503762e-05, "loss": 0.8576, "step": 21100 }, { "epoch": 0.63, "learning_rate": 1.8722218929650922e-05, "loss": 0.7868, "step": 21110 }, { "epoch": 0.63, "learning_rate": 1.8707402358798083e-05, "loss": 0.8043, "step": 21120 }, { "epoch": 0.63, "learning_rate": 1.8692585787945237e-05, "loss": 0.7189, "step": 21130 }, { "epoch": 0.63, "learning_rate": 1.8677769217092397e-05, "loss": 0.7823, "step": 21140 }, { "epoch": 0.63, "learning_rate": 1.8662952646239558e-05, "loss": 0.7672, "step": 21150 }, { "epoch": 0.63, "learning_rate": 1.8648136075386712e-05, "loss": 1.001, "step": 21160 }, { "epoch": 0.63, "learning_rate": 1.8633319504533873e-05, "loss": 0.7417, "step": 21170 }, { "epoch": 0.63, "learning_rate": 1.861850293368103e-05, "loss": 0.8655, "step": 21180 }, { "epoch": 0.63, "learning_rate": 1.8603686362828187e-05, "loss": 0.7832, "step": 21190 }, { "epoch": 0.63, "learning_rate": 1.8588869791975348e-05, "loss": 0.924, "step": 21200 }, { "epoch": 0.63, "learning_rate": 1.8574053221122505e-05, "loss": 0.9696, "step": 21210 }, { "epoch": 0.63, "learning_rate": 1.8559236650269662e-05, "loss": 0.883, "step": 21220 }, { "epoch": 0.63, "learning_rate": 1.8544420079416823e-05, "loss": 0.9714, "step": 21230 }, { "epoch": 0.63, "learning_rate": 1.8529603508563977e-05, "loss": 0.9046, "step": 21240 }, { "epoch": 0.63, "learning_rate": 1.8514786937711137e-05, "loss": 0.9156, "step": 21250 }, { "epoch": 0.63, "learning_rate": 1.8499970366858298e-05, "loss": 0.9072, "step": 21260 }, { "epoch": 0.63, "learning_rate": 1.848515379600545e-05, "loss": 0.9005, "step": 21270 }, { "epoch": 0.63, "learning_rate": 1.8470337225152612e-05, "loss": 0.8195, "step": 21280 }, { "epoch": 0.63, "learning_rate": 1.845552065429977e-05, "loss": 0.8322, "step": 21290 }, { "epoch": 0.63, "learning_rate": 1.8440704083446927e-05, "loss": 0.9191, "step": 21300 }, { "epoch": 0.63, "learning_rate": 1.8425887512594087e-05, "loss": 0.8699, "step": 21310 }, { "epoch": 0.63, "learning_rate": 1.8411070941741245e-05, "loss": 0.9575, "step": 21320 }, { "epoch": 0.63, "learning_rate": 1.8396254370888402e-05, "loss": 0.8498, "step": 21330 }, { "epoch": 0.63, "learning_rate": 1.8381437800035562e-05, "loss": 0.8958, "step": 21340 }, { "epoch": 0.63, "learning_rate": 1.8366621229182716e-05, "loss": 0.864, "step": 21350 }, { "epoch": 0.63, "learning_rate": 1.8351804658329877e-05, "loss": 0.9615, "step": 21360 }, { "epoch": 0.63, "learning_rate": 1.8336988087477038e-05, "loss": 0.788, "step": 21370 }, { "epoch": 0.63, "learning_rate": 1.832217151662419e-05, "loss": 0.8295, "step": 21380 }, { "epoch": 0.63, "learning_rate": 1.8307354945771352e-05, "loss": 0.8751, "step": 21390 }, { "epoch": 0.63, "learning_rate": 1.829253837491851e-05, "loss": 0.8718, "step": 21400 }, { "epoch": 0.63, "learning_rate": 1.8277721804065666e-05, "loss": 0.8962, "step": 21410 }, { "epoch": 0.63, "learning_rate": 1.8262905233212827e-05, "loss": 0.8029, "step": 21420 }, { "epoch": 0.64, "learning_rate": 1.8248088662359984e-05, "loss": 0.9122, "step": 21430 }, { "epoch": 0.64, "learning_rate": 1.823327209150714e-05, "loss": 0.7332, "step": 21440 }, { "epoch": 0.64, "learning_rate": 1.8218455520654302e-05, "loss": 0.8847, "step": 21450 }, { "epoch": 0.64, "learning_rate": 1.8203638949801456e-05, "loss": 0.8831, "step": 21460 }, { "epoch": 0.64, "learning_rate": 1.8188822378948617e-05, "loss": 0.9425, "step": 21470 }, { "epoch": 0.64, "learning_rate": 1.8174005808095777e-05, "loss": 0.7179, "step": 21480 }, { "epoch": 0.64, "learning_rate": 1.815918923724293e-05, "loss": 0.9403, "step": 21490 }, { "epoch": 0.64, "learning_rate": 1.8144372666390092e-05, "loss": 0.8041, "step": 21500 }, { "epoch": 0.64, "learning_rate": 1.812955609553725e-05, "loss": 0.9635, "step": 21510 }, { "epoch": 0.64, "learning_rate": 1.8114739524684406e-05, "loss": 0.8108, "step": 21520 }, { "epoch": 0.64, "learning_rate": 1.8099922953831567e-05, "loss": 0.9086, "step": 21530 }, { "epoch": 0.64, "learning_rate": 1.8085106382978724e-05, "loss": 0.7554, "step": 21540 }, { "epoch": 0.64, "learning_rate": 1.807028981212588e-05, "loss": 0.999, "step": 21550 }, { "epoch": 0.64, "learning_rate": 1.8055473241273042e-05, "loss": 0.9111, "step": 21560 }, { "epoch": 0.64, "learning_rate": 1.80406566704202e-05, "loss": 1.0083, "step": 21570 }, { "epoch": 0.64, "learning_rate": 1.8025840099567356e-05, "loss": 0.8531, "step": 21580 }, { "epoch": 0.64, "learning_rate": 1.8011023528714517e-05, "loss": 0.8125, "step": 21590 }, { "epoch": 0.64, "learning_rate": 1.799620695786167e-05, "loss": 0.8432, "step": 21600 }, { "epoch": 0.64, "learning_rate": 1.798139038700883e-05, "loss": 0.9143, "step": 21610 }, { "epoch": 0.64, "learning_rate": 1.7966573816155992e-05, "loss": 0.8225, "step": 21620 }, { "epoch": 0.64, "learning_rate": 1.7951757245303146e-05, "loss": 0.8217, "step": 21630 }, { "epoch": 0.64, "learning_rate": 1.7936940674450307e-05, "loss": 0.8318, "step": 21640 }, { "epoch": 0.64, "learning_rate": 1.7922124103597464e-05, "loss": 0.8802, "step": 21650 }, { "epoch": 0.64, "learning_rate": 1.790730753274462e-05, "loss": 0.6746, "step": 21660 }, { "epoch": 0.64, "learning_rate": 1.789249096189178e-05, "loss": 0.8367, "step": 21670 }, { "epoch": 0.64, "learning_rate": 1.787767439103894e-05, "loss": 0.8144, "step": 21680 }, { "epoch": 0.64, "learning_rate": 1.7862857820186096e-05, "loss": 0.8638, "step": 21690 }, { "epoch": 0.64, "learning_rate": 1.7848041249333257e-05, "loss": 0.9311, "step": 21700 }, { "epoch": 0.64, "learning_rate": 1.7833224678480414e-05, "loss": 0.8604, "step": 21710 }, { "epoch": 0.64, "learning_rate": 1.781840810762757e-05, "loss": 0.7928, "step": 21720 }, { "epoch": 0.64, "learning_rate": 1.7803591536774732e-05, "loss": 0.7322, "step": 21730 }, { "epoch": 0.64, "learning_rate": 1.7788774965921886e-05, "loss": 0.824, "step": 21740 }, { "epoch": 0.64, "learning_rate": 1.7773958395069046e-05, "loss": 0.965, "step": 21750 }, { "epoch": 0.64, "learning_rate": 1.7759141824216203e-05, "loss": 0.8722, "step": 21760 }, { "epoch": 0.65, "learning_rate": 1.774432525336336e-05, "loss": 0.8038, "step": 21770 }, { "epoch": 0.65, "learning_rate": 1.772950868251052e-05, "loss": 0.8402, "step": 21780 }, { "epoch": 0.65, "learning_rate": 1.771469211165768e-05, "loss": 0.9165, "step": 21790 }, { "epoch": 0.65, "learning_rate": 1.7699875540804836e-05, "loss": 0.8432, "step": 21800 }, { "epoch": 0.65, "learning_rate": 1.7685058969951996e-05, "loss": 0.8241, "step": 21810 }, { "epoch": 0.65, "learning_rate": 1.7670242399099154e-05, "loss": 0.8838, "step": 21820 }, { "epoch": 0.65, "learning_rate": 1.765542582824631e-05, "loss": 0.8166, "step": 21830 }, { "epoch": 0.65, "learning_rate": 1.764060925739347e-05, "loss": 0.8159, "step": 21840 }, { "epoch": 0.65, "learning_rate": 1.762579268654063e-05, "loss": 0.8546, "step": 21850 }, { "epoch": 0.65, "learning_rate": 1.7610976115687786e-05, "loss": 1.0096, "step": 21860 }, { "epoch": 0.65, "learning_rate": 1.7596159544834943e-05, "loss": 0.821, "step": 21870 }, { "epoch": 0.65, "learning_rate": 1.75813429739821e-05, "loss": 0.9978, "step": 21880 }, { "epoch": 0.65, "learning_rate": 1.756652640312926e-05, "loss": 0.9032, "step": 21890 }, { "epoch": 0.65, "learning_rate": 1.7551709832276418e-05, "loss": 0.6687, "step": 21900 }, { "epoch": 0.65, "learning_rate": 1.7536893261423576e-05, "loss": 0.9232, "step": 21910 }, { "epoch": 0.65, "learning_rate": 1.7522076690570736e-05, "loss": 0.8338, "step": 21920 }, { "epoch": 0.65, "learning_rate": 1.7507260119717893e-05, "loss": 0.7658, "step": 21930 }, { "epoch": 0.65, "learning_rate": 1.749244354886505e-05, "loss": 0.7666, "step": 21940 }, { "epoch": 0.65, "learning_rate": 1.747762697801221e-05, "loss": 0.783, "step": 21950 }, { "epoch": 0.65, "learning_rate": 1.746281040715937e-05, "loss": 0.7576, "step": 21960 }, { "epoch": 0.65, "learning_rate": 1.7447993836306526e-05, "loss": 0.8817, "step": 21970 }, { "epoch": 0.65, "learning_rate": 1.7433177265453683e-05, "loss": 0.8559, "step": 21980 }, { "epoch": 0.65, "learning_rate": 1.7418360694600844e-05, "loss": 0.9449, "step": 21990 }, { "epoch": 0.65, "learning_rate": 1.7403544123748e-05, "loss": 0.9124, "step": 22000 }, { "epoch": 0.65, "learning_rate": 1.7388727552895158e-05, "loss": 1.0082, "step": 22010 }, { "epoch": 0.65, "learning_rate": 1.7373910982042315e-05, "loss": 0.7337, "step": 22020 }, { "epoch": 0.65, "learning_rate": 1.7359094411189476e-05, "loss": 0.8386, "step": 22030 }, { "epoch": 0.65, "learning_rate": 1.7344277840336633e-05, "loss": 0.825, "step": 22040 }, { "epoch": 0.65, "learning_rate": 1.732946126948379e-05, "loss": 0.7686, "step": 22050 }, { "epoch": 0.65, "learning_rate": 1.731464469863095e-05, "loss": 0.9501, "step": 22060 }, { "epoch": 0.65, "learning_rate": 1.7299828127778108e-05, "loss": 0.8041, "step": 22070 }, { "epoch": 0.65, "learning_rate": 1.7285011556925265e-05, "loss": 0.8971, "step": 22080 }, { "epoch": 0.65, "learning_rate": 1.7270194986072426e-05, "loss": 0.8153, "step": 22090 }, { "epoch": 0.65, "learning_rate": 1.7255378415219583e-05, "loss": 0.8948, "step": 22100 }, { "epoch": 0.66, "learning_rate": 1.724056184436674e-05, "loss": 0.9888, "step": 22110 }, { "epoch": 0.66, "learning_rate": 1.7225745273513898e-05, "loss": 0.7854, "step": 22120 }, { "epoch": 0.66, "learning_rate": 1.721092870266106e-05, "loss": 0.8631, "step": 22130 }, { "epoch": 0.66, "learning_rate": 1.7196112131808216e-05, "loss": 0.9226, "step": 22140 }, { "epoch": 0.66, "learning_rate": 1.7181295560955373e-05, "loss": 0.876, "step": 22150 }, { "epoch": 0.66, "learning_rate": 1.716647899010253e-05, "loss": 0.8988, "step": 22160 }, { "epoch": 0.66, "learning_rate": 1.715166241924969e-05, "loss": 0.8474, "step": 22170 }, { "epoch": 0.66, "learning_rate": 1.7136845848396848e-05, "loss": 0.9297, "step": 22180 }, { "epoch": 0.66, "learning_rate": 1.7122029277544005e-05, "loss": 0.8784, "step": 22190 }, { "epoch": 0.66, "learning_rate": 1.7107212706691166e-05, "loss": 0.7333, "step": 22200 }, { "epoch": 0.66, "learning_rate": 1.7092396135838323e-05, "loss": 0.889, "step": 22210 }, { "epoch": 0.66, "learning_rate": 1.707757956498548e-05, "loss": 0.8144, "step": 22220 }, { "epoch": 0.66, "learning_rate": 1.7062762994132637e-05, "loss": 0.9886, "step": 22230 }, { "epoch": 0.66, "learning_rate": 1.7047946423279798e-05, "loss": 0.9524, "step": 22240 }, { "epoch": 0.66, "learning_rate": 1.7033129852426955e-05, "loss": 0.9731, "step": 22250 }, { "epoch": 0.66, "learning_rate": 1.7018313281574113e-05, "loss": 0.7887, "step": 22260 }, { "epoch": 0.66, "learning_rate": 1.7003496710721273e-05, "loss": 0.81, "step": 22270 }, { "epoch": 0.66, "learning_rate": 1.698868013986843e-05, "loss": 0.9525, "step": 22280 }, { "epoch": 0.66, "learning_rate": 1.6973863569015588e-05, "loss": 0.772, "step": 22290 }, { "epoch": 0.66, "learning_rate": 1.6959046998162745e-05, "loss": 0.7318, "step": 22300 }, { "epoch": 0.66, "learning_rate": 1.6944230427309905e-05, "loss": 0.9601, "step": 22310 }, { "epoch": 0.66, "learning_rate": 1.6929413856457063e-05, "loss": 0.8658, "step": 22320 }, { "epoch": 0.66, "learning_rate": 1.691459728560422e-05, "loss": 0.9724, "step": 22330 }, { "epoch": 0.66, "learning_rate": 1.6899780714751377e-05, "loss": 0.9571, "step": 22340 }, { "epoch": 0.66, "learning_rate": 1.6884964143898538e-05, "loss": 0.8063, "step": 22350 }, { "epoch": 0.66, "learning_rate": 1.6870147573045695e-05, "loss": 0.82, "step": 22360 }, { "epoch": 0.66, "learning_rate": 1.6855331002192852e-05, "loss": 0.921, "step": 22370 }, { "epoch": 0.66, "learning_rate": 1.6840514431340013e-05, "loss": 0.8703, "step": 22380 }, { "epoch": 0.66, "learning_rate": 1.682569786048717e-05, "loss": 1.0329, "step": 22390 }, { "epoch": 0.66, "learning_rate": 1.6810881289634327e-05, "loss": 0.9818, "step": 22400 }, { "epoch": 0.66, "learning_rate": 1.6796064718781488e-05, "loss": 0.9763, "step": 22410 }, { "epoch": 0.66, "learning_rate": 1.6781248147928645e-05, "loss": 0.9212, "step": 22420 }, { "epoch": 0.66, "learning_rate": 1.6766431577075802e-05, "loss": 0.7776, "step": 22430 }, { "epoch": 0.66, "learning_rate": 1.675161500622296e-05, "loss": 0.8514, "step": 22440 }, { "epoch": 0.67, "learning_rate": 1.6736798435370117e-05, "loss": 1.021, "step": 22450 }, { "epoch": 0.67, "learning_rate": 1.6721981864517278e-05, "loss": 0.9384, "step": 22460 }, { "epoch": 0.67, "learning_rate": 1.6707165293664435e-05, "loss": 0.7759, "step": 22470 }, { "epoch": 0.67, "learning_rate": 1.6692348722811592e-05, "loss": 0.8767, "step": 22480 }, { "epoch": 0.67, "learning_rate": 1.6677532151958753e-05, "loss": 0.7918, "step": 22490 }, { "epoch": 0.67, "learning_rate": 1.666271558110591e-05, "loss": 0.8851, "step": 22500 }, { "epoch": 0.67, "learning_rate": 1.6647899010253067e-05, "loss": 0.9851, "step": 22510 }, { "epoch": 0.67, "learning_rate": 1.6633082439400228e-05, "loss": 0.9906, "step": 22520 }, { "epoch": 0.67, "learning_rate": 1.6618265868547385e-05, "loss": 0.9234, "step": 22530 }, { "epoch": 0.67, "learning_rate": 1.6603449297694542e-05, "loss": 0.8392, "step": 22540 }, { "epoch": 0.67, "learning_rate": 1.6588632726841703e-05, "loss": 0.8439, "step": 22550 }, { "epoch": 0.67, "learning_rate": 1.6573816155988857e-05, "loss": 0.8402, "step": 22560 }, { "epoch": 0.67, "learning_rate": 1.6558999585136017e-05, "loss": 0.7782, "step": 22570 }, { "epoch": 0.67, "learning_rate": 1.6544183014283174e-05, "loss": 0.9362, "step": 22580 }, { "epoch": 0.67, "learning_rate": 1.6529366443430332e-05, "loss": 0.8373, "step": 22590 }, { "epoch": 0.67, "learning_rate": 1.6514549872577492e-05, "loss": 0.8784, "step": 22600 }, { "epoch": 0.67, "learning_rate": 1.649973330172465e-05, "loss": 0.678, "step": 22610 }, { "epoch": 0.67, "learning_rate": 1.6484916730871807e-05, "loss": 0.7156, "step": 22620 }, { "epoch": 0.67, "learning_rate": 1.6470100160018967e-05, "loss": 0.7588, "step": 22630 }, { "epoch": 0.67, "learning_rate": 1.6455283589166125e-05, "loss": 0.9054, "step": 22640 }, { "epoch": 0.67, "learning_rate": 1.6440467018313282e-05, "loss": 0.7793, "step": 22650 }, { "epoch": 0.67, "learning_rate": 1.6425650447460443e-05, "loss": 0.7883, "step": 22660 }, { "epoch": 0.67, "learning_rate": 1.64108338766076e-05, "loss": 0.8559, "step": 22670 }, { "epoch": 0.67, "learning_rate": 1.6396017305754757e-05, "loss": 0.7894, "step": 22680 }, { "epoch": 0.67, "learning_rate": 1.6381200734901918e-05, "loss": 0.7528, "step": 22690 }, { "epoch": 0.67, "learning_rate": 1.636638416404907e-05, "loss": 0.8207, "step": 22700 }, { "epoch": 0.67, "learning_rate": 1.6351567593196232e-05, "loss": 1.0498, "step": 22710 }, { "epoch": 0.67, "learning_rate": 1.633675102234339e-05, "loss": 0.7424, "step": 22720 }, { "epoch": 0.67, "learning_rate": 1.6321934451490547e-05, "loss": 1.0651, "step": 22730 }, { "epoch": 0.67, "learning_rate": 1.6307117880637707e-05, "loss": 0.8204, "step": 22740 }, { "epoch": 0.67, "learning_rate": 1.6292301309784864e-05, "loss": 0.8968, "step": 22750 }, { "epoch": 0.67, "learning_rate": 1.627748473893202e-05, "loss": 0.8417, "step": 22760 }, { "epoch": 0.67, "learning_rate": 1.6262668168079182e-05, "loss": 0.7999, "step": 22770 }, { "epoch": 0.68, "learning_rate": 1.624785159722634e-05, "loss": 0.8191, "step": 22780 }, { "epoch": 0.68, "learning_rate": 1.6233035026373497e-05, "loss": 0.8413, "step": 22790 }, { "epoch": 0.68, "learning_rate": 1.6218218455520657e-05, "loss": 0.9028, "step": 22800 }, { "epoch": 0.68, "learning_rate": 1.620340188466781e-05, "loss": 0.9951, "step": 22810 }, { "epoch": 0.68, "learning_rate": 1.6188585313814972e-05, "loss": 0.8284, "step": 22820 }, { "epoch": 0.68, "learning_rate": 1.6173768742962132e-05, "loss": 0.8921, "step": 22830 }, { "epoch": 0.68, "learning_rate": 1.6158952172109286e-05, "loss": 0.7294, "step": 22840 }, { "epoch": 0.68, "learning_rate": 1.6144135601256447e-05, "loss": 0.892, "step": 22850 }, { "epoch": 0.68, "learning_rate": 1.6129319030403604e-05, "loss": 0.7497, "step": 22860 }, { "epoch": 0.68, "learning_rate": 1.611450245955076e-05, "loss": 0.8052, "step": 22870 }, { "epoch": 0.68, "learning_rate": 1.6099685888697922e-05, "loss": 0.7653, "step": 22880 }, { "epoch": 0.68, "learning_rate": 1.608486931784508e-05, "loss": 1.0207, "step": 22890 }, { "epoch": 0.68, "learning_rate": 1.6070052746992236e-05, "loss": 0.7666, "step": 22900 }, { "epoch": 0.68, "learning_rate": 1.6055236176139397e-05, "loss": 0.7809, "step": 22910 }, { "epoch": 0.68, "learning_rate": 1.604041960528655e-05, "loss": 0.8804, "step": 22920 }, { "epoch": 0.68, "learning_rate": 1.602560303443371e-05, "loss": 0.8108, "step": 22930 }, { "epoch": 0.68, "learning_rate": 1.6010786463580872e-05, "loss": 0.8421, "step": 22940 }, { "epoch": 0.68, "learning_rate": 1.5995969892728026e-05, "loss": 0.9035, "step": 22950 }, { "epoch": 0.68, "learning_rate": 1.5981153321875187e-05, "loss": 0.7245, "step": 22960 }, { "epoch": 0.68, "learning_rate": 1.5966336751022344e-05, "loss": 0.8261, "step": 22970 }, { "epoch": 0.68, "learning_rate": 1.59515201801695e-05, "loss": 0.9342, "step": 22980 }, { "epoch": 0.68, "learning_rate": 1.593670360931666e-05, "loss": 0.7752, "step": 22990 }, { "epoch": 0.68, "learning_rate": 1.592188703846382e-05, "loss": 0.8349, "step": 23000 }, { "epoch": 0.68, "learning_rate": 1.5907070467610976e-05, "loss": 0.7854, "step": 23010 }, { "epoch": 0.68, "learning_rate": 1.5892253896758137e-05, "loss": 0.866, "step": 23020 }, { "epoch": 0.68, "learning_rate": 1.587743732590529e-05, "loss": 0.8379, "step": 23030 }, { "epoch": 0.68, "learning_rate": 1.586262075505245e-05, "loss": 0.771, "step": 23040 }, { "epoch": 0.68, "learning_rate": 1.5847804184199612e-05, "loss": 0.7034, "step": 23050 }, { "epoch": 0.68, "learning_rate": 1.5832987613346766e-05, "loss": 0.8434, "step": 23060 }, { "epoch": 0.68, "learning_rate": 1.5818171042493926e-05, "loss": 0.7547, "step": 23070 }, { "epoch": 0.68, "learning_rate": 1.5803354471641087e-05, "loss": 0.8964, "step": 23080 }, { "epoch": 0.68, "learning_rate": 1.578853790078824e-05, "loss": 0.8574, "step": 23090 }, { "epoch": 0.68, "learning_rate": 1.57737213299354e-05, "loss": 0.8381, "step": 23100 }, { "epoch": 0.68, "learning_rate": 1.575890475908256e-05, "loss": 0.8343, "step": 23110 }, { "epoch": 0.69, "learning_rate": 1.5744088188229716e-05, "loss": 0.8274, "step": 23120 }, { "epoch": 0.69, "learning_rate": 1.5729271617376876e-05, "loss": 0.8053, "step": 23130 }, { "epoch": 0.69, "learning_rate": 1.571445504652403e-05, "loss": 0.8456, "step": 23140 }, { "epoch": 0.69, "learning_rate": 1.569963847567119e-05, "loss": 0.969, "step": 23150 }, { "epoch": 0.69, "learning_rate": 1.568482190481835e-05, "loss": 0.8612, "step": 23160 }, { "epoch": 0.69, "learning_rate": 1.5670005333965505e-05, "loss": 0.8344, "step": 23170 }, { "epoch": 0.69, "learning_rate": 1.5655188763112666e-05, "loss": 0.9009, "step": 23180 }, { "epoch": 0.69, "learning_rate": 1.5640372192259827e-05, "loss": 0.9595, "step": 23190 }, { "epoch": 0.69, "learning_rate": 1.562555562140698e-05, "loss": 0.8761, "step": 23200 }, { "epoch": 0.69, "learning_rate": 1.561073905055414e-05, "loss": 0.8111, "step": 23210 }, { "epoch": 0.69, "learning_rate": 1.55959224797013e-05, "loss": 0.8634, "step": 23220 }, { "epoch": 0.69, "learning_rate": 1.5581105908848456e-05, "loss": 0.835, "step": 23230 }, { "epoch": 0.69, "learning_rate": 1.5566289337995616e-05, "loss": 0.9587, "step": 23240 }, { "epoch": 0.69, "learning_rate": 1.5551472767142773e-05, "loss": 0.7974, "step": 23250 }, { "epoch": 0.69, "learning_rate": 1.553665619628993e-05, "loss": 0.7252, "step": 23260 }, { "epoch": 0.69, "learning_rate": 1.552183962543709e-05, "loss": 1.0557, "step": 23270 }, { "epoch": 0.69, "learning_rate": 1.5507023054584245e-05, "loss": 1.0059, "step": 23280 }, { "epoch": 0.69, "learning_rate": 1.5492206483731406e-05, "loss": 0.8877, "step": 23290 }, { "epoch": 0.69, "learning_rate": 1.5477389912878566e-05, "loss": 0.8916, "step": 23300 }, { "epoch": 0.69, "learning_rate": 1.546257334202572e-05, "loss": 0.8149, "step": 23310 }, { "epoch": 0.69, "learning_rate": 1.544775677117288e-05, "loss": 0.8561, "step": 23320 }, { "epoch": 0.69, "learning_rate": 1.5432940200320038e-05, "loss": 0.8927, "step": 23330 }, { "epoch": 0.69, "learning_rate": 1.5418123629467195e-05, "loss": 0.7797, "step": 23340 }, { "epoch": 0.69, "learning_rate": 1.5403307058614356e-05, "loss": 0.7539, "step": 23350 }, { "epoch": 0.69, "learning_rate": 1.5388490487761513e-05, "loss": 0.9578, "step": 23360 }, { "epoch": 0.69, "learning_rate": 1.537367391690867e-05, "loss": 0.9525, "step": 23370 }, { "epoch": 0.69, "learning_rate": 1.535885734605583e-05, "loss": 0.8471, "step": 23380 }, { "epoch": 0.69, "learning_rate": 1.5344040775202988e-05, "loss": 0.864, "step": 23390 }, { "epoch": 0.69, "learning_rate": 1.5329224204350145e-05, "loss": 0.8827, "step": 23400 }, { "epoch": 0.69, "learning_rate": 1.5314407633497306e-05, "loss": 0.8778, "step": 23410 }, { "epoch": 0.69, "learning_rate": 1.529959106264446e-05, "loss": 0.8729, "step": 23420 }, { "epoch": 0.69, "learning_rate": 1.528477449179162e-05, "loss": 0.954, "step": 23430 }, { "epoch": 0.69, "learning_rate": 1.5269957920938778e-05, "loss": 0.7923, "step": 23440 }, { "epoch": 0.69, "learning_rate": 1.5255141350085935e-05, "loss": 0.8829, "step": 23450 }, { "epoch": 0.7, "learning_rate": 1.5240324779233096e-05, "loss": 0.865, "step": 23460 }, { "epoch": 0.7, "learning_rate": 1.5225508208380255e-05, "loss": 0.8871, "step": 23470 }, { "epoch": 0.7, "learning_rate": 1.521069163752741e-05, "loss": 0.8585, "step": 23480 }, { "epoch": 0.7, "learning_rate": 1.5195875066674569e-05, "loss": 0.9678, "step": 23490 }, { "epoch": 0.7, "learning_rate": 1.518105849582173e-05, "loss": 0.8421, "step": 23500 }, { "epoch": 0.7, "learning_rate": 1.5166241924968885e-05, "loss": 0.7487, "step": 23510 }, { "epoch": 0.7, "learning_rate": 1.5151425354116044e-05, "loss": 0.86, "step": 23520 }, { "epoch": 0.7, "learning_rate": 1.5136608783263203e-05, "loss": 0.8443, "step": 23530 }, { "epoch": 0.7, "learning_rate": 1.512179221241036e-05, "loss": 0.8046, "step": 23540 }, { "epoch": 0.7, "learning_rate": 1.510697564155752e-05, "loss": 0.9261, "step": 23550 }, { "epoch": 0.7, "learning_rate": 1.5092159070704676e-05, "loss": 0.8575, "step": 23560 }, { "epoch": 0.7, "learning_rate": 1.5077342499851835e-05, "loss": 0.8521, "step": 23570 }, { "epoch": 0.7, "learning_rate": 1.5062525928998994e-05, "loss": 0.8765, "step": 23580 }, { "epoch": 0.7, "learning_rate": 1.504770935814615e-05, "loss": 0.9086, "step": 23590 }, { "epoch": 0.7, "learning_rate": 1.5032892787293309e-05, "loss": 0.8513, "step": 23600 }, { "epoch": 0.7, "learning_rate": 1.501807621644047e-05, "loss": 0.9946, "step": 23610 }, { "epoch": 0.7, "learning_rate": 1.5003259645587625e-05, "loss": 0.9462, "step": 23620 }, { "epoch": 0.7, "learning_rate": 1.4988443074734784e-05, "loss": 0.8317, "step": 23630 }, { "epoch": 0.7, "learning_rate": 1.4973626503881943e-05, "loss": 0.9471, "step": 23640 }, { "epoch": 0.7, "learning_rate": 1.49588099330291e-05, "loss": 0.8202, "step": 23650 }, { "epoch": 0.7, "learning_rate": 1.4943993362176259e-05, "loss": 0.8416, "step": 23660 }, { "epoch": 0.7, "learning_rate": 1.4929176791323418e-05, "loss": 0.8742, "step": 23670 }, { "epoch": 0.7, "learning_rate": 1.4914360220470575e-05, "loss": 0.787, "step": 23680 }, { "epoch": 0.7, "learning_rate": 1.4899543649617734e-05, "loss": 0.8054, "step": 23690 }, { "epoch": 0.7, "learning_rate": 1.488472707876489e-05, "loss": 0.8448, "step": 23700 }, { "epoch": 0.7, "learning_rate": 1.4869910507912048e-05, "loss": 0.7907, "step": 23710 }, { "epoch": 0.7, "learning_rate": 1.4855093937059209e-05, "loss": 0.7657, "step": 23720 }, { "epoch": 0.7, "learning_rate": 1.4840277366206365e-05, "loss": 0.7114, "step": 23730 }, { "epoch": 0.7, "learning_rate": 1.4825460795353524e-05, "loss": 0.9068, "step": 23740 }, { "epoch": 0.7, "learning_rate": 1.4810644224500682e-05, "loss": 0.7937, "step": 23750 }, { "epoch": 0.7, "learning_rate": 1.479582765364784e-05, "loss": 0.7901, "step": 23760 }, { "epoch": 0.7, "learning_rate": 1.4781011082794999e-05, "loss": 0.7857, "step": 23770 }, { "epoch": 0.7, "learning_rate": 1.4766194511942158e-05, "loss": 0.8562, "step": 23780 }, { "epoch": 0.7, "learning_rate": 1.4751377941089315e-05, "loss": 0.7484, "step": 23790 }, { "epoch": 0.71, "learning_rate": 1.4736561370236474e-05, "loss": 0.8844, "step": 23800 }, { "epoch": 0.71, "learning_rate": 1.4721744799383633e-05, "loss": 0.9456, "step": 23810 }, { "epoch": 0.71, "learning_rate": 1.470692822853079e-05, "loss": 0.8661, "step": 23820 }, { "epoch": 0.71, "learning_rate": 1.4692111657677949e-05, "loss": 0.8893, "step": 23830 }, { "epoch": 0.71, "learning_rate": 1.4677295086825104e-05, "loss": 0.8261, "step": 23840 }, { "epoch": 0.71, "learning_rate": 1.4662478515972263e-05, "loss": 0.9312, "step": 23850 }, { "epoch": 0.71, "learning_rate": 1.4647661945119422e-05, "loss": 0.764, "step": 23860 }, { "epoch": 0.71, "learning_rate": 1.463284537426658e-05, "loss": 0.7764, "step": 23870 }, { "epoch": 0.71, "learning_rate": 1.4618028803413738e-05, "loss": 0.809, "step": 23880 }, { "epoch": 0.71, "learning_rate": 1.4603212232560897e-05, "loss": 0.8024, "step": 23890 }, { "epoch": 0.71, "learning_rate": 1.4588395661708055e-05, "loss": 1.019, "step": 23900 }, { "epoch": 0.71, "learning_rate": 1.4573579090855213e-05, "loss": 0.8105, "step": 23910 }, { "epoch": 0.71, "learning_rate": 1.4558762520002372e-05, "loss": 0.8173, "step": 23920 }, { "epoch": 0.71, "learning_rate": 1.454394594914953e-05, "loss": 0.8577, "step": 23930 }, { "epoch": 0.71, "learning_rate": 1.4529129378296689e-05, "loss": 0.86, "step": 23940 }, { "epoch": 0.71, "learning_rate": 1.4514312807443847e-05, "loss": 0.8168, "step": 23950 }, { "epoch": 0.71, "learning_rate": 1.4499496236591003e-05, "loss": 0.7918, "step": 23960 }, { "epoch": 0.71, "learning_rate": 1.4484679665738164e-05, "loss": 0.8372, "step": 23970 }, { "epoch": 0.71, "learning_rate": 1.446986309488532e-05, "loss": 0.8965, "step": 23980 }, { "epoch": 0.71, "learning_rate": 1.4455046524032478e-05, "loss": 0.7732, "step": 23990 }, { "epoch": 0.71, "learning_rate": 1.4440229953179637e-05, "loss": 0.9507, "step": 24000 }, { "epoch": 0.71, "learning_rate": 1.4425413382326794e-05, "loss": 0.8227, "step": 24010 }, { "epoch": 0.71, "learning_rate": 1.4410596811473953e-05, "loss": 0.7898, "step": 24020 }, { "epoch": 0.71, "learning_rate": 1.4395780240621112e-05, "loss": 0.7134, "step": 24030 }, { "epoch": 0.71, "learning_rate": 1.438096366976827e-05, "loss": 0.7505, "step": 24040 }, { "epoch": 0.71, "learning_rate": 1.4366147098915428e-05, "loss": 0.8048, "step": 24050 }, { "epoch": 0.71, "learning_rate": 1.4351330528062587e-05, "loss": 0.7904, "step": 24060 }, { "epoch": 0.71, "learning_rate": 1.4336513957209743e-05, "loss": 0.837, "step": 24070 }, { "epoch": 0.71, "learning_rate": 1.4321697386356903e-05, "loss": 0.9069, "step": 24080 }, { "epoch": 0.71, "learning_rate": 1.4306880815504062e-05, "loss": 0.9193, "step": 24090 }, { "epoch": 0.71, "learning_rate": 1.4292064244651218e-05, "loss": 0.7904, "step": 24100 }, { "epoch": 0.71, "learning_rate": 1.4277247673798377e-05, "loss": 0.811, "step": 24110 }, { "epoch": 0.71, "learning_rate": 1.4262431102945534e-05, "loss": 0.6747, "step": 24120 }, { "epoch": 0.72, "learning_rate": 1.4247614532092693e-05, "loss": 0.8461, "step": 24130 }, { "epoch": 0.72, "learning_rate": 1.4232797961239852e-05, "loss": 0.9374, "step": 24140 }, { "epoch": 0.72, "learning_rate": 1.4217981390387009e-05, "loss": 0.8721, "step": 24150 }, { "epoch": 0.72, "learning_rate": 1.4203164819534168e-05, "loss": 0.8494, "step": 24160 }, { "epoch": 0.72, "learning_rate": 1.4188348248681327e-05, "loss": 0.867, "step": 24170 }, { "epoch": 0.72, "learning_rate": 1.4173531677828482e-05, "loss": 0.7724, "step": 24180 }, { "epoch": 0.72, "learning_rate": 1.4158715106975643e-05, "loss": 0.8602, "step": 24190 }, { "epoch": 0.72, "learning_rate": 1.4143898536122802e-05, "loss": 0.8801, "step": 24200 }, { "epoch": 0.72, "learning_rate": 1.4129081965269958e-05, "loss": 0.9309, "step": 24210 }, { "epoch": 0.72, "learning_rate": 1.4114265394417116e-05, "loss": 0.7233, "step": 24220 }, { "epoch": 0.72, "learning_rate": 1.4099448823564277e-05, "loss": 0.8141, "step": 24230 }, { "epoch": 0.72, "learning_rate": 1.4084632252711433e-05, "loss": 0.9368, "step": 24240 }, { "epoch": 0.72, "learning_rate": 1.4069815681858592e-05, "loss": 1.0427, "step": 24250 }, { "epoch": 0.72, "learning_rate": 1.4054999111005749e-05, "loss": 0.7932, "step": 24260 }, { "epoch": 0.72, "learning_rate": 1.4040182540152908e-05, "loss": 0.7997, "step": 24270 }, { "epoch": 0.72, "learning_rate": 1.4025365969300067e-05, "loss": 0.7913, "step": 24280 }, { "epoch": 0.72, "learning_rate": 1.4010549398447224e-05, "loss": 0.6766, "step": 24290 }, { "epoch": 0.72, "learning_rate": 1.3995732827594383e-05, "loss": 0.9067, "step": 24300 }, { "epoch": 0.72, "learning_rate": 1.3980916256741542e-05, "loss": 0.8415, "step": 24310 }, { "epoch": 0.72, "learning_rate": 1.3966099685888697e-05, "loss": 0.7203, "step": 24320 }, { "epoch": 0.72, "learning_rate": 1.3951283115035856e-05, "loss": 0.7764, "step": 24330 }, { "epoch": 0.72, "learning_rate": 1.3936466544183017e-05, "loss": 0.8258, "step": 24340 }, { "epoch": 0.72, "learning_rate": 1.3921649973330172e-05, "loss": 0.8167, "step": 24350 }, { "epoch": 0.72, "learning_rate": 1.3906833402477331e-05, "loss": 0.8011, "step": 24360 }, { "epoch": 0.72, "learning_rate": 1.389201683162449e-05, "loss": 0.8363, "step": 24370 }, { "epoch": 0.72, "learning_rate": 1.3877200260771647e-05, "loss": 0.7716, "step": 24380 }, { "epoch": 0.72, "learning_rate": 1.3862383689918806e-05, "loss": 0.8876, "step": 24390 }, { "epoch": 0.72, "learning_rate": 1.3847567119065964e-05, "loss": 0.7226, "step": 24400 }, { "epoch": 0.72, "learning_rate": 1.3832750548213123e-05, "loss": 0.7654, "step": 24410 }, { "epoch": 0.72, "learning_rate": 1.3817933977360281e-05, "loss": 0.8115, "step": 24420 }, { "epoch": 0.72, "learning_rate": 1.3803117406507437e-05, "loss": 0.8248, "step": 24430 }, { "epoch": 0.72, "learning_rate": 1.3788300835654596e-05, "loss": 0.8159, "step": 24440 }, { "epoch": 0.72, "learning_rate": 1.3773484264801757e-05, "loss": 0.7409, "step": 24450 }, { "epoch": 0.72, "learning_rate": 1.3758667693948912e-05, "loss": 0.8088, "step": 24460 }, { "epoch": 0.73, "learning_rate": 1.3743851123096071e-05, "loss": 0.7538, "step": 24470 }, { "epoch": 0.73, "learning_rate": 1.372903455224323e-05, "loss": 0.9526, "step": 24480 }, { "epoch": 0.73, "learning_rate": 1.3714217981390387e-05, "loss": 0.9346, "step": 24490 }, { "epoch": 0.73, "learning_rate": 1.3699401410537546e-05, "loss": 0.955, "step": 24500 }, { "epoch": 0.73, "learning_rate": 1.3684584839684705e-05, "loss": 0.8281, "step": 24510 }, { "epoch": 0.73, "learning_rate": 1.3669768268831862e-05, "loss": 0.7606, "step": 24520 }, { "epoch": 0.73, "learning_rate": 1.3654951697979021e-05, "loss": 0.8452, "step": 24530 }, { "epoch": 0.73, "learning_rate": 1.3640135127126177e-05, "loss": 0.7429, "step": 24540 }, { "epoch": 0.73, "learning_rate": 1.3625318556273337e-05, "loss": 0.8333, "step": 24550 }, { "epoch": 0.73, "learning_rate": 1.3610501985420496e-05, "loss": 0.8538, "step": 24560 }, { "epoch": 0.73, "learning_rate": 1.3595685414567652e-05, "loss": 0.8278, "step": 24570 }, { "epoch": 0.73, "learning_rate": 1.358086884371481e-05, "loss": 0.8386, "step": 24580 }, { "epoch": 0.73, "learning_rate": 1.356605227286197e-05, "loss": 0.9109, "step": 24590 }, { "epoch": 0.73, "learning_rate": 1.3551235702009127e-05, "loss": 0.66, "step": 24600 }, { "epoch": 0.73, "learning_rate": 1.3536419131156286e-05, "loss": 0.9268, "step": 24610 }, { "epoch": 0.73, "learning_rate": 1.3521602560303445e-05, "loss": 0.7134, "step": 24620 }, { "epoch": 0.73, "learning_rate": 1.3506785989450602e-05, "loss": 0.6895, "step": 24630 }, { "epoch": 0.73, "learning_rate": 1.3491969418597761e-05, "loss": 0.8673, "step": 24640 }, { "epoch": 0.73, "learning_rate": 1.347715284774492e-05, "loss": 0.7157, "step": 24650 }, { "epoch": 0.73, "learning_rate": 1.3462336276892077e-05, "loss": 0.9058, "step": 24660 }, { "epoch": 0.73, "learning_rate": 1.3447519706039236e-05, "loss": 1.0182, "step": 24670 }, { "epoch": 0.73, "learning_rate": 1.3432703135186391e-05, "loss": 0.8496, "step": 24680 }, { "epoch": 0.73, "learning_rate": 1.341788656433355e-05, "loss": 0.7644, "step": 24690 }, { "epoch": 0.73, "learning_rate": 1.3403069993480711e-05, "loss": 0.9253, "step": 24700 }, { "epoch": 0.73, "learning_rate": 1.3388253422627867e-05, "loss": 0.873, "step": 24710 }, { "epoch": 0.73, "learning_rate": 1.3373436851775026e-05, "loss": 0.7271, "step": 24720 }, { "epoch": 0.73, "learning_rate": 1.3358620280922184e-05, "loss": 0.8746, "step": 24730 }, { "epoch": 0.73, "learning_rate": 1.3343803710069342e-05, "loss": 0.83, "step": 24740 }, { "epoch": 0.73, "learning_rate": 1.33289871392165e-05, "loss": 0.7188, "step": 24750 }, { "epoch": 0.73, "learning_rate": 1.331417056836366e-05, "loss": 0.6843, "step": 24760 }, { "epoch": 0.73, "learning_rate": 1.3299353997510817e-05, "loss": 0.8361, "step": 24770 }, { "epoch": 0.73, "learning_rate": 1.3284537426657976e-05, "loss": 0.9013, "step": 24780 }, { "epoch": 0.73, "learning_rate": 1.3269720855805135e-05, "loss": 0.9251, "step": 24790 }, { "epoch": 0.73, "learning_rate": 1.325490428495229e-05, "loss": 0.7374, "step": 24800 }, { "epoch": 0.74, "learning_rate": 1.324008771409945e-05, "loss": 0.9033, "step": 24810 }, { "epoch": 0.74, "learning_rate": 1.3225271143246606e-05, "loss": 0.7938, "step": 24820 }, { "epoch": 0.74, "learning_rate": 1.3210454572393765e-05, "loss": 0.8459, "step": 24830 }, { "epoch": 0.74, "learning_rate": 1.3195638001540924e-05, "loss": 0.7765, "step": 24840 }, { "epoch": 0.74, "learning_rate": 1.3180821430688081e-05, "loss": 0.7869, "step": 24850 }, { "epoch": 0.74, "learning_rate": 1.316600485983524e-05, "loss": 0.8801, "step": 24860 }, { "epoch": 0.74, "learning_rate": 1.31511882889824e-05, "loss": 0.9698, "step": 24870 }, { "epoch": 0.74, "learning_rate": 1.3136371718129556e-05, "loss": 0.8312, "step": 24880 }, { "epoch": 0.74, "learning_rate": 1.3121555147276715e-05, "loss": 0.9598, "step": 24890 }, { "epoch": 0.74, "learning_rate": 1.3106738576423874e-05, "loss": 0.8063, "step": 24900 }, { "epoch": 0.74, "learning_rate": 1.309192200557103e-05, "loss": 0.964, "step": 24910 }, { "epoch": 0.74, "learning_rate": 1.307710543471819e-05, "loss": 0.7865, "step": 24920 }, { "epoch": 0.74, "learning_rate": 1.306228886386535e-05, "loss": 0.7829, "step": 24930 }, { "epoch": 0.74, "learning_rate": 1.3047472293012505e-05, "loss": 0.8451, "step": 24940 }, { "epoch": 0.74, "learning_rate": 1.3032655722159664e-05, "loss": 0.7222, "step": 24950 }, { "epoch": 0.74, "learning_rate": 1.3017839151306821e-05, "loss": 0.7559, "step": 24960 }, { "epoch": 0.74, "learning_rate": 1.300302258045398e-05, "loss": 0.9221, "step": 24970 }, { "epoch": 0.74, "learning_rate": 1.2988206009601139e-05, "loss": 0.809, "step": 24980 }, { "epoch": 0.74, "learning_rate": 1.2973389438748296e-05, "loss": 0.7493, "step": 24990 }, { "epoch": 0.74, "learning_rate": 1.2958572867895455e-05, "loss": 0.9133, "step": 25000 }, { "epoch": 0.74, "learning_rate": 1.2943756297042614e-05, "loss": 0.796, "step": 25010 }, { "epoch": 0.74, "learning_rate": 1.2928939726189771e-05, "loss": 0.7717, "step": 25020 }, { "epoch": 0.74, "learning_rate": 1.291412315533693e-05, "loss": 0.954, "step": 25030 }, { "epoch": 0.74, "learning_rate": 1.2899306584484089e-05, "loss": 0.803, "step": 25040 }, { "epoch": 0.74, "learning_rate": 1.2884490013631245e-05, "loss": 0.7036, "step": 25050 }, { "epoch": 0.74, "learning_rate": 1.2869673442778404e-05, "loss": 0.8272, "step": 25060 }, { "epoch": 0.74, "learning_rate": 1.2854856871925564e-05, "loss": 0.8529, "step": 25070 }, { "epoch": 0.74, "learning_rate": 1.284004030107272e-05, "loss": 0.9032, "step": 25080 }, { "epoch": 0.74, "learning_rate": 1.2825223730219879e-05, "loss": 0.8864, "step": 25090 }, { "epoch": 0.74, "learning_rate": 1.2810407159367036e-05, "loss": 0.7249, "step": 25100 }, { "epoch": 0.74, "learning_rate": 1.2795590588514195e-05, "loss": 0.9738, "step": 25110 }, { "epoch": 0.74, "learning_rate": 1.2780774017661354e-05, "loss": 0.8423, "step": 25120 }, { "epoch": 0.74, "learning_rate": 1.2765957446808511e-05, "loss": 0.951, "step": 25130 }, { "epoch": 0.74, "learning_rate": 1.275114087595567e-05, "loss": 0.8822, "step": 25140 }, { "epoch": 0.75, "learning_rate": 1.2736324305102829e-05, "loss": 0.9904, "step": 25150 }, { "epoch": 0.75, "learning_rate": 1.2721507734249984e-05, "loss": 0.9531, "step": 25160 }, { "epoch": 0.75, "learning_rate": 1.2706691163397143e-05, "loss": 0.7682, "step": 25170 }, { "epoch": 0.75, "learning_rate": 1.2691874592544304e-05, "loss": 0.8224, "step": 25180 }, { "epoch": 0.75, "learning_rate": 1.267705802169146e-05, "loss": 0.8131, "step": 25190 }, { "epoch": 0.75, "learning_rate": 1.2662241450838618e-05, "loss": 0.877, "step": 25200 }, { "epoch": 0.75, "learning_rate": 1.2647424879985777e-05, "loss": 0.8206, "step": 25210 }, { "epoch": 0.75, "learning_rate": 1.2632608309132935e-05, "loss": 0.8753, "step": 25220 }, { "epoch": 0.75, "learning_rate": 1.2617791738280093e-05, "loss": 0.8928, "step": 25230 }, { "epoch": 0.75, "learning_rate": 1.260297516742725e-05, "loss": 0.8714, "step": 25240 }, { "epoch": 0.75, "learning_rate": 1.258815859657441e-05, "loss": 0.7754, "step": 25250 }, { "epoch": 0.75, "learning_rate": 1.2573342025721569e-05, "loss": 1.0292, "step": 25260 }, { "epoch": 0.75, "learning_rate": 1.2558525454868724e-05, "loss": 0.8371, "step": 25270 }, { "epoch": 0.75, "learning_rate": 1.2543708884015885e-05, "loss": 0.9046, "step": 25280 }, { "epoch": 0.75, "learning_rate": 1.2528892313163044e-05, "loss": 0.729, "step": 25290 }, { "epoch": 0.75, "learning_rate": 1.25140757423102e-05, "loss": 0.7027, "step": 25300 }, { "epoch": 0.75, "learning_rate": 1.2499259171457358e-05, "loss": 0.7705, "step": 25310 }, { "epoch": 0.75, "learning_rate": 1.2484442600604517e-05, "loss": 0.8154, "step": 25320 }, { "epoch": 0.75, "learning_rate": 1.2469626029751674e-05, "loss": 0.9587, "step": 25330 }, { "epoch": 0.75, "learning_rate": 1.2454809458898833e-05, "loss": 0.8896, "step": 25340 }, { "epoch": 0.75, "learning_rate": 1.243999288804599e-05, "loss": 0.8733, "step": 25350 }, { "epoch": 0.75, "learning_rate": 1.242517631719315e-05, "loss": 1.0025, "step": 25360 }, { "epoch": 0.75, "learning_rate": 1.2410359746340308e-05, "loss": 0.7811, "step": 25370 }, { "epoch": 0.75, "learning_rate": 1.2395543175487466e-05, "loss": 0.8052, "step": 25380 }, { "epoch": 0.75, "learning_rate": 1.2380726604634624e-05, "loss": 0.8242, "step": 25390 }, { "epoch": 0.75, "learning_rate": 1.2365910033781782e-05, "loss": 0.7086, "step": 25400 }, { "epoch": 0.75, "learning_rate": 1.235109346292894e-05, "loss": 0.8182, "step": 25410 }, { "epoch": 0.75, "learning_rate": 1.2336276892076098e-05, "loss": 0.8977, "step": 25420 }, { "epoch": 0.75, "learning_rate": 1.2321460321223257e-05, "loss": 0.8453, "step": 25430 }, { "epoch": 0.75, "learning_rate": 1.2306643750370416e-05, "loss": 0.9202, "step": 25440 }, { "epoch": 0.75, "learning_rate": 1.2291827179517573e-05, "loss": 0.8283, "step": 25450 }, { "epoch": 0.75, "learning_rate": 1.2277010608664732e-05, "loss": 0.8236, "step": 25460 }, { "epoch": 0.75, "learning_rate": 1.2262194037811889e-05, "loss": 0.7732, "step": 25470 }, { "epoch": 0.76, "learning_rate": 1.2247377466959048e-05, "loss": 0.8318, "step": 25480 }, { "epoch": 0.76, "learning_rate": 1.2232560896106205e-05, "loss": 0.6824, "step": 25490 }, { "epoch": 0.76, "learning_rate": 1.2217744325253364e-05, "loss": 0.7716, "step": 25500 }, { "epoch": 0.76, "learning_rate": 1.2202927754400523e-05, "loss": 0.8433, "step": 25510 }, { "epoch": 0.76, "learning_rate": 1.218811118354768e-05, "loss": 0.7985, "step": 25520 }, { "epoch": 0.76, "learning_rate": 1.2173294612694838e-05, "loss": 0.7839, "step": 25530 }, { "epoch": 0.76, "learning_rate": 1.2158478041841996e-05, "loss": 0.7955, "step": 25540 }, { "epoch": 0.76, "learning_rate": 1.2143661470989155e-05, "loss": 0.9498, "step": 25550 }, { "epoch": 0.76, "learning_rate": 1.2128844900136313e-05, "loss": 0.7866, "step": 25560 }, { "epoch": 0.76, "learning_rate": 1.2114028329283472e-05, "loss": 0.9004, "step": 25570 }, { "epoch": 0.76, "learning_rate": 1.209921175843063e-05, "loss": 0.8698, "step": 25580 }, { "epoch": 0.76, "learning_rate": 1.2084395187577788e-05, "loss": 0.8557, "step": 25590 }, { "epoch": 0.76, "learning_rate": 1.2069578616724945e-05, "loss": 0.8167, "step": 25600 }, { "epoch": 0.76, "learning_rate": 1.2054762045872104e-05, "loss": 0.9107, "step": 25610 }, { "epoch": 0.76, "learning_rate": 1.2039945475019263e-05, "loss": 0.7753, "step": 25620 }, { "epoch": 0.76, "learning_rate": 1.202512890416642e-05, "loss": 0.8963, "step": 25630 }, { "epoch": 0.76, "learning_rate": 1.2010312333313577e-05, "loss": 0.9922, "step": 25640 }, { "epoch": 0.76, "learning_rate": 1.1995495762460738e-05, "loss": 0.8771, "step": 25650 }, { "epoch": 0.76, "learning_rate": 1.1980679191607895e-05, "loss": 0.9021, "step": 25660 }, { "epoch": 0.76, "learning_rate": 1.1965862620755052e-05, "loss": 0.8496, "step": 25670 }, { "epoch": 0.76, "learning_rate": 1.1951046049902211e-05, "loss": 0.8117, "step": 25680 }, { "epoch": 0.76, "learning_rate": 1.193622947904937e-05, "loss": 0.8069, "step": 25690 }, { "epoch": 0.76, "learning_rate": 1.1921412908196527e-05, "loss": 0.7271, "step": 25700 }, { "epoch": 0.76, "learning_rate": 1.1906596337343685e-05, "loss": 0.7499, "step": 25710 }, { "epoch": 0.76, "learning_rate": 1.1891779766490845e-05, "loss": 0.7856, "step": 25720 }, { "epoch": 0.76, "learning_rate": 1.1876963195638003e-05, "loss": 0.7633, "step": 25730 }, { "epoch": 0.76, "learning_rate": 1.186214662478516e-05, "loss": 0.8777, "step": 25740 }, { "epoch": 0.76, "learning_rate": 1.1847330053932319e-05, "loss": 0.8289, "step": 25750 }, { "epoch": 0.76, "learning_rate": 1.1832513483079478e-05, "loss": 0.9075, "step": 25760 }, { "epoch": 0.76, "learning_rate": 1.1817696912226635e-05, "loss": 0.8961, "step": 25770 }, { "epoch": 0.76, "learning_rate": 1.1802880341373792e-05, "loss": 0.7867, "step": 25780 }, { "epoch": 0.76, "learning_rate": 1.1788063770520951e-05, "loss": 0.7762, "step": 25790 }, { "epoch": 0.76, "learning_rate": 1.177324719966811e-05, "loss": 0.8488, "step": 25800 }, { "epoch": 0.76, "learning_rate": 1.1758430628815267e-05, "loss": 0.8515, "step": 25810 }, { "epoch": 0.77, "learning_rate": 1.1743614057962424e-05, "loss": 0.8932, "step": 25820 }, { "epoch": 0.77, "learning_rate": 1.1728797487109585e-05, "loss": 0.965, "step": 25830 }, { "epoch": 0.77, "learning_rate": 1.1713980916256742e-05, "loss": 0.8563, "step": 25840 }, { "epoch": 0.77, "learning_rate": 1.16991643454039e-05, "loss": 0.8934, "step": 25850 }, { "epoch": 0.77, "learning_rate": 1.1684347774551058e-05, "loss": 0.7615, "step": 25860 }, { "epoch": 0.77, "learning_rate": 1.1669531203698217e-05, "loss": 0.8583, "step": 25870 }, { "epoch": 0.77, "learning_rate": 1.1654714632845375e-05, "loss": 0.7768, "step": 25880 }, { "epoch": 0.77, "learning_rate": 1.1639898061992532e-05, "loss": 0.8065, "step": 25890 }, { "epoch": 0.77, "learning_rate": 1.162508149113969e-05, "loss": 0.7745, "step": 25900 }, { "epoch": 0.77, "learning_rate": 1.161026492028685e-05, "loss": 1.0746, "step": 25910 }, { "epoch": 0.77, "learning_rate": 1.1595448349434007e-05, "loss": 0.8585, "step": 25920 }, { "epoch": 0.77, "learning_rate": 1.1580631778581166e-05, "loss": 0.956, "step": 25930 }, { "epoch": 0.77, "learning_rate": 1.1565815207728325e-05, "loss": 0.8679, "step": 25940 }, { "epoch": 0.77, "learning_rate": 1.1550998636875482e-05, "loss": 0.7557, "step": 25950 }, { "epoch": 0.77, "learning_rate": 1.153618206602264e-05, "loss": 0.7569, "step": 25960 }, { "epoch": 0.77, "learning_rate": 1.1521365495169798e-05, "loss": 0.7993, "step": 25970 }, { "epoch": 0.77, "learning_rate": 1.1506548924316957e-05, "loss": 0.6295, "step": 25980 }, { "epoch": 0.77, "learning_rate": 1.1491732353464114e-05, "loss": 0.7848, "step": 25990 }, { "epoch": 0.77, "learning_rate": 1.1476915782611273e-05, "loss": 0.7587, "step": 26000 }, { "epoch": 0.77, "learning_rate": 1.1462099211758432e-05, "loss": 0.7593, "step": 26010 }, { "epoch": 0.77, "learning_rate": 1.144728264090559e-05, "loss": 0.8481, "step": 26020 }, { "epoch": 0.77, "learning_rate": 1.1432466070052747e-05, "loss": 0.8453, "step": 26030 }, { "epoch": 0.77, "learning_rate": 1.1417649499199906e-05, "loss": 0.9776, "step": 26040 }, { "epoch": 0.77, "learning_rate": 1.1402832928347064e-05, "loss": 0.8218, "step": 26050 }, { "epoch": 0.77, "learning_rate": 1.1388016357494222e-05, "loss": 0.9226, "step": 26060 }, { "epoch": 0.77, "learning_rate": 1.137319978664138e-05, "loss": 0.8553, "step": 26070 }, { "epoch": 0.77, "learning_rate": 1.1358383215788538e-05, "loss": 0.8774, "step": 26080 }, { "epoch": 0.77, "learning_rate": 1.1343566644935697e-05, "loss": 0.8202, "step": 26090 }, { "epoch": 0.77, "learning_rate": 1.1328750074082854e-05, "loss": 0.9213, "step": 26100 }, { "epoch": 0.77, "learning_rate": 1.1313933503230013e-05, "loss": 0.8127, "step": 26110 }, { "epoch": 0.77, "learning_rate": 1.1299116932377172e-05, "loss": 0.9204, "step": 26120 }, { "epoch": 0.77, "learning_rate": 1.1284300361524329e-05, "loss": 0.8596, "step": 26130 }, { "epoch": 0.77, "learning_rate": 1.1269483790671488e-05, "loss": 0.9033, "step": 26140 }, { "epoch": 0.77, "learning_rate": 1.1254667219818645e-05, "loss": 0.9182, "step": 26150 }, { "epoch": 0.78, "learning_rate": 1.1239850648965804e-05, "loss": 0.862, "step": 26160 }, { "epoch": 0.78, "learning_rate": 1.1225034078112961e-05, "loss": 0.6718, "step": 26170 }, { "epoch": 0.78, "learning_rate": 1.121021750726012e-05, "loss": 0.7975, "step": 26180 }, { "epoch": 0.78, "learning_rate": 1.119540093640728e-05, "loss": 0.7768, "step": 26190 }, { "epoch": 0.78, "learning_rate": 1.1180584365554437e-05, "loss": 0.8502, "step": 26200 }, { "epoch": 0.78, "learning_rate": 1.1165767794701595e-05, "loss": 0.8791, "step": 26210 }, { "epoch": 0.78, "learning_rate": 1.1150951223848753e-05, "loss": 0.7101, "step": 26220 }, { "epoch": 0.78, "learning_rate": 1.1136134652995912e-05, "loss": 0.8722, "step": 26230 }, { "epoch": 0.78, "learning_rate": 1.1121318082143069e-05, "loss": 0.9821, "step": 26240 }, { "epoch": 0.78, "learning_rate": 1.1106501511290228e-05, "loss": 0.8882, "step": 26250 }, { "epoch": 0.78, "learning_rate": 1.1091684940437385e-05, "loss": 0.8302, "step": 26260 }, { "epoch": 0.78, "learning_rate": 1.1076868369584544e-05, "loss": 0.7746, "step": 26270 }, { "epoch": 0.78, "learning_rate": 1.1062051798731703e-05, "loss": 0.8006, "step": 26280 }, { "epoch": 0.78, "learning_rate": 1.104723522787886e-05, "loss": 0.8414, "step": 26290 }, { "epoch": 0.78, "learning_rate": 1.1032418657026019e-05, "loss": 0.9094, "step": 26300 }, { "epoch": 0.78, "learning_rate": 1.1017602086173176e-05, "loss": 0.842, "step": 26310 }, { "epoch": 0.78, "learning_rate": 1.1002785515320335e-05, "loss": 0.8943, "step": 26320 }, { "epoch": 0.78, "learning_rate": 1.0987968944467492e-05, "loss": 0.7498, "step": 26330 }, { "epoch": 0.78, "learning_rate": 1.0973152373614651e-05, "loss": 0.8495, "step": 26340 }, { "epoch": 0.78, "learning_rate": 1.095833580276181e-05, "loss": 0.8342, "step": 26350 }, { "epoch": 0.78, "learning_rate": 1.0943519231908967e-05, "loss": 0.8103, "step": 26360 }, { "epoch": 0.78, "learning_rate": 1.0928702661056125e-05, "loss": 0.7783, "step": 26370 }, { "epoch": 0.78, "learning_rate": 1.0913886090203284e-05, "loss": 0.7801, "step": 26380 }, { "epoch": 0.78, "learning_rate": 1.0899069519350443e-05, "loss": 0.8705, "step": 26390 }, { "epoch": 0.78, "learning_rate": 1.08842529484976e-05, "loss": 0.8933, "step": 26400 }, { "epoch": 0.78, "learning_rate": 1.0869436377644759e-05, "loss": 0.6934, "step": 26410 }, { "epoch": 0.78, "learning_rate": 1.0854619806791918e-05, "loss": 0.7823, "step": 26420 }, { "epoch": 0.78, "learning_rate": 1.0839803235939075e-05, "loss": 0.819, "step": 26430 }, { "epoch": 0.78, "learning_rate": 1.0824986665086232e-05, "loss": 0.7221, "step": 26440 }, { "epoch": 0.78, "learning_rate": 1.0810170094233391e-05, "loss": 0.8498, "step": 26450 }, { "epoch": 0.78, "learning_rate": 1.079535352338055e-05, "loss": 0.7632, "step": 26460 }, { "epoch": 0.78, "learning_rate": 1.0780536952527707e-05, "loss": 0.8229, "step": 26470 }, { "epoch": 0.78, "learning_rate": 1.0765720381674866e-05, "loss": 0.7822, "step": 26480 }, { "epoch": 0.78, "learning_rate": 1.0750903810822025e-05, "loss": 0.7216, "step": 26490 }, { "epoch": 0.79, "learning_rate": 1.0736087239969182e-05, "loss": 0.855, "step": 26500 }, { "epoch": 0.79, "learning_rate": 1.072127066911634e-05, "loss": 0.9372, "step": 26510 }, { "epoch": 0.79, "learning_rate": 1.0706454098263498e-05, "loss": 0.8273, "step": 26520 }, { "epoch": 0.79, "learning_rate": 1.0691637527410657e-05, "loss": 0.8511, "step": 26530 }, { "epoch": 0.79, "learning_rate": 1.0676820956557815e-05, "loss": 0.953, "step": 26540 }, { "epoch": 0.79, "learning_rate": 1.0662004385704972e-05, "loss": 0.7749, "step": 26550 }, { "epoch": 0.79, "learning_rate": 1.0647187814852132e-05, "loss": 0.9035, "step": 26560 }, { "epoch": 0.79, "learning_rate": 1.063237124399929e-05, "loss": 0.7956, "step": 26570 }, { "epoch": 0.79, "learning_rate": 1.0617554673146447e-05, "loss": 0.8819, "step": 26580 }, { "epoch": 0.79, "learning_rate": 1.0602738102293606e-05, "loss": 0.9241, "step": 26590 }, { "epoch": 0.79, "learning_rate": 1.0587921531440765e-05, "loss": 0.8081, "step": 26600 }, { "epoch": 0.79, "learning_rate": 1.0573104960587922e-05, "loss": 0.8689, "step": 26610 }, { "epoch": 0.79, "learning_rate": 1.055828838973508e-05, "loss": 0.8158, "step": 26620 }, { "epoch": 0.79, "learning_rate": 1.0543471818882238e-05, "loss": 0.9474, "step": 26630 }, { "epoch": 0.79, "learning_rate": 1.0528655248029397e-05, "loss": 0.8814, "step": 26640 }, { "epoch": 0.79, "learning_rate": 1.0513838677176554e-05, "loss": 0.8688, "step": 26650 }, { "epoch": 0.79, "learning_rate": 1.0499022106323712e-05, "loss": 0.8209, "step": 26660 }, { "epoch": 0.79, "learning_rate": 1.0484205535470872e-05, "loss": 0.7129, "step": 26670 }, { "epoch": 0.79, "learning_rate": 1.046938896461803e-05, "loss": 0.8338, "step": 26680 }, { "epoch": 0.79, "learning_rate": 1.0454572393765187e-05, "loss": 0.876, "step": 26690 }, { "epoch": 0.79, "learning_rate": 1.0439755822912346e-05, "loss": 0.8838, "step": 26700 }, { "epoch": 0.79, "learning_rate": 1.0424939252059504e-05, "loss": 0.9687, "step": 26710 }, { "epoch": 0.79, "learning_rate": 1.0410122681206662e-05, "loss": 0.9331, "step": 26720 }, { "epoch": 0.79, "learning_rate": 1.0395306110353819e-05, "loss": 0.8596, "step": 26730 }, { "epoch": 0.79, "learning_rate": 1.038048953950098e-05, "loss": 0.9814, "step": 26740 }, { "epoch": 0.79, "learning_rate": 1.0365672968648137e-05, "loss": 0.8254, "step": 26750 }, { "epoch": 0.79, "learning_rate": 1.0350856397795294e-05, "loss": 0.8314, "step": 26760 }, { "epoch": 0.79, "learning_rate": 1.0336039826942453e-05, "loss": 0.8254, "step": 26770 }, { "epoch": 0.79, "learning_rate": 1.0321223256089612e-05, "loss": 0.8073, "step": 26780 }, { "epoch": 0.79, "learning_rate": 1.0306406685236769e-05, "loss": 0.8626, "step": 26790 }, { "epoch": 0.79, "learning_rate": 1.0291590114383926e-05, "loss": 0.8962, "step": 26800 }, { "epoch": 0.79, "learning_rate": 1.0276773543531085e-05, "loss": 0.8781, "step": 26810 }, { "epoch": 0.79, "learning_rate": 1.0261956972678244e-05, "loss": 0.8088, "step": 26820 }, { "epoch": 0.8, "learning_rate": 1.0247140401825401e-05, "loss": 0.9101, "step": 26830 }, { "epoch": 0.8, "learning_rate": 1.023232383097256e-05, "loss": 0.8275, "step": 26840 }, { "epoch": 0.8, "learning_rate": 1.021750726011972e-05, "loss": 0.7513, "step": 26850 }, { "epoch": 0.8, "learning_rate": 1.0202690689266877e-05, "loss": 0.9617, "step": 26860 }, { "epoch": 0.8, "learning_rate": 1.0187874118414034e-05, "loss": 0.8926, "step": 26870 }, { "epoch": 0.8, "learning_rate": 1.0173057547561193e-05, "loss": 0.8115, "step": 26880 }, { "epoch": 0.8, "learning_rate": 1.0158240976708352e-05, "loss": 0.8416, "step": 26890 }, { "epoch": 0.8, "learning_rate": 1.0143424405855509e-05, "loss": 0.9499, "step": 26900 }, { "epoch": 0.8, "learning_rate": 1.0128607835002668e-05, "loss": 0.8245, "step": 26910 }, { "epoch": 0.8, "learning_rate": 1.0113791264149827e-05, "loss": 0.8692, "step": 26920 }, { "epoch": 0.8, "learning_rate": 1.0098974693296984e-05, "loss": 0.7294, "step": 26930 }, { "epoch": 0.8, "learning_rate": 1.0084158122444141e-05, "loss": 0.911, "step": 26940 }, { "epoch": 0.8, "learning_rate": 1.00693415515913e-05, "loss": 0.8318, "step": 26950 }, { "epoch": 0.8, "learning_rate": 1.0054524980738459e-05, "loss": 0.8141, "step": 26960 }, { "epoch": 0.8, "learning_rate": 1.0039708409885616e-05, "loss": 0.7381, "step": 26970 }, { "epoch": 0.8, "learning_rate": 1.0024891839032775e-05, "loss": 0.8144, "step": 26980 }, { "epoch": 0.8, "learning_rate": 1.0010075268179932e-05, "loss": 0.825, "step": 26990 }, { "epoch": 0.8, "learning_rate": 9.995258697327091e-06, "loss": 0.7575, "step": 27000 }, { "epoch": 0.8, "learning_rate": 9.980442126474249e-06, "loss": 0.8501, "step": 27010 }, { "epoch": 0.8, "learning_rate": 9.965625555621407e-06, "loss": 0.8956, "step": 27020 }, { "epoch": 0.8, "learning_rate": 9.950808984768566e-06, "loss": 0.8825, "step": 27030 }, { "epoch": 0.8, "learning_rate": 9.935992413915724e-06, "loss": 0.9027, "step": 27040 }, { "epoch": 0.8, "learning_rate": 9.921175843062883e-06, "loss": 0.9106, "step": 27050 }, { "epoch": 0.8, "learning_rate": 9.90635927221004e-06, "loss": 0.8476, "step": 27060 }, { "epoch": 0.8, "learning_rate": 9.891542701357199e-06, "loss": 0.7358, "step": 27070 }, { "epoch": 0.8, "learning_rate": 9.876726130504356e-06, "loss": 0.856, "step": 27080 }, { "epoch": 0.8, "learning_rate": 9.861909559651515e-06, "loss": 0.7659, "step": 27090 }, { "epoch": 0.8, "learning_rate": 9.847092988798672e-06, "loss": 0.9216, "step": 27100 }, { "epoch": 0.8, "learning_rate": 9.832276417945831e-06, "loss": 0.729, "step": 27110 }, { "epoch": 0.8, "learning_rate": 9.81745984709299e-06, "loss": 0.8307, "step": 27120 }, { "epoch": 0.8, "learning_rate": 9.802643276240147e-06, "loss": 0.8572, "step": 27130 }, { "epoch": 0.8, "learning_rate": 9.787826705387306e-06, "loss": 0.8346, "step": 27140 }, { "epoch": 0.8, "learning_rate": 9.773010134534463e-06, "loss": 1.0176, "step": 27150 }, { "epoch": 0.8, "learning_rate": 9.758193563681622e-06, "loss": 0.8436, "step": 27160 }, { "epoch": 0.81, "learning_rate": 9.74337699282878e-06, "loss": 0.8001, "step": 27170 }, { "epoch": 0.81, "learning_rate": 9.728560421975938e-06, "loss": 0.7491, "step": 27180 }, { "epoch": 0.81, "learning_rate": 9.713743851123097e-06, "loss": 0.7306, "step": 27190 }, { "epoch": 0.81, "learning_rate": 9.698927280270255e-06, "loss": 0.8969, "step": 27200 }, { "epoch": 0.81, "learning_rate": 9.684110709417414e-06, "loss": 0.7704, "step": 27210 }, { "epoch": 0.81, "learning_rate": 9.66929413856457e-06, "loss": 0.8897, "step": 27220 }, { "epoch": 0.81, "learning_rate": 9.65447756771173e-06, "loss": 0.7944, "step": 27230 }, { "epoch": 0.81, "learning_rate": 9.639660996858887e-06, "loss": 0.7888, "step": 27240 }, { "epoch": 0.81, "learning_rate": 9.624844426006046e-06, "loss": 0.8273, "step": 27250 }, { "epoch": 0.81, "learning_rate": 9.610027855153205e-06, "loss": 0.8328, "step": 27260 }, { "epoch": 0.81, "learning_rate": 9.595211284300362e-06, "loss": 0.8889, "step": 27270 }, { "epoch": 0.81, "learning_rate": 9.58039471344752e-06, "loss": 0.7882, "step": 27280 }, { "epoch": 0.81, "learning_rate": 9.565578142594678e-06, "loss": 0.8158, "step": 27290 }, { "epoch": 0.81, "learning_rate": 9.550761571741837e-06, "loss": 0.7459, "step": 27300 }, { "epoch": 0.81, "learning_rate": 9.535945000888994e-06, "loss": 0.87, "step": 27310 }, { "epoch": 0.81, "learning_rate": 9.521128430036153e-06, "loss": 0.7732, "step": 27320 }, { "epoch": 0.81, "learning_rate": 9.506311859183312e-06, "loss": 0.8097, "step": 27330 }, { "epoch": 0.81, "learning_rate": 9.49149528833047e-06, "loss": 0.8793, "step": 27340 }, { "epoch": 0.81, "learning_rate": 9.476678717477627e-06, "loss": 0.9013, "step": 27350 }, { "epoch": 0.81, "learning_rate": 9.461862146624786e-06, "loss": 0.8694, "step": 27360 }, { "epoch": 0.81, "learning_rate": 9.447045575771945e-06, "loss": 0.8583, "step": 27370 }, { "epoch": 0.81, "learning_rate": 9.432229004919102e-06, "loss": 0.8503, "step": 27380 }, { "epoch": 0.81, "learning_rate": 9.417412434066259e-06, "loss": 0.8637, "step": 27390 }, { "epoch": 0.81, "learning_rate": 9.40259586321342e-06, "loss": 0.7903, "step": 27400 }, { "epoch": 0.81, "learning_rate": 9.387779292360577e-06, "loss": 0.8728, "step": 27410 }, { "epoch": 0.81, "learning_rate": 9.372962721507734e-06, "loss": 0.7439, "step": 27420 }, { "epoch": 0.81, "learning_rate": 9.358146150654893e-06, "loss": 0.8156, "step": 27430 }, { "epoch": 0.81, "learning_rate": 9.343329579802052e-06, "loss": 0.9474, "step": 27440 }, { "epoch": 0.81, "learning_rate": 9.328513008949209e-06, "loss": 0.9365, "step": 27450 }, { "epoch": 0.81, "learning_rate": 9.313696438096366e-06, "loss": 0.786, "step": 27460 }, { "epoch": 0.81, "learning_rate": 9.298879867243527e-06, "loss": 0.7976, "step": 27470 }, { "epoch": 0.81, "learning_rate": 9.284063296390684e-06, "loss": 0.9132, "step": 27480 }, { "epoch": 0.81, "learning_rate": 9.269246725537841e-06, "loss": 0.8022, "step": 27490 }, { "epoch": 0.81, "learning_rate": 9.254430154685e-06, "loss": 0.8411, "step": 27500 }, { "epoch": 0.82, "learning_rate": 9.23961358383216e-06, "loss": 0.8011, "step": 27510 }, { "epoch": 0.82, "learning_rate": 9.224797012979317e-06, "loss": 0.9507, "step": 27520 }, { "epoch": 0.82, "learning_rate": 9.209980442126474e-06, "loss": 0.7688, "step": 27530 }, { "epoch": 0.82, "learning_rate": 9.195163871273633e-06, "loss": 0.7427, "step": 27540 }, { "epoch": 0.82, "learning_rate": 9.180347300420792e-06, "loss": 0.7929, "step": 27550 }, { "epoch": 0.82, "learning_rate": 9.165530729567949e-06, "loss": 0.7609, "step": 27560 }, { "epoch": 0.82, "learning_rate": 9.150714158715106e-06, "loss": 0.8253, "step": 27570 }, { "epoch": 0.82, "learning_rate": 9.135897587862267e-06, "loss": 0.8494, "step": 27580 }, { "epoch": 0.82, "learning_rate": 9.121081017009424e-06, "loss": 0.7224, "step": 27590 }, { "epoch": 0.82, "learning_rate": 9.106264446156581e-06, "loss": 0.8241, "step": 27600 }, { "epoch": 0.82, "learning_rate": 9.09144787530374e-06, "loss": 0.8178, "step": 27610 }, { "epoch": 0.82, "learning_rate": 9.076631304450899e-06, "loss": 0.83, "step": 27620 }, { "epoch": 0.82, "learning_rate": 9.061814733598056e-06, "loss": 0.6989, "step": 27630 }, { "epoch": 0.82, "learning_rate": 9.046998162745214e-06, "loss": 1.1115, "step": 27640 }, { "epoch": 0.82, "learning_rate": 9.032181591892374e-06, "loss": 0.8682, "step": 27650 }, { "epoch": 0.82, "learning_rate": 9.017365021039531e-06, "loss": 0.8742, "step": 27660 }, { "epoch": 0.82, "learning_rate": 9.002548450186689e-06, "loss": 0.9065, "step": 27670 }, { "epoch": 0.82, "learning_rate": 8.987731879333848e-06, "loss": 0.7826, "step": 27680 }, { "epoch": 0.82, "learning_rate": 8.972915308481006e-06, "loss": 0.6103, "step": 27690 }, { "epoch": 0.82, "learning_rate": 8.958098737628164e-06, "loss": 0.795, "step": 27700 }, { "epoch": 0.82, "learning_rate": 8.943282166775321e-06, "loss": 0.9199, "step": 27710 }, { "epoch": 0.82, "learning_rate": 8.92846559592248e-06, "loss": 0.8204, "step": 27720 }, { "epoch": 0.82, "learning_rate": 8.913649025069639e-06, "loss": 0.7308, "step": 27730 }, { "epoch": 0.82, "learning_rate": 8.898832454216796e-06, "loss": 0.6076, "step": 27740 }, { "epoch": 0.82, "learning_rate": 8.884015883363955e-06, "loss": 0.9196, "step": 27750 }, { "epoch": 0.82, "learning_rate": 8.869199312511114e-06, "loss": 0.8956, "step": 27760 }, { "epoch": 0.82, "learning_rate": 8.854382741658271e-06, "loss": 0.8484, "step": 27770 }, { "epoch": 0.82, "learning_rate": 8.839566170805428e-06, "loss": 0.8989, "step": 27780 }, { "epoch": 0.82, "learning_rate": 8.824749599952587e-06, "loss": 0.8727, "step": 27790 }, { "epoch": 0.82, "learning_rate": 8.809933029099746e-06, "loss": 0.7134, "step": 27800 }, { "epoch": 0.82, "learning_rate": 8.795116458246903e-06, "loss": 0.8573, "step": 27810 }, { "epoch": 0.82, "learning_rate": 8.780299887394062e-06, "loss": 0.812, "step": 27820 }, { "epoch": 0.82, "learning_rate": 8.76548331654122e-06, "loss": 0.7814, "step": 27830 }, { "epoch": 0.82, "learning_rate": 8.750666745688378e-06, "loss": 0.7689, "step": 27840 }, { "epoch": 0.83, "learning_rate": 8.735850174835536e-06, "loss": 0.8401, "step": 27850 }, { "epoch": 0.83, "learning_rate": 8.721033603982695e-06, "loss": 0.7971, "step": 27860 }, { "epoch": 0.83, "learning_rate": 8.706217033129854e-06, "loss": 0.7765, "step": 27870 }, { "epoch": 0.83, "learning_rate": 8.69140046227701e-06, "loss": 0.9027, "step": 27880 }, { "epoch": 0.83, "learning_rate": 8.67658389142417e-06, "loss": 0.964, "step": 27890 }, { "epoch": 0.83, "learning_rate": 8.661767320571327e-06, "loss": 0.7856, "step": 27900 }, { "epoch": 0.83, "learning_rate": 8.646950749718486e-06, "loss": 1.0029, "step": 27910 }, { "epoch": 0.83, "learning_rate": 8.632134178865643e-06, "loss": 0.8419, "step": 27920 }, { "epoch": 0.83, "learning_rate": 8.617317608012802e-06, "loss": 0.8328, "step": 27930 }, { "epoch": 0.83, "learning_rate": 8.602501037159961e-06, "loss": 0.75, "step": 27940 }, { "epoch": 0.83, "learning_rate": 8.587684466307118e-06, "loss": 0.9438, "step": 27950 }, { "epoch": 0.83, "learning_rate": 8.572867895454277e-06, "loss": 0.8421, "step": 27960 }, { "epoch": 0.83, "learning_rate": 8.558051324601434e-06, "loss": 1.1142, "step": 27970 }, { "epoch": 0.83, "learning_rate": 8.543234753748593e-06, "loss": 1.0156, "step": 27980 }, { "epoch": 0.83, "learning_rate": 8.52841818289575e-06, "loss": 0.9018, "step": 27990 }, { "epoch": 0.83, "learning_rate": 8.51360161204291e-06, "loss": 0.7118, "step": 28000 }, { "epoch": 0.83, "learning_rate": 8.498785041190067e-06, "loss": 0.8623, "step": 28010 }, { "epoch": 0.83, "learning_rate": 8.483968470337226e-06, "loss": 0.773, "step": 28020 }, { "epoch": 0.83, "learning_rate": 8.469151899484385e-06, "loss": 0.8335, "step": 28030 }, { "epoch": 0.83, "learning_rate": 8.454335328631542e-06, "loss": 0.7075, "step": 28040 }, { "epoch": 0.83, "learning_rate": 8.4395187577787e-06, "loss": 0.9465, "step": 28050 }, { "epoch": 0.83, "learning_rate": 8.424702186925858e-06, "loss": 0.8742, "step": 28060 }, { "epoch": 0.83, "learning_rate": 8.409885616073017e-06, "loss": 0.7308, "step": 28070 }, { "epoch": 0.83, "learning_rate": 8.395069045220174e-06, "loss": 0.8197, "step": 28080 }, { "epoch": 0.83, "learning_rate": 8.380252474367333e-06, "loss": 0.752, "step": 28090 }, { "epoch": 0.83, "learning_rate": 8.365435903514492e-06, "loss": 0.8653, "step": 28100 }, { "epoch": 0.83, "learning_rate": 8.35061933266165e-06, "loss": 0.7343, "step": 28110 }, { "epoch": 0.83, "learning_rate": 8.335802761808806e-06, "loss": 0.7, "step": 28120 }, { "epoch": 0.83, "learning_rate": 8.320986190955965e-06, "loss": 0.7569, "step": 28130 }, { "epoch": 0.83, "learning_rate": 8.306169620103124e-06, "loss": 1.0034, "step": 28140 }, { "epoch": 0.83, "learning_rate": 8.291353049250281e-06, "loss": 0.8393, "step": 28150 }, { "epoch": 0.83, "learning_rate": 8.27653647839744e-06, "loss": 0.8237, "step": 28160 }, { "epoch": 0.83, "learning_rate": 8.2617199075446e-06, "loss": 0.787, "step": 28170 }, { "epoch": 0.84, "learning_rate": 8.246903336691757e-06, "loss": 0.8261, "step": 28180 }, { "epoch": 0.84, "learning_rate": 8.232086765838914e-06, "loss": 0.7429, "step": 28190 }, { "epoch": 0.84, "learning_rate": 8.217270194986073e-06, "loss": 0.7722, "step": 28200 }, { "epoch": 0.84, "learning_rate": 8.202453624133232e-06, "loss": 0.8481, "step": 28210 }, { "epoch": 0.84, "learning_rate": 8.187637053280389e-06, "loss": 0.8171, "step": 28220 }, { "epoch": 0.84, "learning_rate": 8.172820482427548e-06, "loss": 0.6574, "step": 28230 }, { "epoch": 0.84, "learning_rate": 8.158003911574707e-06, "loss": 0.8484, "step": 28240 }, { "epoch": 0.84, "learning_rate": 8.143187340721864e-06, "loss": 0.8342, "step": 28250 }, { "epoch": 0.84, "learning_rate": 8.128370769869021e-06, "loss": 0.9326, "step": 28260 }, { "epoch": 0.84, "learning_rate": 8.11355419901618e-06, "loss": 0.724, "step": 28270 }, { "epoch": 0.84, "learning_rate": 8.098737628163339e-06, "loss": 0.9796, "step": 28280 }, { "epoch": 0.84, "learning_rate": 8.083921057310496e-06, "loss": 0.7743, "step": 28290 }, { "epoch": 0.84, "learning_rate": 8.069104486457654e-06, "loss": 0.9398, "step": 28300 }, { "epoch": 0.84, "learning_rate": 8.054287915604814e-06, "loss": 0.7642, "step": 28310 }, { "epoch": 0.84, "learning_rate": 8.039471344751971e-06, "loss": 0.8557, "step": 28320 }, { "epoch": 0.84, "learning_rate": 8.024654773899129e-06, "loss": 0.787, "step": 28330 }, { "epoch": 0.84, "learning_rate": 8.009838203046288e-06, "loss": 0.8425, "step": 28340 }, { "epoch": 0.84, "learning_rate": 7.995021632193446e-06, "loss": 0.9097, "step": 28350 }, { "epoch": 0.84, "learning_rate": 7.980205061340604e-06, "loss": 0.8853, "step": 28360 }, { "epoch": 0.84, "learning_rate": 7.965388490487761e-06, "loss": 0.8632, "step": 28370 }, { "epoch": 0.84, "learning_rate": 7.950571919634922e-06, "loss": 0.6685, "step": 28380 }, { "epoch": 0.84, "learning_rate": 7.935755348782079e-06, "loss": 0.7471, "step": 28390 }, { "epoch": 0.84, "learning_rate": 7.920938777929236e-06, "loss": 0.9071, "step": 28400 }, { "epoch": 0.84, "learning_rate": 7.906122207076393e-06, "loss": 0.8453, "step": 28410 }, { "epoch": 0.84, "learning_rate": 7.891305636223554e-06, "loss": 1.0258, "step": 28420 }, { "epoch": 0.84, "learning_rate": 7.876489065370711e-06, "loss": 0.8297, "step": 28430 }, { "epoch": 0.84, "learning_rate": 7.861672494517868e-06, "loss": 0.6788, "step": 28440 }, { "epoch": 0.84, "learning_rate": 7.846855923665027e-06, "loss": 0.8631, "step": 28450 }, { "epoch": 0.84, "learning_rate": 7.832039352812186e-06, "loss": 0.8629, "step": 28460 }, { "epoch": 0.84, "learning_rate": 7.817222781959343e-06, "loss": 0.8019, "step": 28470 }, { "epoch": 0.84, "learning_rate": 7.8024062111065e-06, "loss": 0.8317, "step": 28480 }, { "epoch": 0.84, "learning_rate": 7.787589640253661e-06, "loss": 0.722, "step": 28490 }, { "epoch": 0.84, "learning_rate": 7.772773069400818e-06, "loss": 0.8622, "step": 28500 }, { "epoch": 0.84, "learning_rate": 7.757956498547976e-06, "loss": 0.8397, "step": 28510 }, { "epoch": 0.85, "learning_rate": 7.743139927695135e-06, "loss": 0.7164, "step": 28520 }, { "epoch": 0.85, "learning_rate": 7.728323356842294e-06, "loss": 0.853, "step": 28530 }, { "epoch": 0.85, "learning_rate": 7.71350678598945e-06, "loss": 0.7922, "step": 28540 }, { "epoch": 0.85, "learning_rate": 7.698690215136608e-06, "loss": 0.893, "step": 28550 }, { "epoch": 0.85, "learning_rate": 7.683873644283767e-06, "loss": 0.8251, "step": 28560 }, { "epoch": 0.85, "learning_rate": 7.669057073430926e-06, "loss": 0.9007, "step": 28570 }, { "epoch": 0.85, "learning_rate": 7.654240502578083e-06, "loss": 0.7733, "step": 28580 }, { "epoch": 0.85, "learning_rate": 7.639423931725242e-06, "loss": 0.822, "step": 28590 }, { "epoch": 0.85, "learning_rate": 7.6246073608724e-06, "loss": 0.7719, "step": 28600 }, { "epoch": 0.85, "learning_rate": 7.609790790019558e-06, "loss": 0.8517, "step": 28610 }, { "epoch": 0.85, "learning_rate": 7.5949742191667155e-06, "loss": 0.855, "step": 28620 }, { "epoch": 0.85, "learning_rate": 7.580157648313875e-06, "loss": 0.9967, "step": 28630 }, { "epoch": 0.85, "learning_rate": 7.5653410774610325e-06, "loss": 0.9014, "step": 28640 }, { "epoch": 0.85, "learning_rate": 7.5505245066081905e-06, "loss": 0.8207, "step": 28650 }, { "epoch": 0.85, "learning_rate": 7.5357079357553495e-06, "loss": 0.742, "step": 28660 }, { "epoch": 0.85, "learning_rate": 7.5208913649025075e-06, "loss": 0.6991, "step": 28670 }, { "epoch": 0.85, "learning_rate": 7.506074794049666e-06, "loss": 0.8417, "step": 28680 }, { "epoch": 0.85, "learning_rate": 7.491258223196823e-06, "loss": 0.6742, "step": 28690 }, { "epoch": 0.85, "learning_rate": 7.476441652343983e-06, "loss": 0.811, "step": 28700 }, { "epoch": 0.85, "learning_rate": 7.46162508149114e-06, "loss": 0.8786, "step": 28710 }, { "epoch": 0.85, "learning_rate": 7.446808510638298e-06, "loss": 0.8787, "step": 28720 }, { "epoch": 0.85, "learning_rate": 7.431991939785457e-06, "loss": 0.918, "step": 28730 }, { "epoch": 0.85, "learning_rate": 7.417175368932615e-06, "loss": 0.7166, "step": 28740 }, { "epoch": 0.85, "learning_rate": 7.402358798079773e-06, "loss": 0.754, "step": 28750 }, { "epoch": 0.85, "learning_rate": 7.38754222722693e-06, "loss": 0.9353, "step": 28760 }, { "epoch": 0.85, "learning_rate": 7.372725656374089e-06, "loss": 0.7204, "step": 28770 }, { "epoch": 0.85, "learning_rate": 7.357909085521247e-06, "loss": 0.8485, "step": 28780 }, { "epoch": 0.85, "learning_rate": 7.343092514668405e-06, "loss": 0.7644, "step": 28790 }, { "epoch": 0.85, "learning_rate": 7.328275943815564e-06, "loss": 0.7375, "step": 28800 }, { "epoch": 0.85, "learning_rate": 7.313459372962722e-06, "loss": 0.9146, "step": 28810 }, { "epoch": 0.85, "learning_rate": 7.29864280210988e-06, "loss": 0.8822, "step": 28820 }, { "epoch": 0.85, "learning_rate": 7.283826231257038e-06, "loss": 0.8327, "step": 28830 }, { "epoch": 0.85, "learning_rate": 7.269009660404197e-06, "loss": 0.6424, "step": 28840 }, { "epoch": 0.85, "learning_rate": 7.254193089551355e-06, "loss": 0.7733, "step": 28850 }, { "epoch": 0.86, "learning_rate": 7.239376518698513e-06, "loss": 0.8612, "step": 28860 }, { "epoch": 0.86, "learning_rate": 7.224559947845672e-06, "loss": 0.7965, "step": 28870 }, { "epoch": 0.86, "learning_rate": 7.20974337699283e-06, "loss": 0.8674, "step": 28880 }, { "epoch": 0.86, "learning_rate": 7.194926806139987e-06, "loss": 0.8873, "step": 28890 }, { "epoch": 0.86, "learning_rate": 7.180110235287145e-06, "loss": 0.8673, "step": 28900 }, { "epoch": 0.86, "learning_rate": 7.165293664434304e-06, "loss": 0.8005, "step": 28910 }, { "epoch": 0.86, "learning_rate": 7.150477093581462e-06, "loss": 0.7605, "step": 28920 }, { "epoch": 0.86, "learning_rate": 7.135660522728619e-06, "loss": 0.7572, "step": 28930 }, { "epoch": 0.86, "learning_rate": 7.120843951875779e-06, "loss": 0.8536, "step": 28940 }, { "epoch": 0.86, "learning_rate": 7.106027381022936e-06, "loss": 0.8039, "step": 28950 }, { "epoch": 0.86, "learning_rate": 7.091210810170094e-06, "loss": 0.6013, "step": 28960 }, { "epoch": 0.86, "learning_rate": 7.0763942393172525e-06, "loss": 0.8377, "step": 28970 }, { "epoch": 0.86, "learning_rate": 7.061577668464411e-06, "loss": 0.7033, "step": 28980 }, { "epoch": 0.86, "learning_rate": 7.0467610976115695e-06, "loss": 0.692, "step": 28990 }, { "epoch": 0.86, "learning_rate": 7.031944526758727e-06, "loss": 0.8825, "step": 29000 }, { "epoch": 0.86, "learning_rate": 7.0171279559058865e-06, "loss": 0.8131, "step": 29010 }, { "epoch": 0.86, "learning_rate": 7.002311385053044e-06, "loss": 0.7374, "step": 29020 }, { "epoch": 0.86, "learning_rate": 6.987494814200202e-06, "loss": 0.637, "step": 29030 }, { "epoch": 0.86, "learning_rate": 6.97267824334736e-06, "loss": 0.8886, "step": 29040 }, { "epoch": 0.86, "learning_rate": 6.957861672494519e-06, "loss": 0.9611, "step": 29050 }, { "epoch": 0.86, "learning_rate": 6.943045101641676e-06, "loss": 0.8604, "step": 29060 }, { "epoch": 0.86, "learning_rate": 6.928228530788834e-06, "loss": 0.8934, "step": 29070 }, { "epoch": 0.86, "learning_rate": 6.913411959935993e-06, "loss": 0.768, "step": 29080 }, { "epoch": 0.86, "learning_rate": 6.898595389083151e-06, "loss": 0.8102, "step": 29090 }, { "epoch": 0.86, "learning_rate": 6.883778818230309e-06, "loss": 0.8218, "step": 29100 }, { "epoch": 0.86, "learning_rate": 6.8689622473774664e-06, "loss": 0.8032, "step": 29110 }, { "epoch": 0.86, "learning_rate": 6.854145676524626e-06, "loss": 0.8869, "step": 29120 }, { "epoch": 0.86, "learning_rate": 6.8393291056717834e-06, "loss": 0.8738, "step": 29130 }, { "epoch": 0.86, "learning_rate": 6.8245125348189415e-06, "loss": 0.6768, "step": 29140 }, { "epoch": 0.86, "learning_rate": 6.8096959639661004e-06, "loss": 0.9605, "step": 29150 }, { "epoch": 0.86, "learning_rate": 6.7948793931132585e-06, "loss": 0.7273, "step": 29160 }, { "epoch": 0.86, "learning_rate": 6.780062822260417e-06, "loss": 0.8115, "step": 29170 }, { "epoch": 0.86, "learning_rate": 6.765246251407574e-06, "loss": 0.7178, "step": 29180 }, { "epoch": 0.86, "learning_rate": 6.750429680554734e-06, "loss": 0.8241, "step": 29190 }, { "epoch": 0.87, "learning_rate": 6.735613109701891e-06, "loss": 0.6951, "step": 29200 }, { "epoch": 0.87, "learning_rate": 6.720796538849049e-06, "loss": 0.9257, "step": 29210 }, { "epoch": 0.87, "learning_rate": 6.705979967996208e-06, "loss": 0.8817, "step": 29220 }, { "epoch": 0.87, "learning_rate": 6.691163397143366e-06, "loss": 0.8851, "step": 29230 }, { "epoch": 0.87, "learning_rate": 6.676346826290523e-06, "loss": 0.7645, "step": 29240 }, { "epoch": 0.87, "learning_rate": 6.661530255437681e-06, "loss": 0.8227, "step": 29250 }, { "epoch": 0.87, "learning_rate": 6.64671368458484e-06, "loss": 0.8884, "step": 29260 }, { "epoch": 0.87, "learning_rate": 6.631897113731998e-06, "loss": 0.9422, "step": 29270 }, { "epoch": 0.87, "learning_rate": 6.617080542879156e-06, "loss": 0.7692, "step": 29280 }, { "epoch": 0.87, "learning_rate": 6.602263972026315e-06, "loss": 0.7218, "step": 29290 }, { "epoch": 0.87, "learning_rate": 6.587447401173473e-06, "loss": 0.7579, "step": 29300 }, { "epoch": 0.87, "learning_rate": 6.5726308303206306e-06, "loss": 0.8306, "step": 29310 }, { "epoch": 0.87, "learning_rate": 6.557814259467789e-06, "loss": 0.7826, "step": 29320 }, { "epoch": 0.87, "learning_rate": 6.5429976886149476e-06, "loss": 0.8808, "step": 29330 }, { "epoch": 0.87, "learning_rate": 6.528181117762106e-06, "loss": 0.7251, "step": 29340 }, { "epoch": 0.87, "learning_rate": 6.513364546909263e-06, "loss": 0.879, "step": 29350 }, { "epoch": 0.87, "learning_rate": 6.498547976056423e-06, "loss": 0.7672, "step": 29360 }, { "epoch": 0.87, "learning_rate": 6.48373140520358e-06, "loss": 0.8023, "step": 29370 }, { "epoch": 0.87, "learning_rate": 6.468914834350738e-06, "loss": 0.7502, "step": 29380 }, { "epoch": 0.87, "learning_rate": 6.454098263497896e-06, "loss": 0.7522, "step": 29390 }, { "epoch": 0.87, "learning_rate": 6.439281692645055e-06, "loss": 0.8307, "step": 29400 }, { "epoch": 0.87, "learning_rate": 6.424465121792213e-06, "loss": 0.8703, "step": 29410 }, { "epoch": 0.87, "learning_rate": 6.40964855093937e-06, "loss": 0.8241, "step": 29420 }, { "epoch": 0.87, "learning_rate": 6.39483198008653e-06, "loss": 0.8221, "step": 29430 }, { "epoch": 0.87, "learning_rate": 6.380015409233687e-06, "loss": 0.8893, "step": 29440 }, { "epoch": 0.87, "learning_rate": 6.365198838380845e-06, "loss": 0.8828, "step": 29450 }, { "epoch": 0.87, "learning_rate": 6.3503822675280034e-06, "loss": 0.8587, "step": 29460 }, { "epoch": 0.87, "learning_rate": 6.335565696675162e-06, "loss": 0.7406, "step": 29470 }, { "epoch": 0.87, "learning_rate": 6.3207491258223204e-06, "loss": 0.7206, "step": 29480 }, { "epoch": 0.87, "learning_rate": 6.305932554969478e-06, "loss": 0.8015, "step": 29490 }, { "epoch": 0.87, "learning_rate": 6.291115984116637e-06, "loss": 0.7798, "step": 29500 }, { "epoch": 0.87, "learning_rate": 6.276299413263795e-06, "loss": 0.8116, "step": 29510 }, { "epoch": 0.87, "learning_rate": 6.261482842410953e-06, "loss": 0.8226, "step": 29520 }, { "epoch": 0.88, "learning_rate": 6.246666271558111e-06, "loss": 0.8893, "step": 29530 }, { "epoch": 0.88, "learning_rate": 6.231849700705269e-06, "loss": 0.7837, "step": 29540 }, { "epoch": 0.88, "learning_rate": 6.217033129852427e-06, "loss": 0.6093, "step": 29550 }, { "epoch": 0.88, "learning_rate": 6.202216558999585e-06, "loss": 0.789, "step": 29560 }, { "epoch": 0.88, "learning_rate": 6.187399988146743e-06, "loss": 0.8502, "step": 29570 }, { "epoch": 0.88, "learning_rate": 6.172583417293902e-06, "loss": 0.8546, "step": 29580 }, { "epoch": 0.88, "learning_rate": 6.15776684644106e-06, "loss": 0.9091, "step": 29590 }, { "epoch": 0.88, "learning_rate": 6.142950275588218e-06, "loss": 0.7525, "step": 29600 }, { "epoch": 0.88, "learning_rate": 6.128133704735376e-06, "loss": 0.8636, "step": 29610 }, { "epoch": 0.88, "learning_rate": 6.113317133882534e-06, "loss": 0.7831, "step": 29620 }, { "epoch": 0.88, "learning_rate": 6.0985005630296925e-06, "loss": 0.833, "step": 29630 }, { "epoch": 0.88, "learning_rate": 6.0836839921768506e-06, "loss": 0.8562, "step": 29640 }, { "epoch": 0.88, "learning_rate": 6.0688674213240095e-06, "loss": 0.7481, "step": 29650 }, { "epoch": 0.88, "learning_rate": 6.054050850471167e-06, "loss": 0.8869, "step": 29660 }, { "epoch": 0.88, "learning_rate": 6.039234279618326e-06, "loss": 0.6972, "step": 29670 }, { "epoch": 0.88, "learning_rate": 6.024417708765484e-06, "loss": 0.8546, "step": 29680 }, { "epoch": 0.88, "learning_rate": 6.009601137912642e-06, "loss": 0.7603, "step": 29690 }, { "epoch": 0.88, "learning_rate": 5.9947845670598e-06, "loss": 0.8197, "step": 29700 }, { "epoch": 0.88, "learning_rate": 5.979967996206958e-06, "loss": 0.73, "step": 29710 }, { "epoch": 0.88, "learning_rate": 5.965151425354117e-06, "loss": 0.7717, "step": 29720 }, { "epoch": 0.88, "learning_rate": 5.950334854501274e-06, "loss": 0.6476, "step": 29730 }, { "epoch": 0.88, "learning_rate": 5.935518283648433e-06, "loss": 0.8989, "step": 29740 }, { "epoch": 0.88, "learning_rate": 5.92070171279559e-06, "loss": 0.9831, "step": 29750 }, { "epoch": 0.88, "learning_rate": 5.905885141942749e-06, "loss": 0.9013, "step": 29760 }, { "epoch": 0.88, "learning_rate": 5.891068571089907e-06, "loss": 0.7805, "step": 29770 }, { "epoch": 0.88, "learning_rate": 5.876252000237065e-06, "loss": 0.7722, "step": 29780 }, { "epoch": 0.88, "learning_rate": 5.8614354293842235e-06, "loss": 0.8622, "step": 29790 }, { "epoch": 0.88, "learning_rate": 5.8466188585313815e-06, "loss": 0.7649, "step": 29800 }, { "epoch": 0.88, "learning_rate": 5.8318022876785405e-06, "loss": 0.867, "step": 29810 }, { "epoch": 0.88, "learning_rate": 5.816985716825698e-06, "loss": 0.8248, "step": 29820 }, { "epoch": 0.88, "learning_rate": 5.802169145972857e-06, "loss": 0.8189, "step": 29830 }, { "epoch": 0.88, "learning_rate": 5.787352575120014e-06, "loss": 0.7743, "step": 29840 }, { "epoch": 0.88, "learning_rate": 5.772536004267173e-06, "loss": 0.8254, "step": 29850 }, { "epoch": 0.88, "learning_rate": 5.757719433414331e-06, "loss": 0.8056, "step": 29860 }, { "epoch": 0.89, "learning_rate": 5.742902862561489e-06, "loss": 0.7944, "step": 29870 }, { "epoch": 0.89, "learning_rate": 5.728086291708647e-06, "loss": 0.9064, "step": 29880 }, { "epoch": 0.89, "learning_rate": 5.713269720855805e-06, "loss": 0.8732, "step": 29890 }, { "epoch": 0.89, "learning_rate": 5.698453150002964e-06, "loss": 0.804, "step": 29900 }, { "epoch": 0.89, "learning_rate": 5.683636579150121e-06, "loss": 0.9746, "step": 29910 }, { "epoch": 0.89, "learning_rate": 5.66882000829728e-06, "loss": 0.8479, "step": 29920 }, { "epoch": 0.89, "learning_rate": 5.654003437444438e-06, "loss": 0.8559, "step": 29930 }, { "epoch": 0.89, "learning_rate": 5.639186866591596e-06, "loss": 0.8752, "step": 29940 }, { "epoch": 0.89, "learning_rate": 5.624370295738754e-06, "loss": 0.7864, "step": 29950 }, { "epoch": 0.89, "learning_rate": 5.6095537248859125e-06, "loss": 0.8694, "step": 29960 }, { "epoch": 0.89, "learning_rate": 5.594737154033071e-06, "loss": 0.8, "step": 29970 }, { "epoch": 0.89, "learning_rate": 5.579920583180229e-06, "loss": 0.8002, "step": 29980 }, { "epoch": 0.89, "learning_rate": 5.565104012327388e-06, "loss": 0.895, "step": 29990 }, { "epoch": 0.89, "learning_rate": 5.550287441474546e-06, "loss": 0.7581, "step": 30000 }, { "epoch": 0.89, "learning_rate": 5.535470870621704e-06, "loss": 0.9158, "step": 30010 }, { "epoch": 0.89, "learning_rate": 5.520654299768862e-06, "loss": 0.9058, "step": 30020 }, { "epoch": 0.89, "learning_rate": 5.50583772891602e-06, "loss": 0.8668, "step": 30030 }, { "epoch": 0.89, "learning_rate": 5.491021158063178e-06, "loss": 0.7331, "step": 30040 }, { "epoch": 0.89, "learning_rate": 5.476204587210336e-06, "loss": 0.8737, "step": 30050 }, { "epoch": 0.89, "learning_rate": 5.461388016357494e-06, "loss": 0.7617, "step": 30060 }, { "epoch": 0.89, "learning_rate": 5.446571445504653e-06, "loss": 0.8632, "step": 30070 }, { "epoch": 0.89, "learning_rate": 5.43175487465181e-06, "loss": 0.8363, "step": 30080 }, { "epoch": 0.89, "learning_rate": 5.416938303798969e-06, "loss": 0.8232, "step": 30090 }, { "epoch": 0.89, "learning_rate": 5.402121732946127e-06, "loss": 0.8688, "step": 30100 }, { "epoch": 0.89, "learning_rate": 5.387305162093285e-06, "loss": 0.7766, "step": 30110 }, { "epoch": 0.89, "learning_rate": 5.3724885912404435e-06, "loss": 0.8947, "step": 30120 }, { "epoch": 0.89, "learning_rate": 5.3576720203876015e-06, "loss": 0.6847, "step": 30130 }, { "epoch": 0.89, "learning_rate": 5.3428554495347605e-06, "loss": 0.7965, "step": 30140 }, { "epoch": 0.89, "learning_rate": 5.328038878681918e-06, "loss": 0.8281, "step": 30150 }, { "epoch": 0.89, "learning_rate": 5.313222307829077e-06, "loss": 1.0558, "step": 30160 }, { "epoch": 0.89, "learning_rate": 5.298405736976234e-06, "loss": 0.8634, "step": 30170 }, { "epoch": 0.89, "learning_rate": 5.283589166123393e-06, "loss": 0.7995, "step": 30180 }, { "epoch": 0.89, "learning_rate": 5.268772595270551e-06, "loss": 0.7261, "step": 30190 }, { "epoch": 0.89, "learning_rate": 5.253956024417709e-06, "loss": 0.8498, "step": 30200 }, { "epoch": 0.9, "learning_rate": 5.239139453564868e-06, "loss": 0.8425, "step": 30210 }, { "epoch": 0.9, "learning_rate": 5.224322882712025e-06, "loss": 0.926, "step": 30220 }, { "epoch": 0.9, "learning_rate": 5.209506311859184e-06, "loss": 0.726, "step": 30230 }, { "epoch": 0.9, "learning_rate": 5.194689741006341e-06, "loss": 0.8751, "step": 30240 }, { "epoch": 0.9, "learning_rate": 5.1798731701535e-06, "loss": 0.8552, "step": 30250 }, { "epoch": 0.9, "learning_rate": 5.165056599300657e-06, "loss": 0.7732, "step": 30260 }, { "epoch": 0.9, "learning_rate": 5.150240028447816e-06, "loss": 0.8612, "step": 30270 }, { "epoch": 0.9, "learning_rate": 5.1354234575949744e-06, "loss": 0.685, "step": 30280 }, { "epoch": 0.9, "learning_rate": 5.1206068867421325e-06, "loss": 0.799, "step": 30290 }, { "epoch": 0.9, "learning_rate": 5.105790315889291e-06, "loss": 0.74, "step": 30300 }, { "epoch": 0.9, "learning_rate": 5.090973745036449e-06, "loss": 0.8859, "step": 30310 }, { "epoch": 0.9, "learning_rate": 5.076157174183608e-06, "loss": 0.7469, "step": 30320 }, { "epoch": 0.9, "learning_rate": 5.061340603330765e-06, "loss": 0.7738, "step": 30330 }, { "epoch": 0.9, "learning_rate": 5.046524032477924e-06, "loss": 0.8269, "step": 30340 }, { "epoch": 0.9, "learning_rate": 5.031707461625082e-06, "loss": 0.9851, "step": 30350 }, { "epoch": 0.9, "learning_rate": 5.01689089077224e-06, "loss": 0.7928, "step": 30360 }, { "epoch": 0.9, "learning_rate": 5.002074319919398e-06, "loss": 0.7639, "step": 30370 }, { "epoch": 0.9, "learning_rate": 4.987257749066556e-06, "loss": 0.9153, "step": 30380 }, { "epoch": 0.9, "learning_rate": 4.972441178213714e-06, "loss": 0.9317, "step": 30390 }, { "epoch": 0.9, "learning_rate": 4.957624607360872e-06, "loss": 0.8111, "step": 30400 }, { "epoch": 0.9, "learning_rate": 4.942808036508031e-06, "loss": 0.8389, "step": 30410 }, { "epoch": 0.9, "learning_rate": 4.927991465655189e-06, "loss": 0.8964, "step": 30420 }, { "epoch": 0.9, "learning_rate": 4.913174894802347e-06, "loss": 0.729, "step": 30430 }, { "epoch": 0.9, "learning_rate": 4.898358323949505e-06, "loss": 0.8435, "step": 30440 }, { "epoch": 0.9, "learning_rate": 4.8835417530966635e-06, "loss": 0.6626, "step": 30450 }, { "epoch": 0.9, "learning_rate": 4.8687251822438216e-06, "loss": 0.7776, "step": 30460 }, { "epoch": 0.9, "learning_rate": 4.85390861139098e-06, "loss": 0.731, "step": 30470 }, { "epoch": 0.9, "learning_rate": 4.839092040538138e-06, "loss": 0.7716, "step": 30480 }, { "epoch": 0.9, "learning_rate": 4.824275469685297e-06, "loss": 0.8201, "step": 30490 }, { "epoch": 0.9, "learning_rate": 4.809458898832455e-06, "loss": 0.8544, "step": 30500 }, { "epoch": 0.9, "learning_rate": 4.794642327979613e-06, "loss": 0.8127, "step": 30510 }, { "epoch": 0.9, "learning_rate": 4.779825757126771e-06, "loss": 0.7945, "step": 30520 }, { "epoch": 0.9, "learning_rate": 4.765009186273929e-06, "loss": 0.7658, "step": 30530 }, { "epoch": 0.9, "learning_rate": 4.750192615421087e-06, "loss": 0.709, "step": 30540 }, { "epoch": 0.91, "learning_rate": 4.735376044568245e-06, "loss": 0.7834, "step": 30550 }, { "epoch": 0.91, "learning_rate": 4.720559473715404e-06, "loss": 0.7525, "step": 30560 }, { "epoch": 0.91, "learning_rate": 4.705742902862561e-06, "loss": 0.8995, "step": 30570 }, { "epoch": 0.91, "learning_rate": 4.69092633200972e-06, "loss": 0.6898, "step": 30580 }, { "epoch": 0.91, "learning_rate": 4.6761097611568774e-06, "loss": 0.7307, "step": 30590 }, { "epoch": 0.91, "learning_rate": 4.661293190304036e-06, "loss": 0.764, "step": 30600 }, { "epoch": 0.91, "learning_rate": 4.6464766194511944e-06, "loss": 0.7606, "step": 30610 }, { "epoch": 0.91, "learning_rate": 4.6316600485983525e-06, "loss": 0.8327, "step": 30620 }, { "epoch": 0.91, "learning_rate": 4.6168434777455114e-06, "loss": 0.7644, "step": 30630 }, { "epoch": 0.91, "learning_rate": 4.602026906892669e-06, "loss": 0.6861, "step": 30640 }, { "epoch": 0.91, "learning_rate": 4.587210336039828e-06, "loss": 0.7229, "step": 30650 }, { "epoch": 0.91, "learning_rate": 4.572393765186985e-06, "loss": 0.79, "step": 30660 }, { "epoch": 0.91, "learning_rate": 4.557577194334144e-06, "loss": 0.9027, "step": 30670 }, { "epoch": 0.91, "learning_rate": 4.542760623481301e-06, "loss": 0.8741, "step": 30680 }, { "epoch": 0.91, "learning_rate": 4.52794405262846e-06, "loss": 0.9217, "step": 30690 }, { "epoch": 0.91, "learning_rate": 4.513127481775618e-06, "loss": 0.8175, "step": 30700 }, { "epoch": 0.91, "learning_rate": 4.498310910922776e-06, "loss": 0.7815, "step": 30710 }, { "epoch": 0.91, "learning_rate": 4.483494340069935e-06, "loss": 0.7731, "step": 30720 }, { "epoch": 0.91, "learning_rate": 4.468677769217092e-06, "loss": 0.7689, "step": 30730 }, { "epoch": 0.91, "learning_rate": 4.453861198364251e-06, "loss": 0.8046, "step": 30740 }, { "epoch": 0.91, "learning_rate": 4.439044627511408e-06, "loss": 0.8386, "step": 30750 }, { "epoch": 0.91, "learning_rate": 4.424228056658567e-06, "loss": 0.7341, "step": 30760 }, { "epoch": 0.91, "learning_rate": 4.409411485805725e-06, "loss": 0.9079, "step": 30770 }, { "epoch": 0.91, "learning_rate": 4.3945949149528835e-06, "loss": 0.8967, "step": 30780 }, { "epoch": 0.91, "learning_rate": 4.3797783441000416e-06, "loss": 1.0217, "step": 30790 }, { "epoch": 0.91, "learning_rate": 4.3649617732472e-06, "loss": 0.7746, "step": 30800 }, { "epoch": 0.91, "learning_rate": 4.350145202394358e-06, "loss": 0.7299, "step": 30810 }, { "epoch": 0.91, "learning_rate": 4.335328631541516e-06, "loss": 0.8941, "step": 30820 }, { "epoch": 0.91, "learning_rate": 4.320512060688675e-06, "loss": 0.812, "step": 30830 }, { "epoch": 0.91, "learning_rate": 4.305695489835833e-06, "loss": 0.9509, "step": 30840 }, { "epoch": 0.91, "learning_rate": 4.290878918982991e-06, "loss": 0.779, "step": 30850 }, { "epoch": 0.91, "learning_rate": 4.276062348130149e-06, "loss": 0.8534, "step": 30860 }, { "epoch": 0.91, "learning_rate": 4.261245777277307e-06, "loss": 0.6726, "step": 30870 }, { "epoch": 0.92, "learning_rate": 4.246429206424465e-06, "loss": 0.8623, "step": 30880 }, { "epoch": 0.92, "learning_rate": 4.231612635571623e-06, "loss": 0.7388, "step": 30890 }, { "epoch": 0.92, "learning_rate": 4.216796064718781e-06, "loss": 0.7825, "step": 30900 }, { "epoch": 0.92, "learning_rate": 4.20197949386594e-06, "loss": 0.719, "step": 30910 }, { "epoch": 0.92, "learning_rate": 4.187162923013098e-06, "loss": 0.9652, "step": 30920 }, { "epoch": 0.92, "learning_rate": 4.172346352160256e-06, "loss": 0.8504, "step": 30930 }, { "epoch": 0.92, "learning_rate": 4.1575297813074144e-06, "loss": 0.809, "step": 30940 }, { "epoch": 0.92, "learning_rate": 4.1427132104545725e-06, "loss": 0.857, "step": 30950 }, { "epoch": 0.92, "learning_rate": 4.127896639601731e-06, "loss": 0.7532, "step": 30960 }, { "epoch": 0.92, "learning_rate": 4.113080068748889e-06, "loss": 0.861, "step": 30970 }, { "epoch": 0.92, "learning_rate": 4.098263497896048e-06, "loss": 0.8542, "step": 30980 }, { "epoch": 0.92, "learning_rate": 4.083446927043205e-06, "loss": 0.707, "step": 30990 }, { "epoch": 0.92, "learning_rate": 4.068630356190364e-06, "loss": 0.7639, "step": 31000 }, { "epoch": 0.92, "learning_rate": 4.053813785337522e-06, "loss": 0.8398, "step": 31010 }, { "epoch": 0.92, "learning_rate": 4.03899721448468e-06, "loss": 0.846, "step": 31020 }, { "epoch": 0.92, "learning_rate": 4.024180643631838e-06, "loss": 0.88, "step": 31030 }, { "epoch": 0.92, "learning_rate": 4.009364072778996e-06, "loss": 0.8333, "step": 31040 }, { "epoch": 0.92, "learning_rate": 3.994547501926155e-06, "loss": 0.8985, "step": 31050 }, { "epoch": 0.92, "learning_rate": 3.979730931073312e-06, "loss": 0.7808, "step": 31060 }, { "epoch": 0.92, "learning_rate": 3.964914360220471e-06, "loss": 0.9343, "step": 31070 }, { "epoch": 0.92, "learning_rate": 3.950097789367628e-06, "loss": 0.8095, "step": 31080 }, { "epoch": 0.92, "learning_rate": 3.935281218514787e-06, "loss": 0.9956, "step": 31090 }, { "epoch": 0.92, "learning_rate": 3.920464647661945e-06, "loss": 0.8299, "step": 31100 }, { "epoch": 0.92, "learning_rate": 3.9056480768091035e-06, "loss": 0.8431, "step": 31110 }, { "epoch": 0.92, "learning_rate": 3.8908315059562616e-06, "loss": 0.8308, "step": 31120 }, { "epoch": 0.92, "learning_rate": 3.87601493510342e-06, "loss": 0.7654, "step": 31130 }, { "epoch": 0.92, "learning_rate": 3.861198364250579e-06, "loss": 0.8367, "step": 31140 }, { "epoch": 0.92, "learning_rate": 3.846381793397736e-06, "loss": 0.8608, "step": 31150 }, { "epoch": 0.92, "learning_rate": 3.831565222544895e-06, "loss": 0.8565, "step": 31160 }, { "epoch": 0.92, "learning_rate": 3.816748651692052e-06, "loss": 0.7515, "step": 31170 }, { "epoch": 0.92, "learning_rate": 3.801932080839211e-06, "loss": 0.7589, "step": 31180 }, { "epoch": 0.92, "learning_rate": 3.7871155099863694e-06, "loss": 0.7388, "step": 31190 }, { "epoch": 0.92, "learning_rate": 3.772298939133527e-06, "loss": 0.8201, "step": 31200 }, { "epoch": 0.92, "learning_rate": 3.7574823682806856e-06, "loss": 0.7638, "step": 31210 }, { "epoch": 0.93, "learning_rate": 3.7426657974278432e-06, "loss": 0.8043, "step": 31220 }, { "epoch": 0.93, "learning_rate": 3.7278492265750017e-06, "loss": 0.747, "step": 31230 }, { "epoch": 0.93, "learning_rate": 3.7130326557221594e-06, "loss": 0.7445, "step": 31240 }, { "epoch": 0.93, "learning_rate": 3.698216084869318e-06, "loss": 0.8245, "step": 31250 }, { "epoch": 0.93, "learning_rate": 3.6833995140164764e-06, "loss": 0.8488, "step": 31260 }, { "epoch": 0.93, "learning_rate": 3.6685829431636345e-06, "loss": 0.9464, "step": 31270 }, { "epoch": 0.93, "learning_rate": 3.6537663723107925e-06, "loss": 0.7752, "step": 31280 }, { "epoch": 0.93, "learning_rate": 3.6389498014579506e-06, "loss": 0.821, "step": 31290 }, { "epoch": 0.93, "learning_rate": 3.624133230605109e-06, "loss": 0.8872, "step": 31300 }, { "epoch": 0.93, "learning_rate": 3.6093166597522668e-06, "loss": 0.8284, "step": 31310 }, { "epoch": 0.93, "learning_rate": 3.5945000888994253e-06, "loss": 0.9218, "step": 31320 }, { "epoch": 0.93, "learning_rate": 3.5796835180465838e-06, "loss": 0.7825, "step": 31330 }, { "epoch": 0.93, "learning_rate": 3.5648669471937414e-06, "loss": 0.8917, "step": 31340 }, { "epoch": 0.93, "learning_rate": 3.5500503763409e-06, "loss": 0.6889, "step": 31350 }, { "epoch": 0.93, "learning_rate": 3.5352338054880576e-06, "loss": 0.8553, "step": 31360 }, { "epoch": 0.93, "learning_rate": 3.520417234635216e-06, "loss": 0.858, "step": 31370 }, { "epoch": 0.93, "learning_rate": 3.505600663782374e-06, "loss": 0.7792, "step": 31380 }, { "epoch": 0.93, "learning_rate": 3.4907840929295327e-06, "loss": 0.7633, "step": 31390 }, { "epoch": 0.93, "learning_rate": 3.475967522076691e-06, "loss": 0.7359, "step": 31400 }, { "epoch": 0.93, "learning_rate": 3.461150951223849e-06, "loss": 0.9595, "step": 31410 }, { "epoch": 0.93, "learning_rate": 3.4463343803710073e-06, "loss": 0.8637, "step": 31420 }, { "epoch": 0.93, "learning_rate": 3.431517809518165e-06, "loss": 0.8134, "step": 31430 }, { "epoch": 0.93, "learning_rate": 3.4167012386653235e-06, "loss": 0.8257, "step": 31440 }, { "epoch": 0.93, "learning_rate": 3.401884667812481e-06, "loss": 0.8745, "step": 31450 }, { "epoch": 0.93, "learning_rate": 3.3870680969596397e-06, "loss": 0.8975, "step": 31460 }, { "epoch": 0.93, "learning_rate": 3.372251526106798e-06, "loss": 0.849, "step": 31470 }, { "epoch": 0.93, "learning_rate": 3.3574349552539562e-06, "loss": 0.8976, "step": 31480 }, { "epoch": 0.93, "learning_rate": 3.3426183844011147e-06, "loss": 0.7464, "step": 31490 }, { "epoch": 0.93, "learning_rate": 3.3278018135482724e-06, "loss": 0.7268, "step": 31500 }, { "epoch": 0.93, "learning_rate": 3.312985242695431e-06, "loss": 0.8978, "step": 31510 }, { "epoch": 0.93, "learning_rate": 3.2981686718425886e-06, "loss": 0.9268, "step": 31520 }, { "epoch": 0.93, "learning_rate": 3.283352100989747e-06, "loss": 0.8545, "step": 31530 }, { "epoch": 0.93, "learning_rate": 3.2685355301369056e-06, "loss": 0.8155, "step": 31540 }, { "epoch": 0.93, "learning_rate": 3.2537189592840632e-06, "loss": 0.7821, "step": 31550 }, { "epoch": 0.94, "learning_rate": 3.2389023884312217e-06, "loss": 0.8581, "step": 31560 }, { "epoch": 0.94, "learning_rate": 3.22408581757838e-06, "loss": 0.7314, "step": 31570 }, { "epoch": 0.94, "learning_rate": 3.209269246725538e-06, "loss": 0.7354, "step": 31580 }, { "epoch": 0.94, "learning_rate": 3.194452675872696e-06, "loss": 0.7087, "step": 31590 }, { "epoch": 0.94, "learning_rate": 3.1796361050198545e-06, "loss": 0.8197, "step": 31600 }, { "epoch": 0.94, "learning_rate": 3.164819534167013e-06, "loss": 0.8743, "step": 31610 }, { "epoch": 0.94, "learning_rate": 3.1500029633141706e-06, "loss": 0.8032, "step": 31620 }, { "epoch": 0.94, "learning_rate": 3.135186392461329e-06, "loss": 0.8348, "step": 31630 }, { "epoch": 0.94, "learning_rate": 3.120369821608487e-06, "loss": 1.0197, "step": 31640 }, { "epoch": 0.94, "learning_rate": 3.1055532507556453e-06, "loss": 0.8379, "step": 31650 }, { "epoch": 0.94, "learning_rate": 3.0907366799028034e-06, "loss": 0.9561, "step": 31660 }, { "epoch": 0.94, "learning_rate": 3.0759201090499614e-06, "loss": 0.745, "step": 31670 }, { "epoch": 0.94, "learning_rate": 3.06110353819712e-06, "loss": 0.8192, "step": 31680 }, { "epoch": 0.94, "learning_rate": 3.046286967344278e-06, "loss": 0.7573, "step": 31690 }, { "epoch": 0.94, "learning_rate": 3.031470396491436e-06, "loss": 0.7973, "step": 31700 }, { "epoch": 0.94, "learning_rate": 3.0166538256385946e-06, "loss": 0.8163, "step": 31710 }, { "epoch": 0.94, "learning_rate": 3.0018372547857527e-06, "loss": 0.6931, "step": 31720 }, { "epoch": 0.94, "learning_rate": 2.9870206839329108e-06, "loss": 0.7237, "step": 31730 }, { "epoch": 0.94, "learning_rate": 2.972204113080069e-06, "loss": 0.8131, "step": 31740 }, { "epoch": 0.94, "learning_rate": 2.957387542227227e-06, "loss": 0.7158, "step": 31750 }, { "epoch": 0.94, "learning_rate": 2.942570971374385e-06, "loss": 0.9403, "step": 31760 }, { "epoch": 0.94, "learning_rate": 2.927754400521543e-06, "loss": 0.9201, "step": 31770 }, { "epoch": 0.94, "learning_rate": 2.9129378296687016e-06, "loss": 0.9178, "step": 31780 }, { "epoch": 0.94, "learning_rate": 2.89812125881586e-06, "loss": 0.9062, "step": 31790 }, { "epoch": 0.94, "learning_rate": 2.883304687963018e-06, "loss": 0.8674, "step": 31800 }, { "epoch": 0.94, "learning_rate": 2.8684881171101763e-06, "loss": 0.7798, "step": 31810 }, { "epoch": 0.94, "learning_rate": 2.8536715462573343e-06, "loss": 0.7193, "step": 31820 }, { "epoch": 0.94, "learning_rate": 2.8388549754044924e-06, "loss": 0.7906, "step": 31830 }, { "epoch": 0.94, "learning_rate": 2.8240384045516505e-06, "loss": 0.8077, "step": 31840 }, { "epoch": 0.94, "learning_rate": 2.809221833698809e-06, "loss": 0.7481, "step": 31850 }, { "epoch": 0.94, "learning_rate": 2.794405262845967e-06, "loss": 0.8428, "step": 31860 }, { "epoch": 0.94, "learning_rate": 2.779588691993125e-06, "loss": 0.7441, "step": 31870 }, { "epoch": 0.94, "learning_rate": 2.7647721211402832e-06, "loss": 0.7401, "step": 31880 }, { "epoch": 0.95, "learning_rate": 2.7499555502874417e-06, "loss": 0.869, "step": 31890 }, { "epoch": 0.95, "learning_rate": 2.7351389794346e-06, "loss": 0.8421, "step": 31900 }, { "epoch": 0.95, "learning_rate": 2.720322408581758e-06, "loss": 0.783, "step": 31910 }, { "epoch": 0.95, "learning_rate": 2.7055058377289164e-06, "loss": 0.8691, "step": 31920 }, { "epoch": 0.95, "learning_rate": 2.6906892668760745e-06, "loss": 0.6008, "step": 31930 }, { "epoch": 0.95, "learning_rate": 2.6758726960232326e-06, "loss": 0.8923, "step": 31940 }, { "epoch": 0.95, "learning_rate": 2.6610561251703906e-06, "loss": 0.8449, "step": 31950 }, { "epoch": 0.95, "learning_rate": 2.6462395543175487e-06, "loss": 0.7947, "step": 31960 }, { "epoch": 0.95, "learning_rate": 2.631422983464707e-06, "loss": 0.7598, "step": 31970 }, { "epoch": 0.95, "learning_rate": 2.616606412611865e-06, "loss": 0.8125, "step": 31980 }, { "epoch": 0.95, "learning_rate": 2.6017898417590234e-06, "loss": 0.8404, "step": 31990 }, { "epoch": 0.95, "learning_rate": 2.586973270906182e-06, "loss": 0.8872, "step": 32000 }, { "epoch": 0.95, "learning_rate": 2.57215670005334e-06, "loss": 0.9069, "step": 32010 }, { "epoch": 0.95, "learning_rate": 2.557340129200498e-06, "loss": 0.7658, "step": 32020 }, { "epoch": 0.95, "learning_rate": 2.542523558347656e-06, "loss": 0.9541, "step": 32030 }, { "epoch": 0.95, "learning_rate": 2.527706987494814e-06, "loss": 0.8887, "step": 32040 }, { "epoch": 0.95, "learning_rate": 2.5128904166419723e-06, "loss": 0.9671, "step": 32050 }, { "epoch": 0.95, "learning_rate": 2.4980738457891308e-06, "loss": 0.8185, "step": 32060 }, { "epoch": 0.95, "learning_rate": 2.483257274936289e-06, "loss": 0.8478, "step": 32070 }, { "epoch": 0.95, "learning_rate": 2.468440704083447e-06, "loss": 0.7373, "step": 32080 }, { "epoch": 0.95, "learning_rate": 2.453624133230605e-06, "loss": 0.7813, "step": 32090 }, { "epoch": 0.95, "learning_rate": 2.4388075623777635e-06, "loss": 0.8227, "step": 32100 }, { "epoch": 0.95, "learning_rate": 2.4239909915249216e-06, "loss": 0.734, "step": 32110 }, { "epoch": 0.95, "learning_rate": 2.4091744206720797e-06, "loss": 0.8542, "step": 32120 }, { "epoch": 0.95, "learning_rate": 2.394357849819238e-06, "loss": 0.7181, "step": 32130 }, { "epoch": 0.95, "learning_rate": 2.3795412789663963e-06, "loss": 0.8298, "step": 32140 }, { "epoch": 0.95, "learning_rate": 2.3647247081135543e-06, "loss": 0.8034, "step": 32150 }, { "epoch": 0.95, "learning_rate": 2.3499081372607124e-06, "loss": 0.8093, "step": 32160 }, { "epoch": 0.95, "learning_rate": 2.3350915664078705e-06, "loss": 0.8805, "step": 32170 }, { "epoch": 0.95, "learning_rate": 2.3202749955550286e-06, "loss": 0.8055, "step": 32180 }, { "epoch": 0.95, "learning_rate": 2.305458424702187e-06, "loss": 0.8558, "step": 32190 }, { "epoch": 0.95, "learning_rate": 2.290641853849345e-06, "loss": 0.8407, "step": 32200 }, { "epoch": 0.95, "learning_rate": 2.2758252829965037e-06, "loss": 0.9348, "step": 32210 }, { "epoch": 0.95, "learning_rate": 2.2610087121436617e-06, "loss": 0.779, "step": 32220 }, { "epoch": 0.96, "learning_rate": 2.24619214129082e-06, "loss": 0.6843, "step": 32230 }, { "epoch": 0.96, "learning_rate": 2.231375570437978e-06, "loss": 0.8221, "step": 32240 }, { "epoch": 0.96, "learning_rate": 2.216558999585136e-06, "loss": 0.8315, "step": 32250 }, { "epoch": 0.96, "learning_rate": 2.201742428732294e-06, "loss": 0.7218, "step": 32260 }, { "epoch": 0.96, "learning_rate": 2.1869258578794526e-06, "loss": 0.9209, "step": 32270 }, { "epoch": 0.96, "learning_rate": 2.1721092870266106e-06, "loss": 0.8556, "step": 32280 }, { "epoch": 0.96, "learning_rate": 2.1572927161737687e-06, "loss": 0.9266, "step": 32290 }, { "epoch": 0.96, "learning_rate": 2.1424761453209272e-06, "loss": 0.8504, "step": 32300 }, { "epoch": 0.96, "learning_rate": 2.1276595744680853e-06, "loss": 0.7773, "step": 32310 }, { "epoch": 0.96, "learning_rate": 2.1128430036152434e-06, "loss": 0.878, "step": 32320 }, { "epoch": 0.96, "learning_rate": 2.0980264327624015e-06, "loss": 0.8129, "step": 32330 }, { "epoch": 0.96, "learning_rate": 2.08320986190956e-06, "loss": 0.8973, "step": 32340 }, { "epoch": 0.96, "learning_rate": 2.068393291056718e-06, "loss": 0.9417, "step": 32350 }, { "epoch": 0.96, "learning_rate": 2.053576720203876e-06, "loss": 0.7483, "step": 32360 }, { "epoch": 0.96, "learning_rate": 2.038760149351034e-06, "loss": 0.6655, "step": 32370 }, { "epoch": 0.96, "learning_rate": 2.0239435784981923e-06, "loss": 0.9379, "step": 32380 }, { "epoch": 0.96, "learning_rate": 2.0091270076453504e-06, "loss": 0.8425, "step": 32390 }, { "epoch": 0.96, "learning_rate": 1.994310436792509e-06, "loss": 0.827, "step": 32400 }, { "epoch": 0.96, "learning_rate": 1.9794938659396674e-06, "loss": 0.8824, "step": 32410 }, { "epoch": 0.96, "learning_rate": 1.9646772950868255e-06, "loss": 0.8184, "step": 32420 }, { "epoch": 0.96, "learning_rate": 1.9498607242339835e-06, "loss": 0.7644, "step": 32430 }, { "epoch": 0.96, "learning_rate": 1.9350441533811416e-06, "loss": 0.8675, "step": 32440 }, { "epoch": 0.96, "learning_rate": 1.9202275825282997e-06, "loss": 0.787, "step": 32450 }, { "epoch": 0.96, "learning_rate": 1.9054110116754578e-06, "loss": 0.8345, "step": 32460 }, { "epoch": 0.96, "learning_rate": 1.890594440822616e-06, "loss": 0.8468, "step": 32470 }, { "epoch": 0.96, "learning_rate": 1.8757778699697744e-06, "loss": 0.7413, "step": 32480 }, { "epoch": 0.96, "learning_rate": 1.8609612991169326e-06, "loss": 0.7738, "step": 32490 }, { "epoch": 0.96, "learning_rate": 1.8461447282640907e-06, "loss": 0.7922, "step": 32500 }, { "epoch": 0.96, "learning_rate": 1.8313281574112488e-06, "loss": 0.8119, "step": 32510 }, { "epoch": 0.96, "learning_rate": 1.8165115865584069e-06, "loss": 0.8329, "step": 32520 }, { "epoch": 0.96, "learning_rate": 1.8016950157055652e-06, "loss": 0.6763, "step": 32530 }, { "epoch": 0.96, "learning_rate": 1.7868784448527233e-06, "loss": 0.6897, "step": 32540 }, { "epoch": 0.96, "learning_rate": 1.7720618739998818e-06, "loss": 0.854, "step": 32550 }, { "epoch": 0.96, "learning_rate": 1.7572453031470398e-06, "loss": 0.775, "step": 32560 }, { "epoch": 0.97, "learning_rate": 1.742428732294198e-06, "loss": 0.9592, "step": 32570 }, { "epoch": 0.97, "learning_rate": 1.727612161441356e-06, "loss": 0.7021, "step": 32580 }, { "epoch": 0.97, "learning_rate": 1.7127955905885143e-06, "loss": 0.8669, "step": 32590 }, { "epoch": 0.97, "learning_rate": 1.6979790197356724e-06, "loss": 0.9422, "step": 32600 }, { "epoch": 0.97, "learning_rate": 1.6831624488828304e-06, "loss": 0.7729, "step": 32610 }, { "epoch": 0.97, "learning_rate": 1.668345878029989e-06, "loss": 0.8819, "step": 32620 }, { "epoch": 0.97, "learning_rate": 1.653529307177147e-06, "loss": 0.8327, "step": 32630 }, { "epoch": 0.97, "learning_rate": 1.6387127363243053e-06, "loss": 0.7456, "step": 32640 }, { "epoch": 0.97, "learning_rate": 1.6238961654714634e-06, "loss": 0.8397, "step": 32650 }, { "epoch": 0.97, "learning_rate": 1.6090795946186215e-06, "loss": 0.8461, "step": 32660 }, { "epoch": 0.97, "learning_rate": 1.5942630237657796e-06, "loss": 0.8236, "step": 32670 }, { "epoch": 0.97, "learning_rate": 1.5794464529129378e-06, "loss": 0.9109, "step": 32680 }, { "epoch": 0.97, "learning_rate": 1.5646298820600961e-06, "loss": 0.7619, "step": 32690 }, { "epoch": 0.97, "learning_rate": 1.5498133112072542e-06, "loss": 0.7171, "step": 32700 }, { "epoch": 0.97, "learning_rate": 1.5349967403544125e-06, "loss": 0.7824, "step": 32710 }, { "epoch": 0.97, "learning_rate": 1.5201801695015706e-06, "loss": 0.8839, "step": 32720 }, { "epoch": 0.97, "learning_rate": 1.5053635986487287e-06, "loss": 0.7877, "step": 32730 }, { "epoch": 0.97, "learning_rate": 1.4905470277958872e-06, "loss": 0.7949, "step": 32740 }, { "epoch": 0.97, "learning_rate": 1.4757304569430452e-06, "loss": 0.7999, "step": 32750 }, { "epoch": 0.97, "learning_rate": 1.4609138860902033e-06, "loss": 0.8508, "step": 32760 }, { "epoch": 0.97, "learning_rate": 1.4460973152373614e-06, "loss": 0.8006, "step": 32770 }, { "epoch": 0.97, "learning_rate": 1.4312807443845197e-06, "loss": 0.8227, "step": 32780 }, { "epoch": 0.97, "learning_rate": 1.416464173531678e-06, "loss": 0.9299, "step": 32790 }, { "epoch": 0.97, "learning_rate": 1.401647602678836e-06, "loss": 0.8562, "step": 32800 }, { "epoch": 0.97, "learning_rate": 1.3868310318259944e-06, "loss": 0.8914, "step": 32810 }, { "epoch": 0.97, "learning_rate": 1.3720144609731524e-06, "loss": 0.8771, "step": 32820 }, { "epoch": 0.97, "learning_rate": 1.3571978901203105e-06, "loss": 0.8683, "step": 32830 }, { "epoch": 0.97, "learning_rate": 1.3423813192674688e-06, "loss": 0.8071, "step": 32840 }, { "epoch": 0.97, "learning_rate": 1.327564748414627e-06, "loss": 0.7712, "step": 32850 }, { "epoch": 0.97, "learning_rate": 1.3127481775617852e-06, "loss": 0.6798, "step": 32860 }, { "epoch": 0.97, "learning_rate": 1.2979316067089433e-06, "loss": 0.863, "step": 32870 }, { "epoch": 0.97, "learning_rate": 1.2831150358561016e-06, "loss": 0.879, "step": 32880 }, { "epoch": 0.97, "learning_rate": 1.2682984650032596e-06, "loss": 0.9034, "step": 32890 }, { "epoch": 0.97, "learning_rate": 1.253481894150418e-06, "loss": 0.8587, "step": 32900 }, { "epoch": 0.98, "learning_rate": 1.238665323297576e-06, "loss": 0.773, "step": 32910 }, { "epoch": 0.98, "learning_rate": 1.2238487524447343e-06, "loss": 0.8008, "step": 32920 }, { "epoch": 0.98, "learning_rate": 1.2090321815918924e-06, "loss": 0.7931, "step": 32930 }, { "epoch": 0.98, "learning_rate": 1.1942156107390507e-06, "loss": 0.7081, "step": 32940 }, { "epoch": 0.98, "learning_rate": 1.179399039886209e-06, "loss": 0.808, "step": 32950 }, { "epoch": 0.98, "learning_rate": 1.164582469033367e-06, "loss": 0.8978, "step": 32960 }, { "epoch": 0.98, "learning_rate": 1.1497658981805251e-06, "loss": 0.8346, "step": 32970 }, { "epoch": 0.98, "learning_rate": 1.1349493273276832e-06, "loss": 0.7151, "step": 32980 }, { "epoch": 0.98, "learning_rate": 1.1201327564748415e-06, "loss": 0.6956, "step": 32990 }, { "epoch": 0.98, "learning_rate": 1.1053161856219998e-06, "loss": 0.8311, "step": 33000 } ], "logging_steps": 10, "max_steps": 33746, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 4.97334915956736e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }