diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,53985 @@ +{ + "best_metric": 0.013290228322148323, + "best_model_checkpoint": "./save/eng-zho_all_facebook/wav2vec2-large-xlsr-53/checkpoint-33648", + "epoch": 16.0, + "global_step": 89728, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999108416547794e-05, + "loss": 0.0532, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983059914408e-05, + "loss": 0.0519, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999741440798859e-05, + "loss": 0.0503, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652282453638e-05, + "loss": 0.0487, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.999563124108417e-05, + "loss": 0.0444, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 4.999473965763195e-05, + "loss": 0.0416, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993848074179744e-05, + "loss": 0.0399, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992956490727535e-05, + "loss": 0.0377, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 4.999206490727532e-05, + "loss": 0.0387, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 4.999117332382311e-05, + "loss": 0.0367, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 4.99902817403709e-05, + "loss": 0.0375, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989390156918693e-05, + "loss": 0.0368, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 4.998849857346648e-05, + "loss": 0.0357, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 4.998760699001427e-05, + "loss": 0.0354, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 4.998671540656206e-05, + "loss": 0.0364, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 4.9985823823109845e-05, + "loss": 0.0365, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 4.9984932239657636e-05, + "loss": 0.0357, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 4.998404065620542e-05, + "loss": 0.0356, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 4.998314907275321e-05, + "loss": 0.0353, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 4.9982257489300996e-05, + "loss": 0.0358, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 4.998136590584879e-05, + "loss": 0.0363, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 4.998047432239658e-05, + "loss": 0.0348, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 4.997958273894437e-05, + "loss": 0.0364, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 4.997869115549216e-05, + "loss": 0.0382, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 4.9977799572039945e-05, + "loss": 0.0352, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 4.9976907988587736e-05, + "loss": 0.0348, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 4.997601640513552e-05, + "loss": 0.0357, + "step": 270 + }, + { + "epoch": 0.05, + "learning_rate": 4.997512482168331e-05, + "loss": 0.0326, + "step": 280 + }, + { + "epoch": 0.05, + "learning_rate": 4.9974233238231097e-05, + "loss": 0.033, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 4.997334165477889e-05, + "loss": 0.0346, + "step": 300 + }, + { + "epoch": 0.06, + "learning_rate": 4.997245007132668e-05, + "loss": 0.0361, + "step": 310 + }, + { + "epoch": 0.06, + "learning_rate": 4.9971558487874463e-05, + "loss": 0.0346, + "step": 320 + }, + { + "epoch": 0.06, + "learning_rate": 4.997066690442226e-05, + "loss": 0.035, + "step": 330 + }, + { + "epoch": 0.06, + "learning_rate": 4.9969775320970046e-05, + "loss": 0.0337, + "step": 340 + }, + { + "epoch": 0.06, + "learning_rate": 4.996888373751784e-05, + "loss": 0.0336, + "step": 350 + }, + { + "epoch": 0.06, + "learning_rate": 4.996799215406562e-05, + "loss": 0.0334, + "step": 360 + }, + { + "epoch": 0.07, + "learning_rate": 4.996710057061341e-05, + "loss": 0.0306, + "step": 370 + }, + { + "epoch": 0.07, + "learning_rate": 4.99662089871612e-05, + "loss": 0.0319, + "step": 380 + }, + { + "epoch": 0.07, + "learning_rate": 4.996531740370899e-05, + "loss": 0.0348, + "step": 390 + }, + { + "epoch": 0.07, + "learning_rate": 4.996442582025678e-05, + "loss": 0.0325, + "step": 400 + }, + { + "epoch": 0.07, + "learning_rate": 4.9963534236804564e-05, + "loss": 0.034, + "step": 410 + }, + { + "epoch": 0.07, + "learning_rate": 4.9962642653352355e-05, + "loss": 0.031, + "step": 420 + }, + { + "epoch": 0.08, + "learning_rate": 4.996175106990014e-05, + "loss": 0.032, + "step": 430 + }, + { + "epoch": 0.08, + "learning_rate": 4.996085948644794e-05, + "loss": 0.0342, + "step": 440 + }, + { + "epoch": 0.08, + "learning_rate": 4.995996790299572e-05, + "loss": 0.0336, + "step": 450 + }, + { + "epoch": 0.08, + "learning_rate": 4.995907631954351e-05, + "loss": 0.0344, + "step": 460 + }, + { + "epoch": 0.08, + "learning_rate": 4.9958184736091304e-05, + "loss": 0.0319, + "step": 470 + }, + { + "epoch": 0.09, + "learning_rate": 4.995729315263909e-05, + "loss": 0.0336, + "step": 480 + }, + { + "epoch": 0.09, + "learning_rate": 4.995640156918688e-05, + "loss": 0.0318, + "step": 490 + }, + { + "epoch": 0.09, + "learning_rate": 4.9955509985734665e-05, + "loss": 0.0305, + "step": 500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9954618402282456e-05, + "loss": 0.0311, + "step": 510 + }, + { + "epoch": 0.09, + "learning_rate": 4.995372681883024e-05, + "loss": 0.0298, + "step": 520 + }, + { + "epoch": 0.09, + "learning_rate": 4.995283523537803e-05, + "loss": 0.0319, + "step": 530 + }, + { + "epoch": 0.1, + "learning_rate": 4.995194365192582e-05, + "loss": 0.0303, + "step": 540 + }, + { + "epoch": 0.1, + "learning_rate": 4.9951052068473614e-05, + "loss": 0.029, + "step": 550 + }, + { + "epoch": 0.1, + "learning_rate": 4.9950160485021405e-05, + "loss": 0.0316, + "step": 560 + }, + { + "epoch": 0.1, + "learning_rate": 4.994926890156919e-05, + "loss": 0.0304, + "step": 570 + }, + { + "epoch": 0.1, + "learning_rate": 4.99484664764622e-05, + "loss": 0.0288, + "step": 580 + }, + { + "epoch": 0.11, + "learning_rate": 4.994757489300999e-05, + "loss": 0.0308, + "step": 590 + }, + { + "epoch": 0.11, + "learning_rate": 4.994668330955778e-05, + "loss": 0.0319, + "step": 600 + }, + { + "epoch": 0.11, + "learning_rate": 4.9945791726105564e-05, + "loss": 0.0304, + "step": 610 + }, + { + "epoch": 0.11, + "learning_rate": 4.9944900142653356e-05, + "loss": 0.0311, + "step": 620 + }, + { + "epoch": 0.11, + "learning_rate": 4.994400855920115e-05, + "loss": 0.0295, + "step": 630 + }, + { + "epoch": 0.11, + "learning_rate": 4.994311697574893e-05, + "loss": 0.0306, + "step": 640 + }, + { + "epoch": 0.12, + "learning_rate": 4.994222539229672e-05, + "loss": 0.0304, + "step": 650 + }, + { + "epoch": 0.12, + "learning_rate": 4.994133380884451e-05, + "loss": 0.029, + "step": 660 + }, + { + "epoch": 0.12, + "learning_rate": 4.99404422253923e-05, + "loss": 0.0296, + "step": 670 + }, + { + "epoch": 0.12, + "learning_rate": 4.993955064194009e-05, + "loss": 0.0297, + "step": 680 + }, + { + "epoch": 0.12, + "learning_rate": 4.9938659058487874e-05, + "loss": 0.0288, + "step": 690 + }, + { + "epoch": 0.12, + "learning_rate": 4.993776747503567e-05, + "loss": 0.0314, + "step": 700 + }, + { + "epoch": 0.13, + "learning_rate": 4.9936875891583456e-05, + "loss": 0.0295, + "step": 710 + }, + { + "epoch": 0.13, + "learning_rate": 4.993598430813125e-05, + "loss": 0.0305, + "step": 720 + }, + { + "epoch": 0.13, + "learning_rate": 4.993509272467903e-05, + "loss": 0.03, + "step": 730 + }, + { + "epoch": 0.13, + "learning_rate": 4.993420114122682e-05, + "loss": 0.0304, + "step": 740 + }, + { + "epoch": 0.13, + "learning_rate": 4.993330955777461e-05, + "loss": 0.0304, + "step": 750 + }, + { + "epoch": 0.14, + "learning_rate": 4.99324179743224e-05, + "loss": 0.0309, + "step": 760 + }, + { + "epoch": 0.14, + "learning_rate": 4.993152639087019e-05, + "loss": 0.0308, + "step": 770 + }, + { + "epoch": 0.14, + "learning_rate": 4.9930634807417974e-05, + "loss": 0.0286, + "step": 780 + }, + { + "epoch": 0.14, + "learning_rate": 4.9929743223965765e-05, + "loss": 0.0282, + "step": 790 + }, + { + "epoch": 0.14, + "learning_rate": 4.992885164051355e-05, + "loss": 0.0302, + "step": 800 + }, + { + "epoch": 0.14, + "learning_rate": 4.992796005706135e-05, + "loss": 0.028, + "step": 810 + }, + { + "epoch": 0.15, + "learning_rate": 4.992706847360913e-05, + "loss": 0.0278, + "step": 820 + }, + { + "epoch": 0.15, + "learning_rate": 4.9926176890156923e-05, + "loss": 0.0295, + "step": 830 + }, + { + "epoch": 0.15, + "learning_rate": 4.992528530670471e-05, + "loss": 0.0284, + "step": 840 + }, + { + "epoch": 0.15, + "learning_rate": 4.99243937232525e-05, + "loss": 0.0299, + "step": 850 + }, + { + "epoch": 0.15, + "learning_rate": 4.992350213980029e-05, + "loss": 0.0278, + "step": 860 + }, + { + "epoch": 0.16, + "learning_rate": 4.9922610556348075e-05, + "loss": 0.031, + "step": 870 + }, + { + "epoch": 0.16, + "learning_rate": 4.9921718972895866e-05, + "loss": 0.0291, + "step": 880 + }, + { + "epoch": 0.16, + "learning_rate": 4.992082738944365e-05, + "loss": 0.03, + "step": 890 + }, + { + "epoch": 0.16, + "learning_rate": 4.991993580599144e-05, + "loss": 0.0304, + "step": 900 + }, + { + "epoch": 0.16, + "learning_rate": 4.991904422253923e-05, + "loss": 0.0321, + "step": 910 + }, + { + "epoch": 0.16, + "learning_rate": 4.9918152639087024e-05, + "loss": 0.0261, + "step": 920 + }, + { + "epoch": 0.17, + "learning_rate": 4.9917261055634815e-05, + "loss": 0.0299, + "step": 930 + }, + { + "epoch": 0.17, + "learning_rate": 4.99163694721826e-05, + "loss": 0.0282, + "step": 940 + }, + { + "epoch": 0.17, + "learning_rate": 4.991547788873039e-05, + "loss": 0.0294, + "step": 950 + }, + { + "epoch": 0.17, + "learning_rate": 4.9914586305278175e-05, + "loss": 0.0284, + "step": 960 + }, + { + "epoch": 0.17, + "learning_rate": 4.9913694721825967e-05, + "loss": 0.027, + "step": 970 + }, + { + "epoch": 0.17, + "learning_rate": 4.991280313837375e-05, + "loss": 0.0277, + "step": 980 + }, + { + "epoch": 0.18, + "learning_rate": 4.991191155492154e-05, + "loss": 0.0279, + "step": 990 + }, + { + "epoch": 0.18, + "learning_rate": 4.9911019971469333e-05, + "loss": 0.0268, + "step": 1000 + }, + { + "epoch": 0.18, + "learning_rate": 4.991012838801712e-05, + "loss": 0.0286, + "step": 1010 + }, + { + "epoch": 0.18, + "learning_rate": 4.990923680456491e-05, + "loss": 0.0254, + "step": 1020 + }, + { + "epoch": 0.18, + "learning_rate": 4.99083452211127e-05, + "loss": 0.0276, + "step": 1030 + }, + { + "epoch": 0.19, + "learning_rate": 4.990745363766049e-05, + "loss": 0.0305, + "step": 1040 + }, + { + "epoch": 0.19, + "learning_rate": 4.9906562054208276e-05, + "loss": 0.0284, + "step": 1050 + }, + { + "epoch": 0.19, + "learning_rate": 4.990567047075607e-05, + "loss": 0.0271, + "step": 1060 + }, + { + "epoch": 0.19, + "learning_rate": 4.990477888730385e-05, + "loss": 0.0284, + "step": 1070 + }, + { + "epoch": 0.19, + "learning_rate": 4.990388730385164e-05, + "loss": 0.026, + "step": 1080 + }, + { + "epoch": 0.19, + "learning_rate": 4.9902995720399434e-05, + "loss": 0.0278, + "step": 1090 + }, + { + "epoch": 0.2, + "learning_rate": 4.990210413694722e-05, + "loss": 0.0264, + "step": 1100 + }, + { + "epoch": 0.2, + "learning_rate": 4.990121255349501e-05, + "loss": 0.0254, + "step": 1110 + }, + { + "epoch": 0.2, + "learning_rate": 4.9900320970042794e-05, + "loss": 0.0269, + "step": 1120 + }, + { + "epoch": 0.2, + "learning_rate": 4.9899429386590585e-05, + "loss": 0.0271, + "step": 1130 + }, + { + "epoch": 0.2, + "learning_rate": 4.9898537803138376e-05, + "loss": 0.028, + "step": 1140 + }, + { + "epoch": 0.21, + "learning_rate": 4.989764621968617e-05, + "loss": 0.0265, + "step": 1150 + }, + { + "epoch": 0.21, + "learning_rate": 4.989675463623396e-05, + "loss": 0.0294, + "step": 1160 + }, + { + "epoch": 0.21, + "learning_rate": 4.989586305278174e-05, + "loss": 0.0268, + "step": 1170 + }, + { + "epoch": 0.21, + "learning_rate": 4.9894971469329535e-05, + "loss": 0.0254, + "step": 1180 + }, + { + "epoch": 0.21, + "learning_rate": 4.989407988587732e-05, + "loss": 0.0262, + "step": 1190 + }, + { + "epoch": 0.21, + "learning_rate": 4.989318830242511e-05, + "loss": 0.026, + "step": 1200 + }, + { + "epoch": 0.22, + "learning_rate": 4.9892296718972895e-05, + "loss": 0.0258, + "step": 1210 + }, + { + "epoch": 0.22, + "learning_rate": 4.9891405135520686e-05, + "loss": 0.0255, + "step": 1220 + }, + { + "epoch": 0.22, + "learning_rate": 4.989051355206848e-05, + "loss": 0.0247, + "step": 1230 + }, + { + "epoch": 0.22, + "learning_rate": 4.988962196861626e-05, + "loss": 0.0253, + "step": 1240 + }, + { + "epoch": 0.22, + "learning_rate": 4.988873038516406e-05, + "loss": 0.0233, + "step": 1250 + }, + { + "epoch": 0.22, + "learning_rate": 4.9887838801711844e-05, + "loss": 0.0263, + "step": 1260 + }, + { + "epoch": 0.23, + "learning_rate": 4.9886947218259635e-05, + "loss": 0.0247, + "step": 1270 + }, + { + "epoch": 0.23, + "learning_rate": 4.988605563480742e-05, + "loss": 0.0251, + "step": 1280 + }, + { + "epoch": 0.23, + "learning_rate": 4.988516405135521e-05, + "loss": 0.0232, + "step": 1290 + }, + { + "epoch": 0.23, + "learning_rate": 4.9884272467902995e-05, + "loss": 0.0223, + "step": 1300 + }, + { + "epoch": 0.23, + "learning_rate": 4.9883380884450786e-05, + "loss": 0.0246, + "step": 1310 + }, + { + "epoch": 0.24, + "learning_rate": 4.988248930099858e-05, + "loss": 0.0236, + "step": 1320 + }, + { + "epoch": 0.24, + "learning_rate": 4.988159771754636e-05, + "loss": 0.0258, + "step": 1330 + }, + { + "epoch": 0.24, + "learning_rate": 4.988070613409415e-05, + "loss": 0.0274, + "step": 1340 + }, + { + "epoch": 0.24, + "learning_rate": 4.987981455064194e-05, + "loss": 0.0263, + "step": 1350 + }, + { + "epoch": 0.24, + "learning_rate": 4.9878922967189736e-05, + "loss": 0.0288, + "step": 1360 + }, + { + "epoch": 0.24, + "learning_rate": 4.987803138373752e-05, + "loss": 0.0252, + "step": 1370 + }, + { + "epoch": 0.25, + "learning_rate": 4.987713980028531e-05, + "loss": 0.0239, + "step": 1380 + }, + { + "epoch": 0.25, + "learning_rate": 4.987633737517832e-05, + "loss": 0.0237, + "step": 1390 + }, + { + "epoch": 0.25, + "learning_rate": 4.9875445791726104e-05, + "loss": 0.0237, + "step": 1400 + }, + { + "epoch": 0.25, + "learning_rate": 4.98745542082739e-05, + "loss": 0.0213, + "step": 1410 + }, + { + "epoch": 0.25, + "learning_rate": 4.9873662624821686e-05, + "loss": 0.0233, + "step": 1420 + }, + { + "epoch": 0.25, + "learning_rate": 4.987277104136948e-05, + "loss": 0.022, + "step": 1430 + }, + { + "epoch": 0.26, + "learning_rate": 4.987187945791726e-05, + "loss": 0.0267, + "step": 1440 + }, + { + "epoch": 0.26, + "learning_rate": 4.987098787446505e-05, + "loss": 0.0261, + "step": 1450 + }, + { + "epoch": 0.26, + "learning_rate": 4.9870096291012844e-05, + "loss": 0.0221, + "step": 1460 + }, + { + "epoch": 0.26, + "learning_rate": 4.986920470756063e-05, + "loss": 0.022, + "step": 1470 + }, + { + "epoch": 0.26, + "learning_rate": 4.986831312410842e-05, + "loss": 0.0238, + "step": 1480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9867421540656204e-05, + "loss": 0.0249, + "step": 1490 + }, + { + "epoch": 0.27, + "learning_rate": 4.9866529957203996e-05, + "loss": 0.0212, + "step": 1500 + }, + { + "epoch": 0.27, + "learning_rate": 4.986563837375178e-05, + "loss": 0.0247, + "step": 1510 + }, + { + "epoch": 0.27, + "learning_rate": 4.986474679029958e-05, + "loss": 0.0238, + "step": 1520 + }, + { + "epoch": 0.27, + "learning_rate": 4.986385520684736e-05, + "loss": 0.0246, + "step": 1530 + }, + { + "epoch": 0.27, + "learning_rate": 4.9862963623395154e-05, + "loss": 0.0218, + "step": 1540 + }, + { + "epoch": 0.28, + "learning_rate": 4.9862072039942945e-05, + "loss": 0.0245, + "step": 1550 + }, + { + "epoch": 0.28, + "learning_rate": 4.986118045649073e-05, + "loss": 0.0206, + "step": 1560 + }, + { + "epoch": 0.28, + "learning_rate": 4.986028887303852e-05, + "loss": 0.0203, + "step": 1570 + }, + { + "epoch": 0.28, + "learning_rate": 4.9859397289586305e-05, + "loss": 0.0244, + "step": 1580 + }, + { + "epoch": 0.28, + "learning_rate": 4.9858505706134096e-05, + "loss": 0.0266, + "step": 1590 + }, + { + "epoch": 0.29, + "learning_rate": 4.985761412268188e-05, + "loss": 0.0223, + "step": 1600 + }, + { + "epoch": 0.29, + "learning_rate": 4.985672253922967e-05, + "loss": 0.0224, + "step": 1610 + }, + { + "epoch": 0.29, + "learning_rate": 4.985583095577746e-05, + "loss": 0.0221, + "step": 1620 + }, + { + "epoch": 0.29, + "learning_rate": 4.9854939372325254e-05, + "loss": 0.0222, + "step": 1630 + }, + { + "epoch": 0.29, + "learning_rate": 4.9854047788873045e-05, + "loss": 0.0224, + "step": 1640 + }, + { + "epoch": 0.29, + "learning_rate": 4.985315620542083e-05, + "loss": 0.0231, + "step": 1650 + }, + { + "epoch": 0.3, + "learning_rate": 4.985226462196862e-05, + "loss": 0.0207, + "step": 1660 + }, + { + "epoch": 0.3, + "learning_rate": 4.9851373038516405e-05, + "loss": 0.0251, + "step": 1670 + }, + { + "epoch": 0.3, + "learning_rate": 4.98504814550642e-05, + "loss": 0.0187, + "step": 1680 + }, + { + "epoch": 0.3, + "learning_rate": 4.984958987161199e-05, + "loss": 0.021, + "step": 1690 + }, + { + "epoch": 0.3, + "learning_rate": 4.984869828815977e-05, + "loss": 0.0174, + "step": 1700 + }, + { + "epoch": 0.3, + "learning_rate": 4.9847806704707564e-05, + "loss": 0.0176, + "step": 1710 + }, + { + "epoch": 0.31, + "learning_rate": 4.984691512125535e-05, + "loss": 0.018, + "step": 1720 + }, + { + "epoch": 0.31, + "learning_rate": 4.984602353780314e-05, + "loss": 0.0188, + "step": 1730 + }, + { + "epoch": 0.31, + "learning_rate": 4.984513195435093e-05, + "loss": 0.0219, + "step": 1740 + }, + { + "epoch": 0.31, + "learning_rate": 4.984424037089872e-05, + "loss": 0.0235, + "step": 1750 + }, + { + "epoch": 0.31, + "learning_rate": 4.9843348787446506e-05, + "loss": 0.0196, + "step": 1760 + }, + { + "epoch": 0.32, + "learning_rate": 4.98424572039943e-05, + "loss": 0.0198, + "step": 1770 + }, + { + "epoch": 0.32, + "learning_rate": 4.984156562054209e-05, + "loss": 0.0198, + "step": 1780 + }, + { + "epoch": 0.32, + "learning_rate": 4.984067403708987e-05, + "loss": 0.0238, + "step": 1790 + }, + { + "epoch": 0.32, + "learning_rate": 4.9839782453637664e-05, + "loss": 0.0186, + "step": 1800 + }, + { + "epoch": 0.32, + "learning_rate": 4.983889087018545e-05, + "loss": 0.0227, + "step": 1810 + }, + { + "epoch": 0.32, + "learning_rate": 4.983799928673324e-05, + "loss": 0.0214, + "step": 1820 + }, + { + "epoch": 0.33, + "learning_rate": 4.9837107703281024e-05, + "loss": 0.0211, + "step": 1830 + }, + { + "epoch": 0.33, + "learning_rate": 4.9836216119828815e-05, + "loss": 0.0204, + "step": 1840 + }, + { + "epoch": 0.33, + "learning_rate": 4.9835324536376607e-05, + "loss": 0.0187, + "step": 1850 + }, + { + "epoch": 0.33, + "learning_rate": 4.98344329529244e-05, + "loss": 0.0191, + "step": 1860 + }, + { + "epoch": 0.33, + "learning_rate": 4.983354136947219e-05, + "loss": 0.023, + "step": 1870 + }, + { + "epoch": 0.34, + "learning_rate": 4.9832649786019973e-05, + "loss": 0.0211, + "step": 1880 + }, + { + "epoch": 0.34, + "learning_rate": 4.9831758202567765e-05, + "loss": 0.0203, + "step": 1890 + }, + { + "epoch": 0.34, + "learning_rate": 4.983086661911555e-05, + "loss": 0.0208, + "step": 1900 + }, + { + "epoch": 0.34, + "learning_rate": 4.982997503566334e-05, + "loss": 0.0239, + "step": 1910 + }, + { + "epoch": 0.34, + "learning_rate": 4.982908345221113e-05, + "loss": 0.019, + "step": 1920 + }, + { + "epoch": 0.34, + "learning_rate": 4.9828191868758916e-05, + "loss": 0.0172, + "step": 1930 + }, + { + "epoch": 0.35, + "learning_rate": 4.982730028530671e-05, + "loss": 0.0186, + "step": 1940 + }, + { + "epoch": 0.35, + "learning_rate": 4.982640870185449e-05, + "loss": 0.02, + "step": 1950 + }, + { + "epoch": 0.35, + "learning_rate": 4.982551711840229e-05, + "loss": 0.0175, + "step": 1960 + }, + { + "epoch": 0.35, + "learning_rate": 4.9824625534950074e-05, + "loss": 0.0225, + "step": 1970 + }, + { + "epoch": 0.35, + "learning_rate": 4.9823733951497865e-05, + "loss": 0.0192, + "step": 1980 + }, + { + "epoch": 0.35, + "learning_rate": 4.982284236804565e-05, + "loss": 0.0201, + "step": 1990 + }, + { + "epoch": 0.36, + "learning_rate": 4.982195078459344e-05, + "loss": 0.0191, + "step": 2000 + }, + { + "epoch": 0.36, + "learning_rate": 4.982105920114123e-05, + "loss": 0.0214, + "step": 2010 + }, + { + "epoch": 0.36, + "learning_rate": 4.9820167617689016e-05, + "loss": 0.0173, + "step": 2020 + }, + { + "epoch": 0.36, + "learning_rate": 4.981927603423681e-05, + "loss": 0.0206, + "step": 2030 + }, + { + "epoch": 0.36, + "learning_rate": 4.981838445078459e-05, + "loss": 0.0163, + "step": 2040 + }, + { + "epoch": 0.37, + "learning_rate": 4.981749286733238e-05, + "loss": 0.0187, + "step": 2050 + }, + { + "epoch": 0.37, + "learning_rate": 4.981660128388017e-05, + "loss": 0.0213, + "step": 2060 + }, + { + "epoch": 0.37, + "learning_rate": 4.9815709700427966e-05, + "loss": 0.0226, + "step": 2070 + }, + { + "epoch": 0.37, + "learning_rate": 4.981481811697575e-05, + "loss": 0.0177, + "step": 2080 + }, + { + "epoch": 0.37, + "learning_rate": 4.981392653352354e-05, + "loss": 0.0194, + "step": 2090 + }, + { + "epoch": 0.37, + "learning_rate": 4.981303495007133e-05, + "loss": 0.0164, + "step": 2100 + }, + { + "epoch": 0.38, + "learning_rate": 4.981214336661912e-05, + "loss": 0.0146, + "step": 2110 + }, + { + "epoch": 0.38, + "learning_rate": 4.981125178316691e-05, + "loss": 0.0201, + "step": 2120 + }, + { + "epoch": 0.38, + "learning_rate": 4.981036019971469e-05, + "loss": 0.0179, + "step": 2130 + }, + { + "epoch": 0.38, + "learning_rate": 4.9809468616262484e-05, + "loss": 0.0188, + "step": 2140 + }, + { + "epoch": 0.38, + "learning_rate": 4.9808577032810275e-05, + "loss": 0.0202, + "step": 2150 + }, + { + "epoch": 0.39, + "learning_rate": 4.980768544935806e-05, + "loss": 0.02, + "step": 2160 + }, + { + "epoch": 0.39, + "learning_rate": 4.980679386590585e-05, + "loss": 0.0163, + "step": 2170 + }, + { + "epoch": 0.39, + "learning_rate": 4.980590228245364e-05, + "loss": 0.0235, + "step": 2180 + }, + { + "epoch": 0.39, + "learning_rate": 4.980501069900143e-05, + "loss": 0.0242, + "step": 2190 + }, + { + "epoch": 0.39, + "learning_rate": 4.980411911554922e-05, + "loss": 0.0209, + "step": 2200 + }, + { + "epoch": 0.39, + "learning_rate": 4.980322753209701e-05, + "loss": 0.0191, + "step": 2210 + }, + { + "epoch": 0.4, + "learning_rate": 4.980233594864479e-05, + "loss": 0.0166, + "step": 2220 + }, + { + "epoch": 0.4, + "learning_rate": 4.9801444365192584e-05, + "loss": 0.0177, + "step": 2230 + }, + { + "epoch": 0.4, + "learning_rate": 4.9800552781740376e-05, + "loss": 0.0183, + "step": 2240 + }, + { + "epoch": 0.4, + "learning_rate": 4.979966119828816e-05, + "loss": 0.0189, + "step": 2250 + }, + { + "epoch": 0.4, + "learning_rate": 4.979876961483595e-05, + "loss": 0.0189, + "step": 2260 + }, + { + "epoch": 0.4, + "learning_rate": 4.9797878031383736e-05, + "loss": 0.0162, + "step": 2270 + }, + { + "epoch": 0.41, + "learning_rate": 4.979698644793153e-05, + "loss": 0.0158, + "step": 2280 + }, + { + "epoch": 0.41, + "learning_rate": 4.979609486447932e-05, + "loss": 0.0193, + "step": 2290 + }, + { + "epoch": 0.41, + "learning_rate": 4.979520328102711e-05, + "loss": 0.0189, + "step": 2300 + }, + { + "epoch": 0.41, + "learning_rate": 4.9794311697574894e-05, + "loss": 0.0204, + "step": 2310 + }, + { + "epoch": 0.41, + "learning_rate": 4.9793420114122685e-05, + "loss": 0.0235, + "step": 2320 + }, + { + "epoch": 0.42, + "learning_rate": 4.9792528530670476e-05, + "loss": 0.0163, + "step": 2330 + }, + { + "epoch": 0.42, + "learning_rate": 4.979163694721826e-05, + "loss": 0.0226, + "step": 2340 + }, + { + "epoch": 0.42, + "learning_rate": 4.979074536376605e-05, + "loss": 0.0173, + "step": 2350 + }, + { + "epoch": 0.42, + "learning_rate": 4.9789853780313836e-05, + "loss": 0.017, + "step": 2360 + }, + { + "epoch": 0.42, + "learning_rate": 4.978896219686163e-05, + "loss": 0.0152, + "step": 2370 + }, + { + "epoch": 0.42, + "learning_rate": 4.978807061340942e-05, + "loss": 0.0174, + "step": 2380 + }, + { + "epoch": 0.43, + "learning_rate": 4.97871790299572e-05, + "loss": 0.0205, + "step": 2390 + }, + { + "epoch": 0.43, + "learning_rate": 4.9786287446505e-05, + "loss": 0.0198, + "step": 2400 + }, + { + "epoch": 0.43, + "learning_rate": 4.9785395863052786e-05, + "loss": 0.0215, + "step": 2410 + }, + { + "epoch": 0.43, + "learning_rate": 4.978450427960058e-05, + "loss": 0.0188, + "step": 2420 + }, + { + "epoch": 0.43, + "learning_rate": 4.978361269614836e-05, + "loss": 0.0178, + "step": 2430 + }, + { + "epoch": 0.44, + "learning_rate": 4.978272111269615e-05, + "loss": 0.014, + "step": 2440 + }, + { + "epoch": 0.44, + "learning_rate": 4.978182952924394e-05, + "loss": 0.0164, + "step": 2450 + }, + { + "epoch": 0.44, + "learning_rate": 4.978093794579173e-05, + "loss": 0.0179, + "step": 2460 + }, + { + "epoch": 0.44, + "learning_rate": 4.978004636233952e-05, + "loss": 0.0161, + "step": 2470 + }, + { + "epoch": 0.44, + "learning_rate": 4.9779154778887304e-05, + "loss": 0.0149, + "step": 2480 + }, + { + "epoch": 0.44, + "learning_rate": 4.9778263195435095e-05, + "loss": 0.0161, + "step": 2490 + }, + { + "epoch": 0.45, + "learning_rate": 4.977737161198288e-05, + "loss": 0.018, + "step": 2500 + }, + { + "epoch": 0.45, + "learning_rate": 4.977648002853068e-05, + "loss": 0.017, + "step": 2510 + }, + { + "epoch": 0.45, + "learning_rate": 4.977558844507846e-05, + "loss": 0.0163, + "step": 2520 + }, + { + "epoch": 0.45, + "learning_rate": 4.977469686162625e-05, + "loss": 0.0166, + "step": 2530 + }, + { + "epoch": 0.45, + "learning_rate": 4.977380527817404e-05, + "loss": 0.0172, + "step": 2540 + }, + { + "epoch": 0.45, + "learning_rate": 4.977291369472183e-05, + "loss": 0.0186, + "step": 2550 + }, + { + "epoch": 0.46, + "learning_rate": 4.977202211126962e-05, + "loss": 0.0154, + "step": 2560 + }, + { + "epoch": 0.46, + "learning_rate": 4.9771130527817404e-05, + "loss": 0.017, + "step": 2570 + }, + { + "epoch": 0.46, + "learning_rate": 4.9770238944365195e-05, + "loss": 0.0179, + "step": 2580 + }, + { + "epoch": 0.46, + "learning_rate": 4.976934736091298e-05, + "loss": 0.017, + "step": 2590 + }, + { + "epoch": 0.46, + "learning_rate": 4.976845577746077e-05, + "loss": 0.0162, + "step": 2600 + }, + { + "epoch": 0.47, + "learning_rate": 4.976756419400856e-05, + "loss": 0.0186, + "step": 2610 + }, + { + "epoch": 0.47, + "learning_rate": 4.9766672610556354e-05, + "loss": 0.018, + "step": 2620 + }, + { + "epoch": 0.47, + "learning_rate": 4.976578102710414e-05, + "loss": 0.0189, + "step": 2630 + }, + { + "epoch": 0.47, + "learning_rate": 4.976488944365193e-05, + "loss": 0.018, + "step": 2640 + }, + { + "epoch": 0.47, + "learning_rate": 4.976399786019972e-05, + "loss": 0.0173, + "step": 2650 + }, + { + "epoch": 0.47, + "learning_rate": 4.9763106276747505e-05, + "loss": 0.02, + "step": 2660 + }, + { + "epoch": 0.48, + "learning_rate": 4.9762214693295296e-05, + "loss": 0.0182, + "step": 2670 + }, + { + "epoch": 0.48, + "learning_rate": 4.976132310984308e-05, + "loss": 0.0173, + "step": 2680 + }, + { + "epoch": 0.48, + "learning_rate": 4.976043152639087e-05, + "loss": 0.0142, + "step": 2690 + }, + { + "epoch": 0.48, + "learning_rate": 4.975953994293866e-05, + "loss": 0.0185, + "step": 2700 + }, + { + "epoch": 0.48, + "learning_rate": 4.975864835948645e-05, + "loss": 0.0185, + "step": 2710 + }, + { + "epoch": 0.49, + "learning_rate": 4.975775677603424e-05, + "loss": 0.0169, + "step": 2720 + }, + { + "epoch": 0.49, + "learning_rate": 4.975686519258203e-05, + "loss": 0.0177, + "step": 2730 + }, + { + "epoch": 0.49, + "learning_rate": 4.975597360912982e-05, + "loss": 0.0176, + "step": 2740 + }, + { + "epoch": 0.49, + "learning_rate": 4.9755082025677605e-05, + "loss": 0.0137, + "step": 2750 + }, + { + "epoch": 0.49, + "learning_rate": 4.9754190442225397e-05, + "loss": 0.0132, + "step": 2760 + }, + { + "epoch": 0.49, + "learning_rate": 4.975329885877318e-05, + "loss": 0.0173, + "step": 2770 + }, + { + "epoch": 0.5, + "learning_rate": 4.975240727532097e-05, + "loss": 0.0158, + "step": 2780 + }, + { + "epoch": 0.5, + "learning_rate": 4.9751515691868763e-05, + "loss": 0.0133, + "step": 2790 + }, + { + "epoch": 0.5, + "learning_rate": 4.975062410841655e-05, + "loss": 0.0187, + "step": 2800 + }, + { + "epoch": 0.5, + "learning_rate": 4.974973252496434e-05, + "loss": 0.0148, + "step": 2810 + }, + { + "epoch": 0.5, + "learning_rate": 4.9748840941512124e-05, + "loss": 0.0156, + "step": 2820 + }, + { + "epoch": 0.5, + "learning_rate": 4.9747949358059915e-05, + "loss": 0.0163, + "step": 2830 + }, + { + "epoch": 0.51, + "learning_rate": 4.9747057774607706e-05, + "loss": 0.0168, + "step": 2840 + }, + { + "epoch": 0.51, + "learning_rate": 4.97461661911555e-05, + "loss": 0.0172, + "step": 2850 + }, + { + "epoch": 0.51, + "learning_rate": 4.974527460770328e-05, + "loss": 0.0149, + "step": 2860 + }, + { + "epoch": 0.51, + "learning_rate": 4.974438302425107e-05, + "loss": 0.0156, + "step": 2870 + }, + { + "epoch": 0.51, + "learning_rate": 4.9743491440798864e-05, + "loss": 0.0164, + "step": 2880 + }, + { + "epoch": 0.52, + "learning_rate": 4.974259985734665e-05, + "loss": 0.0162, + "step": 2890 + }, + { + "epoch": 0.52, + "learning_rate": 4.974170827389444e-05, + "loss": 0.0158, + "step": 2900 + }, + { + "epoch": 0.52, + "learning_rate": 4.9740816690442224e-05, + "loss": 0.0137, + "step": 2910 + }, + { + "epoch": 0.52, + "learning_rate": 4.9739925106990015e-05, + "loss": 0.0155, + "step": 2920 + }, + { + "epoch": 0.52, + "learning_rate": 4.9739033523537807e-05, + "loss": 0.0193, + "step": 2930 + }, + { + "epoch": 0.52, + "learning_rate": 4.973814194008559e-05, + "loss": 0.0168, + "step": 2940 + }, + { + "epoch": 0.53, + "learning_rate": 4.973725035663339e-05, + "loss": 0.0152, + "step": 2950 + }, + { + "epoch": 0.53, + "learning_rate": 4.973635877318117e-05, + "loss": 0.0156, + "step": 2960 + }, + { + "epoch": 0.53, + "learning_rate": 4.9735467189728965e-05, + "loss": 0.0135, + "step": 2970 + }, + { + "epoch": 0.53, + "learning_rate": 4.973457560627675e-05, + "loss": 0.0167, + "step": 2980 + }, + { + "epoch": 0.53, + "learning_rate": 4.973368402282454e-05, + "loss": 0.0126, + "step": 2990 + }, + { + "epoch": 0.53, + "learning_rate": 4.9732792439372325e-05, + "loss": 0.017, + "step": 3000 + }, + { + "epoch": 0.54, + "learning_rate": 4.9731900855920116e-05, + "loss": 0.0185, + "step": 3010 + }, + { + "epoch": 0.54, + "learning_rate": 4.973100927246791e-05, + "loss": 0.0164, + "step": 3020 + }, + { + "epoch": 0.54, + "learning_rate": 4.973011768901569e-05, + "loss": 0.0176, + "step": 3030 + }, + { + "epoch": 0.54, + "learning_rate": 4.972922610556348e-05, + "loss": 0.0198, + "step": 3040 + }, + { + "epoch": 0.54, + "learning_rate": 4.972833452211127e-05, + "loss": 0.0172, + "step": 3050 + }, + { + "epoch": 0.55, + "learning_rate": 4.9727442938659065e-05, + "loss": 0.0188, + "step": 3060 + }, + { + "epoch": 0.55, + "learning_rate": 4.972655135520685e-05, + "loss": 0.0162, + "step": 3070 + }, + { + "epoch": 0.55, + "learning_rate": 4.972565977175464e-05, + "loss": 0.0147, + "step": 3080 + }, + { + "epoch": 0.55, + "learning_rate": 4.9724768188302425e-05, + "loss": 0.0125, + "step": 3090 + }, + { + "epoch": 0.55, + "learning_rate": 4.9723876604850216e-05, + "loss": 0.0138, + "step": 3100 + }, + { + "epoch": 0.55, + "learning_rate": 4.972298502139801e-05, + "loss": 0.0108, + "step": 3110 + }, + { + "epoch": 0.56, + "learning_rate": 4.972209343794579e-05, + "loss": 0.0173, + "step": 3120 + }, + { + "epoch": 0.56, + "learning_rate": 4.972120185449358e-05, + "loss": 0.0148, + "step": 3130 + }, + { + "epoch": 0.56, + "learning_rate": 4.972031027104137e-05, + "loss": 0.0176, + "step": 3140 + }, + { + "epoch": 0.56, + "learning_rate": 4.971941868758916e-05, + "loss": 0.0202, + "step": 3150 + }, + { + "epoch": 0.56, + "learning_rate": 4.971852710413695e-05, + "loss": 0.017, + "step": 3160 + }, + { + "epoch": 0.57, + "learning_rate": 4.971763552068474e-05, + "loss": 0.015, + "step": 3170 + }, + { + "epoch": 0.57, + "learning_rate": 4.971674393723253e-05, + "loss": 0.0164, + "step": 3180 + }, + { + "epoch": 0.57, + "learning_rate": 4.971585235378032e-05, + "loss": 0.012, + "step": 3190 + }, + { + "epoch": 0.57, + "learning_rate": 4.971496077032811e-05, + "loss": 0.0194, + "step": 3200 + }, + { + "epoch": 0.57, + "learning_rate": 4.971406918687589e-05, + "loss": 0.0147, + "step": 3210 + }, + { + "epoch": 0.57, + "learning_rate": 4.9713177603423684e-05, + "loss": 0.0129, + "step": 3220 + }, + { + "epoch": 0.58, + "learning_rate": 4.971228601997147e-05, + "loss": 0.0132, + "step": 3230 + }, + { + "epoch": 0.58, + "learning_rate": 4.971139443651926e-05, + "loss": 0.0155, + "step": 3240 + }, + { + "epoch": 0.58, + "learning_rate": 4.971050285306705e-05, + "loss": 0.0168, + "step": 3250 + }, + { + "epoch": 0.58, + "learning_rate": 4.9709611269614835e-05, + "loss": 0.0155, + "step": 3260 + }, + { + "epoch": 0.58, + "learning_rate": 4.9708719686162626e-05, + "loss": 0.0163, + "step": 3270 + }, + { + "epoch": 0.58, + "learning_rate": 4.970782810271042e-05, + "loss": 0.017, + "step": 3280 + }, + { + "epoch": 0.59, + "learning_rate": 4.970693651925821e-05, + "loss": 0.0155, + "step": 3290 + }, + { + "epoch": 0.59, + "learning_rate": 4.970604493580599e-05, + "loss": 0.02, + "step": 3300 + }, + { + "epoch": 0.59, + "learning_rate": 4.9705153352353784e-05, + "loss": 0.0147, + "step": 3310 + }, + { + "epoch": 0.59, + "learning_rate": 4.970426176890157e-05, + "loss": 0.0147, + "step": 3320 + }, + { + "epoch": 0.59, + "learning_rate": 4.970337018544936e-05, + "loss": 0.0175, + "step": 3330 + }, + { + "epoch": 0.6, + "learning_rate": 4.970247860199715e-05, + "loss": 0.0136, + "step": 3340 + }, + { + "epoch": 0.6, + "learning_rate": 4.9701587018544936e-05, + "loss": 0.0162, + "step": 3350 + }, + { + "epoch": 0.6, + "learning_rate": 4.970069543509273e-05, + "loss": 0.0145, + "step": 3360 + }, + { + "epoch": 0.6, + "learning_rate": 4.969980385164051e-05, + "loss": 0.015, + "step": 3370 + }, + { + "epoch": 0.6, + "learning_rate": 4.96989122681883e-05, + "loss": 0.0136, + "step": 3380 + }, + { + "epoch": 0.6, + "learning_rate": 4.9698020684736094e-05, + "loss": 0.014, + "step": 3390 + }, + { + "epoch": 0.61, + "learning_rate": 4.9697129101283885e-05, + "loss": 0.014, + "step": 3400 + }, + { + "epoch": 0.61, + "learning_rate": 4.9696237517831676e-05, + "loss": 0.0173, + "step": 3410 + }, + { + "epoch": 0.61, + "learning_rate": 4.969534593437946e-05, + "loss": 0.0164, + "step": 3420 + }, + { + "epoch": 0.61, + "learning_rate": 4.969445435092725e-05, + "loss": 0.0129, + "step": 3430 + }, + { + "epoch": 0.61, + "learning_rate": 4.9693562767475036e-05, + "loss": 0.0119, + "step": 3440 + }, + { + "epoch": 0.62, + "learning_rate": 4.969267118402283e-05, + "loss": 0.0133, + "step": 3450 + }, + { + "epoch": 0.62, + "learning_rate": 4.969177960057061e-05, + "loss": 0.0199, + "step": 3460 + }, + { + "epoch": 0.62, + "learning_rate": 4.96908880171184e-05, + "loss": 0.0143, + "step": 3470 + }, + { + "epoch": 0.62, + "learning_rate": 4.9689996433666194e-05, + "loss": 0.0177, + "step": 3480 + }, + { + "epoch": 0.62, + "learning_rate": 4.968910485021398e-05, + "loss": 0.0149, + "step": 3490 + }, + { + "epoch": 0.62, + "learning_rate": 4.968821326676178e-05, + "loss": 0.0181, + "step": 3500 + }, + { + "epoch": 0.63, + "learning_rate": 4.968732168330956e-05, + "loss": 0.019, + "step": 3510 + }, + { + "epoch": 0.63, + "learning_rate": 4.968643009985735e-05, + "loss": 0.0146, + "step": 3520 + }, + { + "epoch": 0.63, + "learning_rate": 4.968553851640514e-05, + "loss": 0.0128, + "step": 3530 + }, + { + "epoch": 0.63, + "learning_rate": 4.968464693295293e-05, + "loss": 0.0142, + "step": 3540 + }, + { + "epoch": 0.63, + "learning_rate": 4.968375534950071e-05, + "loss": 0.0168, + "step": 3550 + }, + { + "epoch": 0.63, + "learning_rate": 4.9682863766048504e-05, + "loss": 0.0168, + "step": 3560 + }, + { + "epoch": 0.64, + "learning_rate": 4.9681972182596295e-05, + "loss": 0.0149, + "step": 3570 + }, + { + "epoch": 0.64, + "learning_rate": 4.968108059914408e-05, + "loss": 0.0137, + "step": 3580 + }, + { + "epoch": 0.64, + "learning_rate": 4.968018901569187e-05, + "loss": 0.0126, + "step": 3590 + }, + { + "epoch": 0.64, + "learning_rate": 4.9679297432239655e-05, + "loss": 0.0104, + "step": 3600 + }, + { + "epoch": 0.64, + "learning_rate": 4.967840584878745e-05, + "loss": 0.0139, + "step": 3610 + }, + { + "epoch": 0.65, + "learning_rate": 4.967751426533524e-05, + "loss": 0.014, + "step": 3620 + }, + { + "epoch": 0.65, + "learning_rate": 4.967662268188303e-05, + "loss": 0.0155, + "step": 3630 + }, + { + "epoch": 0.65, + "learning_rate": 4.967573109843082e-05, + "loss": 0.0197, + "step": 3640 + }, + { + "epoch": 0.65, + "learning_rate": 4.9674839514978604e-05, + "loss": 0.0181, + "step": 3650 + }, + { + "epoch": 0.65, + "learning_rate": 4.9673947931526395e-05, + "loss": 0.0152, + "step": 3660 + }, + { + "epoch": 0.65, + "learning_rate": 4.967305634807418e-05, + "loss": 0.0153, + "step": 3670 + }, + { + "epoch": 0.66, + "learning_rate": 4.967216476462197e-05, + "loss": 0.0132, + "step": 3680 + }, + { + "epoch": 0.66, + "learning_rate": 4.9671273181169756e-05, + "loss": 0.0151, + "step": 3690 + }, + { + "epoch": 0.66, + "learning_rate": 4.967038159771755e-05, + "loss": 0.0119, + "step": 3700 + }, + { + "epoch": 0.66, + "learning_rate": 4.966949001426534e-05, + "loss": 0.0149, + "step": 3710 + }, + { + "epoch": 0.66, + "learning_rate": 4.966859843081313e-05, + "loss": 0.0108, + "step": 3720 + }, + { + "epoch": 0.67, + "learning_rate": 4.966770684736092e-05, + "loss": 0.0129, + "step": 3730 + }, + { + "epoch": 0.67, + "learning_rate": 4.9666815263908705e-05, + "loss": 0.0111, + "step": 3740 + }, + { + "epoch": 0.67, + "learning_rate": 4.9665923680456496e-05, + "loss": 0.0166, + "step": 3750 + }, + { + "epoch": 0.67, + "learning_rate": 4.966503209700428e-05, + "loss": 0.0128, + "step": 3760 + }, + { + "epoch": 0.67, + "learning_rate": 4.966414051355207e-05, + "loss": 0.0114, + "step": 3770 + }, + { + "epoch": 0.67, + "learning_rate": 4.9663248930099856e-05, + "loss": 0.0125, + "step": 3780 + }, + { + "epoch": 0.68, + "learning_rate": 4.966235734664765e-05, + "loss": 0.0134, + "step": 3790 + }, + { + "epoch": 0.68, + "learning_rate": 4.966155492154066e-05, + "loss": 0.0161, + "step": 3800 + }, + { + "epoch": 0.68, + "learning_rate": 4.9660663338088447e-05, + "loss": 0.0116, + "step": 3810 + }, + { + "epoch": 0.68, + "learning_rate": 4.965977175463624e-05, + "loss": 0.013, + "step": 3820 + }, + { + "epoch": 0.68, + "learning_rate": 4.965888017118402e-05, + "loss": 0.0107, + "step": 3830 + }, + { + "epoch": 0.68, + "learning_rate": 4.965798858773181e-05, + "loss": 0.0159, + "step": 3840 + }, + { + "epoch": 0.69, + "learning_rate": 4.9657097004279605e-05, + "loss": 0.0151, + "step": 3850 + }, + { + "epoch": 0.69, + "learning_rate": 4.965620542082739e-05, + "loss": 0.0154, + "step": 3860 + }, + { + "epoch": 0.69, + "learning_rate": 4.965531383737518e-05, + "loss": 0.0103, + "step": 3870 + }, + { + "epoch": 0.69, + "learning_rate": 4.965442225392297e-05, + "loss": 0.0118, + "step": 3880 + }, + { + "epoch": 0.69, + "learning_rate": 4.965353067047076e-05, + "loss": 0.0165, + "step": 3890 + }, + { + "epoch": 0.7, + "learning_rate": 4.965263908701855e-05, + "loss": 0.01, + "step": 3900 + }, + { + "epoch": 0.7, + "learning_rate": 4.965174750356634e-05, + "loss": 0.0162, + "step": 3910 + }, + { + "epoch": 0.7, + "learning_rate": 4.965085592011412e-05, + "loss": 0.0133, + "step": 3920 + }, + { + "epoch": 0.7, + "learning_rate": 4.9649964336661914e-05, + "loss": 0.0137, + "step": 3930 + }, + { + "epoch": 0.7, + "learning_rate": 4.9649072753209705e-05, + "loss": 0.013, + "step": 3940 + }, + { + "epoch": 0.7, + "learning_rate": 4.964818116975749e-05, + "loss": 0.0159, + "step": 3950 + }, + { + "epoch": 0.71, + "learning_rate": 4.964728958630528e-05, + "loss": 0.0092, + "step": 3960 + }, + { + "epoch": 0.71, + "learning_rate": 4.9646398002853065e-05, + "loss": 0.0128, + "step": 3970 + }, + { + "epoch": 0.71, + "learning_rate": 4.9645506419400856e-05, + "loss": 0.0158, + "step": 3980 + }, + { + "epoch": 0.71, + "learning_rate": 4.964461483594865e-05, + "loss": 0.0132, + "step": 3990 + }, + { + "epoch": 0.71, + "learning_rate": 4.964372325249644e-05, + "loss": 0.0159, + "step": 4000 + }, + { + "epoch": 0.72, + "learning_rate": 4.964283166904422e-05, + "loss": 0.0132, + "step": 4010 + }, + { + "epoch": 0.72, + "learning_rate": 4.9641940085592015e-05, + "loss": 0.0131, + "step": 4020 + }, + { + "epoch": 0.72, + "learning_rate": 4.9641048502139806e-05, + "loss": 0.0154, + "step": 4030 + }, + { + "epoch": 0.72, + "learning_rate": 4.964015691868759e-05, + "loss": 0.0161, + "step": 4040 + }, + { + "epoch": 0.72, + "learning_rate": 4.963926533523538e-05, + "loss": 0.0129, + "step": 4050 + }, + { + "epoch": 0.72, + "learning_rate": 4.9638373751783166e-05, + "loss": 0.0134, + "step": 4060 + }, + { + "epoch": 0.73, + "learning_rate": 4.963748216833096e-05, + "loss": 0.0115, + "step": 4070 + }, + { + "epoch": 0.73, + "learning_rate": 4.963659058487875e-05, + "loss": 0.013, + "step": 4080 + }, + { + "epoch": 0.73, + "learning_rate": 4.963569900142653e-05, + "loss": 0.0127, + "step": 4090 + }, + { + "epoch": 0.73, + "learning_rate": 4.9634807417974324e-05, + "loss": 0.0147, + "step": 4100 + }, + { + "epoch": 0.73, + "learning_rate": 4.9633915834522115e-05, + "loss": 0.0156, + "step": 4110 + }, + { + "epoch": 0.73, + "learning_rate": 4.9633024251069906e-05, + "loss": 0.0135, + "step": 4120 + }, + { + "epoch": 0.74, + "learning_rate": 4.963213266761769e-05, + "loss": 0.0149, + "step": 4130 + }, + { + "epoch": 0.74, + "learning_rate": 4.963124108416548e-05, + "loss": 0.0174, + "step": 4140 + }, + { + "epoch": 0.74, + "learning_rate": 4.9630349500713266e-05, + "loss": 0.0127, + "step": 4150 + }, + { + "epoch": 0.74, + "learning_rate": 4.962945791726106e-05, + "loss": 0.0143, + "step": 4160 + }, + { + "epoch": 0.74, + "learning_rate": 4.962856633380885e-05, + "loss": 0.0163, + "step": 4170 + }, + { + "epoch": 0.75, + "learning_rate": 4.962767475035663e-05, + "loss": 0.0112, + "step": 4180 + }, + { + "epoch": 0.75, + "learning_rate": 4.9626783166904424e-05, + "loss": 0.0098, + "step": 4190 + }, + { + "epoch": 0.75, + "learning_rate": 4.962589158345221e-05, + "loss": 0.0122, + "step": 4200 + }, + { + "epoch": 0.75, + "learning_rate": 4.962500000000001e-05, + "loss": 0.0153, + "step": 4210 + }, + { + "epoch": 0.75, + "learning_rate": 4.962410841654779e-05, + "loss": 0.0112, + "step": 4220 + }, + { + "epoch": 0.75, + "learning_rate": 4.962321683309558e-05, + "loss": 0.0112, + "step": 4230 + }, + { + "epoch": 0.76, + "learning_rate": 4.962232524964337e-05, + "loss": 0.0133, + "step": 4240 + }, + { + "epoch": 0.76, + "learning_rate": 4.962143366619116e-05, + "loss": 0.0146, + "step": 4250 + }, + { + "epoch": 0.76, + "learning_rate": 4.962054208273895e-05, + "loss": 0.0122, + "step": 4260 + }, + { + "epoch": 0.76, + "learning_rate": 4.9619650499286734e-05, + "loss": 0.0116, + "step": 4270 + }, + { + "epoch": 0.76, + "learning_rate": 4.9618758915834525e-05, + "loss": 0.0125, + "step": 4280 + }, + { + "epoch": 0.76, + "learning_rate": 4.961786733238231e-05, + "loss": 0.0163, + "step": 4290 + }, + { + "epoch": 0.77, + "learning_rate": 4.96169757489301e-05, + "loss": 0.0106, + "step": 4300 + }, + { + "epoch": 0.77, + "learning_rate": 4.961608416547789e-05, + "loss": 0.0089, + "step": 4310 + }, + { + "epoch": 0.77, + "learning_rate": 4.961519258202568e-05, + "loss": 0.0162, + "step": 4320 + }, + { + "epoch": 0.77, + "learning_rate": 4.961430099857347e-05, + "loss": 0.0128, + "step": 4330 + }, + { + "epoch": 0.77, + "learning_rate": 4.961340941512126e-05, + "loss": 0.0159, + "step": 4340 + }, + { + "epoch": 0.78, + "learning_rate": 4.961251783166905e-05, + "loss": 0.0135, + "step": 4350 + }, + { + "epoch": 0.78, + "learning_rate": 4.9611626248216834e-05, + "loss": 0.016, + "step": 4360 + }, + { + "epoch": 0.78, + "learning_rate": 4.9610734664764626e-05, + "loss": 0.0173, + "step": 4370 + }, + { + "epoch": 0.78, + "learning_rate": 4.960984308131241e-05, + "loss": 0.0157, + "step": 4380 + }, + { + "epoch": 0.78, + "learning_rate": 4.96089514978602e-05, + "loss": 0.0147, + "step": 4390 + }, + { + "epoch": 0.78, + "learning_rate": 4.960805991440799e-05, + "loss": 0.0121, + "step": 4400 + }, + { + "epoch": 0.79, + "learning_rate": 4.960716833095578e-05, + "loss": 0.0138, + "step": 4410 + }, + { + "epoch": 0.79, + "learning_rate": 4.960627674750357e-05, + "loss": 0.011, + "step": 4420 + }, + { + "epoch": 0.79, + "learning_rate": 4.960538516405136e-05, + "loss": 0.0156, + "step": 4430 + }, + { + "epoch": 0.79, + "learning_rate": 4.960449358059915e-05, + "loss": 0.0114, + "step": 4440 + }, + { + "epoch": 0.79, + "learning_rate": 4.9603601997146935e-05, + "loss": 0.0149, + "step": 4450 + }, + { + "epoch": 0.8, + "learning_rate": 4.9602710413694726e-05, + "loss": 0.0093, + "step": 4460 + }, + { + "epoch": 0.8, + "learning_rate": 4.960181883024251e-05, + "loss": 0.012, + "step": 4470 + }, + { + "epoch": 0.8, + "learning_rate": 4.96009272467903e-05, + "loss": 0.0139, + "step": 4480 + }, + { + "epoch": 0.8, + "learning_rate": 4.960003566333809e-05, + "loss": 0.0105, + "step": 4490 + }, + { + "epoch": 0.8, + "learning_rate": 4.959914407988588e-05, + "loss": 0.0157, + "step": 4500 + }, + { + "epoch": 0.8, + "learning_rate": 4.959825249643367e-05, + "loss": 0.0126, + "step": 4510 + }, + { + "epoch": 0.81, + "learning_rate": 4.959736091298145e-05, + "loss": 0.0132, + "step": 4520 + }, + { + "epoch": 0.81, + "learning_rate": 4.9596469329529244e-05, + "loss": 0.0158, + "step": 4530 + }, + { + "epoch": 0.81, + "learning_rate": 4.9595577746077035e-05, + "loss": 0.014, + "step": 4540 + }, + { + "epoch": 0.81, + "learning_rate": 4.959468616262483e-05, + "loss": 0.0116, + "step": 4550 + }, + { + "epoch": 0.81, + "learning_rate": 4.959379457917261e-05, + "loss": 0.017, + "step": 4560 + }, + { + "epoch": 0.81, + "learning_rate": 4.95929029957204e-05, + "loss": 0.0124, + "step": 4570 + }, + { + "epoch": 0.82, + "learning_rate": 4.9592011412268194e-05, + "loss": 0.0126, + "step": 4580 + }, + { + "epoch": 0.82, + "learning_rate": 4.959111982881598e-05, + "loss": 0.012, + "step": 4590 + }, + { + "epoch": 0.82, + "learning_rate": 4.959022824536377e-05, + "loss": 0.0142, + "step": 4600 + }, + { + "epoch": 0.82, + "learning_rate": 4.9589336661911554e-05, + "loss": 0.0146, + "step": 4610 + }, + { + "epoch": 0.82, + "learning_rate": 4.9588445078459345e-05, + "loss": 0.0111, + "step": 4620 + }, + { + "epoch": 0.83, + "learning_rate": 4.9587553495007136e-05, + "loss": 0.0106, + "step": 4630 + }, + { + "epoch": 0.83, + "learning_rate": 4.958666191155492e-05, + "loss": 0.016, + "step": 4640 + }, + { + "epoch": 0.83, + "learning_rate": 4.958577032810272e-05, + "loss": 0.0111, + "step": 4650 + }, + { + "epoch": 0.83, + "learning_rate": 4.95848787446505e-05, + "loss": 0.0122, + "step": 4660 + }, + { + "epoch": 0.83, + "learning_rate": 4.9583987161198294e-05, + "loss": 0.0134, + "step": 4670 + }, + { + "epoch": 0.83, + "learning_rate": 4.958309557774608e-05, + "loss": 0.0165, + "step": 4680 + }, + { + "epoch": 0.84, + "learning_rate": 4.958220399429387e-05, + "loss": 0.0117, + "step": 4690 + }, + { + "epoch": 0.84, + "learning_rate": 4.9581312410841654e-05, + "loss": 0.0102, + "step": 4700 + }, + { + "epoch": 0.84, + "learning_rate": 4.9580420827389445e-05, + "loss": 0.0151, + "step": 4710 + }, + { + "epoch": 0.84, + "learning_rate": 4.9579529243937237e-05, + "loss": 0.0134, + "step": 4720 + }, + { + "epoch": 0.84, + "learning_rate": 4.957863766048502e-05, + "loss": 0.0107, + "step": 4730 + }, + { + "epoch": 0.85, + "learning_rate": 4.957774607703281e-05, + "loss": 0.0121, + "step": 4740 + }, + { + "epoch": 0.85, + "learning_rate": 4.95768544935806e-05, + "loss": 0.0114, + "step": 4750 + }, + { + "epoch": 0.85, + "learning_rate": 4.9575962910128395e-05, + "loss": 0.0135, + "step": 4760 + }, + { + "epoch": 0.85, + "learning_rate": 4.957507132667618e-05, + "loss": 0.0139, + "step": 4770 + }, + { + "epoch": 0.85, + "learning_rate": 4.957417974322397e-05, + "loss": 0.01, + "step": 4780 + }, + { + "epoch": 0.85, + "learning_rate": 4.9573288159771755e-05, + "loss": 0.0134, + "step": 4790 + }, + { + "epoch": 0.86, + "learning_rate": 4.9572396576319546e-05, + "loss": 0.0149, + "step": 4800 + }, + { + "epoch": 0.86, + "learning_rate": 4.957150499286734e-05, + "loss": 0.0129, + "step": 4810 + }, + { + "epoch": 0.86, + "learning_rate": 4.957061340941512e-05, + "loss": 0.0182, + "step": 4820 + }, + { + "epoch": 0.86, + "learning_rate": 4.956972182596291e-05, + "loss": 0.0088, + "step": 4830 + }, + { + "epoch": 0.86, + "learning_rate": 4.95688302425107e-05, + "loss": 0.0137, + "step": 4840 + }, + { + "epoch": 0.86, + "learning_rate": 4.956793865905849e-05, + "loss": 0.0103, + "step": 4850 + }, + { + "epoch": 0.87, + "learning_rate": 4.956704707560628e-05, + "loss": 0.0111, + "step": 4860 + }, + { + "epoch": 0.87, + "learning_rate": 4.956615549215407e-05, + "loss": 0.0162, + "step": 4870 + }, + { + "epoch": 0.87, + "learning_rate": 4.956526390870186e-05, + "loss": 0.0147, + "step": 4880 + }, + { + "epoch": 0.87, + "learning_rate": 4.9564372325249646e-05, + "loss": 0.0131, + "step": 4890 + }, + { + "epoch": 0.87, + "learning_rate": 4.956348074179744e-05, + "loss": 0.0108, + "step": 4900 + }, + { + "epoch": 0.88, + "learning_rate": 4.956258915834522e-05, + "loss": 0.0138, + "step": 4910 + }, + { + "epoch": 0.88, + "learning_rate": 4.956169757489301e-05, + "loss": 0.0113, + "step": 4920 + }, + { + "epoch": 0.88, + "learning_rate": 4.95608059914408e-05, + "loss": 0.0151, + "step": 4930 + }, + { + "epoch": 0.88, + "learning_rate": 4.955991440798859e-05, + "loss": 0.0178, + "step": 4940 + }, + { + "epoch": 0.88, + "learning_rate": 4.955902282453638e-05, + "loss": 0.0132, + "step": 4950 + }, + { + "epoch": 0.88, + "learning_rate": 4.9558131241084165e-05, + "loss": 0.0127, + "step": 4960 + }, + { + "epoch": 0.89, + "learning_rate": 4.9557239657631956e-05, + "loss": 0.0135, + "step": 4970 + }, + { + "epoch": 0.89, + "learning_rate": 4.955634807417975e-05, + "loss": 0.0123, + "step": 4980 + }, + { + "epoch": 0.89, + "learning_rate": 4.955545649072754e-05, + "loss": 0.0081, + "step": 4990 + }, + { + "epoch": 0.89, + "learning_rate": 4.955456490727532e-05, + "loss": 0.0135, + "step": 5000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9553673323823114e-05, + "loss": 0.0187, + "step": 5010 + }, + { + "epoch": 0.9, + "learning_rate": 4.95527817403709e-05, + "loss": 0.0134, + "step": 5020 + }, + { + "epoch": 0.9, + "learning_rate": 4.955189015691869e-05, + "loss": 0.0095, + "step": 5030 + }, + { + "epoch": 0.9, + "learning_rate": 4.9551087731811704e-05, + "loss": 0.0145, + "step": 5040 + }, + { + "epoch": 0.9, + "learning_rate": 4.955019614835949e-05, + "loss": 0.0179, + "step": 5050 + }, + { + "epoch": 0.9, + "learning_rate": 4.954930456490728e-05, + "loss": 0.014, + "step": 5060 + }, + { + "epoch": 0.9, + "learning_rate": 4.9548412981455064e-05, + "loss": 0.0148, + "step": 5070 + }, + { + "epoch": 0.91, + "learning_rate": 4.9547521398002856e-05, + "loss": 0.0114, + "step": 5080 + }, + { + "epoch": 0.91, + "learning_rate": 4.954662981455065e-05, + "loss": 0.0093, + "step": 5090 + }, + { + "epoch": 0.91, + "learning_rate": 4.954573823109843e-05, + "loss": 0.011, + "step": 5100 + }, + { + "epoch": 0.91, + "learning_rate": 4.954484664764622e-05, + "loss": 0.0129, + "step": 5110 + }, + { + "epoch": 0.91, + "learning_rate": 4.954395506419401e-05, + "loss": 0.0157, + "step": 5120 + }, + { + "epoch": 0.91, + "learning_rate": 4.95430634807418e-05, + "loss": 0.0128, + "step": 5130 + }, + { + "epoch": 0.92, + "learning_rate": 4.954217189728959e-05, + "loss": 0.0153, + "step": 5140 + }, + { + "epoch": 0.92, + "learning_rate": 4.954128031383738e-05, + "loss": 0.0108, + "step": 5150 + }, + { + "epoch": 0.92, + "learning_rate": 4.9540388730385165e-05, + "loss": 0.0179, + "step": 5160 + }, + { + "epoch": 0.92, + "learning_rate": 4.9539497146932956e-05, + "loss": 0.011, + "step": 5170 + }, + { + "epoch": 0.92, + "learning_rate": 4.953860556348075e-05, + "loss": 0.0127, + "step": 5180 + }, + { + "epoch": 0.93, + "learning_rate": 4.953771398002853e-05, + "loss": 0.0121, + "step": 5190 + }, + { + "epoch": 0.93, + "learning_rate": 4.953682239657632e-05, + "loss": 0.0131, + "step": 5200 + }, + { + "epoch": 0.93, + "learning_rate": 4.953593081312411e-05, + "loss": 0.0107, + "step": 5210 + }, + { + "epoch": 0.93, + "learning_rate": 4.95350392296719e-05, + "loss": 0.0147, + "step": 5220 + }, + { + "epoch": 0.93, + "learning_rate": 4.953414764621968e-05, + "loss": 0.0116, + "step": 5230 + }, + { + "epoch": 0.93, + "learning_rate": 4.9533256062767474e-05, + "loss": 0.0081, + "step": 5240 + }, + { + "epoch": 0.94, + "learning_rate": 4.9532364479315266e-05, + "loss": 0.0126, + "step": 5250 + }, + { + "epoch": 0.94, + "learning_rate": 4.953147289586306e-05, + "loss": 0.0114, + "step": 5260 + }, + { + "epoch": 0.94, + "learning_rate": 4.953058131241085e-05, + "loss": 0.0112, + "step": 5270 + }, + { + "epoch": 0.94, + "learning_rate": 4.952968972895863e-05, + "loss": 0.0131, + "step": 5280 + }, + { + "epoch": 0.94, + "learning_rate": 4.9528798145506424e-05, + "loss": 0.0137, + "step": 5290 + }, + { + "epoch": 0.95, + "learning_rate": 4.952790656205421e-05, + "loss": 0.0152, + "step": 5300 + }, + { + "epoch": 0.95, + "learning_rate": 4.9527014978602e-05, + "loss": 0.0137, + "step": 5310 + }, + { + "epoch": 0.95, + "learning_rate": 4.952612339514979e-05, + "loss": 0.0136, + "step": 5320 + }, + { + "epoch": 0.95, + "learning_rate": 4.9525231811697575e-05, + "loss": 0.0115, + "step": 5330 + }, + { + "epoch": 0.95, + "learning_rate": 4.9524340228245366e-05, + "loss": 0.0176, + "step": 5340 + }, + { + "epoch": 0.95, + "learning_rate": 4.952344864479315e-05, + "loss": 0.0124, + "step": 5350 + }, + { + "epoch": 0.96, + "learning_rate": 4.952255706134095e-05, + "loss": 0.0142, + "step": 5360 + }, + { + "epoch": 0.96, + "learning_rate": 4.952166547788873e-05, + "loss": 0.0111, + "step": 5370 + }, + { + "epoch": 0.96, + "learning_rate": 4.9520773894436524e-05, + "loss": 0.0135, + "step": 5380 + }, + { + "epoch": 0.96, + "learning_rate": 4.951988231098431e-05, + "loss": 0.0135, + "step": 5390 + }, + { + "epoch": 0.96, + "learning_rate": 4.95189907275321e-05, + "loss": 0.0116, + "step": 5400 + }, + { + "epoch": 0.96, + "learning_rate": 4.951809914407989e-05, + "loss": 0.0094, + "step": 5410 + }, + { + "epoch": 0.97, + "learning_rate": 4.9517207560627675e-05, + "loss": 0.0122, + "step": 5420 + }, + { + "epoch": 0.97, + "learning_rate": 4.951631597717547e-05, + "loss": 0.0113, + "step": 5430 + }, + { + "epoch": 0.97, + "learning_rate": 4.951542439372325e-05, + "loss": 0.0142, + "step": 5440 + }, + { + "epoch": 0.97, + "learning_rate": 4.951453281027104e-05, + "loss": 0.0141, + "step": 5450 + }, + { + "epoch": 0.97, + "learning_rate": 4.951364122681883e-05, + "loss": 0.0087, + "step": 5460 + }, + { + "epoch": 0.98, + "learning_rate": 4.9512749643366625e-05, + "loss": 0.0125, + "step": 5470 + }, + { + "epoch": 0.98, + "learning_rate": 4.951185805991441e-05, + "loss": 0.0141, + "step": 5480 + }, + { + "epoch": 0.98, + "learning_rate": 4.95109664764622e-05, + "loss": 0.0108, + "step": 5490 + }, + { + "epoch": 0.98, + "learning_rate": 4.951007489300999e-05, + "loss": 0.0135, + "step": 5500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9509183309557776e-05, + "loss": 0.0107, + "step": 5510 + }, + { + "epoch": 0.98, + "learning_rate": 4.950829172610557e-05, + "loss": 0.0123, + "step": 5520 + }, + { + "epoch": 0.99, + "learning_rate": 4.950740014265335e-05, + "loss": 0.0122, + "step": 5530 + }, + { + "epoch": 0.99, + "learning_rate": 4.950650855920114e-05, + "loss": 0.0131, + "step": 5540 + }, + { + "epoch": 0.99, + "learning_rate": 4.9505616975748934e-05, + "loss": 0.0103, + "step": 5550 + }, + { + "epoch": 0.99, + "learning_rate": 4.950472539229672e-05, + "loss": 0.0099, + "step": 5560 + }, + { + "epoch": 0.99, + "learning_rate": 4.950383380884451e-05, + "loss": 0.0097, + "step": 5570 + }, + { + "epoch": 1.0, + "learning_rate": 4.95029422253923e-05, + "loss": 0.0118, + "step": 5580 + }, + { + "epoch": 1.0, + "learning_rate": 4.950205064194009e-05, + "loss": 0.0181, + "step": 5590 + }, + { + "epoch": 1.0, + "learning_rate": 4.9501159058487877e-05, + "loss": 0.0128, + "step": 5600 + }, + { + "epoch": 1.0, + "eval_loss": 0.01809591054916382, + "eval_runtime": 196.4064, + "eval_samples_per_second": 23.619, + "eval_steps_per_second": 2.953, + "step": 5608 + }, + { + "epoch": 1.0, + "learning_rate": 4.950026747503567e-05, + "loss": 0.0096, + "step": 5610 + }, + { + "epoch": 1.0, + "learning_rate": 4.949937589158345e-05, + "loss": 0.0119, + "step": 5620 + }, + { + "epoch": 1.0, + "learning_rate": 4.9498484308131243e-05, + "loss": 0.0152, + "step": 5630 + }, + { + "epoch": 1.01, + "learning_rate": 4.9497592724679035e-05, + "loss": 0.009, + "step": 5640 + }, + { + "epoch": 1.01, + "learning_rate": 4.949670114122682e-05, + "loss": 0.0102, + "step": 5650 + }, + { + "epoch": 1.01, + "learning_rate": 4.949580955777461e-05, + "loss": 0.008, + "step": 5660 + }, + { + "epoch": 1.01, + "learning_rate": 4.9494917974322395e-05, + "loss": 0.0095, + "step": 5670 + }, + { + "epoch": 1.01, + "learning_rate": 4.9494026390870186e-05, + "loss": 0.0111, + "step": 5680 + }, + { + "epoch": 1.01, + "learning_rate": 4.949313480741798e-05, + "loss": 0.0133, + "step": 5690 + }, + { + "epoch": 1.02, + "learning_rate": 4.949224322396577e-05, + "loss": 0.012, + "step": 5700 + }, + { + "epoch": 1.02, + "learning_rate": 4.949135164051355e-05, + "loss": 0.0077, + "step": 5710 + }, + { + "epoch": 1.02, + "learning_rate": 4.9490460057061344e-05, + "loss": 0.0108, + "step": 5720 + }, + { + "epoch": 1.02, + "learning_rate": 4.9489568473609135e-05, + "loss": 0.0156, + "step": 5730 + }, + { + "epoch": 1.02, + "learning_rate": 4.948867689015692e-05, + "loss": 0.0113, + "step": 5740 + }, + { + "epoch": 1.03, + "learning_rate": 4.948778530670471e-05, + "loss": 0.0079, + "step": 5750 + }, + { + "epoch": 1.03, + "learning_rate": 4.9486893723252495e-05, + "loss": 0.0094, + "step": 5760 + }, + { + "epoch": 1.03, + "learning_rate": 4.9486002139800286e-05, + "loss": 0.0124, + "step": 5770 + }, + { + "epoch": 1.03, + "learning_rate": 4.948511055634808e-05, + "loss": 0.0081, + "step": 5780 + }, + { + "epoch": 1.03, + "learning_rate": 4.948421897289586e-05, + "loss": 0.0116, + "step": 5790 + }, + { + "epoch": 1.03, + "learning_rate": 4.948332738944365e-05, + "loss": 0.0099, + "step": 5800 + }, + { + "epoch": 1.04, + "learning_rate": 4.9482435805991445e-05, + "loss": 0.0076, + "step": 5810 + }, + { + "epoch": 1.04, + "learning_rate": 4.9481544222539236e-05, + "loss": 0.014, + "step": 5820 + }, + { + "epoch": 1.04, + "learning_rate": 4.948065263908702e-05, + "loss": 0.0104, + "step": 5830 + }, + { + "epoch": 1.04, + "learning_rate": 4.947976105563481e-05, + "loss": 0.0069, + "step": 5840 + }, + { + "epoch": 1.04, + "learning_rate": 4.9478869472182596e-05, + "loss": 0.0121, + "step": 5850 + }, + { + "epoch": 1.04, + "learning_rate": 4.947797788873039e-05, + "loss": 0.0104, + "step": 5860 + }, + { + "epoch": 1.05, + "learning_rate": 4.947708630527818e-05, + "loss": 0.0133, + "step": 5870 + }, + { + "epoch": 1.05, + "learning_rate": 4.947619472182596e-05, + "loss": 0.0115, + "step": 5880 + }, + { + "epoch": 1.05, + "learning_rate": 4.9475303138373754e-05, + "loss": 0.0111, + "step": 5890 + }, + { + "epoch": 1.05, + "learning_rate": 4.947441155492154e-05, + "loss": 0.0108, + "step": 5900 + }, + { + "epoch": 1.05, + "learning_rate": 4.9473519971469336e-05, + "loss": 0.0131, + "step": 5910 + }, + { + "epoch": 1.06, + "learning_rate": 4.947262838801712e-05, + "loss": 0.0105, + "step": 5920 + }, + { + "epoch": 1.06, + "learning_rate": 4.947173680456491e-05, + "loss": 0.0118, + "step": 5930 + }, + { + "epoch": 1.06, + "learning_rate": 4.9470845221112696e-05, + "loss": 0.0087, + "step": 5940 + }, + { + "epoch": 1.06, + "learning_rate": 4.946995363766049e-05, + "loss": 0.007, + "step": 5950 + }, + { + "epoch": 1.06, + "learning_rate": 4.946906205420828e-05, + "loss": 0.0146, + "step": 5960 + }, + { + "epoch": 1.06, + "learning_rate": 4.946817047075606e-05, + "loss": 0.0133, + "step": 5970 + }, + { + "epoch": 1.07, + "learning_rate": 4.9467278887303854e-05, + "loss": 0.0152, + "step": 5980 + }, + { + "epoch": 1.07, + "learning_rate": 4.946638730385164e-05, + "loss": 0.0108, + "step": 5990 + }, + { + "epoch": 1.07, + "learning_rate": 4.946549572039943e-05, + "loss": 0.0116, + "step": 6000 + }, + { + "epoch": 1.07, + "learning_rate": 4.946460413694722e-05, + "loss": 0.0155, + "step": 6010 + }, + { + "epoch": 1.07, + "learning_rate": 4.946371255349501e-05, + "loss": 0.0113, + "step": 6020 + }, + { + "epoch": 1.08, + "learning_rate": 4.94628209700428e-05, + "loss": 0.0112, + "step": 6030 + }, + { + "epoch": 1.08, + "learning_rate": 4.946192938659059e-05, + "loss": 0.0111, + "step": 6040 + }, + { + "epoch": 1.08, + "learning_rate": 4.946103780313838e-05, + "loss": 0.0117, + "step": 6050 + }, + { + "epoch": 1.08, + "learning_rate": 4.9460146219686164e-05, + "loss": 0.0112, + "step": 6060 + }, + { + "epoch": 1.08, + "learning_rate": 4.9459254636233955e-05, + "loss": 0.011, + "step": 6070 + }, + { + "epoch": 1.08, + "learning_rate": 4.945836305278174e-05, + "loss": 0.0095, + "step": 6080 + }, + { + "epoch": 1.09, + "learning_rate": 4.945747146932953e-05, + "loss": 0.0131, + "step": 6090 + }, + { + "epoch": 1.09, + "learning_rate": 4.945657988587732e-05, + "loss": 0.0091, + "step": 6100 + }, + { + "epoch": 1.09, + "learning_rate": 4.9455688302425106e-05, + "loss": 0.0111, + "step": 6110 + }, + { + "epoch": 1.09, + "learning_rate": 4.94547967189729e-05, + "loss": 0.0062, + "step": 6120 + }, + { + "epoch": 1.09, + "learning_rate": 4.945390513552069e-05, + "loss": 0.015, + "step": 6130 + }, + { + "epoch": 1.09, + "learning_rate": 4.945301355206848e-05, + "loss": 0.0091, + "step": 6140 + }, + { + "epoch": 1.1, + "learning_rate": 4.9452121968616264e-05, + "loss": 0.0118, + "step": 6150 + }, + { + "epoch": 1.1, + "learning_rate": 4.9451230385164056e-05, + "loss": 0.0148, + "step": 6160 + }, + { + "epoch": 1.1, + "learning_rate": 4.945033880171184e-05, + "loss": 0.0103, + "step": 6170 + }, + { + "epoch": 1.1, + "learning_rate": 4.944944721825963e-05, + "loss": 0.0121, + "step": 6180 + }, + { + "epoch": 1.1, + "learning_rate": 4.944855563480742e-05, + "loss": 0.011, + "step": 6190 + }, + { + "epoch": 1.11, + "learning_rate": 4.944766405135521e-05, + "loss": 0.0103, + "step": 6200 + }, + { + "epoch": 1.11, + "learning_rate": 4.9446772467903e-05, + "loss": 0.0092, + "step": 6210 + }, + { + "epoch": 1.11, + "learning_rate": 4.944588088445078e-05, + "loss": 0.006, + "step": 6220 + }, + { + "epoch": 1.11, + "learning_rate": 4.9444989300998574e-05, + "loss": 0.0101, + "step": 6230 + }, + { + "epoch": 1.11, + "learning_rate": 4.9444097717546365e-05, + "loss": 0.0094, + "step": 6240 + }, + { + "epoch": 1.11, + "learning_rate": 4.9443206134094156e-05, + "loss": 0.0092, + "step": 6250 + }, + { + "epoch": 1.12, + "learning_rate": 4.944231455064194e-05, + "loss": 0.0125, + "step": 6260 + }, + { + "epoch": 1.12, + "learning_rate": 4.944142296718973e-05, + "loss": 0.0119, + "step": 6270 + }, + { + "epoch": 1.12, + "learning_rate": 4.944053138373752e-05, + "loss": 0.0087, + "step": 6280 + }, + { + "epoch": 1.12, + "learning_rate": 4.943963980028531e-05, + "loss": 0.0107, + "step": 6290 + }, + { + "epoch": 1.12, + "learning_rate": 4.94387482168331e-05, + "loss": 0.0118, + "step": 6300 + }, + { + "epoch": 1.13, + "learning_rate": 4.943785663338088e-05, + "loss": 0.0152, + "step": 6310 + }, + { + "epoch": 1.13, + "learning_rate": 4.9436965049928674e-05, + "loss": 0.0136, + "step": 6320 + }, + { + "epoch": 1.13, + "learning_rate": 4.9436073466476466e-05, + "loss": 0.0103, + "step": 6330 + }, + { + "epoch": 1.13, + "learning_rate": 4.943518188302425e-05, + "loss": 0.0092, + "step": 6340 + }, + { + "epoch": 1.13, + "learning_rate": 4.943429029957205e-05, + "loss": 0.0114, + "step": 6350 + }, + { + "epoch": 1.13, + "learning_rate": 4.943339871611983e-05, + "loss": 0.0123, + "step": 6360 + }, + { + "epoch": 1.14, + "learning_rate": 4.9432507132667624e-05, + "loss": 0.0116, + "step": 6370 + }, + { + "epoch": 1.14, + "learning_rate": 4.943161554921541e-05, + "loss": 0.0125, + "step": 6380 + }, + { + "epoch": 1.14, + "learning_rate": 4.94307239657632e-05, + "loss": 0.0098, + "step": 6390 + }, + { + "epoch": 1.14, + "learning_rate": 4.9429832382310984e-05, + "loss": 0.008, + "step": 6400 + }, + { + "epoch": 1.14, + "learning_rate": 4.9428940798858775e-05, + "loss": 0.0092, + "step": 6410 + }, + { + "epoch": 1.14, + "learning_rate": 4.9428049215406566e-05, + "loss": 0.0077, + "step": 6420 + }, + { + "epoch": 1.15, + "learning_rate": 4.942715763195435e-05, + "loss": 0.0129, + "step": 6430 + }, + { + "epoch": 1.15, + "learning_rate": 4.942626604850214e-05, + "loss": 0.0108, + "step": 6440 + }, + { + "epoch": 1.15, + "learning_rate": 4.9425374465049926e-05, + "loss": 0.0136, + "step": 6450 + }, + { + "epoch": 1.15, + "learning_rate": 4.9424482881597724e-05, + "loss": 0.013, + "step": 6460 + }, + { + "epoch": 1.15, + "learning_rate": 4.942359129814551e-05, + "loss": 0.0109, + "step": 6470 + }, + { + "epoch": 1.16, + "learning_rate": 4.94226997146933e-05, + "loss": 0.0123, + "step": 6480 + }, + { + "epoch": 1.16, + "learning_rate": 4.9421808131241084e-05, + "loss": 0.0114, + "step": 6490 + }, + { + "epoch": 1.16, + "learning_rate": 4.9420916547788875e-05, + "loss": 0.0145, + "step": 6500 + }, + { + "epoch": 1.16, + "learning_rate": 4.942002496433667e-05, + "loss": 0.0094, + "step": 6510 + }, + { + "epoch": 1.16, + "learning_rate": 4.941913338088445e-05, + "loss": 0.0102, + "step": 6520 + }, + { + "epoch": 1.16, + "learning_rate": 4.941824179743224e-05, + "loss": 0.0102, + "step": 6530 + }, + { + "epoch": 1.17, + "learning_rate": 4.941735021398003e-05, + "loss": 0.0115, + "step": 6540 + }, + { + "epoch": 1.17, + "learning_rate": 4.941645863052782e-05, + "loss": 0.0107, + "step": 6550 + }, + { + "epoch": 1.17, + "learning_rate": 4.941556704707561e-05, + "loss": 0.0084, + "step": 6560 + }, + { + "epoch": 1.17, + "learning_rate": 4.94146754636234e-05, + "loss": 0.0119, + "step": 6570 + }, + { + "epoch": 1.17, + "learning_rate": 4.941378388017119e-05, + "loss": 0.0115, + "step": 6580 + }, + { + "epoch": 1.18, + "learning_rate": 4.9412892296718976e-05, + "loss": 0.0111, + "step": 6590 + }, + { + "epoch": 1.18, + "learning_rate": 4.941200071326677e-05, + "loss": 0.0128, + "step": 6600 + }, + { + "epoch": 1.18, + "learning_rate": 4.941110912981455e-05, + "loss": 0.0106, + "step": 6610 + }, + { + "epoch": 1.18, + "learning_rate": 4.941021754636234e-05, + "loss": 0.0146, + "step": 6620 + }, + { + "epoch": 1.18, + "learning_rate": 4.940932596291013e-05, + "loss": 0.0144, + "step": 6630 + }, + { + "epoch": 1.18, + "learning_rate": 4.940843437945792e-05, + "loss": 0.0094, + "step": 6640 + }, + { + "epoch": 1.19, + "learning_rate": 4.940754279600571e-05, + "loss": 0.0117, + "step": 6650 + }, + { + "epoch": 1.19, + "learning_rate": 4.9406651212553494e-05, + "loss": 0.0121, + "step": 6660 + }, + { + "epoch": 1.19, + "learning_rate": 4.9405759629101285e-05, + "loss": 0.014, + "step": 6670 + }, + { + "epoch": 1.19, + "learning_rate": 4.9404868045649077e-05, + "loss": 0.0121, + "step": 6680 + }, + { + "epoch": 1.19, + "learning_rate": 4.940397646219687e-05, + "loss": 0.0106, + "step": 6690 + }, + { + "epoch": 1.19, + "learning_rate": 4.940308487874465e-05, + "loss": 0.0114, + "step": 6700 + }, + { + "epoch": 1.2, + "learning_rate": 4.9402193295292443e-05, + "loss": 0.0131, + "step": 6710 + }, + { + "epoch": 1.2, + "learning_rate": 4.940130171184023e-05, + "loss": 0.0083, + "step": 6720 + }, + { + "epoch": 1.2, + "learning_rate": 4.940041012838802e-05, + "loss": 0.0117, + "step": 6730 + }, + { + "epoch": 1.2, + "learning_rate": 4.939951854493581e-05, + "loss": 0.0127, + "step": 6740 + }, + { + "epoch": 1.2, + "learning_rate": 4.9398626961483595e-05, + "loss": 0.0085, + "step": 6750 + }, + { + "epoch": 1.21, + "learning_rate": 4.9397735378031386e-05, + "loss": 0.0076, + "step": 6760 + }, + { + "epoch": 1.21, + "learning_rate": 4.939684379457917e-05, + "loss": 0.0119, + "step": 6770 + }, + { + "epoch": 1.21, + "learning_rate": 4.939595221112696e-05, + "loss": 0.0123, + "step": 6780 + }, + { + "epoch": 1.21, + "learning_rate": 4.939506062767475e-05, + "loss": 0.0106, + "step": 6790 + }, + { + "epoch": 1.21, + "learning_rate": 4.9394169044222544e-05, + "loss": 0.0081, + "step": 6800 + }, + { + "epoch": 1.21, + "learning_rate": 4.9393277460770335e-05, + "loss": 0.0091, + "step": 6810 + }, + { + "epoch": 1.22, + "learning_rate": 4.939238587731812e-05, + "loss": 0.0083, + "step": 6820 + }, + { + "epoch": 1.22, + "learning_rate": 4.939149429386591e-05, + "loss": 0.0127, + "step": 6830 + }, + { + "epoch": 1.22, + "learning_rate": 4.9390602710413695e-05, + "loss": 0.01, + "step": 6840 + }, + { + "epoch": 1.22, + "learning_rate": 4.9389711126961486e-05, + "loss": 0.013, + "step": 6850 + }, + { + "epoch": 1.22, + "learning_rate": 4.938881954350927e-05, + "loss": 0.0077, + "step": 6860 + }, + { + "epoch": 1.23, + "learning_rate": 4.938792796005706e-05, + "loss": 0.0115, + "step": 6870 + }, + { + "epoch": 1.23, + "learning_rate": 4.938703637660485e-05, + "loss": 0.0093, + "step": 6880 + }, + { + "epoch": 1.23, + "learning_rate": 4.938614479315264e-05, + "loss": 0.0108, + "step": 6890 + }, + { + "epoch": 1.23, + "learning_rate": 4.9385253209700436e-05, + "loss": 0.0083, + "step": 6900 + }, + { + "epoch": 1.23, + "learning_rate": 4.938436162624822e-05, + "loss": 0.0143, + "step": 6910 + }, + { + "epoch": 1.23, + "learning_rate": 4.938347004279601e-05, + "loss": 0.0101, + "step": 6920 + }, + { + "epoch": 1.24, + "learning_rate": 4.9382578459343796e-05, + "loss": 0.0115, + "step": 6930 + }, + { + "epoch": 1.24, + "learning_rate": 4.938168687589159e-05, + "loss": 0.011, + "step": 6940 + }, + { + "epoch": 1.24, + "learning_rate": 4.938079529243937e-05, + "loss": 0.013, + "step": 6950 + }, + { + "epoch": 1.24, + "learning_rate": 4.937990370898716e-05, + "loss": 0.0074, + "step": 6960 + }, + { + "epoch": 1.24, + "learning_rate": 4.9379012125534954e-05, + "loss": 0.0111, + "step": 6970 + }, + { + "epoch": 1.24, + "learning_rate": 4.937812054208274e-05, + "loss": 0.0107, + "step": 6980 + }, + { + "epoch": 1.25, + "learning_rate": 4.937722895863053e-05, + "loss": 0.0136, + "step": 6990 + }, + { + "epoch": 1.25, + "learning_rate": 4.9376337375178314e-05, + "loss": 0.012, + "step": 7000 + }, + { + "epoch": 1.25, + "learning_rate": 4.937544579172611e-05, + "loss": 0.0076, + "step": 7010 + }, + { + "epoch": 1.25, + "learning_rate": 4.9374554208273896e-05, + "loss": 0.0081, + "step": 7020 + }, + { + "epoch": 1.25, + "learning_rate": 4.937366262482169e-05, + "loss": 0.0106, + "step": 7030 + }, + { + "epoch": 1.26, + "learning_rate": 4.937277104136948e-05, + "loss": 0.0115, + "step": 7040 + }, + { + "epoch": 1.26, + "learning_rate": 4.937187945791726e-05, + "loss": 0.011, + "step": 7050 + }, + { + "epoch": 1.26, + "learning_rate": 4.9370987874465054e-05, + "loss": 0.0108, + "step": 7060 + }, + { + "epoch": 1.26, + "learning_rate": 4.937009629101284e-05, + "loss": 0.0074, + "step": 7070 + }, + { + "epoch": 1.26, + "learning_rate": 4.936920470756063e-05, + "loss": 0.0131, + "step": 7080 + }, + { + "epoch": 1.26, + "learning_rate": 4.9368313124108415e-05, + "loss": 0.0145, + "step": 7090 + }, + { + "epoch": 1.27, + "learning_rate": 4.9367421540656206e-05, + "loss": 0.0106, + "step": 7100 + }, + { + "epoch": 1.27, + "learning_rate": 4.9366529957204e-05, + "loss": 0.0112, + "step": 7110 + }, + { + "epoch": 1.27, + "learning_rate": 4.936563837375179e-05, + "loss": 0.0081, + "step": 7120 + }, + { + "epoch": 1.27, + "learning_rate": 4.936474679029958e-05, + "loss": 0.0118, + "step": 7130 + }, + { + "epoch": 1.27, + "learning_rate": 4.9363855206847364e-05, + "loss": 0.0081, + "step": 7140 + }, + { + "epoch": 1.27, + "learning_rate": 4.9362963623395155e-05, + "loss": 0.0118, + "step": 7150 + }, + { + "epoch": 1.28, + "learning_rate": 4.936207203994294e-05, + "loss": 0.0117, + "step": 7160 + }, + { + "epoch": 1.28, + "learning_rate": 4.936118045649073e-05, + "loss": 0.0108, + "step": 7170 + }, + { + "epoch": 1.28, + "learning_rate": 4.9360288873038515e-05, + "loss": 0.0133, + "step": 7180 + }, + { + "epoch": 1.28, + "learning_rate": 4.9359397289586306e-05, + "loss": 0.0124, + "step": 7190 + }, + { + "epoch": 1.28, + "learning_rate": 4.93585057061341e-05, + "loss": 0.0092, + "step": 7200 + }, + { + "epoch": 1.29, + "learning_rate": 4.935761412268188e-05, + "loss": 0.0103, + "step": 7210 + }, + { + "epoch": 1.29, + "learning_rate": 4.935672253922967e-05, + "loss": 0.0104, + "step": 7220 + }, + { + "epoch": 1.29, + "learning_rate": 4.9355830955777464e-05, + "loss": 0.01, + "step": 7230 + }, + { + "epoch": 1.29, + "learning_rate": 4.9354939372325256e-05, + "loss": 0.0123, + "step": 7240 + }, + { + "epoch": 1.29, + "learning_rate": 4.935404778887304e-05, + "loss": 0.0132, + "step": 7250 + }, + { + "epoch": 1.29, + "learning_rate": 4.935315620542083e-05, + "loss": 0.0116, + "step": 7260 + }, + { + "epoch": 1.3, + "learning_rate": 4.935226462196862e-05, + "loss": 0.0113, + "step": 7270 + }, + { + "epoch": 1.3, + "learning_rate": 4.935137303851641e-05, + "loss": 0.0099, + "step": 7280 + }, + { + "epoch": 1.3, + "learning_rate": 4.93504814550642e-05, + "loss": 0.0108, + "step": 7290 + }, + { + "epoch": 1.3, + "learning_rate": 4.934958987161198e-05, + "loss": 0.0129, + "step": 7300 + }, + { + "epoch": 1.3, + "learning_rate": 4.9348698288159774e-05, + "loss": 0.0119, + "step": 7310 + }, + { + "epoch": 1.31, + "learning_rate": 4.934780670470756e-05, + "loss": 0.0096, + "step": 7320 + }, + { + "epoch": 1.31, + "learning_rate": 4.934691512125535e-05, + "loss": 0.0106, + "step": 7330 + }, + { + "epoch": 1.31, + "learning_rate": 4.934602353780314e-05, + "loss": 0.0106, + "step": 7340 + }, + { + "epoch": 1.31, + "learning_rate": 4.934513195435093e-05, + "loss": 0.0101, + "step": 7350 + }, + { + "epoch": 1.31, + "learning_rate": 4.934424037089872e-05, + "loss": 0.0086, + "step": 7360 + }, + { + "epoch": 1.31, + "learning_rate": 4.934334878744651e-05, + "loss": 0.0072, + "step": 7370 + }, + { + "epoch": 1.32, + "learning_rate": 4.93424572039943e-05, + "loss": 0.0085, + "step": 7380 + }, + { + "epoch": 1.32, + "learning_rate": 4.934156562054208e-05, + "loss": 0.01, + "step": 7390 + }, + { + "epoch": 1.32, + "learning_rate": 4.9340674037089874e-05, + "loss": 0.0158, + "step": 7400 + }, + { + "epoch": 1.32, + "learning_rate": 4.933978245363766e-05, + "loss": 0.0076, + "step": 7410 + }, + { + "epoch": 1.32, + "learning_rate": 4.933889087018545e-05, + "loss": 0.0148, + "step": 7420 + }, + { + "epoch": 1.32, + "learning_rate": 4.933799928673324e-05, + "loss": 0.0115, + "step": 7430 + }, + { + "epoch": 1.33, + "learning_rate": 4.9337107703281026e-05, + "loss": 0.0124, + "step": 7440 + }, + { + "epoch": 1.33, + "learning_rate": 4.9336216119828824e-05, + "loss": 0.0104, + "step": 7450 + }, + { + "epoch": 1.33, + "learning_rate": 4.933532453637661e-05, + "loss": 0.0075, + "step": 7460 + }, + { + "epoch": 1.33, + "learning_rate": 4.93344329529244e-05, + "loss": 0.0112, + "step": 7470 + }, + { + "epoch": 1.33, + "learning_rate": 4.9333541369472184e-05, + "loss": 0.0131, + "step": 7480 + }, + { + "epoch": 1.34, + "learning_rate": 4.9332649786019975e-05, + "loss": 0.0098, + "step": 7490 + }, + { + "epoch": 1.34, + "learning_rate": 4.9331758202567766e-05, + "loss": 0.011, + "step": 7500 + }, + { + "epoch": 1.34, + "learning_rate": 4.933086661911555e-05, + "loss": 0.0072, + "step": 7510 + }, + { + "epoch": 1.34, + "learning_rate": 4.932997503566334e-05, + "loss": 0.0116, + "step": 7520 + }, + { + "epoch": 1.34, + "learning_rate": 4.9329083452211126e-05, + "loss": 0.0117, + "step": 7530 + }, + { + "epoch": 1.34, + "learning_rate": 4.932819186875892e-05, + "loss": 0.0083, + "step": 7540 + }, + { + "epoch": 1.35, + "learning_rate": 4.93273002853067e-05, + "loss": 0.0143, + "step": 7550 + }, + { + "epoch": 1.35, + "learning_rate": 4.93264087018545e-05, + "loss": 0.01, + "step": 7560 + }, + { + "epoch": 1.35, + "learning_rate": 4.9325517118402284e-05, + "loss": 0.0103, + "step": 7570 + }, + { + "epoch": 1.35, + "learning_rate": 4.9324625534950075e-05, + "loss": 0.013, + "step": 7580 + }, + { + "epoch": 1.35, + "learning_rate": 4.9323733951497867e-05, + "loss": 0.0123, + "step": 7590 + }, + { + "epoch": 1.36, + "learning_rate": 4.932284236804565e-05, + "loss": 0.0102, + "step": 7600 + }, + { + "epoch": 1.36, + "learning_rate": 4.932195078459344e-05, + "loss": 0.0103, + "step": 7610 + }, + { + "epoch": 1.36, + "learning_rate": 4.932105920114123e-05, + "loss": 0.0137, + "step": 7620 + }, + { + "epoch": 1.36, + "learning_rate": 4.932016761768902e-05, + "loss": 0.0105, + "step": 7630 + }, + { + "epoch": 1.36, + "learning_rate": 4.93192760342368e-05, + "loss": 0.0118, + "step": 7640 + }, + { + "epoch": 1.36, + "learning_rate": 4.9318384450784594e-05, + "loss": 0.0109, + "step": 7650 + }, + { + "epoch": 1.37, + "learning_rate": 4.9317492867332385e-05, + "loss": 0.0121, + "step": 7660 + }, + { + "epoch": 1.37, + "learning_rate": 4.9316601283880176e-05, + "loss": 0.0105, + "step": 7670 + }, + { + "epoch": 1.37, + "learning_rate": 4.931570970042797e-05, + "loss": 0.007, + "step": 7680 + }, + { + "epoch": 1.37, + "learning_rate": 4.931481811697575e-05, + "loss": 0.0117, + "step": 7690 + }, + { + "epoch": 1.37, + "learning_rate": 4.931392653352354e-05, + "loss": 0.0121, + "step": 7700 + }, + { + "epoch": 1.37, + "learning_rate": 4.931303495007133e-05, + "loss": 0.0096, + "step": 7710 + }, + { + "epoch": 1.38, + "learning_rate": 4.931214336661912e-05, + "loss": 0.009, + "step": 7720 + }, + { + "epoch": 1.38, + "learning_rate": 4.931125178316691e-05, + "loss": 0.0125, + "step": 7730 + }, + { + "epoch": 1.38, + "learning_rate": 4.9310360199714694e-05, + "loss": 0.0082, + "step": 7740 + }, + { + "epoch": 1.38, + "learning_rate": 4.9309468616262485e-05, + "loss": 0.0098, + "step": 7750 + }, + { + "epoch": 1.38, + "learning_rate": 4.930857703281027e-05, + "loss": 0.01, + "step": 7760 + }, + { + "epoch": 1.39, + "learning_rate": 4.930768544935806e-05, + "loss": 0.015, + "step": 7770 + }, + { + "epoch": 1.39, + "learning_rate": 4.930679386590585e-05, + "loss": 0.0152, + "step": 7780 + }, + { + "epoch": 1.39, + "learning_rate": 4.930590228245364e-05, + "loss": 0.0132, + "step": 7790 + }, + { + "epoch": 1.39, + "learning_rate": 4.930501069900143e-05, + "loss": 0.0084, + "step": 7800 + }, + { + "epoch": 1.39, + "learning_rate": 4.930411911554922e-05, + "loss": 0.0109, + "step": 7810 + }, + { + "epoch": 1.39, + "learning_rate": 4.930322753209701e-05, + "loss": 0.0082, + "step": 7820 + }, + { + "epoch": 1.4, + "learning_rate": 4.9302335948644795e-05, + "loss": 0.0094, + "step": 7830 + }, + { + "epoch": 1.4, + "learning_rate": 4.9301444365192586e-05, + "loss": 0.0112, + "step": 7840 + }, + { + "epoch": 1.4, + "learning_rate": 4.930055278174037e-05, + "loss": 0.0073, + "step": 7850 + }, + { + "epoch": 1.4, + "learning_rate": 4.929966119828816e-05, + "loss": 0.0141, + "step": 7860 + }, + { + "epoch": 1.4, + "learning_rate": 4.9298769614835946e-05, + "loss": 0.0076, + "step": 7870 + }, + { + "epoch": 1.41, + "learning_rate": 4.929787803138374e-05, + "loss": 0.0072, + "step": 7880 + }, + { + "epoch": 1.41, + "learning_rate": 4.929698644793153e-05, + "loss": 0.0098, + "step": 7890 + }, + { + "epoch": 1.41, + "learning_rate": 4.929609486447932e-05, + "loss": 0.0128, + "step": 7900 + }, + { + "epoch": 1.41, + "learning_rate": 4.929520328102711e-05, + "loss": 0.0105, + "step": 7910 + }, + { + "epoch": 1.41, + "learning_rate": 4.9294311697574895e-05, + "loss": 0.0087, + "step": 7920 + }, + { + "epoch": 1.41, + "learning_rate": 4.9293420114122686e-05, + "loss": 0.0094, + "step": 7930 + }, + { + "epoch": 1.42, + "learning_rate": 4.929252853067047e-05, + "loss": 0.0168, + "step": 7940 + }, + { + "epoch": 1.42, + "learning_rate": 4.929163694721826e-05, + "loss": 0.0096, + "step": 7950 + }, + { + "epoch": 1.42, + "learning_rate": 4.929074536376605e-05, + "loss": 0.009, + "step": 7960 + }, + { + "epoch": 1.42, + "learning_rate": 4.928985378031384e-05, + "loss": 0.013, + "step": 7970 + }, + { + "epoch": 1.42, + "learning_rate": 4.928896219686163e-05, + "loss": 0.0141, + "step": 7980 + }, + { + "epoch": 1.42, + "learning_rate": 4.928807061340941e-05, + "loss": 0.012, + "step": 7990 + }, + { + "epoch": 1.43, + "learning_rate": 4.928717902995721e-05, + "loss": 0.0103, + "step": 8000 + }, + { + "epoch": 1.43, + "learning_rate": 4.9286287446504996e-05, + "loss": 0.0088, + "step": 8010 + }, + { + "epoch": 1.43, + "learning_rate": 4.928539586305279e-05, + "loss": 0.0078, + "step": 8020 + }, + { + "epoch": 1.43, + "learning_rate": 4.928450427960057e-05, + "loss": 0.0127, + "step": 8030 + }, + { + "epoch": 1.43, + "learning_rate": 4.928361269614836e-05, + "loss": 0.0078, + "step": 8040 + }, + { + "epoch": 1.44, + "learning_rate": 4.9282721112696154e-05, + "loss": 0.0115, + "step": 8050 + }, + { + "epoch": 1.44, + "learning_rate": 4.928182952924394e-05, + "loss": 0.0113, + "step": 8060 + }, + { + "epoch": 1.44, + "learning_rate": 4.928093794579173e-05, + "loss": 0.012, + "step": 8070 + }, + { + "epoch": 1.44, + "learning_rate": 4.9280046362339514e-05, + "loss": 0.0141, + "step": 8080 + }, + { + "epoch": 1.44, + "learning_rate": 4.9279154778887305e-05, + "loss": 0.0079, + "step": 8090 + }, + { + "epoch": 1.44, + "learning_rate": 4.927826319543509e-05, + "loss": 0.0093, + "step": 8100 + }, + { + "epoch": 1.45, + "learning_rate": 4.927737161198289e-05, + "loss": 0.0085, + "step": 8110 + }, + { + "epoch": 1.45, + "learning_rate": 4.927648002853067e-05, + "loss": 0.0106, + "step": 8120 + }, + { + "epoch": 1.45, + "learning_rate": 4.927558844507846e-05, + "loss": 0.0121, + "step": 8130 + }, + { + "epoch": 1.45, + "learning_rate": 4.9274696861626254e-05, + "loss": 0.0137, + "step": 8140 + }, + { + "epoch": 1.45, + "learning_rate": 4.927380527817404e-05, + "loss": 0.0095, + "step": 8150 + }, + { + "epoch": 1.46, + "learning_rate": 4.927291369472183e-05, + "loss": 0.0149, + "step": 8160 + }, + { + "epoch": 1.46, + "learning_rate": 4.9272022111269614e-05, + "loss": 0.0091, + "step": 8170 + }, + { + "epoch": 1.46, + "learning_rate": 4.9271130527817406e-05, + "loss": 0.0081, + "step": 8180 + }, + { + "epoch": 1.46, + "learning_rate": 4.92702389443652e-05, + "loss": 0.0095, + "step": 8190 + }, + { + "epoch": 1.46, + "learning_rate": 4.926934736091298e-05, + "loss": 0.0078, + "step": 8200 + }, + { + "epoch": 1.46, + "learning_rate": 4.926845577746077e-05, + "loss": 0.0084, + "step": 8210 + }, + { + "epoch": 1.47, + "learning_rate": 4.9267564194008564e-05, + "loss": 0.0125, + "step": 8220 + }, + { + "epoch": 1.47, + "learning_rate": 4.9266672610556355e-05, + "loss": 0.0116, + "step": 8230 + }, + { + "epoch": 1.47, + "learning_rate": 4.926578102710414e-05, + "loss": 0.0081, + "step": 8240 + }, + { + "epoch": 1.47, + "learning_rate": 4.926488944365193e-05, + "loss": 0.0116, + "step": 8250 + }, + { + "epoch": 1.47, + "learning_rate": 4.9263997860199715e-05, + "loss": 0.0097, + "step": 8260 + }, + { + "epoch": 1.47, + "learning_rate": 4.9263106276747506e-05, + "loss": 0.0091, + "step": 8270 + }, + { + "epoch": 1.48, + "learning_rate": 4.92622146932953e-05, + "loss": 0.0086, + "step": 8280 + }, + { + "epoch": 1.48, + "learning_rate": 4.926132310984308e-05, + "loss": 0.0087, + "step": 8290 + }, + { + "epoch": 1.48, + "learning_rate": 4.926043152639087e-05, + "loss": 0.013, + "step": 8300 + }, + { + "epoch": 1.48, + "learning_rate": 4.925953994293866e-05, + "loss": 0.0082, + "step": 8310 + }, + { + "epoch": 1.48, + "learning_rate": 4.925864835948645e-05, + "loss": 0.0087, + "step": 8320 + }, + { + "epoch": 1.49, + "learning_rate": 4.925775677603424e-05, + "loss": 0.0112, + "step": 8330 + }, + { + "epoch": 1.49, + "learning_rate": 4.925686519258203e-05, + "loss": 0.0126, + "step": 8340 + }, + { + "epoch": 1.49, + "learning_rate": 4.9255973609129816e-05, + "loss": 0.0095, + "step": 8350 + }, + { + "epoch": 1.49, + "learning_rate": 4.925508202567761e-05, + "loss": 0.0088, + "step": 8360 + }, + { + "epoch": 1.49, + "learning_rate": 4.92541904422254e-05, + "loss": 0.0088, + "step": 8370 + }, + { + "epoch": 1.49, + "learning_rate": 4.925329885877318e-05, + "loss": 0.0096, + "step": 8380 + }, + { + "epoch": 1.5, + "learning_rate": 4.9252407275320974e-05, + "loss": 0.0114, + "step": 8390 + }, + { + "epoch": 1.5, + "learning_rate": 4.925151569186876e-05, + "loss": 0.0101, + "step": 8400 + }, + { + "epoch": 1.5, + "learning_rate": 4.925062410841655e-05, + "loss": 0.0083, + "step": 8410 + }, + { + "epoch": 1.5, + "learning_rate": 4.9249732524964334e-05, + "loss": 0.0088, + "step": 8420 + }, + { + "epoch": 1.5, + "learning_rate": 4.9248840941512125e-05, + "loss": 0.0115, + "step": 8430 + }, + { + "epoch": 1.5, + "learning_rate": 4.9247949358059916e-05, + "loss": 0.0084, + "step": 8440 + }, + { + "epoch": 1.51, + "learning_rate": 4.924705777460771e-05, + "loss": 0.0121, + "step": 8450 + }, + { + "epoch": 1.51, + "learning_rate": 4.92461661911555e-05, + "loss": 0.0137, + "step": 8460 + }, + { + "epoch": 1.51, + "learning_rate": 4.924527460770328e-05, + "loss": 0.0102, + "step": 8470 + }, + { + "epoch": 1.51, + "learning_rate": 4.9244383024251074e-05, + "loss": 0.0107, + "step": 8480 + }, + { + "epoch": 1.51, + "learning_rate": 4.924349144079886e-05, + "loss": 0.0149, + "step": 8490 + }, + { + "epoch": 1.52, + "learning_rate": 4.924259985734665e-05, + "loss": 0.0101, + "step": 8500 + }, + { + "epoch": 1.52, + "learning_rate": 4.924170827389444e-05, + "loss": 0.0079, + "step": 8510 + }, + { + "epoch": 1.52, + "learning_rate": 4.9240816690442225e-05, + "loss": 0.0086, + "step": 8520 + }, + { + "epoch": 1.52, + "learning_rate": 4.923992510699002e-05, + "loss": 0.0085, + "step": 8530 + }, + { + "epoch": 1.52, + "learning_rate": 4.92390335235378e-05, + "loss": 0.0077, + "step": 8540 + }, + { + "epoch": 1.52, + "learning_rate": 4.92381419400856e-05, + "loss": 0.0097, + "step": 8550 + }, + { + "epoch": 1.53, + "learning_rate": 4.9237250356633384e-05, + "loss": 0.0101, + "step": 8560 + }, + { + "epoch": 1.53, + "learning_rate": 4.9236358773181175e-05, + "loss": 0.0097, + "step": 8570 + }, + { + "epoch": 1.53, + "learning_rate": 4.923546718972896e-05, + "loss": 0.0091, + "step": 8580 + }, + { + "epoch": 1.53, + "learning_rate": 4.923457560627675e-05, + "loss": 0.0107, + "step": 8590 + }, + { + "epoch": 1.53, + "learning_rate": 4.923368402282454e-05, + "loss": 0.0081, + "step": 8600 + }, + { + "epoch": 1.54, + "learning_rate": 4.9232792439372326e-05, + "loss": 0.0061, + "step": 8610 + }, + { + "epoch": 1.54, + "learning_rate": 4.923190085592012e-05, + "loss": 0.0074, + "step": 8620 + }, + { + "epoch": 1.54, + "learning_rate": 4.92310092724679e-05, + "loss": 0.013, + "step": 8630 + }, + { + "epoch": 1.54, + "learning_rate": 4.923011768901569e-05, + "loss": 0.015, + "step": 8640 + }, + { + "epoch": 1.54, + "learning_rate": 4.922922610556348e-05, + "loss": 0.0097, + "step": 8650 + }, + { + "epoch": 1.54, + "learning_rate": 4.9228334522111275e-05, + "loss": 0.0116, + "step": 8660 + }, + { + "epoch": 1.55, + "learning_rate": 4.922744293865906e-05, + "loss": 0.0105, + "step": 8670 + }, + { + "epoch": 1.55, + "learning_rate": 4.922655135520685e-05, + "loss": 0.0101, + "step": 8680 + }, + { + "epoch": 1.55, + "learning_rate": 4.922565977175464e-05, + "loss": 0.007, + "step": 8690 + }, + { + "epoch": 1.55, + "learning_rate": 4.922476818830243e-05, + "loss": 0.0101, + "step": 8700 + }, + { + "epoch": 1.55, + "learning_rate": 4.922387660485022e-05, + "loss": 0.0065, + "step": 8710 + }, + { + "epoch": 1.55, + "learning_rate": 4.9222985021398e-05, + "loss": 0.0112, + "step": 8720 + }, + { + "epoch": 1.56, + "learning_rate": 4.9222093437945793e-05, + "loss": 0.0124, + "step": 8730 + }, + { + "epoch": 1.56, + "learning_rate": 4.9221201854493585e-05, + "loss": 0.0128, + "step": 8740 + }, + { + "epoch": 1.56, + "learning_rate": 4.922031027104137e-05, + "loss": 0.0103, + "step": 8750 + }, + { + "epoch": 1.56, + "learning_rate": 4.921941868758916e-05, + "loss": 0.0103, + "step": 8760 + }, + { + "epoch": 1.56, + "learning_rate": 4.921852710413695e-05, + "loss": 0.0072, + "step": 8770 + }, + { + "epoch": 1.57, + "learning_rate": 4.921763552068474e-05, + "loss": 0.0075, + "step": 8780 + }, + { + "epoch": 1.57, + "learning_rate": 4.921674393723253e-05, + "loss": 0.0086, + "step": 8790 + }, + { + "epoch": 1.57, + "learning_rate": 4.921585235378032e-05, + "loss": 0.0126, + "step": 8800 + }, + { + "epoch": 1.57, + "learning_rate": 4.92149607703281e-05, + "loss": 0.0089, + "step": 8810 + }, + { + "epoch": 1.57, + "learning_rate": 4.9214069186875894e-05, + "loss": 0.0133, + "step": 8820 + }, + { + "epoch": 1.57, + "learning_rate": 4.9213177603423685e-05, + "loss": 0.0094, + "step": 8830 + }, + { + "epoch": 1.58, + "learning_rate": 4.921228601997147e-05, + "loss": 0.0082, + "step": 8840 + }, + { + "epoch": 1.58, + "learning_rate": 4.921139443651926e-05, + "loss": 0.0072, + "step": 8850 + }, + { + "epoch": 1.58, + "learning_rate": 4.9210502853067045e-05, + "loss": 0.0083, + "step": 8860 + }, + { + "epoch": 1.58, + "learning_rate": 4.9209611269614837e-05, + "loss": 0.0082, + "step": 8870 + }, + { + "epoch": 1.58, + "learning_rate": 4.920871968616263e-05, + "loss": 0.0087, + "step": 8880 + }, + { + "epoch": 1.59, + "learning_rate": 4.920782810271042e-05, + "loss": 0.0084, + "step": 8890 + }, + { + "epoch": 1.59, + "learning_rate": 4.92069365192582e-05, + "loss": 0.0106, + "step": 8900 + }, + { + "epoch": 1.59, + "learning_rate": 4.9206044935805995e-05, + "loss": 0.0103, + "step": 8910 + }, + { + "epoch": 1.59, + "learning_rate": 4.9205153352353786e-05, + "loss": 0.0111, + "step": 8920 + }, + { + "epoch": 1.59, + "learning_rate": 4.920426176890157e-05, + "loss": 0.0072, + "step": 8930 + }, + { + "epoch": 1.59, + "learning_rate": 4.920337018544936e-05, + "loss": 0.0107, + "step": 8940 + }, + { + "epoch": 1.6, + "learning_rate": 4.9202478601997146e-05, + "loss": 0.0093, + "step": 8950 + }, + { + "epoch": 1.6, + "learning_rate": 4.920158701854494e-05, + "loss": 0.0069, + "step": 8960 + }, + { + "epoch": 1.6, + "learning_rate": 4.920069543509273e-05, + "loss": 0.0105, + "step": 8970 + }, + { + "epoch": 1.6, + "learning_rate": 4.919980385164051e-05, + "loss": 0.0102, + "step": 8980 + }, + { + "epoch": 1.6, + "learning_rate": 4.919891226818831e-05, + "loss": 0.0131, + "step": 8990 + }, + { + "epoch": 1.6, + "learning_rate": 4.9198020684736095e-05, + "loss": 0.0088, + "step": 9000 + }, + { + "epoch": 1.61, + "learning_rate": 4.9197129101283886e-05, + "loss": 0.01, + "step": 9010 + }, + { + "epoch": 1.61, + "learning_rate": 4.919623751783167e-05, + "loss": 0.0085, + "step": 9020 + }, + { + "epoch": 1.61, + "learning_rate": 4.919534593437946e-05, + "loss": 0.0078, + "step": 9030 + }, + { + "epoch": 1.61, + "learning_rate": 4.9194454350927246e-05, + "loss": 0.0074, + "step": 9040 + }, + { + "epoch": 1.61, + "learning_rate": 4.919356276747504e-05, + "loss": 0.011, + "step": 9050 + }, + { + "epoch": 1.62, + "learning_rate": 4.919267118402283e-05, + "loss": 0.0133, + "step": 9060 + }, + { + "epoch": 1.62, + "learning_rate": 4.919177960057061e-05, + "loss": 0.0088, + "step": 9070 + }, + { + "epoch": 1.62, + "learning_rate": 4.9190888017118405e-05, + "loss": 0.0119, + "step": 9080 + }, + { + "epoch": 1.62, + "learning_rate": 4.918999643366619e-05, + "loss": 0.0097, + "step": 9090 + }, + { + "epoch": 1.62, + "learning_rate": 4.918910485021399e-05, + "loss": 0.0143, + "step": 9100 + }, + { + "epoch": 1.62, + "learning_rate": 4.918821326676177e-05, + "loss": 0.009, + "step": 9110 + }, + { + "epoch": 1.63, + "learning_rate": 4.918732168330956e-05, + "loss": 0.0116, + "step": 9120 + }, + { + "epoch": 1.63, + "learning_rate": 4.918643009985735e-05, + "loss": 0.0096, + "step": 9130 + }, + { + "epoch": 1.63, + "learning_rate": 4.918553851640514e-05, + "loss": 0.0085, + "step": 9140 + }, + { + "epoch": 1.63, + "learning_rate": 4.918464693295293e-05, + "loss": 0.009, + "step": 9150 + }, + { + "epoch": 1.63, + "learning_rate": 4.9183755349500714e-05, + "loss": 0.0116, + "step": 9160 + }, + { + "epoch": 1.64, + "learning_rate": 4.9182863766048505e-05, + "loss": 0.0107, + "step": 9170 + }, + { + "epoch": 1.64, + "learning_rate": 4.918197218259629e-05, + "loss": 0.009, + "step": 9180 + }, + { + "epoch": 1.64, + "learning_rate": 4.918108059914408e-05, + "loss": 0.0073, + "step": 9190 + }, + { + "epoch": 1.64, + "learning_rate": 4.918018901569187e-05, + "loss": 0.011, + "step": 9200 + }, + { + "epoch": 1.64, + "learning_rate": 4.917929743223966e-05, + "loss": 0.0082, + "step": 9210 + }, + { + "epoch": 1.64, + "learning_rate": 4.9178405848787454e-05, + "loss": 0.0114, + "step": 9220 + }, + { + "epoch": 1.65, + "learning_rate": 4.917751426533524e-05, + "loss": 0.0141, + "step": 9230 + }, + { + "epoch": 1.65, + "learning_rate": 4.917662268188303e-05, + "loss": 0.0142, + "step": 9240 + }, + { + "epoch": 1.65, + "learning_rate": 4.9175731098430814e-05, + "loss": 0.0091, + "step": 9250 + }, + { + "epoch": 1.65, + "learning_rate": 4.9174839514978606e-05, + "loss": 0.0086, + "step": 9260 + }, + { + "epoch": 1.65, + "learning_rate": 4.917394793152639e-05, + "loss": 0.0106, + "step": 9270 + }, + { + "epoch": 1.65, + "learning_rate": 4.917305634807418e-05, + "loss": 0.0084, + "step": 9280 + }, + { + "epoch": 1.66, + "learning_rate": 4.917216476462197e-05, + "loss": 0.012, + "step": 9290 + }, + { + "epoch": 1.66, + "learning_rate": 4.917127318116976e-05, + "loss": 0.0067, + "step": 9300 + }, + { + "epoch": 1.66, + "learning_rate": 4.917038159771755e-05, + "loss": 0.0073, + "step": 9310 + }, + { + "epoch": 1.66, + "learning_rate": 4.916949001426534e-05, + "loss": 0.0123, + "step": 9320 + }, + { + "epoch": 1.66, + "learning_rate": 4.916868758915835e-05, + "loss": 0.0071, + "step": 9330 + }, + { + "epoch": 1.67, + "learning_rate": 4.916779600570613e-05, + "loss": 0.013, + "step": 9340 + }, + { + "epoch": 1.67, + "learning_rate": 4.916690442225392e-05, + "loss": 0.0126, + "step": 9350 + }, + { + "epoch": 1.67, + "learning_rate": 4.9166012838801714e-05, + "loss": 0.0137, + "step": 9360 + }, + { + "epoch": 1.67, + "learning_rate": 4.9165121255349505e-05, + "loss": 0.0104, + "step": 9370 + }, + { + "epoch": 1.67, + "learning_rate": 4.91642296718973e-05, + "loss": 0.0092, + "step": 9380 + }, + { + "epoch": 1.67, + "learning_rate": 4.916333808844508e-05, + "loss": 0.0103, + "step": 9390 + }, + { + "epoch": 1.68, + "learning_rate": 4.916244650499287e-05, + "loss": 0.0102, + "step": 9400 + }, + { + "epoch": 1.68, + "learning_rate": 4.916155492154066e-05, + "loss": 0.0103, + "step": 9410 + }, + { + "epoch": 1.68, + "learning_rate": 4.916066333808845e-05, + "loss": 0.0122, + "step": 9420 + }, + { + "epoch": 1.68, + "learning_rate": 4.915977175463623e-05, + "loss": 0.0104, + "step": 9430 + }, + { + "epoch": 1.68, + "learning_rate": 4.9158880171184024e-05, + "loss": 0.012, + "step": 9440 + }, + { + "epoch": 1.69, + "learning_rate": 4.9157988587731815e-05, + "loss": 0.0095, + "step": 9450 + }, + { + "epoch": 1.69, + "learning_rate": 4.91570970042796e-05, + "loss": 0.0088, + "step": 9460 + }, + { + "epoch": 1.69, + "learning_rate": 4.915620542082739e-05, + "loss": 0.0139, + "step": 9470 + }, + { + "epoch": 1.69, + "learning_rate": 4.915531383737518e-05, + "loss": 0.011, + "step": 9480 + }, + { + "epoch": 1.69, + "learning_rate": 4.915442225392297e-05, + "loss": 0.0138, + "step": 9490 + }, + { + "epoch": 1.69, + "learning_rate": 4.915353067047076e-05, + "loss": 0.0078, + "step": 9500 + }, + { + "epoch": 1.7, + "learning_rate": 4.915263908701855e-05, + "loss": 0.0082, + "step": 9510 + }, + { + "epoch": 1.7, + "learning_rate": 4.915174750356634e-05, + "loss": 0.0087, + "step": 9520 + }, + { + "epoch": 1.7, + "learning_rate": 4.9150855920114124e-05, + "loss": 0.0088, + "step": 9530 + }, + { + "epoch": 1.7, + "learning_rate": 4.9149964336661915e-05, + "loss": 0.009, + "step": 9540 + }, + { + "epoch": 1.7, + "learning_rate": 4.91490727532097e-05, + "loss": 0.0103, + "step": 9550 + }, + { + "epoch": 1.7, + "learning_rate": 4.914818116975749e-05, + "loss": 0.0099, + "step": 9560 + }, + { + "epoch": 1.71, + "learning_rate": 4.9147289586305275e-05, + "loss": 0.0076, + "step": 9570 + }, + { + "epoch": 1.71, + "learning_rate": 4.914639800285307e-05, + "loss": 0.0098, + "step": 9580 + }, + { + "epoch": 1.71, + "learning_rate": 4.914550641940086e-05, + "loss": 0.0119, + "step": 9590 + }, + { + "epoch": 1.71, + "learning_rate": 4.914461483594865e-05, + "loss": 0.0125, + "step": 9600 + }, + { + "epoch": 1.71, + "learning_rate": 4.914372325249644e-05, + "loss": 0.008, + "step": 9610 + }, + { + "epoch": 1.72, + "learning_rate": 4.9142831669044225e-05, + "loss": 0.0118, + "step": 9620 + }, + { + "epoch": 1.72, + "learning_rate": 4.9141940085592016e-05, + "loss": 0.0135, + "step": 9630 + }, + { + "epoch": 1.72, + "learning_rate": 4.91410485021398e-05, + "loss": 0.012, + "step": 9640 + }, + { + "epoch": 1.72, + "learning_rate": 4.914015691868759e-05, + "loss": 0.0061, + "step": 9650 + }, + { + "epoch": 1.72, + "learning_rate": 4.9139265335235376e-05, + "loss": 0.0063, + "step": 9660 + }, + { + "epoch": 1.72, + "learning_rate": 4.913837375178317e-05, + "loss": 0.0106, + "step": 9670 + }, + { + "epoch": 1.73, + "learning_rate": 4.913748216833096e-05, + "loss": 0.0091, + "step": 9680 + }, + { + "epoch": 1.73, + "learning_rate": 4.913659058487874e-05, + "loss": 0.013, + "step": 9690 + }, + { + "epoch": 1.73, + "learning_rate": 4.913569900142654e-05, + "loss": 0.0106, + "step": 9700 + }, + { + "epoch": 1.73, + "learning_rate": 4.9134807417974325e-05, + "loss": 0.0122, + "step": 9710 + }, + { + "epoch": 1.73, + "learning_rate": 4.9133915834522116e-05, + "loss": 0.0098, + "step": 9720 + }, + { + "epoch": 1.74, + "learning_rate": 4.91330242510699e-05, + "loss": 0.0094, + "step": 9730 + }, + { + "epoch": 1.74, + "learning_rate": 4.913213266761769e-05, + "loss": 0.009, + "step": 9740 + }, + { + "epoch": 1.74, + "learning_rate": 4.913124108416548e-05, + "loss": 0.0079, + "step": 9750 + }, + { + "epoch": 1.74, + "learning_rate": 4.913034950071327e-05, + "loss": 0.0075, + "step": 9760 + }, + { + "epoch": 1.74, + "learning_rate": 4.912945791726106e-05, + "loss": 0.0076, + "step": 9770 + }, + { + "epoch": 1.74, + "learning_rate": 4.9128566333808843e-05, + "loss": 0.0084, + "step": 9780 + }, + { + "epoch": 1.75, + "learning_rate": 4.9127674750356635e-05, + "loss": 0.008, + "step": 9790 + }, + { + "epoch": 1.75, + "learning_rate": 4.912678316690442e-05, + "loss": 0.0078, + "step": 9800 + }, + { + "epoch": 1.75, + "learning_rate": 4.912589158345222e-05, + "loss": 0.0078, + "step": 9810 + }, + { + "epoch": 1.75, + "learning_rate": 4.9125e-05, + "loss": 0.0112, + "step": 9820 + }, + { + "epoch": 1.75, + "learning_rate": 4.912410841654779e-05, + "loss": 0.0134, + "step": 9830 + }, + { + "epoch": 1.75, + "learning_rate": 4.9123216833095584e-05, + "loss": 0.0089, + "step": 9840 + }, + { + "epoch": 1.76, + "learning_rate": 4.912232524964337e-05, + "loss": 0.0112, + "step": 9850 + }, + { + "epoch": 1.76, + "learning_rate": 4.912143366619116e-05, + "loss": 0.0107, + "step": 9860 + }, + { + "epoch": 1.76, + "learning_rate": 4.9120542082738944e-05, + "loss": 0.0112, + "step": 9870 + }, + { + "epoch": 1.76, + "learning_rate": 4.9119650499286735e-05, + "loss": 0.0124, + "step": 9880 + }, + { + "epoch": 1.76, + "learning_rate": 4.911875891583452e-05, + "loss": 0.0105, + "step": 9890 + }, + { + "epoch": 1.77, + "learning_rate": 4.911786733238231e-05, + "loss": 0.0117, + "step": 9900 + }, + { + "epoch": 1.77, + "learning_rate": 4.91169757489301e-05, + "loss": 0.0115, + "step": 9910 + }, + { + "epoch": 1.77, + "learning_rate": 4.911608416547789e-05, + "loss": 0.0075, + "step": 9920 + }, + { + "epoch": 1.77, + "learning_rate": 4.9115192582025684e-05, + "loss": 0.0082, + "step": 9930 + }, + { + "epoch": 1.77, + "learning_rate": 4.911430099857347e-05, + "loss": 0.0093, + "step": 9940 + }, + { + "epoch": 1.77, + "learning_rate": 4.911340941512126e-05, + "loss": 0.0132, + "step": 9950 + }, + { + "epoch": 1.78, + "learning_rate": 4.9112517831669045e-05, + "loss": 0.0095, + "step": 9960 + }, + { + "epoch": 1.78, + "learning_rate": 4.9111626248216836e-05, + "loss": 0.0093, + "step": 9970 + }, + { + "epoch": 1.78, + "learning_rate": 4.911073466476463e-05, + "loss": 0.0082, + "step": 9980 + }, + { + "epoch": 1.78, + "learning_rate": 4.910984308131241e-05, + "loss": 0.0073, + "step": 9990 + }, + { + "epoch": 1.78, + "learning_rate": 4.91089514978602e-05, + "loss": 0.0093, + "step": 10000 + }, + { + "epoch": 1.78, + "learning_rate": 4.910805991440799e-05, + "loss": 0.0066, + "step": 10010 + }, + { + "epoch": 1.79, + "learning_rate": 4.910716833095578e-05, + "loss": 0.0098, + "step": 10020 + }, + { + "epoch": 1.79, + "learning_rate": 4.910627674750357e-05, + "loss": 0.0093, + "step": 10030 + }, + { + "epoch": 1.79, + "learning_rate": 4.910538516405136e-05, + "loss": 0.0075, + "step": 10040 + }, + { + "epoch": 1.79, + "learning_rate": 4.9104493580599145e-05, + "loss": 0.007, + "step": 10050 + }, + { + "epoch": 1.79, + "learning_rate": 4.9103601997146936e-05, + "loss": 0.012, + "step": 10060 + }, + { + "epoch": 1.8, + "learning_rate": 4.910271041369473e-05, + "loss": 0.0096, + "step": 10070 + }, + { + "epoch": 1.8, + "learning_rate": 4.910181883024251e-05, + "loss": 0.01, + "step": 10080 + }, + { + "epoch": 1.8, + "learning_rate": 4.91009272467903e-05, + "loss": 0.0074, + "step": 10090 + }, + { + "epoch": 1.8, + "learning_rate": 4.910003566333809e-05, + "loss": 0.0084, + "step": 10100 + }, + { + "epoch": 1.8, + "learning_rate": 4.909914407988588e-05, + "loss": 0.0104, + "step": 10110 + }, + { + "epoch": 1.8, + "learning_rate": 4.909825249643366e-05, + "loss": 0.0091, + "step": 10120 + }, + { + "epoch": 1.81, + "learning_rate": 4.9097360912981454e-05, + "loss": 0.0096, + "step": 10130 + }, + { + "epoch": 1.81, + "learning_rate": 4.9096469329529246e-05, + "loss": 0.0102, + "step": 10140 + }, + { + "epoch": 1.81, + "learning_rate": 4.909557774607704e-05, + "loss": 0.0094, + "step": 10150 + }, + { + "epoch": 1.81, + "learning_rate": 4.909468616262483e-05, + "loss": 0.0098, + "step": 10160 + }, + { + "epoch": 1.81, + "learning_rate": 4.909379457917261e-05, + "loss": 0.0116, + "step": 10170 + }, + { + "epoch": 1.82, + "learning_rate": 4.9092902995720404e-05, + "loss": 0.0132, + "step": 10180 + }, + { + "epoch": 1.82, + "learning_rate": 4.909201141226819e-05, + "loss": 0.0069, + "step": 10190 + }, + { + "epoch": 1.82, + "learning_rate": 4.909111982881598e-05, + "loss": 0.0091, + "step": 10200 + }, + { + "epoch": 1.82, + "learning_rate": 4.909022824536377e-05, + "loss": 0.0105, + "step": 10210 + }, + { + "epoch": 1.82, + "learning_rate": 4.9089336661911555e-05, + "loss": 0.009, + "step": 10220 + }, + { + "epoch": 1.82, + "learning_rate": 4.9088445078459346e-05, + "loss": 0.0125, + "step": 10230 + }, + { + "epoch": 1.83, + "learning_rate": 4.908755349500713e-05, + "loss": 0.0122, + "step": 10240 + }, + { + "epoch": 1.83, + "learning_rate": 4.908666191155493e-05, + "loss": 0.0081, + "step": 10250 + }, + { + "epoch": 1.83, + "learning_rate": 4.908577032810271e-05, + "loss": 0.0109, + "step": 10260 + }, + { + "epoch": 1.83, + "learning_rate": 4.9084878744650504e-05, + "loss": 0.0095, + "step": 10270 + }, + { + "epoch": 1.83, + "learning_rate": 4.908398716119829e-05, + "loss": 0.0097, + "step": 10280 + }, + { + "epoch": 1.83, + "learning_rate": 4.908309557774608e-05, + "loss": 0.0103, + "step": 10290 + }, + { + "epoch": 1.84, + "learning_rate": 4.908220399429387e-05, + "loss": 0.0112, + "step": 10300 + }, + { + "epoch": 1.84, + "learning_rate": 4.9081312410841656e-05, + "loss": 0.0086, + "step": 10310 + }, + { + "epoch": 1.84, + "learning_rate": 4.908042082738945e-05, + "loss": 0.0098, + "step": 10320 + }, + { + "epoch": 1.84, + "learning_rate": 4.907952924393723e-05, + "loss": 0.0098, + "step": 10330 + }, + { + "epoch": 1.84, + "learning_rate": 4.907863766048502e-05, + "loss": 0.0088, + "step": 10340 + }, + { + "epoch": 1.85, + "learning_rate": 4.907774607703281e-05, + "loss": 0.0091, + "step": 10350 + }, + { + "epoch": 1.85, + "learning_rate": 4.9076854493580605e-05, + "loss": 0.0075, + "step": 10360 + }, + { + "epoch": 1.85, + "learning_rate": 4.907596291012839e-05, + "loss": 0.01, + "step": 10370 + }, + { + "epoch": 1.85, + "learning_rate": 4.907507132667618e-05, + "loss": 0.0094, + "step": 10380 + }, + { + "epoch": 1.85, + "learning_rate": 4.907417974322397e-05, + "loss": 0.0117, + "step": 10390 + }, + { + "epoch": 1.85, + "learning_rate": 4.9073288159771756e-05, + "loss": 0.0064, + "step": 10400 + }, + { + "epoch": 1.86, + "learning_rate": 4.907239657631955e-05, + "loss": 0.0084, + "step": 10410 + }, + { + "epoch": 1.86, + "learning_rate": 4.907150499286733e-05, + "loss": 0.0098, + "step": 10420 + }, + { + "epoch": 1.86, + "learning_rate": 4.907061340941512e-05, + "loss": 0.0065, + "step": 10430 + }, + { + "epoch": 1.86, + "learning_rate": 4.9069721825962914e-05, + "loss": 0.0081, + "step": 10440 + }, + { + "epoch": 1.86, + "learning_rate": 4.90688302425107e-05, + "loss": 0.0098, + "step": 10450 + }, + { + "epoch": 1.87, + "learning_rate": 4.906793865905849e-05, + "loss": 0.008, + "step": 10460 + }, + { + "epoch": 1.87, + "learning_rate": 4.906704707560628e-05, + "loss": 0.0081, + "step": 10470 + }, + { + "epoch": 1.87, + "learning_rate": 4.906615549215407e-05, + "loss": 0.0079, + "step": 10480 + }, + { + "epoch": 1.87, + "learning_rate": 4.906526390870186e-05, + "loss": 0.0087, + "step": 10490 + }, + { + "epoch": 1.87, + "learning_rate": 4.906437232524965e-05, + "loss": 0.0103, + "step": 10500 + }, + { + "epoch": 1.87, + "learning_rate": 4.906348074179743e-05, + "loss": 0.0106, + "step": 10510 + }, + { + "epoch": 1.88, + "learning_rate": 4.9062589158345224e-05, + "loss": 0.0105, + "step": 10520 + }, + { + "epoch": 1.88, + "learning_rate": 4.9061697574893015e-05, + "loss": 0.008, + "step": 10530 + }, + { + "epoch": 1.88, + "learning_rate": 4.90608059914408e-05, + "loss": 0.0076, + "step": 10540 + }, + { + "epoch": 1.88, + "learning_rate": 4.905991440798859e-05, + "loss": 0.0073, + "step": 10550 + }, + { + "epoch": 1.88, + "learning_rate": 4.9059022824536375e-05, + "loss": 0.0103, + "step": 10560 + }, + { + "epoch": 1.88, + "learning_rate": 4.9058131241084166e-05, + "loss": 0.0093, + "step": 10570 + }, + { + "epoch": 1.89, + "learning_rate": 4.905723965763196e-05, + "loss": 0.0068, + "step": 10580 + }, + { + "epoch": 1.89, + "learning_rate": 4.905634807417975e-05, + "loss": 0.0117, + "step": 10590 + }, + { + "epoch": 1.89, + "learning_rate": 4.905545649072753e-05, + "loss": 0.0121, + "step": 10600 + }, + { + "epoch": 1.89, + "learning_rate": 4.9054564907275324e-05, + "loss": 0.0086, + "step": 10610 + }, + { + "epoch": 1.89, + "learning_rate": 4.9053673323823115e-05, + "loss": 0.0066, + "step": 10620 + }, + { + "epoch": 1.9, + "learning_rate": 4.90527817403709e-05, + "loss": 0.0082, + "step": 10630 + }, + { + "epoch": 1.9, + "learning_rate": 4.905189015691869e-05, + "loss": 0.008, + "step": 10640 + }, + { + "epoch": 1.9, + "learning_rate": 4.9050998573466475e-05, + "loss": 0.0113, + "step": 10650 + }, + { + "epoch": 1.9, + "learning_rate": 4.9050106990014267e-05, + "loss": 0.0104, + "step": 10660 + }, + { + "epoch": 1.9, + "learning_rate": 4.904921540656206e-05, + "loss": 0.009, + "step": 10670 + }, + { + "epoch": 1.9, + "learning_rate": 4.904832382310984e-05, + "loss": 0.012, + "step": 10680 + }, + { + "epoch": 1.91, + "learning_rate": 4.904743223965764e-05, + "loss": 0.0065, + "step": 10690 + }, + { + "epoch": 1.91, + "learning_rate": 4.9046540656205425e-05, + "loss": 0.0099, + "step": 10700 + }, + { + "epoch": 1.91, + "learning_rate": 4.9045649072753216e-05, + "loss": 0.0096, + "step": 10710 + }, + { + "epoch": 1.91, + "learning_rate": 4.9044757489301e-05, + "loss": 0.0096, + "step": 10720 + }, + { + "epoch": 1.91, + "learning_rate": 4.904386590584879e-05, + "loss": 0.016, + "step": 10730 + }, + { + "epoch": 1.92, + "learning_rate": 4.9042974322396576e-05, + "loss": 0.0149, + "step": 10740 + }, + { + "epoch": 1.92, + "learning_rate": 4.904208273894437e-05, + "loss": 0.0145, + "step": 10750 + }, + { + "epoch": 1.92, + "learning_rate": 4.904119115549216e-05, + "loss": 0.0101, + "step": 10760 + }, + { + "epoch": 1.92, + "learning_rate": 4.904029957203994e-05, + "loss": 0.0109, + "step": 10770 + }, + { + "epoch": 1.92, + "learning_rate": 4.9039407988587734e-05, + "loss": 0.0103, + "step": 10780 + }, + { + "epoch": 1.92, + "learning_rate": 4.903851640513552e-05, + "loss": 0.0125, + "step": 10790 + }, + { + "epoch": 1.93, + "learning_rate": 4.9037624821683316e-05, + "loss": 0.0092, + "step": 10800 + }, + { + "epoch": 1.93, + "learning_rate": 4.90367332382311e-05, + "loss": 0.0093, + "step": 10810 + }, + { + "epoch": 1.93, + "learning_rate": 4.903584165477889e-05, + "loss": 0.0097, + "step": 10820 + }, + { + "epoch": 1.93, + "learning_rate": 4.9034950071326676e-05, + "loss": 0.0135, + "step": 10830 + }, + { + "epoch": 1.93, + "learning_rate": 4.903405848787447e-05, + "loss": 0.0082, + "step": 10840 + }, + { + "epoch": 1.93, + "learning_rate": 4.903316690442226e-05, + "loss": 0.0087, + "step": 10850 + }, + { + "epoch": 1.94, + "learning_rate": 4.903227532097004e-05, + "loss": 0.0096, + "step": 10860 + }, + { + "epoch": 1.94, + "learning_rate": 4.9031383737517835e-05, + "loss": 0.01, + "step": 10870 + }, + { + "epoch": 1.94, + "learning_rate": 4.903049215406562e-05, + "loss": 0.0122, + "step": 10880 + }, + { + "epoch": 1.94, + "learning_rate": 4.902960057061341e-05, + "loss": 0.0109, + "step": 10890 + }, + { + "epoch": 1.94, + "learning_rate": 4.90287089871612e-05, + "loss": 0.0065, + "step": 10900 + }, + { + "epoch": 1.95, + "learning_rate": 4.902781740370899e-05, + "loss": 0.0104, + "step": 10910 + }, + { + "epoch": 1.95, + "learning_rate": 4.9026925820256784e-05, + "loss": 0.0106, + "step": 10920 + }, + { + "epoch": 1.95, + "learning_rate": 4.902603423680457e-05, + "loss": 0.0054, + "step": 10930 + }, + { + "epoch": 1.95, + "learning_rate": 4.902514265335236e-05, + "loss": 0.0098, + "step": 10940 + }, + { + "epoch": 1.95, + "learning_rate": 4.9024251069900144e-05, + "loss": 0.0083, + "step": 10950 + }, + { + "epoch": 1.95, + "learning_rate": 4.9023359486447935e-05, + "loss": 0.0079, + "step": 10960 + }, + { + "epoch": 1.96, + "learning_rate": 4.902246790299572e-05, + "loss": 0.0098, + "step": 10970 + }, + { + "epoch": 1.96, + "learning_rate": 4.902157631954351e-05, + "loss": 0.0093, + "step": 10980 + }, + { + "epoch": 1.96, + "learning_rate": 4.90206847360913e-05, + "loss": 0.0076, + "step": 10990 + }, + { + "epoch": 1.96, + "learning_rate": 4.9019793152639086e-05, + "loss": 0.0083, + "step": 11000 + }, + { + "epoch": 1.96, + "learning_rate": 4.901890156918688e-05, + "loss": 0.0132, + "step": 11010 + }, + { + "epoch": 1.97, + "learning_rate": 4.901800998573467e-05, + "loss": 0.0146, + "step": 11020 + }, + { + "epoch": 1.97, + "learning_rate": 4.901711840228246e-05, + "loss": 0.0071, + "step": 11030 + }, + { + "epoch": 1.97, + "learning_rate": 4.9016226818830244e-05, + "loss": 0.0107, + "step": 11040 + }, + { + "epoch": 1.97, + "learning_rate": 4.9015335235378036e-05, + "loss": 0.0105, + "step": 11050 + }, + { + "epoch": 1.97, + "learning_rate": 4.901444365192582e-05, + "loss": 0.0095, + "step": 11060 + }, + { + "epoch": 1.97, + "learning_rate": 4.901355206847361e-05, + "loss": 0.0094, + "step": 11070 + }, + { + "epoch": 1.98, + "learning_rate": 4.90126604850214e-05, + "loss": 0.0089, + "step": 11080 + }, + { + "epoch": 1.98, + "learning_rate": 4.901176890156919e-05, + "loss": 0.0126, + "step": 11090 + }, + { + "epoch": 1.98, + "learning_rate": 4.901087731811698e-05, + "loss": 0.0095, + "step": 11100 + }, + { + "epoch": 1.98, + "learning_rate": 4.900998573466476e-05, + "loss": 0.0101, + "step": 11110 + }, + { + "epoch": 1.98, + "learning_rate": 4.9009094151212554e-05, + "loss": 0.0105, + "step": 11120 + }, + { + "epoch": 1.98, + "learning_rate": 4.9008202567760345e-05, + "loss": 0.0086, + "step": 11130 + }, + { + "epoch": 1.99, + "learning_rate": 4.9007310984308136e-05, + "loss": 0.0087, + "step": 11140 + }, + { + "epoch": 1.99, + "learning_rate": 4.900641940085593e-05, + "loss": 0.007, + "step": 11150 + }, + { + "epoch": 1.99, + "learning_rate": 4.900552781740371e-05, + "loss": 0.0119, + "step": 11160 + }, + { + "epoch": 1.99, + "learning_rate": 4.90046362339515e-05, + "loss": 0.0098, + "step": 11170 + }, + { + "epoch": 1.99, + "learning_rate": 4.900374465049929e-05, + "loss": 0.0104, + "step": 11180 + }, + { + "epoch": 2.0, + "learning_rate": 4.900285306704708e-05, + "loss": 0.007, + "step": 11190 + }, + { + "epoch": 2.0, + "learning_rate": 4.900196148359486e-05, + "loss": 0.009, + "step": 11200 + }, + { + "epoch": 2.0, + "learning_rate": 4.9001069900142654e-05, + "loss": 0.0098, + "step": 11210 + }, + { + "epoch": 2.0, + "eval_loss": 0.015248224139213562, + "eval_runtime": 196.1154, + "eval_samples_per_second": 23.654, + "eval_steps_per_second": 2.957, + "step": 11216 + }, + { + "epoch": 2.0, + "learning_rate": 4.9000178316690446e-05, + "loss": 0.0093, + "step": 11220 + }, + { + "epoch": 2.0, + "learning_rate": 4.899928673323823e-05, + "loss": 0.0068, + "step": 11230 + }, + { + "epoch": 2.0, + "learning_rate": 4.899839514978603e-05, + "loss": 0.0063, + "step": 11240 + }, + { + "epoch": 2.01, + "learning_rate": 4.899750356633381e-05, + "loss": 0.0063, + "step": 11250 + }, + { + "epoch": 2.01, + "learning_rate": 4.8996611982881604e-05, + "loss": 0.0088, + "step": 11260 + }, + { + "epoch": 2.01, + "learning_rate": 4.899572039942939e-05, + "loss": 0.0105, + "step": 11270 + }, + { + "epoch": 2.01, + "learning_rate": 4.899482881597718e-05, + "loss": 0.007, + "step": 11280 + }, + { + "epoch": 2.01, + "learning_rate": 4.8993937232524964e-05, + "loss": 0.0076, + "step": 11290 + }, + { + "epoch": 2.01, + "learning_rate": 4.8993045649072755e-05, + "loss": 0.0101, + "step": 11300 + }, + { + "epoch": 2.02, + "learning_rate": 4.8992154065620546e-05, + "loss": 0.006, + "step": 11310 + }, + { + "epoch": 2.02, + "learning_rate": 4.899126248216833e-05, + "loss": 0.0088, + "step": 11320 + }, + { + "epoch": 2.02, + "learning_rate": 4.899037089871612e-05, + "loss": 0.0079, + "step": 11330 + }, + { + "epoch": 2.02, + "learning_rate": 4.8989479315263906e-05, + "loss": 0.008, + "step": 11340 + }, + { + "epoch": 2.02, + "learning_rate": 4.8988587731811704e-05, + "loss": 0.0086, + "step": 11350 + }, + { + "epoch": 2.03, + "learning_rate": 4.898769614835949e-05, + "loss": 0.0093, + "step": 11360 + }, + { + "epoch": 2.03, + "learning_rate": 4.898680456490728e-05, + "loss": 0.0121, + "step": 11370 + }, + { + "epoch": 2.03, + "learning_rate": 4.898591298145507e-05, + "loss": 0.0091, + "step": 11380 + }, + { + "epoch": 2.03, + "learning_rate": 4.8985021398002856e-05, + "loss": 0.0094, + "step": 11390 + }, + { + "epoch": 2.03, + "learning_rate": 4.898412981455065e-05, + "loss": 0.0112, + "step": 11400 + }, + { + "epoch": 2.03, + "learning_rate": 4.898323823109843e-05, + "loss": 0.0072, + "step": 11410 + }, + { + "epoch": 2.04, + "learning_rate": 4.898234664764622e-05, + "loss": 0.0086, + "step": 11420 + }, + { + "epoch": 2.04, + "learning_rate": 4.898145506419401e-05, + "loss": 0.0071, + "step": 11430 + }, + { + "epoch": 2.04, + "learning_rate": 4.89805634807418e-05, + "loss": 0.008, + "step": 11440 + }, + { + "epoch": 2.04, + "learning_rate": 4.897967189728959e-05, + "loss": 0.008, + "step": 11450 + }, + { + "epoch": 2.04, + "learning_rate": 4.897878031383738e-05, + "loss": 0.0075, + "step": 11460 + }, + { + "epoch": 2.05, + "learning_rate": 4.897788873038517e-05, + "loss": 0.008, + "step": 11470 + }, + { + "epoch": 2.05, + "learning_rate": 4.8976997146932956e-05, + "loss": 0.0111, + "step": 11480 + }, + { + "epoch": 2.05, + "learning_rate": 4.897610556348075e-05, + "loss": 0.0074, + "step": 11490 + }, + { + "epoch": 2.05, + "learning_rate": 4.897521398002853e-05, + "loss": 0.0073, + "step": 11500 + }, + { + "epoch": 2.05, + "learning_rate": 4.897432239657632e-05, + "loss": 0.0078, + "step": 11510 + }, + { + "epoch": 2.05, + "learning_rate": 4.897343081312411e-05, + "loss": 0.0111, + "step": 11520 + }, + { + "epoch": 2.06, + "learning_rate": 4.89725392296719e-05, + "loss": 0.008, + "step": 11530 + }, + { + "epoch": 2.06, + "learning_rate": 4.897164764621969e-05, + "loss": 0.0073, + "step": 11540 + }, + { + "epoch": 2.06, + "learning_rate": 4.8970756062767474e-05, + "loss": 0.0075, + "step": 11550 + }, + { + "epoch": 2.06, + "learning_rate": 4.8969864479315265e-05, + "loss": 0.0103, + "step": 11560 + }, + { + "epoch": 2.06, + "learning_rate": 4.896897289586306e-05, + "loss": 0.0053, + "step": 11570 + }, + { + "epoch": 2.06, + "learning_rate": 4.896808131241085e-05, + "loss": 0.0085, + "step": 11580 + }, + { + "epoch": 2.07, + "learning_rate": 4.896718972895863e-05, + "loss": 0.0127, + "step": 11590 + }, + { + "epoch": 2.07, + "learning_rate": 4.896638730385164e-05, + "loss": 0.0096, + "step": 11600 + }, + { + "epoch": 2.07, + "learning_rate": 4.896549572039943e-05, + "loss": 0.0094, + "step": 11610 + }, + { + "epoch": 2.07, + "learning_rate": 4.896460413694722e-05, + "loss": 0.0073, + "step": 11620 + }, + { + "epoch": 2.07, + "learning_rate": 4.8963712553495014e-05, + "loss": 0.0071, + "step": 11630 + }, + { + "epoch": 2.08, + "learning_rate": 4.89628209700428e-05, + "loss": 0.0067, + "step": 11640 + }, + { + "epoch": 2.08, + "learning_rate": 4.896192938659059e-05, + "loss": 0.0081, + "step": 11650 + }, + { + "epoch": 2.08, + "learning_rate": 4.8961037803138374e-05, + "loss": 0.009, + "step": 11660 + }, + { + "epoch": 2.08, + "learning_rate": 4.8960146219686165e-05, + "loss": 0.0113, + "step": 11670 + }, + { + "epoch": 2.08, + "learning_rate": 4.8959254636233956e-05, + "loss": 0.0083, + "step": 11680 + }, + { + "epoch": 2.08, + "learning_rate": 4.895836305278174e-05, + "loss": 0.0082, + "step": 11690 + }, + { + "epoch": 2.09, + "learning_rate": 4.895747146932953e-05, + "loss": 0.011, + "step": 11700 + }, + { + "epoch": 2.09, + "learning_rate": 4.8956579885877317e-05, + "loss": 0.0087, + "step": 11710 + }, + { + "epoch": 2.09, + "learning_rate": 4.895568830242511e-05, + "loss": 0.0105, + "step": 11720 + }, + { + "epoch": 2.09, + "learning_rate": 4.89547967189729e-05, + "loss": 0.0092, + "step": 11730 + }, + { + "epoch": 2.09, + "learning_rate": 4.895390513552069e-05, + "loss": 0.0068, + "step": 11740 + }, + { + "epoch": 2.1, + "learning_rate": 4.8953013552068475e-05, + "loss": 0.0083, + "step": 11750 + }, + { + "epoch": 2.1, + "learning_rate": 4.8952121968616266e-05, + "loss": 0.0074, + "step": 11760 + }, + { + "epoch": 2.1, + "learning_rate": 4.895123038516406e-05, + "loss": 0.0073, + "step": 11770 + }, + { + "epoch": 2.1, + "learning_rate": 4.895033880171184e-05, + "loss": 0.0093, + "step": 11780 + }, + { + "epoch": 2.1, + "learning_rate": 4.894944721825963e-05, + "loss": 0.0064, + "step": 11790 + }, + { + "epoch": 2.1, + "learning_rate": 4.894855563480742e-05, + "loss": 0.0073, + "step": 11800 + }, + { + "epoch": 2.11, + "learning_rate": 4.894766405135521e-05, + "loss": 0.0074, + "step": 11810 + }, + { + "epoch": 2.11, + "learning_rate": 4.894677246790299e-05, + "loss": 0.0091, + "step": 11820 + }, + { + "epoch": 2.11, + "learning_rate": 4.8945880884450784e-05, + "loss": 0.0065, + "step": 11830 + }, + { + "epoch": 2.11, + "learning_rate": 4.8944989300998575e-05, + "loss": 0.0086, + "step": 11840 + }, + { + "epoch": 2.11, + "learning_rate": 4.8944097717546366e-05, + "loss": 0.0064, + "step": 11850 + }, + { + "epoch": 2.11, + "learning_rate": 4.894320613409416e-05, + "loss": 0.0088, + "step": 11860 + }, + { + "epoch": 2.12, + "learning_rate": 4.894231455064194e-05, + "loss": 0.0062, + "step": 11870 + }, + { + "epoch": 2.12, + "learning_rate": 4.894142296718973e-05, + "loss": 0.0102, + "step": 11880 + }, + { + "epoch": 2.12, + "learning_rate": 4.894053138373752e-05, + "loss": 0.01, + "step": 11890 + }, + { + "epoch": 2.12, + "learning_rate": 4.893963980028531e-05, + "loss": 0.0084, + "step": 11900 + }, + { + "epoch": 2.12, + "learning_rate": 4.89387482168331e-05, + "loss": 0.0067, + "step": 11910 + }, + { + "epoch": 2.13, + "learning_rate": 4.8937856633380884e-05, + "loss": 0.0093, + "step": 11920 + }, + { + "epoch": 2.13, + "learning_rate": 4.8936965049928676e-05, + "loss": 0.0077, + "step": 11930 + }, + { + "epoch": 2.13, + "learning_rate": 4.893607346647646e-05, + "loss": 0.0123, + "step": 11940 + }, + { + "epoch": 2.13, + "learning_rate": 4.893518188302426e-05, + "loss": 0.0052, + "step": 11950 + }, + { + "epoch": 2.13, + "learning_rate": 4.893429029957204e-05, + "loss": 0.0091, + "step": 11960 + }, + { + "epoch": 2.13, + "learning_rate": 4.8933398716119834e-05, + "loss": 0.0088, + "step": 11970 + }, + { + "epoch": 2.14, + "learning_rate": 4.893250713266762e-05, + "loss": 0.0094, + "step": 11980 + }, + { + "epoch": 2.14, + "learning_rate": 4.893161554921541e-05, + "loss": 0.0075, + "step": 11990 + }, + { + "epoch": 2.14, + "learning_rate": 4.89307239657632e-05, + "loss": 0.0074, + "step": 12000 + }, + { + "epoch": 2.14, + "learning_rate": 4.8929832382310985e-05, + "loss": 0.0075, + "step": 12010 + }, + { + "epoch": 2.14, + "learning_rate": 4.8928940798858776e-05, + "loss": 0.0073, + "step": 12020 + }, + { + "epoch": 2.15, + "learning_rate": 4.892804921540656e-05, + "loss": 0.0098, + "step": 12030 + }, + { + "epoch": 2.15, + "learning_rate": 4.892715763195435e-05, + "loss": 0.0075, + "step": 12040 + }, + { + "epoch": 2.15, + "learning_rate": 4.8926266048502136e-05, + "loss": 0.0072, + "step": 12050 + }, + { + "epoch": 2.15, + "learning_rate": 4.8925374465049934e-05, + "loss": 0.0061, + "step": 12060 + }, + { + "epoch": 2.15, + "learning_rate": 4.892448288159772e-05, + "loss": 0.0107, + "step": 12070 + }, + { + "epoch": 2.15, + "learning_rate": 4.892359129814551e-05, + "loss": 0.0082, + "step": 12080 + }, + { + "epoch": 2.16, + "learning_rate": 4.89226997146933e-05, + "loss": 0.0048, + "step": 12090 + }, + { + "epoch": 2.16, + "learning_rate": 4.8921808131241086e-05, + "loss": 0.0073, + "step": 12100 + }, + { + "epoch": 2.16, + "learning_rate": 4.892091654778888e-05, + "loss": 0.0097, + "step": 12110 + }, + { + "epoch": 2.16, + "learning_rate": 4.892002496433666e-05, + "loss": 0.0071, + "step": 12120 + }, + { + "epoch": 2.16, + "learning_rate": 4.891913338088445e-05, + "loss": 0.008, + "step": 12130 + }, + { + "epoch": 2.16, + "learning_rate": 4.8918241797432244e-05, + "loss": 0.0063, + "step": 12140 + }, + { + "epoch": 2.17, + "learning_rate": 4.891735021398003e-05, + "loss": 0.0095, + "step": 12150 + }, + { + "epoch": 2.17, + "learning_rate": 4.891645863052782e-05, + "loss": 0.0095, + "step": 12160 + }, + { + "epoch": 2.17, + "learning_rate": 4.891556704707561e-05, + "loss": 0.0101, + "step": 12170 + }, + { + "epoch": 2.17, + "learning_rate": 4.89146754636234e-05, + "loss": 0.0103, + "step": 12180 + }, + { + "epoch": 2.17, + "learning_rate": 4.8913783880171186e-05, + "loss": 0.0091, + "step": 12190 + }, + { + "epoch": 2.18, + "learning_rate": 4.891289229671898e-05, + "loss": 0.0133, + "step": 12200 + }, + { + "epoch": 2.18, + "learning_rate": 4.891200071326676e-05, + "loss": 0.0047, + "step": 12210 + }, + { + "epoch": 2.18, + "learning_rate": 4.891110912981455e-05, + "loss": 0.0073, + "step": 12220 + }, + { + "epoch": 2.18, + "learning_rate": 4.8910217546362344e-05, + "loss": 0.0065, + "step": 12230 + }, + { + "epoch": 2.18, + "learning_rate": 4.890932596291013e-05, + "loss": 0.0075, + "step": 12240 + }, + { + "epoch": 2.18, + "learning_rate": 4.890843437945792e-05, + "loss": 0.01, + "step": 12250 + }, + { + "epoch": 2.19, + "learning_rate": 4.8907542796005704e-05, + "loss": 0.0087, + "step": 12260 + }, + { + "epoch": 2.19, + "learning_rate": 4.8906651212553496e-05, + "loss": 0.01, + "step": 12270 + }, + { + "epoch": 2.19, + "learning_rate": 4.890575962910129e-05, + "loss": 0.0077, + "step": 12280 + }, + { + "epoch": 2.19, + "learning_rate": 4.890486804564908e-05, + "loss": 0.0097, + "step": 12290 + }, + { + "epoch": 2.19, + "learning_rate": 4.890397646219686e-05, + "loss": 0.008, + "step": 12300 + }, + { + "epoch": 2.2, + "learning_rate": 4.8903084878744654e-05, + "loss": 0.005, + "step": 12310 + }, + { + "epoch": 2.2, + "learning_rate": 4.8902193295292445e-05, + "loss": 0.0072, + "step": 12320 + }, + { + "epoch": 2.2, + "learning_rate": 4.890130171184023e-05, + "loss": 0.0077, + "step": 12330 + }, + { + "epoch": 2.2, + "learning_rate": 4.890041012838802e-05, + "loss": 0.0088, + "step": 12340 + }, + { + "epoch": 2.2, + "learning_rate": 4.8899518544935805e-05, + "loss": 0.0068, + "step": 12350 + }, + { + "epoch": 2.2, + "learning_rate": 4.8898626961483596e-05, + "loss": 0.0071, + "step": 12360 + }, + { + "epoch": 2.21, + "learning_rate": 4.889773537803139e-05, + "loss": 0.0077, + "step": 12370 + }, + { + "epoch": 2.21, + "learning_rate": 4.889684379457917e-05, + "loss": 0.0081, + "step": 12380 + }, + { + "epoch": 2.21, + "learning_rate": 4.889595221112697e-05, + "loss": 0.0067, + "step": 12390 + }, + { + "epoch": 2.21, + "learning_rate": 4.8895060627674754e-05, + "loss": 0.0099, + "step": 12400 + }, + { + "epoch": 2.21, + "learning_rate": 4.8894169044222545e-05, + "loss": 0.0068, + "step": 12410 + }, + { + "epoch": 2.21, + "learning_rate": 4.889327746077033e-05, + "loss": 0.0069, + "step": 12420 + }, + { + "epoch": 2.22, + "learning_rate": 4.889238587731812e-05, + "loss": 0.0066, + "step": 12430 + }, + { + "epoch": 2.22, + "learning_rate": 4.8891494293865905e-05, + "loss": 0.0064, + "step": 12440 + }, + { + "epoch": 2.22, + "learning_rate": 4.88906027104137e-05, + "loss": 0.0113, + "step": 12450 + }, + { + "epoch": 2.22, + "learning_rate": 4.888971112696149e-05, + "loss": 0.0095, + "step": 12460 + }, + { + "epoch": 2.22, + "learning_rate": 4.888881954350927e-05, + "loss": 0.0071, + "step": 12470 + }, + { + "epoch": 2.23, + "learning_rate": 4.8887927960057063e-05, + "loss": 0.0101, + "step": 12480 + }, + { + "epoch": 2.23, + "learning_rate": 4.888703637660485e-05, + "loss": 0.0084, + "step": 12490 + }, + { + "epoch": 2.23, + "learning_rate": 4.8886144793152646e-05, + "loss": 0.0065, + "step": 12500 + }, + { + "epoch": 2.23, + "learning_rate": 4.888525320970043e-05, + "loss": 0.0085, + "step": 12510 + }, + { + "epoch": 2.23, + "learning_rate": 4.888436162624822e-05, + "loss": 0.0068, + "step": 12520 + }, + { + "epoch": 2.23, + "learning_rate": 4.8883470042796006e-05, + "loss": 0.0047, + "step": 12530 + }, + { + "epoch": 2.24, + "learning_rate": 4.88825784593438e-05, + "loss": 0.0106, + "step": 12540 + }, + { + "epoch": 2.24, + "learning_rate": 4.888168687589159e-05, + "loss": 0.0067, + "step": 12550 + }, + { + "epoch": 2.24, + "learning_rate": 4.888079529243937e-05, + "loss": 0.0082, + "step": 12560 + }, + { + "epoch": 2.24, + "learning_rate": 4.8879903708987164e-05, + "loss": 0.0088, + "step": 12570 + }, + { + "epoch": 2.24, + "learning_rate": 4.887901212553495e-05, + "loss": 0.0087, + "step": 12580 + }, + { + "epoch": 2.25, + "learning_rate": 4.887812054208274e-05, + "loss": 0.0102, + "step": 12590 + }, + { + "epoch": 2.25, + "learning_rate": 4.887722895863053e-05, + "loss": 0.0092, + "step": 12600 + }, + { + "epoch": 2.25, + "learning_rate": 4.887633737517832e-05, + "loss": 0.0113, + "step": 12610 + }, + { + "epoch": 2.25, + "learning_rate": 4.8875445791726107e-05, + "loss": 0.01, + "step": 12620 + }, + { + "epoch": 2.25, + "learning_rate": 4.88745542082739e-05, + "loss": 0.0095, + "step": 12630 + }, + { + "epoch": 2.25, + "learning_rate": 4.887366262482169e-05, + "loss": 0.0095, + "step": 12640 + }, + { + "epoch": 2.26, + "learning_rate": 4.8872771041369473e-05, + "loss": 0.0071, + "step": 12650 + }, + { + "epoch": 2.26, + "learning_rate": 4.8871879457917265e-05, + "loss": 0.0092, + "step": 12660 + }, + { + "epoch": 2.26, + "learning_rate": 4.887098787446505e-05, + "loss": 0.0075, + "step": 12670 + }, + { + "epoch": 2.26, + "learning_rate": 4.887009629101284e-05, + "loss": 0.007, + "step": 12680 + }, + { + "epoch": 2.26, + "learning_rate": 4.886920470756063e-05, + "loss": 0.0094, + "step": 12690 + }, + { + "epoch": 2.26, + "learning_rate": 4.8868313124108416e-05, + "loss": 0.0077, + "step": 12700 + }, + { + "epoch": 2.27, + "learning_rate": 4.886742154065621e-05, + "loss": 0.0114, + "step": 12710 + }, + { + "epoch": 2.27, + "learning_rate": 4.8866529957204e-05, + "loss": 0.0077, + "step": 12720 + }, + { + "epoch": 2.27, + "learning_rate": 4.886563837375179e-05, + "loss": 0.0046, + "step": 12730 + }, + { + "epoch": 2.27, + "learning_rate": 4.8864746790299574e-05, + "loss": 0.0113, + "step": 12740 + }, + { + "epoch": 2.27, + "learning_rate": 4.8863855206847365e-05, + "loss": 0.0061, + "step": 12750 + }, + { + "epoch": 2.28, + "learning_rate": 4.886296362339515e-05, + "loss": 0.0102, + "step": 12760 + }, + { + "epoch": 2.28, + "learning_rate": 4.886207203994294e-05, + "loss": 0.0098, + "step": 12770 + }, + { + "epoch": 2.28, + "learning_rate": 4.886118045649073e-05, + "loss": 0.0073, + "step": 12780 + }, + { + "epoch": 2.28, + "learning_rate": 4.8860288873038516e-05, + "loss": 0.0091, + "step": 12790 + }, + { + "epoch": 2.28, + "learning_rate": 4.885939728958631e-05, + "loss": 0.0073, + "step": 12800 + }, + { + "epoch": 2.28, + "learning_rate": 4.885850570613409e-05, + "loss": 0.0072, + "step": 12810 + }, + { + "epoch": 2.29, + "learning_rate": 4.885761412268188e-05, + "loss": 0.0059, + "step": 12820 + }, + { + "epoch": 2.29, + "learning_rate": 4.8856722539229675e-05, + "loss": 0.0063, + "step": 12830 + }, + { + "epoch": 2.29, + "learning_rate": 4.8855830955777466e-05, + "loss": 0.0048, + "step": 12840 + }, + { + "epoch": 2.29, + "learning_rate": 4.885493937232525e-05, + "loss": 0.0079, + "step": 12850 + }, + { + "epoch": 2.29, + "learning_rate": 4.885404778887304e-05, + "loss": 0.0094, + "step": 12860 + }, + { + "epoch": 2.29, + "learning_rate": 4.885315620542083e-05, + "loss": 0.0074, + "step": 12870 + }, + { + "epoch": 2.3, + "learning_rate": 4.885226462196862e-05, + "loss": 0.0092, + "step": 12880 + }, + { + "epoch": 2.3, + "learning_rate": 4.885137303851641e-05, + "loss": 0.0055, + "step": 12890 + }, + { + "epoch": 2.3, + "learning_rate": 4.885048145506419e-05, + "loss": 0.0083, + "step": 12900 + }, + { + "epoch": 2.3, + "learning_rate": 4.8849589871611984e-05, + "loss": 0.0104, + "step": 12910 + }, + { + "epoch": 2.3, + "learning_rate": 4.8848698288159775e-05, + "loss": 0.0098, + "step": 12920 + }, + { + "epoch": 2.31, + "learning_rate": 4.884780670470756e-05, + "loss": 0.01, + "step": 12930 + }, + { + "epoch": 2.31, + "learning_rate": 4.884691512125536e-05, + "loss": 0.0068, + "step": 12940 + }, + { + "epoch": 2.31, + "learning_rate": 4.884602353780314e-05, + "loss": 0.0079, + "step": 12950 + }, + { + "epoch": 2.31, + "learning_rate": 4.884513195435093e-05, + "loss": 0.0115, + "step": 12960 + }, + { + "epoch": 2.31, + "learning_rate": 4.884424037089872e-05, + "loss": 0.01, + "step": 12970 + }, + { + "epoch": 2.31, + "learning_rate": 4.884334878744651e-05, + "loss": 0.01, + "step": 12980 + }, + { + "epoch": 2.32, + "learning_rate": 4.884245720399429e-05, + "loss": 0.0086, + "step": 12990 + }, + { + "epoch": 2.32, + "learning_rate": 4.8841565620542084e-05, + "loss": 0.0075, + "step": 13000 + }, + { + "epoch": 2.32, + "learning_rate": 4.8840674037089876e-05, + "loss": 0.0089, + "step": 13010 + }, + { + "epoch": 2.32, + "learning_rate": 4.883978245363766e-05, + "loss": 0.0095, + "step": 13020 + }, + { + "epoch": 2.32, + "learning_rate": 4.883889087018545e-05, + "loss": 0.0183, + "step": 13030 + }, + { + "epoch": 2.33, + "learning_rate": 4.8837999286733236e-05, + "loss": 0.0095, + "step": 13040 + }, + { + "epoch": 2.33, + "learning_rate": 4.8837107703281034e-05, + "loss": 0.0075, + "step": 13050 + }, + { + "epoch": 2.33, + "learning_rate": 4.883621611982882e-05, + "loss": 0.0073, + "step": 13060 + }, + { + "epoch": 2.33, + "learning_rate": 4.883532453637661e-05, + "loss": 0.0075, + "step": 13070 + }, + { + "epoch": 2.33, + "learning_rate": 4.8834432952924394e-05, + "loss": 0.0084, + "step": 13080 + }, + { + "epoch": 2.33, + "learning_rate": 4.8833541369472185e-05, + "loss": 0.0085, + "step": 13090 + }, + { + "epoch": 2.34, + "learning_rate": 4.8832649786019976e-05, + "loss": 0.0063, + "step": 13100 + }, + { + "epoch": 2.34, + "learning_rate": 4.883175820256776e-05, + "loss": 0.005, + "step": 13110 + }, + { + "epoch": 2.34, + "learning_rate": 4.883086661911555e-05, + "loss": 0.0066, + "step": 13120 + }, + { + "epoch": 2.34, + "learning_rate": 4.8829975035663336e-05, + "loss": 0.0088, + "step": 13130 + }, + { + "epoch": 2.34, + "learning_rate": 4.882908345221113e-05, + "loss": 0.0088, + "step": 13140 + }, + { + "epoch": 2.34, + "learning_rate": 4.882819186875892e-05, + "loss": 0.0079, + "step": 13150 + }, + { + "epoch": 2.35, + "learning_rate": 4.882730028530671e-05, + "loss": 0.0083, + "step": 13160 + }, + { + "epoch": 2.35, + "learning_rate": 4.88264087018545e-05, + "loss": 0.0087, + "step": 13170 + }, + { + "epoch": 2.35, + "learning_rate": 4.8825517118402286e-05, + "loss": 0.0085, + "step": 13180 + }, + { + "epoch": 2.35, + "learning_rate": 4.882462553495008e-05, + "loss": 0.0097, + "step": 13190 + }, + { + "epoch": 2.35, + "learning_rate": 4.882373395149786e-05, + "loss": 0.0074, + "step": 13200 + }, + { + "epoch": 2.36, + "learning_rate": 4.882284236804565e-05, + "loss": 0.0092, + "step": 13210 + }, + { + "epoch": 2.36, + "learning_rate": 4.882195078459344e-05, + "loss": 0.0057, + "step": 13220 + }, + { + "epoch": 2.36, + "learning_rate": 4.882105920114123e-05, + "loss": 0.0168, + "step": 13230 + }, + { + "epoch": 2.36, + "learning_rate": 4.882016761768902e-05, + "loss": 0.0094, + "step": 13240 + }, + { + "epoch": 2.36, + "learning_rate": 4.8819276034236804e-05, + "loss": 0.012, + "step": 13250 + }, + { + "epoch": 2.36, + "learning_rate": 4.8818384450784595e-05, + "loss": 0.0062, + "step": 13260 + }, + { + "epoch": 2.37, + "learning_rate": 4.8817492867332386e-05, + "loss": 0.0098, + "step": 13270 + }, + { + "epoch": 2.37, + "learning_rate": 4.881660128388018e-05, + "loss": 0.0093, + "step": 13280 + }, + { + "epoch": 2.37, + "learning_rate": 4.881570970042796e-05, + "loss": 0.009, + "step": 13290 + }, + { + "epoch": 2.37, + "learning_rate": 4.881481811697575e-05, + "loss": 0.0077, + "step": 13300 + }, + { + "epoch": 2.37, + "learning_rate": 4.881392653352354e-05, + "loss": 0.0064, + "step": 13310 + }, + { + "epoch": 2.38, + "learning_rate": 4.881303495007133e-05, + "loss": 0.0071, + "step": 13320 + }, + { + "epoch": 2.38, + "learning_rate": 4.881214336661912e-05, + "loss": 0.0092, + "step": 13330 + }, + { + "epoch": 2.38, + "learning_rate": 4.8811251783166904e-05, + "loss": 0.0124, + "step": 13340 + }, + { + "epoch": 2.38, + "learning_rate": 4.8810360199714695e-05, + "loss": 0.0128, + "step": 13350 + }, + { + "epoch": 2.38, + "learning_rate": 4.880946861626248e-05, + "loss": 0.0083, + "step": 13360 + }, + { + "epoch": 2.38, + "learning_rate": 4.880857703281027e-05, + "loss": 0.0058, + "step": 13370 + }, + { + "epoch": 2.39, + "learning_rate": 4.880768544935806e-05, + "loss": 0.0059, + "step": 13380 + }, + { + "epoch": 2.39, + "learning_rate": 4.8806793865905854e-05, + "loss": 0.0094, + "step": 13390 + }, + { + "epoch": 2.39, + "learning_rate": 4.8805902282453645e-05, + "loss": 0.0092, + "step": 13400 + }, + { + "epoch": 2.39, + "learning_rate": 4.880501069900143e-05, + "loss": 0.0096, + "step": 13410 + }, + { + "epoch": 2.39, + "learning_rate": 4.880411911554922e-05, + "loss": 0.0082, + "step": 13420 + }, + { + "epoch": 2.39, + "learning_rate": 4.8803227532097005e-05, + "loss": 0.0086, + "step": 13430 + }, + { + "epoch": 2.4, + "learning_rate": 4.8802335948644796e-05, + "loss": 0.0071, + "step": 13440 + }, + { + "epoch": 2.4, + "learning_rate": 4.880144436519258e-05, + "loss": 0.0106, + "step": 13450 + }, + { + "epoch": 2.4, + "learning_rate": 4.880055278174037e-05, + "loss": 0.0096, + "step": 13460 + }, + { + "epoch": 2.4, + "learning_rate": 4.879966119828816e-05, + "loss": 0.0107, + "step": 13470 + }, + { + "epoch": 2.4, + "learning_rate": 4.879876961483595e-05, + "loss": 0.01, + "step": 13480 + }, + { + "epoch": 2.41, + "learning_rate": 4.8797878031383745e-05, + "loss": 0.0061, + "step": 13490 + }, + { + "epoch": 2.41, + "learning_rate": 4.879698644793153e-05, + "loss": 0.0104, + "step": 13500 + }, + { + "epoch": 2.41, + "learning_rate": 4.879609486447932e-05, + "loss": 0.0098, + "step": 13510 + }, + { + "epoch": 2.41, + "learning_rate": 4.8795203281027105e-05, + "loss": 0.0078, + "step": 13520 + }, + { + "epoch": 2.41, + "learning_rate": 4.8794311697574897e-05, + "loss": 0.01, + "step": 13530 + }, + { + "epoch": 2.41, + "learning_rate": 4.879342011412268e-05, + "loss": 0.0079, + "step": 13540 + }, + { + "epoch": 2.42, + "learning_rate": 4.879252853067047e-05, + "loss": 0.0101, + "step": 13550 + }, + { + "epoch": 2.42, + "learning_rate": 4.8791636947218263e-05, + "loss": 0.0061, + "step": 13560 + }, + { + "epoch": 2.42, + "learning_rate": 4.879074536376605e-05, + "loss": 0.0071, + "step": 13570 + }, + { + "epoch": 2.42, + "learning_rate": 4.878985378031384e-05, + "loss": 0.0088, + "step": 13580 + }, + { + "epoch": 2.42, + "learning_rate": 4.8788962196861624e-05, + "loss": 0.0068, + "step": 13590 + }, + { + "epoch": 2.43, + "learning_rate": 4.878807061340942e-05, + "loss": 0.0073, + "step": 13600 + }, + { + "epoch": 2.43, + "learning_rate": 4.8787179029957206e-05, + "loss": 0.0077, + "step": 13610 + }, + { + "epoch": 2.43, + "learning_rate": 4.8786287446505e-05, + "loss": 0.0074, + "step": 13620 + }, + { + "epoch": 2.43, + "learning_rate": 4.878539586305279e-05, + "loss": 0.0093, + "step": 13630 + }, + { + "epoch": 2.43, + "learning_rate": 4.878450427960057e-05, + "loss": 0.0101, + "step": 13640 + }, + { + "epoch": 2.43, + "learning_rate": 4.8783612696148364e-05, + "loss": 0.0095, + "step": 13650 + }, + { + "epoch": 2.44, + "learning_rate": 4.878272111269615e-05, + "loss": 0.0097, + "step": 13660 + }, + { + "epoch": 2.44, + "learning_rate": 4.878182952924394e-05, + "loss": 0.007, + "step": 13670 + }, + { + "epoch": 2.44, + "learning_rate": 4.8780937945791724e-05, + "loss": 0.0081, + "step": 13680 + }, + { + "epoch": 2.44, + "learning_rate": 4.8780046362339515e-05, + "loss": 0.0087, + "step": 13690 + }, + { + "epoch": 2.44, + "learning_rate": 4.8779154778887307e-05, + "loss": 0.0115, + "step": 13700 + }, + { + "epoch": 2.44, + "learning_rate": 4.87782631954351e-05, + "loss": 0.0064, + "step": 13710 + }, + { + "epoch": 2.45, + "learning_rate": 4.8777460770328106e-05, + "loss": 0.0098, + "step": 13720 + }, + { + "epoch": 2.45, + "learning_rate": 4.877656918687589e-05, + "loss": 0.0081, + "step": 13730 + }, + { + "epoch": 2.45, + "learning_rate": 4.877567760342368e-05, + "loss": 0.0082, + "step": 13740 + }, + { + "epoch": 2.45, + "learning_rate": 4.8774786019971466e-05, + "loss": 0.0065, + "step": 13750 + }, + { + "epoch": 2.45, + "learning_rate": 4.8773894436519264e-05, + "loss": 0.0068, + "step": 13760 + }, + { + "epoch": 2.46, + "learning_rate": 4.877300285306705e-05, + "loss": 0.0088, + "step": 13770 + }, + { + "epoch": 2.46, + "learning_rate": 4.877211126961484e-05, + "loss": 0.0083, + "step": 13780 + }, + { + "epoch": 2.46, + "learning_rate": 4.877121968616263e-05, + "loss": 0.0075, + "step": 13790 + }, + { + "epoch": 2.46, + "learning_rate": 4.8770328102710415e-05, + "loss": 0.0063, + "step": 13800 + }, + { + "epoch": 2.46, + "learning_rate": 4.8769436519258206e-05, + "loss": 0.0082, + "step": 13810 + }, + { + "epoch": 2.46, + "learning_rate": 4.876854493580599e-05, + "loss": 0.0053, + "step": 13820 + }, + { + "epoch": 2.47, + "learning_rate": 4.876765335235378e-05, + "loss": 0.0085, + "step": 13830 + }, + { + "epoch": 2.47, + "learning_rate": 4.876676176890157e-05, + "loss": 0.0072, + "step": 13840 + }, + { + "epoch": 2.47, + "learning_rate": 4.876587018544936e-05, + "loss": 0.0052, + "step": 13850 + }, + { + "epoch": 2.47, + "learning_rate": 4.876497860199715e-05, + "loss": 0.0093, + "step": 13860 + }, + { + "epoch": 2.47, + "learning_rate": 4.876408701854494e-05, + "loss": 0.0132, + "step": 13870 + }, + { + "epoch": 2.48, + "learning_rate": 4.876319543509273e-05, + "loss": 0.0075, + "step": 13880 + }, + { + "epoch": 2.48, + "learning_rate": 4.8762303851640516e-05, + "loss": 0.0094, + "step": 13890 + }, + { + "epoch": 2.48, + "learning_rate": 4.876141226818831e-05, + "loss": 0.006, + "step": 13900 + }, + { + "epoch": 2.48, + "learning_rate": 4.876052068473609e-05, + "loss": 0.0098, + "step": 13910 + }, + { + "epoch": 2.48, + "learning_rate": 4.875962910128388e-05, + "loss": 0.01, + "step": 13920 + }, + { + "epoch": 2.48, + "learning_rate": 4.8758737517831674e-05, + "loss": 0.0059, + "step": 13930 + }, + { + "epoch": 2.49, + "learning_rate": 4.875784593437946e-05, + "loss": 0.0043, + "step": 13940 + }, + { + "epoch": 2.49, + "learning_rate": 4.875695435092725e-05, + "loss": 0.0104, + "step": 13950 + }, + { + "epoch": 2.49, + "learning_rate": 4.8756062767475034e-05, + "loss": 0.0081, + "step": 13960 + }, + { + "epoch": 2.49, + "learning_rate": 4.8755171184022825e-05, + "loss": 0.0068, + "step": 13970 + }, + { + "epoch": 2.49, + "learning_rate": 4.8754279600570616e-05, + "loss": 0.0077, + "step": 13980 + }, + { + "epoch": 2.49, + "learning_rate": 4.875338801711841e-05, + "loss": 0.0153, + "step": 13990 + }, + { + "epoch": 2.5, + "learning_rate": 4.875249643366619e-05, + "loss": 0.0111, + "step": 14000 + }, + { + "epoch": 2.5, + "learning_rate": 4.875160485021398e-05, + "loss": 0.0071, + "step": 14010 + }, + { + "epoch": 2.5, + "learning_rate": 4.8750713266761774e-05, + "loss": 0.0116, + "step": 14020 + }, + { + "epoch": 2.5, + "learning_rate": 4.874982168330956e-05, + "loss": 0.0092, + "step": 14030 + }, + { + "epoch": 2.5, + "learning_rate": 4.874893009985735e-05, + "loss": 0.0076, + "step": 14040 + }, + { + "epoch": 2.51, + "learning_rate": 4.8748038516405134e-05, + "loss": 0.009, + "step": 14050 + }, + { + "epoch": 2.51, + "learning_rate": 4.8747146932952926e-05, + "loss": 0.0067, + "step": 14060 + }, + { + "epoch": 2.51, + "learning_rate": 4.874625534950072e-05, + "loss": 0.0126, + "step": 14070 + }, + { + "epoch": 2.51, + "learning_rate": 4.87453637660485e-05, + "loss": 0.0079, + "step": 14080 + }, + { + "epoch": 2.51, + "learning_rate": 4.874447218259629e-05, + "loss": 0.0092, + "step": 14090 + }, + { + "epoch": 2.51, + "learning_rate": 4.8743580599144084e-05, + "loss": 0.0086, + "step": 14100 + }, + { + "epoch": 2.52, + "learning_rate": 4.8742689015691875e-05, + "loss": 0.0096, + "step": 14110 + }, + { + "epoch": 2.52, + "learning_rate": 4.874179743223966e-05, + "loss": 0.0099, + "step": 14120 + }, + { + "epoch": 2.52, + "learning_rate": 4.874090584878745e-05, + "loss": 0.0123, + "step": 14130 + }, + { + "epoch": 2.52, + "learning_rate": 4.8740014265335235e-05, + "loss": 0.008, + "step": 14140 + }, + { + "epoch": 2.52, + "learning_rate": 4.8739122681883026e-05, + "loss": 0.012, + "step": 14150 + }, + { + "epoch": 2.52, + "learning_rate": 4.873823109843082e-05, + "loss": 0.0082, + "step": 14160 + }, + { + "epoch": 2.53, + "learning_rate": 4.87373395149786e-05, + "loss": 0.01, + "step": 14170 + }, + { + "epoch": 2.53, + "learning_rate": 4.873644793152639e-05, + "loss": 0.0073, + "step": 14180 + }, + { + "epoch": 2.53, + "learning_rate": 4.873555634807418e-05, + "loss": 0.009, + "step": 14190 + }, + { + "epoch": 2.53, + "learning_rate": 4.8734664764621975e-05, + "loss": 0.0101, + "step": 14200 + }, + { + "epoch": 2.53, + "learning_rate": 4.873377318116976e-05, + "loss": 0.0101, + "step": 14210 + }, + { + "epoch": 2.54, + "learning_rate": 4.873288159771755e-05, + "loss": 0.0095, + "step": 14220 + }, + { + "epoch": 2.54, + "learning_rate": 4.8731990014265335e-05, + "loss": 0.0085, + "step": 14230 + }, + { + "epoch": 2.54, + "learning_rate": 4.873109843081313e-05, + "loss": 0.0075, + "step": 14240 + }, + { + "epoch": 2.54, + "learning_rate": 4.873020684736092e-05, + "loss": 0.0066, + "step": 14250 + }, + { + "epoch": 2.54, + "learning_rate": 4.87293152639087e-05, + "loss": 0.009, + "step": 14260 + }, + { + "epoch": 2.54, + "learning_rate": 4.8728423680456494e-05, + "loss": 0.0097, + "step": 14270 + }, + { + "epoch": 2.55, + "learning_rate": 4.872753209700428e-05, + "loss": 0.007, + "step": 14280 + }, + { + "epoch": 2.55, + "learning_rate": 4.872664051355207e-05, + "loss": 0.0062, + "step": 14290 + }, + { + "epoch": 2.55, + "learning_rate": 4.872574893009986e-05, + "loss": 0.0093, + "step": 14300 + }, + { + "epoch": 2.55, + "learning_rate": 4.872485734664765e-05, + "loss": 0.0065, + "step": 14310 + }, + { + "epoch": 2.55, + "learning_rate": 4.8723965763195436e-05, + "loss": 0.0089, + "step": 14320 + }, + { + "epoch": 2.56, + "learning_rate": 4.872307417974323e-05, + "loss": 0.0099, + "step": 14330 + }, + { + "epoch": 2.56, + "learning_rate": 4.872218259629102e-05, + "loss": 0.0056, + "step": 14340 + }, + { + "epoch": 2.56, + "learning_rate": 4.87212910128388e-05, + "loss": 0.0076, + "step": 14350 + }, + { + "epoch": 2.56, + "learning_rate": 4.8720399429386594e-05, + "loss": 0.0072, + "step": 14360 + }, + { + "epoch": 2.56, + "learning_rate": 4.871950784593438e-05, + "loss": 0.0077, + "step": 14370 + }, + { + "epoch": 2.56, + "learning_rate": 4.871861626248217e-05, + "loss": 0.007, + "step": 14380 + }, + { + "epoch": 2.57, + "learning_rate": 4.871772467902996e-05, + "loss": 0.0076, + "step": 14390 + }, + { + "epoch": 2.57, + "learning_rate": 4.8716833095577745e-05, + "loss": 0.0102, + "step": 14400 + }, + { + "epoch": 2.57, + "learning_rate": 4.8715941512125537e-05, + "loss": 0.0095, + "step": 14410 + }, + { + "epoch": 2.57, + "learning_rate": 4.871504992867333e-05, + "loss": 0.0114, + "step": 14420 + }, + { + "epoch": 2.57, + "learning_rate": 4.871415834522112e-05, + "loss": 0.0052, + "step": 14430 + }, + { + "epoch": 2.57, + "learning_rate": 4.8713266761768903e-05, + "loss": 0.0102, + "step": 14440 + }, + { + "epoch": 2.58, + "learning_rate": 4.8712375178316695e-05, + "loss": 0.0087, + "step": 14450 + }, + { + "epoch": 2.58, + "learning_rate": 4.871148359486448e-05, + "loss": 0.008, + "step": 14460 + }, + { + "epoch": 2.58, + "learning_rate": 4.871059201141227e-05, + "loss": 0.0096, + "step": 14470 + }, + { + "epoch": 2.58, + "learning_rate": 4.870970042796006e-05, + "loss": 0.0068, + "step": 14480 + }, + { + "epoch": 2.58, + "learning_rate": 4.870889800285307e-05, + "loss": 0.0066, + "step": 14490 + }, + { + "epoch": 2.59, + "learning_rate": 4.870800641940086e-05, + "loss": 0.0076, + "step": 14500 + }, + { + "epoch": 2.59, + "learning_rate": 4.8707114835948645e-05, + "loss": 0.009, + "step": 14510 + }, + { + "epoch": 2.59, + "learning_rate": 4.8706223252496436e-05, + "loss": 0.0099, + "step": 14520 + }, + { + "epoch": 2.59, + "learning_rate": 4.870533166904422e-05, + "loss": 0.0061, + "step": 14530 + }, + { + "epoch": 2.59, + "learning_rate": 4.870444008559201e-05, + "loss": 0.0089, + "step": 14540 + }, + { + "epoch": 2.59, + "learning_rate": 4.87035485021398e-05, + "loss": 0.0111, + "step": 14550 + }, + { + "epoch": 2.6, + "learning_rate": 4.870265691868759e-05, + "loss": 0.0073, + "step": 14560 + }, + { + "epoch": 2.6, + "learning_rate": 4.8701765335235386e-05, + "loss": 0.0067, + "step": 14570 + }, + { + "epoch": 2.6, + "learning_rate": 4.870087375178317e-05, + "loss": 0.0076, + "step": 14580 + }, + { + "epoch": 2.6, + "learning_rate": 4.869998216833096e-05, + "loss": 0.0075, + "step": 14590 + }, + { + "epoch": 2.6, + "learning_rate": 4.8699090584878746e-05, + "loss": 0.008, + "step": 14600 + }, + { + "epoch": 2.61, + "learning_rate": 4.869819900142654e-05, + "loss": 0.0092, + "step": 14610 + }, + { + "epoch": 2.61, + "learning_rate": 4.869730741797433e-05, + "loss": 0.0098, + "step": 14620 + }, + { + "epoch": 2.61, + "learning_rate": 4.869641583452211e-05, + "loss": 0.0104, + "step": 14630 + }, + { + "epoch": 2.61, + "learning_rate": 4.8695524251069904e-05, + "loss": 0.0072, + "step": 14640 + }, + { + "epoch": 2.61, + "learning_rate": 4.869463266761769e-05, + "loss": 0.0105, + "step": 14650 + }, + { + "epoch": 2.61, + "learning_rate": 4.869374108416548e-05, + "loss": 0.0081, + "step": 14660 + }, + { + "epoch": 2.62, + "learning_rate": 4.8692849500713264e-05, + "loss": 0.0101, + "step": 14670 + }, + { + "epoch": 2.62, + "learning_rate": 4.869195791726106e-05, + "loss": 0.0092, + "step": 14680 + }, + { + "epoch": 2.62, + "learning_rate": 4.8691066333808846e-05, + "loss": 0.0072, + "step": 14690 + }, + { + "epoch": 2.62, + "learning_rate": 4.869017475035664e-05, + "loss": 0.0075, + "step": 14700 + }, + { + "epoch": 2.62, + "learning_rate": 4.868928316690443e-05, + "loss": 0.0093, + "step": 14710 + }, + { + "epoch": 2.62, + "learning_rate": 4.868839158345221e-05, + "loss": 0.0062, + "step": 14720 + }, + { + "epoch": 2.63, + "learning_rate": 4.8687500000000004e-05, + "loss": 0.0121, + "step": 14730 + }, + { + "epoch": 2.63, + "learning_rate": 4.868660841654779e-05, + "loss": 0.0054, + "step": 14740 + }, + { + "epoch": 2.63, + "learning_rate": 4.868571683309558e-05, + "loss": 0.0071, + "step": 14750 + }, + { + "epoch": 2.63, + "learning_rate": 4.8684825249643364e-05, + "loss": 0.0084, + "step": 14760 + }, + { + "epoch": 2.63, + "learning_rate": 4.8683933666191156e-05, + "loss": 0.0112, + "step": 14770 + }, + { + "epoch": 2.64, + "learning_rate": 4.868304208273895e-05, + "loss": 0.008, + "step": 14780 + }, + { + "epoch": 2.64, + "learning_rate": 4.868215049928674e-05, + "loss": 0.008, + "step": 14790 + }, + { + "epoch": 2.64, + "learning_rate": 4.868125891583453e-05, + "loss": 0.0057, + "step": 14800 + }, + { + "epoch": 2.64, + "learning_rate": 4.8680367332382314e-05, + "loss": 0.0107, + "step": 14810 + }, + { + "epoch": 2.64, + "learning_rate": 4.8679475748930105e-05, + "loss": 0.0107, + "step": 14820 + }, + { + "epoch": 2.64, + "learning_rate": 4.867858416547789e-05, + "loss": 0.0061, + "step": 14830 + }, + { + "epoch": 2.65, + "learning_rate": 4.867769258202568e-05, + "loss": 0.0079, + "step": 14840 + }, + { + "epoch": 2.65, + "learning_rate": 4.867680099857347e-05, + "loss": 0.0072, + "step": 14850 + }, + { + "epoch": 2.65, + "learning_rate": 4.8675909415121256e-05, + "loss": 0.0098, + "step": 14860 + }, + { + "epoch": 2.65, + "learning_rate": 4.867501783166905e-05, + "loss": 0.0063, + "step": 14870 + }, + { + "epoch": 2.65, + "learning_rate": 4.867412624821683e-05, + "loss": 0.008, + "step": 14880 + }, + { + "epoch": 2.66, + "learning_rate": 4.867323466476462e-05, + "loss": 0.0118, + "step": 14890 + }, + { + "epoch": 2.66, + "learning_rate": 4.8672343081312414e-05, + "loss": 0.0062, + "step": 14900 + }, + { + "epoch": 2.66, + "learning_rate": 4.8671451497860205e-05, + "loss": 0.0094, + "step": 14910 + }, + { + "epoch": 2.66, + "learning_rate": 4.867055991440799e-05, + "loss": 0.0066, + "step": 14920 + }, + { + "epoch": 2.66, + "learning_rate": 4.866966833095578e-05, + "loss": 0.008, + "step": 14930 + }, + { + "epoch": 2.66, + "learning_rate": 4.866877674750357e-05, + "loss": 0.0068, + "step": 14940 + }, + { + "epoch": 2.67, + "learning_rate": 4.866788516405136e-05, + "loss": 0.009, + "step": 14950 + }, + { + "epoch": 2.67, + "learning_rate": 4.866699358059915e-05, + "loss": 0.0099, + "step": 14960 + }, + { + "epoch": 2.67, + "learning_rate": 4.866610199714693e-05, + "loss": 0.0092, + "step": 14970 + }, + { + "epoch": 2.67, + "learning_rate": 4.8665210413694724e-05, + "loss": 0.0103, + "step": 14980 + }, + { + "epoch": 2.67, + "learning_rate": 4.866431883024251e-05, + "loss": 0.0108, + "step": 14990 + }, + { + "epoch": 2.67, + "learning_rate": 4.86634272467903e-05, + "loss": 0.0096, + "step": 15000 + }, + { + "epoch": 2.68, + "learning_rate": 4.866253566333809e-05, + "loss": 0.0073, + "step": 15010 + }, + { + "epoch": 2.68, + "learning_rate": 4.866164407988588e-05, + "loss": 0.0046, + "step": 15020 + }, + { + "epoch": 2.68, + "learning_rate": 4.866075249643367e-05, + "loss": 0.0063, + "step": 15030 + }, + { + "epoch": 2.68, + "learning_rate": 4.865986091298146e-05, + "loss": 0.0089, + "step": 15040 + }, + { + "epoch": 2.68, + "learning_rate": 4.865896932952925e-05, + "loss": 0.0054, + "step": 15050 + }, + { + "epoch": 2.69, + "learning_rate": 4.865807774607703e-05, + "loss": 0.0056, + "step": 15060 + }, + { + "epoch": 2.69, + "learning_rate": 4.8657186162624824e-05, + "loss": 0.0073, + "step": 15070 + }, + { + "epoch": 2.69, + "learning_rate": 4.8656294579172615e-05, + "loss": 0.0088, + "step": 15080 + }, + { + "epoch": 2.69, + "learning_rate": 4.86554029957204e-05, + "loss": 0.0092, + "step": 15090 + }, + { + "epoch": 2.69, + "learning_rate": 4.865451141226819e-05, + "loss": 0.0056, + "step": 15100 + }, + { + "epoch": 2.69, + "learning_rate": 4.8653619828815975e-05, + "loss": 0.0089, + "step": 15110 + }, + { + "epoch": 2.7, + "learning_rate": 4.865272824536377e-05, + "loss": 0.0069, + "step": 15120 + }, + { + "epoch": 2.7, + "learning_rate": 4.865183666191156e-05, + "loss": 0.0086, + "step": 15130 + }, + { + "epoch": 2.7, + "learning_rate": 4.865094507845935e-05, + "loss": 0.0085, + "step": 15140 + }, + { + "epoch": 2.7, + "learning_rate": 4.8650053495007134e-05, + "loss": 0.0097, + "step": 15150 + }, + { + "epoch": 2.7, + "learning_rate": 4.8649161911554925e-05, + "loss": 0.0068, + "step": 15160 + }, + { + "epoch": 2.71, + "learning_rate": 4.8648270328102716e-05, + "loss": 0.0066, + "step": 15170 + }, + { + "epoch": 2.71, + "learning_rate": 4.86473787446505e-05, + "loss": 0.0065, + "step": 15180 + }, + { + "epoch": 2.71, + "learning_rate": 4.864648716119829e-05, + "loss": 0.0074, + "step": 15190 + }, + { + "epoch": 2.71, + "learning_rate": 4.8645595577746076e-05, + "loss": 0.0058, + "step": 15200 + }, + { + "epoch": 2.71, + "learning_rate": 4.864470399429387e-05, + "loss": 0.0075, + "step": 15210 + }, + { + "epoch": 2.71, + "learning_rate": 4.864381241084165e-05, + "loss": 0.0085, + "step": 15220 + }, + { + "epoch": 2.72, + "learning_rate": 4.864292082738944e-05, + "loss": 0.0093, + "step": 15230 + }, + { + "epoch": 2.72, + "learning_rate": 4.8642029243937234e-05, + "loss": 0.0092, + "step": 15240 + }, + { + "epoch": 2.72, + "learning_rate": 4.8641137660485025e-05, + "loss": 0.0073, + "step": 15250 + }, + { + "epoch": 2.72, + "learning_rate": 4.8640246077032817e-05, + "loss": 0.0075, + "step": 15260 + }, + { + "epoch": 2.72, + "learning_rate": 4.86393544935806e-05, + "loss": 0.0098, + "step": 15270 + }, + { + "epoch": 2.72, + "learning_rate": 4.863846291012839e-05, + "loss": 0.0116, + "step": 15280 + }, + { + "epoch": 2.73, + "learning_rate": 4.8637571326676177e-05, + "loss": 0.0084, + "step": 15290 + }, + { + "epoch": 2.73, + "learning_rate": 4.863667974322397e-05, + "loss": 0.0072, + "step": 15300 + }, + { + "epoch": 2.73, + "learning_rate": 4.863578815977176e-05, + "loss": 0.0071, + "step": 15310 + }, + { + "epoch": 2.73, + "learning_rate": 4.8634896576319543e-05, + "loss": 0.0108, + "step": 15320 + }, + { + "epoch": 2.73, + "learning_rate": 4.8634004992867335e-05, + "loss": 0.0064, + "step": 15330 + }, + { + "epoch": 2.74, + "learning_rate": 4.863311340941512e-05, + "loss": 0.0087, + "step": 15340 + }, + { + "epoch": 2.74, + "learning_rate": 4.863222182596292e-05, + "loss": 0.0054, + "step": 15350 + }, + { + "epoch": 2.74, + "learning_rate": 4.86313302425107e-05, + "loss": 0.009, + "step": 15360 + }, + { + "epoch": 2.74, + "learning_rate": 4.863043865905849e-05, + "loss": 0.0056, + "step": 15370 + }, + { + "epoch": 2.74, + "learning_rate": 4.862954707560628e-05, + "loss": 0.0088, + "step": 15380 + }, + { + "epoch": 2.74, + "learning_rate": 4.862865549215407e-05, + "loss": 0.0066, + "step": 15390 + }, + { + "epoch": 2.75, + "learning_rate": 4.862776390870186e-05, + "loss": 0.0075, + "step": 15400 + }, + { + "epoch": 2.75, + "learning_rate": 4.8626872325249644e-05, + "loss": 0.0103, + "step": 15410 + }, + { + "epoch": 2.75, + "learning_rate": 4.8625980741797435e-05, + "loss": 0.0069, + "step": 15420 + }, + { + "epoch": 2.75, + "learning_rate": 4.862508915834522e-05, + "loss": 0.0071, + "step": 15430 + }, + { + "epoch": 2.75, + "learning_rate": 4.862419757489301e-05, + "loss": 0.0077, + "step": 15440 + }, + { + "epoch": 2.75, + "learning_rate": 4.8623305991440795e-05, + "loss": 0.0098, + "step": 15450 + }, + { + "epoch": 2.76, + "learning_rate": 4.862241440798859e-05, + "loss": 0.0084, + "step": 15460 + }, + { + "epoch": 2.76, + "learning_rate": 4.862152282453638e-05, + "loss": 0.0103, + "step": 15470 + }, + { + "epoch": 2.76, + "learning_rate": 4.862063124108417e-05, + "loss": 0.0059, + "step": 15480 + }, + { + "epoch": 2.76, + "learning_rate": 4.861973965763196e-05, + "loss": 0.0057, + "step": 15490 + }, + { + "epoch": 2.76, + "learning_rate": 4.8618848074179745e-05, + "loss": 0.0093, + "step": 15500 + }, + { + "epoch": 2.77, + "learning_rate": 4.8617956490727536e-05, + "loss": 0.0076, + "step": 15510 + }, + { + "epoch": 2.77, + "learning_rate": 4.861706490727532e-05, + "loss": 0.0099, + "step": 15520 + }, + { + "epoch": 2.77, + "learning_rate": 4.861617332382311e-05, + "loss": 0.0069, + "step": 15530 + }, + { + "epoch": 2.77, + "learning_rate": 4.86152817403709e-05, + "loss": 0.0042, + "step": 15540 + }, + { + "epoch": 2.77, + "learning_rate": 4.861439015691869e-05, + "loss": 0.0066, + "step": 15550 + }, + { + "epoch": 2.77, + "learning_rate": 4.861349857346648e-05, + "loss": 0.0094, + "step": 15560 + }, + { + "epoch": 2.78, + "learning_rate": 4.861260699001427e-05, + "loss": 0.0058, + "step": 15570 + }, + { + "epoch": 2.78, + "learning_rate": 4.861171540656206e-05, + "loss": 0.0083, + "step": 15580 + }, + { + "epoch": 2.78, + "learning_rate": 4.8610823823109845e-05, + "loss": 0.0098, + "step": 15590 + }, + { + "epoch": 2.78, + "learning_rate": 4.8609932239657636e-05, + "loss": 0.0071, + "step": 15600 + }, + { + "epoch": 2.78, + "learning_rate": 4.860904065620542e-05, + "loss": 0.0091, + "step": 15610 + }, + { + "epoch": 2.79, + "learning_rate": 4.860814907275321e-05, + "loss": 0.0083, + "step": 15620 + }, + { + "epoch": 2.79, + "learning_rate": 4.8607257489301e-05, + "loss": 0.0079, + "step": 15630 + }, + { + "epoch": 2.79, + "learning_rate": 4.860636590584879e-05, + "loss": 0.0083, + "step": 15640 + }, + { + "epoch": 2.79, + "learning_rate": 4.860547432239658e-05, + "loss": 0.0065, + "step": 15650 + }, + { + "epoch": 2.79, + "learning_rate": 4.860458273894436e-05, + "loss": 0.0098, + "step": 15660 + }, + { + "epoch": 2.79, + "learning_rate": 4.8603691155492154e-05, + "loss": 0.0095, + "step": 15670 + }, + { + "epoch": 2.8, + "learning_rate": 4.8602799572039946e-05, + "loss": 0.0063, + "step": 15680 + }, + { + "epoch": 2.8, + "learning_rate": 4.860190798858774e-05, + "loss": 0.0049, + "step": 15690 + }, + { + "epoch": 2.8, + "learning_rate": 4.860101640513552e-05, + "loss": 0.0073, + "step": 15700 + }, + { + "epoch": 2.8, + "learning_rate": 4.860012482168331e-05, + "loss": 0.0085, + "step": 15710 + }, + { + "epoch": 2.8, + "learning_rate": 4.8599233238231104e-05, + "loss": 0.0066, + "step": 15720 + }, + { + "epoch": 2.8, + "learning_rate": 4.859834165477889e-05, + "loss": 0.0098, + "step": 15730 + }, + { + "epoch": 2.81, + "learning_rate": 4.859745007132668e-05, + "loss": 0.0059, + "step": 15740 + }, + { + "epoch": 2.81, + "learning_rate": 4.8596558487874464e-05, + "loss": 0.0087, + "step": 15750 + }, + { + "epoch": 2.81, + "learning_rate": 4.8595666904422255e-05, + "loss": 0.0063, + "step": 15760 + }, + { + "epoch": 2.81, + "learning_rate": 4.8594775320970046e-05, + "loss": 0.0095, + "step": 15770 + }, + { + "epoch": 2.81, + "learning_rate": 4.859388373751783e-05, + "loss": 0.0095, + "step": 15780 + }, + { + "epoch": 2.82, + "learning_rate": 4.859299215406562e-05, + "loss": 0.0122, + "step": 15790 + }, + { + "epoch": 2.82, + "learning_rate": 4.859210057061341e-05, + "loss": 0.0107, + "step": 15800 + }, + { + "epoch": 2.82, + "learning_rate": 4.8591208987161204e-05, + "loss": 0.0066, + "step": 15810 + }, + { + "epoch": 2.82, + "learning_rate": 4.859031740370899e-05, + "loss": 0.0107, + "step": 15820 + }, + { + "epoch": 2.82, + "learning_rate": 4.858942582025678e-05, + "loss": 0.0071, + "step": 15830 + }, + { + "epoch": 2.82, + "learning_rate": 4.8588534236804564e-05, + "loss": 0.0088, + "step": 15840 + }, + { + "epoch": 2.83, + "learning_rate": 4.8587642653352356e-05, + "loss": 0.0109, + "step": 15850 + }, + { + "epoch": 2.83, + "learning_rate": 4.858675106990015e-05, + "loss": 0.0085, + "step": 15860 + }, + { + "epoch": 2.83, + "learning_rate": 4.858585948644793e-05, + "loss": 0.0091, + "step": 15870 + }, + { + "epoch": 2.83, + "learning_rate": 4.858496790299572e-05, + "loss": 0.0061, + "step": 15880 + }, + { + "epoch": 2.83, + "learning_rate": 4.858407631954351e-05, + "loss": 0.0118, + "step": 15890 + }, + { + "epoch": 2.84, + "learning_rate": 4.8583184736091305e-05, + "loss": 0.0057, + "step": 15900 + }, + { + "epoch": 2.84, + "learning_rate": 4.858229315263909e-05, + "loss": 0.0076, + "step": 15910 + }, + { + "epoch": 2.84, + "learning_rate": 4.858140156918688e-05, + "loss": 0.0086, + "step": 15920 + }, + { + "epoch": 2.84, + "learning_rate": 4.8580509985734665e-05, + "loss": 0.0063, + "step": 15930 + }, + { + "epoch": 2.84, + "learning_rate": 4.8579618402282456e-05, + "loss": 0.0057, + "step": 15940 + }, + { + "epoch": 2.84, + "learning_rate": 4.857872681883025e-05, + "loss": 0.0086, + "step": 15950 + }, + { + "epoch": 2.85, + "learning_rate": 4.857783523537803e-05, + "loss": 0.0064, + "step": 15960 + }, + { + "epoch": 2.85, + "learning_rate": 4.857694365192582e-05, + "loss": 0.0079, + "step": 15970 + }, + { + "epoch": 2.85, + "learning_rate": 4.857605206847361e-05, + "loss": 0.0084, + "step": 15980 + }, + { + "epoch": 2.85, + "learning_rate": 4.85751604850214e-05, + "loss": 0.0083, + "step": 15990 + }, + { + "epoch": 2.85, + "learning_rate": 4.857426890156919e-05, + "loss": 0.0066, + "step": 16000 + }, + { + "epoch": 2.85, + "learning_rate": 4.857337731811698e-05, + "loss": 0.0083, + "step": 16010 + }, + { + "epoch": 2.86, + "learning_rate": 4.8572485734664766e-05, + "loss": 0.0124, + "step": 16020 + }, + { + "epoch": 2.86, + "learning_rate": 4.857159415121256e-05, + "loss": 0.0119, + "step": 16030 + }, + { + "epoch": 2.86, + "learning_rate": 4.857070256776035e-05, + "loss": 0.0074, + "step": 16040 + }, + { + "epoch": 2.86, + "learning_rate": 4.856981098430813e-05, + "loss": 0.0077, + "step": 16050 + }, + { + "epoch": 2.86, + "learning_rate": 4.8568919400855924e-05, + "loss": 0.007, + "step": 16060 + }, + { + "epoch": 2.87, + "learning_rate": 4.856802781740371e-05, + "loss": 0.0093, + "step": 16070 + }, + { + "epoch": 2.87, + "learning_rate": 4.85671362339515e-05, + "loss": 0.0047, + "step": 16080 + }, + { + "epoch": 2.87, + "learning_rate": 4.856624465049929e-05, + "loss": 0.0085, + "step": 16090 + }, + { + "epoch": 2.87, + "learning_rate": 4.8565353067047075e-05, + "loss": 0.0071, + "step": 16100 + }, + { + "epoch": 2.87, + "learning_rate": 4.8564461483594866e-05, + "loss": 0.009, + "step": 16110 + }, + { + "epoch": 2.87, + "learning_rate": 4.856356990014266e-05, + "loss": 0.0092, + "step": 16120 + }, + { + "epoch": 2.88, + "learning_rate": 4.856267831669045e-05, + "loss": 0.0145, + "step": 16130 + }, + { + "epoch": 2.88, + "learning_rate": 4.856178673323823e-05, + "loss": 0.0064, + "step": 16140 + }, + { + "epoch": 2.88, + "learning_rate": 4.8560895149786024e-05, + "loss": 0.0075, + "step": 16150 + }, + { + "epoch": 2.88, + "learning_rate": 4.856000356633381e-05, + "loss": 0.0098, + "step": 16160 + }, + { + "epoch": 2.88, + "learning_rate": 4.85591119828816e-05, + "loss": 0.0089, + "step": 16170 + }, + { + "epoch": 2.89, + "learning_rate": 4.855822039942939e-05, + "loss": 0.0087, + "step": 16180 + }, + { + "epoch": 2.89, + "learning_rate": 4.8557328815977175e-05, + "loss": 0.0083, + "step": 16190 + }, + { + "epoch": 2.89, + "learning_rate": 4.855643723252497e-05, + "loss": 0.0075, + "step": 16200 + }, + { + "epoch": 2.89, + "learning_rate": 4.855554564907275e-05, + "loss": 0.006, + "step": 16210 + }, + { + "epoch": 2.89, + "learning_rate": 4.855465406562054e-05, + "loss": 0.0053, + "step": 16220 + }, + { + "epoch": 2.89, + "learning_rate": 4.8553762482168334e-05, + "loss": 0.007, + "step": 16230 + }, + { + "epoch": 2.9, + "learning_rate": 4.8552870898716125e-05, + "loss": 0.0079, + "step": 16240 + }, + { + "epoch": 2.9, + "learning_rate": 4.855197931526391e-05, + "loss": 0.0065, + "step": 16250 + }, + { + "epoch": 2.9, + "learning_rate": 4.85510877318117e-05, + "loss": 0.0066, + "step": 16260 + }, + { + "epoch": 2.9, + "learning_rate": 4.855019614835949e-05, + "loss": 0.008, + "step": 16270 + }, + { + "epoch": 2.9, + "learning_rate": 4.8549304564907276e-05, + "loss": 0.0087, + "step": 16280 + }, + { + "epoch": 2.9, + "learning_rate": 4.854841298145507e-05, + "loss": 0.0103, + "step": 16290 + }, + { + "epoch": 2.91, + "learning_rate": 4.854752139800285e-05, + "loss": 0.0063, + "step": 16300 + }, + { + "epoch": 2.91, + "learning_rate": 4.854662981455064e-05, + "loss": 0.0064, + "step": 16310 + }, + { + "epoch": 2.91, + "learning_rate": 4.8545738231098434e-05, + "loss": 0.0073, + "step": 16320 + }, + { + "epoch": 2.91, + "learning_rate": 4.854484664764622e-05, + "loss": 0.0106, + "step": 16330 + }, + { + "epoch": 2.91, + "learning_rate": 4.8543955064194016e-05, + "loss": 0.0071, + "step": 16340 + }, + { + "epoch": 2.92, + "learning_rate": 4.85430634807418e-05, + "loss": 0.0107, + "step": 16350 + }, + { + "epoch": 2.92, + "learning_rate": 4.854217189728959e-05, + "loss": 0.006, + "step": 16360 + }, + { + "epoch": 2.92, + "learning_rate": 4.8541280313837377e-05, + "loss": 0.0145, + "step": 16370 + }, + { + "epoch": 2.92, + "learning_rate": 4.854038873038517e-05, + "loss": 0.0067, + "step": 16380 + }, + { + "epoch": 2.92, + "learning_rate": 4.853949714693295e-05, + "loss": 0.0073, + "step": 16390 + }, + { + "epoch": 2.92, + "learning_rate": 4.8538605563480743e-05, + "loss": 0.0068, + "step": 16400 + }, + { + "epoch": 2.93, + "learning_rate": 4.8537713980028535e-05, + "loss": 0.0052, + "step": 16410 + }, + { + "epoch": 2.93, + "learning_rate": 4.853682239657632e-05, + "loss": 0.0071, + "step": 16420 + }, + { + "epoch": 2.93, + "learning_rate": 4.853593081312411e-05, + "loss": 0.0081, + "step": 16430 + }, + { + "epoch": 2.93, + "learning_rate": 4.8535039229671895e-05, + "loss": 0.0107, + "step": 16440 + }, + { + "epoch": 2.93, + "learning_rate": 4.853414764621969e-05, + "loss": 0.0068, + "step": 16450 + }, + { + "epoch": 2.94, + "learning_rate": 4.853325606276748e-05, + "loss": 0.0041, + "step": 16460 + }, + { + "epoch": 2.94, + "learning_rate": 4.853236447931527e-05, + "loss": 0.0069, + "step": 16470 + }, + { + "epoch": 2.94, + "learning_rate": 4.853147289586305e-05, + "loss": 0.014, + "step": 16480 + }, + { + "epoch": 2.94, + "learning_rate": 4.8530581312410844e-05, + "loss": 0.0089, + "step": 16490 + }, + { + "epoch": 2.94, + "learning_rate": 4.8529689728958635e-05, + "loss": 0.01, + "step": 16500 + }, + { + "epoch": 2.94, + "learning_rate": 4.852879814550642e-05, + "loss": 0.0079, + "step": 16510 + }, + { + "epoch": 2.95, + "learning_rate": 4.852790656205421e-05, + "loss": 0.0102, + "step": 16520 + }, + { + "epoch": 2.95, + "learning_rate": 4.8527014978601995e-05, + "loss": 0.007, + "step": 16530 + }, + { + "epoch": 2.95, + "learning_rate": 4.8526123395149786e-05, + "loss": 0.0059, + "step": 16540 + }, + { + "epoch": 2.95, + "learning_rate": 4.852523181169758e-05, + "loss": 0.0069, + "step": 16550 + }, + { + "epoch": 2.95, + "learning_rate": 4.852434022824537e-05, + "loss": 0.0073, + "step": 16560 + }, + { + "epoch": 2.95, + "learning_rate": 4.852344864479316e-05, + "loss": 0.0083, + "step": 16570 + }, + { + "epoch": 2.96, + "learning_rate": 4.8522557061340945e-05, + "loss": 0.0086, + "step": 16580 + }, + { + "epoch": 2.96, + "learning_rate": 4.8521665477888736e-05, + "loss": 0.0094, + "step": 16590 + }, + { + "epoch": 2.96, + "learning_rate": 4.852077389443652e-05, + "loss": 0.0069, + "step": 16600 + }, + { + "epoch": 2.96, + "learning_rate": 4.851988231098431e-05, + "loss": 0.0091, + "step": 16610 + }, + { + "epoch": 2.96, + "learning_rate": 4.8518990727532096e-05, + "loss": 0.0151, + "step": 16620 + }, + { + "epoch": 2.97, + "learning_rate": 4.851809914407989e-05, + "loss": 0.008, + "step": 16630 + }, + { + "epoch": 2.97, + "learning_rate": 4.851720756062768e-05, + "loss": 0.006, + "step": 16640 + }, + { + "epoch": 2.97, + "learning_rate": 4.851631597717546e-05, + "loss": 0.0068, + "step": 16650 + }, + { + "epoch": 2.97, + "learning_rate": 4.8515424393723254e-05, + "loss": 0.0036, + "step": 16660 + }, + { + "epoch": 2.97, + "learning_rate": 4.8514532810271045e-05, + "loss": 0.0068, + "step": 16670 + }, + { + "epoch": 2.97, + "learning_rate": 4.8513641226818836e-05, + "loss": 0.0083, + "step": 16680 + }, + { + "epoch": 2.98, + "learning_rate": 4.851274964336662e-05, + "loss": 0.0074, + "step": 16690 + }, + { + "epoch": 2.98, + "learning_rate": 4.851185805991441e-05, + "loss": 0.0091, + "step": 16700 + }, + { + "epoch": 2.98, + "learning_rate": 4.8510966476462196e-05, + "loss": 0.0096, + "step": 16710 + }, + { + "epoch": 2.98, + "learning_rate": 4.851007489300999e-05, + "loss": 0.0084, + "step": 16720 + }, + { + "epoch": 2.98, + "learning_rate": 4.850918330955778e-05, + "loss": 0.0111, + "step": 16730 + }, + { + "epoch": 2.99, + "learning_rate": 4.850829172610556e-05, + "loss": 0.0121, + "step": 16740 + }, + { + "epoch": 2.99, + "learning_rate": 4.8507400142653354e-05, + "loss": 0.0082, + "step": 16750 + }, + { + "epoch": 2.99, + "learning_rate": 4.850650855920114e-05, + "loss": 0.0093, + "step": 16760 + }, + { + "epoch": 2.99, + "learning_rate": 4.850561697574893e-05, + "loss": 0.0081, + "step": 16770 + }, + { + "epoch": 2.99, + "learning_rate": 4.850472539229672e-05, + "loss": 0.0104, + "step": 16780 + }, + { + "epoch": 2.99, + "learning_rate": 4.850383380884451e-05, + "loss": 0.0065, + "step": 16790 + }, + { + "epoch": 3.0, + "learning_rate": 4.8502942225392304e-05, + "loss": 0.0086, + "step": 16800 + }, + { + "epoch": 3.0, + "learning_rate": 4.850205064194009e-05, + "loss": 0.0045, + "step": 16810 + }, + { + "epoch": 3.0, + "learning_rate": 4.850115905848788e-05, + "loss": 0.0074, + "step": 16820 + }, + { + "epoch": 3.0, + "eval_loss": 0.016142597422003746, + "eval_runtime": 195.6313, + "eval_samples_per_second": 23.713, + "eval_steps_per_second": 2.965, + "step": 16824 + }, + { + "epoch": 3.0, + "learning_rate": 4.8500267475035664e-05, + "loss": 0.0086, + "step": 16830 + }, + { + "epoch": 3.0, + "learning_rate": 4.8499375891583455e-05, + "loss": 0.0051, + "step": 16840 + }, + { + "epoch": 3.0, + "learning_rate": 4.849848430813124e-05, + "loss": 0.0065, + "step": 16850 + }, + { + "epoch": 3.01, + "learning_rate": 4.849759272467903e-05, + "loss": 0.0073, + "step": 16860 + }, + { + "epoch": 3.01, + "learning_rate": 4.849670114122682e-05, + "loss": 0.0099, + "step": 16870 + }, + { + "epoch": 3.01, + "learning_rate": 4.8495809557774606e-05, + "loss": 0.0062, + "step": 16880 + }, + { + "epoch": 3.01, + "learning_rate": 4.8494917974322404e-05, + "loss": 0.0057, + "step": 16890 + }, + { + "epoch": 3.01, + "learning_rate": 4.849402639087019e-05, + "loss": 0.0092, + "step": 16900 + }, + { + "epoch": 3.02, + "learning_rate": 4.849313480741798e-05, + "loss": 0.0087, + "step": 16910 + }, + { + "epoch": 3.02, + "learning_rate": 4.8492243223965764e-05, + "loss": 0.006, + "step": 16920 + }, + { + "epoch": 3.02, + "learning_rate": 4.8491351640513556e-05, + "loss": 0.0105, + "step": 16930 + }, + { + "epoch": 3.02, + "learning_rate": 4.849046005706134e-05, + "loss": 0.0067, + "step": 16940 + }, + { + "epoch": 3.02, + "learning_rate": 4.848956847360913e-05, + "loss": 0.0085, + "step": 16950 + }, + { + "epoch": 3.02, + "learning_rate": 4.848867689015692e-05, + "loss": 0.008, + "step": 16960 + }, + { + "epoch": 3.03, + "learning_rate": 4.848778530670471e-05, + "loss": 0.006, + "step": 16970 + }, + { + "epoch": 3.03, + "learning_rate": 4.84868937232525e-05, + "loss": 0.0069, + "step": 16980 + }, + { + "epoch": 3.03, + "learning_rate": 4.848600213980028e-05, + "loss": 0.0092, + "step": 16990 + }, + { + "epoch": 3.03, + "learning_rate": 4.848511055634808e-05, + "loss": 0.0079, + "step": 17000 + }, + { + "epoch": 3.03, + "learning_rate": 4.8484218972895865e-05, + "loss": 0.0079, + "step": 17010 + }, + { + "epoch": 3.03, + "learning_rate": 4.8483327389443656e-05, + "loss": 0.0062, + "step": 17020 + }, + { + "epoch": 3.04, + "learning_rate": 4.848243580599145e-05, + "loss": 0.0094, + "step": 17030 + }, + { + "epoch": 3.04, + "learning_rate": 4.848154422253923e-05, + "loss": 0.0095, + "step": 17040 + }, + { + "epoch": 3.04, + "learning_rate": 4.848065263908702e-05, + "loss": 0.0052, + "step": 17050 + }, + { + "epoch": 3.04, + "learning_rate": 4.847976105563481e-05, + "loss": 0.0083, + "step": 17060 + }, + { + "epoch": 3.04, + "learning_rate": 4.84788694721826e-05, + "loss": 0.0057, + "step": 17070 + }, + { + "epoch": 3.05, + "learning_rate": 4.847797788873038e-05, + "loss": 0.0127, + "step": 17080 + }, + { + "epoch": 3.05, + "learning_rate": 4.8477086305278174e-05, + "loss": 0.0091, + "step": 17090 + }, + { + "epoch": 3.05, + "learning_rate": 4.8476194721825965e-05, + "loss": 0.0083, + "step": 17100 + }, + { + "epoch": 3.05, + "learning_rate": 4.847530313837376e-05, + "loss": 0.0071, + "step": 17110 + }, + { + "epoch": 3.05, + "learning_rate": 4.847441155492155e-05, + "loss": 0.0065, + "step": 17120 + }, + { + "epoch": 3.05, + "learning_rate": 4.847351997146933e-05, + "loss": 0.0058, + "step": 17130 + }, + { + "epoch": 3.06, + "learning_rate": 4.8472628388017124e-05, + "loss": 0.0058, + "step": 17140 + }, + { + "epoch": 3.06, + "learning_rate": 4.847173680456491e-05, + "loss": 0.0085, + "step": 17150 + }, + { + "epoch": 3.06, + "learning_rate": 4.84708452211127e-05, + "loss": 0.007, + "step": 17160 + }, + { + "epoch": 3.06, + "learning_rate": 4.8469953637660484e-05, + "loss": 0.011, + "step": 17170 + }, + { + "epoch": 3.06, + "learning_rate": 4.8469062054208275e-05, + "loss": 0.0059, + "step": 17180 + }, + { + "epoch": 3.07, + "learning_rate": 4.8468170470756066e-05, + "loss": 0.0067, + "step": 17190 + }, + { + "epoch": 3.07, + "learning_rate": 4.846727888730385e-05, + "loss": 0.0075, + "step": 17200 + }, + { + "epoch": 3.07, + "learning_rate": 4.846638730385164e-05, + "loss": 0.0067, + "step": 17210 + }, + { + "epoch": 3.07, + "learning_rate": 4.846549572039943e-05, + "loss": 0.0072, + "step": 17220 + }, + { + "epoch": 3.07, + "learning_rate": 4.8464604136947224e-05, + "loss": 0.0088, + "step": 17230 + }, + { + "epoch": 3.07, + "learning_rate": 4.846371255349501e-05, + "loss": 0.0074, + "step": 17240 + }, + { + "epoch": 3.08, + "learning_rate": 4.84628209700428e-05, + "loss": 0.0082, + "step": 17250 + }, + { + "epoch": 3.08, + "learning_rate": 4.846192938659059e-05, + "loss": 0.0085, + "step": 17260 + }, + { + "epoch": 3.08, + "learning_rate": 4.8461037803138375e-05, + "loss": 0.0088, + "step": 17270 + }, + { + "epoch": 3.08, + "learning_rate": 4.846014621968617e-05, + "loss": 0.0027, + "step": 17280 + }, + { + "epoch": 3.08, + "learning_rate": 4.845925463623395e-05, + "loss": 0.0068, + "step": 17290 + }, + { + "epoch": 3.08, + "learning_rate": 4.845836305278174e-05, + "loss": 0.0065, + "step": 17300 + }, + { + "epoch": 3.09, + "learning_rate": 4.845747146932953e-05, + "loss": 0.0055, + "step": 17310 + }, + { + "epoch": 3.09, + "learning_rate": 4.845657988587732e-05, + "loss": 0.008, + "step": 17320 + }, + { + "epoch": 3.09, + "learning_rate": 4.845568830242511e-05, + "loss": 0.0056, + "step": 17330 + }, + { + "epoch": 3.09, + "learning_rate": 4.84547967189729e-05, + "loss": 0.0064, + "step": 17340 + }, + { + "epoch": 3.09, + "learning_rate": 4.845390513552069e-05, + "loss": 0.0073, + "step": 17350 + }, + { + "epoch": 3.1, + "learning_rate": 4.8453013552068476e-05, + "loss": 0.0086, + "step": 17360 + }, + { + "epoch": 3.1, + "learning_rate": 4.845212196861627e-05, + "loss": 0.0085, + "step": 17370 + }, + { + "epoch": 3.1, + "learning_rate": 4.845123038516405e-05, + "loss": 0.0069, + "step": 17380 + }, + { + "epoch": 3.1, + "learning_rate": 4.845033880171184e-05, + "loss": 0.0078, + "step": 17390 + }, + { + "epoch": 3.1, + "learning_rate": 4.844944721825963e-05, + "loss": 0.0078, + "step": 17400 + }, + { + "epoch": 3.1, + "learning_rate": 4.844855563480742e-05, + "loss": 0.0094, + "step": 17410 + }, + { + "epoch": 3.11, + "learning_rate": 4.844766405135521e-05, + "loss": 0.0082, + "step": 17420 + }, + { + "epoch": 3.11, + "learning_rate": 4.8446772467902994e-05, + "loss": 0.0076, + "step": 17430 + }, + { + "epoch": 3.11, + "learning_rate": 4.844588088445079e-05, + "loss": 0.0087, + "step": 17440 + }, + { + "epoch": 3.11, + "learning_rate": 4.8444989300998577e-05, + "loss": 0.0046, + "step": 17450 + }, + { + "epoch": 3.11, + "learning_rate": 4.844409771754637e-05, + "loss": 0.0066, + "step": 17460 + }, + { + "epoch": 3.12, + "learning_rate": 4.844320613409415e-05, + "loss": 0.0067, + "step": 17470 + }, + { + "epoch": 3.12, + "learning_rate": 4.844231455064194e-05, + "loss": 0.0053, + "step": 17480 + }, + { + "epoch": 3.12, + "learning_rate": 4.8441422967189735e-05, + "loss": 0.0056, + "step": 17490 + }, + { + "epoch": 3.12, + "learning_rate": 4.844053138373752e-05, + "loss": 0.0087, + "step": 17500 + }, + { + "epoch": 3.12, + "learning_rate": 4.843963980028531e-05, + "loss": 0.0057, + "step": 17510 + }, + { + "epoch": 3.12, + "learning_rate": 4.8438748216833095e-05, + "loss": 0.0076, + "step": 17520 + }, + { + "epoch": 3.13, + "learning_rate": 4.8437856633380886e-05, + "loss": 0.0062, + "step": 17530 + }, + { + "epoch": 3.13, + "learning_rate": 4.843696504992867e-05, + "loss": 0.0071, + "step": 17540 + }, + { + "epoch": 3.13, + "learning_rate": 4.843607346647647e-05, + "loss": 0.009, + "step": 17550 + }, + { + "epoch": 3.13, + "learning_rate": 4.843518188302425e-05, + "loss": 0.0059, + "step": 17560 + }, + { + "epoch": 3.13, + "learning_rate": 4.8434290299572044e-05, + "loss": 0.0055, + "step": 17570 + }, + { + "epoch": 3.13, + "learning_rate": 4.8433398716119835e-05, + "loss": 0.0106, + "step": 17580 + }, + { + "epoch": 3.14, + "learning_rate": 4.843250713266762e-05, + "loss": 0.0072, + "step": 17590 + }, + { + "epoch": 3.14, + "learning_rate": 4.843161554921541e-05, + "loss": 0.01, + "step": 17600 + }, + { + "epoch": 3.14, + "learning_rate": 4.8430723965763195e-05, + "loss": 0.0073, + "step": 17610 + }, + { + "epoch": 3.14, + "learning_rate": 4.8429832382310986e-05, + "loss": 0.0063, + "step": 17620 + }, + { + "epoch": 3.14, + "learning_rate": 4.842894079885877e-05, + "loss": 0.0073, + "step": 17630 + }, + { + "epoch": 3.15, + "learning_rate": 4.842804921540656e-05, + "loss": 0.005, + "step": 17640 + }, + { + "epoch": 3.15, + "learning_rate": 4.842715763195435e-05, + "loss": 0.0089, + "step": 17650 + }, + { + "epoch": 3.15, + "learning_rate": 4.8426266048502145e-05, + "loss": 0.0101, + "step": 17660 + }, + { + "epoch": 3.15, + "learning_rate": 4.8425374465049936e-05, + "loss": 0.0084, + "step": 17670 + }, + { + "epoch": 3.15, + "learning_rate": 4.842448288159772e-05, + "loss": 0.0069, + "step": 17680 + }, + { + "epoch": 3.15, + "learning_rate": 4.842359129814551e-05, + "loss": 0.0048, + "step": 17690 + }, + { + "epoch": 3.16, + "learning_rate": 4.8422699714693296e-05, + "loss": 0.008, + "step": 17700 + }, + { + "epoch": 3.16, + "learning_rate": 4.842180813124109e-05, + "loss": 0.0044, + "step": 17710 + }, + { + "epoch": 3.16, + "learning_rate": 4.842091654778888e-05, + "loss": 0.0114, + "step": 17720 + }, + { + "epoch": 3.16, + "learning_rate": 4.842002496433666e-05, + "loss": 0.0057, + "step": 17730 + }, + { + "epoch": 3.16, + "learning_rate": 4.8419133380884454e-05, + "loss": 0.005, + "step": 17740 + }, + { + "epoch": 3.17, + "learning_rate": 4.841824179743224e-05, + "loss": 0.0064, + "step": 17750 + }, + { + "epoch": 3.17, + "learning_rate": 4.841735021398003e-05, + "loss": 0.0084, + "step": 17760 + }, + { + "epoch": 3.17, + "learning_rate": 4.841645863052782e-05, + "loss": 0.0059, + "step": 17770 + }, + { + "epoch": 3.17, + "learning_rate": 4.841556704707561e-05, + "loss": 0.0071, + "step": 17780 + }, + { + "epoch": 3.17, + "learning_rate": 4.8414675463623396e-05, + "loss": 0.0058, + "step": 17790 + }, + { + "epoch": 3.17, + "learning_rate": 4.841378388017119e-05, + "loss": 0.0094, + "step": 17800 + }, + { + "epoch": 3.18, + "learning_rate": 4.841289229671898e-05, + "loss": 0.0057, + "step": 17810 + }, + { + "epoch": 3.18, + "learning_rate": 4.841200071326676e-05, + "loss": 0.005, + "step": 17820 + }, + { + "epoch": 3.18, + "learning_rate": 4.8411109129814554e-05, + "loss": 0.0082, + "step": 17830 + }, + { + "epoch": 3.18, + "learning_rate": 4.841021754636234e-05, + "loss": 0.0061, + "step": 17840 + }, + { + "epoch": 3.18, + "learning_rate": 4.840932596291013e-05, + "loss": 0.0095, + "step": 17850 + }, + { + "epoch": 3.18, + "learning_rate": 4.8408434379457914e-05, + "loss": 0.0076, + "step": 17860 + }, + { + "epoch": 3.19, + "learning_rate": 4.8407542796005706e-05, + "loss": 0.0059, + "step": 17870 + }, + { + "epoch": 3.19, + "learning_rate": 4.84066512125535e-05, + "loss": 0.008, + "step": 17880 + }, + { + "epoch": 3.19, + "learning_rate": 4.840575962910129e-05, + "loss": 0.0088, + "step": 17890 + }, + { + "epoch": 3.19, + "learning_rate": 4.840486804564908e-05, + "loss": 0.0065, + "step": 17900 + }, + { + "epoch": 3.19, + "learning_rate": 4.8403976462196864e-05, + "loss": 0.0076, + "step": 17910 + }, + { + "epoch": 3.2, + "learning_rate": 4.8403084878744655e-05, + "loss": 0.0073, + "step": 17920 + }, + { + "epoch": 3.2, + "learning_rate": 4.840219329529244e-05, + "loss": 0.0047, + "step": 17930 + }, + { + "epoch": 3.2, + "learning_rate": 4.840130171184023e-05, + "loss": 0.0057, + "step": 17940 + }, + { + "epoch": 3.2, + "learning_rate": 4.840041012838802e-05, + "loss": 0.0051, + "step": 17950 + }, + { + "epoch": 3.2, + "learning_rate": 4.8399518544935806e-05, + "loss": 0.009, + "step": 17960 + }, + { + "epoch": 3.2, + "learning_rate": 4.83986269614836e-05, + "loss": 0.0115, + "step": 17970 + }, + { + "epoch": 3.21, + "learning_rate": 4.839773537803138e-05, + "loss": 0.0066, + "step": 17980 + }, + { + "epoch": 3.21, + "learning_rate": 4.839684379457918e-05, + "loss": 0.0078, + "step": 17990 + }, + { + "epoch": 3.21, + "learning_rate": 4.8395952211126964e-05, + "loss": 0.009, + "step": 18000 + }, + { + "epoch": 3.21, + "learning_rate": 4.8395060627674756e-05, + "loss": 0.0058, + "step": 18010 + }, + { + "epoch": 3.21, + "learning_rate": 4.839416904422254e-05, + "loss": 0.0057, + "step": 18020 + }, + { + "epoch": 3.22, + "learning_rate": 4.839327746077033e-05, + "loss": 0.0074, + "step": 18030 + }, + { + "epoch": 3.22, + "learning_rate": 4.839238587731812e-05, + "loss": 0.0093, + "step": 18040 + }, + { + "epoch": 3.22, + "learning_rate": 4.839149429386591e-05, + "loss": 0.0076, + "step": 18050 + }, + { + "epoch": 3.22, + "learning_rate": 4.83906027104137e-05, + "loss": 0.0086, + "step": 18060 + }, + { + "epoch": 3.22, + "learning_rate": 4.838971112696148e-05, + "loss": 0.0077, + "step": 18070 + }, + { + "epoch": 3.22, + "learning_rate": 4.8388819543509274e-05, + "loss": 0.0065, + "step": 18080 + }, + { + "epoch": 3.23, + "learning_rate": 4.838792796005706e-05, + "loss": 0.006, + "step": 18090 + }, + { + "epoch": 3.23, + "learning_rate": 4.8387036376604856e-05, + "loss": 0.0048, + "step": 18100 + }, + { + "epoch": 3.23, + "learning_rate": 4.838614479315264e-05, + "loss": 0.0033, + "step": 18110 + }, + { + "epoch": 3.23, + "learning_rate": 4.838525320970043e-05, + "loss": 0.0058, + "step": 18120 + }, + { + "epoch": 3.23, + "learning_rate": 4.838436162624822e-05, + "loss": 0.0056, + "step": 18130 + }, + { + "epoch": 3.23, + "learning_rate": 4.838347004279601e-05, + "loss": 0.0114, + "step": 18140 + }, + { + "epoch": 3.24, + "learning_rate": 4.83825784593438e-05, + "loss": 0.0095, + "step": 18150 + }, + { + "epoch": 3.24, + "learning_rate": 4.838168687589158e-05, + "loss": 0.0046, + "step": 18160 + }, + { + "epoch": 3.24, + "learning_rate": 4.8380795292439374e-05, + "loss": 0.0079, + "step": 18170 + }, + { + "epoch": 3.24, + "learning_rate": 4.8379903708987165e-05, + "loss": 0.008, + "step": 18180 + }, + { + "epoch": 3.24, + "learning_rate": 4.837901212553495e-05, + "loss": 0.0098, + "step": 18190 + }, + { + "epoch": 3.25, + "learning_rate": 4.837812054208274e-05, + "loss": 0.0075, + "step": 18200 + }, + { + "epoch": 3.25, + "learning_rate": 4.837722895863053e-05, + "loss": 0.0058, + "step": 18210 + }, + { + "epoch": 3.25, + "learning_rate": 4.8376337375178324e-05, + "loss": 0.0064, + "step": 18220 + }, + { + "epoch": 3.25, + "learning_rate": 4.837544579172611e-05, + "loss": 0.0079, + "step": 18230 + }, + { + "epoch": 3.25, + "learning_rate": 4.83745542082739e-05, + "loss": 0.0058, + "step": 18240 + }, + { + "epoch": 3.25, + "learning_rate": 4.8373662624821684e-05, + "loss": 0.0072, + "step": 18250 + }, + { + "epoch": 3.26, + "learning_rate": 4.8372771041369475e-05, + "loss": 0.0102, + "step": 18260 + }, + { + "epoch": 3.26, + "learning_rate": 4.8371879457917266e-05, + "loss": 0.0061, + "step": 18270 + }, + { + "epoch": 3.26, + "learning_rate": 4.837098787446505e-05, + "loss": 0.0086, + "step": 18280 + }, + { + "epoch": 3.26, + "learning_rate": 4.837009629101284e-05, + "loss": 0.006, + "step": 18290 + }, + { + "epoch": 3.26, + "learning_rate": 4.8369204707560626e-05, + "loss": 0.0063, + "step": 18300 + }, + { + "epoch": 3.26, + "learning_rate": 4.836831312410842e-05, + "loss": 0.008, + "step": 18310 + }, + { + "epoch": 3.27, + "learning_rate": 4.836742154065621e-05, + "loss": 0.0077, + "step": 18320 + }, + { + "epoch": 3.27, + "learning_rate": 4.8366529957204e-05, + "loss": 0.0068, + "step": 18330 + }, + { + "epoch": 3.27, + "learning_rate": 4.8365638373751784e-05, + "loss": 0.0068, + "step": 18340 + }, + { + "epoch": 3.27, + "learning_rate": 4.8364746790299575e-05, + "loss": 0.0067, + "step": 18350 + }, + { + "epoch": 3.27, + "learning_rate": 4.8363855206847367e-05, + "loss": 0.0069, + "step": 18360 + }, + { + "epoch": 3.28, + "learning_rate": 4.836296362339515e-05, + "loss": 0.0091, + "step": 18370 + }, + { + "epoch": 3.28, + "learning_rate": 4.836207203994294e-05, + "loss": 0.0091, + "step": 18380 + }, + { + "epoch": 3.28, + "learning_rate": 4.836118045649073e-05, + "loss": 0.0091, + "step": 18390 + }, + { + "epoch": 3.28, + "learning_rate": 4.836028887303852e-05, + "loss": 0.006, + "step": 18400 + }, + { + "epoch": 3.28, + "learning_rate": 4.83593972895863e-05, + "loss": 0.0077, + "step": 18410 + }, + { + "epoch": 3.28, + "learning_rate": 4.8358505706134094e-05, + "loss": 0.0071, + "step": 18420 + }, + { + "epoch": 3.29, + "learning_rate": 4.8357614122681885e-05, + "loss": 0.0068, + "step": 18430 + }, + { + "epoch": 3.29, + "learning_rate": 4.8356722539229676e-05, + "loss": 0.0046, + "step": 18440 + }, + { + "epoch": 3.29, + "learning_rate": 4.835583095577747e-05, + "loss": 0.0042, + "step": 18450 + }, + { + "epoch": 3.29, + "learning_rate": 4.835493937232525e-05, + "loss": 0.0063, + "step": 18460 + }, + { + "epoch": 3.29, + "learning_rate": 4.835404778887304e-05, + "loss": 0.0067, + "step": 18470 + }, + { + "epoch": 3.3, + "learning_rate": 4.835315620542083e-05, + "loss": 0.0105, + "step": 18480 + }, + { + "epoch": 3.3, + "learning_rate": 4.835226462196862e-05, + "loss": 0.0073, + "step": 18490 + }, + { + "epoch": 3.3, + "learning_rate": 4.835137303851641e-05, + "loss": 0.0085, + "step": 18500 + }, + { + "epoch": 3.3, + "learning_rate": 4.8350481455064194e-05, + "loss": 0.0078, + "step": 18510 + }, + { + "epoch": 3.3, + "learning_rate": 4.8349589871611985e-05, + "loss": 0.0069, + "step": 18520 + }, + { + "epoch": 3.3, + "learning_rate": 4.834869828815977e-05, + "loss": 0.0078, + "step": 18530 + }, + { + "epoch": 3.31, + "learning_rate": 4.834780670470757e-05, + "loss": 0.0087, + "step": 18540 + }, + { + "epoch": 3.31, + "learning_rate": 4.834691512125535e-05, + "loss": 0.0051, + "step": 18550 + }, + { + "epoch": 3.31, + "learning_rate": 4.834602353780314e-05, + "loss": 0.0072, + "step": 18560 + }, + { + "epoch": 3.31, + "learning_rate": 4.834513195435093e-05, + "loss": 0.0077, + "step": 18570 + }, + { + "epoch": 3.31, + "learning_rate": 4.834424037089872e-05, + "loss": 0.0083, + "step": 18580 + }, + { + "epoch": 3.31, + "learning_rate": 4.834334878744651e-05, + "loss": 0.0056, + "step": 18590 + }, + { + "epoch": 3.32, + "learning_rate": 4.8342457203994295e-05, + "loss": 0.0093, + "step": 18600 + }, + { + "epoch": 3.32, + "learning_rate": 4.8341565620542086e-05, + "loss": 0.0052, + "step": 18610 + }, + { + "epoch": 3.32, + "learning_rate": 4.834067403708987e-05, + "loss": 0.0063, + "step": 18620 + }, + { + "epoch": 3.32, + "learning_rate": 4.833978245363766e-05, + "loss": 0.0062, + "step": 18630 + }, + { + "epoch": 3.32, + "learning_rate": 4.8338890870185446e-05, + "loss": 0.008, + "step": 18640 + }, + { + "epoch": 3.33, + "learning_rate": 4.8337999286733244e-05, + "loss": 0.0113, + "step": 18650 + }, + { + "epoch": 3.33, + "learning_rate": 4.833710770328103e-05, + "loss": 0.0082, + "step": 18660 + }, + { + "epoch": 3.33, + "learning_rate": 4.833621611982882e-05, + "loss": 0.0092, + "step": 18670 + }, + { + "epoch": 3.33, + "learning_rate": 4.833532453637661e-05, + "loss": 0.0063, + "step": 18680 + }, + { + "epoch": 3.33, + "learning_rate": 4.833452211126961e-05, + "loss": 0.0104, + "step": 18690 + }, + { + "epoch": 3.33, + "learning_rate": 4.833363052781741e-05, + "loss": 0.0088, + "step": 18700 + }, + { + "epoch": 3.34, + "learning_rate": 4.8332738944365194e-05, + "loss": 0.0066, + "step": 18710 + }, + { + "epoch": 3.34, + "learning_rate": 4.8331847360912986e-05, + "loss": 0.0061, + "step": 18720 + }, + { + "epoch": 3.34, + "learning_rate": 4.833095577746078e-05, + "loss": 0.0063, + "step": 18730 + }, + { + "epoch": 3.34, + "learning_rate": 4.833006419400856e-05, + "loss": 0.0103, + "step": 18740 + }, + { + "epoch": 3.34, + "learning_rate": 4.832917261055635e-05, + "loss": 0.0065, + "step": 18750 + }, + { + "epoch": 3.35, + "learning_rate": 4.832828102710414e-05, + "loss": 0.0056, + "step": 18760 + }, + { + "epoch": 3.35, + "learning_rate": 4.832738944365193e-05, + "loss": 0.0048, + "step": 18770 + }, + { + "epoch": 3.35, + "learning_rate": 4.832649786019971e-05, + "loss": 0.0077, + "step": 18780 + }, + { + "epoch": 3.35, + "learning_rate": 4.8325606276747504e-05, + "loss": 0.0058, + "step": 18790 + }, + { + "epoch": 3.35, + "learning_rate": 4.8324714693295295e-05, + "loss": 0.0051, + "step": 18800 + }, + { + "epoch": 3.35, + "learning_rate": 4.8323823109843086e-05, + "loss": 0.0089, + "step": 18810 + }, + { + "epoch": 3.36, + "learning_rate": 4.832293152639088e-05, + "loss": 0.0049, + "step": 18820 + }, + { + "epoch": 3.36, + "learning_rate": 4.832203994293866e-05, + "loss": 0.0073, + "step": 18830 + }, + { + "epoch": 3.36, + "learning_rate": 4.832114835948645e-05, + "loss": 0.0046, + "step": 18840 + }, + { + "epoch": 3.36, + "learning_rate": 4.832025677603424e-05, + "loss": 0.0048, + "step": 18850 + }, + { + "epoch": 3.36, + "learning_rate": 4.831936519258203e-05, + "loss": 0.005, + "step": 18860 + }, + { + "epoch": 3.36, + "learning_rate": 4.831847360912981e-05, + "loss": 0.007, + "step": 18870 + }, + { + "epoch": 3.37, + "learning_rate": 4.8317582025677604e-05, + "loss": 0.0039, + "step": 18880 + }, + { + "epoch": 3.37, + "learning_rate": 4.8316690442225396e-05, + "loss": 0.0065, + "step": 18890 + }, + { + "epoch": 3.37, + "learning_rate": 4.831579885877318e-05, + "loss": 0.0041, + "step": 18900 + }, + { + "epoch": 3.37, + "learning_rate": 4.831490727532097e-05, + "loss": 0.0086, + "step": 18910 + }, + { + "epoch": 3.37, + "learning_rate": 4.831401569186876e-05, + "loss": 0.0072, + "step": 18920 + }, + { + "epoch": 3.38, + "learning_rate": 4.8313124108416554e-05, + "loss": 0.006, + "step": 18930 + }, + { + "epoch": 3.38, + "learning_rate": 4.831223252496434e-05, + "loss": 0.0058, + "step": 18940 + }, + { + "epoch": 3.38, + "learning_rate": 4.8311430099857346e-05, + "loss": 0.0083, + "step": 18950 + }, + { + "epoch": 3.38, + "learning_rate": 4.831053851640514e-05, + "loss": 0.0077, + "step": 18960 + }, + { + "epoch": 3.38, + "learning_rate": 4.830964693295293e-05, + "loss": 0.0095, + "step": 18970 + }, + { + "epoch": 3.38, + "learning_rate": 4.830875534950072e-05, + "loss": 0.0078, + "step": 18980 + }, + { + "epoch": 3.39, + "learning_rate": 4.8307863766048504e-05, + "loss": 0.0073, + "step": 18990 + }, + { + "epoch": 3.39, + "learning_rate": 4.8306972182596295e-05, + "loss": 0.0089, + "step": 19000 + }, + { + "epoch": 3.39, + "learning_rate": 4.830608059914408e-05, + "loss": 0.0066, + "step": 19010 + }, + { + "epoch": 3.39, + "learning_rate": 4.830518901569187e-05, + "loss": 0.0087, + "step": 19020 + }, + { + "epoch": 3.39, + "learning_rate": 4.830429743223966e-05, + "loss": 0.0084, + "step": 19030 + }, + { + "epoch": 3.4, + "learning_rate": 4.830340584878745e-05, + "loss": 0.0082, + "step": 19040 + }, + { + "epoch": 3.4, + "learning_rate": 4.830251426533524e-05, + "loss": 0.0086, + "step": 19050 + }, + { + "epoch": 3.4, + "learning_rate": 4.830162268188302e-05, + "loss": 0.0067, + "step": 19060 + }, + { + "epoch": 3.4, + "learning_rate": 4.830073109843082e-05, + "loss": 0.0083, + "step": 19070 + }, + { + "epoch": 3.4, + "learning_rate": 4.8299839514978605e-05, + "loss": 0.0075, + "step": 19080 + }, + { + "epoch": 3.4, + "learning_rate": 4.8298947931526396e-05, + "loss": 0.0057, + "step": 19090 + }, + { + "epoch": 3.41, + "learning_rate": 4.829805634807418e-05, + "loss": 0.009, + "step": 19100 + }, + { + "epoch": 3.41, + "learning_rate": 4.829716476462197e-05, + "loss": 0.008, + "step": 19110 + }, + { + "epoch": 3.41, + "learning_rate": 4.829627318116976e-05, + "loss": 0.0062, + "step": 19120 + }, + { + "epoch": 3.41, + "learning_rate": 4.829538159771755e-05, + "loss": 0.0054, + "step": 19130 + }, + { + "epoch": 3.41, + "learning_rate": 4.829449001426534e-05, + "loss": 0.0072, + "step": 19140 + }, + { + "epoch": 3.41, + "learning_rate": 4.829359843081312e-05, + "loss": 0.0036, + "step": 19150 + }, + { + "epoch": 3.42, + "learning_rate": 4.8292706847360914e-05, + "loss": 0.0049, + "step": 19160 + }, + { + "epoch": 3.42, + "learning_rate": 4.82918152639087e-05, + "loss": 0.0074, + "step": 19170 + }, + { + "epoch": 3.42, + "learning_rate": 4.8290923680456496e-05, + "loss": 0.0073, + "step": 19180 + }, + { + "epoch": 3.42, + "learning_rate": 4.829003209700428e-05, + "loss": 0.0073, + "step": 19190 + }, + { + "epoch": 3.42, + "learning_rate": 4.828914051355207e-05, + "loss": 0.0065, + "step": 19200 + }, + { + "epoch": 3.43, + "learning_rate": 4.828824893009986e-05, + "loss": 0.0101, + "step": 19210 + }, + { + "epoch": 3.43, + "learning_rate": 4.828735734664765e-05, + "loss": 0.008, + "step": 19220 + }, + { + "epoch": 3.43, + "learning_rate": 4.828646576319544e-05, + "loss": 0.0058, + "step": 19230 + }, + { + "epoch": 3.43, + "learning_rate": 4.8285574179743223e-05, + "loss": 0.0074, + "step": 19240 + }, + { + "epoch": 3.43, + "learning_rate": 4.8284682596291015e-05, + "loss": 0.0068, + "step": 19250 + }, + { + "epoch": 3.43, + "learning_rate": 4.8283791012838806e-05, + "loss": 0.007, + "step": 19260 + }, + { + "epoch": 3.44, + "learning_rate": 4.828289942938659e-05, + "loss": 0.0079, + "step": 19270 + }, + { + "epoch": 3.44, + "learning_rate": 4.828200784593438e-05, + "loss": 0.0093, + "step": 19280 + }, + { + "epoch": 3.44, + "learning_rate": 4.828111626248217e-05, + "loss": 0.0083, + "step": 19290 + }, + { + "epoch": 3.44, + "learning_rate": 4.8280224679029964e-05, + "loss": 0.0084, + "step": 19300 + }, + { + "epoch": 3.44, + "learning_rate": 4.827933309557775e-05, + "loss": 0.0093, + "step": 19310 + }, + { + "epoch": 3.45, + "learning_rate": 4.827844151212554e-05, + "loss": 0.006, + "step": 19320 + }, + { + "epoch": 3.45, + "learning_rate": 4.8277549928673324e-05, + "loss": 0.0056, + "step": 19330 + }, + { + "epoch": 3.45, + "learning_rate": 4.8276658345221115e-05, + "loss": 0.0095, + "step": 19340 + }, + { + "epoch": 3.45, + "learning_rate": 4.8275766761768906e-05, + "loss": 0.0053, + "step": 19350 + }, + { + "epoch": 3.45, + "learning_rate": 4.827487517831669e-05, + "loss": 0.0078, + "step": 19360 + }, + { + "epoch": 3.45, + "learning_rate": 4.827398359486448e-05, + "loss": 0.0063, + "step": 19370 + }, + { + "epoch": 3.46, + "learning_rate": 4.8273092011412266e-05, + "loss": 0.0095, + "step": 19380 + }, + { + "epoch": 3.46, + "learning_rate": 4.827220042796006e-05, + "loss": 0.0106, + "step": 19390 + }, + { + "epoch": 3.46, + "learning_rate": 4.827130884450785e-05, + "loss": 0.0061, + "step": 19400 + }, + { + "epoch": 3.46, + "learning_rate": 4.827041726105564e-05, + "loss": 0.0088, + "step": 19410 + }, + { + "epoch": 3.46, + "learning_rate": 4.8269525677603425e-05, + "loss": 0.0099, + "step": 19420 + }, + { + "epoch": 3.46, + "learning_rate": 4.8268634094151216e-05, + "loss": 0.0086, + "step": 19430 + }, + { + "epoch": 3.47, + "learning_rate": 4.826774251069901e-05, + "loss": 0.0056, + "step": 19440 + }, + { + "epoch": 3.47, + "learning_rate": 4.826685092724679e-05, + "loss": 0.0071, + "step": 19450 + }, + { + "epoch": 3.47, + "learning_rate": 4.826595934379458e-05, + "loss": 0.0062, + "step": 19460 + }, + { + "epoch": 3.47, + "learning_rate": 4.826506776034237e-05, + "loss": 0.009, + "step": 19470 + }, + { + "epoch": 3.47, + "learning_rate": 4.826417617689016e-05, + "loss": 0.0077, + "step": 19480 + }, + { + "epoch": 3.48, + "learning_rate": 4.826328459343795e-05, + "loss": 0.009, + "step": 19490 + }, + { + "epoch": 3.48, + "learning_rate": 4.8262393009985734e-05, + "loss": 0.0065, + "step": 19500 + }, + { + "epoch": 3.48, + "learning_rate": 4.826150142653353e-05, + "loss": 0.0055, + "step": 19510 + }, + { + "epoch": 3.48, + "learning_rate": 4.8260609843081316e-05, + "loss": 0.0057, + "step": 19520 + }, + { + "epoch": 3.48, + "learning_rate": 4.825971825962911e-05, + "loss": 0.0055, + "step": 19530 + }, + { + "epoch": 3.48, + "learning_rate": 4.825882667617689e-05, + "loss": 0.0067, + "step": 19540 + }, + { + "epoch": 3.49, + "learning_rate": 4.825793509272468e-05, + "loss": 0.0065, + "step": 19550 + }, + { + "epoch": 3.49, + "learning_rate": 4.825704350927247e-05, + "loss": 0.0084, + "step": 19560 + }, + { + "epoch": 3.49, + "learning_rate": 4.825615192582026e-05, + "loss": 0.0067, + "step": 19570 + }, + { + "epoch": 3.49, + "learning_rate": 4.825526034236805e-05, + "loss": 0.0066, + "step": 19580 + }, + { + "epoch": 3.49, + "learning_rate": 4.8254368758915834e-05, + "loss": 0.0064, + "step": 19590 + }, + { + "epoch": 3.5, + "learning_rate": 4.8253477175463626e-05, + "loss": 0.0077, + "step": 19600 + }, + { + "epoch": 3.5, + "learning_rate": 4.825258559201141e-05, + "loss": 0.0064, + "step": 19610 + }, + { + "epoch": 3.5, + "learning_rate": 4.825169400855921e-05, + "loss": 0.0081, + "step": 19620 + }, + { + "epoch": 3.5, + "learning_rate": 4.825080242510699e-05, + "loss": 0.0072, + "step": 19630 + }, + { + "epoch": 3.5, + "learning_rate": 4.8249910841654784e-05, + "loss": 0.0076, + "step": 19640 + }, + { + "epoch": 3.5, + "learning_rate": 4.824901925820257e-05, + "loss": 0.0086, + "step": 19650 + }, + { + "epoch": 3.51, + "learning_rate": 4.824812767475036e-05, + "loss": 0.0082, + "step": 19660 + }, + { + "epoch": 3.51, + "learning_rate": 4.824723609129815e-05, + "loss": 0.0101, + "step": 19670 + }, + { + "epoch": 3.51, + "learning_rate": 4.8246344507845935e-05, + "loss": 0.009, + "step": 19680 + }, + { + "epoch": 3.51, + "learning_rate": 4.8245452924393726e-05, + "loss": 0.008, + "step": 19690 + }, + { + "epoch": 3.51, + "learning_rate": 4.824456134094151e-05, + "loss": 0.0106, + "step": 19700 + }, + { + "epoch": 3.51, + "learning_rate": 4.82436697574893e-05, + "loss": 0.0088, + "step": 19710 + }, + { + "epoch": 3.52, + "learning_rate": 4.824277817403709e-05, + "loss": 0.0067, + "step": 19720 + }, + { + "epoch": 3.52, + "learning_rate": 4.8241886590584884e-05, + "loss": 0.0059, + "step": 19730 + }, + { + "epoch": 3.52, + "learning_rate": 4.8240995007132675e-05, + "loss": 0.0053, + "step": 19740 + }, + { + "epoch": 3.52, + "learning_rate": 4.824010342368046e-05, + "loss": 0.0079, + "step": 19750 + }, + { + "epoch": 3.52, + "learning_rate": 4.823921184022825e-05, + "loss": 0.0056, + "step": 19760 + }, + { + "epoch": 3.53, + "learning_rate": 4.8238320256776036e-05, + "loss": 0.0102, + "step": 19770 + }, + { + "epoch": 3.53, + "learning_rate": 4.823742867332383e-05, + "loss": 0.007, + "step": 19780 + }, + { + "epoch": 3.53, + "learning_rate": 4.823653708987161e-05, + "loss": 0.0053, + "step": 19790 + }, + { + "epoch": 3.53, + "learning_rate": 4.82356455064194e-05, + "loss": 0.0076, + "step": 19800 + }, + { + "epoch": 3.53, + "learning_rate": 4.8234753922967194e-05, + "loss": 0.0071, + "step": 19810 + }, + { + "epoch": 3.53, + "learning_rate": 4.823386233951498e-05, + "loss": 0.0044, + "step": 19820 + }, + { + "epoch": 3.54, + "learning_rate": 4.823297075606277e-05, + "loss": 0.0065, + "step": 19830 + }, + { + "epoch": 3.54, + "learning_rate": 4.823207917261056e-05, + "loss": 0.0074, + "step": 19840 + }, + { + "epoch": 3.54, + "learning_rate": 4.823118758915835e-05, + "loss": 0.0093, + "step": 19850 + }, + { + "epoch": 3.54, + "learning_rate": 4.8230296005706136e-05, + "loss": 0.0092, + "step": 19860 + }, + { + "epoch": 3.54, + "learning_rate": 4.822940442225393e-05, + "loss": 0.0066, + "step": 19870 + }, + { + "epoch": 3.54, + "learning_rate": 4.822851283880171e-05, + "loss": 0.0104, + "step": 19880 + }, + { + "epoch": 3.55, + "learning_rate": 4.82276212553495e-05, + "loss": 0.0088, + "step": 19890 + }, + { + "epoch": 3.55, + "learning_rate": 4.8226729671897294e-05, + "loss": 0.0101, + "step": 19900 + }, + { + "epoch": 3.55, + "learning_rate": 4.822583808844508e-05, + "loss": 0.0098, + "step": 19910 + }, + { + "epoch": 3.55, + "learning_rate": 4.822494650499287e-05, + "loss": 0.0082, + "step": 19920 + }, + { + "epoch": 3.55, + "learning_rate": 4.8224054921540654e-05, + "loss": 0.0062, + "step": 19930 + }, + { + "epoch": 3.56, + "learning_rate": 4.8223163338088445e-05, + "loss": 0.007, + "step": 19940 + }, + { + "epoch": 3.56, + "learning_rate": 4.822227175463624e-05, + "loss": 0.0073, + "step": 19950 + }, + { + "epoch": 3.56, + "learning_rate": 4.822138017118403e-05, + "loss": 0.0069, + "step": 19960 + }, + { + "epoch": 3.56, + "learning_rate": 4.822048858773182e-05, + "loss": 0.0056, + "step": 19970 + }, + { + "epoch": 3.56, + "learning_rate": 4.8219597004279604e-05, + "loss": 0.0085, + "step": 19980 + }, + { + "epoch": 3.56, + "learning_rate": 4.8218705420827395e-05, + "loss": 0.0056, + "step": 19990 + }, + { + "epoch": 3.57, + "learning_rate": 4.821781383737518e-05, + "loss": 0.004, + "step": 20000 + }, + { + "epoch": 3.57, + "learning_rate": 4.821692225392297e-05, + "loss": 0.0043, + "step": 20010 + }, + { + "epoch": 3.57, + "learning_rate": 4.8216030670470755e-05, + "loss": 0.0077, + "step": 20020 + }, + { + "epoch": 3.57, + "learning_rate": 4.8215139087018546e-05, + "loss": 0.0079, + "step": 20030 + }, + { + "epoch": 3.57, + "learning_rate": 4.821424750356634e-05, + "loss": 0.0033, + "step": 20040 + }, + { + "epoch": 3.58, + "learning_rate": 4.821335592011412e-05, + "loss": 0.0071, + "step": 20050 + }, + { + "epoch": 3.58, + "learning_rate": 4.821246433666191e-05, + "loss": 0.0097, + "step": 20060 + }, + { + "epoch": 3.58, + "learning_rate": 4.8211572753209704e-05, + "loss": 0.0069, + "step": 20070 + }, + { + "epoch": 3.58, + "learning_rate": 4.8210681169757495e-05, + "loss": 0.0081, + "step": 20080 + }, + { + "epoch": 3.58, + "learning_rate": 4.820978958630528e-05, + "loss": 0.006, + "step": 20090 + }, + { + "epoch": 3.58, + "learning_rate": 4.820889800285307e-05, + "loss": 0.0097, + "step": 20100 + }, + { + "epoch": 3.59, + "learning_rate": 4.8208006419400855e-05, + "loss": 0.0066, + "step": 20110 + }, + { + "epoch": 3.59, + "learning_rate": 4.8207114835948647e-05, + "loss": 0.006, + "step": 20120 + }, + { + "epoch": 3.59, + "learning_rate": 4.820622325249644e-05, + "loss": 0.0082, + "step": 20130 + }, + { + "epoch": 3.59, + "learning_rate": 4.820533166904422e-05, + "loss": 0.0083, + "step": 20140 + }, + { + "epoch": 3.59, + "learning_rate": 4.8204440085592013e-05, + "loss": 0.0052, + "step": 20150 + }, + { + "epoch": 3.59, + "learning_rate": 4.82035485021398e-05, + "loss": 0.0086, + "step": 20160 + }, + { + "epoch": 3.6, + "learning_rate": 4.820265691868759e-05, + "loss": 0.0048, + "step": 20170 + }, + { + "epoch": 3.6, + "learning_rate": 4.820176533523538e-05, + "loss": 0.0051, + "step": 20180 + }, + { + "epoch": 3.6, + "learning_rate": 4.820087375178317e-05, + "loss": 0.0052, + "step": 20190 + }, + { + "epoch": 3.6, + "learning_rate": 4.819998216833096e-05, + "loss": 0.0072, + "step": 20200 + }, + { + "epoch": 3.6, + "learning_rate": 4.819909058487875e-05, + "loss": 0.0056, + "step": 20210 + }, + { + "epoch": 3.61, + "learning_rate": 4.819819900142654e-05, + "loss": 0.0063, + "step": 20220 + }, + { + "epoch": 3.61, + "learning_rate": 4.819730741797432e-05, + "loss": 0.0093, + "step": 20230 + }, + { + "epoch": 3.61, + "learning_rate": 4.8196415834522114e-05, + "loss": 0.0069, + "step": 20240 + }, + { + "epoch": 3.61, + "learning_rate": 4.81955242510699e-05, + "loss": 0.0049, + "step": 20250 + }, + { + "epoch": 3.61, + "learning_rate": 4.819463266761769e-05, + "loss": 0.0054, + "step": 20260 + }, + { + "epoch": 3.61, + "learning_rate": 4.819374108416548e-05, + "loss": 0.0101, + "step": 20270 + }, + { + "epoch": 3.62, + "learning_rate": 4.8192849500713265e-05, + "loss": 0.0144, + "step": 20280 + }, + { + "epoch": 3.62, + "learning_rate": 4.819195791726106e-05, + "loss": 0.0101, + "step": 20290 + }, + { + "epoch": 3.62, + "learning_rate": 4.819106633380885e-05, + "loss": 0.0078, + "step": 20300 + }, + { + "epoch": 3.62, + "learning_rate": 4.819017475035664e-05, + "loss": 0.0043, + "step": 20310 + }, + { + "epoch": 3.62, + "learning_rate": 4.818928316690442e-05, + "loss": 0.0063, + "step": 20320 + }, + { + "epoch": 3.63, + "learning_rate": 4.8188391583452215e-05, + "loss": 0.0048, + "step": 20330 + }, + { + "epoch": 3.63, + "learning_rate": 4.81875e-05, + "loss": 0.0105, + "step": 20340 + }, + { + "epoch": 3.63, + "learning_rate": 4.818660841654779e-05, + "loss": 0.0057, + "step": 20350 + }, + { + "epoch": 3.63, + "learning_rate": 4.818571683309558e-05, + "loss": 0.0077, + "step": 20360 + }, + { + "epoch": 3.63, + "learning_rate": 4.8184825249643366e-05, + "loss": 0.0087, + "step": 20370 + }, + { + "epoch": 3.63, + "learning_rate": 4.818393366619116e-05, + "loss": 0.0066, + "step": 20380 + }, + { + "epoch": 3.64, + "learning_rate": 4.818304208273894e-05, + "loss": 0.0054, + "step": 20390 + }, + { + "epoch": 3.64, + "learning_rate": 4.818215049928674e-05, + "loss": 0.005, + "step": 20400 + }, + { + "epoch": 3.64, + "learning_rate": 4.8181258915834524e-05, + "loss": 0.0083, + "step": 20410 + }, + { + "epoch": 3.64, + "learning_rate": 4.8180367332382315e-05, + "loss": 0.0068, + "step": 20420 + }, + { + "epoch": 3.64, + "learning_rate": 4.8179475748930106e-05, + "loss": 0.0044, + "step": 20430 + }, + { + "epoch": 3.64, + "learning_rate": 4.817858416547789e-05, + "loss": 0.009, + "step": 20440 + }, + { + "epoch": 3.65, + "learning_rate": 4.817769258202568e-05, + "loss": 0.0049, + "step": 20450 + }, + { + "epoch": 3.65, + "learning_rate": 4.8176800998573466e-05, + "loss": 0.0077, + "step": 20460 + }, + { + "epoch": 3.65, + "learning_rate": 4.817590941512126e-05, + "loss": 0.0057, + "step": 20470 + }, + { + "epoch": 3.65, + "learning_rate": 4.817501783166904e-05, + "loss": 0.0077, + "step": 20480 + }, + { + "epoch": 3.65, + "learning_rate": 4.817412624821683e-05, + "loss": 0.0054, + "step": 20490 + }, + { + "epoch": 3.66, + "learning_rate": 4.8173234664764624e-05, + "loss": 0.0074, + "step": 20500 + }, + { + "epoch": 3.66, + "learning_rate": 4.8172343081312416e-05, + "loss": 0.0071, + "step": 20510 + }, + { + "epoch": 3.66, + "learning_rate": 4.817145149786021e-05, + "loss": 0.0046, + "step": 20520 + }, + { + "epoch": 3.66, + "learning_rate": 4.817055991440799e-05, + "loss": 0.0093, + "step": 20530 + }, + { + "epoch": 3.66, + "learning_rate": 4.816966833095578e-05, + "loss": 0.0063, + "step": 20540 + }, + { + "epoch": 3.66, + "learning_rate": 4.816877674750357e-05, + "loss": 0.0053, + "step": 20550 + }, + { + "epoch": 3.67, + "learning_rate": 4.816788516405136e-05, + "loss": 0.0105, + "step": 20560 + }, + { + "epoch": 3.67, + "learning_rate": 4.816699358059914e-05, + "loss": 0.0093, + "step": 20570 + }, + { + "epoch": 3.67, + "learning_rate": 4.8166101997146934e-05, + "loss": 0.0059, + "step": 20580 + }, + { + "epoch": 3.67, + "learning_rate": 4.8165210413694725e-05, + "loss": 0.0081, + "step": 20590 + }, + { + "epoch": 3.67, + "learning_rate": 4.816431883024251e-05, + "loss": 0.0051, + "step": 20600 + }, + { + "epoch": 3.68, + "learning_rate": 4.81634272467903e-05, + "loss": 0.009, + "step": 20610 + }, + { + "epoch": 3.68, + "learning_rate": 4.816253566333809e-05, + "loss": 0.0063, + "step": 20620 + }, + { + "epoch": 3.68, + "learning_rate": 4.816164407988588e-05, + "loss": 0.0049, + "step": 20630 + }, + { + "epoch": 3.68, + "learning_rate": 4.816075249643367e-05, + "loss": 0.0048, + "step": 20640 + }, + { + "epoch": 3.68, + "learning_rate": 4.815986091298146e-05, + "loss": 0.0127, + "step": 20650 + }, + { + "epoch": 3.68, + "learning_rate": 4.815896932952924e-05, + "loss": 0.007, + "step": 20660 + }, + { + "epoch": 3.69, + "learning_rate": 4.8158077746077034e-05, + "loss": 0.0055, + "step": 20670 + }, + { + "epoch": 3.69, + "learning_rate": 4.8157186162624826e-05, + "loss": 0.0069, + "step": 20680 + }, + { + "epoch": 3.69, + "learning_rate": 4.815629457917261e-05, + "loss": 0.0066, + "step": 20690 + }, + { + "epoch": 3.69, + "learning_rate": 4.81554029957204e-05, + "loss": 0.008, + "step": 20700 + }, + { + "epoch": 3.69, + "learning_rate": 4.8154511412268186e-05, + "loss": 0.0089, + "step": 20710 + }, + { + "epoch": 3.69, + "learning_rate": 4.815361982881598e-05, + "loss": 0.0077, + "step": 20720 + }, + { + "epoch": 3.7, + "learning_rate": 4.815272824536377e-05, + "loss": 0.0076, + "step": 20730 + }, + { + "epoch": 3.7, + "learning_rate": 4.815183666191156e-05, + "loss": 0.0076, + "step": 20740 + }, + { + "epoch": 3.7, + "learning_rate": 4.815094507845935e-05, + "loss": 0.0028, + "step": 20750 + }, + { + "epoch": 3.7, + "learning_rate": 4.8150053495007135e-05, + "loss": 0.0061, + "step": 20760 + }, + { + "epoch": 3.7, + "learning_rate": 4.8149161911554926e-05, + "loss": 0.006, + "step": 20770 + }, + { + "epoch": 3.71, + "learning_rate": 4.814827032810271e-05, + "loss": 0.0076, + "step": 20780 + }, + { + "epoch": 3.71, + "learning_rate": 4.81473787446505e-05, + "loss": 0.004, + "step": 20790 + }, + { + "epoch": 3.71, + "learning_rate": 4.8146487161198286e-05, + "loss": 0.0056, + "step": 20800 + }, + { + "epoch": 3.71, + "learning_rate": 4.814559557774608e-05, + "loss": 0.0046, + "step": 20810 + }, + { + "epoch": 3.71, + "learning_rate": 4.814470399429387e-05, + "loss": 0.0089, + "step": 20820 + }, + { + "epoch": 3.71, + "learning_rate": 4.814381241084165e-05, + "loss": 0.0047, + "step": 20830 + }, + { + "epoch": 3.72, + "learning_rate": 4.814292082738945e-05, + "loss": 0.0063, + "step": 20840 + }, + { + "epoch": 3.72, + "learning_rate": 4.8142029243937236e-05, + "loss": 0.0043, + "step": 20850 + }, + { + "epoch": 3.72, + "learning_rate": 4.814113766048503e-05, + "loss": 0.01, + "step": 20860 + }, + { + "epoch": 3.72, + "learning_rate": 4.814024607703281e-05, + "loss": 0.0073, + "step": 20870 + }, + { + "epoch": 3.72, + "learning_rate": 4.81393544935806e-05, + "loss": 0.0053, + "step": 20880 + }, + { + "epoch": 3.73, + "learning_rate": 4.813846291012839e-05, + "loss": 0.0071, + "step": 20890 + }, + { + "epoch": 3.73, + "learning_rate": 4.813757132667618e-05, + "loss": 0.0067, + "step": 20900 + }, + { + "epoch": 3.73, + "learning_rate": 4.813667974322397e-05, + "loss": 0.0069, + "step": 20910 + }, + { + "epoch": 3.73, + "learning_rate": 4.8135788159771754e-05, + "loss": 0.0061, + "step": 20920 + }, + { + "epoch": 3.73, + "learning_rate": 4.8134896576319545e-05, + "loss": 0.0065, + "step": 20930 + }, + { + "epoch": 3.73, + "learning_rate": 4.813400499286733e-05, + "loss": 0.0073, + "step": 20940 + }, + { + "epoch": 3.74, + "learning_rate": 4.813311340941513e-05, + "loss": 0.008, + "step": 20950 + }, + { + "epoch": 3.74, + "learning_rate": 4.813222182596291e-05, + "loss": 0.0098, + "step": 20960 + }, + { + "epoch": 3.74, + "learning_rate": 4.81313302425107e-05, + "loss": 0.0092, + "step": 20970 + }, + { + "epoch": 3.74, + "learning_rate": 4.8130438659058494e-05, + "loss": 0.0059, + "step": 20980 + }, + { + "epoch": 3.74, + "learning_rate": 4.812954707560628e-05, + "loss": 0.005, + "step": 20990 + }, + { + "epoch": 3.74, + "learning_rate": 4.812865549215407e-05, + "loss": 0.009, + "step": 21000 + }, + { + "epoch": 3.75, + "learning_rate": 4.8127763908701854e-05, + "loss": 0.0059, + "step": 21010 + }, + { + "epoch": 3.75, + "learning_rate": 4.8126872325249645e-05, + "loss": 0.0079, + "step": 21020 + }, + { + "epoch": 3.75, + "learning_rate": 4.812598074179743e-05, + "loss": 0.0088, + "step": 21030 + }, + { + "epoch": 3.75, + "learning_rate": 4.812508915834522e-05, + "loss": 0.0093, + "step": 21040 + }, + { + "epoch": 3.75, + "learning_rate": 4.812419757489301e-05, + "loss": 0.0082, + "step": 21050 + }, + { + "epoch": 3.76, + "learning_rate": 4.8123305991440803e-05, + "loss": 0.0064, + "step": 21060 + }, + { + "epoch": 3.76, + "learning_rate": 4.8122414407988595e-05, + "loss": 0.0079, + "step": 21070 + }, + { + "epoch": 3.76, + "learning_rate": 4.812152282453638e-05, + "loss": 0.0077, + "step": 21080 + }, + { + "epoch": 3.76, + "learning_rate": 4.812063124108417e-05, + "loss": 0.0085, + "step": 21090 + }, + { + "epoch": 3.76, + "learning_rate": 4.8119739657631955e-05, + "loss": 0.0095, + "step": 21100 + }, + { + "epoch": 3.76, + "learning_rate": 4.8118848074179746e-05, + "loss": 0.0061, + "step": 21110 + }, + { + "epoch": 3.77, + "learning_rate": 4.811795649072753e-05, + "loss": 0.0067, + "step": 21120 + }, + { + "epoch": 3.77, + "learning_rate": 4.811706490727532e-05, + "loss": 0.0099, + "step": 21130 + }, + { + "epoch": 3.77, + "learning_rate": 4.811617332382311e-05, + "loss": 0.0076, + "step": 21140 + }, + { + "epoch": 3.77, + "learning_rate": 4.81152817403709e-05, + "loss": 0.008, + "step": 21150 + }, + { + "epoch": 3.77, + "learning_rate": 4.811439015691869e-05, + "loss": 0.0064, + "step": 21160 + }, + { + "epoch": 3.77, + "learning_rate": 4.811349857346648e-05, + "loss": 0.0078, + "step": 21170 + }, + { + "epoch": 3.78, + "learning_rate": 4.811260699001427e-05, + "loss": 0.0073, + "step": 21180 + }, + { + "epoch": 3.78, + "learning_rate": 4.8111715406562055e-05, + "loss": 0.0084, + "step": 21190 + }, + { + "epoch": 3.78, + "learning_rate": 4.8110823823109847e-05, + "loss": 0.0054, + "step": 21200 + }, + { + "epoch": 3.78, + "learning_rate": 4.810993223965764e-05, + "loss": 0.0066, + "step": 21210 + }, + { + "epoch": 3.78, + "learning_rate": 4.810904065620542e-05, + "loss": 0.0072, + "step": 21220 + }, + { + "epoch": 3.79, + "learning_rate": 4.8108149072753213e-05, + "loss": 0.0058, + "step": 21230 + }, + { + "epoch": 3.79, + "learning_rate": 4.8107257489301e-05, + "loss": 0.0065, + "step": 21240 + }, + { + "epoch": 3.79, + "learning_rate": 4.810636590584879e-05, + "loss": 0.0122, + "step": 21250 + }, + { + "epoch": 3.79, + "learning_rate": 4.8105474322396573e-05, + "loss": 0.005, + "step": 21260 + }, + { + "epoch": 3.79, + "learning_rate": 4.8104582738944365e-05, + "loss": 0.0064, + "step": 21270 + }, + { + "epoch": 3.79, + "learning_rate": 4.8103691155492156e-05, + "loss": 0.0094, + "step": 21280 + }, + { + "epoch": 3.8, + "learning_rate": 4.810279957203995e-05, + "loss": 0.0067, + "step": 21290 + }, + { + "epoch": 3.8, + "learning_rate": 4.810190798858774e-05, + "loss": 0.006, + "step": 21300 + }, + { + "epoch": 3.8, + "learning_rate": 4.810101640513552e-05, + "loss": 0.0076, + "step": 21310 + }, + { + "epoch": 3.8, + "learning_rate": 4.8100124821683314e-05, + "loss": 0.0067, + "step": 21320 + }, + { + "epoch": 3.8, + "learning_rate": 4.80992332382311e-05, + "loss": 0.008, + "step": 21330 + }, + { + "epoch": 3.81, + "learning_rate": 4.809834165477889e-05, + "loss": 0.0058, + "step": 21340 + }, + { + "epoch": 3.81, + "learning_rate": 4.8097450071326674e-05, + "loss": 0.0078, + "step": 21350 + }, + { + "epoch": 3.81, + "learning_rate": 4.8096558487874465e-05, + "loss": 0.0072, + "step": 21360 + }, + { + "epoch": 3.81, + "learning_rate": 4.8095666904422256e-05, + "loss": 0.0111, + "step": 21370 + }, + { + "epoch": 3.81, + "learning_rate": 4.809477532097004e-05, + "loss": 0.0104, + "step": 21380 + }, + { + "epoch": 3.81, + "learning_rate": 4.809388373751784e-05, + "loss": 0.0076, + "step": 21390 + }, + { + "epoch": 3.82, + "learning_rate": 4.809299215406562e-05, + "loss": 0.0083, + "step": 21400 + }, + { + "epoch": 3.82, + "learning_rate": 4.8092100570613415e-05, + "loss": 0.0063, + "step": 21410 + }, + { + "epoch": 3.82, + "learning_rate": 4.80912089871612e-05, + "loss": 0.0065, + "step": 21420 + }, + { + "epoch": 3.82, + "learning_rate": 4.809031740370899e-05, + "loss": 0.0104, + "step": 21430 + }, + { + "epoch": 3.82, + "learning_rate": 4.808942582025678e-05, + "loss": 0.0065, + "step": 21440 + }, + { + "epoch": 3.82, + "learning_rate": 4.8088534236804566e-05, + "loss": 0.0069, + "step": 21450 + }, + { + "epoch": 3.83, + "learning_rate": 4.808764265335236e-05, + "loss": 0.0086, + "step": 21460 + }, + { + "epoch": 3.83, + "learning_rate": 4.808675106990014e-05, + "loss": 0.0087, + "step": 21470 + }, + { + "epoch": 3.83, + "learning_rate": 4.808585948644793e-05, + "loss": 0.0052, + "step": 21480 + }, + { + "epoch": 3.83, + "learning_rate": 4.808496790299572e-05, + "loss": 0.009, + "step": 21490 + }, + { + "epoch": 3.83, + "learning_rate": 4.8084076319543515e-05, + "loss": 0.0054, + "step": 21500 + }, + { + "epoch": 3.84, + "learning_rate": 4.80831847360913e-05, + "loss": 0.0048, + "step": 21510 + }, + { + "epoch": 3.84, + "learning_rate": 4.808229315263909e-05, + "loss": 0.0076, + "step": 21520 + }, + { + "epoch": 3.84, + "learning_rate": 4.808140156918688e-05, + "loss": 0.0054, + "step": 21530 + }, + { + "epoch": 3.84, + "learning_rate": 4.8080509985734666e-05, + "loss": 0.0051, + "step": 21540 + }, + { + "epoch": 3.84, + "learning_rate": 4.807961840228246e-05, + "loss": 0.012, + "step": 21550 + }, + { + "epoch": 3.84, + "learning_rate": 4.807872681883024e-05, + "loss": 0.0093, + "step": 21560 + }, + { + "epoch": 3.85, + "learning_rate": 4.807783523537803e-05, + "loss": 0.0078, + "step": 21570 + }, + { + "epoch": 3.85, + "learning_rate": 4.807694365192582e-05, + "loss": 0.0129, + "step": 21580 + }, + { + "epoch": 3.85, + "learning_rate": 4.807605206847361e-05, + "loss": 0.0105, + "step": 21590 + }, + { + "epoch": 3.85, + "learning_rate": 4.80751604850214e-05, + "loss": 0.0106, + "step": 21600 + }, + { + "epoch": 3.85, + "learning_rate": 4.807426890156919e-05, + "loss": 0.0066, + "step": 21610 + }, + { + "epoch": 3.86, + "learning_rate": 4.807337731811698e-05, + "loss": 0.0067, + "step": 21620 + }, + { + "epoch": 3.86, + "learning_rate": 4.807248573466477e-05, + "loss": 0.0066, + "step": 21630 + }, + { + "epoch": 3.86, + "learning_rate": 4.807159415121256e-05, + "loss": 0.0082, + "step": 21640 + }, + { + "epoch": 3.86, + "learning_rate": 4.807070256776034e-05, + "loss": 0.006, + "step": 21650 + }, + { + "epoch": 3.86, + "learning_rate": 4.8069810984308134e-05, + "loss": 0.0088, + "step": 21660 + }, + { + "epoch": 3.86, + "learning_rate": 4.8068919400855925e-05, + "loss": 0.0053, + "step": 21670 + }, + { + "epoch": 3.87, + "learning_rate": 4.806802781740371e-05, + "loss": 0.0079, + "step": 21680 + }, + { + "epoch": 3.87, + "learning_rate": 4.80671362339515e-05, + "loss": 0.0059, + "step": 21690 + }, + { + "epoch": 3.87, + "learning_rate": 4.8066244650499285e-05, + "loss": 0.0088, + "step": 21700 + }, + { + "epoch": 3.87, + "learning_rate": 4.8065353067047076e-05, + "loss": 0.0067, + "step": 21710 + }, + { + "epoch": 3.87, + "learning_rate": 4.806446148359487e-05, + "loss": 0.0101, + "step": 21720 + }, + { + "epoch": 3.87, + "learning_rate": 4.806356990014266e-05, + "loss": 0.0089, + "step": 21730 + }, + { + "epoch": 3.88, + "learning_rate": 4.806267831669044e-05, + "loss": 0.0056, + "step": 21740 + }, + { + "epoch": 3.88, + "learning_rate": 4.8061786733238234e-05, + "loss": 0.0105, + "step": 21750 + }, + { + "epoch": 3.88, + "learning_rate": 4.8060895149786026e-05, + "loss": 0.0076, + "step": 21760 + }, + { + "epoch": 3.88, + "learning_rate": 4.806000356633381e-05, + "loss": 0.006, + "step": 21770 + }, + { + "epoch": 3.88, + "learning_rate": 4.80591119828816e-05, + "loss": 0.0071, + "step": 21780 + }, + { + "epoch": 3.89, + "learning_rate": 4.8058220399429386e-05, + "loss": 0.0075, + "step": 21790 + }, + { + "epoch": 3.89, + "learning_rate": 4.805732881597718e-05, + "loss": 0.0091, + "step": 21800 + }, + { + "epoch": 3.89, + "learning_rate": 4.805643723252496e-05, + "loss": 0.0058, + "step": 21810 + }, + { + "epoch": 3.89, + "learning_rate": 4.805554564907275e-05, + "loss": 0.0066, + "step": 21820 + }, + { + "epoch": 3.89, + "learning_rate": 4.8054654065620544e-05, + "loss": 0.0106, + "step": 21830 + }, + { + "epoch": 3.89, + "learning_rate": 4.8053762482168335e-05, + "loss": 0.0035, + "step": 21840 + }, + { + "epoch": 3.9, + "learning_rate": 4.8052870898716126e-05, + "loss": 0.0068, + "step": 21850 + }, + { + "epoch": 3.9, + "learning_rate": 4.805197931526391e-05, + "loss": 0.0056, + "step": 21860 + }, + { + "epoch": 3.9, + "learning_rate": 4.80510877318117e-05, + "loss": 0.0085, + "step": 21870 + }, + { + "epoch": 3.9, + "learning_rate": 4.8050196148359486e-05, + "loss": 0.0104, + "step": 21880 + }, + { + "epoch": 3.9, + "learning_rate": 4.804930456490728e-05, + "loss": 0.0125, + "step": 21890 + }, + { + "epoch": 3.91, + "learning_rate": 4.804841298145507e-05, + "loss": 0.0058, + "step": 21900 + }, + { + "epoch": 3.91, + "learning_rate": 4.804752139800285e-05, + "loss": 0.0071, + "step": 21910 + }, + { + "epoch": 3.91, + "learning_rate": 4.8046629814550644e-05, + "loss": 0.005, + "step": 21920 + }, + { + "epoch": 3.91, + "learning_rate": 4.804573823109843e-05, + "loss": 0.0077, + "step": 21930 + }, + { + "epoch": 3.91, + "learning_rate": 4.804484664764623e-05, + "loss": 0.0085, + "step": 21940 + }, + { + "epoch": 3.91, + "learning_rate": 4.804395506419401e-05, + "loss": 0.0054, + "step": 21950 + }, + { + "epoch": 3.92, + "learning_rate": 4.80430634807418e-05, + "loss": 0.0101, + "step": 21960 + }, + { + "epoch": 3.92, + "learning_rate": 4.804217189728959e-05, + "loss": 0.0088, + "step": 21970 + }, + { + "epoch": 3.92, + "learning_rate": 4.804128031383738e-05, + "loss": 0.0087, + "step": 21980 + }, + { + "epoch": 3.92, + "learning_rate": 4.804038873038517e-05, + "loss": 0.005, + "step": 21990 + }, + { + "epoch": 3.92, + "learning_rate": 4.8039497146932954e-05, + "loss": 0.0083, + "step": 22000 + }, + { + "epoch": 3.92, + "learning_rate": 4.8038605563480745e-05, + "loss": 0.0096, + "step": 22010 + }, + { + "epoch": 3.93, + "learning_rate": 4.803771398002853e-05, + "loss": 0.006, + "step": 22020 + }, + { + "epoch": 3.93, + "learning_rate": 4.803682239657632e-05, + "loss": 0.0049, + "step": 22030 + }, + { + "epoch": 3.93, + "learning_rate": 4.8035930813124105e-05, + "loss": 0.0087, + "step": 22040 + }, + { + "epoch": 3.93, + "learning_rate": 4.80350392296719e-05, + "loss": 0.0087, + "step": 22050 + }, + { + "epoch": 3.93, + "learning_rate": 4.803414764621969e-05, + "loss": 0.0074, + "step": 22060 + }, + { + "epoch": 3.94, + "learning_rate": 4.8033345221112695e-05, + "loss": 0.0077, + "step": 22070 + }, + { + "epoch": 3.94, + "learning_rate": 4.8032453637660487e-05, + "loss": 0.0091, + "step": 22080 + }, + { + "epoch": 3.94, + "learning_rate": 4.803156205420828e-05, + "loss": 0.0064, + "step": 22090 + }, + { + "epoch": 3.94, + "learning_rate": 4.803067047075607e-05, + "loss": 0.0131, + "step": 22100 + }, + { + "epoch": 3.94, + "learning_rate": 4.8029778887303853e-05, + "loss": 0.0069, + "step": 22110 + }, + { + "epoch": 3.94, + "learning_rate": 4.8028887303851645e-05, + "loss": 0.0071, + "step": 22120 + }, + { + "epoch": 3.95, + "learning_rate": 4.802799572039943e-05, + "loss": 0.0105, + "step": 22130 + }, + { + "epoch": 3.95, + "learning_rate": 4.802710413694722e-05, + "loss": 0.0071, + "step": 22140 + }, + { + "epoch": 3.95, + "learning_rate": 4.802621255349501e-05, + "loss": 0.0082, + "step": 22150 + }, + { + "epoch": 3.95, + "learning_rate": 4.8025320970042796e-05, + "loss": 0.0056, + "step": 22160 + }, + { + "epoch": 3.95, + "learning_rate": 4.802442938659059e-05, + "loss": 0.005, + "step": 22170 + }, + { + "epoch": 3.96, + "learning_rate": 4.802353780313837e-05, + "loss": 0.0085, + "step": 22180 + }, + { + "epoch": 3.96, + "learning_rate": 4.802264621968616e-05, + "loss": 0.0045, + "step": 22190 + }, + { + "epoch": 3.96, + "learning_rate": 4.8021754636233954e-05, + "loss": 0.0079, + "step": 22200 + }, + { + "epoch": 3.96, + "learning_rate": 4.8020863052781745e-05, + "loss": 0.006, + "step": 22210 + }, + { + "epoch": 3.96, + "learning_rate": 4.8019971469329536e-05, + "loss": 0.0049, + "step": 22220 + }, + { + "epoch": 3.96, + "learning_rate": 4.801907988587732e-05, + "loss": 0.0054, + "step": 22230 + }, + { + "epoch": 3.97, + "learning_rate": 4.801818830242511e-05, + "loss": 0.0049, + "step": 22240 + }, + { + "epoch": 3.97, + "learning_rate": 4.8017296718972896e-05, + "loss": 0.0112, + "step": 22250 + }, + { + "epoch": 3.97, + "learning_rate": 4.801640513552069e-05, + "loss": 0.0061, + "step": 22260 + }, + { + "epoch": 3.97, + "learning_rate": 4.801551355206847e-05, + "loss": 0.0059, + "step": 22270 + }, + { + "epoch": 3.97, + "learning_rate": 4.801462196861626e-05, + "loss": 0.0073, + "step": 22280 + }, + { + "epoch": 3.97, + "learning_rate": 4.8013730385164055e-05, + "loss": 0.0042, + "step": 22290 + }, + { + "epoch": 3.98, + "learning_rate": 4.801283880171184e-05, + "loss": 0.0072, + "step": 22300 + }, + { + "epoch": 3.98, + "learning_rate": 4.801194721825964e-05, + "loss": 0.006, + "step": 22310 + }, + { + "epoch": 3.98, + "learning_rate": 4.801105563480742e-05, + "loss": 0.0071, + "step": 22320 + }, + { + "epoch": 3.98, + "learning_rate": 4.801016405135521e-05, + "loss": 0.0083, + "step": 22330 + }, + { + "epoch": 3.98, + "learning_rate": 4.8009272467903e-05, + "loss": 0.0089, + "step": 22340 + }, + { + "epoch": 3.99, + "learning_rate": 4.800838088445079e-05, + "loss": 0.0078, + "step": 22350 + }, + { + "epoch": 3.99, + "learning_rate": 4.800748930099857e-05, + "loss": 0.0067, + "step": 22360 + }, + { + "epoch": 3.99, + "learning_rate": 4.8006597717546364e-05, + "loss": 0.0067, + "step": 22370 + }, + { + "epoch": 3.99, + "learning_rate": 4.8005706134094155e-05, + "loss": 0.0077, + "step": 22380 + }, + { + "epoch": 3.99, + "learning_rate": 4.800481455064194e-05, + "loss": 0.0047, + "step": 22390 + }, + { + "epoch": 3.99, + "learning_rate": 4.800392296718973e-05, + "loss": 0.0067, + "step": 22400 + }, + { + "epoch": 4.0, + "learning_rate": 4.8003031383737515e-05, + "loss": 0.0092, + "step": 22410 + }, + { + "epoch": 4.0, + "learning_rate": 4.800213980028531e-05, + "loss": 0.007, + "step": 22420 + }, + { + "epoch": 4.0, + "learning_rate": 4.80012482168331e-05, + "loss": 0.0062, + "step": 22430 + }, + { + "epoch": 4.0, + "eval_loss": 0.014736342243850231, + "eval_runtime": 195.8169, + "eval_samples_per_second": 23.69, + "eval_steps_per_second": 2.962, + "step": 22432 + }, + { + "epoch": 4.0, + "learning_rate": 4.800035663338089e-05, + "loss": 0.0056, + "step": 22440 + }, + { + "epoch": 4.0, + "learning_rate": 4.799946504992868e-05, + "loss": 0.0065, + "step": 22450 + }, + { + "epoch": 4.0, + "learning_rate": 4.7998573466476464e-05, + "loss": 0.004, + "step": 22460 + }, + { + "epoch": 4.01, + "learning_rate": 4.7997681883024256e-05, + "loss": 0.0041, + "step": 22470 + }, + { + "epoch": 4.01, + "learning_rate": 4.799679029957204e-05, + "loss": 0.005, + "step": 22480 + }, + { + "epoch": 4.01, + "learning_rate": 4.799589871611983e-05, + "loss": 0.0067, + "step": 22490 + }, + { + "epoch": 4.01, + "learning_rate": 4.7995007132667616e-05, + "loss": 0.0068, + "step": 22500 + }, + { + "epoch": 4.01, + "learning_rate": 4.799411554921541e-05, + "loss": 0.0066, + "step": 22510 + }, + { + "epoch": 4.02, + "learning_rate": 4.79932239657632e-05, + "loss": 0.008, + "step": 22520 + }, + { + "epoch": 4.02, + "learning_rate": 4.799233238231099e-05, + "loss": 0.0057, + "step": 22530 + }, + { + "epoch": 4.02, + "learning_rate": 4.799144079885878e-05, + "loss": 0.0062, + "step": 22540 + }, + { + "epoch": 4.02, + "learning_rate": 4.7990549215406565e-05, + "loss": 0.0073, + "step": 22550 + }, + { + "epoch": 4.02, + "learning_rate": 4.7989657631954356e-05, + "loss": 0.0072, + "step": 22560 + }, + { + "epoch": 4.02, + "learning_rate": 4.798876604850214e-05, + "loss": 0.0083, + "step": 22570 + }, + { + "epoch": 4.03, + "learning_rate": 4.798787446504993e-05, + "loss": 0.0085, + "step": 22580 + }, + { + "epoch": 4.03, + "learning_rate": 4.7986982881597716e-05, + "loss": 0.0051, + "step": 22590 + }, + { + "epoch": 4.03, + "learning_rate": 4.798609129814551e-05, + "loss": 0.0104, + "step": 22600 + }, + { + "epoch": 4.03, + "learning_rate": 4.79851997146933e-05, + "loss": 0.0107, + "step": 22610 + }, + { + "epoch": 4.03, + "learning_rate": 4.798430813124108e-05, + "loss": 0.0077, + "step": 22620 + }, + { + "epoch": 4.04, + "learning_rate": 4.7983416547788874e-05, + "loss": 0.0081, + "step": 22630 + }, + { + "epoch": 4.04, + "learning_rate": 4.798252496433666e-05, + "loss": 0.0061, + "step": 22640 + }, + { + "epoch": 4.04, + "learning_rate": 4.798163338088446e-05, + "loss": 0.0069, + "step": 22650 + }, + { + "epoch": 4.04, + "learning_rate": 4.798074179743224e-05, + "loss": 0.009, + "step": 22660 + }, + { + "epoch": 4.04, + "learning_rate": 4.797985021398003e-05, + "loss": 0.0058, + "step": 22670 + }, + { + "epoch": 4.04, + "learning_rate": 4.7978958630527824e-05, + "loss": 0.0083, + "step": 22680 + }, + { + "epoch": 4.05, + "learning_rate": 4.797806704707561e-05, + "loss": 0.0059, + "step": 22690 + }, + { + "epoch": 4.05, + "learning_rate": 4.79771754636234e-05, + "loss": 0.0101, + "step": 22700 + }, + { + "epoch": 4.05, + "learning_rate": 4.7976283880171184e-05, + "loss": 0.0063, + "step": 22710 + }, + { + "epoch": 4.05, + "learning_rate": 4.7975392296718975e-05, + "loss": 0.0082, + "step": 22720 + }, + { + "epoch": 4.05, + "learning_rate": 4.797450071326676e-05, + "loss": 0.0069, + "step": 22730 + }, + { + "epoch": 4.05, + "learning_rate": 4.797360912981455e-05, + "loss": 0.0054, + "step": 22740 + }, + { + "epoch": 4.06, + "learning_rate": 4.797271754636234e-05, + "loss": 0.0096, + "step": 22750 + }, + { + "epoch": 4.06, + "learning_rate": 4.797182596291013e-05, + "loss": 0.0085, + "step": 22760 + }, + { + "epoch": 4.06, + "learning_rate": 4.7970934379457924e-05, + "loss": 0.0058, + "step": 22770 + }, + { + "epoch": 4.06, + "learning_rate": 4.797004279600571e-05, + "loss": 0.0072, + "step": 22780 + }, + { + "epoch": 4.06, + "learning_rate": 4.79691512125535e-05, + "loss": 0.0058, + "step": 22790 + }, + { + "epoch": 4.07, + "learning_rate": 4.7968259629101284e-05, + "loss": 0.0063, + "step": 22800 + }, + { + "epoch": 4.07, + "learning_rate": 4.7967368045649075e-05, + "loss": 0.0076, + "step": 22810 + }, + { + "epoch": 4.07, + "learning_rate": 4.796647646219686e-05, + "loss": 0.0075, + "step": 22820 + }, + { + "epoch": 4.07, + "learning_rate": 4.796558487874465e-05, + "loss": 0.0068, + "step": 22830 + }, + { + "epoch": 4.07, + "learning_rate": 4.796469329529244e-05, + "loss": 0.0059, + "step": 22840 + }, + { + "epoch": 4.07, + "learning_rate": 4.796380171184023e-05, + "loss": 0.0066, + "step": 22850 + }, + { + "epoch": 4.08, + "learning_rate": 4.796291012838802e-05, + "loss": 0.0044, + "step": 22860 + }, + { + "epoch": 4.08, + "learning_rate": 4.796201854493581e-05, + "loss": 0.0047, + "step": 22870 + }, + { + "epoch": 4.08, + "learning_rate": 4.79611269614836e-05, + "loss": 0.0044, + "step": 22880 + }, + { + "epoch": 4.08, + "learning_rate": 4.7960235378031385e-05, + "loss": 0.0075, + "step": 22890 + }, + { + "epoch": 4.08, + "learning_rate": 4.7959343794579176e-05, + "loss": 0.0053, + "step": 22900 + }, + { + "epoch": 4.09, + "learning_rate": 4.795845221112697e-05, + "loss": 0.0059, + "step": 22910 + }, + { + "epoch": 4.09, + "learning_rate": 4.795756062767475e-05, + "loss": 0.0077, + "step": 22920 + }, + { + "epoch": 4.09, + "learning_rate": 4.795666904422254e-05, + "loss": 0.0061, + "step": 22930 + }, + { + "epoch": 4.09, + "learning_rate": 4.795577746077033e-05, + "loss": 0.0059, + "step": 22940 + }, + { + "epoch": 4.09, + "learning_rate": 4.795488587731812e-05, + "loss": 0.0061, + "step": 22950 + }, + { + "epoch": 4.09, + "learning_rate": 4.79539942938659e-05, + "loss": 0.0047, + "step": 22960 + }, + { + "epoch": 4.1, + "learning_rate": 4.7953102710413694e-05, + "loss": 0.0081, + "step": 22970 + }, + { + "epoch": 4.1, + "learning_rate": 4.7952211126961485e-05, + "loss": 0.006, + "step": 22980 + }, + { + "epoch": 4.1, + "learning_rate": 4.7951319543509277e-05, + "loss": 0.0053, + "step": 22990 + }, + { + "epoch": 4.1, + "learning_rate": 4.795042796005707e-05, + "loss": 0.0052, + "step": 23000 + }, + { + "epoch": 4.1, + "learning_rate": 4.794953637660485e-05, + "loss": 0.0085, + "step": 23010 + }, + { + "epoch": 4.1, + "learning_rate": 4.7948644793152643e-05, + "loss": 0.0054, + "step": 23020 + }, + { + "epoch": 4.11, + "learning_rate": 4.794775320970043e-05, + "loss": 0.0066, + "step": 23030 + }, + { + "epoch": 4.11, + "learning_rate": 4.794686162624822e-05, + "loss": 0.0077, + "step": 23040 + }, + { + "epoch": 4.11, + "learning_rate": 4.7945970042796004e-05, + "loss": 0.0042, + "step": 23050 + }, + { + "epoch": 4.11, + "learning_rate": 4.7945078459343795e-05, + "loss": 0.0073, + "step": 23060 + }, + { + "epoch": 4.11, + "learning_rate": 4.7944186875891586e-05, + "loss": 0.0092, + "step": 23070 + }, + { + "epoch": 4.12, + "learning_rate": 4.794329529243937e-05, + "loss": 0.0054, + "step": 23080 + }, + { + "epoch": 4.12, + "learning_rate": 4.794240370898717e-05, + "loss": 0.0053, + "step": 23090 + }, + { + "epoch": 4.12, + "learning_rate": 4.794151212553495e-05, + "loss": 0.0064, + "step": 23100 + }, + { + "epoch": 4.12, + "learning_rate": 4.7940620542082744e-05, + "loss": 0.0055, + "step": 23110 + }, + { + "epoch": 4.12, + "learning_rate": 4.793972895863053e-05, + "loss": 0.0081, + "step": 23120 + }, + { + "epoch": 4.12, + "learning_rate": 4.793883737517832e-05, + "loss": 0.0087, + "step": 23130 + }, + { + "epoch": 4.13, + "learning_rate": 4.793794579172611e-05, + "loss": 0.0058, + "step": 23140 + }, + { + "epoch": 4.13, + "learning_rate": 4.7937054208273895e-05, + "loss": 0.0065, + "step": 23150 + }, + { + "epoch": 4.13, + "learning_rate": 4.7936162624821687e-05, + "loss": 0.0091, + "step": 23160 + }, + { + "epoch": 4.13, + "learning_rate": 4.793527104136947e-05, + "loss": 0.0051, + "step": 23170 + }, + { + "epoch": 4.13, + "learning_rate": 4.793437945791726e-05, + "loss": 0.0048, + "step": 23180 + }, + { + "epoch": 4.14, + "learning_rate": 4.7933487874465047e-05, + "loss": 0.0067, + "step": 23190 + }, + { + "epoch": 4.14, + "learning_rate": 4.7932596291012845e-05, + "loss": 0.0039, + "step": 23200 + }, + { + "epoch": 4.14, + "learning_rate": 4.793170470756063e-05, + "loss": 0.0055, + "step": 23210 + }, + { + "epoch": 4.14, + "learning_rate": 4.793081312410842e-05, + "loss": 0.0058, + "step": 23220 + }, + { + "epoch": 4.14, + "learning_rate": 4.792992154065621e-05, + "loss": 0.0058, + "step": 23230 + }, + { + "epoch": 4.14, + "learning_rate": 4.7929029957203996e-05, + "loss": 0.0062, + "step": 23240 + }, + { + "epoch": 4.15, + "learning_rate": 4.792813837375179e-05, + "loss": 0.0045, + "step": 23250 + }, + { + "epoch": 4.15, + "learning_rate": 4.792724679029957e-05, + "loss": 0.0033, + "step": 23260 + }, + { + "epoch": 4.15, + "learning_rate": 4.792635520684736e-05, + "loss": 0.0052, + "step": 23270 + }, + { + "epoch": 4.15, + "learning_rate": 4.792546362339515e-05, + "loss": 0.0078, + "step": 23280 + }, + { + "epoch": 4.15, + "learning_rate": 4.792457203994294e-05, + "loss": 0.0053, + "step": 23290 + }, + { + "epoch": 4.15, + "learning_rate": 4.792368045649073e-05, + "loss": 0.0052, + "step": 23300 + }, + { + "epoch": 4.16, + "learning_rate": 4.792278887303852e-05, + "loss": 0.0043, + "step": 23310 + }, + { + "epoch": 4.16, + "learning_rate": 4.792189728958631e-05, + "loss": 0.0061, + "step": 23320 + }, + { + "epoch": 4.16, + "learning_rate": 4.7921005706134096e-05, + "loss": 0.0062, + "step": 23330 + }, + { + "epoch": 4.16, + "learning_rate": 4.792011412268189e-05, + "loss": 0.0056, + "step": 23340 + }, + { + "epoch": 4.16, + "learning_rate": 4.791922253922967e-05, + "loss": 0.0065, + "step": 23350 + }, + { + "epoch": 4.17, + "learning_rate": 4.791833095577746e-05, + "loss": 0.0091, + "step": 23360 + }, + { + "epoch": 4.17, + "learning_rate": 4.7917439372325254e-05, + "loss": 0.0077, + "step": 23370 + }, + { + "epoch": 4.17, + "learning_rate": 4.791654778887304e-05, + "loss": 0.0059, + "step": 23380 + }, + { + "epoch": 4.17, + "learning_rate": 4.791565620542083e-05, + "loss": 0.0074, + "step": 23390 + }, + { + "epoch": 4.17, + "learning_rate": 4.7914764621968615e-05, + "loss": 0.006, + "step": 23400 + }, + { + "epoch": 4.17, + "learning_rate": 4.7913873038516406e-05, + "loss": 0.006, + "step": 23410 + }, + { + "epoch": 4.18, + "learning_rate": 4.79129814550642e-05, + "loss": 0.0071, + "step": 23420 + }, + { + "epoch": 4.18, + "learning_rate": 4.791208987161199e-05, + "loss": 0.008, + "step": 23430 + }, + { + "epoch": 4.18, + "learning_rate": 4.791119828815977e-05, + "loss": 0.0063, + "step": 23440 + }, + { + "epoch": 4.18, + "learning_rate": 4.7910306704707564e-05, + "loss": 0.0083, + "step": 23450 + }, + { + "epoch": 4.18, + "learning_rate": 4.7909415121255355e-05, + "loss": 0.0054, + "step": 23460 + }, + { + "epoch": 4.19, + "learning_rate": 4.790852353780314e-05, + "loss": 0.0066, + "step": 23470 + }, + { + "epoch": 4.19, + "learning_rate": 4.790763195435093e-05, + "loss": 0.0077, + "step": 23480 + }, + { + "epoch": 4.19, + "learning_rate": 4.7906740370898715e-05, + "loss": 0.0053, + "step": 23490 + }, + { + "epoch": 4.19, + "learning_rate": 4.7905848787446506e-05, + "loss": 0.01, + "step": 23500 + }, + { + "epoch": 4.19, + "learning_rate": 4.790495720399429e-05, + "loss": 0.007, + "step": 23510 + }, + { + "epoch": 4.19, + "learning_rate": 4.790406562054208e-05, + "loss": 0.006, + "step": 23520 + }, + { + "epoch": 4.2, + "learning_rate": 4.790317403708987e-05, + "loss": 0.0082, + "step": 23530 + }, + { + "epoch": 4.2, + "learning_rate": 4.7902282453637664e-05, + "loss": 0.0068, + "step": 23540 + }, + { + "epoch": 4.2, + "learning_rate": 4.7901390870185456e-05, + "loss": 0.0062, + "step": 23550 + }, + { + "epoch": 4.2, + "learning_rate": 4.790049928673324e-05, + "loss": 0.0094, + "step": 23560 + }, + { + "epoch": 4.2, + "learning_rate": 4.789960770328103e-05, + "loss": 0.0075, + "step": 23570 + }, + { + "epoch": 4.2, + "learning_rate": 4.7898716119828816e-05, + "loss": 0.0055, + "step": 23580 + }, + { + "epoch": 4.21, + "learning_rate": 4.789782453637661e-05, + "loss": 0.0039, + "step": 23590 + }, + { + "epoch": 4.21, + "learning_rate": 4.78969329529244e-05, + "loss": 0.0093, + "step": 23600 + }, + { + "epoch": 4.21, + "learning_rate": 4.789604136947218e-05, + "loss": 0.0067, + "step": 23610 + }, + { + "epoch": 4.21, + "learning_rate": 4.7895149786019974e-05, + "loss": 0.0061, + "step": 23620 + }, + { + "epoch": 4.21, + "learning_rate": 4.789425820256776e-05, + "loss": 0.0089, + "step": 23630 + }, + { + "epoch": 4.22, + "learning_rate": 4.7893366619115556e-05, + "loss": 0.0072, + "step": 23640 + }, + { + "epoch": 4.22, + "learning_rate": 4.789247503566334e-05, + "loss": 0.0079, + "step": 23650 + }, + { + "epoch": 4.22, + "learning_rate": 4.789158345221113e-05, + "loss": 0.0091, + "step": 23660 + }, + { + "epoch": 4.22, + "learning_rate": 4.7890691868758916e-05, + "loss": 0.008, + "step": 23670 + }, + { + "epoch": 4.22, + "learning_rate": 4.788980028530671e-05, + "loss": 0.0069, + "step": 23680 + }, + { + "epoch": 4.22, + "learning_rate": 4.78889087018545e-05, + "loss": 0.0064, + "step": 23690 + }, + { + "epoch": 4.23, + "learning_rate": 4.788801711840228e-05, + "loss": 0.0077, + "step": 23700 + }, + { + "epoch": 4.23, + "learning_rate": 4.7887125534950074e-05, + "loss": 0.0059, + "step": 23710 + }, + { + "epoch": 4.23, + "learning_rate": 4.788623395149786e-05, + "loss": 0.0073, + "step": 23720 + }, + { + "epoch": 4.23, + "learning_rate": 4.788534236804565e-05, + "loss": 0.0045, + "step": 23730 + }, + { + "epoch": 4.23, + "learning_rate": 4.7884450784593434e-05, + "loss": 0.0075, + "step": 23740 + }, + { + "epoch": 4.24, + "learning_rate": 4.788355920114123e-05, + "loss": 0.0099, + "step": 23750 + }, + { + "epoch": 4.24, + "learning_rate": 4.788266761768902e-05, + "loss": 0.0038, + "step": 23760 + }, + { + "epoch": 4.24, + "learning_rate": 4.788177603423681e-05, + "loss": 0.0057, + "step": 23770 + }, + { + "epoch": 4.24, + "learning_rate": 4.78808844507846e-05, + "loss": 0.0072, + "step": 23780 + }, + { + "epoch": 4.24, + "learning_rate": 4.7879992867332384e-05, + "loss": 0.0096, + "step": 23790 + }, + { + "epoch": 4.24, + "learning_rate": 4.7879101283880175e-05, + "loss": 0.0043, + "step": 23800 + }, + { + "epoch": 4.25, + "learning_rate": 4.787820970042796e-05, + "loss": 0.0074, + "step": 23810 + }, + { + "epoch": 4.25, + "learning_rate": 4.787731811697575e-05, + "loss": 0.0084, + "step": 23820 + }, + { + "epoch": 4.25, + "learning_rate": 4.787642653352354e-05, + "loss": 0.0056, + "step": 23830 + }, + { + "epoch": 4.25, + "learning_rate": 4.7875534950071326e-05, + "loss": 0.0079, + "step": 23840 + }, + { + "epoch": 4.25, + "learning_rate": 4.787464336661912e-05, + "loss": 0.0046, + "step": 23850 + }, + { + "epoch": 4.25, + "learning_rate": 4.787375178316691e-05, + "loss": 0.0055, + "step": 23860 + }, + { + "epoch": 4.26, + "learning_rate": 4.78728601997147e-05, + "loss": 0.0081, + "step": 23870 + }, + { + "epoch": 4.26, + "learning_rate": 4.7871968616262484e-05, + "loss": 0.0129, + "step": 23880 + }, + { + "epoch": 4.26, + "learning_rate": 4.7871077032810275e-05, + "loss": 0.0078, + "step": 23890 + }, + { + "epoch": 4.26, + "learning_rate": 4.787018544935806e-05, + "loss": 0.0065, + "step": 23900 + }, + { + "epoch": 4.26, + "learning_rate": 4.786929386590585e-05, + "loss": 0.0058, + "step": 23910 + }, + { + "epoch": 4.27, + "learning_rate": 4.786840228245364e-05, + "loss": 0.0088, + "step": 23920 + }, + { + "epoch": 4.27, + "learning_rate": 4.786751069900143e-05, + "loss": 0.0071, + "step": 23930 + }, + { + "epoch": 4.27, + "learning_rate": 4.786661911554922e-05, + "loss": 0.0057, + "step": 23940 + }, + { + "epoch": 4.27, + "learning_rate": 4.7865727532097e-05, + "loss": 0.0066, + "step": 23950 + }, + { + "epoch": 4.27, + "learning_rate": 4.7864835948644794e-05, + "loss": 0.0053, + "step": 23960 + }, + { + "epoch": 4.27, + "learning_rate": 4.7863944365192585e-05, + "loss": 0.0059, + "step": 23970 + }, + { + "epoch": 4.28, + "learning_rate": 4.7863052781740376e-05, + "loss": 0.005, + "step": 23980 + }, + { + "epoch": 4.28, + "learning_rate": 4.786216119828816e-05, + "loss": 0.0043, + "step": 23990 + }, + { + "epoch": 4.28, + "learning_rate": 4.786126961483595e-05, + "loss": 0.0058, + "step": 24000 + }, + { + "epoch": 4.28, + "learning_rate": 4.786037803138374e-05, + "loss": 0.0054, + "step": 24010 + }, + { + "epoch": 4.28, + "learning_rate": 4.785948644793153e-05, + "loss": 0.0061, + "step": 24020 + }, + { + "epoch": 4.28, + "learning_rate": 4.785859486447932e-05, + "loss": 0.007, + "step": 24030 + }, + { + "epoch": 4.29, + "learning_rate": 4.78577032810271e-05, + "loss": 0.0089, + "step": 24040 + }, + { + "epoch": 4.29, + "learning_rate": 4.7856811697574894e-05, + "loss": 0.009, + "step": 24050 + }, + { + "epoch": 4.29, + "learning_rate": 4.7855920114122685e-05, + "loss": 0.01, + "step": 24060 + }, + { + "epoch": 4.29, + "learning_rate": 4.785502853067047e-05, + "loss": 0.0056, + "step": 24070 + }, + { + "epoch": 4.29, + "learning_rate": 4.785413694721826e-05, + "loss": 0.0055, + "step": 24080 + }, + { + "epoch": 4.3, + "learning_rate": 4.785324536376605e-05, + "loss": 0.0062, + "step": 24090 + }, + { + "epoch": 4.3, + "learning_rate": 4.7852353780313843e-05, + "loss": 0.0076, + "step": 24100 + }, + { + "epoch": 4.3, + "learning_rate": 4.785146219686163e-05, + "loss": 0.0091, + "step": 24110 + }, + { + "epoch": 4.3, + "learning_rate": 4.785057061340942e-05, + "loss": 0.0081, + "step": 24120 + }, + { + "epoch": 4.3, + "learning_rate": 4.7849679029957203e-05, + "loss": 0.0035, + "step": 24130 + }, + { + "epoch": 4.3, + "learning_rate": 4.7848787446504995e-05, + "loss": 0.0057, + "step": 24140 + }, + { + "epoch": 4.31, + "learning_rate": 4.7847895863052786e-05, + "loss": 0.0076, + "step": 24150 + }, + { + "epoch": 4.31, + "learning_rate": 4.784700427960057e-05, + "loss": 0.007, + "step": 24160 + }, + { + "epoch": 4.31, + "learning_rate": 4.784611269614836e-05, + "loss": 0.0072, + "step": 24170 + }, + { + "epoch": 4.31, + "learning_rate": 4.7845221112696146e-05, + "loss": 0.0056, + "step": 24180 + }, + { + "epoch": 4.31, + "learning_rate": 4.7844329529243944e-05, + "loss": 0.0117, + "step": 24190 + }, + { + "epoch": 4.32, + "learning_rate": 4.784343794579173e-05, + "loss": 0.007, + "step": 24200 + }, + { + "epoch": 4.32, + "learning_rate": 4.784254636233952e-05, + "loss": 0.0058, + "step": 24210 + }, + { + "epoch": 4.32, + "learning_rate": 4.7841654778887304e-05, + "loss": 0.008, + "step": 24220 + }, + { + "epoch": 4.32, + "learning_rate": 4.7840763195435095e-05, + "loss": 0.0044, + "step": 24230 + }, + { + "epoch": 4.32, + "learning_rate": 4.7839871611982886e-05, + "loss": 0.0067, + "step": 24240 + }, + { + "epoch": 4.32, + "learning_rate": 4.783898002853067e-05, + "loss": 0.0064, + "step": 24250 + }, + { + "epoch": 4.33, + "learning_rate": 4.783808844507846e-05, + "loss": 0.0084, + "step": 24260 + }, + { + "epoch": 4.33, + "learning_rate": 4.7837196861626247e-05, + "loss": 0.0068, + "step": 24270 + }, + { + "epoch": 4.33, + "learning_rate": 4.783630527817404e-05, + "loss": 0.0073, + "step": 24280 + }, + { + "epoch": 4.33, + "learning_rate": 4.783541369472183e-05, + "loss": 0.0066, + "step": 24290 + }, + { + "epoch": 4.33, + "learning_rate": 4.783452211126962e-05, + "loss": 0.0091, + "step": 24300 + }, + { + "epoch": 4.33, + "learning_rate": 4.7833630527817405e-05, + "loss": 0.0068, + "step": 24310 + }, + { + "epoch": 4.34, + "learning_rate": 4.7832738944365196e-05, + "loss": 0.009, + "step": 24320 + }, + { + "epoch": 4.34, + "learning_rate": 4.783184736091299e-05, + "loss": 0.0074, + "step": 24330 + }, + { + "epoch": 4.34, + "learning_rate": 4.783095577746077e-05, + "loss": 0.0056, + "step": 24340 + }, + { + "epoch": 4.34, + "learning_rate": 4.783006419400856e-05, + "loss": 0.0075, + "step": 24350 + }, + { + "epoch": 4.34, + "learning_rate": 4.782917261055635e-05, + "loss": 0.0039, + "step": 24360 + }, + { + "epoch": 4.35, + "learning_rate": 4.782828102710414e-05, + "loss": 0.007, + "step": 24370 + }, + { + "epoch": 4.35, + "learning_rate": 4.782738944365193e-05, + "loss": 0.0057, + "step": 24380 + }, + { + "epoch": 4.35, + "learning_rate": 4.7826497860199714e-05, + "loss": 0.0024, + "step": 24390 + }, + { + "epoch": 4.35, + "learning_rate": 4.7825606276747505e-05, + "loss": 0.0067, + "step": 24400 + }, + { + "epoch": 4.35, + "learning_rate": 4.7824714693295296e-05, + "loss": 0.0089, + "step": 24410 + }, + { + "epoch": 4.35, + "learning_rate": 4.782382310984309e-05, + "loss": 0.0046, + "step": 24420 + }, + { + "epoch": 4.36, + "learning_rate": 4.782293152639087e-05, + "loss": 0.0081, + "step": 24430 + }, + { + "epoch": 4.36, + "learning_rate": 4.782203994293866e-05, + "loss": 0.0096, + "step": 24440 + }, + { + "epoch": 4.36, + "learning_rate": 4.782114835948645e-05, + "loss": 0.0039, + "step": 24450 + }, + { + "epoch": 4.36, + "learning_rate": 4.782025677603424e-05, + "loss": 0.0053, + "step": 24460 + }, + { + "epoch": 4.36, + "learning_rate": 4.781936519258203e-05, + "loss": 0.0111, + "step": 24470 + }, + { + "epoch": 4.37, + "learning_rate": 4.7818473609129815e-05, + "loss": 0.0041, + "step": 24480 + }, + { + "epoch": 4.37, + "learning_rate": 4.7817582025677606e-05, + "loss": 0.0083, + "step": 24490 + }, + { + "epoch": 4.37, + "learning_rate": 4.7816779600570614e-05, + "loss": 0.0113, + "step": 24500 + }, + { + "epoch": 4.37, + "learning_rate": 4.7815888017118405e-05, + "loss": 0.0046, + "step": 24510 + }, + { + "epoch": 4.37, + "learning_rate": 4.781499643366619e-05, + "loss": 0.0039, + "step": 24520 + }, + { + "epoch": 4.37, + "learning_rate": 4.781410485021398e-05, + "loss": 0.0054, + "step": 24530 + }, + { + "epoch": 4.38, + "learning_rate": 4.781321326676177e-05, + "loss": 0.0062, + "step": 24540 + }, + { + "epoch": 4.38, + "learning_rate": 4.7812321683309556e-05, + "loss": 0.0071, + "step": 24550 + }, + { + "epoch": 4.38, + "learning_rate": 4.7811430099857354e-05, + "loss": 0.0047, + "step": 24560 + }, + { + "epoch": 4.38, + "learning_rate": 4.781053851640514e-05, + "loss": 0.0066, + "step": 24570 + }, + { + "epoch": 4.38, + "learning_rate": 4.780964693295293e-05, + "loss": 0.0051, + "step": 24580 + }, + { + "epoch": 4.38, + "learning_rate": 4.7808755349500714e-05, + "loss": 0.0061, + "step": 24590 + }, + { + "epoch": 4.39, + "learning_rate": 4.7807863766048506e-05, + "loss": 0.01, + "step": 24600 + }, + { + "epoch": 4.39, + "learning_rate": 4.78069721825963e-05, + "loss": 0.0059, + "step": 24610 + }, + { + "epoch": 4.39, + "learning_rate": 4.780608059914408e-05, + "loss": 0.0082, + "step": 24620 + }, + { + "epoch": 4.39, + "learning_rate": 4.780518901569187e-05, + "loss": 0.0046, + "step": 24630 + }, + { + "epoch": 4.39, + "learning_rate": 4.780429743223966e-05, + "loss": 0.0093, + "step": 24640 + }, + { + "epoch": 4.4, + "learning_rate": 4.780340584878745e-05, + "loss": 0.0047, + "step": 24650 + }, + { + "epoch": 4.4, + "learning_rate": 4.780251426533523e-05, + "loss": 0.0073, + "step": 24660 + }, + { + "epoch": 4.4, + "learning_rate": 4.780162268188303e-05, + "loss": 0.0062, + "step": 24670 + }, + { + "epoch": 4.4, + "learning_rate": 4.7800731098430815e-05, + "loss": 0.0081, + "step": 24680 + }, + { + "epoch": 4.4, + "learning_rate": 4.7799839514978606e-05, + "loss": 0.0055, + "step": 24690 + }, + { + "epoch": 4.4, + "learning_rate": 4.77989479315264e-05, + "loss": 0.0069, + "step": 24700 + }, + { + "epoch": 4.41, + "learning_rate": 4.779805634807418e-05, + "loss": 0.0076, + "step": 24710 + }, + { + "epoch": 4.41, + "learning_rate": 4.779716476462197e-05, + "loss": 0.0069, + "step": 24720 + }, + { + "epoch": 4.41, + "learning_rate": 4.779627318116976e-05, + "loss": 0.0082, + "step": 24730 + }, + { + "epoch": 4.41, + "learning_rate": 4.779538159771755e-05, + "loss": 0.0077, + "step": 24740 + }, + { + "epoch": 4.41, + "learning_rate": 4.779449001426533e-05, + "loss": 0.0058, + "step": 24750 + }, + { + "epoch": 4.42, + "learning_rate": 4.7793598430813124e-05, + "loss": 0.008, + "step": 24760 + }, + { + "epoch": 4.42, + "learning_rate": 4.7792706847360915e-05, + "loss": 0.0072, + "step": 24770 + }, + { + "epoch": 4.42, + "learning_rate": 4.779181526390871e-05, + "loss": 0.0088, + "step": 24780 + }, + { + "epoch": 4.42, + "learning_rate": 4.77909236804565e-05, + "loss": 0.0081, + "step": 24790 + }, + { + "epoch": 4.42, + "learning_rate": 4.779003209700428e-05, + "loss": 0.0042, + "step": 24800 + }, + { + "epoch": 4.42, + "learning_rate": 4.7789140513552074e-05, + "loss": 0.0076, + "step": 24810 + }, + { + "epoch": 4.43, + "learning_rate": 4.778824893009986e-05, + "loss": 0.0097, + "step": 24820 + }, + { + "epoch": 4.43, + "learning_rate": 4.778735734664765e-05, + "loss": 0.0103, + "step": 24830 + }, + { + "epoch": 4.43, + "learning_rate": 4.778646576319544e-05, + "loss": 0.0061, + "step": 24840 + }, + { + "epoch": 4.43, + "learning_rate": 4.7785574179743225e-05, + "loss": 0.0069, + "step": 24850 + }, + { + "epoch": 4.43, + "learning_rate": 4.7784682596291016e-05, + "loss": 0.0057, + "step": 24860 + }, + { + "epoch": 4.43, + "learning_rate": 4.7783880171184024e-05, + "loss": 0.0083, + "step": 24870 + }, + { + "epoch": 4.44, + "learning_rate": 4.7782988587731815e-05, + "loss": 0.0047, + "step": 24880 + }, + { + "epoch": 4.44, + "learning_rate": 4.77820970042796e-05, + "loss": 0.0057, + "step": 24890 + }, + { + "epoch": 4.44, + "learning_rate": 4.778120542082739e-05, + "loss": 0.0067, + "step": 24900 + }, + { + "epoch": 4.44, + "learning_rate": 4.778031383737518e-05, + "loss": 0.0067, + "step": 24910 + }, + { + "epoch": 4.44, + "learning_rate": 4.7779422253922967e-05, + "loss": 0.0053, + "step": 24920 + }, + { + "epoch": 4.45, + "learning_rate": 4.777853067047076e-05, + "loss": 0.0065, + "step": 24930 + }, + { + "epoch": 4.45, + "learning_rate": 4.777763908701855e-05, + "loss": 0.0093, + "step": 24940 + }, + { + "epoch": 4.45, + "learning_rate": 4.777674750356634e-05, + "loss": 0.0054, + "step": 24950 + }, + { + "epoch": 4.45, + "learning_rate": 4.7775855920114125e-05, + "loss": 0.0075, + "step": 24960 + }, + { + "epoch": 4.45, + "learning_rate": 4.7774964336661916e-05, + "loss": 0.0059, + "step": 24970 + }, + { + "epoch": 4.45, + "learning_rate": 4.77740727532097e-05, + "loss": 0.008, + "step": 24980 + }, + { + "epoch": 4.46, + "learning_rate": 4.777318116975749e-05, + "loss": 0.0063, + "step": 24990 + }, + { + "epoch": 4.46, + "learning_rate": 4.777228958630528e-05, + "loss": 0.0108, + "step": 25000 + }, + { + "epoch": 4.46, + "learning_rate": 4.777139800285307e-05, + "loss": 0.0067, + "step": 25010 + }, + { + "epoch": 4.46, + "learning_rate": 4.777050641940086e-05, + "loss": 0.0046, + "step": 25020 + }, + { + "epoch": 4.46, + "learning_rate": 4.776961483594864e-05, + "loss": 0.0039, + "step": 25030 + }, + { + "epoch": 4.47, + "learning_rate": 4.7768723252496434e-05, + "loss": 0.0082, + "step": 25040 + }, + { + "epoch": 4.47, + "learning_rate": 4.7767831669044225e-05, + "loss": 0.0039, + "step": 25050 + }, + { + "epoch": 4.47, + "learning_rate": 4.7766940085592016e-05, + "loss": 0.0076, + "step": 25060 + }, + { + "epoch": 4.47, + "learning_rate": 4.77660485021398e-05, + "loss": 0.0063, + "step": 25070 + }, + { + "epoch": 4.47, + "learning_rate": 4.776515691868759e-05, + "loss": 0.0056, + "step": 25080 + }, + { + "epoch": 4.47, + "learning_rate": 4.776426533523538e-05, + "loss": 0.0099, + "step": 25090 + }, + { + "epoch": 4.48, + "learning_rate": 4.776337375178317e-05, + "loss": 0.0078, + "step": 25100 + }, + { + "epoch": 4.48, + "learning_rate": 4.776248216833096e-05, + "loss": 0.0056, + "step": 25110 + }, + { + "epoch": 4.48, + "learning_rate": 4.776159058487874e-05, + "loss": 0.0059, + "step": 25120 + }, + { + "epoch": 4.48, + "learning_rate": 4.7760699001426535e-05, + "loss": 0.0066, + "step": 25130 + }, + { + "epoch": 4.48, + "learning_rate": 4.7759807417974326e-05, + "loss": 0.0059, + "step": 25140 + }, + { + "epoch": 4.48, + "learning_rate": 4.775891583452211e-05, + "loss": 0.0063, + "step": 25150 + }, + { + "epoch": 4.49, + "learning_rate": 4.775802425106991e-05, + "loss": 0.0053, + "step": 25160 + }, + { + "epoch": 4.49, + "learning_rate": 4.775713266761769e-05, + "loss": 0.0062, + "step": 25170 + }, + { + "epoch": 4.49, + "learning_rate": 4.7756241084165484e-05, + "loss": 0.0118, + "step": 25180 + }, + { + "epoch": 4.49, + "learning_rate": 4.775534950071327e-05, + "loss": 0.0072, + "step": 25190 + }, + { + "epoch": 4.49, + "learning_rate": 4.775445791726106e-05, + "loss": 0.0049, + "step": 25200 + }, + { + "epoch": 4.5, + "learning_rate": 4.7753566333808844e-05, + "loss": 0.0076, + "step": 25210 + }, + { + "epoch": 4.5, + "learning_rate": 4.7752674750356635e-05, + "loss": 0.0102, + "step": 25220 + }, + { + "epoch": 4.5, + "learning_rate": 4.7751783166904426e-05, + "loss": 0.0071, + "step": 25230 + }, + { + "epoch": 4.5, + "learning_rate": 4.775089158345221e-05, + "loss": 0.0033, + "step": 25240 + }, + { + "epoch": 4.5, + "learning_rate": 4.775e-05, + "loss": 0.0055, + "step": 25250 + }, + { + "epoch": 4.5, + "learning_rate": 4.7749108416547786e-05, + "loss": 0.0058, + "step": 25260 + }, + { + "epoch": 4.51, + "learning_rate": 4.7748216833095584e-05, + "loss": 0.0062, + "step": 25270 + }, + { + "epoch": 4.51, + "learning_rate": 4.774732524964337e-05, + "loss": 0.0038, + "step": 25280 + }, + { + "epoch": 4.51, + "learning_rate": 4.774643366619116e-05, + "loss": 0.0066, + "step": 25290 + }, + { + "epoch": 4.51, + "learning_rate": 4.7745542082738944e-05, + "loss": 0.0059, + "step": 25300 + }, + { + "epoch": 4.51, + "learning_rate": 4.7744650499286736e-05, + "loss": 0.0066, + "step": 25310 + }, + { + "epoch": 4.51, + "learning_rate": 4.774375891583453e-05, + "loss": 0.0047, + "step": 25320 + }, + { + "epoch": 4.52, + "learning_rate": 4.774286733238231e-05, + "loss": 0.0063, + "step": 25330 + }, + { + "epoch": 4.52, + "learning_rate": 4.77419757489301e-05, + "loss": 0.0057, + "step": 25340 + }, + { + "epoch": 4.52, + "learning_rate": 4.774108416547789e-05, + "loss": 0.0073, + "step": 25350 + }, + { + "epoch": 4.52, + "learning_rate": 4.774019258202568e-05, + "loss": 0.0078, + "step": 25360 + }, + { + "epoch": 4.52, + "learning_rate": 4.773930099857347e-05, + "loss": 0.0059, + "step": 25370 + }, + { + "epoch": 4.53, + "learning_rate": 4.773840941512126e-05, + "loss": 0.0078, + "step": 25380 + }, + { + "epoch": 4.53, + "learning_rate": 4.773751783166905e-05, + "loss": 0.0063, + "step": 25390 + }, + { + "epoch": 4.53, + "learning_rate": 4.7736626248216836e-05, + "loss": 0.006, + "step": 25400 + }, + { + "epoch": 4.53, + "learning_rate": 4.773573466476463e-05, + "loss": 0.0055, + "step": 25410 + }, + { + "epoch": 4.53, + "learning_rate": 4.773484308131241e-05, + "loss": 0.0081, + "step": 25420 + }, + { + "epoch": 4.53, + "learning_rate": 4.77339514978602e-05, + "loss": 0.0054, + "step": 25430 + }, + { + "epoch": 4.54, + "learning_rate": 4.773305991440799e-05, + "loss": 0.0066, + "step": 25440 + }, + { + "epoch": 4.54, + "learning_rate": 4.773216833095578e-05, + "loss": 0.0088, + "step": 25450 + }, + { + "epoch": 4.54, + "learning_rate": 4.773127674750357e-05, + "loss": 0.0059, + "step": 25460 + }, + { + "epoch": 4.54, + "learning_rate": 4.7730385164051354e-05, + "loss": 0.0082, + "step": 25470 + }, + { + "epoch": 4.54, + "learning_rate": 4.7729493580599146e-05, + "loss": 0.0062, + "step": 25480 + }, + { + "epoch": 4.55, + "learning_rate": 4.772860199714694e-05, + "loss": 0.004, + "step": 25490 + }, + { + "epoch": 4.55, + "learning_rate": 4.772771041369473e-05, + "loss": 0.0062, + "step": 25500 + }, + { + "epoch": 4.55, + "learning_rate": 4.772681883024251e-05, + "loss": 0.0087, + "step": 25510 + }, + { + "epoch": 4.55, + "learning_rate": 4.7725927246790304e-05, + "loss": 0.0075, + "step": 25520 + }, + { + "epoch": 4.55, + "learning_rate": 4.772503566333809e-05, + "loss": 0.0046, + "step": 25530 + }, + { + "epoch": 4.55, + "learning_rate": 4.772414407988588e-05, + "loss": 0.0054, + "step": 25540 + }, + { + "epoch": 4.56, + "learning_rate": 4.772325249643367e-05, + "loss": 0.0075, + "step": 25550 + }, + { + "epoch": 4.56, + "learning_rate": 4.7722360912981455e-05, + "loss": 0.0062, + "step": 25560 + }, + { + "epoch": 4.56, + "learning_rate": 4.7721469329529246e-05, + "loss": 0.0052, + "step": 25570 + }, + { + "epoch": 4.56, + "learning_rate": 4.772057774607703e-05, + "loss": 0.0074, + "step": 25580 + }, + { + "epoch": 4.56, + "learning_rate": 4.771968616262482e-05, + "loss": 0.0065, + "step": 25590 + }, + { + "epoch": 4.56, + "learning_rate": 4.771879457917261e-05, + "loss": 0.0095, + "step": 25600 + }, + { + "epoch": 4.57, + "learning_rate": 4.7717902995720404e-05, + "loss": 0.0031, + "step": 25610 + }, + { + "epoch": 4.57, + "learning_rate": 4.7717011412268195e-05, + "loss": 0.0044, + "step": 25620 + }, + { + "epoch": 4.57, + "learning_rate": 4.771611982881598e-05, + "loss": 0.0077, + "step": 25630 + }, + { + "epoch": 4.57, + "learning_rate": 4.771522824536377e-05, + "loss": 0.005, + "step": 25640 + }, + { + "epoch": 4.57, + "learning_rate": 4.7714336661911555e-05, + "loss": 0.0058, + "step": 25650 + }, + { + "epoch": 4.58, + "learning_rate": 4.771344507845935e-05, + "loss": 0.0057, + "step": 25660 + }, + { + "epoch": 4.58, + "learning_rate": 4.771255349500713e-05, + "loss": 0.0076, + "step": 25670 + }, + { + "epoch": 4.58, + "learning_rate": 4.771166191155492e-05, + "loss": 0.0065, + "step": 25680 + }, + { + "epoch": 4.58, + "learning_rate": 4.7710770328102714e-05, + "loss": 0.0048, + "step": 25690 + }, + { + "epoch": 4.58, + "learning_rate": 4.77098787446505e-05, + "loss": 0.0058, + "step": 25700 + }, + { + "epoch": 4.58, + "learning_rate": 4.7708987161198296e-05, + "loss": 0.0049, + "step": 25710 + }, + { + "epoch": 4.59, + "learning_rate": 4.770809557774608e-05, + "loss": 0.0087, + "step": 25720 + }, + { + "epoch": 4.59, + "learning_rate": 4.770720399429387e-05, + "loss": 0.0094, + "step": 25730 + }, + { + "epoch": 4.59, + "learning_rate": 4.7706312410841656e-05, + "loss": 0.0038, + "step": 25740 + }, + { + "epoch": 4.59, + "learning_rate": 4.770542082738945e-05, + "loss": 0.0074, + "step": 25750 + }, + { + "epoch": 4.59, + "learning_rate": 4.770452924393723e-05, + "loss": 0.0053, + "step": 25760 + }, + { + "epoch": 4.6, + "learning_rate": 4.770363766048502e-05, + "loss": 0.006, + "step": 25770 + }, + { + "epoch": 4.6, + "learning_rate": 4.7702746077032814e-05, + "loss": 0.0068, + "step": 25780 + }, + { + "epoch": 4.6, + "learning_rate": 4.77018544935806e-05, + "loss": 0.0045, + "step": 25790 + }, + { + "epoch": 4.6, + "learning_rate": 4.770096291012839e-05, + "loss": 0.0088, + "step": 25800 + }, + { + "epoch": 4.6, + "learning_rate": 4.7700071326676174e-05, + "loss": 0.0066, + "step": 25810 + }, + { + "epoch": 4.6, + "learning_rate": 4.769917974322397e-05, + "loss": 0.009, + "step": 25820 + }, + { + "epoch": 4.61, + "learning_rate": 4.7698288159771757e-05, + "loss": 0.0052, + "step": 25830 + }, + { + "epoch": 4.61, + "learning_rate": 4.769739657631955e-05, + "loss": 0.0061, + "step": 25840 + }, + { + "epoch": 4.61, + "learning_rate": 4.769650499286734e-05, + "loss": 0.0061, + "step": 25850 + }, + { + "epoch": 4.61, + "learning_rate": 4.7695613409415123e-05, + "loss": 0.0052, + "step": 25860 + }, + { + "epoch": 4.61, + "learning_rate": 4.7694721825962915e-05, + "loss": 0.007, + "step": 25870 + }, + { + "epoch": 4.61, + "learning_rate": 4.76938302425107e-05, + "loss": 0.0031, + "step": 25880 + }, + { + "epoch": 4.62, + "learning_rate": 4.769293865905849e-05, + "loss": 0.0069, + "step": 25890 + }, + { + "epoch": 4.62, + "learning_rate": 4.7692047075606275e-05, + "loss": 0.0038, + "step": 25900 + }, + { + "epoch": 4.62, + "learning_rate": 4.7691155492154066e-05, + "loss": 0.006, + "step": 25910 + }, + { + "epoch": 4.62, + "learning_rate": 4.769026390870186e-05, + "loss": 0.0071, + "step": 25920 + }, + { + "epoch": 4.62, + "learning_rate": 4.768937232524965e-05, + "loss": 0.0047, + "step": 25930 + }, + { + "epoch": 4.63, + "learning_rate": 4.768848074179744e-05, + "loss": 0.0085, + "step": 25940 + }, + { + "epoch": 4.63, + "learning_rate": 4.7687589158345224e-05, + "loss": 0.0089, + "step": 25950 + }, + { + "epoch": 4.63, + "learning_rate": 4.7686697574893015e-05, + "loss": 0.0101, + "step": 25960 + }, + { + "epoch": 4.63, + "learning_rate": 4.76858059914408e-05, + "loss": 0.0068, + "step": 25970 + }, + { + "epoch": 4.63, + "learning_rate": 4.768491440798859e-05, + "loss": 0.0083, + "step": 25980 + }, + { + "epoch": 4.63, + "learning_rate": 4.7684022824536375e-05, + "loss": 0.0069, + "step": 25990 + }, + { + "epoch": 4.64, + "learning_rate": 4.7683131241084166e-05, + "loss": 0.0081, + "step": 26000 + }, + { + "epoch": 4.64, + "learning_rate": 4.768223965763196e-05, + "loss": 0.0059, + "step": 26010 + }, + { + "epoch": 4.64, + "learning_rate": 4.768134807417974e-05, + "loss": 0.0071, + "step": 26020 + }, + { + "epoch": 4.64, + "learning_rate": 4.768045649072753e-05, + "loss": 0.0036, + "step": 26030 + }, + { + "epoch": 4.64, + "learning_rate": 4.7679564907275325e-05, + "loss": 0.0062, + "step": 26040 + }, + { + "epoch": 4.65, + "learning_rate": 4.7678673323823116e-05, + "loss": 0.0069, + "step": 26050 + }, + { + "epoch": 4.65, + "learning_rate": 4.76777817403709e-05, + "loss": 0.0056, + "step": 26060 + }, + { + "epoch": 4.65, + "learning_rate": 4.767689015691869e-05, + "loss": 0.0077, + "step": 26070 + }, + { + "epoch": 4.65, + "learning_rate": 4.767599857346648e-05, + "loss": 0.0079, + "step": 26080 + }, + { + "epoch": 4.65, + "learning_rate": 4.767510699001427e-05, + "loss": 0.0056, + "step": 26090 + }, + { + "epoch": 4.65, + "learning_rate": 4.767421540656206e-05, + "loss": 0.0058, + "step": 26100 + }, + { + "epoch": 4.66, + "learning_rate": 4.767332382310984e-05, + "loss": 0.0063, + "step": 26110 + }, + { + "epoch": 4.66, + "learning_rate": 4.7672432239657634e-05, + "loss": 0.0055, + "step": 26120 + }, + { + "epoch": 4.66, + "learning_rate": 4.767154065620542e-05, + "loss": 0.0087, + "step": 26130 + }, + { + "epoch": 4.66, + "learning_rate": 4.767064907275321e-05, + "loss": 0.0074, + "step": 26140 + }, + { + "epoch": 4.66, + "learning_rate": 4.7669757489301e-05, + "loss": 0.007, + "step": 26150 + }, + { + "epoch": 4.66, + "learning_rate": 4.766886590584879e-05, + "loss": 0.0088, + "step": 26160 + }, + { + "epoch": 4.67, + "learning_rate": 4.766797432239658e-05, + "loss": 0.0076, + "step": 26170 + }, + { + "epoch": 4.67, + "learning_rate": 4.766708273894437e-05, + "loss": 0.0092, + "step": 26180 + }, + { + "epoch": 4.67, + "learning_rate": 4.766619115549216e-05, + "loss": 0.006, + "step": 26190 + }, + { + "epoch": 4.67, + "learning_rate": 4.766529957203994e-05, + "loss": 0.01, + "step": 26200 + }, + { + "epoch": 4.67, + "learning_rate": 4.7664407988587734e-05, + "loss": 0.007, + "step": 26210 + }, + { + "epoch": 4.68, + "learning_rate": 4.766351640513552e-05, + "loss": 0.004, + "step": 26220 + }, + { + "epoch": 4.68, + "learning_rate": 4.766262482168331e-05, + "loss": 0.0088, + "step": 26230 + }, + { + "epoch": 4.68, + "learning_rate": 4.76617332382311e-05, + "loss": 0.0081, + "step": 26240 + }, + { + "epoch": 4.68, + "learning_rate": 4.7660841654778886e-05, + "loss": 0.0062, + "step": 26250 + }, + { + "epoch": 4.68, + "learning_rate": 4.7659950071326684e-05, + "loss": 0.0079, + "step": 26260 + }, + { + "epoch": 4.68, + "learning_rate": 4.765905848787447e-05, + "loss": 0.0067, + "step": 26270 + }, + { + "epoch": 4.69, + "learning_rate": 4.765816690442226e-05, + "loss": 0.0056, + "step": 26280 + }, + { + "epoch": 4.69, + "learning_rate": 4.7657275320970044e-05, + "loss": 0.0081, + "step": 26290 + }, + { + "epoch": 4.69, + "learning_rate": 4.7656383737517835e-05, + "loss": 0.0064, + "step": 26300 + }, + { + "epoch": 4.69, + "learning_rate": 4.7655492154065626e-05, + "loss": 0.0071, + "step": 26310 + }, + { + "epoch": 4.69, + "learning_rate": 4.765460057061341e-05, + "loss": 0.0056, + "step": 26320 + }, + { + "epoch": 4.7, + "learning_rate": 4.76537089871612e-05, + "loss": 0.0054, + "step": 26330 + }, + { + "epoch": 4.7, + "learning_rate": 4.7652817403708986e-05, + "loss": 0.0065, + "step": 26340 + }, + { + "epoch": 4.7, + "learning_rate": 4.765192582025678e-05, + "loss": 0.0059, + "step": 26350 + }, + { + "epoch": 4.7, + "learning_rate": 4.765103423680456e-05, + "loss": 0.01, + "step": 26360 + }, + { + "epoch": 4.7, + "learning_rate": 4.765014265335236e-05, + "loss": 0.0055, + "step": 26370 + }, + { + "epoch": 4.7, + "learning_rate": 4.7649251069900144e-05, + "loss": 0.0065, + "step": 26380 + }, + { + "epoch": 4.71, + "learning_rate": 4.7648359486447936e-05, + "loss": 0.0057, + "step": 26390 + }, + { + "epoch": 4.71, + "learning_rate": 4.764746790299573e-05, + "loss": 0.0066, + "step": 26400 + }, + { + "epoch": 4.71, + "learning_rate": 4.764657631954351e-05, + "loss": 0.0067, + "step": 26410 + }, + { + "epoch": 4.71, + "learning_rate": 4.76456847360913e-05, + "loss": 0.0067, + "step": 26420 + }, + { + "epoch": 4.71, + "learning_rate": 4.764479315263909e-05, + "loss": 0.0065, + "step": 26430 + }, + { + "epoch": 4.71, + "learning_rate": 4.764390156918688e-05, + "loss": 0.0083, + "step": 26440 + }, + { + "epoch": 4.72, + "learning_rate": 4.764300998573466e-05, + "loss": 0.0083, + "step": 26450 + }, + { + "epoch": 4.72, + "learning_rate": 4.7642118402282454e-05, + "loss": 0.0071, + "step": 26460 + }, + { + "epoch": 4.72, + "learning_rate": 4.7641226818830245e-05, + "loss": 0.0047, + "step": 26470 + }, + { + "epoch": 4.72, + "learning_rate": 4.7640335235378036e-05, + "loss": 0.0052, + "step": 26480 + }, + { + "epoch": 4.72, + "learning_rate": 4.763944365192583e-05, + "loss": 0.0044, + "step": 26490 + }, + { + "epoch": 4.73, + "learning_rate": 4.763855206847361e-05, + "loss": 0.0066, + "step": 26500 + }, + { + "epoch": 4.73, + "learning_rate": 4.76376604850214e-05, + "loss": 0.0075, + "step": 26510 + }, + { + "epoch": 4.73, + "learning_rate": 4.763676890156919e-05, + "loss": 0.0065, + "step": 26520 + }, + { + "epoch": 4.73, + "learning_rate": 4.763587731811698e-05, + "loss": 0.011, + "step": 26530 + }, + { + "epoch": 4.73, + "learning_rate": 4.763498573466477e-05, + "loss": 0.0061, + "step": 26540 + }, + { + "epoch": 4.73, + "learning_rate": 4.7634094151212554e-05, + "loss": 0.0045, + "step": 26550 + }, + { + "epoch": 4.74, + "learning_rate": 4.7633202567760345e-05, + "loss": 0.0065, + "step": 26560 + }, + { + "epoch": 4.74, + "learning_rate": 4.763231098430813e-05, + "loss": 0.0059, + "step": 26570 + }, + { + "epoch": 4.74, + "learning_rate": 4.763141940085592e-05, + "loss": 0.006, + "step": 26580 + }, + { + "epoch": 4.74, + "learning_rate": 4.763052781740371e-05, + "loss": 0.005, + "step": 26590 + }, + { + "epoch": 4.74, + "learning_rate": 4.7629636233951504e-05, + "loss": 0.0071, + "step": 26600 + }, + { + "epoch": 4.75, + "learning_rate": 4.762874465049929e-05, + "loss": 0.0033, + "step": 26610 + }, + { + "epoch": 4.75, + "learning_rate": 4.762785306704708e-05, + "loss": 0.0097, + "step": 26620 + }, + { + "epoch": 4.75, + "learning_rate": 4.762696148359487e-05, + "loss": 0.0078, + "step": 26630 + }, + { + "epoch": 4.75, + "learning_rate": 4.7626069900142655e-05, + "loss": 0.0051, + "step": 26640 + }, + { + "epoch": 4.75, + "learning_rate": 4.7625178316690446e-05, + "loss": 0.0058, + "step": 26650 + }, + { + "epoch": 4.75, + "learning_rate": 4.762428673323823e-05, + "loss": 0.0059, + "step": 26660 + }, + { + "epoch": 4.76, + "learning_rate": 4.762339514978602e-05, + "loss": 0.0081, + "step": 26670 + }, + { + "epoch": 4.76, + "learning_rate": 4.7622503566333806e-05, + "loss": 0.0084, + "step": 26680 + }, + { + "epoch": 4.76, + "learning_rate": 4.76216119828816e-05, + "loss": 0.0075, + "step": 26690 + }, + { + "epoch": 4.76, + "learning_rate": 4.762072039942939e-05, + "loss": 0.005, + "step": 26700 + }, + { + "epoch": 4.76, + "learning_rate": 4.761982881597718e-05, + "loss": 0.0068, + "step": 26710 + }, + { + "epoch": 4.76, + "learning_rate": 4.761893723252497e-05, + "loss": 0.0051, + "step": 26720 + }, + { + "epoch": 4.77, + "learning_rate": 4.7618045649072755e-05, + "loss": 0.0094, + "step": 26730 + }, + { + "epoch": 4.77, + "learning_rate": 4.761715406562055e-05, + "loss": 0.0066, + "step": 26740 + }, + { + "epoch": 4.77, + "learning_rate": 4.761626248216833e-05, + "loss": 0.0052, + "step": 26750 + }, + { + "epoch": 4.77, + "learning_rate": 4.761537089871612e-05, + "loss": 0.0095, + "step": 26760 + }, + { + "epoch": 4.77, + "learning_rate": 4.7614479315263913e-05, + "loss": 0.0061, + "step": 26770 + }, + { + "epoch": 4.78, + "learning_rate": 4.76135877318117e-05, + "loss": 0.0069, + "step": 26780 + }, + { + "epoch": 4.78, + "learning_rate": 4.761269614835949e-05, + "loss": 0.0072, + "step": 26790 + }, + { + "epoch": 4.78, + "learning_rate": 4.7611804564907274e-05, + "loss": 0.0092, + "step": 26800 + }, + { + "epoch": 4.78, + "learning_rate": 4.761091298145507e-05, + "loss": 0.0081, + "step": 26810 + }, + { + "epoch": 4.78, + "learning_rate": 4.7610021398002856e-05, + "loss": 0.0061, + "step": 26820 + }, + { + "epoch": 4.78, + "learning_rate": 4.760912981455065e-05, + "loss": 0.0083, + "step": 26830 + }, + { + "epoch": 4.79, + "learning_rate": 4.760823823109843e-05, + "loss": 0.0071, + "step": 26840 + }, + { + "epoch": 4.79, + "learning_rate": 4.760734664764622e-05, + "loss": 0.0051, + "step": 26850 + }, + { + "epoch": 4.79, + "learning_rate": 4.7606455064194014e-05, + "loss": 0.0086, + "step": 26860 + }, + { + "epoch": 4.79, + "learning_rate": 4.76055634807418e-05, + "loss": 0.0088, + "step": 26870 + }, + { + "epoch": 4.79, + "learning_rate": 4.760467189728959e-05, + "loss": 0.0038, + "step": 26880 + }, + { + "epoch": 4.79, + "learning_rate": 4.7603780313837374e-05, + "loss": 0.0062, + "step": 26890 + }, + { + "epoch": 4.8, + "learning_rate": 4.7602888730385165e-05, + "loss": 0.0052, + "step": 26900 + }, + { + "epoch": 4.8, + "learning_rate": 4.760199714693295e-05, + "loss": 0.0065, + "step": 26910 + }, + { + "epoch": 4.8, + "learning_rate": 4.760110556348075e-05, + "loss": 0.0042, + "step": 26920 + }, + { + "epoch": 4.8, + "learning_rate": 4.760021398002853e-05, + "loss": 0.0064, + "step": 26930 + }, + { + "epoch": 4.8, + "learning_rate": 4.759932239657632e-05, + "loss": 0.0056, + "step": 26940 + }, + { + "epoch": 4.81, + "learning_rate": 4.7598430813124115e-05, + "loss": 0.007, + "step": 26950 + }, + { + "epoch": 4.81, + "learning_rate": 4.75975392296719e-05, + "loss": 0.0079, + "step": 26960 + }, + { + "epoch": 4.81, + "learning_rate": 4.759664764621969e-05, + "loss": 0.0095, + "step": 26970 + }, + { + "epoch": 4.81, + "learning_rate": 4.7595756062767475e-05, + "loss": 0.0045, + "step": 26980 + }, + { + "epoch": 4.81, + "learning_rate": 4.7594864479315266e-05, + "loss": 0.0081, + "step": 26990 + }, + { + "epoch": 4.81, + "learning_rate": 4.759397289586306e-05, + "loss": 0.0054, + "step": 27000 + }, + { + "epoch": 4.82, + "learning_rate": 4.759308131241084e-05, + "loss": 0.0048, + "step": 27010 + }, + { + "epoch": 4.82, + "learning_rate": 4.759218972895863e-05, + "loss": 0.0091, + "step": 27020 + }, + { + "epoch": 4.82, + "learning_rate": 4.7591298145506424e-05, + "loss": 0.0064, + "step": 27030 + }, + { + "epoch": 4.82, + "learning_rate": 4.7590406562054215e-05, + "loss": 0.0034, + "step": 27040 + }, + { + "epoch": 4.82, + "learning_rate": 4.7589514978602e-05, + "loss": 0.0069, + "step": 27050 + }, + { + "epoch": 4.83, + "learning_rate": 4.758862339514979e-05, + "loss": 0.0058, + "step": 27060 + }, + { + "epoch": 4.83, + "learning_rate": 4.7587731811697575e-05, + "loss": 0.0078, + "step": 27070 + }, + { + "epoch": 4.83, + "learning_rate": 4.7586840228245366e-05, + "loss": 0.0044, + "step": 27080 + }, + { + "epoch": 4.83, + "learning_rate": 4.758594864479316e-05, + "loss": 0.0056, + "step": 27090 + }, + { + "epoch": 4.83, + "learning_rate": 4.758505706134094e-05, + "loss": 0.0044, + "step": 27100 + }, + { + "epoch": 4.83, + "learning_rate": 4.758416547788873e-05, + "loss": 0.0052, + "step": 27110 + }, + { + "epoch": 4.84, + "learning_rate": 4.758327389443652e-05, + "loss": 0.0087, + "step": 27120 + }, + { + "epoch": 4.84, + "learning_rate": 4.758238231098431e-05, + "loss": 0.0048, + "step": 27130 + }, + { + "epoch": 4.84, + "learning_rate": 4.75814907275321e-05, + "loss": 0.0092, + "step": 27140 + }, + { + "epoch": 4.84, + "learning_rate": 4.758059914407989e-05, + "loss": 0.0054, + "step": 27150 + }, + { + "epoch": 4.84, + "learning_rate": 4.7579707560627676e-05, + "loss": 0.0053, + "step": 27160 + }, + { + "epoch": 4.84, + "learning_rate": 4.757881597717547e-05, + "loss": 0.0073, + "step": 27170 + }, + { + "epoch": 4.85, + "learning_rate": 4.757792439372326e-05, + "loss": 0.0075, + "step": 27180 + }, + { + "epoch": 4.85, + "learning_rate": 4.757703281027104e-05, + "loss": 0.0104, + "step": 27190 + }, + { + "epoch": 4.85, + "learning_rate": 4.7576141226818834e-05, + "loss": 0.0084, + "step": 27200 + }, + { + "epoch": 4.85, + "learning_rate": 4.757524964336662e-05, + "loss": 0.0062, + "step": 27210 + }, + { + "epoch": 4.85, + "learning_rate": 4.757435805991441e-05, + "loss": 0.0054, + "step": 27220 + }, + { + "epoch": 4.86, + "learning_rate": 4.75734664764622e-05, + "loss": 0.005, + "step": 27230 + }, + { + "epoch": 4.86, + "learning_rate": 4.7572574893009985e-05, + "loss": 0.0044, + "step": 27240 + }, + { + "epoch": 4.86, + "learning_rate": 4.7571683309557776e-05, + "loss": 0.0054, + "step": 27250 + }, + { + "epoch": 4.86, + "learning_rate": 4.757079172610557e-05, + "loss": 0.0105, + "step": 27260 + }, + { + "epoch": 4.86, + "learning_rate": 4.756990014265336e-05, + "loss": 0.0056, + "step": 27270 + }, + { + "epoch": 4.86, + "learning_rate": 4.756900855920114e-05, + "loss": 0.0053, + "step": 27280 + }, + { + "epoch": 4.87, + "learning_rate": 4.7568116975748934e-05, + "loss": 0.0069, + "step": 27290 + }, + { + "epoch": 4.87, + "learning_rate": 4.756722539229672e-05, + "loss": 0.0052, + "step": 27300 + }, + { + "epoch": 4.87, + "learning_rate": 4.756633380884451e-05, + "loss": 0.0042, + "step": 27310 + }, + { + "epoch": 4.87, + "learning_rate": 4.75654422253923e-05, + "loss": 0.0069, + "step": 27320 + }, + { + "epoch": 4.87, + "learning_rate": 4.7564550641940086e-05, + "loss": 0.0044, + "step": 27330 + }, + { + "epoch": 4.88, + "learning_rate": 4.756365905848788e-05, + "loss": 0.0094, + "step": 27340 + }, + { + "epoch": 4.88, + "learning_rate": 4.756276747503566e-05, + "loss": 0.0048, + "step": 27350 + }, + { + "epoch": 4.88, + "learning_rate": 4.756187589158346e-05, + "loss": 0.0083, + "step": 27360 + }, + { + "epoch": 4.88, + "learning_rate": 4.7560984308131244e-05, + "loss": 0.0079, + "step": 27370 + }, + { + "epoch": 4.88, + "learning_rate": 4.7560092724679035e-05, + "loss": 0.0056, + "step": 27380 + }, + { + "epoch": 4.88, + "learning_rate": 4.755920114122682e-05, + "loss": 0.0051, + "step": 27390 + }, + { + "epoch": 4.89, + "learning_rate": 4.755830955777461e-05, + "loss": 0.0098, + "step": 27400 + }, + { + "epoch": 4.89, + "learning_rate": 4.75574179743224e-05, + "loss": 0.007, + "step": 27410 + }, + { + "epoch": 4.89, + "learning_rate": 4.7556526390870186e-05, + "loss": 0.0055, + "step": 27420 + }, + { + "epoch": 4.89, + "learning_rate": 4.755563480741798e-05, + "loss": 0.0063, + "step": 27430 + }, + { + "epoch": 4.89, + "learning_rate": 4.755474322396576e-05, + "loss": 0.0041, + "step": 27440 + }, + { + "epoch": 4.89, + "learning_rate": 4.755385164051355e-05, + "loss": 0.0045, + "step": 27450 + }, + { + "epoch": 4.9, + "learning_rate": 4.755296005706134e-05, + "loss": 0.0072, + "step": 27460 + }, + { + "epoch": 4.9, + "learning_rate": 4.7552068473609136e-05, + "loss": 0.0093, + "step": 27470 + }, + { + "epoch": 4.9, + "learning_rate": 4.755117689015692e-05, + "loss": 0.0062, + "step": 27480 + }, + { + "epoch": 4.9, + "learning_rate": 4.755028530670471e-05, + "loss": 0.0056, + "step": 27490 + }, + { + "epoch": 4.9, + "learning_rate": 4.75493937232525e-05, + "loss": 0.0072, + "step": 27500 + }, + { + "epoch": 4.91, + "learning_rate": 4.754850213980029e-05, + "loss": 0.0088, + "step": 27510 + }, + { + "epoch": 4.91, + "learning_rate": 4.754761055634808e-05, + "loss": 0.006, + "step": 27520 + }, + { + "epoch": 4.91, + "learning_rate": 4.754671897289586e-05, + "loss": 0.0068, + "step": 27530 + }, + { + "epoch": 4.91, + "learning_rate": 4.7545827389443654e-05, + "loss": 0.0053, + "step": 27540 + }, + { + "epoch": 4.91, + "learning_rate": 4.7544935805991445e-05, + "loss": 0.005, + "step": 27550 + }, + { + "epoch": 4.91, + "learning_rate": 4.754404422253923e-05, + "loss": 0.0056, + "step": 27560 + }, + { + "epoch": 4.92, + "learning_rate": 4.754315263908702e-05, + "loss": 0.0081, + "step": 27570 + }, + { + "epoch": 4.92, + "learning_rate": 4.7542261055634805e-05, + "loss": 0.0073, + "step": 27580 + }, + { + "epoch": 4.92, + "learning_rate": 4.75413694721826e-05, + "loss": 0.0072, + "step": 27590 + }, + { + "epoch": 4.92, + "learning_rate": 4.754047788873039e-05, + "loss": 0.0069, + "step": 27600 + }, + { + "epoch": 4.92, + "learning_rate": 4.753958630527818e-05, + "loss": 0.0091, + "step": 27610 + }, + { + "epoch": 4.93, + "learning_rate": 4.753869472182596e-05, + "loss": 0.0064, + "step": 27620 + }, + { + "epoch": 4.93, + "learning_rate": 4.7537803138373754e-05, + "loss": 0.0063, + "step": 27630 + }, + { + "epoch": 4.93, + "learning_rate": 4.7536911554921545e-05, + "loss": 0.0038, + "step": 27640 + }, + { + "epoch": 4.93, + "learning_rate": 4.753601997146933e-05, + "loss": 0.0088, + "step": 27650 + }, + { + "epoch": 4.93, + "learning_rate": 4.753512838801712e-05, + "loss": 0.0055, + "step": 27660 + }, + { + "epoch": 4.93, + "learning_rate": 4.7534236804564906e-05, + "loss": 0.007, + "step": 27670 + }, + { + "epoch": 4.94, + "learning_rate": 4.75333452211127e-05, + "loss": 0.0056, + "step": 27680 + }, + { + "epoch": 4.94, + "learning_rate": 4.753245363766048e-05, + "loss": 0.0053, + "step": 27690 + }, + { + "epoch": 4.94, + "learning_rate": 4.753156205420828e-05, + "loss": 0.008, + "step": 27700 + }, + { + "epoch": 4.94, + "learning_rate": 4.7530670470756064e-05, + "loss": 0.0067, + "step": 27710 + }, + { + "epoch": 4.94, + "learning_rate": 4.7529778887303855e-05, + "loss": 0.0072, + "step": 27720 + }, + { + "epoch": 4.94, + "learning_rate": 4.7528887303851646e-05, + "loss": 0.0063, + "step": 27730 + }, + { + "epoch": 4.95, + "learning_rate": 4.752799572039943e-05, + "loss": 0.011, + "step": 27740 + }, + { + "epoch": 4.95, + "learning_rate": 4.752710413694722e-05, + "loss": 0.0062, + "step": 27750 + }, + { + "epoch": 4.95, + "learning_rate": 4.7526212553495006e-05, + "loss": 0.0055, + "step": 27760 + }, + { + "epoch": 4.95, + "learning_rate": 4.75253209700428e-05, + "loss": 0.0054, + "step": 27770 + }, + { + "epoch": 4.95, + "learning_rate": 4.752442938659059e-05, + "loss": 0.0077, + "step": 27780 + }, + { + "epoch": 4.96, + "learning_rate": 4.752353780313837e-05, + "loss": 0.0058, + "step": 27790 + }, + { + "epoch": 4.96, + "learning_rate": 4.7522646219686164e-05, + "loss": 0.005, + "step": 27800 + }, + { + "epoch": 4.96, + "learning_rate": 4.7521754636233955e-05, + "loss": 0.0054, + "step": 27810 + }, + { + "epoch": 4.96, + "learning_rate": 4.7520863052781747e-05, + "loss": 0.0034, + "step": 27820 + }, + { + "epoch": 4.96, + "learning_rate": 4.751997146932953e-05, + "loss": 0.0051, + "step": 27830 + }, + { + "epoch": 4.96, + "learning_rate": 4.751907988587732e-05, + "loss": 0.0051, + "step": 27840 + }, + { + "epoch": 4.97, + "learning_rate": 4.751818830242511e-05, + "loss": 0.0038, + "step": 27850 + }, + { + "epoch": 4.97, + "learning_rate": 4.75172967189729e-05, + "loss": 0.0089, + "step": 27860 + }, + { + "epoch": 4.97, + "learning_rate": 4.751640513552069e-05, + "loss": 0.0043, + "step": 27870 + }, + { + "epoch": 4.97, + "learning_rate": 4.7515513552068474e-05, + "loss": 0.0051, + "step": 27880 + }, + { + "epoch": 4.97, + "learning_rate": 4.7514621968616265e-05, + "loss": 0.0083, + "step": 27890 + }, + { + "epoch": 4.98, + "learning_rate": 4.751373038516405e-05, + "loss": 0.0093, + "step": 27900 + }, + { + "epoch": 4.98, + "learning_rate": 4.751283880171184e-05, + "loss": 0.0075, + "step": 27910 + }, + { + "epoch": 4.98, + "learning_rate": 4.751194721825963e-05, + "loss": 0.0052, + "step": 27920 + }, + { + "epoch": 4.98, + "learning_rate": 4.751105563480742e-05, + "loss": 0.0096, + "step": 27930 + }, + { + "epoch": 4.98, + "learning_rate": 4.751016405135521e-05, + "loss": 0.0079, + "step": 27940 + }, + { + "epoch": 4.98, + "learning_rate": 4.7509272467903e-05, + "loss": 0.005, + "step": 27950 + }, + { + "epoch": 4.99, + "learning_rate": 4.750838088445079e-05, + "loss": 0.0074, + "step": 27960 + }, + { + "epoch": 4.99, + "learning_rate": 4.7507489300998574e-05, + "loss": 0.0042, + "step": 27970 + }, + { + "epoch": 4.99, + "learning_rate": 4.7506597717546365e-05, + "loss": 0.008, + "step": 27980 + }, + { + "epoch": 4.99, + "learning_rate": 4.750570613409415e-05, + "loss": 0.0048, + "step": 27990 + }, + { + "epoch": 4.99, + "learning_rate": 4.750481455064194e-05, + "loss": 0.0077, + "step": 28000 + }, + { + "epoch": 4.99, + "learning_rate": 4.750392296718973e-05, + "loss": 0.0074, + "step": 28010 + }, + { + "epoch": 5.0, + "learning_rate": 4.7503031383737517e-05, + "loss": 0.007, + "step": 28020 + }, + { + "epoch": 5.0, + "learning_rate": 4.7502139800285315e-05, + "loss": 0.0052, + "step": 28030 + }, + { + "epoch": 5.0, + "learning_rate": 4.75012482168331e-05, + "loss": 0.0043, + "step": 28040 + }, + { + "epoch": 5.0, + "eval_loss": 0.013962038792669773, + "eval_runtime": 195.5702, + "eval_samples_per_second": 23.72, + "eval_steps_per_second": 2.966, + "step": 28040 + }, + { + "epoch": 5.0, + "learning_rate": 4.750035663338089e-05, + "loss": 0.005, + "step": 28050 + }, + { + "epoch": 5.0, + "learning_rate": 4.7499465049928675e-05, + "loss": 0.005, + "step": 28060 + }, + { + "epoch": 5.01, + "learning_rate": 4.7498573466476466e-05, + "loss": 0.0057, + "step": 28070 + }, + { + "epoch": 5.01, + "learning_rate": 4.749768188302425e-05, + "loss": 0.0064, + "step": 28080 + }, + { + "epoch": 5.01, + "learning_rate": 4.749679029957204e-05, + "loss": 0.0053, + "step": 28090 + }, + { + "epoch": 5.01, + "learning_rate": 4.749589871611983e-05, + "loss": 0.0059, + "step": 28100 + }, + { + "epoch": 5.01, + "learning_rate": 4.749500713266762e-05, + "loss": 0.0045, + "step": 28110 + }, + { + "epoch": 5.01, + "learning_rate": 4.749411554921541e-05, + "loss": 0.004, + "step": 28120 + }, + { + "epoch": 5.02, + "learning_rate": 4.749322396576319e-05, + "loss": 0.0062, + "step": 28130 + }, + { + "epoch": 5.02, + "learning_rate": 4.749233238231099e-05, + "loss": 0.0084, + "step": 28140 + }, + { + "epoch": 5.02, + "learning_rate": 4.7491440798858775e-05, + "loss": 0.0058, + "step": 28150 + }, + { + "epoch": 5.02, + "learning_rate": 4.7490549215406566e-05, + "loss": 0.0061, + "step": 28160 + }, + { + "epoch": 5.02, + "learning_rate": 4.748965763195435e-05, + "loss": 0.0061, + "step": 28170 + }, + { + "epoch": 5.02, + "learning_rate": 4.748876604850214e-05, + "loss": 0.0093, + "step": 28180 + }, + { + "epoch": 5.03, + "learning_rate": 4.748787446504993e-05, + "loss": 0.0031, + "step": 28190 + }, + { + "epoch": 5.03, + "learning_rate": 4.748698288159772e-05, + "loss": 0.0065, + "step": 28200 + }, + { + "epoch": 5.03, + "learning_rate": 4.748609129814551e-05, + "loss": 0.0066, + "step": 28210 + }, + { + "epoch": 5.03, + "learning_rate": 4.748519971469329e-05, + "loss": 0.0061, + "step": 28220 + }, + { + "epoch": 5.03, + "learning_rate": 4.7484308131241085e-05, + "loss": 0.0046, + "step": 28230 + }, + { + "epoch": 5.04, + "learning_rate": 4.7483416547788876e-05, + "loss": 0.0051, + "step": 28240 + }, + { + "epoch": 5.04, + "learning_rate": 4.748252496433667e-05, + "loss": 0.0073, + "step": 28250 + }, + { + "epoch": 5.04, + "learning_rate": 4.748163338088446e-05, + "loss": 0.0104, + "step": 28260 + }, + { + "epoch": 5.04, + "learning_rate": 4.748074179743224e-05, + "loss": 0.0056, + "step": 28270 + }, + { + "epoch": 5.04, + "learning_rate": 4.7479850213980034e-05, + "loss": 0.0079, + "step": 28280 + }, + { + "epoch": 5.04, + "learning_rate": 4.747895863052782e-05, + "loss": 0.0082, + "step": 28290 + }, + { + "epoch": 5.05, + "learning_rate": 4.747806704707561e-05, + "loss": 0.0134, + "step": 28300 + }, + { + "epoch": 5.05, + "learning_rate": 4.7477175463623394e-05, + "loss": 0.0038, + "step": 28310 + }, + { + "epoch": 5.05, + "learning_rate": 4.7476283880171185e-05, + "loss": 0.0054, + "step": 28320 + }, + { + "epoch": 5.05, + "learning_rate": 4.7475392296718976e-05, + "loss": 0.0073, + "step": 28330 + }, + { + "epoch": 5.05, + "learning_rate": 4.747450071326676e-05, + "loss": 0.0069, + "step": 28340 + }, + { + "epoch": 5.06, + "learning_rate": 4.747360912981455e-05, + "loss": 0.0061, + "step": 28350 + }, + { + "epoch": 5.06, + "learning_rate": 4.747271754636234e-05, + "loss": 0.0028, + "step": 28360 + }, + { + "epoch": 5.06, + "learning_rate": 4.7471825962910134e-05, + "loss": 0.0078, + "step": 28370 + }, + { + "epoch": 5.06, + "learning_rate": 4.747093437945792e-05, + "loss": 0.0066, + "step": 28380 + }, + { + "epoch": 5.06, + "learning_rate": 4.747004279600571e-05, + "loss": 0.0068, + "step": 28390 + }, + { + "epoch": 5.06, + "learning_rate": 4.7469151212553494e-05, + "loss": 0.0039, + "step": 28400 + }, + { + "epoch": 5.07, + "learning_rate": 4.7468259629101286e-05, + "loss": 0.0066, + "step": 28410 + }, + { + "epoch": 5.07, + "learning_rate": 4.746736804564908e-05, + "loss": 0.0039, + "step": 28420 + }, + { + "epoch": 5.07, + "learning_rate": 4.746647646219686e-05, + "loss": 0.0042, + "step": 28430 + }, + { + "epoch": 5.07, + "learning_rate": 4.746558487874465e-05, + "loss": 0.0066, + "step": 28440 + }, + { + "epoch": 5.07, + "learning_rate": 4.746469329529244e-05, + "loss": 0.0058, + "step": 28450 + }, + { + "epoch": 5.07, + "learning_rate": 4.746380171184023e-05, + "loss": 0.0071, + "step": 28460 + }, + { + "epoch": 5.08, + "learning_rate": 4.746291012838802e-05, + "loss": 0.0056, + "step": 28470 + }, + { + "epoch": 5.08, + "learning_rate": 4.746201854493581e-05, + "loss": 0.0062, + "step": 28480 + }, + { + "epoch": 5.08, + "learning_rate": 4.74611269614836e-05, + "loss": 0.004, + "step": 28490 + }, + { + "epoch": 5.08, + "learning_rate": 4.7460235378031386e-05, + "loss": 0.0054, + "step": 28500 + }, + { + "epoch": 5.08, + "learning_rate": 4.745934379457918e-05, + "loss": 0.0096, + "step": 28510 + }, + { + "epoch": 5.09, + "learning_rate": 4.745845221112696e-05, + "loss": 0.0098, + "step": 28520 + }, + { + "epoch": 5.09, + "learning_rate": 4.745756062767475e-05, + "loss": 0.0065, + "step": 28530 + }, + { + "epoch": 5.09, + "learning_rate": 4.745666904422254e-05, + "loss": 0.009, + "step": 28540 + }, + { + "epoch": 5.09, + "learning_rate": 4.745577746077033e-05, + "loss": 0.0062, + "step": 28550 + }, + { + "epoch": 5.09, + "learning_rate": 4.745488587731812e-05, + "loss": 0.0058, + "step": 28560 + }, + { + "epoch": 5.09, + "learning_rate": 4.7453994293865904e-05, + "loss": 0.0052, + "step": 28570 + }, + { + "epoch": 5.1, + "learning_rate": 4.74531027104137e-05, + "loss": 0.0091, + "step": 28580 + }, + { + "epoch": 5.1, + "learning_rate": 4.745221112696149e-05, + "loss": 0.0049, + "step": 28590 + }, + { + "epoch": 5.1, + "learning_rate": 4.745131954350928e-05, + "loss": 0.0064, + "step": 28600 + }, + { + "epoch": 5.1, + "learning_rate": 4.745042796005706e-05, + "loss": 0.0064, + "step": 28610 + }, + { + "epoch": 5.1, + "learning_rate": 4.7449536376604854e-05, + "loss": 0.007, + "step": 28620 + }, + { + "epoch": 5.11, + "learning_rate": 4.744864479315264e-05, + "loss": 0.0041, + "step": 28630 + }, + { + "epoch": 5.11, + "learning_rate": 4.744775320970043e-05, + "loss": 0.0051, + "step": 28640 + }, + { + "epoch": 5.11, + "learning_rate": 4.744686162624822e-05, + "loss": 0.0096, + "step": 28650 + }, + { + "epoch": 5.11, + "learning_rate": 4.7445970042796005e-05, + "loss": 0.0054, + "step": 28660 + }, + { + "epoch": 5.11, + "learning_rate": 4.7445078459343796e-05, + "loss": 0.0052, + "step": 28670 + }, + { + "epoch": 5.11, + "learning_rate": 4.744418687589158e-05, + "loss": 0.0067, + "step": 28680 + }, + { + "epoch": 5.12, + "learning_rate": 4.744329529243938e-05, + "loss": 0.0068, + "step": 28690 + }, + { + "epoch": 5.12, + "learning_rate": 4.744240370898716e-05, + "loss": 0.0079, + "step": 28700 + }, + { + "epoch": 5.12, + "learning_rate": 4.7441512125534954e-05, + "loss": 0.0036, + "step": 28710 + }, + { + "epoch": 5.12, + "learning_rate": 4.7440620542082745e-05, + "loss": 0.0069, + "step": 28720 + }, + { + "epoch": 5.12, + "learning_rate": 4.743972895863053e-05, + "loss": 0.0046, + "step": 28730 + }, + { + "epoch": 5.12, + "learning_rate": 4.743883737517832e-05, + "loss": 0.0052, + "step": 28740 + }, + { + "epoch": 5.13, + "learning_rate": 4.7437945791726105e-05, + "loss": 0.0054, + "step": 28750 + }, + { + "epoch": 5.13, + "learning_rate": 4.74370542082739e-05, + "loss": 0.0052, + "step": 28760 + }, + { + "epoch": 5.13, + "learning_rate": 4.743616262482168e-05, + "loss": 0.0056, + "step": 28770 + }, + { + "epoch": 5.13, + "learning_rate": 4.743527104136947e-05, + "loss": 0.0047, + "step": 28780 + }, + { + "epoch": 5.13, + "learning_rate": 4.7434379457917264e-05, + "loss": 0.0055, + "step": 28790 + }, + { + "epoch": 5.14, + "learning_rate": 4.7433487874465055e-05, + "loss": 0.0061, + "step": 28800 + }, + { + "epoch": 5.14, + "learning_rate": 4.7432596291012846e-05, + "loss": 0.0051, + "step": 28810 + }, + { + "epoch": 5.14, + "learning_rate": 4.743170470756063e-05, + "loss": 0.0052, + "step": 28820 + }, + { + "epoch": 5.14, + "learning_rate": 4.743081312410842e-05, + "loss": 0.0098, + "step": 28830 + }, + { + "epoch": 5.14, + "learning_rate": 4.7429921540656206e-05, + "loss": 0.0077, + "step": 28840 + }, + { + "epoch": 5.14, + "learning_rate": 4.7429029957204e-05, + "loss": 0.0063, + "step": 28850 + }, + { + "epoch": 5.15, + "learning_rate": 4.742813837375178e-05, + "loss": 0.0064, + "step": 28860 + }, + { + "epoch": 5.15, + "learning_rate": 4.742724679029957e-05, + "loss": 0.0081, + "step": 28870 + }, + { + "epoch": 5.15, + "learning_rate": 4.7426355206847364e-05, + "loss": 0.0048, + "step": 28880 + }, + { + "epoch": 5.15, + "learning_rate": 4.742546362339515e-05, + "loss": 0.0079, + "step": 28890 + }, + { + "epoch": 5.15, + "learning_rate": 4.742457203994294e-05, + "loss": 0.0081, + "step": 28900 + }, + { + "epoch": 5.16, + "learning_rate": 4.742368045649073e-05, + "loss": 0.0064, + "step": 28910 + }, + { + "epoch": 5.16, + "learning_rate": 4.742278887303852e-05, + "loss": 0.0041, + "step": 28920 + }, + { + "epoch": 5.16, + "learning_rate": 4.742189728958631e-05, + "loss": 0.0072, + "step": 28930 + }, + { + "epoch": 5.16, + "learning_rate": 4.74210057061341e-05, + "loss": 0.0058, + "step": 28940 + }, + { + "epoch": 5.16, + "learning_rate": 4.742011412268189e-05, + "loss": 0.0041, + "step": 28950 + }, + { + "epoch": 5.16, + "learning_rate": 4.7419222539229673e-05, + "loss": 0.0047, + "step": 28960 + }, + { + "epoch": 5.17, + "learning_rate": 4.7418330955777465e-05, + "loss": 0.0037, + "step": 28970 + }, + { + "epoch": 5.17, + "learning_rate": 4.741743937232525e-05, + "loss": 0.0025, + "step": 28980 + }, + { + "epoch": 5.17, + "learning_rate": 4.741654778887304e-05, + "loss": 0.0024, + "step": 28990 + }, + { + "epoch": 5.17, + "learning_rate": 4.7415656205420825e-05, + "loss": 0.0045, + "step": 29000 + }, + { + "epoch": 5.17, + "learning_rate": 4.7414764621968616e-05, + "loss": 0.0066, + "step": 29010 + }, + { + "epoch": 5.17, + "learning_rate": 4.741387303851641e-05, + "loss": 0.0066, + "step": 29020 + }, + { + "epoch": 5.18, + "learning_rate": 4.74129814550642e-05, + "loss": 0.006, + "step": 29030 + }, + { + "epoch": 5.18, + "learning_rate": 4.741208987161199e-05, + "loss": 0.009, + "step": 29040 + }, + { + "epoch": 5.18, + "learning_rate": 4.7411198288159774e-05, + "loss": 0.0073, + "step": 29050 + }, + { + "epoch": 5.18, + "learning_rate": 4.7410306704707565e-05, + "loss": 0.0093, + "step": 29060 + }, + { + "epoch": 5.18, + "learning_rate": 4.740941512125535e-05, + "loss": 0.0075, + "step": 29070 + }, + { + "epoch": 5.19, + "learning_rate": 4.740852353780314e-05, + "loss": 0.0042, + "step": 29080 + }, + { + "epoch": 5.19, + "learning_rate": 4.7407631954350925e-05, + "loss": 0.0072, + "step": 29090 + }, + { + "epoch": 5.19, + "learning_rate": 4.740682952924394e-05, + "loss": 0.0058, + "step": 29100 + }, + { + "epoch": 5.19, + "learning_rate": 4.740593794579173e-05, + "loss": 0.0073, + "step": 29110 + }, + { + "epoch": 5.19, + "learning_rate": 4.7405046362339516e-05, + "loss": 0.0061, + "step": 29120 + }, + { + "epoch": 5.19, + "learning_rate": 4.740415477888731e-05, + "loss": 0.0041, + "step": 29130 + }, + { + "epoch": 5.2, + "learning_rate": 4.740326319543509e-05, + "loss": 0.0075, + "step": 29140 + }, + { + "epoch": 5.2, + "learning_rate": 4.740237161198288e-05, + "loss": 0.0052, + "step": 29150 + }, + { + "epoch": 5.2, + "learning_rate": 4.740148002853067e-05, + "loss": 0.0052, + "step": 29160 + }, + { + "epoch": 5.2, + "learning_rate": 4.7400588445078465e-05, + "loss": 0.0048, + "step": 29170 + }, + { + "epoch": 5.2, + "learning_rate": 4.739969686162625e-05, + "loss": 0.0057, + "step": 29180 + }, + { + "epoch": 5.21, + "learning_rate": 4.739880527817404e-05, + "loss": 0.0052, + "step": 29190 + }, + { + "epoch": 5.21, + "learning_rate": 4.739791369472183e-05, + "loss": 0.0071, + "step": 29200 + }, + { + "epoch": 5.21, + "learning_rate": 4.7397022111269616e-05, + "loss": 0.0078, + "step": 29210 + }, + { + "epoch": 5.21, + "learning_rate": 4.739613052781741e-05, + "loss": 0.0045, + "step": 29220 + }, + { + "epoch": 5.21, + "learning_rate": 4.739523894436519e-05, + "loss": 0.0036, + "step": 29230 + }, + { + "epoch": 5.21, + "learning_rate": 4.739434736091298e-05, + "loss": 0.004, + "step": 29240 + }, + { + "epoch": 5.22, + "learning_rate": 4.7393455777460774e-05, + "loss": 0.0036, + "step": 29250 + }, + { + "epoch": 5.22, + "learning_rate": 4.739256419400856e-05, + "loss": 0.0077, + "step": 29260 + }, + { + "epoch": 5.22, + "learning_rate": 4.739167261055635e-05, + "loss": 0.0044, + "step": 29270 + }, + { + "epoch": 5.22, + "learning_rate": 4.739078102710414e-05, + "loss": 0.0051, + "step": 29280 + }, + { + "epoch": 5.22, + "learning_rate": 4.738988944365193e-05, + "loss": 0.0086, + "step": 29290 + }, + { + "epoch": 5.22, + "learning_rate": 4.738899786019972e-05, + "loss": 0.0075, + "step": 29300 + }, + { + "epoch": 5.23, + "learning_rate": 4.738810627674751e-05, + "loss": 0.0097, + "step": 29310 + }, + { + "epoch": 5.23, + "learning_rate": 4.738721469329529e-05, + "loss": 0.0056, + "step": 29320 + }, + { + "epoch": 5.23, + "learning_rate": 4.7386323109843084e-05, + "loss": 0.0045, + "step": 29330 + }, + { + "epoch": 5.23, + "learning_rate": 4.7385431526390875e-05, + "loss": 0.0047, + "step": 29340 + }, + { + "epoch": 5.23, + "learning_rate": 4.738453994293866e-05, + "loss": 0.0045, + "step": 29350 + }, + { + "epoch": 5.24, + "learning_rate": 4.738364835948645e-05, + "loss": 0.0049, + "step": 29360 + }, + { + "epoch": 5.24, + "learning_rate": 4.7382756776034235e-05, + "loss": 0.0037, + "step": 29370 + }, + { + "epoch": 5.24, + "learning_rate": 4.7381865192582026e-05, + "loss": 0.0038, + "step": 29380 + }, + { + "epoch": 5.24, + "learning_rate": 4.738097360912982e-05, + "loss": 0.0069, + "step": 29390 + }, + { + "epoch": 5.24, + "learning_rate": 4.738008202567761e-05, + "loss": 0.006, + "step": 29400 + }, + { + "epoch": 5.24, + "learning_rate": 4.737919044222539e-05, + "loss": 0.0055, + "step": 29410 + }, + { + "epoch": 5.25, + "learning_rate": 4.7378298858773184e-05, + "loss": 0.007, + "step": 29420 + }, + { + "epoch": 5.25, + "learning_rate": 4.7377407275320976e-05, + "loss": 0.0057, + "step": 29430 + }, + { + "epoch": 5.25, + "learning_rate": 4.737651569186876e-05, + "loss": 0.0053, + "step": 29440 + }, + { + "epoch": 5.25, + "learning_rate": 4.737562410841655e-05, + "loss": 0.0058, + "step": 29450 + }, + { + "epoch": 5.25, + "learning_rate": 4.7374732524964336e-05, + "loss": 0.0058, + "step": 29460 + }, + { + "epoch": 5.25, + "learning_rate": 4.737384094151213e-05, + "loss": 0.0048, + "step": 29470 + }, + { + "epoch": 5.26, + "learning_rate": 4.737294935805992e-05, + "loss": 0.0046, + "step": 29480 + }, + { + "epoch": 5.26, + "learning_rate": 4.73720577746077e-05, + "loss": 0.0083, + "step": 29490 + }, + { + "epoch": 5.26, + "learning_rate": 4.73711661911555e-05, + "loss": 0.0038, + "step": 29500 + }, + { + "epoch": 5.26, + "learning_rate": 4.7370274607703285e-05, + "loss": 0.0039, + "step": 29510 + }, + { + "epoch": 5.26, + "learning_rate": 4.7369383024251076e-05, + "loss": 0.0029, + "step": 29520 + }, + { + "epoch": 5.27, + "learning_rate": 4.736849144079886e-05, + "loss": 0.0059, + "step": 29530 + }, + { + "epoch": 5.27, + "learning_rate": 4.736759985734665e-05, + "loss": 0.0065, + "step": 29540 + }, + { + "epoch": 5.27, + "learning_rate": 4.7366708273894436e-05, + "loss": 0.0066, + "step": 29550 + }, + { + "epoch": 5.27, + "learning_rate": 4.736581669044223e-05, + "loss": 0.0074, + "step": 29560 + }, + { + "epoch": 5.27, + "learning_rate": 4.736492510699002e-05, + "loss": 0.0059, + "step": 29570 + }, + { + "epoch": 5.27, + "learning_rate": 4.73640335235378e-05, + "loss": 0.0049, + "step": 29580 + }, + { + "epoch": 5.28, + "learning_rate": 4.7363141940085594e-05, + "loss": 0.0053, + "step": 29590 + }, + { + "epoch": 5.28, + "learning_rate": 4.736225035663338e-05, + "loss": 0.006, + "step": 29600 + }, + { + "epoch": 5.28, + "learning_rate": 4.736135877318118e-05, + "loss": 0.0073, + "step": 29610 + }, + { + "epoch": 5.28, + "learning_rate": 4.736046718972896e-05, + "loss": 0.01, + "step": 29620 + }, + { + "epoch": 5.28, + "learning_rate": 4.735957560627675e-05, + "loss": 0.006, + "step": 29630 + }, + { + "epoch": 5.29, + "learning_rate": 4.735868402282454e-05, + "loss": 0.005, + "step": 29640 + }, + { + "epoch": 5.29, + "learning_rate": 4.735779243937233e-05, + "loss": 0.0052, + "step": 29650 + }, + { + "epoch": 5.29, + "learning_rate": 4.735690085592012e-05, + "loss": 0.0048, + "step": 29660 + }, + { + "epoch": 5.29, + "learning_rate": 4.7356009272467904e-05, + "loss": 0.0048, + "step": 29670 + }, + { + "epoch": 5.29, + "learning_rate": 4.7355117689015695e-05, + "loss": 0.0068, + "step": 29680 + }, + { + "epoch": 5.29, + "learning_rate": 4.735422610556348e-05, + "loss": 0.0052, + "step": 29690 + }, + { + "epoch": 5.3, + "learning_rate": 4.735333452211127e-05, + "loss": 0.0056, + "step": 29700 + }, + { + "epoch": 5.3, + "learning_rate": 4.735244293865906e-05, + "loss": 0.0055, + "step": 29710 + }, + { + "epoch": 5.3, + "learning_rate": 4.735155135520685e-05, + "loss": 0.0064, + "step": 29720 + }, + { + "epoch": 5.3, + "learning_rate": 4.7350659771754644e-05, + "loss": 0.0048, + "step": 29730 + }, + { + "epoch": 5.3, + "learning_rate": 4.734976818830243e-05, + "loss": 0.0036, + "step": 29740 + }, + { + "epoch": 5.3, + "learning_rate": 4.734887660485022e-05, + "loss": 0.0077, + "step": 29750 + }, + { + "epoch": 5.31, + "learning_rate": 4.7347985021398004e-05, + "loss": 0.0035, + "step": 29760 + }, + { + "epoch": 5.31, + "learning_rate": 4.7347093437945795e-05, + "loss": 0.0053, + "step": 29770 + }, + { + "epoch": 5.31, + "learning_rate": 4.734620185449358e-05, + "loss": 0.0056, + "step": 29780 + }, + { + "epoch": 5.31, + "learning_rate": 4.734531027104137e-05, + "loss": 0.008, + "step": 29790 + }, + { + "epoch": 5.31, + "learning_rate": 4.734441868758916e-05, + "loss": 0.0067, + "step": 29800 + }, + { + "epoch": 5.32, + "learning_rate": 4.734352710413695e-05, + "loss": 0.0129, + "step": 29810 + }, + { + "epoch": 5.32, + "learning_rate": 4.734263552068474e-05, + "loss": 0.0057, + "step": 29820 + }, + { + "epoch": 5.32, + "learning_rate": 4.734174393723253e-05, + "loss": 0.0043, + "step": 29830 + }, + { + "epoch": 5.32, + "learning_rate": 4.734085235378032e-05, + "loss": 0.0033, + "step": 29840 + }, + { + "epoch": 5.32, + "learning_rate": 4.7339960770328105e-05, + "loss": 0.007, + "step": 29850 + }, + { + "epoch": 5.32, + "learning_rate": 4.7339069186875896e-05, + "loss": 0.0059, + "step": 29860 + }, + { + "epoch": 5.33, + "learning_rate": 4.733817760342368e-05, + "loss": 0.0036, + "step": 29870 + }, + { + "epoch": 5.33, + "learning_rate": 4.733728601997147e-05, + "loss": 0.0043, + "step": 29880 + }, + { + "epoch": 5.33, + "learning_rate": 4.733639443651926e-05, + "loss": 0.0047, + "step": 29890 + }, + { + "epoch": 5.33, + "learning_rate": 4.733550285306705e-05, + "loss": 0.004, + "step": 29900 + }, + { + "epoch": 5.33, + "learning_rate": 4.733461126961484e-05, + "loss": 0.0061, + "step": 29910 + }, + { + "epoch": 5.34, + "learning_rate": 4.733371968616262e-05, + "loss": 0.0068, + "step": 29920 + }, + { + "epoch": 5.34, + "learning_rate": 4.7332828102710414e-05, + "loss": 0.0075, + "step": 29930 + }, + { + "epoch": 5.34, + "learning_rate": 4.7331936519258205e-05, + "loss": 0.0062, + "step": 29940 + }, + { + "epoch": 5.34, + "learning_rate": 4.7331044935805996e-05, + "loss": 0.0054, + "step": 29950 + }, + { + "epoch": 5.34, + "learning_rate": 4.733015335235379e-05, + "loss": 0.004, + "step": 29960 + }, + { + "epoch": 5.34, + "learning_rate": 4.732926176890157e-05, + "loss": 0.0058, + "step": 29970 + }, + { + "epoch": 5.35, + "learning_rate": 4.732837018544936e-05, + "loss": 0.0095, + "step": 29980 + }, + { + "epoch": 5.35, + "learning_rate": 4.732747860199715e-05, + "loss": 0.0025, + "step": 29990 + }, + { + "epoch": 5.35, + "learning_rate": 4.732658701854494e-05, + "loss": 0.0052, + "step": 30000 + }, + { + "epoch": 5.35, + "learning_rate": 4.732569543509272e-05, + "loss": 0.0053, + "step": 30010 + }, + { + "epoch": 5.35, + "learning_rate": 4.7324803851640515e-05, + "loss": 0.009, + "step": 30020 + }, + { + "epoch": 5.35, + "learning_rate": 4.7323912268188306e-05, + "loss": 0.0081, + "step": 30030 + }, + { + "epoch": 5.36, + "learning_rate": 4.732302068473609e-05, + "loss": 0.0058, + "step": 30040 + }, + { + "epoch": 5.36, + "learning_rate": 4.732212910128389e-05, + "loss": 0.0052, + "step": 30050 + }, + { + "epoch": 5.36, + "learning_rate": 4.732123751783167e-05, + "loss": 0.0065, + "step": 30060 + }, + { + "epoch": 5.36, + "learning_rate": 4.7320345934379464e-05, + "loss": 0.0043, + "step": 30070 + }, + { + "epoch": 5.36, + "learning_rate": 4.731945435092725e-05, + "loss": 0.0089, + "step": 30080 + }, + { + "epoch": 5.37, + "learning_rate": 4.731856276747504e-05, + "loss": 0.0031, + "step": 30090 + }, + { + "epoch": 5.37, + "learning_rate": 4.7317671184022824e-05, + "loss": 0.0059, + "step": 30100 + }, + { + "epoch": 5.37, + "learning_rate": 4.7316779600570615e-05, + "loss": 0.0055, + "step": 30110 + }, + { + "epoch": 5.37, + "learning_rate": 4.7315888017118406e-05, + "loss": 0.0067, + "step": 30120 + }, + { + "epoch": 5.37, + "learning_rate": 4.731499643366619e-05, + "loss": 0.0066, + "step": 30130 + }, + { + "epoch": 5.37, + "learning_rate": 4.731410485021398e-05, + "loss": 0.0046, + "step": 30140 + }, + { + "epoch": 5.38, + "learning_rate": 4.7313213266761766e-05, + "loss": 0.0035, + "step": 30150 + }, + { + "epoch": 5.38, + "learning_rate": 4.731232168330956e-05, + "loss": 0.0046, + "step": 30160 + }, + { + "epoch": 5.38, + "learning_rate": 4.731143009985735e-05, + "loss": 0.0061, + "step": 30170 + }, + { + "epoch": 5.38, + "learning_rate": 4.731053851640514e-05, + "loss": 0.0057, + "step": 30180 + }, + { + "epoch": 5.38, + "learning_rate": 4.730964693295293e-05, + "loss": 0.0066, + "step": 30190 + }, + { + "epoch": 5.39, + "learning_rate": 4.7308755349500716e-05, + "loss": 0.006, + "step": 30200 + }, + { + "epoch": 5.39, + "learning_rate": 4.730786376604851e-05, + "loss": 0.0116, + "step": 30210 + }, + { + "epoch": 5.39, + "learning_rate": 4.730697218259629e-05, + "loss": 0.0072, + "step": 30220 + }, + { + "epoch": 5.39, + "learning_rate": 4.730608059914408e-05, + "loss": 0.0061, + "step": 30230 + }, + { + "epoch": 5.39, + "learning_rate": 4.730518901569187e-05, + "loss": 0.0049, + "step": 30240 + }, + { + "epoch": 5.39, + "learning_rate": 4.730429743223966e-05, + "loss": 0.0051, + "step": 30250 + }, + { + "epoch": 5.4, + "learning_rate": 4.730340584878745e-05, + "loss": 0.0045, + "step": 30260 + }, + { + "epoch": 5.4, + "learning_rate": 4.7302514265335234e-05, + "loss": 0.0062, + "step": 30270 + }, + { + "epoch": 5.4, + "learning_rate": 4.730162268188303e-05, + "loss": 0.0075, + "step": 30280 + }, + { + "epoch": 5.4, + "learning_rate": 4.7300731098430816e-05, + "loss": 0.0055, + "step": 30290 + }, + { + "epoch": 5.4, + "learning_rate": 4.729983951497861e-05, + "loss": 0.008, + "step": 30300 + }, + { + "epoch": 5.4, + "learning_rate": 4.729894793152639e-05, + "loss": 0.0085, + "step": 30310 + }, + { + "epoch": 5.41, + "learning_rate": 4.729805634807418e-05, + "loss": 0.0064, + "step": 30320 + }, + { + "epoch": 5.41, + "learning_rate": 4.729716476462197e-05, + "loss": 0.0068, + "step": 30330 + }, + { + "epoch": 5.41, + "learning_rate": 4.729627318116976e-05, + "loss": 0.0041, + "step": 30340 + }, + { + "epoch": 5.41, + "learning_rate": 4.729538159771755e-05, + "loss": 0.0038, + "step": 30350 + }, + { + "epoch": 5.41, + "learning_rate": 4.7294490014265334e-05, + "loss": 0.0053, + "step": 30360 + }, + { + "epoch": 5.42, + "learning_rate": 4.7293598430813126e-05, + "loss": 0.006, + "step": 30370 + }, + { + "epoch": 5.42, + "learning_rate": 4.729270684736091e-05, + "loss": 0.0088, + "step": 30380 + }, + { + "epoch": 5.42, + "learning_rate": 4.729181526390871e-05, + "loss": 0.0067, + "step": 30390 + }, + { + "epoch": 5.42, + "learning_rate": 4.729092368045649e-05, + "loss": 0.0067, + "step": 30400 + }, + { + "epoch": 5.42, + "learning_rate": 4.7290032097004284e-05, + "loss": 0.0085, + "step": 30410 + }, + { + "epoch": 5.42, + "learning_rate": 4.7289140513552075e-05, + "loss": 0.005, + "step": 30420 + }, + { + "epoch": 5.43, + "learning_rate": 4.728824893009986e-05, + "loss": 0.0046, + "step": 30430 + }, + { + "epoch": 5.43, + "learning_rate": 4.728735734664765e-05, + "loss": 0.0079, + "step": 30440 + }, + { + "epoch": 5.43, + "learning_rate": 4.7286465763195435e-05, + "loss": 0.009, + "step": 30450 + }, + { + "epoch": 5.43, + "learning_rate": 4.7285574179743226e-05, + "loss": 0.0062, + "step": 30460 + }, + { + "epoch": 5.43, + "learning_rate": 4.728468259629101e-05, + "loss": 0.006, + "step": 30470 + }, + { + "epoch": 5.44, + "learning_rate": 4.72837910128388e-05, + "loss": 0.0048, + "step": 30480 + }, + { + "epoch": 5.44, + "learning_rate": 4.728289942938659e-05, + "loss": 0.0073, + "step": 30490 + }, + { + "epoch": 5.44, + "learning_rate": 4.7282007845934384e-05, + "loss": 0.0054, + "step": 30500 + }, + { + "epoch": 5.44, + "learning_rate": 4.7281116262482175e-05, + "loss": 0.0076, + "step": 30510 + }, + { + "epoch": 5.44, + "learning_rate": 4.728022467902996e-05, + "loss": 0.0055, + "step": 30520 + }, + { + "epoch": 5.44, + "learning_rate": 4.727933309557775e-05, + "loss": 0.0038, + "step": 30530 + }, + { + "epoch": 5.45, + "learning_rate": 4.7278441512125536e-05, + "loss": 0.0074, + "step": 30540 + }, + { + "epoch": 5.45, + "learning_rate": 4.727754992867333e-05, + "loss": 0.0049, + "step": 30550 + }, + { + "epoch": 5.45, + "learning_rate": 4.727665834522111e-05, + "loss": 0.0046, + "step": 30560 + }, + { + "epoch": 5.45, + "learning_rate": 4.72757667617689e-05, + "loss": 0.0069, + "step": 30570 + }, + { + "epoch": 5.45, + "learning_rate": 4.7274875178316694e-05, + "loss": 0.0098, + "step": 30580 + }, + { + "epoch": 5.45, + "learning_rate": 4.727398359486448e-05, + "loss": 0.0041, + "step": 30590 + }, + { + "epoch": 5.46, + "learning_rate": 4.727309201141227e-05, + "loss": 0.0066, + "step": 30600 + }, + { + "epoch": 5.46, + "learning_rate": 4.727220042796006e-05, + "loss": 0.0042, + "step": 30610 + }, + { + "epoch": 5.46, + "learning_rate": 4.727130884450785e-05, + "loss": 0.0084, + "step": 30620 + }, + { + "epoch": 5.46, + "learning_rate": 4.7270417261055636e-05, + "loss": 0.0077, + "step": 30630 + }, + { + "epoch": 5.46, + "learning_rate": 4.726952567760343e-05, + "loss": 0.0068, + "step": 30640 + }, + { + "epoch": 5.47, + "learning_rate": 4.726863409415121e-05, + "loss": 0.0054, + "step": 30650 + }, + { + "epoch": 5.47, + "learning_rate": 4.7267742510699e-05, + "loss": 0.0082, + "step": 30660 + }, + { + "epoch": 5.47, + "learning_rate": 4.7266850927246794e-05, + "loss": 0.0065, + "step": 30670 + }, + { + "epoch": 5.47, + "learning_rate": 4.726595934379458e-05, + "loss": 0.0065, + "step": 30680 + }, + { + "epoch": 5.47, + "learning_rate": 4.726506776034237e-05, + "loss": 0.0041, + "step": 30690 + }, + { + "epoch": 5.47, + "learning_rate": 4.7264176176890154e-05, + "loss": 0.0025, + "step": 30700 + }, + { + "epoch": 5.48, + "learning_rate": 4.7263284593437945e-05, + "loss": 0.0041, + "step": 30710 + }, + { + "epoch": 5.48, + "learning_rate": 4.726239300998574e-05, + "loss": 0.0054, + "step": 30720 + }, + { + "epoch": 5.48, + "learning_rate": 4.726150142653353e-05, + "loss": 0.0052, + "step": 30730 + }, + { + "epoch": 5.48, + "learning_rate": 4.726060984308132e-05, + "loss": 0.0034, + "step": 30740 + }, + { + "epoch": 5.48, + "learning_rate": 4.7259718259629104e-05, + "loss": 0.0057, + "step": 30750 + }, + { + "epoch": 5.49, + "learning_rate": 4.7258826676176895e-05, + "loss": 0.0059, + "step": 30760 + }, + { + "epoch": 5.49, + "learning_rate": 4.725793509272468e-05, + "loss": 0.011, + "step": 30770 + }, + { + "epoch": 5.49, + "learning_rate": 4.725704350927247e-05, + "loss": 0.0047, + "step": 30780 + }, + { + "epoch": 5.49, + "learning_rate": 4.7256151925820255e-05, + "loss": 0.0055, + "step": 30790 + }, + { + "epoch": 5.49, + "learning_rate": 4.7255260342368046e-05, + "loss": 0.006, + "step": 30800 + }, + { + "epoch": 5.49, + "learning_rate": 4.725436875891584e-05, + "loss": 0.0068, + "step": 30810 + }, + { + "epoch": 5.5, + "learning_rate": 4.725347717546362e-05, + "loss": 0.006, + "step": 30820 + }, + { + "epoch": 5.5, + "learning_rate": 4.725258559201142e-05, + "loss": 0.0069, + "step": 30830 + }, + { + "epoch": 5.5, + "learning_rate": 4.7251694008559204e-05, + "loss": 0.0045, + "step": 30840 + }, + { + "epoch": 5.5, + "learning_rate": 4.7250802425106995e-05, + "loss": 0.0049, + "step": 30850 + }, + { + "epoch": 5.5, + "learning_rate": 4.724991084165478e-05, + "loss": 0.0061, + "step": 30860 + }, + { + "epoch": 5.5, + "learning_rate": 4.724901925820257e-05, + "loss": 0.0052, + "step": 30870 + }, + { + "epoch": 5.51, + "learning_rate": 4.7248127674750355e-05, + "loss": 0.0054, + "step": 30880 + }, + { + "epoch": 5.51, + "learning_rate": 4.7247236091298147e-05, + "loss": 0.0053, + "step": 30890 + }, + { + "epoch": 5.51, + "learning_rate": 4.724634450784594e-05, + "loss": 0.0111, + "step": 30900 + }, + { + "epoch": 5.51, + "learning_rate": 4.724545292439372e-05, + "loss": 0.007, + "step": 30910 + }, + { + "epoch": 5.51, + "learning_rate": 4.7244561340941513e-05, + "loss": 0.0057, + "step": 30920 + }, + { + "epoch": 5.52, + "learning_rate": 4.72436697574893e-05, + "loss": 0.0051, + "step": 30930 + }, + { + "epoch": 5.52, + "learning_rate": 4.7242778174037096e-05, + "loss": 0.0026, + "step": 30940 + }, + { + "epoch": 5.52, + "learning_rate": 4.724188659058488e-05, + "loss": 0.0056, + "step": 30950 + }, + { + "epoch": 5.52, + "learning_rate": 4.724099500713267e-05, + "loss": 0.006, + "step": 30960 + }, + { + "epoch": 5.52, + "learning_rate": 4.724010342368046e-05, + "loss": 0.0067, + "step": 30970 + }, + { + "epoch": 5.52, + "learning_rate": 4.723921184022825e-05, + "loss": 0.0044, + "step": 30980 + }, + { + "epoch": 5.53, + "learning_rate": 4.723832025677604e-05, + "loss": 0.0086, + "step": 30990 + }, + { + "epoch": 5.53, + "learning_rate": 4.723742867332382e-05, + "loss": 0.0057, + "step": 31000 + }, + { + "epoch": 5.53, + "learning_rate": 4.7236537089871614e-05, + "loss": 0.0044, + "step": 31010 + }, + { + "epoch": 5.53, + "learning_rate": 4.72356455064194e-05, + "loss": 0.0048, + "step": 31020 + }, + { + "epoch": 5.53, + "learning_rate": 4.723475392296719e-05, + "loss": 0.0055, + "step": 31030 + }, + { + "epoch": 5.53, + "learning_rate": 4.723386233951498e-05, + "loss": 0.0066, + "step": 31040 + }, + { + "epoch": 5.54, + "learning_rate": 4.723297075606277e-05, + "loss": 0.0084, + "step": 31050 + }, + { + "epoch": 5.54, + "learning_rate": 4.723207917261056e-05, + "loss": 0.007, + "step": 31060 + }, + { + "epoch": 5.54, + "learning_rate": 4.723118758915835e-05, + "loss": 0.0082, + "step": 31070 + }, + { + "epoch": 5.54, + "learning_rate": 4.723029600570614e-05, + "loss": 0.0033, + "step": 31080 + }, + { + "epoch": 5.54, + "learning_rate": 4.722940442225392e-05, + "loss": 0.0077, + "step": 31090 + }, + { + "epoch": 5.55, + "learning_rate": 4.7228512838801715e-05, + "loss": 0.008, + "step": 31100 + }, + { + "epoch": 5.55, + "learning_rate": 4.72276212553495e-05, + "loss": 0.0087, + "step": 31110 + }, + { + "epoch": 5.55, + "learning_rate": 4.722672967189729e-05, + "loss": 0.0089, + "step": 31120 + }, + { + "epoch": 5.55, + "learning_rate": 4.722583808844508e-05, + "loss": 0.008, + "step": 31130 + }, + { + "epoch": 5.55, + "learning_rate": 4.7224946504992866e-05, + "loss": 0.004, + "step": 31140 + }, + { + "epoch": 5.55, + "learning_rate": 4.722405492154066e-05, + "loss": 0.0083, + "step": 31150 + }, + { + "epoch": 5.56, + "learning_rate": 4.722316333808845e-05, + "loss": 0.0066, + "step": 31160 + }, + { + "epoch": 5.56, + "learning_rate": 4.722227175463624e-05, + "loss": 0.0053, + "step": 31170 + }, + { + "epoch": 5.56, + "learning_rate": 4.7221380171184024e-05, + "loss": 0.005, + "step": 31180 + }, + { + "epoch": 5.56, + "learning_rate": 4.7220488587731815e-05, + "loss": 0.0044, + "step": 31190 + }, + { + "epoch": 5.56, + "learning_rate": 4.7219597004279606e-05, + "loss": 0.0031, + "step": 31200 + }, + { + "epoch": 5.57, + "learning_rate": 4.721870542082739e-05, + "loss": 0.0063, + "step": 31210 + }, + { + "epoch": 5.57, + "learning_rate": 4.721781383737518e-05, + "loss": 0.0044, + "step": 31220 + }, + { + "epoch": 5.57, + "learning_rate": 4.7216922253922966e-05, + "loss": 0.0074, + "step": 31230 + }, + { + "epoch": 5.57, + "learning_rate": 4.721603067047076e-05, + "loss": 0.0034, + "step": 31240 + }, + { + "epoch": 5.57, + "learning_rate": 4.721513908701854e-05, + "loss": 0.0067, + "step": 31250 + }, + { + "epoch": 5.57, + "learning_rate": 4.721424750356633e-05, + "loss": 0.0062, + "step": 31260 + }, + { + "epoch": 5.58, + "learning_rate": 4.7213355920114124e-05, + "loss": 0.0063, + "step": 31270 + }, + { + "epoch": 5.58, + "learning_rate": 4.7212464336661916e-05, + "loss": 0.0089, + "step": 31280 + }, + { + "epoch": 5.58, + "learning_rate": 4.721157275320971e-05, + "loss": 0.0052, + "step": 31290 + }, + { + "epoch": 5.58, + "learning_rate": 4.721068116975749e-05, + "loss": 0.0054, + "step": 31300 + }, + { + "epoch": 5.58, + "learning_rate": 4.720978958630528e-05, + "loss": 0.0041, + "step": 31310 + }, + { + "epoch": 5.58, + "learning_rate": 4.720889800285307e-05, + "loss": 0.0045, + "step": 31320 + }, + { + "epoch": 5.59, + "learning_rate": 4.720800641940086e-05, + "loss": 0.009, + "step": 31330 + }, + { + "epoch": 5.59, + "learning_rate": 4.720711483594864e-05, + "loss": 0.0035, + "step": 31340 + }, + { + "epoch": 5.59, + "learning_rate": 4.7206223252496434e-05, + "loss": 0.0058, + "step": 31350 + }, + { + "epoch": 5.59, + "learning_rate": 4.7205331669044225e-05, + "loss": 0.0027, + "step": 31360 + }, + { + "epoch": 5.59, + "learning_rate": 4.720444008559201e-05, + "loss": 0.0076, + "step": 31370 + }, + { + "epoch": 5.6, + "learning_rate": 4.720354850213981e-05, + "loss": 0.0051, + "step": 31380 + }, + { + "epoch": 5.6, + "learning_rate": 4.720265691868759e-05, + "loss": 0.0068, + "step": 31390 + }, + { + "epoch": 5.6, + "learning_rate": 4.720176533523538e-05, + "loss": 0.0066, + "step": 31400 + }, + { + "epoch": 5.6, + "learning_rate": 4.720087375178317e-05, + "loss": 0.0053, + "step": 31410 + }, + { + "epoch": 5.6, + "learning_rate": 4.719998216833096e-05, + "loss": 0.0046, + "step": 31420 + }, + { + "epoch": 5.6, + "learning_rate": 4.719909058487875e-05, + "loss": 0.0082, + "step": 31430 + }, + { + "epoch": 5.61, + "learning_rate": 4.7198199001426534e-05, + "loss": 0.0079, + "step": 31440 + }, + { + "epoch": 5.61, + "learning_rate": 4.7197307417974326e-05, + "loss": 0.0064, + "step": 31450 + }, + { + "epoch": 5.61, + "learning_rate": 4.719641583452211e-05, + "loss": 0.0065, + "step": 31460 + }, + { + "epoch": 5.61, + "learning_rate": 4.71955242510699e-05, + "loss": 0.0056, + "step": 31470 + }, + { + "epoch": 5.61, + "learning_rate": 4.7194632667617686e-05, + "loss": 0.0099, + "step": 31480 + }, + { + "epoch": 5.62, + "learning_rate": 4.7193741084165484e-05, + "loss": 0.0049, + "step": 31490 + }, + { + "epoch": 5.62, + "learning_rate": 4.719284950071327e-05, + "loss": 0.0049, + "step": 31500 + }, + { + "epoch": 5.62, + "learning_rate": 4.719195791726106e-05, + "loss": 0.0053, + "step": 31510 + }, + { + "epoch": 5.62, + "learning_rate": 4.719106633380885e-05, + "loss": 0.0072, + "step": 31520 + }, + { + "epoch": 5.62, + "learning_rate": 4.7190174750356635e-05, + "loss": 0.0063, + "step": 31530 + }, + { + "epoch": 5.62, + "learning_rate": 4.7189283166904426e-05, + "loss": 0.0037, + "step": 31540 + }, + { + "epoch": 5.63, + "learning_rate": 4.718839158345221e-05, + "loss": 0.0048, + "step": 31550 + }, + { + "epoch": 5.63, + "learning_rate": 4.71875e-05, + "loss": 0.0055, + "step": 31560 + }, + { + "epoch": 5.63, + "learning_rate": 4.7186608416547786e-05, + "loss": 0.0089, + "step": 31570 + }, + { + "epoch": 5.63, + "learning_rate": 4.718571683309558e-05, + "loss": 0.0062, + "step": 31580 + }, + { + "epoch": 5.63, + "learning_rate": 4.718482524964337e-05, + "loss": 0.0072, + "step": 31590 + }, + { + "epoch": 5.63, + "learning_rate": 4.718393366619116e-05, + "loss": 0.005, + "step": 31600 + }, + { + "epoch": 5.64, + "learning_rate": 4.718304208273895e-05, + "loss": 0.0042, + "step": 31610 + }, + { + "epoch": 5.64, + "learning_rate": 4.7182150499286736e-05, + "loss": 0.0046, + "step": 31620 + }, + { + "epoch": 5.64, + "learning_rate": 4.718125891583453e-05, + "loss": 0.0036, + "step": 31630 + }, + { + "epoch": 5.64, + "learning_rate": 4.718036733238231e-05, + "loss": 0.0067, + "step": 31640 + }, + { + "epoch": 5.64, + "learning_rate": 4.71794757489301e-05, + "loss": 0.006, + "step": 31650 + }, + { + "epoch": 5.65, + "learning_rate": 4.7178584165477894e-05, + "loss": 0.0066, + "step": 31660 + }, + { + "epoch": 5.65, + "learning_rate": 4.717769258202568e-05, + "loss": 0.005, + "step": 31670 + }, + { + "epoch": 5.65, + "learning_rate": 4.717680099857347e-05, + "loss": 0.0062, + "step": 31680 + }, + { + "epoch": 5.65, + "learning_rate": 4.7175909415121254e-05, + "loss": 0.0047, + "step": 31690 + }, + { + "epoch": 5.65, + "learning_rate": 4.7175017831669045e-05, + "loss": 0.0039, + "step": 31700 + }, + { + "epoch": 5.65, + "learning_rate": 4.7174126248216836e-05, + "loss": 0.0054, + "step": 31710 + }, + { + "epoch": 5.66, + "learning_rate": 4.717323466476463e-05, + "loss": 0.0053, + "step": 31720 + }, + { + "epoch": 5.66, + "learning_rate": 4.7172432239657635e-05, + "loss": 0.0085, + "step": 31730 + }, + { + "epoch": 5.66, + "learning_rate": 4.717154065620542e-05, + "loss": 0.0042, + "step": 31740 + }, + { + "epoch": 5.66, + "learning_rate": 4.717064907275322e-05, + "loss": 0.007, + "step": 31750 + }, + { + "epoch": 5.66, + "learning_rate": 4.7169757489301e-05, + "loss": 0.0045, + "step": 31760 + }, + { + "epoch": 5.67, + "learning_rate": 4.716886590584879e-05, + "loss": 0.0061, + "step": 31770 + }, + { + "epoch": 5.67, + "learning_rate": 4.716797432239658e-05, + "loss": 0.0084, + "step": 31780 + }, + { + "epoch": 5.67, + "learning_rate": 4.716708273894437e-05, + "loss": 0.0038, + "step": 31790 + }, + { + "epoch": 5.67, + "learning_rate": 4.7166191155492153e-05, + "loss": 0.0075, + "step": 31800 + }, + { + "epoch": 5.67, + "learning_rate": 4.7165299572039945e-05, + "loss": 0.0052, + "step": 31810 + }, + { + "epoch": 5.67, + "learning_rate": 4.7164407988587736e-05, + "loss": 0.0082, + "step": 31820 + }, + { + "epoch": 5.68, + "learning_rate": 4.716351640513552e-05, + "loss": 0.0039, + "step": 31830 + }, + { + "epoch": 5.68, + "learning_rate": 4.716262482168331e-05, + "loss": 0.0069, + "step": 31840 + }, + { + "epoch": 5.68, + "learning_rate": 4.7161733238231096e-05, + "loss": 0.0063, + "step": 31850 + }, + { + "epoch": 5.68, + "learning_rate": 4.7160841654778894e-05, + "loss": 0.0072, + "step": 31860 + }, + { + "epoch": 5.68, + "learning_rate": 4.715995007132668e-05, + "loss": 0.0045, + "step": 31870 + }, + { + "epoch": 5.68, + "learning_rate": 4.715905848787447e-05, + "loss": 0.0059, + "step": 31880 + }, + { + "epoch": 5.69, + "learning_rate": 4.7158166904422254e-05, + "loss": 0.0038, + "step": 31890 + }, + { + "epoch": 5.69, + "learning_rate": 4.7157275320970045e-05, + "loss": 0.009, + "step": 31900 + }, + { + "epoch": 5.69, + "learning_rate": 4.7156383737517836e-05, + "loss": 0.0074, + "step": 31910 + }, + { + "epoch": 5.69, + "learning_rate": 4.715549215406562e-05, + "loss": 0.0048, + "step": 31920 + }, + { + "epoch": 5.69, + "learning_rate": 4.715460057061341e-05, + "loss": 0.006, + "step": 31930 + }, + { + "epoch": 5.7, + "learning_rate": 4.7153708987161196e-05, + "loss": 0.006, + "step": 31940 + }, + { + "epoch": 5.7, + "learning_rate": 4.715281740370899e-05, + "loss": 0.0069, + "step": 31950 + }, + { + "epoch": 5.7, + "learning_rate": 4.715192582025678e-05, + "loss": 0.0076, + "step": 31960 + }, + { + "epoch": 5.7, + "learning_rate": 4.715103423680457e-05, + "loss": 0.0042, + "step": 31970 + }, + { + "epoch": 5.7, + "learning_rate": 4.715014265335236e-05, + "loss": 0.003, + "step": 31980 + }, + { + "epoch": 5.7, + "learning_rate": 4.7149251069900146e-05, + "loss": 0.0052, + "step": 31990 + }, + { + "epoch": 5.71, + "learning_rate": 4.714835948644794e-05, + "loss": 0.0062, + "step": 32000 + }, + { + "epoch": 5.71, + "learning_rate": 4.714746790299572e-05, + "loss": 0.0051, + "step": 32010 + }, + { + "epoch": 5.71, + "learning_rate": 4.714657631954351e-05, + "loss": 0.0058, + "step": 32020 + }, + { + "epoch": 5.71, + "learning_rate": 4.71456847360913e-05, + "loss": 0.0064, + "step": 32030 + }, + { + "epoch": 5.71, + "learning_rate": 4.714479315263909e-05, + "loss": 0.0075, + "step": 32040 + }, + { + "epoch": 5.72, + "learning_rate": 4.714390156918688e-05, + "loss": 0.0052, + "step": 32050 + }, + { + "epoch": 5.72, + "learning_rate": 4.7143009985734664e-05, + "loss": 0.0065, + "step": 32060 + }, + { + "epoch": 5.72, + "learning_rate": 4.7142118402282455e-05, + "loss": 0.0062, + "step": 32070 + }, + { + "epoch": 5.72, + "learning_rate": 4.7141226818830246e-05, + "loss": 0.0049, + "step": 32080 + }, + { + "epoch": 5.72, + "learning_rate": 4.714033523537804e-05, + "loss": 0.0059, + "step": 32090 + }, + { + "epoch": 5.72, + "learning_rate": 4.713944365192582e-05, + "loss": 0.0059, + "step": 32100 + }, + { + "epoch": 5.73, + "learning_rate": 4.713855206847361e-05, + "loss": 0.0061, + "step": 32110 + }, + { + "epoch": 5.73, + "learning_rate": 4.71376604850214e-05, + "loss": 0.0059, + "step": 32120 + }, + { + "epoch": 5.73, + "learning_rate": 4.713676890156919e-05, + "loss": 0.0061, + "step": 32130 + }, + { + "epoch": 5.73, + "learning_rate": 4.713587731811698e-05, + "loss": 0.0048, + "step": 32140 + }, + { + "epoch": 5.73, + "learning_rate": 4.7134985734664764e-05, + "loss": 0.009, + "step": 32150 + }, + { + "epoch": 5.73, + "learning_rate": 4.7134094151212556e-05, + "loss": 0.0037, + "step": 32160 + }, + { + "epoch": 5.74, + "learning_rate": 4.713320256776034e-05, + "loss": 0.0059, + "step": 32170 + }, + { + "epoch": 5.74, + "learning_rate": 4.713231098430813e-05, + "loss": 0.0061, + "step": 32180 + }, + { + "epoch": 5.74, + "learning_rate": 4.713141940085592e-05, + "loss": 0.0071, + "step": 32190 + }, + { + "epoch": 5.74, + "learning_rate": 4.7130527817403714e-05, + "loss": 0.0077, + "step": 32200 + }, + { + "epoch": 5.74, + "learning_rate": 4.7129636233951505e-05, + "loss": 0.0053, + "step": 32210 + }, + { + "epoch": 5.75, + "learning_rate": 4.712874465049929e-05, + "loss": 0.0076, + "step": 32220 + }, + { + "epoch": 5.75, + "learning_rate": 4.712785306704708e-05, + "loss": 0.0082, + "step": 32230 + }, + { + "epoch": 5.75, + "learning_rate": 4.7126961483594865e-05, + "loss": 0.0062, + "step": 32240 + }, + { + "epoch": 5.75, + "learning_rate": 4.7126069900142656e-05, + "loss": 0.0041, + "step": 32250 + }, + { + "epoch": 5.75, + "learning_rate": 4.712517831669044e-05, + "loss": 0.0064, + "step": 32260 + }, + { + "epoch": 5.75, + "learning_rate": 4.712428673323823e-05, + "loss": 0.0104, + "step": 32270 + }, + { + "epoch": 5.76, + "learning_rate": 4.712339514978602e-05, + "loss": 0.0062, + "step": 32280 + }, + { + "epoch": 5.76, + "learning_rate": 4.712250356633381e-05, + "loss": 0.0057, + "step": 32290 + }, + { + "epoch": 5.76, + "learning_rate": 4.7121611982881606e-05, + "loss": 0.0025, + "step": 32300 + }, + { + "epoch": 5.76, + "learning_rate": 4.712072039942939e-05, + "loss": 0.0067, + "step": 32310 + }, + { + "epoch": 5.76, + "learning_rate": 4.711982881597718e-05, + "loss": 0.0057, + "step": 32320 + }, + { + "epoch": 5.76, + "learning_rate": 4.7118937232524966e-05, + "loss": 0.0074, + "step": 32330 + }, + { + "epoch": 5.77, + "learning_rate": 4.711804564907276e-05, + "loss": 0.0078, + "step": 32340 + }, + { + "epoch": 5.77, + "learning_rate": 4.711715406562054e-05, + "loss": 0.0063, + "step": 32350 + }, + { + "epoch": 5.77, + "learning_rate": 4.711626248216833e-05, + "loss": 0.0114, + "step": 32360 + }, + { + "epoch": 5.77, + "learning_rate": 4.7115370898716124e-05, + "loss": 0.0068, + "step": 32370 + }, + { + "epoch": 5.77, + "learning_rate": 4.711447931526391e-05, + "loss": 0.0079, + "step": 32380 + }, + { + "epoch": 5.78, + "learning_rate": 4.71135877318117e-05, + "loss": 0.0093, + "step": 32390 + }, + { + "epoch": 5.78, + "learning_rate": 4.7112696148359484e-05, + "loss": 0.005, + "step": 32400 + }, + { + "epoch": 5.78, + "learning_rate": 4.711180456490728e-05, + "loss": 0.0046, + "step": 32410 + }, + { + "epoch": 5.78, + "learning_rate": 4.7110912981455066e-05, + "loss": 0.0085, + "step": 32420 + }, + { + "epoch": 5.78, + "learning_rate": 4.711002139800286e-05, + "loss": 0.0087, + "step": 32430 + }, + { + "epoch": 5.78, + "learning_rate": 4.710912981455065e-05, + "loss": 0.0038, + "step": 32440 + }, + { + "epoch": 5.79, + "learning_rate": 4.710823823109843e-05, + "loss": 0.005, + "step": 32450 + }, + { + "epoch": 5.79, + "learning_rate": 4.7107346647646224e-05, + "loss": 0.0038, + "step": 32460 + }, + { + "epoch": 5.79, + "learning_rate": 4.710645506419401e-05, + "loss": 0.0054, + "step": 32470 + }, + { + "epoch": 5.79, + "learning_rate": 4.71055634807418e-05, + "loss": 0.0046, + "step": 32480 + }, + { + "epoch": 5.79, + "learning_rate": 4.7104671897289584e-05, + "loss": 0.0091, + "step": 32490 + }, + { + "epoch": 5.8, + "learning_rate": 4.7103780313837376e-05, + "loss": 0.0066, + "step": 32500 + }, + { + "epoch": 5.8, + "learning_rate": 4.710288873038517e-05, + "loss": 0.0044, + "step": 32510 + }, + { + "epoch": 5.8, + "learning_rate": 4.710199714693296e-05, + "loss": 0.0057, + "step": 32520 + }, + { + "epoch": 5.8, + "learning_rate": 4.710110556348075e-05, + "loss": 0.008, + "step": 32530 + }, + { + "epoch": 5.8, + "learning_rate": 4.7100213980028534e-05, + "loss": 0.0054, + "step": 32540 + }, + { + "epoch": 5.8, + "learning_rate": 4.7099322396576325e-05, + "loss": 0.0071, + "step": 32550 + }, + { + "epoch": 5.81, + "learning_rate": 4.709843081312411e-05, + "loss": 0.0042, + "step": 32560 + }, + { + "epoch": 5.81, + "learning_rate": 4.70975392296719e-05, + "loss": 0.0037, + "step": 32570 + }, + { + "epoch": 5.81, + "learning_rate": 4.7096647646219685e-05, + "loss": 0.0034, + "step": 32580 + }, + { + "epoch": 5.81, + "learning_rate": 4.7095756062767476e-05, + "loss": 0.0051, + "step": 32590 + }, + { + "epoch": 5.81, + "learning_rate": 4.709486447931527e-05, + "loss": 0.0086, + "step": 32600 + }, + { + "epoch": 5.81, + "learning_rate": 4.709397289586305e-05, + "loss": 0.0072, + "step": 32610 + }, + { + "epoch": 5.82, + "learning_rate": 4.709308131241084e-05, + "loss": 0.0065, + "step": 32620 + }, + { + "epoch": 5.82, + "learning_rate": 4.709218972895863e-05, + "loss": 0.0068, + "step": 32630 + }, + { + "epoch": 5.82, + "learning_rate": 4.7091298145506425e-05, + "loss": 0.0095, + "step": 32640 + }, + { + "epoch": 5.82, + "learning_rate": 4.709040656205421e-05, + "loss": 0.0056, + "step": 32650 + }, + { + "epoch": 5.82, + "learning_rate": 4.7089514978602e-05, + "loss": 0.0101, + "step": 32660 + }, + { + "epoch": 5.83, + "learning_rate": 4.708862339514979e-05, + "loss": 0.0063, + "step": 32670 + }, + { + "epoch": 5.83, + "learning_rate": 4.708773181169758e-05, + "loss": 0.0039, + "step": 32680 + }, + { + "epoch": 5.83, + "learning_rate": 4.708684022824537e-05, + "loss": 0.0067, + "step": 32690 + }, + { + "epoch": 5.83, + "learning_rate": 4.708594864479315e-05, + "loss": 0.0058, + "step": 32700 + }, + { + "epoch": 5.83, + "learning_rate": 4.7085057061340943e-05, + "loss": 0.0104, + "step": 32710 + }, + { + "epoch": 5.83, + "learning_rate": 4.708416547788873e-05, + "loss": 0.0066, + "step": 32720 + }, + { + "epoch": 5.84, + "learning_rate": 4.708327389443652e-05, + "loss": 0.0098, + "step": 32730 + }, + { + "epoch": 5.84, + "learning_rate": 4.708238231098431e-05, + "loss": 0.0072, + "step": 32740 + }, + { + "epoch": 5.84, + "learning_rate": 4.70814907275321e-05, + "loss": 0.0035, + "step": 32750 + }, + { + "epoch": 5.84, + "learning_rate": 4.708059914407989e-05, + "loss": 0.0092, + "step": 32760 + }, + { + "epoch": 5.84, + "learning_rate": 4.707970756062768e-05, + "loss": 0.0059, + "step": 32770 + }, + { + "epoch": 5.85, + "learning_rate": 4.707881597717547e-05, + "loss": 0.0061, + "step": 32780 + }, + { + "epoch": 5.85, + "learning_rate": 4.707792439372325e-05, + "loss": 0.007, + "step": 32790 + }, + { + "epoch": 5.85, + "learning_rate": 4.7077032810271044e-05, + "loss": 0.0081, + "step": 32800 + }, + { + "epoch": 5.85, + "learning_rate": 4.707614122681883e-05, + "loss": 0.0054, + "step": 32810 + }, + { + "epoch": 5.85, + "learning_rate": 4.707524964336662e-05, + "loss": 0.0053, + "step": 32820 + }, + { + "epoch": 5.85, + "learning_rate": 4.707435805991441e-05, + "loss": 0.0049, + "step": 32830 + }, + { + "epoch": 5.86, + "learning_rate": 4.7073466476462195e-05, + "loss": 0.0059, + "step": 32840 + }, + { + "epoch": 5.86, + "learning_rate": 4.7072574893009987e-05, + "loss": 0.0051, + "step": 32850 + }, + { + "epoch": 5.86, + "learning_rate": 4.707168330955778e-05, + "loss": 0.0064, + "step": 32860 + }, + { + "epoch": 5.86, + "learning_rate": 4.707079172610557e-05, + "loss": 0.0058, + "step": 32870 + }, + { + "epoch": 5.86, + "learning_rate": 4.7069900142653353e-05, + "loss": 0.0046, + "step": 32880 + }, + { + "epoch": 5.86, + "learning_rate": 4.7069008559201145e-05, + "loss": 0.0063, + "step": 32890 + }, + { + "epoch": 5.87, + "learning_rate": 4.7068116975748936e-05, + "loss": 0.0049, + "step": 32900 + }, + { + "epoch": 5.87, + "learning_rate": 4.706722539229672e-05, + "loss": 0.0058, + "step": 32910 + }, + { + "epoch": 5.87, + "learning_rate": 4.706633380884451e-05, + "loss": 0.0052, + "step": 32920 + }, + { + "epoch": 5.87, + "learning_rate": 4.7065442225392296e-05, + "loss": 0.0061, + "step": 32930 + }, + { + "epoch": 5.87, + "learning_rate": 4.706455064194009e-05, + "loss": 0.0063, + "step": 32940 + }, + { + "epoch": 5.88, + "learning_rate": 4.706365905848787e-05, + "loss": 0.007, + "step": 32950 + }, + { + "epoch": 5.88, + "learning_rate": 4.706276747503566e-05, + "loss": 0.0065, + "step": 32960 + }, + { + "epoch": 5.88, + "learning_rate": 4.7061875891583454e-05, + "loss": 0.0071, + "step": 32970 + }, + { + "epoch": 5.88, + "learning_rate": 4.7060984308131245e-05, + "loss": 0.0054, + "step": 32980 + }, + { + "epoch": 5.88, + "learning_rate": 4.7060092724679036e-05, + "loss": 0.0039, + "step": 32990 + }, + { + "epoch": 5.88, + "learning_rate": 4.705920114122682e-05, + "loss": 0.0078, + "step": 33000 + }, + { + "epoch": 5.89, + "learning_rate": 4.705830955777461e-05, + "loss": 0.0056, + "step": 33010 + }, + { + "epoch": 5.89, + "learning_rate": 4.7057417974322396e-05, + "loss": 0.0061, + "step": 33020 + }, + { + "epoch": 5.89, + "learning_rate": 4.705652639087019e-05, + "loss": 0.0079, + "step": 33030 + }, + { + "epoch": 5.89, + "learning_rate": 4.705563480741797e-05, + "loss": 0.0096, + "step": 33040 + }, + { + "epoch": 5.89, + "learning_rate": 4.705474322396576e-05, + "loss": 0.0059, + "step": 33050 + }, + { + "epoch": 5.9, + "learning_rate": 4.7053851640513555e-05, + "loss": 0.0057, + "step": 33060 + }, + { + "epoch": 5.9, + "learning_rate": 4.705296005706134e-05, + "loss": 0.009, + "step": 33070 + }, + { + "epoch": 5.9, + "learning_rate": 4.705206847360914e-05, + "loss": 0.0058, + "step": 33080 + }, + { + "epoch": 5.9, + "learning_rate": 4.705117689015692e-05, + "loss": 0.0033, + "step": 33090 + }, + { + "epoch": 5.9, + "learning_rate": 4.705028530670471e-05, + "loss": 0.005, + "step": 33100 + }, + { + "epoch": 5.9, + "learning_rate": 4.70493937232525e-05, + "loss": 0.0059, + "step": 33110 + }, + { + "epoch": 5.91, + "learning_rate": 4.704850213980029e-05, + "loss": 0.009, + "step": 33120 + }, + { + "epoch": 5.91, + "learning_rate": 4.704761055634808e-05, + "loss": 0.0043, + "step": 33130 + }, + { + "epoch": 5.91, + "learning_rate": 4.7046718972895864e-05, + "loss": 0.004, + "step": 33140 + }, + { + "epoch": 5.91, + "learning_rate": 4.7045827389443655e-05, + "loss": 0.0048, + "step": 33150 + }, + { + "epoch": 5.91, + "learning_rate": 4.704493580599144e-05, + "loss": 0.0069, + "step": 33160 + }, + { + "epoch": 5.91, + "learning_rate": 4.704404422253923e-05, + "loss": 0.0041, + "step": 33170 + }, + { + "epoch": 5.92, + "learning_rate": 4.7043152639087015e-05, + "loss": 0.0055, + "step": 33180 + }, + { + "epoch": 5.92, + "learning_rate": 4.704226105563481e-05, + "loss": 0.0037, + "step": 33190 + }, + { + "epoch": 5.92, + "learning_rate": 4.70413694721826e-05, + "loss": 0.0056, + "step": 33200 + }, + { + "epoch": 5.92, + "learning_rate": 4.704047788873039e-05, + "loss": 0.005, + "step": 33210 + }, + { + "epoch": 5.92, + "learning_rate": 4.703958630527818e-05, + "loss": 0.0062, + "step": 33220 + }, + { + "epoch": 5.93, + "learning_rate": 4.7038694721825964e-05, + "loss": 0.0053, + "step": 33230 + }, + { + "epoch": 5.93, + "learning_rate": 4.7037803138373756e-05, + "loss": 0.004, + "step": 33240 + }, + { + "epoch": 5.93, + "learning_rate": 4.703691155492154e-05, + "loss": 0.0051, + "step": 33250 + }, + { + "epoch": 5.93, + "learning_rate": 4.703601997146933e-05, + "loss": 0.005, + "step": 33260 + }, + { + "epoch": 5.93, + "learning_rate": 4.7035128388017116e-05, + "loss": 0.0063, + "step": 33270 + }, + { + "epoch": 5.93, + "learning_rate": 4.703423680456491e-05, + "loss": 0.0073, + "step": 33280 + }, + { + "epoch": 5.94, + "learning_rate": 4.70333452211127e-05, + "loss": 0.0044, + "step": 33290 + }, + { + "epoch": 5.94, + "learning_rate": 4.703245363766049e-05, + "loss": 0.007, + "step": 33300 + }, + { + "epoch": 5.94, + "learning_rate": 4.703156205420828e-05, + "loss": 0.0063, + "step": 33310 + }, + { + "epoch": 5.94, + "learning_rate": 4.7030670470756065e-05, + "loss": 0.0076, + "step": 33320 + }, + { + "epoch": 5.94, + "learning_rate": 4.7029778887303856e-05, + "loss": 0.0053, + "step": 33330 + }, + { + "epoch": 5.95, + "learning_rate": 4.702888730385164e-05, + "loss": 0.0081, + "step": 33340 + }, + { + "epoch": 5.95, + "learning_rate": 4.702799572039943e-05, + "loss": 0.0064, + "step": 33350 + }, + { + "epoch": 5.95, + "learning_rate": 4.702710413694722e-05, + "loss": 0.0075, + "step": 33360 + }, + { + "epoch": 5.95, + "learning_rate": 4.702621255349501e-05, + "loss": 0.0063, + "step": 33370 + }, + { + "epoch": 5.95, + "learning_rate": 4.70253209700428e-05, + "loss": 0.0037, + "step": 33380 + }, + { + "epoch": 5.95, + "learning_rate": 4.702442938659058e-05, + "loss": 0.0075, + "step": 33390 + }, + { + "epoch": 5.96, + "learning_rate": 4.7023537803138374e-05, + "loss": 0.0058, + "step": 33400 + }, + { + "epoch": 5.96, + "learning_rate": 4.7022646219686166e-05, + "loss": 0.0074, + "step": 33410 + }, + { + "epoch": 5.96, + "learning_rate": 4.702175463623396e-05, + "loss": 0.0062, + "step": 33420 + }, + { + "epoch": 5.96, + "learning_rate": 4.702086305278174e-05, + "loss": 0.0067, + "step": 33430 + }, + { + "epoch": 5.96, + "learning_rate": 4.701997146932953e-05, + "loss": 0.0035, + "step": 33440 + }, + { + "epoch": 5.96, + "learning_rate": 4.7019079885877324e-05, + "loss": 0.0079, + "step": 33450 + }, + { + "epoch": 5.97, + "learning_rate": 4.701818830242511e-05, + "loss": 0.0032, + "step": 33460 + }, + { + "epoch": 5.97, + "learning_rate": 4.70172967189729e-05, + "loss": 0.0045, + "step": 33470 + }, + { + "epoch": 5.97, + "learning_rate": 4.7016405135520684e-05, + "loss": 0.0086, + "step": 33480 + }, + { + "epoch": 5.97, + "learning_rate": 4.7015513552068475e-05, + "loss": 0.0094, + "step": 33490 + }, + { + "epoch": 5.97, + "learning_rate": 4.701462196861626e-05, + "loss": 0.0061, + "step": 33500 + }, + { + "epoch": 5.98, + "learning_rate": 4.701373038516405e-05, + "loss": 0.0071, + "step": 33510 + }, + { + "epoch": 5.98, + "learning_rate": 4.701283880171184e-05, + "loss": 0.0057, + "step": 33520 + }, + { + "epoch": 5.98, + "learning_rate": 4.701194721825963e-05, + "loss": 0.0067, + "step": 33530 + }, + { + "epoch": 5.98, + "learning_rate": 4.7011055634807424e-05, + "loss": 0.0056, + "step": 33540 + }, + { + "epoch": 5.98, + "learning_rate": 4.701016405135521e-05, + "loss": 0.0051, + "step": 33550 + }, + { + "epoch": 5.98, + "learning_rate": 4.7009272467903e-05, + "loss": 0.0074, + "step": 33560 + }, + { + "epoch": 5.99, + "learning_rate": 4.7008380884450784e-05, + "loss": 0.007, + "step": 33570 + }, + { + "epoch": 5.99, + "learning_rate": 4.7007489300998575e-05, + "loss": 0.0113, + "step": 33580 + }, + { + "epoch": 5.99, + "learning_rate": 4.700659771754637e-05, + "loss": 0.0074, + "step": 33590 + }, + { + "epoch": 5.99, + "learning_rate": 4.700570613409415e-05, + "loss": 0.0072, + "step": 33600 + }, + { + "epoch": 5.99, + "learning_rate": 4.700481455064194e-05, + "loss": 0.0038, + "step": 33610 + }, + { + "epoch": 6.0, + "learning_rate": 4.700392296718973e-05, + "loss": 0.0092, + "step": 33620 + }, + { + "epoch": 6.0, + "learning_rate": 4.7003031383737525e-05, + "loss": 0.0056, + "step": 33630 + }, + { + "epoch": 6.0, + "learning_rate": 4.700213980028531e-05, + "loss": 0.0045, + "step": 33640 + }, + { + "epoch": 6.0, + "eval_loss": 0.013290228322148323, + "eval_runtime": 197.0062, + "eval_samples_per_second": 23.547, + "eval_steps_per_second": 2.944, + "step": 33648 + }, + { + "epoch": 6.0, + "learning_rate": 4.70012482168331e-05, + "loss": 0.0063, + "step": 33650 + }, + { + "epoch": 6.0, + "learning_rate": 4.7000356633380885e-05, + "loss": 0.0057, + "step": 33660 + }, + { + "epoch": 6.0, + "learning_rate": 4.6999465049928676e-05, + "loss": 0.0058, + "step": 33670 + }, + { + "epoch": 6.01, + "learning_rate": 4.699857346647647e-05, + "loss": 0.0042, + "step": 33680 + }, + { + "epoch": 6.01, + "learning_rate": 4.699768188302425e-05, + "loss": 0.0037, + "step": 33690 + }, + { + "epoch": 6.01, + "learning_rate": 4.699679029957204e-05, + "loss": 0.0072, + "step": 33700 + }, + { + "epoch": 6.01, + "learning_rate": 4.699589871611983e-05, + "loss": 0.0061, + "step": 33710 + }, + { + "epoch": 6.01, + "learning_rate": 4.699500713266762e-05, + "loss": 0.0075, + "step": 33720 + }, + { + "epoch": 6.01, + "learning_rate": 4.69941155492154e-05, + "loss": 0.0041, + "step": 33730 + }, + { + "epoch": 6.02, + "learning_rate": 4.69932239657632e-05, + "loss": 0.0045, + "step": 33740 + }, + { + "epoch": 6.02, + "learning_rate": 4.6992332382310985e-05, + "loss": 0.0052, + "step": 33750 + }, + { + "epoch": 6.02, + "learning_rate": 4.6991440798858777e-05, + "loss": 0.0059, + "step": 33760 + }, + { + "epoch": 6.02, + "learning_rate": 4.699054921540657e-05, + "loss": 0.0047, + "step": 33770 + }, + { + "epoch": 6.02, + "learning_rate": 4.698965763195435e-05, + "loss": 0.0054, + "step": 33780 + }, + { + "epoch": 6.03, + "learning_rate": 4.6988766048502143e-05, + "loss": 0.0033, + "step": 33790 + }, + { + "epoch": 6.03, + "learning_rate": 4.698787446504993e-05, + "loss": 0.0055, + "step": 33800 + }, + { + "epoch": 6.03, + "learning_rate": 4.698698288159772e-05, + "loss": 0.0056, + "step": 33810 + }, + { + "epoch": 6.03, + "learning_rate": 4.698609129814551e-05, + "loss": 0.008, + "step": 33820 + }, + { + "epoch": 6.03, + "learning_rate": 4.6985199714693295e-05, + "loss": 0.0055, + "step": 33830 + }, + { + "epoch": 6.03, + "learning_rate": 4.6984308131241086e-05, + "loss": 0.0056, + "step": 33840 + }, + { + "epoch": 6.04, + "learning_rate": 4.698341654778888e-05, + "loss": 0.0055, + "step": 33850 + }, + { + "epoch": 6.04, + "learning_rate": 4.698252496433667e-05, + "loss": 0.0057, + "step": 33860 + }, + { + "epoch": 6.04, + "learning_rate": 4.698163338088445e-05, + "loss": 0.0032, + "step": 33870 + }, + { + "epoch": 6.04, + "learning_rate": 4.6980741797432244e-05, + "loss": 0.0058, + "step": 33880 + }, + { + "epoch": 6.04, + "learning_rate": 4.697985021398003e-05, + "loss": 0.0045, + "step": 33890 + }, + { + "epoch": 6.04, + "learning_rate": 4.697895863052782e-05, + "loss": 0.0043, + "step": 33900 + }, + { + "epoch": 6.05, + "learning_rate": 4.697806704707561e-05, + "loss": 0.0057, + "step": 33910 + }, + { + "epoch": 6.05, + "learning_rate": 4.6977175463623395e-05, + "loss": 0.0046, + "step": 33920 + }, + { + "epoch": 6.05, + "learning_rate": 4.6976283880171187e-05, + "loss": 0.0072, + "step": 33930 + }, + { + "epoch": 6.05, + "learning_rate": 4.697539229671897e-05, + "loss": 0.0041, + "step": 33940 + }, + { + "epoch": 6.05, + "learning_rate": 4.697450071326676e-05, + "loss": 0.0039, + "step": 33950 + }, + { + "epoch": 6.06, + "learning_rate": 4.697360912981455e-05, + "loss": 0.0046, + "step": 33960 + }, + { + "epoch": 6.06, + "learning_rate": 4.6972717546362345e-05, + "loss": 0.0054, + "step": 33970 + }, + { + "epoch": 6.06, + "learning_rate": 4.697182596291013e-05, + "loss": 0.0055, + "step": 33980 + }, + { + "epoch": 6.06, + "learning_rate": 4.697093437945792e-05, + "loss": 0.0043, + "step": 33990 + }, + { + "epoch": 6.06, + "learning_rate": 4.697004279600571e-05, + "loss": 0.0039, + "step": 34000 + }, + { + "epoch": 6.06, + "learning_rate": 4.6969151212553496e-05, + "loss": 0.0085, + "step": 34010 + }, + { + "epoch": 6.07, + "learning_rate": 4.696825962910129e-05, + "loss": 0.0075, + "step": 34020 + }, + { + "epoch": 6.07, + "learning_rate": 4.696736804564907e-05, + "loss": 0.0076, + "step": 34030 + }, + { + "epoch": 6.07, + "learning_rate": 4.696647646219686e-05, + "loss": 0.008, + "step": 34040 + }, + { + "epoch": 6.07, + "learning_rate": 4.6965584878744654e-05, + "loss": 0.0048, + "step": 34050 + }, + { + "epoch": 6.07, + "learning_rate": 4.696469329529244e-05, + "loss": 0.0058, + "step": 34060 + }, + { + "epoch": 6.08, + "learning_rate": 4.696380171184023e-05, + "loss": 0.0059, + "step": 34070 + }, + { + "epoch": 6.08, + "learning_rate": 4.696291012838802e-05, + "loss": 0.0083, + "step": 34080 + }, + { + "epoch": 6.08, + "learning_rate": 4.696201854493581e-05, + "loss": 0.005, + "step": 34090 + }, + { + "epoch": 6.08, + "learning_rate": 4.6961126961483596e-05, + "loss": 0.0063, + "step": 34100 + }, + { + "epoch": 6.08, + "learning_rate": 4.696023537803139e-05, + "loss": 0.0055, + "step": 34110 + }, + { + "epoch": 6.08, + "learning_rate": 4.695934379457917e-05, + "loss": 0.007, + "step": 34120 + }, + { + "epoch": 6.09, + "learning_rate": 4.695845221112696e-05, + "loss": 0.0064, + "step": 34130 + }, + { + "epoch": 6.09, + "learning_rate": 4.6957560627674754e-05, + "loss": 0.0053, + "step": 34140 + }, + { + "epoch": 6.09, + "learning_rate": 4.695666904422254e-05, + "loss": 0.0052, + "step": 34150 + }, + { + "epoch": 6.09, + "learning_rate": 4.695577746077033e-05, + "loss": 0.0039, + "step": 34160 + }, + { + "epoch": 6.09, + "learning_rate": 4.6954885877318115e-05, + "loss": 0.0057, + "step": 34170 + }, + { + "epoch": 6.09, + "learning_rate": 4.695399429386591e-05, + "loss": 0.0067, + "step": 34180 + }, + { + "epoch": 6.1, + "learning_rate": 4.69531027104137e-05, + "loss": 0.0047, + "step": 34190 + }, + { + "epoch": 6.1, + "learning_rate": 4.695221112696149e-05, + "loss": 0.007, + "step": 34200 + }, + { + "epoch": 6.1, + "learning_rate": 4.695131954350927e-05, + "loss": 0.0069, + "step": 34210 + }, + { + "epoch": 6.1, + "learning_rate": 4.6950427960057064e-05, + "loss": 0.007, + "step": 34220 + }, + { + "epoch": 6.1, + "learning_rate": 4.6949536376604855e-05, + "loss": 0.0063, + "step": 34230 + }, + { + "epoch": 6.11, + "learning_rate": 4.694864479315264e-05, + "loss": 0.0031, + "step": 34240 + }, + { + "epoch": 6.11, + "learning_rate": 4.694775320970043e-05, + "loss": 0.0052, + "step": 34250 + }, + { + "epoch": 6.11, + "learning_rate": 4.6946861626248215e-05, + "loss": 0.0042, + "step": 34260 + }, + { + "epoch": 6.11, + "learning_rate": 4.6945970042796006e-05, + "loss": 0.0052, + "step": 34270 + }, + { + "epoch": 6.11, + "learning_rate": 4.69450784593438e-05, + "loss": 0.005, + "step": 34280 + }, + { + "epoch": 6.11, + "learning_rate": 4.694418687589159e-05, + "loss": 0.0057, + "step": 34290 + }, + { + "epoch": 6.12, + "learning_rate": 4.694329529243937e-05, + "loss": 0.0037, + "step": 34300 + }, + { + "epoch": 6.12, + "learning_rate": 4.6942403708987164e-05, + "loss": 0.0055, + "step": 34310 + }, + { + "epoch": 6.12, + "learning_rate": 4.6941512125534956e-05, + "loss": 0.0043, + "step": 34320 + }, + { + "epoch": 6.12, + "learning_rate": 4.694062054208274e-05, + "loss": 0.0083, + "step": 34330 + }, + { + "epoch": 6.12, + "learning_rate": 4.693972895863053e-05, + "loss": 0.0058, + "step": 34340 + }, + { + "epoch": 6.13, + "learning_rate": 4.6938837375178316e-05, + "loss": 0.0064, + "step": 34350 + }, + { + "epoch": 6.13, + "learning_rate": 4.693794579172611e-05, + "loss": 0.0059, + "step": 34360 + }, + { + "epoch": 6.13, + "learning_rate": 4.69370542082739e-05, + "loss": 0.0045, + "step": 34370 + }, + { + "epoch": 6.13, + "learning_rate": 4.693616262482168e-05, + "loss": 0.006, + "step": 34380 + }, + { + "epoch": 6.13, + "learning_rate": 4.6935271041369474e-05, + "loss": 0.0034, + "step": 34390 + }, + { + "epoch": 6.13, + "learning_rate": 4.6934379457917265e-05, + "loss": 0.0032, + "step": 34400 + }, + { + "epoch": 6.14, + "learning_rate": 4.6933487874465056e-05, + "loss": 0.0063, + "step": 34410 + }, + { + "epoch": 6.14, + "learning_rate": 4.693259629101284e-05, + "loss": 0.0033, + "step": 34420 + }, + { + "epoch": 6.14, + "learning_rate": 4.693170470756063e-05, + "loss": 0.0028, + "step": 34430 + }, + { + "epoch": 6.14, + "learning_rate": 4.6930813124108416e-05, + "loss": 0.0059, + "step": 34440 + }, + { + "epoch": 6.14, + "learning_rate": 4.692992154065621e-05, + "loss": 0.0061, + "step": 34450 + }, + { + "epoch": 6.14, + "learning_rate": 4.6929029957204e-05, + "loss": 0.0052, + "step": 34460 + }, + { + "epoch": 6.15, + "learning_rate": 4.692813837375178e-05, + "loss": 0.0044, + "step": 34470 + }, + { + "epoch": 6.15, + "learning_rate": 4.6927246790299574e-05, + "loss": 0.003, + "step": 34480 + }, + { + "epoch": 6.15, + "learning_rate": 4.692635520684736e-05, + "loss": 0.0055, + "step": 34490 + }, + { + "epoch": 6.15, + "learning_rate": 4.692546362339515e-05, + "loss": 0.0068, + "step": 34500 + }, + { + "epoch": 6.15, + "learning_rate": 4.692457203994294e-05, + "loss": 0.0052, + "step": 34510 + }, + { + "epoch": 6.16, + "learning_rate": 4.692368045649073e-05, + "loss": 0.005, + "step": 34520 + }, + { + "epoch": 6.16, + "learning_rate": 4.692278887303852e-05, + "loss": 0.0028, + "step": 34530 + }, + { + "epoch": 6.16, + "learning_rate": 4.692189728958631e-05, + "loss": 0.0042, + "step": 34540 + }, + { + "epoch": 6.16, + "learning_rate": 4.69210057061341e-05, + "loss": 0.0046, + "step": 34550 + }, + { + "epoch": 6.16, + "learning_rate": 4.6920114122681884e-05, + "loss": 0.0067, + "step": 34560 + }, + { + "epoch": 6.16, + "learning_rate": 4.6919222539229675e-05, + "loss": 0.004, + "step": 34570 + }, + { + "epoch": 6.17, + "learning_rate": 4.691833095577746e-05, + "loss": 0.0046, + "step": 34580 + }, + { + "epoch": 6.17, + "learning_rate": 4.691743937232525e-05, + "loss": 0.0038, + "step": 34590 + }, + { + "epoch": 6.17, + "learning_rate": 4.691654778887304e-05, + "loss": 0.0051, + "step": 34600 + }, + { + "epoch": 6.17, + "learning_rate": 4.6915656205420826e-05, + "loss": 0.004, + "step": 34610 + }, + { + "epoch": 6.17, + "learning_rate": 4.6914764621968624e-05, + "loss": 0.0073, + "step": 34620 + }, + { + "epoch": 6.18, + "learning_rate": 4.691387303851641e-05, + "loss": 0.0047, + "step": 34630 + }, + { + "epoch": 6.18, + "learning_rate": 4.69129814550642e-05, + "loss": 0.004, + "step": 34640 + }, + { + "epoch": 6.18, + "learning_rate": 4.6912089871611984e-05, + "loss": 0.0063, + "step": 34650 + }, + { + "epoch": 6.18, + "learning_rate": 4.6911198288159775e-05, + "loss": 0.0055, + "step": 34660 + }, + { + "epoch": 6.18, + "learning_rate": 4.691030670470756e-05, + "loss": 0.0052, + "step": 34670 + }, + { + "epoch": 6.18, + "learning_rate": 4.690941512125535e-05, + "loss": 0.0062, + "step": 34680 + }, + { + "epoch": 6.19, + "learning_rate": 4.690852353780314e-05, + "loss": 0.0071, + "step": 34690 + }, + { + "epoch": 6.19, + "learning_rate": 4.690763195435093e-05, + "loss": 0.0079, + "step": 34700 + }, + { + "epoch": 6.19, + "learning_rate": 4.690674037089872e-05, + "loss": 0.0056, + "step": 34710 + }, + { + "epoch": 6.19, + "learning_rate": 4.69058487874465e-05, + "loss": 0.007, + "step": 34720 + }, + { + "epoch": 6.19, + "learning_rate": 4.69049572039943e-05, + "loss": 0.0071, + "step": 34730 + }, + { + "epoch": 6.19, + "learning_rate": 4.6904065620542085e-05, + "loss": 0.0075, + "step": 34740 + }, + { + "epoch": 6.2, + "learning_rate": 4.6903174037089876e-05, + "loss": 0.0043, + "step": 34750 + }, + { + "epoch": 6.2, + "learning_rate": 4.690228245363766e-05, + "loss": 0.0057, + "step": 34760 + }, + { + "epoch": 6.2, + "learning_rate": 4.690139087018545e-05, + "loss": 0.0057, + "step": 34770 + }, + { + "epoch": 6.2, + "learning_rate": 4.690049928673324e-05, + "loss": 0.0064, + "step": 34780 + }, + { + "epoch": 6.2, + "learning_rate": 4.689960770328103e-05, + "loss": 0.006, + "step": 34790 + }, + { + "epoch": 6.21, + "learning_rate": 4.689871611982882e-05, + "loss": 0.0059, + "step": 34800 + }, + { + "epoch": 6.21, + "learning_rate": 4.68978245363766e-05, + "loss": 0.0066, + "step": 34810 + }, + { + "epoch": 6.21, + "learning_rate": 4.6896932952924394e-05, + "loss": 0.0058, + "step": 34820 + }, + { + "epoch": 6.21, + "learning_rate": 4.6896041369472185e-05, + "loss": 0.0037, + "step": 34830 + }, + { + "epoch": 6.21, + "learning_rate": 4.6895149786019977e-05, + "loss": 0.0056, + "step": 34840 + }, + { + "epoch": 6.21, + "learning_rate": 4.689425820256777e-05, + "loss": 0.0049, + "step": 34850 + }, + { + "epoch": 6.22, + "learning_rate": 4.689336661911555e-05, + "loss": 0.0046, + "step": 34860 + }, + { + "epoch": 6.22, + "learning_rate": 4.6892475035663343e-05, + "loss": 0.0044, + "step": 34870 + }, + { + "epoch": 6.22, + "learning_rate": 4.689158345221113e-05, + "loss": 0.0077, + "step": 34880 + }, + { + "epoch": 6.22, + "learning_rate": 4.689069186875892e-05, + "loss": 0.0065, + "step": 34890 + }, + { + "epoch": 6.22, + "learning_rate": 4.6889800285306703e-05, + "loss": 0.006, + "step": 34900 + }, + { + "epoch": 6.23, + "learning_rate": 4.6888908701854495e-05, + "loss": 0.0049, + "step": 34910 + }, + { + "epoch": 6.23, + "learning_rate": 4.6888017118402286e-05, + "loss": 0.0039, + "step": 34920 + }, + { + "epoch": 6.23, + "learning_rate": 4.688712553495007e-05, + "loss": 0.0059, + "step": 34930 + }, + { + "epoch": 6.23, + "learning_rate": 4.688623395149786e-05, + "loss": 0.0037, + "step": 34940 + }, + { + "epoch": 6.23, + "learning_rate": 4.688534236804565e-05, + "loss": 0.0058, + "step": 34950 + }, + { + "epoch": 6.23, + "learning_rate": 4.6884450784593444e-05, + "loss": 0.0063, + "step": 34960 + }, + { + "epoch": 6.24, + "learning_rate": 4.688355920114123e-05, + "loss": 0.0062, + "step": 34970 + }, + { + "epoch": 6.24, + "learning_rate": 4.688266761768902e-05, + "loss": 0.0068, + "step": 34980 + }, + { + "epoch": 6.24, + "learning_rate": 4.6881776034236804e-05, + "loss": 0.0067, + "step": 34990 + }, + { + "epoch": 6.24, + "learning_rate": 4.6880884450784595e-05, + "loss": 0.0051, + "step": 35000 + }, + { + "epoch": 6.24, + "learning_rate": 4.6879992867332386e-05, + "loss": 0.0064, + "step": 35010 + }, + { + "epoch": 6.24, + "learning_rate": 4.687910128388017e-05, + "loss": 0.0038, + "step": 35020 + }, + { + "epoch": 6.25, + "learning_rate": 4.687820970042796e-05, + "loss": 0.0045, + "step": 35030 + }, + { + "epoch": 6.25, + "learning_rate": 4.6877318116975747e-05, + "loss": 0.0063, + "step": 35040 + }, + { + "epoch": 6.25, + "learning_rate": 4.687642653352354e-05, + "loss": 0.0028, + "step": 35050 + }, + { + "epoch": 6.25, + "learning_rate": 4.687553495007133e-05, + "loss": 0.0051, + "step": 35060 + }, + { + "epoch": 6.25, + "learning_rate": 4.687464336661912e-05, + "loss": 0.006, + "step": 35070 + }, + { + "epoch": 6.26, + "learning_rate": 4.687375178316691e-05, + "loss": 0.0044, + "step": 35080 + }, + { + "epoch": 6.26, + "learning_rate": 4.6872860199714696e-05, + "loss": 0.0065, + "step": 35090 + }, + { + "epoch": 6.26, + "learning_rate": 4.687196861626249e-05, + "loss": 0.0046, + "step": 35100 + }, + { + "epoch": 6.26, + "learning_rate": 4.687107703281027e-05, + "loss": 0.0042, + "step": 35110 + }, + { + "epoch": 6.26, + "learning_rate": 4.687018544935806e-05, + "loss": 0.0079, + "step": 35120 + }, + { + "epoch": 6.26, + "learning_rate": 4.686929386590585e-05, + "loss": 0.0061, + "step": 35130 + }, + { + "epoch": 6.27, + "learning_rate": 4.686840228245364e-05, + "loss": 0.0056, + "step": 35140 + }, + { + "epoch": 6.27, + "learning_rate": 4.686751069900143e-05, + "loss": 0.0051, + "step": 35150 + }, + { + "epoch": 6.27, + "learning_rate": 4.6866619115549214e-05, + "loss": 0.0084, + "step": 35160 + }, + { + "epoch": 6.27, + "learning_rate": 4.686572753209701e-05, + "loss": 0.005, + "step": 35170 + }, + { + "epoch": 6.27, + "learning_rate": 4.6864835948644796e-05, + "loss": 0.0062, + "step": 35180 + }, + { + "epoch": 6.27, + "learning_rate": 4.686394436519259e-05, + "loss": 0.0071, + "step": 35190 + }, + { + "epoch": 6.28, + "learning_rate": 4.686305278174037e-05, + "loss": 0.0052, + "step": 35200 + }, + { + "epoch": 6.28, + "learning_rate": 4.686216119828816e-05, + "loss": 0.0071, + "step": 35210 + }, + { + "epoch": 6.28, + "learning_rate": 4.686126961483595e-05, + "loss": 0.0059, + "step": 35220 + }, + { + "epoch": 6.28, + "learning_rate": 4.686037803138374e-05, + "loss": 0.006, + "step": 35230 + }, + { + "epoch": 6.28, + "learning_rate": 4.685948644793153e-05, + "loss": 0.0037, + "step": 35240 + }, + { + "epoch": 6.29, + "learning_rate": 4.6858594864479315e-05, + "loss": 0.0058, + "step": 35250 + }, + { + "epoch": 6.29, + "learning_rate": 4.6857703281027106e-05, + "loss": 0.0051, + "step": 35260 + }, + { + "epoch": 6.29, + "learning_rate": 4.685681169757489e-05, + "loss": 0.0043, + "step": 35270 + }, + { + "epoch": 6.29, + "learning_rate": 4.685592011412269e-05, + "loss": 0.0064, + "step": 35280 + }, + { + "epoch": 6.29, + "learning_rate": 4.685502853067047e-05, + "loss": 0.0048, + "step": 35290 + }, + { + "epoch": 6.29, + "learning_rate": 4.6854136947218264e-05, + "loss": 0.0033, + "step": 35300 + }, + { + "epoch": 6.3, + "learning_rate": 4.6853245363766055e-05, + "loss": 0.0082, + "step": 35310 + }, + { + "epoch": 6.3, + "learning_rate": 4.685235378031384e-05, + "loss": 0.0058, + "step": 35320 + }, + { + "epoch": 6.3, + "learning_rate": 4.685146219686163e-05, + "loss": 0.0057, + "step": 35330 + }, + { + "epoch": 6.3, + "learning_rate": 4.6850570613409415e-05, + "loss": 0.0051, + "step": 35340 + }, + { + "epoch": 6.3, + "learning_rate": 4.6849679029957206e-05, + "loss": 0.0052, + "step": 35350 + }, + { + "epoch": 6.31, + "learning_rate": 4.684878744650499e-05, + "loss": 0.0071, + "step": 35360 + }, + { + "epoch": 6.31, + "learning_rate": 4.684789586305278e-05, + "loss": 0.0038, + "step": 35370 + }, + { + "epoch": 6.31, + "learning_rate": 4.684700427960057e-05, + "loss": 0.0083, + "step": 35380 + }, + { + "epoch": 6.31, + "learning_rate": 4.6846112696148364e-05, + "loss": 0.0055, + "step": 35390 + }, + { + "epoch": 6.31, + "learning_rate": 4.6845221112696156e-05, + "loss": 0.0055, + "step": 35400 + }, + { + "epoch": 6.31, + "learning_rate": 4.684432952924394e-05, + "loss": 0.0047, + "step": 35410 + }, + { + "epoch": 6.32, + "learning_rate": 4.684343794579173e-05, + "loss": 0.0033, + "step": 35420 + }, + { + "epoch": 6.32, + "learning_rate": 4.6842546362339516e-05, + "loss": 0.0049, + "step": 35430 + }, + { + "epoch": 6.32, + "learning_rate": 4.684165477888731e-05, + "loss": 0.005, + "step": 35440 + }, + { + "epoch": 6.32, + "learning_rate": 4.684076319543509e-05, + "loss": 0.0054, + "step": 35450 + }, + { + "epoch": 6.32, + "learning_rate": 4.683987161198288e-05, + "loss": 0.0049, + "step": 35460 + }, + { + "epoch": 6.32, + "learning_rate": 4.6838980028530674e-05, + "loss": 0.0057, + "step": 35470 + }, + { + "epoch": 6.33, + "learning_rate": 4.683808844507846e-05, + "loss": 0.0095, + "step": 35480 + }, + { + "epoch": 6.33, + "learning_rate": 4.683719686162625e-05, + "loss": 0.0056, + "step": 35490 + }, + { + "epoch": 6.33, + "learning_rate": 4.683630527817404e-05, + "loss": 0.0049, + "step": 35500 + }, + { + "epoch": 6.33, + "learning_rate": 4.683541369472183e-05, + "loss": 0.0052, + "step": 35510 + }, + { + "epoch": 6.33, + "learning_rate": 4.6834522111269616e-05, + "loss": 0.0058, + "step": 35520 + }, + { + "epoch": 6.34, + "learning_rate": 4.683363052781741e-05, + "loss": 0.0048, + "step": 35530 + }, + { + "epoch": 6.34, + "learning_rate": 4.68327389443652e-05, + "loss": 0.0038, + "step": 35540 + }, + { + "epoch": 6.34, + "learning_rate": 4.683184736091298e-05, + "loss": 0.0057, + "step": 35550 + }, + { + "epoch": 6.34, + "learning_rate": 4.6830955777460774e-05, + "loss": 0.0082, + "step": 35560 + }, + { + "epoch": 6.34, + "learning_rate": 4.683006419400856e-05, + "loss": 0.0039, + "step": 35570 + }, + { + "epoch": 6.34, + "learning_rate": 4.682917261055635e-05, + "loss": 0.0049, + "step": 35580 + }, + { + "epoch": 6.35, + "learning_rate": 4.6828281027104134e-05, + "loss": 0.0051, + "step": 35590 + }, + { + "epoch": 6.35, + "learning_rate": 4.6827389443651926e-05, + "loss": 0.006, + "step": 35600 + }, + { + "epoch": 6.35, + "learning_rate": 4.682649786019972e-05, + "loss": 0.0048, + "step": 35610 + }, + { + "epoch": 6.35, + "learning_rate": 4.682560627674751e-05, + "loss": 0.0092, + "step": 35620 + }, + { + "epoch": 6.35, + "learning_rate": 4.68247146932953e-05, + "loss": 0.0085, + "step": 35630 + }, + { + "epoch": 6.36, + "learning_rate": 4.6823823109843084e-05, + "loss": 0.0064, + "step": 35640 + }, + { + "epoch": 6.36, + "learning_rate": 4.6822931526390875e-05, + "loss": 0.0033, + "step": 35650 + }, + { + "epoch": 6.36, + "learning_rate": 4.682203994293866e-05, + "loss": 0.004, + "step": 35660 + }, + { + "epoch": 6.36, + "learning_rate": 4.682114835948645e-05, + "loss": 0.0059, + "step": 35670 + }, + { + "epoch": 6.36, + "learning_rate": 4.6820256776034235e-05, + "loss": 0.0067, + "step": 35680 + }, + { + "epoch": 6.36, + "learning_rate": 4.6819365192582026e-05, + "loss": 0.0049, + "step": 35690 + }, + { + "epoch": 6.37, + "learning_rate": 4.681847360912982e-05, + "loss": 0.0046, + "step": 35700 + }, + { + "epoch": 6.37, + "learning_rate": 4.68175820256776e-05, + "loss": 0.004, + "step": 35710 + }, + { + "epoch": 6.37, + "learning_rate": 4.68166904422254e-05, + "loss": 0.0041, + "step": 35720 + }, + { + "epoch": 6.37, + "learning_rate": 4.6815798858773184e-05, + "loss": 0.0057, + "step": 35730 + }, + { + "epoch": 6.37, + "learning_rate": 4.6814907275320975e-05, + "loss": 0.0046, + "step": 35740 + }, + { + "epoch": 6.37, + "learning_rate": 4.681401569186876e-05, + "loss": 0.0046, + "step": 35750 + }, + { + "epoch": 6.38, + "learning_rate": 4.681312410841655e-05, + "loss": 0.0054, + "step": 35760 + }, + { + "epoch": 6.38, + "learning_rate": 4.681223252496434e-05, + "loss": 0.0048, + "step": 35770 + }, + { + "epoch": 6.38, + "learning_rate": 4.681134094151213e-05, + "loss": 0.0052, + "step": 35780 + }, + { + "epoch": 6.38, + "learning_rate": 4.681044935805992e-05, + "loss": 0.0055, + "step": 35790 + }, + { + "epoch": 6.38, + "learning_rate": 4.68095577746077e-05, + "loss": 0.0061, + "step": 35800 + }, + { + "epoch": 6.39, + "learning_rate": 4.6808666191155494e-05, + "loss": 0.0042, + "step": 35810 + }, + { + "epoch": 6.39, + "learning_rate": 4.680777460770328e-05, + "loss": 0.0063, + "step": 35820 + }, + { + "epoch": 6.39, + "learning_rate": 4.6806883024251076e-05, + "loss": 0.0053, + "step": 35830 + }, + { + "epoch": 6.39, + "learning_rate": 4.680599144079886e-05, + "loss": 0.0034, + "step": 35840 + }, + { + "epoch": 6.39, + "learning_rate": 4.680518901569187e-05, + "loss": 0.006, + "step": 35850 + }, + { + "epoch": 6.39, + "learning_rate": 4.680429743223966e-05, + "loss": 0.0061, + "step": 35860 + }, + { + "epoch": 6.4, + "learning_rate": 4.6803405848787444e-05, + "loss": 0.0049, + "step": 35870 + }, + { + "epoch": 6.4, + "learning_rate": 4.680251426533524e-05, + "loss": 0.004, + "step": 35880 + }, + { + "epoch": 6.4, + "learning_rate": 4.6801622681883026e-05, + "loss": 0.0054, + "step": 35890 + }, + { + "epoch": 6.4, + "learning_rate": 4.680073109843082e-05, + "loss": 0.005, + "step": 35900 + }, + { + "epoch": 6.4, + "learning_rate": 4.67998395149786e-05, + "loss": 0.0054, + "step": 35910 + }, + { + "epoch": 6.41, + "learning_rate": 4.679894793152639e-05, + "loss": 0.007, + "step": 35920 + }, + { + "epoch": 6.41, + "learning_rate": 4.6798056348074185e-05, + "loss": 0.0067, + "step": 35930 + }, + { + "epoch": 6.41, + "learning_rate": 4.679716476462197e-05, + "loss": 0.005, + "step": 35940 + }, + { + "epoch": 6.41, + "learning_rate": 4.6796362339514984e-05, + "loss": 0.0084, + "step": 35950 + }, + { + "epoch": 6.41, + "learning_rate": 4.679547075606277e-05, + "loss": 0.0054, + "step": 35960 + }, + { + "epoch": 6.41, + "learning_rate": 4.679457917261056e-05, + "loss": 0.0048, + "step": 35970 + }, + { + "epoch": 6.42, + "learning_rate": 4.6793687589158344e-05, + "loss": 0.0057, + "step": 35980 + }, + { + "epoch": 6.42, + "learning_rate": 4.6792796005706135e-05, + "loss": 0.0044, + "step": 35990 + }, + { + "epoch": 6.42, + "learning_rate": 4.6791904422253926e-05, + "loss": 0.0069, + "step": 36000 + }, + { + "epoch": 6.42, + "learning_rate": 4.679101283880171e-05, + "loss": 0.0036, + "step": 36010 + }, + { + "epoch": 6.42, + "learning_rate": 4.67901212553495e-05, + "loss": 0.0052, + "step": 36020 + }, + { + "epoch": 6.42, + "learning_rate": 4.678922967189729e-05, + "loss": 0.0061, + "step": 36030 + }, + { + "epoch": 6.43, + "learning_rate": 4.6788338088445084e-05, + "loss": 0.0039, + "step": 36040 + }, + { + "epoch": 6.43, + "learning_rate": 4.678744650499287e-05, + "loss": 0.004, + "step": 36050 + }, + { + "epoch": 6.43, + "learning_rate": 4.678655492154066e-05, + "loss": 0.0043, + "step": 36060 + }, + { + "epoch": 6.43, + "learning_rate": 4.678566333808845e-05, + "loss": 0.0089, + "step": 36070 + }, + { + "epoch": 6.43, + "learning_rate": 4.6784771754636236e-05, + "loss": 0.0045, + "step": 36080 + }, + { + "epoch": 6.44, + "learning_rate": 4.678388017118403e-05, + "loss": 0.0063, + "step": 36090 + }, + { + "epoch": 6.44, + "learning_rate": 4.678298858773181e-05, + "loss": 0.0067, + "step": 36100 + }, + { + "epoch": 6.44, + "learning_rate": 4.67820970042796e-05, + "loss": 0.0036, + "step": 36110 + }, + { + "epoch": 6.44, + "learning_rate": 4.678120542082739e-05, + "loss": 0.0065, + "step": 36120 + }, + { + "epoch": 6.44, + "learning_rate": 4.678031383737518e-05, + "loss": 0.0043, + "step": 36130 + }, + { + "epoch": 6.44, + "learning_rate": 4.677942225392297e-05, + "loss": 0.0066, + "step": 36140 + }, + { + "epoch": 6.45, + "learning_rate": 4.677853067047076e-05, + "loss": 0.0037, + "step": 36150 + }, + { + "epoch": 6.45, + "learning_rate": 4.677763908701855e-05, + "loss": 0.0076, + "step": 36160 + }, + { + "epoch": 6.45, + "learning_rate": 4.6776747503566336e-05, + "loss": 0.0053, + "step": 36170 + }, + { + "epoch": 6.45, + "learning_rate": 4.677585592011413e-05, + "loss": 0.0068, + "step": 36180 + }, + { + "epoch": 6.45, + "learning_rate": 4.677496433666191e-05, + "loss": 0.008, + "step": 36190 + }, + { + "epoch": 6.46, + "learning_rate": 4.67740727532097e-05, + "loss": 0.004, + "step": 36200 + }, + { + "epoch": 6.46, + "learning_rate": 4.677318116975749e-05, + "loss": 0.009, + "step": 36210 + }, + { + "epoch": 6.46, + "learning_rate": 4.677228958630528e-05, + "loss": 0.0063, + "step": 36220 + }, + { + "epoch": 6.46, + "learning_rate": 4.677139800285307e-05, + "loss": 0.0062, + "step": 36230 + }, + { + "epoch": 6.46, + "learning_rate": 4.6770506419400854e-05, + "loss": 0.0047, + "step": 36240 + }, + { + "epoch": 6.46, + "learning_rate": 4.676961483594865e-05, + "loss": 0.0076, + "step": 36250 + }, + { + "epoch": 6.47, + "learning_rate": 4.676872325249644e-05, + "loss": 0.0079, + "step": 36260 + }, + { + "epoch": 6.47, + "learning_rate": 4.676783166904423e-05, + "loss": 0.0059, + "step": 36270 + }, + { + "epoch": 6.47, + "learning_rate": 4.676694008559201e-05, + "loss": 0.005, + "step": 36280 + }, + { + "epoch": 6.47, + "learning_rate": 4.6766048502139804e-05, + "loss": 0.0064, + "step": 36290 + }, + { + "epoch": 6.47, + "learning_rate": 4.6765156918687595e-05, + "loss": 0.0057, + "step": 36300 + }, + { + "epoch": 6.47, + "learning_rate": 4.676426533523538e-05, + "loss": 0.0054, + "step": 36310 + }, + { + "epoch": 6.48, + "learning_rate": 4.676337375178317e-05, + "loss": 0.006, + "step": 36320 + }, + { + "epoch": 6.48, + "learning_rate": 4.6762482168330955e-05, + "loss": 0.0043, + "step": 36330 + }, + { + "epoch": 6.48, + "learning_rate": 4.6761590584878746e-05, + "loss": 0.0057, + "step": 36340 + }, + { + "epoch": 6.48, + "learning_rate": 4.676069900142653e-05, + "loss": 0.0069, + "step": 36350 + }, + { + "epoch": 6.48, + "learning_rate": 4.675980741797433e-05, + "loss": 0.0056, + "step": 36360 + }, + { + "epoch": 6.49, + "learning_rate": 4.675891583452211e-05, + "loss": 0.0037, + "step": 36370 + }, + { + "epoch": 6.49, + "learning_rate": 4.6758024251069904e-05, + "loss": 0.0058, + "step": 36380 + }, + { + "epoch": 6.49, + "learning_rate": 4.6757132667617695e-05, + "loss": 0.0041, + "step": 36390 + }, + { + "epoch": 6.49, + "learning_rate": 4.675624108416548e-05, + "loss": 0.0052, + "step": 36400 + }, + { + "epoch": 6.49, + "learning_rate": 4.675534950071327e-05, + "loss": 0.0034, + "step": 36410 + }, + { + "epoch": 6.49, + "learning_rate": 4.6754457917261055e-05, + "loss": 0.0042, + "step": 36420 + }, + { + "epoch": 6.5, + "learning_rate": 4.675356633380885e-05, + "loss": 0.0068, + "step": 36430 + }, + { + "epoch": 6.5, + "learning_rate": 4.675267475035663e-05, + "loss": 0.0063, + "step": 36440 + }, + { + "epoch": 6.5, + "learning_rate": 4.675178316690442e-05, + "loss": 0.0076, + "step": 36450 + }, + { + "epoch": 6.5, + "learning_rate": 4.6750891583452214e-05, + "loss": 0.0067, + "step": 36460 + }, + { + "epoch": 6.5, + "learning_rate": 4.6750000000000005e-05, + "loss": 0.0061, + "step": 36470 + }, + { + "epoch": 6.5, + "learning_rate": 4.6749108416547796e-05, + "loss": 0.0064, + "step": 36480 + }, + { + "epoch": 6.51, + "learning_rate": 4.674821683309558e-05, + "loss": 0.0078, + "step": 36490 + }, + { + "epoch": 6.51, + "learning_rate": 4.674732524964337e-05, + "loss": 0.0065, + "step": 36500 + }, + { + "epoch": 6.51, + "learning_rate": 4.6746433666191156e-05, + "loss": 0.0057, + "step": 36510 + }, + { + "epoch": 6.51, + "learning_rate": 4.674554208273895e-05, + "loss": 0.0057, + "step": 36520 + }, + { + "epoch": 6.51, + "learning_rate": 4.674465049928674e-05, + "loss": 0.0062, + "step": 36530 + }, + { + "epoch": 6.52, + "learning_rate": 4.674375891583452e-05, + "loss": 0.0066, + "step": 36540 + }, + { + "epoch": 6.52, + "learning_rate": 4.6742867332382314e-05, + "loss": 0.0035, + "step": 36550 + }, + { + "epoch": 6.52, + "learning_rate": 4.67419757489301e-05, + "loss": 0.0072, + "step": 36560 + }, + { + "epoch": 6.52, + "learning_rate": 4.674108416547789e-05, + "loss": 0.0033, + "step": 36570 + }, + { + "epoch": 6.52, + "learning_rate": 4.674019258202568e-05, + "loss": 0.0067, + "step": 36580 + }, + { + "epoch": 6.52, + "learning_rate": 4.673930099857347e-05, + "loss": 0.0036, + "step": 36590 + }, + { + "epoch": 6.53, + "learning_rate": 4.6738409415121257e-05, + "loss": 0.0041, + "step": 36600 + }, + { + "epoch": 6.53, + "learning_rate": 4.673751783166905e-05, + "loss": 0.0071, + "step": 36610 + }, + { + "epoch": 6.53, + "learning_rate": 4.673662624821684e-05, + "loss": 0.0067, + "step": 36620 + }, + { + "epoch": 6.53, + "learning_rate": 4.6735734664764623e-05, + "loss": 0.0064, + "step": 36630 + }, + { + "epoch": 6.53, + "learning_rate": 4.6734843081312415e-05, + "loss": 0.0045, + "step": 36640 + }, + { + "epoch": 6.54, + "learning_rate": 4.67339514978602e-05, + "loss": 0.0079, + "step": 36650 + }, + { + "epoch": 6.54, + "learning_rate": 4.673305991440799e-05, + "loss": 0.0046, + "step": 36660 + }, + { + "epoch": 6.54, + "learning_rate": 4.6732168330955775e-05, + "loss": 0.0056, + "step": 36670 + }, + { + "epoch": 6.54, + "learning_rate": 4.6731276747503566e-05, + "loss": 0.005, + "step": 36680 + }, + { + "epoch": 6.54, + "learning_rate": 4.673038516405136e-05, + "loss": 0.0062, + "step": 36690 + }, + { + "epoch": 6.54, + "learning_rate": 4.672949358059915e-05, + "loss": 0.0045, + "step": 36700 + }, + { + "epoch": 6.55, + "learning_rate": 4.672860199714694e-05, + "loss": 0.0091, + "step": 36710 + }, + { + "epoch": 6.55, + "learning_rate": 4.6727710413694724e-05, + "loss": 0.0058, + "step": 36720 + }, + { + "epoch": 6.55, + "learning_rate": 4.6726818830242515e-05, + "loss": 0.0048, + "step": 36730 + }, + { + "epoch": 6.55, + "learning_rate": 4.67259272467903e-05, + "loss": 0.0069, + "step": 36740 + }, + { + "epoch": 6.55, + "learning_rate": 4.672503566333809e-05, + "loss": 0.0074, + "step": 36750 + }, + { + "epoch": 6.55, + "learning_rate": 4.672414407988588e-05, + "loss": 0.0038, + "step": 36760 + }, + { + "epoch": 6.56, + "learning_rate": 4.6723252496433666e-05, + "loss": 0.0076, + "step": 36770 + }, + { + "epoch": 6.56, + "learning_rate": 4.672236091298146e-05, + "loss": 0.0049, + "step": 36780 + }, + { + "epoch": 6.56, + "learning_rate": 4.672146932952924e-05, + "loss": 0.0042, + "step": 36790 + }, + { + "epoch": 6.56, + "learning_rate": 4.672057774607704e-05, + "loss": 0.0043, + "step": 36800 + }, + { + "epoch": 6.56, + "learning_rate": 4.6719686162624825e-05, + "loss": 0.0054, + "step": 36810 + }, + { + "epoch": 6.57, + "learning_rate": 4.6718794579172616e-05, + "loss": 0.0021, + "step": 36820 + }, + { + "epoch": 6.57, + "learning_rate": 4.67179029957204e-05, + "loss": 0.0037, + "step": 36830 + }, + { + "epoch": 6.57, + "learning_rate": 4.671701141226819e-05, + "loss": 0.0058, + "step": 36840 + }, + { + "epoch": 6.57, + "learning_rate": 4.671611982881598e-05, + "loss": 0.0055, + "step": 36850 + }, + { + "epoch": 6.57, + "learning_rate": 4.671522824536377e-05, + "loss": 0.0041, + "step": 36860 + }, + { + "epoch": 6.57, + "learning_rate": 4.671433666191156e-05, + "loss": 0.0039, + "step": 36870 + }, + { + "epoch": 6.58, + "learning_rate": 4.671344507845934e-05, + "loss": 0.0055, + "step": 36880 + }, + { + "epoch": 6.58, + "learning_rate": 4.6712553495007134e-05, + "loss": 0.0063, + "step": 36890 + }, + { + "epoch": 6.58, + "learning_rate": 4.671166191155492e-05, + "loss": 0.0032, + "step": 36900 + }, + { + "epoch": 6.58, + "learning_rate": 4.6710770328102716e-05, + "loss": 0.0043, + "step": 36910 + }, + { + "epoch": 6.58, + "learning_rate": 4.67098787446505e-05, + "loss": 0.0098, + "step": 36920 + }, + { + "epoch": 6.59, + "learning_rate": 4.670898716119829e-05, + "loss": 0.0067, + "step": 36930 + }, + { + "epoch": 6.59, + "learning_rate": 4.670809557774608e-05, + "loss": 0.0074, + "step": 36940 + }, + { + "epoch": 6.59, + "learning_rate": 4.670720399429387e-05, + "loss": 0.0079, + "step": 36950 + }, + { + "epoch": 6.59, + "learning_rate": 4.670631241084166e-05, + "loss": 0.0076, + "step": 36960 + }, + { + "epoch": 6.59, + "learning_rate": 4.670542082738944e-05, + "loss": 0.0054, + "step": 36970 + }, + { + "epoch": 6.59, + "learning_rate": 4.6704529243937234e-05, + "loss": 0.005, + "step": 36980 + }, + { + "epoch": 6.6, + "learning_rate": 4.6703637660485026e-05, + "loss": 0.0034, + "step": 36990 + }, + { + "epoch": 6.6, + "learning_rate": 4.670274607703281e-05, + "loss": 0.0062, + "step": 37000 + }, + { + "epoch": 6.6, + "learning_rate": 4.67018544935806e-05, + "loss": 0.0053, + "step": 37010 + }, + { + "epoch": 6.6, + "learning_rate": 4.670096291012839e-05, + "loss": 0.0044, + "step": 37020 + }, + { + "epoch": 6.6, + "learning_rate": 4.6700071326676184e-05, + "loss": 0.0061, + "step": 37030 + }, + { + "epoch": 6.6, + "learning_rate": 4.669917974322397e-05, + "loss": 0.006, + "step": 37040 + }, + { + "epoch": 6.61, + "learning_rate": 4.669828815977176e-05, + "loss": 0.0039, + "step": 37050 + }, + { + "epoch": 6.61, + "learning_rate": 4.6697396576319544e-05, + "loss": 0.0047, + "step": 37060 + }, + { + "epoch": 6.61, + "learning_rate": 4.6696504992867335e-05, + "loss": 0.006, + "step": 37070 + }, + { + "epoch": 6.61, + "learning_rate": 4.6695613409415126e-05, + "loss": 0.0052, + "step": 37080 + }, + { + "epoch": 6.61, + "learning_rate": 4.669472182596291e-05, + "loss": 0.0088, + "step": 37090 + }, + { + "epoch": 6.62, + "learning_rate": 4.66938302425107e-05, + "loss": 0.0049, + "step": 37100 + }, + { + "epoch": 6.62, + "learning_rate": 4.6692938659058486e-05, + "loss": 0.007, + "step": 37110 + }, + { + "epoch": 6.62, + "learning_rate": 4.669204707560628e-05, + "loss": 0.0036, + "step": 37120 + }, + { + "epoch": 6.62, + "learning_rate": 4.669115549215407e-05, + "loss": 0.0071, + "step": 37130 + }, + { + "epoch": 6.62, + "learning_rate": 4.669026390870186e-05, + "loss": 0.0089, + "step": 37140 + }, + { + "epoch": 6.62, + "learning_rate": 4.6689372325249644e-05, + "loss": 0.0056, + "step": 37150 + }, + { + "epoch": 6.63, + "learning_rate": 4.6688480741797436e-05, + "loss": 0.0071, + "step": 37160 + }, + { + "epoch": 6.63, + "learning_rate": 4.668758915834523e-05, + "loss": 0.0056, + "step": 37170 + }, + { + "epoch": 6.63, + "learning_rate": 4.668669757489301e-05, + "loss": 0.0068, + "step": 37180 + }, + { + "epoch": 6.63, + "learning_rate": 4.66858059914408e-05, + "loss": 0.0048, + "step": 37190 + }, + { + "epoch": 6.63, + "learning_rate": 4.668491440798859e-05, + "loss": 0.0042, + "step": 37200 + }, + { + "epoch": 6.64, + "learning_rate": 4.668402282453638e-05, + "loss": 0.0063, + "step": 37210 + }, + { + "epoch": 6.64, + "learning_rate": 4.668313124108417e-05, + "loss": 0.0064, + "step": 37220 + }, + { + "epoch": 6.64, + "learning_rate": 4.6682239657631954e-05, + "loss": 0.0045, + "step": 37230 + }, + { + "epoch": 6.64, + "learning_rate": 4.6681348074179745e-05, + "loss": 0.0063, + "step": 37240 + }, + { + "epoch": 6.64, + "learning_rate": 4.6680456490727536e-05, + "loss": 0.0037, + "step": 37250 + }, + { + "epoch": 6.64, + "learning_rate": 4.667956490727533e-05, + "loss": 0.0048, + "step": 37260 + }, + { + "epoch": 6.65, + "learning_rate": 4.667867332382311e-05, + "loss": 0.0064, + "step": 37270 + }, + { + "epoch": 6.65, + "learning_rate": 4.66777817403709e-05, + "loss": 0.0042, + "step": 37280 + }, + { + "epoch": 6.65, + "learning_rate": 4.667689015691869e-05, + "loss": 0.0032, + "step": 37290 + }, + { + "epoch": 6.65, + "learning_rate": 4.667599857346648e-05, + "loss": 0.008, + "step": 37300 + }, + { + "epoch": 6.65, + "learning_rate": 4.667510699001427e-05, + "loss": 0.0067, + "step": 37310 + }, + { + "epoch": 6.65, + "learning_rate": 4.6674215406562054e-05, + "loss": 0.0078, + "step": 37320 + }, + { + "epoch": 6.66, + "learning_rate": 4.6673323823109845e-05, + "loss": 0.0056, + "step": 37330 + }, + { + "epoch": 6.66, + "learning_rate": 4.667243223965763e-05, + "loss": 0.0038, + "step": 37340 + }, + { + "epoch": 6.66, + "learning_rate": 4.667154065620543e-05, + "loss": 0.0082, + "step": 37350 + }, + { + "epoch": 6.66, + "learning_rate": 4.667064907275321e-05, + "loss": 0.0071, + "step": 37360 + }, + { + "epoch": 6.66, + "learning_rate": 4.6669757489301004e-05, + "loss": 0.0095, + "step": 37370 + }, + { + "epoch": 6.67, + "learning_rate": 4.666886590584879e-05, + "loss": 0.0059, + "step": 37380 + }, + { + "epoch": 6.67, + "learning_rate": 4.666797432239658e-05, + "loss": 0.0086, + "step": 37390 + }, + { + "epoch": 6.67, + "learning_rate": 4.666708273894437e-05, + "loss": 0.0033, + "step": 37400 + }, + { + "epoch": 6.67, + "learning_rate": 4.6666191155492155e-05, + "loss": 0.0032, + "step": 37410 + }, + { + "epoch": 6.67, + "learning_rate": 4.6665299572039946e-05, + "loss": 0.0048, + "step": 37420 + }, + { + "epoch": 6.67, + "learning_rate": 4.666440798858773e-05, + "loss": 0.0055, + "step": 37430 + }, + { + "epoch": 6.68, + "learning_rate": 4.666351640513552e-05, + "loss": 0.0055, + "step": 37440 + }, + { + "epoch": 6.68, + "learning_rate": 4.6662624821683306e-05, + "loss": 0.0034, + "step": 37450 + }, + { + "epoch": 6.68, + "learning_rate": 4.6661733238231104e-05, + "loss": 0.0031, + "step": 37460 + }, + { + "epoch": 6.68, + "learning_rate": 4.666084165477889e-05, + "loss": 0.0065, + "step": 37470 + }, + { + "epoch": 6.68, + "learning_rate": 4.665995007132668e-05, + "loss": 0.0059, + "step": 37480 + }, + { + "epoch": 6.69, + "learning_rate": 4.665905848787447e-05, + "loss": 0.0057, + "step": 37490 + }, + { + "epoch": 6.69, + "learning_rate": 4.6658166904422255e-05, + "loss": 0.0048, + "step": 37500 + }, + { + "epoch": 6.69, + "learning_rate": 4.6657275320970047e-05, + "loss": 0.0079, + "step": 37510 + }, + { + "epoch": 6.69, + "learning_rate": 4.665638373751783e-05, + "loss": 0.0063, + "step": 37520 + }, + { + "epoch": 6.69, + "learning_rate": 4.665549215406562e-05, + "loss": 0.0081, + "step": 37530 + }, + { + "epoch": 6.69, + "learning_rate": 4.6654600570613413e-05, + "loss": 0.0038, + "step": 37540 + }, + { + "epoch": 6.7, + "learning_rate": 4.66537089871612e-05, + "loss": 0.005, + "step": 37550 + }, + { + "epoch": 6.7, + "learning_rate": 4.665281740370899e-05, + "loss": 0.0085, + "step": 37560 + }, + { + "epoch": 6.7, + "learning_rate": 4.665192582025678e-05, + "loss": 0.0038, + "step": 37570 + }, + { + "epoch": 6.7, + "learning_rate": 4.665112339514979e-05, + "loss": 0.0089, + "step": 37580 + }, + { + "epoch": 6.7, + "learning_rate": 4.665023181169757e-05, + "loss": 0.0041, + "step": 37590 + }, + { + "epoch": 6.7, + "learning_rate": 4.6649340228245364e-05, + "loss": 0.0052, + "step": 37600 + }, + { + "epoch": 6.71, + "learning_rate": 4.6648448644793155e-05, + "loss": 0.0075, + "step": 37610 + }, + { + "epoch": 6.71, + "learning_rate": 4.6647557061340946e-05, + "loss": 0.0065, + "step": 37620 + }, + { + "epoch": 6.71, + "learning_rate": 4.664666547788874e-05, + "loss": 0.0048, + "step": 37630 + }, + { + "epoch": 6.71, + "learning_rate": 4.664577389443652e-05, + "loss": 0.006, + "step": 37640 + }, + { + "epoch": 6.71, + "learning_rate": 4.664488231098431e-05, + "loss": 0.007, + "step": 37650 + }, + { + "epoch": 6.72, + "learning_rate": 4.66439907275321e-05, + "loss": 0.0054, + "step": 37660 + }, + { + "epoch": 6.72, + "learning_rate": 4.664309914407989e-05, + "loss": 0.0027, + "step": 37670 + }, + { + "epoch": 6.72, + "learning_rate": 4.664220756062767e-05, + "loss": 0.0056, + "step": 37680 + }, + { + "epoch": 6.72, + "learning_rate": 4.6641315977175465e-05, + "loss": 0.0062, + "step": 37690 + }, + { + "epoch": 6.72, + "learning_rate": 4.6640424393723256e-05, + "loss": 0.0052, + "step": 37700 + }, + { + "epoch": 6.72, + "learning_rate": 4.663953281027104e-05, + "loss": 0.0067, + "step": 37710 + }, + { + "epoch": 6.73, + "learning_rate": 4.663864122681883e-05, + "loss": 0.0031, + "step": 37720 + }, + { + "epoch": 6.73, + "learning_rate": 4.663774964336662e-05, + "loss": 0.0063, + "step": 37730 + }, + { + "epoch": 6.73, + "learning_rate": 4.6636858059914414e-05, + "loss": 0.0051, + "step": 37740 + }, + { + "epoch": 6.73, + "learning_rate": 4.66359664764622e-05, + "loss": 0.0061, + "step": 37750 + }, + { + "epoch": 6.73, + "learning_rate": 4.663507489300999e-05, + "loss": 0.0049, + "step": 37760 + }, + { + "epoch": 6.74, + "learning_rate": 4.663418330955778e-05, + "loss": 0.004, + "step": 37770 + }, + { + "epoch": 6.74, + "learning_rate": 4.6633291726105565e-05, + "loss": 0.0078, + "step": 37780 + }, + { + "epoch": 6.74, + "learning_rate": 4.6632400142653356e-05, + "loss": 0.0049, + "step": 37790 + }, + { + "epoch": 6.74, + "learning_rate": 4.663150855920114e-05, + "loss": 0.0049, + "step": 37800 + }, + { + "epoch": 6.74, + "learning_rate": 4.663061697574893e-05, + "loss": 0.0095, + "step": 37810 + }, + { + "epoch": 6.74, + "learning_rate": 4.6629725392296716e-05, + "loss": 0.0054, + "step": 37820 + }, + { + "epoch": 6.75, + "learning_rate": 4.662883380884451e-05, + "loss": 0.0029, + "step": 37830 + }, + { + "epoch": 6.75, + "learning_rate": 4.66279422253923e-05, + "loss": 0.0046, + "step": 37840 + }, + { + "epoch": 6.75, + "learning_rate": 4.662705064194009e-05, + "loss": 0.0047, + "step": 37850 + }, + { + "epoch": 6.75, + "learning_rate": 4.662615905848788e-05, + "loss": 0.0034, + "step": 37860 + }, + { + "epoch": 6.75, + "learning_rate": 4.6625267475035666e-05, + "loss": 0.0083, + "step": 37870 + }, + { + "epoch": 6.75, + "learning_rate": 4.662437589158346e-05, + "loss": 0.0047, + "step": 37880 + }, + { + "epoch": 6.76, + "learning_rate": 4.662348430813124e-05, + "loss": 0.0049, + "step": 37890 + }, + { + "epoch": 6.76, + "learning_rate": 4.662259272467903e-05, + "loss": 0.0034, + "step": 37900 + }, + { + "epoch": 6.76, + "learning_rate": 4.662170114122682e-05, + "loss": 0.0067, + "step": 37910 + }, + { + "epoch": 6.76, + "learning_rate": 4.662080955777461e-05, + "loss": 0.006, + "step": 37920 + }, + { + "epoch": 6.76, + "learning_rate": 4.66199179743224e-05, + "loss": 0.0056, + "step": 37930 + }, + { + "epoch": 6.77, + "learning_rate": 4.6619026390870184e-05, + "loss": 0.0093, + "step": 37940 + }, + { + "epoch": 6.77, + "learning_rate": 4.661813480741798e-05, + "loss": 0.0051, + "step": 37950 + }, + { + "epoch": 6.77, + "learning_rate": 4.6617243223965766e-05, + "loss": 0.0067, + "step": 37960 + }, + { + "epoch": 6.77, + "learning_rate": 4.661635164051356e-05, + "loss": 0.006, + "step": 37970 + }, + { + "epoch": 6.77, + "learning_rate": 4.661546005706134e-05, + "loss": 0.0045, + "step": 37980 + }, + { + "epoch": 6.77, + "learning_rate": 4.661456847360913e-05, + "loss": 0.0048, + "step": 37990 + }, + { + "epoch": 6.78, + "learning_rate": 4.6613676890156924e-05, + "loss": 0.0059, + "step": 38000 + }, + { + "epoch": 6.78, + "learning_rate": 4.661278530670471e-05, + "loss": 0.0071, + "step": 38010 + }, + { + "epoch": 6.78, + "learning_rate": 4.66118937232525e-05, + "loss": 0.0051, + "step": 38020 + }, + { + "epoch": 6.78, + "learning_rate": 4.6611002139800284e-05, + "loss": 0.0039, + "step": 38030 + }, + { + "epoch": 6.78, + "learning_rate": 4.6610110556348076e-05, + "loss": 0.0056, + "step": 38040 + }, + { + "epoch": 6.78, + "learning_rate": 4.660921897289586e-05, + "loss": 0.0047, + "step": 38050 + }, + { + "epoch": 6.79, + "learning_rate": 4.660832738944366e-05, + "loss": 0.0063, + "step": 38060 + }, + { + "epoch": 6.79, + "learning_rate": 4.660743580599144e-05, + "loss": 0.004, + "step": 38070 + }, + { + "epoch": 6.79, + "learning_rate": 4.6606544222539234e-05, + "loss": 0.0038, + "step": 38080 + }, + { + "epoch": 6.79, + "learning_rate": 4.6605652639087025e-05, + "loss": 0.0067, + "step": 38090 + }, + { + "epoch": 6.79, + "learning_rate": 4.660476105563481e-05, + "loss": 0.0043, + "step": 38100 + }, + { + "epoch": 6.8, + "learning_rate": 4.66038694721826e-05, + "loss": 0.0045, + "step": 38110 + }, + { + "epoch": 6.8, + "learning_rate": 4.6602977888730385e-05, + "loss": 0.0048, + "step": 38120 + }, + { + "epoch": 6.8, + "learning_rate": 4.6602086305278176e-05, + "loss": 0.0047, + "step": 38130 + }, + { + "epoch": 6.8, + "learning_rate": 4.660119472182596e-05, + "loss": 0.0037, + "step": 38140 + }, + { + "epoch": 6.8, + "learning_rate": 4.660030313837375e-05, + "loss": 0.0055, + "step": 38150 + }, + { + "epoch": 6.8, + "learning_rate": 4.659941155492154e-05, + "loss": 0.0066, + "step": 38160 + }, + { + "epoch": 6.81, + "learning_rate": 4.6598519971469334e-05, + "loss": 0.0068, + "step": 38170 + }, + { + "epoch": 6.81, + "learning_rate": 4.6597628388017125e-05, + "loss": 0.0065, + "step": 38180 + }, + { + "epoch": 6.81, + "learning_rate": 4.659673680456491e-05, + "loss": 0.0049, + "step": 38190 + }, + { + "epoch": 6.81, + "learning_rate": 4.65958452211127e-05, + "loss": 0.0045, + "step": 38200 + }, + { + "epoch": 6.81, + "learning_rate": 4.6594953637660485e-05, + "loss": 0.0087, + "step": 38210 + }, + { + "epoch": 6.82, + "learning_rate": 4.659406205420828e-05, + "loss": 0.0045, + "step": 38220 + }, + { + "epoch": 6.82, + "learning_rate": 4.659317047075607e-05, + "loss": 0.0046, + "step": 38230 + }, + { + "epoch": 6.82, + "learning_rate": 4.659227888730385e-05, + "loss": 0.0062, + "step": 38240 + }, + { + "epoch": 6.82, + "learning_rate": 4.6591387303851644e-05, + "loss": 0.0071, + "step": 38250 + }, + { + "epoch": 6.82, + "learning_rate": 4.659049572039943e-05, + "loss": 0.0065, + "step": 38260 + }, + { + "epoch": 6.82, + "learning_rate": 4.658960413694722e-05, + "loss": 0.0046, + "step": 38270 + }, + { + "epoch": 6.83, + "learning_rate": 4.658871255349501e-05, + "loss": 0.0044, + "step": 38280 + }, + { + "epoch": 6.83, + "learning_rate": 4.65878209700428e-05, + "loss": 0.0047, + "step": 38290 + }, + { + "epoch": 6.83, + "learning_rate": 4.6586929386590586e-05, + "loss": 0.0079, + "step": 38300 + }, + { + "epoch": 6.83, + "learning_rate": 4.658603780313838e-05, + "loss": 0.0025, + "step": 38310 + }, + { + "epoch": 6.83, + "learning_rate": 4.658514621968617e-05, + "loss": 0.0034, + "step": 38320 + }, + { + "epoch": 6.83, + "learning_rate": 4.658425463623395e-05, + "loss": 0.0055, + "step": 38330 + }, + { + "epoch": 6.84, + "learning_rate": 4.6583363052781744e-05, + "loss": 0.0052, + "step": 38340 + }, + { + "epoch": 6.84, + "learning_rate": 4.658247146932953e-05, + "loss": 0.0026, + "step": 38350 + }, + { + "epoch": 6.84, + "learning_rate": 4.658157988587732e-05, + "loss": 0.007, + "step": 38360 + }, + { + "epoch": 6.84, + "learning_rate": 4.6580688302425104e-05, + "loss": 0.0046, + "step": 38370 + }, + { + "epoch": 6.84, + "learning_rate": 4.6579796718972895e-05, + "loss": 0.0072, + "step": 38380 + }, + { + "epoch": 6.85, + "learning_rate": 4.657890513552069e-05, + "loss": 0.0067, + "step": 38390 + }, + { + "epoch": 6.85, + "learning_rate": 4.657801355206848e-05, + "loss": 0.0058, + "step": 38400 + }, + { + "epoch": 6.85, + "learning_rate": 4.657712196861627e-05, + "loss": 0.0057, + "step": 38410 + }, + { + "epoch": 6.85, + "learning_rate": 4.6576230385164053e-05, + "loss": 0.0044, + "step": 38420 + }, + { + "epoch": 6.85, + "learning_rate": 4.6575338801711845e-05, + "loss": 0.0066, + "step": 38430 + }, + { + "epoch": 6.85, + "learning_rate": 4.657444721825963e-05, + "loss": 0.0036, + "step": 38440 + }, + { + "epoch": 6.86, + "learning_rate": 4.657355563480742e-05, + "loss": 0.0061, + "step": 38450 + }, + { + "epoch": 6.86, + "learning_rate": 4.657266405135521e-05, + "loss": 0.0058, + "step": 38460 + }, + { + "epoch": 6.86, + "learning_rate": 4.6571772467902996e-05, + "loss": 0.0086, + "step": 38470 + }, + { + "epoch": 6.86, + "learning_rate": 4.657088088445079e-05, + "loss": 0.0063, + "step": 38480 + }, + { + "epoch": 6.86, + "learning_rate": 4.656998930099857e-05, + "loss": 0.0033, + "step": 38490 + }, + { + "epoch": 6.87, + "learning_rate": 4.656909771754637e-05, + "loss": 0.0076, + "step": 38500 + }, + { + "epoch": 6.87, + "learning_rate": 4.6568206134094154e-05, + "loss": 0.007, + "step": 38510 + }, + { + "epoch": 6.87, + "learning_rate": 4.6567314550641945e-05, + "loss": 0.005, + "step": 38520 + }, + { + "epoch": 6.87, + "learning_rate": 4.656642296718973e-05, + "loss": 0.0046, + "step": 38530 + }, + { + "epoch": 6.87, + "learning_rate": 4.656553138373752e-05, + "loss": 0.0061, + "step": 38540 + }, + { + "epoch": 6.87, + "learning_rate": 4.656463980028531e-05, + "loss": 0.0046, + "step": 38550 + }, + { + "epoch": 6.88, + "learning_rate": 4.6563748216833097e-05, + "loss": 0.0061, + "step": 38560 + }, + { + "epoch": 6.88, + "learning_rate": 4.656285663338089e-05, + "loss": 0.0077, + "step": 38570 + }, + { + "epoch": 6.88, + "learning_rate": 4.656196504992867e-05, + "loss": 0.0046, + "step": 38580 + }, + { + "epoch": 6.88, + "learning_rate": 4.656107346647646e-05, + "loss": 0.0062, + "step": 38590 + }, + { + "epoch": 6.88, + "learning_rate": 4.656018188302425e-05, + "loss": 0.0029, + "step": 38600 + }, + { + "epoch": 6.88, + "learning_rate": 4.6559290299572046e-05, + "loss": 0.0071, + "step": 38610 + }, + { + "epoch": 6.89, + "learning_rate": 4.655839871611983e-05, + "loss": 0.0056, + "step": 38620 + }, + { + "epoch": 6.89, + "learning_rate": 4.655750713266762e-05, + "loss": 0.0064, + "step": 38630 + }, + { + "epoch": 6.89, + "learning_rate": 4.655661554921541e-05, + "loss": 0.0049, + "step": 38640 + }, + { + "epoch": 6.89, + "learning_rate": 4.65557239657632e-05, + "loss": 0.0072, + "step": 38650 + }, + { + "epoch": 6.89, + "learning_rate": 4.655483238231099e-05, + "loss": 0.0055, + "step": 38660 + }, + { + "epoch": 6.9, + "learning_rate": 4.655394079885877e-05, + "loss": 0.0033, + "step": 38670 + }, + { + "epoch": 6.9, + "learning_rate": 4.6553049215406564e-05, + "loss": 0.0067, + "step": 38680 + }, + { + "epoch": 6.9, + "learning_rate": 4.655215763195435e-05, + "loss": 0.0061, + "step": 38690 + }, + { + "epoch": 6.9, + "learning_rate": 4.655126604850214e-05, + "loss": 0.0066, + "step": 38700 + }, + { + "epoch": 6.9, + "learning_rate": 4.655037446504993e-05, + "loss": 0.0063, + "step": 38710 + }, + { + "epoch": 6.9, + "learning_rate": 4.654948288159772e-05, + "loss": 0.0114, + "step": 38720 + }, + { + "epoch": 6.91, + "learning_rate": 4.654859129814551e-05, + "loss": 0.0054, + "step": 38730 + }, + { + "epoch": 6.91, + "learning_rate": 4.65476997146933e-05, + "loss": 0.0044, + "step": 38740 + }, + { + "epoch": 6.91, + "learning_rate": 4.654680813124109e-05, + "loss": 0.0037, + "step": 38750 + }, + { + "epoch": 6.91, + "learning_rate": 4.654591654778887e-05, + "loss": 0.0054, + "step": 38760 + }, + { + "epoch": 6.91, + "learning_rate": 4.6545024964336665e-05, + "loss": 0.0074, + "step": 38770 + }, + { + "epoch": 6.92, + "learning_rate": 4.6544133380884456e-05, + "loss": 0.0034, + "step": 38780 + }, + { + "epoch": 6.92, + "learning_rate": 4.654324179743224e-05, + "loss": 0.0099, + "step": 38790 + }, + { + "epoch": 6.92, + "learning_rate": 4.654235021398003e-05, + "loss": 0.0067, + "step": 38800 + }, + { + "epoch": 6.92, + "learning_rate": 4.6541458630527816e-05, + "loss": 0.0034, + "step": 38810 + }, + { + "epoch": 6.92, + "learning_rate": 4.654056704707561e-05, + "loss": 0.0044, + "step": 38820 + }, + { + "epoch": 6.92, + "learning_rate": 4.65396754636234e-05, + "loss": 0.0031, + "step": 38830 + }, + { + "epoch": 6.93, + "learning_rate": 4.653878388017119e-05, + "loss": 0.0052, + "step": 38840 + }, + { + "epoch": 6.93, + "learning_rate": 4.6537892296718974e-05, + "loss": 0.0062, + "step": 38850 + }, + { + "epoch": 6.93, + "learning_rate": 4.6537000713266765e-05, + "loss": 0.0043, + "step": 38860 + }, + { + "epoch": 6.93, + "learning_rate": 4.6536109129814556e-05, + "loss": 0.006, + "step": 38870 + }, + { + "epoch": 6.93, + "learning_rate": 4.653521754636234e-05, + "loss": 0.0082, + "step": 38880 + }, + { + "epoch": 6.93, + "learning_rate": 4.653432596291013e-05, + "loss": 0.0068, + "step": 38890 + }, + { + "epoch": 6.94, + "learning_rate": 4.6533434379457916e-05, + "loss": 0.0031, + "step": 38900 + }, + { + "epoch": 6.94, + "learning_rate": 4.653254279600571e-05, + "loss": 0.0036, + "step": 38910 + }, + { + "epoch": 6.94, + "learning_rate": 4.653165121255349e-05, + "loss": 0.0049, + "step": 38920 + }, + { + "epoch": 6.94, + "learning_rate": 4.653075962910128e-05, + "loss": 0.0051, + "step": 38930 + }, + { + "epoch": 6.94, + "learning_rate": 4.6529868045649074e-05, + "loss": 0.0076, + "step": 38940 + }, + { + "epoch": 6.95, + "learning_rate": 4.6528976462196866e-05, + "loss": 0.0056, + "step": 38950 + }, + { + "epoch": 6.95, + "learning_rate": 4.652808487874466e-05, + "loss": 0.0051, + "step": 38960 + }, + { + "epoch": 6.95, + "learning_rate": 4.652719329529244e-05, + "loss": 0.0089, + "step": 38970 + }, + { + "epoch": 6.95, + "learning_rate": 4.652630171184023e-05, + "loss": 0.0051, + "step": 38980 + }, + { + "epoch": 6.95, + "learning_rate": 4.652541012838802e-05, + "loss": 0.0079, + "step": 38990 + }, + { + "epoch": 6.95, + "learning_rate": 4.652451854493581e-05, + "loss": 0.0056, + "step": 39000 + }, + { + "epoch": 6.96, + "learning_rate": 4.65236269614836e-05, + "loss": 0.0064, + "step": 39010 + }, + { + "epoch": 6.96, + "learning_rate": 4.6522735378031384e-05, + "loss": 0.005, + "step": 39020 + }, + { + "epoch": 6.96, + "learning_rate": 4.6521843794579175e-05, + "loss": 0.0048, + "step": 39030 + }, + { + "epoch": 6.96, + "learning_rate": 4.652095221112696e-05, + "loss": 0.0047, + "step": 39040 + }, + { + "epoch": 6.96, + "learning_rate": 4.652006062767476e-05, + "loss": 0.0069, + "step": 39050 + }, + { + "epoch": 6.97, + "learning_rate": 4.651916904422254e-05, + "loss": 0.0048, + "step": 39060 + }, + { + "epoch": 6.97, + "learning_rate": 4.651827746077033e-05, + "loss": 0.0067, + "step": 39070 + }, + { + "epoch": 6.97, + "learning_rate": 4.651738587731812e-05, + "loss": 0.0048, + "step": 39080 + }, + { + "epoch": 6.97, + "learning_rate": 4.651649429386591e-05, + "loss": 0.0043, + "step": 39090 + }, + { + "epoch": 6.97, + "learning_rate": 4.65156027104137e-05, + "loss": 0.002, + "step": 39100 + }, + { + "epoch": 6.97, + "learning_rate": 4.6514711126961484e-05, + "loss": 0.0037, + "step": 39110 + }, + { + "epoch": 6.98, + "learning_rate": 4.6513819543509276e-05, + "loss": 0.0055, + "step": 39120 + }, + { + "epoch": 6.98, + "learning_rate": 4.651292796005706e-05, + "loss": 0.0067, + "step": 39130 + }, + { + "epoch": 6.98, + "learning_rate": 4.651203637660485e-05, + "loss": 0.0064, + "step": 39140 + }, + { + "epoch": 6.98, + "learning_rate": 4.6511144793152636e-05, + "loss": 0.008, + "step": 39150 + }, + { + "epoch": 6.98, + "learning_rate": 4.6510253209700434e-05, + "loss": 0.0053, + "step": 39160 + }, + { + "epoch": 6.98, + "learning_rate": 4.650936162624822e-05, + "loss": 0.0068, + "step": 39170 + }, + { + "epoch": 6.99, + "learning_rate": 4.650847004279601e-05, + "loss": 0.0049, + "step": 39180 + }, + { + "epoch": 6.99, + "learning_rate": 4.65075784593438e-05, + "loss": 0.0046, + "step": 39190 + }, + { + "epoch": 6.99, + "learning_rate": 4.6506686875891585e-05, + "loss": 0.0033, + "step": 39200 + }, + { + "epoch": 6.99, + "learning_rate": 4.6505795292439376e-05, + "loss": 0.0051, + "step": 39210 + }, + { + "epoch": 6.99, + "learning_rate": 4.650490370898716e-05, + "loss": 0.0065, + "step": 39220 + }, + { + "epoch": 7.0, + "learning_rate": 4.650401212553495e-05, + "loss": 0.0043, + "step": 39230 + }, + { + "epoch": 7.0, + "learning_rate": 4.650312054208274e-05, + "loss": 0.0047, + "step": 39240 + }, + { + "epoch": 7.0, + "learning_rate": 4.650222895863053e-05, + "loss": 0.0038, + "step": 39250 + }, + { + "epoch": 7.0, + "eval_loss": 0.01523965410888195, + "eval_runtime": 196.0516, + "eval_samples_per_second": 23.662, + "eval_steps_per_second": 2.958, + "step": 39256 + }, + { + "epoch": 7.0, + "learning_rate": 4.650133737517832e-05, + "loss": 0.0069, + "step": 39260 + }, + { + "epoch": 7.0, + "learning_rate": 4.650044579172611e-05, + "loss": 0.0038, + "step": 39270 + }, + { + "epoch": 7.0, + "learning_rate": 4.64995542082739e-05, + "loss": 0.0074, + "step": 39280 + }, + { + "epoch": 7.01, + "learning_rate": 4.6498662624821685e-05, + "loss": 0.004, + "step": 39290 + }, + { + "epoch": 7.01, + "learning_rate": 4.649777104136948e-05, + "loss": 0.0042, + "step": 39300 + }, + { + "epoch": 7.01, + "learning_rate": 4.649687945791726e-05, + "loss": 0.0032, + "step": 39310 + }, + { + "epoch": 7.01, + "learning_rate": 4.649598787446505e-05, + "loss": 0.0056, + "step": 39320 + }, + { + "epoch": 7.01, + "learning_rate": 4.6495096291012844e-05, + "loss": 0.0032, + "step": 39330 + }, + { + "epoch": 7.01, + "learning_rate": 4.649420470756063e-05, + "loss": 0.0055, + "step": 39340 + }, + { + "epoch": 7.02, + "learning_rate": 4.649331312410842e-05, + "loss": 0.0061, + "step": 39350 + }, + { + "epoch": 7.02, + "learning_rate": 4.6492421540656204e-05, + "loss": 0.0031, + "step": 39360 + }, + { + "epoch": 7.02, + "learning_rate": 4.6491529957203995e-05, + "loss": 0.0048, + "step": 39370 + }, + { + "epoch": 7.02, + "learning_rate": 4.6490638373751786e-05, + "loss": 0.0055, + "step": 39380 + }, + { + "epoch": 7.02, + "learning_rate": 4.648974679029958e-05, + "loss": 0.0074, + "step": 39390 + }, + { + "epoch": 7.03, + "learning_rate": 4.648885520684736e-05, + "loss": 0.0031, + "step": 39400 + }, + { + "epoch": 7.03, + "learning_rate": 4.648796362339515e-05, + "loss": 0.004, + "step": 39410 + }, + { + "epoch": 7.03, + "learning_rate": 4.6487072039942944e-05, + "loss": 0.0052, + "step": 39420 + }, + { + "epoch": 7.03, + "learning_rate": 4.648618045649073e-05, + "loss": 0.0044, + "step": 39430 + }, + { + "epoch": 7.03, + "learning_rate": 4.648528887303852e-05, + "loss": 0.004, + "step": 39440 + }, + { + "epoch": 7.03, + "learning_rate": 4.6484397289586304e-05, + "loss": 0.0048, + "step": 39450 + }, + { + "epoch": 7.04, + "learning_rate": 4.6483505706134095e-05, + "loss": 0.0037, + "step": 39460 + }, + { + "epoch": 7.04, + "learning_rate": 4.6482614122681887e-05, + "loss": 0.0049, + "step": 39470 + }, + { + "epoch": 7.04, + "learning_rate": 4.648172253922967e-05, + "loss": 0.0039, + "step": 39480 + }, + { + "epoch": 7.04, + "learning_rate": 4.648083095577747e-05, + "loss": 0.0034, + "step": 39490 + }, + { + "epoch": 7.04, + "learning_rate": 4.6479939372325253e-05, + "loss": 0.0078, + "step": 39500 + }, + { + "epoch": 7.05, + "learning_rate": 4.6479047788873045e-05, + "loss": 0.0086, + "step": 39510 + }, + { + "epoch": 7.05, + "learning_rate": 4.647815620542083e-05, + "loss": 0.0025, + "step": 39520 + }, + { + "epoch": 7.05, + "learning_rate": 4.647726462196862e-05, + "loss": 0.004, + "step": 39530 + }, + { + "epoch": 7.05, + "learning_rate": 4.6476373038516405e-05, + "loss": 0.005, + "step": 39540 + }, + { + "epoch": 7.05, + "learning_rate": 4.6475481455064196e-05, + "loss": 0.0058, + "step": 39550 + }, + { + "epoch": 7.05, + "learning_rate": 4.647458987161199e-05, + "loss": 0.0053, + "step": 39560 + }, + { + "epoch": 7.06, + "learning_rate": 4.647369828815977e-05, + "loss": 0.0046, + "step": 39570 + }, + { + "epoch": 7.06, + "learning_rate": 4.647280670470756e-05, + "loss": 0.0062, + "step": 39580 + }, + { + "epoch": 7.06, + "learning_rate": 4.647191512125535e-05, + "loss": 0.0057, + "step": 39590 + }, + { + "epoch": 7.06, + "learning_rate": 4.6471023537803145e-05, + "loss": 0.0064, + "step": 39600 + }, + { + "epoch": 7.06, + "learning_rate": 4.647013195435093e-05, + "loss": 0.0079, + "step": 39610 + }, + { + "epoch": 7.06, + "learning_rate": 4.646924037089872e-05, + "loss": 0.0038, + "step": 39620 + }, + { + "epoch": 7.07, + "learning_rate": 4.6468348787446505e-05, + "loss": 0.0045, + "step": 39630 + }, + { + "epoch": 7.07, + "learning_rate": 4.6467457203994296e-05, + "loss": 0.0049, + "step": 39640 + }, + { + "epoch": 7.07, + "learning_rate": 4.646656562054209e-05, + "loss": 0.0042, + "step": 39650 + }, + { + "epoch": 7.07, + "learning_rate": 4.646567403708987e-05, + "loss": 0.0046, + "step": 39660 + }, + { + "epoch": 7.07, + "learning_rate": 4.646478245363766e-05, + "loss": 0.0061, + "step": 39670 + }, + { + "epoch": 7.08, + "learning_rate": 4.646389087018545e-05, + "loss": 0.0054, + "step": 39680 + }, + { + "epoch": 7.08, + "learning_rate": 4.646299928673324e-05, + "loss": 0.0059, + "step": 39690 + }, + { + "epoch": 7.08, + "learning_rate": 4.646210770328103e-05, + "loss": 0.0058, + "step": 39700 + }, + { + "epoch": 7.08, + "learning_rate": 4.646121611982882e-05, + "loss": 0.0056, + "step": 39710 + }, + { + "epoch": 7.08, + "learning_rate": 4.646032453637661e-05, + "loss": 0.009, + "step": 39720 + }, + { + "epoch": 7.08, + "learning_rate": 4.64594329529244e-05, + "loss": 0.0049, + "step": 39730 + }, + { + "epoch": 7.09, + "learning_rate": 4.645854136947219e-05, + "loss": 0.0064, + "step": 39740 + }, + { + "epoch": 7.09, + "learning_rate": 4.645764978601997e-05, + "loss": 0.0052, + "step": 39750 + }, + { + "epoch": 7.09, + "learning_rate": 4.6456758202567764e-05, + "loss": 0.0039, + "step": 39760 + }, + { + "epoch": 7.09, + "learning_rate": 4.645586661911555e-05, + "loss": 0.0065, + "step": 39770 + }, + { + "epoch": 7.09, + "learning_rate": 4.645497503566334e-05, + "loss": 0.0037, + "step": 39780 + }, + { + "epoch": 7.1, + "learning_rate": 4.645408345221113e-05, + "loss": 0.0033, + "step": 39790 + }, + { + "epoch": 7.1, + "learning_rate": 4.6453191868758915e-05, + "loss": 0.0055, + "step": 39800 + }, + { + "epoch": 7.1, + "learning_rate": 4.6452300285306706e-05, + "loss": 0.0057, + "step": 39810 + }, + { + "epoch": 7.1, + "learning_rate": 4.64514087018545e-05, + "loss": 0.004, + "step": 39820 + }, + { + "epoch": 7.1, + "learning_rate": 4.645051711840229e-05, + "loss": 0.0047, + "step": 39830 + }, + { + "epoch": 7.1, + "learning_rate": 4.644962553495007e-05, + "loss": 0.0039, + "step": 39840 + }, + { + "epoch": 7.11, + "learning_rate": 4.6448733951497864e-05, + "loss": 0.005, + "step": 39850 + }, + { + "epoch": 7.11, + "learning_rate": 4.644784236804565e-05, + "loss": 0.0039, + "step": 39860 + }, + { + "epoch": 7.11, + "learning_rate": 4.644695078459344e-05, + "loss": 0.0046, + "step": 39870 + }, + { + "epoch": 7.11, + "learning_rate": 4.644605920114123e-05, + "loss": 0.0073, + "step": 39880 + }, + { + "epoch": 7.11, + "learning_rate": 4.6445167617689016e-05, + "loss": 0.0042, + "step": 39890 + }, + { + "epoch": 7.11, + "learning_rate": 4.644427603423681e-05, + "loss": 0.0042, + "step": 39900 + }, + { + "epoch": 7.12, + "learning_rate": 4.644338445078459e-05, + "loss": 0.0059, + "step": 39910 + }, + { + "epoch": 7.12, + "learning_rate": 4.644249286733238e-05, + "loss": 0.005, + "step": 39920 + }, + { + "epoch": 7.12, + "learning_rate": 4.6441601283880174e-05, + "loss": 0.0051, + "step": 39930 + }, + { + "epoch": 7.12, + "learning_rate": 4.6440709700427965e-05, + "loss": 0.0048, + "step": 39940 + }, + { + "epoch": 7.12, + "learning_rate": 4.6439818116975756e-05, + "loss": 0.0039, + "step": 39950 + }, + { + "epoch": 7.13, + "learning_rate": 4.643892653352354e-05, + "loss": 0.0055, + "step": 39960 + }, + { + "epoch": 7.13, + "learning_rate": 4.643803495007133e-05, + "loss": 0.0031, + "step": 39970 + }, + { + "epoch": 7.13, + "learning_rate": 4.6437143366619116e-05, + "loss": 0.0045, + "step": 39980 + }, + { + "epoch": 7.13, + "learning_rate": 4.643625178316691e-05, + "loss": 0.0041, + "step": 39990 + }, + { + "epoch": 7.13, + "learning_rate": 4.643536019971469e-05, + "loss": 0.0061, + "step": 40000 + }, + { + "epoch": 7.13, + "learning_rate": 4.643446861626248e-05, + "loss": 0.0039, + "step": 40010 + }, + { + "epoch": 7.14, + "learning_rate": 4.6433577032810274e-05, + "loss": 0.0037, + "step": 40020 + }, + { + "epoch": 7.14, + "learning_rate": 4.643268544935806e-05, + "loss": 0.0056, + "step": 40030 + }, + { + "epoch": 7.14, + "learning_rate": 4.643179386590586e-05, + "loss": 0.003, + "step": 40040 + }, + { + "epoch": 7.14, + "learning_rate": 4.643090228245364e-05, + "loss": 0.0021, + "step": 40050 + }, + { + "epoch": 7.14, + "learning_rate": 4.643001069900143e-05, + "loss": 0.0035, + "step": 40060 + }, + { + "epoch": 7.15, + "learning_rate": 4.642911911554922e-05, + "loss": 0.0064, + "step": 40070 + }, + { + "epoch": 7.15, + "learning_rate": 4.642822753209701e-05, + "loss": 0.0027, + "step": 40080 + }, + { + "epoch": 7.15, + "learning_rate": 4.642733594864479e-05, + "loss": 0.0056, + "step": 40090 + }, + { + "epoch": 7.15, + "learning_rate": 4.6426444365192584e-05, + "loss": 0.0038, + "step": 40100 + }, + { + "epoch": 7.15, + "learning_rate": 4.6425552781740375e-05, + "loss": 0.0041, + "step": 40110 + }, + { + "epoch": 7.15, + "learning_rate": 4.642466119828816e-05, + "loss": 0.0036, + "step": 40120 + }, + { + "epoch": 7.16, + "learning_rate": 4.642376961483595e-05, + "loss": 0.0029, + "step": 40130 + }, + { + "epoch": 7.16, + "learning_rate": 4.6422878031383735e-05, + "loss": 0.0048, + "step": 40140 + }, + { + "epoch": 7.16, + "learning_rate": 4.6421986447931526e-05, + "loss": 0.0042, + "step": 40150 + }, + { + "epoch": 7.16, + "learning_rate": 4.642109486447932e-05, + "loss": 0.0062, + "step": 40160 + }, + { + "epoch": 7.16, + "learning_rate": 4.642020328102711e-05, + "loss": 0.005, + "step": 40170 + }, + { + "epoch": 7.16, + "learning_rate": 4.64193116975749e-05, + "loss": 0.0052, + "step": 40180 + }, + { + "epoch": 7.17, + "learning_rate": 4.6418420114122684e-05, + "loss": 0.0064, + "step": 40190 + }, + { + "epoch": 7.17, + "learning_rate": 4.6417528530670475e-05, + "loss": 0.0048, + "step": 40200 + }, + { + "epoch": 7.17, + "learning_rate": 4.641663694721826e-05, + "loss": 0.005, + "step": 40210 + }, + { + "epoch": 7.17, + "learning_rate": 4.641574536376605e-05, + "loss": 0.006, + "step": 40220 + }, + { + "epoch": 7.17, + "learning_rate": 4.6414853780313836e-05, + "loss": 0.0049, + "step": 40230 + }, + { + "epoch": 7.18, + "learning_rate": 4.641396219686163e-05, + "loss": 0.0042, + "step": 40240 + }, + { + "epoch": 7.18, + "learning_rate": 4.641307061340942e-05, + "loss": 0.0053, + "step": 40250 + }, + { + "epoch": 7.18, + "learning_rate": 4.64121790299572e-05, + "loss": 0.0073, + "step": 40260 + }, + { + "epoch": 7.18, + "learning_rate": 4.6411287446505e-05, + "loss": 0.0049, + "step": 40270 + }, + { + "epoch": 7.18, + "learning_rate": 4.6410395863052785e-05, + "loss": 0.0054, + "step": 40280 + }, + { + "epoch": 7.18, + "learning_rate": 4.6409504279600576e-05, + "loss": 0.0048, + "step": 40290 + }, + { + "epoch": 7.19, + "learning_rate": 4.640861269614836e-05, + "loss": 0.0073, + "step": 40300 + }, + { + "epoch": 7.19, + "learning_rate": 4.640772111269615e-05, + "loss": 0.0042, + "step": 40310 + }, + { + "epoch": 7.19, + "learning_rate": 4.6406829529243936e-05, + "loss": 0.0063, + "step": 40320 + }, + { + "epoch": 7.19, + "learning_rate": 4.640593794579173e-05, + "loss": 0.003, + "step": 40330 + }, + { + "epoch": 7.19, + "learning_rate": 4.640504636233952e-05, + "loss": 0.0098, + "step": 40340 + }, + { + "epoch": 7.2, + "learning_rate": 4.64041547788873e-05, + "loss": 0.0052, + "step": 40350 + }, + { + "epoch": 7.2, + "learning_rate": 4.6403263195435094e-05, + "loss": 0.0058, + "step": 40360 + }, + { + "epoch": 7.2, + "learning_rate": 4.640237161198288e-05, + "loss": 0.0047, + "step": 40370 + }, + { + "epoch": 7.2, + "learning_rate": 4.640148002853068e-05, + "loss": 0.0043, + "step": 40380 + }, + { + "epoch": 7.2, + "learning_rate": 4.640058844507846e-05, + "loss": 0.0044, + "step": 40390 + }, + { + "epoch": 7.2, + "learning_rate": 4.639969686162625e-05, + "loss": 0.0075, + "step": 40400 + }, + { + "epoch": 7.21, + "learning_rate": 4.6398805278174043e-05, + "loss": 0.0029, + "step": 40410 + }, + { + "epoch": 7.21, + "learning_rate": 4.639791369472183e-05, + "loss": 0.006, + "step": 40420 + }, + { + "epoch": 7.21, + "learning_rate": 4.639702211126962e-05, + "loss": 0.0051, + "step": 40430 + }, + { + "epoch": 7.21, + "learning_rate": 4.6396130527817404e-05, + "loss": 0.0037, + "step": 40440 + }, + { + "epoch": 7.21, + "learning_rate": 4.6395238944365195e-05, + "loss": 0.004, + "step": 40450 + }, + { + "epoch": 7.21, + "learning_rate": 4.639434736091298e-05, + "loss": 0.0066, + "step": 40460 + }, + { + "epoch": 7.22, + "learning_rate": 4.639345577746077e-05, + "loss": 0.0048, + "step": 40470 + }, + { + "epoch": 7.22, + "learning_rate": 4.639256419400856e-05, + "loss": 0.0054, + "step": 40480 + }, + { + "epoch": 7.22, + "learning_rate": 4.639167261055635e-05, + "loss": 0.0054, + "step": 40490 + }, + { + "epoch": 7.22, + "learning_rate": 4.6390781027104144e-05, + "loss": 0.0054, + "step": 40500 + }, + { + "epoch": 7.22, + "learning_rate": 4.638988944365193e-05, + "loss": 0.0066, + "step": 40510 + }, + { + "epoch": 7.23, + "learning_rate": 4.638899786019972e-05, + "loss": 0.0056, + "step": 40520 + }, + { + "epoch": 7.23, + "learning_rate": 4.6388106276747504e-05, + "loss": 0.0032, + "step": 40530 + }, + { + "epoch": 7.23, + "learning_rate": 4.6387214693295295e-05, + "loss": 0.0054, + "step": 40540 + }, + { + "epoch": 7.23, + "learning_rate": 4.638632310984308e-05, + "loss": 0.0041, + "step": 40550 + }, + { + "epoch": 7.23, + "learning_rate": 4.638543152639087e-05, + "loss": 0.0049, + "step": 40560 + }, + { + "epoch": 7.23, + "learning_rate": 4.638453994293866e-05, + "loss": 0.0044, + "step": 40570 + }, + { + "epoch": 7.24, + "learning_rate": 4.6383648359486447e-05, + "loss": 0.0053, + "step": 40580 + }, + { + "epoch": 7.24, + "learning_rate": 4.638275677603424e-05, + "loss": 0.0052, + "step": 40590 + }, + { + "epoch": 7.24, + "learning_rate": 4.638186519258203e-05, + "loss": 0.0053, + "step": 40600 + }, + { + "epoch": 7.24, + "learning_rate": 4.638097360912982e-05, + "loss": 0.0044, + "step": 40610 + }, + { + "epoch": 7.24, + "learning_rate": 4.6380082025677605e-05, + "loss": 0.0039, + "step": 40620 + }, + { + "epoch": 7.25, + "learning_rate": 4.6379190442225396e-05, + "loss": 0.0065, + "step": 40630 + }, + { + "epoch": 7.25, + "learning_rate": 4.637829885877318e-05, + "loss": 0.0045, + "step": 40640 + }, + { + "epoch": 7.25, + "learning_rate": 4.637740727532097e-05, + "loss": 0.0076, + "step": 40650 + }, + { + "epoch": 7.25, + "learning_rate": 4.637651569186876e-05, + "loss": 0.0091, + "step": 40660 + }, + { + "epoch": 7.25, + "learning_rate": 4.637562410841655e-05, + "loss": 0.0062, + "step": 40670 + }, + { + "epoch": 7.25, + "learning_rate": 4.637473252496434e-05, + "loss": 0.0043, + "step": 40680 + }, + { + "epoch": 7.26, + "learning_rate": 4.637384094151212e-05, + "loss": 0.0064, + "step": 40690 + }, + { + "epoch": 7.26, + "learning_rate": 4.6372949358059914e-05, + "loss": 0.0028, + "step": 40700 + }, + { + "epoch": 7.26, + "learning_rate": 4.6372057774607705e-05, + "loss": 0.0058, + "step": 40710 + }, + { + "epoch": 7.26, + "learning_rate": 4.6371166191155496e-05, + "loss": 0.0054, + "step": 40720 + }, + { + "epoch": 7.26, + "learning_rate": 4.637027460770329e-05, + "loss": 0.0053, + "step": 40730 + }, + { + "epoch": 7.26, + "learning_rate": 4.636938302425107e-05, + "loss": 0.0063, + "step": 40740 + }, + { + "epoch": 7.27, + "learning_rate": 4.636849144079886e-05, + "loss": 0.0033, + "step": 40750 + }, + { + "epoch": 7.27, + "learning_rate": 4.636759985734665e-05, + "loss": 0.0055, + "step": 40760 + }, + { + "epoch": 7.27, + "learning_rate": 4.636670827389444e-05, + "loss": 0.0043, + "step": 40770 + }, + { + "epoch": 7.27, + "learning_rate": 4.636581669044222e-05, + "loss": 0.0035, + "step": 40780 + }, + { + "epoch": 7.27, + "learning_rate": 4.6364925106990015e-05, + "loss": 0.0034, + "step": 40790 + }, + { + "epoch": 7.28, + "learning_rate": 4.6364033523537806e-05, + "loss": 0.0081, + "step": 40800 + }, + { + "epoch": 7.28, + "learning_rate": 4.636314194008559e-05, + "loss": 0.0039, + "step": 40810 + }, + { + "epoch": 7.28, + "learning_rate": 4.636225035663339e-05, + "loss": 0.0043, + "step": 40820 + }, + { + "epoch": 7.28, + "learning_rate": 4.636135877318117e-05, + "loss": 0.0051, + "step": 40830 + }, + { + "epoch": 7.28, + "learning_rate": 4.6360467189728964e-05, + "loss": 0.0058, + "step": 40840 + }, + { + "epoch": 7.28, + "learning_rate": 4.635957560627675e-05, + "loss": 0.0065, + "step": 40850 + }, + { + "epoch": 7.29, + "learning_rate": 4.635868402282454e-05, + "loss": 0.004, + "step": 40860 + }, + { + "epoch": 7.29, + "learning_rate": 4.6357792439372324e-05, + "loss": 0.0052, + "step": 40870 + }, + { + "epoch": 7.29, + "learning_rate": 4.6356900855920115e-05, + "loss": 0.0083, + "step": 40880 + }, + { + "epoch": 7.29, + "learning_rate": 4.6356009272467906e-05, + "loss": 0.0061, + "step": 40890 + }, + { + "epoch": 7.29, + "learning_rate": 4.635511768901569e-05, + "loss": 0.0061, + "step": 40900 + }, + { + "epoch": 7.29, + "learning_rate": 4.635422610556348e-05, + "loss": 0.0043, + "step": 40910 + }, + { + "epoch": 7.3, + "learning_rate": 4.6353334522111266e-05, + "loss": 0.005, + "step": 40920 + }, + { + "epoch": 7.3, + "learning_rate": 4.6352442938659064e-05, + "loss": 0.0034, + "step": 40930 + }, + { + "epoch": 7.3, + "learning_rate": 4.635155135520685e-05, + "loss": 0.0044, + "step": 40940 + }, + { + "epoch": 7.3, + "learning_rate": 4.635065977175464e-05, + "loss": 0.0059, + "step": 40950 + }, + { + "epoch": 7.3, + "learning_rate": 4.634976818830243e-05, + "loss": 0.0033, + "step": 40960 + }, + { + "epoch": 7.31, + "learning_rate": 4.6348876604850216e-05, + "loss": 0.0048, + "step": 40970 + }, + { + "epoch": 7.31, + "learning_rate": 4.634798502139801e-05, + "loss": 0.0027, + "step": 40980 + }, + { + "epoch": 7.31, + "learning_rate": 4.634709343794579e-05, + "loss": 0.0038, + "step": 40990 + }, + { + "epoch": 7.31, + "learning_rate": 4.634620185449358e-05, + "loss": 0.0057, + "step": 41000 + }, + { + "epoch": 7.31, + "learning_rate": 4.634531027104137e-05, + "loss": 0.0048, + "step": 41010 + }, + { + "epoch": 7.31, + "learning_rate": 4.634441868758916e-05, + "loss": 0.0041, + "step": 41020 + }, + { + "epoch": 7.32, + "learning_rate": 4.634352710413695e-05, + "loss": 0.003, + "step": 41030 + }, + { + "epoch": 7.32, + "learning_rate": 4.634263552068474e-05, + "loss": 0.0044, + "step": 41040 + }, + { + "epoch": 7.32, + "learning_rate": 4.634174393723253e-05, + "loss": 0.0059, + "step": 41050 + }, + { + "epoch": 7.32, + "learning_rate": 4.6340852353780316e-05, + "loss": 0.0045, + "step": 41060 + }, + { + "epoch": 7.32, + "learning_rate": 4.633996077032811e-05, + "loss": 0.0035, + "step": 41070 + }, + { + "epoch": 7.33, + "learning_rate": 4.633906918687589e-05, + "loss": 0.0041, + "step": 41080 + }, + { + "epoch": 7.33, + "learning_rate": 4.633817760342368e-05, + "loss": 0.0075, + "step": 41090 + }, + { + "epoch": 7.33, + "learning_rate": 4.633728601997147e-05, + "loss": 0.0059, + "step": 41100 + }, + { + "epoch": 7.33, + "learning_rate": 4.633639443651926e-05, + "loss": 0.0064, + "step": 41110 + }, + { + "epoch": 7.33, + "learning_rate": 4.633550285306705e-05, + "loss": 0.0048, + "step": 41120 + }, + { + "epoch": 7.33, + "learning_rate": 4.6334611269614834e-05, + "loss": 0.0031, + "step": 41130 + }, + { + "epoch": 7.34, + "learning_rate": 4.6333719686162626e-05, + "loss": 0.0055, + "step": 41140 + }, + { + "epoch": 7.34, + "learning_rate": 4.633282810271042e-05, + "loss": 0.0044, + "step": 41150 + }, + { + "epoch": 7.34, + "learning_rate": 4.633193651925821e-05, + "loss": 0.007, + "step": 41160 + }, + { + "epoch": 7.34, + "learning_rate": 4.633104493580599e-05, + "loss": 0.0044, + "step": 41170 + }, + { + "epoch": 7.34, + "learning_rate": 4.6330153352353784e-05, + "loss": 0.0066, + "step": 41180 + }, + { + "epoch": 7.34, + "learning_rate": 4.6329261768901575e-05, + "loss": 0.0059, + "step": 41190 + }, + { + "epoch": 7.35, + "learning_rate": 4.632837018544936e-05, + "loss": 0.0038, + "step": 41200 + }, + { + "epoch": 7.35, + "learning_rate": 4.632747860199715e-05, + "loss": 0.0035, + "step": 41210 + }, + { + "epoch": 7.35, + "learning_rate": 4.6326587018544935e-05, + "loss": 0.0052, + "step": 41220 + }, + { + "epoch": 7.35, + "learning_rate": 4.6325695435092726e-05, + "loss": 0.0062, + "step": 41230 + }, + { + "epoch": 7.35, + "learning_rate": 4.632480385164051e-05, + "loss": 0.0039, + "step": 41240 + }, + { + "epoch": 7.36, + "learning_rate": 4.63239122681883e-05, + "loss": 0.0044, + "step": 41250 + }, + { + "epoch": 7.36, + "learning_rate": 4.632302068473609e-05, + "loss": 0.0044, + "step": 41260 + }, + { + "epoch": 7.36, + "learning_rate": 4.6322129101283884e-05, + "loss": 0.0054, + "step": 41270 + }, + { + "epoch": 7.36, + "learning_rate": 4.6321237517831675e-05, + "loss": 0.0031, + "step": 41280 + }, + { + "epoch": 7.36, + "learning_rate": 4.632034593437946e-05, + "loss": 0.0059, + "step": 41290 + }, + { + "epoch": 7.36, + "learning_rate": 4.631945435092725e-05, + "loss": 0.0066, + "step": 41300 + }, + { + "epoch": 7.37, + "learning_rate": 4.6318562767475036e-05, + "loss": 0.0034, + "step": 41310 + }, + { + "epoch": 7.37, + "learning_rate": 4.631767118402283e-05, + "loss": 0.0031, + "step": 41320 + }, + { + "epoch": 7.37, + "learning_rate": 4.631677960057061e-05, + "loss": 0.0043, + "step": 41330 + }, + { + "epoch": 7.37, + "learning_rate": 4.63158880171184e-05, + "loss": 0.0027, + "step": 41340 + }, + { + "epoch": 7.37, + "learning_rate": 4.6314996433666194e-05, + "loss": 0.004, + "step": 41350 + }, + { + "epoch": 7.38, + "learning_rate": 4.631410485021398e-05, + "loss": 0.0051, + "step": 41360 + }, + { + "epoch": 7.38, + "learning_rate": 4.6313213266761776e-05, + "loss": 0.0019, + "step": 41370 + }, + { + "epoch": 7.38, + "learning_rate": 4.631232168330956e-05, + "loss": 0.0042, + "step": 41380 + }, + { + "epoch": 7.38, + "learning_rate": 4.631143009985735e-05, + "loss": 0.0046, + "step": 41390 + }, + { + "epoch": 7.38, + "learning_rate": 4.6310538516405136e-05, + "loss": 0.0064, + "step": 41400 + }, + { + "epoch": 7.38, + "learning_rate": 4.630964693295293e-05, + "loss": 0.0041, + "step": 41410 + }, + { + "epoch": 7.39, + "learning_rate": 4.630875534950072e-05, + "loss": 0.0047, + "step": 41420 + }, + { + "epoch": 7.39, + "learning_rate": 4.63078637660485e-05, + "loss": 0.0028, + "step": 41430 + }, + { + "epoch": 7.39, + "learning_rate": 4.6306972182596294e-05, + "loss": 0.0091, + "step": 41440 + }, + { + "epoch": 7.39, + "learning_rate": 4.630608059914408e-05, + "loss": 0.0029, + "step": 41450 + }, + { + "epoch": 7.39, + "learning_rate": 4.630518901569187e-05, + "loss": 0.0064, + "step": 41460 + }, + { + "epoch": 7.39, + "learning_rate": 4.6304297432239654e-05, + "loss": 0.0044, + "step": 41470 + }, + { + "epoch": 7.4, + "learning_rate": 4.630340584878745e-05, + "loss": 0.0041, + "step": 41480 + }, + { + "epoch": 7.4, + "learning_rate": 4.630251426533524e-05, + "loss": 0.009, + "step": 41490 + }, + { + "epoch": 7.4, + "learning_rate": 4.630162268188303e-05, + "loss": 0.0064, + "step": 41500 + }, + { + "epoch": 7.4, + "learning_rate": 4.630073109843082e-05, + "loss": 0.0063, + "step": 41510 + }, + { + "epoch": 7.4, + "learning_rate": 4.6299839514978604e-05, + "loss": 0.0064, + "step": 41520 + }, + { + "epoch": 7.41, + "learning_rate": 4.6298947931526395e-05, + "loss": 0.0054, + "step": 41530 + }, + { + "epoch": 7.41, + "learning_rate": 4.629805634807418e-05, + "loss": 0.0048, + "step": 41540 + }, + { + "epoch": 7.41, + "learning_rate": 4.629716476462197e-05, + "loss": 0.0049, + "step": 41550 + }, + { + "epoch": 7.41, + "learning_rate": 4.6296273181169755e-05, + "loss": 0.0044, + "step": 41560 + }, + { + "epoch": 7.41, + "learning_rate": 4.6295381597717546e-05, + "loss": 0.0037, + "step": 41570 + }, + { + "epoch": 7.41, + "learning_rate": 4.629449001426534e-05, + "loss": 0.0071, + "step": 41580 + }, + { + "epoch": 7.42, + "learning_rate": 4.629359843081313e-05, + "loss": 0.006, + "step": 41590 + }, + { + "epoch": 7.42, + "learning_rate": 4.629270684736092e-05, + "loss": 0.0058, + "step": 41600 + }, + { + "epoch": 7.42, + "learning_rate": 4.6291815263908704e-05, + "loss": 0.003, + "step": 41610 + }, + { + "epoch": 7.42, + "learning_rate": 4.6290923680456495e-05, + "loss": 0.004, + "step": 41620 + }, + { + "epoch": 7.42, + "learning_rate": 4.629003209700428e-05, + "loss": 0.0035, + "step": 41630 + }, + { + "epoch": 7.43, + "learning_rate": 4.628914051355207e-05, + "loss": 0.0085, + "step": 41640 + }, + { + "epoch": 7.43, + "learning_rate": 4.628824893009986e-05, + "loss": 0.0067, + "step": 41650 + }, + { + "epoch": 7.43, + "learning_rate": 4.6287357346647647e-05, + "loss": 0.0028, + "step": 41660 + }, + { + "epoch": 7.43, + "learning_rate": 4.628646576319544e-05, + "loss": 0.0037, + "step": 41670 + }, + { + "epoch": 7.43, + "learning_rate": 4.628557417974322e-05, + "loss": 0.0025, + "step": 41680 + }, + { + "epoch": 7.43, + "learning_rate": 4.6284682596291013e-05, + "loss": 0.0059, + "step": 41690 + }, + { + "epoch": 7.44, + "learning_rate": 4.6283791012838805e-05, + "loss": 0.0084, + "step": 41700 + }, + { + "epoch": 7.44, + "learning_rate": 4.6282899429386596e-05, + "loss": 0.0057, + "step": 41710 + }, + { + "epoch": 7.44, + "learning_rate": 4.628200784593438e-05, + "loss": 0.0054, + "step": 41720 + }, + { + "epoch": 7.44, + "learning_rate": 4.628111626248217e-05, + "loss": 0.0056, + "step": 41730 + }, + { + "epoch": 7.44, + "learning_rate": 4.628022467902996e-05, + "loss": 0.0056, + "step": 41740 + }, + { + "epoch": 7.44, + "learning_rate": 4.627933309557775e-05, + "loss": 0.0073, + "step": 41750 + }, + { + "epoch": 7.45, + "learning_rate": 4.627844151212554e-05, + "loss": 0.0053, + "step": 41760 + }, + { + "epoch": 7.45, + "learning_rate": 4.627754992867332e-05, + "loss": 0.0072, + "step": 41770 + }, + { + "epoch": 7.45, + "learning_rate": 4.6276658345221114e-05, + "loss": 0.0065, + "step": 41780 + }, + { + "epoch": 7.45, + "learning_rate": 4.62757667617689e-05, + "loss": 0.0039, + "step": 41790 + }, + { + "epoch": 7.45, + "learning_rate": 4.627487517831669e-05, + "loss": 0.0032, + "step": 41800 + }, + { + "epoch": 7.46, + "learning_rate": 4.627398359486448e-05, + "loss": 0.0034, + "step": 41810 + }, + { + "epoch": 7.46, + "learning_rate": 4.627309201141227e-05, + "loss": 0.0048, + "step": 41820 + }, + { + "epoch": 7.46, + "learning_rate": 4.627220042796006e-05, + "loss": 0.0051, + "step": 41830 + }, + { + "epoch": 7.46, + "learning_rate": 4.627130884450785e-05, + "loss": 0.0047, + "step": 41840 + }, + { + "epoch": 7.46, + "learning_rate": 4.627041726105564e-05, + "loss": 0.0043, + "step": 41850 + }, + { + "epoch": 7.46, + "learning_rate": 4.626952567760342e-05, + "loss": 0.0073, + "step": 41860 + }, + { + "epoch": 7.47, + "learning_rate": 4.6268634094151215e-05, + "loss": 0.0026, + "step": 41870 + }, + { + "epoch": 7.47, + "learning_rate": 4.6267742510699006e-05, + "loss": 0.0063, + "step": 41880 + }, + { + "epoch": 7.47, + "learning_rate": 4.626685092724679e-05, + "loss": 0.0048, + "step": 41890 + }, + { + "epoch": 7.47, + "learning_rate": 4.626595934379458e-05, + "loss": 0.0066, + "step": 41900 + }, + { + "epoch": 7.47, + "learning_rate": 4.6265067760342366e-05, + "loss": 0.0049, + "step": 41910 + }, + { + "epoch": 7.48, + "learning_rate": 4.6264176176890164e-05, + "loss": 0.0078, + "step": 41920 + }, + { + "epoch": 7.48, + "learning_rate": 4.626328459343795e-05, + "loss": 0.0037, + "step": 41930 + }, + { + "epoch": 7.48, + "learning_rate": 4.626239300998574e-05, + "loss": 0.0072, + "step": 41940 + }, + { + "epoch": 7.48, + "learning_rate": 4.6261501426533524e-05, + "loss": 0.0039, + "step": 41950 + }, + { + "epoch": 7.48, + "learning_rate": 4.6260609843081315e-05, + "loss": 0.0064, + "step": 41960 + }, + { + "epoch": 7.48, + "learning_rate": 4.6259718259629106e-05, + "loss": 0.0053, + "step": 41970 + }, + { + "epoch": 7.49, + "learning_rate": 4.625882667617689e-05, + "loss": 0.0052, + "step": 41980 + }, + { + "epoch": 7.49, + "learning_rate": 4.625793509272468e-05, + "loss": 0.0034, + "step": 41990 + }, + { + "epoch": 7.49, + "learning_rate": 4.6257043509272466e-05, + "loss": 0.0057, + "step": 42000 + }, + { + "epoch": 7.49, + "learning_rate": 4.625615192582026e-05, + "loss": 0.0062, + "step": 42010 + }, + { + "epoch": 7.49, + "learning_rate": 4.625526034236804e-05, + "loss": 0.0067, + "step": 42020 + }, + { + "epoch": 7.49, + "learning_rate": 4.625436875891584e-05, + "loss": 0.0094, + "step": 42030 + }, + { + "epoch": 7.5, + "learning_rate": 4.6253477175463624e-05, + "loss": 0.0042, + "step": 42040 + }, + { + "epoch": 7.5, + "learning_rate": 4.6252585592011416e-05, + "loss": 0.0041, + "step": 42050 + }, + { + "epoch": 7.5, + "learning_rate": 4.625169400855921e-05, + "loss": 0.0045, + "step": 42060 + }, + { + "epoch": 7.5, + "learning_rate": 4.625080242510699e-05, + "loss": 0.0063, + "step": 42070 + }, + { + "epoch": 7.5, + "learning_rate": 4.624991084165478e-05, + "loss": 0.0068, + "step": 42080 + }, + { + "epoch": 7.51, + "learning_rate": 4.624901925820257e-05, + "loss": 0.0059, + "step": 42090 + }, + { + "epoch": 7.51, + "learning_rate": 4.624812767475036e-05, + "loss": 0.0025, + "step": 42100 + }, + { + "epoch": 7.51, + "learning_rate": 4.624723609129815e-05, + "loss": 0.0048, + "step": 42110 + }, + { + "epoch": 7.51, + "learning_rate": 4.6246344507845934e-05, + "loss": 0.0081, + "step": 42120 + }, + { + "epoch": 7.51, + "learning_rate": 4.6245452924393725e-05, + "loss": 0.005, + "step": 42130 + }, + { + "epoch": 7.51, + "learning_rate": 4.6244561340941516e-05, + "loss": 0.0046, + "step": 42140 + }, + { + "epoch": 7.52, + "learning_rate": 4.624366975748931e-05, + "loss": 0.0057, + "step": 42150 + }, + { + "epoch": 7.52, + "learning_rate": 4.624277817403709e-05, + "loss": 0.0036, + "step": 42160 + }, + { + "epoch": 7.52, + "learning_rate": 4.624188659058488e-05, + "loss": 0.0048, + "step": 42170 + }, + { + "epoch": 7.52, + "learning_rate": 4.624099500713267e-05, + "loss": 0.0034, + "step": 42180 + }, + { + "epoch": 7.52, + "learning_rate": 4.624010342368046e-05, + "loss": 0.005, + "step": 42190 + }, + { + "epoch": 7.52, + "learning_rate": 4.623921184022825e-05, + "loss": 0.0054, + "step": 42200 + }, + { + "epoch": 7.53, + "learning_rate": 4.6238320256776034e-05, + "loss": 0.0041, + "step": 42210 + }, + { + "epoch": 7.53, + "learning_rate": 4.6237428673323826e-05, + "loss": 0.005, + "step": 42220 + }, + { + "epoch": 7.53, + "learning_rate": 4.623653708987161e-05, + "loss": 0.0027, + "step": 42230 + }, + { + "epoch": 7.53, + "learning_rate": 4.62356455064194e-05, + "loss": 0.005, + "step": 42240 + }, + { + "epoch": 7.53, + "learning_rate": 4.623475392296719e-05, + "loss": 0.0062, + "step": 42250 + }, + { + "epoch": 7.54, + "learning_rate": 4.6233862339514984e-05, + "loss": 0.0058, + "step": 42260 + }, + { + "epoch": 7.54, + "learning_rate": 4.623297075606277e-05, + "loss": 0.0058, + "step": 42270 + }, + { + "epoch": 7.54, + "learning_rate": 4.623207917261056e-05, + "loss": 0.0045, + "step": 42280 + }, + { + "epoch": 7.54, + "learning_rate": 4.623118758915835e-05, + "loss": 0.0043, + "step": 42290 + }, + { + "epoch": 7.54, + "learning_rate": 4.6230296005706135e-05, + "loss": 0.0045, + "step": 42300 + }, + { + "epoch": 7.54, + "learning_rate": 4.6229404422253926e-05, + "loss": 0.0051, + "step": 42310 + }, + { + "epoch": 7.55, + "learning_rate": 4.622851283880171e-05, + "loss": 0.0038, + "step": 42320 + }, + { + "epoch": 7.55, + "learning_rate": 4.62276212553495e-05, + "loss": 0.008, + "step": 42330 + }, + { + "epoch": 7.55, + "learning_rate": 4.622672967189729e-05, + "loss": 0.0086, + "step": 42340 + }, + { + "epoch": 7.55, + "learning_rate": 4.622583808844508e-05, + "loss": 0.0042, + "step": 42350 + }, + { + "epoch": 7.55, + "learning_rate": 4.6224946504992875e-05, + "loss": 0.0032, + "step": 42360 + }, + { + "epoch": 7.56, + "learning_rate": 4.622405492154066e-05, + "loss": 0.0072, + "step": 42370 + }, + { + "epoch": 7.56, + "learning_rate": 4.622316333808845e-05, + "loss": 0.006, + "step": 42380 + }, + { + "epoch": 7.56, + "learning_rate": 4.6222271754636235e-05, + "loss": 0.0081, + "step": 42390 + }, + { + "epoch": 7.56, + "learning_rate": 4.622138017118403e-05, + "loss": 0.0029, + "step": 42400 + }, + { + "epoch": 7.56, + "learning_rate": 4.622048858773181e-05, + "loss": 0.005, + "step": 42410 + }, + { + "epoch": 7.56, + "learning_rate": 4.6219686162624826e-05, + "loss": 0.007, + "step": 42420 + }, + { + "epoch": 7.57, + "learning_rate": 4.621879457917262e-05, + "loss": 0.0035, + "step": 42430 + }, + { + "epoch": 7.57, + "learning_rate": 4.62179029957204e-05, + "loss": 0.004, + "step": 42440 + }, + { + "epoch": 7.57, + "learning_rate": 4.621701141226819e-05, + "loss": 0.0048, + "step": 42450 + }, + { + "epoch": 7.57, + "learning_rate": 4.621611982881598e-05, + "loss": 0.0052, + "step": 42460 + }, + { + "epoch": 7.57, + "learning_rate": 4.621522824536377e-05, + "loss": 0.0074, + "step": 42470 + }, + { + "epoch": 7.57, + "learning_rate": 4.621433666191155e-05, + "loss": 0.0026, + "step": 42480 + }, + { + "epoch": 7.58, + "learning_rate": 4.6213445078459344e-05, + "loss": 0.0057, + "step": 42490 + }, + { + "epoch": 7.58, + "learning_rate": 4.6212553495007135e-05, + "loss": 0.0039, + "step": 42500 + }, + { + "epoch": 7.58, + "learning_rate": 4.6211661911554926e-05, + "loss": 0.0079, + "step": 42510 + }, + { + "epoch": 7.58, + "learning_rate": 4.621077032810272e-05, + "loss": 0.008, + "step": 42520 + }, + { + "epoch": 7.58, + "learning_rate": 4.62098787446505e-05, + "loss": 0.0048, + "step": 42530 + }, + { + "epoch": 7.59, + "learning_rate": 4.620898716119829e-05, + "loss": 0.0053, + "step": 42540 + }, + { + "epoch": 7.59, + "learning_rate": 4.620809557774608e-05, + "loss": 0.0079, + "step": 42550 + }, + { + "epoch": 7.59, + "learning_rate": 4.620720399429387e-05, + "loss": 0.004, + "step": 42560 + }, + { + "epoch": 7.59, + "learning_rate": 4.6206312410841653e-05, + "loss": 0.0059, + "step": 42570 + }, + { + "epoch": 7.59, + "learning_rate": 4.6205420827389445e-05, + "loss": 0.0032, + "step": 42580 + }, + { + "epoch": 7.59, + "learning_rate": 4.6204529243937236e-05, + "loss": 0.0054, + "step": 42590 + }, + { + "epoch": 7.6, + "learning_rate": 4.620363766048502e-05, + "loss": 0.0044, + "step": 42600 + }, + { + "epoch": 7.6, + "learning_rate": 4.620274607703281e-05, + "loss": 0.0096, + "step": 42610 + }, + { + "epoch": 7.6, + "learning_rate": 4.6201854493580596e-05, + "loss": 0.006, + "step": 42620 + }, + { + "epoch": 7.6, + "learning_rate": 4.6200962910128394e-05, + "loss": 0.0056, + "step": 42630 + }, + { + "epoch": 7.6, + "learning_rate": 4.620007132667618e-05, + "loss": 0.0062, + "step": 42640 + }, + { + "epoch": 7.61, + "learning_rate": 4.619917974322397e-05, + "loss": 0.0048, + "step": 42650 + }, + { + "epoch": 7.61, + "learning_rate": 4.619828815977176e-05, + "loss": 0.0056, + "step": 42660 + }, + { + "epoch": 7.61, + "learning_rate": 4.6197396576319545e-05, + "loss": 0.0093, + "step": 42670 + }, + { + "epoch": 7.61, + "learning_rate": 4.6196504992867336e-05, + "loss": 0.0073, + "step": 42680 + }, + { + "epoch": 7.61, + "learning_rate": 4.619561340941512e-05, + "loss": 0.0064, + "step": 42690 + }, + { + "epoch": 7.61, + "learning_rate": 4.619472182596291e-05, + "loss": 0.003, + "step": 42700 + }, + { + "epoch": 7.62, + "learning_rate": 4.6193830242510696e-05, + "loss": 0.0069, + "step": 42710 + }, + { + "epoch": 7.62, + "learning_rate": 4.619293865905849e-05, + "loss": 0.0044, + "step": 42720 + }, + { + "epoch": 7.62, + "learning_rate": 4.619204707560628e-05, + "loss": 0.0053, + "step": 42730 + }, + { + "epoch": 7.62, + "learning_rate": 4.619115549215407e-05, + "loss": 0.0065, + "step": 42740 + }, + { + "epoch": 7.62, + "learning_rate": 4.619026390870186e-05, + "loss": 0.0064, + "step": 42750 + }, + { + "epoch": 7.62, + "learning_rate": 4.6189372325249646e-05, + "loss": 0.003, + "step": 42760 + }, + { + "epoch": 7.63, + "learning_rate": 4.618848074179744e-05, + "loss": 0.0038, + "step": 42770 + }, + { + "epoch": 7.63, + "learning_rate": 4.618758915834522e-05, + "loss": 0.007, + "step": 42780 + }, + { + "epoch": 7.63, + "learning_rate": 4.618669757489301e-05, + "loss": 0.0057, + "step": 42790 + }, + { + "epoch": 7.63, + "learning_rate": 4.61858059914408e-05, + "loss": 0.0032, + "step": 42800 + }, + { + "epoch": 7.63, + "learning_rate": 4.618491440798859e-05, + "loss": 0.0033, + "step": 42810 + }, + { + "epoch": 7.64, + "learning_rate": 4.618402282453638e-05, + "loss": 0.0033, + "step": 42820 + }, + { + "epoch": 7.64, + "learning_rate": 4.6183131241084164e-05, + "loss": 0.0082, + "step": 42830 + }, + { + "epoch": 7.64, + "learning_rate": 4.6182239657631955e-05, + "loss": 0.0054, + "step": 42840 + }, + { + "epoch": 7.64, + "learning_rate": 4.6181348074179746e-05, + "loss": 0.0052, + "step": 42850 + }, + { + "epoch": 7.64, + "learning_rate": 4.618045649072754e-05, + "loss": 0.0043, + "step": 42860 + }, + { + "epoch": 7.64, + "learning_rate": 4.617956490727532e-05, + "loss": 0.0058, + "step": 42870 + }, + { + "epoch": 7.65, + "learning_rate": 4.617867332382311e-05, + "loss": 0.0064, + "step": 42880 + }, + { + "epoch": 7.65, + "learning_rate": 4.6177781740370904e-05, + "loss": 0.0058, + "step": 42890 + }, + { + "epoch": 7.65, + "learning_rate": 4.617689015691869e-05, + "loss": 0.0049, + "step": 42900 + }, + { + "epoch": 7.65, + "learning_rate": 4.617599857346648e-05, + "loss": 0.0063, + "step": 42910 + }, + { + "epoch": 7.65, + "learning_rate": 4.6175106990014264e-05, + "loss": 0.0051, + "step": 42920 + }, + { + "epoch": 7.66, + "learning_rate": 4.6174215406562056e-05, + "loss": 0.0076, + "step": 42930 + }, + { + "epoch": 7.66, + "learning_rate": 4.617332382310984e-05, + "loss": 0.0051, + "step": 42940 + }, + { + "epoch": 7.66, + "learning_rate": 4.617243223965763e-05, + "loss": 0.0061, + "step": 42950 + }, + { + "epoch": 7.66, + "learning_rate": 4.617154065620542e-05, + "loss": 0.0067, + "step": 42960 + }, + { + "epoch": 7.66, + "learning_rate": 4.6170649072753214e-05, + "loss": 0.0053, + "step": 42970 + }, + { + "epoch": 7.66, + "learning_rate": 4.6169757489301005e-05, + "loss": 0.0042, + "step": 42980 + }, + { + "epoch": 7.67, + "learning_rate": 4.616886590584879e-05, + "loss": 0.0044, + "step": 42990 + }, + { + "epoch": 7.67, + "learning_rate": 4.616797432239658e-05, + "loss": 0.0049, + "step": 43000 + }, + { + "epoch": 7.67, + "learning_rate": 4.6167082738944365e-05, + "loss": 0.0037, + "step": 43010 + }, + { + "epoch": 7.67, + "learning_rate": 4.6166191155492156e-05, + "loss": 0.0063, + "step": 43020 + }, + { + "epoch": 7.67, + "learning_rate": 4.616529957203994e-05, + "loss": 0.0046, + "step": 43030 + }, + { + "epoch": 7.67, + "learning_rate": 4.616440798858773e-05, + "loss": 0.0047, + "step": 43040 + }, + { + "epoch": 7.68, + "learning_rate": 4.616351640513552e-05, + "loss": 0.0051, + "step": 43050 + }, + { + "epoch": 7.68, + "learning_rate": 4.616262482168331e-05, + "loss": 0.0054, + "step": 43060 + }, + { + "epoch": 7.68, + "learning_rate": 4.6161733238231106e-05, + "loss": 0.006, + "step": 43070 + }, + { + "epoch": 7.68, + "learning_rate": 4.616084165477889e-05, + "loss": 0.0028, + "step": 43080 + }, + { + "epoch": 7.68, + "learning_rate": 4.615995007132668e-05, + "loss": 0.0051, + "step": 43090 + }, + { + "epoch": 7.69, + "learning_rate": 4.6159058487874466e-05, + "loss": 0.0038, + "step": 43100 + }, + { + "epoch": 7.69, + "learning_rate": 4.615816690442226e-05, + "loss": 0.0045, + "step": 43110 + }, + { + "epoch": 7.69, + "learning_rate": 4.615727532097005e-05, + "loss": 0.0082, + "step": 43120 + }, + { + "epoch": 7.69, + "learning_rate": 4.615638373751783e-05, + "loss": 0.0061, + "step": 43130 + }, + { + "epoch": 7.69, + "learning_rate": 4.6155492154065624e-05, + "loss": 0.0029, + "step": 43140 + }, + { + "epoch": 7.69, + "learning_rate": 4.615460057061341e-05, + "loss": 0.0042, + "step": 43150 + }, + { + "epoch": 7.7, + "learning_rate": 4.61537089871612e-05, + "loss": 0.0048, + "step": 43160 + }, + { + "epoch": 7.7, + "learning_rate": 4.6152817403708984e-05, + "loss": 0.0047, + "step": 43170 + }, + { + "epoch": 7.7, + "learning_rate": 4.615192582025678e-05, + "loss": 0.0052, + "step": 43180 + }, + { + "epoch": 7.7, + "learning_rate": 4.6151034236804566e-05, + "loss": 0.0047, + "step": 43190 + }, + { + "epoch": 7.7, + "learning_rate": 4.615014265335236e-05, + "loss": 0.0069, + "step": 43200 + }, + { + "epoch": 7.71, + "learning_rate": 4.614925106990015e-05, + "loss": 0.0048, + "step": 43210 + }, + { + "epoch": 7.71, + "learning_rate": 4.614835948644793e-05, + "loss": 0.0072, + "step": 43220 + }, + { + "epoch": 7.71, + "learning_rate": 4.6147467902995724e-05, + "loss": 0.0062, + "step": 43230 + }, + { + "epoch": 7.71, + "learning_rate": 4.614657631954351e-05, + "loss": 0.0062, + "step": 43240 + }, + { + "epoch": 7.71, + "learning_rate": 4.61456847360913e-05, + "loss": 0.0083, + "step": 43250 + }, + { + "epoch": 7.71, + "learning_rate": 4.6144793152639084e-05, + "loss": 0.0058, + "step": 43260 + }, + { + "epoch": 7.72, + "learning_rate": 4.6143901569186876e-05, + "loss": 0.0084, + "step": 43270 + }, + { + "epoch": 7.72, + "learning_rate": 4.614300998573467e-05, + "loss": 0.0055, + "step": 43280 + }, + { + "epoch": 7.72, + "learning_rate": 4.614211840228246e-05, + "loss": 0.0048, + "step": 43290 + }, + { + "epoch": 7.72, + "learning_rate": 4.614122681883025e-05, + "loss": 0.0035, + "step": 43300 + }, + { + "epoch": 7.72, + "learning_rate": 4.6140335235378034e-05, + "loss": 0.0031, + "step": 43310 + }, + { + "epoch": 7.72, + "learning_rate": 4.6139443651925825e-05, + "loss": 0.0041, + "step": 43320 + }, + { + "epoch": 7.73, + "learning_rate": 4.613855206847361e-05, + "loss": 0.0076, + "step": 43330 + }, + { + "epoch": 7.73, + "learning_rate": 4.61376604850214e-05, + "loss": 0.0054, + "step": 43340 + }, + { + "epoch": 7.73, + "learning_rate": 4.613676890156919e-05, + "loss": 0.0042, + "step": 43350 + }, + { + "epoch": 7.73, + "learning_rate": 4.6135877318116976e-05, + "loss": 0.0074, + "step": 43360 + }, + { + "epoch": 7.73, + "learning_rate": 4.613498573466477e-05, + "loss": 0.0046, + "step": 43370 + }, + { + "epoch": 7.74, + "learning_rate": 4.613409415121255e-05, + "loss": 0.0082, + "step": 43380 + }, + { + "epoch": 7.74, + "learning_rate": 4.613320256776034e-05, + "loss": 0.0059, + "step": 43390 + }, + { + "epoch": 7.74, + "learning_rate": 4.6132310984308134e-05, + "loss": 0.0045, + "step": 43400 + }, + { + "epoch": 7.74, + "learning_rate": 4.6131419400855925e-05, + "loss": 0.0029, + "step": 43410 + }, + { + "epoch": 7.74, + "learning_rate": 4.613052781740371e-05, + "loss": 0.006, + "step": 43420 + }, + { + "epoch": 7.74, + "learning_rate": 4.61296362339515e-05, + "loss": 0.0058, + "step": 43430 + }, + { + "epoch": 7.75, + "learning_rate": 4.612874465049929e-05, + "loss": 0.0088, + "step": 43440 + }, + { + "epoch": 7.75, + "learning_rate": 4.612785306704708e-05, + "loss": 0.0083, + "step": 43450 + }, + { + "epoch": 7.75, + "learning_rate": 4.612696148359487e-05, + "loss": 0.0054, + "step": 43460 + }, + { + "epoch": 7.75, + "learning_rate": 4.612606990014265e-05, + "loss": 0.0051, + "step": 43470 + }, + { + "epoch": 7.75, + "learning_rate": 4.6125178316690443e-05, + "loss": 0.0037, + "step": 43480 + }, + { + "epoch": 7.75, + "learning_rate": 4.612428673323823e-05, + "loss": 0.0046, + "step": 43490 + }, + { + "epoch": 7.76, + "learning_rate": 4.612339514978602e-05, + "loss": 0.0095, + "step": 43500 + }, + { + "epoch": 7.76, + "learning_rate": 4.612250356633381e-05, + "loss": 0.0045, + "step": 43510 + }, + { + "epoch": 7.76, + "learning_rate": 4.61216119828816e-05, + "loss": 0.0062, + "step": 43520 + }, + { + "epoch": 7.76, + "learning_rate": 4.612072039942939e-05, + "loss": 0.0056, + "step": 43530 + }, + { + "epoch": 7.76, + "learning_rate": 4.611982881597718e-05, + "loss": 0.0075, + "step": 43540 + }, + { + "epoch": 7.77, + "learning_rate": 4.611893723252497e-05, + "loss": 0.0056, + "step": 43550 + }, + { + "epoch": 7.77, + "learning_rate": 4.611804564907275e-05, + "loss": 0.0057, + "step": 43560 + }, + { + "epoch": 7.77, + "learning_rate": 4.6117154065620544e-05, + "loss": 0.0039, + "step": 43570 + }, + { + "epoch": 7.77, + "learning_rate": 4.6116262482168335e-05, + "loss": 0.0064, + "step": 43580 + }, + { + "epoch": 7.77, + "learning_rate": 4.611537089871612e-05, + "loss": 0.0036, + "step": 43590 + }, + { + "epoch": 7.77, + "learning_rate": 4.611447931526391e-05, + "loss": 0.0059, + "step": 43600 + }, + { + "epoch": 7.78, + "learning_rate": 4.6113587731811695e-05, + "loss": 0.0065, + "step": 43610 + }, + { + "epoch": 7.78, + "learning_rate": 4.611269614835949e-05, + "loss": 0.0032, + "step": 43620 + }, + { + "epoch": 7.78, + "learning_rate": 4.611180456490728e-05, + "loss": 0.0036, + "step": 43630 + }, + { + "epoch": 7.78, + "learning_rate": 4.611091298145507e-05, + "loss": 0.0047, + "step": 43640 + }, + { + "epoch": 7.78, + "learning_rate": 4.6110021398002853e-05, + "loss": 0.0032, + "step": 43650 + }, + { + "epoch": 7.79, + "learning_rate": 4.6109129814550645e-05, + "loss": 0.006, + "step": 43660 + }, + { + "epoch": 7.79, + "learning_rate": 4.6108238231098436e-05, + "loss": 0.0036, + "step": 43670 + }, + { + "epoch": 7.79, + "learning_rate": 4.610734664764622e-05, + "loss": 0.0035, + "step": 43680 + }, + { + "epoch": 7.79, + "learning_rate": 4.610645506419401e-05, + "loss": 0.0049, + "step": 43690 + }, + { + "epoch": 7.79, + "learning_rate": 4.6105563480741796e-05, + "loss": 0.0041, + "step": 43700 + }, + { + "epoch": 7.79, + "learning_rate": 4.610467189728959e-05, + "loss": 0.006, + "step": 43710 + }, + { + "epoch": 7.8, + "learning_rate": 4.610378031383737e-05, + "loss": 0.0045, + "step": 43720 + }, + { + "epoch": 7.8, + "learning_rate": 4.610288873038517e-05, + "loss": 0.0039, + "step": 43730 + }, + { + "epoch": 7.8, + "learning_rate": 4.6101997146932954e-05, + "loss": 0.005, + "step": 43740 + }, + { + "epoch": 7.8, + "learning_rate": 4.6101105563480745e-05, + "loss": 0.0072, + "step": 43750 + }, + { + "epoch": 7.8, + "learning_rate": 4.6100213980028536e-05, + "loss": 0.0039, + "step": 43760 + }, + { + "epoch": 7.8, + "learning_rate": 4.609932239657632e-05, + "loss": 0.0034, + "step": 43770 + }, + { + "epoch": 7.81, + "learning_rate": 4.609843081312411e-05, + "loss": 0.0058, + "step": 43780 + }, + { + "epoch": 7.81, + "learning_rate": 4.6097539229671896e-05, + "loss": 0.0033, + "step": 43790 + }, + { + "epoch": 7.81, + "learning_rate": 4.609664764621969e-05, + "loss": 0.0092, + "step": 43800 + }, + { + "epoch": 7.81, + "learning_rate": 4.609575606276748e-05, + "loss": 0.0034, + "step": 43810 + }, + { + "epoch": 7.81, + "learning_rate": 4.609486447931526e-05, + "loss": 0.0056, + "step": 43820 + }, + { + "epoch": 7.82, + "learning_rate": 4.6093972895863055e-05, + "loss": 0.0049, + "step": 43830 + }, + { + "epoch": 7.82, + "learning_rate": 4.6093081312410846e-05, + "loss": 0.0081, + "step": 43840 + }, + { + "epoch": 7.82, + "learning_rate": 4.609218972895864e-05, + "loss": 0.0046, + "step": 43850 + }, + { + "epoch": 7.82, + "learning_rate": 4.609129814550642e-05, + "loss": 0.0073, + "step": 43860 + }, + { + "epoch": 7.82, + "learning_rate": 4.609040656205421e-05, + "loss": 0.0043, + "step": 43870 + }, + { + "epoch": 7.82, + "learning_rate": 4.6089514978602e-05, + "loss": 0.0041, + "step": 43880 + }, + { + "epoch": 7.83, + "learning_rate": 4.608862339514979e-05, + "loss": 0.0053, + "step": 43890 + }, + { + "epoch": 7.83, + "learning_rate": 4.608773181169758e-05, + "loss": 0.0063, + "step": 43900 + }, + { + "epoch": 7.83, + "learning_rate": 4.6086840228245364e-05, + "loss": 0.0037, + "step": 43910 + }, + { + "epoch": 7.83, + "learning_rate": 4.6085948644793155e-05, + "loss": 0.0047, + "step": 43920 + }, + { + "epoch": 7.83, + "learning_rate": 4.608505706134094e-05, + "loss": 0.003, + "step": 43930 + }, + { + "epoch": 7.84, + "learning_rate": 4.608416547788873e-05, + "loss": 0.0059, + "step": 43940 + }, + { + "epoch": 7.84, + "learning_rate": 4.608327389443652e-05, + "loss": 0.0048, + "step": 43950 + }, + { + "epoch": 7.84, + "learning_rate": 4.608238231098431e-05, + "loss": 0.0086, + "step": 43960 + }, + { + "epoch": 7.84, + "learning_rate": 4.60814907275321e-05, + "loss": 0.0094, + "step": 43970 + }, + { + "epoch": 7.84, + "learning_rate": 4.608059914407989e-05, + "loss": 0.0072, + "step": 43980 + }, + { + "epoch": 7.84, + "learning_rate": 4.607970756062768e-05, + "loss": 0.0061, + "step": 43990 + }, + { + "epoch": 7.85, + "learning_rate": 4.6078815977175464e-05, + "loss": 0.0047, + "step": 44000 + }, + { + "epoch": 7.85, + "learning_rate": 4.6077924393723256e-05, + "loss": 0.0047, + "step": 44010 + }, + { + "epoch": 7.85, + "learning_rate": 4.607703281027104e-05, + "loss": 0.0057, + "step": 44020 + }, + { + "epoch": 7.85, + "learning_rate": 4.607614122681883e-05, + "loss": 0.0027, + "step": 44030 + }, + { + "epoch": 7.85, + "learning_rate": 4.607524964336662e-05, + "loss": 0.0051, + "step": 44040 + }, + { + "epoch": 7.85, + "learning_rate": 4.607435805991441e-05, + "loss": 0.0044, + "step": 44050 + }, + { + "epoch": 7.86, + "learning_rate": 4.60734664764622e-05, + "loss": 0.0057, + "step": 44060 + }, + { + "epoch": 7.86, + "learning_rate": 4.607257489300999e-05, + "loss": 0.0063, + "step": 44070 + }, + { + "epoch": 7.86, + "learning_rate": 4.607168330955778e-05, + "loss": 0.0031, + "step": 44080 + }, + { + "epoch": 7.86, + "learning_rate": 4.6070791726105565e-05, + "loss": 0.0063, + "step": 44090 + }, + { + "epoch": 7.86, + "learning_rate": 4.6069900142653356e-05, + "loss": 0.0035, + "step": 44100 + }, + { + "epoch": 7.87, + "learning_rate": 4.606900855920114e-05, + "loss": 0.0083, + "step": 44110 + }, + { + "epoch": 7.87, + "learning_rate": 4.606811697574893e-05, + "loss": 0.0062, + "step": 44120 + }, + { + "epoch": 7.87, + "learning_rate": 4.606722539229672e-05, + "loss": 0.0068, + "step": 44130 + }, + { + "epoch": 7.87, + "learning_rate": 4.606633380884451e-05, + "loss": 0.0085, + "step": 44140 + }, + { + "epoch": 7.87, + "learning_rate": 4.60654422253923e-05, + "loss": 0.0057, + "step": 44150 + }, + { + "epoch": 7.87, + "learning_rate": 4.606455064194008e-05, + "loss": 0.0055, + "step": 44160 + }, + { + "epoch": 7.88, + "learning_rate": 4.606365905848788e-05, + "loss": 0.0036, + "step": 44170 + }, + { + "epoch": 7.88, + "learning_rate": 4.6062767475035666e-05, + "loss": 0.0063, + "step": 44180 + }, + { + "epoch": 7.88, + "learning_rate": 4.606187589158346e-05, + "loss": 0.0069, + "step": 44190 + }, + { + "epoch": 7.88, + "learning_rate": 4.606098430813124e-05, + "loss": 0.0057, + "step": 44200 + }, + { + "epoch": 7.88, + "learning_rate": 4.606009272467903e-05, + "loss": 0.005, + "step": 44210 + }, + { + "epoch": 7.89, + "learning_rate": 4.6059201141226824e-05, + "loss": 0.0052, + "step": 44220 + }, + { + "epoch": 7.89, + "learning_rate": 4.605830955777461e-05, + "loss": 0.0042, + "step": 44230 + }, + { + "epoch": 7.89, + "learning_rate": 4.60574179743224e-05, + "loss": 0.0035, + "step": 44240 + }, + { + "epoch": 7.89, + "learning_rate": 4.6056526390870184e-05, + "loss": 0.0043, + "step": 44250 + }, + { + "epoch": 7.89, + "learning_rate": 4.6055634807417975e-05, + "loss": 0.0033, + "step": 44260 + }, + { + "epoch": 7.89, + "learning_rate": 4.6054743223965766e-05, + "loss": 0.0042, + "step": 44270 + }, + { + "epoch": 7.9, + "learning_rate": 4.605385164051356e-05, + "loss": 0.0045, + "step": 44280 + }, + { + "epoch": 7.9, + "learning_rate": 4.605296005706134e-05, + "loss": 0.0047, + "step": 44290 + }, + { + "epoch": 7.9, + "learning_rate": 4.605206847360913e-05, + "loss": 0.0051, + "step": 44300 + }, + { + "epoch": 7.9, + "learning_rate": 4.6051176890156924e-05, + "loss": 0.0037, + "step": 44310 + }, + { + "epoch": 7.9, + "learning_rate": 4.605028530670471e-05, + "loss": 0.0086, + "step": 44320 + }, + { + "epoch": 7.9, + "learning_rate": 4.60493937232525e-05, + "loss": 0.0043, + "step": 44330 + }, + { + "epoch": 7.91, + "learning_rate": 4.6048502139800284e-05, + "loss": 0.0046, + "step": 44340 + }, + { + "epoch": 7.91, + "learning_rate": 4.6047610556348075e-05, + "loss": 0.0047, + "step": 44350 + }, + { + "epoch": 7.91, + "learning_rate": 4.604671897289587e-05, + "loss": 0.0052, + "step": 44360 + }, + { + "epoch": 7.91, + "learning_rate": 4.604582738944365e-05, + "loss": 0.0028, + "step": 44370 + }, + { + "epoch": 7.91, + "learning_rate": 4.604493580599144e-05, + "loss": 0.0048, + "step": 44380 + }, + { + "epoch": 7.92, + "learning_rate": 4.6044044222539234e-05, + "loss": 0.0076, + "step": 44390 + }, + { + "epoch": 7.92, + "learning_rate": 4.6043152639087025e-05, + "loss": 0.0051, + "step": 44400 + }, + { + "epoch": 7.92, + "learning_rate": 4.604226105563481e-05, + "loss": 0.0088, + "step": 44410 + }, + { + "epoch": 7.92, + "learning_rate": 4.60413694721826e-05, + "loss": 0.0032, + "step": 44420 + }, + { + "epoch": 7.92, + "learning_rate": 4.6040477888730385e-05, + "loss": 0.0031, + "step": 44430 + }, + { + "epoch": 7.92, + "learning_rate": 4.6039586305278176e-05, + "loss": 0.0035, + "step": 44440 + }, + { + "epoch": 7.93, + "learning_rate": 4.603869472182597e-05, + "loss": 0.0056, + "step": 44450 + }, + { + "epoch": 7.93, + "learning_rate": 4.603780313837375e-05, + "loss": 0.005, + "step": 44460 + }, + { + "epoch": 7.93, + "learning_rate": 4.603691155492154e-05, + "loss": 0.0054, + "step": 44470 + }, + { + "epoch": 7.93, + "learning_rate": 4.603601997146933e-05, + "loss": 0.0088, + "step": 44480 + }, + { + "epoch": 7.93, + "learning_rate": 4.603512838801712e-05, + "loss": 0.004, + "step": 44490 + }, + { + "epoch": 7.94, + "learning_rate": 4.603423680456491e-05, + "loss": 0.0062, + "step": 44500 + }, + { + "epoch": 7.94, + "learning_rate": 4.60333452211127e-05, + "loss": 0.0057, + "step": 44510 + }, + { + "epoch": 7.94, + "learning_rate": 4.6032453637660485e-05, + "loss": 0.0055, + "step": 44520 + }, + { + "epoch": 7.94, + "learning_rate": 4.6031562054208277e-05, + "loss": 0.0083, + "step": 44530 + }, + { + "epoch": 7.94, + "learning_rate": 4.603067047075607e-05, + "loss": 0.0047, + "step": 44540 + }, + { + "epoch": 7.94, + "learning_rate": 4.602977888730385e-05, + "loss": 0.0049, + "step": 44550 + }, + { + "epoch": 7.95, + "learning_rate": 4.6028887303851643e-05, + "loss": 0.0039, + "step": 44560 + }, + { + "epoch": 7.95, + "learning_rate": 4.602799572039943e-05, + "loss": 0.0032, + "step": 44570 + }, + { + "epoch": 7.95, + "learning_rate": 4.602710413694722e-05, + "loss": 0.0075, + "step": 44580 + }, + { + "epoch": 7.95, + "learning_rate": 4.602621255349501e-05, + "loss": 0.004, + "step": 44590 + }, + { + "epoch": 7.95, + "learning_rate": 4.6025320970042795e-05, + "loss": 0.0043, + "step": 44600 + }, + { + "epoch": 7.95, + "learning_rate": 4.602442938659059e-05, + "loss": 0.0052, + "step": 44610 + }, + { + "epoch": 7.96, + "learning_rate": 4.602353780313838e-05, + "loss": 0.0066, + "step": 44620 + }, + { + "epoch": 7.96, + "learning_rate": 4.602264621968617e-05, + "loss": 0.0031, + "step": 44630 + }, + { + "epoch": 7.96, + "learning_rate": 4.602175463623395e-05, + "loss": 0.0059, + "step": 44640 + }, + { + "epoch": 7.96, + "learning_rate": 4.6020863052781744e-05, + "loss": 0.0038, + "step": 44650 + }, + { + "epoch": 7.96, + "learning_rate": 4.601997146932953e-05, + "loss": 0.0062, + "step": 44660 + }, + { + "epoch": 7.97, + "learning_rate": 4.601907988587732e-05, + "loss": 0.0056, + "step": 44670 + }, + { + "epoch": 7.97, + "learning_rate": 4.601818830242511e-05, + "loss": 0.0042, + "step": 44680 + }, + { + "epoch": 7.97, + "learning_rate": 4.6017296718972895e-05, + "loss": 0.0066, + "step": 44690 + }, + { + "epoch": 7.97, + "learning_rate": 4.6016405135520686e-05, + "loss": 0.0029, + "step": 44700 + }, + { + "epoch": 7.97, + "learning_rate": 4.601551355206847e-05, + "loss": 0.0051, + "step": 44710 + }, + { + "epoch": 7.97, + "learning_rate": 4.601462196861627e-05, + "loss": 0.0019, + "step": 44720 + }, + { + "epoch": 7.98, + "learning_rate": 4.601373038516405e-05, + "loss": 0.007, + "step": 44730 + }, + { + "epoch": 7.98, + "learning_rate": 4.6012838801711845e-05, + "loss": 0.0073, + "step": 44740 + }, + { + "epoch": 7.98, + "learning_rate": 4.601194721825963e-05, + "loss": 0.0069, + "step": 44750 + }, + { + "epoch": 7.98, + "learning_rate": 4.601105563480742e-05, + "loss": 0.0059, + "step": 44760 + }, + { + "epoch": 7.98, + "learning_rate": 4.601016405135521e-05, + "loss": 0.0049, + "step": 44770 + }, + { + "epoch": 7.99, + "learning_rate": 4.6009272467902996e-05, + "loss": 0.0036, + "step": 44780 + }, + { + "epoch": 7.99, + "learning_rate": 4.600838088445079e-05, + "loss": 0.0041, + "step": 44790 + }, + { + "epoch": 7.99, + "learning_rate": 4.600748930099857e-05, + "loss": 0.0044, + "step": 44800 + }, + { + "epoch": 7.99, + "learning_rate": 4.600659771754636e-05, + "loss": 0.0031, + "step": 44810 + }, + { + "epoch": 7.99, + "learning_rate": 4.6005706134094154e-05, + "loss": 0.0047, + "step": 44820 + }, + { + "epoch": 7.99, + "learning_rate": 4.6004814550641945e-05, + "loss": 0.006, + "step": 44830 + }, + { + "epoch": 8.0, + "learning_rate": 4.6003922967189736e-05, + "loss": 0.0049, + "step": 44840 + }, + { + "epoch": 8.0, + "learning_rate": 4.600303138373752e-05, + "loss": 0.0045, + "step": 44850 + }, + { + "epoch": 8.0, + "learning_rate": 4.600213980028531e-05, + "loss": 0.0065, + "step": 44860 + }, + { + "epoch": 8.0, + "eval_loss": 0.0144475307315588, + "eval_runtime": 195.9591, + "eval_samples_per_second": 23.673, + "eval_steps_per_second": 2.96, + "step": 44864 + }, + { + "epoch": 8.0, + "learning_rate": 4.6001248216833096e-05, + "loss": 0.002, + "step": 44870 + }, + { + "epoch": 8.0, + "learning_rate": 4.600035663338089e-05, + "loss": 0.0058, + "step": 44880 + }, + { + "epoch": 8.0, + "learning_rate": 4.599946504992867e-05, + "loss": 0.0038, + "step": 44890 + }, + { + "epoch": 8.01, + "learning_rate": 4.599857346647646e-05, + "loss": 0.0027, + "step": 44900 + }, + { + "epoch": 8.01, + "learning_rate": 4.5997681883024254e-05, + "loss": 0.004, + "step": 44910 + }, + { + "epoch": 8.01, + "learning_rate": 4.599679029957204e-05, + "loss": 0.0034, + "step": 44920 + }, + { + "epoch": 8.01, + "learning_rate": 4.599589871611983e-05, + "loss": 0.0058, + "step": 44930 + }, + { + "epoch": 8.01, + "learning_rate": 4.599500713266762e-05, + "loss": 0.0039, + "step": 44940 + }, + { + "epoch": 8.02, + "learning_rate": 4.599411554921541e-05, + "loss": 0.0061, + "step": 44950 + }, + { + "epoch": 8.02, + "learning_rate": 4.59932239657632e-05, + "loss": 0.0041, + "step": 44960 + }, + { + "epoch": 8.02, + "learning_rate": 4.599233238231099e-05, + "loss": 0.0044, + "step": 44970 + }, + { + "epoch": 8.02, + "learning_rate": 4.599144079885877e-05, + "loss": 0.0033, + "step": 44980 + }, + { + "epoch": 8.02, + "learning_rate": 4.5990549215406564e-05, + "loss": 0.0061, + "step": 44990 + }, + { + "epoch": 8.02, + "learning_rate": 4.5989657631954355e-05, + "loss": 0.0021, + "step": 45000 + }, + { + "epoch": 8.03, + "learning_rate": 4.598876604850214e-05, + "loss": 0.0048, + "step": 45010 + }, + { + "epoch": 8.03, + "learning_rate": 4.598787446504993e-05, + "loss": 0.0042, + "step": 45020 + }, + { + "epoch": 8.03, + "learning_rate": 4.5986982881597715e-05, + "loss": 0.0042, + "step": 45030 + }, + { + "epoch": 8.03, + "learning_rate": 4.5986091298145506e-05, + "loss": 0.0062, + "step": 45040 + }, + { + "epoch": 8.03, + "learning_rate": 4.59851997146933e-05, + "loss": 0.0044, + "step": 45050 + }, + { + "epoch": 8.03, + "learning_rate": 4.598430813124109e-05, + "loss": 0.0053, + "step": 45060 + }, + { + "epoch": 8.04, + "learning_rate": 4.598341654778888e-05, + "loss": 0.0038, + "step": 45070 + }, + { + "epoch": 8.04, + "learning_rate": 4.5982524964336664e-05, + "loss": 0.0046, + "step": 45080 + }, + { + "epoch": 8.04, + "learning_rate": 4.5981633380884456e-05, + "loss": 0.0048, + "step": 45090 + }, + { + "epoch": 8.04, + "learning_rate": 4.598074179743224e-05, + "loss": 0.0022, + "step": 45100 + }, + { + "epoch": 8.04, + "learning_rate": 4.597985021398003e-05, + "loss": 0.0067, + "step": 45110 + }, + { + "epoch": 8.05, + "learning_rate": 4.5978958630527816e-05, + "loss": 0.0032, + "step": 45120 + }, + { + "epoch": 8.05, + "learning_rate": 4.597806704707561e-05, + "loss": 0.0043, + "step": 45130 + }, + { + "epoch": 8.05, + "learning_rate": 4.59771754636234e-05, + "loss": 0.0027, + "step": 45140 + }, + { + "epoch": 8.05, + "learning_rate": 4.597628388017118e-05, + "loss": 0.0053, + "step": 45150 + }, + { + "epoch": 8.05, + "learning_rate": 4.597539229671898e-05, + "loss": 0.0048, + "step": 45160 + }, + { + "epoch": 8.05, + "learning_rate": 4.5974500713266765e-05, + "loss": 0.0025, + "step": 45170 + }, + { + "epoch": 8.06, + "learning_rate": 4.5973609129814556e-05, + "loss": 0.0044, + "step": 45180 + }, + { + "epoch": 8.06, + "learning_rate": 4.597271754636234e-05, + "loss": 0.0066, + "step": 45190 + }, + { + "epoch": 8.06, + "learning_rate": 4.597182596291013e-05, + "loss": 0.004, + "step": 45200 + }, + { + "epoch": 8.06, + "learning_rate": 4.5970934379457916e-05, + "loss": 0.004, + "step": 45210 + }, + { + "epoch": 8.06, + "learning_rate": 4.597004279600571e-05, + "loss": 0.0049, + "step": 45220 + }, + { + "epoch": 8.07, + "learning_rate": 4.59691512125535e-05, + "loss": 0.0038, + "step": 45230 + }, + { + "epoch": 8.07, + "learning_rate": 4.596825962910128e-05, + "loss": 0.0036, + "step": 45240 + }, + { + "epoch": 8.07, + "learning_rate": 4.5967368045649074e-05, + "loss": 0.004, + "step": 45250 + }, + { + "epoch": 8.07, + "learning_rate": 4.596647646219686e-05, + "loss": 0.0053, + "step": 45260 + }, + { + "epoch": 8.07, + "learning_rate": 4.596558487874466e-05, + "loss": 0.0057, + "step": 45270 + }, + { + "epoch": 8.07, + "learning_rate": 4.596469329529244e-05, + "loss": 0.0072, + "step": 45280 + }, + { + "epoch": 8.08, + "learning_rate": 4.596380171184023e-05, + "loss": 0.0051, + "step": 45290 + }, + { + "epoch": 8.08, + "learning_rate": 4.5962910128388024e-05, + "loss": 0.0055, + "step": 45300 + }, + { + "epoch": 8.08, + "learning_rate": 4.596201854493581e-05, + "loss": 0.0058, + "step": 45310 + }, + { + "epoch": 8.08, + "learning_rate": 4.59611269614836e-05, + "loss": 0.0041, + "step": 45320 + }, + { + "epoch": 8.08, + "learning_rate": 4.5960235378031384e-05, + "loss": 0.005, + "step": 45330 + }, + { + "epoch": 8.08, + "learning_rate": 4.5959343794579175e-05, + "loss": 0.0077, + "step": 45340 + }, + { + "epoch": 8.09, + "learning_rate": 4.595845221112696e-05, + "loss": 0.0052, + "step": 45350 + }, + { + "epoch": 8.09, + "learning_rate": 4.595756062767475e-05, + "loss": 0.0046, + "step": 45360 + }, + { + "epoch": 8.09, + "learning_rate": 4.595666904422254e-05, + "loss": 0.0035, + "step": 45370 + }, + { + "epoch": 8.09, + "learning_rate": 4.595577746077033e-05, + "loss": 0.0036, + "step": 45380 + }, + { + "epoch": 8.09, + "learning_rate": 4.5954885877318124e-05, + "loss": 0.0019, + "step": 45390 + }, + { + "epoch": 8.1, + "learning_rate": 4.595399429386591e-05, + "loss": 0.0071, + "step": 45400 + }, + { + "epoch": 8.1, + "learning_rate": 4.59531027104137e-05, + "loss": 0.0056, + "step": 45410 + }, + { + "epoch": 8.1, + "learning_rate": 4.5952211126961484e-05, + "loss": 0.0065, + "step": 45420 + }, + { + "epoch": 8.1, + "learning_rate": 4.5951319543509275e-05, + "loss": 0.0043, + "step": 45430 + }, + { + "epoch": 8.1, + "learning_rate": 4.595042796005706e-05, + "loss": 0.0061, + "step": 45440 + }, + { + "epoch": 8.1, + "learning_rate": 4.594953637660485e-05, + "loss": 0.0068, + "step": 45450 + }, + { + "epoch": 8.11, + "learning_rate": 4.594864479315264e-05, + "loss": 0.0072, + "step": 45460 + }, + { + "epoch": 8.11, + "learning_rate": 4.594775320970043e-05, + "loss": 0.0037, + "step": 45470 + }, + { + "epoch": 8.11, + "learning_rate": 4.594686162624822e-05, + "loss": 0.0032, + "step": 45480 + }, + { + "epoch": 8.11, + "learning_rate": 4.594597004279601e-05, + "loss": 0.005, + "step": 45490 + }, + { + "epoch": 8.11, + "learning_rate": 4.59450784593438e-05, + "loss": 0.0047, + "step": 45500 + }, + { + "epoch": 8.12, + "learning_rate": 4.5944186875891585e-05, + "loss": 0.0044, + "step": 45510 + }, + { + "epoch": 8.12, + "learning_rate": 4.5943295292439376e-05, + "loss": 0.0054, + "step": 45520 + }, + { + "epoch": 8.12, + "learning_rate": 4.594240370898717e-05, + "loss": 0.0055, + "step": 45530 + }, + { + "epoch": 8.12, + "learning_rate": 4.594151212553495e-05, + "loss": 0.0038, + "step": 45540 + }, + { + "epoch": 8.12, + "learning_rate": 4.594062054208274e-05, + "loss": 0.0044, + "step": 45550 + }, + { + "epoch": 8.12, + "learning_rate": 4.593972895863053e-05, + "loss": 0.0051, + "step": 45560 + }, + { + "epoch": 8.13, + "learning_rate": 4.593883737517832e-05, + "loss": 0.0063, + "step": 45570 + }, + { + "epoch": 8.13, + "learning_rate": 4.59379457917261e-05, + "loss": 0.0035, + "step": 45580 + }, + { + "epoch": 8.13, + "learning_rate": 4.5937054208273894e-05, + "loss": 0.0036, + "step": 45590 + }, + { + "epoch": 8.13, + "learning_rate": 4.5936162624821685e-05, + "loss": 0.0066, + "step": 45600 + }, + { + "epoch": 8.13, + "learning_rate": 4.5935271041369477e-05, + "loss": 0.0036, + "step": 45610 + }, + { + "epoch": 8.13, + "learning_rate": 4.593437945791727e-05, + "loss": 0.006, + "step": 45620 + }, + { + "epoch": 8.14, + "learning_rate": 4.593348787446505e-05, + "loss": 0.0039, + "step": 45630 + }, + { + "epoch": 8.14, + "learning_rate": 4.5932596291012843e-05, + "loss": 0.0032, + "step": 45640 + }, + { + "epoch": 8.14, + "learning_rate": 4.593170470756063e-05, + "loss": 0.0064, + "step": 45650 + }, + { + "epoch": 8.14, + "learning_rate": 4.593081312410842e-05, + "loss": 0.0041, + "step": 45660 + }, + { + "epoch": 8.14, + "learning_rate": 4.5929921540656203e-05, + "loss": 0.0037, + "step": 45670 + }, + { + "epoch": 8.15, + "learning_rate": 4.5929029957203995e-05, + "loss": 0.005, + "step": 45680 + }, + { + "epoch": 8.15, + "learning_rate": 4.5928138373751786e-05, + "loss": 0.0033, + "step": 45690 + }, + { + "epoch": 8.15, + "learning_rate": 4.592724679029957e-05, + "loss": 0.003, + "step": 45700 + }, + { + "epoch": 8.15, + "learning_rate": 4.592635520684737e-05, + "loss": 0.0036, + "step": 45710 + }, + { + "epoch": 8.15, + "learning_rate": 4.592546362339515e-05, + "loss": 0.0047, + "step": 45720 + }, + { + "epoch": 8.15, + "learning_rate": 4.5924572039942944e-05, + "loss": 0.0045, + "step": 45730 + }, + { + "epoch": 8.16, + "learning_rate": 4.592368045649073e-05, + "loss": 0.0024, + "step": 45740 + }, + { + "epoch": 8.16, + "learning_rate": 4.592278887303852e-05, + "loss": 0.0028, + "step": 45750 + }, + { + "epoch": 8.16, + "learning_rate": 4.592189728958631e-05, + "loss": 0.0049, + "step": 45760 + }, + { + "epoch": 8.16, + "learning_rate": 4.5921005706134095e-05, + "loss": 0.0068, + "step": 45770 + }, + { + "epoch": 8.16, + "learning_rate": 4.592020328102711e-05, + "loss": 0.0056, + "step": 45780 + }, + { + "epoch": 8.17, + "learning_rate": 4.5919311697574894e-05, + "loss": 0.0036, + "step": 45790 + }, + { + "epoch": 8.17, + "learning_rate": 4.5918420114122686e-05, + "loss": 0.0072, + "step": 45800 + }, + { + "epoch": 8.17, + "learning_rate": 4.591752853067047e-05, + "loss": 0.008, + "step": 45810 + }, + { + "epoch": 8.17, + "learning_rate": 4.591663694721826e-05, + "loss": 0.005, + "step": 45820 + }, + { + "epoch": 8.17, + "learning_rate": 4.591574536376605e-05, + "loss": 0.0058, + "step": 45830 + }, + { + "epoch": 8.17, + "learning_rate": 4.591485378031384e-05, + "loss": 0.0039, + "step": 45840 + }, + { + "epoch": 8.18, + "learning_rate": 4.591396219686163e-05, + "loss": 0.0085, + "step": 45850 + }, + { + "epoch": 8.18, + "learning_rate": 4.591307061340941e-05, + "loss": 0.0046, + "step": 45860 + }, + { + "epoch": 8.18, + "learning_rate": 4.591217902995721e-05, + "loss": 0.004, + "step": 45870 + }, + { + "epoch": 8.18, + "learning_rate": 4.5911287446504995e-05, + "loss": 0.0047, + "step": 45880 + }, + { + "epoch": 8.18, + "learning_rate": 4.5910395863052786e-05, + "loss": 0.0039, + "step": 45890 + }, + { + "epoch": 8.18, + "learning_rate": 4.590950427960057e-05, + "loss": 0.0028, + "step": 45900 + }, + { + "epoch": 8.19, + "learning_rate": 4.590861269614836e-05, + "loss": 0.0038, + "step": 45910 + }, + { + "epoch": 8.19, + "learning_rate": 4.590772111269615e-05, + "loss": 0.0054, + "step": 45920 + }, + { + "epoch": 8.19, + "learning_rate": 4.590682952924394e-05, + "loss": 0.0079, + "step": 45930 + }, + { + "epoch": 8.19, + "learning_rate": 4.590593794579173e-05, + "loss": 0.0081, + "step": 45940 + }, + { + "epoch": 8.19, + "learning_rate": 4.590504636233951e-05, + "loss": 0.0038, + "step": 45950 + }, + { + "epoch": 8.2, + "learning_rate": 4.5904154778887304e-05, + "loss": 0.0076, + "step": 45960 + }, + { + "epoch": 8.2, + "learning_rate": 4.5903263195435096e-05, + "loss": 0.0047, + "step": 45970 + }, + { + "epoch": 8.2, + "learning_rate": 4.590237161198289e-05, + "loss": 0.0048, + "step": 45980 + }, + { + "epoch": 8.2, + "learning_rate": 4.590148002853067e-05, + "loss": 0.0047, + "step": 45990 + }, + { + "epoch": 8.2, + "learning_rate": 4.590058844507846e-05, + "loss": 0.0087, + "step": 46000 + }, + { + "epoch": 8.2, + "learning_rate": 4.5899696861626254e-05, + "loss": 0.0034, + "step": 46010 + }, + { + "epoch": 8.21, + "learning_rate": 4.589880527817404e-05, + "loss": 0.0051, + "step": 46020 + }, + { + "epoch": 8.21, + "learning_rate": 4.589791369472183e-05, + "loss": 0.0051, + "step": 46030 + }, + { + "epoch": 8.21, + "learning_rate": 4.5897022111269614e-05, + "loss": 0.0082, + "step": 46040 + }, + { + "epoch": 8.21, + "learning_rate": 4.5896130527817405e-05, + "loss": 0.0041, + "step": 46050 + }, + { + "epoch": 8.21, + "learning_rate": 4.5895238944365196e-05, + "loss": 0.0035, + "step": 46060 + }, + { + "epoch": 8.22, + "learning_rate": 4.589434736091298e-05, + "loss": 0.0034, + "step": 46070 + }, + { + "epoch": 8.22, + "learning_rate": 4.589345577746077e-05, + "loss": 0.0049, + "step": 46080 + }, + { + "epoch": 8.22, + "learning_rate": 4.589256419400856e-05, + "loss": 0.0028, + "step": 46090 + }, + { + "epoch": 8.22, + "learning_rate": 4.5891672610556354e-05, + "loss": 0.003, + "step": 46100 + }, + { + "epoch": 8.22, + "learning_rate": 4.589078102710414e-05, + "loss": 0.0051, + "step": 46110 + }, + { + "epoch": 8.22, + "learning_rate": 4.588988944365193e-05, + "loss": 0.0045, + "step": 46120 + }, + { + "epoch": 8.23, + "learning_rate": 4.5888997860199714e-05, + "loss": 0.0106, + "step": 46130 + }, + { + "epoch": 8.23, + "learning_rate": 4.5888106276747506e-05, + "loss": 0.005, + "step": 46140 + }, + { + "epoch": 8.23, + "learning_rate": 4.58872146932953e-05, + "loss": 0.0071, + "step": 46150 + }, + { + "epoch": 8.23, + "learning_rate": 4.588632310984308e-05, + "loss": 0.0061, + "step": 46160 + }, + { + "epoch": 8.23, + "learning_rate": 4.588543152639087e-05, + "loss": 0.005, + "step": 46170 + }, + { + "epoch": 8.23, + "learning_rate": 4.588453994293866e-05, + "loss": 0.0062, + "step": 46180 + }, + { + "epoch": 8.24, + "learning_rate": 4.588364835948645e-05, + "loss": 0.0038, + "step": 46190 + }, + { + "epoch": 8.24, + "learning_rate": 4.588275677603424e-05, + "loss": 0.0043, + "step": 46200 + }, + { + "epoch": 8.24, + "learning_rate": 4.588186519258203e-05, + "loss": 0.0029, + "step": 46210 + }, + { + "epoch": 8.24, + "learning_rate": 4.5880973609129815e-05, + "loss": 0.0063, + "step": 46220 + }, + { + "epoch": 8.24, + "learning_rate": 4.5880082025677606e-05, + "loss": 0.0028, + "step": 46230 + }, + { + "epoch": 8.25, + "learning_rate": 4.58791904422254e-05, + "loss": 0.0038, + "step": 46240 + }, + { + "epoch": 8.25, + "learning_rate": 4.587829885877318e-05, + "loss": 0.0039, + "step": 46250 + }, + { + "epoch": 8.25, + "learning_rate": 4.587740727532097e-05, + "loss": 0.0027, + "step": 46260 + }, + { + "epoch": 8.25, + "learning_rate": 4.587651569186876e-05, + "loss": 0.0026, + "step": 46270 + }, + { + "epoch": 8.25, + "learning_rate": 4.587562410841655e-05, + "loss": 0.0047, + "step": 46280 + }, + { + "epoch": 8.25, + "learning_rate": 4.587473252496434e-05, + "loss": 0.0038, + "step": 46290 + }, + { + "epoch": 8.26, + "learning_rate": 4.5873840941512124e-05, + "loss": 0.0051, + "step": 46300 + }, + { + "epoch": 8.26, + "learning_rate": 4.587294935805992e-05, + "loss": 0.0049, + "step": 46310 + }, + { + "epoch": 8.26, + "learning_rate": 4.587205777460771e-05, + "loss": 0.0036, + "step": 46320 + }, + { + "epoch": 8.26, + "learning_rate": 4.58711661911555e-05, + "loss": 0.0058, + "step": 46330 + }, + { + "epoch": 8.26, + "learning_rate": 4.587027460770328e-05, + "loss": 0.0038, + "step": 46340 + }, + { + "epoch": 8.26, + "learning_rate": 4.5869383024251073e-05, + "loss": 0.003, + "step": 46350 + }, + { + "epoch": 8.27, + "learning_rate": 4.586849144079886e-05, + "loss": 0.0034, + "step": 46360 + }, + { + "epoch": 8.27, + "learning_rate": 4.586759985734665e-05, + "loss": 0.0081, + "step": 46370 + }, + { + "epoch": 8.27, + "learning_rate": 4.586670827389444e-05, + "loss": 0.0038, + "step": 46380 + }, + { + "epoch": 8.27, + "learning_rate": 4.5865816690442225e-05, + "loss": 0.0074, + "step": 46390 + }, + { + "epoch": 8.27, + "learning_rate": 4.5864925106990016e-05, + "loss": 0.0023, + "step": 46400 + }, + { + "epoch": 8.28, + "learning_rate": 4.58640335235378e-05, + "loss": 0.0041, + "step": 46410 + }, + { + "epoch": 8.28, + "learning_rate": 4.58631419400856e-05, + "loss": 0.0043, + "step": 46420 + }, + { + "epoch": 8.28, + "learning_rate": 4.586225035663338e-05, + "loss": 0.0034, + "step": 46430 + }, + { + "epoch": 8.28, + "learning_rate": 4.5861358773181174e-05, + "loss": 0.0047, + "step": 46440 + }, + { + "epoch": 8.28, + "learning_rate": 4.586046718972896e-05, + "loss": 0.0031, + "step": 46450 + }, + { + "epoch": 8.28, + "learning_rate": 4.585957560627675e-05, + "loss": 0.0047, + "step": 46460 + }, + { + "epoch": 8.29, + "learning_rate": 4.585868402282454e-05, + "loss": 0.0065, + "step": 46470 + }, + { + "epoch": 8.29, + "learning_rate": 4.5857792439372325e-05, + "loss": 0.003, + "step": 46480 + }, + { + "epoch": 8.29, + "learning_rate": 4.5856900855920117e-05, + "loss": 0.0059, + "step": 46490 + }, + { + "epoch": 8.29, + "learning_rate": 4.58560092724679e-05, + "loss": 0.0025, + "step": 46500 + }, + { + "epoch": 8.29, + "learning_rate": 4.585511768901569e-05, + "loss": 0.0038, + "step": 46510 + }, + { + "epoch": 8.3, + "learning_rate": 4.5854226105563483e-05, + "loss": 0.009, + "step": 46520 + }, + { + "epoch": 8.3, + "learning_rate": 4.5853334522111275e-05, + "loss": 0.005, + "step": 46530 + }, + { + "epoch": 8.3, + "learning_rate": 4.5852442938659066e-05, + "loss": 0.0028, + "step": 46540 + }, + { + "epoch": 8.3, + "learning_rate": 4.585155135520685e-05, + "loss": 0.0067, + "step": 46550 + }, + { + "epoch": 8.3, + "learning_rate": 4.585065977175464e-05, + "loss": 0.0065, + "step": 46560 + }, + { + "epoch": 8.3, + "learning_rate": 4.5849768188302426e-05, + "loss": 0.0051, + "step": 46570 + }, + { + "epoch": 8.31, + "learning_rate": 4.584887660485022e-05, + "loss": 0.0048, + "step": 46580 + }, + { + "epoch": 8.31, + "learning_rate": 4.5847985021398e-05, + "loss": 0.0032, + "step": 46590 + }, + { + "epoch": 8.31, + "learning_rate": 4.584709343794579e-05, + "loss": 0.0025, + "step": 46600 + }, + { + "epoch": 8.31, + "learning_rate": 4.5846201854493584e-05, + "loss": 0.0053, + "step": 46610 + }, + { + "epoch": 8.31, + "learning_rate": 4.584531027104137e-05, + "loss": 0.0041, + "step": 46620 + }, + { + "epoch": 8.31, + "learning_rate": 4.584441868758916e-05, + "loss": 0.0071, + "step": 46630 + }, + { + "epoch": 8.32, + "learning_rate": 4.584352710413695e-05, + "loss": 0.0022, + "step": 46640 + }, + { + "epoch": 8.32, + "learning_rate": 4.584263552068474e-05, + "loss": 0.0025, + "step": 46650 + }, + { + "epoch": 8.32, + "learning_rate": 4.5841743937232526e-05, + "loss": 0.0065, + "step": 46660 + }, + { + "epoch": 8.32, + "learning_rate": 4.584085235378032e-05, + "loss": 0.0051, + "step": 46670 + }, + { + "epoch": 8.32, + "learning_rate": 4.58399607703281e-05, + "loss": 0.0053, + "step": 46680 + }, + { + "epoch": 8.33, + "learning_rate": 4.583906918687589e-05, + "loss": 0.0041, + "step": 46690 + }, + { + "epoch": 8.33, + "learning_rate": 4.5838177603423685e-05, + "loss": 0.0043, + "step": 46700 + }, + { + "epoch": 8.33, + "learning_rate": 4.583728601997147e-05, + "loss": 0.0039, + "step": 46710 + }, + { + "epoch": 8.33, + "learning_rate": 4.583639443651926e-05, + "loss": 0.005, + "step": 46720 + }, + { + "epoch": 8.33, + "learning_rate": 4.5835502853067045e-05, + "loss": 0.0036, + "step": 46730 + }, + { + "epoch": 8.33, + "learning_rate": 4.5834611269614836e-05, + "loss": 0.0075, + "step": 46740 + }, + { + "epoch": 8.34, + "learning_rate": 4.583371968616263e-05, + "loss": 0.0032, + "step": 46750 + }, + { + "epoch": 8.34, + "learning_rate": 4.583282810271042e-05, + "loss": 0.0031, + "step": 46760 + }, + { + "epoch": 8.34, + "learning_rate": 4.583193651925821e-05, + "loss": 0.0072, + "step": 46770 + }, + { + "epoch": 8.34, + "learning_rate": 4.5831044935805994e-05, + "loss": 0.0039, + "step": 46780 + }, + { + "epoch": 8.34, + "learning_rate": 4.5830153352353785e-05, + "loss": 0.0052, + "step": 46790 + }, + { + "epoch": 8.35, + "learning_rate": 4.582926176890157e-05, + "loss": 0.0048, + "step": 46800 + }, + { + "epoch": 8.35, + "learning_rate": 4.582837018544936e-05, + "loss": 0.0036, + "step": 46810 + }, + { + "epoch": 8.35, + "learning_rate": 4.5827478601997145e-05, + "loss": 0.0058, + "step": 46820 + }, + { + "epoch": 8.35, + "learning_rate": 4.5826587018544936e-05, + "loss": 0.0038, + "step": 46830 + }, + { + "epoch": 8.35, + "learning_rate": 4.582569543509273e-05, + "loss": 0.0039, + "step": 46840 + }, + { + "epoch": 8.35, + "learning_rate": 4.582480385164051e-05, + "loss": 0.0051, + "step": 46850 + }, + { + "epoch": 8.36, + "learning_rate": 4.582391226818831e-05, + "loss": 0.0038, + "step": 46860 + }, + { + "epoch": 8.36, + "learning_rate": 4.5823020684736094e-05, + "loss": 0.0032, + "step": 46870 + }, + { + "epoch": 8.36, + "learning_rate": 4.5822129101283886e-05, + "loss": 0.0031, + "step": 46880 + }, + { + "epoch": 8.36, + "learning_rate": 4.582123751783167e-05, + "loss": 0.0068, + "step": 46890 + }, + { + "epoch": 8.36, + "learning_rate": 4.582034593437946e-05, + "loss": 0.0032, + "step": 46900 + }, + { + "epoch": 8.36, + "learning_rate": 4.5819454350927246e-05, + "loss": 0.0074, + "step": 46910 + }, + { + "epoch": 8.37, + "learning_rate": 4.581856276747504e-05, + "loss": 0.0049, + "step": 46920 + }, + { + "epoch": 8.37, + "learning_rate": 4.581767118402283e-05, + "loss": 0.0035, + "step": 46930 + }, + { + "epoch": 8.37, + "learning_rate": 4.581677960057061e-05, + "loss": 0.0074, + "step": 46940 + }, + { + "epoch": 8.37, + "learning_rate": 4.5815888017118404e-05, + "loss": 0.0062, + "step": 46950 + }, + { + "epoch": 8.37, + "learning_rate": 4.581499643366619e-05, + "loss": 0.0074, + "step": 46960 + }, + { + "epoch": 8.38, + "learning_rate": 4.5814104850213986e-05, + "loss": 0.0038, + "step": 46970 + }, + { + "epoch": 8.38, + "learning_rate": 4.581321326676177e-05, + "loss": 0.0033, + "step": 46980 + }, + { + "epoch": 8.38, + "learning_rate": 4.581232168330956e-05, + "loss": 0.0085, + "step": 46990 + }, + { + "epoch": 8.38, + "learning_rate": 4.581143009985735e-05, + "loss": 0.0056, + "step": 47000 + }, + { + "epoch": 8.38, + "learning_rate": 4.581053851640514e-05, + "loss": 0.0056, + "step": 47010 + }, + { + "epoch": 8.38, + "learning_rate": 4.580964693295293e-05, + "loss": 0.0076, + "step": 47020 + }, + { + "epoch": 8.39, + "learning_rate": 4.580875534950071e-05, + "loss": 0.0059, + "step": 47030 + }, + { + "epoch": 8.39, + "learning_rate": 4.5807863766048504e-05, + "loss": 0.0037, + "step": 47040 + }, + { + "epoch": 8.39, + "learning_rate": 4.580697218259629e-05, + "loss": 0.0041, + "step": 47050 + }, + { + "epoch": 8.39, + "learning_rate": 4.580608059914408e-05, + "loss": 0.0028, + "step": 47060 + }, + { + "epoch": 8.39, + "learning_rate": 4.580518901569187e-05, + "loss": 0.0054, + "step": 47070 + }, + { + "epoch": 8.4, + "learning_rate": 4.580429743223966e-05, + "loss": 0.006, + "step": 47080 + }, + { + "epoch": 8.4, + "learning_rate": 4.5803405848787454e-05, + "loss": 0.0048, + "step": 47090 + }, + { + "epoch": 8.4, + "learning_rate": 4.580251426533524e-05, + "loss": 0.0045, + "step": 47100 + }, + { + "epoch": 8.4, + "learning_rate": 4.580162268188303e-05, + "loss": 0.0065, + "step": 47110 + }, + { + "epoch": 8.4, + "learning_rate": 4.5800731098430814e-05, + "loss": 0.0043, + "step": 47120 + }, + { + "epoch": 8.4, + "learning_rate": 4.5799839514978605e-05, + "loss": 0.0075, + "step": 47130 + }, + { + "epoch": 8.41, + "learning_rate": 4.579894793152639e-05, + "loss": 0.004, + "step": 47140 + }, + { + "epoch": 8.41, + "learning_rate": 4.579805634807418e-05, + "loss": 0.0081, + "step": 47150 + }, + { + "epoch": 8.41, + "learning_rate": 4.579716476462197e-05, + "loss": 0.0059, + "step": 47160 + }, + { + "epoch": 8.41, + "learning_rate": 4.5796273181169756e-05, + "loss": 0.005, + "step": 47170 + }, + { + "epoch": 8.41, + "learning_rate": 4.579538159771755e-05, + "loss": 0.0036, + "step": 47180 + }, + { + "epoch": 8.41, + "learning_rate": 4.579449001426534e-05, + "loss": 0.011, + "step": 47190 + }, + { + "epoch": 8.42, + "learning_rate": 4.579359843081313e-05, + "loss": 0.0071, + "step": 47200 + }, + { + "epoch": 8.42, + "learning_rate": 4.5792706847360914e-05, + "loss": 0.006, + "step": 47210 + }, + { + "epoch": 8.42, + "learning_rate": 4.5791815263908705e-05, + "loss": 0.0034, + "step": 47220 + }, + { + "epoch": 8.42, + "learning_rate": 4.57909236804565e-05, + "loss": 0.0038, + "step": 47230 + }, + { + "epoch": 8.42, + "learning_rate": 4.579003209700428e-05, + "loss": 0.0036, + "step": 47240 + }, + { + "epoch": 8.43, + "learning_rate": 4.578914051355207e-05, + "loss": 0.0043, + "step": 47250 + }, + { + "epoch": 8.43, + "learning_rate": 4.578824893009986e-05, + "loss": 0.0039, + "step": 47260 + }, + { + "epoch": 8.43, + "learning_rate": 4.578735734664765e-05, + "loss": 0.0031, + "step": 47270 + }, + { + "epoch": 8.43, + "learning_rate": 4.578646576319543e-05, + "loss": 0.0078, + "step": 47280 + }, + { + "epoch": 8.43, + "learning_rate": 4.5785574179743224e-05, + "loss": 0.0051, + "step": 47290 + }, + { + "epoch": 8.43, + "learning_rate": 4.5784682596291015e-05, + "loss": 0.0065, + "step": 47300 + }, + { + "epoch": 8.44, + "learning_rate": 4.5783791012838806e-05, + "loss": 0.0064, + "step": 47310 + }, + { + "epoch": 8.44, + "learning_rate": 4.57828994293866e-05, + "loss": 0.0063, + "step": 47320 + }, + { + "epoch": 8.44, + "learning_rate": 4.578200784593438e-05, + "loss": 0.0068, + "step": 47330 + }, + { + "epoch": 8.44, + "learning_rate": 4.578111626248217e-05, + "loss": 0.0055, + "step": 47340 + }, + { + "epoch": 8.44, + "learning_rate": 4.578022467902996e-05, + "loss": 0.0071, + "step": 47350 + }, + { + "epoch": 8.45, + "learning_rate": 4.577933309557775e-05, + "loss": 0.0054, + "step": 47360 + }, + { + "epoch": 8.45, + "learning_rate": 4.577844151212553e-05, + "loss": 0.0045, + "step": 47370 + }, + { + "epoch": 8.45, + "learning_rate": 4.5777549928673324e-05, + "loss": 0.0068, + "step": 47380 + }, + { + "epoch": 8.45, + "learning_rate": 4.5776658345221115e-05, + "loss": 0.005, + "step": 47390 + }, + { + "epoch": 8.45, + "learning_rate": 4.57757667617689e-05, + "loss": 0.0031, + "step": 47400 + }, + { + "epoch": 8.45, + "learning_rate": 4.57748751783167e-05, + "loss": 0.007, + "step": 47410 + }, + { + "epoch": 8.46, + "learning_rate": 4.577398359486448e-05, + "loss": 0.0035, + "step": 47420 + }, + { + "epoch": 8.46, + "learning_rate": 4.5773092011412273e-05, + "loss": 0.0068, + "step": 47430 + }, + { + "epoch": 8.46, + "learning_rate": 4.577220042796006e-05, + "loss": 0.004, + "step": 47440 + }, + { + "epoch": 8.46, + "learning_rate": 4.577130884450785e-05, + "loss": 0.0043, + "step": 47450 + }, + { + "epoch": 8.46, + "learning_rate": 4.577041726105564e-05, + "loss": 0.0069, + "step": 47460 + }, + { + "epoch": 8.46, + "learning_rate": 4.5769525677603425e-05, + "loss": 0.0018, + "step": 47470 + }, + { + "epoch": 8.47, + "learning_rate": 4.5768634094151216e-05, + "loss": 0.0043, + "step": 47480 + }, + { + "epoch": 8.47, + "learning_rate": 4.5767742510699e-05, + "loss": 0.0068, + "step": 47490 + }, + { + "epoch": 8.47, + "learning_rate": 4.576685092724679e-05, + "loss": 0.0046, + "step": 47500 + }, + { + "epoch": 8.47, + "learning_rate": 4.5765959343794576e-05, + "loss": 0.0043, + "step": 47510 + }, + { + "epoch": 8.47, + "learning_rate": 4.5765067760342374e-05, + "loss": 0.0045, + "step": 47520 + }, + { + "epoch": 8.48, + "learning_rate": 4.576417617689016e-05, + "loss": 0.0033, + "step": 47530 + }, + { + "epoch": 8.48, + "learning_rate": 4.576328459343795e-05, + "loss": 0.0043, + "step": 47540 + }, + { + "epoch": 8.48, + "learning_rate": 4.576239300998574e-05, + "loss": 0.006, + "step": 47550 + }, + { + "epoch": 8.48, + "learning_rate": 4.5761501426533525e-05, + "loss": 0.0044, + "step": 47560 + }, + { + "epoch": 8.48, + "learning_rate": 4.5760609843081317e-05, + "loss": 0.0063, + "step": 47570 + }, + { + "epoch": 8.48, + "learning_rate": 4.57597182596291e-05, + "loss": 0.0072, + "step": 47580 + }, + { + "epoch": 8.49, + "learning_rate": 4.575882667617689e-05, + "loss": 0.0058, + "step": 47590 + }, + { + "epoch": 8.49, + "learning_rate": 4.5757935092724677e-05, + "loss": 0.0028, + "step": 47600 + }, + { + "epoch": 8.49, + "learning_rate": 4.575704350927247e-05, + "loss": 0.0047, + "step": 47610 + }, + { + "epoch": 8.49, + "learning_rate": 4.575615192582026e-05, + "loss": 0.0056, + "step": 47620 + }, + { + "epoch": 8.49, + "learning_rate": 4.575526034236805e-05, + "loss": 0.0042, + "step": 47630 + }, + { + "epoch": 8.5, + "learning_rate": 4.575436875891584e-05, + "loss": 0.0043, + "step": 47640 + }, + { + "epoch": 8.5, + "learning_rate": 4.5753477175463626e-05, + "loss": 0.0046, + "step": 47650 + }, + { + "epoch": 8.5, + "learning_rate": 4.575258559201142e-05, + "loss": 0.0037, + "step": 47660 + }, + { + "epoch": 8.5, + "learning_rate": 4.57516940085592e-05, + "loss": 0.0027, + "step": 47670 + }, + { + "epoch": 8.5, + "learning_rate": 4.575080242510699e-05, + "loss": 0.003, + "step": 47680 + }, + { + "epoch": 8.5, + "learning_rate": 4.5749910841654784e-05, + "loss": 0.006, + "step": 47690 + }, + { + "epoch": 8.51, + "learning_rate": 4.574901925820257e-05, + "loss": 0.0036, + "step": 47700 + }, + { + "epoch": 8.51, + "learning_rate": 4.574812767475036e-05, + "loss": 0.0023, + "step": 47710 + }, + { + "epoch": 8.51, + "learning_rate": 4.5747236091298144e-05, + "loss": 0.0066, + "step": 47720 + }, + { + "epoch": 8.51, + "learning_rate": 4.5746344507845935e-05, + "loss": 0.0054, + "step": 47730 + }, + { + "epoch": 8.51, + "learning_rate": 4.5745452924393726e-05, + "loss": 0.0066, + "step": 47740 + }, + { + "epoch": 8.51, + "learning_rate": 4.574456134094152e-05, + "loss": 0.0057, + "step": 47750 + }, + { + "epoch": 8.52, + "learning_rate": 4.57436697574893e-05, + "loss": 0.0028, + "step": 47760 + }, + { + "epoch": 8.52, + "learning_rate": 4.574277817403709e-05, + "loss": 0.0067, + "step": 47770 + }, + { + "epoch": 8.52, + "learning_rate": 4.5741886590584884e-05, + "loss": 0.0055, + "step": 47780 + }, + { + "epoch": 8.52, + "learning_rate": 4.574099500713267e-05, + "loss": 0.0035, + "step": 47790 + }, + { + "epoch": 8.52, + "learning_rate": 4.574010342368046e-05, + "loss": 0.0032, + "step": 47800 + }, + { + "epoch": 8.53, + "learning_rate": 4.5739211840228245e-05, + "loss": 0.0045, + "step": 47810 + }, + { + "epoch": 8.53, + "learning_rate": 4.5738320256776036e-05, + "loss": 0.0026, + "step": 47820 + }, + { + "epoch": 8.53, + "learning_rate": 4.573742867332382e-05, + "loss": 0.0037, + "step": 47830 + }, + { + "epoch": 8.53, + "learning_rate": 4.573653708987161e-05, + "loss": 0.0041, + "step": 47840 + }, + { + "epoch": 8.53, + "learning_rate": 4.57356455064194e-05, + "loss": 0.0053, + "step": 47850 + }, + { + "epoch": 8.53, + "learning_rate": 4.5734753922967194e-05, + "loss": 0.0047, + "step": 47860 + }, + { + "epoch": 8.54, + "learning_rate": 4.5733862339514985e-05, + "loss": 0.0029, + "step": 47870 + }, + { + "epoch": 8.54, + "learning_rate": 4.573297075606277e-05, + "loss": 0.0045, + "step": 47880 + }, + { + "epoch": 8.54, + "learning_rate": 4.573207917261056e-05, + "loss": 0.0026, + "step": 47890 + }, + { + "epoch": 8.54, + "learning_rate": 4.5731187589158345e-05, + "loss": 0.0062, + "step": 47900 + }, + { + "epoch": 8.54, + "learning_rate": 4.5730296005706136e-05, + "loss": 0.006, + "step": 47910 + }, + { + "epoch": 8.54, + "learning_rate": 4.572940442225393e-05, + "loss": 0.0077, + "step": 47920 + }, + { + "epoch": 8.55, + "learning_rate": 4.572851283880171e-05, + "loss": 0.0059, + "step": 47930 + }, + { + "epoch": 8.55, + "learning_rate": 4.57276212553495e-05, + "loss": 0.0058, + "step": 47940 + }, + { + "epoch": 8.55, + "learning_rate": 4.572672967189729e-05, + "loss": 0.0072, + "step": 47950 + }, + { + "epoch": 8.55, + "learning_rate": 4.5725838088445086e-05, + "loss": 0.0028, + "step": 47960 + }, + { + "epoch": 8.55, + "learning_rate": 4.572494650499287e-05, + "loss": 0.004, + "step": 47970 + }, + { + "epoch": 8.56, + "learning_rate": 4.572405492154066e-05, + "loss": 0.0041, + "step": 47980 + }, + { + "epoch": 8.56, + "learning_rate": 4.5723163338088446e-05, + "loss": 0.0044, + "step": 47990 + }, + { + "epoch": 8.56, + "learning_rate": 4.572227175463624e-05, + "loss": 0.0053, + "step": 48000 + }, + { + "epoch": 8.56, + "learning_rate": 4.572138017118403e-05, + "loss": 0.0025, + "step": 48010 + }, + { + "epoch": 8.56, + "learning_rate": 4.572048858773181e-05, + "loss": 0.0088, + "step": 48020 + }, + { + "epoch": 8.56, + "learning_rate": 4.5719597004279604e-05, + "loss": 0.0034, + "step": 48030 + }, + { + "epoch": 8.57, + "learning_rate": 4.571870542082739e-05, + "loss": 0.0048, + "step": 48040 + }, + { + "epoch": 8.57, + "learning_rate": 4.571781383737518e-05, + "loss": 0.0035, + "step": 48050 + }, + { + "epoch": 8.57, + "learning_rate": 4.5716922253922964e-05, + "loss": 0.0039, + "step": 48060 + }, + { + "epoch": 8.57, + "learning_rate": 4.571603067047076e-05, + "loss": 0.0034, + "step": 48070 + }, + { + "epoch": 8.57, + "learning_rate": 4.5715139087018546e-05, + "loss": 0.0049, + "step": 48080 + }, + { + "epoch": 8.58, + "learning_rate": 4.571424750356634e-05, + "loss": 0.0025, + "step": 48090 + }, + { + "epoch": 8.58, + "learning_rate": 4.571335592011413e-05, + "loss": 0.0053, + "step": 48100 + }, + { + "epoch": 8.58, + "learning_rate": 4.571246433666191e-05, + "loss": 0.0042, + "step": 48110 + }, + { + "epoch": 8.58, + "learning_rate": 4.5711572753209704e-05, + "loss": 0.0033, + "step": 48120 + }, + { + "epoch": 8.58, + "learning_rate": 4.571068116975749e-05, + "loss": 0.0043, + "step": 48130 + }, + { + "epoch": 8.58, + "learning_rate": 4.570978958630528e-05, + "loss": 0.0052, + "step": 48140 + }, + { + "epoch": 8.59, + "learning_rate": 4.570889800285307e-05, + "loss": 0.0045, + "step": 48150 + }, + { + "epoch": 8.59, + "learning_rate": 4.5708006419400856e-05, + "loss": 0.0057, + "step": 48160 + }, + { + "epoch": 8.59, + "learning_rate": 4.570720399429387e-05, + "loss": 0.0064, + "step": 48170 + }, + { + "epoch": 8.59, + "learning_rate": 4.5706312410841655e-05, + "loss": 0.0041, + "step": 48180 + }, + { + "epoch": 8.59, + "learning_rate": 4.5705420827389446e-05, + "loss": 0.0027, + "step": 48190 + }, + { + "epoch": 8.59, + "learning_rate": 4.570452924393723e-05, + "loss": 0.0042, + "step": 48200 + }, + { + "epoch": 8.6, + "learning_rate": 4.570363766048502e-05, + "loss": 0.0032, + "step": 48210 + }, + { + "epoch": 8.6, + "learning_rate": 4.570274607703281e-05, + "loss": 0.0046, + "step": 48220 + }, + { + "epoch": 8.6, + "learning_rate": 4.5701854493580604e-05, + "loss": 0.0039, + "step": 48230 + }, + { + "epoch": 8.6, + "learning_rate": 4.5700962910128395e-05, + "loss": 0.0038, + "step": 48240 + }, + { + "epoch": 8.6, + "learning_rate": 4.570007132667618e-05, + "loss": 0.0046, + "step": 48250 + }, + { + "epoch": 8.61, + "learning_rate": 4.569917974322397e-05, + "loss": 0.0066, + "step": 48260 + }, + { + "epoch": 8.61, + "learning_rate": 4.5698288159771755e-05, + "loss": 0.0037, + "step": 48270 + }, + { + "epoch": 8.61, + "learning_rate": 4.5697396576319547e-05, + "loss": 0.004, + "step": 48280 + }, + { + "epoch": 8.61, + "learning_rate": 4.569650499286733e-05, + "loss": 0.0059, + "step": 48290 + }, + { + "epoch": 8.61, + "learning_rate": 4.569561340941512e-05, + "loss": 0.0035, + "step": 48300 + }, + { + "epoch": 8.61, + "learning_rate": 4.5694721825962913e-05, + "loss": 0.0066, + "step": 48310 + }, + { + "epoch": 8.62, + "learning_rate": 4.56938302425107e-05, + "loss": 0.0049, + "step": 48320 + }, + { + "epoch": 8.62, + "learning_rate": 4.569293865905849e-05, + "loss": 0.0045, + "step": 48330 + }, + { + "epoch": 8.62, + "learning_rate": 4.569204707560628e-05, + "loss": 0.0037, + "step": 48340 + }, + { + "epoch": 8.62, + "learning_rate": 4.569115549215407e-05, + "loss": 0.0059, + "step": 48350 + }, + { + "epoch": 8.62, + "learning_rate": 4.5690263908701856e-05, + "loss": 0.0072, + "step": 48360 + }, + { + "epoch": 8.63, + "learning_rate": 4.568937232524965e-05, + "loss": 0.0047, + "step": 48370 + }, + { + "epoch": 8.63, + "learning_rate": 4.568848074179743e-05, + "loss": 0.006, + "step": 48380 + }, + { + "epoch": 8.63, + "learning_rate": 4.568758915834522e-05, + "loss": 0.0056, + "step": 48390 + }, + { + "epoch": 8.63, + "learning_rate": 4.5686697574893014e-05, + "loss": 0.0043, + "step": 48400 + }, + { + "epoch": 8.63, + "learning_rate": 4.56858059914408e-05, + "loss": 0.0059, + "step": 48410 + }, + { + "epoch": 8.63, + "learning_rate": 4.568491440798859e-05, + "loss": 0.0037, + "step": 48420 + }, + { + "epoch": 8.64, + "learning_rate": 4.5684022824536374e-05, + "loss": 0.0037, + "step": 48430 + }, + { + "epoch": 8.64, + "learning_rate": 4.5683131241084165e-05, + "loss": 0.0047, + "step": 48440 + }, + { + "epoch": 8.64, + "learning_rate": 4.5682239657631957e-05, + "loss": 0.0048, + "step": 48450 + }, + { + "epoch": 8.64, + "learning_rate": 4.568134807417975e-05, + "loss": 0.0042, + "step": 48460 + }, + { + "epoch": 8.64, + "learning_rate": 4.568045649072754e-05, + "loss": 0.0045, + "step": 48470 + }, + { + "epoch": 8.64, + "learning_rate": 4.567956490727532e-05, + "loss": 0.0032, + "step": 48480 + }, + { + "epoch": 8.65, + "learning_rate": 4.5678673323823115e-05, + "loss": 0.0031, + "step": 48490 + }, + { + "epoch": 8.65, + "learning_rate": 4.56777817403709e-05, + "loss": 0.0043, + "step": 48500 + }, + { + "epoch": 8.65, + "learning_rate": 4.567689015691869e-05, + "loss": 0.0068, + "step": 48510 + }, + { + "epoch": 8.65, + "learning_rate": 4.5675998573466475e-05, + "loss": 0.0037, + "step": 48520 + }, + { + "epoch": 8.65, + "learning_rate": 4.5675106990014266e-05, + "loss": 0.0035, + "step": 48530 + }, + { + "epoch": 8.66, + "learning_rate": 4.567421540656206e-05, + "loss": 0.0052, + "step": 48540 + }, + { + "epoch": 8.66, + "learning_rate": 4.567332382310984e-05, + "loss": 0.0045, + "step": 48550 + }, + { + "epoch": 8.66, + "learning_rate": 4.567243223965764e-05, + "loss": 0.0062, + "step": 48560 + }, + { + "epoch": 8.66, + "learning_rate": 4.5671540656205424e-05, + "loss": 0.0068, + "step": 48570 + }, + { + "epoch": 8.66, + "learning_rate": 4.5670649072753215e-05, + "loss": 0.0049, + "step": 48580 + }, + { + "epoch": 8.66, + "learning_rate": 4.5669757489301e-05, + "loss": 0.0046, + "step": 48590 + }, + { + "epoch": 8.67, + "learning_rate": 4.566886590584879e-05, + "loss": 0.003, + "step": 48600 + }, + { + "epoch": 8.67, + "learning_rate": 4.5667974322396575e-05, + "loss": 0.0048, + "step": 48610 + }, + { + "epoch": 8.67, + "learning_rate": 4.5667082738944366e-05, + "loss": 0.0042, + "step": 48620 + }, + { + "epoch": 8.67, + "learning_rate": 4.566619115549216e-05, + "loss": 0.0058, + "step": 48630 + }, + { + "epoch": 8.67, + "learning_rate": 4.566529957203994e-05, + "loss": 0.0037, + "step": 48640 + }, + { + "epoch": 8.68, + "learning_rate": 4.566440798858773e-05, + "loss": 0.0046, + "step": 48650 + }, + { + "epoch": 8.68, + "learning_rate": 4.566351640513552e-05, + "loss": 0.003, + "step": 48660 + }, + { + "epoch": 8.68, + "learning_rate": 4.5662624821683316e-05, + "loss": 0.0032, + "step": 48670 + }, + { + "epoch": 8.68, + "learning_rate": 4.56617332382311e-05, + "loss": 0.004, + "step": 48680 + }, + { + "epoch": 8.68, + "learning_rate": 4.566084165477889e-05, + "loss": 0.0042, + "step": 48690 + }, + { + "epoch": 8.68, + "learning_rate": 4.565995007132668e-05, + "loss": 0.0033, + "step": 48700 + }, + { + "epoch": 8.69, + "learning_rate": 4.565905848787447e-05, + "loss": 0.0057, + "step": 48710 + }, + { + "epoch": 8.69, + "learning_rate": 4.565816690442226e-05, + "loss": 0.0076, + "step": 48720 + }, + { + "epoch": 8.69, + "learning_rate": 4.565727532097004e-05, + "loss": 0.0044, + "step": 48730 + }, + { + "epoch": 8.69, + "learning_rate": 4.5656383737517834e-05, + "loss": 0.0093, + "step": 48740 + }, + { + "epoch": 8.69, + "learning_rate": 4.565549215406562e-05, + "loss": 0.0079, + "step": 48750 + }, + { + "epoch": 8.69, + "learning_rate": 4.565460057061341e-05, + "loss": 0.0063, + "step": 48760 + }, + { + "epoch": 8.7, + "learning_rate": 4.56537089871612e-05, + "loss": 0.0048, + "step": 48770 + }, + { + "epoch": 8.7, + "learning_rate": 4.565281740370899e-05, + "loss": 0.0045, + "step": 48780 + }, + { + "epoch": 8.7, + "learning_rate": 4.565192582025678e-05, + "loss": 0.0049, + "step": 48790 + }, + { + "epoch": 8.7, + "learning_rate": 4.565103423680457e-05, + "loss": 0.0044, + "step": 48800 + }, + { + "epoch": 8.7, + "learning_rate": 4.565014265335236e-05, + "loss": 0.0044, + "step": 48810 + }, + { + "epoch": 8.71, + "learning_rate": 4.564925106990014e-05, + "loss": 0.0064, + "step": 48820 + }, + { + "epoch": 8.71, + "learning_rate": 4.5648359486447934e-05, + "loss": 0.0042, + "step": 48830 + }, + { + "epoch": 8.71, + "learning_rate": 4.564746790299572e-05, + "loss": 0.0037, + "step": 48840 + }, + { + "epoch": 8.71, + "learning_rate": 4.564657631954351e-05, + "loss": 0.0046, + "step": 48850 + }, + { + "epoch": 8.71, + "learning_rate": 4.56456847360913e-05, + "loss": 0.004, + "step": 48860 + }, + { + "epoch": 8.71, + "learning_rate": 4.5644793152639086e-05, + "loss": 0.0053, + "step": 48870 + }, + { + "epoch": 8.72, + "learning_rate": 4.564390156918688e-05, + "loss": 0.004, + "step": 48880 + }, + { + "epoch": 8.72, + "learning_rate": 4.564300998573467e-05, + "loss": 0.0035, + "step": 48890 + }, + { + "epoch": 8.72, + "learning_rate": 4.564211840228246e-05, + "loss": 0.0053, + "step": 48900 + }, + { + "epoch": 8.72, + "learning_rate": 4.5641226818830244e-05, + "loss": 0.0071, + "step": 48910 + }, + { + "epoch": 8.72, + "learning_rate": 4.5640335235378035e-05, + "loss": 0.0029, + "step": 48920 + }, + { + "epoch": 8.73, + "learning_rate": 4.5639443651925826e-05, + "loss": 0.0028, + "step": 48930 + }, + { + "epoch": 8.73, + "learning_rate": 4.563855206847361e-05, + "loss": 0.0088, + "step": 48940 + }, + { + "epoch": 8.73, + "learning_rate": 4.56376604850214e-05, + "loss": 0.0058, + "step": 48950 + }, + { + "epoch": 8.73, + "learning_rate": 4.5636768901569186e-05, + "loss": 0.0026, + "step": 48960 + }, + { + "epoch": 8.73, + "learning_rate": 4.563587731811698e-05, + "loss": 0.0061, + "step": 48970 + }, + { + "epoch": 8.73, + "learning_rate": 4.563498573466476e-05, + "loss": 0.0038, + "step": 48980 + }, + { + "epoch": 8.74, + "learning_rate": 4.563409415121255e-05, + "loss": 0.0106, + "step": 48990 + }, + { + "epoch": 8.74, + "learning_rate": 4.5633202567760344e-05, + "loss": 0.004, + "step": 49000 + }, + { + "epoch": 8.74, + "learning_rate": 4.5632310984308136e-05, + "loss": 0.0064, + "step": 49010 + }, + { + "epoch": 8.74, + "learning_rate": 4.563141940085593e-05, + "loss": 0.0061, + "step": 49020 + }, + { + "epoch": 8.74, + "learning_rate": 4.563052781740371e-05, + "loss": 0.0069, + "step": 49030 + }, + { + "epoch": 8.74, + "learning_rate": 4.56296362339515e-05, + "loss": 0.0046, + "step": 49040 + }, + { + "epoch": 8.75, + "learning_rate": 4.562874465049929e-05, + "loss": 0.0069, + "step": 49050 + }, + { + "epoch": 8.75, + "learning_rate": 4.562785306704708e-05, + "loss": 0.0032, + "step": 49060 + }, + { + "epoch": 8.75, + "learning_rate": 4.562696148359486e-05, + "loss": 0.005, + "step": 49070 + }, + { + "epoch": 8.75, + "learning_rate": 4.5626069900142654e-05, + "loss": 0.0062, + "step": 49080 + }, + { + "epoch": 8.75, + "learning_rate": 4.5625178316690445e-05, + "loss": 0.0069, + "step": 49090 + }, + { + "epoch": 8.76, + "learning_rate": 4.562428673323823e-05, + "loss": 0.0028, + "step": 49100 + }, + { + "epoch": 8.76, + "learning_rate": 4.562339514978603e-05, + "loss": 0.0041, + "step": 49110 + }, + { + "epoch": 8.76, + "learning_rate": 4.562250356633381e-05, + "loss": 0.0051, + "step": 49120 + }, + { + "epoch": 8.76, + "learning_rate": 4.56216119828816e-05, + "loss": 0.006, + "step": 49130 + }, + { + "epoch": 8.76, + "learning_rate": 4.562072039942939e-05, + "loss": 0.0026, + "step": 49140 + }, + { + "epoch": 8.76, + "learning_rate": 4.561982881597718e-05, + "loss": 0.0045, + "step": 49150 + }, + { + "epoch": 8.77, + "learning_rate": 4.561893723252497e-05, + "loss": 0.0049, + "step": 49160 + }, + { + "epoch": 8.77, + "learning_rate": 4.5618045649072754e-05, + "loss": 0.0033, + "step": 49170 + }, + { + "epoch": 8.77, + "learning_rate": 4.5617154065620545e-05, + "loss": 0.0034, + "step": 49180 + }, + { + "epoch": 8.77, + "learning_rate": 4.561626248216833e-05, + "loss": 0.0045, + "step": 49190 + }, + { + "epoch": 8.77, + "learning_rate": 4.561537089871612e-05, + "loss": 0.0055, + "step": 49200 + }, + { + "epoch": 8.77, + "learning_rate": 4.5614479315263906e-05, + "loss": 0.0039, + "step": 49210 + }, + { + "epoch": 8.78, + "learning_rate": 4.5613587731811704e-05, + "loss": 0.006, + "step": 49220 + }, + { + "epoch": 8.78, + "learning_rate": 4.561269614835949e-05, + "loss": 0.0044, + "step": 49230 + }, + { + "epoch": 8.78, + "learning_rate": 4.561180456490728e-05, + "loss": 0.0027, + "step": 49240 + }, + { + "epoch": 8.78, + "learning_rate": 4.561091298145507e-05, + "loss": 0.0028, + "step": 49250 + }, + { + "epoch": 8.78, + "learning_rate": 4.5610021398002855e-05, + "loss": 0.006, + "step": 49260 + }, + { + "epoch": 8.79, + "learning_rate": 4.5609129814550646e-05, + "loss": 0.0027, + "step": 49270 + }, + { + "epoch": 8.79, + "learning_rate": 4.560823823109843e-05, + "loss": 0.0026, + "step": 49280 + }, + { + "epoch": 8.79, + "learning_rate": 4.560734664764622e-05, + "loss": 0.0036, + "step": 49290 + }, + { + "epoch": 8.79, + "learning_rate": 4.5606455064194006e-05, + "loss": 0.0055, + "step": 49300 + }, + { + "epoch": 8.79, + "learning_rate": 4.56055634807418e-05, + "loss": 0.0023, + "step": 49310 + }, + { + "epoch": 8.79, + "learning_rate": 4.560467189728959e-05, + "loss": 0.0038, + "step": 49320 + }, + { + "epoch": 8.8, + "learning_rate": 4.560378031383738e-05, + "loss": 0.0031, + "step": 49330 + }, + { + "epoch": 8.8, + "learning_rate": 4.560288873038517e-05, + "loss": 0.0047, + "step": 49340 + }, + { + "epoch": 8.8, + "learning_rate": 4.5601997146932955e-05, + "loss": 0.004, + "step": 49350 + }, + { + "epoch": 8.8, + "learning_rate": 4.5601105563480747e-05, + "loss": 0.0034, + "step": 49360 + }, + { + "epoch": 8.8, + "learning_rate": 4.560021398002853e-05, + "loss": 0.0047, + "step": 49370 + }, + { + "epoch": 8.81, + "learning_rate": 4.559932239657632e-05, + "loss": 0.0038, + "step": 49380 + }, + { + "epoch": 8.81, + "learning_rate": 4.5598430813124113e-05, + "loss": 0.0035, + "step": 49390 + }, + { + "epoch": 8.81, + "learning_rate": 4.55975392296719e-05, + "loss": 0.0064, + "step": 49400 + }, + { + "epoch": 8.81, + "learning_rate": 4.559664764621969e-05, + "loss": 0.004, + "step": 49410 + }, + { + "epoch": 8.81, + "learning_rate": 4.5595756062767473e-05, + "loss": 0.0069, + "step": 49420 + }, + { + "epoch": 8.81, + "learning_rate": 4.5594864479315265e-05, + "loss": 0.0059, + "step": 49430 + }, + { + "epoch": 8.82, + "learning_rate": 4.5593972895863056e-05, + "loss": 0.0048, + "step": 49440 + }, + { + "epoch": 8.82, + "learning_rate": 4.559308131241085e-05, + "loss": 0.0031, + "step": 49450 + }, + { + "epoch": 8.82, + "learning_rate": 4.559218972895863e-05, + "loss": 0.0024, + "step": 49460 + }, + { + "epoch": 8.82, + "learning_rate": 4.559129814550642e-05, + "loss": 0.0036, + "step": 49470 + }, + { + "epoch": 8.82, + "learning_rate": 4.5590406562054214e-05, + "loss": 0.0049, + "step": 49480 + }, + { + "epoch": 8.82, + "learning_rate": 4.5589514978602e-05, + "loss": 0.0046, + "step": 49490 + }, + { + "epoch": 8.83, + "learning_rate": 4.558862339514979e-05, + "loss": 0.0076, + "step": 49500 + }, + { + "epoch": 8.83, + "learning_rate": 4.5587731811697574e-05, + "loss": 0.0046, + "step": 49510 + }, + { + "epoch": 8.83, + "learning_rate": 4.5586840228245365e-05, + "loss": 0.0046, + "step": 49520 + }, + { + "epoch": 8.83, + "learning_rate": 4.558594864479315e-05, + "loss": 0.0062, + "step": 49530 + }, + { + "epoch": 8.83, + "learning_rate": 4.558505706134094e-05, + "loss": 0.0048, + "step": 49540 + }, + { + "epoch": 8.84, + "learning_rate": 4.558416547788873e-05, + "loss": 0.0051, + "step": 49550 + }, + { + "epoch": 8.84, + "learning_rate": 4.558327389443652e-05, + "loss": 0.0053, + "step": 49560 + }, + { + "epoch": 8.84, + "learning_rate": 4.5582382310984315e-05, + "loss": 0.0043, + "step": 49570 + }, + { + "epoch": 8.84, + "learning_rate": 4.55814907275321e-05, + "loss": 0.0048, + "step": 49580 + }, + { + "epoch": 8.84, + "learning_rate": 4.558059914407989e-05, + "loss": 0.0036, + "step": 49590 + }, + { + "epoch": 8.84, + "learning_rate": 4.5579707560627675e-05, + "loss": 0.0061, + "step": 49600 + }, + { + "epoch": 8.85, + "learning_rate": 4.5578815977175466e-05, + "loss": 0.007, + "step": 49610 + }, + { + "epoch": 8.85, + "learning_rate": 4.557792439372325e-05, + "loss": 0.0042, + "step": 49620 + }, + { + "epoch": 8.85, + "learning_rate": 4.557703281027104e-05, + "loss": 0.0036, + "step": 49630 + }, + { + "epoch": 8.85, + "learning_rate": 4.557614122681883e-05, + "loss": 0.0032, + "step": 49640 + }, + { + "epoch": 8.85, + "learning_rate": 4.557524964336662e-05, + "loss": 0.0062, + "step": 49650 + }, + { + "epoch": 8.86, + "learning_rate": 4.5574358059914415e-05, + "loss": 0.0058, + "step": 49660 + }, + { + "epoch": 8.86, + "learning_rate": 4.55734664764622e-05, + "loss": 0.0067, + "step": 49670 + }, + { + "epoch": 8.86, + "learning_rate": 4.557257489300999e-05, + "loss": 0.0056, + "step": 49680 + }, + { + "epoch": 8.86, + "learning_rate": 4.5571683309557775e-05, + "loss": 0.0055, + "step": 49690 + }, + { + "epoch": 8.86, + "learning_rate": 4.5570791726105566e-05, + "loss": 0.0035, + "step": 49700 + }, + { + "epoch": 8.86, + "learning_rate": 4.556990014265336e-05, + "loss": 0.0069, + "step": 49710 + }, + { + "epoch": 8.87, + "learning_rate": 4.556900855920114e-05, + "loss": 0.0055, + "step": 49720 + }, + { + "epoch": 8.87, + "learning_rate": 4.556811697574893e-05, + "loss": 0.0049, + "step": 49730 + }, + { + "epoch": 8.87, + "learning_rate": 4.556722539229672e-05, + "loss": 0.0064, + "step": 49740 + }, + { + "epoch": 8.87, + "learning_rate": 4.556633380884451e-05, + "loss": 0.0062, + "step": 49750 + }, + { + "epoch": 8.87, + "learning_rate": 4.556544222539229e-05, + "loss": 0.0052, + "step": 49760 + }, + { + "epoch": 8.87, + "learning_rate": 4.556455064194009e-05, + "loss": 0.0071, + "step": 49770 + }, + { + "epoch": 8.88, + "learning_rate": 4.5563659058487876e-05, + "loss": 0.0061, + "step": 49780 + }, + { + "epoch": 8.88, + "learning_rate": 4.556276747503567e-05, + "loss": 0.0062, + "step": 49790 + }, + { + "epoch": 8.88, + "learning_rate": 4.556187589158346e-05, + "loss": 0.0052, + "step": 49800 + }, + { + "epoch": 8.88, + "learning_rate": 4.556098430813124e-05, + "loss": 0.0045, + "step": 49810 + }, + { + "epoch": 8.88, + "learning_rate": 4.5560092724679034e-05, + "loss": 0.0059, + "step": 49820 + }, + { + "epoch": 8.89, + "learning_rate": 4.555920114122682e-05, + "loss": 0.003, + "step": 49830 + }, + { + "epoch": 8.89, + "learning_rate": 4.555830955777461e-05, + "loss": 0.0071, + "step": 49840 + }, + { + "epoch": 8.89, + "learning_rate": 4.5557417974322394e-05, + "loss": 0.0067, + "step": 49850 + }, + { + "epoch": 8.89, + "learning_rate": 4.5556526390870185e-05, + "loss": 0.0045, + "step": 49860 + }, + { + "epoch": 8.89, + "learning_rate": 4.5555634807417976e-05, + "loss": 0.0037, + "step": 49870 + }, + { + "epoch": 8.89, + "learning_rate": 4.555474322396577e-05, + "loss": 0.0034, + "step": 49880 + }, + { + "epoch": 8.9, + "learning_rate": 4.555385164051356e-05, + "loss": 0.007, + "step": 49890 + }, + { + "epoch": 8.9, + "learning_rate": 4.555296005706134e-05, + "loss": 0.0056, + "step": 49900 + }, + { + "epoch": 8.9, + "learning_rate": 4.5552068473609134e-05, + "loss": 0.0041, + "step": 49910 + }, + { + "epoch": 8.9, + "learning_rate": 4.555117689015692e-05, + "loss": 0.0074, + "step": 49920 + }, + { + "epoch": 8.9, + "learning_rate": 4.555028530670471e-05, + "loss": 0.0051, + "step": 49930 + }, + { + "epoch": 8.91, + "learning_rate": 4.55493937232525e-05, + "loss": 0.0056, + "step": 49940 + }, + { + "epoch": 8.91, + "learning_rate": 4.5548502139800286e-05, + "loss": 0.0071, + "step": 49950 + }, + { + "epoch": 8.91, + "learning_rate": 4.554761055634808e-05, + "loss": 0.004, + "step": 49960 + }, + { + "epoch": 8.91, + "learning_rate": 4.554671897289586e-05, + "loss": 0.0057, + "step": 49970 + }, + { + "epoch": 8.91, + "learning_rate": 4.554582738944365e-05, + "loss": 0.0046, + "step": 49980 + }, + { + "epoch": 8.91, + "learning_rate": 4.5544935805991444e-05, + "loss": 0.0039, + "step": 49990 + }, + { + "epoch": 8.92, + "learning_rate": 4.5544044222539235e-05, + "loss": 0.0051, + "step": 50000 + }, + { + "epoch": 8.92, + "learning_rate": 4.554315263908702e-05, + "loss": 0.0056, + "step": 50010 + }, + { + "epoch": 8.92, + "learning_rate": 4.554226105563481e-05, + "loss": 0.0035, + "step": 50020 + }, + { + "epoch": 8.92, + "learning_rate": 4.55413694721826e-05, + "loss": 0.003, + "step": 50030 + }, + { + "epoch": 8.92, + "learning_rate": 4.5540477888730386e-05, + "loss": 0.0051, + "step": 50040 + }, + { + "epoch": 8.92, + "learning_rate": 4.553958630527818e-05, + "loss": 0.0062, + "step": 50050 + }, + { + "epoch": 8.93, + "learning_rate": 4.553869472182596e-05, + "loss": 0.0045, + "step": 50060 + }, + { + "epoch": 8.93, + "learning_rate": 4.553780313837375e-05, + "loss": 0.0059, + "step": 50070 + }, + { + "epoch": 8.93, + "learning_rate": 4.553691155492154e-05, + "loss": 0.0055, + "step": 50080 + }, + { + "epoch": 8.93, + "learning_rate": 4.553601997146933e-05, + "loss": 0.0069, + "step": 50090 + }, + { + "epoch": 8.93, + "learning_rate": 4.553512838801712e-05, + "loss": 0.0055, + "step": 50100 + }, + { + "epoch": 8.94, + "learning_rate": 4.553423680456491e-05, + "loss": 0.0036, + "step": 50110 + }, + { + "epoch": 8.94, + "learning_rate": 4.55333452211127e-05, + "loss": 0.0063, + "step": 50120 + }, + { + "epoch": 8.94, + "learning_rate": 4.553245363766049e-05, + "loss": 0.0049, + "step": 50130 + }, + { + "epoch": 8.94, + "learning_rate": 4.553156205420828e-05, + "loss": 0.0037, + "step": 50140 + }, + { + "epoch": 8.94, + "learning_rate": 4.553067047075606e-05, + "loss": 0.0041, + "step": 50150 + }, + { + "epoch": 8.94, + "learning_rate": 4.5529778887303854e-05, + "loss": 0.0064, + "step": 50160 + }, + { + "epoch": 8.95, + "learning_rate": 4.5528887303851645e-05, + "loss": 0.0051, + "step": 50170 + }, + { + "epoch": 8.95, + "learning_rate": 4.552799572039943e-05, + "loss": 0.006, + "step": 50180 + }, + { + "epoch": 8.95, + "learning_rate": 4.552710413694722e-05, + "loss": 0.0044, + "step": 50190 + }, + { + "epoch": 8.95, + "learning_rate": 4.5526212553495005e-05, + "loss": 0.0044, + "step": 50200 + }, + { + "epoch": 8.95, + "learning_rate": 4.55253209700428e-05, + "loss": 0.0053, + "step": 50210 + }, + { + "epoch": 8.96, + "learning_rate": 4.552442938659059e-05, + "loss": 0.0055, + "step": 50220 + }, + { + "epoch": 8.96, + "learning_rate": 4.552353780313838e-05, + "loss": 0.0039, + "step": 50230 + }, + { + "epoch": 8.96, + "learning_rate": 4.552264621968616e-05, + "loss": 0.0049, + "step": 50240 + }, + { + "epoch": 8.96, + "learning_rate": 4.5521754636233954e-05, + "loss": 0.0063, + "step": 50250 + }, + { + "epoch": 8.96, + "learning_rate": 4.5520863052781745e-05, + "loss": 0.0035, + "step": 50260 + }, + { + "epoch": 8.96, + "learning_rate": 4.551997146932953e-05, + "loss": 0.0052, + "step": 50270 + }, + { + "epoch": 8.97, + "learning_rate": 4.551907988587732e-05, + "loss": 0.0054, + "step": 50280 + }, + { + "epoch": 8.97, + "learning_rate": 4.5518188302425105e-05, + "loss": 0.0051, + "step": 50290 + }, + { + "epoch": 8.97, + "learning_rate": 4.55172967189729e-05, + "loss": 0.0038, + "step": 50300 + }, + { + "epoch": 8.97, + "learning_rate": 4.551640513552068e-05, + "loss": 0.0041, + "step": 50310 + }, + { + "epoch": 8.97, + "learning_rate": 4.551551355206848e-05, + "loss": 0.0101, + "step": 50320 + }, + { + "epoch": 8.97, + "learning_rate": 4.5514621968616264e-05, + "loss": 0.0062, + "step": 50330 + }, + { + "epoch": 8.98, + "learning_rate": 4.5513730385164055e-05, + "loss": 0.0048, + "step": 50340 + }, + { + "epoch": 8.98, + "learning_rate": 4.5512838801711846e-05, + "loss": 0.0074, + "step": 50350 + }, + { + "epoch": 8.98, + "learning_rate": 4.551194721825963e-05, + "loss": 0.0094, + "step": 50360 + }, + { + "epoch": 8.98, + "learning_rate": 4.551105563480742e-05, + "loss": 0.0047, + "step": 50370 + }, + { + "epoch": 8.98, + "learning_rate": 4.5510164051355206e-05, + "loss": 0.0054, + "step": 50380 + }, + { + "epoch": 8.99, + "learning_rate": 4.5509272467903e-05, + "loss": 0.0047, + "step": 50390 + }, + { + "epoch": 8.99, + "learning_rate": 4.550838088445079e-05, + "loss": 0.0063, + "step": 50400 + }, + { + "epoch": 8.99, + "learning_rate": 4.550748930099857e-05, + "loss": 0.005, + "step": 50410 + }, + { + "epoch": 8.99, + "learning_rate": 4.5506597717546364e-05, + "loss": 0.0045, + "step": 50420 + }, + { + "epoch": 8.99, + "learning_rate": 4.5505706134094155e-05, + "loss": 0.0057, + "step": 50430 + }, + { + "epoch": 8.99, + "learning_rate": 4.5504814550641947e-05, + "loss": 0.0037, + "step": 50440 + }, + { + "epoch": 9.0, + "learning_rate": 4.550392296718973e-05, + "loss": 0.0081, + "step": 50450 + }, + { + "epoch": 9.0, + "learning_rate": 4.550303138373752e-05, + "loss": 0.004, + "step": 50460 + }, + { + "epoch": 9.0, + "learning_rate": 4.5502139800285307e-05, + "loss": 0.0036, + "step": 50470 + }, + { + "epoch": 9.0, + "eval_loss": 0.014716032892465591, + "eval_runtime": 197.987, + "eval_samples_per_second": 23.431, + "eval_steps_per_second": 2.929, + "step": 50472 + }, + { + "epoch": 9.0, + "learning_rate": 4.55012482168331e-05, + "loss": 0.0047, + "step": 50480 + }, + { + "epoch": 9.0, + "learning_rate": 4.550035663338089e-05, + "loss": 0.0052, + "step": 50490 + }, + { + "epoch": 9.0, + "learning_rate": 4.5499465049928673e-05, + "loss": 0.0039, + "step": 50500 + }, + { + "epoch": 9.01, + "learning_rate": 4.5498573466476465e-05, + "loss": 0.0028, + "step": 50510 + }, + { + "epoch": 9.01, + "learning_rate": 4.549768188302425e-05, + "loss": 0.0035, + "step": 50520 + }, + { + "epoch": 9.01, + "learning_rate": 4.549679029957204e-05, + "loss": 0.0074, + "step": 50530 + }, + { + "epoch": 9.01, + "learning_rate": 4.549589871611983e-05, + "loss": 0.0032, + "step": 50540 + }, + { + "epoch": 9.01, + "learning_rate": 4.549500713266762e-05, + "loss": 0.0076, + "step": 50550 + }, + { + "epoch": 9.02, + "learning_rate": 4.549411554921541e-05, + "loss": 0.0052, + "step": 50560 + }, + { + "epoch": 9.02, + "learning_rate": 4.54932239657632e-05, + "loss": 0.0069, + "step": 50570 + }, + { + "epoch": 9.02, + "learning_rate": 4.549233238231099e-05, + "loss": 0.0041, + "step": 50580 + }, + { + "epoch": 9.02, + "learning_rate": 4.5491440798858774e-05, + "loss": 0.003, + "step": 50590 + }, + { + "epoch": 9.02, + "learning_rate": 4.5490549215406565e-05, + "loss": 0.0047, + "step": 50600 + }, + { + "epoch": 9.02, + "learning_rate": 4.548965763195435e-05, + "loss": 0.0049, + "step": 50610 + }, + { + "epoch": 9.03, + "learning_rate": 4.548876604850214e-05, + "loss": 0.0032, + "step": 50620 + }, + { + "epoch": 9.03, + "learning_rate": 4.548787446504993e-05, + "loss": 0.0084, + "step": 50630 + }, + { + "epoch": 9.03, + "learning_rate": 4.5486982881597717e-05, + "loss": 0.004, + "step": 50640 + }, + { + "epoch": 9.03, + "learning_rate": 4.5486091298145514e-05, + "loss": 0.0034, + "step": 50650 + }, + { + "epoch": 9.03, + "learning_rate": 4.54851997146933e-05, + "loss": 0.0042, + "step": 50660 + }, + { + "epoch": 9.04, + "learning_rate": 4.548430813124109e-05, + "loss": 0.0036, + "step": 50670 + }, + { + "epoch": 9.04, + "learning_rate": 4.5483416547788875e-05, + "loss": 0.0035, + "step": 50680 + }, + { + "epoch": 9.04, + "learning_rate": 4.5482524964336666e-05, + "loss": 0.0019, + "step": 50690 + }, + { + "epoch": 9.04, + "learning_rate": 4.548163338088445e-05, + "loss": 0.0046, + "step": 50700 + }, + { + "epoch": 9.04, + "learning_rate": 4.548074179743224e-05, + "loss": 0.0061, + "step": 50710 + }, + { + "epoch": 9.04, + "learning_rate": 4.547985021398003e-05, + "loss": 0.0033, + "step": 50720 + }, + { + "epoch": 9.05, + "learning_rate": 4.547895863052782e-05, + "loss": 0.0033, + "step": 50730 + }, + { + "epoch": 9.05, + "learning_rate": 4.547806704707561e-05, + "loss": 0.003, + "step": 50740 + }, + { + "epoch": 9.05, + "learning_rate": 4.547717546362339e-05, + "loss": 0.0027, + "step": 50750 + }, + { + "epoch": 9.05, + "learning_rate": 4.547628388017119e-05, + "loss": 0.0025, + "step": 50760 + }, + { + "epoch": 9.05, + "learning_rate": 4.5475392296718975e-05, + "loss": 0.0032, + "step": 50770 + }, + { + "epoch": 9.05, + "learning_rate": 4.5474500713266766e-05, + "loss": 0.0063, + "step": 50780 + }, + { + "epoch": 9.06, + "learning_rate": 4.547360912981455e-05, + "loss": 0.0054, + "step": 50790 + }, + { + "epoch": 9.06, + "learning_rate": 4.547271754636234e-05, + "loss": 0.0045, + "step": 50800 + }, + { + "epoch": 9.06, + "learning_rate": 4.547182596291013e-05, + "loss": 0.0041, + "step": 50810 + }, + { + "epoch": 9.06, + "learning_rate": 4.547093437945792e-05, + "loss": 0.0029, + "step": 50820 + }, + { + "epoch": 9.06, + "learning_rate": 4.547004279600571e-05, + "loss": 0.0049, + "step": 50830 + }, + { + "epoch": 9.07, + "learning_rate": 4.546915121255349e-05, + "loss": 0.0037, + "step": 50840 + }, + { + "epoch": 9.07, + "learning_rate": 4.5468259629101284e-05, + "loss": 0.0042, + "step": 50850 + }, + { + "epoch": 9.07, + "learning_rate": 4.5467368045649076e-05, + "loss": 0.0058, + "step": 50860 + }, + { + "epoch": 9.07, + "learning_rate": 4.546647646219687e-05, + "loss": 0.0041, + "step": 50870 + }, + { + "epoch": 9.07, + "learning_rate": 4.546558487874466e-05, + "loss": 0.0033, + "step": 50880 + }, + { + "epoch": 9.07, + "learning_rate": 4.546469329529244e-05, + "loss": 0.0024, + "step": 50890 + }, + { + "epoch": 9.08, + "learning_rate": 4.5463801711840234e-05, + "loss": 0.003, + "step": 50900 + }, + { + "epoch": 9.08, + "learning_rate": 4.546291012838802e-05, + "loss": 0.0058, + "step": 50910 + }, + { + "epoch": 9.08, + "learning_rate": 4.546201854493581e-05, + "loss": 0.0049, + "step": 50920 + }, + { + "epoch": 9.08, + "learning_rate": 4.5461126961483594e-05, + "loss": 0.0027, + "step": 50930 + }, + { + "epoch": 9.08, + "learning_rate": 4.5460235378031385e-05, + "loss": 0.0053, + "step": 50940 + }, + { + "epoch": 9.09, + "learning_rate": 4.5459343794579176e-05, + "loss": 0.0031, + "step": 50950 + }, + { + "epoch": 9.09, + "learning_rate": 4.545845221112696e-05, + "loss": 0.0054, + "step": 50960 + }, + { + "epoch": 9.09, + "learning_rate": 4.545756062767475e-05, + "loss": 0.0025, + "step": 50970 + }, + { + "epoch": 9.09, + "learning_rate": 4.545666904422254e-05, + "loss": 0.005, + "step": 50980 + }, + { + "epoch": 9.09, + "learning_rate": 4.5455777460770334e-05, + "loss": 0.0048, + "step": 50990 + }, + { + "epoch": 9.09, + "learning_rate": 4.545488587731812e-05, + "loss": 0.0045, + "step": 51000 + }, + { + "epoch": 9.1, + "learning_rate": 4.545399429386591e-05, + "loss": 0.0049, + "step": 51010 + }, + { + "epoch": 9.1, + "learning_rate": 4.5453102710413694e-05, + "loss": 0.0047, + "step": 51020 + }, + { + "epoch": 9.1, + "learning_rate": 4.5452211126961486e-05, + "loss": 0.0038, + "step": 51030 + }, + { + "epoch": 9.1, + "learning_rate": 4.545131954350928e-05, + "loss": 0.0053, + "step": 51040 + }, + { + "epoch": 9.1, + "learning_rate": 4.545042796005706e-05, + "loss": 0.0062, + "step": 51050 + }, + { + "epoch": 9.1, + "learning_rate": 4.544953637660485e-05, + "loss": 0.0053, + "step": 51060 + }, + { + "epoch": 9.11, + "learning_rate": 4.544864479315264e-05, + "loss": 0.0062, + "step": 51070 + }, + { + "epoch": 9.11, + "learning_rate": 4.544775320970043e-05, + "loss": 0.0061, + "step": 51080 + }, + { + "epoch": 9.11, + "learning_rate": 4.544686162624822e-05, + "loss": 0.005, + "step": 51090 + }, + { + "epoch": 9.11, + "learning_rate": 4.544597004279601e-05, + "loss": 0.0039, + "step": 51100 + }, + { + "epoch": 9.11, + "learning_rate": 4.54450784593438e-05, + "loss": 0.0034, + "step": 51110 + }, + { + "epoch": 9.12, + "learning_rate": 4.5444186875891586e-05, + "loss": 0.0058, + "step": 51120 + }, + { + "epoch": 9.12, + "learning_rate": 4.544329529243938e-05, + "loss": 0.0041, + "step": 51130 + }, + { + "epoch": 9.12, + "learning_rate": 4.544240370898716e-05, + "loss": 0.0038, + "step": 51140 + }, + { + "epoch": 9.12, + "learning_rate": 4.544151212553495e-05, + "loss": 0.0043, + "step": 51150 + }, + { + "epoch": 9.12, + "learning_rate": 4.544062054208274e-05, + "loss": 0.0027, + "step": 51160 + }, + { + "epoch": 9.12, + "learning_rate": 4.543972895863053e-05, + "loss": 0.0029, + "step": 51170 + }, + { + "epoch": 9.13, + "learning_rate": 4.543883737517832e-05, + "loss": 0.0048, + "step": 51180 + }, + { + "epoch": 9.13, + "learning_rate": 4.5437945791726104e-05, + "loss": 0.0034, + "step": 51190 + }, + { + "epoch": 9.13, + "learning_rate": 4.54370542082739e-05, + "loss": 0.0036, + "step": 51200 + }, + { + "epoch": 9.13, + "learning_rate": 4.543616262482169e-05, + "loss": 0.0064, + "step": 51210 + }, + { + "epoch": 9.13, + "learning_rate": 4.543527104136948e-05, + "loss": 0.007, + "step": 51220 + }, + { + "epoch": 9.14, + "learning_rate": 4.543437945791726e-05, + "loss": 0.0049, + "step": 51230 + }, + { + "epoch": 9.14, + "learning_rate": 4.5433487874465054e-05, + "loss": 0.0026, + "step": 51240 + }, + { + "epoch": 9.14, + "learning_rate": 4.543259629101284e-05, + "loss": 0.0045, + "step": 51250 + }, + { + "epoch": 9.14, + "learning_rate": 4.543170470756063e-05, + "loss": 0.0033, + "step": 51260 + }, + { + "epoch": 9.14, + "learning_rate": 4.543081312410842e-05, + "loss": 0.0043, + "step": 51270 + }, + { + "epoch": 9.14, + "learning_rate": 4.5429921540656205e-05, + "loss": 0.0039, + "step": 51280 + }, + { + "epoch": 9.15, + "learning_rate": 4.5429029957203996e-05, + "loss": 0.0072, + "step": 51290 + }, + { + "epoch": 9.15, + "learning_rate": 4.542813837375178e-05, + "loss": 0.0033, + "step": 51300 + }, + { + "epoch": 9.15, + "learning_rate": 4.542724679029958e-05, + "loss": 0.0038, + "step": 51310 + }, + { + "epoch": 9.15, + "learning_rate": 4.542635520684736e-05, + "loss": 0.0045, + "step": 51320 + }, + { + "epoch": 9.15, + "learning_rate": 4.5425463623395154e-05, + "loss": 0.0031, + "step": 51330 + }, + { + "epoch": 9.15, + "learning_rate": 4.5424572039942945e-05, + "loss": 0.004, + "step": 51340 + }, + { + "epoch": 9.16, + "learning_rate": 4.542368045649073e-05, + "loss": 0.005, + "step": 51350 + }, + { + "epoch": 9.16, + "learning_rate": 4.542278887303852e-05, + "loss": 0.0046, + "step": 51360 + }, + { + "epoch": 9.16, + "learning_rate": 4.5421897289586305e-05, + "loss": 0.0032, + "step": 51370 + }, + { + "epoch": 9.16, + "learning_rate": 4.54210057061341e-05, + "loss": 0.0094, + "step": 51380 + }, + { + "epoch": 9.16, + "learning_rate": 4.542011412268188e-05, + "loss": 0.0046, + "step": 51390 + }, + { + "epoch": 9.17, + "learning_rate": 4.541922253922967e-05, + "loss": 0.003, + "step": 51400 + }, + { + "epoch": 9.17, + "learning_rate": 4.5418330955777463e-05, + "loss": 0.0069, + "step": 51410 + }, + { + "epoch": 9.17, + "learning_rate": 4.5417439372325255e-05, + "loss": 0.003, + "step": 51420 + }, + { + "epoch": 9.17, + "learning_rate": 4.5416547788873046e-05, + "loss": 0.004, + "step": 51430 + }, + { + "epoch": 9.17, + "learning_rate": 4.541574536376605e-05, + "loss": 0.0075, + "step": 51440 + }, + { + "epoch": 9.17, + "learning_rate": 4.541485378031384e-05, + "loss": 0.0027, + "step": 51450 + }, + { + "epoch": 9.18, + "learning_rate": 4.541396219686162e-05, + "loss": 0.0025, + "step": 51460 + }, + { + "epoch": 9.18, + "learning_rate": 4.541307061340942e-05, + "loss": 0.0056, + "step": 51470 + }, + { + "epoch": 9.18, + "learning_rate": 4.5412179029957205e-05, + "loss": 0.004, + "step": 51480 + }, + { + "epoch": 9.18, + "learning_rate": 4.5411287446504996e-05, + "loss": 0.0043, + "step": 51490 + }, + { + "epoch": 9.18, + "learning_rate": 4.541039586305279e-05, + "loss": 0.0059, + "step": 51500 + }, + { + "epoch": 9.19, + "learning_rate": 4.540950427960057e-05, + "loss": 0.0046, + "step": 51510 + }, + { + "epoch": 9.19, + "learning_rate": 4.540861269614836e-05, + "loss": 0.0042, + "step": 51520 + }, + { + "epoch": 9.19, + "learning_rate": 4.540772111269615e-05, + "loss": 0.0045, + "step": 51530 + }, + { + "epoch": 9.19, + "learning_rate": 4.540682952924394e-05, + "loss": 0.0033, + "step": 51540 + }, + { + "epoch": 9.19, + "learning_rate": 4.540593794579172e-05, + "loss": 0.0068, + "step": 51550 + }, + { + "epoch": 9.19, + "learning_rate": 4.5405046362339515e-05, + "loss": 0.0049, + "step": 51560 + }, + { + "epoch": 9.2, + "learning_rate": 4.5404154778887306e-05, + "loss": 0.0069, + "step": 51570 + }, + { + "epoch": 9.2, + "learning_rate": 4.54032631954351e-05, + "loss": 0.005, + "step": 51580 + }, + { + "epoch": 9.2, + "learning_rate": 4.540237161198289e-05, + "loss": 0.0038, + "step": 51590 + }, + { + "epoch": 9.2, + "learning_rate": 4.540148002853067e-05, + "loss": 0.0031, + "step": 51600 + }, + { + "epoch": 9.2, + "learning_rate": 4.5400588445078464e-05, + "loss": 0.0038, + "step": 51610 + }, + { + "epoch": 9.2, + "learning_rate": 4.539969686162625e-05, + "loss": 0.0037, + "step": 51620 + }, + { + "epoch": 9.21, + "learning_rate": 4.539880527817404e-05, + "loss": 0.0066, + "step": 51630 + }, + { + "epoch": 9.21, + "learning_rate": 4.539791369472183e-05, + "loss": 0.0042, + "step": 51640 + }, + { + "epoch": 9.21, + "learning_rate": 4.5397022111269615e-05, + "loss": 0.0035, + "step": 51650 + }, + { + "epoch": 9.21, + "learning_rate": 4.5396130527817406e-05, + "loss": 0.0027, + "step": 51660 + }, + { + "epoch": 9.21, + "learning_rate": 4.539523894436519e-05, + "loss": 0.0042, + "step": 51670 + }, + { + "epoch": 9.22, + "learning_rate": 4.539434736091298e-05, + "loss": 0.0045, + "step": 51680 + }, + { + "epoch": 9.22, + "learning_rate": 4.539345577746077e-05, + "loss": 0.0041, + "step": 51690 + }, + { + "epoch": 9.22, + "learning_rate": 4.5392564194008564e-05, + "loss": 0.0041, + "step": 51700 + }, + { + "epoch": 9.22, + "learning_rate": 4.539167261055635e-05, + "loss": 0.0038, + "step": 51710 + }, + { + "epoch": 9.22, + "learning_rate": 4.539078102710414e-05, + "loss": 0.003, + "step": 51720 + }, + { + "epoch": 9.22, + "learning_rate": 4.538988944365193e-05, + "loss": 0.0037, + "step": 51730 + }, + { + "epoch": 9.23, + "learning_rate": 4.5388997860199716e-05, + "loss": 0.0023, + "step": 51740 + }, + { + "epoch": 9.23, + "learning_rate": 4.538810627674751e-05, + "loss": 0.0026, + "step": 51750 + }, + { + "epoch": 9.23, + "learning_rate": 4.538721469329529e-05, + "loss": 0.0027, + "step": 51760 + }, + { + "epoch": 9.23, + "learning_rate": 4.538632310984308e-05, + "loss": 0.0024, + "step": 51770 + }, + { + "epoch": 9.23, + "learning_rate": 4.538543152639087e-05, + "loss": 0.005, + "step": 51780 + }, + { + "epoch": 9.24, + "learning_rate": 4.538453994293866e-05, + "loss": 0.0074, + "step": 51790 + }, + { + "epoch": 9.24, + "learning_rate": 4.538364835948645e-05, + "loss": 0.0054, + "step": 51800 + }, + { + "epoch": 9.24, + "learning_rate": 4.538275677603424e-05, + "loss": 0.0064, + "step": 51810 + }, + { + "epoch": 9.24, + "learning_rate": 4.538186519258203e-05, + "loss": 0.0031, + "step": 51820 + }, + { + "epoch": 9.24, + "learning_rate": 4.5380973609129816e-05, + "loss": 0.0038, + "step": 51830 + }, + { + "epoch": 9.24, + "learning_rate": 4.538008202567761e-05, + "loss": 0.0047, + "step": 51840 + }, + { + "epoch": 9.25, + "learning_rate": 4.537919044222539e-05, + "loss": 0.0061, + "step": 51850 + }, + { + "epoch": 9.25, + "learning_rate": 4.537829885877318e-05, + "loss": 0.0038, + "step": 51860 + }, + { + "epoch": 9.25, + "learning_rate": 4.5377407275320974e-05, + "loss": 0.0039, + "step": 51870 + }, + { + "epoch": 9.25, + "learning_rate": 4.537651569186876e-05, + "loss": 0.0041, + "step": 51880 + }, + { + "epoch": 9.25, + "learning_rate": 4.537562410841655e-05, + "loss": 0.0048, + "step": 51890 + }, + { + "epoch": 9.25, + "learning_rate": 4.5374732524964334e-05, + "loss": 0.0042, + "step": 51900 + }, + { + "epoch": 9.26, + "learning_rate": 4.537384094151213e-05, + "loss": 0.0055, + "step": 51910 + }, + { + "epoch": 9.26, + "learning_rate": 4.537294935805992e-05, + "loss": 0.0048, + "step": 51920 + }, + { + "epoch": 9.26, + "learning_rate": 4.537205777460771e-05, + "loss": 0.0035, + "step": 51930 + }, + { + "epoch": 9.26, + "learning_rate": 4.537116619115549e-05, + "loss": 0.0018, + "step": 51940 + }, + { + "epoch": 9.26, + "learning_rate": 4.5370274607703284e-05, + "loss": 0.004, + "step": 51950 + }, + { + "epoch": 9.27, + "learning_rate": 4.5369383024251075e-05, + "loss": 0.0071, + "step": 51960 + }, + { + "epoch": 9.27, + "learning_rate": 4.536849144079886e-05, + "loss": 0.0055, + "step": 51970 + }, + { + "epoch": 9.27, + "learning_rate": 4.536759985734665e-05, + "loss": 0.0048, + "step": 51980 + }, + { + "epoch": 9.27, + "learning_rate": 4.5366708273894435e-05, + "loss": 0.0028, + "step": 51990 + }, + { + "epoch": 9.27, + "learning_rate": 4.5365816690442226e-05, + "loss": 0.0056, + "step": 52000 + }, + { + "epoch": 9.27, + "learning_rate": 4.536492510699001e-05, + "loss": 0.0063, + "step": 52010 + }, + { + "epoch": 9.28, + "learning_rate": 4.536403352353781e-05, + "loss": 0.0038, + "step": 52020 + }, + { + "epoch": 9.28, + "learning_rate": 4.536314194008559e-05, + "loss": 0.006, + "step": 52030 + }, + { + "epoch": 9.28, + "learning_rate": 4.5362250356633384e-05, + "loss": 0.0029, + "step": 52040 + }, + { + "epoch": 9.28, + "learning_rate": 4.5361358773181175e-05, + "loss": 0.0042, + "step": 52050 + }, + { + "epoch": 9.28, + "learning_rate": 4.536046718972896e-05, + "loss": 0.0039, + "step": 52060 + }, + { + "epoch": 9.28, + "learning_rate": 4.535957560627675e-05, + "loss": 0.0049, + "step": 52070 + }, + { + "epoch": 9.29, + "learning_rate": 4.5358684022824536e-05, + "loss": 0.0037, + "step": 52080 + }, + { + "epoch": 9.29, + "learning_rate": 4.535779243937233e-05, + "loss": 0.0026, + "step": 52090 + }, + { + "epoch": 9.29, + "learning_rate": 4.535690085592012e-05, + "loss": 0.003, + "step": 52100 + }, + { + "epoch": 9.29, + "learning_rate": 4.53560092724679e-05, + "loss": 0.002, + "step": 52110 + }, + { + "epoch": 9.29, + "learning_rate": 4.5355117689015694e-05, + "loss": 0.0014, + "step": 52120 + }, + { + "epoch": 9.3, + "learning_rate": 4.5354226105563485e-05, + "loss": 0.003, + "step": 52130 + }, + { + "epoch": 9.3, + "learning_rate": 4.5353334522111276e-05, + "loss": 0.0054, + "step": 52140 + }, + { + "epoch": 9.3, + "learning_rate": 4.535244293865906e-05, + "loss": 0.0074, + "step": 52150 + }, + { + "epoch": 9.3, + "learning_rate": 4.535155135520685e-05, + "loss": 0.0054, + "step": 52160 + }, + { + "epoch": 9.3, + "learning_rate": 4.5350659771754636e-05, + "loss": 0.0119, + "step": 52170 + }, + { + "epoch": 9.3, + "learning_rate": 4.534976818830243e-05, + "loss": 0.0043, + "step": 52180 + }, + { + "epoch": 9.31, + "learning_rate": 4.534887660485022e-05, + "loss": 0.0057, + "step": 52190 + }, + { + "epoch": 9.31, + "learning_rate": 4.5347985021398e-05, + "loss": 0.0059, + "step": 52200 + }, + { + "epoch": 9.31, + "learning_rate": 4.5347093437945794e-05, + "loss": 0.0052, + "step": 52210 + }, + { + "epoch": 9.31, + "learning_rate": 4.534620185449358e-05, + "loss": 0.0028, + "step": 52220 + }, + { + "epoch": 9.31, + "learning_rate": 4.534531027104137e-05, + "loss": 0.0049, + "step": 52230 + }, + { + "epoch": 9.32, + "learning_rate": 4.534441868758916e-05, + "loss": 0.008, + "step": 52240 + }, + { + "epoch": 9.32, + "learning_rate": 4.534352710413695e-05, + "loss": 0.0047, + "step": 52250 + }, + { + "epoch": 9.32, + "learning_rate": 4.534263552068474e-05, + "loss": 0.0026, + "step": 52260 + }, + { + "epoch": 9.32, + "learning_rate": 4.534174393723253e-05, + "loss": 0.0059, + "step": 52270 + }, + { + "epoch": 9.32, + "learning_rate": 4.534085235378032e-05, + "loss": 0.0054, + "step": 52280 + }, + { + "epoch": 9.32, + "learning_rate": 4.5339960770328104e-05, + "loss": 0.0038, + "step": 52290 + }, + { + "epoch": 9.33, + "learning_rate": 4.5339069186875895e-05, + "loss": 0.0036, + "step": 52300 + }, + { + "epoch": 9.33, + "learning_rate": 4.533817760342368e-05, + "loss": 0.0027, + "step": 52310 + }, + { + "epoch": 9.33, + "learning_rate": 4.533728601997147e-05, + "loss": 0.0078, + "step": 52320 + }, + { + "epoch": 9.33, + "learning_rate": 4.533639443651926e-05, + "loss": 0.0018, + "step": 52330 + }, + { + "epoch": 9.33, + "learning_rate": 4.5335502853067046e-05, + "loss": 0.0033, + "step": 52340 + }, + { + "epoch": 9.33, + "learning_rate": 4.5334611269614844e-05, + "loss": 0.0049, + "step": 52350 + }, + { + "epoch": 9.34, + "learning_rate": 4.533371968616263e-05, + "loss": 0.0044, + "step": 52360 + }, + { + "epoch": 9.34, + "learning_rate": 4.533282810271042e-05, + "loss": 0.0064, + "step": 52370 + }, + { + "epoch": 9.34, + "learning_rate": 4.5331936519258204e-05, + "loss": 0.0094, + "step": 52380 + }, + { + "epoch": 9.34, + "learning_rate": 4.5331044935805995e-05, + "loss": 0.0044, + "step": 52390 + }, + { + "epoch": 9.34, + "learning_rate": 4.533015335235378e-05, + "loss": 0.0051, + "step": 52400 + }, + { + "epoch": 9.35, + "learning_rate": 4.532926176890157e-05, + "loss": 0.0051, + "step": 52410 + }, + { + "epoch": 9.35, + "learning_rate": 4.532837018544936e-05, + "loss": 0.0034, + "step": 52420 + }, + { + "epoch": 9.35, + "learning_rate": 4.5327478601997147e-05, + "loss": 0.0042, + "step": 52430 + }, + { + "epoch": 9.35, + "learning_rate": 4.532658701854494e-05, + "loss": 0.0054, + "step": 52440 + }, + { + "epoch": 9.35, + "learning_rate": 4.532569543509272e-05, + "loss": 0.0032, + "step": 52450 + }, + { + "epoch": 9.35, + "learning_rate": 4.532480385164052e-05, + "loss": 0.0045, + "step": 52460 + }, + { + "epoch": 9.36, + "learning_rate": 4.5323912268188305e-05, + "loss": 0.0056, + "step": 52470 + }, + { + "epoch": 9.36, + "learning_rate": 4.5323020684736096e-05, + "loss": 0.0049, + "step": 52480 + }, + { + "epoch": 9.36, + "learning_rate": 4.532212910128388e-05, + "loss": 0.005, + "step": 52490 + }, + { + "epoch": 9.36, + "learning_rate": 4.532123751783167e-05, + "loss": 0.0053, + "step": 52500 + }, + { + "epoch": 9.36, + "learning_rate": 4.532034593437946e-05, + "loss": 0.002, + "step": 52510 + }, + { + "epoch": 9.37, + "learning_rate": 4.531945435092725e-05, + "loss": 0.0028, + "step": 52520 + }, + { + "epoch": 9.37, + "learning_rate": 4.531856276747504e-05, + "loss": 0.0038, + "step": 52530 + }, + { + "epoch": 9.37, + "learning_rate": 4.531767118402282e-05, + "loss": 0.0045, + "step": 52540 + }, + { + "epoch": 9.37, + "learning_rate": 4.5316779600570614e-05, + "loss": 0.0054, + "step": 52550 + }, + { + "epoch": 9.37, + "learning_rate": 4.5315888017118405e-05, + "loss": 0.0034, + "step": 52560 + }, + { + "epoch": 9.37, + "learning_rate": 4.5314996433666196e-05, + "loss": 0.0039, + "step": 52570 + }, + { + "epoch": 9.38, + "learning_rate": 4.531410485021398e-05, + "loss": 0.0047, + "step": 52580 + }, + { + "epoch": 9.38, + "learning_rate": 4.531321326676177e-05, + "loss": 0.0099, + "step": 52590 + }, + { + "epoch": 9.38, + "learning_rate": 4.531232168330956e-05, + "loss": 0.0029, + "step": 52600 + }, + { + "epoch": 9.38, + "learning_rate": 4.531143009985735e-05, + "loss": 0.0044, + "step": 52610 + }, + { + "epoch": 9.38, + "learning_rate": 4.531053851640514e-05, + "loss": 0.0031, + "step": 52620 + }, + { + "epoch": 9.38, + "learning_rate": 4.530964693295292e-05, + "loss": 0.0075, + "step": 52630 + }, + { + "epoch": 9.39, + "learning_rate": 4.5308755349500715e-05, + "loss": 0.0036, + "step": 52640 + }, + { + "epoch": 9.39, + "learning_rate": 4.5307863766048506e-05, + "loss": 0.0061, + "step": 52650 + }, + { + "epoch": 9.39, + "learning_rate": 4.530697218259629e-05, + "loss": 0.0043, + "step": 52660 + }, + { + "epoch": 9.39, + "learning_rate": 4.530608059914408e-05, + "loss": 0.0049, + "step": 52670 + }, + { + "epoch": 9.39, + "learning_rate": 4.530518901569187e-05, + "loss": 0.0032, + "step": 52680 + }, + { + "epoch": 9.4, + "learning_rate": 4.5304297432239664e-05, + "loss": 0.0045, + "step": 52690 + }, + { + "epoch": 9.4, + "learning_rate": 4.530340584878745e-05, + "loss": 0.0084, + "step": 52700 + }, + { + "epoch": 9.4, + "learning_rate": 4.530251426533524e-05, + "loss": 0.0046, + "step": 52710 + }, + { + "epoch": 9.4, + "learning_rate": 4.5301622681883024e-05, + "loss": 0.0031, + "step": 52720 + }, + { + "epoch": 9.4, + "learning_rate": 4.5300731098430815e-05, + "loss": 0.0032, + "step": 52730 + }, + { + "epoch": 9.4, + "learning_rate": 4.5299839514978606e-05, + "loss": 0.0059, + "step": 52740 + }, + { + "epoch": 9.41, + "learning_rate": 4.529894793152639e-05, + "loss": 0.0067, + "step": 52750 + }, + { + "epoch": 9.41, + "learning_rate": 4.529805634807418e-05, + "loss": 0.0036, + "step": 52760 + }, + { + "epoch": 9.41, + "learning_rate": 4.5297164764621966e-05, + "loss": 0.0035, + "step": 52770 + }, + { + "epoch": 9.41, + "learning_rate": 4.529627318116976e-05, + "loss": 0.0054, + "step": 52780 + }, + { + "epoch": 9.41, + "learning_rate": 4.529538159771755e-05, + "loss": 0.006, + "step": 52790 + }, + { + "epoch": 9.42, + "learning_rate": 4.529449001426534e-05, + "loss": 0.003, + "step": 52800 + }, + { + "epoch": 9.42, + "learning_rate": 4.5293598430813124e-05, + "loss": 0.0038, + "step": 52810 + }, + { + "epoch": 9.42, + "learning_rate": 4.5292706847360916e-05, + "loss": 0.0043, + "step": 52820 + }, + { + "epoch": 9.42, + "learning_rate": 4.529181526390871e-05, + "loss": 0.0046, + "step": 52830 + }, + { + "epoch": 9.42, + "learning_rate": 4.529092368045649e-05, + "loss": 0.0029, + "step": 52840 + }, + { + "epoch": 9.42, + "learning_rate": 4.529003209700428e-05, + "loss": 0.0049, + "step": 52850 + }, + { + "epoch": 9.43, + "learning_rate": 4.528914051355207e-05, + "loss": 0.0053, + "step": 52860 + }, + { + "epoch": 9.43, + "learning_rate": 4.528824893009986e-05, + "loss": 0.0083, + "step": 52870 + }, + { + "epoch": 9.43, + "learning_rate": 4.528735734664765e-05, + "loss": 0.0049, + "step": 52880 + }, + { + "epoch": 9.43, + "learning_rate": 4.5286465763195434e-05, + "loss": 0.0039, + "step": 52890 + }, + { + "epoch": 9.43, + "learning_rate": 4.528557417974323e-05, + "loss": 0.0032, + "step": 52900 + }, + { + "epoch": 9.43, + "learning_rate": 4.5284682596291016e-05, + "loss": 0.0052, + "step": 52910 + }, + { + "epoch": 9.44, + "learning_rate": 4.528379101283881e-05, + "loss": 0.0034, + "step": 52920 + }, + { + "epoch": 9.44, + "learning_rate": 4.528289942938659e-05, + "loss": 0.0071, + "step": 52930 + }, + { + "epoch": 9.44, + "learning_rate": 4.528200784593438e-05, + "loss": 0.0041, + "step": 52940 + }, + { + "epoch": 9.44, + "learning_rate": 4.528111626248217e-05, + "loss": 0.006, + "step": 52950 + }, + { + "epoch": 9.44, + "learning_rate": 4.528022467902996e-05, + "loss": 0.0037, + "step": 52960 + }, + { + "epoch": 9.45, + "learning_rate": 4.527933309557775e-05, + "loss": 0.0036, + "step": 52970 + }, + { + "epoch": 9.45, + "learning_rate": 4.5278441512125534e-05, + "loss": 0.0036, + "step": 52980 + }, + { + "epoch": 9.45, + "learning_rate": 4.5277549928673326e-05, + "loss": 0.0054, + "step": 52990 + }, + { + "epoch": 9.45, + "learning_rate": 4.527665834522111e-05, + "loss": 0.0033, + "step": 53000 + }, + { + "epoch": 9.45, + "learning_rate": 4.527576676176891e-05, + "loss": 0.0028, + "step": 53010 + }, + { + "epoch": 9.45, + "learning_rate": 4.527487517831669e-05, + "loss": 0.0044, + "step": 53020 + }, + { + "epoch": 9.46, + "learning_rate": 4.5273983594864484e-05, + "loss": 0.0059, + "step": 53030 + }, + { + "epoch": 9.46, + "learning_rate": 4.527309201141227e-05, + "loss": 0.0062, + "step": 53040 + }, + { + "epoch": 9.46, + "learning_rate": 4.527220042796006e-05, + "loss": 0.0047, + "step": 53050 + }, + { + "epoch": 9.46, + "learning_rate": 4.527130884450785e-05, + "loss": 0.0062, + "step": 53060 + }, + { + "epoch": 9.46, + "learning_rate": 4.5270417261055635e-05, + "loss": 0.0036, + "step": 53070 + }, + { + "epoch": 9.47, + "learning_rate": 4.5269525677603426e-05, + "loss": 0.0038, + "step": 53080 + }, + { + "epoch": 9.47, + "learning_rate": 4.526863409415121e-05, + "loss": 0.0074, + "step": 53090 + }, + { + "epoch": 9.47, + "learning_rate": 4.5267742510699e-05, + "loss": 0.0039, + "step": 53100 + }, + { + "epoch": 9.47, + "learning_rate": 4.526685092724679e-05, + "loss": 0.0029, + "step": 53110 + }, + { + "epoch": 9.47, + "learning_rate": 4.5265959343794584e-05, + "loss": 0.004, + "step": 53120 + }, + { + "epoch": 9.47, + "learning_rate": 4.5265067760342375e-05, + "loss": 0.003, + "step": 53130 + }, + { + "epoch": 9.48, + "learning_rate": 4.526417617689016e-05, + "loss": 0.0033, + "step": 53140 + }, + { + "epoch": 9.48, + "learning_rate": 4.526328459343795e-05, + "loss": 0.0055, + "step": 53150 + }, + { + "epoch": 9.48, + "learning_rate": 4.5262393009985735e-05, + "loss": 0.0037, + "step": 53160 + }, + { + "epoch": 9.48, + "learning_rate": 4.526150142653353e-05, + "loss": 0.0042, + "step": 53170 + }, + { + "epoch": 9.48, + "learning_rate": 4.526060984308131e-05, + "loss": 0.0032, + "step": 53180 + }, + { + "epoch": 9.48, + "learning_rate": 4.52597182596291e-05, + "loss": 0.0041, + "step": 53190 + }, + { + "epoch": 9.49, + "learning_rate": 4.5258826676176894e-05, + "loss": 0.0036, + "step": 53200 + }, + { + "epoch": 9.49, + "learning_rate": 4.525793509272468e-05, + "loss": 0.0052, + "step": 53210 + }, + { + "epoch": 9.49, + "learning_rate": 4.525704350927247e-05, + "loss": 0.0074, + "step": 53220 + }, + { + "epoch": 9.49, + "learning_rate": 4.525615192582026e-05, + "loss": 0.0046, + "step": 53230 + }, + { + "epoch": 9.49, + "learning_rate": 4.525526034236805e-05, + "loss": 0.0036, + "step": 53240 + }, + { + "epoch": 9.5, + "learning_rate": 4.5254368758915836e-05, + "loss": 0.002, + "step": 53250 + }, + { + "epoch": 9.5, + "learning_rate": 4.525347717546363e-05, + "loss": 0.0047, + "step": 53260 + }, + { + "epoch": 9.5, + "learning_rate": 4.525258559201141e-05, + "loss": 0.004, + "step": 53270 + }, + { + "epoch": 9.5, + "learning_rate": 4.52516940085592e-05, + "loss": 0.0055, + "step": 53280 + }, + { + "epoch": 9.5, + "learning_rate": 4.5250802425106994e-05, + "loss": 0.0047, + "step": 53290 + }, + { + "epoch": 9.5, + "learning_rate": 4.524991084165478e-05, + "loss": 0.0041, + "step": 53300 + }, + { + "epoch": 9.51, + "learning_rate": 4.524901925820257e-05, + "loss": 0.0055, + "step": 53310 + }, + { + "epoch": 9.51, + "learning_rate": 4.5248127674750354e-05, + "loss": 0.0041, + "step": 53320 + }, + { + "epoch": 9.51, + "learning_rate": 4.5247236091298145e-05, + "loss": 0.0032, + "step": 53330 + }, + { + "epoch": 9.51, + "learning_rate": 4.5246344507845937e-05, + "loss": 0.0028, + "step": 53340 + }, + { + "epoch": 9.51, + "learning_rate": 4.524545292439373e-05, + "loss": 0.0048, + "step": 53350 + }, + { + "epoch": 9.51, + "learning_rate": 4.524456134094152e-05, + "loss": 0.0066, + "step": 53360 + }, + { + "epoch": 9.52, + "learning_rate": 4.5243669757489303e-05, + "loss": 0.0031, + "step": 53370 + }, + { + "epoch": 9.52, + "learning_rate": 4.5242778174037095e-05, + "loss": 0.0039, + "step": 53380 + }, + { + "epoch": 9.52, + "learning_rate": 4.524188659058488e-05, + "loss": 0.0041, + "step": 53390 + }, + { + "epoch": 9.52, + "learning_rate": 4.524099500713267e-05, + "loss": 0.004, + "step": 53400 + }, + { + "epoch": 9.52, + "learning_rate": 4.5240103423680455e-05, + "loss": 0.0065, + "step": 53410 + }, + { + "epoch": 9.53, + "learning_rate": 4.5239211840228246e-05, + "loss": 0.0064, + "step": 53420 + }, + { + "epoch": 9.53, + "learning_rate": 4.523832025677604e-05, + "loss": 0.0035, + "step": 53430 + }, + { + "epoch": 9.53, + "learning_rate": 4.523742867332382e-05, + "loss": 0.0031, + "step": 53440 + }, + { + "epoch": 9.53, + "learning_rate": 4.523653708987162e-05, + "loss": 0.0032, + "step": 53450 + }, + { + "epoch": 9.53, + "learning_rate": 4.5235645506419404e-05, + "loss": 0.006, + "step": 53460 + }, + { + "epoch": 9.53, + "learning_rate": 4.5234753922967195e-05, + "loss": 0.0044, + "step": 53470 + }, + { + "epoch": 9.54, + "learning_rate": 4.523386233951498e-05, + "loss": 0.005, + "step": 53480 + }, + { + "epoch": 9.54, + "learning_rate": 4.523297075606277e-05, + "loss": 0.0042, + "step": 53490 + }, + { + "epoch": 9.54, + "learning_rate": 4.5232079172610555e-05, + "loss": 0.0051, + "step": 53500 + }, + { + "epoch": 9.54, + "learning_rate": 4.5231187589158347e-05, + "loss": 0.0033, + "step": 53510 + }, + { + "epoch": 9.54, + "learning_rate": 4.523029600570614e-05, + "loss": 0.004, + "step": 53520 + }, + { + "epoch": 9.55, + "learning_rate": 4.522940442225392e-05, + "loss": 0.0028, + "step": 53530 + }, + { + "epoch": 9.55, + "learning_rate": 4.522851283880171e-05, + "loss": 0.0027, + "step": 53540 + }, + { + "epoch": 9.55, + "learning_rate": 4.52276212553495e-05, + "loss": 0.0045, + "step": 53550 + }, + { + "epoch": 9.55, + "learning_rate": 4.5226729671897296e-05, + "loss": 0.0034, + "step": 53560 + }, + { + "epoch": 9.55, + "learning_rate": 4.522583808844508e-05, + "loss": 0.004, + "step": 53570 + }, + { + "epoch": 9.55, + "learning_rate": 4.522494650499287e-05, + "loss": 0.003, + "step": 53580 + }, + { + "epoch": 9.56, + "learning_rate": 4.522405492154066e-05, + "loss": 0.0045, + "step": 53590 + }, + { + "epoch": 9.56, + "learning_rate": 4.522316333808845e-05, + "loss": 0.004, + "step": 53600 + }, + { + "epoch": 9.56, + "learning_rate": 4.522227175463624e-05, + "loss": 0.0036, + "step": 53610 + }, + { + "epoch": 9.56, + "learning_rate": 4.522138017118402e-05, + "loss": 0.0031, + "step": 53620 + }, + { + "epoch": 9.56, + "learning_rate": 4.5220488587731814e-05, + "loss": 0.0044, + "step": 53630 + }, + { + "epoch": 9.56, + "learning_rate": 4.52195970042796e-05, + "loss": 0.004, + "step": 53640 + }, + { + "epoch": 9.57, + "learning_rate": 4.521870542082739e-05, + "loss": 0.0045, + "step": 53650 + }, + { + "epoch": 9.57, + "learning_rate": 4.521781383737518e-05, + "loss": 0.0047, + "step": 53660 + }, + { + "epoch": 9.57, + "learning_rate": 4.521692225392297e-05, + "loss": 0.0045, + "step": 53670 + }, + { + "epoch": 9.57, + "learning_rate": 4.521603067047076e-05, + "loss": 0.0043, + "step": 53680 + }, + { + "epoch": 9.57, + "learning_rate": 4.521513908701855e-05, + "loss": 0.0043, + "step": 53690 + }, + { + "epoch": 9.58, + "learning_rate": 4.521424750356634e-05, + "loss": 0.0042, + "step": 53700 + }, + { + "epoch": 9.58, + "learning_rate": 4.521335592011412e-05, + "loss": 0.0037, + "step": 53710 + }, + { + "epoch": 9.58, + "learning_rate": 4.5212464336661914e-05, + "loss": 0.0036, + "step": 53720 + }, + { + "epoch": 9.58, + "learning_rate": 4.52115727532097e-05, + "loss": 0.002, + "step": 53730 + }, + { + "epoch": 9.58, + "learning_rate": 4.521068116975749e-05, + "loss": 0.0047, + "step": 53740 + }, + { + "epoch": 9.58, + "learning_rate": 4.520978958630528e-05, + "loss": 0.0063, + "step": 53750 + }, + { + "epoch": 9.59, + "learning_rate": 4.5208898002853066e-05, + "loss": 0.0046, + "step": 53760 + }, + { + "epoch": 9.59, + "learning_rate": 4.520800641940086e-05, + "loss": 0.0052, + "step": 53770 + }, + { + "epoch": 9.59, + "learning_rate": 4.520711483594865e-05, + "loss": 0.0037, + "step": 53780 + }, + { + "epoch": 9.59, + "learning_rate": 4.520622325249644e-05, + "loss": 0.0032, + "step": 53790 + }, + { + "epoch": 9.59, + "learning_rate": 4.5205331669044224e-05, + "loss": 0.0056, + "step": 53800 + }, + { + "epoch": 9.6, + "learning_rate": 4.5204440085592015e-05, + "loss": 0.0031, + "step": 53810 + }, + { + "epoch": 9.6, + "learning_rate": 4.5203548502139806e-05, + "loss": 0.0036, + "step": 53820 + }, + { + "epoch": 9.6, + "learning_rate": 4.520265691868759e-05, + "loss": 0.0046, + "step": 53830 + }, + { + "epoch": 9.6, + "learning_rate": 4.520176533523538e-05, + "loss": 0.0062, + "step": 53840 + }, + { + "epoch": 9.6, + "learning_rate": 4.5200873751783166e-05, + "loss": 0.0041, + "step": 53850 + }, + { + "epoch": 9.6, + "learning_rate": 4.519998216833096e-05, + "loss": 0.0046, + "step": 53860 + }, + { + "epoch": 9.61, + "learning_rate": 4.519909058487874e-05, + "loss": 0.0056, + "step": 53870 + }, + { + "epoch": 9.61, + "learning_rate": 4.519819900142653e-05, + "loss": 0.0044, + "step": 53880 + }, + { + "epoch": 9.61, + "learning_rate": 4.5197307417974324e-05, + "loss": 0.0053, + "step": 53890 + }, + { + "epoch": 9.61, + "learning_rate": 4.5196415834522116e-05, + "loss": 0.0039, + "step": 53900 + }, + { + "epoch": 9.61, + "learning_rate": 4.519552425106991e-05, + "loss": 0.0027, + "step": 53910 + }, + { + "epoch": 9.61, + "learning_rate": 4.519463266761769e-05, + "loss": 0.0069, + "step": 53920 + }, + { + "epoch": 9.62, + "learning_rate": 4.519374108416548e-05, + "loss": 0.006, + "step": 53930 + }, + { + "epoch": 9.62, + "learning_rate": 4.519284950071327e-05, + "loss": 0.0058, + "step": 53940 + }, + { + "epoch": 9.62, + "learning_rate": 4.519195791726106e-05, + "loss": 0.0033, + "step": 53950 + }, + { + "epoch": 9.62, + "learning_rate": 4.519106633380884e-05, + "loss": 0.0029, + "step": 53960 + }, + { + "epoch": 9.62, + "learning_rate": 4.5190174750356634e-05, + "loss": 0.0041, + "step": 53970 + }, + { + "epoch": 9.63, + "learning_rate": 4.5189283166904425e-05, + "loss": 0.0038, + "step": 53980 + }, + { + "epoch": 9.63, + "learning_rate": 4.518839158345221e-05, + "loss": 0.0029, + "step": 53990 + }, + { + "epoch": 9.63, + "learning_rate": 4.518750000000001e-05, + "loss": 0.0042, + "step": 54000 + }, + { + "epoch": 9.63, + "learning_rate": 4.518660841654779e-05, + "loss": 0.0057, + "step": 54010 + }, + { + "epoch": 9.63, + "learning_rate": 4.518571683309558e-05, + "loss": 0.0033, + "step": 54020 + }, + { + "epoch": 9.63, + "learning_rate": 4.518482524964337e-05, + "loss": 0.004, + "step": 54030 + }, + { + "epoch": 9.64, + "learning_rate": 4.518393366619116e-05, + "loss": 0.0075, + "step": 54040 + }, + { + "epoch": 9.64, + "learning_rate": 4.518304208273895e-05, + "loss": 0.0038, + "step": 54050 + }, + { + "epoch": 9.64, + "learning_rate": 4.5182150499286734e-05, + "loss": 0.0035, + "step": 54060 + }, + { + "epoch": 9.64, + "learning_rate": 4.5181258915834526e-05, + "loss": 0.0048, + "step": 54070 + }, + { + "epoch": 9.64, + "learning_rate": 4.518036733238231e-05, + "loss": 0.005, + "step": 54080 + }, + { + "epoch": 9.65, + "learning_rate": 4.51794757489301e-05, + "loss": 0.0048, + "step": 54090 + }, + { + "epoch": 9.65, + "learning_rate": 4.5178584165477886e-05, + "loss": 0.0061, + "step": 54100 + }, + { + "epoch": 9.65, + "learning_rate": 4.5177692582025684e-05, + "loss": 0.0068, + "step": 54110 + }, + { + "epoch": 9.65, + "learning_rate": 4.517680099857347e-05, + "loss": 0.004, + "step": 54120 + }, + { + "epoch": 9.65, + "learning_rate": 4.517590941512126e-05, + "loss": 0.0051, + "step": 54130 + }, + { + "epoch": 9.65, + "learning_rate": 4.517501783166905e-05, + "loss": 0.0042, + "step": 54140 + }, + { + "epoch": 9.66, + "learning_rate": 4.5174126248216835e-05, + "loss": 0.0041, + "step": 54150 + }, + { + "epoch": 9.66, + "learning_rate": 4.5173234664764626e-05, + "loss": 0.0056, + "step": 54160 + }, + { + "epoch": 9.66, + "learning_rate": 4.517234308131241e-05, + "loss": 0.0045, + "step": 54170 + }, + { + "epoch": 9.66, + "learning_rate": 4.51714514978602e-05, + "loss": 0.003, + "step": 54180 + }, + { + "epoch": 9.66, + "learning_rate": 4.5170559914407986e-05, + "loss": 0.0021, + "step": 54190 + }, + { + "epoch": 9.66, + "learning_rate": 4.516966833095578e-05, + "loss": 0.0045, + "step": 54200 + }, + { + "epoch": 9.67, + "learning_rate": 4.516877674750357e-05, + "loss": 0.0041, + "step": 54210 + }, + { + "epoch": 9.67, + "learning_rate": 4.516788516405136e-05, + "loss": 0.0047, + "step": 54220 + }, + { + "epoch": 9.67, + "learning_rate": 4.516699358059915e-05, + "loss": 0.0042, + "step": 54230 + }, + { + "epoch": 9.67, + "learning_rate": 4.5166101997146935e-05, + "loss": 0.0026, + "step": 54240 + }, + { + "epoch": 9.67, + "learning_rate": 4.516521041369473e-05, + "loss": 0.0032, + "step": 54250 + }, + { + "epoch": 9.68, + "learning_rate": 4.516431883024251e-05, + "loss": 0.0044, + "step": 54260 + }, + { + "epoch": 9.68, + "learning_rate": 4.51634272467903e-05, + "loss": 0.0063, + "step": 54270 + }, + { + "epoch": 9.68, + "learning_rate": 4.5162535663338094e-05, + "loss": 0.0048, + "step": 54280 + }, + { + "epoch": 9.68, + "learning_rate": 4.516164407988588e-05, + "loss": 0.0044, + "step": 54290 + }, + { + "epoch": 9.68, + "learning_rate": 4.516075249643367e-05, + "loss": 0.0055, + "step": 54300 + }, + { + "epoch": 9.68, + "learning_rate": 4.5159860912981454e-05, + "loss": 0.0082, + "step": 54310 + }, + { + "epoch": 9.69, + "learning_rate": 4.5158969329529245e-05, + "loss": 0.0057, + "step": 54320 + }, + { + "epoch": 9.69, + "learning_rate": 4.5158077746077036e-05, + "loss": 0.0052, + "step": 54330 + }, + { + "epoch": 9.69, + "learning_rate": 4.515718616262483e-05, + "loss": 0.0049, + "step": 54340 + }, + { + "epoch": 9.69, + "learning_rate": 4.515629457917261e-05, + "loss": 0.0075, + "step": 54350 + }, + { + "epoch": 9.69, + "learning_rate": 4.51554029957204e-05, + "loss": 0.0045, + "step": 54360 + }, + { + "epoch": 9.7, + "learning_rate": 4.5154511412268194e-05, + "loss": 0.0056, + "step": 54370 + }, + { + "epoch": 9.7, + "learning_rate": 4.515361982881598e-05, + "loss": 0.0037, + "step": 54380 + }, + { + "epoch": 9.7, + "learning_rate": 4.515272824536377e-05, + "loss": 0.0069, + "step": 54390 + }, + { + "epoch": 9.7, + "learning_rate": 4.5151836661911554e-05, + "loss": 0.0054, + "step": 54400 + }, + { + "epoch": 9.7, + "learning_rate": 4.5150945078459345e-05, + "loss": 0.0054, + "step": 54410 + }, + { + "epoch": 9.7, + "learning_rate": 4.515005349500713e-05, + "loss": 0.0055, + "step": 54420 + }, + { + "epoch": 9.71, + "learning_rate": 4.514916191155492e-05, + "loss": 0.0046, + "step": 54430 + }, + { + "epoch": 9.71, + "learning_rate": 4.514827032810271e-05, + "loss": 0.0067, + "step": 54440 + }, + { + "epoch": 9.71, + "learning_rate": 4.5147378744650503e-05, + "loss": 0.0065, + "step": 54450 + }, + { + "epoch": 9.71, + "learning_rate": 4.5146487161198295e-05, + "loss": 0.0038, + "step": 54460 + }, + { + "epoch": 9.71, + "learning_rate": 4.514559557774608e-05, + "loss": 0.0032, + "step": 54470 + }, + { + "epoch": 9.71, + "learning_rate": 4.514470399429387e-05, + "loss": 0.0025, + "step": 54480 + }, + { + "epoch": 9.72, + "learning_rate": 4.5143812410841655e-05, + "loss": 0.0049, + "step": 54490 + }, + { + "epoch": 9.72, + "learning_rate": 4.5142920827389446e-05, + "loss": 0.0046, + "step": 54500 + }, + { + "epoch": 9.72, + "learning_rate": 4.514202924393724e-05, + "loss": 0.006, + "step": 54510 + }, + { + "epoch": 9.72, + "learning_rate": 4.514113766048502e-05, + "loss": 0.0039, + "step": 54520 + }, + { + "epoch": 9.72, + "learning_rate": 4.514024607703281e-05, + "loss": 0.0024, + "step": 54530 + }, + { + "epoch": 9.73, + "learning_rate": 4.51393544935806e-05, + "loss": 0.0029, + "step": 54540 + }, + { + "epoch": 9.73, + "learning_rate": 4.5138462910128395e-05, + "loss": 0.0031, + "step": 54550 + }, + { + "epoch": 9.73, + "learning_rate": 4.513757132667618e-05, + "loss": 0.0035, + "step": 54560 + }, + { + "epoch": 9.73, + "learning_rate": 4.513667974322397e-05, + "loss": 0.0032, + "step": 54570 + }, + { + "epoch": 9.73, + "learning_rate": 4.5135788159771755e-05, + "loss": 0.0032, + "step": 54580 + }, + { + "epoch": 9.73, + "learning_rate": 4.5134896576319546e-05, + "loss": 0.0054, + "step": 54590 + }, + { + "epoch": 9.74, + "learning_rate": 4.513400499286734e-05, + "loss": 0.0066, + "step": 54600 + }, + { + "epoch": 9.74, + "learning_rate": 4.513311340941512e-05, + "loss": 0.0037, + "step": 54610 + }, + { + "epoch": 9.74, + "learning_rate": 4.513222182596291e-05, + "loss": 0.0048, + "step": 54620 + }, + { + "epoch": 9.74, + "learning_rate": 4.51313302425107e-05, + "loss": 0.0059, + "step": 54630 + }, + { + "epoch": 9.74, + "learning_rate": 4.513043865905849e-05, + "loss": 0.0059, + "step": 54640 + }, + { + "epoch": 9.75, + "learning_rate": 4.5129547075606273e-05, + "loss": 0.0079, + "step": 54650 + }, + { + "epoch": 9.75, + "learning_rate": 4.512865549215407e-05, + "loss": 0.0038, + "step": 54660 + }, + { + "epoch": 9.75, + "learning_rate": 4.5127763908701856e-05, + "loss": 0.0048, + "step": 54670 + }, + { + "epoch": 9.75, + "learning_rate": 4.512687232524965e-05, + "loss": 0.0029, + "step": 54680 + }, + { + "epoch": 9.75, + "learning_rate": 4.512598074179744e-05, + "loss": 0.0053, + "step": 54690 + }, + { + "epoch": 9.75, + "learning_rate": 4.512508915834522e-05, + "loss": 0.0049, + "step": 54700 + }, + { + "epoch": 9.76, + "learning_rate": 4.5124197574893014e-05, + "loss": 0.004, + "step": 54710 + }, + { + "epoch": 9.76, + "learning_rate": 4.51233059914408e-05, + "loss": 0.0028, + "step": 54720 + }, + { + "epoch": 9.76, + "learning_rate": 4.512241440798859e-05, + "loss": 0.005, + "step": 54730 + }, + { + "epoch": 9.76, + "learning_rate": 4.512152282453638e-05, + "loss": 0.0074, + "step": 54740 + }, + { + "epoch": 9.76, + "learning_rate": 4.5120631241084165e-05, + "loss": 0.004, + "step": 54750 + }, + { + "epoch": 9.76, + "learning_rate": 4.5119739657631956e-05, + "loss": 0.0047, + "step": 54760 + }, + { + "epoch": 9.77, + "learning_rate": 4.511884807417975e-05, + "loss": 0.0069, + "step": 54770 + }, + { + "epoch": 9.77, + "learning_rate": 4.511795649072754e-05, + "loss": 0.0049, + "step": 54780 + }, + { + "epoch": 9.77, + "learning_rate": 4.511706490727532e-05, + "loss": 0.0049, + "step": 54790 + }, + { + "epoch": 9.77, + "learning_rate": 4.5116173323823114e-05, + "loss": 0.0028, + "step": 54800 + }, + { + "epoch": 9.77, + "learning_rate": 4.51152817403709e-05, + "loss": 0.0047, + "step": 54810 + }, + { + "epoch": 9.78, + "learning_rate": 4.511439015691869e-05, + "loss": 0.0041, + "step": 54820 + }, + { + "epoch": 9.78, + "learning_rate": 4.511349857346648e-05, + "loss": 0.0051, + "step": 54830 + }, + { + "epoch": 9.78, + "learning_rate": 4.5112606990014266e-05, + "loss": 0.0034, + "step": 54840 + }, + { + "epoch": 9.78, + "learning_rate": 4.511171540656206e-05, + "loss": 0.0037, + "step": 54850 + }, + { + "epoch": 9.78, + "learning_rate": 4.511082382310984e-05, + "loss": 0.0036, + "step": 54860 + }, + { + "epoch": 9.78, + "learning_rate": 4.510993223965763e-05, + "loss": 0.0031, + "step": 54870 + }, + { + "epoch": 9.79, + "learning_rate": 4.5109040656205424e-05, + "loss": 0.0042, + "step": 54880 + }, + { + "epoch": 9.79, + "learning_rate": 4.5108149072753215e-05, + "loss": 0.0039, + "step": 54890 + }, + { + "epoch": 9.79, + "learning_rate": 4.5107257489301e-05, + "loss": 0.0033, + "step": 54900 + }, + { + "epoch": 9.79, + "learning_rate": 4.510636590584879e-05, + "loss": 0.0047, + "step": 54910 + }, + { + "epoch": 9.79, + "learning_rate": 4.510547432239658e-05, + "loss": 0.0026, + "step": 54920 + }, + { + "epoch": 9.79, + "learning_rate": 4.5104582738944366e-05, + "loss": 0.0043, + "step": 54930 + }, + { + "epoch": 9.8, + "learning_rate": 4.510369115549216e-05, + "loss": 0.0052, + "step": 54940 + }, + { + "epoch": 9.8, + "learning_rate": 4.510279957203994e-05, + "loss": 0.0067, + "step": 54950 + }, + { + "epoch": 9.8, + "learning_rate": 4.510190798858773e-05, + "loss": 0.0027, + "step": 54960 + }, + { + "epoch": 9.8, + "learning_rate": 4.5101016405135524e-05, + "loss": 0.0042, + "step": 54970 + }, + { + "epoch": 9.8, + "learning_rate": 4.510012482168331e-05, + "loss": 0.0048, + "step": 54980 + }, + { + "epoch": 9.81, + "learning_rate": 4.50992332382311e-05, + "loss": 0.0078, + "step": 54990 + }, + { + "epoch": 9.81, + "learning_rate": 4.509834165477889e-05, + "loss": 0.0058, + "step": 55000 + }, + { + "epoch": 9.81, + "learning_rate": 4.509745007132668e-05, + "loss": 0.0031, + "step": 55010 + }, + { + "epoch": 9.81, + "learning_rate": 4.509655848787447e-05, + "loss": 0.003, + "step": 55020 + }, + { + "epoch": 9.81, + "learning_rate": 4.509566690442226e-05, + "loss": 0.0028, + "step": 55030 + }, + { + "epoch": 9.81, + "learning_rate": 4.509477532097004e-05, + "loss": 0.006, + "step": 55040 + }, + { + "epoch": 9.82, + "learning_rate": 4.5093883737517834e-05, + "loss": 0.0039, + "step": 55050 + }, + { + "epoch": 9.82, + "learning_rate": 4.5092992154065625e-05, + "loss": 0.0052, + "step": 55060 + }, + { + "epoch": 9.82, + "learning_rate": 4.509210057061341e-05, + "loss": 0.0044, + "step": 55070 + }, + { + "epoch": 9.82, + "learning_rate": 4.50912089871612e-05, + "loss": 0.0046, + "step": 55080 + }, + { + "epoch": 9.82, + "learning_rate": 4.5090317403708985e-05, + "loss": 0.0065, + "step": 55090 + }, + { + "epoch": 9.83, + "learning_rate": 4.508942582025678e-05, + "loss": 0.0053, + "step": 55100 + }, + { + "epoch": 9.83, + "learning_rate": 4.508853423680457e-05, + "loss": 0.0058, + "step": 55110 + }, + { + "epoch": 9.83, + "learning_rate": 4.508764265335236e-05, + "loss": 0.003, + "step": 55120 + }, + { + "epoch": 9.83, + "learning_rate": 4.508675106990014e-05, + "loss": 0.0046, + "step": 55130 + }, + { + "epoch": 9.83, + "learning_rate": 4.5085859486447934e-05, + "loss": 0.0089, + "step": 55140 + }, + { + "epoch": 9.83, + "learning_rate": 4.5084967902995725e-05, + "loss": 0.0048, + "step": 55150 + }, + { + "epoch": 9.84, + "learning_rate": 4.508407631954351e-05, + "loss": 0.0051, + "step": 55160 + }, + { + "epoch": 9.84, + "learning_rate": 4.50831847360913e-05, + "loss": 0.0052, + "step": 55170 + }, + { + "epoch": 9.84, + "learning_rate": 4.5082293152639086e-05, + "loss": 0.0048, + "step": 55180 + }, + { + "epoch": 9.84, + "learning_rate": 4.508140156918688e-05, + "loss": 0.002, + "step": 55190 + }, + { + "epoch": 9.84, + "learning_rate": 4.508050998573467e-05, + "loss": 0.0037, + "step": 55200 + }, + { + "epoch": 9.84, + "learning_rate": 4.507961840228245e-05, + "loss": 0.0077, + "step": 55210 + }, + { + "epoch": 9.85, + "learning_rate": 4.5078726818830244e-05, + "loss": 0.0042, + "step": 55220 + }, + { + "epoch": 9.85, + "learning_rate": 4.5077835235378035e-05, + "loss": 0.0046, + "step": 55230 + }, + { + "epoch": 9.85, + "learning_rate": 4.5076943651925826e-05, + "loss": 0.0033, + "step": 55240 + }, + { + "epoch": 9.85, + "learning_rate": 4.507605206847361e-05, + "loss": 0.0058, + "step": 55250 + }, + { + "epoch": 9.85, + "learning_rate": 4.50751604850214e-05, + "loss": 0.0032, + "step": 55260 + }, + { + "epoch": 9.86, + "learning_rate": 4.5074268901569186e-05, + "loss": 0.0028, + "step": 55270 + }, + { + "epoch": 9.86, + "learning_rate": 4.507337731811698e-05, + "loss": 0.0037, + "step": 55280 + }, + { + "epoch": 9.86, + "learning_rate": 4.507248573466477e-05, + "loss": 0.002, + "step": 55290 + }, + { + "epoch": 9.86, + "learning_rate": 4.507159415121255e-05, + "loss": 0.0055, + "step": 55300 + }, + { + "epoch": 9.86, + "learning_rate": 4.5070702567760344e-05, + "loss": 0.0042, + "step": 55310 + }, + { + "epoch": 9.86, + "learning_rate": 4.506981098430813e-05, + "loss": 0.0042, + "step": 55320 + }, + { + "epoch": 9.87, + "learning_rate": 4.506891940085593e-05, + "loss": 0.0079, + "step": 55330 + }, + { + "epoch": 9.87, + "learning_rate": 4.506802781740371e-05, + "loss": 0.0037, + "step": 55340 + }, + { + "epoch": 9.87, + "learning_rate": 4.50671362339515e-05, + "loss": 0.0027, + "step": 55350 + }, + { + "epoch": 9.87, + "learning_rate": 4.506624465049929e-05, + "loss": 0.0033, + "step": 55360 + }, + { + "epoch": 9.87, + "learning_rate": 4.506535306704708e-05, + "loss": 0.0057, + "step": 55370 + }, + { + "epoch": 9.88, + "learning_rate": 4.506446148359487e-05, + "loss": 0.0068, + "step": 55380 + }, + { + "epoch": 9.88, + "learning_rate": 4.5063569900142654e-05, + "loss": 0.0036, + "step": 55390 + }, + { + "epoch": 9.88, + "learning_rate": 4.5062678316690445e-05, + "loss": 0.0056, + "step": 55400 + }, + { + "epoch": 9.88, + "learning_rate": 4.506178673323823e-05, + "loss": 0.0056, + "step": 55410 + }, + { + "epoch": 9.88, + "learning_rate": 4.506089514978602e-05, + "loss": 0.0044, + "step": 55420 + }, + { + "epoch": 9.88, + "learning_rate": 4.506000356633381e-05, + "loss": 0.0062, + "step": 55430 + }, + { + "epoch": 9.89, + "learning_rate": 4.50591119828816e-05, + "loss": 0.0046, + "step": 55440 + }, + { + "epoch": 9.89, + "learning_rate": 4.505822039942939e-05, + "loss": 0.006, + "step": 55450 + }, + { + "epoch": 9.89, + "learning_rate": 4.505732881597718e-05, + "loss": 0.0039, + "step": 55460 + }, + { + "epoch": 9.89, + "learning_rate": 4.505643723252497e-05, + "loss": 0.0029, + "step": 55470 + }, + { + "epoch": 9.89, + "learning_rate": 4.5055545649072754e-05, + "loss": 0.0063, + "step": 55480 + }, + { + "epoch": 9.89, + "learning_rate": 4.5054654065620545e-05, + "loss": 0.0041, + "step": 55490 + }, + { + "epoch": 9.9, + "learning_rate": 4.505376248216833e-05, + "loss": 0.0081, + "step": 55500 + }, + { + "epoch": 9.9, + "learning_rate": 4.505287089871612e-05, + "loss": 0.003, + "step": 55510 + }, + { + "epoch": 9.9, + "learning_rate": 4.505197931526391e-05, + "loss": 0.0058, + "step": 55520 + }, + { + "epoch": 9.9, + "learning_rate": 4.5051087731811697e-05, + "loss": 0.0062, + "step": 55530 + }, + { + "epoch": 9.9, + "learning_rate": 4.505019614835949e-05, + "loss": 0.0071, + "step": 55540 + }, + { + "epoch": 9.91, + "learning_rate": 4.504930456490728e-05, + "loss": 0.0057, + "step": 55550 + }, + { + "epoch": 9.91, + "learning_rate": 4.504841298145507e-05, + "loss": 0.003, + "step": 55560 + }, + { + "epoch": 9.91, + "learning_rate": 4.504761055634807e-05, + "loss": 0.0054, + "step": 55570 + }, + { + "epoch": 9.91, + "learning_rate": 4.504671897289586e-05, + "loss": 0.005, + "step": 55580 + }, + { + "epoch": 9.91, + "learning_rate": 4.5045827389443654e-05, + "loss": 0.0052, + "step": 55590 + }, + { + "epoch": 9.91, + "learning_rate": 4.5044935805991445e-05, + "loss": 0.0048, + "step": 55600 + }, + { + "epoch": 9.92, + "learning_rate": 4.5044044222539236e-05, + "loss": 0.0048, + "step": 55610 + }, + { + "epoch": 9.92, + "learning_rate": 4.504315263908702e-05, + "loss": 0.0033, + "step": 55620 + }, + { + "epoch": 9.92, + "learning_rate": 4.504226105563481e-05, + "loss": 0.0067, + "step": 55630 + }, + { + "epoch": 9.92, + "learning_rate": 4.5041369472182596e-05, + "loss": 0.004, + "step": 55640 + }, + { + "epoch": 9.92, + "learning_rate": 4.504047788873039e-05, + "loss": 0.0034, + "step": 55650 + }, + { + "epoch": 9.93, + "learning_rate": 4.503958630527817e-05, + "loss": 0.0052, + "step": 55660 + }, + { + "epoch": 9.93, + "learning_rate": 4.503869472182596e-05, + "loss": 0.0027, + "step": 55670 + }, + { + "epoch": 9.93, + "learning_rate": 4.5037803138373754e-05, + "loss": 0.0044, + "step": 55680 + }, + { + "epoch": 9.93, + "learning_rate": 4.503691155492154e-05, + "loss": 0.0056, + "step": 55690 + }, + { + "epoch": 9.93, + "learning_rate": 4.503601997146934e-05, + "loss": 0.0057, + "step": 55700 + }, + { + "epoch": 9.93, + "learning_rate": 4.503512838801712e-05, + "loss": 0.0043, + "step": 55710 + }, + { + "epoch": 9.94, + "learning_rate": 4.503423680456491e-05, + "loss": 0.0027, + "step": 55720 + }, + { + "epoch": 9.94, + "learning_rate": 4.50333452211127e-05, + "loss": 0.004, + "step": 55730 + }, + { + "epoch": 9.94, + "learning_rate": 4.503245363766049e-05, + "loss": 0.0042, + "step": 55740 + }, + { + "epoch": 9.94, + "learning_rate": 4.503156205420828e-05, + "loss": 0.0037, + "step": 55750 + }, + { + "epoch": 9.94, + "learning_rate": 4.5030670470756064e-05, + "loss": 0.0028, + "step": 55760 + }, + { + "epoch": 9.94, + "learning_rate": 4.5029778887303855e-05, + "loss": 0.0042, + "step": 55770 + }, + { + "epoch": 9.95, + "learning_rate": 4.502888730385164e-05, + "loss": 0.0044, + "step": 55780 + }, + { + "epoch": 9.95, + "learning_rate": 4.502799572039943e-05, + "loss": 0.006, + "step": 55790 + }, + { + "epoch": 9.95, + "learning_rate": 4.5027104136947215e-05, + "loss": 0.0025, + "step": 55800 + }, + { + "epoch": 9.95, + "learning_rate": 4.502621255349501e-05, + "loss": 0.0041, + "step": 55810 + }, + { + "epoch": 9.95, + "learning_rate": 4.50253209700428e-05, + "loss": 0.0031, + "step": 55820 + }, + { + "epoch": 9.96, + "learning_rate": 4.502442938659059e-05, + "loss": 0.0029, + "step": 55830 + }, + { + "epoch": 9.96, + "learning_rate": 4.502353780313838e-05, + "loss": 0.0056, + "step": 55840 + }, + { + "epoch": 9.96, + "learning_rate": 4.5022646219686164e-05, + "loss": 0.0039, + "step": 55850 + }, + { + "epoch": 9.96, + "learning_rate": 4.5021754636233956e-05, + "loss": 0.0032, + "step": 55860 + }, + { + "epoch": 9.96, + "learning_rate": 4.502086305278174e-05, + "loss": 0.0027, + "step": 55870 + }, + { + "epoch": 9.96, + "learning_rate": 4.501997146932953e-05, + "loss": 0.0041, + "step": 55880 + }, + { + "epoch": 9.97, + "learning_rate": 4.5019079885877316e-05, + "loss": 0.005, + "step": 55890 + }, + { + "epoch": 9.97, + "learning_rate": 4.501818830242511e-05, + "loss": 0.0034, + "step": 55900 + }, + { + "epoch": 9.97, + "learning_rate": 4.50172967189729e-05, + "loss": 0.0032, + "step": 55910 + }, + { + "epoch": 9.97, + "learning_rate": 4.501640513552069e-05, + "loss": 0.0042, + "step": 55920 + }, + { + "epoch": 9.97, + "learning_rate": 4.501551355206848e-05, + "loss": 0.0026, + "step": 55930 + }, + { + "epoch": 9.98, + "learning_rate": 4.5014621968616265e-05, + "loss": 0.004, + "step": 55940 + }, + { + "epoch": 9.98, + "learning_rate": 4.5013730385164056e-05, + "loss": 0.0049, + "step": 55950 + }, + { + "epoch": 9.98, + "learning_rate": 4.501283880171184e-05, + "loss": 0.0028, + "step": 55960 + }, + { + "epoch": 9.98, + "learning_rate": 4.501194721825963e-05, + "loss": 0.0081, + "step": 55970 + }, + { + "epoch": 9.98, + "learning_rate": 4.501105563480742e-05, + "loss": 0.0037, + "step": 55980 + }, + { + "epoch": 9.98, + "learning_rate": 4.501016405135521e-05, + "loss": 0.0028, + "step": 55990 + }, + { + "epoch": 9.99, + "learning_rate": 4.5009272467903e-05, + "loss": 0.0047, + "step": 56000 + }, + { + "epoch": 9.99, + "learning_rate": 4.500838088445078e-05, + "loss": 0.0058, + "step": 56010 + }, + { + "epoch": 9.99, + "learning_rate": 4.5007489300998574e-05, + "loss": 0.003, + "step": 56020 + }, + { + "epoch": 9.99, + "learning_rate": 4.5006597717546365e-05, + "loss": 0.0042, + "step": 56030 + }, + { + "epoch": 9.99, + "learning_rate": 4.500570613409416e-05, + "loss": 0.0032, + "step": 56040 + }, + { + "epoch": 9.99, + "learning_rate": 4.500481455064194e-05, + "loss": 0.0035, + "step": 56050 + }, + { + "epoch": 10.0, + "learning_rate": 4.500392296718973e-05, + "loss": 0.0053, + "step": 56060 + }, + { + "epoch": 10.0, + "learning_rate": 4.5003031383737524e-05, + "loss": 0.0055, + "step": 56070 + }, + { + "epoch": 10.0, + "learning_rate": 4.500213980028531e-05, + "loss": 0.0052, + "step": 56080 + }, + { + "epoch": 10.0, + "eval_loss": 0.018937913700938225, + "eval_runtime": 195.8855, + "eval_samples_per_second": 23.682, + "eval_steps_per_second": 2.961, + "step": 56080 + }, + { + "epoch": 10.0, + "learning_rate": 4.50012482168331e-05, + "loss": 0.0038, + "step": 56090 + }, + { + "epoch": 10.0, + "learning_rate": 4.5000356633380884e-05, + "loss": 0.0034, + "step": 56100 + }, + { + "epoch": 10.01, + "learning_rate": 4.4999465049928675e-05, + "loss": 0.003, + "step": 56110 + }, + { + "epoch": 10.01, + "learning_rate": 4.499866262482168e-05, + "loss": 0.005, + "step": 56120 + }, + { + "epoch": 10.01, + "learning_rate": 4.4997771041369474e-05, + "loss": 0.0026, + "step": 56130 + }, + { + "epoch": 10.01, + "learning_rate": 4.4996879457917265e-05, + "loss": 0.0019, + "step": 56140 + }, + { + "epoch": 10.01, + "learning_rate": 4.499598787446505e-05, + "loss": 0.0045, + "step": 56150 + }, + { + "epoch": 10.01, + "learning_rate": 4.499509629101284e-05, + "loss": 0.0028, + "step": 56160 + }, + { + "epoch": 10.02, + "learning_rate": 4.4994204707560625e-05, + "loss": 0.0072, + "step": 56170 + }, + { + "epoch": 10.02, + "learning_rate": 4.4993313124108417e-05, + "loss": 0.0046, + "step": 56180 + }, + { + "epoch": 10.02, + "learning_rate": 4.499242154065621e-05, + "loss": 0.0042, + "step": 56190 + }, + { + "epoch": 10.02, + "learning_rate": 4.4991529957204e-05, + "loss": 0.0057, + "step": 56200 + }, + { + "epoch": 10.02, + "learning_rate": 4.4990638373751783e-05, + "loss": 0.003, + "step": 56210 + }, + { + "epoch": 10.02, + "learning_rate": 4.4989746790299575e-05, + "loss": 0.0032, + "step": 56220 + }, + { + "epoch": 10.03, + "learning_rate": 4.4988855206847366e-05, + "loss": 0.0033, + "step": 56230 + }, + { + "epoch": 10.03, + "learning_rate": 4.498796362339515e-05, + "loss": 0.0024, + "step": 56240 + }, + { + "epoch": 10.03, + "learning_rate": 4.498707203994294e-05, + "loss": 0.0021, + "step": 56250 + }, + { + "epoch": 10.03, + "learning_rate": 4.4986180456490726e-05, + "loss": 0.004, + "step": 56260 + }, + { + "epoch": 10.03, + "learning_rate": 4.498528887303852e-05, + "loss": 0.006, + "step": 56270 + }, + { + "epoch": 10.04, + "learning_rate": 4.498439728958631e-05, + "loss": 0.0047, + "step": 56280 + }, + { + "epoch": 10.04, + "learning_rate": 4.498350570613409e-05, + "loss": 0.0031, + "step": 56290 + }, + { + "epoch": 10.04, + "learning_rate": 4.498261412268189e-05, + "loss": 0.0043, + "step": 56300 + }, + { + "epoch": 10.04, + "learning_rate": 4.4981722539229675e-05, + "loss": 0.0048, + "step": 56310 + }, + { + "epoch": 10.04, + "learning_rate": 4.4980830955777466e-05, + "loss": 0.0029, + "step": 56320 + }, + { + "epoch": 10.04, + "learning_rate": 4.497993937232525e-05, + "loss": 0.0054, + "step": 56330 + }, + { + "epoch": 10.05, + "learning_rate": 4.497904778887304e-05, + "loss": 0.0033, + "step": 56340 + }, + { + "epoch": 10.05, + "learning_rate": 4.4978156205420826e-05, + "loss": 0.0035, + "step": 56350 + }, + { + "epoch": 10.05, + "learning_rate": 4.497726462196862e-05, + "loss": 0.0035, + "step": 56360 + }, + { + "epoch": 10.05, + "learning_rate": 4.497637303851641e-05, + "loss": 0.004, + "step": 56370 + }, + { + "epoch": 10.05, + "learning_rate": 4.497548145506419e-05, + "loss": 0.0023, + "step": 56380 + }, + { + "epoch": 10.06, + "learning_rate": 4.4974589871611985e-05, + "loss": 0.0033, + "step": 56390 + }, + { + "epoch": 10.06, + "learning_rate": 4.497369828815977e-05, + "loss": 0.004, + "step": 56400 + }, + { + "epoch": 10.06, + "learning_rate": 4.497280670470757e-05, + "loss": 0.0034, + "step": 56410 + }, + { + "epoch": 10.06, + "learning_rate": 4.497191512125535e-05, + "loss": 0.0061, + "step": 56420 + }, + { + "epoch": 10.06, + "learning_rate": 4.497102353780314e-05, + "loss": 0.0042, + "step": 56430 + }, + { + "epoch": 10.06, + "learning_rate": 4.497013195435093e-05, + "loss": 0.004, + "step": 56440 + }, + { + "epoch": 10.07, + "learning_rate": 4.496924037089872e-05, + "loss": 0.0059, + "step": 56450 + }, + { + "epoch": 10.07, + "learning_rate": 4.496834878744651e-05, + "loss": 0.004, + "step": 56460 + }, + { + "epoch": 10.07, + "learning_rate": 4.4967457203994294e-05, + "loss": 0.0035, + "step": 56470 + }, + { + "epoch": 10.07, + "learning_rate": 4.4966565620542085e-05, + "loss": 0.0039, + "step": 56480 + }, + { + "epoch": 10.07, + "learning_rate": 4.496567403708987e-05, + "loss": 0.0036, + "step": 56490 + }, + { + "epoch": 10.07, + "learning_rate": 4.496478245363766e-05, + "loss": 0.0032, + "step": 56500 + }, + { + "epoch": 10.08, + "learning_rate": 4.496389087018545e-05, + "loss": 0.0028, + "step": 56510 + }, + { + "epoch": 10.08, + "learning_rate": 4.496299928673324e-05, + "loss": 0.0027, + "step": 56520 + }, + { + "epoch": 10.08, + "learning_rate": 4.4962107703281034e-05, + "loss": 0.0063, + "step": 56530 + }, + { + "epoch": 10.08, + "learning_rate": 4.496121611982882e-05, + "loss": 0.0041, + "step": 56540 + }, + { + "epoch": 10.08, + "learning_rate": 4.496032453637661e-05, + "loss": 0.0028, + "step": 56550 + }, + { + "epoch": 10.09, + "learning_rate": 4.4959432952924394e-05, + "loss": 0.0073, + "step": 56560 + }, + { + "epoch": 10.09, + "learning_rate": 4.4958541369472186e-05, + "loss": 0.0032, + "step": 56570 + }, + { + "epoch": 10.09, + "learning_rate": 4.495764978601997e-05, + "loss": 0.0038, + "step": 56580 + }, + { + "epoch": 10.09, + "learning_rate": 4.495675820256776e-05, + "loss": 0.0057, + "step": 56590 + }, + { + "epoch": 10.09, + "learning_rate": 4.495586661911555e-05, + "loss": 0.0043, + "step": 56600 + }, + { + "epoch": 10.09, + "learning_rate": 4.495497503566334e-05, + "loss": 0.002, + "step": 56610 + }, + { + "epoch": 10.1, + "learning_rate": 4.495408345221113e-05, + "loss": 0.0041, + "step": 56620 + }, + { + "epoch": 10.1, + "learning_rate": 4.495319186875892e-05, + "loss": 0.002, + "step": 56630 + }, + { + "epoch": 10.1, + "learning_rate": 4.495230028530671e-05, + "loss": 0.0038, + "step": 56640 + }, + { + "epoch": 10.1, + "learning_rate": 4.4951408701854495e-05, + "loss": 0.0053, + "step": 56650 + }, + { + "epoch": 10.1, + "learning_rate": 4.4950517118402286e-05, + "loss": 0.0037, + "step": 56660 + }, + { + "epoch": 10.11, + "learning_rate": 4.494962553495007e-05, + "loss": 0.008, + "step": 56670 + }, + { + "epoch": 10.11, + "learning_rate": 4.494873395149786e-05, + "loss": 0.0044, + "step": 56680 + }, + { + "epoch": 10.11, + "learning_rate": 4.494784236804565e-05, + "loss": 0.0044, + "step": 56690 + }, + { + "epoch": 10.11, + "learning_rate": 4.494695078459344e-05, + "loss": 0.0053, + "step": 56700 + }, + { + "epoch": 10.11, + "learning_rate": 4.494605920114123e-05, + "loss": 0.0031, + "step": 56710 + }, + { + "epoch": 10.11, + "learning_rate": 4.494516761768901e-05, + "loss": 0.0057, + "step": 56720 + }, + { + "epoch": 10.12, + "learning_rate": 4.4944276034236804e-05, + "loss": 0.0033, + "step": 56730 + }, + { + "epoch": 10.12, + "learning_rate": 4.4943384450784596e-05, + "loss": 0.0068, + "step": 56740 + }, + { + "epoch": 10.12, + "learning_rate": 4.494249286733239e-05, + "loss": 0.0046, + "step": 56750 + }, + { + "epoch": 10.12, + "learning_rate": 4.4941690442225395e-05, + "loss": 0.0043, + "step": 56760 + }, + { + "epoch": 10.12, + "learning_rate": 4.494079885877318e-05, + "loss": 0.0029, + "step": 56770 + }, + { + "epoch": 10.12, + "learning_rate": 4.493990727532098e-05, + "loss": 0.004, + "step": 56780 + }, + { + "epoch": 10.13, + "learning_rate": 4.493901569186876e-05, + "loss": 0.0026, + "step": 56790 + }, + { + "epoch": 10.13, + "learning_rate": 4.493812410841655e-05, + "loss": 0.0032, + "step": 56800 + }, + { + "epoch": 10.13, + "learning_rate": 4.493723252496434e-05, + "loss": 0.0041, + "step": 56810 + }, + { + "epoch": 10.13, + "learning_rate": 4.493634094151213e-05, + "loss": 0.0031, + "step": 56820 + }, + { + "epoch": 10.13, + "learning_rate": 4.493544935805992e-05, + "loss": 0.0028, + "step": 56830 + }, + { + "epoch": 10.14, + "learning_rate": 4.4934557774607704e-05, + "loss": 0.0041, + "step": 56840 + }, + { + "epoch": 10.14, + "learning_rate": 4.4933666191155495e-05, + "loss": 0.0045, + "step": 56850 + }, + { + "epoch": 10.14, + "learning_rate": 4.493277460770328e-05, + "loss": 0.0041, + "step": 56860 + }, + { + "epoch": 10.14, + "learning_rate": 4.493188302425107e-05, + "loss": 0.0053, + "step": 56870 + }, + { + "epoch": 10.14, + "learning_rate": 4.4930991440798855e-05, + "loss": 0.0018, + "step": 56880 + }, + { + "epoch": 10.14, + "learning_rate": 4.4930099857346653e-05, + "loss": 0.0042, + "step": 56890 + }, + { + "epoch": 10.15, + "learning_rate": 4.492920827389444e-05, + "loss": 0.0025, + "step": 56900 + }, + { + "epoch": 10.15, + "learning_rate": 4.492831669044223e-05, + "loss": 0.0047, + "step": 56910 + }, + { + "epoch": 10.15, + "learning_rate": 4.492742510699002e-05, + "loss": 0.0053, + "step": 56920 + }, + { + "epoch": 10.15, + "learning_rate": 4.4926533523537805e-05, + "loss": 0.0027, + "step": 56930 + }, + { + "epoch": 10.15, + "learning_rate": 4.4925641940085596e-05, + "loss": 0.0039, + "step": 56940 + }, + { + "epoch": 10.16, + "learning_rate": 4.492475035663338e-05, + "loss": 0.004, + "step": 56950 + }, + { + "epoch": 10.16, + "learning_rate": 4.492385877318117e-05, + "loss": 0.0054, + "step": 56960 + }, + { + "epoch": 10.16, + "learning_rate": 4.492296718972896e-05, + "loss": 0.0053, + "step": 56970 + }, + { + "epoch": 10.16, + "learning_rate": 4.492207560627675e-05, + "loss": 0.0046, + "step": 56980 + }, + { + "epoch": 10.16, + "learning_rate": 4.492118402282454e-05, + "loss": 0.0049, + "step": 56990 + }, + { + "epoch": 10.16, + "learning_rate": 4.492029243937233e-05, + "loss": 0.0055, + "step": 57000 + }, + { + "epoch": 10.17, + "learning_rate": 4.491940085592012e-05, + "loss": 0.0021, + "step": 57010 + }, + { + "epoch": 10.17, + "learning_rate": 4.4918509272467905e-05, + "loss": 0.0063, + "step": 57020 + }, + { + "epoch": 10.17, + "learning_rate": 4.4917617689015697e-05, + "loss": 0.0085, + "step": 57030 + }, + { + "epoch": 10.17, + "learning_rate": 4.491672610556348e-05, + "loss": 0.0034, + "step": 57040 + }, + { + "epoch": 10.17, + "learning_rate": 4.491583452211127e-05, + "loss": 0.0013, + "step": 57050 + }, + { + "epoch": 10.17, + "learning_rate": 4.491494293865906e-05, + "loss": 0.0045, + "step": 57060 + }, + { + "epoch": 10.18, + "learning_rate": 4.491405135520685e-05, + "loss": 0.0054, + "step": 57070 + }, + { + "epoch": 10.18, + "learning_rate": 4.491315977175464e-05, + "loss": 0.0047, + "step": 57080 + }, + { + "epoch": 10.18, + "learning_rate": 4.4912268188302423e-05, + "loss": 0.0023, + "step": 57090 + }, + { + "epoch": 10.18, + "learning_rate": 4.4911376604850215e-05, + "loss": 0.0049, + "step": 57100 + }, + { + "epoch": 10.18, + "learning_rate": 4.4910485021398006e-05, + "loss": 0.0064, + "step": 57110 + }, + { + "epoch": 10.19, + "learning_rate": 4.49095934379458e-05, + "loss": 0.0057, + "step": 57120 + }, + { + "epoch": 10.19, + "learning_rate": 4.490870185449358e-05, + "loss": 0.0035, + "step": 57130 + }, + { + "epoch": 10.19, + "learning_rate": 4.490781027104137e-05, + "loss": 0.0043, + "step": 57140 + }, + { + "epoch": 10.19, + "learning_rate": 4.4906918687589164e-05, + "loss": 0.0031, + "step": 57150 + }, + { + "epoch": 10.19, + "learning_rate": 4.490602710413695e-05, + "loss": 0.0055, + "step": 57160 + }, + { + "epoch": 10.19, + "learning_rate": 4.490513552068474e-05, + "loss": 0.0026, + "step": 57170 + }, + { + "epoch": 10.2, + "learning_rate": 4.4904243937232524e-05, + "loss": 0.0027, + "step": 57180 + }, + { + "epoch": 10.2, + "learning_rate": 4.4903352353780315e-05, + "loss": 0.0024, + "step": 57190 + }, + { + "epoch": 10.2, + "learning_rate": 4.4902460770328106e-05, + "loss": 0.0049, + "step": 57200 + }, + { + "epoch": 10.2, + "learning_rate": 4.490156918687589e-05, + "loss": 0.0039, + "step": 57210 + }, + { + "epoch": 10.2, + "learning_rate": 4.490067760342368e-05, + "loss": 0.0043, + "step": 57220 + }, + { + "epoch": 10.21, + "learning_rate": 4.489978601997147e-05, + "loss": 0.0035, + "step": 57230 + }, + { + "epoch": 10.21, + "learning_rate": 4.4898894436519264e-05, + "loss": 0.0028, + "step": 57240 + }, + { + "epoch": 10.21, + "learning_rate": 4.489800285306705e-05, + "loss": 0.0026, + "step": 57250 + }, + { + "epoch": 10.21, + "learning_rate": 4.489711126961484e-05, + "loss": 0.0036, + "step": 57260 + }, + { + "epoch": 10.21, + "learning_rate": 4.4896219686162625e-05, + "loss": 0.0039, + "step": 57270 + }, + { + "epoch": 10.21, + "learning_rate": 4.4895328102710416e-05, + "loss": 0.0027, + "step": 57280 + }, + { + "epoch": 10.22, + "learning_rate": 4.489443651925821e-05, + "loss": 0.0056, + "step": 57290 + }, + { + "epoch": 10.22, + "learning_rate": 4.489354493580599e-05, + "loss": 0.0025, + "step": 57300 + }, + { + "epoch": 10.22, + "learning_rate": 4.489265335235378e-05, + "loss": 0.0051, + "step": 57310 + }, + { + "epoch": 10.22, + "learning_rate": 4.489176176890157e-05, + "loss": 0.0034, + "step": 57320 + }, + { + "epoch": 10.22, + "learning_rate": 4.4890870185449365e-05, + "loss": 0.0063, + "step": 57330 + }, + { + "epoch": 10.22, + "learning_rate": 4.488997860199715e-05, + "loss": 0.006, + "step": 57340 + }, + { + "epoch": 10.23, + "learning_rate": 4.488908701854494e-05, + "loss": 0.0046, + "step": 57350 + }, + { + "epoch": 10.23, + "learning_rate": 4.4888195435092725e-05, + "loss": 0.0033, + "step": 57360 + }, + { + "epoch": 10.23, + "learning_rate": 4.4887303851640516e-05, + "loss": 0.0051, + "step": 57370 + }, + { + "epoch": 10.23, + "learning_rate": 4.488641226818831e-05, + "loss": 0.0043, + "step": 57380 + }, + { + "epoch": 10.23, + "learning_rate": 4.488552068473609e-05, + "loss": 0.0057, + "step": 57390 + }, + { + "epoch": 10.24, + "learning_rate": 4.488462910128388e-05, + "loss": 0.0036, + "step": 57400 + }, + { + "epoch": 10.24, + "learning_rate": 4.488373751783167e-05, + "loss": 0.0035, + "step": 57410 + }, + { + "epoch": 10.24, + "learning_rate": 4.488284593437946e-05, + "loss": 0.0064, + "step": 57420 + }, + { + "epoch": 10.24, + "learning_rate": 4.488195435092724e-05, + "loss": 0.005, + "step": 57430 + }, + { + "epoch": 10.24, + "learning_rate": 4.488106276747504e-05, + "loss": 0.0038, + "step": 57440 + }, + { + "epoch": 10.24, + "learning_rate": 4.4880171184022826e-05, + "loss": 0.0053, + "step": 57450 + }, + { + "epoch": 10.25, + "learning_rate": 4.487927960057062e-05, + "loss": 0.0076, + "step": 57460 + }, + { + "epoch": 10.25, + "learning_rate": 4.487838801711841e-05, + "loss": 0.0048, + "step": 57470 + }, + { + "epoch": 10.25, + "learning_rate": 4.487749643366619e-05, + "loss": 0.0051, + "step": 57480 + }, + { + "epoch": 10.25, + "learning_rate": 4.4876604850213984e-05, + "loss": 0.0063, + "step": 57490 + }, + { + "epoch": 10.25, + "learning_rate": 4.487571326676177e-05, + "loss": 0.0046, + "step": 57500 + }, + { + "epoch": 10.25, + "learning_rate": 4.487482168330956e-05, + "loss": 0.0031, + "step": 57510 + }, + { + "epoch": 10.26, + "learning_rate": 4.487393009985735e-05, + "loss": 0.0059, + "step": 57520 + }, + { + "epoch": 10.26, + "learning_rate": 4.4873038516405135e-05, + "loss": 0.0029, + "step": 57530 + }, + { + "epoch": 10.26, + "learning_rate": 4.4872146932952926e-05, + "loss": 0.0073, + "step": 57540 + }, + { + "epoch": 10.26, + "learning_rate": 4.487125534950072e-05, + "loss": 0.0037, + "step": 57550 + }, + { + "epoch": 10.26, + "learning_rate": 4.487036376604851e-05, + "loss": 0.0039, + "step": 57560 + }, + { + "epoch": 10.27, + "learning_rate": 4.486947218259629e-05, + "loss": 0.0049, + "step": 57570 + }, + { + "epoch": 10.27, + "learning_rate": 4.4868580599144084e-05, + "loss": 0.0049, + "step": 57580 + }, + { + "epoch": 10.27, + "learning_rate": 4.486768901569187e-05, + "loss": 0.0042, + "step": 57590 + }, + { + "epoch": 10.27, + "learning_rate": 4.486679743223966e-05, + "loss": 0.0063, + "step": 57600 + }, + { + "epoch": 10.27, + "learning_rate": 4.486590584878745e-05, + "loss": 0.006, + "step": 57610 + }, + { + "epoch": 10.27, + "learning_rate": 4.4865014265335236e-05, + "loss": 0.0048, + "step": 57620 + }, + { + "epoch": 10.28, + "learning_rate": 4.486412268188303e-05, + "loss": 0.0035, + "step": 57630 + }, + { + "epoch": 10.28, + "learning_rate": 4.486323109843081e-05, + "loss": 0.0038, + "step": 57640 + }, + { + "epoch": 10.28, + "learning_rate": 4.48623395149786e-05, + "loss": 0.0048, + "step": 57650 + }, + { + "epoch": 10.28, + "learning_rate": 4.486144793152639e-05, + "loss": 0.0045, + "step": 57660 + }, + { + "epoch": 10.28, + "learning_rate": 4.4860556348074185e-05, + "loss": 0.0028, + "step": 57670 + }, + { + "epoch": 10.29, + "learning_rate": 4.485966476462197e-05, + "loss": 0.003, + "step": 57680 + }, + { + "epoch": 10.29, + "learning_rate": 4.485877318116976e-05, + "loss": 0.0032, + "step": 57690 + }, + { + "epoch": 10.29, + "learning_rate": 4.485788159771755e-05, + "loss": 0.0038, + "step": 57700 + }, + { + "epoch": 10.29, + "learning_rate": 4.4856990014265336e-05, + "loss": 0.0076, + "step": 57710 + }, + { + "epoch": 10.29, + "learning_rate": 4.485609843081313e-05, + "loss": 0.0042, + "step": 57720 + }, + { + "epoch": 10.29, + "learning_rate": 4.485520684736091e-05, + "loss": 0.0041, + "step": 57730 + }, + { + "epoch": 10.3, + "learning_rate": 4.48543152639087e-05, + "loss": 0.0051, + "step": 57740 + }, + { + "epoch": 10.3, + "learning_rate": 4.4853423680456494e-05, + "loss": 0.0045, + "step": 57750 + }, + { + "epoch": 10.3, + "learning_rate": 4.485253209700428e-05, + "loss": 0.0028, + "step": 57760 + }, + { + "epoch": 10.3, + "learning_rate": 4.485164051355207e-05, + "loss": 0.0045, + "step": 57770 + }, + { + "epoch": 10.3, + "learning_rate": 4.485074893009986e-05, + "loss": 0.0045, + "step": 57780 + }, + { + "epoch": 10.3, + "learning_rate": 4.484985734664765e-05, + "loss": 0.0022, + "step": 57790 + }, + { + "epoch": 10.31, + "learning_rate": 4.484896576319544e-05, + "loss": 0.0038, + "step": 57800 + }, + { + "epoch": 10.31, + "learning_rate": 4.484807417974323e-05, + "loss": 0.0045, + "step": 57810 + }, + { + "epoch": 10.31, + "learning_rate": 4.484718259629101e-05, + "loss": 0.0044, + "step": 57820 + }, + { + "epoch": 10.31, + "learning_rate": 4.4846291012838804e-05, + "loss": 0.0067, + "step": 57830 + }, + { + "epoch": 10.31, + "learning_rate": 4.4845399429386595e-05, + "loss": 0.0049, + "step": 57840 + }, + { + "epoch": 10.32, + "learning_rate": 4.484450784593438e-05, + "loss": 0.0045, + "step": 57850 + }, + { + "epoch": 10.32, + "learning_rate": 4.484361626248217e-05, + "loss": 0.0064, + "step": 57860 + }, + { + "epoch": 10.32, + "learning_rate": 4.4842724679029955e-05, + "loss": 0.002, + "step": 57870 + }, + { + "epoch": 10.32, + "learning_rate": 4.4841833095577746e-05, + "loss": 0.0039, + "step": 57880 + }, + { + "epoch": 10.32, + "learning_rate": 4.484094151212554e-05, + "loss": 0.0036, + "step": 57890 + }, + { + "epoch": 10.32, + "learning_rate": 4.484004992867333e-05, + "loss": 0.0037, + "step": 57900 + }, + { + "epoch": 10.33, + "learning_rate": 4.483915834522111e-05, + "loss": 0.0036, + "step": 57910 + }, + { + "epoch": 10.33, + "learning_rate": 4.4838266761768904e-05, + "loss": 0.0045, + "step": 57920 + }, + { + "epoch": 10.33, + "learning_rate": 4.4837375178316695e-05, + "loss": 0.0058, + "step": 57930 + }, + { + "epoch": 10.33, + "learning_rate": 4.483648359486448e-05, + "loss": 0.0049, + "step": 57940 + }, + { + "epoch": 10.33, + "learning_rate": 4.483559201141227e-05, + "loss": 0.0024, + "step": 57950 + }, + { + "epoch": 10.34, + "learning_rate": 4.4834700427960055e-05, + "loss": 0.0032, + "step": 57960 + }, + { + "epoch": 10.34, + "learning_rate": 4.483380884450785e-05, + "loss": 0.0026, + "step": 57970 + }, + { + "epoch": 10.34, + "learning_rate": 4.483291726105564e-05, + "loss": 0.0042, + "step": 57980 + }, + { + "epoch": 10.34, + "learning_rate": 4.483202567760342e-05, + "loss": 0.0038, + "step": 57990 + }, + { + "epoch": 10.34, + "learning_rate": 4.483113409415122e-05, + "loss": 0.0019, + "step": 58000 + }, + { + "epoch": 10.34, + "learning_rate": 4.4830242510699005e-05, + "loss": 0.0063, + "step": 58010 + }, + { + "epoch": 10.35, + "learning_rate": 4.4829350927246796e-05, + "loss": 0.0038, + "step": 58020 + }, + { + "epoch": 10.35, + "learning_rate": 4.482845934379458e-05, + "loss": 0.0029, + "step": 58030 + }, + { + "epoch": 10.35, + "learning_rate": 4.482756776034237e-05, + "loss": 0.0021, + "step": 58040 + }, + { + "epoch": 10.35, + "learning_rate": 4.4826676176890156e-05, + "loss": 0.0038, + "step": 58050 + }, + { + "epoch": 10.35, + "learning_rate": 4.482578459343795e-05, + "loss": 0.003, + "step": 58060 + }, + { + "epoch": 10.35, + "learning_rate": 4.482489300998574e-05, + "loss": 0.0051, + "step": 58070 + }, + { + "epoch": 10.36, + "learning_rate": 4.482400142653352e-05, + "loss": 0.0035, + "step": 58080 + }, + { + "epoch": 10.36, + "learning_rate": 4.4823109843081314e-05, + "loss": 0.0045, + "step": 58090 + }, + { + "epoch": 10.36, + "learning_rate": 4.48222182596291e-05, + "loss": 0.0062, + "step": 58100 + }, + { + "epoch": 10.36, + "learning_rate": 4.4821326676176896e-05, + "loss": 0.005, + "step": 58110 + }, + { + "epoch": 10.36, + "learning_rate": 4.482043509272468e-05, + "loss": 0.0046, + "step": 58120 + }, + { + "epoch": 10.37, + "learning_rate": 4.481954350927247e-05, + "loss": 0.0046, + "step": 58130 + }, + { + "epoch": 10.37, + "learning_rate": 4.4818651925820257e-05, + "loss": 0.0046, + "step": 58140 + }, + { + "epoch": 10.37, + "learning_rate": 4.481776034236805e-05, + "loss": 0.0066, + "step": 58150 + }, + { + "epoch": 10.37, + "learning_rate": 4.481686875891584e-05, + "loss": 0.0049, + "step": 58160 + }, + { + "epoch": 10.37, + "learning_rate": 4.4815977175463623e-05, + "loss": 0.0068, + "step": 58170 + }, + { + "epoch": 10.37, + "learning_rate": 4.4815085592011415e-05, + "loss": 0.0031, + "step": 58180 + }, + { + "epoch": 10.38, + "learning_rate": 4.48141940085592e-05, + "loss": 0.0055, + "step": 58190 + }, + { + "epoch": 10.38, + "learning_rate": 4.481330242510699e-05, + "loss": 0.0046, + "step": 58200 + }, + { + "epoch": 10.38, + "learning_rate": 4.481241084165478e-05, + "loss": 0.0034, + "step": 58210 + }, + { + "epoch": 10.38, + "learning_rate": 4.481151925820257e-05, + "loss": 0.0048, + "step": 58220 + }, + { + "epoch": 10.38, + "learning_rate": 4.4810627674750364e-05, + "loss": 0.0064, + "step": 58230 + }, + { + "epoch": 10.39, + "learning_rate": 4.480973609129815e-05, + "loss": 0.0055, + "step": 58240 + }, + { + "epoch": 10.39, + "learning_rate": 4.480884450784594e-05, + "loss": 0.0065, + "step": 58250 + }, + { + "epoch": 10.39, + "learning_rate": 4.4807952924393724e-05, + "loss": 0.002, + "step": 58260 + }, + { + "epoch": 10.39, + "learning_rate": 4.4807061340941515e-05, + "loss": 0.0041, + "step": 58270 + }, + { + "epoch": 10.39, + "learning_rate": 4.48061697574893e-05, + "loss": 0.0055, + "step": 58280 + }, + { + "epoch": 10.39, + "learning_rate": 4.480527817403709e-05, + "loss": 0.0059, + "step": 58290 + }, + { + "epoch": 10.4, + "learning_rate": 4.480438659058488e-05, + "loss": 0.0024, + "step": 58300 + }, + { + "epoch": 10.4, + "learning_rate": 4.4803495007132666e-05, + "loss": 0.0051, + "step": 58310 + }, + { + "epoch": 10.4, + "learning_rate": 4.480260342368046e-05, + "loss": 0.0036, + "step": 58320 + }, + { + "epoch": 10.4, + "learning_rate": 4.480171184022825e-05, + "loss": 0.0033, + "step": 58330 + }, + { + "epoch": 10.4, + "learning_rate": 4.480082025677604e-05, + "loss": 0.0051, + "step": 58340 + }, + { + "epoch": 10.4, + "learning_rate": 4.4799928673323825e-05, + "loss": 0.0053, + "step": 58350 + }, + { + "epoch": 10.41, + "learning_rate": 4.4799037089871616e-05, + "loss": 0.0088, + "step": 58360 + }, + { + "epoch": 10.41, + "learning_rate": 4.47981455064194e-05, + "loss": 0.0046, + "step": 58370 + }, + { + "epoch": 10.41, + "learning_rate": 4.479725392296719e-05, + "loss": 0.0097, + "step": 58380 + }, + { + "epoch": 10.41, + "learning_rate": 4.479636233951498e-05, + "loss": 0.0037, + "step": 58390 + }, + { + "epoch": 10.41, + "learning_rate": 4.479547075606277e-05, + "loss": 0.0034, + "step": 58400 + }, + { + "epoch": 10.42, + "learning_rate": 4.479457917261056e-05, + "loss": 0.0029, + "step": 58410 + }, + { + "epoch": 10.42, + "learning_rate": 4.479368758915834e-05, + "loss": 0.0036, + "step": 58420 + }, + { + "epoch": 10.42, + "learning_rate": 4.4792796005706134e-05, + "loss": 0.0053, + "step": 58430 + }, + { + "epoch": 10.42, + "learning_rate": 4.4791904422253925e-05, + "loss": 0.0041, + "step": 58440 + }, + { + "epoch": 10.42, + "learning_rate": 4.4791012838801716e-05, + "loss": 0.0056, + "step": 58450 + }, + { + "epoch": 10.42, + "learning_rate": 4.479012125534951e-05, + "loss": 0.005, + "step": 58460 + }, + { + "epoch": 10.43, + "learning_rate": 4.478922967189729e-05, + "loss": 0.005, + "step": 58470 + }, + { + "epoch": 10.43, + "learning_rate": 4.478833808844508e-05, + "loss": 0.0043, + "step": 58480 + }, + { + "epoch": 10.43, + "learning_rate": 4.478744650499287e-05, + "loss": 0.0038, + "step": 58490 + }, + { + "epoch": 10.43, + "learning_rate": 4.478655492154066e-05, + "loss": 0.0043, + "step": 58500 + }, + { + "epoch": 10.43, + "learning_rate": 4.478566333808844e-05, + "loss": 0.0037, + "step": 58510 + }, + { + "epoch": 10.44, + "learning_rate": 4.4784771754636234e-05, + "loss": 0.0041, + "step": 58520 + }, + { + "epoch": 10.44, + "learning_rate": 4.4783880171184026e-05, + "loss": 0.0031, + "step": 58530 + }, + { + "epoch": 10.44, + "learning_rate": 4.478298858773181e-05, + "loss": 0.0052, + "step": 58540 + }, + { + "epoch": 10.44, + "learning_rate": 4.478209700427961e-05, + "loss": 0.005, + "step": 58550 + }, + { + "epoch": 10.44, + "learning_rate": 4.478120542082739e-05, + "loss": 0.0032, + "step": 58560 + }, + { + "epoch": 10.44, + "learning_rate": 4.4780313837375184e-05, + "loss": 0.0036, + "step": 58570 + }, + { + "epoch": 10.45, + "learning_rate": 4.477942225392297e-05, + "loss": 0.0045, + "step": 58580 + }, + { + "epoch": 10.45, + "learning_rate": 4.477853067047076e-05, + "loss": 0.006, + "step": 58590 + }, + { + "epoch": 10.45, + "learning_rate": 4.4777639087018544e-05, + "loss": 0.0039, + "step": 58600 + }, + { + "epoch": 10.45, + "learning_rate": 4.4776747503566335e-05, + "loss": 0.0029, + "step": 58610 + }, + { + "epoch": 10.45, + "learning_rate": 4.4775855920114126e-05, + "loss": 0.0044, + "step": 58620 + }, + { + "epoch": 10.45, + "learning_rate": 4.477496433666191e-05, + "loss": 0.0058, + "step": 58630 + }, + { + "epoch": 10.46, + "learning_rate": 4.47740727532097e-05, + "loss": 0.002, + "step": 58640 + }, + { + "epoch": 10.46, + "learning_rate": 4.4773181169757486e-05, + "loss": 0.0036, + "step": 58650 + }, + { + "epoch": 10.46, + "learning_rate": 4.4772289586305284e-05, + "loss": 0.0044, + "step": 58660 + }, + { + "epoch": 10.46, + "learning_rate": 4.477139800285307e-05, + "loss": 0.0045, + "step": 58670 + }, + { + "epoch": 10.46, + "learning_rate": 4.477050641940086e-05, + "loss": 0.0066, + "step": 58680 + }, + { + "epoch": 10.47, + "learning_rate": 4.476961483594865e-05, + "loss": 0.0031, + "step": 58690 + }, + { + "epoch": 10.47, + "learning_rate": 4.4768723252496436e-05, + "loss": 0.0051, + "step": 58700 + }, + { + "epoch": 10.47, + "learning_rate": 4.476783166904423e-05, + "loss": 0.0049, + "step": 58710 + }, + { + "epoch": 10.47, + "learning_rate": 4.476694008559201e-05, + "loss": 0.0031, + "step": 58720 + }, + { + "epoch": 10.47, + "learning_rate": 4.47660485021398e-05, + "loss": 0.0036, + "step": 58730 + }, + { + "epoch": 10.47, + "learning_rate": 4.476515691868759e-05, + "loss": 0.004, + "step": 58740 + }, + { + "epoch": 10.48, + "learning_rate": 4.476426533523538e-05, + "loss": 0.0031, + "step": 58750 + }, + { + "epoch": 10.48, + "learning_rate": 4.476337375178317e-05, + "loss": 0.0037, + "step": 58760 + }, + { + "epoch": 10.48, + "learning_rate": 4.476248216833096e-05, + "loss": 0.0043, + "step": 58770 + }, + { + "epoch": 10.48, + "learning_rate": 4.476159058487875e-05, + "loss": 0.0046, + "step": 58780 + }, + { + "epoch": 10.48, + "learning_rate": 4.4760699001426536e-05, + "loss": 0.0039, + "step": 58790 + }, + { + "epoch": 10.49, + "learning_rate": 4.475980741797433e-05, + "loss": 0.0043, + "step": 58800 + }, + { + "epoch": 10.49, + "learning_rate": 4.475891583452211e-05, + "loss": 0.0029, + "step": 58810 + }, + { + "epoch": 10.49, + "learning_rate": 4.47580242510699e-05, + "loss": 0.0057, + "step": 58820 + }, + { + "epoch": 10.49, + "learning_rate": 4.475713266761769e-05, + "loss": 0.0041, + "step": 58830 + }, + { + "epoch": 10.49, + "learning_rate": 4.475624108416548e-05, + "loss": 0.0043, + "step": 58840 + }, + { + "epoch": 10.49, + "learning_rate": 4.475534950071327e-05, + "loss": 0.0076, + "step": 58850 + }, + { + "epoch": 10.5, + "learning_rate": 4.4754457917261054e-05, + "loss": 0.0037, + "step": 58860 + }, + { + "epoch": 10.5, + "learning_rate": 4.4753566333808845e-05, + "loss": 0.0031, + "step": 58870 + }, + { + "epoch": 10.5, + "learning_rate": 4.475267475035664e-05, + "loss": 0.0042, + "step": 58880 + }, + { + "epoch": 10.5, + "learning_rate": 4.475178316690443e-05, + "loss": 0.0045, + "step": 58890 + }, + { + "epoch": 10.5, + "learning_rate": 4.475089158345221e-05, + "loss": 0.005, + "step": 58900 + }, + { + "epoch": 10.5, + "learning_rate": 4.4750000000000004e-05, + "loss": 0.0034, + "step": 58910 + }, + { + "epoch": 10.51, + "learning_rate": 4.4749108416547795e-05, + "loss": 0.0049, + "step": 58920 + }, + { + "epoch": 10.51, + "learning_rate": 4.474821683309558e-05, + "loss": 0.005, + "step": 58930 + }, + { + "epoch": 10.51, + "learning_rate": 4.474732524964337e-05, + "loss": 0.0048, + "step": 58940 + }, + { + "epoch": 10.51, + "learning_rate": 4.4746433666191155e-05, + "loss": 0.0059, + "step": 58950 + }, + { + "epoch": 10.51, + "learning_rate": 4.4745542082738946e-05, + "loss": 0.0044, + "step": 58960 + }, + { + "epoch": 10.52, + "learning_rate": 4.474465049928673e-05, + "loss": 0.0023, + "step": 58970 + }, + { + "epoch": 10.52, + "learning_rate": 4.474375891583452e-05, + "loss": 0.004, + "step": 58980 + }, + { + "epoch": 10.52, + "learning_rate": 4.474286733238231e-05, + "loss": 0.0037, + "step": 58990 + }, + { + "epoch": 10.52, + "learning_rate": 4.4741975748930104e-05, + "loss": 0.0048, + "step": 59000 + }, + { + "epoch": 10.52, + "learning_rate": 4.4741084165477895e-05, + "loss": 0.0041, + "step": 59010 + }, + { + "epoch": 10.52, + "learning_rate": 4.474019258202568e-05, + "loss": 0.0059, + "step": 59020 + }, + { + "epoch": 10.53, + "learning_rate": 4.473930099857347e-05, + "loss": 0.0038, + "step": 59030 + }, + { + "epoch": 10.53, + "learning_rate": 4.4738409415121255e-05, + "loss": 0.0043, + "step": 59040 + }, + { + "epoch": 10.53, + "learning_rate": 4.4737517831669047e-05, + "loss": 0.0025, + "step": 59050 + }, + { + "epoch": 10.53, + "learning_rate": 4.473662624821683e-05, + "loss": 0.0065, + "step": 59060 + }, + { + "epoch": 10.53, + "learning_rate": 4.473573466476462e-05, + "loss": 0.0047, + "step": 59070 + }, + { + "epoch": 10.53, + "learning_rate": 4.4734843081312413e-05, + "loss": 0.0026, + "step": 59080 + }, + { + "epoch": 10.54, + "learning_rate": 4.47339514978602e-05, + "loss": 0.0038, + "step": 59090 + }, + { + "epoch": 10.54, + "learning_rate": 4.4733059914407996e-05, + "loss": 0.0054, + "step": 59100 + }, + { + "epoch": 10.54, + "learning_rate": 4.473216833095578e-05, + "loss": 0.0043, + "step": 59110 + }, + { + "epoch": 10.54, + "learning_rate": 4.473127674750357e-05, + "loss": 0.0041, + "step": 59120 + }, + { + "epoch": 10.54, + "learning_rate": 4.4730385164051356e-05, + "loss": 0.0072, + "step": 59130 + }, + { + "epoch": 10.55, + "learning_rate": 4.472949358059915e-05, + "loss": 0.0043, + "step": 59140 + }, + { + "epoch": 10.55, + "learning_rate": 4.472860199714694e-05, + "loss": 0.0053, + "step": 59150 + }, + { + "epoch": 10.55, + "learning_rate": 4.472771041369472e-05, + "loss": 0.0027, + "step": 59160 + }, + { + "epoch": 10.55, + "learning_rate": 4.4726818830242514e-05, + "loss": 0.0053, + "step": 59170 + }, + { + "epoch": 10.55, + "learning_rate": 4.47259272467903e-05, + "loss": 0.0032, + "step": 59180 + }, + { + "epoch": 10.55, + "learning_rate": 4.472503566333809e-05, + "loss": 0.0052, + "step": 59190 + }, + { + "epoch": 10.56, + "learning_rate": 4.4724144079885874e-05, + "loss": 0.003, + "step": 59200 + }, + { + "epoch": 10.56, + "learning_rate": 4.472325249643367e-05, + "loss": 0.0049, + "step": 59210 + }, + { + "epoch": 10.56, + "learning_rate": 4.4722360912981457e-05, + "loss": 0.0061, + "step": 59220 + }, + { + "epoch": 10.56, + "learning_rate": 4.472146932952925e-05, + "loss": 0.0045, + "step": 59230 + }, + { + "epoch": 10.56, + "learning_rate": 4.472057774607704e-05, + "loss": 0.0045, + "step": 59240 + }, + { + "epoch": 10.57, + "learning_rate": 4.471968616262482e-05, + "loss": 0.0051, + "step": 59250 + }, + { + "epoch": 10.57, + "learning_rate": 4.4718794579172615e-05, + "loss": 0.0034, + "step": 59260 + }, + { + "epoch": 10.57, + "learning_rate": 4.47179029957204e-05, + "loss": 0.0043, + "step": 59270 + }, + { + "epoch": 10.57, + "learning_rate": 4.471701141226819e-05, + "loss": 0.0049, + "step": 59280 + }, + { + "epoch": 10.57, + "learning_rate": 4.4716119828815975e-05, + "loss": 0.0044, + "step": 59290 + }, + { + "epoch": 10.57, + "learning_rate": 4.4715228245363766e-05, + "loss": 0.0037, + "step": 59300 + }, + { + "epoch": 10.58, + "learning_rate": 4.471433666191156e-05, + "loss": 0.0023, + "step": 59310 + }, + { + "epoch": 10.58, + "learning_rate": 4.471344507845935e-05, + "loss": 0.0045, + "step": 59320 + }, + { + "epoch": 10.58, + "learning_rate": 4.471255349500714e-05, + "loss": 0.0045, + "step": 59330 + }, + { + "epoch": 10.58, + "learning_rate": 4.4711661911554924e-05, + "loss": 0.003, + "step": 59340 + }, + { + "epoch": 10.58, + "learning_rate": 4.4710770328102715e-05, + "loss": 0.0046, + "step": 59350 + }, + { + "epoch": 10.58, + "learning_rate": 4.47098787446505e-05, + "loss": 0.0035, + "step": 59360 + }, + { + "epoch": 10.59, + "learning_rate": 4.470898716119829e-05, + "loss": 0.0032, + "step": 59370 + }, + { + "epoch": 10.59, + "learning_rate": 4.470809557774608e-05, + "loss": 0.0033, + "step": 59380 + }, + { + "epoch": 10.59, + "learning_rate": 4.4707203994293866e-05, + "loss": 0.0046, + "step": 59390 + }, + { + "epoch": 10.59, + "learning_rate": 4.470631241084166e-05, + "loss": 0.0041, + "step": 59400 + }, + { + "epoch": 10.59, + "learning_rate": 4.470542082738944e-05, + "loss": 0.0062, + "step": 59410 + }, + { + "epoch": 10.6, + "learning_rate": 4.470452924393723e-05, + "loss": 0.0044, + "step": 59420 + }, + { + "epoch": 10.6, + "learning_rate": 4.4703637660485024e-05, + "loss": 0.0022, + "step": 59430 + }, + { + "epoch": 10.6, + "learning_rate": 4.4702746077032816e-05, + "loss": 0.0058, + "step": 59440 + }, + { + "epoch": 10.6, + "learning_rate": 4.47018544935806e-05, + "loss": 0.004, + "step": 59450 + }, + { + "epoch": 10.6, + "learning_rate": 4.470096291012839e-05, + "loss": 0.0029, + "step": 59460 + }, + { + "epoch": 10.6, + "learning_rate": 4.470007132667618e-05, + "loss": 0.0047, + "step": 59470 + }, + { + "epoch": 10.61, + "learning_rate": 4.469917974322397e-05, + "loss": 0.0031, + "step": 59480 + }, + { + "epoch": 10.61, + "learning_rate": 4.469828815977176e-05, + "loss": 0.0024, + "step": 59490 + }, + { + "epoch": 10.61, + "learning_rate": 4.469739657631954e-05, + "loss": 0.0024, + "step": 59500 + }, + { + "epoch": 10.61, + "learning_rate": 4.4696504992867334e-05, + "loss": 0.0039, + "step": 59510 + }, + { + "epoch": 10.61, + "learning_rate": 4.469561340941512e-05, + "loss": 0.0037, + "step": 59520 + }, + { + "epoch": 10.62, + "learning_rate": 4.469472182596291e-05, + "loss": 0.0039, + "step": 59530 + }, + { + "epoch": 10.62, + "learning_rate": 4.46938302425107e-05, + "loss": 0.0029, + "step": 59540 + }, + { + "epoch": 10.62, + "learning_rate": 4.469293865905849e-05, + "loss": 0.0052, + "step": 59550 + }, + { + "epoch": 10.62, + "learning_rate": 4.469204707560628e-05, + "loss": 0.0044, + "step": 59560 + }, + { + "epoch": 10.62, + "learning_rate": 4.469115549215407e-05, + "loss": 0.0038, + "step": 59570 + }, + { + "epoch": 10.62, + "learning_rate": 4.469026390870186e-05, + "loss": 0.0041, + "step": 59580 + }, + { + "epoch": 10.63, + "learning_rate": 4.468937232524964e-05, + "loss": 0.0042, + "step": 59590 + }, + { + "epoch": 10.63, + "learning_rate": 4.4688480741797434e-05, + "loss": 0.0037, + "step": 59600 + }, + { + "epoch": 10.63, + "learning_rate": 4.468758915834522e-05, + "loss": 0.0039, + "step": 59610 + }, + { + "epoch": 10.63, + "learning_rate": 4.468669757489301e-05, + "loss": 0.0057, + "step": 59620 + }, + { + "epoch": 10.63, + "learning_rate": 4.46858059914408e-05, + "loss": 0.0054, + "step": 59630 + }, + { + "epoch": 10.63, + "learning_rate": 4.4684914407988586e-05, + "loss": 0.0031, + "step": 59640 + }, + { + "epoch": 10.64, + "learning_rate": 4.4684022824536384e-05, + "loss": 0.0051, + "step": 59650 + }, + { + "epoch": 10.64, + "learning_rate": 4.468313124108417e-05, + "loss": 0.0048, + "step": 59660 + }, + { + "epoch": 10.64, + "learning_rate": 4.468223965763196e-05, + "loss": 0.006, + "step": 59670 + }, + { + "epoch": 10.64, + "learning_rate": 4.4681348074179744e-05, + "loss": 0.0037, + "step": 59680 + }, + { + "epoch": 10.64, + "learning_rate": 4.4680456490727535e-05, + "loss": 0.0031, + "step": 59690 + }, + { + "epoch": 10.65, + "learning_rate": 4.4679564907275326e-05, + "loss": 0.0037, + "step": 59700 + }, + { + "epoch": 10.65, + "learning_rate": 4.467867332382311e-05, + "loss": 0.0038, + "step": 59710 + }, + { + "epoch": 10.65, + "learning_rate": 4.46777817403709e-05, + "loss": 0.0052, + "step": 59720 + }, + { + "epoch": 10.65, + "learning_rate": 4.4676890156918686e-05, + "loss": 0.0055, + "step": 59730 + }, + { + "epoch": 10.65, + "learning_rate": 4.467599857346648e-05, + "loss": 0.0086, + "step": 59740 + }, + { + "epoch": 10.65, + "learning_rate": 4.467510699001426e-05, + "loss": 0.004, + "step": 59750 + }, + { + "epoch": 10.66, + "learning_rate": 4.467421540656206e-05, + "loss": 0.0059, + "step": 59760 + }, + { + "epoch": 10.66, + "learning_rate": 4.4673323823109844e-05, + "loss": 0.0034, + "step": 59770 + }, + { + "epoch": 10.66, + "learning_rate": 4.4672432239657636e-05, + "loss": 0.004, + "step": 59780 + }, + { + "epoch": 10.66, + "learning_rate": 4.467154065620543e-05, + "loss": 0.0036, + "step": 59790 + }, + { + "epoch": 10.66, + "learning_rate": 4.467064907275321e-05, + "loss": 0.0029, + "step": 59800 + }, + { + "epoch": 10.67, + "learning_rate": 4.4669757489301e-05, + "loss": 0.0025, + "step": 59810 + }, + { + "epoch": 10.67, + "learning_rate": 4.466886590584879e-05, + "loss": 0.0061, + "step": 59820 + }, + { + "epoch": 10.67, + "learning_rate": 4.466797432239658e-05, + "loss": 0.0026, + "step": 59830 + }, + { + "epoch": 10.67, + "learning_rate": 4.466708273894436e-05, + "loss": 0.0025, + "step": 59840 + }, + { + "epoch": 10.67, + "learning_rate": 4.4666191155492154e-05, + "loss": 0.0018, + "step": 59850 + }, + { + "epoch": 10.67, + "learning_rate": 4.4665299572039945e-05, + "loss": 0.0034, + "step": 59860 + }, + { + "epoch": 10.68, + "learning_rate": 4.4664407988587736e-05, + "loss": 0.0029, + "step": 59870 + }, + { + "epoch": 10.68, + "learning_rate": 4.466351640513553e-05, + "loss": 0.0034, + "step": 59880 + }, + { + "epoch": 10.68, + "learning_rate": 4.466262482168331e-05, + "loss": 0.0019, + "step": 59890 + }, + { + "epoch": 10.68, + "learning_rate": 4.46617332382311e-05, + "loss": 0.0029, + "step": 59900 + }, + { + "epoch": 10.68, + "learning_rate": 4.466084165477889e-05, + "loss": 0.0047, + "step": 59910 + }, + { + "epoch": 10.68, + "learning_rate": 4.465995007132668e-05, + "loss": 0.0045, + "step": 59920 + }, + { + "epoch": 10.69, + "learning_rate": 4.465905848787447e-05, + "loss": 0.0051, + "step": 59930 + }, + { + "epoch": 10.69, + "learning_rate": 4.4658166904422254e-05, + "loss": 0.0059, + "step": 59940 + }, + { + "epoch": 10.69, + "learning_rate": 4.4657275320970045e-05, + "loss": 0.0021, + "step": 59950 + }, + { + "epoch": 10.69, + "learning_rate": 4.465638373751783e-05, + "loss": 0.0047, + "step": 59960 + }, + { + "epoch": 10.69, + "learning_rate": 4.465549215406562e-05, + "loss": 0.0039, + "step": 59970 + }, + { + "epoch": 10.7, + "learning_rate": 4.465460057061341e-05, + "loss": 0.0031, + "step": 59980 + }, + { + "epoch": 10.7, + "learning_rate": 4.4653708987161203e-05, + "loss": 0.0065, + "step": 59990 + }, + { + "epoch": 10.7, + "learning_rate": 4.465281740370899e-05, + "loss": 0.0026, + "step": 60000 + }, + { + "epoch": 10.7, + "learning_rate": 4.465192582025678e-05, + "loss": 0.0034, + "step": 60010 + }, + { + "epoch": 10.7, + "learning_rate": 4.465103423680457e-05, + "loss": 0.0021, + "step": 60020 + }, + { + "epoch": 10.7, + "learning_rate": 4.4650142653352355e-05, + "loss": 0.0047, + "step": 60030 + }, + { + "epoch": 10.71, + "learning_rate": 4.4649251069900146e-05, + "loss": 0.004, + "step": 60040 + }, + { + "epoch": 10.71, + "learning_rate": 4.464835948644793e-05, + "loss": 0.0034, + "step": 60050 + }, + { + "epoch": 10.71, + "learning_rate": 4.464746790299572e-05, + "loss": 0.003, + "step": 60060 + }, + { + "epoch": 10.71, + "learning_rate": 4.4646576319543506e-05, + "loss": 0.0034, + "step": 60070 + }, + { + "epoch": 10.71, + "learning_rate": 4.46456847360913e-05, + "loss": 0.0025, + "step": 60080 + }, + { + "epoch": 10.72, + "learning_rate": 4.464479315263909e-05, + "loss": 0.0048, + "step": 60090 + }, + { + "epoch": 10.72, + "learning_rate": 4.464390156918688e-05, + "loss": 0.0051, + "step": 60100 + }, + { + "epoch": 10.72, + "learning_rate": 4.464300998573467e-05, + "loss": 0.0058, + "step": 60110 + }, + { + "epoch": 10.72, + "learning_rate": 4.4642118402282455e-05, + "loss": 0.0031, + "step": 60120 + }, + { + "epoch": 10.72, + "learning_rate": 4.4641226818830247e-05, + "loss": 0.0058, + "step": 60130 + }, + { + "epoch": 10.72, + "learning_rate": 4.464033523537803e-05, + "loss": 0.0058, + "step": 60140 + }, + { + "epoch": 10.73, + "learning_rate": 4.463944365192582e-05, + "loss": 0.0029, + "step": 60150 + }, + { + "epoch": 10.73, + "learning_rate": 4.4638552068473613e-05, + "loss": 0.0029, + "step": 60160 + }, + { + "epoch": 10.73, + "learning_rate": 4.46376604850214e-05, + "loss": 0.0069, + "step": 60170 + }, + { + "epoch": 10.73, + "learning_rate": 4.463676890156919e-05, + "loss": 0.0035, + "step": 60180 + }, + { + "epoch": 10.73, + "learning_rate": 4.4635877318116973e-05, + "loss": 0.0025, + "step": 60190 + }, + { + "epoch": 10.73, + "learning_rate": 4.463498573466477e-05, + "loss": 0.0077, + "step": 60200 + }, + { + "epoch": 10.74, + "learning_rate": 4.4634094151212556e-05, + "loss": 0.0041, + "step": 60210 + }, + { + "epoch": 10.74, + "learning_rate": 4.463320256776035e-05, + "loss": 0.0057, + "step": 60220 + }, + { + "epoch": 10.74, + "learning_rate": 4.463231098430813e-05, + "loss": 0.0046, + "step": 60230 + }, + { + "epoch": 10.74, + "learning_rate": 4.463141940085592e-05, + "loss": 0.0052, + "step": 60240 + }, + { + "epoch": 10.74, + "learning_rate": 4.4630527817403714e-05, + "loss": 0.0038, + "step": 60250 + }, + { + "epoch": 10.75, + "learning_rate": 4.46296362339515e-05, + "loss": 0.002, + "step": 60260 + }, + { + "epoch": 10.75, + "learning_rate": 4.462874465049929e-05, + "loss": 0.0027, + "step": 60270 + }, + { + "epoch": 10.75, + "learning_rate": 4.4627853067047074e-05, + "loss": 0.0033, + "step": 60280 + }, + { + "epoch": 10.75, + "learning_rate": 4.4626961483594865e-05, + "loss": 0.0034, + "step": 60290 + }, + { + "epoch": 10.75, + "learning_rate": 4.462606990014265e-05, + "loss": 0.0025, + "step": 60300 + }, + { + "epoch": 10.75, + "learning_rate": 4.462517831669045e-05, + "loss": 0.0035, + "step": 60310 + }, + { + "epoch": 10.76, + "learning_rate": 4.462428673323823e-05, + "loss": 0.0038, + "step": 60320 + }, + { + "epoch": 10.76, + "learning_rate": 4.462339514978602e-05, + "loss": 0.008, + "step": 60330 + }, + { + "epoch": 10.76, + "learning_rate": 4.4622503566333815e-05, + "loss": 0.0036, + "step": 60340 + }, + { + "epoch": 10.76, + "learning_rate": 4.46216119828816e-05, + "loss": 0.0015, + "step": 60350 + }, + { + "epoch": 10.76, + "learning_rate": 4.462072039942939e-05, + "loss": 0.0032, + "step": 60360 + }, + { + "epoch": 10.76, + "learning_rate": 4.4619828815977175e-05, + "loss": 0.0034, + "step": 60370 + }, + { + "epoch": 10.77, + "learning_rate": 4.4618937232524966e-05, + "loss": 0.0055, + "step": 60380 + }, + { + "epoch": 10.77, + "learning_rate": 4.461804564907276e-05, + "loss": 0.0024, + "step": 60390 + }, + { + "epoch": 10.77, + "learning_rate": 4.461715406562054e-05, + "loss": 0.0032, + "step": 60400 + }, + { + "epoch": 10.77, + "learning_rate": 4.461626248216833e-05, + "loss": 0.0046, + "step": 60410 + }, + { + "epoch": 10.77, + "learning_rate": 4.4615370898716124e-05, + "loss": 0.0044, + "step": 60420 + }, + { + "epoch": 10.78, + "learning_rate": 4.4614479315263915e-05, + "loss": 0.0081, + "step": 60430 + }, + { + "epoch": 10.78, + "learning_rate": 4.46135877318117e-05, + "loss": 0.006, + "step": 60440 + }, + { + "epoch": 10.78, + "learning_rate": 4.461269614835949e-05, + "loss": 0.0058, + "step": 60450 + }, + { + "epoch": 10.78, + "learning_rate": 4.4611804564907275e-05, + "loss": 0.0075, + "step": 60460 + }, + { + "epoch": 10.78, + "learning_rate": 4.4610912981455066e-05, + "loss": 0.0045, + "step": 60470 + }, + { + "epoch": 10.78, + "learning_rate": 4.461002139800286e-05, + "loss": 0.0054, + "step": 60480 + }, + { + "epoch": 10.79, + "learning_rate": 4.460912981455064e-05, + "loss": 0.004, + "step": 60490 + }, + { + "epoch": 10.79, + "learning_rate": 4.460823823109843e-05, + "loss": 0.0044, + "step": 60500 + }, + { + "epoch": 10.79, + "learning_rate": 4.460734664764622e-05, + "loss": 0.0026, + "step": 60510 + }, + { + "epoch": 10.79, + "learning_rate": 4.460645506419401e-05, + "loss": 0.0026, + "step": 60520 + }, + { + "epoch": 10.79, + "learning_rate": 4.46055634807418e-05, + "loss": 0.0045, + "step": 60530 + }, + { + "epoch": 10.8, + "learning_rate": 4.460467189728959e-05, + "loss": 0.0021, + "step": 60540 + }, + { + "epoch": 10.8, + "learning_rate": 4.4603780313837376e-05, + "loss": 0.0049, + "step": 60550 + }, + { + "epoch": 10.8, + "learning_rate": 4.460288873038517e-05, + "loss": 0.0025, + "step": 60560 + }, + { + "epoch": 10.8, + "learning_rate": 4.460199714693296e-05, + "loss": 0.0025, + "step": 60570 + }, + { + "epoch": 10.8, + "learning_rate": 4.460110556348074e-05, + "loss": 0.0032, + "step": 60580 + }, + { + "epoch": 10.8, + "learning_rate": 4.4600213980028534e-05, + "loss": 0.0038, + "step": 60590 + }, + { + "epoch": 10.81, + "learning_rate": 4.459932239657632e-05, + "loss": 0.0041, + "step": 60600 + }, + { + "epoch": 10.81, + "learning_rate": 4.459843081312411e-05, + "loss": 0.0069, + "step": 60610 + }, + { + "epoch": 10.81, + "learning_rate": 4.45975392296719e-05, + "loss": 0.0037, + "step": 60620 + }, + { + "epoch": 10.81, + "learning_rate": 4.4596647646219685e-05, + "loss": 0.0062, + "step": 60630 + }, + { + "epoch": 10.81, + "learning_rate": 4.459575606276748e-05, + "loss": 0.0035, + "step": 60640 + }, + { + "epoch": 10.81, + "learning_rate": 4.459486447931527e-05, + "loss": 0.0023, + "step": 60650 + }, + { + "epoch": 10.82, + "learning_rate": 4.459397289586306e-05, + "loss": 0.0044, + "step": 60660 + }, + { + "epoch": 10.82, + "learning_rate": 4.459308131241084e-05, + "loss": 0.004, + "step": 60670 + }, + { + "epoch": 10.82, + "learning_rate": 4.4592189728958634e-05, + "loss": 0.0045, + "step": 60680 + }, + { + "epoch": 10.82, + "learning_rate": 4.459129814550642e-05, + "loss": 0.0047, + "step": 60690 + }, + { + "epoch": 10.82, + "learning_rate": 4.459040656205421e-05, + "loss": 0.0061, + "step": 60700 + }, + { + "epoch": 10.83, + "learning_rate": 4.4589514978602e-05, + "loss": 0.0027, + "step": 60710 + }, + { + "epoch": 10.83, + "learning_rate": 4.4588623395149786e-05, + "loss": 0.0082, + "step": 60720 + }, + { + "epoch": 10.83, + "learning_rate": 4.458773181169758e-05, + "loss": 0.0025, + "step": 60730 + }, + { + "epoch": 10.83, + "learning_rate": 4.458684022824536e-05, + "loss": 0.0046, + "step": 60740 + }, + { + "epoch": 10.83, + "learning_rate": 4.458594864479316e-05, + "loss": 0.0032, + "step": 60750 + }, + { + "epoch": 10.83, + "learning_rate": 4.4585057061340944e-05, + "loss": 0.0052, + "step": 60760 + }, + { + "epoch": 10.84, + "learning_rate": 4.4584165477888735e-05, + "loss": 0.004, + "step": 60770 + }, + { + "epoch": 10.84, + "learning_rate": 4.458327389443652e-05, + "loss": 0.0062, + "step": 60780 + }, + { + "epoch": 10.84, + "learning_rate": 4.458238231098431e-05, + "loss": 0.0038, + "step": 60790 + }, + { + "epoch": 10.84, + "learning_rate": 4.45814907275321e-05, + "loss": 0.0025, + "step": 60800 + }, + { + "epoch": 10.84, + "learning_rate": 4.4580599144079886e-05, + "loss": 0.0036, + "step": 60810 + }, + { + "epoch": 10.85, + "learning_rate": 4.457970756062768e-05, + "loss": 0.0031, + "step": 60820 + }, + { + "epoch": 10.85, + "learning_rate": 4.457881597717546e-05, + "loss": 0.0041, + "step": 60830 + }, + { + "epoch": 10.85, + "learning_rate": 4.457792439372325e-05, + "loss": 0.0085, + "step": 60840 + }, + { + "epoch": 10.85, + "learning_rate": 4.4577032810271044e-05, + "loss": 0.003, + "step": 60850 + }, + { + "epoch": 10.85, + "learning_rate": 4.4576141226818835e-05, + "loss": 0.0028, + "step": 60860 + }, + { + "epoch": 10.85, + "learning_rate": 4.457524964336663e-05, + "loss": 0.0045, + "step": 60870 + }, + { + "epoch": 10.86, + "learning_rate": 4.457435805991441e-05, + "loss": 0.0044, + "step": 60880 + }, + { + "epoch": 10.86, + "learning_rate": 4.45734664764622e-05, + "loss": 0.0048, + "step": 60890 + }, + { + "epoch": 10.86, + "learning_rate": 4.457257489300999e-05, + "loss": 0.0031, + "step": 60900 + }, + { + "epoch": 10.86, + "learning_rate": 4.457168330955778e-05, + "loss": 0.0015, + "step": 60910 + }, + { + "epoch": 10.86, + "learning_rate": 4.457079172610556e-05, + "loss": 0.0039, + "step": 60920 + }, + { + "epoch": 10.86, + "learning_rate": 4.4569900142653354e-05, + "loss": 0.0044, + "step": 60930 + }, + { + "epoch": 10.87, + "learning_rate": 4.4569008559201145e-05, + "loss": 0.0031, + "step": 60940 + }, + { + "epoch": 10.87, + "learning_rate": 4.456811697574893e-05, + "loss": 0.0039, + "step": 60950 + }, + { + "epoch": 10.87, + "learning_rate": 4.456722539229672e-05, + "loss": 0.0037, + "step": 60960 + }, + { + "epoch": 10.87, + "learning_rate": 4.456633380884451e-05, + "loss": 0.0031, + "step": 60970 + }, + { + "epoch": 10.87, + "learning_rate": 4.45654422253923e-05, + "loss": 0.0032, + "step": 60980 + }, + { + "epoch": 10.88, + "learning_rate": 4.456455064194009e-05, + "loss": 0.0039, + "step": 60990 + }, + { + "epoch": 10.88, + "learning_rate": 4.456365905848788e-05, + "loss": 0.0045, + "step": 61000 + }, + { + "epoch": 10.88, + "learning_rate": 4.456276747503566e-05, + "loss": 0.0044, + "step": 61010 + }, + { + "epoch": 10.88, + "learning_rate": 4.4561875891583454e-05, + "loss": 0.0035, + "step": 61020 + }, + { + "epoch": 10.88, + "learning_rate": 4.4560984308131245e-05, + "loss": 0.0035, + "step": 61030 + }, + { + "epoch": 10.88, + "learning_rate": 4.456009272467903e-05, + "loss": 0.0023, + "step": 61040 + }, + { + "epoch": 10.89, + "learning_rate": 4.455920114122682e-05, + "loss": 0.0031, + "step": 61050 + }, + { + "epoch": 10.89, + "learning_rate": 4.4558309557774605e-05, + "loss": 0.0032, + "step": 61060 + }, + { + "epoch": 10.89, + "learning_rate": 4.45574179743224e-05, + "loss": 0.0044, + "step": 61070 + }, + { + "epoch": 10.89, + "learning_rate": 4.455652639087019e-05, + "loss": 0.0038, + "step": 61080 + }, + { + "epoch": 10.89, + "learning_rate": 4.455563480741798e-05, + "loss": 0.0066, + "step": 61090 + }, + { + "epoch": 10.9, + "learning_rate": 4.455474322396577e-05, + "loss": 0.0039, + "step": 61100 + }, + { + "epoch": 10.9, + "learning_rate": 4.4553851640513555e-05, + "loss": 0.0054, + "step": 61110 + }, + { + "epoch": 10.9, + "learning_rate": 4.4552960057061346e-05, + "loss": 0.0044, + "step": 61120 + }, + { + "epoch": 10.9, + "learning_rate": 4.455206847360913e-05, + "loss": 0.0025, + "step": 61130 + }, + { + "epoch": 10.9, + "learning_rate": 4.455117689015692e-05, + "loss": 0.003, + "step": 61140 + }, + { + "epoch": 10.9, + "learning_rate": 4.4550285306704706e-05, + "loss": 0.002, + "step": 61150 + }, + { + "epoch": 10.91, + "learning_rate": 4.45493937232525e-05, + "loss": 0.0044, + "step": 61160 + }, + { + "epoch": 10.91, + "learning_rate": 4.454850213980029e-05, + "loss": 0.0058, + "step": 61170 + }, + { + "epoch": 10.91, + "learning_rate": 4.454761055634807e-05, + "loss": 0.0026, + "step": 61180 + }, + { + "epoch": 10.91, + "learning_rate": 4.454671897289587e-05, + "loss": 0.0064, + "step": 61190 + }, + { + "epoch": 10.91, + "learning_rate": 4.4545827389443655e-05, + "loss": 0.0041, + "step": 61200 + }, + { + "epoch": 10.91, + "learning_rate": 4.4544935805991447e-05, + "loss": 0.0041, + "step": 61210 + }, + { + "epoch": 10.92, + "learning_rate": 4.454404422253923e-05, + "loss": 0.0074, + "step": 61220 + }, + { + "epoch": 10.92, + "learning_rate": 4.454315263908702e-05, + "loss": 0.0064, + "step": 61230 + }, + { + "epoch": 10.92, + "learning_rate": 4.4542261055634807e-05, + "loss": 0.0062, + "step": 61240 + }, + { + "epoch": 10.92, + "learning_rate": 4.45413694721826e-05, + "loss": 0.0056, + "step": 61250 + }, + { + "epoch": 10.92, + "learning_rate": 4.454047788873039e-05, + "loss": 0.0031, + "step": 61260 + }, + { + "epoch": 10.93, + "learning_rate": 4.4539586305278173e-05, + "loss": 0.0029, + "step": 61270 + }, + { + "epoch": 10.93, + "learning_rate": 4.4538694721825965e-05, + "loss": 0.0041, + "step": 61280 + }, + { + "epoch": 10.93, + "learning_rate": 4.453780313837375e-05, + "loss": 0.0026, + "step": 61290 + }, + { + "epoch": 10.93, + "learning_rate": 4.453691155492155e-05, + "loss": 0.0034, + "step": 61300 + }, + { + "epoch": 10.93, + "learning_rate": 4.453601997146933e-05, + "loss": 0.0051, + "step": 61310 + }, + { + "epoch": 10.93, + "learning_rate": 4.453512838801712e-05, + "loss": 0.0042, + "step": 61320 + }, + { + "epoch": 10.94, + "learning_rate": 4.4534236804564914e-05, + "loss": 0.0063, + "step": 61330 + }, + { + "epoch": 10.94, + "learning_rate": 4.45333452211127e-05, + "loss": 0.0052, + "step": 61340 + }, + { + "epoch": 10.94, + "learning_rate": 4.453245363766049e-05, + "loss": 0.0044, + "step": 61350 + }, + { + "epoch": 10.94, + "learning_rate": 4.4531562054208274e-05, + "loss": 0.0056, + "step": 61360 + }, + { + "epoch": 10.94, + "learning_rate": 4.4530670470756065e-05, + "loss": 0.0052, + "step": 61370 + }, + { + "epoch": 10.95, + "learning_rate": 4.452977888730385e-05, + "loss": 0.0038, + "step": 61380 + }, + { + "epoch": 10.95, + "learning_rate": 4.452888730385164e-05, + "loss": 0.0034, + "step": 61390 + }, + { + "epoch": 10.95, + "learning_rate": 4.452799572039943e-05, + "loss": 0.0051, + "step": 61400 + }, + { + "epoch": 10.95, + "learning_rate": 4.452710413694722e-05, + "loss": 0.006, + "step": 61410 + }, + { + "epoch": 10.95, + "learning_rate": 4.4526212553495014e-05, + "loss": 0.0036, + "step": 61420 + }, + { + "epoch": 10.95, + "learning_rate": 4.45253209700428e-05, + "loss": 0.0032, + "step": 61430 + }, + { + "epoch": 10.96, + "learning_rate": 4.452442938659059e-05, + "loss": 0.004, + "step": 61440 + }, + { + "epoch": 10.96, + "learning_rate": 4.4523537803138375e-05, + "loss": 0.0031, + "step": 61450 + }, + { + "epoch": 10.96, + "learning_rate": 4.4522646219686166e-05, + "loss": 0.0039, + "step": 61460 + }, + { + "epoch": 10.96, + "learning_rate": 4.452175463623395e-05, + "loss": 0.0047, + "step": 61470 + }, + { + "epoch": 10.96, + "learning_rate": 4.452086305278174e-05, + "loss": 0.004, + "step": 61480 + }, + { + "epoch": 10.96, + "learning_rate": 4.451997146932953e-05, + "loss": 0.0051, + "step": 61490 + }, + { + "epoch": 10.97, + "learning_rate": 4.451907988587732e-05, + "loss": 0.0014, + "step": 61500 + }, + { + "epoch": 10.97, + "learning_rate": 4.451818830242511e-05, + "loss": 0.0068, + "step": 61510 + }, + { + "epoch": 10.97, + "learning_rate": 4.45172967189729e-05, + "loss": 0.004, + "step": 61520 + }, + { + "epoch": 10.97, + "learning_rate": 4.451640513552069e-05, + "loss": 0.0088, + "step": 61530 + }, + { + "epoch": 10.97, + "learning_rate": 4.4515513552068475e-05, + "loss": 0.0033, + "step": 61540 + }, + { + "epoch": 10.98, + "learning_rate": 4.4514621968616266e-05, + "loss": 0.0042, + "step": 61550 + }, + { + "epoch": 10.98, + "learning_rate": 4.451373038516405e-05, + "loss": 0.0048, + "step": 61560 + }, + { + "epoch": 10.98, + "learning_rate": 4.451283880171184e-05, + "loss": 0.0024, + "step": 61570 + }, + { + "epoch": 10.98, + "learning_rate": 4.451194721825963e-05, + "loss": 0.004, + "step": 61580 + }, + { + "epoch": 10.98, + "learning_rate": 4.451105563480742e-05, + "loss": 0.0043, + "step": 61590 + }, + { + "epoch": 10.98, + "learning_rate": 4.451016405135521e-05, + "loss": 0.0053, + "step": 61600 + }, + { + "epoch": 10.99, + "learning_rate": 4.450927246790299e-05, + "loss": 0.0031, + "step": 61610 + }, + { + "epoch": 10.99, + "learning_rate": 4.4508380884450784e-05, + "loss": 0.0054, + "step": 61620 + }, + { + "epoch": 10.99, + "learning_rate": 4.4507489300998576e-05, + "loss": 0.0033, + "step": 61630 + }, + { + "epoch": 10.99, + "learning_rate": 4.450659771754637e-05, + "loss": 0.004, + "step": 61640 + }, + { + "epoch": 10.99, + "learning_rate": 4.450570613409416e-05, + "loss": 0.0046, + "step": 61650 + }, + { + "epoch": 11.0, + "learning_rate": 4.450481455064194e-05, + "loss": 0.0055, + "step": 61660 + }, + { + "epoch": 11.0, + "learning_rate": 4.4503922967189734e-05, + "loss": 0.0034, + "step": 61670 + }, + { + "epoch": 11.0, + "learning_rate": 4.450303138373752e-05, + "loss": 0.0049, + "step": 61680 + }, + { + "epoch": 11.0, + "eval_loss": 0.018656417727470398, + "eval_runtime": 195.9206, + "eval_samples_per_second": 23.678, + "eval_steps_per_second": 2.96, + "step": 61688 + }, + { + "epoch": 11.0, + "learning_rate": 4.450213980028531e-05, + "loss": 0.0024, + "step": 61690 + }, + { + "epoch": 11.0, + "learning_rate": 4.4501248216833094e-05, + "loss": 0.0022, + "step": 61700 + }, + { + "epoch": 11.0, + "learning_rate": 4.4500356633380885e-05, + "loss": 0.0028, + "step": 61710 + }, + { + "epoch": 11.01, + "learning_rate": 4.4499465049928676e-05, + "loss": 0.0021, + "step": 61720 + }, + { + "epoch": 11.01, + "learning_rate": 4.449857346647646e-05, + "loss": 0.0048, + "step": 61730 + }, + { + "epoch": 11.01, + "learning_rate": 4.449768188302426e-05, + "loss": 0.0034, + "step": 61740 + }, + { + "epoch": 11.01, + "learning_rate": 4.449679029957204e-05, + "loss": 0.0031, + "step": 61750 + }, + { + "epoch": 11.01, + "learning_rate": 4.4495898716119834e-05, + "loss": 0.0042, + "step": 61760 + }, + { + "epoch": 11.01, + "learning_rate": 4.449500713266762e-05, + "loss": 0.0045, + "step": 61770 + }, + { + "epoch": 11.02, + "learning_rate": 4.449411554921541e-05, + "loss": 0.0026, + "step": 61780 + }, + { + "epoch": 11.02, + "learning_rate": 4.4493223965763194e-05, + "loss": 0.004, + "step": 61790 + }, + { + "epoch": 11.02, + "learning_rate": 4.4492332382310986e-05, + "loss": 0.005, + "step": 61800 + }, + { + "epoch": 11.02, + "learning_rate": 4.449144079885878e-05, + "loss": 0.0025, + "step": 61810 + }, + { + "epoch": 11.02, + "learning_rate": 4.449054921540656e-05, + "loss": 0.0056, + "step": 61820 + }, + { + "epoch": 11.03, + "learning_rate": 4.448965763195435e-05, + "loss": 0.0046, + "step": 61830 + }, + { + "epoch": 11.03, + "learning_rate": 4.448876604850214e-05, + "loss": 0.0039, + "step": 61840 + }, + { + "epoch": 11.03, + "learning_rate": 4.4487874465049935e-05, + "loss": 0.003, + "step": 61850 + }, + { + "epoch": 11.03, + "learning_rate": 4.448698288159772e-05, + "loss": 0.006, + "step": 61860 + }, + { + "epoch": 11.03, + "learning_rate": 4.448609129814551e-05, + "loss": 0.0031, + "step": 61870 + }, + { + "epoch": 11.03, + "learning_rate": 4.44851997146933e-05, + "loss": 0.0028, + "step": 61880 + }, + { + "epoch": 11.04, + "learning_rate": 4.4484308131241086e-05, + "loss": 0.0047, + "step": 61890 + }, + { + "epoch": 11.04, + "learning_rate": 4.448341654778888e-05, + "loss": 0.007, + "step": 61900 + }, + { + "epoch": 11.04, + "learning_rate": 4.448252496433666e-05, + "loss": 0.0035, + "step": 61910 + }, + { + "epoch": 11.04, + "learning_rate": 4.448163338088445e-05, + "loss": 0.0051, + "step": 61920 + }, + { + "epoch": 11.04, + "learning_rate": 4.448074179743224e-05, + "loss": 0.003, + "step": 61930 + }, + { + "epoch": 11.04, + "learning_rate": 4.447985021398003e-05, + "loss": 0.0036, + "step": 61940 + }, + { + "epoch": 11.05, + "learning_rate": 4.447895863052782e-05, + "loss": 0.0057, + "step": 61950 + }, + { + "epoch": 11.05, + "learning_rate": 4.447806704707561e-05, + "loss": 0.0035, + "step": 61960 + }, + { + "epoch": 11.05, + "learning_rate": 4.44771754636234e-05, + "loss": 0.0029, + "step": 61970 + }, + { + "epoch": 11.05, + "learning_rate": 4.447628388017119e-05, + "loss": 0.0036, + "step": 61980 + }, + { + "epoch": 11.05, + "learning_rate": 4.447539229671898e-05, + "loss": 0.0024, + "step": 61990 + }, + { + "epoch": 11.06, + "learning_rate": 4.447450071326676e-05, + "loss": 0.0029, + "step": 62000 + }, + { + "epoch": 11.06, + "learning_rate": 4.4473609129814554e-05, + "loss": 0.003, + "step": 62010 + }, + { + "epoch": 11.06, + "learning_rate": 4.447271754636234e-05, + "loss": 0.0035, + "step": 62020 + }, + { + "epoch": 11.06, + "learning_rate": 4.447182596291013e-05, + "loss": 0.0052, + "step": 62030 + }, + { + "epoch": 11.06, + "learning_rate": 4.447093437945792e-05, + "loss": 0.0036, + "step": 62040 + }, + { + "epoch": 11.06, + "learning_rate": 4.4470042796005705e-05, + "loss": 0.0033, + "step": 62050 + }, + { + "epoch": 11.07, + "learning_rate": 4.4469151212553496e-05, + "loss": 0.0039, + "step": 62060 + }, + { + "epoch": 11.07, + "learning_rate": 4.446825962910129e-05, + "loss": 0.0024, + "step": 62070 + }, + { + "epoch": 11.07, + "learning_rate": 4.446736804564908e-05, + "loss": 0.0051, + "step": 62080 + }, + { + "epoch": 11.07, + "learning_rate": 4.446647646219686e-05, + "loss": 0.003, + "step": 62090 + }, + { + "epoch": 11.07, + "learning_rate": 4.4465584878744654e-05, + "loss": 0.0026, + "step": 62100 + }, + { + "epoch": 11.08, + "learning_rate": 4.4464693295292445e-05, + "loss": 0.006, + "step": 62110 + }, + { + "epoch": 11.08, + "learning_rate": 4.446380171184023e-05, + "loss": 0.0026, + "step": 62120 + }, + { + "epoch": 11.08, + "learning_rate": 4.446291012838802e-05, + "loss": 0.0045, + "step": 62130 + }, + { + "epoch": 11.08, + "learning_rate": 4.4462018544935805e-05, + "loss": 0.0054, + "step": 62140 + }, + { + "epoch": 11.08, + "learning_rate": 4.44611269614836e-05, + "loss": 0.0025, + "step": 62150 + }, + { + "epoch": 11.08, + "learning_rate": 4.446023537803138e-05, + "loss": 0.0036, + "step": 62160 + }, + { + "epoch": 11.09, + "learning_rate": 4.445934379457917e-05, + "loss": 0.0084, + "step": 62170 + }, + { + "epoch": 11.09, + "learning_rate": 4.4458452211126963e-05, + "loss": 0.0021, + "step": 62180 + }, + { + "epoch": 11.09, + "learning_rate": 4.4457560627674755e-05, + "loss": 0.0035, + "step": 62190 + }, + { + "epoch": 11.09, + "learning_rate": 4.4456669044222546e-05, + "loss": 0.0037, + "step": 62200 + }, + { + "epoch": 11.09, + "learning_rate": 4.445577746077033e-05, + "loss": 0.0038, + "step": 62210 + }, + { + "epoch": 11.09, + "learning_rate": 4.445488587731812e-05, + "loss": 0.0044, + "step": 62220 + }, + { + "epoch": 11.1, + "learning_rate": 4.4453994293865906e-05, + "loss": 0.0059, + "step": 62230 + }, + { + "epoch": 11.1, + "learning_rate": 4.44531027104137e-05, + "loss": 0.0028, + "step": 62240 + }, + { + "epoch": 11.1, + "learning_rate": 4.445221112696148e-05, + "loss": 0.0021, + "step": 62250 + }, + { + "epoch": 11.1, + "learning_rate": 4.445131954350927e-05, + "loss": 0.006, + "step": 62260 + }, + { + "epoch": 11.1, + "learning_rate": 4.4450427960057064e-05, + "loss": 0.0034, + "step": 62270 + }, + { + "epoch": 11.11, + "learning_rate": 4.444953637660485e-05, + "loss": 0.0034, + "step": 62280 + }, + { + "epoch": 11.11, + "learning_rate": 4.4448644793152646e-05, + "loss": 0.0017, + "step": 62290 + }, + { + "epoch": 11.11, + "learning_rate": 4.444775320970043e-05, + "loss": 0.0046, + "step": 62300 + }, + { + "epoch": 11.11, + "learning_rate": 4.444686162624822e-05, + "loss": 0.002, + "step": 62310 + }, + { + "epoch": 11.11, + "learning_rate": 4.4445970042796007e-05, + "loss": 0.0037, + "step": 62320 + }, + { + "epoch": 11.11, + "learning_rate": 4.44450784593438e-05, + "loss": 0.0042, + "step": 62330 + }, + { + "epoch": 11.12, + "learning_rate": 4.444418687589159e-05, + "loss": 0.003, + "step": 62340 + }, + { + "epoch": 11.12, + "learning_rate": 4.4443295292439373e-05, + "loss": 0.0033, + "step": 62350 + }, + { + "epoch": 11.12, + "learning_rate": 4.4442403708987165e-05, + "loss": 0.0044, + "step": 62360 + }, + { + "epoch": 11.12, + "learning_rate": 4.444151212553495e-05, + "loss": 0.0032, + "step": 62370 + }, + { + "epoch": 11.12, + "learning_rate": 4.444062054208274e-05, + "loss": 0.005, + "step": 62380 + }, + { + "epoch": 11.13, + "learning_rate": 4.4439728958630525e-05, + "loss": 0.0069, + "step": 62390 + }, + { + "epoch": 11.13, + "learning_rate": 4.443883737517832e-05, + "loss": 0.0038, + "step": 62400 + }, + { + "epoch": 11.13, + "learning_rate": 4.443794579172611e-05, + "loss": 0.0042, + "step": 62410 + }, + { + "epoch": 11.13, + "learning_rate": 4.44370542082739e-05, + "loss": 0.0045, + "step": 62420 + }, + { + "epoch": 11.13, + "learning_rate": 4.443616262482169e-05, + "loss": 0.0026, + "step": 62430 + }, + { + "epoch": 11.13, + "learning_rate": 4.4435271041369474e-05, + "loss": 0.0031, + "step": 62440 + }, + { + "epoch": 11.14, + "learning_rate": 4.4434379457917265e-05, + "loss": 0.0037, + "step": 62450 + }, + { + "epoch": 11.14, + "learning_rate": 4.443348787446505e-05, + "loss": 0.0022, + "step": 62460 + }, + { + "epoch": 11.14, + "learning_rate": 4.443259629101284e-05, + "loss": 0.003, + "step": 62470 + }, + { + "epoch": 11.14, + "learning_rate": 4.4431704707560625e-05, + "loss": 0.0038, + "step": 62480 + }, + { + "epoch": 11.14, + "learning_rate": 4.4430813124108416e-05, + "loss": 0.0045, + "step": 62490 + }, + { + "epoch": 11.14, + "learning_rate": 4.442992154065621e-05, + "loss": 0.0031, + "step": 62500 + }, + { + "epoch": 11.15, + "learning_rate": 4.4429029957204e-05, + "loss": 0.003, + "step": 62510 + }, + { + "epoch": 11.15, + "learning_rate": 4.442813837375179e-05, + "loss": 0.0021, + "step": 62520 + }, + { + "epoch": 11.15, + "learning_rate": 4.4427246790299575e-05, + "loss": 0.0052, + "step": 62530 + }, + { + "epoch": 11.15, + "learning_rate": 4.4426355206847366e-05, + "loss": 0.003, + "step": 62540 + }, + { + "epoch": 11.15, + "learning_rate": 4.442546362339515e-05, + "loss": 0.0042, + "step": 62550 + }, + { + "epoch": 11.16, + "learning_rate": 4.442457203994294e-05, + "loss": 0.0071, + "step": 62560 + }, + { + "epoch": 11.16, + "learning_rate": 4.442368045649073e-05, + "loss": 0.0032, + "step": 62570 + }, + { + "epoch": 11.16, + "learning_rate": 4.442278887303852e-05, + "loss": 0.0068, + "step": 62580 + }, + { + "epoch": 11.16, + "learning_rate": 4.442189728958631e-05, + "loss": 0.0079, + "step": 62590 + }, + { + "epoch": 11.16, + "learning_rate": 4.442100570613409e-05, + "loss": 0.0033, + "step": 62600 + }, + { + "epoch": 11.16, + "learning_rate": 4.4420114122681884e-05, + "loss": 0.0032, + "step": 62610 + }, + { + "epoch": 11.17, + "learning_rate": 4.441922253922967e-05, + "loss": 0.0046, + "step": 62620 + }, + { + "epoch": 11.17, + "learning_rate": 4.4418330955777466e-05, + "loss": 0.0033, + "step": 62630 + }, + { + "epoch": 11.17, + "learning_rate": 4.441743937232525e-05, + "loss": 0.0028, + "step": 62640 + }, + { + "epoch": 11.17, + "learning_rate": 4.441654778887304e-05, + "loss": 0.0014, + "step": 62650 + }, + { + "epoch": 11.17, + "learning_rate": 4.441565620542083e-05, + "loss": 0.0042, + "step": 62660 + }, + { + "epoch": 11.18, + "learning_rate": 4.441476462196862e-05, + "loss": 0.0051, + "step": 62670 + }, + { + "epoch": 11.18, + "learning_rate": 4.441387303851641e-05, + "loss": 0.0023, + "step": 62680 + }, + { + "epoch": 11.18, + "learning_rate": 4.441298145506419e-05, + "loss": 0.0052, + "step": 62690 + }, + { + "epoch": 11.18, + "learning_rate": 4.4412089871611984e-05, + "loss": 0.0055, + "step": 62700 + }, + { + "epoch": 11.18, + "learning_rate": 4.441119828815977e-05, + "loss": 0.0037, + "step": 62710 + }, + { + "epoch": 11.18, + "learning_rate": 4.441030670470756e-05, + "loss": 0.0028, + "step": 62720 + }, + { + "epoch": 11.19, + "learning_rate": 4.440941512125535e-05, + "loss": 0.0032, + "step": 62730 + }, + { + "epoch": 11.19, + "learning_rate": 4.440852353780314e-05, + "loss": 0.0037, + "step": 62740 + }, + { + "epoch": 11.19, + "learning_rate": 4.4407631954350934e-05, + "loss": 0.0035, + "step": 62750 + }, + { + "epoch": 11.19, + "learning_rate": 4.440674037089872e-05, + "loss": 0.0041, + "step": 62760 + }, + { + "epoch": 11.19, + "learning_rate": 4.440584878744651e-05, + "loss": 0.0032, + "step": 62770 + }, + { + "epoch": 11.19, + "learning_rate": 4.4404957203994294e-05, + "loss": 0.0034, + "step": 62780 + }, + { + "epoch": 11.2, + "learning_rate": 4.4404065620542085e-05, + "loss": 0.0063, + "step": 62790 + }, + { + "epoch": 11.2, + "learning_rate": 4.4403174037089876e-05, + "loss": 0.0026, + "step": 62800 + }, + { + "epoch": 11.2, + "learning_rate": 4.440228245363766e-05, + "loss": 0.0017, + "step": 62810 + }, + { + "epoch": 11.2, + "learning_rate": 4.440139087018545e-05, + "loss": 0.0015, + "step": 62820 + }, + { + "epoch": 11.2, + "learning_rate": 4.4400499286733236e-05, + "loss": 0.0045, + "step": 62830 + }, + { + "epoch": 11.21, + "learning_rate": 4.439960770328103e-05, + "loss": 0.002, + "step": 62840 + }, + { + "epoch": 11.21, + "learning_rate": 4.439871611982882e-05, + "loss": 0.0019, + "step": 62850 + }, + { + "epoch": 11.21, + "learning_rate": 4.439782453637661e-05, + "loss": 0.0029, + "step": 62860 + }, + { + "epoch": 11.21, + "learning_rate": 4.4396932952924394e-05, + "loss": 0.004, + "step": 62870 + }, + { + "epoch": 11.21, + "learning_rate": 4.4396041369472186e-05, + "loss": 0.0035, + "step": 62880 + }, + { + "epoch": 11.21, + "learning_rate": 4.439514978601998e-05, + "loss": 0.0029, + "step": 62890 + }, + { + "epoch": 11.22, + "learning_rate": 4.439425820256776e-05, + "loss": 0.0031, + "step": 62900 + }, + { + "epoch": 11.22, + "learning_rate": 4.439336661911555e-05, + "loss": 0.0031, + "step": 62910 + }, + { + "epoch": 11.22, + "learning_rate": 4.439247503566334e-05, + "loss": 0.0034, + "step": 62920 + }, + { + "epoch": 11.22, + "learning_rate": 4.439158345221113e-05, + "loss": 0.0062, + "step": 62930 + }, + { + "epoch": 11.22, + "learning_rate": 4.439069186875891e-05, + "loss": 0.003, + "step": 62940 + }, + { + "epoch": 11.23, + "learning_rate": 4.4389800285306704e-05, + "loss": 0.0024, + "step": 62950 + }, + { + "epoch": 11.23, + "learning_rate": 4.4388908701854495e-05, + "loss": 0.0038, + "step": 62960 + }, + { + "epoch": 11.23, + "learning_rate": 4.4388017118402286e-05, + "loss": 0.0025, + "step": 62970 + }, + { + "epoch": 11.23, + "learning_rate": 4.438712553495008e-05, + "loss": 0.004, + "step": 62980 + }, + { + "epoch": 11.23, + "learning_rate": 4.438623395149786e-05, + "loss": 0.0043, + "step": 62990 + }, + { + "epoch": 11.23, + "learning_rate": 4.438534236804565e-05, + "loss": 0.0031, + "step": 63000 + }, + { + "epoch": 11.24, + "learning_rate": 4.438453994293866e-05, + "loss": 0.0029, + "step": 63010 + }, + { + "epoch": 11.24, + "learning_rate": 4.438364835948645e-05, + "loss": 0.0057, + "step": 63020 + }, + { + "epoch": 11.24, + "learning_rate": 4.438275677603424e-05, + "loss": 0.0038, + "step": 63030 + }, + { + "epoch": 11.24, + "learning_rate": 4.438186519258203e-05, + "loss": 0.0038, + "step": 63040 + }, + { + "epoch": 11.24, + "learning_rate": 4.438097360912982e-05, + "loss": 0.0052, + "step": 63050 + }, + { + "epoch": 11.24, + "learning_rate": 4.4380082025677603e-05, + "loss": 0.0041, + "step": 63060 + }, + { + "epoch": 11.25, + "learning_rate": 4.4379190442225395e-05, + "loss": 0.0035, + "step": 63070 + }, + { + "epoch": 11.25, + "learning_rate": 4.437829885877318e-05, + "loss": 0.0049, + "step": 63080 + }, + { + "epoch": 11.25, + "learning_rate": 4.437740727532097e-05, + "loss": 0.0018, + "step": 63090 + }, + { + "epoch": 11.25, + "learning_rate": 4.437651569186876e-05, + "loss": 0.0061, + "step": 63100 + }, + { + "epoch": 11.25, + "learning_rate": 4.437562410841655e-05, + "loss": 0.0041, + "step": 63110 + }, + { + "epoch": 11.26, + "learning_rate": 4.4374732524964344e-05, + "loss": 0.0041, + "step": 63120 + }, + { + "epoch": 11.26, + "learning_rate": 4.437384094151213e-05, + "loss": 0.0034, + "step": 63130 + }, + { + "epoch": 11.26, + "learning_rate": 4.437294935805992e-05, + "loss": 0.0052, + "step": 63140 + }, + { + "epoch": 11.26, + "learning_rate": 4.4372057774607704e-05, + "loss": 0.0023, + "step": 63150 + }, + { + "epoch": 11.26, + "learning_rate": 4.4371166191155495e-05, + "loss": 0.0035, + "step": 63160 + }, + { + "epoch": 11.26, + "learning_rate": 4.437027460770328e-05, + "loss": 0.0032, + "step": 63170 + }, + { + "epoch": 11.27, + "learning_rate": 4.436938302425107e-05, + "loss": 0.0037, + "step": 63180 + }, + { + "epoch": 11.27, + "learning_rate": 4.436849144079886e-05, + "loss": 0.0043, + "step": 63190 + }, + { + "epoch": 11.27, + "learning_rate": 4.4367599857346647e-05, + "loss": 0.0064, + "step": 63200 + }, + { + "epoch": 11.27, + "learning_rate": 4.436670827389444e-05, + "loss": 0.003, + "step": 63210 + }, + { + "epoch": 11.27, + "learning_rate": 4.436581669044223e-05, + "loss": 0.0016, + "step": 63220 + }, + { + "epoch": 11.27, + "learning_rate": 4.436492510699002e-05, + "loss": 0.0033, + "step": 63230 + }, + { + "epoch": 11.28, + "learning_rate": 4.4364033523537805e-05, + "loss": 0.007, + "step": 63240 + }, + { + "epoch": 11.28, + "learning_rate": 4.4363141940085596e-05, + "loss": 0.0063, + "step": 63250 + }, + { + "epoch": 11.28, + "learning_rate": 4.436225035663338e-05, + "loss": 0.0019, + "step": 63260 + }, + { + "epoch": 11.28, + "learning_rate": 4.436135877318117e-05, + "loss": 0.0041, + "step": 63270 + }, + { + "epoch": 11.28, + "learning_rate": 4.436046718972896e-05, + "loss": 0.008, + "step": 63280 + }, + { + "epoch": 11.29, + "learning_rate": 4.435957560627675e-05, + "loss": 0.0024, + "step": 63290 + }, + { + "epoch": 11.29, + "learning_rate": 4.435868402282454e-05, + "loss": 0.0042, + "step": 63300 + }, + { + "epoch": 11.29, + "learning_rate": 4.435779243937232e-05, + "loss": 0.0052, + "step": 63310 + }, + { + "epoch": 11.29, + "learning_rate": 4.4356900855920114e-05, + "loss": 0.0041, + "step": 63320 + }, + { + "epoch": 11.29, + "learning_rate": 4.4356009272467905e-05, + "loss": 0.0072, + "step": 63330 + }, + { + "epoch": 11.29, + "learning_rate": 4.4355117689015696e-05, + "loss": 0.0055, + "step": 63340 + }, + { + "epoch": 11.3, + "learning_rate": 4.435422610556349e-05, + "loss": 0.006, + "step": 63350 + }, + { + "epoch": 11.3, + "learning_rate": 4.435333452211127e-05, + "loss": 0.0035, + "step": 63360 + }, + { + "epoch": 11.3, + "learning_rate": 4.435244293865906e-05, + "loss": 0.003, + "step": 63370 + }, + { + "epoch": 11.3, + "learning_rate": 4.435155135520685e-05, + "loss": 0.0049, + "step": 63380 + }, + { + "epoch": 11.3, + "learning_rate": 4.435065977175464e-05, + "loss": 0.003, + "step": 63390 + }, + { + "epoch": 11.31, + "learning_rate": 4.434976818830242e-05, + "loss": 0.0028, + "step": 63400 + }, + { + "epoch": 11.31, + "learning_rate": 4.4348876604850215e-05, + "loss": 0.0051, + "step": 63410 + }, + { + "epoch": 11.31, + "learning_rate": 4.4347985021398006e-05, + "loss": 0.003, + "step": 63420 + }, + { + "epoch": 11.31, + "learning_rate": 4.434709343794579e-05, + "loss": 0.0024, + "step": 63430 + }, + { + "epoch": 11.31, + "learning_rate": 4.434620185449359e-05, + "loss": 0.0021, + "step": 63440 + }, + { + "epoch": 11.31, + "learning_rate": 4.434531027104137e-05, + "loss": 0.0037, + "step": 63450 + }, + { + "epoch": 11.32, + "learning_rate": 4.4344418687589164e-05, + "loss": 0.0033, + "step": 63460 + }, + { + "epoch": 11.32, + "learning_rate": 4.434352710413695e-05, + "loss": 0.0034, + "step": 63470 + }, + { + "epoch": 11.32, + "learning_rate": 4.434263552068474e-05, + "loss": 0.0044, + "step": 63480 + }, + { + "epoch": 11.32, + "learning_rate": 4.4341743937232524e-05, + "loss": 0.0023, + "step": 63490 + }, + { + "epoch": 11.32, + "learning_rate": 4.4340852353780315e-05, + "loss": 0.0026, + "step": 63500 + }, + { + "epoch": 11.32, + "learning_rate": 4.4339960770328106e-05, + "loss": 0.0045, + "step": 63510 + }, + { + "epoch": 11.33, + "learning_rate": 4.433906918687589e-05, + "loss": 0.0028, + "step": 63520 + }, + { + "epoch": 11.33, + "learning_rate": 4.433817760342368e-05, + "loss": 0.0035, + "step": 63530 + }, + { + "epoch": 11.33, + "learning_rate": 4.4337286019971466e-05, + "loss": 0.0022, + "step": 63540 + }, + { + "epoch": 11.33, + "learning_rate": 4.4336394436519264e-05, + "loss": 0.0037, + "step": 63550 + }, + { + "epoch": 11.33, + "learning_rate": 4.433550285306705e-05, + "loss": 0.0086, + "step": 63560 + }, + { + "epoch": 11.34, + "learning_rate": 4.433461126961484e-05, + "loss": 0.0078, + "step": 63570 + }, + { + "epoch": 11.34, + "learning_rate": 4.433371968616263e-05, + "loss": 0.0037, + "step": 63580 + }, + { + "epoch": 11.34, + "learning_rate": 4.4332828102710416e-05, + "loss": 0.006, + "step": 63590 + }, + { + "epoch": 11.34, + "learning_rate": 4.433193651925821e-05, + "loss": 0.003, + "step": 63600 + }, + { + "epoch": 11.34, + "learning_rate": 4.433104493580599e-05, + "loss": 0.0038, + "step": 63610 + }, + { + "epoch": 11.34, + "learning_rate": 4.433015335235378e-05, + "loss": 0.0028, + "step": 63620 + }, + { + "epoch": 11.35, + "learning_rate": 4.432926176890157e-05, + "loss": 0.0048, + "step": 63630 + }, + { + "epoch": 11.35, + "learning_rate": 4.432837018544936e-05, + "loss": 0.0029, + "step": 63640 + }, + { + "epoch": 11.35, + "learning_rate": 4.432747860199715e-05, + "loss": 0.0038, + "step": 63650 + }, + { + "epoch": 11.35, + "learning_rate": 4.432658701854494e-05, + "loss": 0.0024, + "step": 63660 + }, + { + "epoch": 11.35, + "learning_rate": 4.432569543509273e-05, + "loss": 0.0031, + "step": 63670 + }, + { + "epoch": 11.36, + "learning_rate": 4.4324803851640516e-05, + "loss": 0.0043, + "step": 63680 + }, + { + "epoch": 11.36, + "learning_rate": 4.432391226818831e-05, + "loss": 0.0035, + "step": 63690 + }, + { + "epoch": 11.36, + "learning_rate": 4.432302068473609e-05, + "loss": 0.0022, + "step": 63700 + }, + { + "epoch": 11.36, + "learning_rate": 4.432212910128388e-05, + "loss": 0.0033, + "step": 63710 + }, + { + "epoch": 11.36, + "learning_rate": 4.432123751783167e-05, + "loss": 0.0029, + "step": 63720 + }, + { + "epoch": 11.36, + "learning_rate": 4.432034593437946e-05, + "loss": 0.0029, + "step": 63730 + }, + { + "epoch": 11.37, + "learning_rate": 4.431945435092725e-05, + "loss": 0.003, + "step": 63740 + }, + { + "epoch": 11.37, + "learning_rate": 4.4318562767475034e-05, + "loss": 0.0037, + "step": 63750 + }, + { + "epoch": 11.37, + "learning_rate": 4.4317671184022826e-05, + "loss": 0.0031, + "step": 63760 + }, + { + "epoch": 11.37, + "learning_rate": 4.431677960057062e-05, + "loss": 0.0043, + "step": 63770 + }, + { + "epoch": 11.37, + "learning_rate": 4.431588801711841e-05, + "loss": 0.0031, + "step": 63780 + }, + { + "epoch": 11.37, + "learning_rate": 4.431499643366619e-05, + "loss": 0.0033, + "step": 63790 + }, + { + "epoch": 11.38, + "learning_rate": 4.4314104850213984e-05, + "loss": 0.0039, + "step": 63800 + }, + { + "epoch": 11.38, + "learning_rate": 4.4313213266761775e-05, + "loss": 0.0043, + "step": 63810 + }, + { + "epoch": 11.38, + "learning_rate": 4.431232168330956e-05, + "loss": 0.0021, + "step": 63820 + }, + { + "epoch": 11.38, + "learning_rate": 4.431143009985735e-05, + "loss": 0.0014, + "step": 63830 + }, + { + "epoch": 11.38, + "learning_rate": 4.4310538516405135e-05, + "loss": 0.0026, + "step": 63840 + }, + { + "epoch": 11.39, + "learning_rate": 4.4309646932952926e-05, + "loss": 0.0049, + "step": 63850 + }, + { + "epoch": 11.39, + "learning_rate": 4.430875534950071e-05, + "loss": 0.0055, + "step": 63860 + }, + { + "epoch": 11.39, + "learning_rate": 4.43078637660485e-05, + "loss": 0.0033, + "step": 63870 + }, + { + "epoch": 11.39, + "learning_rate": 4.430697218259629e-05, + "loss": 0.0029, + "step": 63880 + }, + { + "epoch": 11.39, + "learning_rate": 4.4306080599144084e-05, + "loss": 0.004, + "step": 63890 + }, + { + "epoch": 11.39, + "learning_rate": 4.4305189015691875e-05, + "loss": 0.0038, + "step": 63900 + }, + { + "epoch": 11.4, + "learning_rate": 4.430429743223966e-05, + "loss": 0.0032, + "step": 63910 + }, + { + "epoch": 11.4, + "learning_rate": 4.430340584878745e-05, + "loss": 0.0022, + "step": 63920 + }, + { + "epoch": 11.4, + "learning_rate": 4.4302514265335235e-05, + "loss": 0.0032, + "step": 63930 + }, + { + "epoch": 11.4, + "learning_rate": 4.430162268188303e-05, + "loss": 0.003, + "step": 63940 + }, + { + "epoch": 11.4, + "learning_rate": 4.430073109843081e-05, + "loss": 0.0015, + "step": 63950 + }, + { + "epoch": 11.41, + "learning_rate": 4.42998395149786e-05, + "loss": 0.004, + "step": 63960 + }, + { + "epoch": 11.41, + "learning_rate": 4.4298947931526394e-05, + "loss": 0.0027, + "step": 63970 + }, + { + "epoch": 11.41, + "learning_rate": 4.429805634807418e-05, + "loss": 0.002, + "step": 63980 + }, + { + "epoch": 11.41, + "learning_rate": 4.4297164764621976e-05, + "loss": 0.0021, + "step": 63990 + }, + { + "epoch": 11.41, + "learning_rate": 4.429627318116976e-05, + "loss": 0.0065, + "step": 64000 + }, + { + "epoch": 11.41, + "learning_rate": 4.429538159771755e-05, + "loss": 0.0031, + "step": 64010 + }, + { + "epoch": 11.42, + "learning_rate": 4.4294490014265336e-05, + "loss": 0.005, + "step": 64020 + }, + { + "epoch": 11.42, + "learning_rate": 4.429359843081313e-05, + "loss": 0.0041, + "step": 64030 + }, + { + "epoch": 11.42, + "learning_rate": 4.429270684736092e-05, + "loss": 0.0041, + "step": 64040 + }, + { + "epoch": 11.42, + "learning_rate": 4.42918152639087e-05, + "loss": 0.0044, + "step": 64050 + }, + { + "epoch": 11.42, + "learning_rate": 4.4290923680456494e-05, + "loss": 0.0039, + "step": 64060 + }, + { + "epoch": 11.42, + "learning_rate": 4.429003209700428e-05, + "loss": 0.0039, + "step": 64070 + }, + { + "epoch": 11.43, + "learning_rate": 4.428914051355207e-05, + "loss": 0.0042, + "step": 64080 + }, + { + "epoch": 11.43, + "learning_rate": 4.4288248930099854e-05, + "loss": 0.0037, + "step": 64090 + }, + { + "epoch": 11.43, + "learning_rate": 4.428735734664765e-05, + "loss": 0.0051, + "step": 64100 + }, + { + "epoch": 11.43, + "learning_rate": 4.4286465763195437e-05, + "loss": 0.0054, + "step": 64110 + }, + { + "epoch": 11.43, + "learning_rate": 4.428557417974323e-05, + "loss": 0.0038, + "step": 64120 + }, + { + "epoch": 11.44, + "learning_rate": 4.428468259629102e-05, + "loss": 0.0047, + "step": 64130 + }, + { + "epoch": 11.44, + "learning_rate": 4.4283791012838803e-05, + "loss": 0.0054, + "step": 64140 + }, + { + "epoch": 11.44, + "learning_rate": 4.4282899429386595e-05, + "loss": 0.0029, + "step": 64150 + }, + { + "epoch": 11.44, + "learning_rate": 4.428200784593438e-05, + "loss": 0.0045, + "step": 64160 + }, + { + "epoch": 11.44, + "learning_rate": 4.428111626248217e-05, + "loss": 0.004, + "step": 64170 + }, + { + "epoch": 11.44, + "learning_rate": 4.4280224679029955e-05, + "loss": 0.0048, + "step": 64180 + }, + { + "epoch": 11.45, + "learning_rate": 4.4279333095577746e-05, + "loss": 0.0014, + "step": 64190 + }, + { + "epoch": 11.45, + "learning_rate": 4.427844151212554e-05, + "loss": 0.0028, + "step": 64200 + }, + { + "epoch": 11.45, + "learning_rate": 4.427754992867333e-05, + "loss": 0.0024, + "step": 64210 + }, + { + "epoch": 11.45, + "learning_rate": 4.427665834522112e-05, + "loss": 0.0065, + "step": 64220 + }, + { + "epoch": 11.45, + "learning_rate": 4.4275766761768904e-05, + "loss": 0.0037, + "step": 64230 + }, + { + "epoch": 11.46, + "learning_rate": 4.4274875178316695e-05, + "loss": 0.0037, + "step": 64240 + }, + { + "epoch": 11.46, + "learning_rate": 4.427398359486448e-05, + "loss": 0.0028, + "step": 64250 + }, + { + "epoch": 11.46, + "learning_rate": 4.427309201141227e-05, + "loss": 0.0026, + "step": 64260 + }, + { + "epoch": 11.46, + "learning_rate": 4.427220042796006e-05, + "loss": 0.0046, + "step": 64270 + }, + { + "epoch": 11.46, + "learning_rate": 4.4271308844507847e-05, + "loss": 0.0035, + "step": 64280 + }, + { + "epoch": 11.46, + "learning_rate": 4.427041726105564e-05, + "loss": 0.0027, + "step": 64290 + }, + { + "epoch": 11.47, + "learning_rate": 4.426952567760342e-05, + "loss": 0.0037, + "step": 64300 + }, + { + "epoch": 11.47, + "learning_rate": 4.426863409415121e-05, + "loss": 0.0038, + "step": 64310 + }, + { + "epoch": 11.47, + "learning_rate": 4.4267742510699005e-05, + "loss": 0.0035, + "step": 64320 + }, + { + "epoch": 11.47, + "learning_rate": 4.4266850927246796e-05, + "loss": 0.007, + "step": 64330 + }, + { + "epoch": 11.47, + "learning_rate": 4.426595934379458e-05, + "loss": 0.0028, + "step": 64340 + }, + { + "epoch": 11.47, + "learning_rate": 4.426506776034237e-05, + "loss": 0.0032, + "step": 64350 + }, + { + "epoch": 11.48, + "learning_rate": 4.426417617689016e-05, + "loss": 0.0027, + "step": 64360 + }, + { + "epoch": 11.48, + "learning_rate": 4.426328459343795e-05, + "loss": 0.0033, + "step": 64370 + }, + { + "epoch": 11.48, + "learning_rate": 4.426239300998574e-05, + "loss": 0.0045, + "step": 64380 + }, + { + "epoch": 11.48, + "learning_rate": 4.426150142653352e-05, + "loss": 0.0027, + "step": 64390 + }, + { + "epoch": 11.48, + "learning_rate": 4.4260609843081314e-05, + "loss": 0.0027, + "step": 64400 + }, + { + "epoch": 11.49, + "learning_rate": 4.42597182596291e-05, + "loss": 0.0045, + "step": 64410 + }, + { + "epoch": 11.49, + "learning_rate": 4.425882667617689e-05, + "loss": 0.0031, + "step": 64420 + }, + { + "epoch": 11.49, + "learning_rate": 4.425793509272468e-05, + "loss": 0.005, + "step": 64430 + }, + { + "epoch": 11.49, + "learning_rate": 4.425704350927247e-05, + "loss": 0.0029, + "step": 64440 + }, + { + "epoch": 11.49, + "learning_rate": 4.425615192582026e-05, + "loss": 0.0026, + "step": 64450 + }, + { + "epoch": 11.49, + "learning_rate": 4.425526034236805e-05, + "loss": 0.0028, + "step": 64460 + }, + { + "epoch": 11.5, + "learning_rate": 4.425436875891584e-05, + "loss": 0.006, + "step": 64470 + }, + { + "epoch": 11.5, + "learning_rate": 4.425347717546362e-05, + "loss": 0.0018, + "step": 64480 + }, + { + "epoch": 11.5, + "learning_rate": 4.4252585592011414e-05, + "loss": 0.0057, + "step": 64490 + }, + { + "epoch": 11.5, + "learning_rate": 4.4251694008559206e-05, + "loss": 0.0032, + "step": 64500 + }, + { + "epoch": 11.5, + "learning_rate": 4.425080242510699e-05, + "loss": 0.0031, + "step": 64510 + }, + { + "epoch": 11.5, + "learning_rate": 4.424991084165478e-05, + "loss": 0.0026, + "step": 64520 + }, + { + "epoch": 11.51, + "learning_rate": 4.4249019258202566e-05, + "loss": 0.0045, + "step": 64530 + }, + { + "epoch": 11.51, + "learning_rate": 4.4248127674750364e-05, + "loss": 0.007, + "step": 64540 + }, + { + "epoch": 11.51, + "learning_rate": 4.424723609129815e-05, + "loss": 0.0035, + "step": 64550 + }, + { + "epoch": 11.51, + "learning_rate": 4.424634450784594e-05, + "loss": 0.0052, + "step": 64560 + }, + { + "epoch": 11.51, + "learning_rate": 4.4245452924393724e-05, + "loss": 0.0089, + "step": 64570 + }, + { + "epoch": 11.52, + "learning_rate": 4.4244561340941515e-05, + "loss": 0.0031, + "step": 64580 + }, + { + "epoch": 11.52, + "learning_rate": 4.4243669757489306e-05, + "loss": 0.0059, + "step": 64590 + }, + { + "epoch": 11.52, + "learning_rate": 4.424277817403709e-05, + "loss": 0.0046, + "step": 64600 + }, + { + "epoch": 11.52, + "learning_rate": 4.424188659058488e-05, + "loss": 0.002, + "step": 64610 + }, + { + "epoch": 11.52, + "learning_rate": 4.4240995007132666e-05, + "loss": 0.0054, + "step": 64620 + }, + { + "epoch": 11.52, + "learning_rate": 4.424010342368046e-05, + "loss": 0.0031, + "step": 64630 + }, + { + "epoch": 11.53, + "learning_rate": 4.423921184022824e-05, + "loss": 0.0041, + "step": 64640 + }, + { + "epoch": 11.53, + "learning_rate": 4.423832025677604e-05, + "loss": 0.0045, + "step": 64650 + }, + { + "epoch": 11.53, + "learning_rate": 4.4237428673323824e-05, + "loss": 0.0032, + "step": 64660 + }, + { + "epoch": 11.53, + "learning_rate": 4.4236537089871616e-05, + "loss": 0.0056, + "step": 64670 + }, + { + "epoch": 11.53, + "learning_rate": 4.423564550641941e-05, + "loss": 0.0059, + "step": 64680 + }, + { + "epoch": 11.54, + "learning_rate": 4.423475392296719e-05, + "loss": 0.0062, + "step": 64690 + }, + { + "epoch": 11.54, + "learning_rate": 4.423386233951498e-05, + "loss": 0.0029, + "step": 64700 + }, + { + "epoch": 11.54, + "learning_rate": 4.423297075606277e-05, + "loss": 0.0049, + "step": 64710 + }, + { + "epoch": 11.54, + "learning_rate": 4.423207917261056e-05, + "loss": 0.0023, + "step": 64720 + }, + { + "epoch": 11.54, + "learning_rate": 4.423118758915835e-05, + "loss": 0.0035, + "step": 64730 + }, + { + "epoch": 11.54, + "learning_rate": 4.4230296005706134e-05, + "loss": 0.0028, + "step": 64740 + }, + { + "epoch": 11.55, + "learning_rate": 4.4229404422253925e-05, + "loss": 0.0025, + "step": 64750 + }, + { + "epoch": 11.55, + "learning_rate": 4.4228512838801716e-05, + "loss": 0.0033, + "step": 64760 + }, + { + "epoch": 11.55, + "learning_rate": 4.422762125534951e-05, + "loss": 0.0027, + "step": 64770 + }, + { + "epoch": 11.55, + "learning_rate": 4.422672967189729e-05, + "loss": 0.0039, + "step": 64780 + }, + { + "epoch": 11.55, + "learning_rate": 4.422583808844508e-05, + "loss": 0.0056, + "step": 64790 + }, + { + "epoch": 11.55, + "learning_rate": 4.422494650499287e-05, + "loss": 0.0035, + "step": 64800 + }, + { + "epoch": 11.56, + "learning_rate": 4.422405492154066e-05, + "loss": 0.0036, + "step": 64810 + }, + { + "epoch": 11.56, + "learning_rate": 4.422316333808845e-05, + "loss": 0.0028, + "step": 64820 + }, + { + "epoch": 11.56, + "learning_rate": 4.4222271754636234e-05, + "loss": 0.0023, + "step": 64830 + }, + { + "epoch": 11.56, + "learning_rate": 4.4221380171184026e-05, + "loss": 0.007, + "step": 64840 + }, + { + "epoch": 11.56, + "learning_rate": 4.422048858773181e-05, + "loss": 0.0049, + "step": 64850 + }, + { + "epoch": 11.57, + "learning_rate": 4.42195970042796e-05, + "loss": 0.004, + "step": 64860 + }, + { + "epoch": 11.57, + "learning_rate": 4.421870542082739e-05, + "loss": 0.0046, + "step": 64870 + }, + { + "epoch": 11.57, + "learning_rate": 4.4217813837375184e-05, + "loss": 0.0042, + "step": 64880 + }, + { + "epoch": 11.57, + "learning_rate": 4.421692225392297e-05, + "loss": 0.0024, + "step": 64890 + }, + { + "epoch": 11.57, + "learning_rate": 4.421603067047076e-05, + "loss": 0.004, + "step": 64900 + }, + { + "epoch": 11.57, + "learning_rate": 4.421513908701855e-05, + "loss": 0.0036, + "step": 64910 + }, + { + "epoch": 11.58, + "learning_rate": 4.4214247503566335e-05, + "loss": 0.0034, + "step": 64920 + }, + { + "epoch": 11.58, + "learning_rate": 4.4213355920114126e-05, + "loss": 0.0033, + "step": 64930 + }, + { + "epoch": 11.58, + "learning_rate": 4.421246433666191e-05, + "loss": 0.0036, + "step": 64940 + }, + { + "epoch": 11.58, + "learning_rate": 4.42115727532097e-05, + "loss": 0.0022, + "step": 64950 + }, + { + "epoch": 11.58, + "learning_rate": 4.421068116975749e-05, + "loss": 0.0034, + "step": 64960 + }, + { + "epoch": 11.59, + "learning_rate": 4.420978958630528e-05, + "loss": 0.0046, + "step": 64970 + }, + { + "epoch": 11.59, + "learning_rate": 4.420889800285307e-05, + "loss": 0.0027, + "step": 64980 + }, + { + "epoch": 11.59, + "learning_rate": 4.420800641940086e-05, + "loss": 0.0046, + "step": 64990 + }, + { + "epoch": 11.59, + "learning_rate": 4.420711483594865e-05, + "loss": 0.0042, + "step": 65000 + }, + { + "epoch": 11.59, + "learning_rate": 4.4206223252496435e-05, + "loss": 0.0028, + "step": 65010 + }, + { + "epoch": 11.59, + "learning_rate": 4.420533166904423e-05, + "loss": 0.0062, + "step": 65020 + }, + { + "epoch": 11.6, + "learning_rate": 4.420444008559201e-05, + "loss": 0.0035, + "step": 65030 + }, + { + "epoch": 11.6, + "learning_rate": 4.42035485021398e-05, + "loss": 0.0031, + "step": 65040 + }, + { + "epoch": 11.6, + "learning_rate": 4.4202656918687594e-05, + "loss": 0.0034, + "step": 65050 + }, + { + "epoch": 11.6, + "learning_rate": 4.420176533523538e-05, + "loss": 0.0034, + "step": 65060 + }, + { + "epoch": 11.6, + "learning_rate": 4.420096291012839e-05, + "loss": 0.0032, + "step": 65070 + }, + { + "epoch": 11.6, + "learning_rate": 4.420007132667618e-05, + "loss": 0.0045, + "step": 65080 + }, + { + "epoch": 11.61, + "learning_rate": 4.419917974322397e-05, + "loss": 0.0025, + "step": 65090 + }, + { + "epoch": 11.61, + "learning_rate": 4.419828815977175e-05, + "loss": 0.0031, + "step": 65100 + }, + { + "epoch": 11.61, + "learning_rate": 4.4197396576319544e-05, + "loss": 0.0036, + "step": 65110 + }, + { + "epoch": 11.61, + "learning_rate": 4.4196504992867335e-05, + "loss": 0.0021, + "step": 65120 + }, + { + "epoch": 11.61, + "learning_rate": 4.419561340941512e-05, + "loss": 0.0048, + "step": 65130 + }, + { + "epoch": 11.62, + "learning_rate": 4.419472182596292e-05, + "loss": 0.0031, + "step": 65140 + }, + { + "epoch": 11.62, + "learning_rate": 4.41938302425107e-05, + "loss": 0.004, + "step": 65150 + }, + { + "epoch": 11.62, + "learning_rate": 4.419293865905849e-05, + "loss": 0.0074, + "step": 65160 + }, + { + "epoch": 11.62, + "learning_rate": 4.419204707560628e-05, + "loss": 0.0053, + "step": 65170 + }, + { + "epoch": 11.62, + "learning_rate": 4.419115549215407e-05, + "loss": 0.0033, + "step": 65180 + }, + { + "epoch": 11.62, + "learning_rate": 4.419026390870185e-05, + "loss": 0.0038, + "step": 65190 + }, + { + "epoch": 11.63, + "learning_rate": 4.4189372325249645e-05, + "loss": 0.0038, + "step": 65200 + }, + { + "epoch": 11.63, + "learning_rate": 4.4188480741797436e-05, + "loss": 0.0046, + "step": 65210 + }, + { + "epoch": 11.63, + "learning_rate": 4.418758915834522e-05, + "loss": 0.0032, + "step": 65220 + }, + { + "epoch": 11.63, + "learning_rate": 4.418669757489301e-05, + "loss": 0.0036, + "step": 65230 + }, + { + "epoch": 11.63, + "learning_rate": 4.4185805991440796e-05, + "loss": 0.0045, + "step": 65240 + }, + { + "epoch": 11.64, + "learning_rate": 4.4184914407988594e-05, + "loss": 0.004, + "step": 65250 + }, + { + "epoch": 11.64, + "learning_rate": 4.418402282453638e-05, + "loss": 0.0059, + "step": 65260 + }, + { + "epoch": 11.64, + "learning_rate": 4.418313124108417e-05, + "loss": 0.0039, + "step": 65270 + }, + { + "epoch": 11.64, + "learning_rate": 4.418223965763196e-05, + "loss": 0.0028, + "step": 65280 + }, + { + "epoch": 11.64, + "learning_rate": 4.4181348074179745e-05, + "loss": 0.0049, + "step": 65290 + }, + { + "epoch": 11.64, + "learning_rate": 4.4180456490727536e-05, + "loss": 0.0034, + "step": 65300 + }, + { + "epoch": 11.65, + "learning_rate": 4.417956490727532e-05, + "loss": 0.0036, + "step": 65310 + }, + { + "epoch": 11.65, + "learning_rate": 4.417867332382311e-05, + "loss": 0.004, + "step": 65320 + }, + { + "epoch": 11.65, + "learning_rate": 4.4177781740370896e-05, + "loss": 0.0035, + "step": 65330 + }, + { + "epoch": 11.65, + "learning_rate": 4.417689015691869e-05, + "loss": 0.0046, + "step": 65340 + }, + { + "epoch": 11.65, + "learning_rate": 4.417599857346648e-05, + "loss": 0.0036, + "step": 65350 + }, + { + "epoch": 11.65, + "learning_rate": 4.417510699001427e-05, + "loss": 0.0038, + "step": 65360 + }, + { + "epoch": 11.66, + "learning_rate": 4.417421540656206e-05, + "loss": 0.0031, + "step": 65370 + }, + { + "epoch": 11.66, + "learning_rate": 4.4173323823109846e-05, + "loss": 0.003, + "step": 65380 + }, + { + "epoch": 11.66, + "learning_rate": 4.417243223965764e-05, + "loss": 0.005, + "step": 65390 + }, + { + "epoch": 11.66, + "learning_rate": 4.417154065620542e-05, + "loss": 0.003, + "step": 65400 + }, + { + "epoch": 11.66, + "learning_rate": 4.417064907275321e-05, + "loss": 0.0061, + "step": 65410 + }, + { + "epoch": 11.67, + "learning_rate": 4.4169757489301e-05, + "loss": 0.004, + "step": 65420 + }, + { + "epoch": 11.67, + "learning_rate": 4.416886590584879e-05, + "loss": 0.004, + "step": 65430 + }, + { + "epoch": 11.67, + "learning_rate": 4.416797432239658e-05, + "loss": 0.0044, + "step": 65440 + }, + { + "epoch": 11.67, + "learning_rate": 4.4167082738944364e-05, + "loss": 0.0032, + "step": 65450 + }, + { + "epoch": 11.67, + "learning_rate": 4.4166191155492155e-05, + "loss": 0.0022, + "step": 65460 + }, + { + "epoch": 11.67, + "learning_rate": 4.4165299572039946e-05, + "loss": 0.0032, + "step": 65470 + }, + { + "epoch": 11.68, + "learning_rate": 4.416440798858774e-05, + "loss": 0.0062, + "step": 65480 + }, + { + "epoch": 11.68, + "learning_rate": 4.416351640513552e-05, + "loss": 0.0065, + "step": 65490 + }, + { + "epoch": 11.68, + "learning_rate": 4.416262482168331e-05, + "loss": 0.003, + "step": 65500 + }, + { + "epoch": 11.68, + "learning_rate": 4.4161733238231104e-05, + "loss": 0.0036, + "step": 65510 + }, + { + "epoch": 11.68, + "learning_rate": 4.416084165477889e-05, + "loss": 0.0033, + "step": 65520 + }, + { + "epoch": 11.69, + "learning_rate": 4.415995007132668e-05, + "loss": 0.006, + "step": 65530 + }, + { + "epoch": 11.69, + "learning_rate": 4.4159058487874464e-05, + "loss": 0.0034, + "step": 65540 + }, + { + "epoch": 11.69, + "learning_rate": 4.4158166904422256e-05, + "loss": 0.0056, + "step": 65550 + }, + { + "epoch": 11.69, + "learning_rate": 4.415727532097004e-05, + "loss": 0.0041, + "step": 65560 + }, + { + "epoch": 11.69, + "learning_rate": 4.415638373751783e-05, + "loss": 0.0051, + "step": 65570 + }, + { + "epoch": 11.69, + "learning_rate": 4.415549215406562e-05, + "loss": 0.0027, + "step": 65580 + }, + { + "epoch": 11.7, + "learning_rate": 4.4154600570613414e-05, + "loss": 0.0058, + "step": 65590 + }, + { + "epoch": 11.7, + "learning_rate": 4.4153708987161205e-05, + "loss": 0.0036, + "step": 65600 + }, + { + "epoch": 11.7, + "learning_rate": 4.415281740370899e-05, + "loss": 0.0049, + "step": 65610 + }, + { + "epoch": 11.7, + "learning_rate": 4.415192582025678e-05, + "loss": 0.0035, + "step": 65620 + }, + { + "epoch": 11.7, + "learning_rate": 4.4151034236804565e-05, + "loss": 0.0027, + "step": 65630 + }, + { + "epoch": 11.7, + "learning_rate": 4.4150142653352356e-05, + "loss": 0.0024, + "step": 65640 + }, + { + "epoch": 11.71, + "learning_rate": 4.414925106990014e-05, + "loss": 0.0043, + "step": 65650 + }, + { + "epoch": 11.71, + "learning_rate": 4.414835948644793e-05, + "loss": 0.0028, + "step": 65660 + }, + { + "epoch": 11.71, + "learning_rate": 4.414746790299572e-05, + "loss": 0.0045, + "step": 65670 + }, + { + "epoch": 11.71, + "learning_rate": 4.414657631954351e-05, + "loss": 0.0057, + "step": 65680 + }, + { + "epoch": 11.71, + "learning_rate": 4.4145684736091305e-05, + "loss": 0.0041, + "step": 65690 + }, + { + "epoch": 11.72, + "learning_rate": 4.414479315263909e-05, + "loss": 0.0037, + "step": 65700 + }, + { + "epoch": 11.72, + "learning_rate": 4.414390156918688e-05, + "loss": 0.0042, + "step": 65710 + }, + { + "epoch": 11.72, + "learning_rate": 4.4143009985734666e-05, + "loss": 0.0036, + "step": 65720 + }, + { + "epoch": 11.72, + "learning_rate": 4.414211840228246e-05, + "loss": 0.006, + "step": 65730 + }, + { + "epoch": 11.72, + "learning_rate": 4.414122681883025e-05, + "loss": 0.0048, + "step": 65740 + }, + { + "epoch": 11.72, + "learning_rate": 4.414033523537803e-05, + "loss": 0.0052, + "step": 65750 + }, + { + "epoch": 11.73, + "learning_rate": 4.4139443651925824e-05, + "loss": 0.0027, + "step": 65760 + }, + { + "epoch": 11.73, + "learning_rate": 4.413855206847361e-05, + "loss": 0.0029, + "step": 65770 + }, + { + "epoch": 11.73, + "learning_rate": 4.41376604850214e-05, + "loss": 0.0053, + "step": 65780 + }, + { + "epoch": 11.73, + "learning_rate": 4.4136768901569184e-05, + "loss": 0.0017, + "step": 65790 + }, + { + "epoch": 11.73, + "learning_rate": 4.413587731811698e-05, + "loss": 0.003, + "step": 65800 + }, + { + "epoch": 11.74, + "learning_rate": 4.4134985734664766e-05, + "loss": 0.0046, + "step": 65810 + }, + { + "epoch": 11.74, + "learning_rate": 4.413409415121256e-05, + "loss": 0.003, + "step": 65820 + }, + { + "epoch": 11.74, + "learning_rate": 4.413320256776035e-05, + "loss": 0.003, + "step": 65830 + }, + { + "epoch": 11.74, + "learning_rate": 4.413231098430813e-05, + "loss": 0.0072, + "step": 65840 + }, + { + "epoch": 11.74, + "learning_rate": 4.4131419400855924e-05, + "loss": 0.0064, + "step": 65850 + }, + { + "epoch": 11.74, + "learning_rate": 4.413052781740371e-05, + "loss": 0.0032, + "step": 65860 + }, + { + "epoch": 11.75, + "learning_rate": 4.41296362339515e-05, + "loss": 0.0057, + "step": 65870 + }, + { + "epoch": 11.75, + "learning_rate": 4.4128744650499284e-05, + "loss": 0.0028, + "step": 65880 + }, + { + "epoch": 11.75, + "learning_rate": 4.4127853067047075e-05, + "loss": 0.0033, + "step": 65890 + }, + { + "epoch": 11.75, + "learning_rate": 4.412696148359487e-05, + "loss": 0.0045, + "step": 65900 + }, + { + "epoch": 11.75, + "learning_rate": 4.412606990014266e-05, + "loss": 0.0033, + "step": 65910 + }, + { + "epoch": 11.75, + "learning_rate": 4.412517831669045e-05, + "loss": 0.0027, + "step": 65920 + }, + { + "epoch": 11.76, + "learning_rate": 4.4124286733238234e-05, + "loss": 0.0063, + "step": 65930 + }, + { + "epoch": 11.76, + "learning_rate": 4.4123395149786025e-05, + "loss": 0.0036, + "step": 65940 + }, + { + "epoch": 11.76, + "learning_rate": 4.412250356633381e-05, + "loss": 0.0045, + "step": 65950 + }, + { + "epoch": 11.76, + "learning_rate": 4.41216119828816e-05, + "loss": 0.0033, + "step": 65960 + }, + { + "epoch": 11.76, + "learning_rate": 4.412072039942939e-05, + "loss": 0.0018, + "step": 65970 + }, + { + "epoch": 11.77, + "learning_rate": 4.4119828815977176e-05, + "loss": 0.0035, + "step": 65980 + }, + { + "epoch": 11.77, + "learning_rate": 4.411893723252497e-05, + "loss": 0.006, + "step": 65990 + }, + { + "epoch": 11.77, + "learning_rate": 4.411804564907275e-05, + "loss": 0.005, + "step": 66000 + }, + { + "epoch": 11.77, + "learning_rate": 4.411715406562054e-05, + "loss": 0.0035, + "step": 66010 + }, + { + "epoch": 11.77, + "learning_rate": 4.4116262482168334e-05, + "loss": 0.0049, + "step": 66020 + }, + { + "epoch": 11.77, + "learning_rate": 4.4115370898716125e-05, + "loss": 0.0043, + "step": 66030 + }, + { + "epoch": 11.78, + "learning_rate": 4.411447931526391e-05, + "loss": 0.0041, + "step": 66040 + }, + { + "epoch": 11.78, + "learning_rate": 4.41135877318117e-05, + "loss": 0.004, + "step": 66050 + }, + { + "epoch": 11.78, + "learning_rate": 4.411269614835949e-05, + "loss": 0.0023, + "step": 66060 + }, + { + "epoch": 11.78, + "learning_rate": 4.4111804564907277e-05, + "loss": 0.004, + "step": 66070 + }, + { + "epoch": 11.78, + "learning_rate": 4.411091298145507e-05, + "loss": 0.0049, + "step": 66080 + }, + { + "epoch": 11.78, + "learning_rate": 4.411002139800285e-05, + "loss": 0.0046, + "step": 66090 + }, + { + "epoch": 11.79, + "learning_rate": 4.4109129814550643e-05, + "loss": 0.0041, + "step": 66100 + }, + { + "epoch": 11.79, + "learning_rate": 4.410823823109843e-05, + "loss": 0.0027, + "step": 66110 + }, + { + "epoch": 11.79, + "learning_rate": 4.410734664764622e-05, + "loss": 0.0042, + "step": 66120 + }, + { + "epoch": 11.79, + "learning_rate": 4.410645506419401e-05, + "loss": 0.0028, + "step": 66130 + }, + { + "epoch": 11.79, + "learning_rate": 4.41055634807418e-05, + "loss": 0.0042, + "step": 66140 + }, + { + "epoch": 11.8, + "learning_rate": 4.410467189728959e-05, + "loss": 0.0069, + "step": 66150 + }, + { + "epoch": 11.8, + "learning_rate": 4.410378031383738e-05, + "loss": 0.0029, + "step": 66160 + }, + { + "epoch": 11.8, + "learning_rate": 4.410288873038517e-05, + "loss": 0.0041, + "step": 66170 + }, + { + "epoch": 11.8, + "learning_rate": 4.410199714693295e-05, + "loss": 0.0032, + "step": 66180 + }, + { + "epoch": 11.8, + "learning_rate": 4.4101105563480744e-05, + "loss": 0.0026, + "step": 66190 + }, + { + "epoch": 11.8, + "learning_rate": 4.4100213980028535e-05, + "loss": 0.0056, + "step": 66200 + }, + { + "epoch": 11.81, + "learning_rate": 4.409932239657632e-05, + "loss": 0.0021, + "step": 66210 + }, + { + "epoch": 11.81, + "learning_rate": 4.409843081312411e-05, + "loss": 0.0029, + "step": 66220 + }, + { + "epoch": 11.81, + "learning_rate": 4.4097539229671895e-05, + "loss": 0.0058, + "step": 66230 + }, + { + "epoch": 11.81, + "learning_rate": 4.409664764621969e-05, + "loss": 0.0021, + "step": 66240 + }, + { + "epoch": 11.81, + "learning_rate": 4.409575606276748e-05, + "loss": 0.0035, + "step": 66250 + }, + { + "epoch": 11.82, + "learning_rate": 4.409486447931527e-05, + "loss": 0.006, + "step": 66260 + }, + { + "epoch": 11.82, + "learning_rate": 4.409397289586305e-05, + "loss": 0.0033, + "step": 66270 + }, + { + "epoch": 11.82, + "learning_rate": 4.4093081312410845e-05, + "loss": 0.0029, + "step": 66280 + }, + { + "epoch": 11.82, + "learning_rate": 4.4092189728958636e-05, + "loss": 0.0041, + "step": 66290 + }, + { + "epoch": 11.82, + "learning_rate": 4.409129814550642e-05, + "loss": 0.0018, + "step": 66300 + }, + { + "epoch": 11.82, + "learning_rate": 4.409040656205421e-05, + "loss": 0.004, + "step": 66310 + }, + { + "epoch": 11.83, + "learning_rate": 4.4089514978601996e-05, + "loss": 0.0031, + "step": 66320 + }, + { + "epoch": 11.83, + "learning_rate": 4.408862339514979e-05, + "loss": 0.0034, + "step": 66330 + }, + { + "epoch": 11.83, + "learning_rate": 4.408773181169757e-05, + "loss": 0.0039, + "step": 66340 + }, + { + "epoch": 11.83, + "learning_rate": 4.408684022824537e-05, + "loss": 0.0017, + "step": 66350 + }, + { + "epoch": 11.83, + "learning_rate": 4.4085948644793154e-05, + "loss": 0.004, + "step": 66360 + }, + { + "epoch": 11.83, + "learning_rate": 4.4085057061340945e-05, + "loss": 0.0048, + "step": 66370 + }, + { + "epoch": 11.84, + "learning_rate": 4.4084165477888736e-05, + "loss": 0.0064, + "step": 66380 + }, + { + "epoch": 11.84, + "learning_rate": 4.408327389443652e-05, + "loss": 0.0028, + "step": 66390 + }, + { + "epoch": 11.84, + "learning_rate": 4.408238231098431e-05, + "loss": 0.0049, + "step": 66400 + }, + { + "epoch": 11.84, + "learning_rate": 4.4081490727532096e-05, + "loss": 0.0039, + "step": 66410 + }, + { + "epoch": 11.84, + "learning_rate": 4.408059914407989e-05, + "loss": 0.0045, + "step": 66420 + }, + { + "epoch": 11.85, + "learning_rate": 4.407970756062768e-05, + "loss": 0.0044, + "step": 66430 + }, + { + "epoch": 11.85, + "learning_rate": 4.407881597717546e-05, + "loss": 0.0046, + "step": 66440 + }, + { + "epoch": 11.85, + "learning_rate": 4.4077924393723254e-05, + "loss": 0.0049, + "step": 66450 + }, + { + "epoch": 11.85, + "learning_rate": 4.4077032810271046e-05, + "loss": 0.0045, + "step": 66460 + }, + { + "epoch": 11.85, + "learning_rate": 4.407614122681884e-05, + "loss": 0.003, + "step": 66470 + }, + { + "epoch": 11.85, + "learning_rate": 4.407524964336662e-05, + "loss": 0.0044, + "step": 66480 + }, + { + "epoch": 11.86, + "learning_rate": 4.407435805991441e-05, + "loss": 0.0033, + "step": 66490 + }, + { + "epoch": 11.86, + "learning_rate": 4.40734664764622e-05, + "loss": 0.0035, + "step": 66500 + }, + { + "epoch": 11.86, + "learning_rate": 4.407257489300999e-05, + "loss": 0.0044, + "step": 66510 + }, + { + "epoch": 11.86, + "learning_rate": 4.407168330955778e-05, + "loss": 0.0052, + "step": 66520 + }, + { + "epoch": 11.86, + "learning_rate": 4.4070791726105564e-05, + "loss": 0.0041, + "step": 66530 + }, + { + "epoch": 11.87, + "learning_rate": 4.4069900142653355e-05, + "loss": 0.0061, + "step": 66540 + }, + { + "epoch": 11.87, + "learning_rate": 4.406900855920114e-05, + "loss": 0.0044, + "step": 66550 + }, + { + "epoch": 11.87, + "learning_rate": 4.406811697574893e-05, + "loss": 0.006, + "step": 66560 + }, + { + "epoch": 11.87, + "learning_rate": 4.406722539229672e-05, + "loss": 0.0027, + "step": 66570 + }, + { + "epoch": 11.87, + "learning_rate": 4.406642296718973e-05, + "loss": 0.0032, + "step": 66580 + }, + { + "epoch": 11.87, + "learning_rate": 4.406553138373752e-05, + "loss": 0.0049, + "step": 66590 + }, + { + "epoch": 11.88, + "learning_rate": 4.4064639800285306e-05, + "loss": 0.0047, + "step": 66600 + }, + { + "epoch": 11.88, + "learning_rate": 4.40637482168331e-05, + "loss": 0.0031, + "step": 66610 + }, + { + "epoch": 11.88, + "learning_rate": 4.406285663338089e-05, + "loss": 0.0061, + "step": 66620 + }, + { + "epoch": 11.88, + "learning_rate": 4.406196504992868e-05, + "loss": 0.0023, + "step": 66630 + }, + { + "epoch": 11.88, + "learning_rate": 4.4061073466476464e-05, + "loss": 0.0051, + "step": 66640 + }, + { + "epoch": 11.88, + "learning_rate": 4.4060181883024255e-05, + "loss": 0.0025, + "step": 66650 + }, + { + "epoch": 11.89, + "learning_rate": 4.405929029957204e-05, + "loss": 0.0063, + "step": 66660 + }, + { + "epoch": 11.89, + "learning_rate": 4.405839871611983e-05, + "loss": 0.0049, + "step": 66670 + }, + { + "epoch": 11.89, + "learning_rate": 4.405750713266762e-05, + "loss": 0.0048, + "step": 66680 + }, + { + "epoch": 11.89, + "learning_rate": 4.4056615549215406e-05, + "loss": 0.0043, + "step": 66690 + }, + { + "epoch": 11.89, + "learning_rate": 4.40557239657632e-05, + "loss": 0.0028, + "step": 66700 + }, + { + "epoch": 11.9, + "learning_rate": 4.405483238231098e-05, + "loss": 0.004, + "step": 66710 + }, + { + "epoch": 11.9, + "learning_rate": 4.405394079885877e-05, + "loss": 0.0039, + "step": 66720 + }, + { + "epoch": 11.9, + "learning_rate": 4.4053049215406564e-05, + "loss": 0.0029, + "step": 66730 + }, + { + "epoch": 11.9, + "learning_rate": 4.4052157631954355e-05, + "loss": 0.0052, + "step": 66740 + }, + { + "epoch": 11.9, + "learning_rate": 4.4051266048502147e-05, + "loss": 0.0025, + "step": 66750 + }, + { + "epoch": 11.9, + "learning_rate": 4.405037446504993e-05, + "loss": 0.0039, + "step": 66760 + }, + { + "epoch": 11.91, + "learning_rate": 4.404948288159772e-05, + "loss": 0.0012, + "step": 66770 + }, + { + "epoch": 11.91, + "learning_rate": 4.404859129814551e-05, + "loss": 0.0032, + "step": 66780 + }, + { + "epoch": 11.91, + "learning_rate": 4.40476997146933e-05, + "loss": 0.0048, + "step": 66790 + }, + { + "epoch": 11.91, + "learning_rate": 4.404680813124108e-05, + "loss": 0.0029, + "step": 66800 + }, + { + "epoch": 11.91, + "learning_rate": 4.4045916547788874e-05, + "loss": 0.0023, + "step": 66810 + }, + { + "epoch": 11.92, + "learning_rate": 4.4045024964336665e-05, + "loss": 0.003, + "step": 66820 + }, + { + "epoch": 11.92, + "learning_rate": 4.404413338088445e-05, + "loss": 0.0062, + "step": 66830 + }, + { + "epoch": 11.92, + "learning_rate": 4.404324179743225e-05, + "loss": 0.0036, + "step": 66840 + }, + { + "epoch": 11.92, + "learning_rate": 4.404235021398003e-05, + "loss": 0.0089, + "step": 66850 + }, + { + "epoch": 11.92, + "learning_rate": 4.404145863052782e-05, + "loss": 0.0032, + "step": 66860 + }, + { + "epoch": 11.92, + "learning_rate": 4.404056704707561e-05, + "loss": 0.0034, + "step": 66870 + }, + { + "epoch": 11.93, + "learning_rate": 4.40396754636234e-05, + "loss": 0.0035, + "step": 66880 + }, + { + "epoch": 11.93, + "learning_rate": 4.403878388017118e-05, + "loss": 0.0023, + "step": 66890 + }, + { + "epoch": 11.93, + "learning_rate": 4.4037892296718974e-05, + "loss": 0.0022, + "step": 66900 + }, + { + "epoch": 11.93, + "learning_rate": 4.4037000713266765e-05, + "loss": 0.003, + "step": 66910 + }, + { + "epoch": 11.93, + "learning_rate": 4.403610912981455e-05, + "loss": 0.0059, + "step": 66920 + }, + { + "epoch": 11.93, + "learning_rate": 4.403521754636234e-05, + "loss": 0.0052, + "step": 66930 + }, + { + "epoch": 11.94, + "learning_rate": 4.4034325962910125e-05, + "loss": 0.0041, + "step": 66940 + }, + { + "epoch": 11.94, + "learning_rate": 4.403343437945792e-05, + "loss": 0.0053, + "step": 66950 + }, + { + "epoch": 11.94, + "learning_rate": 4.403254279600571e-05, + "loss": 0.0035, + "step": 66960 + }, + { + "epoch": 11.94, + "learning_rate": 4.40316512125535e-05, + "loss": 0.005, + "step": 66970 + }, + { + "epoch": 11.94, + "learning_rate": 4.403075962910129e-05, + "loss": 0.0027, + "step": 66980 + }, + { + "epoch": 11.95, + "learning_rate": 4.4029868045649075e-05, + "loss": 0.0047, + "step": 66990 + }, + { + "epoch": 11.95, + "learning_rate": 4.4028976462196866e-05, + "loss": 0.0034, + "step": 67000 + }, + { + "epoch": 11.95, + "learning_rate": 4.402808487874465e-05, + "loss": 0.0025, + "step": 67010 + }, + { + "epoch": 11.95, + "learning_rate": 4.402719329529244e-05, + "loss": 0.0037, + "step": 67020 + }, + { + "epoch": 11.95, + "learning_rate": 4.4026301711840226e-05, + "loss": 0.0033, + "step": 67030 + }, + { + "epoch": 11.95, + "learning_rate": 4.402541012838802e-05, + "loss": 0.004, + "step": 67040 + }, + { + "epoch": 11.96, + "learning_rate": 4.402451854493581e-05, + "loss": 0.0031, + "step": 67050 + }, + { + "epoch": 11.96, + "learning_rate": 4.40236269614836e-05, + "loss": 0.0043, + "step": 67060 + }, + { + "epoch": 11.96, + "learning_rate": 4.402273537803139e-05, + "loss": 0.0044, + "step": 67070 + }, + { + "epoch": 11.96, + "learning_rate": 4.4021843794579175e-05, + "loss": 0.0041, + "step": 67080 + }, + { + "epoch": 11.96, + "learning_rate": 4.4020952211126966e-05, + "loss": 0.0037, + "step": 67090 + }, + { + "epoch": 11.97, + "learning_rate": 4.402006062767475e-05, + "loss": 0.0047, + "step": 67100 + }, + { + "epoch": 11.97, + "learning_rate": 4.401916904422254e-05, + "loss": 0.0019, + "step": 67110 + }, + { + "epoch": 11.97, + "learning_rate": 4.4018277460770326e-05, + "loss": 0.0035, + "step": 67120 + }, + { + "epoch": 11.97, + "learning_rate": 4.401738587731812e-05, + "loss": 0.0026, + "step": 67130 + }, + { + "epoch": 11.97, + "learning_rate": 4.401649429386591e-05, + "loss": 0.0026, + "step": 67140 + }, + { + "epoch": 11.97, + "learning_rate": 4.401560271041369e-05, + "loss": 0.0038, + "step": 67150 + }, + { + "epoch": 11.98, + "learning_rate": 4.4014711126961485e-05, + "loss": 0.0069, + "step": 67160 + }, + { + "epoch": 11.98, + "learning_rate": 4.4013819543509276e-05, + "loss": 0.0051, + "step": 67170 + }, + { + "epoch": 11.98, + "learning_rate": 4.401292796005707e-05, + "loss": 0.0058, + "step": 67180 + }, + { + "epoch": 11.98, + "learning_rate": 4.401203637660485e-05, + "loss": 0.0025, + "step": 67190 + }, + { + "epoch": 11.98, + "learning_rate": 4.401114479315264e-05, + "loss": 0.0051, + "step": 67200 + }, + { + "epoch": 11.98, + "learning_rate": 4.4010253209700434e-05, + "loss": 0.0029, + "step": 67210 + }, + { + "epoch": 11.99, + "learning_rate": 4.400936162624822e-05, + "loss": 0.0035, + "step": 67220 + }, + { + "epoch": 11.99, + "learning_rate": 4.400847004279601e-05, + "loss": 0.0027, + "step": 67230 + }, + { + "epoch": 11.99, + "learning_rate": 4.4007578459343794e-05, + "loss": 0.0032, + "step": 67240 + }, + { + "epoch": 11.99, + "learning_rate": 4.4006686875891585e-05, + "loss": 0.0017, + "step": 67250 + }, + { + "epoch": 11.99, + "learning_rate": 4.400579529243937e-05, + "loss": 0.006, + "step": 67260 + }, + { + "epoch": 12.0, + "learning_rate": 4.400490370898716e-05, + "loss": 0.0044, + "step": 67270 + }, + { + "epoch": 12.0, + "learning_rate": 4.400401212553495e-05, + "loss": 0.0032, + "step": 67280 + }, + { + "epoch": 12.0, + "learning_rate": 4.400312054208274e-05, + "loss": 0.0031, + "step": 67290 + }, + { + "epoch": 12.0, + "eval_loss": 0.01598210446536541, + "eval_runtime": 195.6628, + "eval_samples_per_second": 23.709, + "eval_steps_per_second": 2.964, + "step": 67296 + }, + { + "epoch": 12.0, + "learning_rate": 4.4002228958630534e-05, + "loss": 0.0026, + "step": 67300 + }, + { + "epoch": 12.0, + "learning_rate": 4.400133737517832e-05, + "loss": 0.0024, + "step": 67310 + }, + { + "epoch": 12.0, + "learning_rate": 4.400044579172611e-05, + "loss": 0.0027, + "step": 67320 + }, + { + "epoch": 12.01, + "learning_rate": 4.3999554208273894e-05, + "loss": 0.0022, + "step": 67330 + }, + { + "epoch": 12.01, + "learning_rate": 4.3998662624821686e-05, + "loss": 0.004, + "step": 67340 + }, + { + "epoch": 12.01, + "learning_rate": 4.399777104136947e-05, + "loss": 0.0037, + "step": 67350 + }, + { + "epoch": 12.01, + "learning_rate": 4.399687945791726e-05, + "loss": 0.0026, + "step": 67360 + }, + { + "epoch": 12.01, + "learning_rate": 4.399598787446505e-05, + "loss": 0.0036, + "step": 67370 + }, + { + "epoch": 12.01, + "learning_rate": 4.399509629101284e-05, + "loss": 0.0032, + "step": 67380 + }, + { + "epoch": 12.02, + "learning_rate": 4.3994204707560635e-05, + "loss": 0.0032, + "step": 67390 + }, + { + "epoch": 12.02, + "learning_rate": 4.399331312410842e-05, + "loss": 0.0039, + "step": 67400 + }, + { + "epoch": 12.02, + "learning_rate": 4.399242154065621e-05, + "loss": 0.0019, + "step": 67410 + }, + { + "epoch": 12.02, + "learning_rate": 4.3991529957203995e-05, + "loss": 0.0034, + "step": 67420 + }, + { + "epoch": 12.02, + "learning_rate": 4.3990638373751786e-05, + "loss": 0.0031, + "step": 67430 + }, + { + "epoch": 12.03, + "learning_rate": 4.398974679029958e-05, + "loss": 0.0062, + "step": 67440 + }, + { + "epoch": 12.03, + "learning_rate": 4.398885520684736e-05, + "loss": 0.0045, + "step": 67450 + }, + { + "epoch": 12.03, + "learning_rate": 4.398796362339515e-05, + "loss": 0.0048, + "step": 67460 + }, + { + "epoch": 12.03, + "learning_rate": 4.398707203994294e-05, + "loss": 0.0037, + "step": 67470 + }, + { + "epoch": 12.03, + "learning_rate": 4.398618045649073e-05, + "loss": 0.0035, + "step": 67480 + }, + { + "epoch": 12.03, + "learning_rate": 4.398528887303851e-05, + "loss": 0.0044, + "step": 67490 + }, + { + "epoch": 12.04, + "learning_rate": 4.398448644793153e-05, + "loss": 0.0055, + "step": 67500 + }, + { + "epoch": 12.04, + "learning_rate": 4.398359486447932e-05, + "loss": 0.0035, + "step": 67510 + }, + { + "epoch": 12.04, + "learning_rate": 4.3982703281027104e-05, + "loss": 0.0037, + "step": 67520 + }, + { + "epoch": 12.04, + "learning_rate": 4.3981811697574895e-05, + "loss": 0.0031, + "step": 67530 + }, + { + "epoch": 12.04, + "learning_rate": 4.3980920114122686e-05, + "loss": 0.0027, + "step": 67540 + }, + { + "epoch": 12.05, + "learning_rate": 4.398002853067048e-05, + "loss": 0.0036, + "step": 67550 + }, + { + "epoch": 12.05, + "learning_rate": 4.397913694721826e-05, + "loss": 0.0033, + "step": 67560 + }, + { + "epoch": 12.05, + "learning_rate": 4.397824536376605e-05, + "loss": 0.0015, + "step": 67570 + }, + { + "epoch": 12.05, + "learning_rate": 4.397735378031384e-05, + "loss": 0.0024, + "step": 67580 + }, + { + "epoch": 12.05, + "learning_rate": 4.397646219686163e-05, + "loss": 0.0051, + "step": 67590 + }, + { + "epoch": 12.05, + "learning_rate": 4.397557061340942e-05, + "loss": 0.0028, + "step": 67600 + }, + { + "epoch": 12.06, + "learning_rate": 4.3974679029957204e-05, + "loss": 0.0018, + "step": 67610 + }, + { + "epoch": 12.06, + "learning_rate": 4.3973787446504995e-05, + "loss": 0.0035, + "step": 67620 + }, + { + "epoch": 12.06, + "learning_rate": 4.397289586305278e-05, + "loss": 0.0042, + "step": 67630 + }, + { + "epoch": 12.06, + "learning_rate": 4.397200427960057e-05, + "loss": 0.0059, + "step": 67640 + }, + { + "epoch": 12.06, + "learning_rate": 4.3971112696148355e-05, + "loss": 0.003, + "step": 67650 + }, + { + "epoch": 12.06, + "learning_rate": 4.3970221112696153e-05, + "loss": 0.0021, + "step": 67660 + }, + { + "epoch": 12.07, + "learning_rate": 4.396932952924394e-05, + "loss": 0.0037, + "step": 67670 + }, + { + "epoch": 12.07, + "learning_rate": 4.396843794579173e-05, + "loss": 0.0036, + "step": 67680 + }, + { + "epoch": 12.07, + "learning_rate": 4.396754636233952e-05, + "loss": 0.0047, + "step": 67690 + }, + { + "epoch": 12.07, + "learning_rate": 4.3966654778887305e-05, + "loss": 0.0051, + "step": 67700 + }, + { + "epoch": 12.07, + "learning_rate": 4.3965763195435096e-05, + "loss": 0.0027, + "step": 67710 + }, + { + "epoch": 12.08, + "learning_rate": 4.396487161198288e-05, + "loss": 0.0034, + "step": 67720 + }, + { + "epoch": 12.08, + "learning_rate": 4.396398002853067e-05, + "loss": 0.0032, + "step": 67730 + }, + { + "epoch": 12.08, + "learning_rate": 4.396308844507846e-05, + "loss": 0.0028, + "step": 67740 + }, + { + "epoch": 12.08, + "learning_rate": 4.396219686162625e-05, + "loss": 0.0036, + "step": 67750 + }, + { + "epoch": 12.08, + "learning_rate": 4.396130527817404e-05, + "loss": 0.0041, + "step": 67760 + }, + { + "epoch": 12.08, + "learning_rate": 4.396041369472183e-05, + "loss": 0.0025, + "step": 67770 + }, + { + "epoch": 12.09, + "learning_rate": 4.395952211126962e-05, + "loss": 0.0046, + "step": 67780 + }, + { + "epoch": 12.09, + "learning_rate": 4.3958630527817405e-05, + "loss": 0.0026, + "step": 67790 + }, + { + "epoch": 12.09, + "learning_rate": 4.3957738944365197e-05, + "loss": 0.003, + "step": 67800 + }, + { + "epoch": 12.09, + "learning_rate": 4.395684736091298e-05, + "loss": 0.0062, + "step": 67810 + }, + { + "epoch": 12.09, + "learning_rate": 4.395595577746077e-05, + "loss": 0.0038, + "step": 67820 + }, + { + "epoch": 12.1, + "learning_rate": 4.395506419400856e-05, + "loss": 0.0033, + "step": 67830 + }, + { + "epoch": 12.1, + "learning_rate": 4.395417261055635e-05, + "loss": 0.0027, + "step": 67840 + }, + { + "epoch": 12.1, + "learning_rate": 4.395328102710414e-05, + "loss": 0.0035, + "step": 67850 + }, + { + "epoch": 12.1, + "learning_rate": 4.3952389443651923e-05, + "loss": 0.0028, + "step": 67860 + }, + { + "epoch": 12.1, + "learning_rate": 4.3951497860199715e-05, + "loss": 0.0039, + "step": 67870 + }, + { + "epoch": 12.1, + "learning_rate": 4.3950606276747506e-05, + "loss": 0.0025, + "step": 67880 + }, + { + "epoch": 12.11, + "learning_rate": 4.39497146932953e-05, + "loss": 0.0054, + "step": 67890 + }, + { + "epoch": 12.11, + "learning_rate": 4.394882310984308e-05, + "loss": 0.0039, + "step": 67900 + }, + { + "epoch": 12.11, + "learning_rate": 4.394793152639087e-05, + "loss": 0.0046, + "step": 67910 + }, + { + "epoch": 12.11, + "learning_rate": 4.3947039942938664e-05, + "loss": 0.0026, + "step": 67920 + }, + { + "epoch": 12.11, + "learning_rate": 4.394614835948645e-05, + "loss": 0.0043, + "step": 67930 + }, + { + "epoch": 12.11, + "learning_rate": 4.394525677603424e-05, + "loss": 0.0071, + "step": 67940 + }, + { + "epoch": 12.12, + "learning_rate": 4.3944365192582024e-05, + "loss": 0.0022, + "step": 67950 + }, + { + "epoch": 12.12, + "learning_rate": 4.3943473609129815e-05, + "loss": 0.0021, + "step": 67960 + }, + { + "epoch": 12.12, + "learning_rate": 4.3942582025677606e-05, + "loss": 0.0019, + "step": 67970 + }, + { + "epoch": 12.12, + "learning_rate": 4.394169044222539e-05, + "loss": 0.0053, + "step": 67980 + }, + { + "epoch": 12.12, + "learning_rate": 4.394079885877319e-05, + "loss": 0.0031, + "step": 67990 + }, + { + "epoch": 12.13, + "learning_rate": 4.393990727532097e-05, + "loss": 0.0027, + "step": 68000 + }, + { + "epoch": 12.13, + "learning_rate": 4.3939015691868764e-05, + "loss": 0.0035, + "step": 68010 + }, + { + "epoch": 12.13, + "learning_rate": 4.393812410841655e-05, + "loss": 0.0048, + "step": 68020 + }, + { + "epoch": 12.13, + "learning_rate": 4.393723252496434e-05, + "loss": 0.0015, + "step": 68030 + }, + { + "epoch": 12.13, + "learning_rate": 4.3936340941512125e-05, + "loss": 0.0016, + "step": 68040 + }, + { + "epoch": 12.13, + "learning_rate": 4.3935449358059916e-05, + "loss": 0.0027, + "step": 68050 + }, + { + "epoch": 12.14, + "learning_rate": 4.393455777460771e-05, + "loss": 0.0015, + "step": 68060 + }, + { + "epoch": 12.14, + "learning_rate": 4.393366619115549e-05, + "loss": 0.0024, + "step": 68070 + }, + { + "epoch": 12.14, + "learning_rate": 4.393277460770328e-05, + "loss": 0.0037, + "step": 68080 + }, + { + "epoch": 12.14, + "learning_rate": 4.393188302425107e-05, + "loss": 0.0036, + "step": 68090 + }, + { + "epoch": 12.14, + "learning_rate": 4.3930991440798865e-05, + "loss": 0.0029, + "step": 68100 + }, + { + "epoch": 12.15, + "learning_rate": 4.393009985734665e-05, + "loss": 0.0028, + "step": 68110 + }, + { + "epoch": 12.15, + "learning_rate": 4.392920827389444e-05, + "loss": 0.0072, + "step": 68120 + }, + { + "epoch": 12.15, + "learning_rate": 4.3928316690442225e-05, + "loss": 0.008, + "step": 68130 + }, + { + "epoch": 12.15, + "learning_rate": 4.3927425106990016e-05, + "loss": 0.0037, + "step": 68140 + }, + { + "epoch": 12.15, + "learning_rate": 4.392653352353781e-05, + "loss": 0.0054, + "step": 68150 + }, + { + "epoch": 12.15, + "learning_rate": 4.392564194008559e-05, + "loss": 0.0031, + "step": 68160 + }, + { + "epoch": 12.16, + "learning_rate": 4.392475035663338e-05, + "loss": 0.0033, + "step": 68170 + }, + { + "epoch": 12.16, + "learning_rate": 4.392385877318117e-05, + "loss": 0.0026, + "step": 68180 + }, + { + "epoch": 12.16, + "learning_rate": 4.392296718972896e-05, + "loss": 0.0027, + "step": 68190 + }, + { + "epoch": 12.16, + "learning_rate": 4.392207560627675e-05, + "loss": 0.0025, + "step": 68200 + }, + { + "epoch": 12.16, + "learning_rate": 4.392118402282454e-05, + "loss": 0.0042, + "step": 68210 + }, + { + "epoch": 12.16, + "learning_rate": 4.392029243937233e-05, + "loss": 0.0057, + "step": 68220 + }, + { + "epoch": 12.17, + "learning_rate": 4.391940085592012e-05, + "loss": 0.0024, + "step": 68230 + }, + { + "epoch": 12.17, + "learning_rate": 4.391850927246791e-05, + "loss": 0.0036, + "step": 68240 + }, + { + "epoch": 12.17, + "learning_rate": 4.391761768901569e-05, + "loss": 0.0021, + "step": 68250 + }, + { + "epoch": 12.17, + "learning_rate": 4.3916726105563484e-05, + "loss": 0.0059, + "step": 68260 + }, + { + "epoch": 12.17, + "learning_rate": 4.391583452211127e-05, + "loss": 0.0061, + "step": 68270 + }, + { + "epoch": 12.18, + "learning_rate": 4.391494293865906e-05, + "loss": 0.0032, + "step": 68280 + }, + { + "epoch": 12.18, + "learning_rate": 4.391405135520685e-05, + "loss": 0.0027, + "step": 68290 + }, + { + "epoch": 12.18, + "learning_rate": 4.3913159771754635e-05, + "loss": 0.0046, + "step": 68300 + }, + { + "epoch": 12.18, + "learning_rate": 4.3912268188302426e-05, + "loss": 0.0026, + "step": 68310 + }, + { + "epoch": 12.18, + "learning_rate": 4.391137660485022e-05, + "loss": 0.0022, + "step": 68320 + }, + { + "epoch": 12.18, + "learning_rate": 4.391048502139801e-05, + "loss": 0.0047, + "step": 68330 + }, + { + "epoch": 12.19, + "learning_rate": 4.390959343794579e-05, + "loss": 0.0039, + "step": 68340 + }, + { + "epoch": 12.19, + "learning_rate": 4.3908701854493584e-05, + "loss": 0.0017, + "step": 68350 + }, + { + "epoch": 12.19, + "learning_rate": 4.390781027104137e-05, + "loss": 0.0027, + "step": 68360 + }, + { + "epoch": 12.19, + "learning_rate": 4.390691868758916e-05, + "loss": 0.0033, + "step": 68370 + }, + { + "epoch": 12.19, + "learning_rate": 4.390602710413695e-05, + "loss": 0.0041, + "step": 68380 + }, + { + "epoch": 12.2, + "learning_rate": 4.3905135520684736e-05, + "loss": 0.0027, + "step": 68390 + }, + { + "epoch": 12.2, + "learning_rate": 4.390424393723253e-05, + "loss": 0.0027, + "step": 68400 + }, + { + "epoch": 12.2, + "learning_rate": 4.390335235378031e-05, + "loss": 0.0082, + "step": 68410 + }, + { + "epoch": 12.2, + "learning_rate": 4.39024607703281e-05, + "loss": 0.0021, + "step": 68420 + }, + { + "epoch": 12.2, + "learning_rate": 4.3901569186875894e-05, + "loss": 0.0021, + "step": 68430 + }, + { + "epoch": 12.2, + "learning_rate": 4.3900677603423685e-05, + "loss": 0.0022, + "step": 68440 + }, + { + "epoch": 12.21, + "learning_rate": 4.3899786019971476e-05, + "loss": 0.0034, + "step": 68450 + }, + { + "epoch": 12.21, + "learning_rate": 4.389889443651926e-05, + "loss": 0.0024, + "step": 68460 + }, + { + "epoch": 12.21, + "learning_rate": 4.389800285306705e-05, + "loss": 0.0029, + "step": 68470 + }, + { + "epoch": 12.21, + "learning_rate": 4.3897111269614836e-05, + "loss": 0.0011, + "step": 68480 + }, + { + "epoch": 12.21, + "learning_rate": 4.389621968616263e-05, + "loss": 0.0042, + "step": 68490 + }, + { + "epoch": 12.21, + "learning_rate": 4.389532810271041e-05, + "loss": 0.0039, + "step": 68500 + }, + { + "epoch": 12.22, + "learning_rate": 4.38944365192582e-05, + "loss": 0.0029, + "step": 68510 + }, + { + "epoch": 12.22, + "learning_rate": 4.3893544935805994e-05, + "loss": 0.0064, + "step": 68520 + }, + { + "epoch": 12.22, + "learning_rate": 4.389265335235378e-05, + "loss": 0.0042, + "step": 68530 + }, + { + "epoch": 12.22, + "learning_rate": 4.389176176890158e-05, + "loss": 0.0043, + "step": 68540 + }, + { + "epoch": 12.22, + "learning_rate": 4.389087018544936e-05, + "loss": 0.0041, + "step": 68550 + }, + { + "epoch": 12.23, + "learning_rate": 4.388997860199715e-05, + "loss": 0.0045, + "step": 68560 + }, + { + "epoch": 12.23, + "learning_rate": 4.388908701854494e-05, + "loss": 0.004, + "step": 68570 + }, + { + "epoch": 12.23, + "learning_rate": 4.388819543509273e-05, + "loss": 0.0032, + "step": 68580 + }, + { + "epoch": 12.23, + "learning_rate": 4.388730385164051e-05, + "loss": 0.0066, + "step": 68590 + }, + { + "epoch": 12.23, + "learning_rate": 4.3886412268188304e-05, + "loss": 0.0079, + "step": 68600 + }, + { + "epoch": 12.23, + "learning_rate": 4.3885520684736095e-05, + "loss": 0.0039, + "step": 68610 + }, + { + "epoch": 12.24, + "learning_rate": 4.388462910128388e-05, + "loss": 0.0031, + "step": 68620 + }, + { + "epoch": 12.24, + "learning_rate": 4.388373751783167e-05, + "loss": 0.0044, + "step": 68630 + }, + { + "epoch": 12.24, + "learning_rate": 4.3882845934379455e-05, + "loss": 0.0047, + "step": 68640 + }, + { + "epoch": 12.24, + "learning_rate": 4.388195435092725e-05, + "loss": 0.0019, + "step": 68650 + }, + { + "epoch": 12.24, + "learning_rate": 4.388106276747504e-05, + "loss": 0.0037, + "step": 68660 + }, + { + "epoch": 12.25, + "learning_rate": 4.388017118402283e-05, + "loss": 0.0033, + "step": 68670 + }, + { + "epoch": 12.25, + "learning_rate": 4.387927960057062e-05, + "loss": 0.0038, + "step": 68680 + }, + { + "epoch": 12.25, + "learning_rate": 4.3878388017118404e-05, + "loss": 0.0028, + "step": 68690 + }, + { + "epoch": 12.25, + "learning_rate": 4.3877496433666195e-05, + "loss": 0.0048, + "step": 68700 + }, + { + "epoch": 12.25, + "learning_rate": 4.387660485021398e-05, + "loss": 0.0033, + "step": 68710 + }, + { + "epoch": 12.25, + "learning_rate": 4.387571326676177e-05, + "loss": 0.0025, + "step": 68720 + }, + { + "epoch": 12.26, + "learning_rate": 4.3874821683309555e-05, + "loss": 0.0045, + "step": 68730 + }, + { + "epoch": 12.26, + "learning_rate": 4.387393009985735e-05, + "loss": 0.0018, + "step": 68740 + }, + { + "epoch": 12.26, + "learning_rate": 4.387303851640514e-05, + "loss": 0.0024, + "step": 68750 + }, + { + "epoch": 12.26, + "learning_rate": 4.387214693295293e-05, + "loss": 0.0018, + "step": 68760 + }, + { + "epoch": 12.26, + "learning_rate": 4.387125534950072e-05, + "loss": 0.0038, + "step": 68770 + }, + { + "epoch": 12.26, + "learning_rate": 4.3870363766048505e-05, + "loss": 0.0026, + "step": 68780 + }, + { + "epoch": 12.27, + "learning_rate": 4.3869472182596296e-05, + "loss": 0.0022, + "step": 68790 + }, + { + "epoch": 12.27, + "learning_rate": 4.386858059914408e-05, + "loss": 0.0017, + "step": 68800 + }, + { + "epoch": 12.27, + "learning_rate": 4.386768901569187e-05, + "loss": 0.0046, + "step": 68810 + }, + { + "epoch": 12.27, + "learning_rate": 4.3866797432239656e-05, + "loss": 0.0035, + "step": 68820 + }, + { + "epoch": 12.27, + "learning_rate": 4.386590584878745e-05, + "loss": 0.0032, + "step": 68830 + }, + { + "epoch": 12.28, + "learning_rate": 4.386501426533524e-05, + "loss": 0.0038, + "step": 68840 + }, + { + "epoch": 12.28, + "learning_rate": 4.386412268188302e-05, + "loss": 0.0033, + "step": 68850 + }, + { + "epoch": 12.28, + "learning_rate": 4.3863231098430814e-05, + "loss": 0.0038, + "step": 68860 + }, + { + "epoch": 12.28, + "learning_rate": 4.3862339514978605e-05, + "loss": 0.0043, + "step": 68870 + }, + { + "epoch": 12.28, + "learning_rate": 4.3861447931526396e-05, + "loss": 0.0019, + "step": 68880 + }, + { + "epoch": 12.28, + "learning_rate": 4.386055634807418e-05, + "loss": 0.0021, + "step": 68890 + }, + { + "epoch": 12.29, + "learning_rate": 4.385966476462197e-05, + "loss": 0.0039, + "step": 68900 + }, + { + "epoch": 12.29, + "learning_rate": 4.385877318116976e-05, + "loss": 0.0027, + "step": 68910 + }, + { + "epoch": 12.29, + "learning_rate": 4.385788159771755e-05, + "loss": 0.0029, + "step": 68920 + }, + { + "epoch": 12.29, + "learning_rate": 4.385699001426534e-05, + "loss": 0.0031, + "step": 68930 + }, + { + "epoch": 12.29, + "learning_rate": 4.3856098430813123e-05, + "loss": 0.0059, + "step": 68940 + }, + { + "epoch": 12.29, + "learning_rate": 4.3855206847360915e-05, + "loss": 0.0037, + "step": 68950 + }, + { + "epoch": 12.3, + "learning_rate": 4.38543152639087e-05, + "loss": 0.0044, + "step": 68960 + }, + { + "epoch": 12.3, + "learning_rate": 4.385342368045649e-05, + "loss": 0.0055, + "step": 68970 + }, + { + "epoch": 12.3, + "learning_rate": 4.385253209700428e-05, + "loss": 0.0037, + "step": 68980 + }, + { + "epoch": 12.3, + "learning_rate": 4.385164051355207e-05, + "loss": 0.0038, + "step": 68990 + }, + { + "epoch": 12.3, + "learning_rate": 4.3850748930099864e-05, + "loss": 0.0041, + "step": 69000 + }, + { + "epoch": 12.31, + "learning_rate": 4.384985734664765e-05, + "loss": 0.0025, + "step": 69010 + }, + { + "epoch": 12.31, + "learning_rate": 4.384896576319544e-05, + "loss": 0.0063, + "step": 69020 + }, + { + "epoch": 12.31, + "learning_rate": 4.3848074179743224e-05, + "loss": 0.0036, + "step": 69030 + }, + { + "epoch": 12.31, + "learning_rate": 4.3847182596291015e-05, + "loss": 0.0066, + "step": 69040 + }, + { + "epoch": 12.31, + "learning_rate": 4.38462910128388e-05, + "loss": 0.0036, + "step": 69050 + }, + { + "epoch": 12.31, + "learning_rate": 4.384539942938659e-05, + "loss": 0.0054, + "step": 69060 + }, + { + "epoch": 12.32, + "learning_rate": 4.384450784593438e-05, + "loss": 0.0061, + "step": 69070 + }, + { + "epoch": 12.32, + "learning_rate": 4.3843616262482166e-05, + "loss": 0.0038, + "step": 69080 + }, + { + "epoch": 12.32, + "learning_rate": 4.3842724679029964e-05, + "loss": 0.0046, + "step": 69090 + }, + { + "epoch": 12.32, + "learning_rate": 4.384183309557775e-05, + "loss": 0.0028, + "step": 69100 + }, + { + "epoch": 12.32, + "learning_rate": 4.384094151212554e-05, + "loss": 0.0028, + "step": 69110 + }, + { + "epoch": 12.33, + "learning_rate": 4.3840049928673325e-05, + "loss": 0.0037, + "step": 69120 + }, + { + "epoch": 12.33, + "learning_rate": 4.3839158345221116e-05, + "loss": 0.0043, + "step": 69130 + }, + { + "epoch": 12.33, + "learning_rate": 4.383826676176891e-05, + "loss": 0.003, + "step": 69140 + }, + { + "epoch": 12.33, + "learning_rate": 4.383737517831669e-05, + "loss": 0.0038, + "step": 69150 + }, + { + "epoch": 12.33, + "learning_rate": 4.383648359486448e-05, + "loss": 0.0063, + "step": 69160 + }, + { + "epoch": 12.33, + "learning_rate": 4.383559201141227e-05, + "loss": 0.0041, + "step": 69170 + }, + { + "epoch": 12.34, + "learning_rate": 4.383470042796006e-05, + "loss": 0.0022, + "step": 69180 + }, + { + "epoch": 12.34, + "learning_rate": 4.383380884450784e-05, + "loss": 0.0027, + "step": 69190 + }, + { + "epoch": 12.34, + "learning_rate": 4.383291726105564e-05, + "loss": 0.0034, + "step": 69200 + }, + { + "epoch": 12.34, + "learning_rate": 4.3832025677603425e-05, + "loss": 0.0029, + "step": 69210 + }, + { + "epoch": 12.34, + "learning_rate": 4.3831134094151216e-05, + "loss": 0.0028, + "step": 69220 + }, + { + "epoch": 12.34, + "learning_rate": 4.383024251069901e-05, + "loss": 0.0048, + "step": 69230 + }, + { + "epoch": 12.35, + "learning_rate": 4.382935092724679e-05, + "loss": 0.0031, + "step": 69240 + }, + { + "epoch": 12.35, + "learning_rate": 4.382845934379458e-05, + "loss": 0.0069, + "step": 69250 + }, + { + "epoch": 12.35, + "learning_rate": 4.382756776034237e-05, + "loss": 0.0062, + "step": 69260 + }, + { + "epoch": 12.35, + "learning_rate": 4.382667617689016e-05, + "loss": 0.0025, + "step": 69270 + }, + { + "epoch": 12.35, + "learning_rate": 4.382578459343794e-05, + "loss": 0.0042, + "step": 69280 + }, + { + "epoch": 12.36, + "learning_rate": 4.3824893009985734e-05, + "loss": 0.0024, + "step": 69290 + }, + { + "epoch": 12.36, + "learning_rate": 4.3824001426533526e-05, + "loss": 0.0028, + "step": 69300 + }, + { + "epoch": 12.36, + "learning_rate": 4.382310984308132e-05, + "loss": 0.0027, + "step": 69310 + }, + { + "epoch": 12.36, + "learning_rate": 4.382221825962911e-05, + "loss": 0.0036, + "step": 69320 + }, + { + "epoch": 12.36, + "learning_rate": 4.382132667617689e-05, + "loss": 0.0017, + "step": 69330 + }, + { + "epoch": 12.36, + "learning_rate": 4.3820435092724684e-05, + "loss": 0.0033, + "step": 69340 + }, + { + "epoch": 12.37, + "learning_rate": 4.381954350927247e-05, + "loss": 0.0037, + "step": 69350 + }, + { + "epoch": 12.37, + "learning_rate": 4.381865192582026e-05, + "loss": 0.0017, + "step": 69360 + }, + { + "epoch": 12.37, + "learning_rate": 4.381776034236805e-05, + "loss": 0.003, + "step": 69370 + }, + { + "epoch": 12.37, + "learning_rate": 4.3816868758915835e-05, + "loss": 0.0053, + "step": 69380 + }, + { + "epoch": 12.37, + "learning_rate": 4.3815977175463626e-05, + "loss": 0.0055, + "step": 69390 + }, + { + "epoch": 12.38, + "learning_rate": 4.381508559201141e-05, + "loss": 0.0031, + "step": 69400 + }, + { + "epoch": 12.38, + "learning_rate": 4.38141940085592e-05, + "loss": 0.005, + "step": 69410 + }, + { + "epoch": 12.38, + "learning_rate": 4.381330242510699e-05, + "loss": 0.0041, + "step": 69420 + }, + { + "epoch": 12.38, + "learning_rate": 4.3812410841654784e-05, + "loss": 0.0062, + "step": 69430 + }, + { + "epoch": 12.38, + "learning_rate": 4.381151925820257e-05, + "loss": 0.005, + "step": 69440 + }, + { + "epoch": 12.38, + "learning_rate": 4.381062767475036e-05, + "loss": 0.0054, + "step": 69450 + }, + { + "epoch": 12.39, + "learning_rate": 4.380973609129815e-05, + "loss": 0.0015, + "step": 69460 + }, + { + "epoch": 12.39, + "learning_rate": 4.3808844507845936e-05, + "loss": 0.0079, + "step": 69470 + }, + { + "epoch": 12.39, + "learning_rate": 4.380795292439373e-05, + "loss": 0.005, + "step": 69480 + }, + { + "epoch": 12.39, + "learning_rate": 4.380706134094151e-05, + "loss": 0.0018, + "step": 69490 + }, + { + "epoch": 12.39, + "learning_rate": 4.38061697574893e-05, + "loss": 0.0035, + "step": 69500 + }, + { + "epoch": 12.39, + "learning_rate": 4.380527817403709e-05, + "loss": 0.0024, + "step": 69510 + }, + { + "epoch": 12.4, + "learning_rate": 4.380438659058488e-05, + "loss": 0.0023, + "step": 69520 + }, + { + "epoch": 12.4, + "learning_rate": 4.380349500713267e-05, + "loss": 0.0042, + "step": 69530 + }, + { + "epoch": 12.4, + "learning_rate": 4.380260342368046e-05, + "loss": 0.0026, + "step": 69540 + }, + { + "epoch": 12.4, + "learning_rate": 4.380171184022825e-05, + "loss": 0.0034, + "step": 69550 + }, + { + "epoch": 12.4, + "learning_rate": 4.3800820256776036e-05, + "loss": 0.0048, + "step": 69560 + }, + { + "epoch": 12.41, + "learning_rate": 4.379992867332383e-05, + "loss": 0.0028, + "step": 69570 + }, + { + "epoch": 12.41, + "learning_rate": 4.379903708987161e-05, + "loss": 0.0042, + "step": 69580 + }, + { + "epoch": 12.41, + "learning_rate": 4.37981455064194e-05, + "loss": 0.0039, + "step": 69590 + }, + { + "epoch": 12.41, + "learning_rate": 4.379725392296719e-05, + "loss": 0.0045, + "step": 69600 + }, + { + "epoch": 12.41, + "learning_rate": 4.379636233951498e-05, + "loss": 0.004, + "step": 69610 + }, + { + "epoch": 12.41, + "learning_rate": 4.379547075606277e-05, + "loss": 0.0028, + "step": 69620 + }, + { + "epoch": 12.42, + "learning_rate": 4.3794579172610554e-05, + "loss": 0.0043, + "step": 69630 + }, + { + "epoch": 12.42, + "learning_rate": 4.379368758915835e-05, + "loss": 0.0023, + "step": 69640 + }, + { + "epoch": 12.42, + "learning_rate": 4.379279600570614e-05, + "loss": 0.0032, + "step": 69650 + }, + { + "epoch": 12.42, + "learning_rate": 4.379190442225393e-05, + "loss": 0.0017, + "step": 69660 + }, + { + "epoch": 12.42, + "learning_rate": 4.379101283880171e-05, + "loss": 0.0029, + "step": 69670 + }, + { + "epoch": 12.43, + "learning_rate": 4.3790121255349504e-05, + "loss": 0.0066, + "step": 69680 + }, + { + "epoch": 12.43, + "learning_rate": 4.3789229671897295e-05, + "loss": 0.0034, + "step": 69690 + }, + { + "epoch": 12.43, + "learning_rate": 4.378833808844508e-05, + "loss": 0.0037, + "step": 69700 + }, + { + "epoch": 12.43, + "learning_rate": 4.378744650499287e-05, + "loss": 0.0032, + "step": 69710 + }, + { + "epoch": 12.43, + "learning_rate": 4.3786554921540655e-05, + "loss": 0.0043, + "step": 69720 + }, + { + "epoch": 12.43, + "learning_rate": 4.3785663338088446e-05, + "loss": 0.0036, + "step": 69730 + }, + { + "epoch": 12.44, + "learning_rate": 4.378477175463623e-05, + "loss": 0.0048, + "step": 69740 + }, + { + "epoch": 12.44, + "learning_rate": 4.378388017118403e-05, + "loss": 0.0033, + "step": 69750 + }, + { + "epoch": 12.44, + "learning_rate": 4.378298858773181e-05, + "loss": 0.0031, + "step": 69760 + }, + { + "epoch": 12.44, + "learning_rate": 4.3782097004279604e-05, + "loss": 0.004, + "step": 69770 + }, + { + "epoch": 12.44, + "learning_rate": 4.3781205420827395e-05, + "loss": 0.0046, + "step": 69780 + }, + { + "epoch": 12.44, + "learning_rate": 4.378031383737518e-05, + "loss": 0.0035, + "step": 69790 + }, + { + "epoch": 12.45, + "learning_rate": 4.377942225392297e-05, + "loss": 0.0023, + "step": 69800 + }, + { + "epoch": 12.45, + "learning_rate": 4.3778530670470755e-05, + "loss": 0.0044, + "step": 69810 + }, + { + "epoch": 12.45, + "learning_rate": 4.3777639087018547e-05, + "loss": 0.0026, + "step": 69820 + }, + { + "epoch": 12.45, + "learning_rate": 4.377674750356633e-05, + "loss": 0.0031, + "step": 69830 + }, + { + "epoch": 12.45, + "learning_rate": 4.377585592011412e-05, + "loss": 0.0019, + "step": 69840 + }, + { + "epoch": 12.46, + "learning_rate": 4.3774964336661913e-05, + "loss": 0.0031, + "step": 69850 + }, + { + "epoch": 12.46, + "learning_rate": 4.3774072753209705e-05, + "loss": 0.0035, + "step": 69860 + }, + { + "epoch": 12.46, + "learning_rate": 4.3773181169757496e-05, + "loss": 0.0047, + "step": 69870 + }, + { + "epoch": 12.46, + "learning_rate": 4.377228958630528e-05, + "loss": 0.002, + "step": 69880 + }, + { + "epoch": 12.46, + "learning_rate": 4.377139800285307e-05, + "loss": 0.0019, + "step": 69890 + }, + { + "epoch": 12.46, + "learning_rate": 4.3770506419400856e-05, + "loss": 0.005, + "step": 69900 + }, + { + "epoch": 12.47, + "learning_rate": 4.376961483594865e-05, + "loss": 0.0027, + "step": 69910 + }, + { + "epoch": 12.47, + "learning_rate": 4.376872325249644e-05, + "loss": 0.0028, + "step": 69920 + }, + { + "epoch": 12.47, + "learning_rate": 4.376783166904422e-05, + "loss": 0.0037, + "step": 69930 + }, + { + "epoch": 12.47, + "learning_rate": 4.3766940085592014e-05, + "loss": 0.0019, + "step": 69940 + }, + { + "epoch": 12.47, + "learning_rate": 4.37660485021398e-05, + "loss": 0.0061, + "step": 69950 + }, + { + "epoch": 12.48, + "learning_rate": 4.376515691868759e-05, + "loss": 0.0028, + "step": 69960 + }, + { + "epoch": 12.48, + "learning_rate": 4.376426533523538e-05, + "loss": 0.0028, + "step": 69970 + }, + { + "epoch": 12.48, + "learning_rate": 4.376337375178317e-05, + "loss": 0.0014, + "step": 69980 + }, + { + "epoch": 12.48, + "learning_rate": 4.3762482168330956e-05, + "loss": 0.004, + "step": 69990 + }, + { + "epoch": 12.48, + "learning_rate": 4.376159058487875e-05, + "loss": 0.0022, + "step": 70000 + }, + { + "epoch": 12.48, + "learning_rate": 4.376069900142654e-05, + "loss": 0.0053, + "step": 70010 + }, + { + "epoch": 12.49, + "learning_rate": 4.375980741797432e-05, + "loss": 0.0047, + "step": 70020 + }, + { + "epoch": 12.49, + "learning_rate": 4.3758915834522115e-05, + "loss": 0.0021, + "step": 70030 + }, + { + "epoch": 12.49, + "learning_rate": 4.37580242510699e-05, + "loss": 0.0035, + "step": 70040 + }, + { + "epoch": 12.49, + "learning_rate": 4.375713266761769e-05, + "loss": 0.0041, + "step": 70050 + }, + { + "epoch": 12.49, + "learning_rate": 4.3756241084165475e-05, + "loss": 0.0032, + "step": 70060 + }, + { + "epoch": 12.49, + "learning_rate": 4.3755349500713266e-05, + "loss": 0.0036, + "step": 70070 + }, + { + "epoch": 12.5, + "learning_rate": 4.375445791726106e-05, + "loss": 0.0031, + "step": 70080 + }, + { + "epoch": 12.5, + "learning_rate": 4.375356633380885e-05, + "loss": 0.0034, + "step": 70090 + }, + { + "epoch": 12.5, + "learning_rate": 4.375267475035664e-05, + "loss": 0.0031, + "step": 70100 + }, + { + "epoch": 12.5, + "learning_rate": 4.3751783166904424e-05, + "loss": 0.0065, + "step": 70110 + }, + { + "epoch": 12.5, + "learning_rate": 4.3750891583452215e-05, + "loss": 0.007, + "step": 70120 + }, + { + "epoch": 12.51, + "learning_rate": 4.375e-05, + "loss": 0.0029, + "step": 70130 + }, + { + "epoch": 12.51, + "learning_rate": 4.374910841654779e-05, + "loss": 0.0041, + "step": 70140 + }, + { + "epoch": 12.51, + "learning_rate": 4.374821683309558e-05, + "loss": 0.0047, + "step": 70150 + }, + { + "epoch": 12.51, + "learning_rate": 4.3747325249643366e-05, + "loss": 0.0034, + "step": 70160 + }, + { + "epoch": 12.51, + "learning_rate": 4.374643366619116e-05, + "loss": 0.0065, + "step": 70170 + }, + { + "epoch": 12.51, + "learning_rate": 4.374554208273894e-05, + "loss": 0.0038, + "step": 70180 + }, + { + "epoch": 12.52, + "learning_rate": 4.374465049928674e-05, + "loss": 0.0058, + "step": 70190 + }, + { + "epoch": 12.52, + "learning_rate": 4.3743758915834524e-05, + "loss": 0.0027, + "step": 70200 + }, + { + "epoch": 12.52, + "learning_rate": 4.3742867332382316e-05, + "loss": 0.0038, + "step": 70210 + }, + { + "epoch": 12.52, + "learning_rate": 4.37419757489301e-05, + "loss": 0.0036, + "step": 70220 + }, + { + "epoch": 12.52, + "learning_rate": 4.374108416547789e-05, + "loss": 0.0051, + "step": 70230 + }, + { + "epoch": 12.52, + "learning_rate": 4.374019258202568e-05, + "loss": 0.0027, + "step": 70240 + }, + { + "epoch": 12.53, + "learning_rate": 4.373930099857347e-05, + "loss": 0.005, + "step": 70250 + }, + { + "epoch": 12.53, + "learning_rate": 4.373840941512126e-05, + "loss": 0.0031, + "step": 70260 + }, + { + "epoch": 12.53, + "learning_rate": 4.373751783166904e-05, + "loss": 0.003, + "step": 70270 + }, + { + "epoch": 12.53, + "learning_rate": 4.3736626248216834e-05, + "loss": 0.003, + "step": 70280 + }, + { + "epoch": 12.53, + "learning_rate": 4.373573466476462e-05, + "loss": 0.0047, + "step": 70290 + }, + { + "epoch": 12.54, + "learning_rate": 4.3734843081312416e-05, + "loss": 0.0037, + "step": 70300 + }, + { + "epoch": 12.54, + "learning_rate": 4.37339514978602e-05, + "loss": 0.0034, + "step": 70310 + }, + { + "epoch": 12.54, + "learning_rate": 4.373305991440799e-05, + "loss": 0.0043, + "step": 70320 + }, + { + "epoch": 12.54, + "learning_rate": 4.373216833095578e-05, + "loss": 0.0025, + "step": 70330 + }, + { + "epoch": 12.54, + "learning_rate": 4.373127674750357e-05, + "loss": 0.0019, + "step": 70340 + }, + { + "epoch": 12.54, + "learning_rate": 4.373038516405136e-05, + "loss": 0.0037, + "step": 70350 + }, + { + "epoch": 12.55, + "learning_rate": 4.372949358059914e-05, + "loss": 0.0032, + "step": 70360 + }, + { + "epoch": 12.55, + "learning_rate": 4.3728601997146934e-05, + "loss": 0.0019, + "step": 70370 + }, + { + "epoch": 12.55, + "learning_rate": 4.3727710413694726e-05, + "loss": 0.0039, + "step": 70380 + }, + { + "epoch": 12.55, + "learning_rate": 4.372681883024251e-05, + "loss": 0.004, + "step": 70390 + }, + { + "epoch": 12.55, + "learning_rate": 4.37259272467903e-05, + "loss": 0.0041, + "step": 70400 + }, + { + "epoch": 12.56, + "learning_rate": 4.372503566333809e-05, + "loss": 0.0027, + "step": 70410 + }, + { + "epoch": 12.56, + "learning_rate": 4.3724144079885884e-05, + "loss": 0.0059, + "step": 70420 + }, + { + "epoch": 12.56, + "learning_rate": 4.372325249643367e-05, + "loss": 0.0037, + "step": 70430 + }, + { + "epoch": 12.56, + "learning_rate": 4.372236091298146e-05, + "loss": 0.0039, + "step": 70440 + }, + { + "epoch": 12.56, + "learning_rate": 4.3721469329529244e-05, + "loss": 0.0037, + "step": 70450 + }, + { + "epoch": 12.56, + "learning_rate": 4.3720577746077035e-05, + "loss": 0.0017, + "step": 70460 + }, + { + "epoch": 12.57, + "learning_rate": 4.3719686162624826e-05, + "loss": 0.0062, + "step": 70470 + }, + { + "epoch": 12.57, + "learning_rate": 4.371879457917261e-05, + "loss": 0.0042, + "step": 70480 + }, + { + "epoch": 12.57, + "learning_rate": 4.37179029957204e-05, + "loss": 0.003, + "step": 70490 + }, + { + "epoch": 12.57, + "learning_rate": 4.3717011412268186e-05, + "loss": 0.0039, + "step": 70500 + }, + { + "epoch": 12.57, + "learning_rate": 4.371611982881598e-05, + "loss": 0.0029, + "step": 70510 + }, + { + "epoch": 12.57, + "learning_rate": 4.371522824536377e-05, + "loss": 0.0053, + "step": 70520 + }, + { + "epoch": 12.58, + "learning_rate": 4.371433666191156e-05, + "loss": 0.0049, + "step": 70530 + }, + { + "epoch": 12.58, + "learning_rate": 4.3713445078459344e-05, + "loss": 0.0044, + "step": 70540 + }, + { + "epoch": 12.58, + "learning_rate": 4.3712553495007136e-05, + "loss": 0.0041, + "step": 70550 + }, + { + "epoch": 12.58, + "learning_rate": 4.371166191155493e-05, + "loss": 0.0066, + "step": 70560 + }, + { + "epoch": 12.58, + "learning_rate": 4.371077032810271e-05, + "loss": 0.0032, + "step": 70570 + }, + { + "epoch": 12.59, + "learning_rate": 4.37098787446505e-05, + "loss": 0.0051, + "step": 70580 + }, + { + "epoch": 12.59, + "learning_rate": 4.370898716119829e-05, + "loss": 0.0039, + "step": 70590 + }, + { + "epoch": 12.59, + "learning_rate": 4.370809557774608e-05, + "loss": 0.0067, + "step": 70600 + }, + { + "epoch": 12.59, + "learning_rate": 4.370720399429387e-05, + "loss": 0.0024, + "step": 70610 + }, + { + "epoch": 12.59, + "learning_rate": 4.3706312410841654e-05, + "loss": 0.0024, + "step": 70620 + }, + { + "epoch": 12.59, + "learning_rate": 4.370542082738945e-05, + "loss": 0.0038, + "step": 70630 + }, + { + "epoch": 12.6, + "learning_rate": 4.3704529243937236e-05, + "loss": 0.0022, + "step": 70640 + }, + { + "epoch": 12.6, + "learning_rate": 4.370363766048503e-05, + "loss": 0.0028, + "step": 70650 + }, + { + "epoch": 12.6, + "learning_rate": 4.370274607703281e-05, + "loss": 0.0022, + "step": 70660 + }, + { + "epoch": 12.6, + "learning_rate": 4.37018544935806e-05, + "loss": 0.0031, + "step": 70670 + }, + { + "epoch": 12.6, + "learning_rate": 4.370096291012839e-05, + "loss": 0.0025, + "step": 70680 + }, + { + "epoch": 12.61, + "learning_rate": 4.370007132667618e-05, + "loss": 0.0032, + "step": 70690 + }, + { + "epoch": 12.61, + "learning_rate": 4.369917974322397e-05, + "loss": 0.006, + "step": 70700 + }, + { + "epoch": 12.61, + "learning_rate": 4.3698288159771754e-05, + "loss": 0.0046, + "step": 70710 + }, + { + "epoch": 12.61, + "learning_rate": 4.3697396576319545e-05, + "loss": 0.0045, + "step": 70720 + }, + { + "epoch": 12.61, + "learning_rate": 4.369650499286733e-05, + "loss": 0.0049, + "step": 70730 + }, + { + "epoch": 12.61, + "learning_rate": 4.369561340941513e-05, + "loss": 0.0032, + "step": 70740 + }, + { + "epoch": 12.62, + "learning_rate": 4.369472182596291e-05, + "loss": 0.005, + "step": 70750 + }, + { + "epoch": 12.62, + "learning_rate": 4.3693830242510703e-05, + "loss": 0.0029, + "step": 70760 + }, + { + "epoch": 12.62, + "learning_rate": 4.369293865905849e-05, + "loss": 0.0035, + "step": 70770 + }, + { + "epoch": 12.62, + "learning_rate": 4.369204707560628e-05, + "loss": 0.0045, + "step": 70780 + }, + { + "epoch": 12.62, + "learning_rate": 4.369115549215407e-05, + "loss": 0.0053, + "step": 70790 + }, + { + "epoch": 12.62, + "learning_rate": 4.3690263908701855e-05, + "loss": 0.0072, + "step": 70800 + }, + { + "epoch": 12.63, + "learning_rate": 4.3689372325249646e-05, + "loss": 0.0033, + "step": 70810 + }, + { + "epoch": 12.63, + "learning_rate": 4.368848074179743e-05, + "loss": 0.0042, + "step": 70820 + }, + { + "epoch": 12.63, + "learning_rate": 4.368758915834522e-05, + "loss": 0.0049, + "step": 70830 + }, + { + "epoch": 12.63, + "learning_rate": 4.368669757489301e-05, + "loss": 0.0078, + "step": 70840 + }, + { + "epoch": 12.63, + "learning_rate": 4.3685805991440804e-05, + "loss": 0.0033, + "step": 70850 + }, + { + "epoch": 12.64, + "learning_rate": 4.3684914407988595e-05, + "loss": 0.002, + "step": 70860 + }, + { + "epoch": 12.64, + "learning_rate": 4.368402282453638e-05, + "loss": 0.0022, + "step": 70870 + }, + { + "epoch": 12.64, + "learning_rate": 4.368313124108417e-05, + "loss": 0.0038, + "step": 70880 + }, + { + "epoch": 12.64, + "learning_rate": 4.3682239657631955e-05, + "loss": 0.0049, + "step": 70890 + }, + { + "epoch": 12.64, + "learning_rate": 4.3681348074179747e-05, + "loss": 0.0028, + "step": 70900 + }, + { + "epoch": 12.64, + "learning_rate": 4.368045649072753e-05, + "loss": 0.0027, + "step": 70910 + }, + { + "epoch": 12.65, + "learning_rate": 4.367956490727532e-05, + "loss": 0.0024, + "step": 70920 + }, + { + "epoch": 12.65, + "learning_rate": 4.3678673323823113e-05, + "loss": 0.003, + "step": 70930 + }, + { + "epoch": 12.65, + "learning_rate": 4.36777817403709e-05, + "loss": 0.0032, + "step": 70940 + }, + { + "epoch": 12.65, + "learning_rate": 4.367689015691869e-05, + "loss": 0.003, + "step": 70950 + }, + { + "epoch": 12.65, + "learning_rate": 4.367599857346648e-05, + "loss": 0.0043, + "step": 70960 + }, + { + "epoch": 12.66, + "learning_rate": 4.367510699001427e-05, + "loss": 0.0023, + "step": 70970 + }, + { + "epoch": 12.66, + "learning_rate": 4.3674215406562056e-05, + "loss": 0.0067, + "step": 70980 + }, + { + "epoch": 12.66, + "learning_rate": 4.367332382310985e-05, + "loss": 0.003, + "step": 70990 + }, + { + "epoch": 12.66, + "learning_rate": 4.367243223965763e-05, + "loss": 0.0048, + "step": 71000 + }, + { + "epoch": 12.66, + "learning_rate": 4.367154065620542e-05, + "loss": 0.0024, + "step": 71010 + }, + { + "epoch": 12.66, + "learning_rate": 4.3670649072753214e-05, + "loss": 0.0041, + "step": 71020 + }, + { + "epoch": 12.67, + "learning_rate": 4.3669757489301e-05, + "loss": 0.0046, + "step": 71030 + }, + { + "epoch": 12.67, + "learning_rate": 4.366886590584879e-05, + "loss": 0.0037, + "step": 71040 + }, + { + "epoch": 12.67, + "learning_rate": 4.3667974322396574e-05, + "loss": 0.003, + "step": 71050 + }, + { + "epoch": 12.67, + "learning_rate": 4.3667082738944365e-05, + "loss": 0.0032, + "step": 71060 + }, + { + "epoch": 12.67, + "learning_rate": 4.3666191155492156e-05, + "loss": 0.0029, + "step": 71070 + }, + { + "epoch": 12.67, + "learning_rate": 4.366529957203995e-05, + "loss": 0.0039, + "step": 71080 + }, + { + "epoch": 12.68, + "learning_rate": 4.366440798858774e-05, + "loss": 0.0041, + "step": 71090 + }, + { + "epoch": 12.68, + "learning_rate": 4.366351640513552e-05, + "loss": 0.0061, + "step": 71100 + }, + { + "epoch": 12.68, + "learning_rate": 4.3662624821683315e-05, + "loss": 0.0025, + "step": 71110 + }, + { + "epoch": 12.68, + "learning_rate": 4.36617332382311e-05, + "loss": 0.004, + "step": 71120 + }, + { + "epoch": 12.68, + "learning_rate": 4.366084165477889e-05, + "loss": 0.0028, + "step": 71130 + }, + { + "epoch": 12.69, + "learning_rate": 4.3659950071326675e-05, + "loss": 0.0022, + "step": 71140 + }, + { + "epoch": 12.69, + "learning_rate": 4.3659058487874466e-05, + "loss": 0.0019, + "step": 71150 + }, + { + "epoch": 12.69, + "learning_rate": 4.365816690442226e-05, + "loss": 0.0036, + "step": 71160 + }, + { + "epoch": 12.69, + "learning_rate": 4.365727532097004e-05, + "loss": 0.0026, + "step": 71170 + }, + { + "epoch": 12.69, + "learning_rate": 4.365638373751784e-05, + "loss": 0.0022, + "step": 71180 + }, + { + "epoch": 12.69, + "learning_rate": 4.3655492154065624e-05, + "loss": 0.0058, + "step": 71190 + }, + { + "epoch": 12.7, + "learning_rate": 4.3654600570613415e-05, + "loss": 0.0041, + "step": 71200 + }, + { + "epoch": 12.7, + "learning_rate": 4.36537089871612e-05, + "loss": 0.0031, + "step": 71210 + }, + { + "epoch": 12.7, + "learning_rate": 4.365281740370899e-05, + "loss": 0.0025, + "step": 71220 + }, + { + "epoch": 12.7, + "learning_rate": 4.3651925820256775e-05, + "loss": 0.0046, + "step": 71230 + }, + { + "epoch": 12.7, + "learning_rate": 4.3651034236804566e-05, + "loss": 0.0045, + "step": 71240 + }, + { + "epoch": 12.71, + "learning_rate": 4.365014265335236e-05, + "loss": 0.0043, + "step": 71250 + }, + { + "epoch": 12.71, + "learning_rate": 4.364925106990014e-05, + "loss": 0.0058, + "step": 71260 + }, + { + "epoch": 12.71, + "learning_rate": 4.364835948644793e-05, + "loss": 0.0045, + "step": 71270 + }, + { + "epoch": 12.71, + "learning_rate": 4.364746790299572e-05, + "loss": 0.0035, + "step": 71280 + }, + { + "epoch": 12.71, + "learning_rate": 4.3646576319543516e-05, + "loss": 0.0023, + "step": 71290 + }, + { + "epoch": 12.71, + "learning_rate": 4.36456847360913e-05, + "loss": 0.0055, + "step": 71300 + }, + { + "epoch": 12.72, + "learning_rate": 4.364479315263909e-05, + "loss": 0.0031, + "step": 71310 + }, + { + "epoch": 12.72, + "learning_rate": 4.364390156918688e-05, + "loss": 0.0012, + "step": 71320 + }, + { + "epoch": 12.72, + "learning_rate": 4.364300998573467e-05, + "loss": 0.0024, + "step": 71330 + }, + { + "epoch": 12.72, + "learning_rate": 4.364211840228246e-05, + "loss": 0.002, + "step": 71340 + }, + { + "epoch": 12.72, + "learning_rate": 4.364122681883024e-05, + "loss": 0.0028, + "step": 71350 + }, + { + "epoch": 12.72, + "learning_rate": 4.3640335235378034e-05, + "loss": 0.0052, + "step": 71360 + }, + { + "epoch": 12.73, + "learning_rate": 4.363944365192582e-05, + "loss": 0.0048, + "step": 71370 + }, + { + "epoch": 12.73, + "learning_rate": 4.363855206847361e-05, + "loss": 0.0027, + "step": 71380 + }, + { + "epoch": 12.73, + "learning_rate": 4.36376604850214e-05, + "loss": 0.0021, + "step": 71390 + }, + { + "epoch": 12.73, + "learning_rate": 4.363676890156919e-05, + "loss": 0.0044, + "step": 71400 + }, + { + "epoch": 12.73, + "learning_rate": 4.363587731811698e-05, + "loss": 0.0054, + "step": 71410 + }, + { + "epoch": 12.74, + "learning_rate": 4.363498573466477e-05, + "loss": 0.008, + "step": 71420 + }, + { + "epoch": 12.74, + "learning_rate": 4.363409415121256e-05, + "loss": 0.0044, + "step": 71430 + }, + { + "epoch": 12.74, + "learning_rate": 4.363320256776034e-05, + "loss": 0.0065, + "step": 71440 + }, + { + "epoch": 12.74, + "learning_rate": 4.3632310984308134e-05, + "loss": 0.0039, + "step": 71450 + }, + { + "epoch": 12.74, + "learning_rate": 4.363141940085592e-05, + "loss": 0.0028, + "step": 71460 + }, + { + "epoch": 12.74, + "learning_rate": 4.363052781740371e-05, + "loss": 0.0021, + "step": 71470 + }, + { + "epoch": 12.75, + "learning_rate": 4.36296362339515e-05, + "loss": 0.006, + "step": 71480 + }, + { + "epoch": 12.75, + "learning_rate": 4.3628744650499286e-05, + "loss": 0.0021, + "step": 71490 + }, + { + "epoch": 12.75, + "learning_rate": 4.362785306704708e-05, + "loss": 0.0035, + "step": 71500 + }, + { + "epoch": 12.75, + "learning_rate": 4.362696148359487e-05, + "loss": 0.0058, + "step": 71510 + }, + { + "epoch": 12.75, + "learning_rate": 4.362606990014266e-05, + "loss": 0.003, + "step": 71520 + }, + { + "epoch": 12.75, + "learning_rate": 4.3625178316690444e-05, + "loss": 0.0045, + "step": 71530 + }, + { + "epoch": 12.76, + "learning_rate": 4.3624286733238235e-05, + "loss": 0.0038, + "step": 71540 + }, + { + "epoch": 12.76, + "learning_rate": 4.362339514978602e-05, + "loss": 0.0036, + "step": 71550 + }, + { + "epoch": 12.76, + "learning_rate": 4.362250356633381e-05, + "loss": 0.0047, + "step": 71560 + }, + { + "epoch": 12.76, + "learning_rate": 4.36216119828816e-05, + "loss": 0.0052, + "step": 71570 + }, + { + "epoch": 12.76, + "learning_rate": 4.3620720399429386e-05, + "loss": 0.0053, + "step": 71580 + }, + { + "epoch": 12.77, + "learning_rate": 4.361982881597718e-05, + "loss": 0.0049, + "step": 71590 + }, + { + "epoch": 12.77, + "learning_rate": 4.361893723252496e-05, + "loss": 0.0066, + "step": 71600 + }, + { + "epoch": 12.77, + "learning_rate": 4.361804564907275e-05, + "loss": 0.0036, + "step": 71610 + }, + { + "epoch": 12.77, + "learning_rate": 4.3617154065620544e-05, + "loss": 0.0044, + "step": 71620 + }, + { + "epoch": 12.77, + "learning_rate": 4.3616262482168335e-05, + "loss": 0.003, + "step": 71630 + }, + { + "epoch": 12.77, + "learning_rate": 4.361537089871613e-05, + "loss": 0.0029, + "step": 71640 + }, + { + "epoch": 12.78, + "learning_rate": 4.361447931526391e-05, + "loss": 0.003, + "step": 71650 + }, + { + "epoch": 12.78, + "learning_rate": 4.36135877318117e-05, + "loss": 0.0037, + "step": 71660 + }, + { + "epoch": 12.78, + "learning_rate": 4.361269614835949e-05, + "loss": 0.0028, + "step": 71670 + }, + { + "epoch": 12.78, + "learning_rate": 4.361180456490728e-05, + "loss": 0.0049, + "step": 71680 + }, + { + "epoch": 12.78, + "learning_rate": 4.361091298145506e-05, + "loss": 0.0053, + "step": 71690 + }, + { + "epoch": 12.79, + "learning_rate": 4.3610021398002854e-05, + "loss": 0.0029, + "step": 71700 + }, + { + "epoch": 12.79, + "learning_rate": 4.3609129814550645e-05, + "loss": 0.0046, + "step": 71710 + }, + { + "epoch": 12.79, + "learning_rate": 4.360823823109843e-05, + "loss": 0.0015, + "step": 71720 + }, + { + "epoch": 12.79, + "learning_rate": 4.360734664764623e-05, + "loss": 0.0057, + "step": 71730 + }, + { + "epoch": 12.79, + "learning_rate": 4.360645506419401e-05, + "loss": 0.0062, + "step": 71740 + }, + { + "epoch": 12.79, + "learning_rate": 4.36055634807418e-05, + "loss": 0.0025, + "step": 71750 + }, + { + "epoch": 12.8, + "learning_rate": 4.360467189728959e-05, + "loss": 0.0051, + "step": 71760 + }, + { + "epoch": 12.8, + "learning_rate": 4.360378031383738e-05, + "loss": 0.002, + "step": 71770 + }, + { + "epoch": 12.8, + "learning_rate": 4.360288873038516e-05, + "loss": 0.0037, + "step": 71780 + }, + { + "epoch": 12.8, + "learning_rate": 4.3601997146932954e-05, + "loss": 0.0041, + "step": 71790 + }, + { + "epoch": 12.8, + "learning_rate": 4.3601105563480745e-05, + "loss": 0.0054, + "step": 71800 + }, + { + "epoch": 12.8, + "learning_rate": 4.360021398002853e-05, + "loss": 0.0047, + "step": 71810 + }, + { + "epoch": 12.81, + "learning_rate": 4.359932239657632e-05, + "loss": 0.002, + "step": 71820 + }, + { + "epoch": 12.81, + "learning_rate": 4.3598430813124105e-05, + "loss": 0.0044, + "step": 71830 + }, + { + "epoch": 12.81, + "learning_rate": 4.3597539229671903e-05, + "loss": 0.0029, + "step": 71840 + }, + { + "epoch": 12.81, + "learning_rate": 4.359664764621969e-05, + "loss": 0.0044, + "step": 71850 + }, + { + "epoch": 12.81, + "learning_rate": 4.359575606276748e-05, + "loss": 0.0031, + "step": 71860 + }, + { + "epoch": 12.82, + "learning_rate": 4.359486447931527e-05, + "loss": 0.0034, + "step": 71870 + }, + { + "epoch": 12.82, + "learning_rate": 4.3593972895863055e-05, + "loss": 0.0042, + "step": 71880 + }, + { + "epoch": 12.82, + "learning_rate": 4.3593081312410846e-05, + "loss": 0.003, + "step": 71890 + }, + { + "epoch": 12.82, + "learning_rate": 4.359218972895863e-05, + "loss": 0.0031, + "step": 71900 + }, + { + "epoch": 12.82, + "learning_rate": 4.359129814550642e-05, + "loss": 0.0022, + "step": 71910 + }, + { + "epoch": 12.82, + "learning_rate": 4.3590406562054206e-05, + "loss": 0.0018, + "step": 71920 + }, + { + "epoch": 12.83, + "learning_rate": 4.3589514978602e-05, + "loss": 0.0016, + "step": 71930 + }, + { + "epoch": 12.83, + "learning_rate": 4.358862339514979e-05, + "loss": 0.004, + "step": 71940 + }, + { + "epoch": 12.83, + "learning_rate": 4.358773181169758e-05, + "loss": 0.0036, + "step": 71950 + }, + { + "epoch": 12.83, + "learning_rate": 4.358684022824537e-05, + "loss": 0.0049, + "step": 71960 + }, + { + "epoch": 12.83, + "learning_rate": 4.3585948644793155e-05, + "loss": 0.0059, + "step": 71970 + }, + { + "epoch": 12.84, + "learning_rate": 4.3585057061340947e-05, + "loss": 0.0054, + "step": 71980 + }, + { + "epoch": 12.84, + "learning_rate": 4.358416547788873e-05, + "loss": 0.003, + "step": 71990 + }, + { + "epoch": 12.84, + "learning_rate": 4.358327389443652e-05, + "loss": 0.0041, + "step": 72000 + }, + { + "epoch": 12.84, + "learning_rate": 4.3582382310984307e-05, + "loss": 0.0028, + "step": 72010 + }, + { + "epoch": 12.84, + "learning_rate": 4.35814907275321e-05, + "loss": 0.0043, + "step": 72020 + }, + { + "epoch": 12.84, + "learning_rate": 4.358059914407989e-05, + "loss": 0.0018, + "step": 72030 + }, + { + "epoch": 12.85, + "learning_rate": 4.3579707560627673e-05, + "loss": 0.002, + "step": 72040 + }, + { + "epoch": 12.85, + "learning_rate": 4.3578815977175465e-05, + "loss": 0.0028, + "step": 72050 + }, + { + "epoch": 12.85, + "learning_rate": 4.3577924393723256e-05, + "loss": 0.0033, + "step": 72060 + }, + { + "epoch": 12.85, + "learning_rate": 4.357703281027105e-05, + "loss": 0.0032, + "step": 72070 + }, + { + "epoch": 12.85, + "learning_rate": 4.357614122681883e-05, + "loss": 0.0024, + "step": 72080 + }, + { + "epoch": 12.85, + "learning_rate": 4.357524964336662e-05, + "loss": 0.0026, + "step": 72090 + }, + { + "epoch": 12.86, + "learning_rate": 4.3574358059914414e-05, + "loss": 0.004, + "step": 72100 + }, + { + "epoch": 12.86, + "learning_rate": 4.35734664764622e-05, + "loss": 0.0059, + "step": 72110 + }, + { + "epoch": 12.86, + "learning_rate": 4.357257489300999e-05, + "loss": 0.0017, + "step": 72120 + }, + { + "epoch": 12.86, + "learning_rate": 4.3571683309557774e-05, + "loss": 0.003, + "step": 72130 + }, + { + "epoch": 12.86, + "learning_rate": 4.3570791726105565e-05, + "loss": 0.0054, + "step": 72140 + }, + { + "epoch": 12.87, + "learning_rate": 4.356990014265335e-05, + "loss": 0.0031, + "step": 72150 + }, + { + "epoch": 12.87, + "learning_rate": 4.356900855920114e-05, + "loss": 0.0033, + "step": 72160 + }, + { + "epoch": 12.87, + "learning_rate": 4.356811697574893e-05, + "loss": 0.0043, + "step": 72170 + }, + { + "epoch": 12.87, + "learning_rate": 4.356722539229672e-05, + "loss": 0.0029, + "step": 72180 + }, + { + "epoch": 12.87, + "learning_rate": 4.3566333808844514e-05, + "loss": 0.0031, + "step": 72190 + }, + { + "epoch": 12.87, + "learning_rate": 4.35654422253923e-05, + "loss": 0.0028, + "step": 72200 + }, + { + "epoch": 12.88, + "learning_rate": 4.356455064194009e-05, + "loss": 0.0048, + "step": 72210 + }, + { + "epoch": 12.88, + "learning_rate": 4.3563659058487875e-05, + "loss": 0.0031, + "step": 72220 + }, + { + "epoch": 12.88, + "learning_rate": 4.3562767475035666e-05, + "loss": 0.0047, + "step": 72230 + }, + { + "epoch": 12.88, + "learning_rate": 4.356187589158345e-05, + "loss": 0.0032, + "step": 72240 + }, + { + "epoch": 12.88, + "learning_rate": 4.356098430813124e-05, + "loss": 0.0035, + "step": 72250 + }, + { + "epoch": 12.89, + "learning_rate": 4.356009272467903e-05, + "loss": 0.0028, + "step": 72260 + }, + { + "epoch": 12.89, + "learning_rate": 4.355920114122682e-05, + "loss": 0.0037, + "step": 72270 + }, + { + "epoch": 12.89, + "learning_rate": 4.3558309557774615e-05, + "loss": 0.002, + "step": 72280 + }, + { + "epoch": 12.89, + "learning_rate": 4.35574179743224e-05, + "loss": 0.0018, + "step": 72290 + }, + { + "epoch": 12.89, + "learning_rate": 4.355652639087019e-05, + "loss": 0.0056, + "step": 72300 + }, + { + "epoch": 12.89, + "learning_rate": 4.3555634807417975e-05, + "loss": 0.0025, + "step": 72310 + }, + { + "epoch": 12.9, + "learning_rate": 4.3554743223965766e-05, + "loss": 0.0024, + "step": 72320 + }, + { + "epoch": 12.9, + "learning_rate": 4.355385164051356e-05, + "loss": 0.0055, + "step": 72330 + }, + { + "epoch": 12.9, + "learning_rate": 4.355296005706134e-05, + "loss": 0.0038, + "step": 72340 + }, + { + "epoch": 12.9, + "learning_rate": 4.355206847360913e-05, + "loss": 0.0049, + "step": 72350 + }, + { + "epoch": 12.9, + "learning_rate": 4.355117689015692e-05, + "loss": 0.0037, + "step": 72360 + }, + { + "epoch": 12.9, + "learning_rate": 4.355028530670471e-05, + "loss": 0.0033, + "step": 72370 + }, + { + "epoch": 12.91, + "learning_rate": 4.354939372325249e-05, + "loss": 0.0024, + "step": 72380 + }, + { + "epoch": 12.91, + "learning_rate": 4.354850213980029e-05, + "loss": 0.0018, + "step": 72390 + }, + { + "epoch": 12.91, + "learning_rate": 4.3547610556348076e-05, + "loss": 0.0032, + "step": 72400 + }, + { + "epoch": 12.91, + "learning_rate": 4.354671897289587e-05, + "loss": 0.0031, + "step": 72410 + }, + { + "epoch": 12.91, + "learning_rate": 4.354582738944366e-05, + "loss": 0.0021, + "step": 72420 + }, + { + "epoch": 12.92, + "learning_rate": 4.354493580599144e-05, + "loss": 0.006, + "step": 72430 + }, + { + "epoch": 12.92, + "learning_rate": 4.3544044222539234e-05, + "loss": 0.0076, + "step": 72440 + }, + { + "epoch": 12.92, + "learning_rate": 4.354315263908702e-05, + "loss": 0.0054, + "step": 72450 + }, + { + "epoch": 12.92, + "learning_rate": 4.354226105563481e-05, + "loss": 0.0026, + "step": 72460 + }, + { + "epoch": 12.92, + "learning_rate": 4.3541369472182594e-05, + "loss": 0.0036, + "step": 72470 + }, + { + "epoch": 12.92, + "learning_rate": 4.3540477888730385e-05, + "loss": 0.0031, + "step": 72480 + }, + { + "epoch": 12.93, + "learning_rate": 4.3539586305278176e-05, + "loss": 0.0038, + "step": 72490 + }, + { + "epoch": 12.93, + "learning_rate": 4.353869472182597e-05, + "loss": 0.0019, + "step": 72500 + }, + { + "epoch": 12.93, + "learning_rate": 4.353780313837376e-05, + "loss": 0.0045, + "step": 72510 + }, + { + "epoch": 12.93, + "learning_rate": 4.353691155492154e-05, + "loss": 0.0028, + "step": 72520 + }, + { + "epoch": 12.93, + "learning_rate": 4.3536019971469334e-05, + "loss": 0.0046, + "step": 72530 + }, + { + "epoch": 12.94, + "learning_rate": 4.353512838801712e-05, + "loss": 0.0044, + "step": 72540 + }, + { + "epoch": 12.94, + "learning_rate": 4.353423680456491e-05, + "loss": 0.0036, + "step": 72550 + }, + { + "epoch": 12.94, + "learning_rate": 4.35333452211127e-05, + "loss": 0.0036, + "step": 72560 + }, + { + "epoch": 12.94, + "learning_rate": 4.3532453637660486e-05, + "loss": 0.0024, + "step": 72570 + }, + { + "epoch": 12.94, + "learning_rate": 4.353156205420828e-05, + "loss": 0.0043, + "step": 72580 + }, + { + "epoch": 12.94, + "learning_rate": 4.353067047075606e-05, + "loss": 0.004, + "step": 72590 + }, + { + "epoch": 12.95, + "learning_rate": 4.352977888730385e-05, + "loss": 0.0026, + "step": 72600 + }, + { + "epoch": 12.95, + "learning_rate": 4.3528887303851644e-05, + "loss": 0.0028, + "step": 72610 + }, + { + "epoch": 12.95, + "learning_rate": 4.3527995720399435e-05, + "loss": 0.0059, + "step": 72620 + }, + { + "epoch": 12.95, + "learning_rate": 4.352710413694722e-05, + "loss": 0.003, + "step": 72630 + }, + { + "epoch": 12.95, + "learning_rate": 4.352621255349501e-05, + "loss": 0.0021, + "step": 72640 + }, + { + "epoch": 12.95, + "learning_rate": 4.35253209700428e-05, + "loss": 0.0068, + "step": 72650 + }, + { + "epoch": 12.96, + "learning_rate": 4.3524429386590586e-05, + "loss": 0.0046, + "step": 72660 + }, + { + "epoch": 12.96, + "learning_rate": 4.352353780313838e-05, + "loss": 0.0024, + "step": 72670 + }, + { + "epoch": 12.96, + "learning_rate": 4.352264621968616e-05, + "loss": 0.005, + "step": 72680 + }, + { + "epoch": 12.96, + "learning_rate": 4.352175463623395e-05, + "loss": 0.0039, + "step": 72690 + }, + { + "epoch": 12.96, + "learning_rate": 4.352086305278174e-05, + "loss": 0.0042, + "step": 72700 + }, + { + "epoch": 12.97, + "learning_rate": 4.351997146932953e-05, + "loss": 0.0047, + "step": 72710 + }, + { + "epoch": 12.97, + "learning_rate": 4.351907988587732e-05, + "loss": 0.0027, + "step": 72720 + }, + { + "epoch": 12.97, + "learning_rate": 4.351818830242511e-05, + "loss": 0.0023, + "step": 72730 + }, + { + "epoch": 12.97, + "learning_rate": 4.35172967189729e-05, + "loss": 0.0029, + "step": 72740 + }, + { + "epoch": 12.97, + "learning_rate": 4.351640513552069e-05, + "loss": 0.0029, + "step": 72750 + }, + { + "epoch": 12.97, + "learning_rate": 4.351551355206848e-05, + "loss": 0.0061, + "step": 72760 + }, + { + "epoch": 12.98, + "learning_rate": 4.351462196861626e-05, + "loss": 0.0032, + "step": 72770 + }, + { + "epoch": 12.98, + "learning_rate": 4.3513730385164054e-05, + "loss": 0.0042, + "step": 72780 + }, + { + "epoch": 12.98, + "learning_rate": 4.3512838801711845e-05, + "loss": 0.0022, + "step": 72790 + }, + { + "epoch": 12.98, + "learning_rate": 4.351194721825963e-05, + "loss": 0.0025, + "step": 72800 + }, + { + "epoch": 12.98, + "learning_rate": 4.351105563480742e-05, + "loss": 0.0023, + "step": 72810 + }, + { + "epoch": 12.99, + "learning_rate": 4.3510164051355205e-05, + "loss": 0.0042, + "step": 72820 + }, + { + "epoch": 12.99, + "learning_rate": 4.3509272467902996e-05, + "loss": 0.0037, + "step": 72830 + }, + { + "epoch": 12.99, + "learning_rate": 4.350838088445079e-05, + "loss": 0.0046, + "step": 72840 + }, + { + "epoch": 12.99, + "learning_rate": 4.350748930099858e-05, + "loss": 0.0034, + "step": 72850 + }, + { + "epoch": 12.99, + "learning_rate": 4.350659771754636e-05, + "loss": 0.0035, + "step": 72860 + }, + { + "epoch": 12.99, + "learning_rate": 4.3505706134094154e-05, + "loss": 0.0039, + "step": 72870 + }, + { + "epoch": 13.0, + "learning_rate": 4.3504814550641945e-05, + "loss": 0.0047, + "step": 72880 + }, + { + "epoch": 13.0, + "learning_rate": 4.350392296718973e-05, + "loss": 0.0039, + "step": 72890 + }, + { + "epoch": 13.0, + "learning_rate": 4.350303138373752e-05, + "loss": 0.0038, + "step": 72900 + }, + { + "epoch": 13.0, + "eval_loss": 0.02078627049922943, + "eval_runtime": 196.3604, + "eval_samples_per_second": 23.625, + "eval_steps_per_second": 2.954, + "step": 72904 + }, + { + "epoch": 13.0, + "learning_rate": 4.3502139800285305e-05, + "loss": 0.0032, + "step": 72910 + }, + { + "epoch": 13.0, + "learning_rate": 4.35012482168331e-05, + "loss": 0.0034, + "step": 72920 + }, + { + "epoch": 13.0, + "learning_rate": 4.350035663338088e-05, + "loss": 0.0011, + "step": 72930 + }, + { + "epoch": 13.01, + "learning_rate": 4.349946504992867e-05, + "loss": 0.0058, + "step": 72940 + }, + { + "epoch": 13.01, + "learning_rate": 4.3498573466476463e-05, + "loss": 0.0044, + "step": 72950 + }, + { + "epoch": 13.01, + "learning_rate": 4.3497681883024255e-05, + "loss": 0.0044, + "step": 72960 + }, + { + "epoch": 13.01, + "learning_rate": 4.3496790299572046e-05, + "loss": 0.0037, + "step": 72970 + }, + { + "epoch": 13.01, + "learning_rate": 4.349589871611983e-05, + "loss": 0.0027, + "step": 72980 + }, + { + "epoch": 13.02, + "learning_rate": 4.349500713266762e-05, + "loss": 0.0046, + "step": 72990 + }, + { + "epoch": 13.02, + "learning_rate": 4.3494115549215406e-05, + "loss": 0.0045, + "step": 73000 + }, + { + "epoch": 13.02, + "learning_rate": 4.34932239657632e-05, + "loss": 0.0028, + "step": 73010 + }, + { + "epoch": 13.02, + "learning_rate": 4.349233238231099e-05, + "loss": 0.0048, + "step": 73020 + }, + { + "epoch": 13.02, + "learning_rate": 4.349144079885877e-05, + "loss": 0.0033, + "step": 73030 + }, + { + "epoch": 13.02, + "learning_rate": 4.3490549215406564e-05, + "loss": 0.0022, + "step": 73040 + }, + { + "epoch": 13.03, + "learning_rate": 4.348965763195435e-05, + "loss": 0.0033, + "step": 73050 + }, + { + "epoch": 13.03, + "learning_rate": 4.3488766048502146e-05, + "loss": 0.003, + "step": 73060 + }, + { + "epoch": 13.03, + "learning_rate": 4.348787446504993e-05, + "loss": 0.0026, + "step": 73070 + }, + { + "epoch": 13.03, + "learning_rate": 4.348698288159772e-05, + "loss": 0.0034, + "step": 73080 + }, + { + "epoch": 13.03, + "learning_rate": 4.3486091298145507e-05, + "loss": 0.0031, + "step": 73090 + }, + { + "epoch": 13.03, + "learning_rate": 4.34851997146933e-05, + "loss": 0.0028, + "step": 73100 + }, + { + "epoch": 13.04, + "learning_rate": 4.348430813124109e-05, + "loss": 0.0031, + "step": 73110 + }, + { + "epoch": 13.04, + "learning_rate": 4.3483416547788873e-05, + "loss": 0.0013, + "step": 73120 + }, + { + "epoch": 13.04, + "learning_rate": 4.3482524964336665e-05, + "loss": 0.0029, + "step": 73130 + }, + { + "epoch": 13.04, + "learning_rate": 4.348163338088445e-05, + "loss": 0.0049, + "step": 73140 + }, + { + "epoch": 13.04, + "learning_rate": 4.348074179743224e-05, + "loss": 0.0034, + "step": 73150 + }, + { + "epoch": 13.05, + "learning_rate": 4.3479850213980025e-05, + "loss": 0.0032, + "step": 73160 + }, + { + "epoch": 13.05, + "learning_rate": 4.347895863052782e-05, + "loss": 0.003, + "step": 73170 + }, + { + "epoch": 13.05, + "learning_rate": 4.347806704707561e-05, + "loss": 0.0018, + "step": 73180 + }, + { + "epoch": 13.05, + "learning_rate": 4.34771754636234e-05, + "loss": 0.0014, + "step": 73190 + }, + { + "epoch": 13.05, + "learning_rate": 4.347628388017119e-05, + "loss": 0.0038, + "step": 73200 + }, + { + "epoch": 13.05, + "learning_rate": 4.3475392296718974e-05, + "loss": 0.0044, + "step": 73210 + }, + { + "epoch": 13.06, + "learning_rate": 4.3474500713266765e-05, + "loss": 0.0021, + "step": 73220 + }, + { + "epoch": 13.06, + "learning_rate": 4.347369828815977e-05, + "loss": 0.0077, + "step": 73230 + }, + { + "epoch": 13.06, + "learning_rate": 4.3472806704707564e-05, + "loss": 0.0055, + "step": 73240 + }, + { + "epoch": 13.06, + "learning_rate": 4.347191512125535e-05, + "loss": 0.0037, + "step": 73250 + }, + { + "epoch": 13.06, + "learning_rate": 4.347102353780314e-05, + "loss": 0.0026, + "step": 73260 + }, + { + "epoch": 13.07, + "learning_rate": 4.347013195435093e-05, + "loss": 0.0031, + "step": 73270 + }, + { + "epoch": 13.07, + "learning_rate": 4.3469240370898716e-05, + "loss": 0.0026, + "step": 73280 + }, + { + "epoch": 13.07, + "learning_rate": 4.346834878744651e-05, + "loss": 0.0033, + "step": 73290 + }, + { + "epoch": 13.07, + "learning_rate": 4.346745720399429e-05, + "loss": 0.0028, + "step": 73300 + }, + { + "epoch": 13.07, + "learning_rate": 4.346656562054208e-05, + "loss": 0.0059, + "step": 73310 + }, + { + "epoch": 13.07, + "learning_rate": 4.3465674037089874e-05, + "loss": 0.0029, + "step": 73320 + }, + { + "epoch": 13.08, + "learning_rate": 4.3464782453637665e-05, + "loss": 0.0013, + "step": 73330 + }, + { + "epoch": 13.08, + "learning_rate": 4.3463890870185456e-05, + "loss": 0.0032, + "step": 73340 + }, + { + "epoch": 13.08, + "learning_rate": 4.346299928673324e-05, + "loss": 0.0036, + "step": 73350 + }, + { + "epoch": 13.08, + "learning_rate": 4.346210770328103e-05, + "loss": 0.0025, + "step": 73360 + }, + { + "epoch": 13.08, + "learning_rate": 4.3461216119828816e-05, + "loss": 0.0029, + "step": 73370 + }, + { + "epoch": 13.08, + "learning_rate": 4.346032453637661e-05, + "loss": 0.003, + "step": 73380 + }, + { + "epoch": 13.09, + "learning_rate": 4.345943295292439e-05, + "loss": 0.0025, + "step": 73390 + }, + { + "epoch": 13.09, + "learning_rate": 4.345854136947218e-05, + "loss": 0.0036, + "step": 73400 + }, + { + "epoch": 13.09, + "learning_rate": 4.3457649786019974e-05, + "loss": 0.0027, + "step": 73410 + }, + { + "epoch": 13.09, + "learning_rate": 4.345675820256776e-05, + "loss": 0.0027, + "step": 73420 + }, + { + "epoch": 13.09, + "learning_rate": 4.345586661911556e-05, + "loss": 0.0044, + "step": 73430 + }, + { + "epoch": 13.1, + "learning_rate": 4.345497503566334e-05, + "loss": 0.0019, + "step": 73440 + }, + { + "epoch": 13.1, + "learning_rate": 4.345408345221113e-05, + "loss": 0.0045, + "step": 73450 + }, + { + "epoch": 13.1, + "learning_rate": 4.345319186875892e-05, + "loss": 0.002, + "step": 73460 + }, + { + "epoch": 13.1, + "learning_rate": 4.345230028530671e-05, + "loss": 0.0031, + "step": 73470 + }, + { + "epoch": 13.1, + "learning_rate": 4.345140870185449e-05, + "loss": 0.0038, + "step": 73480 + }, + { + "epoch": 13.1, + "learning_rate": 4.3450517118402284e-05, + "loss": 0.0039, + "step": 73490 + }, + { + "epoch": 13.11, + "learning_rate": 4.3449625534950075e-05, + "loss": 0.0034, + "step": 73500 + }, + { + "epoch": 13.11, + "learning_rate": 4.344873395149786e-05, + "loss": 0.0022, + "step": 73510 + }, + { + "epoch": 13.11, + "learning_rate": 4.344784236804565e-05, + "loss": 0.0026, + "step": 73520 + }, + { + "epoch": 13.11, + "learning_rate": 4.3446950784593435e-05, + "loss": 0.002, + "step": 73530 + }, + { + "epoch": 13.11, + "learning_rate": 4.344605920114123e-05, + "loss": 0.004, + "step": 73540 + }, + { + "epoch": 13.12, + "learning_rate": 4.344516761768902e-05, + "loss": 0.0024, + "step": 73550 + }, + { + "epoch": 13.12, + "learning_rate": 4.344427603423681e-05, + "loss": 0.0064, + "step": 73560 + }, + { + "epoch": 13.12, + "learning_rate": 4.34433844507846e-05, + "loss": 0.0039, + "step": 73570 + }, + { + "epoch": 13.12, + "learning_rate": 4.3442492867332384e-05, + "loss": 0.0021, + "step": 73580 + }, + { + "epoch": 13.12, + "learning_rate": 4.3441601283880175e-05, + "loss": 0.0031, + "step": 73590 + }, + { + "epoch": 13.12, + "learning_rate": 4.344070970042796e-05, + "loss": 0.0035, + "step": 73600 + }, + { + "epoch": 13.13, + "learning_rate": 4.343981811697575e-05, + "loss": 0.0023, + "step": 73610 + }, + { + "epoch": 13.13, + "learning_rate": 4.3438926533523536e-05, + "loss": 0.0048, + "step": 73620 + }, + { + "epoch": 13.13, + "learning_rate": 4.343803495007133e-05, + "loss": 0.0035, + "step": 73630 + }, + { + "epoch": 13.13, + "learning_rate": 4.343714336661912e-05, + "loss": 0.0025, + "step": 73640 + }, + { + "epoch": 13.13, + "learning_rate": 4.343625178316691e-05, + "loss": 0.002, + "step": 73650 + }, + { + "epoch": 13.13, + "learning_rate": 4.34353601997147e-05, + "loss": 0.0027, + "step": 73660 + }, + { + "epoch": 13.14, + "learning_rate": 4.3434468616262485e-05, + "loss": 0.0022, + "step": 73670 + }, + { + "epoch": 13.14, + "learning_rate": 4.3433577032810276e-05, + "loss": 0.0027, + "step": 73680 + }, + { + "epoch": 13.14, + "learning_rate": 4.343268544935806e-05, + "loss": 0.0014, + "step": 73690 + }, + { + "epoch": 13.14, + "learning_rate": 4.343179386590585e-05, + "loss": 0.0046, + "step": 73700 + }, + { + "epoch": 13.14, + "learning_rate": 4.3430902282453636e-05, + "loss": 0.0037, + "step": 73710 + }, + { + "epoch": 13.15, + "learning_rate": 4.343001069900143e-05, + "loss": 0.0029, + "step": 73720 + }, + { + "epoch": 13.15, + "learning_rate": 4.342911911554922e-05, + "loss": 0.0038, + "step": 73730 + }, + { + "epoch": 13.15, + "learning_rate": 4.3428227532097e-05, + "loss": 0.002, + "step": 73740 + }, + { + "epoch": 13.15, + "learning_rate": 4.3427335948644794e-05, + "loss": 0.0018, + "step": 73750 + }, + { + "epoch": 13.15, + "learning_rate": 4.3426444365192585e-05, + "loss": 0.0019, + "step": 73760 + }, + { + "epoch": 13.15, + "learning_rate": 4.3425552781740377e-05, + "loss": 0.0039, + "step": 73770 + }, + { + "epoch": 13.16, + "learning_rate": 4.342466119828816e-05, + "loss": 0.0039, + "step": 73780 + }, + { + "epoch": 13.16, + "learning_rate": 4.342376961483595e-05, + "loss": 0.003, + "step": 73790 + }, + { + "epoch": 13.16, + "learning_rate": 4.3422878031383743e-05, + "loss": 0.0036, + "step": 73800 + }, + { + "epoch": 13.16, + "learning_rate": 4.342198644793153e-05, + "loss": 0.0025, + "step": 73810 + }, + { + "epoch": 13.16, + "learning_rate": 4.342109486447932e-05, + "loss": 0.0035, + "step": 73820 + }, + { + "epoch": 13.17, + "learning_rate": 4.3420203281027103e-05, + "loss": 0.0027, + "step": 73830 + }, + { + "epoch": 13.17, + "learning_rate": 4.3419311697574895e-05, + "loss": 0.0035, + "step": 73840 + }, + { + "epoch": 13.17, + "learning_rate": 4.341842011412268e-05, + "loss": 0.0043, + "step": 73850 + }, + { + "epoch": 13.17, + "learning_rate": 4.341752853067047e-05, + "loss": 0.0019, + "step": 73860 + }, + { + "epoch": 13.17, + "learning_rate": 4.341663694721826e-05, + "loss": 0.0046, + "step": 73870 + }, + { + "epoch": 13.17, + "learning_rate": 4.341574536376605e-05, + "loss": 0.0021, + "step": 73880 + }, + { + "epoch": 13.18, + "learning_rate": 4.3414853780313844e-05, + "loss": 0.0037, + "step": 73890 + }, + { + "epoch": 13.18, + "learning_rate": 4.341396219686163e-05, + "loss": 0.0045, + "step": 73900 + }, + { + "epoch": 13.18, + "learning_rate": 4.341307061340942e-05, + "loss": 0.0035, + "step": 73910 + }, + { + "epoch": 13.18, + "learning_rate": 4.3412179029957204e-05, + "loss": 0.0049, + "step": 73920 + }, + { + "epoch": 13.18, + "learning_rate": 4.3411287446504995e-05, + "loss": 0.0037, + "step": 73930 + }, + { + "epoch": 13.18, + "learning_rate": 4.341039586305278e-05, + "loss": 0.0024, + "step": 73940 + }, + { + "epoch": 13.19, + "learning_rate": 4.340950427960057e-05, + "loss": 0.003, + "step": 73950 + }, + { + "epoch": 13.19, + "learning_rate": 4.340861269614836e-05, + "loss": 0.0027, + "step": 73960 + }, + { + "epoch": 13.19, + "learning_rate": 4.3407721112696147e-05, + "loss": 0.0032, + "step": 73970 + }, + { + "epoch": 13.19, + "learning_rate": 4.3406829529243945e-05, + "loss": 0.0018, + "step": 73980 + }, + { + "epoch": 13.19, + "learning_rate": 4.340593794579173e-05, + "loss": 0.0027, + "step": 73990 + }, + { + "epoch": 13.2, + "learning_rate": 4.340504636233952e-05, + "loss": 0.0045, + "step": 74000 + }, + { + "epoch": 13.2, + "learning_rate": 4.3404154778887305e-05, + "loss": 0.0026, + "step": 74010 + }, + { + "epoch": 13.2, + "learning_rate": 4.3403263195435096e-05, + "loss": 0.0031, + "step": 74020 + }, + { + "epoch": 13.2, + "learning_rate": 4.340237161198289e-05, + "loss": 0.0014, + "step": 74030 + }, + { + "epoch": 13.2, + "learning_rate": 4.340148002853067e-05, + "loss": 0.0049, + "step": 74040 + }, + { + "epoch": 13.2, + "learning_rate": 4.340058844507846e-05, + "loss": 0.0035, + "step": 74050 + }, + { + "epoch": 13.21, + "learning_rate": 4.339969686162625e-05, + "loss": 0.0036, + "step": 74060 + }, + { + "epoch": 13.21, + "learning_rate": 4.339880527817404e-05, + "loss": 0.0068, + "step": 74070 + }, + { + "epoch": 13.21, + "learning_rate": 4.339791369472182e-05, + "loss": 0.0033, + "step": 74080 + }, + { + "epoch": 13.21, + "learning_rate": 4.339702211126962e-05, + "loss": 0.0037, + "step": 74090 + }, + { + "epoch": 13.21, + "learning_rate": 4.3396130527817405e-05, + "loss": 0.0027, + "step": 74100 + }, + { + "epoch": 13.22, + "learning_rate": 4.3395238944365196e-05, + "loss": 0.0027, + "step": 74110 + }, + { + "epoch": 13.22, + "learning_rate": 4.339434736091299e-05, + "loss": 0.0039, + "step": 74120 + }, + { + "epoch": 13.22, + "learning_rate": 4.339345577746077e-05, + "loss": 0.0027, + "step": 74130 + }, + { + "epoch": 13.22, + "learning_rate": 4.339256419400856e-05, + "loss": 0.0032, + "step": 74140 + }, + { + "epoch": 13.22, + "learning_rate": 4.339167261055635e-05, + "loss": 0.0025, + "step": 74150 + }, + { + "epoch": 13.22, + "learning_rate": 4.339078102710414e-05, + "loss": 0.0022, + "step": 74160 + }, + { + "epoch": 13.23, + "learning_rate": 4.338988944365192e-05, + "loss": 0.0026, + "step": 74170 + }, + { + "epoch": 13.23, + "learning_rate": 4.3388997860199715e-05, + "loss": 0.0037, + "step": 74180 + }, + { + "epoch": 13.23, + "learning_rate": 4.3388106276747506e-05, + "loss": 0.0031, + "step": 74190 + }, + { + "epoch": 13.23, + "learning_rate": 4.33872146932953e-05, + "loss": 0.0035, + "step": 74200 + }, + { + "epoch": 13.23, + "learning_rate": 4.338632310984309e-05, + "loss": 0.0037, + "step": 74210 + }, + { + "epoch": 13.23, + "learning_rate": 4.338543152639087e-05, + "loss": 0.0013, + "step": 74220 + }, + { + "epoch": 13.24, + "learning_rate": 4.3384539942938664e-05, + "loss": 0.0023, + "step": 74230 + }, + { + "epoch": 13.24, + "learning_rate": 4.338364835948645e-05, + "loss": 0.0037, + "step": 74240 + }, + { + "epoch": 13.24, + "learning_rate": 4.338275677603424e-05, + "loss": 0.0023, + "step": 74250 + }, + { + "epoch": 13.24, + "learning_rate": 4.338186519258203e-05, + "loss": 0.0014, + "step": 74260 + }, + { + "epoch": 13.24, + "learning_rate": 4.3380973609129815e-05, + "loss": 0.0029, + "step": 74270 + }, + { + "epoch": 13.25, + "learning_rate": 4.3380082025677606e-05, + "loss": 0.0027, + "step": 74280 + }, + { + "epoch": 13.25, + "learning_rate": 4.337919044222539e-05, + "loss": 0.0046, + "step": 74290 + }, + { + "epoch": 13.25, + "learning_rate": 4.337829885877318e-05, + "loss": 0.001, + "step": 74300 + }, + { + "epoch": 13.25, + "learning_rate": 4.337740727532097e-05, + "loss": 0.0025, + "step": 74310 + }, + { + "epoch": 13.25, + "learning_rate": 4.3376515691868764e-05, + "loss": 0.0026, + "step": 74320 + }, + { + "epoch": 13.25, + "learning_rate": 4.337562410841655e-05, + "loss": 0.0012, + "step": 74330 + }, + { + "epoch": 13.26, + "learning_rate": 4.337473252496434e-05, + "loss": 0.0012, + "step": 74340 + }, + { + "epoch": 13.26, + "learning_rate": 4.337384094151213e-05, + "loss": 0.0039, + "step": 74350 + }, + { + "epoch": 13.26, + "learning_rate": 4.3372949358059916e-05, + "loss": 0.0044, + "step": 74360 + }, + { + "epoch": 13.26, + "learning_rate": 4.337205777460771e-05, + "loss": 0.0035, + "step": 74370 + }, + { + "epoch": 13.26, + "learning_rate": 4.337116619115549e-05, + "loss": 0.0036, + "step": 74380 + }, + { + "epoch": 13.26, + "learning_rate": 4.337027460770328e-05, + "loss": 0.0058, + "step": 74390 + }, + { + "epoch": 13.27, + "learning_rate": 4.336938302425107e-05, + "loss": 0.0035, + "step": 74400 + }, + { + "epoch": 13.27, + "learning_rate": 4.336849144079886e-05, + "loss": 0.0038, + "step": 74410 + }, + { + "epoch": 13.27, + "learning_rate": 4.336759985734665e-05, + "loss": 0.003, + "step": 74420 + }, + { + "epoch": 13.27, + "learning_rate": 4.336670827389444e-05, + "loss": 0.0028, + "step": 74430 + }, + { + "epoch": 13.27, + "learning_rate": 4.336581669044223e-05, + "loss": 0.004, + "step": 74440 + }, + { + "epoch": 13.28, + "learning_rate": 4.3364925106990016e-05, + "loss": 0.003, + "step": 74450 + }, + { + "epoch": 13.28, + "learning_rate": 4.336403352353781e-05, + "loss": 0.0048, + "step": 74460 + }, + { + "epoch": 13.28, + "learning_rate": 4.336314194008559e-05, + "loss": 0.005, + "step": 74470 + }, + { + "epoch": 13.28, + "learning_rate": 4.336225035663338e-05, + "loss": 0.0021, + "step": 74480 + }, + { + "epoch": 13.28, + "learning_rate": 4.3361358773181174e-05, + "loss": 0.006, + "step": 74490 + }, + { + "epoch": 13.28, + "learning_rate": 4.336046718972896e-05, + "loss": 0.0031, + "step": 74500 + }, + { + "epoch": 13.29, + "learning_rate": 4.335957560627675e-05, + "loss": 0.0024, + "step": 74510 + }, + { + "epoch": 13.29, + "learning_rate": 4.3358684022824534e-05, + "loss": 0.0047, + "step": 74520 + }, + { + "epoch": 13.29, + "learning_rate": 4.335779243937233e-05, + "loss": 0.0032, + "step": 74530 + }, + { + "epoch": 13.29, + "learning_rate": 4.335690085592012e-05, + "loss": 0.0026, + "step": 74540 + }, + { + "epoch": 13.29, + "learning_rate": 4.335600927246791e-05, + "loss": 0.002, + "step": 74550 + }, + { + "epoch": 13.3, + "learning_rate": 4.335511768901569e-05, + "loss": 0.0032, + "step": 74560 + }, + { + "epoch": 13.3, + "learning_rate": 4.3354226105563484e-05, + "loss": 0.0028, + "step": 74570 + }, + { + "epoch": 13.3, + "learning_rate": 4.3353334522111275e-05, + "loss": 0.0038, + "step": 74580 + }, + { + "epoch": 13.3, + "learning_rate": 4.335244293865906e-05, + "loss": 0.0034, + "step": 74590 + }, + { + "epoch": 13.3, + "learning_rate": 4.335155135520685e-05, + "loss": 0.0021, + "step": 74600 + }, + { + "epoch": 13.3, + "learning_rate": 4.3350659771754635e-05, + "loss": 0.0043, + "step": 74610 + }, + { + "epoch": 13.31, + "learning_rate": 4.3349768188302426e-05, + "loss": 0.0053, + "step": 74620 + }, + { + "epoch": 13.31, + "learning_rate": 4.334887660485021e-05, + "loss": 0.005, + "step": 74630 + }, + { + "epoch": 13.31, + "learning_rate": 4.334798502139801e-05, + "loss": 0.0032, + "step": 74640 + }, + { + "epoch": 13.31, + "learning_rate": 4.334709343794579e-05, + "loss": 0.003, + "step": 74650 + }, + { + "epoch": 13.31, + "learning_rate": 4.3346201854493584e-05, + "loss": 0.0057, + "step": 74660 + }, + { + "epoch": 13.31, + "learning_rate": 4.3345310271041375e-05, + "loss": 0.0017, + "step": 74670 + }, + { + "epoch": 13.32, + "learning_rate": 4.334441868758916e-05, + "loss": 0.0027, + "step": 74680 + }, + { + "epoch": 13.32, + "learning_rate": 4.334352710413695e-05, + "loss": 0.0028, + "step": 74690 + }, + { + "epoch": 13.32, + "learning_rate": 4.3342635520684735e-05, + "loss": 0.0018, + "step": 74700 + }, + { + "epoch": 13.32, + "learning_rate": 4.334174393723253e-05, + "loss": 0.0056, + "step": 74710 + }, + { + "epoch": 13.32, + "learning_rate": 4.334085235378032e-05, + "loss": 0.0012, + "step": 74720 + }, + { + "epoch": 13.33, + "learning_rate": 4.33399607703281e-05, + "loss": 0.0024, + "step": 74730 + }, + { + "epoch": 13.33, + "learning_rate": 4.3339069186875894e-05, + "loss": 0.004, + "step": 74740 + }, + { + "epoch": 13.33, + "learning_rate": 4.3338177603423685e-05, + "loss": 0.0022, + "step": 74750 + }, + { + "epoch": 13.33, + "learning_rate": 4.3337286019971476e-05, + "loss": 0.0035, + "step": 74760 + }, + { + "epoch": 13.33, + "learning_rate": 4.333639443651926e-05, + "loss": 0.0041, + "step": 74770 + }, + { + "epoch": 13.33, + "learning_rate": 4.333550285306705e-05, + "loss": 0.0029, + "step": 74780 + }, + { + "epoch": 13.34, + "learning_rate": 4.3334611269614836e-05, + "loss": 0.0037, + "step": 74790 + }, + { + "epoch": 13.34, + "learning_rate": 4.333371968616263e-05, + "loss": 0.0034, + "step": 74800 + }, + { + "epoch": 13.34, + "learning_rate": 4.333282810271042e-05, + "loss": 0.0031, + "step": 74810 + }, + { + "epoch": 13.34, + "learning_rate": 4.33319365192582e-05, + "loss": 0.0034, + "step": 74820 + }, + { + "epoch": 13.34, + "learning_rate": 4.3331044935805994e-05, + "loss": 0.0023, + "step": 74830 + }, + { + "epoch": 13.35, + "learning_rate": 4.333015335235378e-05, + "loss": 0.0052, + "step": 74840 + }, + { + "epoch": 13.35, + "learning_rate": 4.332926176890157e-05, + "loss": 0.0028, + "step": 74850 + }, + { + "epoch": 13.35, + "learning_rate": 4.332837018544936e-05, + "loss": 0.0022, + "step": 74860 + }, + { + "epoch": 13.35, + "learning_rate": 4.332747860199715e-05, + "loss": 0.0023, + "step": 74870 + }, + { + "epoch": 13.35, + "learning_rate": 4.3326587018544937e-05, + "loss": 0.0024, + "step": 74880 + }, + { + "epoch": 13.35, + "learning_rate": 4.332569543509273e-05, + "loss": 0.0022, + "step": 74890 + }, + { + "epoch": 13.36, + "learning_rate": 4.332480385164052e-05, + "loss": 0.0034, + "step": 74900 + }, + { + "epoch": 13.36, + "learning_rate": 4.3323912268188303e-05, + "loss": 0.0027, + "step": 74910 + }, + { + "epoch": 13.36, + "learning_rate": 4.3323020684736095e-05, + "loss": 0.005, + "step": 74920 + }, + { + "epoch": 13.36, + "learning_rate": 4.332212910128388e-05, + "loss": 0.0037, + "step": 74930 + }, + { + "epoch": 13.36, + "learning_rate": 4.332123751783167e-05, + "loss": 0.0034, + "step": 74940 + }, + { + "epoch": 13.36, + "learning_rate": 4.332034593437946e-05, + "loss": 0.0033, + "step": 74950 + }, + { + "epoch": 13.37, + "learning_rate": 4.3319454350927246e-05, + "loss": 0.0042, + "step": 74960 + }, + { + "epoch": 13.37, + "learning_rate": 4.331856276747504e-05, + "loss": 0.0038, + "step": 74970 + }, + { + "epoch": 13.37, + "learning_rate": 4.331767118402283e-05, + "loss": 0.003, + "step": 74980 + }, + { + "epoch": 13.37, + "learning_rate": 4.331677960057062e-05, + "loss": 0.0023, + "step": 74990 + }, + { + "epoch": 13.37, + "learning_rate": 4.3315888017118404e-05, + "loss": 0.0025, + "step": 75000 + }, + { + "epoch": 13.38, + "learning_rate": 4.3314996433666195e-05, + "loss": 0.0021, + "step": 75010 + }, + { + "epoch": 13.38, + "learning_rate": 4.331410485021398e-05, + "loss": 0.0049, + "step": 75020 + }, + { + "epoch": 13.38, + "learning_rate": 4.331321326676177e-05, + "loss": 0.0041, + "step": 75030 + }, + { + "epoch": 13.38, + "learning_rate": 4.331232168330956e-05, + "loss": 0.004, + "step": 75040 + }, + { + "epoch": 13.38, + "learning_rate": 4.3311430099857347e-05, + "loss": 0.0011, + "step": 75050 + }, + { + "epoch": 13.38, + "learning_rate": 4.331053851640514e-05, + "loss": 0.0031, + "step": 75060 + }, + { + "epoch": 13.39, + "learning_rate": 4.330964693295292e-05, + "loss": 0.0037, + "step": 75070 + }, + { + "epoch": 13.39, + "learning_rate": 4.330875534950072e-05, + "loss": 0.0043, + "step": 75080 + }, + { + "epoch": 13.39, + "learning_rate": 4.3307863766048505e-05, + "loss": 0.0042, + "step": 75090 + }, + { + "epoch": 13.39, + "learning_rate": 4.3306972182596296e-05, + "loss": 0.0027, + "step": 75100 + }, + { + "epoch": 13.39, + "learning_rate": 4.330608059914408e-05, + "loss": 0.0021, + "step": 75110 + }, + { + "epoch": 13.4, + "learning_rate": 4.330518901569187e-05, + "loss": 0.0033, + "step": 75120 + }, + { + "epoch": 13.4, + "learning_rate": 4.330429743223966e-05, + "loss": 0.0018, + "step": 75130 + }, + { + "epoch": 13.4, + "learning_rate": 4.330340584878745e-05, + "loss": 0.0034, + "step": 75140 + }, + { + "epoch": 13.4, + "learning_rate": 4.330251426533524e-05, + "loss": 0.0033, + "step": 75150 + }, + { + "epoch": 13.4, + "learning_rate": 4.330162268188302e-05, + "loss": 0.0037, + "step": 75160 + }, + { + "epoch": 13.4, + "learning_rate": 4.3300731098430814e-05, + "loss": 0.0045, + "step": 75170 + }, + { + "epoch": 13.41, + "learning_rate": 4.3299839514978605e-05, + "loss": 0.0017, + "step": 75180 + }, + { + "epoch": 13.41, + "learning_rate": 4.329894793152639e-05, + "loss": 0.002, + "step": 75190 + }, + { + "epoch": 13.41, + "learning_rate": 4.329805634807418e-05, + "loss": 0.0028, + "step": 75200 + }, + { + "epoch": 13.41, + "learning_rate": 4.329716476462197e-05, + "loss": 0.0052, + "step": 75210 + }, + { + "epoch": 13.41, + "learning_rate": 4.329627318116976e-05, + "loss": 0.003, + "step": 75220 + }, + { + "epoch": 13.41, + "learning_rate": 4.329538159771755e-05, + "loss": 0.002, + "step": 75230 + }, + { + "epoch": 13.42, + "learning_rate": 4.329449001426534e-05, + "loss": 0.0037, + "step": 75240 + }, + { + "epoch": 13.42, + "learning_rate": 4.329359843081312e-05, + "loss": 0.0025, + "step": 75250 + }, + { + "epoch": 13.42, + "learning_rate": 4.3292706847360914e-05, + "loss": 0.0032, + "step": 75260 + }, + { + "epoch": 13.42, + "learning_rate": 4.3291815263908706e-05, + "loss": 0.003, + "step": 75270 + }, + { + "epoch": 13.42, + "learning_rate": 4.329092368045649e-05, + "loss": 0.0026, + "step": 75280 + }, + { + "epoch": 13.43, + "learning_rate": 4.329003209700428e-05, + "loss": 0.0023, + "step": 75290 + }, + { + "epoch": 13.43, + "learning_rate": 4.3289140513552066e-05, + "loss": 0.0071, + "step": 75300 + }, + { + "epoch": 13.43, + "learning_rate": 4.3288248930099864e-05, + "loss": 0.0021, + "step": 75310 + }, + { + "epoch": 13.43, + "learning_rate": 4.328735734664765e-05, + "loss": 0.0022, + "step": 75320 + }, + { + "epoch": 13.43, + "learning_rate": 4.328646576319544e-05, + "loss": 0.0041, + "step": 75330 + }, + { + "epoch": 13.43, + "learning_rate": 4.3285574179743224e-05, + "loss": 0.0064, + "step": 75340 + }, + { + "epoch": 13.44, + "learning_rate": 4.3284682596291015e-05, + "loss": 0.0033, + "step": 75350 + }, + { + "epoch": 13.44, + "learning_rate": 4.3283791012838806e-05, + "loss": 0.0026, + "step": 75360 + }, + { + "epoch": 13.44, + "learning_rate": 4.328289942938659e-05, + "loss": 0.0031, + "step": 75370 + }, + { + "epoch": 13.44, + "learning_rate": 4.328200784593438e-05, + "loss": 0.003, + "step": 75380 + }, + { + "epoch": 13.44, + "learning_rate": 4.3281116262482166e-05, + "loss": 0.0038, + "step": 75390 + }, + { + "epoch": 13.45, + "learning_rate": 4.328022467902996e-05, + "loss": 0.0016, + "step": 75400 + }, + { + "epoch": 13.45, + "learning_rate": 4.327933309557775e-05, + "loss": 0.0018, + "step": 75410 + }, + { + "epoch": 13.45, + "learning_rate": 4.327844151212554e-05, + "loss": 0.0026, + "step": 75420 + }, + { + "epoch": 13.45, + "learning_rate": 4.3277549928673324e-05, + "loss": 0.0038, + "step": 75430 + }, + { + "epoch": 13.45, + "learning_rate": 4.3276658345221116e-05, + "loss": 0.0044, + "step": 75440 + }, + { + "epoch": 13.45, + "learning_rate": 4.327576676176891e-05, + "loss": 0.0041, + "step": 75450 + }, + { + "epoch": 13.46, + "learning_rate": 4.327487517831669e-05, + "loss": 0.0015, + "step": 75460 + }, + { + "epoch": 13.46, + "learning_rate": 4.327398359486448e-05, + "loss": 0.0029, + "step": 75470 + }, + { + "epoch": 13.46, + "learning_rate": 4.327309201141227e-05, + "loss": 0.0036, + "step": 75480 + }, + { + "epoch": 13.46, + "learning_rate": 4.327220042796006e-05, + "loss": 0.0034, + "step": 75490 + }, + { + "epoch": 13.46, + "learning_rate": 4.327130884450785e-05, + "loss": 0.0045, + "step": 75500 + }, + { + "epoch": 13.46, + "learning_rate": 4.3270417261055634e-05, + "loss": 0.0047, + "step": 75510 + }, + { + "epoch": 13.47, + "learning_rate": 4.3269525677603425e-05, + "loss": 0.0024, + "step": 75520 + }, + { + "epoch": 13.47, + "learning_rate": 4.3268634094151216e-05, + "loss": 0.002, + "step": 75530 + }, + { + "epoch": 13.47, + "learning_rate": 4.326774251069901e-05, + "loss": 0.0036, + "step": 75540 + }, + { + "epoch": 13.47, + "learning_rate": 4.326685092724679e-05, + "loss": 0.0042, + "step": 75550 + }, + { + "epoch": 13.47, + "learning_rate": 4.326595934379458e-05, + "loss": 0.0034, + "step": 75560 + }, + { + "epoch": 13.48, + "learning_rate": 4.326506776034237e-05, + "loss": 0.005, + "step": 75570 + }, + { + "epoch": 13.48, + "learning_rate": 4.326417617689016e-05, + "loss": 0.0048, + "step": 75580 + }, + { + "epoch": 13.48, + "learning_rate": 4.326328459343795e-05, + "loss": 0.0033, + "step": 75590 + }, + { + "epoch": 13.48, + "learning_rate": 4.3262393009985734e-05, + "loss": 0.0028, + "step": 75600 + }, + { + "epoch": 13.48, + "learning_rate": 4.3261501426533526e-05, + "loss": 0.0039, + "step": 75610 + }, + { + "epoch": 13.48, + "learning_rate": 4.326060984308131e-05, + "loss": 0.0039, + "step": 75620 + }, + { + "epoch": 13.49, + "learning_rate": 4.32597182596291e-05, + "loss": 0.0042, + "step": 75630 + }, + { + "epoch": 13.49, + "learning_rate": 4.325882667617689e-05, + "loss": 0.0044, + "step": 75640 + }, + { + "epoch": 13.49, + "learning_rate": 4.3257935092724684e-05, + "loss": 0.0017, + "step": 75650 + }, + { + "epoch": 13.49, + "learning_rate": 4.325704350927247e-05, + "loss": 0.0026, + "step": 75660 + }, + { + "epoch": 13.49, + "learning_rate": 4.325615192582026e-05, + "loss": 0.0046, + "step": 75670 + }, + { + "epoch": 13.5, + "learning_rate": 4.325526034236805e-05, + "loss": 0.0021, + "step": 75680 + }, + { + "epoch": 13.5, + "learning_rate": 4.3254368758915835e-05, + "loss": 0.0029, + "step": 75690 + }, + { + "epoch": 13.5, + "learning_rate": 4.3253477175463626e-05, + "loss": 0.003, + "step": 75700 + }, + { + "epoch": 13.5, + "learning_rate": 4.325258559201141e-05, + "loss": 0.0019, + "step": 75710 + }, + { + "epoch": 13.5, + "learning_rate": 4.32516940085592e-05, + "loss": 0.0028, + "step": 75720 + }, + { + "epoch": 13.5, + "learning_rate": 4.325080242510699e-05, + "loss": 0.0039, + "step": 75730 + }, + { + "epoch": 13.51, + "learning_rate": 4.324991084165478e-05, + "loss": 0.0039, + "step": 75740 + }, + { + "epoch": 13.51, + "learning_rate": 4.324910841654779e-05, + "loss": 0.0041, + "step": 75750 + }, + { + "epoch": 13.51, + "learning_rate": 4.3248216833095577e-05, + "loss": 0.003, + "step": 75760 + }, + { + "epoch": 13.51, + "learning_rate": 4.324732524964337e-05, + "loss": 0.0037, + "step": 75770 + }, + { + "epoch": 13.51, + "learning_rate": 4.324643366619115e-05, + "loss": 0.0024, + "step": 75780 + }, + { + "epoch": 13.51, + "learning_rate": 4.324554208273895e-05, + "loss": 0.0014, + "step": 75790 + }, + { + "epoch": 13.52, + "learning_rate": 4.3244650499286735e-05, + "loss": 0.0032, + "step": 75800 + }, + { + "epoch": 13.52, + "learning_rate": 4.3243758915834526e-05, + "loss": 0.0039, + "step": 75810 + }, + { + "epoch": 13.52, + "learning_rate": 4.324286733238232e-05, + "loss": 0.0058, + "step": 75820 + }, + { + "epoch": 13.52, + "learning_rate": 4.32419757489301e-05, + "loss": 0.0036, + "step": 75830 + }, + { + "epoch": 13.52, + "learning_rate": 4.324108416547789e-05, + "loss": 0.0044, + "step": 75840 + }, + { + "epoch": 13.53, + "learning_rate": 4.324019258202568e-05, + "loss": 0.0022, + "step": 75850 + }, + { + "epoch": 13.53, + "learning_rate": 4.323930099857347e-05, + "loss": 0.0029, + "step": 75860 + }, + { + "epoch": 13.53, + "learning_rate": 4.323840941512125e-05, + "loss": 0.0035, + "step": 75870 + }, + { + "epoch": 13.53, + "learning_rate": 4.3237517831669044e-05, + "loss": 0.0034, + "step": 75880 + }, + { + "epoch": 13.53, + "learning_rate": 4.3236626248216835e-05, + "loss": 0.004, + "step": 75890 + }, + { + "epoch": 13.53, + "learning_rate": 4.3235734664764626e-05, + "loss": 0.003, + "step": 75900 + }, + { + "epoch": 13.54, + "learning_rate": 4.323484308131242e-05, + "loss": 0.0017, + "step": 75910 + }, + { + "epoch": 13.54, + "learning_rate": 4.32339514978602e-05, + "loss": 0.0019, + "step": 75920 + }, + { + "epoch": 13.54, + "learning_rate": 4.323305991440799e-05, + "loss": 0.0037, + "step": 75930 + }, + { + "epoch": 13.54, + "learning_rate": 4.323216833095578e-05, + "loss": 0.0033, + "step": 75940 + }, + { + "epoch": 13.54, + "learning_rate": 4.323127674750357e-05, + "loss": 0.003, + "step": 75950 + }, + { + "epoch": 13.54, + "learning_rate": 4.323038516405136e-05, + "loss": 0.0064, + "step": 75960 + }, + { + "epoch": 13.55, + "learning_rate": 4.3229493580599145e-05, + "loss": 0.0037, + "step": 75970 + }, + { + "epoch": 13.55, + "learning_rate": 4.3228601997146936e-05, + "loss": 0.002, + "step": 75980 + }, + { + "epoch": 13.55, + "learning_rate": 4.322771041369472e-05, + "loss": 0.0025, + "step": 75990 + }, + { + "epoch": 13.55, + "learning_rate": 4.322681883024251e-05, + "loss": 0.0034, + "step": 76000 + }, + { + "epoch": 13.55, + "learning_rate": 4.32259272467903e-05, + "loss": 0.0049, + "step": 76010 + }, + { + "epoch": 13.56, + "learning_rate": 4.3225035663338094e-05, + "loss": 0.0035, + "step": 76020 + }, + { + "epoch": 13.56, + "learning_rate": 4.322414407988588e-05, + "loss": 0.0034, + "step": 76030 + }, + { + "epoch": 13.56, + "learning_rate": 4.322325249643367e-05, + "loss": 0.004, + "step": 76040 + }, + { + "epoch": 13.56, + "learning_rate": 4.322236091298146e-05, + "loss": 0.003, + "step": 76050 + }, + { + "epoch": 13.56, + "learning_rate": 4.3221469329529245e-05, + "loss": 0.0017, + "step": 76060 + }, + { + "epoch": 13.56, + "learning_rate": 4.3220577746077036e-05, + "loss": 0.003, + "step": 76070 + }, + { + "epoch": 13.57, + "learning_rate": 4.321968616262482e-05, + "loss": 0.0023, + "step": 76080 + }, + { + "epoch": 13.57, + "learning_rate": 4.321879457917261e-05, + "loss": 0.002, + "step": 76090 + }, + { + "epoch": 13.57, + "learning_rate": 4.3217902995720396e-05, + "loss": 0.0041, + "step": 76100 + }, + { + "epoch": 13.57, + "learning_rate": 4.321701141226819e-05, + "loss": 0.003, + "step": 76110 + }, + { + "epoch": 13.57, + "learning_rate": 4.321611982881598e-05, + "loss": 0.0036, + "step": 76120 + }, + { + "epoch": 13.58, + "learning_rate": 4.321522824536377e-05, + "loss": 0.0033, + "step": 76130 + }, + { + "epoch": 13.58, + "learning_rate": 4.321433666191156e-05, + "loss": 0.0026, + "step": 76140 + }, + { + "epoch": 13.58, + "learning_rate": 4.3213445078459346e-05, + "loss": 0.0024, + "step": 76150 + }, + { + "epoch": 13.58, + "learning_rate": 4.321255349500714e-05, + "loss": 0.0044, + "step": 76160 + }, + { + "epoch": 13.58, + "learning_rate": 4.321166191155492e-05, + "loss": 0.0024, + "step": 76170 + }, + { + "epoch": 13.58, + "learning_rate": 4.321077032810271e-05, + "loss": 0.0027, + "step": 76180 + }, + { + "epoch": 13.59, + "learning_rate": 4.3209878744650504e-05, + "loss": 0.0031, + "step": 76190 + }, + { + "epoch": 13.59, + "learning_rate": 4.320898716119829e-05, + "loss": 0.0037, + "step": 76200 + }, + { + "epoch": 13.59, + "learning_rate": 4.320809557774608e-05, + "loss": 0.0029, + "step": 76210 + }, + { + "epoch": 13.59, + "learning_rate": 4.3207203994293864e-05, + "loss": 0.005, + "step": 76220 + }, + { + "epoch": 13.59, + "learning_rate": 4.320631241084166e-05, + "loss": 0.0029, + "step": 76230 + }, + { + "epoch": 13.59, + "learning_rate": 4.3205420827389446e-05, + "loss": 0.0051, + "step": 76240 + }, + { + "epoch": 13.6, + "learning_rate": 4.320452924393724e-05, + "loss": 0.0041, + "step": 76250 + }, + { + "epoch": 13.6, + "learning_rate": 4.320363766048502e-05, + "loss": 0.0021, + "step": 76260 + }, + { + "epoch": 13.6, + "learning_rate": 4.320274607703281e-05, + "loss": 0.0036, + "step": 76270 + }, + { + "epoch": 13.6, + "learning_rate": 4.3201854493580604e-05, + "loss": 0.004, + "step": 76280 + }, + { + "epoch": 13.6, + "learning_rate": 4.320096291012839e-05, + "loss": 0.0035, + "step": 76290 + }, + { + "epoch": 13.61, + "learning_rate": 4.320007132667618e-05, + "loss": 0.0024, + "step": 76300 + }, + { + "epoch": 13.61, + "learning_rate": 4.3199179743223964e-05, + "loss": 0.0028, + "step": 76310 + }, + { + "epoch": 13.61, + "learning_rate": 4.3198288159771756e-05, + "loss": 0.0032, + "step": 76320 + }, + { + "epoch": 13.61, + "learning_rate": 4.319739657631954e-05, + "loss": 0.0026, + "step": 76330 + }, + { + "epoch": 13.61, + "learning_rate": 4.319650499286734e-05, + "loss": 0.0025, + "step": 76340 + }, + { + "epoch": 13.61, + "learning_rate": 4.319561340941512e-05, + "loss": 0.0024, + "step": 76350 + }, + { + "epoch": 13.62, + "learning_rate": 4.3194721825962914e-05, + "loss": 0.0021, + "step": 76360 + }, + { + "epoch": 13.62, + "learning_rate": 4.3193830242510705e-05, + "loss": 0.0034, + "step": 76370 + }, + { + "epoch": 13.62, + "learning_rate": 4.319293865905849e-05, + "loss": 0.0027, + "step": 76380 + }, + { + "epoch": 13.62, + "learning_rate": 4.319204707560628e-05, + "loss": 0.0061, + "step": 76390 + }, + { + "epoch": 13.62, + "learning_rate": 4.3191155492154065e-05, + "loss": 0.005, + "step": 76400 + }, + { + "epoch": 13.63, + "learning_rate": 4.3190263908701856e-05, + "loss": 0.0031, + "step": 76410 + }, + { + "epoch": 13.63, + "learning_rate": 4.318937232524965e-05, + "loss": 0.0038, + "step": 76420 + }, + { + "epoch": 13.63, + "learning_rate": 4.318848074179743e-05, + "loss": 0.0015, + "step": 76430 + }, + { + "epoch": 13.63, + "learning_rate": 4.318758915834522e-05, + "loss": 0.0026, + "step": 76440 + }, + { + "epoch": 13.63, + "learning_rate": 4.3186697574893014e-05, + "loss": 0.0045, + "step": 76450 + }, + { + "epoch": 13.63, + "learning_rate": 4.3185805991440805e-05, + "loss": 0.0047, + "step": 76460 + }, + { + "epoch": 13.64, + "learning_rate": 4.318491440798859e-05, + "loss": 0.0029, + "step": 76470 + }, + { + "epoch": 13.64, + "learning_rate": 4.318402282453638e-05, + "loss": 0.0033, + "step": 76480 + }, + { + "epoch": 13.64, + "learning_rate": 4.3183131241084166e-05, + "loss": 0.0018, + "step": 76490 + }, + { + "epoch": 13.64, + "learning_rate": 4.318223965763196e-05, + "loss": 0.0025, + "step": 76500 + }, + { + "epoch": 13.64, + "learning_rate": 4.318134807417975e-05, + "loss": 0.0016, + "step": 76510 + }, + { + "epoch": 13.64, + "learning_rate": 4.318045649072753e-05, + "loss": 0.0051, + "step": 76520 + }, + { + "epoch": 13.65, + "learning_rate": 4.3179564907275324e-05, + "loss": 0.0075, + "step": 76530 + }, + { + "epoch": 13.65, + "learning_rate": 4.317867332382311e-05, + "loss": 0.0046, + "step": 76540 + }, + { + "epoch": 13.65, + "learning_rate": 4.31777817403709e-05, + "loss": 0.0049, + "step": 76550 + }, + { + "epoch": 13.65, + "learning_rate": 4.317689015691869e-05, + "loss": 0.0028, + "step": 76560 + }, + { + "epoch": 13.65, + "learning_rate": 4.317599857346648e-05, + "loss": 0.0051, + "step": 76570 + }, + { + "epoch": 13.66, + "learning_rate": 4.3175106990014266e-05, + "loss": 0.0035, + "step": 76580 + }, + { + "epoch": 13.66, + "learning_rate": 4.317421540656206e-05, + "loss": 0.0024, + "step": 76590 + }, + { + "epoch": 13.66, + "learning_rate": 4.317332382310985e-05, + "loss": 0.0036, + "step": 76600 + }, + { + "epoch": 13.66, + "learning_rate": 4.317243223965763e-05, + "loss": 0.0032, + "step": 76610 + }, + { + "epoch": 13.66, + "learning_rate": 4.3171540656205424e-05, + "loss": 0.0018, + "step": 76620 + }, + { + "epoch": 13.66, + "learning_rate": 4.317064907275321e-05, + "loss": 0.0024, + "step": 76630 + }, + { + "epoch": 13.67, + "learning_rate": 4.3169757489301e-05, + "loss": 0.0045, + "step": 76640 + }, + { + "epoch": 13.67, + "learning_rate": 4.316886590584879e-05, + "loss": 0.0007, + "step": 76650 + }, + { + "epoch": 13.67, + "learning_rate": 4.3167974322396575e-05, + "loss": 0.0031, + "step": 76660 + }, + { + "epoch": 13.67, + "learning_rate": 4.316708273894437e-05, + "loss": 0.0035, + "step": 76670 + }, + { + "epoch": 13.67, + "learning_rate": 4.316619115549216e-05, + "loss": 0.0051, + "step": 76680 + }, + { + "epoch": 13.68, + "learning_rate": 4.316529957203995e-05, + "loss": 0.0037, + "step": 76690 + }, + { + "epoch": 13.68, + "learning_rate": 4.3164407988587733e-05, + "loss": 0.0031, + "step": 76700 + }, + { + "epoch": 13.68, + "learning_rate": 4.3163516405135525e-05, + "loss": 0.005, + "step": 76710 + }, + { + "epoch": 13.68, + "learning_rate": 4.316262482168331e-05, + "loss": 0.0015, + "step": 76720 + }, + { + "epoch": 13.68, + "learning_rate": 4.3161822396576324e-05, + "loss": 0.0038, + "step": 76730 + }, + { + "epoch": 13.68, + "learning_rate": 4.3160930813124115e-05, + "loss": 0.0022, + "step": 76740 + }, + { + "epoch": 13.69, + "learning_rate": 4.31600392296719e-05, + "loss": 0.0029, + "step": 76750 + }, + { + "epoch": 13.69, + "learning_rate": 4.315914764621969e-05, + "loss": 0.0021, + "step": 76760 + }, + { + "epoch": 13.69, + "learning_rate": 4.3158256062767475e-05, + "loss": 0.0021, + "step": 76770 + }, + { + "epoch": 13.69, + "learning_rate": 4.3157364479315266e-05, + "loss": 0.0053, + "step": 76780 + }, + { + "epoch": 13.69, + "learning_rate": 4.315647289586305e-05, + "loss": 0.0033, + "step": 76790 + }, + { + "epoch": 13.69, + "learning_rate": 4.315558131241084e-05, + "loss": 0.0031, + "step": 76800 + }, + { + "epoch": 13.7, + "learning_rate": 4.315468972895863e-05, + "loss": 0.0051, + "step": 76810 + }, + { + "epoch": 13.7, + "learning_rate": 4.315379814550642e-05, + "loss": 0.0024, + "step": 76820 + }, + { + "epoch": 13.7, + "learning_rate": 4.3152906562054216e-05, + "loss": 0.0025, + "step": 76830 + }, + { + "epoch": 13.7, + "learning_rate": 4.3152014978602e-05, + "loss": 0.0027, + "step": 76840 + }, + { + "epoch": 13.7, + "learning_rate": 4.315112339514979e-05, + "loss": 0.003, + "step": 76850 + }, + { + "epoch": 13.71, + "learning_rate": 4.3150231811697576e-05, + "loss": 0.0046, + "step": 76860 + }, + { + "epoch": 13.71, + "learning_rate": 4.314934022824537e-05, + "loss": 0.0043, + "step": 76870 + }, + { + "epoch": 13.71, + "learning_rate": 4.314844864479315e-05, + "loss": 0.0032, + "step": 76880 + }, + { + "epoch": 13.71, + "learning_rate": 4.314755706134094e-05, + "loss": 0.0032, + "step": 76890 + }, + { + "epoch": 13.71, + "learning_rate": 4.3146665477888734e-05, + "loss": 0.0044, + "step": 76900 + }, + { + "epoch": 13.71, + "learning_rate": 4.314577389443652e-05, + "loss": 0.0022, + "step": 76910 + }, + { + "epoch": 13.72, + "learning_rate": 4.314488231098431e-05, + "loss": 0.0037, + "step": 76920 + }, + { + "epoch": 13.72, + "learning_rate": 4.3143990727532094e-05, + "loss": 0.0018, + "step": 76930 + }, + { + "epoch": 13.72, + "learning_rate": 4.314309914407989e-05, + "loss": 0.004, + "step": 76940 + }, + { + "epoch": 13.72, + "learning_rate": 4.3142207560627676e-05, + "loss": 0.0035, + "step": 76950 + }, + { + "epoch": 13.72, + "learning_rate": 4.314131597717547e-05, + "loss": 0.0049, + "step": 76960 + }, + { + "epoch": 13.73, + "learning_rate": 4.314042439372326e-05, + "loss": 0.0027, + "step": 76970 + }, + { + "epoch": 13.73, + "learning_rate": 4.313953281027104e-05, + "loss": 0.0031, + "step": 76980 + }, + { + "epoch": 13.73, + "learning_rate": 4.3138641226818834e-05, + "loss": 0.0065, + "step": 76990 + }, + { + "epoch": 13.73, + "learning_rate": 4.313774964336662e-05, + "loss": 0.0038, + "step": 77000 + }, + { + "epoch": 13.73, + "learning_rate": 4.313685805991441e-05, + "loss": 0.0024, + "step": 77010 + }, + { + "epoch": 13.73, + "learning_rate": 4.3135966476462194e-05, + "loss": 0.0036, + "step": 77020 + }, + { + "epoch": 13.74, + "learning_rate": 4.3135074893009986e-05, + "loss": 0.0048, + "step": 77030 + }, + { + "epoch": 13.74, + "learning_rate": 4.313418330955778e-05, + "loss": 0.0051, + "step": 77040 + }, + { + "epoch": 13.74, + "learning_rate": 4.313329172610557e-05, + "loss": 0.0025, + "step": 77050 + }, + { + "epoch": 13.74, + "learning_rate": 4.313240014265336e-05, + "loss": 0.0022, + "step": 77060 + }, + { + "epoch": 13.74, + "learning_rate": 4.3131508559201144e-05, + "loss": 0.0036, + "step": 77070 + }, + { + "epoch": 13.74, + "learning_rate": 4.3130616975748935e-05, + "loss": 0.0028, + "step": 77080 + }, + { + "epoch": 13.75, + "learning_rate": 4.312972539229672e-05, + "loss": 0.0018, + "step": 77090 + }, + { + "epoch": 13.75, + "learning_rate": 4.312883380884451e-05, + "loss": 0.0039, + "step": 77100 + }, + { + "epoch": 13.75, + "learning_rate": 4.3127942225392295e-05, + "loss": 0.0028, + "step": 77110 + }, + { + "epoch": 13.75, + "learning_rate": 4.3127050641940086e-05, + "loss": 0.0051, + "step": 77120 + }, + { + "epoch": 13.75, + "learning_rate": 4.312615905848788e-05, + "loss": 0.0049, + "step": 77130 + }, + { + "epoch": 13.76, + "learning_rate": 4.312526747503566e-05, + "loss": 0.0045, + "step": 77140 + }, + { + "epoch": 13.76, + "learning_rate": 4.312437589158345e-05, + "loss": 0.0037, + "step": 77150 + }, + { + "epoch": 13.76, + "learning_rate": 4.3123484308131244e-05, + "loss": 0.0029, + "step": 77160 + }, + { + "epoch": 13.76, + "learning_rate": 4.3122592724679036e-05, + "loss": 0.0033, + "step": 77170 + }, + { + "epoch": 13.76, + "learning_rate": 4.312170114122682e-05, + "loss": 0.0039, + "step": 77180 + }, + { + "epoch": 13.76, + "learning_rate": 4.312080955777461e-05, + "loss": 0.0016, + "step": 77190 + }, + { + "epoch": 13.77, + "learning_rate": 4.31199179743224e-05, + "loss": 0.004, + "step": 77200 + }, + { + "epoch": 13.77, + "learning_rate": 4.311902639087019e-05, + "loss": 0.0029, + "step": 77210 + }, + { + "epoch": 13.77, + "learning_rate": 4.311813480741798e-05, + "loss": 0.0032, + "step": 77220 + }, + { + "epoch": 13.77, + "learning_rate": 4.311724322396576e-05, + "loss": 0.0035, + "step": 77230 + }, + { + "epoch": 13.77, + "learning_rate": 4.3116351640513554e-05, + "loss": 0.0055, + "step": 77240 + }, + { + "epoch": 13.77, + "learning_rate": 4.311546005706134e-05, + "loss": 0.0033, + "step": 77250 + }, + { + "epoch": 13.78, + "learning_rate": 4.311456847360913e-05, + "loss": 0.0074, + "step": 77260 + }, + { + "epoch": 13.78, + "learning_rate": 4.311367689015692e-05, + "loss": 0.0013, + "step": 77270 + }, + { + "epoch": 13.78, + "learning_rate": 4.311278530670471e-05, + "loss": 0.0035, + "step": 77280 + }, + { + "epoch": 13.78, + "learning_rate": 4.31118937232525e-05, + "loss": 0.0014, + "step": 77290 + }, + { + "epoch": 13.78, + "learning_rate": 4.311100213980029e-05, + "loss": 0.0026, + "step": 77300 + }, + { + "epoch": 13.79, + "learning_rate": 4.311011055634808e-05, + "loss": 0.0033, + "step": 77310 + }, + { + "epoch": 13.79, + "learning_rate": 4.310921897289586e-05, + "loss": 0.0049, + "step": 77320 + }, + { + "epoch": 13.79, + "learning_rate": 4.3108327389443654e-05, + "loss": 0.003, + "step": 77330 + }, + { + "epoch": 13.79, + "learning_rate": 4.310743580599144e-05, + "loss": 0.0032, + "step": 77340 + }, + { + "epoch": 13.79, + "learning_rate": 4.310654422253923e-05, + "loss": 0.0032, + "step": 77350 + }, + { + "epoch": 13.79, + "learning_rate": 4.310565263908702e-05, + "loss": 0.004, + "step": 77360 + }, + { + "epoch": 13.8, + "learning_rate": 4.3104761055634806e-05, + "loss": 0.0058, + "step": 77370 + }, + { + "epoch": 13.8, + "learning_rate": 4.3103869472182604e-05, + "loss": 0.0022, + "step": 77380 + }, + { + "epoch": 13.8, + "learning_rate": 4.310297788873039e-05, + "loss": 0.0037, + "step": 77390 + }, + { + "epoch": 13.8, + "learning_rate": 4.310208630527818e-05, + "loss": 0.0033, + "step": 77400 + }, + { + "epoch": 13.8, + "learning_rate": 4.3101194721825964e-05, + "loss": 0.0029, + "step": 77410 + }, + { + "epoch": 13.81, + "learning_rate": 4.3100303138373755e-05, + "loss": 0.0037, + "step": 77420 + }, + { + "epoch": 13.81, + "learning_rate": 4.3099411554921546e-05, + "loss": 0.0024, + "step": 77430 + }, + { + "epoch": 13.81, + "learning_rate": 4.309851997146933e-05, + "loss": 0.0025, + "step": 77440 + }, + { + "epoch": 13.81, + "learning_rate": 4.309762838801712e-05, + "loss": 0.0033, + "step": 77450 + }, + { + "epoch": 13.81, + "learning_rate": 4.3096736804564906e-05, + "loss": 0.0035, + "step": 77460 + }, + { + "epoch": 13.81, + "learning_rate": 4.30958452211127e-05, + "loss": 0.0025, + "step": 77470 + }, + { + "epoch": 13.82, + "learning_rate": 4.309495363766048e-05, + "loss": 0.0026, + "step": 77480 + }, + { + "epoch": 13.82, + "learning_rate": 4.309406205420828e-05, + "loss": 0.005, + "step": 77490 + }, + { + "epoch": 13.82, + "learning_rate": 4.3093170470756064e-05, + "loss": 0.0023, + "step": 77500 + }, + { + "epoch": 13.82, + "learning_rate": 4.3092278887303855e-05, + "loss": 0.0073, + "step": 77510 + }, + { + "epoch": 13.82, + "learning_rate": 4.3091387303851647e-05, + "loss": 0.0019, + "step": 77520 + }, + { + "epoch": 13.82, + "learning_rate": 4.309049572039943e-05, + "loss": 0.0012, + "step": 77530 + }, + { + "epoch": 13.83, + "learning_rate": 4.308960413694722e-05, + "loss": 0.003, + "step": 77540 + }, + { + "epoch": 13.83, + "learning_rate": 4.308871255349501e-05, + "loss": 0.003, + "step": 77550 + }, + { + "epoch": 13.83, + "learning_rate": 4.30878209700428e-05, + "loss": 0.0033, + "step": 77560 + }, + { + "epoch": 13.83, + "learning_rate": 4.308692938659058e-05, + "loss": 0.0034, + "step": 77570 + }, + { + "epoch": 13.83, + "learning_rate": 4.3086037803138374e-05, + "loss": 0.0036, + "step": 77580 + }, + { + "epoch": 13.84, + "learning_rate": 4.3085146219686165e-05, + "loss": 0.0047, + "step": 77590 + }, + { + "epoch": 13.84, + "learning_rate": 4.3084254636233956e-05, + "loss": 0.0018, + "step": 77600 + }, + { + "epoch": 13.84, + "learning_rate": 4.308336305278175e-05, + "loss": 0.0029, + "step": 77610 + }, + { + "epoch": 13.84, + "learning_rate": 4.308247146932953e-05, + "loss": 0.0031, + "step": 77620 + }, + { + "epoch": 13.84, + "learning_rate": 4.308157988587732e-05, + "loss": 0.004, + "step": 77630 + }, + { + "epoch": 13.84, + "learning_rate": 4.308068830242511e-05, + "loss": 0.0045, + "step": 77640 + }, + { + "epoch": 13.85, + "learning_rate": 4.30797967189729e-05, + "loss": 0.0041, + "step": 77650 + }, + { + "epoch": 13.85, + "learning_rate": 4.307890513552069e-05, + "loss": 0.0042, + "step": 77660 + }, + { + "epoch": 13.85, + "learning_rate": 4.3078013552068474e-05, + "loss": 0.0036, + "step": 77670 + }, + { + "epoch": 13.85, + "learning_rate": 4.3077121968616265e-05, + "loss": 0.0035, + "step": 77680 + }, + { + "epoch": 13.85, + "learning_rate": 4.307623038516405e-05, + "loss": 0.0031, + "step": 77690 + }, + { + "epoch": 13.86, + "learning_rate": 4.307533880171184e-05, + "loss": 0.0054, + "step": 77700 + }, + { + "epoch": 13.86, + "learning_rate": 4.307444721825963e-05, + "loss": 0.0022, + "step": 77710 + }, + { + "epoch": 13.86, + "learning_rate": 4.307355563480742e-05, + "loss": 0.0032, + "step": 77720 + }, + { + "epoch": 13.86, + "learning_rate": 4.307266405135521e-05, + "loss": 0.0041, + "step": 77730 + }, + { + "epoch": 13.86, + "learning_rate": 4.3071772467903e-05, + "loss": 0.0034, + "step": 77740 + }, + { + "epoch": 13.86, + "learning_rate": 4.307088088445079e-05, + "loss": 0.0038, + "step": 77750 + }, + { + "epoch": 13.87, + "learning_rate": 4.3069989300998575e-05, + "loss": 0.0027, + "step": 77760 + }, + { + "epoch": 13.87, + "learning_rate": 4.3069097717546366e-05, + "loss": 0.0031, + "step": 77770 + }, + { + "epoch": 13.87, + "learning_rate": 4.306820613409415e-05, + "loss": 0.0029, + "step": 77780 + }, + { + "epoch": 13.87, + "learning_rate": 4.306731455064194e-05, + "loss": 0.0028, + "step": 77790 + }, + { + "epoch": 13.87, + "learning_rate": 4.3066422967189726e-05, + "loss": 0.0024, + "step": 77800 + }, + { + "epoch": 13.87, + "learning_rate": 4.306553138373752e-05, + "loss": 0.0029, + "step": 77810 + }, + { + "epoch": 13.88, + "learning_rate": 4.306463980028531e-05, + "loss": 0.0033, + "step": 77820 + }, + { + "epoch": 13.88, + "learning_rate": 4.30637482168331e-05, + "loss": 0.0031, + "step": 77830 + }, + { + "epoch": 13.88, + "learning_rate": 4.306285663338089e-05, + "loss": 0.0032, + "step": 77840 + }, + { + "epoch": 13.88, + "learning_rate": 4.3061965049928675e-05, + "loss": 0.004, + "step": 77850 + }, + { + "epoch": 13.88, + "learning_rate": 4.3061073466476466e-05, + "loss": 0.0043, + "step": 77860 + }, + { + "epoch": 13.89, + "learning_rate": 4.306018188302425e-05, + "loss": 0.0033, + "step": 77870 + }, + { + "epoch": 13.89, + "learning_rate": 4.305929029957204e-05, + "loss": 0.0036, + "step": 77880 + }, + { + "epoch": 13.89, + "learning_rate": 4.305839871611983e-05, + "loss": 0.002, + "step": 77890 + }, + { + "epoch": 13.89, + "learning_rate": 4.305750713266762e-05, + "loss": 0.0047, + "step": 77900 + }, + { + "epoch": 13.89, + "learning_rate": 4.305661554921541e-05, + "loss": 0.0043, + "step": 77910 + }, + { + "epoch": 13.89, + "learning_rate": 4.305572396576319e-05, + "loss": 0.003, + "step": 77920 + }, + { + "epoch": 13.9, + "learning_rate": 4.305483238231099e-05, + "loss": 0.0055, + "step": 77930 + }, + { + "epoch": 13.9, + "learning_rate": 4.3053940798858776e-05, + "loss": 0.0035, + "step": 77940 + }, + { + "epoch": 13.9, + "learning_rate": 4.305304921540657e-05, + "loss": 0.003, + "step": 77950 + }, + { + "epoch": 13.9, + "learning_rate": 4.305215763195435e-05, + "loss": 0.006, + "step": 77960 + }, + { + "epoch": 13.9, + "learning_rate": 4.305126604850214e-05, + "loss": 0.0059, + "step": 77970 + }, + { + "epoch": 13.91, + "learning_rate": 4.3050374465049934e-05, + "loss": 0.0043, + "step": 77980 + }, + { + "epoch": 13.91, + "learning_rate": 4.304948288159772e-05, + "loss": 0.004, + "step": 77990 + }, + { + "epoch": 13.91, + "learning_rate": 4.304859129814551e-05, + "loss": 0.0056, + "step": 78000 + }, + { + "epoch": 13.91, + "learning_rate": 4.3047699714693294e-05, + "loss": 0.0043, + "step": 78010 + }, + { + "epoch": 13.91, + "learning_rate": 4.3046808131241085e-05, + "loss": 0.0048, + "step": 78020 + }, + { + "epoch": 13.91, + "learning_rate": 4.304591654778887e-05, + "loss": 0.0027, + "step": 78030 + }, + { + "epoch": 13.92, + "learning_rate": 4.304502496433667e-05, + "loss": 0.0039, + "step": 78040 + }, + { + "epoch": 13.92, + "learning_rate": 4.304413338088445e-05, + "loss": 0.0025, + "step": 78050 + }, + { + "epoch": 13.92, + "learning_rate": 4.304324179743224e-05, + "loss": 0.0022, + "step": 78060 + }, + { + "epoch": 13.92, + "learning_rate": 4.3042350213980034e-05, + "loss": 0.0033, + "step": 78070 + }, + { + "epoch": 13.92, + "learning_rate": 4.304145863052782e-05, + "loss": 0.0037, + "step": 78080 + }, + { + "epoch": 13.92, + "learning_rate": 4.304056704707561e-05, + "loss": 0.0029, + "step": 78090 + }, + { + "epoch": 13.93, + "learning_rate": 4.3039675463623394e-05, + "loss": 0.0028, + "step": 78100 + }, + { + "epoch": 13.93, + "learning_rate": 4.3038783880171186e-05, + "loss": 0.0026, + "step": 78110 + }, + { + "epoch": 13.93, + "learning_rate": 4.303789229671898e-05, + "loss": 0.0021, + "step": 78120 + }, + { + "epoch": 13.93, + "learning_rate": 4.303700071326676e-05, + "loss": 0.0065, + "step": 78130 + }, + { + "epoch": 13.93, + "learning_rate": 4.303610912981455e-05, + "loss": 0.0033, + "step": 78140 + }, + { + "epoch": 13.94, + "learning_rate": 4.3035217546362344e-05, + "loss": 0.0022, + "step": 78150 + }, + { + "epoch": 13.94, + "learning_rate": 4.3034325962910135e-05, + "loss": 0.0038, + "step": 78160 + }, + { + "epoch": 13.94, + "learning_rate": 4.303343437945792e-05, + "loss": 0.0043, + "step": 78170 + }, + { + "epoch": 13.94, + "learning_rate": 4.303254279600571e-05, + "loss": 0.0037, + "step": 78180 + }, + { + "epoch": 13.94, + "learning_rate": 4.3031651212553495e-05, + "loss": 0.0037, + "step": 78190 + }, + { + "epoch": 13.94, + "learning_rate": 4.3030759629101286e-05, + "loss": 0.0037, + "step": 78200 + }, + { + "epoch": 13.95, + "learning_rate": 4.302986804564908e-05, + "loss": 0.0035, + "step": 78210 + }, + { + "epoch": 13.95, + "learning_rate": 4.302897646219686e-05, + "loss": 0.0044, + "step": 78220 + }, + { + "epoch": 13.95, + "learning_rate": 4.302808487874465e-05, + "loss": 0.0035, + "step": 78230 + }, + { + "epoch": 13.95, + "learning_rate": 4.302719329529244e-05, + "loss": 0.0026, + "step": 78240 + }, + { + "epoch": 13.95, + "learning_rate": 4.302630171184023e-05, + "loss": 0.0029, + "step": 78250 + }, + { + "epoch": 13.96, + "learning_rate": 4.302541012838802e-05, + "loss": 0.0023, + "step": 78260 + }, + { + "epoch": 13.96, + "learning_rate": 4.302451854493581e-05, + "loss": 0.0022, + "step": 78270 + }, + { + "epoch": 13.96, + "learning_rate": 4.3023626961483596e-05, + "loss": 0.002, + "step": 78280 + }, + { + "epoch": 13.96, + "learning_rate": 4.302273537803139e-05, + "loss": 0.0041, + "step": 78290 + }, + { + "epoch": 13.96, + "learning_rate": 4.302184379457918e-05, + "loss": 0.0041, + "step": 78300 + }, + { + "epoch": 13.96, + "learning_rate": 4.302095221112696e-05, + "loss": 0.002, + "step": 78310 + }, + { + "epoch": 13.97, + "learning_rate": 4.3020060627674754e-05, + "loss": 0.006, + "step": 78320 + }, + { + "epoch": 13.97, + "learning_rate": 4.301916904422254e-05, + "loss": 0.0021, + "step": 78330 + }, + { + "epoch": 13.97, + "learning_rate": 4.301827746077033e-05, + "loss": 0.0025, + "step": 78340 + }, + { + "epoch": 13.97, + "learning_rate": 4.3017385877318114e-05, + "loss": 0.0029, + "step": 78350 + }, + { + "epoch": 13.97, + "learning_rate": 4.3016494293865905e-05, + "loss": 0.0019, + "step": 78360 + }, + { + "epoch": 13.97, + "learning_rate": 4.3015602710413696e-05, + "loss": 0.0029, + "step": 78370 + }, + { + "epoch": 13.98, + "learning_rate": 4.301471112696149e-05, + "loss": 0.0025, + "step": 78380 + }, + { + "epoch": 13.98, + "learning_rate": 4.301381954350928e-05, + "loss": 0.0017, + "step": 78390 + }, + { + "epoch": 13.98, + "learning_rate": 4.301292796005706e-05, + "loss": 0.0035, + "step": 78400 + }, + { + "epoch": 13.98, + "learning_rate": 4.3012036376604854e-05, + "loss": 0.003, + "step": 78410 + }, + { + "epoch": 13.98, + "learning_rate": 4.301114479315264e-05, + "loss": 0.0027, + "step": 78420 + }, + { + "epoch": 13.99, + "learning_rate": 4.301025320970043e-05, + "loss": 0.0028, + "step": 78430 + }, + { + "epoch": 13.99, + "learning_rate": 4.300936162624822e-05, + "loss": 0.0028, + "step": 78440 + }, + { + "epoch": 13.99, + "learning_rate": 4.3008470042796005e-05, + "loss": 0.0024, + "step": 78450 + }, + { + "epoch": 13.99, + "learning_rate": 4.30075784593438e-05, + "loss": 0.002, + "step": 78460 + }, + { + "epoch": 13.99, + "learning_rate": 4.300668687589158e-05, + "loss": 0.0025, + "step": 78470 + }, + { + "epoch": 13.99, + "learning_rate": 4.300579529243938e-05, + "loss": 0.003, + "step": 78480 + }, + { + "epoch": 14.0, + "learning_rate": 4.3004903708987164e-05, + "loss": 0.0058, + "step": 78490 + }, + { + "epoch": 14.0, + "learning_rate": 4.3004012125534955e-05, + "loss": 0.0039, + "step": 78500 + }, + { + "epoch": 14.0, + "learning_rate": 4.300312054208274e-05, + "loss": 0.0038, + "step": 78510 + }, + { + "epoch": 14.0, + "eval_loss": 0.019745901226997375, + "eval_runtime": 196.3873, + "eval_samples_per_second": 23.622, + "eval_steps_per_second": 2.953, + "step": 78512 + }, + { + "epoch": 14.0, + "learning_rate": 4.300222895863053e-05, + "loss": 0.0024, + "step": 78520 + }, + { + "epoch": 14.0, + "learning_rate": 4.300133737517832e-05, + "loss": 0.0034, + "step": 78530 + }, + { + "epoch": 14.0, + "learning_rate": 4.3000445791726106e-05, + "loss": 0.0031, + "step": 78540 + }, + { + "epoch": 14.01, + "learning_rate": 4.29995542082739e-05, + "loss": 0.0048, + "step": 78550 + }, + { + "epoch": 14.01, + "learning_rate": 4.299866262482168e-05, + "loss": 0.0019, + "step": 78560 + }, + { + "epoch": 14.01, + "learning_rate": 4.299777104136947e-05, + "loss": 0.0021, + "step": 78570 + }, + { + "epoch": 14.01, + "learning_rate": 4.299687945791726e-05, + "loss": 0.0039, + "step": 78580 + }, + { + "epoch": 14.01, + "learning_rate": 4.2995987874465055e-05, + "loss": 0.0021, + "step": 78590 + }, + { + "epoch": 14.02, + "learning_rate": 4.299509629101284e-05, + "loss": 0.0026, + "step": 78600 + }, + { + "epoch": 14.02, + "learning_rate": 4.299420470756063e-05, + "loss": 0.0024, + "step": 78610 + }, + { + "epoch": 14.02, + "learning_rate": 4.299331312410842e-05, + "loss": 0.0034, + "step": 78620 + }, + { + "epoch": 14.02, + "learning_rate": 4.2992421540656207e-05, + "loss": 0.0022, + "step": 78630 + }, + { + "epoch": 14.02, + "learning_rate": 4.2991529957204e-05, + "loss": 0.0031, + "step": 78640 + }, + { + "epoch": 14.02, + "learning_rate": 4.299063837375178e-05, + "loss": 0.0023, + "step": 78650 + }, + { + "epoch": 14.03, + "learning_rate": 4.2989746790299573e-05, + "loss": 0.0011, + "step": 78660 + }, + { + "epoch": 14.03, + "learning_rate": 4.2988855206847365e-05, + "loss": 0.004, + "step": 78670 + }, + { + "epoch": 14.03, + "learning_rate": 4.298796362339515e-05, + "loss": 0.0029, + "step": 78680 + }, + { + "epoch": 14.03, + "learning_rate": 4.298707203994294e-05, + "loss": 0.0021, + "step": 78690 + }, + { + "epoch": 14.03, + "learning_rate": 4.298618045649073e-05, + "loss": 0.0019, + "step": 78700 + }, + { + "epoch": 14.04, + "learning_rate": 4.298528887303852e-05, + "loss": 0.0041, + "step": 78710 + }, + { + "epoch": 14.04, + "learning_rate": 4.298439728958631e-05, + "loss": 0.0025, + "step": 78720 + }, + { + "epoch": 14.04, + "learning_rate": 4.29835057061341e-05, + "loss": 0.0043, + "step": 78730 + }, + { + "epoch": 14.04, + "learning_rate": 4.298261412268188e-05, + "loss": 0.0027, + "step": 78740 + }, + { + "epoch": 14.04, + "learning_rate": 4.2981722539229674e-05, + "loss": 0.004, + "step": 78750 + }, + { + "epoch": 14.04, + "learning_rate": 4.2980830955777465e-05, + "loss": 0.002, + "step": 78760 + }, + { + "epoch": 14.05, + "learning_rate": 4.297993937232525e-05, + "loss": 0.0046, + "step": 78770 + }, + { + "epoch": 14.05, + "learning_rate": 4.297904778887304e-05, + "loss": 0.0037, + "step": 78780 + }, + { + "epoch": 14.05, + "learning_rate": 4.2978156205420825e-05, + "loss": 0.0028, + "step": 78790 + }, + { + "epoch": 14.05, + "learning_rate": 4.2977264621968617e-05, + "loss": 0.0029, + "step": 78800 + }, + { + "epoch": 14.05, + "learning_rate": 4.297637303851641e-05, + "loss": 0.002, + "step": 78810 + }, + { + "epoch": 14.05, + "learning_rate": 4.29754814550642e-05, + "loss": 0.003, + "step": 78820 + }, + { + "epoch": 14.06, + "learning_rate": 4.297458987161198e-05, + "loss": 0.0022, + "step": 78830 + }, + { + "epoch": 14.06, + "learning_rate": 4.2973698288159775e-05, + "loss": 0.0018, + "step": 78840 + }, + { + "epoch": 14.06, + "learning_rate": 4.2972806704707566e-05, + "loss": 0.0034, + "step": 78850 + }, + { + "epoch": 14.06, + "learning_rate": 4.297191512125535e-05, + "loss": 0.0017, + "step": 78860 + }, + { + "epoch": 14.06, + "learning_rate": 4.297102353780314e-05, + "loss": 0.0022, + "step": 78870 + }, + { + "epoch": 14.07, + "learning_rate": 4.2970131954350926e-05, + "loss": 0.0027, + "step": 78880 + }, + { + "epoch": 14.07, + "learning_rate": 4.296924037089872e-05, + "loss": 0.0044, + "step": 78890 + }, + { + "epoch": 14.07, + "learning_rate": 4.296834878744651e-05, + "loss": 0.0025, + "step": 78900 + }, + { + "epoch": 14.07, + "learning_rate": 4.296745720399429e-05, + "loss": 0.0029, + "step": 78910 + }, + { + "epoch": 14.07, + "learning_rate": 4.296656562054209e-05, + "loss": 0.0018, + "step": 78920 + }, + { + "epoch": 14.07, + "learning_rate": 4.2965674037089875e-05, + "loss": 0.0025, + "step": 78930 + }, + { + "epoch": 14.08, + "learning_rate": 4.2964782453637666e-05, + "loss": 0.0032, + "step": 78940 + }, + { + "epoch": 14.08, + "learning_rate": 4.296389087018545e-05, + "loss": 0.003, + "step": 78950 + }, + { + "epoch": 14.08, + "learning_rate": 4.296299928673324e-05, + "loss": 0.0015, + "step": 78960 + }, + { + "epoch": 14.08, + "learning_rate": 4.2962107703281026e-05, + "loss": 0.0024, + "step": 78970 + }, + { + "epoch": 14.08, + "learning_rate": 4.296121611982882e-05, + "loss": 0.0026, + "step": 78980 + }, + { + "epoch": 14.09, + "learning_rate": 4.296032453637661e-05, + "loss": 0.0017, + "step": 78990 + }, + { + "epoch": 14.09, + "learning_rate": 4.295943295292439e-05, + "loss": 0.0055, + "step": 79000 + }, + { + "epoch": 14.09, + "learning_rate": 4.2958541369472184e-05, + "loss": 0.0058, + "step": 79010 + }, + { + "epoch": 14.09, + "learning_rate": 4.295764978601997e-05, + "loss": 0.0015, + "step": 79020 + }, + { + "epoch": 14.09, + "learning_rate": 4.295675820256777e-05, + "loss": 0.0026, + "step": 79030 + }, + { + "epoch": 14.09, + "learning_rate": 4.295586661911555e-05, + "loss": 0.0033, + "step": 79040 + }, + { + "epoch": 14.1, + "learning_rate": 4.295497503566334e-05, + "loss": 0.0031, + "step": 79050 + }, + { + "epoch": 14.1, + "learning_rate": 4.295408345221113e-05, + "loss": 0.0014, + "step": 79060 + }, + { + "epoch": 14.1, + "learning_rate": 4.295319186875892e-05, + "loss": 0.0019, + "step": 79070 + }, + { + "epoch": 14.1, + "learning_rate": 4.295230028530671e-05, + "loss": 0.0032, + "step": 79080 + }, + { + "epoch": 14.1, + "learning_rate": 4.2951408701854494e-05, + "loss": 0.0042, + "step": 79090 + }, + { + "epoch": 14.1, + "learning_rate": 4.2950517118402285e-05, + "loss": 0.002, + "step": 79100 + }, + { + "epoch": 14.11, + "learning_rate": 4.294962553495007e-05, + "loss": 0.0023, + "step": 79110 + }, + { + "epoch": 14.11, + "learning_rate": 4.294873395149786e-05, + "loss": 0.003, + "step": 79120 + }, + { + "epoch": 14.11, + "learning_rate": 4.294784236804565e-05, + "loss": 0.0055, + "step": 79130 + }, + { + "epoch": 14.11, + "learning_rate": 4.294695078459344e-05, + "loss": 0.0059, + "step": 79140 + }, + { + "epoch": 14.11, + "learning_rate": 4.2946059201141234e-05, + "loss": 0.0023, + "step": 79150 + }, + { + "epoch": 14.12, + "learning_rate": 4.294516761768902e-05, + "loss": 0.0017, + "step": 79160 + }, + { + "epoch": 14.12, + "learning_rate": 4.294427603423681e-05, + "loss": 0.002, + "step": 79170 + }, + { + "epoch": 14.12, + "learning_rate": 4.2943384450784594e-05, + "loss": 0.0041, + "step": 79180 + }, + { + "epoch": 14.12, + "learning_rate": 4.2942492867332386e-05, + "loss": 0.0036, + "step": 79190 + }, + { + "epoch": 14.12, + "learning_rate": 4.294160128388017e-05, + "loss": 0.0043, + "step": 79200 + }, + { + "epoch": 14.12, + "learning_rate": 4.294070970042796e-05, + "loss": 0.0031, + "step": 79210 + }, + { + "epoch": 14.13, + "learning_rate": 4.293981811697575e-05, + "loss": 0.0046, + "step": 79220 + }, + { + "epoch": 14.13, + "learning_rate": 4.293892653352354e-05, + "loss": 0.002, + "step": 79230 + }, + { + "epoch": 14.13, + "learning_rate": 4.293803495007133e-05, + "loss": 0.0035, + "step": 79240 + }, + { + "epoch": 14.13, + "learning_rate": 4.293714336661912e-05, + "loss": 0.0033, + "step": 79250 + }, + { + "epoch": 14.13, + "learning_rate": 4.293625178316691e-05, + "loss": 0.0027, + "step": 79260 + }, + { + "epoch": 14.14, + "learning_rate": 4.2935360199714695e-05, + "loss": 0.0021, + "step": 79270 + }, + { + "epoch": 14.14, + "learning_rate": 4.2934468616262486e-05, + "loss": 0.0016, + "step": 79280 + }, + { + "epoch": 14.14, + "learning_rate": 4.293357703281027e-05, + "loss": 0.0039, + "step": 79290 + }, + { + "epoch": 14.14, + "learning_rate": 4.293268544935806e-05, + "loss": 0.0076, + "step": 79300 + }, + { + "epoch": 14.14, + "learning_rate": 4.293179386590585e-05, + "loss": 0.0021, + "step": 79310 + }, + { + "epoch": 14.14, + "learning_rate": 4.293090228245364e-05, + "loss": 0.0042, + "step": 79320 + }, + { + "epoch": 14.15, + "learning_rate": 4.293001069900143e-05, + "loss": 0.0064, + "step": 79330 + }, + { + "epoch": 14.15, + "learning_rate": 4.292911911554921e-05, + "loss": 0.0038, + "step": 79340 + }, + { + "epoch": 14.15, + "learning_rate": 4.2928227532097004e-05, + "loss": 0.0057, + "step": 79350 + }, + { + "epoch": 14.15, + "learning_rate": 4.2927335948644796e-05, + "loss": 0.0024, + "step": 79360 + }, + { + "epoch": 14.15, + "learning_rate": 4.292644436519259e-05, + "loss": 0.0027, + "step": 79370 + }, + { + "epoch": 14.15, + "learning_rate": 4.292555278174038e-05, + "loss": 0.003, + "step": 79380 + }, + { + "epoch": 14.16, + "learning_rate": 4.292466119828816e-05, + "loss": 0.0015, + "step": 79390 + }, + { + "epoch": 14.16, + "learning_rate": 4.2923769614835954e-05, + "loss": 0.0035, + "step": 79400 + }, + { + "epoch": 14.16, + "learning_rate": 4.292287803138374e-05, + "loss": 0.0024, + "step": 79410 + }, + { + "epoch": 14.16, + "learning_rate": 4.292198644793153e-05, + "loss": 0.0018, + "step": 79420 + }, + { + "epoch": 14.16, + "learning_rate": 4.2921094864479314e-05, + "loss": 0.0033, + "step": 79430 + }, + { + "epoch": 14.17, + "learning_rate": 4.2920203281027105e-05, + "loss": 0.0039, + "step": 79440 + }, + { + "epoch": 14.17, + "learning_rate": 4.2919311697574896e-05, + "loss": 0.0021, + "step": 79450 + }, + { + "epoch": 14.17, + "learning_rate": 4.291842011412268e-05, + "loss": 0.0039, + "step": 79460 + }, + { + "epoch": 14.17, + "learning_rate": 4.291752853067048e-05, + "loss": 0.0033, + "step": 79470 + }, + { + "epoch": 14.17, + "learning_rate": 4.291663694721826e-05, + "loss": 0.0025, + "step": 79480 + }, + { + "epoch": 14.17, + "learning_rate": 4.2915745363766054e-05, + "loss": 0.0039, + "step": 79490 + }, + { + "epoch": 14.18, + "learning_rate": 4.291485378031384e-05, + "loss": 0.0022, + "step": 79500 + }, + { + "epoch": 14.18, + "learning_rate": 4.291396219686163e-05, + "loss": 0.0069, + "step": 79510 + }, + { + "epoch": 14.18, + "learning_rate": 4.2913070613409414e-05, + "loss": 0.0036, + "step": 79520 + }, + { + "epoch": 14.18, + "learning_rate": 4.2912179029957205e-05, + "loss": 0.0043, + "step": 79530 + }, + { + "epoch": 14.18, + "learning_rate": 4.2911287446505e-05, + "loss": 0.0021, + "step": 79540 + }, + { + "epoch": 14.19, + "learning_rate": 4.291039586305278e-05, + "loss": 0.0033, + "step": 79550 + }, + { + "epoch": 14.19, + "learning_rate": 4.290950427960057e-05, + "loss": 0.003, + "step": 79560 + }, + { + "epoch": 14.19, + "learning_rate": 4.290861269614836e-05, + "loss": 0.0015, + "step": 79570 + }, + { + "epoch": 14.19, + "learning_rate": 4.2907721112696155e-05, + "loss": 0.0031, + "step": 79580 + }, + { + "epoch": 14.19, + "learning_rate": 4.290682952924394e-05, + "loss": 0.0029, + "step": 79590 + }, + { + "epoch": 14.19, + "learning_rate": 4.290593794579173e-05, + "loss": 0.0027, + "step": 79600 + }, + { + "epoch": 14.2, + "learning_rate": 4.290504636233952e-05, + "loss": 0.0032, + "step": 79610 + }, + { + "epoch": 14.2, + "learning_rate": 4.2904154778887306e-05, + "loss": 0.004, + "step": 79620 + }, + { + "epoch": 14.2, + "learning_rate": 4.29032631954351e-05, + "loss": 0.0025, + "step": 79630 + }, + { + "epoch": 14.2, + "learning_rate": 4.290237161198288e-05, + "loss": 0.0025, + "step": 79640 + }, + { + "epoch": 14.2, + "learning_rate": 4.290148002853067e-05, + "loss": 0.0023, + "step": 79650 + }, + { + "epoch": 14.2, + "learning_rate": 4.290058844507846e-05, + "loss": 0.0025, + "step": 79660 + }, + { + "epoch": 14.21, + "learning_rate": 4.289969686162625e-05, + "loss": 0.0028, + "step": 79670 + }, + { + "epoch": 14.21, + "learning_rate": 4.289880527817404e-05, + "loss": 0.0043, + "step": 79680 + }, + { + "epoch": 14.21, + "learning_rate": 4.289791369472183e-05, + "loss": 0.0012, + "step": 79690 + }, + { + "epoch": 14.21, + "learning_rate": 4.289702211126962e-05, + "loss": 0.0034, + "step": 79700 + }, + { + "epoch": 14.21, + "learning_rate": 4.2896130527817407e-05, + "loss": 0.0025, + "step": 79710 + }, + { + "epoch": 14.22, + "learning_rate": 4.28952389443652e-05, + "loss": 0.0034, + "step": 79720 + }, + { + "epoch": 14.22, + "learning_rate": 4.289434736091298e-05, + "loss": 0.0028, + "step": 79730 + }, + { + "epoch": 14.22, + "learning_rate": 4.2893455777460773e-05, + "loss": 0.0041, + "step": 79740 + }, + { + "epoch": 14.22, + "learning_rate": 4.289256419400856e-05, + "loss": 0.0029, + "step": 79750 + }, + { + "epoch": 14.22, + "learning_rate": 4.289167261055635e-05, + "loss": 0.0022, + "step": 79760 + }, + { + "epoch": 14.22, + "learning_rate": 4.289078102710414e-05, + "loss": 0.0033, + "step": 79770 + }, + { + "epoch": 14.23, + "learning_rate": 4.2889889443651925e-05, + "loss": 0.0058, + "step": 79780 + }, + { + "epoch": 14.23, + "learning_rate": 4.2888997860199716e-05, + "loss": 0.0028, + "step": 79790 + }, + { + "epoch": 14.23, + "learning_rate": 4.288810627674751e-05, + "loss": 0.0035, + "step": 79800 + }, + { + "epoch": 14.23, + "learning_rate": 4.28872146932953e-05, + "loss": 0.0031, + "step": 79810 + }, + { + "epoch": 14.23, + "learning_rate": 4.288632310984308e-05, + "loss": 0.0037, + "step": 79820 + }, + { + "epoch": 14.24, + "learning_rate": 4.2885431526390874e-05, + "loss": 0.003, + "step": 79830 + }, + { + "epoch": 14.24, + "learning_rate": 4.2884539942938665e-05, + "loss": 0.0038, + "step": 79840 + }, + { + "epoch": 14.24, + "learning_rate": 4.288364835948645e-05, + "loss": 0.0061, + "step": 79850 + }, + { + "epoch": 14.24, + "learning_rate": 4.288275677603424e-05, + "loss": 0.0025, + "step": 79860 + }, + { + "epoch": 14.24, + "learning_rate": 4.2881865192582025e-05, + "loss": 0.0027, + "step": 79870 + }, + { + "epoch": 14.24, + "learning_rate": 4.2880973609129816e-05, + "loss": 0.0074, + "step": 79880 + }, + { + "epoch": 14.25, + "learning_rate": 4.28800820256776e-05, + "loss": 0.0021, + "step": 79890 + }, + { + "epoch": 14.25, + "learning_rate": 4.287919044222539e-05, + "loss": 0.003, + "step": 79900 + }, + { + "epoch": 14.25, + "learning_rate": 4.287829885877318e-05, + "loss": 0.0014, + "step": 79910 + }, + { + "epoch": 14.25, + "learning_rate": 4.2877407275320975e-05, + "loss": 0.0059, + "step": 79920 + }, + { + "epoch": 14.25, + "learning_rate": 4.2876515691868766e-05, + "loss": 0.0039, + "step": 79930 + }, + { + "epoch": 14.25, + "learning_rate": 4.287562410841655e-05, + "loss": 0.0031, + "step": 79940 + }, + { + "epoch": 14.26, + "learning_rate": 4.287473252496434e-05, + "loss": 0.0031, + "step": 79950 + }, + { + "epoch": 14.26, + "learning_rate": 4.2873840941512126e-05, + "loss": 0.0039, + "step": 79960 + }, + { + "epoch": 14.26, + "learning_rate": 4.287294935805992e-05, + "loss": 0.0037, + "step": 79970 + }, + { + "epoch": 14.26, + "learning_rate": 4.28720577746077e-05, + "loss": 0.0025, + "step": 79980 + }, + { + "epoch": 14.26, + "learning_rate": 4.287116619115549e-05, + "loss": 0.0038, + "step": 79990 + }, + { + "epoch": 14.27, + "learning_rate": 4.2870274607703284e-05, + "loss": 0.0046, + "step": 80000 + }, + { + "epoch": 14.27, + "learning_rate": 4.286938302425107e-05, + "loss": 0.0029, + "step": 80010 + }, + { + "epoch": 14.27, + "learning_rate": 4.2868491440798866e-05, + "loss": 0.0016, + "step": 80020 + }, + { + "epoch": 14.27, + "learning_rate": 4.286759985734665e-05, + "loss": 0.0061, + "step": 80030 + }, + { + "epoch": 14.27, + "learning_rate": 4.286670827389444e-05, + "loss": 0.0026, + "step": 80040 + }, + { + "epoch": 14.27, + "learning_rate": 4.2865816690442226e-05, + "loss": 0.004, + "step": 80050 + }, + { + "epoch": 14.28, + "learning_rate": 4.286492510699002e-05, + "loss": 0.002, + "step": 80060 + }, + { + "epoch": 14.28, + "learning_rate": 4.286403352353781e-05, + "loss": 0.003, + "step": 80070 + }, + { + "epoch": 14.28, + "learning_rate": 4.286314194008559e-05, + "loss": 0.0036, + "step": 80080 + }, + { + "epoch": 14.28, + "learning_rate": 4.2862250356633384e-05, + "loss": 0.0024, + "step": 80090 + }, + { + "epoch": 14.28, + "learning_rate": 4.286135877318117e-05, + "loss": 0.0039, + "step": 80100 + }, + { + "epoch": 14.28, + "learning_rate": 4.286046718972896e-05, + "loss": 0.003, + "step": 80110 + }, + { + "epoch": 14.29, + "learning_rate": 4.2859575606276745e-05, + "loss": 0.0053, + "step": 80120 + }, + { + "epoch": 14.29, + "learning_rate": 4.285868402282454e-05, + "loss": 0.0029, + "step": 80130 + }, + { + "epoch": 14.29, + "learning_rate": 4.285779243937233e-05, + "loss": 0.0026, + "step": 80140 + }, + { + "epoch": 14.29, + "learning_rate": 4.285690085592012e-05, + "loss": 0.0038, + "step": 80150 + }, + { + "epoch": 14.29, + "learning_rate": 4.285600927246791e-05, + "loss": 0.0045, + "step": 80160 + }, + { + "epoch": 14.3, + "learning_rate": 4.2855117689015694e-05, + "loss": 0.0024, + "step": 80170 + }, + { + "epoch": 14.3, + "learning_rate": 4.2854226105563485e-05, + "loss": 0.003, + "step": 80180 + }, + { + "epoch": 14.3, + "learning_rate": 4.285333452211127e-05, + "loss": 0.0045, + "step": 80190 + }, + { + "epoch": 14.3, + "learning_rate": 4.285244293865906e-05, + "loss": 0.0022, + "step": 80200 + }, + { + "epoch": 14.3, + "learning_rate": 4.2851551355206845e-05, + "loss": 0.0046, + "step": 80210 + }, + { + "epoch": 14.3, + "learning_rate": 4.2850659771754636e-05, + "loss": 0.0019, + "step": 80220 + }, + { + "epoch": 14.31, + "learning_rate": 4.284976818830243e-05, + "loss": 0.0028, + "step": 80230 + }, + { + "epoch": 14.31, + "learning_rate": 4.284887660485021e-05, + "loss": 0.003, + "step": 80240 + }, + { + "epoch": 14.31, + "learning_rate": 4.284798502139801e-05, + "loss": 0.0027, + "step": 80250 + }, + { + "epoch": 14.31, + "learning_rate": 4.2847093437945794e-05, + "loss": 0.0027, + "step": 80260 + }, + { + "epoch": 14.31, + "learning_rate": 4.2846201854493586e-05, + "loss": 0.0018, + "step": 80270 + }, + { + "epoch": 14.32, + "learning_rate": 4.284531027104137e-05, + "loss": 0.0032, + "step": 80280 + }, + { + "epoch": 14.32, + "learning_rate": 4.284441868758916e-05, + "loss": 0.0026, + "step": 80290 + }, + { + "epoch": 14.32, + "learning_rate": 4.2843527104136946e-05, + "loss": 0.0048, + "step": 80300 + }, + { + "epoch": 14.32, + "learning_rate": 4.284263552068474e-05, + "loss": 0.0034, + "step": 80310 + }, + { + "epoch": 14.32, + "learning_rate": 4.284174393723253e-05, + "loss": 0.0021, + "step": 80320 + }, + { + "epoch": 14.32, + "learning_rate": 4.284085235378031e-05, + "loss": 0.0034, + "step": 80330 + }, + { + "epoch": 14.33, + "learning_rate": 4.2839960770328104e-05, + "loss": 0.0027, + "step": 80340 + }, + { + "epoch": 14.33, + "learning_rate": 4.283906918687589e-05, + "loss": 0.0042, + "step": 80350 + }, + { + "epoch": 14.33, + "learning_rate": 4.2838177603423686e-05, + "loss": 0.0026, + "step": 80360 + }, + { + "epoch": 14.33, + "learning_rate": 4.283728601997147e-05, + "loss": 0.0044, + "step": 80370 + }, + { + "epoch": 14.33, + "learning_rate": 4.283639443651926e-05, + "loss": 0.0015, + "step": 80380 + }, + { + "epoch": 14.33, + "learning_rate": 4.283550285306705e-05, + "loss": 0.0067, + "step": 80390 + }, + { + "epoch": 14.34, + "learning_rate": 4.283461126961484e-05, + "loss": 0.0025, + "step": 80400 + }, + { + "epoch": 14.34, + "learning_rate": 4.283371968616263e-05, + "loss": 0.003, + "step": 80410 + }, + { + "epoch": 14.34, + "learning_rate": 4.283282810271041e-05, + "loss": 0.0025, + "step": 80420 + }, + { + "epoch": 14.34, + "learning_rate": 4.2831936519258204e-05, + "loss": 0.0017, + "step": 80430 + }, + { + "epoch": 14.34, + "learning_rate": 4.283104493580599e-05, + "loss": 0.0023, + "step": 80440 + }, + { + "epoch": 14.35, + "learning_rate": 4.283015335235378e-05, + "loss": 0.0043, + "step": 80450 + }, + { + "epoch": 14.35, + "learning_rate": 4.282926176890157e-05, + "loss": 0.0019, + "step": 80460 + }, + { + "epoch": 14.35, + "learning_rate": 4.282837018544936e-05, + "loss": 0.0035, + "step": 80470 + }, + { + "epoch": 14.35, + "learning_rate": 4.2827478601997154e-05, + "loss": 0.002, + "step": 80480 + }, + { + "epoch": 14.35, + "learning_rate": 4.282658701854494e-05, + "loss": 0.0037, + "step": 80490 + }, + { + "epoch": 14.35, + "learning_rate": 4.282569543509273e-05, + "loss": 0.0036, + "step": 80500 + }, + { + "epoch": 14.36, + "learning_rate": 4.2824803851640514e-05, + "loss": 0.0041, + "step": 80510 + }, + { + "epoch": 14.36, + "learning_rate": 4.2823912268188305e-05, + "loss": 0.0033, + "step": 80520 + }, + { + "epoch": 14.36, + "learning_rate": 4.282302068473609e-05, + "loss": 0.0038, + "step": 80530 + }, + { + "epoch": 14.36, + "learning_rate": 4.282212910128388e-05, + "loss": 0.0029, + "step": 80540 + }, + { + "epoch": 14.36, + "learning_rate": 4.282123751783167e-05, + "loss": 0.0028, + "step": 80550 + }, + { + "epoch": 14.37, + "learning_rate": 4.2820345934379456e-05, + "loss": 0.002, + "step": 80560 + }, + { + "epoch": 14.37, + "learning_rate": 4.281945435092725e-05, + "loss": 0.0029, + "step": 80570 + }, + { + "epoch": 14.37, + "learning_rate": 4.281856276747504e-05, + "loss": 0.0022, + "step": 80580 + }, + { + "epoch": 14.37, + "learning_rate": 4.281767118402283e-05, + "loss": 0.0021, + "step": 80590 + }, + { + "epoch": 14.37, + "learning_rate": 4.2816779600570614e-05, + "loss": 0.004, + "step": 80600 + }, + { + "epoch": 14.37, + "learning_rate": 4.2815888017118405e-05, + "loss": 0.0022, + "step": 80610 + }, + { + "epoch": 14.38, + "learning_rate": 4.28149964336662e-05, + "loss": 0.0035, + "step": 80620 + }, + { + "epoch": 14.38, + "learning_rate": 4.281410485021398e-05, + "loss": 0.0021, + "step": 80630 + }, + { + "epoch": 14.38, + "learning_rate": 4.281321326676177e-05, + "loss": 0.0028, + "step": 80640 + }, + { + "epoch": 14.38, + "learning_rate": 4.281232168330956e-05, + "loss": 0.0026, + "step": 80650 + }, + { + "epoch": 14.38, + "learning_rate": 4.281143009985735e-05, + "loss": 0.0024, + "step": 80660 + }, + { + "epoch": 14.38, + "learning_rate": 4.281053851640513e-05, + "loss": 0.005, + "step": 80670 + }, + { + "epoch": 14.39, + "learning_rate": 4.2809646932952924e-05, + "loss": 0.0021, + "step": 80680 + }, + { + "epoch": 14.39, + "learning_rate": 4.2808755349500715e-05, + "loss": 0.0019, + "step": 80690 + }, + { + "epoch": 14.39, + "learning_rate": 4.2807863766048506e-05, + "loss": 0.0015, + "step": 80700 + }, + { + "epoch": 14.39, + "learning_rate": 4.28069721825963e-05, + "loss": 0.0018, + "step": 80710 + }, + { + "epoch": 14.39, + "learning_rate": 4.280608059914408e-05, + "loss": 0.0023, + "step": 80720 + }, + { + "epoch": 14.4, + "learning_rate": 4.280518901569187e-05, + "loss": 0.0033, + "step": 80730 + }, + { + "epoch": 14.4, + "learning_rate": 4.280429743223966e-05, + "loss": 0.0012, + "step": 80740 + }, + { + "epoch": 14.4, + "learning_rate": 4.280340584878745e-05, + "loss": 0.0034, + "step": 80750 + }, + { + "epoch": 14.4, + "learning_rate": 4.280251426533523e-05, + "loss": 0.0057, + "step": 80760 + }, + { + "epoch": 14.4, + "learning_rate": 4.2801622681883024e-05, + "loss": 0.002, + "step": 80770 + }, + { + "epoch": 14.4, + "learning_rate": 4.2800731098430815e-05, + "loss": 0.0026, + "step": 80780 + }, + { + "epoch": 14.41, + "learning_rate": 4.27998395149786e-05, + "loss": 0.0038, + "step": 80790 + }, + { + "epoch": 14.41, + "learning_rate": 4.27989479315264e-05, + "loss": 0.0034, + "step": 80800 + }, + { + "epoch": 14.41, + "learning_rate": 4.279805634807418e-05, + "loss": 0.0028, + "step": 80810 + }, + { + "epoch": 14.41, + "learning_rate": 4.279716476462197e-05, + "loss": 0.0024, + "step": 80820 + }, + { + "epoch": 14.41, + "learning_rate": 4.279627318116976e-05, + "loss": 0.0015, + "step": 80830 + }, + { + "epoch": 14.42, + "learning_rate": 4.279538159771755e-05, + "loss": 0.0029, + "step": 80840 + }, + { + "epoch": 14.42, + "learning_rate": 4.279449001426534e-05, + "loss": 0.0022, + "step": 80850 + }, + { + "epoch": 14.42, + "learning_rate": 4.2793598430813125e-05, + "loss": 0.0013, + "step": 80860 + }, + { + "epoch": 14.42, + "learning_rate": 4.2792706847360916e-05, + "loss": 0.004, + "step": 80870 + }, + { + "epoch": 14.42, + "learning_rate": 4.27918152639087e-05, + "loss": 0.0054, + "step": 80880 + }, + { + "epoch": 14.42, + "learning_rate": 4.279092368045649e-05, + "loss": 0.0035, + "step": 80890 + }, + { + "epoch": 14.43, + "learning_rate": 4.2790032097004276e-05, + "loss": 0.0016, + "step": 80900 + }, + { + "epoch": 14.43, + "learning_rate": 4.2789140513552074e-05, + "loss": 0.0018, + "step": 80910 + }, + { + "epoch": 14.43, + "learning_rate": 4.278824893009986e-05, + "loss": 0.0012, + "step": 80920 + }, + { + "epoch": 14.43, + "learning_rate": 4.278735734664765e-05, + "loss": 0.0033, + "step": 80930 + }, + { + "epoch": 14.43, + "learning_rate": 4.278646576319544e-05, + "loss": 0.003, + "step": 80940 + }, + { + "epoch": 14.43, + "learning_rate": 4.2785574179743225e-05, + "loss": 0.0039, + "step": 80950 + }, + { + "epoch": 14.44, + "learning_rate": 4.2784682596291016e-05, + "loss": 0.0058, + "step": 80960 + }, + { + "epoch": 14.44, + "learning_rate": 4.27837910128388e-05, + "loss": 0.0022, + "step": 80970 + }, + { + "epoch": 14.44, + "learning_rate": 4.278289942938659e-05, + "loss": 0.0022, + "step": 80980 + }, + { + "epoch": 14.44, + "learning_rate": 4.2782007845934377e-05, + "loss": 0.002, + "step": 80990 + }, + { + "epoch": 14.44, + "learning_rate": 4.278111626248217e-05, + "loss": 0.0066, + "step": 81000 + }, + { + "epoch": 14.45, + "learning_rate": 4.278022467902996e-05, + "loss": 0.0016, + "step": 81010 + }, + { + "epoch": 14.45, + "learning_rate": 4.277933309557775e-05, + "loss": 0.004, + "step": 81020 + }, + { + "epoch": 14.45, + "learning_rate": 4.277844151212554e-05, + "loss": 0.0043, + "step": 81030 + }, + { + "epoch": 14.45, + "learning_rate": 4.2777549928673326e-05, + "loss": 0.0043, + "step": 81040 + }, + { + "epoch": 14.45, + "learning_rate": 4.277665834522112e-05, + "loss": 0.0036, + "step": 81050 + }, + { + "epoch": 14.45, + "learning_rate": 4.27757667617689e-05, + "loss": 0.0035, + "step": 81060 + }, + { + "epoch": 14.46, + "learning_rate": 4.277487517831669e-05, + "loss": 0.0041, + "step": 81070 + }, + { + "epoch": 14.46, + "learning_rate": 4.2773983594864484e-05, + "loss": 0.0023, + "step": 81080 + }, + { + "epoch": 14.46, + "learning_rate": 4.277309201141227e-05, + "loss": 0.0024, + "step": 81090 + }, + { + "epoch": 14.46, + "learning_rate": 4.277220042796006e-05, + "loss": 0.0052, + "step": 81100 + }, + { + "epoch": 14.46, + "learning_rate": 4.2771308844507844e-05, + "loss": 0.0033, + "step": 81110 + }, + { + "epoch": 14.47, + "learning_rate": 4.2770417261055635e-05, + "loss": 0.0036, + "step": 81120 + }, + { + "epoch": 14.47, + "learning_rate": 4.2769525677603426e-05, + "loss": 0.004, + "step": 81130 + }, + { + "epoch": 14.47, + "learning_rate": 4.276863409415122e-05, + "loss": 0.0032, + "step": 81140 + }, + { + "epoch": 14.47, + "learning_rate": 4.2767742510699e-05, + "loss": 0.002, + "step": 81150 + }, + { + "epoch": 14.47, + "learning_rate": 4.276685092724679e-05, + "loss": 0.0044, + "step": 81160 + }, + { + "epoch": 14.47, + "learning_rate": 4.2765959343794584e-05, + "loss": 0.0017, + "step": 81170 + }, + { + "epoch": 14.48, + "learning_rate": 4.276506776034237e-05, + "loss": 0.0029, + "step": 81180 + }, + { + "epoch": 14.48, + "learning_rate": 4.276417617689016e-05, + "loss": 0.0045, + "step": 81190 + }, + { + "epoch": 14.48, + "learning_rate": 4.2763284593437944e-05, + "loss": 0.0031, + "step": 81200 + }, + { + "epoch": 14.48, + "learning_rate": 4.2762393009985736e-05, + "loss": 0.0017, + "step": 81210 + }, + { + "epoch": 14.48, + "learning_rate": 4.276150142653352e-05, + "loss": 0.003, + "step": 81220 + }, + { + "epoch": 14.48, + "learning_rate": 4.276060984308131e-05, + "loss": 0.0038, + "step": 81230 + }, + { + "epoch": 14.49, + "learning_rate": 4.27597182596291e-05, + "loss": 0.003, + "step": 81240 + }, + { + "epoch": 14.49, + "learning_rate": 4.2758826676176894e-05, + "loss": 0.0031, + "step": 81250 + }, + { + "epoch": 14.49, + "learning_rate": 4.2757935092724685e-05, + "loss": 0.003, + "step": 81260 + }, + { + "epoch": 14.49, + "learning_rate": 4.275704350927247e-05, + "loss": 0.0038, + "step": 81270 + }, + { + "epoch": 14.49, + "learning_rate": 4.275615192582026e-05, + "loss": 0.0053, + "step": 81280 + }, + { + "epoch": 14.5, + "learning_rate": 4.2755260342368045e-05, + "loss": 0.0035, + "step": 81290 + }, + { + "epoch": 14.5, + "learning_rate": 4.2754368758915836e-05, + "loss": 0.0036, + "step": 81300 + }, + { + "epoch": 14.5, + "learning_rate": 4.275347717546363e-05, + "loss": 0.0038, + "step": 81310 + }, + { + "epoch": 14.5, + "learning_rate": 4.275258559201141e-05, + "loss": 0.0021, + "step": 81320 + }, + { + "epoch": 14.5, + "learning_rate": 4.27516940085592e-05, + "loss": 0.0035, + "step": 81330 + }, + { + "epoch": 14.5, + "learning_rate": 4.275080242510699e-05, + "loss": 0.0022, + "step": 81340 + }, + { + "epoch": 14.51, + "learning_rate": 4.2749910841654786e-05, + "loss": 0.0027, + "step": 81350 + }, + { + "epoch": 14.51, + "learning_rate": 4.274901925820257e-05, + "loss": 0.0031, + "step": 81360 + }, + { + "epoch": 14.51, + "learning_rate": 4.274812767475036e-05, + "loss": 0.0031, + "step": 81370 + }, + { + "epoch": 14.51, + "learning_rate": 4.2747236091298146e-05, + "loss": 0.0026, + "step": 81380 + }, + { + "epoch": 14.51, + "learning_rate": 4.274634450784594e-05, + "loss": 0.0031, + "step": 81390 + }, + { + "epoch": 14.51, + "learning_rate": 4.274545292439373e-05, + "loss": 0.0041, + "step": 81400 + }, + { + "epoch": 14.52, + "learning_rate": 4.274456134094151e-05, + "loss": 0.0033, + "step": 81410 + }, + { + "epoch": 14.52, + "learning_rate": 4.2743669757489304e-05, + "loss": 0.0025, + "step": 81420 + }, + { + "epoch": 14.52, + "learning_rate": 4.274277817403709e-05, + "loss": 0.0056, + "step": 81430 + }, + { + "epoch": 14.52, + "learning_rate": 4.274188659058488e-05, + "loss": 0.0016, + "step": 81440 + }, + { + "epoch": 14.52, + "learning_rate": 4.2740995007132664e-05, + "loss": 0.0032, + "step": 81450 + }, + { + "epoch": 14.53, + "learning_rate": 4.274010342368046e-05, + "loss": 0.0015, + "step": 81460 + }, + { + "epoch": 14.53, + "learning_rate": 4.2739211840228246e-05, + "loss": 0.0027, + "step": 81470 + }, + { + "epoch": 14.53, + "learning_rate": 4.273832025677604e-05, + "loss": 0.0013, + "step": 81480 + }, + { + "epoch": 14.53, + "learning_rate": 4.273742867332383e-05, + "loss": 0.0017, + "step": 81490 + }, + { + "epoch": 14.53, + "learning_rate": 4.273653708987161e-05, + "loss": 0.0023, + "step": 81500 + }, + { + "epoch": 14.53, + "learning_rate": 4.2735645506419404e-05, + "loss": 0.0027, + "step": 81510 + }, + { + "epoch": 14.54, + "learning_rate": 4.273475392296719e-05, + "loss": 0.0065, + "step": 81520 + }, + { + "epoch": 14.54, + "learning_rate": 4.273386233951498e-05, + "loss": 0.0014, + "step": 81530 + }, + { + "epoch": 14.54, + "learning_rate": 4.273297075606277e-05, + "loss": 0.003, + "step": 81540 + }, + { + "epoch": 14.54, + "learning_rate": 4.2732079172610556e-05, + "loss": 0.0028, + "step": 81550 + }, + { + "epoch": 14.54, + "learning_rate": 4.273118758915835e-05, + "loss": 0.0027, + "step": 81560 + }, + { + "epoch": 14.55, + "learning_rate": 4.273029600570614e-05, + "loss": 0.0027, + "step": 81570 + }, + { + "epoch": 14.55, + "learning_rate": 4.272940442225393e-05, + "loss": 0.0029, + "step": 81580 + }, + { + "epoch": 14.55, + "learning_rate": 4.2728512838801714e-05, + "loss": 0.0029, + "step": 81590 + }, + { + "epoch": 14.55, + "learning_rate": 4.2727621255349505e-05, + "loss": 0.0031, + "step": 81600 + }, + { + "epoch": 14.55, + "learning_rate": 4.272672967189729e-05, + "loss": 0.0022, + "step": 81610 + }, + { + "epoch": 14.55, + "learning_rate": 4.272583808844508e-05, + "loss": 0.0029, + "step": 81620 + }, + { + "epoch": 14.56, + "learning_rate": 4.272494650499287e-05, + "loss": 0.0022, + "step": 81630 + }, + { + "epoch": 14.56, + "learning_rate": 4.2724054921540656e-05, + "loss": 0.0012, + "step": 81640 + }, + { + "epoch": 14.56, + "learning_rate": 4.272316333808845e-05, + "loss": 0.007, + "step": 81650 + }, + { + "epoch": 14.56, + "learning_rate": 4.272227175463623e-05, + "loss": 0.0022, + "step": 81660 + }, + { + "epoch": 14.56, + "learning_rate": 4.272138017118402e-05, + "loss": 0.0055, + "step": 81670 + }, + { + "epoch": 14.56, + "learning_rate": 4.2720488587731814e-05, + "loss": 0.0014, + "step": 81680 + }, + { + "epoch": 14.57, + "learning_rate": 4.2719597004279605e-05, + "loss": 0.0026, + "step": 81690 + }, + { + "epoch": 14.57, + "learning_rate": 4.271870542082739e-05, + "loss": 0.0024, + "step": 81700 + }, + { + "epoch": 14.57, + "learning_rate": 4.271781383737518e-05, + "loss": 0.0035, + "step": 81710 + }, + { + "epoch": 14.57, + "learning_rate": 4.271692225392297e-05, + "loss": 0.003, + "step": 81720 + }, + { + "epoch": 14.57, + "learning_rate": 4.271603067047076e-05, + "loss": 0.0028, + "step": 81730 + }, + { + "epoch": 14.58, + "learning_rate": 4.271513908701855e-05, + "loss": 0.0023, + "step": 81740 + }, + { + "epoch": 14.58, + "learning_rate": 4.271424750356633e-05, + "loss": 0.0037, + "step": 81750 + }, + { + "epoch": 14.58, + "learning_rate": 4.2713355920114124e-05, + "loss": 0.0041, + "step": 81760 + }, + { + "epoch": 14.58, + "learning_rate": 4.2712464336661915e-05, + "loss": 0.0024, + "step": 81770 + }, + { + "epoch": 14.58, + "learning_rate": 4.27115727532097e-05, + "loss": 0.0071, + "step": 81780 + }, + { + "epoch": 14.58, + "learning_rate": 4.27106811697575e-05, + "loss": 0.0037, + "step": 81790 + }, + { + "epoch": 14.59, + "learning_rate": 4.270978958630528e-05, + "loss": 0.0046, + "step": 81800 + }, + { + "epoch": 14.59, + "learning_rate": 4.270889800285307e-05, + "loss": 0.0034, + "step": 81810 + }, + { + "epoch": 14.59, + "learning_rate": 4.270800641940086e-05, + "loss": 0.0028, + "step": 81820 + }, + { + "epoch": 14.59, + "learning_rate": 4.270711483594865e-05, + "loss": 0.0045, + "step": 81830 + }, + { + "epoch": 14.59, + "learning_rate": 4.270622325249643e-05, + "loss": 0.0057, + "step": 81840 + }, + { + "epoch": 14.6, + "learning_rate": 4.2705331669044224e-05, + "loss": 0.0028, + "step": 81850 + }, + { + "epoch": 14.6, + "learning_rate": 4.2704440085592015e-05, + "loss": 0.0025, + "step": 81860 + }, + { + "epoch": 14.6, + "learning_rate": 4.27035485021398e-05, + "loss": 0.0048, + "step": 81870 + }, + { + "epoch": 14.6, + "learning_rate": 4.270265691868759e-05, + "loss": 0.0013, + "step": 81880 + }, + { + "epoch": 14.6, + "learning_rate": 4.2701765335235375e-05, + "loss": 0.0023, + "step": 81890 + }, + { + "epoch": 14.6, + "learning_rate": 4.270087375178317e-05, + "loss": 0.0021, + "step": 81900 + }, + { + "epoch": 14.61, + "learning_rate": 4.269998216833096e-05, + "loss": 0.0045, + "step": 81910 + }, + { + "epoch": 14.61, + "learning_rate": 4.269909058487875e-05, + "loss": 0.0023, + "step": 81920 + }, + { + "epoch": 14.61, + "learning_rate": 4.2698199001426533e-05, + "loss": 0.0033, + "step": 81930 + }, + { + "epoch": 14.61, + "learning_rate": 4.2697307417974325e-05, + "loss": 0.0026, + "step": 81940 + }, + { + "epoch": 14.61, + "learning_rate": 4.2696415834522116e-05, + "loss": 0.0028, + "step": 81950 + }, + { + "epoch": 14.61, + "learning_rate": 4.26955242510699e-05, + "loss": 0.0031, + "step": 81960 + }, + { + "epoch": 14.62, + "learning_rate": 4.269463266761769e-05, + "loss": 0.0043, + "step": 81970 + }, + { + "epoch": 14.62, + "learning_rate": 4.2693741084165476e-05, + "loss": 0.0015, + "step": 81980 + }, + { + "epoch": 14.62, + "learning_rate": 4.269284950071327e-05, + "loss": 0.0027, + "step": 81990 + }, + { + "epoch": 14.62, + "learning_rate": 4.269195791726106e-05, + "loss": 0.0033, + "step": 82000 + }, + { + "epoch": 14.62, + "learning_rate": 4.269106633380885e-05, + "loss": 0.0032, + "step": 82010 + }, + { + "epoch": 14.63, + "learning_rate": 4.269017475035664e-05, + "loss": 0.002, + "step": 82020 + }, + { + "epoch": 14.63, + "learning_rate": 4.2689283166904425e-05, + "loss": 0.0033, + "step": 82030 + }, + { + "epoch": 14.63, + "learning_rate": 4.2688391583452216e-05, + "loss": 0.0017, + "step": 82040 + }, + { + "epoch": 14.63, + "learning_rate": 4.26875e-05, + "loss": 0.0038, + "step": 82050 + }, + { + "epoch": 14.63, + "learning_rate": 4.268660841654779e-05, + "loss": 0.0061, + "step": 82060 + }, + { + "epoch": 14.63, + "learning_rate": 4.2685716833095576e-05, + "loss": 0.0037, + "step": 82070 + }, + { + "epoch": 14.64, + "learning_rate": 4.268482524964337e-05, + "loss": 0.0034, + "step": 82080 + }, + { + "epoch": 14.64, + "learning_rate": 4.268393366619116e-05, + "loss": 0.0029, + "step": 82090 + }, + { + "epoch": 14.64, + "learning_rate": 4.268304208273894e-05, + "loss": 0.0026, + "step": 82100 + }, + { + "epoch": 14.64, + "learning_rate": 4.2682150499286735e-05, + "loss": 0.0024, + "step": 82110 + }, + { + "epoch": 14.64, + "learning_rate": 4.2681258915834526e-05, + "loss": 0.0035, + "step": 82120 + }, + { + "epoch": 14.65, + "learning_rate": 4.268036733238232e-05, + "loss": 0.0027, + "step": 82130 + }, + { + "epoch": 14.65, + "learning_rate": 4.26794757489301e-05, + "loss": 0.005, + "step": 82140 + }, + { + "epoch": 14.65, + "learning_rate": 4.267858416547789e-05, + "loss": 0.0025, + "step": 82150 + }, + { + "epoch": 14.65, + "learning_rate": 4.267769258202568e-05, + "loss": 0.0013, + "step": 82160 + }, + { + "epoch": 14.65, + "learning_rate": 4.267680099857347e-05, + "loss": 0.0026, + "step": 82170 + }, + { + "epoch": 14.65, + "learning_rate": 4.267590941512126e-05, + "loss": 0.0036, + "step": 82180 + }, + { + "epoch": 14.66, + "learning_rate": 4.2675017831669044e-05, + "loss": 0.0014, + "step": 82190 + }, + { + "epoch": 14.66, + "learning_rate": 4.2674126248216835e-05, + "loss": 0.0024, + "step": 82200 + }, + { + "epoch": 14.66, + "learning_rate": 4.267323466476462e-05, + "loss": 0.0022, + "step": 82210 + }, + { + "epoch": 14.66, + "learning_rate": 4.267234308131241e-05, + "loss": 0.0026, + "step": 82220 + }, + { + "epoch": 14.66, + "learning_rate": 4.26714514978602e-05, + "loss": 0.0033, + "step": 82230 + }, + { + "epoch": 14.66, + "learning_rate": 4.267055991440799e-05, + "loss": 0.0025, + "step": 82240 + }, + { + "epoch": 14.67, + "learning_rate": 4.266966833095578e-05, + "loss": 0.0028, + "step": 82250 + }, + { + "epoch": 14.67, + "learning_rate": 4.266877674750357e-05, + "loss": 0.0028, + "step": 82260 + }, + { + "epoch": 14.67, + "learning_rate": 4.266788516405136e-05, + "loss": 0.0041, + "step": 82270 + }, + { + "epoch": 14.67, + "learning_rate": 4.2666993580599144e-05, + "loss": 0.0048, + "step": 82280 + }, + { + "epoch": 14.67, + "learning_rate": 4.2666101997146936e-05, + "loss": 0.0036, + "step": 82290 + }, + { + "epoch": 14.68, + "learning_rate": 4.266521041369472e-05, + "loss": 0.0035, + "step": 82300 + }, + { + "epoch": 14.68, + "learning_rate": 4.266431883024251e-05, + "loss": 0.0053, + "step": 82310 + }, + { + "epoch": 14.68, + "learning_rate": 4.26634272467903e-05, + "loss": 0.0057, + "step": 82320 + }, + { + "epoch": 14.68, + "learning_rate": 4.266253566333809e-05, + "loss": 0.0032, + "step": 82330 + }, + { + "epoch": 14.68, + "learning_rate": 4.2661644079885885e-05, + "loss": 0.0042, + "step": 82340 + }, + { + "epoch": 14.68, + "learning_rate": 4.266075249643367e-05, + "loss": 0.0037, + "step": 82350 + }, + { + "epoch": 14.69, + "learning_rate": 4.265986091298146e-05, + "loss": 0.0012, + "step": 82360 + }, + { + "epoch": 14.69, + "learning_rate": 4.2658969329529245e-05, + "loss": 0.0027, + "step": 82370 + }, + { + "epoch": 14.69, + "learning_rate": 4.2658077746077036e-05, + "loss": 0.0033, + "step": 82380 + }, + { + "epoch": 14.69, + "learning_rate": 4.265718616262482e-05, + "loss": 0.0021, + "step": 82390 + }, + { + "epoch": 14.69, + "learning_rate": 4.265629457917261e-05, + "loss": 0.0032, + "step": 82400 + }, + { + "epoch": 14.7, + "learning_rate": 4.26554029957204e-05, + "loss": 0.0054, + "step": 82410 + }, + { + "epoch": 14.7, + "learning_rate": 4.265451141226819e-05, + "loss": 0.0024, + "step": 82420 + }, + { + "epoch": 14.7, + "learning_rate": 4.265361982881598e-05, + "loss": 0.003, + "step": 82430 + }, + { + "epoch": 14.7, + "learning_rate": 4.265272824536376e-05, + "loss": 0.0024, + "step": 82440 + }, + { + "epoch": 14.7, + "learning_rate": 4.265183666191156e-05, + "loss": 0.003, + "step": 82450 + }, + { + "epoch": 14.7, + "learning_rate": 4.2650945078459346e-05, + "loss": 0.0037, + "step": 82460 + }, + { + "epoch": 14.71, + "learning_rate": 4.265005349500714e-05, + "loss": 0.0026, + "step": 82470 + }, + { + "epoch": 14.71, + "learning_rate": 4.264916191155492e-05, + "loss": 0.0014, + "step": 82480 + }, + { + "epoch": 14.71, + "learning_rate": 4.264827032810271e-05, + "loss": 0.0045, + "step": 82490 + }, + { + "epoch": 14.71, + "learning_rate": 4.2647378744650504e-05, + "loss": 0.0037, + "step": 82500 + }, + { + "epoch": 14.71, + "learning_rate": 4.264648716119829e-05, + "loss": 0.0034, + "step": 82510 + }, + { + "epoch": 14.71, + "learning_rate": 4.264559557774608e-05, + "loss": 0.0028, + "step": 82520 + }, + { + "epoch": 14.72, + "learning_rate": 4.2644703994293864e-05, + "loss": 0.0034, + "step": 82530 + }, + { + "epoch": 14.72, + "learning_rate": 4.2643812410841655e-05, + "loss": 0.0022, + "step": 82540 + }, + { + "epoch": 14.72, + "learning_rate": 4.2642920827389446e-05, + "loss": 0.002, + "step": 82550 + }, + { + "epoch": 14.72, + "learning_rate": 4.264202924393724e-05, + "loss": 0.0029, + "step": 82560 + }, + { + "epoch": 14.72, + "learning_rate": 4.264113766048503e-05, + "loss": 0.0018, + "step": 82570 + }, + { + "epoch": 14.73, + "learning_rate": 4.264024607703281e-05, + "loss": 0.0017, + "step": 82580 + }, + { + "epoch": 14.73, + "learning_rate": 4.2639354493580604e-05, + "loss": 0.0045, + "step": 82590 + }, + { + "epoch": 14.73, + "learning_rate": 4.263846291012839e-05, + "loss": 0.0048, + "step": 82600 + }, + { + "epoch": 14.73, + "learning_rate": 4.263757132667618e-05, + "loss": 0.0037, + "step": 82610 + }, + { + "epoch": 14.73, + "learning_rate": 4.2636679743223964e-05, + "loss": 0.0027, + "step": 82620 + }, + { + "epoch": 14.73, + "learning_rate": 4.2635788159771755e-05, + "loss": 0.007, + "step": 82630 + }, + { + "epoch": 14.74, + "learning_rate": 4.263489657631955e-05, + "loss": 0.0025, + "step": 82640 + }, + { + "epoch": 14.74, + "learning_rate": 4.263400499286733e-05, + "loss": 0.0024, + "step": 82650 + }, + { + "epoch": 14.74, + "learning_rate": 4.263311340941512e-05, + "loss": 0.0046, + "step": 82660 + }, + { + "epoch": 14.74, + "learning_rate": 4.2632221825962914e-05, + "loss": 0.0015, + "step": 82670 + }, + { + "epoch": 14.74, + "learning_rate": 4.263141940085592e-05, + "loss": 0.0052, + "step": 82680 + }, + { + "epoch": 14.75, + "learning_rate": 4.2630527817403706e-05, + "loss": 0.0042, + "step": 82690 + }, + { + "epoch": 14.75, + "learning_rate": 4.26296362339515e-05, + "loss": 0.0032, + "step": 82700 + }, + { + "epoch": 14.75, + "learning_rate": 4.262874465049929e-05, + "loss": 0.0025, + "step": 82710 + }, + { + "epoch": 14.75, + "learning_rate": 4.262785306704708e-05, + "loss": 0.0047, + "step": 82720 + }, + { + "epoch": 14.75, + "learning_rate": 4.262696148359487e-05, + "loss": 0.0047, + "step": 82730 + }, + { + "epoch": 14.75, + "learning_rate": 4.2626069900142655e-05, + "loss": 0.0027, + "step": 82740 + }, + { + "epoch": 14.76, + "learning_rate": 4.2625178316690446e-05, + "loss": 0.0016, + "step": 82750 + }, + { + "epoch": 14.76, + "learning_rate": 4.262428673323823e-05, + "loss": 0.0038, + "step": 82760 + }, + { + "epoch": 14.76, + "learning_rate": 4.262339514978602e-05, + "loss": 0.0027, + "step": 82770 + }, + { + "epoch": 14.76, + "learning_rate": 4.262250356633381e-05, + "loss": 0.002, + "step": 82780 + }, + { + "epoch": 14.76, + "learning_rate": 4.26216119828816e-05, + "loss": 0.0071, + "step": 82790 + }, + { + "epoch": 14.76, + "learning_rate": 4.262072039942939e-05, + "loss": 0.0049, + "step": 82800 + }, + { + "epoch": 14.77, + "learning_rate": 4.2619828815977173e-05, + "loss": 0.0039, + "step": 82810 + }, + { + "epoch": 14.77, + "learning_rate": 4.2618937232524965e-05, + "loss": 0.0019, + "step": 82820 + }, + { + "epoch": 14.77, + "learning_rate": 4.2618045649072756e-05, + "loss": 0.0035, + "step": 82830 + }, + { + "epoch": 14.77, + "learning_rate": 4.261715406562055e-05, + "loss": 0.0035, + "step": 82840 + }, + { + "epoch": 14.77, + "learning_rate": 4.261626248216833e-05, + "loss": 0.0033, + "step": 82850 + }, + { + "epoch": 14.78, + "learning_rate": 4.261537089871612e-05, + "loss": 0.0042, + "step": 82860 + }, + { + "epoch": 14.78, + "learning_rate": 4.2614479315263914e-05, + "loss": 0.0051, + "step": 82870 + }, + { + "epoch": 14.78, + "learning_rate": 4.26135877318117e-05, + "loss": 0.0048, + "step": 82880 + }, + { + "epoch": 14.78, + "learning_rate": 4.261269614835949e-05, + "loss": 0.0037, + "step": 82890 + }, + { + "epoch": 14.78, + "learning_rate": 4.2611804564907274e-05, + "loss": 0.0033, + "step": 82900 + }, + { + "epoch": 14.78, + "learning_rate": 4.2610912981455065e-05, + "loss": 0.0029, + "step": 82910 + }, + { + "epoch": 14.79, + "learning_rate": 4.261002139800285e-05, + "loss": 0.0025, + "step": 82920 + }, + { + "epoch": 14.79, + "learning_rate": 4.260912981455064e-05, + "loss": 0.0018, + "step": 82930 + }, + { + "epoch": 14.79, + "learning_rate": 4.260823823109843e-05, + "loss": 0.003, + "step": 82940 + }, + { + "epoch": 14.79, + "learning_rate": 4.260734664764622e-05, + "loss": 0.0028, + "step": 82950 + }, + { + "epoch": 14.79, + "learning_rate": 4.2606455064194014e-05, + "loss": 0.0027, + "step": 82960 + }, + { + "epoch": 14.79, + "learning_rate": 4.26055634807418e-05, + "loss": 0.0061, + "step": 82970 + }, + { + "epoch": 14.8, + "learning_rate": 4.260467189728959e-05, + "loss": 0.0042, + "step": 82980 + }, + { + "epoch": 14.8, + "learning_rate": 4.2603780313837375e-05, + "loss": 0.0039, + "step": 82990 + }, + { + "epoch": 14.8, + "learning_rate": 4.2602888730385166e-05, + "loss": 0.0035, + "step": 83000 + }, + { + "epoch": 14.8, + "learning_rate": 4.260199714693296e-05, + "loss": 0.0041, + "step": 83010 + }, + { + "epoch": 14.8, + "learning_rate": 4.260110556348074e-05, + "loss": 0.0015, + "step": 83020 + }, + { + "epoch": 14.81, + "learning_rate": 4.260021398002853e-05, + "loss": 0.0011, + "step": 83030 + }, + { + "epoch": 14.81, + "learning_rate": 4.259932239657632e-05, + "loss": 0.0023, + "step": 83040 + }, + { + "epoch": 14.81, + "learning_rate": 4.2598430813124115e-05, + "loss": 0.0047, + "step": 83050 + }, + { + "epoch": 14.81, + "learning_rate": 4.25975392296719e-05, + "loss": 0.0032, + "step": 83060 + }, + { + "epoch": 14.81, + "learning_rate": 4.259664764621969e-05, + "loss": 0.0027, + "step": 83070 + }, + { + "epoch": 14.81, + "learning_rate": 4.2595756062767475e-05, + "loss": 0.0045, + "step": 83080 + }, + { + "epoch": 14.82, + "learning_rate": 4.2594864479315266e-05, + "loss": 0.003, + "step": 83090 + }, + { + "epoch": 14.82, + "learning_rate": 4.259397289586306e-05, + "loss": 0.0029, + "step": 83100 + }, + { + "epoch": 14.82, + "learning_rate": 4.259308131241084e-05, + "loss": 0.0026, + "step": 83110 + }, + { + "epoch": 14.82, + "learning_rate": 4.259218972895863e-05, + "loss": 0.0027, + "step": 83120 + }, + { + "epoch": 14.82, + "learning_rate": 4.259129814550642e-05, + "loss": 0.0036, + "step": 83130 + }, + { + "epoch": 14.83, + "learning_rate": 4.259040656205421e-05, + "loss": 0.0051, + "step": 83140 + }, + { + "epoch": 14.83, + "learning_rate": 4.258951497860199e-05, + "loss": 0.0026, + "step": 83150 + }, + { + "epoch": 14.83, + "learning_rate": 4.258862339514979e-05, + "loss": 0.0074, + "step": 83160 + }, + { + "epoch": 14.83, + "learning_rate": 4.2587731811697576e-05, + "loss": 0.0066, + "step": 83170 + }, + { + "epoch": 14.83, + "learning_rate": 4.258684022824537e-05, + "loss": 0.0016, + "step": 83180 + }, + { + "epoch": 14.83, + "learning_rate": 4.258594864479316e-05, + "loss": 0.0047, + "step": 83190 + }, + { + "epoch": 14.84, + "learning_rate": 4.258505706134094e-05, + "loss": 0.0041, + "step": 83200 + }, + { + "epoch": 14.84, + "learning_rate": 4.2584165477888734e-05, + "loss": 0.0018, + "step": 83210 + }, + { + "epoch": 14.84, + "learning_rate": 4.258327389443652e-05, + "loss": 0.0028, + "step": 83220 + }, + { + "epoch": 14.84, + "learning_rate": 4.258238231098431e-05, + "loss": 0.0031, + "step": 83230 + }, + { + "epoch": 14.84, + "learning_rate": 4.25814907275321e-05, + "loss": 0.0027, + "step": 83240 + }, + { + "epoch": 14.84, + "learning_rate": 4.2580599144079885e-05, + "loss": 0.0036, + "step": 83250 + }, + { + "epoch": 14.85, + "learning_rate": 4.2579707560627676e-05, + "loss": 0.0038, + "step": 83260 + }, + { + "epoch": 14.85, + "learning_rate": 4.257881597717547e-05, + "loss": 0.0034, + "step": 83270 + }, + { + "epoch": 14.85, + "learning_rate": 4.257792439372326e-05, + "loss": 0.0016, + "step": 83280 + }, + { + "epoch": 14.85, + "learning_rate": 4.257703281027104e-05, + "loss": 0.0041, + "step": 83290 + }, + { + "epoch": 14.85, + "learning_rate": 4.2576141226818834e-05, + "loss": 0.0033, + "step": 83300 + }, + { + "epoch": 14.86, + "learning_rate": 4.257524964336662e-05, + "loss": 0.0039, + "step": 83310 + }, + { + "epoch": 14.86, + "learning_rate": 4.257435805991441e-05, + "loss": 0.0027, + "step": 83320 + }, + { + "epoch": 14.86, + "learning_rate": 4.25734664764622e-05, + "loss": 0.0067, + "step": 83330 + }, + { + "epoch": 14.86, + "learning_rate": 4.2572574893009986e-05, + "loss": 0.0048, + "step": 83340 + }, + { + "epoch": 14.86, + "learning_rate": 4.257168330955778e-05, + "loss": 0.004, + "step": 83350 + }, + { + "epoch": 14.86, + "learning_rate": 4.257079172610556e-05, + "loss": 0.0036, + "step": 83360 + }, + { + "epoch": 14.87, + "learning_rate": 4.256990014265335e-05, + "loss": 0.0051, + "step": 83370 + }, + { + "epoch": 14.87, + "learning_rate": 4.2569008559201144e-05, + "loss": 0.0037, + "step": 83380 + }, + { + "epoch": 14.87, + "learning_rate": 4.2568116975748935e-05, + "loss": 0.0043, + "step": 83390 + }, + { + "epoch": 14.87, + "learning_rate": 4.256722539229672e-05, + "loss": 0.0017, + "step": 83400 + }, + { + "epoch": 14.87, + "learning_rate": 4.256633380884451e-05, + "loss": 0.0029, + "step": 83410 + }, + { + "epoch": 14.88, + "learning_rate": 4.25654422253923e-05, + "loss": 0.004, + "step": 83420 + }, + { + "epoch": 14.88, + "learning_rate": 4.2564550641940086e-05, + "loss": 0.0022, + "step": 83430 + }, + { + "epoch": 14.88, + "learning_rate": 4.256365905848788e-05, + "loss": 0.002, + "step": 83440 + }, + { + "epoch": 14.88, + "learning_rate": 4.256276747503566e-05, + "loss": 0.0036, + "step": 83450 + }, + { + "epoch": 14.88, + "learning_rate": 4.256187589158345e-05, + "loss": 0.0032, + "step": 83460 + }, + { + "epoch": 14.88, + "learning_rate": 4.2560984308131244e-05, + "loss": 0.0061, + "step": 83470 + }, + { + "epoch": 14.89, + "learning_rate": 4.256009272467903e-05, + "loss": 0.002, + "step": 83480 + }, + { + "epoch": 14.89, + "learning_rate": 4.255920114122682e-05, + "loss": 0.0084, + "step": 83490 + }, + { + "epoch": 14.89, + "learning_rate": 4.255830955777461e-05, + "loss": 0.002, + "step": 83500 + }, + { + "epoch": 14.89, + "learning_rate": 4.25574179743224e-05, + "loss": 0.0029, + "step": 83510 + }, + { + "epoch": 14.89, + "learning_rate": 4.255652639087019e-05, + "loss": 0.0039, + "step": 83520 + }, + { + "epoch": 14.89, + "learning_rate": 4.255563480741798e-05, + "loss": 0.0042, + "step": 83530 + }, + { + "epoch": 14.9, + "learning_rate": 4.255474322396576e-05, + "loss": 0.0033, + "step": 83540 + }, + { + "epoch": 14.9, + "learning_rate": 4.2553851640513554e-05, + "loss": 0.0055, + "step": 83550 + }, + { + "epoch": 14.9, + "learning_rate": 4.2552960057061345e-05, + "loss": 0.0031, + "step": 83560 + }, + { + "epoch": 14.9, + "learning_rate": 4.255206847360913e-05, + "loss": 0.0019, + "step": 83570 + }, + { + "epoch": 14.9, + "learning_rate": 4.255117689015692e-05, + "loss": 0.0024, + "step": 83580 + }, + { + "epoch": 14.91, + "learning_rate": 4.2550285306704705e-05, + "loss": 0.004, + "step": 83590 + }, + { + "epoch": 14.91, + "learning_rate": 4.25493937232525e-05, + "loss": 0.0037, + "step": 83600 + }, + { + "epoch": 14.91, + "learning_rate": 4.254850213980029e-05, + "loss": 0.0023, + "step": 83610 + }, + { + "epoch": 14.91, + "learning_rate": 4.254761055634808e-05, + "loss": 0.0039, + "step": 83620 + }, + { + "epoch": 14.91, + "learning_rate": 4.254671897289586e-05, + "loss": 0.0041, + "step": 83630 + }, + { + "epoch": 14.91, + "learning_rate": 4.2545827389443654e-05, + "loss": 0.0042, + "step": 83640 + }, + { + "epoch": 14.92, + "learning_rate": 4.2544935805991445e-05, + "loss": 0.0032, + "step": 83650 + }, + { + "epoch": 14.92, + "learning_rate": 4.254404422253923e-05, + "loss": 0.003, + "step": 83660 + }, + { + "epoch": 14.92, + "learning_rate": 4.254315263908702e-05, + "loss": 0.0015, + "step": 83670 + }, + { + "epoch": 14.92, + "learning_rate": 4.2542261055634805e-05, + "loss": 0.0029, + "step": 83680 + }, + { + "epoch": 14.92, + "learning_rate": 4.25413694721826e-05, + "loss": 0.0026, + "step": 83690 + }, + { + "epoch": 14.93, + "learning_rate": 4.254047788873039e-05, + "loss": 0.0026, + "step": 83700 + }, + { + "epoch": 14.93, + "learning_rate": 4.253958630527818e-05, + "loss": 0.0046, + "step": 83710 + }, + { + "epoch": 14.93, + "learning_rate": 4.2538694721825963e-05, + "loss": 0.0029, + "step": 83720 + }, + { + "epoch": 14.93, + "learning_rate": 4.2537803138373755e-05, + "loss": 0.0037, + "step": 83730 + }, + { + "epoch": 14.93, + "learning_rate": 4.2536911554921546e-05, + "loss": 0.0047, + "step": 83740 + }, + { + "epoch": 14.93, + "learning_rate": 4.253601997146933e-05, + "loss": 0.0044, + "step": 83750 + }, + { + "epoch": 14.94, + "learning_rate": 4.253512838801712e-05, + "loss": 0.003, + "step": 83760 + }, + { + "epoch": 14.94, + "learning_rate": 4.2534236804564906e-05, + "loss": 0.0038, + "step": 83770 + }, + { + "epoch": 14.94, + "learning_rate": 4.25333452211127e-05, + "loss": 0.0038, + "step": 83780 + }, + { + "epoch": 14.94, + "learning_rate": 4.253245363766049e-05, + "loss": 0.0026, + "step": 83790 + }, + { + "epoch": 14.94, + "learning_rate": 4.253156205420827e-05, + "loss": 0.0021, + "step": 83800 + }, + { + "epoch": 14.94, + "learning_rate": 4.2530670470756064e-05, + "loss": 0.0032, + "step": 83810 + }, + { + "epoch": 14.95, + "learning_rate": 4.2529778887303855e-05, + "loss": 0.003, + "step": 83820 + }, + { + "epoch": 14.95, + "learning_rate": 4.2528887303851646e-05, + "loss": 0.0038, + "step": 83830 + }, + { + "epoch": 14.95, + "learning_rate": 4.252799572039943e-05, + "loss": 0.0038, + "step": 83840 + }, + { + "epoch": 14.95, + "learning_rate": 4.252710413694722e-05, + "loss": 0.0014, + "step": 83850 + }, + { + "epoch": 14.95, + "learning_rate": 4.2526212553495007e-05, + "loss": 0.0027, + "step": 83860 + }, + { + "epoch": 14.96, + "learning_rate": 4.25253209700428e-05, + "loss": 0.0028, + "step": 83870 + }, + { + "epoch": 14.96, + "learning_rate": 4.252442938659059e-05, + "loss": 0.003, + "step": 83880 + }, + { + "epoch": 14.96, + "learning_rate": 4.252353780313837e-05, + "loss": 0.0029, + "step": 83890 + }, + { + "epoch": 14.96, + "learning_rate": 4.2522646219686165e-05, + "loss": 0.0024, + "step": 83900 + }, + { + "epoch": 14.96, + "learning_rate": 4.252175463623395e-05, + "loss": 0.0024, + "step": 83910 + }, + { + "epoch": 14.96, + "learning_rate": 4.252086305278174e-05, + "loss": 0.0032, + "step": 83920 + }, + { + "epoch": 14.97, + "learning_rate": 4.251997146932953e-05, + "loss": 0.0035, + "step": 83930 + }, + { + "epoch": 14.97, + "learning_rate": 4.251907988587732e-05, + "loss": 0.0018, + "step": 83940 + }, + { + "epoch": 14.97, + "learning_rate": 4.251818830242511e-05, + "loss": 0.0019, + "step": 83950 + }, + { + "epoch": 14.97, + "learning_rate": 4.25172967189729e-05, + "loss": 0.0038, + "step": 83960 + }, + { + "epoch": 14.97, + "learning_rate": 4.251640513552069e-05, + "loss": 0.0033, + "step": 83970 + }, + { + "epoch": 14.98, + "learning_rate": 4.2515513552068474e-05, + "loss": 0.0019, + "step": 83980 + }, + { + "epoch": 14.98, + "learning_rate": 4.2514621968616265e-05, + "loss": 0.0031, + "step": 83990 + }, + { + "epoch": 14.98, + "learning_rate": 4.251373038516405e-05, + "loss": 0.0033, + "step": 84000 + }, + { + "epoch": 14.98, + "learning_rate": 4.251283880171184e-05, + "loss": 0.0038, + "step": 84010 + }, + { + "epoch": 14.98, + "learning_rate": 4.251194721825963e-05, + "loss": 0.0016, + "step": 84020 + }, + { + "epoch": 14.98, + "learning_rate": 4.2511055634807416e-05, + "loss": 0.0019, + "step": 84030 + }, + { + "epoch": 14.99, + "learning_rate": 4.2510164051355214e-05, + "loss": 0.0012, + "step": 84040 + }, + { + "epoch": 14.99, + "learning_rate": 4.2509272467903e-05, + "loss": 0.004, + "step": 84050 + }, + { + "epoch": 14.99, + "learning_rate": 4.250838088445079e-05, + "loss": 0.0031, + "step": 84060 + }, + { + "epoch": 14.99, + "learning_rate": 4.2507489300998575e-05, + "loss": 0.0016, + "step": 84070 + }, + { + "epoch": 14.99, + "learning_rate": 4.2506597717546366e-05, + "loss": 0.0041, + "step": 84080 + }, + { + "epoch": 14.99, + "learning_rate": 4.250570613409415e-05, + "loss": 0.0038, + "step": 84090 + }, + { + "epoch": 15.0, + "learning_rate": 4.250481455064194e-05, + "loss": 0.0051, + "step": 84100 + }, + { + "epoch": 15.0, + "learning_rate": 4.250392296718973e-05, + "loss": 0.0018, + "step": 84110 + }, + { + "epoch": 15.0, + "learning_rate": 4.250303138373752e-05, + "loss": 0.0056, + "step": 84120 + }, + { + "epoch": 15.0, + "eval_loss": 0.020025817677378654, + "eval_runtime": 195.701, + "eval_samples_per_second": 23.705, + "eval_steps_per_second": 2.964, + "step": 84120 + }, + { + "epoch": 15.0, + "learning_rate": 4.250213980028531e-05, + "loss": 0.0035, + "step": 84130 + }, + { + "epoch": 15.0, + "learning_rate": 4.250124821683309e-05, + "loss": 0.0013, + "step": 84140 + }, + { + "epoch": 15.01, + "learning_rate": 4.250035663338089e-05, + "loss": 0.003, + "step": 84150 + }, + { + "epoch": 15.01, + "learning_rate": 4.2499465049928675e-05, + "loss": 0.0035, + "step": 84160 + }, + { + "epoch": 15.01, + "learning_rate": 4.2498573466476466e-05, + "loss": 0.002, + "step": 84170 + }, + { + "epoch": 15.01, + "learning_rate": 4.249768188302425e-05, + "loss": 0.0025, + "step": 84180 + }, + { + "epoch": 15.01, + "learning_rate": 4.249679029957204e-05, + "loss": 0.0027, + "step": 84190 + }, + { + "epoch": 15.01, + "learning_rate": 4.249589871611983e-05, + "loss": 0.0013, + "step": 84200 + }, + { + "epoch": 15.02, + "learning_rate": 4.249500713266762e-05, + "loss": 0.0041, + "step": 84210 + }, + { + "epoch": 15.02, + "learning_rate": 4.249411554921541e-05, + "loss": 0.0025, + "step": 84220 + }, + { + "epoch": 15.02, + "learning_rate": 4.249322396576319e-05, + "loss": 0.0018, + "step": 84230 + }, + { + "epoch": 15.02, + "learning_rate": 4.2492332382310984e-05, + "loss": 0.0007, + "step": 84240 + }, + { + "epoch": 15.02, + "learning_rate": 4.2491440798858776e-05, + "loss": 0.0019, + "step": 84250 + }, + { + "epoch": 15.02, + "learning_rate": 4.249054921540657e-05, + "loss": 0.0019, + "step": 84260 + }, + { + "epoch": 15.03, + "learning_rate": 4.248965763195436e-05, + "loss": 0.0021, + "step": 84270 + }, + { + "epoch": 15.03, + "learning_rate": 4.248876604850214e-05, + "loss": 0.0065, + "step": 84280 + }, + { + "epoch": 15.03, + "learning_rate": 4.2487874465049934e-05, + "loss": 0.0015, + "step": 84290 + }, + { + "epoch": 15.03, + "learning_rate": 4.248698288159772e-05, + "loss": 0.0028, + "step": 84300 + }, + { + "epoch": 15.03, + "learning_rate": 4.248609129814551e-05, + "loss": 0.0026, + "step": 84310 + }, + { + "epoch": 15.04, + "learning_rate": 4.2485199714693294e-05, + "loss": 0.0018, + "step": 84320 + }, + { + "epoch": 15.04, + "learning_rate": 4.2484308131241085e-05, + "loss": 0.0039, + "step": 84330 + }, + { + "epoch": 15.04, + "learning_rate": 4.2483416547788876e-05, + "loss": 0.0019, + "step": 84340 + }, + { + "epoch": 15.04, + "learning_rate": 4.248252496433666e-05, + "loss": 0.004, + "step": 84350 + }, + { + "epoch": 15.04, + "learning_rate": 4.248163338088445e-05, + "loss": 0.002, + "step": 84360 + }, + { + "epoch": 15.04, + "learning_rate": 4.248074179743224e-05, + "loss": 0.0034, + "step": 84370 + }, + { + "epoch": 15.05, + "learning_rate": 4.2479850213980034e-05, + "loss": 0.0022, + "step": 84380 + }, + { + "epoch": 15.05, + "learning_rate": 4.247895863052782e-05, + "loss": 0.001, + "step": 84390 + }, + { + "epoch": 15.05, + "learning_rate": 4.247806704707561e-05, + "loss": 0.0029, + "step": 84400 + }, + { + "epoch": 15.05, + "learning_rate": 4.2477175463623394e-05, + "loss": 0.0037, + "step": 84410 + }, + { + "epoch": 15.05, + "learning_rate": 4.2476283880171186e-05, + "loss": 0.0013, + "step": 84420 + }, + { + "epoch": 15.06, + "learning_rate": 4.247539229671898e-05, + "loss": 0.003, + "step": 84430 + }, + { + "epoch": 15.06, + "learning_rate": 4.247450071326676e-05, + "loss": 0.0017, + "step": 84440 + }, + { + "epoch": 15.06, + "learning_rate": 4.247360912981455e-05, + "loss": 0.004, + "step": 84450 + }, + { + "epoch": 15.06, + "learning_rate": 4.247271754636234e-05, + "loss": 0.0018, + "step": 84460 + }, + { + "epoch": 15.06, + "learning_rate": 4.247182596291013e-05, + "loss": 0.0023, + "step": 84470 + }, + { + "epoch": 15.06, + "learning_rate": 4.247093437945792e-05, + "loss": 0.0028, + "step": 84480 + }, + { + "epoch": 15.07, + "learning_rate": 4.247004279600571e-05, + "loss": 0.0017, + "step": 84490 + }, + { + "epoch": 15.07, + "learning_rate": 4.24691512125535e-05, + "loss": 0.0011, + "step": 84500 + }, + { + "epoch": 15.07, + "learning_rate": 4.2468259629101286e-05, + "loss": 0.0023, + "step": 84510 + }, + { + "epoch": 15.07, + "learning_rate": 4.246736804564908e-05, + "loss": 0.0038, + "step": 84520 + }, + { + "epoch": 15.07, + "learning_rate": 4.246647646219686e-05, + "loss": 0.002, + "step": 84530 + }, + { + "epoch": 15.07, + "learning_rate": 4.246558487874465e-05, + "loss": 0.0067, + "step": 84540 + }, + { + "epoch": 15.08, + "learning_rate": 4.246469329529244e-05, + "loss": 0.0035, + "step": 84550 + }, + { + "epoch": 15.08, + "learning_rate": 4.246380171184023e-05, + "loss": 0.0036, + "step": 84560 + }, + { + "epoch": 15.08, + "learning_rate": 4.246291012838802e-05, + "loss": 0.0051, + "step": 84570 + }, + { + "epoch": 15.08, + "learning_rate": 4.2462018544935804e-05, + "loss": 0.0036, + "step": 84580 + }, + { + "epoch": 15.08, + "learning_rate": 4.24611269614836e-05, + "loss": 0.0044, + "step": 84590 + }, + { + "epoch": 15.09, + "learning_rate": 4.246023537803139e-05, + "loss": 0.0034, + "step": 84600 + }, + { + "epoch": 15.09, + "learning_rate": 4.245934379457918e-05, + "loss": 0.0036, + "step": 84610 + }, + { + "epoch": 15.09, + "learning_rate": 4.245845221112696e-05, + "loss": 0.003, + "step": 84620 + }, + { + "epoch": 15.09, + "learning_rate": 4.2457560627674754e-05, + "loss": 0.0023, + "step": 84630 + }, + { + "epoch": 15.09, + "learning_rate": 4.245666904422254e-05, + "loss": 0.0028, + "step": 84640 + }, + { + "epoch": 15.09, + "learning_rate": 4.245577746077033e-05, + "loss": 0.0028, + "step": 84650 + }, + { + "epoch": 15.1, + "learning_rate": 4.245488587731812e-05, + "loss": 0.0037, + "step": 84660 + }, + { + "epoch": 15.1, + "learning_rate": 4.2453994293865905e-05, + "loss": 0.0034, + "step": 84670 + }, + { + "epoch": 15.1, + "learning_rate": 4.2453102710413696e-05, + "loss": 0.0023, + "step": 84680 + }, + { + "epoch": 15.1, + "learning_rate": 4.245221112696148e-05, + "loss": 0.0023, + "step": 84690 + }, + { + "epoch": 15.1, + "learning_rate": 4.245131954350928e-05, + "loss": 0.0025, + "step": 84700 + }, + { + "epoch": 15.11, + "learning_rate": 4.245042796005706e-05, + "loss": 0.0047, + "step": 84710 + }, + { + "epoch": 15.11, + "learning_rate": 4.2449536376604854e-05, + "loss": 0.0019, + "step": 84720 + }, + { + "epoch": 15.11, + "learning_rate": 4.2448644793152645e-05, + "loss": 0.0028, + "step": 84730 + }, + { + "epoch": 15.11, + "learning_rate": 4.244775320970043e-05, + "loss": 0.0047, + "step": 84740 + }, + { + "epoch": 15.11, + "learning_rate": 4.244686162624822e-05, + "loss": 0.002, + "step": 84750 + }, + { + "epoch": 15.11, + "learning_rate": 4.2445970042796005e-05, + "loss": 0.0018, + "step": 84760 + }, + { + "epoch": 15.12, + "learning_rate": 4.2445078459343797e-05, + "loss": 0.0023, + "step": 84770 + }, + { + "epoch": 15.12, + "learning_rate": 4.244418687589158e-05, + "loss": 0.0021, + "step": 84780 + }, + { + "epoch": 15.12, + "learning_rate": 4.244329529243937e-05, + "loss": 0.0025, + "step": 84790 + }, + { + "epoch": 15.12, + "learning_rate": 4.2442403708987163e-05, + "loss": 0.0029, + "step": 84800 + }, + { + "epoch": 15.12, + "learning_rate": 4.2441512125534955e-05, + "loss": 0.0022, + "step": 84810 + }, + { + "epoch": 15.12, + "learning_rate": 4.2440620542082746e-05, + "loss": 0.0021, + "step": 84820 + }, + { + "epoch": 15.13, + "learning_rate": 4.243972895863053e-05, + "loss": 0.0027, + "step": 84830 + }, + { + "epoch": 15.13, + "learning_rate": 4.243883737517832e-05, + "loss": 0.0029, + "step": 84840 + }, + { + "epoch": 15.13, + "learning_rate": 4.2437945791726106e-05, + "loss": 0.007, + "step": 84850 + }, + { + "epoch": 15.13, + "learning_rate": 4.24370542082739e-05, + "loss": 0.0012, + "step": 84860 + }, + { + "epoch": 15.13, + "learning_rate": 4.243616262482168e-05, + "loss": 0.0031, + "step": 84870 + }, + { + "epoch": 15.14, + "learning_rate": 4.243527104136947e-05, + "loss": 0.0027, + "step": 84880 + }, + { + "epoch": 15.14, + "learning_rate": 4.2434379457917264e-05, + "loss": 0.0022, + "step": 84890 + }, + { + "epoch": 15.14, + "learning_rate": 4.243348787446505e-05, + "loss": 0.0016, + "step": 84900 + }, + { + "epoch": 15.14, + "learning_rate": 4.243259629101284e-05, + "loss": 0.0029, + "step": 84910 + }, + { + "epoch": 15.14, + "learning_rate": 4.243170470756063e-05, + "loss": 0.0017, + "step": 84920 + }, + { + "epoch": 15.14, + "learning_rate": 4.243081312410842e-05, + "loss": 0.0035, + "step": 84930 + }, + { + "epoch": 15.15, + "learning_rate": 4.2429921540656206e-05, + "loss": 0.0044, + "step": 84940 + }, + { + "epoch": 15.15, + "learning_rate": 4.2429029957204e-05, + "loss": 0.0024, + "step": 84950 + }, + { + "epoch": 15.15, + "learning_rate": 4.242813837375179e-05, + "loss": 0.0023, + "step": 84960 + }, + { + "epoch": 15.15, + "learning_rate": 4.242724679029957e-05, + "loss": 0.0032, + "step": 84970 + }, + { + "epoch": 15.15, + "learning_rate": 4.2426355206847365e-05, + "loss": 0.0045, + "step": 84980 + }, + { + "epoch": 15.16, + "learning_rate": 4.242546362339515e-05, + "loss": 0.0057, + "step": 84990 + }, + { + "epoch": 15.16, + "learning_rate": 4.2424661198288164e-05, + "loss": 0.0066, + "step": 85000 + }, + { + "epoch": 15.16, + "learning_rate": 4.242376961483595e-05, + "loss": 0.003, + "step": 85010 + }, + { + "epoch": 15.16, + "learning_rate": 4.242287803138374e-05, + "loss": 0.003, + "step": 85020 + }, + { + "epoch": 15.16, + "learning_rate": 4.242198644793153e-05, + "loss": 0.0013, + "step": 85030 + }, + { + "epoch": 15.16, + "learning_rate": 4.2421094864479315e-05, + "loss": 0.0047, + "step": 85040 + }, + { + "epoch": 15.17, + "learning_rate": 4.2420203281027106e-05, + "loss": 0.0071, + "step": 85050 + }, + { + "epoch": 15.17, + "learning_rate": 4.241931169757489e-05, + "loss": 0.0017, + "step": 85060 + }, + { + "epoch": 15.17, + "learning_rate": 4.241842011412269e-05, + "loss": 0.0025, + "step": 85070 + }, + { + "epoch": 15.17, + "learning_rate": 4.241752853067047e-05, + "loss": 0.0037, + "step": 85080 + }, + { + "epoch": 15.17, + "learning_rate": 4.2416636947218264e-05, + "loss": 0.0059, + "step": 85090 + }, + { + "epoch": 15.17, + "learning_rate": 4.241574536376605e-05, + "loss": 0.0032, + "step": 85100 + }, + { + "epoch": 15.18, + "learning_rate": 4.241485378031384e-05, + "loss": 0.0027, + "step": 85110 + }, + { + "epoch": 15.18, + "learning_rate": 4.241396219686163e-05, + "loss": 0.0045, + "step": 85120 + }, + { + "epoch": 15.18, + "learning_rate": 4.2413070613409416e-05, + "loss": 0.0015, + "step": 85130 + }, + { + "epoch": 15.18, + "learning_rate": 4.241217902995721e-05, + "loss": 0.0011, + "step": 85140 + }, + { + "epoch": 15.18, + "learning_rate": 4.241128744650499e-05, + "loss": 0.0025, + "step": 85150 + }, + { + "epoch": 15.19, + "learning_rate": 4.241039586305278e-05, + "loss": 0.0033, + "step": 85160 + }, + { + "epoch": 15.19, + "learning_rate": 4.2409504279600574e-05, + "loss": 0.0023, + "step": 85170 + }, + { + "epoch": 15.19, + "learning_rate": 4.240861269614836e-05, + "loss": 0.0026, + "step": 85180 + }, + { + "epoch": 15.19, + "learning_rate": 4.240772111269615e-05, + "loss": 0.007, + "step": 85190 + }, + { + "epoch": 15.19, + "learning_rate": 4.240682952924394e-05, + "loss": 0.0014, + "step": 85200 + }, + { + "epoch": 15.19, + "learning_rate": 4.240593794579173e-05, + "loss": 0.0026, + "step": 85210 + }, + { + "epoch": 15.2, + "learning_rate": 4.2405046362339516e-05, + "loss": 0.0024, + "step": 85220 + }, + { + "epoch": 15.2, + "learning_rate": 4.240415477888731e-05, + "loss": 0.0011, + "step": 85230 + }, + { + "epoch": 15.2, + "learning_rate": 4.240326319543509e-05, + "loss": 0.0033, + "step": 85240 + }, + { + "epoch": 15.2, + "learning_rate": 4.240237161198288e-05, + "loss": 0.0017, + "step": 85250 + }, + { + "epoch": 15.2, + "learning_rate": 4.2401480028530674e-05, + "loss": 0.0018, + "step": 85260 + }, + { + "epoch": 15.21, + "learning_rate": 4.240058844507846e-05, + "loss": 0.0045, + "step": 85270 + }, + { + "epoch": 15.21, + "learning_rate": 4.239969686162625e-05, + "loss": 0.0025, + "step": 85280 + }, + { + "epoch": 15.21, + "learning_rate": 4.2398805278174034e-05, + "loss": 0.0038, + "step": 85290 + }, + { + "epoch": 15.21, + "learning_rate": 4.239791369472183e-05, + "loss": 0.0032, + "step": 85300 + }, + { + "epoch": 15.21, + "learning_rate": 4.239702211126962e-05, + "loss": 0.0019, + "step": 85310 + }, + { + "epoch": 15.21, + "learning_rate": 4.239613052781741e-05, + "loss": 0.0042, + "step": 85320 + }, + { + "epoch": 15.22, + "learning_rate": 4.239523894436519e-05, + "loss": 0.0042, + "step": 85330 + }, + { + "epoch": 15.22, + "learning_rate": 4.2394347360912984e-05, + "loss": 0.0015, + "step": 85340 + }, + { + "epoch": 15.22, + "learning_rate": 4.2393455777460775e-05, + "loss": 0.003, + "step": 85350 + }, + { + "epoch": 15.22, + "learning_rate": 4.239256419400856e-05, + "loss": 0.0029, + "step": 85360 + }, + { + "epoch": 15.22, + "learning_rate": 4.239167261055635e-05, + "loss": 0.0038, + "step": 85370 + }, + { + "epoch": 15.22, + "learning_rate": 4.2390781027104135e-05, + "loss": 0.0034, + "step": 85380 + }, + { + "epoch": 15.23, + "learning_rate": 4.2389889443651926e-05, + "loss": 0.0024, + "step": 85390 + }, + { + "epoch": 15.23, + "learning_rate": 4.238899786019972e-05, + "loss": 0.0021, + "step": 85400 + }, + { + "epoch": 15.23, + "learning_rate": 4.238810627674751e-05, + "loss": 0.0065, + "step": 85410 + }, + { + "epoch": 15.23, + "learning_rate": 4.238721469329529e-05, + "loss": 0.0034, + "step": 85420 + }, + { + "epoch": 15.23, + "learning_rate": 4.2386323109843084e-05, + "loss": 0.0025, + "step": 85430 + }, + { + "epoch": 15.24, + "learning_rate": 4.2385431526390875e-05, + "loss": 0.0028, + "step": 85440 + }, + { + "epoch": 15.24, + "learning_rate": 4.238453994293866e-05, + "loss": 0.0014, + "step": 85450 + }, + { + "epoch": 15.24, + "learning_rate": 4.238364835948645e-05, + "loss": 0.0068, + "step": 85460 + }, + { + "epoch": 15.24, + "learning_rate": 4.2382756776034235e-05, + "loss": 0.0027, + "step": 85470 + }, + { + "epoch": 15.24, + "learning_rate": 4.238186519258203e-05, + "loss": 0.0017, + "step": 85480 + }, + { + "epoch": 15.24, + "learning_rate": 4.238097360912982e-05, + "loss": 0.0041, + "step": 85490 + }, + { + "epoch": 15.25, + "learning_rate": 4.23800820256776e-05, + "loss": 0.0019, + "step": 85500 + }, + { + "epoch": 15.25, + "learning_rate": 4.2379190442225394e-05, + "loss": 0.0022, + "step": 85510 + }, + { + "epoch": 15.25, + "learning_rate": 4.2378298858773185e-05, + "loss": 0.0025, + "step": 85520 + }, + { + "epoch": 15.25, + "learning_rate": 4.2377407275320976e-05, + "loss": 0.001, + "step": 85530 + }, + { + "epoch": 15.25, + "learning_rate": 4.237651569186876e-05, + "loss": 0.0017, + "step": 85540 + }, + { + "epoch": 15.25, + "learning_rate": 4.237562410841655e-05, + "loss": 0.0021, + "step": 85550 + }, + { + "epoch": 15.26, + "learning_rate": 4.2374732524964336e-05, + "loss": 0.0027, + "step": 85560 + }, + { + "epoch": 15.26, + "learning_rate": 4.237384094151213e-05, + "loss": 0.0059, + "step": 85570 + }, + { + "epoch": 15.26, + "learning_rate": 4.237294935805992e-05, + "loss": 0.009, + "step": 85580 + }, + { + "epoch": 15.26, + "learning_rate": 4.23720577746077e-05, + "loss": 0.0017, + "step": 85590 + }, + { + "epoch": 15.26, + "learning_rate": 4.2371166191155494e-05, + "loss": 0.0041, + "step": 85600 + }, + { + "epoch": 15.27, + "learning_rate": 4.237027460770328e-05, + "loss": 0.0029, + "step": 85610 + }, + { + "epoch": 15.27, + "learning_rate": 4.236938302425107e-05, + "loss": 0.0017, + "step": 85620 + }, + { + "epoch": 15.27, + "learning_rate": 4.236849144079886e-05, + "loss": 0.003, + "step": 85630 + }, + { + "epoch": 15.27, + "learning_rate": 4.236759985734665e-05, + "loss": 0.0028, + "step": 85640 + }, + { + "epoch": 15.27, + "learning_rate": 4.2366708273894437e-05, + "loss": 0.0039, + "step": 85650 + }, + { + "epoch": 15.27, + "learning_rate": 4.236581669044223e-05, + "loss": 0.0024, + "step": 85660 + }, + { + "epoch": 15.28, + "learning_rate": 4.236492510699002e-05, + "loss": 0.0025, + "step": 85670 + }, + { + "epoch": 15.28, + "learning_rate": 4.2364033523537803e-05, + "loss": 0.0026, + "step": 85680 + }, + { + "epoch": 15.28, + "learning_rate": 4.2363141940085595e-05, + "loss": 0.0025, + "step": 85690 + }, + { + "epoch": 15.28, + "learning_rate": 4.236225035663338e-05, + "loss": 0.0037, + "step": 85700 + }, + { + "epoch": 15.28, + "learning_rate": 4.236135877318117e-05, + "loss": 0.0038, + "step": 85710 + }, + { + "epoch": 15.29, + "learning_rate": 4.236046718972896e-05, + "loss": 0.0018, + "step": 85720 + }, + { + "epoch": 15.29, + "learning_rate": 4.2359575606276746e-05, + "loss": 0.0051, + "step": 85730 + }, + { + "epoch": 15.29, + "learning_rate": 4.2358684022824544e-05, + "loss": 0.0037, + "step": 85740 + }, + { + "epoch": 15.29, + "learning_rate": 4.235779243937233e-05, + "loss": 0.0016, + "step": 85750 + }, + { + "epoch": 15.29, + "learning_rate": 4.235690085592012e-05, + "loss": 0.0014, + "step": 85760 + }, + { + "epoch": 15.29, + "learning_rate": 4.2356009272467904e-05, + "loss": 0.0045, + "step": 85770 + }, + { + "epoch": 15.3, + "learning_rate": 4.2355117689015695e-05, + "loss": 0.0028, + "step": 85780 + }, + { + "epoch": 15.3, + "learning_rate": 4.235422610556348e-05, + "loss": 0.0031, + "step": 85790 + }, + { + "epoch": 15.3, + "learning_rate": 4.235333452211127e-05, + "loss": 0.0023, + "step": 85800 + }, + { + "epoch": 15.3, + "learning_rate": 4.235244293865906e-05, + "loss": 0.0011, + "step": 85810 + }, + { + "epoch": 15.3, + "learning_rate": 4.2351551355206846e-05, + "loss": 0.0022, + "step": 85820 + }, + { + "epoch": 15.3, + "learning_rate": 4.235065977175464e-05, + "loss": 0.0015, + "step": 85830 + }, + { + "epoch": 15.31, + "learning_rate": 4.234976818830242e-05, + "loss": 0.0027, + "step": 85840 + }, + { + "epoch": 15.31, + "learning_rate": 4.234887660485022e-05, + "loss": 0.0037, + "step": 85850 + }, + { + "epoch": 15.31, + "learning_rate": 4.2347985021398005e-05, + "loss": 0.0025, + "step": 85860 + }, + { + "epoch": 15.31, + "learning_rate": 4.2347093437945796e-05, + "loss": 0.0028, + "step": 85870 + }, + { + "epoch": 15.31, + "learning_rate": 4.234620185449358e-05, + "loss": 0.0025, + "step": 85880 + }, + { + "epoch": 15.32, + "learning_rate": 4.234531027104137e-05, + "loss": 0.0024, + "step": 85890 + }, + { + "epoch": 15.32, + "learning_rate": 4.234441868758916e-05, + "loss": 0.0025, + "step": 85900 + }, + { + "epoch": 15.32, + "learning_rate": 4.234352710413695e-05, + "loss": 0.0033, + "step": 85910 + }, + { + "epoch": 15.32, + "learning_rate": 4.234263552068474e-05, + "loss": 0.0031, + "step": 85920 + }, + { + "epoch": 15.32, + "learning_rate": 4.234174393723252e-05, + "loss": 0.0018, + "step": 85930 + }, + { + "epoch": 15.32, + "learning_rate": 4.2340852353780314e-05, + "loss": 0.0015, + "step": 85940 + }, + { + "epoch": 15.33, + "learning_rate": 4.2339960770328105e-05, + "loss": 0.0019, + "step": 85950 + }, + { + "epoch": 15.33, + "learning_rate": 4.2339069186875896e-05, + "loss": 0.0015, + "step": 85960 + }, + { + "epoch": 15.33, + "learning_rate": 4.233817760342369e-05, + "loss": 0.005, + "step": 85970 + }, + { + "epoch": 15.33, + "learning_rate": 4.233728601997147e-05, + "loss": 0.0035, + "step": 85980 + }, + { + "epoch": 15.33, + "learning_rate": 4.233639443651926e-05, + "loss": 0.0026, + "step": 85990 + }, + { + "epoch": 15.34, + "learning_rate": 4.233550285306705e-05, + "loss": 0.0036, + "step": 86000 + }, + { + "epoch": 15.34, + "learning_rate": 4.233461126961484e-05, + "loss": 0.0025, + "step": 86010 + }, + { + "epoch": 15.34, + "learning_rate": 4.233371968616262e-05, + "loss": 0.0018, + "step": 86020 + }, + { + "epoch": 15.34, + "learning_rate": 4.2332828102710414e-05, + "loss": 0.0032, + "step": 86030 + }, + { + "epoch": 15.34, + "learning_rate": 4.2331936519258206e-05, + "loss": 0.0044, + "step": 86040 + }, + { + "epoch": 15.34, + "learning_rate": 4.233104493580599e-05, + "loss": 0.0024, + "step": 86050 + }, + { + "epoch": 15.35, + "learning_rate": 4.233015335235378e-05, + "loss": 0.0046, + "step": 86060 + }, + { + "epoch": 15.35, + "learning_rate": 4.232926176890157e-05, + "loss": 0.0023, + "step": 86070 + }, + { + "epoch": 15.35, + "learning_rate": 4.2328370185449364e-05, + "loss": 0.0023, + "step": 86080 + }, + { + "epoch": 15.35, + "learning_rate": 4.232747860199715e-05, + "loss": 0.0031, + "step": 86090 + }, + { + "epoch": 15.35, + "learning_rate": 4.232658701854494e-05, + "loss": 0.0029, + "step": 86100 + }, + { + "epoch": 15.35, + "learning_rate": 4.2325695435092724e-05, + "loss": 0.0022, + "step": 86110 + }, + { + "epoch": 15.36, + "learning_rate": 4.2324803851640515e-05, + "loss": 0.0028, + "step": 86120 + }, + { + "epoch": 15.36, + "learning_rate": 4.2323912268188306e-05, + "loss": 0.0028, + "step": 86130 + }, + { + "epoch": 15.36, + "learning_rate": 4.232302068473609e-05, + "loss": 0.0026, + "step": 86140 + }, + { + "epoch": 15.36, + "learning_rate": 4.232212910128388e-05, + "loss": 0.0068, + "step": 86150 + }, + { + "epoch": 15.36, + "learning_rate": 4.2321237517831666e-05, + "loss": 0.0036, + "step": 86160 + }, + { + "epoch": 15.37, + "learning_rate": 4.232034593437946e-05, + "loss": 0.0065, + "step": 86170 + }, + { + "epoch": 15.37, + "learning_rate": 4.231945435092725e-05, + "loss": 0.002, + "step": 86180 + }, + { + "epoch": 15.37, + "learning_rate": 4.231856276747504e-05, + "loss": 0.0015, + "step": 86190 + }, + { + "epoch": 15.37, + "learning_rate": 4.231767118402283e-05, + "loss": 0.0031, + "step": 86200 + }, + { + "epoch": 15.37, + "learning_rate": 4.2316779600570616e-05, + "loss": 0.0027, + "step": 86210 + }, + { + "epoch": 15.37, + "learning_rate": 4.231588801711841e-05, + "loss": 0.0025, + "step": 86220 + }, + { + "epoch": 15.38, + "learning_rate": 4.231499643366619e-05, + "loss": 0.0027, + "step": 86230 + }, + { + "epoch": 15.38, + "learning_rate": 4.231410485021398e-05, + "loss": 0.0028, + "step": 86240 + }, + { + "epoch": 15.38, + "learning_rate": 4.231321326676177e-05, + "loss": 0.0036, + "step": 86250 + }, + { + "epoch": 15.38, + "learning_rate": 4.231232168330956e-05, + "loss": 0.0021, + "step": 86260 + }, + { + "epoch": 15.38, + "learning_rate": 4.231143009985735e-05, + "loss": 0.0022, + "step": 86270 + }, + { + "epoch": 15.39, + "learning_rate": 4.2310538516405134e-05, + "loss": 0.0051, + "step": 86280 + }, + { + "epoch": 15.39, + "learning_rate": 4.230964693295293e-05, + "loss": 0.002, + "step": 86290 + }, + { + "epoch": 15.39, + "learning_rate": 4.2308755349500716e-05, + "loss": 0.0018, + "step": 86300 + }, + { + "epoch": 15.39, + "learning_rate": 4.230786376604851e-05, + "loss": 0.003, + "step": 86310 + }, + { + "epoch": 15.39, + "learning_rate": 4.230697218259629e-05, + "loss": 0.0027, + "step": 86320 + }, + { + "epoch": 15.39, + "learning_rate": 4.230608059914408e-05, + "loss": 0.0054, + "step": 86330 + }, + { + "epoch": 15.4, + "learning_rate": 4.230518901569187e-05, + "loss": 0.0034, + "step": 86340 + }, + { + "epoch": 15.4, + "learning_rate": 4.230429743223966e-05, + "loss": 0.0025, + "step": 86350 + }, + { + "epoch": 15.4, + "learning_rate": 4.230340584878745e-05, + "loss": 0.0011, + "step": 86360 + }, + { + "epoch": 15.4, + "learning_rate": 4.2302514265335234e-05, + "loss": 0.0026, + "step": 86370 + }, + { + "epoch": 15.4, + "learning_rate": 4.2301622681883026e-05, + "loss": 0.0033, + "step": 86380 + }, + { + "epoch": 15.4, + "learning_rate": 4.230073109843081e-05, + "loss": 0.0028, + "step": 86390 + }, + { + "epoch": 15.41, + "learning_rate": 4.229983951497861e-05, + "loss": 0.0018, + "step": 86400 + }, + { + "epoch": 15.41, + "learning_rate": 4.229894793152639e-05, + "loss": 0.0043, + "step": 86410 + }, + { + "epoch": 15.41, + "learning_rate": 4.2298056348074184e-05, + "loss": 0.0035, + "step": 86420 + }, + { + "epoch": 15.41, + "learning_rate": 4.2297164764621975e-05, + "loss": 0.0021, + "step": 86430 + }, + { + "epoch": 15.41, + "learning_rate": 4.229627318116976e-05, + "loss": 0.0035, + "step": 86440 + }, + { + "epoch": 15.42, + "learning_rate": 4.229538159771755e-05, + "loss": 0.0037, + "step": 86450 + }, + { + "epoch": 15.42, + "learning_rate": 4.2294490014265335e-05, + "loss": 0.0019, + "step": 86460 + }, + { + "epoch": 15.42, + "learning_rate": 4.2293598430813126e-05, + "loss": 0.0047, + "step": 86470 + }, + { + "epoch": 15.42, + "learning_rate": 4.229270684736091e-05, + "loss": 0.005, + "step": 86480 + }, + { + "epoch": 15.42, + "learning_rate": 4.22918152639087e-05, + "loss": 0.0019, + "step": 86490 + }, + { + "epoch": 15.42, + "learning_rate": 4.229092368045649e-05, + "loss": 0.0037, + "step": 86500 + }, + { + "epoch": 15.43, + "learning_rate": 4.2290032097004284e-05, + "loss": 0.0017, + "step": 86510 + }, + { + "epoch": 15.43, + "learning_rate": 4.2289140513552075e-05, + "loss": 0.0025, + "step": 86520 + }, + { + "epoch": 15.43, + "learning_rate": 4.228824893009986e-05, + "loss": 0.003, + "step": 86530 + }, + { + "epoch": 15.43, + "learning_rate": 4.228735734664765e-05, + "loss": 0.0049, + "step": 86540 + }, + { + "epoch": 15.43, + "learning_rate": 4.2286465763195435e-05, + "loss": 0.0011, + "step": 86550 + }, + { + "epoch": 15.44, + "learning_rate": 4.228557417974323e-05, + "loss": 0.0023, + "step": 86560 + }, + { + "epoch": 15.44, + "learning_rate": 4.228468259629101e-05, + "loss": 0.0024, + "step": 86570 + }, + { + "epoch": 15.44, + "learning_rate": 4.22837910128388e-05, + "loss": 0.002, + "step": 86580 + }, + { + "epoch": 15.44, + "learning_rate": 4.2282899429386593e-05, + "loss": 0.0031, + "step": 86590 + }, + { + "epoch": 15.44, + "learning_rate": 4.228200784593438e-05, + "loss": 0.0021, + "step": 86600 + }, + { + "epoch": 15.44, + "learning_rate": 4.228111626248217e-05, + "loss": 0.0023, + "step": 86610 + }, + { + "epoch": 15.45, + "learning_rate": 4.228022467902996e-05, + "loss": 0.0023, + "step": 86620 + }, + { + "epoch": 15.45, + "learning_rate": 4.227933309557775e-05, + "loss": 0.0033, + "step": 86630 + }, + { + "epoch": 15.45, + "learning_rate": 4.2278441512125536e-05, + "loss": 0.0014, + "step": 86640 + }, + { + "epoch": 15.45, + "learning_rate": 4.227754992867333e-05, + "loss": 0.0016, + "step": 86650 + }, + { + "epoch": 15.45, + "learning_rate": 4.227665834522112e-05, + "loss": 0.0022, + "step": 86660 + }, + { + "epoch": 15.45, + "learning_rate": 4.22757667617689e-05, + "loss": 0.0031, + "step": 86670 + }, + { + "epoch": 15.46, + "learning_rate": 4.2274875178316694e-05, + "loss": 0.0044, + "step": 86680 + }, + { + "epoch": 15.46, + "learning_rate": 4.227398359486448e-05, + "loss": 0.0019, + "step": 86690 + }, + { + "epoch": 15.46, + "learning_rate": 4.227309201141227e-05, + "loss": 0.0042, + "step": 86700 + }, + { + "epoch": 15.46, + "learning_rate": 4.2272200427960054e-05, + "loss": 0.0029, + "step": 86710 + }, + { + "epoch": 15.46, + "learning_rate": 4.2271308844507845e-05, + "loss": 0.0041, + "step": 86720 + }, + { + "epoch": 15.47, + "learning_rate": 4.2270417261055637e-05, + "loss": 0.0059, + "step": 86730 + }, + { + "epoch": 15.47, + "learning_rate": 4.226952567760343e-05, + "loss": 0.0036, + "step": 86740 + }, + { + "epoch": 15.47, + "learning_rate": 4.226863409415122e-05, + "loss": 0.0012, + "step": 86750 + }, + { + "epoch": 15.47, + "learning_rate": 4.2267742510699003e-05, + "loss": 0.0034, + "step": 86760 + }, + { + "epoch": 15.47, + "learning_rate": 4.2266850927246795e-05, + "loss": 0.003, + "step": 86770 + }, + { + "epoch": 15.47, + "learning_rate": 4.226595934379458e-05, + "loss": 0.0031, + "step": 86780 + }, + { + "epoch": 15.48, + "learning_rate": 4.226506776034237e-05, + "loss": 0.0048, + "step": 86790 + }, + { + "epoch": 15.48, + "learning_rate": 4.2264176176890155e-05, + "loss": 0.004, + "step": 86800 + }, + { + "epoch": 15.48, + "learning_rate": 4.2263284593437946e-05, + "loss": 0.002, + "step": 86810 + }, + { + "epoch": 15.48, + "learning_rate": 4.226239300998574e-05, + "loss": 0.006, + "step": 86820 + }, + { + "epoch": 15.48, + "learning_rate": 4.226150142653352e-05, + "loss": 0.0033, + "step": 86830 + }, + { + "epoch": 15.49, + "learning_rate": 4.226060984308132e-05, + "loss": 0.0032, + "step": 86840 + }, + { + "epoch": 15.49, + "learning_rate": 4.2259718259629104e-05, + "loss": 0.0025, + "step": 86850 + }, + { + "epoch": 15.49, + "learning_rate": 4.2258826676176895e-05, + "loss": 0.0031, + "step": 86860 + }, + { + "epoch": 15.49, + "learning_rate": 4.225793509272468e-05, + "loss": 0.0023, + "step": 86870 + }, + { + "epoch": 15.49, + "learning_rate": 4.225704350927247e-05, + "loss": 0.0032, + "step": 86880 + }, + { + "epoch": 15.49, + "learning_rate": 4.225615192582026e-05, + "loss": 0.0028, + "step": 86890 + }, + { + "epoch": 15.5, + "learning_rate": 4.2255260342368046e-05, + "loss": 0.004, + "step": 86900 + }, + { + "epoch": 15.5, + "learning_rate": 4.225436875891584e-05, + "loss": 0.0027, + "step": 86910 + }, + { + "epoch": 15.5, + "learning_rate": 4.225347717546362e-05, + "loss": 0.0016, + "step": 86920 + }, + { + "epoch": 15.5, + "learning_rate": 4.225258559201141e-05, + "loss": 0.0019, + "step": 86930 + }, + { + "epoch": 15.5, + "learning_rate": 4.22516940085592e-05, + "loss": 0.0028, + "step": 86940 + }, + { + "epoch": 15.5, + "learning_rate": 4.2250802425106996e-05, + "loss": 0.0015, + "step": 86950 + }, + { + "epoch": 15.51, + "learning_rate": 4.224991084165478e-05, + "loss": 0.0024, + "step": 86960 + }, + { + "epoch": 15.51, + "learning_rate": 4.224901925820257e-05, + "loss": 0.0029, + "step": 86970 + }, + { + "epoch": 15.51, + "learning_rate": 4.224812767475036e-05, + "loss": 0.0024, + "step": 86980 + }, + { + "epoch": 15.51, + "learning_rate": 4.224723609129815e-05, + "loss": 0.0043, + "step": 86990 + }, + { + "epoch": 15.51, + "learning_rate": 4.224634450784594e-05, + "loss": 0.0017, + "step": 87000 + }, + { + "epoch": 15.52, + "learning_rate": 4.224545292439372e-05, + "loss": 0.0025, + "step": 87010 + }, + { + "epoch": 15.52, + "learning_rate": 4.2244561340941514e-05, + "loss": 0.0046, + "step": 87020 + }, + { + "epoch": 15.52, + "learning_rate": 4.22436697574893e-05, + "loss": 0.0041, + "step": 87030 + }, + { + "epoch": 15.52, + "learning_rate": 4.224277817403709e-05, + "loss": 0.0023, + "step": 87040 + }, + { + "epoch": 15.52, + "learning_rate": 4.224188659058488e-05, + "loss": 0.0034, + "step": 87050 + }, + { + "epoch": 15.52, + "learning_rate": 4.224099500713267e-05, + "loss": 0.0015, + "step": 87060 + }, + { + "epoch": 15.53, + "learning_rate": 4.224010342368046e-05, + "loss": 0.0025, + "step": 87070 + }, + { + "epoch": 15.53, + "learning_rate": 4.223921184022825e-05, + "loss": 0.0015, + "step": 87080 + }, + { + "epoch": 15.53, + "learning_rate": 4.223832025677604e-05, + "loss": 0.0023, + "step": 87090 + }, + { + "epoch": 15.53, + "learning_rate": 4.223742867332382e-05, + "loss": 0.0015, + "step": 87100 + }, + { + "epoch": 15.53, + "learning_rate": 4.2236537089871614e-05, + "loss": 0.0022, + "step": 87110 + }, + { + "epoch": 15.53, + "learning_rate": 4.2235645506419406e-05, + "loss": 0.0012, + "step": 87120 + }, + { + "epoch": 15.54, + "learning_rate": 4.223475392296719e-05, + "loss": 0.0023, + "step": 87130 + }, + { + "epoch": 15.54, + "learning_rate": 4.223386233951498e-05, + "loss": 0.001, + "step": 87140 + }, + { + "epoch": 15.54, + "learning_rate": 4.2232970756062766e-05, + "loss": 0.0029, + "step": 87150 + }, + { + "epoch": 15.54, + "learning_rate": 4.223207917261056e-05, + "loss": 0.0019, + "step": 87160 + }, + { + "epoch": 15.54, + "learning_rate": 4.223118758915835e-05, + "loss": 0.0032, + "step": 87170 + }, + { + "epoch": 15.55, + "learning_rate": 4.223029600570614e-05, + "loss": 0.0024, + "step": 87180 + }, + { + "epoch": 15.55, + "learning_rate": 4.2229404422253924e-05, + "loss": 0.0016, + "step": 87190 + }, + { + "epoch": 15.55, + "learning_rate": 4.2228512838801715e-05, + "loss": 0.0013, + "step": 87200 + }, + { + "epoch": 15.55, + "learning_rate": 4.2227621255349506e-05, + "loss": 0.0026, + "step": 87210 + }, + { + "epoch": 15.55, + "learning_rate": 4.222672967189729e-05, + "loss": 0.0021, + "step": 87220 + }, + { + "epoch": 15.55, + "learning_rate": 4.222583808844508e-05, + "loss": 0.0019, + "step": 87230 + }, + { + "epoch": 15.56, + "learning_rate": 4.2224946504992866e-05, + "loss": 0.0034, + "step": 87240 + }, + { + "epoch": 15.56, + "learning_rate": 4.222405492154066e-05, + "loss": 0.0014, + "step": 87250 + }, + { + "epoch": 15.56, + "learning_rate": 4.222316333808844e-05, + "loss": 0.0018, + "step": 87260 + }, + { + "epoch": 15.56, + "learning_rate": 4.222227175463623e-05, + "loss": 0.0037, + "step": 87270 + }, + { + "epoch": 15.56, + "learning_rate": 4.2221380171184024e-05, + "loss": 0.0027, + "step": 87280 + }, + { + "epoch": 15.57, + "learning_rate": 4.2220488587731816e-05, + "loss": 0.0047, + "step": 87290 + }, + { + "epoch": 15.57, + "learning_rate": 4.221959700427961e-05, + "loss": 0.004, + "step": 87300 + }, + { + "epoch": 15.57, + "learning_rate": 4.221870542082739e-05, + "loss": 0.0018, + "step": 87310 + }, + { + "epoch": 15.57, + "learning_rate": 4.221781383737518e-05, + "loss": 0.0051, + "step": 87320 + }, + { + "epoch": 15.57, + "learning_rate": 4.221692225392297e-05, + "loss": 0.0061, + "step": 87330 + }, + { + "epoch": 15.57, + "learning_rate": 4.221603067047076e-05, + "loss": 0.0023, + "step": 87340 + }, + { + "epoch": 15.58, + "learning_rate": 4.221513908701855e-05, + "loss": 0.0044, + "step": 87350 + }, + { + "epoch": 15.58, + "learning_rate": 4.2214247503566334e-05, + "loss": 0.0025, + "step": 87360 + }, + { + "epoch": 15.58, + "learning_rate": 4.2213355920114125e-05, + "loss": 0.0038, + "step": 87370 + }, + { + "epoch": 15.58, + "learning_rate": 4.221246433666191e-05, + "loss": 0.0046, + "step": 87380 + }, + { + "epoch": 15.58, + "learning_rate": 4.221157275320971e-05, + "loss": 0.0012, + "step": 87390 + }, + { + "epoch": 15.58, + "learning_rate": 4.221068116975749e-05, + "loss": 0.0042, + "step": 87400 + }, + { + "epoch": 15.59, + "learning_rate": 4.220978958630528e-05, + "loss": 0.003, + "step": 87410 + }, + { + "epoch": 15.59, + "learning_rate": 4.220889800285307e-05, + "loss": 0.003, + "step": 87420 + }, + { + "epoch": 15.59, + "learning_rate": 4.220800641940086e-05, + "loss": 0.0026, + "step": 87430 + }, + { + "epoch": 15.59, + "learning_rate": 4.220711483594865e-05, + "loss": 0.0039, + "step": 87440 + }, + { + "epoch": 15.59, + "learning_rate": 4.2206223252496434e-05, + "loss": 0.0041, + "step": 87450 + }, + { + "epoch": 15.6, + "learning_rate": 4.2205331669044225e-05, + "loss": 0.0026, + "step": 87460 + }, + { + "epoch": 15.6, + "learning_rate": 4.220444008559201e-05, + "loss": 0.0023, + "step": 87470 + }, + { + "epoch": 15.6, + "learning_rate": 4.22035485021398e-05, + "loss": 0.0035, + "step": 87480 + }, + { + "epoch": 15.6, + "learning_rate": 4.2202656918687586e-05, + "loss": 0.0029, + "step": 87490 + }, + { + "epoch": 15.6, + "learning_rate": 4.2201765335235384e-05, + "loss": 0.0027, + "step": 87500 + }, + { + "epoch": 15.6, + "learning_rate": 4.220087375178317e-05, + "loss": 0.0033, + "step": 87510 + }, + { + "epoch": 15.61, + "learning_rate": 4.219998216833096e-05, + "loss": 0.0052, + "step": 87520 + }, + { + "epoch": 15.61, + "learning_rate": 4.219909058487875e-05, + "loss": 0.0022, + "step": 87530 + }, + { + "epoch": 15.61, + "learning_rate": 4.2198199001426535e-05, + "loss": 0.0049, + "step": 87540 + }, + { + "epoch": 15.61, + "learning_rate": 4.2197307417974326e-05, + "loss": 0.0026, + "step": 87550 + }, + { + "epoch": 15.61, + "learning_rate": 4.219641583452211e-05, + "loss": 0.0023, + "step": 87560 + }, + { + "epoch": 15.62, + "learning_rate": 4.21955242510699e-05, + "loss": 0.0039, + "step": 87570 + }, + { + "epoch": 15.62, + "learning_rate": 4.219463266761769e-05, + "loss": 0.0035, + "step": 87580 + }, + { + "epoch": 15.62, + "learning_rate": 4.219374108416548e-05, + "loss": 0.0021, + "step": 87590 + }, + { + "epoch": 15.62, + "learning_rate": 4.219284950071327e-05, + "loss": 0.0024, + "step": 87600 + }, + { + "epoch": 15.62, + "learning_rate": 4.219195791726106e-05, + "loss": 0.0036, + "step": 87610 + }, + { + "epoch": 15.62, + "learning_rate": 4.219106633380885e-05, + "loss": 0.0021, + "step": 87620 + }, + { + "epoch": 15.63, + "learning_rate": 4.2190174750356635e-05, + "loss": 0.0036, + "step": 87630 + }, + { + "epoch": 15.63, + "learning_rate": 4.2189283166904427e-05, + "loss": 0.003, + "step": 87640 + }, + { + "epoch": 15.63, + "learning_rate": 4.218839158345221e-05, + "loss": 0.0016, + "step": 87650 + }, + { + "epoch": 15.63, + "learning_rate": 4.21875e-05, + "loss": 0.0031, + "step": 87660 + }, + { + "epoch": 15.63, + "learning_rate": 4.2186608416547793e-05, + "loss": 0.002, + "step": 87670 + }, + { + "epoch": 15.63, + "learning_rate": 4.218571683309558e-05, + "loss": 0.0019, + "step": 87680 + }, + { + "epoch": 15.64, + "learning_rate": 4.218482524964337e-05, + "loss": 0.0055, + "step": 87690 + }, + { + "epoch": 15.64, + "learning_rate": 4.2183933666191154e-05, + "loss": 0.0022, + "step": 87700 + }, + { + "epoch": 15.64, + "learning_rate": 4.2183042082738945e-05, + "loss": 0.0032, + "step": 87710 + }, + { + "epoch": 15.64, + "learning_rate": 4.2182150499286736e-05, + "loss": 0.0024, + "step": 87720 + }, + { + "epoch": 15.64, + "learning_rate": 4.218125891583453e-05, + "loss": 0.0026, + "step": 87730 + }, + { + "epoch": 15.65, + "learning_rate": 4.218036733238231e-05, + "loss": 0.0021, + "step": 87740 + }, + { + "epoch": 15.65, + "learning_rate": 4.21794757489301e-05, + "loss": 0.0026, + "step": 87750 + }, + { + "epoch": 15.65, + "learning_rate": 4.2178584165477894e-05, + "loss": 0.0023, + "step": 87760 + }, + { + "epoch": 15.65, + "learning_rate": 4.217769258202568e-05, + "loss": 0.0008, + "step": 87770 + }, + { + "epoch": 15.65, + "learning_rate": 4.217680099857347e-05, + "loss": 0.0019, + "step": 87780 + }, + { + "epoch": 15.65, + "learning_rate": 4.2175909415121254e-05, + "loss": 0.0026, + "step": 87790 + }, + { + "epoch": 15.66, + "learning_rate": 4.2175017831669045e-05, + "loss": 0.0035, + "step": 87800 + }, + { + "epoch": 15.66, + "learning_rate": 4.217412624821683e-05, + "loss": 0.003, + "step": 87810 + }, + { + "epoch": 15.66, + "learning_rate": 4.217323466476462e-05, + "loss": 0.0038, + "step": 87820 + }, + { + "epoch": 15.66, + "learning_rate": 4.217234308131241e-05, + "loss": 0.0023, + "step": 87830 + }, + { + "epoch": 15.66, + "learning_rate": 4.21714514978602e-05, + "loss": 0.0019, + "step": 87840 + }, + { + "epoch": 15.67, + "learning_rate": 4.2170559914407995e-05, + "loss": 0.0038, + "step": 87850 + }, + { + "epoch": 15.67, + "learning_rate": 4.216966833095578e-05, + "loss": 0.0024, + "step": 87860 + }, + { + "epoch": 15.67, + "learning_rate": 4.216877674750357e-05, + "loss": 0.0022, + "step": 87870 + }, + { + "epoch": 15.67, + "learning_rate": 4.2167885164051355e-05, + "loss": 0.006, + "step": 87880 + }, + { + "epoch": 15.67, + "learning_rate": 4.2166993580599146e-05, + "loss": 0.002, + "step": 87890 + }, + { + "epoch": 15.67, + "learning_rate": 4.216610199714694e-05, + "loss": 0.0021, + "step": 87900 + }, + { + "epoch": 15.68, + "learning_rate": 4.216521041369472e-05, + "loss": 0.0027, + "step": 87910 + }, + { + "epoch": 15.68, + "learning_rate": 4.216431883024251e-05, + "loss": 0.0022, + "step": 87920 + }, + { + "epoch": 15.68, + "learning_rate": 4.21634272467903e-05, + "loss": 0.0024, + "step": 87930 + }, + { + "epoch": 15.68, + "learning_rate": 4.2162535663338095e-05, + "loss": 0.0042, + "step": 87940 + }, + { + "epoch": 15.68, + "learning_rate": 4.216164407988588e-05, + "loss": 0.0025, + "step": 87950 + }, + { + "epoch": 15.68, + "learning_rate": 4.216075249643367e-05, + "loss": 0.0038, + "step": 87960 + }, + { + "epoch": 15.69, + "learning_rate": 4.2159860912981455e-05, + "loss": 0.0066, + "step": 87970 + }, + { + "epoch": 15.69, + "learning_rate": 4.2158969329529246e-05, + "loss": 0.0026, + "step": 87980 + }, + { + "epoch": 15.69, + "learning_rate": 4.215807774607704e-05, + "loss": 0.0013, + "step": 87990 + }, + { + "epoch": 15.69, + "learning_rate": 4.215718616262482e-05, + "loss": 0.0018, + "step": 88000 + }, + { + "epoch": 15.69, + "learning_rate": 4.215629457917261e-05, + "loss": 0.0021, + "step": 88010 + }, + { + "epoch": 15.7, + "learning_rate": 4.21554029957204e-05, + "loss": 0.0033, + "step": 88020 + }, + { + "epoch": 15.7, + "learning_rate": 4.215451141226819e-05, + "loss": 0.0029, + "step": 88030 + }, + { + "epoch": 15.7, + "learning_rate": 4.215361982881597e-05, + "loss": 0.003, + "step": 88040 + }, + { + "epoch": 15.7, + "learning_rate": 4.215272824536377e-05, + "loss": 0.0025, + "step": 88050 + }, + { + "epoch": 15.7, + "learning_rate": 4.2151836661911556e-05, + "loss": 0.0039, + "step": 88060 + }, + { + "epoch": 15.7, + "learning_rate": 4.215094507845935e-05, + "loss": 0.0039, + "step": 88070 + }, + { + "epoch": 15.71, + "learning_rate": 4.215005349500714e-05, + "loss": 0.0019, + "step": 88080 + }, + { + "epoch": 15.71, + "learning_rate": 4.214916191155492e-05, + "loss": 0.0018, + "step": 88090 + }, + { + "epoch": 15.71, + "learning_rate": 4.2148270328102714e-05, + "loss": 0.0027, + "step": 88100 + }, + { + "epoch": 15.71, + "learning_rate": 4.21473787446505e-05, + "loss": 0.0034, + "step": 88110 + }, + { + "epoch": 15.71, + "learning_rate": 4.214648716119829e-05, + "loss": 0.0013, + "step": 88120 + }, + { + "epoch": 15.72, + "learning_rate": 4.214559557774608e-05, + "loss": 0.0033, + "step": 88130 + }, + { + "epoch": 15.72, + "learning_rate": 4.2144703994293865e-05, + "loss": 0.0036, + "step": 88140 + }, + { + "epoch": 15.72, + "learning_rate": 4.2143812410841656e-05, + "loss": 0.0043, + "step": 88150 + }, + { + "epoch": 15.72, + "learning_rate": 4.214292082738945e-05, + "loss": 0.003, + "step": 88160 + }, + { + "epoch": 15.72, + "learning_rate": 4.214202924393724e-05, + "loss": 0.003, + "step": 88170 + }, + { + "epoch": 15.72, + "learning_rate": 4.214113766048502e-05, + "loss": 0.004, + "step": 88180 + }, + { + "epoch": 15.73, + "learning_rate": 4.2140246077032814e-05, + "loss": 0.0026, + "step": 88190 + }, + { + "epoch": 15.73, + "learning_rate": 4.21393544935806e-05, + "loss": 0.0019, + "step": 88200 + }, + { + "epoch": 15.73, + "learning_rate": 4.213846291012839e-05, + "loss": 0.0023, + "step": 88210 + }, + { + "epoch": 15.73, + "learning_rate": 4.213757132667618e-05, + "loss": 0.0027, + "step": 88220 + }, + { + "epoch": 15.73, + "learning_rate": 4.2136679743223966e-05, + "loss": 0.0033, + "step": 88230 + }, + { + "epoch": 15.73, + "learning_rate": 4.213578815977176e-05, + "loss": 0.0053, + "step": 88240 + }, + { + "epoch": 15.74, + "learning_rate": 4.213489657631954e-05, + "loss": 0.0024, + "step": 88250 + }, + { + "epoch": 15.74, + "learning_rate": 4.213400499286733e-05, + "loss": 0.0043, + "step": 88260 + }, + { + "epoch": 15.74, + "learning_rate": 4.2133113409415124e-05, + "loss": 0.0035, + "step": 88270 + }, + { + "epoch": 15.74, + "learning_rate": 4.2132221825962915e-05, + "loss": 0.0034, + "step": 88280 + }, + { + "epoch": 15.74, + "learning_rate": 4.21313302425107e-05, + "loss": 0.0022, + "step": 88290 + }, + { + "epoch": 15.75, + "learning_rate": 4.213043865905849e-05, + "loss": 0.0034, + "step": 88300 + }, + { + "epoch": 15.75, + "learning_rate": 4.212954707560628e-05, + "loss": 0.0043, + "step": 88310 + }, + { + "epoch": 15.75, + "learning_rate": 4.2128655492154066e-05, + "loss": 0.0032, + "step": 88320 + }, + { + "epoch": 15.75, + "learning_rate": 4.212776390870186e-05, + "loss": 0.0021, + "step": 88330 + }, + { + "epoch": 15.75, + "learning_rate": 4.212687232524964e-05, + "loss": 0.002, + "step": 88340 + }, + { + "epoch": 15.75, + "learning_rate": 4.212598074179743e-05, + "loss": 0.0055, + "step": 88350 + }, + { + "epoch": 15.76, + "learning_rate": 4.2125089158345224e-05, + "loss": 0.0021, + "step": 88360 + }, + { + "epoch": 15.76, + "learning_rate": 4.212419757489301e-05, + "loss": 0.0032, + "step": 88370 + }, + { + "epoch": 15.76, + "learning_rate": 4.212330599144081e-05, + "loss": 0.0028, + "step": 88380 + }, + { + "epoch": 15.76, + "learning_rate": 4.212241440798859e-05, + "loss": 0.0042, + "step": 88390 + }, + { + "epoch": 15.76, + "learning_rate": 4.212152282453638e-05, + "loss": 0.0026, + "step": 88400 + }, + { + "epoch": 15.76, + "learning_rate": 4.212063124108417e-05, + "loss": 0.0025, + "step": 88410 + }, + { + "epoch": 15.77, + "learning_rate": 4.211973965763196e-05, + "loss": 0.003, + "step": 88420 + }, + { + "epoch": 15.77, + "learning_rate": 4.211884807417974e-05, + "loss": 0.0046, + "step": 88430 + }, + { + "epoch": 15.77, + "learning_rate": 4.2117956490727534e-05, + "loss": 0.0015, + "step": 88440 + }, + { + "epoch": 15.77, + "learning_rate": 4.2117064907275325e-05, + "loss": 0.0016, + "step": 88450 + }, + { + "epoch": 15.77, + "learning_rate": 4.211617332382311e-05, + "loss": 0.0043, + "step": 88460 + }, + { + "epoch": 15.78, + "learning_rate": 4.2115370898716124e-05, + "loss": 0.0037, + "step": 88470 + }, + { + "epoch": 15.78, + "learning_rate": 4.211447931526391e-05, + "loss": 0.0024, + "step": 88480 + }, + { + "epoch": 15.78, + "learning_rate": 4.21135877318117e-05, + "loss": 0.0023, + "step": 88490 + }, + { + "epoch": 15.78, + "learning_rate": 4.2112696148359484e-05, + "loss": 0.003, + "step": 88500 + }, + { + "epoch": 15.78, + "learning_rate": 4.2111804564907275e-05, + "loss": 0.0026, + "step": 88510 + }, + { + "epoch": 15.78, + "learning_rate": 4.2110912981455067e-05, + "loss": 0.0022, + "step": 88520 + }, + { + "epoch": 15.79, + "learning_rate": 4.211002139800285e-05, + "loss": 0.0023, + "step": 88530 + }, + { + "epoch": 15.79, + "learning_rate": 4.210912981455065e-05, + "loss": 0.003, + "step": 88540 + }, + { + "epoch": 15.79, + "learning_rate": 4.2108238231098433e-05, + "loss": 0.0021, + "step": 88550 + }, + { + "epoch": 15.79, + "learning_rate": 4.2107346647646225e-05, + "loss": 0.002, + "step": 88560 + }, + { + "epoch": 15.79, + "learning_rate": 4.210645506419401e-05, + "loss": 0.0015, + "step": 88570 + }, + { + "epoch": 15.8, + "learning_rate": 4.21055634807418e-05, + "loss": 0.0015, + "step": 88580 + }, + { + "epoch": 15.8, + "learning_rate": 4.210467189728959e-05, + "loss": 0.0024, + "step": 88590 + }, + { + "epoch": 15.8, + "learning_rate": 4.2103780313837376e-05, + "loss": 0.0018, + "step": 88600 + }, + { + "epoch": 15.8, + "learning_rate": 4.210288873038517e-05, + "loss": 0.0022, + "step": 88610 + }, + { + "epoch": 15.8, + "learning_rate": 4.210199714693295e-05, + "loss": 0.0045, + "step": 88620 + }, + { + "epoch": 15.8, + "learning_rate": 4.210110556348074e-05, + "loss": 0.0022, + "step": 88630 + }, + { + "epoch": 15.81, + "learning_rate": 4.210021398002853e-05, + "loss": 0.004, + "step": 88640 + }, + { + "epoch": 15.81, + "learning_rate": 4.2099322396576325e-05, + "loss": 0.003, + "step": 88650 + }, + { + "epoch": 15.81, + "learning_rate": 4.209843081312411e-05, + "loss": 0.0034, + "step": 88660 + }, + { + "epoch": 15.81, + "learning_rate": 4.20975392296719e-05, + "loss": 0.0025, + "step": 88670 + }, + { + "epoch": 15.81, + "learning_rate": 4.209664764621969e-05, + "loss": 0.0048, + "step": 88680 + }, + { + "epoch": 15.81, + "learning_rate": 4.2095756062767477e-05, + "loss": 0.0023, + "step": 88690 + }, + { + "epoch": 15.82, + "learning_rate": 4.209486447931527e-05, + "loss": 0.0022, + "step": 88700 + }, + { + "epoch": 15.82, + "learning_rate": 4.209397289586305e-05, + "loss": 0.0031, + "step": 88710 + }, + { + "epoch": 15.82, + "learning_rate": 4.209308131241084e-05, + "loss": 0.0033, + "step": 88720 + }, + { + "epoch": 15.82, + "learning_rate": 4.209218972895863e-05, + "loss": 0.0032, + "step": 88730 + }, + { + "epoch": 15.82, + "learning_rate": 4.209129814550642e-05, + "loss": 0.0015, + "step": 88740 + }, + { + "epoch": 15.83, + "learning_rate": 4.209040656205421e-05, + "loss": 0.0036, + "step": 88750 + }, + { + "epoch": 15.83, + "learning_rate": 4.2089514978602e-05, + "loss": 0.0059, + "step": 88760 + }, + { + "epoch": 15.83, + "learning_rate": 4.208862339514979e-05, + "loss": 0.0028, + "step": 88770 + }, + { + "epoch": 15.83, + "learning_rate": 4.208773181169758e-05, + "loss": 0.004, + "step": 88780 + }, + { + "epoch": 15.83, + "learning_rate": 4.208684022824537e-05, + "loss": 0.0037, + "step": 88790 + }, + { + "epoch": 15.83, + "learning_rate": 4.208594864479315e-05, + "loss": 0.0026, + "step": 88800 + }, + { + "epoch": 15.84, + "learning_rate": 4.2085057061340944e-05, + "loss": 0.0023, + "step": 88810 + }, + { + "epoch": 15.84, + "learning_rate": 4.2084165477888735e-05, + "loss": 0.0051, + "step": 88820 + }, + { + "epoch": 15.84, + "learning_rate": 4.208327389443652e-05, + "loss": 0.0018, + "step": 88830 + }, + { + "epoch": 15.84, + "learning_rate": 4.208238231098431e-05, + "loss": 0.0044, + "step": 88840 + }, + { + "epoch": 15.84, + "learning_rate": 4.2081490727532095e-05, + "loss": 0.0032, + "step": 88850 + }, + { + "epoch": 15.85, + "learning_rate": 4.2080599144079886e-05, + "loss": 0.0025, + "step": 88860 + }, + { + "epoch": 15.85, + "learning_rate": 4.207970756062768e-05, + "loss": 0.0016, + "step": 88870 + }, + { + "epoch": 15.85, + "learning_rate": 4.207881597717547e-05, + "loss": 0.0031, + "step": 88880 + }, + { + "epoch": 15.85, + "learning_rate": 4.207792439372325e-05, + "loss": 0.0031, + "step": 88890 + }, + { + "epoch": 15.85, + "learning_rate": 4.2077032810271044e-05, + "loss": 0.0028, + "step": 88900 + }, + { + "epoch": 15.85, + "learning_rate": 4.2076141226818836e-05, + "loss": 0.0027, + "step": 88910 + }, + { + "epoch": 15.86, + "learning_rate": 4.207524964336662e-05, + "loss": 0.0031, + "step": 88920 + }, + { + "epoch": 15.86, + "learning_rate": 4.207435805991441e-05, + "loss": 0.0021, + "step": 88930 + }, + { + "epoch": 15.86, + "learning_rate": 4.2073466476462196e-05, + "loss": 0.0024, + "step": 88940 + }, + { + "epoch": 15.86, + "learning_rate": 4.207257489300999e-05, + "loss": 0.0037, + "step": 88950 + }, + { + "epoch": 15.86, + "learning_rate": 4.207168330955777e-05, + "loss": 0.0028, + "step": 88960 + }, + { + "epoch": 15.86, + "learning_rate": 4.207079172610556e-05, + "loss": 0.0039, + "step": 88970 + }, + { + "epoch": 15.87, + "learning_rate": 4.2069900142653354e-05, + "loss": 0.002, + "step": 88980 + }, + { + "epoch": 15.87, + "learning_rate": 4.2069008559201145e-05, + "loss": 0.0045, + "step": 88990 + }, + { + "epoch": 15.87, + "learning_rate": 4.2068116975748936e-05, + "loss": 0.0038, + "step": 89000 + }, + { + "epoch": 15.87, + "learning_rate": 4.206722539229672e-05, + "loss": 0.0045, + "step": 89010 + }, + { + "epoch": 15.87, + "learning_rate": 4.206633380884451e-05, + "loss": 0.0014, + "step": 89020 + }, + { + "epoch": 15.88, + "learning_rate": 4.2065442225392296e-05, + "loss": 0.0019, + "step": 89030 + }, + { + "epoch": 15.88, + "learning_rate": 4.206455064194009e-05, + "loss": 0.0036, + "step": 89040 + }, + { + "epoch": 15.88, + "learning_rate": 4.206365905848787e-05, + "loss": 0.0053, + "step": 89050 + }, + { + "epoch": 15.88, + "learning_rate": 4.206276747503566e-05, + "loss": 0.0025, + "step": 89060 + }, + { + "epoch": 15.88, + "learning_rate": 4.2061875891583454e-05, + "loss": 0.003, + "step": 89070 + }, + { + "epoch": 15.88, + "learning_rate": 4.206098430813124e-05, + "loss": 0.0017, + "step": 89080 + }, + { + "epoch": 15.89, + "learning_rate": 4.206009272467904e-05, + "loss": 0.0023, + "step": 89090 + }, + { + "epoch": 15.89, + "learning_rate": 4.205920114122682e-05, + "loss": 0.0027, + "step": 89100 + }, + { + "epoch": 15.89, + "learning_rate": 4.205830955777461e-05, + "loss": 0.0022, + "step": 89110 + }, + { + "epoch": 15.89, + "learning_rate": 4.20574179743224e-05, + "loss": 0.0033, + "step": 89120 + }, + { + "epoch": 15.89, + "learning_rate": 4.205652639087019e-05, + "loss": 0.0029, + "step": 89130 + }, + { + "epoch": 15.9, + "learning_rate": 4.205563480741798e-05, + "loss": 0.0036, + "step": 89140 + }, + { + "epoch": 15.9, + "learning_rate": 4.2054743223965764e-05, + "loss": 0.0024, + "step": 89150 + }, + { + "epoch": 15.9, + "learning_rate": 4.2053851640513555e-05, + "loss": 0.0019, + "step": 89160 + }, + { + "epoch": 15.9, + "learning_rate": 4.205296005706134e-05, + "loss": 0.0014, + "step": 89170 + }, + { + "epoch": 15.9, + "learning_rate": 4.205206847360913e-05, + "loss": 0.0015, + "step": 89180 + }, + { + "epoch": 15.9, + "learning_rate": 4.2051176890156915e-05, + "loss": 0.0026, + "step": 89190 + }, + { + "epoch": 15.91, + "learning_rate": 4.205028530670471e-05, + "loss": 0.0021, + "step": 89200 + }, + { + "epoch": 15.91, + "learning_rate": 4.20493937232525e-05, + "loss": 0.004, + "step": 89210 + }, + { + "epoch": 15.91, + "learning_rate": 4.2048591298145505e-05, + "loss": 0.0052, + "step": 89220 + }, + { + "epoch": 15.91, + "learning_rate": 4.20476997146933e-05, + "loss": 0.0025, + "step": 89230 + }, + { + "epoch": 15.91, + "learning_rate": 4.204680813124109e-05, + "loss": 0.0024, + "step": 89240 + }, + { + "epoch": 15.91, + "learning_rate": 4.204591654778888e-05, + "loss": 0.003, + "step": 89250 + }, + { + "epoch": 15.92, + "learning_rate": 4.2045024964336664e-05, + "loss": 0.0035, + "step": 89260 + }, + { + "epoch": 15.92, + "learning_rate": 4.2044133380884455e-05, + "loss": 0.0026, + "step": 89270 + }, + { + "epoch": 15.92, + "learning_rate": 4.204324179743224e-05, + "loss": 0.0021, + "step": 89280 + }, + { + "epoch": 15.92, + "learning_rate": 4.204235021398003e-05, + "loss": 0.0034, + "step": 89290 + }, + { + "epoch": 15.92, + "learning_rate": 4.204145863052782e-05, + "loss": 0.0062, + "step": 89300 + }, + { + "epoch": 15.93, + "learning_rate": 4.2040567047075606e-05, + "loss": 0.0016, + "step": 89310 + }, + { + "epoch": 15.93, + "learning_rate": 4.20396754636234e-05, + "loss": 0.0038, + "step": 89320 + }, + { + "epoch": 15.93, + "learning_rate": 4.203878388017118e-05, + "loss": 0.0026, + "step": 89330 + }, + { + "epoch": 15.93, + "learning_rate": 4.203789229671897e-05, + "loss": 0.0019, + "step": 89340 + }, + { + "epoch": 15.93, + "learning_rate": 4.2037000713266764e-05, + "loss": 0.0021, + "step": 89350 + }, + { + "epoch": 15.93, + "learning_rate": 4.2036109129814555e-05, + "loss": 0.0029, + "step": 89360 + }, + { + "epoch": 15.94, + "learning_rate": 4.2035217546362347e-05, + "loss": 0.0021, + "step": 89370 + }, + { + "epoch": 15.94, + "learning_rate": 4.203432596291013e-05, + "loss": 0.0021, + "step": 89380 + }, + { + "epoch": 15.94, + "learning_rate": 4.203343437945792e-05, + "loss": 0.0031, + "step": 89390 + }, + { + "epoch": 15.94, + "learning_rate": 4.2032542796005707e-05, + "loss": 0.002, + "step": 89400 + }, + { + "epoch": 15.94, + "learning_rate": 4.20316512125535e-05, + "loss": 0.0022, + "step": 89410 + }, + { + "epoch": 15.95, + "learning_rate": 4.203075962910128e-05, + "loss": 0.0041, + "step": 89420 + }, + { + "epoch": 15.95, + "learning_rate": 4.2029868045649073e-05, + "loss": 0.0045, + "step": 89430 + }, + { + "epoch": 15.95, + "learning_rate": 4.2028976462196865e-05, + "loss": 0.0032, + "step": 89440 + }, + { + "epoch": 15.95, + "learning_rate": 4.202808487874465e-05, + "loss": 0.0036, + "step": 89450 + }, + { + "epoch": 15.95, + "learning_rate": 4.202719329529245e-05, + "loss": 0.002, + "step": 89460 + }, + { + "epoch": 15.95, + "learning_rate": 4.202630171184023e-05, + "loss": 0.0028, + "step": 89470 + }, + { + "epoch": 15.96, + "learning_rate": 4.202541012838802e-05, + "loss": 0.003, + "step": 89480 + }, + { + "epoch": 15.96, + "learning_rate": 4.202451854493581e-05, + "loss": 0.0035, + "step": 89490 + }, + { + "epoch": 15.96, + "learning_rate": 4.20236269614836e-05, + "loss": 0.0014, + "step": 89500 + }, + { + "epoch": 15.96, + "learning_rate": 4.202273537803138e-05, + "loss": 0.0022, + "step": 89510 + }, + { + "epoch": 15.96, + "learning_rate": 4.2021843794579174e-05, + "loss": 0.0046, + "step": 89520 + }, + { + "epoch": 15.96, + "learning_rate": 4.2020952211126965e-05, + "loss": 0.0021, + "step": 89530 + }, + { + "epoch": 15.97, + "learning_rate": 4.202006062767475e-05, + "loss": 0.003, + "step": 89540 + }, + { + "epoch": 15.97, + "learning_rate": 4.201916904422254e-05, + "loss": 0.0008, + "step": 89550 + }, + { + "epoch": 15.97, + "learning_rate": 4.2018277460770325e-05, + "loss": 0.0014, + "step": 89560 + }, + { + "epoch": 15.97, + "learning_rate": 4.201738587731812e-05, + "loss": 0.0031, + "step": 89570 + }, + { + "epoch": 15.97, + "learning_rate": 4.201649429386591e-05, + "loss": 0.0028, + "step": 89580 + }, + { + "epoch": 15.98, + "learning_rate": 4.20156027104137e-05, + "loss": 0.0039, + "step": 89590 + }, + { + "epoch": 15.98, + "learning_rate": 4.201471112696149e-05, + "loss": 0.0061, + "step": 89600 + }, + { + "epoch": 15.98, + "learning_rate": 4.2013819543509275e-05, + "loss": 0.0016, + "step": 89610 + }, + { + "epoch": 15.98, + "learning_rate": 4.2012927960057066e-05, + "loss": 0.0028, + "step": 89620 + }, + { + "epoch": 15.98, + "learning_rate": 4.201203637660485e-05, + "loss": 0.0025, + "step": 89630 + }, + { + "epoch": 15.98, + "learning_rate": 4.201114479315264e-05, + "loss": 0.0018, + "step": 89640 + }, + { + "epoch": 15.99, + "learning_rate": 4.2010253209700426e-05, + "loss": 0.0018, + "step": 89650 + }, + { + "epoch": 15.99, + "learning_rate": 4.200936162624822e-05, + "loss": 0.0017, + "step": 89660 + }, + { + "epoch": 15.99, + "learning_rate": 4.200847004279601e-05, + "loss": 0.0019, + "step": 89670 + }, + { + "epoch": 15.99, + "learning_rate": 4.20075784593438e-05, + "loss": 0.0048, + "step": 89680 + }, + { + "epoch": 15.99, + "learning_rate": 4.200668687589159e-05, + "loss": 0.0029, + "step": 89690 + }, + { + "epoch": 16.0, + "learning_rate": 4.2005795292439375e-05, + "loss": 0.0022, + "step": 89700 + }, + { + "epoch": 16.0, + "learning_rate": 4.2004903708987166e-05, + "loss": 0.0031, + "step": 89710 + }, + { + "epoch": 16.0, + "learning_rate": 4.200401212553495e-05, + "loss": 0.001, + "step": 89720 + }, + { + "epoch": 16.0, + "eval_loss": 0.020624889060854912, + "eval_runtime": 195.8925, + "eval_samples_per_second": 23.681, + "eval_steps_per_second": 2.961, + "step": 89728 + }, + { + "epoch": 16.0, + "step": 89728, + "total_flos": 8.436933281646816e+19, + "train_loss": 0.006225432415173189, + "train_runtime": 115573.2859, + "train_samples_per_second": 38.819, + "train_steps_per_second": 4.852 + } + ], + "max_steps": 560800, + "num_train_epochs": 100, + "total_flos": 8.436933281646816e+19, + "trial_name": null, + "trial_params": null +}