diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,19821 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.97789367628756, + "eval_steps": 500, + "global_step": 33000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.998518342914716e-05, + "loss": 1.1436, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9970366858294324e-05, + "loss": 1.0946, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9955550287441474e-05, + "loss": 1.0475, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.994073371658863e-05, + "loss": 1.2085, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.9925917145735795e-05, + "loss": 1.0201, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.991110057488295e-05, + "loss": 1.1401, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.989628400403011e-05, + "loss": 1.2025, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 4.988146743317727e-05, + "loss": 1.0663, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 4.9866650862324424e-05, + "loss": 1.0889, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 4.985183429147158e-05, + "loss": 1.1078, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.9837017720618746e-05, + "loss": 1.0007, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 4.98222011497659e-05, + "loss": 1.1888, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 4.980738457891306e-05, + "loss": 1.1755, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 4.979256800806022e-05, + "loss": 1.1285, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 4.9777751437207374e-05, + "loss": 1.0683, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 4.976293486635453e-05, + "loss": 1.0682, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.974811829550169e-05, + "loss": 1.0443, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.973330172464885e-05, + "loss": 1.0094, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.971848515379601e-05, + "loss": 1.0373, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.970366858294316e-05, + "loss": 1.0986, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9688852012090325e-05, + "loss": 1.0402, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.967403544123748e-05, + "loss": 1.1408, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.965921887038464e-05, + "loss": 1.1502, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.96444022995318e-05, + "loss": 1.1159, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.9629585728678954e-05, + "loss": 1.0239, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 4.961476915782611e-05, + "loss": 1.1654, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 4.9599952586973275e-05, + "loss": 1.1863, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 4.958513601612043e-05, + "loss": 1.0276, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 4.957031944526759e-05, + "loss": 1.1198, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 4.9555502874414746e-05, + "loss": 1.0463, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 4.9540686303561904e-05, + "loss": 0.966, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 4.952586973270906e-05, + "loss": 0.9627, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 4.9511053161856225e-05, + "loss": 1.1312, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 4.949623659100338e-05, + "loss": 0.9223, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 4.948142002015054e-05, + "loss": 1.1389, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 4.94666034492977e-05, + "loss": 0.8973, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 4.9451786878444854e-05, + "loss": 1.0691, + "step": 370 + }, + { + "epoch": 0.01, + "learning_rate": 4.943697030759201e-05, + "loss": 1.099, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 4.9422153736739175e-05, + "loss": 1.0559, + "step": 390 + }, + { + "epoch": 0.01, + "learning_rate": 4.940733716588633e-05, + "loss": 0.9345, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 4.939252059503349e-05, + "loss": 1.1638, + "step": 410 + }, + { + "epoch": 0.01, + "learning_rate": 4.937770402418064e-05, + "loss": 1.0137, + "step": 420 + }, + { + "epoch": 0.01, + "learning_rate": 4.9362887453327804e-05, + "loss": 1.1298, + "step": 430 + }, + { + "epoch": 0.01, + "learning_rate": 4.934807088247496e-05, + "loss": 1.297, + "step": 440 + }, + { + "epoch": 0.01, + "learning_rate": 4.933325431162212e-05, + "loss": 1.1458, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 4.931843774076928e-05, + "loss": 1.2104, + "step": 460 + }, + { + "epoch": 0.01, + "learning_rate": 4.930362116991643e-05, + "loss": 1.0494, + "step": 470 + }, + { + "epoch": 0.01, + "learning_rate": 4.928880459906359e-05, + "loss": 1.0527, + "step": 480 + }, + { + "epoch": 0.01, + "learning_rate": 4.9273988028210754e-05, + "loss": 0.9439, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 4.925917145735791e-05, + "loss": 1.1118, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 4.924435488650507e-05, + "loss": 1.1182, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 4.922953831565223e-05, + "loss": 1.0818, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 4.921472174479938e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 4.919990517394654e-05, + "loss": 1.1353, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 4.9185088603093704e-05, + "loss": 0.9229, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 4.917027203224086e-05, + "loss": 0.9945, + "step": 560 + }, + { + "epoch": 0.02, + "learning_rate": 4.915545546138802e-05, + "loss": 1.0377, + "step": 570 + }, + { + "epoch": 0.02, + "learning_rate": 4.9140638890535176e-05, + "loss": 0.9988, + "step": 580 + }, + { + "epoch": 0.02, + "learning_rate": 4.912582231968233e-05, + "loss": 1.0964, + "step": 590 + }, + { + "epoch": 0.02, + "learning_rate": 4.911100574882949e-05, + "loss": 1.0607, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 4.9096189177976655e-05, + "loss": 1.2273, + "step": 610 + }, + { + "epoch": 0.02, + "learning_rate": 4.908137260712381e-05, + "loss": 1.2177, + "step": 620 + }, + { + "epoch": 0.02, + "learning_rate": 4.906655603627097e-05, + "loss": 0.9447, + "step": 630 + }, + { + "epoch": 0.02, + "learning_rate": 4.9051739465418126e-05, + "loss": 1.0235, + "step": 640 + }, + { + "epoch": 0.02, + "learning_rate": 4.9036922894565283e-05, + "loss": 0.8942, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 4.902210632371244e-05, + "loss": 1.1545, + "step": 660 + }, + { + "epoch": 0.02, + "learning_rate": 4.9007289752859605e-05, + "loss": 0.9827, + "step": 670 + }, + { + "epoch": 0.02, + "learning_rate": 4.899247318200676e-05, + "loss": 1.004, + "step": 680 + }, + { + "epoch": 0.02, + "learning_rate": 4.897765661115392e-05, + "loss": 1.1165, + "step": 690 + }, + { + "epoch": 0.02, + "learning_rate": 4.896284004030107e-05, + "loss": 0.9904, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 4.8948023469448234e-05, + "loss": 1.0665, + "step": 710 + }, + { + "epoch": 0.02, + "learning_rate": 4.893320689859539e-05, + "loss": 0.9563, + "step": 720 + }, + { + "epoch": 0.02, + "learning_rate": 4.891839032774255e-05, + "loss": 1.2268, + "step": 730 + }, + { + "epoch": 0.02, + "learning_rate": 4.890357375688971e-05, + "loss": 1.0352, + "step": 740 + }, + { + "epoch": 0.02, + "learning_rate": 4.888875718603686e-05, + "loss": 1.0372, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 4.887394061518402e-05, + "loss": 0.9848, + "step": 760 + }, + { + "epoch": 0.02, + "learning_rate": 4.8859124044331184e-05, + "loss": 1.0746, + "step": 770 + }, + { + "epoch": 0.02, + "learning_rate": 4.884430747347834e-05, + "loss": 1.1633, + "step": 780 + }, + { + "epoch": 0.02, + "learning_rate": 4.88294909026255e-05, + "loss": 1.1765, + "step": 790 + }, + { + "epoch": 0.02, + "learning_rate": 4.8814674331772656e-05, + "loss": 0.9531, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 4.879985776091981e-05, + "loss": 0.9954, + "step": 810 + }, + { + "epoch": 0.02, + "learning_rate": 4.878504119006697e-05, + "loss": 0.9633, + "step": 820 + }, + { + "epoch": 0.02, + "learning_rate": 4.8770224619214134e-05, + "loss": 1.2382, + "step": 830 + }, + { + "epoch": 0.02, + "learning_rate": 4.875540804836129e-05, + "loss": 1.005, + "step": 840 + }, + { + "epoch": 0.03, + "learning_rate": 4.874059147750845e-05, + "loss": 1.0923, + "step": 850 + }, + { + "epoch": 0.03, + "learning_rate": 4.8725774906655606e-05, + "loss": 0.9299, + "step": 860 + }, + { + "epoch": 0.03, + "learning_rate": 4.871095833580276e-05, + "loss": 1.0834, + "step": 870 + }, + { + "epoch": 0.03, + "learning_rate": 4.869614176494992e-05, + "loss": 0.9882, + "step": 880 + }, + { + "epoch": 0.03, + "learning_rate": 4.8681325194097084e-05, + "loss": 1.0145, + "step": 890 + }, + { + "epoch": 0.03, + "learning_rate": 4.866650862324424e-05, + "loss": 1.0234, + "step": 900 + }, + { + "epoch": 0.03, + "learning_rate": 4.86516920523914e-05, + "loss": 1.1596, + "step": 910 + }, + { + "epoch": 0.03, + "learning_rate": 4.8636875481538556e-05, + "loss": 1.1069, + "step": 920 + }, + { + "epoch": 0.03, + "learning_rate": 4.862205891068571e-05, + "loss": 1.012, + "step": 930 + }, + { + "epoch": 0.03, + "learning_rate": 4.860724233983287e-05, + "loss": 1.0147, + "step": 940 + }, + { + "epoch": 0.03, + "learning_rate": 4.8592425768980034e-05, + "loss": 1.0027, + "step": 950 + }, + { + "epoch": 0.03, + "learning_rate": 4.857760919812719e-05, + "loss": 1.0529, + "step": 960 + }, + { + "epoch": 0.03, + "learning_rate": 4.856279262727434e-05, + "loss": 1.0556, + "step": 970 + }, + { + "epoch": 0.03, + "learning_rate": 4.85479760564215e-05, + "loss": 0.8887, + "step": 980 + }, + { + "epoch": 0.03, + "learning_rate": 4.853315948556866e-05, + "loss": 1.1216, + "step": 990 + }, + { + "epoch": 0.03, + "learning_rate": 4.851834291471582e-05, + "loss": 1.0372, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 4.850352634386298e-05, + "loss": 1.0452, + "step": 1010 + }, + { + "epoch": 0.03, + "learning_rate": 4.8488709773010135e-05, + "loss": 1.0786, + "step": 1020 + }, + { + "epoch": 0.03, + "learning_rate": 4.847389320215729e-05, + "loss": 1.0466, + "step": 1030 + }, + { + "epoch": 0.03, + "learning_rate": 4.845907663130445e-05, + "loss": 1.161, + "step": 1040 + }, + { + "epoch": 0.03, + "learning_rate": 4.8444260060451613e-05, + "loss": 1.0792, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 4.842944348959877e-05, + "loss": 1.1396, + "step": 1060 + }, + { + "epoch": 0.03, + "learning_rate": 4.841462691874593e-05, + "loss": 1.0736, + "step": 1070 + }, + { + "epoch": 0.03, + "learning_rate": 4.8399810347893085e-05, + "loss": 1.104, + "step": 1080 + }, + { + "epoch": 0.03, + "learning_rate": 4.838499377704024e-05, + "loss": 0.948, + "step": 1090 + }, + { + "epoch": 0.03, + "learning_rate": 4.83701772061874e-05, + "loss": 0.965, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 4.8355360635334564e-05, + "loss": 1.1474, + "step": 1110 + }, + { + "epoch": 0.03, + "learning_rate": 4.834054406448172e-05, + "loss": 1.0499, + "step": 1120 + }, + { + "epoch": 0.03, + "learning_rate": 4.832572749362888e-05, + "loss": 1.0691, + "step": 1130 + }, + { + "epoch": 0.03, + "learning_rate": 4.8310910922776035e-05, + "loss": 1.1725, + "step": 1140 + }, + { + "epoch": 0.03, + "learning_rate": 4.829609435192319e-05, + "loss": 1.0933, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 4.828127778107035e-05, + "loss": 1.1244, + "step": 1160 + }, + { + "epoch": 0.03, + "learning_rate": 4.8266461210217514e-05, + "loss": 1.0848, + "step": 1170 + }, + { + "epoch": 0.03, + "learning_rate": 4.825164463936467e-05, + "loss": 1.2447, + "step": 1180 + }, + { + "epoch": 0.04, + "learning_rate": 4.823682806851182e-05, + "loss": 1.0453, + "step": 1190 + }, + { + "epoch": 0.04, + "learning_rate": 4.8222011497658985e-05, + "loss": 1.0077, + "step": 1200 + }, + { + "epoch": 0.04, + "learning_rate": 4.820719492680614e-05, + "loss": 1.1749, + "step": 1210 + }, + { + "epoch": 0.04, + "learning_rate": 4.81923783559533e-05, + "loss": 1.021, + "step": 1220 + }, + { + "epoch": 0.04, + "learning_rate": 4.8177561785100464e-05, + "loss": 1.1465, + "step": 1230 + }, + { + "epoch": 0.04, + "learning_rate": 4.8162745214247614e-05, + "loss": 1.0847, + "step": 1240 + }, + { + "epoch": 0.04, + "learning_rate": 4.814792864339477e-05, + "loss": 1.0417, + "step": 1250 + }, + { + "epoch": 0.04, + "learning_rate": 4.813311207254193e-05, + "loss": 0.9796, + "step": 1260 + }, + { + "epoch": 0.04, + "learning_rate": 4.811829550168909e-05, + "loss": 1.148, + "step": 1270 + }, + { + "epoch": 0.04, + "learning_rate": 4.810347893083625e-05, + "loss": 1.0537, + "step": 1280 + }, + { + "epoch": 0.04, + "learning_rate": 4.808866235998341e-05, + "loss": 1.0994, + "step": 1290 + }, + { + "epoch": 0.04, + "learning_rate": 4.8073845789130565e-05, + "loss": 0.9678, + "step": 1300 + }, + { + "epoch": 0.04, + "learning_rate": 4.805902921827772e-05, + "loss": 1.0571, + "step": 1310 + }, + { + "epoch": 0.04, + "learning_rate": 4.804421264742488e-05, + "loss": 1.0449, + "step": 1320 + }, + { + "epoch": 0.04, + "learning_rate": 4.802939607657204e-05, + "loss": 1.0071, + "step": 1330 + }, + { + "epoch": 0.04, + "learning_rate": 4.80145795057192e-05, + "loss": 0.8959, + "step": 1340 + }, + { + "epoch": 0.04, + "learning_rate": 4.799976293486636e-05, + "loss": 1.0767, + "step": 1350 + }, + { + "epoch": 0.04, + "learning_rate": 4.7984946364013515e-05, + "loss": 0.9891, + "step": 1360 + }, + { + "epoch": 0.04, + "learning_rate": 4.797012979316067e-05, + "loss": 1.0674, + "step": 1370 + }, + { + "epoch": 0.04, + "learning_rate": 4.795531322230783e-05, + "loss": 1.0322, + "step": 1380 + }, + { + "epoch": 0.04, + "learning_rate": 4.794049665145499e-05, + "loss": 1.0522, + "step": 1390 + }, + { + "epoch": 0.04, + "learning_rate": 4.792568008060215e-05, + "loss": 1.2132, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 4.79108635097493e-05, + "loss": 1.1291, + "step": 1410 + }, + { + "epoch": 0.04, + "learning_rate": 4.7896046938896465e-05, + "loss": 1.0099, + "step": 1420 + }, + { + "epoch": 0.04, + "learning_rate": 4.788123036804362e-05, + "loss": 1.029, + "step": 1430 + }, + { + "epoch": 0.04, + "learning_rate": 4.786641379719078e-05, + "loss": 0.9722, + "step": 1440 + }, + { + "epoch": 0.04, + "learning_rate": 4.7851597226337943e-05, + "loss": 1.023, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 4.7836780655485094e-05, + "loss": 1.0579, + "step": 1460 + }, + { + "epoch": 0.04, + "learning_rate": 4.782196408463225e-05, + "loss": 1.0452, + "step": 1470 + }, + { + "epoch": 0.04, + "learning_rate": 4.7807147513779415e-05, + "loss": 1.2105, + "step": 1480 + }, + { + "epoch": 0.04, + "learning_rate": 4.779233094292657e-05, + "loss": 1.0491, + "step": 1490 + }, + { + "epoch": 0.04, + "learning_rate": 4.777751437207373e-05, + "loss": 0.9678, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 4.7762697801220894e-05, + "loss": 0.9197, + "step": 1510 + }, + { + "epoch": 0.05, + "learning_rate": 4.7747881230368044e-05, + "loss": 0.9753, + "step": 1520 + }, + { + "epoch": 0.05, + "learning_rate": 4.77330646595152e-05, + "loss": 1.0212, + "step": 1530 + }, + { + "epoch": 0.05, + "learning_rate": 4.771824808866236e-05, + "loss": 1.1102, + "step": 1540 + }, + { + "epoch": 0.05, + "learning_rate": 4.770343151780952e-05, + "loss": 1.0254, + "step": 1550 + }, + { + "epoch": 0.05, + "learning_rate": 4.768861494695668e-05, + "loss": 1.1006, + "step": 1560 + }, + { + "epoch": 0.05, + "learning_rate": 4.767379837610384e-05, + "loss": 0.9692, + "step": 1570 + }, + { + "epoch": 0.05, + "learning_rate": 4.7658981805250994e-05, + "loss": 1.076, + "step": 1580 + }, + { + "epoch": 0.05, + "learning_rate": 4.764416523439815e-05, + "loss": 0.986, + "step": 1590 + }, + { + "epoch": 0.05, + "learning_rate": 4.762934866354531e-05, + "loss": 0.9922, + "step": 1600 + }, + { + "epoch": 0.05, + "learning_rate": 4.761453209269247e-05, + "loss": 1.1154, + "step": 1610 + }, + { + "epoch": 0.05, + "learning_rate": 4.759971552183963e-05, + "loss": 1.1606, + "step": 1620 + }, + { + "epoch": 0.05, + "learning_rate": 4.758489895098678e-05, + "loss": 1.0098, + "step": 1630 + }, + { + "epoch": 0.05, + "learning_rate": 4.7570082380133944e-05, + "loss": 1.0313, + "step": 1640 + }, + { + "epoch": 0.05, + "learning_rate": 4.75552658092811e-05, + "loss": 1.0075, + "step": 1650 + }, + { + "epoch": 0.05, + "learning_rate": 4.754044923842826e-05, + "loss": 1.1056, + "step": 1660 + }, + { + "epoch": 0.05, + "learning_rate": 4.752563266757542e-05, + "loss": 1.0331, + "step": 1670 + }, + { + "epoch": 0.05, + "learning_rate": 4.751081609672258e-05, + "loss": 0.9307, + "step": 1680 + }, + { + "epoch": 0.05, + "learning_rate": 4.749599952586973e-05, + "loss": 0.9668, + "step": 1690 + }, + { + "epoch": 0.05, + "learning_rate": 4.7481182955016895e-05, + "loss": 0.9923, + "step": 1700 + }, + { + "epoch": 0.05, + "learning_rate": 4.746636638416405e-05, + "loss": 0.9852, + "step": 1710 + }, + { + "epoch": 0.05, + "learning_rate": 4.745154981331121e-05, + "loss": 0.959, + "step": 1720 + }, + { + "epoch": 0.05, + "learning_rate": 4.743673324245837e-05, + "loss": 1.0467, + "step": 1730 + }, + { + "epoch": 0.05, + "learning_rate": 4.7421916671605523e-05, + "loss": 1.0341, + "step": 1740 + }, + { + "epoch": 0.05, + "learning_rate": 4.740710010075268e-05, + "loss": 1.0286, + "step": 1750 + }, + { + "epoch": 0.05, + "learning_rate": 4.7392283529899845e-05, + "loss": 1.0069, + "step": 1760 + }, + { + "epoch": 0.05, + "learning_rate": 4.7377466959047e-05, + "loss": 0.9959, + "step": 1770 + }, + { + "epoch": 0.05, + "learning_rate": 4.736265038819416e-05, + "loss": 1.032, + "step": 1780 + }, + { + "epoch": 0.05, + "learning_rate": 4.7347833817341316e-05, + "loss": 1.0334, + "step": 1790 + }, + { + "epoch": 0.05, + "learning_rate": 4.7333017246488474e-05, + "loss": 0.9271, + "step": 1800 + }, + { + "epoch": 0.05, + "learning_rate": 4.731820067563563e-05, + "loss": 1.0898, + "step": 1810 + }, + { + "epoch": 0.05, + "learning_rate": 4.730338410478279e-05, + "loss": 0.8218, + "step": 1820 + }, + { + "epoch": 0.05, + "learning_rate": 4.728856753392995e-05, + "loss": 0.9664, + "step": 1830 + }, + { + "epoch": 0.05, + "learning_rate": 4.727375096307711e-05, + "loss": 1.1085, + "step": 1840 + }, + { + "epoch": 0.05, + "learning_rate": 4.7258934392224267e-05, + "loss": 1.0249, + "step": 1850 + }, + { + "epoch": 0.06, + "learning_rate": 4.7244117821371424e-05, + "loss": 0.9883, + "step": 1860 + }, + { + "epoch": 0.06, + "learning_rate": 4.722930125051858e-05, + "loss": 1.0239, + "step": 1870 + }, + { + "epoch": 0.06, + "learning_rate": 4.721448467966574e-05, + "loss": 0.9759, + "step": 1880 + }, + { + "epoch": 0.06, + "learning_rate": 4.71996681088129e-05, + "loss": 0.9996, + "step": 1890 + }, + { + "epoch": 0.06, + "learning_rate": 4.718485153796006e-05, + "loss": 0.8492, + "step": 1900 + }, + { + "epoch": 0.06, + "learning_rate": 4.717003496710721e-05, + "loss": 0.9642, + "step": 1910 + }, + { + "epoch": 0.06, + "learning_rate": 4.7155218396254374e-05, + "loss": 0.9916, + "step": 1920 + }, + { + "epoch": 0.06, + "learning_rate": 4.714040182540153e-05, + "loss": 0.902, + "step": 1930 + }, + { + "epoch": 0.06, + "learning_rate": 4.712558525454869e-05, + "loss": 1.1098, + "step": 1940 + }, + { + "epoch": 0.06, + "learning_rate": 4.711076868369585e-05, + "loss": 1.0721, + "step": 1950 + }, + { + "epoch": 0.06, + "learning_rate": 4.7095952112843e-05, + "loss": 1.0584, + "step": 1960 + }, + { + "epoch": 0.06, + "learning_rate": 4.708113554199016e-05, + "loss": 1.0544, + "step": 1970 + }, + { + "epoch": 0.06, + "learning_rate": 4.7066318971137324e-05, + "loss": 1.1046, + "step": 1980 + }, + { + "epoch": 0.06, + "learning_rate": 4.705150240028448e-05, + "loss": 0.93, + "step": 1990 + }, + { + "epoch": 0.06, + "learning_rate": 4.703668582943164e-05, + "loss": 1.1538, + "step": 2000 + }, + { + "epoch": 0.06, + "learning_rate": 4.7021869258578796e-05, + "loss": 1.1012, + "step": 2010 + }, + { + "epoch": 0.06, + "learning_rate": 4.700705268772595e-05, + "loss": 1.056, + "step": 2020 + }, + { + "epoch": 0.06, + "learning_rate": 4.699223611687311e-05, + "loss": 1.0762, + "step": 2030 + }, + { + "epoch": 0.06, + "learning_rate": 4.6977419546020274e-05, + "loss": 1.0884, + "step": 2040 + }, + { + "epoch": 0.06, + "learning_rate": 4.696260297516743e-05, + "loss": 1.1416, + "step": 2050 + }, + { + "epoch": 0.06, + "learning_rate": 4.694778640431459e-05, + "loss": 1.0391, + "step": 2060 + }, + { + "epoch": 0.06, + "learning_rate": 4.6932969833461746e-05, + "loss": 1.1021, + "step": 2070 + }, + { + "epoch": 0.06, + "learning_rate": 4.69181532626089e-05, + "loss": 1.1112, + "step": 2080 + }, + { + "epoch": 0.06, + "learning_rate": 4.690333669175606e-05, + "loss": 1.1211, + "step": 2090 + }, + { + "epoch": 0.06, + "learning_rate": 4.688852012090322e-05, + "loss": 0.9748, + "step": 2100 + }, + { + "epoch": 0.06, + "learning_rate": 4.687370355005038e-05, + "loss": 0.945, + "step": 2110 + }, + { + "epoch": 0.06, + "learning_rate": 4.685888697919754e-05, + "loss": 1.1073, + "step": 2120 + }, + { + "epoch": 0.06, + "learning_rate": 4.684407040834469e-05, + "loss": 1.0968, + "step": 2130 + }, + { + "epoch": 0.06, + "learning_rate": 4.6829253837491853e-05, + "loss": 1.0562, + "step": 2140 + }, + { + "epoch": 0.06, + "learning_rate": 4.681443726663901e-05, + "loss": 1.0573, + "step": 2150 + }, + { + "epoch": 0.06, + "learning_rate": 4.679962069578617e-05, + "loss": 1.0862, + "step": 2160 + }, + { + "epoch": 0.06, + "learning_rate": 4.678480412493333e-05, + "loss": 1.0493, + "step": 2170 + }, + { + "epoch": 0.06, + "learning_rate": 4.676998755408048e-05, + "loss": 1.019, + "step": 2180 + }, + { + "epoch": 0.06, + "learning_rate": 4.675517098322764e-05, + "loss": 0.8864, + "step": 2190 + }, + { + "epoch": 0.07, + "learning_rate": 4.6740354412374804e-05, + "loss": 0.9383, + "step": 2200 + }, + { + "epoch": 0.07, + "learning_rate": 4.672553784152196e-05, + "loss": 1.0499, + "step": 2210 + }, + { + "epoch": 0.07, + "learning_rate": 4.671072127066912e-05, + "loss": 0.9938, + "step": 2220 + }, + { + "epoch": 0.07, + "learning_rate": 4.6695904699816275e-05, + "loss": 1.0305, + "step": 2230 + }, + { + "epoch": 0.07, + "learning_rate": 4.668108812896343e-05, + "loss": 0.9835, + "step": 2240 + }, + { + "epoch": 0.07, + "learning_rate": 4.666627155811059e-05, + "loss": 0.8994, + "step": 2250 + }, + { + "epoch": 0.07, + "learning_rate": 4.6651454987257754e-05, + "loss": 1.0598, + "step": 2260 + }, + { + "epoch": 0.07, + "learning_rate": 4.663663841640491e-05, + "loss": 0.9745, + "step": 2270 + }, + { + "epoch": 0.07, + "learning_rate": 4.662182184555207e-05, + "loss": 0.9789, + "step": 2280 + }, + { + "epoch": 0.07, + "learning_rate": 4.6607005274699225e-05, + "loss": 1.0458, + "step": 2290 + }, + { + "epoch": 0.07, + "learning_rate": 4.659218870384638e-05, + "loss": 0.9545, + "step": 2300 + }, + { + "epoch": 0.07, + "learning_rate": 4.657737213299354e-05, + "loss": 0.9021, + "step": 2310 + }, + { + "epoch": 0.07, + "learning_rate": 4.6562555562140704e-05, + "loss": 0.971, + "step": 2320 + }, + { + "epoch": 0.07, + "learning_rate": 4.654773899128786e-05, + "loss": 1.0396, + "step": 2330 + }, + { + "epoch": 0.07, + "learning_rate": 4.653292242043502e-05, + "loss": 0.8743, + "step": 2340 + }, + { + "epoch": 0.07, + "learning_rate": 4.6518105849582176e-05, + "loss": 0.967, + "step": 2350 + }, + { + "epoch": 0.07, + "learning_rate": 4.650328927872933e-05, + "loss": 1.1139, + "step": 2360 + }, + { + "epoch": 0.07, + "learning_rate": 4.648847270787649e-05, + "loss": 0.9882, + "step": 2370 + }, + { + "epoch": 0.07, + "learning_rate": 4.647365613702365e-05, + "loss": 1.0006, + "step": 2380 + }, + { + "epoch": 0.07, + "learning_rate": 4.645883956617081e-05, + "loss": 0.9794, + "step": 2390 + }, + { + "epoch": 0.07, + "learning_rate": 4.644402299531796e-05, + "loss": 0.9471, + "step": 2400 + }, + { + "epoch": 0.07, + "learning_rate": 4.642920642446512e-05, + "loss": 0.8593, + "step": 2410 + }, + { + "epoch": 0.07, + "learning_rate": 4.641438985361228e-05, + "loss": 1.0272, + "step": 2420 + }, + { + "epoch": 0.07, + "learning_rate": 4.639957328275944e-05, + "loss": 0.9176, + "step": 2430 + }, + { + "epoch": 0.07, + "learning_rate": 4.63847567119066e-05, + "loss": 0.9669, + "step": 2440 + }, + { + "epoch": 0.07, + "learning_rate": 4.636994014105376e-05, + "loss": 1.014, + "step": 2450 + }, + { + "epoch": 0.07, + "learning_rate": 4.635512357020091e-05, + "loss": 0.7286, + "step": 2460 + }, + { + "epoch": 0.07, + "learning_rate": 4.634030699934807e-05, + "loss": 1.114, + "step": 2470 + }, + { + "epoch": 0.07, + "learning_rate": 4.632549042849523e-05, + "loss": 0.9132, + "step": 2480 + }, + { + "epoch": 0.07, + "learning_rate": 4.631067385764239e-05, + "loss": 1.0984, + "step": 2490 + }, + { + "epoch": 0.07, + "learning_rate": 4.629585728678955e-05, + "loss": 1.028, + "step": 2500 + }, + { + "epoch": 0.07, + "learning_rate": 4.6281040715936705e-05, + "loss": 1.1322, + "step": 2510 + }, + { + "epoch": 0.07, + "learning_rate": 4.626622414508386e-05, + "loss": 0.9703, + "step": 2520 + }, + { + "epoch": 0.07, + "learning_rate": 4.625140757423102e-05, + "loss": 1.0486, + "step": 2530 + }, + { + "epoch": 0.08, + "learning_rate": 4.623659100337818e-05, + "loss": 1.1308, + "step": 2540 + }, + { + "epoch": 0.08, + "learning_rate": 4.622177443252534e-05, + "loss": 0.9167, + "step": 2550 + }, + { + "epoch": 0.08, + "learning_rate": 4.62069578616725e-05, + "loss": 0.9322, + "step": 2560 + }, + { + "epoch": 0.08, + "learning_rate": 4.6192141290819655e-05, + "loss": 1.1608, + "step": 2570 + }, + { + "epoch": 0.08, + "learning_rate": 4.617732471996681e-05, + "loss": 1.0079, + "step": 2580 + }, + { + "epoch": 0.08, + "learning_rate": 4.616250814911397e-05, + "loss": 0.9097, + "step": 2590 + }, + { + "epoch": 0.08, + "learning_rate": 4.6147691578261134e-05, + "loss": 1.0079, + "step": 2600 + }, + { + "epoch": 0.08, + "learning_rate": 4.613287500740829e-05, + "loss": 1.0918, + "step": 2610 + }, + { + "epoch": 0.08, + "learning_rate": 4.611805843655545e-05, + "loss": 1.0862, + "step": 2620 + }, + { + "epoch": 0.08, + "learning_rate": 4.6103241865702605e-05, + "loss": 1.0602, + "step": 2630 + }, + { + "epoch": 0.08, + "learning_rate": 4.608842529484976e-05, + "loss": 1.1089, + "step": 2640 + }, + { + "epoch": 0.08, + "learning_rate": 4.607360872399692e-05, + "loss": 0.9527, + "step": 2650 + }, + { + "epoch": 0.08, + "learning_rate": 4.605879215314408e-05, + "loss": 1.0643, + "step": 2660 + }, + { + "epoch": 0.08, + "learning_rate": 4.604397558229124e-05, + "loss": 0.9497, + "step": 2670 + }, + { + "epoch": 0.08, + "learning_rate": 4.602915901143839e-05, + "loss": 1.048, + "step": 2680 + }, + { + "epoch": 0.08, + "learning_rate": 4.601434244058555e-05, + "loss": 1.1039, + "step": 2690 + }, + { + "epoch": 0.08, + "learning_rate": 4.599952586973271e-05, + "loss": 0.9535, + "step": 2700 + }, + { + "epoch": 0.08, + "learning_rate": 4.598470929887987e-05, + "loss": 1.1689, + "step": 2710 + }, + { + "epoch": 0.08, + "learning_rate": 4.596989272802703e-05, + "loss": 0.9185, + "step": 2720 + }, + { + "epoch": 0.08, + "learning_rate": 4.5955076157174184e-05, + "loss": 0.9434, + "step": 2730 + }, + { + "epoch": 0.08, + "learning_rate": 4.594025958632134e-05, + "loss": 1.085, + "step": 2740 + }, + { + "epoch": 0.08, + "learning_rate": 4.59254430154685e-05, + "loss": 0.9472, + "step": 2750 + }, + { + "epoch": 0.08, + "learning_rate": 4.591062644461566e-05, + "loss": 1.1535, + "step": 2760 + }, + { + "epoch": 0.08, + "learning_rate": 4.589580987376282e-05, + "loss": 0.9407, + "step": 2770 + }, + { + "epoch": 0.08, + "learning_rate": 4.588099330290998e-05, + "loss": 1.0422, + "step": 2780 + }, + { + "epoch": 0.08, + "learning_rate": 4.5866176732057135e-05, + "loss": 0.9648, + "step": 2790 + }, + { + "epoch": 0.08, + "learning_rate": 4.585136016120429e-05, + "loss": 1.0928, + "step": 2800 + }, + { + "epoch": 0.08, + "learning_rate": 4.583654359035145e-05, + "loss": 0.9872, + "step": 2810 + }, + { + "epoch": 0.08, + "learning_rate": 4.582172701949861e-05, + "loss": 0.9209, + "step": 2820 + }, + { + "epoch": 0.08, + "learning_rate": 4.580691044864577e-05, + "loss": 1.114, + "step": 2830 + }, + { + "epoch": 0.08, + "learning_rate": 4.579209387779293e-05, + "loss": 1.1243, + "step": 2840 + }, + { + "epoch": 0.08, + "learning_rate": 4.5777277306940085e-05, + "loss": 0.9924, + "step": 2850 + }, + { + "epoch": 0.08, + "learning_rate": 4.576246073608724e-05, + "loss": 0.9567, + "step": 2860 + }, + { + "epoch": 0.09, + "learning_rate": 4.57476441652344e-05, + "loss": 0.8971, + "step": 2870 + }, + { + "epoch": 0.09, + "learning_rate": 4.573282759438156e-05, + "loss": 0.9983, + "step": 2880 + }, + { + "epoch": 0.09, + "learning_rate": 4.571801102352872e-05, + "loss": 1.0459, + "step": 2890 + }, + { + "epoch": 0.09, + "learning_rate": 4.570319445267587e-05, + "loss": 1.0651, + "step": 2900 + }, + { + "epoch": 0.09, + "learning_rate": 4.5688377881823035e-05, + "loss": 1.1088, + "step": 2910 + }, + { + "epoch": 0.09, + "learning_rate": 4.567356131097019e-05, + "loss": 1.1004, + "step": 2920 + }, + { + "epoch": 0.09, + "learning_rate": 4.565874474011735e-05, + "loss": 1.0982, + "step": 2930 + }, + { + "epoch": 0.09, + "learning_rate": 4.5643928169264507e-05, + "loss": 1.1044, + "step": 2940 + }, + { + "epoch": 0.09, + "learning_rate": 4.5629111598411664e-05, + "loss": 0.9959, + "step": 2950 + }, + { + "epoch": 0.09, + "learning_rate": 4.561429502755882e-05, + "loss": 0.9248, + "step": 2960 + }, + { + "epoch": 0.09, + "learning_rate": 4.559947845670598e-05, + "loss": 1.0083, + "step": 2970 + }, + { + "epoch": 0.09, + "learning_rate": 4.558466188585314e-05, + "loss": 1.0696, + "step": 2980 + }, + { + "epoch": 0.09, + "learning_rate": 4.55698453150003e-05, + "loss": 1.0809, + "step": 2990 + }, + { + "epoch": 0.09, + "learning_rate": 4.555502874414746e-05, + "loss": 0.9692, + "step": 3000 + }, + { + "epoch": 0.09, + "learning_rate": 4.5540212173294614e-05, + "loss": 0.8689, + "step": 3010 + }, + { + "epoch": 0.09, + "learning_rate": 4.552539560244177e-05, + "loss": 1.0184, + "step": 3020 + }, + { + "epoch": 0.09, + "learning_rate": 4.551057903158893e-05, + "loss": 0.9662, + "step": 3030 + }, + { + "epoch": 0.09, + "learning_rate": 4.549576246073609e-05, + "loss": 0.9472, + "step": 3040 + }, + { + "epoch": 0.09, + "learning_rate": 4.548094588988325e-05, + "loss": 1.0923, + "step": 3050 + }, + { + "epoch": 0.09, + "learning_rate": 4.546612931903041e-05, + "loss": 0.9392, + "step": 3060 + }, + { + "epoch": 0.09, + "learning_rate": 4.5451312748177564e-05, + "loss": 1.0266, + "step": 3070 + }, + { + "epoch": 0.09, + "learning_rate": 4.543649617732472e-05, + "loss": 1.0079, + "step": 3080 + }, + { + "epoch": 0.09, + "learning_rate": 4.542167960647188e-05, + "loss": 1.0063, + "step": 3090 + }, + { + "epoch": 0.09, + "learning_rate": 4.540686303561904e-05, + "loss": 0.9931, + "step": 3100 + }, + { + "epoch": 0.09, + "learning_rate": 4.53920464647662e-05, + "loss": 0.8938, + "step": 3110 + }, + { + "epoch": 0.09, + "learning_rate": 4.537722989391335e-05, + "loss": 1.0389, + "step": 3120 + }, + { + "epoch": 0.09, + "learning_rate": 4.5362413323060514e-05, + "loss": 1.0111, + "step": 3130 + }, + { + "epoch": 0.09, + "learning_rate": 4.534759675220767e-05, + "loss": 0.9162, + "step": 3140 + }, + { + "epoch": 0.09, + "learning_rate": 4.533278018135483e-05, + "loss": 1.0123, + "step": 3150 + }, + { + "epoch": 0.09, + "learning_rate": 4.531796361050199e-05, + "loss": 1.007, + "step": 3160 + }, + { + "epoch": 0.09, + "learning_rate": 4.530314703964914e-05, + "loss": 1.0215, + "step": 3170 + }, + { + "epoch": 0.09, + "learning_rate": 4.52883304687963e-05, + "loss": 0.9833, + "step": 3180 + }, + { + "epoch": 0.09, + "learning_rate": 4.5273513897943464e-05, + "loss": 1.0657, + "step": 3190 + }, + { + "epoch": 0.09, + "learning_rate": 4.525869732709062e-05, + "loss": 1.0104, + "step": 3200 + }, + { + "epoch": 0.1, + "learning_rate": 4.524388075623778e-05, + "loss": 1.136, + "step": 3210 + }, + { + "epoch": 0.1, + "learning_rate": 4.5229064185384936e-05, + "loss": 1.0972, + "step": 3220 + }, + { + "epoch": 0.1, + "learning_rate": 4.5214247614532093e-05, + "loss": 0.9414, + "step": 3230 + }, + { + "epoch": 0.1, + "learning_rate": 4.519943104367925e-05, + "loss": 1.0983, + "step": 3240 + }, + { + "epoch": 0.1, + "learning_rate": 4.518461447282641e-05, + "loss": 0.9094, + "step": 3250 + }, + { + "epoch": 0.1, + "learning_rate": 4.516979790197357e-05, + "loss": 0.9771, + "step": 3260 + }, + { + "epoch": 0.1, + "learning_rate": 4.515498133112073e-05, + "loss": 1.0007, + "step": 3270 + }, + { + "epoch": 0.1, + "learning_rate": 4.5140164760267886e-05, + "loss": 1.0244, + "step": 3280 + }, + { + "epoch": 0.1, + "learning_rate": 4.5125348189415044e-05, + "loss": 1.0073, + "step": 3290 + }, + { + "epoch": 0.1, + "learning_rate": 4.51105316185622e-05, + "loss": 0.9975, + "step": 3300 + }, + { + "epoch": 0.1, + "learning_rate": 4.509571504770936e-05, + "loss": 1.1689, + "step": 3310 + }, + { + "epoch": 0.1, + "learning_rate": 4.508089847685652e-05, + "loss": 0.8902, + "step": 3320 + }, + { + "epoch": 0.1, + "learning_rate": 4.506608190600368e-05, + "loss": 0.9794, + "step": 3330 + }, + { + "epoch": 0.1, + "learning_rate": 4.505126533515083e-05, + "loss": 0.8807, + "step": 3340 + }, + { + "epoch": 0.1, + "learning_rate": 4.5036448764297994e-05, + "loss": 0.9263, + "step": 3350 + }, + { + "epoch": 0.1, + "learning_rate": 4.502163219344515e-05, + "loss": 0.8875, + "step": 3360 + }, + { + "epoch": 0.1, + "learning_rate": 4.500681562259231e-05, + "loss": 0.9376, + "step": 3370 + }, + { + "epoch": 0.1, + "learning_rate": 4.499199905173947e-05, + "loss": 1.0126, + "step": 3380 + }, + { + "epoch": 0.1, + "learning_rate": 4.497718248088662e-05, + "loss": 1.0124, + "step": 3390 + }, + { + "epoch": 0.1, + "learning_rate": 4.496236591003378e-05, + "loss": 0.7539, + "step": 3400 + }, + { + "epoch": 0.1, + "learning_rate": 4.4947549339180944e-05, + "loss": 0.8332, + "step": 3410 + }, + { + "epoch": 0.1, + "learning_rate": 4.49327327683281e-05, + "loss": 1.027, + "step": 3420 + }, + { + "epoch": 0.1, + "learning_rate": 4.491791619747526e-05, + "loss": 0.8988, + "step": 3430 + }, + { + "epoch": 0.1, + "learning_rate": 4.490309962662242e-05, + "loss": 1.1417, + "step": 3440 + }, + { + "epoch": 0.1, + "learning_rate": 4.488828305576957e-05, + "loss": 1.0029, + "step": 3450 + }, + { + "epoch": 0.1, + "learning_rate": 4.487346648491673e-05, + "loss": 1.0582, + "step": 3460 + }, + { + "epoch": 0.1, + "learning_rate": 4.4858649914063894e-05, + "loss": 1.0007, + "step": 3470 + }, + { + "epoch": 0.1, + "learning_rate": 4.484383334321105e-05, + "loss": 0.8922, + "step": 3480 + }, + { + "epoch": 0.1, + "learning_rate": 4.482901677235821e-05, + "loss": 1.0126, + "step": 3490 + }, + { + "epoch": 0.1, + "learning_rate": 4.4814200201505366e-05, + "loss": 0.8675, + "step": 3500 + }, + { + "epoch": 0.1, + "learning_rate": 4.479938363065252e-05, + "loss": 0.9286, + "step": 3510 + }, + { + "epoch": 0.1, + "learning_rate": 4.478456705979968e-05, + "loss": 1.0148, + "step": 3520 + }, + { + "epoch": 0.1, + "learning_rate": 4.476975048894684e-05, + "loss": 1.0747, + "step": 3530 + }, + { + "epoch": 0.1, + "learning_rate": 4.4754933918094e-05, + "loss": 1.0847, + "step": 3540 + }, + { + "epoch": 0.11, + "learning_rate": 4.474011734724116e-05, + "loss": 1.0646, + "step": 3550 + }, + { + "epoch": 0.11, + "learning_rate": 4.472530077638831e-05, + "loss": 1.0878, + "step": 3560 + }, + { + "epoch": 0.11, + "learning_rate": 4.471048420553547e-05, + "loss": 0.9359, + "step": 3570 + }, + { + "epoch": 0.11, + "learning_rate": 4.469566763468263e-05, + "loss": 1.1106, + "step": 3580 + }, + { + "epoch": 0.11, + "learning_rate": 4.468085106382979e-05, + "loss": 0.949, + "step": 3590 + }, + { + "epoch": 0.11, + "learning_rate": 4.466603449297695e-05, + "loss": 0.9367, + "step": 3600 + }, + { + "epoch": 0.11, + "learning_rate": 4.465121792212411e-05, + "loss": 0.9948, + "step": 3610 + }, + { + "epoch": 0.11, + "learning_rate": 4.463640135127126e-05, + "loss": 1.0947, + "step": 3620 + }, + { + "epoch": 0.11, + "learning_rate": 4.462158478041842e-05, + "loss": 0.9545, + "step": 3630 + }, + { + "epoch": 0.11, + "learning_rate": 4.460676820956558e-05, + "loss": 1.0944, + "step": 3640 + }, + { + "epoch": 0.11, + "learning_rate": 4.459195163871274e-05, + "loss": 1.0387, + "step": 3650 + }, + { + "epoch": 0.11, + "learning_rate": 4.45771350678599e-05, + "loss": 0.9061, + "step": 3660 + }, + { + "epoch": 0.11, + "learning_rate": 4.456231849700705e-05, + "loss": 1.0306, + "step": 3670 + }, + { + "epoch": 0.11, + "learning_rate": 4.454750192615421e-05, + "loss": 1.0257, + "step": 3680 + }, + { + "epoch": 0.11, + "learning_rate": 4.4532685355301374e-05, + "loss": 0.9344, + "step": 3690 + }, + { + "epoch": 0.11, + "learning_rate": 4.451786878444853e-05, + "loss": 0.9438, + "step": 3700 + }, + { + "epoch": 0.11, + "learning_rate": 4.450305221359569e-05, + "loss": 1.0341, + "step": 3710 + }, + { + "epoch": 0.11, + "learning_rate": 4.4488235642742845e-05, + "loss": 1.0352, + "step": 3720 + }, + { + "epoch": 0.11, + "learning_rate": 4.447341907189e-05, + "loss": 1.1207, + "step": 3730 + }, + { + "epoch": 0.11, + "learning_rate": 4.445860250103716e-05, + "loss": 1.0834, + "step": 3740 + }, + { + "epoch": 0.11, + "learning_rate": 4.4443785930184324e-05, + "loss": 0.8907, + "step": 3750 + }, + { + "epoch": 0.11, + "learning_rate": 4.442896935933148e-05, + "loss": 1.1111, + "step": 3760 + }, + { + "epoch": 0.11, + "learning_rate": 4.441415278847864e-05, + "loss": 0.9999, + "step": 3770 + }, + { + "epoch": 0.11, + "learning_rate": 4.4399336217625795e-05, + "loss": 1.0504, + "step": 3780 + }, + { + "epoch": 0.11, + "learning_rate": 4.438451964677295e-05, + "loss": 0.9121, + "step": 3790 + }, + { + "epoch": 0.11, + "learning_rate": 4.436970307592011e-05, + "loss": 0.9015, + "step": 3800 + }, + { + "epoch": 0.11, + "learning_rate": 4.435488650506727e-05, + "loss": 1.0593, + "step": 3810 + }, + { + "epoch": 0.11, + "learning_rate": 4.434006993421443e-05, + "loss": 1.0612, + "step": 3820 + }, + { + "epoch": 0.11, + "learning_rate": 4.432525336336159e-05, + "loss": 0.9286, + "step": 3830 + }, + { + "epoch": 0.11, + "learning_rate": 4.431043679250874e-05, + "loss": 1.0359, + "step": 3840 + }, + { + "epoch": 0.11, + "learning_rate": 4.42956202216559e-05, + "loss": 1.0434, + "step": 3850 + }, + { + "epoch": 0.11, + "learning_rate": 4.428080365080306e-05, + "loss": 0.9933, + "step": 3860 + }, + { + "epoch": 0.11, + "learning_rate": 4.426598707995022e-05, + "loss": 0.9287, + "step": 3870 + }, + { + "epoch": 0.11, + "learning_rate": 4.425117050909738e-05, + "loss": 1.0855, + "step": 3880 + }, + { + "epoch": 0.12, + "learning_rate": 4.423635393824453e-05, + "loss": 1.0433, + "step": 3890 + }, + { + "epoch": 0.12, + "learning_rate": 4.422153736739169e-05, + "loss": 1.072, + "step": 3900 + }, + { + "epoch": 0.12, + "learning_rate": 4.420672079653885e-05, + "loss": 0.9849, + "step": 3910 + }, + { + "epoch": 0.12, + "learning_rate": 4.419190422568601e-05, + "loss": 0.999, + "step": 3920 + }, + { + "epoch": 0.12, + "learning_rate": 4.417708765483317e-05, + "loss": 0.8054, + "step": 3930 + }, + { + "epoch": 0.12, + "learning_rate": 4.4162271083980325e-05, + "loss": 0.9708, + "step": 3940 + }, + { + "epoch": 0.12, + "learning_rate": 4.414745451312748e-05, + "loss": 1.0625, + "step": 3950 + }, + { + "epoch": 0.12, + "learning_rate": 4.413263794227464e-05, + "loss": 0.9717, + "step": 3960 + }, + { + "epoch": 0.12, + "learning_rate": 4.41178213714218e-05, + "loss": 0.9269, + "step": 3970 + }, + { + "epoch": 0.12, + "learning_rate": 4.410300480056896e-05, + "loss": 1.0109, + "step": 3980 + }, + { + "epoch": 0.12, + "learning_rate": 4.408818822971612e-05, + "loss": 1.1448, + "step": 3990 + }, + { + "epoch": 0.12, + "learning_rate": 4.4073371658863275e-05, + "loss": 1.0363, + "step": 4000 + }, + { + "epoch": 0.12, + "learning_rate": 4.405855508801043e-05, + "loss": 1.0063, + "step": 4010 + }, + { + "epoch": 0.12, + "learning_rate": 4.404373851715759e-05, + "loss": 0.8822, + "step": 4020 + }, + { + "epoch": 0.12, + "learning_rate": 4.402892194630475e-05, + "loss": 1.0641, + "step": 4030 + }, + { + "epoch": 0.12, + "learning_rate": 4.401410537545191e-05, + "loss": 1.0352, + "step": 4040 + }, + { + "epoch": 0.12, + "learning_rate": 4.399928880459907e-05, + "loss": 1.073, + "step": 4050 + }, + { + "epoch": 0.12, + "learning_rate": 4.398447223374622e-05, + "loss": 1.0877, + "step": 4060 + }, + { + "epoch": 0.12, + "learning_rate": 4.396965566289338e-05, + "loss": 0.9055, + "step": 4070 + }, + { + "epoch": 0.12, + "learning_rate": 4.395483909204054e-05, + "loss": 1.0689, + "step": 4080 + }, + { + "epoch": 0.12, + "learning_rate": 4.39400225211877e-05, + "loss": 1.0445, + "step": 4090 + }, + { + "epoch": 0.12, + "learning_rate": 4.392520595033486e-05, + "loss": 0.9072, + "step": 4100 + }, + { + "epoch": 0.12, + "learning_rate": 4.391038937948201e-05, + "loss": 0.9371, + "step": 4110 + }, + { + "epoch": 0.12, + "learning_rate": 4.389557280862917e-05, + "loss": 0.8655, + "step": 4120 + }, + { + "epoch": 0.12, + "learning_rate": 4.388075623777633e-05, + "loss": 1.1038, + "step": 4130 + }, + { + "epoch": 0.12, + "learning_rate": 4.386593966692349e-05, + "loss": 1.0431, + "step": 4140 + }, + { + "epoch": 0.12, + "learning_rate": 4.385112309607065e-05, + "loss": 0.9737, + "step": 4150 + }, + { + "epoch": 0.12, + "learning_rate": 4.3836306525217804e-05, + "loss": 1.0146, + "step": 4160 + }, + { + "epoch": 0.12, + "learning_rate": 4.382148995436496e-05, + "loss": 0.9463, + "step": 4170 + }, + { + "epoch": 0.12, + "learning_rate": 4.380667338351212e-05, + "loss": 1.048, + "step": 4180 + }, + { + "epoch": 0.12, + "learning_rate": 4.379185681265928e-05, + "loss": 0.9281, + "step": 4190 + }, + { + "epoch": 0.12, + "learning_rate": 4.377704024180644e-05, + "loss": 1.0834, + "step": 4200 + }, + { + "epoch": 0.12, + "learning_rate": 4.37622236709536e-05, + "loss": 1.007, + "step": 4210 + }, + { + "epoch": 0.13, + "learning_rate": 4.3747407100100754e-05, + "loss": 0.9686, + "step": 4220 + }, + { + "epoch": 0.13, + "learning_rate": 4.373259052924791e-05, + "loss": 1.0291, + "step": 4230 + }, + { + "epoch": 0.13, + "learning_rate": 4.371777395839507e-05, + "loss": 1.0465, + "step": 4240 + }, + { + "epoch": 0.13, + "learning_rate": 4.370295738754223e-05, + "loss": 0.953, + "step": 4250 + }, + { + "epoch": 0.13, + "learning_rate": 4.368814081668939e-05, + "loss": 1.0508, + "step": 4260 + }, + { + "epoch": 0.13, + "learning_rate": 4.367332424583655e-05, + "loss": 1.0006, + "step": 4270 + }, + { + "epoch": 0.13, + "learning_rate": 4.3658507674983704e-05, + "loss": 1.1245, + "step": 4280 + }, + { + "epoch": 0.13, + "learning_rate": 4.364369110413086e-05, + "loss": 0.9801, + "step": 4290 + }, + { + "epoch": 0.13, + "learning_rate": 4.362887453327802e-05, + "loss": 0.9444, + "step": 4300 + }, + { + "epoch": 0.13, + "learning_rate": 4.361405796242518e-05, + "loss": 1.0968, + "step": 4310 + }, + { + "epoch": 0.13, + "learning_rate": 4.359924139157234e-05, + "loss": 0.9368, + "step": 4320 + }, + { + "epoch": 0.13, + "learning_rate": 4.358442482071949e-05, + "loss": 0.9025, + "step": 4330 + }, + { + "epoch": 0.13, + "learning_rate": 4.356960824986665e-05, + "loss": 0.8605, + "step": 4340 + }, + { + "epoch": 0.13, + "learning_rate": 4.355479167901381e-05, + "loss": 0.9366, + "step": 4350 + }, + { + "epoch": 0.13, + "learning_rate": 4.353997510816097e-05, + "loss": 0.8986, + "step": 4360 + }, + { + "epoch": 0.13, + "learning_rate": 4.3525158537308126e-05, + "loss": 0.9287, + "step": 4370 + }, + { + "epoch": 0.13, + "learning_rate": 4.3510341966455284e-05, + "loss": 1.0341, + "step": 4380 + }, + { + "epoch": 0.13, + "learning_rate": 4.349552539560244e-05, + "loss": 1.0212, + "step": 4390 + }, + { + "epoch": 0.13, + "learning_rate": 4.34807088247496e-05, + "loss": 0.8867, + "step": 4400 + }, + { + "epoch": 0.13, + "learning_rate": 4.346589225389676e-05, + "loss": 0.9691, + "step": 4410 + }, + { + "epoch": 0.13, + "learning_rate": 4.345107568304392e-05, + "loss": 0.9443, + "step": 4420 + }, + { + "epoch": 0.13, + "learning_rate": 4.3436259112191076e-05, + "loss": 1.0488, + "step": 4430 + }, + { + "epoch": 0.13, + "learning_rate": 4.3421442541338234e-05, + "loss": 1.1335, + "step": 4440 + }, + { + "epoch": 0.13, + "learning_rate": 4.340662597048539e-05, + "loss": 0.9009, + "step": 4450 + }, + { + "epoch": 0.13, + "learning_rate": 4.339180939963255e-05, + "loss": 1.0512, + "step": 4460 + }, + { + "epoch": 0.13, + "learning_rate": 4.337699282877971e-05, + "loss": 0.89, + "step": 4470 + }, + { + "epoch": 0.13, + "learning_rate": 4.336217625792687e-05, + "loss": 0.9175, + "step": 4480 + }, + { + "epoch": 0.13, + "learning_rate": 4.334735968707403e-05, + "loss": 1.0487, + "step": 4490 + }, + { + "epoch": 0.13, + "learning_rate": 4.3332543116221184e-05, + "loss": 0.9249, + "step": 4500 + }, + { + "epoch": 0.13, + "learning_rate": 4.331772654536834e-05, + "loss": 0.9176, + "step": 4510 + }, + { + "epoch": 0.13, + "learning_rate": 4.33029099745155e-05, + "loss": 1.0861, + "step": 4520 + }, + { + "epoch": 0.13, + "learning_rate": 4.328809340366266e-05, + "loss": 1.0183, + "step": 4530 + }, + { + "epoch": 0.13, + "learning_rate": 4.327327683280982e-05, + "loss": 0.9382, + "step": 4540 + }, + { + "epoch": 0.13, + "learning_rate": 4.325846026195697e-05, + "loss": 1.011, + "step": 4550 + }, + { + "epoch": 0.14, + "learning_rate": 4.3243643691104134e-05, + "loss": 1.1504, + "step": 4560 + }, + { + "epoch": 0.14, + "learning_rate": 4.322882712025129e-05, + "loss": 0.9311, + "step": 4570 + }, + { + "epoch": 0.14, + "learning_rate": 4.321401054939845e-05, + "loss": 1.0303, + "step": 4580 + }, + { + "epoch": 0.14, + "learning_rate": 4.319919397854561e-05, + "loss": 0.9909, + "step": 4590 + }, + { + "epoch": 0.14, + "learning_rate": 4.318437740769277e-05, + "loss": 0.9847, + "step": 4600 + }, + { + "epoch": 0.14, + "learning_rate": 4.316956083683992e-05, + "loss": 0.8849, + "step": 4610 + }, + { + "epoch": 0.14, + "learning_rate": 4.315474426598708e-05, + "loss": 1.0116, + "step": 4620 + }, + { + "epoch": 0.14, + "learning_rate": 4.313992769513424e-05, + "loss": 1.1569, + "step": 4630 + }, + { + "epoch": 0.14, + "learning_rate": 4.31251111242814e-05, + "loss": 0.8751, + "step": 4640 + }, + { + "epoch": 0.14, + "learning_rate": 4.3110294553428556e-05, + "loss": 0.9307, + "step": 4650 + }, + { + "epoch": 0.14, + "learning_rate": 4.309547798257571e-05, + "loss": 1.0062, + "step": 4660 + }, + { + "epoch": 0.14, + "learning_rate": 4.308066141172287e-05, + "loss": 0.9799, + "step": 4670 + }, + { + "epoch": 0.14, + "learning_rate": 4.306584484087003e-05, + "loss": 0.9787, + "step": 4680 + }, + { + "epoch": 0.14, + "learning_rate": 4.305102827001719e-05, + "loss": 0.9239, + "step": 4690 + }, + { + "epoch": 0.14, + "learning_rate": 4.303621169916435e-05, + "loss": 0.9535, + "step": 4700 + }, + { + "epoch": 0.14, + "learning_rate": 4.3021395128311506e-05, + "loss": 1.0515, + "step": 4710 + }, + { + "epoch": 0.14, + "learning_rate": 4.300657855745866e-05, + "loss": 1.037, + "step": 4720 + }, + { + "epoch": 0.14, + "learning_rate": 4.299176198660582e-05, + "loss": 1.1239, + "step": 4730 + }, + { + "epoch": 0.14, + "learning_rate": 4.297694541575298e-05, + "loss": 1.0813, + "step": 4740 + }, + { + "epoch": 0.14, + "learning_rate": 4.296212884490014e-05, + "loss": 1.2004, + "step": 4750 + }, + { + "epoch": 0.14, + "learning_rate": 4.29473122740473e-05, + "loss": 0.9914, + "step": 4760 + }, + { + "epoch": 0.14, + "learning_rate": 4.2932495703194456e-05, + "loss": 0.9044, + "step": 4770 + }, + { + "epoch": 0.14, + "learning_rate": 4.2917679132341613e-05, + "loss": 1.0464, + "step": 4780 + }, + { + "epoch": 0.14, + "learning_rate": 4.290286256148877e-05, + "loss": 0.9826, + "step": 4790 + }, + { + "epoch": 0.14, + "learning_rate": 4.288804599063593e-05, + "loss": 0.9495, + "step": 4800 + }, + { + "epoch": 0.14, + "learning_rate": 4.287322941978309e-05, + "loss": 1.0386, + "step": 4810 + }, + { + "epoch": 0.14, + "learning_rate": 4.285841284893025e-05, + "loss": 0.9099, + "step": 4820 + }, + { + "epoch": 0.14, + "learning_rate": 4.28435962780774e-05, + "loss": 0.9316, + "step": 4830 + }, + { + "epoch": 0.14, + "learning_rate": 4.2828779707224564e-05, + "loss": 1.0725, + "step": 4840 + }, + { + "epoch": 0.14, + "learning_rate": 4.281396313637172e-05, + "loss": 0.8463, + "step": 4850 + }, + { + "epoch": 0.14, + "learning_rate": 4.279914656551888e-05, + "loss": 1.1508, + "step": 4860 + }, + { + "epoch": 0.14, + "learning_rate": 4.278432999466604e-05, + "loss": 0.9393, + "step": 4870 + }, + { + "epoch": 0.14, + "learning_rate": 4.276951342381319e-05, + "loss": 1.0294, + "step": 4880 + }, + { + "epoch": 0.14, + "learning_rate": 4.275469685296035e-05, + "loss": 1.1232, + "step": 4890 + }, + { + "epoch": 0.15, + "learning_rate": 4.273988028210751e-05, + "loss": 0.9461, + "step": 4900 + }, + { + "epoch": 0.15, + "learning_rate": 4.272506371125467e-05, + "loss": 0.8287, + "step": 4910 + }, + { + "epoch": 0.15, + "learning_rate": 4.271024714040183e-05, + "loss": 1.0617, + "step": 4920 + }, + { + "epoch": 0.15, + "learning_rate": 4.2695430569548986e-05, + "loss": 0.9236, + "step": 4930 + }, + { + "epoch": 0.15, + "learning_rate": 4.268061399869614e-05, + "loss": 0.9911, + "step": 4940 + }, + { + "epoch": 0.15, + "learning_rate": 4.26657974278433e-05, + "loss": 1.1111, + "step": 4950 + }, + { + "epoch": 0.15, + "learning_rate": 4.265098085699046e-05, + "loss": 0.9179, + "step": 4960 + }, + { + "epoch": 0.15, + "learning_rate": 4.263616428613762e-05, + "loss": 0.8431, + "step": 4970 + }, + { + "epoch": 0.15, + "learning_rate": 4.262134771528478e-05, + "loss": 0.9731, + "step": 4980 + }, + { + "epoch": 0.15, + "learning_rate": 4.2606531144431936e-05, + "loss": 0.9344, + "step": 4990 + }, + { + "epoch": 0.15, + "learning_rate": 4.259171457357909e-05, + "loss": 1.0364, + "step": 5000 + }, + { + "epoch": 0.15, + "learning_rate": 4.257689800272625e-05, + "loss": 0.9216, + "step": 5010 + }, + { + "epoch": 0.15, + "learning_rate": 4.256208143187341e-05, + "loss": 0.9568, + "step": 5020 + }, + { + "epoch": 0.15, + "learning_rate": 4.254726486102057e-05, + "loss": 0.9962, + "step": 5030 + }, + { + "epoch": 0.15, + "learning_rate": 4.253244829016773e-05, + "loss": 1.1549, + "step": 5040 + }, + { + "epoch": 0.15, + "learning_rate": 4.251763171931488e-05, + "loss": 0.9061, + "step": 5050 + }, + { + "epoch": 0.15, + "learning_rate": 4.250281514846204e-05, + "loss": 0.9633, + "step": 5060 + }, + { + "epoch": 0.15, + "learning_rate": 4.24879985776092e-05, + "loss": 0.9426, + "step": 5070 + }, + { + "epoch": 0.15, + "learning_rate": 4.247318200675636e-05, + "loss": 1.0316, + "step": 5080 + }, + { + "epoch": 0.15, + "learning_rate": 4.245836543590352e-05, + "loss": 0.879, + "step": 5090 + }, + { + "epoch": 0.15, + "learning_rate": 4.244354886505067e-05, + "loss": 0.8395, + "step": 5100 + }, + { + "epoch": 0.15, + "learning_rate": 4.242873229419783e-05, + "loss": 1.022, + "step": 5110 + }, + { + "epoch": 0.15, + "learning_rate": 4.241391572334499e-05, + "loss": 0.9806, + "step": 5120 + }, + { + "epoch": 0.15, + "learning_rate": 4.239909915249215e-05, + "loss": 0.9603, + "step": 5130 + }, + { + "epoch": 0.15, + "learning_rate": 4.238428258163931e-05, + "loss": 0.8212, + "step": 5140 + }, + { + "epoch": 0.15, + "learning_rate": 4.2369466010786465e-05, + "loss": 0.9402, + "step": 5150 + }, + { + "epoch": 0.15, + "learning_rate": 4.235464943993362e-05, + "loss": 0.9632, + "step": 5160 + }, + { + "epoch": 0.15, + "learning_rate": 4.233983286908078e-05, + "loss": 0.857, + "step": 5170 + }, + { + "epoch": 0.15, + "learning_rate": 4.232501629822794e-05, + "loss": 1.0024, + "step": 5180 + }, + { + "epoch": 0.15, + "learning_rate": 4.23101997273751e-05, + "loss": 0.9256, + "step": 5190 + }, + { + "epoch": 0.15, + "learning_rate": 4.229538315652226e-05, + "loss": 0.9668, + "step": 5200 + }, + { + "epoch": 0.15, + "learning_rate": 4.2280566585669415e-05, + "loss": 0.9422, + "step": 5210 + }, + { + "epoch": 0.15, + "learning_rate": 4.226575001481657e-05, + "loss": 0.9228, + "step": 5220 + }, + { + "epoch": 0.15, + "learning_rate": 4.225093344396373e-05, + "loss": 0.9124, + "step": 5230 + }, + { + "epoch": 0.16, + "learning_rate": 4.223611687311089e-05, + "loss": 1.0929, + "step": 5240 + }, + { + "epoch": 0.16, + "learning_rate": 4.222130030225805e-05, + "loss": 0.9791, + "step": 5250 + }, + { + "epoch": 0.16, + "learning_rate": 4.220648373140521e-05, + "loss": 1.0369, + "step": 5260 + }, + { + "epoch": 0.16, + "learning_rate": 4.219166716055236e-05, + "loss": 1.0614, + "step": 5270 + }, + { + "epoch": 0.16, + "learning_rate": 4.217685058969952e-05, + "loss": 1.0704, + "step": 5280 + }, + { + "epoch": 0.16, + "learning_rate": 4.216203401884668e-05, + "loss": 0.977, + "step": 5290 + }, + { + "epoch": 0.16, + "learning_rate": 4.214721744799384e-05, + "loss": 1.0282, + "step": 5300 + }, + { + "epoch": 0.16, + "learning_rate": 4.2132400877141e-05, + "loss": 0.9825, + "step": 5310 + }, + { + "epoch": 0.16, + "learning_rate": 4.211758430628815e-05, + "loss": 1.0764, + "step": 5320 + }, + { + "epoch": 0.16, + "learning_rate": 4.210276773543531e-05, + "loss": 0.9701, + "step": 5330 + }, + { + "epoch": 0.16, + "learning_rate": 4.208795116458247e-05, + "loss": 1.0994, + "step": 5340 + }, + { + "epoch": 0.16, + "learning_rate": 4.207313459372963e-05, + "loss": 0.8733, + "step": 5350 + }, + { + "epoch": 0.16, + "learning_rate": 4.205831802287679e-05, + "loss": 1.0822, + "step": 5360 + }, + { + "epoch": 0.16, + "learning_rate": 4.204350145202395e-05, + "loss": 0.9434, + "step": 5370 + }, + { + "epoch": 0.16, + "learning_rate": 4.20286848811711e-05, + "loss": 0.9525, + "step": 5380 + }, + { + "epoch": 0.16, + "learning_rate": 4.201386831031826e-05, + "loss": 0.9808, + "step": 5390 + }, + { + "epoch": 0.16, + "learning_rate": 4.199905173946542e-05, + "loss": 0.9915, + "step": 5400 + }, + { + "epoch": 0.16, + "learning_rate": 4.198423516861258e-05, + "loss": 0.9708, + "step": 5410 + }, + { + "epoch": 0.16, + "learning_rate": 4.196941859775974e-05, + "loss": 1.0643, + "step": 5420 + }, + { + "epoch": 0.16, + "learning_rate": 4.1954602026906895e-05, + "loss": 1.0476, + "step": 5430 + }, + { + "epoch": 0.16, + "learning_rate": 4.193978545605405e-05, + "loss": 1.0005, + "step": 5440 + }, + { + "epoch": 0.16, + "learning_rate": 4.192496888520121e-05, + "loss": 1.0663, + "step": 5450 + }, + { + "epoch": 0.16, + "learning_rate": 4.1910152314348366e-05, + "loss": 1.0395, + "step": 5460 + }, + { + "epoch": 0.16, + "learning_rate": 4.189533574349553e-05, + "loss": 1.0204, + "step": 5470 + }, + { + "epoch": 0.16, + "learning_rate": 4.188051917264269e-05, + "loss": 0.9644, + "step": 5480 + }, + { + "epoch": 0.16, + "learning_rate": 4.186570260178984e-05, + "loss": 0.983, + "step": 5490 + }, + { + "epoch": 0.16, + "learning_rate": 4.1850886030937e-05, + "loss": 1.0131, + "step": 5500 + }, + { + "epoch": 0.16, + "learning_rate": 4.183606946008416e-05, + "loss": 1.0005, + "step": 5510 + }, + { + "epoch": 0.16, + "learning_rate": 4.1821252889231316e-05, + "loss": 0.9998, + "step": 5520 + }, + { + "epoch": 0.16, + "learning_rate": 4.180643631837848e-05, + "loss": 0.9927, + "step": 5530 + }, + { + "epoch": 0.16, + "learning_rate": 4.179161974752564e-05, + "loss": 0.8264, + "step": 5540 + }, + { + "epoch": 0.16, + "learning_rate": 4.177680317667279e-05, + "loss": 1.0266, + "step": 5550 + }, + { + "epoch": 0.16, + "learning_rate": 4.176198660581995e-05, + "loss": 0.8833, + "step": 5560 + }, + { + "epoch": 0.17, + "learning_rate": 4.174717003496711e-05, + "loss": 0.9666, + "step": 5570 + }, + { + "epoch": 0.17, + "learning_rate": 4.173235346411427e-05, + "loss": 1.0156, + "step": 5580 + }, + { + "epoch": 0.17, + "learning_rate": 4.171753689326143e-05, + "loss": 1.0261, + "step": 5590 + }, + { + "epoch": 0.17, + "learning_rate": 4.170272032240858e-05, + "loss": 1.0929, + "step": 5600 + }, + { + "epoch": 0.17, + "learning_rate": 4.168790375155574e-05, + "loss": 0.9007, + "step": 5610 + }, + { + "epoch": 0.17, + "learning_rate": 4.16730871807029e-05, + "loss": 0.9849, + "step": 5620 + }, + { + "epoch": 0.17, + "learning_rate": 4.165827060985006e-05, + "loss": 0.9266, + "step": 5630 + }, + { + "epoch": 0.17, + "learning_rate": 4.164345403899722e-05, + "loss": 0.8687, + "step": 5640 + }, + { + "epoch": 0.17, + "learning_rate": 4.1628637468144374e-05, + "loss": 1.0976, + "step": 5650 + }, + { + "epoch": 0.17, + "learning_rate": 4.161382089729153e-05, + "loss": 1.001, + "step": 5660 + }, + { + "epoch": 0.17, + "learning_rate": 4.159900432643869e-05, + "loss": 0.922, + "step": 5670 + }, + { + "epoch": 0.17, + "learning_rate": 4.158418775558585e-05, + "loss": 1.0987, + "step": 5680 + }, + { + "epoch": 0.17, + "learning_rate": 4.156937118473301e-05, + "loss": 1.035, + "step": 5690 + }, + { + "epoch": 0.17, + "learning_rate": 4.155455461388017e-05, + "loss": 0.9958, + "step": 5700 + }, + { + "epoch": 0.17, + "learning_rate": 4.1539738043027324e-05, + "loss": 0.8683, + "step": 5710 + }, + { + "epoch": 0.17, + "learning_rate": 4.152492147217448e-05, + "loss": 0.8124, + "step": 5720 + }, + { + "epoch": 0.17, + "learning_rate": 4.151010490132164e-05, + "loss": 1.0338, + "step": 5730 + }, + { + "epoch": 0.17, + "learning_rate": 4.1495288330468796e-05, + "loss": 0.9689, + "step": 5740 + }, + { + "epoch": 0.17, + "learning_rate": 4.148047175961596e-05, + "loss": 1.0404, + "step": 5750 + }, + { + "epoch": 0.17, + "learning_rate": 4.146565518876312e-05, + "loss": 0.9264, + "step": 5760 + }, + { + "epoch": 0.17, + "learning_rate": 4.145083861791027e-05, + "loss": 1.0461, + "step": 5770 + }, + { + "epoch": 0.17, + "learning_rate": 4.143602204705743e-05, + "loss": 0.8862, + "step": 5780 + }, + { + "epoch": 0.17, + "learning_rate": 4.142120547620459e-05, + "loss": 0.912, + "step": 5790 + }, + { + "epoch": 0.17, + "learning_rate": 4.1406388905351746e-05, + "loss": 0.8659, + "step": 5800 + }, + { + "epoch": 0.17, + "learning_rate": 4.139157233449891e-05, + "loss": 0.9026, + "step": 5810 + }, + { + "epoch": 0.17, + "learning_rate": 4.137675576364606e-05, + "loss": 1.0321, + "step": 5820 + }, + { + "epoch": 0.17, + "learning_rate": 4.136193919279322e-05, + "loss": 0.9552, + "step": 5830 + }, + { + "epoch": 0.17, + "learning_rate": 4.134712262194038e-05, + "loss": 1.0345, + "step": 5840 + }, + { + "epoch": 0.17, + "learning_rate": 4.133230605108754e-05, + "loss": 0.9723, + "step": 5850 + }, + { + "epoch": 0.17, + "learning_rate": 4.1317489480234696e-05, + "loss": 0.9258, + "step": 5860 + }, + { + "epoch": 0.17, + "learning_rate": 4.1302672909381853e-05, + "loss": 0.9752, + "step": 5870 + }, + { + "epoch": 0.17, + "learning_rate": 4.128785633852901e-05, + "loss": 1.0206, + "step": 5880 + }, + { + "epoch": 0.17, + "learning_rate": 4.127303976767617e-05, + "loss": 0.9742, + "step": 5890 + }, + { + "epoch": 0.17, + "learning_rate": 4.125822319682333e-05, + "loss": 0.9583, + "step": 5900 + }, + { + "epoch": 0.18, + "learning_rate": 4.124340662597049e-05, + "loss": 0.8648, + "step": 5910 + }, + { + "epoch": 0.18, + "learning_rate": 4.1228590055117646e-05, + "loss": 1.0787, + "step": 5920 + }, + { + "epoch": 0.18, + "learning_rate": 4.1213773484264804e-05, + "loss": 0.9813, + "step": 5930 + }, + { + "epoch": 0.18, + "learning_rate": 4.119895691341196e-05, + "loss": 0.944, + "step": 5940 + }, + { + "epoch": 0.18, + "learning_rate": 4.118414034255912e-05, + "loss": 1.0075, + "step": 5950 + }, + { + "epoch": 0.18, + "learning_rate": 4.116932377170628e-05, + "loss": 0.8044, + "step": 5960 + }, + { + "epoch": 0.18, + "learning_rate": 4.115450720085344e-05, + "loss": 0.9448, + "step": 5970 + }, + { + "epoch": 0.18, + "learning_rate": 4.1139690630000597e-05, + "loss": 0.7978, + "step": 5980 + }, + { + "epoch": 0.18, + "learning_rate": 4.1124874059147754e-05, + "loss": 0.8473, + "step": 5990 + }, + { + "epoch": 0.18, + "learning_rate": 4.111005748829491e-05, + "loss": 1.087, + "step": 6000 + }, + { + "epoch": 0.18, + "learning_rate": 4.109524091744207e-05, + "loss": 1.024, + "step": 6010 + }, + { + "epoch": 0.18, + "learning_rate": 4.1080424346589226e-05, + "loss": 1.0458, + "step": 6020 + }, + { + "epoch": 0.18, + "learning_rate": 4.106560777573639e-05, + "loss": 1.0961, + "step": 6030 + }, + { + "epoch": 0.18, + "learning_rate": 4.105079120488354e-05, + "loss": 0.9593, + "step": 6040 + }, + { + "epoch": 0.18, + "learning_rate": 4.10359746340307e-05, + "loss": 1.1049, + "step": 6050 + }, + { + "epoch": 0.18, + "learning_rate": 4.102115806317786e-05, + "loss": 1.0515, + "step": 6060 + }, + { + "epoch": 0.18, + "learning_rate": 4.100634149232502e-05, + "loss": 0.9561, + "step": 6070 + }, + { + "epoch": 0.18, + "learning_rate": 4.0991524921472176e-05, + "loss": 0.9915, + "step": 6080 + }, + { + "epoch": 0.18, + "learning_rate": 4.097670835061933e-05, + "loss": 0.942, + "step": 6090 + }, + { + "epoch": 0.18, + "learning_rate": 4.096189177976649e-05, + "loss": 0.9765, + "step": 6100 + }, + { + "epoch": 0.18, + "learning_rate": 4.094707520891365e-05, + "loss": 1.0278, + "step": 6110 + }, + { + "epoch": 0.18, + "learning_rate": 4.093225863806081e-05, + "loss": 0.9689, + "step": 6120 + }, + { + "epoch": 0.18, + "learning_rate": 4.091744206720797e-05, + "loss": 1.0525, + "step": 6130 + }, + { + "epoch": 0.18, + "learning_rate": 4.0902625496355126e-05, + "loss": 0.923, + "step": 6140 + }, + { + "epoch": 0.18, + "learning_rate": 4.088780892550228e-05, + "loss": 0.9831, + "step": 6150 + }, + { + "epoch": 0.18, + "learning_rate": 4.087299235464944e-05, + "loss": 0.919, + "step": 6160 + }, + { + "epoch": 0.18, + "learning_rate": 4.08581757837966e-05, + "loss": 1.0675, + "step": 6170 + }, + { + "epoch": 0.18, + "learning_rate": 4.084335921294376e-05, + "loss": 1.0158, + "step": 6180 + }, + { + "epoch": 0.18, + "learning_rate": 4.082854264209092e-05, + "loss": 0.837, + "step": 6190 + }, + { + "epoch": 0.18, + "learning_rate": 4.0813726071238076e-05, + "loss": 1.0987, + "step": 6200 + }, + { + "epoch": 0.18, + "learning_rate": 4.079890950038523e-05, + "loss": 1.0349, + "step": 6210 + }, + { + "epoch": 0.18, + "learning_rate": 4.078409292953239e-05, + "loss": 0.8741, + "step": 6220 + }, + { + "epoch": 0.18, + "learning_rate": 4.076927635867955e-05, + "loss": 0.7855, + "step": 6230 + }, + { + "epoch": 0.18, + "learning_rate": 4.075445978782671e-05, + "loss": 0.8909, + "step": 6240 + }, + { + "epoch": 0.19, + "learning_rate": 4.073964321697387e-05, + "loss": 1.0483, + "step": 6250 + }, + { + "epoch": 0.19, + "learning_rate": 4.072482664612102e-05, + "loss": 1.0883, + "step": 6260 + }, + { + "epoch": 0.19, + "learning_rate": 4.0710010075268183e-05, + "loss": 0.9039, + "step": 6270 + }, + { + "epoch": 0.19, + "learning_rate": 4.069519350441534e-05, + "loss": 0.9711, + "step": 6280 + }, + { + "epoch": 0.19, + "learning_rate": 4.06803769335625e-05, + "loss": 1.0803, + "step": 6290 + }, + { + "epoch": 0.19, + "learning_rate": 4.0665560362709655e-05, + "loss": 1.0707, + "step": 6300 + }, + { + "epoch": 0.19, + "learning_rate": 4.065074379185681e-05, + "loss": 0.9018, + "step": 6310 + }, + { + "epoch": 0.19, + "learning_rate": 4.063592722100397e-05, + "loss": 0.866, + "step": 6320 + }, + { + "epoch": 0.19, + "learning_rate": 4.062111065015113e-05, + "loss": 0.9473, + "step": 6330 + }, + { + "epoch": 0.19, + "learning_rate": 4.060629407929829e-05, + "loss": 0.9391, + "step": 6340 + }, + { + "epoch": 0.19, + "learning_rate": 4.059147750844545e-05, + "loss": 0.9851, + "step": 6350 + }, + { + "epoch": 0.19, + "learning_rate": 4.0576660937592605e-05, + "loss": 0.9714, + "step": 6360 + }, + { + "epoch": 0.19, + "learning_rate": 4.056184436673976e-05, + "loss": 0.9525, + "step": 6370 + }, + { + "epoch": 0.19, + "learning_rate": 4.054702779588692e-05, + "loss": 0.9674, + "step": 6380 + }, + { + "epoch": 0.19, + "learning_rate": 4.053221122503408e-05, + "loss": 0.9294, + "step": 6390 + }, + { + "epoch": 0.19, + "learning_rate": 4.051739465418124e-05, + "loss": 0.9353, + "step": 6400 + }, + { + "epoch": 0.19, + "learning_rate": 4.05025780833284e-05, + "loss": 0.9975, + "step": 6410 + }, + { + "epoch": 0.19, + "learning_rate": 4.0487761512475555e-05, + "loss": 1.066, + "step": 6420 + }, + { + "epoch": 0.19, + "learning_rate": 4.047294494162271e-05, + "loss": 0.8707, + "step": 6430 + }, + { + "epoch": 0.19, + "learning_rate": 4.045812837076987e-05, + "loss": 0.957, + "step": 6440 + }, + { + "epoch": 0.19, + "learning_rate": 4.044331179991703e-05, + "loss": 1.0121, + "step": 6450 + }, + { + "epoch": 0.19, + "learning_rate": 4.042849522906419e-05, + "loss": 1.0795, + "step": 6460 + }, + { + "epoch": 0.19, + "learning_rate": 4.041367865821135e-05, + "loss": 0.9982, + "step": 6470 + }, + { + "epoch": 0.19, + "learning_rate": 4.03988620873585e-05, + "loss": 0.934, + "step": 6480 + }, + { + "epoch": 0.19, + "learning_rate": 4.038404551650566e-05, + "loss": 0.9686, + "step": 6490 + }, + { + "epoch": 0.19, + "learning_rate": 4.036922894565282e-05, + "loss": 1.0467, + "step": 6500 + }, + { + "epoch": 0.19, + "learning_rate": 4.035441237479998e-05, + "loss": 0.9294, + "step": 6510 + }, + { + "epoch": 0.19, + "learning_rate": 4.033959580394714e-05, + "loss": 1.1935, + "step": 6520 + }, + { + "epoch": 0.19, + "learning_rate": 4.03247792330943e-05, + "loss": 0.9153, + "step": 6530 + }, + { + "epoch": 0.19, + "learning_rate": 4.030996266224145e-05, + "loss": 0.9231, + "step": 6540 + }, + { + "epoch": 0.19, + "learning_rate": 4.029514609138861e-05, + "loss": 0.9228, + "step": 6550 + }, + { + "epoch": 0.19, + "learning_rate": 4.028032952053577e-05, + "loss": 0.9463, + "step": 6560 + }, + { + "epoch": 0.19, + "learning_rate": 4.026551294968293e-05, + "loss": 0.9007, + "step": 6570 + }, + { + "epoch": 0.19, + "learning_rate": 4.0250696378830085e-05, + "loss": 0.8671, + "step": 6580 + }, + { + "epoch": 0.2, + "learning_rate": 4.023587980797724e-05, + "loss": 0.9874, + "step": 6590 + }, + { + "epoch": 0.2, + "learning_rate": 4.02210632371244e-05, + "loss": 1.0277, + "step": 6600 + }, + { + "epoch": 0.2, + "learning_rate": 4.0206246666271556e-05, + "loss": 1.1138, + "step": 6610 + }, + { + "epoch": 0.2, + "learning_rate": 4.019143009541872e-05, + "loss": 0.9372, + "step": 6620 + }, + { + "epoch": 0.2, + "learning_rate": 4.017661352456588e-05, + "loss": 1.0057, + "step": 6630 + }, + { + "epoch": 0.2, + "learning_rate": 4.0161796953713035e-05, + "loss": 0.8845, + "step": 6640 + }, + { + "epoch": 0.2, + "learning_rate": 4.014698038286019e-05, + "loss": 0.8917, + "step": 6650 + }, + { + "epoch": 0.2, + "learning_rate": 4.013216381200735e-05, + "loss": 1.1626, + "step": 6660 + }, + { + "epoch": 0.2, + "learning_rate": 4.0117347241154507e-05, + "loss": 0.8921, + "step": 6670 + }, + { + "epoch": 0.2, + "learning_rate": 4.010253067030167e-05, + "loss": 1.0248, + "step": 6680 + }, + { + "epoch": 0.2, + "learning_rate": 4.008771409944883e-05, + "loss": 1.1001, + "step": 6690 + }, + { + "epoch": 0.2, + "learning_rate": 4.0072897528595985e-05, + "loss": 0.969, + "step": 6700 + }, + { + "epoch": 0.2, + "learning_rate": 4.005808095774314e-05, + "loss": 0.8646, + "step": 6710 + }, + { + "epoch": 0.2, + "learning_rate": 4.00432643868903e-05, + "loss": 1.0792, + "step": 6720 + }, + { + "epoch": 0.2, + "learning_rate": 4.002844781603746e-05, + "loss": 0.9803, + "step": 6730 + }, + { + "epoch": 0.2, + "learning_rate": 4.001363124518462e-05, + "loss": 0.9235, + "step": 6740 + }, + { + "epoch": 0.2, + "learning_rate": 3.999881467433178e-05, + "loss": 0.9415, + "step": 6750 + }, + { + "epoch": 0.2, + "learning_rate": 3.998399810347893e-05, + "loss": 0.7717, + "step": 6760 + }, + { + "epoch": 0.2, + "learning_rate": 3.996918153262609e-05, + "loss": 0.9583, + "step": 6770 + }, + { + "epoch": 0.2, + "learning_rate": 3.995436496177325e-05, + "loss": 1.0054, + "step": 6780 + }, + { + "epoch": 0.2, + "learning_rate": 3.993954839092041e-05, + "loss": 1.0008, + "step": 6790 + }, + { + "epoch": 0.2, + "learning_rate": 3.992473182006757e-05, + "loss": 0.9701, + "step": 6800 + }, + { + "epoch": 0.2, + "learning_rate": 3.990991524921472e-05, + "loss": 0.8991, + "step": 6810 + }, + { + "epoch": 0.2, + "learning_rate": 3.989509867836188e-05, + "loss": 0.8607, + "step": 6820 + }, + { + "epoch": 0.2, + "learning_rate": 3.988028210750904e-05, + "loss": 0.8719, + "step": 6830 + }, + { + "epoch": 0.2, + "learning_rate": 3.98654655366562e-05, + "loss": 0.9115, + "step": 6840 + }, + { + "epoch": 0.2, + "learning_rate": 3.985064896580336e-05, + "loss": 1.0135, + "step": 6850 + }, + { + "epoch": 0.2, + "learning_rate": 3.9835832394950514e-05, + "loss": 0.9497, + "step": 6860 + }, + { + "epoch": 0.2, + "learning_rate": 3.982101582409767e-05, + "loss": 0.9448, + "step": 6870 + }, + { + "epoch": 0.2, + "learning_rate": 3.980619925324483e-05, + "loss": 0.9963, + "step": 6880 + }, + { + "epoch": 0.2, + "learning_rate": 3.9791382682391986e-05, + "loss": 0.8949, + "step": 6890 + }, + { + "epoch": 0.2, + "learning_rate": 3.977656611153915e-05, + "loss": 0.9696, + "step": 6900 + }, + { + "epoch": 0.2, + "learning_rate": 3.976174954068631e-05, + "loss": 1.01, + "step": 6910 + }, + { + "epoch": 0.21, + "learning_rate": 3.9746932969833465e-05, + "loss": 0.8498, + "step": 6920 + }, + { + "epoch": 0.21, + "learning_rate": 3.973211639898062e-05, + "loss": 1.0444, + "step": 6930 + }, + { + "epoch": 0.21, + "learning_rate": 3.971729982812778e-05, + "loss": 0.9505, + "step": 6940 + }, + { + "epoch": 0.21, + "learning_rate": 3.9702483257274936e-05, + "loss": 1.0077, + "step": 6950 + }, + { + "epoch": 0.21, + "learning_rate": 3.96876666864221e-05, + "loss": 0.9244, + "step": 6960 + }, + { + "epoch": 0.21, + "learning_rate": 3.967285011556926e-05, + "loss": 0.9844, + "step": 6970 + }, + { + "epoch": 0.21, + "learning_rate": 3.965803354471641e-05, + "loss": 1.0796, + "step": 6980 + }, + { + "epoch": 0.21, + "learning_rate": 3.964321697386357e-05, + "loss": 1.012, + "step": 6990 + }, + { + "epoch": 0.21, + "learning_rate": 3.962840040301073e-05, + "loss": 0.9539, + "step": 7000 + }, + { + "epoch": 0.21, + "learning_rate": 3.9613583832157886e-05, + "loss": 0.9021, + "step": 7010 + }, + { + "epoch": 0.21, + "learning_rate": 3.959876726130505e-05, + "loss": 0.945, + "step": 7020 + }, + { + "epoch": 0.21, + "learning_rate": 3.95839506904522e-05, + "loss": 0.8773, + "step": 7030 + }, + { + "epoch": 0.21, + "learning_rate": 3.956913411959936e-05, + "loss": 0.8579, + "step": 7040 + }, + { + "epoch": 0.21, + "learning_rate": 3.955431754874652e-05, + "loss": 1.0599, + "step": 7050 + }, + { + "epoch": 0.21, + "learning_rate": 3.953950097789368e-05, + "loss": 0.9359, + "step": 7060 + }, + { + "epoch": 0.21, + "learning_rate": 3.9524684407040837e-05, + "loss": 0.9445, + "step": 7070 + }, + { + "epoch": 0.21, + "learning_rate": 3.9509867836187994e-05, + "loss": 1.0743, + "step": 7080 + }, + { + "epoch": 0.21, + "learning_rate": 3.949505126533515e-05, + "loss": 1.0425, + "step": 7090 + }, + { + "epoch": 0.21, + "learning_rate": 3.948023469448231e-05, + "loss": 0.861, + "step": 7100 + }, + { + "epoch": 0.21, + "learning_rate": 3.946541812362947e-05, + "loss": 1.0149, + "step": 7110 + }, + { + "epoch": 0.21, + "learning_rate": 3.945060155277663e-05, + "loss": 0.8354, + "step": 7120 + }, + { + "epoch": 0.21, + "learning_rate": 3.943578498192379e-05, + "loss": 1.02, + "step": 7130 + }, + { + "epoch": 0.21, + "learning_rate": 3.9420968411070944e-05, + "loss": 1.0401, + "step": 7140 + }, + { + "epoch": 0.21, + "learning_rate": 3.94061518402181e-05, + "loss": 0.9268, + "step": 7150 + }, + { + "epoch": 0.21, + "learning_rate": 3.939133526936526e-05, + "loss": 0.9768, + "step": 7160 + }, + { + "epoch": 0.21, + "learning_rate": 3.9376518698512416e-05, + "loss": 0.8638, + "step": 7170 + }, + { + "epoch": 0.21, + "learning_rate": 3.936170212765958e-05, + "loss": 1.0255, + "step": 7180 + }, + { + "epoch": 0.21, + "learning_rate": 3.934688555680674e-05, + "loss": 0.9134, + "step": 7190 + }, + { + "epoch": 0.21, + "learning_rate": 3.933206898595389e-05, + "loss": 0.8389, + "step": 7200 + }, + { + "epoch": 0.21, + "learning_rate": 3.931725241510105e-05, + "loss": 0.8027, + "step": 7210 + }, + { + "epoch": 0.21, + "learning_rate": 3.930243584424821e-05, + "loss": 0.8722, + "step": 7220 + }, + { + "epoch": 0.21, + "learning_rate": 3.9287619273395366e-05, + "loss": 1.066, + "step": 7230 + }, + { + "epoch": 0.21, + "learning_rate": 3.927280270254253e-05, + "loss": 0.8792, + "step": 7240 + }, + { + "epoch": 0.21, + "learning_rate": 3.925798613168968e-05, + "loss": 0.9674, + "step": 7250 + }, + { + "epoch": 0.22, + "learning_rate": 3.924316956083684e-05, + "loss": 1.0199, + "step": 7260 + }, + { + "epoch": 0.22, + "learning_rate": 3.9228352989984e-05, + "loss": 0.9654, + "step": 7270 + }, + { + "epoch": 0.22, + "learning_rate": 3.921353641913116e-05, + "loss": 0.8794, + "step": 7280 + }, + { + "epoch": 0.22, + "learning_rate": 3.9198719848278316e-05, + "loss": 0.9812, + "step": 7290 + }, + { + "epoch": 0.22, + "learning_rate": 3.918390327742547e-05, + "loss": 0.979, + "step": 7300 + }, + { + "epoch": 0.22, + "learning_rate": 3.916908670657263e-05, + "loss": 0.9721, + "step": 7310 + }, + { + "epoch": 0.22, + "learning_rate": 3.915427013571979e-05, + "loss": 0.8746, + "step": 7320 + }, + { + "epoch": 0.22, + "learning_rate": 3.913945356486695e-05, + "loss": 0.9368, + "step": 7330 + }, + { + "epoch": 0.22, + "learning_rate": 3.912463699401411e-05, + "loss": 0.918, + "step": 7340 + }, + { + "epoch": 0.22, + "learning_rate": 3.9109820423161266e-05, + "loss": 0.9911, + "step": 7350 + }, + { + "epoch": 0.22, + "learning_rate": 3.9095003852308423e-05, + "loss": 0.8009, + "step": 7360 + }, + { + "epoch": 0.22, + "learning_rate": 3.908018728145558e-05, + "loss": 0.9735, + "step": 7370 + }, + { + "epoch": 0.22, + "learning_rate": 3.906537071060274e-05, + "loss": 0.9262, + "step": 7380 + }, + { + "epoch": 0.22, + "learning_rate": 3.90505541397499e-05, + "loss": 0.9529, + "step": 7390 + }, + { + "epoch": 0.22, + "learning_rate": 3.903573756889706e-05, + "loss": 0.8379, + "step": 7400 + }, + { + "epoch": 0.22, + "learning_rate": 3.9020920998044216e-05, + "loss": 0.981, + "step": 7410 + }, + { + "epoch": 0.22, + "learning_rate": 3.900610442719137e-05, + "loss": 1.0949, + "step": 7420 + }, + { + "epoch": 0.22, + "learning_rate": 3.899128785633853e-05, + "loss": 1.0619, + "step": 7430 + }, + { + "epoch": 0.22, + "learning_rate": 3.897647128548569e-05, + "loss": 0.9317, + "step": 7440 + }, + { + "epoch": 0.22, + "learning_rate": 3.8961654714632845e-05, + "loss": 1.0149, + "step": 7450 + }, + { + "epoch": 0.22, + "learning_rate": 3.894683814378001e-05, + "loss": 0.9187, + "step": 7460 + }, + { + "epoch": 0.22, + "learning_rate": 3.893202157292716e-05, + "loss": 0.8749, + "step": 7470 + }, + { + "epoch": 0.22, + "learning_rate": 3.891720500207432e-05, + "loss": 0.9824, + "step": 7480 + }, + { + "epoch": 0.22, + "learning_rate": 3.890238843122148e-05, + "loss": 0.8977, + "step": 7490 + }, + { + "epoch": 0.22, + "learning_rate": 3.888757186036864e-05, + "loss": 0.9572, + "step": 7500 + }, + { + "epoch": 0.22, + "learning_rate": 3.8872755289515795e-05, + "loss": 0.8585, + "step": 7510 + }, + { + "epoch": 0.22, + "learning_rate": 3.885793871866296e-05, + "loss": 0.919, + "step": 7520 + }, + { + "epoch": 0.22, + "learning_rate": 3.884312214781011e-05, + "loss": 1.094, + "step": 7530 + }, + { + "epoch": 0.22, + "learning_rate": 3.882830557695727e-05, + "loss": 0.8935, + "step": 7540 + }, + { + "epoch": 0.22, + "learning_rate": 3.881348900610443e-05, + "loss": 1.0104, + "step": 7550 + }, + { + "epoch": 0.22, + "learning_rate": 3.879867243525159e-05, + "loss": 0.9139, + "step": 7560 + }, + { + "epoch": 0.22, + "learning_rate": 3.8783855864398746e-05, + "loss": 0.901, + "step": 7570 + }, + { + "epoch": 0.22, + "learning_rate": 3.87690392935459e-05, + "loss": 0.9488, + "step": 7580 + }, + { + "epoch": 0.22, + "learning_rate": 3.875422272269306e-05, + "loss": 1.0754, + "step": 7590 + }, + { + "epoch": 0.23, + "learning_rate": 3.873940615184022e-05, + "loss": 0.9621, + "step": 7600 + }, + { + "epoch": 0.23, + "learning_rate": 3.872458958098738e-05, + "loss": 0.849, + "step": 7610 + }, + { + "epoch": 0.23, + "learning_rate": 3.870977301013454e-05, + "loss": 1.0123, + "step": 7620 + }, + { + "epoch": 0.23, + "learning_rate": 3.8694956439281696e-05, + "loss": 1.0595, + "step": 7630 + }, + { + "epoch": 0.23, + "learning_rate": 3.868013986842885e-05, + "loss": 1.0262, + "step": 7640 + }, + { + "epoch": 0.23, + "learning_rate": 3.866532329757601e-05, + "loss": 1.1201, + "step": 7650 + }, + { + "epoch": 0.23, + "learning_rate": 3.865050672672317e-05, + "loss": 0.8926, + "step": 7660 + }, + { + "epoch": 0.23, + "learning_rate": 3.863569015587033e-05, + "loss": 0.8442, + "step": 7670 + }, + { + "epoch": 0.23, + "learning_rate": 3.862087358501749e-05, + "loss": 0.961, + "step": 7680 + }, + { + "epoch": 0.23, + "learning_rate": 3.8606057014164646e-05, + "loss": 0.9387, + "step": 7690 + }, + { + "epoch": 0.23, + "learning_rate": 3.8591240443311796e-05, + "loss": 0.8551, + "step": 7700 + }, + { + "epoch": 0.23, + "learning_rate": 3.857642387245896e-05, + "loss": 0.8485, + "step": 7710 + }, + { + "epoch": 0.23, + "learning_rate": 3.856160730160612e-05, + "loss": 1.1136, + "step": 7720 + }, + { + "epoch": 0.23, + "learning_rate": 3.8546790730753275e-05, + "loss": 0.9127, + "step": 7730 + }, + { + "epoch": 0.23, + "learning_rate": 3.853197415990044e-05, + "loss": 0.9789, + "step": 7740 + }, + { + "epoch": 0.23, + "learning_rate": 3.851715758904759e-05, + "loss": 1.0526, + "step": 7750 + }, + { + "epoch": 0.23, + "learning_rate": 3.8502341018194747e-05, + "loss": 0.8497, + "step": 7760 + }, + { + "epoch": 0.23, + "learning_rate": 3.848752444734191e-05, + "loss": 0.839, + "step": 7770 + }, + { + "epoch": 0.23, + "learning_rate": 3.847270787648907e-05, + "loss": 0.9596, + "step": 7780 + }, + { + "epoch": 0.23, + "learning_rate": 3.8457891305636225e-05, + "loss": 0.8706, + "step": 7790 + }, + { + "epoch": 0.23, + "learning_rate": 3.844307473478338e-05, + "loss": 1.0107, + "step": 7800 + }, + { + "epoch": 0.23, + "learning_rate": 3.842825816393054e-05, + "loss": 0.9414, + "step": 7810 + }, + { + "epoch": 0.23, + "learning_rate": 3.84134415930777e-05, + "loss": 0.8719, + "step": 7820 + }, + { + "epoch": 0.23, + "learning_rate": 3.839862502222486e-05, + "loss": 0.8495, + "step": 7830 + }, + { + "epoch": 0.23, + "learning_rate": 3.838380845137202e-05, + "loss": 0.9224, + "step": 7840 + }, + { + "epoch": 0.23, + "learning_rate": 3.8368991880519175e-05, + "loss": 0.9091, + "step": 7850 + }, + { + "epoch": 0.23, + "learning_rate": 3.835417530966633e-05, + "loss": 0.9843, + "step": 7860 + }, + { + "epoch": 0.23, + "learning_rate": 3.833935873881349e-05, + "loss": 1.1209, + "step": 7870 + }, + { + "epoch": 0.23, + "learning_rate": 3.832454216796065e-05, + "loss": 1.0426, + "step": 7880 + }, + { + "epoch": 0.23, + "learning_rate": 3.830972559710781e-05, + "loss": 0.8406, + "step": 7890 + }, + { + "epoch": 0.23, + "learning_rate": 3.829490902625497e-05, + "loss": 0.861, + "step": 7900 + }, + { + "epoch": 0.23, + "learning_rate": 3.8280092455402125e-05, + "loss": 1.0723, + "step": 7910 + }, + { + "epoch": 0.23, + "learning_rate": 3.826527588454928e-05, + "loss": 0.9372, + "step": 7920 + }, + { + "epoch": 0.23, + "learning_rate": 3.825045931369644e-05, + "loss": 1.0065, + "step": 7930 + }, + { + "epoch": 0.24, + "learning_rate": 3.82356427428436e-05, + "loss": 0.7633, + "step": 7940 + }, + { + "epoch": 0.24, + "learning_rate": 3.822082617199076e-05, + "loss": 0.9963, + "step": 7950 + }, + { + "epoch": 0.24, + "learning_rate": 3.820600960113792e-05, + "loss": 0.8949, + "step": 7960 + }, + { + "epoch": 0.24, + "learning_rate": 3.819119303028507e-05, + "loss": 1.0342, + "step": 7970 + }, + { + "epoch": 0.24, + "learning_rate": 3.8176376459432226e-05, + "loss": 1.0523, + "step": 7980 + }, + { + "epoch": 0.24, + "learning_rate": 3.816155988857939e-05, + "loss": 0.9841, + "step": 7990 + }, + { + "epoch": 0.24, + "learning_rate": 3.814674331772655e-05, + "loss": 1.0401, + "step": 8000 + }, + { + "epoch": 0.24, + "learning_rate": 3.8131926746873704e-05, + "loss": 0.8284, + "step": 8010 + }, + { + "epoch": 0.24, + "learning_rate": 3.811711017602086e-05, + "loss": 0.9808, + "step": 8020 + }, + { + "epoch": 0.24, + "learning_rate": 3.810229360516802e-05, + "loss": 0.8408, + "step": 8030 + }, + { + "epoch": 0.24, + "learning_rate": 3.8087477034315176e-05, + "loss": 0.9917, + "step": 8040 + }, + { + "epoch": 0.24, + "learning_rate": 3.807266046346234e-05, + "loss": 0.904, + "step": 8050 + }, + { + "epoch": 0.24, + "learning_rate": 3.80578438926095e-05, + "loss": 0.9815, + "step": 8060 + }, + { + "epoch": 0.24, + "learning_rate": 3.8043027321756655e-05, + "loss": 0.9306, + "step": 8070 + }, + { + "epoch": 0.24, + "learning_rate": 3.802821075090381e-05, + "loss": 0.9941, + "step": 8080 + }, + { + "epoch": 0.24, + "learning_rate": 3.801339418005097e-05, + "loss": 0.8978, + "step": 8090 + }, + { + "epoch": 0.24, + "learning_rate": 3.7998577609198126e-05, + "loss": 0.941, + "step": 8100 + }, + { + "epoch": 0.24, + "learning_rate": 3.798376103834529e-05, + "loss": 0.918, + "step": 8110 + }, + { + "epoch": 0.24, + "learning_rate": 3.796894446749245e-05, + "loss": 0.7802, + "step": 8120 + }, + { + "epoch": 0.24, + "learning_rate": 3.7954127896639605e-05, + "loss": 0.9219, + "step": 8130 + }, + { + "epoch": 0.24, + "learning_rate": 3.793931132578676e-05, + "loss": 1.0816, + "step": 8140 + }, + { + "epoch": 0.24, + "learning_rate": 3.792449475493392e-05, + "loss": 1.0741, + "step": 8150 + }, + { + "epoch": 0.24, + "learning_rate": 3.7909678184081077e-05, + "loss": 1.0095, + "step": 8160 + }, + { + "epoch": 0.24, + "learning_rate": 3.789486161322824e-05, + "loss": 0.962, + "step": 8170 + }, + { + "epoch": 0.24, + "learning_rate": 3.78800450423754e-05, + "loss": 0.8198, + "step": 8180 + }, + { + "epoch": 0.24, + "learning_rate": 3.786522847152255e-05, + "loss": 0.9874, + "step": 8190 + }, + { + "epoch": 0.24, + "learning_rate": 3.785041190066971e-05, + "loss": 0.997, + "step": 8200 + }, + { + "epoch": 0.24, + "learning_rate": 3.783559532981687e-05, + "loss": 1.0218, + "step": 8210 + }, + { + "epoch": 0.24, + "learning_rate": 3.782077875896403e-05, + "loss": 0.7353, + "step": 8220 + }, + { + "epoch": 0.24, + "learning_rate": 3.780596218811119e-05, + "loss": 0.9453, + "step": 8230 + }, + { + "epoch": 0.24, + "learning_rate": 3.779114561725834e-05, + "loss": 0.9067, + "step": 8240 + }, + { + "epoch": 0.24, + "learning_rate": 3.77763290464055e-05, + "loss": 0.9107, + "step": 8250 + }, + { + "epoch": 0.24, + "learning_rate": 3.7761512475552656e-05, + "loss": 0.8818, + "step": 8260 + }, + { + "epoch": 0.25, + "learning_rate": 3.774669590469982e-05, + "loss": 1.065, + "step": 8270 + }, + { + "epoch": 0.25, + "learning_rate": 3.773187933384698e-05, + "loss": 0.9564, + "step": 8280 + }, + { + "epoch": 0.25, + "learning_rate": 3.7717062762994134e-05, + "loss": 1.0434, + "step": 8290 + }, + { + "epoch": 0.25, + "learning_rate": 3.770224619214129e-05, + "loss": 1.0674, + "step": 8300 + }, + { + "epoch": 0.25, + "learning_rate": 3.768742962128845e-05, + "loss": 1.0128, + "step": 8310 + }, + { + "epoch": 0.25, + "learning_rate": 3.7672613050435606e-05, + "loss": 0.9528, + "step": 8320 + }, + { + "epoch": 0.25, + "learning_rate": 3.765779647958277e-05, + "loss": 1.088, + "step": 8330 + }, + { + "epoch": 0.25, + "learning_rate": 3.764297990872993e-05, + "loss": 1.0491, + "step": 8340 + }, + { + "epoch": 0.25, + "learning_rate": 3.7628163337877084e-05, + "loss": 1.0906, + "step": 8350 + }, + { + "epoch": 0.25, + "learning_rate": 3.761334676702424e-05, + "loss": 0.9301, + "step": 8360 + }, + { + "epoch": 0.25, + "learning_rate": 3.75985301961714e-05, + "loss": 0.8296, + "step": 8370 + }, + { + "epoch": 0.25, + "learning_rate": 3.7583713625318556e-05, + "loss": 0.9707, + "step": 8380 + }, + { + "epoch": 0.25, + "learning_rate": 3.756889705446572e-05, + "loss": 0.9587, + "step": 8390 + }, + { + "epoch": 0.25, + "learning_rate": 3.755408048361288e-05, + "loss": 1.0983, + "step": 8400 + }, + { + "epoch": 0.25, + "learning_rate": 3.753926391276003e-05, + "loss": 0.7814, + "step": 8410 + }, + { + "epoch": 0.25, + "learning_rate": 3.752444734190719e-05, + "loss": 0.9767, + "step": 8420 + }, + { + "epoch": 0.25, + "learning_rate": 3.750963077105435e-05, + "loss": 0.9191, + "step": 8430 + }, + { + "epoch": 0.25, + "learning_rate": 3.7494814200201506e-05, + "loss": 1.133, + "step": 8440 + }, + { + "epoch": 0.25, + "learning_rate": 3.747999762934867e-05, + "loss": 0.7876, + "step": 8450 + }, + { + "epoch": 0.25, + "learning_rate": 3.746518105849583e-05, + "loss": 1.0342, + "step": 8460 + }, + { + "epoch": 0.25, + "learning_rate": 3.745036448764298e-05, + "loss": 0.9359, + "step": 8470 + }, + { + "epoch": 0.25, + "learning_rate": 3.743554791679014e-05, + "loss": 1.0841, + "step": 8480 + }, + { + "epoch": 0.25, + "learning_rate": 3.74207313459373e-05, + "loss": 0.9303, + "step": 8490 + }, + { + "epoch": 0.25, + "learning_rate": 3.7405914775084456e-05, + "loss": 0.947, + "step": 8500 + }, + { + "epoch": 0.25, + "learning_rate": 3.739109820423162e-05, + "loss": 0.9999, + "step": 8510 + }, + { + "epoch": 0.25, + "learning_rate": 3.737628163337877e-05, + "loss": 0.9578, + "step": 8520 + }, + { + "epoch": 0.25, + "learning_rate": 3.736146506252593e-05, + "loss": 0.9773, + "step": 8530 + }, + { + "epoch": 0.25, + "learning_rate": 3.7346648491673085e-05, + "loss": 0.9933, + "step": 8540 + }, + { + "epoch": 0.25, + "learning_rate": 3.733183192082025e-05, + "loss": 0.8242, + "step": 8550 + }, + { + "epoch": 0.25, + "learning_rate": 3.7317015349967406e-05, + "loss": 1.1116, + "step": 8560 + }, + { + "epoch": 0.25, + "learning_rate": 3.7302198779114564e-05, + "loss": 1.1778, + "step": 8570 + }, + { + "epoch": 0.25, + "learning_rate": 3.728738220826172e-05, + "loss": 0.9228, + "step": 8580 + }, + { + "epoch": 0.25, + "learning_rate": 3.727256563740888e-05, + "loss": 1.0696, + "step": 8590 + }, + { + "epoch": 0.25, + "learning_rate": 3.7257749066556035e-05, + "loss": 1.104, + "step": 8600 + }, + { + "epoch": 0.26, + "learning_rate": 3.72429324957032e-05, + "loss": 0.7433, + "step": 8610 + }, + { + "epoch": 0.26, + "learning_rate": 3.722811592485036e-05, + "loss": 0.9487, + "step": 8620 + }, + { + "epoch": 0.26, + "learning_rate": 3.7213299353997514e-05, + "loss": 0.9236, + "step": 8630 + }, + { + "epoch": 0.26, + "learning_rate": 3.719848278314467e-05, + "loss": 0.9623, + "step": 8640 + }, + { + "epoch": 0.26, + "learning_rate": 3.718366621229183e-05, + "loss": 1.021, + "step": 8650 + }, + { + "epoch": 0.26, + "learning_rate": 3.7168849641438986e-05, + "loss": 0.749, + "step": 8660 + }, + { + "epoch": 0.26, + "learning_rate": 3.715403307058615e-05, + "loss": 0.8871, + "step": 8670 + }, + { + "epoch": 0.26, + "learning_rate": 3.713921649973331e-05, + "loss": 0.8794, + "step": 8680 + }, + { + "epoch": 0.26, + "learning_rate": 3.712439992888046e-05, + "loss": 1.0036, + "step": 8690 + }, + { + "epoch": 0.26, + "learning_rate": 3.710958335802762e-05, + "loss": 0.9535, + "step": 8700 + }, + { + "epoch": 0.26, + "learning_rate": 3.709476678717478e-05, + "loss": 0.9561, + "step": 8710 + }, + { + "epoch": 0.26, + "learning_rate": 3.7079950216321936e-05, + "loss": 1.0284, + "step": 8720 + }, + { + "epoch": 0.26, + "learning_rate": 3.70651336454691e-05, + "loss": 0.9994, + "step": 8730 + }, + { + "epoch": 0.26, + "learning_rate": 3.705031707461625e-05, + "loss": 0.8966, + "step": 8740 + }, + { + "epoch": 0.26, + "learning_rate": 3.703550050376341e-05, + "loss": 0.8629, + "step": 8750 + }, + { + "epoch": 0.26, + "learning_rate": 3.702068393291057e-05, + "loss": 0.9028, + "step": 8760 + }, + { + "epoch": 0.26, + "learning_rate": 3.700586736205773e-05, + "loss": 0.949, + "step": 8770 + }, + { + "epoch": 0.26, + "learning_rate": 3.6991050791204886e-05, + "loss": 0.9702, + "step": 8780 + }, + { + "epoch": 0.26, + "learning_rate": 3.697623422035204e-05, + "loss": 0.8712, + "step": 8790 + }, + { + "epoch": 0.26, + "learning_rate": 3.69614176494992e-05, + "loss": 0.8101, + "step": 8800 + }, + { + "epoch": 0.26, + "learning_rate": 3.694660107864636e-05, + "loss": 0.9015, + "step": 8810 + }, + { + "epoch": 0.26, + "learning_rate": 3.6931784507793515e-05, + "loss": 1.0166, + "step": 8820 + }, + { + "epoch": 0.26, + "learning_rate": 3.691696793694068e-05, + "loss": 1.0416, + "step": 8830 + }, + { + "epoch": 0.26, + "learning_rate": 3.6902151366087836e-05, + "loss": 0.9694, + "step": 8840 + }, + { + "epoch": 0.26, + "learning_rate": 3.688733479523499e-05, + "loss": 0.9738, + "step": 8850 + }, + { + "epoch": 0.26, + "learning_rate": 3.687251822438215e-05, + "loss": 1.0464, + "step": 8860 + }, + { + "epoch": 0.26, + "learning_rate": 3.685770165352931e-05, + "loss": 0.9831, + "step": 8870 + }, + { + "epoch": 0.26, + "learning_rate": 3.6842885082676465e-05, + "loss": 0.8784, + "step": 8880 + }, + { + "epoch": 0.26, + "learning_rate": 3.682806851182363e-05, + "loss": 0.9262, + "step": 8890 + }, + { + "epoch": 0.26, + "learning_rate": 3.6813251940970786e-05, + "loss": 0.9408, + "step": 8900 + }, + { + "epoch": 0.26, + "learning_rate": 3.679843537011794e-05, + "loss": 1.0725, + "step": 8910 + }, + { + "epoch": 0.26, + "learning_rate": 3.67836187992651e-05, + "loss": 0.9675, + "step": 8920 + }, + { + "epoch": 0.26, + "learning_rate": 3.676880222841226e-05, + "loss": 0.9087, + "step": 8930 + }, + { + "epoch": 0.26, + "learning_rate": 3.6753985657559415e-05, + "loss": 0.9308, + "step": 8940 + }, + { + "epoch": 0.27, + "learning_rate": 3.673916908670658e-05, + "loss": 1.0129, + "step": 8950 + }, + { + "epoch": 0.27, + "learning_rate": 3.672435251585373e-05, + "loss": 0.7679, + "step": 8960 + }, + { + "epoch": 0.27, + "learning_rate": 3.670953594500089e-05, + "loss": 0.865, + "step": 8970 + }, + { + "epoch": 0.27, + "learning_rate": 3.669471937414805e-05, + "loss": 0.9606, + "step": 8980 + }, + { + "epoch": 0.27, + "learning_rate": 3.667990280329521e-05, + "loss": 0.9638, + "step": 8990 + }, + { + "epoch": 0.27, + "learning_rate": 3.6665086232442365e-05, + "loss": 0.9136, + "step": 9000 + }, + { + "epoch": 0.27, + "learning_rate": 3.665026966158952e-05, + "loss": 1.0182, + "step": 9010 + }, + { + "epoch": 0.27, + "learning_rate": 3.663545309073668e-05, + "loss": 0.7749, + "step": 9020 + }, + { + "epoch": 0.27, + "learning_rate": 3.662063651988384e-05, + "loss": 0.9811, + "step": 9030 + }, + { + "epoch": 0.27, + "learning_rate": 3.6605819949031e-05, + "loss": 0.9869, + "step": 9040 + }, + { + "epoch": 0.27, + "learning_rate": 3.659100337817816e-05, + "loss": 1.0581, + "step": 9050 + }, + { + "epoch": 0.27, + "learning_rate": 3.6576186807325316e-05, + "loss": 0.8655, + "step": 9060 + }, + { + "epoch": 0.27, + "learning_rate": 3.656137023647247e-05, + "loss": 0.9068, + "step": 9070 + }, + { + "epoch": 0.27, + "learning_rate": 3.654655366561963e-05, + "loss": 0.8552, + "step": 9080 + }, + { + "epoch": 0.27, + "learning_rate": 3.653173709476679e-05, + "loss": 0.948, + "step": 9090 + }, + { + "epoch": 0.27, + "learning_rate": 3.6516920523913944e-05, + "loss": 0.8968, + "step": 9100 + }, + { + "epoch": 0.27, + "learning_rate": 3.650210395306111e-05, + "loss": 0.9175, + "step": 9110 + }, + { + "epoch": 0.27, + "learning_rate": 3.6487287382208266e-05, + "loss": 1.0115, + "step": 9120 + }, + { + "epoch": 0.27, + "learning_rate": 3.6472470811355416e-05, + "loss": 0.8408, + "step": 9130 + }, + { + "epoch": 0.27, + "learning_rate": 3.645765424050258e-05, + "loss": 0.9666, + "step": 9140 + }, + { + "epoch": 0.27, + "learning_rate": 3.644283766964974e-05, + "loss": 0.861, + "step": 9150 + }, + { + "epoch": 0.27, + "learning_rate": 3.6428021098796895e-05, + "loss": 0.8259, + "step": 9160 + }, + { + "epoch": 0.27, + "learning_rate": 3.641320452794406e-05, + "loss": 0.8378, + "step": 9170 + }, + { + "epoch": 0.27, + "learning_rate": 3.639838795709121e-05, + "loss": 0.9386, + "step": 9180 + }, + { + "epoch": 0.27, + "learning_rate": 3.6383571386238366e-05, + "loss": 0.8758, + "step": 9190 + }, + { + "epoch": 0.27, + "learning_rate": 3.636875481538553e-05, + "loss": 0.9089, + "step": 9200 + }, + { + "epoch": 0.27, + "learning_rate": 3.635393824453269e-05, + "loss": 0.8981, + "step": 9210 + }, + { + "epoch": 0.27, + "learning_rate": 3.6339121673679845e-05, + "loss": 1.0558, + "step": 9220 + }, + { + "epoch": 0.27, + "learning_rate": 3.6324305102827e-05, + "loss": 0.9055, + "step": 9230 + }, + { + "epoch": 0.27, + "learning_rate": 3.630948853197416e-05, + "loss": 0.9934, + "step": 9240 + }, + { + "epoch": 0.27, + "learning_rate": 3.6294671961121317e-05, + "loss": 0.9359, + "step": 9250 + }, + { + "epoch": 0.27, + "learning_rate": 3.627985539026848e-05, + "loss": 0.8586, + "step": 9260 + }, + { + "epoch": 0.27, + "learning_rate": 3.626503881941564e-05, + "loss": 0.9114, + "step": 9270 + }, + { + "epoch": 0.27, + "learning_rate": 3.6250222248562795e-05, + "loss": 0.8345, + "step": 9280 + }, + { + "epoch": 0.28, + "learning_rate": 3.623540567770995e-05, + "loss": 0.9753, + "step": 9290 + }, + { + "epoch": 0.28, + "learning_rate": 3.622058910685711e-05, + "loss": 0.8734, + "step": 9300 + }, + { + "epoch": 0.28, + "learning_rate": 3.620577253600427e-05, + "loss": 0.9912, + "step": 9310 + }, + { + "epoch": 0.28, + "learning_rate": 3.619095596515143e-05, + "loss": 0.9848, + "step": 9320 + }, + { + "epoch": 0.28, + "learning_rate": 3.617613939429859e-05, + "loss": 0.9692, + "step": 9330 + }, + { + "epoch": 0.28, + "learning_rate": 3.6161322823445745e-05, + "loss": 0.9421, + "step": 9340 + }, + { + "epoch": 0.28, + "learning_rate": 3.61465062525929e-05, + "loss": 0.9079, + "step": 9350 + }, + { + "epoch": 0.28, + "learning_rate": 3.613168968174006e-05, + "loss": 1.0107, + "step": 9360 + }, + { + "epoch": 0.28, + "learning_rate": 3.611687311088722e-05, + "loss": 0.902, + "step": 9370 + }, + { + "epoch": 0.28, + "learning_rate": 3.6102056540034374e-05, + "loss": 0.9594, + "step": 9380 + }, + { + "epoch": 0.28, + "learning_rate": 3.608723996918154e-05, + "loss": 0.7266, + "step": 9390 + }, + { + "epoch": 0.28, + "learning_rate": 3.607242339832869e-05, + "loss": 1.0683, + "step": 9400 + }, + { + "epoch": 0.28, + "learning_rate": 3.6057606827475846e-05, + "loss": 0.9873, + "step": 9410 + }, + { + "epoch": 0.28, + "learning_rate": 3.604279025662301e-05, + "loss": 0.9732, + "step": 9420 + }, + { + "epoch": 0.28, + "learning_rate": 3.602797368577017e-05, + "loss": 0.8969, + "step": 9430 + }, + { + "epoch": 0.28, + "learning_rate": 3.6013157114917324e-05, + "loss": 0.9062, + "step": 9440 + }, + { + "epoch": 0.28, + "learning_rate": 3.599834054406449e-05, + "loss": 1.0581, + "step": 9450 + }, + { + "epoch": 0.28, + "learning_rate": 3.598352397321164e-05, + "loss": 0.882, + "step": 9460 + }, + { + "epoch": 0.28, + "learning_rate": 3.5968707402358796e-05, + "loss": 0.755, + "step": 9470 + }, + { + "epoch": 0.28, + "learning_rate": 3.595389083150596e-05, + "loss": 0.8925, + "step": 9480 + }, + { + "epoch": 0.28, + "learning_rate": 3.593907426065312e-05, + "loss": 0.9063, + "step": 9490 + }, + { + "epoch": 0.28, + "learning_rate": 3.5924257689800274e-05, + "loss": 1.0206, + "step": 9500 + }, + { + "epoch": 0.28, + "learning_rate": 3.590944111894743e-05, + "loss": 0.8922, + "step": 9510 + }, + { + "epoch": 0.28, + "learning_rate": 3.589462454809459e-05, + "loss": 0.9306, + "step": 9520 + }, + { + "epoch": 0.28, + "learning_rate": 3.5879807977241746e-05, + "loss": 1.1392, + "step": 9530 + }, + { + "epoch": 0.28, + "learning_rate": 3.586499140638891e-05, + "loss": 0.9836, + "step": 9540 + }, + { + "epoch": 0.28, + "learning_rate": 3.585017483553607e-05, + "loss": 0.8159, + "step": 9550 + }, + { + "epoch": 0.28, + "learning_rate": 3.5835358264683225e-05, + "loss": 0.7914, + "step": 9560 + }, + { + "epoch": 0.28, + "learning_rate": 3.582054169383038e-05, + "loss": 0.8551, + "step": 9570 + }, + { + "epoch": 0.28, + "learning_rate": 3.580572512297754e-05, + "loss": 0.8242, + "step": 9580 + }, + { + "epoch": 0.28, + "learning_rate": 3.5790908552124696e-05, + "loss": 1.0202, + "step": 9590 + }, + { + "epoch": 0.28, + "learning_rate": 3.577609198127186e-05, + "loss": 0.9322, + "step": 9600 + }, + { + "epoch": 0.28, + "learning_rate": 3.576127541041902e-05, + "loss": 0.9435, + "step": 9610 + }, + { + "epoch": 0.29, + "learning_rate": 3.5746458839566175e-05, + "loss": 0.7511, + "step": 9620 + }, + { + "epoch": 0.29, + "learning_rate": 3.573164226871333e-05, + "loss": 0.9171, + "step": 9630 + }, + { + "epoch": 0.29, + "learning_rate": 3.571682569786049e-05, + "loss": 0.8806, + "step": 9640 + }, + { + "epoch": 0.29, + "learning_rate": 3.5702009127007646e-05, + "loss": 0.935, + "step": 9650 + }, + { + "epoch": 0.29, + "learning_rate": 3.5687192556154804e-05, + "loss": 1.114, + "step": 9660 + }, + { + "epoch": 0.29, + "learning_rate": 3.567237598530197e-05, + "loss": 0.8805, + "step": 9670 + }, + { + "epoch": 0.29, + "learning_rate": 3.565755941444912e-05, + "loss": 1.0953, + "step": 9680 + }, + { + "epoch": 0.29, + "learning_rate": 3.5642742843596275e-05, + "loss": 0.9546, + "step": 9690 + }, + { + "epoch": 0.29, + "learning_rate": 3.562792627274344e-05, + "loss": 1.0753, + "step": 9700 + }, + { + "epoch": 0.29, + "learning_rate": 3.56131097018906e-05, + "loss": 0.9717, + "step": 9710 + }, + { + "epoch": 0.29, + "learning_rate": 3.5598293131037754e-05, + "loss": 0.8966, + "step": 9720 + }, + { + "epoch": 0.29, + "learning_rate": 3.558347656018491e-05, + "loss": 1.0347, + "step": 9730 + }, + { + "epoch": 0.29, + "learning_rate": 3.556865998933207e-05, + "loss": 0.9045, + "step": 9740 + }, + { + "epoch": 0.29, + "learning_rate": 3.5553843418479226e-05, + "loss": 0.984, + "step": 9750 + }, + { + "epoch": 0.29, + "learning_rate": 3.553902684762639e-05, + "loss": 0.8778, + "step": 9760 + }, + { + "epoch": 0.29, + "learning_rate": 3.552421027677355e-05, + "loss": 0.9551, + "step": 9770 + }, + { + "epoch": 0.29, + "learning_rate": 3.5509393705920704e-05, + "loss": 0.9584, + "step": 9780 + }, + { + "epoch": 0.29, + "learning_rate": 3.549457713506786e-05, + "loss": 0.948, + "step": 9790 + }, + { + "epoch": 0.29, + "learning_rate": 3.547976056421502e-05, + "loss": 1.0751, + "step": 9800 + }, + { + "epoch": 0.29, + "learning_rate": 3.5464943993362176e-05, + "loss": 0.9411, + "step": 9810 + }, + { + "epoch": 0.29, + "learning_rate": 3.545012742250934e-05, + "loss": 0.8559, + "step": 9820 + }, + { + "epoch": 0.29, + "learning_rate": 3.54353108516565e-05, + "loss": 1.0811, + "step": 9830 + }, + { + "epoch": 0.29, + "learning_rate": 3.5420494280803654e-05, + "loss": 0.7798, + "step": 9840 + }, + { + "epoch": 0.29, + "learning_rate": 3.540567770995081e-05, + "loss": 0.9944, + "step": 9850 + }, + { + "epoch": 0.29, + "learning_rate": 3.539086113909797e-05, + "loss": 0.8336, + "step": 9860 + }, + { + "epoch": 0.29, + "learning_rate": 3.5376044568245126e-05, + "loss": 1.0403, + "step": 9870 + }, + { + "epoch": 0.29, + "learning_rate": 3.536122799739229e-05, + "loss": 0.8135, + "step": 9880 + }, + { + "epoch": 0.29, + "learning_rate": 3.534641142653945e-05, + "loss": 0.8156, + "step": 9890 + }, + { + "epoch": 0.29, + "learning_rate": 3.53315948556866e-05, + "loss": 0.95, + "step": 9900 + }, + { + "epoch": 0.29, + "learning_rate": 3.531677828483376e-05, + "loss": 0.8798, + "step": 9910 + }, + { + "epoch": 0.29, + "learning_rate": 3.530196171398092e-05, + "loss": 0.9948, + "step": 9920 + }, + { + "epoch": 0.29, + "learning_rate": 3.5287145143128076e-05, + "loss": 0.9372, + "step": 9930 + }, + { + "epoch": 0.29, + "learning_rate": 3.527232857227523e-05, + "loss": 0.9383, + "step": 9940 + }, + { + "epoch": 0.29, + "learning_rate": 3.525751200142239e-05, + "loss": 1.0386, + "step": 9950 + }, + { + "epoch": 0.3, + "learning_rate": 3.524269543056955e-05, + "loss": 0.9028, + "step": 9960 + }, + { + "epoch": 0.3, + "learning_rate": 3.5227878859716705e-05, + "loss": 1.0153, + "step": 9970 + }, + { + "epoch": 0.3, + "learning_rate": 3.521306228886387e-05, + "loss": 0.8812, + "step": 9980 + }, + { + "epoch": 0.3, + "learning_rate": 3.5198245718011026e-05, + "loss": 1.0119, + "step": 9990 + }, + { + "epoch": 0.3, + "learning_rate": 3.5183429147158183e-05, + "loss": 0.9994, + "step": 10000 + }, + { + "epoch": 0.3, + "learning_rate": 3.516861257630534e-05, + "loss": 0.869, + "step": 10010 + }, + { + "epoch": 0.3, + "learning_rate": 3.51537960054525e-05, + "loss": 0.8866, + "step": 10020 + }, + { + "epoch": 0.3, + "learning_rate": 3.5138979434599655e-05, + "loss": 0.8339, + "step": 10030 + }, + { + "epoch": 0.3, + "learning_rate": 3.512416286374682e-05, + "loss": 0.7662, + "step": 10040 + }, + { + "epoch": 0.3, + "learning_rate": 3.5109346292893976e-05, + "loss": 0.808, + "step": 10050 + }, + { + "epoch": 0.3, + "learning_rate": 3.5094529722041134e-05, + "loss": 0.8775, + "step": 10060 + }, + { + "epoch": 0.3, + "learning_rate": 3.507971315118829e-05, + "loss": 0.8669, + "step": 10070 + }, + { + "epoch": 0.3, + "learning_rate": 3.506489658033545e-05, + "loss": 0.8483, + "step": 10080 + }, + { + "epoch": 0.3, + "learning_rate": 3.5050080009482605e-05, + "loss": 1.1318, + "step": 10090 + }, + { + "epoch": 0.3, + "learning_rate": 3.503526343862977e-05, + "loss": 0.9721, + "step": 10100 + }, + { + "epoch": 0.3, + "learning_rate": 3.5020446867776927e-05, + "loss": 0.9693, + "step": 10110 + }, + { + "epoch": 0.3, + "learning_rate": 3.500563029692408e-05, + "loss": 0.9194, + "step": 10120 + }, + { + "epoch": 0.3, + "learning_rate": 3.499081372607124e-05, + "loss": 1.0312, + "step": 10130 + }, + { + "epoch": 0.3, + "learning_rate": 3.49759971552184e-05, + "loss": 0.9082, + "step": 10140 + }, + { + "epoch": 0.3, + "learning_rate": 3.4961180584365556e-05, + "loss": 0.969, + "step": 10150 + }, + { + "epoch": 0.3, + "learning_rate": 3.494636401351272e-05, + "loss": 0.9248, + "step": 10160 + }, + { + "epoch": 0.3, + "learning_rate": 3.493154744265987e-05, + "loss": 1.0493, + "step": 10170 + }, + { + "epoch": 0.3, + "learning_rate": 3.491673087180703e-05, + "loss": 0.8598, + "step": 10180 + }, + { + "epoch": 0.3, + "learning_rate": 3.490191430095419e-05, + "loss": 1.0603, + "step": 10190 + }, + { + "epoch": 0.3, + "learning_rate": 3.488709773010135e-05, + "loss": 1.0821, + "step": 10200 + }, + { + "epoch": 0.3, + "learning_rate": 3.4872281159248506e-05, + "loss": 0.9239, + "step": 10210 + }, + { + "epoch": 0.3, + "learning_rate": 3.485746458839566e-05, + "loss": 0.9464, + "step": 10220 + }, + { + "epoch": 0.3, + "learning_rate": 3.484264801754282e-05, + "loss": 0.9686, + "step": 10230 + }, + { + "epoch": 0.3, + "learning_rate": 3.482783144668998e-05, + "loss": 0.9362, + "step": 10240 + }, + { + "epoch": 0.3, + "learning_rate": 3.4813014875837135e-05, + "loss": 0.9344, + "step": 10250 + }, + { + "epoch": 0.3, + "learning_rate": 3.47981983049843e-05, + "loss": 1.0437, + "step": 10260 + }, + { + "epoch": 0.3, + "learning_rate": 3.4783381734131456e-05, + "loss": 0.9584, + "step": 10270 + }, + { + "epoch": 0.3, + "learning_rate": 3.476856516327861e-05, + "loss": 0.9738, + "step": 10280 + }, + { + "epoch": 0.3, + "learning_rate": 3.475374859242577e-05, + "loss": 0.8704, + "step": 10290 + }, + { + "epoch": 0.31, + "learning_rate": 3.473893202157293e-05, + "loss": 0.8944, + "step": 10300 + }, + { + "epoch": 0.31, + "learning_rate": 3.4724115450720085e-05, + "loss": 0.794, + "step": 10310 + }, + { + "epoch": 0.31, + "learning_rate": 3.470929887986725e-05, + "loss": 1.0281, + "step": 10320 + }, + { + "epoch": 0.31, + "learning_rate": 3.4694482309014406e-05, + "loss": 1.0935, + "step": 10330 + }, + { + "epoch": 0.31, + "learning_rate": 3.4679665738161556e-05, + "loss": 1.0274, + "step": 10340 + }, + { + "epoch": 0.31, + "learning_rate": 3.466484916730872e-05, + "loss": 1.0199, + "step": 10350 + }, + { + "epoch": 0.31, + "learning_rate": 3.465003259645588e-05, + "loss": 1.0275, + "step": 10360 + }, + { + "epoch": 0.31, + "learning_rate": 3.4635216025603035e-05, + "loss": 0.9856, + "step": 10370 + }, + { + "epoch": 0.31, + "learning_rate": 3.46203994547502e-05, + "loss": 1.0552, + "step": 10380 + }, + { + "epoch": 0.31, + "learning_rate": 3.460558288389735e-05, + "loss": 1.0258, + "step": 10390 + }, + { + "epoch": 0.31, + "learning_rate": 3.459076631304451e-05, + "loss": 0.7794, + "step": 10400 + }, + { + "epoch": 0.31, + "learning_rate": 3.457594974219167e-05, + "loss": 0.8032, + "step": 10410 + }, + { + "epoch": 0.31, + "learning_rate": 3.456113317133883e-05, + "loss": 0.9867, + "step": 10420 + }, + { + "epoch": 0.31, + "learning_rate": 3.4546316600485985e-05, + "loss": 0.939, + "step": 10430 + }, + { + "epoch": 0.31, + "learning_rate": 3.453150002963315e-05, + "loss": 0.9754, + "step": 10440 + }, + { + "epoch": 0.31, + "learning_rate": 3.45166834587803e-05, + "loss": 0.9524, + "step": 10450 + }, + { + "epoch": 0.31, + "learning_rate": 3.450186688792746e-05, + "loss": 0.8299, + "step": 10460 + }, + { + "epoch": 0.31, + "learning_rate": 3.448705031707462e-05, + "loss": 0.8595, + "step": 10470 + }, + { + "epoch": 0.31, + "learning_rate": 3.447223374622178e-05, + "loss": 0.7695, + "step": 10480 + }, + { + "epoch": 0.31, + "learning_rate": 3.4457417175368935e-05, + "loss": 0.774, + "step": 10490 + }, + { + "epoch": 0.31, + "learning_rate": 3.444260060451609e-05, + "loss": 0.8111, + "step": 10500 + }, + { + "epoch": 0.31, + "learning_rate": 3.442778403366325e-05, + "loss": 0.92, + "step": 10510 + }, + { + "epoch": 0.31, + "learning_rate": 3.441296746281041e-05, + "loss": 0.8905, + "step": 10520 + }, + { + "epoch": 0.31, + "learning_rate": 3.4398150891957564e-05, + "loss": 0.7788, + "step": 10530 + }, + { + "epoch": 0.31, + "learning_rate": 3.438333432110473e-05, + "loss": 0.7675, + "step": 10540 + }, + { + "epoch": 0.31, + "learning_rate": 3.4368517750251885e-05, + "loss": 0.9089, + "step": 10550 + }, + { + "epoch": 0.31, + "learning_rate": 3.4353701179399036e-05, + "loss": 0.984, + "step": 10560 + }, + { + "epoch": 0.31, + "learning_rate": 3.43388846085462e-05, + "loss": 0.9988, + "step": 10570 + }, + { + "epoch": 0.31, + "learning_rate": 3.432406803769336e-05, + "loss": 0.9656, + "step": 10580 + }, + { + "epoch": 0.31, + "learning_rate": 3.4309251466840514e-05, + "loss": 1.0129, + "step": 10590 + }, + { + "epoch": 0.31, + "learning_rate": 3.429443489598768e-05, + "loss": 0.9738, + "step": 10600 + }, + { + "epoch": 0.31, + "learning_rate": 3.4279618325134836e-05, + "loss": 0.9136, + "step": 10610 + }, + { + "epoch": 0.31, + "learning_rate": 3.4264801754281986e-05, + "loss": 0.9943, + "step": 10620 + }, + { + "epoch": 0.32, + "learning_rate": 3.424998518342915e-05, + "loss": 0.8487, + "step": 10630 + }, + { + "epoch": 0.32, + "learning_rate": 3.423516861257631e-05, + "loss": 0.8095, + "step": 10640 + }, + { + "epoch": 0.32, + "learning_rate": 3.4220352041723465e-05, + "loss": 1.0404, + "step": 10650 + }, + { + "epoch": 0.32, + "learning_rate": 3.420553547087063e-05, + "loss": 0.886, + "step": 10660 + }, + { + "epoch": 0.32, + "learning_rate": 3.419071890001778e-05, + "loss": 1.0033, + "step": 10670 + }, + { + "epoch": 0.32, + "learning_rate": 3.4175902329164936e-05, + "loss": 0.8785, + "step": 10680 + }, + { + "epoch": 0.32, + "learning_rate": 3.41610857583121e-05, + "loss": 0.9679, + "step": 10690 + }, + { + "epoch": 0.32, + "learning_rate": 3.414626918745926e-05, + "loss": 0.9523, + "step": 10700 + }, + { + "epoch": 0.32, + "learning_rate": 3.4131452616606415e-05, + "loss": 0.8169, + "step": 10710 + }, + { + "epoch": 0.32, + "learning_rate": 3.411663604575357e-05, + "loss": 1.0053, + "step": 10720 + }, + { + "epoch": 0.32, + "learning_rate": 3.410181947490073e-05, + "loss": 0.8736, + "step": 10730 + }, + { + "epoch": 0.32, + "learning_rate": 3.4087002904047886e-05, + "loss": 0.9577, + "step": 10740 + }, + { + "epoch": 0.32, + "learning_rate": 3.407218633319505e-05, + "loss": 0.8155, + "step": 10750 + }, + { + "epoch": 0.32, + "learning_rate": 3.405736976234221e-05, + "loss": 0.8046, + "step": 10760 + }, + { + "epoch": 0.32, + "learning_rate": 3.4042553191489365e-05, + "loss": 0.8739, + "step": 10770 + }, + { + "epoch": 0.32, + "learning_rate": 3.402773662063652e-05, + "loss": 0.8769, + "step": 10780 + }, + { + "epoch": 0.32, + "learning_rate": 3.401292004978368e-05, + "loss": 0.9993, + "step": 10790 + }, + { + "epoch": 0.32, + "learning_rate": 3.399810347893084e-05, + "loss": 0.9002, + "step": 10800 + }, + { + "epoch": 0.32, + "learning_rate": 3.3983286908077994e-05, + "loss": 1.0381, + "step": 10810 + }, + { + "epoch": 0.32, + "learning_rate": 3.396847033722516e-05, + "loss": 0.8421, + "step": 10820 + }, + { + "epoch": 0.32, + "learning_rate": 3.3953653766372315e-05, + "loss": 0.8218, + "step": 10830 + }, + { + "epoch": 0.32, + "learning_rate": 3.3938837195519466e-05, + "loss": 0.9304, + "step": 10840 + }, + { + "epoch": 0.32, + "learning_rate": 3.392402062466663e-05, + "loss": 0.8721, + "step": 10850 + }, + { + "epoch": 0.32, + "learning_rate": 3.390920405381379e-05, + "loss": 0.9466, + "step": 10860 + }, + { + "epoch": 0.32, + "learning_rate": 3.3894387482960944e-05, + "loss": 0.8224, + "step": 10870 + }, + { + "epoch": 0.32, + "learning_rate": 3.387957091210811e-05, + "loss": 0.8728, + "step": 10880 + }, + { + "epoch": 0.32, + "learning_rate": 3.386475434125526e-05, + "loss": 1.0159, + "step": 10890 + }, + { + "epoch": 0.32, + "learning_rate": 3.3849937770402416e-05, + "loss": 0.9289, + "step": 10900 + }, + { + "epoch": 0.32, + "learning_rate": 3.383512119954958e-05, + "loss": 0.8679, + "step": 10910 + }, + { + "epoch": 0.32, + "learning_rate": 3.382030462869674e-05, + "loss": 1.0493, + "step": 10920 + }, + { + "epoch": 0.32, + "learning_rate": 3.3805488057843894e-05, + "loss": 0.8017, + "step": 10930 + }, + { + "epoch": 0.32, + "learning_rate": 3.379067148699105e-05, + "loss": 0.7417, + "step": 10940 + }, + { + "epoch": 0.32, + "learning_rate": 3.377585491613821e-05, + "loss": 0.8948, + "step": 10950 + }, + { + "epoch": 0.32, + "learning_rate": 3.3761038345285366e-05, + "loss": 0.9638, + "step": 10960 + }, + { + "epoch": 0.33, + "learning_rate": 3.374622177443253e-05, + "loss": 0.9754, + "step": 10970 + }, + { + "epoch": 0.33, + "learning_rate": 3.373140520357969e-05, + "loss": 0.8849, + "step": 10980 + }, + { + "epoch": 0.33, + "learning_rate": 3.3716588632726844e-05, + "loss": 0.9619, + "step": 10990 + }, + { + "epoch": 0.33, + "learning_rate": 3.3701772061874e-05, + "loss": 1.0277, + "step": 11000 + }, + { + "epoch": 0.33, + "learning_rate": 3.368695549102116e-05, + "loss": 0.9002, + "step": 11010 + }, + { + "epoch": 0.33, + "learning_rate": 3.3672138920168316e-05, + "loss": 1.0167, + "step": 11020 + }, + { + "epoch": 0.33, + "learning_rate": 3.365732234931548e-05, + "loss": 1.1125, + "step": 11030 + }, + { + "epoch": 0.33, + "learning_rate": 3.364250577846264e-05, + "loss": 0.9814, + "step": 11040 + }, + { + "epoch": 0.33, + "learning_rate": 3.3627689207609795e-05, + "loss": 0.8165, + "step": 11050 + }, + { + "epoch": 0.33, + "learning_rate": 3.3612872636756945e-05, + "loss": 1.0137, + "step": 11060 + }, + { + "epoch": 0.33, + "learning_rate": 3.359805606590411e-05, + "loss": 0.9903, + "step": 11070 + }, + { + "epoch": 0.33, + "learning_rate": 3.3583239495051266e-05, + "loss": 0.9677, + "step": 11080 + }, + { + "epoch": 0.33, + "learning_rate": 3.3568422924198423e-05, + "loss": 0.9325, + "step": 11090 + }, + { + "epoch": 0.33, + "learning_rate": 3.355360635334559e-05, + "loss": 0.9825, + "step": 11100 + }, + { + "epoch": 0.33, + "learning_rate": 3.353878978249274e-05, + "loss": 0.8533, + "step": 11110 + }, + { + "epoch": 0.33, + "learning_rate": 3.3523973211639895e-05, + "loss": 1.0349, + "step": 11120 + }, + { + "epoch": 0.33, + "learning_rate": 3.350915664078706e-05, + "loss": 0.8865, + "step": 11130 + }, + { + "epoch": 0.33, + "learning_rate": 3.3494340069934216e-05, + "loss": 1.0031, + "step": 11140 + }, + { + "epoch": 0.33, + "learning_rate": 3.3479523499081374e-05, + "loss": 0.8539, + "step": 11150 + }, + { + "epoch": 0.33, + "learning_rate": 3.346470692822853e-05, + "loss": 0.8381, + "step": 11160 + }, + { + "epoch": 0.33, + "learning_rate": 3.344989035737569e-05, + "loss": 0.9522, + "step": 11170 + }, + { + "epoch": 0.33, + "learning_rate": 3.3435073786522845e-05, + "loss": 0.9666, + "step": 11180 + }, + { + "epoch": 0.33, + "learning_rate": 3.342025721567001e-05, + "loss": 0.819, + "step": 11190 + }, + { + "epoch": 0.33, + "learning_rate": 3.3405440644817167e-05, + "loss": 0.9633, + "step": 11200 + }, + { + "epoch": 0.33, + "learning_rate": 3.3390624073964324e-05, + "loss": 1.0502, + "step": 11210 + }, + { + "epoch": 0.33, + "learning_rate": 3.337580750311148e-05, + "loss": 0.909, + "step": 11220 + }, + { + "epoch": 0.33, + "learning_rate": 3.336099093225864e-05, + "loss": 1.0321, + "step": 11230 + }, + { + "epoch": 0.33, + "learning_rate": 3.3346174361405796e-05, + "loss": 1.063, + "step": 11240 + }, + { + "epoch": 0.33, + "learning_rate": 3.333135779055296e-05, + "loss": 0.9974, + "step": 11250 + }, + { + "epoch": 0.33, + "learning_rate": 3.331654121970012e-05, + "loss": 0.9965, + "step": 11260 + }, + { + "epoch": 0.33, + "learning_rate": 3.3301724648847274e-05, + "loss": 0.7699, + "step": 11270 + }, + { + "epoch": 0.33, + "learning_rate": 3.328690807799443e-05, + "loss": 0.9671, + "step": 11280 + }, + { + "epoch": 0.33, + "learning_rate": 3.327209150714159e-05, + "loss": 0.9856, + "step": 11290 + }, + { + "epoch": 0.33, + "learning_rate": 3.3257274936288746e-05, + "loss": 0.8764, + "step": 11300 + }, + { + "epoch": 0.34, + "learning_rate": 3.324245836543591e-05, + "loss": 0.9155, + "step": 11310 + }, + { + "epoch": 0.34, + "learning_rate": 3.322764179458307e-05, + "loss": 1.0399, + "step": 11320 + }, + { + "epoch": 0.34, + "learning_rate": 3.321282522373022e-05, + "loss": 0.8548, + "step": 11330 + }, + { + "epoch": 0.34, + "learning_rate": 3.3198008652877375e-05, + "loss": 1.0151, + "step": 11340 + }, + { + "epoch": 0.34, + "learning_rate": 3.318319208202454e-05, + "loss": 0.7865, + "step": 11350 + }, + { + "epoch": 0.34, + "learning_rate": 3.3168375511171696e-05, + "loss": 0.8713, + "step": 11360 + }, + { + "epoch": 0.34, + "learning_rate": 3.315355894031885e-05, + "loss": 1.0597, + "step": 11370 + }, + { + "epoch": 0.34, + "learning_rate": 3.313874236946602e-05, + "loss": 0.8394, + "step": 11380 + }, + { + "epoch": 0.34, + "learning_rate": 3.312392579861317e-05, + "loss": 0.9528, + "step": 11390 + }, + { + "epoch": 0.34, + "learning_rate": 3.3109109227760325e-05, + "loss": 0.8585, + "step": 11400 + }, + { + "epoch": 0.34, + "learning_rate": 3.309429265690749e-05, + "loss": 0.8442, + "step": 11410 + }, + { + "epoch": 0.34, + "learning_rate": 3.3079476086054646e-05, + "loss": 1.0275, + "step": 11420 + }, + { + "epoch": 0.34, + "learning_rate": 3.30646595152018e-05, + "loss": 1.0349, + "step": 11430 + }, + { + "epoch": 0.34, + "learning_rate": 3.304984294434896e-05, + "loss": 0.7873, + "step": 11440 + }, + { + "epoch": 0.34, + "learning_rate": 3.303502637349612e-05, + "loss": 0.9402, + "step": 11450 + }, + { + "epoch": 0.34, + "learning_rate": 3.3020209802643275e-05, + "loss": 0.8423, + "step": 11460 + }, + { + "epoch": 0.34, + "learning_rate": 3.300539323179044e-05, + "loss": 0.846, + "step": 11470 + }, + { + "epoch": 0.34, + "learning_rate": 3.2990576660937596e-05, + "loss": 0.9767, + "step": 11480 + }, + { + "epoch": 0.34, + "learning_rate": 3.2975760090084753e-05, + "loss": 0.8893, + "step": 11490 + }, + { + "epoch": 0.34, + "learning_rate": 3.296094351923191e-05, + "loss": 1.0934, + "step": 11500 + }, + { + "epoch": 0.34, + "learning_rate": 3.294612694837907e-05, + "loss": 0.8449, + "step": 11510 + }, + { + "epoch": 0.34, + "learning_rate": 3.2931310377526225e-05, + "loss": 0.9332, + "step": 11520 + }, + { + "epoch": 0.34, + "learning_rate": 3.291649380667339e-05, + "loss": 0.9488, + "step": 11530 + }, + { + "epoch": 0.34, + "learning_rate": 3.2901677235820546e-05, + "loss": 0.7982, + "step": 11540 + }, + { + "epoch": 0.34, + "learning_rate": 3.2886860664967704e-05, + "loss": 0.9872, + "step": 11550 + }, + { + "epoch": 0.34, + "learning_rate": 3.287204409411486e-05, + "loss": 0.8299, + "step": 11560 + }, + { + "epoch": 0.34, + "learning_rate": 3.285722752326202e-05, + "loss": 0.8706, + "step": 11570 + }, + { + "epoch": 0.34, + "learning_rate": 3.2842410952409175e-05, + "loss": 1.0442, + "step": 11580 + }, + { + "epoch": 0.34, + "learning_rate": 3.282759438155634e-05, + "loss": 0.9559, + "step": 11590 + }, + { + "epoch": 0.34, + "learning_rate": 3.2812777810703497e-05, + "loss": 0.8583, + "step": 11600 + }, + { + "epoch": 0.34, + "learning_rate": 3.279796123985065e-05, + "loss": 0.9606, + "step": 11610 + }, + { + "epoch": 0.34, + "learning_rate": 3.2783144668997804e-05, + "loss": 0.9162, + "step": 11620 + }, + { + "epoch": 0.34, + "learning_rate": 3.276832809814497e-05, + "loss": 1.0091, + "step": 11630 + }, + { + "epoch": 0.34, + "learning_rate": 3.2753511527292125e-05, + "loss": 0.9522, + "step": 11640 + }, + { + "epoch": 0.35, + "learning_rate": 3.273869495643928e-05, + "loss": 0.9861, + "step": 11650 + }, + { + "epoch": 0.35, + "learning_rate": 3.272387838558644e-05, + "loss": 0.9244, + "step": 11660 + }, + { + "epoch": 0.35, + "learning_rate": 3.27090618147336e-05, + "loss": 0.9683, + "step": 11670 + }, + { + "epoch": 0.35, + "learning_rate": 3.2694245243880754e-05, + "loss": 0.8495, + "step": 11680 + }, + { + "epoch": 0.35, + "learning_rate": 3.267942867302792e-05, + "loss": 0.876, + "step": 11690 + }, + { + "epoch": 0.35, + "learning_rate": 3.2664612102175076e-05, + "loss": 0.8485, + "step": 11700 + }, + { + "epoch": 0.35, + "learning_rate": 3.264979553132223e-05, + "loss": 0.9864, + "step": 11710 + }, + { + "epoch": 0.35, + "learning_rate": 3.263497896046939e-05, + "loss": 0.9987, + "step": 11720 + }, + { + "epoch": 0.35, + "learning_rate": 3.262016238961655e-05, + "loss": 0.8928, + "step": 11730 + }, + { + "epoch": 0.35, + "learning_rate": 3.2605345818763705e-05, + "loss": 0.8925, + "step": 11740 + }, + { + "epoch": 0.35, + "learning_rate": 3.259052924791087e-05, + "loss": 0.9792, + "step": 11750 + }, + { + "epoch": 0.35, + "learning_rate": 3.2575712677058026e-05, + "loss": 0.953, + "step": 11760 + }, + { + "epoch": 0.35, + "learning_rate": 3.256089610620518e-05, + "loss": 0.8147, + "step": 11770 + }, + { + "epoch": 0.35, + "learning_rate": 3.254607953535234e-05, + "loss": 0.8235, + "step": 11780 + }, + { + "epoch": 0.35, + "learning_rate": 3.25312629644995e-05, + "loss": 0.8287, + "step": 11790 + }, + { + "epoch": 0.35, + "learning_rate": 3.2516446393646655e-05, + "loss": 0.993, + "step": 11800 + }, + { + "epoch": 0.35, + "learning_rate": 3.250162982279382e-05, + "loss": 0.9838, + "step": 11810 + }, + { + "epoch": 0.35, + "learning_rate": 3.2486813251940976e-05, + "loss": 1.0423, + "step": 11820 + }, + { + "epoch": 0.35, + "learning_rate": 3.2471996681088126e-05, + "loss": 0.9387, + "step": 11830 + }, + { + "epoch": 0.35, + "learning_rate": 3.245718011023529e-05, + "loss": 0.893, + "step": 11840 + }, + { + "epoch": 0.35, + "learning_rate": 3.244236353938245e-05, + "loss": 0.9076, + "step": 11850 + }, + { + "epoch": 0.35, + "learning_rate": 3.2427546968529605e-05, + "loss": 0.8877, + "step": 11860 + }, + { + "epoch": 0.35, + "learning_rate": 3.241273039767677e-05, + "loss": 0.8991, + "step": 11870 + }, + { + "epoch": 0.35, + "learning_rate": 3.239791382682392e-05, + "loss": 0.9059, + "step": 11880 + }, + { + "epoch": 0.35, + "learning_rate": 3.2383097255971077e-05, + "loss": 0.9591, + "step": 11890 + }, + { + "epoch": 0.35, + "learning_rate": 3.2368280685118234e-05, + "loss": 1.0124, + "step": 11900 + }, + { + "epoch": 0.35, + "learning_rate": 3.23534641142654e-05, + "loss": 0.9362, + "step": 11910 + }, + { + "epoch": 0.35, + "learning_rate": 3.2338647543412555e-05, + "loss": 0.87, + "step": 11920 + }, + { + "epoch": 0.35, + "learning_rate": 3.232383097255971e-05, + "loss": 0.925, + "step": 11930 + }, + { + "epoch": 0.35, + "learning_rate": 3.230901440170687e-05, + "loss": 0.9574, + "step": 11940 + }, + { + "epoch": 0.35, + "learning_rate": 3.229419783085403e-05, + "loss": 0.936, + "step": 11950 + }, + { + "epoch": 0.35, + "learning_rate": 3.2279381260001184e-05, + "loss": 0.7628, + "step": 11960 + }, + { + "epoch": 0.35, + "learning_rate": 3.226456468914835e-05, + "loss": 0.9247, + "step": 11970 + }, + { + "epoch": 0.36, + "learning_rate": 3.2249748118295505e-05, + "loss": 1.0546, + "step": 11980 + }, + { + "epoch": 0.36, + "learning_rate": 3.223493154744266e-05, + "loss": 0.7548, + "step": 11990 + }, + { + "epoch": 0.36, + "learning_rate": 3.222011497658982e-05, + "loss": 0.8808, + "step": 12000 + }, + { + "epoch": 0.36, + "learning_rate": 3.220529840573698e-05, + "loss": 0.875, + "step": 12010 + }, + { + "epoch": 0.36, + "learning_rate": 3.2190481834884134e-05, + "loss": 0.9583, + "step": 12020 + }, + { + "epoch": 0.36, + "learning_rate": 3.21756652640313e-05, + "loss": 0.9905, + "step": 12030 + }, + { + "epoch": 0.36, + "learning_rate": 3.2160848693178455e-05, + "loss": 0.9819, + "step": 12040 + }, + { + "epoch": 0.36, + "learning_rate": 3.2146032122325606e-05, + "loss": 0.8693, + "step": 12050 + }, + { + "epoch": 0.36, + "learning_rate": 3.213121555147277e-05, + "loss": 0.851, + "step": 12060 + }, + { + "epoch": 0.36, + "learning_rate": 3.211639898061993e-05, + "loss": 0.9138, + "step": 12070 + }, + { + "epoch": 0.36, + "learning_rate": 3.2101582409767084e-05, + "loss": 0.95, + "step": 12080 + }, + { + "epoch": 0.36, + "learning_rate": 3.208676583891425e-05, + "loss": 1.0171, + "step": 12090 + }, + { + "epoch": 0.36, + "learning_rate": 3.20719492680614e-05, + "loss": 0.7627, + "step": 12100 + }, + { + "epoch": 0.36, + "learning_rate": 3.2057132697208556e-05, + "loss": 1.0109, + "step": 12110 + }, + { + "epoch": 0.36, + "learning_rate": 3.204231612635572e-05, + "loss": 0.9426, + "step": 12120 + }, + { + "epoch": 0.36, + "learning_rate": 3.202749955550288e-05, + "loss": 0.9074, + "step": 12130 + }, + { + "epoch": 0.36, + "learning_rate": 3.2012682984650035e-05, + "loss": 0.8692, + "step": 12140 + }, + { + "epoch": 0.36, + "learning_rate": 3.199786641379719e-05, + "loss": 0.8587, + "step": 12150 + }, + { + "epoch": 0.36, + "learning_rate": 3.198304984294435e-05, + "loss": 0.8806, + "step": 12160 + }, + { + "epoch": 0.36, + "learning_rate": 3.1968233272091506e-05, + "loss": 0.8254, + "step": 12170 + }, + { + "epoch": 0.36, + "learning_rate": 3.1953416701238663e-05, + "loss": 0.9484, + "step": 12180 + }, + { + "epoch": 0.36, + "learning_rate": 3.193860013038583e-05, + "loss": 1.0171, + "step": 12190 + }, + { + "epoch": 0.36, + "learning_rate": 3.1923783559532985e-05, + "loss": 0.8648, + "step": 12200 + }, + { + "epoch": 0.36, + "learning_rate": 3.190896698868014e-05, + "loss": 0.9771, + "step": 12210 + }, + { + "epoch": 0.36, + "learning_rate": 3.18941504178273e-05, + "loss": 0.9566, + "step": 12220 + }, + { + "epoch": 0.36, + "learning_rate": 3.1879333846974456e-05, + "loss": 1.0407, + "step": 12230 + }, + { + "epoch": 0.36, + "learning_rate": 3.1864517276121614e-05, + "loss": 0.8478, + "step": 12240 + }, + { + "epoch": 0.36, + "learning_rate": 3.184970070526878e-05, + "loss": 0.9864, + "step": 12250 + }, + { + "epoch": 0.36, + "learning_rate": 3.1834884134415935e-05, + "loss": 0.9417, + "step": 12260 + }, + { + "epoch": 0.36, + "learning_rate": 3.1820067563563085e-05, + "loss": 0.8376, + "step": 12270 + }, + { + "epoch": 0.36, + "learning_rate": 3.180525099271025e-05, + "loss": 0.8979, + "step": 12280 + }, + { + "epoch": 0.36, + "learning_rate": 3.1790434421857407e-05, + "loss": 0.9631, + "step": 12290 + }, + { + "epoch": 0.36, + "learning_rate": 3.1775617851004564e-05, + "loss": 0.9826, + "step": 12300 + }, + { + "epoch": 0.36, + "learning_rate": 3.176080128015173e-05, + "loss": 0.8654, + "step": 12310 + }, + { + "epoch": 0.37, + "learning_rate": 3.174598470929888e-05, + "loss": 0.8159, + "step": 12320 + }, + { + "epoch": 0.37, + "learning_rate": 3.1731168138446035e-05, + "loss": 1.0192, + "step": 12330 + }, + { + "epoch": 0.37, + "learning_rate": 3.17163515675932e-05, + "loss": 1.0343, + "step": 12340 + }, + { + "epoch": 0.37, + "learning_rate": 3.170153499674036e-05, + "loss": 0.8398, + "step": 12350 + }, + { + "epoch": 0.37, + "learning_rate": 3.1686718425887514e-05, + "loss": 0.8501, + "step": 12360 + }, + { + "epoch": 0.37, + "learning_rate": 3.167190185503468e-05, + "loss": 0.8306, + "step": 12370 + }, + { + "epoch": 0.37, + "learning_rate": 3.165708528418183e-05, + "loss": 0.8984, + "step": 12380 + }, + { + "epoch": 0.37, + "learning_rate": 3.1642268713328986e-05, + "loss": 0.8919, + "step": 12390 + }, + { + "epoch": 0.37, + "learning_rate": 3.162745214247615e-05, + "loss": 0.8088, + "step": 12400 + }, + { + "epoch": 0.37, + "learning_rate": 3.161263557162331e-05, + "loss": 0.9202, + "step": 12410 + }, + { + "epoch": 0.37, + "learning_rate": 3.1597819000770464e-05, + "loss": 0.9711, + "step": 12420 + }, + { + "epoch": 0.37, + "learning_rate": 3.158300242991762e-05, + "loss": 0.8329, + "step": 12430 + }, + { + "epoch": 0.37, + "learning_rate": 3.156818585906478e-05, + "loss": 1.0967, + "step": 12440 + }, + { + "epoch": 0.37, + "learning_rate": 3.1553369288211936e-05, + "loss": 0.9618, + "step": 12450 + }, + { + "epoch": 0.37, + "learning_rate": 3.153855271735909e-05, + "loss": 0.9283, + "step": 12460 + }, + { + "epoch": 0.37, + "learning_rate": 3.152373614650626e-05, + "loss": 1.1064, + "step": 12470 + }, + { + "epoch": 0.37, + "learning_rate": 3.1508919575653414e-05, + "loss": 1.0143, + "step": 12480 + }, + { + "epoch": 0.37, + "learning_rate": 3.1494103004800565e-05, + "loss": 0.8655, + "step": 12490 + }, + { + "epoch": 0.37, + "learning_rate": 3.147928643394773e-05, + "loss": 0.965, + "step": 12500 + }, + { + "epoch": 0.37, + "learning_rate": 3.1464469863094886e-05, + "loss": 0.8912, + "step": 12510 + }, + { + "epoch": 0.37, + "learning_rate": 3.144965329224204e-05, + "loss": 0.8465, + "step": 12520 + }, + { + "epoch": 0.37, + "learning_rate": 3.143483672138921e-05, + "loss": 0.9123, + "step": 12530 + }, + { + "epoch": 0.37, + "learning_rate": 3.1420020150536364e-05, + "loss": 0.8008, + "step": 12540 + }, + { + "epoch": 0.37, + "learning_rate": 3.1405203579683515e-05, + "loss": 0.9382, + "step": 12550 + }, + { + "epoch": 0.37, + "learning_rate": 3.139038700883068e-05, + "loss": 0.8803, + "step": 12560 + }, + { + "epoch": 0.37, + "learning_rate": 3.1375570437977836e-05, + "loss": 1.0119, + "step": 12570 + }, + { + "epoch": 0.37, + "learning_rate": 3.1360753867124993e-05, + "loss": 0.9621, + "step": 12580 + }, + { + "epoch": 0.37, + "learning_rate": 3.134593729627216e-05, + "loss": 0.8976, + "step": 12590 + }, + { + "epoch": 0.37, + "learning_rate": 3.133112072541931e-05, + "loss": 0.9723, + "step": 12600 + }, + { + "epoch": 0.37, + "learning_rate": 3.1316304154566465e-05, + "loss": 0.9753, + "step": 12610 + }, + { + "epoch": 0.37, + "learning_rate": 3.130148758371363e-05, + "loss": 0.8936, + "step": 12620 + }, + { + "epoch": 0.37, + "learning_rate": 3.1286671012860786e-05, + "loss": 1.0737, + "step": 12630 + }, + { + "epoch": 0.37, + "learning_rate": 3.1271854442007944e-05, + "loss": 0.8781, + "step": 12640 + }, + { + "epoch": 0.37, + "learning_rate": 3.12570378711551e-05, + "loss": 0.8997, + "step": 12650 + }, + { + "epoch": 0.38, + "learning_rate": 3.124222130030226e-05, + "loss": 0.9192, + "step": 12660 + }, + { + "epoch": 0.38, + "learning_rate": 3.1227404729449415e-05, + "loss": 0.8524, + "step": 12670 + }, + { + "epoch": 0.38, + "learning_rate": 3.121258815859658e-05, + "loss": 1.0335, + "step": 12680 + }, + { + "epoch": 0.38, + "learning_rate": 3.1197771587743737e-05, + "loss": 0.9639, + "step": 12690 + }, + { + "epoch": 0.38, + "learning_rate": 3.1182955016890894e-05, + "loss": 0.818, + "step": 12700 + }, + { + "epoch": 0.38, + "learning_rate": 3.116813844603805e-05, + "loss": 0.9447, + "step": 12710 + }, + { + "epoch": 0.38, + "learning_rate": 3.115332187518521e-05, + "loss": 0.9139, + "step": 12720 + }, + { + "epoch": 0.38, + "learning_rate": 3.1138505304332365e-05, + "loss": 1.004, + "step": 12730 + }, + { + "epoch": 0.38, + "learning_rate": 3.112368873347952e-05, + "loss": 0.8301, + "step": 12740 + }, + { + "epoch": 0.38, + "learning_rate": 3.110887216262669e-05, + "loss": 1.0328, + "step": 12750 + }, + { + "epoch": 0.38, + "learning_rate": 3.1094055591773844e-05, + "loss": 0.9522, + "step": 12760 + }, + { + "epoch": 0.38, + "learning_rate": 3.1079239020920994e-05, + "loss": 0.8784, + "step": 12770 + }, + { + "epoch": 0.38, + "learning_rate": 3.106442245006816e-05, + "loss": 0.9544, + "step": 12780 + }, + { + "epoch": 0.38, + "learning_rate": 3.1049605879215316e-05, + "loss": 0.9545, + "step": 12790 + }, + { + "epoch": 0.38, + "learning_rate": 3.103478930836247e-05, + "loss": 0.8227, + "step": 12800 + }, + { + "epoch": 0.38, + "learning_rate": 3.101997273750964e-05, + "loss": 1.0182, + "step": 12810 + }, + { + "epoch": 0.38, + "learning_rate": 3.100515616665679e-05, + "loss": 0.9353, + "step": 12820 + }, + { + "epoch": 0.38, + "learning_rate": 3.0990339595803945e-05, + "loss": 1.0409, + "step": 12830 + }, + { + "epoch": 0.38, + "learning_rate": 3.097552302495111e-05, + "loss": 0.9231, + "step": 12840 + }, + { + "epoch": 0.38, + "learning_rate": 3.0960706454098266e-05, + "loss": 1.0812, + "step": 12850 + }, + { + "epoch": 0.38, + "learning_rate": 3.094588988324542e-05, + "loss": 0.8666, + "step": 12860 + }, + { + "epoch": 0.38, + "learning_rate": 3.093107331239258e-05, + "loss": 0.8488, + "step": 12870 + }, + { + "epoch": 0.38, + "learning_rate": 3.091625674153974e-05, + "loss": 0.8775, + "step": 12880 + }, + { + "epoch": 0.38, + "learning_rate": 3.0901440170686895e-05, + "loss": 0.8833, + "step": 12890 + }, + { + "epoch": 0.38, + "learning_rate": 3.088662359983406e-05, + "loss": 0.8949, + "step": 12900 + }, + { + "epoch": 0.38, + "learning_rate": 3.0871807028981216e-05, + "loss": 0.9946, + "step": 12910 + }, + { + "epoch": 0.38, + "learning_rate": 3.085699045812837e-05, + "loss": 0.9376, + "step": 12920 + }, + { + "epoch": 0.38, + "learning_rate": 3.084217388727553e-05, + "loss": 0.8217, + "step": 12930 + }, + { + "epoch": 0.38, + "learning_rate": 3.082735731642269e-05, + "loss": 0.8726, + "step": 12940 + }, + { + "epoch": 0.38, + "learning_rate": 3.0812540745569845e-05, + "loss": 0.912, + "step": 12950 + }, + { + "epoch": 0.38, + "learning_rate": 3.079772417471701e-05, + "loss": 0.985, + "step": 12960 + }, + { + "epoch": 0.38, + "learning_rate": 3.0782907603864166e-05, + "loss": 0.8384, + "step": 12970 + }, + { + "epoch": 0.38, + "learning_rate": 3.076809103301132e-05, + "loss": 0.8627, + "step": 12980 + }, + { + "epoch": 0.38, + "learning_rate": 3.0753274462158474e-05, + "loss": 0.8514, + "step": 12990 + }, + { + "epoch": 0.39, + "learning_rate": 3.073845789130564e-05, + "loss": 0.8775, + "step": 13000 + }, + { + "epoch": 0.39, + "learning_rate": 3.0723641320452795e-05, + "loss": 0.8567, + "step": 13010 + }, + { + "epoch": 0.39, + "learning_rate": 3.070882474959995e-05, + "loss": 0.7999, + "step": 13020 + }, + { + "epoch": 0.39, + "learning_rate": 3.0694008178747116e-05, + "loss": 0.8365, + "step": 13030 + }, + { + "epoch": 0.39, + "learning_rate": 3.067919160789427e-05, + "loss": 0.83, + "step": 13040 + }, + { + "epoch": 0.39, + "learning_rate": 3.0664375037041424e-05, + "loss": 0.9431, + "step": 13050 + }, + { + "epoch": 0.39, + "learning_rate": 3.064955846618859e-05, + "loss": 0.838, + "step": 13060 + }, + { + "epoch": 0.39, + "learning_rate": 3.0634741895335745e-05, + "loss": 1.0418, + "step": 13070 + }, + { + "epoch": 0.39, + "learning_rate": 3.06199253244829e-05, + "loss": 0.7926, + "step": 13080 + }, + { + "epoch": 0.39, + "learning_rate": 3.060510875363006e-05, + "loss": 0.8476, + "step": 13090 + }, + { + "epoch": 0.39, + "learning_rate": 3.059029218277722e-05, + "loss": 0.9605, + "step": 13100 + }, + { + "epoch": 0.39, + "learning_rate": 3.0575475611924374e-05, + "loss": 0.8607, + "step": 13110 + }, + { + "epoch": 0.39, + "learning_rate": 3.056065904107154e-05, + "loss": 1.0471, + "step": 13120 + }, + { + "epoch": 0.39, + "learning_rate": 3.0545842470218695e-05, + "loss": 0.8444, + "step": 13130 + }, + { + "epoch": 0.39, + "learning_rate": 3.053102589936585e-05, + "loss": 0.9976, + "step": 13140 + }, + { + "epoch": 0.39, + "learning_rate": 3.0516209328513013e-05, + "loss": 0.9886, + "step": 13150 + }, + { + "epoch": 0.39, + "learning_rate": 3.050139275766017e-05, + "loss": 0.8636, + "step": 13160 + }, + { + "epoch": 0.39, + "learning_rate": 3.0486576186807324e-05, + "loss": 0.8408, + "step": 13170 + }, + { + "epoch": 0.39, + "learning_rate": 3.047175961595449e-05, + "loss": 0.8352, + "step": 13180 + }, + { + "epoch": 0.39, + "learning_rate": 3.0456943045101642e-05, + "loss": 1.0622, + "step": 13190 + }, + { + "epoch": 0.39, + "learning_rate": 3.04421264742488e-05, + "loss": 0.9629, + "step": 13200 + }, + { + "epoch": 0.39, + "learning_rate": 3.0427309903395963e-05, + "loss": 0.9512, + "step": 13210 + }, + { + "epoch": 0.39, + "learning_rate": 3.0412493332543117e-05, + "loss": 0.8721, + "step": 13220 + }, + { + "epoch": 0.39, + "learning_rate": 3.0397676761690274e-05, + "loss": 0.9748, + "step": 13230 + }, + { + "epoch": 0.39, + "learning_rate": 3.0382860190837435e-05, + "loss": 1.0791, + "step": 13240 + }, + { + "epoch": 0.39, + "learning_rate": 3.0368043619984592e-05, + "loss": 0.8108, + "step": 13250 + }, + { + "epoch": 0.39, + "learning_rate": 3.035322704913175e-05, + "loss": 0.9094, + "step": 13260 + }, + { + "epoch": 0.39, + "learning_rate": 3.0338410478278907e-05, + "loss": 1.0127, + "step": 13270 + }, + { + "epoch": 0.39, + "learning_rate": 3.0323593907426067e-05, + "loss": 0.8709, + "step": 13280 + }, + { + "epoch": 0.39, + "learning_rate": 3.0308777336573225e-05, + "loss": 0.9389, + "step": 13290 + }, + { + "epoch": 0.39, + "learning_rate": 3.0293960765720382e-05, + "loss": 0.9801, + "step": 13300 + }, + { + "epoch": 0.39, + "learning_rate": 3.0279144194867543e-05, + "loss": 1.082, + "step": 13310 + }, + { + "epoch": 0.39, + "learning_rate": 3.02643276240147e-05, + "loss": 0.8681, + "step": 13320 + }, + { + "epoch": 0.4, + "learning_rate": 3.0249511053161857e-05, + "loss": 0.8893, + "step": 13330 + }, + { + "epoch": 0.4, + "learning_rate": 3.0234694482309018e-05, + "loss": 0.9865, + "step": 13340 + }, + { + "epoch": 0.4, + "learning_rate": 3.0219877911456175e-05, + "loss": 0.8258, + "step": 13350 + }, + { + "epoch": 0.4, + "learning_rate": 3.020506134060333e-05, + "loss": 0.9767, + "step": 13360 + }, + { + "epoch": 0.4, + "learning_rate": 3.0190244769750493e-05, + "loss": 0.8932, + "step": 13370 + }, + { + "epoch": 0.4, + "learning_rate": 3.017542819889765e-05, + "loss": 0.8748, + "step": 13380 + }, + { + "epoch": 0.4, + "learning_rate": 3.0160611628044804e-05, + "loss": 0.9297, + "step": 13390 + }, + { + "epoch": 0.4, + "learning_rate": 3.0145795057191968e-05, + "loss": 0.7237, + "step": 13400 + }, + { + "epoch": 0.4, + "learning_rate": 3.013097848633912e-05, + "loss": 1.0156, + "step": 13410 + }, + { + "epoch": 0.4, + "learning_rate": 3.011616191548628e-05, + "loss": 0.8948, + "step": 13420 + }, + { + "epoch": 0.4, + "learning_rate": 3.0101345344633443e-05, + "loss": 0.9977, + "step": 13430 + }, + { + "epoch": 0.4, + "learning_rate": 3.0086528773780597e-05, + "loss": 0.9071, + "step": 13440 + }, + { + "epoch": 0.4, + "learning_rate": 3.0071712202927754e-05, + "loss": 0.9769, + "step": 13450 + }, + { + "epoch": 0.4, + "learning_rate": 3.0056895632074915e-05, + "loss": 0.9851, + "step": 13460 + }, + { + "epoch": 0.4, + "learning_rate": 3.0042079061222072e-05, + "loss": 0.9729, + "step": 13470 + }, + { + "epoch": 0.4, + "learning_rate": 3.002726249036923e-05, + "loss": 0.8864, + "step": 13480 + }, + { + "epoch": 0.4, + "learning_rate": 3.001244591951639e-05, + "loss": 1.0173, + "step": 13490 + }, + { + "epoch": 0.4, + "learning_rate": 2.9997629348663547e-05, + "loss": 0.8762, + "step": 13500 + }, + { + "epoch": 0.4, + "learning_rate": 2.9982812777810704e-05, + "loss": 1.0338, + "step": 13510 + }, + { + "epoch": 0.4, + "learning_rate": 2.9967996206957865e-05, + "loss": 0.869, + "step": 13520 + }, + { + "epoch": 0.4, + "learning_rate": 2.9953179636105022e-05, + "loss": 0.978, + "step": 13530 + }, + { + "epoch": 0.4, + "learning_rate": 2.993836306525218e-05, + "loss": 0.8195, + "step": 13540 + }, + { + "epoch": 0.4, + "learning_rate": 2.9923546494399336e-05, + "loss": 1.0383, + "step": 13550 + }, + { + "epoch": 0.4, + "learning_rate": 2.9908729923546497e-05, + "loss": 0.8594, + "step": 13560 + }, + { + "epoch": 0.4, + "learning_rate": 2.9893913352693654e-05, + "loss": 0.9188, + "step": 13570 + }, + { + "epoch": 0.4, + "learning_rate": 2.9879096781840808e-05, + "loss": 0.9374, + "step": 13580 + }, + { + "epoch": 0.4, + "learning_rate": 2.9864280210987972e-05, + "loss": 0.7712, + "step": 13590 + }, + { + "epoch": 0.4, + "learning_rate": 2.984946364013513e-05, + "loss": 1.0269, + "step": 13600 + }, + { + "epoch": 0.4, + "learning_rate": 2.9834647069282283e-05, + "loss": 0.9598, + "step": 13610 + }, + { + "epoch": 0.4, + "learning_rate": 2.9819830498429447e-05, + "loss": 0.908, + "step": 13620 + }, + { + "epoch": 0.4, + "learning_rate": 2.98050139275766e-05, + "loss": 0.8749, + "step": 13630 + }, + { + "epoch": 0.4, + "learning_rate": 2.9790197356723758e-05, + "loss": 0.9237, + "step": 13640 + }, + { + "epoch": 0.4, + "learning_rate": 2.9775380785870922e-05, + "loss": 0.9706, + "step": 13650 + }, + { + "epoch": 0.4, + "learning_rate": 2.9760564215018076e-05, + "loss": 0.9432, + "step": 13660 + }, + { + "epoch": 0.41, + "learning_rate": 2.9745747644165233e-05, + "loss": 0.994, + "step": 13670 + }, + { + "epoch": 0.41, + "learning_rate": 2.9730931073312397e-05, + "loss": 0.981, + "step": 13680 + }, + { + "epoch": 0.41, + "learning_rate": 2.971611450245955e-05, + "loss": 0.8332, + "step": 13690 + }, + { + "epoch": 0.41, + "learning_rate": 2.970129793160671e-05, + "loss": 0.8669, + "step": 13700 + }, + { + "epoch": 0.41, + "learning_rate": 2.968648136075387e-05, + "loss": 0.8138, + "step": 13710 + }, + { + "epoch": 0.41, + "learning_rate": 2.9671664789901026e-05, + "loss": 0.9631, + "step": 13720 + }, + { + "epoch": 0.41, + "learning_rate": 2.9656848219048184e-05, + "loss": 0.8868, + "step": 13730 + }, + { + "epoch": 0.41, + "learning_rate": 2.9642031648195344e-05, + "loss": 0.8991, + "step": 13740 + }, + { + "epoch": 0.41, + "learning_rate": 2.96272150773425e-05, + "loss": 0.8355, + "step": 13750 + }, + { + "epoch": 0.41, + "learning_rate": 2.961239850648966e-05, + "loss": 0.8919, + "step": 13760 + }, + { + "epoch": 0.41, + "learning_rate": 2.959758193563682e-05, + "loss": 0.8202, + "step": 13770 + }, + { + "epoch": 0.41, + "learning_rate": 2.9582765364783976e-05, + "loss": 1.0218, + "step": 13780 + }, + { + "epoch": 0.41, + "learning_rate": 2.9567948793931134e-05, + "loss": 0.8098, + "step": 13790 + }, + { + "epoch": 0.41, + "learning_rate": 2.9553132223078294e-05, + "loss": 0.9244, + "step": 13800 + }, + { + "epoch": 0.41, + "learning_rate": 2.953831565222545e-05, + "loss": 0.9319, + "step": 13810 + }, + { + "epoch": 0.41, + "learning_rate": 2.952349908137261e-05, + "loss": 0.8676, + "step": 13820 + }, + { + "epoch": 0.41, + "learning_rate": 2.9508682510519763e-05, + "loss": 1.0167, + "step": 13830 + }, + { + "epoch": 0.41, + "learning_rate": 2.9493865939666927e-05, + "loss": 0.8974, + "step": 13840 + }, + { + "epoch": 0.41, + "learning_rate": 2.9479049368814084e-05, + "loss": 0.9078, + "step": 13850 + }, + { + "epoch": 0.41, + "learning_rate": 2.9464232797961238e-05, + "loss": 0.8083, + "step": 13860 + }, + { + "epoch": 0.41, + "learning_rate": 2.9449416227108402e-05, + "loss": 0.83, + "step": 13870 + }, + { + "epoch": 0.41, + "learning_rate": 2.9434599656255556e-05, + "loss": 0.9206, + "step": 13880 + }, + { + "epoch": 0.41, + "learning_rate": 2.9419783085402713e-05, + "loss": 0.945, + "step": 13890 + }, + { + "epoch": 0.41, + "learning_rate": 2.9404966514549877e-05, + "loss": 0.9216, + "step": 13900 + }, + { + "epoch": 0.41, + "learning_rate": 2.939014994369703e-05, + "loss": 0.7943, + "step": 13910 + }, + { + "epoch": 0.41, + "learning_rate": 2.9375333372844188e-05, + "loss": 0.9959, + "step": 13920 + }, + { + "epoch": 0.41, + "learning_rate": 2.936051680199135e-05, + "loss": 0.971, + "step": 13930 + }, + { + "epoch": 0.41, + "learning_rate": 2.9345700231138506e-05, + "loss": 0.9838, + "step": 13940 + }, + { + "epoch": 0.41, + "learning_rate": 2.9330883660285663e-05, + "loss": 0.8578, + "step": 13950 + }, + { + "epoch": 0.41, + "learning_rate": 2.9316067089432824e-05, + "loss": 0.9311, + "step": 13960 + }, + { + "epoch": 0.41, + "learning_rate": 2.930125051857998e-05, + "loss": 0.8394, + "step": 13970 + }, + { + "epoch": 0.41, + "learning_rate": 2.9286433947727138e-05, + "loss": 0.88, + "step": 13980 + }, + { + "epoch": 0.41, + "learning_rate": 2.92716173768743e-05, + "loss": 0.8746, + "step": 13990 + }, + { + "epoch": 0.41, + "learning_rate": 2.9256800806021456e-05, + "loss": 0.8464, + "step": 14000 + }, + { + "epoch": 0.42, + "learning_rate": 2.9241984235168613e-05, + "loss": 0.8344, + "step": 14010 + }, + { + "epoch": 0.42, + "learning_rate": 2.9227167664315774e-05, + "loss": 0.8508, + "step": 14020 + }, + { + "epoch": 0.42, + "learning_rate": 2.921235109346293e-05, + "loss": 0.9861, + "step": 14030 + }, + { + "epoch": 0.42, + "learning_rate": 2.9197534522610088e-05, + "loss": 0.9741, + "step": 14040 + }, + { + "epoch": 0.42, + "learning_rate": 2.918271795175725e-05, + "loss": 0.8541, + "step": 14050 + }, + { + "epoch": 0.42, + "learning_rate": 2.9167901380904406e-05, + "loss": 1.1122, + "step": 14060 + }, + { + "epoch": 0.42, + "learning_rate": 2.9153084810051563e-05, + "loss": 0.8881, + "step": 14070 + }, + { + "epoch": 0.42, + "learning_rate": 2.9138268239198724e-05, + "loss": 0.8445, + "step": 14080 + }, + { + "epoch": 0.42, + "learning_rate": 2.912345166834588e-05, + "loss": 0.7931, + "step": 14090 + }, + { + "epoch": 0.42, + "learning_rate": 2.9108635097493035e-05, + "loss": 0.8219, + "step": 14100 + }, + { + "epoch": 0.42, + "learning_rate": 2.9093818526640192e-05, + "loss": 0.9491, + "step": 14110 + }, + { + "epoch": 0.42, + "learning_rate": 2.9079001955787356e-05, + "loss": 1.0196, + "step": 14120 + }, + { + "epoch": 0.42, + "learning_rate": 2.906418538493451e-05, + "loss": 1.0039, + "step": 14130 + }, + { + "epoch": 0.42, + "learning_rate": 2.9049368814081667e-05, + "loss": 0.9614, + "step": 14140 + }, + { + "epoch": 0.42, + "learning_rate": 2.903455224322883e-05, + "loss": 1.0026, + "step": 14150 + }, + { + "epoch": 0.42, + "learning_rate": 2.9019735672375985e-05, + "loss": 0.9555, + "step": 14160 + }, + { + "epoch": 0.42, + "learning_rate": 2.9004919101523142e-05, + "loss": 0.9415, + "step": 14170 + }, + { + "epoch": 0.42, + "learning_rate": 2.8990102530670303e-05, + "loss": 0.6998, + "step": 14180 + }, + { + "epoch": 0.42, + "learning_rate": 2.897528595981746e-05, + "loss": 0.9666, + "step": 14190 + }, + { + "epoch": 0.42, + "learning_rate": 2.8960469388964618e-05, + "loss": 0.884, + "step": 14200 + }, + { + "epoch": 0.42, + "learning_rate": 2.8945652818111778e-05, + "loss": 0.8459, + "step": 14210 + }, + { + "epoch": 0.42, + "learning_rate": 2.8930836247258935e-05, + "loss": 0.8909, + "step": 14220 + }, + { + "epoch": 0.42, + "learning_rate": 2.8916019676406093e-05, + "loss": 0.9377, + "step": 14230 + }, + { + "epoch": 0.42, + "learning_rate": 2.8901203105553253e-05, + "loss": 0.9647, + "step": 14240 + }, + { + "epoch": 0.42, + "learning_rate": 2.888638653470041e-05, + "loss": 0.9309, + "step": 14250 + }, + { + "epoch": 0.42, + "learning_rate": 2.8871569963847568e-05, + "loss": 1.0228, + "step": 14260 + }, + { + "epoch": 0.42, + "learning_rate": 2.8856753392994728e-05, + "loss": 0.8874, + "step": 14270 + }, + { + "epoch": 0.42, + "learning_rate": 2.8841936822141886e-05, + "loss": 0.845, + "step": 14280 + }, + { + "epoch": 0.42, + "learning_rate": 2.8827120251289043e-05, + "loss": 0.8833, + "step": 14290 + }, + { + "epoch": 0.42, + "learning_rate": 2.8812303680436203e-05, + "loss": 1.0485, + "step": 14300 + }, + { + "epoch": 0.42, + "learning_rate": 2.879748710958336e-05, + "loss": 0.859, + "step": 14310 + }, + { + "epoch": 0.42, + "learning_rate": 2.8782670538730518e-05, + "loss": 0.8155, + "step": 14320 + }, + { + "epoch": 0.42, + "learning_rate": 2.876785396787768e-05, + "loss": 0.9114, + "step": 14330 + }, + { + "epoch": 0.42, + "learning_rate": 2.8753037397024836e-05, + "loss": 0.8955, + "step": 14340 + }, + { + "epoch": 0.43, + "learning_rate": 2.873822082617199e-05, + "loss": 0.8879, + "step": 14350 + }, + { + "epoch": 0.43, + "learning_rate": 2.8723404255319154e-05, + "loss": 0.9131, + "step": 14360 + }, + { + "epoch": 0.43, + "learning_rate": 2.870858768446631e-05, + "loss": 0.925, + "step": 14370 + }, + { + "epoch": 0.43, + "learning_rate": 2.8693771113613465e-05, + "loss": 0.8615, + "step": 14380 + }, + { + "epoch": 0.43, + "learning_rate": 2.8678954542760622e-05, + "loss": 0.9005, + "step": 14390 + }, + { + "epoch": 0.43, + "learning_rate": 2.8664137971907782e-05, + "loss": 1.0335, + "step": 14400 + }, + { + "epoch": 0.43, + "learning_rate": 2.864932140105494e-05, + "loss": 0.8189, + "step": 14410 + }, + { + "epoch": 0.43, + "learning_rate": 2.8634504830202097e-05, + "loss": 0.8096, + "step": 14420 + }, + { + "epoch": 0.43, + "learning_rate": 2.8619688259349258e-05, + "loss": 0.8743, + "step": 14430 + }, + { + "epoch": 0.43, + "learning_rate": 2.8604871688496415e-05, + "loss": 0.7303, + "step": 14440 + }, + { + "epoch": 0.43, + "learning_rate": 2.8590055117643572e-05, + "loss": 0.9973, + "step": 14450 + }, + { + "epoch": 0.43, + "learning_rate": 2.8575238546790733e-05, + "loss": 0.7832, + "step": 14460 + }, + { + "epoch": 0.43, + "learning_rate": 2.856042197593789e-05, + "loss": 0.9041, + "step": 14470 + }, + { + "epoch": 0.43, + "learning_rate": 2.8545605405085047e-05, + "loss": 0.8215, + "step": 14480 + }, + { + "epoch": 0.43, + "learning_rate": 2.8530788834232208e-05, + "loss": 0.9953, + "step": 14490 + }, + { + "epoch": 0.43, + "learning_rate": 2.8515972263379365e-05, + "loss": 0.9047, + "step": 14500 + }, + { + "epoch": 0.43, + "learning_rate": 2.8501155692526522e-05, + "loss": 0.9655, + "step": 14510 + }, + { + "epoch": 0.43, + "learning_rate": 2.8486339121673683e-05, + "loss": 0.8112, + "step": 14520 + }, + { + "epoch": 0.43, + "learning_rate": 2.847152255082084e-05, + "loss": 0.9085, + "step": 14530 + }, + { + "epoch": 0.43, + "learning_rate": 2.8456705979967997e-05, + "loss": 0.8962, + "step": 14540 + }, + { + "epoch": 0.43, + "learning_rate": 2.8441889409115158e-05, + "loss": 0.8624, + "step": 14550 + }, + { + "epoch": 0.43, + "learning_rate": 2.8427072838262315e-05, + "loss": 0.8384, + "step": 14560 + }, + { + "epoch": 0.43, + "learning_rate": 2.841225626740947e-05, + "loss": 0.8974, + "step": 14570 + }, + { + "epoch": 0.43, + "learning_rate": 2.8397439696556633e-05, + "loss": 0.9387, + "step": 14580 + }, + { + "epoch": 0.43, + "learning_rate": 2.838262312570379e-05, + "loss": 0.9768, + "step": 14590 + }, + { + "epoch": 0.43, + "learning_rate": 2.8367806554850944e-05, + "loss": 0.9538, + "step": 14600 + }, + { + "epoch": 0.43, + "learning_rate": 2.8352989983998108e-05, + "loss": 0.8618, + "step": 14610 + }, + { + "epoch": 0.43, + "learning_rate": 2.8338173413145265e-05, + "loss": 0.9872, + "step": 14620 + }, + { + "epoch": 0.43, + "learning_rate": 2.832335684229242e-05, + "loss": 0.9197, + "step": 14630 + }, + { + "epoch": 0.43, + "learning_rate": 2.8308540271439583e-05, + "loss": 0.9187, + "step": 14640 + }, + { + "epoch": 0.43, + "learning_rate": 2.8293723700586737e-05, + "loss": 0.9396, + "step": 14650 + }, + { + "epoch": 0.43, + "learning_rate": 2.8278907129733894e-05, + "loss": 0.8117, + "step": 14660 + }, + { + "epoch": 0.43, + "learning_rate": 2.826409055888105e-05, + "loss": 0.834, + "step": 14670 + }, + { + "epoch": 0.44, + "learning_rate": 2.8249273988028212e-05, + "loss": 0.8044, + "step": 14680 + }, + { + "epoch": 0.44, + "learning_rate": 2.823445741717537e-05, + "loss": 0.9714, + "step": 14690 + }, + { + "epoch": 0.44, + "learning_rate": 2.8219640846322527e-05, + "loss": 0.7972, + "step": 14700 + }, + { + "epoch": 0.44, + "learning_rate": 2.8204824275469687e-05, + "loss": 0.935, + "step": 14710 + }, + { + "epoch": 0.44, + "learning_rate": 2.8190007704616844e-05, + "loss": 1.164, + "step": 14720 + }, + { + "epoch": 0.44, + "learning_rate": 2.8175191133764e-05, + "loss": 0.8783, + "step": 14730 + }, + { + "epoch": 0.44, + "learning_rate": 2.8160374562911162e-05, + "loss": 0.8075, + "step": 14740 + }, + { + "epoch": 0.44, + "learning_rate": 2.814555799205832e-05, + "loss": 0.8762, + "step": 14750 + }, + { + "epoch": 0.44, + "learning_rate": 2.8130741421205477e-05, + "loss": 0.9275, + "step": 14760 + }, + { + "epoch": 0.44, + "learning_rate": 2.8115924850352637e-05, + "loss": 0.8824, + "step": 14770 + }, + { + "epoch": 0.44, + "learning_rate": 2.8101108279499795e-05, + "loss": 1.0268, + "step": 14780 + }, + { + "epoch": 0.44, + "learning_rate": 2.8086291708646952e-05, + "loss": 1.143, + "step": 14790 + }, + { + "epoch": 0.44, + "learning_rate": 2.8071475137794112e-05, + "loss": 0.8914, + "step": 14800 + }, + { + "epoch": 0.44, + "learning_rate": 2.805665856694127e-05, + "loss": 0.8631, + "step": 14810 + }, + { + "epoch": 0.44, + "learning_rate": 2.8041841996088424e-05, + "loss": 0.8337, + "step": 14820 + }, + { + "epoch": 0.44, + "learning_rate": 2.8027025425235588e-05, + "loss": 0.9228, + "step": 14830 + }, + { + "epoch": 0.44, + "learning_rate": 2.8012208854382745e-05, + "loss": 0.8923, + "step": 14840 + }, + { + "epoch": 0.44, + "learning_rate": 2.79973922835299e-05, + "loss": 0.8587, + "step": 14850 + }, + { + "epoch": 0.44, + "learning_rate": 2.7982575712677063e-05, + "loss": 0.9243, + "step": 14860 + }, + { + "epoch": 0.44, + "learning_rate": 2.7967759141824216e-05, + "loss": 0.9459, + "step": 14870 + }, + { + "epoch": 0.44, + "learning_rate": 2.7952942570971374e-05, + "loss": 0.9452, + "step": 14880 + }, + { + "epoch": 0.44, + "learning_rate": 2.7938126000118538e-05, + "loss": 0.9123, + "step": 14890 + }, + { + "epoch": 0.44, + "learning_rate": 2.792330942926569e-05, + "loss": 0.8826, + "step": 14900 + }, + { + "epoch": 0.44, + "learning_rate": 2.790849285841285e-05, + "loss": 0.8847, + "step": 14910 + }, + { + "epoch": 0.44, + "learning_rate": 2.789367628756001e-05, + "loss": 1.0471, + "step": 14920 + }, + { + "epoch": 0.44, + "learning_rate": 2.7878859716707167e-05, + "loss": 0.8778, + "step": 14930 + }, + { + "epoch": 0.44, + "learning_rate": 2.7864043145854324e-05, + "loss": 0.9198, + "step": 14940 + }, + { + "epoch": 0.44, + "learning_rate": 2.784922657500148e-05, + "loss": 0.8459, + "step": 14950 + }, + { + "epoch": 0.44, + "learning_rate": 2.7834410004148642e-05, + "loss": 0.9726, + "step": 14960 + }, + { + "epoch": 0.44, + "learning_rate": 2.78195934332958e-05, + "loss": 0.8508, + "step": 14970 + }, + { + "epoch": 0.44, + "learning_rate": 2.7804776862442956e-05, + "loss": 0.9304, + "step": 14980 + }, + { + "epoch": 0.44, + "learning_rate": 2.7789960291590117e-05, + "loss": 0.8876, + "step": 14990 + }, + { + "epoch": 0.44, + "learning_rate": 2.7775143720737274e-05, + "loss": 0.9692, + "step": 15000 + }, + { + "epoch": 0.44, + "learning_rate": 2.776032714988443e-05, + "loss": 0.8446, + "step": 15010 + }, + { + "epoch": 0.45, + "learning_rate": 2.7745510579031592e-05, + "loss": 0.8281, + "step": 15020 + }, + { + "epoch": 0.45, + "learning_rate": 2.773069400817875e-05, + "loss": 0.9253, + "step": 15030 + }, + { + "epoch": 0.45, + "learning_rate": 2.7715877437325903e-05, + "loss": 0.9492, + "step": 15040 + }, + { + "epoch": 0.45, + "learning_rate": 2.7701060866473067e-05, + "loss": 0.8651, + "step": 15050 + }, + { + "epoch": 0.45, + "learning_rate": 2.7686244295620224e-05, + "loss": 0.863, + "step": 15060 + }, + { + "epoch": 0.45, + "learning_rate": 2.7671427724767378e-05, + "loss": 0.8056, + "step": 15070 + }, + { + "epoch": 0.45, + "learning_rate": 2.7656611153914542e-05, + "loss": 0.8961, + "step": 15080 + }, + { + "epoch": 0.45, + "learning_rate": 2.7641794583061696e-05, + "loss": 0.9401, + "step": 15090 + }, + { + "epoch": 0.45, + "learning_rate": 2.7626978012208853e-05, + "loss": 0.8435, + "step": 15100 + }, + { + "epoch": 0.45, + "learning_rate": 2.7612161441356017e-05, + "loss": 0.9163, + "step": 15110 + }, + { + "epoch": 0.45, + "learning_rate": 2.759734487050317e-05, + "loss": 0.9933, + "step": 15120 + }, + { + "epoch": 0.45, + "learning_rate": 2.7582528299650328e-05, + "loss": 0.7188, + "step": 15130 + }, + { + "epoch": 0.45, + "learning_rate": 2.7567711728797492e-05, + "loss": 0.8124, + "step": 15140 + }, + { + "epoch": 0.45, + "learning_rate": 2.7552895157944646e-05, + "loss": 0.9366, + "step": 15150 + }, + { + "epoch": 0.45, + "learning_rate": 2.7538078587091803e-05, + "loss": 0.9292, + "step": 15160 + }, + { + "epoch": 0.45, + "learning_rate": 2.7523262016238964e-05, + "loss": 0.8255, + "step": 15170 + }, + { + "epoch": 0.45, + "learning_rate": 2.750844544538612e-05, + "loss": 0.8795, + "step": 15180 + }, + { + "epoch": 0.45, + "learning_rate": 2.749362887453328e-05, + "loss": 0.8525, + "step": 15190 + }, + { + "epoch": 0.45, + "learning_rate": 2.747881230368044e-05, + "loss": 0.8968, + "step": 15200 + }, + { + "epoch": 0.45, + "learning_rate": 2.7463995732827596e-05, + "loss": 0.8813, + "step": 15210 + }, + { + "epoch": 0.45, + "learning_rate": 2.7449179161974753e-05, + "loss": 0.9808, + "step": 15220 + }, + { + "epoch": 0.45, + "learning_rate": 2.743436259112191e-05, + "loss": 0.9589, + "step": 15230 + }, + { + "epoch": 0.45, + "learning_rate": 2.741954602026907e-05, + "loss": 0.8174, + "step": 15240 + }, + { + "epoch": 0.45, + "learning_rate": 2.740472944941623e-05, + "loss": 0.9346, + "step": 15250 + }, + { + "epoch": 0.45, + "learning_rate": 2.7389912878563382e-05, + "loss": 1.0015, + "step": 15260 + }, + { + "epoch": 0.45, + "learning_rate": 2.7375096307710546e-05, + "loss": 0.8213, + "step": 15270 + }, + { + "epoch": 0.45, + "learning_rate": 2.7360279736857704e-05, + "loss": 0.9302, + "step": 15280 + }, + { + "epoch": 0.45, + "learning_rate": 2.7345463166004857e-05, + "loss": 0.8862, + "step": 15290 + }, + { + "epoch": 0.45, + "learning_rate": 2.733064659515202e-05, + "loss": 0.9485, + "step": 15300 + }, + { + "epoch": 0.45, + "learning_rate": 2.731583002429918e-05, + "loss": 0.8125, + "step": 15310 + }, + { + "epoch": 0.45, + "learning_rate": 2.7301013453446333e-05, + "loss": 0.9412, + "step": 15320 + }, + { + "epoch": 0.45, + "learning_rate": 2.7286196882593497e-05, + "loss": 0.9581, + "step": 15330 + }, + { + "epoch": 0.45, + "learning_rate": 2.727138031174065e-05, + "loss": 0.8735, + "step": 15340 + }, + { + "epoch": 0.45, + "learning_rate": 2.7256563740887808e-05, + "loss": 0.9793, + "step": 15350 + }, + { + "epoch": 0.46, + "learning_rate": 2.724174717003497e-05, + "loss": 1.0512, + "step": 15360 + }, + { + "epoch": 0.46, + "learning_rate": 2.7226930599182126e-05, + "loss": 0.907, + "step": 15370 + }, + { + "epoch": 0.46, + "learning_rate": 2.7212114028329283e-05, + "loss": 0.9575, + "step": 15380 + }, + { + "epoch": 0.46, + "learning_rate": 2.7197297457476443e-05, + "loss": 0.8579, + "step": 15390 + }, + { + "epoch": 0.46, + "learning_rate": 2.71824808866236e-05, + "loss": 0.9854, + "step": 15400 + }, + { + "epoch": 0.46, + "learning_rate": 2.7167664315770758e-05, + "loss": 0.7837, + "step": 15410 + }, + { + "epoch": 0.46, + "learning_rate": 2.715284774491792e-05, + "loss": 0.858, + "step": 15420 + }, + { + "epoch": 0.46, + "learning_rate": 2.7138031174065076e-05, + "loss": 0.891, + "step": 15430 + }, + { + "epoch": 0.46, + "learning_rate": 2.7123214603212233e-05, + "loss": 0.8212, + "step": 15440 + }, + { + "epoch": 0.46, + "learning_rate": 2.7108398032359394e-05, + "loss": 0.8824, + "step": 15450 + }, + { + "epoch": 0.46, + "learning_rate": 2.709358146150655e-05, + "loss": 0.8595, + "step": 15460 + }, + { + "epoch": 0.46, + "learning_rate": 2.7078764890653708e-05, + "loss": 0.8807, + "step": 15470 + }, + { + "epoch": 0.46, + "learning_rate": 2.706394831980087e-05, + "loss": 0.9255, + "step": 15480 + }, + { + "epoch": 0.46, + "learning_rate": 2.7049131748948026e-05, + "loss": 0.8835, + "step": 15490 + }, + { + "epoch": 0.46, + "learning_rate": 2.7034315178095183e-05, + "loss": 0.8357, + "step": 15500 + }, + { + "epoch": 0.46, + "learning_rate": 2.7019498607242337e-05, + "loss": 0.8606, + "step": 15510 + }, + { + "epoch": 0.46, + "learning_rate": 2.70046820363895e-05, + "loss": 0.8943, + "step": 15520 + }, + { + "epoch": 0.46, + "learning_rate": 2.6989865465536658e-05, + "loss": 0.9866, + "step": 15530 + }, + { + "epoch": 0.46, + "learning_rate": 2.6975048894683812e-05, + "loss": 0.9325, + "step": 15540 + }, + { + "epoch": 0.46, + "learning_rate": 2.6960232323830976e-05, + "loss": 0.851, + "step": 15550 + }, + { + "epoch": 0.46, + "learning_rate": 2.694541575297813e-05, + "loss": 0.9898, + "step": 15560 + }, + { + "epoch": 0.46, + "learning_rate": 2.6930599182125287e-05, + "loss": 0.9487, + "step": 15570 + }, + { + "epoch": 0.46, + "learning_rate": 2.691578261127245e-05, + "loss": 0.8888, + "step": 15580 + }, + { + "epoch": 0.46, + "learning_rate": 2.6900966040419605e-05, + "loss": 0.9397, + "step": 15590 + }, + { + "epoch": 0.46, + "learning_rate": 2.6886149469566762e-05, + "loss": 0.8789, + "step": 15600 + }, + { + "epoch": 0.46, + "learning_rate": 2.6871332898713926e-05, + "loss": 0.8987, + "step": 15610 + }, + { + "epoch": 0.46, + "learning_rate": 2.685651632786108e-05, + "loss": 1.0085, + "step": 15620 + }, + { + "epoch": 0.46, + "learning_rate": 2.6841699757008237e-05, + "loss": 0.9579, + "step": 15630 + }, + { + "epoch": 0.46, + "learning_rate": 2.6826883186155398e-05, + "loss": 0.8685, + "step": 15640 + }, + { + "epoch": 0.46, + "learning_rate": 2.6812066615302555e-05, + "loss": 0.9063, + "step": 15650 + }, + { + "epoch": 0.46, + "learning_rate": 2.6797250044449712e-05, + "loss": 1.0567, + "step": 15660 + }, + { + "epoch": 0.46, + "learning_rate": 2.6782433473596873e-05, + "loss": 0.8241, + "step": 15670 + }, + { + "epoch": 0.46, + "learning_rate": 2.676761690274403e-05, + "loss": 0.8085, + "step": 15680 + }, + { + "epoch": 0.46, + "learning_rate": 2.6752800331891187e-05, + "loss": 0.7809, + "step": 15690 + }, + { + "epoch": 0.47, + "learning_rate": 2.6737983761038348e-05, + "loss": 0.8839, + "step": 15700 + }, + { + "epoch": 0.47, + "learning_rate": 2.6723167190185505e-05, + "loss": 0.8962, + "step": 15710 + }, + { + "epoch": 0.47, + "learning_rate": 2.6708350619332663e-05, + "loss": 0.9903, + "step": 15720 + }, + { + "epoch": 0.47, + "learning_rate": 2.6693534048479823e-05, + "loss": 0.769, + "step": 15730 + }, + { + "epoch": 0.47, + "learning_rate": 2.667871747762698e-05, + "loss": 0.9137, + "step": 15740 + }, + { + "epoch": 0.47, + "learning_rate": 2.6663900906774138e-05, + "loss": 0.8526, + "step": 15750 + }, + { + "epoch": 0.47, + "learning_rate": 2.6649084335921298e-05, + "loss": 0.9874, + "step": 15760 + }, + { + "epoch": 0.47, + "learning_rate": 2.6634267765068455e-05, + "loss": 0.9854, + "step": 15770 + }, + { + "epoch": 0.47, + "learning_rate": 2.6619451194215613e-05, + "loss": 1.0488, + "step": 15780 + }, + { + "epoch": 0.47, + "learning_rate": 2.6604634623362767e-05, + "loss": 0.8769, + "step": 15790 + }, + { + "epoch": 0.47, + "learning_rate": 2.658981805250993e-05, + "loss": 0.9002, + "step": 15800 + }, + { + "epoch": 0.47, + "learning_rate": 2.6575001481657084e-05, + "loss": 0.7225, + "step": 15810 + }, + { + "epoch": 0.47, + "learning_rate": 2.656018491080424e-05, + "loss": 0.9178, + "step": 15820 + }, + { + "epoch": 0.47, + "learning_rate": 2.6545368339951406e-05, + "loss": 0.8703, + "step": 15830 + }, + { + "epoch": 0.47, + "learning_rate": 2.653055176909856e-05, + "loss": 0.9041, + "step": 15840 + }, + { + "epoch": 0.47, + "learning_rate": 2.6515735198245717e-05, + "loss": 0.9494, + "step": 15850 + }, + { + "epoch": 0.47, + "learning_rate": 2.6500918627392877e-05, + "loss": 0.8609, + "step": 15860 + }, + { + "epoch": 0.47, + "learning_rate": 2.6486102056540035e-05, + "loss": 1.0311, + "step": 15870 + }, + { + "epoch": 0.47, + "learning_rate": 2.6471285485687192e-05, + "loss": 0.8774, + "step": 15880 + }, + { + "epoch": 0.47, + "learning_rate": 2.6456468914834352e-05, + "loss": 0.8898, + "step": 15890 + }, + { + "epoch": 0.47, + "learning_rate": 2.644165234398151e-05, + "loss": 0.9353, + "step": 15900 + }, + { + "epoch": 0.47, + "learning_rate": 2.6426835773128667e-05, + "loss": 0.8315, + "step": 15910 + }, + { + "epoch": 0.47, + "learning_rate": 2.6412019202275828e-05, + "loss": 1.0566, + "step": 15920 + }, + { + "epoch": 0.47, + "learning_rate": 2.6397202631422985e-05, + "loss": 0.9669, + "step": 15930 + }, + { + "epoch": 0.47, + "learning_rate": 2.6382386060570142e-05, + "loss": 0.9258, + "step": 15940 + }, + { + "epoch": 0.47, + "learning_rate": 2.6367569489717303e-05, + "loss": 0.7841, + "step": 15950 + }, + { + "epoch": 0.47, + "learning_rate": 2.635275291886446e-05, + "loss": 1.0183, + "step": 15960 + }, + { + "epoch": 0.47, + "learning_rate": 2.6337936348011617e-05, + "loss": 0.9442, + "step": 15970 + }, + { + "epoch": 0.47, + "learning_rate": 2.6323119777158778e-05, + "loss": 0.9813, + "step": 15980 + }, + { + "epoch": 0.47, + "learning_rate": 2.6308303206305935e-05, + "loss": 0.9232, + "step": 15990 + }, + { + "epoch": 0.47, + "learning_rate": 2.6293486635453092e-05, + "loss": 0.9389, + "step": 16000 + }, + { + "epoch": 0.47, + "learning_rate": 2.6278670064600253e-05, + "loss": 0.8465, + "step": 16010 + }, + { + "epoch": 0.47, + "learning_rate": 2.626385349374741e-05, + "loss": 0.8455, + "step": 16020 + }, + { + "epoch": 0.48, + "learning_rate": 2.6249036922894564e-05, + "loss": 0.9775, + "step": 16030 + }, + { + "epoch": 0.48, + "learning_rate": 2.6234220352041728e-05, + "loss": 0.9338, + "step": 16040 + }, + { + "epoch": 0.48, + "learning_rate": 2.6219403781188885e-05, + "loss": 0.7642, + "step": 16050 + }, + { + "epoch": 0.48, + "learning_rate": 2.620458721033604e-05, + "loss": 0.9005, + "step": 16060 + }, + { + "epoch": 0.48, + "learning_rate": 2.6189770639483196e-05, + "loss": 0.8258, + "step": 16070 + }, + { + "epoch": 0.48, + "learning_rate": 2.617495406863036e-05, + "loss": 0.9772, + "step": 16080 + }, + { + "epoch": 0.48, + "learning_rate": 2.6160137497777514e-05, + "loss": 0.9044, + "step": 16090 + }, + { + "epoch": 0.48, + "learning_rate": 2.614532092692467e-05, + "loss": 0.9335, + "step": 16100 + }, + { + "epoch": 0.48, + "learning_rate": 2.6130504356071832e-05, + "loss": 0.8672, + "step": 16110 + }, + { + "epoch": 0.48, + "learning_rate": 2.611568778521899e-05, + "loss": 1.017, + "step": 16120 + }, + { + "epoch": 0.48, + "learning_rate": 2.6100871214366146e-05, + "loss": 0.9566, + "step": 16130 + }, + { + "epoch": 0.48, + "learning_rate": 2.6086054643513307e-05, + "loss": 0.7851, + "step": 16140 + }, + { + "epoch": 0.48, + "learning_rate": 2.6071238072660464e-05, + "loss": 0.8653, + "step": 16150 + }, + { + "epoch": 0.48, + "learning_rate": 2.605642150180762e-05, + "loss": 0.9346, + "step": 16160 + }, + { + "epoch": 0.48, + "learning_rate": 2.6041604930954782e-05, + "loss": 0.8218, + "step": 16170 + }, + { + "epoch": 0.48, + "learning_rate": 2.602678836010194e-05, + "loss": 0.8127, + "step": 16180 + }, + { + "epoch": 0.48, + "learning_rate": 2.6011971789249096e-05, + "loss": 1.0239, + "step": 16190 + }, + { + "epoch": 0.48, + "learning_rate": 2.5997155218396257e-05, + "loss": 0.9663, + "step": 16200 + }, + { + "epoch": 0.48, + "learning_rate": 2.5982338647543414e-05, + "loss": 0.9385, + "step": 16210 + }, + { + "epoch": 0.48, + "learning_rate": 2.596752207669057e-05, + "loss": 0.8683, + "step": 16220 + }, + { + "epoch": 0.48, + "learning_rate": 2.5952705505837732e-05, + "loss": 0.8967, + "step": 16230 + }, + { + "epoch": 0.48, + "learning_rate": 2.593788893498489e-05, + "loss": 0.6821, + "step": 16240 + }, + { + "epoch": 0.48, + "learning_rate": 2.5923072364132047e-05, + "loss": 0.8757, + "step": 16250 + }, + { + "epoch": 0.48, + "learning_rate": 2.5908255793279207e-05, + "loss": 0.8384, + "step": 16260 + }, + { + "epoch": 0.48, + "learning_rate": 2.5893439222426365e-05, + "loss": 0.9881, + "step": 16270 + }, + { + "epoch": 0.48, + "learning_rate": 2.587862265157352e-05, + "loss": 1.0065, + "step": 16280 + }, + { + "epoch": 0.48, + "learning_rate": 2.5863806080720682e-05, + "loss": 0.8451, + "step": 16290 + }, + { + "epoch": 0.48, + "learning_rate": 2.584898950986784e-05, + "loss": 0.8693, + "step": 16300 + }, + { + "epoch": 0.48, + "learning_rate": 2.5834172939014993e-05, + "loss": 0.8972, + "step": 16310 + }, + { + "epoch": 0.48, + "learning_rate": 2.5819356368162157e-05, + "loss": 0.9758, + "step": 16320 + }, + { + "epoch": 0.48, + "learning_rate": 2.580453979730931e-05, + "loss": 0.8606, + "step": 16330 + }, + { + "epoch": 0.48, + "learning_rate": 2.578972322645647e-05, + "loss": 0.9072, + "step": 16340 + }, + { + "epoch": 0.48, + "learning_rate": 2.5774906655603626e-05, + "loss": 0.9198, + "step": 16350 + }, + { + "epoch": 0.48, + "learning_rate": 2.5760090084750786e-05, + "loss": 0.847, + "step": 16360 + }, + { + "epoch": 0.49, + "learning_rate": 2.5745273513897944e-05, + "loss": 0.9478, + "step": 16370 + }, + { + "epoch": 0.49, + "learning_rate": 2.57304569430451e-05, + "loss": 0.7333, + "step": 16380 + }, + { + "epoch": 0.49, + "learning_rate": 2.571564037219226e-05, + "loss": 1.0021, + "step": 16390 + }, + { + "epoch": 0.49, + "learning_rate": 2.570082380133942e-05, + "loss": 0.9394, + "step": 16400 + }, + { + "epoch": 0.49, + "learning_rate": 2.5686007230486576e-05, + "loss": 0.8365, + "step": 16410 + }, + { + "epoch": 0.49, + "learning_rate": 2.5671190659633737e-05, + "loss": 0.781, + "step": 16420 + }, + { + "epoch": 0.49, + "learning_rate": 2.5656374088780894e-05, + "loss": 0.9276, + "step": 16430 + }, + { + "epoch": 0.49, + "learning_rate": 2.564155751792805e-05, + "loss": 0.9678, + "step": 16440 + }, + { + "epoch": 0.49, + "learning_rate": 2.562674094707521e-05, + "loss": 0.93, + "step": 16450 + }, + { + "epoch": 0.49, + "learning_rate": 2.561192437622237e-05, + "loss": 0.9031, + "step": 16460 + }, + { + "epoch": 0.49, + "learning_rate": 2.5597107805369526e-05, + "loss": 1.0367, + "step": 16470 + }, + { + "epoch": 0.49, + "learning_rate": 2.5582291234516687e-05, + "loss": 0.8493, + "step": 16480 + }, + { + "epoch": 0.49, + "learning_rate": 2.5567474663663844e-05, + "loss": 0.7942, + "step": 16490 + }, + { + "epoch": 0.49, + "learning_rate": 2.5552658092810998e-05, + "loss": 0.9409, + "step": 16500 + }, + { + "epoch": 0.49, + "learning_rate": 2.5537841521958162e-05, + "loss": 0.7449, + "step": 16510 + }, + { + "epoch": 0.49, + "learning_rate": 2.552302495110532e-05, + "loss": 0.9053, + "step": 16520 + }, + { + "epoch": 0.49, + "learning_rate": 2.5508208380252473e-05, + "loss": 0.8583, + "step": 16530 + }, + { + "epoch": 0.49, + "learning_rate": 2.5493391809399637e-05, + "loss": 0.8682, + "step": 16540 + }, + { + "epoch": 0.49, + "learning_rate": 2.547857523854679e-05, + "loss": 0.9189, + "step": 16550 + }, + { + "epoch": 0.49, + "learning_rate": 2.5463758667693948e-05, + "loss": 0.7976, + "step": 16560 + }, + { + "epoch": 0.49, + "learning_rate": 2.5448942096841112e-05, + "loss": 0.8626, + "step": 16570 + }, + { + "epoch": 0.49, + "learning_rate": 2.5434125525988266e-05, + "loss": 0.7991, + "step": 16580 + }, + { + "epoch": 0.49, + "learning_rate": 2.5419308955135423e-05, + "loss": 0.9152, + "step": 16590 + }, + { + "epoch": 0.49, + "learning_rate": 2.5404492384282587e-05, + "loss": 1.0176, + "step": 16600 + }, + { + "epoch": 0.49, + "learning_rate": 2.538967581342974e-05, + "loss": 0.8972, + "step": 16610 + }, + { + "epoch": 0.49, + "learning_rate": 2.5374859242576898e-05, + "loss": 0.8773, + "step": 16620 + }, + { + "epoch": 0.49, + "learning_rate": 2.5360042671724055e-05, + "loss": 0.9817, + "step": 16630 + }, + { + "epoch": 0.49, + "learning_rate": 2.5345226100871216e-05, + "loss": 0.8889, + "step": 16640 + }, + { + "epoch": 0.49, + "learning_rate": 2.5330409530018373e-05, + "loss": 0.8452, + "step": 16650 + }, + { + "epoch": 0.49, + "learning_rate": 2.531559295916553e-05, + "loss": 0.8898, + "step": 16660 + }, + { + "epoch": 0.49, + "learning_rate": 2.530077638831269e-05, + "loss": 0.8766, + "step": 16670 + }, + { + "epoch": 0.49, + "learning_rate": 2.528595981745985e-05, + "loss": 0.9172, + "step": 16680 + }, + { + "epoch": 0.49, + "learning_rate": 2.5271143246607006e-05, + "loss": 0.8526, + "step": 16690 + }, + { + "epoch": 0.49, + "learning_rate": 2.5256326675754166e-05, + "loss": 0.9177, + "step": 16700 + }, + { + "epoch": 0.5, + "learning_rate": 2.5241510104901323e-05, + "loss": 0.8059, + "step": 16710 + }, + { + "epoch": 0.5, + "learning_rate": 2.5226693534048477e-05, + "loss": 0.8278, + "step": 16720 + }, + { + "epoch": 0.5, + "learning_rate": 2.521187696319564e-05, + "loss": 0.9526, + "step": 16730 + }, + { + "epoch": 0.5, + "learning_rate": 2.51970603923428e-05, + "loss": 0.9538, + "step": 16740 + }, + { + "epoch": 0.5, + "learning_rate": 2.5182243821489952e-05, + "loss": 0.9277, + "step": 16750 + }, + { + "epoch": 0.5, + "learning_rate": 2.5167427250637116e-05, + "loss": 0.865, + "step": 16760 + }, + { + "epoch": 0.5, + "learning_rate": 2.5152610679784274e-05, + "loss": 0.9414, + "step": 16770 + }, + { + "epoch": 0.5, + "learning_rate": 2.5137794108931427e-05, + "loss": 0.7853, + "step": 16780 + }, + { + "epoch": 0.5, + "learning_rate": 2.512297753807859e-05, + "loss": 0.9213, + "step": 16790 + }, + { + "epoch": 0.5, + "learning_rate": 2.5108160967225745e-05, + "loss": 0.9364, + "step": 16800 + }, + { + "epoch": 0.5, + "learning_rate": 2.5093344396372903e-05, + "loss": 0.8423, + "step": 16810 + }, + { + "epoch": 0.5, + "learning_rate": 2.5078527825520067e-05, + "loss": 0.9448, + "step": 16820 + }, + { + "epoch": 0.5, + "learning_rate": 2.506371125466722e-05, + "loss": 0.9168, + "step": 16830 + }, + { + "epoch": 0.5, + "learning_rate": 2.5048894683814378e-05, + "loss": 0.9517, + "step": 16840 + }, + { + "epoch": 0.5, + "learning_rate": 2.5034078112961538e-05, + "loss": 0.8643, + "step": 16850 + }, + { + "epoch": 0.5, + "learning_rate": 2.5019261542108695e-05, + "loss": 0.916, + "step": 16860 + }, + { + "epoch": 0.5, + "learning_rate": 2.5004444971255853e-05, + "loss": 1.0083, + "step": 16870 + }, + { + "epoch": 0.5, + "learning_rate": 2.4989628400403013e-05, + "loss": 0.892, + "step": 16880 + }, + { + "epoch": 0.5, + "learning_rate": 2.497481182955017e-05, + "loss": 1.0555, + "step": 16890 + }, + { + "epoch": 0.5, + "learning_rate": 2.4959995258697328e-05, + "loss": 0.8717, + "step": 16900 + }, + { + "epoch": 0.5, + "learning_rate": 2.4945178687844485e-05, + "loss": 0.8993, + "step": 16910 + }, + { + "epoch": 0.5, + "learning_rate": 2.4930362116991646e-05, + "loss": 0.8272, + "step": 16920 + }, + { + "epoch": 0.5, + "learning_rate": 2.4915545546138803e-05, + "loss": 0.8452, + "step": 16930 + }, + { + "epoch": 0.5, + "learning_rate": 2.490072897528596e-05, + "loss": 0.9096, + "step": 16940 + }, + { + "epoch": 0.5, + "learning_rate": 2.488591240443312e-05, + "loss": 0.754, + "step": 16950 + }, + { + "epoch": 0.5, + "learning_rate": 2.4871095833580278e-05, + "loss": 0.9685, + "step": 16960 + }, + { + "epoch": 0.5, + "learning_rate": 2.4856279262727435e-05, + "loss": 0.8254, + "step": 16970 + }, + { + "epoch": 0.5, + "learning_rate": 2.4841462691874592e-05, + "loss": 1.0172, + "step": 16980 + }, + { + "epoch": 0.5, + "learning_rate": 2.4826646121021753e-05, + "loss": 0.8977, + "step": 16990 + }, + { + "epoch": 0.5, + "learning_rate": 2.481182955016891e-05, + "loss": 0.9804, + "step": 17000 + }, + { + "epoch": 0.5, + "learning_rate": 2.4797012979316067e-05, + "loss": 0.9287, + "step": 17010 + }, + { + "epoch": 0.5, + "learning_rate": 2.4782196408463225e-05, + "loss": 0.7927, + "step": 17020 + }, + { + "epoch": 0.5, + "learning_rate": 2.4767379837610385e-05, + "loss": 0.9531, + "step": 17030 + }, + { + "epoch": 0.5, + "learning_rate": 2.4752563266757543e-05, + "loss": 0.8374, + "step": 17040 + }, + { + "epoch": 0.51, + "learning_rate": 2.47377466959047e-05, + "loss": 0.8209, + "step": 17050 + }, + { + "epoch": 0.51, + "learning_rate": 2.472293012505186e-05, + "loss": 0.8736, + "step": 17060 + }, + { + "epoch": 0.51, + "learning_rate": 2.4708113554199018e-05, + "loss": 0.8482, + "step": 17070 + }, + { + "epoch": 0.51, + "learning_rate": 2.4693296983346175e-05, + "loss": 1.0184, + "step": 17080 + }, + { + "epoch": 0.51, + "learning_rate": 2.4678480412493336e-05, + "loss": 0.9015, + "step": 17090 + }, + { + "epoch": 0.51, + "learning_rate": 2.4663663841640493e-05, + "loss": 0.891, + "step": 17100 + }, + { + "epoch": 0.51, + "learning_rate": 2.464884727078765e-05, + "loss": 0.84, + "step": 17110 + }, + { + "epoch": 0.51, + "learning_rate": 2.4634030699934807e-05, + "loss": 0.9163, + "step": 17120 + }, + { + "epoch": 0.51, + "learning_rate": 2.4619214129081964e-05, + "loss": 0.8362, + "step": 17130 + }, + { + "epoch": 0.51, + "learning_rate": 2.4604397558229125e-05, + "loss": 0.7383, + "step": 17140 + }, + { + "epoch": 0.51, + "learning_rate": 2.4589580987376282e-05, + "loss": 0.8876, + "step": 17150 + }, + { + "epoch": 0.51, + "learning_rate": 2.457476441652344e-05, + "loss": 0.9511, + "step": 17160 + }, + { + "epoch": 0.51, + "learning_rate": 2.45599478456706e-05, + "loss": 0.9941, + "step": 17170 + }, + { + "epoch": 0.51, + "learning_rate": 2.4545131274817757e-05, + "loss": 0.7721, + "step": 17180 + }, + { + "epoch": 0.51, + "learning_rate": 2.4530314703964915e-05, + "loss": 0.7751, + "step": 17190 + }, + { + "epoch": 0.51, + "learning_rate": 2.4515498133112075e-05, + "loss": 0.7806, + "step": 17200 + }, + { + "epoch": 0.51, + "learning_rate": 2.4500681562259232e-05, + "loss": 0.926, + "step": 17210 + }, + { + "epoch": 0.51, + "learning_rate": 2.448586499140639e-05, + "loss": 0.9392, + "step": 17220 + }, + { + "epoch": 0.51, + "learning_rate": 2.447104842055355e-05, + "loss": 0.8768, + "step": 17230 + }, + { + "epoch": 0.51, + "learning_rate": 2.4456231849700708e-05, + "loss": 0.7338, + "step": 17240 + }, + { + "epoch": 0.51, + "learning_rate": 2.4441415278847865e-05, + "loss": 0.9202, + "step": 17250 + }, + { + "epoch": 0.51, + "learning_rate": 2.4426598707995022e-05, + "loss": 0.9434, + "step": 17260 + }, + { + "epoch": 0.51, + "learning_rate": 2.441178213714218e-05, + "loss": 0.7249, + "step": 17270 + }, + { + "epoch": 0.51, + "learning_rate": 2.439696556628934e-05, + "loss": 0.8375, + "step": 17280 + }, + { + "epoch": 0.51, + "learning_rate": 2.4382148995436497e-05, + "loss": 0.7855, + "step": 17290 + }, + { + "epoch": 0.51, + "learning_rate": 2.4367332424583654e-05, + "loss": 0.9012, + "step": 17300 + }, + { + "epoch": 0.51, + "learning_rate": 2.4352515853730815e-05, + "loss": 0.9847, + "step": 17310 + }, + { + "epoch": 0.51, + "learning_rate": 2.4337699282877972e-05, + "loss": 0.8869, + "step": 17320 + }, + { + "epoch": 0.51, + "learning_rate": 2.432288271202513e-05, + "loss": 0.8767, + "step": 17330 + }, + { + "epoch": 0.51, + "learning_rate": 2.430806614117229e-05, + "loss": 0.815, + "step": 17340 + }, + { + "epoch": 0.51, + "learning_rate": 2.4293249570319447e-05, + "loss": 0.8581, + "step": 17350 + }, + { + "epoch": 0.51, + "learning_rate": 2.4278432999466604e-05, + "loss": 0.8126, + "step": 17360 + }, + { + "epoch": 0.51, + "learning_rate": 2.4263616428613765e-05, + "loss": 0.8067, + "step": 17370 + }, + { + "epoch": 0.52, + "learning_rate": 2.424879985776092e-05, + "loss": 0.8889, + "step": 17380 + }, + { + "epoch": 0.52, + "learning_rate": 2.423398328690808e-05, + "loss": 0.8773, + "step": 17390 + }, + { + "epoch": 0.52, + "learning_rate": 2.4219166716055237e-05, + "loss": 0.8586, + "step": 17400 + }, + { + "epoch": 0.52, + "learning_rate": 2.4204350145202394e-05, + "loss": 0.8651, + "step": 17410 + }, + { + "epoch": 0.52, + "learning_rate": 2.4189533574349555e-05, + "loss": 0.9208, + "step": 17420 + }, + { + "epoch": 0.52, + "learning_rate": 2.4174717003496712e-05, + "loss": 1.0027, + "step": 17430 + }, + { + "epoch": 0.52, + "learning_rate": 2.415990043264387e-05, + "loss": 0.8523, + "step": 17440 + }, + { + "epoch": 0.52, + "learning_rate": 2.414508386179103e-05, + "loss": 0.9177, + "step": 17450 + }, + { + "epoch": 0.52, + "learning_rate": 2.4130267290938187e-05, + "loss": 0.9654, + "step": 17460 + }, + { + "epoch": 0.52, + "learning_rate": 2.4115450720085344e-05, + "loss": 0.9022, + "step": 17470 + }, + { + "epoch": 0.52, + "learning_rate": 2.4100634149232505e-05, + "loss": 0.999, + "step": 17480 + }, + { + "epoch": 0.52, + "learning_rate": 2.408581757837966e-05, + "loss": 0.7946, + "step": 17490 + }, + { + "epoch": 0.52, + "learning_rate": 2.407100100752682e-05, + "loss": 0.806, + "step": 17500 + }, + { + "epoch": 0.52, + "learning_rate": 2.405618443667398e-05, + "loss": 0.9306, + "step": 17510 + }, + { + "epoch": 0.52, + "learning_rate": 2.4041367865821134e-05, + "loss": 0.8505, + "step": 17520 + }, + { + "epoch": 0.52, + "learning_rate": 2.4026551294968294e-05, + "loss": 0.7275, + "step": 17530 + }, + { + "epoch": 0.52, + "learning_rate": 2.401173472411545e-05, + "loss": 0.85, + "step": 17540 + }, + { + "epoch": 0.52, + "learning_rate": 2.399691815326261e-05, + "loss": 0.9039, + "step": 17550 + }, + { + "epoch": 0.52, + "learning_rate": 2.398210158240977e-05, + "loss": 0.908, + "step": 17560 + }, + { + "epoch": 0.52, + "learning_rate": 2.3967285011556927e-05, + "loss": 0.774, + "step": 17570 + }, + { + "epoch": 0.52, + "learning_rate": 2.3952468440704084e-05, + "loss": 0.9109, + "step": 17580 + }, + { + "epoch": 0.52, + "learning_rate": 2.3937651869851245e-05, + "loss": 0.9756, + "step": 17590 + }, + { + "epoch": 0.52, + "learning_rate": 2.39228352989984e-05, + "loss": 0.8371, + "step": 17600 + }, + { + "epoch": 0.52, + "learning_rate": 2.390801872814556e-05, + "loss": 0.8419, + "step": 17610 + }, + { + "epoch": 0.52, + "learning_rate": 2.389320215729272e-05, + "loss": 0.9165, + "step": 17620 + }, + { + "epoch": 0.52, + "learning_rate": 2.3878385586439873e-05, + "loss": 0.9916, + "step": 17630 + }, + { + "epoch": 0.52, + "learning_rate": 2.3863569015587034e-05, + "loss": 1.0279, + "step": 17640 + }, + { + "epoch": 0.52, + "learning_rate": 2.3848752444734195e-05, + "loss": 0.8075, + "step": 17650 + }, + { + "epoch": 0.52, + "learning_rate": 2.383393587388135e-05, + "loss": 0.7937, + "step": 17660 + }, + { + "epoch": 0.52, + "learning_rate": 2.381911930302851e-05, + "loss": 0.9099, + "step": 17670 + }, + { + "epoch": 0.52, + "learning_rate": 2.3804302732175666e-05, + "loss": 0.8146, + "step": 17680 + }, + { + "epoch": 0.52, + "learning_rate": 2.3789486161322824e-05, + "loss": 0.8404, + "step": 17690 + }, + { + "epoch": 0.52, + "learning_rate": 2.3774669590469984e-05, + "loss": 0.8843, + "step": 17700 + }, + { + "epoch": 0.52, + "learning_rate": 2.375985301961714e-05, + "loss": 0.9091, + "step": 17710 + }, + { + "epoch": 0.53, + "learning_rate": 2.37450364487643e-05, + "loss": 0.7976, + "step": 17720 + }, + { + "epoch": 0.53, + "learning_rate": 2.373021987791146e-05, + "loss": 0.8443, + "step": 17730 + }, + { + "epoch": 0.53, + "learning_rate": 2.3715403307058613e-05, + "loss": 0.8589, + "step": 17740 + }, + { + "epoch": 0.53, + "learning_rate": 2.3700586736205774e-05, + "loss": 0.9244, + "step": 17750 + }, + { + "epoch": 0.53, + "learning_rate": 2.3685770165352934e-05, + "loss": 0.6903, + "step": 17760 + }, + { + "epoch": 0.53, + "learning_rate": 2.3670953594500088e-05, + "loss": 0.8427, + "step": 17770 + }, + { + "epoch": 0.53, + "learning_rate": 2.365613702364725e-05, + "loss": 1.0646, + "step": 17780 + }, + { + "epoch": 0.53, + "learning_rate": 2.3641320452794406e-05, + "loss": 0.7852, + "step": 17790 + }, + { + "epoch": 0.53, + "learning_rate": 2.3626503881941563e-05, + "loss": 0.7886, + "step": 17800 + }, + { + "epoch": 0.53, + "learning_rate": 2.3611687311088724e-05, + "loss": 0.8728, + "step": 17810 + }, + { + "epoch": 0.53, + "learning_rate": 2.359687074023588e-05, + "loss": 0.8495, + "step": 17820 + }, + { + "epoch": 0.53, + "learning_rate": 2.358205416938304e-05, + "loss": 1.0353, + "step": 17830 + }, + { + "epoch": 0.53, + "learning_rate": 2.35672375985302e-05, + "loss": 0.7727, + "step": 17840 + }, + { + "epoch": 0.53, + "learning_rate": 2.3552421027677353e-05, + "loss": 0.7972, + "step": 17850 + }, + { + "epoch": 0.53, + "learning_rate": 2.3537604456824514e-05, + "loss": 0.8583, + "step": 17860 + }, + { + "epoch": 0.53, + "learning_rate": 2.3522787885971674e-05, + "loss": 0.7446, + "step": 17870 + }, + { + "epoch": 0.53, + "learning_rate": 2.3507971315118828e-05, + "loss": 0.7902, + "step": 17880 + }, + { + "epoch": 0.53, + "learning_rate": 2.349315474426599e-05, + "loss": 0.9152, + "step": 17890 + }, + { + "epoch": 0.53, + "learning_rate": 2.3478338173413146e-05, + "loss": 0.7742, + "step": 17900 + }, + { + "epoch": 0.53, + "learning_rate": 2.3463521602560303e-05, + "loss": 0.8286, + "step": 17910 + }, + { + "epoch": 0.53, + "learning_rate": 2.3448705031707464e-05, + "loss": 0.9388, + "step": 17920 + }, + { + "epoch": 0.53, + "learning_rate": 2.343388846085462e-05, + "loss": 0.942, + "step": 17930 + }, + { + "epoch": 0.53, + "learning_rate": 2.3419071890001778e-05, + "loss": 0.8437, + "step": 17940 + }, + { + "epoch": 0.53, + "learning_rate": 2.340425531914894e-05, + "loss": 0.8614, + "step": 17950 + }, + { + "epoch": 0.53, + "learning_rate": 2.3389438748296093e-05, + "loss": 0.8642, + "step": 17960 + }, + { + "epoch": 0.53, + "learning_rate": 2.3374622177443253e-05, + "loss": 0.7555, + "step": 17970 + }, + { + "epoch": 0.53, + "learning_rate": 2.3359805606590414e-05, + "loss": 1.0767, + "step": 17980 + }, + { + "epoch": 0.53, + "learning_rate": 2.3344989035737568e-05, + "loss": 0.8096, + "step": 17990 + }, + { + "epoch": 0.53, + "learning_rate": 2.333017246488473e-05, + "loss": 0.8931, + "step": 18000 + }, + { + "epoch": 0.53, + "learning_rate": 2.3315355894031886e-05, + "loss": 0.8635, + "step": 18010 + }, + { + "epoch": 0.53, + "learning_rate": 2.3300539323179043e-05, + "loss": 0.9228, + "step": 18020 + }, + { + "epoch": 0.53, + "learning_rate": 2.3285722752326203e-05, + "loss": 0.9533, + "step": 18030 + }, + { + "epoch": 0.53, + "learning_rate": 2.327090618147336e-05, + "loss": 0.7538, + "step": 18040 + }, + { + "epoch": 0.53, + "learning_rate": 2.3256089610620518e-05, + "loss": 0.8418, + "step": 18050 + }, + { + "epoch": 0.54, + "learning_rate": 2.324127303976768e-05, + "loss": 0.7138, + "step": 18060 + }, + { + "epoch": 0.54, + "learning_rate": 2.3226456468914836e-05, + "loss": 0.8289, + "step": 18070 + }, + { + "epoch": 0.54, + "learning_rate": 2.3211639898061993e-05, + "loss": 0.8393, + "step": 18080 + }, + { + "epoch": 0.54, + "learning_rate": 2.3196823327209154e-05, + "loss": 0.8786, + "step": 18090 + }, + { + "epoch": 0.54, + "learning_rate": 2.3182006756356307e-05, + "loss": 0.8824, + "step": 18100 + }, + { + "epoch": 0.54, + "learning_rate": 2.3167190185503468e-05, + "loss": 0.8648, + "step": 18110 + }, + { + "epoch": 0.54, + "learning_rate": 2.315237361465063e-05, + "loss": 0.9172, + "step": 18120 + }, + { + "epoch": 0.54, + "learning_rate": 2.3137557043797783e-05, + "loss": 0.7848, + "step": 18130 + }, + { + "epoch": 0.54, + "learning_rate": 2.3122740472944943e-05, + "loss": 0.7298, + "step": 18140 + }, + { + "epoch": 0.54, + "learning_rate": 2.31079239020921e-05, + "loss": 0.867, + "step": 18150 + }, + { + "epoch": 0.54, + "learning_rate": 2.3093107331239258e-05, + "loss": 0.7374, + "step": 18160 + }, + { + "epoch": 0.54, + "learning_rate": 2.3078290760386418e-05, + "loss": 0.8786, + "step": 18170 + }, + { + "epoch": 0.54, + "learning_rate": 2.3063474189533575e-05, + "loss": 1.009, + "step": 18180 + }, + { + "epoch": 0.54, + "learning_rate": 2.3048657618680733e-05, + "loss": 0.8278, + "step": 18190 + }, + { + "epoch": 0.54, + "learning_rate": 2.3033841047827893e-05, + "loss": 0.8863, + "step": 18200 + }, + { + "epoch": 0.54, + "learning_rate": 2.301902447697505e-05, + "loss": 1.0623, + "step": 18210 + }, + { + "epoch": 0.54, + "learning_rate": 2.3004207906122208e-05, + "loss": 0.7678, + "step": 18220 + }, + { + "epoch": 0.54, + "learning_rate": 2.298939133526937e-05, + "loss": 0.9686, + "step": 18230 + }, + { + "epoch": 0.54, + "learning_rate": 2.2974574764416522e-05, + "loss": 0.8615, + "step": 18240 + }, + { + "epoch": 0.54, + "learning_rate": 2.2959758193563683e-05, + "loss": 0.8721, + "step": 18250 + }, + { + "epoch": 0.54, + "learning_rate": 2.294494162271084e-05, + "loss": 0.8698, + "step": 18260 + }, + { + "epoch": 0.54, + "learning_rate": 2.2930125051857997e-05, + "loss": 0.8678, + "step": 18270 + }, + { + "epoch": 0.54, + "learning_rate": 2.2915308481005158e-05, + "loss": 0.7442, + "step": 18280 + }, + { + "epoch": 0.54, + "learning_rate": 2.2900491910152315e-05, + "loss": 0.9643, + "step": 18290 + }, + { + "epoch": 0.54, + "learning_rate": 2.2885675339299472e-05, + "loss": 0.8196, + "step": 18300 + }, + { + "epoch": 0.54, + "learning_rate": 2.2870858768446633e-05, + "loss": 0.8376, + "step": 18310 + }, + { + "epoch": 0.54, + "learning_rate": 2.285604219759379e-05, + "loss": 0.7705, + "step": 18320 + }, + { + "epoch": 0.54, + "learning_rate": 2.2841225626740948e-05, + "loss": 0.88, + "step": 18330 + }, + { + "epoch": 0.54, + "learning_rate": 2.2826409055888108e-05, + "loss": 0.7661, + "step": 18340 + }, + { + "epoch": 0.54, + "learning_rate": 2.2811592485035265e-05, + "loss": 0.8063, + "step": 18350 + }, + { + "epoch": 0.54, + "learning_rate": 2.2796775914182423e-05, + "loss": 0.8005, + "step": 18360 + }, + { + "epoch": 0.54, + "learning_rate": 2.278195934332958e-05, + "loss": 0.7737, + "step": 18370 + }, + { + "epoch": 0.54, + "learning_rate": 2.2767142772476737e-05, + "loss": 0.8472, + "step": 18380 + }, + { + "epoch": 0.54, + "learning_rate": 2.2752326201623898e-05, + "loss": 0.782, + "step": 18390 + }, + { + "epoch": 0.55, + "learning_rate": 2.2737509630771055e-05, + "loss": 0.7141, + "step": 18400 + }, + { + "epoch": 0.55, + "learning_rate": 2.2722693059918212e-05, + "loss": 0.9833, + "step": 18410 + }, + { + "epoch": 0.55, + "learning_rate": 2.2707876489065373e-05, + "loss": 0.7897, + "step": 18420 + }, + { + "epoch": 0.55, + "learning_rate": 2.269305991821253e-05, + "loss": 0.8591, + "step": 18430 + }, + { + "epoch": 0.55, + "learning_rate": 2.2678243347359687e-05, + "loss": 0.9508, + "step": 18440 + }, + { + "epoch": 0.55, + "learning_rate": 2.2663426776506848e-05, + "loss": 0.8874, + "step": 18450 + }, + { + "epoch": 0.55, + "learning_rate": 2.2648610205654005e-05, + "loss": 0.925, + "step": 18460 + }, + { + "epoch": 0.55, + "learning_rate": 2.2633793634801162e-05, + "loss": 0.7757, + "step": 18470 + }, + { + "epoch": 0.55, + "learning_rate": 2.261897706394832e-05, + "loss": 0.7419, + "step": 18480 + }, + { + "epoch": 0.55, + "learning_rate": 2.260416049309548e-05, + "loss": 0.9423, + "step": 18490 + }, + { + "epoch": 0.55, + "learning_rate": 2.2589343922242637e-05, + "loss": 0.8271, + "step": 18500 + }, + { + "epoch": 0.55, + "learning_rate": 2.2574527351389795e-05, + "loss": 0.8333, + "step": 18510 + }, + { + "epoch": 0.55, + "learning_rate": 2.2559710780536952e-05, + "loss": 0.9858, + "step": 18520 + }, + { + "epoch": 0.55, + "learning_rate": 2.2544894209684112e-05, + "loss": 0.8814, + "step": 18530 + }, + { + "epoch": 0.55, + "learning_rate": 2.253007763883127e-05, + "loss": 1.0032, + "step": 18540 + }, + { + "epoch": 0.55, + "learning_rate": 2.2515261067978427e-05, + "loss": 0.7925, + "step": 18550 + }, + { + "epoch": 0.55, + "learning_rate": 2.2500444497125588e-05, + "loss": 0.7896, + "step": 18560 + }, + { + "epoch": 0.55, + "learning_rate": 2.2485627926272745e-05, + "loss": 0.9071, + "step": 18570 + }, + { + "epoch": 0.55, + "learning_rate": 2.2470811355419902e-05, + "loss": 0.999, + "step": 18580 + }, + { + "epoch": 0.55, + "learning_rate": 2.245599478456706e-05, + "loss": 0.8147, + "step": 18590 + }, + { + "epoch": 0.55, + "learning_rate": 2.244117821371422e-05, + "loss": 0.8457, + "step": 18600 + }, + { + "epoch": 0.55, + "learning_rate": 2.2426361642861377e-05, + "loss": 0.9476, + "step": 18610 + }, + { + "epoch": 0.55, + "learning_rate": 2.2411545072008534e-05, + "loss": 0.7915, + "step": 18620 + }, + { + "epoch": 0.55, + "learning_rate": 2.2396728501155695e-05, + "loss": 0.8055, + "step": 18630 + }, + { + "epoch": 0.55, + "learning_rate": 2.2381911930302852e-05, + "loss": 0.9684, + "step": 18640 + }, + { + "epoch": 0.55, + "learning_rate": 2.236709535945001e-05, + "loss": 0.8395, + "step": 18650 + }, + { + "epoch": 0.55, + "learning_rate": 2.2352278788597167e-05, + "loss": 0.8448, + "step": 18660 + }, + { + "epoch": 0.55, + "learning_rate": 2.2337462217744327e-05, + "loss": 0.8679, + "step": 18670 + }, + { + "epoch": 0.55, + "learning_rate": 2.2322645646891485e-05, + "loss": 0.7937, + "step": 18680 + }, + { + "epoch": 0.55, + "learning_rate": 2.2307829076038642e-05, + "loss": 0.9354, + "step": 18690 + }, + { + "epoch": 0.55, + "learning_rate": 2.2293012505185802e-05, + "loss": 0.9462, + "step": 18700 + }, + { + "epoch": 0.55, + "learning_rate": 2.227819593433296e-05, + "loss": 0.7613, + "step": 18710 + }, + { + "epoch": 0.55, + "learning_rate": 2.2263379363480117e-05, + "loss": 0.7807, + "step": 18720 + }, + { + "epoch": 0.56, + "learning_rate": 2.2248562792627274e-05, + "loss": 0.8408, + "step": 18730 + }, + { + "epoch": 0.56, + "learning_rate": 2.2233746221774435e-05, + "loss": 0.9313, + "step": 18740 + }, + { + "epoch": 0.56, + "learning_rate": 2.2218929650921592e-05, + "loss": 0.8566, + "step": 18750 + }, + { + "epoch": 0.56, + "learning_rate": 2.220411308006875e-05, + "loss": 0.7942, + "step": 18760 + }, + { + "epoch": 0.56, + "learning_rate": 2.218929650921591e-05, + "loss": 0.9131, + "step": 18770 + }, + { + "epoch": 0.56, + "learning_rate": 2.2174479938363067e-05, + "loss": 0.8862, + "step": 18780 + }, + { + "epoch": 0.56, + "learning_rate": 2.2159663367510224e-05, + "loss": 0.8691, + "step": 18790 + }, + { + "epoch": 0.56, + "learning_rate": 2.214484679665738e-05, + "loss": 0.9793, + "step": 18800 + }, + { + "epoch": 0.56, + "learning_rate": 2.2130030225804542e-05, + "loss": 0.9441, + "step": 18810 + }, + { + "epoch": 0.56, + "learning_rate": 2.21152136549517e-05, + "loss": 0.842, + "step": 18820 + }, + { + "epoch": 0.56, + "learning_rate": 2.2100397084098857e-05, + "loss": 0.8368, + "step": 18830 + }, + { + "epoch": 0.56, + "learning_rate": 2.2085580513246014e-05, + "loss": 0.9774, + "step": 18840 + }, + { + "epoch": 0.56, + "learning_rate": 2.2070763942393174e-05, + "loss": 0.8356, + "step": 18850 + }, + { + "epoch": 0.56, + "learning_rate": 2.205594737154033e-05, + "loss": 0.7599, + "step": 18860 + }, + { + "epoch": 0.56, + "learning_rate": 2.204113080068749e-05, + "loss": 0.8256, + "step": 18870 + }, + { + "epoch": 0.56, + "learning_rate": 2.202631422983465e-05, + "loss": 0.9039, + "step": 18880 + }, + { + "epoch": 0.56, + "learning_rate": 2.2011497658981807e-05, + "loss": 0.9825, + "step": 18890 + }, + { + "epoch": 0.56, + "learning_rate": 2.1996681088128964e-05, + "loss": 0.8668, + "step": 18900 + }, + { + "epoch": 0.56, + "learning_rate": 2.1981864517276125e-05, + "loss": 0.8308, + "step": 18910 + }, + { + "epoch": 0.56, + "learning_rate": 2.1967047946423282e-05, + "loss": 0.9404, + "step": 18920 + }, + { + "epoch": 0.56, + "learning_rate": 2.195223137557044e-05, + "loss": 0.8151, + "step": 18930 + }, + { + "epoch": 0.56, + "learning_rate": 2.1937414804717596e-05, + "loss": 0.9006, + "step": 18940 + }, + { + "epoch": 0.56, + "learning_rate": 2.1922598233864754e-05, + "loss": 0.8403, + "step": 18950 + }, + { + "epoch": 0.56, + "learning_rate": 2.1907781663011914e-05, + "loss": 0.8765, + "step": 18960 + }, + { + "epoch": 0.56, + "learning_rate": 2.189296509215907e-05, + "loss": 0.8613, + "step": 18970 + }, + { + "epoch": 0.56, + "learning_rate": 2.187814852130623e-05, + "loss": 0.7064, + "step": 18980 + }, + { + "epoch": 0.56, + "learning_rate": 2.186333195045339e-05, + "loss": 0.8803, + "step": 18990 + }, + { + "epoch": 0.56, + "learning_rate": 2.1848515379600546e-05, + "loss": 0.8661, + "step": 19000 + }, + { + "epoch": 0.56, + "learning_rate": 2.1833698808747704e-05, + "loss": 0.8495, + "step": 19010 + }, + { + "epoch": 0.56, + "learning_rate": 2.1818882237894864e-05, + "loss": 0.8133, + "step": 19020 + }, + { + "epoch": 0.56, + "learning_rate": 2.180406566704202e-05, + "loss": 0.7198, + "step": 19030 + }, + { + "epoch": 0.56, + "learning_rate": 2.178924909618918e-05, + "loss": 0.8265, + "step": 19040 + }, + { + "epoch": 0.56, + "learning_rate": 2.177443252533634e-05, + "loss": 0.7314, + "step": 19050 + }, + { + "epoch": 0.56, + "learning_rate": 2.1759615954483493e-05, + "loss": 0.8324, + "step": 19060 + }, + { + "epoch": 0.57, + "learning_rate": 2.1744799383630654e-05, + "loss": 0.8576, + "step": 19070 + }, + { + "epoch": 0.57, + "learning_rate": 2.172998281277781e-05, + "loss": 0.7959, + "step": 19080 + }, + { + "epoch": 0.57, + "learning_rate": 2.171516624192497e-05, + "loss": 0.8669, + "step": 19090 + }, + { + "epoch": 0.57, + "learning_rate": 2.170034967107213e-05, + "loss": 0.9387, + "step": 19100 + }, + { + "epoch": 0.57, + "learning_rate": 2.1685533100219286e-05, + "loss": 0.867, + "step": 19110 + }, + { + "epoch": 0.57, + "learning_rate": 2.1670716529366443e-05, + "loss": 0.7616, + "step": 19120 + }, + { + "epoch": 0.57, + "learning_rate": 2.1655899958513604e-05, + "loss": 0.9017, + "step": 19130 + }, + { + "epoch": 0.57, + "learning_rate": 2.164108338766076e-05, + "loss": 0.9809, + "step": 19140 + }, + { + "epoch": 0.57, + "learning_rate": 2.162626681680792e-05, + "loss": 0.911, + "step": 19150 + }, + { + "epoch": 0.57, + "learning_rate": 2.161145024595508e-05, + "loss": 0.9344, + "step": 19160 + }, + { + "epoch": 0.57, + "learning_rate": 2.1596633675102236e-05, + "loss": 0.8076, + "step": 19170 + }, + { + "epoch": 0.57, + "learning_rate": 2.1581817104249394e-05, + "loss": 0.8818, + "step": 19180 + }, + { + "epoch": 0.57, + "learning_rate": 2.1567000533396554e-05, + "loss": 0.9721, + "step": 19190 + }, + { + "epoch": 0.57, + "learning_rate": 2.1552183962543708e-05, + "loss": 0.8332, + "step": 19200 + }, + { + "epoch": 0.57, + "learning_rate": 2.153736739169087e-05, + "loss": 0.898, + "step": 19210 + }, + { + "epoch": 0.57, + "learning_rate": 2.1522550820838026e-05, + "loss": 0.9453, + "step": 19220 + }, + { + "epoch": 0.57, + "learning_rate": 2.1507734249985183e-05, + "loss": 0.7826, + "step": 19230 + }, + { + "epoch": 0.57, + "learning_rate": 2.1492917679132344e-05, + "loss": 0.9795, + "step": 19240 + }, + { + "epoch": 0.57, + "learning_rate": 2.14781011082795e-05, + "loss": 0.9173, + "step": 19250 + }, + { + "epoch": 0.57, + "learning_rate": 2.1463284537426658e-05, + "loss": 0.9476, + "step": 19260 + }, + { + "epoch": 0.57, + "learning_rate": 2.144846796657382e-05, + "loss": 0.964, + "step": 19270 + }, + { + "epoch": 0.57, + "learning_rate": 2.1433651395720976e-05, + "loss": 0.9072, + "step": 19280 + }, + { + "epoch": 0.57, + "learning_rate": 2.1418834824868133e-05, + "loss": 0.8462, + "step": 19290 + }, + { + "epoch": 0.57, + "learning_rate": 2.1404018254015294e-05, + "loss": 0.874, + "step": 19300 + }, + { + "epoch": 0.57, + "learning_rate": 2.1389201683162448e-05, + "loss": 1.0157, + "step": 19310 + }, + { + "epoch": 0.57, + "learning_rate": 2.137438511230961e-05, + "loss": 0.8508, + "step": 19320 + }, + { + "epoch": 0.57, + "learning_rate": 2.135956854145677e-05, + "loss": 0.9037, + "step": 19330 + }, + { + "epoch": 0.57, + "learning_rate": 2.1344751970603923e-05, + "loss": 0.9253, + "step": 19340 + }, + { + "epoch": 0.57, + "learning_rate": 2.1329935399751083e-05, + "loss": 0.7522, + "step": 19350 + }, + { + "epoch": 0.57, + "learning_rate": 2.131511882889824e-05, + "loss": 0.9502, + "step": 19360 + }, + { + "epoch": 0.57, + "learning_rate": 2.1300302258045398e-05, + "loss": 0.7547, + "step": 19370 + }, + { + "epoch": 0.57, + "learning_rate": 2.128548568719256e-05, + "loss": 0.9124, + "step": 19380 + }, + { + "epoch": 0.57, + "learning_rate": 2.1270669116339716e-05, + "loss": 0.8023, + "step": 19390 + }, + { + "epoch": 0.57, + "learning_rate": 2.1255852545486873e-05, + "loss": 0.8538, + "step": 19400 + }, + { + "epoch": 0.58, + "learning_rate": 2.1241035974634034e-05, + "loss": 0.7735, + "step": 19410 + }, + { + "epoch": 0.58, + "learning_rate": 2.1226219403781187e-05, + "loss": 0.8246, + "step": 19420 + }, + { + "epoch": 0.58, + "learning_rate": 2.1211402832928348e-05, + "loss": 0.755, + "step": 19430 + }, + { + "epoch": 0.58, + "learning_rate": 2.119658626207551e-05, + "loss": 0.7665, + "step": 19440 + }, + { + "epoch": 0.58, + "learning_rate": 2.1181769691222663e-05, + "loss": 1.0283, + "step": 19450 + }, + { + "epoch": 0.58, + "learning_rate": 2.1166953120369823e-05, + "loss": 0.8871, + "step": 19460 + }, + { + "epoch": 0.58, + "learning_rate": 2.115213654951698e-05, + "loss": 0.9079, + "step": 19470 + }, + { + "epoch": 0.58, + "learning_rate": 2.1137319978664138e-05, + "loss": 0.7618, + "step": 19480 + }, + { + "epoch": 0.58, + "learning_rate": 2.1122503407811298e-05, + "loss": 0.8599, + "step": 19490 + }, + { + "epoch": 0.58, + "learning_rate": 2.1107686836958456e-05, + "loss": 0.842, + "step": 19500 + }, + { + "epoch": 0.58, + "learning_rate": 2.1092870266105613e-05, + "loss": 0.8677, + "step": 19510 + }, + { + "epoch": 0.58, + "learning_rate": 2.1078053695252773e-05, + "loss": 0.9027, + "step": 19520 + }, + { + "epoch": 0.58, + "learning_rate": 2.1063237124399927e-05, + "loss": 0.8858, + "step": 19530 + }, + { + "epoch": 0.58, + "learning_rate": 2.1048420553547088e-05, + "loss": 0.9126, + "step": 19540 + }, + { + "epoch": 0.58, + "learning_rate": 2.103360398269425e-05, + "loss": 0.9025, + "step": 19550 + }, + { + "epoch": 0.58, + "learning_rate": 2.1018787411841402e-05, + "loss": 0.8532, + "step": 19560 + }, + { + "epoch": 0.58, + "learning_rate": 2.1003970840988563e-05, + "loss": 0.8272, + "step": 19570 + }, + { + "epoch": 0.58, + "learning_rate": 2.0989154270135724e-05, + "loss": 0.8753, + "step": 19580 + }, + { + "epoch": 0.58, + "learning_rate": 2.0974337699282877e-05, + "loss": 0.9098, + "step": 19590 + }, + { + "epoch": 0.58, + "learning_rate": 2.0959521128430038e-05, + "loss": 0.9761, + "step": 19600 + }, + { + "epoch": 0.58, + "learning_rate": 2.0944704557577195e-05, + "loss": 0.8511, + "step": 19610 + }, + { + "epoch": 0.58, + "learning_rate": 2.0929887986724352e-05, + "loss": 0.9308, + "step": 19620 + }, + { + "epoch": 0.58, + "learning_rate": 2.0915071415871513e-05, + "loss": 0.9555, + "step": 19630 + }, + { + "epoch": 0.58, + "learning_rate": 2.0900254845018667e-05, + "loss": 0.9046, + "step": 19640 + }, + { + "epoch": 0.58, + "learning_rate": 2.0885438274165828e-05, + "loss": 0.7832, + "step": 19650 + }, + { + "epoch": 0.58, + "learning_rate": 2.0870621703312988e-05, + "loss": 0.8923, + "step": 19660 + }, + { + "epoch": 0.58, + "learning_rate": 2.0855805132460142e-05, + "loss": 0.8718, + "step": 19670 + }, + { + "epoch": 0.58, + "learning_rate": 2.0840988561607303e-05, + "loss": 0.8448, + "step": 19680 + }, + { + "epoch": 0.58, + "learning_rate": 2.0826171990754463e-05, + "loss": 1.1105, + "step": 19690 + }, + { + "epoch": 0.58, + "learning_rate": 2.0811355419901617e-05, + "loss": 0.8333, + "step": 19700 + }, + { + "epoch": 0.58, + "learning_rate": 2.0796538849048778e-05, + "loss": 0.8141, + "step": 19710 + }, + { + "epoch": 0.58, + "learning_rate": 2.0781722278195935e-05, + "loss": 0.9571, + "step": 19720 + }, + { + "epoch": 0.58, + "learning_rate": 2.0766905707343092e-05, + "loss": 0.7987, + "step": 19730 + }, + { + "epoch": 0.58, + "learning_rate": 2.0752089136490253e-05, + "loss": 0.9294, + "step": 19740 + }, + { + "epoch": 0.59, + "learning_rate": 2.073727256563741e-05, + "loss": 0.9727, + "step": 19750 + }, + { + "epoch": 0.59, + "learning_rate": 2.0722455994784567e-05, + "loss": 1.01, + "step": 19760 + }, + { + "epoch": 0.59, + "learning_rate": 2.0707639423931728e-05, + "loss": 0.8694, + "step": 19770 + }, + { + "epoch": 0.59, + "learning_rate": 2.0692822853078882e-05, + "loss": 0.8795, + "step": 19780 + }, + { + "epoch": 0.59, + "learning_rate": 2.0678006282226042e-05, + "loss": 0.8247, + "step": 19790 + }, + { + "epoch": 0.59, + "learning_rate": 2.0663189711373203e-05, + "loss": 0.9454, + "step": 19800 + }, + { + "epoch": 0.59, + "learning_rate": 2.0648373140520357e-05, + "loss": 0.9298, + "step": 19810 + }, + { + "epoch": 0.59, + "learning_rate": 2.0633556569667517e-05, + "loss": 1.0117, + "step": 19820 + }, + { + "epoch": 0.59, + "learning_rate": 2.0618739998814675e-05, + "loss": 0.7869, + "step": 19830 + }, + { + "epoch": 0.59, + "learning_rate": 2.0603923427961832e-05, + "loss": 0.9181, + "step": 19840 + }, + { + "epoch": 0.59, + "learning_rate": 2.0589106857108993e-05, + "loss": 1.0305, + "step": 19850 + }, + { + "epoch": 0.59, + "learning_rate": 2.057429028625615e-05, + "loss": 0.814, + "step": 19860 + }, + { + "epoch": 0.59, + "learning_rate": 2.0559473715403307e-05, + "loss": 0.7786, + "step": 19870 + }, + { + "epoch": 0.59, + "learning_rate": 2.0544657144550468e-05, + "loss": 0.706, + "step": 19880 + }, + { + "epoch": 0.59, + "learning_rate": 2.0529840573697625e-05, + "loss": 0.9476, + "step": 19890 + }, + { + "epoch": 0.59, + "learning_rate": 2.0515024002844782e-05, + "loss": 0.7293, + "step": 19900 + }, + { + "epoch": 0.59, + "learning_rate": 2.0500207431991943e-05, + "loss": 0.8174, + "step": 19910 + }, + { + "epoch": 0.59, + "learning_rate": 2.0485390861139097e-05, + "loss": 0.9313, + "step": 19920 + }, + { + "epoch": 0.59, + "learning_rate": 2.0470574290286257e-05, + "loss": 0.8605, + "step": 19930 + }, + { + "epoch": 0.59, + "learning_rate": 2.0455757719433414e-05, + "loss": 0.9768, + "step": 19940 + }, + { + "epoch": 0.59, + "learning_rate": 2.044094114858057e-05, + "loss": 0.8925, + "step": 19950 + }, + { + "epoch": 0.59, + "learning_rate": 2.0426124577727732e-05, + "loss": 0.8249, + "step": 19960 + }, + { + "epoch": 0.59, + "learning_rate": 2.041130800687489e-05, + "loss": 0.9222, + "step": 19970 + }, + { + "epoch": 0.59, + "learning_rate": 2.0396491436022047e-05, + "loss": 0.8031, + "step": 19980 + }, + { + "epoch": 0.59, + "learning_rate": 2.0381674865169207e-05, + "loss": 0.8355, + "step": 19990 + }, + { + "epoch": 0.59, + "learning_rate": 2.0366858294316365e-05, + "loss": 0.7415, + "step": 20000 + }, + { + "epoch": 0.59, + "learning_rate": 2.0352041723463522e-05, + "loss": 0.8848, + "step": 20010 + }, + { + "epoch": 0.59, + "learning_rate": 2.0337225152610682e-05, + "loss": 0.8046, + "step": 20020 + }, + { + "epoch": 0.59, + "learning_rate": 2.032240858175784e-05, + "loss": 0.858, + "step": 20030 + }, + { + "epoch": 0.59, + "learning_rate": 2.0307592010904997e-05, + "loss": 0.8371, + "step": 20040 + }, + { + "epoch": 0.59, + "learning_rate": 2.0292775440052154e-05, + "loss": 0.9214, + "step": 20050 + }, + { + "epoch": 0.59, + "learning_rate": 2.027795886919931e-05, + "loss": 0.8854, + "step": 20060 + }, + { + "epoch": 0.59, + "learning_rate": 2.0263142298346472e-05, + "loss": 0.8968, + "step": 20070 + }, + { + "epoch": 0.6, + "learning_rate": 2.024832572749363e-05, + "loss": 0.8748, + "step": 20080 + }, + { + "epoch": 0.6, + "learning_rate": 2.0233509156640786e-05, + "loss": 1.0004, + "step": 20090 + }, + { + "epoch": 0.6, + "learning_rate": 2.0218692585787947e-05, + "loss": 0.8403, + "step": 20100 + }, + { + "epoch": 0.6, + "learning_rate": 2.0203876014935104e-05, + "loss": 0.8263, + "step": 20110 + }, + { + "epoch": 0.6, + "learning_rate": 2.018905944408226e-05, + "loss": 0.8777, + "step": 20120 + }, + { + "epoch": 0.6, + "learning_rate": 2.0174242873229422e-05, + "loss": 0.9249, + "step": 20130 + }, + { + "epoch": 0.6, + "learning_rate": 2.015942630237658e-05, + "loss": 0.7976, + "step": 20140 + }, + { + "epoch": 0.6, + "learning_rate": 2.0144609731523737e-05, + "loss": 0.8771, + "step": 20150 + }, + { + "epoch": 0.6, + "learning_rate": 2.0129793160670897e-05, + "loss": 0.786, + "step": 20160 + }, + { + "epoch": 0.6, + "learning_rate": 2.0114976589818054e-05, + "loss": 0.882, + "step": 20170 + }, + { + "epoch": 0.6, + "learning_rate": 2.0100160018965212e-05, + "loss": 0.9749, + "step": 20180 + }, + { + "epoch": 0.6, + "learning_rate": 2.008534344811237e-05, + "loss": 0.9407, + "step": 20190 + }, + { + "epoch": 0.6, + "learning_rate": 2.0070526877259526e-05, + "loss": 0.8734, + "step": 20200 + }, + { + "epoch": 0.6, + "learning_rate": 2.0055710306406687e-05, + "loss": 0.824, + "step": 20210 + }, + { + "epoch": 0.6, + "learning_rate": 2.0040893735553844e-05, + "loss": 0.8913, + "step": 20220 + }, + { + "epoch": 0.6, + "learning_rate": 2.0026077164701e-05, + "loss": 0.9268, + "step": 20230 + }, + { + "epoch": 0.6, + "learning_rate": 2.0011260593848162e-05, + "loss": 0.9078, + "step": 20240 + }, + { + "epoch": 0.6, + "learning_rate": 1.999644402299532e-05, + "loss": 0.8387, + "step": 20250 + }, + { + "epoch": 0.6, + "learning_rate": 1.9981627452142476e-05, + "loss": 0.851, + "step": 20260 + }, + { + "epoch": 0.6, + "learning_rate": 1.9966810881289637e-05, + "loss": 0.8193, + "step": 20270 + }, + { + "epoch": 0.6, + "learning_rate": 1.9951994310436794e-05, + "loss": 0.8916, + "step": 20280 + }, + { + "epoch": 0.6, + "learning_rate": 1.993717773958395e-05, + "loss": 0.8234, + "step": 20290 + }, + { + "epoch": 0.6, + "learning_rate": 1.992236116873111e-05, + "loss": 0.903, + "step": 20300 + }, + { + "epoch": 0.6, + "learning_rate": 1.990754459787827e-05, + "loss": 1.025, + "step": 20310 + }, + { + "epoch": 0.6, + "learning_rate": 1.9892728027025427e-05, + "loss": 1.0031, + "step": 20320 + }, + { + "epoch": 0.6, + "learning_rate": 1.9877911456172584e-05, + "loss": 0.9271, + "step": 20330 + }, + { + "epoch": 0.6, + "learning_rate": 1.986309488531974e-05, + "loss": 0.8388, + "step": 20340 + }, + { + "epoch": 0.6, + "learning_rate": 1.98482783144669e-05, + "loss": 0.8144, + "step": 20350 + }, + { + "epoch": 0.6, + "learning_rate": 1.983346174361406e-05, + "loss": 1.049, + "step": 20360 + }, + { + "epoch": 0.6, + "learning_rate": 1.9818645172761216e-05, + "loss": 0.8028, + "step": 20370 + }, + { + "epoch": 0.6, + "learning_rate": 1.9803828601908377e-05, + "loss": 0.9394, + "step": 20380 + }, + { + "epoch": 0.6, + "learning_rate": 1.9789012031055534e-05, + "loss": 0.9326, + "step": 20390 + }, + { + "epoch": 0.6, + "learning_rate": 1.977419546020269e-05, + "loss": 0.9962, + "step": 20400 + }, + { + "epoch": 0.6, + "learning_rate": 1.975937888934985e-05, + "loss": 0.8867, + "step": 20410 + }, + { + "epoch": 0.61, + "learning_rate": 1.974456231849701e-05, + "loss": 0.7435, + "step": 20420 + }, + { + "epoch": 0.61, + "learning_rate": 1.9729745747644166e-05, + "loss": 0.7816, + "step": 20430 + }, + { + "epoch": 0.61, + "learning_rate": 1.9714929176791323e-05, + "loss": 0.9469, + "step": 20440 + }, + { + "epoch": 0.61, + "learning_rate": 1.9700112605938484e-05, + "loss": 0.8169, + "step": 20450 + }, + { + "epoch": 0.61, + "learning_rate": 1.968529603508564e-05, + "loss": 0.8735, + "step": 20460 + }, + { + "epoch": 0.61, + "learning_rate": 1.96704794642328e-05, + "loss": 1.0336, + "step": 20470 + }, + { + "epoch": 0.61, + "learning_rate": 1.9655662893379956e-05, + "loss": 0.8156, + "step": 20480 + }, + { + "epoch": 0.61, + "learning_rate": 1.9640846322527116e-05, + "loss": 0.9808, + "step": 20490 + }, + { + "epoch": 0.61, + "learning_rate": 1.9626029751674274e-05, + "loss": 0.79, + "step": 20500 + }, + { + "epoch": 0.61, + "learning_rate": 1.961121318082143e-05, + "loss": 0.8405, + "step": 20510 + }, + { + "epoch": 0.61, + "learning_rate": 1.9596396609968588e-05, + "loss": 0.953, + "step": 20520 + }, + { + "epoch": 0.61, + "learning_rate": 1.958158003911575e-05, + "loss": 0.7861, + "step": 20530 + }, + { + "epoch": 0.61, + "learning_rate": 1.9566763468262906e-05, + "loss": 0.7984, + "step": 20540 + }, + { + "epoch": 0.61, + "learning_rate": 1.9551946897410063e-05, + "loss": 0.9889, + "step": 20550 + }, + { + "epoch": 0.61, + "learning_rate": 1.9537130326557224e-05, + "loss": 0.841, + "step": 20560 + }, + { + "epoch": 0.61, + "learning_rate": 1.952231375570438e-05, + "loss": 0.8406, + "step": 20570 + }, + { + "epoch": 0.61, + "learning_rate": 1.9507497184851538e-05, + "loss": 0.9461, + "step": 20580 + }, + { + "epoch": 0.61, + "learning_rate": 1.94926806139987e-05, + "loss": 0.8538, + "step": 20590 + }, + { + "epoch": 0.61, + "learning_rate": 1.9477864043145856e-05, + "loss": 0.854, + "step": 20600 + }, + { + "epoch": 0.61, + "learning_rate": 1.9463047472293013e-05, + "loss": 0.8276, + "step": 20610 + }, + { + "epoch": 0.61, + "learning_rate": 1.944823090144017e-05, + "loss": 0.955, + "step": 20620 + }, + { + "epoch": 0.61, + "learning_rate": 1.943341433058733e-05, + "loss": 0.8104, + "step": 20630 + }, + { + "epoch": 0.61, + "learning_rate": 1.941859775973449e-05, + "loss": 0.9592, + "step": 20640 + }, + { + "epoch": 0.61, + "learning_rate": 1.9403781188881646e-05, + "loss": 0.8689, + "step": 20650 + }, + { + "epoch": 0.61, + "learning_rate": 1.9388964618028803e-05, + "loss": 0.9637, + "step": 20660 + }, + { + "epoch": 0.61, + "learning_rate": 1.9374148047175964e-05, + "loss": 0.7728, + "step": 20670 + }, + { + "epoch": 0.61, + "learning_rate": 1.935933147632312e-05, + "loss": 0.7657, + "step": 20680 + }, + { + "epoch": 0.61, + "learning_rate": 1.9344514905470278e-05, + "loss": 0.8607, + "step": 20690 + }, + { + "epoch": 0.61, + "learning_rate": 1.932969833461744e-05, + "loss": 0.8825, + "step": 20700 + }, + { + "epoch": 0.61, + "learning_rate": 1.9314881763764596e-05, + "loss": 0.924, + "step": 20710 + }, + { + "epoch": 0.61, + "learning_rate": 1.9300065192911753e-05, + "loss": 0.855, + "step": 20720 + }, + { + "epoch": 0.61, + "learning_rate": 1.9285248622058914e-05, + "loss": 0.7599, + "step": 20730 + }, + { + "epoch": 0.61, + "learning_rate": 1.927043205120607e-05, + "loss": 0.8022, + "step": 20740 + }, + { + "epoch": 0.61, + "learning_rate": 1.9255615480353228e-05, + "loss": 0.7713, + "step": 20750 + }, + { + "epoch": 0.62, + "learning_rate": 1.9240798909500385e-05, + "loss": 0.9565, + "step": 20760 + }, + { + "epoch": 0.62, + "learning_rate": 1.9225982338647543e-05, + "loss": 0.9293, + "step": 20770 + }, + { + "epoch": 0.62, + "learning_rate": 1.9211165767794703e-05, + "loss": 0.8294, + "step": 20780 + }, + { + "epoch": 0.62, + "learning_rate": 1.919634919694186e-05, + "loss": 0.8606, + "step": 20790 + }, + { + "epoch": 0.62, + "learning_rate": 1.9181532626089018e-05, + "loss": 0.8515, + "step": 20800 + }, + { + "epoch": 0.62, + "learning_rate": 1.916671605523618e-05, + "loss": 0.853, + "step": 20810 + }, + { + "epoch": 0.62, + "learning_rate": 1.9151899484383336e-05, + "loss": 0.7633, + "step": 20820 + }, + { + "epoch": 0.62, + "learning_rate": 1.9137082913530493e-05, + "loss": 0.8862, + "step": 20830 + }, + { + "epoch": 0.62, + "learning_rate": 1.9122266342677653e-05, + "loss": 0.9932, + "step": 20840 + }, + { + "epoch": 0.62, + "learning_rate": 1.910744977182481e-05, + "loss": 0.8002, + "step": 20850 + }, + { + "epoch": 0.62, + "learning_rate": 1.9092633200971968e-05, + "loss": 0.9051, + "step": 20860 + }, + { + "epoch": 0.62, + "learning_rate": 1.907781663011913e-05, + "loss": 0.9279, + "step": 20870 + }, + { + "epoch": 0.62, + "learning_rate": 1.9063000059266282e-05, + "loss": 0.8137, + "step": 20880 + }, + { + "epoch": 0.62, + "learning_rate": 1.9048183488413443e-05, + "loss": 0.8661, + "step": 20890 + }, + { + "epoch": 0.62, + "learning_rate": 1.90333669175606e-05, + "loss": 0.9539, + "step": 20900 + }, + { + "epoch": 0.62, + "learning_rate": 1.9018550346707757e-05, + "loss": 0.8177, + "step": 20910 + }, + { + "epoch": 0.62, + "learning_rate": 1.9003733775854918e-05, + "loss": 0.7602, + "step": 20920 + }, + { + "epoch": 0.62, + "learning_rate": 1.8988917205002075e-05, + "loss": 0.7922, + "step": 20930 + }, + { + "epoch": 0.62, + "learning_rate": 1.8974100634149233e-05, + "loss": 0.8677, + "step": 20940 + }, + { + "epoch": 0.62, + "learning_rate": 1.8959284063296393e-05, + "loss": 0.7631, + "step": 20950 + }, + { + "epoch": 0.62, + "learning_rate": 1.894446749244355e-05, + "loss": 0.8979, + "step": 20960 + }, + { + "epoch": 0.62, + "learning_rate": 1.8929650921590708e-05, + "loss": 0.8185, + "step": 20970 + }, + { + "epoch": 0.62, + "learning_rate": 1.8914834350737868e-05, + "loss": 0.892, + "step": 20980 + }, + { + "epoch": 0.62, + "learning_rate": 1.8900017779885022e-05, + "loss": 0.9548, + "step": 20990 + }, + { + "epoch": 0.62, + "learning_rate": 1.8885201209032183e-05, + "loss": 0.8945, + "step": 21000 + }, + { + "epoch": 0.62, + "learning_rate": 1.8870384638179343e-05, + "loss": 0.8548, + "step": 21010 + }, + { + "epoch": 0.62, + "learning_rate": 1.8855568067326497e-05, + "loss": 0.9585, + "step": 21020 + }, + { + "epoch": 0.62, + "learning_rate": 1.8840751496473658e-05, + "loss": 0.8668, + "step": 21030 + }, + { + "epoch": 0.62, + "learning_rate": 1.8825934925620815e-05, + "loss": 0.9046, + "step": 21040 + }, + { + "epoch": 0.62, + "learning_rate": 1.8811118354767972e-05, + "loss": 0.8567, + "step": 21050 + }, + { + "epoch": 0.62, + "learning_rate": 1.8796301783915133e-05, + "loss": 0.791, + "step": 21060 + }, + { + "epoch": 0.62, + "learning_rate": 1.878148521306229e-05, + "loss": 0.8112, + "step": 21070 + }, + { + "epoch": 0.62, + "learning_rate": 1.8766668642209447e-05, + "loss": 0.8138, + "step": 21080 + }, + { + "epoch": 0.62, + "learning_rate": 1.8751852071356608e-05, + "loss": 0.9992, + "step": 21090 + }, + { + "epoch": 0.63, + "learning_rate": 1.8737035500503762e-05, + "loss": 0.8576, + "step": 21100 + }, + { + "epoch": 0.63, + "learning_rate": 1.8722218929650922e-05, + "loss": 0.7868, + "step": 21110 + }, + { + "epoch": 0.63, + "learning_rate": 1.8707402358798083e-05, + "loss": 0.8043, + "step": 21120 + }, + { + "epoch": 0.63, + "learning_rate": 1.8692585787945237e-05, + "loss": 0.7189, + "step": 21130 + }, + { + "epoch": 0.63, + "learning_rate": 1.8677769217092397e-05, + "loss": 0.7823, + "step": 21140 + }, + { + "epoch": 0.63, + "learning_rate": 1.8662952646239558e-05, + "loss": 0.7672, + "step": 21150 + }, + { + "epoch": 0.63, + "learning_rate": 1.8648136075386712e-05, + "loss": 1.001, + "step": 21160 + }, + { + "epoch": 0.63, + "learning_rate": 1.8633319504533873e-05, + "loss": 0.7417, + "step": 21170 + }, + { + "epoch": 0.63, + "learning_rate": 1.861850293368103e-05, + "loss": 0.8655, + "step": 21180 + }, + { + "epoch": 0.63, + "learning_rate": 1.8603686362828187e-05, + "loss": 0.7832, + "step": 21190 + }, + { + "epoch": 0.63, + "learning_rate": 1.8588869791975348e-05, + "loss": 0.924, + "step": 21200 + }, + { + "epoch": 0.63, + "learning_rate": 1.8574053221122505e-05, + "loss": 0.9696, + "step": 21210 + }, + { + "epoch": 0.63, + "learning_rate": 1.8559236650269662e-05, + "loss": 0.883, + "step": 21220 + }, + { + "epoch": 0.63, + "learning_rate": 1.8544420079416823e-05, + "loss": 0.9714, + "step": 21230 + }, + { + "epoch": 0.63, + "learning_rate": 1.8529603508563977e-05, + "loss": 0.9046, + "step": 21240 + }, + { + "epoch": 0.63, + "learning_rate": 1.8514786937711137e-05, + "loss": 0.9156, + "step": 21250 + }, + { + "epoch": 0.63, + "learning_rate": 1.8499970366858298e-05, + "loss": 0.9072, + "step": 21260 + }, + { + "epoch": 0.63, + "learning_rate": 1.848515379600545e-05, + "loss": 0.9005, + "step": 21270 + }, + { + "epoch": 0.63, + "learning_rate": 1.8470337225152612e-05, + "loss": 0.8195, + "step": 21280 + }, + { + "epoch": 0.63, + "learning_rate": 1.845552065429977e-05, + "loss": 0.8322, + "step": 21290 + }, + { + "epoch": 0.63, + "learning_rate": 1.8440704083446927e-05, + "loss": 0.9191, + "step": 21300 + }, + { + "epoch": 0.63, + "learning_rate": 1.8425887512594087e-05, + "loss": 0.8699, + "step": 21310 + }, + { + "epoch": 0.63, + "learning_rate": 1.8411070941741245e-05, + "loss": 0.9575, + "step": 21320 + }, + { + "epoch": 0.63, + "learning_rate": 1.8396254370888402e-05, + "loss": 0.8498, + "step": 21330 + }, + { + "epoch": 0.63, + "learning_rate": 1.8381437800035562e-05, + "loss": 0.8958, + "step": 21340 + }, + { + "epoch": 0.63, + "learning_rate": 1.8366621229182716e-05, + "loss": 0.864, + "step": 21350 + }, + { + "epoch": 0.63, + "learning_rate": 1.8351804658329877e-05, + "loss": 0.9615, + "step": 21360 + }, + { + "epoch": 0.63, + "learning_rate": 1.8336988087477038e-05, + "loss": 0.788, + "step": 21370 + }, + { + "epoch": 0.63, + "learning_rate": 1.832217151662419e-05, + "loss": 0.8295, + "step": 21380 + }, + { + "epoch": 0.63, + "learning_rate": 1.8307354945771352e-05, + "loss": 0.8751, + "step": 21390 + }, + { + "epoch": 0.63, + "learning_rate": 1.829253837491851e-05, + "loss": 0.8718, + "step": 21400 + }, + { + "epoch": 0.63, + "learning_rate": 1.8277721804065666e-05, + "loss": 0.8962, + "step": 21410 + }, + { + "epoch": 0.63, + "learning_rate": 1.8262905233212827e-05, + "loss": 0.8029, + "step": 21420 + }, + { + "epoch": 0.64, + "learning_rate": 1.8248088662359984e-05, + "loss": 0.9122, + "step": 21430 + }, + { + "epoch": 0.64, + "learning_rate": 1.823327209150714e-05, + "loss": 0.7332, + "step": 21440 + }, + { + "epoch": 0.64, + "learning_rate": 1.8218455520654302e-05, + "loss": 0.8847, + "step": 21450 + }, + { + "epoch": 0.64, + "learning_rate": 1.8203638949801456e-05, + "loss": 0.8831, + "step": 21460 + }, + { + "epoch": 0.64, + "learning_rate": 1.8188822378948617e-05, + "loss": 0.9425, + "step": 21470 + }, + { + "epoch": 0.64, + "learning_rate": 1.8174005808095777e-05, + "loss": 0.7179, + "step": 21480 + }, + { + "epoch": 0.64, + "learning_rate": 1.815918923724293e-05, + "loss": 0.9403, + "step": 21490 + }, + { + "epoch": 0.64, + "learning_rate": 1.8144372666390092e-05, + "loss": 0.8041, + "step": 21500 + }, + { + "epoch": 0.64, + "learning_rate": 1.812955609553725e-05, + "loss": 0.9635, + "step": 21510 + }, + { + "epoch": 0.64, + "learning_rate": 1.8114739524684406e-05, + "loss": 0.8108, + "step": 21520 + }, + { + "epoch": 0.64, + "learning_rate": 1.8099922953831567e-05, + "loss": 0.9086, + "step": 21530 + }, + { + "epoch": 0.64, + "learning_rate": 1.8085106382978724e-05, + "loss": 0.7554, + "step": 21540 + }, + { + "epoch": 0.64, + "learning_rate": 1.807028981212588e-05, + "loss": 0.999, + "step": 21550 + }, + { + "epoch": 0.64, + "learning_rate": 1.8055473241273042e-05, + "loss": 0.9111, + "step": 21560 + }, + { + "epoch": 0.64, + "learning_rate": 1.80406566704202e-05, + "loss": 1.0083, + "step": 21570 + }, + { + "epoch": 0.64, + "learning_rate": 1.8025840099567356e-05, + "loss": 0.8531, + "step": 21580 + }, + { + "epoch": 0.64, + "learning_rate": 1.8011023528714517e-05, + "loss": 0.8125, + "step": 21590 + }, + { + "epoch": 0.64, + "learning_rate": 1.799620695786167e-05, + "loss": 0.8432, + "step": 21600 + }, + { + "epoch": 0.64, + "learning_rate": 1.798139038700883e-05, + "loss": 0.9143, + "step": 21610 + }, + { + "epoch": 0.64, + "learning_rate": 1.7966573816155992e-05, + "loss": 0.8225, + "step": 21620 + }, + { + "epoch": 0.64, + "learning_rate": 1.7951757245303146e-05, + "loss": 0.8217, + "step": 21630 + }, + { + "epoch": 0.64, + "learning_rate": 1.7936940674450307e-05, + "loss": 0.8318, + "step": 21640 + }, + { + "epoch": 0.64, + "learning_rate": 1.7922124103597464e-05, + "loss": 0.8802, + "step": 21650 + }, + { + "epoch": 0.64, + "learning_rate": 1.790730753274462e-05, + "loss": 0.6746, + "step": 21660 + }, + { + "epoch": 0.64, + "learning_rate": 1.789249096189178e-05, + "loss": 0.8367, + "step": 21670 + }, + { + "epoch": 0.64, + "learning_rate": 1.787767439103894e-05, + "loss": 0.8144, + "step": 21680 + }, + { + "epoch": 0.64, + "learning_rate": 1.7862857820186096e-05, + "loss": 0.8638, + "step": 21690 + }, + { + "epoch": 0.64, + "learning_rate": 1.7848041249333257e-05, + "loss": 0.9311, + "step": 21700 + }, + { + "epoch": 0.64, + "learning_rate": 1.7833224678480414e-05, + "loss": 0.8604, + "step": 21710 + }, + { + "epoch": 0.64, + "learning_rate": 1.781840810762757e-05, + "loss": 0.7928, + "step": 21720 + }, + { + "epoch": 0.64, + "learning_rate": 1.7803591536774732e-05, + "loss": 0.7322, + "step": 21730 + }, + { + "epoch": 0.64, + "learning_rate": 1.7788774965921886e-05, + "loss": 0.824, + "step": 21740 + }, + { + "epoch": 0.64, + "learning_rate": 1.7773958395069046e-05, + "loss": 0.965, + "step": 21750 + }, + { + "epoch": 0.64, + "learning_rate": 1.7759141824216203e-05, + "loss": 0.8722, + "step": 21760 + }, + { + "epoch": 0.65, + "learning_rate": 1.774432525336336e-05, + "loss": 0.8038, + "step": 21770 + }, + { + "epoch": 0.65, + "learning_rate": 1.772950868251052e-05, + "loss": 0.8402, + "step": 21780 + }, + { + "epoch": 0.65, + "learning_rate": 1.771469211165768e-05, + "loss": 0.9165, + "step": 21790 + }, + { + "epoch": 0.65, + "learning_rate": 1.7699875540804836e-05, + "loss": 0.8432, + "step": 21800 + }, + { + "epoch": 0.65, + "learning_rate": 1.7685058969951996e-05, + "loss": 0.8241, + "step": 21810 + }, + { + "epoch": 0.65, + "learning_rate": 1.7670242399099154e-05, + "loss": 0.8838, + "step": 21820 + }, + { + "epoch": 0.65, + "learning_rate": 1.765542582824631e-05, + "loss": 0.8166, + "step": 21830 + }, + { + "epoch": 0.65, + "learning_rate": 1.764060925739347e-05, + "loss": 0.8159, + "step": 21840 + }, + { + "epoch": 0.65, + "learning_rate": 1.762579268654063e-05, + "loss": 0.8546, + "step": 21850 + }, + { + "epoch": 0.65, + "learning_rate": 1.7610976115687786e-05, + "loss": 1.0096, + "step": 21860 + }, + { + "epoch": 0.65, + "learning_rate": 1.7596159544834943e-05, + "loss": 0.821, + "step": 21870 + }, + { + "epoch": 0.65, + "learning_rate": 1.75813429739821e-05, + "loss": 0.9978, + "step": 21880 + }, + { + "epoch": 0.65, + "learning_rate": 1.756652640312926e-05, + "loss": 0.9032, + "step": 21890 + }, + { + "epoch": 0.65, + "learning_rate": 1.7551709832276418e-05, + "loss": 0.6687, + "step": 21900 + }, + { + "epoch": 0.65, + "learning_rate": 1.7536893261423576e-05, + "loss": 0.9232, + "step": 21910 + }, + { + "epoch": 0.65, + "learning_rate": 1.7522076690570736e-05, + "loss": 0.8338, + "step": 21920 + }, + { + "epoch": 0.65, + "learning_rate": 1.7507260119717893e-05, + "loss": 0.7658, + "step": 21930 + }, + { + "epoch": 0.65, + "learning_rate": 1.749244354886505e-05, + "loss": 0.7666, + "step": 21940 + }, + { + "epoch": 0.65, + "learning_rate": 1.747762697801221e-05, + "loss": 0.783, + "step": 21950 + }, + { + "epoch": 0.65, + "learning_rate": 1.746281040715937e-05, + "loss": 0.7576, + "step": 21960 + }, + { + "epoch": 0.65, + "learning_rate": 1.7447993836306526e-05, + "loss": 0.8817, + "step": 21970 + }, + { + "epoch": 0.65, + "learning_rate": 1.7433177265453683e-05, + "loss": 0.8559, + "step": 21980 + }, + { + "epoch": 0.65, + "learning_rate": 1.7418360694600844e-05, + "loss": 0.9449, + "step": 21990 + }, + { + "epoch": 0.65, + "learning_rate": 1.7403544123748e-05, + "loss": 0.9124, + "step": 22000 + }, + { + "epoch": 0.65, + "learning_rate": 1.7388727552895158e-05, + "loss": 1.0082, + "step": 22010 + }, + { + "epoch": 0.65, + "learning_rate": 1.7373910982042315e-05, + "loss": 0.7337, + "step": 22020 + }, + { + "epoch": 0.65, + "learning_rate": 1.7359094411189476e-05, + "loss": 0.8386, + "step": 22030 + }, + { + "epoch": 0.65, + "learning_rate": 1.7344277840336633e-05, + "loss": 0.825, + "step": 22040 + }, + { + "epoch": 0.65, + "learning_rate": 1.732946126948379e-05, + "loss": 0.7686, + "step": 22050 + }, + { + "epoch": 0.65, + "learning_rate": 1.731464469863095e-05, + "loss": 0.9501, + "step": 22060 + }, + { + "epoch": 0.65, + "learning_rate": 1.7299828127778108e-05, + "loss": 0.8041, + "step": 22070 + }, + { + "epoch": 0.65, + "learning_rate": 1.7285011556925265e-05, + "loss": 0.8971, + "step": 22080 + }, + { + "epoch": 0.65, + "learning_rate": 1.7270194986072426e-05, + "loss": 0.8153, + "step": 22090 + }, + { + "epoch": 0.65, + "learning_rate": 1.7255378415219583e-05, + "loss": 0.8948, + "step": 22100 + }, + { + "epoch": 0.66, + "learning_rate": 1.724056184436674e-05, + "loss": 0.9888, + "step": 22110 + }, + { + "epoch": 0.66, + "learning_rate": 1.7225745273513898e-05, + "loss": 0.7854, + "step": 22120 + }, + { + "epoch": 0.66, + "learning_rate": 1.721092870266106e-05, + "loss": 0.8631, + "step": 22130 + }, + { + "epoch": 0.66, + "learning_rate": 1.7196112131808216e-05, + "loss": 0.9226, + "step": 22140 + }, + { + "epoch": 0.66, + "learning_rate": 1.7181295560955373e-05, + "loss": 0.876, + "step": 22150 + }, + { + "epoch": 0.66, + "learning_rate": 1.716647899010253e-05, + "loss": 0.8988, + "step": 22160 + }, + { + "epoch": 0.66, + "learning_rate": 1.715166241924969e-05, + "loss": 0.8474, + "step": 22170 + }, + { + "epoch": 0.66, + "learning_rate": 1.7136845848396848e-05, + "loss": 0.9297, + "step": 22180 + }, + { + "epoch": 0.66, + "learning_rate": 1.7122029277544005e-05, + "loss": 0.8784, + "step": 22190 + }, + { + "epoch": 0.66, + "learning_rate": 1.7107212706691166e-05, + "loss": 0.7333, + "step": 22200 + }, + { + "epoch": 0.66, + "learning_rate": 1.7092396135838323e-05, + "loss": 0.889, + "step": 22210 + }, + { + "epoch": 0.66, + "learning_rate": 1.707757956498548e-05, + "loss": 0.8144, + "step": 22220 + }, + { + "epoch": 0.66, + "learning_rate": 1.7062762994132637e-05, + "loss": 0.9886, + "step": 22230 + }, + { + "epoch": 0.66, + "learning_rate": 1.7047946423279798e-05, + "loss": 0.9524, + "step": 22240 + }, + { + "epoch": 0.66, + "learning_rate": 1.7033129852426955e-05, + "loss": 0.9731, + "step": 22250 + }, + { + "epoch": 0.66, + "learning_rate": 1.7018313281574113e-05, + "loss": 0.7887, + "step": 22260 + }, + { + "epoch": 0.66, + "learning_rate": 1.7003496710721273e-05, + "loss": 0.81, + "step": 22270 + }, + { + "epoch": 0.66, + "learning_rate": 1.698868013986843e-05, + "loss": 0.9525, + "step": 22280 + }, + { + "epoch": 0.66, + "learning_rate": 1.6973863569015588e-05, + "loss": 0.772, + "step": 22290 + }, + { + "epoch": 0.66, + "learning_rate": 1.6959046998162745e-05, + "loss": 0.7318, + "step": 22300 + }, + { + "epoch": 0.66, + "learning_rate": 1.6944230427309905e-05, + "loss": 0.9601, + "step": 22310 + }, + { + "epoch": 0.66, + "learning_rate": 1.6929413856457063e-05, + "loss": 0.8658, + "step": 22320 + }, + { + "epoch": 0.66, + "learning_rate": 1.691459728560422e-05, + "loss": 0.9724, + "step": 22330 + }, + { + "epoch": 0.66, + "learning_rate": 1.6899780714751377e-05, + "loss": 0.9571, + "step": 22340 + }, + { + "epoch": 0.66, + "learning_rate": 1.6884964143898538e-05, + "loss": 0.8063, + "step": 22350 + }, + { + "epoch": 0.66, + "learning_rate": 1.6870147573045695e-05, + "loss": 0.82, + "step": 22360 + }, + { + "epoch": 0.66, + "learning_rate": 1.6855331002192852e-05, + "loss": 0.921, + "step": 22370 + }, + { + "epoch": 0.66, + "learning_rate": 1.6840514431340013e-05, + "loss": 0.8703, + "step": 22380 + }, + { + "epoch": 0.66, + "learning_rate": 1.682569786048717e-05, + "loss": 1.0329, + "step": 22390 + }, + { + "epoch": 0.66, + "learning_rate": 1.6810881289634327e-05, + "loss": 0.9818, + "step": 22400 + }, + { + "epoch": 0.66, + "learning_rate": 1.6796064718781488e-05, + "loss": 0.9763, + "step": 22410 + }, + { + "epoch": 0.66, + "learning_rate": 1.6781248147928645e-05, + "loss": 0.9212, + "step": 22420 + }, + { + "epoch": 0.66, + "learning_rate": 1.6766431577075802e-05, + "loss": 0.7776, + "step": 22430 + }, + { + "epoch": 0.66, + "learning_rate": 1.675161500622296e-05, + "loss": 0.8514, + "step": 22440 + }, + { + "epoch": 0.67, + "learning_rate": 1.6736798435370117e-05, + "loss": 1.021, + "step": 22450 + }, + { + "epoch": 0.67, + "learning_rate": 1.6721981864517278e-05, + "loss": 0.9384, + "step": 22460 + }, + { + "epoch": 0.67, + "learning_rate": 1.6707165293664435e-05, + "loss": 0.7759, + "step": 22470 + }, + { + "epoch": 0.67, + "learning_rate": 1.6692348722811592e-05, + "loss": 0.8767, + "step": 22480 + }, + { + "epoch": 0.67, + "learning_rate": 1.6677532151958753e-05, + "loss": 0.7918, + "step": 22490 + }, + { + "epoch": 0.67, + "learning_rate": 1.666271558110591e-05, + "loss": 0.8851, + "step": 22500 + }, + { + "epoch": 0.67, + "learning_rate": 1.6647899010253067e-05, + "loss": 0.9851, + "step": 22510 + }, + { + "epoch": 0.67, + "learning_rate": 1.6633082439400228e-05, + "loss": 0.9906, + "step": 22520 + }, + { + "epoch": 0.67, + "learning_rate": 1.6618265868547385e-05, + "loss": 0.9234, + "step": 22530 + }, + { + "epoch": 0.67, + "learning_rate": 1.6603449297694542e-05, + "loss": 0.8392, + "step": 22540 + }, + { + "epoch": 0.67, + "learning_rate": 1.6588632726841703e-05, + "loss": 0.8439, + "step": 22550 + }, + { + "epoch": 0.67, + "learning_rate": 1.6573816155988857e-05, + "loss": 0.8402, + "step": 22560 + }, + { + "epoch": 0.67, + "learning_rate": 1.6558999585136017e-05, + "loss": 0.7782, + "step": 22570 + }, + { + "epoch": 0.67, + "learning_rate": 1.6544183014283174e-05, + "loss": 0.9362, + "step": 22580 + }, + { + "epoch": 0.67, + "learning_rate": 1.6529366443430332e-05, + "loss": 0.8373, + "step": 22590 + }, + { + "epoch": 0.67, + "learning_rate": 1.6514549872577492e-05, + "loss": 0.8784, + "step": 22600 + }, + { + "epoch": 0.67, + "learning_rate": 1.649973330172465e-05, + "loss": 0.678, + "step": 22610 + }, + { + "epoch": 0.67, + "learning_rate": 1.6484916730871807e-05, + "loss": 0.7156, + "step": 22620 + }, + { + "epoch": 0.67, + "learning_rate": 1.6470100160018967e-05, + "loss": 0.7588, + "step": 22630 + }, + { + "epoch": 0.67, + "learning_rate": 1.6455283589166125e-05, + "loss": 0.9054, + "step": 22640 + }, + { + "epoch": 0.67, + "learning_rate": 1.6440467018313282e-05, + "loss": 0.7793, + "step": 22650 + }, + { + "epoch": 0.67, + "learning_rate": 1.6425650447460443e-05, + "loss": 0.7883, + "step": 22660 + }, + { + "epoch": 0.67, + "learning_rate": 1.64108338766076e-05, + "loss": 0.8559, + "step": 22670 + }, + { + "epoch": 0.67, + "learning_rate": 1.6396017305754757e-05, + "loss": 0.7894, + "step": 22680 + }, + { + "epoch": 0.67, + "learning_rate": 1.6381200734901918e-05, + "loss": 0.7528, + "step": 22690 + }, + { + "epoch": 0.67, + "learning_rate": 1.636638416404907e-05, + "loss": 0.8207, + "step": 22700 + }, + { + "epoch": 0.67, + "learning_rate": 1.6351567593196232e-05, + "loss": 1.0498, + "step": 22710 + }, + { + "epoch": 0.67, + "learning_rate": 1.633675102234339e-05, + "loss": 0.7424, + "step": 22720 + }, + { + "epoch": 0.67, + "learning_rate": 1.6321934451490547e-05, + "loss": 1.0651, + "step": 22730 + }, + { + "epoch": 0.67, + "learning_rate": 1.6307117880637707e-05, + "loss": 0.8204, + "step": 22740 + }, + { + "epoch": 0.67, + "learning_rate": 1.6292301309784864e-05, + "loss": 0.8968, + "step": 22750 + }, + { + "epoch": 0.67, + "learning_rate": 1.627748473893202e-05, + "loss": 0.8417, + "step": 22760 + }, + { + "epoch": 0.67, + "learning_rate": 1.6262668168079182e-05, + "loss": 0.7999, + "step": 22770 + }, + { + "epoch": 0.68, + "learning_rate": 1.624785159722634e-05, + "loss": 0.8191, + "step": 22780 + }, + { + "epoch": 0.68, + "learning_rate": 1.6233035026373497e-05, + "loss": 0.8413, + "step": 22790 + }, + { + "epoch": 0.68, + "learning_rate": 1.6218218455520657e-05, + "loss": 0.9028, + "step": 22800 + }, + { + "epoch": 0.68, + "learning_rate": 1.620340188466781e-05, + "loss": 0.9951, + "step": 22810 + }, + { + "epoch": 0.68, + "learning_rate": 1.6188585313814972e-05, + "loss": 0.8284, + "step": 22820 + }, + { + "epoch": 0.68, + "learning_rate": 1.6173768742962132e-05, + "loss": 0.8921, + "step": 22830 + }, + { + "epoch": 0.68, + "learning_rate": 1.6158952172109286e-05, + "loss": 0.7294, + "step": 22840 + }, + { + "epoch": 0.68, + "learning_rate": 1.6144135601256447e-05, + "loss": 0.892, + "step": 22850 + }, + { + "epoch": 0.68, + "learning_rate": 1.6129319030403604e-05, + "loss": 0.7497, + "step": 22860 + }, + { + "epoch": 0.68, + "learning_rate": 1.611450245955076e-05, + "loss": 0.8052, + "step": 22870 + }, + { + "epoch": 0.68, + "learning_rate": 1.6099685888697922e-05, + "loss": 0.7653, + "step": 22880 + }, + { + "epoch": 0.68, + "learning_rate": 1.608486931784508e-05, + "loss": 1.0207, + "step": 22890 + }, + { + "epoch": 0.68, + "learning_rate": 1.6070052746992236e-05, + "loss": 0.7666, + "step": 22900 + }, + { + "epoch": 0.68, + "learning_rate": 1.6055236176139397e-05, + "loss": 0.7809, + "step": 22910 + }, + { + "epoch": 0.68, + "learning_rate": 1.604041960528655e-05, + "loss": 0.8804, + "step": 22920 + }, + { + "epoch": 0.68, + "learning_rate": 1.602560303443371e-05, + "loss": 0.8108, + "step": 22930 + }, + { + "epoch": 0.68, + "learning_rate": 1.6010786463580872e-05, + "loss": 0.8421, + "step": 22940 + }, + { + "epoch": 0.68, + "learning_rate": 1.5995969892728026e-05, + "loss": 0.9035, + "step": 22950 + }, + { + "epoch": 0.68, + "learning_rate": 1.5981153321875187e-05, + "loss": 0.7245, + "step": 22960 + }, + { + "epoch": 0.68, + "learning_rate": 1.5966336751022344e-05, + "loss": 0.8261, + "step": 22970 + }, + { + "epoch": 0.68, + "learning_rate": 1.59515201801695e-05, + "loss": 0.9342, + "step": 22980 + }, + { + "epoch": 0.68, + "learning_rate": 1.593670360931666e-05, + "loss": 0.7752, + "step": 22990 + }, + { + "epoch": 0.68, + "learning_rate": 1.592188703846382e-05, + "loss": 0.8349, + "step": 23000 + }, + { + "epoch": 0.68, + "learning_rate": 1.5907070467610976e-05, + "loss": 0.7854, + "step": 23010 + }, + { + "epoch": 0.68, + "learning_rate": 1.5892253896758137e-05, + "loss": 0.866, + "step": 23020 + }, + { + "epoch": 0.68, + "learning_rate": 1.587743732590529e-05, + "loss": 0.8379, + "step": 23030 + }, + { + "epoch": 0.68, + "learning_rate": 1.586262075505245e-05, + "loss": 0.771, + "step": 23040 + }, + { + "epoch": 0.68, + "learning_rate": 1.5847804184199612e-05, + "loss": 0.7034, + "step": 23050 + }, + { + "epoch": 0.68, + "learning_rate": 1.5832987613346766e-05, + "loss": 0.8434, + "step": 23060 + }, + { + "epoch": 0.68, + "learning_rate": 1.5818171042493926e-05, + "loss": 0.7547, + "step": 23070 + }, + { + "epoch": 0.68, + "learning_rate": 1.5803354471641087e-05, + "loss": 0.8964, + "step": 23080 + }, + { + "epoch": 0.68, + "learning_rate": 1.578853790078824e-05, + "loss": 0.8574, + "step": 23090 + }, + { + "epoch": 0.68, + "learning_rate": 1.57737213299354e-05, + "loss": 0.8381, + "step": 23100 + }, + { + "epoch": 0.68, + "learning_rate": 1.575890475908256e-05, + "loss": 0.8343, + "step": 23110 + }, + { + "epoch": 0.69, + "learning_rate": 1.5744088188229716e-05, + "loss": 0.8274, + "step": 23120 + }, + { + "epoch": 0.69, + "learning_rate": 1.5729271617376876e-05, + "loss": 0.8053, + "step": 23130 + }, + { + "epoch": 0.69, + "learning_rate": 1.571445504652403e-05, + "loss": 0.8456, + "step": 23140 + }, + { + "epoch": 0.69, + "learning_rate": 1.569963847567119e-05, + "loss": 0.969, + "step": 23150 + }, + { + "epoch": 0.69, + "learning_rate": 1.568482190481835e-05, + "loss": 0.8612, + "step": 23160 + }, + { + "epoch": 0.69, + "learning_rate": 1.5670005333965505e-05, + "loss": 0.8344, + "step": 23170 + }, + { + "epoch": 0.69, + "learning_rate": 1.5655188763112666e-05, + "loss": 0.9009, + "step": 23180 + }, + { + "epoch": 0.69, + "learning_rate": 1.5640372192259827e-05, + "loss": 0.9595, + "step": 23190 + }, + { + "epoch": 0.69, + "learning_rate": 1.562555562140698e-05, + "loss": 0.8761, + "step": 23200 + }, + { + "epoch": 0.69, + "learning_rate": 1.561073905055414e-05, + "loss": 0.8111, + "step": 23210 + }, + { + "epoch": 0.69, + "learning_rate": 1.55959224797013e-05, + "loss": 0.8634, + "step": 23220 + }, + { + "epoch": 0.69, + "learning_rate": 1.5581105908848456e-05, + "loss": 0.835, + "step": 23230 + }, + { + "epoch": 0.69, + "learning_rate": 1.5566289337995616e-05, + "loss": 0.9587, + "step": 23240 + }, + { + "epoch": 0.69, + "learning_rate": 1.5551472767142773e-05, + "loss": 0.7974, + "step": 23250 + }, + { + "epoch": 0.69, + "learning_rate": 1.553665619628993e-05, + "loss": 0.7252, + "step": 23260 + }, + { + "epoch": 0.69, + "learning_rate": 1.552183962543709e-05, + "loss": 1.0557, + "step": 23270 + }, + { + "epoch": 0.69, + "learning_rate": 1.5507023054584245e-05, + "loss": 1.0059, + "step": 23280 + }, + { + "epoch": 0.69, + "learning_rate": 1.5492206483731406e-05, + "loss": 0.8877, + "step": 23290 + }, + { + "epoch": 0.69, + "learning_rate": 1.5477389912878566e-05, + "loss": 0.8916, + "step": 23300 + }, + { + "epoch": 0.69, + "learning_rate": 1.546257334202572e-05, + "loss": 0.8149, + "step": 23310 + }, + { + "epoch": 0.69, + "learning_rate": 1.544775677117288e-05, + "loss": 0.8561, + "step": 23320 + }, + { + "epoch": 0.69, + "learning_rate": 1.5432940200320038e-05, + "loss": 0.8927, + "step": 23330 + }, + { + "epoch": 0.69, + "learning_rate": 1.5418123629467195e-05, + "loss": 0.7797, + "step": 23340 + }, + { + "epoch": 0.69, + "learning_rate": 1.5403307058614356e-05, + "loss": 0.7539, + "step": 23350 + }, + { + "epoch": 0.69, + "learning_rate": 1.5388490487761513e-05, + "loss": 0.9578, + "step": 23360 + }, + { + "epoch": 0.69, + "learning_rate": 1.537367391690867e-05, + "loss": 0.9525, + "step": 23370 + }, + { + "epoch": 0.69, + "learning_rate": 1.535885734605583e-05, + "loss": 0.8471, + "step": 23380 + }, + { + "epoch": 0.69, + "learning_rate": 1.5344040775202988e-05, + "loss": 0.864, + "step": 23390 + }, + { + "epoch": 0.69, + "learning_rate": 1.5329224204350145e-05, + "loss": 0.8827, + "step": 23400 + }, + { + "epoch": 0.69, + "learning_rate": 1.5314407633497306e-05, + "loss": 0.8778, + "step": 23410 + }, + { + "epoch": 0.69, + "learning_rate": 1.529959106264446e-05, + "loss": 0.8729, + "step": 23420 + }, + { + "epoch": 0.69, + "learning_rate": 1.528477449179162e-05, + "loss": 0.954, + "step": 23430 + }, + { + "epoch": 0.69, + "learning_rate": 1.5269957920938778e-05, + "loss": 0.7923, + "step": 23440 + }, + { + "epoch": 0.69, + "learning_rate": 1.5255141350085935e-05, + "loss": 0.8829, + "step": 23450 + }, + { + "epoch": 0.7, + "learning_rate": 1.5240324779233096e-05, + "loss": 0.865, + "step": 23460 + }, + { + "epoch": 0.7, + "learning_rate": 1.5225508208380255e-05, + "loss": 0.8871, + "step": 23470 + }, + { + "epoch": 0.7, + "learning_rate": 1.521069163752741e-05, + "loss": 0.8585, + "step": 23480 + }, + { + "epoch": 0.7, + "learning_rate": 1.5195875066674569e-05, + "loss": 0.9678, + "step": 23490 + }, + { + "epoch": 0.7, + "learning_rate": 1.518105849582173e-05, + "loss": 0.8421, + "step": 23500 + }, + { + "epoch": 0.7, + "learning_rate": 1.5166241924968885e-05, + "loss": 0.7487, + "step": 23510 + }, + { + "epoch": 0.7, + "learning_rate": 1.5151425354116044e-05, + "loss": 0.86, + "step": 23520 + }, + { + "epoch": 0.7, + "learning_rate": 1.5136608783263203e-05, + "loss": 0.8443, + "step": 23530 + }, + { + "epoch": 0.7, + "learning_rate": 1.512179221241036e-05, + "loss": 0.8046, + "step": 23540 + }, + { + "epoch": 0.7, + "learning_rate": 1.510697564155752e-05, + "loss": 0.9261, + "step": 23550 + }, + { + "epoch": 0.7, + "learning_rate": 1.5092159070704676e-05, + "loss": 0.8575, + "step": 23560 + }, + { + "epoch": 0.7, + "learning_rate": 1.5077342499851835e-05, + "loss": 0.8521, + "step": 23570 + }, + { + "epoch": 0.7, + "learning_rate": 1.5062525928998994e-05, + "loss": 0.8765, + "step": 23580 + }, + { + "epoch": 0.7, + "learning_rate": 1.504770935814615e-05, + "loss": 0.9086, + "step": 23590 + }, + { + "epoch": 0.7, + "learning_rate": 1.5032892787293309e-05, + "loss": 0.8513, + "step": 23600 + }, + { + "epoch": 0.7, + "learning_rate": 1.501807621644047e-05, + "loss": 0.9946, + "step": 23610 + }, + { + "epoch": 0.7, + "learning_rate": 1.5003259645587625e-05, + "loss": 0.9462, + "step": 23620 + }, + { + "epoch": 0.7, + "learning_rate": 1.4988443074734784e-05, + "loss": 0.8317, + "step": 23630 + }, + { + "epoch": 0.7, + "learning_rate": 1.4973626503881943e-05, + "loss": 0.9471, + "step": 23640 + }, + { + "epoch": 0.7, + "learning_rate": 1.49588099330291e-05, + "loss": 0.8202, + "step": 23650 + }, + { + "epoch": 0.7, + "learning_rate": 1.4943993362176259e-05, + "loss": 0.8416, + "step": 23660 + }, + { + "epoch": 0.7, + "learning_rate": 1.4929176791323418e-05, + "loss": 0.8742, + "step": 23670 + }, + { + "epoch": 0.7, + "learning_rate": 1.4914360220470575e-05, + "loss": 0.787, + "step": 23680 + }, + { + "epoch": 0.7, + "learning_rate": 1.4899543649617734e-05, + "loss": 0.8054, + "step": 23690 + }, + { + "epoch": 0.7, + "learning_rate": 1.488472707876489e-05, + "loss": 0.8448, + "step": 23700 + }, + { + "epoch": 0.7, + "learning_rate": 1.4869910507912048e-05, + "loss": 0.7907, + "step": 23710 + }, + { + "epoch": 0.7, + "learning_rate": 1.4855093937059209e-05, + "loss": 0.7657, + "step": 23720 + }, + { + "epoch": 0.7, + "learning_rate": 1.4840277366206365e-05, + "loss": 0.7114, + "step": 23730 + }, + { + "epoch": 0.7, + "learning_rate": 1.4825460795353524e-05, + "loss": 0.9068, + "step": 23740 + }, + { + "epoch": 0.7, + "learning_rate": 1.4810644224500682e-05, + "loss": 0.7937, + "step": 23750 + }, + { + "epoch": 0.7, + "learning_rate": 1.479582765364784e-05, + "loss": 0.7901, + "step": 23760 + }, + { + "epoch": 0.7, + "learning_rate": 1.4781011082794999e-05, + "loss": 0.7857, + "step": 23770 + }, + { + "epoch": 0.7, + "learning_rate": 1.4766194511942158e-05, + "loss": 0.8562, + "step": 23780 + }, + { + "epoch": 0.7, + "learning_rate": 1.4751377941089315e-05, + "loss": 0.7484, + "step": 23790 + }, + { + "epoch": 0.71, + "learning_rate": 1.4736561370236474e-05, + "loss": 0.8844, + "step": 23800 + }, + { + "epoch": 0.71, + "learning_rate": 1.4721744799383633e-05, + "loss": 0.9456, + "step": 23810 + }, + { + "epoch": 0.71, + "learning_rate": 1.470692822853079e-05, + "loss": 0.8661, + "step": 23820 + }, + { + "epoch": 0.71, + "learning_rate": 1.4692111657677949e-05, + "loss": 0.8893, + "step": 23830 + }, + { + "epoch": 0.71, + "learning_rate": 1.4677295086825104e-05, + "loss": 0.8261, + "step": 23840 + }, + { + "epoch": 0.71, + "learning_rate": 1.4662478515972263e-05, + "loss": 0.9312, + "step": 23850 + }, + { + "epoch": 0.71, + "learning_rate": 1.4647661945119422e-05, + "loss": 0.764, + "step": 23860 + }, + { + "epoch": 0.71, + "learning_rate": 1.463284537426658e-05, + "loss": 0.7764, + "step": 23870 + }, + { + "epoch": 0.71, + "learning_rate": 1.4618028803413738e-05, + "loss": 0.809, + "step": 23880 + }, + { + "epoch": 0.71, + "learning_rate": 1.4603212232560897e-05, + "loss": 0.8024, + "step": 23890 + }, + { + "epoch": 0.71, + "learning_rate": 1.4588395661708055e-05, + "loss": 1.019, + "step": 23900 + }, + { + "epoch": 0.71, + "learning_rate": 1.4573579090855213e-05, + "loss": 0.8105, + "step": 23910 + }, + { + "epoch": 0.71, + "learning_rate": 1.4558762520002372e-05, + "loss": 0.8173, + "step": 23920 + }, + { + "epoch": 0.71, + "learning_rate": 1.454394594914953e-05, + "loss": 0.8577, + "step": 23930 + }, + { + "epoch": 0.71, + "learning_rate": 1.4529129378296689e-05, + "loss": 0.86, + "step": 23940 + }, + { + "epoch": 0.71, + "learning_rate": 1.4514312807443847e-05, + "loss": 0.8168, + "step": 23950 + }, + { + "epoch": 0.71, + "learning_rate": 1.4499496236591003e-05, + "loss": 0.7918, + "step": 23960 + }, + { + "epoch": 0.71, + "learning_rate": 1.4484679665738164e-05, + "loss": 0.8372, + "step": 23970 + }, + { + "epoch": 0.71, + "learning_rate": 1.446986309488532e-05, + "loss": 0.8965, + "step": 23980 + }, + { + "epoch": 0.71, + "learning_rate": 1.4455046524032478e-05, + "loss": 0.7732, + "step": 23990 + }, + { + "epoch": 0.71, + "learning_rate": 1.4440229953179637e-05, + "loss": 0.9507, + "step": 24000 + }, + { + "epoch": 0.71, + "learning_rate": 1.4425413382326794e-05, + "loss": 0.8227, + "step": 24010 + }, + { + "epoch": 0.71, + "learning_rate": 1.4410596811473953e-05, + "loss": 0.7898, + "step": 24020 + }, + { + "epoch": 0.71, + "learning_rate": 1.4395780240621112e-05, + "loss": 0.7134, + "step": 24030 + }, + { + "epoch": 0.71, + "learning_rate": 1.438096366976827e-05, + "loss": 0.7505, + "step": 24040 + }, + { + "epoch": 0.71, + "learning_rate": 1.4366147098915428e-05, + "loss": 0.8048, + "step": 24050 + }, + { + "epoch": 0.71, + "learning_rate": 1.4351330528062587e-05, + "loss": 0.7904, + "step": 24060 + }, + { + "epoch": 0.71, + "learning_rate": 1.4336513957209743e-05, + "loss": 0.837, + "step": 24070 + }, + { + "epoch": 0.71, + "learning_rate": 1.4321697386356903e-05, + "loss": 0.9069, + "step": 24080 + }, + { + "epoch": 0.71, + "learning_rate": 1.4306880815504062e-05, + "loss": 0.9193, + "step": 24090 + }, + { + "epoch": 0.71, + "learning_rate": 1.4292064244651218e-05, + "loss": 0.7904, + "step": 24100 + }, + { + "epoch": 0.71, + "learning_rate": 1.4277247673798377e-05, + "loss": 0.811, + "step": 24110 + }, + { + "epoch": 0.71, + "learning_rate": 1.4262431102945534e-05, + "loss": 0.6747, + "step": 24120 + }, + { + "epoch": 0.72, + "learning_rate": 1.4247614532092693e-05, + "loss": 0.8461, + "step": 24130 + }, + { + "epoch": 0.72, + "learning_rate": 1.4232797961239852e-05, + "loss": 0.9374, + "step": 24140 + }, + { + "epoch": 0.72, + "learning_rate": 1.4217981390387009e-05, + "loss": 0.8721, + "step": 24150 + }, + { + "epoch": 0.72, + "learning_rate": 1.4203164819534168e-05, + "loss": 0.8494, + "step": 24160 + }, + { + "epoch": 0.72, + "learning_rate": 1.4188348248681327e-05, + "loss": 0.867, + "step": 24170 + }, + { + "epoch": 0.72, + "learning_rate": 1.4173531677828482e-05, + "loss": 0.7724, + "step": 24180 + }, + { + "epoch": 0.72, + "learning_rate": 1.4158715106975643e-05, + "loss": 0.8602, + "step": 24190 + }, + { + "epoch": 0.72, + "learning_rate": 1.4143898536122802e-05, + "loss": 0.8801, + "step": 24200 + }, + { + "epoch": 0.72, + "learning_rate": 1.4129081965269958e-05, + "loss": 0.9309, + "step": 24210 + }, + { + "epoch": 0.72, + "learning_rate": 1.4114265394417116e-05, + "loss": 0.7233, + "step": 24220 + }, + { + "epoch": 0.72, + "learning_rate": 1.4099448823564277e-05, + "loss": 0.8141, + "step": 24230 + }, + { + "epoch": 0.72, + "learning_rate": 1.4084632252711433e-05, + "loss": 0.9368, + "step": 24240 + }, + { + "epoch": 0.72, + "learning_rate": 1.4069815681858592e-05, + "loss": 1.0427, + "step": 24250 + }, + { + "epoch": 0.72, + "learning_rate": 1.4054999111005749e-05, + "loss": 0.7932, + "step": 24260 + }, + { + "epoch": 0.72, + "learning_rate": 1.4040182540152908e-05, + "loss": 0.7997, + "step": 24270 + }, + { + "epoch": 0.72, + "learning_rate": 1.4025365969300067e-05, + "loss": 0.7913, + "step": 24280 + }, + { + "epoch": 0.72, + "learning_rate": 1.4010549398447224e-05, + "loss": 0.6766, + "step": 24290 + }, + { + "epoch": 0.72, + "learning_rate": 1.3995732827594383e-05, + "loss": 0.9067, + "step": 24300 + }, + { + "epoch": 0.72, + "learning_rate": 1.3980916256741542e-05, + "loss": 0.8415, + "step": 24310 + }, + { + "epoch": 0.72, + "learning_rate": 1.3966099685888697e-05, + "loss": 0.7203, + "step": 24320 + }, + { + "epoch": 0.72, + "learning_rate": 1.3951283115035856e-05, + "loss": 0.7764, + "step": 24330 + }, + { + "epoch": 0.72, + "learning_rate": 1.3936466544183017e-05, + "loss": 0.8258, + "step": 24340 + }, + { + "epoch": 0.72, + "learning_rate": 1.3921649973330172e-05, + "loss": 0.8167, + "step": 24350 + }, + { + "epoch": 0.72, + "learning_rate": 1.3906833402477331e-05, + "loss": 0.8011, + "step": 24360 + }, + { + "epoch": 0.72, + "learning_rate": 1.389201683162449e-05, + "loss": 0.8363, + "step": 24370 + }, + { + "epoch": 0.72, + "learning_rate": 1.3877200260771647e-05, + "loss": 0.7716, + "step": 24380 + }, + { + "epoch": 0.72, + "learning_rate": 1.3862383689918806e-05, + "loss": 0.8876, + "step": 24390 + }, + { + "epoch": 0.72, + "learning_rate": 1.3847567119065964e-05, + "loss": 0.7226, + "step": 24400 + }, + { + "epoch": 0.72, + "learning_rate": 1.3832750548213123e-05, + "loss": 0.7654, + "step": 24410 + }, + { + "epoch": 0.72, + "learning_rate": 1.3817933977360281e-05, + "loss": 0.8115, + "step": 24420 + }, + { + "epoch": 0.72, + "learning_rate": 1.3803117406507437e-05, + "loss": 0.8248, + "step": 24430 + }, + { + "epoch": 0.72, + "learning_rate": 1.3788300835654596e-05, + "loss": 0.8159, + "step": 24440 + }, + { + "epoch": 0.72, + "learning_rate": 1.3773484264801757e-05, + "loss": 0.7409, + "step": 24450 + }, + { + "epoch": 0.72, + "learning_rate": 1.3758667693948912e-05, + "loss": 0.8088, + "step": 24460 + }, + { + "epoch": 0.73, + "learning_rate": 1.3743851123096071e-05, + "loss": 0.7538, + "step": 24470 + }, + { + "epoch": 0.73, + "learning_rate": 1.372903455224323e-05, + "loss": 0.9526, + "step": 24480 + }, + { + "epoch": 0.73, + "learning_rate": 1.3714217981390387e-05, + "loss": 0.9346, + "step": 24490 + }, + { + "epoch": 0.73, + "learning_rate": 1.3699401410537546e-05, + "loss": 0.955, + "step": 24500 + }, + { + "epoch": 0.73, + "learning_rate": 1.3684584839684705e-05, + "loss": 0.8281, + "step": 24510 + }, + { + "epoch": 0.73, + "learning_rate": 1.3669768268831862e-05, + "loss": 0.7606, + "step": 24520 + }, + { + "epoch": 0.73, + "learning_rate": 1.3654951697979021e-05, + "loss": 0.8452, + "step": 24530 + }, + { + "epoch": 0.73, + "learning_rate": 1.3640135127126177e-05, + "loss": 0.7429, + "step": 24540 + }, + { + "epoch": 0.73, + "learning_rate": 1.3625318556273337e-05, + "loss": 0.8333, + "step": 24550 + }, + { + "epoch": 0.73, + "learning_rate": 1.3610501985420496e-05, + "loss": 0.8538, + "step": 24560 + }, + { + "epoch": 0.73, + "learning_rate": 1.3595685414567652e-05, + "loss": 0.8278, + "step": 24570 + }, + { + "epoch": 0.73, + "learning_rate": 1.358086884371481e-05, + "loss": 0.8386, + "step": 24580 + }, + { + "epoch": 0.73, + "learning_rate": 1.356605227286197e-05, + "loss": 0.9109, + "step": 24590 + }, + { + "epoch": 0.73, + "learning_rate": 1.3551235702009127e-05, + "loss": 0.66, + "step": 24600 + }, + { + "epoch": 0.73, + "learning_rate": 1.3536419131156286e-05, + "loss": 0.9268, + "step": 24610 + }, + { + "epoch": 0.73, + "learning_rate": 1.3521602560303445e-05, + "loss": 0.7134, + "step": 24620 + }, + { + "epoch": 0.73, + "learning_rate": 1.3506785989450602e-05, + "loss": 0.6895, + "step": 24630 + }, + { + "epoch": 0.73, + "learning_rate": 1.3491969418597761e-05, + "loss": 0.8673, + "step": 24640 + }, + { + "epoch": 0.73, + "learning_rate": 1.347715284774492e-05, + "loss": 0.7157, + "step": 24650 + }, + { + "epoch": 0.73, + "learning_rate": 1.3462336276892077e-05, + "loss": 0.9058, + "step": 24660 + }, + { + "epoch": 0.73, + "learning_rate": 1.3447519706039236e-05, + "loss": 1.0182, + "step": 24670 + }, + { + "epoch": 0.73, + "learning_rate": 1.3432703135186391e-05, + "loss": 0.8496, + "step": 24680 + }, + { + "epoch": 0.73, + "learning_rate": 1.341788656433355e-05, + "loss": 0.7644, + "step": 24690 + }, + { + "epoch": 0.73, + "learning_rate": 1.3403069993480711e-05, + "loss": 0.9253, + "step": 24700 + }, + { + "epoch": 0.73, + "learning_rate": 1.3388253422627867e-05, + "loss": 0.873, + "step": 24710 + }, + { + "epoch": 0.73, + "learning_rate": 1.3373436851775026e-05, + "loss": 0.7271, + "step": 24720 + }, + { + "epoch": 0.73, + "learning_rate": 1.3358620280922184e-05, + "loss": 0.8746, + "step": 24730 + }, + { + "epoch": 0.73, + "learning_rate": 1.3343803710069342e-05, + "loss": 0.83, + "step": 24740 + }, + { + "epoch": 0.73, + "learning_rate": 1.33289871392165e-05, + "loss": 0.7188, + "step": 24750 + }, + { + "epoch": 0.73, + "learning_rate": 1.331417056836366e-05, + "loss": 0.6843, + "step": 24760 + }, + { + "epoch": 0.73, + "learning_rate": 1.3299353997510817e-05, + "loss": 0.8361, + "step": 24770 + }, + { + "epoch": 0.73, + "learning_rate": 1.3284537426657976e-05, + "loss": 0.9013, + "step": 24780 + }, + { + "epoch": 0.73, + "learning_rate": 1.3269720855805135e-05, + "loss": 0.9251, + "step": 24790 + }, + { + "epoch": 0.73, + "learning_rate": 1.325490428495229e-05, + "loss": 0.7374, + "step": 24800 + }, + { + "epoch": 0.74, + "learning_rate": 1.324008771409945e-05, + "loss": 0.9033, + "step": 24810 + }, + { + "epoch": 0.74, + "learning_rate": 1.3225271143246606e-05, + "loss": 0.7938, + "step": 24820 + }, + { + "epoch": 0.74, + "learning_rate": 1.3210454572393765e-05, + "loss": 0.8459, + "step": 24830 + }, + { + "epoch": 0.74, + "learning_rate": 1.3195638001540924e-05, + "loss": 0.7765, + "step": 24840 + }, + { + "epoch": 0.74, + "learning_rate": 1.3180821430688081e-05, + "loss": 0.7869, + "step": 24850 + }, + { + "epoch": 0.74, + "learning_rate": 1.316600485983524e-05, + "loss": 0.8801, + "step": 24860 + }, + { + "epoch": 0.74, + "learning_rate": 1.31511882889824e-05, + "loss": 0.9698, + "step": 24870 + }, + { + "epoch": 0.74, + "learning_rate": 1.3136371718129556e-05, + "loss": 0.8312, + "step": 24880 + }, + { + "epoch": 0.74, + "learning_rate": 1.3121555147276715e-05, + "loss": 0.9598, + "step": 24890 + }, + { + "epoch": 0.74, + "learning_rate": 1.3106738576423874e-05, + "loss": 0.8063, + "step": 24900 + }, + { + "epoch": 0.74, + "learning_rate": 1.309192200557103e-05, + "loss": 0.964, + "step": 24910 + }, + { + "epoch": 0.74, + "learning_rate": 1.307710543471819e-05, + "loss": 0.7865, + "step": 24920 + }, + { + "epoch": 0.74, + "learning_rate": 1.306228886386535e-05, + "loss": 0.7829, + "step": 24930 + }, + { + "epoch": 0.74, + "learning_rate": 1.3047472293012505e-05, + "loss": 0.8451, + "step": 24940 + }, + { + "epoch": 0.74, + "learning_rate": 1.3032655722159664e-05, + "loss": 0.7222, + "step": 24950 + }, + { + "epoch": 0.74, + "learning_rate": 1.3017839151306821e-05, + "loss": 0.7559, + "step": 24960 + }, + { + "epoch": 0.74, + "learning_rate": 1.300302258045398e-05, + "loss": 0.9221, + "step": 24970 + }, + { + "epoch": 0.74, + "learning_rate": 1.2988206009601139e-05, + "loss": 0.809, + "step": 24980 + }, + { + "epoch": 0.74, + "learning_rate": 1.2973389438748296e-05, + "loss": 0.7493, + "step": 24990 + }, + { + "epoch": 0.74, + "learning_rate": 1.2958572867895455e-05, + "loss": 0.9133, + "step": 25000 + }, + { + "epoch": 0.74, + "learning_rate": 1.2943756297042614e-05, + "loss": 0.796, + "step": 25010 + }, + { + "epoch": 0.74, + "learning_rate": 1.2928939726189771e-05, + "loss": 0.7717, + "step": 25020 + }, + { + "epoch": 0.74, + "learning_rate": 1.291412315533693e-05, + "loss": 0.954, + "step": 25030 + }, + { + "epoch": 0.74, + "learning_rate": 1.2899306584484089e-05, + "loss": 0.803, + "step": 25040 + }, + { + "epoch": 0.74, + "learning_rate": 1.2884490013631245e-05, + "loss": 0.7036, + "step": 25050 + }, + { + "epoch": 0.74, + "learning_rate": 1.2869673442778404e-05, + "loss": 0.8272, + "step": 25060 + }, + { + "epoch": 0.74, + "learning_rate": 1.2854856871925564e-05, + "loss": 0.8529, + "step": 25070 + }, + { + "epoch": 0.74, + "learning_rate": 1.284004030107272e-05, + "loss": 0.9032, + "step": 25080 + }, + { + "epoch": 0.74, + "learning_rate": 1.2825223730219879e-05, + "loss": 0.8864, + "step": 25090 + }, + { + "epoch": 0.74, + "learning_rate": 1.2810407159367036e-05, + "loss": 0.7249, + "step": 25100 + }, + { + "epoch": 0.74, + "learning_rate": 1.2795590588514195e-05, + "loss": 0.9738, + "step": 25110 + }, + { + "epoch": 0.74, + "learning_rate": 1.2780774017661354e-05, + "loss": 0.8423, + "step": 25120 + }, + { + "epoch": 0.74, + "learning_rate": 1.2765957446808511e-05, + "loss": 0.951, + "step": 25130 + }, + { + "epoch": 0.74, + "learning_rate": 1.275114087595567e-05, + "loss": 0.8822, + "step": 25140 + }, + { + "epoch": 0.75, + "learning_rate": 1.2736324305102829e-05, + "loss": 0.9904, + "step": 25150 + }, + { + "epoch": 0.75, + "learning_rate": 1.2721507734249984e-05, + "loss": 0.9531, + "step": 25160 + }, + { + "epoch": 0.75, + "learning_rate": 1.2706691163397143e-05, + "loss": 0.7682, + "step": 25170 + }, + { + "epoch": 0.75, + "learning_rate": 1.2691874592544304e-05, + "loss": 0.8224, + "step": 25180 + }, + { + "epoch": 0.75, + "learning_rate": 1.267705802169146e-05, + "loss": 0.8131, + "step": 25190 + }, + { + "epoch": 0.75, + "learning_rate": 1.2662241450838618e-05, + "loss": 0.877, + "step": 25200 + }, + { + "epoch": 0.75, + "learning_rate": 1.2647424879985777e-05, + "loss": 0.8206, + "step": 25210 + }, + { + "epoch": 0.75, + "learning_rate": 1.2632608309132935e-05, + "loss": 0.8753, + "step": 25220 + }, + { + "epoch": 0.75, + "learning_rate": 1.2617791738280093e-05, + "loss": 0.8928, + "step": 25230 + }, + { + "epoch": 0.75, + "learning_rate": 1.260297516742725e-05, + "loss": 0.8714, + "step": 25240 + }, + { + "epoch": 0.75, + "learning_rate": 1.258815859657441e-05, + "loss": 0.7754, + "step": 25250 + }, + { + "epoch": 0.75, + "learning_rate": 1.2573342025721569e-05, + "loss": 1.0292, + "step": 25260 + }, + { + "epoch": 0.75, + "learning_rate": 1.2558525454868724e-05, + "loss": 0.8371, + "step": 25270 + }, + { + "epoch": 0.75, + "learning_rate": 1.2543708884015885e-05, + "loss": 0.9046, + "step": 25280 + }, + { + "epoch": 0.75, + "learning_rate": 1.2528892313163044e-05, + "loss": 0.729, + "step": 25290 + }, + { + "epoch": 0.75, + "learning_rate": 1.25140757423102e-05, + "loss": 0.7027, + "step": 25300 + }, + { + "epoch": 0.75, + "learning_rate": 1.2499259171457358e-05, + "loss": 0.7705, + "step": 25310 + }, + { + "epoch": 0.75, + "learning_rate": 1.2484442600604517e-05, + "loss": 0.8154, + "step": 25320 + }, + { + "epoch": 0.75, + "learning_rate": 1.2469626029751674e-05, + "loss": 0.9587, + "step": 25330 + }, + { + "epoch": 0.75, + "learning_rate": 1.2454809458898833e-05, + "loss": 0.8896, + "step": 25340 + }, + { + "epoch": 0.75, + "learning_rate": 1.243999288804599e-05, + "loss": 0.8733, + "step": 25350 + }, + { + "epoch": 0.75, + "learning_rate": 1.242517631719315e-05, + "loss": 1.0025, + "step": 25360 + }, + { + "epoch": 0.75, + "learning_rate": 1.2410359746340308e-05, + "loss": 0.7811, + "step": 25370 + }, + { + "epoch": 0.75, + "learning_rate": 1.2395543175487466e-05, + "loss": 0.8052, + "step": 25380 + }, + { + "epoch": 0.75, + "learning_rate": 1.2380726604634624e-05, + "loss": 0.8242, + "step": 25390 + }, + { + "epoch": 0.75, + "learning_rate": 1.2365910033781782e-05, + "loss": 0.7086, + "step": 25400 + }, + { + "epoch": 0.75, + "learning_rate": 1.235109346292894e-05, + "loss": 0.8182, + "step": 25410 + }, + { + "epoch": 0.75, + "learning_rate": 1.2336276892076098e-05, + "loss": 0.8977, + "step": 25420 + }, + { + "epoch": 0.75, + "learning_rate": 1.2321460321223257e-05, + "loss": 0.8453, + "step": 25430 + }, + { + "epoch": 0.75, + "learning_rate": 1.2306643750370416e-05, + "loss": 0.9202, + "step": 25440 + }, + { + "epoch": 0.75, + "learning_rate": 1.2291827179517573e-05, + "loss": 0.8283, + "step": 25450 + }, + { + "epoch": 0.75, + "learning_rate": 1.2277010608664732e-05, + "loss": 0.8236, + "step": 25460 + }, + { + "epoch": 0.75, + "learning_rate": 1.2262194037811889e-05, + "loss": 0.7732, + "step": 25470 + }, + { + "epoch": 0.76, + "learning_rate": 1.2247377466959048e-05, + "loss": 0.8318, + "step": 25480 + }, + { + "epoch": 0.76, + "learning_rate": 1.2232560896106205e-05, + "loss": 0.6824, + "step": 25490 + }, + { + "epoch": 0.76, + "learning_rate": 1.2217744325253364e-05, + "loss": 0.7716, + "step": 25500 + }, + { + "epoch": 0.76, + "learning_rate": 1.2202927754400523e-05, + "loss": 0.8433, + "step": 25510 + }, + { + "epoch": 0.76, + "learning_rate": 1.218811118354768e-05, + "loss": 0.7985, + "step": 25520 + }, + { + "epoch": 0.76, + "learning_rate": 1.2173294612694838e-05, + "loss": 0.7839, + "step": 25530 + }, + { + "epoch": 0.76, + "learning_rate": 1.2158478041841996e-05, + "loss": 0.7955, + "step": 25540 + }, + { + "epoch": 0.76, + "learning_rate": 1.2143661470989155e-05, + "loss": 0.9498, + "step": 25550 + }, + { + "epoch": 0.76, + "learning_rate": 1.2128844900136313e-05, + "loss": 0.7866, + "step": 25560 + }, + { + "epoch": 0.76, + "learning_rate": 1.2114028329283472e-05, + "loss": 0.9004, + "step": 25570 + }, + { + "epoch": 0.76, + "learning_rate": 1.209921175843063e-05, + "loss": 0.8698, + "step": 25580 + }, + { + "epoch": 0.76, + "learning_rate": 1.2084395187577788e-05, + "loss": 0.8557, + "step": 25590 + }, + { + "epoch": 0.76, + "learning_rate": 1.2069578616724945e-05, + "loss": 0.8167, + "step": 25600 + }, + { + "epoch": 0.76, + "learning_rate": 1.2054762045872104e-05, + "loss": 0.9107, + "step": 25610 + }, + { + "epoch": 0.76, + "learning_rate": 1.2039945475019263e-05, + "loss": 0.7753, + "step": 25620 + }, + { + "epoch": 0.76, + "learning_rate": 1.202512890416642e-05, + "loss": 0.8963, + "step": 25630 + }, + { + "epoch": 0.76, + "learning_rate": 1.2010312333313577e-05, + "loss": 0.9922, + "step": 25640 + }, + { + "epoch": 0.76, + "learning_rate": 1.1995495762460738e-05, + "loss": 0.8771, + "step": 25650 + }, + { + "epoch": 0.76, + "learning_rate": 1.1980679191607895e-05, + "loss": 0.9021, + "step": 25660 + }, + { + "epoch": 0.76, + "learning_rate": 1.1965862620755052e-05, + "loss": 0.8496, + "step": 25670 + }, + { + "epoch": 0.76, + "learning_rate": 1.1951046049902211e-05, + "loss": 0.8117, + "step": 25680 + }, + { + "epoch": 0.76, + "learning_rate": 1.193622947904937e-05, + "loss": 0.8069, + "step": 25690 + }, + { + "epoch": 0.76, + "learning_rate": 1.1921412908196527e-05, + "loss": 0.7271, + "step": 25700 + }, + { + "epoch": 0.76, + "learning_rate": 1.1906596337343685e-05, + "loss": 0.7499, + "step": 25710 + }, + { + "epoch": 0.76, + "learning_rate": 1.1891779766490845e-05, + "loss": 0.7856, + "step": 25720 + }, + { + "epoch": 0.76, + "learning_rate": 1.1876963195638003e-05, + "loss": 0.7633, + "step": 25730 + }, + { + "epoch": 0.76, + "learning_rate": 1.186214662478516e-05, + "loss": 0.8777, + "step": 25740 + }, + { + "epoch": 0.76, + "learning_rate": 1.1847330053932319e-05, + "loss": 0.8289, + "step": 25750 + }, + { + "epoch": 0.76, + "learning_rate": 1.1832513483079478e-05, + "loss": 0.9075, + "step": 25760 + }, + { + "epoch": 0.76, + "learning_rate": 1.1817696912226635e-05, + "loss": 0.8961, + "step": 25770 + }, + { + "epoch": 0.76, + "learning_rate": 1.1802880341373792e-05, + "loss": 0.7867, + "step": 25780 + }, + { + "epoch": 0.76, + "learning_rate": 1.1788063770520951e-05, + "loss": 0.7762, + "step": 25790 + }, + { + "epoch": 0.76, + "learning_rate": 1.177324719966811e-05, + "loss": 0.8488, + "step": 25800 + }, + { + "epoch": 0.76, + "learning_rate": 1.1758430628815267e-05, + "loss": 0.8515, + "step": 25810 + }, + { + "epoch": 0.77, + "learning_rate": 1.1743614057962424e-05, + "loss": 0.8932, + "step": 25820 + }, + { + "epoch": 0.77, + "learning_rate": 1.1728797487109585e-05, + "loss": 0.965, + "step": 25830 + }, + { + "epoch": 0.77, + "learning_rate": 1.1713980916256742e-05, + "loss": 0.8563, + "step": 25840 + }, + { + "epoch": 0.77, + "learning_rate": 1.16991643454039e-05, + "loss": 0.8934, + "step": 25850 + }, + { + "epoch": 0.77, + "learning_rate": 1.1684347774551058e-05, + "loss": 0.7615, + "step": 25860 + }, + { + "epoch": 0.77, + "learning_rate": 1.1669531203698217e-05, + "loss": 0.8583, + "step": 25870 + }, + { + "epoch": 0.77, + "learning_rate": 1.1654714632845375e-05, + "loss": 0.7768, + "step": 25880 + }, + { + "epoch": 0.77, + "learning_rate": 1.1639898061992532e-05, + "loss": 0.8065, + "step": 25890 + }, + { + "epoch": 0.77, + "learning_rate": 1.162508149113969e-05, + "loss": 0.7745, + "step": 25900 + }, + { + "epoch": 0.77, + "learning_rate": 1.161026492028685e-05, + "loss": 1.0746, + "step": 25910 + }, + { + "epoch": 0.77, + "learning_rate": 1.1595448349434007e-05, + "loss": 0.8585, + "step": 25920 + }, + { + "epoch": 0.77, + "learning_rate": 1.1580631778581166e-05, + "loss": 0.956, + "step": 25930 + }, + { + "epoch": 0.77, + "learning_rate": 1.1565815207728325e-05, + "loss": 0.8679, + "step": 25940 + }, + { + "epoch": 0.77, + "learning_rate": 1.1550998636875482e-05, + "loss": 0.7557, + "step": 25950 + }, + { + "epoch": 0.77, + "learning_rate": 1.153618206602264e-05, + "loss": 0.7569, + "step": 25960 + }, + { + "epoch": 0.77, + "learning_rate": 1.1521365495169798e-05, + "loss": 0.7993, + "step": 25970 + }, + { + "epoch": 0.77, + "learning_rate": 1.1506548924316957e-05, + "loss": 0.6295, + "step": 25980 + }, + { + "epoch": 0.77, + "learning_rate": 1.1491732353464114e-05, + "loss": 0.7848, + "step": 25990 + }, + { + "epoch": 0.77, + "learning_rate": 1.1476915782611273e-05, + "loss": 0.7587, + "step": 26000 + }, + { + "epoch": 0.77, + "learning_rate": 1.1462099211758432e-05, + "loss": 0.7593, + "step": 26010 + }, + { + "epoch": 0.77, + "learning_rate": 1.144728264090559e-05, + "loss": 0.8481, + "step": 26020 + }, + { + "epoch": 0.77, + "learning_rate": 1.1432466070052747e-05, + "loss": 0.8453, + "step": 26030 + }, + { + "epoch": 0.77, + "learning_rate": 1.1417649499199906e-05, + "loss": 0.9776, + "step": 26040 + }, + { + "epoch": 0.77, + "learning_rate": 1.1402832928347064e-05, + "loss": 0.8218, + "step": 26050 + }, + { + "epoch": 0.77, + "learning_rate": 1.1388016357494222e-05, + "loss": 0.9226, + "step": 26060 + }, + { + "epoch": 0.77, + "learning_rate": 1.137319978664138e-05, + "loss": 0.8553, + "step": 26070 + }, + { + "epoch": 0.77, + "learning_rate": 1.1358383215788538e-05, + "loss": 0.8774, + "step": 26080 + }, + { + "epoch": 0.77, + "learning_rate": 1.1343566644935697e-05, + "loss": 0.8202, + "step": 26090 + }, + { + "epoch": 0.77, + "learning_rate": 1.1328750074082854e-05, + "loss": 0.9213, + "step": 26100 + }, + { + "epoch": 0.77, + "learning_rate": 1.1313933503230013e-05, + "loss": 0.8127, + "step": 26110 + }, + { + "epoch": 0.77, + "learning_rate": 1.1299116932377172e-05, + "loss": 0.9204, + "step": 26120 + }, + { + "epoch": 0.77, + "learning_rate": 1.1284300361524329e-05, + "loss": 0.8596, + "step": 26130 + }, + { + "epoch": 0.77, + "learning_rate": 1.1269483790671488e-05, + "loss": 0.9033, + "step": 26140 + }, + { + "epoch": 0.77, + "learning_rate": 1.1254667219818645e-05, + "loss": 0.9182, + "step": 26150 + }, + { + "epoch": 0.78, + "learning_rate": 1.1239850648965804e-05, + "loss": 0.862, + "step": 26160 + }, + { + "epoch": 0.78, + "learning_rate": 1.1225034078112961e-05, + "loss": 0.6718, + "step": 26170 + }, + { + "epoch": 0.78, + "learning_rate": 1.121021750726012e-05, + "loss": 0.7975, + "step": 26180 + }, + { + "epoch": 0.78, + "learning_rate": 1.119540093640728e-05, + "loss": 0.7768, + "step": 26190 + }, + { + "epoch": 0.78, + "learning_rate": 1.1180584365554437e-05, + "loss": 0.8502, + "step": 26200 + }, + { + "epoch": 0.78, + "learning_rate": 1.1165767794701595e-05, + "loss": 0.8791, + "step": 26210 + }, + { + "epoch": 0.78, + "learning_rate": 1.1150951223848753e-05, + "loss": 0.7101, + "step": 26220 + }, + { + "epoch": 0.78, + "learning_rate": 1.1136134652995912e-05, + "loss": 0.8722, + "step": 26230 + }, + { + "epoch": 0.78, + "learning_rate": 1.1121318082143069e-05, + "loss": 0.9821, + "step": 26240 + }, + { + "epoch": 0.78, + "learning_rate": 1.1106501511290228e-05, + "loss": 0.8882, + "step": 26250 + }, + { + "epoch": 0.78, + "learning_rate": 1.1091684940437385e-05, + "loss": 0.8302, + "step": 26260 + }, + { + "epoch": 0.78, + "learning_rate": 1.1076868369584544e-05, + "loss": 0.7746, + "step": 26270 + }, + { + "epoch": 0.78, + "learning_rate": 1.1062051798731703e-05, + "loss": 0.8006, + "step": 26280 + }, + { + "epoch": 0.78, + "learning_rate": 1.104723522787886e-05, + "loss": 0.8414, + "step": 26290 + }, + { + "epoch": 0.78, + "learning_rate": 1.1032418657026019e-05, + "loss": 0.9094, + "step": 26300 + }, + { + "epoch": 0.78, + "learning_rate": 1.1017602086173176e-05, + "loss": 0.842, + "step": 26310 + }, + { + "epoch": 0.78, + "learning_rate": 1.1002785515320335e-05, + "loss": 0.8943, + "step": 26320 + }, + { + "epoch": 0.78, + "learning_rate": 1.0987968944467492e-05, + "loss": 0.7498, + "step": 26330 + }, + { + "epoch": 0.78, + "learning_rate": 1.0973152373614651e-05, + "loss": 0.8495, + "step": 26340 + }, + { + "epoch": 0.78, + "learning_rate": 1.095833580276181e-05, + "loss": 0.8342, + "step": 26350 + }, + { + "epoch": 0.78, + "learning_rate": 1.0943519231908967e-05, + "loss": 0.8103, + "step": 26360 + }, + { + "epoch": 0.78, + "learning_rate": 1.0928702661056125e-05, + "loss": 0.7783, + "step": 26370 + }, + { + "epoch": 0.78, + "learning_rate": 1.0913886090203284e-05, + "loss": 0.7801, + "step": 26380 + }, + { + "epoch": 0.78, + "learning_rate": 1.0899069519350443e-05, + "loss": 0.8705, + "step": 26390 + }, + { + "epoch": 0.78, + "learning_rate": 1.08842529484976e-05, + "loss": 0.8933, + "step": 26400 + }, + { + "epoch": 0.78, + "learning_rate": 1.0869436377644759e-05, + "loss": 0.6934, + "step": 26410 + }, + { + "epoch": 0.78, + "learning_rate": 1.0854619806791918e-05, + "loss": 0.7823, + "step": 26420 + }, + { + "epoch": 0.78, + "learning_rate": 1.0839803235939075e-05, + "loss": 0.819, + "step": 26430 + }, + { + "epoch": 0.78, + "learning_rate": 1.0824986665086232e-05, + "loss": 0.7221, + "step": 26440 + }, + { + "epoch": 0.78, + "learning_rate": 1.0810170094233391e-05, + "loss": 0.8498, + "step": 26450 + }, + { + "epoch": 0.78, + "learning_rate": 1.079535352338055e-05, + "loss": 0.7632, + "step": 26460 + }, + { + "epoch": 0.78, + "learning_rate": 1.0780536952527707e-05, + "loss": 0.8229, + "step": 26470 + }, + { + "epoch": 0.78, + "learning_rate": 1.0765720381674866e-05, + "loss": 0.7822, + "step": 26480 + }, + { + "epoch": 0.78, + "learning_rate": 1.0750903810822025e-05, + "loss": 0.7216, + "step": 26490 + }, + { + "epoch": 0.79, + "learning_rate": 1.0736087239969182e-05, + "loss": 0.855, + "step": 26500 + }, + { + "epoch": 0.79, + "learning_rate": 1.072127066911634e-05, + "loss": 0.9372, + "step": 26510 + }, + { + "epoch": 0.79, + "learning_rate": 1.0706454098263498e-05, + "loss": 0.8273, + "step": 26520 + }, + { + "epoch": 0.79, + "learning_rate": 1.0691637527410657e-05, + "loss": 0.8511, + "step": 26530 + }, + { + "epoch": 0.79, + "learning_rate": 1.0676820956557815e-05, + "loss": 0.953, + "step": 26540 + }, + { + "epoch": 0.79, + "learning_rate": 1.0662004385704972e-05, + "loss": 0.7749, + "step": 26550 + }, + { + "epoch": 0.79, + "learning_rate": 1.0647187814852132e-05, + "loss": 0.9035, + "step": 26560 + }, + { + "epoch": 0.79, + "learning_rate": 1.063237124399929e-05, + "loss": 0.7956, + "step": 26570 + }, + { + "epoch": 0.79, + "learning_rate": 1.0617554673146447e-05, + "loss": 0.8819, + "step": 26580 + }, + { + "epoch": 0.79, + "learning_rate": 1.0602738102293606e-05, + "loss": 0.9241, + "step": 26590 + }, + { + "epoch": 0.79, + "learning_rate": 1.0587921531440765e-05, + "loss": 0.8081, + "step": 26600 + }, + { + "epoch": 0.79, + "learning_rate": 1.0573104960587922e-05, + "loss": 0.8689, + "step": 26610 + }, + { + "epoch": 0.79, + "learning_rate": 1.055828838973508e-05, + "loss": 0.8158, + "step": 26620 + }, + { + "epoch": 0.79, + "learning_rate": 1.0543471818882238e-05, + "loss": 0.9474, + "step": 26630 + }, + { + "epoch": 0.79, + "learning_rate": 1.0528655248029397e-05, + "loss": 0.8814, + "step": 26640 + }, + { + "epoch": 0.79, + "learning_rate": 1.0513838677176554e-05, + "loss": 0.8688, + "step": 26650 + }, + { + "epoch": 0.79, + "learning_rate": 1.0499022106323712e-05, + "loss": 0.8209, + "step": 26660 + }, + { + "epoch": 0.79, + "learning_rate": 1.0484205535470872e-05, + "loss": 0.7129, + "step": 26670 + }, + { + "epoch": 0.79, + "learning_rate": 1.046938896461803e-05, + "loss": 0.8338, + "step": 26680 + }, + { + "epoch": 0.79, + "learning_rate": 1.0454572393765187e-05, + "loss": 0.876, + "step": 26690 + }, + { + "epoch": 0.79, + "learning_rate": 1.0439755822912346e-05, + "loss": 0.8838, + "step": 26700 + }, + { + "epoch": 0.79, + "learning_rate": 1.0424939252059504e-05, + "loss": 0.9687, + "step": 26710 + }, + { + "epoch": 0.79, + "learning_rate": 1.0410122681206662e-05, + "loss": 0.9331, + "step": 26720 + }, + { + "epoch": 0.79, + "learning_rate": 1.0395306110353819e-05, + "loss": 0.8596, + "step": 26730 + }, + { + "epoch": 0.79, + "learning_rate": 1.038048953950098e-05, + "loss": 0.9814, + "step": 26740 + }, + { + "epoch": 0.79, + "learning_rate": 1.0365672968648137e-05, + "loss": 0.8254, + "step": 26750 + }, + { + "epoch": 0.79, + "learning_rate": 1.0350856397795294e-05, + "loss": 0.8314, + "step": 26760 + }, + { + "epoch": 0.79, + "learning_rate": 1.0336039826942453e-05, + "loss": 0.8254, + "step": 26770 + }, + { + "epoch": 0.79, + "learning_rate": 1.0321223256089612e-05, + "loss": 0.8073, + "step": 26780 + }, + { + "epoch": 0.79, + "learning_rate": 1.0306406685236769e-05, + "loss": 0.8626, + "step": 26790 + }, + { + "epoch": 0.79, + "learning_rate": 1.0291590114383926e-05, + "loss": 0.8962, + "step": 26800 + }, + { + "epoch": 0.79, + "learning_rate": 1.0276773543531085e-05, + "loss": 0.8781, + "step": 26810 + }, + { + "epoch": 0.79, + "learning_rate": 1.0261956972678244e-05, + "loss": 0.8088, + "step": 26820 + }, + { + "epoch": 0.8, + "learning_rate": 1.0247140401825401e-05, + "loss": 0.9101, + "step": 26830 + }, + { + "epoch": 0.8, + "learning_rate": 1.023232383097256e-05, + "loss": 0.8275, + "step": 26840 + }, + { + "epoch": 0.8, + "learning_rate": 1.021750726011972e-05, + "loss": 0.7513, + "step": 26850 + }, + { + "epoch": 0.8, + "learning_rate": 1.0202690689266877e-05, + "loss": 0.9617, + "step": 26860 + }, + { + "epoch": 0.8, + "learning_rate": 1.0187874118414034e-05, + "loss": 0.8926, + "step": 26870 + }, + { + "epoch": 0.8, + "learning_rate": 1.0173057547561193e-05, + "loss": 0.8115, + "step": 26880 + }, + { + "epoch": 0.8, + "learning_rate": 1.0158240976708352e-05, + "loss": 0.8416, + "step": 26890 + }, + { + "epoch": 0.8, + "learning_rate": 1.0143424405855509e-05, + "loss": 0.9499, + "step": 26900 + }, + { + "epoch": 0.8, + "learning_rate": 1.0128607835002668e-05, + "loss": 0.8245, + "step": 26910 + }, + { + "epoch": 0.8, + "learning_rate": 1.0113791264149827e-05, + "loss": 0.8692, + "step": 26920 + }, + { + "epoch": 0.8, + "learning_rate": 1.0098974693296984e-05, + "loss": 0.7294, + "step": 26930 + }, + { + "epoch": 0.8, + "learning_rate": 1.0084158122444141e-05, + "loss": 0.911, + "step": 26940 + }, + { + "epoch": 0.8, + "learning_rate": 1.00693415515913e-05, + "loss": 0.8318, + "step": 26950 + }, + { + "epoch": 0.8, + "learning_rate": 1.0054524980738459e-05, + "loss": 0.8141, + "step": 26960 + }, + { + "epoch": 0.8, + "learning_rate": 1.0039708409885616e-05, + "loss": 0.7381, + "step": 26970 + }, + { + "epoch": 0.8, + "learning_rate": 1.0024891839032775e-05, + "loss": 0.8144, + "step": 26980 + }, + { + "epoch": 0.8, + "learning_rate": 1.0010075268179932e-05, + "loss": 0.825, + "step": 26990 + }, + { + "epoch": 0.8, + "learning_rate": 9.995258697327091e-06, + "loss": 0.7575, + "step": 27000 + }, + { + "epoch": 0.8, + "learning_rate": 9.980442126474249e-06, + "loss": 0.8501, + "step": 27010 + }, + { + "epoch": 0.8, + "learning_rate": 9.965625555621407e-06, + "loss": 0.8956, + "step": 27020 + }, + { + "epoch": 0.8, + "learning_rate": 9.950808984768566e-06, + "loss": 0.8825, + "step": 27030 + }, + { + "epoch": 0.8, + "learning_rate": 9.935992413915724e-06, + "loss": 0.9027, + "step": 27040 + }, + { + "epoch": 0.8, + "learning_rate": 9.921175843062883e-06, + "loss": 0.9106, + "step": 27050 + }, + { + "epoch": 0.8, + "learning_rate": 9.90635927221004e-06, + "loss": 0.8476, + "step": 27060 + }, + { + "epoch": 0.8, + "learning_rate": 9.891542701357199e-06, + "loss": 0.7358, + "step": 27070 + }, + { + "epoch": 0.8, + "learning_rate": 9.876726130504356e-06, + "loss": 0.856, + "step": 27080 + }, + { + "epoch": 0.8, + "learning_rate": 9.861909559651515e-06, + "loss": 0.7659, + "step": 27090 + }, + { + "epoch": 0.8, + "learning_rate": 9.847092988798672e-06, + "loss": 0.9216, + "step": 27100 + }, + { + "epoch": 0.8, + "learning_rate": 9.832276417945831e-06, + "loss": 0.729, + "step": 27110 + }, + { + "epoch": 0.8, + "learning_rate": 9.81745984709299e-06, + "loss": 0.8307, + "step": 27120 + }, + { + "epoch": 0.8, + "learning_rate": 9.802643276240147e-06, + "loss": 0.8572, + "step": 27130 + }, + { + "epoch": 0.8, + "learning_rate": 9.787826705387306e-06, + "loss": 0.8346, + "step": 27140 + }, + { + "epoch": 0.8, + "learning_rate": 9.773010134534463e-06, + "loss": 1.0176, + "step": 27150 + }, + { + "epoch": 0.8, + "learning_rate": 9.758193563681622e-06, + "loss": 0.8436, + "step": 27160 + }, + { + "epoch": 0.81, + "learning_rate": 9.74337699282878e-06, + "loss": 0.8001, + "step": 27170 + }, + { + "epoch": 0.81, + "learning_rate": 9.728560421975938e-06, + "loss": 0.7491, + "step": 27180 + }, + { + "epoch": 0.81, + "learning_rate": 9.713743851123097e-06, + "loss": 0.7306, + "step": 27190 + }, + { + "epoch": 0.81, + "learning_rate": 9.698927280270255e-06, + "loss": 0.8969, + "step": 27200 + }, + { + "epoch": 0.81, + "learning_rate": 9.684110709417414e-06, + "loss": 0.7704, + "step": 27210 + }, + { + "epoch": 0.81, + "learning_rate": 9.66929413856457e-06, + "loss": 0.8897, + "step": 27220 + }, + { + "epoch": 0.81, + "learning_rate": 9.65447756771173e-06, + "loss": 0.7944, + "step": 27230 + }, + { + "epoch": 0.81, + "learning_rate": 9.639660996858887e-06, + "loss": 0.7888, + "step": 27240 + }, + { + "epoch": 0.81, + "learning_rate": 9.624844426006046e-06, + "loss": 0.8273, + "step": 27250 + }, + { + "epoch": 0.81, + "learning_rate": 9.610027855153205e-06, + "loss": 0.8328, + "step": 27260 + }, + { + "epoch": 0.81, + "learning_rate": 9.595211284300362e-06, + "loss": 0.8889, + "step": 27270 + }, + { + "epoch": 0.81, + "learning_rate": 9.58039471344752e-06, + "loss": 0.7882, + "step": 27280 + }, + { + "epoch": 0.81, + "learning_rate": 9.565578142594678e-06, + "loss": 0.8158, + "step": 27290 + }, + { + "epoch": 0.81, + "learning_rate": 9.550761571741837e-06, + "loss": 0.7459, + "step": 27300 + }, + { + "epoch": 0.81, + "learning_rate": 9.535945000888994e-06, + "loss": 0.87, + "step": 27310 + }, + { + "epoch": 0.81, + "learning_rate": 9.521128430036153e-06, + "loss": 0.7732, + "step": 27320 + }, + { + "epoch": 0.81, + "learning_rate": 9.506311859183312e-06, + "loss": 0.8097, + "step": 27330 + }, + { + "epoch": 0.81, + "learning_rate": 9.49149528833047e-06, + "loss": 0.8793, + "step": 27340 + }, + { + "epoch": 0.81, + "learning_rate": 9.476678717477627e-06, + "loss": 0.9013, + "step": 27350 + }, + { + "epoch": 0.81, + "learning_rate": 9.461862146624786e-06, + "loss": 0.8694, + "step": 27360 + }, + { + "epoch": 0.81, + "learning_rate": 9.447045575771945e-06, + "loss": 0.8583, + "step": 27370 + }, + { + "epoch": 0.81, + "learning_rate": 9.432229004919102e-06, + "loss": 0.8503, + "step": 27380 + }, + { + "epoch": 0.81, + "learning_rate": 9.417412434066259e-06, + "loss": 0.8637, + "step": 27390 + }, + { + "epoch": 0.81, + "learning_rate": 9.40259586321342e-06, + "loss": 0.7903, + "step": 27400 + }, + { + "epoch": 0.81, + "learning_rate": 9.387779292360577e-06, + "loss": 0.8728, + "step": 27410 + }, + { + "epoch": 0.81, + "learning_rate": 9.372962721507734e-06, + "loss": 0.7439, + "step": 27420 + }, + { + "epoch": 0.81, + "learning_rate": 9.358146150654893e-06, + "loss": 0.8156, + "step": 27430 + }, + { + "epoch": 0.81, + "learning_rate": 9.343329579802052e-06, + "loss": 0.9474, + "step": 27440 + }, + { + "epoch": 0.81, + "learning_rate": 9.328513008949209e-06, + "loss": 0.9365, + "step": 27450 + }, + { + "epoch": 0.81, + "learning_rate": 9.313696438096366e-06, + "loss": 0.786, + "step": 27460 + }, + { + "epoch": 0.81, + "learning_rate": 9.298879867243527e-06, + "loss": 0.7976, + "step": 27470 + }, + { + "epoch": 0.81, + "learning_rate": 9.284063296390684e-06, + "loss": 0.9132, + "step": 27480 + }, + { + "epoch": 0.81, + "learning_rate": 9.269246725537841e-06, + "loss": 0.8022, + "step": 27490 + }, + { + "epoch": 0.81, + "learning_rate": 9.254430154685e-06, + "loss": 0.8411, + "step": 27500 + }, + { + "epoch": 0.82, + "learning_rate": 9.23961358383216e-06, + "loss": 0.8011, + "step": 27510 + }, + { + "epoch": 0.82, + "learning_rate": 9.224797012979317e-06, + "loss": 0.9507, + "step": 27520 + }, + { + "epoch": 0.82, + "learning_rate": 9.209980442126474e-06, + "loss": 0.7688, + "step": 27530 + }, + { + "epoch": 0.82, + "learning_rate": 9.195163871273633e-06, + "loss": 0.7427, + "step": 27540 + }, + { + "epoch": 0.82, + "learning_rate": 9.180347300420792e-06, + "loss": 0.7929, + "step": 27550 + }, + { + "epoch": 0.82, + "learning_rate": 9.165530729567949e-06, + "loss": 0.7609, + "step": 27560 + }, + { + "epoch": 0.82, + "learning_rate": 9.150714158715106e-06, + "loss": 0.8253, + "step": 27570 + }, + { + "epoch": 0.82, + "learning_rate": 9.135897587862267e-06, + "loss": 0.8494, + "step": 27580 + }, + { + "epoch": 0.82, + "learning_rate": 9.121081017009424e-06, + "loss": 0.7224, + "step": 27590 + }, + { + "epoch": 0.82, + "learning_rate": 9.106264446156581e-06, + "loss": 0.8241, + "step": 27600 + }, + { + "epoch": 0.82, + "learning_rate": 9.09144787530374e-06, + "loss": 0.8178, + "step": 27610 + }, + { + "epoch": 0.82, + "learning_rate": 9.076631304450899e-06, + "loss": 0.83, + "step": 27620 + }, + { + "epoch": 0.82, + "learning_rate": 9.061814733598056e-06, + "loss": 0.6989, + "step": 27630 + }, + { + "epoch": 0.82, + "learning_rate": 9.046998162745214e-06, + "loss": 1.1115, + "step": 27640 + }, + { + "epoch": 0.82, + "learning_rate": 9.032181591892374e-06, + "loss": 0.8682, + "step": 27650 + }, + { + "epoch": 0.82, + "learning_rate": 9.017365021039531e-06, + "loss": 0.8742, + "step": 27660 + }, + { + "epoch": 0.82, + "learning_rate": 9.002548450186689e-06, + "loss": 0.9065, + "step": 27670 + }, + { + "epoch": 0.82, + "learning_rate": 8.987731879333848e-06, + "loss": 0.7826, + "step": 27680 + }, + { + "epoch": 0.82, + "learning_rate": 8.972915308481006e-06, + "loss": 0.6103, + "step": 27690 + }, + { + "epoch": 0.82, + "learning_rate": 8.958098737628164e-06, + "loss": 0.795, + "step": 27700 + }, + { + "epoch": 0.82, + "learning_rate": 8.943282166775321e-06, + "loss": 0.9199, + "step": 27710 + }, + { + "epoch": 0.82, + "learning_rate": 8.92846559592248e-06, + "loss": 0.8204, + "step": 27720 + }, + { + "epoch": 0.82, + "learning_rate": 8.913649025069639e-06, + "loss": 0.7308, + "step": 27730 + }, + { + "epoch": 0.82, + "learning_rate": 8.898832454216796e-06, + "loss": 0.6076, + "step": 27740 + }, + { + "epoch": 0.82, + "learning_rate": 8.884015883363955e-06, + "loss": 0.9196, + "step": 27750 + }, + { + "epoch": 0.82, + "learning_rate": 8.869199312511114e-06, + "loss": 0.8956, + "step": 27760 + }, + { + "epoch": 0.82, + "learning_rate": 8.854382741658271e-06, + "loss": 0.8484, + "step": 27770 + }, + { + "epoch": 0.82, + "learning_rate": 8.839566170805428e-06, + "loss": 0.8989, + "step": 27780 + }, + { + "epoch": 0.82, + "learning_rate": 8.824749599952587e-06, + "loss": 0.8727, + "step": 27790 + }, + { + "epoch": 0.82, + "learning_rate": 8.809933029099746e-06, + "loss": 0.7134, + "step": 27800 + }, + { + "epoch": 0.82, + "learning_rate": 8.795116458246903e-06, + "loss": 0.8573, + "step": 27810 + }, + { + "epoch": 0.82, + "learning_rate": 8.780299887394062e-06, + "loss": 0.812, + "step": 27820 + }, + { + "epoch": 0.82, + "learning_rate": 8.76548331654122e-06, + "loss": 0.7814, + "step": 27830 + }, + { + "epoch": 0.82, + "learning_rate": 8.750666745688378e-06, + "loss": 0.7689, + "step": 27840 + }, + { + "epoch": 0.83, + "learning_rate": 8.735850174835536e-06, + "loss": 0.8401, + "step": 27850 + }, + { + "epoch": 0.83, + "learning_rate": 8.721033603982695e-06, + "loss": 0.7971, + "step": 27860 + }, + { + "epoch": 0.83, + "learning_rate": 8.706217033129854e-06, + "loss": 0.7765, + "step": 27870 + }, + { + "epoch": 0.83, + "learning_rate": 8.69140046227701e-06, + "loss": 0.9027, + "step": 27880 + }, + { + "epoch": 0.83, + "learning_rate": 8.67658389142417e-06, + "loss": 0.964, + "step": 27890 + }, + { + "epoch": 0.83, + "learning_rate": 8.661767320571327e-06, + "loss": 0.7856, + "step": 27900 + }, + { + "epoch": 0.83, + "learning_rate": 8.646950749718486e-06, + "loss": 1.0029, + "step": 27910 + }, + { + "epoch": 0.83, + "learning_rate": 8.632134178865643e-06, + "loss": 0.8419, + "step": 27920 + }, + { + "epoch": 0.83, + "learning_rate": 8.617317608012802e-06, + "loss": 0.8328, + "step": 27930 + }, + { + "epoch": 0.83, + "learning_rate": 8.602501037159961e-06, + "loss": 0.75, + "step": 27940 + }, + { + "epoch": 0.83, + "learning_rate": 8.587684466307118e-06, + "loss": 0.9438, + "step": 27950 + }, + { + "epoch": 0.83, + "learning_rate": 8.572867895454277e-06, + "loss": 0.8421, + "step": 27960 + }, + { + "epoch": 0.83, + "learning_rate": 8.558051324601434e-06, + "loss": 1.1142, + "step": 27970 + }, + { + "epoch": 0.83, + "learning_rate": 8.543234753748593e-06, + "loss": 1.0156, + "step": 27980 + }, + { + "epoch": 0.83, + "learning_rate": 8.52841818289575e-06, + "loss": 0.9018, + "step": 27990 + }, + { + "epoch": 0.83, + "learning_rate": 8.51360161204291e-06, + "loss": 0.7118, + "step": 28000 + }, + { + "epoch": 0.83, + "learning_rate": 8.498785041190067e-06, + "loss": 0.8623, + "step": 28010 + }, + { + "epoch": 0.83, + "learning_rate": 8.483968470337226e-06, + "loss": 0.773, + "step": 28020 + }, + { + "epoch": 0.83, + "learning_rate": 8.469151899484385e-06, + "loss": 0.8335, + "step": 28030 + }, + { + "epoch": 0.83, + "learning_rate": 8.454335328631542e-06, + "loss": 0.7075, + "step": 28040 + }, + { + "epoch": 0.83, + "learning_rate": 8.4395187577787e-06, + "loss": 0.9465, + "step": 28050 + }, + { + "epoch": 0.83, + "learning_rate": 8.424702186925858e-06, + "loss": 0.8742, + "step": 28060 + }, + { + "epoch": 0.83, + "learning_rate": 8.409885616073017e-06, + "loss": 0.7308, + "step": 28070 + }, + { + "epoch": 0.83, + "learning_rate": 8.395069045220174e-06, + "loss": 0.8197, + "step": 28080 + }, + { + "epoch": 0.83, + "learning_rate": 8.380252474367333e-06, + "loss": 0.752, + "step": 28090 + }, + { + "epoch": 0.83, + "learning_rate": 8.365435903514492e-06, + "loss": 0.8653, + "step": 28100 + }, + { + "epoch": 0.83, + "learning_rate": 8.35061933266165e-06, + "loss": 0.7343, + "step": 28110 + }, + { + "epoch": 0.83, + "learning_rate": 8.335802761808806e-06, + "loss": 0.7, + "step": 28120 + }, + { + "epoch": 0.83, + "learning_rate": 8.320986190955965e-06, + "loss": 0.7569, + "step": 28130 + }, + { + "epoch": 0.83, + "learning_rate": 8.306169620103124e-06, + "loss": 1.0034, + "step": 28140 + }, + { + "epoch": 0.83, + "learning_rate": 8.291353049250281e-06, + "loss": 0.8393, + "step": 28150 + }, + { + "epoch": 0.83, + "learning_rate": 8.27653647839744e-06, + "loss": 0.8237, + "step": 28160 + }, + { + "epoch": 0.83, + "learning_rate": 8.2617199075446e-06, + "loss": 0.787, + "step": 28170 + }, + { + "epoch": 0.84, + "learning_rate": 8.246903336691757e-06, + "loss": 0.8261, + "step": 28180 + }, + { + "epoch": 0.84, + "learning_rate": 8.232086765838914e-06, + "loss": 0.7429, + "step": 28190 + }, + { + "epoch": 0.84, + "learning_rate": 8.217270194986073e-06, + "loss": 0.7722, + "step": 28200 + }, + { + "epoch": 0.84, + "learning_rate": 8.202453624133232e-06, + "loss": 0.8481, + "step": 28210 + }, + { + "epoch": 0.84, + "learning_rate": 8.187637053280389e-06, + "loss": 0.8171, + "step": 28220 + }, + { + "epoch": 0.84, + "learning_rate": 8.172820482427548e-06, + "loss": 0.6574, + "step": 28230 + }, + { + "epoch": 0.84, + "learning_rate": 8.158003911574707e-06, + "loss": 0.8484, + "step": 28240 + }, + { + "epoch": 0.84, + "learning_rate": 8.143187340721864e-06, + "loss": 0.8342, + "step": 28250 + }, + { + "epoch": 0.84, + "learning_rate": 8.128370769869021e-06, + "loss": 0.9326, + "step": 28260 + }, + { + "epoch": 0.84, + "learning_rate": 8.11355419901618e-06, + "loss": 0.724, + "step": 28270 + }, + { + "epoch": 0.84, + "learning_rate": 8.098737628163339e-06, + "loss": 0.9796, + "step": 28280 + }, + { + "epoch": 0.84, + "learning_rate": 8.083921057310496e-06, + "loss": 0.7743, + "step": 28290 + }, + { + "epoch": 0.84, + "learning_rate": 8.069104486457654e-06, + "loss": 0.9398, + "step": 28300 + }, + { + "epoch": 0.84, + "learning_rate": 8.054287915604814e-06, + "loss": 0.7642, + "step": 28310 + }, + { + "epoch": 0.84, + "learning_rate": 8.039471344751971e-06, + "loss": 0.8557, + "step": 28320 + }, + { + "epoch": 0.84, + "learning_rate": 8.024654773899129e-06, + "loss": 0.787, + "step": 28330 + }, + { + "epoch": 0.84, + "learning_rate": 8.009838203046288e-06, + "loss": 0.8425, + "step": 28340 + }, + { + "epoch": 0.84, + "learning_rate": 7.995021632193446e-06, + "loss": 0.9097, + "step": 28350 + }, + { + "epoch": 0.84, + "learning_rate": 7.980205061340604e-06, + "loss": 0.8853, + "step": 28360 + }, + { + "epoch": 0.84, + "learning_rate": 7.965388490487761e-06, + "loss": 0.8632, + "step": 28370 + }, + { + "epoch": 0.84, + "learning_rate": 7.950571919634922e-06, + "loss": 0.6685, + "step": 28380 + }, + { + "epoch": 0.84, + "learning_rate": 7.935755348782079e-06, + "loss": 0.7471, + "step": 28390 + }, + { + "epoch": 0.84, + "learning_rate": 7.920938777929236e-06, + "loss": 0.9071, + "step": 28400 + }, + { + "epoch": 0.84, + "learning_rate": 7.906122207076393e-06, + "loss": 0.8453, + "step": 28410 + }, + { + "epoch": 0.84, + "learning_rate": 7.891305636223554e-06, + "loss": 1.0258, + "step": 28420 + }, + { + "epoch": 0.84, + "learning_rate": 7.876489065370711e-06, + "loss": 0.8297, + "step": 28430 + }, + { + "epoch": 0.84, + "learning_rate": 7.861672494517868e-06, + "loss": 0.6788, + "step": 28440 + }, + { + "epoch": 0.84, + "learning_rate": 7.846855923665027e-06, + "loss": 0.8631, + "step": 28450 + }, + { + "epoch": 0.84, + "learning_rate": 7.832039352812186e-06, + "loss": 0.8629, + "step": 28460 + }, + { + "epoch": 0.84, + "learning_rate": 7.817222781959343e-06, + "loss": 0.8019, + "step": 28470 + }, + { + "epoch": 0.84, + "learning_rate": 7.8024062111065e-06, + "loss": 0.8317, + "step": 28480 + }, + { + "epoch": 0.84, + "learning_rate": 7.787589640253661e-06, + "loss": 0.722, + "step": 28490 + }, + { + "epoch": 0.84, + "learning_rate": 7.772773069400818e-06, + "loss": 0.8622, + "step": 28500 + }, + { + "epoch": 0.84, + "learning_rate": 7.757956498547976e-06, + "loss": 0.8397, + "step": 28510 + }, + { + "epoch": 0.85, + "learning_rate": 7.743139927695135e-06, + "loss": 0.7164, + "step": 28520 + }, + { + "epoch": 0.85, + "learning_rate": 7.728323356842294e-06, + "loss": 0.853, + "step": 28530 + }, + { + "epoch": 0.85, + "learning_rate": 7.71350678598945e-06, + "loss": 0.7922, + "step": 28540 + }, + { + "epoch": 0.85, + "learning_rate": 7.698690215136608e-06, + "loss": 0.893, + "step": 28550 + }, + { + "epoch": 0.85, + "learning_rate": 7.683873644283767e-06, + "loss": 0.8251, + "step": 28560 + }, + { + "epoch": 0.85, + "learning_rate": 7.669057073430926e-06, + "loss": 0.9007, + "step": 28570 + }, + { + "epoch": 0.85, + "learning_rate": 7.654240502578083e-06, + "loss": 0.7733, + "step": 28580 + }, + { + "epoch": 0.85, + "learning_rate": 7.639423931725242e-06, + "loss": 0.822, + "step": 28590 + }, + { + "epoch": 0.85, + "learning_rate": 7.6246073608724e-06, + "loss": 0.7719, + "step": 28600 + }, + { + "epoch": 0.85, + "learning_rate": 7.609790790019558e-06, + "loss": 0.8517, + "step": 28610 + }, + { + "epoch": 0.85, + "learning_rate": 7.5949742191667155e-06, + "loss": 0.855, + "step": 28620 + }, + { + "epoch": 0.85, + "learning_rate": 7.580157648313875e-06, + "loss": 0.9967, + "step": 28630 + }, + { + "epoch": 0.85, + "learning_rate": 7.5653410774610325e-06, + "loss": 0.9014, + "step": 28640 + }, + { + "epoch": 0.85, + "learning_rate": 7.5505245066081905e-06, + "loss": 0.8207, + "step": 28650 + }, + { + "epoch": 0.85, + "learning_rate": 7.5357079357553495e-06, + "loss": 0.742, + "step": 28660 + }, + { + "epoch": 0.85, + "learning_rate": 7.5208913649025075e-06, + "loss": 0.6991, + "step": 28670 + }, + { + "epoch": 0.85, + "learning_rate": 7.506074794049666e-06, + "loss": 0.8417, + "step": 28680 + }, + { + "epoch": 0.85, + "learning_rate": 7.491258223196823e-06, + "loss": 0.6742, + "step": 28690 + }, + { + "epoch": 0.85, + "learning_rate": 7.476441652343983e-06, + "loss": 0.811, + "step": 28700 + }, + { + "epoch": 0.85, + "learning_rate": 7.46162508149114e-06, + "loss": 0.8786, + "step": 28710 + }, + { + "epoch": 0.85, + "learning_rate": 7.446808510638298e-06, + "loss": 0.8787, + "step": 28720 + }, + { + "epoch": 0.85, + "learning_rate": 7.431991939785457e-06, + "loss": 0.918, + "step": 28730 + }, + { + "epoch": 0.85, + "learning_rate": 7.417175368932615e-06, + "loss": 0.7166, + "step": 28740 + }, + { + "epoch": 0.85, + "learning_rate": 7.402358798079773e-06, + "loss": 0.754, + "step": 28750 + }, + { + "epoch": 0.85, + "learning_rate": 7.38754222722693e-06, + "loss": 0.9353, + "step": 28760 + }, + { + "epoch": 0.85, + "learning_rate": 7.372725656374089e-06, + "loss": 0.7204, + "step": 28770 + }, + { + "epoch": 0.85, + "learning_rate": 7.357909085521247e-06, + "loss": 0.8485, + "step": 28780 + }, + { + "epoch": 0.85, + "learning_rate": 7.343092514668405e-06, + "loss": 0.7644, + "step": 28790 + }, + { + "epoch": 0.85, + "learning_rate": 7.328275943815564e-06, + "loss": 0.7375, + "step": 28800 + }, + { + "epoch": 0.85, + "learning_rate": 7.313459372962722e-06, + "loss": 0.9146, + "step": 28810 + }, + { + "epoch": 0.85, + "learning_rate": 7.29864280210988e-06, + "loss": 0.8822, + "step": 28820 + }, + { + "epoch": 0.85, + "learning_rate": 7.283826231257038e-06, + "loss": 0.8327, + "step": 28830 + }, + { + "epoch": 0.85, + "learning_rate": 7.269009660404197e-06, + "loss": 0.6424, + "step": 28840 + }, + { + "epoch": 0.85, + "learning_rate": 7.254193089551355e-06, + "loss": 0.7733, + "step": 28850 + }, + { + "epoch": 0.86, + "learning_rate": 7.239376518698513e-06, + "loss": 0.8612, + "step": 28860 + }, + { + "epoch": 0.86, + "learning_rate": 7.224559947845672e-06, + "loss": 0.7965, + "step": 28870 + }, + { + "epoch": 0.86, + "learning_rate": 7.20974337699283e-06, + "loss": 0.8674, + "step": 28880 + }, + { + "epoch": 0.86, + "learning_rate": 7.194926806139987e-06, + "loss": 0.8873, + "step": 28890 + }, + { + "epoch": 0.86, + "learning_rate": 7.180110235287145e-06, + "loss": 0.8673, + "step": 28900 + }, + { + "epoch": 0.86, + "learning_rate": 7.165293664434304e-06, + "loss": 0.8005, + "step": 28910 + }, + { + "epoch": 0.86, + "learning_rate": 7.150477093581462e-06, + "loss": 0.7605, + "step": 28920 + }, + { + "epoch": 0.86, + "learning_rate": 7.135660522728619e-06, + "loss": 0.7572, + "step": 28930 + }, + { + "epoch": 0.86, + "learning_rate": 7.120843951875779e-06, + "loss": 0.8536, + "step": 28940 + }, + { + "epoch": 0.86, + "learning_rate": 7.106027381022936e-06, + "loss": 0.8039, + "step": 28950 + }, + { + "epoch": 0.86, + "learning_rate": 7.091210810170094e-06, + "loss": 0.6013, + "step": 28960 + }, + { + "epoch": 0.86, + "learning_rate": 7.0763942393172525e-06, + "loss": 0.8377, + "step": 28970 + }, + { + "epoch": 0.86, + "learning_rate": 7.061577668464411e-06, + "loss": 0.7033, + "step": 28980 + }, + { + "epoch": 0.86, + "learning_rate": 7.0467610976115695e-06, + "loss": 0.692, + "step": 28990 + }, + { + "epoch": 0.86, + "learning_rate": 7.031944526758727e-06, + "loss": 0.8825, + "step": 29000 + }, + { + "epoch": 0.86, + "learning_rate": 7.0171279559058865e-06, + "loss": 0.8131, + "step": 29010 + }, + { + "epoch": 0.86, + "learning_rate": 7.002311385053044e-06, + "loss": 0.7374, + "step": 29020 + }, + { + "epoch": 0.86, + "learning_rate": 6.987494814200202e-06, + "loss": 0.637, + "step": 29030 + }, + { + "epoch": 0.86, + "learning_rate": 6.97267824334736e-06, + "loss": 0.8886, + "step": 29040 + }, + { + "epoch": 0.86, + "learning_rate": 6.957861672494519e-06, + "loss": 0.9611, + "step": 29050 + }, + { + "epoch": 0.86, + "learning_rate": 6.943045101641676e-06, + "loss": 0.8604, + "step": 29060 + }, + { + "epoch": 0.86, + "learning_rate": 6.928228530788834e-06, + "loss": 0.8934, + "step": 29070 + }, + { + "epoch": 0.86, + "learning_rate": 6.913411959935993e-06, + "loss": 0.768, + "step": 29080 + }, + { + "epoch": 0.86, + "learning_rate": 6.898595389083151e-06, + "loss": 0.8102, + "step": 29090 + }, + { + "epoch": 0.86, + "learning_rate": 6.883778818230309e-06, + "loss": 0.8218, + "step": 29100 + }, + { + "epoch": 0.86, + "learning_rate": 6.8689622473774664e-06, + "loss": 0.8032, + "step": 29110 + }, + { + "epoch": 0.86, + "learning_rate": 6.854145676524626e-06, + "loss": 0.8869, + "step": 29120 + }, + { + "epoch": 0.86, + "learning_rate": 6.8393291056717834e-06, + "loss": 0.8738, + "step": 29130 + }, + { + "epoch": 0.86, + "learning_rate": 6.8245125348189415e-06, + "loss": 0.6768, + "step": 29140 + }, + { + "epoch": 0.86, + "learning_rate": 6.8096959639661004e-06, + "loss": 0.9605, + "step": 29150 + }, + { + "epoch": 0.86, + "learning_rate": 6.7948793931132585e-06, + "loss": 0.7273, + "step": 29160 + }, + { + "epoch": 0.86, + "learning_rate": 6.780062822260417e-06, + "loss": 0.8115, + "step": 29170 + }, + { + "epoch": 0.86, + "learning_rate": 6.765246251407574e-06, + "loss": 0.7178, + "step": 29180 + }, + { + "epoch": 0.86, + "learning_rate": 6.750429680554734e-06, + "loss": 0.8241, + "step": 29190 + }, + { + "epoch": 0.87, + "learning_rate": 6.735613109701891e-06, + "loss": 0.6951, + "step": 29200 + }, + { + "epoch": 0.87, + "learning_rate": 6.720796538849049e-06, + "loss": 0.9257, + "step": 29210 + }, + { + "epoch": 0.87, + "learning_rate": 6.705979967996208e-06, + "loss": 0.8817, + "step": 29220 + }, + { + "epoch": 0.87, + "learning_rate": 6.691163397143366e-06, + "loss": 0.8851, + "step": 29230 + }, + { + "epoch": 0.87, + "learning_rate": 6.676346826290523e-06, + "loss": 0.7645, + "step": 29240 + }, + { + "epoch": 0.87, + "learning_rate": 6.661530255437681e-06, + "loss": 0.8227, + "step": 29250 + }, + { + "epoch": 0.87, + "learning_rate": 6.64671368458484e-06, + "loss": 0.8884, + "step": 29260 + }, + { + "epoch": 0.87, + "learning_rate": 6.631897113731998e-06, + "loss": 0.9422, + "step": 29270 + }, + { + "epoch": 0.87, + "learning_rate": 6.617080542879156e-06, + "loss": 0.7692, + "step": 29280 + }, + { + "epoch": 0.87, + "learning_rate": 6.602263972026315e-06, + "loss": 0.7218, + "step": 29290 + }, + { + "epoch": 0.87, + "learning_rate": 6.587447401173473e-06, + "loss": 0.7579, + "step": 29300 + }, + { + "epoch": 0.87, + "learning_rate": 6.5726308303206306e-06, + "loss": 0.8306, + "step": 29310 + }, + { + "epoch": 0.87, + "learning_rate": 6.557814259467789e-06, + "loss": 0.7826, + "step": 29320 + }, + { + "epoch": 0.87, + "learning_rate": 6.5429976886149476e-06, + "loss": 0.8808, + "step": 29330 + }, + { + "epoch": 0.87, + "learning_rate": 6.528181117762106e-06, + "loss": 0.7251, + "step": 29340 + }, + { + "epoch": 0.87, + "learning_rate": 6.513364546909263e-06, + "loss": 0.879, + "step": 29350 + }, + { + "epoch": 0.87, + "learning_rate": 6.498547976056423e-06, + "loss": 0.7672, + "step": 29360 + }, + { + "epoch": 0.87, + "learning_rate": 6.48373140520358e-06, + "loss": 0.8023, + "step": 29370 + }, + { + "epoch": 0.87, + "learning_rate": 6.468914834350738e-06, + "loss": 0.7502, + "step": 29380 + }, + { + "epoch": 0.87, + "learning_rate": 6.454098263497896e-06, + "loss": 0.7522, + "step": 29390 + }, + { + "epoch": 0.87, + "learning_rate": 6.439281692645055e-06, + "loss": 0.8307, + "step": 29400 + }, + { + "epoch": 0.87, + "learning_rate": 6.424465121792213e-06, + "loss": 0.8703, + "step": 29410 + }, + { + "epoch": 0.87, + "learning_rate": 6.40964855093937e-06, + "loss": 0.8241, + "step": 29420 + }, + { + "epoch": 0.87, + "learning_rate": 6.39483198008653e-06, + "loss": 0.8221, + "step": 29430 + }, + { + "epoch": 0.87, + "learning_rate": 6.380015409233687e-06, + "loss": 0.8893, + "step": 29440 + }, + { + "epoch": 0.87, + "learning_rate": 6.365198838380845e-06, + "loss": 0.8828, + "step": 29450 + }, + { + "epoch": 0.87, + "learning_rate": 6.3503822675280034e-06, + "loss": 0.8587, + "step": 29460 + }, + { + "epoch": 0.87, + "learning_rate": 6.335565696675162e-06, + "loss": 0.7406, + "step": 29470 + }, + { + "epoch": 0.87, + "learning_rate": 6.3207491258223204e-06, + "loss": 0.7206, + "step": 29480 + }, + { + "epoch": 0.87, + "learning_rate": 6.305932554969478e-06, + "loss": 0.8015, + "step": 29490 + }, + { + "epoch": 0.87, + "learning_rate": 6.291115984116637e-06, + "loss": 0.7798, + "step": 29500 + }, + { + "epoch": 0.87, + "learning_rate": 6.276299413263795e-06, + "loss": 0.8116, + "step": 29510 + }, + { + "epoch": 0.87, + "learning_rate": 6.261482842410953e-06, + "loss": 0.8226, + "step": 29520 + }, + { + "epoch": 0.88, + "learning_rate": 6.246666271558111e-06, + "loss": 0.8893, + "step": 29530 + }, + { + "epoch": 0.88, + "learning_rate": 6.231849700705269e-06, + "loss": 0.7837, + "step": 29540 + }, + { + "epoch": 0.88, + "learning_rate": 6.217033129852427e-06, + "loss": 0.6093, + "step": 29550 + }, + { + "epoch": 0.88, + "learning_rate": 6.202216558999585e-06, + "loss": 0.789, + "step": 29560 + }, + { + "epoch": 0.88, + "learning_rate": 6.187399988146743e-06, + "loss": 0.8502, + "step": 29570 + }, + { + "epoch": 0.88, + "learning_rate": 6.172583417293902e-06, + "loss": 0.8546, + "step": 29580 + }, + { + "epoch": 0.88, + "learning_rate": 6.15776684644106e-06, + "loss": 0.9091, + "step": 29590 + }, + { + "epoch": 0.88, + "learning_rate": 6.142950275588218e-06, + "loss": 0.7525, + "step": 29600 + }, + { + "epoch": 0.88, + "learning_rate": 6.128133704735376e-06, + "loss": 0.8636, + "step": 29610 + }, + { + "epoch": 0.88, + "learning_rate": 6.113317133882534e-06, + "loss": 0.7831, + "step": 29620 + }, + { + "epoch": 0.88, + "learning_rate": 6.0985005630296925e-06, + "loss": 0.833, + "step": 29630 + }, + { + "epoch": 0.88, + "learning_rate": 6.0836839921768506e-06, + "loss": 0.8562, + "step": 29640 + }, + { + "epoch": 0.88, + "learning_rate": 6.0688674213240095e-06, + "loss": 0.7481, + "step": 29650 + }, + { + "epoch": 0.88, + "learning_rate": 6.054050850471167e-06, + "loss": 0.8869, + "step": 29660 + }, + { + "epoch": 0.88, + "learning_rate": 6.039234279618326e-06, + "loss": 0.6972, + "step": 29670 + }, + { + "epoch": 0.88, + "learning_rate": 6.024417708765484e-06, + "loss": 0.8546, + "step": 29680 + }, + { + "epoch": 0.88, + "learning_rate": 6.009601137912642e-06, + "loss": 0.7603, + "step": 29690 + }, + { + "epoch": 0.88, + "learning_rate": 5.9947845670598e-06, + "loss": 0.8197, + "step": 29700 + }, + { + "epoch": 0.88, + "learning_rate": 5.979967996206958e-06, + "loss": 0.73, + "step": 29710 + }, + { + "epoch": 0.88, + "learning_rate": 5.965151425354117e-06, + "loss": 0.7717, + "step": 29720 + }, + { + "epoch": 0.88, + "learning_rate": 5.950334854501274e-06, + "loss": 0.6476, + "step": 29730 + }, + { + "epoch": 0.88, + "learning_rate": 5.935518283648433e-06, + "loss": 0.8989, + "step": 29740 + }, + { + "epoch": 0.88, + "learning_rate": 5.92070171279559e-06, + "loss": 0.9831, + "step": 29750 + }, + { + "epoch": 0.88, + "learning_rate": 5.905885141942749e-06, + "loss": 0.9013, + "step": 29760 + }, + { + "epoch": 0.88, + "learning_rate": 5.891068571089907e-06, + "loss": 0.7805, + "step": 29770 + }, + { + "epoch": 0.88, + "learning_rate": 5.876252000237065e-06, + "loss": 0.7722, + "step": 29780 + }, + { + "epoch": 0.88, + "learning_rate": 5.8614354293842235e-06, + "loss": 0.8622, + "step": 29790 + }, + { + "epoch": 0.88, + "learning_rate": 5.8466188585313815e-06, + "loss": 0.7649, + "step": 29800 + }, + { + "epoch": 0.88, + "learning_rate": 5.8318022876785405e-06, + "loss": 0.867, + "step": 29810 + }, + { + "epoch": 0.88, + "learning_rate": 5.816985716825698e-06, + "loss": 0.8248, + "step": 29820 + }, + { + "epoch": 0.88, + "learning_rate": 5.802169145972857e-06, + "loss": 0.8189, + "step": 29830 + }, + { + "epoch": 0.88, + "learning_rate": 5.787352575120014e-06, + "loss": 0.7743, + "step": 29840 + }, + { + "epoch": 0.88, + "learning_rate": 5.772536004267173e-06, + "loss": 0.8254, + "step": 29850 + }, + { + "epoch": 0.88, + "learning_rate": 5.757719433414331e-06, + "loss": 0.8056, + "step": 29860 + }, + { + "epoch": 0.89, + "learning_rate": 5.742902862561489e-06, + "loss": 0.7944, + "step": 29870 + }, + { + "epoch": 0.89, + "learning_rate": 5.728086291708647e-06, + "loss": 0.9064, + "step": 29880 + }, + { + "epoch": 0.89, + "learning_rate": 5.713269720855805e-06, + "loss": 0.8732, + "step": 29890 + }, + { + "epoch": 0.89, + "learning_rate": 5.698453150002964e-06, + "loss": 0.804, + "step": 29900 + }, + { + "epoch": 0.89, + "learning_rate": 5.683636579150121e-06, + "loss": 0.9746, + "step": 29910 + }, + { + "epoch": 0.89, + "learning_rate": 5.66882000829728e-06, + "loss": 0.8479, + "step": 29920 + }, + { + "epoch": 0.89, + "learning_rate": 5.654003437444438e-06, + "loss": 0.8559, + "step": 29930 + }, + { + "epoch": 0.89, + "learning_rate": 5.639186866591596e-06, + "loss": 0.8752, + "step": 29940 + }, + { + "epoch": 0.89, + "learning_rate": 5.624370295738754e-06, + "loss": 0.7864, + "step": 29950 + }, + { + "epoch": 0.89, + "learning_rate": 5.6095537248859125e-06, + "loss": 0.8694, + "step": 29960 + }, + { + "epoch": 0.89, + "learning_rate": 5.594737154033071e-06, + "loss": 0.8, + "step": 29970 + }, + { + "epoch": 0.89, + "learning_rate": 5.579920583180229e-06, + "loss": 0.8002, + "step": 29980 + }, + { + "epoch": 0.89, + "learning_rate": 5.565104012327388e-06, + "loss": 0.895, + "step": 29990 + }, + { + "epoch": 0.89, + "learning_rate": 5.550287441474546e-06, + "loss": 0.7581, + "step": 30000 + }, + { + "epoch": 0.89, + "learning_rate": 5.535470870621704e-06, + "loss": 0.9158, + "step": 30010 + }, + { + "epoch": 0.89, + "learning_rate": 5.520654299768862e-06, + "loss": 0.9058, + "step": 30020 + }, + { + "epoch": 0.89, + "learning_rate": 5.50583772891602e-06, + "loss": 0.8668, + "step": 30030 + }, + { + "epoch": 0.89, + "learning_rate": 5.491021158063178e-06, + "loss": 0.7331, + "step": 30040 + }, + { + "epoch": 0.89, + "learning_rate": 5.476204587210336e-06, + "loss": 0.8737, + "step": 30050 + }, + { + "epoch": 0.89, + "learning_rate": 5.461388016357494e-06, + "loss": 0.7617, + "step": 30060 + }, + { + "epoch": 0.89, + "learning_rate": 5.446571445504653e-06, + "loss": 0.8632, + "step": 30070 + }, + { + "epoch": 0.89, + "learning_rate": 5.43175487465181e-06, + "loss": 0.8363, + "step": 30080 + }, + { + "epoch": 0.89, + "learning_rate": 5.416938303798969e-06, + "loss": 0.8232, + "step": 30090 + }, + { + "epoch": 0.89, + "learning_rate": 5.402121732946127e-06, + "loss": 0.8688, + "step": 30100 + }, + { + "epoch": 0.89, + "learning_rate": 5.387305162093285e-06, + "loss": 0.7766, + "step": 30110 + }, + { + "epoch": 0.89, + "learning_rate": 5.3724885912404435e-06, + "loss": 0.8947, + "step": 30120 + }, + { + "epoch": 0.89, + "learning_rate": 5.3576720203876015e-06, + "loss": 0.6847, + "step": 30130 + }, + { + "epoch": 0.89, + "learning_rate": 5.3428554495347605e-06, + "loss": 0.7965, + "step": 30140 + }, + { + "epoch": 0.89, + "learning_rate": 5.328038878681918e-06, + "loss": 0.8281, + "step": 30150 + }, + { + "epoch": 0.89, + "learning_rate": 5.313222307829077e-06, + "loss": 1.0558, + "step": 30160 + }, + { + "epoch": 0.89, + "learning_rate": 5.298405736976234e-06, + "loss": 0.8634, + "step": 30170 + }, + { + "epoch": 0.89, + "learning_rate": 5.283589166123393e-06, + "loss": 0.7995, + "step": 30180 + }, + { + "epoch": 0.89, + "learning_rate": 5.268772595270551e-06, + "loss": 0.7261, + "step": 30190 + }, + { + "epoch": 0.89, + "learning_rate": 5.253956024417709e-06, + "loss": 0.8498, + "step": 30200 + }, + { + "epoch": 0.9, + "learning_rate": 5.239139453564868e-06, + "loss": 0.8425, + "step": 30210 + }, + { + "epoch": 0.9, + "learning_rate": 5.224322882712025e-06, + "loss": 0.926, + "step": 30220 + }, + { + "epoch": 0.9, + "learning_rate": 5.209506311859184e-06, + "loss": 0.726, + "step": 30230 + }, + { + "epoch": 0.9, + "learning_rate": 5.194689741006341e-06, + "loss": 0.8751, + "step": 30240 + }, + { + "epoch": 0.9, + "learning_rate": 5.1798731701535e-06, + "loss": 0.8552, + "step": 30250 + }, + { + "epoch": 0.9, + "learning_rate": 5.165056599300657e-06, + "loss": 0.7732, + "step": 30260 + }, + { + "epoch": 0.9, + "learning_rate": 5.150240028447816e-06, + "loss": 0.8612, + "step": 30270 + }, + { + "epoch": 0.9, + "learning_rate": 5.1354234575949744e-06, + "loss": 0.685, + "step": 30280 + }, + { + "epoch": 0.9, + "learning_rate": 5.1206068867421325e-06, + "loss": 0.799, + "step": 30290 + }, + { + "epoch": 0.9, + "learning_rate": 5.105790315889291e-06, + "loss": 0.74, + "step": 30300 + }, + { + "epoch": 0.9, + "learning_rate": 5.090973745036449e-06, + "loss": 0.8859, + "step": 30310 + }, + { + "epoch": 0.9, + "learning_rate": 5.076157174183608e-06, + "loss": 0.7469, + "step": 30320 + }, + { + "epoch": 0.9, + "learning_rate": 5.061340603330765e-06, + "loss": 0.7738, + "step": 30330 + }, + { + "epoch": 0.9, + "learning_rate": 5.046524032477924e-06, + "loss": 0.8269, + "step": 30340 + }, + { + "epoch": 0.9, + "learning_rate": 5.031707461625082e-06, + "loss": 0.9851, + "step": 30350 + }, + { + "epoch": 0.9, + "learning_rate": 5.01689089077224e-06, + "loss": 0.7928, + "step": 30360 + }, + { + "epoch": 0.9, + "learning_rate": 5.002074319919398e-06, + "loss": 0.7639, + "step": 30370 + }, + { + "epoch": 0.9, + "learning_rate": 4.987257749066556e-06, + "loss": 0.9153, + "step": 30380 + }, + { + "epoch": 0.9, + "learning_rate": 4.972441178213714e-06, + "loss": 0.9317, + "step": 30390 + }, + { + "epoch": 0.9, + "learning_rate": 4.957624607360872e-06, + "loss": 0.8111, + "step": 30400 + }, + { + "epoch": 0.9, + "learning_rate": 4.942808036508031e-06, + "loss": 0.8389, + "step": 30410 + }, + { + "epoch": 0.9, + "learning_rate": 4.927991465655189e-06, + "loss": 0.8964, + "step": 30420 + }, + { + "epoch": 0.9, + "learning_rate": 4.913174894802347e-06, + "loss": 0.729, + "step": 30430 + }, + { + "epoch": 0.9, + "learning_rate": 4.898358323949505e-06, + "loss": 0.8435, + "step": 30440 + }, + { + "epoch": 0.9, + "learning_rate": 4.8835417530966635e-06, + "loss": 0.6626, + "step": 30450 + }, + { + "epoch": 0.9, + "learning_rate": 4.8687251822438216e-06, + "loss": 0.7776, + "step": 30460 + }, + { + "epoch": 0.9, + "learning_rate": 4.85390861139098e-06, + "loss": 0.731, + "step": 30470 + }, + { + "epoch": 0.9, + "learning_rate": 4.839092040538138e-06, + "loss": 0.7716, + "step": 30480 + }, + { + "epoch": 0.9, + "learning_rate": 4.824275469685297e-06, + "loss": 0.8201, + "step": 30490 + }, + { + "epoch": 0.9, + "learning_rate": 4.809458898832455e-06, + "loss": 0.8544, + "step": 30500 + }, + { + "epoch": 0.9, + "learning_rate": 4.794642327979613e-06, + "loss": 0.8127, + "step": 30510 + }, + { + "epoch": 0.9, + "learning_rate": 4.779825757126771e-06, + "loss": 0.7945, + "step": 30520 + }, + { + "epoch": 0.9, + "learning_rate": 4.765009186273929e-06, + "loss": 0.7658, + "step": 30530 + }, + { + "epoch": 0.9, + "learning_rate": 4.750192615421087e-06, + "loss": 0.709, + "step": 30540 + }, + { + "epoch": 0.91, + "learning_rate": 4.735376044568245e-06, + "loss": 0.7834, + "step": 30550 + }, + { + "epoch": 0.91, + "learning_rate": 4.720559473715404e-06, + "loss": 0.7525, + "step": 30560 + }, + { + "epoch": 0.91, + "learning_rate": 4.705742902862561e-06, + "loss": 0.8995, + "step": 30570 + }, + { + "epoch": 0.91, + "learning_rate": 4.69092633200972e-06, + "loss": 0.6898, + "step": 30580 + }, + { + "epoch": 0.91, + "learning_rate": 4.6761097611568774e-06, + "loss": 0.7307, + "step": 30590 + }, + { + "epoch": 0.91, + "learning_rate": 4.661293190304036e-06, + "loss": 0.764, + "step": 30600 + }, + { + "epoch": 0.91, + "learning_rate": 4.6464766194511944e-06, + "loss": 0.7606, + "step": 30610 + }, + { + "epoch": 0.91, + "learning_rate": 4.6316600485983525e-06, + "loss": 0.8327, + "step": 30620 + }, + { + "epoch": 0.91, + "learning_rate": 4.6168434777455114e-06, + "loss": 0.7644, + "step": 30630 + }, + { + "epoch": 0.91, + "learning_rate": 4.602026906892669e-06, + "loss": 0.6861, + "step": 30640 + }, + { + "epoch": 0.91, + "learning_rate": 4.587210336039828e-06, + "loss": 0.7229, + "step": 30650 + }, + { + "epoch": 0.91, + "learning_rate": 4.572393765186985e-06, + "loss": 0.79, + "step": 30660 + }, + { + "epoch": 0.91, + "learning_rate": 4.557577194334144e-06, + "loss": 0.9027, + "step": 30670 + }, + { + "epoch": 0.91, + "learning_rate": 4.542760623481301e-06, + "loss": 0.8741, + "step": 30680 + }, + { + "epoch": 0.91, + "learning_rate": 4.52794405262846e-06, + "loss": 0.9217, + "step": 30690 + }, + { + "epoch": 0.91, + "learning_rate": 4.513127481775618e-06, + "loss": 0.8175, + "step": 30700 + }, + { + "epoch": 0.91, + "learning_rate": 4.498310910922776e-06, + "loss": 0.7815, + "step": 30710 + }, + { + "epoch": 0.91, + "learning_rate": 4.483494340069935e-06, + "loss": 0.7731, + "step": 30720 + }, + { + "epoch": 0.91, + "learning_rate": 4.468677769217092e-06, + "loss": 0.7689, + "step": 30730 + }, + { + "epoch": 0.91, + "learning_rate": 4.453861198364251e-06, + "loss": 0.8046, + "step": 30740 + }, + { + "epoch": 0.91, + "learning_rate": 4.439044627511408e-06, + "loss": 0.8386, + "step": 30750 + }, + { + "epoch": 0.91, + "learning_rate": 4.424228056658567e-06, + "loss": 0.7341, + "step": 30760 + }, + { + "epoch": 0.91, + "learning_rate": 4.409411485805725e-06, + "loss": 0.9079, + "step": 30770 + }, + { + "epoch": 0.91, + "learning_rate": 4.3945949149528835e-06, + "loss": 0.8967, + "step": 30780 + }, + { + "epoch": 0.91, + "learning_rate": 4.3797783441000416e-06, + "loss": 1.0217, + "step": 30790 + }, + { + "epoch": 0.91, + "learning_rate": 4.3649617732472e-06, + "loss": 0.7746, + "step": 30800 + }, + { + "epoch": 0.91, + "learning_rate": 4.350145202394358e-06, + "loss": 0.7299, + "step": 30810 + }, + { + "epoch": 0.91, + "learning_rate": 4.335328631541516e-06, + "loss": 0.8941, + "step": 30820 + }, + { + "epoch": 0.91, + "learning_rate": 4.320512060688675e-06, + "loss": 0.812, + "step": 30830 + }, + { + "epoch": 0.91, + "learning_rate": 4.305695489835833e-06, + "loss": 0.9509, + "step": 30840 + }, + { + "epoch": 0.91, + "learning_rate": 4.290878918982991e-06, + "loss": 0.779, + "step": 30850 + }, + { + "epoch": 0.91, + "learning_rate": 4.276062348130149e-06, + "loss": 0.8534, + "step": 30860 + }, + { + "epoch": 0.91, + "learning_rate": 4.261245777277307e-06, + "loss": 0.6726, + "step": 30870 + }, + { + "epoch": 0.92, + "learning_rate": 4.246429206424465e-06, + "loss": 0.8623, + "step": 30880 + }, + { + "epoch": 0.92, + "learning_rate": 4.231612635571623e-06, + "loss": 0.7388, + "step": 30890 + }, + { + "epoch": 0.92, + "learning_rate": 4.216796064718781e-06, + "loss": 0.7825, + "step": 30900 + }, + { + "epoch": 0.92, + "learning_rate": 4.20197949386594e-06, + "loss": 0.719, + "step": 30910 + }, + { + "epoch": 0.92, + "learning_rate": 4.187162923013098e-06, + "loss": 0.9652, + "step": 30920 + }, + { + "epoch": 0.92, + "learning_rate": 4.172346352160256e-06, + "loss": 0.8504, + "step": 30930 + }, + { + "epoch": 0.92, + "learning_rate": 4.1575297813074144e-06, + "loss": 0.809, + "step": 30940 + }, + { + "epoch": 0.92, + "learning_rate": 4.1427132104545725e-06, + "loss": 0.857, + "step": 30950 + }, + { + "epoch": 0.92, + "learning_rate": 4.127896639601731e-06, + "loss": 0.7532, + "step": 30960 + }, + { + "epoch": 0.92, + "learning_rate": 4.113080068748889e-06, + "loss": 0.861, + "step": 30970 + }, + { + "epoch": 0.92, + "learning_rate": 4.098263497896048e-06, + "loss": 0.8542, + "step": 30980 + }, + { + "epoch": 0.92, + "learning_rate": 4.083446927043205e-06, + "loss": 0.707, + "step": 30990 + }, + { + "epoch": 0.92, + "learning_rate": 4.068630356190364e-06, + "loss": 0.7639, + "step": 31000 + }, + { + "epoch": 0.92, + "learning_rate": 4.053813785337522e-06, + "loss": 0.8398, + "step": 31010 + }, + { + "epoch": 0.92, + "learning_rate": 4.03899721448468e-06, + "loss": 0.846, + "step": 31020 + }, + { + "epoch": 0.92, + "learning_rate": 4.024180643631838e-06, + "loss": 0.88, + "step": 31030 + }, + { + "epoch": 0.92, + "learning_rate": 4.009364072778996e-06, + "loss": 0.8333, + "step": 31040 + }, + { + "epoch": 0.92, + "learning_rate": 3.994547501926155e-06, + "loss": 0.8985, + "step": 31050 + }, + { + "epoch": 0.92, + "learning_rate": 3.979730931073312e-06, + "loss": 0.7808, + "step": 31060 + }, + { + "epoch": 0.92, + "learning_rate": 3.964914360220471e-06, + "loss": 0.9343, + "step": 31070 + }, + { + "epoch": 0.92, + "learning_rate": 3.950097789367628e-06, + "loss": 0.8095, + "step": 31080 + }, + { + "epoch": 0.92, + "learning_rate": 3.935281218514787e-06, + "loss": 0.9956, + "step": 31090 + }, + { + "epoch": 0.92, + "learning_rate": 3.920464647661945e-06, + "loss": 0.8299, + "step": 31100 + }, + { + "epoch": 0.92, + "learning_rate": 3.9056480768091035e-06, + "loss": 0.8431, + "step": 31110 + }, + { + "epoch": 0.92, + "learning_rate": 3.8908315059562616e-06, + "loss": 0.8308, + "step": 31120 + }, + { + "epoch": 0.92, + "learning_rate": 3.87601493510342e-06, + "loss": 0.7654, + "step": 31130 + }, + { + "epoch": 0.92, + "learning_rate": 3.861198364250579e-06, + "loss": 0.8367, + "step": 31140 + }, + { + "epoch": 0.92, + "learning_rate": 3.846381793397736e-06, + "loss": 0.8608, + "step": 31150 + }, + { + "epoch": 0.92, + "learning_rate": 3.831565222544895e-06, + "loss": 0.8565, + "step": 31160 + }, + { + "epoch": 0.92, + "learning_rate": 3.816748651692052e-06, + "loss": 0.7515, + "step": 31170 + }, + { + "epoch": 0.92, + "learning_rate": 3.801932080839211e-06, + "loss": 0.7589, + "step": 31180 + }, + { + "epoch": 0.92, + "learning_rate": 3.7871155099863694e-06, + "loss": 0.7388, + "step": 31190 + }, + { + "epoch": 0.92, + "learning_rate": 3.772298939133527e-06, + "loss": 0.8201, + "step": 31200 + }, + { + "epoch": 0.92, + "learning_rate": 3.7574823682806856e-06, + "loss": 0.7638, + "step": 31210 + }, + { + "epoch": 0.93, + "learning_rate": 3.7426657974278432e-06, + "loss": 0.8043, + "step": 31220 + }, + { + "epoch": 0.93, + "learning_rate": 3.7278492265750017e-06, + "loss": 0.747, + "step": 31230 + }, + { + "epoch": 0.93, + "learning_rate": 3.7130326557221594e-06, + "loss": 0.7445, + "step": 31240 + }, + { + "epoch": 0.93, + "learning_rate": 3.698216084869318e-06, + "loss": 0.8245, + "step": 31250 + }, + { + "epoch": 0.93, + "learning_rate": 3.6833995140164764e-06, + "loss": 0.8488, + "step": 31260 + }, + { + "epoch": 0.93, + "learning_rate": 3.6685829431636345e-06, + "loss": 0.9464, + "step": 31270 + }, + { + "epoch": 0.93, + "learning_rate": 3.6537663723107925e-06, + "loss": 0.7752, + "step": 31280 + }, + { + "epoch": 0.93, + "learning_rate": 3.6389498014579506e-06, + "loss": 0.821, + "step": 31290 + }, + { + "epoch": 0.93, + "learning_rate": 3.624133230605109e-06, + "loss": 0.8872, + "step": 31300 + }, + { + "epoch": 0.93, + "learning_rate": 3.6093166597522668e-06, + "loss": 0.8284, + "step": 31310 + }, + { + "epoch": 0.93, + "learning_rate": 3.5945000888994253e-06, + "loss": 0.9218, + "step": 31320 + }, + { + "epoch": 0.93, + "learning_rate": 3.5796835180465838e-06, + "loss": 0.7825, + "step": 31330 + }, + { + "epoch": 0.93, + "learning_rate": 3.5648669471937414e-06, + "loss": 0.8917, + "step": 31340 + }, + { + "epoch": 0.93, + "learning_rate": 3.5500503763409e-06, + "loss": 0.6889, + "step": 31350 + }, + { + "epoch": 0.93, + "learning_rate": 3.5352338054880576e-06, + "loss": 0.8553, + "step": 31360 + }, + { + "epoch": 0.93, + "learning_rate": 3.520417234635216e-06, + "loss": 0.858, + "step": 31370 + }, + { + "epoch": 0.93, + "learning_rate": 3.505600663782374e-06, + "loss": 0.7792, + "step": 31380 + }, + { + "epoch": 0.93, + "learning_rate": 3.4907840929295327e-06, + "loss": 0.7633, + "step": 31390 + }, + { + "epoch": 0.93, + "learning_rate": 3.475967522076691e-06, + "loss": 0.7359, + "step": 31400 + }, + { + "epoch": 0.93, + "learning_rate": 3.461150951223849e-06, + "loss": 0.9595, + "step": 31410 + }, + { + "epoch": 0.93, + "learning_rate": 3.4463343803710073e-06, + "loss": 0.8637, + "step": 31420 + }, + { + "epoch": 0.93, + "learning_rate": 3.431517809518165e-06, + "loss": 0.8134, + "step": 31430 + }, + { + "epoch": 0.93, + "learning_rate": 3.4167012386653235e-06, + "loss": 0.8257, + "step": 31440 + }, + { + "epoch": 0.93, + "learning_rate": 3.401884667812481e-06, + "loss": 0.8745, + "step": 31450 + }, + { + "epoch": 0.93, + "learning_rate": 3.3870680969596397e-06, + "loss": 0.8975, + "step": 31460 + }, + { + "epoch": 0.93, + "learning_rate": 3.372251526106798e-06, + "loss": 0.849, + "step": 31470 + }, + { + "epoch": 0.93, + "learning_rate": 3.3574349552539562e-06, + "loss": 0.8976, + "step": 31480 + }, + { + "epoch": 0.93, + "learning_rate": 3.3426183844011147e-06, + "loss": 0.7464, + "step": 31490 + }, + { + "epoch": 0.93, + "learning_rate": 3.3278018135482724e-06, + "loss": 0.7268, + "step": 31500 + }, + { + "epoch": 0.93, + "learning_rate": 3.312985242695431e-06, + "loss": 0.8978, + "step": 31510 + }, + { + "epoch": 0.93, + "learning_rate": 3.2981686718425886e-06, + "loss": 0.9268, + "step": 31520 + }, + { + "epoch": 0.93, + "learning_rate": 3.283352100989747e-06, + "loss": 0.8545, + "step": 31530 + }, + { + "epoch": 0.93, + "learning_rate": 3.2685355301369056e-06, + "loss": 0.8155, + "step": 31540 + }, + { + "epoch": 0.93, + "learning_rate": 3.2537189592840632e-06, + "loss": 0.7821, + "step": 31550 + }, + { + "epoch": 0.94, + "learning_rate": 3.2389023884312217e-06, + "loss": 0.8581, + "step": 31560 + }, + { + "epoch": 0.94, + "learning_rate": 3.22408581757838e-06, + "loss": 0.7314, + "step": 31570 + }, + { + "epoch": 0.94, + "learning_rate": 3.209269246725538e-06, + "loss": 0.7354, + "step": 31580 + }, + { + "epoch": 0.94, + "learning_rate": 3.194452675872696e-06, + "loss": 0.7087, + "step": 31590 + }, + { + "epoch": 0.94, + "learning_rate": 3.1796361050198545e-06, + "loss": 0.8197, + "step": 31600 + }, + { + "epoch": 0.94, + "learning_rate": 3.164819534167013e-06, + "loss": 0.8743, + "step": 31610 + }, + { + "epoch": 0.94, + "learning_rate": 3.1500029633141706e-06, + "loss": 0.8032, + "step": 31620 + }, + { + "epoch": 0.94, + "learning_rate": 3.135186392461329e-06, + "loss": 0.8348, + "step": 31630 + }, + { + "epoch": 0.94, + "learning_rate": 3.120369821608487e-06, + "loss": 1.0197, + "step": 31640 + }, + { + "epoch": 0.94, + "learning_rate": 3.1055532507556453e-06, + "loss": 0.8379, + "step": 31650 + }, + { + "epoch": 0.94, + "learning_rate": 3.0907366799028034e-06, + "loss": 0.9561, + "step": 31660 + }, + { + "epoch": 0.94, + "learning_rate": 3.0759201090499614e-06, + "loss": 0.745, + "step": 31670 + }, + { + "epoch": 0.94, + "learning_rate": 3.06110353819712e-06, + "loss": 0.8192, + "step": 31680 + }, + { + "epoch": 0.94, + "learning_rate": 3.046286967344278e-06, + "loss": 0.7573, + "step": 31690 + }, + { + "epoch": 0.94, + "learning_rate": 3.031470396491436e-06, + "loss": 0.7973, + "step": 31700 + }, + { + "epoch": 0.94, + "learning_rate": 3.0166538256385946e-06, + "loss": 0.8163, + "step": 31710 + }, + { + "epoch": 0.94, + "learning_rate": 3.0018372547857527e-06, + "loss": 0.6931, + "step": 31720 + }, + { + "epoch": 0.94, + "learning_rate": 2.9870206839329108e-06, + "loss": 0.7237, + "step": 31730 + }, + { + "epoch": 0.94, + "learning_rate": 2.972204113080069e-06, + "loss": 0.8131, + "step": 31740 + }, + { + "epoch": 0.94, + "learning_rate": 2.957387542227227e-06, + "loss": 0.7158, + "step": 31750 + }, + { + "epoch": 0.94, + "learning_rate": 2.942570971374385e-06, + "loss": 0.9403, + "step": 31760 + }, + { + "epoch": 0.94, + "learning_rate": 2.927754400521543e-06, + "loss": 0.9201, + "step": 31770 + }, + { + "epoch": 0.94, + "learning_rate": 2.9129378296687016e-06, + "loss": 0.9178, + "step": 31780 + }, + { + "epoch": 0.94, + "learning_rate": 2.89812125881586e-06, + "loss": 0.9062, + "step": 31790 + }, + { + "epoch": 0.94, + "learning_rate": 2.883304687963018e-06, + "loss": 0.8674, + "step": 31800 + }, + { + "epoch": 0.94, + "learning_rate": 2.8684881171101763e-06, + "loss": 0.7798, + "step": 31810 + }, + { + "epoch": 0.94, + "learning_rate": 2.8536715462573343e-06, + "loss": 0.7193, + "step": 31820 + }, + { + "epoch": 0.94, + "learning_rate": 2.8388549754044924e-06, + "loss": 0.7906, + "step": 31830 + }, + { + "epoch": 0.94, + "learning_rate": 2.8240384045516505e-06, + "loss": 0.8077, + "step": 31840 + }, + { + "epoch": 0.94, + "learning_rate": 2.809221833698809e-06, + "loss": 0.7481, + "step": 31850 + }, + { + "epoch": 0.94, + "learning_rate": 2.794405262845967e-06, + "loss": 0.8428, + "step": 31860 + }, + { + "epoch": 0.94, + "learning_rate": 2.779588691993125e-06, + "loss": 0.7441, + "step": 31870 + }, + { + "epoch": 0.94, + "learning_rate": 2.7647721211402832e-06, + "loss": 0.7401, + "step": 31880 + }, + { + "epoch": 0.95, + "learning_rate": 2.7499555502874417e-06, + "loss": 0.869, + "step": 31890 + }, + { + "epoch": 0.95, + "learning_rate": 2.7351389794346e-06, + "loss": 0.8421, + "step": 31900 + }, + { + "epoch": 0.95, + "learning_rate": 2.720322408581758e-06, + "loss": 0.783, + "step": 31910 + }, + { + "epoch": 0.95, + "learning_rate": 2.7055058377289164e-06, + "loss": 0.8691, + "step": 31920 + }, + { + "epoch": 0.95, + "learning_rate": 2.6906892668760745e-06, + "loss": 0.6008, + "step": 31930 + }, + { + "epoch": 0.95, + "learning_rate": 2.6758726960232326e-06, + "loss": 0.8923, + "step": 31940 + }, + { + "epoch": 0.95, + "learning_rate": 2.6610561251703906e-06, + "loss": 0.8449, + "step": 31950 + }, + { + "epoch": 0.95, + "learning_rate": 2.6462395543175487e-06, + "loss": 0.7947, + "step": 31960 + }, + { + "epoch": 0.95, + "learning_rate": 2.631422983464707e-06, + "loss": 0.7598, + "step": 31970 + }, + { + "epoch": 0.95, + "learning_rate": 2.616606412611865e-06, + "loss": 0.8125, + "step": 31980 + }, + { + "epoch": 0.95, + "learning_rate": 2.6017898417590234e-06, + "loss": 0.8404, + "step": 31990 + }, + { + "epoch": 0.95, + "learning_rate": 2.586973270906182e-06, + "loss": 0.8872, + "step": 32000 + }, + { + "epoch": 0.95, + "learning_rate": 2.57215670005334e-06, + "loss": 0.9069, + "step": 32010 + }, + { + "epoch": 0.95, + "learning_rate": 2.557340129200498e-06, + "loss": 0.7658, + "step": 32020 + }, + { + "epoch": 0.95, + "learning_rate": 2.542523558347656e-06, + "loss": 0.9541, + "step": 32030 + }, + { + "epoch": 0.95, + "learning_rate": 2.527706987494814e-06, + "loss": 0.8887, + "step": 32040 + }, + { + "epoch": 0.95, + "learning_rate": 2.5128904166419723e-06, + "loss": 0.9671, + "step": 32050 + }, + { + "epoch": 0.95, + "learning_rate": 2.4980738457891308e-06, + "loss": 0.8185, + "step": 32060 + }, + { + "epoch": 0.95, + "learning_rate": 2.483257274936289e-06, + "loss": 0.8478, + "step": 32070 + }, + { + "epoch": 0.95, + "learning_rate": 2.468440704083447e-06, + "loss": 0.7373, + "step": 32080 + }, + { + "epoch": 0.95, + "learning_rate": 2.453624133230605e-06, + "loss": 0.7813, + "step": 32090 + }, + { + "epoch": 0.95, + "learning_rate": 2.4388075623777635e-06, + "loss": 0.8227, + "step": 32100 + }, + { + "epoch": 0.95, + "learning_rate": 2.4239909915249216e-06, + "loss": 0.734, + "step": 32110 + }, + { + "epoch": 0.95, + "learning_rate": 2.4091744206720797e-06, + "loss": 0.8542, + "step": 32120 + }, + { + "epoch": 0.95, + "learning_rate": 2.394357849819238e-06, + "loss": 0.7181, + "step": 32130 + }, + { + "epoch": 0.95, + "learning_rate": 2.3795412789663963e-06, + "loss": 0.8298, + "step": 32140 + }, + { + "epoch": 0.95, + "learning_rate": 2.3647247081135543e-06, + "loss": 0.8034, + "step": 32150 + }, + { + "epoch": 0.95, + "learning_rate": 2.3499081372607124e-06, + "loss": 0.8093, + "step": 32160 + }, + { + "epoch": 0.95, + "learning_rate": 2.3350915664078705e-06, + "loss": 0.8805, + "step": 32170 + }, + { + "epoch": 0.95, + "learning_rate": 2.3202749955550286e-06, + "loss": 0.8055, + "step": 32180 + }, + { + "epoch": 0.95, + "learning_rate": 2.305458424702187e-06, + "loss": 0.8558, + "step": 32190 + }, + { + "epoch": 0.95, + "learning_rate": 2.290641853849345e-06, + "loss": 0.8407, + "step": 32200 + }, + { + "epoch": 0.95, + "learning_rate": 2.2758252829965037e-06, + "loss": 0.9348, + "step": 32210 + }, + { + "epoch": 0.95, + "learning_rate": 2.2610087121436617e-06, + "loss": 0.779, + "step": 32220 + }, + { + "epoch": 0.96, + "learning_rate": 2.24619214129082e-06, + "loss": 0.6843, + "step": 32230 + }, + { + "epoch": 0.96, + "learning_rate": 2.231375570437978e-06, + "loss": 0.8221, + "step": 32240 + }, + { + "epoch": 0.96, + "learning_rate": 2.216558999585136e-06, + "loss": 0.8315, + "step": 32250 + }, + { + "epoch": 0.96, + "learning_rate": 2.201742428732294e-06, + "loss": 0.7218, + "step": 32260 + }, + { + "epoch": 0.96, + "learning_rate": 2.1869258578794526e-06, + "loss": 0.9209, + "step": 32270 + }, + { + "epoch": 0.96, + "learning_rate": 2.1721092870266106e-06, + "loss": 0.8556, + "step": 32280 + }, + { + "epoch": 0.96, + "learning_rate": 2.1572927161737687e-06, + "loss": 0.9266, + "step": 32290 + }, + { + "epoch": 0.96, + "learning_rate": 2.1424761453209272e-06, + "loss": 0.8504, + "step": 32300 + }, + { + "epoch": 0.96, + "learning_rate": 2.1276595744680853e-06, + "loss": 0.7773, + "step": 32310 + }, + { + "epoch": 0.96, + "learning_rate": 2.1128430036152434e-06, + "loss": 0.878, + "step": 32320 + }, + { + "epoch": 0.96, + "learning_rate": 2.0980264327624015e-06, + "loss": 0.8129, + "step": 32330 + }, + { + "epoch": 0.96, + "learning_rate": 2.08320986190956e-06, + "loss": 0.8973, + "step": 32340 + }, + { + "epoch": 0.96, + "learning_rate": 2.068393291056718e-06, + "loss": 0.9417, + "step": 32350 + }, + { + "epoch": 0.96, + "learning_rate": 2.053576720203876e-06, + "loss": 0.7483, + "step": 32360 + }, + { + "epoch": 0.96, + "learning_rate": 2.038760149351034e-06, + "loss": 0.6655, + "step": 32370 + }, + { + "epoch": 0.96, + "learning_rate": 2.0239435784981923e-06, + "loss": 0.9379, + "step": 32380 + }, + { + "epoch": 0.96, + "learning_rate": 2.0091270076453504e-06, + "loss": 0.8425, + "step": 32390 + }, + { + "epoch": 0.96, + "learning_rate": 1.994310436792509e-06, + "loss": 0.827, + "step": 32400 + }, + { + "epoch": 0.96, + "learning_rate": 1.9794938659396674e-06, + "loss": 0.8824, + "step": 32410 + }, + { + "epoch": 0.96, + "learning_rate": 1.9646772950868255e-06, + "loss": 0.8184, + "step": 32420 + }, + { + "epoch": 0.96, + "learning_rate": 1.9498607242339835e-06, + "loss": 0.7644, + "step": 32430 + }, + { + "epoch": 0.96, + "learning_rate": 1.9350441533811416e-06, + "loss": 0.8675, + "step": 32440 + }, + { + "epoch": 0.96, + "learning_rate": 1.9202275825282997e-06, + "loss": 0.787, + "step": 32450 + }, + { + "epoch": 0.96, + "learning_rate": 1.9054110116754578e-06, + "loss": 0.8345, + "step": 32460 + }, + { + "epoch": 0.96, + "learning_rate": 1.890594440822616e-06, + "loss": 0.8468, + "step": 32470 + }, + { + "epoch": 0.96, + "learning_rate": 1.8757778699697744e-06, + "loss": 0.7413, + "step": 32480 + }, + { + "epoch": 0.96, + "learning_rate": 1.8609612991169326e-06, + "loss": 0.7738, + "step": 32490 + }, + { + "epoch": 0.96, + "learning_rate": 1.8461447282640907e-06, + "loss": 0.7922, + "step": 32500 + }, + { + "epoch": 0.96, + "learning_rate": 1.8313281574112488e-06, + "loss": 0.8119, + "step": 32510 + }, + { + "epoch": 0.96, + "learning_rate": 1.8165115865584069e-06, + "loss": 0.8329, + "step": 32520 + }, + { + "epoch": 0.96, + "learning_rate": 1.8016950157055652e-06, + "loss": 0.6763, + "step": 32530 + }, + { + "epoch": 0.96, + "learning_rate": 1.7868784448527233e-06, + "loss": 0.6897, + "step": 32540 + }, + { + "epoch": 0.96, + "learning_rate": 1.7720618739998818e-06, + "loss": 0.854, + "step": 32550 + }, + { + "epoch": 0.96, + "learning_rate": 1.7572453031470398e-06, + "loss": 0.775, + "step": 32560 + }, + { + "epoch": 0.97, + "learning_rate": 1.742428732294198e-06, + "loss": 0.9592, + "step": 32570 + }, + { + "epoch": 0.97, + "learning_rate": 1.727612161441356e-06, + "loss": 0.7021, + "step": 32580 + }, + { + "epoch": 0.97, + "learning_rate": 1.7127955905885143e-06, + "loss": 0.8669, + "step": 32590 + }, + { + "epoch": 0.97, + "learning_rate": 1.6979790197356724e-06, + "loss": 0.9422, + "step": 32600 + }, + { + "epoch": 0.97, + "learning_rate": 1.6831624488828304e-06, + "loss": 0.7729, + "step": 32610 + }, + { + "epoch": 0.97, + "learning_rate": 1.668345878029989e-06, + "loss": 0.8819, + "step": 32620 + }, + { + "epoch": 0.97, + "learning_rate": 1.653529307177147e-06, + "loss": 0.8327, + "step": 32630 + }, + { + "epoch": 0.97, + "learning_rate": 1.6387127363243053e-06, + "loss": 0.7456, + "step": 32640 + }, + { + "epoch": 0.97, + "learning_rate": 1.6238961654714634e-06, + "loss": 0.8397, + "step": 32650 + }, + { + "epoch": 0.97, + "learning_rate": 1.6090795946186215e-06, + "loss": 0.8461, + "step": 32660 + }, + { + "epoch": 0.97, + "learning_rate": 1.5942630237657796e-06, + "loss": 0.8236, + "step": 32670 + }, + { + "epoch": 0.97, + "learning_rate": 1.5794464529129378e-06, + "loss": 0.9109, + "step": 32680 + }, + { + "epoch": 0.97, + "learning_rate": 1.5646298820600961e-06, + "loss": 0.7619, + "step": 32690 + }, + { + "epoch": 0.97, + "learning_rate": 1.5498133112072542e-06, + "loss": 0.7171, + "step": 32700 + }, + { + "epoch": 0.97, + "learning_rate": 1.5349967403544125e-06, + "loss": 0.7824, + "step": 32710 + }, + { + "epoch": 0.97, + "learning_rate": 1.5201801695015706e-06, + "loss": 0.8839, + "step": 32720 + }, + { + "epoch": 0.97, + "learning_rate": 1.5053635986487287e-06, + "loss": 0.7877, + "step": 32730 + }, + { + "epoch": 0.97, + "learning_rate": 1.4905470277958872e-06, + "loss": 0.7949, + "step": 32740 + }, + { + "epoch": 0.97, + "learning_rate": 1.4757304569430452e-06, + "loss": 0.7999, + "step": 32750 + }, + { + "epoch": 0.97, + "learning_rate": 1.4609138860902033e-06, + "loss": 0.8508, + "step": 32760 + }, + { + "epoch": 0.97, + "learning_rate": 1.4460973152373614e-06, + "loss": 0.8006, + "step": 32770 + }, + { + "epoch": 0.97, + "learning_rate": 1.4312807443845197e-06, + "loss": 0.8227, + "step": 32780 + }, + { + "epoch": 0.97, + "learning_rate": 1.416464173531678e-06, + "loss": 0.9299, + "step": 32790 + }, + { + "epoch": 0.97, + "learning_rate": 1.401647602678836e-06, + "loss": 0.8562, + "step": 32800 + }, + { + "epoch": 0.97, + "learning_rate": 1.3868310318259944e-06, + "loss": 0.8914, + "step": 32810 + }, + { + "epoch": 0.97, + "learning_rate": 1.3720144609731524e-06, + "loss": 0.8771, + "step": 32820 + }, + { + "epoch": 0.97, + "learning_rate": 1.3571978901203105e-06, + "loss": 0.8683, + "step": 32830 + }, + { + "epoch": 0.97, + "learning_rate": 1.3423813192674688e-06, + "loss": 0.8071, + "step": 32840 + }, + { + "epoch": 0.97, + "learning_rate": 1.327564748414627e-06, + "loss": 0.7712, + "step": 32850 + }, + { + "epoch": 0.97, + "learning_rate": 1.3127481775617852e-06, + "loss": 0.6798, + "step": 32860 + }, + { + "epoch": 0.97, + "learning_rate": 1.2979316067089433e-06, + "loss": 0.863, + "step": 32870 + }, + { + "epoch": 0.97, + "learning_rate": 1.2831150358561016e-06, + "loss": 0.879, + "step": 32880 + }, + { + "epoch": 0.97, + "learning_rate": 1.2682984650032596e-06, + "loss": 0.9034, + "step": 32890 + }, + { + "epoch": 0.97, + "learning_rate": 1.253481894150418e-06, + "loss": 0.8587, + "step": 32900 + }, + { + "epoch": 0.98, + "learning_rate": 1.238665323297576e-06, + "loss": 0.773, + "step": 32910 + }, + { + "epoch": 0.98, + "learning_rate": 1.2238487524447343e-06, + "loss": 0.8008, + "step": 32920 + }, + { + "epoch": 0.98, + "learning_rate": 1.2090321815918924e-06, + "loss": 0.7931, + "step": 32930 + }, + { + "epoch": 0.98, + "learning_rate": 1.1942156107390507e-06, + "loss": 0.7081, + "step": 32940 + }, + { + "epoch": 0.98, + "learning_rate": 1.179399039886209e-06, + "loss": 0.808, + "step": 32950 + }, + { + "epoch": 0.98, + "learning_rate": 1.164582469033367e-06, + "loss": 0.8978, + "step": 32960 + }, + { + "epoch": 0.98, + "learning_rate": 1.1497658981805251e-06, + "loss": 0.8346, + "step": 32970 + }, + { + "epoch": 0.98, + "learning_rate": 1.1349493273276832e-06, + "loss": 0.7151, + "step": 32980 + }, + { + "epoch": 0.98, + "learning_rate": 1.1201327564748415e-06, + "loss": 0.6956, + "step": 32990 + }, + { + "epoch": 0.98, + "learning_rate": 1.1053161856219998e-06, + "loss": 0.8311, + "step": 33000 + } + ], + "logging_steps": 10, + "max_steps": 33746, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 4.97334915956736e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}