diff --git "a/VQA_lora_PMC_LLaMA_PMCCLIP/choice/checkpoint-4000/trainer_state.json" "b/VQA_lora_PMC_LLaMA_PMCCLIP/choice/checkpoint-4000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/VQA_lora_PMC_LLaMA_PMCCLIP/choice/checkpoint-4000/trainer_state.json" @@ -0,0 +1,24080 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.8933092224231465, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 1.0298, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 3.2e-07, + "loss": 0.9204, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 4.800000000000001e-07, + "loss": 0.9751, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 6.4e-07, + "loss": 0.9744, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-07, + "loss": 0.9722, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 9.600000000000001e-07, + "loss": 0.8623, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 1.12e-06, + "loss": 0.9453, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 1.28e-06, + "loss": 0.9832, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 1.44e-06, + "loss": 0.9021, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.9519, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 1.76e-06, + "loss": 0.98, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 1.9200000000000003e-06, + "loss": 0.9255, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 2.08e-06, + "loss": 0.9575, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-06, + "loss": 0.9324, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.8699, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 2.56e-06, + "loss": 0.8057, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 2.7200000000000002e-06, + "loss": 0.8328, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 2.88e-06, + "loss": 0.7427, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 3.04e-06, + "loss": 0.8071, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.8086, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 3.3600000000000004e-06, + "loss": 0.7517, + "step": 21 + }, + { + "epoch": 0.02, + "learning_rate": 3.52e-06, + "loss": 0.6747, + "step": 22 + }, + { + "epoch": 0.02, + "learning_rate": 3.6800000000000003e-06, + "loss": 0.6746, + "step": 23 + }, + { + "epoch": 0.02, + "learning_rate": 3.8400000000000005e-06, + "loss": 0.6533, + "step": 24 + }, + { + "epoch": 0.02, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6287, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.16e-06, + "loss": 0.5751, + "step": 26 + }, + { + "epoch": 0.02, + "learning_rate": 4.32e-06, + "loss": 0.5809, + "step": 27 + }, + { + "epoch": 0.02, + "learning_rate": 4.48e-06, + "loss": 0.5381, + "step": 28 + }, + { + "epoch": 0.02, + "learning_rate": 4.6400000000000005e-06, + "loss": 0.4198, + "step": 29 + }, + { + "epoch": 0.02, + "learning_rate": 4.800000000000001e-06, + "loss": 0.3943, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.960000000000001e-06, + "loss": 0.401, + "step": 31 + }, + { + "epoch": 0.02, + "learning_rate": 5.12e-06, + "loss": 0.3701, + "step": 32 + }, + { + "epoch": 0.02, + "learning_rate": 5.28e-06, + "loss": 0.364, + "step": 33 + }, + { + "epoch": 0.02, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.3278, + "step": 34 + }, + { + "epoch": 0.03, + "learning_rate": 5.600000000000001e-06, + "loss": 0.303, + "step": 35 + }, + { + "epoch": 0.03, + "learning_rate": 5.76e-06, + "loss": 0.2937, + "step": 36 + }, + { + "epoch": 0.03, + "learning_rate": 5.92e-06, + "loss": 0.2938, + "step": 37 + }, + { + "epoch": 0.03, + "learning_rate": 6.08e-06, + "loss": 0.2686, + "step": 38 + }, + { + "epoch": 0.03, + "learning_rate": 6.24e-06, + "loss": 0.2626, + "step": 39 + }, + { + "epoch": 0.03, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.2544, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 6.560000000000001e-06, + "loss": 0.2406, + "step": 41 + }, + { + "epoch": 0.03, + "learning_rate": 6.720000000000001e-06, + "loss": 0.2302, + "step": 42 + }, + { + "epoch": 0.03, + "learning_rate": 6.88e-06, + "loss": 0.2073, + "step": 43 + }, + { + "epoch": 0.03, + "learning_rate": 7.04e-06, + "loss": 0.2256, + "step": 44 + }, + { + "epoch": 0.03, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.2046, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 7.360000000000001e-06, + "loss": 0.1965, + "step": 46 + }, + { + "epoch": 0.03, + "learning_rate": 7.520000000000001e-06, + "loss": 0.1911, + "step": 47 + }, + { + "epoch": 0.03, + "learning_rate": 7.680000000000001e-06, + "loss": 0.2046, + "step": 48 + }, + { + "epoch": 0.04, + "learning_rate": 7.840000000000001e-06, + "loss": 0.17, + "step": 49 + }, + { + "epoch": 0.04, + "learning_rate": 8.000000000000001e-06, + "loss": 0.2005, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 8.16e-06, + "loss": 0.1688, + "step": 51 + }, + { + "epoch": 0.04, + "learning_rate": 8.32e-06, + "loss": 0.1754, + "step": 52 + }, + { + "epoch": 0.04, + "learning_rate": 8.48e-06, + "loss": 0.1828, + "step": 53 + }, + { + "epoch": 0.04, + "learning_rate": 8.64e-06, + "loss": 0.1679, + "step": 54 + }, + { + "epoch": 0.04, + "learning_rate": 8.8e-06, + "loss": 0.1667, + "step": 55 + }, + { + "epoch": 0.04, + "learning_rate": 8.96e-06, + "loss": 0.1599, + "step": 56 + }, + { + "epoch": 0.04, + "learning_rate": 9.12e-06, + "loss": 0.207, + "step": 57 + }, + { + "epoch": 0.04, + "learning_rate": 9.280000000000001e-06, + "loss": 0.1746, + "step": 58 + }, + { + "epoch": 0.04, + "learning_rate": 9.440000000000001e-06, + "loss": 0.1826, + "step": 59 + }, + { + "epoch": 0.04, + "learning_rate": 9.600000000000001e-06, + "loss": 0.1906, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 9.760000000000001e-06, + "loss": 0.1665, + "step": 61 + }, + { + "epoch": 0.04, + "learning_rate": 9.920000000000002e-06, + "loss": 0.1638, + "step": 62 + }, + { + "epoch": 0.05, + "learning_rate": 1.008e-05, + "loss": 0.1688, + "step": 63 + }, + { + "epoch": 0.05, + "learning_rate": 1.024e-05, + "loss": 0.1682, + "step": 64 + }, + { + "epoch": 0.05, + "learning_rate": 1.04e-05, + "loss": 0.1606, + "step": 65 + }, + { + "epoch": 0.05, + "learning_rate": 1.056e-05, + "loss": 0.1596, + "step": 66 + }, + { + "epoch": 0.05, + "learning_rate": 1.072e-05, + "loss": 0.1528, + "step": 67 + }, + { + "epoch": 0.05, + "learning_rate": 1.0880000000000001e-05, + "loss": 0.1633, + "step": 68 + }, + { + "epoch": 0.05, + "learning_rate": 1.1040000000000001e-05, + "loss": 0.1566, + "step": 69 + }, + { + "epoch": 0.05, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.1474, + "step": 70 + }, + { + "epoch": 0.05, + "learning_rate": 1.136e-05, + "loss": 0.1806, + "step": 71 + }, + { + "epoch": 0.05, + "learning_rate": 1.152e-05, + "loss": 0.1834, + "step": 72 + }, + { + "epoch": 0.05, + "learning_rate": 1.168e-05, + "loss": 0.1748, + "step": 73 + }, + { + "epoch": 0.05, + "learning_rate": 1.184e-05, + "loss": 0.1515, + "step": 74 + }, + { + "epoch": 0.05, + "learning_rate": 1.2e-05, + "loss": 0.1592, + "step": 75 + }, + { + "epoch": 0.05, + "learning_rate": 1.216e-05, + "loss": 0.1494, + "step": 76 + }, + { + "epoch": 0.06, + "learning_rate": 1.232e-05, + "loss": 0.1744, + "step": 77 + }, + { + "epoch": 0.06, + "learning_rate": 1.248e-05, + "loss": 0.1516, + "step": 78 + }, + { + "epoch": 0.06, + "learning_rate": 1.2640000000000001e-05, + "loss": 0.157, + "step": 79 + }, + { + "epoch": 0.06, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.1559, + "step": 80 + }, + { + "epoch": 0.06, + "learning_rate": 1.2960000000000001e-05, + "loss": 0.1463, + "step": 81 + }, + { + "epoch": 0.06, + "learning_rate": 1.3120000000000001e-05, + "loss": 0.1542, + "step": 82 + }, + { + "epoch": 0.06, + "learning_rate": 1.3280000000000002e-05, + "loss": 0.1552, + "step": 83 + }, + { + "epoch": 0.06, + "learning_rate": 1.3440000000000002e-05, + "loss": 0.1584, + "step": 84 + }, + { + "epoch": 0.06, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.1644, + "step": 85 + }, + { + "epoch": 0.06, + "learning_rate": 1.376e-05, + "loss": 0.144, + "step": 86 + }, + { + "epoch": 0.06, + "learning_rate": 1.392e-05, + "loss": 0.1672, + "step": 87 + }, + { + "epoch": 0.06, + "learning_rate": 1.408e-05, + "loss": 0.1444, + "step": 88 + }, + { + "epoch": 0.06, + "learning_rate": 1.4240000000000001e-05, + "loss": 0.1522, + "step": 89 + }, + { + "epoch": 0.07, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.1572, + "step": 90 + }, + { + "epoch": 0.07, + "learning_rate": 1.4560000000000001e-05, + "loss": 0.1475, + "step": 91 + }, + { + "epoch": 0.07, + "learning_rate": 1.4720000000000001e-05, + "loss": 0.1471, + "step": 92 + }, + { + "epoch": 0.07, + "learning_rate": 1.4880000000000002e-05, + "loss": 0.1599, + "step": 93 + }, + { + "epoch": 0.07, + "learning_rate": 1.5040000000000002e-05, + "loss": 0.1643, + "step": 94 + }, + { + "epoch": 0.07, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.1558, + "step": 95 + }, + { + "epoch": 0.07, + "learning_rate": 1.5360000000000002e-05, + "loss": 0.1447, + "step": 96 + }, + { + "epoch": 0.07, + "learning_rate": 1.552e-05, + "loss": 0.1592, + "step": 97 + }, + { + "epoch": 0.07, + "learning_rate": 1.5680000000000002e-05, + "loss": 0.1457, + "step": 98 + }, + { + "epoch": 0.07, + "learning_rate": 1.584e-05, + "loss": 0.1504, + "step": 99 + }, + { + "epoch": 0.07, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.1599, + "step": 100 + }, + { + "epoch": 0.07, + "learning_rate": 1.616e-05, + "loss": 0.1507, + "step": 101 + }, + { + "epoch": 0.07, + "learning_rate": 1.632e-05, + "loss": 0.1455, + "step": 102 + }, + { + "epoch": 0.07, + "learning_rate": 1.648e-05, + "loss": 0.1476, + "step": 103 + }, + { + "epoch": 0.08, + "learning_rate": 1.664e-05, + "loss": 0.1463, + "step": 104 + }, + { + "epoch": 0.08, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.152, + "step": 105 + }, + { + "epoch": 0.08, + "learning_rate": 1.696e-05, + "loss": 0.1447, + "step": 106 + }, + { + "epoch": 0.08, + "learning_rate": 1.7120000000000002e-05, + "loss": 0.151, + "step": 107 + }, + { + "epoch": 0.08, + "learning_rate": 1.728e-05, + "loss": 0.1436, + "step": 108 + }, + { + "epoch": 0.08, + "learning_rate": 1.7440000000000002e-05, + "loss": 0.1431, + "step": 109 + }, + { + "epoch": 0.08, + "learning_rate": 1.76e-05, + "loss": 0.1568, + "step": 110 + }, + { + "epoch": 0.08, + "learning_rate": 1.7760000000000003e-05, + "loss": 0.1468, + "step": 111 + }, + { + "epoch": 0.08, + "learning_rate": 1.792e-05, + "loss": 0.1602, + "step": 112 + }, + { + "epoch": 0.08, + "learning_rate": 1.8080000000000003e-05, + "loss": 0.1486, + "step": 113 + }, + { + "epoch": 0.08, + "learning_rate": 1.824e-05, + "loss": 0.1358, + "step": 114 + }, + { + "epoch": 0.08, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.1535, + "step": 115 + }, + { + "epoch": 0.08, + "learning_rate": 1.8560000000000002e-05, + "loss": 0.151, + "step": 116 + }, + { + "epoch": 0.08, + "learning_rate": 1.8720000000000004e-05, + "loss": 0.1479, + "step": 117 + }, + { + "epoch": 0.09, + "learning_rate": 1.8880000000000002e-05, + "loss": 0.1508, + "step": 118 + }, + { + "epoch": 0.09, + "learning_rate": 1.904e-05, + "loss": 0.1454, + "step": 119 + }, + { + "epoch": 0.09, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.1486, + "step": 120 + }, + { + "epoch": 0.09, + "learning_rate": 1.936e-05, + "loss": 0.1463, + "step": 121 + }, + { + "epoch": 0.09, + "learning_rate": 1.9520000000000003e-05, + "loss": 0.1381, + "step": 122 + }, + { + "epoch": 0.09, + "learning_rate": 1.968e-05, + "loss": 0.1397, + "step": 123 + }, + { + "epoch": 0.09, + "learning_rate": 1.9840000000000003e-05, + "loss": 0.1544, + "step": 124 + }, + { + "epoch": 0.09, + "learning_rate": 2e-05, + "loss": 0.1391, + "step": 125 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999996947880165e-05, + "loss": 0.1518, + "step": 126 + }, + { + "epoch": 0.09, + "learning_rate": 1.999998779152252e-05, + "loss": 0.1593, + "step": 127 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999972530932657e-05, + "loss": 0.1462, + "step": 128 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999951166119886e-05, + "loss": 0.1394, + "step": 129 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999923697097254e-05, + "loss": 0.1452, + "step": 130 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999890123881527e-05, + "loss": 0.1561, + "step": 131 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999850446493196e-05, + "loss": 0.1424, + "step": 132 + }, + { + "epoch": 0.1, + "learning_rate": 1.999980466495649e-05, + "loss": 0.1604, + "step": 133 + }, + { + "epoch": 0.1, + "learning_rate": 1.999975277929934e-05, + "loss": 0.1469, + "step": 134 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999694789553435e-05, + "loss": 0.1378, + "step": 135 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999630695754163e-05, + "loss": 0.1548, + "step": 136 + }, + { + "epoch": 0.1, + "learning_rate": 1.999956049794065e-05, + "loss": 0.1415, + "step": 137 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999484196155748e-05, + "loss": 0.1346, + "step": 138 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999401790446033e-05, + "loss": 0.1414, + "step": 139 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999313280861806e-05, + "loss": 0.1449, + "step": 140 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999218667457096e-05, + "loss": 0.1497, + "step": 141 + }, + { + "epoch": 0.1, + "learning_rate": 1.999911795028966e-05, + "loss": 0.1439, + "step": 142 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999011129420976e-05, + "loss": 0.1366, + "step": 143 + }, + { + "epoch": 0.1, + "learning_rate": 1.999889820491625e-05, + "loss": 0.1476, + "step": 144 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998779176844414e-05, + "loss": 0.1363, + "step": 145 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998654045278126e-05, + "loss": 0.145, + "step": 146 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998522810293766e-05, + "loss": 0.1465, + "step": 147 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998385471971448e-05, + "loss": 0.1445, + "step": 148 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998242030395004e-05, + "loss": 0.1526, + "step": 149 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998092485651994e-05, + "loss": 0.1418, + "step": 150 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997936837833704e-05, + "loss": 0.153, + "step": 151 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997775087035148e-05, + "loss": 0.1432, + "step": 152 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997607233355055e-05, + "loss": 0.1419, + "step": 153 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997433276895897e-05, + "loss": 0.1501, + "step": 154 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997253217763853e-05, + "loss": 0.1396, + "step": 155 + }, + { + "epoch": 0.11, + "learning_rate": 1.999706705606884e-05, + "loss": 0.1376, + "step": 156 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996874791924493e-05, + "loss": 0.1429, + "step": 157 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996676425448175e-05, + "loss": 0.1421, + "step": 158 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996471956760978e-05, + "loss": 0.1301, + "step": 159 + }, + { + "epoch": 0.12, + "learning_rate": 1.999626138598771e-05, + "loss": 0.1325, + "step": 160 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996044713256907e-05, + "loss": 0.1332, + "step": 161 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995821938700835e-05, + "loss": 0.1406, + "step": 162 + }, + { + "epoch": 0.12, + "learning_rate": 1.999559306245548e-05, + "loss": 0.1241, + "step": 163 + }, + { + "epoch": 0.12, + "learning_rate": 1.999535808466055e-05, + "loss": 0.1405, + "step": 164 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995117005459484e-05, + "loss": 0.141, + "step": 165 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994869824999448e-05, + "loss": 0.1424, + "step": 166 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994616543431315e-05, + "loss": 0.1431, + "step": 167 + }, + { + "epoch": 0.12, + "learning_rate": 1.999435716090971e-05, + "loss": 0.1393, + "step": 168 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994091677592945e-05, + "loss": 0.1507, + "step": 169 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993820093643097e-05, + "loss": 0.135, + "step": 170 + }, + { + "epoch": 0.12, + "learning_rate": 1.999354240922594e-05, + "loss": 0.1478, + "step": 171 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993258624510977e-05, + "loss": 0.1472, + "step": 172 + }, + { + "epoch": 0.13, + "learning_rate": 1.999296873967144e-05, + "loss": 0.1352, + "step": 173 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992672754884276e-05, + "loss": 0.142, + "step": 174 + }, + { + "epoch": 0.13, + "learning_rate": 1.999237067033017e-05, + "loss": 0.137, + "step": 175 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992062486193517e-05, + "loss": 0.136, + "step": 176 + }, + { + "epoch": 0.13, + "learning_rate": 1.999174820266244e-05, + "loss": 0.1342, + "step": 177 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991427819928784e-05, + "loss": 0.133, + "step": 178 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991101338188125e-05, + "loss": 0.1361, + "step": 179 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990768757639747e-05, + "loss": 0.1307, + "step": 180 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990430078486665e-05, + "loss": 0.1429, + "step": 181 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990085300935627e-05, + "loss": 0.1432, + "step": 182 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989734425197084e-05, + "loss": 0.1443, + "step": 183 + }, + { + "epoch": 0.13, + "learning_rate": 1.998937745148522e-05, + "loss": 0.1457, + "step": 184 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989014380017943e-05, + "loss": 0.1449, + "step": 185 + }, + { + "epoch": 0.13, + "learning_rate": 1.998864521101688e-05, + "loss": 0.1274, + "step": 186 + }, + { + "epoch": 0.14, + "learning_rate": 1.9988269944707377e-05, + "loss": 0.1499, + "step": 187 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987888581318515e-05, + "loss": 0.1405, + "step": 188 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987501121083076e-05, + "loss": 0.1424, + "step": 189 + }, + { + "epoch": 0.14, + "learning_rate": 1.998710756423758e-05, + "loss": 0.1389, + "step": 190 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986707911022263e-05, + "loss": 0.1347, + "step": 191 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986302161681087e-05, + "loss": 0.1371, + "step": 192 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985890316461728e-05, + "loss": 0.1423, + "step": 193 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985472375615582e-05, + "loss": 0.1377, + "step": 194 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985048339397775e-05, + "loss": 0.1472, + "step": 195 + }, + { + "epoch": 0.14, + "learning_rate": 1.998461820806715e-05, + "loss": 0.1361, + "step": 196 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984181981886266e-05, + "loss": 0.1396, + "step": 197 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983739661121406e-05, + "loss": 0.1544, + "step": 198 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983291246042574e-05, + "loss": 0.139, + "step": 199 + }, + { + "epoch": 0.14, + "learning_rate": 1.9982836736923495e-05, + "loss": 0.1371, + "step": 200 + }, + { + "epoch": 0.15, + "learning_rate": 1.998237613404161e-05, + "loss": 0.1432, + "step": 201 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981909437678082e-05, + "loss": 0.1384, + "step": 202 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981436648117797e-05, + "loss": 0.145, + "step": 203 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980957765649354e-05, + "loss": 0.1466, + "step": 204 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980472790565075e-05, + "loss": 0.1358, + "step": 205 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979981723161e-05, + "loss": 0.1363, + "step": 206 + }, + { + "epoch": 0.15, + "learning_rate": 1.997948456373689e-05, + "loss": 0.1383, + "step": 207 + }, + { + "epoch": 0.15, + "learning_rate": 1.997898131259622e-05, + "loss": 0.1466, + "step": 208 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978471970046186e-05, + "loss": 0.1229, + "step": 209 + }, + { + "epoch": 0.15, + "learning_rate": 1.997795653639771e-05, + "loss": 0.1377, + "step": 210 + }, + { + "epoch": 0.15, + "learning_rate": 1.997743501196542e-05, + "loss": 0.1372, + "step": 211 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976907397067664e-05, + "loss": 0.1281, + "step": 212 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976373692026518e-05, + "loss": 0.1339, + "step": 213 + }, + { + "epoch": 0.15, + "learning_rate": 1.997583389716776e-05, + "loss": 0.1501, + "step": 214 + }, + { + "epoch": 0.16, + "learning_rate": 1.9975288012820903e-05, + "loss": 0.1431, + "step": 215 + }, + { + "epoch": 0.16, + "learning_rate": 1.9974736039319156e-05, + "loss": 0.1452, + "step": 216 + }, + { + "epoch": 0.16, + "learning_rate": 1.997417797699947e-05, + "loss": 0.1444, + "step": 217 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973613826202493e-05, + "loss": 0.1337, + "step": 218 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973043587272592e-05, + "loss": 0.146, + "step": 219 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972467260557864e-05, + "loss": 0.1259, + "step": 220 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971884846410104e-05, + "loss": 0.1317, + "step": 221 + }, + { + "epoch": 0.16, + "learning_rate": 1.997129634518484e-05, + "loss": 0.1436, + "step": 222 + }, + { + "epoch": 0.16, + "learning_rate": 1.99707017572413e-05, + "loss": 0.1375, + "step": 223 + }, + { + "epoch": 0.16, + "learning_rate": 1.997010108294244e-05, + "loss": 0.1595, + "step": 224 + }, + { + "epoch": 0.16, + "learning_rate": 1.996949432265492e-05, + "loss": 0.1358, + "step": 225 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968881476749126e-05, + "loss": 0.138, + "step": 226 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968262545599152e-05, + "loss": 0.1472, + "step": 227 + }, + { + "epoch": 0.16, + "learning_rate": 1.9967637529582806e-05, + "loss": 0.1484, + "step": 228 + }, + { + "epoch": 0.17, + "learning_rate": 1.9967006429081616e-05, + "loss": 0.1444, + "step": 229 + }, + { + "epoch": 0.17, + "learning_rate": 1.9966369244480825e-05, + "loss": 0.1458, + "step": 230 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965725976169374e-05, + "loss": 0.1374, + "step": 231 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965076624539944e-05, + "loss": 0.1327, + "step": 232 + }, + { + "epoch": 0.17, + "learning_rate": 1.99644211899889e-05, + "loss": 0.1378, + "step": 233 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963759672916344e-05, + "loss": 0.1417, + "step": 234 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963092073726083e-05, + "loss": 0.1329, + "step": 235 + }, + { + "epoch": 0.17, + "learning_rate": 1.996241839282563e-05, + "loss": 0.14, + "step": 236 + }, + { + "epoch": 0.17, + "learning_rate": 1.996173863062622e-05, + "loss": 0.1352, + "step": 237 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961052787542794e-05, + "loss": 0.1329, + "step": 238 + }, + { + "epoch": 0.17, + "learning_rate": 1.996036086399401e-05, + "loss": 0.1408, + "step": 239 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959662860402228e-05, + "loss": 0.1334, + "step": 240 + }, + { + "epoch": 0.17, + "learning_rate": 1.9958958777193534e-05, + "loss": 0.1475, + "step": 241 + }, + { + "epoch": 0.18, + "learning_rate": 1.9958248614797712e-05, + "loss": 0.1371, + "step": 242 + }, + { + "epoch": 0.18, + "learning_rate": 1.9957532373648263e-05, + "loss": 0.1412, + "step": 243 + }, + { + "epoch": 0.18, + "learning_rate": 1.99568100541824e-05, + "loss": 0.1491, + "step": 244 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956081656841045e-05, + "loss": 0.1326, + "step": 245 + }, + { + "epoch": 0.18, + "learning_rate": 1.995534718206882e-05, + "loss": 0.1198, + "step": 246 + }, + { + "epoch": 0.18, + "learning_rate": 1.9954606630314082e-05, + "loss": 0.1376, + "step": 247 + }, + { + "epoch": 0.18, + "learning_rate": 1.995386000202887e-05, + "loss": 0.1307, + "step": 248 + }, + { + "epoch": 0.18, + "learning_rate": 1.9953107297668946e-05, + "loss": 0.1423, + "step": 249 + }, + { + "epoch": 0.18, + "learning_rate": 1.9952348517693774e-05, + "loss": 0.1488, + "step": 250 + }, + { + "epoch": 0.18, + "learning_rate": 1.9951583662566543e-05, + "loss": 0.1397, + "step": 251 + }, + { + "epoch": 0.18, + "learning_rate": 1.995081273275413e-05, + "loss": 0.1367, + "step": 252 + }, + { + "epoch": 0.18, + "learning_rate": 1.995003572872713e-05, + "loss": 0.1416, + "step": 253 + }, + { + "epoch": 0.18, + "learning_rate": 1.994925265095985e-05, + "loss": 0.1292, + "step": 254 + }, + { + "epoch": 0.18, + "learning_rate": 1.9948463499930297e-05, + "loss": 0.1487, + "step": 255 + }, + { + "epoch": 0.19, + "learning_rate": 1.9947668276120182e-05, + "loss": 0.1341, + "step": 256 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946866980014935e-05, + "loss": 0.1412, + "step": 257 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946059612103684e-05, + "loss": 0.1313, + "step": 258 + }, + { + "epoch": 0.19, + "learning_rate": 1.9945246172879268e-05, + "loss": 0.1367, + "step": 259 + }, + { + "epoch": 0.19, + "learning_rate": 1.994442666283823e-05, + "loss": 0.1387, + "step": 260 + }, + { + "epoch": 0.19, + "learning_rate": 1.9943601082480812e-05, + "loss": 0.1406, + "step": 261 + }, + { + "epoch": 0.19, + "learning_rate": 1.9942769432310973e-05, + "loss": 0.1453, + "step": 262 + }, + { + "epoch": 0.19, + "learning_rate": 1.9941931712836373e-05, + "loss": 0.1405, + "step": 263 + }, + { + "epoch": 0.19, + "learning_rate": 1.9941087924568377e-05, + "loss": 0.1424, + "step": 264 + }, + { + "epoch": 0.19, + "learning_rate": 1.994023806802205e-05, + "loss": 0.136, + "step": 265 + }, + { + "epoch": 0.19, + "learning_rate": 1.9939382143716167e-05, + "loss": 0.131, + "step": 266 + }, + { + "epoch": 0.19, + "learning_rate": 1.9938520152173203e-05, + "loss": 0.1432, + "step": 267 + }, + { + "epoch": 0.19, + "learning_rate": 1.993765209391934e-05, + "loss": 0.126, + "step": 268 + }, + { + "epoch": 0.19, + "learning_rate": 1.993677796948446e-05, + "loss": 0.1476, + "step": 269 + }, + { + "epoch": 0.2, + "learning_rate": 1.9935897779402152e-05, + "loss": 0.1395, + "step": 270 + }, + { + "epoch": 0.2, + "learning_rate": 1.9935011524209705e-05, + "loss": 0.141, + "step": 271 + }, + { + "epoch": 0.2, + "learning_rate": 1.9934119204448107e-05, + "loss": 0.1368, + "step": 272 + }, + { + "epoch": 0.2, + "learning_rate": 1.9933220820662053e-05, + "loss": 0.1305, + "step": 273 + }, + { + "epoch": 0.2, + "learning_rate": 1.993231637339994e-05, + "loss": 0.1359, + "step": 274 + }, + { + "epoch": 0.2, + "learning_rate": 1.993140586321386e-05, + "loss": 0.1334, + "step": 275 + }, + { + "epoch": 0.2, + "learning_rate": 1.9930489290659615e-05, + "loss": 0.1371, + "step": 276 + }, + { + "epoch": 0.2, + "learning_rate": 1.9929566656296696e-05, + "loss": 0.146, + "step": 277 + }, + { + "epoch": 0.2, + "learning_rate": 1.992863796068831e-05, + "loss": 0.1263, + "step": 278 + }, + { + "epoch": 0.2, + "learning_rate": 1.9927703204401353e-05, + "loss": 0.1303, + "step": 279 + }, + { + "epoch": 0.2, + "learning_rate": 1.9926762388006416e-05, + "loss": 0.1286, + "step": 280 + }, + { + "epoch": 0.2, + "learning_rate": 1.9925815512077803e-05, + "loss": 0.124, + "step": 281 + }, + { + "epoch": 0.2, + "learning_rate": 1.9924862577193508e-05, + "loss": 0.1396, + "step": 282 + }, + { + "epoch": 0.2, + "learning_rate": 1.9923903583935226e-05, + "loss": 0.1276, + "step": 283 + }, + { + "epoch": 0.21, + "learning_rate": 1.9922938532888342e-05, + "loss": 0.1356, + "step": 284 + }, + { + "epoch": 0.21, + "learning_rate": 1.9921967424641957e-05, + "loss": 0.1239, + "step": 285 + }, + { + "epoch": 0.21, + "learning_rate": 1.9920990259788854e-05, + "loss": 0.1454, + "step": 286 + }, + { + "epoch": 0.21, + "learning_rate": 1.9920007038925516e-05, + "loss": 0.1382, + "step": 287 + }, + { + "epoch": 0.21, + "learning_rate": 1.9919017762652125e-05, + "loss": 0.1361, + "step": 288 + }, + { + "epoch": 0.21, + "learning_rate": 1.991802243157256e-05, + "loss": 0.1402, + "step": 289 + }, + { + "epoch": 0.21, + "learning_rate": 1.99170210462944e-05, + "loss": 0.136, + "step": 290 + }, + { + "epoch": 0.21, + "learning_rate": 1.9916013607428908e-05, + "loss": 0.1323, + "step": 291 + }, + { + "epoch": 0.21, + "learning_rate": 1.991500011559105e-05, + "loss": 0.1374, + "step": 292 + }, + { + "epoch": 0.21, + "learning_rate": 1.9913980571399486e-05, + "loss": 0.1462, + "step": 293 + }, + { + "epoch": 0.21, + "learning_rate": 1.991295497547657e-05, + "loss": 0.1473, + "step": 294 + }, + { + "epoch": 0.21, + "learning_rate": 1.9911923328448353e-05, + "loss": 0.1373, + "step": 295 + }, + { + "epoch": 0.21, + "learning_rate": 1.9910885630944575e-05, + "loss": 0.1389, + "step": 296 + }, + { + "epoch": 0.21, + "learning_rate": 1.990984188359867e-05, + "loss": 0.1286, + "step": 297 + }, + { + "epoch": 0.22, + "learning_rate": 1.9908792087047766e-05, + "loss": 0.1353, + "step": 298 + }, + { + "epoch": 0.22, + "learning_rate": 1.9907736241932692e-05, + "loss": 0.1392, + "step": 299 + }, + { + "epoch": 0.22, + "learning_rate": 1.990667434889795e-05, + "loss": 0.1392, + "step": 300 + }, + { + "epoch": 0.22, + "learning_rate": 1.990560640859175e-05, + "loss": 0.1387, + "step": 301 + }, + { + "epoch": 0.22, + "learning_rate": 1.990453242166599e-05, + "loss": 0.1331, + "step": 302 + }, + { + "epoch": 0.22, + "learning_rate": 1.9903452388776253e-05, + "loss": 0.1307, + "step": 303 + }, + { + "epoch": 0.22, + "learning_rate": 1.9902366310581823e-05, + "loss": 0.1544, + "step": 304 + }, + { + "epoch": 0.22, + "learning_rate": 1.9901274187745663e-05, + "loss": 0.1432, + "step": 305 + }, + { + "epoch": 0.22, + "learning_rate": 1.9900176020934437e-05, + "loss": 0.125, + "step": 306 + }, + { + "epoch": 0.22, + "learning_rate": 1.9899071810818485e-05, + "loss": 0.1406, + "step": 307 + }, + { + "epoch": 0.22, + "learning_rate": 1.9897961558071843e-05, + "loss": 0.1508, + "step": 308 + }, + { + "epoch": 0.22, + "learning_rate": 1.9896845263372242e-05, + "loss": 0.1356, + "step": 309 + }, + { + "epoch": 0.22, + "learning_rate": 1.9895722927401094e-05, + "loss": 0.148, + "step": 310 + }, + { + "epoch": 0.22, + "learning_rate": 1.9894594550843494e-05, + "loss": 0.1277, + "step": 311 + }, + { + "epoch": 0.23, + "learning_rate": 1.9893460134388237e-05, + "loss": 0.1229, + "step": 312 + }, + { + "epoch": 0.23, + "learning_rate": 1.9892319678727797e-05, + "loss": 0.1344, + "step": 313 + }, + { + "epoch": 0.23, + "learning_rate": 1.989117318455833e-05, + "loss": 0.1337, + "step": 314 + }, + { + "epoch": 0.23, + "learning_rate": 1.989002065257969e-05, + "loss": 0.1319, + "step": 315 + }, + { + "epoch": 0.23, + "learning_rate": 1.9888862083495407e-05, + "loss": 0.1394, + "step": 316 + }, + { + "epoch": 0.23, + "learning_rate": 1.98876974780127e-05, + "loss": 0.1255, + "step": 317 + }, + { + "epoch": 0.23, + "learning_rate": 1.9886526836842468e-05, + "loss": 0.1262, + "step": 318 + }, + { + "epoch": 0.23, + "learning_rate": 1.9885350160699305e-05, + "loss": 0.1372, + "step": 319 + }, + { + "epoch": 0.23, + "learning_rate": 1.988416745030148e-05, + "loss": 0.1415, + "step": 320 + }, + { + "epoch": 0.23, + "learning_rate": 1.988297870637095e-05, + "loss": 0.1318, + "step": 321 + }, + { + "epoch": 0.23, + "learning_rate": 1.9881783929633344e-05, + "loss": 0.1377, + "step": 322 + }, + { + "epoch": 0.23, + "learning_rate": 1.9880583120817992e-05, + "loss": 0.1372, + "step": 323 + }, + { + "epoch": 0.23, + "learning_rate": 1.987937628065789e-05, + "loss": 0.1312, + "step": 324 + }, + { + "epoch": 0.24, + "learning_rate": 1.9878163409889727e-05, + "loss": 0.1359, + "step": 325 + }, + { + "epoch": 0.24, + "learning_rate": 1.9876944509253867e-05, + "loss": 0.1401, + "step": 326 + }, + { + "epoch": 0.24, + "learning_rate": 1.9875719579494357e-05, + "loss": 0.1416, + "step": 327 + }, + { + "epoch": 0.24, + "learning_rate": 1.987448862135892e-05, + "loss": 0.1401, + "step": 328 + }, + { + "epoch": 0.24, + "learning_rate": 1.9873251635598966e-05, + "loss": 0.1351, + "step": 329 + }, + { + "epoch": 0.24, + "learning_rate": 1.9872008622969576e-05, + "loss": 0.1404, + "step": 330 + }, + { + "epoch": 0.24, + "learning_rate": 1.987075958422952e-05, + "loss": 0.1346, + "step": 331 + }, + { + "epoch": 0.24, + "learning_rate": 1.986950452014124e-05, + "loss": 0.142, + "step": 332 + }, + { + "epoch": 0.24, + "learning_rate": 1.9868243431470857e-05, + "loss": 0.1389, + "step": 333 + }, + { + "epoch": 0.24, + "learning_rate": 1.9866976318988165e-05, + "loss": 0.1404, + "step": 334 + }, + { + "epoch": 0.24, + "learning_rate": 1.9865703183466645e-05, + "loss": 0.142, + "step": 335 + }, + { + "epoch": 0.24, + "learning_rate": 1.986442402568345e-05, + "loss": 0.1281, + "step": 336 + }, + { + "epoch": 0.24, + "learning_rate": 1.9863138846419408e-05, + "loss": 0.1515, + "step": 337 + }, + { + "epoch": 0.24, + "learning_rate": 1.986184764645902e-05, + "loss": 0.1444, + "step": 338 + }, + { + "epoch": 0.25, + "learning_rate": 1.9860550426590465e-05, + "loss": 0.1414, + "step": 339 + }, + { + "epoch": 0.25, + "learning_rate": 1.9859247187605604e-05, + "loss": 0.1412, + "step": 340 + }, + { + "epoch": 0.25, + "learning_rate": 1.985793793029996e-05, + "loss": 0.1412, + "step": 341 + }, + { + "epoch": 0.25, + "learning_rate": 1.985662265547274e-05, + "loss": 0.1288, + "step": 342 + }, + { + "epoch": 0.25, + "learning_rate": 1.9855301363926807e-05, + "loss": 0.1305, + "step": 343 + }, + { + "epoch": 0.25, + "learning_rate": 1.985397405646872e-05, + "loss": 0.1469, + "step": 344 + }, + { + "epoch": 0.25, + "learning_rate": 1.98526407339087e-05, + "loss": 0.1363, + "step": 345 + }, + { + "epoch": 0.25, + "learning_rate": 1.9851301397060633e-05, + "loss": 0.1232, + "step": 346 + }, + { + "epoch": 0.25, + "learning_rate": 1.984995604674208e-05, + "loss": 0.1393, + "step": 347 + }, + { + "epoch": 0.25, + "learning_rate": 1.9848604683774288e-05, + "loss": 0.1438, + "step": 348 + }, + { + "epoch": 0.25, + "learning_rate": 1.984724730898215e-05, + "loss": 0.1386, + "step": 349 + }, + { + "epoch": 0.25, + "learning_rate": 1.9845883923194243e-05, + "loss": 0.1411, + "step": 350 + }, + { + "epoch": 0.25, + "learning_rate": 1.984451452724281e-05, + "loss": 0.1331, + "step": 351 + }, + { + "epoch": 0.25, + "learning_rate": 1.9843139121963766e-05, + "loss": 0.1418, + "step": 352 + }, + { + "epoch": 0.26, + "learning_rate": 1.984175770819669e-05, + "loss": 0.1364, + "step": 353 + }, + { + "epoch": 0.26, + "learning_rate": 1.9840370286784826e-05, + "loss": 0.1371, + "step": 354 + }, + { + "epoch": 0.26, + "learning_rate": 1.9838976858575095e-05, + "loss": 0.1411, + "step": 355 + }, + { + "epoch": 0.26, + "learning_rate": 1.9837577424418074e-05, + "loss": 0.1441, + "step": 356 + }, + { + "epoch": 0.26, + "learning_rate": 1.9836171985168018e-05, + "loss": 0.1363, + "step": 357 + }, + { + "epoch": 0.26, + "learning_rate": 1.9834760541682834e-05, + "loss": 0.1481, + "step": 358 + }, + { + "epoch": 0.26, + "learning_rate": 1.98333430948241e-05, + "loss": 0.136, + "step": 359 + }, + { + "epoch": 0.26, + "learning_rate": 1.983191964545707e-05, + "loss": 0.1358, + "step": 360 + }, + { + "epoch": 0.26, + "learning_rate": 1.9830490194450642e-05, + "loss": 0.1291, + "step": 361 + }, + { + "epoch": 0.26, + "learning_rate": 1.9829054742677387e-05, + "loss": 0.1516, + "step": 362 + }, + { + "epoch": 0.26, + "learning_rate": 1.9827613291013546e-05, + "loss": 0.1387, + "step": 363 + }, + { + "epoch": 0.26, + "learning_rate": 1.982616584033901e-05, + "loss": 0.123, + "step": 364 + }, + { + "epoch": 0.26, + "learning_rate": 1.9824712391537337e-05, + "loss": 0.144, + "step": 365 + }, + { + "epoch": 0.26, + "learning_rate": 1.982325294549575e-05, + "loss": 0.1345, + "step": 366 + }, + { + "epoch": 0.27, + "learning_rate": 1.982178750310513e-05, + "loss": 0.1412, + "step": 367 + }, + { + "epoch": 0.27, + "learning_rate": 1.9820316065260017e-05, + "loss": 0.1336, + "step": 368 + }, + { + "epoch": 0.27, + "learning_rate": 1.981883863285861e-05, + "loss": 0.1321, + "step": 369 + }, + { + "epoch": 0.27, + "learning_rate": 1.9817355206802777e-05, + "loss": 0.1327, + "step": 370 + }, + { + "epoch": 0.27, + "learning_rate": 1.9815865787998026e-05, + "loss": 0.1412, + "step": 371 + }, + { + "epoch": 0.27, + "learning_rate": 1.981437037735354e-05, + "loss": 0.14, + "step": 372 + }, + { + "epoch": 0.27, + "learning_rate": 1.9812868975782152e-05, + "loss": 0.1369, + "step": 373 + }, + { + "epoch": 0.27, + "learning_rate": 1.9811361584200356e-05, + "loss": 0.1437, + "step": 374 + }, + { + "epoch": 0.27, + "learning_rate": 1.9809848203528298e-05, + "loss": 0.1284, + "step": 375 + }, + { + "epoch": 0.27, + "learning_rate": 1.9808328834689778e-05, + "loss": 0.137, + "step": 376 + }, + { + "epoch": 0.27, + "learning_rate": 1.9806803478612263e-05, + "loss": 0.1434, + "step": 377 + }, + { + "epoch": 0.27, + "learning_rate": 1.9805272136226857e-05, + "loss": 0.131, + "step": 378 + }, + { + "epoch": 0.27, + "learning_rate": 1.980373480846834e-05, + "loss": 0.1464, + "step": 379 + }, + { + "epoch": 0.27, + "learning_rate": 1.9802191496275123e-05, + "loss": 0.1343, + "step": 380 + }, + { + "epoch": 0.28, + "learning_rate": 1.980064220058929e-05, + "loss": 0.1432, + "step": 381 + }, + { + "epoch": 0.28, + "learning_rate": 1.979908692235656e-05, + "loss": 0.1365, + "step": 382 + }, + { + "epoch": 0.28, + "learning_rate": 1.9797525662526318e-05, + "loss": 0.1466, + "step": 383 + }, + { + "epoch": 0.28, + "learning_rate": 1.979595842205159e-05, + "loss": 0.1482, + "step": 384 + }, + { + "epoch": 0.28, + "learning_rate": 1.979438520188906e-05, + "loss": 0.1427, + "step": 385 + }, + { + "epoch": 0.28, + "learning_rate": 1.979280600299906e-05, + "loss": 0.1369, + "step": 386 + }, + { + "epoch": 0.28, + "learning_rate": 1.979122082634557e-05, + "loss": 0.1392, + "step": 387 + }, + { + "epoch": 0.28, + "learning_rate": 1.9789629672896215e-05, + "loss": 0.134, + "step": 388 + }, + { + "epoch": 0.28, + "learning_rate": 1.978803254362228e-05, + "loss": 0.1294, + "step": 389 + }, + { + "epoch": 0.28, + "learning_rate": 1.9786429439498688e-05, + "loss": 0.1398, + "step": 390 + }, + { + "epoch": 0.28, + "learning_rate": 1.9784820361504015e-05, + "loss": 0.1363, + "step": 391 + }, + { + "epoch": 0.28, + "learning_rate": 1.9783205310620474e-05, + "loss": 0.1408, + "step": 392 + }, + { + "epoch": 0.28, + "learning_rate": 1.978158428783394e-05, + "loss": 0.1506, + "step": 393 + }, + { + "epoch": 0.28, + "learning_rate": 1.9779957294133915e-05, + "loss": 0.1341, + "step": 394 + }, + { + "epoch": 0.29, + "learning_rate": 1.9778324330513563e-05, + "loss": 0.1407, + "step": 395 + }, + { + "epoch": 0.29, + "learning_rate": 1.9776685397969678e-05, + "loss": 0.1358, + "step": 396 + }, + { + "epoch": 0.29, + "learning_rate": 1.9775040497502707e-05, + "loss": 0.1329, + "step": 397 + }, + { + "epoch": 0.29, + "learning_rate": 1.9773389630116737e-05, + "loss": 0.1405, + "step": 398 + }, + { + "epoch": 0.29, + "learning_rate": 1.9771732796819492e-05, + "loss": 0.1407, + "step": 399 + }, + { + "epoch": 0.29, + "learning_rate": 1.977006999862235e-05, + "loss": 0.1332, + "step": 400 + }, + { + "epoch": 0.29, + "learning_rate": 1.9768401236540318e-05, + "loss": 0.1359, + "step": 401 + }, + { + "epoch": 0.29, + "learning_rate": 1.9766726511592047e-05, + "loss": 0.1365, + "step": 402 + }, + { + "epoch": 0.29, + "learning_rate": 1.9765045824799834e-05, + "loss": 0.1336, + "step": 403 + }, + { + "epoch": 0.29, + "learning_rate": 1.9763359177189612e-05, + "loss": 0.1373, + "step": 404 + }, + { + "epoch": 0.29, + "learning_rate": 1.9761666569790943e-05, + "loss": 0.1336, + "step": 405 + }, + { + "epoch": 0.29, + "learning_rate": 1.9759968003637044e-05, + "loss": 0.1423, + "step": 406 + }, + { + "epoch": 0.29, + "learning_rate": 1.9758263479764753e-05, + "loss": 0.1328, + "step": 407 + }, + { + "epoch": 0.3, + "learning_rate": 1.9756552999214554e-05, + "loss": 0.1344, + "step": 408 + }, + { + "epoch": 0.3, + "learning_rate": 1.975483656303057e-05, + "loss": 0.136, + "step": 409 + }, + { + "epoch": 0.3, + "learning_rate": 1.975311417226055e-05, + "loss": 0.1424, + "step": 410 + }, + { + "epoch": 0.3, + "learning_rate": 1.9751385827955886e-05, + "loss": 0.1264, + "step": 411 + }, + { + "epoch": 0.3, + "learning_rate": 1.9749651531171597e-05, + "loss": 0.1342, + "step": 412 + }, + { + "epoch": 0.3, + "learning_rate": 1.974791128296634e-05, + "loss": 0.1339, + "step": 413 + }, + { + "epoch": 0.3, + "learning_rate": 1.974616508440241e-05, + "loss": 0.1431, + "step": 414 + }, + { + "epoch": 0.3, + "learning_rate": 1.9744412936545722e-05, + "loss": 0.1334, + "step": 415 + }, + { + "epoch": 0.3, + "learning_rate": 1.974265484046583e-05, + "loss": 0.1397, + "step": 416 + }, + { + "epoch": 0.3, + "learning_rate": 1.974089079723592e-05, + "loss": 0.1351, + "step": 417 + }, + { + "epoch": 0.3, + "learning_rate": 1.9739120807932805e-05, + "loss": 0.1303, + "step": 418 + }, + { + "epoch": 0.3, + "learning_rate": 1.973734487363693e-05, + "loss": 0.1358, + "step": 419 + }, + { + "epoch": 0.3, + "learning_rate": 1.9735562995432365e-05, + "loss": 0.1365, + "step": 420 + }, + { + "epoch": 0.3, + "learning_rate": 1.9733775174406814e-05, + "loss": 0.1273, + "step": 421 + }, + { + "epoch": 0.31, + "learning_rate": 1.9731981411651607e-05, + "loss": 0.1353, + "step": 422 + }, + { + "epoch": 0.31, + "learning_rate": 1.9730181708261694e-05, + "loss": 0.1436, + "step": 423 + }, + { + "epoch": 0.31, + "learning_rate": 1.9728376065335663e-05, + "loss": 0.1302, + "step": 424 + }, + { + "epoch": 0.31, + "learning_rate": 1.972656448397572e-05, + "loss": 0.144, + "step": 425 + }, + { + "epoch": 0.31, + "learning_rate": 1.9724746965287695e-05, + "loss": 0.1558, + "step": 426 + }, + { + "epoch": 0.31, + "learning_rate": 1.9722923510381048e-05, + "loss": 0.1356, + "step": 427 + }, + { + "epoch": 0.31, + "learning_rate": 1.972109412036886e-05, + "loss": 0.1388, + "step": 428 + }, + { + "epoch": 0.31, + "learning_rate": 1.9719258796367832e-05, + "loss": 0.1359, + "step": 429 + }, + { + "epoch": 0.31, + "learning_rate": 1.9717417539498295e-05, + "loss": 0.1255, + "step": 430 + }, + { + "epoch": 0.31, + "learning_rate": 1.9715570350884187e-05, + "loss": 0.1341, + "step": 431 + }, + { + "epoch": 0.31, + "learning_rate": 1.9713717231653083e-05, + "loss": 0.144, + "step": 432 + }, + { + "epoch": 0.31, + "learning_rate": 1.971185818293617e-05, + "loss": 0.1313, + "step": 433 + }, + { + "epoch": 0.31, + "learning_rate": 1.970999320586826e-05, + "loss": 0.1317, + "step": 434 + }, + { + "epoch": 0.31, + "learning_rate": 1.970812230158777e-05, + "loss": 0.1368, + "step": 435 + }, + { + "epoch": 0.32, + "learning_rate": 1.9706245471236752e-05, + "loss": 0.1377, + "step": 436 + }, + { + "epoch": 0.32, + "learning_rate": 1.970436271596087e-05, + "loss": 0.1389, + "step": 437 + }, + { + "epoch": 0.32, + "learning_rate": 1.9702474036909395e-05, + "loss": 0.1301, + "step": 438 + }, + { + "epoch": 0.32, + "learning_rate": 1.970057943523523e-05, + "loss": 0.1367, + "step": 439 + }, + { + "epoch": 0.32, + "learning_rate": 1.969867891209488e-05, + "loss": 0.1389, + "step": 440 + }, + { + "epoch": 0.32, + "learning_rate": 1.9696772468648474e-05, + "loss": 0.1384, + "step": 441 + }, + { + "epoch": 0.32, + "learning_rate": 1.9694860106059744e-05, + "loss": 0.1396, + "step": 442 + }, + { + "epoch": 0.32, + "learning_rate": 1.9692941825496048e-05, + "loss": 0.1356, + "step": 443 + }, + { + "epoch": 0.32, + "learning_rate": 1.969101762812835e-05, + "loss": 0.1394, + "step": 444 + }, + { + "epoch": 0.32, + "learning_rate": 1.9689087515131224e-05, + "loss": 0.1282, + "step": 445 + }, + { + "epoch": 0.32, + "learning_rate": 1.9687151487682858e-05, + "loss": 0.1464, + "step": 446 + }, + { + "epoch": 0.32, + "learning_rate": 1.9685209546965048e-05, + "loss": 0.1339, + "step": 447 + }, + { + "epoch": 0.32, + "learning_rate": 1.968326169416321e-05, + "loss": 0.1343, + "step": 448 + }, + { + "epoch": 0.32, + "learning_rate": 1.9681307930466346e-05, + "loss": 0.134, + "step": 449 + }, + { + "epoch": 0.33, + "learning_rate": 1.967934825706709e-05, + "loss": 0.1222, + "step": 450 + }, + { + "epoch": 0.33, + "learning_rate": 1.9677382675161668e-05, + "loss": 0.1339, + "step": 451 + }, + { + "epoch": 0.33, + "learning_rate": 1.9675411185949927e-05, + "loss": 0.1282, + "step": 452 + }, + { + "epoch": 0.33, + "learning_rate": 1.9673433790635302e-05, + "loss": 0.1391, + "step": 453 + }, + { + "epoch": 0.33, + "learning_rate": 1.9671450490424842e-05, + "loss": 0.14, + "step": 454 + }, + { + "epoch": 0.33, + "learning_rate": 1.966946128652921e-05, + "loss": 0.1374, + "step": 455 + }, + { + "epoch": 0.33, + "learning_rate": 1.966746618016266e-05, + "loss": 0.1367, + "step": 456 + }, + { + "epoch": 0.33, + "learning_rate": 1.9665465172543045e-05, + "loss": 0.1308, + "step": 457 + }, + { + "epoch": 0.33, + "learning_rate": 1.966345826489184e-05, + "loss": 0.1293, + "step": 458 + }, + { + "epoch": 0.33, + "learning_rate": 1.96614454584341e-05, + "loss": 0.134, + "step": 459 + }, + { + "epoch": 0.33, + "learning_rate": 1.9659426754398495e-05, + "loss": 0.1393, + "step": 460 + }, + { + "epoch": 0.33, + "learning_rate": 1.965740215401729e-05, + "loss": 0.1435, + "step": 461 + }, + { + "epoch": 0.33, + "learning_rate": 1.965537165852635e-05, + "loss": 0.1341, + "step": 462 + }, + { + "epoch": 0.33, + "learning_rate": 1.9653335269165135e-05, + "loss": 0.1311, + "step": 463 + }, + { + "epoch": 0.34, + "learning_rate": 1.965129298717671e-05, + "loss": 0.1369, + "step": 464 + }, + { + "epoch": 0.34, + "learning_rate": 1.964924481380773e-05, + "loss": 0.1392, + "step": 465 + }, + { + "epoch": 0.34, + "learning_rate": 1.9647190750308452e-05, + "loss": 0.1358, + "step": 466 + }, + { + "epoch": 0.34, + "learning_rate": 1.9645130797932722e-05, + "loss": 0.1338, + "step": 467 + }, + { + "epoch": 0.34, + "learning_rate": 1.9643064957937986e-05, + "loss": 0.1449, + "step": 468 + }, + { + "epoch": 0.34, + "learning_rate": 1.964099323158528e-05, + "loss": 0.1323, + "step": 469 + }, + { + "epoch": 0.34, + "learning_rate": 1.963891562013924e-05, + "loss": 0.1455, + "step": 470 + }, + { + "epoch": 0.34, + "learning_rate": 1.963683212486809e-05, + "loss": 0.1303, + "step": 471 + }, + { + "epoch": 0.34, + "learning_rate": 1.9634742747043637e-05, + "loss": 0.1438, + "step": 472 + }, + { + "epoch": 0.34, + "learning_rate": 1.9632647487941296e-05, + "loss": 0.1281, + "step": 473 + }, + { + "epoch": 0.34, + "learning_rate": 1.9630546348840064e-05, + "loss": 0.1395, + "step": 474 + }, + { + "epoch": 0.34, + "learning_rate": 1.962843933102252e-05, + "loss": 0.1442, + "step": 475 + }, + { + "epoch": 0.34, + "learning_rate": 1.962632643577484e-05, + "loss": 0.1321, + "step": 476 + }, + { + "epoch": 0.35, + "learning_rate": 1.962420766438679e-05, + "loss": 0.1433, + "step": 477 + }, + { + "epoch": 0.35, + "learning_rate": 1.9622083018151713e-05, + "loss": 0.1288, + "step": 478 + }, + { + "epoch": 0.35, + "learning_rate": 1.961995249836655e-05, + "loss": 0.1378, + "step": 479 + }, + { + "epoch": 0.35, + "learning_rate": 1.9617816106331818e-05, + "loss": 0.1333, + "step": 480 + }, + { + "epoch": 0.35, + "learning_rate": 1.9615673843351622e-05, + "loss": 0.1402, + "step": 481 + }, + { + "epoch": 0.35, + "learning_rate": 1.961352571073365e-05, + "loss": 0.135, + "step": 482 + }, + { + "epoch": 0.35, + "learning_rate": 1.9611371709789172e-05, + "loss": 0.1393, + "step": 483 + }, + { + "epoch": 0.35, + "learning_rate": 1.960921184183305e-05, + "loss": 0.1378, + "step": 484 + }, + { + "epoch": 0.35, + "learning_rate": 1.9607046108183712e-05, + "loss": 0.1376, + "step": 485 + }, + { + "epoch": 0.35, + "learning_rate": 1.9604874510163175e-05, + "loss": 0.1412, + "step": 486 + }, + { + "epoch": 0.35, + "learning_rate": 1.9602697049097035e-05, + "loss": 0.1444, + "step": 487 + }, + { + "epoch": 0.35, + "learning_rate": 1.9600513726314464e-05, + "loss": 0.133, + "step": 488 + }, + { + "epoch": 0.35, + "learning_rate": 1.959832454314822e-05, + "loss": 0.1281, + "step": 489 + }, + { + "epoch": 0.35, + "learning_rate": 1.9596129500934627e-05, + "loss": 0.1356, + "step": 490 + }, + { + "epoch": 0.36, + "learning_rate": 1.9593928601013595e-05, + "loss": 0.1375, + "step": 491 + }, + { + "epoch": 0.36, + "learning_rate": 1.9591721844728606e-05, + "loss": 0.1311, + "step": 492 + }, + { + "epoch": 0.36, + "learning_rate": 1.9589509233426716e-05, + "loss": 0.1356, + "step": 493 + }, + { + "epoch": 0.36, + "learning_rate": 1.9587290768458557e-05, + "loss": 0.1359, + "step": 494 + }, + { + "epoch": 0.36, + "learning_rate": 1.958506645117833e-05, + "loss": 0.133, + "step": 495 + }, + { + "epoch": 0.36, + "learning_rate": 1.9582836282943814e-05, + "loss": 0.139, + "step": 496 + }, + { + "epoch": 0.36, + "learning_rate": 1.958060026511636e-05, + "loss": 0.134, + "step": 497 + }, + { + "epoch": 0.36, + "learning_rate": 1.9578358399060878e-05, + "loss": 0.1316, + "step": 498 + }, + { + "epoch": 0.36, + "learning_rate": 1.9576110686145867e-05, + "loss": 0.1427, + "step": 499 + }, + { + "epoch": 0.36, + "learning_rate": 1.9573857127743378e-05, + "loss": 0.1366, + "step": 500 + }, + { + "epoch": 0.36, + "eval_loss": 0.13298147916793823, + "eval_runtime": 711.7389, + "eval_samples_per_second": 70.25, + "eval_steps_per_second": 2.196, + "step": 500 + }, + { + "epoch": 0.36, + "learning_rate": 1.9571597725229043e-05, + "loss": 0.1479, + "step": 501 + }, + { + "epoch": 0.36, + "learning_rate": 1.9569332479982045e-05, + "loss": 0.1405, + "step": 502 + }, + { + "epoch": 0.36, + "learning_rate": 1.9567061393385155e-05, + "loss": 0.1434, + "step": 503 + }, + { + "epoch": 0.36, + "learning_rate": 1.9564784466824698e-05, + "loss": 0.1327, + "step": 504 + }, + { + "epoch": 0.37, + "learning_rate": 1.9562501701690558e-05, + "loss": 0.1363, + "step": 505 + }, + { + "epoch": 0.37, + "learning_rate": 1.9560213099376194e-05, + "loss": 0.1322, + "step": 506 + }, + { + "epoch": 0.37, + "learning_rate": 1.9557918661278622e-05, + "loss": 0.1384, + "step": 507 + }, + { + "epoch": 0.37, + "learning_rate": 1.955561838879842e-05, + "loss": 0.1344, + "step": 508 + }, + { + "epoch": 0.37, + "learning_rate": 1.9553312283339735e-05, + "loss": 0.1333, + "step": 509 + }, + { + "epoch": 0.37, + "learning_rate": 1.955100034631026e-05, + "loss": 0.1276, + "step": 510 + }, + { + "epoch": 0.37, + "learning_rate": 1.9548682579121266e-05, + "loss": 0.1402, + "step": 511 + }, + { + "epoch": 0.37, + "learning_rate": 1.9546358983187576e-05, + "loss": 0.1228, + "step": 512 + }, + { + "epoch": 0.37, + "learning_rate": 1.9544029559927556e-05, + "loss": 0.1378, + "step": 513 + }, + { + "epoch": 0.37, + "learning_rate": 1.954169431076315e-05, + "loss": 0.1499, + "step": 514 + }, + { + "epoch": 0.37, + "learning_rate": 1.953935323711985e-05, + "loss": 0.1346, + "step": 515 + }, + { + "epoch": 0.37, + "learning_rate": 1.95370063404267e-05, + "loss": 0.1431, + "step": 516 + }, + { + "epoch": 0.37, + "learning_rate": 1.9534653622116307e-05, + "loss": 0.1338, + "step": 517 + }, + { + "epoch": 0.37, + "learning_rate": 1.9532295083624824e-05, + "loss": 0.1448, + "step": 518 + }, + { + "epoch": 0.38, + "learning_rate": 1.952993072639196e-05, + "loss": 0.1358, + "step": 519 + }, + { + "epoch": 0.38, + "learning_rate": 1.9527560551860972e-05, + "loss": 0.1368, + "step": 520 + }, + { + "epoch": 0.38, + "learning_rate": 1.9525184561478676e-05, + "loss": 0.1371, + "step": 521 + }, + { + "epoch": 0.38, + "learning_rate": 1.952280275669543e-05, + "loss": 0.1352, + "step": 522 + }, + { + "epoch": 0.38, + "learning_rate": 1.9520415138965148e-05, + "loss": 0.1321, + "step": 523 + }, + { + "epoch": 0.38, + "learning_rate": 1.9518021709745284e-05, + "loss": 0.14, + "step": 524 + }, + { + "epoch": 0.38, + "learning_rate": 1.9515622470496848e-05, + "loss": 0.1352, + "step": 525 + }, + { + "epoch": 0.38, + "learning_rate": 1.9513217422684397e-05, + "loss": 0.1277, + "step": 526 + }, + { + "epoch": 0.38, + "learning_rate": 1.9510806567776023e-05, + "loss": 0.1339, + "step": 527 + }, + { + "epoch": 0.38, + "learning_rate": 1.9508389907243374e-05, + "loss": 0.1378, + "step": 528 + }, + { + "epoch": 0.38, + "learning_rate": 1.9505967442561632e-05, + "loss": 0.1306, + "step": 529 + }, + { + "epoch": 0.38, + "learning_rate": 1.9503539175209535e-05, + "loss": 0.1208, + "step": 530 + }, + { + "epoch": 0.38, + "learning_rate": 1.9501105106669352e-05, + "loss": 0.1357, + "step": 531 + }, + { + "epoch": 0.38, + "learning_rate": 1.9498665238426894e-05, + "loss": 0.1425, + "step": 532 + }, + { + "epoch": 0.39, + "learning_rate": 1.949621957197152e-05, + "loss": 0.1347, + "step": 533 + }, + { + "epoch": 0.39, + "learning_rate": 1.9493768108796122e-05, + "loss": 0.1361, + "step": 534 + }, + { + "epoch": 0.39, + "learning_rate": 1.949131085039713e-05, + "loss": 0.1393, + "step": 535 + }, + { + "epoch": 0.39, + "learning_rate": 1.9488847798274516e-05, + "loss": 0.1359, + "step": 536 + }, + { + "epoch": 0.39, + "learning_rate": 1.948637895393178e-05, + "loss": 0.1378, + "step": 537 + }, + { + "epoch": 0.39, + "learning_rate": 1.9483904318875974e-05, + "loss": 0.1241, + "step": 538 + }, + { + "epoch": 0.39, + "learning_rate": 1.9481423894617668e-05, + "loss": 0.1357, + "step": 539 + }, + { + "epoch": 0.39, + "learning_rate": 1.947893768267097e-05, + "loss": 0.1312, + "step": 540 + }, + { + "epoch": 0.39, + "learning_rate": 1.947644568455353e-05, + "loss": 0.1447, + "step": 541 + }, + { + "epoch": 0.39, + "learning_rate": 1.9473947901786518e-05, + "loss": 0.1319, + "step": 542 + }, + { + "epoch": 0.39, + "learning_rate": 1.9471444335894644e-05, + "loss": 0.1391, + "step": 543 + }, + { + "epoch": 0.39, + "learning_rate": 1.946893498840614e-05, + "loss": 0.1405, + "step": 544 + }, + { + "epoch": 0.39, + "learning_rate": 1.946641986085278e-05, + "loss": 0.1379, + "step": 545 + }, + { + "epoch": 0.39, + "learning_rate": 1.946389895476985e-05, + "loss": 0.1259, + "step": 546 + }, + { + "epoch": 0.4, + "learning_rate": 1.9461372271696172e-05, + "loss": 0.1427, + "step": 547 + }, + { + "epoch": 0.4, + "learning_rate": 1.94588398131741e-05, + "loss": 0.1401, + "step": 548 + }, + { + "epoch": 0.4, + "learning_rate": 1.9456301580749507e-05, + "loss": 0.1431, + "step": 549 + }, + { + "epoch": 0.4, + "learning_rate": 1.9453757575971785e-05, + "loss": 0.146, + "step": 550 + }, + { + "epoch": 0.4, + "learning_rate": 1.9451207800393853e-05, + "loss": 0.1436, + "step": 551 + }, + { + "epoch": 0.4, + "learning_rate": 1.9448652255572166e-05, + "loss": 0.1401, + "step": 552 + }, + { + "epoch": 0.4, + "learning_rate": 1.9446090943066686e-05, + "loss": 0.1325, + "step": 553 + }, + { + "epoch": 0.4, + "learning_rate": 1.9443523864440895e-05, + "loss": 0.1338, + "step": 554 + }, + { + "epoch": 0.4, + "learning_rate": 1.94409510212618e-05, + "loss": 0.1353, + "step": 555 + }, + { + "epoch": 0.4, + "learning_rate": 1.9438372415099934e-05, + "loss": 0.1458, + "step": 556 + }, + { + "epoch": 0.4, + "learning_rate": 1.9435788047529328e-05, + "loss": 0.1394, + "step": 557 + }, + { + "epoch": 0.4, + "learning_rate": 1.943319792012755e-05, + "loss": 0.1368, + "step": 558 + }, + { + "epoch": 0.4, + "learning_rate": 1.9430602034475676e-05, + "loss": 0.1465, + "step": 559 + }, + { + "epoch": 0.41, + "learning_rate": 1.9428000392158295e-05, + "loss": 0.1338, + "step": 560 + }, + { + "epoch": 0.41, + "learning_rate": 1.942539299476351e-05, + "loss": 0.1309, + "step": 561 + }, + { + "epoch": 0.41, + "learning_rate": 1.942277984388294e-05, + "loss": 0.1389, + "step": 562 + }, + { + "epoch": 0.41, + "learning_rate": 1.9420160941111716e-05, + "loss": 0.1398, + "step": 563 + }, + { + "epoch": 0.41, + "learning_rate": 1.941753628804848e-05, + "loss": 0.1375, + "step": 564 + }, + { + "epoch": 0.41, + "learning_rate": 1.9414905886295377e-05, + "loss": 0.1359, + "step": 565 + }, + { + "epoch": 0.41, + "learning_rate": 1.9412269737458077e-05, + "loss": 0.1421, + "step": 566 + }, + { + "epoch": 0.41, + "learning_rate": 1.9409627843145742e-05, + "loss": 0.1375, + "step": 567 + }, + { + "epoch": 0.41, + "learning_rate": 1.9406980204971048e-05, + "loss": 0.1286, + "step": 568 + }, + { + "epoch": 0.41, + "learning_rate": 1.9404326824550176e-05, + "loss": 0.1327, + "step": 569 + }, + { + "epoch": 0.41, + "learning_rate": 1.940166770350282e-05, + "loss": 0.1379, + "step": 570 + }, + { + "epoch": 0.41, + "learning_rate": 1.939900284345216e-05, + "loss": 0.1349, + "step": 571 + }, + { + "epoch": 0.41, + "learning_rate": 1.9396332246024897e-05, + "loss": 0.1461, + "step": 572 + }, + { + "epoch": 0.41, + "learning_rate": 1.9393655912851233e-05, + "loss": 0.127, + "step": 573 + }, + { + "epoch": 0.42, + "learning_rate": 1.9390973845564853e-05, + "loss": 0.1312, + "step": 574 + }, + { + "epoch": 0.42, + "learning_rate": 1.9388286045802965e-05, + "loss": 0.1363, + "step": 575 + }, + { + "epoch": 0.42, + "learning_rate": 1.9385592515206263e-05, + "loss": 0.137, + "step": 576 + }, + { + "epoch": 0.42, + "learning_rate": 1.9382893255418948e-05, + "loss": 0.1434, + "step": 577 + }, + { + "epoch": 0.42, + "learning_rate": 1.93801882680887e-05, + "loss": 0.1406, + "step": 578 + }, + { + "epoch": 0.42, + "learning_rate": 1.937747755486672e-05, + "loss": 0.1439, + "step": 579 + }, + { + "epoch": 0.42, + "learning_rate": 1.937476111740769e-05, + "loss": 0.1416, + "step": 580 + }, + { + "epoch": 0.42, + "learning_rate": 1.9372038957369784e-05, + "loss": 0.1436, + "step": 581 + }, + { + "epoch": 0.42, + "learning_rate": 1.936931107641468e-05, + "loss": 0.1383, + "step": 582 + }, + { + "epoch": 0.42, + "learning_rate": 1.9366577476207533e-05, + "loss": 0.1413, + "step": 583 + }, + { + "epoch": 0.42, + "learning_rate": 1.9363838158417008e-05, + "loss": 0.1337, + "step": 584 + }, + { + "epoch": 0.42, + "learning_rate": 1.9361093124715245e-05, + "loss": 0.1282, + "step": 585 + }, + { + "epoch": 0.42, + "learning_rate": 1.935834237677788e-05, + "loss": 0.1344, + "step": 586 + }, + { + "epoch": 0.42, + "learning_rate": 1.935558591628403e-05, + "loss": 0.1288, + "step": 587 + }, + { + "epoch": 0.43, + "learning_rate": 1.9352823744916317e-05, + "loss": 0.1353, + "step": 588 + }, + { + "epoch": 0.43, + "learning_rate": 1.9350055864360825e-05, + "loss": 0.1336, + "step": 589 + }, + { + "epoch": 0.43, + "learning_rate": 1.934728227630714e-05, + "loss": 0.1446, + "step": 590 + }, + { + "epoch": 0.43, + "learning_rate": 1.934450298244832e-05, + "loss": 0.1396, + "step": 591 + }, + { + "epoch": 0.43, + "learning_rate": 1.9341717984480924e-05, + "loss": 0.1274, + "step": 592 + }, + { + "epoch": 0.43, + "learning_rate": 1.933892728410497e-05, + "loss": 0.1305, + "step": 593 + }, + { + "epoch": 0.43, + "learning_rate": 1.933613088302398e-05, + "loss": 0.123, + "step": 594 + }, + { + "epoch": 0.43, + "learning_rate": 1.9333328782944933e-05, + "loss": 0.1343, + "step": 595 + }, + { + "epoch": 0.43, + "learning_rate": 1.9330520985578303e-05, + "loss": 0.1301, + "step": 596 + }, + { + "epoch": 0.43, + "learning_rate": 1.932770749263804e-05, + "loss": 0.1438, + "step": 597 + }, + { + "epoch": 0.43, + "learning_rate": 1.932488830584156e-05, + "loss": 0.1348, + "step": 598 + }, + { + "epoch": 0.43, + "learning_rate": 1.932206342690977e-05, + "loss": 0.1339, + "step": 599 + }, + { + "epoch": 0.43, + "learning_rate": 1.9319232857567036e-05, + "loss": 0.1495, + "step": 600 + }, + { + "epoch": 0.43, + "learning_rate": 1.9316396599541215e-05, + "loss": 0.1369, + "step": 601 + }, + { + "epoch": 0.44, + "learning_rate": 1.9313554654563614e-05, + "loss": 0.133, + "step": 602 + }, + { + "epoch": 0.44, + "learning_rate": 1.931070702436904e-05, + "loss": 0.1392, + "step": 603 + }, + { + "epoch": 0.44, + "learning_rate": 1.9307853710695744e-05, + "loss": 0.141, + "step": 604 + }, + { + "epoch": 0.44, + "learning_rate": 1.930499471528546e-05, + "loss": 0.1391, + "step": 605 + }, + { + "epoch": 0.44, + "learning_rate": 1.9302130039883385e-05, + "loss": 0.1316, + "step": 606 + }, + { + "epoch": 0.44, + "learning_rate": 1.9299259686238184e-05, + "loss": 0.1335, + "step": 607 + }, + { + "epoch": 0.44, + "learning_rate": 1.9296383656101998e-05, + "loss": 0.1406, + "step": 608 + }, + { + "epoch": 0.44, + "learning_rate": 1.929350195123042e-05, + "loss": 0.133, + "step": 609 + }, + { + "epoch": 0.44, + "learning_rate": 1.929061457338251e-05, + "loss": 0.1392, + "step": 610 + }, + { + "epoch": 0.44, + "learning_rate": 1.9287721524320797e-05, + "loss": 0.1293, + "step": 611 + }, + { + "epoch": 0.44, + "learning_rate": 1.928482280581126e-05, + "loss": 0.1357, + "step": 612 + }, + { + "epoch": 0.44, + "learning_rate": 1.9281918419623356e-05, + "loss": 0.1331, + "step": 613 + }, + { + "epoch": 0.44, + "learning_rate": 1.9279008367529983e-05, + "loss": 0.1294, + "step": 614 + }, + { + "epoch": 0.44, + "learning_rate": 1.927609265130751e-05, + "loss": 0.1317, + "step": 615 + }, + { + "epoch": 0.45, + "learning_rate": 1.9273171272735764e-05, + "loss": 0.1368, + "step": 616 + }, + { + "epoch": 0.45, + "learning_rate": 1.927024423359802e-05, + "loss": 0.1334, + "step": 617 + }, + { + "epoch": 0.45, + "learning_rate": 1.926731153568101e-05, + "loss": 0.1321, + "step": 618 + }, + { + "epoch": 0.45, + "learning_rate": 1.9264373180774928e-05, + "loss": 0.1333, + "step": 619 + }, + { + "epoch": 0.45, + "learning_rate": 1.926142917067342e-05, + "loss": 0.1318, + "step": 620 + }, + { + "epoch": 0.45, + "learning_rate": 1.9258479507173572e-05, + "loss": 0.1283, + "step": 621 + }, + { + "epoch": 0.45, + "learning_rate": 1.9255524192075933e-05, + "loss": 0.1349, + "step": 622 + }, + { + "epoch": 0.45, + "learning_rate": 1.9252563227184494e-05, + "loss": 0.1535, + "step": 623 + }, + { + "epoch": 0.45, + "learning_rate": 1.924959661430671e-05, + "loss": 0.1313, + "step": 624 + }, + { + "epoch": 0.45, + "learning_rate": 1.9246624355253464e-05, + "loss": 0.1429, + "step": 625 + }, + { + "epoch": 0.45, + "learning_rate": 1.9243646451839093e-05, + "loss": 0.142, + "step": 626 + }, + { + "epoch": 0.45, + "learning_rate": 1.9240662905881384e-05, + "loss": 0.1305, + "step": 627 + }, + { + "epoch": 0.45, + "learning_rate": 1.9237673719201564e-05, + "loss": 0.1295, + "step": 628 + }, + { + "epoch": 0.45, + "learning_rate": 1.9234678893624303e-05, + "loss": 0.1314, + "step": 629 + }, + { + "epoch": 0.46, + "learning_rate": 1.923167843097772e-05, + "loss": 0.1252, + "step": 630 + }, + { + "epoch": 0.46, + "learning_rate": 1.922867233309336e-05, + "loss": 0.1313, + "step": 631 + }, + { + "epoch": 0.46, + "learning_rate": 1.9225660601806225e-05, + "loss": 0.1348, + "step": 632 + }, + { + "epoch": 0.46, + "learning_rate": 1.9222643238954746e-05, + "loss": 0.1272, + "step": 633 + }, + { + "epoch": 0.46, + "learning_rate": 1.9219620246380793e-05, + "loss": 0.1353, + "step": 634 + }, + { + "epoch": 0.46, + "learning_rate": 1.921659162592967e-05, + "loss": 0.1214, + "step": 635 + }, + { + "epoch": 0.46, + "learning_rate": 1.9213557379450124e-05, + "loss": 0.1334, + "step": 636 + }, + { + "epoch": 0.46, + "learning_rate": 1.921051750879433e-05, + "loss": 0.118, + "step": 637 + }, + { + "epoch": 0.46, + "learning_rate": 1.9207472015817902e-05, + "loss": 0.1241, + "step": 638 + }, + { + "epoch": 0.46, + "learning_rate": 1.9204420902379872e-05, + "loss": 0.134, + "step": 639 + }, + { + "epoch": 0.46, + "learning_rate": 1.920136417034272e-05, + "loss": 0.1326, + "step": 640 + }, + { + "epoch": 0.46, + "learning_rate": 1.9198301821572348e-05, + "loss": 0.1234, + "step": 641 + }, + { + "epoch": 0.46, + "learning_rate": 1.919523385793809e-05, + "loss": 0.1414, + "step": 642 + }, + { + "epoch": 0.47, + "learning_rate": 1.9192160281312696e-05, + "loss": 0.1363, + "step": 643 + }, + { + "epoch": 0.47, + "learning_rate": 1.9189081093572357e-05, + "loss": 0.141, + "step": 644 + }, + { + "epoch": 0.47, + "learning_rate": 1.9185996296596682e-05, + "loss": 0.1265, + "step": 645 + }, + { + "epoch": 0.47, + "learning_rate": 1.9182905892268704e-05, + "loss": 0.1309, + "step": 646 + }, + { + "epoch": 0.47, + "learning_rate": 1.9179809882474886e-05, + "loss": 0.1284, + "step": 647 + }, + { + "epoch": 0.47, + "learning_rate": 1.9176708269105092e-05, + "loss": 0.1384, + "step": 648 + }, + { + "epoch": 0.47, + "learning_rate": 1.9173601054052637e-05, + "loss": 0.1382, + "step": 649 + }, + { + "epoch": 0.47, + "learning_rate": 1.917048823921423e-05, + "loss": 0.1392, + "step": 650 + }, + { + "epoch": 0.47, + "learning_rate": 1.9167369826490014e-05, + "loss": 0.1371, + "step": 651 + }, + { + "epoch": 0.47, + "learning_rate": 1.9164245817783536e-05, + "loss": 0.1162, + "step": 652 + }, + { + "epoch": 0.47, + "learning_rate": 1.916111621500177e-05, + "loss": 0.1281, + "step": 653 + }, + { + "epoch": 0.47, + "learning_rate": 1.91579810200551e-05, + "loss": 0.1434, + "step": 654 + }, + { + "epoch": 0.47, + "learning_rate": 1.9154840234857324e-05, + "loss": 0.1326, + "step": 655 + }, + { + "epoch": 0.47, + "learning_rate": 1.915169386132565e-05, + "loss": 0.1279, + "step": 656 + }, + { + "epoch": 0.48, + "learning_rate": 1.914854190138071e-05, + "loss": 0.1379, + "step": 657 + }, + { + "epoch": 0.48, + "learning_rate": 1.9145384356946522e-05, + "loss": 0.125, + "step": 658 + }, + { + "epoch": 0.48, + "learning_rate": 1.9142221229950535e-05, + "loss": 0.1343, + "step": 659 + }, + { + "epoch": 0.48, + "learning_rate": 1.9139052522323592e-05, + "loss": 0.135, + "step": 660 + }, + { + "epoch": 0.48, + "learning_rate": 1.9135878235999955e-05, + "loss": 0.1342, + "step": 661 + }, + { + "epoch": 0.48, + "learning_rate": 1.9132698372917282e-05, + "loss": 0.1419, + "step": 662 + }, + { + "epoch": 0.48, + "learning_rate": 1.9129512935016638e-05, + "loss": 0.1336, + "step": 663 + }, + { + "epoch": 0.48, + "learning_rate": 1.9126321924242485e-05, + "loss": 0.1422, + "step": 664 + }, + { + "epoch": 0.48, + "learning_rate": 1.9123125342542697e-05, + "loss": 0.1312, + "step": 665 + }, + { + "epoch": 0.48, + "learning_rate": 1.9119923191868547e-05, + "loss": 0.1418, + "step": 666 + }, + { + "epoch": 0.48, + "learning_rate": 1.91167154741747e-05, + "loss": 0.1299, + "step": 667 + }, + { + "epoch": 0.48, + "learning_rate": 1.9113502191419225e-05, + "loss": 0.1382, + "step": 668 + }, + { + "epoch": 0.48, + "learning_rate": 1.9110283345563587e-05, + "loss": 0.129, + "step": 669 + }, + { + "epoch": 0.48, + "learning_rate": 1.9107058938572645e-05, + "loss": 0.142, + "step": 670 + }, + { + "epoch": 0.49, + "learning_rate": 1.910382897241466e-05, + "loss": 0.1407, + "step": 671 + }, + { + "epoch": 0.49, + "learning_rate": 1.9100593449061272e-05, + "loss": 0.1365, + "step": 672 + }, + { + "epoch": 0.49, + "learning_rate": 1.9097352370487532e-05, + "loss": 0.1339, + "step": 673 + }, + { + "epoch": 0.49, + "learning_rate": 1.9094105738671864e-05, + "loss": 0.138, + "step": 674 + }, + { + "epoch": 0.49, + "learning_rate": 1.909085355559609e-05, + "loss": 0.1356, + "step": 675 + }, + { + "epoch": 0.49, + "learning_rate": 1.908759582324543e-05, + "loss": 0.1307, + "step": 676 + }, + { + "epoch": 0.49, + "learning_rate": 1.908433254360847e-05, + "loss": 0.1342, + "step": 677 + }, + { + "epoch": 0.49, + "learning_rate": 1.90810637186772e-05, + "loss": 0.1318, + "step": 678 + }, + { + "epoch": 0.49, + "learning_rate": 1.907778935044699e-05, + "loss": 0.1247, + "step": 679 + }, + { + "epoch": 0.49, + "learning_rate": 1.907450944091659e-05, + "loss": 0.1454, + "step": 680 + }, + { + "epoch": 0.49, + "learning_rate": 1.9071223992088133e-05, + "loss": 0.1375, + "step": 681 + }, + { + "epoch": 0.49, + "learning_rate": 1.906793300596714e-05, + "loss": 0.1313, + "step": 682 + }, + { + "epoch": 0.49, + "learning_rate": 1.9064636484562513e-05, + "loss": 0.1307, + "step": 683 + }, + { + "epoch": 0.49, + "learning_rate": 1.9061334429886517e-05, + "loss": 0.1452, + "step": 684 + }, + { + "epoch": 0.5, + "learning_rate": 1.9058026843954813e-05, + "loss": 0.1342, + "step": 685 + }, + { + "epoch": 0.5, + "learning_rate": 1.9054713728786424e-05, + "loss": 0.137, + "step": 686 + }, + { + "epoch": 0.5, + "learning_rate": 1.905139508640376e-05, + "loss": 0.1365, + "step": 687 + }, + { + "epoch": 0.5, + "learning_rate": 1.90480709188326e-05, + "loss": 0.1418, + "step": 688 + }, + { + "epoch": 0.5, + "learning_rate": 1.9044741228102093e-05, + "loss": 0.1443, + "step": 689 + }, + { + "epoch": 0.5, + "learning_rate": 1.904140601624476e-05, + "loss": 0.1328, + "step": 690 + }, + { + "epoch": 0.5, + "learning_rate": 1.90380652852965e-05, + "loss": 0.1277, + "step": 691 + }, + { + "epoch": 0.5, + "learning_rate": 1.9034719037296573e-05, + "loss": 0.1396, + "step": 692 + }, + { + "epoch": 0.5, + "learning_rate": 1.903136727428761e-05, + "loss": 0.1246, + "step": 693 + }, + { + "epoch": 0.5, + "learning_rate": 1.9028009998315603e-05, + "loss": 0.1315, + "step": 694 + }, + { + "epoch": 0.5, + "learning_rate": 1.9024647211429916e-05, + "loss": 0.1376, + "step": 695 + }, + { + "epoch": 0.5, + "learning_rate": 1.902127891568328e-05, + "loss": 0.1355, + "step": 696 + }, + { + "epoch": 0.5, + "learning_rate": 1.9017905113131774e-05, + "loss": 0.14, + "step": 697 + }, + { + "epoch": 0.5, + "learning_rate": 1.9014525805834854e-05, + "loss": 0.128, + "step": 698 + }, + { + "epoch": 0.51, + "learning_rate": 1.901114099585533e-05, + "loss": 0.132, + "step": 699 + }, + { + "epoch": 0.51, + "learning_rate": 1.900775068525937e-05, + "loss": 0.1266, + "step": 700 + }, + { + "epoch": 0.51, + "learning_rate": 1.9004354876116503e-05, + "loss": 0.1314, + "step": 701 + }, + { + "epoch": 0.51, + "learning_rate": 1.9000953570499605e-05, + "loss": 0.1245, + "step": 702 + }, + { + "epoch": 0.51, + "learning_rate": 1.899754677048492e-05, + "loss": 0.1339, + "step": 703 + }, + { + "epoch": 0.51, + "learning_rate": 1.8994134478152045e-05, + "loss": 0.1391, + "step": 704 + }, + { + "epoch": 0.51, + "learning_rate": 1.899071669558392e-05, + "loss": 0.1356, + "step": 705 + }, + { + "epoch": 0.51, + "learning_rate": 1.898729342486684e-05, + "loss": 0.1296, + "step": 706 + }, + { + "epoch": 0.51, + "learning_rate": 1.8983864668090452e-05, + "loss": 0.1339, + "step": 707 + }, + { + "epoch": 0.51, + "learning_rate": 1.898043042734775e-05, + "loss": 0.1449, + "step": 708 + }, + { + "epoch": 0.51, + "learning_rate": 1.8976990704735082e-05, + "loss": 0.1291, + "step": 709 + }, + { + "epoch": 0.51, + "learning_rate": 1.8973545502352134e-05, + "loss": 0.1407, + "step": 710 + }, + { + "epoch": 0.51, + "learning_rate": 1.897009482230194e-05, + "loss": 0.133, + "step": 711 + }, + { + "epoch": 0.52, + "learning_rate": 1.896663866669088e-05, + "loss": 0.1184, + "step": 712 + }, + { + "epoch": 0.52, + "learning_rate": 1.896317703762867e-05, + "loss": 0.1348, + "step": 713 + }, + { + "epoch": 0.52, + "learning_rate": 1.8959709937228375e-05, + "loss": 0.1302, + "step": 714 + }, + { + "epoch": 0.52, + "learning_rate": 1.8956237367606395e-05, + "loss": 0.146, + "step": 715 + }, + { + "epoch": 0.52, + "learning_rate": 1.895275933088247e-05, + "loss": 0.132, + "step": 716 + }, + { + "epoch": 0.52, + "learning_rate": 1.8949275829179675e-05, + "loss": 0.1353, + "step": 717 + }, + { + "epoch": 0.52, + "learning_rate": 1.8945786864624428e-05, + "loss": 0.1308, + "step": 718 + }, + { + "epoch": 0.52, + "learning_rate": 1.894229243934647e-05, + "loss": 0.1358, + "step": 719 + }, + { + "epoch": 0.52, + "learning_rate": 1.8938792555478885e-05, + "loss": 0.1349, + "step": 720 + }, + { + "epoch": 0.52, + "learning_rate": 1.8935287215158087e-05, + "loss": 0.1409, + "step": 721 + }, + { + "epoch": 0.52, + "learning_rate": 1.893177642052382e-05, + "loss": 0.1406, + "step": 722 + }, + { + "epoch": 0.52, + "learning_rate": 1.8928260173719156e-05, + "loss": 0.1236, + "step": 723 + }, + { + "epoch": 0.52, + "learning_rate": 1.8924738476890492e-05, + "loss": 0.1324, + "step": 724 + }, + { + "epoch": 0.52, + "learning_rate": 1.8921211332187566e-05, + "loss": 0.1339, + "step": 725 + }, + { + "epoch": 0.53, + "learning_rate": 1.8917678741763425e-05, + "loss": 0.134, + "step": 726 + }, + { + "epoch": 0.53, + "learning_rate": 1.8914140707774448e-05, + "loss": 0.1318, + "step": 727 + }, + { + "epoch": 0.53, + "learning_rate": 1.891059723238033e-05, + "loss": 0.1351, + "step": 728 + }, + { + "epoch": 0.53, + "learning_rate": 1.8907048317744107e-05, + "loss": 0.1341, + "step": 729 + }, + { + "epoch": 0.53, + "learning_rate": 1.890349396603211e-05, + "loss": 0.1359, + "step": 730 + }, + { + "epoch": 0.53, + "learning_rate": 1.8899934179414004e-05, + "loss": 0.1355, + "step": 731 + }, + { + "epoch": 0.53, + "learning_rate": 1.889636896006277e-05, + "loss": 0.127, + "step": 732 + }, + { + "epoch": 0.53, + "learning_rate": 1.8892798310154698e-05, + "loss": 0.1268, + "step": 733 + }, + { + "epoch": 0.53, + "learning_rate": 1.8889222231869407e-05, + "loss": 0.1302, + "step": 734 + }, + { + "epoch": 0.53, + "learning_rate": 1.888564072738981e-05, + "loss": 0.1354, + "step": 735 + }, + { + "epoch": 0.53, + "learning_rate": 1.888205379890215e-05, + "loss": 0.1295, + "step": 736 + }, + { + "epoch": 0.53, + "learning_rate": 1.8878461448595976e-05, + "loss": 0.1284, + "step": 737 + }, + { + "epoch": 0.53, + "learning_rate": 1.8874863678664143e-05, + "loss": 0.1311, + "step": 738 + }, + { + "epoch": 0.53, + "learning_rate": 1.887126049130281e-05, + "loss": 0.1462, + "step": 739 + }, + { + "epoch": 0.54, + "learning_rate": 1.8867651888711456e-05, + "loss": 0.1398, + "step": 740 + }, + { + "epoch": 0.54, + "learning_rate": 1.8864037873092857e-05, + "loss": 0.1189, + "step": 741 + }, + { + "epoch": 0.54, + "learning_rate": 1.886041844665309e-05, + "loss": 0.1283, + "step": 742 + }, + { + "epoch": 0.54, + "learning_rate": 1.885679361160155e-05, + "loss": 0.1289, + "step": 743 + }, + { + "epoch": 0.54, + "learning_rate": 1.885316337015091e-05, + "loss": 0.1315, + "step": 744 + }, + { + "epoch": 0.54, + "learning_rate": 1.8849527724517167e-05, + "loss": 0.1496, + "step": 745 + }, + { + "epoch": 0.54, + "learning_rate": 1.88458866769196e-05, + "loss": 0.1313, + "step": 746 + }, + { + "epoch": 0.54, + "learning_rate": 1.8842240229580796e-05, + "loss": 0.1376, + "step": 747 + }, + { + "epoch": 0.54, + "learning_rate": 1.883858838472663e-05, + "loss": 0.127, + "step": 748 + }, + { + "epoch": 0.54, + "learning_rate": 1.883493114458628e-05, + "loss": 0.133, + "step": 749 + }, + { + "epoch": 0.54, + "learning_rate": 1.8831268511392208e-05, + "loss": 0.1357, + "step": 750 + }, + { + "epoch": 0.54, + "learning_rate": 1.882760048738018e-05, + "loss": 0.1282, + "step": 751 + }, + { + "epoch": 0.54, + "learning_rate": 1.882392707478924e-05, + "loss": 0.1325, + "step": 752 + }, + { + "epoch": 0.54, + "learning_rate": 1.8820248275861725e-05, + "loss": 0.1279, + "step": 753 + }, + { + "epoch": 0.55, + "learning_rate": 1.8816564092843268e-05, + "loss": 0.1309, + "step": 754 + }, + { + "epoch": 0.55, + "learning_rate": 1.881287452798278e-05, + "loss": 0.1336, + "step": 755 + }, + { + "epoch": 0.55, + "learning_rate": 1.880917958353246e-05, + "loss": 0.1412, + "step": 756 + }, + { + "epoch": 0.55, + "learning_rate": 1.880547926174779e-05, + "loss": 0.1277, + "step": 757 + }, + { + "epoch": 0.55, + "learning_rate": 1.8801773564887536e-05, + "loss": 0.1243, + "step": 758 + }, + { + "epoch": 0.55, + "learning_rate": 1.8798062495213745e-05, + "loss": 0.1225, + "step": 759 + }, + { + "epoch": 0.55, + "learning_rate": 1.879434605499174e-05, + "loss": 0.1259, + "step": 760 + }, + { + "epoch": 0.55, + "learning_rate": 1.8790624246490127e-05, + "loss": 0.1399, + "step": 761 + }, + { + "epoch": 0.55, + "learning_rate": 1.878689707198079e-05, + "loss": 0.1394, + "step": 762 + }, + { + "epoch": 0.55, + "learning_rate": 1.878316453373888e-05, + "loss": 0.1313, + "step": 763 + }, + { + "epoch": 0.55, + "learning_rate": 1.8779426634042833e-05, + "loss": 0.1509, + "step": 764 + }, + { + "epoch": 0.55, + "learning_rate": 1.8775683375174346e-05, + "loss": 0.1317, + "step": 765 + }, + { + "epoch": 0.55, + "learning_rate": 1.8771934759418402e-05, + "loss": 0.1358, + "step": 766 + }, + { + "epoch": 0.55, + "learning_rate": 1.876818078906324e-05, + "loss": 0.1407, + "step": 767 + }, + { + "epoch": 0.56, + "learning_rate": 1.8764421466400376e-05, + "loss": 0.1373, + "step": 768 + }, + { + "epoch": 0.56, + "learning_rate": 1.876065679372459e-05, + "loss": 0.1387, + "step": 769 + }, + { + "epoch": 0.56, + "learning_rate": 1.8756886773333927e-05, + "loss": 0.1318, + "step": 770 + }, + { + "epoch": 0.56, + "learning_rate": 1.87531114075297e-05, + "loss": 0.1367, + "step": 771 + }, + { + "epoch": 0.56, + "learning_rate": 1.8749330698616484e-05, + "loss": 0.1278, + "step": 772 + }, + { + "epoch": 0.56, + "learning_rate": 1.874554464890211e-05, + "loss": 0.1326, + "step": 773 + }, + { + "epoch": 0.56, + "learning_rate": 1.874175326069768e-05, + "loss": 0.1412, + "step": 774 + }, + { + "epoch": 0.56, + "learning_rate": 1.8737956536317537e-05, + "loss": 0.1414, + "step": 775 + }, + { + "epoch": 0.56, + "learning_rate": 1.8734154478079306e-05, + "loss": 0.1236, + "step": 776 + }, + { + "epoch": 0.56, + "learning_rate": 1.8730347088303843e-05, + "loss": 0.1277, + "step": 777 + }, + { + "epoch": 0.56, + "learning_rate": 1.872653436931528e-05, + "loss": 0.1239, + "step": 778 + }, + { + "epoch": 0.56, + "learning_rate": 1.8722716323440984e-05, + "loss": 0.1337, + "step": 779 + }, + { + "epoch": 0.56, + "learning_rate": 1.8718892953011588e-05, + "loss": 0.1349, + "step": 780 + }, + { + "epoch": 0.56, + "learning_rate": 1.8715064260360965e-05, + "loss": 0.136, + "step": 781 + }, + { + "epoch": 0.57, + "learning_rate": 1.8711230247826238e-05, + "loss": 0.1334, + "step": 782 + }, + { + "epoch": 0.57, + "learning_rate": 1.8707390917747787e-05, + "loss": 0.1342, + "step": 783 + }, + { + "epoch": 0.57, + "learning_rate": 1.870354627246923e-05, + "loss": 0.1312, + "step": 784 + }, + { + "epoch": 0.57, + "learning_rate": 1.8699696314337422e-05, + "loss": 0.1271, + "step": 785 + }, + { + "epoch": 0.57, + "learning_rate": 1.869584104570248e-05, + "loss": 0.1325, + "step": 786 + }, + { + "epoch": 0.57, + "learning_rate": 1.869198046891775e-05, + "loss": 0.1255, + "step": 787 + }, + { + "epoch": 0.57, + "learning_rate": 1.8688114586339818e-05, + "loss": 0.1345, + "step": 788 + }, + { + "epoch": 0.57, + "learning_rate": 1.868424340032851e-05, + "loss": 0.1336, + "step": 789 + }, + { + "epoch": 0.57, + "learning_rate": 1.8680366913246894e-05, + "loss": 0.1372, + "step": 790 + }, + { + "epoch": 0.57, + "learning_rate": 1.867648512746127e-05, + "loss": 0.136, + "step": 791 + }, + { + "epoch": 0.57, + "learning_rate": 1.8672598045341175e-05, + "loss": 0.1339, + "step": 792 + }, + { + "epoch": 0.57, + "learning_rate": 1.8668705669259377e-05, + "loss": 0.1295, + "step": 793 + }, + { + "epoch": 0.57, + "learning_rate": 1.8664808001591866e-05, + "loss": 0.1354, + "step": 794 + }, + { + "epoch": 0.58, + "learning_rate": 1.8660905044717888e-05, + "loss": 0.1292, + "step": 795 + }, + { + "epoch": 0.58, + "learning_rate": 1.865699680101989e-05, + "loss": 0.1343, + "step": 796 + }, + { + "epoch": 0.58, + "learning_rate": 1.8653083272883556e-05, + "loss": 0.1422, + "step": 797 + }, + { + "epoch": 0.58, + "learning_rate": 1.8649164462697805e-05, + "loss": 0.1313, + "step": 798 + }, + { + "epoch": 0.58, + "learning_rate": 1.864524037285477e-05, + "loss": 0.1293, + "step": 799 + }, + { + "epoch": 0.58, + "learning_rate": 1.864131100574981e-05, + "loss": 0.1381, + "step": 800 + }, + { + "epoch": 0.58, + "learning_rate": 1.8637376363781504e-05, + "loss": 0.1256, + "step": 801 + }, + { + "epoch": 0.58, + "learning_rate": 1.8633436449351648e-05, + "loss": 0.1326, + "step": 802 + }, + { + "epoch": 0.58, + "learning_rate": 1.862949126486527e-05, + "loss": 0.1265, + "step": 803 + }, + { + "epoch": 0.58, + "learning_rate": 1.8625540812730592e-05, + "loss": 0.1394, + "step": 804 + }, + { + "epoch": 0.58, + "learning_rate": 1.8621585095359075e-05, + "loss": 0.1345, + "step": 805 + }, + { + "epoch": 0.58, + "learning_rate": 1.861762411516538e-05, + "loss": 0.1322, + "step": 806 + }, + { + "epoch": 0.58, + "learning_rate": 1.8613657874567383e-05, + "loss": 0.1325, + "step": 807 + }, + { + "epoch": 0.58, + "learning_rate": 1.8609686375986176e-05, + "loss": 0.1332, + "step": 808 + }, + { + "epoch": 0.59, + "learning_rate": 1.8605709621846055e-05, + "loss": 0.1298, + "step": 809 + }, + { + "epoch": 0.59, + "learning_rate": 1.8601727614574522e-05, + "loss": 0.1325, + "step": 810 + }, + { + "epoch": 0.59, + "learning_rate": 1.85977403566023e-05, + "loss": 0.1394, + "step": 811 + }, + { + "epoch": 0.59, + "learning_rate": 1.8593747850363298e-05, + "loss": 0.1273, + "step": 812 + }, + { + "epoch": 0.59, + "learning_rate": 1.8589750098294636e-05, + "loss": 0.1381, + "step": 813 + }, + { + "epoch": 0.59, + "learning_rate": 1.8585747102836643e-05, + "loss": 0.1417, + "step": 814 + }, + { + "epoch": 0.59, + "learning_rate": 1.8581738866432847e-05, + "loss": 0.1433, + "step": 815 + }, + { + "epoch": 0.59, + "learning_rate": 1.857772539152996e-05, + "loss": 0.1367, + "step": 816 + }, + { + "epoch": 0.59, + "learning_rate": 1.8573706680577913e-05, + "loss": 0.1384, + "step": 817 + }, + { + "epoch": 0.59, + "learning_rate": 1.8569682736029822e-05, + "loss": 0.1252, + "step": 818 + }, + { + "epoch": 0.59, + "learning_rate": 1.8565653560341994e-05, + "loss": 0.1358, + "step": 819 + }, + { + "epoch": 0.59, + "learning_rate": 1.856161915597394e-05, + "loss": 0.1364, + "step": 820 + }, + { + "epoch": 0.59, + "learning_rate": 1.8557579525388353e-05, + "loss": 0.1348, + "step": 821 + }, + { + "epoch": 0.59, + "learning_rate": 1.8553534671051126e-05, + "loss": 0.1438, + "step": 822 + }, + { + "epoch": 0.6, + "learning_rate": 1.8549484595431326e-05, + "loss": 0.1351, + "step": 823 + }, + { + "epoch": 0.6, + "learning_rate": 1.8545429301001225e-05, + "loss": 0.1367, + "step": 824 + }, + { + "epoch": 0.6, + "learning_rate": 1.854136879023627e-05, + "loss": 0.1357, + "step": 825 + }, + { + "epoch": 0.6, + "learning_rate": 1.853730306561509e-05, + "loss": 0.1367, + "step": 826 + }, + { + "epoch": 0.6, + "learning_rate": 1.8533232129619502e-05, + "loss": 0.1366, + "step": 827 + }, + { + "epoch": 0.6, + "learning_rate": 1.8529155984734503e-05, + "loss": 0.1318, + "step": 828 + }, + { + "epoch": 0.6, + "learning_rate": 1.8525074633448276e-05, + "loss": 0.1292, + "step": 829 + }, + { + "epoch": 0.6, + "learning_rate": 1.8520988078252168e-05, + "loss": 0.1335, + "step": 830 + }, + { + "epoch": 0.6, + "learning_rate": 1.8516896321640712e-05, + "loss": 0.1292, + "step": 831 + }, + { + "epoch": 0.6, + "learning_rate": 1.8512799366111615e-05, + "loss": 0.1313, + "step": 832 + }, + { + "epoch": 0.6, + "learning_rate": 1.8508697214165758e-05, + "loss": 0.1284, + "step": 833 + }, + { + "epoch": 0.6, + "learning_rate": 1.850458986830719e-05, + "loss": 0.134, + "step": 834 + }, + { + "epoch": 0.6, + "learning_rate": 1.8500477331043133e-05, + "loss": 0.1215, + "step": 835 + }, + { + "epoch": 0.6, + "learning_rate": 1.8496359604883984e-05, + "loss": 0.1389, + "step": 836 + }, + { + "epoch": 0.61, + "learning_rate": 1.84922366923433e-05, + "loss": 0.1489, + "step": 837 + }, + { + "epoch": 0.61, + "learning_rate": 1.8488108595937798e-05, + "loss": 0.1442, + "step": 838 + }, + { + "epoch": 0.61, + "learning_rate": 1.8483975318187377e-05, + "loss": 0.1316, + "step": 839 + }, + { + "epoch": 0.61, + "learning_rate": 1.8479836861615084e-05, + "loss": 0.1292, + "step": 840 + }, + { + "epoch": 0.61, + "learning_rate": 1.847569322874713e-05, + "loss": 0.1372, + "step": 841 + }, + { + "epoch": 0.61, + "learning_rate": 1.8471544422112894e-05, + "loss": 0.1364, + "step": 842 + }, + { + "epoch": 0.61, + "learning_rate": 1.84673904442449e-05, + "loss": 0.141, + "step": 843 + }, + { + "epoch": 0.61, + "learning_rate": 1.8463231297678838e-05, + "loss": 0.145, + "step": 844 + }, + { + "epoch": 0.61, + "learning_rate": 1.8459066984953555e-05, + "loss": 0.1245, + "step": 845 + }, + { + "epoch": 0.61, + "learning_rate": 1.8454897508611044e-05, + "loss": 0.1301, + "step": 846 + }, + { + "epoch": 0.61, + "learning_rate": 1.8450722871196452e-05, + "loss": 0.1407, + "step": 847 + }, + { + "epoch": 0.61, + "learning_rate": 1.844654307525808e-05, + "loss": 0.1348, + "step": 848 + }, + { + "epoch": 0.61, + "learning_rate": 1.8442358123347374e-05, + "loss": 0.1373, + "step": 849 + }, + { + "epoch": 0.61, + "learning_rate": 1.8438168018018927e-05, + "loss": 0.1354, + "step": 850 + }, + { + "epoch": 0.62, + "learning_rate": 1.8433972761830486e-05, + "loss": 0.1204, + "step": 851 + }, + { + "epoch": 0.62, + "learning_rate": 1.842977235734293e-05, + "loss": 0.1319, + "step": 852 + }, + { + "epoch": 0.62, + "learning_rate": 1.842556680712029e-05, + "loss": 0.1341, + "step": 853 + }, + { + "epoch": 0.62, + "learning_rate": 1.8421356113729734e-05, + "loss": 0.1355, + "step": 854 + }, + { + "epoch": 0.62, + "learning_rate": 1.841714027974157e-05, + "loss": 0.1359, + "step": 855 + }, + { + "epoch": 0.62, + "learning_rate": 1.841291930772924e-05, + "loss": 0.136, + "step": 856 + }, + { + "epoch": 0.62, + "learning_rate": 1.8408693200269336e-05, + "loss": 0.1345, + "step": 857 + }, + { + "epoch": 0.62, + "learning_rate": 1.8404461959941565e-05, + "loss": 0.131, + "step": 858 + }, + { + "epoch": 0.62, + "learning_rate": 1.8400225589328783e-05, + "loss": 0.1288, + "step": 859 + }, + { + "epoch": 0.62, + "learning_rate": 1.839598409101697e-05, + "loss": 0.1294, + "step": 860 + }, + { + "epoch": 0.62, + "learning_rate": 1.839173746759524e-05, + "loss": 0.1309, + "step": 861 + }, + { + "epoch": 0.62, + "learning_rate": 1.8387485721655837e-05, + "loss": 0.125, + "step": 862 + }, + { + "epoch": 0.62, + "learning_rate": 1.8383228855794124e-05, + "loss": 0.139, + "step": 863 + }, + { + "epoch": 0.62, + "learning_rate": 1.8378966872608594e-05, + "loss": 0.1337, + "step": 864 + }, + { + "epoch": 0.63, + "learning_rate": 1.8374699774700863e-05, + "loss": 0.1482, + "step": 865 + }, + { + "epoch": 0.63, + "learning_rate": 1.837042756467567e-05, + "loss": 0.1337, + "step": 866 + }, + { + "epoch": 0.63, + "learning_rate": 1.8366150245140876e-05, + "loss": 0.1318, + "step": 867 + }, + { + "epoch": 0.63, + "learning_rate": 1.836186781870746e-05, + "loss": 0.1537, + "step": 868 + }, + { + "epoch": 0.63, + "learning_rate": 1.835758028798952e-05, + "loss": 0.1302, + "step": 869 + }, + { + "epoch": 0.63, + "learning_rate": 1.835328765560426e-05, + "loss": 0.1315, + "step": 870 + }, + { + "epoch": 0.63, + "learning_rate": 1.834898992417201e-05, + "loss": 0.1352, + "step": 871 + }, + { + "epoch": 0.63, + "learning_rate": 1.834468709631621e-05, + "loss": 0.1346, + "step": 872 + }, + { + "epoch": 0.63, + "learning_rate": 1.8340379174663405e-05, + "loss": 0.1345, + "step": 873 + }, + { + "epoch": 0.63, + "learning_rate": 1.833606616184326e-05, + "loss": 0.1314, + "step": 874 + }, + { + "epoch": 0.63, + "learning_rate": 1.833174806048853e-05, + "loss": 0.1338, + "step": 875 + }, + { + "epoch": 0.63, + "learning_rate": 1.83274248732351e-05, + "loss": 0.1325, + "step": 876 + }, + { + "epoch": 0.63, + "learning_rate": 1.8323096602721943e-05, + "loss": 0.1262, + "step": 877 + }, + { + "epoch": 0.64, + "learning_rate": 1.8318763251591137e-05, + "loss": 0.1348, + "step": 878 + }, + { + "epoch": 0.64, + "learning_rate": 1.8314424822487865e-05, + "loss": 0.1189, + "step": 879 + }, + { + "epoch": 0.64, + "learning_rate": 1.831008131806041e-05, + "loss": 0.127, + "step": 880 + }, + { + "epoch": 0.64, + "learning_rate": 1.8305732740960147e-05, + "loss": 0.1206, + "step": 881 + }, + { + "epoch": 0.64, + "learning_rate": 1.8301379093841554e-05, + "loss": 0.1412, + "step": 882 + }, + { + "epoch": 0.64, + "learning_rate": 1.8297020379362197e-05, + "loss": 0.1361, + "step": 883 + }, + { + "epoch": 0.64, + "learning_rate": 1.8292656600182747e-05, + "loss": 0.132, + "step": 884 + }, + { + "epoch": 0.64, + "learning_rate": 1.8288287758966956e-05, + "loss": 0.1285, + "step": 885 + }, + { + "epoch": 0.64, + "learning_rate": 1.8283913858381673e-05, + "loss": 0.1321, + "step": 886 + }, + { + "epoch": 0.64, + "learning_rate": 1.8279534901096824e-05, + "loss": 0.1406, + "step": 887 + }, + { + "epoch": 0.64, + "learning_rate": 1.8275150889785435e-05, + "loss": 0.1284, + "step": 888 + }, + { + "epoch": 0.64, + "learning_rate": 1.8270761827123612e-05, + "loss": 0.1333, + "step": 889 + }, + { + "epoch": 0.64, + "learning_rate": 1.8266367715790543e-05, + "loss": 0.1346, + "step": 890 + }, + { + "epoch": 0.64, + "learning_rate": 1.8261968558468495e-05, + "loss": 0.1308, + "step": 891 + }, + { + "epoch": 0.65, + "learning_rate": 1.8257564357842822e-05, + "loss": 0.1352, + "step": 892 + }, + { + "epoch": 0.65, + "learning_rate": 1.825315511660196e-05, + "loss": 0.1407, + "step": 893 + }, + { + "epoch": 0.65, + "learning_rate": 1.8248740837437405e-05, + "loss": 0.1254, + "step": 894 + }, + { + "epoch": 0.65, + "learning_rate": 1.8244321523043744e-05, + "loss": 0.1266, + "step": 895 + }, + { + "epoch": 0.65, + "learning_rate": 1.823989717611863e-05, + "loss": 0.13, + "step": 896 + }, + { + "epoch": 0.65, + "learning_rate": 1.8235467799362795e-05, + "loss": 0.1465, + "step": 897 + }, + { + "epoch": 0.65, + "learning_rate": 1.8231033395480034e-05, + "loss": 0.1261, + "step": 898 + }, + { + "epoch": 0.65, + "learning_rate": 1.8226593967177212e-05, + "loss": 0.1398, + "step": 899 + }, + { + "epoch": 0.65, + "learning_rate": 1.8222149517164264e-05, + "loss": 0.127, + "step": 900 + }, + { + "epoch": 0.65, + "learning_rate": 1.8217700048154192e-05, + "loss": 0.1328, + "step": 901 + }, + { + "epoch": 0.65, + "learning_rate": 1.821324556286305e-05, + "loss": 0.1295, + "step": 902 + }, + { + "epoch": 0.65, + "learning_rate": 1.820878606400997e-05, + "loss": 0.145, + "step": 903 + }, + { + "epoch": 0.65, + "learning_rate": 1.8204321554317136e-05, + "loss": 0.1314, + "step": 904 + }, + { + "epoch": 0.65, + "learning_rate": 1.819985203650979e-05, + "loss": 0.1269, + "step": 905 + }, + { + "epoch": 0.66, + "learning_rate": 1.8195377513316233e-05, + "loss": 0.1324, + "step": 906 + }, + { + "epoch": 0.66, + "learning_rate": 1.8190897987467822e-05, + "loss": 0.1364, + "step": 907 + }, + { + "epoch": 0.66, + "learning_rate": 1.8186413461698965e-05, + "loss": 0.1437, + "step": 908 + }, + { + "epoch": 0.66, + "learning_rate": 1.8181923938747123e-05, + "loss": 0.1296, + "step": 909 + }, + { + "epoch": 0.66, + "learning_rate": 1.8177429421352815e-05, + "loss": 0.137, + "step": 910 + }, + { + "epoch": 0.66, + "learning_rate": 1.81729299122596e-05, + "loss": 0.1347, + "step": 911 + }, + { + "epoch": 0.66, + "learning_rate": 1.8168425414214075e-05, + "loss": 0.134, + "step": 912 + }, + { + "epoch": 0.66, + "learning_rate": 1.816391592996591e-05, + "loss": 0.1312, + "step": 913 + }, + { + "epoch": 0.66, + "learning_rate": 1.8159401462267794e-05, + "loss": 0.1299, + "step": 914 + }, + { + "epoch": 0.66, + "learning_rate": 1.8154882013875466e-05, + "loss": 0.1284, + "step": 915 + }, + { + "epoch": 0.66, + "learning_rate": 1.8150357587547708e-05, + "loss": 0.1329, + "step": 916 + }, + { + "epoch": 0.66, + "learning_rate": 1.8145828186046336e-05, + "loss": 0.1307, + "step": 917 + }, + { + "epoch": 0.66, + "learning_rate": 1.8141293812136206e-05, + "loss": 0.1256, + "step": 918 + }, + { + "epoch": 0.66, + "learning_rate": 1.813675446858521e-05, + "loss": 0.1252, + "step": 919 + }, + { + "epoch": 0.67, + "learning_rate": 1.813221015816427e-05, + "loss": 0.1326, + "step": 920 + }, + { + "epoch": 0.67, + "learning_rate": 1.8127660883647346e-05, + "loss": 0.1381, + "step": 921 + }, + { + "epoch": 0.67, + "learning_rate": 1.8123106647811417e-05, + "loss": 0.1363, + "step": 922 + }, + { + "epoch": 0.67, + "learning_rate": 1.8118547453436504e-05, + "loss": 0.1344, + "step": 923 + }, + { + "epoch": 0.67, + "learning_rate": 1.8113983303305646e-05, + "loss": 0.1315, + "step": 924 + }, + { + "epoch": 0.67, + "learning_rate": 1.810941420020491e-05, + "loss": 0.1332, + "step": 925 + }, + { + "epoch": 0.67, + "learning_rate": 1.8104840146923387e-05, + "loss": 0.1375, + "step": 926 + }, + { + "epoch": 0.67, + "learning_rate": 1.8100261146253185e-05, + "loss": 0.1244, + "step": 927 + }, + { + "epoch": 0.67, + "learning_rate": 1.809567720098944e-05, + "loss": 0.1259, + "step": 928 + }, + { + "epoch": 0.67, + "learning_rate": 1.8091088313930303e-05, + "loss": 0.1307, + "step": 929 + }, + { + "epoch": 0.67, + "learning_rate": 1.8086494487876934e-05, + "loss": 0.137, + "step": 930 + }, + { + "epoch": 0.67, + "learning_rate": 1.808189572563352e-05, + "loss": 0.1342, + "step": 931 + }, + { + "epoch": 0.67, + "learning_rate": 1.807729203000726e-05, + "loss": 0.1203, + "step": 932 + }, + { + "epoch": 0.67, + "learning_rate": 1.807268340380835e-05, + "loss": 0.1423, + "step": 933 + }, + { + "epoch": 0.68, + "learning_rate": 1.806806984985001e-05, + "loss": 0.1309, + "step": 934 + }, + { + "epoch": 0.68, + "learning_rate": 1.806345137094846e-05, + "loss": 0.1342, + "step": 935 + }, + { + "epoch": 0.68, + "learning_rate": 1.805882796992294e-05, + "loss": 0.1254, + "step": 936 + }, + { + "epoch": 0.68, + "learning_rate": 1.805419964959568e-05, + "loss": 0.145, + "step": 937 + }, + { + "epoch": 0.68, + "learning_rate": 1.804956641279192e-05, + "loss": 0.1295, + "step": 938 + }, + { + "epoch": 0.68, + "learning_rate": 1.804492826233989e-05, + "loss": 0.1311, + "step": 939 + }, + { + "epoch": 0.68, + "learning_rate": 1.8040285201070836e-05, + "loss": 0.1274, + "step": 940 + }, + { + "epoch": 0.68, + "learning_rate": 1.803563723181899e-05, + "loss": 0.133, + "step": 941 + }, + { + "epoch": 0.68, + "learning_rate": 1.8030984357421592e-05, + "loss": 0.1284, + "step": 942 + }, + { + "epoch": 0.68, + "learning_rate": 1.8026326580718855e-05, + "loss": 0.1408, + "step": 943 + }, + { + "epoch": 0.68, + "learning_rate": 1.802166390455401e-05, + "loss": 0.1393, + "step": 944 + }, + { + "epoch": 0.68, + "learning_rate": 1.8016996331773254e-05, + "loss": 0.1255, + "step": 945 + }, + { + "epoch": 0.68, + "learning_rate": 1.80123238652258e-05, + "loss": 0.1249, + "step": 946 + }, + { + "epoch": 0.68, + "learning_rate": 1.800764650776382e-05, + "loss": 0.1363, + "step": 947 + }, + { + "epoch": 0.69, + "learning_rate": 1.800296426224249e-05, + "loss": 0.127, + "step": 948 + }, + { + "epoch": 0.69, + "learning_rate": 1.799827713151997e-05, + "loss": 0.1301, + "step": 949 + }, + { + "epoch": 0.69, + "learning_rate": 1.7993585118457387e-05, + "loss": 0.1312, + "step": 950 + }, + { + "epoch": 0.69, + "learning_rate": 1.7988888225918866e-05, + "loss": 0.1153, + "step": 951 + }, + { + "epoch": 0.69, + "learning_rate": 1.79841864567715e-05, + "loss": 0.1373, + "step": 952 + }, + { + "epoch": 0.69, + "learning_rate": 1.7979479813885365e-05, + "loss": 0.1289, + "step": 953 + }, + { + "epoch": 0.69, + "learning_rate": 1.7974768300133503e-05, + "loss": 0.1397, + "step": 954 + }, + { + "epoch": 0.69, + "learning_rate": 1.7970051918391938e-05, + "loss": 0.1293, + "step": 955 + }, + { + "epoch": 0.69, + "learning_rate": 1.796533067153966e-05, + "loss": 0.1279, + "step": 956 + }, + { + "epoch": 0.69, + "learning_rate": 1.7960604562458635e-05, + "loss": 0.1334, + "step": 957 + }, + { + "epoch": 0.69, + "learning_rate": 1.7955873594033796e-05, + "loss": 0.1362, + "step": 958 + }, + { + "epoch": 0.69, + "learning_rate": 1.7951137769153028e-05, + "loss": 0.1321, + "step": 959 + }, + { + "epoch": 0.69, + "learning_rate": 1.7946397090707203e-05, + "loss": 0.1377, + "step": 960 + }, + { + "epoch": 0.7, + "learning_rate": 1.794165156159014e-05, + "loss": 0.1339, + "step": 961 + }, + { + "epoch": 0.7, + "learning_rate": 1.7936901184698625e-05, + "loss": 0.1209, + "step": 962 + }, + { + "epoch": 0.7, + "learning_rate": 1.7932145962932398e-05, + "loss": 0.1302, + "step": 963 + }, + { + "epoch": 0.7, + "learning_rate": 1.7927385899194164e-05, + "loss": 0.1319, + "step": 964 + }, + { + "epoch": 0.7, + "learning_rate": 1.7922620996389587e-05, + "loss": 0.1311, + "step": 965 + }, + { + "epoch": 0.7, + "learning_rate": 1.791785125742726e-05, + "loss": 0.1331, + "step": 966 + }, + { + "epoch": 0.7, + "learning_rate": 1.7913076685218765e-05, + "loss": 0.1417, + "step": 967 + }, + { + "epoch": 0.7, + "learning_rate": 1.7908297282678602e-05, + "loss": 0.1318, + "step": 968 + }, + { + "epoch": 0.7, + "learning_rate": 1.790351305272424e-05, + "loss": 0.1242, + "step": 969 + }, + { + "epoch": 0.7, + "learning_rate": 1.7898723998276086e-05, + "loss": 0.1339, + "step": 970 + }, + { + "epoch": 0.7, + "learning_rate": 1.789393012225749e-05, + "loss": 0.1286, + "step": 971 + }, + { + "epoch": 0.7, + "learning_rate": 1.7889131427594754e-05, + "loss": 0.1292, + "step": 972 + }, + { + "epoch": 0.7, + "learning_rate": 1.7884327917217113e-05, + "loss": 0.129, + "step": 973 + }, + { + "epoch": 0.7, + "learning_rate": 1.7879519594056748e-05, + "loss": 0.1355, + "step": 974 + }, + { + "epoch": 0.71, + "learning_rate": 1.787470646104877e-05, + "loss": 0.1189, + "step": 975 + }, + { + "epoch": 0.71, + "learning_rate": 1.7869888521131234e-05, + "loss": 0.1311, + "step": 976 + }, + { + "epoch": 0.71, + "learning_rate": 1.7865065777245127e-05, + "loss": 0.1177, + "step": 977 + }, + { + "epoch": 0.71, + "learning_rate": 1.7860238232334364e-05, + "loss": 0.1293, + "step": 978 + }, + { + "epoch": 0.71, + "learning_rate": 1.7855405889345796e-05, + "loss": 0.1359, + "step": 979 + }, + { + "epoch": 0.71, + "learning_rate": 1.78505687512292e-05, + "loss": 0.1373, + "step": 980 + }, + { + "epoch": 0.71, + "learning_rate": 1.7845726820937287e-05, + "loss": 0.1366, + "step": 981 + }, + { + "epoch": 0.71, + "learning_rate": 1.7840880101425676e-05, + "loss": 0.1188, + "step": 982 + }, + { + "epoch": 0.71, + "learning_rate": 1.783602859565293e-05, + "loss": 0.1238, + "step": 983 + }, + { + "epoch": 0.71, + "learning_rate": 1.7831172306580522e-05, + "loss": 0.1335, + "step": 984 + }, + { + "epoch": 0.71, + "learning_rate": 1.7826311237172843e-05, + "loss": 0.1389, + "step": 985 + }, + { + "epoch": 0.71, + "learning_rate": 1.7821445390397213e-05, + "loss": 0.1438, + "step": 986 + }, + { + "epoch": 0.71, + "learning_rate": 1.7816574769223857e-05, + "loss": 0.1332, + "step": 987 + }, + { + "epoch": 0.71, + "learning_rate": 1.781169937662592e-05, + "loss": 0.1275, + "step": 988 + }, + { + "epoch": 0.72, + "learning_rate": 1.780681921557946e-05, + "loss": 0.1374, + "step": 989 + }, + { + "epoch": 0.72, + "learning_rate": 1.7801934289063444e-05, + "loss": 0.1281, + "step": 990 + }, + { + "epoch": 0.72, + "learning_rate": 1.7797044600059745e-05, + "loss": 0.1272, + "step": 991 + }, + { + "epoch": 0.72, + "learning_rate": 1.7792150151553153e-05, + "loss": 0.1344, + "step": 992 + }, + { + "epoch": 0.72, + "learning_rate": 1.7787250946531348e-05, + "loss": 0.1425, + "step": 993 + }, + { + "epoch": 0.72, + "learning_rate": 1.7782346987984925e-05, + "loss": 0.1411, + "step": 994 + }, + { + "epoch": 0.72, + "learning_rate": 1.7777438278907384e-05, + "loss": 0.1482, + "step": 995 + }, + { + "epoch": 0.72, + "learning_rate": 1.7772524822295108e-05, + "loss": 0.1182, + "step": 996 + }, + { + "epoch": 0.72, + "learning_rate": 1.77676066211474e-05, + "loss": 0.1359, + "step": 997 + }, + { + "epoch": 0.72, + "learning_rate": 1.776268367846644e-05, + "loss": 0.1454, + "step": 998 + }, + { + "epoch": 0.72, + "learning_rate": 1.7757755997257315e-05, + "loss": 0.1423, + "step": 999 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752823580527996e-05, + "loss": 0.1272, + "step": 1000 + }, + { + "epoch": 0.72, + "eval_loss": 0.12931811809539795, + "eval_runtime": 712.3744, + "eval_samples_per_second": 70.188, + "eval_steps_per_second": 2.194, + "step": 1000 + }, + { + "epoch": 0.72, + "learning_rate": 1.774788643128935e-05, + "loss": 0.1271, + "step": 1001 + }, + { + "epoch": 0.72, + "learning_rate": 1.774294455255513e-05, + "loss": 0.1303, + "step": 1002 + }, + { + "epoch": 0.73, + "learning_rate": 1.7737997947341977e-05, + "loss": 0.1332, + "step": 1003 + }, + { + "epoch": 0.73, + "learning_rate": 1.7733046618669423e-05, + "loss": 0.123, + "step": 1004 + }, + { + "epoch": 0.73, + "learning_rate": 1.7728090569559868e-05, + "loss": 0.1265, + "step": 1005 + }, + { + "epoch": 0.73, + "learning_rate": 1.772312980303861e-05, + "loss": 0.1275, + "step": 1006 + }, + { + "epoch": 0.73, + "learning_rate": 1.771816432213382e-05, + "loss": 0.1375, + "step": 1007 + }, + { + "epoch": 0.73, + "learning_rate": 1.7713194129876546e-05, + "loss": 0.1296, + "step": 1008 + }, + { + "epoch": 0.73, + "learning_rate": 1.770821922930071e-05, + "loss": 0.1225, + "step": 1009 + }, + { + "epoch": 0.73, + "learning_rate": 1.770323962344311e-05, + "loss": 0.1303, + "step": 1010 + }, + { + "epoch": 0.73, + "learning_rate": 1.769825531534342e-05, + "loss": 0.1331, + "step": 1011 + }, + { + "epoch": 0.73, + "learning_rate": 1.769326630804418e-05, + "loss": 0.1299, + "step": 1012 + }, + { + "epoch": 0.73, + "learning_rate": 1.76882726045908e-05, + "loss": 0.1244, + "step": 1013 + }, + { + "epoch": 0.73, + "learning_rate": 1.7683274208031552e-05, + "loss": 0.1283, + "step": 1014 + }, + { + "epoch": 0.73, + "learning_rate": 1.7678271121417586e-05, + "loss": 0.1321, + "step": 1015 + }, + { + "epoch": 0.73, + "learning_rate": 1.7673263347802897e-05, + "loss": 0.1266, + "step": 1016 + }, + { + "epoch": 0.74, + "learning_rate": 1.7668250890244355e-05, + "loss": 0.1303, + "step": 1017 + }, + { + "epoch": 0.74, + "learning_rate": 1.7663233751801684e-05, + "loss": 0.127, + "step": 1018 + }, + { + "epoch": 0.74, + "learning_rate": 1.765821193553746e-05, + "loss": 0.1312, + "step": 1019 + }, + { + "epoch": 0.74, + "learning_rate": 1.765318544451713e-05, + "loss": 0.1261, + "step": 1020 + }, + { + "epoch": 0.74, + "learning_rate": 1.7648154281808975e-05, + "loss": 0.1303, + "step": 1021 + }, + { + "epoch": 0.74, + "learning_rate": 1.7643118450484142e-05, + "loss": 0.127, + "step": 1022 + }, + { + "epoch": 0.74, + "learning_rate": 1.7638077953616624e-05, + "loss": 0.1272, + "step": 1023 + }, + { + "epoch": 0.74, + "learning_rate": 1.763303279428326e-05, + "loss": 0.1195, + "step": 1024 + }, + { + "epoch": 0.74, + "learning_rate": 1.7627982975563735e-05, + "loss": 0.1343, + "step": 1025 + }, + { + "epoch": 0.74, + "learning_rate": 1.7622928500540578e-05, + "loss": 0.1299, + "step": 1026 + }, + { + "epoch": 0.74, + "learning_rate": 1.7617869372299168e-05, + "loss": 0.1288, + "step": 1027 + }, + { + "epoch": 0.74, + "learning_rate": 1.761280559392771e-05, + "loss": 0.1506, + "step": 1028 + }, + { + "epoch": 0.74, + "learning_rate": 1.760773716851726e-05, + "loss": 0.1328, + "step": 1029 + }, + { + "epoch": 0.75, + "learning_rate": 1.760266409916171e-05, + "loss": 0.1403, + "step": 1030 + }, + { + "epoch": 0.75, + "learning_rate": 1.7597586388957777e-05, + "loss": 0.131, + "step": 1031 + }, + { + "epoch": 0.75, + "learning_rate": 1.759250404100502e-05, + "loss": 0.1238, + "step": 1032 + }, + { + "epoch": 0.75, + "learning_rate": 1.7587417058405822e-05, + "loss": 0.1306, + "step": 1033 + }, + { + "epoch": 0.75, + "learning_rate": 1.7582325444265404e-05, + "loss": 0.1365, + "step": 1034 + }, + { + "epoch": 0.75, + "learning_rate": 1.7577229201691807e-05, + "loss": 0.1273, + "step": 1035 + }, + { + "epoch": 0.75, + "learning_rate": 1.7572128333795902e-05, + "loss": 0.1276, + "step": 1036 + }, + { + "epoch": 0.75, + "learning_rate": 1.756702284369138e-05, + "loss": 0.1389, + "step": 1037 + }, + { + "epoch": 0.75, + "learning_rate": 1.756191273449475e-05, + "loss": 0.1406, + "step": 1038 + }, + { + "epoch": 0.75, + "learning_rate": 1.7556798009325347e-05, + "loss": 0.1435, + "step": 1039 + }, + { + "epoch": 0.75, + "learning_rate": 1.755167867130533e-05, + "loss": 0.1307, + "step": 1040 + }, + { + "epoch": 0.75, + "learning_rate": 1.7546554723559653e-05, + "loss": 0.1294, + "step": 1041 + }, + { + "epoch": 0.75, + "learning_rate": 1.7541426169216108e-05, + "loss": 0.122, + "step": 1042 + }, + { + "epoch": 0.75, + "learning_rate": 1.753629301140528e-05, + "loss": 0.1318, + "step": 1043 + }, + { + "epoch": 0.76, + "learning_rate": 1.7531155253260574e-05, + "loss": 0.1245, + "step": 1044 + }, + { + "epoch": 0.76, + "learning_rate": 1.75260128979182e-05, + "loss": 0.1375, + "step": 1045 + }, + { + "epoch": 0.76, + "learning_rate": 1.7520865948517175e-05, + "loss": 0.1289, + "step": 1046 + }, + { + "epoch": 0.76, + "learning_rate": 1.7515714408199323e-05, + "loss": 0.1381, + "step": 1047 + }, + { + "epoch": 0.76, + "learning_rate": 1.7510558280109266e-05, + "loss": 0.1262, + "step": 1048 + }, + { + "epoch": 0.76, + "learning_rate": 1.7505397567394424e-05, + "loss": 0.1295, + "step": 1049 + }, + { + "epoch": 0.76, + "learning_rate": 1.7500232273205025e-05, + "loss": 0.1323, + "step": 1050 + }, + { + "epoch": 0.76, + "learning_rate": 1.7495062400694086e-05, + "loss": 0.1352, + "step": 1051 + }, + { + "epoch": 0.76, + "learning_rate": 1.7489887953017428e-05, + "loss": 0.1323, + "step": 1052 + }, + { + "epoch": 0.76, + "learning_rate": 1.7484708933333642e-05, + "loss": 0.1302, + "step": 1053 + }, + { + "epoch": 0.76, + "learning_rate": 1.747952534480414e-05, + "loss": 0.1295, + "step": 1054 + }, + { + "epoch": 0.76, + "learning_rate": 1.7474337190593104e-05, + "loss": 0.1338, + "step": 1055 + }, + { + "epoch": 0.76, + "learning_rate": 1.746914447386751e-05, + "loss": 0.1266, + "step": 1056 + }, + { + "epoch": 0.76, + "learning_rate": 1.746394719779711e-05, + "loss": 0.1284, + "step": 1057 + }, + { + "epoch": 0.77, + "learning_rate": 1.7458745365554452e-05, + "loss": 0.1246, + "step": 1058 + }, + { + "epoch": 0.77, + "learning_rate": 1.7453538980314858e-05, + "loss": 0.1255, + "step": 1059 + }, + { + "epoch": 0.77, + "learning_rate": 1.744832804525643e-05, + "loss": 0.1291, + "step": 1060 + }, + { + "epoch": 0.77, + "learning_rate": 1.7443112563560048e-05, + "loss": 0.129, + "step": 1061 + }, + { + "epoch": 0.77, + "learning_rate": 1.7437892538409364e-05, + "loss": 0.1356, + "step": 1062 + }, + { + "epoch": 0.77, + "learning_rate": 1.7432667972990812e-05, + "loss": 0.1185, + "step": 1063 + }, + { + "epoch": 0.77, + "learning_rate": 1.7427438870493584e-05, + "loss": 0.1323, + "step": 1064 + }, + { + "epoch": 0.77, + "learning_rate": 1.742220523410966e-05, + "loss": 0.1259, + "step": 1065 + }, + { + "epoch": 0.77, + "learning_rate": 1.741696706703377e-05, + "loss": 0.1286, + "step": 1066 + }, + { + "epoch": 0.77, + "learning_rate": 1.7411724372463416e-05, + "loss": 0.1347, + "step": 1067 + }, + { + "epoch": 0.77, + "learning_rate": 1.7406477153598865e-05, + "loss": 0.1358, + "step": 1068 + }, + { + "epoch": 0.77, + "learning_rate": 1.7401225413643153e-05, + "loss": 0.1311, + "step": 1069 + }, + { + "epoch": 0.77, + "learning_rate": 1.7395969155802057e-05, + "loss": 0.1326, + "step": 1070 + }, + { + "epoch": 0.77, + "learning_rate": 1.739070838328413e-05, + "loss": 0.1365, + "step": 1071 + }, + { + "epoch": 0.78, + "learning_rate": 1.7385443099300673e-05, + "loss": 0.1236, + "step": 1072 + }, + { + "epoch": 0.78, + "learning_rate": 1.7380173307065736e-05, + "loss": 0.1306, + "step": 1073 + }, + { + "epoch": 0.78, + "learning_rate": 1.737489900979613e-05, + "loss": 0.1226, + "step": 1074 + }, + { + "epoch": 0.78, + "learning_rate": 1.736962021071141e-05, + "loss": 0.1321, + "step": 1075 + }, + { + "epoch": 0.78, + "learning_rate": 1.7364336913033888e-05, + "loss": 0.136, + "step": 1076 + }, + { + "epoch": 0.78, + "learning_rate": 1.7359049119988612e-05, + "loss": 0.132, + "step": 1077 + }, + { + "epoch": 0.78, + "learning_rate": 1.7353756834803377e-05, + "loss": 0.1271, + "step": 1078 + }, + { + "epoch": 0.78, + "learning_rate": 1.7348460060708717e-05, + "loss": 0.1386, + "step": 1079 + }, + { + "epoch": 0.78, + "learning_rate": 1.7343158800937918e-05, + "loss": 0.1391, + "step": 1080 + }, + { + "epoch": 0.78, + "learning_rate": 1.733785305872699e-05, + "loss": 0.1344, + "step": 1081 + }, + { + "epoch": 0.78, + "learning_rate": 1.7332542837314686e-05, + "loss": 0.1266, + "step": 1082 + }, + { + "epoch": 0.78, + "learning_rate": 1.7327228139942497e-05, + "loss": 0.1414, + "step": 1083 + }, + { + "epoch": 0.78, + "learning_rate": 1.7321908969854635e-05, + "loss": 0.1255, + "step": 1084 + }, + { + "epoch": 0.78, + "learning_rate": 1.7316585330298053e-05, + "loss": 0.1256, + "step": 1085 + }, + { + "epoch": 0.79, + "learning_rate": 1.7311257224522427e-05, + "loss": 0.1325, + "step": 1086 + }, + { + "epoch": 0.79, + "learning_rate": 1.730592465578016e-05, + "loss": 0.1204, + "step": 1087 + }, + { + "epoch": 0.79, + "learning_rate": 1.7300587627326383e-05, + "loss": 0.1187, + "step": 1088 + }, + { + "epoch": 0.79, + "learning_rate": 1.7295246142418943e-05, + "loss": 0.1407, + "step": 1089 + }, + { + "epoch": 0.79, + "learning_rate": 1.728990020431841e-05, + "loss": 0.122, + "step": 1090 + }, + { + "epoch": 0.79, + "learning_rate": 1.728454981628807e-05, + "loss": 0.1314, + "step": 1091 + }, + { + "epoch": 0.79, + "learning_rate": 1.7279194981593935e-05, + "loss": 0.1313, + "step": 1092 + }, + { + "epoch": 0.79, + "learning_rate": 1.727383570350472e-05, + "loss": 0.1387, + "step": 1093 + }, + { + "epoch": 0.79, + "learning_rate": 1.7268471985291858e-05, + "loss": 0.1249, + "step": 1094 + }, + { + "epoch": 0.79, + "learning_rate": 1.7263103830229493e-05, + "loss": 0.1374, + "step": 1095 + }, + { + "epoch": 0.79, + "learning_rate": 1.725773124159447e-05, + "loss": 0.1198, + "step": 1096 + }, + { + "epoch": 0.79, + "learning_rate": 1.7252354222666356e-05, + "loss": 0.1501, + "step": 1097 + }, + { + "epoch": 0.79, + "learning_rate": 1.72469727767274e-05, + "loss": 0.1375, + "step": 1098 + }, + { + "epoch": 0.79, + "learning_rate": 1.724158690706257e-05, + "loss": 0.1244, + "step": 1099 + }, + { + "epoch": 0.8, + "learning_rate": 1.723619661695954e-05, + "loss": 0.1347, + "step": 1100 + }, + { + "epoch": 0.8, + "learning_rate": 1.7230801909708653e-05, + "loss": 0.1248, + "step": 1101 + }, + { + "epoch": 0.8, + "learning_rate": 1.7225402788602986e-05, + "loss": 0.126, + "step": 1102 + }, + { + "epoch": 0.8, + "learning_rate": 1.721999925693828e-05, + "loss": 0.1322, + "step": 1103 + }, + { + "epoch": 0.8, + "learning_rate": 1.7214591318012985e-05, + "loss": 0.1245, + "step": 1104 + }, + { + "epoch": 0.8, + "learning_rate": 1.7209178975128237e-05, + "loss": 0.1334, + "step": 1105 + }, + { + "epoch": 0.8, + "learning_rate": 1.7203762231587856e-05, + "loss": 0.1269, + "step": 1106 + }, + { + "epoch": 0.8, + "learning_rate": 1.7198341090698357e-05, + "loss": 0.1262, + "step": 1107 + }, + { + "epoch": 0.8, + "learning_rate": 1.719291555576893e-05, + "loss": 0.1276, + "step": 1108 + }, + { + "epoch": 0.8, + "learning_rate": 1.7187485630111455e-05, + "loss": 0.1297, + "step": 1109 + }, + { + "epoch": 0.8, + "learning_rate": 1.7182051317040487e-05, + "loss": 0.1292, + "step": 1110 + }, + { + "epoch": 0.8, + "learning_rate": 1.717661261987326e-05, + "loss": 0.134, + "step": 1111 + }, + { + "epoch": 0.8, + "learning_rate": 1.7171169541929685e-05, + "loss": 0.1203, + "step": 1112 + }, + { + "epoch": 0.81, + "learning_rate": 1.7165722086532355e-05, + "loss": 0.1317, + "step": 1113 + }, + { + "epoch": 0.81, + "learning_rate": 1.7160270257006514e-05, + "loss": 0.1208, + "step": 1114 + }, + { + "epoch": 0.81, + "learning_rate": 1.71548140566801e-05, + "loss": 0.1257, + "step": 1115 + }, + { + "epoch": 0.81, + "learning_rate": 1.7149353488883704e-05, + "loss": 0.1404, + "step": 1116 + }, + { + "epoch": 0.81, + "learning_rate": 1.714388855695059e-05, + "loss": 0.135, + "step": 1117 + }, + { + "epoch": 0.81, + "learning_rate": 1.713841926421668e-05, + "loss": 0.1166, + "step": 1118 + }, + { + "epoch": 0.81, + "learning_rate": 1.713294561402056e-05, + "loss": 0.1299, + "step": 1119 + }, + { + "epoch": 0.81, + "learning_rate": 1.712746760970348e-05, + "loss": 0.1292, + "step": 1120 + }, + { + "epoch": 0.81, + "learning_rate": 1.712198525460935e-05, + "loss": 0.1194, + "step": 1121 + }, + { + "epoch": 0.81, + "learning_rate": 1.7116498552084727e-05, + "loss": 0.1394, + "step": 1122 + }, + { + "epoch": 0.81, + "learning_rate": 1.7111007505478822e-05, + "loss": 0.1354, + "step": 1123 + }, + { + "epoch": 0.81, + "learning_rate": 1.710551211814351e-05, + "loss": 0.1228, + "step": 1124 + }, + { + "epoch": 0.81, + "learning_rate": 1.7100012393433294e-05, + "loss": 0.1222, + "step": 1125 + }, + { + "epoch": 0.81, + "learning_rate": 1.709450833470535e-05, + "loss": 0.1406, + "step": 1126 + }, + { + "epoch": 0.82, + "learning_rate": 1.708899994531948e-05, + "loss": 0.124, + "step": 1127 + }, + { + "epoch": 0.82, + "learning_rate": 1.708348722863814e-05, + "loss": 0.1124, + "step": 1128 + }, + { + "epoch": 0.82, + "learning_rate": 1.707797018802643e-05, + "loss": 0.1249, + "step": 1129 + }, + { + "epoch": 0.82, + "learning_rate": 1.707244882685207e-05, + "loss": 0.1334, + "step": 1130 + }, + { + "epoch": 0.82, + "learning_rate": 1.7066923148485445e-05, + "loss": 0.1263, + "step": 1131 + }, + { + "epoch": 0.82, + "learning_rate": 1.706139315629955e-05, + "loss": 0.1322, + "step": 1132 + }, + { + "epoch": 0.82, + "learning_rate": 1.7055858853670035e-05, + "loss": 0.1175, + "step": 1133 + }, + { + "epoch": 0.82, + "learning_rate": 1.7050320243975165e-05, + "loss": 0.1278, + "step": 1134 + }, + { + "epoch": 0.82, + "learning_rate": 1.704477733059584e-05, + "loss": 0.1329, + "step": 1135 + }, + { + "epoch": 0.82, + "learning_rate": 1.703923011691559e-05, + "loss": 0.134, + "step": 1136 + }, + { + "epoch": 0.82, + "learning_rate": 1.703367860632057e-05, + "loss": 0.1307, + "step": 1137 + }, + { + "epoch": 0.82, + "learning_rate": 1.7028122802199545e-05, + "loss": 0.1327, + "step": 1138 + }, + { + "epoch": 0.82, + "learning_rate": 1.7022562707943918e-05, + "loss": 0.1315, + "step": 1139 + }, + { + "epoch": 0.82, + "learning_rate": 1.7016998326947705e-05, + "loss": 0.1349, + "step": 1140 + }, + { + "epoch": 0.83, + "learning_rate": 1.7011429662607532e-05, + "loss": 0.1257, + "step": 1141 + }, + { + "epoch": 0.83, + "learning_rate": 1.7005856718322654e-05, + "loss": 0.1307, + "step": 1142 + }, + { + "epoch": 0.83, + "learning_rate": 1.7000279497494918e-05, + "loss": 0.1255, + "step": 1143 + }, + { + "epoch": 0.83, + "learning_rate": 1.6994698003528803e-05, + "loss": 0.1281, + "step": 1144 + }, + { + "epoch": 0.83, + "learning_rate": 1.698911223983138e-05, + "loss": 0.1307, + "step": 1145 + }, + { + "epoch": 0.83, + "learning_rate": 1.698352220981234e-05, + "loss": 0.1312, + "step": 1146 + }, + { + "epoch": 0.83, + "learning_rate": 1.6977927916883966e-05, + "loss": 0.1272, + "step": 1147 + }, + { + "epoch": 0.83, + "learning_rate": 1.697232936446115e-05, + "loss": 0.1222, + "step": 1148 + }, + { + "epoch": 0.83, + "learning_rate": 1.696672655596138e-05, + "loss": 0.1338, + "step": 1149 + }, + { + "epoch": 0.83, + "learning_rate": 1.696111949480475e-05, + "loss": 0.1324, + "step": 1150 + }, + { + "epoch": 0.83, + "learning_rate": 1.6955508184413942e-05, + "loss": 0.1286, + "step": 1151 + }, + { + "epoch": 0.83, + "learning_rate": 1.6949892628214233e-05, + "loss": 0.1308, + "step": 1152 + }, + { + "epoch": 0.83, + "learning_rate": 1.6944272829633494e-05, + "loss": 0.1217, + "step": 1153 + }, + { + "epoch": 0.83, + "learning_rate": 1.6938648792102185e-05, + "loss": 0.1378, + "step": 1154 + }, + { + "epoch": 0.84, + "learning_rate": 1.6933020519053354e-05, + "loss": 0.1265, + "step": 1155 + }, + { + "epoch": 0.84, + "learning_rate": 1.6927388013922634e-05, + "loss": 0.1375, + "step": 1156 + }, + { + "epoch": 0.84, + "learning_rate": 1.6921751280148236e-05, + "loss": 0.1247, + "step": 1157 + }, + { + "epoch": 0.84, + "learning_rate": 1.6916110321170963e-05, + "loss": 0.1339, + "step": 1158 + }, + { + "epoch": 0.84, + "learning_rate": 1.691046514043419e-05, + "loss": 0.1274, + "step": 1159 + }, + { + "epoch": 0.84, + "learning_rate": 1.690481574138387e-05, + "loss": 0.1215, + "step": 1160 + }, + { + "epoch": 0.84, + "learning_rate": 1.6899162127468537e-05, + "loss": 0.1313, + "step": 1161 + }, + { + "epoch": 0.84, + "learning_rate": 1.6893504302139283e-05, + "loss": 0.1289, + "step": 1162 + }, + { + "epoch": 0.84, + "learning_rate": 1.6887842268849785e-05, + "loss": 0.1317, + "step": 1163 + }, + { + "epoch": 0.84, + "learning_rate": 1.6882176031056287e-05, + "loss": 0.1274, + "step": 1164 + }, + { + "epoch": 0.84, + "learning_rate": 1.687650559221759e-05, + "loss": 0.14, + "step": 1165 + }, + { + "epoch": 0.84, + "learning_rate": 1.687083095579507e-05, + "loss": 0.125, + "step": 1166 + }, + { + "epoch": 0.84, + "learning_rate": 1.6865152125252657e-05, + "loss": 0.1313, + "step": 1167 + }, + { + "epoch": 0.84, + "learning_rate": 1.685946910405685e-05, + "loss": 0.1318, + "step": 1168 + }, + { + "epoch": 0.85, + "learning_rate": 1.6853781895676703e-05, + "loss": 0.1336, + "step": 1169 + }, + { + "epoch": 0.85, + "learning_rate": 1.6848090503583816e-05, + "loss": 0.135, + "step": 1170 + }, + { + "epoch": 0.85, + "learning_rate": 1.6842394931252356e-05, + "loss": 0.1367, + "step": 1171 + }, + { + "epoch": 0.85, + "learning_rate": 1.683669518215904e-05, + "loss": 0.1236, + "step": 1172 + }, + { + "epoch": 0.85, + "learning_rate": 1.683099125978313e-05, + "loss": 0.1408, + "step": 1173 + }, + { + "epoch": 0.85, + "learning_rate": 1.6825283167606427e-05, + "loss": 0.1234, + "step": 1174 + }, + { + "epoch": 0.85, + "learning_rate": 1.68195709091133e-05, + "loss": 0.1405, + "step": 1175 + }, + { + "epoch": 0.85, + "learning_rate": 1.6813854487790644e-05, + "loss": 0.1308, + "step": 1176 + }, + { + "epoch": 0.85, + "learning_rate": 1.6808133907127898e-05, + "loss": 0.1348, + "step": 1177 + }, + { + "epoch": 0.85, + "learning_rate": 1.6802409170617045e-05, + "loss": 0.1339, + "step": 1178 + }, + { + "epoch": 0.85, + "learning_rate": 1.67966802817526e-05, + "loss": 0.1356, + "step": 1179 + }, + { + "epoch": 0.85, + "learning_rate": 1.6790947244031612e-05, + "loss": 0.1302, + "step": 1180 + }, + { + "epoch": 0.85, + "learning_rate": 1.6785210060953664e-05, + "loss": 0.1367, + "step": 1181 + }, + { + "epoch": 0.85, + "learning_rate": 1.6779468736020877e-05, + "loss": 0.1261, + "step": 1182 + }, + { + "epoch": 0.86, + "learning_rate": 1.6773723272737885e-05, + "loss": 0.1453, + "step": 1183 + }, + { + "epoch": 0.86, + "learning_rate": 1.676797367461186e-05, + "loss": 0.1255, + "step": 1184 + }, + { + "epoch": 0.86, + "learning_rate": 1.6762219945152497e-05, + "loss": 0.1246, + "step": 1185 + }, + { + "epoch": 0.86, + "learning_rate": 1.6756462087872005e-05, + "loss": 0.127, + "step": 1186 + }, + { + "epoch": 0.86, + "learning_rate": 1.6750700106285124e-05, + "loss": 0.136, + "step": 1187 + }, + { + "epoch": 0.86, + "learning_rate": 1.67449340039091e-05, + "loss": 0.1281, + "step": 1188 + }, + { + "epoch": 0.86, + "learning_rate": 1.6739163784263702e-05, + "loss": 0.1278, + "step": 1189 + }, + { + "epoch": 0.86, + "learning_rate": 1.6733389450871214e-05, + "loss": 0.1227, + "step": 1190 + }, + { + "epoch": 0.86, + "learning_rate": 1.6727611007256422e-05, + "loss": 0.1328, + "step": 1191 + }, + { + "epoch": 0.86, + "learning_rate": 1.672182845694663e-05, + "loss": 0.1363, + "step": 1192 + }, + { + "epoch": 0.86, + "learning_rate": 1.6716041803471645e-05, + "loss": 0.1271, + "step": 1193 + }, + { + "epoch": 0.86, + "learning_rate": 1.6710251050363777e-05, + "loss": 0.1238, + "step": 1194 + }, + { + "epoch": 0.86, + "learning_rate": 1.6704456201157842e-05, + "loss": 0.1235, + "step": 1195 + }, + { + "epoch": 0.87, + "learning_rate": 1.6698657259391157e-05, + "loss": 0.125, + "step": 1196 + }, + { + "epoch": 0.87, + "learning_rate": 1.669285422860353e-05, + "loss": 0.1329, + "step": 1197 + }, + { + "epoch": 0.87, + "learning_rate": 1.6687047112337274e-05, + "loss": 0.1191, + "step": 1198 + }, + { + "epoch": 0.87, + "learning_rate": 1.668123591413719e-05, + "loss": 0.1404, + "step": 1199 + }, + { + "epoch": 0.87, + "learning_rate": 1.6675420637550574e-05, + "loss": 0.133, + "step": 1200 + }, + { + "epoch": 0.87, + "learning_rate": 1.6669601286127208e-05, + "loss": 0.1304, + "step": 1201 + }, + { + "epoch": 0.87, + "learning_rate": 1.6663777863419367e-05, + "loss": 0.1288, + "step": 1202 + }, + { + "epoch": 0.87, + "learning_rate": 1.6657950372981802e-05, + "loss": 0.1341, + "step": 1203 + }, + { + "epoch": 0.87, + "learning_rate": 1.665211881837176e-05, + "loss": 0.1337, + "step": 1204 + }, + { + "epoch": 0.87, + "learning_rate": 1.6646283203148957e-05, + "loss": 0.1378, + "step": 1205 + }, + { + "epoch": 0.87, + "learning_rate": 1.6640443530875598e-05, + "loss": 0.1264, + "step": 1206 + }, + { + "epoch": 0.87, + "learning_rate": 1.6634599805116352e-05, + "loss": 0.1342, + "step": 1207 + }, + { + "epoch": 0.87, + "learning_rate": 1.662875202943837e-05, + "loss": 0.1304, + "step": 1208 + }, + { + "epoch": 0.87, + "learning_rate": 1.662290020741128e-05, + "loss": 0.1292, + "step": 1209 + }, + { + "epoch": 0.88, + "learning_rate": 1.6617044342607166e-05, + "loss": 0.1375, + "step": 1210 + }, + { + "epoch": 0.88, + "learning_rate": 1.66111844386006e-05, + "loss": 0.1301, + "step": 1211 + }, + { + "epoch": 0.88, + "learning_rate": 1.6605320498968596e-05, + "loss": 0.1214, + "step": 1212 + }, + { + "epoch": 0.88, + "learning_rate": 1.659945252729065e-05, + "loss": 0.1287, + "step": 1213 + }, + { + "epoch": 0.88, + "learning_rate": 1.659358052714871e-05, + "loss": 0.1292, + "step": 1214 + }, + { + "epoch": 0.88, + "learning_rate": 1.6587704502127187e-05, + "loss": 0.1376, + "step": 1215 + }, + { + "epoch": 0.88, + "learning_rate": 1.6581824455812945e-05, + "loss": 0.133, + "step": 1216 + }, + { + "epoch": 0.88, + "learning_rate": 1.6575940391795308e-05, + "loss": 0.1223, + "step": 1217 + }, + { + "epoch": 0.88, + "learning_rate": 1.657005231366605e-05, + "loss": 0.1313, + "step": 1218 + }, + { + "epoch": 0.88, + "learning_rate": 1.6564160225019392e-05, + "loss": 0.1262, + "step": 1219 + }, + { + "epoch": 0.88, + "learning_rate": 1.655826412945201e-05, + "loss": 0.1291, + "step": 1220 + }, + { + "epoch": 0.88, + "learning_rate": 1.655236403056302e-05, + "loss": 0.1359, + "step": 1221 + }, + { + "epoch": 0.88, + "learning_rate": 1.6546459931953983e-05, + "loss": 0.1249, + "step": 1222 + }, + { + "epoch": 0.88, + "learning_rate": 1.6540551837228905e-05, + "loss": 0.1297, + "step": 1223 + }, + { + "epoch": 0.89, + "learning_rate": 1.6534639749994223e-05, + "loss": 0.1216, + "step": 1224 + }, + { + "epoch": 0.89, + "learning_rate": 1.6528723673858822e-05, + "loss": 0.1364, + "step": 1225 + }, + { + "epoch": 0.89, + "learning_rate": 1.6522803612434015e-05, + "loss": 0.1372, + "step": 1226 + }, + { + "epoch": 0.89, + "learning_rate": 1.651687956933355e-05, + "loss": 0.1376, + "step": 1227 + }, + { + "epoch": 0.89, + "learning_rate": 1.6510951548173605e-05, + "loss": 0.1327, + "step": 1228 + }, + { + "epoch": 0.89, + "learning_rate": 1.6505019552572786e-05, + "loss": 0.1268, + "step": 1229 + }, + { + "epoch": 0.89, + "learning_rate": 1.6499083586152122e-05, + "loss": 0.1242, + "step": 1230 + }, + { + "epoch": 0.89, + "learning_rate": 1.6493143652535074e-05, + "loss": 0.1273, + "step": 1231 + }, + { + "epoch": 0.89, + "learning_rate": 1.6487199755347517e-05, + "loss": 0.1262, + "step": 1232 + }, + { + "epoch": 0.89, + "learning_rate": 1.6481251898217746e-05, + "loss": 0.1391, + "step": 1233 + }, + { + "epoch": 0.89, + "learning_rate": 1.6475300084776486e-05, + "loss": 0.1272, + "step": 1234 + }, + { + "epoch": 0.89, + "learning_rate": 1.6469344318656856e-05, + "loss": 0.1318, + "step": 1235 + }, + { + "epoch": 0.89, + "learning_rate": 1.6463384603494402e-05, + "loss": 0.1317, + "step": 1236 + }, + { + "epoch": 0.89, + "learning_rate": 1.6457420942927073e-05, + "loss": 0.1267, + "step": 1237 + }, + { + "epoch": 0.9, + "learning_rate": 1.645145334059524e-05, + "loss": 0.126, + "step": 1238 + }, + { + "epoch": 0.9, + "learning_rate": 1.644548180014166e-05, + "loss": 0.134, + "step": 1239 + }, + { + "epoch": 0.9, + "learning_rate": 1.643950632521151e-05, + "loss": 0.1236, + "step": 1240 + }, + { + "epoch": 0.9, + "learning_rate": 1.6433526919452362e-05, + "loss": 0.1158, + "step": 1241 + }, + { + "epoch": 0.9, + "learning_rate": 1.642754358651419e-05, + "loss": 0.125, + "step": 1242 + }, + { + "epoch": 0.9, + "learning_rate": 1.642155633004936e-05, + "loss": 0.1216, + "step": 1243 + }, + { + "epoch": 0.9, + "learning_rate": 1.641556515371264e-05, + "loss": 0.1263, + "step": 1244 + }, + { + "epoch": 0.9, + "learning_rate": 1.640957006116119e-05, + "loss": 0.1285, + "step": 1245 + }, + { + "epoch": 0.9, + "learning_rate": 1.6403571056054554e-05, + "loss": 0.1365, + "step": 1246 + }, + { + "epoch": 0.9, + "learning_rate": 1.6397568142054665e-05, + "loss": 0.1262, + "step": 1247 + }, + { + "epoch": 0.9, + "learning_rate": 1.6391561322825853e-05, + "loss": 0.1325, + "step": 1248 + }, + { + "epoch": 0.9, + "learning_rate": 1.638555060203482e-05, + "loss": 0.1346, + "step": 1249 + }, + { + "epoch": 0.9, + "learning_rate": 1.637953598335066e-05, + "loss": 0.1313, + "step": 1250 + }, + { + "epoch": 0.9, + "learning_rate": 1.637351747044483e-05, + "loss": 0.1347, + "step": 1251 + }, + { + "epoch": 0.91, + "learning_rate": 1.6367495066991187e-05, + "loss": 0.1439, + "step": 1252 + }, + { + "epoch": 0.91, + "learning_rate": 1.6361468776665944e-05, + "loss": 0.1314, + "step": 1253 + }, + { + "epoch": 0.91, + "learning_rate": 1.6355438603147686e-05, + "loss": 0.1216, + "step": 1254 + }, + { + "epoch": 0.91, + "learning_rate": 1.634940455011739e-05, + "loss": 0.123, + "step": 1255 + }, + { + "epoch": 0.91, + "learning_rate": 1.6343366621258376e-05, + "loss": 0.1343, + "step": 1256 + }, + { + "epoch": 0.91, + "learning_rate": 1.633732482025634e-05, + "loss": 0.1136, + "step": 1257 + }, + { + "epoch": 0.91, + "learning_rate": 1.633127915079935e-05, + "loss": 0.1293, + "step": 1258 + }, + { + "epoch": 0.91, + "learning_rate": 1.632522961657782e-05, + "loss": 0.1226, + "step": 1259 + }, + { + "epoch": 0.91, + "learning_rate": 1.6319176221284537e-05, + "loss": 0.1253, + "step": 1260 + }, + { + "epoch": 0.91, + "learning_rate": 1.631311896861463e-05, + "loss": 0.1248, + "step": 1261 + }, + { + "epoch": 0.91, + "learning_rate": 1.6307057862265602e-05, + "loss": 0.1299, + "step": 1262 + }, + { + "epoch": 0.91, + "learning_rate": 1.6300992905937292e-05, + "loss": 0.1215, + "step": 1263 + }, + { + "epoch": 0.91, + "learning_rate": 1.629492410333189e-05, + "loss": 0.1432, + "step": 1264 + }, + { + "epoch": 0.92, + "learning_rate": 1.628885145815395e-05, + "loss": 0.1302, + "step": 1265 + }, + { + "epoch": 0.92, + "learning_rate": 1.6282774974110347e-05, + "loss": 0.1177, + "step": 1266 + }, + { + "epoch": 0.92, + "learning_rate": 1.6276694654910326e-05, + "loss": 0.1199, + "step": 1267 + }, + { + "epoch": 0.92, + "learning_rate": 1.6270610504265453e-05, + "loss": 0.1315, + "step": 1268 + }, + { + "epoch": 0.92, + "learning_rate": 1.626452252588963e-05, + "loss": 0.1208, + "step": 1269 + }, + { + "epoch": 0.92, + "learning_rate": 1.6258430723499123e-05, + "loss": 0.1295, + "step": 1270 + }, + { + "epoch": 0.92, + "learning_rate": 1.6252335100812502e-05, + "loss": 0.1338, + "step": 1271 + }, + { + "epoch": 0.92, + "learning_rate": 1.6246235661550685e-05, + "loss": 0.1258, + "step": 1272 + }, + { + "epoch": 0.92, + "learning_rate": 1.6240132409436914e-05, + "loss": 0.127, + "step": 1273 + }, + { + "epoch": 0.92, + "learning_rate": 1.623402534819676e-05, + "loss": 0.1349, + "step": 1274 + }, + { + "epoch": 0.92, + "learning_rate": 1.6227914481558125e-05, + "loss": 0.118, + "step": 1275 + }, + { + "epoch": 0.92, + "learning_rate": 1.6221799813251222e-05, + "loss": 0.1183, + "step": 1276 + }, + { + "epoch": 0.92, + "learning_rate": 1.6215681347008595e-05, + "loss": 0.1261, + "step": 1277 + }, + { + "epoch": 0.92, + "learning_rate": 1.62095590865651e-05, + "loss": 0.1333, + "step": 1278 + }, + { + "epoch": 0.93, + "learning_rate": 1.6203433035657914e-05, + "loss": 0.1294, + "step": 1279 + }, + { + "epoch": 0.93, + "learning_rate": 1.619730319802652e-05, + "loss": 0.1188, + "step": 1280 + }, + { + "epoch": 0.93, + "learning_rate": 1.6191169577412728e-05, + "loss": 0.1342, + "step": 1281 + }, + { + "epoch": 0.93, + "learning_rate": 1.618503217756064e-05, + "loss": 0.1231, + "step": 1282 + }, + { + "epoch": 0.93, + "learning_rate": 1.617889100221667e-05, + "loss": 0.1234, + "step": 1283 + }, + { + "epoch": 0.93, + "learning_rate": 1.6172746055129544e-05, + "loss": 0.1393, + "step": 1284 + }, + { + "epoch": 0.93, + "learning_rate": 1.6166597340050278e-05, + "loss": 0.1272, + "step": 1285 + }, + { + "epoch": 0.93, + "learning_rate": 1.6160444860732203e-05, + "loss": 0.1215, + "step": 1286 + }, + { + "epoch": 0.93, + "learning_rate": 1.6154288620930935e-05, + "loss": 0.1297, + "step": 1287 + }, + { + "epoch": 0.93, + "learning_rate": 1.6148128624404393e-05, + "loss": 0.1282, + "step": 1288 + }, + { + "epoch": 0.93, + "learning_rate": 1.6141964874912783e-05, + "loss": 0.1307, + "step": 1289 + }, + { + "epoch": 0.93, + "learning_rate": 1.613579737621861e-05, + "loss": 0.1239, + "step": 1290 + }, + { + "epoch": 0.93, + "learning_rate": 1.6129626132086657e-05, + "loss": 0.1248, + "step": 1291 + }, + { + "epoch": 0.93, + "learning_rate": 1.6123451146284005e-05, + "loss": 0.1286, + "step": 1292 + }, + { + "epoch": 0.94, + "learning_rate": 1.611727242258001e-05, + "loss": 0.1315, + "step": 1293 + }, + { + "epoch": 0.94, + "learning_rate": 1.6111089964746318e-05, + "loss": 0.1286, + "step": 1294 + }, + { + "epoch": 0.94, + "learning_rate": 1.610490377655684e-05, + "loss": 0.1297, + "step": 1295 + }, + { + "epoch": 0.94, + "learning_rate": 1.609871386178778e-05, + "loss": 0.1226, + "step": 1296 + }, + { + "epoch": 0.94, + "learning_rate": 1.609252022421761e-05, + "loss": 0.1339, + "step": 1297 + }, + { + "epoch": 0.94, + "learning_rate": 1.608632286762708e-05, + "loss": 0.1239, + "step": 1298 + }, + { + "epoch": 0.94, + "learning_rate": 1.6080121795799197e-05, + "loss": 0.1235, + "step": 1299 + }, + { + "epoch": 0.94, + "learning_rate": 1.6073917012519244e-05, + "loss": 0.1319, + "step": 1300 + }, + { + "epoch": 0.94, + "learning_rate": 1.606770852157477e-05, + "loss": 0.1149, + "step": 1301 + }, + { + "epoch": 0.94, + "learning_rate": 1.606149632675559e-05, + "loss": 0.1281, + "step": 1302 + }, + { + "epoch": 0.94, + "learning_rate": 1.6055280431853775e-05, + "loss": 0.1334, + "step": 1303 + }, + { + "epoch": 0.94, + "learning_rate": 1.604906084066366e-05, + "loss": 0.1261, + "step": 1304 + }, + { + "epoch": 0.94, + "learning_rate": 1.604283755698182e-05, + "loss": 0.1265, + "step": 1305 + }, + { + "epoch": 0.94, + "learning_rate": 1.6036610584607113e-05, + "loss": 0.1394, + "step": 1306 + }, + { + "epoch": 0.95, + "learning_rate": 1.603037992734062e-05, + "loss": 0.1239, + "step": 1307 + }, + { + "epoch": 0.95, + "learning_rate": 1.602414558898569e-05, + "loss": 0.1322, + "step": 1308 + }, + { + "epoch": 0.95, + "learning_rate": 1.6017907573347912e-05, + "loss": 0.1274, + "step": 1309 + }, + { + "epoch": 0.95, + "learning_rate": 1.6011665884235114e-05, + "loss": 0.1305, + "step": 1310 + }, + { + "epoch": 0.95, + "learning_rate": 1.600542052545738e-05, + "loss": 0.126, + "step": 1311 + }, + { + "epoch": 0.95, + "learning_rate": 1.5999171500827026e-05, + "loss": 0.1238, + "step": 1312 + }, + { + "epoch": 0.95, + "learning_rate": 1.59929188141586e-05, + "loss": 0.1249, + "step": 1313 + }, + { + "epoch": 0.95, + "learning_rate": 1.5986662469268897e-05, + "loss": 0.1244, + "step": 1314 + }, + { + "epoch": 0.95, + "learning_rate": 1.5980402469976945e-05, + "loss": 0.1234, + "step": 1315 + }, + { + "epoch": 0.95, + "learning_rate": 1.5974138820103985e-05, + "loss": 0.1198, + "step": 1316 + }, + { + "epoch": 0.95, + "learning_rate": 1.5967871523473508e-05, + "loss": 0.128, + "step": 1317 + }, + { + "epoch": 0.95, + "learning_rate": 1.5961600583911222e-05, + "loss": 0.1292, + "step": 1318 + }, + { + "epoch": 0.95, + "learning_rate": 1.5955326005245052e-05, + "loss": 0.1331, + "step": 1319 + }, + { + "epoch": 0.95, + "learning_rate": 1.594904779130516e-05, + "loss": 0.1257, + "step": 1320 + }, + { + "epoch": 0.96, + "learning_rate": 1.5942765945923912e-05, + "loss": 0.1238, + "step": 1321 + }, + { + "epoch": 0.96, + "learning_rate": 1.59364804729359e-05, + "loss": 0.1181, + "step": 1322 + }, + { + "epoch": 0.96, + "learning_rate": 1.5930191376177924e-05, + "loss": 0.1275, + "step": 1323 + }, + { + "epoch": 0.96, + "learning_rate": 1.5923898659489004e-05, + "loss": 0.1341, + "step": 1324 + }, + { + "epoch": 0.96, + "learning_rate": 1.5917602326710363e-05, + "loss": 0.1207, + "step": 1325 + }, + { + "epoch": 0.96, + "learning_rate": 1.5911302381685433e-05, + "loss": 0.1397, + "step": 1326 + }, + { + "epoch": 0.96, + "learning_rate": 1.590499882825985e-05, + "loss": 0.1284, + "step": 1327 + }, + { + "epoch": 0.96, + "learning_rate": 1.589869167028146e-05, + "loss": 0.134, + "step": 1328 + }, + { + "epoch": 0.96, + "learning_rate": 1.5892380911600293e-05, + "loss": 0.1249, + "step": 1329 + }, + { + "epoch": 0.96, + "learning_rate": 1.5886066556068596e-05, + "loss": 0.1354, + "step": 1330 + }, + { + "epoch": 0.96, + "learning_rate": 1.5879748607540803e-05, + "loss": 0.1254, + "step": 1331 + }, + { + "epoch": 0.96, + "learning_rate": 1.5873427069873537e-05, + "loss": 0.1318, + "step": 1332 + }, + { + "epoch": 0.96, + "learning_rate": 1.5867101946925618e-05, + "loss": 0.1233, + "step": 1333 + }, + { + "epoch": 0.96, + "learning_rate": 1.586077324255805e-05, + "loss": 0.1394, + "step": 1334 + }, + { + "epoch": 0.97, + "learning_rate": 1.5854440960634033e-05, + "loss": 0.1288, + "step": 1335 + }, + { + "epoch": 0.97, + "learning_rate": 1.5848105105018935e-05, + "loss": 0.1202, + "step": 1336 + }, + { + "epoch": 0.97, + "learning_rate": 1.584176567958032e-05, + "loss": 0.1295, + "step": 1337 + }, + { + "epoch": 0.97, + "learning_rate": 1.583542268818792e-05, + "loss": 0.1333, + "step": 1338 + }, + { + "epoch": 0.97, + "learning_rate": 1.5829076134713656e-05, + "loss": 0.1253, + "step": 1339 + }, + { + "epoch": 0.97, + "learning_rate": 1.5822726023031613e-05, + "loss": 0.1273, + "step": 1340 + }, + { + "epoch": 0.97, + "learning_rate": 1.581637235701805e-05, + "loss": 0.1336, + "step": 1341 + }, + { + "epoch": 0.97, + "learning_rate": 1.5810015140551396e-05, + "loss": 0.1283, + "step": 1342 + }, + { + "epoch": 0.97, + "learning_rate": 1.580365437751225e-05, + "loss": 0.1329, + "step": 1343 + }, + { + "epoch": 0.97, + "learning_rate": 1.579729007178338e-05, + "loss": 0.1194, + "step": 1344 + }, + { + "epoch": 0.97, + "learning_rate": 1.5790922227249702e-05, + "loss": 0.1235, + "step": 1345 + }, + { + "epoch": 0.97, + "learning_rate": 1.5784550847798304e-05, + "loss": 0.1252, + "step": 1346 + }, + { + "epoch": 0.97, + "learning_rate": 1.577817593731843e-05, + "loss": 0.129, + "step": 1347 + }, + { + "epoch": 0.98, + "learning_rate": 1.5771797499701472e-05, + "loss": 0.1336, + "step": 1348 + }, + { + "epoch": 0.98, + "learning_rate": 1.5765415538840993e-05, + "loss": 0.1238, + "step": 1349 + }, + { + "epoch": 0.98, + "learning_rate": 1.5759030058632683e-05, + "loss": 0.1467, + "step": 1350 + }, + { + "epoch": 0.98, + "learning_rate": 1.57526410629744e-05, + "loss": 0.1308, + "step": 1351 + }, + { + "epoch": 0.98, + "learning_rate": 1.5746248555766136e-05, + "loss": 0.1335, + "step": 1352 + }, + { + "epoch": 0.98, + "learning_rate": 1.573985254091003e-05, + "loss": 0.1353, + "step": 1353 + }, + { + "epoch": 0.98, + "learning_rate": 1.573345302231037e-05, + "loss": 0.1359, + "step": 1354 + }, + { + "epoch": 0.98, + "learning_rate": 1.5727050003873567e-05, + "loss": 0.1335, + "step": 1355 + }, + { + "epoch": 0.98, + "learning_rate": 1.5720643489508178e-05, + "loss": 0.1445, + "step": 1356 + }, + { + "epoch": 0.98, + "learning_rate": 1.57142334831249e-05, + "loss": 0.1409, + "step": 1357 + }, + { + "epoch": 0.98, + "learning_rate": 1.570781998863655e-05, + "loss": 0.1312, + "step": 1358 + }, + { + "epoch": 0.98, + "learning_rate": 1.5701403009958075e-05, + "loss": 0.1345, + "step": 1359 + }, + { + "epoch": 0.98, + "learning_rate": 1.5694982551006556e-05, + "loss": 0.1263, + "step": 1360 + }, + { + "epoch": 0.98, + "learning_rate": 1.5688558615701195e-05, + "loss": 0.1313, + "step": 1361 + }, + { + "epoch": 0.99, + "learning_rate": 1.5682131207963317e-05, + "loss": 0.1373, + "step": 1362 + }, + { + "epoch": 0.99, + "learning_rate": 1.567570033171636e-05, + "loss": 0.1315, + "step": 1363 + }, + { + "epoch": 0.99, + "learning_rate": 1.5669265990885895e-05, + "loss": 0.1302, + "step": 1364 + }, + { + "epoch": 0.99, + "learning_rate": 1.566282818939959e-05, + "loss": 0.1291, + "step": 1365 + }, + { + "epoch": 0.99, + "learning_rate": 1.565638693118724e-05, + "loss": 0.1343, + "step": 1366 + }, + { + "epoch": 0.99, + "learning_rate": 1.5649942220180735e-05, + "loss": 0.1288, + "step": 1367 + }, + { + "epoch": 0.99, + "learning_rate": 1.564349406031409e-05, + "loss": 0.1286, + "step": 1368 + }, + { + "epoch": 0.99, + "learning_rate": 1.5637042455523407e-05, + "loss": 0.123, + "step": 1369 + }, + { + "epoch": 0.99, + "learning_rate": 1.5630587409746906e-05, + "loss": 0.116, + "step": 1370 + }, + { + "epoch": 0.99, + "learning_rate": 1.5624128926924903e-05, + "loss": 0.1373, + "step": 1371 + }, + { + "epoch": 0.99, + "learning_rate": 1.5617667010999805e-05, + "loss": 0.1205, + "step": 1372 + }, + { + "epoch": 0.99, + "learning_rate": 1.5611201665916124e-05, + "loss": 0.1236, + "step": 1373 + }, + { + "epoch": 0.99, + "learning_rate": 1.5604732895620465e-05, + "loss": 0.1265, + "step": 1374 + }, + { + "epoch": 0.99, + "learning_rate": 1.5598260704061514e-05, + "loss": 0.142, + "step": 1375 + }, + { + "epoch": 1.0, + "learning_rate": 1.5591785095190055e-05, + "loss": 0.1306, + "step": 1376 + }, + { + "epoch": 1.0, + "learning_rate": 1.5585306072958954e-05, + "loss": 0.1341, + "step": 1377 + }, + { + "epoch": 1.0, + "learning_rate": 1.5578823641323163e-05, + "loss": 0.1344, + "step": 1378 + }, + { + "epoch": 1.0, + "learning_rate": 1.557233780423971e-05, + "loss": 0.1216, + "step": 1379 + }, + { + "epoch": 1.0, + "learning_rate": 1.5565848565667705e-05, + "loss": 0.126, + "step": 1380 + }, + { + "epoch": 1.0, + "learning_rate": 1.555935592956834e-05, + "loss": 0.1357, + "step": 1381 + }, + { + "epoch": 1.0, + "learning_rate": 1.555285989990488e-05, + "loss": 0.1308, + "step": 1382 + }, + { + "epoch": 1.0, + "learning_rate": 1.554636048064264e-05, + "loss": 0.1384, + "step": 1383 + }, + { + "epoch": 1.0, + "learning_rate": 1.553985767574904e-05, + "loss": 0.1294, + "step": 1384 + }, + { + "epoch": 1.0, + "learning_rate": 1.5533351489193537e-05, + "loss": 0.1218, + "step": 1385 + }, + { + "epoch": 1.0, + "learning_rate": 1.5526841924947663e-05, + "loss": 0.1216, + "step": 1386 + }, + { + "epoch": 1.0, + "learning_rate": 1.552032898698502e-05, + "loss": 0.1262, + "step": 1387 + }, + { + "epoch": 1.0, + "learning_rate": 1.5513812679281253e-05, + "loss": 0.1217, + "step": 1388 + }, + { + "epoch": 1.0, + "learning_rate": 1.5507293005814075e-05, + "loss": 0.1222, + "step": 1389 + }, + { + "epoch": 1.01, + "learning_rate": 1.5500769970563257e-05, + "loss": 0.1431, + "step": 1390 + }, + { + "epoch": 1.01, + "learning_rate": 1.5494243577510607e-05, + "loss": 0.1325, + "step": 1391 + }, + { + "epoch": 1.01, + "learning_rate": 1.5487713830639996e-05, + "loss": 0.1238, + "step": 1392 + }, + { + "epoch": 1.01, + "learning_rate": 1.5481180733937334e-05, + "loss": 0.1306, + "step": 1393 + }, + { + "epoch": 1.01, + "learning_rate": 1.547464429139059e-05, + "loss": 0.1279, + "step": 1394 + }, + { + "epoch": 1.01, + "learning_rate": 1.5468104506989754e-05, + "loss": 0.1236, + "step": 1395 + }, + { + "epoch": 1.01, + "learning_rate": 1.5461561384726872e-05, + "loss": 0.1203, + "step": 1396 + }, + { + "epoch": 1.01, + "learning_rate": 1.5455014928596025e-05, + "loss": 0.1389, + "step": 1397 + }, + { + "epoch": 1.01, + "learning_rate": 1.5448465142593318e-05, + "loss": 0.1371, + "step": 1398 + }, + { + "epoch": 1.01, + "learning_rate": 1.544191203071691e-05, + "loss": 0.1238, + "step": 1399 + }, + { + "epoch": 1.01, + "learning_rate": 1.543535559696697e-05, + "loss": 0.1397, + "step": 1400 + }, + { + "epoch": 1.01, + "learning_rate": 1.54287958453457e-05, + "loss": 0.1339, + "step": 1401 + }, + { + "epoch": 1.01, + "learning_rate": 1.5422232779857333e-05, + "loss": 0.1276, + "step": 1402 + }, + { + "epoch": 1.01, + "learning_rate": 1.5415666404508125e-05, + "loss": 0.1238, + "step": 1403 + }, + { + "epoch": 1.02, + "learning_rate": 1.540909672330634e-05, + "loss": 0.1302, + "step": 1404 + }, + { + "epoch": 1.02, + "learning_rate": 1.540252374026228e-05, + "loss": 0.1246, + "step": 1405 + }, + { + "epoch": 1.02, + "learning_rate": 1.5395947459388242e-05, + "loss": 0.1333, + "step": 1406 + }, + { + "epoch": 1.02, + "learning_rate": 1.538936788469855e-05, + "loss": 0.137, + "step": 1407 + }, + { + "epoch": 1.02, + "learning_rate": 1.538278502020953e-05, + "loss": 0.1351, + "step": 1408 + }, + { + "epoch": 1.02, + "learning_rate": 1.5376198869939525e-05, + "loss": 0.1238, + "step": 1409 + }, + { + "epoch": 1.02, + "learning_rate": 1.536960943790888e-05, + "loss": 0.1253, + "step": 1410 + }, + { + "epoch": 1.02, + "learning_rate": 1.5363016728139936e-05, + "loss": 0.1221, + "step": 1411 + }, + { + "epoch": 1.02, + "learning_rate": 1.5356420744657044e-05, + "loss": 0.1309, + "step": 1412 + }, + { + "epoch": 1.02, + "learning_rate": 1.5349821491486555e-05, + "loss": 0.1321, + "step": 1413 + }, + { + "epoch": 1.02, + "learning_rate": 1.5343218972656805e-05, + "loss": 0.1246, + "step": 1414 + }, + { + "epoch": 1.02, + "learning_rate": 1.5336613192198132e-05, + "loss": 0.1302, + "step": 1415 + }, + { + "epoch": 1.02, + "learning_rate": 1.533000415414286e-05, + "loss": 0.1323, + "step": 1416 + }, + { + "epoch": 1.02, + "learning_rate": 1.532339186252531e-05, + "loss": 0.1162, + "step": 1417 + }, + { + "epoch": 1.03, + "learning_rate": 1.531677632138178e-05, + "loss": 0.1365, + "step": 1418 + }, + { + "epoch": 1.03, + "learning_rate": 1.5310157534750555e-05, + "loss": 0.1361, + "step": 1419 + }, + { + "epoch": 1.03, + "learning_rate": 1.5303535506671903e-05, + "loss": 0.1243, + "step": 1420 + }, + { + "epoch": 1.03, + "learning_rate": 1.5296910241188064e-05, + "loss": 0.1315, + "step": 1421 + }, + { + "epoch": 1.03, + "learning_rate": 1.5290281742343263e-05, + "loss": 0.1204, + "step": 1422 + }, + { + "epoch": 1.03, + "learning_rate": 1.528365001418369e-05, + "loss": 0.1212, + "step": 1423 + }, + { + "epoch": 1.03, + "learning_rate": 1.5277015060757515e-05, + "loss": 0.1242, + "step": 1424 + }, + { + "epoch": 1.03, + "learning_rate": 1.5270376886114874e-05, + "loss": 0.1428, + "step": 1425 + }, + { + "epoch": 1.03, + "learning_rate": 1.5263735494307862e-05, + "loss": 0.1441, + "step": 1426 + }, + { + "epoch": 1.03, + "learning_rate": 1.5257090889390554e-05, + "loss": 0.1251, + "step": 1427 + }, + { + "epoch": 1.03, + "learning_rate": 1.5250443075418963e-05, + "loss": 0.129, + "step": 1428 + }, + { + "epoch": 1.03, + "learning_rate": 1.5243792056451081e-05, + "loss": 0.1418, + "step": 1429 + }, + { + "epoch": 1.03, + "learning_rate": 1.5237137836546852e-05, + "loss": 0.1204, + "step": 1430 + }, + { + "epoch": 1.04, + "learning_rate": 1.5230480419768163e-05, + "loss": 0.1257, + "step": 1431 + }, + { + "epoch": 1.04, + "learning_rate": 1.5223819810178867e-05, + "loss": 0.1323, + "step": 1432 + }, + { + "epoch": 1.04, + "learning_rate": 1.521715601184476e-05, + "loss": 0.1326, + "step": 1433 + }, + { + "epoch": 1.04, + "learning_rate": 1.5210489028833584e-05, + "loss": 0.1279, + "step": 1434 + }, + { + "epoch": 1.04, + "learning_rate": 1.520381886521502e-05, + "loss": 0.1322, + "step": 1435 + }, + { + "epoch": 1.04, + "learning_rate": 1.5197145525060703e-05, + "loss": 0.1321, + "step": 1436 + }, + { + "epoch": 1.04, + "learning_rate": 1.5190469012444196e-05, + "loss": 0.1268, + "step": 1437 + }, + { + "epoch": 1.04, + "learning_rate": 1.5183789331441e-05, + "loss": 0.134, + "step": 1438 + }, + { + "epoch": 1.04, + "learning_rate": 1.5177106486128557e-05, + "loss": 0.1211, + "step": 1439 + }, + { + "epoch": 1.04, + "learning_rate": 1.5170420480586233e-05, + "loss": 0.1245, + "step": 1440 + }, + { + "epoch": 1.04, + "learning_rate": 1.5163731318895324e-05, + "loss": 0.1245, + "step": 1441 + }, + { + "epoch": 1.04, + "learning_rate": 1.5157039005139065e-05, + "loss": 0.1156, + "step": 1442 + }, + { + "epoch": 1.04, + "learning_rate": 1.5150343543402592e-05, + "loss": 0.1331, + "step": 1443 + }, + { + "epoch": 1.04, + "learning_rate": 1.5143644937772984e-05, + "loss": 0.1281, + "step": 1444 + }, + { + "epoch": 1.05, + "learning_rate": 1.5136943192339224e-05, + "loss": 0.1429, + "step": 1445 + }, + { + "epoch": 1.05, + "learning_rate": 1.5130238311192222e-05, + "loss": 0.1315, + "step": 1446 + }, + { + "epoch": 1.05, + "learning_rate": 1.5123530298424799e-05, + "loss": 0.1216, + "step": 1447 + }, + { + "epoch": 1.05, + "learning_rate": 1.511681915813168e-05, + "loss": 0.1383, + "step": 1448 + }, + { + "epoch": 1.05, + "learning_rate": 1.5110104894409516e-05, + "loss": 0.1424, + "step": 1449 + }, + { + "epoch": 1.05, + "learning_rate": 1.510338751135685e-05, + "loss": 0.1349, + "step": 1450 + }, + { + "epoch": 1.05, + "learning_rate": 1.5096667013074128e-05, + "loss": 0.1371, + "step": 1451 + }, + { + "epoch": 1.05, + "learning_rate": 1.5089943403663714e-05, + "loss": 0.129, + "step": 1452 + }, + { + "epoch": 1.05, + "learning_rate": 1.5083216687229852e-05, + "loss": 0.1147, + "step": 1453 + }, + { + "epoch": 1.05, + "learning_rate": 1.507648686787869e-05, + "loss": 0.1296, + "step": 1454 + }, + { + "epoch": 1.05, + "learning_rate": 1.506975394971828e-05, + "loss": 0.1367, + "step": 1455 + }, + { + "epoch": 1.05, + "learning_rate": 1.5063017936858551e-05, + "loss": 0.1163, + "step": 1456 + }, + { + "epoch": 1.05, + "learning_rate": 1.5056278833411324e-05, + "loss": 0.1329, + "step": 1457 + }, + { + "epoch": 1.05, + "learning_rate": 1.5049536643490312e-05, + "loss": 0.1234, + "step": 1458 + }, + { + "epoch": 1.06, + "learning_rate": 1.5042791371211111e-05, + "loss": 0.1455, + "step": 1459 + }, + { + "epoch": 1.06, + "learning_rate": 1.5036043020691196e-05, + "loss": 0.1204, + "step": 1460 + }, + { + "epoch": 1.06, + "learning_rate": 1.502929159604992e-05, + "loss": 0.1298, + "step": 1461 + }, + { + "epoch": 1.06, + "learning_rate": 1.5022537101408516e-05, + "loss": 0.1292, + "step": 1462 + }, + { + "epoch": 1.06, + "learning_rate": 1.501577954089009e-05, + "loss": 0.1237, + "step": 1463 + }, + { + "epoch": 1.06, + "learning_rate": 1.5009018918619616e-05, + "loss": 0.136, + "step": 1464 + }, + { + "epoch": 1.06, + "learning_rate": 1.5002255238723945e-05, + "loss": 0.131, + "step": 1465 + }, + { + "epoch": 1.06, + "learning_rate": 1.4995488505331782e-05, + "loss": 0.1274, + "step": 1466 + }, + { + "epoch": 1.06, + "learning_rate": 1.4988718722573707e-05, + "loss": 0.137, + "step": 1467 + }, + { + "epoch": 1.06, + "learning_rate": 1.498194589458216e-05, + "loss": 0.1293, + "step": 1468 + }, + { + "epoch": 1.06, + "learning_rate": 1.4975170025491434e-05, + "loss": 0.1261, + "step": 1469 + }, + { + "epoch": 1.06, + "learning_rate": 1.4968391119437686e-05, + "loss": 0.12, + "step": 1470 + }, + { + "epoch": 1.06, + "learning_rate": 1.496160918055892e-05, + "loss": 0.122, + "step": 1471 + }, + { + "epoch": 1.06, + "learning_rate": 1.4954824212994993e-05, + "loss": 0.1257, + "step": 1472 + }, + { + "epoch": 1.07, + "learning_rate": 1.4948036220887614e-05, + "loss": 0.1406, + "step": 1473 + }, + { + "epoch": 1.07, + "learning_rate": 1.4941245208380335e-05, + "loss": 0.1284, + "step": 1474 + }, + { + "epoch": 1.07, + "learning_rate": 1.4934451179618553e-05, + "loss": 0.1339, + "step": 1475 + }, + { + "epoch": 1.07, + "learning_rate": 1.4927654138749503e-05, + "loss": 0.1306, + "step": 1476 + }, + { + "epoch": 1.07, + "learning_rate": 1.4920854089922267e-05, + "loss": 0.1236, + "step": 1477 + }, + { + "epoch": 1.07, + "learning_rate": 1.4914051037287755e-05, + "loss": 0.1251, + "step": 1478 + }, + { + "epoch": 1.07, + "learning_rate": 1.4907244984998714e-05, + "loss": 0.1214, + "step": 1479 + }, + { + "epoch": 1.07, + "learning_rate": 1.4900435937209723e-05, + "loss": 0.1373, + "step": 1480 + }, + { + "epoch": 1.07, + "learning_rate": 1.4893623898077181e-05, + "loss": 0.1322, + "step": 1481 + }, + { + "epoch": 1.07, + "learning_rate": 1.4886808871759328e-05, + "loss": 0.1212, + "step": 1482 + }, + { + "epoch": 1.07, + "learning_rate": 1.4879990862416218e-05, + "loss": 0.1254, + "step": 1483 + }, + { + "epoch": 1.07, + "learning_rate": 1.4873169874209724e-05, + "loss": 0.1345, + "step": 1484 + }, + { + "epoch": 1.07, + "learning_rate": 1.486634591130354e-05, + "loss": 0.1254, + "step": 1485 + }, + { + "epoch": 1.07, + "learning_rate": 1.4859518977863181e-05, + "loss": 0.1327, + "step": 1486 + }, + { + "epoch": 1.08, + "learning_rate": 1.485268907805597e-05, + "loss": 0.1274, + "step": 1487 + }, + { + "epoch": 1.08, + "learning_rate": 1.484585621605104e-05, + "loss": 0.1315, + "step": 1488 + }, + { + "epoch": 1.08, + "learning_rate": 1.4839020396019334e-05, + "loss": 0.12, + "step": 1489 + }, + { + "epoch": 1.08, + "learning_rate": 1.4832181622133599e-05, + "loss": 0.1294, + "step": 1490 + }, + { + "epoch": 1.08, + "learning_rate": 1.4825339898568389e-05, + "loss": 0.1247, + "step": 1491 + }, + { + "epoch": 1.08, + "learning_rate": 1.4818495229500057e-05, + "loss": 0.1324, + "step": 1492 + }, + { + "epoch": 1.08, + "learning_rate": 1.4811647619106748e-05, + "loss": 0.1272, + "step": 1493 + }, + { + "epoch": 1.08, + "learning_rate": 1.480479707156841e-05, + "loss": 0.1249, + "step": 1494 + }, + { + "epoch": 1.08, + "learning_rate": 1.4797943591066784e-05, + "loss": 0.1264, + "step": 1495 + }, + { + "epoch": 1.08, + "learning_rate": 1.4791087181785396e-05, + "loss": 0.1315, + "step": 1496 + }, + { + "epoch": 1.08, + "learning_rate": 1.4784227847909562e-05, + "loss": 0.1297, + "step": 1497 + }, + { + "epoch": 1.08, + "learning_rate": 1.4777365593626384e-05, + "loss": 0.1256, + "step": 1498 + }, + { + "epoch": 1.08, + "learning_rate": 1.477050042312475e-05, + "loss": 0.1231, + "step": 1499 + }, + { + "epoch": 1.08, + "learning_rate": 1.4763632340595319e-05, + "loss": 0.134, + "step": 1500 + }, + { + "epoch": 1.08, + "eval_loss": 0.1258772611618042, + "eval_runtime": 712.5917, + "eval_samples_per_second": 70.166, + "eval_steps_per_second": 2.193, + "step": 1500 + }, + { + "epoch": 1.09, + "learning_rate": 1.4756761350230542e-05, + "loss": 0.1382, + "step": 1501 + }, + { + "epoch": 1.09, + "learning_rate": 1.4749887456224626e-05, + "loss": 0.1352, + "step": 1502 + }, + { + "epoch": 1.09, + "learning_rate": 1.4743010662773567e-05, + "loss": 0.1251, + "step": 1503 + }, + { + "epoch": 1.09, + "learning_rate": 1.4736130974075123e-05, + "loss": 0.1305, + "step": 1504 + }, + { + "epoch": 1.09, + "learning_rate": 1.4729248394328817e-05, + "loss": 0.1331, + "step": 1505 + }, + { + "epoch": 1.09, + "learning_rate": 1.4722362927735947e-05, + "loss": 0.1302, + "step": 1506 + }, + { + "epoch": 1.09, + "learning_rate": 1.4715474578499565e-05, + "loss": 0.1227, + "step": 1507 + }, + { + "epoch": 1.09, + "learning_rate": 1.470858335082448e-05, + "loss": 0.1363, + "step": 1508 + }, + { + "epoch": 1.09, + "learning_rate": 1.4701689248917267e-05, + "loss": 0.1269, + "step": 1509 + }, + { + "epoch": 1.09, + "learning_rate": 1.4694792276986252e-05, + "loss": 0.137, + "step": 1510 + }, + { + "epoch": 1.09, + "learning_rate": 1.4687892439241508e-05, + "loss": 0.1172, + "step": 1511 + }, + { + "epoch": 1.09, + "learning_rate": 1.4680989739894864e-05, + "loss": 0.1296, + "step": 1512 + }, + { + "epoch": 1.09, + "learning_rate": 1.4674084183159894e-05, + "loss": 0.1283, + "step": 1513 + }, + { + "epoch": 1.1, + "learning_rate": 1.4667175773251911e-05, + "loss": 0.127, + "step": 1514 + }, + { + "epoch": 1.1, + "learning_rate": 1.4660264514387978e-05, + "loss": 0.1173, + "step": 1515 + }, + { + "epoch": 1.1, + "learning_rate": 1.4653350410786892e-05, + "loss": 0.1328, + "step": 1516 + }, + { + "epoch": 1.1, + "learning_rate": 1.464643346666919e-05, + "loss": 0.1378, + "step": 1517 + }, + { + "epoch": 1.1, + "learning_rate": 1.4639513686257134e-05, + "loss": 0.1268, + "step": 1518 + }, + { + "epoch": 1.1, + "learning_rate": 1.463259107377473e-05, + "loss": 0.1349, + "step": 1519 + }, + { + "epoch": 1.1, + "learning_rate": 1.4625665633447702e-05, + "loss": 0.1273, + "step": 1520 + }, + { + "epoch": 1.1, + "learning_rate": 1.4618737369503506e-05, + "loss": 0.1388, + "step": 1521 + }, + { + "epoch": 1.1, + "learning_rate": 1.4611806286171324e-05, + "loss": 0.1327, + "step": 1522 + }, + { + "epoch": 1.1, + "learning_rate": 1.4604872387682052e-05, + "loss": 0.1303, + "step": 1523 + }, + { + "epoch": 1.1, + "learning_rate": 1.459793567826831e-05, + "loss": 0.1236, + "step": 1524 + }, + { + "epoch": 1.1, + "learning_rate": 1.4590996162164428e-05, + "loss": 0.1237, + "step": 1525 + }, + { + "epoch": 1.1, + "learning_rate": 1.4584053843606456e-05, + "loss": 0.1306, + "step": 1526 + }, + { + "epoch": 1.1, + "learning_rate": 1.457710872683215e-05, + "loss": 0.1275, + "step": 1527 + }, + { + "epoch": 1.11, + "learning_rate": 1.4570160816080978e-05, + "loss": 0.1351, + "step": 1528 + }, + { + "epoch": 1.11, + "learning_rate": 1.4563210115594109e-05, + "loss": 0.1305, + "step": 1529 + }, + { + "epoch": 1.11, + "learning_rate": 1.4556256629614417e-05, + "loss": 0.1339, + "step": 1530 + }, + { + "epoch": 1.11, + "learning_rate": 1.454930036238648e-05, + "loss": 0.1278, + "step": 1531 + }, + { + "epoch": 1.11, + "learning_rate": 1.4542341318156568e-05, + "loss": 0.1317, + "step": 1532 + }, + { + "epoch": 1.11, + "learning_rate": 1.4535379501172646e-05, + "loss": 0.1283, + "step": 1533 + }, + { + "epoch": 1.11, + "learning_rate": 1.4528414915684376e-05, + "loss": 0.1352, + "step": 1534 + }, + { + "epoch": 1.11, + "learning_rate": 1.452144756594311e-05, + "loss": 0.128, + "step": 1535 + }, + { + "epoch": 1.11, + "learning_rate": 1.4514477456201881e-05, + "loss": 0.1346, + "step": 1536 + }, + { + "epoch": 1.11, + "learning_rate": 1.4507504590715413e-05, + "loss": 0.1362, + "step": 1537 + }, + { + "epoch": 1.11, + "learning_rate": 1.450052897374011e-05, + "loss": 0.1269, + "step": 1538 + }, + { + "epoch": 1.11, + "learning_rate": 1.449355060953406e-05, + "loss": 0.1224, + "step": 1539 + }, + { + "epoch": 1.11, + "learning_rate": 1.4486569502357018e-05, + "loss": 0.1127, + "step": 1540 + }, + { + "epoch": 1.11, + "learning_rate": 1.447958565647042e-05, + "loss": 0.1402, + "step": 1541 + }, + { + "epoch": 1.12, + "learning_rate": 1.4472599076137374e-05, + "loss": 0.1348, + "step": 1542 + }, + { + "epoch": 1.12, + "learning_rate": 1.4465609765622656e-05, + "loss": 0.1256, + "step": 1543 + }, + { + "epoch": 1.12, + "learning_rate": 1.4458617729192713e-05, + "loss": 0.127, + "step": 1544 + }, + { + "epoch": 1.12, + "learning_rate": 1.4451622971115641e-05, + "loss": 0.1288, + "step": 1545 + }, + { + "epoch": 1.12, + "learning_rate": 1.444462549566122e-05, + "loss": 0.1278, + "step": 1546 + }, + { + "epoch": 1.12, + "learning_rate": 1.4437625307100869e-05, + "loss": 0.1232, + "step": 1547 + }, + { + "epoch": 1.12, + "learning_rate": 1.4430622409707674e-05, + "loss": 0.1215, + "step": 1548 + }, + { + "epoch": 1.12, + "learning_rate": 1.4423616807756367e-05, + "loss": 0.1299, + "step": 1549 + }, + { + "epoch": 1.12, + "learning_rate": 1.4416608505523341e-05, + "loss": 0.1339, + "step": 1550 + }, + { + "epoch": 1.12, + "learning_rate": 1.440959750728663e-05, + "loss": 0.1271, + "step": 1551 + }, + { + "epoch": 1.12, + "learning_rate": 1.4402583817325915e-05, + "loss": 0.1297, + "step": 1552 + }, + { + "epoch": 1.12, + "learning_rate": 1.4395567439922517e-05, + "loss": 0.1283, + "step": 1553 + }, + { + "epoch": 1.12, + "learning_rate": 1.4388548379359407e-05, + "loss": 0.122, + "step": 1554 + }, + { + "epoch": 1.12, + "learning_rate": 1.4381526639921183e-05, + "loss": 0.1374, + "step": 1555 + }, + { + "epoch": 1.13, + "learning_rate": 1.4374502225894085e-05, + "loss": 0.1357, + "step": 1556 + }, + { + "epoch": 1.13, + "learning_rate": 1.4367475141565984e-05, + "loss": 0.1253, + "step": 1557 + }, + { + "epoch": 1.13, + "learning_rate": 1.4360445391226384e-05, + "loss": 0.1239, + "step": 1558 + }, + { + "epoch": 1.13, + "learning_rate": 1.4353412979166404e-05, + "loss": 0.1294, + "step": 1559 + }, + { + "epoch": 1.13, + "learning_rate": 1.4346377909678805e-05, + "loss": 0.1283, + "step": 1560 + }, + { + "epoch": 1.13, + "learning_rate": 1.433934018705796e-05, + "loss": 0.1216, + "step": 1561 + }, + { + "epoch": 1.13, + "learning_rate": 1.4332299815599862e-05, + "loss": 0.1157, + "step": 1562 + }, + { + "epoch": 1.13, + "learning_rate": 1.4325256799602122e-05, + "loss": 0.1326, + "step": 1563 + }, + { + "epoch": 1.13, + "learning_rate": 1.431821114336397e-05, + "loss": 0.1266, + "step": 1564 + }, + { + "epoch": 1.13, + "learning_rate": 1.431116285118624e-05, + "loss": 0.1269, + "step": 1565 + }, + { + "epoch": 1.13, + "learning_rate": 1.4304111927371379e-05, + "loss": 0.1244, + "step": 1566 + }, + { + "epoch": 1.13, + "learning_rate": 1.4297058376223438e-05, + "loss": 0.1213, + "step": 1567 + }, + { + "epoch": 1.13, + "learning_rate": 1.4290002202048077e-05, + "loss": 0.1432, + "step": 1568 + }, + { + "epoch": 1.13, + "learning_rate": 1.4282943409152553e-05, + "loss": 0.1373, + "step": 1569 + }, + { + "epoch": 1.14, + "learning_rate": 1.427588200184572e-05, + "loss": 0.1379, + "step": 1570 + }, + { + "epoch": 1.14, + "learning_rate": 1.426881798443803e-05, + "loss": 0.1273, + "step": 1571 + }, + { + "epoch": 1.14, + "learning_rate": 1.4261751361241536e-05, + "loss": 0.1367, + "step": 1572 + }, + { + "epoch": 1.14, + "learning_rate": 1.4254682136569865e-05, + "loss": 0.124, + "step": 1573 + }, + { + "epoch": 1.14, + "learning_rate": 1.4247610314738247e-05, + "loss": 0.1272, + "step": 1574 + }, + { + "epoch": 1.14, + "learning_rate": 1.4240535900063487e-05, + "loss": 0.1372, + "step": 1575 + }, + { + "epoch": 1.14, + "learning_rate": 1.4233458896863984e-05, + "loss": 0.1291, + "step": 1576 + }, + { + "epoch": 1.14, + "learning_rate": 1.4226379309459702e-05, + "loss": 0.1298, + "step": 1577 + }, + { + "epoch": 1.14, + "learning_rate": 1.4219297142172196e-05, + "loss": 0.1297, + "step": 1578 + }, + { + "epoch": 1.14, + "learning_rate": 1.4212212399324591e-05, + "loss": 0.1288, + "step": 1579 + }, + { + "epoch": 1.14, + "learning_rate": 1.4205125085241583e-05, + "loss": 0.1334, + "step": 1580 + }, + { + "epoch": 1.14, + "learning_rate": 1.4198035204249436e-05, + "loss": 0.1212, + "step": 1581 + }, + { + "epoch": 1.14, + "learning_rate": 1.4190942760675985e-05, + "loss": 0.1208, + "step": 1582 + }, + { + "epoch": 1.15, + "learning_rate": 1.4183847758850629e-05, + "loss": 0.1237, + "step": 1583 + }, + { + "epoch": 1.15, + "learning_rate": 1.4176750203104324e-05, + "loss": 0.1293, + "step": 1584 + }, + { + "epoch": 1.15, + "learning_rate": 1.4169650097769592e-05, + "loss": 0.1316, + "step": 1585 + }, + { + "epoch": 1.15, + "learning_rate": 1.4162547447180501e-05, + "loss": 0.1202, + "step": 1586 + }, + { + "epoch": 1.15, + "learning_rate": 1.4155442255672687e-05, + "loss": 0.1322, + "step": 1587 + }, + { + "epoch": 1.15, + "learning_rate": 1.4148334527583325e-05, + "loss": 0.1287, + "step": 1588 + }, + { + "epoch": 1.15, + "learning_rate": 1.4141224267251143e-05, + "loss": 0.1296, + "step": 1589 + }, + { + "epoch": 1.15, + "learning_rate": 1.4134111479016416e-05, + "loss": 0.1289, + "step": 1590 + }, + { + "epoch": 1.15, + "learning_rate": 1.412699616722096e-05, + "loss": 0.1299, + "step": 1591 + }, + { + "epoch": 1.15, + "learning_rate": 1.4119878336208129e-05, + "loss": 0.1293, + "step": 1592 + }, + { + "epoch": 1.15, + "learning_rate": 1.4112757990322816e-05, + "loss": 0.1305, + "step": 1593 + }, + { + "epoch": 1.15, + "learning_rate": 1.4105635133911459e-05, + "loss": 0.1224, + "step": 1594 + }, + { + "epoch": 1.15, + "learning_rate": 1.4098509771322015e-05, + "loss": 0.1341, + "step": 1595 + }, + { + "epoch": 1.15, + "learning_rate": 1.4091381906903975e-05, + "loss": 0.1253, + "step": 1596 + }, + { + "epoch": 1.16, + "learning_rate": 1.4084251545008363e-05, + "loss": 0.128, + "step": 1597 + }, + { + "epoch": 1.16, + "learning_rate": 1.4077118689987716e-05, + "loss": 0.1286, + "step": 1598 + }, + { + "epoch": 1.16, + "learning_rate": 1.4069983346196105e-05, + "loss": 0.1267, + "step": 1599 + }, + { + "epoch": 1.16, + "learning_rate": 1.4062845517989114e-05, + "loss": 0.1136, + "step": 1600 + }, + { + "epoch": 1.16, + "learning_rate": 1.4055705209723845e-05, + "loss": 0.1323, + "step": 1601 + }, + { + "epoch": 1.16, + "learning_rate": 1.404856242575891e-05, + "loss": 0.1301, + "step": 1602 + }, + { + "epoch": 1.16, + "learning_rate": 1.4041417170454436e-05, + "loss": 0.117, + "step": 1603 + }, + { + "epoch": 1.16, + "learning_rate": 1.4034269448172062e-05, + "loss": 0.1331, + "step": 1604 + }, + { + "epoch": 1.16, + "learning_rate": 1.4027119263274927e-05, + "loss": 0.121, + "step": 1605 + }, + { + "epoch": 1.16, + "learning_rate": 1.4019966620127674e-05, + "loss": 0.1274, + "step": 1606 + }, + { + "epoch": 1.16, + "learning_rate": 1.4012811523096448e-05, + "loss": 0.1238, + "step": 1607 + }, + { + "epoch": 1.16, + "learning_rate": 1.4005653976548895e-05, + "loss": 0.1267, + "step": 1608 + }, + { + "epoch": 1.16, + "learning_rate": 1.3998493984854146e-05, + "loss": 0.1238, + "step": 1609 + }, + { + "epoch": 1.16, + "learning_rate": 1.3991331552382839e-05, + "loss": 0.1338, + "step": 1610 + }, + { + "epoch": 1.17, + "learning_rate": 1.398416668350709e-05, + "loss": 0.1286, + "step": 1611 + }, + { + "epoch": 1.17, + "learning_rate": 1.3976999382600509e-05, + "loss": 0.1224, + "step": 1612 + }, + { + "epoch": 1.17, + "learning_rate": 1.3969829654038187e-05, + "loss": 0.1285, + "step": 1613 + }, + { + "epoch": 1.17, + "learning_rate": 1.3962657502196697e-05, + "loss": 0.1218, + "step": 1614 + }, + { + "epoch": 1.17, + "learning_rate": 1.3955482931454095e-05, + "loss": 0.1248, + "step": 1615 + }, + { + "epoch": 1.17, + "learning_rate": 1.3948305946189909e-05, + "loss": 0.1214, + "step": 1616 + }, + { + "epoch": 1.17, + "learning_rate": 1.3941126550785145e-05, + "loss": 0.1153, + "step": 1617 + }, + { + "epoch": 1.17, + "learning_rate": 1.3933944749622275e-05, + "loss": 0.1327, + "step": 1618 + }, + { + "epoch": 1.17, + "learning_rate": 1.3926760547085242e-05, + "loss": 0.1146, + "step": 1619 + }, + { + "epoch": 1.17, + "learning_rate": 1.3919573947559462e-05, + "loss": 0.1389, + "step": 1620 + }, + { + "epoch": 1.17, + "learning_rate": 1.3912384955431801e-05, + "loss": 0.1218, + "step": 1621 + }, + { + "epoch": 1.17, + "learning_rate": 1.3905193575090595e-05, + "loss": 0.122, + "step": 1622 + }, + { + "epoch": 1.17, + "learning_rate": 1.3897999810925634e-05, + "loss": 0.1331, + "step": 1623 + }, + { + "epoch": 1.17, + "learning_rate": 1.3890803667328162e-05, + "loss": 0.1355, + "step": 1624 + }, + { + "epoch": 1.18, + "learning_rate": 1.3883605148690883e-05, + "loss": 0.1373, + "step": 1625 + }, + { + "epoch": 1.18, + "learning_rate": 1.387640425940794e-05, + "loss": 0.1193, + "step": 1626 + }, + { + "epoch": 1.18, + "learning_rate": 1.386920100387493e-05, + "loss": 0.1274, + "step": 1627 + }, + { + "epoch": 1.18, + "learning_rate": 1.3861995386488893e-05, + "loss": 0.1229, + "step": 1628 + }, + { + "epoch": 1.18, + "learning_rate": 1.3854787411648311e-05, + "loss": 0.1358, + "step": 1629 + }, + { + "epoch": 1.18, + "learning_rate": 1.3847577083753105e-05, + "loss": 0.125, + "step": 1630 + }, + { + "epoch": 1.18, + "learning_rate": 1.3840364407204634e-05, + "loss": 0.1231, + "step": 1631 + }, + { + "epoch": 1.18, + "learning_rate": 1.383314938640568e-05, + "loss": 0.1211, + "step": 1632 + }, + { + "epoch": 1.18, + "learning_rate": 1.3825932025760476e-05, + "loss": 0.1278, + "step": 1633 + }, + { + "epoch": 1.18, + "learning_rate": 1.3818712329674664e-05, + "loss": 0.1304, + "step": 1634 + }, + { + "epoch": 1.18, + "learning_rate": 1.3811490302555321e-05, + "loss": 0.1271, + "step": 1635 + }, + { + "epoch": 1.18, + "learning_rate": 1.3804265948810951e-05, + "loss": 0.1216, + "step": 1636 + }, + { + "epoch": 1.18, + "learning_rate": 1.3797039272851464e-05, + "loss": 0.1232, + "step": 1637 + }, + { + "epoch": 1.18, + "learning_rate": 1.3789810279088203e-05, + "loss": 0.1342, + "step": 1638 + }, + { + "epoch": 1.19, + "learning_rate": 1.3782578971933912e-05, + "loss": 0.1301, + "step": 1639 + }, + { + "epoch": 1.19, + "learning_rate": 1.3775345355802766e-05, + "loss": 0.1302, + "step": 1640 + }, + { + "epoch": 1.19, + "learning_rate": 1.3768109435110324e-05, + "loss": 0.1238, + "step": 1641 + }, + { + "epoch": 1.19, + "learning_rate": 1.3760871214273575e-05, + "loss": 0.1312, + "step": 1642 + }, + { + "epoch": 1.19, + "learning_rate": 1.3753630697710895e-05, + "loss": 0.1304, + "step": 1643 + }, + { + "epoch": 1.19, + "learning_rate": 1.3746387889842076e-05, + "loss": 0.1368, + "step": 1644 + }, + { + "epoch": 1.19, + "learning_rate": 1.3739142795088298e-05, + "loss": 0.1344, + "step": 1645 + }, + { + "epoch": 1.19, + "learning_rate": 1.3731895417872144e-05, + "loss": 0.1273, + "step": 1646 + }, + { + "epoch": 1.19, + "learning_rate": 1.3724645762617578e-05, + "loss": 0.1386, + "step": 1647 + }, + { + "epoch": 1.19, + "learning_rate": 1.3717393833749974e-05, + "loss": 0.1255, + "step": 1648 + }, + { + "epoch": 1.19, + "learning_rate": 1.3710139635696077e-05, + "loss": 0.1339, + "step": 1649 + }, + { + "epoch": 1.19, + "learning_rate": 1.3702883172884023e-05, + "loss": 0.1281, + "step": 1650 + }, + { + "epoch": 1.19, + "learning_rate": 1.3695624449743335e-05, + "loss": 0.1258, + "step": 1651 + }, + { + "epoch": 1.19, + "learning_rate": 1.3688363470704906e-05, + "loss": 0.1289, + "step": 1652 + }, + { + "epoch": 1.2, + "learning_rate": 1.3681100240201017e-05, + "loss": 0.1248, + "step": 1653 + }, + { + "epoch": 1.2, + "learning_rate": 1.3673834762665314e-05, + "loss": 0.1209, + "step": 1654 + }, + { + "epoch": 1.2, + "learning_rate": 1.3666567042532823e-05, + "loss": 0.1284, + "step": 1655 + }, + { + "epoch": 1.2, + "learning_rate": 1.3659297084239929e-05, + "loss": 0.1248, + "step": 1656 + }, + { + "epoch": 1.2, + "learning_rate": 1.3652024892224394e-05, + "loss": 0.1384, + "step": 1657 + }, + { + "epoch": 1.2, + "learning_rate": 1.3644750470925331e-05, + "loss": 0.132, + "step": 1658 + }, + { + "epoch": 1.2, + "learning_rate": 1.3637473824783229e-05, + "loss": 0.123, + "step": 1659 + }, + { + "epoch": 1.2, + "learning_rate": 1.3630194958239926e-05, + "loss": 0.1283, + "step": 1660 + }, + { + "epoch": 1.2, + "learning_rate": 1.3622913875738613e-05, + "loss": 0.1227, + "step": 1661 + }, + { + "epoch": 1.2, + "learning_rate": 1.361563058172384e-05, + "loss": 0.1223, + "step": 1662 + }, + { + "epoch": 1.2, + "learning_rate": 1.3608345080641504e-05, + "loss": 0.1282, + "step": 1663 + }, + { + "epoch": 1.2, + "learning_rate": 1.3601057376938847e-05, + "loss": 0.1373, + "step": 1664 + }, + { + "epoch": 1.2, + "learning_rate": 1.359376747506446e-05, + "loss": 0.123, + "step": 1665 + }, + { + "epoch": 1.21, + "learning_rate": 1.3586475379468271e-05, + "loss": 0.131, + "step": 1666 + }, + { + "epoch": 1.21, + "learning_rate": 1.3579181094601557e-05, + "loss": 0.1178, + "step": 1667 + }, + { + "epoch": 1.21, + "learning_rate": 1.3571884624916918e-05, + "loss": 0.1258, + "step": 1668 + }, + { + "epoch": 1.21, + "learning_rate": 1.3564585974868296e-05, + "loss": 0.1266, + "step": 1669 + }, + { + "epoch": 1.21, + "learning_rate": 1.355728514891096e-05, + "loss": 0.1239, + "step": 1670 + }, + { + "epoch": 1.21, + "learning_rate": 1.3549982151501512e-05, + "loss": 0.1283, + "step": 1671 + }, + { + "epoch": 1.21, + "learning_rate": 1.3542676987097878e-05, + "loss": 0.1244, + "step": 1672 + }, + { + "epoch": 1.21, + "learning_rate": 1.3535369660159301e-05, + "loss": 0.1269, + "step": 1673 + }, + { + "epoch": 1.21, + "learning_rate": 1.352806017514635e-05, + "loss": 0.138, + "step": 1674 + }, + { + "epoch": 1.21, + "learning_rate": 1.3520748536520908e-05, + "loss": 0.1322, + "step": 1675 + }, + { + "epoch": 1.21, + "learning_rate": 1.3513434748746181e-05, + "loss": 0.1256, + "step": 1676 + }, + { + "epoch": 1.21, + "learning_rate": 1.3506118816286675e-05, + "loss": 0.1279, + "step": 1677 + }, + { + "epoch": 1.21, + "learning_rate": 1.3498800743608212e-05, + "loss": 0.1225, + "step": 1678 + }, + { + "epoch": 1.21, + "learning_rate": 1.3491480535177916e-05, + "loss": 0.1201, + "step": 1679 + }, + { + "epoch": 1.22, + "learning_rate": 1.3484158195464224e-05, + "loss": 0.1252, + "step": 1680 + }, + { + "epoch": 1.22, + "learning_rate": 1.3476833728936862e-05, + "loss": 0.1364, + "step": 1681 + }, + { + "epoch": 1.22, + "learning_rate": 1.3469507140066862e-05, + "loss": 0.1219, + "step": 1682 + }, + { + "epoch": 1.22, + "learning_rate": 1.3462178433326548e-05, + "loss": 0.1316, + "step": 1683 + }, + { + "epoch": 1.22, + "learning_rate": 1.3454847613189542e-05, + "loss": 0.1322, + "step": 1684 + }, + { + "epoch": 1.22, + "learning_rate": 1.3447514684130747e-05, + "loss": 0.1212, + "step": 1685 + }, + { + "epoch": 1.22, + "learning_rate": 1.3440179650626364e-05, + "loss": 0.1233, + "step": 1686 + }, + { + "epoch": 1.22, + "learning_rate": 1.3432842517153868e-05, + "loss": 0.1255, + "step": 1687 + }, + { + "epoch": 1.22, + "learning_rate": 1.3425503288192024e-05, + "loss": 0.1245, + "step": 1688 + }, + { + "epoch": 1.22, + "learning_rate": 1.3418161968220873e-05, + "loss": 0.1183, + "step": 1689 + }, + { + "epoch": 1.22, + "learning_rate": 1.3410818561721735e-05, + "loss": 0.1323, + "step": 1690 + }, + { + "epoch": 1.22, + "learning_rate": 1.3403473073177198e-05, + "loss": 0.1257, + "step": 1691 + }, + { + "epoch": 1.22, + "learning_rate": 1.3396125507071128e-05, + "loss": 0.1289, + "step": 1692 + }, + { + "epoch": 1.22, + "learning_rate": 1.3388775867888648e-05, + "loss": 0.1433, + "step": 1693 + }, + { + "epoch": 1.23, + "learning_rate": 1.338142416011616e-05, + "loss": 0.1323, + "step": 1694 + }, + { + "epoch": 1.23, + "learning_rate": 1.3374070388241325e-05, + "loss": 0.1295, + "step": 1695 + }, + { + "epoch": 1.23, + "learning_rate": 1.3366714556753056e-05, + "loss": 0.1259, + "step": 1696 + }, + { + "epoch": 1.23, + "learning_rate": 1.3359356670141528e-05, + "loss": 0.1168, + "step": 1697 + }, + { + "epoch": 1.23, + "learning_rate": 1.3351996732898176e-05, + "loss": 0.131, + "step": 1698 + }, + { + "epoch": 1.23, + "learning_rate": 1.334463474951568e-05, + "loss": 0.1364, + "step": 1699 + }, + { + "epoch": 1.23, + "learning_rate": 1.333727072448797e-05, + "loss": 0.1332, + "step": 1700 + }, + { + "epoch": 1.23, + "learning_rate": 1.3329904662310227e-05, + "loss": 0.1272, + "step": 1701 + }, + { + "epoch": 1.23, + "learning_rate": 1.332253656747887e-05, + "loss": 0.1328, + "step": 1702 + }, + { + "epoch": 1.23, + "learning_rate": 1.3315166444491557e-05, + "loss": 0.1258, + "step": 1703 + }, + { + "epoch": 1.23, + "learning_rate": 1.3307794297847191e-05, + "loss": 0.1263, + "step": 1704 + }, + { + "epoch": 1.23, + "learning_rate": 1.330042013204591e-05, + "loss": 0.1223, + "step": 1705 + }, + { + "epoch": 1.23, + "learning_rate": 1.3293043951589077e-05, + "loss": 0.1261, + "step": 1706 + }, + { + "epoch": 1.23, + "learning_rate": 1.3285665760979292e-05, + "loss": 0.113, + "step": 1707 + }, + { + "epoch": 1.24, + "learning_rate": 1.3278285564720376e-05, + "loss": 0.1364, + "step": 1708 + }, + { + "epoch": 1.24, + "learning_rate": 1.3270903367317383e-05, + "loss": 0.126, + "step": 1709 + }, + { + "epoch": 1.24, + "learning_rate": 1.3263519173276578e-05, + "loss": 0.1352, + "step": 1710 + }, + { + "epoch": 1.24, + "learning_rate": 1.3256132987105452e-05, + "loss": 0.1213, + "step": 1711 + }, + { + "epoch": 1.24, + "learning_rate": 1.324874481331271e-05, + "loss": 0.1341, + "step": 1712 + }, + { + "epoch": 1.24, + "learning_rate": 1.3241354656408275e-05, + "loss": 0.1344, + "step": 1713 + }, + { + "epoch": 1.24, + "learning_rate": 1.3233962520903265e-05, + "loss": 0.1262, + "step": 1714 + }, + { + "epoch": 1.24, + "learning_rate": 1.3226568411310026e-05, + "loss": 0.1238, + "step": 1715 + }, + { + "epoch": 1.24, + "learning_rate": 1.3219172332142099e-05, + "loss": 0.1251, + "step": 1716 + }, + { + "epoch": 1.24, + "learning_rate": 1.3211774287914224e-05, + "loss": 0.1315, + "step": 1717 + }, + { + "epoch": 1.24, + "learning_rate": 1.3204374283142342e-05, + "loss": 0.1229, + "step": 1718 + }, + { + "epoch": 1.24, + "learning_rate": 1.3196972322343602e-05, + "loss": 0.1269, + "step": 1719 + }, + { + "epoch": 1.24, + "learning_rate": 1.3189568410036333e-05, + "loss": 0.1391, + "step": 1720 + }, + { + "epoch": 1.24, + "learning_rate": 1.3182162550740063e-05, + "loss": 0.1272, + "step": 1721 + }, + { + "epoch": 1.25, + "learning_rate": 1.3174754748975503e-05, + "loss": 0.1326, + "step": 1722 + }, + { + "epoch": 1.25, + "learning_rate": 1.3167345009264553e-05, + "loss": 0.1324, + "step": 1723 + }, + { + "epoch": 1.25, + "learning_rate": 1.31599333361303e-05, + "loss": 0.1365, + "step": 1724 + }, + { + "epoch": 1.25, + "learning_rate": 1.3152519734097003e-05, + "loss": 0.1279, + "step": 1725 + }, + { + "epoch": 1.25, + "learning_rate": 1.3145104207690102e-05, + "loss": 0.1253, + "step": 1726 + }, + { + "epoch": 1.25, + "learning_rate": 1.3137686761436212e-05, + "loss": 0.1244, + "step": 1727 + }, + { + "epoch": 1.25, + "learning_rate": 1.3130267399863124e-05, + "loss": 0.1302, + "step": 1728 + }, + { + "epoch": 1.25, + "learning_rate": 1.312284612749979e-05, + "loss": 0.1334, + "step": 1729 + }, + { + "epoch": 1.25, + "learning_rate": 1.3115422948876333e-05, + "loss": 0.1231, + "step": 1730 + }, + { + "epoch": 1.25, + "learning_rate": 1.3107997868524042e-05, + "loss": 0.1301, + "step": 1731 + }, + { + "epoch": 1.25, + "learning_rate": 1.3100570890975359e-05, + "loss": 0.1255, + "step": 1732 + }, + { + "epoch": 1.25, + "learning_rate": 1.3093142020763895e-05, + "loss": 0.1301, + "step": 1733 + }, + { + "epoch": 1.25, + "learning_rate": 1.3085711262424406e-05, + "loss": 0.1307, + "step": 1734 + }, + { + "epoch": 1.25, + "learning_rate": 1.3078278620492806e-05, + "loss": 0.1203, + "step": 1735 + }, + { + "epoch": 1.26, + "learning_rate": 1.3070844099506158e-05, + "loss": 0.1263, + "step": 1736 + }, + { + "epoch": 1.26, + "learning_rate": 1.3063407704002676e-05, + "loss": 0.1276, + "step": 1737 + }, + { + "epoch": 1.26, + "learning_rate": 1.3055969438521703e-05, + "loss": 0.1266, + "step": 1738 + }, + { + "epoch": 1.26, + "learning_rate": 1.3048529307603745e-05, + "loss": 0.1305, + "step": 1739 + }, + { + "epoch": 1.26, + "learning_rate": 1.3041087315790434e-05, + "loss": 0.1279, + "step": 1740 + }, + { + "epoch": 1.26, + "learning_rate": 1.3033643467624541e-05, + "loss": 0.1324, + "step": 1741 + }, + { + "epoch": 1.26, + "learning_rate": 1.3026197767649964e-05, + "loss": 0.1342, + "step": 1742 + }, + { + "epoch": 1.26, + "learning_rate": 1.3018750220411742e-05, + "loss": 0.1371, + "step": 1743 + }, + { + "epoch": 1.26, + "learning_rate": 1.3011300830456035e-05, + "loss": 0.1425, + "step": 1744 + }, + { + "epoch": 1.26, + "learning_rate": 1.3003849602330126e-05, + "loss": 0.1186, + "step": 1745 + }, + { + "epoch": 1.26, + "learning_rate": 1.2996396540582428e-05, + "loss": 0.131, + "step": 1746 + }, + { + "epoch": 1.26, + "learning_rate": 1.2988941649762463e-05, + "loss": 0.1187, + "step": 1747 + }, + { + "epoch": 1.26, + "learning_rate": 1.2981484934420881e-05, + "loss": 0.1215, + "step": 1748 + }, + { + "epoch": 1.27, + "learning_rate": 1.2974026399109437e-05, + "loss": 0.1179, + "step": 1749 + }, + { + "epoch": 1.27, + "learning_rate": 1.2966566048380998e-05, + "loss": 0.1334, + "step": 1750 + }, + { + "epoch": 1.27, + "learning_rate": 1.2959103886789546e-05, + "loss": 0.1275, + "step": 1751 + }, + { + "epoch": 1.27, + "learning_rate": 1.2951639918890157e-05, + "loss": 0.1194, + "step": 1752 + }, + { + "epoch": 1.27, + "learning_rate": 1.2944174149239022e-05, + "loss": 0.134, + "step": 1753 + }, + { + "epoch": 1.27, + "learning_rate": 1.293670658239342e-05, + "loss": 0.1299, + "step": 1754 + }, + { + "epoch": 1.27, + "learning_rate": 1.2929237222911737e-05, + "loss": 0.1313, + "step": 1755 + }, + { + "epoch": 1.27, + "learning_rate": 1.2921766075353446e-05, + "loss": 0.1425, + "step": 1756 + }, + { + "epoch": 1.27, + "learning_rate": 1.2914293144279117e-05, + "loss": 0.1346, + "step": 1757 + }, + { + "epoch": 1.27, + "learning_rate": 1.2906818434250403e-05, + "loss": 0.1229, + "step": 1758 + }, + { + "epoch": 1.27, + "learning_rate": 1.289934194983005e-05, + "loss": 0.1207, + "step": 1759 + }, + { + "epoch": 1.27, + "learning_rate": 1.2891863695581884e-05, + "loss": 0.1321, + "step": 1760 + }, + { + "epoch": 1.27, + "learning_rate": 1.2884383676070806e-05, + "loss": 0.1211, + "step": 1761 + }, + { + "epoch": 1.27, + "learning_rate": 1.2876901895862799e-05, + "loss": 0.1286, + "step": 1762 + }, + { + "epoch": 1.28, + "learning_rate": 1.2869418359524929e-05, + "loss": 0.1266, + "step": 1763 + }, + { + "epoch": 1.28, + "learning_rate": 1.2861933071625314e-05, + "loss": 0.1307, + "step": 1764 + }, + { + "epoch": 1.28, + "learning_rate": 1.2854446036733164e-05, + "loss": 0.1227, + "step": 1765 + }, + { + "epoch": 1.28, + "learning_rate": 1.284695725941874e-05, + "loss": 0.1284, + "step": 1766 + }, + { + "epoch": 1.28, + "learning_rate": 1.2839466744253368e-05, + "loss": 0.1237, + "step": 1767 + }, + { + "epoch": 1.28, + "learning_rate": 1.2831974495809445e-05, + "loss": 0.1187, + "step": 1768 + }, + { + "epoch": 1.28, + "learning_rate": 1.2824480518660414e-05, + "loss": 0.1285, + "step": 1769 + }, + { + "epoch": 1.28, + "learning_rate": 1.2816984817380779e-05, + "loss": 0.131, + "step": 1770 + }, + { + "epoch": 1.28, + "learning_rate": 1.2809487396546097e-05, + "loss": 0.1247, + "step": 1771 + }, + { + "epoch": 1.28, + "learning_rate": 1.280198826073297e-05, + "loss": 0.143, + "step": 1772 + }, + { + "epoch": 1.28, + "learning_rate": 1.2794487414519053e-05, + "loss": 0.1169, + "step": 1773 + }, + { + "epoch": 1.28, + "learning_rate": 1.2786984862483043e-05, + "loss": 0.1248, + "step": 1774 + }, + { + "epoch": 1.28, + "learning_rate": 1.2779480609204677e-05, + "loss": 0.1265, + "step": 1775 + }, + { + "epoch": 1.28, + "learning_rate": 1.2771974659264731e-05, + "loss": 0.1242, + "step": 1776 + }, + { + "epoch": 1.29, + "learning_rate": 1.2764467017245014e-05, + "loss": 0.1266, + "step": 1777 + }, + { + "epoch": 1.29, + "learning_rate": 1.2756957687728377e-05, + "loss": 0.1365, + "step": 1778 + }, + { + "epoch": 1.29, + "learning_rate": 1.2749446675298689e-05, + "loss": 0.1198, + "step": 1779 + }, + { + "epoch": 1.29, + "learning_rate": 1.2741933984540853e-05, + "loss": 0.1234, + "step": 1780 + }, + { + "epoch": 1.29, + "learning_rate": 1.2734419620040797e-05, + "loss": 0.1242, + "step": 1781 + }, + { + "epoch": 1.29, + "learning_rate": 1.2726903586385468e-05, + "loss": 0.1235, + "step": 1782 + }, + { + "epoch": 1.29, + "learning_rate": 1.2719385888162836e-05, + "loss": 0.1267, + "step": 1783 + }, + { + "epoch": 1.29, + "learning_rate": 1.271186652996188e-05, + "loss": 0.1264, + "step": 1784 + }, + { + "epoch": 1.29, + "learning_rate": 1.2704345516372599e-05, + "loss": 0.118, + "step": 1785 + }, + { + "epoch": 1.29, + "learning_rate": 1.2696822851986e-05, + "loss": 0.1252, + "step": 1786 + }, + { + "epoch": 1.29, + "learning_rate": 1.2689298541394093e-05, + "loss": 0.129, + "step": 1787 + }, + { + "epoch": 1.29, + "learning_rate": 1.2681772589189904e-05, + "loss": 0.1224, + "step": 1788 + }, + { + "epoch": 1.29, + "learning_rate": 1.2674244999967451e-05, + "loss": 0.1216, + "step": 1789 + }, + { + "epoch": 1.29, + "learning_rate": 1.2666715778321756e-05, + "loss": 0.1301, + "step": 1790 + }, + { + "epoch": 1.3, + "learning_rate": 1.2659184928848836e-05, + "loss": 0.1197, + "step": 1791 + }, + { + "epoch": 1.3, + "learning_rate": 1.2651652456145702e-05, + "loss": 0.1337, + "step": 1792 + }, + { + "epoch": 1.3, + "learning_rate": 1.2644118364810355e-05, + "loss": 0.1198, + "step": 1793 + }, + { + "epoch": 1.3, + "learning_rate": 1.2636582659441788e-05, + "loss": 0.1242, + "step": 1794 + }, + { + "epoch": 1.3, + "learning_rate": 1.2629045344639975e-05, + "loss": 0.1259, + "step": 1795 + }, + { + "epoch": 1.3, + "learning_rate": 1.2621506425005872e-05, + "loss": 0.132, + "step": 1796 + }, + { + "epoch": 1.3, + "learning_rate": 1.2613965905141414e-05, + "loss": 0.1145, + "step": 1797 + }, + { + "epoch": 1.3, + "learning_rate": 1.2606423789649523e-05, + "loss": 0.1168, + "step": 1798 + }, + { + "epoch": 1.3, + "learning_rate": 1.2598880083134077e-05, + "loss": 0.1207, + "step": 1799 + }, + { + "epoch": 1.3, + "learning_rate": 1.2591334790199942e-05, + "loss": 0.1235, + "step": 1800 + }, + { + "epoch": 1.3, + "learning_rate": 1.2583787915452947e-05, + "loss": 0.1271, + "step": 1801 + }, + { + "epoch": 1.3, + "learning_rate": 1.257623946349988e-05, + "loss": 0.126, + "step": 1802 + }, + { + "epoch": 1.3, + "learning_rate": 1.2568689438948498e-05, + "loss": 0.1263, + "step": 1803 + }, + { + "epoch": 1.3, + "learning_rate": 1.2561137846407519e-05, + "loss": 0.1356, + "step": 1804 + }, + { + "epoch": 1.31, + "learning_rate": 1.2553584690486616e-05, + "loss": 0.124, + "step": 1805 + }, + { + "epoch": 1.31, + "learning_rate": 1.2546029975796412e-05, + "loss": 0.1336, + "step": 1806 + }, + { + "epoch": 1.31, + "learning_rate": 1.2538473706948494e-05, + "loss": 0.1305, + "step": 1807 + }, + { + "epoch": 1.31, + "learning_rate": 1.2530915888555381e-05, + "loss": 0.1343, + "step": 1808 + }, + { + "epoch": 1.31, + "learning_rate": 1.2523356525230554e-05, + "loss": 0.1231, + "step": 1809 + }, + { + "epoch": 1.31, + "learning_rate": 1.2515795621588428e-05, + "loss": 0.1306, + "step": 1810 + }, + { + "epoch": 1.31, + "learning_rate": 1.250823318224435e-05, + "loss": 0.1208, + "step": 1811 + }, + { + "epoch": 1.31, + "learning_rate": 1.2500669211814626e-05, + "loss": 0.1335, + "step": 1812 + }, + { + "epoch": 1.31, + "learning_rate": 1.2493103714916484e-05, + "loss": 0.1296, + "step": 1813 + }, + { + "epoch": 1.31, + "learning_rate": 1.2485536696168081e-05, + "loss": 0.1242, + "step": 1814 + }, + { + "epoch": 1.31, + "learning_rate": 1.2477968160188505e-05, + "loss": 0.1305, + "step": 1815 + }, + { + "epoch": 1.31, + "learning_rate": 1.2470398111597776e-05, + "loss": 0.119, + "step": 1816 + }, + { + "epoch": 1.31, + "learning_rate": 1.246282655501683e-05, + "loss": 0.1295, + "step": 1817 + }, + { + "epoch": 1.32, + "learning_rate": 1.2455253495067528e-05, + "loss": 0.131, + "step": 1818 + }, + { + "epoch": 1.32, + "learning_rate": 1.2447678936372647e-05, + "loss": 0.1289, + "step": 1819 + }, + { + "epoch": 1.32, + "learning_rate": 1.2440102883555878e-05, + "loss": 0.1264, + "step": 1820 + }, + { + "epoch": 1.32, + "learning_rate": 1.2432525341241827e-05, + "loss": 0.1216, + "step": 1821 + }, + { + "epoch": 1.32, + "learning_rate": 1.2424946314056008e-05, + "loss": 0.1386, + "step": 1822 + }, + { + "epoch": 1.32, + "learning_rate": 1.2417365806624839e-05, + "loss": 0.122, + "step": 1823 + }, + { + "epoch": 1.32, + "learning_rate": 1.2409783823575645e-05, + "loss": 0.1245, + "step": 1824 + }, + { + "epoch": 1.32, + "learning_rate": 1.2402200369536647e-05, + "loss": 0.1362, + "step": 1825 + }, + { + "epoch": 1.32, + "learning_rate": 1.2394615449136971e-05, + "loss": 0.1279, + "step": 1826 + }, + { + "epoch": 1.32, + "learning_rate": 1.2387029067006633e-05, + "loss": 0.1259, + "step": 1827 + }, + { + "epoch": 1.32, + "learning_rate": 1.237944122777654e-05, + "loss": 0.1333, + "step": 1828 + }, + { + "epoch": 1.32, + "learning_rate": 1.2371851936078496e-05, + "loss": 0.1264, + "step": 1829 + }, + { + "epoch": 1.32, + "learning_rate": 1.2364261196545182e-05, + "loss": 0.128, + "step": 1830 + }, + { + "epoch": 1.32, + "learning_rate": 1.235666901381017e-05, + "loss": 0.1322, + "step": 1831 + }, + { + "epoch": 1.33, + "learning_rate": 1.2349075392507907e-05, + "loss": 0.1308, + "step": 1832 + }, + { + "epoch": 1.33, + "learning_rate": 1.2341480337273724e-05, + "loss": 0.1297, + "step": 1833 + }, + { + "epoch": 1.33, + "learning_rate": 1.2333883852743824e-05, + "loss": 0.1252, + "step": 1834 + }, + { + "epoch": 1.33, + "learning_rate": 1.2326285943555282e-05, + "loss": 0.1286, + "step": 1835 + }, + { + "epoch": 1.33, + "learning_rate": 1.2318686614346049e-05, + "loss": 0.1321, + "step": 1836 + }, + { + "epoch": 1.33, + "learning_rate": 1.2311085869754933e-05, + "loss": 0.1298, + "step": 1837 + }, + { + "epoch": 1.33, + "learning_rate": 1.2303483714421613e-05, + "loss": 0.1162, + "step": 1838 + }, + { + "epoch": 1.33, + "learning_rate": 1.2295880152986623e-05, + "loss": 0.1254, + "step": 1839 + }, + { + "epoch": 1.33, + "learning_rate": 1.2288275190091363e-05, + "loss": 0.1245, + "step": 1840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2280668830378084e-05, + "loss": 0.1224, + "step": 1841 + }, + { + "epoch": 1.33, + "learning_rate": 1.2273061078489889e-05, + "loss": 0.1355, + "step": 1842 + }, + { + "epoch": 1.33, + "learning_rate": 1.2265451939070733e-05, + "loss": 0.1315, + "step": 1843 + }, + { + "epoch": 1.33, + "learning_rate": 1.2257841416765416e-05, + "loss": 0.1216, + "step": 1844 + }, + { + "epoch": 1.33, + "learning_rate": 1.2250229516219586e-05, + "loss": 0.1236, + "step": 1845 + }, + { + "epoch": 1.34, + "learning_rate": 1.2242616242079723e-05, + "loss": 0.1301, + "step": 1846 + }, + { + "epoch": 1.34, + "learning_rate": 1.2235001598993162e-05, + "loss": 0.1351, + "step": 1847 + }, + { + "epoch": 1.34, + "learning_rate": 1.2227385591608054e-05, + "loss": 0.1221, + "step": 1848 + }, + { + "epoch": 1.34, + "learning_rate": 1.22197682245734e-05, + "loss": 0.1313, + "step": 1849 + }, + { + "epoch": 1.34, + "learning_rate": 1.2212149502539017e-05, + "loss": 0.1382, + "step": 1850 + }, + { + "epoch": 1.34, + "learning_rate": 1.2204529430155561e-05, + "loss": 0.1265, + "step": 1851 + }, + { + "epoch": 1.34, + "learning_rate": 1.2196908012074502e-05, + "loss": 0.1351, + "step": 1852 + }, + { + "epoch": 1.34, + "learning_rate": 1.218928525294814e-05, + "loss": 0.1335, + "step": 1853 + }, + { + "epoch": 1.34, + "learning_rate": 1.218166115742959e-05, + "loss": 0.1237, + "step": 1854 + }, + { + "epoch": 1.34, + "learning_rate": 1.217403573017278e-05, + "loss": 0.1335, + "step": 1855 + }, + { + "epoch": 1.34, + "learning_rate": 1.2166408975832453e-05, + "loss": 0.1332, + "step": 1856 + }, + { + "epoch": 1.34, + "learning_rate": 1.2158780899064167e-05, + "loss": 0.1359, + "step": 1857 + }, + { + "epoch": 1.34, + "learning_rate": 1.215115150452428e-05, + "loss": 0.1205, + "step": 1858 + }, + { + "epoch": 1.34, + "learning_rate": 1.2143520796869957e-05, + "loss": 0.1262, + "step": 1859 + }, + { + "epoch": 1.35, + "learning_rate": 1.2135888780759164e-05, + "loss": 0.1313, + "step": 1860 + }, + { + "epoch": 1.35, + "learning_rate": 1.212825546085067e-05, + "loss": 0.1281, + "step": 1861 + }, + { + "epoch": 1.35, + "learning_rate": 1.2120620841804032e-05, + "loss": 0.1164, + "step": 1862 + }, + { + "epoch": 1.35, + "learning_rate": 1.211298492827961e-05, + "loss": 0.1247, + "step": 1863 + }, + { + "epoch": 1.35, + "learning_rate": 1.2105347724938543e-05, + "loss": 0.1364, + "step": 1864 + }, + { + "epoch": 1.35, + "learning_rate": 1.2097709236442763e-05, + "loss": 0.1375, + "step": 1865 + }, + { + "epoch": 1.35, + "learning_rate": 1.209006946745499e-05, + "loss": 0.1278, + "step": 1866 + }, + { + "epoch": 1.35, + "learning_rate": 1.2082428422638724e-05, + "loss": 0.1255, + "step": 1867 + }, + { + "epoch": 1.35, + "learning_rate": 1.2074786106658238e-05, + "loss": 0.1158, + "step": 1868 + }, + { + "epoch": 1.35, + "learning_rate": 1.2067142524178584e-05, + "loss": 0.132, + "step": 1869 + }, + { + "epoch": 1.35, + "learning_rate": 1.205949767986559e-05, + "loss": 0.135, + "step": 1870 + }, + { + "epoch": 1.35, + "learning_rate": 1.2051851578385853e-05, + "loss": 0.1301, + "step": 1871 + }, + { + "epoch": 1.35, + "learning_rate": 1.2044204224406734e-05, + "loss": 0.1286, + "step": 1872 + }, + { + "epoch": 1.35, + "learning_rate": 1.2036555622596365e-05, + "loss": 0.1284, + "step": 1873 + }, + { + "epoch": 1.36, + "learning_rate": 1.202890577762363e-05, + "loss": 0.121, + "step": 1874 + }, + { + "epoch": 1.36, + "learning_rate": 1.2021254694158185e-05, + "loss": 0.1304, + "step": 1875 + }, + { + "epoch": 1.36, + "learning_rate": 1.2013602376870429e-05, + "loss": 0.1348, + "step": 1876 + }, + { + "epoch": 1.36, + "learning_rate": 1.200594883043152e-05, + "loss": 0.1358, + "step": 1877 + }, + { + "epoch": 1.36, + "learning_rate": 1.1998294059513369e-05, + "loss": 0.123, + "step": 1878 + }, + { + "epoch": 1.36, + "learning_rate": 1.1990638068788627e-05, + "loss": 0.1328, + "step": 1879 + }, + { + "epoch": 1.36, + "learning_rate": 1.1982980862930703e-05, + "loss": 0.1331, + "step": 1880 + }, + { + "epoch": 1.36, + "learning_rate": 1.1975322446613731e-05, + "loss": 0.1311, + "step": 1881 + }, + { + "epoch": 1.36, + "learning_rate": 1.1967662824512596e-05, + "loss": 0.1315, + "step": 1882 + }, + { + "epoch": 1.36, + "learning_rate": 1.1960002001302912e-05, + "loss": 0.1399, + "step": 1883 + }, + { + "epoch": 1.36, + "learning_rate": 1.195233998166103e-05, + "loss": 0.1361, + "step": 1884 + }, + { + "epoch": 1.36, + "learning_rate": 1.1944676770264034e-05, + "loss": 0.1239, + "step": 1885 + }, + { + "epoch": 1.36, + "learning_rate": 1.1937012371789727e-05, + "loss": 0.1344, + "step": 1886 + }, + { + "epoch": 1.36, + "learning_rate": 1.1929346790916645e-05, + "loss": 0.1247, + "step": 1887 + }, + { + "epoch": 1.37, + "learning_rate": 1.192168003232404e-05, + "loss": 0.1275, + "step": 1888 + }, + { + "epoch": 1.37, + "learning_rate": 1.1914012100691885e-05, + "loss": 0.1284, + "step": 1889 + }, + { + "epoch": 1.37, + "learning_rate": 1.1906343000700871e-05, + "loss": 0.1271, + "step": 1890 + }, + { + "epoch": 1.37, + "learning_rate": 1.1898672737032399e-05, + "loss": 0.1289, + "step": 1891 + }, + { + "epoch": 1.37, + "learning_rate": 1.1891001314368583e-05, + "loss": 0.1244, + "step": 1892 + }, + { + "epoch": 1.37, + "learning_rate": 1.1883328737392244e-05, + "loss": 0.1273, + "step": 1893 + }, + { + "epoch": 1.37, + "learning_rate": 1.1875655010786901e-05, + "loss": 0.1284, + "step": 1894 + }, + { + "epoch": 1.37, + "learning_rate": 1.186798013923679e-05, + "loss": 0.1274, + "step": 1895 + }, + { + "epoch": 1.37, + "learning_rate": 1.186030412742683e-05, + "loss": 0.1301, + "step": 1896 + }, + { + "epoch": 1.37, + "learning_rate": 1.1852626980042643e-05, + "loss": 0.1248, + "step": 1897 + }, + { + "epoch": 1.37, + "learning_rate": 1.1844948701770546e-05, + "loss": 0.1242, + "step": 1898 + }, + { + "epoch": 1.37, + "learning_rate": 1.1837269297297544e-05, + "loss": 0.1347, + "step": 1899 + }, + { + "epoch": 1.37, + "learning_rate": 1.1829588771311329e-05, + "loss": 0.1363, + "step": 1900 + }, + { + "epoch": 1.38, + "learning_rate": 1.1821907128500279e-05, + "loss": 0.1261, + "step": 1901 + }, + { + "epoch": 1.38, + "learning_rate": 1.181422437355345e-05, + "loss": 0.1208, + "step": 1902 + }, + { + "epoch": 1.38, + "learning_rate": 1.1806540511160581e-05, + "loss": 0.1255, + "step": 1903 + }, + { + "epoch": 1.38, + "learning_rate": 1.1798855546012085e-05, + "loss": 0.1292, + "step": 1904 + }, + { + "epoch": 1.38, + "learning_rate": 1.1791169482799053e-05, + "loss": 0.1277, + "step": 1905 + }, + { + "epoch": 1.38, + "learning_rate": 1.1783482326213236e-05, + "loss": 0.1212, + "step": 1906 + }, + { + "epoch": 1.38, + "learning_rate": 1.1775794080947065e-05, + "loss": 0.1245, + "step": 1907 + }, + { + "epoch": 1.38, + "learning_rate": 1.1768104751693625e-05, + "loss": 0.1267, + "step": 1908 + }, + { + "epoch": 1.38, + "learning_rate": 1.1760414343146667e-05, + "loss": 0.1386, + "step": 1909 + }, + { + "epoch": 1.38, + "learning_rate": 1.1752722860000602e-05, + "loss": 0.1264, + "step": 1910 + }, + { + "epoch": 1.38, + "learning_rate": 1.1745030306950495e-05, + "loss": 0.1415, + "step": 1911 + }, + { + "epoch": 1.38, + "learning_rate": 1.1737336688692068e-05, + "loss": 0.1342, + "step": 1912 + }, + { + "epoch": 1.38, + "learning_rate": 1.1729642009921685e-05, + "loss": 0.1277, + "step": 1913 + }, + { + "epoch": 1.38, + "learning_rate": 1.1721946275336365e-05, + "loss": 0.1357, + "step": 1914 + }, + { + "epoch": 1.39, + "learning_rate": 1.1714249489633766e-05, + "loss": 0.1283, + "step": 1915 + }, + { + "epoch": 1.39, + "learning_rate": 1.1706551657512195e-05, + "loss": 0.1298, + "step": 1916 + }, + { + "epoch": 1.39, + "learning_rate": 1.1698852783670591e-05, + "loss": 0.1211, + "step": 1917 + }, + { + "epoch": 1.39, + "learning_rate": 1.1691152872808529e-05, + "loss": 0.1315, + "step": 1918 + }, + { + "epoch": 1.39, + "learning_rate": 1.1683451929626224e-05, + "loss": 0.1179, + "step": 1919 + }, + { + "epoch": 1.39, + "learning_rate": 1.1675749958824511e-05, + "loss": 0.1302, + "step": 1920 + }, + { + "epoch": 1.39, + "learning_rate": 1.1668046965104863e-05, + "loss": 0.122, + "step": 1921 + }, + { + "epoch": 1.39, + "learning_rate": 1.1660342953169368e-05, + "loss": 0.1297, + "step": 1922 + }, + { + "epoch": 1.39, + "learning_rate": 1.1652637927720739e-05, + "loss": 0.1292, + "step": 1923 + }, + { + "epoch": 1.39, + "learning_rate": 1.164493189346231e-05, + "loss": 0.1324, + "step": 1924 + }, + { + "epoch": 1.39, + "learning_rate": 1.1637224855098029e-05, + "loss": 0.1289, + "step": 1925 + }, + { + "epoch": 1.39, + "learning_rate": 1.1629516817332458e-05, + "loss": 0.1353, + "step": 1926 + }, + { + "epoch": 1.39, + "learning_rate": 1.1621807784870767e-05, + "loss": 0.1184, + "step": 1927 + }, + { + "epoch": 1.39, + "learning_rate": 1.1614097762418733e-05, + "loss": 0.1402, + "step": 1928 + }, + { + "epoch": 1.4, + "learning_rate": 1.1606386754682739e-05, + "loss": 0.1216, + "step": 1929 + }, + { + "epoch": 1.4, + "learning_rate": 1.1598674766369768e-05, + "loss": 0.1296, + "step": 1930 + }, + { + "epoch": 1.4, + "learning_rate": 1.1590961802187406e-05, + "loss": 0.1337, + "step": 1931 + }, + { + "epoch": 1.4, + "learning_rate": 1.158324786684383e-05, + "loss": 0.1205, + "step": 1932 + }, + { + "epoch": 1.4, + "learning_rate": 1.1575532965047809e-05, + "loss": 0.123, + "step": 1933 + }, + { + "epoch": 1.4, + "learning_rate": 1.1567817101508702e-05, + "loss": 0.1336, + "step": 1934 + }, + { + "epoch": 1.4, + "learning_rate": 1.1560100280936464e-05, + "loss": 0.1227, + "step": 1935 + }, + { + "epoch": 1.4, + "learning_rate": 1.1552382508041623e-05, + "loss": 0.118, + "step": 1936 + }, + { + "epoch": 1.4, + "learning_rate": 1.154466378753529e-05, + "loss": 0.127, + "step": 1937 + }, + { + "epoch": 1.4, + "learning_rate": 1.153694412412916e-05, + "loss": 0.1329, + "step": 1938 + }, + { + "epoch": 1.4, + "learning_rate": 1.1529223522535502e-05, + "loss": 0.1305, + "step": 1939 + }, + { + "epoch": 1.4, + "learning_rate": 1.1521501987467153e-05, + "loss": 0.1196, + "step": 1940 + }, + { + "epoch": 1.4, + "learning_rate": 1.1513779523637527e-05, + "loss": 0.1294, + "step": 1941 + }, + { + "epoch": 1.4, + "learning_rate": 1.1506056135760597e-05, + "loss": 0.1238, + "step": 1942 + }, + { + "epoch": 1.41, + "learning_rate": 1.1498331828550906e-05, + "loss": 0.1277, + "step": 1943 + }, + { + "epoch": 1.41, + "learning_rate": 1.1490606606723559e-05, + "loss": 0.1288, + "step": 1944 + }, + { + "epoch": 1.41, + "learning_rate": 1.148288047499421e-05, + "loss": 0.1245, + "step": 1945 + }, + { + "epoch": 1.41, + "learning_rate": 1.1475153438079079e-05, + "loss": 0.1272, + "step": 1946 + }, + { + "epoch": 1.41, + "learning_rate": 1.1467425500694936e-05, + "loss": 0.1328, + "step": 1947 + }, + { + "epoch": 1.41, + "learning_rate": 1.1459696667559096e-05, + "loss": 0.1194, + "step": 1948 + }, + { + "epoch": 1.41, + "learning_rate": 1.1451966943389423e-05, + "loss": 0.136, + "step": 1949 + }, + { + "epoch": 1.41, + "learning_rate": 1.1444236332904331e-05, + "loss": 0.141, + "step": 1950 + }, + { + "epoch": 1.41, + "learning_rate": 1.1436504840822767e-05, + "loss": 0.1252, + "step": 1951 + }, + { + "epoch": 1.41, + "learning_rate": 1.1428772471864216e-05, + "loss": 0.1286, + "step": 1952 + }, + { + "epoch": 1.41, + "learning_rate": 1.1421039230748705e-05, + "loss": 0.1288, + "step": 1953 + }, + { + "epoch": 1.41, + "learning_rate": 1.1413305122196791e-05, + "loss": 0.1202, + "step": 1954 + }, + { + "epoch": 1.41, + "learning_rate": 1.140557015092956e-05, + "loss": 0.1221, + "step": 1955 + }, + { + "epoch": 1.41, + "learning_rate": 1.1397834321668616e-05, + "loss": 0.1305, + "step": 1956 + }, + { + "epoch": 1.42, + "learning_rate": 1.1390097639136101e-05, + "loss": 0.1339, + "step": 1957 + }, + { + "epoch": 1.42, + "learning_rate": 1.1382360108054674e-05, + "loss": 0.129, + "step": 1958 + }, + { + "epoch": 1.42, + "learning_rate": 1.1374621733147501e-05, + "loss": 0.1312, + "step": 1959 + }, + { + "epoch": 1.42, + "learning_rate": 1.1366882519138281e-05, + "loss": 0.1205, + "step": 1960 + }, + { + "epoch": 1.42, + "learning_rate": 1.1359142470751208e-05, + "loss": 0.1306, + "step": 1961 + }, + { + "epoch": 1.42, + "learning_rate": 1.1351401592710997e-05, + "loss": 0.1393, + "step": 1962 + }, + { + "epoch": 1.42, + "learning_rate": 1.1343659889742865e-05, + "loss": 0.1279, + "step": 1963 + }, + { + "epoch": 1.42, + "learning_rate": 1.1335917366572531e-05, + "loss": 0.1295, + "step": 1964 + }, + { + "epoch": 1.42, + "learning_rate": 1.132817402792622e-05, + "loss": 0.1276, + "step": 1965 + }, + { + "epoch": 1.42, + "learning_rate": 1.1320429878530647e-05, + "loss": 0.1161, + "step": 1966 + }, + { + "epoch": 1.42, + "learning_rate": 1.1312684923113029e-05, + "loss": 0.1174, + "step": 1967 + }, + { + "epoch": 1.42, + "learning_rate": 1.1304939166401075e-05, + "loss": 0.1203, + "step": 1968 + }, + { + "epoch": 1.42, + "learning_rate": 1.1297192613122977e-05, + "loss": 0.1227, + "step": 1969 + }, + { + "epoch": 1.42, + "learning_rate": 1.1289445268007417e-05, + "loss": 0.1147, + "step": 1970 + }, + { + "epoch": 1.43, + "learning_rate": 1.1281697135783563e-05, + "loss": 0.1322, + "step": 1971 + }, + { + "epoch": 1.43, + "learning_rate": 1.1273948221181058e-05, + "loss": 0.1251, + "step": 1972 + }, + { + "epoch": 1.43, + "learning_rate": 1.1266198528930026e-05, + "loss": 0.1246, + "step": 1973 + }, + { + "epoch": 1.43, + "learning_rate": 1.1258448063761063e-05, + "loss": 0.1291, + "step": 1974 + }, + { + "epoch": 1.43, + "learning_rate": 1.1250696830405243e-05, + "loss": 0.1237, + "step": 1975 + }, + { + "epoch": 1.43, + "learning_rate": 1.1242944833594101e-05, + "loss": 0.1225, + "step": 1976 + }, + { + "epoch": 1.43, + "learning_rate": 1.123519207805964e-05, + "loss": 0.1303, + "step": 1977 + }, + { + "epoch": 1.43, + "learning_rate": 1.1227438568534334e-05, + "loss": 0.1227, + "step": 1978 + }, + { + "epoch": 1.43, + "learning_rate": 1.1219684309751107e-05, + "loss": 0.1299, + "step": 1979 + }, + { + "epoch": 1.43, + "learning_rate": 1.1211929306443343e-05, + "loss": 0.1314, + "step": 1980 + }, + { + "epoch": 1.43, + "learning_rate": 1.1204173563344884e-05, + "loss": 0.1248, + "step": 1981 + }, + { + "epoch": 1.43, + "learning_rate": 1.1196417085190022e-05, + "loss": 0.1265, + "step": 1982 + }, + { + "epoch": 1.43, + "learning_rate": 1.1188659876713495e-05, + "loss": 0.122, + "step": 1983 + }, + { + "epoch": 1.44, + "learning_rate": 1.1180901942650493e-05, + "loss": 0.1281, + "step": 1984 + }, + { + "epoch": 1.44, + "learning_rate": 1.117314328773664e-05, + "loss": 0.1255, + "step": 1985 + }, + { + "epoch": 1.44, + "learning_rate": 1.1165383916708007e-05, + "loss": 0.1353, + "step": 1986 + }, + { + "epoch": 1.44, + "learning_rate": 1.1157623834301103e-05, + "loss": 0.1197, + "step": 1987 + }, + { + "epoch": 1.44, + "learning_rate": 1.1149863045252864e-05, + "loss": 0.1231, + "step": 1988 + }, + { + "epoch": 1.44, + "learning_rate": 1.1142101554300664e-05, + "loss": 0.1185, + "step": 1989 + }, + { + "epoch": 1.44, + "learning_rate": 1.1134339366182299e-05, + "loss": 0.1268, + "step": 1990 + }, + { + "epoch": 1.44, + "learning_rate": 1.1126576485636e-05, + "loss": 0.1268, + "step": 1991 + }, + { + "epoch": 1.44, + "learning_rate": 1.1118812917400411e-05, + "loss": 0.1324, + "step": 1992 + }, + { + "epoch": 1.44, + "learning_rate": 1.1111048666214607e-05, + "loss": 0.1355, + "step": 1993 + }, + { + "epoch": 1.44, + "learning_rate": 1.1103283736818066e-05, + "loss": 0.1313, + "step": 1994 + }, + { + "epoch": 1.44, + "learning_rate": 1.109551813395069e-05, + "loss": 0.1209, + "step": 1995 + }, + { + "epoch": 1.44, + "learning_rate": 1.1087751862352788e-05, + "loss": 0.1312, + "step": 1996 + }, + { + "epoch": 1.44, + "learning_rate": 1.1079984926765077e-05, + "loss": 0.1238, + "step": 1997 + }, + { + "epoch": 1.45, + "learning_rate": 1.1072217331928685e-05, + "loss": 0.1268, + "step": 1998 + }, + { + "epoch": 1.45, + "learning_rate": 1.1064449082585136e-05, + "loss": 0.1148, + "step": 1999 + }, + { + "epoch": 1.45, + "learning_rate": 1.1056680183476355e-05, + "loss": 0.1245, + "step": 2000 + }, + { + "epoch": 1.45, + "eval_loss": 0.12460336089134216, + "eval_runtime": 714.4442, + "eval_samples_per_second": 69.984, + "eval_steps_per_second": 2.188, + "step": 2000 + }, + { + "epoch": 1.45, + "learning_rate": 1.104891063934466e-05, + "loss": 0.131, + "step": 2001 + }, + { + "epoch": 1.45, + "learning_rate": 1.1041140454932778e-05, + "loss": 0.1142, + "step": 2002 + }, + { + "epoch": 1.45, + "learning_rate": 1.1033369634983804e-05, + "loss": 0.1252, + "step": 2003 + }, + { + "epoch": 1.45, + "learning_rate": 1.102559818424124e-05, + "loss": 0.1197, + "step": 2004 + }, + { + "epoch": 1.45, + "learning_rate": 1.1017826107448963e-05, + "loss": 0.1274, + "step": 2005 + }, + { + "epoch": 1.45, + "learning_rate": 1.1010053409351236e-05, + "loss": 0.132, + "step": 2006 + }, + { + "epoch": 1.45, + "learning_rate": 1.1002280094692698e-05, + "loss": 0.1287, + "step": 2007 + }, + { + "epoch": 1.45, + "learning_rate": 1.0994506168218371e-05, + "loss": 0.1259, + "step": 2008 + }, + { + "epoch": 1.45, + "learning_rate": 1.0986731634673641e-05, + "loss": 0.1295, + "step": 2009 + }, + { + "epoch": 1.45, + "learning_rate": 1.0978956498804275e-05, + "loss": 0.1277, + "step": 2010 + }, + { + "epoch": 1.45, + "learning_rate": 1.0971180765356396e-05, + "loss": 0.1272, + "step": 2011 + }, + { + "epoch": 1.46, + "learning_rate": 1.0963404439076501e-05, + "loss": 0.1323, + "step": 2012 + }, + { + "epoch": 1.46, + "learning_rate": 1.0955627524711447e-05, + "loss": 0.119, + "step": 2013 + }, + { + "epoch": 1.46, + "learning_rate": 1.094785002700845e-05, + "loss": 0.1169, + "step": 2014 + }, + { + "epoch": 1.46, + "learning_rate": 1.0940071950715079e-05, + "loss": 0.121, + "step": 2015 + }, + { + "epoch": 1.46, + "learning_rate": 1.0932293300579258e-05, + "loss": 0.1256, + "step": 2016 + }, + { + "epoch": 1.46, + "learning_rate": 1.0924514081349263e-05, + "loss": 0.14, + "step": 2017 + }, + { + "epoch": 1.46, + "learning_rate": 1.0916734297773712e-05, + "loss": 0.1308, + "step": 2018 + }, + { + "epoch": 1.46, + "learning_rate": 1.0908953954601576e-05, + "loss": 0.1334, + "step": 2019 + }, + { + "epoch": 1.46, + "learning_rate": 1.090117305658216e-05, + "loss": 0.1264, + "step": 2020 + }, + { + "epoch": 1.46, + "learning_rate": 1.0893391608465112e-05, + "loss": 0.1237, + "step": 2021 + }, + { + "epoch": 1.46, + "learning_rate": 1.0885609615000414e-05, + "loss": 0.1272, + "step": 2022 + }, + { + "epoch": 1.46, + "learning_rate": 1.087782708093838e-05, + "loss": 0.1249, + "step": 2023 + }, + { + "epoch": 1.46, + "learning_rate": 1.0870044011029656e-05, + "loss": 0.1205, + "step": 2024 + }, + { + "epoch": 1.46, + "learning_rate": 1.0862260410025219e-05, + "loss": 0.1258, + "step": 2025 + }, + { + "epoch": 1.47, + "learning_rate": 1.085447628267636e-05, + "loss": 0.1211, + "step": 2026 + }, + { + "epoch": 1.47, + "learning_rate": 1.0846691633734699e-05, + "loss": 0.1196, + "step": 2027 + }, + { + "epoch": 1.47, + "learning_rate": 1.0838906467952171e-05, + "loss": 0.1266, + "step": 2028 + }, + { + "epoch": 1.47, + "learning_rate": 1.0831120790081027e-05, + "loss": 0.1382, + "step": 2029 + }, + { + "epoch": 1.47, + "learning_rate": 1.0823334604873836e-05, + "loss": 0.1212, + "step": 2030 + }, + { + "epoch": 1.47, + "learning_rate": 1.0815547917083468e-05, + "loss": 0.1396, + "step": 2031 + }, + { + "epoch": 1.47, + "learning_rate": 1.0807760731463107e-05, + "loss": 0.1276, + "step": 2032 + }, + { + "epoch": 1.47, + "learning_rate": 1.0799973052766232e-05, + "loss": 0.1251, + "step": 2033 + }, + { + "epoch": 1.47, + "learning_rate": 1.0792184885746635e-05, + "loss": 0.1293, + "step": 2034 + }, + { + "epoch": 1.47, + "learning_rate": 1.0784396235158398e-05, + "loss": 0.1257, + "step": 2035 + }, + { + "epoch": 1.47, + "learning_rate": 1.0776607105755899e-05, + "loss": 0.1282, + "step": 2036 + }, + { + "epoch": 1.47, + "learning_rate": 1.0768817502293806e-05, + "loss": 0.1339, + "step": 2037 + }, + { + "epoch": 1.47, + "learning_rate": 1.0761027429527089e-05, + "loss": 0.1193, + "step": 2038 + }, + { + "epoch": 1.47, + "learning_rate": 1.0753236892210985e-05, + "loss": 0.1266, + "step": 2039 + }, + { + "epoch": 1.48, + "learning_rate": 1.074544589510103e-05, + "loss": 0.128, + "step": 2040 + }, + { + "epoch": 1.48, + "learning_rate": 1.0737654442953033e-05, + "loss": 0.1337, + "step": 2041 + }, + { + "epoch": 1.48, + "learning_rate": 1.0729862540523086e-05, + "loss": 0.116, + "step": 2042 + }, + { + "epoch": 1.48, + "learning_rate": 1.0722070192567552e-05, + "loss": 0.1332, + "step": 2043 + }, + { + "epoch": 1.48, + "learning_rate": 1.0714277403843065e-05, + "loss": 0.1231, + "step": 2044 + }, + { + "epoch": 1.48, + "learning_rate": 1.0706484179106534e-05, + "loss": 0.1385, + "step": 2045 + }, + { + "epoch": 1.48, + "learning_rate": 1.0698690523115125e-05, + "loss": 0.1292, + "step": 2046 + }, + { + "epoch": 1.48, + "learning_rate": 1.0690896440626277e-05, + "loss": 0.1281, + "step": 2047 + }, + { + "epoch": 1.48, + "learning_rate": 1.0683101936397682e-05, + "loss": 0.1257, + "step": 2048 + }, + { + "epoch": 1.48, + "learning_rate": 1.0675307015187292e-05, + "loss": 0.1165, + "step": 2049 + }, + { + "epoch": 1.48, + "learning_rate": 1.0667511681753315e-05, + "loss": 0.129, + "step": 2050 + }, + { + "epoch": 1.48, + "learning_rate": 1.065971594085421e-05, + "loss": 0.123, + "step": 2051 + }, + { + "epoch": 1.48, + "learning_rate": 1.0651919797248685e-05, + "loss": 0.1225, + "step": 2052 + }, + { + "epoch": 1.48, + "learning_rate": 1.0644123255695686e-05, + "loss": 0.1235, + "step": 2053 + }, + { + "epoch": 1.49, + "learning_rate": 1.0636326320954417e-05, + "loss": 0.1312, + "step": 2054 + }, + { + "epoch": 1.49, + "learning_rate": 1.0628528997784308e-05, + "loss": 0.1365, + "step": 2055 + }, + { + "epoch": 1.49, + "learning_rate": 1.062073129094504e-05, + "loss": 0.1376, + "step": 2056 + }, + { + "epoch": 1.49, + "learning_rate": 1.061293320519651e-05, + "loss": 0.1319, + "step": 2057 + }, + { + "epoch": 1.49, + "learning_rate": 1.0605134745298863e-05, + "loss": 0.1301, + "step": 2058 + }, + { + "epoch": 1.49, + "learning_rate": 1.0597335916012463e-05, + "loss": 0.1126, + "step": 2059 + }, + { + "epoch": 1.49, + "learning_rate": 1.0589536722097906e-05, + "loss": 0.1355, + "step": 2060 + }, + { + "epoch": 1.49, + "learning_rate": 1.0581737168316003e-05, + "loss": 0.1312, + "step": 2061 + }, + { + "epoch": 1.49, + "learning_rate": 1.057393725942779e-05, + "loss": 0.1281, + "step": 2062 + }, + { + "epoch": 1.49, + "learning_rate": 1.0566137000194514e-05, + "loss": 0.1381, + "step": 2063 + }, + { + "epoch": 1.49, + "learning_rate": 1.0558336395377648e-05, + "loss": 0.1267, + "step": 2064 + }, + { + "epoch": 1.49, + "learning_rate": 1.0550535449738866e-05, + "loss": 0.1292, + "step": 2065 + }, + { + "epoch": 1.49, + "learning_rate": 1.054273416804005e-05, + "loss": 0.1377, + "step": 2066 + }, + { + "epoch": 1.5, + "learning_rate": 1.053493255504329e-05, + "loss": 0.1164, + "step": 2067 + }, + { + "epoch": 1.5, + "learning_rate": 1.0527130615510877e-05, + "loss": 0.1188, + "step": 2068 + }, + { + "epoch": 1.5, + "learning_rate": 1.05193283542053e-05, + "loss": 0.1187, + "step": 2069 + }, + { + "epoch": 1.5, + "learning_rate": 1.0511525775889248e-05, + "loss": 0.1406, + "step": 2070 + }, + { + "epoch": 1.5, + "learning_rate": 1.0503722885325607e-05, + "loss": 0.1263, + "step": 2071 + }, + { + "epoch": 1.5, + "learning_rate": 1.0495919687277442e-05, + "loss": 0.1286, + "step": 2072 + }, + { + "epoch": 1.5, + "learning_rate": 1.0488116186508014e-05, + "loss": 0.1349, + "step": 2073 + }, + { + "epoch": 1.5, + "learning_rate": 1.0480312387780768e-05, + "loss": 0.1233, + "step": 2074 + }, + { + "epoch": 1.5, + "learning_rate": 1.0472508295859329e-05, + "loss": 0.1349, + "step": 2075 + }, + { + "epoch": 1.5, + "learning_rate": 1.0464703915507502e-05, + "loss": 0.1235, + "step": 2076 + }, + { + "epoch": 1.5, + "learning_rate": 1.0456899251489265e-05, + "loss": 0.1262, + "step": 2077 + }, + { + "epoch": 1.5, + "learning_rate": 1.0449094308568776e-05, + "loss": 0.1237, + "step": 2078 + }, + { + "epoch": 1.5, + "learning_rate": 1.0441289091510356e-05, + "loss": 0.1224, + "step": 2079 + }, + { + "epoch": 1.5, + "learning_rate": 1.04334836050785e-05, + "loss": 0.1301, + "step": 2080 + }, + { + "epoch": 1.51, + "learning_rate": 1.0425677854037862e-05, + "loss": 0.1248, + "step": 2081 + }, + { + "epoch": 1.51, + "learning_rate": 1.0417871843153258e-05, + "loss": 0.1357, + "step": 2082 + }, + { + "epoch": 1.51, + "learning_rate": 1.0410065577189665e-05, + "loss": 0.1255, + "step": 2083 + }, + { + "epoch": 1.51, + "learning_rate": 1.040225906091222e-05, + "loss": 0.1384, + "step": 2084 + }, + { + "epoch": 1.51, + "learning_rate": 1.0394452299086196e-05, + "loss": 0.1229, + "step": 2085 + }, + { + "epoch": 1.51, + "learning_rate": 1.0386645296477039e-05, + "loss": 0.1274, + "step": 2086 + }, + { + "epoch": 1.51, + "learning_rate": 1.0378838057850326e-05, + "loss": 0.1215, + "step": 2087 + }, + { + "epoch": 1.51, + "learning_rate": 1.0371030587971783e-05, + "loss": 0.1302, + "step": 2088 + }, + { + "epoch": 1.51, + "learning_rate": 1.0363222891607276e-05, + "loss": 0.1258, + "step": 2089 + }, + { + "epoch": 1.51, + "learning_rate": 1.035541497352281e-05, + "loss": 0.1198, + "step": 2090 + }, + { + "epoch": 1.51, + "learning_rate": 1.0347606838484527e-05, + "loss": 0.1302, + "step": 2091 + }, + { + "epoch": 1.51, + "learning_rate": 1.03397984912587e-05, + "loss": 0.127, + "step": 2092 + }, + { + "epoch": 1.51, + "learning_rate": 1.0331989936611728e-05, + "loss": 0.1354, + "step": 2093 + }, + { + "epoch": 1.51, + "learning_rate": 1.0324181179310143e-05, + "loss": 0.1253, + "step": 2094 + }, + { + "epoch": 1.52, + "learning_rate": 1.0316372224120595e-05, + "loss": 0.1179, + "step": 2095 + }, + { + "epoch": 1.52, + "learning_rate": 1.0308563075809859e-05, + "loss": 0.1252, + "step": 2096 + }, + { + "epoch": 1.52, + "learning_rate": 1.0300753739144824e-05, + "loss": 0.1292, + "step": 2097 + }, + { + "epoch": 1.52, + "learning_rate": 1.02929442188925e-05, + "loss": 0.1226, + "step": 2098 + }, + { + "epoch": 1.52, + "learning_rate": 1.0285134519820005e-05, + "loss": 0.1262, + "step": 2099 + }, + { + "epoch": 1.52, + "learning_rate": 1.0277324646694564e-05, + "loss": 0.1261, + "step": 2100 + }, + { + "epoch": 1.52, + "learning_rate": 1.0269514604283512e-05, + "loss": 0.1195, + "step": 2101 + }, + { + "epoch": 1.52, + "learning_rate": 1.0261704397354287e-05, + "loss": 0.134, + "step": 2102 + }, + { + "epoch": 1.52, + "learning_rate": 1.0253894030674425e-05, + "loss": 0.1159, + "step": 2103 + }, + { + "epoch": 1.52, + "learning_rate": 1.0246083509011561e-05, + "loss": 0.1198, + "step": 2104 + }, + { + "epoch": 1.52, + "learning_rate": 1.0238272837133426e-05, + "loss": 0.1231, + "step": 2105 + }, + { + "epoch": 1.52, + "learning_rate": 1.023046201980784e-05, + "loss": 0.1247, + "step": 2106 + }, + { + "epoch": 1.52, + "learning_rate": 1.0222651061802716e-05, + "loss": 0.133, + "step": 2107 + }, + { + "epoch": 1.52, + "learning_rate": 1.0214839967886044e-05, + "loss": 0.1292, + "step": 2108 + }, + { + "epoch": 1.53, + "learning_rate": 1.0207028742825908e-05, + "loss": 0.1221, + "step": 2109 + }, + { + "epoch": 1.53, + "learning_rate": 1.0199217391390466e-05, + "loss": 0.1107, + "step": 2110 + }, + { + "epoch": 1.53, + "learning_rate": 1.0191405918347954e-05, + "loss": 0.1205, + "step": 2111 + }, + { + "epoch": 1.53, + "learning_rate": 1.0183594328466679e-05, + "loss": 0.1163, + "step": 2112 + }, + { + "epoch": 1.53, + "learning_rate": 1.017578262651503e-05, + "loss": 0.1253, + "step": 2113 + }, + { + "epoch": 1.53, + "learning_rate": 1.0167970817261448e-05, + "loss": 0.1222, + "step": 2114 + }, + { + "epoch": 1.53, + "learning_rate": 1.0160158905474455e-05, + "loss": 0.1338, + "step": 2115 + }, + { + "epoch": 1.53, + "learning_rate": 1.0152346895922629e-05, + "loss": 0.1293, + "step": 2116 + }, + { + "epoch": 1.53, + "learning_rate": 1.0144534793374605e-05, + "loss": 0.1232, + "step": 2117 + }, + { + "epoch": 1.53, + "learning_rate": 1.013672260259908e-05, + "loss": 0.1237, + "step": 2118 + }, + { + "epoch": 1.53, + "learning_rate": 1.0128910328364803e-05, + "loss": 0.1242, + "step": 2119 + }, + { + "epoch": 1.53, + "learning_rate": 1.0121097975440571e-05, + "loss": 0.1354, + "step": 2120 + }, + { + "epoch": 1.53, + "learning_rate": 1.0113285548595231e-05, + "loss": 0.1328, + "step": 2121 + }, + { + "epoch": 1.53, + "learning_rate": 1.0105473052597676e-05, + "loss": 0.1303, + "step": 2122 + }, + { + "epoch": 1.54, + "learning_rate": 1.0097660492216845e-05, + "loss": 0.1296, + "step": 2123 + }, + { + "epoch": 1.54, + "learning_rate": 1.0089847872221708e-05, + "loss": 0.114, + "step": 2124 + }, + { + "epoch": 1.54, + "learning_rate": 1.0082035197381274e-05, + "loss": 0.1298, + "step": 2125 + }, + { + "epoch": 1.54, + "learning_rate": 1.0074222472464593e-05, + "loss": 0.1381, + "step": 2126 + }, + { + "epoch": 1.54, + "learning_rate": 1.0066409702240734e-05, + "loss": 0.1203, + "step": 2127 + }, + { + "epoch": 1.54, + "learning_rate": 1.00585968914788e-05, + "loss": 0.1422, + "step": 2128 + }, + { + "epoch": 1.54, + "learning_rate": 1.0050784044947919e-05, + "loss": 0.1219, + "step": 2129 + }, + { + "epoch": 1.54, + "learning_rate": 1.0042971167417239e-05, + "loss": 0.1272, + "step": 2130 + }, + { + "epoch": 1.54, + "learning_rate": 1.003515826365593e-05, + "loss": 0.1314, + "step": 2131 + }, + { + "epoch": 1.54, + "learning_rate": 1.0027345338433176e-05, + "loss": 0.1257, + "step": 2132 + }, + { + "epoch": 1.54, + "learning_rate": 1.001953239651817e-05, + "loss": 0.1462, + "step": 2133 + }, + { + "epoch": 1.54, + "learning_rate": 1.0011719442680123e-05, + "loss": 0.127, + "step": 2134 + }, + { + "epoch": 1.54, + "learning_rate": 1.0003906481688246e-05, + "loss": 0.1321, + "step": 2135 + }, + { + "epoch": 1.55, + "learning_rate": 9.996093518311758e-06, + "loss": 0.1276, + "step": 2136 + }, + { + "epoch": 1.55, + "learning_rate": 9.98828055731988e-06, + "loss": 0.1199, + "step": 2137 + }, + { + "epoch": 1.55, + "learning_rate": 9.980467603481832e-06, + "loss": 0.1319, + "step": 2138 + }, + { + "epoch": 1.55, + "learning_rate": 9.972654661566826e-06, + "loss": 0.1287, + "step": 2139 + }, + { + "epoch": 1.55, + "learning_rate": 9.964841736344071e-06, + "loss": 0.13, + "step": 2140 + }, + { + "epoch": 1.55, + "learning_rate": 9.95702883258276e-06, + "loss": 0.1337, + "step": 2141 + }, + { + "epoch": 1.55, + "learning_rate": 9.949215955052083e-06, + "loss": 0.1289, + "step": 2142 + }, + { + "epoch": 1.55, + "learning_rate": 9.941403108521203e-06, + "loss": 0.1237, + "step": 2143 + }, + { + "epoch": 1.55, + "learning_rate": 9.933590297759271e-06, + "loss": 0.1331, + "step": 2144 + }, + { + "epoch": 1.55, + "learning_rate": 9.92577752753541e-06, + "loss": 0.1282, + "step": 2145 + }, + { + "epoch": 1.55, + "learning_rate": 9.917964802618728e-06, + "loss": 0.1239, + "step": 2146 + }, + { + "epoch": 1.55, + "learning_rate": 9.910152127778297e-06, + "loss": 0.134, + "step": 2147 + }, + { + "epoch": 1.55, + "learning_rate": 9.902339507783159e-06, + "loss": 0.1136, + "step": 2148 + }, + { + "epoch": 1.55, + "learning_rate": 9.894526947402326e-06, + "loss": 0.1335, + "step": 2149 + }, + { + "epoch": 1.56, + "learning_rate": 9.886714451404772e-06, + "loss": 0.1349, + "step": 2150 + }, + { + "epoch": 1.56, + "learning_rate": 9.878902024559434e-06, + "loss": 0.1391, + "step": 2151 + }, + { + "epoch": 1.56, + "learning_rate": 9.871089671635202e-06, + "loss": 0.1341, + "step": 2152 + }, + { + "epoch": 1.56, + "learning_rate": 9.863277397400923e-06, + "loss": 0.1347, + "step": 2153 + }, + { + "epoch": 1.56, + "learning_rate": 9.855465206625397e-06, + "loss": 0.124, + "step": 2154 + }, + { + "epoch": 1.56, + "learning_rate": 9.847653104077373e-06, + "loss": 0.1291, + "step": 2155 + }, + { + "epoch": 1.56, + "learning_rate": 9.839841094525547e-06, + "loss": 0.1389, + "step": 2156 + }, + { + "epoch": 1.56, + "learning_rate": 9.832029182738555e-06, + "loss": 0.13, + "step": 2157 + }, + { + "epoch": 1.56, + "learning_rate": 9.824217373484975e-06, + "loss": 0.1271, + "step": 2158 + }, + { + "epoch": 1.56, + "learning_rate": 9.816405671533325e-06, + "loss": 0.1212, + "step": 2159 + }, + { + "epoch": 1.56, + "learning_rate": 9.808594081652051e-06, + "loss": 0.1251, + "step": 2160 + }, + { + "epoch": 1.56, + "learning_rate": 9.800782608609539e-06, + "loss": 0.1292, + "step": 2161 + }, + { + "epoch": 1.56, + "learning_rate": 9.792971257174097e-06, + "loss": 0.1389, + "step": 2162 + }, + { + "epoch": 1.56, + "learning_rate": 9.785160032113961e-06, + "loss": 0.1212, + "step": 2163 + }, + { + "epoch": 1.57, + "learning_rate": 9.77734893819729e-06, + "loss": 0.1275, + "step": 2164 + }, + { + "epoch": 1.57, + "learning_rate": 9.769537980192159e-06, + "loss": 0.1231, + "step": 2165 + }, + { + "epoch": 1.57, + "learning_rate": 9.761727162866574e-06, + "loss": 0.1284, + "step": 2166 + }, + { + "epoch": 1.57, + "learning_rate": 9.75391649098844e-06, + "loss": 0.1369, + "step": 2167 + }, + { + "epoch": 1.57, + "learning_rate": 9.746105969325577e-06, + "loss": 0.1242, + "step": 2168 + }, + { + "epoch": 1.57, + "learning_rate": 9.738295602645715e-06, + "loss": 0.1293, + "step": 2169 + }, + { + "epoch": 1.57, + "learning_rate": 9.73048539571649e-06, + "loss": 0.1285, + "step": 2170 + }, + { + "epoch": 1.57, + "learning_rate": 9.72267535330544e-06, + "loss": 0.1236, + "step": 2171 + }, + { + "epoch": 1.57, + "learning_rate": 9.714865480179998e-06, + "loss": 0.1263, + "step": 2172 + }, + { + "epoch": 1.57, + "learning_rate": 9.707055781107503e-06, + "loss": 0.1333, + "step": 2173 + }, + { + "epoch": 1.57, + "learning_rate": 9.69924626085518e-06, + "loss": 0.1266, + "step": 2174 + }, + { + "epoch": 1.57, + "learning_rate": 9.691436924190146e-06, + "loss": 0.1338, + "step": 2175 + }, + { + "epoch": 1.57, + "learning_rate": 9.68362777587941e-06, + "loss": 0.124, + "step": 2176 + }, + { + "epoch": 1.57, + "learning_rate": 9.675818820689862e-06, + "loss": 0.1252, + "step": 2177 + }, + { + "epoch": 1.58, + "learning_rate": 9.668010063388275e-06, + "loss": 0.1205, + "step": 2178 + }, + { + "epoch": 1.58, + "learning_rate": 9.660201508741304e-06, + "loss": 0.1225, + "step": 2179 + }, + { + "epoch": 1.58, + "learning_rate": 9.652393161515475e-06, + "loss": 0.1226, + "step": 2180 + }, + { + "epoch": 1.58, + "learning_rate": 9.644585026477193e-06, + "loss": 0.1366, + "step": 2181 + }, + { + "epoch": 1.58, + "learning_rate": 9.636777108392727e-06, + "loss": 0.1282, + "step": 2182 + }, + { + "epoch": 1.58, + "learning_rate": 9.62896941202822e-06, + "loss": 0.1281, + "step": 2183 + }, + { + "epoch": 1.58, + "learning_rate": 9.621161942149677e-06, + "loss": 0.1233, + "step": 2184 + }, + { + "epoch": 1.58, + "learning_rate": 9.613354703522965e-06, + "loss": 0.123, + "step": 2185 + }, + { + "epoch": 1.58, + "learning_rate": 9.605547700913806e-06, + "loss": 0.1372, + "step": 2186 + }, + { + "epoch": 1.58, + "learning_rate": 9.597740939087787e-06, + "loss": 0.1327, + "step": 2187 + }, + { + "epoch": 1.58, + "learning_rate": 9.589934422810339e-06, + "loss": 0.1318, + "step": 2188 + }, + { + "epoch": 1.58, + "learning_rate": 9.582128156846747e-06, + "loss": 0.1266, + "step": 2189 + }, + { + "epoch": 1.58, + "learning_rate": 9.574322145962143e-06, + "loss": 0.1216, + "step": 2190 + }, + { + "epoch": 1.58, + "learning_rate": 9.566516394921506e-06, + "loss": 0.1283, + "step": 2191 + }, + { + "epoch": 1.59, + "learning_rate": 9.558710908489648e-06, + "loss": 0.1183, + "step": 2192 + }, + { + "epoch": 1.59, + "learning_rate": 9.550905691431229e-06, + "loss": 0.1205, + "step": 2193 + }, + { + "epoch": 1.59, + "learning_rate": 9.54310074851074e-06, + "loss": 0.1252, + "step": 2194 + }, + { + "epoch": 1.59, + "learning_rate": 9.5352960844925e-06, + "loss": 0.1324, + "step": 2195 + }, + { + "epoch": 1.59, + "learning_rate": 9.527491704140671e-06, + "loss": 0.1238, + "step": 2196 + }, + { + "epoch": 1.59, + "learning_rate": 9.519687612219232e-06, + "loss": 0.1243, + "step": 2197 + }, + { + "epoch": 1.59, + "learning_rate": 9.511883813491986e-06, + "loss": 0.1255, + "step": 2198 + }, + { + "epoch": 1.59, + "learning_rate": 9.504080312722558e-06, + "loss": 0.1205, + "step": 2199 + }, + { + "epoch": 1.59, + "learning_rate": 9.496277114674393e-06, + "loss": 0.1267, + "step": 2200 + }, + { + "epoch": 1.59, + "learning_rate": 9.488474224110752e-06, + "loss": 0.1299, + "step": 2201 + }, + { + "epoch": 1.59, + "learning_rate": 9.480671645794702e-06, + "loss": 0.1232, + "step": 2202 + }, + { + "epoch": 1.59, + "learning_rate": 9.472869384489128e-06, + "loss": 0.1278, + "step": 2203 + }, + { + "epoch": 1.59, + "learning_rate": 9.465067444956716e-06, + "loss": 0.1209, + "step": 2204 + }, + { + "epoch": 1.59, + "learning_rate": 9.457265831959955e-06, + "loss": 0.1333, + "step": 2205 + }, + { + "epoch": 1.6, + "learning_rate": 9.449464550261137e-06, + "loss": 0.1358, + "step": 2206 + }, + { + "epoch": 1.6, + "learning_rate": 9.441663604622354e-06, + "loss": 0.1261, + "step": 2207 + }, + { + "epoch": 1.6, + "learning_rate": 9.433862999805487e-06, + "loss": 0.1309, + "step": 2208 + }, + { + "epoch": 1.6, + "learning_rate": 9.426062740572214e-06, + "loss": 0.1284, + "step": 2209 + }, + { + "epoch": 1.6, + "learning_rate": 9.418262831683999e-06, + "loss": 0.1238, + "step": 2210 + }, + { + "epoch": 1.6, + "learning_rate": 9.410463277902097e-06, + "loss": 0.1237, + "step": 2211 + }, + { + "epoch": 1.6, + "learning_rate": 9.402664083987539e-06, + "loss": 0.1295, + "step": 2212 + }, + { + "epoch": 1.6, + "learning_rate": 9.39486525470114e-06, + "loss": 0.1263, + "step": 2213 + }, + { + "epoch": 1.6, + "learning_rate": 9.387066794803494e-06, + "loss": 0.1331, + "step": 2214 + }, + { + "epoch": 1.6, + "learning_rate": 9.379268709054966e-06, + "loss": 0.1268, + "step": 2215 + }, + { + "epoch": 1.6, + "learning_rate": 9.371471002215695e-06, + "loss": 0.1341, + "step": 2216 + }, + { + "epoch": 1.6, + "learning_rate": 9.363673679045588e-06, + "loss": 0.1256, + "step": 2217 + }, + { + "epoch": 1.6, + "learning_rate": 9.355876744304318e-06, + "loss": 0.127, + "step": 2218 + }, + { + "epoch": 1.61, + "learning_rate": 9.348080202751322e-06, + "loss": 0.1242, + "step": 2219 + }, + { + "epoch": 1.61, + "learning_rate": 9.340284059145794e-06, + "loss": 0.1182, + "step": 2220 + }, + { + "epoch": 1.61, + "learning_rate": 9.33248831824669e-06, + "loss": 0.1292, + "step": 2221 + }, + { + "epoch": 1.61, + "learning_rate": 9.324692984812713e-06, + "loss": 0.1333, + "step": 2222 + }, + { + "epoch": 1.61, + "learning_rate": 9.316898063602325e-06, + "loss": 0.1392, + "step": 2223 + }, + { + "epoch": 1.61, + "learning_rate": 9.309103559373725e-06, + "loss": 0.1288, + "step": 2224 + }, + { + "epoch": 1.61, + "learning_rate": 9.301309476884875e-06, + "loss": 0.129, + "step": 2225 + }, + { + "epoch": 1.61, + "learning_rate": 9.293515820893468e-06, + "loss": 0.1225, + "step": 2226 + }, + { + "epoch": 1.61, + "learning_rate": 9.285722596156935e-06, + "loss": 0.1184, + "step": 2227 + }, + { + "epoch": 1.61, + "learning_rate": 9.277929807432448e-06, + "loss": 0.1249, + "step": 2228 + }, + { + "epoch": 1.61, + "learning_rate": 9.270137459476914e-06, + "loss": 0.119, + "step": 2229 + }, + { + "epoch": 1.61, + "learning_rate": 9.262345557046968e-06, + "loss": 0.1216, + "step": 2230 + }, + { + "epoch": 1.61, + "learning_rate": 9.254554104898973e-06, + "loss": 0.1288, + "step": 2231 + }, + { + "epoch": 1.61, + "learning_rate": 9.246763107789018e-06, + "loss": 0.1247, + "step": 2232 + }, + { + "epoch": 1.62, + "learning_rate": 9.238972570472915e-06, + "loss": 0.122, + "step": 2233 + }, + { + "epoch": 1.62, + "learning_rate": 9.231182497706195e-06, + "loss": 0.1328, + "step": 2234 + }, + { + "epoch": 1.62, + "learning_rate": 9.223392894244106e-06, + "loss": 0.1234, + "step": 2235 + }, + { + "epoch": 1.62, + "learning_rate": 9.215603764841605e-06, + "loss": 0.1177, + "step": 2236 + }, + { + "epoch": 1.62, + "learning_rate": 9.207815114253367e-06, + "loss": 0.1312, + "step": 2237 + }, + { + "epoch": 1.62, + "learning_rate": 9.20002694723377e-06, + "loss": 0.126, + "step": 2238 + }, + { + "epoch": 1.62, + "learning_rate": 9.192239268536898e-06, + "loss": 0.1298, + "step": 2239 + }, + { + "epoch": 1.62, + "learning_rate": 9.184452082916535e-06, + "loss": 0.1322, + "step": 2240 + }, + { + "epoch": 1.62, + "learning_rate": 9.176665395126168e-06, + "loss": 0.1288, + "step": 2241 + }, + { + "epoch": 1.62, + "learning_rate": 9.168879209918976e-06, + "loss": 0.14, + "step": 2242 + }, + { + "epoch": 1.62, + "learning_rate": 9.161093532047832e-06, + "loss": 0.1288, + "step": 2243 + }, + { + "epoch": 1.62, + "learning_rate": 9.153308366265304e-06, + "loss": 0.1293, + "step": 2244 + }, + { + "epoch": 1.62, + "learning_rate": 9.145523717323643e-06, + "loss": 0.1286, + "step": 2245 + }, + { + "epoch": 1.62, + "learning_rate": 9.137739589974784e-06, + "loss": 0.1278, + "step": 2246 + }, + { + "epoch": 1.63, + "learning_rate": 9.129955988970346e-06, + "loss": 0.1222, + "step": 2247 + }, + { + "epoch": 1.63, + "learning_rate": 9.122172919061624e-06, + "loss": 0.1272, + "step": 2248 + }, + { + "epoch": 1.63, + "learning_rate": 9.114390384999591e-06, + "loss": 0.1255, + "step": 2249 + }, + { + "epoch": 1.63, + "learning_rate": 9.106608391534894e-06, + "loss": 0.1278, + "step": 2250 + }, + { + "epoch": 1.63, + "learning_rate": 9.098826943417846e-06, + "loss": 0.1242, + "step": 2251 + }, + { + "epoch": 1.63, + "learning_rate": 9.091046045398429e-06, + "loss": 0.1279, + "step": 2252 + }, + { + "epoch": 1.63, + "learning_rate": 9.08326570222629e-06, + "loss": 0.1372, + "step": 2253 + }, + { + "epoch": 1.63, + "learning_rate": 9.075485918650739e-06, + "loss": 0.1349, + "step": 2254 + }, + { + "epoch": 1.63, + "learning_rate": 9.067706699420744e-06, + "loss": 0.1234, + "step": 2255 + }, + { + "epoch": 1.63, + "learning_rate": 9.059928049284921e-06, + "loss": 0.133, + "step": 2256 + }, + { + "epoch": 1.63, + "learning_rate": 9.05214997299155e-06, + "loss": 0.137, + "step": 2257 + }, + { + "epoch": 1.63, + "learning_rate": 9.044372475288551e-06, + "loss": 0.1309, + "step": 2258 + }, + { + "epoch": 1.63, + "learning_rate": 9.036595560923499e-06, + "loss": 0.1241, + "step": 2259 + }, + { + "epoch": 1.63, + "learning_rate": 9.028819234643606e-06, + "loss": 0.1259, + "step": 2260 + }, + { + "epoch": 1.64, + "learning_rate": 9.021043501195729e-06, + "loss": 0.1232, + "step": 2261 + }, + { + "epoch": 1.64, + "learning_rate": 9.01326836532636e-06, + "loss": 0.1262, + "step": 2262 + }, + { + "epoch": 1.64, + "learning_rate": 9.005493831781632e-06, + "loss": 0.1182, + "step": 2263 + }, + { + "epoch": 1.64, + "learning_rate": 8.997719905307303e-06, + "loss": 0.1363, + "step": 2264 + }, + { + "epoch": 1.64, + "learning_rate": 8.989946590648767e-06, + "loss": 0.1278, + "step": 2265 + }, + { + "epoch": 1.64, + "learning_rate": 8.98217389255104e-06, + "loss": 0.1241, + "step": 2266 + }, + { + "epoch": 1.64, + "learning_rate": 8.974401815758762e-06, + "loss": 0.127, + "step": 2267 + }, + { + "epoch": 1.64, + "learning_rate": 8.9666303650162e-06, + "loss": 0.1269, + "step": 2268 + }, + { + "epoch": 1.64, + "learning_rate": 8.958859545067227e-06, + "loss": 0.1257, + "step": 2269 + }, + { + "epoch": 1.64, + "learning_rate": 8.951089360655341e-06, + "loss": 0.1244, + "step": 2270 + }, + { + "epoch": 1.64, + "learning_rate": 8.94331981652365e-06, + "loss": 0.1258, + "step": 2271 + }, + { + "epoch": 1.64, + "learning_rate": 8.935550917414868e-06, + "loss": 0.127, + "step": 2272 + }, + { + "epoch": 1.64, + "learning_rate": 8.927782668071318e-06, + "loss": 0.1223, + "step": 2273 + }, + { + "epoch": 1.64, + "learning_rate": 8.920015073234926e-06, + "loss": 0.1241, + "step": 2274 + }, + { + "epoch": 1.65, + "learning_rate": 8.912248137647217e-06, + "loss": 0.1279, + "step": 2275 + }, + { + "epoch": 1.65, + "learning_rate": 8.904481866049312e-06, + "loss": 0.1242, + "step": 2276 + }, + { + "epoch": 1.65, + "learning_rate": 8.896716263181937e-06, + "loss": 0.1291, + "step": 2277 + }, + { + "epoch": 1.65, + "learning_rate": 8.888951333785396e-06, + "loss": 0.1234, + "step": 2278 + }, + { + "epoch": 1.65, + "learning_rate": 8.88118708259959e-06, + "loss": 0.1233, + "step": 2279 + }, + { + "epoch": 1.65, + "learning_rate": 8.873423514364003e-06, + "loss": 0.1219, + "step": 2280 + }, + { + "epoch": 1.65, + "learning_rate": 8.865660633817705e-06, + "loss": 0.1111, + "step": 2281 + }, + { + "epoch": 1.65, + "learning_rate": 8.857898445699341e-06, + "loss": 0.1306, + "step": 2282 + }, + { + "epoch": 1.65, + "learning_rate": 8.85013695474714e-06, + "loss": 0.1218, + "step": 2283 + }, + { + "epoch": 1.65, + "learning_rate": 8.8423761656989e-06, + "loss": 0.1216, + "step": 2284 + }, + { + "epoch": 1.65, + "learning_rate": 8.834616083291994e-06, + "loss": 0.1328, + "step": 2285 + }, + { + "epoch": 1.65, + "learning_rate": 8.826856712263363e-06, + "loss": 0.1237, + "step": 2286 + }, + { + "epoch": 1.65, + "learning_rate": 8.819098057349509e-06, + "loss": 0.126, + "step": 2287 + }, + { + "epoch": 1.65, + "learning_rate": 8.811340123286505e-06, + "loss": 0.1245, + "step": 2288 + }, + { + "epoch": 1.66, + "learning_rate": 8.80358291480998e-06, + "loss": 0.1241, + "step": 2289 + }, + { + "epoch": 1.66, + "learning_rate": 8.795826436655118e-06, + "loss": 0.1236, + "step": 2290 + }, + { + "epoch": 1.66, + "learning_rate": 8.78807069355666e-06, + "loss": 0.1295, + "step": 2291 + }, + { + "epoch": 1.66, + "learning_rate": 8.780315690248898e-06, + "loss": 0.1232, + "step": 2292 + }, + { + "epoch": 1.66, + "learning_rate": 8.772561431465669e-06, + "loss": 0.1436, + "step": 2293 + }, + { + "epoch": 1.66, + "learning_rate": 8.764807921940362e-06, + "loss": 0.1344, + "step": 2294 + }, + { + "epoch": 1.66, + "learning_rate": 8.757055166405904e-06, + "loss": 0.1283, + "step": 2295 + }, + { + "epoch": 1.66, + "learning_rate": 8.74930316959476e-06, + "loss": 0.1222, + "step": 2296 + }, + { + "epoch": 1.66, + "learning_rate": 8.741551936238939e-06, + "loss": 0.1244, + "step": 2297 + }, + { + "epoch": 1.66, + "learning_rate": 8.733801471069978e-06, + "loss": 0.1255, + "step": 2298 + }, + { + "epoch": 1.66, + "learning_rate": 8.726051778818945e-06, + "loss": 0.1317, + "step": 2299 + }, + { + "epoch": 1.66, + "learning_rate": 8.718302864216438e-06, + "loss": 0.1299, + "step": 2300 + }, + { + "epoch": 1.66, + "learning_rate": 8.710554731992585e-06, + "loss": 0.1187, + "step": 2301 + }, + { + "epoch": 1.67, + "learning_rate": 8.702807386877025e-06, + "loss": 0.1186, + "step": 2302 + }, + { + "epoch": 1.67, + "learning_rate": 8.695060833598928e-06, + "loss": 0.12, + "step": 2303 + }, + { + "epoch": 1.67, + "learning_rate": 8.687315076886975e-06, + "loss": 0.1261, + "step": 2304 + }, + { + "epoch": 1.67, + "learning_rate": 8.679570121469358e-06, + "loss": 0.12, + "step": 2305 + }, + { + "epoch": 1.67, + "learning_rate": 8.671825972073786e-06, + "loss": 0.1151, + "step": 2306 + }, + { + "epoch": 1.67, + "learning_rate": 8.664082633427474e-06, + "loss": 0.1096, + "step": 2307 + }, + { + "epoch": 1.67, + "learning_rate": 8.65634011025714e-06, + "loss": 0.1235, + "step": 2308 + }, + { + "epoch": 1.67, + "learning_rate": 8.648598407289008e-06, + "loss": 0.1158, + "step": 2309 + }, + { + "epoch": 1.67, + "learning_rate": 8.640857529248794e-06, + "loss": 0.1218, + "step": 2310 + }, + { + "epoch": 1.67, + "learning_rate": 8.633117480861722e-06, + "loss": 0.1219, + "step": 2311 + }, + { + "epoch": 1.67, + "learning_rate": 8.625378266852497e-06, + "loss": 0.1261, + "step": 2312 + }, + { + "epoch": 1.67, + "learning_rate": 8.617639891945328e-06, + "loss": 0.1272, + "step": 2313 + }, + { + "epoch": 1.67, + "learning_rate": 8.609902360863899e-06, + "loss": 0.1243, + "step": 2314 + }, + { + "epoch": 1.67, + "learning_rate": 8.602165678331385e-06, + "loss": 0.1264, + "step": 2315 + }, + { + "epoch": 1.68, + "learning_rate": 8.594429849070444e-06, + "loss": 0.1194, + "step": 2316 + }, + { + "epoch": 1.68, + "learning_rate": 8.58669487780321e-06, + "loss": 0.1386, + "step": 2317 + }, + { + "epoch": 1.68, + "learning_rate": 8.578960769251297e-06, + "loss": 0.1188, + "step": 2318 + }, + { + "epoch": 1.68, + "learning_rate": 8.571227528135789e-06, + "loss": 0.1276, + "step": 2319 + }, + { + "epoch": 1.68, + "learning_rate": 8.563495159177237e-06, + "loss": 0.1255, + "step": 2320 + }, + { + "epoch": 1.68, + "learning_rate": 8.555763667095672e-06, + "loss": 0.1272, + "step": 2321 + }, + { + "epoch": 1.68, + "learning_rate": 8.54803305661058e-06, + "loss": 0.1231, + "step": 2322 + }, + { + "epoch": 1.68, + "learning_rate": 8.54030333244091e-06, + "loss": 0.1242, + "step": 2323 + }, + { + "epoch": 1.68, + "learning_rate": 8.53257449930507e-06, + "loss": 0.1208, + "step": 2324 + }, + { + "epoch": 1.68, + "learning_rate": 8.524846561920925e-06, + "loss": 0.1285, + "step": 2325 + }, + { + "epoch": 1.68, + "learning_rate": 8.517119525005793e-06, + "loss": 0.1229, + "step": 2326 + }, + { + "epoch": 1.68, + "learning_rate": 8.509393393276446e-06, + "loss": 0.128, + "step": 2327 + }, + { + "epoch": 1.68, + "learning_rate": 8.501668171449095e-06, + "loss": 0.1184, + "step": 2328 + }, + { + "epoch": 1.68, + "learning_rate": 8.493943864239406e-06, + "loss": 0.1345, + "step": 2329 + }, + { + "epoch": 1.69, + "learning_rate": 8.486220476362476e-06, + "loss": 0.1308, + "step": 2330 + }, + { + "epoch": 1.69, + "learning_rate": 8.47849801253285e-06, + "loss": 0.1256, + "step": 2331 + }, + { + "epoch": 1.69, + "learning_rate": 8.470776477464501e-06, + "loss": 0.1313, + "step": 2332 + }, + { + "epoch": 1.69, + "learning_rate": 8.463055875870844e-06, + "loss": 0.1209, + "step": 2333 + }, + { + "epoch": 1.69, + "learning_rate": 8.455336212464715e-06, + "loss": 0.129, + "step": 2334 + }, + { + "epoch": 1.69, + "learning_rate": 8.447617491958382e-06, + "loss": 0.1204, + "step": 2335 + }, + { + "epoch": 1.69, + "learning_rate": 8.43989971906354e-06, + "loss": 0.1385, + "step": 2336 + }, + { + "epoch": 1.69, + "learning_rate": 8.432182898491301e-06, + "loss": 0.1308, + "step": 2337 + }, + { + "epoch": 1.69, + "learning_rate": 8.424467034952198e-06, + "loss": 0.1158, + "step": 2338 + }, + { + "epoch": 1.69, + "learning_rate": 8.416752133156177e-06, + "loss": 0.1323, + "step": 2339 + }, + { + "epoch": 1.69, + "learning_rate": 8.409038197812598e-06, + "loss": 0.1256, + "step": 2340 + }, + { + "epoch": 1.69, + "learning_rate": 8.401325233630232e-06, + "loss": 0.1258, + "step": 2341 + }, + { + "epoch": 1.69, + "learning_rate": 8.393613245317263e-06, + "loss": 0.1277, + "step": 2342 + }, + { + "epoch": 1.69, + "learning_rate": 8.385902237581269e-06, + "loss": 0.1277, + "step": 2343 + }, + { + "epoch": 1.7, + "learning_rate": 8.378192215129235e-06, + "loss": 0.1254, + "step": 2344 + }, + { + "epoch": 1.7, + "learning_rate": 8.370483182667543e-06, + "loss": 0.1301, + "step": 2345 + }, + { + "epoch": 1.7, + "learning_rate": 8.362775144901971e-06, + "loss": 0.1273, + "step": 2346 + }, + { + "epoch": 1.7, + "learning_rate": 8.355068106537691e-06, + "loss": 0.1285, + "step": 2347 + }, + { + "epoch": 1.7, + "learning_rate": 8.347362072279263e-06, + "loss": 0.119, + "step": 2348 + }, + { + "epoch": 1.7, + "learning_rate": 8.339657046830635e-06, + "loss": 0.1306, + "step": 2349 + }, + { + "epoch": 1.7, + "learning_rate": 8.331953034895139e-06, + "loss": 0.1289, + "step": 2350 + }, + { + "epoch": 1.7, + "learning_rate": 8.32425004117549e-06, + "loss": 0.1263, + "step": 2351 + }, + { + "epoch": 1.7, + "learning_rate": 8.31654807037378e-06, + "loss": 0.1282, + "step": 2352 + }, + { + "epoch": 1.7, + "learning_rate": 8.308847127191473e-06, + "loss": 0.117, + "step": 2353 + }, + { + "epoch": 1.7, + "learning_rate": 8.30114721632941e-06, + "loss": 0.1261, + "step": 2354 + }, + { + "epoch": 1.7, + "learning_rate": 8.293448342487807e-06, + "loss": 0.1258, + "step": 2355 + }, + { + "epoch": 1.7, + "learning_rate": 8.285750510366236e-06, + "loss": 0.1254, + "step": 2356 + }, + { + "epoch": 1.7, + "learning_rate": 8.278053724663638e-06, + "loss": 0.1259, + "step": 2357 + }, + { + "epoch": 1.71, + "learning_rate": 8.270357990078318e-06, + "loss": 0.1177, + "step": 2358 + }, + { + "epoch": 1.71, + "learning_rate": 8.262663311307936e-06, + "loss": 0.1235, + "step": 2359 + }, + { + "epoch": 1.71, + "learning_rate": 8.254969693049506e-06, + "loss": 0.118, + "step": 2360 + }, + { + "epoch": 1.71, + "learning_rate": 8.247277139999401e-06, + "loss": 0.1159, + "step": 2361 + }, + { + "epoch": 1.71, + "learning_rate": 8.239585656853338e-06, + "loss": 0.1245, + "step": 2362 + }, + { + "epoch": 1.71, + "learning_rate": 8.231895248306382e-06, + "loss": 0.1234, + "step": 2363 + }, + { + "epoch": 1.71, + "learning_rate": 8.22420591905294e-06, + "loss": 0.1249, + "step": 2364 + }, + { + "epoch": 1.71, + "learning_rate": 8.216517673786767e-06, + "loss": 0.1333, + "step": 2365 + }, + { + "epoch": 1.71, + "learning_rate": 8.208830517200952e-06, + "loss": 0.1185, + "step": 2366 + }, + { + "epoch": 1.71, + "learning_rate": 8.20114445398792e-06, + "loss": 0.1247, + "step": 2367 + }, + { + "epoch": 1.71, + "learning_rate": 8.193459488839426e-06, + "loss": 0.1285, + "step": 2368 + }, + { + "epoch": 1.71, + "learning_rate": 8.185775626446557e-06, + "loss": 0.1212, + "step": 2369 + }, + { + "epoch": 1.71, + "learning_rate": 8.178092871499728e-06, + "loss": 0.1288, + "step": 2370 + }, + { + "epoch": 1.72, + "learning_rate": 8.170411228688671e-06, + "loss": 0.1163, + "step": 2371 + }, + { + "epoch": 1.72, + "learning_rate": 8.162730702702456e-06, + "loss": 0.1275, + "step": 2372 + }, + { + "epoch": 1.72, + "learning_rate": 8.155051298229454e-06, + "loss": 0.1346, + "step": 2373 + }, + { + "epoch": 1.72, + "learning_rate": 8.147373019957357e-06, + "loss": 0.1286, + "step": 2374 + }, + { + "epoch": 1.72, + "learning_rate": 8.139695872573172e-06, + "loss": 0.1337, + "step": 2375 + }, + { + "epoch": 1.72, + "learning_rate": 8.132019860763212e-06, + "loss": 0.1261, + "step": 2376 + }, + { + "epoch": 1.72, + "learning_rate": 8.124344989213099e-06, + "loss": 0.1174, + "step": 2377 + }, + { + "epoch": 1.72, + "learning_rate": 8.11667126260776e-06, + "loss": 0.1198, + "step": 2378 + }, + { + "epoch": 1.72, + "learning_rate": 8.10899868563142e-06, + "loss": 0.1332, + "step": 2379 + }, + { + "epoch": 1.72, + "learning_rate": 8.101327262967603e-06, + "loss": 0.1177, + "step": 2380 + }, + { + "epoch": 1.72, + "learning_rate": 8.093656999299132e-06, + "loss": 0.1277, + "step": 2381 + }, + { + "epoch": 1.72, + "learning_rate": 8.085987899308117e-06, + "loss": 0.1262, + "step": 2382 + }, + { + "epoch": 1.72, + "learning_rate": 8.078319967675964e-06, + "loss": 0.122, + "step": 2383 + }, + { + "epoch": 1.72, + "learning_rate": 8.07065320908336e-06, + "loss": 0.1158, + "step": 2384 + }, + { + "epoch": 1.73, + "learning_rate": 8.062987628210276e-06, + "loss": 0.1223, + "step": 2385 + }, + { + "epoch": 1.73, + "learning_rate": 8.05532322973597e-06, + "loss": 0.1239, + "step": 2386 + }, + { + "epoch": 1.73, + "learning_rate": 8.047660018338971e-06, + "loss": 0.12, + "step": 2387 + }, + { + "epoch": 1.73, + "learning_rate": 8.03999799869709e-06, + "loss": 0.1238, + "step": 2388 + }, + { + "epoch": 1.73, + "learning_rate": 8.032337175487407e-06, + "loss": 0.1296, + "step": 2389 + }, + { + "epoch": 1.73, + "learning_rate": 8.024677553386272e-06, + "loss": 0.1286, + "step": 2390 + }, + { + "epoch": 1.73, + "learning_rate": 8.0170191370693e-06, + "loss": 0.1323, + "step": 2391 + }, + { + "epoch": 1.73, + "learning_rate": 8.009361931211375e-06, + "loss": 0.1234, + "step": 2392 + }, + { + "epoch": 1.73, + "learning_rate": 8.001705940486636e-06, + "loss": 0.1157, + "step": 2393 + }, + { + "epoch": 1.73, + "learning_rate": 7.994051169568486e-06, + "loss": 0.1255, + "step": 2394 + }, + { + "epoch": 1.73, + "learning_rate": 7.986397623129578e-06, + "loss": 0.1256, + "step": 2395 + }, + { + "epoch": 1.73, + "learning_rate": 7.978745305841822e-06, + "loss": 0.133, + "step": 2396 + }, + { + "epoch": 1.73, + "learning_rate": 7.971094222376375e-06, + "loss": 0.1262, + "step": 2397 + }, + { + "epoch": 1.73, + "learning_rate": 7.96344437740364e-06, + "loss": 0.1313, + "step": 2398 + }, + { + "epoch": 1.74, + "learning_rate": 7.955795775593271e-06, + "loss": 0.1165, + "step": 2399 + }, + { + "epoch": 1.74, + "learning_rate": 7.948148421614148e-06, + "loss": 0.118, + "step": 2400 + }, + { + "epoch": 1.74, + "learning_rate": 7.94050232013441e-06, + "loss": 0.1242, + "step": 2401 + }, + { + "epoch": 1.74, + "learning_rate": 7.932857475821416e-06, + "loss": 0.1238, + "step": 2402 + }, + { + "epoch": 1.74, + "learning_rate": 7.925213893341764e-06, + "loss": 0.1259, + "step": 2403 + }, + { + "epoch": 1.74, + "learning_rate": 7.917571577361276e-06, + "loss": 0.1201, + "step": 2404 + }, + { + "epoch": 1.74, + "learning_rate": 7.909930532545009e-06, + "loss": 0.1342, + "step": 2405 + }, + { + "epoch": 1.74, + "learning_rate": 7.902290763557237e-06, + "loss": 0.1328, + "step": 2406 + }, + { + "epoch": 1.74, + "learning_rate": 7.89465227506146e-06, + "loss": 0.1239, + "step": 2407 + }, + { + "epoch": 1.74, + "learning_rate": 7.887015071720394e-06, + "loss": 0.1304, + "step": 2408 + }, + { + "epoch": 1.74, + "learning_rate": 7.87937915819597e-06, + "loss": 0.1263, + "step": 2409 + }, + { + "epoch": 1.74, + "learning_rate": 7.871744539149332e-06, + "loss": 0.1292, + "step": 2410 + }, + { + "epoch": 1.74, + "learning_rate": 7.86411121924084e-06, + "loss": 0.1318, + "step": 2411 + }, + { + "epoch": 1.74, + "learning_rate": 7.856479203130047e-06, + "loss": 0.1294, + "step": 2412 + }, + { + "epoch": 1.75, + "learning_rate": 7.848848495475724e-06, + "loss": 0.119, + "step": 2413 + }, + { + "epoch": 1.75, + "learning_rate": 7.841219100935835e-06, + "loss": 0.132, + "step": 2414 + }, + { + "epoch": 1.75, + "learning_rate": 7.833591024167549e-06, + "loss": 0.124, + "step": 2415 + }, + { + "epoch": 1.75, + "learning_rate": 7.825964269827223e-06, + "loss": 0.1247, + "step": 2416 + }, + { + "epoch": 1.75, + "learning_rate": 7.818338842570413e-06, + "loss": 0.1285, + "step": 2417 + }, + { + "epoch": 1.75, + "learning_rate": 7.810714747051861e-06, + "loss": 0.1282, + "step": 2418 + }, + { + "epoch": 1.75, + "learning_rate": 7.803091987925501e-06, + "loss": 0.1256, + "step": 2419 + }, + { + "epoch": 1.75, + "learning_rate": 7.795470569844444e-06, + "loss": 0.1256, + "step": 2420 + }, + { + "epoch": 1.75, + "learning_rate": 7.787850497460987e-06, + "loss": 0.1295, + "step": 2421 + }, + { + "epoch": 1.75, + "learning_rate": 7.780231775426603e-06, + "loss": 0.1229, + "step": 2422 + }, + { + "epoch": 1.75, + "learning_rate": 7.772614408391947e-06, + "loss": 0.1282, + "step": 2423 + }, + { + "epoch": 1.75, + "learning_rate": 7.764998401006841e-06, + "loss": 0.1129, + "step": 2424 + }, + { + "epoch": 1.75, + "learning_rate": 7.757383757920278e-06, + "loss": 0.1352, + "step": 2425 + }, + { + "epoch": 1.75, + "learning_rate": 7.74977048378042e-06, + "loss": 0.1161, + "step": 2426 + }, + { + "epoch": 1.76, + "learning_rate": 7.742158583234587e-06, + "loss": 0.1251, + "step": 2427 + }, + { + "epoch": 1.76, + "learning_rate": 7.734548060929272e-06, + "loss": 0.1363, + "step": 2428 + }, + { + "epoch": 1.76, + "learning_rate": 7.726938921510116e-06, + "loss": 0.1244, + "step": 2429 + }, + { + "epoch": 1.76, + "learning_rate": 7.719331169621918e-06, + "loss": 0.1234, + "step": 2430 + }, + { + "epoch": 1.76, + "learning_rate": 7.711724809908638e-06, + "loss": 0.1194, + "step": 2431 + }, + { + "epoch": 1.76, + "learning_rate": 7.704119847013377e-06, + "loss": 0.1187, + "step": 2432 + }, + { + "epoch": 1.76, + "learning_rate": 7.696516285578388e-06, + "loss": 0.1219, + "step": 2433 + }, + { + "epoch": 1.76, + "learning_rate": 7.688914130245067e-06, + "loss": 0.1173, + "step": 2434 + }, + { + "epoch": 1.76, + "learning_rate": 7.681313385653951e-06, + "loss": 0.1186, + "step": 2435 + }, + { + "epoch": 1.76, + "learning_rate": 7.673714056444716e-06, + "loss": 0.1296, + "step": 2436 + }, + { + "epoch": 1.76, + "learning_rate": 7.666116147256178e-06, + "loss": 0.1348, + "step": 2437 + }, + { + "epoch": 1.76, + "learning_rate": 7.658519662726277e-06, + "loss": 0.1265, + "step": 2438 + }, + { + "epoch": 1.76, + "learning_rate": 7.650924607492096e-06, + "loss": 0.1273, + "step": 2439 + }, + { + "epoch": 1.76, + "learning_rate": 7.643330986189834e-06, + "loss": 0.1248, + "step": 2440 + }, + { + "epoch": 1.77, + "learning_rate": 7.635738803454821e-06, + "loss": 0.1235, + "step": 2441 + }, + { + "epoch": 1.77, + "learning_rate": 7.628148063921507e-06, + "loss": 0.1254, + "step": 2442 + }, + { + "epoch": 1.77, + "learning_rate": 7.620558772223461e-06, + "loss": 0.1273, + "step": 2443 + }, + { + "epoch": 1.77, + "learning_rate": 7.6129709329933695e-06, + "loss": 0.1216, + "step": 2444 + }, + { + "epoch": 1.77, + "learning_rate": 7.605384550863031e-06, + "loss": 0.1253, + "step": 2445 + }, + { + "epoch": 1.77, + "learning_rate": 7.597799630463355e-06, + "loss": 0.1262, + "step": 2446 + }, + { + "epoch": 1.77, + "learning_rate": 7.5902161764243584e-06, + "loss": 0.1293, + "step": 2447 + }, + { + "epoch": 1.77, + "learning_rate": 7.5826341933751635e-06, + "loss": 0.1328, + "step": 2448 + }, + { + "epoch": 1.77, + "learning_rate": 7.575053685943995e-06, + "loss": 0.1296, + "step": 2449 + }, + { + "epoch": 1.77, + "learning_rate": 7.567474658758176e-06, + "loss": 0.1216, + "step": 2450 + }, + { + "epoch": 1.77, + "learning_rate": 7.559897116444125e-06, + "loss": 0.1264, + "step": 2451 + }, + { + "epoch": 1.77, + "learning_rate": 7.552321063627357e-06, + "loss": 0.1268, + "step": 2452 + }, + { + "epoch": 1.77, + "learning_rate": 7.544746504932477e-06, + "loss": 0.1252, + "step": 2453 + }, + { + "epoch": 1.78, + "learning_rate": 7.537173444983174e-06, + "loss": 0.1287, + "step": 2454 + }, + { + "epoch": 1.78, + "learning_rate": 7.52960188840223e-06, + "loss": 0.1208, + "step": 2455 + }, + { + "epoch": 1.78, + "learning_rate": 7.522031839811497e-06, + "loss": 0.1249, + "step": 2456 + }, + { + "epoch": 1.78, + "learning_rate": 7.514463303831923e-06, + "loss": 0.1164, + "step": 2457 + }, + { + "epoch": 1.78, + "learning_rate": 7.506896285083518e-06, + "loss": 0.1221, + "step": 2458 + }, + { + "epoch": 1.78, + "learning_rate": 7.499330788185373e-06, + "loss": 0.1304, + "step": 2459 + }, + { + "epoch": 1.78, + "learning_rate": 7.49176681775565e-06, + "loss": 0.1224, + "step": 2460 + }, + { + "epoch": 1.78, + "learning_rate": 7.484204378411577e-06, + "loss": 0.1248, + "step": 2461 + }, + { + "epoch": 1.78, + "learning_rate": 7.476643474769449e-06, + "loss": 0.1298, + "step": 2462 + }, + { + "epoch": 1.78, + "learning_rate": 7.46908411144462e-06, + "loss": 0.1269, + "step": 2463 + }, + { + "epoch": 1.78, + "learning_rate": 7.461526293051511e-06, + "loss": 0.1313, + "step": 2464 + }, + { + "epoch": 1.78, + "learning_rate": 7.453970024203591e-06, + "loss": 0.1341, + "step": 2465 + }, + { + "epoch": 1.78, + "learning_rate": 7.4464153095133874e-06, + "loss": 0.122, + "step": 2466 + }, + { + "epoch": 1.78, + "learning_rate": 7.438862153592483e-06, + "loss": 0.134, + "step": 2467 + }, + { + "epoch": 1.79, + "learning_rate": 7.431310561051505e-06, + "loss": 0.1195, + "step": 2468 + }, + { + "epoch": 1.79, + "learning_rate": 7.423760536500124e-06, + "loss": 0.132, + "step": 2469 + }, + { + "epoch": 1.79, + "learning_rate": 7.416212084547057e-06, + "loss": 0.128, + "step": 2470 + }, + { + "epoch": 1.79, + "learning_rate": 7.408665209800059e-06, + "loss": 0.1335, + "step": 2471 + }, + { + "epoch": 1.79, + "learning_rate": 7.401119916865924e-06, + "loss": 0.1206, + "step": 2472 + }, + { + "epoch": 1.79, + "learning_rate": 7.393576210350482e-06, + "loss": 0.1432, + "step": 2473 + }, + { + "epoch": 1.79, + "learning_rate": 7.386034094858588e-06, + "loss": 0.119, + "step": 2474 + }, + { + "epoch": 1.79, + "learning_rate": 7.3784935749941324e-06, + "loss": 0.1295, + "step": 2475 + }, + { + "epoch": 1.79, + "learning_rate": 7.3709546553600286e-06, + "loss": 0.1271, + "step": 2476 + }, + { + "epoch": 1.79, + "learning_rate": 7.3634173405582145e-06, + "loss": 0.121, + "step": 2477 + }, + { + "epoch": 1.79, + "learning_rate": 7.355881635189647e-06, + "loss": 0.1273, + "step": 2478 + }, + { + "epoch": 1.79, + "learning_rate": 7.348347543854302e-06, + "loss": 0.1197, + "step": 2479 + }, + { + "epoch": 1.79, + "learning_rate": 7.3408150711511685e-06, + "loss": 0.1198, + "step": 2480 + }, + { + "epoch": 1.79, + "learning_rate": 7.333284221678248e-06, + "loss": 0.1313, + "step": 2481 + }, + { + "epoch": 1.8, + "learning_rate": 7.325755000032553e-06, + "loss": 0.1363, + "step": 2482 + }, + { + "epoch": 1.8, + "learning_rate": 7.318227410810101e-06, + "loss": 0.1237, + "step": 2483 + }, + { + "epoch": 1.8, + "learning_rate": 7.310701458605911e-06, + "loss": 0.1315, + "step": 2484 + }, + { + "epoch": 1.8, + "learning_rate": 7.303177148014007e-06, + "loss": 0.1361, + "step": 2485 + }, + { + "epoch": 1.8, + "learning_rate": 7.295654483627405e-06, + "loss": 0.1187, + "step": 2486 + }, + { + "epoch": 1.8, + "learning_rate": 7.288133470038124e-06, + "loss": 0.1217, + "step": 2487 + }, + { + "epoch": 1.8, + "learning_rate": 7.280614111837165e-06, + "loss": 0.1321, + "step": 2488 + }, + { + "epoch": 1.8, + "learning_rate": 7.273096413614531e-06, + "loss": 0.132, + "step": 2489 + }, + { + "epoch": 1.8, + "learning_rate": 7.265580379959203e-06, + "loss": 0.1261, + "step": 2490 + }, + { + "epoch": 1.8, + "learning_rate": 7.258066015459146e-06, + "loss": 0.1277, + "step": 2491 + }, + { + "epoch": 1.8, + "learning_rate": 7.250553324701313e-06, + "loss": 0.1269, + "step": 2492 + }, + { + "epoch": 1.8, + "learning_rate": 7.243042312271624e-06, + "loss": 0.1261, + "step": 2493 + }, + { + "epoch": 1.8, + "learning_rate": 7.235532982754986e-06, + "loss": 0.1394, + "step": 2494 + }, + { + "epoch": 1.8, + "learning_rate": 7.228025340735272e-06, + "loss": 0.1248, + "step": 2495 + }, + { + "epoch": 1.81, + "learning_rate": 7.220519390795325e-06, + "loss": 0.1257, + "step": 2496 + }, + { + "epoch": 1.81, + "learning_rate": 7.213015137516959e-06, + "loss": 0.1278, + "step": 2497 + }, + { + "epoch": 1.81, + "learning_rate": 7.20551258548095e-06, + "loss": 0.1241, + "step": 2498 + }, + { + "epoch": 1.81, + "learning_rate": 7.198011739267035e-06, + "loss": 0.1256, + "step": 2499 + }, + { + "epoch": 1.81, + "learning_rate": 7.190512603453907e-06, + "loss": 0.122, + "step": 2500 + }, + { + "epoch": 1.81, + "eval_loss": 0.12310730665922165, + "eval_runtime": 717.8764, + "eval_samples_per_second": 69.65, + "eval_steps_per_second": 2.177, + "step": 2500 + }, + { + "epoch": 1.81, + "learning_rate": 7.183015182619224e-06, + "loss": 0.1294, + "step": 2501 + }, + { + "epoch": 1.81, + "learning_rate": 7.17551948133959e-06, + "loss": 0.1244, + "step": 2502 + }, + { + "epoch": 1.81, + "learning_rate": 7.168025504190558e-06, + "loss": 0.138, + "step": 2503 + }, + { + "epoch": 1.81, + "learning_rate": 7.160533255746634e-06, + "loss": 0.1154, + "step": 2504 + }, + { + "epoch": 1.81, + "learning_rate": 7.153042740581264e-06, + "loss": 0.1346, + "step": 2505 + }, + { + "epoch": 1.81, + "learning_rate": 7.145553963266838e-06, + "loss": 0.1283, + "step": 2506 + }, + { + "epoch": 1.81, + "learning_rate": 7.138066928374687e-06, + "loss": 0.1306, + "step": 2507 + }, + { + "epoch": 1.81, + "learning_rate": 7.130581640475076e-06, + "loss": 0.1188, + "step": 2508 + }, + { + "epoch": 1.81, + "learning_rate": 7.123098104137203e-06, + "loss": 0.1344, + "step": 2509 + }, + { + "epoch": 1.82, + "learning_rate": 7.1156163239291995e-06, + "loss": 0.1354, + "step": 2510 + }, + { + "epoch": 1.82, + "learning_rate": 7.108136304418121e-06, + "loss": 0.1285, + "step": 2511 + }, + { + "epoch": 1.82, + "learning_rate": 7.100658050169953e-06, + "loss": 0.1306, + "step": 2512 + }, + { + "epoch": 1.82, + "learning_rate": 7.0931815657496005e-06, + "loss": 0.1217, + "step": 2513 + }, + { + "epoch": 1.82, + "learning_rate": 7.085706855720888e-06, + "loss": 0.1177, + "step": 2514 + }, + { + "epoch": 1.82, + "learning_rate": 7.078233924646559e-06, + "loss": 0.1314, + "step": 2515 + }, + { + "epoch": 1.82, + "learning_rate": 7.070762777088269e-06, + "loss": 0.1211, + "step": 2516 + }, + { + "epoch": 1.82, + "learning_rate": 7.063293417606585e-06, + "loss": 0.1259, + "step": 2517 + }, + { + "epoch": 1.82, + "learning_rate": 7.05582585076098e-06, + "loss": 0.1245, + "step": 2518 + }, + { + "epoch": 1.82, + "learning_rate": 7.048360081109844e-06, + "loss": 0.1483, + "step": 2519 + }, + { + "epoch": 1.82, + "learning_rate": 7.040896113210456e-06, + "loss": 0.1284, + "step": 2520 + }, + { + "epoch": 1.82, + "learning_rate": 7.033433951619002e-06, + "loss": 0.1282, + "step": 2521 + }, + { + "epoch": 1.82, + "learning_rate": 7.0259736008905655e-06, + "loss": 0.1247, + "step": 2522 + }, + { + "epoch": 1.82, + "learning_rate": 7.018515065579121e-06, + "loss": 0.1328, + "step": 2523 + }, + { + "epoch": 1.83, + "learning_rate": 7.011058350237538e-06, + "loss": 0.1241, + "step": 2524 + }, + { + "epoch": 1.83, + "learning_rate": 7.003603459417576e-06, + "loss": 0.1213, + "step": 2525 + }, + { + "epoch": 1.83, + "learning_rate": 6.996150397669876e-06, + "loss": 0.1239, + "step": 2526 + }, + { + "epoch": 1.83, + "learning_rate": 6.988699169543969e-06, + "loss": 0.1252, + "step": 2527 + }, + { + "epoch": 1.83, + "learning_rate": 6.981249779588261e-06, + "loss": 0.1209, + "step": 2528 + }, + { + "epoch": 1.83, + "learning_rate": 6.973802232350038e-06, + "loss": 0.1251, + "step": 2529 + }, + { + "epoch": 1.83, + "learning_rate": 6.966356532375462e-06, + "loss": 0.1285, + "step": 2530 + }, + { + "epoch": 1.83, + "learning_rate": 6.958912684209566e-06, + "loss": 0.1317, + "step": 2531 + }, + { + "epoch": 1.83, + "learning_rate": 6.951470692396256e-06, + "loss": 0.1319, + "step": 2532 + }, + { + "epoch": 1.83, + "learning_rate": 6.9440305614782985e-06, + "loss": 0.1165, + "step": 2533 + }, + { + "epoch": 1.83, + "learning_rate": 6.936592295997328e-06, + "loss": 0.1219, + "step": 2534 + }, + { + "epoch": 1.83, + "learning_rate": 6.929155900493843e-06, + "loss": 0.1253, + "step": 2535 + }, + { + "epoch": 1.83, + "learning_rate": 6.921721379507197e-06, + "loss": 0.1243, + "step": 2536 + }, + { + "epoch": 1.84, + "learning_rate": 6.914288737575597e-06, + "loss": 0.1317, + "step": 2537 + }, + { + "epoch": 1.84, + "learning_rate": 6.906857979236109e-06, + "loss": 0.1251, + "step": 2538 + }, + { + "epoch": 1.84, + "learning_rate": 6.899429109024644e-06, + "loss": 0.1231, + "step": 2539 + }, + { + "epoch": 1.84, + "learning_rate": 6.8920021314759635e-06, + "loss": 0.1282, + "step": 2540 + }, + { + "epoch": 1.84, + "learning_rate": 6.884577051123671e-06, + "loss": 0.1274, + "step": 2541 + }, + { + "epoch": 1.84, + "learning_rate": 6.877153872500215e-06, + "loss": 0.1227, + "step": 2542 + }, + { + "epoch": 1.84, + "learning_rate": 6.869732600136881e-06, + "loss": 0.1357, + "step": 2543 + }, + { + "epoch": 1.84, + "learning_rate": 6.862313238563791e-06, + "loss": 0.1344, + "step": 2544 + }, + { + "epoch": 1.84, + "learning_rate": 6.854895792309903e-06, + "loss": 0.1332, + "step": 2545 + }, + { + "epoch": 1.84, + "learning_rate": 6.847480265903002e-06, + "loss": 0.1177, + "step": 2546 + }, + { + "epoch": 1.84, + "learning_rate": 6.840066663869701e-06, + "loss": 0.1196, + "step": 2547 + }, + { + "epoch": 1.84, + "learning_rate": 6.832654990735446e-06, + "loss": 0.1283, + "step": 2548 + }, + { + "epoch": 1.84, + "learning_rate": 6.825245251024497e-06, + "loss": 0.1364, + "step": 2549 + }, + { + "epoch": 1.84, + "learning_rate": 6.817837449259938e-06, + "loss": 0.1233, + "step": 2550 + }, + { + "epoch": 1.85, + "learning_rate": 6.810431589963668e-06, + "loss": 0.1376, + "step": 2551 + }, + { + "epoch": 1.85, + "learning_rate": 6.803027677656398e-06, + "loss": 0.1195, + "step": 2552 + }, + { + "epoch": 1.85, + "learning_rate": 6.795625716857659e-06, + "loss": 0.1303, + "step": 2553 + }, + { + "epoch": 1.85, + "learning_rate": 6.7882257120857805e-06, + "loss": 0.1185, + "step": 2554 + }, + { + "epoch": 1.85, + "learning_rate": 6.7808276678579055e-06, + "loss": 0.119, + "step": 2555 + }, + { + "epoch": 1.85, + "learning_rate": 6.773431588689976e-06, + "loss": 0.1215, + "step": 2556 + }, + { + "epoch": 1.85, + "learning_rate": 6.7660374790967365e-06, + "loss": 0.1393, + "step": 2557 + }, + { + "epoch": 1.85, + "learning_rate": 6.75864534359173e-06, + "loss": 0.1244, + "step": 2558 + }, + { + "epoch": 1.85, + "learning_rate": 6.751255186687291e-06, + "loss": 0.1313, + "step": 2559 + }, + { + "epoch": 1.85, + "learning_rate": 6.743867012894551e-06, + "loss": 0.136, + "step": 2560 + }, + { + "epoch": 1.85, + "learning_rate": 6.736480826723425e-06, + "loss": 0.1299, + "step": 2561 + }, + { + "epoch": 1.85, + "learning_rate": 6.7290966326826215e-06, + "loss": 0.1237, + "step": 2562 + }, + { + "epoch": 1.85, + "learning_rate": 6.721714435279626e-06, + "loss": 0.1204, + "step": 2563 + }, + { + "epoch": 1.85, + "learning_rate": 6.714334239020712e-06, + "loss": 0.1265, + "step": 2564 + }, + { + "epoch": 1.86, + "learning_rate": 6.706956048410926e-06, + "loss": 0.125, + "step": 2565 + }, + { + "epoch": 1.86, + "learning_rate": 6.699579867954094e-06, + "loss": 0.1317, + "step": 2566 + }, + { + "epoch": 1.86, + "learning_rate": 6.692205702152812e-06, + "loss": 0.1394, + "step": 2567 + }, + { + "epoch": 1.86, + "learning_rate": 6.684833555508446e-06, + "loss": 0.1212, + "step": 2568 + }, + { + "epoch": 1.86, + "learning_rate": 6.677463432521134e-06, + "loss": 0.1256, + "step": 2569 + }, + { + "epoch": 1.86, + "learning_rate": 6.670095337689776e-06, + "loss": 0.1181, + "step": 2570 + }, + { + "epoch": 1.86, + "learning_rate": 6.6627292755120324e-06, + "loss": 0.1215, + "step": 2571 + }, + { + "epoch": 1.86, + "learning_rate": 6.655365250484325e-06, + "loss": 0.1295, + "step": 2572 + }, + { + "epoch": 1.86, + "learning_rate": 6.6480032671018276e-06, + "loss": 0.1269, + "step": 2573 + }, + { + "epoch": 1.86, + "learning_rate": 6.640643329858476e-06, + "loss": 0.1341, + "step": 2574 + }, + { + "epoch": 1.86, + "learning_rate": 6.633285443246951e-06, + "loss": 0.1211, + "step": 2575 + }, + { + "epoch": 1.86, + "learning_rate": 6.625929611758679e-06, + "loss": 0.1183, + "step": 2576 + }, + { + "epoch": 1.86, + "learning_rate": 6.6185758398838405e-06, + "loss": 0.1335, + "step": 2577 + }, + { + "epoch": 1.86, + "learning_rate": 6.611224132111351e-06, + "loss": 0.1322, + "step": 2578 + }, + { + "epoch": 1.87, + "learning_rate": 6.603874492928875e-06, + "loss": 0.1305, + "step": 2579 + }, + { + "epoch": 1.87, + "learning_rate": 6.596526926822802e-06, + "loss": 0.1332, + "step": 2580 + }, + { + "epoch": 1.87, + "learning_rate": 6.589181438278265e-06, + "loss": 0.1188, + "step": 2581 + }, + { + "epoch": 1.87, + "learning_rate": 6.581838031779126e-06, + "loss": 0.1213, + "step": 2582 + }, + { + "epoch": 1.87, + "learning_rate": 6.574496711807976e-06, + "loss": 0.1373, + "step": 2583 + }, + { + "epoch": 1.87, + "learning_rate": 6.5671574828461335e-06, + "loss": 0.1229, + "step": 2584 + }, + { + "epoch": 1.87, + "learning_rate": 6.55982034937364e-06, + "loss": 0.1299, + "step": 2585 + }, + { + "epoch": 1.87, + "learning_rate": 6.552485315869256e-06, + "loss": 0.1273, + "step": 2586 + }, + { + "epoch": 1.87, + "learning_rate": 6.545152386810463e-06, + "loss": 0.1259, + "step": 2587 + }, + { + "epoch": 1.87, + "learning_rate": 6.537821566673455e-06, + "loss": 0.1319, + "step": 2588 + }, + { + "epoch": 1.87, + "learning_rate": 6.5304928599331415e-06, + "loss": 0.1286, + "step": 2589 + }, + { + "epoch": 1.87, + "learning_rate": 6.523166271063142e-06, + "loss": 0.1365, + "step": 2590 + }, + { + "epoch": 1.87, + "learning_rate": 6.51584180453578e-06, + "loss": 0.129, + "step": 2591 + }, + { + "epoch": 1.87, + "learning_rate": 6.508519464822085e-06, + "loss": 0.1332, + "step": 2592 + }, + { + "epoch": 1.88, + "learning_rate": 6.501199256391791e-06, + "loss": 0.1231, + "step": 2593 + }, + { + "epoch": 1.88, + "learning_rate": 6.493881183713328e-06, + "loss": 0.1302, + "step": 2594 + }, + { + "epoch": 1.88, + "learning_rate": 6.486565251253821e-06, + "loss": 0.1223, + "step": 2595 + }, + { + "epoch": 1.88, + "learning_rate": 6.479251463479093e-06, + "loss": 0.1317, + "step": 2596 + }, + { + "epoch": 1.88, + "learning_rate": 6.4719398248536545e-06, + "loss": 0.1292, + "step": 2597 + }, + { + "epoch": 1.88, + "learning_rate": 6.464630339840703e-06, + "loss": 0.1265, + "step": 2598 + }, + { + "epoch": 1.88, + "learning_rate": 6.4573230129021265e-06, + "loss": 0.1231, + "step": 2599 + }, + { + "epoch": 1.88, + "learning_rate": 6.450017848498491e-06, + "loss": 0.115, + "step": 2600 + }, + { + "epoch": 1.88, + "learning_rate": 6.442714851089042e-06, + "loss": 0.1364, + "step": 2601 + }, + { + "epoch": 1.88, + "learning_rate": 6.4354140251317075e-06, + "loss": 0.1332, + "step": 2602 + }, + { + "epoch": 1.88, + "learning_rate": 6.428115375083086e-06, + "loss": 0.1316, + "step": 2603 + }, + { + "epoch": 1.88, + "learning_rate": 6.420818905398447e-06, + "loss": 0.1169, + "step": 2604 + }, + { + "epoch": 1.88, + "learning_rate": 6.413524620531731e-06, + "loss": 0.1257, + "step": 2605 + }, + { + "epoch": 1.88, + "learning_rate": 6.406232524935543e-06, + "loss": 0.1208, + "step": 2606 + }, + { + "epoch": 1.89, + "learning_rate": 6.3989426230611574e-06, + "loss": 0.1269, + "step": 2607 + }, + { + "epoch": 1.89, + "learning_rate": 6.3916549193585005e-06, + "loss": 0.1339, + "step": 2608 + }, + { + "epoch": 1.89, + "learning_rate": 6.384369418276164e-06, + "loss": 0.1265, + "step": 2609 + }, + { + "epoch": 1.89, + "learning_rate": 6.377086124261389e-06, + "loss": 0.1286, + "step": 2610 + }, + { + "epoch": 1.89, + "learning_rate": 6.369805041760078e-06, + "loss": 0.1223, + "step": 2611 + }, + { + "epoch": 1.89, + "learning_rate": 6.362526175216771e-06, + "loss": 0.1218, + "step": 2612 + }, + { + "epoch": 1.89, + "learning_rate": 6.3552495290746695e-06, + "loss": 0.1191, + "step": 2613 + }, + { + "epoch": 1.89, + "learning_rate": 6.34797510777561e-06, + "loss": 0.1229, + "step": 2614 + }, + { + "epoch": 1.89, + "learning_rate": 6.340702915760073e-06, + "loss": 0.1166, + "step": 2615 + }, + { + "epoch": 1.89, + "learning_rate": 6.33343295746718e-06, + "loss": 0.1221, + "step": 2616 + }, + { + "epoch": 1.89, + "learning_rate": 6.326165237334687e-06, + "loss": 0.1281, + "step": 2617 + }, + { + "epoch": 1.89, + "learning_rate": 6.318899759798986e-06, + "loss": 0.127, + "step": 2618 + }, + { + "epoch": 1.89, + "learning_rate": 6.311636529295096e-06, + "loss": 0.1225, + "step": 2619 + }, + { + "epoch": 1.9, + "learning_rate": 6.304375550256669e-06, + "loss": 0.1375, + "step": 2620 + }, + { + "epoch": 1.9, + "learning_rate": 6.297116827115979e-06, + "loss": 0.1274, + "step": 2621 + }, + { + "epoch": 1.9, + "learning_rate": 6.289860364303927e-06, + "loss": 0.1258, + "step": 2622 + }, + { + "epoch": 1.9, + "learning_rate": 6.282606166250029e-06, + "loss": 0.1207, + "step": 2623 + }, + { + "epoch": 1.9, + "learning_rate": 6.2753542373824235e-06, + "loss": 0.1245, + "step": 2624 + }, + { + "epoch": 1.9, + "learning_rate": 6.2681045821278605e-06, + "loss": 0.1298, + "step": 2625 + }, + { + "epoch": 1.9, + "learning_rate": 6.260857204911705e-06, + "loss": 0.1244, + "step": 2626 + }, + { + "epoch": 1.9, + "learning_rate": 6.253612110157927e-06, + "loss": 0.1376, + "step": 2627 + }, + { + "epoch": 1.9, + "learning_rate": 6.246369302289108e-06, + "loss": 0.1212, + "step": 2628 + }, + { + "epoch": 1.9, + "learning_rate": 6.239128785726431e-06, + "loss": 0.1278, + "step": 2629 + }, + { + "epoch": 1.9, + "learning_rate": 6.23189056488968e-06, + "loss": 0.1202, + "step": 2630 + }, + { + "epoch": 1.9, + "learning_rate": 6.224654644197241e-06, + "loss": 0.1308, + "step": 2631 + }, + { + "epoch": 1.9, + "learning_rate": 6.217421028066091e-06, + "loss": 0.124, + "step": 2632 + }, + { + "epoch": 1.9, + "learning_rate": 6.210189720911803e-06, + "loss": 0.1248, + "step": 2633 + }, + { + "epoch": 1.91, + "learning_rate": 6.20296072714854e-06, + "loss": 0.1197, + "step": 2634 + }, + { + "epoch": 1.91, + "learning_rate": 6.195734051189051e-06, + "loss": 0.119, + "step": 2635 + }, + { + "epoch": 1.91, + "learning_rate": 6.188509697444678e-06, + "loss": 0.1326, + "step": 2636 + }, + { + "epoch": 1.91, + "learning_rate": 6.181287670325337e-06, + "loss": 0.1252, + "step": 2637 + }, + { + "epoch": 1.91, + "learning_rate": 6.174067974239526e-06, + "loss": 0.12, + "step": 2638 + }, + { + "epoch": 1.91, + "learning_rate": 6.1668506135943195e-06, + "loss": 0.1281, + "step": 2639 + }, + { + "epoch": 1.91, + "learning_rate": 6.15963559279537e-06, + "loss": 0.1185, + "step": 2640 + }, + { + "epoch": 1.91, + "learning_rate": 6.152422916246896e-06, + "loss": 0.1154, + "step": 2641 + }, + { + "epoch": 1.91, + "learning_rate": 6.14521258835169e-06, + "loss": 0.1244, + "step": 2642 + }, + { + "epoch": 1.91, + "learning_rate": 6.138004613511109e-06, + "loss": 0.1259, + "step": 2643 + }, + { + "epoch": 1.91, + "learning_rate": 6.130798996125074e-06, + "loss": 0.1185, + "step": 2644 + }, + { + "epoch": 1.91, + "learning_rate": 6.123595740592063e-06, + "loss": 0.1374, + "step": 2645 + }, + { + "epoch": 1.91, + "learning_rate": 6.11639485130912e-06, + "loss": 0.118, + "step": 2646 + }, + { + "epoch": 1.91, + "learning_rate": 6.109196332671839e-06, + "loss": 0.1256, + "step": 2647 + }, + { + "epoch": 1.92, + "learning_rate": 6.102000189074369e-06, + "loss": 0.1308, + "step": 2648 + }, + { + "epoch": 1.92, + "learning_rate": 6.094806424909408e-06, + "loss": 0.1203, + "step": 2649 + }, + { + "epoch": 1.92, + "learning_rate": 6.0876150445682015e-06, + "loss": 0.1215, + "step": 2650 + }, + { + "epoch": 1.92, + "learning_rate": 6.0804260524405405e-06, + "loss": 0.1338, + "step": 2651 + }, + { + "epoch": 1.92, + "learning_rate": 6.073239452914758e-06, + "loss": 0.1137, + "step": 2652 + }, + { + "epoch": 1.92, + "learning_rate": 6.06605525037773e-06, + "loss": 0.1217, + "step": 2653 + }, + { + "epoch": 1.92, + "learning_rate": 6.0588734492148595e-06, + "loss": 0.1264, + "step": 2654 + }, + { + "epoch": 1.92, + "learning_rate": 6.051694053810094e-06, + "loss": 0.1297, + "step": 2655 + }, + { + "epoch": 1.92, + "learning_rate": 6.04451706854591e-06, + "loss": 0.1178, + "step": 2656 + }, + { + "epoch": 1.92, + "learning_rate": 6.037342497803307e-06, + "loss": 0.1229, + "step": 2657 + }, + { + "epoch": 1.92, + "learning_rate": 6.030170345961819e-06, + "loss": 0.1281, + "step": 2658 + }, + { + "epoch": 1.92, + "learning_rate": 6.023000617399496e-06, + "loss": 0.1231, + "step": 2659 + }, + { + "epoch": 1.92, + "learning_rate": 6.015833316492914e-06, + "loss": 0.1245, + "step": 2660 + }, + { + "epoch": 1.92, + "learning_rate": 6.008668447617165e-06, + "loss": 0.1139, + "step": 2661 + }, + { + "epoch": 1.93, + "learning_rate": 6.001506015145858e-06, + "loss": 0.1332, + "step": 2662 + }, + { + "epoch": 1.93, + "learning_rate": 5.994346023451112e-06, + "loss": 0.1322, + "step": 2663 + }, + { + "epoch": 1.93, + "learning_rate": 5.987188476903551e-06, + "loss": 0.1264, + "step": 2664 + }, + { + "epoch": 1.93, + "learning_rate": 5.9800333798723275e-06, + "loss": 0.1286, + "step": 2665 + }, + { + "epoch": 1.93, + "learning_rate": 5.972880736725074e-06, + "loss": 0.1362, + "step": 2666 + }, + { + "epoch": 1.93, + "learning_rate": 5.965730551827938e-06, + "loss": 0.1209, + "step": 2667 + }, + { + "epoch": 1.93, + "learning_rate": 5.958582829545564e-06, + "loss": 0.1279, + "step": 2668 + }, + { + "epoch": 1.93, + "learning_rate": 5.951437574241092e-06, + "loss": 0.1202, + "step": 2669 + }, + { + "epoch": 1.93, + "learning_rate": 5.944294790276157e-06, + "loss": 0.1244, + "step": 2670 + }, + { + "epoch": 1.93, + "learning_rate": 5.937154482010887e-06, + "loss": 0.1261, + "step": 2671 + }, + { + "epoch": 1.93, + "learning_rate": 5.930016653803896e-06, + "loss": 0.1277, + "step": 2672 + }, + { + "epoch": 1.93, + "learning_rate": 5.922881310012285e-06, + "loss": 0.1167, + "step": 2673 + }, + { + "epoch": 1.93, + "learning_rate": 5.915748454991641e-06, + "loss": 0.1408, + "step": 2674 + }, + { + "epoch": 1.93, + "learning_rate": 5.908618093096027e-06, + "loss": 0.1245, + "step": 2675 + }, + { + "epoch": 1.94, + "learning_rate": 5.901490228677989e-06, + "loss": 0.1261, + "step": 2676 + }, + { + "epoch": 1.94, + "learning_rate": 5.894364866088545e-06, + "loss": 0.1331, + "step": 2677 + }, + { + "epoch": 1.94, + "learning_rate": 5.887242009677186e-06, + "loss": 0.1255, + "step": 2678 + }, + { + "epoch": 1.94, + "learning_rate": 5.880121663791876e-06, + "loss": 0.1214, + "step": 2679 + }, + { + "epoch": 1.94, + "learning_rate": 5.873003832779045e-06, + "loss": 0.1297, + "step": 2680 + }, + { + "epoch": 1.94, + "learning_rate": 5.865888520983587e-06, + "loss": 0.1116, + "step": 2681 + }, + { + "epoch": 1.94, + "learning_rate": 5.8587757327488595e-06, + "loss": 0.1161, + "step": 2682 + }, + { + "epoch": 1.94, + "learning_rate": 5.851665472416678e-06, + "loss": 0.127, + "step": 2683 + }, + { + "epoch": 1.94, + "learning_rate": 5.844557744327316e-06, + "loss": 0.1261, + "step": 2684 + }, + { + "epoch": 1.94, + "learning_rate": 5.837452552819505e-06, + "loss": 0.1233, + "step": 2685 + }, + { + "epoch": 1.94, + "learning_rate": 5.830349902230414e-06, + "loss": 0.1236, + "step": 2686 + }, + { + "epoch": 1.94, + "learning_rate": 5.8232497968956825e-06, + "loss": 0.1191, + "step": 2687 + }, + { + "epoch": 1.94, + "learning_rate": 5.816152241149378e-06, + "loss": 0.1206, + "step": 2688 + }, + { + "epoch": 1.95, + "learning_rate": 5.809057239324022e-06, + "loss": 0.1248, + "step": 2689 + }, + { + "epoch": 1.95, + "learning_rate": 5.8019647957505694e-06, + "loss": 0.1281, + "step": 2690 + }, + { + "epoch": 1.95, + "learning_rate": 5.794874914758422e-06, + "loss": 0.1261, + "step": 2691 + }, + { + "epoch": 1.95, + "learning_rate": 5.787787600675414e-06, + "loss": 0.1165, + "step": 2692 + }, + { + "epoch": 1.95, + "learning_rate": 5.780702857827805e-06, + "loss": 0.1245, + "step": 2693 + }, + { + "epoch": 1.95, + "learning_rate": 5.773620690540298e-06, + "loss": 0.1266, + "step": 2694 + }, + { + "epoch": 1.95, + "learning_rate": 5.766541103136019e-06, + "loss": 0.117, + "step": 2695 + }, + { + "epoch": 1.95, + "learning_rate": 5.759464099936513e-06, + "loss": 0.1296, + "step": 2696 + }, + { + "epoch": 1.95, + "learning_rate": 5.752389685261757e-06, + "loss": 0.1201, + "step": 2697 + }, + { + "epoch": 1.95, + "learning_rate": 5.745317863430135e-06, + "loss": 0.1224, + "step": 2698 + }, + { + "epoch": 1.95, + "learning_rate": 5.738248638758468e-06, + "loss": 0.1318, + "step": 2699 + }, + { + "epoch": 1.95, + "learning_rate": 5.731182015561969e-06, + "loss": 0.122, + "step": 2700 + }, + { + "epoch": 1.95, + "learning_rate": 5.7241179981542805e-06, + "loss": 0.1275, + "step": 2701 + }, + { + "epoch": 1.95, + "learning_rate": 5.717056590847451e-06, + "loss": 0.131, + "step": 2702 + }, + { + "epoch": 1.96, + "learning_rate": 5.7099977979519235e-06, + "loss": 0.1234, + "step": 2703 + }, + { + "epoch": 1.96, + "learning_rate": 5.702941623776566e-06, + "loss": 0.1302, + "step": 2704 + }, + { + "epoch": 1.96, + "learning_rate": 5.695888072628624e-06, + "loss": 0.1261, + "step": 2705 + }, + { + "epoch": 1.96, + "learning_rate": 5.688837148813764e-06, + "loss": 0.1336, + "step": 2706 + }, + { + "epoch": 1.96, + "learning_rate": 5.68178885663603e-06, + "loss": 0.1202, + "step": 2707 + }, + { + "epoch": 1.96, + "learning_rate": 5.674743200397879e-06, + "loss": 0.1208, + "step": 2708 + }, + { + "epoch": 1.96, + "learning_rate": 5.667700184400139e-06, + "loss": 0.1168, + "step": 2709 + }, + { + "epoch": 1.96, + "learning_rate": 5.660659812942043e-06, + "loss": 0.1223, + "step": 2710 + }, + { + "epoch": 1.96, + "learning_rate": 5.653622090321196e-06, + "loss": 0.1231, + "step": 2711 + }, + { + "epoch": 1.96, + "learning_rate": 5.646587020833598e-06, + "loss": 0.1282, + "step": 2712 + }, + { + "epoch": 1.96, + "learning_rate": 5.63955460877362e-06, + "loss": 0.1125, + "step": 2713 + }, + { + "epoch": 1.96, + "learning_rate": 5.632524858434019e-06, + "loss": 0.144, + "step": 2714 + }, + { + "epoch": 1.96, + "learning_rate": 5.625497774105917e-06, + "loss": 0.1194, + "step": 2715 + }, + { + "epoch": 1.96, + "learning_rate": 5.618473360078822e-06, + "loss": 0.1185, + "step": 2716 + }, + { + "epoch": 1.97, + "learning_rate": 5.611451620640598e-06, + "loss": 0.1291, + "step": 2717 + }, + { + "epoch": 1.97, + "learning_rate": 5.604432560077489e-06, + "loss": 0.1258, + "step": 2718 + }, + { + "epoch": 1.97, + "learning_rate": 5.597416182674091e-06, + "loss": 0.121, + "step": 2719 + }, + { + "epoch": 1.97, + "learning_rate": 5.590402492713377e-06, + "loss": 0.126, + "step": 2720 + }, + { + "epoch": 1.97, + "learning_rate": 5.583391494476664e-06, + "loss": 0.122, + "step": 2721 + }, + { + "epoch": 1.97, + "learning_rate": 5.576383192243639e-06, + "loss": 0.1307, + "step": 2722 + }, + { + "epoch": 1.97, + "learning_rate": 5.569377590292329e-06, + "loss": 0.1179, + "step": 2723 + }, + { + "epoch": 1.97, + "learning_rate": 5.562374692899135e-06, + "loss": 0.1156, + "step": 2724 + }, + { + "epoch": 1.97, + "learning_rate": 5.555374504338782e-06, + "loss": 0.1287, + "step": 2725 + }, + { + "epoch": 1.97, + "learning_rate": 5.548377028884359e-06, + "loss": 0.1286, + "step": 2726 + }, + { + "epoch": 1.97, + "learning_rate": 5.54138227080729e-06, + "loss": 0.1342, + "step": 2727 + }, + { + "epoch": 1.97, + "learning_rate": 5.534390234377346e-06, + "loss": 0.1152, + "step": 2728 + }, + { + "epoch": 1.97, + "learning_rate": 5.527400923862627e-06, + "loss": 0.1299, + "step": 2729 + }, + { + "epoch": 1.97, + "learning_rate": 5.520414343529583e-06, + "loss": 0.118, + "step": 2730 + }, + { + "epoch": 1.98, + "learning_rate": 5.513430497642984e-06, + "loss": 0.1266, + "step": 2731 + }, + { + "epoch": 1.98, + "learning_rate": 5.5064493904659445e-06, + "loss": 0.1176, + "step": 2732 + }, + { + "epoch": 1.98, + "learning_rate": 5.49947102625989e-06, + "loss": 0.1304, + "step": 2733 + }, + { + "epoch": 1.98, + "learning_rate": 5.4924954092845905e-06, + "loss": 0.132, + "step": 2734 + }, + { + "epoch": 1.98, + "learning_rate": 5.485522543798122e-06, + "loss": 0.1243, + "step": 2735 + }, + { + "epoch": 1.98, + "learning_rate": 5.478552434056892e-06, + "loss": 0.1278, + "step": 2736 + }, + { + "epoch": 1.98, + "learning_rate": 5.471585084315627e-06, + "loss": 0.1263, + "step": 2737 + }, + { + "epoch": 1.98, + "learning_rate": 5.464620498827355e-06, + "loss": 0.1233, + "step": 2738 + }, + { + "epoch": 1.98, + "learning_rate": 5.457658681843436e-06, + "loss": 0.1355, + "step": 2739 + }, + { + "epoch": 1.98, + "learning_rate": 5.450699637613522e-06, + "loss": 0.1299, + "step": 2740 + }, + { + "epoch": 1.98, + "learning_rate": 5.443743370385584e-06, + "loss": 0.1216, + "step": 2741 + }, + { + "epoch": 1.98, + "learning_rate": 5.436789884405892e-06, + "loss": 0.1198, + "step": 2742 + }, + { + "epoch": 1.98, + "learning_rate": 5.429839183919025e-06, + "loss": 0.1331, + "step": 2743 + }, + { + "epoch": 1.98, + "learning_rate": 5.422891273167851e-06, + "loss": 0.1216, + "step": 2744 + }, + { + "epoch": 1.99, + "learning_rate": 5.415946156393548e-06, + "loss": 0.1174, + "step": 2745 + }, + { + "epoch": 1.99, + "learning_rate": 5.409003837835574e-06, + "loss": 0.1277, + "step": 2746 + }, + { + "epoch": 1.99, + "learning_rate": 5.402064321731695e-06, + "loss": 0.1257, + "step": 2747 + }, + { + "epoch": 1.99, + "learning_rate": 5.39512761231795e-06, + "loss": 0.1238, + "step": 2748 + }, + { + "epoch": 1.99, + "learning_rate": 5.38819371382868e-06, + "loss": 0.1272, + "step": 2749 + }, + { + "epoch": 1.99, + "learning_rate": 5.381262630496496e-06, + "loss": 0.136, + "step": 2750 + }, + { + "epoch": 1.99, + "learning_rate": 5.374334366552305e-06, + "loss": 0.117, + "step": 2751 + }, + { + "epoch": 1.99, + "learning_rate": 5.367408926225272e-06, + "loss": 0.118, + "step": 2752 + }, + { + "epoch": 1.99, + "learning_rate": 5.360486313742869e-06, + "loss": 0.1242, + "step": 2753 + }, + { + "epoch": 1.99, + "learning_rate": 5.353566533330812e-06, + "loss": 0.1219, + "step": 2754 + }, + { + "epoch": 1.99, + "learning_rate": 5.346649589213108e-06, + "loss": 0.1365, + "step": 2755 + }, + { + "epoch": 1.99, + "learning_rate": 5.339735485612021e-06, + "loss": 0.1224, + "step": 2756 + }, + { + "epoch": 1.99, + "learning_rate": 5.3328242267480915e-06, + "loss": 0.139, + "step": 2757 + }, + { + "epoch": 1.99, + "learning_rate": 5.325915816840108e-06, + "loss": 0.1351, + "step": 2758 + }, + { + "epoch": 2.0, + "learning_rate": 5.3190102601051386e-06, + "loss": 0.125, + "step": 2759 + }, + { + "epoch": 2.0, + "learning_rate": 5.312107560758492e-06, + "loss": 0.1299, + "step": 2760 + }, + { + "epoch": 2.0, + "learning_rate": 5.305207723013752e-06, + "loss": 0.1353, + "step": 2761 + }, + { + "epoch": 2.0, + "learning_rate": 5.298310751082733e-06, + "loss": 0.125, + "step": 2762 + }, + { + "epoch": 2.0, + "learning_rate": 5.2914166491755234e-06, + "loss": 0.1274, + "step": 2763 + }, + { + "epoch": 2.0, + "learning_rate": 5.2845254215004395e-06, + "loss": 0.1318, + "step": 2764 + }, + { + "epoch": 2.0, + "learning_rate": 5.2776370722640565e-06, + "loss": 0.1253, + "step": 2765 + }, + { + "epoch": 2.0, + "learning_rate": 5.270751605671185e-06, + "loss": 0.1282, + "step": 2766 + }, + { + "epoch": 2.0, + "learning_rate": 5.263869025924884e-06, + "loss": 0.1237, + "step": 2767 + }, + { + "epoch": 2.0, + "learning_rate": 5.256989337226437e-06, + "loss": 0.1192, + "step": 2768 + }, + { + "epoch": 2.0, + "learning_rate": 5.250112543775374e-06, + "loss": 0.1263, + "step": 2769 + }, + { + "epoch": 2.0, + "learning_rate": 5.2432386497694625e-06, + "loss": 0.1253, + "step": 2770 + }, + { + "epoch": 2.0, + "learning_rate": 5.236367659404679e-06, + "loss": 0.1232, + "step": 2771 + }, + { + "epoch": 2.01, + "learning_rate": 5.229499576875253e-06, + "loss": 0.1241, + "step": 2772 + }, + { + "epoch": 2.01, + "learning_rate": 5.222634406373616e-06, + "loss": 0.1189, + "step": 2773 + }, + { + "epoch": 2.01, + "learning_rate": 5.215772152090443e-06, + "loss": 0.1282, + "step": 2774 + }, + { + "epoch": 2.01, + "learning_rate": 5.2089128182146076e-06, + "loss": 0.1296, + "step": 2775 + }, + { + "epoch": 2.01, + "learning_rate": 5.202056408933222e-06, + "loss": 0.1215, + "step": 2776 + }, + { + "epoch": 2.01, + "learning_rate": 5.195202928431593e-06, + "loss": 0.1212, + "step": 2777 + }, + { + "epoch": 2.01, + "learning_rate": 5.188352380893258e-06, + "loss": 0.1155, + "step": 2778 + }, + { + "epoch": 2.01, + "learning_rate": 5.181504770499948e-06, + "loss": 0.1189, + "step": 2779 + }, + { + "epoch": 2.01, + "learning_rate": 5.1746601014316156e-06, + "loss": 0.1339, + "step": 2780 + }, + { + "epoch": 2.01, + "learning_rate": 5.167818377866403e-06, + "loss": 0.1142, + "step": 2781 + }, + { + "epoch": 2.01, + "learning_rate": 5.160979603980667e-06, + "loss": 0.1275, + "step": 2782 + }, + { + "epoch": 2.01, + "learning_rate": 5.154143783948959e-06, + "loss": 0.1235, + "step": 2783 + }, + { + "epoch": 2.01, + "learning_rate": 5.147310921944031e-06, + "loss": 0.1292, + "step": 2784 + }, + { + "epoch": 2.01, + "learning_rate": 5.140481022136817e-06, + "loss": 0.1137, + "step": 2785 + }, + { + "epoch": 2.02, + "learning_rate": 5.13365408869646e-06, + "loss": 0.1322, + "step": 2786 + }, + { + "epoch": 2.02, + "learning_rate": 5.126830125790277e-06, + "loss": 0.1297, + "step": 2787 + }, + { + "epoch": 2.02, + "learning_rate": 5.120009137583786e-06, + "loss": 0.1337, + "step": 2788 + }, + { + "epoch": 2.02, + "learning_rate": 5.113191128240672e-06, + "loss": 0.1195, + "step": 2789 + }, + { + "epoch": 2.02, + "learning_rate": 5.106376101922821e-06, + "loss": 0.1287, + "step": 2790 + }, + { + "epoch": 2.02, + "learning_rate": 5.099564062790281e-06, + "loss": 0.1253, + "step": 2791 + }, + { + "epoch": 2.02, + "learning_rate": 5.09275501500129e-06, + "loss": 0.1236, + "step": 2792 + }, + { + "epoch": 2.02, + "learning_rate": 5.085948962712246e-06, + "loss": 0.1194, + "step": 2793 + }, + { + "epoch": 2.02, + "learning_rate": 5.0791459100777365e-06, + "loss": 0.1274, + "step": 2794 + }, + { + "epoch": 2.02, + "learning_rate": 5.072345861250498e-06, + "loss": 0.1176, + "step": 2795 + }, + { + "epoch": 2.02, + "learning_rate": 5.065548820381452e-06, + "loss": 0.1297, + "step": 2796 + }, + { + "epoch": 2.02, + "learning_rate": 5.058754791619669e-06, + "loss": 0.1237, + "step": 2797 + }, + { + "epoch": 2.02, + "learning_rate": 5.0519637791123895e-06, + "loss": 0.1236, + "step": 2798 + }, + { + "epoch": 2.02, + "learning_rate": 5.04517578700501e-06, + "loss": 0.1284, + "step": 2799 + }, + { + "epoch": 2.03, + "learning_rate": 5.0383908194410845e-06, + "loss": 0.1186, + "step": 2800 + }, + { + "epoch": 2.03, + "learning_rate": 5.031608880562315e-06, + "loss": 0.1247, + "step": 2801 + }, + { + "epoch": 2.03, + "learning_rate": 5.024829974508569e-06, + "loss": 0.1313, + "step": 2802 + }, + { + "epoch": 2.03, + "learning_rate": 5.018054105417842e-06, + "loss": 0.1263, + "step": 2803 + }, + { + "epoch": 2.03, + "learning_rate": 5.011281277426294e-06, + "loss": 0.1227, + "step": 2804 + }, + { + "epoch": 2.03, + "learning_rate": 5.004511494668223e-06, + "loss": 0.1219, + "step": 2805 + }, + { + "epoch": 2.03, + "learning_rate": 4.9977447612760596e-06, + "loss": 0.124, + "step": 2806 + }, + { + "epoch": 2.03, + "learning_rate": 4.990981081380389e-06, + "loss": 0.1265, + "step": 2807 + }, + { + "epoch": 2.03, + "learning_rate": 4.984220459109914e-06, + "loss": 0.1211, + "step": 2808 + }, + { + "epoch": 2.03, + "learning_rate": 4.977462898591488e-06, + "loss": 0.1252, + "step": 2809 + }, + { + "epoch": 2.03, + "learning_rate": 4.970708403950083e-06, + "loss": 0.1249, + "step": 2810 + }, + { + "epoch": 2.03, + "learning_rate": 4.963956979308805e-06, + "loss": 0.1308, + "step": 2811 + }, + { + "epoch": 2.03, + "learning_rate": 4.957208628788891e-06, + "loss": 0.1259, + "step": 2812 + }, + { + "epoch": 2.03, + "learning_rate": 4.9504633565096884e-06, + "loss": 0.1221, + "step": 2813 + }, + { + "epoch": 2.04, + "learning_rate": 4.943721166588675e-06, + "loss": 0.1307, + "step": 2814 + }, + { + "epoch": 2.04, + "learning_rate": 4.936982063141452e-06, + "loss": 0.1291, + "step": 2815 + }, + { + "epoch": 2.04, + "learning_rate": 4.9302460502817196e-06, + "loss": 0.1318, + "step": 2816 + }, + { + "epoch": 2.04, + "learning_rate": 4.923513132121309e-06, + "loss": 0.1308, + "step": 2817 + }, + { + "epoch": 2.04, + "learning_rate": 4.916783312770149e-06, + "loss": 0.127, + "step": 2818 + }, + { + "epoch": 2.04, + "learning_rate": 4.910056596336289e-06, + "loss": 0.1222, + "step": 2819 + }, + { + "epoch": 2.04, + "learning_rate": 4.903332986925871e-06, + "loss": 0.1288, + "step": 2820 + }, + { + "epoch": 2.04, + "learning_rate": 4.896612488643154e-06, + "loss": 0.1193, + "step": 2821 + }, + { + "epoch": 2.04, + "learning_rate": 4.889895105590484e-06, + "loss": 0.1182, + "step": 2822 + }, + { + "epoch": 2.04, + "learning_rate": 4.883180841868321e-06, + "loss": 0.1295, + "step": 2823 + }, + { + "epoch": 2.04, + "learning_rate": 4.876469701575204e-06, + "loss": 0.1226, + "step": 2824 + }, + { + "epoch": 2.04, + "learning_rate": 4.869761688807782e-06, + "loss": 0.135, + "step": 2825 + }, + { + "epoch": 2.04, + "learning_rate": 4.863056807660779e-06, + "loss": 0.1233, + "step": 2826 + }, + { + "epoch": 2.04, + "learning_rate": 4.856355062227021e-06, + "loss": 0.1302, + "step": 2827 + }, + { + "epoch": 2.05, + "learning_rate": 4.849656456597411e-06, + "loss": 0.1208, + "step": 2828 + }, + { + "epoch": 2.05, + "learning_rate": 4.842960994860941e-06, + "loss": 0.1287, + "step": 2829 + }, + { + "epoch": 2.05, + "learning_rate": 4.836268681104676e-06, + "loss": 0.1342, + "step": 2830 + }, + { + "epoch": 2.05, + "learning_rate": 4.829579519413773e-06, + "loss": 0.123, + "step": 2831 + }, + { + "epoch": 2.05, + "learning_rate": 4.8228935138714475e-06, + "loss": 0.1311, + "step": 2832 + }, + { + "epoch": 2.05, + "learning_rate": 4.816210668559006e-06, + "loss": 0.1233, + "step": 2833 + }, + { + "epoch": 2.05, + "learning_rate": 4.8095309875558105e-06, + "loss": 0.1329, + "step": 2834 + }, + { + "epoch": 2.05, + "learning_rate": 4.802854474939304e-06, + "loss": 0.1234, + "step": 2835 + }, + { + "epoch": 2.05, + "learning_rate": 4.796181134784983e-06, + "loss": 0.1268, + "step": 2836 + }, + { + "epoch": 2.05, + "learning_rate": 4.789510971166419e-06, + "loss": 0.124, + "step": 2837 + }, + { + "epoch": 2.05, + "learning_rate": 4.7828439881552434e-06, + "loss": 0.1256, + "step": 2838 + }, + { + "epoch": 2.05, + "learning_rate": 4.776180189821134e-06, + "loss": 0.1285, + "step": 2839 + }, + { + "epoch": 2.05, + "learning_rate": 4.769519580231837e-06, + "loss": 0.125, + "step": 2840 + }, + { + "epoch": 2.05, + "learning_rate": 4.762862163453152e-06, + "loss": 0.119, + "step": 2841 + }, + { + "epoch": 2.06, + "learning_rate": 4.75620794354892e-06, + "loss": 0.1261, + "step": 2842 + }, + { + "epoch": 2.06, + "learning_rate": 4.74955692458104e-06, + "loss": 0.1165, + "step": 2843 + }, + { + "epoch": 2.06, + "learning_rate": 4.742909110609448e-06, + "loss": 0.1196, + "step": 2844 + }, + { + "epoch": 2.06, + "learning_rate": 4.736264505692139e-06, + "loss": 0.1322, + "step": 2845 + }, + { + "epoch": 2.06, + "learning_rate": 4.7296231138851264e-06, + "loss": 0.1228, + "step": 2846 + }, + { + "epoch": 2.06, + "learning_rate": 4.722984939242482e-06, + "loss": 0.1213, + "step": 2847 + }, + { + "epoch": 2.06, + "learning_rate": 4.716349985816311e-06, + "loss": 0.1329, + "step": 2848 + }, + { + "epoch": 2.06, + "learning_rate": 4.709718257656738e-06, + "loss": 0.1216, + "step": 2849 + }, + { + "epoch": 2.06, + "learning_rate": 4.703089758811938e-06, + "loss": 0.133, + "step": 2850 + }, + { + "epoch": 2.06, + "learning_rate": 4.696464493328098e-06, + "loss": 0.1259, + "step": 2851 + }, + { + "epoch": 2.06, + "learning_rate": 4.689842465249446e-06, + "loss": 0.1311, + "step": 2852 + }, + { + "epoch": 2.06, + "learning_rate": 4.68322367861822e-06, + "loss": 0.1239, + "step": 2853 + }, + { + "epoch": 2.06, + "learning_rate": 4.67660813747469e-06, + "loss": 0.1202, + "step": 2854 + }, + { + "epoch": 2.07, + "learning_rate": 4.669995845857139e-06, + "loss": 0.1198, + "step": 2855 + }, + { + "epoch": 2.07, + "learning_rate": 4.663386807801872e-06, + "loss": 0.1272, + "step": 2856 + }, + { + "epoch": 2.07, + "learning_rate": 4.6567810273431965e-06, + "loss": 0.1242, + "step": 2857 + }, + { + "epoch": 2.07, + "learning_rate": 4.650178508513449e-06, + "loss": 0.1296, + "step": 2858 + }, + { + "epoch": 2.07, + "learning_rate": 4.643579255342956e-06, + "loss": 0.1159, + "step": 2859 + }, + { + "epoch": 2.07, + "learning_rate": 4.636983271860069e-06, + "loss": 0.1247, + "step": 2860 + }, + { + "epoch": 2.07, + "learning_rate": 4.630390562091124e-06, + "loss": 0.123, + "step": 2861 + }, + { + "epoch": 2.07, + "learning_rate": 4.623801130060478e-06, + "loss": 0.1316, + "step": 2862 + }, + { + "epoch": 2.07, + "learning_rate": 4.617214979790472e-06, + "loss": 0.1303, + "step": 2863 + }, + { + "epoch": 2.07, + "learning_rate": 4.6106321153014566e-06, + "loss": 0.1184, + "step": 2864 + }, + { + "epoch": 2.07, + "learning_rate": 4.604052540611762e-06, + "loss": 0.1312, + "step": 2865 + }, + { + "epoch": 2.07, + "learning_rate": 4.597476259737727e-06, + "loss": 0.1304, + "step": 2866 + }, + { + "epoch": 2.07, + "learning_rate": 4.590903276693662e-06, + "loss": 0.1378, + "step": 2867 + }, + { + "epoch": 2.07, + "learning_rate": 4.5843335954918825e-06, + "loss": 0.1279, + "step": 2868 + }, + { + "epoch": 2.08, + "learning_rate": 4.5777672201426705e-06, + "loss": 0.1143, + "step": 2869 + }, + { + "epoch": 2.08, + "learning_rate": 4.571204154654303e-06, + "loss": 0.1382, + "step": 2870 + }, + { + "epoch": 2.08, + "learning_rate": 4.564644403033033e-06, + "loss": 0.13, + "step": 2871 + }, + { + "epoch": 2.08, + "learning_rate": 4.558087969283092e-06, + "loss": 0.1246, + "step": 2872 + }, + { + "epoch": 2.08, + "learning_rate": 4.551534857406681e-06, + "loss": 0.1196, + "step": 2873 + }, + { + "epoch": 2.08, + "learning_rate": 4.54498507140398e-06, + "loss": 0.1239, + "step": 2874 + }, + { + "epoch": 2.08, + "learning_rate": 4.53843861527313e-06, + "loss": 0.1212, + "step": 2875 + }, + { + "epoch": 2.08, + "learning_rate": 4.53189549301025e-06, + "loss": 0.1182, + "step": 2876 + }, + { + "epoch": 2.08, + "learning_rate": 4.525355708609414e-06, + "loss": 0.1195, + "step": 2877 + }, + { + "epoch": 2.08, + "learning_rate": 4.518819266062669e-06, + "loss": 0.1121, + "step": 2878 + }, + { + "epoch": 2.08, + "learning_rate": 4.512286169360007e-06, + "loss": 0.1305, + "step": 2879 + }, + { + "epoch": 2.08, + "learning_rate": 4.505756422489398e-06, + "loss": 0.1196, + "step": 2880 + }, + { + "epoch": 2.08, + "learning_rate": 4.499230029436747e-06, + "loss": 0.1186, + "step": 2881 + }, + { + "epoch": 2.08, + "learning_rate": 4.492706994185924e-06, + "loss": 0.1295, + "step": 2882 + }, + { + "epoch": 2.09, + "learning_rate": 4.486187320718749e-06, + "loss": 0.1253, + "step": 2883 + }, + { + "epoch": 2.09, + "learning_rate": 4.479671013014981e-06, + "loss": 0.1225, + "step": 2884 + }, + { + "epoch": 2.09, + "learning_rate": 4.473158075052338e-06, + "loss": 0.1214, + "step": 2885 + }, + { + "epoch": 2.09, + "learning_rate": 4.466648510806465e-06, + "loss": 0.1317, + "step": 2886 + }, + { + "epoch": 2.09, + "learning_rate": 4.460142324250964e-06, + "loss": 0.116, + "step": 2887 + }, + { + "epoch": 2.09, + "learning_rate": 4.453639519357359e-06, + "loss": 0.1137, + "step": 2888 + }, + { + "epoch": 2.09, + "learning_rate": 4.4471401000951255e-06, + "loss": 0.1239, + "step": 2889 + }, + { + "epoch": 2.09, + "learning_rate": 4.440644070431659e-06, + "loss": 0.1188, + "step": 2890 + }, + { + "epoch": 2.09, + "learning_rate": 4.434151434332297e-06, + "loss": 0.122, + "step": 2891 + }, + { + "epoch": 2.09, + "learning_rate": 4.4276621957602935e-06, + "loss": 0.1329, + "step": 2892 + }, + { + "epoch": 2.09, + "learning_rate": 4.421176358676844e-06, + "loss": 0.1164, + "step": 2893 + }, + { + "epoch": 2.09, + "learning_rate": 4.4146939270410506e-06, + "loss": 0.1164, + "step": 2894 + }, + { + "epoch": 2.09, + "learning_rate": 4.408214904809951e-06, + "loss": 0.1353, + "step": 2895 + }, + { + "epoch": 2.09, + "learning_rate": 4.401739295938489e-06, + "loss": 0.1256, + "step": 2896 + }, + { + "epoch": 2.1, + "learning_rate": 4.39526710437954e-06, + "loss": 0.1157, + "step": 2897 + }, + { + "epoch": 2.1, + "learning_rate": 4.388798334083877e-06, + "loss": 0.129, + "step": 2898 + }, + { + "epoch": 2.1, + "learning_rate": 4.382332989000196e-06, + "loss": 0.1288, + "step": 2899 + }, + { + "epoch": 2.1, + "learning_rate": 4.375871073075097e-06, + "loss": 0.1206, + "step": 2900 + }, + { + "epoch": 2.1, + "learning_rate": 4.3694125902530945e-06, + "loss": 0.1243, + "step": 2901 + }, + { + "epoch": 2.1, + "learning_rate": 4.362957544476594e-06, + "loss": 0.131, + "step": 2902 + }, + { + "epoch": 2.1, + "learning_rate": 4.356505939685915e-06, + "loss": 0.1187, + "step": 2903 + }, + { + "epoch": 2.1, + "learning_rate": 4.350057779819264e-06, + "loss": 0.1152, + "step": 2904 + }, + { + "epoch": 2.1, + "learning_rate": 4.3436130688127635e-06, + "loss": 0.1264, + "step": 2905 + }, + { + "epoch": 2.1, + "learning_rate": 4.337171810600409e-06, + "loss": 0.1401, + "step": 2906 + }, + { + "epoch": 2.1, + "learning_rate": 4.330734009114108e-06, + "loss": 0.1333, + "step": 2907 + }, + { + "epoch": 2.1, + "learning_rate": 4.32429966828364e-06, + "loss": 0.1212, + "step": 2908 + }, + { + "epoch": 2.1, + "learning_rate": 4.317868792036689e-06, + "loss": 0.1259, + "step": 2909 + }, + { + "epoch": 2.1, + "learning_rate": 4.311441384298809e-06, + "loss": 0.1308, + "step": 2910 + }, + { + "epoch": 2.11, + "learning_rate": 4.30501744899345e-06, + "loss": 0.1201, + "step": 2911 + }, + { + "epoch": 2.11, + "learning_rate": 4.29859699004193e-06, + "loss": 0.1279, + "step": 2912 + }, + { + "epoch": 2.11, + "learning_rate": 4.292180011363458e-06, + "loss": 0.122, + "step": 2913 + }, + { + "epoch": 2.11, + "learning_rate": 4.2857665168751025e-06, + "loss": 0.1303, + "step": 2914 + }, + { + "epoch": 2.11, + "learning_rate": 4.279356510491821e-06, + "loss": 0.1303, + "step": 2915 + }, + { + "epoch": 2.11, + "learning_rate": 4.2729499961264365e-06, + "loss": 0.1252, + "step": 2916 + }, + { + "epoch": 2.11, + "learning_rate": 4.266546977689633e-06, + "loss": 0.1242, + "step": 2917 + }, + { + "epoch": 2.11, + "learning_rate": 4.260147459089971e-06, + "loss": 0.1185, + "step": 2918 + }, + { + "epoch": 2.11, + "learning_rate": 4.253751444233867e-06, + "loss": 0.1276, + "step": 2919 + }, + { + "epoch": 2.11, + "learning_rate": 4.247358937025606e-06, + "loss": 0.1256, + "step": 2920 + }, + { + "epoch": 2.11, + "learning_rate": 4.24096994136732e-06, + "loss": 0.1254, + "step": 2921 + }, + { + "epoch": 2.11, + "learning_rate": 4.234584461159014e-06, + "loss": 0.1296, + "step": 2922 + }, + { + "epoch": 2.11, + "learning_rate": 4.22820250029853e-06, + "loss": 0.1222, + "step": 2923 + }, + { + "epoch": 2.12, + "learning_rate": 4.221824062681578e-06, + "loss": 0.1289, + "step": 2924 + }, + { + "epoch": 2.12, + "learning_rate": 4.2154491522017005e-06, + "loss": 0.1217, + "step": 2925 + }, + { + "epoch": 2.12, + "learning_rate": 4.209077772750304e-06, + "loss": 0.1186, + "step": 2926 + }, + { + "epoch": 2.12, + "learning_rate": 4.202709928216624e-06, + "loss": 0.1242, + "step": 2927 + }, + { + "epoch": 2.12, + "learning_rate": 4.196345622487748e-06, + "loss": 0.1293, + "step": 2928 + }, + { + "epoch": 2.12, + "learning_rate": 4.189984859448603e-06, + "loss": 0.1167, + "step": 2929 + }, + { + "epoch": 2.12, + "learning_rate": 4.183627642981952e-06, + "loss": 0.1191, + "step": 2930 + }, + { + "epoch": 2.12, + "learning_rate": 4.177273976968388e-06, + "loss": 0.1241, + "step": 2931 + }, + { + "epoch": 2.12, + "learning_rate": 4.170923865286345e-06, + "loss": 0.1308, + "step": 2932 + }, + { + "epoch": 2.12, + "learning_rate": 4.164577311812078e-06, + "loss": 0.1339, + "step": 2933 + }, + { + "epoch": 2.12, + "learning_rate": 4.158234320419683e-06, + "loss": 0.132, + "step": 2934 + }, + { + "epoch": 2.12, + "learning_rate": 4.151894894981065e-06, + "loss": 0.1169, + "step": 2935 + }, + { + "epoch": 2.12, + "learning_rate": 4.1455590393659715e-06, + "loss": 0.1361, + "step": 2936 + }, + { + "epoch": 2.12, + "learning_rate": 4.13922675744195e-06, + "loss": 0.1234, + "step": 2937 + }, + { + "epoch": 2.13, + "learning_rate": 4.132898053074387e-06, + "loss": 0.1346, + "step": 2938 + }, + { + "epoch": 2.13, + "learning_rate": 4.126572930126466e-06, + "loss": 0.1224, + "step": 2939 + }, + { + "epoch": 2.13, + "learning_rate": 4.120251392459201e-06, + "loss": 0.1291, + "step": 2940 + }, + { + "epoch": 2.13, + "learning_rate": 4.113933443931405e-06, + "loss": 0.132, + "step": 2941 + }, + { + "epoch": 2.13, + "learning_rate": 4.10761908839971e-06, + "loss": 0.1277, + "step": 2942 + }, + { + "epoch": 2.13, + "learning_rate": 4.101308329718545e-06, + "loss": 0.1329, + "step": 2943 + }, + { + "epoch": 2.13, + "learning_rate": 4.095001171740154e-06, + "loss": 0.1186, + "step": 2944 + }, + { + "epoch": 2.13, + "learning_rate": 4.088697618314571e-06, + "loss": 0.1288, + "step": 2945 + }, + { + "epoch": 2.13, + "learning_rate": 4.082397673289642e-06, + "loss": 0.1255, + "step": 2946 + }, + { + "epoch": 2.13, + "learning_rate": 4.076101340510998e-06, + "loss": 0.1242, + "step": 2947 + }, + { + "epoch": 2.13, + "learning_rate": 4.069808623822077e-06, + "loss": 0.1319, + "step": 2948 + }, + { + "epoch": 2.13, + "learning_rate": 4.063519527064104e-06, + "loss": 0.1294, + "step": 2949 + }, + { + "epoch": 2.13, + "learning_rate": 4.05723405407609e-06, + "loss": 0.1275, + "step": 2950 + }, + { + "epoch": 2.13, + "learning_rate": 4.050952208694844e-06, + "loss": 0.1308, + "step": 2951 + }, + { + "epoch": 2.14, + "learning_rate": 4.0446739947549485e-06, + "loss": 0.1285, + "step": 2952 + }, + { + "epoch": 2.14, + "learning_rate": 4.038399416088783e-06, + "loss": 0.1266, + "step": 2953 + }, + { + "epoch": 2.14, + "learning_rate": 4.032128476526493e-06, + "loss": 0.1292, + "step": 2954 + }, + { + "epoch": 2.14, + "learning_rate": 4.0258611798960175e-06, + "loss": 0.1353, + "step": 2955 + }, + { + "epoch": 2.14, + "learning_rate": 4.0195975300230585e-06, + "loss": 0.1337, + "step": 2956 + }, + { + "epoch": 2.14, + "learning_rate": 4.013337530731104e-06, + "loss": 0.1196, + "step": 2957 + }, + { + "epoch": 2.14, + "learning_rate": 4.0070811858413985e-06, + "loss": 0.1292, + "step": 2958 + }, + { + "epoch": 2.14, + "learning_rate": 4.000828499172976e-06, + "loss": 0.1245, + "step": 2959 + }, + { + "epoch": 2.14, + "learning_rate": 3.994579474542618e-06, + "loss": 0.1153, + "step": 2960 + }, + { + "epoch": 2.14, + "learning_rate": 3.988334115764887e-06, + "loss": 0.1254, + "step": 2961 + }, + { + "epoch": 2.14, + "learning_rate": 3.98209242665209e-06, + "loss": 0.1199, + "step": 2962 + }, + { + "epoch": 2.14, + "learning_rate": 3.975854411014312e-06, + "loss": 0.1254, + "step": 2963 + }, + { + "epoch": 2.14, + "learning_rate": 3.96962007265938e-06, + "loss": 0.1086, + "step": 2964 + }, + { + "epoch": 2.14, + "learning_rate": 3.96338941539289e-06, + "loss": 0.1312, + "step": 2965 + }, + { + "epoch": 2.15, + "learning_rate": 3.957162443018178e-06, + "loss": 0.1204, + "step": 2966 + }, + { + "epoch": 2.15, + "learning_rate": 3.950939159336346e-06, + "loss": 0.1292, + "step": 2967 + }, + { + "epoch": 2.15, + "learning_rate": 3.944719568146226e-06, + "loss": 0.1316, + "step": 2968 + }, + { + "epoch": 2.15, + "learning_rate": 3.938503673244413e-06, + "loss": 0.1249, + "step": 2969 + }, + { + "epoch": 2.15, + "learning_rate": 3.932291478425232e-06, + "loss": 0.1286, + "step": 2970 + }, + { + "epoch": 2.15, + "learning_rate": 3.926082987480761e-06, + "loss": 0.1207, + "step": 2971 + }, + { + "epoch": 2.15, + "learning_rate": 3.9198782042008074e-06, + "loss": 0.1235, + "step": 2972 + }, + { + "epoch": 2.15, + "learning_rate": 3.913677132372925e-06, + "loss": 0.1259, + "step": 2973 + }, + { + "epoch": 2.15, + "learning_rate": 3.907479775782389e-06, + "loss": 0.1289, + "step": 2974 + }, + { + "epoch": 2.15, + "learning_rate": 3.901286138212222e-06, + "loss": 0.1245, + "step": 2975 + }, + { + "epoch": 2.15, + "learning_rate": 3.895096223443162e-06, + "loss": 0.1216, + "step": 2976 + }, + { + "epoch": 2.15, + "learning_rate": 3.888910035253689e-06, + "loss": 0.1194, + "step": 2977 + }, + { + "epoch": 2.15, + "learning_rate": 3.882727577419993e-06, + "loss": 0.1247, + "step": 2978 + }, + { + "epoch": 2.15, + "learning_rate": 3.876548853716001e-06, + "loss": 0.1267, + "step": 2979 + }, + { + "epoch": 2.16, + "learning_rate": 3.870373867913348e-06, + "loss": 0.1168, + "step": 2980 + }, + { + "epoch": 2.16, + "learning_rate": 3.864202623781398e-06, + "loss": 0.1118, + "step": 2981 + }, + { + "epoch": 2.16, + "learning_rate": 3.858035125087222e-06, + "loss": 0.1152, + "step": 2982 + }, + { + "epoch": 2.16, + "learning_rate": 3.85187137559561e-06, + "loss": 0.118, + "step": 2983 + }, + { + "epoch": 2.16, + "learning_rate": 3.845711379069069e-06, + "loss": 0.1198, + "step": 2984 + }, + { + "epoch": 2.16, + "learning_rate": 3.8395551392678e-06, + "loss": 0.1167, + "step": 2985 + }, + { + "epoch": 2.16, + "learning_rate": 3.833402659949726e-06, + "loss": 0.1228, + "step": 2986 + }, + { + "epoch": 2.16, + "learning_rate": 3.827253944870461e-06, + "loss": 0.1235, + "step": 2987 + }, + { + "epoch": 2.16, + "learning_rate": 3.821108997783332e-06, + "loss": 0.1182, + "step": 2988 + }, + { + "epoch": 2.16, + "learning_rate": 3.814967822439364e-06, + "loss": 0.1097, + "step": 2989 + }, + { + "epoch": 2.16, + "learning_rate": 3.808830422587273e-06, + "loss": 0.1208, + "step": 2990 + }, + { + "epoch": 2.16, + "learning_rate": 3.80269680197348e-06, + "loss": 0.1144, + "step": 2991 + }, + { + "epoch": 2.16, + "learning_rate": 3.7965669643420876e-06, + "loss": 0.1297, + "step": 2992 + }, + { + "epoch": 2.16, + "learning_rate": 3.7904409134348996e-06, + "loss": 0.12, + "step": 2993 + }, + { + "epoch": 2.17, + "learning_rate": 3.7843186529914066e-06, + "loss": 0.1258, + "step": 2994 + }, + { + "epoch": 2.17, + "learning_rate": 3.778200186748778e-06, + "loss": 0.1305, + "step": 2995 + }, + { + "epoch": 2.17, + "learning_rate": 3.772085518441878e-06, + "loss": 0.1261, + "step": 2996 + }, + { + "epoch": 2.17, + "learning_rate": 3.7659746518032393e-06, + "loss": 0.1256, + "step": 2997 + }, + { + "epoch": 2.17, + "learning_rate": 3.75986759056309e-06, + "loss": 0.135, + "step": 2998 + }, + { + "epoch": 2.17, + "learning_rate": 3.753764338449317e-06, + "loss": 0.1293, + "step": 2999 + }, + { + "epoch": 2.17, + "learning_rate": 3.747664899187501e-06, + "loss": 0.114, + "step": 3000 + }, + { + "epoch": 2.17, + "eval_loss": 0.12288390845060349, + "eval_runtime": 716.6812, + "eval_samples_per_second": 69.766, + "eval_steps_per_second": 2.181, + "step": 3000 + }, + { + "epoch": 2.17, + "learning_rate": 3.7415692765008784e-06, + "loss": 0.1178, + "step": 3001 + }, + { + "epoch": 2.17, + "learning_rate": 3.7354774741103704e-06, + "loss": 0.1275, + "step": 3002 + }, + { + "epoch": 2.17, + "learning_rate": 3.729389495734551e-06, + "loss": 0.1193, + "step": 3003 + }, + { + "epoch": 2.17, + "learning_rate": 3.7233053450896773e-06, + "loss": 0.1268, + "step": 3004 + }, + { + "epoch": 2.17, + "learning_rate": 3.717225025889651e-06, + "loss": 0.128, + "step": 3005 + }, + { + "epoch": 2.17, + "learning_rate": 3.7111485418460535e-06, + "loss": 0.1223, + "step": 3006 + }, + { + "epoch": 2.18, + "learning_rate": 3.705075896668109e-06, + "loss": 0.1212, + "step": 3007 + }, + { + "epoch": 2.18, + "learning_rate": 3.6990070940627132e-06, + "loss": 0.1322, + "step": 3008 + }, + { + "epoch": 2.18, + "learning_rate": 3.6929421377344e-06, + "loss": 0.1224, + "step": 3009 + }, + { + "epoch": 2.18, + "learning_rate": 3.686881031385372e-06, + "loss": 0.1256, + "step": 3010 + }, + { + "epoch": 2.18, + "learning_rate": 3.6808237787154676e-06, + "loss": 0.1128, + "step": 3011 + }, + { + "epoch": 2.18, + "learning_rate": 3.6747703834221847e-06, + "loss": 0.1176, + "step": 3012 + }, + { + "epoch": 2.18, + "learning_rate": 3.6687208492006556e-06, + "loss": 0.1253, + "step": 3013 + }, + { + "epoch": 2.18, + "learning_rate": 3.6626751797436657e-06, + "loss": 0.1202, + "step": 3014 + }, + { + "epoch": 2.18, + "learning_rate": 3.6566333787416307e-06, + "loss": 0.1302, + "step": 3015 + }, + { + "epoch": 2.18, + "learning_rate": 3.6505954498826144e-06, + "loss": 0.1434, + "step": 3016 + }, + { + "epoch": 2.18, + "learning_rate": 3.644561396852313e-06, + "loss": 0.1259, + "step": 3017 + }, + { + "epoch": 2.18, + "learning_rate": 3.6385312233340607e-06, + "loss": 0.1272, + "step": 3018 + }, + { + "epoch": 2.18, + "learning_rate": 3.6325049330088138e-06, + "loss": 0.1175, + "step": 3019 + }, + { + "epoch": 2.18, + "learning_rate": 3.6264825295551687e-06, + "loss": 0.123, + "step": 3020 + }, + { + "epoch": 2.19, + "learning_rate": 3.620464016649341e-06, + "loss": 0.1223, + "step": 3021 + }, + { + "epoch": 2.19, + "learning_rate": 3.6144493979651805e-06, + "loss": 0.1245, + "step": 3022 + }, + { + "epoch": 2.19, + "learning_rate": 3.6084386771741475e-06, + "loss": 0.1284, + "step": 3023 + }, + { + "epoch": 2.19, + "learning_rate": 3.6024318579453377e-06, + "loss": 0.1253, + "step": 3024 + }, + { + "epoch": 2.19, + "learning_rate": 3.59642894394545e-06, + "loss": 0.1381, + "step": 3025 + }, + { + "epoch": 2.19, + "learning_rate": 3.5904299388388107e-06, + "loss": 0.1174, + "step": 3026 + }, + { + "epoch": 2.19, + "learning_rate": 3.58443484628736e-06, + "loss": 0.132, + "step": 3027 + }, + { + "epoch": 2.19, + "learning_rate": 3.5784436699506397e-06, + "loss": 0.1235, + "step": 3028 + }, + { + "epoch": 2.19, + "learning_rate": 3.572456413485813e-06, + "loss": 0.1245, + "step": 3029 + }, + { + "epoch": 2.19, + "learning_rate": 3.5664730805476375e-06, + "loss": 0.1248, + "step": 3030 + }, + { + "epoch": 2.19, + "learning_rate": 3.560493674788492e-06, + "loss": 0.1181, + "step": 3031 + }, + { + "epoch": 2.19, + "learning_rate": 3.5545181998583413e-06, + "loss": 0.1151, + "step": 3032 + }, + { + "epoch": 2.19, + "learning_rate": 3.548546659404765e-06, + "loss": 0.1194, + "step": 3033 + }, + { + "epoch": 2.19, + "learning_rate": 3.542579057072927e-06, + "loss": 0.1281, + "step": 3034 + }, + { + "epoch": 2.2, + "learning_rate": 3.5366153965056025e-06, + "loss": 0.1286, + "step": 3035 + }, + { + "epoch": 2.2, + "learning_rate": 3.530655681343147e-06, + "loss": 0.1248, + "step": 3036 + }, + { + "epoch": 2.2, + "learning_rate": 3.5246999152235186e-06, + "loss": 0.1113, + "step": 3037 + }, + { + "epoch": 2.2, + "learning_rate": 3.5187481017822533e-06, + "loss": 0.123, + "step": 3038 + }, + { + "epoch": 2.2, + "learning_rate": 3.512800244652488e-06, + "loss": 0.1272, + "step": 3039 + }, + { + "epoch": 2.2, + "learning_rate": 3.5068563474649296e-06, + "loss": 0.1308, + "step": 3040 + }, + { + "epoch": 2.2, + "learning_rate": 3.5009164138478825e-06, + "loss": 0.137, + "step": 3041 + }, + { + "epoch": 2.2, + "learning_rate": 3.4949804474272188e-06, + "loss": 0.1236, + "step": 3042 + }, + { + "epoch": 2.2, + "learning_rate": 3.4890484518264e-06, + "loss": 0.1243, + "step": 3043 + }, + { + "epoch": 2.2, + "learning_rate": 3.4831204306664535e-06, + "loss": 0.1202, + "step": 3044 + }, + { + "epoch": 2.2, + "learning_rate": 3.4771963875659896e-06, + "loss": 0.1159, + "step": 3045 + }, + { + "epoch": 2.2, + "learning_rate": 3.4712763261411775e-06, + "loss": 0.1359, + "step": 3046 + }, + { + "epoch": 2.2, + "learning_rate": 3.465360250005779e-06, + "loss": 0.129, + "step": 3047 + }, + { + "epoch": 2.2, + "learning_rate": 3.4594481627710976e-06, + "loss": 0.1258, + "step": 3048 + }, + { + "epoch": 2.21, + "learning_rate": 3.4535400680460194e-06, + "loss": 0.1354, + "step": 3049 + }, + { + "epoch": 2.21, + "learning_rate": 3.447635969436981e-06, + "loss": 0.1221, + "step": 3050 + }, + { + "epoch": 2.21, + "learning_rate": 3.441735870547992e-06, + "loss": 0.124, + "step": 3051 + }, + { + "epoch": 2.21, + "learning_rate": 3.4358397749806074e-06, + "loss": 0.123, + "step": 3052 + }, + { + "epoch": 2.21, + "learning_rate": 3.4299476863339533e-06, + "loss": 0.1319, + "step": 3053 + }, + { + "epoch": 2.21, + "learning_rate": 3.4240596082046927e-06, + "loss": 0.1225, + "step": 3054 + }, + { + "epoch": 2.21, + "learning_rate": 3.418175544187058e-06, + "loss": 0.1207, + "step": 3055 + }, + { + "epoch": 2.21, + "learning_rate": 3.412295497872816e-06, + "loss": 0.132, + "step": 3056 + }, + { + "epoch": 2.21, + "learning_rate": 3.4064194728512944e-06, + "loss": 0.1335, + "step": 3057 + }, + { + "epoch": 2.21, + "learning_rate": 3.4005474727093534e-06, + "loss": 0.1318, + "step": 3058 + }, + { + "epoch": 2.21, + "learning_rate": 3.3946795010314083e-06, + "loss": 0.1168, + "step": 3059 + }, + { + "epoch": 2.21, + "learning_rate": 3.3888155613994045e-06, + "loss": 0.1245, + "step": 3060 + }, + { + "epoch": 2.21, + "learning_rate": 3.3829556573928323e-06, + "loss": 0.1244, + "step": 3061 + }, + { + "epoch": 2.21, + "learning_rate": 3.377099792588724e-06, + "loss": 0.1262, + "step": 3062 + }, + { + "epoch": 2.22, + "learning_rate": 3.371247970561631e-06, + "loss": 0.1252, + "step": 3063 + }, + { + "epoch": 2.22, + "learning_rate": 3.3654001948836524e-06, + "loss": 0.1353, + "step": 3064 + }, + { + "epoch": 2.22, + "learning_rate": 3.359556469124404e-06, + "loss": 0.1158, + "step": 3065 + }, + { + "epoch": 2.22, + "learning_rate": 3.353716796851044e-06, + "loss": 0.1154, + "step": 3066 + }, + { + "epoch": 2.22, + "learning_rate": 3.347881181628241e-06, + "loss": 0.123, + "step": 3067 + }, + { + "epoch": 2.22, + "learning_rate": 3.3420496270182003e-06, + "loss": 0.1195, + "step": 3068 + }, + { + "epoch": 2.22, + "learning_rate": 3.336222136580637e-06, + "loss": 0.1327, + "step": 3069 + }, + { + "epoch": 2.22, + "learning_rate": 3.3303987138727965e-06, + "loss": 0.1272, + "step": 3070 + }, + { + "epoch": 2.22, + "learning_rate": 3.3245793624494304e-06, + "loss": 0.1245, + "step": 3071 + }, + { + "epoch": 2.22, + "learning_rate": 3.3187640858628157e-06, + "loss": 0.1373, + "step": 3072 + }, + { + "epoch": 2.22, + "learning_rate": 3.31295288766273e-06, + "loss": 0.1261, + "step": 3073 + }, + { + "epoch": 2.22, + "learning_rate": 3.307145771396475e-06, + "loss": 0.1258, + "step": 3074 + }, + { + "epoch": 2.22, + "learning_rate": 3.301342740608844e-06, + "loss": 0.1195, + "step": 3075 + }, + { + "epoch": 2.22, + "learning_rate": 3.295543798842159e-06, + "loss": 0.1172, + "step": 3076 + }, + { + "epoch": 2.23, + "learning_rate": 3.2897489496362234e-06, + "loss": 0.1274, + "step": 3077 + }, + { + "epoch": 2.23, + "learning_rate": 3.2839581965283573e-06, + "loss": 0.1165, + "step": 3078 + }, + { + "epoch": 2.23, + "learning_rate": 3.27817154305337e-06, + "loss": 0.1263, + "step": 3079 + }, + { + "epoch": 2.23, + "learning_rate": 3.2723889927435802e-06, + "loss": 0.1267, + "step": 3080 + }, + { + "epoch": 2.23, + "learning_rate": 3.2666105491287882e-06, + "loss": 0.1214, + "step": 3081 + }, + { + "epoch": 2.23, + "learning_rate": 3.2608362157362994e-06, + "loss": 0.1174, + "step": 3082 + }, + { + "epoch": 2.23, + "learning_rate": 3.2550659960909013e-06, + "loss": 0.1306, + "step": 3083 + }, + { + "epoch": 2.23, + "learning_rate": 3.24929989371488e-06, + "loss": 0.1271, + "step": 3084 + }, + { + "epoch": 2.23, + "learning_rate": 3.2435379121279962e-06, + "loss": 0.1142, + "step": 3085 + }, + { + "epoch": 2.23, + "learning_rate": 3.237780054847507e-06, + "loss": 0.1168, + "step": 3086 + }, + { + "epoch": 2.23, + "learning_rate": 3.2320263253881412e-06, + "loss": 0.1184, + "step": 3087 + }, + { + "epoch": 2.23, + "learning_rate": 3.226276727262119e-06, + "loss": 0.1225, + "step": 3088 + }, + { + "epoch": 2.23, + "learning_rate": 3.2205312639791264e-06, + "loss": 0.116, + "step": 3089 + }, + { + "epoch": 2.24, + "learning_rate": 3.214789939046338e-06, + "loss": 0.1347, + "step": 3090 + }, + { + "epoch": 2.24, + "learning_rate": 3.2090527559683913e-06, + "loss": 0.1228, + "step": 3091 + }, + { + "epoch": 2.24, + "learning_rate": 3.2033197182474053e-06, + "loss": 0.1374, + "step": 3092 + }, + { + "epoch": 2.24, + "learning_rate": 3.1975908293829573e-06, + "loss": 0.1173, + "step": 3093 + }, + { + "epoch": 2.24, + "learning_rate": 3.1918660928721023e-06, + "loss": 0.1279, + "step": 3094 + }, + { + "epoch": 2.24, + "learning_rate": 3.186145512209359e-06, + "loss": 0.1344, + "step": 3095 + }, + { + "epoch": 2.24, + "learning_rate": 3.180429090886702e-06, + "loss": 0.1145, + "step": 3096 + }, + { + "epoch": 2.24, + "learning_rate": 3.1747168323935763e-06, + "loss": 0.1195, + "step": 3097 + }, + { + "epoch": 2.24, + "learning_rate": 3.169008740216877e-06, + "loss": 0.1302, + "step": 3098 + }, + { + "epoch": 2.24, + "learning_rate": 3.1633048178409642e-06, + "loss": 0.1217, + "step": 3099 + }, + { + "epoch": 2.24, + "learning_rate": 3.1576050687476457e-06, + "loss": 0.1349, + "step": 3100 + }, + { + "epoch": 2.24, + "learning_rate": 3.1519094964161888e-06, + "loss": 0.1262, + "step": 3101 + }, + { + "epoch": 2.24, + "learning_rate": 3.146218104323302e-06, + "loss": 0.123, + "step": 3102 + }, + { + "epoch": 2.24, + "learning_rate": 3.1405308959431536e-06, + "loss": 0.1214, + "step": 3103 + }, + { + "epoch": 2.25, + "learning_rate": 3.1348478747473442e-06, + "loss": 0.1202, + "step": 3104 + }, + { + "epoch": 2.25, + "learning_rate": 3.129169044204933e-06, + "loss": 0.1353, + "step": 3105 + }, + { + "epoch": 2.25, + "learning_rate": 3.1234944077824102e-06, + "loss": 0.1243, + "step": 3106 + }, + { + "epoch": 2.25, + "learning_rate": 3.117823968943716e-06, + "loss": 0.1161, + "step": 3107 + }, + { + "epoch": 2.25, + "learning_rate": 3.112157731150214e-06, + "loss": 0.1206, + "step": 3108 + }, + { + "epoch": 2.25, + "learning_rate": 3.1064956978607187e-06, + "loss": 0.1188, + "step": 3109 + }, + { + "epoch": 2.25, + "learning_rate": 3.1008378725314647e-06, + "loss": 0.1285, + "step": 3110 + }, + { + "epoch": 2.25, + "learning_rate": 3.0951842586161295e-06, + "loss": 0.1205, + "step": 3111 + }, + { + "epoch": 2.25, + "learning_rate": 3.0895348595658093e-06, + "loss": 0.1342, + "step": 3112 + }, + { + "epoch": 2.25, + "learning_rate": 3.0838896788290384e-06, + "loss": 0.1323, + "step": 3113 + }, + { + "epoch": 2.25, + "learning_rate": 3.078248719851765e-06, + "loss": 0.1155, + "step": 3114 + }, + { + "epoch": 2.25, + "learning_rate": 3.0726119860773706e-06, + "loss": 0.1298, + "step": 3115 + }, + { + "epoch": 2.25, + "learning_rate": 3.066979480946648e-06, + "loss": 0.1232, + "step": 3116 + }, + { + "epoch": 2.25, + "learning_rate": 3.061351207897818e-06, + "loss": 0.1188, + "step": 3117 + }, + { + "epoch": 2.26, + "learning_rate": 3.055727170366508e-06, + "loss": 0.1382, + "step": 3118 + }, + { + "epoch": 2.26, + "learning_rate": 3.050107371785771e-06, + "loss": 0.114, + "step": 3119 + }, + { + "epoch": 2.26, + "learning_rate": 3.0444918155860603e-06, + "loss": 0.1288, + "step": 3120 + }, + { + "epoch": 2.26, + "learning_rate": 3.038880505195253e-06, + "loss": 0.1287, + "step": 3121 + }, + { + "epoch": 2.26, + "learning_rate": 3.0332734440386214e-06, + "loss": 0.1167, + "step": 3122 + }, + { + "epoch": 2.26, + "learning_rate": 3.0276706355388554e-06, + "loss": 0.1308, + "step": 3123 + }, + { + "epoch": 2.26, + "learning_rate": 3.022072083116038e-06, + "loss": 0.1354, + "step": 3124 + }, + { + "epoch": 2.26, + "learning_rate": 3.016477790187665e-06, + "loss": 0.1194, + "step": 3125 + }, + { + "epoch": 2.26, + "learning_rate": 3.0108877601686226e-06, + "loss": 0.1272, + "step": 3126 + }, + { + "epoch": 2.26, + "learning_rate": 3.0053019964712026e-06, + "loss": 0.1234, + "step": 3127 + }, + { + "epoch": 2.26, + "learning_rate": 2.9997205025050856e-06, + "loss": 0.1202, + "step": 3128 + }, + { + "epoch": 2.26, + "learning_rate": 2.9941432816773507e-06, + "loss": 0.1187, + "step": 3129 + }, + { + "epoch": 2.26, + "learning_rate": 2.98857033739247e-06, + "loss": 0.1149, + "step": 3130 + }, + { + "epoch": 2.26, + "learning_rate": 2.983001673052298e-06, + "loss": 0.1178, + "step": 3131 + }, + { + "epoch": 2.27, + "learning_rate": 2.9774372920560857e-06, + "loss": 0.1292, + "step": 3132 + }, + { + "epoch": 2.27, + "learning_rate": 2.971877197800458e-06, + "loss": 0.126, + "step": 3133 + }, + { + "epoch": 2.27, + "learning_rate": 2.9663213936794334e-06, + "loss": 0.1134, + "step": 3134 + }, + { + "epoch": 2.27, + "learning_rate": 2.9607698830844113e-06, + "loss": 0.1263, + "step": 3135 + }, + { + "epoch": 2.27, + "learning_rate": 2.9552226694041597e-06, + "loss": 0.1227, + "step": 3136 + }, + { + "epoch": 2.27, + "learning_rate": 2.949679756024838e-06, + "loss": 0.1328, + "step": 3137 + }, + { + "epoch": 2.27, + "learning_rate": 2.944141146329966e-06, + "loss": 0.1231, + "step": 3138 + }, + { + "epoch": 2.27, + "learning_rate": 2.9386068437004476e-06, + "loss": 0.1282, + "step": 3139 + }, + { + "epoch": 2.27, + "learning_rate": 2.933076851514558e-06, + "loss": 0.1185, + "step": 3140 + }, + { + "epoch": 2.27, + "learning_rate": 2.9275511731479287e-06, + "loss": 0.1284, + "step": 3141 + }, + { + "epoch": 2.27, + "learning_rate": 2.9220298119735746e-06, + "loss": 0.1368, + "step": 3142 + }, + { + "epoch": 2.27, + "learning_rate": 2.9165127713618578e-06, + "loss": 0.1276, + "step": 3143 + }, + { + "epoch": 2.27, + "learning_rate": 2.9110000546805217e-06, + "loss": 0.1268, + "step": 3144 + }, + { + "epoch": 2.27, + "learning_rate": 2.905491665294652e-06, + "loss": 0.123, + "step": 3145 + }, + { + "epoch": 2.28, + "learning_rate": 2.8999876065667077e-06, + "loss": 0.118, + "step": 3146 + }, + { + "epoch": 2.28, + "learning_rate": 2.894487881856494e-06, + "loss": 0.1343, + "step": 3147 + }, + { + "epoch": 2.28, + "learning_rate": 2.888992494521179e-06, + "loss": 0.1245, + "step": 3148 + }, + { + "epoch": 2.28, + "learning_rate": 2.8835014479152744e-06, + "loss": 0.1219, + "step": 3149 + }, + { + "epoch": 2.28, + "learning_rate": 2.8780147453906506e-06, + "loss": 0.1181, + "step": 3150 + }, + { + "epoch": 2.28, + "learning_rate": 2.8725323902965174e-06, + "loss": 0.1256, + "step": 3151 + }, + { + "epoch": 2.28, + "learning_rate": 2.867054385979443e-06, + "loss": 0.12, + "step": 3152 + }, + { + "epoch": 2.28, + "learning_rate": 2.8615807357833246e-06, + "loss": 0.1181, + "step": 3153 + }, + { + "epoch": 2.28, + "learning_rate": 2.8561114430494165e-06, + "loss": 0.1272, + "step": 3154 + }, + { + "epoch": 2.28, + "learning_rate": 2.8506465111162986e-06, + "loss": 0.1157, + "step": 3155 + }, + { + "epoch": 2.28, + "learning_rate": 2.8451859433199047e-06, + "loss": 0.121, + "step": 3156 + }, + { + "epoch": 2.28, + "learning_rate": 2.8397297429934877e-06, + "loss": 0.1263, + "step": 3157 + }, + { + "epoch": 2.28, + "learning_rate": 2.834277913467651e-06, + "loss": 0.1204, + "step": 3158 + }, + { + "epoch": 2.28, + "learning_rate": 2.8288304580703165e-06, + "loss": 0.1248, + "step": 3159 + }, + { + "epoch": 2.29, + "learning_rate": 2.8233873801267452e-06, + "loss": 0.1281, + "step": 3160 + }, + { + "epoch": 2.29, + "learning_rate": 2.817948682959518e-06, + "loss": 0.1201, + "step": 3161 + }, + { + "epoch": 2.29, + "learning_rate": 2.8125143698885483e-06, + "loss": 0.1353, + "step": 3162 + }, + { + "epoch": 2.29, + "learning_rate": 2.8070844442310707e-06, + "loss": 0.111, + "step": 3163 + }, + { + "epoch": 2.29, + "learning_rate": 2.8016589093016468e-06, + "loss": 0.1293, + "step": 3164 + }, + { + "epoch": 2.29, + "learning_rate": 2.7962377684121457e-06, + "loss": 0.1274, + "step": 3165 + }, + { + "epoch": 2.29, + "learning_rate": 2.7908210248717672e-06, + "loss": 0.112, + "step": 3166 + }, + { + "epoch": 2.29, + "learning_rate": 2.7854086819870162e-06, + "loss": 0.1292, + "step": 3167 + }, + { + "epoch": 2.29, + "learning_rate": 2.7800007430617228e-06, + "loss": 0.1206, + "step": 3168 + }, + { + "epoch": 2.29, + "learning_rate": 2.7745972113970164e-06, + "loss": 0.1194, + "step": 3169 + }, + { + "epoch": 2.29, + "learning_rate": 2.7691980902913473e-06, + "loss": 0.12, + "step": 3170 + }, + { + "epoch": 2.29, + "learning_rate": 2.763803383040463e-06, + "loss": 0.1281, + "step": 3171 + }, + { + "epoch": 2.29, + "learning_rate": 2.7584130929374264e-06, + "loss": 0.1306, + "step": 3172 + }, + { + "epoch": 2.3, + "learning_rate": 2.753027223272601e-06, + "loss": 0.135, + "step": 3173 + }, + { + "epoch": 2.3, + "learning_rate": 2.747645777333645e-06, + "loss": 0.1284, + "step": 3174 + }, + { + "epoch": 2.3, + "learning_rate": 2.7422687584055284e-06, + "loss": 0.1196, + "step": 3175 + }, + { + "epoch": 2.3, + "learning_rate": 2.7368961697705065e-06, + "loss": 0.1237, + "step": 3176 + }, + { + "epoch": 2.3, + "learning_rate": 2.7315280147081426e-06, + "loss": 0.1226, + "step": 3177 + }, + { + "epoch": 2.3, + "learning_rate": 2.7261642964952796e-06, + "loss": 0.1221, + "step": 3178 + }, + { + "epoch": 2.3, + "learning_rate": 2.7208050184060677e-06, + "loss": 0.1187, + "step": 3179 + }, + { + "epoch": 2.3, + "learning_rate": 2.7154501837119307e-06, + "loss": 0.1314, + "step": 3180 + }, + { + "epoch": 2.3, + "learning_rate": 2.7100997956815954e-06, + "loss": 0.1221, + "step": 3181 + }, + { + "epoch": 2.3, + "learning_rate": 2.7047538575810605e-06, + "loss": 0.1375, + "step": 3182 + }, + { + "epoch": 2.3, + "learning_rate": 2.699412372673621e-06, + "loss": 0.1188, + "step": 3183 + }, + { + "epoch": 2.3, + "learning_rate": 2.6940753442198407e-06, + "loss": 0.1125, + "step": 3184 + }, + { + "epoch": 2.3, + "learning_rate": 2.6887427754775763e-06, + "loss": 0.1295, + "step": 3185 + }, + { + "epoch": 2.3, + "learning_rate": 2.6834146697019482e-06, + "loss": 0.1153, + "step": 3186 + }, + { + "epoch": 2.31, + "learning_rate": 2.678091030145368e-06, + "loss": 0.1234, + "step": 3187 + }, + { + "epoch": 2.31, + "learning_rate": 2.672771860057506e-06, + "loss": 0.1314, + "step": 3188 + }, + { + "epoch": 2.31, + "learning_rate": 2.6674571626853163e-06, + "loss": 0.1282, + "step": 3189 + }, + { + "epoch": 2.31, + "learning_rate": 2.662146941273013e-06, + "loss": 0.1151, + "step": 3190 + }, + { + "epoch": 2.31, + "learning_rate": 2.656841199062087e-06, + "loss": 0.1322, + "step": 3191 + }, + { + "epoch": 2.31, + "learning_rate": 2.6515399392912845e-06, + "loss": 0.1283, + "step": 3192 + }, + { + "epoch": 2.31, + "learning_rate": 2.646243165196627e-06, + "loss": 0.1259, + "step": 3193 + }, + { + "epoch": 2.31, + "learning_rate": 2.640950880011389e-06, + "loss": 0.1255, + "step": 3194 + }, + { + "epoch": 2.31, + "learning_rate": 2.6356630869661137e-06, + "loss": 0.1261, + "step": 3195 + }, + { + "epoch": 2.31, + "learning_rate": 2.6303797892885897e-06, + "loss": 0.1303, + "step": 3196 + }, + { + "epoch": 2.31, + "learning_rate": 2.625100990203875e-06, + "loss": 0.1237, + "step": 3197 + }, + { + "epoch": 2.31, + "learning_rate": 2.6198266929342673e-06, + "loss": 0.122, + "step": 3198 + }, + { + "epoch": 2.31, + "learning_rate": 2.614556900699333e-06, + "loss": 0.131, + "step": 3199 + }, + { + "epoch": 2.31, + "learning_rate": 2.609291616715871e-06, + "loss": 0.1253, + "step": 3200 + }, + { + "epoch": 2.32, + "learning_rate": 2.6040308441979444e-06, + "loss": 0.108, + "step": 3201 + }, + { + "epoch": 2.32, + "learning_rate": 2.5987745863568494e-06, + "loss": 0.1272, + "step": 3202 + }, + { + "epoch": 2.32, + "learning_rate": 2.5935228464011352e-06, + "loss": 0.1265, + "step": 3203 + }, + { + "epoch": 2.32, + "learning_rate": 2.588275627536586e-06, + "loss": 0.1188, + "step": 3204 + }, + { + "epoch": 2.32, + "learning_rate": 2.583032932966235e-06, + "loss": 0.1268, + "step": 3205 + }, + { + "epoch": 2.32, + "learning_rate": 2.5777947658903422e-06, + "loss": 0.1305, + "step": 3206 + }, + { + "epoch": 2.32, + "learning_rate": 2.5725611295064144e-06, + "loss": 0.1262, + "step": 3207 + }, + { + "epoch": 2.32, + "learning_rate": 2.5673320270091918e-06, + "loss": 0.1172, + "step": 3208 + }, + { + "epoch": 2.32, + "learning_rate": 2.562107461590637e-06, + "loss": 0.1362, + "step": 3209 + }, + { + "epoch": 2.32, + "learning_rate": 2.5568874364399563e-06, + "loss": 0.1237, + "step": 3210 + }, + { + "epoch": 2.32, + "learning_rate": 2.551671954743572e-06, + "loss": 0.1414, + "step": 3211 + }, + { + "epoch": 2.32, + "learning_rate": 2.5464610196851457e-06, + "loss": 0.1327, + "step": 3212 + }, + { + "epoch": 2.32, + "learning_rate": 2.54125463444555e-06, + "loss": 0.1291, + "step": 3213 + }, + { + "epoch": 2.32, + "learning_rate": 2.536052802202894e-06, + "loss": 0.1199, + "step": 3214 + }, + { + "epoch": 2.33, + "learning_rate": 2.530855526132494e-06, + "loss": 0.1266, + "step": 3215 + }, + { + "epoch": 2.33, + "learning_rate": 2.5256628094069e-06, + "loss": 0.1179, + "step": 3216 + }, + { + "epoch": 2.33, + "learning_rate": 2.5204746551958627e-06, + "loss": 0.1215, + "step": 3217 + }, + { + "epoch": 2.33, + "learning_rate": 2.515291066666362e-06, + "loss": 0.1151, + "step": 3218 + }, + { + "epoch": 2.33, + "learning_rate": 2.510112046982579e-06, + "loss": 0.1399, + "step": 3219 + }, + { + "epoch": 2.33, + "learning_rate": 2.5049375993059166e-06, + "loss": 0.1231, + "step": 3220 + }, + { + "epoch": 2.33, + "learning_rate": 2.499767726794977e-06, + "loss": 0.1246, + "step": 3221 + }, + { + "epoch": 2.33, + "learning_rate": 2.4946024326055775e-06, + "loss": 0.1264, + "step": 3222 + }, + { + "epoch": 2.33, + "learning_rate": 2.489441719890736e-06, + "loss": 0.1374, + "step": 3223 + }, + { + "epoch": 2.33, + "learning_rate": 2.484285591800679e-06, + "loss": 0.127, + "step": 3224 + }, + { + "epoch": 2.33, + "learning_rate": 2.4791340514828245e-06, + "loss": 0.1174, + "step": 3225 + }, + { + "epoch": 2.33, + "learning_rate": 2.4739871020818017e-06, + "loss": 0.1251, + "step": 3226 + }, + { + "epoch": 2.33, + "learning_rate": 2.468844746739427e-06, + "loss": 0.1251, + "step": 3227 + }, + { + "epoch": 2.33, + "learning_rate": 2.4637069885947228e-06, + "loss": 0.1167, + "step": 3228 + }, + { + "epoch": 2.34, + "learning_rate": 2.4585738307838936e-06, + "loss": 0.1234, + "step": 3229 + }, + { + "epoch": 2.34, + "learning_rate": 2.4534452764403483e-06, + "loss": 0.1254, + "step": 3230 + }, + { + "epoch": 2.34, + "learning_rate": 2.4483213286946727e-06, + "loss": 0.1253, + "step": 3231 + }, + { + "epoch": 2.34, + "learning_rate": 2.443201990674655e-06, + "loss": 0.1315, + "step": 3232 + }, + { + "epoch": 2.34, + "learning_rate": 2.4380872655052533e-06, + "loss": 0.1235, + "step": 3233 + }, + { + "epoch": 2.34, + "learning_rate": 2.432977156308627e-06, + "loss": 0.1142, + "step": 3234 + }, + { + "epoch": 2.34, + "learning_rate": 2.4278716662041015e-06, + "loss": 0.1244, + "step": 3235 + }, + { + "epoch": 2.34, + "learning_rate": 2.422770798308196e-06, + "loss": 0.1329, + "step": 3236 + }, + { + "epoch": 2.34, + "learning_rate": 2.4176745557345994e-06, + "loss": 0.1266, + "step": 3237 + }, + { + "epoch": 2.34, + "learning_rate": 2.4125829415941824e-06, + "loss": 0.1306, + "step": 3238 + }, + { + "epoch": 2.34, + "learning_rate": 2.4074959589949844e-06, + "loss": 0.1293, + "step": 3239 + }, + { + "epoch": 2.34, + "learning_rate": 2.4024136110422246e-06, + "loss": 0.1289, + "step": 3240 + }, + { + "epoch": 2.34, + "learning_rate": 2.3973359008382934e-06, + "loss": 0.1269, + "step": 3241 + }, + { + "epoch": 2.35, + "learning_rate": 2.392262831482739e-06, + "loss": 0.1221, + "step": 3242 + }, + { + "epoch": 2.35, + "learning_rate": 2.3871944060722917e-06, + "loss": 0.1288, + "step": 3243 + }, + { + "epoch": 2.35, + "learning_rate": 2.382130627700835e-06, + "loss": 0.1344, + "step": 3244 + }, + { + "epoch": 2.35, + "learning_rate": 2.3770714994594236e-06, + "loss": 0.1193, + "step": 3245 + }, + { + "epoch": 2.35, + "learning_rate": 2.372017024436267e-06, + "loss": 0.1157, + "step": 3246 + }, + { + "epoch": 2.35, + "learning_rate": 2.3669672057167435e-06, + "loss": 0.1169, + "step": 3247 + }, + { + "epoch": 2.35, + "learning_rate": 2.3619220463833768e-06, + "loss": 0.1196, + "step": 3248 + }, + { + "epoch": 2.35, + "learning_rate": 2.35688154951586e-06, + "loss": 0.1257, + "step": 3249 + }, + { + "epoch": 2.35, + "learning_rate": 2.3518457181910272e-06, + "loss": 0.1137, + "step": 3250 + }, + { + "epoch": 2.35, + "learning_rate": 2.3468145554828724e-06, + "loss": 0.1302, + "step": 3251 + }, + { + "epoch": 2.35, + "learning_rate": 2.341788064462538e-06, + "loss": 0.1261, + "step": 3252 + }, + { + "epoch": 2.35, + "learning_rate": 2.336766248198319e-06, + "loss": 0.123, + "step": 3253 + }, + { + "epoch": 2.35, + "learning_rate": 2.3317491097556455e-06, + "loss": 0.1264, + "step": 3254 + }, + { + "epoch": 2.35, + "learning_rate": 2.3267366521971046e-06, + "loss": 0.1335, + "step": 3255 + }, + { + "epoch": 2.36, + "learning_rate": 2.3217288785824144e-06, + "loss": 0.1329, + "step": 3256 + }, + { + "epoch": 2.36, + "learning_rate": 2.3167257919684473e-06, + "loss": 0.1238, + "step": 3257 + }, + { + "epoch": 2.36, + "learning_rate": 2.3117273954092014e-06, + "loss": 0.1255, + "step": 3258 + }, + { + "epoch": 2.36, + "learning_rate": 2.3067336919558214e-06, + "loss": 0.1166, + "step": 3259 + }, + { + "epoch": 2.36, + "learning_rate": 2.3017446846565795e-06, + "loss": 0.1293, + "step": 3260 + }, + { + "epoch": 2.36, + "learning_rate": 2.296760376556891e-06, + "loss": 0.1167, + "step": 3261 + }, + { + "epoch": 2.36, + "learning_rate": 2.291780770699291e-06, + "loss": 0.1184, + "step": 3262 + }, + { + "epoch": 2.36, + "learning_rate": 2.2868058701234565e-06, + "loss": 0.1118, + "step": 3263 + }, + { + "epoch": 2.36, + "learning_rate": 2.2818356778661797e-06, + "loss": 0.123, + "step": 3264 + }, + { + "epoch": 2.36, + "learning_rate": 2.2768701969613905e-06, + "loss": 0.1232, + "step": 3265 + }, + { + "epoch": 2.36, + "learning_rate": 2.271909430440132e-06, + "loss": 0.1245, + "step": 3266 + }, + { + "epoch": 2.36, + "learning_rate": 2.2669533813305824e-06, + "loss": 0.1246, + "step": 3267 + }, + { + "epoch": 2.36, + "learning_rate": 2.2620020526580243e-06, + "loss": 0.1193, + "step": 3268 + }, + { + "epoch": 2.36, + "learning_rate": 2.2570554474448746e-06, + "loss": 0.113, + "step": 3269 + }, + { + "epoch": 2.37, + "learning_rate": 2.252113568710653e-06, + "loss": 0.1219, + "step": 3270 + }, + { + "epoch": 2.37, + "learning_rate": 2.247176419472009e-06, + "loss": 0.1259, + "step": 3271 + }, + { + "epoch": 2.37, + "learning_rate": 2.2422440027426883e-06, + "loss": 0.1335, + "step": 3272 + }, + { + "epoch": 2.37, + "learning_rate": 2.2373163215335604e-06, + "loss": 0.1188, + "step": 3273 + }, + { + "epoch": 2.37, + "learning_rate": 2.232393378852602e-06, + "loss": 0.1211, + "step": 3274 + }, + { + "epoch": 2.37, + "learning_rate": 2.227475177704891e-06, + "loss": 0.1281, + "step": 3275 + }, + { + "epoch": 2.37, + "learning_rate": 2.222561721092621e-06, + "loss": 0.1196, + "step": 3276 + }, + { + "epoch": 2.37, + "learning_rate": 2.2176530120150753e-06, + "loss": 0.1296, + "step": 3277 + }, + { + "epoch": 2.37, + "learning_rate": 2.212749053468657e-06, + "loss": 0.1258, + "step": 3278 + }, + { + "epoch": 2.37, + "learning_rate": 2.2078498484468512e-06, + "loss": 0.1308, + "step": 3279 + }, + { + "epoch": 2.37, + "learning_rate": 2.2029553999402574e-06, + "loss": 0.1317, + "step": 3280 + }, + { + "epoch": 2.37, + "learning_rate": 2.1980657109365577e-06, + "loss": 0.1243, + "step": 3281 + }, + { + "epoch": 2.37, + "learning_rate": 2.193180784420539e-06, + "loss": 0.1034, + "step": 3282 + }, + { + "epoch": 2.37, + "learning_rate": 2.188300623374078e-06, + "loss": 0.1259, + "step": 3283 + }, + { + "epoch": 2.38, + "learning_rate": 2.1834252307761426e-06, + "loss": 0.1244, + "step": 3284 + }, + { + "epoch": 2.38, + "learning_rate": 2.1785546096027865e-06, + "loss": 0.1373, + "step": 3285 + }, + { + "epoch": 2.38, + "learning_rate": 2.1736887628271575e-06, + "loss": 0.1234, + "step": 3286 + }, + { + "epoch": 2.38, + "learning_rate": 2.16882769341948e-06, + "loss": 0.1318, + "step": 3287 + }, + { + "epoch": 2.38, + "learning_rate": 2.1639714043470717e-06, + "loss": 0.1228, + "step": 3288 + }, + { + "epoch": 2.38, + "learning_rate": 2.1591198985743233e-06, + "loss": 0.1208, + "step": 3289 + }, + { + "epoch": 2.38, + "learning_rate": 2.154273179062717e-06, + "loss": 0.122, + "step": 3290 + }, + { + "epoch": 2.38, + "learning_rate": 2.149431248770799e-06, + "loss": 0.1219, + "step": 3291 + }, + { + "epoch": 2.38, + "learning_rate": 2.1445941106542057e-06, + "loss": 0.124, + "step": 3292 + }, + { + "epoch": 2.38, + "learning_rate": 2.1397617676656368e-06, + "loss": 0.1251, + "step": 3293 + }, + { + "epoch": 2.38, + "learning_rate": 2.1349342227548753e-06, + "loss": 0.1256, + "step": 3294 + }, + { + "epoch": 2.38, + "learning_rate": 2.130111478868766e-06, + "loss": 0.1294, + "step": 3295 + }, + { + "epoch": 2.38, + "learning_rate": 2.125293538951232e-06, + "loss": 0.1081, + "step": 3296 + }, + { + "epoch": 2.38, + "learning_rate": 2.1204804059432536e-06, + "loss": 0.1166, + "step": 3297 + }, + { + "epoch": 2.39, + "learning_rate": 2.1156720827828893e-06, + "loss": 0.1182, + "step": 3298 + }, + { + "epoch": 2.39, + "learning_rate": 2.1108685724052467e-06, + "loss": 0.1338, + "step": 3299 + }, + { + "epoch": 2.39, + "learning_rate": 2.1060698777425124e-06, + "loss": 0.1229, + "step": 3300 + }, + { + "epoch": 2.39, + "learning_rate": 2.1012760017239177e-06, + "loss": 0.1313, + "step": 3301 + }, + { + "epoch": 2.39, + "learning_rate": 2.0964869472757634e-06, + "loss": 0.123, + "step": 3302 + }, + { + "epoch": 2.39, + "learning_rate": 2.0917027173214e-06, + "loss": 0.1124, + "step": 3303 + }, + { + "epoch": 2.39, + "learning_rate": 2.0869233147812396e-06, + "loss": 0.1244, + "step": 3304 + }, + { + "epoch": 2.39, + "learning_rate": 2.0821487425727403e-06, + "loss": 0.1301, + "step": 3305 + }, + { + "epoch": 2.39, + "learning_rate": 2.07737900361042e-06, + "loss": 0.1201, + "step": 3306 + }, + { + "epoch": 2.39, + "learning_rate": 2.0726141008058364e-06, + "loss": 0.1159, + "step": 3307 + }, + { + "epoch": 2.39, + "learning_rate": 2.0678540370676036e-06, + "loss": 0.1262, + "step": 3308 + }, + { + "epoch": 2.39, + "learning_rate": 2.063098815301381e-06, + "loss": 0.1271, + "step": 3309 + }, + { + "epoch": 2.39, + "learning_rate": 2.0583484384098627e-06, + "loss": 0.1264, + "step": 3310 + }, + { + "epoch": 2.39, + "learning_rate": 2.053602909292799e-06, + "loss": 0.1229, + "step": 3311 + }, + { + "epoch": 2.4, + "learning_rate": 2.048862230846974e-06, + "loss": 0.1147, + "step": 3312 + }, + { + "epoch": 2.4, + "learning_rate": 2.044126405966207e-06, + "loss": 0.1351, + "step": 3313 + }, + { + "epoch": 2.4, + "learning_rate": 2.039395437541366e-06, + "loss": 0.1209, + "step": 3314 + }, + { + "epoch": 2.4, + "learning_rate": 2.034669328460339e-06, + "loss": 0.1158, + "step": 3315 + }, + { + "epoch": 2.4, + "learning_rate": 2.029948081608065e-06, + "loss": 0.1177, + "step": 3316 + }, + { + "epoch": 2.4, + "learning_rate": 2.025231699866499e-06, + "loss": 0.1128, + "step": 3317 + }, + { + "epoch": 2.4, + "learning_rate": 2.0205201861146363e-06, + "loss": 0.1241, + "step": 3318 + }, + { + "epoch": 2.4, + "learning_rate": 2.015813543228501e-06, + "loss": 0.1281, + "step": 3319 + }, + { + "epoch": 2.4, + "learning_rate": 2.0111117740811337e-06, + "loss": 0.1288, + "step": 3320 + }, + { + "epoch": 2.4, + "learning_rate": 2.0064148815426144e-06, + "loss": 0.1174, + "step": 3321 + }, + { + "epoch": 2.4, + "learning_rate": 2.0017228684800337e-06, + "loss": 0.1248, + "step": 3322 + }, + { + "epoch": 2.4, + "learning_rate": 1.997035737757512e-06, + "loss": 0.1367, + "step": 3323 + }, + { + "epoch": 2.4, + "learning_rate": 1.9923534922361833e-06, + "loss": 0.1194, + "step": 3324 + }, + { + "epoch": 2.41, + "learning_rate": 1.9876761347742056e-06, + "loss": 0.113, + "step": 3325 + }, + { + "epoch": 2.41, + "learning_rate": 1.983003668226746e-06, + "loss": 0.1239, + "step": 3326 + }, + { + "epoch": 2.41, + "learning_rate": 1.978336095445995e-06, + "loss": 0.135, + "step": 3327 + }, + { + "epoch": 2.41, + "learning_rate": 1.9736734192811456e-06, + "loss": 0.1299, + "step": 3328 + }, + { + "epoch": 2.41, + "learning_rate": 1.9690156425784133e-06, + "loss": 0.1274, + "step": 3329 + }, + { + "epoch": 2.41, + "learning_rate": 1.9643627681810095e-06, + "loss": 0.119, + "step": 3330 + }, + { + "epoch": 2.41, + "learning_rate": 1.9597147989291678e-06, + "loss": 0.1219, + "step": 3331 + }, + { + "epoch": 2.41, + "learning_rate": 1.9550717376601127e-06, + "loss": 0.1282, + "step": 3332 + }, + { + "epoch": 2.41, + "learning_rate": 1.950433587208086e-06, + "loss": 0.1316, + "step": 3333 + }, + { + "epoch": 2.41, + "learning_rate": 1.945800350404321e-06, + "loss": 0.1166, + "step": 3334 + }, + { + "epoch": 2.41, + "learning_rate": 1.941172030077062e-06, + "loss": 0.1271, + "step": 3335 + }, + { + "epoch": 2.41, + "learning_rate": 1.93654862905154e-06, + "loss": 0.1228, + "step": 3336 + }, + { + "epoch": 2.41, + "learning_rate": 1.9319301501499964e-06, + "loss": 0.124, + "step": 3337 + }, + { + "epoch": 2.41, + "learning_rate": 1.9273165961916563e-06, + "loss": 0.132, + "step": 3338 + }, + { + "epoch": 2.42, + "learning_rate": 1.9227079699927453e-06, + "loss": 0.1227, + "step": 3339 + }, + { + "epoch": 2.42, + "learning_rate": 1.918104274366479e-06, + "loss": 0.1417, + "step": 3340 + }, + { + "epoch": 2.42, + "learning_rate": 1.913505512123067e-06, + "loss": 0.1291, + "step": 3341 + }, + { + "epoch": 2.42, + "learning_rate": 1.9089116860697e-06, + "loss": 0.122, + "step": 3342 + }, + { + "epoch": 2.42, + "learning_rate": 1.9043227990105618e-06, + "loss": 0.1184, + "step": 3343 + }, + { + "epoch": 2.42, + "learning_rate": 1.8997388537468165e-06, + "loss": 0.1182, + "step": 3344 + }, + { + "epoch": 2.42, + "learning_rate": 1.895159853076618e-06, + "loss": 0.1273, + "step": 3345 + }, + { + "epoch": 2.42, + "learning_rate": 1.8905857997950927e-06, + "loss": 0.1258, + "step": 3346 + }, + { + "epoch": 2.42, + "learning_rate": 1.8860166966943583e-06, + "loss": 0.1267, + "step": 3347 + }, + { + "epoch": 2.42, + "learning_rate": 1.8814525465634981e-06, + "loss": 0.1241, + "step": 3348 + }, + { + "epoch": 2.42, + "learning_rate": 1.8768933521885868e-06, + "loss": 0.1222, + "step": 3349 + }, + { + "epoch": 2.42, + "learning_rate": 1.8723391163526582e-06, + "loss": 0.1224, + "step": 3350 + }, + { + "epoch": 2.42, + "learning_rate": 1.86778984183573e-06, + "loss": 0.1188, + "step": 3351 + }, + { + "epoch": 2.42, + "learning_rate": 1.8632455314147924e-06, + "loss": 0.1212, + "step": 3352 + }, + { + "epoch": 2.43, + "learning_rate": 1.8587061878637947e-06, + "loss": 0.1234, + "step": 3353 + }, + { + "epoch": 2.43, + "learning_rate": 1.8541718139536669e-06, + "loss": 0.1152, + "step": 3354 + }, + { + "epoch": 2.43, + "learning_rate": 1.8496424124522937e-06, + "loss": 0.1323, + "step": 3355 + }, + { + "epoch": 2.43, + "learning_rate": 1.8451179861245373e-06, + "loss": 0.1255, + "step": 3356 + }, + { + "epoch": 2.43, + "learning_rate": 1.8405985377322078e-06, + "loss": 0.1261, + "step": 3357 + }, + { + "epoch": 2.43, + "learning_rate": 1.8360840700340932e-06, + "loss": 0.1253, + "step": 3358 + }, + { + "epoch": 2.43, + "learning_rate": 1.8315745857859246e-06, + "loss": 0.1287, + "step": 3359 + }, + { + "epoch": 2.43, + "learning_rate": 1.8270700877404068e-06, + "loss": 0.1186, + "step": 3360 + }, + { + "epoch": 2.43, + "learning_rate": 1.822570578647187e-06, + "loss": 0.1238, + "step": 3361 + }, + { + "epoch": 2.43, + "learning_rate": 1.8180760612528792e-06, + "loss": 0.1292, + "step": 3362 + }, + { + "epoch": 2.43, + "learning_rate": 1.8135865383010387e-06, + "loss": 0.1268, + "step": 3363 + }, + { + "epoch": 2.43, + "learning_rate": 1.8091020125321835e-06, + "loss": 0.1274, + "step": 3364 + }, + { + "epoch": 2.43, + "learning_rate": 1.8046224866837702e-06, + "loss": 0.124, + "step": 3365 + }, + { + "epoch": 2.43, + "learning_rate": 1.8001479634902141e-06, + "loss": 0.1202, + "step": 3366 + }, + { + "epoch": 2.44, + "learning_rate": 1.7956784456828658e-06, + "loss": 0.1219, + "step": 3367 + }, + { + "epoch": 2.44, + "learning_rate": 1.7912139359900326e-06, + "loss": 0.122, + "step": 3368 + }, + { + "epoch": 2.44, + "learning_rate": 1.7867544371369483e-06, + "loss": 0.1221, + "step": 3369 + }, + { + "epoch": 2.44, + "learning_rate": 1.7822999518458107e-06, + "loss": 0.1249, + "step": 3370 + }, + { + "epoch": 2.44, + "learning_rate": 1.7778504828357345e-06, + "loss": 0.1198, + "step": 3371 + }, + { + "epoch": 2.44, + "learning_rate": 1.7734060328227886e-06, + "loss": 0.1226, + "step": 3372 + }, + { + "epoch": 2.44, + "learning_rate": 1.7689666045199661e-06, + "loss": 0.1282, + "step": 3373 + }, + { + "epoch": 2.44, + "learning_rate": 1.7645322006372057e-06, + "loss": 0.1302, + "step": 3374 + }, + { + "epoch": 2.44, + "learning_rate": 1.76010282388137e-06, + "loss": 0.1287, + "step": 3375 + }, + { + "epoch": 2.44, + "learning_rate": 1.7556784769562596e-06, + "loss": 0.1277, + "step": 3376 + }, + { + "epoch": 2.44, + "learning_rate": 1.7512591625625973e-06, + "loss": 0.1296, + "step": 3377 + }, + { + "epoch": 2.44, + "learning_rate": 1.7468448833980445e-06, + "loss": 0.1238, + "step": 3378 + }, + { + "epoch": 2.44, + "learning_rate": 1.7424356421571775e-06, + "loss": 0.1193, + "step": 3379 + }, + { + "epoch": 2.44, + "learning_rate": 1.738031441531508e-06, + "loss": 0.1207, + "step": 3380 + }, + { + "epoch": 2.45, + "learning_rate": 1.733632284209461e-06, + "loss": 0.1264, + "step": 3381 + }, + { + "epoch": 2.45, + "learning_rate": 1.7292381728763918e-06, + "loss": 0.1169, + "step": 3382 + }, + { + "epoch": 2.45, + "learning_rate": 1.724849110214567e-06, + "loss": 0.1409, + "step": 3383 + }, + { + "epoch": 2.45, + "learning_rate": 1.720465098903179e-06, + "loss": 0.1237, + "step": 3384 + }, + { + "epoch": 2.45, + "learning_rate": 1.7160861416183304e-06, + "loss": 0.1268, + "step": 3385 + }, + { + "epoch": 2.45, + "learning_rate": 1.7117122410330433e-06, + "loss": 0.1184, + "step": 3386 + }, + { + "epoch": 2.45, + "learning_rate": 1.707343399817254e-06, + "loss": 0.1254, + "step": 3387 + }, + { + "epoch": 2.45, + "learning_rate": 1.7029796206378035e-06, + "loss": 0.1191, + "step": 3388 + }, + { + "epoch": 2.45, + "learning_rate": 1.6986209061584502e-06, + "loss": 0.1285, + "step": 3389 + }, + { + "epoch": 2.45, + "learning_rate": 1.6942672590398556e-06, + "loss": 0.122, + "step": 3390 + }, + { + "epoch": 2.45, + "learning_rate": 1.689918681939593e-06, + "loss": 0.1223, + "step": 3391 + }, + { + "epoch": 2.45, + "learning_rate": 1.6855751775121355e-06, + "loss": 0.1275, + "step": 3392 + }, + { + "epoch": 2.45, + "learning_rate": 1.6812367484088643e-06, + "loss": 0.127, + "step": 3393 + }, + { + "epoch": 2.45, + "learning_rate": 1.676903397278057e-06, + "loss": 0.1241, + "step": 3394 + }, + { + "epoch": 2.46, + "learning_rate": 1.6725751267649005e-06, + "loss": 0.1244, + "step": 3395 + }, + { + "epoch": 2.46, + "learning_rate": 1.6682519395114694e-06, + "loss": 0.1404, + "step": 3396 + }, + { + "epoch": 2.46, + "learning_rate": 1.663933838156746e-06, + "loss": 0.1217, + "step": 3397 + }, + { + "epoch": 2.46, + "learning_rate": 1.6596208253365952e-06, + "loss": 0.1278, + "step": 3398 + }, + { + "epoch": 2.46, + "learning_rate": 1.6553129036837934e-06, + "loss": 0.1139, + "step": 3399 + }, + { + "epoch": 2.46, + "learning_rate": 1.6510100758279912e-06, + "loss": 0.1187, + "step": 3400 + }, + { + "epoch": 2.46, + "learning_rate": 1.6467123443957433e-06, + "loss": 0.1217, + "step": 3401 + }, + { + "epoch": 2.46, + "learning_rate": 1.6424197120104834e-06, + "loss": 0.1215, + "step": 3402 + }, + { + "epoch": 2.46, + "learning_rate": 1.638132181292541e-06, + "loss": 0.119, + "step": 3403 + }, + { + "epoch": 2.46, + "learning_rate": 1.6338497548591248e-06, + "loss": 0.1201, + "step": 3404 + }, + { + "epoch": 2.46, + "learning_rate": 1.6295724353243326e-06, + "loss": 0.1226, + "step": 3405 + }, + { + "epoch": 2.46, + "learning_rate": 1.62530022529914e-06, + "loss": 0.1293, + "step": 3406 + }, + { + "epoch": 2.46, + "learning_rate": 1.6210331273914103e-06, + "loss": 0.1279, + "step": 3407 + }, + { + "epoch": 2.47, + "learning_rate": 1.6167711442058786e-06, + "loss": 0.1331, + "step": 3408 + }, + { + "epoch": 2.47, + "learning_rate": 1.6125142783441649e-06, + "loss": 0.1232, + "step": 3409 + }, + { + "epoch": 2.47, + "learning_rate": 1.6082625324047585e-06, + "loss": 0.1259, + "step": 3410 + }, + { + "epoch": 2.47, + "learning_rate": 1.604015908983031e-06, + "loss": 0.1252, + "step": 3411 + }, + { + "epoch": 2.47, + "learning_rate": 1.5997744106712188e-06, + "loss": 0.1278, + "step": 3412 + }, + { + "epoch": 2.47, + "learning_rate": 1.5955380400584386e-06, + "loss": 0.1186, + "step": 3413 + }, + { + "epoch": 2.47, + "learning_rate": 1.5913067997306685e-06, + "loss": 0.1264, + "step": 3414 + }, + { + "epoch": 2.47, + "learning_rate": 1.5870806922707626e-06, + "loss": 0.1261, + "step": 3415 + }, + { + "epoch": 2.47, + "learning_rate": 1.582859720258434e-06, + "loss": 0.1243, + "step": 3416 + }, + { + "epoch": 2.47, + "learning_rate": 1.5786438862702702e-06, + "loss": 0.1185, + "step": 3417 + }, + { + "epoch": 2.47, + "learning_rate": 1.574433192879713e-06, + "loss": 0.1208, + "step": 3418 + }, + { + "epoch": 2.47, + "learning_rate": 1.5702276426570717e-06, + "loss": 0.1357, + "step": 3419 + }, + { + "epoch": 2.47, + "learning_rate": 1.5660272381695184e-06, + "loss": 0.1291, + "step": 3420 + }, + { + "epoch": 2.47, + "learning_rate": 1.5618319819810745e-06, + "loss": 0.1265, + "step": 3421 + }, + { + "epoch": 2.48, + "learning_rate": 1.5576418766526313e-06, + "loss": 0.1161, + "step": 3422 + }, + { + "epoch": 2.48, + "learning_rate": 1.553456924741924e-06, + "loss": 0.1225, + "step": 3423 + }, + { + "epoch": 2.48, + "learning_rate": 1.5492771288035534e-06, + "loss": 0.1254, + "step": 3424 + }, + { + "epoch": 2.48, + "learning_rate": 1.5451024913889601e-06, + "loss": 0.1045, + "step": 3425 + }, + { + "epoch": 2.48, + "learning_rate": 1.5409330150464498e-06, + "loss": 0.1183, + "step": 3426 + }, + { + "epoch": 2.48, + "learning_rate": 1.5367687023211642e-06, + "loss": 0.1271, + "step": 3427 + }, + { + "epoch": 2.48, + "learning_rate": 1.5326095557551023e-06, + "loss": 0.1164, + "step": 3428 + }, + { + "epoch": 2.48, + "learning_rate": 1.528455577887109e-06, + "loss": 0.1269, + "step": 3429 + }, + { + "epoch": 2.48, + "learning_rate": 1.5243067712528714e-06, + "loss": 0.1242, + "step": 3430 + }, + { + "epoch": 2.48, + "learning_rate": 1.520163138384918e-06, + "loss": 0.1241, + "step": 3431 + }, + { + "epoch": 2.48, + "learning_rate": 1.5160246818126246e-06, + "loss": 0.1209, + "step": 3432 + }, + { + "epoch": 2.48, + "learning_rate": 1.5118914040622013e-06, + "loss": 0.1326, + "step": 3433 + }, + { + "epoch": 2.48, + "learning_rate": 1.5077633076567033e-06, + "loss": 0.1279, + "step": 3434 + }, + { + "epoch": 2.48, + "learning_rate": 1.503640395116015e-06, + "loss": 0.1299, + "step": 3435 + }, + { + "epoch": 2.49, + "learning_rate": 1.4995226689568665e-06, + "loss": 0.1142, + "step": 3436 + }, + { + "epoch": 2.49, + "learning_rate": 1.4954101316928117e-06, + "loss": 0.1173, + "step": 3437 + }, + { + "epoch": 2.49, + "learning_rate": 1.4913027858342456e-06, + "loss": 0.1333, + "step": 3438 + }, + { + "epoch": 2.49, + "learning_rate": 1.487200633888386e-06, + "loss": 0.1252, + "step": 3439 + }, + { + "epoch": 2.49, + "learning_rate": 1.483103678359291e-06, + "loss": 0.118, + "step": 3440 + }, + { + "epoch": 2.49, + "learning_rate": 1.4790119217478338e-06, + "loss": 0.1255, + "step": 3441 + }, + { + "epoch": 2.49, + "learning_rate": 1.4749253665517272e-06, + "loss": 0.1284, + "step": 3442 + }, + { + "epoch": 2.49, + "learning_rate": 1.4708440152654968e-06, + "loss": 0.1217, + "step": 3443 + }, + { + "epoch": 2.49, + "learning_rate": 1.4667678703805022e-06, + "loss": 0.1322, + "step": 3444 + }, + { + "epoch": 2.49, + "learning_rate": 1.462696934384914e-06, + "loss": 0.1193, + "step": 3445 + }, + { + "epoch": 2.49, + "learning_rate": 1.4586312097637356e-06, + "loss": 0.1169, + "step": 3446 + }, + { + "epoch": 2.49, + "learning_rate": 1.4545706989987774e-06, + "loss": 0.1197, + "step": 3447 + }, + { + "epoch": 2.49, + "learning_rate": 1.4505154045686764e-06, + "loss": 0.1344, + "step": 3448 + }, + { + "epoch": 2.49, + "learning_rate": 1.4464653289488783e-06, + "loss": 0.1252, + "step": 3449 + }, + { + "epoch": 2.5, + "learning_rate": 1.44242047461165e-06, + "loss": 0.1211, + "step": 3450 + }, + { + "epoch": 2.5, + "learning_rate": 1.4383808440260626e-06, + "loss": 0.1172, + "step": 3451 + }, + { + "epoch": 2.5, + "learning_rate": 1.43434643965801e-06, + "loss": 0.1275, + "step": 3452 + }, + { + "epoch": 2.5, + "learning_rate": 1.4303172639701824e-06, + "loss": 0.1336, + "step": 3453 + }, + { + "epoch": 2.5, + "learning_rate": 1.4262933194220873e-06, + "loss": 0.1307, + "step": 3454 + }, + { + "epoch": 2.5, + "learning_rate": 1.4222746084700412e-06, + "loss": 0.1312, + "step": 3455 + }, + { + "epoch": 2.5, + "learning_rate": 1.4182611335671559e-06, + "loss": 0.138, + "step": 3456 + }, + { + "epoch": 2.5, + "learning_rate": 1.4142528971633563e-06, + "loss": 0.1343, + "step": 3457 + }, + { + "epoch": 2.5, + "learning_rate": 1.410249901705365e-06, + "loss": 0.1337, + "step": 3458 + }, + { + "epoch": 2.5, + "learning_rate": 1.4062521496367055e-06, + "loss": 0.1109, + "step": 3459 + }, + { + "epoch": 2.5, + "learning_rate": 1.402259643397703e-06, + "loss": 0.1136, + "step": 3460 + }, + { + "epoch": 2.5, + "learning_rate": 1.3982723854254777e-06, + "loss": 0.1307, + "step": 3461 + }, + { + "epoch": 2.5, + "learning_rate": 1.3942903781539484e-06, + "loss": 0.1256, + "step": 3462 + }, + { + "epoch": 2.5, + "learning_rate": 1.390313624013826e-06, + "loss": 0.1273, + "step": 3463 + }, + { + "epoch": 2.51, + "learning_rate": 1.386342125432617e-06, + "loss": 0.1219, + "step": 3464 + }, + { + "epoch": 2.51, + "learning_rate": 1.3823758848346236e-06, + "loss": 0.14, + "step": 3465 + }, + { + "epoch": 2.51, + "learning_rate": 1.3784149046409279e-06, + "loss": 0.1245, + "step": 3466 + }, + { + "epoch": 2.51, + "learning_rate": 1.3744591872694113e-06, + "loss": 0.1351, + "step": 3467 + }, + { + "epoch": 2.51, + "learning_rate": 1.3705087351347357e-06, + "loss": 0.1259, + "step": 3468 + }, + { + "epoch": 2.51, + "learning_rate": 1.3665635506483543e-06, + "loss": 0.1228, + "step": 3469 + }, + { + "epoch": 2.51, + "learning_rate": 1.3626236362184996e-06, + "loss": 0.1226, + "step": 3470 + }, + { + "epoch": 2.51, + "learning_rate": 1.3586889942501935e-06, + "loss": 0.1245, + "step": 3471 + }, + { + "epoch": 2.51, + "learning_rate": 1.3547596271452313e-06, + "loss": 0.1255, + "step": 3472 + }, + { + "epoch": 2.51, + "learning_rate": 1.3508355373021975e-06, + "loss": 0.1199, + "step": 3473 + }, + { + "epoch": 2.51, + "learning_rate": 1.3469167271164452e-06, + "loss": 0.1251, + "step": 3474 + }, + { + "epoch": 2.51, + "learning_rate": 1.3430031989801163e-06, + "loss": 0.1233, + "step": 3475 + }, + { + "epoch": 2.51, + "learning_rate": 1.3390949552821164e-06, + "loss": 0.1247, + "step": 3476 + }, + { + "epoch": 2.52, + "learning_rate": 1.3351919984081351e-06, + "loss": 0.1292, + "step": 3477 + }, + { + "epoch": 2.52, + "learning_rate": 1.3312943307406279e-06, + "loss": 0.1338, + "step": 3478 + }, + { + "epoch": 2.52, + "learning_rate": 1.327401954658828e-06, + "loss": 0.1346, + "step": 3479 + }, + { + "epoch": 2.52, + "learning_rate": 1.3235148725387304e-06, + "loss": 0.1173, + "step": 3480 + }, + { + "epoch": 2.52, + "learning_rate": 1.3196330867531093e-06, + "loss": 0.1359, + "step": 3481 + }, + { + "epoch": 2.52, + "learning_rate": 1.3157565996714927e-06, + "loss": 0.1292, + "step": 3482 + }, + { + "epoch": 2.52, + "learning_rate": 1.3118854136601877e-06, + "loss": 0.1198, + "step": 3483 + }, + { + "epoch": 2.52, + "learning_rate": 1.3080195310822541e-06, + "loss": 0.1173, + "step": 3484 + }, + { + "epoch": 2.52, + "learning_rate": 1.304158954297524e-06, + "loss": 0.1221, + "step": 3485 + }, + { + "epoch": 2.52, + "learning_rate": 1.3003036856625784e-06, + "loss": 0.1224, + "step": 3486 + }, + { + "epoch": 2.52, + "learning_rate": 1.2964537275307754e-06, + "loss": 0.1285, + "step": 3487 + }, + { + "epoch": 2.52, + "learning_rate": 1.292609082252214e-06, + "loss": 0.1288, + "step": 3488 + }, + { + "epoch": 2.52, + "learning_rate": 1.288769752173763e-06, + "loss": 0.1248, + "step": 3489 + }, + { + "epoch": 2.52, + "learning_rate": 1.2849357396390382e-06, + "loss": 0.1149, + "step": 3490 + }, + { + "epoch": 2.53, + "learning_rate": 1.2811070469884147e-06, + "loss": 0.1224, + "step": 3491 + }, + { + "epoch": 2.53, + "learning_rate": 1.2772836765590158e-06, + "loss": 0.128, + "step": 3492 + }, + { + "epoch": 2.53, + "learning_rate": 1.2734656306847226e-06, + "loss": 0.1211, + "step": 3493 + }, + { + "epoch": 2.53, + "learning_rate": 1.2696529116961565e-06, + "loss": 0.1192, + "step": 3494 + }, + { + "epoch": 2.53, + "learning_rate": 1.2658455219206978e-06, + "loss": 0.1275, + "step": 3495 + }, + { + "epoch": 2.53, + "learning_rate": 1.262043463682463e-06, + "loss": 0.1237, + "step": 3496 + }, + { + "epoch": 2.53, + "learning_rate": 1.2582467393023235e-06, + "loss": 0.1166, + "step": 3497 + }, + { + "epoch": 2.53, + "learning_rate": 1.2544553510978918e-06, + "loss": 0.1218, + "step": 3498 + }, + { + "epoch": 2.53, + "learning_rate": 1.250669301383518e-06, + "loss": 0.1307, + "step": 3499 + }, + { + "epoch": 2.53, + "learning_rate": 1.246888592470301e-06, + "loss": 0.1248, + "step": 3500 + }, + { + "epoch": 2.53, + "eval_loss": 0.1225692555308342, + "eval_runtime": 716.0296, + "eval_samples_per_second": 69.83, + "eval_steps_per_second": 2.183, + "step": 3500 + }, + { + "epoch": 2.53, + "learning_rate": 1.243113226666074e-06, + "loss": 0.1256, + "step": 3501 + }, + { + "epoch": 2.53, + "learning_rate": 1.239343206275414e-06, + "loss": 0.1289, + "step": 3502 + }, + { + "epoch": 2.53, + "learning_rate": 1.2355785335996263e-06, + "loss": 0.1259, + "step": 3503 + }, + { + "epoch": 2.53, + "learning_rate": 1.2318192109367632e-06, + "loss": 0.1295, + "step": 3504 + }, + { + "epoch": 2.54, + "learning_rate": 1.2280652405816006e-06, + "loss": 0.1277, + "step": 3505 + }, + { + "epoch": 2.54, + "learning_rate": 1.2243166248256566e-06, + "loss": 0.1274, + "step": 3506 + }, + { + "epoch": 2.54, + "learning_rate": 1.2205733659571706e-06, + "loss": 0.1275, + "step": 3507 + }, + { + "epoch": 2.54, + "learning_rate": 1.2168354662611225e-06, + "loss": 0.1313, + "step": 3508 + }, + { + "epoch": 2.54, + "learning_rate": 1.213102928019213e-06, + "loss": 0.1168, + "step": 3509 + }, + { + "epoch": 2.54, + "learning_rate": 1.209375753509875e-06, + "loss": 0.1228, + "step": 3510 + }, + { + "epoch": 2.54, + "learning_rate": 1.2056539450082616e-06, + "loss": 0.1192, + "step": 3511 + }, + { + "epoch": 2.54, + "learning_rate": 1.2019375047862592e-06, + "loss": 0.1174, + "step": 3512 + }, + { + "epoch": 2.54, + "learning_rate": 1.1982264351124662e-06, + "loss": 0.1315, + "step": 3513 + }, + { + "epoch": 2.54, + "learning_rate": 1.194520738252214e-06, + "loss": 0.1171, + "step": 3514 + }, + { + "epoch": 2.54, + "learning_rate": 1.1908204164675408e-06, + "loss": 0.117, + "step": 3515 + }, + { + "epoch": 2.54, + "learning_rate": 1.187125472017222e-06, + "loss": 0.1262, + "step": 3516 + }, + { + "epoch": 2.54, + "learning_rate": 1.1834359071567325e-06, + "loss": 0.1236, + "step": 3517 + }, + { + "epoch": 2.54, + "learning_rate": 1.1797517241382772e-06, + "loss": 0.1225, + "step": 3518 + }, + { + "epoch": 2.55, + "learning_rate": 1.1760729252107628e-06, + "loss": 0.132, + "step": 3519 + }, + { + "epoch": 2.55, + "learning_rate": 1.1723995126198228e-06, + "loss": 0.1177, + "step": 3520 + }, + { + "epoch": 2.55, + "learning_rate": 1.1687314886077904e-06, + "loss": 0.1141, + "step": 3521 + }, + { + "epoch": 2.55, + "learning_rate": 1.1650688554137212e-06, + "loss": 0.1169, + "step": 3522 + }, + { + "epoch": 2.55, + "learning_rate": 1.16141161527337e-06, + "loss": 0.121, + "step": 3523 + }, + { + "epoch": 2.55, + "learning_rate": 1.1577597704192056e-06, + "loss": 0.1381, + "step": 3524 + }, + { + "epoch": 2.55, + "learning_rate": 1.1541133230804002e-06, + "loss": 0.126, + "step": 3525 + }, + { + "epoch": 2.55, + "learning_rate": 1.1504722754828356e-06, + "loss": 0.1272, + "step": 3526 + }, + { + "epoch": 2.55, + "learning_rate": 1.1468366298490918e-06, + "loss": 0.1123, + "step": 3527 + }, + { + "epoch": 2.55, + "learning_rate": 1.1432063883984557e-06, + "loss": 0.1235, + "step": 3528 + }, + { + "epoch": 2.55, + "learning_rate": 1.1395815533469101e-06, + "loss": 0.1174, + "step": 3529 + }, + { + "epoch": 2.55, + "learning_rate": 1.1359621269071452e-06, + "loss": 0.1209, + "step": 3530 + }, + { + "epoch": 2.55, + "learning_rate": 1.1323481112885459e-06, + "loss": 0.1219, + "step": 3531 + }, + { + "epoch": 2.55, + "learning_rate": 1.1287395086971908e-06, + "loss": 0.1204, + "step": 3532 + }, + { + "epoch": 2.56, + "learning_rate": 1.1251363213358612e-06, + "loss": 0.1195, + "step": 3533 + }, + { + "epoch": 2.56, + "learning_rate": 1.1215385514040245e-06, + "loss": 0.1201, + "step": 3534 + }, + { + "epoch": 2.56, + "learning_rate": 1.1179462010978504e-06, + "loss": 0.1242, + "step": 3535 + }, + { + "epoch": 2.56, + "learning_rate": 1.1143592726101915e-06, + "loss": 0.1187, + "step": 3536 + }, + { + "epoch": 2.56, + "learning_rate": 1.1107777681305975e-06, + "loss": 0.1264, + "step": 3537 + }, + { + "epoch": 2.56, + "learning_rate": 1.1072016898453031e-06, + "loss": 0.1273, + "step": 3538 + }, + { + "epoch": 2.56, + "learning_rate": 1.103631039937234e-06, + "loss": 0.1222, + "step": 3539 + }, + { + "epoch": 2.56, + "learning_rate": 1.100065820585998e-06, + "loss": 0.1252, + "step": 3540 + }, + { + "epoch": 2.56, + "learning_rate": 1.0965060339678935e-06, + "loss": 0.125, + "step": 3541 + }, + { + "epoch": 2.56, + "learning_rate": 1.092951682255896e-06, + "loss": 0.1306, + "step": 3542 + }, + { + "epoch": 2.56, + "learning_rate": 1.0894027676196716e-06, + "loss": 0.1258, + "step": 3543 + }, + { + "epoch": 2.56, + "learning_rate": 1.0858592922255572e-06, + "loss": 0.1258, + "step": 3544 + }, + { + "epoch": 2.56, + "learning_rate": 1.0823212582365782e-06, + "loss": 0.1382, + "step": 3545 + }, + { + "epoch": 2.56, + "learning_rate": 1.078788667812436e-06, + "loss": 0.1334, + "step": 3546 + }, + { + "epoch": 2.57, + "learning_rate": 1.0752615231095086e-06, + "loss": 0.1265, + "step": 3547 + }, + { + "epoch": 2.57, + "learning_rate": 1.0717398262808466e-06, + "loss": 0.1186, + "step": 3548 + }, + { + "epoch": 2.57, + "learning_rate": 1.068223579476183e-06, + "loss": 0.1315, + "step": 3549 + }, + { + "epoch": 2.57, + "learning_rate": 1.0647127848419136e-06, + "loss": 0.1259, + "step": 3550 + }, + { + "epoch": 2.57, + "learning_rate": 1.0612074445211173e-06, + "loss": 0.1285, + "step": 3551 + }, + { + "epoch": 2.57, + "learning_rate": 1.0577075606535314e-06, + "loss": 0.1176, + "step": 3552 + }, + { + "epoch": 2.57, + "learning_rate": 1.0542131353755758e-06, + "loss": 0.1114, + "step": 3553 + }, + { + "epoch": 2.57, + "learning_rate": 1.0507241708203253e-06, + "loss": 0.1201, + "step": 3554 + }, + { + "epoch": 2.57, + "learning_rate": 1.047240669117533e-06, + "loss": 0.1222, + "step": 3555 + }, + { + "epoch": 2.57, + "learning_rate": 1.0437626323936067e-06, + "loss": 0.1237, + "step": 3556 + }, + { + "epoch": 2.57, + "learning_rate": 1.0402900627716273e-06, + "loss": 0.1252, + "step": 3557 + }, + { + "epoch": 2.57, + "learning_rate": 1.036822962371331e-06, + "loss": 0.1189, + "step": 3558 + }, + { + "epoch": 2.57, + "learning_rate": 1.033361333309123e-06, + "loss": 0.1276, + "step": 3559 + }, + { + "epoch": 2.58, + "learning_rate": 1.0299051776980607e-06, + "loss": 0.1214, + "step": 3560 + }, + { + "epoch": 2.58, + "learning_rate": 1.0264544976478684e-06, + "loss": 0.1246, + "step": 3561 + }, + { + "epoch": 2.58, + "learning_rate": 1.0230092952649196e-06, + "loss": 0.1156, + "step": 3562 + }, + { + "epoch": 2.58, + "learning_rate": 1.0195695726522525e-06, + "loss": 0.1292, + "step": 3563 + }, + { + "epoch": 2.58, + "learning_rate": 1.0161353319095523e-06, + "loss": 0.1228, + "step": 3564 + }, + { + "epoch": 2.58, + "learning_rate": 1.0127065751331645e-06, + "loss": 0.1312, + "step": 3565 + }, + { + "epoch": 2.58, + "learning_rate": 1.0092833044160844e-06, + "loss": 0.1159, + "step": 3566 + }, + { + "epoch": 2.58, + "learning_rate": 1.0058655218479564e-06, + "loss": 0.125, + "step": 3567 + }, + { + "epoch": 2.58, + "learning_rate": 1.00245322951508e-06, + "loss": 0.122, + "step": 3568 + }, + { + "epoch": 2.58, + "learning_rate": 9.990464295003966e-07, + "loss": 0.124, + "step": 3569 + }, + { + "epoch": 2.58, + "learning_rate": 9.95645123883503e-07, + "loss": 0.1173, + "step": 3570 + }, + { + "epoch": 2.58, + "learning_rate": 9.92249314740632e-07, + "loss": 0.1307, + "step": 3571 + }, + { + "epoch": 2.58, + "learning_rate": 9.888590041446733e-07, + "loss": 0.1217, + "step": 3572 + }, + { + "epoch": 2.58, + "learning_rate": 9.85474194165148e-07, + "loss": 0.1278, + "step": 3573 + }, + { + "epoch": 2.59, + "learning_rate": 9.820948868682278e-07, + "loss": 0.1244, + "step": 3574 + }, + { + "epoch": 2.59, + "learning_rate": 9.787210843167226e-07, + "loss": 0.1288, + "step": 3575 + }, + { + "epoch": 2.59, + "learning_rate": 9.753527885700852e-07, + "loss": 0.1248, + "step": 3576 + }, + { + "epoch": 2.59, + "learning_rate": 9.719900016843986e-07, + "loss": 0.1267, + "step": 3577 + }, + { + "epoch": 2.59, + "learning_rate": 9.68632725712394e-07, + "loss": 0.121, + "step": 3578 + }, + { + "epoch": 2.59, + "learning_rate": 9.652809627034277e-07, + "loss": 0.1316, + "step": 3579 + }, + { + "epoch": 2.59, + "learning_rate": 9.619347147035007e-07, + "loss": 0.1207, + "step": 3580 + }, + { + "epoch": 2.59, + "learning_rate": 9.585939837552405e-07, + "loss": 0.1223, + "step": 3581 + }, + { + "epoch": 2.59, + "learning_rate": 9.55258771897911e-07, + "loss": 0.1345, + "step": 3582 + }, + { + "epoch": 2.59, + "learning_rate": 9.519290811674021e-07, + "loss": 0.1281, + "step": 3583 + }, + { + "epoch": 2.59, + "learning_rate": 9.486049135962417e-07, + "loss": 0.1279, + "step": 3584 + }, + { + "epoch": 2.59, + "learning_rate": 9.452862712135769e-07, + "loss": 0.12, + "step": 3585 + }, + { + "epoch": 2.59, + "learning_rate": 9.419731560451906e-07, + "loss": 0.1234, + "step": 3586 + }, + { + "epoch": 2.59, + "learning_rate": 9.386655701134839e-07, + "loss": 0.1254, + "step": 3587 + }, + { + "epoch": 2.6, + "learning_rate": 9.353635154374896e-07, + "loss": 0.1179, + "step": 3588 + }, + { + "epoch": 2.6, + "learning_rate": 9.320669940328575e-07, + "loss": 0.1362, + "step": 3589 + }, + { + "epoch": 2.6, + "learning_rate": 9.287760079118679e-07, + "loss": 0.1215, + "step": 3590 + }, + { + "epoch": 2.6, + "learning_rate": 9.254905590834129e-07, + "loss": 0.1353, + "step": 3591 + }, + { + "epoch": 2.6, + "learning_rate": 9.222106495530136e-07, + "loss": 0.1217, + "step": 3592 + }, + { + "epoch": 2.6, + "learning_rate": 9.18936281322802e-07, + "loss": 0.1248, + "step": 3593 + }, + { + "epoch": 2.6, + "learning_rate": 9.156674563915324e-07, + "loss": 0.1244, + "step": 3594 + }, + { + "epoch": 2.6, + "learning_rate": 9.124041767545733e-07, + "loss": 0.1204, + "step": 3595 + }, + { + "epoch": 2.6, + "learning_rate": 9.0914644440391e-07, + "loss": 0.1266, + "step": 3596 + }, + { + "epoch": 2.6, + "learning_rate": 9.058942613281385e-07, + "loss": 0.1251, + "step": 3597 + }, + { + "epoch": 2.6, + "learning_rate": 9.026476295124698e-07, + "loss": 0.131, + "step": 3598 + }, + { + "epoch": 2.6, + "learning_rate": 8.994065509387285e-07, + "loss": 0.1239, + "step": 3599 + }, + { + "epoch": 2.6, + "learning_rate": 8.961710275853419e-07, + "loss": 0.1228, + "step": 3600 + }, + { + "epoch": 2.6, + "learning_rate": 8.929410614273559e-07, + "loss": 0.114, + "step": 3601 + }, + { + "epoch": 2.61, + "learning_rate": 8.897166544364144e-07, + "loss": 0.1281, + "step": 3602 + }, + { + "epoch": 2.61, + "learning_rate": 8.864978085807785e-07, + "loss": 0.1273, + "step": 3603 + }, + { + "epoch": 2.61, + "learning_rate": 8.832845258253031e-07, + "loss": 0.1232, + "step": 3604 + }, + { + "epoch": 2.61, + "learning_rate": 8.800768081314548e-07, + "loss": 0.1199, + "step": 3605 + }, + { + "epoch": 2.61, + "learning_rate": 8.76874657457305e-07, + "loss": 0.1279, + "step": 3606 + }, + { + "epoch": 2.61, + "learning_rate": 8.73678075757517e-07, + "loss": 0.1202, + "step": 3607 + }, + { + "epoch": 2.61, + "learning_rate": 8.704870649833652e-07, + "loss": 0.12, + "step": 3608 + }, + { + "epoch": 2.61, + "learning_rate": 8.673016270827195e-07, + "loss": 0.1302, + "step": 3609 + }, + { + "epoch": 2.61, + "learning_rate": 8.641217640000443e-07, + "loss": 0.1198, + "step": 3610 + }, + { + "epoch": 2.61, + "learning_rate": 8.609474776764082e-07, + "loss": 0.1172, + "step": 3611 + }, + { + "epoch": 2.61, + "learning_rate": 8.577787700494677e-07, + "loss": 0.1231, + "step": 3612 + }, + { + "epoch": 2.61, + "learning_rate": 8.546156430534813e-07, + "loss": 0.1227, + "step": 3613 + }, + { + "epoch": 2.61, + "learning_rate": 8.514580986192933e-07, + "loss": 0.1332, + "step": 3614 + }, + { + "epoch": 2.61, + "learning_rate": 8.483061386743496e-07, + "loss": 0.1301, + "step": 3615 + }, + { + "epoch": 2.62, + "learning_rate": 8.45159765142678e-07, + "loss": 0.1278, + "step": 3616 + }, + { + "epoch": 2.62, + "learning_rate": 8.42018979944903e-07, + "loss": 0.1227, + "step": 3617 + }, + { + "epoch": 2.62, + "learning_rate": 8.388837849982323e-07, + "loss": 0.1234, + "step": 3618 + }, + { + "epoch": 2.62, + "learning_rate": 8.357541822164672e-07, + "loss": 0.1198, + "step": 3619 + }, + { + "epoch": 2.62, + "learning_rate": 8.326301735099895e-07, + "loss": 0.1214, + "step": 3620 + }, + { + "epoch": 2.62, + "learning_rate": 8.295117607857717e-07, + "loss": 0.1304, + "step": 3621 + }, + { + "epoch": 2.62, + "learning_rate": 8.263989459473654e-07, + "loss": 0.1272, + "step": 3622 + }, + { + "epoch": 2.62, + "learning_rate": 8.232917308949096e-07, + "loss": 0.1201, + "step": 3623 + }, + { + "epoch": 2.62, + "learning_rate": 8.201901175251193e-07, + "loss": 0.1199, + "step": 3624 + }, + { + "epoch": 2.62, + "learning_rate": 8.170941077312977e-07, + "loss": 0.1267, + "step": 3625 + }, + { + "epoch": 2.62, + "learning_rate": 8.140037034033199e-07, + "loss": 0.1167, + "step": 3626 + }, + { + "epoch": 2.62, + "learning_rate": 8.109189064276457e-07, + "loss": 0.1385, + "step": 3627 + }, + { + "epoch": 2.62, + "learning_rate": 8.078397186873066e-07, + "loss": 0.1261, + "step": 3628 + }, + { + "epoch": 2.62, + "learning_rate": 8.047661420619146e-07, + "loss": 0.1266, + "step": 3629 + }, + { + "epoch": 2.63, + "learning_rate": 8.016981784276534e-07, + "loss": 0.1305, + "step": 3630 + }, + { + "epoch": 2.63, + "learning_rate": 7.986358296572827e-07, + "loss": 0.1281, + "step": 3631 + }, + { + "epoch": 2.63, + "learning_rate": 7.955790976201305e-07, + "loss": 0.1222, + "step": 3632 + }, + { + "epoch": 2.63, + "learning_rate": 7.925279841821032e-07, + "loss": 0.1344, + "step": 3633 + }, + { + "epoch": 2.63, + "learning_rate": 7.894824912056698e-07, + "loss": 0.1228, + "step": 3634 + }, + { + "epoch": 2.63, + "learning_rate": 7.864426205498776e-07, + "loss": 0.1202, + "step": 3635 + }, + { + "epoch": 2.63, + "learning_rate": 7.834083740703313e-07, + "loss": 0.1259, + "step": 3636 + }, + { + "epoch": 2.63, + "learning_rate": 7.803797536192103e-07, + "loss": 0.1218, + "step": 3637 + }, + { + "epoch": 2.63, + "learning_rate": 7.773567610452559e-07, + "loss": 0.1248, + "step": 3638 + }, + { + "epoch": 2.63, + "learning_rate": 7.743393981937764e-07, + "loss": 0.1254, + "step": 3639 + }, + { + "epoch": 2.63, + "learning_rate": 7.713276669066405e-07, + "loss": 0.1218, + "step": 3640 + }, + { + "epoch": 2.63, + "learning_rate": 7.683215690222845e-07, + "loss": 0.1289, + "step": 3641 + }, + { + "epoch": 2.63, + "learning_rate": 7.653211063756982e-07, + "loss": 0.1278, + "step": 3642 + }, + { + "epoch": 2.64, + "learning_rate": 7.623262807984377e-07, + "loss": 0.1244, + "step": 3643 + }, + { + "epoch": 2.64, + "learning_rate": 7.593370941186195e-07, + "loss": 0.1238, + "step": 3644 + }, + { + "epoch": 2.64, + "learning_rate": 7.563535481609097e-07, + "loss": 0.1241, + "step": 3645 + }, + { + "epoch": 2.64, + "learning_rate": 7.533756447465401e-07, + "loss": 0.1203, + "step": 3646 + }, + { + "epoch": 2.64, + "learning_rate": 7.504033856932924e-07, + "loss": 0.1244, + "step": 3647 + }, + { + "epoch": 2.64, + "learning_rate": 7.474367728155063e-07, + "loss": 0.1195, + "step": 3648 + }, + { + "epoch": 2.64, + "learning_rate": 7.444758079240699e-07, + "loss": 0.1216, + "step": 3649 + }, + { + "epoch": 2.64, + "learning_rate": 7.415204928264319e-07, + "loss": 0.1324, + "step": 3650 + }, + { + "epoch": 2.64, + "learning_rate": 7.385708293265837e-07, + "loss": 0.1219, + "step": 3651 + }, + { + "epoch": 2.64, + "learning_rate": 7.356268192250726e-07, + "loss": 0.1327, + "step": 3652 + }, + { + "epoch": 2.64, + "learning_rate": 7.326884643189913e-07, + "loss": 0.1269, + "step": 3653 + }, + { + "epoch": 2.64, + "learning_rate": 7.297557664019849e-07, + "loss": 0.1138, + "step": 3654 + }, + { + "epoch": 2.64, + "learning_rate": 7.268287272642393e-07, + "loss": 0.1358, + "step": 3655 + }, + { + "epoch": 2.64, + "learning_rate": 7.239073486924919e-07, + "loss": 0.1254, + "step": 3656 + }, + { + "epoch": 2.65, + "learning_rate": 7.209916324700206e-07, + "loss": 0.1253, + "step": 3657 + }, + { + "epoch": 2.65, + "learning_rate": 7.180815803766494e-07, + "loss": 0.1262, + "step": 3658 + }, + { + "epoch": 2.65, + "learning_rate": 7.15177194188742e-07, + "loss": 0.1277, + "step": 3659 + }, + { + "epoch": 2.65, + "learning_rate": 7.122784756792079e-07, + "loss": 0.1236, + "step": 3660 + }, + { + "epoch": 2.65, + "learning_rate": 7.093854266174916e-07, + "loss": 0.1209, + "step": 3661 + }, + { + "epoch": 2.65, + "learning_rate": 7.064980487695816e-07, + "loss": 0.1228, + "step": 3662 + }, + { + "epoch": 2.65, + "learning_rate": 7.036163438980015e-07, + "loss": 0.1242, + "step": 3663 + }, + { + "epoch": 2.65, + "learning_rate": 7.007403137618163e-07, + "loss": 0.1249, + "step": 3664 + }, + { + "epoch": 2.65, + "learning_rate": 6.978699601166183e-07, + "loss": 0.1318, + "step": 3665 + }, + { + "epoch": 2.65, + "learning_rate": 6.950052847145449e-07, + "loss": 0.1272, + "step": 3666 + }, + { + "epoch": 2.65, + "learning_rate": 6.921462893042586e-07, + "loss": 0.1247, + "step": 3667 + }, + { + "epoch": 2.65, + "learning_rate": 6.892929756309629e-07, + "loss": 0.1209, + "step": 3668 + }, + { + "epoch": 2.65, + "learning_rate": 6.864453454363839e-07, + "loss": 0.1148, + "step": 3669 + }, + { + "epoch": 2.65, + "learning_rate": 6.836034004587888e-07, + "loss": 0.1101, + "step": 3670 + }, + { + "epoch": 2.66, + "learning_rate": 6.807671424329643e-07, + "loss": 0.1309, + "step": 3671 + }, + { + "epoch": 2.66, + "learning_rate": 6.779365730902343e-07, + "loss": 0.1169, + "step": 3672 + }, + { + "epoch": 2.66, + "learning_rate": 6.751116941584423e-07, + "loss": 0.129, + "step": 3673 + }, + { + "epoch": 2.66, + "learning_rate": 6.722925073619646e-07, + "loss": 0.121, + "step": 3674 + }, + { + "epoch": 2.66, + "learning_rate": 6.69479014421699e-07, + "loss": 0.1261, + "step": 3675 + }, + { + "epoch": 2.66, + "learning_rate": 6.666712170550693e-07, + "loss": 0.1219, + "step": 3676 + }, + { + "epoch": 2.66, + "learning_rate": 6.638691169760247e-07, + "loss": 0.1237, + "step": 3677 + }, + { + "epoch": 2.66, + "learning_rate": 6.610727158950303e-07, + "loss": 0.1193, + "step": 3678 + }, + { + "epoch": 2.66, + "learning_rate": 6.582820155190795e-07, + "loss": 0.1256, + "step": 3679 + }, + { + "epoch": 2.66, + "learning_rate": 6.554970175516806e-07, + "loss": 0.1278, + "step": 3680 + }, + { + "epoch": 2.66, + "learning_rate": 6.52717723692865e-07, + "loss": 0.1307, + "step": 3681 + }, + { + "epoch": 2.66, + "learning_rate": 6.499441356391778e-07, + "loss": 0.1205, + "step": 3682 + }, + { + "epoch": 2.66, + "learning_rate": 6.471762550836868e-07, + "loss": 0.1366, + "step": 3683 + }, + { + "epoch": 2.66, + "learning_rate": 6.444140837159696e-07, + "loss": 0.1185, + "step": 3684 + }, + { + "epoch": 2.67, + "learning_rate": 6.416576232221239e-07, + "loss": 0.121, + "step": 3685 + }, + { + "epoch": 2.67, + "learning_rate": 6.389068752847571e-07, + "loss": 0.1267, + "step": 3686 + }, + { + "epoch": 2.67, + "learning_rate": 6.361618415829951e-07, + "loss": 0.1316, + "step": 3687 + }, + { + "epoch": 2.67, + "learning_rate": 6.334225237924685e-07, + "loss": 0.1259, + "step": 3688 + }, + { + "epoch": 2.67, + "learning_rate": 6.306889235853253e-07, + "loss": 0.1265, + "step": 3689 + }, + { + "epoch": 2.67, + "learning_rate": 6.279610426302185e-07, + "loss": 0.1185, + "step": 3690 + }, + { + "epoch": 2.67, + "learning_rate": 6.252388825923139e-07, + "loss": 0.1448, + "step": 3691 + }, + { + "epoch": 2.67, + "learning_rate": 6.225224451332801e-07, + "loss": 0.1106, + "step": 3692 + }, + { + "epoch": 2.67, + "learning_rate": 6.198117319113006e-07, + "loss": 0.1223, + "step": 3693 + }, + { + "epoch": 2.67, + "learning_rate": 6.171067445810553e-07, + "loss": 0.122, + "step": 3694 + }, + { + "epoch": 2.67, + "learning_rate": 6.144074847937376e-07, + "loss": 0.1326, + "step": 3695 + }, + { + "epoch": 2.67, + "learning_rate": 6.117139541970351e-07, + "loss": 0.1205, + "step": 3696 + }, + { + "epoch": 2.67, + "learning_rate": 6.090261544351472e-07, + "loss": 0.1229, + "step": 3697 + }, + { + "epoch": 2.67, + "learning_rate": 6.063440871487703e-07, + "loss": 0.1335, + "step": 3698 + }, + { + "epoch": 2.68, + "learning_rate": 6.036677539751024e-07, + "loss": 0.1248, + "step": 3699 + }, + { + "epoch": 2.68, + "learning_rate": 6.009971565478412e-07, + "loss": 0.133, + "step": 3700 + }, + { + "epoch": 2.68, + "learning_rate": 5.983322964971838e-07, + "loss": 0.1175, + "step": 3701 + }, + { + "epoch": 2.68, + "learning_rate": 5.956731754498246e-07, + "loss": 0.128, + "step": 3702 + }, + { + "epoch": 2.68, + "learning_rate": 5.930197950289551e-07, + "loss": 0.1199, + "step": 3703 + }, + { + "epoch": 2.68, + "learning_rate": 5.903721568542609e-07, + "loss": 0.1237, + "step": 3704 + }, + { + "epoch": 2.68, + "learning_rate": 5.877302625419256e-07, + "loss": 0.1181, + "step": 3705 + }, + { + "epoch": 2.68, + "learning_rate": 5.850941137046228e-07, + "loss": 0.1311, + "step": 3706 + }, + { + "epoch": 2.68, + "learning_rate": 5.824637119515242e-07, + "loss": 0.1263, + "step": 3707 + }, + { + "epoch": 2.68, + "learning_rate": 5.798390588882863e-07, + "loss": 0.1211, + "step": 3708 + }, + { + "epoch": 2.68, + "learning_rate": 5.772201561170631e-07, + "loss": 0.1307, + "step": 3709 + }, + { + "epoch": 2.68, + "learning_rate": 5.746070052364938e-07, + "loss": 0.1322, + "step": 3710 + }, + { + "epoch": 2.68, + "learning_rate": 5.719996078417078e-07, + "loss": 0.1164, + "step": 3711 + }, + { + "epoch": 2.68, + "learning_rate": 5.693979655243265e-07, + "loss": 0.1336, + "step": 3712 + }, + { + "epoch": 2.69, + "learning_rate": 5.668020798724505e-07, + "loss": 0.1263, + "step": 3713 + }, + { + "epoch": 2.69, + "learning_rate": 5.642119524706746e-07, + "loss": 0.1242, + "step": 3714 + }, + { + "epoch": 2.69, + "learning_rate": 5.616275849000707e-07, + "loss": 0.1299, + "step": 3715 + }, + { + "epoch": 2.69, + "learning_rate": 5.590489787382015e-07, + "loss": 0.1351, + "step": 3716 + }, + { + "epoch": 2.69, + "learning_rate": 5.564761355591075e-07, + "loss": 0.1264, + "step": 3717 + }, + { + "epoch": 2.69, + "learning_rate": 5.539090569333183e-07, + "loss": 0.1187, + "step": 3718 + }, + { + "epoch": 2.69, + "learning_rate": 5.513477444278347e-07, + "loss": 0.1304, + "step": 3719 + }, + { + "epoch": 2.69, + "learning_rate": 5.487921996061474e-07, + "loss": 0.1167, + "step": 3720 + }, + { + "epoch": 2.69, + "learning_rate": 5.462424240282183e-07, + "loss": 0.123, + "step": 3721 + }, + { + "epoch": 2.69, + "learning_rate": 5.436984192504957e-07, + "loss": 0.1174, + "step": 3722 + }, + { + "epoch": 2.69, + "learning_rate": 5.411601868258986e-07, + "loss": 0.1191, + "step": 3723 + }, + { + "epoch": 2.69, + "learning_rate": 5.386277283038277e-07, + "loss": 0.1238, + "step": 3724 + }, + { + "epoch": 2.69, + "learning_rate": 5.361010452301518e-07, + "loss": 0.122, + "step": 3725 + }, + { + "epoch": 2.7, + "learning_rate": 5.335801391472228e-07, + "loss": 0.1276, + "step": 3726 + }, + { + "epoch": 2.7, + "learning_rate": 5.310650115938598e-07, + "loss": 0.1265, + "step": 3727 + }, + { + "epoch": 2.7, + "learning_rate": 5.28555664105359e-07, + "loss": 0.1241, + "step": 3728 + }, + { + "epoch": 2.7, + "learning_rate": 5.260520982134831e-07, + "loss": 0.1208, + "step": 3729 + }, + { + "epoch": 2.7, + "learning_rate": 5.235543154464728e-07, + "loss": 0.1227, + "step": 3730 + }, + { + "epoch": 2.7, + "learning_rate": 5.210623173290308e-07, + "loss": 0.1129, + "step": 3731 + }, + { + "epoch": 2.7, + "learning_rate": 5.185761053823357e-07, + "loss": 0.133, + "step": 3732 + }, + { + "epoch": 2.7, + "learning_rate": 5.160956811240269e-07, + "loss": 0.1274, + "step": 3733 + }, + { + "epoch": 2.7, + "learning_rate": 5.136210460682201e-07, + "loss": 0.1257, + "step": 3734 + }, + { + "epoch": 2.7, + "learning_rate": 5.111522017254866e-07, + "loss": 0.1207, + "step": 3735 + }, + { + "epoch": 2.7, + "learning_rate": 5.086891496028723e-07, + "loss": 0.1249, + "step": 3736 + }, + { + "epoch": 2.7, + "learning_rate": 5.0623189120388e-07, + "loss": 0.1207, + "step": 3737 + }, + { + "epoch": 2.7, + "learning_rate": 5.037804280284808e-07, + "loss": 0.1239, + "step": 3738 + }, + { + "epoch": 2.7, + "learning_rate": 5.013347615731068e-07, + "loss": 0.1272, + "step": 3739 + }, + { + "epoch": 2.71, + "learning_rate": 4.988948933306515e-07, + "loss": 0.1242, + "step": 3740 + }, + { + "epoch": 2.71, + "learning_rate": 4.964608247904667e-07, + "loss": 0.1255, + "step": 3741 + }, + { + "epoch": 2.71, + "learning_rate": 4.940325574383697e-07, + "loss": 0.1143, + "step": 3742 + }, + { + "epoch": 2.71, + "learning_rate": 4.916100927566292e-07, + "loss": 0.1231, + "step": 3743 + }, + { + "epoch": 2.71, + "learning_rate": 4.891934322239788e-07, + "loss": 0.1315, + "step": 3744 + }, + { + "epoch": 2.71, + "learning_rate": 4.867825773156065e-07, + "loss": 0.1223, + "step": 3745 + }, + { + "epoch": 2.71, + "learning_rate": 4.843775295031516e-07, + "loss": 0.1183, + "step": 3746 + }, + { + "epoch": 2.71, + "learning_rate": 4.819782902547187e-07, + "loss": 0.123, + "step": 3747 + }, + { + "epoch": 2.71, + "learning_rate": 4.795848610348553e-07, + "loss": 0.1249, + "step": 3748 + }, + { + "epoch": 2.71, + "learning_rate": 4.771972433045735e-07, + "loss": 0.1288, + "step": 3749 + }, + { + "epoch": 2.71, + "learning_rate": 4.7481543852132663e-07, + "loss": 0.1201, + "step": 3750 + }, + { + "epoch": 2.71, + "learning_rate": 4.724394481390293e-07, + "loss": 0.1306, + "step": 3751 + }, + { + "epoch": 2.71, + "learning_rate": 4.7006927360804253e-07, + "loss": 0.1188, + "step": 3752 + }, + { + "epoch": 2.71, + "learning_rate": 4.677049163751768e-07, + "loss": 0.1187, + "step": 3753 + }, + { + "epoch": 2.72, + "learning_rate": 4.6534637788369196e-07, + "loss": 0.1169, + "step": 3754 + }, + { + "epoch": 2.72, + "learning_rate": 4.6299365957329866e-07, + "loss": 0.1268, + "step": 3755 + }, + { + "epoch": 2.72, + "learning_rate": 4.606467628801503e-07, + "loss": 0.134, + "step": 3756 + }, + { + "epoch": 2.72, + "learning_rate": 4.583056892368509e-07, + "loss": 0.1259, + "step": 3757 + }, + { + "epoch": 2.72, + "learning_rate": 4.5597044007244517e-07, + "loss": 0.1254, + "step": 3758 + }, + { + "epoch": 2.72, + "learning_rate": 4.5364101681242833e-07, + "loss": 0.1266, + "step": 3759 + }, + { + "epoch": 2.72, + "learning_rate": 4.51317420878733e-07, + "loss": 0.1301, + "step": 3760 + }, + { + "epoch": 2.72, + "learning_rate": 4.4899965368974006e-07, + "loss": 0.1197, + "step": 3761 + }, + { + "epoch": 2.72, + "learning_rate": 4.46687716660269e-07, + "loss": 0.1221, + "step": 3762 + }, + { + "epoch": 2.72, + "learning_rate": 4.443816112015831e-07, + "loss": 0.1258, + "step": 3763 + }, + { + "epoch": 2.72, + "learning_rate": 4.4208133872138184e-07, + "loss": 0.1234, + "step": 3764 + }, + { + "epoch": 2.72, + "learning_rate": 4.397869006238098e-07, + "loss": 0.125, + "step": 3765 + }, + { + "epoch": 2.72, + "learning_rate": 4.3749829830944444e-07, + "loss": 0.122, + "step": 3766 + }, + { + "epoch": 2.72, + "learning_rate": 4.3521553317530495e-07, + "loss": 0.1197, + "step": 3767 + }, + { + "epoch": 2.73, + "learning_rate": 4.329386066148444e-07, + "loss": 0.1239, + "step": 3768 + }, + { + "epoch": 2.73, + "learning_rate": 4.3066752001795554e-07, + "loss": 0.1323, + "step": 3769 + }, + { + "epoch": 2.73, + "learning_rate": 4.2840227477096154e-07, + "loss": 0.1192, + "step": 3770 + }, + { + "epoch": 2.73, + "learning_rate": 4.261428722566241e-07, + "loss": 0.125, + "step": 3771 + }, + { + "epoch": 2.73, + "learning_rate": 4.2388931385413555e-07, + "loss": 0.1219, + "step": 3772 + }, + { + "epoch": 2.73, + "learning_rate": 4.216416009391244e-07, + "loss": 0.1161, + "step": 3773 + }, + { + "epoch": 2.73, + "learning_rate": 4.1939973488364536e-07, + "loss": 0.1223, + "step": 3774 + }, + { + "epoch": 2.73, + "learning_rate": 4.171637170561893e-07, + "loss": 0.1273, + "step": 3775 + }, + { + "epoch": 2.73, + "learning_rate": 4.1493354882167323e-07, + "loss": 0.1214, + "step": 3776 + }, + { + "epoch": 2.73, + "learning_rate": 4.1270923154144713e-07, + "loss": 0.1263, + "step": 3777 + }, + { + "epoch": 2.73, + "learning_rate": 4.10490766573286e-07, + "loss": 0.1326, + "step": 3778 + }, + { + "epoch": 2.73, + "learning_rate": 4.082781552713955e-07, + "loss": 0.1225, + "step": 3779 + }, + { + "epoch": 2.73, + "learning_rate": 4.0607139898640515e-07, + "loss": 0.1273, + "step": 3780 + }, + { + "epoch": 2.73, + "learning_rate": 4.0387049906537415e-07, + "loss": 0.1229, + "step": 3781 + }, + { + "epoch": 2.74, + "learning_rate": 4.016754568517822e-07, + "loss": 0.122, + "step": 3782 + }, + { + "epoch": 2.74, + "learning_rate": 3.994862736855376e-07, + "loss": 0.1331, + "step": 3783 + }, + { + "epoch": 2.74, + "learning_rate": 3.97302950902968e-07, + "loss": 0.126, + "step": 3784 + }, + { + "epoch": 2.74, + "learning_rate": 3.9512548983682844e-07, + "loss": 0.1242, + "step": 3785 + }, + { + "epoch": 2.74, + "learning_rate": 3.9295389181629006e-07, + "loss": 0.138, + "step": 3786 + }, + { + "epoch": 2.74, + "learning_rate": 3.907881581669526e-07, + "loss": 0.1221, + "step": 3787 + }, + { + "epoch": 2.74, + "learning_rate": 3.886282902108274e-07, + "loss": 0.1267, + "step": 3788 + }, + { + "epoch": 2.74, + "learning_rate": 3.86474289266352e-07, + "loss": 0.1261, + "step": 3789 + }, + { + "epoch": 2.74, + "learning_rate": 3.8432615664838136e-07, + "loss": 0.1161, + "step": 3790 + }, + { + "epoch": 2.74, + "learning_rate": 3.821838936681854e-07, + "loss": 0.1287, + "step": 3791 + }, + { + "epoch": 2.74, + "learning_rate": 3.800475016334526e-07, + "loss": 0.1259, + "step": 3792 + }, + { + "epoch": 2.74, + "learning_rate": 3.779169818482886e-07, + "loss": 0.1212, + "step": 3793 + }, + { + "epoch": 2.74, + "learning_rate": 3.7579233561321427e-07, + "loss": 0.1171, + "step": 3794 + }, + { + "epoch": 2.75, + "learning_rate": 3.736735642251621e-07, + "loss": 0.1355, + "step": 3795 + }, + { + "epoch": 2.75, + "learning_rate": 3.7156066897748424e-07, + "loss": 0.1189, + "step": 3796 + }, + { + "epoch": 2.75, + "learning_rate": 3.694536511599389e-07, + "loss": 0.127, + "step": 3797 + }, + { + "epoch": 2.75, + "learning_rate": 3.673525120587051e-07, + "loss": 0.1346, + "step": 3798 + }, + { + "epoch": 2.75, + "learning_rate": 3.652572529563636e-07, + "loss": 0.1226, + "step": 3799 + }, + { + "epoch": 2.75, + "learning_rate": 3.6316787513191454e-07, + "loss": 0.1248, + "step": 3800 + }, + { + "epoch": 2.75, + "learning_rate": 3.610843798607611e-07, + "loss": 0.1095, + "step": 3801 + }, + { + "epoch": 2.75, + "learning_rate": 3.5900676841472137e-07, + "loss": 0.1168, + "step": 3802 + }, + { + "epoch": 2.75, + "learning_rate": 3.5693504206201656e-07, + "loss": 0.1233, + "step": 3803 + }, + { + "epoch": 2.75, + "learning_rate": 3.548692020672817e-07, + "loss": 0.1338, + "step": 3804 + }, + { + "epoch": 2.75, + "learning_rate": 3.528092496915514e-07, + "loss": 0.1308, + "step": 3805 + }, + { + "epoch": 2.75, + "learning_rate": 3.50755186192272e-07, + "loss": 0.1153, + "step": 3806 + }, + { + "epoch": 2.75, + "learning_rate": 3.487070128232917e-07, + "loss": 0.1237, + "step": 3807 + }, + { + "epoch": 2.75, + "learning_rate": 3.46664730834867e-07, + "loss": 0.1157, + "step": 3808 + }, + { + "epoch": 2.76, + "learning_rate": 3.4462834147365177e-07, + "loss": 0.126, + "step": 3809 + }, + { + "epoch": 2.76, + "learning_rate": 3.425978459827117e-07, + "loss": 0.1245, + "step": 3810 + }, + { + "epoch": 2.76, + "learning_rate": 3.405732456015054e-07, + "loss": 0.1183, + "step": 3811 + }, + { + "epoch": 2.76, + "learning_rate": 3.3855454156590195e-07, + "loss": 0.1157, + "step": 3812 + }, + { + "epoch": 2.76, + "learning_rate": 3.365417351081635e-07, + "loss": 0.1285, + "step": 3813 + }, + { + "epoch": 2.76, + "learning_rate": 3.3453482745695596e-07, + "loss": 0.1249, + "step": 3814 + }, + { + "epoch": 2.76, + "learning_rate": 3.32533819837344e-07, + "loss": 0.115, + "step": 3815 + }, + { + "epoch": 2.76, + "learning_rate": 3.305387134707916e-07, + "loss": 0.1344, + "step": 3816 + }, + { + "epoch": 2.76, + "learning_rate": 3.2854950957515785e-07, + "loss": 0.1277, + "step": 3817 + }, + { + "epoch": 2.76, + "learning_rate": 3.2656620936470153e-07, + "loss": 0.1223, + "step": 3818 + }, + { + "epoch": 2.76, + "learning_rate": 3.245888140500764e-07, + "loss": 0.1287, + "step": 3819 + }, + { + "epoch": 2.76, + "learning_rate": 3.2261732483833243e-07, + "loss": 0.1201, + "step": 3820 + }, + { + "epoch": 2.76, + "learning_rate": 3.2065174293291147e-07, + "loss": 0.1286, + "step": 3821 + }, + { + "epoch": 2.76, + "learning_rate": 3.1869206953365484e-07, + "loss": 0.113, + "step": 3822 + }, + { + "epoch": 2.77, + "learning_rate": 3.1673830583679344e-07, + "loss": 0.125, + "step": 3823 + }, + { + "epoch": 2.77, + "learning_rate": 3.1479045303494993e-07, + "loss": 0.1247, + "step": 3824 + }, + { + "epoch": 2.77, + "learning_rate": 3.128485123171432e-07, + "loss": 0.1354, + "step": 3825 + }, + { + "epoch": 2.77, + "learning_rate": 3.109124848687772e-07, + "loss": 0.1276, + "step": 3826 + }, + { + "epoch": 2.77, + "learning_rate": 3.0898237187165225e-07, + "loss": 0.1256, + "step": 3827 + }, + { + "epoch": 2.77, + "learning_rate": 3.0705817450395357e-07, + "loss": 0.1201, + "step": 3828 + }, + { + "epoch": 2.77, + "learning_rate": 3.051398939402583e-07, + "loss": 0.122, + "step": 3829 + }, + { + "epoch": 2.77, + "learning_rate": 3.0322753135152983e-07, + "loss": 0.1265, + "step": 3830 + }, + { + "epoch": 2.77, + "learning_rate": 3.0132108790512207e-07, + "loss": 0.1325, + "step": 3831 + }, + { + "epoch": 2.77, + "learning_rate": 2.9942056476477186e-07, + "loss": 0.1321, + "step": 3832 + }, + { + "epoch": 2.77, + "learning_rate": 2.9752596309060575e-07, + "loss": 0.123, + "step": 3833 + }, + { + "epoch": 2.77, + "learning_rate": 2.9563728403913193e-07, + "loss": 0.1254, + "step": 3834 + }, + { + "epoch": 2.77, + "learning_rate": 2.9375452876324816e-07, + "loss": 0.1175, + "step": 3835 + }, + { + "epoch": 2.77, + "learning_rate": 2.9187769841223066e-07, + "loss": 0.1204, + "step": 3836 + }, + { + "epoch": 2.78, + "learning_rate": 2.9000679413174306e-07, + "loss": 0.1349, + "step": 3837 + }, + { + "epoch": 2.78, + "learning_rate": 2.881418170638284e-07, + "loss": 0.1141, + "step": 3838 + }, + { + "epoch": 2.78, + "learning_rate": 2.8628276834691714e-07, + "loss": 0.1275, + "step": 3839 + }, + { + "epoch": 2.78, + "learning_rate": 2.844296491158127e-07, + "loss": 0.1322, + "step": 3840 + }, + { + "epoch": 2.78, + "learning_rate": 2.82582460501708e-07, + "loss": 0.118, + "step": 3841 + }, + { + "epoch": 2.78, + "learning_rate": 2.807412036321677e-07, + "loss": 0.1236, + "step": 3842 + }, + { + "epoch": 2.78, + "learning_rate": 2.789058796311406e-07, + "loss": 0.1306, + "step": 3843 + }, + { + "epoch": 2.78, + "learning_rate": 2.770764896189515e-07, + "loss": 0.122, + "step": 3844 + }, + { + "epoch": 2.78, + "learning_rate": 2.7525303471230614e-07, + "loss": 0.1251, + "step": 3845 + }, + { + "epoch": 2.78, + "learning_rate": 2.7343551602428187e-07, + "loss": 0.1276, + "step": 3846 + }, + { + "epoch": 2.78, + "learning_rate": 2.7162393466433903e-07, + "loss": 0.1218, + "step": 3847 + }, + { + "epoch": 2.78, + "learning_rate": 2.6981829173830744e-07, + "loss": 0.1179, + "step": 3848 + }, + { + "epoch": 2.78, + "learning_rate": 2.6801858834839656e-07, + "loss": 0.1274, + "step": 3849 + }, + { + "epoch": 2.78, + "learning_rate": 2.6622482559318765e-07, + "loss": 0.115, + "step": 3850 + }, + { + "epoch": 2.79, + "learning_rate": 2.64437004567637e-07, + "loss": 0.1195, + "step": 3851 + }, + { + "epoch": 2.79, + "learning_rate": 2.626551263630728e-07, + "loss": 0.13, + "step": 3852 + }, + { + "epoch": 2.79, + "learning_rate": 2.6087919206719716e-07, + "loss": 0.1224, + "step": 3853 + }, + { + "epoch": 2.79, + "learning_rate": 2.591092027640818e-07, + "loss": 0.1296, + "step": 3854 + }, + { + "epoch": 2.79, + "learning_rate": 2.573451595341714e-07, + "loss": 0.1208, + "step": 3855 + }, + { + "epoch": 2.79, + "learning_rate": 2.555870634542801e-07, + "loss": 0.1179, + "step": 3856 + }, + { + "epoch": 2.79, + "learning_rate": 2.538349155975917e-07, + "loss": 0.124, + "step": 3857 + }, + { + "epoch": 2.79, + "learning_rate": 2.5208871703365943e-07, + "loss": 0.1178, + "step": 3858 + }, + { + "epoch": 2.79, + "learning_rate": 2.503484688284041e-07, + "loss": 0.1225, + "step": 3859 + }, + { + "epoch": 2.79, + "learning_rate": 2.4861417204411707e-07, + "loss": 0.1246, + "step": 3860 + }, + { + "epoch": 2.79, + "learning_rate": 2.4688582773945146e-07, + "loss": 0.1373, + "step": 3861 + }, + { + "epoch": 2.79, + "learning_rate": 2.4516343696943226e-07, + "loss": 0.143, + "step": 3862 + }, + { + "epoch": 2.79, + "learning_rate": 2.4344700078544724e-07, + "loss": 0.1274, + "step": 3863 + }, + { + "epoch": 2.79, + "learning_rate": 2.417365202352506e-07, + "loss": 0.1321, + "step": 3864 + }, + { + "epoch": 2.8, + "learning_rate": 2.400319963629605e-07, + "loss": 0.1179, + "step": 3865 + }, + { + "epoch": 2.8, + "learning_rate": 2.3833343020905897e-07, + "loss": 0.1312, + "step": 3866 + }, + { + "epoch": 2.8, + "learning_rate": 2.3664082281039115e-07, + "loss": 0.1104, + "step": 3867 + }, + { + "epoch": 2.8, + "learning_rate": 2.3495417520016606e-07, + "loss": 0.1167, + "step": 3868 + }, + { + "epoch": 2.8, + "learning_rate": 2.3327348840795348e-07, + "loss": 0.1303, + "step": 3869 + }, + { + "epoch": 2.8, + "learning_rate": 2.3159876345968613e-07, + "loss": 0.1306, + "step": 3870 + }, + { + "epoch": 2.8, + "learning_rate": 2.2993000137765287e-07, + "loss": 0.1198, + "step": 3871 + }, + { + "epoch": 2.8, + "learning_rate": 2.2826720318050998e-07, + "loss": 0.1235, + "step": 3872 + }, + { + "epoch": 2.8, + "learning_rate": 2.266103698832667e-07, + "loss": 0.1233, + "step": 3873 + }, + { + "epoch": 2.8, + "learning_rate": 2.2495950249729505e-07, + "loss": 0.1275, + "step": 3874 + }, + { + "epoch": 2.8, + "learning_rate": 2.233146020303234e-07, + "loss": 0.1221, + "step": 3875 + }, + { + "epoch": 2.8, + "learning_rate": 2.2167566948643972e-07, + "loss": 0.1255, + "step": 3876 + }, + { + "epoch": 2.8, + "learning_rate": 2.2004270586608478e-07, + "loss": 0.128, + "step": 3877 + }, + { + "epoch": 2.81, + "learning_rate": 2.184157121660624e-07, + "loss": 0.1156, + "step": 3878 + }, + { + "epoch": 2.81, + "learning_rate": 2.1679468937952475e-07, + "loss": 0.1196, + "step": 3879 + }, + { + "epoch": 2.81, + "learning_rate": 2.15179638495987e-07, + "loss": 0.1217, + "step": 3880 + }, + { + "epoch": 2.81, + "learning_rate": 2.1357056050131165e-07, + "loss": 0.1312, + "step": 3881 + }, + { + "epoch": 2.81, + "learning_rate": 2.1196745637772076e-07, + "loss": 0.1204, + "step": 3882 + }, + { + "epoch": 2.81, + "learning_rate": 2.103703271037849e-07, + "loss": 0.127, + "step": 3883 + }, + { + "epoch": 2.81, + "learning_rate": 2.0877917365443313e-07, + "loss": 0.1281, + "step": 3884 + }, + { + "epoch": 2.81, + "learning_rate": 2.0719399700094067e-07, + "loss": 0.1357, + "step": 3885 + }, + { + "epoch": 2.81, + "learning_rate": 2.056147981109402e-07, + "loss": 0.1198, + "step": 3886 + }, + { + "epoch": 2.81, + "learning_rate": 2.040415779484106e-07, + "loss": 0.1246, + "step": 3887 + }, + { + "epoch": 2.81, + "learning_rate": 2.0247433747368372e-07, + "loss": 0.1326, + "step": 3888 + }, + { + "epoch": 2.81, + "learning_rate": 2.009130776434409e-07, + "loss": 0.118, + "step": 3889 + }, + { + "epoch": 2.81, + "learning_rate": 1.993577994107121e-07, + "loss": 0.1264, + "step": 3890 + }, + { + "epoch": 2.81, + "learning_rate": 1.978085037248767e-07, + "loss": 0.1159, + "step": 3891 + }, + { + "epoch": 2.82, + "learning_rate": 1.9626519153166156e-07, + "loss": 0.1245, + "step": 3892 + }, + { + "epoch": 2.82, + "learning_rate": 1.9472786377314311e-07, + "loss": 0.1138, + "step": 3893 + }, + { + "epoch": 2.82, + "learning_rate": 1.931965213877407e-07, + "loss": 0.1203, + "step": 3894 + }, + { + "epoch": 2.82, + "learning_rate": 1.9167116531022435e-07, + "loss": 0.1089, + "step": 3895 + }, + { + "epoch": 2.82, + "learning_rate": 1.901517964717059e-07, + "loss": 0.1231, + "step": 3896 + }, + { + "epoch": 2.82, + "learning_rate": 1.886384157996457e-07, + "loss": 0.1226, + "step": 3897 + }, + { + "epoch": 2.82, + "learning_rate": 1.8713102421784924e-07, + "loss": 0.1204, + "step": 3898 + }, + { + "epoch": 2.82, + "learning_rate": 1.8562962264646156e-07, + "loss": 0.1209, + "step": 3899 + }, + { + "epoch": 2.82, + "learning_rate": 1.8413421200197512e-07, + "loss": 0.1206, + "step": 3900 + }, + { + "epoch": 2.82, + "learning_rate": 1.826447931972253e-07, + "loss": 0.1232, + "step": 3901 + }, + { + "epoch": 2.82, + "learning_rate": 1.811613671413881e-07, + "loss": 0.1266, + "step": 3902 + }, + { + "epoch": 2.82, + "learning_rate": 1.796839347399837e-07, + "loss": 0.1202, + "step": 3903 + }, + { + "epoch": 2.82, + "learning_rate": 1.7821249689487062e-07, + "loss": 0.1207, + "step": 3904 + }, + { + "epoch": 2.82, + "learning_rate": 1.7674705450425044e-07, + "loss": 0.124, + "step": 3905 + }, + { + "epoch": 2.83, + "learning_rate": 1.7528760846266423e-07, + "loss": 0.1278, + "step": 3906 + }, + { + "epoch": 2.83, + "learning_rate": 1.738341596609927e-07, + "loss": 0.1218, + "step": 3907 + }, + { + "epoch": 2.83, + "learning_rate": 1.7238670898645615e-07, + "loss": 0.1165, + "step": 3908 + }, + { + "epoch": 2.83, + "learning_rate": 1.7094525732261336e-07, + "loss": 0.1194, + "step": 3909 + }, + { + "epoch": 2.83, + "learning_rate": 1.6950980554936047e-07, + "loss": 0.119, + "step": 3910 + }, + { + "epoch": 2.83, + "learning_rate": 1.6808035454293215e-07, + "loss": 0.1275, + "step": 3911 + }, + { + "epoch": 2.83, + "learning_rate": 1.6665690517589818e-07, + "loss": 0.119, + "step": 3912 + }, + { + "epoch": 2.83, + "learning_rate": 1.6523945831716792e-07, + "loss": 0.1307, + "step": 3913 + }, + { + "epoch": 2.83, + "learning_rate": 1.6382801483198374e-07, + "loss": 0.1208, + "step": 3914 + }, + { + "epoch": 2.83, + "learning_rate": 1.6242257558192642e-07, + "loss": 0.1325, + "step": 3915 + }, + { + "epoch": 2.83, + "learning_rate": 1.6102314142490638e-07, + "loss": 0.1246, + "step": 3916 + }, + { + "epoch": 2.83, + "learning_rate": 1.5962971321517583e-07, + "loss": 0.1194, + "step": 3917 + }, + { + "epoch": 2.83, + "learning_rate": 1.582422918033133e-07, + "loss": 0.1307, + "step": 3918 + }, + { + "epoch": 2.83, + "learning_rate": 1.5686087803623572e-07, + "loss": 0.1212, + "step": 3919 + }, + { + "epoch": 2.84, + "learning_rate": 1.5548547275719085e-07, + "loss": 0.1292, + "step": 3920 + }, + { + "epoch": 2.84, + "learning_rate": 1.5411607680575924e-07, + "loss": 0.1195, + "step": 3921 + }, + { + "epoch": 2.84, + "learning_rate": 1.527526910178523e-07, + "loss": 0.1254, + "step": 3922 + }, + { + "epoch": 2.84, + "learning_rate": 1.5139531622571424e-07, + "loss": 0.122, + "step": 3923 + }, + { + "epoch": 2.84, + "learning_rate": 1.5004395325792008e-07, + "loss": 0.1237, + "step": 3924 + }, + { + "epoch": 2.84, + "learning_rate": 1.4869860293937112e-07, + "loss": 0.1211, + "step": 3925 + }, + { + "epoch": 2.84, + "learning_rate": 1.473592660913037e-07, + "loss": 0.1219, + "step": 3926 + }, + { + "epoch": 2.84, + "learning_rate": 1.4602594353128164e-07, + "loss": 0.1256, + "step": 3927 + }, + { + "epoch": 2.84, + "learning_rate": 1.44698636073195e-07, + "loss": 0.1226, + "step": 3928 + }, + { + "epoch": 2.84, + "learning_rate": 1.433773445272657e-07, + "loss": 0.1326, + "step": 3929 + }, + { + "epoch": 2.84, + "learning_rate": 1.4206206970004076e-07, + "loss": 0.125, + "step": 3930 + }, + { + "epoch": 2.84, + "learning_rate": 1.4075281239439685e-07, + "loss": 0.1157, + "step": 3931 + }, + { + "epoch": 2.84, + "learning_rate": 1.394495734095347e-07, + "loss": 0.1219, + "step": 3932 + }, + { + "epoch": 2.84, + "learning_rate": 1.3815235354098233e-07, + "loss": 0.1222, + "step": 3933 + }, + { + "epoch": 2.85, + "learning_rate": 1.3686115358059415e-07, + "loss": 0.1183, + "step": 3934 + }, + { + "epoch": 2.85, + "learning_rate": 1.3557597431655078e-07, + "loss": 0.1292, + "step": 3935 + }, + { + "epoch": 2.85, + "learning_rate": 1.342968165333558e-07, + "loss": 0.1262, + "step": 3936 + }, + { + "epoch": 2.85, + "learning_rate": 1.3302368101183683e-07, + "loss": 0.1146, + "step": 3937 + }, + { + "epoch": 2.85, + "learning_rate": 1.3175656852914664e-07, + "loss": 0.1279, + "step": 3938 + }, + { + "epoch": 2.85, + "learning_rate": 1.304954798587621e-07, + "loss": 0.1241, + "step": 3939 + }, + { + "epoch": 2.85, + "learning_rate": 1.2924041577048185e-07, + "loss": 0.1351, + "step": 3940 + }, + { + "epoch": 2.85, + "learning_rate": 1.279913770304253e-07, + "loss": 0.1176, + "step": 3941 + }, + { + "epoch": 2.85, + "learning_rate": 1.26748364401037e-07, + "loss": 0.1244, + "step": 3942 + }, + { + "epoch": 2.85, + "learning_rate": 1.255113786410822e-07, + "loss": 0.1356, + "step": 3943 + }, + { + "epoch": 2.85, + "learning_rate": 1.242804205056458e-07, + "loss": 0.1357, + "step": 3944 + }, + { + "epoch": 2.85, + "learning_rate": 1.2305549074613342e-07, + "loss": 0.1264, + "step": 3945 + }, + { + "epoch": 2.85, + "learning_rate": 1.218365901102736e-07, + "loss": 0.1217, + "step": 3946 + }, + { + "epoch": 2.85, + "learning_rate": 1.2062371934211004e-07, + "loss": 0.1159, + "step": 3947 + }, + { + "epoch": 2.86, + "learning_rate": 1.194168791820105e-07, + "loss": 0.1217, + "step": 3948 + }, + { + "epoch": 2.86, + "learning_rate": 1.1821607036665794e-07, + "loss": 0.1152, + "step": 3949 + }, + { + "epoch": 2.86, + "learning_rate": 1.170212936290549e-07, + "loss": 0.1287, + "step": 3950 + }, + { + "epoch": 2.86, + "learning_rate": 1.1583254969852132e-07, + "loss": 0.1495, + "step": 3951 + }, + { + "epoch": 2.86, + "learning_rate": 1.1464983930069562e-07, + "loss": 0.1322, + "step": 3952 + }, + { + "epoch": 2.86, + "learning_rate": 1.1347316315753255e-07, + "loss": 0.1196, + "step": 3953 + }, + { + "epoch": 2.86, + "learning_rate": 1.1230252198730307e-07, + "loss": 0.1345, + "step": 3954 + }, + { + "epoch": 2.86, + "learning_rate": 1.1113791650459449e-07, + "loss": 0.1233, + "step": 3955 + }, + { + "epoch": 2.86, + "learning_rate": 1.0997934742031147e-07, + "loss": 0.1248, + "step": 3956 + }, + { + "epoch": 2.86, + "learning_rate": 1.088268154416694e-07, + "loss": 0.1302, + "step": 3957 + }, + { + "epoch": 2.86, + "learning_rate": 1.076803212722044e-07, + "loss": 0.1325, + "step": 3958 + }, + { + "epoch": 2.86, + "learning_rate": 1.0653986561176221e-07, + "loss": 0.1227, + "step": 3959 + }, + { + "epoch": 2.86, + "learning_rate": 1.0540544915650597e-07, + "loss": 0.1176, + "step": 3960 + }, + { + "epoch": 2.87, + "learning_rate": 1.0427707259890841e-07, + "loss": 0.1203, + "step": 3961 + }, + { + "epoch": 2.87, + "learning_rate": 1.0315473662775855e-07, + "loss": 0.1286, + "step": 3962 + }, + { + "epoch": 2.87, + "learning_rate": 1.0203844192815726e-07, + "loss": 0.1109, + "step": 3963 + }, + { + "epoch": 2.87, + "learning_rate": 1.0092818918151948e-07, + "loss": 0.1237, + "step": 3964 + }, + { + "epoch": 2.87, + "learning_rate": 9.98239790655664e-08, + "loss": 0.1211, + "step": 3965 + }, + { + "epoch": 2.87, + "learning_rate": 9.872581225433664e-08, + "loss": 0.1299, + "step": 3966 + }, + { + "epoch": 2.87, + "learning_rate": 9.76336894181773e-08, + "loss": 0.1271, + "step": 3967 + }, + { + "epoch": 2.87, + "learning_rate": 9.654761122374623e-08, + "loss": 0.1303, + "step": 3968 + }, + { + "epoch": 2.87, + "learning_rate": 9.546757833401088e-08, + "loss": 0.1371, + "step": 3969 + }, + { + "epoch": 2.87, + "learning_rate": 9.439359140825055e-08, + "loss": 0.1129, + "step": 3970 + }, + { + "epoch": 2.87, + "learning_rate": 9.332565110205193e-08, + "loss": 0.1274, + "step": 3971 + }, + { + "epoch": 2.87, + "learning_rate": 9.226375806731136e-08, + "loss": 0.1335, + "step": 3972 + }, + { + "epoch": 2.87, + "learning_rate": 9.120791295223364e-08, + "loss": 0.1226, + "step": 3973 + }, + { + "epoch": 2.87, + "learning_rate": 9.01581164013321e-08, + "loss": 0.1211, + "step": 3974 + }, + { + "epoch": 2.88, + "learning_rate": 8.911436905542747e-08, + "loss": 0.1189, + "step": 3975 + }, + { + "epoch": 2.88, + "learning_rate": 8.807667155164901e-08, + "loss": 0.1339, + "step": 3976 + }, + { + "epoch": 2.88, + "learning_rate": 8.704502452343111e-08, + "loss": 0.1277, + "step": 3977 + }, + { + "epoch": 2.88, + "learning_rate": 8.60194286005156e-08, + "loss": 0.1312, + "step": 3978 + }, + { + "epoch": 2.88, + "learning_rate": 8.49998844089528e-08, + "loss": 0.1293, + "step": 3979 + }, + { + "epoch": 2.88, + "learning_rate": 8.398639257109487e-08, + "loss": 0.1265, + "step": 3980 + }, + { + "epoch": 2.88, + "learning_rate": 8.29789537056025e-08, + "loss": 0.1226, + "step": 3981 + }, + { + "epoch": 2.88, + "learning_rate": 8.197756842743931e-08, + "loss": 0.1089, + "step": 3982 + }, + { + "epoch": 2.88, + "learning_rate": 8.098223734787635e-08, + "loss": 0.1292, + "step": 3983 + }, + { + "epoch": 2.88, + "learning_rate": 7.999296107448761e-08, + "loss": 0.1235, + "step": 3984 + }, + { + "epoch": 2.88, + "learning_rate": 7.900974021114893e-08, + "loss": 0.1299, + "step": 3985 + }, + { + "epoch": 2.88, + "learning_rate": 7.803257535804465e-08, + "loss": 0.1301, + "step": 3986 + }, + { + "epoch": 2.88, + "learning_rate": 7.706146711165873e-08, + "loss": 0.1292, + "step": 3987 + }, + { + "epoch": 2.88, + "learning_rate": 7.609641606477703e-08, + "loss": 0.1212, + "step": 3988 + }, + { + "epoch": 2.89, + "learning_rate": 7.513742280649272e-08, + "loss": 0.1133, + "step": 3989 + }, + { + "epoch": 2.89, + "learning_rate": 7.418448792219646e-08, + "loss": 0.1283, + "step": 3990 + }, + { + "epoch": 2.89, + "learning_rate": 7.323761199358404e-08, + "loss": 0.1316, + "step": 3991 + }, + { + "epoch": 2.89, + "learning_rate": 7.229679559864866e-08, + "loss": 0.1143, + "step": 3992 + }, + { + "epoch": 2.89, + "learning_rate": 7.136203931168983e-08, + "loss": 0.1313, + "step": 3993 + }, + { + "epoch": 2.89, + "learning_rate": 7.043334370330335e-08, + "loss": 0.1168, + "step": 3994 + }, + { + "epoch": 2.89, + "learning_rate": 6.951070934038795e-08, + "loss": 0.1249, + "step": 3995 + }, + { + "epoch": 2.89, + "learning_rate": 6.859413678614201e-08, + "loss": 0.1221, + "step": 3996 + }, + { + "epoch": 2.89, + "learning_rate": 6.768362660006356e-08, + "loss": 0.1197, + "step": 3997 + }, + { + "epoch": 2.89, + "learning_rate": 6.677917933794908e-08, + "loss": 0.1228, + "step": 3998 + }, + { + "epoch": 2.89, + "learning_rate": 6.588079555189586e-08, + "loss": 0.1205, + "step": 3999 + }, + { + "epoch": 2.89, + "learning_rate": 6.498847579029744e-08, + "loss": 0.1257, + "step": 4000 + }, + { + "epoch": 2.89, + "eval_loss": 0.12249945104122162, + "eval_runtime": 715.8846, + "eval_samples_per_second": 69.844, + "eval_steps_per_second": 2.183, + "step": 4000 + } + ], + "max_steps": 4146, + "num_train_epochs": 3, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}