{ "best_metric": 0.7183188796043396, "best_model_checkpoint": "./test_ast\\checkpoint-1260", "epoch": 15.0, "eval_steps": 5, "global_step": 3915, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019984674329501915, "loss": 3.2249, "step": 5 }, { "epoch": 0.02, "eval_accuracy": 0.09770114942528736, "eval_loss": 3.1166629791259766, "eval_runtime": 430.1734, "eval_samples_per_second": 0.404, "eval_steps_per_second": 0.051, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.00019964240102171137, "loss": 3.3654, "step": 10 }, { "epoch": 0.04, "eval_accuracy": 0.16666666666666666, "eval_loss": 3.1070964336395264, "eval_runtime": 220.5651, "eval_samples_per_second": 0.789, "eval_steps_per_second": 0.1, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00019938697318007664, "loss": 2.952, "step": 15 }, { "epoch": 0.06, "eval_accuracy": 0.28160919540229884, "eval_loss": 2.3668224811553955, "eval_runtime": 235.0668, "eval_samples_per_second": 0.74, "eval_steps_per_second": 0.094, "step": 15 }, { "epoch": 0.08, "learning_rate": 0.0001991315453384419, "loss": 2.6551, "step": 20 }, { "epoch": 0.08, "eval_accuracy": 0.22988505747126436, "eval_loss": 3.0940752029418945, "eval_runtime": 263.2242, "eval_samples_per_second": 0.661, "eval_steps_per_second": 0.084, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.00019887611749680716, "loss": 3.2285, "step": 25 }, { "epoch": 0.1, "eval_accuracy": 0.13793103448275862, "eval_loss": 2.4251976013183594, "eval_runtime": 240.2214, "eval_samples_per_second": 0.724, "eval_steps_per_second": 0.092, "step": 25 }, { "epoch": 0.11, "learning_rate": 0.00019862068965517243, "loss": 2.4251, "step": 30 }, { "epoch": 0.11, "eval_accuracy": 0.15517241379310345, "eval_loss": 2.3162126541137695, "eval_runtime": 271.4124, "eval_samples_per_second": 0.641, "eval_steps_per_second": 0.081, "step": 30 }, { "epoch": 0.13, "learning_rate": 0.0001983652618135377, "loss": 2.2216, "step": 35 }, { "epoch": 0.13, "eval_accuracy": 0.2413793103448276, "eval_loss": 2.2742679119110107, "eval_runtime": 292.5369, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.075, "step": 35 }, { "epoch": 0.15, "learning_rate": 0.00019810983397190295, "loss": 1.982, "step": 40 }, { "epoch": 0.15, "eval_accuracy": 0.25862068965517243, "eval_loss": 2.558276653289795, "eval_runtime": 282.7639, "eval_samples_per_second": 0.615, "eval_steps_per_second": 0.078, "step": 40 }, { "epoch": 0.17, "learning_rate": 0.0001978544061302682, "loss": 2.2904, "step": 45 }, { "epoch": 0.17, "eval_accuracy": 0.1896551724137931, "eval_loss": 2.2774674892425537, "eval_runtime": 272.0899, "eval_samples_per_second": 0.639, "eval_steps_per_second": 0.081, "step": 45 }, { "epoch": 0.19, "learning_rate": 0.00019759897828863348, "loss": 2.2136, "step": 50 }, { "epoch": 0.19, "eval_accuracy": 0.1724137931034483, "eval_loss": 2.264404535293579, "eval_runtime": 274.6774, "eval_samples_per_second": 0.633, "eval_steps_per_second": 0.08, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00019734355044699872, "loss": 1.9795, "step": 55 }, { "epoch": 0.21, "eval_accuracy": 0.27586206896551724, "eval_loss": 2.131664752960205, "eval_runtime": 289.0641, "eval_samples_per_second": 0.602, "eval_steps_per_second": 0.076, "step": 55 }, { "epoch": 0.23, "learning_rate": 0.000197088122605364, "loss": 2.1172, "step": 60 }, { "epoch": 0.23, "eval_accuracy": 0.2988505747126437, "eval_loss": 2.00719952583313, "eval_runtime": 237.5063, "eval_samples_per_second": 0.733, "eval_steps_per_second": 0.093, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.00019683269476372924, "loss": 2.1899, "step": 65 }, { "epoch": 0.25, "eval_accuracy": 0.19540229885057472, "eval_loss": 1.9535428285598755, "eval_runtime": 127.0996, "eval_samples_per_second": 1.369, "eval_steps_per_second": 0.173, "step": 65 }, { "epoch": 0.27, "learning_rate": 0.00019657726692209453, "loss": 2.1116, "step": 70 }, { "epoch": 0.27, "eval_accuracy": 0.22988505747126436, "eval_loss": 1.9114965200424194, "eval_runtime": 123.0583, "eval_samples_per_second": 1.414, "eval_steps_per_second": 0.179, "step": 70 }, { "epoch": 0.29, "learning_rate": 0.00019632183908045977, "loss": 2.0904, "step": 75 }, { "epoch": 0.29, "eval_accuracy": 0.3160919540229885, "eval_loss": 1.9307175874710083, "eval_runtime": 110.3002, "eval_samples_per_second": 1.578, "eval_steps_per_second": 0.199, "step": 75 }, { "epoch": 0.31, "learning_rate": 0.00019606641123882503, "loss": 1.9124, "step": 80 }, { "epoch": 0.31, "eval_accuracy": 0.3390804597701149, "eval_loss": 1.8102548122406006, "eval_runtime": 111.5475, "eval_samples_per_second": 1.56, "eval_steps_per_second": 0.197, "step": 80 }, { "epoch": 0.33, "learning_rate": 0.0001958109833971903, "loss": 1.6039, "step": 85 }, { "epoch": 0.33, "eval_accuracy": 0.21839080459770116, "eval_loss": 1.9069994688034058, "eval_runtime": 109.692, "eval_samples_per_second": 1.586, "eval_steps_per_second": 0.201, "step": 85 }, { "epoch": 0.34, "learning_rate": 0.00019555555555555556, "loss": 1.5797, "step": 90 }, { "epoch": 0.34, "eval_accuracy": 0.3620689655172414, "eval_loss": 1.955863118171692, "eval_runtime": 110.8668, "eval_samples_per_second": 1.569, "eval_steps_per_second": 0.198, "step": 90 }, { "epoch": 0.36, "learning_rate": 0.00019530012771392082, "loss": 1.9217, "step": 95 }, { "epoch": 0.36, "eval_accuracy": 0.28735632183908044, "eval_loss": 1.7302300930023193, "eval_runtime": 109.9193, "eval_samples_per_second": 1.583, "eval_steps_per_second": 0.2, "step": 95 }, { "epoch": 0.38, "learning_rate": 0.00019504469987228609, "loss": 2.1192, "step": 100 }, { "epoch": 0.38, "eval_accuracy": 0.28735632183908044, "eval_loss": 2.121290445327759, "eval_runtime": 114.0902, "eval_samples_per_second": 1.525, "eval_steps_per_second": 0.193, "step": 100 }, { "epoch": 0.4, "learning_rate": 0.00019478927203065135, "loss": 1.991, "step": 105 }, { "epoch": 0.4, "eval_accuracy": 0.25287356321839083, "eval_loss": 1.8870444297790527, "eval_runtime": 111.5449, "eval_samples_per_second": 1.56, "eval_steps_per_second": 0.197, "step": 105 }, { "epoch": 0.42, "learning_rate": 0.0001945338441890166, "loss": 1.9855, "step": 110 }, { "epoch": 0.42, "eval_accuracy": 0.3448275862068966, "eval_loss": 1.853091835975647, "eval_runtime": 108.3618, "eval_samples_per_second": 1.606, "eval_steps_per_second": 0.203, "step": 110 }, { "epoch": 0.44, "learning_rate": 0.00019427841634738188, "loss": 1.6668, "step": 115 }, { "epoch": 0.44, "eval_accuracy": 0.3218390804597701, "eval_loss": 2.0078558921813965, "eval_runtime": 90.0246, "eval_samples_per_second": 1.933, "eval_steps_per_second": 0.244, "step": 115 }, { "epoch": 0.46, "learning_rate": 0.00019402298850574714, "loss": 1.4628, "step": 120 }, { "epoch": 0.46, "eval_accuracy": 0.3793103448275862, "eval_loss": 1.891345500946045, "eval_runtime": 90.081, "eval_samples_per_second": 1.932, "eval_steps_per_second": 0.244, "step": 120 }, { "epoch": 0.48, "learning_rate": 0.0001937675606641124, "loss": 1.8827, "step": 125 }, { "epoch": 0.48, "eval_accuracy": 0.2988505747126437, "eval_loss": 1.7698206901550293, "eval_runtime": 86.9617, "eval_samples_per_second": 2.001, "eval_steps_per_second": 0.253, "step": 125 }, { "epoch": 0.5, "learning_rate": 0.00019351213282247767, "loss": 1.9941, "step": 130 }, { "epoch": 0.5, "eval_accuracy": 0.29310344827586204, "eval_loss": 1.7076359987258911, "eval_runtime": 91.6164, "eval_samples_per_second": 1.899, "eval_steps_per_second": 0.24, "step": 130 }, { "epoch": 0.52, "learning_rate": 0.00019325670498084293, "loss": 1.844, "step": 135 }, { "epoch": 0.52, "eval_accuracy": 0.29310344827586204, "eval_loss": 1.722959280014038, "eval_runtime": 87.2052, "eval_samples_per_second": 1.995, "eval_steps_per_second": 0.252, "step": 135 }, { "epoch": 0.54, "learning_rate": 0.0001930012771392082, "loss": 1.5423, "step": 140 }, { "epoch": 0.54, "eval_accuracy": 0.3793103448275862, "eval_loss": 1.6389808654785156, "eval_runtime": 87.1239, "eval_samples_per_second": 1.997, "eval_steps_per_second": 0.253, "step": 140 }, { "epoch": 0.56, "learning_rate": 0.00019274584929757346, "loss": 1.9086, "step": 145 }, { "epoch": 0.56, "eval_accuracy": 0.3620689655172414, "eval_loss": 1.720744013786316, "eval_runtime": 85.5891, "eval_samples_per_second": 2.033, "eval_steps_per_second": 0.257, "step": 145 }, { "epoch": 0.57, "learning_rate": 0.0001924904214559387, "loss": 1.572, "step": 150 }, { "epoch": 0.57, "eval_accuracy": 0.3160919540229885, "eval_loss": 1.8218252658843994, "eval_runtime": 88.7779, "eval_samples_per_second": 1.96, "eval_steps_per_second": 0.248, "step": 150 }, { "epoch": 0.59, "learning_rate": 0.00019223499361430398, "loss": 1.8335, "step": 155 }, { "epoch": 0.59, "eval_accuracy": 0.39080459770114945, "eval_loss": 1.6242988109588623, "eval_runtime": 87.9658, "eval_samples_per_second": 1.978, "eval_steps_per_second": 0.25, "step": 155 }, { "epoch": 0.61, "learning_rate": 0.00019197956577266922, "loss": 1.5903, "step": 160 }, { "epoch": 0.61, "eval_accuracy": 0.3850574712643678, "eval_loss": 1.634774088859558, "eval_runtime": 87.315, "eval_samples_per_second": 1.993, "eval_steps_per_second": 0.252, "step": 160 }, { "epoch": 0.63, "learning_rate": 0.0001917241379310345, "loss": 1.7064, "step": 165 }, { "epoch": 0.63, "eval_accuracy": 0.42528735632183906, "eval_loss": 1.5469759702682495, "eval_runtime": 87.4736, "eval_samples_per_second": 1.989, "eval_steps_per_second": 0.252, "step": 165 }, { "epoch": 0.65, "learning_rate": 0.00019146871008939975, "loss": 1.3325, "step": 170 }, { "epoch": 0.65, "eval_accuracy": 0.43103448275862066, "eval_loss": 1.5237237215042114, "eval_runtime": 88.9581, "eval_samples_per_second": 1.956, "eval_steps_per_second": 0.247, "step": 170 }, { "epoch": 0.67, "learning_rate": 0.00019121328224776504, "loss": 1.5154, "step": 175 }, { "epoch": 0.67, "eval_accuracy": 0.42528735632183906, "eval_loss": 1.571236491203308, "eval_runtime": 84.6969, "eval_samples_per_second": 2.054, "eval_steps_per_second": 0.26, "step": 175 }, { "epoch": 0.69, "learning_rate": 0.00019095785440613027, "loss": 1.7564, "step": 180 }, { "epoch": 0.69, "eval_accuracy": 0.2988505747126437, "eval_loss": 1.5861365795135498, "eval_runtime": 89.2791, "eval_samples_per_second": 1.949, "eval_steps_per_second": 0.246, "step": 180 }, { "epoch": 0.71, "learning_rate": 0.00019070242656449554, "loss": 1.3903, "step": 185 }, { "epoch": 0.71, "eval_accuracy": 0.25862068965517243, "eval_loss": 1.8145408630371094, "eval_runtime": 85.8558, "eval_samples_per_second": 2.027, "eval_steps_per_second": 0.256, "step": 185 }, { "epoch": 0.73, "learning_rate": 0.0001904469987228608, "loss": 2.0853, "step": 190 }, { "epoch": 0.73, "eval_accuracy": 0.367816091954023, "eval_loss": 1.5079203844070435, "eval_runtime": 89.4633, "eval_samples_per_second": 1.945, "eval_steps_per_second": 0.246, "step": 190 }, { "epoch": 0.75, "learning_rate": 0.00019024265644955303, "loss": 1.5312, "step": 195 }, { "epoch": 0.75, "eval_accuracy": 0.4482758620689655, "eval_loss": 1.4408893585205078, "eval_runtime": 87.6476, "eval_samples_per_second": 1.985, "eval_steps_per_second": 0.251, "step": 195 }, { "epoch": 0.77, "learning_rate": 0.00018998722860791826, "loss": 1.2828, "step": 200 }, { "epoch": 0.77, "eval_accuracy": 0.39655172413793105, "eval_loss": 1.600140929222107, "eval_runtime": 86.6087, "eval_samples_per_second": 2.009, "eval_steps_per_second": 0.254, "step": 200 }, { "epoch": 0.79, "learning_rate": 0.00018973180076628355, "loss": 1.9389, "step": 205 }, { "epoch": 0.79, "eval_accuracy": 0.367816091954023, "eval_loss": 1.7927314043045044, "eval_runtime": 90.3234, "eval_samples_per_second": 1.926, "eval_steps_per_second": 0.244, "step": 205 }, { "epoch": 0.8, "learning_rate": 0.0001894763729246488, "loss": 1.5486, "step": 210 }, { "epoch": 0.8, "eval_accuracy": 0.39080459770114945, "eval_loss": 1.5749437808990479, "eval_runtime": 86.7654, "eval_samples_per_second": 2.005, "eval_steps_per_second": 0.254, "step": 210 }, { "epoch": 0.82, "learning_rate": 0.00018922094508301408, "loss": 1.4306, "step": 215 }, { "epoch": 0.82, "eval_accuracy": 0.28160919540229884, "eval_loss": 1.7231699228286743, "eval_runtime": 87.5623, "eval_samples_per_second": 1.987, "eval_steps_per_second": 0.251, "step": 215 }, { "epoch": 0.84, "learning_rate": 0.00018896551724137932, "loss": 1.814, "step": 220 }, { "epoch": 0.84, "eval_accuracy": 0.25862068965517243, "eval_loss": 1.7349094152450562, "eval_runtime": 87.3298, "eval_samples_per_second": 1.992, "eval_steps_per_second": 0.252, "step": 220 }, { "epoch": 0.86, "learning_rate": 0.00018871008939974458, "loss": 1.7483, "step": 225 }, { "epoch": 0.86, "eval_accuracy": 0.4482758620689655, "eval_loss": 1.606767177581787, "eval_runtime": 88.6573, "eval_samples_per_second": 1.963, "eval_steps_per_second": 0.248, "step": 225 }, { "epoch": 0.88, "learning_rate": 0.00018845466155810984, "loss": 1.836, "step": 230 }, { "epoch": 0.88, "eval_accuracy": 0.43103448275862066, "eval_loss": 1.6200461387634277, "eval_runtime": 86.663, "eval_samples_per_second": 2.008, "eval_steps_per_second": 0.254, "step": 230 }, { "epoch": 0.9, "learning_rate": 0.0001881992337164751, "loss": 1.8752, "step": 235 }, { "epoch": 0.9, "eval_accuracy": 0.41954022988505746, "eval_loss": 1.4891613721847534, "eval_runtime": 87.2585, "eval_samples_per_second": 1.994, "eval_steps_per_second": 0.252, "step": 235 }, { "epoch": 0.92, "learning_rate": 0.00018794380587484037, "loss": 1.3274, "step": 240 }, { "epoch": 0.92, "eval_accuracy": 0.4482758620689655, "eval_loss": 1.5101828575134277, "eval_runtime": 89.5236, "eval_samples_per_second": 1.944, "eval_steps_per_second": 0.246, "step": 240 }, { "epoch": 0.94, "learning_rate": 0.00018768837803320563, "loss": 1.1109, "step": 245 }, { "epoch": 0.94, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.6440746784210205, "eval_runtime": 86.2143, "eval_samples_per_second": 2.018, "eval_steps_per_second": 0.255, "step": 245 }, { "epoch": 0.96, "learning_rate": 0.0001874329501915709, "loss": 1.137, "step": 250 }, { "epoch": 0.96, "eval_accuracy": 0.4885057471264368, "eval_loss": 1.6819708347320557, "eval_runtime": 86.701, "eval_samples_per_second": 2.007, "eval_steps_per_second": 0.254, "step": 250 }, { "epoch": 0.98, "learning_rate": 0.00018717752234993616, "loss": 2.029, "step": 255 }, { "epoch": 0.98, "eval_accuracy": 0.4425287356321839, "eval_loss": 1.5568251609802246, "eval_runtime": 89.066, "eval_samples_per_second": 1.954, "eval_steps_per_second": 0.247, "step": 255 }, { "epoch": 1.0, "learning_rate": 0.0001869220945083014, "loss": 1.3499, "step": 260 }, { "epoch": 1.0, "eval_accuracy": 0.47126436781609193, "eval_loss": 1.5453182458877563, "eval_runtime": 88.7892, "eval_samples_per_second": 1.96, "eval_steps_per_second": 0.248, "step": 260 }, { "epoch": 1.02, "learning_rate": 0.0001866666666666667, "loss": 1.6062, "step": 265 }, { "epoch": 1.02, "eval_accuracy": 0.3850574712643678, "eval_loss": 1.7614227533340454, "eval_runtime": 88.9814, "eval_samples_per_second": 1.955, "eval_steps_per_second": 0.247, "step": 265 }, { "epoch": 1.03, "learning_rate": 0.00018641123882503192, "loss": 1.2653, "step": 270 }, { "epoch": 1.03, "eval_accuracy": 0.40804597701149425, "eval_loss": 1.680598497390747, "eval_runtime": 89.0981, "eval_samples_per_second": 1.953, "eval_steps_per_second": 0.247, "step": 270 }, { "epoch": 1.05, "learning_rate": 0.0001861558109833972, "loss": 1.5162, "step": 275 }, { "epoch": 1.05, "eval_accuracy": 0.3160919540229885, "eval_loss": 2.1192500591278076, "eval_runtime": 87.4979, "eval_samples_per_second": 1.989, "eval_steps_per_second": 0.251, "step": 275 }, { "epoch": 1.07, "learning_rate": 0.00018590038314176245, "loss": 1.8098, "step": 280 }, { "epoch": 1.07, "eval_accuracy": 0.4540229885057471, "eval_loss": 1.5041106939315796, "eval_runtime": 86.6499, "eval_samples_per_second": 2.008, "eval_steps_per_second": 0.254, "step": 280 }, { "epoch": 1.09, "learning_rate": 0.00018564495530012774, "loss": 1.5888, "step": 285 }, { "epoch": 1.09, "eval_accuracy": 0.3735632183908046, "eval_loss": 1.6979694366455078, "eval_runtime": 84.5366, "eval_samples_per_second": 2.058, "eval_steps_per_second": 0.26, "step": 285 }, { "epoch": 1.11, "learning_rate": 0.00018538952745849298, "loss": 1.57, "step": 290 }, { "epoch": 1.11, "eval_accuracy": 0.3850574712643678, "eval_loss": 1.5114161968231201, "eval_runtime": 84.8072, "eval_samples_per_second": 2.052, "eval_steps_per_second": 0.259, "step": 290 }, { "epoch": 1.13, "learning_rate": 0.00018513409961685824, "loss": 1.3931, "step": 295 }, { "epoch": 1.13, "eval_accuracy": 0.40804597701149425, "eval_loss": 1.5041882991790771, "eval_runtime": 84.3882, "eval_samples_per_second": 2.062, "eval_steps_per_second": 0.261, "step": 295 }, { "epoch": 1.15, "learning_rate": 0.0001848786717752235, "loss": 1.494, "step": 300 }, { "epoch": 1.15, "eval_accuracy": 0.4425287356321839, "eval_loss": 1.4944647550582886, "eval_runtime": 87.1429, "eval_samples_per_second": 1.997, "eval_steps_per_second": 0.252, "step": 300 }, { "epoch": 1.17, "learning_rate": 0.00018462324393358877, "loss": 1.2355, "step": 305 }, { "epoch": 1.17, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.4152653217315674, "eval_runtime": 84.3818, "eval_samples_per_second": 2.062, "eval_steps_per_second": 0.261, "step": 305 }, { "epoch": 1.19, "learning_rate": 0.00018436781609195403, "loss": 1.9234, "step": 310 }, { "epoch": 1.19, "eval_accuracy": 0.46551724137931033, "eval_loss": 1.3996832370758057, "eval_runtime": 87.1832, "eval_samples_per_second": 1.996, "eval_steps_per_second": 0.252, "step": 310 }, { "epoch": 1.21, "learning_rate": 0.0001841123882503193, "loss": 1.2396, "step": 315 }, { "epoch": 1.21, "eval_accuracy": 0.46551724137931033, "eval_loss": 1.3299652338027954, "eval_runtime": 84.5376, "eval_samples_per_second": 2.058, "eval_steps_per_second": 0.26, "step": 315 }, { "epoch": 1.23, "learning_rate": 0.00018385696040868456, "loss": 1.8784, "step": 320 }, { "epoch": 1.23, "eval_accuracy": 0.4425287356321839, "eval_loss": 1.5061637163162231, "eval_runtime": 84.8318, "eval_samples_per_second": 2.051, "eval_steps_per_second": 0.259, "step": 320 }, { "epoch": 1.25, "learning_rate": 0.00018360153256704982, "loss": 1.2335, "step": 325 }, { "epoch": 1.25, "eval_accuracy": 0.46551724137931033, "eval_loss": 1.3658419847488403, "eval_runtime": 86.8661, "eval_samples_per_second": 2.003, "eval_steps_per_second": 0.253, "step": 325 }, { "epoch": 1.26, "learning_rate": 0.00018334610472541506, "loss": 1.2988, "step": 330 }, { "epoch": 1.26, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.4404170513153076, "eval_runtime": 84.7349, "eval_samples_per_second": 2.053, "eval_steps_per_second": 0.26, "step": 330 }, { "epoch": 1.28, "learning_rate": 0.00018309067688378035, "loss": 1.6458, "step": 335 }, { "epoch": 1.28, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.389655590057373, "eval_runtime": 86.6616, "eval_samples_per_second": 2.008, "eval_steps_per_second": 0.254, "step": 335 }, { "epoch": 1.3, "learning_rate": 0.00018283524904214558, "loss": 1.4325, "step": 340 }, { "epoch": 1.3, "eval_accuracy": 0.3390804597701149, "eval_loss": 1.943527102470398, "eval_runtime": 84.9859, "eval_samples_per_second": 2.047, "eval_steps_per_second": 0.259, "step": 340 }, { "epoch": 1.32, "learning_rate": 0.00018257982120051087, "loss": 1.8258, "step": 345 }, { "epoch": 1.32, "eval_accuracy": 0.41379310344827586, "eval_loss": 1.674710988998413, "eval_runtime": 85.9104, "eval_samples_per_second": 2.025, "eval_steps_per_second": 0.256, "step": 345 }, { "epoch": 1.34, "learning_rate": 0.0001823243933588761, "loss": 1.6398, "step": 350 }, { "epoch": 1.34, "eval_accuracy": 0.43103448275862066, "eval_loss": 1.537279486656189, "eval_runtime": 87.0005, "eval_samples_per_second": 2.0, "eval_steps_per_second": 0.253, "step": 350 }, { "epoch": 1.36, "learning_rate": 0.0001820689655172414, "loss": 1.3836, "step": 355 }, { "epoch": 1.36, "eval_accuracy": 0.4540229885057471, "eval_loss": 1.530836820602417, "eval_runtime": 85.6209, "eval_samples_per_second": 2.032, "eval_steps_per_second": 0.257, "step": 355 }, { "epoch": 1.38, "learning_rate": 0.00018181353767560664, "loss": 1.1067, "step": 360 }, { "epoch": 1.38, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.5031547546386719, "eval_runtime": 86.5596, "eval_samples_per_second": 2.01, "eval_steps_per_second": 0.254, "step": 360 }, { "epoch": 1.4, "learning_rate": 0.0001815581098339719, "loss": 1.4948, "step": 365 }, { "epoch": 1.4, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.4820023775100708, "eval_runtime": 87.8688, "eval_samples_per_second": 1.98, "eval_steps_per_second": 0.25, "step": 365 }, { "epoch": 1.42, "learning_rate": 0.00018130268199233716, "loss": 1.3582, "step": 370 }, { "epoch": 1.42, "eval_accuracy": 0.41954022988505746, "eval_loss": 1.455491304397583, "eval_runtime": 85.3532, "eval_samples_per_second": 2.039, "eval_steps_per_second": 0.258, "step": 370 }, { "epoch": 1.44, "learning_rate": 0.00018104725415070243, "loss": 1.2616, "step": 375 }, { "epoch": 1.44, "eval_accuracy": 0.46551724137931033, "eval_loss": 1.3622076511383057, "eval_runtime": 85.6174, "eval_samples_per_second": 2.032, "eval_steps_per_second": 0.257, "step": 375 }, { "epoch": 1.46, "learning_rate": 0.0001807918263090677, "loss": 1.4582, "step": 380 }, { "epoch": 1.46, "eval_accuracy": 0.4942528735632184, "eval_loss": 1.2709373235702515, "eval_runtime": 87.9037, "eval_samples_per_second": 1.979, "eval_steps_per_second": 0.25, "step": 380 }, { "epoch": 1.48, "learning_rate": 0.00018053639846743295, "loss": 1.7958, "step": 385 }, { "epoch": 1.48, "eval_accuracy": 0.3620689655172414, "eval_loss": 1.5655514001846313, "eval_runtime": 89.7184, "eval_samples_per_second": 1.939, "eval_steps_per_second": 0.245, "step": 385 }, { "epoch": 1.49, "learning_rate": 0.00018028097062579822, "loss": 1.4743, "step": 390 }, { "epoch": 1.49, "eval_accuracy": 0.4367816091954023, "eval_loss": 1.3905311822891235, "eval_runtime": 87.4724, "eval_samples_per_second": 1.989, "eval_steps_per_second": 0.252, "step": 390 }, { "epoch": 1.51, "learning_rate": 0.00018002554278416348, "loss": 1.3111, "step": 395 }, { "epoch": 1.51, "eval_accuracy": 0.5287356321839081, "eval_loss": 1.3618022203445435, "eval_runtime": 85.8256, "eval_samples_per_second": 2.027, "eval_steps_per_second": 0.256, "step": 395 }, { "epoch": 1.53, "learning_rate": 0.00017977011494252874, "loss": 1.1186, "step": 400 }, { "epoch": 1.53, "eval_accuracy": 0.5, "eval_loss": 1.4678940773010254, "eval_runtime": 87.4839, "eval_samples_per_second": 1.989, "eval_steps_per_second": 0.251, "step": 400 }, { "epoch": 1.55, "learning_rate": 0.000179514687100894, "loss": 1.3566, "step": 405 }, { "epoch": 1.55, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.6265980005264282, "eval_runtime": 85.8882, "eval_samples_per_second": 2.026, "eval_steps_per_second": 0.256, "step": 405 }, { "epoch": 1.57, "learning_rate": 0.00017925925925925927, "loss": 1.4949, "step": 410 }, { "epoch": 1.57, "eval_accuracy": 0.5057471264367817, "eval_loss": 1.4489529132843018, "eval_runtime": 84.8544, "eval_samples_per_second": 2.051, "eval_steps_per_second": 0.259, "step": 410 }, { "epoch": 1.59, "learning_rate": 0.00017900383141762453, "loss": 1.2182, "step": 415 }, { "epoch": 1.59, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.152848720550537, "eval_runtime": 86.7262, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.254, "step": 415 }, { "epoch": 1.61, "learning_rate": 0.0001787484035759898, "loss": 1.1455, "step": 420 }, { "epoch": 1.61, "eval_accuracy": 0.4540229885057471, "eval_loss": 1.2889221906661987, "eval_runtime": 89.054, "eval_samples_per_second": 1.954, "eval_steps_per_second": 0.247, "step": 420 }, { "epoch": 1.63, "learning_rate": 0.00017849297573435506, "loss": 1.0795, "step": 425 }, { "epoch": 1.63, "eval_accuracy": 0.4827586206896552, "eval_loss": 1.4589430093765259, "eval_runtime": 85.7656, "eval_samples_per_second": 2.029, "eval_steps_per_second": 0.257, "step": 425 }, { "epoch": 1.65, "learning_rate": 0.00017823754789272032, "loss": 1.2771, "step": 430 }, { "epoch": 1.65, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.5259735584259033, "eval_runtime": 85.8919, "eval_samples_per_second": 2.026, "eval_steps_per_second": 0.256, "step": 430 }, { "epoch": 1.67, "learning_rate": 0.0001779821200510856, "loss": 1.2858, "step": 435 }, { "epoch": 1.67, "eval_accuracy": 0.4885057471264368, "eval_loss": 1.4051034450531006, "eval_runtime": 86.1444, "eval_samples_per_second": 2.02, "eval_steps_per_second": 0.255, "step": 435 }, { "epoch": 1.69, "learning_rate": 0.00017772669220945085, "loss": 1.0829, "step": 440 }, { "epoch": 1.69, "eval_accuracy": 0.5344827586206896, "eval_loss": 1.3036625385284424, "eval_runtime": 85.2368, "eval_samples_per_second": 2.041, "eval_steps_per_second": 0.258, "step": 440 }, { "epoch": 1.7, "learning_rate": 0.00017747126436781609, "loss": 1.5866, "step": 445 }, { "epoch": 1.7, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.328389286994934, "eval_runtime": 85.533, "eval_samples_per_second": 2.034, "eval_steps_per_second": 0.257, "step": 445 }, { "epoch": 1.72, "learning_rate": 0.00017721583652618138, "loss": 1.2964, "step": 450 }, { "epoch": 1.72, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.2732243537902832, "eval_runtime": 89.1901, "eval_samples_per_second": 1.951, "eval_steps_per_second": 0.247, "step": 450 }, { "epoch": 1.74, "learning_rate": 0.0001769604086845466, "loss": 1.1894, "step": 455 }, { "epoch": 1.74, "eval_accuracy": 0.42528735632183906, "eval_loss": 1.4987748861312866, "eval_runtime": 85.0991, "eval_samples_per_second": 2.045, "eval_steps_per_second": 0.259, "step": 455 }, { "epoch": 1.76, "learning_rate": 0.0001767049808429119, "loss": 1.5003, "step": 460 }, { "epoch": 1.76, "eval_accuracy": 0.46551724137931033, "eval_loss": 1.329574704170227, "eval_runtime": 86.0734, "eval_samples_per_second": 2.022, "eval_steps_per_second": 0.256, "step": 460 }, { "epoch": 1.78, "learning_rate": 0.00017644955300127714, "loss": 1.1569, "step": 465 }, { "epoch": 1.78, "eval_accuracy": 0.5, "eval_loss": 1.428106665611267, "eval_runtime": 83.952, "eval_samples_per_second": 2.073, "eval_steps_per_second": 0.262, "step": 465 }, { "epoch": 1.8, "learning_rate": 0.00017619412515964243, "loss": 1.0751, "step": 470 }, { "epoch": 1.8, "eval_accuracy": 0.47126436781609193, "eval_loss": 1.5787663459777832, "eval_runtime": 85.6942, "eval_samples_per_second": 2.03, "eval_steps_per_second": 0.257, "step": 470 }, { "epoch": 1.82, "learning_rate": 0.00017593869731800767, "loss": 1.592, "step": 475 }, { "epoch": 1.82, "eval_accuracy": 0.5, "eval_loss": 1.2109367847442627, "eval_runtime": 84.8219, "eval_samples_per_second": 2.051, "eval_steps_per_second": 0.259, "step": 475 }, { "epoch": 1.84, "learning_rate": 0.00017568326947637293, "loss": 1.4279, "step": 480 }, { "epoch": 1.84, "eval_accuracy": 0.5287356321839081, "eval_loss": 1.207238793373108, "eval_runtime": 86.384, "eval_samples_per_second": 2.014, "eval_steps_per_second": 0.255, "step": 480 }, { "epoch": 1.86, "learning_rate": 0.0001754278416347382, "loss": 1.19, "step": 485 }, { "epoch": 1.86, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.276541829109192, "eval_runtime": 85.9845, "eval_samples_per_second": 2.024, "eval_steps_per_second": 0.256, "step": 485 }, { "epoch": 1.88, "learning_rate": 0.00017517241379310346, "loss": 1.3807, "step": 490 }, { "epoch": 1.88, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.3951900005340576, "eval_runtime": 86.4759, "eval_samples_per_second": 2.012, "eval_steps_per_second": 0.254, "step": 490 }, { "epoch": 1.9, "learning_rate": 0.00017491698595146872, "loss": 1.2857, "step": 495 }, { "epoch": 1.9, "eval_accuracy": 0.5057471264367817, "eval_loss": 1.2476894855499268, "eval_runtime": 85.1521, "eval_samples_per_second": 2.043, "eval_steps_per_second": 0.258, "step": 495 }, { "epoch": 1.92, "learning_rate": 0.00017466155810983398, "loss": 1.229, "step": 500 }, { "epoch": 1.92, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.2090439796447754, "eval_runtime": 86.6818, "eval_samples_per_second": 2.007, "eval_steps_per_second": 0.254, "step": 500 }, { "epoch": 1.93, "learning_rate": 0.00017440613026819925, "loss": 0.9704, "step": 505 }, { "epoch": 1.93, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.0011608600616455, "eval_runtime": 87.3092, "eval_samples_per_second": 1.993, "eval_steps_per_second": 0.252, "step": 505 }, { "epoch": 1.95, "learning_rate": 0.0001741507024265645, "loss": 1.2077, "step": 510 }, { "epoch": 1.95, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.0754402875900269, "eval_runtime": 85.1447, "eval_samples_per_second": 2.044, "eval_steps_per_second": 0.258, "step": 510 }, { "epoch": 1.97, "learning_rate": 0.00017389527458492975, "loss": 1.31, "step": 515 }, { "epoch": 1.97, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.1699658632278442, "eval_runtime": 85.8978, "eval_samples_per_second": 2.026, "eval_steps_per_second": 0.256, "step": 515 }, { "epoch": 1.99, "learning_rate": 0.00017363984674329504, "loss": 0.9374, "step": 520 }, { "epoch": 1.99, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.1662834882736206, "eval_runtime": 84.761, "eval_samples_per_second": 2.053, "eval_steps_per_second": 0.26, "step": 520 }, { "epoch": 2.01, "learning_rate": 0.00017338441890166027, "loss": 1.3027, "step": 525 }, { "epoch": 2.01, "eval_accuracy": 0.5057471264367817, "eval_loss": 1.2674177885055542, "eval_runtime": 87.1265, "eval_samples_per_second": 1.997, "eval_steps_per_second": 0.253, "step": 525 }, { "epoch": 2.03, "learning_rate": 0.00017312899106002556, "loss": 1.2744, "step": 530 }, { "epoch": 2.03, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.1397805213928223, "eval_runtime": 87.5192, "eval_samples_per_second": 1.988, "eval_steps_per_second": 0.251, "step": 530 }, { "epoch": 2.05, "learning_rate": 0.0001728735632183908, "loss": 0.6539, "step": 535 }, { "epoch": 2.05, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.3558400869369507, "eval_runtime": 87.7373, "eval_samples_per_second": 1.983, "eval_steps_per_second": 0.251, "step": 535 }, { "epoch": 2.07, "learning_rate": 0.0001726181353767561, "loss": 1.3282, "step": 540 }, { "epoch": 2.07, "eval_accuracy": 0.4885057471264368, "eval_loss": 1.671147346496582, "eval_runtime": 85.9997, "eval_samples_per_second": 2.023, "eval_steps_per_second": 0.256, "step": 540 }, { "epoch": 2.09, "learning_rate": 0.00017236270753512133, "loss": 1.7389, "step": 545 }, { "epoch": 2.09, "eval_accuracy": 0.4885057471264368, "eval_loss": 1.4171918630599976, "eval_runtime": 86.1498, "eval_samples_per_second": 2.02, "eval_steps_per_second": 0.255, "step": 545 }, { "epoch": 2.11, "learning_rate": 0.0001721072796934866, "loss": 0.8713, "step": 550 }, { "epoch": 2.11, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.4530651569366455, "eval_runtime": 84.7588, "eval_samples_per_second": 2.053, "eval_steps_per_second": 0.26, "step": 550 }, { "epoch": 2.13, "learning_rate": 0.00017185185185185185, "loss": 1.3864, "step": 555 }, { "epoch": 2.13, "eval_accuracy": 0.41954022988505746, "eval_loss": 1.4029399156570435, "eval_runtime": 86.0806, "eval_samples_per_second": 2.021, "eval_steps_per_second": 0.256, "step": 555 }, { "epoch": 2.15, "learning_rate": 0.00017159642401021712, "loss": 1.3272, "step": 560 }, { "epoch": 2.15, "eval_accuracy": 0.4942528735632184, "eval_loss": 1.2591314315795898, "eval_runtime": 85.0541, "eval_samples_per_second": 2.046, "eval_steps_per_second": 0.259, "step": 560 }, { "epoch": 2.16, "learning_rate": 0.00017134099616858238, "loss": 1.4224, "step": 565 }, { "epoch": 2.16, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.4082285165786743, "eval_runtime": 86.6165, "eval_samples_per_second": 2.009, "eval_steps_per_second": 0.254, "step": 565 }, { "epoch": 2.18, "learning_rate": 0.00017108556832694764, "loss": 1.2348, "step": 570 }, { "epoch": 2.18, "eval_accuracy": 0.4942528735632184, "eval_loss": 1.2709393501281738, "eval_runtime": 86.2781, "eval_samples_per_second": 2.017, "eval_steps_per_second": 0.255, "step": 570 }, { "epoch": 2.2, "learning_rate": 0.0001708301404853129, "loss": 1.4141, "step": 575 }, { "epoch": 2.2, "eval_accuracy": 0.5057471264367817, "eval_loss": 1.338424801826477, "eval_runtime": 83.8318, "eval_samples_per_second": 2.076, "eval_steps_per_second": 0.262, "step": 575 }, { "epoch": 2.22, "learning_rate": 0.00017057471264367817, "loss": 1.3264, "step": 580 }, { "epoch": 2.22, "eval_accuracy": 0.5287356321839081, "eval_loss": 1.2666399478912354, "eval_runtime": 85.5732, "eval_samples_per_second": 2.033, "eval_steps_per_second": 0.257, "step": 580 }, { "epoch": 2.24, "learning_rate": 0.00017031928480204343, "loss": 1.1512, "step": 585 }, { "epoch": 2.24, "eval_accuracy": 0.5172413793103449, "eval_loss": 1.2081302404403687, "eval_runtime": 86.2749, "eval_samples_per_second": 2.017, "eval_steps_per_second": 0.255, "step": 585 }, { "epoch": 2.26, "learning_rate": 0.0001700638569604087, "loss": 1.0147, "step": 590 }, { "epoch": 2.26, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.1952056884765625, "eval_runtime": 84.237, "eval_samples_per_second": 2.066, "eval_steps_per_second": 0.261, "step": 590 }, { "epoch": 2.28, "learning_rate": 0.00016980842911877396, "loss": 1.1854, "step": 595 }, { "epoch": 2.28, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.1515438556671143, "eval_runtime": 89.8852, "eval_samples_per_second": 1.936, "eval_steps_per_second": 0.245, "step": 595 }, { "epoch": 2.3, "learning_rate": 0.00016955300127713922, "loss": 1.1736, "step": 600 }, { "epoch": 2.3, "eval_accuracy": 0.5517241379310345, "eval_loss": 1.132529377937317, "eval_runtime": 86.1072, "eval_samples_per_second": 2.021, "eval_steps_per_second": 0.255, "step": 600 }, { "epoch": 2.32, "learning_rate": 0.0001692975734355045, "loss": 0.8421, "step": 605 }, { "epoch": 2.32, "eval_accuracy": 0.47126436781609193, "eval_loss": 1.3057594299316406, "eval_runtime": 86.3601, "eval_samples_per_second": 2.015, "eval_steps_per_second": 0.255, "step": 605 }, { "epoch": 2.34, "learning_rate": 0.00016904214559386975, "loss": 1.0093, "step": 610 }, { "epoch": 2.34, "eval_accuracy": 0.5344827586206896, "eval_loss": 1.1371407508850098, "eval_runtime": 86.415, "eval_samples_per_second": 2.014, "eval_steps_per_second": 0.255, "step": 610 }, { "epoch": 2.36, "learning_rate": 0.000168786717752235, "loss": 1.096, "step": 615 }, { "epoch": 2.36, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.1986898183822632, "eval_runtime": 86.4602, "eval_samples_per_second": 2.012, "eval_steps_per_second": 0.254, "step": 615 }, { "epoch": 2.38, "learning_rate": 0.00016853128991060025, "loss": 1.1738, "step": 620 }, { "epoch": 2.38, "eval_accuracy": 0.5229885057471264, "eval_loss": 1.163020372390747, "eval_runtime": 85.7622, "eval_samples_per_second": 2.029, "eval_steps_per_second": 0.257, "step": 620 }, { "epoch": 2.39, "learning_rate": 0.00016827586206896554, "loss": 0.7222, "step": 625 }, { "epoch": 2.39, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.3792271614074707, "eval_runtime": 85.7187, "eval_samples_per_second": 2.03, "eval_steps_per_second": 0.257, "step": 625 }, { "epoch": 2.41, "learning_rate": 0.00016802043422733078, "loss": 1.259, "step": 630 }, { "epoch": 2.41, "eval_accuracy": 0.5057471264367817, "eval_loss": 1.4273347854614258, "eval_runtime": 85.4147, "eval_samples_per_second": 2.037, "eval_steps_per_second": 0.258, "step": 630 }, { "epoch": 2.43, "learning_rate": 0.00016776500638569607, "loss": 0.8788, "step": 635 }, { "epoch": 2.43, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.4665697813034058, "eval_runtime": 86.7626, "eval_samples_per_second": 2.005, "eval_steps_per_second": 0.254, "step": 635 }, { "epoch": 2.45, "learning_rate": 0.0001675095785440613, "loss": 1.527, "step": 640 }, { "epoch": 2.45, "eval_accuracy": 0.47701149425287354, "eval_loss": 1.4998698234558105, "eval_runtime": 85.5223, "eval_samples_per_second": 2.035, "eval_steps_per_second": 0.257, "step": 640 }, { "epoch": 2.47, "learning_rate": 0.0001672541507024266, "loss": 1.1176, "step": 645 }, { "epoch": 2.47, "eval_accuracy": 0.45977011494252873, "eval_loss": 1.571102261543274, "eval_runtime": 84.3165, "eval_samples_per_second": 2.064, "eval_steps_per_second": 0.261, "step": 645 }, { "epoch": 2.49, "learning_rate": 0.00016699872286079183, "loss": 0.9834, "step": 650 }, { "epoch": 2.49, "eval_accuracy": 0.5, "eval_loss": 1.5396226644515991, "eval_runtime": 83.6612, "eval_samples_per_second": 2.08, "eval_steps_per_second": 0.263, "step": 650 }, { "epoch": 2.51, "learning_rate": 0.0001667432950191571, "loss": 1.046, "step": 655 }, { "epoch": 2.51, "eval_accuracy": 0.5344827586206896, "eval_loss": 1.2918277978897095, "eval_runtime": 84.0945, "eval_samples_per_second": 2.069, "eval_steps_per_second": 0.262, "step": 655 }, { "epoch": 2.53, "learning_rate": 0.00016648786717752236, "loss": 1.2347, "step": 660 }, { "epoch": 2.53, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.3252575397491455, "eval_runtime": 86.0389, "eval_samples_per_second": 2.022, "eval_steps_per_second": 0.256, "step": 660 }, { "epoch": 2.55, "learning_rate": 0.00016623243933588762, "loss": 0.7441, "step": 665 }, { "epoch": 2.55, "eval_accuracy": 0.5344827586206896, "eval_loss": 1.4183677434921265, "eval_runtime": 84.6125, "eval_samples_per_second": 2.056, "eval_steps_per_second": 0.26, "step": 665 }, { "epoch": 2.57, "learning_rate": 0.00016597701149425288, "loss": 1.3217, "step": 670 }, { "epoch": 2.57, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.2241300344467163, "eval_runtime": 84.8503, "eval_samples_per_second": 2.051, "eval_steps_per_second": 0.259, "step": 670 }, { "epoch": 2.59, "learning_rate": 0.00016572158365261815, "loss": 1.517, "step": 675 }, { "epoch": 2.59, "eval_accuracy": 0.4482758620689655, "eval_loss": 1.3935520648956299, "eval_runtime": 85.2745, "eval_samples_per_second": 2.04, "eval_steps_per_second": 0.258, "step": 675 }, { "epoch": 2.61, "learning_rate": 0.0001654661558109834, "loss": 1.5603, "step": 680 }, { "epoch": 2.61, "eval_accuracy": 0.5114942528735632, "eval_loss": 1.2586129903793335, "eval_runtime": 87.4344, "eval_samples_per_second": 1.99, "eval_steps_per_second": 0.252, "step": 680 }, { "epoch": 2.62, "learning_rate": 0.00016521072796934867, "loss": 1.0121, "step": 685 }, { "epoch": 2.62, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.1527811288833618, "eval_runtime": 83.8274, "eval_samples_per_second": 2.076, "eval_steps_per_second": 0.262, "step": 685 }, { "epoch": 2.64, "learning_rate": 0.0001649553001277139, "loss": 0.9157, "step": 690 }, { "epoch": 2.64, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.1511900424957275, "eval_runtime": 85.1952, "eval_samples_per_second": 2.042, "eval_steps_per_second": 0.258, "step": 690 }, { "epoch": 2.66, "learning_rate": 0.0001646998722860792, "loss": 1.0072, "step": 695 }, { "epoch": 2.66, "eval_accuracy": 0.5574712643678161, "eval_loss": 1.2195591926574707, "eval_runtime": 84.6368, "eval_samples_per_second": 2.056, "eval_steps_per_second": 0.26, "step": 695 }, { "epoch": 2.68, "learning_rate": 0.00016444444444444444, "loss": 1.1345, "step": 700 }, { "epoch": 2.68, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.088049054145813, "eval_runtime": 84.7373, "eval_samples_per_second": 2.053, "eval_steps_per_second": 0.26, "step": 700 }, { "epoch": 2.7, "learning_rate": 0.00016418901660280973, "loss": 1.1517, "step": 705 }, { "epoch": 2.7, "eval_accuracy": 0.5287356321839081, "eval_loss": 1.1540721654891968, "eval_runtime": 86.2918, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.255, "step": 705 }, { "epoch": 2.72, "learning_rate": 0.00016393358876117496, "loss": 1.2588, "step": 710 }, { "epoch": 2.72, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.1188956499099731, "eval_runtime": 83.958, "eval_samples_per_second": 2.072, "eval_steps_per_second": 0.262, "step": 710 }, { "epoch": 2.74, "learning_rate": 0.00016367816091954025, "loss": 0.9318, "step": 715 }, { "epoch": 2.74, "eval_accuracy": 0.5574712643678161, "eval_loss": 1.1550383567810059, "eval_runtime": 83.2348, "eval_samples_per_second": 2.09, "eval_steps_per_second": 0.264, "step": 715 }, { "epoch": 2.76, "learning_rate": 0.0001634227330779055, "loss": 0.7644, "step": 720 }, { "epoch": 2.76, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.1556813716888428, "eval_runtime": 83.996, "eval_samples_per_second": 2.072, "eval_steps_per_second": 0.262, "step": 720 }, { "epoch": 2.78, "learning_rate": 0.00016316730523627075, "loss": 1.2642, "step": 725 }, { "epoch": 2.78, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.0784178972244263, "eval_runtime": 86.6539, "eval_samples_per_second": 2.008, "eval_steps_per_second": 0.254, "step": 725 }, { "epoch": 2.8, "learning_rate": 0.00016291187739463602, "loss": 1.3219, "step": 730 }, { "epoch": 2.8, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.0925211906433105, "eval_runtime": 73.4182, "eval_samples_per_second": 2.37, "eval_steps_per_second": 0.3, "step": 730 }, { "epoch": 2.82, "learning_rate": 0.00016265644955300128, "loss": 1.2288, "step": 735 }, { "epoch": 2.82, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.0299782752990723, "eval_runtime": 72.0457, "eval_samples_per_second": 2.415, "eval_steps_per_second": 0.305, "step": 735 }, { "epoch": 2.84, "learning_rate": 0.00016240102171136654, "loss": 0.8471, "step": 740 }, { "epoch": 2.84, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.0471221208572388, "eval_runtime": 70.8776, "eval_samples_per_second": 2.455, "eval_steps_per_second": 0.31, "step": 740 }, { "epoch": 2.85, "learning_rate": 0.0001621455938697318, "loss": 0.9432, "step": 745 }, { "epoch": 2.85, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.133671760559082, "eval_runtime": 73.7879, "eval_samples_per_second": 2.358, "eval_steps_per_second": 0.298, "step": 745 }, { "epoch": 2.87, "learning_rate": 0.00016189016602809707, "loss": 0.8942, "step": 750 }, { "epoch": 2.87, "eval_accuracy": 0.5977011494252874, "eval_loss": 1.034655213356018, "eval_runtime": 73.4056, "eval_samples_per_second": 2.37, "eval_steps_per_second": 0.3, "step": 750 }, { "epoch": 2.89, "learning_rate": 0.00016163473818646233, "loss": 0.8582, "step": 755 }, { "epoch": 2.89, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9947394132614136, "eval_runtime": 73.3937, "eval_samples_per_second": 2.371, "eval_steps_per_second": 0.3, "step": 755 }, { "epoch": 2.91, "learning_rate": 0.0001613793103448276, "loss": 0.7802, "step": 760 }, { "epoch": 2.91, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.0483365058898926, "eval_runtime": 73.5783, "eval_samples_per_second": 2.365, "eval_steps_per_second": 0.299, "step": 760 }, { "epoch": 2.93, "learning_rate": 0.00016112388250319286, "loss": 0.9362, "step": 765 }, { "epoch": 2.93, "eval_accuracy": 0.603448275862069, "eval_loss": 1.1573561429977417, "eval_runtime": 74.7479, "eval_samples_per_second": 2.328, "eval_steps_per_second": 0.294, "step": 765 }, { "epoch": 2.95, "learning_rate": 0.00016086845466155812, "loss": 0.7052, "step": 770 }, { "epoch": 2.95, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9469316005706787, "eval_runtime": 71.6241, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 770 }, { "epoch": 2.97, "learning_rate": 0.0001606130268199234, "loss": 0.9615, "step": 775 }, { "epoch": 2.97, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.171260952949524, "eval_runtime": 74.1608, "eval_samples_per_second": 2.346, "eval_steps_per_second": 0.297, "step": 775 }, { "epoch": 2.99, "learning_rate": 0.00016035759897828865, "loss": 1.3158, "step": 780 }, { "epoch": 2.99, "eval_accuracy": 0.603448275862069, "eval_loss": 1.0248513221740723, "eval_runtime": 71.7492, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 780 }, { "epoch": 3.01, "learning_rate": 0.0001601021711366539, "loss": 1.4599, "step": 785 }, { "epoch": 3.01, "eval_accuracy": 0.5574712643678161, "eval_loss": 1.2699826955795288, "eval_runtime": 72.4519, "eval_samples_per_second": 2.402, "eval_steps_per_second": 0.304, "step": 785 }, { "epoch": 3.03, "learning_rate": 0.00015984674329501918, "loss": 1.3217, "step": 790 }, { "epoch": 3.03, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.1198772192001343, "eval_runtime": 71.6, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 790 }, { "epoch": 3.05, "learning_rate": 0.0001595913154533844, "loss": 0.8367, "step": 795 }, { "epoch": 3.05, "eval_accuracy": 0.5689655172413793, "eval_loss": 1.045596957206726, "eval_runtime": 72.1733, "eval_samples_per_second": 2.411, "eval_steps_per_second": 0.305, "step": 795 }, { "epoch": 3.07, "learning_rate": 0.0001593358876117497, "loss": 0.7845, "step": 800 }, { "epoch": 3.07, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.0649093389511108, "eval_runtime": 73.3982, "eval_samples_per_second": 2.371, "eval_steps_per_second": 0.3, "step": 800 }, { "epoch": 3.08, "learning_rate": 0.00015908045977011494, "loss": 0.8503, "step": 805 }, { "epoch": 3.08, "eval_accuracy": 0.6206896551724138, "eval_loss": 0.9966627359390259, "eval_runtime": 72.2401, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 805 }, { "epoch": 3.1, "learning_rate": 0.00015882503192848023, "loss": 0.6945, "step": 810 }, { "epoch": 3.1, "eval_accuracy": 0.6436781609195402, "eval_loss": 0.9507883191108704, "eval_runtime": 71.8074, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 810 }, { "epoch": 3.12, "learning_rate": 0.00015856960408684547, "loss": 0.916, "step": 815 }, { "epoch": 3.12, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.0732645988464355, "eval_runtime": 72.2273, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 815 }, { "epoch": 3.14, "learning_rate": 0.00015831417624521076, "loss": 0.9392, "step": 820 }, { "epoch": 3.14, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.0749492645263672, "eval_runtime": 71.7733, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 820 }, { "epoch": 3.16, "learning_rate": 0.000158058748403576, "loss": 1.0007, "step": 825 }, { "epoch": 3.16, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.1707144975662231, "eval_runtime": 72.8924, "eval_samples_per_second": 2.387, "eval_steps_per_second": 0.302, "step": 825 }, { "epoch": 3.18, "learning_rate": 0.00015780332056194128, "loss": 1.2342, "step": 830 }, { "epoch": 3.18, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.2817833423614502, "eval_runtime": 74.9744, "eval_samples_per_second": 2.321, "eval_steps_per_second": 0.293, "step": 830 }, { "epoch": 3.2, "learning_rate": 0.00015754789272030652, "loss": 0.9419, "step": 835 }, { "epoch": 3.2, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.9361924529075623, "eval_runtime": 75.7884, "eval_samples_per_second": 2.296, "eval_steps_per_second": 0.29, "step": 835 }, { "epoch": 3.22, "learning_rate": 0.00015729246487867178, "loss": 0.7473, "step": 840 }, { "epoch": 3.22, "eval_accuracy": 0.5517241379310345, "eval_loss": 1.2352019548416138, "eval_runtime": 73.6409, "eval_samples_per_second": 2.363, "eval_steps_per_second": 0.299, "step": 840 }, { "epoch": 3.24, "learning_rate": 0.00015703703703703705, "loss": 1.3524, "step": 845 }, { "epoch": 3.24, "eval_accuracy": 0.5574712643678161, "eval_loss": 1.2916122674942017, "eval_runtime": 73.9743, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 845 }, { "epoch": 3.26, "learning_rate": 0.0001567816091954023, "loss": 0.7914, "step": 850 }, { "epoch": 3.26, "eval_accuracy": 0.603448275862069, "eval_loss": 1.0420141220092773, "eval_runtime": 73.374, "eval_samples_per_second": 2.371, "eval_steps_per_second": 0.3, "step": 850 }, { "epoch": 3.28, "learning_rate": 0.00015652618135376757, "loss": 0.9782, "step": 855 }, { "epoch": 3.28, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.0024324655532837, "eval_runtime": 73.5739, "eval_samples_per_second": 2.365, "eval_steps_per_second": 0.299, "step": 855 }, { "epoch": 3.3, "learning_rate": 0.00015627075351213284, "loss": 0.6729, "step": 860 }, { "epoch": 3.3, "eval_accuracy": 0.632183908045977, "eval_loss": 1.0617214441299438, "eval_runtime": 73.2116, "eval_samples_per_second": 2.377, "eval_steps_per_second": 0.3, "step": 860 }, { "epoch": 3.31, "learning_rate": 0.0001560153256704981, "loss": 1.1071, "step": 865 }, { "epoch": 3.31, "eval_accuracy": 0.5, "eval_loss": 1.5525238513946533, "eval_runtime": 74.0895, "eval_samples_per_second": 2.349, "eval_steps_per_second": 0.297, "step": 865 }, { "epoch": 3.33, "learning_rate": 0.00015575989782886336, "loss": 1.6989, "step": 870 }, { "epoch": 3.33, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.0040068626403809, "eval_runtime": 75.3064, "eval_samples_per_second": 2.311, "eval_steps_per_second": 0.292, "step": 870 }, { "epoch": 3.35, "learning_rate": 0.0001555044699872286, "loss": 0.7271, "step": 875 }, { "epoch": 3.35, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.2051146030426025, "eval_runtime": 73.7475, "eval_samples_per_second": 2.359, "eval_steps_per_second": 0.298, "step": 875 }, { "epoch": 3.37, "learning_rate": 0.0001552490421455939, "loss": 0.8168, "step": 880 }, { "epoch": 3.37, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.0161758661270142, "eval_runtime": 74.8589, "eval_samples_per_second": 2.324, "eval_steps_per_second": 0.294, "step": 880 }, { "epoch": 3.39, "learning_rate": 0.00015499361430395913, "loss": 1.0251, "step": 885 }, { "epoch": 3.39, "eval_accuracy": 0.5402298850574713, "eval_loss": 1.2411690950393677, "eval_runtime": 74.1808, "eval_samples_per_second": 2.346, "eval_steps_per_second": 0.297, "step": 885 }, { "epoch": 3.41, "learning_rate": 0.00015473818646232442, "loss": 1.262, "step": 890 }, { "epoch": 3.41, "eval_accuracy": 0.632183908045977, "eval_loss": 1.0844931602478027, "eval_runtime": 74.7088, "eval_samples_per_second": 2.329, "eval_steps_per_second": 0.294, "step": 890 }, { "epoch": 3.43, "learning_rate": 0.00015448275862068965, "loss": 0.752, "step": 895 }, { "epoch": 3.43, "eval_accuracy": 0.632183908045977, "eval_loss": 1.1151267290115356, "eval_runtime": 73.9476, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 895 }, { "epoch": 3.45, "learning_rate": 0.00015422733077905494, "loss": 0.781, "step": 900 }, { "epoch": 3.45, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.1569331884384155, "eval_runtime": 73.2377, "eval_samples_per_second": 2.376, "eval_steps_per_second": 0.3, "step": 900 }, { "epoch": 3.47, "learning_rate": 0.00015397190293742018, "loss": 0.689, "step": 905 }, { "epoch": 3.47, "eval_accuracy": 0.5804597701149425, "eval_loss": 1.1399520635604858, "eval_runtime": 74.5155, "eval_samples_per_second": 2.335, "eval_steps_per_second": 0.295, "step": 905 }, { "epoch": 3.49, "learning_rate": 0.00015371647509578544, "loss": 1.1359, "step": 910 }, { "epoch": 3.49, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.2362200021743774, "eval_runtime": 73.5703, "eval_samples_per_second": 2.365, "eval_steps_per_second": 0.299, "step": 910 }, { "epoch": 3.51, "learning_rate": 0.0001534610472541507, "loss": 1.0256, "step": 915 }, { "epoch": 3.51, "eval_accuracy": 0.5, "eval_loss": 1.3777178525924683, "eval_runtime": 73.8937, "eval_samples_per_second": 2.355, "eval_steps_per_second": 0.298, "step": 915 }, { "epoch": 3.52, "learning_rate": 0.00015320561941251597, "loss": 1.0887, "step": 920 }, { "epoch": 3.52, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.1072226762771606, "eval_runtime": 73.4404, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.3, "step": 920 }, { "epoch": 3.54, "learning_rate": 0.00015295019157088123, "loss": 0.6106, "step": 925 }, { "epoch": 3.54, "eval_accuracy": 0.5459770114942529, "eval_loss": 1.234115481376648, "eval_runtime": 74.0108, "eval_samples_per_second": 2.351, "eval_steps_per_second": 0.297, "step": 925 }, { "epoch": 3.56, "learning_rate": 0.0001526947637292465, "loss": 1.3364, "step": 930 }, { "epoch": 3.56, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.1364753246307373, "eval_runtime": 73.2306, "eval_samples_per_second": 2.376, "eval_steps_per_second": 0.3, "step": 930 }, { "epoch": 3.58, "learning_rate": 0.00015243933588761176, "loss": 0.7705, "step": 935 }, { "epoch": 3.58, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.230558156967163, "eval_runtime": 74.0579, "eval_samples_per_second": 2.35, "eval_steps_per_second": 0.297, "step": 935 }, { "epoch": 3.6, "learning_rate": 0.00015218390804597702, "loss": 0.8368, "step": 940 }, { "epoch": 3.6, "eval_accuracy": 0.632183908045977, "eval_loss": 1.0860326290130615, "eval_runtime": 72.9069, "eval_samples_per_second": 2.387, "eval_steps_per_second": 0.302, "step": 940 }, { "epoch": 3.62, "learning_rate": 0.00015192848020434226, "loss": 0.8946, "step": 945 }, { "epoch": 3.62, "eval_accuracy": 0.5804597701149425, "eval_loss": 1.2349048852920532, "eval_runtime": 76.0014, "eval_samples_per_second": 2.289, "eval_steps_per_second": 0.289, "step": 945 }, { "epoch": 3.64, "learning_rate": 0.00015167305236270755, "loss": 1.226, "step": 950 }, { "epoch": 3.64, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.2052266597747803, "eval_runtime": 74.5817, "eval_samples_per_second": 2.333, "eval_steps_per_second": 0.295, "step": 950 }, { "epoch": 3.66, "learning_rate": 0.00015141762452107279, "loss": 1.1585, "step": 955 }, { "epoch": 3.66, "eval_accuracy": 0.6436781609195402, "eval_loss": 0.9784772396087646, "eval_runtime": 72.3958, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.304, "step": 955 }, { "epoch": 3.68, "learning_rate": 0.00015116219667943808, "loss": 0.7824, "step": 960 }, { "epoch": 3.68, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8754329681396484, "eval_runtime": 71.8422, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 960 }, { "epoch": 3.7, "learning_rate": 0.0001509067688378033, "loss": 0.6437, "step": 965 }, { "epoch": 3.7, "eval_accuracy": 0.6264367816091954, "eval_loss": 0.9256632328033447, "eval_runtime": 72.4538, "eval_samples_per_second": 2.402, "eval_steps_per_second": 0.304, "step": 965 }, { "epoch": 3.72, "learning_rate": 0.0001506513409961686, "loss": 0.8286, "step": 970 }, { "epoch": 3.72, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.9190903306007385, "eval_runtime": 71.6686, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 970 }, { "epoch": 3.74, "learning_rate": 0.00015039591315453384, "loss": 0.8671, "step": 975 }, { "epoch": 3.74, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.8495645523071289, "eval_runtime": 73.9876, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 975 }, { "epoch": 3.75, "learning_rate": 0.0001501404853128991, "loss": 0.5814, "step": 980 }, { "epoch": 3.75, "eval_accuracy": 0.6494252873563219, "eval_loss": 0.9788767695426941, "eval_runtime": 71.5848, "eval_samples_per_second": 2.431, "eval_steps_per_second": 0.307, "step": 980 }, { "epoch": 3.77, "learning_rate": 0.00014988505747126437, "loss": 0.7895, "step": 985 }, { "epoch": 3.77, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.032570719718933, "eval_runtime": 72.0801, "eval_samples_per_second": 2.414, "eval_steps_per_second": 0.305, "step": 985 }, { "epoch": 3.79, "learning_rate": 0.00014962962962962963, "loss": 0.8104, "step": 990 }, { "epoch": 3.79, "eval_accuracy": 0.632183908045977, "eval_loss": 0.9865307211875916, "eval_runtime": 71.6806, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 990 }, { "epoch": 3.81, "learning_rate": 0.0001493742017879949, "loss": 0.8277, "step": 995 }, { "epoch": 3.81, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.0854288339614868, "eval_runtime": 72.3687, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 995 }, { "epoch": 3.83, "learning_rate": 0.00014911877394636016, "loss": 0.5801, "step": 1000 }, { "epoch": 3.83, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.0070602893829346, "eval_runtime": 72.8007, "eval_samples_per_second": 2.39, "eval_steps_per_second": 0.302, "step": 1000 }, { "epoch": 3.85, "learning_rate": 0.00014886334610472542, "loss": 0.6394, "step": 1005 }, { "epoch": 3.85, "eval_accuracy": 0.6264367816091954, "eval_loss": 0.9902334809303284, "eval_runtime": 72.164, "eval_samples_per_second": 2.411, "eval_steps_per_second": 0.305, "step": 1005 }, { "epoch": 3.87, "learning_rate": 0.00014860791826309068, "loss": 0.7135, "step": 1010 }, { "epoch": 3.87, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.1338768005371094, "eval_runtime": 73.5721, "eval_samples_per_second": 2.365, "eval_steps_per_second": 0.299, "step": 1010 }, { "epoch": 3.89, "learning_rate": 0.00014835249042145595, "loss": 1.3432, "step": 1015 }, { "epoch": 3.89, "eval_accuracy": 0.603448275862069, "eval_loss": 1.016777515411377, "eval_runtime": 72.2656, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.304, "step": 1015 }, { "epoch": 3.91, "learning_rate": 0.0001480970625798212, "loss": 0.8375, "step": 1020 }, { "epoch": 3.91, "eval_accuracy": 0.6436781609195402, "eval_loss": 0.8976129293441772, "eval_runtime": 71.6885, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 1020 }, { "epoch": 3.93, "learning_rate": 0.00014784163473818647, "loss": 1.2419, "step": 1025 }, { "epoch": 3.93, "eval_accuracy": 0.6149425287356322, "eval_loss": 1.0345048904418945, "eval_runtime": 72.2899, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1025 }, { "epoch": 3.95, "learning_rate": 0.00014758620689655174, "loss": 1.2465, "step": 1030 }, { "epoch": 3.95, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9065479636192322, "eval_runtime": 71.7202, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 1030 }, { "epoch": 3.97, "learning_rate": 0.000147330779054917, "loss": 0.8987, "step": 1035 }, { "epoch": 3.97, "eval_accuracy": 0.6839080459770115, "eval_loss": 0.8305312395095825, "eval_runtime": 74.2714, "eval_samples_per_second": 2.343, "eval_steps_per_second": 0.296, "step": 1035 }, { "epoch": 3.98, "learning_rate": 0.00014707535121328226, "loss": 0.6279, "step": 1040 }, { "epoch": 3.98, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8689811825752258, "eval_runtime": 73.642, "eval_samples_per_second": 2.363, "eval_steps_per_second": 0.299, "step": 1040 }, { "epoch": 4.0, "learning_rate": 0.00014681992337164753, "loss": 1.1026, "step": 1045 }, { "epoch": 4.0, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.8575055599212646, "eval_runtime": 72.2634, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.304, "step": 1045 }, { "epoch": 4.02, "learning_rate": 0.00014656449553001276, "loss": 0.5617, "step": 1050 }, { "epoch": 4.02, "eval_accuracy": 0.6436781609195402, "eval_loss": 0.8988448977470398, "eval_runtime": 73.0503, "eval_samples_per_second": 2.382, "eval_steps_per_second": 0.301, "step": 1050 }, { "epoch": 4.04, "learning_rate": 0.00014630906768837805, "loss": 0.5318, "step": 1055 }, { "epoch": 4.04, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.0455691814422607, "eval_runtime": 72.3237, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1055 }, { "epoch": 4.06, "learning_rate": 0.0001460536398467433, "loss": 0.7041, "step": 1060 }, { "epoch": 4.06, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.9058274626731873, "eval_runtime": 71.8667, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 1060 }, { "epoch": 4.08, "learning_rate": 0.00014579821200510858, "loss": 0.688, "step": 1065 }, { "epoch": 4.08, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.0057711601257324, "eval_runtime": 72.4073, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.304, "step": 1065 }, { "epoch": 4.1, "learning_rate": 0.00014554278416347382, "loss": 0.8286, "step": 1070 }, { "epoch": 4.1, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.0452172756195068, "eval_runtime": 71.8108, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 1070 }, { "epoch": 4.12, "learning_rate": 0.0001452873563218391, "loss": 1.2596, "step": 1075 }, { "epoch": 4.12, "eval_accuracy": 0.5862068965517241, "eval_loss": 0.9942687749862671, "eval_runtime": 73.3993, "eval_samples_per_second": 2.371, "eval_steps_per_second": 0.3, "step": 1075 }, { "epoch": 4.14, "learning_rate": 0.00014503192848020434, "loss": 0.8448, "step": 1080 }, { "epoch": 4.14, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.044135570526123, "eval_runtime": 71.6723, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 1080 }, { "epoch": 4.16, "learning_rate": 0.0001447765006385696, "loss": 0.9666, "step": 1085 }, { "epoch": 4.16, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.0021615028381348, "eval_runtime": 72.2989, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1085 }, { "epoch": 4.18, "learning_rate": 0.00014452107279693487, "loss": 1.0548, "step": 1090 }, { "epoch": 4.18, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.8905701041221619, "eval_runtime": 71.7468, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 1090 }, { "epoch": 4.2, "learning_rate": 0.00014426564495530013, "loss": 0.823, "step": 1095 }, { "epoch": 4.2, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.835205614566803, "eval_runtime": 72.2334, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 1095 }, { "epoch": 4.21, "learning_rate": 0.0001440102171136654, "loss": 0.7588, "step": 1100 }, { "epoch": 4.21, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8358584046363831, "eval_runtime": 71.7162, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 1100 }, { "epoch": 4.23, "learning_rate": 0.00014375478927203066, "loss": 0.5306, "step": 1105 }, { "epoch": 4.23, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8987478613853455, "eval_runtime": 72.2456, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.305, "step": 1105 }, { "epoch": 4.25, "learning_rate": 0.00014349936143039592, "loss": 1.059, "step": 1110 }, { "epoch": 4.25, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.8400871753692627, "eval_runtime": 71.6522, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 1110 }, { "epoch": 4.27, "learning_rate": 0.00014324393358876119, "loss": 0.6116, "step": 1115 }, { "epoch": 4.27, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.9104363322257996, "eval_runtime": 72.2465, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.305, "step": 1115 }, { "epoch": 4.29, "learning_rate": 0.00014298850574712642, "loss": 0.7483, "step": 1120 }, { "epoch": 4.29, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.0067737102508545, "eval_runtime": 71.6939, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 1120 }, { "epoch": 4.31, "learning_rate": 0.0001427330779054917, "loss": 0.5231, "step": 1125 }, { "epoch": 4.31, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9476281404495239, "eval_runtime": 72.2862, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1125 }, { "epoch": 4.33, "learning_rate": 0.00014247765006385695, "loss": 0.5667, "step": 1130 }, { "epoch": 4.33, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.9047439694404602, "eval_runtime": 71.7674, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 1130 }, { "epoch": 4.35, "learning_rate": 0.00014222222222222224, "loss": 0.7785, "step": 1135 }, { "epoch": 4.35, "eval_accuracy": 0.6149425287356322, "eval_loss": 1.0280640125274658, "eval_runtime": 72.438, "eval_samples_per_second": 2.402, "eval_steps_per_second": 0.304, "step": 1135 }, { "epoch": 4.37, "learning_rate": 0.00014196679438058748, "loss": 1.0404, "step": 1140 }, { "epoch": 4.37, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.9103832840919495, "eval_runtime": 73.4365, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.3, "step": 1140 }, { "epoch": 4.39, "learning_rate": 0.00014171136653895277, "loss": 0.5523, "step": 1145 }, { "epoch": 4.39, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.0259004831314087, "eval_runtime": 72.2901, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1145 }, { "epoch": 4.41, "learning_rate": 0.000141455938697318, "loss": 0.6387, "step": 1150 }, { "epoch": 4.41, "eval_accuracy": 0.5747126436781609, "eval_loss": 1.1877542734146118, "eval_runtime": 71.8711, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 1150 }, { "epoch": 4.43, "learning_rate": 0.00014120051085568327, "loss": 1.038, "step": 1155 }, { "epoch": 4.43, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.065651774406433, "eval_runtime": 72.3814, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 1155 }, { "epoch": 4.44, "learning_rate": 0.00014094508301404853, "loss": 0.7566, "step": 1160 }, { "epoch": 4.44, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.8948299288749695, "eval_runtime": 71.6321, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 1160 }, { "epoch": 4.46, "learning_rate": 0.0001406896551724138, "loss": 0.5505, "step": 1165 }, { "epoch": 4.46, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.9844233393669128, "eval_runtime": 74.1566, "eval_samples_per_second": 2.346, "eval_steps_per_second": 0.297, "step": 1165 }, { "epoch": 4.48, "learning_rate": 0.00014043422733077906, "loss": 1.0917, "step": 1170 }, { "epoch": 4.48, "eval_accuracy": 0.6724137931034483, "eval_loss": 0.9192268252372742, "eval_runtime": 71.6227, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 1170 }, { "epoch": 4.5, "learning_rate": 0.00014017879948914432, "loss": 0.8048, "step": 1175 }, { "epoch": 4.5, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.1002541780471802, "eval_runtime": 73.6156, "eval_samples_per_second": 2.364, "eval_steps_per_second": 0.299, "step": 1175 }, { "epoch": 4.52, "learning_rate": 0.00013992337164750958, "loss": 0.5951, "step": 1180 }, { "epoch": 4.52, "eval_accuracy": 0.6494252873563219, "eval_loss": 0.9471919536590576, "eval_runtime": 71.689, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 1180 }, { "epoch": 4.54, "learning_rate": 0.00013966794380587485, "loss": 0.603, "step": 1185 }, { "epoch": 4.54, "eval_accuracy": 0.5632183908045977, "eval_loss": 1.3395264148712158, "eval_runtime": 72.507, "eval_samples_per_second": 2.4, "eval_steps_per_second": 0.303, "step": 1185 }, { "epoch": 4.56, "learning_rate": 0.0001394125159642401, "loss": 1.0802, "step": 1190 }, { "epoch": 4.56, "eval_accuracy": 0.6149425287356322, "eval_loss": 1.052255392074585, "eval_runtime": 71.8669, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 1190 }, { "epoch": 4.58, "learning_rate": 0.00013915708812260537, "loss": 0.3689, "step": 1195 }, { "epoch": 4.58, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.0053684711456299, "eval_runtime": 72.2348, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 1195 }, { "epoch": 4.6, "learning_rate": 0.00013890166028097064, "loss": 0.5565, "step": 1200 }, { "epoch": 4.6, "eval_accuracy": 0.5862068965517241, "eval_loss": 1.400984525680542, "eval_runtime": 71.9603, "eval_samples_per_second": 2.418, "eval_steps_per_second": 0.306, "step": 1200 }, { "epoch": 4.62, "learning_rate": 0.0001386462324393359, "loss": 0.7934, "step": 1205 }, { "epoch": 4.62, "eval_accuracy": 0.5919540229885057, "eval_loss": 1.4771628379821777, "eval_runtime": 72.4137, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.304, "step": 1205 }, { "epoch": 4.64, "learning_rate": 0.00013839080459770116, "loss": 0.8364, "step": 1210 }, { "epoch": 4.64, "eval_accuracy": 0.632183908045977, "eval_loss": 1.2308330535888672, "eval_runtime": 71.7192, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 1210 }, { "epoch": 4.66, "learning_rate": 0.00013813537675606643, "loss": 0.8967, "step": 1215 }, { "epoch": 4.66, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8904043436050415, "eval_runtime": 72.373, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 1215 }, { "epoch": 4.67, "learning_rate": 0.0001378799489144317, "loss": 0.5255, "step": 1220 }, { "epoch": 4.67, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.006020188331604, "eval_runtime": 73.6542, "eval_samples_per_second": 2.362, "eval_steps_per_second": 0.299, "step": 1220 }, { "epoch": 4.69, "learning_rate": 0.00013762452107279695, "loss": 0.693, "step": 1225 }, { "epoch": 4.69, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.9818925261497498, "eval_runtime": 72.3307, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1225 }, { "epoch": 4.71, "learning_rate": 0.00013736909323116222, "loss": 1.1102, "step": 1230 }, { "epoch": 4.71, "eval_accuracy": 0.7241379310344828, "eval_loss": 0.8632426857948303, "eval_runtime": 71.648, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 1230 }, { "epoch": 4.73, "learning_rate": 0.00013711366538952745, "loss": 0.7586, "step": 1235 }, { "epoch": 4.73, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.8827661871910095, "eval_runtime": 72.3427, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 1235 }, { "epoch": 4.75, "learning_rate": 0.00013685823754789274, "loss": 0.5332, "step": 1240 }, { "epoch": 4.75, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8106628656387329, "eval_runtime": 73.4839, "eval_samples_per_second": 2.368, "eval_steps_per_second": 0.299, "step": 1240 }, { "epoch": 4.77, "learning_rate": 0.00013660280970625798, "loss": 0.7737, "step": 1245 }, { "epoch": 4.77, "eval_accuracy": 0.7241379310344828, "eval_loss": 0.7830407023429871, "eval_runtime": 72.337, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 1245 }, { "epoch": 4.79, "learning_rate": 0.00013634738186462327, "loss": 0.8634, "step": 1250 }, { "epoch": 4.79, "eval_accuracy": 0.7068965517241379, "eval_loss": 0.799767792224884, "eval_runtime": 71.8401, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 1250 }, { "epoch": 4.81, "learning_rate": 0.0001360919540229885, "loss": 0.8592, "step": 1255 }, { "epoch": 4.81, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8830769658088684, "eval_runtime": 73.1202, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.301, "step": 1255 }, { "epoch": 4.83, "learning_rate": 0.0001358365261813538, "loss": 0.3591, "step": 1260 }, { "epoch": 4.83, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.7183188796043396, "eval_runtime": 71.7735, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 1260 }, { "epoch": 4.85, "learning_rate": 0.00013558109833971903, "loss": 0.7214, "step": 1265 }, { "epoch": 4.85, "eval_accuracy": 0.7183908045977011, "eval_loss": 0.7452751398086548, "eval_runtime": 72.2803, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1265 }, { "epoch": 4.87, "learning_rate": 0.0001353256704980843, "loss": 0.6555, "step": 1270 }, { "epoch": 4.87, "eval_accuracy": 0.6954022988505747, "eval_loss": 0.8549041748046875, "eval_runtime": 71.6872, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 1270 }, { "epoch": 4.89, "learning_rate": 0.00013507024265644956, "loss": 0.3736, "step": 1275 }, { "epoch": 4.89, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.8064850568771362, "eval_runtime": 72.2268, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 1275 }, { "epoch": 4.9, "learning_rate": 0.00013481481481481482, "loss": 0.6139, "step": 1280 }, { "epoch": 4.9, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.7727733850479126, "eval_runtime": 71.6667, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 1280 }, { "epoch": 4.92, "learning_rate": 0.00013455938697318009, "loss": 1.4349, "step": 1285 }, { "epoch": 4.92, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.7865081429481506, "eval_runtime": 72.3204, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1285 }, { "epoch": 4.94, "learning_rate": 0.00013430395913154535, "loss": 0.6336, "step": 1290 }, { "epoch": 4.94, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.7983749508857727, "eval_runtime": 71.6401, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 1290 }, { "epoch": 4.96, "learning_rate": 0.0001340485312899106, "loss": 0.667, "step": 1295 }, { "epoch": 4.96, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.0624412298202515, "eval_runtime": 72.1869, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 1295 }, { "epoch": 4.98, "learning_rate": 0.00013379310344827588, "loss": 0.4108, "step": 1300 }, { "epoch": 4.98, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.9411900043487549, "eval_runtime": 72.8779, "eval_samples_per_second": 2.388, "eval_steps_per_second": 0.302, "step": 1300 }, { "epoch": 5.0, "learning_rate": 0.0001335376756066411, "loss": 1.0572, "step": 1305 }, { "epoch": 5.0, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.8950245380401611, "eval_runtime": 72.2902, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1305 }, { "epoch": 5.02, "learning_rate": 0.0001332822477650064, "loss": 0.3411, "step": 1310 }, { "epoch": 5.02, "eval_accuracy": 0.735632183908046, "eval_loss": 0.8750669360160828, "eval_runtime": 73.4412, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.3, "step": 1310 }, { "epoch": 5.04, "learning_rate": 0.00013302681992337164, "loss": 0.3644, "step": 1315 }, { "epoch": 5.04, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.8155695796012878, "eval_runtime": 72.387, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 1315 }, { "epoch": 5.06, "learning_rate": 0.00013277139208173693, "loss": 0.2643, "step": 1320 }, { "epoch": 5.06, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.7830978631973267, "eval_runtime": 71.6267, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 1320 }, { "epoch": 5.08, "learning_rate": 0.00013251596424010217, "loss": 0.3629, "step": 1325 }, { "epoch": 5.08, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.644027829170227, "eval_runtime": 72.3202, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1325 }, { "epoch": 5.1, "learning_rate": 0.00013226053639846746, "loss": 1.5008, "step": 1330 }, { "epoch": 5.1, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.36289381980896, "eval_runtime": 73.5741, "eval_samples_per_second": 2.365, "eval_steps_per_second": 0.299, "step": 1330 }, { "epoch": 5.11, "learning_rate": 0.0001320051085568327, "loss": 1.1648, "step": 1335 }, { "epoch": 5.11, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.9745023250579834, "eval_runtime": 73.9076, "eval_samples_per_second": 2.354, "eval_steps_per_second": 0.298, "step": 1335 }, { "epoch": 5.13, "learning_rate": 0.00013174968071519796, "loss": 0.842, "step": 1340 }, { "epoch": 5.13, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.9471400380134583, "eval_runtime": 73.3831, "eval_samples_per_second": 2.371, "eval_steps_per_second": 0.3, "step": 1340 }, { "epoch": 5.15, "learning_rate": 0.00013149425287356322, "loss": 0.4963, "step": 1345 }, { "epoch": 5.15, "eval_accuracy": 0.6379310344827587, "eval_loss": 1.1134085655212402, "eval_runtime": 74.2914, "eval_samples_per_second": 2.342, "eval_steps_per_second": 0.296, "step": 1345 }, { "epoch": 5.17, "learning_rate": 0.00013123882503192848, "loss": 0.6819, "step": 1350 }, { "epoch": 5.17, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.0473263263702393, "eval_runtime": 73.934, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 1350 }, { "epoch": 5.19, "learning_rate": 0.00013098339719029375, "loss": 0.2688, "step": 1355 }, { "epoch": 5.19, "eval_accuracy": 0.6091954022988506, "eval_loss": 1.323175311088562, "eval_runtime": 73.8927, "eval_samples_per_second": 2.355, "eval_steps_per_second": 0.298, "step": 1355 }, { "epoch": 5.21, "learning_rate": 0.000130727969348659, "loss": 0.7999, "step": 1360 }, { "epoch": 5.21, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.2077127695083618, "eval_runtime": 72.9338, "eval_samples_per_second": 2.386, "eval_steps_per_second": 0.302, "step": 1360 }, { "epoch": 5.23, "learning_rate": 0.00013047254150702427, "loss": 0.9447, "step": 1365 }, { "epoch": 5.23, "eval_accuracy": 0.632183908045977, "eval_loss": 1.119039535522461, "eval_runtime": 75.1877, "eval_samples_per_second": 2.314, "eval_steps_per_second": 0.293, "step": 1365 }, { "epoch": 5.25, "learning_rate": 0.00013021711366538954, "loss": 0.4866, "step": 1370 }, { "epoch": 5.25, "eval_accuracy": 0.5977011494252874, "eval_loss": 1.1412699222564697, "eval_runtime": 74.8277, "eval_samples_per_second": 2.325, "eval_steps_per_second": 0.294, "step": 1370 }, { "epoch": 5.27, "learning_rate": 0.0001299616858237548, "loss": 0.6268, "step": 1375 }, { "epoch": 5.27, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.1648024320602417, "eval_runtime": 74.1075, "eval_samples_per_second": 2.348, "eval_steps_per_second": 0.297, "step": 1375 }, { "epoch": 5.29, "learning_rate": 0.00012970625798212006, "loss": 1.0332, "step": 1380 }, { "epoch": 5.29, "eval_accuracy": 0.6264367816091954, "eval_loss": 1.0704519748687744, "eval_runtime": 73.409, "eval_samples_per_second": 2.37, "eval_steps_per_second": 0.3, "step": 1380 }, { "epoch": 5.31, "learning_rate": 0.00012945083014048533, "loss": 0.508, "step": 1385 }, { "epoch": 5.31, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9115325212478638, "eval_runtime": 75.2816, "eval_samples_per_second": 2.311, "eval_steps_per_second": 0.292, "step": 1385 }, { "epoch": 5.33, "learning_rate": 0.0001291954022988506, "loss": 0.4844, "step": 1390 }, { "epoch": 5.33, "eval_accuracy": 0.7241379310344828, "eval_loss": 0.7564892768859863, "eval_runtime": 73.4276, "eval_samples_per_second": 2.37, "eval_steps_per_second": 0.3, "step": 1390 }, { "epoch": 5.34, "learning_rate": 0.00012893997445721583, "loss": 0.6529, "step": 1395 }, { "epoch": 5.34, "eval_accuracy": 0.7068965517241379, "eval_loss": 0.7532500624656677, "eval_runtime": 74.281, "eval_samples_per_second": 2.342, "eval_steps_per_second": 0.296, "step": 1395 }, { "epoch": 5.36, "learning_rate": 0.00012868454661558112, "loss": 0.674, "step": 1400 }, { "epoch": 5.36, "eval_accuracy": 0.6436781609195402, "eval_loss": 0.9024896621704102, "eval_runtime": 74.2674, "eval_samples_per_second": 2.343, "eval_steps_per_second": 0.296, "step": 1400 }, { "epoch": 5.38, "learning_rate": 0.00012842911877394635, "loss": 0.9236, "step": 1405 }, { "epoch": 5.38, "eval_accuracy": 0.7241379310344828, "eval_loss": 0.7951949834823608, "eval_runtime": 73.902, "eval_samples_per_second": 2.354, "eval_steps_per_second": 0.298, "step": 1405 }, { "epoch": 5.4, "learning_rate": 0.00012817369093231162, "loss": 0.523, "step": 1410 }, { "epoch": 5.4, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.7487069368362427, "eval_runtime": 74.6134, "eval_samples_per_second": 2.332, "eval_steps_per_second": 0.295, "step": 1410 }, { "epoch": 5.42, "learning_rate": 0.00012791826309067688, "loss": 0.3512, "step": 1415 }, { "epoch": 5.42, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.9300501942634583, "eval_runtime": 74.1082, "eval_samples_per_second": 2.348, "eval_steps_per_second": 0.297, "step": 1415 }, { "epoch": 5.44, "learning_rate": 0.00012766283524904214, "loss": 0.5621, "step": 1420 }, { "epoch": 5.44, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.9454444050788879, "eval_runtime": 75.121, "eval_samples_per_second": 2.316, "eval_steps_per_second": 0.293, "step": 1420 }, { "epoch": 5.46, "learning_rate": 0.0001274074074074074, "loss": 0.3093, "step": 1425 }, { "epoch": 5.46, "eval_accuracy": 0.6954022988505747, "eval_loss": 0.8725366592407227, "eval_runtime": 74.2816, "eval_samples_per_second": 2.342, "eval_steps_per_second": 0.296, "step": 1425 }, { "epoch": 5.48, "learning_rate": 0.00012715197956577267, "loss": 0.5955, "step": 1430 }, { "epoch": 5.48, "eval_accuracy": 0.7068965517241379, "eval_loss": 0.8809516429901123, "eval_runtime": 73.6012, "eval_samples_per_second": 2.364, "eval_steps_per_second": 0.299, "step": 1430 }, { "epoch": 5.5, "learning_rate": 0.00012689655172413793, "loss": 0.2704, "step": 1435 }, { "epoch": 5.5, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.9333508610725403, "eval_runtime": 74.0008, "eval_samples_per_second": 2.351, "eval_steps_per_second": 0.297, "step": 1435 }, { "epoch": 5.52, "learning_rate": 0.0001266411238825032, "loss": 0.3438, "step": 1440 }, { "epoch": 5.52, "eval_accuracy": 0.6839080459770115, "eval_loss": 0.9162984490394592, "eval_runtime": 73.7475, "eval_samples_per_second": 2.359, "eval_steps_per_second": 0.298, "step": 1440 }, { "epoch": 5.54, "learning_rate": 0.00012638569604086846, "loss": 0.2719, "step": 1445 }, { "epoch": 5.54, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.9165803790092468, "eval_runtime": 73.1738, "eval_samples_per_second": 2.378, "eval_steps_per_second": 0.301, "step": 1445 }, { "epoch": 5.56, "learning_rate": 0.00012613026819923372, "loss": 0.4038, "step": 1450 }, { "epoch": 5.56, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.951847493648529, "eval_runtime": 73.7477, "eval_samples_per_second": 2.359, "eval_steps_per_second": 0.298, "step": 1450 }, { "epoch": 5.57, "learning_rate": 0.00012587484035759899, "loss": 0.8747, "step": 1455 }, { "epoch": 5.57, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.006085753440857, "eval_runtime": 73.6005, "eval_samples_per_second": 2.364, "eval_steps_per_second": 0.299, "step": 1455 }, { "epoch": 5.59, "learning_rate": 0.00012561941251596425, "loss": 0.5741, "step": 1460 }, { "epoch": 5.59, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.0438010692596436, "eval_runtime": 71.8134, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 1460 }, { "epoch": 5.61, "learning_rate": 0.0001253639846743295, "loss": 0.732, "step": 1465 }, { "epoch": 5.61, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.0875169038772583, "eval_runtime": 72.3827, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 1465 }, { "epoch": 5.63, "learning_rate": 0.00012510855683269478, "loss": 0.5971, "step": 1470 }, { "epoch": 5.63, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.062251091003418, "eval_runtime": 71.8803, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 1470 }, { "epoch": 5.65, "learning_rate": 0.00012485312899106004, "loss": 0.6264, "step": 1475 }, { "epoch": 5.65, "eval_accuracy": 0.6954022988505747, "eval_loss": 0.9514502882957458, "eval_runtime": 72.7337, "eval_samples_per_second": 2.392, "eval_steps_per_second": 0.302, "step": 1475 }, { "epoch": 5.67, "learning_rate": 0.00012459770114942528, "loss": 0.681, "step": 1480 }, { "epoch": 5.67, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.0149379968643188, "eval_runtime": 71.7065, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 1480 }, { "epoch": 5.69, "learning_rate": 0.00012434227330779057, "loss": 0.8418, "step": 1485 }, { "epoch": 5.69, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.1708879470825195, "eval_runtime": 74.2125, "eval_samples_per_second": 2.345, "eval_steps_per_second": 0.296, "step": 1485 }, { "epoch": 5.71, "learning_rate": 0.0001240868454661558, "loss": 0.5313, "step": 1490 }, { "epoch": 5.71, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.0242153406143188, "eval_runtime": 71.6182, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 1490 }, { "epoch": 5.73, "learning_rate": 0.0001238314176245211, "loss": 0.5052, "step": 1495 }, { "epoch": 5.73, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.254513144493103, "eval_runtime": 72.1503, "eval_samples_per_second": 2.412, "eval_steps_per_second": 0.305, "step": 1495 }, { "epoch": 5.75, "learning_rate": 0.00012357598978288633, "loss": 1.002, "step": 1500 }, { "epoch": 5.75, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.1145654916763306, "eval_runtime": 71.7734, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 1500 }, { "epoch": 5.77, "learning_rate": 0.00012332056194125162, "loss": 0.4721, "step": 1505 }, { "epoch": 5.77, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.091664433479309, "eval_runtime": 74.1208, "eval_samples_per_second": 2.348, "eval_steps_per_second": 0.297, "step": 1505 }, { "epoch": 5.79, "learning_rate": 0.00012306513409961686, "loss": 0.3988, "step": 1510 }, { "epoch": 5.79, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.0722365379333496, "eval_runtime": 73.3159, "eval_samples_per_second": 2.373, "eval_steps_per_second": 0.3, "step": 1510 }, { "epoch": 5.8, "learning_rate": 0.00012280970625798212, "loss": 0.8568, "step": 1515 }, { "epoch": 5.8, "eval_accuracy": 0.7068965517241379, "eval_loss": 0.8946850895881653, "eval_runtime": 72.2801, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1515 }, { "epoch": 5.82, "learning_rate": 0.00012255427841634738, "loss": 0.4908, "step": 1520 }, { "epoch": 5.82, "eval_accuracy": 0.7241379310344828, "eval_loss": 0.8863809704780579, "eval_runtime": 71.7599, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 1520 }, { "epoch": 5.84, "learning_rate": 0.00012229885057471265, "loss": 0.5216, "step": 1525 }, { "epoch": 5.84, "eval_accuracy": 0.7068965517241379, "eval_loss": 0.9632396697998047, "eval_runtime": 72.267, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.304, "step": 1525 }, { "epoch": 5.86, "learning_rate": 0.00012204342273307792, "loss": 0.7425, "step": 1530 }, { "epoch": 5.86, "eval_accuracy": 0.735632183908046, "eval_loss": 0.828774631023407, "eval_runtime": 73.4716, "eval_samples_per_second": 2.368, "eval_steps_per_second": 0.299, "step": 1530 }, { "epoch": 5.88, "learning_rate": 0.00012178799489144317, "loss": 0.5288, "step": 1535 }, { "epoch": 5.88, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.1222902536392212, "eval_runtime": 72.267, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.304, "step": 1535 }, { "epoch": 5.9, "learning_rate": 0.00012153256704980845, "loss": 0.7604, "step": 1540 }, { "epoch": 5.9, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.065313458442688, "eval_runtime": 71.7733, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 1540 }, { "epoch": 5.92, "learning_rate": 0.0001212771392081737, "loss": 0.4761, "step": 1545 }, { "epoch": 5.92, "eval_accuracy": 0.6896551724137931, "eval_loss": 0.9595795273780823, "eval_runtime": 72.1868, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 1545 }, { "epoch": 5.94, "learning_rate": 0.00012102171136653895, "loss": 0.7556, "step": 1550 }, { "epoch": 5.94, "eval_accuracy": 0.6609195402298851, "eval_loss": 0.9294580817222595, "eval_runtime": 71.6133, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 1550 }, { "epoch": 5.96, "learning_rate": 0.00012076628352490423, "loss": 0.7834, "step": 1555 }, { "epoch": 5.96, "eval_accuracy": 0.6551724137931034, "eval_loss": 0.9481978416442871, "eval_runtime": 72.222, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 1555 }, { "epoch": 5.98, "learning_rate": 0.00012051085568326948, "loss": 0.8625, "step": 1560 }, { "epoch": 5.98, "eval_accuracy": 0.6781609195402298, "eval_loss": 0.9149760007858276, "eval_runtime": 71.6268, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 1560 }, { "epoch": 6.0, "learning_rate": 0.00012025542784163475, "loss": 0.4823, "step": 1565 }, { "epoch": 6.0, "eval_accuracy": 0.6954022988505747, "eval_loss": 0.8527363538742065, "eval_runtime": 72.4135, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.304, "step": 1565 }, { "epoch": 6.02, "learning_rate": 0.00012, "loss": 0.6611, "step": 1570 }, { "epoch": 6.02, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.1094207763671875, "eval_runtime": 71.6394, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 1570 }, { "epoch": 6.03, "learning_rate": 0.00011974457215836528, "loss": 0.4618, "step": 1575 }, { "epoch": 6.03, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.9192668199539185, "eval_runtime": 74.1876, "eval_samples_per_second": 2.345, "eval_steps_per_second": 0.297, "step": 1575 }, { "epoch": 6.05, "learning_rate": 0.00011948914431673053, "loss": 0.3332, "step": 1580 }, { "epoch": 6.05, "eval_accuracy": 0.6839080459770115, "eval_loss": 0.8720999360084534, "eval_runtime": 73.8274, "eval_samples_per_second": 2.357, "eval_steps_per_second": 0.298, "step": 1580 }, { "epoch": 6.07, "learning_rate": 0.00011923371647509578, "loss": 0.4447, "step": 1585 }, { "epoch": 6.07, "eval_accuracy": 0.7413793103448276, "eval_loss": 0.8002211451530457, "eval_runtime": 74.081, "eval_samples_per_second": 2.349, "eval_steps_per_second": 0.297, "step": 1585 }, { "epoch": 6.09, "learning_rate": 0.00011897828863346106, "loss": 0.4332, "step": 1590 }, { "epoch": 6.09, "eval_accuracy": 0.7126436781609196, "eval_loss": 0.8471765518188477, "eval_runtime": 71.7734, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 1590 }, { "epoch": 6.11, "learning_rate": 0.0001187228607918263, "loss": 0.3504, "step": 1595 }, { "epoch": 6.11, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.8792366981506348, "eval_runtime": 72.3203, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1595 }, { "epoch": 6.13, "learning_rate": 0.00011846743295019158, "loss": 0.1991, "step": 1600 }, { "epoch": 6.13, "eval_accuracy": 0.7011494252873564, "eval_loss": 0.9491644501686096, "eval_runtime": 71.7555, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 1600 }, { "epoch": 6.15, "learning_rate": 0.00011821200510855683, "loss": 0.2901, "step": 1605 }, { "epoch": 6.15, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.0009914636611938, "eval_runtime": 72.2758, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1605 }, { "epoch": 6.17, "learning_rate": 0.00011795657726692211, "loss": 0.6851, "step": 1610 }, { "epoch": 6.17, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.0788379907608032, "eval_runtime": 71.7844, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 1610 }, { "epoch": 6.19, "learning_rate": 0.00011770114942528736, "loss": 0.5596, "step": 1615 }, { "epoch": 6.19, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.04649019241333, "eval_runtime": 72.2936, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1615 }, { "epoch": 6.21, "learning_rate": 0.00011744572158365264, "loss": 0.4059, "step": 1620 }, { "epoch": 6.21, "eval_accuracy": 0.7413793103448276, "eval_loss": 0.9852347373962402, "eval_runtime": 71.7201, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 1620 }, { "epoch": 6.23, "learning_rate": 0.00011719029374201789, "loss": 0.4061, "step": 1625 }, { "epoch": 6.23, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.0554276704788208, "eval_runtime": 73.1205, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.301, "step": 1625 }, { "epoch": 6.25, "learning_rate": 0.00011693486590038314, "loss": 0.2499, "step": 1630 }, { "epoch": 6.25, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.0310966968536377, "eval_runtime": 72.0535, "eval_samples_per_second": 2.415, "eval_steps_per_second": 0.305, "step": 1630 }, { "epoch": 6.26, "learning_rate": 0.00011667943805874841, "loss": 0.3739, "step": 1635 }, { "epoch": 6.26, "eval_accuracy": 0.7701149425287356, "eval_loss": 0.8703017234802246, "eval_runtime": 74.24, "eval_samples_per_second": 2.344, "eval_steps_per_second": 0.296, "step": 1635 }, { "epoch": 6.28, "learning_rate": 0.00011642401021711366, "loss": 0.4323, "step": 1640 }, { "epoch": 6.28, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.0342254638671875, "eval_runtime": 71.7608, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 1640 }, { "epoch": 6.3, "learning_rate": 0.00011616858237547894, "loss": 0.417, "step": 1645 }, { "epoch": 6.3, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.004945993423462, "eval_runtime": 72.2243, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 1645 }, { "epoch": 6.32, "learning_rate": 0.00011591315453384419, "loss": 0.4087, "step": 1650 }, { "epoch": 6.32, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.0639106035232544, "eval_runtime": 71.646, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 1650 }, { "epoch": 6.34, "learning_rate": 0.00011565772669220947, "loss": 0.3422, "step": 1655 }, { "epoch": 6.34, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.1308996677398682, "eval_runtime": 73.9785, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 1655 }, { "epoch": 6.36, "learning_rate": 0.00011540229885057472, "loss": 0.4619, "step": 1660 }, { "epoch": 6.36, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.0824979543685913, "eval_runtime": 71.7053, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 1660 }, { "epoch": 6.38, "learning_rate": 0.00011514687100893997, "loss": 0.4767, "step": 1665 }, { "epoch": 6.38, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.046342134475708, "eval_runtime": 72.3509, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 1665 }, { "epoch": 6.4, "learning_rate": 0.00011489144316730524, "loss": 0.4737, "step": 1670 }, { "epoch": 6.4, "eval_accuracy": 0.7183908045977011, "eval_loss": 0.9138516783714294, "eval_runtime": 71.5867, "eval_samples_per_second": 2.431, "eval_steps_per_second": 0.307, "step": 1670 }, { "epoch": 6.42, "learning_rate": 0.00011463601532567049, "loss": 0.2711, "step": 1675 }, { "epoch": 6.42, "eval_accuracy": 0.7183908045977011, "eval_loss": 0.9746137857437134, "eval_runtime": 72.2002, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 1675 }, { "epoch": 6.44, "learning_rate": 0.00011438058748403577, "loss": 0.3956, "step": 1680 }, { "epoch": 6.44, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.1271107196807861, "eval_runtime": 71.5339, "eval_samples_per_second": 2.432, "eval_steps_per_second": 0.308, "step": 1680 }, { "epoch": 6.46, "learning_rate": 0.00011412515964240102, "loss": 0.5138, "step": 1685 }, { "epoch": 6.46, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.204206109046936, "eval_runtime": 73.8943, "eval_samples_per_second": 2.355, "eval_steps_per_second": 0.298, "step": 1685 }, { "epoch": 6.48, "learning_rate": 0.0001138697318007663, "loss": 0.3022, "step": 1690 }, { "epoch": 6.48, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.4267605543136597, "eval_runtime": 73.7212, "eval_samples_per_second": 2.36, "eval_steps_per_second": 0.298, "step": 1690 }, { "epoch": 6.49, "learning_rate": 0.00011361430395913155, "loss": 0.4592, "step": 1695 }, { "epoch": 6.49, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.6285024881362915, "eval_runtime": 72.3063, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1695 }, { "epoch": 6.51, "learning_rate": 0.00011335887611749681, "loss": 0.7306, "step": 1700 }, { "epoch": 6.51, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.4132676124572754, "eval_runtime": 73.4541, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.3, "step": 1700 }, { "epoch": 6.53, "learning_rate": 0.00011310344827586207, "loss": 0.2819, "step": 1705 }, { "epoch": 6.53, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.0855623483657837, "eval_runtime": 72.8007, "eval_samples_per_second": 2.39, "eval_steps_per_second": 0.302, "step": 1705 }, { "epoch": 6.55, "learning_rate": 0.00011284802043422734, "loss": 0.3341, "step": 1710 }, { "epoch": 6.55, "eval_accuracy": 0.6436781609195402, "eval_loss": 1.190242052078247, "eval_runtime": 73.4674, "eval_samples_per_second": 2.368, "eval_steps_per_second": 0.299, "step": 1710 }, { "epoch": 6.57, "learning_rate": 0.0001125925925925926, "loss": 0.2632, "step": 1715 }, { "epoch": 6.57, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.2406810522079468, "eval_runtime": 72.2134, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 1715 }, { "epoch": 6.59, "learning_rate": 0.00011233716475095786, "loss": 0.3776, "step": 1720 }, { "epoch": 6.59, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.3052959442138672, "eval_runtime": 71.6934, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 1720 }, { "epoch": 6.61, "learning_rate": 0.00011208173690932313, "loss": 0.4002, "step": 1725 }, { "epoch": 6.61, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.335128903388977, "eval_runtime": 72.1244, "eval_samples_per_second": 2.412, "eval_steps_per_second": 0.305, "step": 1725 }, { "epoch": 6.63, "learning_rate": 0.00011182630906768839, "loss": 0.4399, "step": 1730 }, { "epoch": 6.63, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.5893123149871826, "eval_runtime": 71.5155, "eval_samples_per_second": 2.433, "eval_steps_per_second": 0.308, "step": 1730 }, { "epoch": 6.65, "learning_rate": 0.00011157088122605364, "loss": 0.7733, "step": 1735 }, { "epoch": 6.65, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.5668152570724487, "eval_runtime": 72.1994, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 1735 }, { "epoch": 6.67, "learning_rate": 0.00011131545338441892, "loss": 0.5065, "step": 1740 }, { "epoch": 6.67, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.554033637046814, "eval_runtime": 72.0935, "eval_samples_per_second": 2.414, "eval_steps_per_second": 0.305, "step": 1740 }, { "epoch": 6.69, "learning_rate": 0.00011106002554278417, "loss": 0.8585, "step": 1745 }, { "epoch": 6.69, "eval_accuracy": 0.6206896551724138, "eval_loss": 1.5956915616989136, "eval_runtime": 72.2252, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 1745 }, { "epoch": 6.7, "learning_rate": 0.00011080459770114944, "loss": 0.798, "step": 1750 }, { "epoch": 6.7, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.2651970386505127, "eval_runtime": 71.7377, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 1750 }, { "epoch": 6.72, "learning_rate": 0.00011054916985951469, "loss": 0.6362, "step": 1755 }, { "epoch": 6.72, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.0616276264190674, "eval_runtime": 72.227, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 1755 }, { "epoch": 6.74, "learning_rate": 0.00011029374201787997, "loss": 0.6891, "step": 1760 }, { "epoch": 6.74, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.1532150506973267, "eval_runtime": 73.3739, "eval_samples_per_second": 2.371, "eval_steps_per_second": 0.3, "step": 1760 }, { "epoch": 6.76, "learning_rate": 0.00011003831417624522, "loss": 0.4885, "step": 1765 }, { "epoch": 6.76, "eval_accuracy": 0.7126436781609196, "eval_loss": 0.964917778968811, "eval_runtime": 72.3603, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 1765 }, { "epoch": 6.78, "learning_rate": 0.00010978288633461047, "loss": 0.2212, "step": 1770 }, { "epoch": 6.78, "eval_accuracy": 0.7126436781609196, "eval_loss": 0.8999590873718262, "eval_runtime": 71.6665, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 1770 }, { "epoch": 6.8, "learning_rate": 0.00010952745849297575, "loss": 0.3838, "step": 1775 }, { "epoch": 6.8, "eval_accuracy": 0.7183908045977011, "eval_loss": 0.9244369268417358, "eval_runtime": 72.3203, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1775 }, { "epoch": 6.82, "learning_rate": 0.000109272030651341, "loss": 0.4909, "step": 1780 }, { "epoch": 6.82, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.8321231603622437, "eval_runtime": 73.7474, "eval_samples_per_second": 2.359, "eval_steps_per_second": 0.298, "step": 1780 }, { "epoch": 6.84, "learning_rate": 0.00010901660280970627, "loss": 0.3924, "step": 1785 }, { "epoch": 6.84, "eval_accuracy": 0.764367816091954, "eval_loss": 0.7822464108467102, "eval_runtime": 72.3997, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.304, "step": 1785 }, { "epoch": 6.86, "learning_rate": 0.00010876117496807152, "loss": 0.5337, "step": 1790 }, { "epoch": 6.86, "eval_accuracy": 0.7528735632183908, "eval_loss": 0.8443244099617004, "eval_runtime": 71.6807, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 1790 }, { "epoch": 6.88, "learning_rate": 0.0001085057471264368, "loss": 0.2258, "step": 1795 }, { "epoch": 6.88, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.8795732259750366, "eval_runtime": 72.2803, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 1795 }, { "epoch": 6.9, "learning_rate": 0.00010825031928480205, "loss": 0.496, "step": 1800 }, { "epoch": 6.9, "eval_accuracy": 0.7298850574712644, "eval_loss": 0.9166645407676697, "eval_runtime": 73.3739, "eval_samples_per_second": 2.371, "eval_steps_per_second": 0.3, "step": 1800 }, { "epoch": 6.92, "learning_rate": 0.0001079948914431673, "loss": 0.6166, "step": 1805 }, { "epoch": 6.92, "eval_accuracy": 0.7068965517241379, "eval_loss": 0.969086766242981, "eval_runtime": 72.2001, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 1805 }, { "epoch": 6.93, "learning_rate": 0.00010773946360153258, "loss": 0.1913, "step": 1810 }, { "epoch": 6.93, "eval_accuracy": 0.7413793103448276, "eval_loss": 0.9656856060028076, "eval_runtime": 73.6808, "eval_samples_per_second": 2.362, "eval_steps_per_second": 0.299, "step": 1810 }, { "epoch": 6.95, "learning_rate": 0.00010748403575989783, "loss": 0.5242, "step": 1815 }, { "epoch": 6.95, "eval_accuracy": 0.6609195402298851, "eval_loss": 1.2142490148544312, "eval_runtime": 72.4203, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.304, "step": 1815 }, { "epoch": 6.97, "learning_rate": 0.0001072286079182631, "loss": 0.8631, "step": 1820 }, { "epoch": 6.97, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.0861307382583618, "eval_runtime": 71.8136, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 1820 }, { "epoch": 6.99, "learning_rate": 0.00010697318007662835, "loss": 0.1719, "step": 1825 }, { "epoch": 6.99, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.0733994245529175, "eval_runtime": 72.1759, "eval_samples_per_second": 2.411, "eval_steps_per_second": 0.305, "step": 1825 }, { "epoch": 7.01, "learning_rate": 0.00010671775223499363, "loss": 0.3511, "step": 1830 }, { "epoch": 7.01, "eval_accuracy": 0.7528735632183908, "eval_loss": 0.9313093423843384, "eval_runtime": 71.8799, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 1830 }, { "epoch": 7.03, "learning_rate": 0.00010646232439335888, "loss": 0.314, "step": 1835 }, { "epoch": 7.03, "eval_accuracy": 0.7471264367816092, "eval_loss": 0.9551235437393188, "eval_runtime": 75.0945, "eval_samples_per_second": 2.317, "eval_steps_per_second": 0.293, "step": 1835 }, { "epoch": 7.05, "learning_rate": 0.00010620689655172413, "loss": 0.1106, "step": 1840 }, { "epoch": 7.05, "eval_accuracy": 0.7586206896551724, "eval_loss": 0.9258528351783752, "eval_runtime": 74.9492, "eval_samples_per_second": 2.322, "eval_steps_per_second": 0.294, "step": 1840 }, { "epoch": 7.07, "learning_rate": 0.0001059514687100894, "loss": 0.0608, "step": 1845 }, { "epoch": 7.07, "eval_accuracy": 0.7413793103448276, "eval_loss": 0.960231363773346, "eval_runtime": 74.015, "eval_samples_per_second": 2.351, "eval_steps_per_second": 0.297, "step": 1845 }, { "epoch": 7.09, "learning_rate": 0.00010569604086845466, "loss": 0.1119, "step": 1850 }, { "epoch": 7.09, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.0473183393478394, "eval_runtime": 75.0678, "eval_samples_per_second": 2.318, "eval_steps_per_second": 0.293, "step": 1850 }, { "epoch": 7.11, "learning_rate": 0.00010544061302681993, "loss": 0.153, "step": 1855 }, { "epoch": 7.11, "eval_accuracy": 0.735632183908046, "eval_loss": 1.066941499710083, "eval_runtime": 73.2783, "eval_samples_per_second": 2.375, "eval_steps_per_second": 0.3, "step": 1855 }, { "epoch": 7.13, "learning_rate": 0.00010518518518518518, "loss": 0.0751, "step": 1860 }, { "epoch": 7.13, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.1262329816818237, "eval_runtime": 73.4273, "eval_samples_per_second": 2.37, "eval_steps_per_second": 0.3, "step": 1860 }, { "epoch": 7.15, "learning_rate": 0.00010492975734355046, "loss": 0.0407, "step": 1865 }, { "epoch": 7.15, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.2633436918258667, "eval_runtime": 75.5481, "eval_samples_per_second": 2.303, "eval_steps_per_second": 0.291, "step": 1865 }, { "epoch": 7.16, "learning_rate": 0.00010467432950191571, "loss": 0.0824, "step": 1870 }, { "epoch": 7.16, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.3817542791366577, "eval_runtime": 75.1612, "eval_samples_per_second": 2.315, "eval_steps_per_second": 0.293, "step": 1870 }, { "epoch": 7.18, "learning_rate": 0.00010441890166028096, "loss": 0.6322, "step": 1875 }, { "epoch": 7.18, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.281891942024231, "eval_runtime": 76.8685, "eval_samples_per_second": 2.264, "eval_steps_per_second": 0.286, "step": 1875 }, { "epoch": 7.2, "learning_rate": 0.00010416347381864624, "loss": 0.1277, "step": 1880 }, { "epoch": 7.2, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.1478445529937744, "eval_runtime": 72.8672, "eval_samples_per_second": 2.388, "eval_steps_per_second": 0.302, "step": 1880 }, { "epoch": 7.22, "learning_rate": 0.00010390804597701149, "loss": 0.1752, "step": 1885 }, { "epoch": 7.22, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.2275943756103516, "eval_runtime": 74.0318, "eval_samples_per_second": 2.35, "eval_steps_per_second": 0.297, "step": 1885 }, { "epoch": 7.24, "learning_rate": 0.00010365261813537676, "loss": 0.4279, "step": 1890 }, { "epoch": 7.24, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.32535719871521, "eval_runtime": 73.7203, "eval_samples_per_second": 2.36, "eval_steps_per_second": 0.298, "step": 1890 }, { "epoch": 7.26, "learning_rate": 0.00010339719029374201, "loss": 0.1829, "step": 1895 }, { "epoch": 7.26, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.1822688579559326, "eval_runtime": 73.8624, "eval_samples_per_second": 2.356, "eval_steps_per_second": 0.298, "step": 1895 }, { "epoch": 7.28, "learning_rate": 0.00010314176245210729, "loss": 0.7235, "step": 1900 }, { "epoch": 7.28, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.2413686513900757, "eval_runtime": 73.5875, "eval_samples_per_second": 2.365, "eval_steps_per_second": 0.299, "step": 1900 }, { "epoch": 7.3, "learning_rate": 0.00010288633461047254, "loss": 0.1977, "step": 1905 }, { "epoch": 7.3, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.193420648574829, "eval_runtime": 75.4148, "eval_samples_per_second": 2.307, "eval_steps_per_second": 0.292, "step": 1905 }, { "epoch": 7.32, "learning_rate": 0.0001026309067688378, "loss": 0.2008, "step": 1910 }, { "epoch": 7.32, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.1319164037704468, "eval_runtime": 73.4679, "eval_samples_per_second": 2.368, "eval_steps_per_second": 0.299, "step": 1910 }, { "epoch": 7.34, "learning_rate": 0.00010237547892720307, "loss": 0.3229, "step": 1915 }, { "epoch": 7.34, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.2466601133346558, "eval_runtime": 74.0142, "eval_samples_per_second": 2.351, "eval_steps_per_second": 0.297, "step": 1915 }, { "epoch": 7.36, "learning_rate": 0.00010212005108556833, "loss": 0.2794, "step": 1920 }, { "epoch": 7.36, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.3975579738616943, "eval_runtime": 73.5307, "eval_samples_per_second": 2.366, "eval_steps_per_second": 0.299, "step": 1920 }, { "epoch": 7.38, "learning_rate": 0.00010186462324393359, "loss": 0.6104, "step": 1925 }, { "epoch": 7.38, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.4495618343353271, "eval_runtime": 75.3871, "eval_samples_per_second": 2.308, "eval_steps_per_second": 0.292, "step": 1925 }, { "epoch": 7.39, "learning_rate": 0.00010160919540229886, "loss": 0.3981, "step": 1930 }, { "epoch": 7.39, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.4250156879425049, "eval_runtime": 75.0034, "eval_samples_per_second": 2.32, "eval_steps_per_second": 0.293, "step": 1930 }, { "epoch": 7.41, "learning_rate": 0.00010135376756066412, "loss": 0.4133, "step": 1935 }, { "epoch": 7.41, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.3902997970581055, "eval_runtime": 74.0941, "eval_samples_per_second": 2.348, "eval_steps_per_second": 0.297, "step": 1935 }, { "epoch": 7.43, "learning_rate": 0.00010109833971902938, "loss": 0.2211, "step": 1940 }, { "epoch": 7.43, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.2912284135818481, "eval_runtime": 75.5221, "eval_samples_per_second": 2.304, "eval_steps_per_second": 0.291, "step": 1940 }, { "epoch": 7.45, "learning_rate": 0.00010084291187739463, "loss": 0.5127, "step": 1945 }, { "epoch": 7.45, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.1819477081298828, "eval_runtime": 76.0017, "eval_samples_per_second": 2.289, "eval_steps_per_second": 0.289, "step": 1945 }, { "epoch": 7.47, "learning_rate": 0.00010058748403575991, "loss": 0.4466, "step": 1950 }, { "epoch": 7.47, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.1182750463485718, "eval_runtime": 73.33, "eval_samples_per_second": 2.373, "eval_steps_per_second": 0.3, "step": 1950 }, { "epoch": 7.49, "learning_rate": 0.00010033205619412516, "loss": 0.3296, "step": 1955 }, { "epoch": 7.49, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.1901663541793823, "eval_runtime": 74.5747, "eval_samples_per_second": 2.333, "eval_steps_per_second": 0.295, "step": 1955 }, { "epoch": 7.51, "learning_rate": 0.00010007662835249044, "loss": 0.2157, "step": 1960 }, { "epoch": 7.51, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.127490520477295, "eval_runtime": 71.6666, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 1960 }, { "epoch": 7.53, "learning_rate": 9.982120051085569e-05, "loss": 0.1349, "step": 1965 }, { "epoch": 7.53, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.1302645206451416, "eval_runtime": 72.3204, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1965 }, { "epoch": 7.55, "learning_rate": 9.956577266922095e-05, "loss": 0.2552, "step": 1970 }, { "epoch": 7.55, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.1957188844680786, "eval_runtime": 71.7737, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 1970 }, { "epoch": 7.57, "learning_rate": 9.931034482758621e-05, "loss": 0.0794, "step": 1975 }, { "epoch": 7.57, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.1891249418258667, "eval_runtime": 72.3104, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 1975 }, { "epoch": 7.59, "learning_rate": 9.905491698595148e-05, "loss": 0.3412, "step": 1980 }, { "epoch": 7.59, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.1822692155838013, "eval_runtime": 71.8274, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 1980 }, { "epoch": 7.61, "learning_rate": 9.879948914431674e-05, "loss": 0.066, "step": 1985 }, { "epoch": 7.61, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.2583097219467163, "eval_runtime": 72.2537, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.304, "step": 1985 }, { "epoch": 7.62, "learning_rate": 9.8544061302682e-05, "loss": 0.1778, "step": 1990 }, { "epoch": 7.62, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.46968412399292, "eval_runtime": 71.9868, "eval_samples_per_second": 2.417, "eval_steps_per_second": 0.306, "step": 1990 }, { "epoch": 7.64, "learning_rate": 9.828863346104727e-05, "loss": 0.2946, "step": 1995 }, { "epoch": 7.64, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.4956047534942627, "eval_runtime": 72.3605, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 1995 }, { "epoch": 7.66, "learning_rate": 9.803320561941252e-05, "loss": 0.5025, "step": 2000 }, { "epoch": 7.66, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.2947438955307007, "eval_runtime": 73.1739, "eval_samples_per_second": 2.378, "eval_steps_per_second": 0.301, "step": 2000 }, { "epoch": 7.68, "learning_rate": 9.777777777777778e-05, "loss": 0.1465, "step": 2005 }, { "epoch": 7.68, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.4249370098114014, "eval_runtime": 72.9688, "eval_samples_per_second": 2.385, "eval_steps_per_second": 0.301, "step": 2005 }, { "epoch": 7.7, "learning_rate": 9.752234993614304e-05, "loss": 0.4608, "step": 2010 }, { "epoch": 7.7, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.4163832664489746, "eval_runtime": 71.6967, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2010 }, { "epoch": 7.72, "learning_rate": 9.72669220945083e-05, "loss": 0.0132, "step": 2015 }, { "epoch": 7.72, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.4231857061386108, "eval_runtime": 72.3309, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 2015 }, { "epoch": 7.74, "learning_rate": 9.701149425287357e-05, "loss": 0.3514, "step": 2020 }, { "epoch": 7.74, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.3790318965911865, "eval_runtime": 71.6757, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 2020 }, { "epoch": 7.76, "learning_rate": 9.675606641123883e-05, "loss": 0.1211, "step": 2025 }, { "epoch": 7.76, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.3964256048202515, "eval_runtime": 72.213, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 2025 }, { "epoch": 7.78, "learning_rate": 9.65006385696041e-05, "loss": 0.2947, "step": 2030 }, { "epoch": 7.78, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.3286200761795044, "eval_runtime": 71.7334, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2030 }, { "epoch": 7.8, "learning_rate": 9.624521072796935e-05, "loss": 0.49, "step": 2035 }, { "epoch": 7.8, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.0810390710830688, "eval_runtime": 72.3336, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 2035 }, { "epoch": 7.82, "learning_rate": 9.598978288633461e-05, "loss": 0.165, "step": 2040 }, { "epoch": 7.82, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.024723768234253, "eval_runtime": 71.7199, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2040 }, { "epoch": 7.84, "learning_rate": 9.573435504469987e-05, "loss": 0.138, "step": 2045 }, { "epoch": 7.84, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.224605679512024, "eval_runtime": 72.3204, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 2045 }, { "epoch": 7.85, "learning_rate": 9.547892720306514e-05, "loss": 0.2485, "step": 2050 }, { "epoch": 7.85, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.214393138885498, "eval_runtime": 71.5732, "eval_samples_per_second": 2.431, "eval_steps_per_second": 0.307, "step": 2050 }, { "epoch": 7.87, "learning_rate": 9.52234993614304e-05, "loss": 0.2188, "step": 2055 }, { "epoch": 7.87, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.2269409894943237, "eval_runtime": 73.9874, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 2055 }, { "epoch": 7.89, "learning_rate": 9.496807151979566e-05, "loss": 0.0995, "step": 2060 }, { "epoch": 7.89, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.3358938694000244, "eval_runtime": 73.7608, "eval_samples_per_second": 2.359, "eval_steps_per_second": 0.298, "step": 2060 }, { "epoch": 7.91, "learning_rate": 9.471264367816093e-05, "loss": 0.4366, "step": 2065 }, { "epoch": 7.91, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.3700920343399048, "eval_runtime": 72.2954, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 2065 }, { "epoch": 7.93, "learning_rate": 9.445721583652618e-05, "loss": 0.2468, "step": 2070 }, { "epoch": 7.93, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.5623064041137695, "eval_runtime": 71.8518, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 2070 }, { "epoch": 7.95, "learning_rate": 9.420178799489144e-05, "loss": 0.5595, "step": 2075 }, { "epoch": 7.95, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.3248261213302612, "eval_runtime": 72.2402, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 2075 }, { "epoch": 7.97, "learning_rate": 9.39463601532567e-05, "loss": 0.1288, "step": 2080 }, { "epoch": 7.97, "eval_accuracy": 0.735632183908046, "eval_loss": 1.1915298700332642, "eval_runtime": 73.574, "eval_samples_per_second": 2.365, "eval_steps_per_second": 0.299, "step": 2080 }, { "epoch": 7.99, "learning_rate": 9.369093231162197e-05, "loss": 0.6715, "step": 2085 }, { "epoch": 7.99, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.2690212726593018, "eval_runtime": 72.3729, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 2085 }, { "epoch": 8.01, "learning_rate": 9.343550446998723e-05, "loss": 0.2428, "step": 2090 }, { "epoch": 8.01, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.2000457048416138, "eval_runtime": 73.1206, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.301, "step": 2090 }, { "epoch": 8.03, "learning_rate": 9.318007662835249e-05, "loss": 0.1369, "step": 2095 }, { "epoch": 8.03, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.337141752243042, "eval_runtime": 72.28, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 2095 }, { "epoch": 8.05, "learning_rate": 9.292464878671776e-05, "loss": 0.0289, "step": 2100 }, { "epoch": 8.05, "eval_accuracy": 0.6551724137931034, "eval_loss": 1.6684503555297852, "eval_runtime": 71.8209, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2100 }, { "epoch": 8.07, "learning_rate": 9.266922094508302e-05, "loss": 0.2577, "step": 2105 }, { "epoch": 8.07, "eval_accuracy": 0.6494252873563219, "eval_loss": 1.607564091682434, "eval_runtime": 74.3746, "eval_samples_per_second": 2.34, "eval_steps_per_second": 0.296, "step": 2105 }, { "epoch": 8.08, "learning_rate": 9.241379310344827e-05, "loss": 0.1756, "step": 2110 }, { "epoch": 8.08, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.4137848615646362, "eval_runtime": 71.6266, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 2110 }, { "epoch": 8.1, "learning_rate": 9.215836526181353e-05, "loss": 0.1628, "step": 2115 }, { "epoch": 8.1, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.380110502243042, "eval_runtime": 74.2276, "eval_samples_per_second": 2.344, "eval_steps_per_second": 0.296, "step": 2115 }, { "epoch": 8.12, "learning_rate": 9.19029374201788e-05, "loss": 0.0913, "step": 2120 }, { "epoch": 8.12, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.3470419645309448, "eval_runtime": 73.3072, "eval_samples_per_second": 2.374, "eval_steps_per_second": 0.3, "step": 2120 }, { "epoch": 8.14, "learning_rate": 9.164750957854406e-05, "loss": 0.0892, "step": 2125 }, { "epoch": 8.14, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.4106884002685547, "eval_runtime": 73.9475, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 2125 }, { "epoch": 8.16, "learning_rate": 9.139208173690932e-05, "loss": 0.3874, "step": 2130 }, { "epoch": 8.16, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.3398877382278442, "eval_runtime": 71.8799, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 2130 }, { "epoch": 8.18, "learning_rate": 9.113665389527459e-05, "loss": 0.1405, "step": 2135 }, { "epoch": 8.18, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.3665359020233154, "eval_runtime": 74.1921, "eval_samples_per_second": 2.345, "eval_steps_per_second": 0.297, "step": 2135 }, { "epoch": 8.2, "learning_rate": 9.088122605363985e-05, "loss": 0.0921, "step": 2140 }, { "epoch": 8.2, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.4718743562698364, "eval_runtime": 71.6672, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 2140 }, { "epoch": 8.22, "learning_rate": 9.062579821200511e-05, "loss": 0.1429, "step": 2145 }, { "epoch": 8.22, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.484999179840088, "eval_runtime": 72.347, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 2145 }, { "epoch": 8.24, "learning_rate": 9.037037037037038e-05, "loss": 0.1498, "step": 2150 }, { "epoch": 8.24, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.41448974609375, "eval_runtime": 73.4407, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.3, "step": 2150 }, { "epoch": 8.26, "learning_rate": 9.011494252873564e-05, "loss": 0.004, "step": 2155 }, { "epoch": 8.26, "eval_accuracy": 0.735632183908046, "eval_loss": 1.3708915710449219, "eval_runtime": 72.3246, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 2155 }, { "epoch": 8.28, "learning_rate": 8.98595146871009e-05, "loss": 0.0364, "step": 2160 }, { "epoch": 8.28, "eval_accuracy": 0.735632183908046, "eval_loss": 1.3031058311462402, "eval_runtime": 71.8044, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2160 }, { "epoch": 8.3, "learning_rate": 8.960408684546617e-05, "loss": 0.0544, "step": 2165 }, { "epoch": 8.3, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.41126549243927, "eval_runtime": 74.1212, "eval_samples_per_second": 2.348, "eval_steps_per_second": 0.297, "step": 2165 }, { "epoch": 8.31, "learning_rate": 8.934865900383143e-05, "loss": 0.0952, "step": 2170 }, { "epoch": 8.31, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.532741904258728, "eval_runtime": 71.6535, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 2170 }, { "epoch": 8.33, "learning_rate": 8.90932311621967e-05, "loss": 0.0006, "step": 2175 }, { "epoch": 8.33, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.6938968896865845, "eval_runtime": 72.2802, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 2175 }, { "epoch": 8.35, "learning_rate": 8.883780332056194e-05, "loss": 0.2597, "step": 2180 }, { "epoch": 8.35, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.5596327781677246, "eval_runtime": 71.7186, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2180 }, { "epoch": 8.37, "learning_rate": 8.85823754789272e-05, "loss": 0.2526, "step": 2185 }, { "epoch": 8.37, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5019148588180542, "eval_runtime": 73.0896, "eval_samples_per_second": 2.381, "eval_steps_per_second": 0.301, "step": 2185 }, { "epoch": 8.39, "learning_rate": 8.832694763729247e-05, "loss": 0.102, "step": 2190 }, { "epoch": 8.39, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.612549901008606, "eval_runtime": 71.7757, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 2190 }, { "epoch": 8.41, "learning_rate": 8.807151979565773e-05, "loss": 0.02, "step": 2195 }, { "epoch": 8.41, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.7707464694976807, "eval_runtime": 72.3245, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 2195 }, { "epoch": 8.43, "learning_rate": 8.7816091954023e-05, "loss": 0.0062, "step": 2200 }, { "epoch": 8.43, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.5734792947769165, "eval_runtime": 73.3522, "eval_samples_per_second": 2.372, "eval_steps_per_second": 0.3, "step": 2200 }, { "epoch": 8.45, "learning_rate": 8.756066411238826e-05, "loss": 0.0429, "step": 2205 }, { "epoch": 8.45, "eval_accuracy": 0.735632183908046, "eval_loss": 1.4174237251281738, "eval_runtime": 72.276, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 2205 }, { "epoch": 8.47, "learning_rate": 8.730523627075352e-05, "loss": 0.272, "step": 2210 }, { "epoch": 8.47, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.4774861335754395, "eval_runtime": 72.0492, "eval_samples_per_second": 2.415, "eval_steps_per_second": 0.305, "step": 2210 }, { "epoch": 8.49, "learning_rate": 8.704980842911877e-05, "loss": 0.1014, "step": 2215 }, { "epoch": 8.49, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.556422472000122, "eval_runtime": 73.9875, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 2215 }, { "epoch": 8.51, "learning_rate": 8.679438058748404e-05, "loss": 0.1191, "step": 2220 }, { "epoch": 8.51, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.6515696048736572, "eval_runtime": 71.7867, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 2220 }, { "epoch": 8.52, "learning_rate": 8.65389527458493e-05, "loss": 0.1466, "step": 2225 }, { "epoch": 8.52, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.5438907146453857, "eval_runtime": 72.413, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.304, "step": 2225 }, { "epoch": 8.54, "learning_rate": 8.628352490421456e-05, "loss": 0.033, "step": 2230 }, { "epoch": 8.54, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.6586909294128418, "eval_runtime": 71.5867, "eval_samples_per_second": 2.431, "eval_steps_per_second": 0.307, "step": 2230 }, { "epoch": 8.56, "learning_rate": 8.602809706257983e-05, "loss": 0.4028, "step": 2235 }, { "epoch": 8.56, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.5925533771514893, "eval_runtime": 74.2608, "eval_samples_per_second": 2.343, "eval_steps_per_second": 0.296, "step": 2235 }, { "epoch": 8.58, "learning_rate": 8.577266922094509e-05, "loss": 0.29, "step": 2240 }, { "epoch": 8.58, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.4162274599075317, "eval_runtime": 71.6928, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2240 }, { "epoch": 8.6, "learning_rate": 8.551724137931035e-05, "loss": 0.082, "step": 2245 }, { "epoch": 8.6, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.4092352390289307, "eval_runtime": 72.1601, "eval_samples_per_second": 2.411, "eval_steps_per_second": 0.305, "step": 2245 }, { "epoch": 8.62, "learning_rate": 8.52618135376756e-05, "loss": 0.0273, "step": 2250 }, { "epoch": 8.62, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.4052162170410156, "eval_runtime": 71.6429, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 2250 }, { "epoch": 8.64, "learning_rate": 8.500638569604087e-05, "loss": 0.2974, "step": 2255 }, { "epoch": 8.64, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.4225624799728394, "eval_runtime": 72.3814, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 2255 }, { "epoch": 8.66, "learning_rate": 8.475095785440613e-05, "loss": 0.7249, "step": 2260 }, { "epoch": 8.66, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.3933783769607544, "eval_runtime": 71.7734, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 2260 }, { "epoch": 8.68, "learning_rate": 8.449553001277139e-05, "loss": 0.1874, "step": 2265 }, { "epoch": 8.68, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.3755295276641846, "eval_runtime": 73.9422, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 2265 }, { "epoch": 8.7, "learning_rate": 8.424010217113666e-05, "loss": 0.0365, "step": 2270 }, { "epoch": 8.7, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.3688724040985107, "eval_runtime": 71.7961, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 2270 }, { "epoch": 8.72, "learning_rate": 8.398467432950192e-05, "loss": 0.1775, "step": 2275 }, { "epoch": 8.72, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.3662774562835693, "eval_runtime": 72.2725, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.304, "step": 2275 }, { "epoch": 8.74, "learning_rate": 8.372924648786718e-05, "loss": 0.0974, "step": 2280 }, { "epoch": 8.74, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.3852037191390991, "eval_runtime": 71.6891, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2280 }, { "epoch": 8.75, "learning_rate": 8.347381864623243e-05, "loss": 0.1109, "step": 2285 }, { "epoch": 8.75, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.4648981094360352, "eval_runtime": 72.2936, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 2285 }, { "epoch": 8.77, "learning_rate": 8.32183908045977e-05, "loss": 0.2778, "step": 2290 }, { "epoch": 8.77, "eval_accuracy": 0.6781609195402298, "eval_loss": 1.435415267944336, "eval_runtime": 71.7606, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 2290 }, { "epoch": 8.79, "learning_rate": 8.296296296296296e-05, "loss": 0.0384, "step": 2295 }, { "epoch": 8.79, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.3910033702850342, "eval_runtime": 72.3796, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 2295 }, { "epoch": 8.81, "learning_rate": 8.270753512132822e-05, "loss": 0.1499, "step": 2300 }, { "epoch": 8.81, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.4266196489334106, "eval_runtime": 71.6688, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 2300 }, { "epoch": 8.83, "learning_rate": 8.245210727969349e-05, "loss": 0.0703, "step": 2305 }, { "epoch": 8.83, "eval_accuracy": 0.735632183908046, "eval_loss": 1.3794021606445312, "eval_runtime": 73.9299, "eval_samples_per_second": 2.354, "eval_steps_per_second": 0.298, "step": 2305 }, { "epoch": 8.85, "learning_rate": 8.219667943805875e-05, "loss": 0.2238, "step": 2310 }, { "epoch": 8.85, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.4704707860946655, "eval_runtime": 71.631, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 2310 }, { "epoch": 8.87, "learning_rate": 8.194125159642401e-05, "loss": 0.2418, "step": 2315 }, { "epoch": 8.87, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.4477012157440186, "eval_runtime": 73.9608, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.297, "step": 2315 }, { "epoch": 8.89, "learning_rate": 8.168582375478928e-05, "loss": 0.0854, "step": 2320 }, { "epoch": 8.89, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.453401803970337, "eval_runtime": 71.72, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2320 }, { "epoch": 8.91, "learning_rate": 8.143039591315454e-05, "loss": 0.1613, "step": 2325 }, { "epoch": 8.91, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.5824450254440308, "eval_runtime": 72.4535, "eval_samples_per_second": 2.402, "eval_steps_per_second": 0.304, "step": 2325 }, { "epoch": 8.93, "learning_rate": 8.11749680715198e-05, "loss": 0.0599, "step": 2330 }, { "epoch": 8.93, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.5845459699630737, "eval_runtime": 73.534, "eval_samples_per_second": 2.366, "eval_steps_per_second": 0.299, "step": 2330 }, { "epoch": 8.95, "learning_rate": 8.091954022988507e-05, "loss": 0.2216, "step": 2335 }, { "epoch": 8.95, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.5560506582260132, "eval_runtime": 72.8271, "eval_samples_per_second": 2.389, "eval_steps_per_second": 0.302, "step": 2335 }, { "epoch": 8.97, "learning_rate": 8.066411238825033e-05, "loss": 0.023, "step": 2340 }, { "epoch": 8.97, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.5869792699813843, "eval_runtime": 71.7467, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 2340 }, { "epoch": 8.98, "learning_rate": 8.04086845466156e-05, "loss": 0.0166, "step": 2345 }, { "epoch": 8.98, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.6614141464233398, "eval_runtime": 72.2937, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 2345 }, { "epoch": 9.0, "learning_rate": 8.015325670498086e-05, "loss": 0.1114, "step": 2350 }, { "epoch": 9.0, "eval_accuracy": 0.735632183908046, "eval_loss": 1.5962401628494263, "eval_runtime": 71.8444, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 2350 }, { "epoch": 9.02, "learning_rate": 7.989782886334612e-05, "loss": 0.0254, "step": 2355 }, { "epoch": 9.02, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.578687310218811, "eval_runtime": 73.975, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 2355 }, { "epoch": 9.04, "learning_rate": 7.964240102171137e-05, "loss": 0.0807, "step": 2360 }, { "epoch": 9.04, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.6112695932388306, "eval_runtime": 73.1873, "eval_samples_per_second": 2.377, "eval_steps_per_second": 0.301, "step": 2360 }, { "epoch": 9.06, "learning_rate": 7.938697318007663e-05, "loss": 0.0151, "step": 2365 }, { "epoch": 9.06, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.6175737380981445, "eval_runtime": 72.2137, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 2365 }, { "epoch": 9.08, "learning_rate": 7.91315453384419e-05, "loss": 0.0234, "step": 2370 }, { "epoch": 9.08, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.6412409543991089, "eval_runtime": 73.4407, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.3, "step": 2370 }, { "epoch": 9.1, "learning_rate": 7.887611749680716e-05, "loss": 0.0901, "step": 2375 }, { "epoch": 9.1, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.6331058740615845, "eval_runtime": 72.4003, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.304, "step": 2375 }, { "epoch": 9.12, "learning_rate": 7.862068965517242e-05, "loss": 0.0032, "step": 2380 }, { "epoch": 9.12, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.7730814218521118, "eval_runtime": 71.8133, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2380 }, { "epoch": 9.14, "learning_rate": 7.836526181353769e-05, "loss": 0.0657, "step": 2385 }, { "epoch": 9.14, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.7673834562301636, "eval_runtime": 72.4937, "eval_samples_per_second": 2.4, "eval_steps_per_second": 0.303, "step": 2385 }, { "epoch": 9.16, "learning_rate": 7.810983397190295e-05, "loss": 0.0035, "step": 2390 }, { "epoch": 9.16, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.7309309244155884, "eval_runtime": 71.574, "eval_samples_per_second": 2.431, "eval_steps_per_second": 0.307, "step": 2390 }, { "epoch": 9.18, "learning_rate": 7.78544061302682e-05, "loss": 0.019, "step": 2395 }, { "epoch": 9.18, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6792755126953125, "eval_runtime": 74.3875, "eval_samples_per_second": 2.339, "eval_steps_per_second": 0.296, "step": 2395 }, { "epoch": 9.2, "learning_rate": 7.759897828863346e-05, "loss": 0.0038, "step": 2400 }, { "epoch": 9.2, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7092453241348267, "eval_runtime": 71.7278, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2400 }, { "epoch": 9.21, "learning_rate": 7.734355044699873e-05, "loss": 0.0061, "step": 2405 }, { "epoch": 9.21, "eval_accuracy": 0.735632183908046, "eval_loss": 1.726812720298767, "eval_runtime": 72.2916, "eval_samples_per_second": 2.407, "eval_steps_per_second": 0.304, "step": 2405 }, { "epoch": 9.23, "learning_rate": 7.708812260536399e-05, "loss": 0.0077, "step": 2410 }, { "epoch": 9.23, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6786487102508545, "eval_runtime": 71.8439, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 2410 }, { "epoch": 9.25, "learning_rate": 7.683269476372925e-05, "loss": 0.1325, "step": 2415 }, { "epoch": 9.25, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7274757623672485, "eval_runtime": 72.374, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 2415 }, { "epoch": 9.27, "learning_rate": 7.657726692209452e-05, "loss": 0.013, "step": 2420 }, { "epoch": 9.27, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.8993436098098755, "eval_runtime": 73.5113, "eval_samples_per_second": 2.367, "eval_steps_per_second": 0.299, "step": 2420 }, { "epoch": 9.29, "learning_rate": 7.632183908045978e-05, "loss": 0.2623, "step": 2425 }, { "epoch": 9.29, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.9386109113693237, "eval_runtime": 74.2008, "eval_samples_per_second": 2.345, "eval_steps_per_second": 0.296, "step": 2425 }, { "epoch": 9.31, "learning_rate": 7.606641123882503e-05, "loss": 0.1043, "step": 2430 }, { "epoch": 9.31, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7973520755767822, "eval_runtime": 71.6127, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 2430 }, { "epoch": 9.33, "learning_rate": 7.581098339719029e-05, "loss": 0.2192, "step": 2435 }, { "epoch": 9.33, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.622019648551941, "eval_runtime": 73.9624, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.297, "step": 2435 }, { "epoch": 9.35, "learning_rate": 7.555555555555556e-05, "loss": 0.0019, "step": 2440 }, { "epoch": 9.35, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6389633417129517, "eval_runtime": 71.6133, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 2440 }, { "epoch": 9.37, "learning_rate": 7.530012771392082e-05, "loss": 0.0072, "step": 2445 }, { "epoch": 9.37, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.765933632850647, "eval_runtime": 74.1608, "eval_samples_per_second": 2.346, "eval_steps_per_second": 0.297, "step": 2445 }, { "epoch": 9.39, "learning_rate": 7.504469987228608e-05, "loss": 0.1955, "step": 2450 }, { "epoch": 9.39, "eval_accuracy": 0.6724137931034483, "eval_loss": 1.865466594696045, "eval_runtime": 71.8, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2450 }, { "epoch": 9.41, "learning_rate": 7.478927203065135e-05, "loss": 0.0232, "step": 2455 }, { "epoch": 9.41, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.654495120048523, "eval_runtime": 74.2804, "eval_samples_per_second": 2.342, "eval_steps_per_second": 0.296, "step": 2455 }, { "epoch": 9.43, "learning_rate": 7.453384418901661e-05, "loss": 0.0479, "step": 2460 }, { "epoch": 9.43, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.589800238609314, "eval_runtime": 73.7881, "eval_samples_per_second": 2.358, "eval_steps_per_second": 0.298, "step": 2460 }, { "epoch": 9.44, "learning_rate": 7.427841634738186e-05, "loss": 0.0284, "step": 2465 }, { "epoch": 9.44, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6431066989898682, "eval_runtime": 74.4192, "eval_samples_per_second": 2.338, "eval_steps_per_second": 0.296, "step": 2465 }, { "epoch": 9.46, "learning_rate": 7.402298850574712e-05, "loss": 0.2414, "step": 2470 }, { "epoch": 9.46, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6797583103179932, "eval_runtime": 73.2814, "eval_samples_per_second": 2.374, "eval_steps_per_second": 0.3, "step": 2470 }, { "epoch": 9.48, "learning_rate": 7.376756066411239e-05, "loss": 0.037, "step": 2475 }, { "epoch": 9.48, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.7022534608840942, "eval_runtime": 76.0683, "eval_samples_per_second": 2.287, "eval_steps_per_second": 0.289, "step": 2475 }, { "epoch": 9.5, "learning_rate": 7.351213282247765e-05, "loss": 0.2024, "step": 2480 }, { "epoch": 9.5, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.7409052848815918, "eval_runtime": 75.5479, "eval_samples_per_second": 2.303, "eval_steps_per_second": 0.291, "step": 2480 }, { "epoch": 9.52, "learning_rate": 7.325670498084291e-05, "loss": 0.0136, "step": 2485 }, { "epoch": 9.52, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5980173349380493, "eval_runtime": 74.9877, "eval_samples_per_second": 2.32, "eval_steps_per_second": 0.293, "step": 2485 }, { "epoch": 9.54, "learning_rate": 7.300127713920818e-05, "loss": 0.0064, "step": 2490 }, { "epoch": 9.54, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.572400450706482, "eval_runtime": 74.8005, "eval_samples_per_second": 2.326, "eval_steps_per_second": 0.294, "step": 2490 }, { "epoch": 9.56, "learning_rate": 7.274584929757344e-05, "loss": 0.0092, "step": 2495 }, { "epoch": 9.56, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5245319604873657, "eval_runtime": 73.814, "eval_samples_per_second": 2.357, "eval_steps_per_second": 0.298, "step": 2495 }, { "epoch": 9.58, "learning_rate": 7.24904214559387e-05, "loss": 0.0231, "step": 2500 }, { "epoch": 9.58, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.4510209560394287, "eval_runtime": 73.161, "eval_samples_per_second": 2.378, "eval_steps_per_second": 0.301, "step": 2500 }, { "epoch": 9.6, "learning_rate": 7.223499361430395e-05, "loss": 0.0152, "step": 2505 }, { "epoch": 9.6, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.5502389669418335, "eval_runtime": 76.6679, "eval_samples_per_second": 2.27, "eval_steps_per_second": 0.287, "step": 2505 }, { "epoch": 9.62, "learning_rate": 7.197956577266922e-05, "loss": 0.1021, "step": 2510 }, { "epoch": 9.62, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.6658586263656616, "eval_runtime": 75.2815, "eval_samples_per_second": 2.311, "eval_steps_per_second": 0.292, "step": 2510 }, { "epoch": 9.64, "learning_rate": 7.172413793103448e-05, "loss": 0.0043, "step": 2515 }, { "epoch": 9.64, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.735673189163208, "eval_runtime": 73.8942, "eval_samples_per_second": 2.355, "eval_steps_per_second": 0.298, "step": 2515 }, { "epoch": 9.66, "learning_rate": 7.146871008939974e-05, "loss": 0.0011, "step": 2520 }, { "epoch": 9.66, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.8052144050598145, "eval_runtime": 75.1879, "eval_samples_per_second": 2.314, "eval_steps_per_second": 0.293, "step": 2520 }, { "epoch": 9.67, "learning_rate": 7.1213282247765e-05, "loss": 0.1036, "step": 2525 }, { "epoch": 9.67, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.8808355331420898, "eval_runtime": 74.8079, "eval_samples_per_second": 2.326, "eval_steps_per_second": 0.294, "step": 2525 }, { "epoch": 9.69, "learning_rate": 7.095785440613027e-05, "loss": 0.0641, "step": 2530 }, { "epoch": 9.69, "eval_accuracy": 0.6839080459770115, "eval_loss": 1.7997287511825562, "eval_runtime": 73.4276, "eval_samples_per_second": 2.37, "eval_steps_per_second": 0.3, "step": 2530 }, { "epoch": 9.71, "learning_rate": 7.070242656449553e-05, "loss": 0.0263, "step": 2535 }, { "epoch": 9.71, "eval_accuracy": 0.6896551724137931, "eval_loss": 1.8069945573806763, "eval_runtime": 74.1898, "eval_samples_per_second": 2.345, "eval_steps_per_second": 0.297, "step": 2535 }, { "epoch": 9.73, "learning_rate": 7.04469987228608e-05, "loss": 0.1702, "step": 2540 }, { "epoch": 9.73, "eval_accuracy": 0.6954022988505747, "eval_loss": 1.8327751159667969, "eval_runtime": 73.9343, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 2540 }, { "epoch": 9.75, "learning_rate": 7.019157088122606e-05, "loss": 0.134, "step": 2545 }, { "epoch": 9.75, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.8617277145385742, "eval_runtime": 75.8555, "eval_samples_per_second": 2.294, "eval_steps_per_second": 0.29, "step": 2545 }, { "epoch": 9.77, "learning_rate": 6.993614303959132e-05, "loss": 0.1743, "step": 2550 }, { "epoch": 9.77, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.8030837774276733, "eval_runtime": 75.2054, "eval_samples_per_second": 2.314, "eval_steps_per_second": 0.293, "step": 2550 }, { "epoch": 9.79, "learning_rate": 6.968071519795659e-05, "loss": 0.0431, "step": 2555 }, { "epoch": 9.79, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.720353364944458, "eval_runtime": 73.8007, "eval_samples_per_second": 2.358, "eval_steps_per_second": 0.298, "step": 2555 }, { "epoch": 9.81, "learning_rate": 6.942528735632185e-05, "loss": 0.1325, "step": 2560 }, { "epoch": 9.81, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.6215603351593018, "eval_runtime": 74.9611, "eval_samples_per_second": 2.321, "eval_steps_per_second": 0.293, "step": 2560 }, { "epoch": 9.83, "learning_rate": 6.916985951468711e-05, "loss": 0.0021, "step": 2565 }, { "epoch": 9.83, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6373803615570068, "eval_runtime": 74.0676, "eval_samples_per_second": 2.349, "eval_steps_per_second": 0.297, "step": 2565 }, { "epoch": 9.85, "learning_rate": 6.891443167305238e-05, "loss": 0.0177, "step": 2570 }, { "epoch": 9.85, "eval_accuracy": 0.7011494252873564, "eval_loss": 1.7743432521820068, "eval_runtime": 73.9209, "eval_samples_per_second": 2.354, "eval_steps_per_second": 0.298, "step": 2570 }, { "epoch": 9.87, "learning_rate": 6.865900383141763e-05, "loss": 0.0059, "step": 2575 }, { "epoch": 9.87, "eval_accuracy": 0.7068965517241379, "eval_loss": 1.7778594493865967, "eval_runtime": 73.9917, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 2575 }, { "epoch": 9.89, "learning_rate": 6.840357598978289e-05, "loss": 0.0019, "step": 2580 }, { "epoch": 9.89, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.670163869857788, "eval_runtime": 75.1748, "eval_samples_per_second": 2.315, "eval_steps_per_second": 0.293, "step": 2580 }, { "epoch": 9.9, "learning_rate": 6.814814814814815e-05, "loss": 0.3578, "step": 2585 }, { "epoch": 9.9, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6000075340270996, "eval_runtime": 74.6944, "eval_samples_per_second": 2.329, "eval_steps_per_second": 0.295, "step": 2585 }, { "epoch": 9.92, "learning_rate": 6.789272030651342e-05, "loss": 0.0012, "step": 2590 }, { "epoch": 9.92, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.5551468133926392, "eval_runtime": 73.4262, "eval_samples_per_second": 2.37, "eval_steps_per_second": 0.3, "step": 2590 }, { "epoch": 9.94, "learning_rate": 6.763729246487868e-05, "loss": 0.0325, "step": 2595 }, { "epoch": 9.94, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.5792527198791504, "eval_runtime": 72.3934, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 2595 }, { "epoch": 9.96, "learning_rate": 6.738186462324394e-05, "loss": 0.0029, "step": 2600 }, { "epoch": 9.96, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.6270909309387207, "eval_runtime": 71.7327, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2600 }, { "epoch": 9.98, "learning_rate": 6.71264367816092e-05, "loss": 0.0894, "step": 2605 }, { "epoch": 9.98, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.6158068180084229, "eval_runtime": 72.1335, "eval_samples_per_second": 2.412, "eval_steps_per_second": 0.305, "step": 2605 }, { "epoch": 10.0, "learning_rate": 6.687100893997446e-05, "loss": 0.0841, "step": 2610 }, { "epoch": 10.0, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6193947792053223, "eval_runtime": 71.5867, "eval_samples_per_second": 2.431, "eval_steps_per_second": 0.307, "step": 2610 }, { "epoch": 10.02, "learning_rate": 6.661558109833972e-05, "loss": 0.0199, "step": 2615 }, { "epoch": 10.02, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6935107707977295, "eval_runtime": 72.1289, "eval_samples_per_second": 2.412, "eval_steps_per_second": 0.305, "step": 2615 }, { "epoch": 10.04, "learning_rate": 6.636015325670498e-05, "loss": 0.0164, "step": 2620 }, { "epoch": 10.04, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.7199974060058594, "eval_runtime": 71.6036, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 2620 }, { "epoch": 10.06, "learning_rate": 6.610472541507025e-05, "loss": 0.017, "step": 2625 }, { "epoch": 10.06, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7973920106887817, "eval_runtime": 72.3533, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 2625 }, { "epoch": 10.08, "learning_rate": 6.584929757343551e-05, "loss": 0.0005, "step": 2630 }, { "epoch": 10.08, "eval_accuracy": 0.735632183908046, "eval_loss": 1.9108890295028687, "eval_runtime": 73.2282, "eval_samples_per_second": 2.376, "eval_steps_per_second": 0.3, "step": 2630 }, { "epoch": 10.1, "learning_rate": 6.559386973180077e-05, "loss": 0.0272, "step": 2635 }, { "epoch": 10.1, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.9884196519851685, "eval_runtime": 73.6979, "eval_samples_per_second": 2.361, "eval_steps_per_second": 0.299, "step": 2635 }, { "epoch": 10.11, "learning_rate": 6.533844189016604e-05, "loss": 0.0006, "step": 2640 }, { "epoch": 10.11, "eval_accuracy": 0.7241379310344828, "eval_loss": 2.0442349910736084, "eval_runtime": 71.6401, "eval_samples_per_second": 2.429, "eval_steps_per_second": 0.307, "step": 2640 }, { "epoch": 10.13, "learning_rate": 6.508301404853129e-05, "loss": 0.0003, "step": 2645 }, { "epoch": 10.13, "eval_accuracy": 0.735632183908046, "eval_loss": 2.0899100303649902, "eval_runtime": 74.2411, "eval_samples_per_second": 2.344, "eval_steps_per_second": 0.296, "step": 2645 }, { "epoch": 10.15, "learning_rate": 6.482758620689655e-05, "loss": 0.0064, "step": 2650 }, { "epoch": 10.15, "eval_accuracy": 0.7298850574712644, "eval_loss": 2.0910024642944336, "eval_runtime": 71.604, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 2650 }, { "epoch": 10.17, "learning_rate": 6.457215836526181e-05, "loss": 0.3947, "step": 2655 }, { "epoch": 10.17, "eval_accuracy": 0.7241379310344828, "eval_loss": 2.110867500305176, "eval_runtime": 72.2402, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 2655 }, { "epoch": 10.19, "learning_rate": 6.431673052362708e-05, "loss": 0.0026, "step": 2660 }, { "epoch": 10.19, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.997544765472412, "eval_runtime": 73.4539, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.3, "step": 2660 }, { "epoch": 10.21, "learning_rate": 6.406130268199234e-05, "loss": 0.0017, "step": 2665 }, { "epoch": 10.21, "eval_accuracy": 0.735632183908046, "eval_loss": 1.9001383781433105, "eval_runtime": 72.3167, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 2665 }, { "epoch": 10.23, "learning_rate": 6.38058748403576e-05, "loss": 0.0457, "step": 2670 }, { "epoch": 10.23, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7431246042251587, "eval_runtime": 71.7066, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2670 }, { "epoch": 10.25, "learning_rate": 6.355044699872287e-05, "loss": 0.1929, "step": 2675 }, { "epoch": 10.25, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6250685453414917, "eval_runtime": 74.0216, "eval_samples_per_second": 2.351, "eval_steps_per_second": 0.297, "step": 2675 }, { "epoch": 10.27, "learning_rate": 6.329501915708812e-05, "loss": 0.0095, "step": 2680 }, { "epoch": 10.27, "eval_accuracy": 0.735632183908046, "eval_loss": 1.595952033996582, "eval_runtime": 71.6176, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 2680 }, { "epoch": 10.29, "learning_rate": 6.303959131545338e-05, "loss": 0.0002, "step": 2685 }, { "epoch": 10.29, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6410387754440308, "eval_runtime": 72.2047, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 2685 }, { "epoch": 10.31, "learning_rate": 6.278416347381864e-05, "loss": 0.0325, "step": 2690 }, { "epoch": 10.31, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.673262357711792, "eval_runtime": 71.6131, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 2690 }, { "epoch": 10.33, "learning_rate": 6.25287356321839e-05, "loss": 0.0004, "step": 2695 }, { "epoch": 10.33, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.684037446975708, "eval_runtime": 74.3936, "eval_samples_per_second": 2.339, "eval_steps_per_second": 0.296, "step": 2695 }, { "epoch": 10.34, "learning_rate": 6.227330779054917e-05, "loss": 0.1141, "step": 2700 }, { "epoch": 10.34, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.6978554725646973, "eval_runtime": 73.7209, "eval_samples_per_second": 2.36, "eval_steps_per_second": 0.298, "step": 2700 }, { "epoch": 10.36, "learning_rate": 6.201787994891443e-05, "loss": 0.0059, "step": 2705 }, { "epoch": 10.36, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7011663913726807, "eval_runtime": 72.3468, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 2705 }, { "epoch": 10.38, "learning_rate": 6.17624521072797e-05, "loss": 0.0245, "step": 2710 }, { "epoch": 10.38, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.77028226852417, "eval_runtime": 72.2673, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.304, "step": 2710 }, { "epoch": 10.4, "learning_rate": 6.150702426564496e-05, "loss": 0.0076, "step": 2715 }, { "epoch": 10.4, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.8610811233520508, "eval_runtime": 72.307, "eval_samples_per_second": 2.406, "eval_steps_per_second": 0.304, "step": 2715 }, { "epoch": 10.42, "learning_rate": 6.125159642401021e-05, "loss": 0.0021, "step": 2720 }, { "epoch": 10.42, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.8926019668579102, "eval_runtime": 71.6919, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2720 }, { "epoch": 10.44, "learning_rate": 6.099616858237548e-05, "loss": 0.0065, "step": 2725 }, { "epoch": 10.44, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.880444049835205, "eval_runtime": 72.3737, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 2725 }, { "epoch": 10.46, "learning_rate": 6.074074074074074e-05, "loss": 0.0056, "step": 2730 }, { "epoch": 10.46, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.802688479423523, "eval_runtime": 71.68, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2730 }, { "epoch": 10.48, "learning_rate": 6.0485312899106007e-05, "loss": 0.0115, "step": 2735 }, { "epoch": 10.48, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.7529903650283813, "eval_runtime": 72.2535, "eval_samples_per_second": 2.408, "eval_steps_per_second": 0.304, "step": 2735 }, { "epoch": 10.5, "learning_rate": 6.022988505747127e-05, "loss": 0.007, "step": 2740 }, { "epoch": 10.5, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7274096012115479, "eval_runtime": 71.6133, "eval_samples_per_second": 2.43, "eval_steps_per_second": 0.307, "step": 2740 }, { "epoch": 10.52, "learning_rate": 5.997445721583653e-05, "loss": 0.0006, "step": 2745 }, { "epoch": 10.52, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7023934125900269, "eval_runtime": 72.1912, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 2745 }, { "epoch": 10.54, "learning_rate": 5.97190293742018e-05, "loss": 0.0039, "step": 2750 }, { "epoch": 10.54, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7069329023361206, "eval_runtime": 71.7141, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2750 }, { "epoch": 10.56, "learning_rate": 5.9463601532567046e-05, "loss": 0.0006, "step": 2755 }, { "epoch": 10.56, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7325639724731445, "eval_runtime": 72.2136, "eval_samples_per_second": 2.41, "eval_steps_per_second": 0.305, "step": 2755 }, { "epoch": 10.57, "learning_rate": 5.920817369093231e-05, "loss": 0.0012, "step": 2760 }, { "epoch": 10.57, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7432584762573242, "eval_runtime": 71.6908, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2760 }, { "epoch": 10.59, "learning_rate": 5.895274584929757e-05, "loss": 0.002, "step": 2765 }, { "epoch": 10.59, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6767596006393433, "eval_runtime": 74.0851, "eval_samples_per_second": 2.349, "eval_steps_per_second": 0.297, "step": 2765 }, { "epoch": 10.61, "learning_rate": 5.8697318007662837e-05, "loss": 0.0051, "step": 2770 }, { "epoch": 10.61, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6323719024658203, "eval_runtime": 71.7909, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 2770 }, { "epoch": 10.63, "learning_rate": 5.84418901660281e-05, "loss": 0.1266, "step": 2775 }, { "epoch": 10.63, "eval_accuracy": 0.735632183908046, "eval_loss": 1.59928560256958, "eval_runtime": 73.9918, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 2775 }, { "epoch": 10.65, "learning_rate": 5.818646232439336e-05, "loss": 0.0003, "step": 2780 }, { "epoch": 10.65, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6210390329360962, "eval_runtime": 71.7199, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2780 }, { "epoch": 10.67, "learning_rate": 5.7931034482758627e-05, "loss": 0.0011, "step": 2785 }, { "epoch": 10.67, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6556010246276855, "eval_runtime": 72.3514, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 2785 }, { "epoch": 10.69, "learning_rate": 5.767560664112388e-05, "loss": 0.1543, "step": 2790 }, { "epoch": 10.69, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6259641647338867, "eval_runtime": 71.8043, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2790 }, { "epoch": 10.71, "learning_rate": 5.7420178799489147e-05, "loss": 0.0174, "step": 2795 }, { "epoch": 10.71, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6242051124572754, "eval_runtime": 76.4373, "eval_samples_per_second": 2.276, "eval_steps_per_second": 0.288, "step": 2795 }, { "epoch": 10.73, "learning_rate": 5.716475095785441e-05, "loss": 0.0012, "step": 2800 }, { "epoch": 10.73, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.6227067708969116, "eval_runtime": 73.2688, "eval_samples_per_second": 2.375, "eval_steps_per_second": 0.3, "step": 2800 }, { "epoch": 10.75, "learning_rate": 5.690932311621967e-05, "loss": 0.0016, "step": 2805 }, { "epoch": 10.75, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.6738653182983398, "eval_runtime": 71.7359, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2805 }, { "epoch": 10.77, "learning_rate": 5.665389527458494e-05, "loss": 0.2556, "step": 2810 }, { "epoch": 10.77, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6926209926605225, "eval_runtime": 71.1551, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 2810 }, { "epoch": 10.79, "learning_rate": 5.63984674329502e-05, "loss": 0.013, "step": 2815 }, { "epoch": 10.79, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7026501893997192, "eval_runtime": 71.7376, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2815 }, { "epoch": 10.8, "learning_rate": 5.614303959131546e-05, "loss": 0.0002, "step": 2820 }, { "epoch": 10.8, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7139840126037598, "eval_runtime": 73.0784, "eval_samples_per_second": 2.381, "eval_steps_per_second": 0.301, "step": 2820 }, { "epoch": 10.82, "learning_rate": 5.588761174968071e-05, "loss": 0.0023, "step": 2825 }, { "epoch": 10.82, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.7233027219772339, "eval_runtime": 71.8919, "eval_samples_per_second": 2.42, "eval_steps_per_second": 0.306, "step": 2825 }, { "epoch": 10.84, "learning_rate": 5.5632183908045976e-05, "loss": 0.0027, "step": 2830 }, { "epoch": 10.84, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.7237999439239502, "eval_runtime": 71.2043, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 2830 }, { "epoch": 10.86, "learning_rate": 5.537675606641124e-05, "loss": 0.0051, "step": 2835 }, { "epoch": 10.86, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.6586277484893799, "eval_runtime": 71.7622, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 2835 }, { "epoch": 10.88, "learning_rate": 5.51213282247765e-05, "loss": 0.0003, "step": 2840 }, { "epoch": 10.88, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6285842657089233, "eval_runtime": 73.1223, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.301, "step": 2840 }, { "epoch": 10.9, "learning_rate": 5.4865900383141767e-05, "loss": 0.0193, "step": 2845 }, { "epoch": 10.9, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.5850633382797241, "eval_runtime": 71.8192, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2845 }, { "epoch": 10.92, "learning_rate": 5.461047254150703e-05, "loss": 0.0003, "step": 2850 }, { "epoch": 10.92, "eval_accuracy": 0.7758620689655172, "eval_loss": 1.5855857133865356, "eval_runtime": 71.1652, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 2850 }, { "epoch": 10.94, "learning_rate": 5.435504469987229e-05, "loss": 0.0087, "step": 2855 }, { "epoch": 10.94, "eval_accuracy": 0.7816091954022989, "eval_loss": 1.614903211593628, "eval_runtime": 71.7827, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 2855 }, { "epoch": 10.96, "learning_rate": 5.409961685823754e-05, "loss": 0.0048, "step": 2860 }, { "epoch": 10.96, "eval_accuracy": 0.7701149425287356, "eval_loss": 1.6289048194885254, "eval_runtime": 71.1167, "eval_samples_per_second": 2.447, "eval_steps_per_second": 0.309, "step": 2860 }, { "epoch": 10.98, "learning_rate": 5.3844189016602806e-05, "loss": 0.0005, "step": 2865 }, { "epoch": 10.98, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.6815837621688843, "eval_runtime": 73.4806, "eval_samples_per_second": 2.368, "eval_steps_per_second": 0.299, "step": 2865 }, { "epoch": 11.0, "learning_rate": 5.358876117496807e-05, "loss": 0.0028, "step": 2870 }, { "epoch": 11.0, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7306228876113892, "eval_runtime": 71.2539, "eval_samples_per_second": 2.442, "eval_steps_per_second": 0.309, "step": 2870 }, { "epoch": 11.02, "learning_rate": 5.333333333333333e-05, "loss": 0.2624, "step": 2875 }, { "epoch": 11.02, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7713919878005981, "eval_runtime": 73.4631, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.299, "step": 2875 }, { "epoch": 11.03, "learning_rate": 5.3077905491698597e-05, "loss": 0.1484, "step": 2880 }, { "epoch": 11.03, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.8120393753051758, "eval_runtime": 72.9705, "eval_samples_per_second": 2.385, "eval_steps_per_second": 0.301, "step": 2880 }, { "epoch": 11.05, "learning_rate": 5.282247765006386e-05, "loss": 0.0006, "step": 2885 }, { "epoch": 11.05, "eval_accuracy": 0.735632183908046, "eval_loss": 1.8391227722167969, "eval_runtime": 71.7975, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2885 }, { "epoch": 11.07, "learning_rate": 5.256704980842912e-05, "loss": 0.0001, "step": 2890 }, { "epoch": 11.07, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.8372502326965332, "eval_runtime": 73.0853, "eval_samples_per_second": 2.381, "eval_steps_per_second": 0.301, "step": 2890 }, { "epoch": 11.09, "learning_rate": 5.231162196679439e-05, "loss": 0.2108, "step": 2895 }, { "epoch": 11.09, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7650718688964844, "eval_runtime": 71.8157, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2895 }, { "epoch": 11.11, "learning_rate": 5.205619412515964e-05, "loss": 0.0008, "step": 2900 }, { "epoch": 11.11, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.761852741241455, "eval_runtime": 71.2906, "eval_samples_per_second": 2.441, "eval_steps_per_second": 0.309, "step": 2900 }, { "epoch": 11.13, "learning_rate": 5.1800766283524907e-05, "loss": 0.0007, "step": 2905 }, { "epoch": 11.13, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7633298635482788, "eval_runtime": 71.7947, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 2905 }, { "epoch": 11.15, "learning_rate": 5.154533844189017e-05, "loss": 0.0001, "step": 2910 }, { "epoch": 11.15, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7667099237442017, "eval_runtime": 71.8588, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 2910 }, { "epoch": 11.17, "learning_rate": 5.128991060025543e-05, "loss": 0.0001, "step": 2915 }, { "epoch": 11.17, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7692232131958008, "eval_runtime": 71.8252, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 2915 }, { "epoch": 11.19, "learning_rate": 5.10344827586207e-05, "loss": 0.0041, "step": 2920 }, { "epoch": 11.19, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7618753910064697, "eval_runtime": 71.1483, "eval_samples_per_second": 2.446, "eval_steps_per_second": 0.309, "step": 2920 }, { "epoch": 11.21, "learning_rate": 5.077905491698596e-05, "loss": 0.0004, "step": 2925 }, { "epoch": 11.21, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.744259238243103, "eval_runtime": 71.828, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 2925 }, { "epoch": 11.23, "learning_rate": 5.052362707535122e-05, "loss": 0.0016, "step": 2930 }, { "epoch": 11.23, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7307969331741333, "eval_runtime": 71.1919, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 2930 }, { "epoch": 11.25, "learning_rate": 5.026819923371647e-05, "loss": 0.0004, "step": 2935 }, { "epoch": 11.25, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.706060767173767, "eval_runtime": 71.6927, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2935 }, { "epoch": 11.26, "learning_rate": 5.0012771392081737e-05, "loss": 0.0046, "step": 2940 }, { "epoch": 11.26, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7156652212142944, "eval_runtime": 71.1594, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 2940 }, { "epoch": 11.28, "learning_rate": 4.9757343550447e-05, "loss": 0.0002, "step": 2945 }, { "epoch": 11.28, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.7299869060516357, "eval_runtime": 71.7347, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 2945 }, { "epoch": 11.3, "learning_rate": 4.950191570881226e-05, "loss": 0.0005, "step": 2950 }, { "epoch": 11.3, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.739342212677002, "eval_runtime": 71.1889, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 2950 }, { "epoch": 11.32, "learning_rate": 4.9246487867177527e-05, "loss": 0.2592, "step": 2955 }, { "epoch": 11.32, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.7353452444076538, "eval_runtime": 71.7474, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 2955 }, { "epoch": 11.34, "learning_rate": 4.899106002554278e-05, "loss": 0.0018, "step": 2960 }, { "epoch": 11.34, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.753889799118042, "eval_runtime": 71.1986, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 2960 }, { "epoch": 11.36, "learning_rate": 4.8735632183908047e-05, "loss": 0.0002, "step": 2965 }, { "epoch": 11.36, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7838666439056396, "eval_runtime": 73.3097, "eval_samples_per_second": 2.373, "eval_steps_per_second": 0.3, "step": 2965 }, { "epoch": 11.38, "learning_rate": 4.848020434227331e-05, "loss": 0.0002, "step": 2970 }, { "epoch": 11.38, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.8079078197479248, "eval_runtime": 71.2087, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 2970 }, { "epoch": 11.4, "learning_rate": 4.822477650063857e-05, "loss": 0.3184, "step": 2975 }, { "epoch": 11.4, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7978729009628296, "eval_runtime": 71.6881, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2975 }, { "epoch": 11.42, "learning_rate": 4.796934865900383e-05, "loss": 0.0002, "step": 2980 }, { "epoch": 11.42, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.802263617515564, "eval_runtime": 71.289, "eval_samples_per_second": 2.441, "eval_steps_per_second": 0.309, "step": 2980 }, { "epoch": 11.44, "learning_rate": 4.771392081736909e-05, "loss": 0.0002, "step": 2985 }, { "epoch": 11.44, "eval_accuracy": 0.7183908045977011, "eval_loss": 1.8103998899459839, "eval_runtime": 71.6925, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2985 }, { "epoch": 11.46, "learning_rate": 4.7458492975734357e-05, "loss": 0.0037, "step": 2990 }, { "epoch": 11.46, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7962597608566284, "eval_runtime": 71.0509, "eval_samples_per_second": 2.449, "eval_steps_per_second": 0.31, "step": 2990 }, { "epoch": 11.48, "learning_rate": 4.720306513409962e-05, "loss": 0.0582, "step": 2995 }, { "epoch": 11.48, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7625627517700195, "eval_runtime": 71.6864, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 2995 }, { "epoch": 11.49, "learning_rate": 4.694763729246488e-05, "loss": 0.0001, "step": 3000 }, { "epoch": 11.49, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7620983123779297, "eval_runtime": 71.1842, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3000 }, { "epoch": 11.51, "learning_rate": 4.669220945083015e-05, "loss": 0.1902, "step": 3005 }, { "epoch": 11.51, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7105143070220947, "eval_runtime": 71.7636, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3005 }, { "epoch": 11.53, "learning_rate": 4.643678160919541e-05, "loss": 0.1473, "step": 3010 }, { "epoch": 11.53, "eval_accuracy": 0.7586206896551724, "eval_loss": 1.674761176109314, "eval_runtime": 71.1642, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 3010 }, { "epoch": 11.55, "learning_rate": 4.6181353767560667e-05, "loss": 0.0052, "step": 3015 }, { "epoch": 11.55, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6427284479141235, "eval_runtime": 71.7334, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 3015 }, { "epoch": 11.57, "learning_rate": 4.592592592592593e-05, "loss": 0.0005, "step": 3020 }, { "epoch": 11.57, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.633091926574707, "eval_runtime": 71.2235, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3020 }, { "epoch": 11.59, "learning_rate": 4.567049808429119e-05, "loss": 0.0001, "step": 3025 }, { "epoch": 11.59, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6355048418045044, "eval_runtime": 71.7881, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 3025 }, { "epoch": 11.61, "learning_rate": 4.541507024265646e-05, "loss": 0.0004, "step": 3030 }, { "epoch": 11.61, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6394970417022705, "eval_runtime": 73.0643, "eval_samples_per_second": 2.381, "eval_steps_per_second": 0.301, "step": 3030 }, { "epoch": 11.63, "learning_rate": 4.515964240102171e-05, "loss": 0.0001, "step": 3035 }, { "epoch": 11.63, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6441478729248047, "eval_runtime": 71.7452, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3035 }, { "epoch": 11.65, "learning_rate": 4.4904214559386977e-05, "loss": 0.0001, "step": 3040 }, { "epoch": 11.65, "eval_accuracy": 0.735632183908046, "eval_loss": 1.6461490392684937, "eval_runtime": 71.208, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3040 }, { "epoch": 11.67, "learning_rate": 4.464878671775224e-05, "loss": 0.0001, "step": 3045 }, { "epoch": 11.67, "eval_accuracy": 0.735632183908046, "eval_loss": 1.649631142616272, "eval_runtime": 71.8178, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 3045 }, { "epoch": 11.69, "learning_rate": 4.4393358876117497e-05, "loss": 0.0254, "step": 3050 }, { "epoch": 11.69, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6433813571929932, "eval_runtime": 71.1821, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3050 }, { "epoch": 11.7, "learning_rate": 4.413793103448276e-05, "loss": 0.0007, "step": 3055 }, { "epoch": 11.7, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.642929196357727, "eval_runtime": 71.7678, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 3055 }, { "epoch": 11.72, "learning_rate": 4.388250319284802e-05, "loss": 0.0004, "step": 3060 }, { "epoch": 11.72, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6454113721847534, "eval_runtime": 72.9367, "eval_samples_per_second": 2.386, "eval_steps_per_second": 0.302, "step": 3060 }, { "epoch": 11.74, "learning_rate": 4.362707535121329e-05, "loss": 0.0001, "step": 3065 }, { "epoch": 11.74, "eval_accuracy": 0.7528735632183908, "eval_loss": 1.6498183012008667, "eval_runtime": 71.6868, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 3065 }, { "epoch": 11.76, "learning_rate": 4.337164750957854e-05, "loss": 0.0004, "step": 3070 }, { "epoch": 11.76, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6580952405929565, "eval_runtime": 71.2302, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3070 }, { "epoch": 11.78, "learning_rate": 4.3116219667943807e-05, "loss": 0.0003, "step": 3075 }, { "epoch": 11.78, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6635980606079102, "eval_runtime": 71.8047, "eval_samples_per_second": 2.423, "eval_steps_per_second": 0.306, "step": 3075 }, { "epoch": 11.8, "learning_rate": 4.286079182630907e-05, "loss": 0.0002, "step": 3080 }, { "epoch": 11.8, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6681902408599854, "eval_runtime": 71.207, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3080 }, { "epoch": 11.82, "learning_rate": 4.2605363984674326e-05, "loss": 0.0007, "step": 3085 }, { "epoch": 11.82, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6737765073776245, "eval_runtime": 71.7861, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 3085 }, { "epoch": 11.84, "learning_rate": 4.234993614303959e-05, "loss": 0.0007, "step": 3090 }, { "epoch": 11.84, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6775649785995483, "eval_runtime": 71.4067, "eval_samples_per_second": 2.437, "eval_steps_per_second": 0.308, "step": 3090 }, { "epoch": 11.86, "learning_rate": 4.209450830140485e-05, "loss": 0.002, "step": 3095 }, { "epoch": 11.86, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.6792525053024292, "eval_runtime": 75.5033, "eval_samples_per_second": 2.305, "eval_steps_per_second": 0.291, "step": 3095 }, { "epoch": 11.88, "learning_rate": 4.1839080459770117e-05, "loss": 0.0001, "step": 3100 }, { "epoch": 11.88, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6821579933166504, "eval_runtime": 72.572, "eval_samples_per_second": 2.398, "eval_steps_per_second": 0.303, "step": 3100 }, { "epoch": 11.9, "learning_rate": 4.158365261813538e-05, "loss": 0.0015, "step": 3105 }, { "epoch": 11.9, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.6829290390014648, "eval_runtime": 74.9292, "eval_samples_per_second": 2.322, "eval_steps_per_second": 0.294, "step": 3105 }, { "epoch": 11.92, "learning_rate": 4.132822477650064e-05, "loss": 0.0001, "step": 3110 }, { "epoch": 11.92, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7074753046035767, "eval_runtime": 72.5058, "eval_samples_per_second": 2.4, "eval_steps_per_second": 0.303, "step": 3110 }, { "epoch": 11.93, "learning_rate": 4.107279693486591e-05, "loss": 0.0002, "step": 3115 }, { "epoch": 11.93, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7354847192764282, "eval_runtime": 72.5819, "eval_samples_per_second": 2.397, "eval_steps_per_second": 0.303, "step": 3115 }, { "epoch": 11.95, "learning_rate": 4.081736909323116e-05, "loss": 0.0008, "step": 3120 }, { "epoch": 11.95, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7531324625015259, "eval_runtime": 74.5508, "eval_samples_per_second": 2.334, "eval_steps_per_second": 0.295, "step": 3120 }, { "epoch": 11.97, "learning_rate": 4.0561941251596427e-05, "loss": 0.0001, "step": 3125 }, { "epoch": 11.97, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7682172060012817, "eval_runtime": 72.5782, "eval_samples_per_second": 2.397, "eval_steps_per_second": 0.303, "step": 3125 }, { "epoch": 11.99, "learning_rate": 4.030651340996169e-05, "loss": 0.0004, "step": 3130 }, { "epoch": 11.99, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.774706244468689, "eval_runtime": 73.9012, "eval_samples_per_second": 2.354, "eval_steps_per_second": 0.298, "step": 3130 }, { "epoch": 12.01, "learning_rate": 4.005108556832695e-05, "loss": 0.0001, "step": 3135 }, { "epoch": 12.01, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.77650785446167, "eval_runtime": 75.9663, "eval_samples_per_second": 2.29, "eval_steps_per_second": 0.29, "step": 3135 }, { "epoch": 12.03, "learning_rate": 3.979565772669221e-05, "loss": 0.0001, "step": 3140 }, { "epoch": 12.03, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.778681993484497, "eval_runtime": 72.8484, "eval_samples_per_second": 2.389, "eval_steps_per_second": 0.302, "step": 3140 }, { "epoch": 12.05, "learning_rate": 3.954022988505747e-05, "loss": 0.0004, "step": 3145 }, { "epoch": 12.05, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.778651475906372, "eval_runtime": 74.5051, "eval_samples_per_second": 2.335, "eval_steps_per_second": 0.295, "step": 3145 }, { "epoch": 12.07, "learning_rate": 3.9284802043422737e-05, "loss": 0.1525, "step": 3150 }, { "epoch": 12.07, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7690765857696533, "eval_runtime": 73.949, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 3150 }, { "epoch": 12.09, "learning_rate": 3.9029374201788e-05, "loss": 0.0001, "step": 3155 }, { "epoch": 12.09, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7667592763900757, "eval_runtime": 73.0801, "eval_samples_per_second": 2.381, "eval_steps_per_second": 0.301, "step": 3155 }, { "epoch": 12.11, "learning_rate": 3.8773946360153257e-05, "loss": 0.0001, "step": 3160 }, { "epoch": 12.11, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7664886713027954, "eval_runtime": 74.7535, "eval_samples_per_second": 2.328, "eval_steps_per_second": 0.294, "step": 3160 }, { "epoch": 12.13, "learning_rate": 3.851851851851852e-05, "loss": 0.0023, "step": 3165 }, { "epoch": 12.13, "eval_accuracy": 0.735632183908046, "eval_loss": 1.764859914779663, "eval_runtime": 73.1922, "eval_samples_per_second": 2.377, "eval_steps_per_second": 0.301, "step": 3165 }, { "epoch": 12.15, "learning_rate": 3.826309067688378e-05, "loss": 0.0004, "step": 3170 }, { "epoch": 12.15, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7692474126815796, "eval_runtime": 72.5207, "eval_samples_per_second": 2.399, "eval_steps_per_second": 0.303, "step": 3170 }, { "epoch": 12.16, "learning_rate": 3.800766283524904e-05, "loss": 0.0005, "step": 3175 }, { "epoch": 12.16, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7775356769561768, "eval_runtime": 73.2672, "eval_samples_per_second": 2.375, "eval_steps_per_second": 0.3, "step": 3175 }, { "epoch": 12.18, "learning_rate": 3.77522349936143e-05, "loss": 0.0001, "step": 3180 }, { "epoch": 12.18, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.784372091293335, "eval_runtime": 72.3493, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 3180 }, { "epoch": 12.2, "learning_rate": 3.7496807151979567e-05, "loss": 0.0001, "step": 3185 }, { "epoch": 12.2, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7875841856002808, "eval_runtime": 72.8117, "eval_samples_per_second": 2.39, "eval_steps_per_second": 0.302, "step": 3185 }, { "epoch": 12.22, "learning_rate": 3.724137931034483e-05, "loss": 0.0, "step": 3190 }, { "epoch": 12.22, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.7894097566604614, "eval_runtime": 73.3527, "eval_samples_per_second": 2.372, "eval_steps_per_second": 0.3, "step": 3190 }, { "epoch": 12.24, "learning_rate": 3.6985951468710087e-05, "loss": 0.0002, "step": 3195 }, { "epoch": 12.24, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.7878044843673706, "eval_runtime": 75.4362, "eval_samples_per_second": 2.307, "eval_steps_per_second": 0.292, "step": 3195 }, { "epoch": 12.26, "learning_rate": 3.673052362707535e-05, "loss": 0.0001, "step": 3200 }, { "epoch": 12.26, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.787352442741394, "eval_runtime": 72.6018, "eval_samples_per_second": 2.397, "eval_steps_per_second": 0.303, "step": 3200 }, { "epoch": 12.28, "learning_rate": 3.647509578544061e-05, "loss": 0.0789, "step": 3205 }, { "epoch": 12.28, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7876189947128296, "eval_runtime": 73.2483, "eval_samples_per_second": 2.375, "eval_steps_per_second": 0.3, "step": 3205 }, { "epoch": 12.3, "learning_rate": 3.6219667943805877e-05, "loss": 0.0001, "step": 3210 }, { "epoch": 12.3, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7912352085113525, "eval_runtime": 72.2227, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 3210 }, { "epoch": 12.32, "learning_rate": 3.596424010217114e-05, "loss": 0.0001, "step": 3215 }, { "epoch": 12.32, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7966572046279907, "eval_runtime": 74.8197, "eval_samples_per_second": 2.326, "eval_steps_per_second": 0.294, "step": 3215 }, { "epoch": 12.34, "learning_rate": 3.57088122605364e-05, "loss": 0.0003, "step": 3220 }, { "epoch": 12.34, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7997833490371704, "eval_runtime": 73.9379, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 3220 }, { "epoch": 12.36, "learning_rate": 3.545338441890167e-05, "loss": 0.0005, "step": 3225 }, { "epoch": 12.36, "eval_accuracy": 0.735632183908046, "eval_loss": 1.8003336191177368, "eval_runtime": 73.7895, "eval_samples_per_second": 2.358, "eval_steps_per_second": 0.298, "step": 3225 }, { "epoch": 12.38, "learning_rate": 3.519795657726692e-05, "loss": 0.0001, "step": 3230 }, { "epoch": 12.38, "eval_accuracy": 0.735632183908046, "eval_loss": 1.801089882850647, "eval_runtime": 73.3293, "eval_samples_per_second": 2.373, "eval_steps_per_second": 0.3, "step": 3230 }, { "epoch": 12.39, "learning_rate": 3.4942528735632187e-05, "loss": 0.0051, "step": 3235 }, { "epoch": 12.39, "eval_accuracy": 0.735632183908046, "eval_loss": 1.8076083660125732, "eval_runtime": 73.9488, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 3235 }, { "epoch": 12.41, "learning_rate": 3.468710089399745e-05, "loss": 0.1098, "step": 3240 }, { "epoch": 12.41, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.8316843509674072, "eval_runtime": 72.738, "eval_samples_per_second": 2.392, "eval_steps_per_second": 0.302, "step": 3240 }, { "epoch": 12.43, "learning_rate": 3.443167305236271e-05, "loss": 0.0009, "step": 3245 }, { "epoch": 12.43, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.8573225736618042, "eval_runtime": 71.7532, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3245 }, { "epoch": 12.45, "learning_rate": 3.417624521072797e-05, "loss": 0.0001, "step": 3250 }, { "epoch": 12.45, "eval_accuracy": 0.7126436781609196, "eval_loss": 1.8779215812683105, "eval_runtime": 71.1624, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 3250 }, { "epoch": 12.47, "learning_rate": 3.392081736909323e-05, "loss": 0.1514, "step": 3255 }, { "epoch": 12.47, "eval_accuracy": 0.7241379310344828, "eval_loss": 1.8445746898651123, "eval_runtime": 73.7727, "eval_samples_per_second": 2.359, "eval_steps_per_second": 0.298, "step": 3255 }, { "epoch": 12.49, "learning_rate": 3.36653895274585e-05, "loss": 0.0002, "step": 3260 }, { "epoch": 12.49, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.8186622858047485, "eval_runtime": 72.9059, "eval_samples_per_second": 2.387, "eval_steps_per_second": 0.302, "step": 3260 }, { "epoch": 12.51, "learning_rate": 3.340996168582375e-05, "loss": 0.0024, "step": 3265 }, { "epoch": 12.51, "eval_accuracy": 0.735632183908046, "eval_loss": 1.791884183883667, "eval_runtime": 71.8399, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 3265 }, { "epoch": 12.53, "learning_rate": 3.3154533844189017e-05, "loss": 0.0003, "step": 3270 }, { "epoch": 12.53, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7768088579177856, "eval_runtime": 71.2715, "eval_samples_per_second": 2.441, "eval_steps_per_second": 0.309, "step": 3270 }, { "epoch": 12.55, "learning_rate": 3.289910600255428e-05, "loss": 0.0004, "step": 3275 }, { "epoch": 12.55, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.7849540710449219, "eval_runtime": 72.4582, "eval_samples_per_second": 2.401, "eval_steps_per_second": 0.304, "step": 3275 }, { "epoch": 12.57, "learning_rate": 3.264367816091954e-05, "loss": 0.2673, "step": 3280 }, { "epoch": 12.57, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.788151502609253, "eval_runtime": 71.2667, "eval_samples_per_second": 2.442, "eval_steps_per_second": 0.309, "step": 3280 }, { "epoch": 12.59, "learning_rate": 3.23882503192848e-05, "loss": 0.0002, "step": 3285 }, { "epoch": 12.59, "eval_accuracy": 0.735632183908046, "eval_loss": 1.791655421257019, "eval_runtime": 71.75, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3285 }, { "epoch": 12.61, "learning_rate": 3.213282247765006e-05, "loss": 0.0001, "step": 3290 }, { "epoch": 12.61, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7958180904388428, "eval_runtime": 71.2081, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3290 }, { "epoch": 12.62, "learning_rate": 3.1877394636015327e-05, "loss": 0.0094, "step": 3295 }, { "epoch": 12.62, "eval_accuracy": 0.7298850574712644, "eval_loss": 1.811642050743103, "eval_runtime": 73.4826, "eval_samples_per_second": 2.368, "eval_steps_per_second": 0.299, "step": 3295 }, { "epoch": 12.64, "learning_rate": 3.162196679438058e-05, "loss": 0.0299, "step": 3300 }, { "epoch": 12.64, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.8312135934829712, "eval_runtime": 72.3831, "eval_samples_per_second": 2.404, "eval_steps_per_second": 0.304, "step": 3300 }, { "epoch": 12.66, "learning_rate": 3.1366538952745847e-05, "loss": 0.0002, "step": 3305 }, { "epoch": 12.66, "eval_accuracy": 0.735632183908046, "eval_loss": 1.8625733852386475, "eval_runtime": 71.8476, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 3305 }, { "epoch": 12.68, "learning_rate": 3.111111111111111e-05, "loss": 0.0007, "step": 3310 }, { "epoch": 12.68, "eval_accuracy": 0.735632183908046, "eval_loss": 1.8807812929153442, "eval_runtime": 71.1083, "eval_samples_per_second": 2.447, "eval_steps_per_second": 0.309, "step": 3310 }, { "epoch": 12.7, "learning_rate": 3.085568326947637e-05, "loss": 0.0002, "step": 3315 }, { "epoch": 12.7, "eval_accuracy": 0.735632183908046, "eval_loss": 1.8896175622940063, "eval_runtime": 71.6802, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 3315 }, { "epoch": 12.72, "learning_rate": 3.0600255427841637e-05, "loss": 0.0003, "step": 3320 }, { "epoch": 12.72, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.8892227411270142, "eval_runtime": 74.5342, "eval_samples_per_second": 2.334, "eval_steps_per_second": 0.295, "step": 3320 }, { "epoch": 12.74, "learning_rate": 3.0344827586206897e-05, "loss": 0.0001, "step": 3325 }, { "epoch": 12.74, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.8880239725112915, "eval_runtime": 73.6161, "eval_samples_per_second": 2.364, "eval_steps_per_second": 0.299, "step": 3325 }, { "epoch": 12.76, "learning_rate": 3.008939974457216e-05, "loss": 0.0013, "step": 3330 }, { "epoch": 12.76, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.874098539352417, "eval_runtime": 73.2537, "eval_samples_per_second": 2.375, "eval_steps_per_second": 0.3, "step": 3330 }, { "epoch": 12.78, "learning_rate": 2.9833971902937423e-05, "loss": 0.0029, "step": 3335 }, { "epoch": 12.78, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.8415035009384155, "eval_runtime": 71.8592, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 3335 }, { "epoch": 12.8, "learning_rate": 2.9578544061302683e-05, "loss": 0.0, "step": 3340 }, { "epoch": 12.8, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.8206106424331665, "eval_runtime": 71.14, "eval_samples_per_second": 2.446, "eval_steps_per_second": 0.309, "step": 3340 }, { "epoch": 12.82, "learning_rate": 2.9323116219667947e-05, "loss": 0.0001, "step": 3345 }, { "epoch": 12.82, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.80865478515625, "eval_runtime": 73.9426, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.298, "step": 3345 }, { "epoch": 12.84, "learning_rate": 2.906768837803321e-05, "loss": 0.0003, "step": 3350 }, { "epoch": 12.84, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.8006689548492432, "eval_runtime": 71.1637, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 3350 }, { "epoch": 12.85, "learning_rate": 2.8812260536398467e-05, "loss": 0.0001, "step": 3355 }, { "epoch": 12.85, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7933427095413208, "eval_runtime": 71.6538, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 3355 }, { "epoch": 12.87, "learning_rate": 2.855683269476373e-05, "loss": 0.0001, "step": 3360 }, { "epoch": 12.87, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7883424758911133, "eval_runtime": 71.2115, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3360 }, { "epoch": 12.89, "learning_rate": 2.8301404853128993e-05, "loss": 0.0001, "step": 3365 }, { "epoch": 12.89, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7855582237243652, "eval_runtime": 71.6601, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 3365 }, { "epoch": 12.91, "learning_rate": 2.8045977011494257e-05, "loss": 0.0002, "step": 3370 }, { "epoch": 12.91, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7831445932388306, "eval_runtime": 72.466, "eval_samples_per_second": 2.401, "eval_steps_per_second": 0.304, "step": 3370 }, { "epoch": 12.93, "learning_rate": 2.7790549169859513e-05, "loss": 0.0001, "step": 3375 }, { "epoch": 12.93, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7811237573623657, "eval_runtime": 73.4621, "eval_samples_per_second": 2.369, "eval_steps_per_second": 0.299, "step": 3375 }, { "epoch": 12.95, "learning_rate": 2.7535121328224777e-05, "loss": 0.0001, "step": 3380 }, { "epoch": 12.95, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7792061567306519, "eval_runtime": 71.133, "eval_samples_per_second": 2.446, "eval_steps_per_second": 0.309, "step": 3380 }, { "epoch": 12.97, "learning_rate": 2.727969348659004e-05, "loss": 0.0003, "step": 3385 }, { "epoch": 12.97, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7786797285079956, "eval_runtime": 71.7413, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3385 }, { "epoch": 12.99, "learning_rate": 2.70242656449553e-05, "loss": 0.0001, "step": 3390 }, { "epoch": 12.99, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7803369760513306, "eval_runtime": 71.2098, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3390 }, { "epoch": 13.01, "learning_rate": 2.6768837803320563e-05, "loss": 0.0, "step": 3395 }, { "epoch": 13.01, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7803747653961182, "eval_runtime": 73.577, "eval_samples_per_second": 2.365, "eval_steps_per_second": 0.299, "step": 3395 }, { "epoch": 13.03, "learning_rate": 2.6513409961685827e-05, "loss": 0.0001, "step": 3400 }, { "epoch": 13.03, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7811692953109741, "eval_runtime": 71.1922, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3400 }, { "epoch": 13.05, "learning_rate": 2.625798212005109e-05, "loss": 0.0002, "step": 3405 }, { "epoch": 13.05, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7811998128890991, "eval_runtime": 71.7905, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 3405 }, { "epoch": 13.07, "learning_rate": 2.6002554278416347e-05, "loss": 0.0001, "step": 3410 }, { "epoch": 13.07, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7812321186065674, "eval_runtime": 71.2005, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3410 }, { "epoch": 13.08, "learning_rate": 2.574712643678161e-05, "loss": 0.0002, "step": 3415 }, { "epoch": 13.08, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7813913822174072, "eval_runtime": 73.6067, "eval_samples_per_second": 2.364, "eval_steps_per_second": 0.299, "step": 3415 }, { "epoch": 13.1, "learning_rate": 2.5491698595146873e-05, "loss": 0.0001, "step": 3420 }, { "epoch": 13.1, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7811163663864136, "eval_runtime": 71.1956, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3420 }, { "epoch": 13.12, "learning_rate": 2.5236270753512137e-05, "loss": 0.0001, "step": 3425 }, { "epoch": 13.12, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7811745405197144, "eval_runtime": 73.6252, "eval_samples_per_second": 2.363, "eval_steps_per_second": 0.299, "step": 3425 }, { "epoch": 13.14, "learning_rate": 2.4980842911877393e-05, "loss": 0.0001, "step": 3430 }, { "epoch": 13.14, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7812987565994263, "eval_runtime": 73.0837, "eval_samples_per_second": 2.381, "eval_steps_per_second": 0.301, "step": 3430 }, { "epoch": 13.16, "learning_rate": 2.4725415070242657e-05, "loss": 0.0001, "step": 3435 }, { "epoch": 13.16, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7799280881881714, "eval_runtime": 71.791, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 3435 }, { "epoch": 13.18, "learning_rate": 2.446998722860792e-05, "loss": 0.0001, "step": 3440 }, { "epoch": 13.18, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7800962924957275, "eval_runtime": 71.1707, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 3440 }, { "epoch": 13.2, "learning_rate": 2.4214559386973183e-05, "loss": 0.1536, "step": 3445 }, { "epoch": 13.2, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7748968601226807, "eval_runtime": 71.7615, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3445 }, { "epoch": 13.22, "learning_rate": 2.3959131545338443e-05, "loss": 0.0001, "step": 3450 }, { "epoch": 13.22, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7677268981933594, "eval_runtime": 73.6553, "eval_samples_per_second": 2.362, "eval_steps_per_second": 0.299, "step": 3450 }, { "epoch": 13.24, "learning_rate": 2.3703703703703707e-05, "loss": 0.0001, "step": 3455 }, { "epoch": 13.24, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7640659809112549, "eval_runtime": 71.7772, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 3455 }, { "epoch": 13.26, "learning_rate": 2.3448275862068967e-05, "loss": 0.0001, "step": 3460 }, { "epoch": 13.26, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7622041702270508, "eval_runtime": 71.3282, "eval_samples_per_second": 2.439, "eval_steps_per_second": 0.308, "step": 3460 }, { "epoch": 13.28, "learning_rate": 2.319284802043423e-05, "loss": 0.0001, "step": 3465 }, { "epoch": 13.28, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.760646104812622, "eval_runtime": 71.7718, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 3465 }, { "epoch": 13.3, "learning_rate": 2.293742017879949e-05, "loss": 0.0001, "step": 3470 }, { "epoch": 13.3, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7595189809799194, "eval_runtime": 71.2335, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3470 }, { "epoch": 13.31, "learning_rate": 2.268199233716475e-05, "loss": 0.0, "step": 3475 }, { "epoch": 13.31, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7589017152786255, "eval_runtime": 73.1315, "eval_samples_per_second": 2.379, "eval_steps_per_second": 0.301, "step": 3475 }, { "epoch": 13.33, "learning_rate": 2.2426564495530013e-05, "loss": 0.0108, "step": 3480 }, { "epoch": 13.33, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7552155256271362, "eval_runtime": 74.2293, "eval_samples_per_second": 2.344, "eval_steps_per_second": 0.296, "step": 3480 }, { "epoch": 13.35, "learning_rate": 2.2171136653895273e-05, "loss": 0.0088, "step": 3485 }, { "epoch": 13.35, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7531734704971313, "eval_runtime": 73.069, "eval_samples_per_second": 2.381, "eval_steps_per_second": 0.301, "step": 3485 }, { "epoch": 13.37, "learning_rate": 2.1915708812260537e-05, "loss": 0.003, "step": 3490 }, { "epoch": 13.37, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7560912370681763, "eval_runtime": 72.5457, "eval_samples_per_second": 2.398, "eval_steps_per_second": 0.303, "step": 3490 }, { "epoch": 13.39, "learning_rate": 2.16602809706258e-05, "loss": 0.0001, "step": 3495 }, { "epoch": 13.39, "eval_accuracy": 0.735632183908046, "eval_loss": 1.760205864906311, "eval_runtime": 73.1685, "eval_samples_per_second": 2.378, "eval_steps_per_second": 0.301, "step": 3495 }, { "epoch": 13.41, "learning_rate": 2.1404853128991063e-05, "loss": 0.0001, "step": 3500 }, { "epoch": 13.41, "eval_accuracy": 0.735632183908046, "eval_loss": 1.763502836227417, "eval_runtime": 74.1089, "eval_samples_per_second": 2.348, "eval_steps_per_second": 0.297, "step": 3500 }, { "epoch": 13.43, "learning_rate": 2.1149425287356323e-05, "loss": 0.0002, "step": 3505 }, { "epoch": 13.43, "eval_accuracy": 0.735632183908046, "eval_loss": 1.765455961227417, "eval_runtime": 72.6454, "eval_samples_per_second": 2.395, "eval_steps_per_second": 0.303, "step": 3505 }, { "epoch": 13.45, "learning_rate": 2.0893997445721587e-05, "loss": 0.1119, "step": 3510 }, { "epoch": 13.45, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7576346397399902, "eval_runtime": 72.2316, "eval_samples_per_second": 2.409, "eval_steps_per_second": 0.305, "step": 3510 }, { "epoch": 13.47, "learning_rate": 2.0638569604086847e-05, "loss": 0.0025, "step": 3515 }, { "epoch": 13.47, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7575277090072632, "eval_runtime": 73.2772, "eval_samples_per_second": 2.375, "eval_steps_per_second": 0.3, "step": 3515 }, { "epoch": 13.49, "learning_rate": 2.0383141762452107e-05, "loss": 0.0001, "step": 3520 }, { "epoch": 13.49, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7568941116333008, "eval_runtime": 74.9459, "eval_samples_per_second": 2.322, "eval_steps_per_second": 0.294, "step": 3520 }, { "epoch": 13.51, "learning_rate": 2.012771392081737e-05, "loss": 0.0, "step": 3525 }, { "epoch": 13.51, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7557793855667114, "eval_runtime": 72.5811, "eval_samples_per_second": 2.397, "eval_steps_per_second": 0.303, "step": 3525 }, { "epoch": 13.52, "learning_rate": 1.987228607918263e-05, "loss": 0.0001, "step": 3530 }, { "epoch": 13.52, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7556322813034058, "eval_runtime": 73.9541, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.297, "step": 3530 }, { "epoch": 13.54, "learning_rate": 1.9616858237547893e-05, "loss": 0.0001, "step": 3535 }, { "epoch": 13.54, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7552297115325928, "eval_runtime": 73.7388, "eval_samples_per_second": 2.36, "eval_steps_per_second": 0.298, "step": 3535 }, { "epoch": 13.56, "learning_rate": 1.9361430395913153e-05, "loss": 0.0, "step": 3540 }, { "epoch": 13.56, "eval_accuracy": 0.735632183908046, "eval_loss": 1.755204439163208, "eval_runtime": 74.1877, "eval_samples_per_second": 2.345, "eval_steps_per_second": 0.297, "step": 3540 }, { "epoch": 13.58, "learning_rate": 1.9106002554278417e-05, "loss": 0.0002, "step": 3545 }, { "epoch": 13.58, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7553986310958862, "eval_runtime": 73.9835, "eval_samples_per_second": 2.352, "eval_steps_per_second": 0.297, "step": 3545 }, { "epoch": 13.6, "learning_rate": 1.885057471264368e-05, "loss": 0.0001, "step": 3550 }, { "epoch": 13.6, "eval_accuracy": 0.735632183908046, "eval_loss": 1.755505084991455, "eval_runtime": 73.6282, "eval_samples_per_second": 2.363, "eval_steps_per_second": 0.299, "step": 3550 }, { "epoch": 13.62, "learning_rate": 1.8595146871008943e-05, "loss": 0.0001, "step": 3555 }, { "epoch": 13.62, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7556419372558594, "eval_runtime": 73.3262, "eval_samples_per_second": 2.373, "eval_steps_per_second": 0.3, "step": 3555 }, { "epoch": 13.64, "learning_rate": 1.8339719029374203e-05, "loss": 0.0001, "step": 3560 }, { "epoch": 13.64, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7553131580352783, "eval_runtime": 72.3509, "eval_samples_per_second": 2.405, "eval_steps_per_second": 0.304, "step": 3560 }, { "epoch": 13.66, "learning_rate": 1.8084291187739463e-05, "loss": 0.0002, "step": 3565 }, { "epoch": 13.66, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7552363872528076, "eval_runtime": 74.575, "eval_samples_per_second": 2.333, "eval_steps_per_second": 0.295, "step": 3565 }, { "epoch": 13.68, "learning_rate": 1.7828863346104727e-05, "loss": 0.0001, "step": 3570 }, { "epoch": 13.68, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7547740936279297, "eval_runtime": 74.1586, "eval_samples_per_second": 2.346, "eval_steps_per_second": 0.297, "step": 3570 }, { "epoch": 13.7, "learning_rate": 1.7573435504469987e-05, "loss": 0.0001, "step": 3575 }, { "epoch": 13.7, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7549880743026733, "eval_runtime": 73.4905, "eval_samples_per_second": 2.368, "eval_steps_per_second": 0.299, "step": 3575 }, { "epoch": 13.72, "learning_rate": 1.731800766283525e-05, "loss": 0.0, "step": 3580 }, { "epoch": 13.72, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7544682025909424, "eval_runtime": 72.5422, "eval_samples_per_second": 2.399, "eval_steps_per_second": 0.303, "step": 3580 }, { "epoch": 13.74, "learning_rate": 1.706257982120051e-05, "loss": 0.0001, "step": 3585 }, { "epoch": 13.74, "eval_accuracy": 0.735632183908046, "eval_loss": 1.755007028579712, "eval_runtime": 73.8715, "eval_samples_per_second": 2.355, "eval_steps_per_second": 0.298, "step": 3585 }, { "epoch": 13.75, "learning_rate": 1.6807151979565773e-05, "loss": 0.0568, "step": 3590 }, { "epoch": 13.75, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7550690174102783, "eval_runtime": 72.5531, "eval_samples_per_second": 2.398, "eval_steps_per_second": 0.303, "step": 3590 }, { "epoch": 13.77, "learning_rate": 1.6551724137931037e-05, "loss": 0.0001, "step": 3595 }, { "epoch": 13.77, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7550101280212402, "eval_runtime": 75.6777, "eval_samples_per_second": 2.299, "eval_steps_per_second": 0.291, "step": 3595 }, { "epoch": 13.79, "learning_rate": 1.62962962962963e-05, "loss": 0.0001, "step": 3600 }, { "epoch": 13.79, "eval_accuracy": 0.735632183908046, "eval_loss": 1.754870891571045, "eval_runtime": 72.8303, "eval_samples_per_second": 2.389, "eval_steps_per_second": 0.302, "step": 3600 }, { "epoch": 13.81, "learning_rate": 1.604086845466156e-05, "loss": 0.0001, "step": 3605 }, { "epoch": 13.81, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7544294595718384, "eval_runtime": 73.3275, "eval_samples_per_second": 2.373, "eval_steps_per_second": 0.3, "step": 3605 }, { "epoch": 13.83, "learning_rate": 1.578544061302682e-05, "loss": 0.0528, "step": 3610 }, { "epoch": 13.83, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7550995349884033, "eval_runtime": 71.3272, "eval_samples_per_second": 2.439, "eval_steps_per_second": 0.308, "step": 3610 }, { "epoch": 13.85, "learning_rate": 1.5530012771392083e-05, "loss": 0.0001, "step": 3615 }, { "epoch": 13.85, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7551816701889038, "eval_runtime": 71.892, "eval_samples_per_second": 2.42, "eval_steps_per_second": 0.306, "step": 3615 }, { "epoch": 13.87, "learning_rate": 1.5274584929757343e-05, "loss": 0.0001, "step": 3620 }, { "epoch": 13.87, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7544273138046265, "eval_runtime": 71.4145, "eval_samples_per_second": 2.436, "eval_steps_per_second": 0.308, "step": 3620 }, { "epoch": 13.89, "learning_rate": 1.5019157088122607e-05, "loss": 0.0001, "step": 3625 }, { "epoch": 13.89, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7543997764587402, "eval_runtime": 71.7733, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.307, "step": 3625 }, { "epoch": 13.91, "learning_rate": 1.4763729246487867e-05, "loss": 0.0001, "step": 3630 }, { "epoch": 13.91, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.753382682800293, "eval_runtime": 71.2431, "eval_samples_per_second": 2.442, "eval_steps_per_second": 0.309, "step": 3630 }, { "epoch": 13.93, "learning_rate": 1.450830140485313e-05, "loss": 0.0001, "step": 3635 }, { "epoch": 13.93, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7532877922058105, "eval_runtime": 71.7023, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 3635 }, { "epoch": 13.95, "learning_rate": 1.4252873563218392e-05, "loss": 0.0001, "step": 3640 }, { "epoch": 13.95, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7533704042434692, "eval_runtime": 71.9533, "eval_samples_per_second": 2.418, "eval_steps_per_second": 0.306, "step": 3640 }, { "epoch": 13.97, "learning_rate": 1.3997445721583655e-05, "loss": 0.0, "step": 3645 }, { "epoch": 13.97, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7539461851119995, "eval_runtime": 72.1079, "eval_samples_per_second": 2.413, "eval_steps_per_second": 0.305, "step": 3645 }, { "epoch": 13.98, "learning_rate": 1.3742017879948915e-05, "loss": 0.1209, "step": 3650 }, { "epoch": 13.98, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7514145374298096, "eval_runtime": 73.1219, "eval_samples_per_second": 2.38, "eval_steps_per_second": 0.301, "step": 3650 }, { "epoch": 14.0, "learning_rate": 1.3486590038314175e-05, "loss": 0.1395, "step": 3655 }, { "epoch": 14.0, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7531005144119263, "eval_runtime": 71.7851, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 3655 }, { "epoch": 14.02, "learning_rate": 1.3231162196679438e-05, "loss": 0.0037, "step": 3660 }, { "epoch": 14.02, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7534183263778687, "eval_runtime": 71.1617, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 3660 }, { "epoch": 14.04, "learning_rate": 1.29757343550447e-05, "loss": 0.0001, "step": 3665 }, { "epoch": 14.04, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7549954652786255, "eval_runtime": 71.701, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 3665 }, { "epoch": 14.06, "learning_rate": 1.2720306513409963e-05, "loss": 0.0001, "step": 3670 }, { "epoch": 14.06, "eval_accuracy": 0.735632183908046, "eval_loss": 1.754940390586853, "eval_runtime": 71.172, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 3670 }, { "epoch": 14.08, "learning_rate": 1.2464878671775223e-05, "loss": 0.0, "step": 3675 }, { "epoch": 14.08, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7556744813919067, "eval_runtime": 71.7906, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 3675 }, { "epoch": 14.1, "learning_rate": 1.2209450830140485e-05, "loss": 0.0001, "step": 3680 }, { "epoch": 14.1, "eval_accuracy": 0.735632183908046, "eval_loss": 1.755606770515442, "eval_runtime": 71.2401, "eval_samples_per_second": 2.442, "eval_steps_per_second": 0.309, "step": 3680 }, { "epoch": 14.12, "learning_rate": 1.1954022988505748e-05, "loss": 0.0001, "step": 3685 }, { "epoch": 14.12, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7560800313949585, "eval_runtime": 73.8699, "eval_samples_per_second": 2.355, "eval_steps_per_second": 0.298, "step": 3685 }, { "epoch": 14.14, "learning_rate": 1.169859514687101e-05, "loss": 0.0956, "step": 3690 }, { "epoch": 14.14, "eval_accuracy": 0.735632183908046, "eval_loss": 1.749796748161316, "eval_runtime": 72.7543, "eval_samples_per_second": 2.392, "eval_steps_per_second": 0.302, "step": 3690 }, { "epoch": 14.16, "learning_rate": 1.1443167305236272e-05, "loss": 0.0, "step": 3695 }, { "epoch": 14.16, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.745739459991455, "eval_runtime": 71.7312, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 3695 }, { "epoch": 14.18, "learning_rate": 1.1187739463601533e-05, "loss": 0.0001, "step": 3700 }, { "epoch": 14.18, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7437233924865723, "eval_runtime": 71.1505, "eval_samples_per_second": 2.446, "eval_steps_per_second": 0.309, "step": 3700 }, { "epoch": 14.2, "learning_rate": 1.0932311621966795e-05, "loss": 0.0001, "step": 3705 }, { "epoch": 14.2, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.742749571800232, "eval_runtime": 71.695, "eval_samples_per_second": 2.427, "eval_steps_per_second": 0.307, "step": 3705 }, { "epoch": 14.21, "learning_rate": 1.0676883780332057e-05, "loss": 0.0, "step": 3710 }, { "epoch": 14.21, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7426731586456299, "eval_runtime": 71.2028, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3710 }, { "epoch": 14.23, "learning_rate": 1.0421455938697318e-05, "loss": 0.0, "step": 3715 }, { "epoch": 14.23, "eval_accuracy": 0.7471264367816092, "eval_loss": 1.7424765825271606, "eval_runtime": 71.7529, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3715 }, { "epoch": 14.25, "learning_rate": 1.016602809706258e-05, "loss": 0.0558, "step": 3720 }, { "epoch": 14.25, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7421131134033203, "eval_runtime": 71.1886, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3720 }, { "epoch": 14.27, "learning_rate": 9.910600255427842e-06, "loss": 0.0003, "step": 3725 }, { "epoch": 14.27, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7416549921035767, "eval_runtime": 71.8684, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 3725 }, { "epoch": 14.29, "learning_rate": 9.655172413793103e-06, "loss": 0.0001, "step": 3730 }, { "epoch": 14.29, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7416775226593018, "eval_runtime": 71.2129, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3730 }, { "epoch": 14.31, "learning_rate": 9.399744572158365e-06, "loss": 0.0, "step": 3735 }, { "epoch": 14.31, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7417203187942505, "eval_runtime": 71.6762, "eval_samples_per_second": 2.428, "eval_steps_per_second": 0.307, "step": 3735 }, { "epoch": 14.33, "learning_rate": 9.144316730523628e-06, "loss": 0.0002, "step": 3740 }, { "epoch": 14.33, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7419242858886719, "eval_runtime": 71.2197, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3740 }, { "epoch": 14.35, "learning_rate": 8.88888888888889e-06, "loss": 0.0078, "step": 3745 }, { "epoch": 14.35, "eval_accuracy": 0.7413793103448276, "eval_loss": 1.7439537048339844, "eval_runtime": 73.4872, "eval_samples_per_second": 2.368, "eval_steps_per_second": 0.299, "step": 3745 }, { "epoch": 14.37, "learning_rate": 8.633461047254152e-06, "loss": 0.0001, "step": 3750 }, { "epoch": 14.37, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7466919422149658, "eval_runtime": 72.9233, "eval_samples_per_second": 2.386, "eval_steps_per_second": 0.302, "step": 3750 }, { "epoch": 14.39, "learning_rate": 8.378033205619413e-06, "loss": 0.0009, "step": 3755 }, { "epoch": 14.39, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7497990131378174, "eval_runtime": 71.7246, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 3755 }, { "epoch": 14.41, "learning_rate": 8.122605363984675e-06, "loss": 0.0992, "step": 3760 }, { "epoch": 14.41, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7496720552444458, "eval_runtime": 71.1652, "eval_samples_per_second": 2.445, "eval_steps_per_second": 0.309, "step": 3760 }, { "epoch": 14.43, "learning_rate": 7.867177522349937e-06, "loss": 0.0004, "step": 3765 }, { "epoch": 14.43, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7489051818847656, "eval_runtime": 71.7293, "eval_samples_per_second": 2.426, "eval_steps_per_second": 0.307, "step": 3765 }, { "epoch": 14.44, "learning_rate": 7.611749680715198e-06, "loss": 0.0001, "step": 3770 }, { "epoch": 14.44, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7488287687301636, "eval_runtime": 71.1909, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3770 }, { "epoch": 14.46, "learning_rate": 7.35632183908046e-06, "loss": 0.0001, "step": 3775 }, { "epoch": 14.46, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7490508556365967, "eval_runtime": 71.7426, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3775 }, { "epoch": 14.48, "learning_rate": 7.100893997445722e-06, "loss": 0.0001, "step": 3780 }, { "epoch": 14.48, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7489107847213745, "eval_runtime": 71.2131, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3780 }, { "epoch": 14.5, "learning_rate": 6.845466155810984e-06, "loss": 0.0001, "step": 3785 }, { "epoch": 14.5, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7490137815475464, "eval_runtime": 71.8519, "eval_samples_per_second": 2.422, "eval_steps_per_second": 0.306, "step": 3785 }, { "epoch": 14.52, "learning_rate": 6.590038314176246e-06, "loss": 0.1097, "step": 3790 }, { "epoch": 14.52, "eval_accuracy": 0.735632183908046, "eval_loss": 1.750510573387146, "eval_runtime": 71.3059, "eval_samples_per_second": 2.44, "eval_steps_per_second": 0.309, "step": 3790 }, { "epoch": 14.54, "learning_rate": 6.3346104725415075e-06, "loss": 0.0001, "step": 3795 }, { "epoch": 14.54, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7523186206817627, "eval_runtime": 71.7857, "eval_samples_per_second": 2.424, "eval_steps_per_second": 0.306, "step": 3795 }, { "epoch": 14.56, "learning_rate": 6.079182630906769e-06, "loss": 0.0001, "step": 3800 }, { "epoch": 14.56, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7536077499389648, "eval_runtime": 73.2363, "eval_samples_per_second": 2.376, "eval_steps_per_second": 0.3, "step": 3800 }, { "epoch": 14.58, "learning_rate": 5.823754789272031e-06, "loss": 0.0001, "step": 3805 }, { "epoch": 14.58, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7539523839950562, "eval_runtime": 73.6071, "eval_samples_per_second": 2.364, "eval_steps_per_second": 0.299, "step": 3805 }, { "epoch": 14.6, "learning_rate": 5.568326947637293e-06, "loss": 0.0002, "step": 3810 }, { "epoch": 14.6, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7543648481369019, "eval_runtime": 72.9966, "eval_samples_per_second": 2.384, "eval_steps_per_second": 0.301, "step": 3810 }, { "epoch": 14.62, "learning_rate": 5.312899106002554e-06, "loss": 0.0001, "step": 3815 }, { "epoch": 14.62, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7542980909347534, "eval_runtime": 71.752, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3815 }, { "epoch": 14.64, "learning_rate": 5.057471264367817e-06, "loss": 0.0001, "step": 3820 }, { "epoch": 14.64, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7545336484909058, "eval_runtime": 71.8754, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 3820 }, { "epoch": 14.66, "learning_rate": 4.802043422733078e-06, "loss": 0.0002, "step": 3825 }, { "epoch": 14.66, "eval_accuracy": 0.735632183908046, "eval_loss": 1.754623293876648, "eval_runtime": 71.767, "eval_samples_per_second": 2.425, "eval_steps_per_second": 0.307, "step": 3825 }, { "epoch": 14.67, "learning_rate": 4.54661558109834e-06, "loss": 0.0001, "step": 3830 }, { "epoch": 14.67, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7546714544296265, "eval_runtime": 73.0008, "eval_samples_per_second": 2.384, "eval_steps_per_second": 0.301, "step": 3830 }, { "epoch": 14.69, "learning_rate": 4.291187739463602e-06, "loss": 0.0001, "step": 3835 }, { "epoch": 14.69, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7549469470977783, "eval_runtime": 71.8595, "eval_samples_per_second": 2.421, "eval_steps_per_second": 0.306, "step": 3835 }, { "epoch": 14.71, "learning_rate": 4.035759897828863e-06, "loss": 0.0001, "step": 3840 }, { "epoch": 14.71, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7555478811264038, "eval_runtime": 71.1999, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.309, "step": 3840 }, { "epoch": 14.73, "learning_rate": 3.7803320561941254e-06, "loss": 0.0284, "step": 3845 }, { "epoch": 14.73, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7555177211761475, "eval_runtime": 73.5037, "eval_samples_per_second": 2.367, "eval_steps_per_second": 0.299, "step": 3845 }, { "epoch": 14.75, "learning_rate": 3.5249042145593875e-06, "loss": 0.0, "step": 3850 }, { "epoch": 14.75, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7556744813919067, "eval_runtime": 71.2208, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.309, "step": 3850 }, { "epoch": 14.77, "learning_rate": 3.269476372924649e-06, "loss": 0.0001, "step": 3855 }, { "epoch": 14.77, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7554370164871216, "eval_runtime": 75.3145, "eval_samples_per_second": 2.31, "eval_steps_per_second": 0.292, "step": 3855 }, { "epoch": 14.79, "learning_rate": 3.014048531289911e-06, "loss": 0.0, "step": 3860 }, { "epoch": 14.79, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7560198307037354, "eval_runtime": 74.6084, "eval_samples_per_second": 2.332, "eval_steps_per_second": 0.295, "step": 3860 }, { "epoch": 14.81, "learning_rate": 2.7586206896551725e-06, "loss": 0.0001, "step": 3865 }, { "epoch": 14.81, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7560211420059204, "eval_runtime": 73.5059, "eval_samples_per_second": 2.367, "eval_steps_per_second": 0.299, "step": 3865 }, { "epoch": 14.83, "learning_rate": 2.5031928480204346e-06, "loss": 0.0001, "step": 3870 }, { "epoch": 14.83, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7558296918869019, "eval_runtime": 72.5044, "eval_samples_per_second": 2.4, "eval_steps_per_second": 0.303, "step": 3870 }, { "epoch": 14.85, "learning_rate": 2.2477650063856962e-06, "loss": 0.0001, "step": 3875 }, { "epoch": 14.85, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7559691667556763, "eval_runtime": 74.4471, "eval_samples_per_second": 2.337, "eval_steps_per_second": 0.296, "step": 3875 }, { "epoch": 14.87, "learning_rate": 1.992337164750958e-06, "loss": 0.0002, "step": 3880 }, { "epoch": 14.87, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7558660507202148, "eval_runtime": 72.5388, "eval_samples_per_second": 2.399, "eval_steps_per_second": 0.303, "step": 3880 }, { "epoch": 14.89, "learning_rate": 1.7369093231162196e-06, "loss": 0.0, "step": 3885 }, { "epoch": 14.89, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7562229633331299, "eval_runtime": 72.5804, "eval_samples_per_second": 2.397, "eval_steps_per_second": 0.303, "step": 3885 }, { "epoch": 14.9, "learning_rate": 1.4814814814814817e-06, "loss": 0.0001, "step": 3890 }, { "epoch": 14.9, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7563174962997437, "eval_runtime": 74.0885, "eval_samples_per_second": 2.349, "eval_steps_per_second": 0.297, "step": 3890 }, { "epoch": 14.92, "learning_rate": 1.2260536398467433e-06, "loss": 0.0, "step": 3895 }, { "epoch": 14.92, "eval_accuracy": 0.735632183908046, "eval_loss": 1.756478190422058, "eval_runtime": 76.613, "eval_samples_per_second": 2.271, "eval_steps_per_second": 0.287, "step": 3895 }, { "epoch": 14.94, "learning_rate": 9.706257982120052e-07, "loss": 0.0, "step": 3900 }, { "epoch": 14.94, "eval_accuracy": 0.735632183908046, "eval_loss": 1.756520390510559, "eval_runtime": 72.5435, "eval_samples_per_second": 2.399, "eval_steps_per_second": 0.303, "step": 3900 }, { "epoch": 14.96, "learning_rate": 7.15197956577267e-07, "loss": 0.0001, "step": 3905 }, { "epoch": 14.96, "eval_accuracy": 0.735632183908046, "eval_loss": 1.7564539909362793, "eval_runtime": 74.574, "eval_samples_per_second": 2.333, "eval_steps_per_second": 0.295, "step": 3905 }, { "epoch": 14.98, "learning_rate": 4.5977011494252875e-07, "loss": 0.1303, "step": 3910 }, { "epoch": 14.98, "eval_accuracy": 0.735632183908046, "eval_loss": 1.756199598312378, "eval_runtime": 73.9195, "eval_samples_per_second": 2.354, "eval_steps_per_second": 0.298, "step": 3910 }, { "epoch": 15.0, "learning_rate": 2.0434227330779057e-07, "loss": 0.0001, "step": 3915 }, { "epoch": 15.0, "eval_accuracy": 0.735632183908046, "eval_loss": 1.755522608757019, "eval_runtime": 73.61, "eval_samples_per_second": 2.364, "eval_steps_per_second": 0.299, "step": 3915 }, { "epoch": 15.0, "step": 3915, "total_flos": 1.592360968692695e+18, "train_loss": 0.5163871185302369, "train_runtime": 118001.491, "train_samples_per_second": 0.199, "train_steps_per_second": 0.033 } ], "logging_steps": 5, "max_steps": 3915, "num_train_epochs": 15, "save_steps": 5, "total_flos": 1.592360968692695e+18, "trial_name": null, "trial_params": null }