{ "best_metric": null, "best_model_checkpoint": null, "epoch": 194.81998395783202, "global_step": 850000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 5.7300022920009174e-08, "loss": 1.5169, "step": 500 }, { "epoch": 0.11, "eval_loss": 1.2482632398605347, "eval_runtime": 8.6938, "eval_samples_per_second": 539.808, "eval_steps_per_second": 67.519, "step": 500 }, { "epoch": 0.23, "learning_rate": 1.1460004584001835e-07, "loss": 1.5027, "step": 1000 }, { "epoch": 0.23, "eval_loss": 1.24774169921875, "eval_runtime": 8.695, "eval_samples_per_second": 539.736, "eval_steps_per_second": 67.51, "step": 1000 }, { "epoch": 0.34, "learning_rate": 1.719000687600275e-07, "loss": 1.4826, "step": 1500 }, { "epoch": 0.34, "eval_loss": 1.2445777654647827, "eval_runtime": 8.6971, "eval_samples_per_second": 539.605, "eval_steps_per_second": 67.494, "step": 1500 }, { "epoch": 0.46, "learning_rate": 2.292000916800367e-07, "loss": 1.461, "step": 2000 }, { "epoch": 0.46, "eval_loss": 1.2269392013549805, "eval_runtime": 8.6967, "eval_samples_per_second": 539.627, "eval_steps_per_second": 67.497, "step": 2000 }, { "epoch": 0.57, "learning_rate": 2.865001146000459e-07, "loss": 1.4457, "step": 2500 }, { "epoch": 0.57, "eval_loss": 1.2174636125564575, "eval_runtime": 8.6891, "eval_samples_per_second": 540.104, "eval_steps_per_second": 67.556, "step": 2500 }, { "epoch": 0.69, "learning_rate": 3.43800137520055e-07, "loss": 1.4261, "step": 3000 }, { "epoch": 0.69, "eval_loss": 1.214718222618103, "eval_runtime": 8.6907, "eval_samples_per_second": 540.003, "eval_steps_per_second": 67.544, "step": 3000 }, { "epoch": 0.8, "learning_rate": 4.0110016044006417e-07, "loss": 1.4022, "step": 3500 }, { "epoch": 0.8, "eval_loss": 1.225386619567871, "eval_runtime": 8.695, "eval_samples_per_second": 539.733, "eval_steps_per_second": 67.51, "step": 3500 }, { "epoch": 0.92, "learning_rate": 4.584001833600734e-07, "loss": 1.3926, "step": 4000 }, { "epoch": 0.92, "eval_loss": 1.215023159980774, "eval_runtime": 8.6977, "eval_samples_per_second": 539.567, "eval_steps_per_second": 67.489, "step": 4000 }, { "epoch": 1.03, "learning_rate": 5.157002062800826e-07, "loss": 1.3778, "step": 4500 }, { "epoch": 1.03, "eval_loss": 1.218368411064148, "eval_runtime": 8.7052, "eval_samples_per_second": 539.102, "eval_steps_per_second": 67.431, "step": 4500 }, { "epoch": 1.15, "learning_rate": 5.730002292000918e-07, "loss": 1.3681, "step": 5000 }, { "epoch": 1.15, "eval_loss": 1.2097276449203491, "eval_runtime": 8.697, "eval_samples_per_second": 539.61, "eval_steps_per_second": 67.494, "step": 5000 }, { "epoch": 1.26, "learning_rate": 6.303002521201009e-07, "loss": 1.3498, "step": 5500 }, { "epoch": 1.26, "eval_loss": 1.2108970880508423, "eval_runtime": 8.6963, "eval_samples_per_second": 539.655, "eval_steps_per_second": 67.5, "step": 5500 }, { "epoch": 1.38, "learning_rate": 6.8760027504011e-07, "loss": 1.3566, "step": 6000 }, { "epoch": 1.38, "eval_loss": 1.2149543762207031, "eval_runtime": 8.7047, "eval_samples_per_second": 539.136, "eval_steps_per_second": 67.435, "step": 6000 }, { "epoch": 1.49, "learning_rate": 7.449002979601192e-07, "loss": 1.3452, "step": 6500 }, { "epoch": 1.49, "eval_loss": 1.2115674018859863, "eval_runtime": 8.6983, "eval_samples_per_second": 539.529, "eval_steps_per_second": 67.484, "step": 6500 }, { "epoch": 1.6, "learning_rate": 8.022003208801283e-07, "loss": 1.3397, "step": 7000 }, { "epoch": 1.6, "eval_loss": 1.202462077140808, "eval_runtime": 8.6985, "eval_samples_per_second": 539.52, "eval_steps_per_second": 67.483, "step": 7000 }, { "epoch": 1.72, "learning_rate": 8.595003438001376e-07, "loss": 1.3445, "step": 7500 }, { "epoch": 1.72, "eval_loss": 1.2136445045471191, "eval_runtime": 8.6953, "eval_samples_per_second": 539.715, "eval_steps_per_second": 67.508, "step": 7500 }, { "epoch": 1.83, "learning_rate": 9.168003667201468e-07, "loss": 1.3262, "step": 8000 }, { "epoch": 1.83, "eval_loss": 1.209503412246704, "eval_runtime": 8.7101, "eval_samples_per_second": 538.799, "eval_steps_per_second": 67.393, "step": 8000 }, { "epoch": 1.95, "learning_rate": 9.74100389640156e-07, "loss": 1.3212, "step": 8500 }, { "epoch": 1.95, "eval_loss": 1.2041652202606201, "eval_runtime": 8.6938, "eval_samples_per_second": 539.811, "eval_steps_per_second": 67.52, "step": 8500 }, { "epoch": 2.06, "learning_rate": 1.0314004125601651e-06, "loss": 1.3267, "step": 9000 }, { "epoch": 2.06, "eval_loss": 1.2057585716247559, "eval_runtime": 8.6967, "eval_samples_per_second": 539.632, "eval_steps_per_second": 67.497, "step": 9000 }, { "epoch": 2.18, "learning_rate": 1.0887004354801743e-06, "loss": 1.3146, "step": 9500 }, { "epoch": 2.18, "eval_loss": 1.2002052068710327, "eval_runtime": 8.697, "eval_samples_per_second": 539.613, "eval_steps_per_second": 67.495, "step": 9500 }, { "epoch": 2.29, "learning_rate": 1.1460004584001836e-06, "loss": 1.3145, "step": 10000 }, { "epoch": 2.29, "eval_loss": 1.2283295392990112, "eval_runtime": 8.7031, "eval_samples_per_second": 539.233, "eval_steps_per_second": 67.447, "step": 10000 }, { "epoch": 2.41, "learning_rate": 1.2033004813201926e-06, "loss": 1.3125, "step": 10500 }, { "epoch": 2.41, "eval_loss": 1.211876630783081, "eval_runtime": 8.6944, "eval_samples_per_second": 539.772, "eval_steps_per_second": 67.515, "step": 10500 }, { "epoch": 2.52, "learning_rate": 1.2606005042402018e-06, "loss": 1.3064, "step": 11000 }, { "epoch": 2.52, "eval_loss": 1.1980589628219604, "eval_runtime": 8.6949, "eval_samples_per_second": 539.745, "eval_steps_per_second": 67.511, "step": 11000 }, { "epoch": 2.64, "learning_rate": 1.317900527160211e-06, "loss": 1.2956, "step": 11500 }, { "epoch": 2.64, "eval_loss": 1.2005902528762817, "eval_runtime": 8.742, "eval_samples_per_second": 536.837, "eval_steps_per_second": 67.147, "step": 11500 }, { "epoch": 2.75, "learning_rate": 1.37520055008022e-06, "loss": 1.3025, "step": 12000 }, { "epoch": 2.75, "eval_loss": 1.1946187019348145, "eval_runtime": 8.7521, "eval_samples_per_second": 536.213, "eval_steps_per_second": 67.07, "step": 12000 }, { "epoch": 2.86, "learning_rate": 1.4325005730002292e-06, "loss": 1.2992, "step": 12500 }, { "epoch": 2.86, "eval_loss": 1.204544186592102, "eval_runtime": 8.7328, "eval_samples_per_second": 537.402, "eval_steps_per_second": 67.218, "step": 12500 }, { "epoch": 2.98, "learning_rate": 1.4898005959202385e-06, "loss": 1.2961, "step": 13000 }, { "epoch": 2.98, "eval_loss": 1.1969064474105835, "eval_runtime": 8.7382, "eval_samples_per_second": 537.068, "eval_steps_per_second": 67.176, "step": 13000 }, { "epoch": 3.09, "learning_rate": 1.5471006188402475e-06, "loss": 1.2884, "step": 13500 }, { "epoch": 3.09, "eval_loss": 1.217844843864441, "eval_runtime": 8.7388, "eval_samples_per_second": 537.028, "eval_steps_per_second": 67.171, "step": 13500 }, { "epoch": 3.21, "learning_rate": 1.6044006417602567e-06, "loss": 1.295, "step": 14000 }, { "epoch": 3.21, "eval_loss": 1.1969348192214966, "eval_runtime": 8.7409, "eval_samples_per_second": 536.901, "eval_steps_per_second": 67.156, "step": 14000 }, { "epoch": 3.32, "learning_rate": 1.6617006646802661e-06, "loss": 1.281, "step": 14500 }, { "epoch": 3.32, "eval_loss": 1.1996593475341797, "eval_runtime": 8.7549, "eval_samples_per_second": 536.042, "eval_steps_per_second": 67.048, "step": 14500 }, { "epoch": 3.44, "learning_rate": 1.7190006876002751e-06, "loss": 1.2828, "step": 15000 }, { "epoch": 3.44, "eval_loss": 1.1959216594696045, "eval_runtime": 8.7534, "eval_samples_per_second": 536.132, "eval_steps_per_second": 67.059, "step": 15000 }, { "epoch": 3.55, "learning_rate": 1.7763007105202846e-06, "loss": 1.2855, "step": 15500 }, { "epoch": 3.55, "eval_loss": 1.199737310409546, "eval_runtime": 8.7011, "eval_samples_per_second": 539.357, "eval_steps_per_second": 67.463, "step": 15500 }, { "epoch": 3.67, "learning_rate": 1.8336007334402936e-06, "loss": 1.2769, "step": 16000 }, { "epoch": 3.67, "eval_loss": 1.1873365640640259, "eval_runtime": 8.7088, "eval_samples_per_second": 538.881, "eval_steps_per_second": 67.403, "step": 16000 }, { "epoch": 3.78, "learning_rate": 1.8909007563603026e-06, "loss": 1.2794, "step": 16500 }, { "epoch": 3.78, "eval_loss": 1.2020219564437866, "eval_runtime": 8.6933, "eval_samples_per_second": 539.844, "eval_steps_per_second": 67.524, "step": 16500 }, { "epoch": 3.9, "learning_rate": 1.948200779280312e-06, "loss": 1.2731, "step": 17000 }, { "epoch": 3.9, "eval_loss": 1.1952511072158813, "eval_runtime": 8.6959, "eval_samples_per_second": 539.682, "eval_steps_per_second": 67.503, "step": 17000 }, { "epoch": 4.01, "learning_rate": 2.005500802200321e-06, "loss": 1.2759, "step": 17500 }, { "epoch": 4.01, "eval_loss": 1.2007830142974854, "eval_runtime": 8.6958, "eval_samples_per_second": 539.686, "eval_steps_per_second": 67.504, "step": 17500 }, { "epoch": 4.13, "learning_rate": 2.0628008251203302e-06, "loss": 1.2733, "step": 18000 }, { "epoch": 4.13, "eval_loss": 1.2060155868530273, "eval_runtime": 8.6984, "eval_samples_per_second": 539.522, "eval_steps_per_second": 67.483, "step": 18000 }, { "epoch": 4.24, "learning_rate": 2.120100848040339e-06, "loss": 1.2722, "step": 18500 }, { "epoch": 4.24, "eval_loss": 1.188662052154541, "eval_runtime": 8.6913, "eval_samples_per_second": 539.965, "eval_steps_per_second": 67.539, "step": 18500 }, { "epoch": 4.35, "learning_rate": 2.1774008709603487e-06, "loss": 1.2647, "step": 19000 }, { "epoch": 4.35, "eval_loss": 1.1955726146697998, "eval_runtime": 8.7004, "eval_samples_per_second": 539.402, "eval_steps_per_second": 67.468, "step": 19000 }, { "epoch": 4.47, "learning_rate": 2.2347008938803575e-06, "loss": 1.2645, "step": 19500 }, { "epoch": 4.47, "eval_loss": 1.183967113494873, "eval_runtime": 8.6917, "eval_samples_per_second": 539.943, "eval_steps_per_second": 67.536, "step": 19500 }, { "epoch": 4.58, "learning_rate": 2.292000916800367e-06, "loss": 1.2614, "step": 20000 }, { "epoch": 4.58, "eval_loss": 1.1970974206924438, "eval_runtime": 8.6949, "eval_samples_per_second": 539.742, "eval_steps_per_second": 67.511, "step": 20000 }, { "epoch": 4.7, "learning_rate": 2.349300939720376e-06, "loss": 1.2556, "step": 20500 }, { "epoch": 4.7, "eval_loss": 1.196185827255249, "eval_runtime": 8.6968, "eval_samples_per_second": 539.624, "eval_steps_per_second": 67.496, "step": 20500 }, { "epoch": 4.81, "learning_rate": 2.406600962640385e-06, "loss": 1.2636, "step": 21000 }, { "epoch": 4.81, "eval_loss": 1.190032958984375, "eval_runtime": 8.6956, "eval_samples_per_second": 539.698, "eval_steps_per_second": 67.505, "step": 21000 }, { "epoch": 4.93, "learning_rate": 2.4639009855603944e-06, "loss": 1.2658, "step": 21500 }, { "epoch": 4.93, "eval_loss": 1.1715595722198486, "eval_runtime": 8.6983, "eval_samples_per_second": 539.528, "eval_steps_per_second": 67.484, "step": 21500 }, { "epoch": 5.04, "learning_rate": 2.5212010084804036e-06, "loss": 1.2564, "step": 22000 }, { "epoch": 5.04, "eval_loss": 1.195684790611267, "eval_runtime": 8.6969, "eval_samples_per_second": 539.618, "eval_steps_per_second": 67.495, "step": 22000 }, { "epoch": 5.16, "learning_rate": 2.578501031400413e-06, "loss": 1.2529, "step": 22500 }, { "epoch": 5.16, "eval_loss": 1.1944663524627686, "eval_runtime": 8.6861, "eval_samples_per_second": 540.29, "eval_steps_per_second": 67.579, "step": 22500 }, { "epoch": 5.27, "learning_rate": 2.635801054320422e-06, "loss": 1.2535, "step": 23000 }, { "epoch": 5.27, "eval_loss": 1.1917227506637573, "eval_runtime": 8.6944, "eval_samples_per_second": 539.771, "eval_steps_per_second": 67.515, "step": 23000 }, { "epoch": 5.39, "learning_rate": 2.693101077240431e-06, "loss": 1.2511, "step": 23500 }, { "epoch": 5.39, "eval_loss": 1.1999666690826416, "eval_runtime": 8.6929, "eval_samples_per_second": 539.868, "eval_steps_per_second": 67.527, "step": 23500 }, { "epoch": 5.5, "learning_rate": 2.75040110016044e-06, "loss": 1.2479, "step": 24000 }, { "epoch": 5.5, "eval_loss": 1.1955519914627075, "eval_runtime": 8.6953, "eval_samples_per_second": 539.72, "eval_steps_per_second": 67.508, "step": 24000 }, { "epoch": 5.62, "learning_rate": 2.8077011230804493e-06, "loss": 1.2594, "step": 24500 }, { "epoch": 5.62, "eval_loss": 1.178593397140503, "eval_runtime": 8.6879, "eval_samples_per_second": 540.176, "eval_steps_per_second": 67.565, "step": 24500 }, { "epoch": 5.73, "learning_rate": 2.8650011460004585e-06, "loss": 1.2471, "step": 25000 }, { "epoch": 5.73, "eval_loss": 1.2001216411590576, "eval_runtime": 8.8695, "eval_samples_per_second": 529.116, "eval_steps_per_second": 66.182, "step": 25000 }, { "epoch": 5.84, "learning_rate": 2.9223011689204677e-06, "loss": 1.2407, "step": 25500 }, { "epoch": 5.84, "eval_loss": 1.2118862867355347, "eval_runtime": 8.7349, "eval_samples_per_second": 537.272, "eval_steps_per_second": 67.202, "step": 25500 }, { "epoch": 5.96, "learning_rate": 2.979601191840477e-06, "loss": 1.2411, "step": 26000 }, { "epoch": 5.96, "eval_loss": 1.1993587017059326, "eval_runtime": 8.7415, "eval_samples_per_second": 536.867, "eval_steps_per_second": 67.151, "step": 26000 }, { "epoch": 6.07, "learning_rate": 3.036901214760486e-06, "loss": 1.247, "step": 26500 }, { "epoch": 6.07, "eval_loss": 1.177435040473938, "eval_runtime": 8.7415, "eval_samples_per_second": 536.862, "eval_steps_per_second": 67.151, "step": 26500 }, { "epoch": 6.19, "learning_rate": 3.094201237680495e-06, "loss": 1.2446, "step": 27000 }, { "epoch": 6.19, "eval_loss": 1.2069361209869385, "eval_runtime": 8.7366, "eval_samples_per_second": 537.164, "eval_steps_per_second": 67.188, "step": 27000 }, { "epoch": 6.3, "learning_rate": 3.151501260600504e-06, "loss": 1.2252, "step": 27500 }, { "epoch": 6.3, "eval_loss": 1.1768550872802734, "eval_runtime": 8.7575, "eval_samples_per_second": 535.885, "eval_steps_per_second": 67.028, "step": 27500 }, { "epoch": 6.42, "learning_rate": 3.2088012835205134e-06, "loss": 1.2409, "step": 28000 }, { "epoch": 6.42, "eval_loss": 1.2138729095458984, "eval_runtime": 8.7381, "eval_samples_per_second": 537.074, "eval_steps_per_second": 67.177, "step": 28000 }, { "epoch": 6.53, "learning_rate": 3.266101306440523e-06, "loss": 1.2359, "step": 28500 }, { "epoch": 6.53, "eval_loss": 1.1923925876617432, "eval_runtime": 8.7309, "eval_samples_per_second": 537.516, "eval_steps_per_second": 67.232, "step": 28500 }, { "epoch": 6.65, "learning_rate": 3.3234013293605322e-06, "loss": 1.235, "step": 29000 }, { "epoch": 6.65, "eval_loss": 1.1961288452148438, "eval_runtime": 8.7303, "eval_samples_per_second": 537.551, "eval_steps_per_second": 67.237, "step": 29000 }, { "epoch": 6.76, "learning_rate": 3.380701352280541e-06, "loss": 1.2338, "step": 29500 }, { "epoch": 6.76, "eval_loss": 1.1852829456329346, "eval_runtime": 8.7265, "eval_samples_per_second": 537.785, "eval_steps_per_second": 67.266, "step": 29500 }, { "epoch": 6.88, "learning_rate": 3.4380013752005503e-06, "loss": 1.2237, "step": 30000 }, { "epoch": 6.88, "eval_loss": 1.2016907930374146, "eval_runtime": 8.7244, "eval_samples_per_second": 537.916, "eval_steps_per_second": 67.282, "step": 30000 }, { "epoch": 6.99, "learning_rate": 3.4953013981205595e-06, "loss": 1.2314, "step": 30500 }, { "epoch": 6.99, "eval_loss": 1.1824744939804077, "eval_runtime": 8.7288, "eval_samples_per_second": 537.644, "eval_steps_per_second": 67.248, "step": 30500 }, { "epoch": 7.11, "learning_rate": 3.552601421040569e-06, "loss": 1.2307, "step": 31000 }, { "epoch": 7.11, "eval_loss": 1.194525122642517, "eval_runtime": 8.7538, "eval_samples_per_second": 536.111, "eval_steps_per_second": 67.057, "step": 31000 }, { "epoch": 7.22, "learning_rate": 3.6099014439605775e-06, "loss": 1.2276, "step": 31500 }, { "epoch": 7.22, "eval_loss": 1.1899327039718628, "eval_runtime": 8.7298, "eval_samples_per_second": 537.582, "eval_steps_per_second": 67.241, "step": 31500 }, { "epoch": 7.33, "learning_rate": 3.667201466880587e-06, "loss": 1.2292, "step": 32000 }, { "epoch": 7.33, "eval_loss": 1.1973289251327515, "eval_runtime": 8.7654, "eval_samples_per_second": 535.399, "eval_steps_per_second": 66.968, "step": 32000 }, { "epoch": 7.45, "learning_rate": 3.7245014898005964e-06, "loss": 1.223, "step": 32500 }, { "epoch": 7.45, "eval_loss": 1.180310845375061, "eval_runtime": 8.6879, "eval_samples_per_second": 540.174, "eval_steps_per_second": 67.565, "step": 32500 }, { "epoch": 7.56, "learning_rate": 3.781801512720605e-06, "loss": 1.2254, "step": 33000 }, { "epoch": 7.56, "eval_loss": 1.1896222829818726, "eval_runtime": 8.738, "eval_samples_per_second": 537.08, "eval_steps_per_second": 67.178, "step": 33000 }, { "epoch": 7.68, "learning_rate": 3.839101535640614e-06, "loss": 1.2294, "step": 33500 }, { "epoch": 7.68, "eval_loss": 1.1893203258514404, "eval_runtime": 8.7398, "eval_samples_per_second": 536.97, "eval_steps_per_second": 67.164, "step": 33500 }, { "epoch": 7.79, "learning_rate": 3.896401558560624e-06, "loss": 1.2207, "step": 34000 }, { "epoch": 7.79, "eval_loss": 1.1965256929397583, "eval_runtime": 8.7429, "eval_samples_per_second": 536.779, "eval_steps_per_second": 67.14, "step": 34000 }, { "epoch": 7.91, "learning_rate": 3.953701581480633e-06, "loss": 1.2207, "step": 34500 }, { "epoch": 7.91, "eval_loss": 1.1925592422485352, "eval_runtime": 8.7394, "eval_samples_per_second": 536.992, "eval_steps_per_second": 67.167, "step": 34500 }, { "epoch": 8.02, "learning_rate": 4.011001604400642e-06, "loss": 1.2234, "step": 35000 }, { "epoch": 8.02, "eval_loss": 1.1854848861694336, "eval_runtime": 8.7558, "eval_samples_per_second": 535.99, "eval_steps_per_second": 67.042, "step": 35000 }, { "epoch": 8.14, "learning_rate": 4.068301627320651e-06, "loss": 1.2204, "step": 35500 }, { "epoch": 8.14, "eval_loss": 1.201168179512024, "eval_runtime": 8.7642, "eval_samples_per_second": 535.476, "eval_steps_per_second": 66.977, "step": 35500 }, { "epoch": 8.25, "learning_rate": 4.1256016502406605e-06, "loss": 1.2154, "step": 36000 }, { "epoch": 8.25, "eval_loss": 1.1880018711090088, "eval_runtime": 8.7536, "eval_samples_per_second": 536.121, "eval_steps_per_second": 67.058, "step": 36000 }, { "epoch": 8.37, "learning_rate": 4.18290167316067e-06, "loss": 1.2159, "step": 36500 }, { "epoch": 8.37, "eval_loss": 1.1985067129135132, "eval_runtime": 8.8912, "eval_samples_per_second": 527.826, "eval_steps_per_second": 66.02, "step": 36500 }, { "epoch": 8.48, "learning_rate": 4.240201696080678e-06, "loss": 1.215, "step": 37000 }, { "epoch": 8.48, "eval_loss": 1.192816138267517, "eval_runtime": 8.6855, "eval_samples_per_second": 540.329, "eval_steps_per_second": 67.584, "step": 37000 }, { "epoch": 8.59, "learning_rate": 4.297501719000688e-06, "loss": 1.2178, "step": 37500 }, { "epoch": 8.59, "eval_loss": 1.194384217262268, "eval_runtime": 8.6817, "eval_samples_per_second": 540.563, "eval_steps_per_second": 67.614, "step": 37500 }, { "epoch": 8.71, "learning_rate": 4.354801741920697e-06, "loss": 1.213, "step": 38000 }, { "epoch": 8.71, "eval_loss": 1.1961228847503662, "eval_runtime": 8.6827, "eval_samples_per_second": 540.5, "eval_steps_per_second": 67.606, "step": 38000 }, { "epoch": 8.82, "learning_rate": 4.412101764840706e-06, "loss": 1.2129, "step": 38500 }, { "epoch": 8.82, "eval_loss": 1.1909756660461426, "eval_runtime": 8.6827, "eval_samples_per_second": 540.502, "eval_steps_per_second": 67.606, "step": 38500 }, { "epoch": 8.94, "learning_rate": 4.469401787760715e-06, "loss": 1.2135, "step": 39000 }, { "epoch": 8.94, "eval_loss": 1.183637022972107, "eval_runtime": 9.0901, "eval_samples_per_second": 516.275, "eval_steps_per_second": 64.576, "step": 39000 }, { "epoch": 9.05, "learning_rate": 4.526701810680725e-06, "loss": 1.213, "step": 39500 }, { "epoch": 9.05, "eval_loss": 1.189724087715149, "eval_runtime": 8.7372, "eval_samples_per_second": 537.13, "eval_steps_per_second": 67.184, "step": 39500 }, { "epoch": 9.17, "learning_rate": 4.584001833600734e-06, "loss": 1.2018, "step": 40000 }, { "epoch": 9.17, "eval_loss": 1.1912376880645752, "eval_runtime": 8.7603, "eval_samples_per_second": 535.712, "eval_steps_per_second": 67.007, "step": 40000 }, { "epoch": 9.28, "learning_rate": 4.641301856520742e-06, "loss": 1.2089, "step": 40500 }, { "epoch": 9.28, "eval_loss": 1.18687903881073, "eval_runtime": 8.7342, "eval_samples_per_second": 537.312, "eval_steps_per_second": 67.207, "step": 40500 }, { "epoch": 9.4, "learning_rate": 4.698601879440752e-06, "loss": 1.2154, "step": 41000 }, { "epoch": 9.4, "eval_loss": 1.1724615097045898, "eval_runtime": 8.7348, "eval_samples_per_second": 537.275, "eval_steps_per_second": 67.202, "step": 41000 }, { "epoch": 9.51, "learning_rate": 4.7559019023607615e-06, "loss": 1.2109, "step": 41500 }, { "epoch": 9.51, "eval_loss": 1.184662938117981, "eval_runtime": 8.7423, "eval_samples_per_second": 536.814, "eval_steps_per_second": 67.145, "step": 41500 }, { "epoch": 9.63, "learning_rate": 4.81320192528077e-06, "loss": 1.2049, "step": 42000 }, { "epoch": 9.63, "eval_loss": 1.2051905393600464, "eval_runtime": 8.6737, "eval_samples_per_second": 541.059, "eval_steps_per_second": 67.676, "step": 42000 }, { "epoch": 9.74, "learning_rate": 4.870501948200779e-06, "loss": 1.2089, "step": 42500 }, { "epoch": 9.74, "eval_loss": 1.1915042400360107, "eval_runtime": 8.6716, "eval_samples_per_second": 541.194, "eval_steps_per_second": 67.693, "step": 42500 }, { "epoch": 9.86, "learning_rate": 4.927801971120789e-06, "loss": 1.2047, "step": 43000 }, { "epoch": 9.86, "eval_loss": 1.1896870136260986, "eval_runtime": 8.675, "eval_samples_per_second": 540.982, "eval_steps_per_second": 67.666, "step": 43000 }, { "epoch": 9.97, "learning_rate": 4.985101994040798e-06, "loss": 1.1949, "step": 43500 }, { "epoch": 9.97, "eval_loss": 1.1929206848144531, "eval_runtime": 8.6738, "eval_samples_per_second": 541.055, "eval_steps_per_second": 67.675, "step": 43500 }, { "epoch": 10.08, "learning_rate": 5.042402016960807e-06, "loss": 1.198, "step": 44000 }, { "epoch": 10.08, "eval_loss": 1.2023546695709229, "eval_runtime": 8.6717, "eval_samples_per_second": 541.189, "eval_steps_per_second": 67.692, "step": 44000 }, { "epoch": 10.2, "learning_rate": 5.099702039880816e-06, "loss": 1.1969, "step": 44500 }, { "epoch": 10.2, "eval_loss": 1.1880295276641846, "eval_runtime": 8.6741, "eval_samples_per_second": 541.035, "eval_steps_per_second": 67.673, "step": 44500 }, { "epoch": 10.31, "learning_rate": 5.157002062800826e-06, "loss": 1.1988, "step": 45000 }, { "epoch": 10.31, "eval_loss": 1.1840194463729858, "eval_runtime": 8.6742, "eval_samples_per_second": 541.027, "eval_steps_per_second": 67.672, "step": 45000 }, { "epoch": 10.43, "learning_rate": 5.214302085720834e-06, "loss": 1.1992, "step": 45500 }, { "epoch": 10.43, "eval_loss": 1.1847577095031738, "eval_runtime": 8.6731, "eval_samples_per_second": 541.1, "eval_steps_per_second": 67.681, "step": 45500 }, { "epoch": 10.54, "learning_rate": 5.271602108640844e-06, "loss": 1.1993, "step": 46000 }, { "epoch": 10.54, "eval_loss": 1.1817129850387573, "eval_runtime": 8.675, "eval_samples_per_second": 540.978, "eval_steps_per_second": 67.665, "step": 46000 }, { "epoch": 10.66, "learning_rate": 5.328902131560853e-06, "loss": 1.2008, "step": 46500 }, { "epoch": 10.66, "eval_loss": 1.1942256689071655, "eval_runtime": 8.6737, "eval_samples_per_second": 541.063, "eval_steps_per_second": 67.676, "step": 46500 }, { "epoch": 10.77, "learning_rate": 5.386202154480862e-06, "loss": 1.1953, "step": 47000 }, { "epoch": 10.77, "eval_loss": 1.1913926601409912, "eval_runtime": 8.6773, "eval_samples_per_second": 540.838, "eval_steps_per_second": 67.648, "step": 47000 }, { "epoch": 10.89, "learning_rate": 5.443502177400871e-06, "loss": 1.1991, "step": 47500 }, { "epoch": 10.89, "eval_loss": 1.1916037797927856, "eval_runtime": 8.6736, "eval_samples_per_second": 541.068, "eval_steps_per_second": 67.677, "step": 47500 }, { "epoch": 11.0, "learning_rate": 5.50080220032088e-06, "loss": 1.2103, "step": 48000 }, { "epoch": 11.0, "eval_loss": 1.1806532144546509, "eval_runtime": 8.6729, "eval_samples_per_second": 541.111, "eval_steps_per_second": 67.682, "step": 48000 }, { "epoch": 11.12, "learning_rate": 5.55810222324089e-06, "loss": 1.1925, "step": 48500 }, { "epoch": 11.12, "eval_loss": 1.1956487894058228, "eval_runtime": 8.6711, "eval_samples_per_second": 541.222, "eval_steps_per_second": 67.696, "step": 48500 }, { "epoch": 11.23, "learning_rate": 5.6154022461608985e-06, "loss": 1.1855, "step": 49000 }, { "epoch": 11.23, "eval_loss": 1.19271719455719, "eval_runtime": 8.6757, "eval_samples_per_second": 540.939, "eval_steps_per_second": 67.661, "step": 49000 }, { "epoch": 11.35, "learning_rate": 5.672702269080908e-06, "loss": 1.1941, "step": 49500 }, { "epoch": 11.35, "eval_loss": 1.1855933666229248, "eval_runtime": 8.6696, "eval_samples_per_second": 541.314, "eval_steps_per_second": 67.707, "step": 49500 }, { "epoch": 11.46, "learning_rate": 5.730002292000917e-06, "loss": 1.1987, "step": 50000 }, { "epoch": 11.46, "eval_loss": 1.1963047981262207, "eval_runtime": 8.7411, "eval_samples_per_second": 536.888, "eval_steps_per_second": 67.154, "step": 50000 }, { "epoch": 11.57, "learning_rate": 5.787302314920926e-06, "loss": 1.1967, "step": 50500 }, { "epoch": 11.57, "eval_loss": 1.1938581466674805, "eval_runtime": 8.7349, "eval_samples_per_second": 537.27, "eval_steps_per_second": 67.202, "step": 50500 }, { "epoch": 11.69, "learning_rate": 5.844602337840935e-06, "loss": 1.195, "step": 51000 }, { "epoch": 11.69, "eval_loss": 1.190481185913086, "eval_runtime": 8.7406, "eval_samples_per_second": 536.92, "eval_steps_per_second": 67.158, "step": 51000 }, { "epoch": 11.8, "learning_rate": 5.901902360760945e-06, "loss": 1.1881, "step": 51500 }, { "epoch": 11.8, "eval_loss": 1.2013212442398071, "eval_runtime": 8.7448, "eval_samples_per_second": 536.664, "eval_steps_per_second": 67.126, "step": 51500 }, { "epoch": 11.92, "learning_rate": 5.959202383680954e-06, "loss": 1.1911, "step": 52000 }, { "epoch": 11.92, "eval_loss": 1.2169877290725708, "eval_runtime": 8.7432, "eval_samples_per_second": 536.76, "eval_steps_per_second": 67.138, "step": 52000 }, { "epoch": 12.03, "learning_rate": 6.016502406600963e-06, "loss": 1.1869, "step": 52500 }, { "epoch": 12.03, "eval_loss": 1.1965845823287964, "eval_runtime": 8.737, "eval_samples_per_second": 537.141, "eval_steps_per_second": 67.186, "step": 52500 }, { "epoch": 12.15, "learning_rate": 6.073802429520972e-06, "loss": 1.1884, "step": 53000 }, { "epoch": 12.15, "eval_loss": 1.1910216808319092, "eval_runtime": 8.7454, "eval_samples_per_second": 536.623, "eval_steps_per_second": 67.121, "step": 53000 }, { "epoch": 12.26, "learning_rate": 6.131102452440982e-06, "loss": 1.1862, "step": 53500 }, { "epoch": 12.26, "eval_loss": 1.203157663345337, "eval_runtime": 8.7394, "eval_samples_per_second": 536.991, "eval_steps_per_second": 67.167, "step": 53500 }, { "epoch": 12.38, "learning_rate": 6.18840247536099e-06, "loss": 1.1784, "step": 54000 }, { "epoch": 12.38, "eval_loss": 1.2015249729156494, "eval_runtime": 8.7469, "eval_samples_per_second": 536.531, "eval_steps_per_second": 67.109, "step": 54000 }, { "epoch": 12.49, "learning_rate": 6.2457024982809995e-06, "loss": 1.1803, "step": 54500 }, { "epoch": 12.49, "eval_loss": 1.1970680952072144, "eval_runtime": 8.7428, "eval_samples_per_second": 536.785, "eval_steps_per_second": 67.141, "step": 54500 }, { "epoch": 12.61, "learning_rate": 6.303002521201008e-06, "loss": 1.1863, "step": 55000 }, { "epoch": 12.61, "eval_loss": 1.1986669301986694, "eval_runtime": 8.7478, "eval_samples_per_second": 536.476, "eval_steps_per_second": 67.102, "step": 55000 }, { "epoch": 12.72, "learning_rate": 6.360302544121018e-06, "loss": 1.1873, "step": 55500 }, { "epoch": 12.72, "eval_loss": 1.199426531791687, "eval_runtime": 8.7479, "eval_samples_per_second": 536.472, "eval_steps_per_second": 67.102, "step": 55500 }, { "epoch": 12.84, "learning_rate": 6.417602567041027e-06, "loss": 1.1823, "step": 56000 }, { "epoch": 12.84, "eval_loss": 1.1793687343597412, "eval_runtime": 8.6974, "eval_samples_per_second": 539.588, "eval_steps_per_second": 67.492, "step": 56000 }, { "epoch": 12.95, "learning_rate": 6.474902589961036e-06, "loss": 1.1809, "step": 56500 }, { "epoch": 12.95, "eval_loss": 1.1934062242507935, "eval_runtime": 8.6966, "eval_samples_per_second": 539.637, "eval_steps_per_second": 67.498, "step": 56500 }, { "epoch": 13.06, "learning_rate": 6.532202612881046e-06, "loss": 1.1794, "step": 57000 }, { "epoch": 13.06, "eval_loss": 1.197577714920044, "eval_runtime": 8.6997, "eval_samples_per_second": 539.442, "eval_steps_per_second": 67.473, "step": 57000 }, { "epoch": 13.18, "learning_rate": 6.589502635801055e-06, "loss": 1.1773, "step": 57500 }, { "epoch": 13.18, "eval_loss": 1.189896821975708, "eval_runtime": 8.6957, "eval_samples_per_second": 539.689, "eval_steps_per_second": 67.504, "step": 57500 }, { "epoch": 13.29, "learning_rate": 6.6468026587210645e-06, "loss": 1.1774, "step": 58000 }, { "epoch": 13.29, "eval_loss": 1.1816009283065796, "eval_runtime": 8.6946, "eval_samples_per_second": 539.761, "eval_steps_per_second": 67.513, "step": 58000 }, { "epoch": 13.41, "learning_rate": 6.7041026816410724e-06, "loss": 1.1802, "step": 58500 }, { "epoch": 13.41, "eval_loss": 1.189544677734375, "eval_runtime": 8.6918, "eval_samples_per_second": 539.935, "eval_steps_per_second": 67.535, "step": 58500 }, { "epoch": 13.52, "learning_rate": 6.761402704561082e-06, "loss": 1.1799, "step": 59000 }, { "epoch": 13.52, "eval_loss": 1.1884071826934814, "eval_runtime": 8.6949, "eval_samples_per_second": 539.742, "eval_steps_per_second": 67.511, "step": 59000 }, { "epoch": 13.64, "learning_rate": 6.818702727481091e-06, "loss": 1.172, "step": 59500 }, { "epoch": 13.64, "eval_loss": 1.2016750574111938, "eval_runtime": 8.6922, "eval_samples_per_second": 539.907, "eval_steps_per_second": 67.531, "step": 59500 }, { "epoch": 13.75, "learning_rate": 6.8760027504011005e-06, "loss": 1.174, "step": 60000 }, { "epoch": 13.75, "eval_loss": 1.185314416885376, "eval_runtime": 8.6943, "eval_samples_per_second": 539.778, "eval_steps_per_second": 67.515, "step": 60000 }, { "epoch": 13.87, "learning_rate": 6.93330277332111e-06, "loss": 1.1725, "step": 60500 }, { "epoch": 13.87, "eval_loss": 1.2013959884643555, "eval_runtime": 8.6953, "eval_samples_per_second": 539.716, "eval_steps_per_second": 67.508, "step": 60500 }, { "epoch": 13.98, "learning_rate": 6.990602796241119e-06, "loss": 1.1785, "step": 61000 }, { "epoch": 13.98, "eval_loss": 1.196581244468689, "eval_runtime": 8.6934, "eval_samples_per_second": 539.837, "eval_steps_per_second": 67.523, "step": 61000 }, { "epoch": 14.1, "learning_rate": 7.047902819161129e-06, "loss": 1.1706, "step": 61500 }, { "epoch": 14.1, "eval_loss": 1.1895380020141602, "eval_runtime": 8.6953, "eval_samples_per_second": 539.718, "eval_steps_per_second": 67.508, "step": 61500 }, { "epoch": 14.21, "learning_rate": 7.105202842081138e-06, "loss": 1.1774, "step": 62000 }, { "epoch": 14.21, "eval_loss": 1.1933034658432007, "eval_runtime": 8.694, "eval_samples_per_second": 539.798, "eval_steps_per_second": 67.518, "step": 62000 }, { "epoch": 14.32, "learning_rate": 7.162502865001146e-06, "loss": 1.17, "step": 62500 }, { "epoch": 14.32, "eval_loss": 1.189096450805664, "eval_runtime": 8.6898, "eval_samples_per_second": 540.061, "eval_steps_per_second": 67.551, "step": 62500 }, { "epoch": 14.44, "learning_rate": 7.219802887921155e-06, "loss": 1.1704, "step": 63000 }, { "epoch": 14.44, "eval_loss": 1.2022786140441895, "eval_runtime": 8.7104, "eval_samples_per_second": 538.78, "eval_steps_per_second": 67.391, "step": 63000 }, { "epoch": 14.55, "learning_rate": 7.277102910841165e-06, "loss": 1.1715, "step": 63500 }, { "epoch": 14.55, "eval_loss": 1.1846983432769775, "eval_runtime": 8.6985, "eval_samples_per_second": 539.518, "eval_steps_per_second": 67.483, "step": 63500 }, { "epoch": 14.67, "learning_rate": 7.334402933761174e-06, "loss": 1.1744, "step": 64000 }, { "epoch": 14.67, "eval_loss": 1.190294623374939, "eval_runtime": 8.6988, "eval_samples_per_second": 539.499, "eval_steps_per_second": 67.48, "step": 64000 }, { "epoch": 14.78, "learning_rate": 7.391702956681183e-06, "loss": 1.17, "step": 64500 }, { "epoch": 14.78, "eval_loss": 1.2001311779022217, "eval_runtime": 8.6953, "eval_samples_per_second": 539.715, "eval_steps_per_second": 67.508, "step": 64500 }, { "epoch": 14.9, "learning_rate": 7.449002979601193e-06, "loss": 1.1724, "step": 65000 }, { "epoch": 14.9, "eval_loss": 1.2139863967895508, "eval_runtime": 8.6954, "eval_samples_per_second": 539.709, "eval_steps_per_second": 67.507, "step": 65000 }, { "epoch": 15.01, "learning_rate": 7.506303002521202e-06, "loss": 1.1699, "step": 65500 }, { "epoch": 15.01, "eval_loss": 1.1941304206848145, "eval_runtime": 8.6928, "eval_samples_per_second": 539.871, "eval_steps_per_second": 67.527, "step": 65500 }, { "epoch": 15.13, "learning_rate": 7.56360302544121e-06, "loss": 1.1617, "step": 66000 }, { "epoch": 15.13, "eval_loss": 1.1910510063171387, "eval_runtime": 8.7015, "eval_samples_per_second": 539.33, "eval_steps_per_second": 67.459, "step": 66000 }, { "epoch": 15.24, "learning_rate": 7.620903048361219e-06, "loss": 1.16, "step": 66500 }, { "epoch": 15.24, "eval_loss": 1.2018942832946777, "eval_runtime": 8.6945, "eval_samples_per_second": 539.764, "eval_steps_per_second": 67.514, "step": 66500 }, { "epoch": 15.36, "learning_rate": 7.678203071281228e-06, "loss": 1.1668, "step": 67000 }, { "epoch": 15.36, "eval_loss": 1.205061674118042, "eval_runtime": 8.6981, "eval_samples_per_second": 539.543, "eval_steps_per_second": 67.486, "step": 67000 }, { "epoch": 15.47, "learning_rate": 7.735503094201238e-06, "loss": 1.1647, "step": 67500 }, { "epoch": 15.47, "eval_loss": 1.1958627700805664, "eval_runtime": 8.691, "eval_samples_per_second": 539.981, "eval_steps_per_second": 67.541, "step": 67500 }, { "epoch": 15.59, "learning_rate": 7.792803117121247e-06, "loss": 1.165, "step": 68000 }, { "epoch": 15.59, "eval_loss": 1.184870958328247, "eval_runtime": 8.6979, "eval_samples_per_second": 539.556, "eval_steps_per_second": 67.488, "step": 68000 }, { "epoch": 15.7, "learning_rate": 7.850103140041256e-06, "loss": 1.1586, "step": 68500 }, { "epoch": 15.7, "eval_loss": 1.2009097337722778, "eval_runtime": 8.6911, "eval_samples_per_second": 539.977, "eval_steps_per_second": 67.54, "step": 68500 }, { "epoch": 15.81, "learning_rate": 7.907403162961266e-06, "loss": 1.1642, "step": 69000 }, { "epoch": 15.81, "eval_loss": 1.2036429643630981, "eval_runtime": 8.695, "eval_samples_per_second": 539.736, "eval_steps_per_second": 67.51, "step": 69000 }, { "epoch": 15.93, "learning_rate": 7.964703185881275e-06, "loss": 1.1593, "step": 69500 }, { "epoch": 15.93, "eval_loss": 1.1913105249404907, "eval_runtime": 8.696, "eval_samples_per_second": 539.676, "eval_steps_per_second": 67.503, "step": 69500 }, { "epoch": 16.04, "learning_rate": 8.022003208801284e-06, "loss": 1.1569, "step": 70000 }, { "epoch": 16.04, "eval_loss": 1.194607138633728, "eval_runtime": 8.6992, "eval_samples_per_second": 539.476, "eval_steps_per_second": 67.478, "step": 70000 }, { "epoch": 16.16, "learning_rate": 8.079303231721293e-06, "loss": 1.1601, "step": 70500 }, { "epoch": 16.16, "eval_loss": 1.1960129737854004, "eval_runtime": 8.6958, "eval_samples_per_second": 539.685, "eval_steps_per_second": 67.504, "step": 70500 }, { "epoch": 16.27, "learning_rate": 8.136603254641302e-06, "loss": 1.1534, "step": 71000 }, { "epoch": 16.27, "eval_loss": 1.1922234296798706, "eval_runtime": 8.7013, "eval_samples_per_second": 539.344, "eval_steps_per_second": 67.461, "step": 71000 }, { "epoch": 16.39, "learning_rate": 8.193903277561312e-06, "loss": 1.1652, "step": 71500 }, { "epoch": 16.39, "eval_loss": 1.2058032751083374, "eval_runtime": 8.6891, "eval_samples_per_second": 540.103, "eval_steps_per_second": 67.556, "step": 71500 }, { "epoch": 16.5, "learning_rate": 8.251203300481321e-06, "loss": 1.163, "step": 72000 }, { "epoch": 16.5, "eval_loss": 1.2068945169448853, "eval_runtime": 8.6894, "eval_samples_per_second": 540.086, "eval_steps_per_second": 67.554, "step": 72000 }, { "epoch": 16.62, "learning_rate": 8.30850332340133e-06, "loss": 1.1527, "step": 72500 }, { "epoch": 16.62, "eval_loss": 1.209852695465088, "eval_runtime": 8.6929, "eval_samples_per_second": 539.865, "eval_steps_per_second": 67.526, "step": 72500 }, { "epoch": 16.73, "learning_rate": 8.36580334632134e-06, "loss": 1.1587, "step": 73000 }, { "epoch": 16.73, "eval_loss": 1.205710768699646, "eval_runtime": 8.7029, "eval_samples_per_second": 539.243, "eval_steps_per_second": 67.448, "step": 73000 }, { "epoch": 16.85, "learning_rate": 8.423103369241347e-06, "loss": 1.1622, "step": 73500 }, { "epoch": 16.85, "eval_loss": 1.1973174810409546, "eval_runtime": 8.6911, "eval_samples_per_second": 539.979, "eval_steps_per_second": 67.541, "step": 73500 }, { "epoch": 16.96, "learning_rate": 8.480403392161356e-06, "loss": 1.1628, "step": 74000 }, { "epoch": 16.96, "eval_loss": 1.2086446285247803, "eval_runtime": 8.6973, "eval_samples_per_second": 539.592, "eval_steps_per_second": 67.492, "step": 74000 }, { "epoch": 17.08, "learning_rate": 8.537703415081367e-06, "loss": 1.1558, "step": 74500 }, { "epoch": 17.08, "eval_loss": 1.203248381614685, "eval_runtime": 8.7059, "eval_samples_per_second": 539.061, "eval_steps_per_second": 67.426, "step": 74500 }, { "epoch": 17.19, "learning_rate": 8.595003438001375e-06, "loss": 1.1505, "step": 75000 }, { "epoch": 17.19, "eval_loss": 1.19260573387146, "eval_runtime": 8.6972, "eval_samples_per_second": 539.598, "eval_steps_per_second": 67.493, "step": 75000 }, { "epoch": 17.3, "learning_rate": 8.652303460921384e-06, "loss": 1.1509, "step": 75500 }, { "epoch": 17.3, "eval_loss": 1.1916782855987549, "eval_runtime": 8.6912, "eval_samples_per_second": 539.972, "eval_steps_per_second": 67.54, "step": 75500 }, { "epoch": 17.42, "learning_rate": 8.709603483841395e-06, "loss": 1.1537, "step": 76000 }, { "epoch": 17.42, "eval_loss": 1.1850289106369019, "eval_runtime": 8.691, "eval_samples_per_second": 539.987, "eval_steps_per_second": 67.541, "step": 76000 }, { "epoch": 17.53, "learning_rate": 8.766903506761404e-06, "loss": 1.15, "step": 76500 }, { "epoch": 17.53, "eval_loss": 1.2179312705993652, "eval_runtime": 8.6881, "eval_samples_per_second": 540.166, "eval_steps_per_second": 67.564, "step": 76500 }, { "epoch": 17.65, "learning_rate": 8.824203529681412e-06, "loss": 1.1474, "step": 77000 }, { "epoch": 17.65, "eval_loss": 1.2169504165649414, "eval_runtime": 8.6926, "eval_samples_per_second": 539.886, "eval_steps_per_second": 67.529, "step": 77000 }, { "epoch": 17.76, "learning_rate": 8.881503552601421e-06, "loss": 1.1598, "step": 77500 }, { "epoch": 17.76, "eval_loss": 1.2057197093963623, "eval_runtime": 8.6835, "eval_samples_per_second": 540.45, "eval_steps_per_second": 67.599, "step": 77500 }, { "epoch": 17.88, "learning_rate": 8.93880357552143e-06, "loss": 1.1468, "step": 78000 }, { "epoch": 17.88, "eval_loss": 1.1853656768798828, "eval_runtime": 8.6993, "eval_samples_per_second": 539.468, "eval_steps_per_second": 67.477, "step": 78000 }, { "epoch": 17.99, "learning_rate": 8.99610359844144e-06, "loss": 1.1499, "step": 78500 }, { "epoch": 17.99, "eval_loss": 1.2021933794021606, "eval_runtime": 8.6902, "eval_samples_per_second": 540.033, "eval_steps_per_second": 67.547, "step": 78500 }, { "epoch": 18.11, "learning_rate": 9.05340362136145e-06, "loss": 1.1433, "step": 79000 }, { "epoch": 18.11, "eval_loss": 1.1927874088287354, "eval_runtime": 8.6982, "eval_samples_per_second": 539.535, "eval_steps_per_second": 67.485, "step": 79000 }, { "epoch": 18.22, "learning_rate": 9.110703644281458e-06, "loss": 1.1568, "step": 79500 }, { "epoch": 18.22, "eval_loss": 1.205413579940796, "eval_runtime": 8.6969, "eval_samples_per_second": 539.62, "eval_steps_per_second": 67.496, "step": 79500 }, { "epoch": 18.34, "learning_rate": 9.168003667201468e-06, "loss": 1.1448, "step": 80000 }, { "epoch": 18.34, "eval_loss": 1.211775302886963, "eval_runtime": 8.7475, "eval_samples_per_second": 536.498, "eval_steps_per_second": 67.105, "step": 80000 }, { "epoch": 18.45, "learning_rate": 9.225303690121477e-06, "loss": 1.1552, "step": 80500 }, { "epoch": 18.45, "eval_loss": 1.208743691444397, "eval_runtime": 8.6543, "eval_samples_per_second": 542.274, "eval_steps_per_second": 67.828, "step": 80500 }, { "epoch": 18.57, "learning_rate": 9.282603713041484e-06, "loss": 1.154, "step": 81000 }, { "epoch": 18.57, "eval_loss": 1.214882493019104, "eval_runtime": 8.6627, "eval_samples_per_second": 541.747, "eval_steps_per_second": 67.762, "step": 81000 }, { "epoch": 18.68, "learning_rate": 9.339903735961495e-06, "loss": 1.1417, "step": 81500 }, { "epoch": 18.68, "eval_loss": 1.216241478919983, "eval_runtime": 8.6563, "eval_samples_per_second": 542.147, "eval_steps_per_second": 67.812, "step": 81500 }, { "epoch": 18.79, "learning_rate": 9.397203758881504e-06, "loss": 1.1459, "step": 82000 }, { "epoch": 18.79, "eval_loss": 1.2169584035873413, "eval_runtime": 8.6637, "eval_samples_per_second": 541.683, "eval_steps_per_second": 67.754, "step": 82000 }, { "epoch": 18.91, "learning_rate": 9.454503781801512e-06, "loss": 1.1513, "step": 82500 }, { "epoch": 18.91, "eval_loss": 1.2085603475570679, "eval_runtime": 8.6609, "eval_samples_per_second": 541.859, "eval_steps_per_second": 67.776, "step": 82500 }, { "epoch": 19.02, "learning_rate": 9.511803804721523e-06, "loss": 1.1498, "step": 83000 }, { "epoch": 19.02, "eval_loss": 1.196622371673584, "eval_runtime": 8.6633, "eval_samples_per_second": 541.712, "eval_steps_per_second": 67.757, "step": 83000 }, { "epoch": 19.14, "learning_rate": 9.569103827641532e-06, "loss": 1.1402, "step": 83500 }, { "epoch": 19.14, "eval_loss": 1.2100378274917603, "eval_runtime": 8.6588, "eval_samples_per_second": 541.992, "eval_steps_per_second": 67.792, "step": 83500 }, { "epoch": 19.25, "learning_rate": 9.62640385056154e-06, "loss": 1.1408, "step": 84000 }, { "epoch": 19.25, "eval_loss": 1.2080512046813965, "eval_runtime": 8.661, "eval_samples_per_second": 541.856, "eval_steps_per_second": 67.775, "step": 84000 }, { "epoch": 19.37, "learning_rate": 9.683703873481551e-06, "loss": 1.137, "step": 84500 }, { "epoch": 19.37, "eval_loss": 1.2046854496002197, "eval_runtime": 8.6546, "eval_samples_per_second": 542.252, "eval_steps_per_second": 67.825, "step": 84500 }, { "epoch": 19.48, "learning_rate": 9.741003896401558e-06, "loss": 1.1452, "step": 85000 }, { "epoch": 19.48, "eval_loss": 1.196044921875, "eval_runtime": 8.6622, "eval_samples_per_second": 541.781, "eval_steps_per_second": 67.766, "step": 85000 }, { "epoch": 19.6, "learning_rate": 9.798303919321569e-06, "loss": 1.1451, "step": 85500 }, { "epoch": 19.6, "eval_loss": 1.2086985111236572, "eval_runtime": 8.6584, "eval_samples_per_second": 542.018, "eval_steps_per_second": 67.796, "step": 85500 }, { "epoch": 19.71, "learning_rate": 9.855603942241577e-06, "loss": 1.1406, "step": 86000 }, { "epoch": 19.71, "eval_loss": 1.1989599466323853, "eval_runtime": 8.6608, "eval_samples_per_second": 541.869, "eval_steps_per_second": 67.777, "step": 86000 }, { "epoch": 19.83, "learning_rate": 9.912903965161586e-06, "loss": 1.145, "step": 86500 }, { "epoch": 19.83, "eval_loss": 1.1933842897415161, "eval_runtime": 8.6545, "eval_samples_per_second": 542.264, "eval_steps_per_second": 67.826, "step": 86500 }, { "epoch": 19.94, "learning_rate": 9.970203988081597e-06, "loss": 1.1418, "step": 87000 }, { "epoch": 19.94, "eval_loss": 1.1995594501495361, "eval_runtime": 8.6636, "eval_samples_per_second": 541.692, "eval_steps_per_second": 67.755, "step": 87000 }, { "epoch": 20.06, "learning_rate": 1.0027504011001606e-05, "loss": 1.1434, "step": 87500 }, { "epoch": 20.06, "eval_loss": 1.2122530937194824, "eval_runtime": 8.6517, "eval_samples_per_second": 542.439, "eval_steps_per_second": 67.848, "step": 87500 }, { "epoch": 20.17, "learning_rate": 1.0084804033921614e-05, "loss": 1.1375, "step": 88000 }, { "epoch": 20.17, "eval_loss": 1.2170482873916626, "eval_runtime": 8.661, "eval_samples_per_second": 541.852, "eval_steps_per_second": 67.775, "step": 88000 }, { "epoch": 20.28, "learning_rate": 1.0142104056841623e-05, "loss": 1.1377, "step": 88500 }, { "epoch": 20.28, "eval_loss": 1.2023061513900757, "eval_runtime": 8.6595, "eval_samples_per_second": 541.945, "eval_steps_per_second": 67.786, "step": 88500 }, { "epoch": 20.4, "learning_rate": 1.0199404079761632e-05, "loss": 1.1439, "step": 89000 }, { "epoch": 20.4, "eval_loss": 1.2127084732055664, "eval_runtime": 8.6625, "eval_samples_per_second": 541.759, "eval_steps_per_second": 67.763, "step": 89000 }, { "epoch": 20.51, "learning_rate": 1.025670410268164e-05, "loss": 1.1409, "step": 89500 }, { "epoch": 20.51, "eval_loss": 1.207607388496399, "eval_runtime": 8.6604, "eval_samples_per_second": 541.894, "eval_steps_per_second": 67.78, "step": 89500 }, { "epoch": 20.63, "learning_rate": 1.0314004125601651e-05, "loss": 1.1359, "step": 90000 }, { "epoch": 20.63, "eval_loss": 1.1995285749435425, "eval_runtime": 8.6608, "eval_samples_per_second": 541.865, "eval_steps_per_second": 67.776, "step": 90000 }, { "epoch": 20.74, "learning_rate": 1.037130414852166e-05, "loss": 1.1433, "step": 90500 }, { "epoch": 20.74, "eval_loss": 1.199556589126587, "eval_runtime": 8.6533, "eval_samples_per_second": 542.339, "eval_steps_per_second": 67.836, "step": 90500 }, { "epoch": 20.86, "learning_rate": 1.0428604171441669e-05, "loss": 1.1418, "step": 91000 }, { "epoch": 20.86, "eval_loss": 1.20955491065979, "eval_runtime": 8.6633, "eval_samples_per_second": 541.71, "eval_steps_per_second": 67.757, "step": 91000 }, { "epoch": 20.97, "learning_rate": 1.048590419436168e-05, "loss": 1.135, "step": 91500 }, { "epoch": 20.97, "eval_loss": 1.2079386711120605, "eval_runtime": 8.6597, "eval_samples_per_second": 541.937, "eval_steps_per_second": 67.785, "step": 91500 }, { "epoch": 21.09, "learning_rate": 1.0543204217281688e-05, "loss": 1.1296, "step": 92000 }, { "epoch": 21.09, "eval_loss": 1.2019649744033813, "eval_runtime": 8.6546, "eval_samples_per_second": 542.254, "eval_steps_per_second": 67.825, "step": 92000 }, { "epoch": 21.2, "learning_rate": 1.0600504240201695e-05, "loss": 1.1272, "step": 92500 }, { "epoch": 21.2, "eval_loss": 1.2092097997665405, "eval_runtime": 8.7354, "eval_samples_per_second": 537.24, "eval_steps_per_second": 67.198, "step": 92500 }, { "epoch": 21.32, "learning_rate": 1.0657804263121706e-05, "loss": 1.1324, "step": 93000 }, { "epoch": 21.32, "eval_loss": 1.2048821449279785, "eval_runtime": 8.7672, "eval_samples_per_second": 535.291, "eval_steps_per_second": 66.954, "step": 93000 }, { "epoch": 21.43, "learning_rate": 1.0715104286041714e-05, "loss": 1.1401, "step": 93500 }, { "epoch": 21.43, "eval_loss": 1.2121554613113403, "eval_runtime": 8.7347, "eval_samples_per_second": 537.281, "eval_steps_per_second": 67.203, "step": 93500 }, { "epoch": 21.54, "learning_rate": 1.0772404308961723e-05, "loss": 1.1308, "step": 94000 }, { "epoch": 21.54, "eval_loss": 1.2078057527542114, "eval_runtime": 8.7398, "eval_samples_per_second": 536.972, "eval_steps_per_second": 67.164, "step": 94000 }, { "epoch": 21.66, "learning_rate": 1.0829704331881734e-05, "loss": 1.1266, "step": 94500 }, { "epoch": 21.66, "eval_loss": 1.1985101699829102, "eval_runtime": 8.7341, "eval_samples_per_second": 537.32, "eval_steps_per_second": 67.208, "step": 94500 }, { "epoch": 21.77, "learning_rate": 1.0887004354801743e-05, "loss": 1.1321, "step": 95000 }, { "epoch": 21.77, "eval_loss": 1.1965982913970947, "eval_runtime": 8.7403, "eval_samples_per_second": 536.938, "eval_steps_per_second": 67.16, "step": 95000 }, { "epoch": 21.89, "learning_rate": 1.0944304377721751e-05, "loss": 1.1364, "step": 95500 }, { "epoch": 21.89, "eval_loss": 1.195939302444458, "eval_runtime": 8.7337, "eval_samples_per_second": 537.346, "eval_steps_per_second": 67.211, "step": 95500 }, { "epoch": 22.0, "learning_rate": 1.100160440064176e-05, "loss": 1.1389, "step": 96000 }, { "epoch": 22.0, "eval_loss": 1.2259595394134521, "eval_runtime": 8.7427, "eval_samples_per_second": 536.791, "eval_steps_per_second": 67.142, "step": 96000 }, { "epoch": 22.12, "learning_rate": 1.1058904423561769e-05, "loss": 1.1277, "step": 96500 }, { "epoch": 22.12, "eval_loss": 1.2042038440704346, "eval_runtime": 8.7443, "eval_samples_per_second": 536.69, "eval_steps_per_second": 67.129, "step": 96500 }, { "epoch": 22.23, "learning_rate": 1.111620444648178e-05, "loss": 1.1318, "step": 97000 }, { "epoch": 22.23, "eval_loss": 1.205905556678772, "eval_runtime": 8.7372, "eval_samples_per_second": 537.13, "eval_steps_per_second": 67.184, "step": 97000 }, { "epoch": 22.35, "learning_rate": 1.1173504469401788e-05, "loss": 1.1216, "step": 97500 }, { "epoch": 22.35, "eval_loss": 1.2146854400634766, "eval_runtime": 8.7354, "eval_samples_per_second": 537.24, "eval_steps_per_second": 67.198, "step": 97500 }, { "epoch": 22.46, "learning_rate": 1.1230804492321797e-05, "loss": 1.1302, "step": 98000 }, { "epoch": 22.46, "eval_loss": 1.2047946453094482, "eval_runtime": 8.7419, "eval_samples_per_second": 536.837, "eval_steps_per_second": 67.148, "step": 98000 }, { "epoch": 22.58, "learning_rate": 1.1288104515241808e-05, "loss": 1.1227, "step": 98500 }, { "epoch": 22.58, "eval_loss": 1.2033843994140625, "eval_runtime": 8.736, "eval_samples_per_second": 537.205, "eval_steps_per_second": 67.194, "step": 98500 }, { "epoch": 22.69, "learning_rate": 1.1345404538161816e-05, "loss": 1.1323, "step": 99000 }, { "epoch": 22.69, "eval_loss": 1.202348232269287, "eval_runtime": 8.7387, "eval_samples_per_second": 537.038, "eval_steps_per_second": 67.173, "step": 99000 }, { "epoch": 22.81, "learning_rate": 1.1402704561081825e-05, "loss": 1.1299, "step": 99500 }, { "epoch": 22.81, "eval_loss": 1.2057666778564453, "eval_runtime": 8.749, "eval_samples_per_second": 536.405, "eval_steps_per_second": 67.093, "step": 99500 }, { "epoch": 22.92, "learning_rate": 1.1460004584001834e-05, "loss": 1.1275, "step": 100000 }, { "epoch": 22.92, "eval_loss": 1.221665382385254, "eval_runtime": 8.7496, "eval_samples_per_second": 536.367, "eval_steps_per_second": 67.089, "step": 100000 }, { "epoch": 23.03, "learning_rate": 1.1517304606921843e-05, "loss": 1.1224, "step": 100500 }, { "epoch": 23.03, "eval_loss": 1.20888090133667, "eval_runtime": 8.7363, "eval_samples_per_second": 537.184, "eval_steps_per_second": 67.191, "step": 100500 }, { "epoch": 23.15, "learning_rate": 1.1574604629841851e-05, "loss": 1.1273, "step": 101000 }, { "epoch": 23.15, "eval_loss": 1.2236766815185547, "eval_runtime": 8.7474, "eval_samples_per_second": 536.502, "eval_steps_per_second": 67.106, "step": 101000 }, { "epoch": 23.26, "learning_rate": 1.1631904652761862e-05, "loss": 1.1178, "step": 101500 }, { "epoch": 23.26, "eval_loss": 1.2149614095687866, "eval_runtime": 8.7391, "eval_samples_per_second": 537.013, "eval_steps_per_second": 67.17, "step": 101500 }, { "epoch": 23.38, "learning_rate": 1.168920467568187e-05, "loss": 1.1169, "step": 102000 }, { "epoch": 23.38, "eval_loss": 1.2083278894424438, "eval_runtime": 8.7435, "eval_samples_per_second": 536.744, "eval_steps_per_second": 67.136, "step": 102000 }, { "epoch": 23.49, "learning_rate": 1.174650469860188e-05, "loss": 1.1248, "step": 102500 }, { "epoch": 23.49, "eval_loss": 1.2195032835006714, "eval_runtime": 8.7371, "eval_samples_per_second": 537.136, "eval_steps_per_second": 67.185, "step": 102500 }, { "epoch": 23.61, "learning_rate": 1.180380472152189e-05, "loss": 1.133, "step": 103000 }, { "epoch": 23.61, "eval_loss": 1.1978524923324585, "eval_runtime": 8.7471, "eval_samples_per_second": 536.519, "eval_steps_per_second": 67.108, "step": 103000 }, { "epoch": 23.72, "learning_rate": 1.1861104744441897e-05, "loss": 1.1282, "step": 103500 }, { "epoch": 23.72, "eval_loss": 1.2170518636703491, "eval_runtime": 8.7383, "eval_samples_per_second": 537.06, "eval_steps_per_second": 67.175, "step": 103500 }, { "epoch": 23.84, "learning_rate": 1.1918404767361908e-05, "loss": 1.1239, "step": 104000 }, { "epoch": 23.84, "eval_loss": 1.207922101020813, "eval_runtime": 8.7436, "eval_samples_per_second": 536.738, "eval_steps_per_second": 67.135, "step": 104000 }, { "epoch": 23.95, "learning_rate": 1.1975704790281916e-05, "loss": 1.1252, "step": 104500 }, { "epoch": 23.95, "eval_loss": 1.2199273109436035, "eval_runtime": 8.7511, "eval_samples_per_second": 536.276, "eval_steps_per_second": 67.077, "step": 104500 }, { "epoch": 24.07, "learning_rate": 1.2033004813201925e-05, "loss": 1.1285, "step": 105000 }, { "epoch": 24.07, "eval_loss": 1.2087541818618774, "eval_runtime": 8.7421, "eval_samples_per_second": 536.831, "eval_steps_per_second": 67.147, "step": 105000 }, { "epoch": 24.18, "learning_rate": 1.2090304836121936e-05, "loss": 1.1176, "step": 105500 }, { "epoch": 24.18, "eval_loss": 1.2136132717132568, "eval_runtime": 8.7659, "eval_samples_per_second": 535.369, "eval_steps_per_second": 66.964, "step": 105500 }, { "epoch": 24.3, "learning_rate": 1.2147604859041945e-05, "loss": 1.1245, "step": 106000 }, { "epoch": 24.3, "eval_loss": 1.215354323387146, "eval_runtime": 8.7425, "eval_samples_per_second": 536.801, "eval_steps_per_second": 67.143, "step": 106000 }, { "epoch": 24.41, "learning_rate": 1.2204904881961953e-05, "loss": 1.1214, "step": 106500 }, { "epoch": 24.41, "eval_loss": 1.2037323713302612, "eval_runtime": 8.739, "eval_samples_per_second": 537.017, "eval_steps_per_second": 67.17, "step": 106500 }, { "epoch": 24.52, "learning_rate": 1.2262204904881964e-05, "loss": 1.1189, "step": 107000 }, { "epoch": 24.52, "eval_loss": 1.214617371559143, "eval_runtime": 8.7482, "eval_samples_per_second": 536.452, "eval_steps_per_second": 67.099, "step": 107000 }, { "epoch": 24.64, "learning_rate": 1.2319504927801971e-05, "loss": 1.1196, "step": 107500 }, { "epoch": 24.64, "eval_loss": 1.2239211797714233, "eval_runtime": 8.7542, "eval_samples_per_second": 536.084, "eval_steps_per_second": 67.053, "step": 107500 }, { "epoch": 24.75, "learning_rate": 1.237680495072198e-05, "loss": 1.1194, "step": 108000 }, { "epoch": 24.75, "eval_loss": 1.2130053043365479, "eval_runtime": 8.7443, "eval_samples_per_second": 536.69, "eval_steps_per_second": 67.129, "step": 108000 }, { "epoch": 24.87, "learning_rate": 1.243410497364199e-05, "loss": 1.1206, "step": 108500 }, { "epoch": 24.87, "eval_loss": 1.2183654308319092, "eval_runtime": 8.7409, "eval_samples_per_second": 536.899, "eval_steps_per_second": 67.155, "step": 108500 }, { "epoch": 24.98, "learning_rate": 1.2491404996561999e-05, "loss": 1.1269, "step": 109000 }, { "epoch": 24.98, "eval_loss": 1.2311912775039673, "eval_runtime": 8.7441, "eval_samples_per_second": 536.705, "eval_steps_per_second": 67.131, "step": 109000 }, { "epoch": 25.1, "learning_rate": 1.2548705019482008e-05, "loss": 1.1205, "step": 109500 }, { "epoch": 25.1, "eval_loss": 1.2345490455627441, "eval_runtime": 8.7405, "eval_samples_per_second": 536.929, "eval_steps_per_second": 67.159, "step": 109500 }, { "epoch": 25.21, "learning_rate": 1.2606005042402017e-05, "loss": 1.1168, "step": 110000 }, { "epoch": 25.21, "eval_loss": 1.218522310256958, "eval_runtime": 8.7666, "eval_samples_per_second": 535.326, "eval_steps_per_second": 66.958, "step": 110000 }, { "epoch": 25.33, "learning_rate": 1.2663305065322025e-05, "loss": 1.1203, "step": 110500 }, { "epoch": 25.33, "eval_loss": 1.2130851745605469, "eval_runtime": 8.7456, "eval_samples_per_second": 536.611, "eval_steps_per_second": 67.119, "step": 110500 }, { "epoch": 25.44, "learning_rate": 1.2720605088242036e-05, "loss": 1.1136, "step": 111000 }, { "epoch": 25.44, "eval_loss": 1.2243788242340088, "eval_runtime": 8.7405, "eval_samples_per_second": 536.926, "eval_steps_per_second": 67.159, "step": 111000 }, { "epoch": 25.56, "learning_rate": 1.2777905111162045e-05, "loss": 1.121, "step": 111500 }, { "epoch": 25.56, "eval_loss": 1.2063595056533813, "eval_runtime": 8.7928, "eval_samples_per_second": 533.732, "eval_steps_per_second": 66.759, "step": 111500 }, { "epoch": 25.67, "learning_rate": 1.2835205134082054e-05, "loss": 1.121, "step": 112000 }, { "epoch": 25.67, "eval_loss": 1.2303956747055054, "eval_runtime": 8.6617, "eval_samples_per_second": 541.809, "eval_steps_per_second": 67.769, "step": 112000 }, { "epoch": 25.78, "learning_rate": 1.2892505157002064e-05, "loss": 1.1117, "step": 112500 }, { "epoch": 25.78, "eval_loss": 1.207320213317871, "eval_runtime": 8.6618, "eval_samples_per_second": 541.806, "eval_steps_per_second": 67.769, "step": 112500 }, { "epoch": 25.9, "learning_rate": 1.2949805179922073e-05, "loss": 1.1211, "step": 113000 }, { "epoch": 25.9, "eval_loss": 1.2101763486862183, "eval_runtime": 8.6657, "eval_samples_per_second": 541.56, "eval_steps_per_second": 67.738, "step": 113000 }, { "epoch": 26.01, "learning_rate": 1.3007105202842082e-05, "loss": 1.1085, "step": 113500 }, { "epoch": 26.01, "eval_loss": 1.2277805805206299, "eval_runtime": 8.659, "eval_samples_per_second": 541.982, "eval_steps_per_second": 67.791, "step": 113500 }, { "epoch": 26.13, "learning_rate": 1.3064405225762092e-05, "loss": 1.1169, "step": 114000 }, { "epoch": 26.13, "eval_loss": 1.215835452079773, "eval_runtime": 8.6603, "eval_samples_per_second": 541.898, "eval_steps_per_second": 67.781, "step": 114000 }, { "epoch": 26.24, "learning_rate": 1.3121705248682101e-05, "loss": 1.1105, "step": 114500 }, { "epoch": 26.24, "eval_loss": 1.2315971851348877, "eval_runtime": 8.6524, "eval_samples_per_second": 542.394, "eval_steps_per_second": 67.843, "step": 114500 }, { "epoch": 26.36, "learning_rate": 1.317900527160211e-05, "loss": 1.1114, "step": 115000 }, { "epoch": 26.36, "eval_loss": 1.225192666053772, "eval_runtime": 8.6587, "eval_samples_per_second": 541.998, "eval_steps_per_second": 67.793, "step": 115000 }, { "epoch": 26.47, "learning_rate": 1.323630529452212e-05, "loss": 1.1135, "step": 115500 }, { "epoch": 26.47, "eval_loss": 1.2351411581039429, "eval_runtime": 8.6587, "eval_samples_per_second": 541.996, "eval_steps_per_second": 67.793, "step": 115500 }, { "epoch": 26.59, "learning_rate": 1.3293605317442129e-05, "loss": 1.1103, "step": 116000 }, { "epoch": 26.59, "eval_loss": 1.2183340787887573, "eval_runtime": 8.661, "eval_samples_per_second": 541.852, "eval_steps_per_second": 67.775, "step": 116000 }, { "epoch": 26.7, "learning_rate": 1.3350905340362138e-05, "loss": 1.1141, "step": 116500 }, { "epoch": 26.7, "eval_loss": 1.2201545238494873, "eval_runtime": 8.6612, "eval_samples_per_second": 541.839, "eval_steps_per_second": 67.773, "step": 116500 }, { "epoch": 26.82, "learning_rate": 1.3408205363282145e-05, "loss": 1.1123, "step": 117000 }, { "epoch": 26.82, "eval_loss": 1.2225557565689087, "eval_runtime": 8.6615, "eval_samples_per_second": 541.822, "eval_steps_per_second": 67.771, "step": 117000 }, { "epoch": 26.93, "learning_rate": 1.3465505386202154e-05, "loss": 1.1047, "step": 117500 }, { "epoch": 26.93, "eval_loss": 1.2162261009216309, "eval_runtime": 8.6569, "eval_samples_per_second": 542.111, "eval_steps_per_second": 67.807, "step": 117500 }, { "epoch": 27.05, "learning_rate": 1.3522805409122164e-05, "loss": 1.1036, "step": 118000 }, { "epoch": 27.05, "eval_loss": 1.2316484451293945, "eval_runtime": 8.6602, "eval_samples_per_second": 541.904, "eval_steps_per_second": 67.781, "step": 118000 }, { "epoch": 27.16, "learning_rate": 1.3580105432042173e-05, "loss": 1.107, "step": 118500 }, { "epoch": 27.16, "eval_loss": 1.2190759181976318, "eval_runtime": 8.6598, "eval_samples_per_second": 541.926, "eval_steps_per_second": 67.784, "step": 118500 }, { "epoch": 27.27, "learning_rate": 1.3637405454962182e-05, "loss": 1.1087, "step": 119000 }, { "epoch": 27.27, "eval_loss": 1.2248270511627197, "eval_runtime": 8.6558, "eval_samples_per_second": 542.177, "eval_steps_per_second": 67.815, "step": 119000 }, { "epoch": 27.39, "learning_rate": 1.3694705477882192e-05, "loss": 1.1057, "step": 119500 }, { "epoch": 27.39, "eval_loss": 1.2065876722335815, "eval_runtime": 8.6606, "eval_samples_per_second": 541.877, "eval_steps_per_second": 67.778, "step": 119500 }, { "epoch": 27.5, "learning_rate": 1.3752005500802201e-05, "loss": 1.1106, "step": 120000 }, { "epoch": 27.5, "eval_loss": 1.234053373336792, "eval_runtime": 8.6646, "eval_samples_per_second": 541.632, "eval_steps_per_second": 67.747, "step": 120000 }, { "epoch": 27.62, "learning_rate": 1.380930552372221e-05, "loss": 1.1099, "step": 120500 }, { "epoch": 27.62, "eval_loss": 1.218479871749878, "eval_runtime": 8.6636, "eval_samples_per_second": 541.69, "eval_steps_per_second": 67.755, "step": 120500 }, { "epoch": 27.73, "learning_rate": 1.386660554664222e-05, "loss": 1.1109, "step": 121000 }, { "epoch": 27.73, "eval_loss": 1.2268691062927246, "eval_runtime": 8.6653, "eval_samples_per_second": 541.586, "eval_steps_per_second": 67.742, "step": 121000 }, { "epoch": 27.85, "learning_rate": 1.3923905569562229e-05, "loss": 1.1112, "step": 121500 }, { "epoch": 27.85, "eval_loss": 1.2292996644973755, "eval_runtime": 8.6566, "eval_samples_per_second": 542.131, "eval_steps_per_second": 67.81, "step": 121500 }, { "epoch": 27.96, "learning_rate": 1.3981205592482238e-05, "loss": 1.0978, "step": 122000 }, { "epoch": 27.96, "eval_loss": 1.2230393886566162, "eval_runtime": 8.6621, "eval_samples_per_second": 541.788, "eval_steps_per_second": 67.767, "step": 122000 }, { "epoch": 28.08, "learning_rate": 1.4038505615402248e-05, "loss": 1.1013, "step": 122500 }, { "epoch": 28.08, "eval_loss": 1.2095947265625, "eval_runtime": 8.6609, "eval_samples_per_second": 541.857, "eval_steps_per_second": 67.775, "step": 122500 }, { "epoch": 28.19, "learning_rate": 1.4095805638322257e-05, "loss": 1.1049, "step": 123000 }, { "epoch": 28.19, "eval_loss": 1.22176992893219, "eval_runtime": 8.6695, "eval_samples_per_second": 541.32, "eval_steps_per_second": 67.708, "step": 123000 }, { "epoch": 28.31, "learning_rate": 1.4153105661242266e-05, "loss": 1.1052, "step": 123500 }, { "epoch": 28.31, "eval_loss": 1.2156498432159424, "eval_runtime": 8.6604, "eval_samples_per_second": 541.891, "eval_steps_per_second": 67.78, "step": 123500 }, { "epoch": 28.42, "learning_rate": 1.4210405684162276e-05, "loss": 1.1038, "step": 124000 }, { "epoch": 28.42, "eval_loss": 1.225441336631775, "eval_runtime": 8.665, "eval_samples_per_second": 541.607, "eval_steps_per_second": 67.744, "step": 124000 }, { "epoch": 28.54, "learning_rate": 1.4267705707082282e-05, "loss": 1.1073, "step": 124500 }, { "epoch": 28.54, "eval_loss": 1.2338190078735352, "eval_runtime": 8.6625, "eval_samples_per_second": 541.762, "eval_steps_per_second": 67.763, "step": 124500 }, { "epoch": 28.65, "learning_rate": 1.4325005730002292e-05, "loss": 1.1089, "step": 125000 }, { "epoch": 28.65, "eval_loss": 1.221835732460022, "eval_runtime": 8.6659, "eval_samples_per_second": 541.55, "eval_steps_per_second": 67.737, "step": 125000 }, { "epoch": 28.76, "learning_rate": 1.4382305752922301e-05, "loss": 1.104, "step": 125500 }, { "epoch": 28.76, "eval_loss": 1.226941704750061, "eval_runtime": 8.666, "eval_samples_per_second": 541.542, "eval_steps_per_second": 67.736, "step": 125500 }, { "epoch": 28.88, "learning_rate": 1.443960577584231e-05, "loss": 1.1033, "step": 126000 }, { "epoch": 28.88, "eval_loss": 1.227961540222168, "eval_runtime": 8.6598, "eval_samples_per_second": 541.93, "eval_steps_per_second": 67.785, "step": 126000 }, { "epoch": 28.99, "learning_rate": 1.449690579876232e-05, "loss": 1.1014, "step": 126500 }, { "epoch": 28.99, "eval_loss": 1.234354853630066, "eval_runtime": 8.6624, "eval_samples_per_second": 541.764, "eval_steps_per_second": 67.764, "step": 126500 }, { "epoch": 29.11, "learning_rate": 1.455420582168233e-05, "loss": 1.0939, "step": 127000 }, { "epoch": 29.11, "eval_loss": 1.2329198122024536, "eval_runtime": 8.6622, "eval_samples_per_second": 541.78, "eval_steps_per_second": 67.766, "step": 127000 }, { "epoch": 29.22, "learning_rate": 1.4611505844602338e-05, "loss": 1.1024, "step": 127500 }, { "epoch": 29.22, "eval_loss": 1.2237578630447388, "eval_runtime": 8.6618, "eval_samples_per_second": 541.806, "eval_steps_per_second": 67.769, "step": 127500 }, { "epoch": 29.34, "learning_rate": 1.4668805867522349e-05, "loss": 1.097, "step": 128000 }, { "epoch": 29.34, "eval_loss": 1.2304518222808838, "eval_runtime": 8.6659, "eval_samples_per_second": 541.548, "eval_steps_per_second": 67.737, "step": 128000 }, { "epoch": 29.45, "learning_rate": 1.4726105890442357e-05, "loss": 1.1, "step": 128500 }, { "epoch": 29.45, "eval_loss": 1.232290506362915, "eval_runtime": 8.661, "eval_samples_per_second": 541.856, "eval_steps_per_second": 67.775, "step": 128500 }, { "epoch": 29.57, "learning_rate": 1.4783405913362366e-05, "loss": 1.1043, "step": 129000 }, { "epoch": 29.57, "eval_loss": 1.2481147050857544, "eval_runtime": 8.6626, "eval_samples_per_second": 541.753, "eval_steps_per_second": 67.762, "step": 129000 }, { "epoch": 29.68, "learning_rate": 1.4840705936282377e-05, "loss": 1.1012, "step": 129500 }, { "epoch": 29.68, "eval_loss": 1.2305197715759277, "eval_runtime": 8.6608, "eval_samples_per_second": 541.868, "eval_steps_per_second": 67.777, "step": 129500 }, { "epoch": 29.8, "learning_rate": 1.4898005959202385e-05, "loss": 1.1041, "step": 130000 }, { "epoch": 29.8, "eval_loss": 1.2325879335403442, "eval_runtime": 8.6579, "eval_samples_per_second": 542.05, "eval_steps_per_second": 67.8, "step": 130000 }, { "epoch": 29.91, "learning_rate": 1.4955305982122394e-05, "loss": 1.1069, "step": 130500 }, { "epoch": 29.91, "eval_loss": 1.2293685674667358, "eval_runtime": 8.6607, "eval_samples_per_second": 541.871, "eval_steps_per_second": 67.777, "step": 130500 }, { "epoch": 30.03, "learning_rate": 1.5012606005042405e-05, "loss": 1.1052, "step": 131000 }, { "epoch": 30.03, "eval_loss": 1.2376192808151245, "eval_runtime": 8.6621, "eval_samples_per_second": 541.787, "eval_steps_per_second": 67.767, "step": 131000 }, { "epoch": 30.14, "learning_rate": 1.5069906027962414e-05, "loss": 1.1011, "step": 131500 }, { "epoch": 30.14, "eval_loss": 1.2356747388839722, "eval_runtime": 8.6602, "eval_samples_per_second": 541.904, "eval_steps_per_second": 67.781, "step": 131500 }, { "epoch": 30.25, "learning_rate": 1.512720605088242e-05, "loss": 1.0969, "step": 132000 }, { "epoch": 30.25, "eval_loss": 1.2289435863494873, "eval_runtime": 8.6635, "eval_samples_per_second": 541.7, "eval_steps_per_second": 67.756, "step": 132000 }, { "epoch": 30.37, "learning_rate": 1.518450607380243e-05, "loss": 1.0885, "step": 132500 }, { "epoch": 30.37, "eval_loss": 1.2294517755508423, "eval_runtime": 8.6602, "eval_samples_per_second": 541.906, "eval_steps_per_second": 67.782, "step": 132500 }, { "epoch": 30.48, "learning_rate": 1.5241806096722438e-05, "loss": 1.0972, "step": 133000 }, { "epoch": 30.48, "eval_loss": 1.2401878833770752, "eval_runtime": 8.6641, "eval_samples_per_second": 541.658, "eval_steps_per_second": 67.751, "step": 133000 }, { "epoch": 30.6, "learning_rate": 1.529910611964245e-05, "loss": 1.091, "step": 133500 }, { "epoch": 30.6, "eval_loss": 1.226170301437378, "eval_runtime": 8.6607, "eval_samples_per_second": 541.876, "eval_steps_per_second": 67.778, "step": 133500 }, { "epoch": 30.71, "learning_rate": 1.5356406142562456e-05, "loss": 1.0983, "step": 134000 }, { "epoch": 30.71, "eval_loss": 1.2254958152770996, "eval_runtime": 8.6636, "eval_samples_per_second": 541.695, "eval_steps_per_second": 67.755, "step": 134000 }, { "epoch": 30.83, "learning_rate": 1.5413706165482466e-05, "loss": 1.0985, "step": 134500 }, { "epoch": 30.83, "eval_loss": 1.2254739999771118, "eval_runtime": 8.6593, "eval_samples_per_second": 541.962, "eval_steps_per_second": 67.789, "step": 134500 }, { "epoch": 30.94, "learning_rate": 1.5471006188402477e-05, "loss": 1.1016, "step": 135000 }, { "epoch": 30.94, "eval_loss": 1.242270827293396, "eval_runtime": 8.6659, "eval_samples_per_second": 541.546, "eval_steps_per_second": 67.737, "step": 135000 }, { "epoch": 31.06, "learning_rate": 1.5528306211322484e-05, "loss": 1.094, "step": 135500 }, { "epoch": 31.06, "eval_loss": 1.2418590784072876, "eval_runtime": 8.7488, "eval_samples_per_second": 536.418, "eval_steps_per_second": 67.095, "step": 135500 }, { "epoch": 31.17, "learning_rate": 1.5585606234242494e-05, "loss": 1.0931, "step": 136000 }, { "epoch": 31.17, "eval_loss": 1.2423757314682007, "eval_runtime": 8.6903, "eval_samples_per_second": 540.028, "eval_steps_per_second": 67.547, "step": 136000 }, { "epoch": 31.29, "learning_rate": 1.5642906257162505e-05, "loss": 1.0928, "step": 136500 }, { "epoch": 31.29, "eval_loss": 1.2476611137390137, "eval_runtime": 8.6991, "eval_samples_per_second": 539.479, "eval_steps_per_second": 67.478, "step": 136500 }, { "epoch": 31.4, "learning_rate": 1.5700206280082512e-05, "loss": 1.0976, "step": 137000 }, { "epoch": 31.4, "eval_loss": 1.2399616241455078, "eval_runtime": 8.7013, "eval_samples_per_second": 539.345, "eval_steps_per_second": 67.461, "step": 137000 }, { "epoch": 31.51, "learning_rate": 1.5757506303002522e-05, "loss": 1.1001, "step": 137500 }, { "epoch": 31.51, "eval_loss": 1.2302875518798828, "eval_runtime": 8.6985, "eval_samples_per_second": 539.518, "eval_steps_per_second": 67.483, "step": 137500 }, { "epoch": 31.63, "learning_rate": 1.5814806325922533e-05, "loss": 1.0913, "step": 138000 }, { "epoch": 31.63, "eval_loss": 1.2429412603378296, "eval_runtime": 8.7019, "eval_samples_per_second": 539.307, "eval_steps_per_second": 67.457, "step": 138000 }, { "epoch": 31.74, "learning_rate": 1.587210634884254e-05, "loss": 1.1, "step": 138500 }, { "epoch": 31.74, "eval_loss": 1.2249164581298828, "eval_runtime": 8.7017, "eval_samples_per_second": 539.323, "eval_steps_per_second": 67.458, "step": 138500 }, { "epoch": 31.86, "learning_rate": 1.592940637176255e-05, "loss": 1.1014, "step": 139000 }, { "epoch": 31.86, "eval_loss": 1.2509437799453735, "eval_runtime": 8.6985, "eval_samples_per_second": 539.515, "eval_steps_per_second": 67.483, "step": 139000 }, { "epoch": 31.97, "learning_rate": 1.5986706394682558e-05, "loss": 1.0912, "step": 139500 }, { "epoch": 31.97, "eval_loss": 1.243829369544983, "eval_runtime": 8.6982, "eval_samples_per_second": 539.538, "eval_steps_per_second": 67.485, "step": 139500 }, { "epoch": 32.09, "learning_rate": 1.6044006417602568e-05, "loss": 1.0853, "step": 140000 }, { "epoch": 32.09, "eval_loss": 1.2348285913467407, "eval_runtime": 8.7022, "eval_samples_per_second": 539.291, "eval_steps_per_second": 67.454, "step": 140000 }, { "epoch": 32.2, "learning_rate": 1.6101306440522575e-05, "loss": 1.0889, "step": 140500 }, { "epoch": 32.2, "eval_loss": 1.242098093032837, "eval_runtime": 8.6942, "eval_samples_per_second": 539.788, "eval_steps_per_second": 67.517, "step": 140500 }, { "epoch": 32.32, "learning_rate": 1.6158606463442586e-05, "loss": 1.0853, "step": 141000 }, { "epoch": 32.32, "eval_loss": 1.2536767721176147, "eval_runtime": 8.6963, "eval_samples_per_second": 539.653, "eval_steps_per_second": 67.5, "step": 141000 }, { "epoch": 32.43, "learning_rate": 1.6215906486362596e-05, "loss": 1.0945, "step": 141500 }, { "epoch": 32.43, "eval_loss": 1.241873025894165, "eval_runtime": 8.6973, "eval_samples_per_second": 539.594, "eval_steps_per_second": 67.492, "step": 141500 }, { "epoch": 32.55, "learning_rate": 1.6273206509282603e-05, "loss": 1.0931, "step": 142000 }, { "epoch": 32.55, "eval_loss": 1.242942214012146, "eval_runtime": 8.7047, "eval_samples_per_second": 539.132, "eval_steps_per_second": 67.435, "step": 142000 }, { "epoch": 32.66, "learning_rate": 1.6330506532202614e-05, "loss": 1.0839, "step": 142500 }, { "epoch": 32.66, "eval_loss": 1.2356377840042114, "eval_runtime": 8.6962, "eval_samples_per_second": 539.658, "eval_steps_per_second": 67.5, "step": 142500 }, { "epoch": 32.78, "learning_rate": 1.6387806555122624e-05, "loss": 1.0959, "step": 143000 }, { "epoch": 32.78, "eval_loss": 1.2494839429855347, "eval_runtime": 8.6981, "eval_samples_per_second": 539.544, "eval_steps_per_second": 67.486, "step": 143000 }, { "epoch": 32.89, "learning_rate": 1.644510657804263e-05, "loss": 1.0925, "step": 143500 }, { "epoch": 32.89, "eval_loss": 1.2540109157562256, "eval_runtime": 8.7048, "eval_samples_per_second": 539.126, "eval_steps_per_second": 67.434, "step": 143500 }, { "epoch": 33.0, "learning_rate": 1.6502406600962642e-05, "loss": 1.0968, "step": 144000 }, { "epoch": 33.0, "eval_loss": 1.240728497505188, "eval_runtime": 8.6983, "eval_samples_per_second": 539.529, "eval_steps_per_second": 67.484, "step": 144000 }, { "epoch": 33.12, "learning_rate": 1.6559706623882652e-05, "loss": 1.0809, "step": 144500 }, { "epoch": 33.12, "eval_loss": 1.2413946390151978, "eval_runtime": 8.6972, "eval_samples_per_second": 539.601, "eval_steps_per_second": 67.493, "step": 144500 }, { "epoch": 33.23, "learning_rate": 1.661700664680266e-05, "loss": 1.0875, "step": 145000 }, { "epoch": 33.23, "eval_loss": 1.2324668169021606, "eval_runtime": 8.7046, "eval_samples_per_second": 539.138, "eval_steps_per_second": 67.435, "step": 145000 }, { "epoch": 33.35, "learning_rate": 1.667430666972267e-05, "loss": 1.0869, "step": 145500 }, { "epoch": 33.35, "eval_loss": 1.2302987575531006, "eval_runtime": 8.6973, "eval_samples_per_second": 539.59, "eval_steps_per_second": 67.492, "step": 145500 }, { "epoch": 33.46, "learning_rate": 1.673160669264268e-05, "loss": 1.0846, "step": 146000 }, { "epoch": 33.46, "eval_loss": 1.2497087717056274, "eval_runtime": 8.7036, "eval_samples_per_second": 539.202, "eval_steps_per_second": 67.443, "step": 146000 }, { "epoch": 33.58, "learning_rate": 1.6788906715562688e-05, "loss": 1.0904, "step": 146500 }, { "epoch": 33.58, "eval_loss": 1.2268368005752563, "eval_runtime": 8.703, "eval_samples_per_second": 539.237, "eval_steps_per_second": 67.448, "step": 146500 }, { "epoch": 33.69, "learning_rate": 1.6846206738482695e-05, "loss": 1.0867, "step": 147000 }, { "epoch": 33.69, "eval_loss": 1.2364413738250732, "eval_runtime": 8.7063, "eval_samples_per_second": 539.035, "eval_steps_per_second": 67.422, "step": 147000 }, { "epoch": 33.81, "learning_rate": 1.6903506761402705e-05, "loss": 1.0882, "step": 147500 }, { "epoch": 33.81, "eval_loss": 1.2470132112503052, "eval_runtime": 8.7037, "eval_samples_per_second": 539.198, "eval_steps_per_second": 67.443, "step": 147500 }, { "epoch": 33.92, "learning_rate": 1.6960806784322712e-05, "loss": 1.096, "step": 148000 }, { "epoch": 33.92, "eval_loss": 1.2550737857818604, "eval_runtime": 8.7031, "eval_samples_per_second": 539.232, "eval_steps_per_second": 67.447, "step": 148000 }, { "epoch": 34.04, "learning_rate": 1.7018106807242723e-05, "loss": 1.0859, "step": 148500 }, { "epoch": 34.04, "eval_loss": 1.2413285970687866, "eval_runtime": 8.6979, "eval_samples_per_second": 539.557, "eval_steps_per_second": 67.488, "step": 148500 }, { "epoch": 34.15, "learning_rate": 1.7075406830162733e-05, "loss": 1.084, "step": 149000 }, { "epoch": 34.15, "eval_loss": 1.240527868270874, "eval_runtime": 8.7095, "eval_samples_per_second": 538.835, "eval_steps_per_second": 67.397, "step": 149000 }, { "epoch": 34.27, "learning_rate": 1.713270685308274e-05, "loss": 1.0819, "step": 149500 }, { "epoch": 34.27, "eval_loss": 1.240763545036316, "eval_runtime": 8.7028, "eval_samples_per_second": 539.25, "eval_steps_per_second": 67.449, "step": 149500 }, { "epoch": 34.38, "learning_rate": 1.719000687600275e-05, "loss": 1.0795, "step": 150000 }, { "epoch": 34.38, "eval_loss": 1.247377634048462, "eval_runtime": 8.7087, "eval_samples_per_second": 538.887, "eval_steps_per_second": 67.404, "step": 150000 }, { "epoch": 34.49, "learning_rate": 1.724730689892276e-05, "loss": 1.0862, "step": 150500 }, { "epoch": 34.49, "eval_loss": 1.2520489692687988, "eval_runtime": 8.7081, "eval_samples_per_second": 538.923, "eval_steps_per_second": 67.408, "step": 150500 }, { "epoch": 34.61, "learning_rate": 1.730460692184277e-05, "loss": 1.0881, "step": 151000 }, { "epoch": 34.61, "eval_loss": 1.2421196699142456, "eval_runtime": 8.7073, "eval_samples_per_second": 538.972, "eval_steps_per_second": 67.415, "step": 151000 }, { "epoch": 34.72, "learning_rate": 1.736190694476278e-05, "loss": 1.0929, "step": 151500 }, { "epoch": 34.72, "eval_loss": 1.2428258657455444, "eval_runtime": 8.7129, "eval_samples_per_second": 538.627, "eval_steps_per_second": 67.371, "step": 151500 }, { "epoch": 34.84, "learning_rate": 1.741920696768279e-05, "loss": 1.0814, "step": 152000 }, { "epoch": 34.84, "eval_loss": 1.2403675317764282, "eval_runtime": 8.7116, "eval_samples_per_second": 538.708, "eval_steps_per_second": 67.382, "step": 152000 }, { "epoch": 34.95, "learning_rate": 1.7476506990602797e-05, "loss": 1.0782, "step": 152500 }, { "epoch": 34.95, "eval_loss": 1.2512247562408447, "eval_runtime": 8.7069, "eval_samples_per_second": 538.998, "eval_steps_per_second": 67.418, "step": 152500 }, { "epoch": 35.07, "learning_rate": 1.7533807013522807e-05, "loss": 1.0833, "step": 153000 }, { "epoch": 35.07, "eval_loss": 1.2374869585037231, "eval_runtime": 8.7119, "eval_samples_per_second": 538.687, "eval_steps_per_second": 67.379, "step": 153000 }, { "epoch": 35.18, "learning_rate": 1.7591107036442818e-05, "loss": 1.0775, "step": 153500 }, { "epoch": 35.18, "eval_loss": 1.2496999502182007, "eval_runtime": 8.709, "eval_samples_per_second": 538.866, "eval_steps_per_second": 67.401, "step": 153500 }, { "epoch": 35.3, "learning_rate": 1.7648407059362825e-05, "loss": 1.077, "step": 154000 }, { "epoch": 35.3, "eval_loss": 1.2628381252288818, "eval_runtime": 8.7142, "eval_samples_per_second": 538.549, "eval_steps_per_second": 67.362, "step": 154000 }, { "epoch": 35.41, "learning_rate": 1.7705707082282832e-05, "loss": 1.0845, "step": 154500 }, { "epoch": 35.41, "eval_loss": 1.240352749824524, "eval_runtime": 8.7142, "eval_samples_per_second": 538.548, "eval_steps_per_second": 67.361, "step": 154500 }, { "epoch": 35.53, "learning_rate": 1.7763007105202842e-05, "loss": 1.0827, "step": 155000 }, { "epoch": 35.53, "eval_loss": 1.2425720691680908, "eval_runtime": 8.724, "eval_samples_per_second": 537.941, "eval_steps_per_second": 67.286, "step": 155000 }, { "epoch": 35.64, "learning_rate": 1.7820307128122853e-05, "loss": 1.0747, "step": 155500 }, { "epoch": 35.64, "eval_loss": 1.249588966369629, "eval_runtime": 8.7079, "eval_samples_per_second": 538.938, "eval_steps_per_second": 67.41, "step": 155500 }, { "epoch": 35.76, "learning_rate": 1.787760715104286e-05, "loss": 1.0893, "step": 156000 }, { "epoch": 35.76, "eval_loss": 1.24796462059021, "eval_runtime": 8.7146, "eval_samples_per_second": 538.524, "eval_steps_per_second": 67.359, "step": 156000 }, { "epoch": 35.87, "learning_rate": 1.793490717396287e-05, "loss": 1.0869, "step": 156500 }, { "epoch": 35.87, "eval_loss": 1.2518898248672485, "eval_runtime": 8.7122, "eval_samples_per_second": 538.67, "eval_steps_per_second": 67.377, "step": 156500 }, { "epoch": 35.98, "learning_rate": 1.799220719688288e-05, "loss": 1.0858, "step": 157000 }, { "epoch": 35.98, "eval_loss": 1.2398487329483032, "eval_runtime": 8.7112, "eval_samples_per_second": 538.73, "eval_steps_per_second": 67.384, "step": 157000 }, { "epoch": 36.1, "learning_rate": 1.8049507219802888e-05, "loss": 1.0798, "step": 157500 }, { "epoch": 36.1, "eval_loss": 1.2411547899246216, "eval_runtime": 8.7101, "eval_samples_per_second": 538.801, "eval_steps_per_second": 67.393, "step": 157500 }, { "epoch": 36.21, "learning_rate": 1.81068072427229e-05, "loss": 1.0728, "step": 158000 }, { "epoch": 36.21, "eval_loss": 1.2510422468185425, "eval_runtime": 8.7138, "eval_samples_per_second": 538.569, "eval_steps_per_second": 67.364, "step": 158000 }, { "epoch": 36.33, "learning_rate": 1.816410726564291e-05, "loss": 1.0788, "step": 158500 }, { "epoch": 36.33, "eval_loss": 1.2572190761566162, "eval_runtime": 8.7073, "eval_samples_per_second": 538.972, "eval_steps_per_second": 67.415, "step": 158500 }, { "epoch": 36.44, "learning_rate": 1.8221407288562916e-05, "loss": 1.0758, "step": 159000 }, { "epoch": 36.44, "eval_loss": 1.2638431787490845, "eval_runtime": 8.7139, "eval_samples_per_second": 538.564, "eval_steps_per_second": 67.364, "step": 159000 }, { "epoch": 36.56, "learning_rate": 1.8278707311482926e-05, "loss": 1.0818, "step": 159500 }, { "epoch": 36.56, "eval_loss": 1.247061014175415, "eval_runtime": 8.7107, "eval_samples_per_second": 538.763, "eval_steps_per_second": 67.388, "step": 159500 }, { "epoch": 36.67, "learning_rate": 1.8336007334402937e-05, "loss": 1.0843, "step": 160000 }, { "epoch": 36.67, "eval_loss": 1.243645191192627, "eval_runtime": 8.7104, "eval_samples_per_second": 538.783, "eval_steps_per_second": 67.391, "step": 160000 }, { "epoch": 36.79, "learning_rate": 1.8393307357322944e-05, "loss": 1.0786, "step": 160500 }, { "epoch": 36.79, "eval_loss": 1.2376375198364258, "eval_runtime": 8.7092, "eval_samples_per_second": 538.854, "eval_steps_per_second": 67.4, "step": 160500 }, { "epoch": 36.9, "learning_rate": 1.8450607380242955e-05, "loss": 1.076, "step": 161000 }, { "epoch": 36.9, "eval_loss": 1.262695074081421, "eval_runtime": 8.7104, "eval_samples_per_second": 538.781, "eval_steps_per_second": 67.391, "step": 161000 }, { "epoch": 37.02, "learning_rate": 1.8507907403162965e-05, "loss": 1.0851, "step": 161500 }, { "epoch": 37.02, "eval_loss": 1.258217692375183, "eval_runtime": 8.7024, "eval_samples_per_second": 539.278, "eval_steps_per_second": 67.453, "step": 161500 }, { "epoch": 37.13, "learning_rate": 1.856520742608297e-05, "loss": 1.0763, "step": 162000 }, { "epoch": 37.13, "eval_loss": 1.2523659467697144, "eval_runtime": 8.7139, "eval_samples_per_second": 538.563, "eval_steps_per_second": 67.363, "step": 162000 }, { "epoch": 37.24, "learning_rate": 1.862250744900298e-05, "loss": 1.0725, "step": 162500 }, { "epoch": 37.24, "eval_loss": 1.2512284517288208, "eval_runtime": 8.7176, "eval_samples_per_second": 538.339, "eval_steps_per_second": 67.335, "step": 162500 }, { "epoch": 37.36, "learning_rate": 1.867980747192299e-05, "loss": 1.0721, "step": 163000 }, { "epoch": 37.36, "eval_loss": 1.2499217987060547, "eval_runtime": 8.725, "eval_samples_per_second": 537.883, "eval_steps_per_second": 67.278, "step": 163000 }, { "epoch": 37.47, "learning_rate": 1.8737107494842997e-05, "loss": 1.0815, "step": 163500 }, { "epoch": 37.47, "eval_loss": 1.26304030418396, "eval_runtime": 8.7069, "eval_samples_per_second": 539.001, "eval_steps_per_second": 67.418, "step": 163500 }, { "epoch": 37.59, "learning_rate": 1.8794407517763007e-05, "loss": 1.0788, "step": 164000 }, { "epoch": 37.59, "eval_loss": 1.2580420970916748, "eval_runtime": 8.7096, "eval_samples_per_second": 538.83, "eval_steps_per_second": 67.397, "step": 164000 }, { "epoch": 37.7, "learning_rate": 1.8851707540683018e-05, "loss": 1.0741, "step": 164500 }, { "epoch": 37.7, "eval_loss": 1.2331500053405762, "eval_runtime": 8.7096, "eval_samples_per_second": 538.83, "eval_steps_per_second": 67.397, "step": 164500 }, { "epoch": 37.82, "learning_rate": 1.8909007563603025e-05, "loss": 1.0886, "step": 165000 }, { "epoch": 37.82, "eval_loss": 1.2559674978256226, "eval_runtime": 8.7157, "eval_samples_per_second": 538.453, "eval_steps_per_second": 67.35, "step": 165000 }, { "epoch": 37.93, "learning_rate": 1.8966307586523035e-05, "loss": 1.0773, "step": 165500 }, { "epoch": 37.93, "eval_loss": 1.2508002519607544, "eval_runtime": 8.7015, "eval_samples_per_second": 539.334, "eval_steps_per_second": 67.46, "step": 165500 }, { "epoch": 38.05, "learning_rate": 1.9023607609443046e-05, "loss": 1.0704, "step": 166000 }, { "epoch": 38.05, "eval_loss": 1.2630932331085205, "eval_runtime": 8.7099, "eval_samples_per_second": 538.81, "eval_steps_per_second": 67.394, "step": 166000 }, { "epoch": 38.16, "learning_rate": 1.9080907632363053e-05, "loss": 1.0748, "step": 166500 }, { "epoch": 38.16, "eval_loss": 1.2723890542984009, "eval_runtime": 8.7131, "eval_samples_per_second": 538.614, "eval_steps_per_second": 67.37, "step": 166500 }, { "epoch": 38.28, "learning_rate": 1.9138207655283064e-05, "loss": 1.078, "step": 167000 }, { "epoch": 38.28, "eval_loss": 1.2508131265640259, "eval_runtime": 8.7059, "eval_samples_per_second": 539.061, "eval_steps_per_second": 67.426, "step": 167000 }, { "epoch": 38.39, "learning_rate": 1.9195507678203074e-05, "loss": 1.0728, "step": 167500 }, { "epoch": 38.39, "eval_loss": 1.2604044675827026, "eval_runtime": 8.7031, "eval_samples_per_second": 539.23, "eval_steps_per_second": 67.447, "step": 167500 }, { "epoch": 38.51, "learning_rate": 1.925280770112308e-05, "loss": 1.0772, "step": 168000 }, { "epoch": 38.51, "eval_loss": 1.2582615613937378, "eval_runtime": 8.7131, "eval_samples_per_second": 538.611, "eval_steps_per_second": 67.369, "step": 168000 }, { "epoch": 38.62, "learning_rate": 1.931010772404309e-05, "loss": 1.0714, "step": 168500 }, { "epoch": 38.62, "eval_loss": 1.2641552686691284, "eval_runtime": 8.7131, "eval_samples_per_second": 538.615, "eval_steps_per_second": 67.37, "step": 168500 }, { "epoch": 38.73, "learning_rate": 1.9367407746963102e-05, "loss": 1.0855, "step": 169000 }, { "epoch": 38.73, "eval_loss": 1.2495558261871338, "eval_runtime": 8.7098, "eval_samples_per_second": 538.82, "eval_steps_per_second": 67.396, "step": 169000 }, { "epoch": 38.85, "learning_rate": 1.942470776988311e-05, "loss": 1.0759, "step": 169500 }, { "epoch": 38.85, "eval_loss": 1.2550148963928223, "eval_runtime": 8.7058, "eval_samples_per_second": 539.069, "eval_steps_per_second": 67.427, "step": 169500 }, { "epoch": 38.96, "learning_rate": 1.9482007792803116e-05, "loss": 1.0735, "step": 170000 }, { "epoch": 38.96, "eval_loss": 1.2585957050323486, "eval_runtime": 8.7159, "eval_samples_per_second": 538.443, "eval_steps_per_second": 67.348, "step": 170000 }, { "epoch": 39.08, "learning_rate": 1.9539307815723127e-05, "loss": 1.078, "step": 170500 }, { "epoch": 39.08, "eval_loss": 1.275242805480957, "eval_runtime": 8.7107, "eval_samples_per_second": 538.765, "eval_steps_per_second": 67.389, "step": 170500 }, { "epoch": 39.19, "learning_rate": 1.9596607838643137e-05, "loss": 1.0684, "step": 171000 }, { "epoch": 39.19, "eval_loss": 1.2557766437530518, "eval_runtime": 8.7208, "eval_samples_per_second": 538.139, "eval_steps_per_second": 67.31, "step": 171000 }, { "epoch": 39.31, "learning_rate": 1.9653907861563144e-05, "loss": 1.0671, "step": 171500 }, { "epoch": 39.31, "eval_loss": 1.25728178024292, "eval_runtime": 8.7138, "eval_samples_per_second": 538.569, "eval_steps_per_second": 67.364, "step": 171500 }, { "epoch": 39.42, "learning_rate": 1.9711207884483155e-05, "loss": 1.0713, "step": 172000 }, { "epoch": 39.42, "eval_loss": 1.2489017248153687, "eval_runtime": 8.7133, "eval_samples_per_second": 538.602, "eval_steps_per_second": 67.368, "step": 172000 }, { "epoch": 39.54, "learning_rate": 1.9768507907403165e-05, "loss": 1.0774, "step": 172500 }, { "epoch": 39.54, "eval_loss": 1.2491424083709717, "eval_runtime": 8.7117, "eval_samples_per_second": 538.701, "eval_steps_per_second": 67.381, "step": 172500 }, { "epoch": 39.65, "learning_rate": 1.9825807930323172e-05, "loss": 1.0694, "step": 173000 }, { "epoch": 39.65, "eval_loss": 1.280080795288086, "eval_runtime": 8.7107, "eval_samples_per_second": 538.761, "eval_steps_per_second": 67.388, "step": 173000 }, { "epoch": 39.77, "learning_rate": 1.9883107953243183e-05, "loss": 1.0727, "step": 173500 }, { "epoch": 39.77, "eval_loss": 1.265429139137268, "eval_runtime": 8.7122, "eval_samples_per_second": 538.671, "eval_steps_per_second": 67.377, "step": 173500 }, { "epoch": 39.88, "learning_rate": 1.9940407976163193e-05, "loss": 1.0775, "step": 174000 }, { "epoch": 39.88, "eval_loss": 1.2636425495147705, "eval_runtime": 8.7081, "eval_samples_per_second": 538.926, "eval_steps_per_second": 67.409, "step": 174000 }, { "epoch": 40.0, "learning_rate": 1.99977079990832e-05, "loss": 1.0695, "step": 174500 }, { "epoch": 40.0, "eval_loss": 1.270471215248108, "eval_runtime": 8.7157, "eval_samples_per_second": 538.452, "eval_steps_per_second": 67.35, "step": 174500 }, { "epoch": 40.11, "learning_rate": 2.005500802200321e-05, "loss": 1.0671, "step": 175000 }, { "epoch": 40.11, "eval_loss": 1.2714405059814453, "eval_runtime": 8.7121, "eval_samples_per_second": 538.675, "eval_steps_per_second": 67.377, "step": 175000 }, { "epoch": 40.22, "learning_rate": 2.011230804492322e-05, "loss": 1.0654, "step": 175500 }, { "epoch": 40.22, "eval_loss": 1.2563685178756714, "eval_runtime": 8.7101, "eval_samples_per_second": 538.801, "eval_steps_per_second": 67.393, "step": 175500 }, { "epoch": 40.34, "learning_rate": 2.016960806784323e-05, "loss": 1.0658, "step": 176000 }, { "epoch": 40.34, "eval_loss": 1.2590737342834473, "eval_runtime": 8.7177, "eval_samples_per_second": 538.328, "eval_steps_per_second": 67.334, "step": 176000 }, { "epoch": 40.45, "learning_rate": 2.022690809076324e-05, "loss": 1.0688, "step": 176500 }, { "epoch": 40.45, "eval_loss": 1.2645343542099, "eval_runtime": 8.7076, "eval_samples_per_second": 538.956, "eval_steps_per_second": 67.413, "step": 176500 }, { "epoch": 40.57, "learning_rate": 2.0284208113683246e-05, "loss": 1.0699, "step": 177000 }, { "epoch": 40.57, "eval_loss": 1.2639456987380981, "eval_runtime": 8.713, "eval_samples_per_second": 538.619, "eval_steps_per_second": 67.37, "step": 177000 }, { "epoch": 40.68, "learning_rate": 2.0341508136603253e-05, "loss": 1.0731, "step": 177500 }, { "epoch": 40.68, "eval_loss": 1.2533913850784302, "eval_runtime": 8.7078, "eval_samples_per_second": 538.94, "eval_steps_per_second": 67.411, "step": 177500 }, { "epoch": 40.8, "learning_rate": 2.0398808159523264e-05, "loss": 1.068, "step": 178000 }, { "epoch": 40.8, "eval_loss": 1.2594603300094604, "eval_runtime": 8.7063, "eval_samples_per_second": 539.032, "eval_steps_per_second": 67.422, "step": 178000 }, { "epoch": 40.91, "learning_rate": 2.0456108182443274e-05, "loss": 1.0689, "step": 178500 }, { "epoch": 40.91, "eval_loss": 1.2630608081817627, "eval_runtime": 8.7056, "eval_samples_per_second": 539.078, "eval_steps_per_second": 67.428, "step": 178500 }, { "epoch": 41.03, "learning_rate": 2.051340820536328e-05, "loss": 1.0719, "step": 179000 }, { "epoch": 41.03, "eval_loss": 1.2835136651992798, "eval_runtime": 8.7208, "eval_samples_per_second": 538.139, "eval_steps_per_second": 67.31, "step": 179000 }, { "epoch": 41.14, "learning_rate": 2.0570708228283292e-05, "loss": 1.0568, "step": 179500 }, { "epoch": 41.14, "eval_loss": 1.2505382299423218, "eval_runtime": 8.7069, "eval_samples_per_second": 538.998, "eval_steps_per_second": 67.418, "step": 179500 }, { "epoch": 41.26, "learning_rate": 2.0628008251203302e-05, "loss": 1.0573, "step": 180000 }, { "epoch": 41.26, "eval_loss": 1.2611666917800903, "eval_runtime": 8.7154, "eval_samples_per_second": 538.474, "eval_steps_per_second": 67.352, "step": 180000 }, { "epoch": 41.37, "learning_rate": 2.068530827412331e-05, "loss": 1.0659, "step": 180500 }, { "epoch": 41.37, "eval_loss": 1.2627363204956055, "eval_runtime": 8.7158, "eval_samples_per_second": 538.449, "eval_steps_per_second": 67.349, "step": 180500 }, { "epoch": 41.49, "learning_rate": 2.074260829704332e-05, "loss": 1.0662, "step": 181000 }, { "epoch": 41.49, "eval_loss": 1.271817684173584, "eval_runtime": 8.7079, "eval_samples_per_second": 538.934, "eval_steps_per_second": 67.41, "step": 181000 }, { "epoch": 41.6, "learning_rate": 2.079990831996333e-05, "loss": 1.0727, "step": 181500 }, { "epoch": 41.6, "eval_loss": 1.2779048681259155, "eval_runtime": 8.7161, "eval_samples_per_second": 538.429, "eval_steps_per_second": 67.347, "step": 181500 }, { "epoch": 41.71, "learning_rate": 2.0857208342883338e-05, "loss": 1.0658, "step": 182000 }, { "epoch": 41.71, "eval_loss": 1.2807464599609375, "eval_runtime": 8.7121, "eval_samples_per_second": 538.679, "eval_steps_per_second": 67.378, "step": 182000 }, { "epoch": 41.83, "learning_rate": 2.0914508365803348e-05, "loss": 1.0687, "step": 182500 }, { "epoch": 41.83, "eval_loss": 1.2837514877319336, "eval_runtime": 8.7471, "eval_samples_per_second": 536.523, "eval_steps_per_second": 67.108, "step": 182500 }, { "epoch": 41.94, "learning_rate": 2.097180838872336e-05, "loss": 1.068, "step": 183000 }, { "epoch": 41.94, "eval_loss": 1.2748181819915771, "eval_runtime": 8.7588, "eval_samples_per_second": 535.802, "eval_steps_per_second": 67.018, "step": 183000 }, { "epoch": 42.06, "learning_rate": 2.1029108411643366e-05, "loss": 1.0686, "step": 183500 }, { "epoch": 42.06, "eval_loss": 1.279675006866455, "eval_runtime": 8.7545, "eval_samples_per_second": 536.064, "eval_steps_per_second": 67.051, "step": 183500 }, { "epoch": 42.17, "learning_rate": 2.1086408434563376e-05, "loss": 1.0614, "step": 184000 }, { "epoch": 42.17, "eval_loss": 1.271543264389038, "eval_runtime": 8.7584, "eval_samples_per_second": 535.826, "eval_steps_per_second": 67.021, "step": 184000 }, { "epoch": 42.29, "learning_rate": 2.1143708457483383e-05, "loss": 1.0597, "step": 184500 }, { "epoch": 42.29, "eval_loss": 1.2615807056427002, "eval_runtime": 8.771, "eval_samples_per_second": 535.056, "eval_steps_per_second": 66.925, "step": 184500 }, { "epoch": 42.4, "learning_rate": 2.120100848040339e-05, "loss": 1.0633, "step": 185000 }, { "epoch": 42.4, "eval_loss": 1.264136791229248, "eval_runtime": 8.7525, "eval_samples_per_second": 536.191, "eval_steps_per_second": 67.067, "step": 185000 }, { "epoch": 42.52, "learning_rate": 2.12583085033234e-05, "loss": 1.0677, "step": 185500 }, { "epoch": 42.52, "eval_loss": 1.2680312395095825, "eval_runtime": 8.7596, "eval_samples_per_second": 535.752, "eval_steps_per_second": 67.012, "step": 185500 }, { "epoch": 42.63, "learning_rate": 2.131560852624341e-05, "loss": 1.0608, "step": 186000 }, { "epoch": 42.63, "eval_loss": 1.2813547849655151, "eval_runtime": 8.7765, "eval_samples_per_second": 534.722, "eval_steps_per_second": 66.883, "step": 186000 }, { "epoch": 42.75, "learning_rate": 2.137290854916342e-05, "loss": 1.0661, "step": 186500 }, { "epoch": 42.75, "eval_loss": 1.2788704633712769, "eval_runtime": 8.7639, "eval_samples_per_second": 535.491, "eval_steps_per_second": 66.979, "step": 186500 }, { "epoch": 42.86, "learning_rate": 2.143020857208343e-05, "loss": 1.071, "step": 187000 }, { "epoch": 42.86, "eval_loss": 1.2787542343139648, "eval_runtime": 8.7634, "eval_samples_per_second": 535.522, "eval_steps_per_second": 66.983, "step": 187000 }, { "epoch": 42.97, "learning_rate": 2.148750859500344e-05, "loss": 1.0694, "step": 187500 }, { "epoch": 42.97, "eval_loss": 1.2748671770095825, "eval_runtime": 8.7692, "eval_samples_per_second": 535.17, "eval_steps_per_second": 66.939, "step": 187500 }, { "epoch": 43.09, "learning_rate": 2.1544808617923447e-05, "loss": 1.0598, "step": 188000 }, { "epoch": 43.09, "eval_loss": 1.2722731828689575, "eval_runtime": 8.7671, "eval_samples_per_second": 535.296, "eval_steps_per_second": 66.955, "step": 188000 }, { "epoch": 43.2, "learning_rate": 2.1602108640843457e-05, "loss": 1.0503, "step": 188500 }, { "epoch": 43.2, "eval_loss": 1.2728936672210693, "eval_runtime": 8.7094, "eval_samples_per_second": 538.846, "eval_steps_per_second": 67.399, "step": 188500 }, { "epoch": 43.32, "learning_rate": 2.1659408663763468e-05, "loss": 1.0652, "step": 189000 }, { "epoch": 43.32, "eval_loss": 1.267683506011963, "eval_runtime": 8.7117, "eval_samples_per_second": 538.701, "eval_steps_per_second": 67.381, "step": 189000 }, { "epoch": 43.43, "learning_rate": 2.1716708686683475e-05, "loss": 1.0596, "step": 189500 }, { "epoch": 43.43, "eval_loss": 1.2627240419387817, "eval_runtime": 8.7103, "eval_samples_per_second": 538.79, "eval_steps_per_second": 67.392, "step": 189500 }, { "epoch": 43.55, "learning_rate": 2.1774008709603485e-05, "loss": 1.0625, "step": 190000 }, { "epoch": 43.55, "eval_loss": 1.2666631937026978, "eval_runtime": 8.714, "eval_samples_per_second": 538.561, "eval_steps_per_second": 67.363, "step": 190000 }, { "epoch": 43.66, "learning_rate": 2.1831308732523496e-05, "loss": 1.0668, "step": 190500 }, { "epoch": 43.66, "eval_loss": 1.269785761833191, "eval_runtime": 8.7151, "eval_samples_per_second": 538.489, "eval_steps_per_second": 67.354, "step": 190500 }, { "epoch": 43.78, "learning_rate": 2.1888608755443503e-05, "loss": 1.0626, "step": 191000 }, { "epoch": 43.78, "eval_loss": 1.2775789499282837, "eval_runtime": 8.7166, "eval_samples_per_second": 538.397, "eval_steps_per_second": 67.343, "step": 191000 }, { "epoch": 43.89, "learning_rate": 2.1945908778363513e-05, "loss": 1.0553, "step": 191500 }, { "epoch": 43.89, "eval_loss": 1.282322645187378, "eval_runtime": 8.7195, "eval_samples_per_second": 538.222, "eval_steps_per_second": 67.321, "step": 191500 }, { "epoch": 44.01, "learning_rate": 2.200320880128352e-05, "loss": 1.0685, "step": 192000 }, { "epoch": 44.01, "eval_loss": 1.2986459732055664, "eval_runtime": 8.7167, "eval_samples_per_second": 538.393, "eval_steps_per_second": 67.342, "step": 192000 }, { "epoch": 44.12, "learning_rate": 2.206050882420353e-05, "loss": 1.0566, "step": 192500 }, { "epoch": 44.12, "eval_loss": 1.25889253616333, "eval_runtime": 8.6993, "eval_samples_per_second": 539.472, "eval_steps_per_second": 67.477, "step": 192500 }, { "epoch": 44.24, "learning_rate": 2.2117808847123538e-05, "loss": 1.0603, "step": 193000 }, { "epoch": 44.24, "eval_loss": 1.2882779836654663, "eval_runtime": 8.7043, "eval_samples_per_second": 539.157, "eval_steps_per_second": 67.438, "step": 193000 }, { "epoch": 44.35, "learning_rate": 2.217510887004355e-05, "loss": 1.0569, "step": 193500 }, { "epoch": 44.35, "eval_loss": 1.2862576246261597, "eval_runtime": 8.7099, "eval_samples_per_second": 538.813, "eval_steps_per_second": 67.395, "step": 193500 }, { "epoch": 44.46, "learning_rate": 2.223240889296356e-05, "loss": 1.0572, "step": 194000 }, { "epoch": 44.46, "eval_loss": 1.2833551168441772, "eval_runtime": 8.6993, "eval_samples_per_second": 539.468, "eval_steps_per_second": 67.477, "step": 194000 }, { "epoch": 44.58, "learning_rate": 2.2289708915883566e-05, "loss": 1.0569, "step": 194500 }, { "epoch": 44.58, "eval_loss": 1.268021821975708, "eval_runtime": 8.7082, "eval_samples_per_second": 538.916, "eval_steps_per_second": 67.408, "step": 194500 }, { "epoch": 44.69, "learning_rate": 2.2347008938803576e-05, "loss": 1.0665, "step": 195000 }, { "epoch": 44.69, "eval_loss": 1.2718360424041748, "eval_runtime": 8.7137, "eval_samples_per_second": 538.579, "eval_steps_per_second": 67.365, "step": 195000 }, { "epoch": 44.81, "learning_rate": 2.2404308961723587e-05, "loss": 1.0625, "step": 195500 }, { "epoch": 44.81, "eval_loss": 1.2757216691970825, "eval_runtime": 8.7055, "eval_samples_per_second": 539.082, "eval_steps_per_second": 67.428, "step": 195500 }, { "epoch": 44.92, "learning_rate": 2.2461608984643594e-05, "loss": 1.0556, "step": 196000 }, { "epoch": 44.92, "eval_loss": 1.2830040454864502, "eval_runtime": 8.7068, "eval_samples_per_second": 539.006, "eval_steps_per_second": 67.419, "step": 196000 }, { "epoch": 45.04, "learning_rate": 2.2518909007563605e-05, "loss": 1.0623, "step": 196500 }, { "epoch": 45.04, "eval_loss": 1.2796387672424316, "eval_runtime": 8.7049, "eval_samples_per_second": 539.125, "eval_steps_per_second": 67.434, "step": 196500 }, { "epoch": 45.15, "learning_rate": 2.2576209030483615e-05, "loss": 1.0507, "step": 197000 }, { "epoch": 45.15, "eval_loss": 1.2703602313995361, "eval_runtime": 8.7042, "eval_samples_per_second": 539.164, "eval_steps_per_second": 67.439, "step": 197000 }, { "epoch": 45.27, "learning_rate": 2.2633509053403622e-05, "loss": 1.0604, "step": 197500 }, { "epoch": 45.27, "eval_loss": 1.2892682552337646, "eval_runtime": 8.7041, "eval_samples_per_second": 539.173, "eval_steps_per_second": 67.44, "step": 197500 }, { "epoch": 45.38, "learning_rate": 2.2690809076323633e-05, "loss": 1.0609, "step": 198000 }, { "epoch": 45.38, "eval_loss": 1.2903045415878296, "eval_runtime": 8.7076, "eval_samples_per_second": 538.957, "eval_steps_per_second": 67.413, "step": 198000 }, { "epoch": 45.5, "learning_rate": 2.2748109099243643e-05, "loss": 1.0505, "step": 198500 }, { "epoch": 45.5, "eval_loss": 1.2824740409851074, "eval_runtime": 8.7062, "eval_samples_per_second": 539.039, "eval_steps_per_second": 67.423, "step": 198500 }, { "epoch": 45.61, "learning_rate": 2.280540912216365e-05, "loss": 1.055, "step": 199000 }, { "epoch": 45.61, "eval_loss": 1.2946803569793701, "eval_runtime": 8.7135, "eval_samples_per_second": 538.59, "eval_steps_per_second": 67.367, "step": 199000 }, { "epoch": 45.73, "learning_rate": 2.2862709145083657e-05, "loss": 1.0626, "step": 199500 }, { "epoch": 45.73, "eval_loss": 1.2812422513961792, "eval_runtime": 8.7096, "eval_samples_per_second": 538.833, "eval_steps_per_second": 67.397, "step": 199500 }, { "epoch": 45.84, "learning_rate": 2.2920009168003668e-05, "loss": 1.0464, "step": 200000 }, { "epoch": 45.84, "eval_loss": 1.2730906009674072, "eval_runtime": 8.7072, "eval_samples_per_second": 538.977, "eval_steps_per_second": 67.415, "step": 200000 }, { "epoch": 45.95, "learning_rate": 2.2977309190923675e-05, "loss": 1.0589, "step": 200500 }, { "epoch": 45.95, "eval_loss": 1.2975456714630127, "eval_runtime": 8.7035, "eval_samples_per_second": 539.206, "eval_steps_per_second": 67.444, "step": 200500 }, { "epoch": 46.07, "learning_rate": 2.3034609213843685e-05, "loss": 1.0552, "step": 201000 }, { "epoch": 46.07, "eval_loss": 1.2844905853271484, "eval_runtime": 8.709, "eval_samples_per_second": 538.871, "eval_steps_per_second": 67.402, "step": 201000 }, { "epoch": 46.18, "learning_rate": 2.3091909236763696e-05, "loss": 1.0491, "step": 201500 }, { "epoch": 46.18, "eval_loss": 1.2856147289276123, "eval_runtime": 8.7043, "eval_samples_per_second": 539.158, "eval_steps_per_second": 67.438, "step": 201500 }, { "epoch": 46.3, "learning_rate": 2.3149209259683703e-05, "loss": 1.0507, "step": 202000 }, { "epoch": 46.3, "eval_loss": 1.2979093790054321, "eval_runtime": 8.7133, "eval_samples_per_second": 538.599, "eval_steps_per_second": 67.368, "step": 202000 }, { "epoch": 46.41, "learning_rate": 2.3206509282603713e-05, "loss": 1.0582, "step": 202500 }, { "epoch": 46.41, "eval_loss": 1.2893375158309937, "eval_runtime": 8.7049, "eval_samples_per_second": 539.12, "eval_steps_per_second": 67.433, "step": 202500 }, { "epoch": 46.53, "learning_rate": 2.3263809305523724e-05, "loss": 1.0543, "step": 203000 }, { "epoch": 46.53, "eval_loss": 1.2821950912475586, "eval_runtime": 8.7152, "eval_samples_per_second": 538.487, "eval_steps_per_second": 67.354, "step": 203000 }, { "epoch": 46.64, "learning_rate": 2.332110932844373e-05, "loss": 1.0553, "step": 203500 }, { "epoch": 46.64, "eval_loss": 1.3053985834121704, "eval_runtime": 8.711, "eval_samples_per_second": 538.745, "eval_steps_per_second": 67.386, "step": 203500 }, { "epoch": 46.76, "learning_rate": 2.337840935136374e-05, "loss": 1.0618, "step": 204000 }, { "epoch": 46.76, "eval_loss": 1.2936577796936035, "eval_runtime": 8.7074, "eval_samples_per_second": 538.965, "eval_steps_per_second": 67.414, "step": 204000 }, { "epoch": 46.87, "learning_rate": 2.3435709374283752e-05, "loss": 1.0534, "step": 204500 }, { "epoch": 46.87, "eval_loss": 1.2839508056640625, "eval_runtime": 8.7044, "eval_samples_per_second": 539.154, "eval_steps_per_second": 67.437, "step": 204500 }, { "epoch": 46.99, "learning_rate": 2.349300939720376e-05, "loss": 1.0636, "step": 205000 }, { "epoch": 46.99, "eval_loss": 1.2725616693496704, "eval_runtime": 8.7147, "eval_samples_per_second": 538.517, "eval_steps_per_second": 67.358, "step": 205000 }, { "epoch": 47.1, "learning_rate": 2.355030942012377e-05, "loss": 1.0437, "step": 205500 }, { "epoch": 47.1, "eval_loss": 1.288648009300232, "eval_runtime": 8.7048, "eval_samples_per_second": 539.131, "eval_steps_per_second": 67.434, "step": 205500 }, { "epoch": 47.22, "learning_rate": 2.360760944304378e-05, "loss": 1.052, "step": 206000 }, { "epoch": 47.22, "eval_loss": 1.2781565189361572, "eval_runtime": 8.7172, "eval_samples_per_second": 538.362, "eval_steps_per_second": 67.338, "step": 206000 }, { "epoch": 47.33, "learning_rate": 2.3664909465963787e-05, "loss": 1.0524, "step": 206500 }, { "epoch": 47.33, "eval_loss": 1.280619502067566, "eval_runtime": 8.704, "eval_samples_per_second": 539.176, "eval_steps_per_second": 67.44, "step": 206500 }, { "epoch": 47.44, "learning_rate": 2.3722209488883794e-05, "loss": 1.0551, "step": 207000 }, { "epoch": 47.44, "eval_loss": 1.3077430725097656, "eval_runtime": 8.707, "eval_samples_per_second": 538.993, "eval_steps_per_second": 67.417, "step": 207000 }, { "epoch": 47.56, "learning_rate": 2.3779509511803805e-05, "loss": 1.0494, "step": 207500 }, { "epoch": 47.56, "eval_loss": 1.2811168432235718, "eval_runtime": 8.7058, "eval_samples_per_second": 539.065, "eval_steps_per_second": 67.426, "step": 207500 }, { "epoch": 47.67, "learning_rate": 2.3836809534723815e-05, "loss": 1.0527, "step": 208000 }, { "epoch": 47.67, "eval_loss": 1.299216389656067, "eval_runtime": 8.714, "eval_samples_per_second": 538.556, "eval_steps_per_second": 67.363, "step": 208000 }, { "epoch": 47.79, "learning_rate": 2.3894109557643822e-05, "loss": 1.0539, "step": 208500 }, { "epoch": 47.79, "eval_loss": 1.2891658544540405, "eval_runtime": 8.7029, "eval_samples_per_second": 539.245, "eval_steps_per_second": 67.449, "step": 208500 }, { "epoch": 47.9, "learning_rate": 2.3951409580563833e-05, "loss": 1.0501, "step": 209000 }, { "epoch": 47.9, "eval_loss": 1.2862893342971802, "eval_runtime": 8.713, "eval_samples_per_second": 538.622, "eval_steps_per_second": 67.371, "step": 209000 }, { "epoch": 48.02, "learning_rate": 2.4008709603483843e-05, "loss": 1.0589, "step": 209500 }, { "epoch": 48.02, "eval_loss": 1.297025442123413, "eval_runtime": 8.7032, "eval_samples_per_second": 539.229, "eval_steps_per_second": 67.447, "step": 209500 }, { "epoch": 48.13, "learning_rate": 2.406600962640385e-05, "loss": 1.0491, "step": 210000 }, { "epoch": 48.13, "eval_loss": 1.288155198097229, "eval_runtime": 8.7075, "eval_samples_per_second": 538.963, "eval_steps_per_second": 67.413, "step": 210000 }, { "epoch": 48.25, "learning_rate": 2.412330964932386e-05, "loss": 1.0398, "step": 210500 }, { "epoch": 48.25, "eval_loss": 1.3030736446380615, "eval_runtime": 8.7145, "eval_samples_per_second": 538.528, "eval_steps_per_second": 67.359, "step": 210500 }, { "epoch": 48.36, "learning_rate": 2.418060967224387e-05, "loss": 1.0502, "step": 211000 }, { "epoch": 48.36, "eval_loss": 1.304490566253662, "eval_runtime": 8.7094, "eval_samples_per_second": 538.845, "eval_steps_per_second": 67.399, "step": 211000 }, { "epoch": 48.48, "learning_rate": 2.423790969516388e-05, "loss": 1.0492, "step": 211500 }, { "epoch": 48.48, "eval_loss": 1.2888822555541992, "eval_runtime": 8.7033, "eval_samples_per_second": 539.224, "eval_steps_per_second": 67.446, "step": 211500 }, { "epoch": 48.59, "learning_rate": 2.429520971808389e-05, "loss": 1.0539, "step": 212000 }, { "epoch": 48.59, "eval_loss": 1.2983222007751465, "eval_runtime": 8.7053, "eval_samples_per_second": 539.096, "eval_steps_per_second": 67.43, "step": 212000 }, { "epoch": 48.7, "learning_rate": 2.43525097410039e-05, "loss": 1.0521, "step": 212500 }, { "epoch": 48.7, "eval_loss": 1.2977162599563599, "eval_runtime": 8.7058, "eval_samples_per_second": 539.064, "eval_steps_per_second": 67.426, "step": 212500 }, { "epoch": 48.82, "learning_rate": 2.4409809763923907e-05, "loss": 1.0543, "step": 213000 }, { "epoch": 48.82, "eval_loss": 1.2916896343231201, "eval_runtime": 8.7025, "eval_samples_per_second": 539.268, "eval_steps_per_second": 67.452, "step": 213000 }, { "epoch": 48.93, "learning_rate": 2.4467109786843917e-05, "loss": 1.0531, "step": 213500 }, { "epoch": 48.93, "eval_loss": 1.2968626022338867, "eval_runtime": 8.706, "eval_samples_per_second": 539.051, "eval_steps_per_second": 67.424, "step": 213500 }, { "epoch": 49.05, "learning_rate": 2.4524409809763928e-05, "loss": 1.0535, "step": 214000 }, { "epoch": 49.05, "eval_loss": 1.2906414270401, "eval_runtime": 8.7068, "eval_samples_per_second": 539.001, "eval_steps_per_second": 67.418, "step": 214000 }, { "epoch": 49.16, "learning_rate": 2.458170983268393e-05, "loss": 1.0464, "step": 214500 }, { "epoch": 49.16, "eval_loss": 1.3081059455871582, "eval_runtime": 8.7073, "eval_samples_per_second": 538.975, "eval_steps_per_second": 67.415, "step": 214500 }, { "epoch": 49.28, "learning_rate": 2.4639009855603942e-05, "loss": 1.0441, "step": 215000 }, { "epoch": 49.28, "eval_loss": 1.3022935390472412, "eval_runtime": 8.721, "eval_samples_per_second": 538.127, "eval_steps_per_second": 67.309, "step": 215000 }, { "epoch": 49.39, "learning_rate": 2.4696309878523952e-05, "loss": 1.0472, "step": 215500 }, { "epoch": 49.39, "eval_loss": 1.2928719520568848, "eval_runtime": 8.7006, "eval_samples_per_second": 539.388, "eval_steps_per_second": 67.467, "step": 215500 }, { "epoch": 49.51, "learning_rate": 2.475360990144396e-05, "loss": 1.0468, "step": 216000 }, { "epoch": 49.51, "eval_loss": 1.3000774383544922, "eval_runtime": 8.7171, "eval_samples_per_second": 538.369, "eval_steps_per_second": 67.339, "step": 216000 }, { "epoch": 49.62, "learning_rate": 2.481090992436397e-05, "loss": 1.0511, "step": 216500 }, { "epoch": 49.62, "eval_loss": 1.2964544296264648, "eval_runtime": 8.6988, "eval_samples_per_second": 539.497, "eval_steps_per_second": 67.48, "step": 216500 }, { "epoch": 49.74, "learning_rate": 2.486820994728398e-05, "loss": 1.0526, "step": 217000 }, { "epoch": 49.74, "eval_loss": 1.2933745384216309, "eval_runtime": 8.7126, "eval_samples_per_second": 538.643, "eval_steps_per_second": 67.373, "step": 217000 }, { "epoch": 49.85, "learning_rate": 2.4925509970203988e-05, "loss": 1.0489, "step": 217500 }, { "epoch": 49.85, "eval_loss": 1.2954331636428833, "eval_runtime": 8.703, "eval_samples_per_second": 539.241, "eval_steps_per_second": 67.448, "step": 217500 }, { "epoch": 49.97, "learning_rate": 2.4982809993123998e-05, "loss": 1.0519, "step": 218000 }, { "epoch": 49.97, "eval_loss": 1.2971032857894897, "eval_runtime": 8.7002, "eval_samples_per_second": 539.413, "eval_steps_per_second": 67.47, "step": 218000 }, { "epoch": 50.08, "learning_rate": 2.5040110016044005e-05, "loss": 1.0487, "step": 218500 }, { "epoch": 50.08, "eval_loss": 1.3141885995864868, "eval_runtime": 8.701, "eval_samples_per_second": 539.365, "eval_steps_per_second": 67.464, "step": 218500 }, { "epoch": 50.19, "learning_rate": 2.5097410038964016e-05, "loss": 1.0463, "step": 219000 }, { "epoch": 50.19, "eval_loss": 1.3042432069778442, "eval_runtime": 8.7222, "eval_samples_per_second": 538.053, "eval_steps_per_second": 67.3, "step": 219000 }, { "epoch": 50.31, "learning_rate": 2.5154710061884023e-05, "loss": 1.0482, "step": 219500 }, { "epoch": 50.31, "eval_loss": 1.3184372186660767, "eval_runtime": 8.7008, "eval_samples_per_second": 539.373, "eval_steps_per_second": 67.465, "step": 219500 }, { "epoch": 50.42, "learning_rate": 2.5212010084804033e-05, "loss": 1.0478, "step": 220000 }, { "epoch": 50.42, "eval_loss": 1.3111543655395508, "eval_runtime": 8.7064, "eval_samples_per_second": 539.031, "eval_steps_per_second": 67.422, "step": 220000 }, { "epoch": 50.54, "learning_rate": 2.5269310107724044e-05, "loss": 1.0454, "step": 220500 }, { "epoch": 50.54, "eval_loss": 1.3013570308685303, "eval_runtime": 8.7172, "eval_samples_per_second": 538.359, "eval_steps_per_second": 67.338, "step": 220500 }, { "epoch": 50.65, "learning_rate": 2.532661013064405e-05, "loss": 1.0532, "step": 221000 }, { "epoch": 50.65, "eval_loss": 1.3102153539657593, "eval_runtime": 8.7114, "eval_samples_per_second": 538.717, "eval_steps_per_second": 67.383, "step": 221000 }, { "epoch": 50.77, "learning_rate": 2.538391015356406e-05, "loss": 1.0455, "step": 221500 }, { "epoch": 50.77, "eval_loss": 1.2987374067306519, "eval_runtime": 8.7056, "eval_samples_per_second": 539.078, "eval_steps_per_second": 67.428, "step": 221500 }, { "epoch": 50.88, "learning_rate": 2.5441210176484072e-05, "loss": 1.052, "step": 222000 }, { "epoch": 50.88, "eval_loss": 1.3218241930007935, "eval_runtime": 8.7165, "eval_samples_per_second": 538.401, "eval_steps_per_second": 67.343, "step": 222000 }, { "epoch": 51.0, "learning_rate": 2.549851019940408e-05, "loss": 1.0421, "step": 222500 }, { "epoch": 51.0, "eval_loss": 1.2992583513259888, "eval_runtime": 8.7062, "eval_samples_per_second": 539.041, "eval_steps_per_second": 67.423, "step": 222500 }, { "epoch": 51.11, "learning_rate": 2.555581022232409e-05, "loss": 1.0368, "step": 223000 }, { "epoch": 51.11, "eval_loss": 1.3003897666931152, "eval_runtime": 8.7111, "eval_samples_per_second": 538.738, "eval_steps_per_second": 67.385, "step": 223000 }, { "epoch": 51.23, "learning_rate": 2.56131102452441e-05, "loss": 1.0409, "step": 223500 }, { "epoch": 51.23, "eval_loss": 1.3009271621704102, "eval_runtime": 8.7047, "eval_samples_per_second": 539.135, "eval_steps_per_second": 67.435, "step": 223500 }, { "epoch": 51.34, "learning_rate": 2.5670410268164107e-05, "loss": 1.0414, "step": 224000 }, { "epoch": 51.34, "eval_loss": 1.3131580352783203, "eval_runtime": 8.7044, "eval_samples_per_second": 539.154, "eval_steps_per_second": 67.437, "step": 224000 }, { "epoch": 51.46, "learning_rate": 2.5727710291084117e-05, "loss": 1.043, "step": 224500 }, { "epoch": 51.46, "eval_loss": 1.2982429265975952, "eval_runtime": 8.7019, "eval_samples_per_second": 539.304, "eval_steps_per_second": 67.456, "step": 224500 }, { "epoch": 51.57, "learning_rate": 2.5785010314004128e-05, "loss": 1.0461, "step": 225000 }, { "epoch": 51.57, "eval_loss": 1.2857537269592285, "eval_runtime": 8.7112, "eval_samples_per_second": 538.733, "eval_steps_per_second": 67.385, "step": 225000 }, { "epoch": 51.68, "learning_rate": 2.5842310336924135e-05, "loss": 1.0514, "step": 225500 }, { "epoch": 51.68, "eval_loss": 1.2900124788284302, "eval_runtime": 8.7098, "eval_samples_per_second": 538.821, "eval_steps_per_second": 67.396, "step": 225500 }, { "epoch": 51.8, "learning_rate": 2.5899610359844146e-05, "loss": 1.046, "step": 226000 }, { "epoch": 51.8, "eval_loss": 1.3230184316635132, "eval_runtime": 8.719, "eval_samples_per_second": 538.248, "eval_steps_per_second": 67.324, "step": 226000 }, { "epoch": 51.91, "learning_rate": 2.5956910382764156e-05, "loss": 1.045, "step": 226500 }, { "epoch": 51.91, "eval_loss": 1.3171018362045288, "eval_runtime": 8.7106, "eval_samples_per_second": 538.767, "eval_steps_per_second": 67.389, "step": 226500 }, { "epoch": 52.03, "learning_rate": 2.6014210405684163e-05, "loss": 1.0549, "step": 227000 }, { "epoch": 52.03, "eval_loss": 1.3144716024398804, "eval_runtime": 8.7227, "eval_samples_per_second": 538.019, "eval_steps_per_second": 67.295, "step": 227000 }, { "epoch": 52.14, "learning_rate": 2.6071510428604174e-05, "loss": 1.0372, "step": 227500 }, { "epoch": 52.14, "eval_loss": 1.300628900527954, "eval_runtime": 8.7097, "eval_samples_per_second": 538.823, "eval_steps_per_second": 67.396, "step": 227500 }, { "epoch": 52.26, "learning_rate": 2.6128810451524184e-05, "loss": 1.045, "step": 228000 }, { "epoch": 52.26, "eval_loss": 1.302744746208191, "eval_runtime": 8.7198, "eval_samples_per_second": 538.2, "eval_steps_per_second": 67.318, "step": 228000 }, { "epoch": 52.37, "learning_rate": 2.618611047444419e-05, "loss": 1.0387, "step": 228500 }, { "epoch": 52.37, "eval_loss": 1.3012394905090332, "eval_runtime": 8.7019, "eval_samples_per_second": 539.31, "eval_steps_per_second": 67.457, "step": 228500 }, { "epoch": 52.49, "learning_rate": 2.6243410497364202e-05, "loss": 1.0382, "step": 229000 }, { "epoch": 52.49, "eval_loss": 1.3077116012573242, "eval_runtime": 8.7149, "eval_samples_per_second": 538.505, "eval_steps_per_second": 67.356, "step": 229000 }, { "epoch": 52.6, "learning_rate": 2.6300710520284212e-05, "loss": 1.0379, "step": 229500 }, { "epoch": 52.6, "eval_loss": 1.3044253587722778, "eval_runtime": 8.7045, "eval_samples_per_second": 539.149, "eval_steps_per_second": 67.437, "step": 229500 }, { "epoch": 52.72, "learning_rate": 2.635801054320422e-05, "loss": 1.0434, "step": 230000 }, { "epoch": 52.72, "eval_loss": 1.3109080791473389, "eval_runtime": 8.7054, "eval_samples_per_second": 539.09, "eval_steps_per_second": 67.429, "step": 230000 }, { "epoch": 52.83, "learning_rate": 2.641531056612423e-05, "loss": 1.0435, "step": 230500 }, { "epoch": 52.83, "eval_loss": 1.3128092288970947, "eval_runtime": 8.7071, "eval_samples_per_second": 538.983, "eval_steps_per_second": 67.416, "step": 230500 }, { "epoch": 52.95, "learning_rate": 2.647261058904424e-05, "loss": 1.0407, "step": 231000 }, { "epoch": 52.95, "eval_loss": 1.3103513717651367, "eval_runtime": 8.7099, "eval_samples_per_second": 538.81, "eval_steps_per_second": 67.394, "step": 231000 }, { "epoch": 53.06, "learning_rate": 2.6529910611964247e-05, "loss": 1.0397, "step": 231500 }, { "epoch": 53.06, "eval_loss": 1.3069210052490234, "eval_runtime": 8.715, "eval_samples_per_second": 538.495, "eval_steps_per_second": 67.355, "step": 231500 }, { "epoch": 53.17, "learning_rate": 2.6587210634884258e-05, "loss": 1.0363, "step": 232000 }, { "epoch": 53.17, "eval_loss": 1.3148322105407715, "eval_runtime": 8.7052, "eval_samples_per_second": 539.105, "eval_steps_per_second": 67.431, "step": 232000 }, { "epoch": 53.29, "learning_rate": 2.664451065780427e-05, "loss": 1.0428, "step": 232500 }, { "epoch": 53.29, "eval_loss": 1.3196145296096802, "eval_runtime": 8.7071, "eval_samples_per_second": 538.988, "eval_steps_per_second": 67.417, "step": 232500 }, { "epoch": 53.4, "learning_rate": 2.6701810680724276e-05, "loss": 1.0398, "step": 233000 }, { "epoch": 53.4, "eval_loss": 1.3163927793502808, "eval_runtime": 8.7087, "eval_samples_per_second": 538.887, "eval_steps_per_second": 67.404, "step": 233000 }, { "epoch": 53.52, "learning_rate": 2.675911070364428e-05, "loss": 1.0425, "step": 233500 }, { "epoch": 53.52, "eval_loss": 1.310468077659607, "eval_runtime": 8.7006, "eval_samples_per_second": 539.389, "eval_steps_per_second": 67.467, "step": 233500 }, { "epoch": 53.63, "learning_rate": 2.681641072656429e-05, "loss": 1.0371, "step": 234000 }, { "epoch": 53.63, "eval_loss": 1.3171497583389282, "eval_runtime": 8.7119, "eval_samples_per_second": 538.687, "eval_steps_per_second": 67.379, "step": 234000 }, { "epoch": 53.75, "learning_rate": 2.68737107494843e-05, "loss": 1.043, "step": 234500 }, { "epoch": 53.75, "eval_loss": 1.3138409852981567, "eval_runtime": 8.7051, "eval_samples_per_second": 539.109, "eval_steps_per_second": 67.432, "step": 234500 }, { "epoch": 53.86, "learning_rate": 2.6931010772404307e-05, "loss": 1.0429, "step": 235000 }, { "epoch": 53.86, "eval_loss": 1.3040622472763062, "eval_runtime": 8.72, "eval_samples_per_second": 538.189, "eval_steps_per_second": 67.317, "step": 235000 }, { "epoch": 53.98, "learning_rate": 2.6988310795324318e-05, "loss": 1.0385, "step": 235500 }, { "epoch": 53.98, "eval_loss": 1.3233661651611328, "eval_runtime": 8.7029, "eval_samples_per_second": 539.243, "eval_steps_per_second": 67.449, "step": 235500 }, { "epoch": 54.09, "learning_rate": 2.7045610818244328e-05, "loss": 1.0354, "step": 236000 }, { "epoch": 54.09, "eval_loss": 1.3306721448898315, "eval_runtime": 8.7091, "eval_samples_per_second": 538.862, "eval_steps_per_second": 67.401, "step": 236000 }, { "epoch": 54.21, "learning_rate": 2.7102910841164335e-05, "loss": 1.033, "step": 236500 }, { "epoch": 54.21, "eval_loss": 1.3197903633117676, "eval_runtime": 8.7007, "eval_samples_per_second": 539.384, "eval_steps_per_second": 67.466, "step": 236500 }, { "epoch": 54.32, "learning_rate": 2.7160210864084346e-05, "loss": 1.0365, "step": 237000 }, { "epoch": 54.32, "eval_loss": 1.315364956855774, "eval_runtime": 8.707, "eval_samples_per_second": 538.991, "eval_steps_per_second": 67.417, "step": 237000 }, { "epoch": 54.43, "learning_rate": 2.7217510887004356e-05, "loss": 1.0353, "step": 237500 }, { "epoch": 54.43, "eval_loss": 1.321337103843689, "eval_runtime": 8.7028, "eval_samples_per_second": 539.249, "eval_steps_per_second": 67.449, "step": 237500 }, { "epoch": 54.55, "learning_rate": 2.7274810909924363e-05, "loss": 1.0384, "step": 238000 }, { "epoch": 54.55, "eval_loss": 1.321872591972351, "eval_runtime": 8.7116, "eval_samples_per_second": 538.707, "eval_steps_per_second": 67.381, "step": 238000 }, { "epoch": 54.66, "learning_rate": 2.7332110932844374e-05, "loss": 1.0455, "step": 238500 }, { "epoch": 54.66, "eval_loss": 1.313440203666687, "eval_runtime": 8.7071, "eval_samples_per_second": 538.984, "eval_steps_per_second": 67.416, "step": 238500 }, { "epoch": 54.78, "learning_rate": 2.7389410955764384e-05, "loss": 1.0424, "step": 239000 }, { "epoch": 54.78, "eval_loss": 1.3253813982009888, "eval_runtime": 8.7115, "eval_samples_per_second": 538.716, "eval_steps_per_second": 67.383, "step": 239000 }, { "epoch": 54.89, "learning_rate": 2.744671097868439e-05, "loss": 1.0439, "step": 239500 }, { "epoch": 54.89, "eval_loss": 1.337465763092041, "eval_runtime": 8.7072, "eval_samples_per_second": 538.976, "eval_steps_per_second": 67.415, "step": 239500 }, { "epoch": 55.01, "learning_rate": 2.7504011001604402e-05, "loss": 1.0458, "step": 240000 }, { "epoch": 55.01, "eval_loss": 1.3188368082046509, "eval_runtime": 8.714, "eval_samples_per_second": 538.56, "eval_steps_per_second": 67.363, "step": 240000 }, { "epoch": 55.12, "learning_rate": 2.7561311024524413e-05, "loss": 1.0284, "step": 240500 }, { "epoch": 55.12, "eval_loss": 1.2979966402053833, "eval_runtime": 8.7118, "eval_samples_per_second": 538.696, "eval_steps_per_second": 67.38, "step": 240500 }, { "epoch": 55.24, "learning_rate": 2.761861104744442e-05, "loss": 1.0312, "step": 241000 }, { "epoch": 55.24, "eval_loss": 1.3198728561401367, "eval_runtime": 8.7117, "eval_samples_per_second": 538.699, "eval_steps_per_second": 67.38, "step": 241000 }, { "epoch": 55.35, "learning_rate": 2.767591107036443e-05, "loss": 1.0333, "step": 241500 }, { "epoch": 55.35, "eval_loss": 1.3313496112823486, "eval_runtime": 8.7052, "eval_samples_per_second": 539.103, "eval_steps_per_second": 67.431, "step": 241500 }, { "epoch": 55.47, "learning_rate": 2.773321109328444e-05, "loss": 1.0354, "step": 242000 }, { "epoch": 55.47, "eval_loss": 1.3492119312286377, "eval_runtime": 8.7131, "eval_samples_per_second": 538.616, "eval_steps_per_second": 67.37, "step": 242000 }, { "epoch": 55.58, "learning_rate": 2.7790511116204448e-05, "loss": 1.0385, "step": 242500 }, { "epoch": 55.58, "eval_loss": 1.315171241760254, "eval_runtime": 8.7089, "eval_samples_per_second": 538.875, "eval_steps_per_second": 67.402, "step": 242500 }, { "epoch": 55.7, "learning_rate": 2.7847811139124458e-05, "loss": 1.0368, "step": 243000 }, { "epoch": 55.7, "eval_loss": 1.3246351480484009, "eval_runtime": 8.7146, "eval_samples_per_second": 538.52, "eval_steps_per_second": 67.358, "step": 243000 }, { "epoch": 55.81, "learning_rate": 2.790511116204447e-05, "loss": 1.0376, "step": 243500 }, { "epoch": 55.81, "eval_loss": 1.3429311513900757, "eval_runtime": 8.7038, "eval_samples_per_second": 539.19, "eval_steps_per_second": 67.442, "step": 243500 }, { "epoch": 55.92, "learning_rate": 2.7962411184964476e-05, "loss": 1.0385, "step": 244000 }, { "epoch": 55.92, "eval_loss": 1.326585292816162, "eval_runtime": 8.7097, "eval_samples_per_second": 538.824, "eval_steps_per_second": 67.396, "step": 244000 }, { "epoch": 56.04, "learning_rate": 2.8019711207884486e-05, "loss": 1.0394, "step": 244500 }, { "epoch": 56.04, "eval_loss": 1.3181424140930176, "eval_runtime": 8.7063, "eval_samples_per_second": 539.036, "eval_steps_per_second": 67.423, "step": 244500 }, { "epoch": 56.15, "learning_rate": 2.8077011230804497e-05, "loss": 1.0297, "step": 245000 }, { "epoch": 56.15, "eval_loss": 1.320960521697998, "eval_runtime": 8.7088, "eval_samples_per_second": 538.879, "eval_steps_per_second": 67.403, "step": 245000 }, { "epoch": 56.27, "learning_rate": 2.8134311253724504e-05, "loss": 1.0342, "step": 245500 }, { "epoch": 56.27, "eval_loss": 1.3369871377944946, "eval_runtime": 8.7036, "eval_samples_per_second": 539.2, "eval_steps_per_second": 67.443, "step": 245500 }, { "epoch": 56.38, "learning_rate": 2.8191611276644514e-05, "loss": 1.0309, "step": 246000 }, { "epoch": 56.38, "eval_loss": 1.3352166414260864, "eval_runtime": 8.7112, "eval_samples_per_second": 538.731, "eval_steps_per_second": 67.384, "step": 246000 }, { "epoch": 56.5, "learning_rate": 2.8248911299564525e-05, "loss": 1.0292, "step": 246500 }, { "epoch": 56.5, "eval_loss": 1.3264946937561035, "eval_runtime": 8.6995, "eval_samples_per_second": 539.455, "eval_steps_per_second": 67.475, "step": 246500 }, { "epoch": 56.61, "learning_rate": 2.8306211322484532e-05, "loss": 1.0353, "step": 247000 }, { "epoch": 56.61, "eval_loss": 1.3346257209777832, "eval_runtime": 8.7156, "eval_samples_per_second": 538.463, "eval_steps_per_second": 67.351, "step": 247000 }, { "epoch": 56.73, "learning_rate": 2.8363511345404542e-05, "loss": 1.0402, "step": 247500 }, { "epoch": 56.73, "eval_loss": 1.3432605266571045, "eval_runtime": 8.7085, "eval_samples_per_second": 538.899, "eval_steps_per_second": 67.405, "step": 247500 }, { "epoch": 56.84, "learning_rate": 2.8420811368324553e-05, "loss": 1.0363, "step": 248000 }, { "epoch": 56.84, "eval_loss": 1.324818730354309, "eval_runtime": 8.7063, "eval_samples_per_second": 539.034, "eval_steps_per_second": 67.422, "step": 248000 }, { "epoch": 56.96, "learning_rate": 2.8478111391244557e-05, "loss": 1.045, "step": 248500 }, { "epoch": 56.96, "eval_loss": 1.3338062763214111, "eval_runtime": 8.7043, "eval_samples_per_second": 539.157, "eval_steps_per_second": 67.438, "step": 248500 }, { "epoch": 57.07, "learning_rate": 2.8535411414164564e-05, "loss": 1.0284, "step": 249000 }, { "epoch": 57.07, "eval_loss": 1.3207162618637085, "eval_runtime": 8.7099, "eval_samples_per_second": 538.815, "eval_steps_per_second": 67.395, "step": 249000 }, { "epoch": 57.19, "learning_rate": 2.8592711437084574e-05, "loss": 1.0333, "step": 249500 }, { "epoch": 57.19, "eval_loss": 1.3169333934783936, "eval_runtime": 8.7049, "eval_samples_per_second": 539.12, "eval_steps_per_second": 67.433, "step": 249500 }, { "epoch": 57.3, "learning_rate": 2.8650011460004585e-05, "loss": 1.0315, "step": 250000 }, { "epoch": 57.3, "eval_loss": 1.3148645162582397, "eval_runtime": 8.7096, "eval_samples_per_second": 538.83, "eval_steps_per_second": 67.397, "step": 250000 }, { "epoch": 57.41, "learning_rate": 2.8707311482924592e-05, "loss": 1.0298, "step": 250500 }, { "epoch": 57.41, "eval_loss": 1.3349968194961548, "eval_runtime": 8.7103, "eval_samples_per_second": 538.785, "eval_steps_per_second": 67.391, "step": 250500 }, { "epoch": 57.53, "learning_rate": 2.8764611505844602e-05, "loss": 1.0309, "step": 251000 }, { "epoch": 57.53, "eval_loss": 1.3324781656265259, "eval_runtime": 8.722, "eval_samples_per_second": 538.063, "eval_steps_per_second": 67.301, "step": 251000 }, { "epoch": 57.64, "learning_rate": 2.8821911528764613e-05, "loss": 1.0283, "step": 251500 }, { "epoch": 57.64, "eval_loss": 1.3391568660736084, "eval_runtime": 8.7097, "eval_samples_per_second": 538.823, "eval_steps_per_second": 67.396, "step": 251500 }, { "epoch": 57.76, "learning_rate": 2.887921155168462e-05, "loss": 1.0347, "step": 252000 }, { "epoch": 57.76, "eval_loss": 1.3147822618484497, "eval_runtime": 8.7111, "eval_samples_per_second": 538.736, "eval_steps_per_second": 67.385, "step": 252000 }, { "epoch": 57.87, "learning_rate": 2.893651157460463e-05, "loss": 1.0378, "step": 252500 }, { "epoch": 57.87, "eval_loss": 1.3177522420883179, "eval_runtime": 8.7055, "eval_samples_per_second": 539.087, "eval_steps_per_second": 67.429, "step": 252500 }, { "epoch": 57.99, "learning_rate": 2.899381159752464e-05, "loss": 1.0375, "step": 253000 }, { "epoch": 57.99, "eval_loss": 1.3051265478134155, "eval_runtime": 8.7064, "eval_samples_per_second": 539.027, "eval_steps_per_second": 67.421, "step": 253000 }, { "epoch": 58.1, "learning_rate": 2.9051111620444648e-05, "loss": 1.0273, "step": 253500 }, { "epoch": 58.1, "eval_loss": 1.3377172946929932, "eval_runtime": 8.7049, "eval_samples_per_second": 539.122, "eval_steps_per_second": 67.433, "step": 253500 }, { "epoch": 58.22, "learning_rate": 2.910841164336466e-05, "loss": 1.028, "step": 254000 }, { "epoch": 58.22, "eval_loss": 1.3515993356704712, "eval_runtime": 8.7086, "eval_samples_per_second": 538.893, "eval_steps_per_second": 67.405, "step": 254000 }, { "epoch": 58.33, "learning_rate": 2.916571166628467e-05, "loss": 1.0361, "step": 254500 }, { "epoch": 58.33, "eval_loss": 1.3322782516479492, "eval_runtime": 8.7056, "eval_samples_per_second": 539.079, "eval_steps_per_second": 67.428, "step": 254500 }, { "epoch": 58.45, "learning_rate": 2.9223011689204676e-05, "loss": 1.0281, "step": 255000 }, { "epoch": 58.45, "eval_loss": 1.3504353761672974, "eval_runtime": 8.7175, "eval_samples_per_second": 538.34, "eval_steps_per_second": 67.336, "step": 255000 }, { "epoch": 58.56, "learning_rate": 2.9280311712124687e-05, "loss": 1.0261, "step": 255500 }, { "epoch": 58.56, "eval_loss": 1.322317123413086, "eval_runtime": 8.707, "eval_samples_per_second": 538.99, "eval_steps_per_second": 67.417, "step": 255500 }, { "epoch": 58.68, "learning_rate": 2.9337611735044697e-05, "loss": 1.0326, "step": 256000 }, { "epoch": 58.68, "eval_loss": 1.337592601776123, "eval_runtime": 8.7112, "eval_samples_per_second": 538.731, "eval_steps_per_second": 67.384, "step": 256000 }, { "epoch": 58.79, "learning_rate": 2.9394911757964704e-05, "loss": 1.0375, "step": 256500 }, { "epoch": 58.79, "eval_loss": 1.3464411497116089, "eval_runtime": 8.7072, "eval_samples_per_second": 538.981, "eval_steps_per_second": 67.416, "step": 256500 }, { "epoch": 58.9, "learning_rate": 2.9452211780884715e-05, "loss": 1.034, "step": 257000 }, { "epoch": 58.9, "eval_loss": 1.3286625146865845, "eval_runtime": 8.7059, "eval_samples_per_second": 539.061, "eval_steps_per_second": 67.426, "step": 257000 }, { "epoch": 59.02, "learning_rate": 2.9509511803804725e-05, "loss": 1.0329, "step": 257500 }, { "epoch": 59.02, "eval_loss": 1.333856463432312, "eval_runtime": 8.7029, "eval_samples_per_second": 539.248, "eval_steps_per_second": 67.449, "step": 257500 }, { "epoch": 59.13, "learning_rate": 2.9566811826724732e-05, "loss": 1.0279, "step": 258000 }, { "epoch": 59.13, "eval_loss": 1.3382489681243896, "eval_runtime": 8.7038, "eval_samples_per_second": 539.192, "eval_steps_per_second": 67.442, "step": 258000 }, { "epoch": 59.25, "learning_rate": 2.9624111849644743e-05, "loss": 1.0293, "step": 258500 }, { "epoch": 59.25, "eval_loss": 1.3295152187347412, "eval_runtime": 8.7045, "eval_samples_per_second": 539.146, "eval_steps_per_second": 67.436, "step": 258500 }, { "epoch": 59.36, "learning_rate": 2.9681411872564753e-05, "loss": 1.0268, "step": 259000 }, { "epoch": 59.36, "eval_loss": 1.3343822956085205, "eval_runtime": 8.7216, "eval_samples_per_second": 538.092, "eval_steps_per_second": 67.305, "step": 259000 }, { "epoch": 59.48, "learning_rate": 2.973871189548476e-05, "loss": 1.0314, "step": 259500 }, { "epoch": 59.48, "eval_loss": 1.3260921239852905, "eval_runtime": 8.7035, "eval_samples_per_second": 539.209, "eval_steps_per_second": 67.444, "step": 259500 }, { "epoch": 59.59, "learning_rate": 2.979601191840477e-05, "loss": 1.0327, "step": 260000 }, { "epoch": 59.59, "eval_loss": 1.3370031118392944, "eval_runtime": 8.7076, "eval_samples_per_second": 538.955, "eval_steps_per_second": 67.412, "step": 260000 }, { "epoch": 59.71, "learning_rate": 2.985331194132478e-05, "loss": 1.0346, "step": 260500 }, { "epoch": 59.71, "eval_loss": 1.3278111219406128, "eval_runtime": 8.7052, "eval_samples_per_second": 539.105, "eval_steps_per_second": 67.431, "step": 260500 }, { "epoch": 59.82, "learning_rate": 2.991061196424479e-05, "loss": 1.033, "step": 261000 }, { "epoch": 59.82, "eval_loss": 1.3216747045516968, "eval_runtime": 8.7079, "eval_samples_per_second": 538.934, "eval_steps_per_second": 67.41, "step": 261000 }, { "epoch": 59.94, "learning_rate": 2.99679119871648e-05, "loss": 1.0364, "step": 261500 }, { "epoch": 59.94, "eval_loss": 1.3405216932296753, "eval_runtime": 8.7068, "eval_samples_per_second": 539.003, "eval_steps_per_second": 67.418, "step": 261500 }, { "epoch": 60.05, "learning_rate": 3.002521201008481e-05, "loss": 1.0273, "step": 262000 }, { "epoch": 60.05, "eval_loss": 1.3334136009216309, "eval_runtime": 8.7063, "eval_samples_per_second": 539.037, "eval_steps_per_second": 67.423, "step": 262000 }, { "epoch": 60.17, "learning_rate": 3.0082512033004817e-05, "loss": 1.0235, "step": 262500 }, { "epoch": 60.17, "eval_loss": 1.318435788154602, "eval_runtime": 8.7058, "eval_samples_per_second": 539.067, "eval_steps_per_second": 67.426, "step": 262500 }, { "epoch": 60.28, "learning_rate": 3.0139812055924827e-05, "loss": 1.0201, "step": 263000 }, { "epoch": 60.28, "eval_loss": 1.3509299755096436, "eval_runtime": 8.7073, "eval_samples_per_second": 538.972, "eval_steps_per_second": 67.415, "step": 263000 }, { "epoch": 60.39, "learning_rate": 3.019711207884483e-05, "loss": 1.0246, "step": 263500 }, { "epoch": 60.39, "eval_loss": 1.3365037441253662, "eval_runtime": 8.7043, "eval_samples_per_second": 539.157, "eval_steps_per_second": 67.438, "step": 263500 }, { "epoch": 60.51, "learning_rate": 3.025441210176484e-05, "loss": 1.0283, "step": 264000 }, { "epoch": 60.51, "eval_loss": 1.3379473686218262, "eval_runtime": 8.7014, "eval_samples_per_second": 539.337, "eval_steps_per_second": 67.46, "step": 264000 }, { "epoch": 60.62, "learning_rate": 3.031171212468485e-05, "loss": 1.0361, "step": 264500 }, { "epoch": 60.62, "eval_loss": 1.342536449432373, "eval_runtime": 8.6986, "eval_samples_per_second": 539.515, "eval_steps_per_second": 67.482, "step": 264500 }, { "epoch": 60.74, "learning_rate": 3.036901214760486e-05, "loss": 1.0356, "step": 265000 }, { "epoch": 60.74, "eval_loss": 1.3549495935440063, "eval_runtime": 8.7075, "eval_samples_per_second": 538.963, "eval_steps_per_second": 67.413, "step": 265000 }, { "epoch": 60.85, "learning_rate": 3.042631217052487e-05, "loss": 1.0268, "step": 265500 }, { "epoch": 60.85, "eval_loss": 1.3258453607559204, "eval_runtime": 8.7037, "eval_samples_per_second": 539.194, "eval_steps_per_second": 67.442, "step": 265500 }, { "epoch": 60.97, "learning_rate": 3.0483612193444876e-05, "loss": 1.0349, "step": 266000 }, { "epoch": 60.97, "eval_loss": 1.3435091972351074, "eval_runtime": 8.7066, "eval_samples_per_second": 539.019, "eval_steps_per_second": 67.42, "step": 266000 }, { "epoch": 61.08, "learning_rate": 3.054091221636489e-05, "loss": 1.026, "step": 266500 }, { "epoch": 61.08, "eval_loss": 1.353337049484253, "eval_runtime": 8.7045, "eval_samples_per_second": 539.146, "eval_steps_per_second": 67.436, "step": 266500 }, { "epoch": 61.2, "learning_rate": 3.05982122392849e-05, "loss": 1.0194, "step": 267000 }, { "epoch": 61.2, "eval_loss": 1.3410871028900146, "eval_runtime": 8.7167, "eval_samples_per_second": 538.39, "eval_steps_per_second": 67.342, "step": 267000 }, { "epoch": 61.31, "learning_rate": 3.065551226220491e-05, "loss": 1.0249, "step": 267500 }, { "epoch": 61.31, "eval_loss": 1.3355889320373535, "eval_runtime": 8.7024, "eval_samples_per_second": 539.278, "eval_steps_per_second": 67.453, "step": 267500 }, { "epoch": 61.43, "learning_rate": 3.071281228512491e-05, "loss": 1.0268, "step": 268000 }, { "epoch": 61.43, "eval_loss": 1.3445043563842773, "eval_runtime": 8.7066, "eval_samples_per_second": 539.019, "eval_steps_per_second": 67.42, "step": 268000 }, { "epoch": 61.54, "learning_rate": 3.077011230804492e-05, "loss": 1.0308, "step": 268500 }, { "epoch": 61.54, "eval_loss": 1.3484935760498047, "eval_runtime": 8.7051, "eval_samples_per_second": 539.107, "eval_steps_per_second": 67.432, "step": 268500 }, { "epoch": 61.65, "learning_rate": 3.082741233096493e-05, "loss": 1.0314, "step": 269000 }, { "epoch": 61.65, "eval_loss": 1.3493943214416504, "eval_runtime": 8.7064, "eval_samples_per_second": 539.03, "eval_steps_per_second": 67.422, "step": 269000 }, { "epoch": 61.77, "learning_rate": 3.088471235388494e-05, "loss": 1.0302, "step": 269500 }, { "epoch": 61.77, "eval_loss": 1.3538340330123901, "eval_runtime": 8.7042, "eval_samples_per_second": 539.165, "eval_steps_per_second": 67.439, "step": 269500 }, { "epoch": 61.88, "learning_rate": 3.0942012376804954e-05, "loss": 1.028, "step": 270000 }, { "epoch": 61.88, "eval_loss": 1.372446894645691, "eval_runtime": 8.7171, "eval_samples_per_second": 538.365, "eval_steps_per_second": 67.339, "step": 270000 }, { "epoch": 62.0, "learning_rate": 3.0999312399724964e-05, "loss": 1.0322, "step": 270500 }, { "epoch": 62.0, "eval_loss": 1.3564677238464355, "eval_runtime": 8.7099, "eval_samples_per_second": 538.814, "eval_steps_per_second": 67.395, "step": 270500 }, { "epoch": 62.11, "learning_rate": 3.105661242264497e-05, "loss": 1.0248, "step": 271000 }, { "epoch": 62.11, "eval_loss": 1.3714157342910767, "eval_runtime": 8.7096, "eval_samples_per_second": 538.833, "eval_steps_per_second": 67.397, "step": 271000 }, { "epoch": 62.23, "learning_rate": 3.111391244556498e-05, "loss": 1.0225, "step": 271500 }, { "epoch": 62.23, "eval_loss": 1.3365130424499512, "eval_runtime": 8.7027, "eval_samples_per_second": 539.255, "eval_steps_per_second": 67.45, "step": 271500 }, { "epoch": 62.34, "learning_rate": 3.117121246848499e-05, "loss": 1.0253, "step": 272000 }, { "epoch": 62.34, "eval_loss": 1.3657175302505493, "eval_runtime": 8.7056, "eval_samples_per_second": 539.076, "eval_steps_per_second": 67.428, "step": 272000 }, { "epoch": 62.46, "learning_rate": 3.1228512491405e-05, "loss": 1.0213, "step": 272500 }, { "epoch": 62.46, "eval_loss": 1.3591282367706299, "eval_runtime": 8.7078, "eval_samples_per_second": 538.943, "eval_steps_per_second": 67.411, "step": 272500 }, { "epoch": 62.57, "learning_rate": 3.128581251432501e-05, "loss": 1.0337, "step": 273000 }, { "epoch": 62.57, "eval_loss": 1.3526349067687988, "eval_runtime": 8.7047, "eval_samples_per_second": 539.134, "eval_steps_per_second": 67.435, "step": 273000 }, { "epoch": 62.69, "learning_rate": 3.134311253724502e-05, "loss": 1.0294, "step": 273500 }, { "epoch": 62.69, "eval_loss": 1.3463046550750732, "eval_runtime": 8.7041, "eval_samples_per_second": 539.169, "eval_steps_per_second": 67.439, "step": 273500 }, { "epoch": 62.8, "learning_rate": 3.1400412560165024e-05, "loss": 1.0365, "step": 274000 }, { "epoch": 62.8, "eval_loss": 1.3266334533691406, "eval_runtime": 8.7055, "eval_samples_per_second": 539.085, "eval_steps_per_second": 67.429, "step": 274000 }, { "epoch": 62.92, "learning_rate": 3.1457712583085034e-05, "loss": 1.035, "step": 274500 }, { "epoch": 62.92, "eval_loss": 1.3429316282272339, "eval_runtime": 8.7055, "eval_samples_per_second": 539.084, "eval_steps_per_second": 67.429, "step": 274500 }, { "epoch": 63.03, "learning_rate": 3.1515012606005045e-05, "loss": 1.0322, "step": 275000 }, { "epoch": 63.03, "eval_loss": 1.3727476596832275, "eval_runtime": 8.7174, "eval_samples_per_second": 538.347, "eval_steps_per_second": 67.336, "step": 275000 }, { "epoch": 63.14, "learning_rate": 3.1572312628925055e-05, "loss": 1.0208, "step": 275500 }, { "epoch": 63.14, "eval_loss": 1.3611018657684326, "eval_runtime": 8.7012, "eval_samples_per_second": 539.35, "eval_steps_per_second": 67.462, "step": 275500 }, { "epoch": 63.26, "learning_rate": 3.1629612651845066e-05, "loss": 1.0243, "step": 276000 }, { "epoch": 63.26, "eval_loss": 1.3515015840530396, "eval_runtime": 8.7072, "eval_samples_per_second": 538.981, "eval_steps_per_second": 67.416, "step": 276000 }, { "epoch": 63.37, "learning_rate": 3.1686912674765076e-05, "loss": 1.0227, "step": 276500 }, { "epoch": 63.37, "eval_loss": 1.3518571853637695, "eval_runtime": 8.7074, "eval_samples_per_second": 538.965, "eval_steps_per_second": 67.414, "step": 276500 }, { "epoch": 63.49, "learning_rate": 3.174421269768508e-05, "loss": 1.0242, "step": 277000 }, { "epoch": 63.49, "eval_loss": 1.3485227823257446, "eval_runtime": 8.7063, "eval_samples_per_second": 539.036, "eval_steps_per_second": 67.423, "step": 277000 }, { "epoch": 63.6, "learning_rate": 3.180151272060509e-05, "loss": 1.0249, "step": 277500 }, { "epoch": 63.6, "eval_loss": 1.3440685272216797, "eval_runtime": 8.7053, "eval_samples_per_second": 539.1, "eval_steps_per_second": 67.431, "step": 277500 }, { "epoch": 63.72, "learning_rate": 3.18588127435251e-05, "loss": 1.0275, "step": 278000 }, { "epoch": 63.72, "eval_loss": 1.349814772605896, "eval_runtime": 8.7041, "eval_samples_per_second": 539.174, "eval_steps_per_second": 67.44, "step": 278000 }, { "epoch": 63.83, "learning_rate": 3.1916112766445105e-05, "loss": 1.0306, "step": 278500 }, { "epoch": 63.83, "eval_loss": 1.3630696535110474, "eval_runtime": 8.7066, "eval_samples_per_second": 539.014, "eval_steps_per_second": 67.42, "step": 278500 }, { "epoch": 63.95, "learning_rate": 3.1973412789365115e-05, "loss": 1.0284, "step": 279000 }, { "epoch": 63.95, "eval_loss": 1.363878607749939, "eval_runtime": 8.7105, "eval_samples_per_second": 538.777, "eval_steps_per_second": 67.39, "step": 279000 }, { "epoch": 64.06, "learning_rate": 3.2030712812285126e-05, "loss": 1.0249, "step": 279500 }, { "epoch": 64.06, "eval_loss": 1.375864863395691, "eval_runtime": 8.7016, "eval_samples_per_second": 539.329, "eval_steps_per_second": 67.459, "step": 279500 }, { "epoch": 64.18, "learning_rate": 3.2088012835205136e-05, "loss": 1.0239, "step": 280000 }, { "epoch": 64.18, "eval_loss": 1.3358557224273682, "eval_runtime": 8.7037, "eval_samples_per_second": 539.195, "eval_steps_per_second": 67.442, "step": 280000 }, { "epoch": 64.29, "learning_rate": 3.214531285812514e-05, "loss": 1.0165, "step": 280500 }, { "epoch": 64.29, "eval_loss": 1.3529762029647827, "eval_runtime": 8.7016, "eval_samples_per_second": 539.326, "eval_steps_per_second": 67.459, "step": 280500 }, { "epoch": 64.41, "learning_rate": 3.220261288104515e-05, "loss": 1.0258, "step": 281000 }, { "epoch": 64.41, "eval_loss": 1.3594284057617188, "eval_runtime": 8.7092, "eval_samples_per_second": 538.856, "eval_steps_per_second": 67.4, "step": 281000 }, { "epoch": 64.52, "learning_rate": 3.225991290396516e-05, "loss": 1.0175, "step": 281500 }, { "epoch": 64.52, "eval_loss": 1.3715450763702393, "eval_runtime": 8.7, "eval_samples_per_second": 539.428, "eval_steps_per_second": 67.472, "step": 281500 }, { "epoch": 64.63, "learning_rate": 3.231721292688517e-05, "loss": 1.0239, "step": 282000 }, { "epoch": 64.63, "eval_loss": 1.3696200847625732, "eval_runtime": 8.7075, "eval_samples_per_second": 538.962, "eval_steps_per_second": 67.413, "step": 282000 }, { "epoch": 64.75, "learning_rate": 3.237451294980518e-05, "loss": 1.0235, "step": 282500 }, { "epoch": 64.75, "eval_loss": 1.3520768880844116, "eval_runtime": 8.7011, "eval_samples_per_second": 539.355, "eval_steps_per_second": 67.462, "step": 282500 }, { "epoch": 64.86, "learning_rate": 3.243181297272519e-05, "loss": 1.0317, "step": 283000 }, { "epoch": 64.86, "eval_loss": 1.3434075117111206, "eval_runtime": 8.7077, "eval_samples_per_second": 538.947, "eval_steps_per_second": 67.411, "step": 283000 }, { "epoch": 64.98, "learning_rate": 3.2489112995645196e-05, "loss": 1.0333, "step": 283500 }, { "epoch": 64.98, "eval_loss": 1.3761160373687744, "eval_runtime": 8.7014, "eval_samples_per_second": 539.341, "eval_steps_per_second": 67.461, "step": 283500 }, { "epoch": 65.09, "learning_rate": 3.254641301856521e-05, "loss": 1.0134, "step": 284000 }, { "epoch": 65.09, "eval_loss": 1.3384641408920288, "eval_runtime": 8.7007, "eval_samples_per_second": 539.379, "eval_steps_per_second": 67.465, "step": 284000 }, { "epoch": 65.21, "learning_rate": 3.260371304148522e-05, "loss": 1.0152, "step": 284500 }, { "epoch": 65.21, "eval_loss": 1.3741552829742432, "eval_runtime": 8.7052, "eval_samples_per_second": 539.105, "eval_steps_per_second": 67.431, "step": 284500 }, { "epoch": 65.32, "learning_rate": 3.266101306440523e-05, "loss": 1.023, "step": 285000 }, { "epoch": 65.32, "eval_loss": 1.374695062637329, "eval_runtime": 8.706, "eval_samples_per_second": 539.052, "eval_steps_per_second": 67.425, "step": 285000 }, { "epoch": 65.44, "learning_rate": 3.271831308732524e-05, "loss": 1.0251, "step": 285500 }, { "epoch": 65.44, "eval_loss": 1.3663524389266968, "eval_runtime": 8.7094, "eval_samples_per_second": 538.844, "eval_steps_per_second": 67.399, "step": 285500 }, { "epoch": 65.55, "learning_rate": 3.277561311024525e-05, "loss": 1.0239, "step": 286000 }, { "epoch": 65.55, "eval_loss": 1.3766542673110962, "eval_runtime": 8.7136, "eval_samples_per_second": 538.583, "eval_steps_per_second": 67.366, "step": 286000 }, { "epoch": 65.67, "learning_rate": 3.283291313316525e-05, "loss": 1.0278, "step": 286500 }, { "epoch": 65.67, "eval_loss": 1.3730065822601318, "eval_runtime": 8.7123, "eval_samples_per_second": 538.666, "eval_steps_per_second": 67.376, "step": 286500 }, { "epoch": 65.78, "learning_rate": 3.289021315608526e-05, "loss": 1.0155, "step": 287000 }, { "epoch": 65.78, "eval_loss": 1.3601492643356323, "eval_runtime": 8.7118, "eval_samples_per_second": 538.697, "eval_steps_per_second": 67.38, "step": 287000 }, { "epoch": 65.89, "learning_rate": 3.294751317900527e-05, "loss": 1.0315, "step": 287500 }, { "epoch": 65.89, "eval_loss": 1.3673959970474243, "eval_runtime": 8.7066, "eval_samples_per_second": 539.016, "eval_steps_per_second": 67.42, "step": 287500 }, { "epoch": 66.01, "learning_rate": 3.3004813201925284e-05, "loss": 1.0235, "step": 288000 }, { "epoch": 66.01, "eval_loss": 1.349502682685852, "eval_runtime": 8.7053, "eval_samples_per_second": 539.099, "eval_steps_per_second": 67.431, "step": 288000 }, { "epoch": 66.12, "learning_rate": 3.3062113224845294e-05, "loss": 1.0183, "step": 288500 }, { "epoch": 66.12, "eval_loss": 1.3614422082901, "eval_runtime": 8.7102, "eval_samples_per_second": 538.795, "eval_steps_per_second": 67.392, "step": 288500 }, { "epoch": 66.24, "learning_rate": 3.3119413247765305e-05, "loss": 1.0082, "step": 289000 }, { "epoch": 66.24, "eval_loss": 1.3827067613601685, "eval_runtime": 8.7095, "eval_samples_per_second": 538.835, "eval_steps_per_second": 67.397, "step": 289000 }, { "epoch": 66.35, "learning_rate": 3.317671327068531e-05, "loss": 1.0184, "step": 289500 }, { "epoch": 66.35, "eval_loss": 1.37798273563385, "eval_runtime": 8.7043, "eval_samples_per_second": 539.159, "eval_steps_per_second": 67.438, "step": 289500 }, { "epoch": 66.47, "learning_rate": 3.323401329360532e-05, "loss": 1.0182, "step": 290000 }, { "epoch": 66.47, "eval_loss": 1.3609392642974854, "eval_runtime": 8.6994, "eval_samples_per_second": 539.465, "eval_steps_per_second": 67.476, "step": 290000 }, { "epoch": 66.58, "learning_rate": 3.329131331652533e-05, "loss": 1.0173, "step": 290500 }, { "epoch": 66.58, "eval_loss": 1.369789958000183, "eval_runtime": 8.7038, "eval_samples_per_second": 539.188, "eval_steps_per_second": 67.442, "step": 290500 }, { "epoch": 66.7, "learning_rate": 3.334861333944534e-05, "loss": 1.0216, "step": 291000 }, { "epoch": 66.7, "eval_loss": 1.3768802881240845, "eval_runtime": 8.7117, "eval_samples_per_second": 538.703, "eval_steps_per_second": 67.381, "step": 291000 }, { "epoch": 66.81, "learning_rate": 3.340591336236535e-05, "loss": 1.0301, "step": 291500 }, { "epoch": 66.81, "eval_loss": 1.3659776449203491, "eval_runtime": 8.6994, "eval_samples_per_second": 539.461, "eval_steps_per_second": 67.476, "step": 291500 }, { "epoch": 66.93, "learning_rate": 3.346321338528536e-05, "loss": 1.0181, "step": 292000 }, { "epoch": 66.93, "eval_loss": 1.3582611083984375, "eval_runtime": 8.7055, "eval_samples_per_second": 539.083, "eval_steps_per_second": 67.428, "step": 292000 }, { "epoch": 67.04, "learning_rate": 3.3520513408205365e-05, "loss": 1.0254, "step": 292500 }, { "epoch": 67.04, "eval_loss": 1.3684972524642944, "eval_runtime": 8.7059, "eval_samples_per_second": 539.062, "eval_steps_per_second": 67.426, "step": 292500 }, { "epoch": 67.16, "learning_rate": 3.3577813431125375e-05, "loss": 1.0172, "step": 293000 }, { "epoch": 67.16, "eval_loss": 1.389109492301941, "eval_runtime": 8.7053, "eval_samples_per_second": 539.096, "eval_steps_per_second": 67.43, "step": 293000 }, { "epoch": 67.27, "learning_rate": 3.363511345404538e-05, "loss": 1.0178, "step": 293500 }, { "epoch": 67.27, "eval_loss": 1.3705748319625854, "eval_runtime": 8.7023, "eval_samples_per_second": 539.28, "eval_steps_per_second": 67.453, "step": 293500 }, { "epoch": 67.38, "learning_rate": 3.369241347696539e-05, "loss": 1.0148, "step": 294000 }, { "epoch": 67.38, "eval_loss": 1.3807605504989624, "eval_runtime": 8.7135, "eval_samples_per_second": 538.592, "eval_steps_per_second": 67.367, "step": 294000 }, { "epoch": 67.5, "learning_rate": 3.37497134998854e-05, "loss": 1.0209, "step": 294500 }, { "epoch": 67.5, "eval_loss": 1.3786827325820923, "eval_runtime": 8.7168, "eval_samples_per_second": 538.385, "eval_steps_per_second": 67.341, "step": 294500 }, { "epoch": 67.61, "learning_rate": 3.380701352280541e-05, "loss": 1.0221, "step": 295000 }, { "epoch": 67.61, "eval_loss": 1.3814486265182495, "eval_runtime": 8.7071, "eval_samples_per_second": 538.985, "eval_steps_per_second": 67.416, "step": 295000 }, { "epoch": 67.73, "learning_rate": 3.386431354572542e-05, "loss": 1.0167, "step": 295500 }, { "epoch": 67.73, "eval_loss": 1.363027572631836, "eval_runtime": 8.7026, "eval_samples_per_second": 539.263, "eval_steps_per_second": 67.451, "step": 295500 }, { "epoch": 67.84, "learning_rate": 3.3921613568645425e-05, "loss": 1.0296, "step": 296000 }, { "epoch": 67.84, "eval_loss": 1.3607882261276245, "eval_runtime": 8.7115, "eval_samples_per_second": 538.713, "eval_steps_per_second": 67.382, "step": 296000 }, { "epoch": 67.96, "learning_rate": 3.3978913591565435e-05, "loss": 1.0231, "step": 296500 }, { "epoch": 67.96, "eval_loss": 1.3567008972167969, "eval_runtime": 8.7034, "eval_samples_per_second": 539.214, "eval_steps_per_second": 67.445, "step": 296500 }, { "epoch": 68.07, "learning_rate": 3.4036213614485446e-05, "loss": 1.0189, "step": 297000 }, { "epoch": 68.07, "eval_loss": 1.3802858591079712, "eval_runtime": 8.702, "eval_samples_per_second": 539.299, "eval_steps_per_second": 67.455, "step": 297000 }, { "epoch": 68.19, "learning_rate": 3.4093513637405456e-05, "loss": 1.0124, "step": 297500 }, { "epoch": 68.19, "eval_loss": 1.3887970447540283, "eval_runtime": 8.6995, "eval_samples_per_second": 539.457, "eval_steps_per_second": 67.475, "step": 297500 }, { "epoch": 68.3, "learning_rate": 3.4150813660325467e-05, "loss": 1.0223, "step": 298000 }, { "epoch": 68.3, "eval_loss": 1.3773611783981323, "eval_runtime": 8.7094, "eval_samples_per_second": 538.842, "eval_steps_per_second": 67.398, "step": 298000 }, { "epoch": 68.42, "learning_rate": 3.420811368324548e-05, "loss": 1.017, "step": 298500 }, { "epoch": 68.42, "eval_loss": 1.3770660161972046, "eval_runtime": 8.6985, "eval_samples_per_second": 539.52, "eval_steps_per_second": 67.483, "step": 298500 }, { "epoch": 68.53, "learning_rate": 3.426541370616548e-05, "loss": 1.0165, "step": 299000 }, { "epoch": 68.53, "eval_loss": 1.3791484832763672, "eval_runtime": 8.7059, "eval_samples_per_second": 539.058, "eval_steps_per_second": 67.425, "step": 299000 }, { "epoch": 68.65, "learning_rate": 3.432271372908549e-05, "loss": 1.0156, "step": 299500 }, { "epoch": 68.65, "eval_loss": 1.3951102495193481, "eval_runtime": 8.7074, "eval_samples_per_second": 538.968, "eval_steps_per_second": 67.414, "step": 299500 }, { "epoch": 68.76, "learning_rate": 3.43800137520055e-05, "loss": 1.0275, "step": 300000 }, { "epoch": 68.76, "eval_loss": 1.376380443572998, "eval_runtime": 8.705, "eval_samples_per_second": 539.117, "eval_steps_per_second": 67.433, "step": 300000 }, { "epoch": 68.87, "learning_rate": 3.443731377492551e-05, "loss": 1.0224, "step": 300500 }, { "epoch": 68.87, "eval_loss": 1.3741403818130493, "eval_runtime": 8.7036, "eval_samples_per_second": 539.202, "eval_steps_per_second": 67.443, "step": 300500 }, { "epoch": 68.99, "learning_rate": 3.449461379784552e-05, "loss": 1.0237, "step": 301000 }, { "epoch": 68.99, "eval_loss": 1.3737797737121582, "eval_runtime": 8.7128, "eval_samples_per_second": 538.634, "eval_steps_per_second": 67.372, "step": 301000 }, { "epoch": 69.1, "learning_rate": 3.455191382076553e-05, "loss": 1.0181, "step": 301500 }, { "epoch": 69.1, "eval_loss": 1.3873779773712158, "eval_runtime": 8.7021, "eval_samples_per_second": 539.293, "eval_steps_per_second": 67.455, "step": 301500 }, { "epoch": 69.22, "learning_rate": 3.460921384368554e-05, "loss": 1.019, "step": 302000 }, { "epoch": 69.22, "eval_loss": 1.3679643869400024, "eval_runtime": 8.7104, "eval_samples_per_second": 538.779, "eval_steps_per_second": 67.39, "step": 302000 }, { "epoch": 69.33, "learning_rate": 3.466651386660555e-05, "loss": 1.0186, "step": 302500 }, { "epoch": 69.33, "eval_loss": 1.3586063385009766, "eval_runtime": 8.7141, "eval_samples_per_second": 538.555, "eval_steps_per_second": 67.362, "step": 302500 }, { "epoch": 69.45, "learning_rate": 3.472381388952556e-05, "loss": 1.0126, "step": 303000 }, { "epoch": 69.45, "eval_loss": 1.3916596174240112, "eval_runtime": 8.7124, "eval_samples_per_second": 538.66, "eval_steps_per_second": 67.376, "step": 303000 }, { "epoch": 69.56, "learning_rate": 3.478111391244557e-05, "loss": 1.0152, "step": 303500 }, { "epoch": 69.56, "eval_loss": 1.4082363843917847, "eval_runtime": 8.7023, "eval_samples_per_second": 539.284, "eval_steps_per_second": 67.454, "step": 303500 }, { "epoch": 69.68, "learning_rate": 3.483841393536558e-05, "loss": 1.0245, "step": 304000 }, { "epoch": 69.68, "eval_loss": 1.3900960683822632, "eval_runtime": 8.7146, "eval_samples_per_second": 538.522, "eval_steps_per_second": 67.358, "step": 304000 }, { "epoch": 69.79, "learning_rate": 3.489571395828559e-05, "loss": 1.0203, "step": 304500 }, { "epoch": 69.79, "eval_loss": 1.3862849473953247, "eval_runtime": 8.7064, "eval_samples_per_second": 539.029, "eval_steps_per_second": 67.422, "step": 304500 }, { "epoch": 69.91, "learning_rate": 3.495301398120559e-05, "loss": 1.0189, "step": 305000 }, { "epoch": 69.91, "eval_loss": 1.3776986598968506, "eval_runtime": 8.7064, "eval_samples_per_second": 539.031, "eval_steps_per_second": 67.422, "step": 305000 }, { "epoch": 70.02, "learning_rate": 3.5010314004125604e-05, "loss": 1.0249, "step": 305500 }, { "epoch": 70.02, "eval_loss": 1.38228440284729, "eval_runtime": 8.704, "eval_samples_per_second": 539.176, "eval_steps_per_second": 67.44, "step": 305500 }, { "epoch": 70.14, "learning_rate": 3.5067614027045614e-05, "loss": 1.0111, "step": 306000 }, { "epoch": 70.14, "eval_loss": 1.3753360509872437, "eval_runtime": 8.708, "eval_samples_per_second": 538.93, "eval_steps_per_second": 67.409, "step": 306000 }, { "epoch": 70.25, "learning_rate": 3.5124914049965625e-05, "loss": 1.0103, "step": 306500 }, { "epoch": 70.25, "eval_loss": 1.3972842693328857, "eval_runtime": 8.7072, "eval_samples_per_second": 538.978, "eval_steps_per_second": 67.415, "step": 306500 }, { "epoch": 70.36, "learning_rate": 3.5182214072885635e-05, "loss": 1.0183, "step": 307000 }, { "epoch": 70.36, "eval_loss": 1.383960485458374, "eval_runtime": 8.7179, "eval_samples_per_second": 538.319, "eval_steps_per_second": 67.333, "step": 307000 }, { "epoch": 70.48, "learning_rate": 3.5239514095805646e-05, "loss": 1.0119, "step": 307500 }, { "epoch": 70.48, "eval_loss": 1.3955528736114502, "eval_runtime": 8.7096, "eval_samples_per_second": 538.83, "eval_steps_per_second": 67.397, "step": 307500 }, { "epoch": 70.59, "learning_rate": 3.529681411872565e-05, "loss": 1.0119, "step": 308000 }, { "epoch": 70.59, "eval_loss": 1.4009934663772583, "eval_runtime": 8.7102, "eval_samples_per_second": 538.793, "eval_steps_per_second": 67.392, "step": 308000 }, { "epoch": 70.71, "learning_rate": 3.535411414164565e-05, "loss": 1.0177, "step": 308500 }, { "epoch": 70.71, "eval_loss": 1.3747743368148804, "eval_runtime": 8.709, "eval_samples_per_second": 538.867, "eval_steps_per_second": 67.401, "step": 308500 }, { "epoch": 70.82, "learning_rate": 3.5411414164565663e-05, "loss": 1.0176, "step": 309000 }, { "epoch": 70.82, "eval_loss": 1.3907277584075928, "eval_runtime": 8.702, "eval_samples_per_second": 539.299, "eval_steps_per_second": 67.456, "step": 309000 }, { "epoch": 70.94, "learning_rate": 3.5468714187485674e-05, "loss": 1.0249, "step": 309500 }, { "epoch": 70.94, "eval_loss": 1.3820849657058716, "eval_runtime": 8.701, "eval_samples_per_second": 539.366, "eval_steps_per_second": 67.464, "step": 309500 }, { "epoch": 71.05, "learning_rate": 3.5526014210405684e-05, "loss": 1.0087, "step": 310000 }, { "epoch": 71.05, "eval_loss": 1.3861970901489258, "eval_runtime": 8.7076, "eval_samples_per_second": 538.955, "eval_steps_per_second": 67.413, "step": 310000 }, { "epoch": 71.17, "learning_rate": 3.5583314233325695e-05, "loss": 1.0061, "step": 310500 }, { "epoch": 71.17, "eval_loss": 1.3845127820968628, "eval_runtime": 8.7082, "eval_samples_per_second": 538.915, "eval_steps_per_second": 67.407, "step": 310500 }, { "epoch": 71.28, "learning_rate": 3.5640614256245705e-05, "loss": 1.0171, "step": 311000 }, { "epoch": 71.28, "eval_loss": 1.3760930299758911, "eval_runtime": 8.7111, "eval_samples_per_second": 538.735, "eval_steps_per_second": 67.385, "step": 311000 }, { "epoch": 71.4, "learning_rate": 3.569791427916571e-05, "loss": 1.0089, "step": 311500 }, { "epoch": 71.4, "eval_loss": 1.38677978515625, "eval_runtime": 8.7103, "eval_samples_per_second": 538.789, "eval_steps_per_second": 67.392, "step": 311500 }, { "epoch": 71.51, "learning_rate": 3.575521430208572e-05, "loss": 1.0154, "step": 312000 }, { "epoch": 71.51, "eval_loss": 1.3720355033874512, "eval_runtime": 8.7122, "eval_samples_per_second": 538.672, "eval_steps_per_second": 67.377, "step": 312000 }, { "epoch": 71.62, "learning_rate": 3.581251432500573e-05, "loss": 1.013, "step": 312500 }, { "epoch": 71.62, "eval_loss": 1.4033665657043457, "eval_runtime": 8.7019, "eval_samples_per_second": 539.306, "eval_steps_per_second": 67.456, "step": 312500 }, { "epoch": 71.74, "learning_rate": 3.586981434792574e-05, "loss": 1.0234, "step": 313000 }, { "epoch": 71.74, "eval_loss": 1.3992605209350586, "eval_runtime": 8.7092, "eval_samples_per_second": 538.855, "eval_steps_per_second": 67.4, "step": 313000 }, { "epoch": 71.85, "learning_rate": 3.592711437084575e-05, "loss": 1.0225, "step": 313500 }, { "epoch": 71.85, "eval_loss": 1.377051591873169, "eval_runtime": 8.7092, "eval_samples_per_second": 538.858, "eval_steps_per_second": 67.4, "step": 313500 }, { "epoch": 71.97, "learning_rate": 3.598441439376576e-05, "loss": 1.0259, "step": 314000 }, { "epoch": 71.97, "eval_loss": 1.4058525562286377, "eval_runtime": 8.7064, "eval_samples_per_second": 539.029, "eval_steps_per_second": 67.422, "step": 314000 }, { "epoch": 72.08, "learning_rate": 3.6041714416685765e-05, "loss": 1.0124, "step": 314500 }, { "epoch": 72.08, "eval_loss": 1.3905483484268188, "eval_runtime": 8.705, "eval_samples_per_second": 539.114, "eval_steps_per_second": 67.432, "step": 314500 }, { "epoch": 72.2, "learning_rate": 3.6099014439605776e-05, "loss": 1.0109, "step": 315000 }, { "epoch": 72.2, "eval_loss": 1.4039469957351685, "eval_runtime": 8.7169, "eval_samples_per_second": 538.377, "eval_steps_per_second": 67.34, "step": 315000 }, { "epoch": 72.31, "learning_rate": 3.6156314462525786e-05, "loss": 1.0082, "step": 315500 }, { "epoch": 72.31, "eval_loss": 1.3876678943634033, "eval_runtime": 8.7064, "eval_samples_per_second": 539.028, "eval_steps_per_second": 67.422, "step": 315500 }, { "epoch": 72.43, "learning_rate": 3.62136144854458e-05, "loss": 1.0214, "step": 316000 }, { "epoch": 72.43, "eval_loss": 1.3978514671325684, "eval_runtime": 8.7112, "eval_samples_per_second": 538.731, "eval_steps_per_second": 67.384, "step": 316000 }, { "epoch": 72.54, "learning_rate": 3.627091450836581e-05, "loss": 1.02, "step": 316500 }, { "epoch": 72.54, "eval_loss": 1.3908727169036865, "eval_runtime": 8.7057, "eval_samples_per_second": 539.072, "eval_steps_per_second": 67.427, "step": 316500 }, { "epoch": 72.66, "learning_rate": 3.632821453128582e-05, "loss": 1.012, "step": 317000 }, { "epoch": 72.66, "eval_loss": 1.4017356634140015, "eval_runtime": 8.707, "eval_samples_per_second": 538.993, "eval_steps_per_second": 67.417, "step": 317000 }, { "epoch": 72.77, "learning_rate": 3.638551455420582e-05, "loss": 1.0169, "step": 317500 }, { "epoch": 72.77, "eval_loss": 1.3962574005126953, "eval_runtime": 8.7011, "eval_samples_per_second": 539.358, "eval_steps_per_second": 67.463, "step": 317500 }, { "epoch": 72.89, "learning_rate": 3.644281457712583e-05, "loss": 1.0228, "step": 318000 }, { "epoch": 72.89, "eval_loss": 1.384897232055664, "eval_runtime": 8.7123, "eval_samples_per_second": 538.663, "eval_steps_per_second": 67.376, "step": 318000 }, { "epoch": 73.0, "learning_rate": 3.650011460004584e-05, "loss": 1.0198, "step": 318500 }, { "epoch": 73.0, "eval_loss": 1.3963470458984375, "eval_runtime": 8.7085, "eval_samples_per_second": 538.896, "eval_steps_per_second": 67.405, "step": 318500 }, { "epoch": 73.11, "learning_rate": 3.655741462296585e-05, "loss": 1.0065, "step": 319000 }, { "epoch": 73.11, "eval_loss": 1.381343126296997, "eval_runtime": 8.7097, "eval_samples_per_second": 538.827, "eval_steps_per_second": 67.396, "step": 319000 }, { "epoch": 73.23, "learning_rate": 3.6614714645885863e-05, "loss": 1.0065, "step": 319500 }, { "epoch": 73.23, "eval_loss": 1.3852804899215698, "eval_runtime": 8.7104, "eval_samples_per_second": 538.78, "eval_steps_per_second": 67.391, "step": 319500 }, { "epoch": 73.34, "learning_rate": 3.6672014668805874e-05, "loss": 1.0102, "step": 320000 }, { "epoch": 73.34, "eval_loss": 1.407450795173645, "eval_runtime": 8.7118, "eval_samples_per_second": 538.693, "eval_steps_per_second": 67.38, "step": 320000 }, { "epoch": 73.46, "learning_rate": 3.672931469172588e-05, "loss": 1.0131, "step": 320500 }, { "epoch": 73.46, "eval_loss": 1.4081813097000122, "eval_runtime": 8.7045, "eval_samples_per_second": 539.149, "eval_steps_per_second": 67.437, "step": 320500 }, { "epoch": 73.57, "learning_rate": 3.678661471464589e-05, "loss": 1.0142, "step": 321000 }, { "epoch": 73.57, "eval_loss": 1.4033697843551636, "eval_runtime": 8.7137, "eval_samples_per_second": 538.576, "eval_steps_per_second": 67.365, "step": 321000 }, { "epoch": 73.69, "learning_rate": 3.68439147375659e-05, "loss": 1.0221, "step": 321500 }, { "epoch": 73.69, "eval_loss": 1.4035193920135498, "eval_runtime": 8.7072, "eval_samples_per_second": 538.979, "eval_steps_per_second": 67.415, "step": 321500 }, { "epoch": 73.8, "learning_rate": 3.690121476048591e-05, "loss": 1.015, "step": 322000 }, { "epoch": 73.8, "eval_loss": 1.3975181579589844, "eval_runtime": 8.7067, "eval_samples_per_second": 539.011, "eval_steps_per_second": 67.42, "step": 322000 }, { "epoch": 73.92, "learning_rate": 3.695851478340592e-05, "loss": 1.0145, "step": 322500 }, { "epoch": 73.92, "eval_loss": 1.3982576131820679, "eval_runtime": 8.7055, "eval_samples_per_second": 539.084, "eval_steps_per_second": 67.429, "step": 322500 }, { "epoch": 74.03, "learning_rate": 3.701581480632593e-05, "loss": 1.0204, "step": 323000 }, { "epoch": 74.03, "eval_loss": 1.428666353225708, "eval_runtime": 8.7215, "eval_samples_per_second": 538.097, "eval_steps_per_second": 67.305, "step": 323000 }, { "epoch": 74.15, "learning_rate": 3.7073114829245934e-05, "loss": 1.009, "step": 323500 }, { "epoch": 74.15, "eval_loss": 1.3894199132919312, "eval_runtime": 8.7052, "eval_samples_per_second": 539.103, "eval_steps_per_second": 67.431, "step": 323500 }, { "epoch": 74.26, "learning_rate": 3.713041485216594e-05, "loss": 1.0136, "step": 324000 }, { "epoch": 74.26, "eval_loss": 1.405260682106018, "eval_runtime": 8.7099, "eval_samples_per_second": 538.815, "eval_steps_per_second": 67.395, "step": 324000 }, { "epoch": 74.38, "learning_rate": 3.718771487508595e-05, "loss": 1.0139, "step": 324500 }, { "epoch": 74.38, "eval_loss": 1.4089142084121704, "eval_runtime": 8.699, "eval_samples_per_second": 539.489, "eval_steps_per_second": 67.479, "step": 324500 }, { "epoch": 74.49, "learning_rate": 3.724501489800596e-05, "loss": 1.012, "step": 325000 }, { "epoch": 74.49, "eval_loss": 1.4075771570205688, "eval_runtime": 8.7045, "eval_samples_per_second": 539.149, "eval_steps_per_second": 67.437, "step": 325000 }, { "epoch": 74.6, "learning_rate": 3.730231492092597e-05, "loss": 1.009, "step": 325500 }, { "epoch": 74.6, "eval_loss": 1.4152483940124512, "eval_runtime": 8.7096, "eval_samples_per_second": 538.833, "eval_steps_per_second": 67.397, "step": 325500 }, { "epoch": 74.72, "learning_rate": 3.735961494384598e-05, "loss": 1.0164, "step": 326000 }, { "epoch": 74.72, "eval_loss": 1.4037199020385742, "eval_runtime": 8.7098, "eval_samples_per_second": 538.819, "eval_steps_per_second": 67.395, "step": 326000 }, { "epoch": 74.83, "learning_rate": 3.741691496676599e-05, "loss": 1.0132, "step": 326500 }, { "epoch": 74.83, "eval_loss": 1.4003499746322632, "eval_runtime": 8.7065, "eval_samples_per_second": 539.023, "eval_steps_per_second": 67.421, "step": 326500 }, { "epoch": 74.95, "learning_rate": 3.7474214989685994e-05, "loss": 1.0127, "step": 327000 }, { "epoch": 74.95, "eval_loss": 1.4059809446334839, "eval_runtime": 8.7071, "eval_samples_per_second": 538.986, "eval_steps_per_second": 67.416, "step": 327000 }, { "epoch": 75.06, "learning_rate": 3.7531515012606004e-05, "loss": 1.0179, "step": 327500 }, { "epoch": 75.06, "eval_loss": 1.4044592380523682, "eval_runtime": 8.7097, "eval_samples_per_second": 538.822, "eval_steps_per_second": 67.396, "step": 327500 }, { "epoch": 75.18, "learning_rate": 3.7588815035526015e-05, "loss": 1.0012, "step": 328000 }, { "epoch": 75.18, "eval_loss": 1.4166276454925537, "eval_runtime": 8.7111, "eval_samples_per_second": 538.738, "eval_steps_per_second": 67.385, "step": 328000 }, { "epoch": 75.29, "learning_rate": 3.7646115058446025e-05, "loss": 1.0147, "step": 328500 }, { "epoch": 75.29, "eval_loss": 1.4005366563796997, "eval_runtime": 8.7079, "eval_samples_per_second": 538.938, "eval_steps_per_second": 67.41, "step": 328500 }, { "epoch": 75.41, "learning_rate": 3.7703415081366036e-05, "loss": 1.013, "step": 329000 }, { "epoch": 75.41, "eval_loss": 1.391733169555664, "eval_runtime": 8.7121, "eval_samples_per_second": 538.677, "eval_steps_per_second": 67.378, "step": 329000 }, { "epoch": 75.52, "learning_rate": 3.7760715104286046e-05, "loss": 1.005, "step": 329500 }, { "epoch": 75.52, "eval_loss": 1.4001396894454956, "eval_runtime": 8.7047, "eval_samples_per_second": 539.131, "eval_steps_per_second": 67.434, "step": 329500 }, { "epoch": 75.64, "learning_rate": 3.781801512720605e-05, "loss": 1.0193, "step": 330000 }, { "epoch": 75.64, "eval_loss": 1.4167556762695312, "eval_runtime": 8.7108, "eval_samples_per_second": 538.754, "eval_steps_per_second": 67.387, "step": 330000 }, { "epoch": 75.75, "learning_rate": 3.787531515012606e-05, "loss": 1.0201, "step": 330500 }, { "epoch": 75.75, "eval_loss": 1.4034087657928467, "eval_runtime": 8.7014, "eval_samples_per_second": 539.338, "eval_steps_per_second": 67.46, "step": 330500 }, { "epoch": 75.87, "learning_rate": 3.793261517304607e-05, "loss": 1.0156, "step": 331000 }, { "epoch": 75.87, "eval_loss": 1.4023092985153198, "eval_runtime": 8.727, "eval_samples_per_second": 537.756, "eval_steps_per_second": 67.262, "step": 331000 }, { "epoch": 75.98, "learning_rate": 3.798991519596608e-05, "loss": 1.0177, "step": 331500 }, { "epoch": 75.98, "eval_loss": 1.4122792482376099, "eval_runtime": 8.7074, "eval_samples_per_second": 538.967, "eval_steps_per_second": 67.414, "step": 331500 }, { "epoch": 76.09, "learning_rate": 3.804721521888609e-05, "loss": 1.0113, "step": 332000 }, { "epoch": 76.09, "eval_loss": 1.4002151489257812, "eval_runtime": 8.7095, "eval_samples_per_second": 538.834, "eval_steps_per_second": 67.397, "step": 332000 }, { "epoch": 76.21, "learning_rate": 3.81045152418061e-05, "loss": 1.0085, "step": 332500 }, { "epoch": 76.21, "eval_loss": 1.4258400201797485, "eval_runtime": 8.7063, "eval_samples_per_second": 539.032, "eval_steps_per_second": 67.422, "step": 332500 }, { "epoch": 76.32, "learning_rate": 3.8161815264726106e-05, "loss": 1.0098, "step": 333000 }, { "epoch": 76.32, "eval_loss": 1.4122438430786133, "eval_runtime": 8.7068, "eval_samples_per_second": 539.005, "eval_steps_per_second": 67.419, "step": 333000 }, { "epoch": 76.44, "learning_rate": 3.8219115287646117e-05, "loss": 1.0087, "step": 333500 }, { "epoch": 76.44, "eval_loss": 1.4085214138031006, "eval_runtime": 8.7061, "eval_samples_per_second": 539.047, "eval_steps_per_second": 67.424, "step": 333500 }, { "epoch": 76.55, "learning_rate": 3.827641531056613e-05, "loss": 1.0061, "step": 334000 }, { "epoch": 76.55, "eval_loss": 1.402705430984497, "eval_runtime": 8.7088, "eval_samples_per_second": 538.883, "eval_steps_per_second": 67.403, "step": 334000 }, { "epoch": 76.67, "learning_rate": 3.833371533348614e-05, "loss": 1.0089, "step": 334500 }, { "epoch": 76.67, "eval_loss": 1.4385571479797363, "eval_runtime": 8.7107, "eval_samples_per_second": 538.76, "eval_steps_per_second": 67.388, "step": 334500 }, { "epoch": 76.78, "learning_rate": 3.839101535640615e-05, "loss": 1.0134, "step": 335000 }, { "epoch": 76.78, "eval_loss": 1.4169107675552368, "eval_runtime": 8.706, "eval_samples_per_second": 539.055, "eval_steps_per_second": 67.425, "step": 335000 }, { "epoch": 76.9, "learning_rate": 3.844831537932616e-05, "loss": 1.012, "step": 335500 }, { "epoch": 76.9, "eval_loss": 1.40097975730896, "eval_runtime": 8.7063, "eval_samples_per_second": 539.036, "eval_steps_per_second": 67.423, "step": 335500 }, { "epoch": 77.01, "learning_rate": 3.850561540224616e-05, "loss": 1.0118, "step": 336000 }, { "epoch": 77.01, "eval_loss": 1.4155247211456299, "eval_runtime": 8.7045, "eval_samples_per_second": 539.148, "eval_steps_per_second": 67.437, "step": 336000 }, { "epoch": 77.13, "learning_rate": 3.856291542516617e-05, "loss": 1.0032, "step": 336500 }, { "epoch": 77.13, "eval_loss": 1.4304497241973877, "eval_runtime": 8.7098, "eval_samples_per_second": 538.817, "eval_steps_per_second": 67.395, "step": 336500 }, { "epoch": 77.24, "learning_rate": 3.862021544808618e-05, "loss": 1.0054, "step": 337000 }, { "epoch": 77.24, "eval_loss": 1.4152199029922485, "eval_runtime": 8.7905, "eval_samples_per_second": 533.874, "eval_steps_per_second": 66.777, "step": 337000 }, { "epoch": 77.35, "learning_rate": 3.8677515471006194e-05, "loss": 1.0045, "step": 337500 }, { "epoch": 77.35, "eval_loss": 1.4189225435256958, "eval_runtime": 8.7032, "eval_samples_per_second": 539.224, "eval_steps_per_second": 67.446, "step": 337500 }, { "epoch": 77.47, "learning_rate": 3.8734815493926204e-05, "loss": 1.0111, "step": 338000 }, { "epoch": 77.47, "eval_loss": 1.4227728843688965, "eval_runtime": 8.7121, "eval_samples_per_second": 538.678, "eval_steps_per_second": 67.378, "step": 338000 }, { "epoch": 77.58, "learning_rate": 3.8792115516846215e-05, "loss": 1.0126, "step": 338500 }, { "epoch": 77.58, "eval_loss": 1.4095380306243896, "eval_runtime": 8.7133, "eval_samples_per_second": 538.601, "eval_steps_per_second": 67.368, "step": 338500 }, { "epoch": 77.7, "learning_rate": 3.884941553976622e-05, "loss": 1.019, "step": 339000 }, { "epoch": 77.7, "eval_loss": 1.4165436029434204, "eval_runtime": 8.7183, "eval_samples_per_second": 538.293, "eval_steps_per_second": 67.33, "step": 339000 }, { "epoch": 77.81, "learning_rate": 3.890671556268622e-05, "loss": 1.013, "step": 339500 }, { "epoch": 77.81, "eval_loss": 1.429601788520813, "eval_runtime": 8.7136, "eval_samples_per_second": 538.582, "eval_steps_per_second": 67.366, "step": 339500 }, { "epoch": 77.93, "learning_rate": 3.896401558560623e-05, "loss": 1.0176, "step": 340000 }, { "epoch": 77.93, "eval_loss": 1.4349993467330933, "eval_runtime": 8.7023, "eval_samples_per_second": 539.283, "eval_steps_per_second": 67.453, "step": 340000 }, { "epoch": 78.04, "learning_rate": 3.902131560852624e-05, "loss": 1.0186, "step": 340500 }, { "epoch": 78.04, "eval_loss": 1.4340243339538574, "eval_runtime": 8.7082, "eval_samples_per_second": 538.917, "eval_steps_per_second": 67.408, "step": 340500 }, { "epoch": 78.16, "learning_rate": 3.9078615631446254e-05, "loss": 1.008, "step": 341000 }, { "epoch": 78.16, "eval_loss": 1.42844820022583, "eval_runtime": 8.71, "eval_samples_per_second": 538.809, "eval_steps_per_second": 67.394, "step": 341000 }, { "epoch": 78.27, "learning_rate": 3.9135915654366264e-05, "loss": 1.0076, "step": 341500 }, { "epoch": 78.27, "eval_loss": 1.4078553915023804, "eval_runtime": 8.7079, "eval_samples_per_second": 538.937, "eval_steps_per_second": 67.41, "step": 341500 }, { "epoch": 78.39, "learning_rate": 3.9193215677286275e-05, "loss": 1.0035, "step": 342000 }, { "epoch": 78.39, "eval_loss": 1.4130254983901978, "eval_runtime": 8.7627, "eval_samples_per_second": 535.564, "eval_steps_per_second": 66.988, "step": 342000 }, { "epoch": 78.5, "learning_rate": 3.925051570020628e-05, "loss": 1.0023, "step": 342500 }, { "epoch": 78.5, "eval_loss": 1.423601746559143, "eval_runtime": 8.8348, "eval_samples_per_second": 531.198, "eval_steps_per_second": 66.442, "step": 342500 }, { "epoch": 78.62, "learning_rate": 3.930781572312629e-05, "loss": 1.006, "step": 343000 }, { "epoch": 78.62, "eval_loss": 1.4238653182983398, "eval_runtime": 8.754, "eval_samples_per_second": 536.099, "eval_steps_per_second": 67.055, "step": 343000 }, { "epoch": 78.73, "learning_rate": 3.93651157460463e-05, "loss": 1.0189, "step": 343500 }, { "epoch": 78.73, "eval_loss": 1.4136544466018677, "eval_runtime": 8.6658, "eval_samples_per_second": 541.555, "eval_steps_per_second": 67.738, "step": 343500 }, { "epoch": 78.84, "learning_rate": 3.942241576896631e-05, "loss": 1.0152, "step": 344000 }, { "epoch": 78.84, "eval_loss": 1.4103562831878662, "eval_runtime": 8.6679, "eval_samples_per_second": 541.42, "eval_steps_per_second": 67.721, "step": 344000 }, { "epoch": 78.96, "learning_rate": 3.947971579188632e-05, "loss": 1.0177, "step": 344500 }, { "epoch": 78.96, "eval_loss": 1.4034671783447266, "eval_runtime": 8.6657, "eval_samples_per_second": 541.56, "eval_steps_per_second": 67.738, "step": 344500 }, { "epoch": 79.07, "learning_rate": 3.953701581480633e-05, "loss": 1.0085, "step": 345000 }, { "epoch": 79.07, "eval_loss": 1.4268403053283691, "eval_runtime": 8.6724, "eval_samples_per_second": 541.14, "eval_steps_per_second": 67.686, "step": 345000 }, { "epoch": 79.19, "learning_rate": 3.9594315837726334e-05, "loss": 1.0109, "step": 345500 }, { "epoch": 79.19, "eval_loss": 1.438470482826233, "eval_runtime": 8.6699, "eval_samples_per_second": 541.296, "eval_steps_per_second": 67.705, "step": 345500 }, { "epoch": 79.3, "learning_rate": 3.9651615860646345e-05, "loss": 1.0044, "step": 346000 }, { "epoch": 79.3, "eval_loss": 1.4240678548812866, "eval_runtime": 8.6746, "eval_samples_per_second": 541.004, "eval_steps_per_second": 67.669, "step": 346000 }, { "epoch": 79.42, "learning_rate": 3.9708915883566355e-05, "loss": 1.0075, "step": 346500 }, { "epoch": 79.42, "eval_loss": 1.437299370765686, "eval_runtime": 8.6708, "eval_samples_per_second": 541.242, "eval_steps_per_second": 67.698, "step": 346500 }, { "epoch": 79.53, "learning_rate": 3.9766215906486366e-05, "loss": 1.0079, "step": 347000 }, { "epoch": 79.53, "eval_loss": 1.4246315956115723, "eval_runtime": 8.6705, "eval_samples_per_second": 541.258, "eval_steps_per_second": 67.701, "step": 347000 }, { "epoch": 79.65, "learning_rate": 3.9823515929406376e-05, "loss": 1.0095, "step": 347500 }, { "epoch": 79.65, "eval_loss": 1.4245221614837646, "eval_runtime": 8.6675, "eval_samples_per_second": 541.446, "eval_steps_per_second": 67.724, "step": 347500 }, { "epoch": 79.76, "learning_rate": 3.988081595232639e-05, "loss": 1.0186, "step": 348000 }, { "epoch": 79.76, "eval_loss": 1.4426071643829346, "eval_runtime": 8.674, "eval_samples_per_second": 541.041, "eval_steps_per_second": 67.673, "step": 348000 }, { "epoch": 79.88, "learning_rate": 3.993811597524639e-05, "loss": 1.0187, "step": 348500 }, { "epoch": 79.88, "eval_loss": 1.4009429216384888, "eval_runtime": 8.667, "eval_samples_per_second": 541.477, "eval_steps_per_second": 67.728, "step": 348500 }, { "epoch": 79.99, "learning_rate": 3.99954159981664e-05, "loss": 1.0152, "step": 349000 }, { "epoch": 79.99, "eval_loss": 1.4294918775558472, "eval_runtime": 8.6723, "eval_samples_per_second": 541.147, "eval_steps_per_second": 67.687, "step": 349000 }, { "epoch": 80.11, "learning_rate": 4.005271602108641e-05, "loss": 1.0019, "step": 349500 }, { "epoch": 80.11, "eval_loss": 1.4263681173324585, "eval_runtime": 8.7929, "eval_samples_per_second": 533.728, "eval_steps_per_second": 66.759, "step": 349500 }, { "epoch": 80.22, "learning_rate": 4.011001604400642e-05, "loss": 1.001, "step": 350000 }, { "epoch": 80.22, "eval_loss": 1.4229109287261963, "eval_runtime": 8.7454, "eval_samples_per_second": 536.626, "eval_steps_per_second": 67.121, "step": 350000 }, { "epoch": 80.33, "learning_rate": 4.016731606692643e-05, "loss": 1.0072, "step": 350500 }, { "epoch": 80.33, "eval_loss": 1.4341545104980469, "eval_runtime": 8.7574, "eval_samples_per_second": 535.889, "eval_steps_per_second": 67.029, "step": 350500 }, { "epoch": 80.45, "learning_rate": 4.022461608984644e-05, "loss": 1.0071, "step": 351000 }, { "epoch": 80.45, "eval_loss": 1.432602047920227, "eval_runtime": 8.7546, "eval_samples_per_second": 536.06, "eval_steps_per_second": 67.05, "step": 351000 }, { "epoch": 80.56, "learning_rate": 4.028191611276645e-05, "loss": 1.0177, "step": 351500 }, { "epoch": 80.56, "eval_loss": 1.4319201707839966, "eval_runtime": 8.7481, "eval_samples_per_second": 536.459, "eval_steps_per_second": 67.1, "step": 351500 }, { "epoch": 80.68, "learning_rate": 4.033921613568646e-05, "loss": 1.0075, "step": 352000 }, { "epoch": 80.68, "eval_loss": 1.4318691492080688, "eval_runtime": 8.6785, "eval_samples_per_second": 540.764, "eval_steps_per_second": 67.639, "step": 352000 }, { "epoch": 80.79, "learning_rate": 4.039651615860647e-05, "loss": 1.0154, "step": 352500 }, { "epoch": 80.79, "eval_loss": 1.421528935432434, "eval_runtime": 8.6726, "eval_samples_per_second": 541.127, "eval_steps_per_second": 67.684, "step": 352500 }, { "epoch": 80.91, "learning_rate": 4.045381618152648e-05, "loss": 1.0138, "step": 353000 }, { "epoch": 80.91, "eval_loss": 1.4277173280715942, "eval_runtime": 8.6726, "eval_samples_per_second": 541.126, "eval_steps_per_second": 67.684, "step": 353000 }, { "epoch": 81.02, "learning_rate": 4.051111620444649e-05, "loss": 1.0151, "step": 353500 }, { "epoch": 81.02, "eval_loss": 1.422887921333313, "eval_runtime": 8.672, "eval_samples_per_second": 541.166, "eval_steps_per_second": 67.689, "step": 353500 }, { "epoch": 81.14, "learning_rate": 4.056841622736649e-05, "loss": 1.0011, "step": 354000 }, { "epoch": 81.14, "eval_loss": 1.4375884532928467, "eval_runtime": 8.6701, "eval_samples_per_second": 541.287, "eval_steps_per_second": 67.704, "step": 354000 }, { "epoch": 81.25, "learning_rate": 4.06257162502865e-05, "loss": 1.002, "step": 354500 }, { "epoch": 81.25, "eval_loss": 1.43974769115448, "eval_runtime": 8.6717, "eval_samples_per_second": 541.184, "eval_steps_per_second": 67.691, "step": 354500 }, { "epoch": 81.37, "learning_rate": 4.068301627320651e-05, "loss": 1.0025, "step": 355000 }, { "epoch": 81.37, "eval_loss": 1.4302562475204468, "eval_runtime": 8.6736, "eval_samples_per_second": 541.067, "eval_steps_per_second": 67.677, "step": 355000 }, { "epoch": 81.48, "learning_rate": 4.074031629612652e-05, "loss": 1.0107, "step": 355500 }, { "epoch": 81.48, "eval_loss": 1.4366774559020996, "eval_runtime": 8.6744, "eval_samples_per_second": 541.015, "eval_steps_per_second": 67.67, "step": 355500 }, { "epoch": 81.6, "learning_rate": 4.079761631904653e-05, "loss": 1.0064, "step": 356000 }, { "epoch": 81.6, "eval_loss": 1.4479013681411743, "eval_runtime": 8.6721, "eval_samples_per_second": 541.16, "eval_steps_per_second": 67.688, "step": 356000 }, { "epoch": 81.71, "learning_rate": 4.085491634196654e-05, "loss": 1.0118, "step": 356500 }, { "epoch": 81.71, "eval_loss": 1.4059827327728271, "eval_runtime": 8.669, "eval_samples_per_second": 541.355, "eval_steps_per_second": 67.713, "step": 356500 }, { "epoch": 81.82, "learning_rate": 4.091221636488655e-05, "loss": 1.0123, "step": 357000 }, { "epoch": 81.82, "eval_loss": 1.4489471912384033, "eval_runtime": 8.6741, "eval_samples_per_second": 541.034, "eval_steps_per_second": 67.672, "step": 357000 }, { "epoch": 81.94, "learning_rate": 4.096951638780656e-05, "loss": 1.0123, "step": 357500 }, { "epoch": 81.94, "eval_loss": 1.4318376779556274, "eval_runtime": 8.6708, "eval_samples_per_second": 541.242, "eval_steps_per_second": 67.699, "step": 357500 }, { "epoch": 82.05, "learning_rate": 4.102681641072656e-05, "loss": 1.0067, "step": 358000 }, { "epoch": 82.05, "eval_loss": 1.4338880777359009, "eval_runtime": 8.6731, "eval_samples_per_second": 541.096, "eval_steps_per_second": 67.68, "step": 358000 }, { "epoch": 82.17, "learning_rate": 4.108411643364657e-05, "loss": 1.0086, "step": 358500 }, { "epoch": 82.17, "eval_loss": 1.4194098711013794, "eval_runtime": 8.6672, "eval_samples_per_second": 541.467, "eval_steps_per_second": 67.727, "step": 358500 }, { "epoch": 82.28, "learning_rate": 4.1141416456566584e-05, "loss": 1.0018, "step": 359000 }, { "epoch": 82.28, "eval_loss": 1.4362928867340088, "eval_runtime": 8.7527, "eval_samples_per_second": 536.179, "eval_steps_per_second": 67.065, "step": 359000 }, { "epoch": 82.4, "learning_rate": 4.1198716479486594e-05, "loss": 1.0154, "step": 359500 }, { "epoch": 82.4, "eval_loss": 1.4303096532821655, "eval_runtime": 8.7469, "eval_samples_per_second": 536.534, "eval_steps_per_second": 67.11, "step": 359500 }, { "epoch": 82.51, "learning_rate": 4.1256016502406605e-05, "loss": 1.0081, "step": 360000 }, { "epoch": 82.51, "eval_loss": 1.4548158645629883, "eval_runtime": 8.7526, "eval_samples_per_second": 536.186, "eval_steps_per_second": 67.066, "step": 360000 }, { "epoch": 82.63, "learning_rate": 4.1313316525326615e-05, "loss": 1.0089, "step": 360500 }, { "epoch": 82.63, "eval_loss": 1.4506851434707642, "eval_runtime": 8.667, "eval_samples_per_second": 541.481, "eval_steps_per_second": 67.728, "step": 360500 }, { "epoch": 82.74, "learning_rate": 4.137061654824662e-05, "loss": 1.0148, "step": 361000 }, { "epoch": 82.74, "eval_loss": 1.4269438982009888, "eval_runtime": 8.6731, "eval_samples_per_second": 541.096, "eval_steps_per_second": 67.68, "step": 361000 }, { "epoch": 82.86, "learning_rate": 4.142791657116663e-05, "loss": 1.0136, "step": 361500 }, { "epoch": 82.86, "eval_loss": 1.4151461124420166, "eval_runtime": 8.6695, "eval_samples_per_second": 541.325, "eval_steps_per_second": 67.709, "step": 361500 }, { "epoch": 82.97, "learning_rate": 4.148521659408664e-05, "loss": 1.011, "step": 362000 }, { "epoch": 82.97, "eval_loss": 1.4382524490356445, "eval_runtime": 8.6715, "eval_samples_per_second": 541.198, "eval_steps_per_second": 67.693, "step": 362000 }, { "epoch": 83.09, "learning_rate": 4.154251661700665e-05, "loss": 1.0084, "step": 362500 }, { "epoch": 83.09, "eval_loss": 1.4467802047729492, "eval_runtime": 8.6686, "eval_samples_per_second": 541.381, "eval_steps_per_second": 67.716, "step": 362500 }, { "epoch": 83.2, "learning_rate": 4.159981663992666e-05, "loss": 0.9938, "step": 363000 }, { "epoch": 83.2, "eval_loss": 1.4338879585266113, "eval_runtime": 8.6729, "eval_samples_per_second": 541.11, "eval_steps_per_second": 67.682, "step": 363000 }, { "epoch": 83.31, "learning_rate": 4.1657116662846665e-05, "loss": 1.0018, "step": 363500 }, { "epoch": 83.31, "eval_loss": 1.4413965940475464, "eval_runtime": 8.6679, "eval_samples_per_second": 541.426, "eval_steps_per_second": 67.722, "step": 363500 }, { "epoch": 83.43, "learning_rate": 4.1714416685766675e-05, "loss": 1.0064, "step": 364000 }, { "epoch": 83.43, "eval_loss": 1.4399508237838745, "eval_runtime": 8.6733, "eval_samples_per_second": 541.089, "eval_steps_per_second": 67.679, "step": 364000 }, { "epoch": 83.54, "learning_rate": 4.1771716708686686e-05, "loss": 1.0056, "step": 364500 }, { "epoch": 83.54, "eval_loss": 1.4599405527114868, "eval_runtime": 8.6672, "eval_samples_per_second": 541.469, "eval_steps_per_second": 67.727, "step": 364500 }, { "epoch": 83.66, "learning_rate": 4.1829016731606696e-05, "loss": 1.0056, "step": 365000 }, { "epoch": 83.66, "eval_loss": 1.434768795967102, "eval_runtime": 8.6689, "eval_samples_per_second": 541.363, "eval_steps_per_second": 67.714, "step": 365000 }, { "epoch": 83.77, "learning_rate": 4.188631675452671e-05, "loss": 1.0132, "step": 365500 }, { "epoch": 83.77, "eval_loss": 1.4516572952270508, "eval_runtime": 8.6669, "eval_samples_per_second": 541.485, "eval_steps_per_second": 67.729, "step": 365500 }, { "epoch": 83.89, "learning_rate": 4.194361677744672e-05, "loss": 1.0141, "step": 366000 }, { "epoch": 83.89, "eval_loss": 1.4401417970657349, "eval_runtime": 8.6707, "eval_samples_per_second": 541.247, "eval_steps_per_second": 67.699, "step": 366000 }, { "epoch": 84.0, "learning_rate": 4.200091680036672e-05, "loss": 1.011, "step": 366500 }, { "epoch": 84.0, "eval_loss": 1.4415466785430908, "eval_runtime": 8.666, "eval_samples_per_second": 541.542, "eval_steps_per_second": 67.736, "step": 366500 }, { "epoch": 84.12, "learning_rate": 4.205821682328673e-05, "loss": 0.9961, "step": 367000 }, { "epoch": 84.12, "eval_loss": 1.4351595640182495, "eval_runtime": 8.6756, "eval_samples_per_second": 540.94, "eval_steps_per_second": 67.661, "step": 367000 }, { "epoch": 84.23, "learning_rate": 4.211551684620674e-05, "loss": 0.992, "step": 367500 }, { "epoch": 84.23, "eval_loss": 1.457275390625, "eval_runtime": 8.6709, "eval_samples_per_second": 541.234, "eval_steps_per_second": 67.698, "step": 367500 }, { "epoch": 84.35, "learning_rate": 4.217281686912675e-05, "loss": 1.0024, "step": 368000 }, { "epoch": 84.35, "eval_loss": 1.4416412115097046, "eval_runtime": 8.6748, "eval_samples_per_second": 540.992, "eval_steps_per_second": 67.667, "step": 368000 }, { "epoch": 84.46, "learning_rate": 4.223011689204676e-05, "loss": 1.0069, "step": 368500 }, { "epoch": 84.46, "eval_loss": 1.4691362380981445, "eval_runtime": 8.6722, "eval_samples_per_second": 541.155, "eval_steps_per_second": 67.688, "step": 368500 }, { "epoch": 84.57, "learning_rate": 4.2287416914966767e-05, "loss": 1.0048, "step": 369000 }, { "epoch": 84.57, "eval_loss": 1.4319953918457031, "eval_runtime": 8.6727, "eval_samples_per_second": 541.123, "eval_steps_per_second": 67.684, "step": 369000 }, { "epoch": 84.69, "learning_rate": 4.234471693788678e-05, "loss": 1.009, "step": 369500 }, { "epoch": 84.69, "eval_loss": 1.4571232795715332, "eval_runtime": 8.6704, "eval_samples_per_second": 541.267, "eval_steps_per_second": 67.702, "step": 369500 }, { "epoch": 84.8, "learning_rate": 4.240201696080678e-05, "loss": 1.0105, "step": 370000 }, { "epoch": 84.8, "eval_loss": 1.4496978521347046, "eval_runtime": 8.6755, "eval_samples_per_second": 540.947, "eval_steps_per_second": 67.662, "step": 370000 }, { "epoch": 84.92, "learning_rate": 4.245931698372679e-05, "loss": 1.0116, "step": 370500 }, { "epoch": 84.92, "eval_loss": 1.4418834447860718, "eval_runtime": 8.6659, "eval_samples_per_second": 541.549, "eval_steps_per_second": 67.737, "step": 370500 }, { "epoch": 85.03, "learning_rate": 4.25166170066468e-05, "loss": 1.0061, "step": 371000 }, { "epoch": 85.03, "eval_loss": 1.4491033554077148, "eval_runtime": 8.6719, "eval_samples_per_second": 541.173, "eval_steps_per_second": 67.69, "step": 371000 }, { "epoch": 85.15, "learning_rate": 4.257391702956681e-05, "loss": 1.0039, "step": 371500 }, { "epoch": 85.15, "eval_loss": 1.468346357345581, "eval_runtime": 8.6714, "eval_samples_per_second": 541.203, "eval_steps_per_second": 67.694, "step": 371500 }, { "epoch": 85.26, "learning_rate": 4.263121705248682e-05, "loss": 1.0056, "step": 372000 }, { "epoch": 85.26, "eval_loss": 1.4455246925354004, "eval_runtime": 8.6731, "eval_samples_per_second": 541.1, "eval_steps_per_second": 67.681, "step": 372000 }, { "epoch": 85.38, "learning_rate": 4.268851707540683e-05, "loss": 1.0037, "step": 372500 }, { "epoch": 85.38, "eval_loss": 1.4363850355148315, "eval_runtime": 8.7543, "eval_samples_per_second": 536.078, "eval_steps_per_second": 67.053, "step": 372500 }, { "epoch": 85.49, "learning_rate": 4.274581709832684e-05, "loss": 1.0025, "step": 373000 }, { "epoch": 85.49, "eval_loss": 1.440795660018921, "eval_runtime": 8.742, "eval_samples_per_second": 536.831, "eval_steps_per_second": 67.147, "step": 373000 }, { "epoch": 85.61, "learning_rate": 4.280311712124685e-05, "loss": 1.0097, "step": 373500 }, { "epoch": 85.61, "eval_loss": 1.4386217594146729, "eval_runtime": 8.6992, "eval_samples_per_second": 539.476, "eval_steps_per_second": 67.478, "step": 373500 }, { "epoch": 85.72, "learning_rate": 4.286041714416686e-05, "loss": 1.0074, "step": 374000 }, { "epoch": 85.72, "eval_loss": 1.4529812335968018, "eval_runtime": 8.7156, "eval_samples_per_second": 538.458, "eval_steps_per_second": 67.35, "step": 374000 }, { "epoch": 85.84, "learning_rate": 4.291771716708687e-05, "loss": 1.0148, "step": 374500 }, { "epoch": 85.84, "eval_loss": 1.4663665294647217, "eval_runtime": 8.7116, "eval_samples_per_second": 538.706, "eval_steps_per_second": 67.381, "step": 374500 }, { "epoch": 85.95, "learning_rate": 4.297501719000688e-05, "loss": 1.008, "step": 375000 }, { "epoch": 85.95, "eval_loss": 1.4560521841049194, "eval_runtime": 8.7124, "eval_samples_per_second": 538.656, "eval_steps_per_second": 67.375, "step": 375000 }, { "epoch": 86.06, "learning_rate": 4.303231721292689e-05, "loss": 1.0083, "step": 375500 }, { "epoch": 86.06, "eval_loss": 1.4623100757598877, "eval_runtime": 8.7067, "eval_samples_per_second": 539.008, "eval_steps_per_second": 67.419, "step": 375500 }, { "epoch": 86.18, "learning_rate": 4.308961723584689e-05, "loss": 0.997, "step": 376000 }, { "epoch": 86.18, "eval_loss": 1.478158950805664, "eval_runtime": 8.7047, "eval_samples_per_second": 539.133, "eval_steps_per_second": 67.435, "step": 376000 }, { "epoch": 86.29, "learning_rate": 4.3146917258766904e-05, "loss": 1.0018, "step": 376500 }, { "epoch": 86.29, "eval_loss": 1.4449756145477295, "eval_runtime": 8.7058, "eval_samples_per_second": 539.068, "eval_steps_per_second": 67.427, "step": 376500 }, { "epoch": 86.41, "learning_rate": 4.3204217281686914e-05, "loss": 1.0008, "step": 377000 }, { "epoch": 86.41, "eval_loss": 1.442144513130188, "eval_runtime": 8.7053, "eval_samples_per_second": 539.096, "eval_steps_per_second": 67.43, "step": 377000 }, { "epoch": 86.52, "learning_rate": 4.3261517304606925e-05, "loss": 0.9993, "step": 377500 }, { "epoch": 86.52, "eval_loss": 1.4451080560684204, "eval_runtime": 8.7054, "eval_samples_per_second": 539.088, "eval_steps_per_second": 67.429, "step": 377500 }, { "epoch": 86.64, "learning_rate": 4.3318817327526935e-05, "loss": 1.0085, "step": 378000 }, { "epoch": 86.64, "eval_loss": 1.434246301651001, "eval_runtime": 8.7115, "eval_samples_per_second": 538.711, "eval_steps_per_second": 67.382, "step": 378000 }, { "epoch": 86.75, "learning_rate": 4.3376117350446946e-05, "loss": 1.0115, "step": 378500 }, { "epoch": 86.75, "eval_loss": 1.4564591646194458, "eval_runtime": 8.7071, "eval_samples_per_second": 538.983, "eval_steps_per_second": 67.416, "step": 378500 }, { "epoch": 86.87, "learning_rate": 4.343341737336695e-05, "loss": 1.004, "step": 379000 }, { "epoch": 86.87, "eval_loss": 1.4626872539520264, "eval_runtime": 8.7135, "eval_samples_per_second": 538.589, "eval_steps_per_second": 67.367, "step": 379000 }, { "epoch": 86.98, "learning_rate": 4.349071739628696e-05, "loss": 1.0107, "step": 379500 }, { "epoch": 86.98, "eval_loss": 1.449440836906433, "eval_runtime": 8.703, "eval_samples_per_second": 539.239, "eval_steps_per_second": 67.448, "step": 379500 }, { "epoch": 87.1, "learning_rate": 4.354801741920697e-05, "loss": 1.0021, "step": 380000 }, { "epoch": 87.1, "eval_loss": 1.4712718725204468, "eval_runtime": 8.7104, "eval_samples_per_second": 538.779, "eval_steps_per_second": 67.39, "step": 380000 }, { "epoch": 87.21, "learning_rate": 4.360531744212698e-05, "loss": 1.0021, "step": 380500 }, { "epoch": 87.21, "eval_loss": 1.4592411518096924, "eval_runtime": 8.7055, "eval_samples_per_second": 539.084, "eval_steps_per_second": 67.429, "step": 380500 }, { "epoch": 87.33, "learning_rate": 4.366261746504699e-05, "loss": 1.0012, "step": 381000 }, { "epoch": 87.33, "eval_loss": 1.4660837650299072, "eval_runtime": 8.7086, "eval_samples_per_second": 538.893, "eval_steps_per_second": 67.405, "step": 381000 }, { "epoch": 87.44, "learning_rate": 4.3719917487967e-05, "loss": 1.0036, "step": 381500 }, { "epoch": 87.44, "eval_loss": 1.4377235174179077, "eval_runtime": 8.6976, "eval_samples_per_second": 539.576, "eval_steps_per_second": 67.49, "step": 381500 }, { "epoch": 87.55, "learning_rate": 4.3777217510887005e-05, "loss": 1.0144, "step": 382000 }, { "epoch": 87.55, "eval_loss": 1.452379822731018, "eval_runtime": 8.7049, "eval_samples_per_second": 539.123, "eval_steps_per_second": 67.433, "step": 382000 }, { "epoch": 87.67, "learning_rate": 4.3834517533807016e-05, "loss": 1.0095, "step": 382500 }, { "epoch": 87.67, "eval_loss": 1.4542733430862427, "eval_runtime": 8.7065, "eval_samples_per_second": 539.02, "eval_steps_per_second": 67.421, "step": 382500 }, { "epoch": 87.78, "learning_rate": 4.3891817556727026e-05, "loss": 1.0102, "step": 383000 }, { "epoch": 87.78, "eval_loss": 1.4689573049545288, "eval_runtime": 8.7072, "eval_samples_per_second": 538.982, "eval_steps_per_second": 67.416, "step": 383000 }, { "epoch": 87.9, "learning_rate": 4.394911757964704e-05, "loss": 1.0078, "step": 383500 }, { "epoch": 87.9, "eval_loss": 1.4515020847320557, "eval_runtime": 8.702, "eval_samples_per_second": 539.303, "eval_steps_per_second": 67.456, "step": 383500 }, { "epoch": 88.01, "learning_rate": 4.400641760256704e-05, "loss": 1.012, "step": 384000 }, { "epoch": 88.01, "eval_loss": 1.4683079719543457, "eval_runtime": 8.6951, "eval_samples_per_second": 539.728, "eval_steps_per_second": 67.509, "step": 384000 }, { "epoch": 88.13, "learning_rate": 4.406371762548705e-05, "loss": 0.9982, "step": 384500 }, { "epoch": 88.13, "eval_loss": 1.477966547012329, "eval_runtime": 8.7052, "eval_samples_per_second": 539.102, "eval_steps_per_second": 67.431, "step": 384500 }, { "epoch": 88.24, "learning_rate": 4.412101764840706e-05, "loss": 0.9995, "step": 385000 }, { "epoch": 88.24, "eval_loss": 1.4628689289093018, "eval_runtime": 8.7125, "eval_samples_per_second": 538.649, "eval_steps_per_second": 67.374, "step": 385000 }, { "epoch": 88.36, "learning_rate": 4.4178317671327065e-05, "loss": 1.0021, "step": 385500 }, { "epoch": 88.36, "eval_loss": 1.4765079021453857, "eval_runtime": 8.6993, "eval_samples_per_second": 539.466, "eval_steps_per_second": 67.476, "step": 385500 }, { "epoch": 88.47, "learning_rate": 4.4235617694247076e-05, "loss": 1.0007, "step": 386000 }, { "epoch": 88.47, "eval_loss": 1.4606109857559204, "eval_runtime": 8.7056, "eval_samples_per_second": 539.077, "eval_steps_per_second": 67.428, "step": 386000 }, { "epoch": 88.59, "learning_rate": 4.4292917717167086e-05, "loss": 1.0091, "step": 386500 }, { "epoch": 88.59, "eval_loss": 1.457536220550537, "eval_runtime": 8.7018, "eval_samples_per_second": 539.312, "eval_steps_per_second": 67.457, "step": 386500 }, { "epoch": 88.7, "learning_rate": 4.43502177400871e-05, "loss": 1.0079, "step": 387000 }, { "epoch": 88.7, "eval_loss": 1.4665502309799194, "eval_runtime": 8.7042, "eval_samples_per_second": 539.167, "eval_steps_per_second": 67.439, "step": 387000 }, { "epoch": 88.81, "learning_rate": 4.440751776300711e-05, "loss": 1.0091, "step": 387500 }, { "epoch": 88.81, "eval_loss": 1.482861876487732, "eval_runtime": 8.7038, "eval_samples_per_second": 539.189, "eval_steps_per_second": 67.442, "step": 387500 }, { "epoch": 88.93, "learning_rate": 4.446481778592712e-05, "loss": 1.0071, "step": 388000 }, { "epoch": 88.93, "eval_loss": 1.4672224521636963, "eval_runtime": 8.7106, "eval_samples_per_second": 538.772, "eval_steps_per_second": 67.39, "step": 388000 }, { "epoch": 89.04, "learning_rate": 4.452211780884712e-05, "loss": 1.0088, "step": 388500 }, { "epoch": 89.04, "eval_loss": 1.4672313928604126, "eval_runtime": 8.7032, "eval_samples_per_second": 539.224, "eval_steps_per_second": 67.446, "step": 388500 }, { "epoch": 89.16, "learning_rate": 4.457941783176713e-05, "loss": 0.9934, "step": 389000 }, { "epoch": 89.16, "eval_loss": 1.4708116054534912, "eval_runtime": 8.7108, "eval_samples_per_second": 538.759, "eval_steps_per_second": 67.388, "step": 389000 }, { "epoch": 89.27, "learning_rate": 4.463671785468714e-05, "loss": 0.9989, "step": 389500 }, { "epoch": 89.27, "eval_loss": 1.4508308172225952, "eval_runtime": 8.6998, "eval_samples_per_second": 539.439, "eval_steps_per_second": 67.473, "step": 389500 }, { "epoch": 89.39, "learning_rate": 4.469401787760715e-05, "loss": 1.0048, "step": 390000 }, { "epoch": 89.39, "eval_loss": 1.442422866821289, "eval_runtime": 8.7065, "eval_samples_per_second": 539.02, "eval_steps_per_second": 67.421, "step": 390000 }, { "epoch": 89.5, "learning_rate": 4.4751317900527163e-05, "loss": 1.0046, "step": 390500 }, { "epoch": 89.5, "eval_loss": 1.4934210777282715, "eval_runtime": 8.7049, "eval_samples_per_second": 539.125, "eval_steps_per_second": 67.434, "step": 390500 }, { "epoch": 89.62, "learning_rate": 4.4808617923447174e-05, "loss": 1.0013, "step": 391000 }, { "epoch": 89.62, "eval_loss": 1.4772695302963257, "eval_runtime": 8.7104, "eval_samples_per_second": 538.779, "eval_steps_per_second": 67.39, "step": 391000 }, { "epoch": 89.73, "learning_rate": 4.486591794636718e-05, "loss": 1.0097, "step": 391500 }, { "epoch": 89.73, "eval_loss": 1.4547902345657349, "eval_runtime": 8.7033, "eval_samples_per_second": 539.218, "eval_steps_per_second": 67.445, "step": 391500 }, { "epoch": 89.85, "learning_rate": 4.492321796928719e-05, "loss": 1.0155, "step": 392000 }, { "epoch": 89.85, "eval_loss": 1.4650427103042603, "eval_runtime": 8.7069, "eval_samples_per_second": 539.0, "eval_steps_per_second": 67.418, "step": 392000 }, { "epoch": 89.96, "learning_rate": 4.49805179922072e-05, "loss": 1.0093, "step": 392500 }, { "epoch": 89.96, "eval_loss": 1.4787232875823975, "eval_runtime": 8.7091, "eval_samples_per_second": 538.859, "eval_steps_per_second": 67.4, "step": 392500 }, { "epoch": 90.08, "learning_rate": 4.503781801512721e-05, "loss": 1.0046, "step": 393000 }, { "epoch": 90.08, "eval_loss": 1.474727988243103, "eval_runtime": 8.7082, "eval_samples_per_second": 538.919, "eval_steps_per_second": 67.408, "step": 393000 }, { "epoch": 90.19, "learning_rate": 4.509511803804722e-05, "loss": 0.9939, "step": 393500 }, { "epoch": 90.19, "eval_loss": 1.4780495166778564, "eval_runtime": 8.7079, "eval_samples_per_second": 538.933, "eval_steps_per_second": 67.41, "step": 393500 }, { "epoch": 90.3, "learning_rate": 4.515241806096723e-05, "loss": 1.0015, "step": 394000 }, { "epoch": 90.3, "eval_loss": 1.4654226303100586, "eval_runtime": 8.704, "eval_samples_per_second": 539.179, "eval_steps_per_second": 67.44, "step": 394000 }, { "epoch": 90.42, "learning_rate": 4.5209718083887234e-05, "loss": 1.0, "step": 394500 }, { "epoch": 90.42, "eval_loss": 1.4744341373443604, "eval_runtime": 8.6978, "eval_samples_per_second": 539.561, "eval_steps_per_second": 67.488, "step": 394500 }, { "epoch": 90.53, "learning_rate": 4.5267018106807244e-05, "loss": 1.0089, "step": 395000 }, { "epoch": 90.53, "eval_loss": 1.4572609663009644, "eval_runtime": 8.7183, "eval_samples_per_second": 538.293, "eval_steps_per_second": 67.33, "step": 395000 }, { "epoch": 90.65, "learning_rate": 4.5324318129727255e-05, "loss": 1.0034, "step": 395500 }, { "epoch": 90.65, "eval_loss": 1.46072256565094, "eval_runtime": 8.7044, "eval_samples_per_second": 539.152, "eval_steps_per_second": 67.437, "step": 395500 }, { "epoch": 90.76, "learning_rate": 4.5381618152647265e-05, "loss": 1.0112, "step": 396000 }, { "epoch": 90.76, "eval_loss": 1.4869415760040283, "eval_runtime": 8.7097, "eval_samples_per_second": 538.827, "eval_steps_per_second": 67.396, "step": 396000 }, { "epoch": 90.88, "learning_rate": 4.5438918175567276e-05, "loss": 1.0037, "step": 396500 }, { "epoch": 90.88, "eval_loss": 1.4742939472198486, "eval_runtime": 8.7005, "eval_samples_per_second": 539.392, "eval_steps_per_second": 67.467, "step": 396500 }, { "epoch": 90.99, "learning_rate": 4.5496218198487286e-05, "loss": 1.0136, "step": 397000 }, { "epoch": 90.99, "eval_loss": 1.4763630628585815, "eval_runtime": 8.7091, "eval_samples_per_second": 538.863, "eval_steps_per_second": 67.401, "step": 397000 }, { "epoch": 91.11, "learning_rate": 4.555351822140729e-05, "loss": 0.9956, "step": 397500 }, { "epoch": 91.11, "eval_loss": 1.4726101160049438, "eval_runtime": 8.7077, "eval_samples_per_second": 538.951, "eval_steps_per_second": 67.412, "step": 397500 }, { "epoch": 91.22, "learning_rate": 4.56108182443273e-05, "loss": 0.9999, "step": 398000 }, { "epoch": 91.22, "eval_loss": 1.462551236152649, "eval_runtime": 8.7094, "eval_samples_per_second": 538.84, "eval_steps_per_second": 67.398, "step": 398000 }, { "epoch": 91.34, "learning_rate": 4.566811826724731e-05, "loss": 0.9991, "step": 398500 }, { "epoch": 91.34, "eval_loss": 1.4679380655288696, "eval_runtime": 8.6997, "eval_samples_per_second": 539.443, "eval_steps_per_second": 67.474, "step": 398500 }, { "epoch": 91.45, "learning_rate": 4.5725418290167315e-05, "loss": 0.9997, "step": 399000 }, { "epoch": 91.45, "eval_loss": 1.4894410371780396, "eval_runtime": 8.7151, "eval_samples_per_second": 538.49, "eval_steps_per_second": 67.354, "step": 399000 }, { "epoch": 91.57, "learning_rate": 4.5782718313087325e-05, "loss": 1.0065, "step": 399500 }, { "epoch": 91.57, "eval_loss": 1.4572840929031372, "eval_runtime": 8.7055, "eval_samples_per_second": 539.087, "eval_steps_per_second": 67.429, "step": 399500 }, { "epoch": 91.68, "learning_rate": 4.5840018336007336e-05, "loss": 1.0074, "step": 400000 }, { "epoch": 91.68, "eval_loss": 1.4807215929031372, "eval_runtime": 8.7111, "eval_samples_per_second": 538.74, "eval_steps_per_second": 67.386, "step": 400000 }, { "epoch": 91.79, "learning_rate": 4.5897318358927346e-05, "loss": 1.0109, "step": 400500 }, { "epoch": 91.79, "eval_loss": 1.476940393447876, "eval_runtime": 8.7067, "eval_samples_per_second": 539.01, "eval_steps_per_second": 67.419, "step": 400500 }, { "epoch": 91.91, "learning_rate": 4.595461838184735e-05, "loss": 1.0062, "step": 401000 }, { "epoch": 91.91, "eval_loss": 1.4796017408370972, "eval_runtime": 8.709, "eval_samples_per_second": 538.866, "eval_steps_per_second": 67.401, "step": 401000 }, { "epoch": 92.02, "learning_rate": 4.601191840476736e-05, "loss": 1.0055, "step": 401500 }, { "epoch": 92.02, "eval_loss": 1.4907596111297607, "eval_runtime": 8.705, "eval_samples_per_second": 539.116, "eval_steps_per_second": 67.433, "step": 401500 }, { "epoch": 92.14, "learning_rate": 4.606921842768737e-05, "loss": 0.9971, "step": 402000 }, { "epoch": 92.14, "eval_loss": 1.4831644296646118, "eval_runtime": 8.707, "eval_samples_per_second": 538.989, "eval_steps_per_second": 67.417, "step": 402000 }, { "epoch": 92.25, "learning_rate": 4.612651845060738e-05, "loss": 0.9936, "step": 402500 }, { "epoch": 92.25, "eval_loss": 1.4940286874771118, "eval_runtime": 8.705, "eval_samples_per_second": 539.118, "eval_steps_per_second": 67.433, "step": 402500 }, { "epoch": 92.37, "learning_rate": 4.618381847352739e-05, "loss": 1.0003, "step": 403000 }, { "epoch": 92.37, "eval_loss": 1.4565112590789795, "eval_runtime": 8.7183, "eval_samples_per_second": 538.293, "eval_steps_per_second": 67.33, "step": 403000 }, { "epoch": 92.48, "learning_rate": 4.62411184964474e-05, "loss": 1.002, "step": 403500 }, { "epoch": 92.48, "eval_loss": 1.4878376722335815, "eval_runtime": 8.7006, "eval_samples_per_second": 539.39, "eval_steps_per_second": 67.467, "step": 403500 }, { "epoch": 92.6, "learning_rate": 4.6298418519367406e-05, "loss": 1.0068, "step": 404000 }, { "epoch": 92.6, "eval_loss": 1.4627454280853271, "eval_runtime": 8.7085, "eval_samples_per_second": 538.896, "eval_steps_per_second": 67.405, "step": 404000 }, { "epoch": 92.71, "learning_rate": 4.6355718542287416e-05, "loss": 1.0107, "step": 404500 }, { "epoch": 92.71, "eval_loss": 1.4862852096557617, "eval_runtime": 8.7139, "eval_samples_per_second": 538.566, "eval_steps_per_second": 67.364, "step": 404500 }, { "epoch": 92.83, "learning_rate": 4.641301856520743e-05, "loss": 1.0143, "step": 405000 }, { "epoch": 92.83, "eval_loss": 1.4826951026916504, "eval_runtime": 8.7087, "eval_samples_per_second": 538.885, "eval_steps_per_second": 67.404, "step": 405000 }, { "epoch": 92.94, "learning_rate": 4.647031858812744e-05, "loss": 1.0133, "step": 405500 }, { "epoch": 92.94, "eval_loss": 1.4742345809936523, "eval_runtime": 8.7113, "eval_samples_per_second": 538.725, "eval_steps_per_second": 67.384, "step": 405500 }, { "epoch": 93.06, "learning_rate": 4.652761861104745e-05, "loss": 1.0069, "step": 406000 }, { "epoch": 93.06, "eval_loss": 1.4923055171966553, "eval_runtime": 8.7077, "eval_samples_per_second": 538.945, "eval_steps_per_second": 67.411, "step": 406000 }, { "epoch": 93.17, "learning_rate": 4.658491863396746e-05, "loss": 0.9905, "step": 406500 }, { "epoch": 93.17, "eval_loss": 1.4879727363586426, "eval_runtime": 8.706, "eval_samples_per_second": 539.054, "eval_steps_per_second": 67.425, "step": 406500 }, { "epoch": 93.28, "learning_rate": 4.664221865688746e-05, "loss": 1.0066, "step": 407000 }, { "epoch": 93.28, "eval_loss": 1.4829518795013428, "eval_runtime": 8.7014, "eval_samples_per_second": 539.338, "eval_steps_per_second": 67.46, "step": 407000 }, { "epoch": 93.4, "learning_rate": 4.669951867980747e-05, "loss": 1.0064, "step": 407500 }, { "epoch": 93.4, "eval_loss": 1.4930949211120605, "eval_runtime": 8.7105, "eval_samples_per_second": 538.777, "eval_steps_per_second": 67.39, "step": 407500 }, { "epoch": 93.51, "learning_rate": 4.675681870272748e-05, "loss": 1.0032, "step": 408000 }, { "epoch": 93.51, "eval_loss": 1.4838510751724243, "eval_runtime": 8.7119, "eval_samples_per_second": 538.69, "eval_steps_per_second": 67.379, "step": 408000 }, { "epoch": 93.63, "learning_rate": 4.6814118725647494e-05, "loss": 1.0018, "step": 408500 }, { "epoch": 93.63, "eval_loss": 1.4774837493896484, "eval_runtime": 8.7043, "eval_samples_per_second": 539.158, "eval_steps_per_second": 67.438, "step": 408500 }, { "epoch": 93.74, "learning_rate": 4.6871418748567504e-05, "loss": 1.0085, "step": 409000 }, { "epoch": 93.74, "eval_loss": 1.4862353801727295, "eval_runtime": 8.7008, "eval_samples_per_second": 539.374, "eval_steps_per_second": 67.465, "step": 409000 }, { "epoch": 93.86, "learning_rate": 4.6928718771487515e-05, "loss": 1.0166, "step": 409500 }, { "epoch": 93.86, "eval_loss": 1.4798619747161865, "eval_runtime": 8.7005, "eval_samples_per_second": 539.397, "eval_steps_per_second": 67.468, "step": 409500 }, { "epoch": 93.97, "learning_rate": 4.698601879440752e-05, "loss": 1.0137, "step": 410000 }, { "epoch": 93.97, "eval_loss": 1.4795269966125488, "eval_runtime": 8.6988, "eval_samples_per_second": 539.499, "eval_steps_per_second": 67.48, "step": 410000 }, { "epoch": 94.09, "learning_rate": 4.704331881732753e-05, "loss": 1.0046, "step": 410500 }, { "epoch": 94.09, "eval_loss": 1.4875595569610596, "eval_runtime": 8.7035, "eval_samples_per_second": 539.207, "eval_steps_per_second": 67.444, "step": 410500 }, { "epoch": 94.2, "learning_rate": 4.710061884024754e-05, "loss": 0.9949, "step": 411000 }, { "epoch": 94.2, "eval_loss": 1.4773905277252197, "eval_runtime": 8.7149, "eval_samples_per_second": 538.5, "eval_steps_per_second": 67.356, "step": 411000 }, { "epoch": 94.32, "learning_rate": 4.715791886316755e-05, "loss": 1.0048, "step": 411500 }, { "epoch": 94.32, "eval_loss": 1.4770371913909912, "eval_runtime": 8.6995, "eval_samples_per_second": 539.456, "eval_steps_per_second": 67.475, "step": 411500 }, { "epoch": 94.43, "learning_rate": 4.721521888608756e-05, "loss": 1.0015, "step": 412000 }, { "epoch": 94.43, "eval_loss": 1.4775571823120117, "eval_runtime": 8.707, "eval_samples_per_second": 538.992, "eval_steps_per_second": 67.417, "step": 412000 }, { "epoch": 94.54, "learning_rate": 4.727251890900757e-05, "loss": 1.0067, "step": 412500 }, { "epoch": 94.54, "eval_loss": 1.4830892086029053, "eval_runtime": 8.7039, "eval_samples_per_second": 539.182, "eval_steps_per_second": 67.441, "step": 412500 }, { "epoch": 94.66, "learning_rate": 4.7329818931927575e-05, "loss": 1.0025, "step": 413000 }, { "epoch": 94.66, "eval_loss": 1.4778838157653809, "eval_runtime": 8.7064, "eval_samples_per_second": 539.032, "eval_steps_per_second": 67.422, "step": 413000 }, { "epoch": 94.77, "learning_rate": 4.7387118954847585e-05, "loss": 1.0101, "step": 413500 }, { "epoch": 94.77, "eval_loss": 1.4788851737976074, "eval_runtime": 8.6975, "eval_samples_per_second": 539.583, "eval_steps_per_second": 67.491, "step": 413500 }, { "epoch": 94.89, "learning_rate": 4.744441897776759e-05, "loss": 1.0104, "step": 414000 }, { "epoch": 94.89, "eval_loss": 1.4908266067504883, "eval_runtime": 8.7074, "eval_samples_per_second": 538.965, "eval_steps_per_second": 67.414, "step": 414000 }, { "epoch": 95.0, "learning_rate": 4.75017190006876e-05, "loss": 1.0072, "step": 414500 }, { "epoch": 95.0, "eval_loss": 1.4913634061813354, "eval_runtime": 8.7031, "eval_samples_per_second": 539.235, "eval_steps_per_second": 67.447, "step": 414500 }, { "epoch": 95.12, "learning_rate": 4.755901902360761e-05, "loss": 0.9986, "step": 415000 }, { "epoch": 95.12, "eval_loss": 1.498978614807129, "eval_runtime": 8.7108, "eval_samples_per_second": 538.755, "eval_steps_per_second": 67.387, "step": 415000 }, { "epoch": 95.23, "learning_rate": 4.761631904652762e-05, "loss": 0.99, "step": 415500 }, { "epoch": 95.23, "eval_loss": 1.4986730813980103, "eval_runtime": 8.7058, "eval_samples_per_second": 539.069, "eval_steps_per_second": 67.427, "step": 415500 }, { "epoch": 95.35, "learning_rate": 4.767361906944763e-05, "loss": 0.9986, "step": 416000 }, { "epoch": 95.35, "eval_loss": 1.4844081401824951, "eval_runtime": 8.7074, "eval_samples_per_second": 538.966, "eval_steps_per_second": 67.414, "step": 416000 }, { "epoch": 95.46, "learning_rate": 4.7730919092367634e-05, "loss": 0.9985, "step": 416500 }, { "epoch": 95.46, "eval_loss": 1.4969843626022339, "eval_runtime": 8.6931, "eval_samples_per_second": 539.852, "eval_steps_per_second": 67.525, "step": 416500 }, { "epoch": 95.58, "learning_rate": 4.7788219115287645e-05, "loss": 1.0078, "step": 417000 }, { "epoch": 95.58, "eval_loss": 1.5011699199676514, "eval_runtime": 8.7065, "eval_samples_per_second": 539.022, "eval_steps_per_second": 67.421, "step": 417000 }, { "epoch": 95.69, "learning_rate": 4.7845519138207655e-05, "loss": 1.0056, "step": 417500 }, { "epoch": 95.69, "eval_loss": 1.5061365365982056, "eval_runtime": 8.7003, "eval_samples_per_second": 539.406, "eval_steps_per_second": 67.469, "step": 417500 }, { "epoch": 95.81, "learning_rate": 4.7902819161127666e-05, "loss": 1.0106, "step": 418000 }, { "epoch": 95.81, "eval_loss": 1.49326753616333, "eval_runtime": 8.7108, "eval_samples_per_second": 538.755, "eval_steps_per_second": 67.387, "step": 418000 }, { "epoch": 95.92, "learning_rate": 4.7960119184047676e-05, "loss": 1.0144, "step": 418500 }, { "epoch": 95.92, "eval_loss": 1.4844014644622803, "eval_runtime": 8.6988, "eval_samples_per_second": 539.498, "eval_steps_per_second": 67.48, "step": 418500 }, { "epoch": 96.03, "learning_rate": 4.801741920696769e-05, "loss": 1.0069, "step": 419000 }, { "epoch": 96.03, "eval_loss": 1.4971331357955933, "eval_runtime": 8.7018, "eval_samples_per_second": 539.312, "eval_steps_per_second": 67.457, "step": 419000 }, { "epoch": 96.15, "learning_rate": 4.807471922988769e-05, "loss": 0.9996, "step": 419500 }, { "epoch": 96.15, "eval_loss": 1.508018970489502, "eval_runtime": 8.7024, "eval_samples_per_second": 539.28, "eval_steps_per_second": 67.453, "step": 419500 }, { "epoch": 96.26, "learning_rate": 4.81320192528077e-05, "loss": 0.9999, "step": 420000 }, { "epoch": 96.26, "eval_loss": 1.5034505128860474, "eval_runtime": 8.7074, "eval_samples_per_second": 538.966, "eval_steps_per_second": 67.414, "step": 420000 }, { "epoch": 96.38, "learning_rate": 4.818931927572771e-05, "loss": 1.0004, "step": 420500 }, { "epoch": 96.38, "eval_loss": 1.493147850036621, "eval_runtime": 8.7018, "eval_samples_per_second": 539.315, "eval_steps_per_second": 67.457, "step": 420500 }, { "epoch": 96.49, "learning_rate": 4.824661929864772e-05, "loss": 0.9996, "step": 421000 }, { "epoch": 96.49, "eval_loss": 1.4743448495864868, "eval_runtime": 8.7042, "eval_samples_per_second": 539.162, "eval_steps_per_second": 67.438, "step": 421000 }, { "epoch": 96.61, "learning_rate": 4.830391932156773e-05, "loss": 1.003, "step": 421500 }, { "epoch": 96.61, "eval_loss": 1.4793789386749268, "eval_runtime": 8.7022, "eval_samples_per_second": 539.287, "eval_steps_per_second": 67.454, "step": 421500 }, { "epoch": 96.72, "learning_rate": 4.836121934448774e-05, "loss": 1.0039, "step": 422000 }, { "epoch": 96.72, "eval_loss": 1.5025540590286255, "eval_runtime": 8.7087, "eval_samples_per_second": 538.889, "eval_steps_per_second": 67.404, "step": 422000 }, { "epoch": 96.84, "learning_rate": 4.841851936740775e-05, "loss": 1.0119, "step": 422500 }, { "epoch": 96.84, "eval_loss": 1.501444935798645, "eval_runtime": 8.6977, "eval_samples_per_second": 539.565, "eval_steps_per_second": 67.489, "step": 422500 }, { "epoch": 96.95, "learning_rate": 4.847581939032776e-05, "loss": 1.0137, "step": 423000 }, { "epoch": 96.95, "eval_loss": 1.4880220890045166, "eval_runtime": 8.7049, "eval_samples_per_second": 539.124, "eval_steps_per_second": 67.434, "step": 423000 }, { "epoch": 97.07, "learning_rate": 4.853311941324777e-05, "loss": 1.0005, "step": 423500 }, { "epoch": 97.07, "eval_loss": 1.4941115379333496, "eval_runtime": 8.7124, "eval_samples_per_second": 538.661, "eval_steps_per_second": 67.376, "step": 423500 }, { "epoch": 97.18, "learning_rate": 4.859041943616778e-05, "loss": 0.991, "step": 424000 }, { "epoch": 97.18, "eval_loss": 1.511069655418396, "eval_runtime": 8.7061, "eval_samples_per_second": 539.049, "eval_steps_per_second": 67.424, "step": 424000 }, { "epoch": 97.3, "learning_rate": 4.864771945908779e-05, "loss": 0.9992, "step": 424500 }, { "epoch": 97.3, "eval_loss": 1.4888603687286377, "eval_runtime": 8.706, "eval_samples_per_second": 539.05, "eval_steps_per_second": 67.424, "step": 424500 }, { "epoch": 97.41, "learning_rate": 4.87050194820078e-05, "loss": 1.0029, "step": 425000 }, { "epoch": 97.41, "eval_loss": 1.5109026432037354, "eval_runtime": 8.7064, "eval_samples_per_second": 539.027, "eval_steps_per_second": 67.421, "step": 425000 }, { "epoch": 97.52, "learning_rate": 4.87623195049278e-05, "loss": 1.0085, "step": 425500 }, { "epoch": 97.52, "eval_loss": 1.5057865381240845, "eval_runtime": 8.6991, "eval_samples_per_second": 539.48, "eval_steps_per_second": 67.478, "step": 425500 }, { "epoch": 97.64, "learning_rate": 4.8819619527847813e-05, "loss": 1.0143, "step": 426000 }, { "epoch": 97.64, "eval_loss": 1.51503586769104, "eval_runtime": 8.7091, "eval_samples_per_second": 538.863, "eval_steps_per_second": 67.401, "step": 426000 }, { "epoch": 97.75, "learning_rate": 4.8876919550767824e-05, "loss": 0.9995, "step": 426500 }, { "epoch": 97.75, "eval_loss": 1.507206678390503, "eval_runtime": 8.7067, "eval_samples_per_second": 539.012, "eval_steps_per_second": 67.42, "step": 426500 }, { "epoch": 97.87, "learning_rate": 4.8934219573687834e-05, "loss": 1.0025, "step": 427000 }, { "epoch": 97.87, "eval_loss": 1.4951661825180054, "eval_runtime": 8.7082, "eval_samples_per_second": 538.915, "eval_steps_per_second": 67.407, "step": 427000 }, { "epoch": 97.98, "learning_rate": 4.8991519596607845e-05, "loss": 1.0108, "step": 427500 }, { "epoch": 97.98, "eval_loss": 1.5064111948013306, "eval_runtime": 8.7052, "eval_samples_per_second": 539.103, "eval_steps_per_second": 67.431, "step": 427500 }, { "epoch": 98.1, "learning_rate": 4.9048819619527855e-05, "loss": 0.9939, "step": 428000 }, { "epoch": 98.1, "eval_loss": 1.5061509609222412, "eval_runtime": 8.7048, "eval_samples_per_second": 539.127, "eval_steps_per_second": 67.434, "step": 428000 }, { "epoch": 98.21, "learning_rate": 4.910611964244786e-05, "loss": 0.9966, "step": 428500 }, { "epoch": 98.21, "eval_loss": 1.5020614862442017, "eval_runtime": 8.7008, "eval_samples_per_second": 539.374, "eval_steps_per_second": 67.465, "step": 428500 }, { "epoch": 98.33, "learning_rate": 4.916341966536786e-05, "loss": 1.0031, "step": 429000 }, { "epoch": 98.33, "eval_loss": 1.475951075553894, "eval_runtime": 8.7095, "eval_samples_per_second": 538.834, "eval_steps_per_second": 67.397, "step": 429000 }, { "epoch": 98.44, "learning_rate": 4.922071968828787e-05, "loss": 1.0045, "step": 429500 }, { "epoch": 98.44, "eval_loss": 1.4942516088485718, "eval_runtime": 8.7147, "eval_samples_per_second": 538.516, "eval_steps_per_second": 67.358, "step": 429500 }, { "epoch": 98.56, "learning_rate": 4.9278019711207884e-05, "loss": 1.0012, "step": 430000 }, { "epoch": 98.56, "eval_loss": 1.4993478059768677, "eval_runtime": 8.7061, "eval_samples_per_second": 539.048, "eval_steps_per_second": 67.424, "step": 430000 }, { "epoch": 98.67, "learning_rate": 4.9335319734127894e-05, "loss": 1.0127, "step": 430500 }, { "epoch": 98.67, "eval_loss": 1.5137324333190918, "eval_runtime": 8.7038, "eval_samples_per_second": 539.189, "eval_steps_per_second": 67.442, "step": 430500 }, { "epoch": 98.79, "learning_rate": 4.9392619757047905e-05, "loss": 1.0083, "step": 431000 }, { "epoch": 98.79, "eval_loss": 1.5212175846099854, "eval_runtime": 8.7031, "eval_samples_per_second": 539.233, "eval_steps_per_second": 67.447, "step": 431000 }, { "epoch": 98.9, "learning_rate": 4.9449919779967915e-05, "loss": 1.0081, "step": 431500 }, { "epoch": 98.9, "eval_loss": 1.4942232370376587, "eval_runtime": 8.7183, "eval_samples_per_second": 538.29, "eval_steps_per_second": 67.329, "step": 431500 }, { "epoch": 99.01, "learning_rate": 4.950721980288792e-05, "loss": 1.0042, "step": 432000 }, { "epoch": 99.01, "eval_loss": 1.5383021831512451, "eval_runtime": 8.7019, "eval_samples_per_second": 539.309, "eval_steps_per_second": 67.457, "step": 432000 }, { "epoch": 99.13, "learning_rate": 4.956451982580793e-05, "loss": 0.995, "step": 432500 }, { "epoch": 99.13, "eval_loss": 1.4903078079223633, "eval_runtime": 8.7029, "eval_samples_per_second": 539.248, "eval_steps_per_second": 67.449, "step": 432500 }, { "epoch": 99.24, "learning_rate": 4.962181984872794e-05, "loss": 0.9991, "step": 433000 }, { "epoch": 99.24, "eval_loss": 1.5117533206939697, "eval_runtime": 8.705, "eval_samples_per_second": 539.117, "eval_steps_per_second": 67.433, "step": 433000 }, { "epoch": 99.36, "learning_rate": 4.967911987164795e-05, "loss": 0.9978, "step": 433500 }, { "epoch": 99.36, "eval_loss": 1.511852502822876, "eval_runtime": 8.7004, "eval_samples_per_second": 539.401, "eval_steps_per_second": 67.468, "step": 433500 }, { "epoch": 99.47, "learning_rate": 4.973641989456796e-05, "loss": 1.0072, "step": 434000 }, { "epoch": 99.47, "eval_loss": 1.4922456741333008, "eval_runtime": 8.7073, "eval_samples_per_second": 538.971, "eval_steps_per_second": 67.415, "step": 434000 }, { "epoch": 99.59, "learning_rate": 4.979371991748797e-05, "loss": 1.0069, "step": 434500 }, { "epoch": 99.59, "eval_loss": 1.509987235069275, "eval_runtime": 8.7077, "eval_samples_per_second": 538.946, "eval_steps_per_second": 67.411, "step": 434500 }, { "epoch": 99.7, "learning_rate": 4.9851019940407975e-05, "loss": 1.0112, "step": 435000 }, { "epoch": 99.7, "eval_loss": 1.5012742280960083, "eval_runtime": 8.7025, "eval_samples_per_second": 539.27, "eval_steps_per_second": 67.452, "step": 435000 }, { "epoch": 99.82, "learning_rate": 4.9908319963327986e-05, "loss": 1.0067, "step": 435500 }, { "epoch": 99.82, "eval_loss": 1.4927557706832886, "eval_runtime": 8.7023, "eval_samples_per_second": 539.286, "eval_steps_per_second": 67.454, "step": 435500 }, { "epoch": 99.93, "learning_rate": 4.9965619986247996e-05, "loss": 1.0118, "step": 436000 }, { "epoch": 99.93, "eval_loss": 1.5116112232208252, "eval_runtime": 8.7096, "eval_samples_per_second": 538.831, "eval_steps_per_second": 67.397, "step": 436000 }, { "epoch": 100.05, "learning_rate": 4.99999996799526e-05, "loss": 1.0176, "step": 436500 }, { "epoch": 100.05, "eval_loss": 1.4830495119094849, "eval_runtime": 8.7038, "eval_samples_per_second": 539.187, "eval_steps_per_second": 67.441, "step": 436500 }, { "epoch": 100.16, "learning_rate": 4.999999607941944e-05, "loss": 0.9952, "step": 437000 }, { "epoch": 100.16, "eval_loss": 1.5079327821731567, "eval_runtime": 8.704, "eval_samples_per_second": 539.18, "eval_steps_per_second": 67.441, "step": 437000 }, { "epoch": 100.28, "learning_rate": 4.999998847829444e-05, "loss": 0.9945, "step": 437500 }, { "epoch": 100.28, "eval_loss": 1.5125608444213867, "eval_runtime": 8.7053, "eval_samples_per_second": 539.1, "eval_steps_per_second": 67.431, "step": 437500 }, { "epoch": 100.39, "learning_rate": 4.999997687657884e-05, "loss": 1.0083, "step": 438000 }, { "epoch": 100.39, "eval_loss": 1.5467355251312256, "eval_runtime": 8.7102, "eval_samples_per_second": 538.793, "eval_steps_per_second": 67.392, "step": 438000 }, { "epoch": 100.5, "learning_rate": 4.999996127427447e-05, "loss": 0.9995, "step": 438500 }, { "epoch": 100.5, "eval_loss": 1.4962180852890015, "eval_runtime": 8.7078, "eval_samples_per_second": 538.944, "eval_steps_per_second": 67.411, "step": 438500 }, { "epoch": 100.62, "learning_rate": 4.999994167138384e-05, "loss": 0.9993, "step": 439000 }, { "epoch": 100.62, "eval_loss": 1.5406187772750854, "eval_runtime": 8.7059, "eval_samples_per_second": 539.059, "eval_steps_per_second": 67.425, "step": 439000 }, { "epoch": 100.73, "learning_rate": 4.999991806791008e-05, "loss": 1.0061, "step": 439500 }, { "epoch": 100.73, "eval_loss": 1.5179823637008667, "eval_runtime": 8.7029, "eval_samples_per_second": 539.246, "eval_steps_per_second": 67.449, "step": 439500 }, { "epoch": 100.85, "learning_rate": 4.9999890463856975e-05, "loss": 1.0098, "step": 440000 }, { "epoch": 100.85, "eval_loss": 1.518656611442566, "eval_runtime": 8.7094, "eval_samples_per_second": 538.844, "eval_steps_per_second": 67.399, "step": 440000 }, { "epoch": 100.96, "learning_rate": 4.9999858859228935e-05, "loss": 1.0138, "step": 440500 }, { "epoch": 100.96, "eval_loss": 1.5177662372589111, "eval_runtime": 8.7, "eval_samples_per_second": 539.428, "eval_steps_per_second": 67.472, "step": 440500 }, { "epoch": 101.08, "learning_rate": 4.999982325403103e-05, "loss": 1.0009, "step": 441000 }, { "epoch": 101.08, "eval_loss": 1.5326722860336304, "eval_runtime": 8.7105, "eval_samples_per_second": 538.777, "eval_steps_per_second": 67.39, "step": 441000 }, { "epoch": 101.19, "learning_rate": 4.9999783648268937e-05, "loss": 0.9939, "step": 441500 }, { "epoch": 101.19, "eval_loss": 1.5028088092803955, "eval_runtime": 8.7045, "eval_samples_per_second": 539.149, "eval_steps_per_second": 67.437, "step": 441500 }, { "epoch": 101.31, "learning_rate": 4.9999740041949006e-05, "loss": 1.0031, "step": 442000 }, { "epoch": 101.31, "eval_loss": 1.528794765472412, "eval_runtime": 8.7148, "eval_samples_per_second": 538.51, "eval_steps_per_second": 67.357, "step": 442000 }, { "epoch": 101.42, "learning_rate": 4.999969243507822e-05, "loss": 1.009, "step": 442500 }, { "epoch": 101.42, "eval_loss": 1.5179829597473145, "eval_runtime": 8.703, "eval_samples_per_second": 539.24, "eval_steps_per_second": 67.448, "step": 442500 }, { "epoch": 101.54, "learning_rate": 4.999964082766419e-05, "loss": 1.0092, "step": 443000 }, { "epoch": 101.54, "eval_loss": 1.5124599933624268, "eval_runtime": 8.7071, "eval_samples_per_second": 538.985, "eval_steps_per_second": 67.416, "step": 443000 }, { "epoch": 101.65, "learning_rate": 4.999958521971518e-05, "loss": 1.0045, "step": 443500 }, { "epoch": 101.65, "eval_loss": 1.520786166191101, "eval_runtime": 8.7015, "eval_samples_per_second": 539.33, "eval_steps_per_second": 67.459, "step": 443500 }, { "epoch": 101.76, "learning_rate": 4.999952561124008e-05, "loss": 1.007, "step": 444000 }, { "epoch": 101.76, "eval_loss": 1.5255627632141113, "eval_runtime": 8.7217, "eval_samples_per_second": 538.08, "eval_steps_per_second": 67.303, "step": 444000 }, { "epoch": 101.88, "learning_rate": 4.999946200224843e-05, "loss": 1.0053, "step": 444500 }, { "epoch": 101.88, "eval_loss": 1.5204198360443115, "eval_runtime": 8.7057, "eval_samples_per_second": 539.072, "eval_steps_per_second": 67.427, "step": 444500 }, { "epoch": 101.99, "learning_rate": 4.999939439275042e-05, "loss": 1.006, "step": 445000 }, { "epoch": 101.99, "eval_loss": 1.5378049612045288, "eval_runtime": 8.7079, "eval_samples_per_second": 538.936, "eval_steps_per_second": 67.41, "step": 445000 }, { "epoch": 102.11, "learning_rate": 4.9999322782756856e-05, "loss": 0.9964, "step": 445500 }, { "epoch": 102.11, "eval_loss": 1.498360276222229, "eval_runtime": 8.7014, "eval_samples_per_second": 539.341, "eval_steps_per_second": 67.461, "step": 445500 }, { "epoch": 102.22, "learning_rate": 4.9999247172279206e-05, "loss": 0.9962, "step": 446000 }, { "epoch": 102.22, "eval_loss": 1.501076340675354, "eval_runtime": 8.7042, "eval_samples_per_second": 539.164, "eval_steps_per_second": 67.439, "step": 446000 }, { "epoch": 102.34, "learning_rate": 4.9999167561329565e-05, "loss": 1.0039, "step": 446500 }, { "epoch": 102.34, "eval_loss": 1.514583945274353, "eval_runtime": 8.7024, "eval_samples_per_second": 539.277, "eval_steps_per_second": 67.453, "step": 446500 }, { "epoch": 102.45, "learning_rate": 4.999908394992068e-05, "loss": 1.0035, "step": 447000 }, { "epoch": 102.45, "eval_loss": 1.53276526927948, "eval_runtime": 8.7034, "eval_samples_per_second": 539.213, "eval_steps_per_second": 67.445, "step": 447000 }, { "epoch": 102.57, "learning_rate": 4.9998996338065916e-05, "loss": 1.0089, "step": 447500 }, { "epoch": 102.57, "eval_loss": 1.5149027109146118, "eval_runtime": 8.7139, "eval_samples_per_second": 538.568, "eval_steps_per_second": 67.364, "step": 447500 }, { "epoch": 102.68, "learning_rate": 4.9998904725779304e-05, "loss": 0.9999, "step": 448000 }, { "epoch": 102.68, "eval_loss": 1.512225866317749, "eval_runtime": 8.7103, "eval_samples_per_second": 538.787, "eval_steps_per_second": 67.391, "step": 448000 }, { "epoch": 102.8, "learning_rate": 4.99988091130755e-05, "loss": 0.9996, "step": 448500 }, { "epoch": 102.8, "eval_loss": 1.517555832862854, "eval_runtime": 8.7167, "eval_samples_per_second": 538.393, "eval_steps_per_second": 67.342, "step": 448500 }, { "epoch": 102.91, "learning_rate": 4.999870949996981e-05, "loss": 1.0048, "step": 449000 }, { "epoch": 102.91, "eval_loss": 1.5219563245773315, "eval_runtime": 8.7138, "eval_samples_per_second": 538.573, "eval_steps_per_second": 67.365, "step": 449000 }, { "epoch": 103.03, "learning_rate": 4.999860588647817e-05, "loss": 1.009, "step": 449500 }, { "epoch": 103.03, "eval_loss": 1.5057759284973145, "eval_runtime": 8.7091, "eval_samples_per_second": 538.861, "eval_steps_per_second": 67.401, "step": 449500 }, { "epoch": 103.14, "learning_rate": 4.999849827261716e-05, "loss": 0.994, "step": 450000 }, { "epoch": 103.14, "eval_loss": 1.5149004459381104, "eval_runtime": 8.7185, "eval_samples_per_second": 538.278, "eval_steps_per_second": 67.328, "step": 450000 }, { "epoch": 103.25, "learning_rate": 4.9998386658404e-05, "loss": 0.9904, "step": 450500 }, { "epoch": 103.25, "eval_loss": 1.524533987045288, "eval_runtime": 8.7149, "eval_samples_per_second": 538.501, "eval_steps_per_second": 67.356, "step": 450500 }, { "epoch": 103.37, "learning_rate": 4.999827104385656e-05, "loss": 1.0004, "step": 451000 }, { "epoch": 103.37, "eval_loss": 1.5156927108764648, "eval_runtime": 8.717, "eval_samples_per_second": 538.376, "eval_steps_per_second": 67.34, "step": 451000 }, { "epoch": 103.48, "learning_rate": 4.9998151428993325e-05, "loss": 1.0019, "step": 451500 }, { "epoch": 103.48, "eval_loss": 1.5108964443206787, "eval_runtime": 8.7128, "eval_samples_per_second": 538.632, "eval_steps_per_second": 67.372, "step": 451500 }, { "epoch": 103.6, "learning_rate": 4.9998027813833456e-05, "loss": 1.0006, "step": 452000 }, { "epoch": 103.6, "eval_loss": 1.5223747491836548, "eval_runtime": 8.7202, "eval_samples_per_second": 538.174, "eval_steps_per_second": 67.315, "step": 452000 }, { "epoch": 103.71, "learning_rate": 4.999790019839672e-05, "loss": 1.0034, "step": 452500 }, { "epoch": 103.71, "eval_loss": 1.5292478799819946, "eval_runtime": 8.7158, "eval_samples_per_second": 538.445, "eval_steps_per_second": 67.349, "step": 452500 }, { "epoch": 103.83, "learning_rate": 4.999776858270353e-05, "loss": 1.0017, "step": 453000 }, { "epoch": 103.83, "eval_loss": 1.5193545818328857, "eval_runtime": 8.7222, "eval_samples_per_second": 538.052, "eval_steps_per_second": 67.3, "step": 453000 }, { "epoch": 103.94, "learning_rate": 4.999763296677498e-05, "loss": 1.0033, "step": 453500 }, { "epoch": 103.94, "eval_loss": 1.5089852809906006, "eval_runtime": 8.7525, "eval_samples_per_second": 536.189, "eval_steps_per_second": 67.067, "step": 453500 }, { "epoch": 104.06, "learning_rate": 4.9997493350632735e-05, "loss": 1.0106, "step": 454000 }, { "epoch": 104.06, "eval_loss": 1.5238852500915527, "eval_runtime": 8.7524, "eval_samples_per_second": 536.196, "eval_steps_per_second": 67.067, "step": 454000 }, { "epoch": 104.17, "learning_rate": 4.999734973429916e-05, "loss": 0.991, "step": 454500 }, { "epoch": 104.17, "eval_loss": 1.5160801410675049, "eval_runtime": 8.7485, "eval_samples_per_second": 536.433, "eval_steps_per_second": 67.097, "step": 454500 }, { "epoch": 104.29, "learning_rate": 4.9997202117797226e-05, "loss": 0.996, "step": 455000 }, { "epoch": 104.29, "eval_loss": 1.5305112600326538, "eval_runtime": 8.7591, "eval_samples_per_second": 535.787, "eval_steps_per_second": 67.016, "step": 455000 }, { "epoch": 104.4, "learning_rate": 4.9997050501150566e-05, "loss": 1.0009, "step": 455500 }, { "epoch": 104.4, "eval_loss": 1.5229225158691406, "eval_runtime": 8.7594, "eval_samples_per_second": 535.765, "eval_steps_per_second": 67.013, "step": 455500 }, { "epoch": 104.52, "learning_rate": 4.999689488438343e-05, "loss": 0.9951, "step": 456000 }, { "epoch": 104.52, "eval_loss": 1.5439473390579224, "eval_runtime": 8.6898, "eval_samples_per_second": 540.057, "eval_steps_per_second": 67.55, "step": 456000 }, { "epoch": 104.63, "learning_rate": 4.999673526752073e-05, "loss": 1.0008, "step": 456500 }, { "epoch": 104.63, "eval_loss": 1.5422166585922241, "eval_runtime": 8.7777, "eval_samples_per_second": 534.652, "eval_steps_per_second": 66.874, "step": 456500 }, { "epoch": 104.74, "learning_rate": 4.9996571650588e-05, "loss": 1.0057, "step": 457000 }, { "epoch": 104.74, "eval_loss": 1.5304185152053833, "eval_runtime": 8.763, "eval_samples_per_second": 535.548, "eval_steps_per_second": 66.986, "step": 457000 }, { "epoch": 104.86, "learning_rate": 4.999640403361143e-05, "loss": 1.0032, "step": 457500 }, { "epoch": 104.86, "eval_loss": 1.528243899345398, "eval_runtime": 8.7618, "eval_samples_per_second": 535.623, "eval_steps_per_second": 66.996, "step": 457500 }, { "epoch": 104.97, "learning_rate": 4.999623241661784e-05, "loss": 0.9933, "step": 458000 }, { "epoch": 104.97, "eval_loss": 1.511940836906433, "eval_runtime": 8.7507, "eval_samples_per_second": 536.299, "eval_steps_per_second": 67.08, "step": 458000 }, { "epoch": 105.09, "learning_rate": 4.999605679963469e-05, "loss": 0.9925, "step": 458500 }, { "epoch": 105.09, "eval_loss": 1.54851496219635, "eval_runtime": 8.7618, "eval_samples_per_second": 535.62, "eval_steps_per_second": 66.995, "step": 458500 }, { "epoch": 105.2, "learning_rate": 4.9995877182690096e-05, "loss": 0.9858, "step": 459000 }, { "epoch": 105.2, "eval_loss": 1.5314478874206543, "eval_runtime": 8.7134, "eval_samples_per_second": 538.594, "eval_steps_per_second": 67.367, "step": 459000 }, { "epoch": 105.32, "learning_rate": 4.999569356581278e-05, "loss": 0.9982, "step": 459500 }, { "epoch": 105.32, "eval_loss": 1.5437909364700317, "eval_runtime": 8.7134, "eval_samples_per_second": 538.596, "eval_steps_per_second": 67.368, "step": 459500 }, { "epoch": 105.43, "learning_rate": 4.999550594903214e-05, "loss": 0.9916, "step": 460000 }, { "epoch": 105.43, "eval_loss": 1.5357139110565186, "eval_runtime": 8.7157, "eval_samples_per_second": 538.453, "eval_steps_per_second": 67.35, "step": 460000 }, { "epoch": 105.55, "learning_rate": 4.999531433237819e-05, "loss": 0.9962, "step": 460500 }, { "epoch": 105.55, "eval_loss": 1.5142685174942017, "eval_runtime": 8.7109, "eval_samples_per_second": 538.752, "eval_steps_per_second": 67.387, "step": 460500 }, { "epoch": 105.66, "learning_rate": 4.9995118715881606e-05, "loss": 1.0005, "step": 461000 }, { "epoch": 105.66, "eval_loss": 1.509539008140564, "eval_runtime": 8.726, "eval_samples_per_second": 537.818, "eval_steps_per_second": 67.27, "step": 461000 }, { "epoch": 105.78, "learning_rate": 4.999491909957368e-05, "loss": 0.9961, "step": 461500 }, { "epoch": 105.78, "eval_loss": 1.51961088180542, "eval_runtime": 8.7117, "eval_samples_per_second": 538.701, "eval_steps_per_second": 67.381, "step": 461500 }, { "epoch": 105.89, "learning_rate": 4.999471548348635e-05, "loss": 1.0035, "step": 462000 }, { "epoch": 105.89, "eval_loss": 1.5083444118499756, "eval_runtime": 8.7195, "eval_samples_per_second": 538.217, "eval_steps_per_second": 67.32, "step": 462000 }, { "epoch": 106.01, "learning_rate": 4.999450786765222e-05, "loss": 1.0053, "step": 462500 }, { "epoch": 106.01, "eval_loss": 1.5378599166870117, "eval_runtime": 8.7194, "eval_samples_per_second": 538.222, "eval_steps_per_second": 67.321, "step": 462500 }, { "epoch": 106.12, "learning_rate": 4.999429625210449e-05, "loss": 0.9915, "step": 463000 }, { "epoch": 106.12, "eval_loss": 1.5134644508361816, "eval_runtime": 8.7116, "eval_samples_per_second": 538.704, "eval_steps_per_second": 67.381, "step": 463000 }, { "epoch": 106.23, "learning_rate": 4.999408063687704e-05, "loss": 0.9892, "step": 463500 }, { "epoch": 106.23, "eval_loss": 1.5528517961502075, "eval_runtime": 8.7123, "eval_samples_per_second": 538.666, "eval_steps_per_second": 67.376, "step": 463500 }, { "epoch": 106.35, "learning_rate": 4.9993861022004374e-05, "loss": 0.9916, "step": 464000 }, { "epoch": 106.35, "eval_loss": 1.5156744718551636, "eval_runtime": 8.7128, "eval_samples_per_second": 538.63, "eval_steps_per_second": 67.372, "step": 464000 }, { "epoch": 106.46, "learning_rate": 4.999363740752162e-05, "loss": 0.9937, "step": 464500 }, { "epoch": 106.46, "eval_loss": 1.5204541683197021, "eval_runtime": 8.7038, "eval_samples_per_second": 539.193, "eval_steps_per_second": 67.442, "step": 464500 }, { "epoch": 106.58, "learning_rate": 4.999340979346458e-05, "loss": 0.9923, "step": 465000 }, { "epoch": 106.58, "eval_loss": 1.5290967226028442, "eval_runtime": 8.7153, "eval_samples_per_second": 538.475, "eval_steps_per_second": 67.352, "step": 465000 }, { "epoch": 106.69, "learning_rate": 4.999317817986966e-05, "loss": 0.9958, "step": 465500 }, { "epoch": 106.69, "eval_loss": 1.5046987533569336, "eval_runtime": 8.7157, "eval_samples_per_second": 538.453, "eval_steps_per_second": 67.35, "step": 465500 }, { "epoch": 106.81, "learning_rate": 4.9992942566773945e-05, "loss": 1.0014, "step": 466000 }, { "epoch": 106.81, "eval_loss": 1.5290082693099976, "eval_runtime": 8.7111, "eval_samples_per_second": 538.741, "eval_steps_per_second": 67.386, "step": 466000 }, { "epoch": 106.92, "learning_rate": 4.999270295421512e-05, "loss": 0.9981, "step": 466500 }, { "epoch": 106.92, "eval_loss": 1.5200011730194092, "eval_runtime": 8.7109, "eval_samples_per_second": 538.752, "eval_steps_per_second": 67.387, "step": 466500 }, { "epoch": 107.04, "learning_rate": 4.9992459342231534e-05, "loss": 1.0097, "step": 467000 }, { "epoch": 107.04, "eval_loss": 1.5475919246673584, "eval_runtime": 8.718, "eval_samples_per_second": 538.314, "eval_steps_per_second": 67.332, "step": 467000 }, { "epoch": 107.15, "learning_rate": 4.999221173086218e-05, "loss": 0.9928, "step": 467500 }, { "epoch": 107.15, "eval_loss": 1.5455296039581299, "eval_runtime": 8.7108, "eval_samples_per_second": 538.759, "eval_steps_per_second": 67.388, "step": 467500 }, { "epoch": 107.27, "learning_rate": 4.9991960120146666e-05, "loss": 0.9875, "step": 468000 }, { "epoch": 107.27, "eval_loss": 1.5273206233978271, "eval_runtime": 8.705, "eval_samples_per_second": 539.118, "eval_steps_per_second": 67.433, "step": 468000 }, { "epoch": 107.38, "learning_rate": 4.9991704510125256e-05, "loss": 0.9918, "step": 468500 }, { "epoch": 107.38, "eval_loss": 1.5214149951934814, "eval_runtime": 8.7101, "eval_samples_per_second": 538.798, "eval_steps_per_second": 67.393, "step": 468500 }, { "epoch": 107.49, "learning_rate": 4.9991444900838876e-05, "loss": 0.9891, "step": 469000 }, { "epoch": 107.49, "eval_loss": 1.538475513458252, "eval_runtime": 8.7115, "eval_samples_per_second": 538.711, "eval_steps_per_second": 67.382, "step": 469000 }, { "epoch": 107.61, "learning_rate": 4.9991181292329046e-05, "loss": 0.9983, "step": 469500 }, { "epoch": 107.61, "eval_loss": 1.5273301601409912, "eval_runtime": 8.7151, "eval_samples_per_second": 538.492, "eval_steps_per_second": 67.355, "step": 469500 }, { "epoch": 107.72, "learning_rate": 4.9990913684637966e-05, "loss": 1.0019, "step": 470000 }, { "epoch": 107.72, "eval_loss": 1.53476083278656, "eval_runtime": 8.7151, "eval_samples_per_second": 538.488, "eval_steps_per_second": 67.354, "step": 470000 }, { "epoch": 107.84, "learning_rate": 4.999064207780845e-05, "loss": 1.0006, "step": 470500 }, { "epoch": 107.84, "eval_loss": 1.5290677547454834, "eval_runtime": 8.7153, "eval_samples_per_second": 538.476, "eval_steps_per_second": 67.353, "step": 470500 }, { "epoch": 107.95, "learning_rate": 4.9990366471883954e-05, "loss": 0.9957, "step": 471000 }, { "epoch": 107.95, "eval_loss": 1.5282570123672485, "eval_runtime": 8.7137, "eval_samples_per_second": 538.578, "eval_steps_per_second": 67.365, "step": 471000 }, { "epoch": 108.07, "learning_rate": 4.99900868669086e-05, "loss": 0.9937, "step": 471500 }, { "epoch": 108.07, "eval_loss": 1.524748682975769, "eval_runtime": 8.7047, "eval_samples_per_second": 539.134, "eval_steps_per_second": 67.435, "step": 471500 }, { "epoch": 108.18, "learning_rate": 4.998980326292711e-05, "loss": 0.987, "step": 472000 }, { "epoch": 108.18, "eval_loss": 1.5219438076019287, "eval_runtime": 8.7191, "eval_samples_per_second": 538.246, "eval_steps_per_second": 67.324, "step": 472000 }, { "epoch": 108.3, "learning_rate": 4.99895156599849e-05, "loss": 0.9854, "step": 472500 }, { "epoch": 108.3, "eval_loss": 1.532366156578064, "eval_runtime": 8.7099, "eval_samples_per_second": 538.81, "eval_steps_per_second": 67.394, "step": 472500 }, { "epoch": 108.41, "learning_rate": 4.998922405812796e-05, "loss": 0.9963, "step": 473000 }, { "epoch": 108.41, "eval_loss": 1.5245909690856934, "eval_runtime": 8.714, "eval_samples_per_second": 538.56, "eval_steps_per_second": 67.363, "step": 473000 }, { "epoch": 108.53, "learning_rate": 4.9988928457402964e-05, "loss": 0.993, "step": 473500 }, { "epoch": 108.53, "eval_loss": 1.5529441833496094, "eval_runtime": 8.7221, "eval_samples_per_second": 538.057, "eval_steps_per_second": 67.3, "step": 473500 }, { "epoch": 108.64, "learning_rate": 4.998862885785722e-05, "loss": 0.9967, "step": 474000 }, { "epoch": 108.64, "eval_loss": 1.5374892950057983, "eval_runtime": 8.7164, "eval_samples_per_second": 538.409, "eval_steps_per_second": 67.344, "step": 474000 }, { "epoch": 108.76, "learning_rate": 4.998832525953867e-05, "loss": 0.9914, "step": 474500 }, { "epoch": 108.76, "eval_loss": 1.5572665929794312, "eval_runtime": 8.7137, "eval_samples_per_second": 538.576, "eval_steps_per_second": 67.365, "step": 474500 }, { "epoch": 108.87, "learning_rate": 4.998801766249589e-05, "loss": 0.9947, "step": 475000 }, { "epoch": 108.87, "eval_loss": 1.5253041982650757, "eval_runtime": 8.7171, "eval_samples_per_second": 538.366, "eval_steps_per_second": 67.339, "step": 475000 }, { "epoch": 108.98, "learning_rate": 4.998770606677812e-05, "loss": 1.0045, "step": 475500 }, { "epoch": 108.98, "eval_loss": 1.5381258726119995, "eval_runtime": 8.7141, "eval_samples_per_second": 538.551, "eval_steps_per_second": 67.362, "step": 475500 }, { "epoch": 109.1, "learning_rate": 4.9987390472435196e-05, "loss": 0.9876, "step": 476000 }, { "epoch": 109.1, "eval_loss": 1.53707754611969, "eval_runtime": 8.7099, "eval_samples_per_second": 538.811, "eval_steps_per_second": 67.394, "step": 476000 }, { "epoch": 109.21, "learning_rate": 4.998707087951764e-05, "loss": 0.9934, "step": 476500 }, { "epoch": 109.21, "eval_loss": 1.5339478254318237, "eval_runtime": 8.7185, "eval_samples_per_second": 538.282, "eval_steps_per_second": 67.328, "step": 476500 }, { "epoch": 109.33, "learning_rate": 4.99867472880766e-05, "loss": 0.9849, "step": 477000 }, { "epoch": 109.33, "eval_loss": 1.543542504310608, "eval_runtime": 8.718, "eval_samples_per_second": 538.31, "eval_steps_per_second": 67.332, "step": 477000 }, { "epoch": 109.44, "learning_rate": 4.9986419698163835e-05, "loss": 0.9916, "step": 477500 }, { "epoch": 109.44, "eval_loss": 1.5386221408843994, "eval_runtime": 8.7138, "eval_samples_per_second": 538.571, "eval_steps_per_second": 67.364, "step": 477500 }, { "epoch": 109.56, "learning_rate": 4.998608810983179e-05, "loss": 0.993, "step": 478000 }, { "epoch": 109.56, "eval_loss": 1.5468028783798218, "eval_runtime": 8.7174, "eval_samples_per_second": 538.348, "eval_steps_per_second": 67.337, "step": 478000 }, { "epoch": 109.67, "learning_rate": 4.998575252313351e-05, "loss": 0.9953, "step": 478500 }, { "epoch": 109.67, "eval_loss": 1.539225459098816, "eval_runtime": 8.7116, "eval_samples_per_second": 538.706, "eval_steps_per_second": 67.381, "step": 478500 }, { "epoch": 109.79, "learning_rate": 4.9985412938122705e-05, "loss": 0.9961, "step": 479000 }, { "epoch": 109.79, "eval_loss": 1.5442333221435547, "eval_runtime": 8.72, "eval_samples_per_second": 538.187, "eval_steps_per_second": 67.316, "step": 479000 }, { "epoch": 109.9, "learning_rate": 4.998506935485372e-05, "loss": 1.0031, "step": 479500 }, { "epoch": 109.9, "eval_loss": 1.5494986772537231, "eval_runtime": 8.7105, "eval_samples_per_second": 538.774, "eval_steps_per_second": 67.39, "step": 479500 }, { "epoch": 110.02, "learning_rate": 4.998472177338153e-05, "loss": 0.9943, "step": 480000 }, { "epoch": 110.02, "eval_loss": 1.5425715446472168, "eval_runtime": 8.7137, "eval_samples_per_second": 538.575, "eval_steps_per_second": 67.365, "step": 480000 }, { "epoch": 110.13, "learning_rate": 4.9984370193761755e-05, "loss": 0.9846, "step": 480500 }, { "epoch": 110.13, "eval_loss": 1.5290088653564453, "eval_runtime": 8.712, "eval_samples_per_second": 538.68, "eval_steps_per_second": 67.378, "step": 480500 }, { "epoch": 110.25, "learning_rate": 4.9984014616050653e-05, "loss": 0.984, "step": 481000 }, { "epoch": 110.25, "eval_loss": 1.5610860586166382, "eval_runtime": 8.7225, "eval_samples_per_second": 538.032, "eval_steps_per_second": 67.297, "step": 481000 }, { "epoch": 110.36, "learning_rate": 4.9983655040305145e-05, "loss": 0.9863, "step": 481500 }, { "epoch": 110.36, "eval_loss": 1.5464569330215454, "eval_runtime": 8.7161, "eval_samples_per_second": 538.432, "eval_steps_per_second": 67.347, "step": 481500 }, { "epoch": 110.47, "learning_rate": 4.9983291466582754e-05, "loss": 0.9936, "step": 482000 }, { "epoch": 110.47, "eval_loss": 1.5562679767608643, "eval_runtime": 8.7217, "eval_samples_per_second": 538.081, "eval_steps_per_second": 67.303, "step": 482000 }, { "epoch": 110.59, "learning_rate": 4.998292389494166e-05, "loss": 0.9876, "step": 482500 }, { "epoch": 110.59, "eval_loss": 1.5260345935821533, "eval_runtime": 8.7075, "eval_samples_per_second": 538.962, "eval_steps_per_second": 67.413, "step": 482500 }, { "epoch": 110.7, "learning_rate": 4.998255232544069e-05, "loss": 0.9981, "step": 483000 }, { "epoch": 110.7, "eval_loss": 1.5540817975997925, "eval_runtime": 8.7159, "eval_samples_per_second": 538.442, "eval_steps_per_second": 67.348, "step": 483000 }, { "epoch": 110.82, "learning_rate": 4.998217675813931e-05, "loss": 0.9941, "step": 483500 }, { "epoch": 110.82, "eval_loss": 1.5193630456924438, "eval_runtime": 8.7518, "eval_samples_per_second": 536.231, "eval_steps_per_second": 67.072, "step": 483500 }, { "epoch": 110.93, "learning_rate": 4.99817971930976e-05, "loss": 0.9984, "step": 484000 }, { "epoch": 110.93, "eval_loss": 1.5437209606170654, "eval_runtime": 8.7205, "eval_samples_per_second": 538.159, "eval_steps_per_second": 67.313, "step": 484000 }, { "epoch": 111.05, "learning_rate": 4.998141363037632e-05, "loss": 0.9885, "step": 484500 }, { "epoch": 111.05, "eval_loss": 1.5293524265289307, "eval_runtime": 8.7168, "eval_samples_per_second": 538.384, "eval_steps_per_second": 67.341, "step": 484500 }, { "epoch": 111.16, "learning_rate": 4.998102607003683e-05, "loss": 0.9877, "step": 485000 }, { "epoch": 111.16, "eval_loss": 1.5505014657974243, "eval_runtime": 8.7166, "eval_samples_per_second": 538.399, "eval_steps_per_second": 67.343, "step": 485000 }, { "epoch": 111.28, "learning_rate": 4.998063451214116e-05, "loss": 0.9864, "step": 485500 }, { "epoch": 111.28, "eval_loss": 1.5457996129989624, "eval_runtime": 8.7155, "eval_samples_per_second": 538.469, "eval_steps_per_second": 67.352, "step": 485500 }, { "epoch": 111.39, "learning_rate": 4.998023895675197e-05, "loss": 0.9816, "step": 486000 }, { "epoch": 111.39, "eval_loss": 1.5278356075286865, "eval_runtime": 8.7294, "eval_samples_per_second": 537.608, "eval_steps_per_second": 67.244, "step": 486000 }, { "epoch": 111.51, "learning_rate": 4.997983940393255e-05, "loss": 0.9813, "step": 486500 }, { "epoch": 111.51, "eval_loss": 1.5545495748519897, "eval_runtime": 8.7071, "eval_samples_per_second": 538.987, "eval_steps_per_second": 67.416, "step": 486500 }, { "epoch": 111.62, "learning_rate": 4.997943585374685e-05, "loss": 0.9931, "step": 487000 }, { "epoch": 111.62, "eval_loss": 1.536719799041748, "eval_runtime": 8.719, "eval_samples_per_second": 538.252, "eval_steps_per_second": 67.324, "step": 487000 }, { "epoch": 111.73, "learning_rate": 4.997902830625943e-05, "loss": 0.9884, "step": 487500 }, { "epoch": 111.73, "eval_loss": 1.550118088722229, "eval_runtime": 8.711, "eval_samples_per_second": 538.744, "eval_steps_per_second": 67.386, "step": 487500 }, { "epoch": 111.85, "learning_rate": 4.9978616761535535e-05, "loss": 0.9917, "step": 488000 }, { "epoch": 111.85, "eval_loss": 1.5616952180862427, "eval_runtime": 8.7263, "eval_samples_per_second": 537.799, "eval_steps_per_second": 67.268, "step": 488000 }, { "epoch": 111.96, "learning_rate": 4.9978201219640995e-05, "loss": 0.9903, "step": 488500 }, { "epoch": 111.96, "eval_loss": 1.546220302581787, "eval_runtime": 8.7132, "eval_samples_per_second": 538.606, "eval_steps_per_second": 67.369, "step": 488500 }, { "epoch": 112.08, "learning_rate": 4.997778168064232e-05, "loss": 0.9909, "step": 489000 }, { "epoch": 112.08, "eval_loss": 1.5234308242797852, "eval_runtime": 8.7163, "eval_samples_per_second": 538.416, "eval_steps_per_second": 67.345, "step": 489000 }, { "epoch": 112.19, "learning_rate": 4.9977358144606636e-05, "loss": 0.9793, "step": 489500 }, { "epoch": 112.19, "eval_loss": 1.5523329973220825, "eval_runtime": 8.708, "eval_samples_per_second": 538.927, "eval_steps_per_second": 67.409, "step": 489500 }, { "epoch": 112.31, "learning_rate": 4.997693061160173e-05, "loss": 0.9743, "step": 490000 }, { "epoch": 112.31, "eval_loss": 1.5496488809585571, "eval_runtime": 8.7162, "eval_samples_per_second": 538.423, "eval_steps_per_second": 67.346, "step": 490000 }, { "epoch": 112.42, "learning_rate": 4.997649908169602e-05, "loss": 0.9911, "step": 490500 }, { "epoch": 112.42, "eval_loss": 1.5421030521392822, "eval_runtime": 8.7238, "eval_samples_per_second": 537.954, "eval_steps_per_second": 67.287, "step": 490500 }, { "epoch": 112.54, "learning_rate": 4.997606355495854e-05, "loss": 0.9798, "step": 491000 }, { "epoch": 112.54, "eval_loss": 1.549683690071106, "eval_runtime": 8.7233, "eval_samples_per_second": 537.985, "eval_steps_per_second": 67.291, "step": 491000 }, { "epoch": 112.65, "learning_rate": 4.9975624031459e-05, "loss": 0.9901, "step": 491500 }, { "epoch": 112.65, "eval_loss": 1.5601458549499512, "eval_runtime": 8.7099, "eval_samples_per_second": 538.81, "eval_steps_per_second": 67.394, "step": 491500 }, { "epoch": 112.77, "learning_rate": 4.997518051126774e-05, "loss": 0.9898, "step": 492000 }, { "epoch": 112.77, "eval_loss": 1.5322703123092651, "eval_runtime": 8.7108, "eval_samples_per_second": 538.759, "eval_steps_per_second": 67.388, "step": 492000 }, { "epoch": 112.88, "learning_rate": 4.997473299445573e-05, "loss": 0.9882, "step": 492500 }, { "epoch": 112.88, "eval_loss": 1.545603632926941, "eval_runtime": 8.7159, "eval_samples_per_second": 538.441, "eval_steps_per_second": 67.348, "step": 492500 }, { "epoch": 113.0, "learning_rate": 4.997428148109457e-05, "loss": 0.9944, "step": 493000 }, { "epoch": 113.0, "eval_loss": 1.550269603729248, "eval_runtime": 8.7163, "eval_samples_per_second": 538.419, "eval_steps_per_second": 67.345, "step": 493000 }, { "epoch": 113.11, "learning_rate": 4.997382597125653e-05, "loss": 0.9807, "step": 493500 }, { "epoch": 113.11, "eval_loss": 1.5491199493408203, "eval_runtime": 8.7139, "eval_samples_per_second": 538.567, "eval_steps_per_second": 67.364, "step": 493500 }, { "epoch": 113.22, "learning_rate": 4.9973366465014487e-05, "loss": 0.9767, "step": 494000 }, { "epoch": 113.22, "eval_loss": 1.5690088272094727, "eval_runtime": 8.7173, "eval_samples_per_second": 538.354, "eval_steps_per_second": 67.337, "step": 494000 }, { "epoch": 113.34, "learning_rate": 4.997290296244199e-05, "loss": 0.9815, "step": 494500 }, { "epoch": 113.34, "eval_loss": 1.5729398727416992, "eval_runtime": 8.7129, "eval_samples_per_second": 538.629, "eval_steps_per_second": 67.372, "step": 494500 }, { "epoch": 113.45, "learning_rate": 4.997243546361319e-05, "loss": 0.9832, "step": 495000 }, { "epoch": 113.45, "eval_loss": 1.5527278184890747, "eval_runtime": 8.7198, "eval_samples_per_second": 538.2, "eval_steps_per_second": 67.318, "step": 495000 }, { "epoch": 113.57, "learning_rate": 4.997196396860292e-05, "loss": 0.9843, "step": 495500 }, { "epoch": 113.57, "eval_loss": 1.534860372543335, "eval_runtime": 8.7114, "eval_samples_per_second": 538.722, "eval_steps_per_second": 67.383, "step": 495500 }, { "epoch": 113.68, "learning_rate": 4.997148847748661e-05, "loss": 0.9914, "step": 496000 }, { "epoch": 113.68, "eval_loss": 1.5562418699264526, "eval_runtime": 8.7168, "eval_samples_per_second": 538.387, "eval_steps_per_second": 67.341, "step": 496000 }, { "epoch": 113.8, "learning_rate": 4.997100899034036e-05, "loss": 0.9853, "step": 496500 }, { "epoch": 113.8, "eval_loss": 1.5648597478866577, "eval_runtime": 8.7143, "eval_samples_per_second": 538.54, "eval_steps_per_second": 67.36, "step": 496500 }, { "epoch": 113.91, "learning_rate": 4.9970525507240904e-05, "loss": 0.9939, "step": 497000 }, { "epoch": 113.91, "eval_loss": 1.5399515628814697, "eval_runtime": 8.7149, "eval_samples_per_second": 538.502, "eval_steps_per_second": 67.356, "step": 497000 }, { "epoch": 114.03, "learning_rate": 4.9970038028265606e-05, "loss": 0.9888, "step": 497500 }, { "epoch": 114.03, "eval_loss": 1.553824543952942, "eval_runtime": 8.7138, "eval_samples_per_second": 538.573, "eval_steps_per_second": 67.365, "step": 497500 }, { "epoch": 114.14, "learning_rate": 4.9969546553492476e-05, "loss": 0.9756, "step": 498000 }, { "epoch": 114.14, "eval_loss": 1.562148094177246, "eval_runtime": 8.7146, "eval_samples_per_second": 538.52, "eval_steps_per_second": 67.358, "step": 498000 }, { "epoch": 114.26, "learning_rate": 4.996905108300015e-05, "loss": 0.9794, "step": 498500 }, { "epoch": 114.26, "eval_loss": 1.5621687173843384, "eval_runtime": 8.7223, "eval_samples_per_second": 538.048, "eval_steps_per_second": 67.299, "step": 498500 }, { "epoch": 114.37, "learning_rate": 4.996855161686793e-05, "loss": 0.984, "step": 499000 }, { "epoch": 114.37, "eval_loss": 1.5437289476394653, "eval_runtime": 8.7197, "eval_samples_per_second": 538.208, "eval_steps_per_second": 67.319, "step": 499000 }, { "epoch": 114.49, "learning_rate": 4.996804815517574e-05, "loss": 0.9864, "step": 499500 }, { "epoch": 114.49, "eval_loss": 1.5598416328430176, "eval_runtime": 8.7139, "eval_samples_per_second": 538.565, "eval_steps_per_second": 67.364, "step": 499500 }, { "epoch": 114.6, "learning_rate": 4.9967540698004136e-05, "loss": 0.9872, "step": 500000 }, { "epoch": 114.6, "eval_loss": 1.5519236326217651, "eval_runtime": 8.7197, "eval_samples_per_second": 538.204, "eval_steps_per_second": 67.318, "step": 500000 }, { "epoch": 114.71, "learning_rate": 4.9967029245434326e-05, "loss": 0.9811, "step": 500500 }, { "epoch": 114.71, "eval_loss": 1.5496835708618164, "eval_runtime": 8.723, "eval_samples_per_second": 538.005, "eval_steps_per_second": 67.294, "step": 500500 }, { "epoch": 114.83, "learning_rate": 4.996651379754816e-05, "loss": 0.9831, "step": 501000 }, { "epoch": 114.83, "eval_loss": 1.5279996395111084, "eval_runtime": 8.716, "eval_samples_per_second": 538.432, "eval_steps_per_second": 67.347, "step": 501000 }, { "epoch": 114.94, "learning_rate": 4.996599435442813e-05, "loss": 0.9882, "step": 501500 }, { "epoch": 114.94, "eval_loss": 1.5386055707931519, "eval_runtime": 8.7122, "eval_samples_per_second": 538.671, "eval_steps_per_second": 67.377, "step": 501500 }, { "epoch": 115.06, "learning_rate": 4.996547091615734e-05, "loss": 0.9798, "step": 502000 }, { "epoch": 115.06, "eval_loss": 1.5453013181686401, "eval_runtime": 8.7163, "eval_samples_per_second": 538.418, "eval_steps_per_second": 67.345, "step": 502000 }, { "epoch": 115.17, "learning_rate": 4.996494348281957e-05, "loss": 0.9746, "step": 502500 }, { "epoch": 115.17, "eval_loss": 1.5393743515014648, "eval_runtime": 8.7136, "eval_samples_per_second": 538.582, "eval_steps_per_second": 67.366, "step": 502500 }, { "epoch": 115.29, "learning_rate": 4.996441205449921e-05, "loss": 0.9796, "step": 503000 }, { "epoch": 115.29, "eval_loss": 1.5380384922027588, "eval_runtime": 8.7157, "eval_samples_per_second": 538.451, "eval_steps_per_second": 67.349, "step": 503000 }, { "epoch": 115.4, "learning_rate": 4.99638766312813e-05, "loss": 0.9773, "step": 503500 }, { "epoch": 115.4, "eval_loss": 1.5370591878890991, "eval_runtime": 8.7106, "eval_samples_per_second": 538.766, "eval_steps_per_second": 67.389, "step": 503500 }, { "epoch": 115.52, "learning_rate": 4.9963337213251526e-05, "loss": 0.9855, "step": 504000 }, { "epoch": 115.52, "eval_loss": 1.5624219179153442, "eval_runtime": 8.7161, "eval_samples_per_second": 538.431, "eval_steps_per_second": 67.347, "step": 504000 }, { "epoch": 115.63, "learning_rate": 4.996279380049621e-05, "loss": 0.9836, "step": 504500 }, { "epoch": 115.63, "eval_loss": 1.540080189704895, "eval_runtime": 8.7144, "eval_samples_per_second": 538.536, "eval_steps_per_second": 67.36, "step": 504500 }, { "epoch": 115.75, "learning_rate": 4.9962246393102306e-05, "loss": 0.984, "step": 505000 }, { "epoch": 115.75, "eval_loss": 1.5340124368667603, "eval_runtime": 8.7168, "eval_samples_per_second": 538.384, "eval_steps_per_second": 67.341, "step": 505000 }, { "epoch": 115.86, "learning_rate": 4.9961694991157416e-05, "loss": 0.9846, "step": 505500 }, { "epoch": 115.86, "eval_loss": 1.546715497970581, "eval_runtime": 8.7107, "eval_samples_per_second": 538.763, "eval_steps_per_second": 67.388, "step": 505500 }, { "epoch": 115.98, "learning_rate": 4.996113959474977e-05, "loss": 0.9912, "step": 506000 }, { "epoch": 115.98, "eval_loss": 1.5308336019515991, "eval_runtime": 8.7159, "eval_samples_per_second": 538.439, "eval_steps_per_second": 67.348, "step": 506000 }, { "epoch": 116.09, "learning_rate": 4.9960580203968254e-05, "loss": 0.9724, "step": 506500 }, { "epoch": 116.09, "eval_loss": 1.5460344552993774, "eval_runtime": 8.7092, "eval_samples_per_second": 538.856, "eval_steps_per_second": 67.4, "step": 506500 }, { "epoch": 116.2, "learning_rate": 4.996001681890238e-05, "loss": 0.9773, "step": 507000 }, { "epoch": 116.2, "eval_loss": 1.5416182279586792, "eval_runtime": 8.7112, "eval_samples_per_second": 538.73, "eval_steps_per_second": 67.384, "step": 507000 }, { "epoch": 116.32, "learning_rate": 4.99594494396423e-05, "loss": 0.9802, "step": 507500 }, { "epoch": 116.32, "eval_loss": 1.5512683391571045, "eval_runtime": 8.7151, "eval_samples_per_second": 538.493, "eval_steps_per_second": 67.355, "step": 507500 }, { "epoch": 116.43, "learning_rate": 4.995887806627881e-05, "loss": 0.9769, "step": 508000 }, { "epoch": 116.43, "eval_loss": 1.5552995204925537, "eval_runtime": 8.7083, "eval_samples_per_second": 538.909, "eval_steps_per_second": 67.407, "step": 508000 }, { "epoch": 116.55, "learning_rate": 4.995830269890335e-05, "loss": 0.9774, "step": 508500 }, { "epoch": 116.55, "eval_loss": 1.5408662557601929, "eval_runtime": 8.7116, "eval_samples_per_second": 538.708, "eval_steps_per_second": 67.382, "step": 508500 }, { "epoch": 116.66, "learning_rate": 4.995772333760798e-05, "loss": 0.9812, "step": 509000 }, { "epoch": 116.66, "eval_loss": 1.5601131916046143, "eval_runtime": 8.7189, "eval_samples_per_second": 538.254, "eval_steps_per_second": 67.325, "step": 509000 }, { "epoch": 116.78, "learning_rate": 4.9957139982485425e-05, "loss": 0.983, "step": 509500 }, { "epoch": 116.78, "eval_loss": 1.5532739162445068, "eval_runtime": 8.7159, "eval_samples_per_second": 538.44, "eval_steps_per_second": 67.348, "step": 509500 }, { "epoch": 116.89, "learning_rate": 4.9956552633629024e-05, "loss": 0.9815, "step": 510000 }, { "epoch": 116.89, "eval_loss": 1.5622001886367798, "eval_runtime": 8.7196, "eval_samples_per_second": 538.215, "eval_steps_per_second": 67.32, "step": 510000 }, { "epoch": 117.01, "learning_rate": 4.995596129113277e-05, "loss": 0.9895, "step": 510500 }, { "epoch": 117.01, "eval_loss": 1.5373715162277222, "eval_runtime": 8.709, "eval_samples_per_second": 538.865, "eval_steps_per_second": 67.401, "step": 510500 }, { "epoch": 117.12, "learning_rate": 4.9955365955091295e-05, "loss": 0.9751, "step": 511000 }, { "epoch": 117.12, "eval_loss": 1.5825059413909912, "eval_runtime": 8.7211, "eval_samples_per_second": 538.119, "eval_steps_per_second": 67.308, "step": 511000 }, { "epoch": 117.24, "learning_rate": 4.995476662559986e-05, "loss": 0.9719, "step": 511500 }, { "epoch": 117.24, "eval_loss": 1.5539088249206543, "eval_runtime": 8.7096, "eval_samples_per_second": 538.828, "eval_steps_per_second": 67.397, "step": 511500 }, { "epoch": 117.35, "learning_rate": 4.9954163302754386e-05, "loss": 0.9735, "step": 512000 }, { "epoch": 117.35, "eval_loss": 1.5788168907165527, "eval_runtime": 8.7063, "eval_samples_per_second": 539.033, "eval_steps_per_second": 67.422, "step": 512000 }, { "epoch": 117.46, "learning_rate": 4.99535559866514e-05, "loss": 0.9812, "step": 512500 }, { "epoch": 117.46, "eval_loss": 1.5479426383972168, "eval_runtime": 8.7108, "eval_samples_per_second": 538.758, "eval_steps_per_second": 67.388, "step": 512500 }, { "epoch": 117.58, "learning_rate": 4.9952944677388106e-05, "loss": 0.9807, "step": 513000 }, { "epoch": 117.58, "eval_loss": 1.554706335067749, "eval_runtime": 8.7133, "eval_samples_per_second": 538.604, "eval_steps_per_second": 67.369, "step": 513000 }, { "epoch": 117.69, "learning_rate": 4.995232937506231e-05, "loss": 0.9822, "step": 513500 }, { "epoch": 117.69, "eval_loss": 1.561903715133667, "eval_runtime": 8.7081, "eval_samples_per_second": 538.925, "eval_steps_per_second": 67.409, "step": 513500 }, { "epoch": 117.81, "learning_rate": 4.995171007977249e-05, "loss": 0.9877, "step": 514000 }, { "epoch": 117.81, "eval_loss": 1.550549864768982, "eval_runtime": 8.7127, "eval_samples_per_second": 538.636, "eval_steps_per_second": 67.373, "step": 514000 }, { "epoch": 117.92, "learning_rate": 4.995108679161774e-05, "loss": 0.9803, "step": 514500 }, { "epoch": 117.92, "eval_loss": 1.5534054040908813, "eval_runtime": 8.7097, "eval_samples_per_second": 538.824, "eval_steps_per_second": 67.396, "step": 514500 }, { "epoch": 118.04, "learning_rate": 4.99504595106978e-05, "loss": 0.9782, "step": 515000 }, { "epoch": 118.04, "eval_loss": 1.5442826747894287, "eval_runtime": 8.7081, "eval_samples_per_second": 538.923, "eval_steps_per_second": 67.408, "step": 515000 }, { "epoch": 118.15, "learning_rate": 4.9949828237113054e-05, "loss": 0.9693, "step": 515500 }, { "epoch": 118.15, "eval_loss": 1.576861023902893, "eval_runtime": 8.7137, "eval_samples_per_second": 538.575, "eval_steps_per_second": 67.365, "step": 515500 }, { "epoch": 118.27, "learning_rate": 4.994919297096452e-05, "loss": 0.9795, "step": 516000 }, { "epoch": 118.27, "eval_loss": 1.553696870803833, "eval_runtime": 8.7135, "eval_samples_per_second": 538.588, "eval_steps_per_second": 67.367, "step": 516000 }, { "epoch": 118.38, "learning_rate": 4.994855371235386e-05, "loss": 0.9768, "step": 516500 }, { "epoch": 118.38, "eval_loss": 1.5511404275894165, "eval_runtime": 8.7078, "eval_samples_per_second": 538.944, "eval_steps_per_second": 67.411, "step": 516500 }, { "epoch": 118.5, "learning_rate": 4.994791046138336e-05, "loss": 0.9798, "step": 517000 }, { "epoch": 118.5, "eval_loss": 1.5662953853607178, "eval_runtime": 8.7134, "eval_samples_per_second": 538.597, "eval_steps_per_second": 67.368, "step": 517000 }, { "epoch": 118.61, "learning_rate": 4.994726321815596e-05, "loss": 0.9758, "step": 517500 }, { "epoch": 118.61, "eval_loss": 1.556501030921936, "eval_runtime": 8.7079, "eval_samples_per_second": 538.937, "eval_steps_per_second": 67.41, "step": 517500 }, { "epoch": 118.73, "learning_rate": 4.994661198277523e-05, "loss": 0.9719, "step": 518000 }, { "epoch": 118.73, "eval_loss": 1.5516780614852905, "eval_runtime": 8.7188, "eval_samples_per_second": 538.261, "eval_steps_per_second": 67.326, "step": 518000 }, { "epoch": 118.84, "learning_rate": 4.99459567553454e-05, "loss": 0.9772, "step": 518500 }, { "epoch": 118.84, "eval_loss": 1.5682103633880615, "eval_runtime": 8.7158, "eval_samples_per_second": 538.45, "eval_steps_per_second": 67.349, "step": 518500 }, { "epoch": 118.95, "learning_rate": 4.99452975359713e-05, "loss": 0.9825, "step": 519000 }, { "epoch": 118.95, "eval_loss": 1.5541290044784546, "eval_runtime": 8.7235, "eval_samples_per_second": 537.973, "eval_steps_per_second": 67.29, "step": 519000 }, { "epoch": 119.07, "learning_rate": 4.994463432475843e-05, "loss": 0.9754, "step": 519500 }, { "epoch": 119.07, "eval_loss": 1.5591707229614258, "eval_runtime": 8.7105, "eval_samples_per_second": 538.778, "eval_steps_per_second": 67.39, "step": 519500 }, { "epoch": 119.18, "learning_rate": 4.994396712181293e-05, "loss": 0.9686, "step": 520000 }, { "epoch": 119.18, "eval_loss": 1.5549490451812744, "eval_runtime": 8.7105, "eval_samples_per_second": 538.777, "eval_steps_per_second": 67.39, "step": 520000 }, { "epoch": 119.3, "learning_rate": 4.994329592724155e-05, "loss": 0.9671, "step": 520500 }, { "epoch": 119.3, "eval_loss": 1.5561944246292114, "eval_runtime": 8.7097, "eval_samples_per_second": 538.825, "eval_steps_per_second": 67.396, "step": 520500 }, { "epoch": 119.41, "learning_rate": 4.994262074115171e-05, "loss": 0.9793, "step": 521000 }, { "epoch": 119.41, "eval_loss": 1.560978651046753, "eval_runtime": 8.7173, "eval_samples_per_second": 538.355, "eval_steps_per_second": 67.337, "step": 521000 }, { "epoch": 119.53, "learning_rate": 4.994194156365144e-05, "loss": 0.9801, "step": 521500 }, { "epoch": 119.53, "eval_loss": 1.5506314039230347, "eval_runtime": 8.706, "eval_samples_per_second": 539.052, "eval_steps_per_second": 67.425, "step": 521500 }, { "epoch": 119.64, "learning_rate": 4.994125839484946e-05, "loss": 0.9863, "step": 522000 }, { "epoch": 119.64, "eval_loss": 1.5694303512573242, "eval_runtime": 8.7142, "eval_samples_per_second": 538.548, "eval_steps_per_second": 67.362, "step": 522000 }, { "epoch": 119.76, "learning_rate": 4.9940571234855045e-05, "loss": 0.984, "step": 522500 }, { "epoch": 119.76, "eval_loss": 1.5682713985443115, "eval_runtime": 8.709, "eval_samples_per_second": 538.87, "eval_steps_per_second": 67.402, "step": 522500 }, { "epoch": 119.87, "learning_rate": 4.993988008377819e-05, "loss": 0.9784, "step": 523000 }, { "epoch": 119.87, "eval_loss": 1.5779436826705933, "eval_runtime": 8.7163, "eval_samples_per_second": 538.417, "eval_steps_per_second": 67.345, "step": 523000 }, { "epoch": 119.99, "learning_rate": 4.993918494172949e-05, "loss": 0.9814, "step": 523500 }, { "epoch": 119.99, "eval_loss": 1.5549801588058472, "eval_runtime": 8.7204, "eval_samples_per_second": 538.165, "eval_steps_per_second": 67.314, "step": 523500 }, { "epoch": 120.1, "learning_rate": 4.993848580882017e-05, "loss": 0.9662, "step": 524000 }, { "epoch": 120.1, "eval_loss": 1.6001840829849243, "eval_runtime": 8.7137, "eval_samples_per_second": 538.576, "eval_steps_per_second": 67.365, "step": 524000 }, { "epoch": 120.22, "learning_rate": 4.9937782685162124e-05, "loss": 0.9679, "step": 524500 }, { "epoch": 120.22, "eval_loss": 1.5670658349990845, "eval_runtime": 8.7162, "eval_samples_per_second": 538.422, "eval_steps_per_second": 67.346, "step": 524500 }, { "epoch": 120.33, "learning_rate": 4.993707557086786e-05, "loss": 0.9727, "step": 525000 }, { "epoch": 120.33, "eval_loss": 1.5603821277618408, "eval_runtime": 8.7127, "eval_samples_per_second": 538.639, "eval_steps_per_second": 67.373, "step": 525000 }, { "epoch": 120.44, "learning_rate": 4.9936364466050545e-05, "loss": 0.9729, "step": 525500 }, { "epoch": 120.44, "eval_loss": 1.5656133890151978, "eval_runtime": 8.7166, "eval_samples_per_second": 538.398, "eval_steps_per_second": 67.343, "step": 525500 }, { "epoch": 120.56, "learning_rate": 4.9935649370823954e-05, "loss": 0.9726, "step": 526000 }, { "epoch": 120.56, "eval_loss": 1.564464807510376, "eval_runtime": 8.7123, "eval_samples_per_second": 538.663, "eval_steps_per_second": 67.376, "step": 526000 }, { "epoch": 120.67, "learning_rate": 4.993493028530254e-05, "loss": 0.9741, "step": 526500 }, { "epoch": 120.67, "eval_loss": 1.5848658084869385, "eval_runtime": 8.7217, "eval_samples_per_second": 538.081, "eval_steps_per_second": 67.303, "step": 526500 }, { "epoch": 120.79, "learning_rate": 4.993420720960136e-05, "loss": 0.9737, "step": 527000 }, { "epoch": 120.79, "eval_loss": 1.5788402557373047, "eval_runtime": 8.7191, "eval_samples_per_second": 538.243, "eval_steps_per_second": 67.323, "step": 527000 }, { "epoch": 120.9, "learning_rate": 4.993348014383611e-05, "loss": 0.9808, "step": 527500 }, { "epoch": 120.9, "eval_loss": 1.5456550121307373, "eval_runtime": 8.7164, "eval_samples_per_second": 538.409, "eval_steps_per_second": 67.344, "step": 527500 }, { "epoch": 121.02, "learning_rate": 4.993274908812317e-05, "loss": 0.9741, "step": 528000 }, { "epoch": 121.02, "eval_loss": 1.563234567642212, "eval_runtime": 8.7166, "eval_samples_per_second": 538.395, "eval_steps_per_second": 67.342, "step": 528000 }, { "epoch": 121.13, "learning_rate": 4.99320140425795e-05, "loss": 0.9665, "step": 528500 }, { "epoch": 121.13, "eval_loss": 1.5771243572235107, "eval_runtime": 8.7136, "eval_samples_per_second": 538.581, "eval_steps_per_second": 67.366, "step": 528500 }, { "epoch": 121.25, "learning_rate": 4.993127500732274e-05, "loss": 0.9697, "step": 529000 }, { "epoch": 121.25, "eval_loss": 1.5625298023223877, "eval_runtime": 8.7133, "eval_samples_per_second": 538.601, "eval_steps_per_second": 67.368, "step": 529000 }, { "epoch": 121.36, "learning_rate": 4.9930531982471155e-05, "loss": 0.9684, "step": 529500 }, { "epoch": 121.36, "eval_loss": 1.5783413648605347, "eval_runtime": 8.715, "eval_samples_per_second": 538.495, "eval_steps_per_second": 67.355, "step": 529500 }, { "epoch": 121.48, "learning_rate": 4.992978496814362e-05, "loss": 0.9744, "step": 530000 }, { "epoch": 121.48, "eval_loss": 1.555356502532959, "eval_runtime": 8.7113, "eval_samples_per_second": 538.728, "eval_steps_per_second": 67.384, "step": 530000 }, { "epoch": 121.59, "learning_rate": 4.992903396445971e-05, "loss": 0.9786, "step": 530500 }, { "epoch": 121.59, "eval_loss": 1.5818202495574951, "eval_runtime": 8.7113, "eval_samples_per_second": 538.726, "eval_steps_per_second": 67.384, "step": 530500 }, { "epoch": 121.71, "learning_rate": 4.9928278971539575e-05, "loss": 0.9762, "step": 531000 }, { "epoch": 121.71, "eval_loss": 1.5509705543518066, "eval_runtime": 8.7177, "eval_samples_per_second": 538.33, "eval_steps_per_second": 67.334, "step": 531000 }, { "epoch": 121.82, "learning_rate": 4.9927519989504056e-05, "loss": 0.9866, "step": 531500 }, { "epoch": 121.82, "eval_loss": 1.5632859468460083, "eval_runtime": 8.73, "eval_samples_per_second": 537.569, "eval_steps_per_second": 67.239, "step": 531500 }, { "epoch": 121.93, "learning_rate": 4.992675701847459e-05, "loss": 0.9831, "step": 532000 }, { "epoch": 121.93, "eval_loss": 1.5621609687805176, "eval_runtime": 8.7213, "eval_samples_per_second": 538.108, "eval_steps_per_second": 67.306, "step": 532000 }, { "epoch": 122.05, "learning_rate": 4.992599005857328e-05, "loss": 0.9749, "step": 532500 }, { "epoch": 122.05, "eval_loss": 1.5674015283584595, "eval_runtime": 8.714, "eval_samples_per_second": 538.561, "eval_steps_per_second": 67.363, "step": 532500 }, { "epoch": 122.16, "learning_rate": 4.9925219109922847e-05, "loss": 0.9633, "step": 533000 }, { "epoch": 122.16, "eval_loss": 1.5764305591583252, "eval_runtime": 8.7188, "eval_samples_per_second": 538.261, "eval_steps_per_second": 67.326, "step": 533000 }, { "epoch": 122.28, "learning_rate": 4.992444417264668e-05, "loss": 0.9675, "step": 533500 }, { "epoch": 122.28, "eval_loss": 1.5679477453231812, "eval_runtime": 8.7147, "eval_samples_per_second": 538.512, "eval_steps_per_second": 67.357, "step": 533500 }, { "epoch": 122.39, "learning_rate": 4.9923665246868764e-05, "loss": 0.9745, "step": 534000 }, { "epoch": 122.39, "eval_loss": 1.5562214851379395, "eval_runtime": 8.721, "eval_samples_per_second": 538.128, "eval_steps_per_second": 67.309, "step": 534000 }, { "epoch": 122.51, "learning_rate": 4.992288233271377e-05, "loss": 0.9703, "step": 534500 }, { "epoch": 122.51, "eval_loss": 1.566327452659607, "eval_runtime": 8.718, "eval_samples_per_second": 538.315, "eval_steps_per_second": 67.332, "step": 534500 }, { "epoch": 122.62, "learning_rate": 4.992209543030696e-05, "loss": 0.9745, "step": 535000 }, { "epoch": 122.62, "eval_loss": 1.5645999908447266, "eval_runtime": 8.7231, "eval_samples_per_second": 537.995, "eval_steps_per_second": 67.292, "step": 535000 }, { "epoch": 122.74, "learning_rate": 4.992130453977428e-05, "loss": 0.9784, "step": 535500 }, { "epoch": 122.74, "eval_loss": 1.5762877464294434, "eval_runtime": 8.7135, "eval_samples_per_second": 538.593, "eval_steps_per_second": 67.367, "step": 535500 }, { "epoch": 122.85, "learning_rate": 4.992050966124226e-05, "loss": 0.9803, "step": 536000 }, { "epoch": 122.85, "eval_loss": 1.5624219179153442, "eval_runtime": 8.7341, "eval_samples_per_second": 537.317, "eval_steps_per_second": 67.208, "step": 536000 }, { "epoch": 122.97, "learning_rate": 4.9919710794838137e-05, "loss": 0.9724, "step": 536500 }, { "epoch": 122.97, "eval_loss": 1.56076979637146, "eval_runtime": 8.7133, "eval_samples_per_second": 538.6, "eval_steps_per_second": 67.368, "step": 536500 }, { "epoch": 123.08, "learning_rate": 4.991890794068972e-05, "loss": 0.9692, "step": 537000 }, { "epoch": 123.08, "eval_loss": 1.5681716203689575, "eval_runtime": 8.7184, "eval_samples_per_second": 538.288, "eval_steps_per_second": 67.329, "step": 537000 }, { "epoch": 123.2, "learning_rate": 4.9918101098925495e-05, "loss": 0.9587, "step": 537500 }, { "epoch": 123.2, "eval_loss": 1.5915952920913696, "eval_runtime": 8.7071, "eval_samples_per_second": 538.986, "eval_steps_per_second": 67.416, "step": 537500 }, { "epoch": 123.31, "learning_rate": 4.991729026967458e-05, "loss": 0.9667, "step": 538000 }, { "epoch": 123.31, "eval_loss": 1.5752240419387817, "eval_runtime": 8.7144, "eval_samples_per_second": 538.536, "eval_steps_per_second": 67.36, "step": 538000 }, { "epoch": 123.42, "learning_rate": 4.9916475453066716e-05, "loss": 0.9645, "step": 538500 }, { "epoch": 123.42, "eval_loss": 1.5799503326416016, "eval_runtime": 8.7172, "eval_samples_per_second": 538.364, "eval_steps_per_second": 67.339, "step": 538500 }, { "epoch": 123.54, "learning_rate": 4.9915656649232296e-05, "loss": 0.9741, "step": 539000 }, { "epoch": 123.54, "eval_loss": 1.5639289617538452, "eval_runtime": 8.7137, "eval_samples_per_second": 538.575, "eval_steps_per_second": 67.365, "step": 539000 }, { "epoch": 123.65, "learning_rate": 4.991483385830236e-05, "loss": 0.9723, "step": 539500 }, { "epoch": 123.65, "eval_loss": 1.5656386613845825, "eval_runtime": 8.7106, "eval_samples_per_second": 538.771, "eval_steps_per_second": 67.389, "step": 539500 }, { "epoch": 123.77, "learning_rate": 4.9914007080408556e-05, "loss": 0.9775, "step": 540000 }, { "epoch": 123.77, "eval_loss": 1.5740156173706055, "eval_runtime": 8.7186, "eval_samples_per_second": 538.275, "eval_steps_per_second": 67.327, "step": 540000 }, { "epoch": 123.88, "learning_rate": 4.99131763156832e-05, "loss": 0.9719, "step": 540500 }, { "epoch": 123.88, "eval_loss": 1.5653319358825684, "eval_runtime": 8.713, "eval_samples_per_second": 538.619, "eval_steps_per_second": 67.37, "step": 540500 }, { "epoch": 124.0, "learning_rate": 4.9912341564259236e-05, "loss": 0.9744, "step": 541000 }, { "epoch": 124.0, "eval_loss": 1.5773005485534668, "eval_runtime": 8.7196, "eval_samples_per_second": 538.215, "eval_steps_per_second": 67.32, "step": 541000 }, { "epoch": 124.11, "learning_rate": 4.991150282627023e-05, "loss": 0.9644, "step": 541500 }, { "epoch": 124.11, "eval_loss": 1.5746526718139648, "eval_runtime": 8.7165, "eval_samples_per_second": 538.405, "eval_steps_per_second": 67.344, "step": 541500 }, { "epoch": 124.23, "learning_rate": 4.991066010185041e-05, "loss": 0.9693, "step": 542000 }, { "epoch": 124.23, "eval_loss": 1.573323130607605, "eval_runtime": 8.7116, "eval_samples_per_second": 538.705, "eval_steps_per_second": 67.381, "step": 542000 }, { "epoch": 124.34, "learning_rate": 4.990981339113464e-05, "loss": 0.9722, "step": 542500 }, { "epoch": 124.34, "eval_loss": 1.5694447755813599, "eval_runtime": 8.7126, "eval_samples_per_second": 538.645, "eval_steps_per_second": 67.374, "step": 542500 }, { "epoch": 124.46, "learning_rate": 4.99089626942584e-05, "loss": 0.963, "step": 543000 }, { "epoch": 124.46, "eval_loss": 1.5799579620361328, "eval_runtime": 8.713, "eval_samples_per_second": 538.617, "eval_steps_per_second": 67.37, "step": 543000 }, { "epoch": 124.57, "learning_rate": 4.990810801135783e-05, "loss": 0.9703, "step": 543500 }, { "epoch": 124.57, "eval_loss": 1.573874592781067, "eval_runtime": 8.7126, "eval_samples_per_second": 538.644, "eval_steps_per_second": 67.373, "step": 543500 }, { "epoch": 124.68, "learning_rate": 4.990724934256969e-05, "loss": 0.9699, "step": 544000 }, { "epoch": 124.68, "eval_loss": 1.5732215642929077, "eval_runtime": 8.7241, "eval_samples_per_second": 537.936, "eval_steps_per_second": 67.285, "step": 544000 }, { "epoch": 124.8, "learning_rate": 4.99063866880314e-05, "loss": 0.9822, "step": 544500 }, { "epoch": 124.8, "eval_loss": 1.568105697631836, "eval_runtime": 8.712, "eval_samples_per_second": 538.685, "eval_steps_per_second": 67.379, "step": 544500 }, { "epoch": 124.91, "learning_rate": 4.9905520047881e-05, "loss": 0.9677, "step": 545000 }, { "epoch": 124.91, "eval_loss": 1.608232021331787, "eval_runtime": 8.7096, "eval_samples_per_second": 538.833, "eval_steps_per_second": 67.397, "step": 545000 }, { "epoch": 125.03, "learning_rate": 4.9904649422257156e-05, "loss": 0.9747, "step": 545500 }, { "epoch": 125.03, "eval_loss": 1.571833610534668, "eval_runtime": 8.7172, "eval_samples_per_second": 538.363, "eval_steps_per_second": 67.338, "step": 545500 }, { "epoch": 125.14, "learning_rate": 4.990377481129921e-05, "loss": 0.9614, "step": 546000 }, { "epoch": 125.14, "eval_loss": 1.5796605348587036, "eval_runtime": 8.7136, "eval_samples_per_second": 538.585, "eval_steps_per_second": 67.366, "step": 546000 }, { "epoch": 125.26, "learning_rate": 4.990289621514712e-05, "loss": 0.9617, "step": 546500 }, { "epoch": 125.26, "eval_loss": 1.5671324729919434, "eval_runtime": 8.7148, "eval_samples_per_second": 538.512, "eval_steps_per_second": 67.357, "step": 546500 }, { "epoch": 125.37, "learning_rate": 4.990201363394148e-05, "loss": 0.9635, "step": 547000 }, { "epoch": 125.37, "eval_loss": 1.5513743162155151, "eval_runtime": 8.7138, "eval_samples_per_second": 538.574, "eval_steps_per_second": 67.365, "step": 547000 }, { "epoch": 125.49, "learning_rate": 4.990112706782352e-05, "loss": 0.9699, "step": 547500 }, { "epoch": 125.49, "eval_loss": 1.555879831314087, "eval_runtime": 8.7135, "eval_samples_per_second": 538.592, "eval_steps_per_second": 67.367, "step": 547500 }, { "epoch": 125.6, "learning_rate": 4.99002365169351e-05, "loss": 0.9672, "step": 548000 }, { "epoch": 125.6, "eval_loss": 1.5743348598480225, "eval_runtime": 8.708, "eval_samples_per_second": 538.93, "eval_steps_per_second": 67.409, "step": 548000 }, { "epoch": 125.72, "learning_rate": 4.9899341981418753e-05, "loss": 0.9691, "step": 548500 }, { "epoch": 125.72, "eval_loss": 1.5657596588134766, "eval_runtime": 8.7085, "eval_samples_per_second": 538.898, "eval_steps_per_second": 67.405, "step": 548500 }, { "epoch": 125.83, "learning_rate": 4.989844346141761e-05, "loss": 0.9715, "step": 549000 }, { "epoch": 125.83, "eval_loss": 1.565407395362854, "eval_runtime": 8.7178, "eval_samples_per_second": 538.323, "eval_steps_per_second": 67.333, "step": 549000 }, { "epoch": 125.95, "learning_rate": 4.989754095707546e-05, "loss": 0.9781, "step": 549500 }, { "epoch": 125.95, "eval_loss": 1.5573927164077759, "eval_runtime": 8.7134, "eval_samples_per_second": 538.594, "eval_steps_per_second": 67.367, "step": 549500 }, { "epoch": 126.06, "learning_rate": 4.989663446853673e-05, "loss": 0.9677, "step": 550000 }, { "epoch": 126.06, "eval_loss": 1.5715768337249756, "eval_runtime": 8.7153, "eval_samples_per_second": 538.476, "eval_steps_per_second": 67.353, "step": 550000 }, { "epoch": 126.17, "learning_rate": 4.989572399594646e-05, "loss": 0.9574, "step": 550500 }, { "epoch": 126.17, "eval_loss": 1.5476653575897217, "eval_runtime": 8.7087, "eval_samples_per_second": 538.888, "eval_steps_per_second": 67.404, "step": 550500 }, { "epoch": 126.29, "learning_rate": 4.989480953945038e-05, "loss": 0.9614, "step": 551000 }, { "epoch": 126.29, "eval_loss": 1.5713099241256714, "eval_runtime": 8.7105, "eval_samples_per_second": 538.772, "eval_steps_per_second": 67.39, "step": 551000 }, { "epoch": 126.4, "learning_rate": 4.98938910991948e-05, "loss": 0.9638, "step": 551500 }, { "epoch": 126.4, "eval_loss": 1.5641014575958252, "eval_runtime": 8.7195, "eval_samples_per_second": 538.218, "eval_steps_per_second": 67.32, "step": 551500 }, { "epoch": 126.52, "learning_rate": 4.9892968675326695e-05, "loss": 0.9652, "step": 552000 }, { "epoch": 126.52, "eval_loss": 1.5982619524002075, "eval_runtime": 8.7226, "eval_samples_per_second": 538.029, "eval_steps_per_second": 67.297, "step": 552000 }, { "epoch": 126.63, "learning_rate": 4.989204226799368e-05, "loss": 0.965, "step": 552500 }, { "epoch": 126.63, "eval_loss": 1.5890533924102783, "eval_runtime": 8.7124, "eval_samples_per_second": 538.658, "eval_steps_per_second": 67.375, "step": 552500 }, { "epoch": 126.75, "learning_rate": 4.9891111877344005e-05, "loss": 0.9729, "step": 553000 }, { "epoch": 126.75, "eval_loss": 1.5878714323043823, "eval_runtime": 8.7109, "eval_samples_per_second": 538.75, "eval_steps_per_second": 67.387, "step": 553000 }, { "epoch": 126.86, "learning_rate": 4.9890177503526544e-05, "loss": 0.97, "step": 553500 }, { "epoch": 126.86, "eval_loss": 1.5786340236663818, "eval_runtime": 8.7172, "eval_samples_per_second": 538.363, "eval_steps_per_second": 67.338, "step": 553500 }, { "epoch": 126.98, "learning_rate": 4.988923914669083e-05, "loss": 0.9811, "step": 554000 }, { "epoch": 126.98, "eval_loss": 1.5826433897018433, "eval_runtime": 8.7174, "eval_samples_per_second": 538.35, "eval_steps_per_second": 67.337, "step": 554000 }, { "epoch": 127.09, "learning_rate": 4.9888296806987016e-05, "loss": 0.9657, "step": 554500 }, { "epoch": 127.09, "eval_loss": 1.5854833126068115, "eval_runtime": 8.7122, "eval_samples_per_second": 538.671, "eval_steps_per_second": 67.377, "step": 554500 }, { "epoch": 127.21, "learning_rate": 4.9887350484565895e-05, "loss": 0.9574, "step": 555000 }, { "epoch": 127.21, "eval_loss": 1.5570597648620605, "eval_runtime": 8.7134, "eval_samples_per_second": 538.593, "eval_steps_per_second": 67.367, "step": 555000 }, { "epoch": 127.32, "learning_rate": 4.988640017957892e-05, "loss": 0.9573, "step": 555500 }, { "epoch": 127.32, "eval_loss": 1.5981990098953247, "eval_runtime": 8.7171, "eval_samples_per_second": 538.367, "eval_steps_per_second": 67.339, "step": 555500 }, { "epoch": 127.44, "learning_rate": 4.9885445892178135e-05, "loss": 0.9671, "step": 556000 }, { "epoch": 127.44, "eval_loss": 1.5892741680145264, "eval_runtime": 8.7151, "eval_samples_per_second": 538.488, "eval_steps_per_second": 67.354, "step": 556000 }, { "epoch": 127.55, "learning_rate": 4.9884487622516264e-05, "loss": 0.9597, "step": 556500 }, { "epoch": 127.55, "eval_loss": 1.6065418720245361, "eval_runtime": 8.7257, "eval_samples_per_second": 537.836, "eval_steps_per_second": 67.273, "step": 556500 }, { "epoch": 127.66, "learning_rate": 4.988352537074665e-05, "loss": 0.9661, "step": 557000 }, { "epoch": 127.66, "eval_loss": 1.587260127067566, "eval_runtime": 8.7151, "eval_samples_per_second": 538.489, "eval_steps_per_second": 67.354, "step": 557000 }, { "epoch": 127.78, "learning_rate": 4.988255913702329e-05, "loss": 0.971, "step": 557500 }, { "epoch": 127.78, "eval_loss": 1.5688196420669556, "eval_runtime": 8.7142, "eval_samples_per_second": 538.549, "eval_steps_per_second": 67.362, "step": 557500 }, { "epoch": 127.89, "learning_rate": 4.988158892150078e-05, "loss": 0.9735, "step": 558000 }, { "epoch": 127.89, "eval_loss": 1.5799697637557983, "eval_runtime": 8.7268, "eval_samples_per_second": 537.771, "eval_steps_per_second": 67.264, "step": 558000 }, { "epoch": 128.01, "learning_rate": 4.988061472433439e-05, "loss": 0.9666, "step": 558500 }, { "epoch": 128.01, "eval_loss": 1.6030784845352173, "eval_runtime": 8.7148, "eval_samples_per_second": 538.512, "eval_steps_per_second": 67.357, "step": 558500 }, { "epoch": 128.12, "learning_rate": 4.987963654568001e-05, "loss": 0.9498, "step": 559000 }, { "epoch": 128.12, "eval_loss": 1.577072024345398, "eval_runtime": 8.7199, "eval_samples_per_second": 538.195, "eval_steps_per_second": 67.317, "step": 559000 }, { "epoch": 128.24, "learning_rate": 4.987865438569418e-05, "loss": 0.956, "step": 559500 }, { "epoch": 128.24, "eval_loss": 1.5621265172958374, "eval_runtime": 8.7161, "eval_samples_per_second": 538.432, "eval_steps_per_second": 67.347, "step": 559500 }, { "epoch": 128.35, "learning_rate": 4.987766824453406e-05, "loss": 0.9598, "step": 560000 }, { "epoch": 128.35, "eval_loss": 1.586021065711975, "eval_runtime": 8.7108, "eval_samples_per_second": 538.755, "eval_steps_per_second": 67.387, "step": 560000 }, { "epoch": 128.47, "learning_rate": 4.987667812235747e-05, "loss": 0.9649, "step": 560500 }, { "epoch": 128.47, "eval_loss": 1.5872442722320557, "eval_runtime": 8.7127, "eval_samples_per_second": 538.641, "eval_steps_per_second": 67.373, "step": 560500 }, { "epoch": 128.58, "learning_rate": 4.987568401932283e-05, "loss": 0.9614, "step": 561000 }, { "epoch": 128.58, "eval_loss": 1.5797219276428223, "eval_runtime": 8.7191, "eval_samples_per_second": 538.245, "eval_steps_per_second": 67.324, "step": 561000 }, { "epoch": 128.7, "learning_rate": 4.987468593558924e-05, "loss": 0.9627, "step": 561500 }, { "epoch": 128.7, "eval_loss": 1.5534429550170898, "eval_runtime": 8.7144, "eval_samples_per_second": 538.534, "eval_steps_per_second": 67.36, "step": 561500 }, { "epoch": 128.81, "learning_rate": 4.987368387131641e-05, "loss": 0.9625, "step": 562000 }, { "epoch": 128.81, "eval_loss": 1.5737413167953491, "eval_runtime": 8.7247, "eval_samples_per_second": 537.901, "eval_steps_per_second": 67.281, "step": 562000 }, { "epoch": 128.92, "learning_rate": 4.9872677826664696e-05, "loss": 0.9738, "step": 562500 }, { "epoch": 128.92, "eval_loss": 1.596679925918579, "eval_runtime": 8.7117, "eval_samples_per_second": 538.701, "eval_steps_per_second": 67.381, "step": 562500 }, { "epoch": 129.04, "learning_rate": 4.9871667801795074e-05, "loss": 0.9679, "step": 563000 }, { "epoch": 129.04, "eval_loss": 1.589643120765686, "eval_runtime": 8.718, "eval_samples_per_second": 538.311, "eval_steps_per_second": 67.332, "step": 563000 }, { "epoch": 129.15, "learning_rate": 4.98706537968692e-05, "loss": 0.9524, "step": 563500 }, { "epoch": 129.15, "eval_loss": 1.5840455293655396, "eval_runtime": 8.7115, "eval_samples_per_second": 538.714, "eval_steps_per_second": 67.382, "step": 563500 }, { "epoch": 129.27, "learning_rate": 4.986963581204932e-05, "loss": 0.9611, "step": 564000 }, { "epoch": 129.27, "eval_loss": 1.5896785259246826, "eval_runtime": 8.7137, "eval_samples_per_second": 538.58, "eval_steps_per_second": 67.366, "step": 564000 }, { "epoch": 129.38, "learning_rate": 4.9868613847498335e-05, "loss": 0.9559, "step": 564500 }, { "epoch": 129.38, "eval_loss": 1.5748287439346313, "eval_runtime": 8.7202, "eval_samples_per_second": 538.177, "eval_steps_per_second": 67.315, "step": 564500 }, { "epoch": 129.5, "learning_rate": 4.986758790337979e-05, "loss": 0.9655, "step": 565000 }, { "epoch": 129.5, "eval_loss": 1.6012758016586304, "eval_runtime": 8.7167, "eval_samples_per_second": 538.394, "eval_steps_per_second": 67.342, "step": 565000 }, { "epoch": 129.61, "learning_rate": 4.986655797985786e-05, "loss": 0.964, "step": 565500 }, { "epoch": 129.61, "eval_loss": 1.5855258703231812, "eval_runtime": 8.7082, "eval_samples_per_second": 538.919, "eval_steps_per_second": 67.408, "step": 565500 }, { "epoch": 129.73, "learning_rate": 4.986552407709736e-05, "loss": 0.958, "step": 566000 }, { "epoch": 129.73, "eval_loss": 1.5738874673843384, "eval_runtime": 8.7149, "eval_samples_per_second": 538.501, "eval_steps_per_second": 67.356, "step": 566000 }, { "epoch": 129.84, "learning_rate": 4.9864486195263725e-05, "loss": 0.9633, "step": 566500 }, { "epoch": 129.84, "eval_loss": 1.6002775430679321, "eval_runtime": 8.7184, "eval_samples_per_second": 538.286, "eval_steps_per_second": 67.329, "step": 566500 }, { "epoch": 129.96, "learning_rate": 4.986344433452306e-05, "loss": 0.9681, "step": 567000 }, { "epoch": 129.96, "eval_loss": 1.5943127870559692, "eval_runtime": 8.7189, "eval_samples_per_second": 538.255, "eval_steps_per_second": 67.325, "step": 567000 }, { "epoch": 130.07, "learning_rate": 4.986239849504207e-05, "loss": 0.9572, "step": 567500 }, { "epoch": 130.07, "eval_loss": 1.5804932117462158, "eval_runtime": 8.7229, "eval_samples_per_second": 538.008, "eval_steps_per_second": 67.294, "step": 567500 }, { "epoch": 130.19, "learning_rate": 4.986134867698812e-05, "loss": 0.9583, "step": 568000 }, { "epoch": 130.19, "eval_loss": 1.583470344543457, "eval_runtime": 8.7298, "eval_samples_per_second": 537.585, "eval_steps_per_second": 67.241, "step": 568000 }, { "epoch": 130.3, "learning_rate": 4.9860294880529215e-05, "loss": 0.9566, "step": 568500 }, { "epoch": 130.3, "eval_loss": 1.577149510383606, "eval_runtime": 8.7122, "eval_samples_per_second": 538.673, "eval_steps_per_second": 67.377, "step": 568500 }, { "epoch": 130.41, "learning_rate": 4.9859237105833975e-05, "loss": 0.957, "step": 569000 }, { "epoch": 130.41, "eval_loss": 1.5776249170303345, "eval_runtime": 8.7153, "eval_samples_per_second": 538.48, "eval_steps_per_second": 67.353, "step": 569000 }, { "epoch": 130.53, "learning_rate": 4.985817535307168e-05, "loss": 0.9555, "step": 569500 }, { "epoch": 130.53, "eval_loss": 1.6034481525421143, "eval_runtime": 8.7106, "eval_samples_per_second": 538.769, "eval_steps_per_second": 67.389, "step": 569500 }, { "epoch": 130.64, "learning_rate": 4.985710962241222e-05, "loss": 0.9578, "step": 570000 }, { "epoch": 130.64, "eval_loss": 1.5891478061676025, "eval_runtime": 8.7217, "eval_samples_per_second": 538.085, "eval_steps_per_second": 67.304, "step": 570000 }, { "epoch": 130.76, "learning_rate": 4.985603991402615e-05, "loss": 0.9618, "step": 570500 }, { "epoch": 130.76, "eval_loss": 1.598665475845337, "eval_runtime": 8.7249, "eval_samples_per_second": 537.884, "eval_steps_per_second": 67.278, "step": 570500 }, { "epoch": 130.87, "learning_rate": 4.985496622808465e-05, "loss": 0.9669, "step": 571000 }, { "epoch": 130.87, "eval_loss": 1.5918686389923096, "eval_runtime": 8.7185, "eval_samples_per_second": 538.283, "eval_steps_per_second": 67.328, "step": 571000 }, { "epoch": 130.99, "learning_rate": 4.985388856475953e-05, "loss": 0.9591, "step": 571500 }, { "epoch": 130.99, "eval_loss": 1.5636067390441895, "eval_runtime": 8.7109, "eval_samples_per_second": 538.752, "eval_steps_per_second": 67.387, "step": 571500 }, { "epoch": 131.1, "learning_rate": 4.9852806924223244e-05, "loss": 0.9584, "step": 572000 }, { "epoch": 131.1, "eval_loss": 1.589758038520813, "eval_runtime": 8.7151, "eval_samples_per_second": 538.49, "eval_steps_per_second": 67.354, "step": 572000 }, { "epoch": 131.22, "learning_rate": 4.985172130664887e-05, "loss": 0.952, "step": 572500 }, { "epoch": 131.22, "eval_loss": 1.5993070602416992, "eval_runtime": 8.7278, "eval_samples_per_second": 537.704, "eval_steps_per_second": 67.256, "step": 572500 }, { "epoch": 131.33, "learning_rate": 4.985063171221015e-05, "loss": 0.9565, "step": 573000 }, { "epoch": 131.33, "eval_loss": 1.5575594902038574, "eval_runtime": 8.72, "eval_samples_per_second": 538.188, "eval_steps_per_second": 67.316, "step": 573000 }, { "epoch": 131.45, "learning_rate": 4.984953814108143e-05, "loss": 0.9574, "step": 573500 }, { "epoch": 131.45, "eval_loss": 1.572701096534729, "eval_runtime": 8.7217, "eval_samples_per_second": 538.083, "eval_steps_per_second": 67.303, "step": 573500 }, { "epoch": 131.56, "learning_rate": 4.984844059343772e-05, "loss": 0.9569, "step": 574000 }, { "epoch": 131.56, "eval_loss": 1.6005223989486694, "eval_runtime": 8.714, "eval_samples_per_second": 538.56, "eval_steps_per_second": 67.363, "step": 574000 }, { "epoch": 131.68, "learning_rate": 4.9847339069454635e-05, "loss": 0.9546, "step": 574500 }, { "epoch": 131.68, "eval_loss": 1.5965909957885742, "eval_runtime": 8.7122, "eval_samples_per_second": 538.668, "eval_steps_per_second": 67.377, "step": 574500 }, { "epoch": 131.79, "learning_rate": 4.984623356930846e-05, "loss": 0.9636, "step": 575000 }, { "epoch": 131.79, "eval_loss": 1.5900416374206543, "eval_runtime": 8.7174, "eval_samples_per_second": 538.351, "eval_steps_per_second": 67.337, "step": 575000 }, { "epoch": 131.9, "learning_rate": 4.984512409317611e-05, "loss": 0.9715, "step": 575500 }, { "epoch": 131.9, "eval_loss": 1.5901392698287964, "eval_runtime": 8.7135, "eval_samples_per_second": 538.591, "eval_steps_per_second": 67.367, "step": 575500 }, { "epoch": 132.02, "learning_rate": 4.9844010641235105e-05, "loss": 0.9655, "step": 576000 }, { "epoch": 132.02, "eval_loss": 1.5802009105682373, "eval_runtime": 8.7166, "eval_samples_per_second": 538.401, "eval_steps_per_second": 67.343, "step": 576000 }, { "epoch": 132.13, "learning_rate": 4.984289321366363e-05, "loss": 0.9514, "step": 576500 }, { "epoch": 132.13, "eval_loss": 1.5973347425460815, "eval_runtime": 8.7143, "eval_samples_per_second": 538.538, "eval_steps_per_second": 67.36, "step": 576500 }, { "epoch": 132.25, "learning_rate": 4.984177181064052e-05, "loss": 0.9579, "step": 577000 }, { "epoch": 132.25, "eval_loss": 1.5770050287246704, "eval_runtime": 8.7212, "eval_samples_per_second": 538.112, "eval_steps_per_second": 67.307, "step": 577000 }, { "epoch": 132.36, "learning_rate": 4.98406464323452e-05, "loss": 0.9552, "step": 577500 }, { "epoch": 132.36, "eval_loss": 1.5952038764953613, "eval_runtime": 8.7167, "eval_samples_per_second": 538.394, "eval_steps_per_second": 67.342, "step": 577500 }, { "epoch": 132.48, "learning_rate": 4.9839517078957764e-05, "loss": 0.9547, "step": 578000 }, { "epoch": 132.48, "eval_loss": 1.5967435836791992, "eval_runtime": 8.719, "eval_samples_per_second": 538.25, "eval_steps_per_second": 67.324, "step": 578000 }, { "epoch": 132.59, "learning_rate": 4.983838375065894e-05, "loss": 0.9661, "step": 578500 }, { "epoch": 132.59, "eval_loss": 1.603395938873291, "eval_runtime": 8.714, "eval_samples_per_second": 538.556, "eval_steps_per_second": 67.363, "step": 578500 }, { "epoch": 132.71, "learning_rate": 4.9837246447630096e-05, "loss": 0.9616, "step": 579000 }, { "epoch": 132.71, "eval_loss": 1.579464316368103, "eval_runtime": 8.7135, "eval_samples_per_second": 538.587, "eval_steps_per_second": 67.366, "step": 579000 }, { "epoch": 132.82, "learning_rate": 4.98361051700532e-05, "loss": 0.9659, "step": 579500 }, { "epoch": 132.82, "eval_loss": 1.598476767539978, "eval_runtime": 8.7061, "eval_samples_per_second": 539.044, "eval_steps_per_second": 67.424, "step": 579500 }, { "epoch": 132.94, "learning_rate": 4.983495991811091e-05, "loss": 0.9594, "step": 580000 }, { "epoch": 132.94, "eval_loss": 1.5915627479553223, "eval_runtime": 8.7207, "eval_samples_per_second": 538.143, "eval_steps_per_second": 67.311, "step": 580000 }, { "epoch": 133.05, "learning_rate": 4.9833810691986485e-05, "loss": 0.9596, "step": 580500 }, { "epoch": 133.05, "eval_loss": 1.5905718803405762, "eval_runtime": 8.7123, "eval_samples_per_second": 538.662, "eval_steps_per_second": 67.376, "step": 580500 }, { "epoch": 133.17, "learning_rate": 4.983265749186383e-05, "loss": 0.9523, "step": 581000 }, { "epoch": 133.17, "eval_loss": 1.571536660194397, "eval_runtime": 8.7096, "eval_samples_per_second": 538.83, "eval_steps_per_second": 67.397, "step": 581000 }, { "epoch": 133.28, "learning_rate": 4.983150031792748e-05, "loss": 0.9526, "step": 581500 }, { "epoch": 133.28, "eval_loss": 1.5832419395446777, "eval_runtime": 8.7058, "eval_samples_per_second": 539.065, "eval_steps_per_second": 67.426, "step": 581500 }, { "epoch": 133.39, "learning_rate": 4.983033917036261e-05, "loss": 0.9548, "step": 582000 }, { "epoch": 133.39, "eval_loss": 1.6010390520095825, "eval_runtime": 8.7095, "eval_samples_per_second": 538.84, "eval_steps_per_second": 67.398, "step": 582000 }, { "epoch": 133.51, "learning_rate": 4.9829174049355034e-05, "loss": 0.9581, "step": 582500 }, { "epoch": 133.51, "eval_loss": 1.6002748012542725, "eval_runtime": 8.7062, "eval_samples_per_second": 539.043, "eval_steps_per_second": 67.423, "step": 582500 }, { "epoch": 133.62, "learning_rate": 4.98280049550912e-05, "loss": 0.95, "step": 583000 }, { "epoch": 133.62, "eval_loss": 1.5752708911895752, "eval_runtime": 8.7208, "eval_samples_per_second": 538.139, "eval_steps_per_second": 67.31, "step": 583000 }, { "epoch": 133.74, "learning_rate": 4.982683188775819e-05, "loss": 0.9594, "step": 583500 }, { "epoch": 133.74, "eval_loss": 1.6021337509155273, "eval_runtime": 8.7165, "eval_samples_per_second": 538.402, "eval_steps_per_second": 67.343, "step": 583500 }, { "epoch": 133.85, "learning_rate": 4.982565484754372e-05, "loss": 0.9599, "step": 584000 }, { "epoch": 133.85, "eval_loss": 1.5938174724578857, "eval_runtime": 8.7146, "eval_samples_per_second": 538.523, "eval_steps_per_second": 67.358, "step": 584000 }, { "epoch": 133.97, "learning_rate": 4.982447383463615e-05, "loss": 0.964, "step": 584500 }, { "epoch": 133.97, "eval_loss": 1.599424958229065, "eval_runtime": 8.714, "eval_samples_per_second": 538.557, "eval_steps_per_second": 67.363, "step": 584500 }, { "epoch": 134.08, "learning_rate": 4.982328884922446e-05, "loss": 0.9493, "step": 585000 }, { "epoch": 134.08, "eval_loss": 1.611722707748413, "eval_runtime": 8.7289, "eval_samples_per_second": 537.642, "eval_steps_per_second": 67.248, "step": 585000 }, { "epoch": 134.2, "learning_rate": 4.982209989149828e-05, "loss": 0.9435, "step": 585500 }, { "epoch": 134.2, "eval_loss": 1.6087771654129028, "eval_runtime": 8.7202, "eval_samples_per_second": 538.174, "eval_steps_per_second": 67.315, "step": 585500 }, { "epoch": 134.31, "learning_rate": 4.982090696164788e-05, "loss": 0.953, "step": 586000 }, { "epoch": 134.31, "eval_loss": 1.5962518453598022, "eval_runtime": 8.7093, "eval_samples_per_second": 538.849, "eval_steps_per_second": 67.399, "step": 586000 }, { "epoch": 134.43, "learning_rate": 4.9819710059864156e-05, "loss": 0.9574, "step": 586500 }, { "epoch": 134.43, "eval_loss": 1.5931276082992554, "eval_runtime": 8.7098, "eval_samples_per_second": 538.818, "eval_steps_per_second": 67.395, "step": 586500 }, { "epoch": 134.54, "learning_rate": 4.9818509186338624e-05, "loss": 0.9594, "step": 587000 }, { "epoch": 134.54, "eval_loss": 1.5964399576187134, "eval_runtime": 8.7214, "eval_samples_per_second": 538.104, "eval_steps_per_second": 67.306, "step": 587000 }, { "epoch": 134.65, "learning_rate": 4.981730434126347e-05, "loss": 0.9584, "step": 587500 }, { "epoch": 134.65, "eval_loss": 1.5888984203338623, "eval_runtime": 8.7114, "eval_samples_per_second": 538.72, "eval_steps_per_second": 67.383, "step": 587500 }, { "epoch": 134.77, "learning_rate": 4.9816095524831486e-05, "loss": 0.962, "step": 588000 }, { "epoch": 134.77, "eval_loss": 1.5964561700820923, "eval_runtime": 8.7167, "eval_samples_per_second": 538.392, "eval_steps_per_second": 67.342, "step": 588000 }, { "epoch": 134.88, "learning_rate": 4.981488273723611e-05, "loss": 0.9584, "step": 588500 }, { "epoch": 134.88, "eval_loss": 1.6054260730743408, "eval_runtime": 8.7128, "eval_samples_per_second": 538.631, "eval_steps_per_second": 67.372, "step": 588500 }, { "epoch": 135.0, "learning_rate": 4.981366597867143e-05, "loss": 0.9646, "step": 589000 }, { "epoch": 135.0, "eval_loss": 1.5768553018569946, "eval_runtime": 8.7159, "eval_samples_per_second": 538.44, "eval_steps_per_second": 67.348, "step": 589000 }, { "epoch": 135.11, "learning_rate": 4.981244524933216e-05, "loss": 0.9455, "step": 589500 }, { "epoch": 135.11, "eval_loss": 1.5877586603164673, "eval_runtime": 8.7207, "eval_samples_per_second": 538.142, "eval_steps_per_second": 67.311, "step": 589500 }, { "epoch": 135.23, "learning_rate": 4.981122054941362e-05, "loss": 0.9498, "step": 590000 }, { "epoch": 135.23, "eval_loss": 1.611443042755127, "eval_runtime": 8.7206, "eval_samples_per_second": 538.154, "eval_steps_per_second": 67.312, "step": 590000 }, { "epoch": 135.34, "learning_rate": 4.9809991879111814e-05, "loss": 0.955, "step": 590500 }, { "epoch": 135.34, "eval_loss": 1.603989839553833, "eval_runtime": 8.7149, "eval_samples_per_second": 538.506, "eval_steps_per_second": 67.356, "step": 590500 }, { "epoch": 135.46, "learning_rate": 4.980875923862335e-05, "loss": 0.9534, "step": 591000 }, { "epoch": 135.46, "eval_loss": 1.5960862636566162, "eval_runtime": 8.7123, "eval_samples_per_second": 538.666, "eval_steps_per_second": 67.376, "step": 591000 }, { "epoch": 135.57, "learning_rate": 4.980752262814548e-05, "loss": 0.9625, "step": 591500 }, { "epoch": 135.57, "eval_loss": 1.5881584882736206, "eval_runtime": 8.7094, "eval_samples_per_second": 538.842, "eval_steps_per_second": 67.398, "step": 591500 }, { "epoch": 135.69, "learning_rate": 4.9806282047876086e-05, "loss": 0.963, "step": 592000 }, { "epoch": 135.69, "eval_loss": 1.6013166904449463, "eval_runtime": 8.7171, "eval_samples_per_second": 538.368, "eval_steps_per_second": 67.339, "step": 592000 }, { "epoch": 135.8, "learning_rate": 4.9805037498013695e-05, "loss": 0.9521, "step": 592500 }, { "epoch": 135.8, "eval_loss": 1.6012709140777588, "eval_runtime": 8.7143, "eval_samples_per_second": 538.539, "eval_steps_per_second": 67.36, "step": 592500 }, { "epoch": 135.92, "learning_rate": 4.980378897875747e-05, "loss": 0.9618, "step": 593000 }, { "epoch": 135.92, "eval_loss": 1.577525019645691, "eval_runtime": 8.7176, "eval_samples_per_second": 538.335, "eval_steps_per_second": 67.335, "step": 593000 }, { "epoch": 136.03, "learning_rate": 4.980253649030719e-05, "loss": 0.9534, "step": 593500 }, { "epoch": 136.03, "eval_loss": 1.5867111682891846, "eval_runtime": 8.7076, "eval_samples_per_second": 538.953, "eval_steps_per_second": 67.412, "step": 593500 }, { "epoch": 136.14, "learning_rate": 4.98012800328633e-05, "loss": 0.9448, "step": 594000 }, { "epoch": 136.14, "eval_loss": 1.6136435270309448, "eval_runtime": 8.7182, "eval_samples_per_second": 538.299, "eval_steps_per_second": 67.33, "step": 594000 }, { "epoch": 136.26, "learning_rate": 4.9800019606626846e-05, "loss": 0.9465, "step": 594500 }, { "epoch": 136.26, "eval_loss": 1.5919603109359741, "eval_runtime": 8.7129, "eval_samples_per_second": 538.627, "eval_steps_per_second": 67.371, "step": 594500 }, { "epoch": 136.37, "learning_rate": 4.9798755211799543e-05, "loss": 0.9576, "step": 595000 }, { "epoch": 136.37, "eval_loss": 1.6014344692230225, "eval_runtime": 8.7214, "eval_samples_per_second": 538.102, "eval_steps_per_second": 67.306, "step": 595000 }, { "epoch": 136.49, "learning_rate": 4.979748684858371e-05, "loss": 0.9562, "step": 595500 }, { "epoch": 136.49, "eval_loss": 1.6298828125, "eval_runtime": 8.7079, "eval_samples_per_second": 538.937, "eval_steps_per_second": 67.41, "step": 595500 }, { "epoch": 136.6, "learning_rate": 4.979621451718232e-05, "loss": 0.9605, "step": 596000 }, { "epoch": 136.6, "eval_loss": 1.5847876071929932, "eval_runtime": 8.7168, "eval_samples_per_second": 538.385, "eval_steps_per_second": 67.341, "step": 596000 }, { "epoch": 136.72, "learning_rate": 4.979493821779898e-05, "loss": 0.9565, "step": 596500 }, { "epoch": 136.72, "eval_loss": 1.618851661682129, "eval_runtime": 8.7122, "eval_samples_per_second": 538.668, "eval_steps_per_second": 67.376, "step": 596500 }, { "epoch": 136.83, "learning_rate": 4.9793657950637924e-05, "loss": 0.9557, "step": 597000 }, { "epoch": 136.83, "eval_loss": 1.6020821332931519, "eval_runtime": 8.7215, "eval_samples_per_second": 538.093, "eval_steps_per_second": 67.305, "step": 597000 }, { "epoch": 136.95, "learning_rate": 4.979237371590403e-05, "loss": 0.9549, "step": 597500 }, { "epoch": 136.95, "eval_loss": 1.5964008569717407, "eval_runtime": 8.7227, "eval_samples_per_second": 538.024, "eval_steps_per_second": 67.296, "step": 597500 }, { "epoch": 137.06, "learning_rate": 4.979108551380279e-05, "loss": 0.9478, "step": 598000 }, { "epoch": 137.06, "eval_loss": 1.6279385089874268, "eval_runtime": 8.7169, "eval_samples_per_second": 538.377, "eval_steps_per_second": 67.34, "step": 598000 }, { "epoch": 137.18, "learning_rate": 4.978979334454037e-05, "loss": 0.9429, "step": 598500 }, { "epoch": 137.18, "eval_loss": 1.5941599607467651, "eval_runtime": 8.7126, "eval_samples_per_second": 538.645, "eval_steps_per_second": 67.374, "step": 598500 }, { "epoch": 137.29, "learning_rate": 4.978849720832353e-05, "loss": 0.9509, "step": 599000 }, { "epoch": 137.29, "eval_loss": 1.6102044582366943, "eval_runtime": 8.717, "eval_samples_per_second": 538.371, "eval_steps_per_second": 67.339, "step": 599000 }, { "epoch": 137.41, "learning_rate": 4.978719710535969e-05, "loss": 0.9478, "step": 599500 }, { "epoch": 137.41, "eval_loss": 1.601762294769287, "eval_runtime": 8.7251, "eval_samples_per_second": 537.872, "eval_steps_per_second": 67.277, "step": 599500 }, { "epoch": 137.52, "learning_rate": 4.97858930358569e-05, "loss": 0.9553, "step": 600000 }, { "epoch": 137.52, "eval_loss": 1.5944279432296753, "eval_runtime": 8.716, "eval_samples_per_second": 538.435, "eval_steps_per_second": 67.347, "step": 600000 }, { "epoch": 137.63, "learning_rate": 4.978458500002383e-05, "loss": 0.9517, "step": 600500 }, { "epoch": 137.63, "eval_loss": 1.6207187175750732, "eval_runtime": 8.7139, "eval_samples_per_second": 538.565, "eval_steps_per_second": 67.364, "step": 600500 }, { "epoch": 137.75, "learning_rate": 4.9783272998069816e-05, "loss": 0.9582, "step": 601000 }, { "epoch": 137.75, "eval_loss": 1.5983983278274536, "eval_runtime": 8.7185, "eval_samples_per_second": 538.283, "eval_steps_per_second": 67.328, "step": 601000 }, { "epoch": 137.86, "learning_rate": 4.9781957030204796e-05, "loss": 0.9591, "step": 601500 }, { "epoch": 137.86, "eval_loss": 1.5778659582138062, "eval_runtime": 8.7154, "eval_samples_per_second": 538.473, "eval_steps_per_second": 67.352, "step": 601500 }, { "epoch": 137.98, "learning_rate": 4.978063709663935e-05, "loss": 0.9479, "step": 602000 }, { "epoch": 137.98, "eval_loss": 1.6114885807037354, "eval_runtime": 8.7286, "eval_samples_per_second": 537.659, "eval_steps_per_second": 67.25, "step": 602000 }, { "epoch": 138.09, "learning_rate": 4.9779313197584714e-05, "loss": 0.9406, "step": 602500 }, { "epoch": 138.09, "eval_loss": 1.587985873222351, "eval_runtime": 8.7227, "eval_samples_per_second": 538.022, "eval_steps_per_second": 67.296, "step": 602500 }, { "epoch": 138.21, "learning_rate": 4.9777985333252735e-05, "loss": 0.9437, "step": 603000 }, { "epoch": 138.21, "eval_loss": 1.5888187885284424, "eval_runtime": 8.7227, "eval_samples_per_second": 538.023, "eval_steps_per_second": 67.296, "step": 603000 }, { "epoch": 138.32, "learning_rate": 4.977665350385591e-05, "loss": 0.9475, "step": 603500 }, { "epoch": 138.32, "eval_loss": 1.6023832559585571, "eval_runtime": 8.7179, "eval_samples_per_second": 538.318, "eval_steps_per_second": 67.333, "step": 603500 }, { "epoch": 138.44, "learning_rate": 4.977531770960735e-05, "loss": 0.9487, "step": 604000 }, { "epoch": 138.44, "eval_loss": 1.6016651391983032, "eval_runtime": 8.721, "eval_samples_per_second": 538.124, "eval_steps_per_second": 67.309, "step": 604000 }, { "epoch": 138.55, "learning_rate": 4.977397795072082e-05, "loss": 0.9481, "step": 604500 }, { "epoch": 138.55, "eval_loss": 1.5900052785873413, "eval_runtime": 8.7185, "eval_samples_per_second": 538.282, "eval_steps_per_second": 67.328, "step": 604500 }, { "epoch": 138.67, "learning_rate": 4.977263422741072e-05, "loss": 0.9498, "step": 605000 }, { "epoch": 138.67, "eval_loss": 1.5994865894317627, "eval_runtime": 8.7152, "eval_samples_per_second": 538.484, "eval_steps_per_second": 67.354, "step": 605000 }, { "epoch": 138.78, "learning_rate": 4.977128653989206e-05, "loss": 0.9662, "step": 605500 }, { "epoch": 138.78, "eval_loss": 1.6100822687149048, "eval_runtime": 8.7117, "eval_samples_per_second": 538.7, "eval_steps_per_second": 67.38, "step": 605500 }, { "epoch": 138.9, "learning_rate": 4.9769934888380534e-05, "loss": 0.9513, "step": 606000 }, { "epoch": 138.9, "eval_loss": 1.6238332986831665, "eval_runtime": 8.7252, "eval_samples_per_second": 537.868, "eval_steps_per_second": 67.276, "step": 606000 }, { "epoch": 139.01, "learning_rate": 4.97685792730924e-05, "loss": 0.9601, "step": 606500 }, { "epoch": 139.01, "eval_loss": 1.613181710243225, "eval_runtime": 8.7225, "eval_samples_per_second": 538.031, "eval_steps_per_second": 67.297, "step": 606500 }, { "epoch": 139.12, "learning_rate": 4.9767219694244614e-05, "loss": 0.949, "step": 607000 }, { "epoch": 139.12, "eval_loss": 1.597444772720337, "eval_runtime": 8.7188, "eval_samples_per_second": 538.261, "eval_steps_per_second": 67.326, "step": 607000 }, { "epoch": 139.24, "learning_rate": 4.976585615205474e-05, "loss": 0.9392, "step": 607500 }, { "epoch": 139.24, "eval_loss": 1.6170754432678223, "eval_runtime": 8.713, "eval_samples_per_second": 538.622, "eval_steps_per_second": 67.371, "step": 607500 }, { "epoch": 139.35, "learning_rate": 4.9764488646740956e-05, "loss": 0.9413, "step": 608000 }, { "epoch": 139.35, "eval_loss": 1.6143786907196045, "eval_runtime": 8.7266, "eval_samples_per_second": 537.779, "eval_steps_per_second": 67.265, "step": 608000 }, { "epoch": 139.47, "learning_rate": 4.976311717852212e-05, "loss": 0.95, "step": 608500 }, { "epoch": 139.47, "eval_loss": 1.599024772644043, "eval_runtime": 8.7187, "eval_samples_per_second": 538.269, "eval_steps_per_second": 67.327, "step": 608500 }, { "epoch": 139.58, "learning_rate": 4.976174174761769e-05, "loss": 0.9469, "step": 609000 }, { "epoch": 139.58, "eval_loss": 1.6164287328720093, "eval_runtime": 8.7146, "eval_samples_per_second": 538.519, "eval_steps_per_second": 67.358, "step": 609000 }, { "epoch": 139.7, "learning_rate": 4.976036235424776e-05, "loss": 0.9543, "step": 609500 }, { "epoch": 139.7, "eval_loss": 1.5998306274414062, "eval_runtime": 8.7155, "eval_samples_per_second": 538.464, "eval_steps_per_second": 67.351, "step": 609500 }, { "epoch": 139.81, "learning_rate": 4.975897899863308e-05, "loss": 0.9473, "step": 610000 }, { "epoch": 139.81, "eval_loss": 1.6202964782714844, "eval_runtime": 8.7199, "eval_samples_per_second": 538.192, "eval_steps_per_second": 67.317, "step": 610000 }, { "epoch": 139.93, "learning_rate": 4.9757591680994996e-05, "loss": 0.9509, "step": 610500 }, { "epoch": 139.93, "eval_loss": 1.6065092086791992, "eval_runtime": 8.7118, "eval_samples_per_second": 538.693, "eval_steps_per_second": 67.38, "step": 610500 }, { "epoch": 140.04, "learning_rate": 4.975620040155554e-05, "loss": 0.9454, "step": 611000 }, { "epoch": 140.04, "eval_loss": 1.5958362817764282, "eval_runtime": 8.7215, "eval_samples_per_second": 538.093, "eval_steps_per_second": 67.305, "step": 611000 }, { "epoch": 140.16, "learning_rate": 4.975480516053734e-05, "loss": 0.9453, "step": 611500 }, { "epoch": 140.16, "eval_loss": 1.586168646812439, "eval_runtime": 8.7203, "eval_samples_per_second": 538.168, "eval_steps_per_second": 67.314, "step": 611500 }, { "epoch": 140.27, "learning_rate": 4.975340595816366e-05, "loss": 0.9417, "step": 612000 }, { "epoch": 140.27, "eval_loss": 1.5931336879730225, "eval_runtime": 8.7199, "eval_samples_per_second": 538.195, "eval_steps_per_second": 67.317, "step": 612000 }, { "epoch": 140.39, "learning_rate": 4.975200279465841e-05, "loss": 0.9435, "step": 612500 }, { "epoch": 140.39, "eval_loss": 1.6120139360427856, "eval_runtime": 8.7126, "eval_samples_per_second": 538.643, "eval_steps_per_second": 67.373, "step": 612500 }, { "epoch": 140.5, "learning_rate": 4.975059567024614e-05, "loss": 0.9453, "step": 613000 }, { "epoch": 140.5, "eval_loss": 1.601463794708252, "eval_runtime": 8.7181, "eval_samples_per_second": 538.307, "eval_steps_per_second": 67.331, "step": 613000 }, { "epoch": 140.61, "learning_rate": 4.9749184585152e-05, "loss": 0.9522, "step": 613500 }, { "epoch": 140.61, "eval_loss": 1.609106183052063, "eval_runtime": 8.7099, "eval_samples_per_second": 538.815, "eval_steps_per_second": 67.395, "step": 613500 }, { "epoch": 140.73, "learning_rate": 4.974776953960182e-05, "loss": 0.9493, "step": 614000 }, { "epoch": 140.73, "eval_loss": 1.606985330581665, "eval_runtime": 8.7253, "eval_samples_per_second": 537.859, "eval_steps_per_second": 67.275, "step": 614000 }, { "epoch": 140.84, "learning_rate": 4.974635053382203e-05, "loss": 0.9536, "step": 614500 }, { "epoch": 140.84, "eval_loss": 1.5811123847961426, "eval_runtime": 8.723, "eval_samples_per_second": 538.003, "eval_steps_per_second": 67.293, "step": 614500 }, { "epoch": 140.96, "learning_rate": 4.97449275680397e-05, "loss": 0.9538, "step": 615000 }, { "epoch": 140.96, "eval_loss": 1.6004606485366821, "eval_runtime": 8.715, "eval_samples_per_second": 538.497, "eval_steps_per_second": 67.355, "step": 615000 }, { "epoch": 141.07, "learning_rate": 4.974350064248254e-05, "loss": 0.9439, "step": 615500 }, { "epoch": 141.07, "eval_loss": 1.6070436239242554, "eval_runtime": 8.7244, "eval_samples_per_second": 537.917, "eval_steps_per_second": 67.283, "step": 615500 }, { "epoch": 141.19, "learning_rate": 4.974206975737891e-05, "loss": 0.9409, "step": 616000 }, { "epoch": 141.19, "eval_loss": 1.6259182691574097, "eval_runtime": 8.7324, "eval_samples_per_second": 537.425, "eval_steps_per_second": 67.221, "step": 616000 }, { "epoch": 141.3, "learning_rate": 4.974063491295776e-05, "loss": 0.9449, "step": 616500 }, { "epoch": 141.3, "eval_loss": 1.607118010520935, "eval_runtime": 8.713, "eval_samples_per_second": 538.619, "eval_steps_per_second": 67.37, "step": 616500 }, { "epoch": 141.42, "learning_rate": 4.9739196109448714e-05, "loss": 0.9393, "step": 617000 }, { "epoch": 141.42, "eval_loss": 1.621123194694519, "eval_runtime": 8.716, "eval_samples_per_second": 538.433, "eval_steps_per_second": 67.347, "step": 617000 }, { "epoch": 141.53, "learning_rate": 4.973775334708202e-05, "loss": 0.9447, "step": 617500 }, { "epoch": 141.53, "eval_loss": 1.608993411064148, "eval_runtime": 8.7183, "eval_samples_per_second": 538.292, "eval_steps_per_second": 67.329, "step": 617500 }, { "epoch": 141.65, "learning_rate": 4.973630662608853e-05, "loss": 0.9431, "step": 618000 }, { "epoch": 141.65, "eval_loss": 1.602254033088684, "eval_runtime": 8.7209, "eval_samples_per_second": 538.132, "eval_steps_per_second": 67.309, "step": 618000 }, { "epoch": 141.76, "learning_rate": 4.973485594669978e-05, "loss": 0.9458, "step": 618500 }, { "epoch": 141.76, "eval_loss": 1.6190001964569092, "eval_runtime": 8.7131, "eval_samples_per_second": 538.617, "eval_steps_per_second": 67.37, "step": 618500 }, { "epoch": 141.87, "learning_rate": 4.97334013091479e-05, "loss": 0.9549, "step": 619000 }, { "epoch": 141.87, "eval_loss": 1.6142926216125488, "eval_runtime": 8.7211, "eval_samples_per_second": 538.122, "eval_steps_per_second": 67.308, "step": 619000 }, { "epoch": 141.99, "learning_rate": 4.9731942713665664e-05, "loss": 0.9563, "step": 619500 }, { "epoch": 141.99, "eval_loss": 1.5722126960754395, "eval_runtime": 8.7156, "eval_samples_per_second": 538.463, "eval_steps_per_second": 67.351, "step": 619500 }, { "epoch": 142.1, "learning_rate": 4.9730480160486485e-05, "loss": 0.9359, "step": 620000 }, { "epoch": 142.1, "eval_loss": 1.606583595275879, "eval_runtime": 8.7195, "eval_samples_per_second": 538.218, "eval_steps_per_second": 67.32, "step": 620000 }, { "epoch": 142.22, "learning_rate": 4.9729013649844416e-05, "loss": 0.9399, "step": 620500 }, { "epoch": 142.22, "eval_loss": 1.6185696125030518, "eval_runtime": 8.7109, "eval_samples_per_second": 538.748, "eval_steps_per_second": 67.387, "step": 620500 }, { "epoch": 142.33, "learning_rate": 4.972754318197412e-05, "loss": 0.9394, "step": 621000 }, { "epoch": 142.33, "eval_loss": 1.6175605058670044, "eval_runtime": 8.7135, "eval_samples_per_second": 538.588, "eval_steps_per_second": 67.367, "step": 621000 }, { "epoch": 142.45, "learning_rate": 4.972606875711091e-05, "loss": 0.9449, "step": 621500 }, { "epoch": 142.45, "eval_loss": 1.61361563205719, "eval_runtime": 8.7095, "eval_samples_per_second": 538.837, "eval_steps_per_second": 67.398, "step": 621500 }, { "epoch": 142.56, "learning_rate": 4.972459037549073e-05, "loss": 0.943, "step": 622000 }, { "epoch": 142.56, "eval_loss": 1.602128028869629, "eval_runtime": 8.7285, "eval_samples_per_second": 537.664, "eval_steps_per_second": 67.251, "step": 622000 }, { "epoch": 142.68, "learning_rate": 4.972310803735016e-05, "loss": 0.9524, "step": 622500 }, { "epoch": 142.68, "eval_loss": 1.6195306777954102, "eval_runtime": 8.7183, "eval_samples_per_second": 538.296, "eval_steps_per_second": 67.33, "step": 622500 }, { "epoch": 142.79, "learning_rate": 4.9721621742926404e-05, "loss": 0.9505, "step": 623000 }, { "epoch": 142.79, "eval_loss": 1.6222833395004272, "eval_runtime": 8.7132, "eval_samples_per_second": 538.606, "eval_steps_per_second": 67.369, "step": 623000 }, { "epoch": 142.91, "learning_rate": 4.972013149245731e-05, "loss": 0.9568, "step": 623500 }, { "epoch": 142.91, "eval_loss": 1.6125056743621826, "eval_runtime": 8.7123, "eval_samples_per_second": 538.663, "eval_steps_per_second": 67.376, "step": 623500 }, { "epoch": 143.02, "learning_rate": 4.971863728618135e-05, "loss": 0.9553, "step": 624000 }, { "epoch": 143.02, "eval_loss": 1.5948989391326904, "eval_runtime": 8.7201, "eval_samples_per_second": 538.182, "eval_steps_per_second": 67.316, "step": 624000 }, { "epoch": 143.14, "learning_rate": 4.971713912433763e-05, "loss": 0.9372, "step": 624500 }, { "epoch": 143.14, "eval_loss": 1.6161584854125977, "eval_runtime": 8.7092, "eval_samples_per_second": 538.858, "eval_steps_per_second": 67.4, "step": 624500 }, { "epoch": 143.25, "learning_rate": 4.9715637007165895e-05, "loss": 0.9444, "step": 625000 }, { "epoch": 143.25, "eval_loss": 1.5985424518585205, "eval_runtime": 8.7185, "eval_samples_per_second": 538.281, "eval_steps_per_second": 67.328, "step": 625000 }, { "epoch": 143.36, "learning_rate": 4.971413093490651e-05, "loss": 0.9412, "step": 625500 }, { "epoch": 143.36, "eval_loss": 1.6205215454101562, "eval_runtime": 8.7155, "eval_samples_per_second": 538.468, "eval_steps_per_second": 67.352, "step": 625500 }, { "epoch": 143.48, "learning_rate": 4.97126209078005e-05, "loss": 0.9445, "step": 626000 }, { "epoch": 143.48, "eval_loss": 1.6078782081604004, "eval_runtime": 8.7152, "eval_samples_per_second": 538.483, "eval_steps_per_second": 67.353, "step": 626000 }, { "epoch": 143.59, "learning_rate": 4.971110692608949e-05, "loss": 0.9418, "step": 626500 }, { "epoch": 143.59, "eval_loss": 1.6031417846679688, "eval_runtime": 8.7214, "eval_samples_per_second": 538.099, "eval_steps_per_second": 67.305, "step": 626500 }, { "epoch": 143.71, "learning_rate": 4.970958899001576e-05, "loss": 0.9519, "step": 627000 }, { "epoch": 143.71, "eval_loss": 1.6069272756576538, "eval_runtime": 8.7241, "eval_samples_per_second": 537.938, "eval_steps_per_second": 67.285, "step": 627000 }, { "epoch": 143.82, "learning_rate": 4.970806709982221e-05, "loss": 0.9462, "step": 627500 }, { "epoch": 143.82, "eval_loss": 1.5857080221176147, "eval_runtime": 8.7139, "eval_samples_per_second": 538.567, "eval_steps_per_second": 67.364, "step": 627500 }, { "epoch": 143.94, "learning_rate": 4.970654125575239e-05, "loss": 0.9541, "step": 628000 }, { "epoch": 143.94, "eval_loss": 1.630305290222168, "eval_runtime": 8.7211, "eval_samples_per_second": 538.117, "eval_steps_per_second": 67.308, "step": 628000 }, { "epoch": 144.05, "learning_rate": 4.970501145805046e-05, "loss": 0.9404, "step": 628500 }, { "epoch": 144.05, "eval_loss": 1.6200766563415527, "eval_runtime": 8.7067, "eval_samples_per_second": 539.013, "eval_steps_per_second": 67.42, "step": 628500 }, { "epoch": 144.17, "learning_rate": 4.970347770696122e-05, "loss": 0.9342, "step": 629000 }, { "epoch": 144.17, "eval_loss": 1.6198660135269165, "eval_runtime": 8.7234, "eval_samples_per_second": 537.978, "eval_steps_per_second": 67.29, "step": 629000 }, { "epoch": 144.28, "learning_rate": 4.9701940002730125e-05, "loss": 0.9386, "step": 629500 }, { "epoch": 144.28, "eval_loss": 1.6206051111221313, "eval_runtime": 8.7216, "eval_samples_per_second": 538.091, "eval_steps_per_second": 67.304, "step": 629500 }, { "epoch": 144.4, "learning_rate": 4.970039834560323e-05, "loss": 0.9453, "step": 630000 }, { "epoch": 144.4, "eval_loss": 1.616275429725647, "eval_runtime": 8.7217, "eval_samples_per_second": 538.081, "eval_steps_per_second": 67.303, "step": 630000 }, { "epoch": 144.51, "learning_rate": 4.969885273582724e-05, "loss": 0.9463, "step": 630500 }, { "epoch": 144.51, "eval_loss": 1.6063662767410278, "eval_runtime": 8.7135, "eval_samples_per_second": 538.589, "eval_steps_per_second": 67.367, "step": 630500 }, { "epoch": 144.63, "learning_rate": 4.9697303173649483e-05, "loss": 0.9462, "step": 631000 }, { "epoch": 144.63, "eval_loss": 1.6084084510803223, "eval_runtime": 8.7197, "eval_samples_per_second": 538.209, "eval_steps_per_second": 67.319, "step": 631000 }, { "epoch": 144.74, "learning_rate": 4.969574965931794e-05, "loss": 0.9486, "step": 631500 }, { "epoch": 144.74, "eval_loss": 1.6117677688598633, "eval_runtime": 8.7118, "eval_samples_per_second": 538.694, "eval_steps_per_second": 67.38, "step": 631500 }, { "epoch": 144.85, "learning_rate": 4.9694192193081195e-05, "loss": 0.9474, "step": 632000 }, { "epoch": 144.85, "eval_loss": 1.6254582405090332, "eval_runtime": 8.7105, "eval_samples_per_second": 538.773, "eval_steps_per_second": 67.39, "step": 632000 }, { "epoch": 144.97, "learning_rate": 4.9692630775188495e-05, "loss": 0.9449, "step": 632500 }, { "epoch": 144.97, "eval_loss": 1.6002904176712036, "eval_runtime": 8.72, "eval_samples_per_second": 538.19, "eval_steps_per_second": 67.317, "step": 632500 }, { "epoch": 145.08, "learning_rate": 4.9691065405889684e-05, "loss": 0.9397, "step": 633000 }, { "epoch": 145.08, "eval_loss": 1.633092999458313, "eval_runtime": 8.7243, "eval_samples_per_second": 537.924, "eval_steps_per_second": 67.284, "step": 633000 }, { "epoch": 145.2, "learning_rate": 4.9689496085435275e-05, "loss": 0.9352, "step": 633500 }, { "epoch": 145.2, "eval_loss": 1.616778016090393, "eval_runtime": 8.7134, "eval_samples_per_second": 538.595, "eval_steps_per_second": 67.367, "step": 633500 }, { "epoch": 145.31, "learning_rate": 4.9687922814076395e-05, "loss": 0.9347, "step": 634000 }, { "epoch": 145.31, "eval_loss": 1.6317335367202759, "eval_runtime": 8.7172, "eval_samples_per_second": 538.363, "eval_steps_per_second": 67.338, "step": 634000 }, { "epoch": 145.43, "learning_rate": 4.968634559206479e-05, "loss": 0.9454, "step": 634500 }, { "epoch": 145.43, "eval_loss": 1.6299787759780884, "eval_runtime": 8.7157, "eval_samples_per_second": 538.457, "eval_steps_per_second": 67.35, "step": 634500 }, { "epoch": 145.54, "learning_rate": 4.9684764419652876e-05, "loss": 0.9524, "step": 635000 }, { "epoch": 145.54, "eval_loss": 1.6079365015029907, "eval_runtime": 8.7256, "eval_samples_per_second": 537.842, "eval_steps_per_second": 67.273, "step": 635000 }, { "epoch": 145.66, "learning_rate": 4.968317929709366e-05, "loss": 0.945, "step": 635500 }, { "epoch": 145.66, "eval_loss": 1.6135354042053223, "eval_runtime": 8.7155, "eval_samples_per_second": 538.465, "eval_steps_per_second": 67.351, "step": 635500 }, { "epoch": 145.77, "learning_rate": 4.96815902246408e-05, "loss": 0.9492, "step": 636000 }, { "epoch": 145.77, "eval_loss": 1.611159324645996, "eval_runtime": 8.7137, "eval_samples_per_second": 538.579, "eval_steps_per_second": 67.365, "step": 636000 }, { "epoch": 145.89, "learning_rate": 4.9679997202548594e-05, "loss": 0.9549, "step": 636500 }, { "epoch": 145.89, "eval_loss": 1.6078263521194458, "eval_runtime": 8.7164, "eval_samples_per_second": 538.409, "eval_steps_per_second": 67.344, "step": 636500 }, { "epoch": 146.0, "learning_rate": 4.967840023107196e-05, "loss": 0.9464, "step": 637000 }, { "epoch": 146.0, "eval_loss": 1.6175146102905273, "eval_runtime": 8.7171, "eval_samples_per_second": 538.369, "eval_steps_per_second": 67.339, "step": 637000 }, { "epoch": 146.12, "learning_rate": 4.967679931046645e-05, "loss": 0.9346, "step": 637500 }, { "epoch": 146.12, "eval_loss": 1.6238203048706055, "eval_runtime": 8.7193, "eval_samples_per_second": 538.232, "eval_steps_per_second": 67.322, "step": 637500 }, { "epoch": 146.23, "learning_rate": 4.967519444098825e-05, "loss": 0.9383, "step": 638000 }, { "epoch": 146.23, "eval_loss": 1.5851773023605347, "eval_runtime": 8.7187, "eval_samples_per_second": 538.266, "eval_steps_per_second": 67.326, "step": 638000 }, { "epoch": 146.34, "learning_rate": 4.967358562289417e-05, "loss": 0.9426, "step": 638500 }, { "epoch": 146.34, "eval_loss": 1.6319345235824585, "eval_runtime": 8.7162, "eval_samples_per_second": 538.423, "eval_steps_per_second": 67.346, "step": 638500 }, { "epoch": 146.46, "learning_rate": 4.967197285644167e-05, "loss": 0.9361, "step": 639000 }, { "epoch": 146.46, "eval_loss": 1.625098466873169, "eval_runtime": 8.7179, "eval_samples_per_second": 538.321, "eval_steps_per_second": 67.333, "step": 639000 }, { "epoch": 146.57, "learning_rate": 4.967035614188883e-05, "loss": 0.9434, "step": 639500 }, { "epoch": 146.57, "eval_loss": 1.6145974397659302, "eval_runtime": 8.7811, "eval_samples_per_second": 534.443, "eval_steps_per_second": 66.848, "step": 639500 }, { "epoch": 146.69, "learning_rate": 4.9668735479494364e-05, "loss": 0.9381, "step": 640000 }, { "epoch": 146.69, "eval_loss": 1.6338533163070679, "eval_runtime": 8.7563, "eval_samples_per_second": 535.958, "eval_steps_per_second": 67.038, "step": 640000 }, { "epoch": 146.8, "learning_rate": 4.9667110869517604e-05, "loss": 0.9488, "step": 640500 }, { "epoch": 146.8, "eval_loss": 1.6136980056762695, "eval_runtime": 8.758, "eval_samples_per_second": 535.855, "eval_steps_per_second": 67.025, "step": 640500 }, { "epoch": 146.92, "learning_rate": 4.966548231221854e-05, "loss": 0.9478, "step": 641000 }, { "epoch": 146.92, "eval_loss": 1.632676362991333, "eval_runtime": 8.7607, "eval_samples_per_second": 535.689, "eval_steps_per_second": 67.004, "step": 641000 }, { "epoch": 147.03, "learning_rate": 4.9663849807857766e-05, "loss": 0.9455, "step": 641500 }, { "epoch": 147.03, "eval_loss": 1.5927187204360962, "eval_runtime": 8.7643, "eval_samples_per_second": 535.466, "eval_steps_per_second": 66.976, "step": 641500 }, { "epoch": 147.15, "learning_rate": 4.966221335669653e-05, "loss": 0.9376, "step": 642000 }, { "epoch": 147.15, "eval_loss": 1.6193487644195557, "eval_runtime": 8.7597, "eval_samples_per_second": 535.751, "eval_steps_per_second": 67.012, "step": 642000 }, { "epoch": 147.26, "learning_rate": 4.9660572958996697e-05, "loss": 0.9376, "step": 642500 }, { "epoch": 147.26, "eval_loss": 1.6236095428466797, "eval_runtime": 8.7627, "eval_samples_per_second": 535.568, "eval_steps_per_second": 66.989, "step": 642500 }, { "epoch": 147.38, "learning_rate": 4.965892861502078e-05, "loss": 0.9405, "step": 643000 }, { "epoch": 147.38, "eval_loss": 1.6145919561386108, "eval_runtime": 8.759, "eval_samples_per_second": 535.789, "eval_steps_per_second": 67.016, "step": 643000 }, { "epoch": 147.49, "learning_rate": 4.965728032503192e-05, "loss": 0.9374, "step": 643500 }, { "epoch": 147.49, "eval_loss": 1.6309829950332642, "eval_runtime": 8.7652, "eval_samples_per_second": 535.411, "eval_steps_per_second": 66.969, "step": 643500 }, { "epoch": 147.6, "learning_rate": 4.965562808929386e-05, "loss": 0.9426, "step": 644000 }, { "epoch": 147.6, "eval_loss": 1.6394091844558716, "eval_runtime": 8.7794, "eval_samples_per_second": 534.547, "eval_steps_per_second": 66.861, "step": 644000 }, { "epoch": 147.72, "learning_rate": 4.9653971908071e-05, "loss": 0.9419, "step": 644500 }, { "epoch": 147.72, "eval_loss": 1.5927742719650269, "eval_runtime": 8.7527, "eval_samples_per_second": 536.178, "eval_steps_per_second": 67.065, "step": 644500 }, { "epoch": 147.83, "learning_rate": 4.965231178162838e-05, "loss": 0.9431, "step": 645000 }, { "epoch": 147.83, "eval_loss": 1.607709288597107, "eval_runtime": 8.7607, "eval_samples_per_second": 535.685, "eval_steps_per_second": 67.003, "step": 645000 }, { "epoch": 147.95, "learning_rate": 4.9650647710231655e-05, "loss": 0.9457, "step": 645500 }, { "epoch": 147.95, "eval_loss": 1.642839789390564, "eval_runtime": 8.7553, "eval_samples_per_second": 536.016, "eval_steps_per_second": 67.045, "step": 645500 }, { "epoch": 148.06, "learning_rate": 4.964897969414711e-05, "loss": 0.9423, "step": 646000 }, { "epoch": 148.06, "eval_loss": 1.618451714515686, "eval_runtime": 8.7574, "eval_samples_per_second": 535.887, "eval_steps_per_second": 67.029, "step": 646000 }, { "epoch": 148.18, "learning_rate": 4.9647307733641684e-05, "loss": 0.9392, "step": 646500 }, { "epoch": 148.18, "eval_loss": 1.6227622032165527, "eval_runtime": 8.7598, "eval_samples_per_second": 535.744, "eval_steps_per_second": 67.011, "step": 646500 }, { "epoch": 148.29, "learning_rate": 4.9645631828982915e-05, "loss": 0.9304, "step": 647000 }, { "epoch": 148.29, "eval_loss": 1.6018120050430298, "eval_runtime": 8.7599, "eval_samples_per_second": 535.737, "eval_steps_per_second": 67.01, "step": 647000 }, { "epoch": 148.41, "learning_rate": 4.964395198043898e-05, "loss": 0.936, "step": 647500 }, { "epoch": 148.41, "eval_loss": 1.6194146871566772, "eval_runtime": 8.7527, "eval_samples_per_second": 536.176, "eval_steps_per_second": 67.065, "step": 647500 }, { "epoch": 148.52, "learning_rate": 4.9642268188278724e-05, "loss": 0.9369, "step": 648000 }, { "epoch": 148.52, "eval_loss": 1.6246670484542847, "eval_runtime": 8.7584, "eval_samples_per_second": 535.825, "eval_steps_per_second": 67.021, "step": 648000 }, { "epoch": 148.64, "learning_rate": 4.964058045277157e-05, "loss": 0.938, "step": 648500 }, { "epoch": 148.64, "eval_loss": 1.6202360391616821, "eval_runtime": 8.7658, "eval_samples_per_second": 535.376, "eval_steps_per_second": 66.965, "step": 648500 }, { "epoch": 148.75, "learning_rate": 4.9638888774187596e-05, "loss": 0.9488, "step": 649000 }, { "epoch": 148.75, "eval_loss": 1.6061735153198242, "eval_runtime": 8.7604, "eval_samples_per_second": 535.706, "eval_steps_per_second": 67.006, "step": 649000 }, { "epoch": 148.87, "learning_rate": 4.963719315279752e-05, "loss": 0.9394, "step": 649500 }, { "epoch": 148.87, "eval_loss": 1.601861834526062, "eval_runtime": 8.7588, "eval_samples_per_second": 535.801, "eval_steps_per_second": 67.018, "step": 649500 }, { "epoch": 148.98, "learning_rate": 4.963549358887267e-05, "loss": 0.9513, "step": 650000 }, { "epoch": 148.98, "eval_loss": 1.597773790359497, "eval_runtime": 8.7606, "eval_samples_per_second": 535.691, "eval_steps_per_second": 67.004, "step": 650000 }, { "epoch": 149.09, "learning_rate": 4.963379008268503e-05, "loss": 0.933, "step": 650500 }, { "epoch": 149.09, "eval_loss": 1.6401927471160889, "eval_runtime": 8.7617, "eval_samples_per_second": 535.627, "eval_steps_per_second": 66.996, "step": 650500 }, { "epoch": 149.21, "learning_rate": 4.9632082634507196e-05, "loss": 0.9319, "step": 651000 }, { "epoch": 149.21, "eval_loss": 1.6225740909576416, "eval_runtime": 8.7609, "eval_samples_per_second": 535.678, "eval_steps_per_second": 67.003, "step": 651000 }, { "epoch": 149.32, "learning_rate": 4.96303712446124e-05, "loss": 0.9347, "step": 651500 }, { "epoch": 149.32, "eval_loss": 1.6110979318618774, "eval_runtime": 8.7544, "eval_samples_per_second": 536.075, "eval_steps_per_second": 67.052, "step": 651500 }, { "epoch": 149.44, "learning_rate": 4.96286559132745e-05, "loss": 0.9381, "step": 652000 }, { "epoch": 149.44, "eval_loss": 1.6176284551620483, "eval_runtime": 8.7612, "eval_samples_per_second": 535.656, "eval_steps_per_second": 67.0, "step": 652000 }, { "epoch": 149.55, "learning_rate": 4.9626936640768e-05, "loss": 0.9363, "step": 652500 }, { "epoch": 149.55, "eval_loss": 1.6263896226882935, "eval_runtime": 8.7625, "eval_samples_per_second": 535.578, "eval_steps_per_second": 66.99, "step": 652500 }, { "epoch": 149.67, "learning_rate": 4.9625213427368017e-05, "loss": 0.9348, "step": 653000 }, { "epoch": 149.67, "eval_loss": 1.6307040452957153, "eval_runtime": 8.785, "eval_samples_per_second": 534.205, "eval_steps_per_second": 66.818, "step": 653000 }, { "epoch": 149.78, "learning_rate": 4.9623486273350304e-05, "loss": 0.9386, "step": 653500 }, { "epoch": 149.78, "eval_loss": 1.6122455596923828, "eval_runtime": 8.7583, "eval_samples_per_second": 535.837, "eval_steps_per_second": 67.022, "step": 653500 }, { "epoch": 149.9, "learning_rate": 4.962175517899126e-05, "loss": 0.9389, "step": 654000 }, { "epoch": 149.9, "eval_loss": 1.6083446741104126, "eval_runtime": 8.7601, "eval_samples_per_second": 535.722, "eval_steps_per_second": 67.008, "step": 654000 }, { "epoch": 150.01, "learning_rate": 4.962002014456788e-05, "loss": 0.9416, "step": 654500 }, { "epoch": 150.01, "eval_loss": 1.6273164749145508, "eval_runtime": 8.7508, "eval_samples_per_second": 536.297, "eval_steps_per_second": 67.08, "step": 654500 }, { "epoch": 150.13, "learning_rate": 4.961828117035783e-05, "loss": 0.9259, "step": 655000 }, { "epoch": 150.13, "eval_loss": 1.6287055015563965, "eval_runtime": 8.7671, "eval_samples_per_second": 535.3, "eval_steps_per_second": 66.955, "step": 655000 }, { "epoch": 150.24, "learning_rate": 4.961653825663937e-05, "loss": 0.93, "step": 655500 }, { "epoch": 150.24, "eval_loss": 1.6323984861373901, "eval_runtime": 8.7701, "eval_samples_per_second": 535.111, "eval_steps_per_second": 66.932, "step": 655500 }, { "epoch": 150.36, "learning_rate": 4.961479140369142e-05, "loss": 0.9365, "step": 656000 }, { "epoch": 150.36, "eval_loss": 1.6313835382461548, "eval_runtime": 8.7586, "eval_samples_per_second": 535.818, "eval_steps_per_second": 67.02, "step": 656000 }, { "epoch": 150.47, "learning_rate": 4.961304061179352e-05, "loss": 0.9329, "step": 656500 }, { "epoch": 150.47, "eval_loss": 1.6296480894088745, "eval_runtime": 8.7474, "eval_samples_per_second": 536.501, "eval_steps_per_second": 67.105, "step": 656500 }, { "epoch": 150.58, "learning_rate": 4.961128588122582e-05, "loss": 0.936, "step": 657000 }, { "epoch": 150.58, "eval_loss": 1.6196773052215576, "eval_runtime": 8.7564, "eval_samples_per_second": 535.949, "eval_steps_per_second": 67.036, "step": 657000 }, { "epoch": 150.7, "learning_rate": 4.960952721226914e-05, "loss": 0.9397, "step": 657500 }, { "epoch": 150.7, "eval_loss": 1.6225591897964478, "eval_runtime": 8.7674, "eval_samples_per_second": 535.278, "eval_steps_per_second": 66.953, "step": 657500 }, { "epoch": 150.81, "learning_rate": 4.9607764605204896e-05, "loss": 0.9363, "step": 658000 }, { "epoch": 150.81, "eval_loss": 1.6305060386657715, "eval_runtime": 8.753, "eval_samples_per_second": 536.158, "eval_steps_per_second": 67.063, "step": 658000 }, { "epoch": 150.93, "learning_rate": 4.9605998060315156e-05, "loss": 0.9407, "step": 658500 }, { "epoch": 150.93, "eval_loss": 1.6143392324447632, "eval_runtime": 8.7459, "eval_samples_per_second": 536.592, "eval_steps_per_second": 67.117, "step": 658500 }, { "epoch": 151.04, "learning_rate": 4.96042275778826e-05, "loss": 0.9456, "step": 659000 }, { "epoch": 151.04, "eval_loss": 1.6341321468353271, "eval_runtime": 8.7564, "eval_samples_per_second": 535.952, "eval_steps_per_second": 67.037, "step": 659000 }, { "epoch": 151.16, "learning_rate": 4.9602453158190555e-05, "loss": 0.9333, "step": 659500 }, { "epoch": 151.16, "eval_loss": 1.6420775651931763, "eval_runtime": 8.7603, "eval_samples_per_second": 535.714, "eval_steps_per_second": 67.007, "step": 659500 }, { "epoch": 151.27, "learning_rate": 4.960067480152296e-05, "loss": 0.9247, "step": 660000 }, { "epoch": 151.27, "eval_loss": 1.6291956901550293, "eval_runtime": 8.7472, "eval_samples_per_second": 536.516, "eval_steps_per_second": 67.107, "step": 660000 }, { "epoch": 151.39, "learning_rate": 4.9598892508164395e-05, "loss": 0.929, "step": 660500 }, { "epoch": 151.39, "eval_loss": 1.6377990245819092, "eval_runtime": 8.752, "eval_samples_per_second": 536.221, "eval_steps_per_second": 67.07, "step": 660500 }, { "epoch": 151.5, "learning_rate": 4.959710627840009e-05, "loss": 0.9362, "step": 661000 }, { "epoch": 151.5, "eval_loss": 1.6115473508834839, "eval_runtime": 8.7496, "eval_samples_per_second": 536.366, "eval_steps_per_second": 67.089, "step": 661000 }, { "epoch": 151.62, "learning_rate": 4.959531611251585e-05, "loss": 0.9328, "step": 661500 }, { "epoch": 151.62, "eval_loss": 1.6370552778244019, "eval_runtime": 8.7494, "eval_samples_per_second": 536.382, "eval_steps_per_second": 67.091, "step": 661500 }, { "epoch": 151.73, "learning_rate": 4.959352201079818e-05, "loss": 0.9391, "step": 662000 }, { "epoch": 151.73, "eval_loss": 1.6220612525939941, "eval_runtime": 8.768, "eval_samples_per_second": 535.243, "eval_steps_per_second": 66.948, "step": 662000 }, { "epoch": 151.84, "learning_rate": 4.959172397353415e-05, "loss": 0.9391, "step": 662500 }, { "epoch": 151.84, "eval_loss": 1.645382285118103, "eval_runtime": 8.7451, "eval_samples_per_second": 536.641, "eval_steps_per_second": 67.123, "step": 662500 }, { "epoch": 151.96, "learning_rate": 4.9589922001011504e-05, "loss": 0.9324, "step": 663000 }, { "epoch": 151.96, "eval_loss": 1.6205555200576782, "eval_runtime": 8.7543, "eval_samples_per_second": 536.081, "eval_steps_per_second": 67.053, "step": 663000 }, { "epoch": 152.07, "learning_rate": 4.95881160935186e-05, "loss": 0.9307, "step": 663500 }, { "epoch": 152.07, "eval_loss": 1.611372470855713, "eval_runtime": 8.7431, "eval_samples_per_second": 536.764, "eval_steps_per_second": 67.138, "step": 663500 }, { "epoch": 152.19, "learning_rate": 4.9586306251344416e-05, "loss": 0.9253, "step": 664000 }, { "epoch": 152.19, "eval_loss": 1.6237918138504028, "eval_runtime": 8.7538, "eval_samples_per_second": 536.113, "eval_steps_per_second": 67.057, "step": 664000 }, { "epoch": 152.3, "learning_rate": 4.958449247477858e-05, "loss": 0.9354, "step": 664500 }, { "epoch": 152.3, "eval_loss": 1.6204596757888794, "eval_runtime": 8.7499, "eval_samples_per_second": 536.352, "eval_steps_per_second": 67.087, "step": 664500 }, { "epoch": 152.42, "learning_rate": 4.9582674764111326e-05, "loss": 0.9302, "step": 665000 }, { "epoch": 152.42, "eval_loss": 1.6156543493270874, "eval_runtime": 8.7579, "eval_samples_per_second": 535.862, "eval_steps_per_second": 67.026, "step": 665000 }, { "epoch": 152.53, "learning_rate": 4.958085311963355e-05, "loss": 0.9412, "step": 665500 }, { "epoch": 152.53, "eval_loss": 1.6311994791030884, "eval_runtime": 8.7501, "eval_samples_per_second": 536.338, "eval_steps_per_second": 67.085, "step": 665500 }, { "epoch": 152.65, "learning_rate": 4.9579027541636744e-05, "loss": 0.928, "step": 666000 }, { "epoch": 152.65, "eval_loss": 1.6157891750335693, "eval_runtime": 8.7576, "eval_samples_per_second": 535.875, "eval_steps_per_second": 67.027, "step": 666000 }, { "epoch": 152.76, "learning_rate": 4.957719803041304e-05, "loss": 0.9345, "step": 666500 }, { "epoch": 152.76, "eval_loss": 1.6123565435409546, "eval_runtime": 8.7619, "eval_samples_per_second": 535.616, "eval_steps_per_second": 66.995, "step": 666500 }, { "epoch": 152.88, "learning_rate": 4.957536458625522e-05, "loss": 0.9422, "step": 667000 }, { "epoch": 152.88, "eval_loss": 1.6204341650009155, "eval_runtime": 8.7573, "eval_samples_per_second": 535.897, "eval_steps_per_second": 67.03, "step": 667000 }, { "epoch": 152.99, "learning_rate": 4.9573527209456675e-05, "loss": 0.9339, "step": 667500 }, { "epoch": 152.99, "eval_loss": 1.6177955865859985, "eval_runtime": 8.7479, "eval_samples_per_second": 536.472, "eval_steps_per_second": 67.102, "step": 667500 }, { "epoch": 153.11, "learning_rate": 4.957168590031141e-05, "loss": 0.9254, "step": 668000 }, { "epoch": 153.11, "eval_loss": 1.645511507987976, "eval_runtime": 8.7594, "eval_samples_per_second": 535.768, "eval_steps_per_second": 67.014, "step": 668000 }, { "epoch": 153.22, "learning_rate": 4.95698406591141e-05, "loss": 0.9285, "step": 668500 }, { "epoch": 153.22, "eval_loss": 1.6116901636123657, "eval_runtime": 8.7517, "eval_samples_per_second": 536.237, "eval_steps_per_second": 67.073, "step": 668500 }, { "epoch": 153.33, "learning_rate": 4.9567991486160014e-05, "loss": 0.9293, "step": 669000 }, { "epoch": 153.33, "eval_loss": 1.6638693809509277, "eval_runtime": 8.7521, "eval_samples_per_second": 536.215, "eval_steps_per_second": 67.07, "step": 669000 }, { "epoch": 153.45, "learning_rate": 4.956613838174507e-05, "loss": 0.9303, "step": 669500 }, { "epoch": 153.45, "eval_loss": 1.630062460899353, "eval_runtime": 8.7523, "eval_samples_per_second": 536.204, "eval_steps_per_second": 67.068, "step": 669500 }, { "epoch": 153.56, "learning_rate": 4.956428134616581e-05, "loss": 0.9356, "step": 670000 }, { "epoch": 153.56, "eval_loss": 1.625266194343567, "eval_runtime": 8.7549, "eval_samples_per_second": 536.043, "eval_steps_per_second": 67.048, "step": 670000 }, { "epoch": 153.68, "learning_rate": 4.9562420379719395e-05, "loss": 0.945, "step": 670500 }, { "epoch": 153.68, "eval_loss": 1.6296170949935913, "eval_runtime": 8.7474, "eval_samples_per_second": 536.501, "eval_steps_per_second": 67.106, "step": 670500 }, { "epoch": 153.79, "learning_rate": 4.956055548270363e-05, "loss": 0.9397, "step": 671000 }, { "epoch": 153.79, "eval_loss": 1.6253446340560913, "eval_runtime": 8.7672, "eval_samples_per_second": 535.293, "eval_steps_per_second": 66.954, "step": 671000 }, { "epoch": 153.91, "learning_rate": 4.955868665541694e-05, "loss": 0.9363, "step": 671500 }, { "epoch": 153.91, "eval_loss": 1.6089133024215698, "eval_runtime": 8.7445, "eval_samples_per_second": 536.678, "eval_steps_per_second": 67.128, "step": 671500 }, { "epoch": 154.02, "learning_rate": 4.9556813898158394e-05, "loss": 0.9404, "step": 672000 }, { "epoch": 154.02, "eval_loss": 1.6164894104003906, "eval_runtime": 8.7651, "eval_samples_per_second": 535.417, "eval_steps_per_second": 66.97, "step": 672000 }, { "epoch": 154.14, "learning_rate": 4.955493721122766e-05, "loss": 0.9266, "step": 672500 }, { "epoch": 154.14, "eval_loss": 1.6191978454589844, "eval_runtime": 8.7436, "eval_samples_per_second": 536.734, "eval_steps_per_second": 67.135, "step": 672500 }, { "epoch": 154.25, "learning_rate": 4.9553056594925054e-05, "loss": 0.9236, "step": 673000 }, { "epoch": 154.25, "eval_loss": 1.6240395307540894, "eval_runtime": 8.7514, "eval_samples_per_second": 536.257, "eval_steps_per_second": 67.075, "step": 673000 }, { "epoch": 154.37, "learning_rate": 4.955117204955153e-05, "loss": 0.9272, "step": 673500 }, { "epoch": 154.37, "eval_loss": 1.6351432800292969, "eval_runtime": 8.7502, "eval_samples_per_second": 536.332, "eval_steps_per_second": 67.084, "step": 673500 }, { "epoch": 154.48, "learning_rate": 4.9549283575408655e-05, "loss": 0.9289, "step": 674000 }, { "epoch": 154.48, "eval_loss": 1.621845006942749, "eval_runtime": 8.7536, "eval_samples_per_second": 536.121, "eval_steps_per_second": 67.058, "step": 674000 }, { "epoch": 154.6, "learning_rate": 4.954739117279863e-05, "loss": 0.9313, "step": 674500 }, { "epoch": 154.6, "eval_loss": 1.6339291334152222, "eval_runtime": 8.7568, "eval_samples_per_second": 535.926, "eval_steps_per_second": 67.034, "step": 674500 }, { "epoch": 154.71, "learning_rate": 4.9545494842024284e-05, "loss": 0.9306, "step": 675000 }, { "epoch": 154.71, "eval_loss": 1.6357897520065308, "eval_runtime": 8.7505, "eval_samples_per_second": 536.314, "eval_steps_per_second": 67.082, "step": 675000 }, { "epoch": 154.82, "learning_rate": 4.954359458338907e-05, "loss": 0.9434, "step": 675500 }, { "epoch": 154.82, "eval_loss": 1.6357836723327637, "eval_runtime": 8.768, "eval_samples_per_second": 535.24, "eval_steps_per_second": 66.948, "step": 675500 }, { "epoch": 154.94, "learning_rate": 4.954169039719708e-05, "loss": 0.9439, "step": 676000 }, { "epoch": 154.94, "eval_loss": 1.6139342784881592, "eval_runtime": 8.75, "eval_samples_per_second": 536.344, "eval_steps_per_second": 67.086, "step": 676000 }, { "epoch": 155.05, "learning_rate": 4.9539782283753036e-05, "loss": 0.9343, "step": 676500 }, { "epoch": 155.05, "eval_loss": 1.6361256837844849, "eval_runtime": 8.7497, "eval_samples_per_second": 536.361, "eval_steps_per_second": 67.088, "step": 676500 }, { "epoch": 155.17, "learning_rate": 4.953787024336226e-05, "loss": 0.9196, "step": 677000 }, { "epoch": 155.17, "eval_loss": 1.6194158792495728, "eval_runtime": 8.7529, "eval_samples_per_second": 536.164, "eval_steps_per_second": 67.063, "step": 677000 }, { "epoch": 155.28, "learning_rate": 4.9535954276330745e-05, "loss": 0.9279, "step": 677500 }, { "epoch": 155.28, "eval_loss": 1.6178219318389893, "eval_runtime": 8.7559, "eval_samples_per_second": 535.982, "eval_steps_per_second": 67.041, "step": 677500 }, { "epoch": 155.4, "learning_rate": 4.9534034382965086e-05, "loss": 0.9249, "step": 678000 }, { "epoch": 155.4, "eval_loss": 1.6261661052703857, "eval_runtime": 8.7548, "eval_samples_per_second": 536.047, "eval_steps_per_second": 67.049, "step": 678000 }, { "epoch": 155.51, "learning_rate": 4.95321105635725e-05, "loss": 0.9246, "step": 678500 }, { "epoch": 155.51, "eval_loss": 1.6218464374542236, "eval_runtime": 8.7468, "eval_samples_per_second": 536.54, "eval_steps_per_second": 67.11, "step": 678500 }, { "epoch": 155.63, "learning_rate": 4.953018281846086e-05, "loss": 0.9345, "step": 679000 }, { "epoch": 155.63, "eval_loss": 1.6208409070968628, "eval_runtime": 8.7589, "eval_samples_per_second": 535.796, "eval_steps_per_second": 67.017, "step": 679000 }, { "epoch": 155.74, "learning_rate": 4.952825114793864e-05, "loss": 0.9395, "step": 679500 }, { "epoch": 155.74, "eval_loss": 1.63279128074646, "eval_runtime": 8.746, "eval_samples_per_second": 536.59, "eval_steps_per_second": 67.117, "step": 679500 }, { "epoch": 155.86, "learning_rate": 4.952631555231495e-05, "loss": 0.9313, "step": 680000 }, { "epoch": 155.86, "eval_loss": 1.625414252281189, "eval_runtime": 8.7585, "eval_samples_per_second": 535.823, "eval_steps_per_second": 67.021, "step": 680000 }, { "epoch": 155.97, "learning_rate": 4.952437603189954e-05, "loss": 0.9359, "step": 680500 }, { "epoch": 155.97, "eval_loss": 1.6459615230560303, "eval_runtime": 8.7501, "eval_samples_per_second": 536.336, "eval_steps_per_second": 67.085, "step": 680500 }, { "epoch": 156.09, "learning_rate": 4.9522432587002783e-05, "loss": 0.9233, "step": 681000 }, { "epoch": 156.09, "eval_loss": 1.6257340908050537, "eval_runtime": 8.7563, "eval_samples_per_second": 535.959, "eval_steps_per_second": 67.038, "step": 681000 }, { "epoch": 156.2, "learning_rate": 4.952048521793566e-05, "loss": 0.9225, "step": 681500 }, { "epoch": 156.2, "eval_loss": 1.656297206878662, "eval_runtime": 8.7484, "eval_samples_per_second": 536.44, "eval_steps_per_second": 67.098, "step": 681500 }, { "epoch": 156.31, "learning_rate": 4.951853392500981e-05, "loss": 0.9243, "step": 682000 }, { "epoch": 156.31, "eval_loss": 1.6274646520614624, "eval_runtime": 8.7524, "eval_samples_per_second": 536.196, "eval_steps_per_second": 67.067, "step": 682000 }, { "epoch": 156.43, "learning_rate": 4.951657870853749e-05, "loss": 0.9276, "step": 682500 }, { "epoch": 156.43, "eval_loss": 1.6259212493896484, "eval_runtime": 8.7522, "eval_samples_per_second": 536.207, "eval_steps_per_second": 67.069, "step": 682500 }, { "epoch": 156.54, "learning_rate": 4.951461956883156e-05, "loss": 0.9322, "step": 683000 }, { "epoch": 156.54, "eval_loss": 1.623051404953003, "eval_runtime": 8.7576, "eval_samples_per_second": 535.879, "eval_steps_per_second": 67.028, "step": 683000 }, { "epoch": 156.66, "learning_rate": 4.951265650620555e-05, "loss": 0.9221, "step": 683500 }, { "epoch": 156.66, "eval_loss": 1.6072031259536743, "eval_runtime": 8.7473, "eval_samples_per_second": 536.51, "eval_steps_per_second": 67.107, "step": 683500 }, { "epoch": 156.77, "learning_rate": 4.951068952097359e-05, "loss": 0.9327, "step": 684000 }, { "epoch": 156.77, "eval_loss": 1.6300419569015503, "eval_runtime": 8.7518, "eval_samples_per_second": 536.231, "eval_steps_per_second": 67.072, "step": 684000 }, { "epoch": 156.89, "learning_rate": 4.950871861345043e-05, "loss": 0.9378, "step": 684500 }, { "epoch": 156.89, "eval_loss": 1.6320749521255493, "eval_runtime": 8.7595, "eval_samples_per_second": 535.761, "eval_steps_per_second": 67.013, "step": 684500 }, { "epoch": 157.0, "learning_rate": 4.950674378395149e-05, "loss": 0.9425, "step": 685000 }, { "epoch": 157.0, "eval_loss": 1.6376347541809082, "eval_runtime": 8.7452, "eval_samples_per_second": 536.635, "eval_steps_per_second": 67.122, "step": 685000 }, { "epoch": 157.12, "learning_rate": 4.9504765032792764e-05, "loss": 0.9216, "step": 685500 }, { "epoch": 157.12, "eval_loss": 1.6348551511764526, "eval_runtime": 8.7481, "eval_samples_per_second": 536.458, "eval_steps_per_second": 67.1, "step": 685500 }, { "epoch": 157.23, "learning_rate": 4.950278236029091e-05, "loss": 0.9222, "step": 686000 }, { "epoch": 157.23, "eval_loss": 1.6405470371246338, "eval_runtime": 8.7539, "eval_samples_per_second": 536.106, "eval_steps_per_second": 67.056, "step": 686000 }, { "epoch": 157.35, "learning_rate": 4.950079576676321e-05, "loss": 0.9317, "step": 686500 }, { "epoch": 157.35, "eval_loss": 1.634956955909729, "eval_runtime": 8.7519, "eval_samples_per_second": 536.228, "eval_steps_per_second": 67.071, "step": 686500 }, { "epoch": 157.46, "learning_rate": 4.949880525252755e-05, "loss": 0.9267, "step": 687000 }, { "epoch": 157.46, "eval_loss": 1.6481719017028809, "eval_runtime": 8.7476, "eval_samples_per_second": 536.493, "eval_steps_per_second": 67.104, "step": 687000 }, { "epoch": 157.57, "learning_rate": 4.949681081790247e-05, "loss": 0.9283, "step": 687500 }, { "epoch": 157.57, "eval_loss": 1.6521049737930298, "eval_runtime": 8.7522, "eval_samples_per_second": 536.208, "eval_steps_per_second": 67.069, "step": 687500 }, { "epoch": 157.69, "learning_rate": 4.949481246320712e-05, "loss": 0.9316, "step": 688000 }, { "epoch": 157.69, "eval_loss": 1.6490892171859741, "eval_runtime": 8.7603, "eval_samples_per_second": 535.709, "eval_steps_per_second": 67.006, "step": 688000 }, { "epoch": 157.8, "learning_rate": 4.949281018876129e-05, "loss": 0.9292, "step": 688500 }, { "epoch": 157.8, "eval_loss": 1.6293381452560425, "eval_runtime": 8.7445, "eval_samples_per_second": 536.68, "eval_steps_per_second": 67.128, "step": 688500 }, { "epoch": 157.92, "learning_rate": 4.949080399488539e-05, "loss": 0.933, "step": 689000 }, { "epoch": 157.92, "eval_loss": 1.61799156665802, "eval_runtime": 8.7519, "eval_samples_per_second": 536.227, "eval_steps_per_second": 67.071, "step": 689000 }, { "epoch": 158.03, "learning_rate": 4.948879388190046e-05, "loss": 0.9289, "step": 689500 }, { "epoch": 158.03, "eval_loss": 1.6188355684280396, "eval_runtime": 8.748, "eval_samples_per_second": 536.467, "eval_steps_per_second": 67.101, "step": 689500 }, { "epoch": 158.15, "learning_rate": 4.948677985012816e-05, "loss": 0.9127, "step": 690000 }, { "epoch": 158.15, "eval_loss": 1.623080849647522, "eval_runtime": 8.7482, "eval_samples_per_second": 536.453, "eval_steps_per_second": 67.099, "step": 690000 }, { "epoch": 158.26, "learning_rate": 4.948476189989079e-05, "loss": 0.9237, "step": 690500 }, { "epoch": 158.26, "eval_loss": 1.6478880643844604, "eval_runtime": 8.7445, "eval_samples_per_second": 536.681, "eval_steps_per_second": 67.128, "step": 690500 }, { "epoch": 158.38, "learning_rate": 4.948274003151127e-05, "loss": 0.9187, "step": 691000 }, { "epoch": 158.38, "eval_loss": 1.63560950756073, "eval_runtime": 8.7609, "eval_samples_per_second": 535.675, "eval_steps_per_second": 67.002, "step": 691000 }, { "epoch": 158.49, "learning_rate": 4.948071424531313e-05, "loss": 0.9247, "step": 691500 }, { "epoch": 158.49, "eval_loss": 1.6357414722442627, "eval_runtime": 8.7481, "eval_samples_per_second": 536.46, "eval_steps_per_second": 67.1, "step": 691500 }, { "epoch": 158.61, "learning_rate": 4.947868454162057e-05, "loss": 0.9274, "step": 692000 }, { "epoch": 158.61, "eval_loss": 1.6188056468963623, "eval_runtime": 8.7576, "eval_samples_per_second": 535.875, "eval_steps_per_second": 67.027, "step": 692000 }, { "epoch": 158.72, "learning_rate": 4.947665092075837e-05, "loss": 0.9248, "step": 692500 }, { "epoch": 158.72, "eval_loss": 1.659287452697754, "eval_runtime": 8.744, "eval_samples_per_second": 536.711, "eval_steps_per_second": 67.132, "step": 692500 }, { "epoch": 158.84, "learning_rate": 4.947461338305197e-05, "loss": 0.9381, "step": 693000 }, { "epoch": 158.84, "eval_loss": 1.6361722946166992, "eval_runtime": 8.7517, "eval_samples_per_second": 536.237, "eval_steps_per_second": 67.073, "step": 693000 }, { "epoch": 158.95, "learning_rate": 4.9472571928827414e-05, "loss": 0.9271, "step": 693500 }, { "epoch": 158.95, "eval_loss": 1.6335488557815552, "eval_runtime": 8.7541, "eval_samples_per_second": 536.09, "eval_steps_per_second": 67.054, "step": 693500 }, { "epoch": 159.06, "learning_rate": 4.947052655841139e-05, "loss": 0.9254, "step": 694000 }, { "epoch": 159.06, "eval_loss": 1.6256955862045288, "eval_runtime": 8.7563, "eval_samples_per_second": 535.954, "eval_steps_per_second": 67.037, "step": 694000 }, { "epoch": 159.18, "learning_rate": 4.946847727213121e-05, "loss": 0.9239, "step": 694500 }, { "epoch": 159.18, "eval_loss": 1.6431338787078857, "eval_runtime": 8.7443, "eval_samples_per_second": 536.695, "eval_steps_per_second": 67.13, "step": 694500 }, { "epoch": 159.29, "learning_rate": 4.94664240703148e-05, "loss": 0.9328, "step": 695000 }, { "epoch": 159.29, "eval_loss": 1.6256158351898193, "eval_runtime": 8.7565, "eval_samples_per_second": 535.945, "eval_steps_per_second": 67.036, "step": 695000 }, { "epoch": 159.41, "learning_rate": 4.946436695329072e-05, "loss": 0.9262, "step": 695500 }, { "epoch": 159.41, "eval_loss": 1.633786916732788, "eval_runtime": 8.7531, "eval_samples_per_second": 536.153, "eval_steps_per_second": 67.062, "step": 695500 }, { "epoch": 159.52, "learning_rate": 4.9462305921388165e-05, "loss": 0.9248, "step": 696000 }, { "epoch": 159.52, "eval_loss": 1.6380454301834106, "eval_runtime": 8.751, "eval_samples_per_second": 536.279, "eval_steps_per_second": 67.078, "step": 696000 }, { "epoch": 159.64, "learning_rate": 4.946024097493694e-05, "loss": 0.9279, "step": 696500 }, { "epoch": 159.64, "eval_loss": 1.6284852027893066, "eval_runtime": 8.7182, "eval_samples_per_second": 538.3, "eval_steps_per_second": 67.331, "step": 696500 }, { "epoch": 159.75, "learning_rate": 4.94581721142675e-05, "loss": 0.9334, "step": 697000 }, { "epoch": 159.75, "eval_loss": 1.6151551008224487, "eval_runtime": 8.7227, "eval_samples_per_second": 538.023, "eval_steps_per_second": 67.296, "step": 697000 }, { "epoch": 159.87, "learning_rate": 4.945609933971089e-05, "loss": 0.9303, "step": 697500 }, { "epoch": 159.87, "eval_loss": 1.6269282102584839, "eval_runtime": 8.7163, "eval_samples_per_second": 538.418, "eval_steps_per_second": 67.345, "step": 697500 }, { "epoch": 159.98, "learning_rate": 4.945402265159882e-05, "loss": 0.9316, "step": 698000 }, { "epoch": 159.98, "eval_loss": 1.6251945495605469, "eval_runtime": 8.7203, "eval_samples_per_second": 538.167, "eval_steps_per_second": 67.314, "step": 698000 }, { "epoch": 160.1, "learning_rate": 4.9451942050263614e-05, "loss": 0.9212, "step": 698500 }, { "epoch": 160.1, "eval_loss": 1.6371206045150757, "eval_runtime": 8.7143, "eval_samples_per_second": 538.539, "eval_steps_per_second": 67.36, "step": 698500 }, { "epoch": 160.21, "learning_rate": 4.9449857536038205e-05, "loss": 0.92, "step": 699000 }, { "epoch": 160.21, "eval_loss": 1.607810378074646, "eval_runtime": 8.7179, "eval_samples_per_second": 538.318, "eval_steps_per_second": 67.333, "step": 699000 }, { "epoch": 160.33, "learning_rate": 4.944776910925616e-05, "loss": 0.9252, "step": 699500 }, { "epoch": 160.33, "eval_loss": 1.6382660865783691, "eval_runtime": 8.7088, "eval_samples_per_second": 538.879, "eval_steps_per_second": 67.403, "step": 699500 }, { "epoch": 160.44, "learning_rate": 4.944567677025168e-05, "loss": 0.9147, "step": 700000 }, { "epoch": 160.44, "eval_loss": 1.649003505706787, "eval_runtime": 8.7147, "eval_samples_per_second": 538.515, "eval_steps_per_second": 67.357, "step": 700000 }, { "epoch": 160.55, "learning_rate": 4.944358051935961e-05, "loss": 0.9235, "step": 700500 }, { "epoch": 160.55, "eval_loss": 1.6416925191879272, "eval_runtime": 8.7207, "eval_samples_per_second": 538.146, "eval_steps_per_second": 67.311, "step": 700500 }, { "epoch": 160.67, "learning_rate": 4.9441480356915374e-05, "loss": 0.9306, "step": 701000 }, { "epoch": 160.67, "eval_loss": 1.6478092670440674, "eval_runtime": 8.7238, "eval_samples_per_second": 537.954, "eval_steps_per_second": 67.287, "step": 701000 }, { "epoch": 160.78, "learning_rate": 4.9439376283255055e-05, "loss": 0.9243, "step": 701500 }, { "epoch": 160.78, "eval_loss": 1.628565788269043, "eval_runtime": 8.7266, "eval_samples_per_second": 537.782, "eval_steps_per_second": 67.266, "step": 701500 }, { "epoch": 160.9, "learning_rate": 4.943726829871536e-05, "loss": 0.9331, "step": 702000 }, { "epoch": 160.9, "eval_loss": 1.6259310245513916, "eval_runtime": 8.7198, "eval_samples_per_second": 538.199, "eval_steps_per_second": 67.318, "step": 702000 }, { "epoch": 161.01, "learning_rate": 4.943515640363361e-05, "loss": 0.93, "step": 702500 }, { "epoch": 161.01, "eval_loss": 1.618530511856079, "eval_runtime": 8.7217, "eval_samples_per_second": 538.082, "eval_steps_per_second": 67.303, "step": 702500 }, { "epoch": 161.13, "learning_rate": 4.943304059834776e-05, "loss": 0.9172, "step": 703000 }, { "epoch": 161.13, "eval_loss": 1.617070198059082, "eval_runtime": 8.7224, "eval_samples_per_second": 538.041, "eval_steps_per_second": 67.298, "step": 703000 }, { "epoch": 161.24, "learning_rate": 4.94309208831964e-05, "loss": 0.9177, "step": 703500 }, { "epoch": 161.24, "eval_loss": 1.6451877355575562, "eval_runtime": 8.7149, "eval_samples_per_second": 538.501, "eval_steps_per_second": 67.356, "step": 703500 }, { "epoch": 161.36, "learning_rate": 4.942879725851871e-05, "loss": 0.9199, "step": 704000 }, { "epoch": 161.36, "eval_loss": 1.6519441604614258, "eval_runtime": 8.7295, "eval_samples_per_second": 537.605, "eval_steps_per_second": 67.244, "step": 704000 }, { "epoch": 161.47, "learning_rate": 4.942666972465455e-05, "loss": 0.9281, "step": 704500 }, { "epoch": 161.47, "eval_loss": 1.6538742780685425, "eval_runtime": 8.7208, "eval_samples_per_second": 538.137, "eval_steps_per_second": 67.31, "step": 704500 }, { "epoch": 161.59, "learning_rate": 4.9424538281944354e-05, "loss": 0.9261, "step": 705000 }, { "epoch": 161.59, "eval_loss": 1.6498267650604248, "eval_runtime": 8.7226, "eval_samples_per_second": 538.029, "eval_steps_per_second": 67.297, "step": 705000 }, { "epoch": 161.7, "learning_rate": 4.942240293072921e-05, "loss": 0.9267, "step": 705500 }, { "epoch": 161.7, "eval_loss": 1.6136391162872314, "eval_runtime": 8.7205, "eval_samples_per_second": 538.155, "eval_steps_per_second": 67.312, "step": 705500 }, { "epoch": 161.82, "learning_rate": 4.942026367135083e-05, "loss": 0.9314, "step": 706000 }, { "epoch": 161.82, "eval_loss": 1.6355546712875366, "eval_runtime": 8.7312, "eval_samples_per_second": 537.5, "eval_steps_per_second": 67.23, "step": 706000 }, { "epoch": 161.93, "learning_rate": 4.941812050415153e-05, "loss": 0.9274, "step": 706500 }, { "epoch": 161.93, "eval_loss": 1.6154624223709106, "eval_runtime": 8.7285, "eval_samples_per_second": 537.663, "eval_steps_per_second": 67.251, "step": 706500 }, { "epoch": 162.04, "learning_rate": 4.9415973429474295e-05, "loss": 0.9257, "step": 707000 }, { "epoch": 162.04, "eval_loss": 1.6448352336883545, "eval_runtime": 8.717, "eval_samples_per_second": 538.374, "eval_steps_per_second": 67.34, "step": 707000 }, { "epoch": 162.16, "learning_rate": 4.9413822447662684e-05, "loss": 0.92, "step": 707500 }, { "epoch": 162.16, "eval_loss": 1.6627215147018433, "eval_runtime": 8.7192, "eval_samples_per_second": 538.239, "eval_steps_per_second": 67.323, "step": 707500 }, { "epoch": 162.27, "learning_rate": 4.941166755906091e-05, "loss": 0.9143, "step": 708000 }, { "epoch": 162.27, "eval_loss": 1.6437629461288452, "eval_runtime": 8.7242, "eval_samples_per_second": 537.927, "eval_steps_per_second": 67.284, "step": 708000 }, { "epoch": 162.39, "learning_rate": 4.9409508764013815e-05, "loss": 0.9176, "step": 708500 }, { "epoch": 162.39, "eval_loss": 1.6354221105575562, "eval_runtime": 8.7183, "eval_samples_per_second": 538.292, "eval_steps_per_second": 67.33, "step": 708500 }, { "epoch": 162.5, "learning_rate": 4.940734606286684e-05, "loss": 0.9259, "step": 709000 }, { "epoch": 162.5, "eval_loss": 1.631669521331787, "eval_runtime": 8.7245, "eval_samples_per_second": 537.908, "eval_steps_per_second": 67.281, "step": 709000 }, { "epoch": 162.62, "learning_rate": 4.940517945596609e-05, "loss": 0.9168, "step": 709500 }, { "epoch": 162.62, "eval_loss": 1.6407990455627441, "eval_runtime": 8.7186, "eval_samples_per_second": 538.276, "eval_steps_per_second": 67.328, "step": 709500 }, { "epoch": 162.73, "learning_rate": 4.940300894365827e-05, "loss": 0.9287, "step": 710000 }, { "epoch": 162.73, "eval_loss": 1.6311392784118652, "eval_runtime": 8.7233, "eval_samples_per_second": 537.983, "eval_steps_per_second": 67.291, "step": 710000 }, { "epoch": 162.85, "learning_rate": 4.94008345262907e-05, "loss": 0.9267, "step": 710500 }, { "epoch": 162.85, "eval_loss": 1.6295372247695923, "eval_runtime": 8.7143, "eval_samples_per_second": 538.537, "eval_steps_per_second": 67.36, "step": 710500 }, { "epoch": 162.96, "learning_rate": 4.9398656204211334e-05, "loss": 0.9274, "step": 711000 }, { "epoch": 162.96, "eval_loss": 1.6150175333023071, "eval_runtime": 8.7185, "eval_samples_per_second": 538.279, "eval_steps_per_second": 67.328, "step": 711000 }, { "epoch": 163.08, "learning_rate": 4.9396473977768776e-05, "loss": 0.914, "step": 711500 }, { "epoch": 163.08, "eval_loss": 1.6501580476760864, "eval_runtime": 8.7195, "eval_samples_per_second": 538.218, "eval_steps_per_second": 67.32, "step": 711500 }, { "epoch": 163.19, "learning_rate": 4.9394287847312224e-05, "loss": 0.9164, "step": 712000 }, { "epoch": 163.19, "eval_loss": 1.6375471353530884, "eval_runtime": 8.7245, "eval_samples_per_second": 537.912, "eval_steps_per_second": 67.282, "step": 712000 }, { "epoch": 163.31, "learning_rate": 4.939209781319152e-05, "loss": 0.9173, "step": 712500 }, { "epoch": 163.31, "eval_loss": 1.6230764389038086, "eval_runtime": 8.7169, "eval_samples_per_second": 538.38, "eval_steps_per_second": 67.341, "step": 712500 }, { "epoch": 163.42, "learning_rate": 4.9389903875757096e-05, "loss": 0.917, "step": 713000 }, { "epoch": 163.42, "eval_loss": 1.64914870262146, "eval_runtime": 8.7172, "eval_samples_per_second": 538.361, "eval_steps_per_second": 67.338, "step": 713000 }, { "epoch": 163.53, "learning_rate": 4.938770603536006e-05, "loss": 0.9193, "step": 713500 }, { "epoch": 163.53, "eval_loss": 1.629417896270752, "eval_runtime": 8.7182, "eval_samples_per_second": 538.3, "eval_steps_per_second": 67.331, "step": 713500 }, { "epoch": 163.65, "learning_rate": 4.93855042923521e-05, "loss": 0.9295, "step": 714000 }, { "epoch": 163.65, "eval_loss": 1.6367404460906982, "eval_runtime": 8.7311, "eval_samples_per_second": 537.505, "eval_steps_per_second": 67.231, "step": 714000 }, { "epoch": 163.76, "learning_rate": 4.938329864708556e-05, "loss": 0.9314, "step": 714500 }, { "epoch": 163.76, "eval_loss": 1.6301401853561401, "eval_runtime": 8.7143, "eval_samples_per_second": 538.538, "eval_steps_per_second": 67.36, "step": 714500 }, { "epoch": 163.88, "learning_rate": 4.93810890999134e-05, "loss": 0.9271, "step": 715000 }, { "epoch": 163.88, "eval_loss": 1.6401821374893188, "eval_runtime": 8.7172, "eval_samples_per_second": 538.361, "eval_steps_per_second": 67.338, "step": 715000 }, { "epoch": 163.99, "learning_rate": 4.937887565118918e-05, "loss": 0.9285, "step": 715500 }, { "epoch": 163.99, "eval_loss": 1.6622053384780884, "eval_runtime": 8.7222, "eval_samples_per_second": 538.055, "eval_steps_per_second": 67.3, "step": 715500 }, { "epoch": 164.11, "learning_rate": 4.937665830126712e-05, "loss": 0.9178, "step": 716000 }, { "epoch": 164.11, "eval_loss": 1.6444951295852661, "eval_runtime": 8.7216, "eval_samples_per_second": 538.089, "eval_steps_per_second": 67.304, "step": 716000 }, { "epoch": 164.22, "learning_rate": 4.937443705050205e-05, "loss": 0.9203, "step": 716500 }, { "epoch": 164.22, "eval_loss": 1.6301833391189575, "eval_runtime": 8.7226, "eval_samples_per_second": 538.028, "eval_steps_per_second": 67.297, "step": 716500 }, { "epoch": 164.34, "learning_rate": 4.937221189924941e-05, "loss": 0.9124, "step": 717000 }, { "epoch": 164.34, "eval_loss": 1.635278582572937, "eval_runtime": 8.9051, "eval_samples_per_second": 527.002, "eval_steps_per_second": 65.917, "step": 717000 }, { "epoch": 164.45, "learning_rate": 4.936998284786528e-05, "loss": 0.9307, "step": 717500 }, { "epoch": 164.45, "eval_loss": 1.6443490982055664, "eval_runtime": 8.6788, "eval_samples_per_second": 540.74, "eval_steps_per_second": 67.636, "step": 717500 }, { "epoch": 164.57, "learning_rate": 4.9367749896706374e-05, "loss": 0.9227, "step": 718000 }, { "epoch": 164.57, "eval_loss": 1.63548743724823, "eval_runtime": 8.687, "eval_samples_per_second": 540.233, "eval_steps_per_second": 67.572, "step": 718000 }, { "epoch": 164.68, "learning_rate": 4.936551304613e-05, "loss": 0.9211, "step": 718500 }, { "epoch": 164.68, "eval_loss": 1.6681694984436035, "eval_runtime": 8.6845, "eval_samples_per_second": 540.386, "eval_steps_per_second": 67.591, "step": 718500 }, { "epoch": 164.79, "learning_rate": 4.936327229649412e-05, "loss": 0.9249, "step": 719000 }, { "epoch": 164.79, "eval_loss": 1.6499755382537842, "eval_runtime": 8.6824, "eval_samples_per_second": 540.52, "eval_steps_per_second": 67.608, "step": 719000 }, { "epoch": 164.91, "learning_rate": 4.93610276481573e-05, "loss": 0.9249, "step": 719500 }, { "epoch": 164.91, "eval_loss": 1.6526433229446411, "eval_runtime": 8.6821, "eval_samples_per_second": 540.534, "eval_steps_per_second": 67.61, "step": 719500 }, { "epoch": 165.02, "learning_rate": 4.9358779101478737e-05, "loss": 0.9271, "step": 720000 }, { "epoch": 165.02, "eval_loss": 1.6389836072921753, "eval_runtime": 8.6858, "eval_samples_per_second": 540.306, "eval_steps_per_second": 67.581, "step": 720000 }, { "epoch": 165.14, "learning_rate": 4.935652665681826e-05, "loss": 0.9138, "step": 720500 }, { "epoch": 165.14, "eval_loss": 1.644481897354126, "eval_runtime": 8.6805, "eval_samples_per_second": 540.638, "eval_steps_per_second": 67.623, "step": 720500 }, { "epoch": 165.25, "learning_rate": 4.93542703145363e-05, "loss": 0.9116, "step": 721000 }, { "epoch": 165.25, "eval_loss": 1.6686714887619019, "eval_runtime": 8.6864, "eval_samples_per_second": 540.268, "eval_steps_per_second": 67.577, "step": 721000 }, { "epoch": 165.37, "learning_rate": 4.935201007499394e-05, "loss": 0.9173, "step": 721500 }, { "epoch": 165.37, "eval_loss": 1.6261358261108398, "eval_runtime": 8.6817, "eval_samples_per_second": 540.563, "eval_steps_per_second": 67.614, "step": 721500 }, { "epoch": 165.48, "learning_rate": 4.9349745938552854e-05, "loss": 0.9157, "step": 722000 }, { "epoch": 165.48, "eval_loss": 1.63326096534729, "eval_runtime": 8.6855, "eval_samples_per_second": 540.324, "eval_steps_per_second": 67.584, "step": 722000 }, { "epoch": 165.6, "learning_rate": 4.934747790557537e-05, "loss": 0.9191, "step": 722500 }, { "epoch": 165.6, "eval_loss": 1.6626001596450806, "eval_runtime": 8.6813, "eval_samples_per_second": 540.586, "eval_steps_per_second": 67.616, "step": 722500 }, { "epoch": 165.71, "learning_rate": 4.934520597642444e-05, "loss": 0.9251, "step": 723000 }, { "epoch": 165.71, "eval_loss": 1.6616811752319336, "eval_runtime": 8.6904, "eval_samples_per_second": 540.02, "eval_steps_per_second": 67.546, "step": 723000 }, { "epoch": 165.83, "learning_rate": 4.934293015146359e-05, "loss": 0.9194, "step": 723500 }, { "epoch": 165.83, "eval_loss": 1.6380386352539062, "eval_runtime": 8.6797, "eval_samples_per_second": 540.687, "eval_steps_per_second": 67.629, "step": 723500 }, { "epoch": 165.94, "learning_rate": 4.934065043105704e-05, "loss": 0.9277, "step": 724000 }, { "epoch": 165.94, "eval_loss": 1.645108938217163, "eval_runtime": 8.689, "eval_samples_per_second": 540.108, "eval_steps_per_second": 67.557, "step": 724000 }, { "epoch": 166.06, "learning_rate": 4.933836681556958e-05, "loss": 0.9241, "step": 724500 }, { "epoch": 166.06, "eval_loss": 1.6645920276641846, "eval_runtime": 8.6816, "eval_samples_per_second": 540.569, "eval_steps_per_second": 67.614, "step": 724500 }, { "epoch": 166.17, "learning_rate": 4.933607930536665e-05, "loss": 0.9135, "step": 725000 }, { "epoch": 166.17, "eval_loss": 1.6430836915969849, "eval_runtime": 8.6839, "eval_samples_per_second": 540.422, "eval_steps_per_second": 67.596, "step": 725000 }, { "epoch": 166.28, "learning_rate": 4.933378790081431e-05, "loss": 0.9105, "step": 725500 }, { "epoch": 166.28, "eval_loss": 1.626462459564209, "eval_runtime": 8.6796, "eval_samples_per_second": 540.693, "eval_steps_per_second": 67.63, "step": 725500 }, { "epoch": 166.4, "learning_rate": 4.933149260227924e-05, "loss": 0.9195, "step": 726000 }, { "epoch": 166.4, "eval_loss": 1.6573057174682617, "eval_runtime": 8.6845, "eval_samples_per_second": 540.389, "eval_steps_per_second": 67.592, "step": 726000 }, { "epoch": 166.51, "learning_rate": 4.932919341012873e-05, "loss": 0.9122, "step": 726500 }, { "epoch": 166.51, "eval_loss": 1.6500699520111084, "eval_runtime": 8.6768, "eval_samples_per_second": 540.868, "eval_steps_per_second": 67.652, "step": 726500 }, { "epoch": 166.63, "learning_rate": 4.9326890324730704e-05, "loss": 0.9168, "step": 727000 }, { "epoch": 166.63, "eval_loss": 1.659143328666687, "eval_runtime": 8.6813, "eval_samples_per_second": 540.585, "eval_steps_per_second": 67.616, "step": 727000 }, { "epoch": 166.74, "learning_rate": 4.932458334645373e-05, "loss": 0.919, "step": 727500 }, { "epoch": 166.74, "eval_loss": 1.6459931135177612, "eval_runtime": 8.68, "eval_samples_per_second": 540.668, "eval_steps_per_second": 67.627, "step": 727500 }, { "epoch": 166.86, "learning_rate": 4.9322272475666954e-05, "loss": 0.9236, "step": 728000 }, { "epoch": 166.86, "eval_loss": 1.6491012573242188, "eval_runtime": 8.689, "eval_samples_per_second": 540.107, "eval_steps_per_second": 67.557, "step": 728000 }, { "epoch": 166.97, "learning_rate": 4.931995771274019e-05, "loss": 0.9162, "step": 728500 }, { "epoch": 166.97, "eval_loss": 1.6559098958969116, "eval_runtime": 8.6804, "eval_samples_per_second": 540.645, "eval_steps_per_second": 67.624, "step": 728500 }, { "epoch": 167.09, "learning_rate": 4.9317639058043844e-05, "loss": 0.9122, "step": 729000 }, { "epoch": 167.09, "eval_loss": 1.6523830890655518, "eval_runtime": 8.6846, "eval_samples_per_second": 540.379, "eval_steps_per_second": 67.591, "step": 729000 }, { "epoch": 167.2, "learning_rate": 4.931531651194896e-05, "loss": 0.9138, "step": 729500 }, { "epoch": 167.2, "eval_loss": 1.664495587348938, "eval_runtime": 8.6833, "eval_samples_per_second": 540.465, "eval_steps_per_second": 67.601, "step": 729500 }, { "epoch": 167.32, "learning_rate": 4.93129900748272e-05, "loss": 0.9149, "step": 730000 }, { "epoch": 167.32, "eval_loss": 1.6371345520019531, "eval_runtime": 8.6853, "eval_samples_per_second": 540.336, "eval_steps_per_second": 67.585, "step": 730000 }, { "epoch": 167.43, "learning_rate": 4.9310659747050855e-05, "loss": 0.9204, "step": 730500 }, { "epoch": 167.43, "eval_loss": 1.638217568397522, "eval_runtime": 8.6814, "eval_samples_per_second": 540.584, "eval_steps_per_second": 67.616, "step": 730500 }, { "epoch": 167.55, "learning_rate": 4.930832552899283e-05, "loss": 0.9172, "step": 731000 }, { "epoch": 167.55, "eval_loss": 1.63120436668396, "eval_runtime": 8.6833, "eval_samples_per_second": 540.462, "eval_steps_per_second": 67.601, "step": 731000 }, { "epoch": 167.66, "learning_rate": 4.930598742102665e-05, "loss": 0.9254, "step": 731500 }, { "epoch": 167.66, "eval_loss": 1.6340843439102173, "eval_runtime": 8.6785, "eval_samples_per_second": 540.761, "eval_steps_per_second": 67.638, "step": 731500 }, { "epoch": 167.77, "learning_rate": 4.930364542352646e-05, "loss": 0.9238, "step": 732000 }, { "epoch": 167.77, "eval_loss": 1.6366422176361084, "eval_runtime": 8.6825, "eval_samples_per_second": 540.515, "eval_steps_per_second": 67.608, "step": 732000 }, { "epoch": 167.89, "learning_rate": 4.930129953686705e-05, "loss": 0.9209, "step": 732500 }, { "epoch": 167.89, "eval_loss": 1.6494766473770142, "eval_runtime": 8.6822, "eval_samples_per_second": 540.529, "eval_steps_per_second": 67.609, "step": 732500 }, { "epoch": 168.0, "learning_rate": 4.9298949761423806e-05, "loss": 0.9182, "step": 733000 }, { "epoch": 168.0, "eval_loss": 1.66122567653656, "eval_runtime": 8.6868, "eval_samples_per_second": 540.245, "eval_steps_per_second": 67.574, "step": 733000 }, { "epoch": 168.12, "learning_rate": 4.929659609757277e-05, "loss": 0.9098, "step": 733500 }, { "epoch": 168.12, "eval_loss": 1.6361843347549438, "eval_runtime": 8.6792, "eval_samples_per_second": 540.718, "eval_steps_per_second": 67.633, "step": 733500 }, { "epoch": 168.23, "learning_rate": 4.929423854569055e-05, "loss": 0.9073, "step": 734000 }, { "epoch": 168.23, "eval_loss": 1.6358363628387451, "eval_runtime": 8.6809, "eval_samples_per_second": 540.613, "eval_steps_per_second": 67.62, "step": 734000 }, { "epoch": 168.35, "learning_rate": 4.9291877106154437e-05, "loss": 0.9219, "step": 734500 }, { "epoch": 168.35, "eval_loss": 1.672404408454895, "eval_runtime": 8.6738, "eval_samples_per_second": 541.054, "eval_steps_per_second": 67.675, "step": 734500 }, { "epoch": 168.46, "learning_rate": 4.928951177934231e-05, "loss": 0.9075, "step": 735000 }, { "epoch": 168.46, "eval_loss": 1.647304892539978, "eval_runtime": 8.678, "eval_samples_per_second": 540.791, "eval_steps_per_second": 67.642, "step": 735000 }, { "epoch": 168.58, "learning_rate": 4.928714256563267e-05, "loss": 0.9169, "step": 735500 }, { "epoch": 168.58, "eval_loss": 1.644588828086853, "eval_runtime": 8.6795, "eval_samples_per_second": 540.698, "eval_steps_per_second": 67.63, "step": 735500 }, { "epoch": 168.69, "learning_rate": 4.928476946540466e-05, "loss": 0.9201, "step": 736000 }, { "epoch": 168.69, "eval_loss": 1.6438888311386108, "eval_runtime": 8.6805, "eval_samples_per_second": 540.635, "eval_steps_per_second": 67.623, "step": 736000 }, { "epoch": 168.81, "learning_rate": 4.928239247903802e-05, "loss": 0.9195, "step": 736500 }, { "epoch": 168.81, "eval_loss": 1.6421072483062744, "eval_runtime": 8.677, "eval_samples_per_second": 540.853, "eval_steps_per_second": 67.65, "step": 736500 }, { "epoch": 168.92, "learning_rate": 4.928001160691313e-05, "loss": 0.9248, "step": 737000 }, { "epoch": 168.92, "eval_loss": 1.6686142683029175, "eval_runtime": 8.6844, "eval_samples_per_second": 540.394, "eval_steps_per_second": 67.592, "step": 737000 }, { "epoch": 169.04, "learning_rate": 4.9277626849410984e-05, "loss": 0.9148, "step": 737500 }, { "epoch": 169.04, "eval_loss": 1.6703970432281494, "eval_runtime": 8.6807, "eval_samples_per_second": 540.624, "eval_steps_per_second": 67.621, "step": 737500 }, { "epoch": 169.15, "learning_rate": 4.927523820691321e-05, "loss": 0.9062, "step": 738000 }, { "epoch": 169.15, "eval_loss": 1.6505409479141235, "eval_runtime": 8.6809, "eval_samples_per_second": 540.61, "eval_steps_per_second": 67.619, "step": 738000 }, { "epoch": 169.26, "learning_rate": 4.927284567980203e-05, "loss": 0.9065, "step": 738500 }, { "epoch": 169.26, "eval_loss": 1.6639463901519775, "eval_runtime": 8.6786, "eval_samples_per_second": 540.758, "eval_steps_per_second": 67.638, "step": 738500 }, { "epoch": 169.38, "learning_rate": 4.927044926846032e-05, "loss": 0.9169, "step": 739000 }, { "epoch": 169.38, "eval_loss": 1.6552937030792236, "eval_runtime": 8.6853, "eval_samples_per_second": 540.34, "eval_steps_per_second": 67.586, "step": 739000 }, { "epoch": 169.49, "learning_rate": 4.9268048973271554e-05, "loss": 0.9132, "step": 739500 }, { "epoch": 169.49, "eval_loss": 1.6481246948242188, "eval_runtime": 8.6817, "eval_samples_per_second": 540.564, "eval_steps_per_second": 67.614, "step": 739500 }, { "epoch": 169.61, "learning_rate": 4.9265644794619834e-05, "loss": 0.9156, "step": 740000 }, { "epoch": 169.61, "eval_loss": 1.6428545713424683, "eval_runtime": 8.6843, "eval_samples_per_second": 540.401, "eval_steps_per_second": 67.593, "step": 740000 }, { "epoch": 169.72, "learning_rate": 4.9263236732889896e-05, "loss": 0.9228, "step": 740500 }, { "epoch": 169.72, "eval_loss": 1.6644138097763062, "eval_runtime": 8.6858, "eval_samples_per_second": 540.307, "eval_steps_per_second": 67.582, "step": 740500 }, { "epoch": 169.84, "learning_rate": 4.926082478846708e-05, "loss": 0.9154, "step": 741000 }, { "epoch": 169.84, "eval_loss": 1.632447361946106, "eval_runtime": 8.6844, "eval_samples_per_second": 540.396, "eval_steps_per_second": 67.593, "step": 741000 }, { "epoch": 169.95, "learning_rate": 4.925840896173735e-05, "loss": 0.9219, "step": 741500 }, { "epoch": 169.95, "eval_loss": 1.6540906429290771, "eval_runtime": 8.6812, "eval_samples_per_second": 540.592, "eval_steps_per_second": 67.617, "step": 741500 }, { "epoch": 170.07, "learning_rate": 4.92559892530873e-05, "loss": 0.9124, "step": 742000 }, { "epoch": 170.07, "eval_loss": 1.671669363975525, "eval_runtime": 8.6838, "eval_samples_per_second": 540.431, "eval_steps_per_second": 67.597, "step": 742000 }, { "epoch": 170.18, "learning_rate": 4.925356566290414e-05, "loss": 0.9064, "step": 742500 }, { "epoch": 170.18, "eval_loss": 1.6690418720245361, "eval_runtime": 8.684, "eval_samples_per_second": 540.418, "eval_steps_per_second": 67.595, "step": 742500 }, { "epoch": 170.3, "learning_rate": 4.9251138191575716e-05, "loss": 0.9091, "step": 743000 }, { "epoch": 170.3, "eval_loss": 1.6464498043060303, "eval_runtime": 8.682, "eval_samples_per_second": 540.546, "eval_steps_per_second": 67.611, "step": 743000 }, { "epoch": 170.41, "learning_rate": 4.924870683949046e-05, "loss": 0.9062, "step": 743500 }, { "epoch": 170.41, "eval_loss": 1.644849419593811, "eval_runtime": 8.6785, "eval_samples_per_second": 540.761, "eval_steps_per_second": 67.638, "step": 743500 }, { "epoch": 170.52, "learning_rate": 4.9246271607037455e-05, "loss": 0.9124, "step": 744000 }, { "epoch": 170.52, "eval_loss": 1.652997612953186, "eval_runtime": 8.6847, "eval_samples_per_second": 540.375, "eval_steps_per_second": 67.59, "step": 744000 }, { "epoch": 170.64, "learning_rate": 4.9243832494606396e-05, "loss": 0.9127, "step": 744500 }, { "epoch": 170.64, "eval_loss": 1.6735320091247559, "eval_runtime": 8.6767, "eval_samples_per_second": 540.876, "eval_steps_per_second": 67.653, "step": 744500 }, { "epoch": 170.75, "learning_rate": 4.92413895025876e-05, "loss": 0.919, "step": 745000 }, { "epoch": 170.75, "eval_loss": 1.6916577816009521, "eval_runtime": 8.6825, "eval_samples_per_second": 540.511, "eval_steps_per_second": 67.607, "step": 745000 }, { "epoch": 170.87, "learning_rate": 4.923894263137199e-05, "loss": 0.9174, "step": 745500 }, { "epoch": 170.87, "eval_loss": 1.660901665687561, "eval_runtime": 8.6839, "eval_samples_per_second": 540.425, "eval_steps_per_second": 67.596, "step": 745500 }, { "epoch": 170.98, "learning_rate": 4.923649188135114e-05, "loss": 0.9222, "step": 746000 }, { "epoch": 170.98, "eval_loss": 1.6569604873657227, "eval_runtime": 8.6876, "eval_samples_per_second": 540.195, "eval_steps_per_second": 67.568, "step": 746000 }, { "epoch": 171.1, "learning_rate": 4.923403725291723e-05, "loss": 0.912, "step": 746500 }, { "epoch": 171.1, "eval_loss": 1.654901146888733, "eval_runtime": 8.682, "eval_samples_per_second": 540.546, "eval_steps_per_second": 67.611, "step": 746500 }, { "epoch": 171.21, "learning_rate": 4.923157874646305e-05, "loss": 0.9128, "step": 747000 }, { "epoch": 171.21, "eval_loss": 1.6525744199752808, "eval_runtime": 8.6829, "eval_samples_per_second": 540.487, "eval_steps_per_second": 67.604, "step": 747000 }, { "epoch": 171.33, "learning_rate": 4.922911636238202e-05, "loss": 0.9041, "step": 747500 }, { "epoch": 171.33, "eval_loss": 1.66651451587677, "eval_runtime": 8.6757, "eval_samples_per_second": 540.935, "eval_steps_per_second": 67.66, "step": 747500 }, { "epoch": 171.44, "learning_rate": 4.922665010106818e-05, "loss": 0.9115, "step": 748000 }, { "epoch": 171.44, "eval_loss": 1.6477844715118408, "eval_runtime": 8.6839, "eval_samples_per_second": 540.425, "eval_steps_per_second": 67.596, "step": 748000 }, { "epoch": 171.56, "learning_rate": 4.922417996291619e-05, "loss": 0.9132, "step": 748500 }, { "epoch": 171.56, "eval_loss": 1.6402941942214966, "eval_runtime": 8.6798, "eval_samples_per_second": 540.683, "eval_steps_per_second": 67.629, "step": 748500 }, { "epoch": 171.67, "learning_rate": 4.922170594832133e-05, "loss": 0.9142, "step": 749000 }, { "epoch": 171.67, "eval_loss": 1.6890596151351929, "eval_runtime": 8.6856, "eval_samples_per_second": 540.317, "eval_steps_per_second": 67.583, "step": 749000 }, { "epoch": 171.79, "learning_rate": 4.92192280576795e-05, "loss": 0.9163, "step": 749500 }, { "epoch": 171.79, "eval_loss": 1.6607346534729004, "eval_runtime": 8.6758, "eval_samples_per_second": 540.932, "eval_steps_per_second": 67.66, "step": 749500 }, { "epoch": 171.9, "learning_rate": 4.921674629138723e-05, "loss": 0.9182, "step": 750000 }, { "epoch": 171.9, "eval_loss": 1.6763983964920044, "eval_runtime": 8.6801, "eval_samples_per_second": 540.661, "eval_steps_per_second": 67.626, "step": 750000 }, { "epoch": 172.01, "learning_rate": 4.921426064984166e-05, "loss": 0.9219, "step": 750500 }, { "epoch": 172.01, "eval_loss": 1.6463663578033447, "eval_runtime": 8.6846, "eval_samples_per_second": 540.385, "eval_steps_per_second": 67.591, "step": 750500 }, { "epoch": 172.13, "learning_rate": 4.9211771133440536e-05, "loss": 0.9069, "step": 751000 }, { "epoch": 172.13, "eval_loss": 1.648045301437378, "eval_runtime": 8.6841, "eval_samples_per_second": 540.414, "eval_steps_per_second": 67.595, "step": 751000 }, { "epoch": 172.24, "learning_rate": 4.920927774258225e-05, "loss": 0.9082, "step": 751500 }, { "epoch": 172.24, "eval_loss": 1.6647495031356812, "eval_runtime": 8.6817, "eval_samples_per_second": 540.565, "eval_steps_per_second": 67.614, "step": 751500 }, { "epoch": 172.36, "learning_rate": 4.9206780477665816e-05, "loss": 0.9079, "step": 752000 }, { "epoch": 172.36, "eval_loss": 1.6390622854232788, "eval_runtime": 8.6789, "eval_samples_per_second": 540.737, "eval_steps_per_second": 67.635, "step": 752000 }, { "epoch": 172.47, "learning_rate": 4.920427933909084e-05, "loss": 0.9172, "step": 752500 }, { "epoch": 172.47, "eval_loss": 1.6583935022354126, "eval_runtime": 8.6847, "eval_samples_per_second": 540.373, "eval_steps_per_second": 67.59, "step": 752500 }, { "epoch": 172.59, "learning_rate": 4.920177432725755e-05, "loss": 0.9123, "step": 753000 }, { "epoch": 172.59, "eval_loss": 1.6534074544906616, "eval_runtime": 8.6901, "eval_samples_per_second": 540.038, "eval_steps_per_second": 67.548, "step": 753000 }, { "epoch": 172.7, "learning_rate": 4.919926544256685e-05, "loss": 0.913, "step": 753500 }, { "epoch": 172.7, "eval_loss": 1.6492379903793335, "eval_runtime": 8.6812, "eval_samples_per_second": 540.594, "eval_steps_per_second": 67.617, "step": 753500 }, { "epoch": 172.82, "learning_rate": 4.919675268542018e-05, "loss": 0.9155, "step": 754000 }, { "epoch": 172.82, "eval_loss": 1.6788179874420166, "eval_runtime": 8.683, "eval_samples_per_second": 540.481, "eval_steps_per_second": 67.603, "step": 754000 }, { "epoch": 172.93, "learning_rate": 4.919423605621965e-05, "loss": 0.9159, "step": 754500 }, { "epoch": 172.93, "eval_loss": 1.6345502138137817, "eval_runtime": 8.6805, "eval_samples_per_second": 540.636, "eval_steps_per_second": 67.623, "step": 754500 }, { "epoch": 173.05, "learning_rate": 4.919171555536801e-05, "loss": 0.9136, "step": 755000 }, { "epoch": 173.05, "eval_loss": 1.6496586799621582, "eval_runtime": 8.6846, "eval_samples_per_second": 540.38, "eval_steps_per_second": 67.591, "step": 755000 }, { "epoch": 173.16, "learning_rate": 4.918919118326856e-05, "loss": 0.9076, "step": 755500 }, { "epoch": 173.16, "eval_loss": 1.6290382146835327, "eval_runtime": 8.6827, "eval_samples_per_second": 540.502, "eval_steps_per_second": 67.606, "step": 755500 }, { "epoch": 173.28, "learning_rate": 4.918666294032528e-05, "loss": 0.9113, "step": 756000 }, { "epoch": 173.28, "eval_loss": 1.6795768737792969, "eval_runtime": 8.6848, "eval_samples_per_second": 540.372, "eval_steps_per_second": 67.59, "step": 756000 }, { "epoch": 173.39, "learning_rate": 4.918413082694274e-05, "loss": 0.9105, "step": 756500 }, { "epoch": 173.39, "eval_loss": 1.6434791088104248, "eval_runtime": 8.6796, "eval_samples_per_second": 540.693, "eval_steps_per_second": 67.63, "step": 756500 }, { "epoch": 173.5, "learning_rate": 4.918159484352614e-05, "loss": 0.9134, "step": 757000 }, { "epoch": 173.5, "eval_loss": 1.6538938283920288, "eval_runtime": 8.6848, "eval_samples_per_second": 540.368, "eval_steps_per_second": 67.589, "step": 757000 }, { "epoch": 173.62, "learning_rate": 4.91790549904813e-05, "loss": 0.917, "step": 757500 }, { "epoch": 173.62, "eval_loss": 1.6571403741836548, "eval_runtime": 8.6869, "eval_samples_per_second": 540.239, "eval_steps_per_second": 67.573, "step": 757500 }, { "epoch": 173.73, "learning_rate": 4.9176511268214666e-05, "loss": 0.9118, "step": 758000 }, { "epoch": 173.73, "eval_loss": 1.6422178745269775, "eval_runtime": 8.6871, "eval_samples_per_second": 540.226, "eval_steps_per_second": 67.571, "step": 758000 }, { "epoch": 173.85, "learning_rate": 4.917396367713328e-05, "loss": 0.9146, "step": 758500 }, { "epoch": 173.85, "eval_loss": 1.6653181314468384, "eval_runtime": 8.681, "eval_samples_per_second": 540.606, "eval_steps_per_second": 67.619, "step": 758500 }, { "epoch": 173.96, "learning_rate": 4.917141221764482e-05, "loss": 0.9152, "step": 759000 }, { "epoch": 173.96, "eval_loss": 1.665768027305603, "eval_runtime": 8.6876, "eval_samples_per_second": 540.195, "eval_steps_per_second": 67.568, "step": 759000 }, { "epoch": 174.08, "learning_rate": 4.9168856890157586e-05, "loss": 0.9027, "step": 759500 }, { "epoch": 174.08, "eval_loss": 1.6751540899276733, "eval_runtime": 8.6845, "eval_samples_per_second": 540.385, "eval_steps_per_second": 67.591, "step": 759500 }, { "epoch": 174.19, "learning_rate": 4.916629769508048e-05, "loss": 0.9037, "step": 760000 }, { "epoch": 174.19, "eval_loss": 1.6840142011642456, "eval_runtime": 8.6879, "eval_samples_per_second": 540.175, "eval_steps_per_second": 67.565, "step": 760000 }, { "epoch": 174.31, "learning_rate": 4.916373463282304e-05, "loss": 0.9038, "step": 760500 }, { "epoch": 174.31, "eval_loss": 1.6468058824539185, "eval_runtime": 8.6817, "eval_samples_per_second": 540.565, "eval_steps_per_second": 67.614, "step": 760500 }, { "epoch": 174.42, "learning_rate": 4.9161167703795416e-05, "loss": 0.9145, "step": 761000 }, { "epoch": 174.42, "eval_loss": 1.665911316871643, "eval_runtime": 8.6876, "eval_samples_per_second": 540.198, "eval_steps_per_second": 67.568, "step": 761000 }, { "epoch": 174.54, "learning_rate": 4.915859690840839e-05, "loss": 0.9116, "step": 761500 }, { "epoch": 174.54, "eval_loss": 1.6562578678131104, "eval_runtime": 8.6832, "eval_samples_per_second": 540.47, "eval_steps_per_second": 67.602, "step": 761500 }, { "epoch": 174.65, "learning_rate": 4.915602224707333e-05, "loss": 0.9118, "step": 762000 }, { "epoch": 174.65, "eval_loss": 1.6351759433746338, "eval_runtime": 8.6835, "eval_samples_per_second": 540.452, "eval_steps_per_second": 67.6, "step": 762000 }, { "epoch": 174.76, "learning_rate": 4.915344372020225e-05, "loss": 0.9101, "step": 762500 }, { "epoch": 174.76, "eval_loss": 1.6540948152542114, "eval_runtime": 8.6788, "eval_samples_per_second": 540.743, "eval_steps_per_second": 67.636, "step": 762500 }, { "epoch": 174.88, "learning_rate": 4.915086132820778e-05, "loss": 0.9132, "step": 763000 }, { "epoch": 174.88, "eval_loss": 1.6511765718460083, "eval_runtime": 8.68, "eval_samples_per_second": 540.667, "eval_steps_per_second": 67.627, "step": 763000 }, { "epoch": 174.99, "learning_rate": 4.914827507150316e-05, "loss": 0.9121, "step": 763500 }, { "epoch": 174.99, "eval_loss": 1.650230884552002, "eval_runtime": 8.6813, "eval_samples_per_second": 540.59, "eval_steps_per_second": 67.617, "step": 763500 }, { "epoch": 175.11, "learning_rate": 4.914568495050226e-05, "loss": 0.9045, "step": 764000 }, { "epoch": 175.11, "eval_loss": 1.6501152515411377, "eval_runtime": 8.6832, "eval_samples_per_second": 540.467, "eval_steps_per_second": 67.602, "step": 764000 }, { "epoch": 175.22, "learning_rate": 4.9143090965619545e-05, "loss": 0.9068, "step": 764500 }, { "epoch": 175.22, "eval_loss": 1.6733145713806152, "eval_runtime": 8.68, "eval_samples_per_second": 540.668, "eval_steps_per_second": 67.627, "step": 764500 }, { "epoch": 175.34, "learning_rate": 4.914049311727012e-05, "loss": 0.9087, "step": 765000 }, { "epoch": 175.34, "eval_loss": 1.6688320636749268, "eval_runtime": 8.6871, "eval_samples_per_second": 540.224, "eval_steps_per_second": 67.571, "step": 765000 }, { "epoch": 175.45, "learning_rate": 4.9137891405869706e-05, "loss": 0.9047, "step": 765500 }, { "epoch": 175.45, "eval_loss": 1.6443731784820557, "eval_runtime": 8.6785, "eval_samples_per_second": 540.764, "eval_steps_per_second": 67.639, "step": 765500 }, { "epoch": 175.57, "learning_rate": 4.9135285831834645e-05, "loss": 0.9085, "step": 766000 }, { "epoch": 175.57, "eval_loss": 1.6391245126724243, "eval_runtime": 8.6814, "eval_samples_per_second": 540.579, "eval_steps_per_second": 67.616, "step": 766000 }, { "epoch": 175.68, "learning_rate": 4.9132676395581875e-05, "loss": 0.9086, "step": 766500 }, { "epoch": 175.68, "eval_loss": 1.6602202653884888, "eval_runtime": 8.6775, "eval_samples_per_second": 540.825, "eval_steps_per_second": 67.646, "step": 766500 }, { "epoch": 175.8, "learning_rate": 4.913006309752899e-05, "loss": 0.9129, "step": 767000 }, { "epoch": 175.8, "eval_loss": 1.6747815608978271, "eval_runtime": 8.6793, "eval_samples_per_second": 540.71, "eval_steps_per_second": 67.632, "step": 767000 }, { "epoch": 175.91, "learning_rate": 4.912744593809415e-05, "loss": 0.9143, "step": 767500 }, { "epoch": 175.91, "eval_loss": 1.6415234804153442, "eval_runtime": 8.6807, "eval_samples_per_second": 540.626, "eval_steps_per_second": 67.621, "step": 767500 }, { "epoch": 176.03, "learning_rate": 4.912482491769619e-05, "loss": 0.9141, "step": 768000 }, { "epoch": 176.03, "eval_loss": 1.6749205589294434, "eval_runtime": 8.6815, "eval_samples_per_second": 540.574, "eval_steps_per_second": 67.615, "step": 768000 }, { "epoch": 176.14, "learning_rate": 4.912220003675452e-05, "loss": 0.8929, "step": 768500 }, { "epoch": 176.14, "eval_loss": 1.680568814277649, "eval_runtime": 8.6776, "eval_samples_per_second": 540.816, "eval_steps_per_second": 67.645, "step": 768500 }, { "epoch": 176.25, "learning_rate": 4.9119571295689184e-05, "loss": 0.8993, "step": 769000 }, { "epoch": 176.25, "eval_loss": 1.6770433187484741, "eval_runtime": 8.6837, "eval_samples_per_second": 540.441, "eval_steps_per_second": 67.598, "step": 769000 }, { "epoch": 176.37, "learning_rate": 4.911693869492085e-05, "loss": 0.9036, "step": 769500 }, { "epoch": 176.37, "eval_loss": 1.6618636846542358, "eval_runtime": 8.6797, "eval_samples_per_second": 540.687, "eval_steps_per_second": 67.629, "step": 769500 }, { "epoch": 176.48, "learning_rate": 4.911430223487078e-05, "loss": 0.9067, "step": 770000 }, { "epoch": 176.48, "eval_loss": 1.6625287532806396, "eval_runtime": 8.6839, "eval_samples_per_second": 540.424, "eval_steps_per_second": 67.596, "step": 770000 }, { "epoch": 176.6, "learning_rate": 4.911166191596089e-05, "loss": 0.9153, "step": 770500 }, { "epoch": 176.6, "eval_loss": 1.660049319267273, "eval_runtime": 8.6813, "eval_samples_per_second": 540.585, "eval_steps_per_second": 67.616, "step": 770500 }, { "epoch": 176.71, "learning_rate": 4.9109017738613686e-05, "loss": 0.9111, "step": 771000 }, { "epoch": 176.71, "eval_loss": 1.651343822479248, "eval_runtime": 8.6833, "eval_samples_per_second": 540.463, "eval_steps_per_second": 67.601, "step": 771000 }, { "epoch": 176.83, "learning_rate": 4.91063697032523e-05, "loss": 0.9147, "step": 771500 }, { "epoch": 176.83, "eval_loss": 1.6689095497131348, "eval_runtime": 8.6765, "eval_samples_per_second": 540.887, "eval_steps_per_second": 67.654, "step": 771500 }, { "epoch": 176.94, "learning_rate": 4.910371781030048e-05, "loss": 0.9194, "step": 772000 }, { "epoch": 176.94, "eval_loss": 1.6498600244522095, "eval_runtime": 8.6789, "eval_samples_per_second": 540.739, "eval_steps_per_second": 67.636, "step": 772000 }, { "epoch": 177.06, "learning_rate": 4.910106206018258e-05, "loss": 0.9122, "step": 772500 }, { "epoch": 177.06, "eval_loss": 1.666555404663086, "eval_runtime": 8.6772, "eval_samples_per_second": 540.84, "eval_steps_per_second": 67.648, "step": 772500 }, { "epoch": 177.17, "learning_rate": 4.9098402453323607e-05, "loss": 0.9011, "step": 773000 }, { "epoch": 177.17, "eval_loss": 1.667802333831787, "eval_runtime": 8.6856, "eval_samples_per_second": 540.322, "eval_steps_per_second": 67.583, "step": 773000 }, { "epoch": 177.29, "learning_rate": 4.909573899014914e-05, "loss": 0.9026, "step": 773500 }, { "epoch": 177.29, "eval_loss": 1.6519498825073242, "eval_runtime": 8.6818, "eval_samples_per_second": 540.556, "eval_steps_per_second": 67.613, "step": 773500 }, { "epoch": 177.4, "learning_rate": 4.9093071671085413e-05, "loss": 0.9097, "step": 774000 }, { "epoch": 177.4, "eval_loss": 1.663029432296753, "eval_runtime": 8.6832, "eval_samples_per_second": 540.467, "eval_steps_per_second": 67.602, "step": 774000 }, { "epoch": 177.52, "learning_rate": 4.909040049655924e-05, "loss": 0.9033, "step": 774500 }, { "epoch": 177.52, "eval_loss": 1.662366271018982, "eval_runtime": 8.6811, "eval_samples_per_second": 540.601, "eval_steps_per_second": 67.618, "step": 774500 }, { "epoch": 177.63, "learning_rate": 4.908772546699809e-05, "loss": 0.9021, "step": 775000 }, { "epoch": 177.63, "eval_loss": 1.662260890007019, "eval_runtime": 8.6868, "eval_samples_per_second": 540.248, "eval_steps_per_second": 67.574, "step": 775000 }, { "epoch": 177.74, "learning_rate": 4.908504658283002e-05, "loss": 0.9105, "step": 775500 }, { "epoch": 177.74, "eval_loss": 1.652695655822754, "eval_runtime": 8.6827, "eval_samples_per_second": 540.499, "eval_steps_per_second": 67.606, "step": 775500 }, { "epoch": 177.86, "learning_rate": 4.9082363844483725e-05, "loss": 0.9174, "step": 776000 }, { "epoch": 177.86, "eval_loss": 1.6802083253860474, "eval_runtime": 8.6825, "eval_samples_per_second": 540.51, "eval_steps_per_second": 67.607, "step": 776000 }, { "epoch": 177.97, "learning_rate": 4.9079677252388504e-05, "loss": 0.9095, "step": 776500 }, { "epoch": 177.97, "eval_loss": 1.6605027914047241, "eval_runtime": 8.6768, "eval_samples_per_second": 540.87, "eval_steps_per_second": 67.652, "step": 776500 }, { "epoch": 178.09, "learning_rate": 4.9076986806974265e-05, "loss": 0.9005, "step": 777000 }, { "epoch": 178.09, "eval_loss": 1.6690033674240112, "eval_runtime": 8.6819, "eval_samples_per_second": 540.547, "eval_steps_per_second": 67.612, "step": 777000 }, { "epoch": 178.2, "learning_rate": 4.907429250867156e-05, "loss": 0.8973, "step": 777500 }, { "epoch": 178.2, "eval_loss": 1.6821162700653076, "eval_runtime": 8.683, "eval_samples_per_second": 540.484, "eval_steps_per_second": 67.604, "step": 777500 }, { "epoch": 178.32, "learning_rate": 4.907159435791152e-05, "loss": 0.902, "step": 778000 }, { "epoch": 178.32, "eval_loss": 1.6699914932250977, "eval_runtime": 8.6816, "eval_samples_per_second": 540.569, "eval_steps_per_second": 67.614, "step": 778000 }, { "epoch": 178.43, "learning_rate": 4.9068892355125934e-05, "loss": 0.9054, "step": 778500 }, { "epoch": 178.43, "eval_loss": 1.6767325401306152, "eval_runtime": 8.675, "eval_samples_per_second": 540.98, "eval_steps_per_second": 67.666, "step": 778500 }, { "epoch": 178.55, "learning_rate": 4.906618650074717e-05, "loss": 0.9032, "step": 779000 }, { "epoch": 178.55, "eval_loss": 1.6645625829696655, "eval_runtime": 8.6828, "eval_samples_per_second": 540.496, "eval_steps_per_second": 67.605, "step": 779000 }, { "epoch": 178.66, "learning_rate": 4.906347679520824e-05, "loss": 0.9067, "step": 779500 }, { "epoch": 178.66, "eval_loss": 1.645733118057251, "eval_runtime": 8.684, "eval_samples_per_second": 540.417, "eval_steps_per_second": 67.595, "step": 779500 }, { "epoch": 178.78, "learning_rate": 4.906076323894276e-05, "loss": 0.9105, "step": 780000 }, { "epoch": 178.78, "eval_loss": 1.6671504974365234, "eval_runtime": 8.6849, "eval_samples_per_second": 540.363, "eval_steps_per_second": 67.589, "step": 780000 }, { "epoch": 178.89, "learning_rate": 4.9058045832384956e-05, "loss": 0.9063, "step": 780500 }, { "epoch": 178.89, "eval_loss": 1.661684513092041, "eval_runtime": 8.6813, "eval_samples_per_second": 540.585, "eval_steps_per_second": 67.616, "step": 780500 }, { "epoch": 179.01, "learning_rate": 4.905532457596969e-05, "loss": 0.917, "step": 781000 }, { "epoch": 179.01, "eval_loss": 1.6590471267700195, "eval_runtime": 8.6861, "eval_samples_per_second": 540.286, "eval_steps_per_second": 67.579, "step": 781000 }, { "epoch": 179.12, "learning_rate": 4.9052599470132406e-05, "loss": 0.9014, "step": 781500 }, { "epoch": 179.12, "eval_loss": 1.6694759130477905, "eval_runtime": 8.6817, "eval_samples_per_second": 540.563, "eval_steps_per_second": 67.614, "step": 781500 }, { "epoch": 179.23, "learning_rate": 4.90498705153092e-05, "loss": 0.9082, "step": 782000 }, { "epoch": 179.23, "eval_loss": 1.679898738861084, "eval_runtime": 8.6911, "eval_samples_per_second": 539.979, "eval_steps_per_second": 67.541, "step": 782000 }, { "epoch": 179.35, "learning_rate": 4.904713771193678e-05, "loss": 0.9112, "step": 782500 }, { "epoch": 179.35, "eval_loss": 1.6572821140289307, "eval_runtime": 8.6816, "eval_samples_per_second": 540.568, "eval_steps_per_second": 67.614, "step": 782500 }, { "epoch": 179.46, "learning_rate": 4.9044401060452433e-05, "loss": 0.9006, "step": 783000 }, { "epoch": 179.46, "eval_loss": 1.6738203763961792, "eval_runtime": 8.6815, "eval_samples_per_second": 540.575, "eval_steps_per_second": 67.615, "step": 783000 }, { "epoch": 179.58, "learning_rate": 4.904166056129411e-05, "loss": 0.9014, "step": 783500 }, { "epoch": 179.58, "eval_loss": 1.6704180240631104, "eval_runtime": 8.686, "eval_samples_per_second": 540.292, "eval_steps_per_second": 67.58, "step": 783500 }, { "epoch": 179.69, "learning_rate": 4.903891621490034e-05, "loss": 0.9097, "step": 784000 }, { "epoch": 179.69, "eval_loss": 1.6679461002349854, "eval_runtime": 8.6855, "eval_samples_per_second": 540.329, "eval_steps_per_second": 67.584, "step": 784000 }, { "epoch": 179.81, "learning_rate": 4.9036168021710304e-05, "loss": 0.9067, "step": 784500 }, { "epoch": 179.81, "eval_loss": 1.6754196882247925, "eval_runtime": 8.6831, "eval_samples_per_second": 540.478, "eval_steps_per_second": 67.603, "step": 784500 }, { "epoch": 179.92, "learning_rate": 4.9033415982163764e-05, "loss": 0.9087, "step": 785000 }, { "epoch": 179.92, "eval_loss": 1.6676478385925293, "eval_runtime": 8.685, "eval_samples_per_second": 540.36, "eval_steps_per_second": 67.588, "step": 785000 }, { "epoch": 180.04, "learning_rate": 4.903066009670111e-05, "loss": 0.9, "step": 785500 }, { "epoch": 180.04, "eval_loss": 1.651205062866211, "eval_runtime": 8.6809, "eval_samples_per_second": 540.611, "eval_steps_per_second": 67.62, "step": 785500 }, { "epoch": 180.15, "learning_rate": 4.9027900365763354e-05, "loss": 0.897, "step": 786000 }, { "epoch": 180.15, "eval_loss": 1.6560752391815186, "eval_runtime": 8.6833, "eval_samples_per_second": 540.465, "eval_steps_per_second": 67.601, "step": 786000 }, { "epoch": 180.27, "learning_rate": 4.902513678979211e-05, "loss": 0.8959, "step": 786500 }, { "epoch": 180.27, "eval_loss": 1.6730626821517944, "eval_runtime": 8.6822, "eval_samples_per_second": 540.53, "eval_steps_per_second": 67.609, "step": 786500 }, { "epoch": 180.38, "learning_rate": 4.902236936922963e-05, "loss": 0.9067, "step": 787000 }, { "epoch": 180.38, "eval_loss": 1.6604429483413696, "eval_runtime": 8.6938, "eval_samples_per_second": 539.807, "eval_steps_per_second": 67.519, "step": 787000 }, { "epoch": 180.5, "learning_rate": 4.901959810451876e-05, "loss": 0.9038, "step": 787500 }, { "epoch": 180.5, "eval_loss": 1.6741384267807007, "eval_runtime": 8.682, "eval_samples_per_second": 540.542, "eval_steps_per_second": 67.611, "step": 787500 }, { "epoch": 180.61, "learning_rate": 4.901682299610296e-05, "loss": 0.9059, "step": 788000 }, { "epoch": 180.61, "eval_loss": 1.6681591272354126, "eval_runtime": 8.6846, "eval_samples_per_second": 540.381, "eval_steps_per_second": 67.591, "step": 788000 }, { "epoch": 180.72, "learning_rate": 4.9014044044426325e-05, "loss": 0.9061, "step": 788500 }, { "epoch": 180.72, "eval_loss": 1.6833927631378174, "eval_runtime": 8.6795, "eval_samples_per_second": 540.7, "eval_steps_per_second": 67.631, "step": 788500 }, { "epoch": 180.84, "learning_rate": 4.901126124993355e-05, "loss": 0.9042, "step": 789000 }, { "epoch": 180.84, "eval_loss": 1.6762073040008545, "eval_runtime": 8.6898, "eval_samples_per_second": 540.061, "eval_steps_per_second": 67.551, "step": 789000 }, { "epoch": 180.95, "learning_rate": 4.9008474613069944e-05, "loss": 0.9123, "step": 789500 }, { "epoch": 180.95, "eval_loss": 1.671661138534546, "eval_runtime": 8.6842, "eval_samples_per_second": 540.405, "eval_steps_per_second": 67.594, "step": 789500 }, { "epoch": 181.07, "learning_rate": 4.9005684134281434e-05, "loss": 0.9028, "step": 790000 }, { "epoch": 181.07, "eval_loss": 1.691998839378357, "eval_runtime": 8.6833, "eval_samples_per_second": 540.464, "eval_steps_per_second": 67.601, "step": 790000 }, { "epoch": 181.18, "learning_rate": 4.9002889814014574e-05, "loss": 0.8909, "step": 790500 }, { "epoch": 181.18, "eval_loss": 1.6780219078063965, "eval_runtime": 8.6812, "eval_samples_per_second": 540.595, "eval_steps_per_second": 67.618, "step": 790500 }, { "epoch": 181.3, "learning_rate": 4.9000091652716515e-05, "loss": 0.8979, "step": 791000 }, { "epoch": 181.3, "eval_loss": 1.6815857887268066, "eval_runtime": 8.6845, "eval_samples_per_second": 540.391, "eval_steps_per_second": 67.592, "step": 791000 }, { "epoch": 181.41, "learning_rate": 4.899728965083502e-05, "loss": 0.9039, "step": 791500 }, { "epoch": 181.41, "eval_loss": 1.6899973154067993, "eval_runtime": 8.6798, "eval_samples_per_second": 540.678, "eval_steps_per_second": 67.628, "step": 791500 }, { "epoch": 181.53, "learning_rate": 4.899448380881849e-05, "loss": 0.8993, "step": 792000 }, { "epoch": 181.53, "eval_loss": 1.6783643960952759, "eval_runtime": 8.6843, "eval_samples_per_second": 540.404, "eval_steps_per_second": 67.594, "step": 792000 }, { "epoch": 181.64, "learning_rate": 4.899167412711592e-05, "loss": 0.9064, "step": 792500 }, { "epoch": 181.64, "eval_loss": 1.6832048892974854, "eval_runtime": 8.6831, "eval_samples_per_second": 540.476, "eval_steps_per_second": 67.603, "step": 792500 }, { "epoch": 181.76, "learning_rate": 4.898886060617691e-05, "loss": 0.8989, "step": 793000 }, { "epoch": 181.76, "eval_loss": 1.65478515625, "eval_runtime": 8.687, "eval_samples_per_second": 540.231, "eval_steps_per_second": 67.572, "step": 793000 }, { "epoch": 181.87, "learning_rate": 4.898604324645172e-05, "loss": 0.9115, "step": 793500 }, { "epoch": 181.87, "eval_loss": 1.673752784729004, "eval_runtime": 8.685, "eval_samples_per_second": 540.359, "eval_steps_per_second": 67.588, "step": 793500 }, { "epoch": 181.98, "learning_rate": 4.898322204839118e-05, "loss": 0.9092, "step": 794000 }, { "epoch": 181.98, "eval_loss": 1.643316626548767, "eval_runtime": 8.688, "eval_samples_per_second": 540.173, "eval_steps_per_second": 67.565, "step": 794000 }, { "epoch": 182.1, "learning_rate": 4.8980397012446744e-05, "loss": 0.9014, "step": 794500 }, { "epoch": 182.1, "eval_loss": 1.6714937686920166, "eval_runtime": 8.6775, "eval_samples_per_second": 540.824, "eval_steps_per_second": 67.646, "step": 794500 }, { "epoch": 182.21, "learning_rate": 4.8977568139070487e-05, "loss": 0.9037, "step": 795000 }, { "epoch": 182.21, "eval_loss": 1.6589686870574951, "eval_runtime": 8.6872, "eval_samples_per_second": 540.222, "eval_steps_per_second": 67.571, "step": 795000 }, { "epoch": 182.33, "learning_rate": 4.89747354287151e-05, "loss": 0.9006, "step": 795500 }, { "epoch": 182.33, "eval_loss": 1.6774219274520874, "eval_runtime": 8.6828, "eval_samples_per_second": 540.493, "eval_steps_per_second": 67.605, "step": 795500 }, { "epoch": 182.44, "learning_rate": 4.897189888183389e-05, "loss": 0.9038, "step": 796000 }, { "epoch": 182.44, "eval_loss": 1.675945520401001, "eval_runtime": 8.6824, "eval_samples_per_second": 540.518, "eval_steps_per_second": 67.608, "step": 796000 }, { "epoch": 182.56, "learning_rate": 4.896905849888075e-05, "loss": 0.8991, "step": 796500 }, { "epoch": 182.56, "eval_loss": 1.6789404153823853, "eval_runtime": 8.6818, "eval_samples_per_second": 540.557, "eval_steps_per_second": 67.613, "step": 796500 }, { "epoch": 182.67, "learning_rate": 4.896621428031023e-05, "loss": 0.9038, "step": 797000 }, { "epoch": 182.67, "eval_loss": 1.671852469444275, "eval_runtime": 8.6798, "eval_samples_per_second": 540.681, "eval_steps_per_second": 67.628, "step": 797000 }, { "epoch": 182.79, "learning_rate": 4.8963366226577464e-05, "loss": 0.9046, "step": 797500 }, { "epoch": 182.79, "eval_loss": 1.6752398014068604, "eval_runtime": 8.6812, "eval_samples_per_second": 540.592, "eval_steps_per_second": 67.617, "step": 797500 }, { "epoch": 182.9, "learning_rate": 4.896051433813821e-05, "loss": 0.9092, "step": 798000 }, { "epoch": 182.9, "eval_loss": 1.6498260498046875, "eval_runtime": 8.6844, "eval_samples_per_second": 540.394, "eval_steps_per_second": 67.592, "step": 798000 }, { "epoch": 183.02, "learning_rate": 4.895765861544883e-05, "loss": 0.9059, "step": 798500 }, { "epoch": 183.02, "eval_loss": 1.6719037294387817, "eval_runtime": 8.6859, "eval_samples_per_second": 540.303, "eval_steps_per_second": 67.581, "step": 798500 }, { "epoch": 183.13, "learning_rate": 4.895479905896632e-05, "loss": 0.8952, "step": 799000 }, { "epoch": 183.13, "eval_loss": 1.6651272773742676, "eval_runtime": 8.7788, "eval_samples_per_second": 534.584, "eval_steps_per_second": 66.866, "step": 799000 }, { "epoch": 183.25, "learning_rate": 4.895193566914826e-05, "loss": 0.8956, "step": 799500 }, { "epoch": 183.25, "eval_loss": 1.6763982772827148, "eval_runtime": 8.7587, "eval_samples_per_second": 535.809, "eval_steps_per_second": 67.019, "step": 799500 }, { "epoch": 183.36, "learning_rate": 4.894906844645287e-05, "loss": 0.8999, "step": 800000 }, { "epoch": 183.36, "eval_loss": 1.6611284017562866, "eval_runtime": 8.7665, "eval_samples_per_second": 535.335, "eval_steps_per_second": 66.96, "step": 800000 }, { "epoch": 183.47, "learning_rate": 4.894619739133899e-05, "loss": 0.8956, "step": 800500 }, { "epoch": 183.47, "eval_loss": 1.6826030015945435, "eval_runtime": 8.7605, "eval_samples_per_second": 535.698, "eval_steps_per_second": 67.005, "step": 800500 }, { "epoch": 183.59, "learning_rate": 4.8943322504266025e-05, "loss": 0.9032, "step": 801000 }, { "epoch": 183.59, "eval_loss": 1.690992832183838, "eval_runtime": 8.7688, "eval_samples_per_second": 535.193, "eval_steps_per_second": 66.942, "step": 801000 }, { "epoch": 183.7, "learning_rate": 4.894044378569405e-05, "loss": 0.9066, "step": 801500 }, { "epoch": 183.7, "eval_loss": 1.6517401933670044, "eval_runtime": 8.7697, "eval_samples_per_second": 535.139, "eval_steps_per_second": 66.935, "step": 801500 }, { "epoch": 183.82, "learning_rate": 4.893756123608372e-05, "loss": 0.9025, "step": 802000 }, { "epoch": 183.82, "eval_loss": 1.6599860191345215, "eval_runtime": 8.7678, "eval_samples_per_second": 535.254, "eval_steps_per_second": 66.95, "step": 802000 }, { "epoch": 183.93, "learning_rate": 4.89346748558963e-05, "loss": 0.9071, "step": 802500 }, { "epoch": 183.93, "eval_loss": 1.6640293598175049, "eval_runtime": 8.7659, "eval_samples_per_second": 535.369, "eval_steps_per_second": 66.964, "step": 802500 }, { "epoch": 184.05, "learning_rate": 4.89317846455937e-05, "loss": 0.9036, "step": 803000 }, { "epoch": 184.05, "eval_loss": 1.6945890188217163, "eval_runtime": 8.7736, "eval_samples_per_second": 534.901, "eval_steps_per_second": 66.905, "step": 803000 }, { "epoch": 184.16, "learning_rate": 4.892889060563841e-05, "loss": 0.8853, "step": 803500 }, { "epoch": 184.16, "eval_loss": 1.6656888723373413, "eval_runtime": 8.7786, "eval_samples_per_second": 534.594, "eval_steps_per_second": 66.867, "step": 803500 }, { "epoch": 184.28, "learning_rate": 4.8925992736493545e-05, "loss": 0.8945, "step": 804000 }, { "epoch": 184.28, "eval_loss": 1.6720991134643555, "eval_runtime": 8.7757, "eval_samples_per_second": 534.77, "eval_steps_per_second": 66.889, "step": 804000 }, { "epoch": 184.39, "learning_rate": 4.892309103862283e-05, "loss": 0.8996, "step": 804500 }, { "epoch": 184.39, "eval_loss": 1.684796929359436, "eval_runtime": 8.7701, "eval_samples_per_second": 535.111, "eval_steps_per_second": 66.932, "step": 804500 }, { "epoch": 184.51, "learning_rate": 4.8920185512490615e-05, "loss": 0.9009, "step": 805000 }, { "epoch": 184.51, "eval_loss": 1.6722452640533447, "eval_runtime": 8.772, "eval_samples_per_second": 534.999, "eval_steps_per_second": 66.918, "step": 805000 }, { "epoch": 184.62, "learning_rate": 4.8917276158561845e-05, "loss": 0.8986, "step": 805500 }, { "epoch": 184.62, "eval_loss": 1.6932566165924072, "eval_runtime": 8.773, "eval_samples_per_second": 534.937, "eval_steps_per_second": 66.91, "step": 805500 }, { "epoch": 184.74, "learning_rate": 4.891436297730209e-05, "loss": 0.9041, "step": 806000 }, { "epoch": 184.74, "eval_loss": 1.685440182685852, "eval_runtime": 8.7576, "eval_samples_per_second": 535.876, "eval_steps_per_second": 67.027, "step": 806000 }, { "epoch": 184.85, "learning_rate": 4.891144596917753e-05, "loss": 0.9045, "step": 806500 }, { "epoch": 184.85, "eval_loss": 1.6655248403549194, "eval_runtime": 8.7637, "eval_samples_per_second": 535.502, "eval_steps_per_second": 66.981, "step": 806500 }, { "epoch": 184.96, "learning_rate": 4.8908525134654945e-05, "loss": 0.9009, "step": 807000 }, { "epoch": 184.96, "eval_loss": 1.6887766122817993, "eval_runtime": 8.76, "eval_samples_per_second": 535.73, "eval_steps_per_second": 67.009, "step": 807000 }, { "epoch": 185.08, "learning_rate": 4.8905600474201754e-05, "loss": 0.8928, "step": 807500 }, { "epoch": 185.08, "eval_loss": 1.686935305595398, "eval_runtime": 8.7555, "eval_samples_per_second": 536.007, "eval_steps_per_second": 67.044, "step": 807500 }, { "epoch": 185.19, "learning_rate": 4.890267198828595e-05, "loss": 0.8993, "step": 808000 }, { "epoch": 185.19, "eval_loss": 1.6822097301483154, "eval_runtime": 8.7676, "eval_samples_per_second": 535.267, "eval_steps_per_second": 66.951, "step": 808000 }, { "epoch": 185.31, "learning_rate": 4.889973967737618e-05, "loss": 0.893, "step": 808500 }, { "epoch": 185.31, "eval_loss": 1.654922604560852, "eval_runtime": 8.7622, "eval_samples_per_second": 535.594, "eval_steps_per_second": 66.992, "step": 808500 }, { "epoch": 185.42, "learning_rate": 4.889680354194168e-05, "loss": 0.9001, "step": 809000 }, { "epoch": 185.42, "eval_loss": 1.6765400171279907, "eval_runtime": 8.7602, "eval_samples_per_second": 535.719, "eval_steps_per_second": 67.008, "step": 809000 }, { "epoch": 185.54, "learning_rate": 4.8893863582452294e-05, "loss": 0.8958, "step": 809500 }, { "epoch": 185.54, "eval_loss": 1.700106143951416, "eval_runtime": 8.7657, "eval_samples_per_second": 535.381, "eval_steps_per_second": 66.965, "step": 809500 }, { "epoch": 185.65, "learning_rate": 4.889091979937849e-05, "loss": 0.8969, "step": 810000 }, { "epoch": 185.65, "eval_loss": 1.68003511428833, "eval_runtime": 8.7649, "eval_samples_per_second": 535.431, "eval_steps_per_second": 66.972, "step": 810000 }, { "epoch": 185.77, "learning_rate": 4.8887972193191336e-05, "loss": 0.9052, "step": 810500 }, { "epoch": 185.77, "eval_loss": 1.6834993362426758, "eval_runtime": 8.765, "eval_samples_per_second": 535.424, "eval_steps_per_second": 66.971, "step": 810500 }, { "epoch": 185.88, "learning_rate": 4.888502076436253e-05, "loss": 0.9024, "step": 811000 }, { "epoch": 185.88, "eval_loss": 1.6680175065994263, "eval_runtime": 8.7575, "eval_samples_per_second": 535.886, "eval_steps_per_second": 67.029, "step": 811000 }, { "epoch": 186.0, "learning_rate": 4.8882065513364364e-05, "loss": 0.9045, "step": 811500 }, { "epoch": 186.0, "eval_loss": 1.6682708263397217, "eval_runtime": 8.7573, "eval_samples_per_second": 535.896, "eval_steps_per_second": 67.03, "step": 811500 }, { "epoch": 186.11, "learning_rate": 4.887910644066974e-05, "loss": 0.8928, "step": 812000 }, { "epoch": 186.11, "eval_loss": 1.682183861732483, "eval_runtime": 8.7668, "eval_samples_per_second": 535.312, "eval_steps_per_second": 66.957, "step": 812000 }, { "epoch": 186.23, "learning_rate": 4.8876143546752194e-05, "loss": 0.8934, "step": 812500 }, { "epoch": 186.23, "eval_loss": 1.651599645614624, "eval_runtime": 8.7615, "eval_samples_per_second": 535.64, "eval_steps_per_second": 66.998, "step": 812500 }, { "epoch": 186.34, "learning_rate": 4.8873176832085864e-05, "loss": 0.8956, "step": 813000 }, { "epoch": 186.34, "eval_loss": 1.7090240716934204, "eval_runtime": 8.7609, "eval_samples_per_second": 535.676, "eval_steps_per_second": 67.002, "step": 813000 }, { "epoch": 186.45, "learning_rate": 4.8870206297145475e-05, "loss": 0.9016, "step": 813500 }, { "epoch": 186.45, "eval_loss": 1.6784650087356567, "eval_runtime": 8.7499, "eval_samples_per_second": 536.347, "eval_steps_per_second": 67.086, "step": 813500 }, { "epoch": 186.57, "learning_rate": 4.88672319424064e-05, "loss": 0.8961, "step": 814000 }, { "epoch": 186.57, "eval_loss": 1.6706141233444214, "eval_runtime": 8.7657, "eval_samples_per_second": 535.382, "eval_steps_per_second": 66.966, "step": 814000 }, { "epoch": 186.68, "learning_rate": 4.886425376834459e-05, "loss": 0.9003, "step": 814500 }, { "epoch": 186.68, "eval_loss": 1.6628684997558594, "eval_runtime": 8.7586, "eval_samples_per_second": 535.818, "eval_steps_per_second": 67.02, "step": 814500 }, { "epoch": 186.8, "learning_rate": 4.8861271775436633e-05, "loss": 0.8949, "step": 815000 }, { "epoch": 186.8, "eval_loss": 1.6572890281677246, "eval_runtime": 8.7609, "eval_samples_per_second": 535.677, "eval_steps_per_second": 67.002, "step": 815000 }, { "epoch": 186.91, "learning_rate": 4.8858285964159726e-05, "loss": 0.9016, "step": 815500 }, { "epoch": 186.91, "eval_loss": 1.6684828996658325, "eval_runtime": 8.7594, "eval_samples_per_second": 535.766, "eval_steps_per_second": 67.014, "step": 815500 }, { "epoch": 187.03, "learning_rate": 4.885529633499166e-05, "loss": 0.8973, "step": 816000 }, { "epoch": 187.03, "eval_loss": 1.6853457689285278, "eval_runtime": 8.763, "eval_samples_per_second": 535.548, "eval_steps_per_second": 66.986, "step": 816000 }, { "epoch": 187.14, "learning_rate": 4.8852302888410844e-05, "loss": 0.8913, "step": 816500 }, { "epoch": 187.14, "eval_loss": 1.66689133644104, "eval_runtime": 8.7563, "eval_samples_per_second": 535.959, "eval_steps_per_second": 67.038, "step": 816500 }, { "epoch": 187.26, "learning_rate": 4.884930562489631e-05, "loss": 0.8932, "step": 817000 }, { "epoch": 187.26, "eval_loss": 1.6874340772628784, "eval_runtime": 8.771, "eval_samples_per_second": 535.059, "eval_steps_per_second": 66.925, "step": 817000 }, { "epoch": 187.37, "learning_rate": 4.884630454492768e-05, "loss": 0.8943, "step": 817500 }, { "epoch": 187.37, "eval_loss": 1.700537085533142, "eval_runtime": 8.7604, "eval_samples_per_second": 535.709, "eval_steps_per_second": 67.006, "step": 817500 }, { "epoch": 187.49, "learning_rate": 4.884329964898521e-05, "loss": 0.8987, "step": 818000 }, { "epoch": 187.49, "eval_loss": 1.6986490488052368, "eval_runtime": 8.7663, "eval_samples_per_second": 535.348, "eval_steps_per_second": 66.961, "step": 818000 }, { "epoch": 187.6, "learning_rate": 4.884029093754974e-05, "loss": 0.8949, "step": 818500 }, { "epoch": 187.6, "eval_loss": 1.6775953769683838, "eval_runtime": 8.7592, "eval_samples_per_second": 535.781, "eval_steps_per_second": 67.015, "step": 818500 }, { "epoch": 187.71, "learning_rate": 4.8837278411102746e-05, "loss": 0.9042, "step": 819000 }, { "epoch": 187.71, "eval_loss": 1.6998385190963745, "eval_runtime": 8.7618, "eval_samples_per_second": 535.621, "eval_steps_per_second": 66.995, "step": 819000 }, { "epoch": 187.83, "learning_rate": 4.88342620701263e-05, "loss": 0.8991, "step": 819500 }, { "epoch": 187.83, "eval_loss": 1.6808650493621826, "eval_runtime": 8.7634, "eval_samples_per_second": 535.523, "eval_steps_per_second": 66.983, "step": 819500 }, { "epoch": 187.94, "learning_rate": 4.883124191510309e-05, "loss": 0.9033, "step": 820000 }, { "epoch": 187.94, "eval_loss": 1.6713366508483887, "eval_runtime": 8.7591, "eval_samples_per_second": 535.789, "eval_steps_per_second": 67.016, "step": 820000 }, { "epoch": 188.06, "learning_rate": 4.8828217946516405e-05, "loss": 0.8991, "step": 820500 }, { "epoch": 188.06, "eval_loss": 1.6982001066207886, "eval_runtime": 8.7516, "eval_samples_per_second": 536.245, "eval_steps_per_second": 67.074, "step": 820500 }, { "epoch": 188.17, "learning_rate": 4.882519016485017e-05, "loss": 0.8922, "step": 821000 }, { "epoch": 188.17, "eval_loss": 1.6906132698059082, "eval_runtime": 8.7663, "eval_samples_per_second": 535.346, "eval_steps_per_second": 66.961, "step": 821000 }, { "epoch": 188.29, "learning_rate": 4.8822158570588885e-05, "loss": 0.8847, "step": 821500 }, { "epoch": 188.29, "eval_loss": 1.689201831817627, "eval_runtime": 8.7552, "eval_samples_per_second": 536.022, "eval_steps_per_second": 67.046, "step": 821500 }, { "epoch": 188.4, "learning_rate": 4.8819123164217684e-05, "loss": 0.8915, "step": 822000 }, { "epoch": 188.4, "eval_loss": 1.6824849843978882, "eval_runtime": 8.7601, "eval_samples_per_second": 535.722, "eval_steps_per_second": 67.008, "step": 822000 }, { "epoch": 188.52, "learning_rate": 4.8816083946222296e-05, "loss": 0.899, "step": 822500 }, { "epoch": 188.52, "eval_loss": 1.679656982421875, "eval_runtime": 8.7562, "eval_samples_per_second": 535.966, "eval_steps_per_second": 67.039, "step": 822500 }, { "epoch": 188.63, "learning_rate": 4.881304091708908e-05, "loss": 0.896, "step": 823000 }, { "epoch": 188.63, "eval_loss": 1.6935306787490845, "eval_runtime": 8.767, "eval_samples_per_second": 535.304, "eval_steps_per_second": 66.956, "step": 823000 }, { "epoch": 188.75, "learning_rate": 4.8809994077304976e-05, "loss": 0.8991, "step": 823500 }, { "epoch": 188.75, "eval_loss": 1.6717618703842163, "eval_runtime": 8.7549, "eval_samples_per_second": 536.042, "eval_steps_per_second": 67.048, "step": 823500 }, { "epoch": 188.86, "learning_rate": 4.880694342735758e-05, "loss": 0.9037, "step": 824000 }, { "epoch": 188.86, "eval_loss": 1.6745343208312988, "eval_runtime": 8.7595, "eval_samples_per_second": 535.762, "eval_steps_per_second": 67.013, "step": 824000 }, { "epoch": 188.98, "learning_rate": 4.880388896773503e-05, "loss": 0.9145, "step": 824500 }, { "epoch": 188.98, "eval_loss": 1.6698788404464722, "eval_runtime": 8.7574, "eval_samples_per_second": 535.892, "eval_steps_per_second": 67.029, "step": 824500 }, { "epoch": 189.09, "learning_rate": 4.880083069892614e-05, "loss": 0.8891, "step": 825000 }, { "epoch": 189.09, "eval_loss": 1.6712772846221924, "eval_runtime": 8.7567, "eval_samples_per_second": 535.932, "eval_steps_per_second": 67.034, "step": 825000 }, { "epoch": 189.2, "learning_rate": 4.8797768621420295e-05, "loss": 0.8884, "step": 825500 }, { "epoch": 189.2, "eval_loss": 1.684435248374939, "eval_runtime": 8.7559, "eval_samples_per_second": 535.984, "eval_steps_per_second": 67.041, "step": 825500 }, { "epoch": 189.32, "learning_rate": 4.8794702735707496e-05, "loss": 0.8874, "step": 826000 }, { "epoch": 189.32, "eval_loss": 1.6542989015579224, "eval_runtime": 8.7595, "eval_samples_per_second": 535.759, "eval_steps_per_second": 67.013, "step": 826000 }, { "epoch": 189.43, "learning_rate": 4.8791633042278375e-05, "loss": 0.898, "step": 826500 }, { "epoch": 189.43, "eval_loss": 1.6793451309204102, "eval_runtime": 8.7589, "eval_samples_per_second": 535.8, "eval_steps_per_second": 67.018, "step": 826500 }, { "epoch": 189.55, "learning_rate": 4.8788559541624136e-05, "loss": 0.8942, "step": 827000 }, { "epoch": 189.55, "eval_loss": 1.6606069803237915, "eval_runtime": 8.7611, "eval_samples_per_second": 535.666, "eval_steps_per_second": 67.001, "step": 827000 }, { "epoch": 189.66, "learning_rate": 4.8785482234236614e-05, "loss": 0.899, "step": 827500 }, { "epoch": 189.66, "eval_loss": 1.671485424041748, "eval_runtime": 8.7564, "eval_samples_per_second": 535.95, "eval_steps_per_second": 67.037, "step": 827500 }, { "epoch": 189.78, "learning_rate": 4.878240112060827e-05, "loss": 0.9097, "step": 828000 }, { "epoch": 189.78, "eval_loss": 1.7048957347869873, "eval_runtime": 8.7643, "eval_samples_per_second": 535.467, "eval_steps_per_second": 66.976, "step": 828000 }, { "epoch": 189.89, "learning_rate": 4.877931620123213e-05, "loss": 0.909, "step": 828500 }, { "epoch": 189.89, "eval_loss": 1.6832300424575806, "eval_runtime": 8.7594, "eval_samples_per_second": 535.767, "eval_steps_per_second": 67.014, "step": 828500 }, { "epoch": 190.01, "learning_rate": 4.8776227476601874e-05, "loss": 0.9044, "step": 829000 }, { "epoch": 190.01, "eval_loss": 1.6908377408981323, "eval_runtime": 8.7546, "eval_samples_per_second": 536.062, "eval_steps_per_second": 67.051, "step": 829000 }, { "epoch": 190.12, "learning_rate": 4.877313494721176e-05, "loss": 0.8865, "step": 829500 }, { "epoch": 190.12, "eval_loss": 1.6723045110702515, "eval_runtime": 8.7515, "eval_samples_per_second": 536.248, "eval_steps_per_second": 67.074, "step": 829500 }, { "epoch": 190.24, "learning_rate": 4.877003861355667e-05, "loss": 0.8896, "step": 830000 }, { "epoch": 190.24, "eval_loss": 1.6950819492340088, "eval_runtime": 8.7626, "eval_samples_per_second": 535.57, "eval_steps_per_second": 66.989, "step": 830000 }, { "epoch": 190.35, "learning_rate": 4.8766938476132086e-05, "loss": 0.8902, "step": 830500 }, { "epoch": 190.35, "eval_loss": 1.669044852256775, "eval_runtime": 8.7632, "eval_samples_per_second": 535.534, "eval_steps_per_second": 66.985, "step": 830500 }, { "epoch": 190.47, "learning_rate": 4.876383453543411e-05, "loss": 0.8971, "step": 831000 }, { "epoch": 190.47, "eval_loss": 1.6953049898147583, "eval_runtime": 8.757, "eval_samples_per_second": 535.916, "eval_steps_per_second": 67.032, "step": 831000 }, { "epoch": 190.58, "learning_rate": 4.876072679195944e-05, "loss": 0.8963, "step": 831500 }, { "epoch": 190.58, "eval_loss": 1.6742002964019775, "eval_runtime": 8.756, "eval_samples_per_second": 535.973, "eval_steps_per_second": 67.039, "step": 831500 }, { "epoch": 190.69, "learning_rate": 4.87576152462054e-05, "loss": 0.8932, "step": 832000 }, { "epoch": 190.69, "eval_loss": 1.6915702819824219, "eval_runtime": 8.7632, "eval_samples_per_second": 535.537, "eval_steps_per_second": 66.985, "step": 832000 }, { "epoch": 190.81, "learning_rate": 4.8754499898669905e-05, "loss": 0.9007, "step": 832500 }, { "epoch": 190.81, "eval_loss": 1.6636028289794922, "eval_runtime": 8.7574, "eval_samples_per_second": 535.888, "eval_steps_per_second": 67.029, "step": 832500 }, { "epoch": 190.92, "learning_rate": 4.875138074985147e-05, "loss": 0.8941, "step": 833000 }, { "epoch": 190.92, "eval_loss": 1.6724028587341309, "eval_runtime": 8.7579, "eval_samples_per_second": 535.858, "eval_steps_per_second": 67.025, "step": 833000 }, { "epoch": 191.04, "learning_rate": 4.8748257800249255e-05, "loss": 0.8959, "step": 833500 }, { "epoch": 191.04, "eval_loss": 1.697401762008667, "eval_runtime": 8.7595, "eval_samples_per_second": 535.76, "eval_steps_per_second": 67.013, "step": 833500 }, { "epoch": 191.15, "learning_rate": 4.8745131050362993e-05, "loss": 0.8854, "step": 834000 }, { "epoch": 191.15, "eval_loss": 1.6913566589355469, "eval_runtime": 8.7602, "eval_samples_per_second": 535.717, "eval_steps_per_second": 67.007, "step": 834000 }, { "epoch": 191.27, "learning_rate": 4.874200050069304e-05, "loss": 0.8889, "step": 834500 }, { "epoch": 191.27, "eval_loss": 1.7082916498184204, "eval_runtime": 8.756, "eval_samples_per_second": 535.978, "eval_steps_per_second": 67.04, "step": 834500 }, { "epoch": 191.38, "learning_rate": 4.8738866151740364e-05, "loss": 0.8865, "step": 835000 }, { "epoch": 191.38, "eval_loss": 1.6873767375946045, "eval_runtime": 8.7621, "eval_samples_per_second": 535.604, "eval_steps_per_second": 66.993, "step": 835000 }, { "epoch": 191.5, "learning_rate": 4.873572800400653e-05, "loss": 0.8889, "step": 835500 }, { "epoch": 191.5, "eval_loss": 1.688508152961731, "eval_runtime": 8.7573, "eval_samples_per_second": 535.897, "eval_steps_per_second": 67.03, "step": 835500 }, { "epoch": 191.61, "learning_rate": 4.873258605799371e-05, "loss": 0.8953, "step": 836000 }, { "epoch": 191.61, "eval_loss": 1.6819677352905273, "eval_runtime": 8.7622, "eval_samples_per_second": 535.597, "eval_steps_per_second": 66.992, "step": 836000 }, { "epoch": 191.73, "learning_rate": 4.872944031420471e-05, "loss": 0.899, "step": 836500 }, { "epoch": 191.73, "eval_loss": 1.6886982917785645, "eval_runtime": 8.7547, "eval_samples_per_second": 536.052, "eval_steps_per_second": 67.049, "step": 836500 }, { "epoch": 191.84, "learning_rate": 4.8726290773142896e-05, "loss": 0.8971, "step": 837000 }, { "epoch": 191.84, "eval_loss": 1.689682126045227, "eval_runtime": 8.757, "eval_samples_per_second": 535.916, "eval_steps_per_second": 67.032, "step": 837000 }, { "epoch": 191.95, "learning_rate": 4.872313743531229e-05, "loss": 0.8967, "step": 837500 }, { "epoch": 191.95, "eval_loss": 1.6962060928344727, "eval_runtime": 8.7564, "eval_samples_per_second": 535.951, "eval_steps_per_second": 67.037, "step": 837500 }, { "epoch": 192.07, "learning_rate": 4.871998030121749e-05, "loss": 0.8932, "step": 838000 }, { "epoch": 192.07, "eval_loss": 1.6908912658691406, "eval_runtime": 8.7555, "eval_samples_per_second": 536.004, "eval_steps_per_second": 67.043, "step": 838000 }, { "epoch": 192.18, "learning_rate": 4.871681937136372e-05, "loss": 0.8843, "step": 838500 }, { "epoch": 192.18, "eval_loss": 1.6878806352615356, "eval_runtime": 8.7552, "eval_samples_per_second": 536.023, "eval_steps_per_second": 67.046, "step": 838500 }, { "epoch": 192.3, "learning_rate": 4.87136546462568e-05, "loss": 0.892, "step": 839000 }, { "epoch": 192.3, "eval_loss": 1.6862518787384033, "eval_runtime": 8.7557, "eval_samples_per_second": 535.996, "eval_steps_per_second": 67.042, "step": 839000 }, { "epoch": 192.41, "learning_rate": 4.8710486126403156e-05, "loss": 0.8907, "step": 839500 }, { "epoch": 192.41, "eval_loss": 1.704907774925232, "eval_runtime": 8.7574, "eval_samples_per_second": 535.892, "eval_steps_per_second": 67.029, "step": 839500 }, { "epoch": 192.53, "learning_rate": 4.870731381230984e-05, "loss": 0.8965, "step": 840000 }, { "epoch": 192.53, "eval_loss": 1.6812530755996704, "eval_runtime": 8.7544, "eval_samples_per_second": 536.072, "eval_steps_per_second": 67.052, "step": 840000 }, { "epoch": 192.64, "learning_rate": 4.870413770448447e-05, "loss": 0.8923, "step": 840500 }, { "epoch": 192.64, "eval_loss": 1.695534586906433, "eval_runtime": 8.7582, "eval_samples_per_second": 535.843, "eval_steps_per_second": 67.023, "step": 840500 }, { "epoch": 192.76, "learning_rate": 4.8700957803435344e-05, "loss": 0.8979, "step": 841000 }, { "epoch": 192.76, "eval_loss": 1.7007120847702026, "eval_runtime": 8.7277, "eval_samples_per_second": 537.714, "eval_steps_per_second": 67.257, "step": 841000 }, { "epoch": 192.87, "learning_rate": 4.869777410967128e-05, "loss": 0.8983, "step": 841500 }, { "epoch": 192.87, "eval_loss": 1.7121422290802002, "eval_runtime": 8.732, "eval_samples_per_second": 537.451, "eval_steps_per_second": 67.224, "step": 841500 }, { "epoch": 192.99, "learning_rate": 4.8694586623701755e-05, "loss": 0.896, "step": 842000 }, { "epoch": 192.99, "eval_loss": 1.6703453063964844, "eval_runtime": 8.7351, "eval_samples_per_second": 537.259, "eval_steps_per_second": 67.2, "step": 842000 }, { "epoch": 193.1, "learning_rate": 4.869139534603685e-05, "loss": 0.884, "step": 842500 }, { "epoch": 193.1, "eval_loss": 1.6768743991851807, "eval_runtime": 8.7293, "eval_samples_per_second": 537.614, "eval_steps_per_second": 67.245, "step": 842500 }, { "epoch": 193.22, "learning_rate": 4.868820027718725e-05, "loss": 0.8804, "step": 843000 }, { "epoch": 193.22, "eval_loss": 1.7018846273422241, "eval_runtime": 8.7008, "eval_samples_per_second": 539.374, "eval_steps_per_second": 67.465, "step": 843000 }, { "epoch": 193.33, "learning_rate": 4.868500141766422e-05, "loss": 0.8916, "step": 843500 }, { "epoch": 193.33, "eval_loss": 1.6724631786346436, "eval_runtime": 8.6957, "eval_samples_per_second": 539.69, "eval_steps_per_second": 67.504, "step": 843500 }, { "epoch": 193.44, "learning_rate": 4.868179876797968e-05, "loss": 0.8886, "step": 844000 }, { "epoch": 193.44, "eval_loss": 1.6828292608261108, "eval_runtime": 8.7181, "eval_samples_per_second": 538.306, "eval_steps_per_second": 67.331, "step": 844000 }, { "epoch": 193.56, "learning_rate": 4.867859232864611e-05, "loss": 0.89, "step": 844500 }, { "epoch": 193.56, "eval_loss": 1.6794307231903076, "eval_runtime": 8.7067, "eval_samples_per_second": 539.008, "eval_steps_per_second": 67.419, "step": 844500 }, { "epoch": 193.67, "learning_rate": 4.867538210017662e-05, "loss": 0.8938, "step": 845000 }, { "epoch": 193.67, "eval_loss": 1.6752862930297852, "eval_runtime": 8.7118, "eval_samples_per_second": 538.694, "eval_steps_per_second": 67.38, "step": 845000 }, { "epoch": 193.79, "learning_rate": 4.8672168083084924e-05, "loss": 0.8943, "step": 845500 }, { "epoch": 193.79, "eval_loss": 1.6939067840576172, "eval_runtime": 8.7061, "eval_samples_per_second": 539.049, "eval_steps_per_second": 67.424, "step": 845500 }, { "epoch": 193.9, "learning_rate": 4.866895027788535e-05, "loss": 0.8853, "step": 846000 }, { "epoch": 193.9, "eval_loss": 1.6889760494232178, "eval_runtime": 8.7122, "eval_samples_per_second": 538.671, "eval_steps_per_second": 67.377, "step": 846000 }, { "epoch": 194.02, "learning_rate": 4.866572868509281e-05, "loss": 0.9008, "step": 846500 }, { "epoch": 194.02, "eval_loss": 1.6869008541107178, "eval_runtime": 8.7601, "eval_samples_per_second": 535.727, "eval_steps_per_second": 67.009, "step": 846500 }, { "epoch": 194.13, "learning_rate": 4.866250330522284e-05, "loss": 0.8869, "step": 847000 }, { "epoch": 194.13, "eval_loss": 1.702773928642273, "eval_runtime": 8.7685, "eval_samples_per_second": 535.212, "eval_steps_per_second": 66.944, "step": 847000 }, { "epoch": 194.25, "learning_rate": 4.865927413879158e-05, "loss": 0.89, "step": 847500 }, { "epoch": 194.25, "eval_loss": 1.721459150314331, "eval_runtime": 8.7571, "eval_samples_per_second": 535.908, "eval_steps_per_second": 67.031, "step": 847500 }, { "epoch": 194.36, "learning_rate": 4.865604118631577e-05, "loss": 0.8863, "step": 848000 }, { "epoch": 194.36, "eval_loss": 1.6748789548873901, "eval_runtime": 8.7699, "eval_samples_per_second": 535.124, "eval_steps_per_second": 66.933, "step": 848000 }, { "epoch": 194.48, "learning_rate": 4.865280444831276e-05, "loss": 0.8887, "step": 848500 }, { "epoch": 194.48, "eval_loss": 1.7015564441680908, "eval_runtime": 8.7665, "eval_samples_per_second": 535.336, "eval_steps_per_second": 66.96, "step": 848500 }, { "epoch": 194.59, "learning_rate": 4.864956392530051e-05, "loss": 0.8907, "step": 849000 }, { "epoch": 194.59, "eval_loss": 1.6804516315460205, "eval_runtime": 8.7572, "eval_samples_per_second": 535.903, "eval_steps_per_second": 67.031, "step": 849000 }, { "epoch": 194.71, "learning_rate": 4.864631961779756e-05, "loss": 0.8912, "step": 849500 }, { "epoch": 194.71, "eval_loss": 1.6879522800445557, "eval_runtime": 8.7639, "eval_samples_per_second": 535.493, "eval_steps_per_second": 66.979, "step": 849500 }, { "epoch": 194.82, "learning_rate": 4.86430715263231e-05, "loss": 0.8947, "step": 850000 }, { "epoch": 194.82, "eval_loss": 1.6653553247451782, "eval_runtime": 8.7811, "eval_samples_per_second": 534.441, "eval_steps_per_second": 66.848, "step": 850000 } ], "max_steps": 2181500, "num_train_epochs": 500, "total_flos": 3.5806401315750574e+18, "trial_name": null, "trial_params": null }