{ "best_metric": 85.6858, "best_model_checkpoint": "AraT5_FT_MSA_Transaltion/checkpoint-74500", "epoch": 60.0, "eval_steps": 500, "global_step": 75000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 4.9836065573770496e-05, "loss": 3.9102, "step": 500 }, { "epoch": 0.4, "eval_bleu": 18.6972, "eval_gen_len": 9.4035, "eval_loss": 1.9062472581863403, "eval_runtime": 106.2615, "eval_samples_per_second": 94.107, "eval_steps_per_second": 1.477, "step": 500 }, { "epoch": 0.8, "learning_rate": 4.967213114754098e-05, "loss": 2.3273, "step": 1000 }, { "epoch": 0.8, "eval_bleu": 22.5788, "eval_gen_len": 9.3259, "eval_loss": 1.6005295515060425, "eval_runtime": 107.2854, "eval_samples_per_second": 93.209, "eval_steps_per_second": 1.463, "step": 1000 }, { "epoch": 1.2, "learning_rate": 4.9508196721311476e-05, "loss": 1.996, "step": 1500 }, { "epoch": 1.2, "eval_bleu": 25.6973, "eval_gen_len": 9.4844, "eval_loss": 1.413317084312439, "eval_runtime": 107.9077, "eval_samples_per_second": 92.672, "eval_steps_per_second": 1.455, "step": 1500 }, { "epoch": 1.6, "learning_rate": 4.934426229508197e-05, "loss": 1.7747, "step": 2000 }, { "epoch": 1.6, "eval_bleu": 29.1098, "eval_gen_len": 9.4334, "eval_loss": 1.2736828327178955, "eval_runtime": 108.0256, "eval_samples_per_second": 92.571, "eval_steps_per_second": 1.453, "step": 2000 }, { "epoch": 2.0, "learning_rate": 4.918032786885246e-05, "loss": 1.6363, "step": 2500 }, { "epoch": 2.0, "eval_bleu": 32.4975, "eval_gen_len": 9.5307, "eval_loss": 1.154405951499939, "eval_runtime": 108.0346, "eval_samples_per_second": 92.563, "eval_steps_per_second": 1.453, "step": 2500 }, { "epoch": 2.4, "learning_rate": 4.9016393442622957e-05, "loss": 1.4614, "step": 3000 }, { "epoch": 2.4, "eval_bleu": 35.6534, "eval_gen_len": 9.5125, "eval_loss": 1.0677547454833984, "eval_runtime": 106.6888, "eval_samples_per_second": 93.731, "eval_steps_per_second": 1.472, "step": 3000 }, { "epoch": 2.8, "learning_rate": 4.885245901639344e-05, "loss": 1.3627, "step": 3500 }, { "epoch": 2.8, "eval_bleu": 39.0667, "eval_gen_len": 9.5759, "eval_loss": 0.9860268235206604, "eval_runtime": 108.9156, "eval_samples_per_second": 91.814, "eval_steps_per_second": 1.441, "step": 3500 }, { "epoch": 3.2, "learning_rate": 4.868852459016394e-05, "loss": 1.2627, "step": 4000 }, { "epoch": 3.2, "eval_bleu": 42.4036, "eval_gen_len": 9.6225, "eval_loss": 0.9212129712104797, "eval_runtime": 108.2436, "eval_samples_per_second": 92.384, "eval_steps_per_second": 1.45, "step": 4000 }, { "epoch": 3.6, "learning_rate": 4.852459016393443e-05, "loss": 1.1616, "step": 4500 }, { "epoch": 3.6, "eval_bleu": 44.7376, "eval_gen_len": 9.6448, "eval_loss": 0.8675327897071838, "eval_runtime": 109.0598, "eval_samples_per_second": 91.693, "eval_steps_per_second": 1.44, "step": 4500 }, { "epoch": 4.0, "learning_rate": 4.836065573770492e-05, "loss": 1.1226, "step": 5000 }, { "epoch": 4.0, "eval_bleu": 47.2213, "eval_gen_len": 9.6337, "eval_loss": 0.816310703754425, "eval_runtime": 106.4424, "eval_samples_per_second": 93.947, "eval_steps_per_second": 1.475, "step": 5000 }, { "epoch": 4.4, "learning_rate": 4.819672131147541e-05, "loss": 1.006, "step": 5500 }, { "epoch": 4.4, "eval_bleu": 49.5563, "eval_gen_len": 9.7168, "eval_loss": 0.7709316611289978, "eval_runtime": 112.4236, "eval_samples_per_second": 88.949, "eval_steps_per_second": 1.397, "step": 5500 }, { "epoch": 4.8, "learning_rate": 4.8032786885245904e-05, "loss": 0.978, "step": 6000 }, { "epoch": 4.8, "eval_bleu": 50.775, "eval_gen_len": 9.6925, "eval_loss": 0.7373432517051697, "eval_runtime": 109.2099, "eval_samples_per_second": 91.567, "eval_steps_per_second": 1.438, "step": 6000 }, { "epoch": 5.2, "learning_rate": 4.78688524590164e-05, "loss": 0.9099, "step": 6500 }, { "epoch": 5.2, "eval_bleu": 52.697, "eval_gen_len": 9.7017, "eval_loss": 0.7020449042320251, "eval_runtime": 109.0697, "eval_samples_per_second": 91.684, "eval_steps_per_second": 1.439, "step": 6500 }, { "epoch": 5.6, "learning_rate": 4.770491803278689e-05, "loss": 0.8483, "step": 7000 }, { "epoch": 5.6, "eval_bleu": 53.9571, "eval_gen_len": 9.693, "eval_loss": 0.6663933992385864, "eval_runtime": 109.7349, "eval_samples_per_second": 91.129, "eval_steps_per_second": 1.431, "step": 7000 }, { "epoch": 6.0, "learning_rate": 4.754098360655738e-05, "loss": 0.8293, "step": 7500 }, { "epoch": 6.0, "eval_bleu": 55.874, "eval_gen_len": 9.7475, "eval_loss": 0.630104124546051, "eval_runtime": 109.6322, "eval_samples_per_second": 91.214, "eval_steps_per_second": 1.432, "step": 7500 }, { "epoch": 6.4, "learning_rate": 4.737704918032787e-05, "loss": 0.7493, "step": 8000 }, { "epoch": 6.4, "eval_bleu": 56.7427, "eval_gen_len": 9.7239, "eval_loss": 0.6072443723678589, "eval_runtime": 113.2863, "eval_samples_per_second": 88.272, "eval_steps_per_second": 1.386, "step": 8000 }, { "epoch": 6.8, "learning_rate": 4.7213114754098365e-05, "loss": 0.7294, "step": 8500 }, { "epoch": 6.8, "eval_bleu": 57.9936, "eval_gen_len": 9.7521, "eval_loss": 0.5758106112480164, "eval_runtime": 110.1034, "eval_samples_per_second": 90.824, "eval_steps_per_second": 1.426, "step": 8500 }, { "epoch": 7.2, "learning_rate": 4.704918032786885e-05, "loss": 0.6904, "step": 9000 }, { "epoch": 7.2, "eval_bleu": 59.0065, "eval_gen_len": 9.7544, "eval_loss": 0.5612244606018066, "eval_runtime": 117.8275, "eval_samples_per_second": 84.87, "eval_steps_per_second": 1.332, "step": 9000 }, { "epoch": 7.6, "learning_rate": 4.6885245901639345e-05, "loss": 0.6478, "step": 9500 }, { "epoch": 7.6, "eval_bleu": 60.1129, "eval_gen_len": 9.7827, "eval_loss": 0.525496780872345, "eval_runtime": 119.5262, "eval_samples_per_second": 83.664, "eval_steps_per_second": 1.314, "step": 9500 }, { "epoch": 8.0, "learning_rate": 4.672131147540984e-05, "loss": 0.6257, "step": 10000 }, { "epoch": 8.0, "eval_bleu": 61.0568, "eval_gen_len": 9.7663, "eval_loss": 0.5063189268112183, "eval_runtime": 118.6711, "eval_samples_per_second": 84.267, "eval_steps_per_second": 1.323, "step": 10000 }, { "epoch": 8.4, "learning_rate": 4.655737704918033e-05, "loss": 0.5696, "step": 10500 }, { "epoch": 8.4, "eval_bleu": 61.9169, "eval_gen_len": 9.776, "eval_loss": 0.4885226786136627, "eval_runtime": 121.031, "eval_samples_per_second": 82.623, "eval_steps_per_second": 1.297, "step": 10500 }, { "epoch": 8.8, "learning_rate": 4.6393442622950825e-05, "loss": 0.5636, "step": 11000 }, { "epoch": 8.8, "eval_bleu": 62.5974, "eval_gen_len": 9.7975, "eval_loss": 0.471066951751709, "eval_runtime": 119.0897, "eval_samples_per_second": 83.97, "eval_steps_per_second": 1.318, "step": 11000 }, { "epoch": 9.2, "learning_rate": 4.622950819672132e-05, "loss": 0.5258, "step": 11500 }, { "epoch": 9.2, "eval_bleu": 63.7967, "eval_gen_len": 9.8122, "eval_loss": 0.449593722820282, "eval_runtime": 120.8927, "eval_samples_per_second": 82.718, "eval_steps_per_second": 1.299, "step": 11500 }, { "epoch": 9.6, "learning_rate": 4.6065573770491805e-05, "loss": 0.4979, "step": 12000 }, { "epoch": 9.6, "eval_bleu": 64.6212, "eval_gen_len": 9.7674, "eval_loss": 0.43481728434562683, "eval_runtime": 119.3174, "eval_samples_per_second": 83.81, "eval_steps_per_second": 1.316, "step": 12000 }, { "epoch": 10.0, "learning_rate": 4.59016393442623e-05, "loss": 0.4987, "step": 12500 }, { "epoch": 10.0, "eval_bleu": 65.3736, "eval_gen_len": 9.8562, "eval_loss": 0.4133751392364502, "eval_runtime": 122.0111, "eval_samples_per_second": 81.96, "eval_steps_per_second": 1.287, "step": 12500 }, { "epoch": 10.4, "learning_rate": 4.5737704918032786e-05, "loss": 0.4497, "step": 13000 }, { "epoch": 10.4, "eval_bleu": 66.4415, "eval_gen_len": 9.8254, "eval_loss": 0.39948710799217224, "eval_runtime": 121.3113, "eval_samples_per_second": 82.433, "eval_steps_per_second": 1.294, "step": 13000 }, { "epoch": 10.8, "learning_rate": 4.557377049180328e-05, "loss": 0.4382, "step": 13500 }, { "epoch": 10.8, "eval_bleu": 66.8785, "eval_gen_len": 9.8152, "eval_loss": 0.3892167806625366, "eval_runtime": 120.977, "eval_samples_per_second": 82.66, "eval_steps_per_second": 1.298, "step": 13500 }, { "epoch": 11.2, "learning_rate": 4.540983606557377e-05, "loss": 0.4146, "step": 14000 }, { "epoch": 11.2, "eval_bleu": 67.6836, "eval_gen_len": 9.8031, "eval_loss": 0.374174565076828, "eval_runtime": 123.4198, "eval_samples_per_second": 81.024, "eval_steps_per_second": 1.272, "step": 14000 }, { "epoch": 11.6, "learning_rate": 4.524590163934426e-05, "loss": 0.3895, "step": 14500 }, { "epoch": 11.6, "eval_bleu": 68.4895, "eval_gen_len": 9.8325, "eval_loss": 0.3638547658920288, "eval_runtime": 123.9996, "eval_samples_per_second": 80.645, "eval_steps_per_second": 1.266, "step": 14500 }, { "epoch": 12.0, "learning_rate": 4.508196721311476e-05, "loss": 0.3881, "step": 15000 }, { "epoch": 12.0, "eval_bleu": 68.9665, "eval_gen_len": 9.8444, "eval_loss": 0.3532446026802063, "eval_runtime": 123.044, "eval_samples_per_second": 81.272, "eval_steps_per_second": 1.276, "step": 15000 }, { "epoch": 12.4, "learning_rate": 4.491803278688525e-05, "loss": 0.3495, "step": 15500 }, { "epoch": 12.4, "eval_bleu": 69.8231, "eval_gen_len": 9.8346, "eval_loss": 0.34260880947113037, "eval_runtime": 122.4901, "eval_samples_per_second": 81.639, "eval_steps_per_second": 1.282, "step": 15500 }, { "epoch": 12.8, "learning_rate": 4.475409836065574e-05, "loss": 0.3474, "step": 16000 }, { "epoch": 12.8, "eval_bleu": 70.4124, "eval_gen_len": 9.8408, "eval_loss": 0.3283344805240631, "eval_runtime": 122.1563, "eval_samples_per_second": 81.862, "eval_steps_per_second": 1.285, "step": 16000 }, { "epoch": 13.2, "learning_rate": 4.459016393442623e-05, "loss": 0.3264, "step": 16500 }, { "epoch": 13.2, "eval_bleu": 70.991, "eval_gen_len": 9.8374, "eval_loss": 0.3219762444496155, "eval_runtime": 122.3026, "eval_samples_per_second": 81.764, "eval_steps_per_second": 1.284, "step": 16500 }, { "epoch": 13.6, "learning_rate": 4.442622950819673e-05, "loss": 0.3095, "step": 17000 }, { "epoch": 13.6, "eval_bleu": 71.7934, "eval_gen_len": 9.8704, "eval_loss": 0.3138624131679535, "eval_runtime": 124.2274, "eval_samples_per_second": 80.498, "eval_steps_per_second": 1.264, "step": 17000 }, { "epoch": 14.0, "learning_rate": 4.426229508196721e-05, "loss": 0.3138, "step": 17500 }, { "epoch": 14.0, "eval_bleu": 72.3896, "eval_gen_len": 9.8585, "eval_loss": 0.3009161949157715, "eval_runtime": 122.1372, "eval_samples_per_second": 81.875, "eval_steps_per_second": 1.285, "step": 17500 }, { "epoch": 14.4, "learning_rate": 4.409836065573771e-05, "loss": 0.2828, "step": 18000 }, { "epoch": 14.4, "eval_bleu": 72.6457, "eval_gen_len": 9.8585, "eval_loss": 0.301722913980484, "eval_runtime": 123.5238, "eval_samples_per_second": 80.956, "eval_steps_per_second": 1.271, "step": 18000 }, { "epoch": 14.8, "learning_rate": 4.3934426229508194e-05, "loss": 0.2776, "step": 18500 }, { "epoch": 14.8, "eval_bleu": 73.1631, "eval_gen_len": 9.8606, "eval_loss": 0.2890518307685852, "eval_runtime": 123.7854, "eval_samples_per_second": 80.785, "eval_steps_per_second": 1.268, "step": 18500 }, { "epoch": 15.2, "learning_rate": 4.377049180327869e-05, "loss": 0.2653, "step": 19000 }, { "epoch": 15.2, "eval_bleu": 73.6086, "eval_gen_len": 9.8775, "eval_loss": 0.2824092507362366, "eval_runtime": 122.3472, "eval_samples_per_second": 81.735, "eval_steps_per_second": 1.283, "step": 19000 }, { "epoch": 15.6, "learning_rate": 4.360655737704919e-05, "loss": 0.2561, "step": 19500 }, { "epoch": 15.6, "eval_bleu": 74.2558, "eval_gen_len": 9.8651, "eval_loss": 0.27599573135375977, "eval_runtime": 123.4989, "eval_samples_per_second": 80.972, "eval_steps_per_second": 1.271, "step": 19500 }, { "epoch": 16.0, "learning_rate": 4.3442622950819674e-05, "loss": 0.2534, "step": 20000 }, { "epoch": 16.0, "eval_bleu": 74.6646, "eval_gen_len": 9.8609, "eval_loss": 0.2678174674510956, "eval_runtime": 121.7684, "eval_samples_per_second": 82.123, "eval_steps_per_second": 1.289, "step": 20000 }, { "epoch": 16.4, "learning_rate": 4.327868852459017e-05, "loss": 0.229, "step": 20500 }, { "epoch": 16.4, "eval_bleu": 75.1771, "eval_gen_len": 9.8587, "eval_loss": 0.26594653725624084, "eval_runtime": 122.704, "eval_samples_per_second": 81.497, "eval_steps_per_second": 1.28, "step": 20500 }, { "epoch": 16.8, "learning_rate": 4.311475409836066e-05, "loss": 0.23, "step": 21000 }, { "epoch": 16.8, "eval_bleu": 75.2663, "eval_gen_len": 9.8656, "eval_loss": 0.25894829630851746, "eval_runtime": 123.4498, "eval_samples_per_second": 81.005, "eval_steps_per_second": 1.272, "step": 21000 }, { "epoch": 17.2, "learning_rate": 4.295081967213115e-05, "loss": 0.2177, "step": 21500 }, { "epoch": 17.2, "eval_bleu": 75.7616, "eval_gen_len": 9.8622, "eval_loss": 0.260859876871109, "eval_runtime": 124.3613, "eval_samples_per_second": 80.411, "eval_steps_per_second": 1.262, "step": 21500 }, { "epoch": 17.6, "learning_rate": 4.278688524590164e-05, "loss": 0.2069, "step": 22000 }, { "epoch": 17.6, "eval_bleu": 76.485, "eval_gen_len": 9.8688, "eval_loss": 0.25088420510292053, "eval_runtime": 124.1311, "eval_samples_per_second": 80.56, "eval_steps_per_second": 1.265, "step": 22000 }, { "epoch": 18.0, "learning_rate": 4.262295081967213e-05, "loss": 0.2092, "step": 22500 }, { "epoch": 18.0, "eval_bleu": 76.8358, "eval_gen_len": 9.8662, "eval_loss": 0.24580596387386322, "eval_runtime": 123.4291, "eval_samples_per_second": 81.018, "eval_steps_per_second": 1.272, "step": 22500 }, { "epoch": 18.4, "learning_rate": 4.245901639344262e-05, "loss": 0.1882, "step": 23000 }, { "epoch": 18.4, "eval_bleu": 77.0551, "eval_gen_len": 9.885, "eval_loss": 0.24451805651187897, "eval_runtime": 124.2811, "eval_samples_per_second": 80.463, "eval_steps_per_second": 1.263, "step": 23000 }, { "epoch": 18.8, "learning_rate": 4.229508196721312e-05, "loss": 0.1896, "step": 23500 }, { "epoch": 18.8, "eval_bleu": 77.6142, "eval_gen_len": 9.8917, "eval_loss": 0.23918285965919495, "eval_runtime": 123.8288, "eval_samples_per_second": 80.757, "eval_steps_per_second": 1.268, "step": 23500 }, { "epoch": 19.2, "learning_rate": 4.213114754098361e-05, "loss": 0.1789, "step": 24000 }, { "epoch": 19.2, "eval_bleu": 77.6144, "eval_gen_len": 9.8919, "eval_loss": 0.2408699244260788, "eval_runtime": 122.8605, "eval_samples_per_second": 81.393, "eval_steps_per_second": 1.278, "step": 24000 }, { "epoch": 19.6, "learning_rate": 4.19672131147541e-05, "loss": 0.175, "step": 24500 }, { "epoch": 19.6, "eval_bleu": 78.0091, "eval_gen_len": 9.8878, "eval_loss": 0.23325826227664948, "eval_runtime": 123.7004, "eval_samples_per_second": 80.841, "eval_steps_per_second": 1.269, "step": 24500 }, { "epoch": 20.0, "learning_rate": 4.1803278688524595e-05, "loss": 0.1734, "step": 25000 }, { "epoch": 20.0, "eval_bleu": 78.4943, "eval_gen_len": 9.9012, "eval_loss": 0.2311151772737503, "eval_runtime": 124.6349, "eval_samples_per_second": 80.234, "eval_steps_per_second": 1.26, "step": 25000 }, { "epoch": 20.4, "learning_rate": 4.163934426229508e-05, "loss": 0.1543, "step": 25500 }, { "epoch": 20.4, "eval_bleu": 78.4902, "eval_gen_len": 9.8748, "eval_loss": 0.22952136397361755, "eval_runtime": 125.3963, "eval_samples_per_second": 79.747, "eval_steps_per_second": 1.252, "step": 25500 }, { "epoch": 20.8, "learning_rate": 4.1475409836065575e-05, "loss": 0.1585, "step": 26000 }, { "epoch": 20.8, "eval_bleu": 79.0041, "eval_gen_len": 9.8936, "eval_loss": 0.22459650039672852, "eval_runtime": 125.0293, "eval_samples_per_second": 79.981, "eval_steps_per_second": 1.256, "step": 26000 }, { "epoch": 21.2, "learning_rate": 4.131147540983607e-05, "loss": 0.1476, "step": 26500 }, { "epoch": 21.2, "eval_bleu": 78.922, "eval_gen_len": 9.8887, "eval_loss": 0.22683905065059662, "eval_runtime": 124.4553, "eval_samples_per_second": 80.35, "eval_steps_per_second": 1.261, "step": 26500 }, { "epoch": 21.6, "learning_rate": 4.1147540983606556e-05, "loss": 0.1425, "step": 27000 }, { "epoch": 21.6, "eval_bleu": 79.2218, "eval_gen_len": 9.9064, "eval_loss": 0.2226884663105011, "eval_runtime": 124.563, "eval_samples_per_second": 80.281, "eval_steps_per_second": 1.26, "step": 27000 }, { "epoch": 22.0, "learning_rate": 4.098360655737705e-05, "loss": 0.1452, "step": 27500 }, { "epoch": 22.0, "eval_bleu": 79.6707, "eval_gen_len": 9.9056, "eval_loss": 0.21725259721279144, "eval_runtime": 124.3401, "eval_samples_per_second": 80.425, "eval_steps_per_second": 1.263, "step": 27500 }, { "epoch": 22.4, "learning_rate": 4.081967213114754e-05, "loss": 0.1321, "step": 28000 }, { "epoch": 22.4, "eval_bleu": 79.7907, "eval_gen_len": 9.898, "eval_loss": 0.21729987859725952, "eval_runtime": 125.8166, "eval_samples_per_second": 79.481, "eval_steps_per_second": 1.248, "step": 28000 }, { "epoch": 22.8, "learning_rate": 4.0655737704918036e-05, "loss": 0.1361, "step": 28500 }, { "epoch": 22.8, "eval_bleu": 80.2256, "eval_gen_len": 9.911, "eval_loss": 0.20989477634429932, "eval_runtime": 126.9115, "eval_samples_per_second": 78.795, "eval_steps_per_second": 1.237, "step": 28500 }, { "epoch": 23.2, "learning_rate": 4.049180327868853e-05, "loss": 0.128, "step": 29000 }, { "epoch": 23.2, "eval_bleu": 80.2837, "eval_gen_len": 9.9044, "eval_loss": 0.21322031319141388, "eval_runtime": 126.5113, "eval_samples_per_second": 79.044, "eval_steps_per_second": 1.241, "step": 29000 }, { "epoch": 23.6, "learning_rate": 4.0327868852459016e-05, "loss": 0.1218, "step": 29500 }, { "epoch": 23.6, "eval_bleu": 80.6259, "eval_gen_len": 9.9151, "eval_loss": 0.21201317012310028, "eval_runtime": 124.3696, "eval_samples_per_second": 80.405, "eval_steps_per_second": 1.262, "step": 29500 }, { "epoch": 24.0, "learning_rate": 4.016393442622951e-05, "loss": 0.1248, "step": 30000 }, { "epoch": 24.0, "eval_bleu": 81.0878, "eval_gen_len": 9.9092, "eval_loss": 0.20630747079849243, "eval_runtime": 126.764, "eval_samples_per_second": 78.887, "eval_steps_per_second": 1.239, "step": 30000 }, { "epoch": 24.4, "learning_rate": 4e-05, "loss": 0.1113, "step": 30500 }, { "epoch": 24.4, "eval_bleu": 81.0524, "eval_gen_len": 9.8915, "eval_loss": 0.2094167023897171, "eval_runtime": 125.0054, "eval_samples_per_second": 79.997, "eval_steps_per_second": 1.256, "step": 30500 }, { "epoch": 24.8, "learning_rate": 3.983606557377049e-05, "loss": 0.1149, "step": 31000 }, { "epoch": 24.8, "eval_bleu": 81.0803, "eval_gen_len": 9.9123, "eval_loss": 0.20733323693275452, "eval_runtime": 125.3281, "eval_samples_per_second": 79.791, "eval_steps_per_second": 1.253, "step": 31000 }, { "epoch": 25.2, "learning_rate": 3.9672131147540983e-05, "loss": 0.1085, "step": 31500 }, { "epoch": 25.2, "eval_bleu": 81.1504, "eval_gen_len": 9.8832, "eval_loss": 0.2083030790090561, "eval_runtime": 126.8593, "eval_samples_per_second": 78.828, "eval_steps_per_second": 1.238, "step": 31500 }, { "epoch": 25.6, "learning_rate": 3.950819672131148e-05, "loss": 0.1057, "step": 32000 }, { "epoch": 25.6, "eval_bleu": 81.4995, "eval_gen_len": 9.901, "eval_loss": 0.20488029718399048, "eval_runtime": 124.973, "eval_samples_per_second": 80.017, "eval_steps_per_second": 1.256, "step": 32000 }, { "epoch": 26.0, "learning_rate": 3.934426229508197e-05, "loss": 0.1075, "step": 32500 }, { "epoch": 26.0, "eval_bleu": 81.5477, "eval_gen_len": 9.9016, "eval_loss": 0.20381322503089905, "eval_runtime": 122.9039, "eval_samples_per_second": 81.364, "eval_steps_per_second": 1.277, "step": 32500 }, { "epoch": 26.4, "learning_rate": 3.9180327868852464e-05, "loss": 0.0964, "step": 33000 }, { "epoch": 26.4, "eval_bleu": 81.7251, "eval_gen_len": 9.8988, "eval_loss": 0.20466774702072144, "eval_runtime": 124.1671, "eval_samples_per_second": 80.537, "eval_steps_per_second": 1.264, "step": 33000 }, { "epoch": 26.8, "learning_rate": 3.901639344262295e-05, "loss": 0.0969, "step": 33500 }, { "epoch": 26.8, "eval_bleu": 81.8982, "eval_gen_len": 9.8988, "eval_loss": 0.20009790360927582, "eval_runtime": 123.4229, "eval_samples_per_second": 81.022, "eval_steps_per_second": 1.272, "step": 33500 }, { "epoch": 27.2, "learning_rate": 3.8852459016393444e-05, "loss": 0.095, "step": 34000 }, { "epoch": 27.2, "eval_bleu": 82.1864, "eval_gen_len": 9.9051, "eval_loss": 0.2042824774980545, "eval_runtime": 128.3513, "eval_samples_per_second": 77.911, "eval_steps_per_second": 1.223, "step": 34000 }, { "epoch": 27.6, "learning_rate": 3.868852459016394e-05, "loss": 0.0898, "step": 34500 }, { "epoch": 27.6, "eval_bleu": 82.2154, "eval_gen_len": 9.9245, "eval_loss": 0.2033461481332779, "eval_runtime": 126.5658, "eval_samples_per_second": 79.01, "eval_steps_per_second": 1.24, "step": 34500 }, { "epoch": 28.0, "learning_rate": 3.8524590163934424e-05, "loss": 0.0915, "step": 35000 }, { "epoch": 28.0, "eval_bleu": 82.3736, "eval_gen_len": 9.9161, "eval_loss": 0.19727951288223267, "eval_runtime": 105.7319, "eval_samples_per_second": 94.579, "eval_steps_per_second": 1.485, "step": 35000 }, { "epoch": 28.4, "learning_rate": 3.836065573770492e-05, "loss": 0.0848, "step": 35500 }, { "epoch": 28.4, "eval_bleu": 82.495, "eval_gen_len": 9.9302, "eval_loss": 0.19799815118312836, "eval_runtime": 104.9507, "eval_samples_per_second": 95.283, "eval_steps_per_second": 1.496, "step": 35500 }, { "epoch": 28.8, "learning_rate": 3.819672131147541e-05, "loss": 0.0845, "step": 36000 }, { "epoch": 28.8, "eval_bleu": 82.4837, "eval_gen_len": 9.9116, "eval_loss": 0.19767090678215027, "eval_runtime": 106.5888, "eval_samples_per_second": 93.818, "eval_steps_per_second": 1.473, "step": 36000 }, { "epoch": 29.2, "learning_rate": 3.8032786885245905e-05, "loss": 0.0815, "step": 36500 }, { "epoch": 29.2, "eval_bleu": 82.4047, "eval_gen_len": 9.9089, "eval_loss": 0.19588139653205872, "eval_runtime": 107.9278, "eval_samples_per_second": 92.655, "eval_steps_per_second": 1.455, "step": 36500 }, { "epoch": 29.6, "learning_rate": 3.78688524590164e-05, "loss": 0.0795, "step": 37000 }, { "epoch": 29.6, "eval_bleu": 82.722, "eval_gen_len": 9.9046, "eval_loss": 0.1979523003101349, "eval_runtime": 109.3961, "eval_samples_per_second": 91.411, "eval_steps_per_second": 1.435, "step": 37000 }, { "epoch": 30.0, "learning_rate": 3.7704918032786885e-05, "loss": 0.0808, "step": 37500 }, { "epoch": 30.0, "eval_bleu": 82.6578, "eval_gen_len": 9.8982, "eval_loss": 0.19637715816497803, "eval_runtime": 109.5695, "eval_samples_per_second": 91.266, "eval_steps_per_second": 1.433, "step": 37500 }, { "epoch": 30.4, "learning_rate": 3.754098360655738e-05, "loss": 0.0732, "step": 38000 }, { "epoch": 30.4, "eval_bleu": 82.9919, "eval_gen_len": 9.9044, "eval_loss": 0.19727516174316406, "eval_runtime": 114.1436, "eval_samples_per_second": 87.609, "eval_steps_per_second": 1.375, "step": 38000 }, { "epoch": 30.8, "learning_rate": 3.737704918032787e-05, "loss": 0.0746, "step": 38500 }, { "epoch": 30.8, "eval_bleu": 82.6143, "eval_gen_len": 9.9165, "eval_loss": 0.19912780821323395, "eval_runtime": 111.6449, "eval_samples_per_second": 89.57, "eval_steps_per_second": 1.406, "step": 38500 }, { "epoch": 31.2, "learning_rate": 3.721311475409836e-05, "loss": 0.0707, "step": 39000 }, { "epoch": 31.2, "eval_bleu": 82.9765, "eval_gen_len": 9.9001, "eval_loss": 0.19909825921058655, "eval_runtime": 116.7381, "eval_samples_per_second": 85.662, "eval_steps_per_second": 1.345, "step": 39000 }, { "epoch": 31.6, "learning_rate": 3.704918032786885e-05, "loss": 0.0709, "step": 39500 }, { "epoch": 31.6, "eval_bleu": 83.0914, "eval_gen_len": 9.8965, "eval_loss": 0.19552859663963318, "eval_runtime": 112.5699, "eval_samples_per_second": 88.834, "eval_steps_per_second": 1.395, "step": 39500 }, { "epoch": 32.0, "learning_rate": 3.6885245901639346e-05, "loss": 0.0719, "step": 40000 }, { "epoch": 32.0, "eval_bleu": 83.2552, "eval_gen_len": 9.9192, "eval_loss": 0.1897631287574768, "eval_runtime": 111.6178, "eval_samples_per_second": 89.591, "eval_steps_per_second": 1.407, "step": 40000 }, { "epoch": 32.4, "learning_rate": 3.672131147540984e-05, "loss": 0.0645, "step": 40500 }, { "epoch": 32.4, "eval_bleu": 83.2469, "eval_gen_len": 9.9243, "eval_loss": 0.19487988948822021, "eval_runtime": 113.1976, "eval_samples_per_second": 88.341, "eval_steps_per_second": 1.387, "step": 40500 }, { "epoch": 32.8, "learning_rate": 3.655737704918033e-05, "loss": 0.0668, "step": 41000 }, { "epoch": 32.8, "eval_bleu": 83.554, "eval_gen_len": 9.927, "eval_loss": 0.19293373823165894, "eval_runtime": 115.9999, "eval_samples_per_second": 86.207, "eval_steps_per_second": 1.353, "step": 41000 }, { "epoch": 33.2, "learning_rate": 3.6393442622950826e-05, "loss": 0.0626, "step": 41500 }, { "epoch": 33.2, "eval_bleu": 83.451, "eval_gen_len": 9.919, "eval_loss": 0.19273407757282257, "eval_runtime": 118.7666, "eval_samples_per_second": 84.199, "eval_steps_per_second": 1.322, "step": 41500 }, { "epoch": 33.6, "learning_rate": 3.622950819672131e-05, "loss": 0.0616, "step": 42000 }, { "epoch": 33.6, "eval_bleu": 83.7799, "eval_gen_len": 9.9142, "eval_loss": 0.1896318793296814, "eval_runtime": 119.0485, "eval_samples_per_second": 83.999, "eval_steps_per_second": 1.319, "step": 42000 }, { "epoch": 34.0, "learning_rate": 3.6065573770491806e-05, "loss": 0.0636, "step": 42500 }, { "epoch": 34.0, "eval_bleu": 83.5126, "eval_gen_len": 9.8988, "eval_loss": 0.19057811796665192, "eval_runtime": 119.0601, "eval_samples_per_second": 83.991, "eval_steps_per_second": 1.319, "step": 42500 }, { "epoch": 34.4, "learning_rate": 3.590163934426229e-05, "loss": 0.0576, "step": 43000 }, { "epoch": 34.4, "eval_bleu": 83.5655, "eval_gen_len": 9.9154, "eval_loss": 0.19394218921661377, "eval_runtime": 119.5237, "eval_samples_per_second": 83.665, "eval_steps_per_second": 1.314, "step": 43000 }, { "epoch": 34.8, "learning_rate": 3.5737704918032786e-05, "loss": 0.0585, "step": 43500 }, { "epoch": 34.8, "eval_bleu": 83.6287, "eval_gen_len": 9.9233, "eval_loss": 0.19454576075077057, "eval_runtime": 119.4134, "eval_samples_per_second": 83.743, "eval_steps_per_second": 1.315, "step": 43500 }, { "epoch": 35.2, "learning_rate": 3.557377049180328e-05, "loss": 0.0568, "step": 44000 }, { "epoch": 35.2, "eval_bleu": 83.6904, "eval_gen_len": 9.907, "eval_loss": 0.19391930103302002, "eval_runtime": 119.702, "eval_samples_per_second": 83.541, "eval_steps_per_second": 1.312, "step": 44000 }, { "epoch": 35.6, "learning_rate": 3.5409836065573773e-05, "loss": 0.0551, "step": 44500 }, { "epoch": 35.6, "eval_bleu": 83.9373, "eval_gen_len": 9.9202, "eval_loss": 0.19054347276687622, "eval_runtime": 118.199, "eval_samples_per_second": 84.603, "eval_steps_per_second": 1.328, "step": 44500 }, { "epoch": 36.0, "learning_rate": 3.524590163934427e-05, "loss": 0.0563, "step": 45000 }, { "epoch": 36.0, "eval_bleu": 84.1348, "eval_gen_len": 9.9207, "eval_loss": 0.1921459436416626, "eval_runtime": 119.546, "eval_samples_per_second": 83.65, "eval_steps_per_second": 1.313, "step": 45000 }, { "epoch": 36.4, "learning_rate": 3.508196721311476e-05, "loss": 0.0514, "step": 45500 }, { "epoch": 36.4, "eval_bleu": 84.1097, "eval_gen_len": 9.9185, "eval_loss": 0.19464583694934845, "eval_runtime": 119.8131, "eval_samples_per_second": 83.463, "eval_steps_per_second": 1.31, "step": 45500 }, { "epoch": 36.8, "learning_rate": 3.491803278688525e-05, "loss": 0.0534, "step": 46000 }, { "epoch": 36.8, "eval_bleu": 84.0075, "eval_gen_len": 9.9111, "eval_loss": 0.19089433550834656, "eval_runtime": 118.1118, "eval_samples_per_second": 84.666, "eval_steps_per_second": 1.329, "step": 46000 }, { "epoch": 37.2, "learning_rate": 3.475409836065574e-05, "loss": 0.05, "step": 46500 }, { "epoch": 37.2, "eval_bleu": 84.0187, "eval_gen_len": 9.9198, "eval_loss": 0.1975044161081314, "eval_runtime": 118.0704, "eval_samples_per_second": 84.695, "eval_steps_per_second": 1.33, "step": 46500 }, { "epoch": 37.6, "learning_rate": 3.459016393442623e-05, "loss": 0.0498, "step": 47000 }, { "epoch": 37.6, "eval_bleu": 84.0124, "eval_gen_len": 9.9205, "eval_loss": 0.19323572516441345, "eval_runtime": 117.9012, "eval_samples_per_second": 84.817, "eval_steps_per_second": 1.332, "step": 47000 }, { "epoch": 38.0, "learning_rate": 3.442622950819672e-05, "loss": 0.0496, "step": 47500 }, { "epoch": 38.0, "eval_bleu": 84.2227, "eval_gen_len": 9.9151, "eval_loss": 0.1907936930656433, "eval_runtime": 118.1549, "eval_samples_per_second": 84.635, "eval_steps_per_second": 1.329, "step": 47500 }, { "epoch": 38.4, "learning_rate": 3.4262295081967214e-05, "loss": 0.0474, "step": 48000 }, { "epoch": 38.4, "eval_bleu": 84.1768, "eval_gen_len": 9.9068, "eval_loss": 0.192445769906044, "eval_runtime": 121.5905, "eval_samples_per_second": 82.243, "eval_steps_per_second": 1.291, "step": 48000 }, { "epoch": 38.8, "learning_rate": 3.409836065573771e-05, "loss": 0.0473, "step": 48500 }, { "epoch": 38.8, "eval_bleu": 84.2946, "eval_gen_len": 9.9193, "eval_loss": 0.1934969127178192, "eval_runtime": 119.2854, "eval_samples_per_second": 83.833, "eval_steps_per_second": 1.316, "step": 48500 }, { "epoch": 39.2, "learning_rate": 3.39344262295082e-05, "loss": 0.0454, "step": 49000 }, { "epoch": 39.2, "eval_bleu": 84.3262, "eval_gen_len": 9.9164, "eval_loss": 0.1953597515821457, "eval_runtime": 119.0304, "eval_samples_per_second": 84.012, "eval_steps_per_second": 1.319, "step": 49000 }, { "epoch": 39.6, "learning_rate": 3.3770491803278695e-05, "loss": 0.0453, "step": 49500 }, { "epoch": 39.6, "eval_bleu": 84.4165, "eval_gen_len": 9.9107, "eval_loss": 0.19031359255313873, "eval_runtime": 120.2198, "eval_samples_per_second": 83.181, "eval_steps_per_second": 1.306, "step": 49500 }, { "epoch": 40.0, "learning_rate": 3.360655737704918e-05, "loss": 0.0461, "step": 50000 }, { "epoch": 40.0, "eval_bleu": 84.4289, "eval_gen_len": 9.9216, "eval_loss": 0.1906299889087677, "eval_runtime": 118.8797, "eval_samples_per_second": 84.119, "eval_steps_per_second": 1.321, "step": 50000 }, { "epoch": 40.4, "learning_rate": 3.3442622950819675e-05, "loss": 0.0415, "step": 50500 }, { "epoch": 40.4, "eval_bleu": 84.4636, "eval_gen_len": 9.9082, "eval_loss": 0.19302137196063995, "eval_runtime": 118.8718, "eval_samples_per_second": 84.124, "eval_steps_per_second": 1.321, "step": 50500 }, { "epoch": 40.8, "learning_rate": 3.327868852459017e-05, "loss": 0.044, "step": 51000 }, { "epoch": 40.8, "eval_bleu": 84.5092, "eval_gen_len": 9.9237, "eval_loss": 0.18893210589885712, "eval_runtime": 120.9835, "eval_samples_per_second": 82.656, "eval_steps_per_second": 1.298, "step": 51000 }, { "epoch": 41.2, "learning_rate": 3.3114754098360655e-05, "loss": 0.043, "step": 51500 }, { "epoch": 41.2, "eval_bleu": 84.5908, "eval_gen_len": 9.9221, "eval_loss": 0.1906319111585617, "eval_runtime": 122.6906, "eval_samples_per_second": 81.506, "eval_steps_per_second": 1.28, "step": 51500 }, { "epoch": 41.6, "learning_rate": 3.295081967213115e-05, "loss": 0.0413, "step": 52000 }, { "epoch": 41.6, "eval_bleu": 84.7197, "eval_gen_len": 9.9113, "eval_loss": 0.19282755255699158, "eval_runtime": 121.4487, "eval_samples_per_second": 82.339, "eval_steps_per_second": 1.293, "step": 52000 }, { "epoch": 42.0, "learning_rate": 3.2786885245901635e-05, "loss": 0.0401, "step": 52500 }, { "epoch": 42.0, "eval_bleu": 84.7895, "eval_gen_len": 9.9215, "eval_loss": 0.19361305236816406, "eval_runtime": 120.7154, "eval_samples_per_second": 82.839, "eval_steps_per_second": 1.301, "step": 52500 }, { "epoch": 42.4, "learning_rate": 3.2622950819672136e-05, "loss": 0.0385, "step": 53000 }, { "epoch": 42.4, "eval_bleu": 84.7187, "eval_gen_len": 9.9239, "eval_loss": 0.19195546209812164, "eval_runtime": 119.559, "eval_samples_per_second": 83.641, "eval_steps_per_second": 1.313, "step": 53000 }, { "epoch": 42.8, "learning_rate": 3.245901639344263e-05, "loss": 0.0387, "step": 53500 }, { "epoch": 42.8, "eval_bleu": 84.7193, "eval_gen_len": 9.9146, "eval_loss": 0.19131682813167572, "eval_runtime": 121.1877, "eval_samples_per_second": 82.517, "eval_steps_per_second": 1.296, "step": 53500 }, { "epoch": 43.2, "learning_rate": 3.2295081967213116e-05, "loss": 0.0389, "step": 54000 }, { "epoch": 43.2, "eval_bleu": 84.6862, "eval_gen_len": 9.9225, "eval_loss": 0.19187390804290771, "eval_runtime": 120.9642, "eval_samples_per_second": 82.669, "eval_steps_per_second": 1.298, "step": 54000 }, { "epoch": 43.6, "learning_rate": 3.213114754098361e-05, "loss": 0.0372, "step": 54500 }, { "epoch": 43.6, "eval_bleu": 84.7769, "eval_gen_len": 9.9285, "eval_loss": 0.19241966307163239, "eval_runtime": 125.5973, "eval_samples_per_second": 79.62, "eval_steps_per_second": 1.25, "step": 54500 }, { "epoch": 44.0, "learning_rate": 3.19672131147541e-05, "loss": 0.0383, "step": 55000 }, { "epoch": 44.0, "eval_bleu": 84.9535, "eval_gen_len": 9.9347, "eval_loss": 0.19236235320568085, "eval_runtime": 122.9448, "eval_samples_per_second": 81.337, "eval_steps_per_second": 1.277, "step": 55000 }, { "epoch": 44.4, "learning_rate": 3.180327868852459e-05, "loss": 0.0347, "step": 55500 }, { "epoch": 44.4, "eval_bleu": 84.9326, "eval_gen_len": 9.9288, "eval_loss": 0.1917337328195572, "eval_runtime": 121.8141, "eval_samples_per_second": 82.092, "eval_steps_per_second": 1.289, "step": 55500 }, { "epoch": 44.8, "learning_rate": 3.163934426229508e-05, "loss": 0.0364, "step": 56000 }, { "epoch": 44.8, "eval_bleu": 85.0653, "eval_gen_len": 9.9159, "eval_loss": 0.19078262150287628, "eval_runtime": 122.743, "eval_samples_per_second": 81.471, "eval_steps_per_second": 1.279, "step": 56000 }, { "epoch": 45.2, "learning_rate": 3.1475409836065576e-05, "loss": 0.035, "step": 56500 }, { "epoch": 45.2, "eval_bleu": 84.8097, "eval_gen_len": 9.9093, "eval_loss": 0.19484488666057587, "eval_runtime": 121.3548, "eval_samples_per_second": 82.403, "eval_steps_per_second": 1.294, "step": 56500 }, { "epoch": 45.6, "learning_rate": 3.131147540983606e-05, "loss": 0.0338, "step": 57000 }, { "epoch": 45.6, "eval_bleu": 84.9659, "eval_gen_len": 9.9217, "eval_loss": 0.1974213719367981, "eval_runtime": 120.7302, "eval_samples_per_second": 82.829, "eval_steps_per_second": 1.3, "step": 57000 }, { "epoch": 46.0, "learning_rate": 3.114754098360656e-05, "loss": 0.0353, "step": 57500 }, { "epoch": 46.0, "eval_bleu": 85.0476, "eval_gen_len": 9.9244, "eval_loss": 0.19343513250350952, "eval_runtime": 118.747, "eval_samples_per_second": 84.213, "eval_steps_per_second": 1.322, "step": 57500 }, { "epoch": 46.4, "learning_rate": 3.098360655737705e-05, "loss": 0.0331, "step": 58000 }, { "epoch": 46.4, "eval_bleu": 85.0708, "eval_gen_len": 9.9146, "eval_loss": 0.19627127051353455, "eval_runtime": 121.8836, "eval_samples_per_second": 82.046, "eval_steps_per_second": 1.288, "step": 58000 }, { "epoch": 46.8, "learning_rate": 3.0819672131147544e-05, "loss": 0.0333, "step": 58500 }, { "epoch": 46.8, "eval_bleu": 84.9386, "eval_gen_len": 9.9224, "eval_loss": 0.19614210724830627, "eval_runtime": 121.7048, "eval_samples_per_second": 82.166, "eval_steps_per_second": 1.29, "step": 58500 }, { "epoch": 47.2, "learning_rate": 3.065573770491804e-05, "loss": 0.0326, "step": 59000 }, { "epoch": 47.2, "eval_bleu": 84.9433, "eval_gen_len": 9.918, "eval_loss": 0.19616641104221344, "eval_runtime": 120.7919, "eval_samples_per_second": 82.787, "eval_steps_per_second": 1.3, "step": 59000 }, { "epoch": 47.6, "learning_rate": 3.0491803278688524e-05, "loss": 0.0312, "step": 59500 }, { "epoch": 47.6, "eval_bleu": 84.8756, "eval_gen_len": 9.9365, "eval_loss": 0.1943608969449997, "eval_runtime": 120.9831, "eval_samples_per_second": 82.656, "eval_steps_per_second": 1.298, "step": 59500 }, { "epoch": 48.0, "learning_rate": 3.0327868852459017e-05, "loss": 0.0327, "step": 60000 }, { "epoch": 48.0, "eval_bleu": 84.8764, "eval_gen_len": 9.9289, "eval_loss": 0.19547414779663086, "eval_runtime": 120.3613, "eval_samples_per_second": 83.083, "eval_steps_per_second": 1.304, "step": 60000 }, { "epoch": 48.4, "learning_rate": 3.016393442622951e-05, "loss": 0.0303, "step": 60500 }, { "epoch": 48.4, "eval_bleu": 85.0585, "eval_gen_len": 9.9279, "eval_loss": 0.19307781755924225, "eval_runtime": 121.2016, "eval_samples_per_second": 82.507, "eval_steps_per_second": 1.295, "step": 60500 }, { "epoch": 48.8, "learning_rate": 3e-05, "loss": 0.0305, "step": 61000 }, { "epoch": 48.8, "eval_bleu": 85.2865, "eval_gen_len": 9.9287, "eval_loss": 0.19452740252017975, "eval_runtime": 123.6999, "eval_samples_per_second": 80.841, "eval_steps_per_second": 1.269, "step": 61000 }, { "epoch": 49.2, "learning_rate": 2.9836065573770494e-05, "loss": 0.0296, "step": 61500 }, { "epoch": 49.2, "eval_bleu": 85.1538, "eval_gen_len": 9.9253, "eval_loss": 0.19456754624843597, "eval_runtime": 123.3373, "eval_samples_per_second": 81.078, "eval_steps_per_second": 1.273, "step": 61500 }, { "epoch": 49.6, "learning_rate": 2.967213114754098e-05, "loss": 0.0295, "step": 62000 }, { "epoch": 49.6, "eval_bleu": 85.3376, "eval_gen_len": 9.9427, "eval_loss": 0.19405782222747803, "eval_runtime": 122.2953, "eval_samples_per_second": 81.769, "eval_steps_per_second": 1.284, "step": 62000 }, { "epoch": 50.0, "learning_rate": 2.9508196721311478e-05, "loss": 0.03, "step": 62500 }, { "epoch": 50.0, "eval_bleu": 85.0825, "eval_gen_len": 9.918, "eval_loss": 0.19235928356647491, "eval_runtime": 121.5967, "eval_samples_per_second": 82.239, "eval_steps_per_second": 1.291, "step": 62500 }, { "epoch": 50.4, "learning_rate": 2.934426229508197e-05, "loss": 0.028, "step": 63000 }, { "epoch": 50.4, "eval_bleu": 85.2126, "eval_gen_len": 9.9178, "eval_loss": 0.1952826976776123, "eval_runtime": 121.3203, "eval_samples_per_second": 82.426, "eval_steps_per_second": 1.294, "step": 63000 }, { "epoch": 50.8, "learning_rate": 2.9180327868852458e-05, "loss": 0.0295, "step": 63500 }, { "epoch": 50.8, "eval_bleu": 85.1624, "eval_gen_len": 9.9343, "eval_loss": 0.1901182234287262, "eval_runtime": 122.2317, "eval_samples_per_second": 81.812, "eval_steps_per_second": 1.284, "step": 63500 }, { "epoch": 51.2, "learning_rate": 2.901639344262295e-05, "loss": 0.028, "step": 64000 }, { "epoch": 51.2, "eval_bleu": 85.092, "eval_gen_len": 9.9193, "eval_loss": 0.19715240597724915, "eval_runtime": 121.6797, "eval_samples_per_second": 82.183, "eval_steps_per_second": 1.29, "step": 64000 }, { "epoch": 51.6, "learning_rate": 2.8852459016393445e-05, "loss": 0.0279, "step": 64500 }, { "epoch": 51.6, "eval_bleu": 85.3237, "eval_gen_len": 9.9341, "eval_loss": 0.19140399992465973, "eval_runtime": 122.9196, "eval_samples_per_second": 81.354, "eval_steps_per_second": 1.277, "step": 64500 }, { "epoch": 52.0, "learning_rate": 2.8688524590163935e-05, "loss": 0.0275, "step": 65000 }, { "epoch": 52.0, "eval_bleu": 85.3125, "eval_gen_len": 9.9184, "eval_loss": 0.19335660338401794, "eval_runtime": 120.7401, "eval_samples_per_second": 82.823, "eval_steps_per_second": 1.3, "step": 65000 }, { "epoch": 52.4, "learning_rate": 2.852459016393443e-05, "loss": 0.0266, "step": 65500 }, { "epoch": 52.4, "eval_bleu": 85.3497, "eval_gen_len": 9.9376, "eval_loss": 0.19615261256694794, "eval_runtime": 122.3086, "eval_samples_per_second": 81.76, "eval_steps_per_second": 1.284, "step": 65500 }, { "epoch": 52.8, "learning_rate": 2.8360655737704922e-05, "loss": 0.0269, "step": 66000 }, { "epoch": 52.8, "eval_bleu": 85.2571, "eval_gen_len": 9.9076, "eval_loss": 0.19512337446212769, "eval_runtime": 122.2496, "eval_samples_per_second": 81.8, "eval_steps_per_second": 1.284, "step": 66000 }, { "epoch": 53.2, "learning_rate": 2.819672131147541e-05, "loss": 0.026, "step": 66500 }, { "epoch": 53.2, "eval_bleu": 85.3837, "eval_gen_len": 9.9211, "eval_loss": 0.195496067404747, "eval_runtime": 122.1131, "eval_samples_per_second": 81.891, "eval_steps_per_second": 1.286, "step": 66500 }, { "epoch": 53.6, "learning_rate": 2.8032786885245906e-05, "loss": 0.0257, "step": 67000 }, { "epoch": 53.6, "eval_bleu": 85.3563, "eval_gen_len": 9.9245, "eval_loss": 0.19710040092468262, "eval_runtime": 125.1877, "eval_samples_per_second": 79.88, "eval_steps_per_second": 1.254, "step": 67000 }, { "epoch": 54.0, "learning_rate": 2.7868852459016392e-05, "loss": 0.0263, "step": 67500 }, { "epoch": 54.0, "eval_bleu": 85.3548, "eval_gen_len": 9.9285, "eval_loss": 0.19391243159770966, "eval_runtime": 121.9609, "eval_samples_per_second": 81.993, "eval_steps_per_second": 1.287, "step": 67500 }, { "epoch": 54.4, "learning_rate": 2.7704918032786886e-05, "loss": 0.0251, "step": 68000 }, { "epoch": 54.4, "eval_bleu": 85.3867, "eval_gen_len": 9.9223, "eval_loss": 0.19642965495586395, "eval_runtime": 122.2878, "eval_samples_per_second": 81.774, "eval_steps_per_second": 1.284, "step": 68000 }, { "epoch": 54.8, "learning_rate": 2.754098360655738e-05, "loss": 0.0258, "step": 68500 }, { "epoch": 54.8, "eval_bleu": 85.3325, "eval_gen_len": 9.9353, "eval_loss": 0.19264063239097595, "eval_runtime": 123.3008, "eval_samples_per_second": 81.103, "eval_steps_per_second": 1.273, "step": 68500 }, { "epoch": 55.2, "learning_rate": 2.737704918032787e-05, "loss": 0.0251, "step": 69000 }, { "epoch": 55.2, "eval_bleu": 85.4551, "eval_gen_len": 9.9308, "eval_loss": 0.19382888078689575, "eval_runtime": 126.2193, "eval_samples_per_second": 79.227, "eval_steps_per_second": 1.244, "step": 69000 }, { "epoch": 55.6, "learning_rate": 2.7213114754098363e-05, "loss": 0.0244, "step": 69500 }, { "epoch": 55.6, "eval_bleu": 85.309, "eval_gen_len": 9.9219, "eval_loss": 0.19579891860485077, "eval_runtime": 117.3342, "eval_samples_per_second": 85.227, "eval_steps_per_second": 1.338, "step": 69500 }, { "epoch": 56.0, "learning_rate": 2.7049180327868856e-05, "loss": 0.0255, "step": 70000 }, { "epoch": 56.0, "eval_bleu": 85.3467, "eval_gen_len": 9.9309, "eval_loss": 0.19363795220851898, "eval_runtime": 117.3138, "eval_samples_per_second": 85.241, "eval_steps_per_second": 1.338, "step": 70000 }, { "epoch": 56.4, "learning_rate": 2.6885245901639343e-05, "loss": 0.0237, "step": 70500 }, { "epoch": 56.4, "eval_bleu": 85.4309, "eval_gen_len": 9.919, "eval_loss": 0.19596821069717407, "eval_runtime": 116.9886, "eval_samples_per_second": 85.478, "eval_steps_per_second": 1.342, "step": 70500 }, { "epoch": 56.8, "learning_rate": 2.6721311475409837e-05, "loss": 0.0239, "step": 71000 }, { "epoch": 56.8, "eval_bleu": 85.4014, "eval_gen_len": 9.934, "eval_loss": 0.1943485587835312, "eval_runtime": 121.5859, "eval_samples_per_second": 82.246, "eval_steps_per_second": 1.291, "step": 71000 }, { "epoch": 57.2, "learning_rate": 2.6557377049180327e-05, "loss": 0.0231, "step": 71500 }, { "epoch": 57.2, "eval_bleu": 85.621, "eval_gen_len": 9.9301, "eval_loss": 0.19711793959140778, "eval_runtime": 118.739, "eval_samples_per_second": 84.218, "eval_steps_per_second": 1.322, "step": 71500 }, { "epoch": 57.6, "learning_rate": 2.639344262295082e-05, "loss": 0.0229, "step": 72000 }, { "epoch": 57.6, "eval_bleu": 85.5557, "eval_gen_len": 9.9331, "eval_loss": 0.19833779335021973, "eval_runtime": 119.0068, "eval_samples_per_second": 84.029, "eval_steps_per_second": 1.319, "step": 72000 }, { "epoch": 58.0, "learning_rate": 2.6229508196721314e-05, "loss": 0.0231, "step": 72500 }, { "epoch": 58.0, "eval_bleu": 85.6012, "eval_gen_len": 9.9243, "eval_loss": 0.1915123611688614, "eval_runtime": 115.0294, "eval_samples_per_second": 86.934, "eval_steps_per_second": 1.365, "step": 72500 }, { "epoch": 58.4, "learning_rate": 2.6065573770491804e-05, "loss": 0.0219, "step": 73000 }, { "epoch": 58.4, "eval_bleu": 85.4561, "eval_gen_len": 9.9202, "eval_loss": 0.19812047481536865, "eval_runtime": 114.3888, "eval_samples_per_second": 87.421, "eval_steps_per_second": 1.373, "step": 73000 }, { "epoch": 58.8, "learning_rate": 2.5901639344262297e-05, "loss": 0.0227, "step": 73500 }, { "epoch": 58.8, "eval_bleu": 85.442, "eval_gen_len": 9.9242, "eval_loss": 0.19562363624572754, "eval_runtime": 112.3308, "eval_samples_per_second": 89.023, "eval_steps_per_second": 1.398, "step": 73500 }, { "epoch": 59.2, "learning_rate": 2.573770491803279e-05, "loss": 0.0226, "step": 74000 }, { "epoch": 59.2, "eval_bleu": 85.4558, "eval_gen_len": 9.9199, "eval_loss": 0.19775182008743286, "eval_runtime": 112.3894, "eval_samples_per_second": 88.976, "eval_steps_per_second": 1.397, "step": 74000 }, { "epoch": 59.6, "learning_rate": 2.5573770491803277e-05, "loss": 0.0213, "step": 74500 }, { "epoch": 59.6, "eval_bleu": 85.6858, "eval_gen_len": 9.9401, "eval_loss": 0.19891192018985748, "eval_runtime": 111.4365, "eval_samples_per_second": 89.737, "eval_steps_per_second": 1.409, "step": 74500 }, { "epoch": 60.0, "learning_rate": 2.540983606557377e-05, "loss": 0.0222, "step": 75000 }, { "epoch": 60.0, "eval_bleu": 85.5013, "eval_gen_len": 9.9235, "eval_loss": 0.19413892924785614, "eval_runtime": 107.6887, "eval_samples_per_second": 92.86, "eval_steps_per_second": 1.458, "step": 75000 } ], "logging_steps": 500, "max_steps": 152500, "num_input_tokens_seen": 0, "num_train_epochs": 122, "save_steps": 500, "total_flos": 2.3617192120005427e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }