{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.34640522875817, "eval_steps": 500, "global_step": 11400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.87, "learning_rate": 4.950000000000001e-06, "loss": 14.1952, "step": 100 }, { "epoch": 1.74, "learning_rate": 9.950000000000001e-06, "loss": 6.4908, "step": 200 }, { "epoch": 2.61, "learning_rate": 1.4950000000000001e-05, "loss": 4.5428, "step": 300 }, { "epoch": 3.49, "learning_rate": 1.995e-05, "loss": 3.8598, "step": 400 }, { "epoch": 4.36, "learning_rate": 2.495e-05, "loss": 3.3958, "step": 500 }, { "epoch": 4.36, "eval_loss": 3.3137385845184326, "eval_runtime": 268.8924, "eval_samples_per_second": 12.161, "eval_steps_per_second": 1.521, "eval_wer": 1.0, "step": 500 }, { "epoch": 5.23, "learning_rate": 2.995e-05, "loss": 3.2375, "step": 600 }, { "epoch": 6.1, "learning_rate": 3.495e-05, "loss": 3.1969, "step": 700 }, { "epoch": 6.97, "learning_rate": 3.995e-05, "loss": 3.151, "step": 800 }, { "epoch": 7.84, "learning_rate": 4.495e-05, "loss": 3.1114, "step": 900 }, { "epoch": 8.71, "learning_rate": 4.995e-05, "loss": 3.032, "step": 1000 }, { "epoch": 8.71, "eval_loss": 2.858614921569824, "eval_runtime": 245.2843, "eval_samples_per_second": 13.331, "eval_steps_per_second": 1.667, "eval_wer": 0.9992608441937366, "step": 1000 }, { "epoch": 9.59, "learning_rate": 5.495e-05, "loss": 2.5279, "step": 1100 }, { "epoch": 10.46, "learning_rate": 5.995000000000001e-05, "loss": 1.8013, "step": 1200 }, { "epoch": 11.33, "learning_rate": 6.494999999999999e-05, "loss": 1.5785, "step": 1300 }, { "epoch": 12.2, "learning_rate": 6.995e-05, "loss": 1.468, "step": 1400 }, { "epoch": 13.07, "learning_rate": 7.495e-05, "loss": 1.3977, "step": 1500 }, { "epoch": 13.07, "eval_loss": 0.4785650372505188, "eval_runtime": 252.0133, "eval_samples_per_second": 12.976, "eval_steps_per_second": 1.623, "eval_wer": 0.637541334370745, "step": 1500 }, { "epoch": 13.94, "learning_rate": 7.99e-05, "loss": 1.3681, "step": 1600 }, { "epoch": 14.81, "learning_rate": 8.49e-05, "loss": 1.336, "step": 1700 }, { "epoch": 15.69, "learning_rate": 8.99e-05, "loss": 1.307, "step": 1800 }, { "epoch": 16.56, "learning_rate": 9.49e-05, "loss": 1.2934, "step": 1900 }, { "epoch": 17.43, "learning_rate": 9.99e-05, "loss": 1.2751, "step": 2000 }, { "epoch": 17.43, "eval_loss": 0.3816487491130829, "eval_runtime": 244.8161, "eval_samples_per_second": 13.357, "eval_steps_per_second": 1.671, "eval_wer": 0.53931141801206, "step": 2000 }, { "epoch": 18.3, "learning_rate": 9.895744680851065e-05, "loss": 1.2545, "step": 2100 }, { "epoch": 19.17, "learning_rate": 9.78936170212766e-05, "loss": 1.2488, "step": 2200 }, { "epoch": 20.04, "learning_rate": 9.682978723404255e-05, "loss": 1.2315, "step": 2300 }, { "epoch": 20.92, "learning_rate": 9.576595744680852e-05, "loss": 1.2237, "step": 2400 }, { "epoch": 21.79, "learning_rate": 9.470212765957447e-05, "loss": 1.2113, "step": 2500 }, { "epoch": 21.79, "eval_loss": 0.3450602889060974, "eval_runtime": 260.0099, "eval_samples_per_second": 12.576, "eval_steps_per_second": 1.573, "eval_wer": 0.509900797510212, "step": 2500 }, { "epoch": 22.66, "learning_rate": 9.363829787234043e-05, "loss": 1.204, "step": 2600 }, { "epoch": 23.53, "learning_rate": 9.257446808510639e-05, "loss": 1.1834, "step": 2700 }, { "epoch": 24.4, "learning_rate": 9.151063829787234e-05, "loss": 1.1782, "step": 2800 }, { "epoch": 25.27, "learning_rate": 9.04468085106383e-05, "loss": 1.1616, "step": 2900 }, { "epoch": 26.14, "learning_rate": 8.938297872340426e-05, "loss": 1.156, "step": 3000 }, { "epoch": 26.14, "eval_loss": 0.3244517743587494, "eval_runtime": 254.1562, "eval_samples_per_second": 12.866, "eval_steps_per_second": 1.609, "eval_wer": 0.49192764053686056, "step": 3000 }, { "epoch": 27.02, "learning_rate": 8.831914893617022e-05, "loss": 1.165, "step": 3100 }, { "epoch": 27.89, "learning_rate": 8.725531914893618e-05, "loss": 1.1559, "step": 3200 }, { "epoch": 28.76, "learning_rate": 8.619148936170212e-05, "loss": 1.1239, "step": 3300 }, { "epoch": 29.63, "learning_rate": 8.51276595744681e-05, "loss": 1.1219, "step": 3400 }, { "epoch": 30.5, "learning_rate": 8.406382978723405e-05, "loss": 1.1226, "step": 3500 }, { "epoch": 30.5, "eval_loss": 0.2992089092731476, "eval_runtime": 263.7476, "eval_samples_per_second": 12.398, "eval_steps_per_second": 1.551, "eval_wer": 0.44411593075277184, "step": 3500 }, { "epoch": 31.37, "learning_rate": 8.3e-05, "loss": 1.1115, "step": 3600 }, { "epoch": 32.24, "learning_rate": 8.194680851063831e-05, "loss": 1.0983, "step": 3700 }, { "epoch": 33.12, "learning_rate": 8.088297872340426e-05, "loss": 1.0957, "step": 3800 }, { "epoch": 33.99, "learning_rate": 7.981914893617022e-05, "loss": 1.0874, "step": 3900 }, { "epoch": 34.86, "learning_rate": 7.875531914893617e-05, "loss": 1.0913, "step": 4000 }, { "epoch": 34.86, "eval_loss": 0.2831476330757141, "eval_runtime": 264.6688, "eval_samples_per_second": 12.355, "eval_steps_per_second": 1.545, "eval_wer": 0.431472476171951, "step": 4000 }, { "epoch": 35.73, "learning_rate": 7.769148936170213e-05, "loss": 1.0896, "step": 4100 }, { "epoch": 36.6, "learning_rate": 7.663829787234042e-05, "loss": 1.071, "step": 4200 }, { "epoch": 37.47, "learning_rate": 7.557446808510639e-05, "loss": 1.0635, "step": 4300 }, { "epoch": 38.34, "learning_rate": 7.451063829787235e-05, "loss": 1.0689, "step": 4400 }, { "epoch": 39.22, "learning_rate": 7.34468085106383e-05, "loss": 1.0615, "step": 4500 }, { "epoch": 39.22, "eval_loss": 0.2807982563972473, "eval_runtime": 253.9269, "eval_samples_per_second": 12.878, "eval_steps_per_second": 1.611, "eval_wer": 0.4339622641509434, "step": 4500 }, { "epoch": 40.09, "learning_rate": 7.238297872340425e-05, "loss": 1.0553, "step": 4600 }, { "epoch": 40.96, "learning_rate": 7.131914893617023e-05, "loss": 1.0533, "step": 4700 }, { "epoch": 41.83, "learning_rate": 7.025531914893617e-05, "loss": 1.0546, "step": 4800 }, { "epoch": 42.7, "learning_rate": 6.919148936170213e-05, "loss": 1.0517, "step": 4900 }, { "epoch": 43.57, "learning_rate": 6.812765957446809e-05, "loss": 1.0455, "step": 5000 }, { "epoch": 43.57, "eval_loss": 0.27128756046295166, "eval_runtime": 251.6872, "eval_samples_per_second": 12.992, "eval_steps_per_second": 1.625, "eval_wer": 0.4087531608636452, "step": 5000 }, { "epoch": 44.44, "learning_rate": 6.706382978723405e-05, "loss": 1.0386, "step": 5100 }, { "epoch": 45.32, "learning_rate": 6.6e-05, "loss": 1.0287, "step": 5200 }, { "epoch": 46.19, "learning_rate": 6.493617021276595e-05, "loss": 1.0251, "step": 5300 }, { "epoch": 47.06, "learning_rate": 6.387234042553192e-05, "loss": 1.0335, "step": 5400 }, { "epoch": 47.93, "learning_rate": 6.280851063829788e-05, "loss": 1.0228, "step": 5500 }, { "epoch": 47.93, "eval_loss": 0.2622440457344055, "eval_runtime": 249.5189, "eval_samples_per_second": 13.105, "eval_steps_per_second": 1.639, "eval_wer": 0.39599299747130906, "step": 5500 }, { "epoch": 48.8, "learning_rate": 6.174468085106383e-05, "loss": 1.0184, "step": 5600 }, { "epoch": 49.67, "learning_rate": 6.068085106382979e-05, "loss": 1.0174, "step": 5700 }, { "epoch": 50.54, "learning_rate": 5.9617021276595755e-05, "loss": 1.0133, "step": 5800 }, { "epoch": 51.42, "learning_rate": 5.85531914893617e-05, "loss": 1.0108, "step": 5900 }, { "epoch": 52.29, "learning_rate": 5.7489361702127666e-05, "loss": 0.9936, "step": 6000 }, { "epoch": 52.29, "eval_loss": 0.25247836112976074, "eval_runtime": 249.2066, "eval_samples_per_second": 13.122, "eval_steps_per_second": 1.641, "eval_wer": 0.37957595798482785, "step": 6000 }, { "epoch": 53.16, "learning_rate": 5.642553191489362e-05, "loss": 1.0015, "step": 6100 }, { "epoch": 54.03, "learning_rate": 5.5361702127659576e-05, "loss": 1.0003, "step": 6200 }, { "epoch": 54.9, "learning_rate": 5.429787234042554e-05, "loss": 0.988, "step": 6300 }, { "epoch": 55.77, "learning_rate": 5.3234042553191486e-05, "loss": 0.9845, "step": 6400 }, { "epoch": 56.64, "learning_rate": 5.217021276595745e-05, "loss": 0.968, "step": 6500 }, { "epoch": 56.64, "eval_loss": 0.2505591809749603, "eval_runtime": 253.8437, "eval_samples_per_second": 12.882, "eval_steps_per_second": 1.611, "eval_wer": 0.3798093756078584, "step": 6500 }, { "epoch": 57.52, "learning_rate": 5.110638297872341e-05, "loss": 0.9852, "step": 6600 }, { "epoch": 58.39, "learning_rate": 5.004255319148936e-05, "loss": 0.9737, "step": 6700 }, { "epoch": 59.26, "learning_rate": 4.897872340425532e-05, "loss": 0.9752, "step": 6800 }, { "epoch": 60.13, "learning_rate": 4.792553191489362e-05, "loss": 0.9717, "step": 6900 }, { "epoch": 61.0, "learning_rate": 4.686170212765958e-05, "loss": 0.9704, "step": 7000 }, { "epoch": 61.0, "eval_loss": 0.24809595942497253, "eval_runtime": 257.0988, "eval_samples_per_second": 12.719, "eval_steps_per_second": 1.591, "eval_wer": 0.37346819684886207, "step": 7000 }, { "epoch": 61.87, "learning_rate": 4.579787234042554e-05, "loss": 0.9603, "step": 7100 }, { "epoch": 62.75, "learning_rate": 4.474468085106383e-05, "loss": 0.967, "step": 7200 }, { "epoch": 63.62, "learning_rate": 4.368085106382979e-05, "loss": 0.9623, "step": 7300 }, { "epoch": 64.49, "learning_rate": 4.261702127659575e-05, "loss": 0.9506, "step": 7400 }, { "epoch": 65.36, "learning_rate": 4.15531914893617e-05, "loss": 0.9552, "step": 7500 }, { "epoch": 65.36, "eval_loss": 0.23935528099536896, "eval_runtime": 264.2571, "eval_samples_per_second": 12.374, "eval_steps_per_second": 1.548, "eval_wer": 0.36432600661349934, "step": 7500 }, { "epoch": 66.23, "learning_rate": 4.048936170212766e-05, "loss": 0.9449, "step": 7600 }, { "epoch": 67.1, "learning_rate": 3.9425531914893624e-05, "loss": 0.9411, "step": 7700 }, { "epoch": 67.97, "learning_rate": 3.8361702127659576e-05, "loss": 0.9468, "step": 7800 }, { "epoch": 68.85, "learning_rate": 3.7297872340425534e-05, "loss": 0.9448, "step": 7900 }, { "epoch": 69.72, "learning_rate": 3.6234042553191486e-05, "loss": 0.9417, "step": 8000 }, { "epoch": 69.72, "eval_loss": 0.23499037325382233, "eval_runtime": 254.8702, "eval_samples_per_second": 12.83, "eval_steps_per_second": 1.605, "eval_wer": 0.35366660182843807, "step": 8000 }, { "epoch": 70.59, "learning_rate": 3.517021276595745e-05, "loss": 0.9342, "step": 8100 }, { "epoch": 71.46, "learning_rate": 3.41063829787234e-05, "loss": 0.9338, "step": 8200 }, { "epoch": 72.33, "learning_rate": 3.304255319148936e-05, "loss": 0.9298, "step": 8300 }, { "epoch": 73.2, "learning_rate": 3.1978723404255326e-05, "loss": 0.9239, "step": 8400 }, { "epoch": 74.07, "learning_rate": 3.091489361702128e-05, "loss": 0.9215, "step": 8500 }, { "epoch": 74.07, "eval_loss": 0.23256558179855347, "eval_runtime": 242.8597, "eval_samples_per_second": 13.465, "eval_steps_per_second": 1.684, "eval_wer": 0.35070997860338454, "step": 8500 }, { "epoch": 74.95, "learning_rate": 2.9851063829787236e-05, "loss": 0.9233, "step": 8600 }, { "epoch": 75.82, "learning_rate": 2.878723404255319e-05, "loss": 0.9193, "step": 8700 }, { "epoch": 76.69, "learning_rate": 2.7723404255319153e-05, "loss": 0.9186, "step": 8800 }, { "epoch": 77.56, "learning_rate": 2.6659574468085108e-05, "loss": 0.9064, "step": 8900 }, { "epoch": 78.43, "learning_rate": 2.5595744680851063e-05, "loss": 0.9097, "step": 9000 }, { "epoch": 78.43, "eval_loss": 0.227674201130867, "eval_runtime": 258.8243, "eval_samples_per_second": 12.634, "eval_steps_per_second": 1.58, "eval_wer": 0.348725928807625, "step": 9000 }, { "epoch": 79.3, "learning_rate": 2.453191489361702e-05, "loss": 0.9029, "step": 9100 }, { "epoch": 80.17, "learning_rate": 2.346808510638298e-05, "loss": 0.9137, "step": 9200 }, { "epoch": 81.05, "learning_rate": 2.2404255319148938e-05, "loss": 0.9045, "step": 9300 }, { "epoch": 81.92, "learning_rate": 2.1340425531914897e-05, "loss": 0.905, "step": 9400 }, { "epoch": 82.79, "learning_rate": 2.027659574468085e-05, "loss": 0.9003, "step": 9500 }, { "epoch": 82.79, "eval_loss": 0.2229508012533188, "eval_runtime": 256.366, "eval_samples_per_second": 12.755, "eval_steps_per_second": 1.595, "eval_wer": 0.33623808597549115, "step": 9500 }, { "epoch": 83.66, "learning_rate": 1.921276595744681e-05, "loss": 0.8983, "step": 9600 }, { "epoch": 84.53, "learning_rate": 1.8148936170212765e-05, "loss": 0.9026, "step": 9700 }, { "epoch": 85.4, "learning_rate": 1.7095744680851063e-05, "loss": 0.8921, "step": 9800 }, { "epoch": 86.27, "learning_rate": 1.603191489361702e-05, "loss": 0.895, "step": 9900 }, { "epoch": 87.15, "learning_rate": 1.4968085106382978e-05, "loss": 0.8857, "step": 10000 }, { "epoch": 87.15, "eval_loss": 0.2246018499135971, "eval_runtime": 255.6354, "eval_samples_per_second": 12.792, "eval_steps_per_second": 1.6, "eval_wer": 0.3361602801011476, "step": 10000 }, { "epoch": 88.02, "learning_rate": 1.3904255319148937e-05, "loss": 0.8916, "step": 10100 }, { "epoch": 88.89, "learning_rate": 1.2840425531914893e-05, "loss": 0.8808, "step": 10200 }, { "epoch": 89.76, "learning_rate": 1.1776595744680852e-05, "loss": 0.8817, "step": 10300 }, { "epoch": 90.63, "learning_rate": 1.0712765957446808e-05, "loss": 0.8805, "step": 10400 }, { "epoch": 91.5, "learning_rate": 9.648936170212767e-06, "loss": 0.882, "step": 10500 }, { "epoch": 91.5, "eval_loss": 0.2236073762178421, "eval_runtime": 260.0762, "eval_samples_per_second": 12.573, "eval_steps_per_second": 1.573, "eval_wer": 0.3315308305777086, "step": 10500 }, { "epoch": 92.37, "learning_rate": 8.585106382978724e-06, "loss": 0.8768, "step": 10600 }, { "epoch": 93.25, "learning_rate": 7.521276595744681e-06, "loss": 0.8711, "step": 10700 }, { "epoch": 94.12, "learning_rate": 6.457446808510638e-06, "loss": 0.8774, "step": 10800 }, { "epoch": 94.99, "learning_rate": 5.393617021276596e-06, "loss": 0.8781, "step": 10900 }, { "epoch": 95.86, "learning_rate": 4.329787234042553e-06, "loss": 0.8719, "step": 11000 }, { "epoch": 95.86, "eval_loss": 0.22029033303260803, "eval_runtime": 256.8466, "eval_samples_per_second": 12.731, "eval_steps_per_second": 1.592, "eval_wer": 0.32705699280295664, "step": 11000 }, { "epoch": 96.73, "learning_rate": 3.265957446808511e-06, "loss": 0.8687, "step": 11100 }, { "epoch": 97.6, "learning_rate": 2.2021276595744685e-06, "loss": 0.8771, "step": 11200 }, { "epoch": 98.47, "learning_rate": 1.1382978723404256e-06, "loss": 0.8633, "step": 11300 }, { "epoch": 99.35, "learning_rate": 7.446808510638299e-08, "loss": 0.8779, "step": 11400 }, { "epoch": 99.35, "step": 11400, "total_flos": 1.353536402091675e+20, "train_loss": 1.3810767638892458, "train_runtime": 73797.3063, "train_samples_per_second": 9.949, "train_steps_per_second": 0.154 } ], "logging_steps": 100, "max_steps": 11400, "num_train_epochs": 100, "save_steps": 500, "total_flos": 1.353536402091675e+20, "trial_name": null, "trial_params": null }