{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.61198093941457, "global_step": 43500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.34, "learning_rate": 4.9500000000000004e-05, "loss": 6.2203, "step": 500 }, { "epoch": 0.34, "eval_loss": 3.002925157546997, "eval_runtime": 132.7789, "eval_samples_per_second": 14.942, "eval_steps_per_second": 1.868, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.68, "learning_rate": 9.95e-05, "loss": 2.2102, "step": 1000 }, { "epoch": 0.68, "eval_loss": 1.1446926593780518, "eval_runtime": 132.5997, "eval_samples_per_second": 14.962, "eval_steps_per_second": 1.87, "eval_wer": 0.8542348032028871, "step": 1000 }, { "epoch": 1.02, "learning_rate": 9.885070814952404e-05, "loss": 0.9714, "step": 1500 }, { "epoch": 1.02, "eval_loss": 0.7160163521766663, "eval_runtime": 132.7305, "eval_samples_per_second": 14.948, "eval_steps_per_second": 1.868, "eval_wer": 0.6600879666177963, "step": 1500 }, { "epoch": 1.36, "learning_rate": 9.76898072904574e-05, "loss": 0.7541, "step": 2000 }, { "epoch": 1.36, "eval_loss": 0.6261711120605469, "eval_runtime": 132.7267, "eval_samples_per_second": 14.948, "eval_steps_per_second": 1.869, "eval_wer": 0.594733280703733, "step": 2000 }, { "epoch": 1.7, "learning_rate": 9.652890643139077e-05, "loss": 0.6838, "step": 2500 }, { "epoch": 1.7, "eval_loss": 0.5789812207221985, "eval_runtime": 131.2782, "eval_samples_per_second": 15.113, "eval_steps_per_second": 1.889, "eval_wer": 0.5645088530506372, "step": 2500 }, { "epoch": 2.04, "learning_rate": 9.537032737404225e-05, "loss": 0.6287, "step": 3000 }, { "epoch": 2.04, "eval_loss": 0.6015335917472839, "eval_runtime": 133.004, "eval_samples_per_second": 14.917, "eval_steps_per_second": 1.865, "eval_wer": 0.5387955340024811, "step": 3000 }, { "epoch": 2.38, "learning_rate": 9.420942651497563e-05, "loss": 0.5439, "step": 3500 }, { "epoch": 2.38, "eval_loss": 0.5301734209060669, "eval_runtime": 133.1151, "eval_samples_per_second": 14.904, "eval_steps_per_second": 1.863, "eval_wer": 0.5076124957708357, "step": 3500 }, { "epoch": 2.72, "learning_rate": 9.304852565590899e-05, "loss": 0.5279, "step": 4000 }, { "epoch": 2.72, "eval_loss": 0.5215476155281067, "eval_runtime": 131.1447, "eval_samples_per_second": 15.128, "eval_steps_per_second": 1.891, "eval_wer": 0.48962445020863876, "step": 4000 }, { "epoch": 3.06, "learning_rate": 9.188762479684235e-05, "loss": 0.5006, "step": 4500 }, { "epoch": 3.06, "eval_loss": 0.4860161542892456, "eval_runtime": 133.2608, "eval_samples_per_second": 14.888, "eval_steps_per_second": 1.861, "eval_wer": 0.4685350174805458, "step": 4500 }, { "epoch": 3.4, "learning_rate": 9.072672393777572e-05, "loss": 0.4432, "step": 5000 }, { "epoch": 3.4, "eval_loss": 0.484553724527359, "eval_runtime": 133.4845, "eval_samples_per_second": 14.863, "eval_steps_per_second": 1.858, "eval_wer": 0.46864779519566935, "step": 5000 }, { "epoch": 3.74, "learning_rate": 8.956582307870908e-05, "loss": 0.4334, "step": 5500 }, { "epoch": 3.74, "eval_loss": 0.477468878030777, "eval_runtime": 132.8856, "eval_samples_per_second": 14.93, "eval_steps_per_second": 1.866, "eval_wer": 0.45415585880230064, "step": 5500 }, { "epoch": 4.08, "learning_rate": 8.840492221964245e-05, "loss": 0.4292, "step": 6000 }, { "epoch": 4.08, "eval_loss": 0.451526939868927, "eval_runtime": 132.3623, "eval_samples_per_second": 14.989, "eval_steps_per_second": 1.874, "eval_wer": 0.4291192060448855, "step": 6000 }, { "epoch": 4.42, "learning_rate": 8.724402136057582e-05, "loss": 0.3779, "step": 6500 }, { "epoch": 4.42, "eval_loss": 0.4495590329170227, "eval_runtime": 132.6275, "eval_samples_per_second": 14.959, "eval_steps_per_second": 1.87, "eval_wer": 0.42054809969550017, "step": 6500 }, { "epoch": 4.77, "learning_rate": 8.608312050150917e-05, "loss": 0.3783, "step": 7000 }, { "epoch": 4.77, "eval_loss": 0.45758938789367676, "eval_runtime": 132.4666, "eval_samples_per_second": 14.977, "eval_steps_per_second": 1.872, "eval_wer": 0.4184053231081538, "step": 7000 }, { "epoch": 5.11, "learning_rate": 8.492221964244254e-05, "loss": 0.3622, "step": 7500 }, { "epoch": 5.11, "eval_loss": 0.4782721698284149, "eval_runtime": 133.2094, "eval_samples_per_second": 14.894, "eval_steps_per_second": 1.862, "eval_wer": 0.4070147738806812, "step": 7500 }, { "epoch": 5.45, "learning_rate": 8.37613187833759e-05, "loss": 0.3278, "step": 8000 }, { "epoch": 5.45, "eval_loss": 0.44267573952674866, "eval_runtime": 133.2221, "eval_samples_per_second": 14.892, "eval_steps_per_second": 1.862, "eval_wer": 0.40278560956355025, "step": 8000 }, { "epoch": 5.79, "learning_rate": 8.260041792430927e-05, "loss": 0.3304, "step": 8500 }, { "epoch": 5.79, "eval_loss": 0.4482724368572235, "eval_runtime": 132.3395, "eval_samples_per_second": 14.992, "eval_steps_per_second": 1.874, "eval_wer": 0.4056050524416375, "step": 8500 }, { "epoch": 6.13, "learning_rate": 8.143951706524264e-05, "loss": 0.312, "step": 9000 }, { "epoch": 6.13, "eval_loss": 0.4750816524028778, "eval_runtime": 132.887, "eval_samples_per_second": 14.93, "eval_steps_per_second": 1.866, "eval_wer": 0.3882936731701816, "step": 9000 }, { "epoch": 6.47, "learning_rate": 8.027861620617599e-05, "loss": 0.29, "step": 9500 }, { "epoch": 6.47, "eval_loss": 0.4528858959674835, "eval_runtime": 133.211, "eval_samples_per_second": 14.894, "eval_steps_per_second": 1.862, "eval_wer": 0.3780872899515056, "step": 9500 }, { "epoch": 6.81, "learning_rate": 7.912235895054563e-05, "loss": 0.3057, "step": 10000 }, { "epoch": 6.81, "eval_loss": 0.5861864686012268, "eval_runtime": 133.2534, "eval_samples_per_second": 14.889, "eval_steps_per_second": 1.861, "eval_wer": 0.3783128453817526, "step": 10000 }, { "epoch": 7.15, "learning_rate": 7.796145809147898e-05, "loss": 0.2971, "step": 10500 }, { "epoch": 7.15, "eval_loss": 0.43457281589508057, "eval_runtime": 132.5985, "eval_samples_per_second": 14.962, "eval_steps_per_second": 1.87, "eval_wer": 0.3765084019397767, "step": 10500 }, { "epoch": 7.49, "learning_rate": 7.680055723241235e-05, "loss": 0.2684, "step": 11000 }, { "epoch": 7.49, "eval_loss": 0.4560734033584595, "eval_runtime": 132.2037, "eval_samples_per_second": 15.007, "eval_steps_per_second": 1.876, "eval_wer": 0.3732942370587572, "step": 11000 }, { "epoch": 7.83, "learning_rate": 7.563965637334573e-05, "loss": 0.2622, "step": 11500 }, { "epoch": 7.83, "eval_loss": 0.43240657448768616, "eval_runtime": 133.3517, "eval_samples_per_second": 14.878, "eval_steps_per_second": 1.86, "eval_wer": 0.37408368106462164, "step": 11500 }, { "epoch": 8.17, "learning_rate": 7.447875551427908e-05, "loss": 0.2635, "step": 12000 }, { "epoch": 8.17, "eval_loss": 0.4556463658809662, "eval_runtime": 133.4279, "eval_samples_per_second": 14.869, "eval_steps_per_second": 1.859, "eval_wer": 0.3789895116724935, "step": 12000 }, { "epoch": 8.51, "learning_rate": 7.331785465521245e-05, "loss": 0.2363, "step": 12500 }, { "epoch": 8.51, "eval_loss": 0.47497859597206116, "eval_runtime": 134.0404, "eval_samples_per_second": 14.802, "eval_steps_per_second": 1.85, "eval_wer": 0.3709822938987256, "step": 12500 }, { "epoch": 8.85, "learning_rate": 7.215695379614582e-05, "loss": 0.2516, "step": 13000 }, { "epoch": 8.85, "eval_loss": 0.4297301471233368, "eval_runtime": 133.4288, "eval_samples_per_second": 14.869, "eval_steps_per_second": 1.859, "eval_wer": 0.3635389647005752, "step": 13000 }, { "epoch": 9.19, "learning_rate": 7.099605293707917e-05, "loss": 0.2291, "step": 13500 }, { "epoch": 9.19, "eval_loss": 0.4862041175365448, "eval_runtime": 133.7259, "eval_samples_per_second": 14.836, "eval_steps_per_second": 1.855, "eval_wer": 0.36021202210443215, "step": 13500 }, { "epoch": 9.53, "learning_rate": 6.983515207801255e-05, "loss": 0.2266, "step": 14000 }, { "epoch": 9.53, "eval_loss": 0.4703396260738373, "eval_runtime": 133.0987, "eval_samples_per_second": 14.906, "eval_steps_per_second": 1.863, "eval_wer": 0.3546295252058193, "step": 14000 }, { "epoch": 9.87, "learning_rate": 6.867657302066403e-05, "loss": 0.2281, "step": 14500 }, { "epoch": 9.87, "eval_loss": 0.42882266640663147, "eval_runtime": 133.4534, "eval_samples_per_second": 14.867, "eval_steps_per_second": 1.858, "eval_wer": 0.35812563437464756, "step": 14500 }, { "epoch": 10.21, "learning_rate": 6.75156721615974e-05, "loss": 0.2126, "step": 15000 }, { "epoch": 10.21, "eval_loss": 0.4615735113620758, "eval_runtime": 132.7985, "eval_samples_per_second": 14.94, "eval_steps_per_second": 1.867, "eval_wer": 0.35626480207510997, "step": 15000 }, { "epoch": 10.55, "learning_rate": 6.635477130253078e-05, "loss": 0.217, "step": 15500 }, { "epoch": 10.55, "eval_loss": 0.49455586075782776, "eval_runtime": 133.6528, "eval_samples_per_second": 14.844, "eval_steps_per_second": 1.856, "eval_wer": 0.34560730799594, "step": 15500 }, { "epoch": 10.89, "learning_rate": 6.519387044346413e-05, "loss": 0.212, "step": 16000 }, { "epoch": 10.89, "eval_loss": 0.43323996663093567, "eval_runtime": 133.4419, "eval_samples_per_second": 14.868, "eval_steps_per_second": 1.858, "eval_wer": 0.3452689748505695, "step": 16000 }, { "epoch": 11.23, "learning_rate": 6.40329695843975e-05, "loss": 0.1986, "step": 16500 }, { "epoch": 11.23, "eval_loss": 0.4868086576461792, "eval_runtime": 131.4603, "eval_samples_per_second": 15.092, "eval_steps_per_second": 1.887, "eval_wer": 0.3399684222397654, "step": 16500 }, { "epoch": 11.57, "learning_rate": 6.2874390527049e-05, "loss": 0.2012, "step": 17000 }, { "epoch": 11.57, "eval_loss": 0.4474054276943207, "eval_runtime": 133.9018, "eval_samples_per_second": 14.817, "eval_steps_per_second": 1.852, "eval_wer": 0.3460020299988722, "step": 17000 }, { "epoch": 11.91, "learning_rate": 6.171348966798236e-05, "loss": 0.1998, "step": 17500 }, { "epoch": 11.91, "eval_loss": 0.45011407136917114, "eval_runtime": 132.7933, "eval_samples_per_second": 14.941, "eval_steps_per_second": 1.868, "eval_wer": 0.3362467576406902, "step": 17500 }, { "epoch": 12.25, "learning_rate": 6.055258880891572e-05, "loss": 0.1746, "step": 18000 }, { "epoch": 12.25, "eval_loss": 0.4731091558933258, "eval_runtime": 133.9219, "eval_samples_per_second": 14.815, "eval_steps_per_second": 1.852, "eval_wer": 0.33630314649825194, "step": 18000 }, { "epoch": 12.59, "learning_rate": 5.9391687949849086e-05, "loss": 0.1805, "step": 18500 }, { "epoch": 12.59, "eval_loss": 0.4601946771144867, "eval_runtime": 132.0532, "eval_samples_per_second": 15.024, "eval_steps_per_second": 1.878, "eval_wer": 0.33743092364948685, "step": 18500 }, { "epoch": 12.93, "learning_rate": 5.823310889250059e-05, "loss": 0.1826, "step": 19000 }, { "epoch": 12.93, "eval_loss": 0.47728002071380615, "eval_runtime": 133.9362, "eval_samples_per_second": 14.813, "eval_steps_per_second": 1.852, "eval_wer": 0.3276192624337431, "step": 19000 }, { "epoch": 13.27, "learning_rate": 5.707220803343395e-05, "loss": 0.1651, "step": 19500 }, { "epoch": 13.27, "eval_loss": 0.471328467130661, "eval_runtime": 111.9462, "eval_samples_per_second": 17.723, "eval_steps_per_second": 2.215, "eval_wer": 0.3304387053118304, "step": 19500 }, { "epoch": 13.61, "learning_rate": 5.591130717436731e-05, "loss": 0.1812, "step": 20000 }, { "epoch": 13.61, "eval_loss": 0.4280690550804138, "eval_runtime": 110.8492, "eval_samples_per_second": 17.898, "eval_steps_per_second": 2.237, "eval_wer": 0.32276982068343296, "step": 20000 }, { "epoch": 13.96, "learning_rate": 5.4750406315300676e-05, "loss": 0.1666, "step": 20500 }, { "epoch": 13.96, "eval_loss": 0.4796316623687744, "eval_runtime": 110.5982, "eval_samples_per_second": 17.939, "eval_steps_per_second": 2.242, "eval_wer": 0.32530731927371154, "step": 20500 }, { "epoch": 14.3, "learning_rate": 5.359182725795218e-05, "loss": 0.1553, "step": 21000 }, { "epoch": 14.3, "eval_loss": 0.4721369445323944, "eval_runtime": 128.2013, "eval_samples_per_second": 15.476, "eval_steps_per_second": 1.934, "eval_wer": 0.3259275967068907, "step": 21000 }, { "epoch": 14.64, "learning_rate": 5.243092639888554e-05, "loss": 0.1545, "step": 21500 }, { "epoch": 14.64, "eval_loss": 0.4527774453163147, "eval_runtime": 132.8486, "eval_samples_per_second": 14.934, "eval_steps_per_second": 1.867, "eval_wer": 0.32682981842787867, "step": 21500 }, { "epoch": 14.98, "learning_rate": 5.12700255398189e-05, "loss": 0.1576, "step": 22000 }, { "epoch": 14.98, "eval_loss": 0.4553755819797516, "eval_runtime": 131.6613, "eval_samples_per_second": 15.069, "eval_steps_per_second": 1.884, "eval_wer": 0.3252509304161498, "step": 22000 }, { "epoch": 15.32, "learning_rate": 5.0109124680752265e-05, "loss": 0.1511, "step": 22500 }, { "epoch": 15.32, "eval_loss": 0.4580444097518921, "eval_runtime": 134.2974, "eval_samples_per_second": 14.773, "eval_steps_per_second": 1.847, "eval_wer": 0.3179203789331228, "step": 22500 }, { "epoch": 15.66, "learning_rate": 4.895054562340376e-05, "loss": 0.1444, "step": 23000 }, { "epoch": 15.66, "eval_loss": 0.4659278094768524, "eval_runtime": 134.6637, "eval_samples_per_second": 14.733, "eval_steps_per_second": 1.842, "eval_wer": 0.32119093267170407, "step": 23000 }, { "epoch": 16.0, "learning_rate": 4.7791966566055266e-05, "loss": 0.1496, "step": 23500 }, { "epoch": 16.0, "eval_loss": 0.4660351872444153, "eval_runtime": 133.4382, "eval_samples_per_second": 14.868, "eval_steps_per_second": 1.859, "eval_wer": 0.32598398556445246, "step": 23500 }, { "epoch": 16.34, "learning_rate": 4.6631065706988625e-05, "loss": 0.1327, "step": 24000 }, { "epoch": 16.34, "eval_loss": 0.4934828281402588, "eval_runtime": 134.3743, "eval_samples_per_second": 14.765, "eval_steps_per_second": 1.846, "eval_wer": 0.31825871207849327, "step": 24000 }, { "epoch": 16.68, "learning_rate": 4.547016484792199e-05, "loss": 0.1535, "step": 24500 }, { "epoch": 16.68, "eval_loss": 0.46466243267059326, "eval_runtime": 132.9271, "eval_samples_per_second": 14.925, "eval_steps_per_second": 1.866, "eval_wer": 0.3113792714559603, "step": 24500 }, { "epoch": 17.02, "learning_rate": 4.430926398885535e-05, "loss": 0.1438, "step": 25000 }, { "epoch": 17.02, "eval_loss": 0.5053276419639587, "eval_runtime": 132.7443, "eval_samples_per_second": 14.946, "eval_steps_per_second": 1.868, "eval_wer": 0.31487538062478854, "step": 25000 }, { "epoch": 17.36, "learning_rate": 4.314836312978872e-05, "loss": 0.1264, "step": 25500 }, { "epoch": 17.36, "eval_loss": 0.48962870240211487, "eval_runtime": 133.9594, "eval_samples_per_second": 14.81, "eval_steps_per_second": 1.851, "eval_wer": 0.3131273260403744, "step": 25500 }, { "epoch": 17.7, "learning_rate": 4.1987462270722084e-05, "loss": 0.1269, "step": 26000 }, { "epoch": 17.7, "eval_loss": 0.46815410256385803, "eval_runtime": 132.7946, "eval_samples_per_second": 14.94, "eval_steps_per_second": 1.868, "eval_wer": 0.30748844028419986, "step": 26000 }, { "epoch": 18.04, "learning_rate": 4.0826561411655444e-05, "loss": 0.1272, "step": 26500 }, { "epoch": 18.04, "eval_loss": 0.4759911596775055, "eval_runtime": 132.8376, "eval_samples_per_second": 14.936, "eval_steps_per_second": 1.867, "eval_wer": 0.3113792714559603, "step": 26500 }, { "epoch": 18.38, "learning_rate": 3.966566055258881e-05, "loss": 0.1219, "step": 27000 }, { "epoch": 18.38, "eval_loss": 0.49612942337989807, "eval_runtime": 133.1179, "eval_samples_per_second": 14.904, "eval_steps_per_second": 1.863, "eval_wer": 0.31487538062478854, "step": 27000 }, { "epoch": 18.72, "learning_rate": 3.850475969352218e-05, "loss": 0.123, "step": 27500 }, { "epoch": 18.72, "eval_loss": 0.46862220764160156, "eval_runtime": 110.6681, "eval_samples_per_second": 17.927, "eval_steps_per_second": 2.241, "eval_wer": 0.3072064959963911, "step": 27500 }, { "epoch": 19.06, "learning_rate": 3.734385883445554e-05, "loss": 0.1262, "step": 28000 }, { "epoch": 19.06, "eval_loss": 0.49365413188934326, "eval_runtime": 109.7879, "eval_samples_per_second": 18.071, "eval_steps_per_second": 2.259, "eval_wer": 0.30889816172324347, "step": 28000 }, { "epoch": 19.4, "learning_rate": 3.61829579753889e-05, "loss": 0.1165, "step": 28500 }, { "epoch": 19.4, "eval_loss": 0.4988892376422882, "eval_runtime": 110.8122, "eval_samples_per_second": 17.904, "eval_steps_per_second": 2.238, "eval_wer": 0.30540205255441527, "step": 28500 }, { "epoch": 19.74, "learning_rate": 3.50243789180404e-05, "loss": 0.1213, "step": 29000 }, { "epoch": 19.74, "eval_loss": 0.4937494993209839, "eval_runtime": 109.7673, "eval_samples_per_second": 18.075, "eval_steps_per_second": 2.259, "eval_wer": 0.30404871997293337, "step": 29000 }, { "epoch": 20.08, "learning_rate": 3.3863478058973766e-05, "loss": 0.1113, "step": 29500 }, { "epoch": 20.08, "eval_loss": 0.5031572580337524, "eval_runtime": 111.1946, "eval_samples_per_second": 17.843, "eval_steps_per_second": 2.23, "eval_wer": 0.3037667756851246, "step": 29500 }, { "epoch": 20.42, "learning_rate": 3.270257719990713e-05, "loss": 0.1087, "step": 30000 }, { "epoch": 20.42, "eval_loss": 0.49494612216949463, "eval_runtime": 126.8851, "eval_samples_per_second": 15.636, "eval_steps_per_second": 1.955, "eval_wer": 0.299030111649938, "step": 30000 }, { "epoch": 20.76, "learning_rate": 3.154167634084049e-05, "loss": 0.1128, "step": 30500 }, { "epoch": 20.76, "eval_loss": 0.49934807419776917, "eval_runtime": 128.8495, "eval_samples_per_second": 15.398, "eval_steps_per_second": 1.925, "eval_wer": 0.3017367768129018, "step": 30500 }, { "epoch": 21.1, "learning_rate": 3.038077548177386e-05, "loss": 0.1151, "step": 31000 }, { "epoch": 21.1, "eval_loss": 0.5088058114051819, "eval_runtime": 128.5026, "eval_samples_per_second": 15.439, "eval_steps_per_second": 1.93, "eval_wer": 0.30184955452802525, "step": 31000 }, { "epoch": 21.44, "learning_rate": 2.922219642442536e-05, "loss": 0.1025, "step": 31500 }, { "epoch": 21.44, "eval_loss": 0.47611942887306213, "eval_runtime": 111.1648, "eval_samples_per_second": 17.847, "eval_steps_per_second": 2.231, "eval_wer": 0.29891733393481446, "step": 31500 }, { "epoch": 21.78, "learning_rate": 2.8063617367076856e-05, "loss": 0.1016, "step": 32000 }, { "epoch": 21.78, "eval_loss": 0.5126113891601562, "eval_runtime": 121.5373, "eval_samples_per_second": 16.324, "eval_steps_per_second": 2.041, "eval_wer": 0.30275177624901317, "step": 32000 }, { "epoch": 22.12, "learning_rate": 2.690271650801022e-05, "loss": 0.0991, "step": 32500 }, { "epoch": 22.12, "eval_loss": 0.5214097499847412, "eval_runtime": 129.4856, "eval_samples_per_second": 15.322, "eval_steps_per_second": 1.915, "eval_wer": 0.2958159467689185, "step": 32500 }, { "epoch": 22.46, "learning_rate": 2.574181564894358e-05, "loss": 0.0951, "step": 33000 }, { "epoch": 22.46, "eval_loss": 0.5233449339866638, "eval_runtime": 130.9955, "eval_samples_per_second": 15.146, "eval_steps_per_second": 1.893, "eval_wer": 0.2923198376000902, "step": 33000 }, { "epoch": 22.8, "learning_rate": 2.4580914789876948e-05, "loss": 0.1049, "step": 33500 }, { "epoch": 22.8, "eval_loss": 0.48444411158561707, "eval_runtime": 110.8951, "eval_samples_per_second": 17.891, "eval_steps_per_second": 2.236, "eval_wer": 0.29677455734746816, "step": 33500 }, { "epoch": 23.14, "learning_rate": 2.342001393081031e-05, "loss": 0.1014, "step": 34000 }, { "epoch": 23.14, "eval_loss": 0.4997089207172394, "eval_runtime": 111.4563, "eval_samples_per_second": 17.801, "eval_steps_per_second": 2.225, "eval_wer": 0.292376226457652, "step": 34000 }, { "epoch": 23.49, "learning_rate": 2.2261434873461805e-05, "loss": 0.0959, "step": 34500 }, { "epoch": 23.49, "eval_loss": 0.48040756583213806, "eval_runtime": 129.4236, "eval_samples_per_second": 15.33, "eval_steps_per_second": 1.916, "eval_wer": 0.28978233900981165, "step": 34500 }, { "epoch": 23.83, "learning_rate": 2.110053401439517e-05, "loss": 0.098, "step": 35000 }, { "epoch": 23.83, "eval_loss": 0.501395046710968, "eval_runtime": 129.507, "eval_samples_per_second": 15.32, "eval_steps_per_second": 1.915, "eval_wer": 0.291699560166911, "step": 35000 }, { "epoch": 24.17, "learning_rate": 1.9939633155328534e-05, "loss": 0.0973, "step": 35500 }, { "epoch": 24.17, "eval_loss": 0.5193932056427002, "eval_runtime": 129.3856, "eval_samples_per_second": 15.334, "eval_steps_per_second": 1.917, "eval_wer": 0.28955678357956466, "step": 35500 }, { "epoch": 24.51, "learning_rate": 1.87787322962619e-05, "loss": 0.0876, "step": 36000 }, { "epoch": 24.51, "eval_loss": 0.5203258395195007, "eval_runtime": 125.0985, "eval_samples_per_second": 15.859, "eval_steps_per_second": 1.982, "eval_wer": 0.28718845156197137, "step": 36000 }, { "epoch": 24.85, "learning_rate": 1.7617831437195264e-05, "loss": 0.0902, "step": 36500 }, { "epoch": 24.85, "eval_loss": 0.536376953125, "eval_runtime": 113.525, "eval_samples_per_second": 17.476, "eval_steps_per_second": 2.185, "eval_wer": 0.29136122702154055, "step": 36500 }, { "epoch": 25.19, "learning_rate": 1.645693057812863e-05, "loss": 0.0889, "step": 37000 }, { "epoch": 25.19, "eval_loss": 0.531440019607544, "eval_runtime": 123.4292, "eval_samples_per_second": 16.074, "eval_steps_per_second": 2.009, "eval_wer": 0.2880906732829593, "step": 37000 }, { "epoch": 25.53, "learning_rate": 1.5296029719061993e-05, "loss": 0.0865, "step": 37500 }, { "epoch": 25.53, "eval_loss": 0.5107194185256958, "eval_runtime": 129.5881, "eval_samples_per_second": 15.31, "eval_steps_per_second": 1.914, "eval_wer": 0.285271230404872, "step": 37500 }, { "epoch": 25.87, "learning_rate": 1.4135128859995356e-05, "loss": 0.0859, "step": 38000 }, { "epoch": 25.87, "eval_loss": 0.5254319310188293, "eval_runtime": 128.9308, "eval_samples_per_second": 15.388, "eval_steps_per_second": 1.924, "eval_wer": 0.28780872899515053, "step": 38000 }, { "epoch": 26.21, "learning_rate": 1.2974228000928721e-05, "loss": 0.0813, "step": 38500 }, { "epoch": 26.21, "eval_loss": 0.5275471806526184, "eval_runtime": 110.8092, "eval_samples_per_second": 17.905, "eval_steps_per_second": 2.238, "eval_wer": 0.2846509529716928, "step": 38500 }, { "epoch": 26.55, "learning_rate": 1.1813327141862086e-05, "loss": 0.0881, "step": 39000 }, { "epoch": 26.55, "eval_loss": 0.5125020742416382, "eval_runtime": 113.2218, "eval_samples_per_second": 17.523, "eval_steps_per_second": 2.19, "eval_wer": 0.28324123153264913, "step": 39000 }, { "epoch": 26.89, "learning_rate": 1.0654748084513583e-05, "loss": 0.0822, "step": 39500 }, { "epoch": 26.89, "eval_loss": 0.5309813618659973, "eval_runtime": 129.6599, "eval_samples_per_second": 15.302, "eval_steps_per_second": 1.913, "eval_wer": 0.28634261869854516, "step": 39500 }, { "epoch": 27.23, "learning_rate": 9.493847225446947e-06, "loss": 0.0837, "step": 40000 }, { "epoch": 27.23, "eval_loss": 0.5188203454017639, "eval_runtime": 122.3408, "eval_samples_per_second": 16.217, "eval_steps_per_second": 2.027, "eval_wer": 0.28312845381752566, "step": 40000 }, { "epoch": 27.57, "learning_rate": 8.332946366380312e-06, "loss": 0.0823, "step": 40500 }, { "epoch": 27.57, "eval_loss": 0.5201263427734375, "eval_runtime": 118.4812, "eval_samples_per_second": 16.745, "eval_steps_per_second": 2.093, "eval_wer": 0.28132401037554977, "step": 40500 }, { "epoch": 27.91, "learning_rate": 7.172045507313675e-06, "loss": 0.0768, "step": 41000 }, { "epoch": 27.91, "eval_loss": 0.5197951793670654, "eval_runtime": 112.4253, "eval_samples_per_second": 17.647, "eval_steps_per_second": 2.206, "eval_wer": 0.282169843238976, "step": 41000 }, { "epoch": 28.25, "learning_rate": 6.013466449965173e-06, "loss": 0.0858, "step": 41500 }, { "epoch": 28.25, "eval_loss": 0.5245384573936462, "eval_runtime": 128.7266, "eval_samples_per_second": 15.413, "eval_steps_per_second": 1.927, "eval_wer": 0.2786737340701477, "step": 41500 }, { "epoch": 28.59, "learning_rate": 4.8525655908985375e-06, "loss": 0.0757, "step": 42000 }, { "epoch": 28.59, "eval_loss": 0.5289037227630615, "eval_runtime": 112.5429, "eval_samples_per_second": 17.629, "eval_steps_per_second": 2.204, "eval_wer": 0.2766437351979249, "step": 42000 }, { "epoch": 28.93, "learning_rate": 3.6916647318319014e-06, "loss": 0.0728, "step": 42500 }, { "epoch": 28.93, "eval_loss": 0.5263972282409668, "eval_runtime": 128.0915, "eval_samples_per_second": 15.489, "eval_steps_per_second": 1.936, "eval_wer": 0.2779970677794068, "step": 42500 }, { "epoch": 29.27, "learning_rate": 2.5307638727652657e-06, "loss": 0.0761, "step": 43000 }, { "epoch": 29.27, "eval_loss": 0.5295293927192688, "eval_runtime": 128.9693, "eval_samples_per_second": 15.384, "eval_steps_per_second": 1.923, "eval_wer": 0.2770384572008571, "step": 43000 }, { "epoch": 29.61, "learning_rate": 1.3698630136986302e-06, "loss": 0.0739, "step": 43500 }, { "epoch": 29.61, "eval_loss": 0.5267478823661804, "eval_runtime": 130.4434, "eval_samples_per_second": 15.21, "eval_steps_per_second": 1.901, "eval_wer": 0.2776023457764746, "step": 43500 } ], "max_steps": 44070, "num_train_epochs": 30, "total_flos": 1.385946883261073e+19, "trial_name": null, "trial_params": null }