{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 29.61198093941457,
  "global_step": 43500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.34,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 6.2203,
      "step": 500
    },
    {
      "epoch": 0.34,
      "eval_loss": 3.002925157546997,
      "eval_runtime": 132.7789,
      "eval_samples_per_second": 14.942,
      "eval_steps_per_second": 1.868,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 0.68,
      "learning_rate": 9.95e-05,
      "loss": 2.2102,
      "step": 1000
    },
    {
      "epoch": 0.68,
      "eval_loss": 1.1446926593780518,
      "eval_runtime": 132.5997,
      "eval_samples_per_second": 14.962,
      "eval_steps_per_second": 1.87,
      "eval_wer": 0.8542348032028871,
      "step": 1000
    },
    {
      "epoch": 1.02,
      "learning_rate": 9.885070814952404e-05,
      "loss": 0.9714,
      "step": 1500
    },
    {
      "epoch": 1.02,
      "eval_loss": 0.7160163521766663,
      "eval_runtime": 132.7305,
      "eval_samples_per_second": 14.948,
      "eval_steps_per_second": 1.868,
      "eval_wer": 0.6600879666177963,
      "step": 1500
    },
    {
      "epoch": 1.36,
      "learning_rate": 9.76898072904574e-05,
      "loss": 0.7541,
      "step": 2000
    },
    {
      "epoch": 1.36,
      "eval_loss": 0.6261711120605469,
      "eval_runtime": 132.7267,
      "eval_samples_per_second": 14.948,
      "eval_steps_per_second": 1.869,
      "eval_wer": 0.594733280703733,
      "step": 2000
    },
    {
      "epoch": 1.7,
      "learning_rate": 9.652890643139077e-05,
      "loss": 0.6838,
      "step": 2500
    },
    {
      "epoch": 1.7,
      "eval_loss": 0.5789812207221985,
      "eval_runtime": 131.2782,
      "eval_samples_per_second": 15.113,
      "eval_steps_per_second": 1.889,
      "eval_wer": 0.5645088530506372,
      "step": 2500
    },
    {
      "epoch": 2.04,
      "learning_rate": 9.537032737404225e-05,
      "loss": 0.6287,
      "step": 3000
    },
    {
      "epoch": 2.04,
      "eval_loss": 0.6015335917472839,
      "eval_runtime": 133.004,
      "eval_samples_per_second": 14.917,
      "eval_steps_per_second": 1.865,
      "eval_wer": 0.5387955340024811,
      "step": 3000
    },
    {
      "epoch": 2.38,
      "learning_rate": 9.420942651497563e-05,
      "loss": 0.5439,
      "step": 3500
    },
    {
      "epoch": 2.38,
      "eval_loss": 0.5301734209060669,
      "eval_runtime": 133.1151,
      "eval_samples_per_second": 14.904,
      "eval_steps_per_second": 1.863,
      "eval_wer": 0.5076124957708357,
      "step": 3500
    },
    {
      "epoch": 2.72,
      "learning_rate": 9.304852565590899e-05,
      "loss": 0.5279,
      "step": 4000
    },
    {
      "epoch": 2.72,
      "eval_loss": 0.5215476155281067,
      "eval_runtime": 131.1447,
      "eval_samples_per_second": 15.128,
      "eval_steps_per_second": 1.891,
      "eval_wer": 0.48962445020863876,
      "step": 4000
    },
    {
      "epoch": 3.06,
      "learning_rate": 9.188762479684235e-05,
      "loss": 0.5006,
      "step": 4500
    },
    {
      "epoch": 3.06,
      "eval_loss": 0.4860161542892456,
      "eval_runtime": 133.2608,
      "eval_samples_per_second": 14.888,
      "eval_steps_per_second": 1.861,
      "eval_wer": 0.4685350174805458,
      "step": 4500
    },
    {
      "epoch": 3.4,
      "learning_rate": 9.072672393777572e-05,
      "loss": 0.4432,
      "step": 5000
    },
    {
      "epoch": 3.4,
      "eval_loss": 0.484553724527359,
      "eval_runtime": 133.4845,
      "eval_samples_per_second": 14.863,
      "eval_steps_per_second": 1.858,
      "eval_wer": 0.46864779519566935,
      "step": 5000
    },
    {
      "epoch": 3.74,
      "learning_rate": 8.956582307870908e-05,
      "loss": 0.4334,
      "step": 5500
    },
    {
      "epoch": 3.74,
      "eval_loss": 0.477468878030777,
      "eval_runtime": 132.8856,
      "eval_samples_per_second": 14.93,
      "eval_steps_per_second": 1.866,
      "eval_wer": 0.45415585880230064,
      "step": 5500
    },
    {
      "epoch": 4.08,
      "learning_rate": 8.840492221964245e-05,
      "loss": 0.4292,
      "step": 6000
    },
    {
      "epoch": 4.08,
      "eval_loss": 0.451526939868927,
      "eval_runtime": 132.3623,
      "eval_samples_per_second": 14.989,
      "eval_steps_per_second": 1.874,
      "eval_wer": 0.4291192060448855,
      "step": 6000
    },
    {
      "epoch": 4.42,
      "learning_rate": 8.724402136057582e-05,
      "loss": 0.3779,
      "step": 6500
    },
    {
      "epoch": 4.42,
      "eval_loss": 0.4495590329170227,
      "eval_runtime": 132.6275,
      "eval_samples_per_second": 14.959,
      "eval_steps_per_second": 1.87,
      "eval_wer": 0.42054809969550017,
      "step": 6500
    },
    {
      "epoch": 4.77,
      "learning_rate": 8.608312050150917e-05,
      "loss": 0.3783,
      "step": 7000
    },
    {
      "epoch": 4.77,
      "eval_loss": 0.45758938789367676,
      "eval_runtime": 132.4666,
      "eval_samples_per_second": 14.977,
      "eval_steps_per_second": 1.872,
      "eval_wer": 0.4184053231081538,
      "step": 7000
    },
    {
      "epoch": 5.11,
      "learning_rate": 8.492221964244254e-05,
      "loss": 0.3622,
      "step": 7500
    },
    {
      "epoch": 5.11,
      "eval_loss": 0.4782721698284149,
      "eval_runtime": 133.2094,
      "eval_samples_per_second": 14.894,
      "eval_steps_per_second": 1.862,
      "eval_wer": 0.4070147738806812,
      "step": 7500
    },
    {
      "epoch": 5.45,
      "learning_rate": 8.37613187833759e-05,
      "loss": 0.3278,
      "step": 8000
    },
    {
      "epoch": 5.45,
      "eval_loss": 0.44267573952674866,
      "eval_runtime": 133.2221,
      "eval_samples_per_second": 14.892,
      "eval_steps_per_second": 1.862,
      "eval_wer": 0.40278560956355025,
      "step": 8000
    },
    {
      "epoch": 5.79,
      "learning_rate": 8.260041792430927e-05,
      "loss": 0.3304,
      "step": 8500
    },
    {
      "epoch": 5.79,
      "eval_loss": 0.4482724368572235,
      "eval_runtime": 132.3395,
      "eval_samples_per_second": 14.992,
      "eval_steps_per_second": 1.874,
      "eval_wer": 0.4056050524416375,
      "step": 8500
    },
    {
      "epoch": 6.13,
      "learning_rate": 8.143951706524264e-05,
      "loss": 0.312,
      "step": 9000
    },
    {
      "epoch": 6.13,
      "eval_loss": 0.4750816524028778,
      "eval_runtime": 132.887,
      "eval_samples_per_second": 14.93,
      "eval_steps_per_second": 1.866,
      "eval_wer": 0.3882936731701816,
      "step": 9000
    },
    {
      "epoch": 6.47,
      "learning_rate": 8.027861620617599e-05,
      "loss": 0.29,
      "step": 9500
    },
    {
      "epoch": 6.47,
      "eval_loss": 0.4528858959674835,
      "eval_runtime": 133.211,
      "eval_samples_per_second": 14.894,
      "eval_steps_per_second": 1.862,
      "eval_wer": 0.3780872899515056,
      "step": 9500
    },
    {
      "epoch": 6.81,
      "learning_rate": 7.912235895054563e-05,
      "loss": 0.3057,
      "step": 10000
    },
    {
      "epoch": 6.81,
      "eval_loss": 0.5861864686012268,
      "eval_runtime": 133.2534,
      "eval_samples_per_second": 14.889,
      "eval_steps_per_second": 1.861,
      "eval_wer": 0.3783128453817526,
      "step": 10000
    },
    {
      "epoch": 7.15,
      "learning_rate": 7.796145809147898e-05,
      "loss": 0.2971,
      "step": 10500
    },
    {
      "epoch": 7.15,
      "eval_loss": 0.43457281589508057,
      "eval_runtime": 132.5985,
      "eval_samples_per_second": 14.962,
      "eval_steps_per_second": 1.87,
      "eval_wer": 0.3765084019397767,
      "step": 10500
    },
    {
      "epoch": 7.49,
      "learning_rate": 7.680055723241235e-05,
      "loss": 0.2684,
      "step": 11000
    },
    {
      "epoch": 7.49,
      "eval_loss": 0.4560734033584595,
      "eval_runtime": 132.2037,
      "eval_samples_per_second": 15.007,
      "eval_steps_per_second": 1.876,
      "eval_wer": 0.3732942370587572,
      "step": 11000
    },
    {
      "epoch": 7.83,
      "learning_rate": 7.563965637334573e-05,
      "loss": 0.2622,
      "step": 11500
    },
    {
      "epoch": 7.83,
      "eval_loss": 0.43240657448768616,
      "eval_runtime": 133.3517,
      "eval_samples_per_second": 14.878,
      "eval_steps_per_second": 1.86,
      "eval_wer": 0.37408368106462164,
      "step": 11500
    },
    {
      "epoch": 8.17,
      "learning_rate": 7.447875551427908e-05,
      "loss": 0.2635,
      "step": 12000
    },
    {
      "epoch": 8.17,
      "eval_loss": 0.4556463658809662,
      "eval_runtime": 133.4279,
      "eval_samples_per_second": 14.869,
      "eval_steps_per_second": 1.859,
      "eval_wer": 0.3789895116724935,
      "step": 12000
    },
    {
      "epoch": 8.51,
      "learning_rate": 7.331785465521245e-05,
      "loss": 0.2363,
      "step": 12500
    },
    {
      "epoch": 8.51,
      "eval_loss": 0.47497859597206116,
      "eval_runtime": 134.0404,
      "eval_samples_per_second": 14.802,
      "eval_steps_per_second": 1.85,
      "eval_wer": 0.3709822938987256,
      "step": 12500
    },
    {
      "epoch": 8.85,
      "learning_rate": 7.215695379614582e-05,
      "loss": 0.2516,
      "step": 13000
    },
    {
      "epoch": 8.85,
      "eval_loss": 0.4297301471233368,
      "eval_runtime": 133.4288,
      "eval_samples_per_second": 14.869,
      "eval_steps_per_second": 1.859,
      "eval_wer": 0.3635389647005752,
      "step": 13000
    },
    {
      "epoch": 9.19,
      "learning_rate": 7.099605293707917e-05,
      "loss": 0.2291,
      "step": 13500
    },
    {
      "epoch": 9.19,
      "eval_loss": 0.4862041175365448,
      "eval_runtime": 133.7259,
      "eval_samples_per_second": 14.836,
      "eval_steps_per_second": 1.855,
      "eval_wer": 0.36021202210443215,
      "step": 13500
    },
    {
      "epoch": 9.53,
      "learning_rate": 6.983515207801255e-05,
      "loss": 0.2266,
      "step": 14000
    },
    {
      "epoch": 9.53,
      "eval_loss": 0.4703396260738373,
      "eval_runtime": 133.0987,
      "eval_samples_per_second": 14.906,
      "eval_steps_per_second": 1.863,
      "eval_wer": 0.3546295252058193,
      "step": 14000
    },
    {
      "epoch": 9.87,
      "learning_rate": 6.867657302066403e-05,
      "loss": 0.2281,
      "step": 14500
    },
    {
      "epoch": 9.87,
      "eval_loss": 0.42882266640663147,
      "eval_runtime": 133.4534,
      "eval_samples_per_second": 14.867,
      "eval_steps_per_second": 1.858,
      "eval_wer": 0.35812563437464756,
      "step": 14500
    },
    {
      "epoch": 10.21,
      "learning_rate": 6.75156721615974e-05,
      "loss": 0.2126,
      "step": 15000
    },
    {
      "epoch": 10.21,
      "eval_loss": 0.4615735113620758,
      "eval_runtime": 132.7985,
      "eval_samples_per_second": 14.94,
      "eval_steps_per_second": 1.867,
      "eval_wer": 0.35626480207510997,
      "step": 15000
    },
    {
      "epoch": 10.55,
      "learning_rate": 6.635477130253078e-05,
      "loss": 0.217,
      "step": 15500
    },
    {
      "epoch": 10.55,
      "eval_loss": 0.49455586075782776,
      "eval_runtime": 133.6528,
      "eval_samples_per_second": 14.844,
      "eval_steps_per_second": 1.856,
      "eval_wer": 0.34560730799594,
      "step": 15500
    },
    {
      "epoch": 10.89,
      "learning_rate": 6.519387044346413e-05,
      "loss": 0.212,
      "step": 16000
    },
    {
      "epoch": 10.89,
      "eval_loss": 0.43323996663093567,
      "eval_runtime": 133.4419,
      "eval_samples_per_second": 14.868,
      "eval_steps_per_second": 1.858,
      "eval_wer": 0.3452689748505695,
      "step": 16000
    },
    {
      "epoch": 11.23,
      "learning_rate": 6.40329695843975e-05,
      "loss": 0.1986,
      "step": 16500
    },
    {
      "epoch": 11.23,
      "eval_loss": 0.4868086576461792,
      "eval_runtime": 131.4603,
      "eval_samples_per_second": 15.092,
      "eval_steps_per_second": 1.887,
      "eval_wer": 0.3399684222397654,
      "step": 16500
    },
    {
      "epoch": 11.57,
      "learning_rate": 6.2874390527049e-05,
      "loss": 0.2012,
      "step": 17000
    },
    {
      "epoch": 11.57,
      "eval_loss": 0.4474054276943207,
      "eval_runtime": 133.9018,
      "eval_samples_per_second": 14.817,
      "eval_steps_per_second": 1.852,
      "eval_wer": 0.3460020299988722,
      "step": 17000
    },
    {
      "epoch": 11.91,
      "learning_rate": 6.171348966798236e-05,
      "loss": 0.1998,
      "step": 17500
    },
    {
      "epoch": 11.91,
      "eval_loss": 0.45011407136917114,
      "eval_runtime": 132.7933,
      "eval_samples_per_second": 14.941,
      "eval_steps_per_second": 1.868,
      "eval_wer": 0.3362467576406902,
      "step": 17500
    },
    {
      "epoch": 12.25,
      "learning_rate": 6.055258880891572e-05,
      "loss": 0.1746,
      "step": 18000
    },
    {
      "epoch": 12.25,
      "eval_loss": 0.4731091558933258,
      "eval_runtime": 133.9219,
      "eval_samples_per_second": 14.815,
      "eval_steps_per_second": 1.852,
      "eval_wer": 0.33630314649825194,
      "step": 18000
    },
    {
      "epoch": 12.59,
      "learning_rate": 5.9391687949849086e-05,
      "loss": 0.1805,
      "step": 18500
    },
    {
      "epoch": 12.59,
      "eval_loss": 0.4601946771144867,
      "eval_runtime": 132.0532,
      "eval_samples_per_second": 15.024,
      "eval_steps_per_second": 1.878,
      "eval_wer": 0.33743092364948685,
      "step": 18500
    },
    {
      "epoch": 12.93,
      "learning_rate": 5.823310889250059e-05,
      "loss": 0.1826,
      "step": 19000
    },
    {
      "epoch": 12.93,
      "eval_loss": 0.47728002071380615,
      "eval_runtime": 133.9362,
      "eval_samples_per_second": 14.813,
      "eval_steps_per_second": 1.852,
      "eval_wer": 0.3276192624337431,
      "step": 19000
    },
    {
      "epoch": 13.27,
      "learning_rate": 5.707220803343395e-05,
      "loss": 0.1651,
      "step": 19500
    },
    {
      "epoch": 13.27,
      "eval_loss": 0.471328467130661,
      "eval_runtime": 111.9462,
      "eval_samples_per_second": 17.723,
      "eval_steps_per_second": 2.215,
      "eval_wer": 0.3304387053118304,
      "step": 19500
    },
    {
      "epoch": 13.61,
      "learning_rate": 5.591130717436731e-05,
      "loss": 0.1812,
      "step": 20000
    },
    {
      "epoch": 13.61,
      "eval_loss": 0.4280690550804138,
      "eval_runtime": 110.8492,
      "eval_samples_per_second": 17.898,
      "eval_steps_per_second": 2.237,
      "eval_wer": 0.32276982068343296,
      "step": 20000
    },
    {
      "epoch": 13.96,
      "learning_rate": 5.4750406315300676e-05,
      "loss": 0.1666,
      "step": 20500
    },
    {
      "epoch": 13.96,
      "eval_loss": 0.4796316623687744,
      "eval_runtime": 110.5982,
      "eval_samples_per_second": 17.939,
      "eval_steps_per_second": 2.242,
      "eval_wer": 0.32530731927371154,
      "step": 20500
    },
    {
      "epoch": 14.3,
      "learning_rate": 5.359182725795218e-05,
      "loss": 0.1553,
      "step": 21000
    },
    {
      "epoch": 14.3,
      "eval_loss": 0.4721369445323944,
      "eval_runtime": 128.2013,
      "eval_samples_per_second": 15.476,
      "eval_steps_per_second": 1.934,
      "eval_wer": 0.3259275967068907,
      "step": 21000
    },
    {
      "epoch": 14.64,
      "learning_rate": 5.243092639888554e-05,
      "loss": 0.1545,
      "step": 21500
    },
    {
      "epoch": 14.64,
      "eval_loss": 0.4527774453163147,
      "eval_runtime": 132.8486,
      "eval_samples_per_second": 14.934,
      "eval_steps_per_second": 1.867,
      "eval_wer": 0.32682981842787867,
      "step": 21500
    },
    {
      "epoch": 14.98,
      "learning_rate": 5.12700255398189e-05,
      "loss": 0.1576,
      "step": 22000
    },
    {
      "epoch": 14.98,
      "eval_loss": 0.4553755819797516,
      "eval_runtime": 131.6613,
      "eval_samples_per_second": 15.069,
      "eval_steps_per_second": 1.884,
      "eval_wer": 0.3252509304161498,
      "step": 22000
    },
    {
      "epoch": 15.32,
      "learning_rate": 5.0109124680752265e-05,
      "loss": 0.1511,
      "step": 22500
    },
    {
      "epoch": 15.32,
      "eval_loss": 0.4580444097518921,
      "eval_runtime": 134.2974,
      "eval_samples_per_second": 14.773,
      "eval_steps_per_second": 1.847,
      "eval_wer": 0.3179203789331228,
      "step": 22500
    },
    {
      "epoch": 15.66,
      "learning_rate": 4.895054562340376e-05,
      "loss": 0.1444,
      "step": 23000
    },
    {
      "epoch": 15.66,
      "eval_loss": 0.4659278094768524,
      "eval_runtime": 134.6637,
      "eval_samples_per_second": 14.733,
      "eval_steps_per_second": 1.842,
      "eval_wer": 0.32119093267170407,
      "step": 23000
    },
    {
      "epoch": 16.0,
      "learning_rate": 4.7791966566055266e-05,
      "loss": 0.1496,
      "step": 23500
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.4660351872444153,
      "eval_runtime": 133.4382,
      "eval_samples_per_second": 14.868,
      "eval_steps_per_second": 1.859,
      "eval_wer": 0.32598398556445246,
      "step": 23500
    },
    {
      "epoch": 16.34,
      "learning_rate": 4.6631065706988625e-05,
      "loss": 0.1327,
      "step": 24000
    },
    {
      "epoch": 16.34,
      "eval_loss": 0.4934828281402588,
      "eval_runtime": 134.3743,
      "eval_samples_per_second": 14.765,
      "eval_steps_per_second": 1.846,
      "eval_wer": 0.31825871207849327,
      "step": 24000
    },
    {
      "epoch": 16.68,
      "learning_rate": 4.547016484792199e-05,
      "loss": 0.1535,
      "step": 24500
    },
    {
      "epoch": 16.68,
      "eval_loss": 0.46466243267059326,
      "eval_runtime": 132.9271,
      "eval_samples_per_second": 14.925,
      "eval_steps_per_second": 1.866,
      "eval_wer": 0.3113792714559603,
      "step": 24500
    },
    {
      "epoch": 17.02,
      "learning_rate": 4.430926398885535e-05,
      "loss": 0.1438,
      "step": 25000
    },
    {
      "epoch": 17.02,
      "eval_loss": 0.5053276419639587,
      "eval_runtime": 132.7443,
      "eval_samples_per_second": 14.946,
      "eval_steps_per_second": 1.868,
      "eval_wer": 0.31487538062478854,
      "step": 25000
    },
    {
      "epoch": 17.36,
      "learning_rate": 4.314836312978872e-05,
      "loss": 0.1264,
      "step": 25500
    },
    {
      "epoch": 17.36,
      "eval_loss": 0.48962870240211487,
      "eval_runtime": 133.9594,
      "eval_samples_per_second": 14.81,
      "eval_steps_per_second": 1.851,
      "eval_wer": 0.3131273260403744,
      "step": 25500
    },
    {
      "epoch": 17.7,
      "learning_rate": 4.1987462270722084e-05,
      "loss": 0.1269,
      "step": 26000
    },
    {
      "epoch": 17.7,
      "eval_loss": 0.46815410256385803,
      "eval_runtime": 132.7946,
      "eval_samples_per_second": 14.94,
      "eval_steps_per_second": 1.868,
      "eval_wer": 0.30748844028419986,
      "step": 26000
    },
    {
      "epoch": 18.04,
      "learning_rate": 4.0826561411655444e-05,
      "loss": 0.1272,
      "step": 26500
    },
    {
      "epoch": 18.04,
      "eval_loss": 0.4759911596775055,
      "eval_runtime": 132.8376,
      "eval_samples_per_second": 14.936,
      "eval_steps_per_second": 1.867,
      "eval_wer": 0.3113792714559603,
      "step": 26500
    },
    {
      "epoch": 18.38,
      "learning_rate": 3.966566055258881e-05,
      "loss": 0.1219,
      "step": 27000
    },
    {
      "epoch": 18.38,
      "eval_loss": 0.49612942337989807,
      "eval_runtime": 133.1179,
      "eval_samples_per_second": 14.904,
      "eval_steps_per_second": 1.863,
      "eval_wer": 0.31487538062478854,
      "step": 27000
    },
    {
      "epoch": 18.72,
      "learning_rate": 3.850475969352218e-05,
      "loss": 0.123,
      "step": 27500
    },
    {
      "epoch": 18.72,
      "eval_loss": 0.46862220764160156,
      "eval_runtime": 110.6681,
      "eval_samples_per_second": 17.927,
      "eval_steps_per_second": 2.241,
      "eval_wer": 0.3072064959963911,
      "step": 27500
    },
    {
      "epoch": 19.06,
      "learning_rate": 3.734385883445554e-05,
      "loss": 0.1262,
      "step": 28000
    },
    {
      "epoch": 19.06,
      "eval_loss": 0.49365413188934326,
      "eval_runtime": 109.7879,
      "eval_samples_per_second": 18.071,
      "eval_steps_per_second": 2.259,
      "eval_wer": 0.30889816172324347,
      "step": 28000
    },
    {
      "epoch": 19.4,
      "learning_rate": 3.61829579753889e-05,
      "loss": 0.1165,
      "step": 28500
    },
    {
      "epoch": 19.4,
      "eval_loss": 0.4988892376422882,
      "eval_runtime": 110.8122,
      "eval_samples_per_second": 17.904,
      "eval_steps_per_second": 2.238,
      "eval_wer": 0.30540205255441527,
      "step": 28500
    },
    {
      "epoch": 19.74,
      "learning_rate": 3.50243789180404e-05,
      "loss": 0.1213,
      "step": 29000
    },
    {
      "epoch": 19.74,
      "eval_loss": 0.4937494993209839,
      "eval_runtime": 109.7673,
      "eval_samples_per_second": 18.075,
      "eval_steps_per_second": 2.259,
      "eval_wer": 0.30404871997293337,
      "step": 29000
    },
    {
      "epoch": 20.08,
      "learning_rate": 3.3863478058973766e-05,
      "loss": 0.1113,
      "step": 29500
    },
    {
      "epoch": 20.08,
      "eval_loss": 0.5031572580337524,
      "eval_runtime": 111.1946,
      "eval_samples_per_second": 17.843,
      "eval_steps_per_second": 2.23,
      "eval_wer": 0.3037667756851246,
      "step": 29500
    },
    {
      "epoch": 20.42,
      "learning_rate": 3.270257719990713e-05,
      "loss": 0.1087,
      "step": 30000
    },
    {
      "epoch": 20.42,
      "eval_loss": 0.49494612216949463,
      "eval_runtime": 126.8851,
      "eval_samples_per_second": 15.636,
      "eval_steps_per_second": 1.955,
      "eval_wer": 0.299030111649938,
      "step": 30000
    },
    {
      "epoch": 20.76,
      "learning_rate": 3.154167634084049e-05,
      "loss": 0.1128,
      "step": 30500
    },
    {
      "epoch": 20.76,
      "eval_loss": 0.49934807419776917,
      "eval_runtime": 128.8495,
      "eval_samples_per_second": 15.398,
      "eval_steps_per_second": 1.925,
      "eval_wer": 0.3017367768129018,
      "step": 30500
    },
    {
      "epoch": 21.1,
      "learning_rate": 3.038077548177386e-05,
      "loss": 0.1151,
      "step": 31000
    },
    {
      "epoch": 21.1,
      "eval_loss": 0.5088058114051819,
      "eval_runtime": 128.5026,
      "eval_samples_per_second": 15.439,
      "eval_steps_per_second": 1.93,
      "eval_wer": 0.30184955452802525,
      "step": 31000
    },
    {
      "epoch": 21.44,
      "learning_rate": 2.922219642442536e-05,
      "loss": 0.1025,
      "step": 31500
    },
    {
      "epoch": 21.44,
      "eval_loss": 0.47611942887306213,
      "eval_runtime": 111.1648,
      "eval_samples_per_second": 17.847,
      "eval_steps_per_second": 2.231,
      "eval_wer": 0.29891733393481446,
      "step": 31500
    },
    {
      "epoch": 21.78,
      "learning_rate": 2.8063617367076856e-05,
      "loss": 0.1016,
      "step": 32000
    },
    {
      "epoch": 21.78,
      "eval_loss": 0.5126113891601562,
      "eval_runtime": 121.5373,
      "eval_samples_per_second": 16.324,
      "eval_steps_per_second": 2.041,
      "eval_wer": 0.30275177624901317,
      "step": 32000
    },
    {
      "epoch": 22.12,
      "learning_rate": 2.690271650801022e-05,
      "loss": 0.0991,
      "step": 32500
    },
    {
      "epoch": 22.12,
      "eval_loss": 0.5214097499847412,
      "eval_runtime": 129.4856,
      "eval_samples_per_second": 15.322,
      "eval_steps_per_second": 1.915,
      "eval_wer": 0.2958159467689185,
      "step": 32500
    },
    {
      "epoch": 22.46,
      "learning_rate": 2.574181564894358e-05,
      "loss": 0.0951,
      "step": 33000
    },
    {
      "epoch": 22.46,
      "eval_loss": 0.5233449339866638,
      "eval_runtime": 130.9955,
      "eval_samples_per_second": 15.146,
      "eval_steps_per_second": 1.893,
      "eval_wer": 0.2923198376000902,
      "step": 33000
    },
    {
      "epoch": 22.8,
      "learning_rate": 2.4580914789876948e-05,
      "loss": 0.1049,
      "step": 33500
    },
    {
      "epoch": 22.8,
      "eval_loss": 0.48444411158561707,
      "eval_runtime": 110.8951,
      "eval_samples_per_second": 17.891,
      "eval_steps_per_second": 2.236,
      "eval_wer": 0.29677455734746816,
      "step": 33500
    },
    {
      "epoch": 23.14,
      "learning_rate": 2.342001393081031e-05,
      "loss": 0.1014,
      "step": 34000
    },
    {
      "epoch": 23.14,
      "eval_loss": 0.4997089207172394,
      "eval_runtime": 111.4563,
      "eval_samples_per_second": 17.801,
      "eval_steps_per_second": 2.225,
      "eval_wer": 0.292376226457652,
      "step": 34000
    },
    {
      "epoch": 23.49,
      "learning_rate": 2.2261434873461805e-05,
      "loss": 0.0959,
      "step": 34500
    },
    {
      "epoch": 23.49,
      "eval_loss": 0.48040756583213806,
      "eval_runtime": 129.4236,
      "eval_samples_per_second": 15.33,
      "eval_steps_per_second": 1.916,
      "eval_wer": 0.28978233900981165,
      "step": 34500
    },
    {
      "epoch": 23.83,
      "learning_rate": 2.110053401439517e-05,
      "loss": 0.098,
      "step": 35000
    },
    {
      "epoch": 23.83,
      "eval_loss": 0.501395046710968,
      "eval_runtime": 129.507,
      "eval_samples_per_second": 15.32,
      "eval_steps_per_second": 1.915,
      "eval_wer": 0.291699560166911,
      "step": 35000
    },
    {
      "epoch": 24.17,
      "learning_rate": 1.9939633155328534e-05,
      "loss": 0.0973,
      "step": 35500
    },
    {
      "epoch": 24.17,
      "eval_loss": 0.5193932056427002,
      "eval_runtime": 129.3856,
      "eval_samples_per_second": 15.334,
      "eval_steps_per_second": 1.917,
      "eval_wer": 0.28955678357956466,
      "step": 35500
    },
    {
      "epoch": 24.51,
      "learning_rate": 1.87787322962619e-05,
      "loss": 0.0876,
      "step": 36000
    },
    {
      "epoch": 24.51,
      "eval_loss": 0.5203258395195007,
      "eval_runtime": 125.0985,
      "eval_samples_per_second": 15.859,
      "eval_steps_per_second": 1.982,
      "eval_wer": 0.28718845156197137,
      "step": 36000
    },
    {
      "epoch": 24.85,
      "learning_rate": 1.7617831437195264e-05,
      "loss": 0.0902,
      "step": 36500
    },
    {
      "epoch": 24.85,
      "eval_loss": 0.536376953125,
      "eval_runtime": 113.525,
      "eval_samples_per_second": 17.476,
      "eval_steps_per_second": 2.185,
      "eval_wer": 0.29136122702154055,
      "step": 36500
    },
    {
      "epoch": 25.19,
      "learning_rate": 1.645693057812863e-05,
      "loss": 0.0889,
      "step": 37000
    },
    {
      "epoch": 25.19,
      "eval_loss": 0.531440019607544,
      "eval_runtime": 123.4292,
      "eval_samples_per_second": 16.074,
      "eval_steps_per_second": 2.009,
      "eval_wer": 0.2880906732829593,
      "step": 37000
    },
    {
      "epoch": 25.53,
      "learning_rate": 1.5296029719061993e-05,
      "loss": 0.0865,
      "step": 37500
    },
    {
      "epoch": 25.53,
      "eval_loss": 0.5107194185256958,
      "eval_runtime": 129.5881,
      "eval_samples_per_second": 15.31,
      "eval_steps_per_second": 1.914,
      "eval_wer": 0.285271230404872,
      "step": 37500
    },
    {
      "epoch": 25.87,
      "learning_rate": 1.4135128859995356e-05,
      "loss": 0.0859,
      "step": 38000
    },
    {
      "epoch": 25.87,
      "eval_loss": 0.5254319310188293,
      "eval_runtime": 128.9308,
      "eval_samples_per_second": 15.388,
      "eval_steps_per_second": 1.924,
      "eval_wer": 0.28780872899515053,
      "step": 38000
    },
    {
      "epoch": 26.21,
      "learning_rate": 1.2974228000928721e-05,
      "loss": 0.0813,
      "step": 38500
    },
    {
      "epoch": 26.21,
      "eval_loss": 0.5275471806526184,
      "eval_runtime": 110.8092,
      "eval_samples_per_second": 17.905,
      "eval_steps_per_second": 2.238,
      "eval_wer": 0.2846509529716928,
      "step": 38500
    },
    {
      "epoch": 26.55,
      "learning_rate": 1.1813327141862086e-05,
      "loss": 0.0881,
      "step": 39000
    },
    {
      "epoch": 26.55,
      "eval_loss": 0.5125020742416382,
      "eval_runtime": 113.2218,
      "eval_samples_per_second": 17.523,
      "eval_steps_per_second": 2.19,
      "eval_wer": 0.28324123153264913,
      "step": 39000
    },
    {
      "epoch": 26.89,
      "learning_rate": 1.0654748084513583e-05,
      "loss": 0.0822,
      "step": 39500
    },
    {
      "epoch": 26.89,
      "eval_loss": 0.5309813618659973,
      "eval_runtime": 129.6599,
      "eval_samples_per_second": 15.302,
      "eval_steps_per_second": 1.913,
      "eval_wer": 0.28634261869854516,
      "step": 39500
    },
    {
      "epoch": 27.23,
      "learning_rate": 9.493847225446947e-06,
      "loss": 0.0837,
      "step": 40000
    },
    {
      "epoch": 27.23,
      "eval_loss": 0.5188203454017639,
      "eval_runtime": 122.3408,
      "eval_samples_per_second": 16.217,
      "eval_steps_per_second": 2.027,
      "eval_wer": 0.28312845381752566,
      "step": 40000
    },
    {
      "epoch": 27.57,
      "learning_rate": 8.332946366380312e-06,
      "loss": 0.0823,
      "step": 40500
    },
    {
      "epoch": 27.57,
      "eval_loss": 0.5201263427734375,
      "eval_runtime": 118.4812,
      "eval_samples_per_second": 16.745,
      "eval_steps_per_second": 2.093,
      "eval_wer": 0.28132401037554977,
      "step": 40500
    },
    {
      "epoch": 27.91,
      "learning_rate": 7.172045507313675e-06,
      "loss": 0.0768,
      "step": 41000
    },
    {
      "epoch": 27.91,
      "eval_loss": 0.5197951793670654,
      "eval_runtime": 112.4253,
      "eval_samples_per_second": 17.647,
      "eval_steps_per_second": 2.206,
      "eval_wer": 0.282169843238976,
      "step": 41000
    },
    {
      "epoch": 28.25,
      "learning_rate": 6.013466449965173e-06,
      "loss": 0.0858,
      "step": 41500
    },
    {
      "epoch": 28.25,
      "eval_loss": 0.5245384573936462,
      "eval_runtime": 128.7266,
      "eval_samples_per_second": 15.413,
      "eval_steps_per_second": 1.927,
      "eval_wer": 0.2786737340701477,
      "step": 41500
    },
    {
      "epoch": 28.59,
      "learning_rate": 4.8525655908985375e-06,
      "loss": 0.0757,
      "step": 42000
    },
    {
      "epoch": 28.59,
      "eval_loss": 0.5289037227630615,
      "eval_runtime": 112.5429,
      "eval_samples_per_second": 17.629,
      "eval_steps_per_second": 2.204,
      "eval_wer": 0.2766437351979249,
      "step": 42000
    },
    {
      "epoch": 28.93,
      "learning_rate": 3.6916647318319014e-06,
      "loss": 0.0728,
      "step": 42500
    },
    {
      "epoch": 28.93,
      "eval_loss": 0.5263972282409668,
      "eval_runtime": 128.0915,
      "eval_samples_per_second": 15.489,
      "eval_steps_per_second": 1.936,
      "eval_wer": 0.2779970677794068,
      "step": 42500
    },
    {
      "epoch": 29.27,
      "learning_rate": 2.5307638727652657e-06,
      "loss": 0.0761,
      "step": 43000
    },
    {
      "epoch": 29.27,
      "eval_loss": 0.5295293927192688,
      "eval_runtime": 128.9693,
      "eval_samples_per_second": 15.384,
      "eval_steps_per_second": 1.923,
      "eval_wer": 0.2770384572008571,
      "step": 43000
    },
    {
      "epoch": 29.61,
      "learning_rate": 1.3698630136986302e-06,
      "loss": 0.0739,
      "step": 43500
    },
    {
      "epoch": 29.61,
      "eval_loss": 0.5267478823661804,
      "eval_runtime": 130.4434,
      "eval_samples_per_second": 15.21,
      "eval_steps_per_second": 1.901,
      "eval_wer": 0.2776023457764746,
      "step": 43500
    }
  ],
  "max_steps": 44070,
  "num_train_epochs": 30,
  "total_flos": 1.385946883261073e+19,
  "trial_name": null,
  "trial_params": null
}