|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9357336430507162, |
|
"eval_steps": 100, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019357336430507164, |
|
"eval_loss": 3.5727736949920654, |
|
"eval_runtime": 146.249, |
|
"eval_samples_per_second": 38.674, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03871467286101433, |
|
"eval_loss": 3.076800584793091, |
|
"eval_runtime": 143.9591, |
|
"eval_samples_per_second": 39.289, |
|
"eval_steps_per_second": 4.911, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05807200929152149, |
|
"eval_loss": 3.500979423522949, |
|
"eval_runtime": 144.2352, |
|
"eval_samples_per_second": 39.214, |
|
"eval_steps_per_second": 4.902, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07742934572202866, |
|
"eval_loss": 2.0594074726104736, |
|
"eval_runtime": 144.7975, |
|
"eval_samples_per_second": 39.061, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.9899857168076263, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"grad_norm": 3.567307710647583, |
|
"learning_rate": 0.00029759999999999997, |
|
"loss": 4.06, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"eval_loss": 1.4703481197357178, |
|
"eval_runtime": 145.0673, |
|
"eval_samples_per_second": 38.989, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.8800211840605993, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11614401858304298, |
|
"eval_loss": 1.2463648319244385, |
|
"eval_runtime": 146.5204, |
|
"eval_samples_per_second": 38.602, |
|
"eval_steps_per_second": 4.825, |
|
"eval_wer": 0.8296608945451044, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13550135501355012, |
|
"eval_loss": 1.0686180591583252, |
|
"eval_runtime": 145.9363, |
|
"eval_samples_per_second": 38.757, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.7492738039832453, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1548586914440573, |
|
"eval_loss": 1.006879448890686, |
|
"eval_runtime": 146.414, |
|
"eval_samples_per_second": 38.63, |
|
"eval_steps_per_second": 4.829, |
|
"eval_wer": 0.7116239508273018, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"eval_loss": 0.936712920665741, |
|
"eval_runtime": 146.8805, |
|
"eval_samples_per_second": 38.507, |
|
"eval_steps_per_second": 4.813, |
|
"eval_wer": 0.6887868915600777, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"grad_norm": 2.5543222427368164, |
|
"learning_rate": 0.0002844, |
|
"loss": 1.0399, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"eval_loss": 0.8960636854171753, |
|
"eval_runtime": 146.9745, |
|
"eval_samples_per_second": 38.483, |
|
"eval_steps_per_second": 4.81, |
|
"eval_wer": 0.6741987771019563, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2129307007355788, |
|
"eval_loss": 0.896744430065155, |
|
"eval_runtime": 146.7401, |
|
"eval_samples_per_second": 38.544, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.6412671919885734, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"eval_loss": 0.8311247825622559, |
|
"eval_runtime": 145.8768, |
|
"eval_samples_per_second": 38.772, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.6152685721622185, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2516453735965931, |
|
"eval_loss": 0.8018700480461121, |
|
"eval_runtime": 146.5709, |
|
"eval_samples_per_second": 38.589, |
|
"eval_steps_per_second": 4.824, |
|
"eval_wer": 0.5965238882380318, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27100271002710025, |
|
"eval_loss": 0.7925447225570679, |
|
"eval_runtime": 146.4405, |
|
"eval_samples_per_second": 38.623, |
|
"eval_steps_per_second": 4.828, |
|
"eval_wer": 0.5926561923255926, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"grad_norm": 2.373326539993286, |
|
"learning_rate": 0.00026861052631578947, |
|
"loss": 0.8395, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"eval_loss": 0.8164969086647034, |
|
"eval_runtime": 151.4209, |
|
"eval_samples_per_second": 37.353, |
|
"eval_steps_per_second": 4.669, |
|
"eval_wer": 0.5986743913594711, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3097173828881146, |
|
"eval_loss": 0.7696186304092407, |
|
"eval_runtime": 147.0556, |
|
"eval_samples_per_second": 38.462, |
|
"eval_steps_per_second": 4.808, |
|
"eval_wer": 0.6150278442008634, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.32907471931862176, |
|
"eval_loss": 0.7454735636711121, |
|
"eval_runtime": 148.1475, |
|
"eval_samples_per_second": 38.178, |
|
"eval_steps_per_second": 4.772, |
|
"eval_wer": 0.5624207603793873, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"eval_loss": 0.7681124806404114, |
|
"eval_runtime": 147.9355, |
|
"eval_samples_per_second": 38.233, |
|
"eval_steps_per_second": 4.779, |
|
"eval_wer": 0.5684068623517518, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3677893921796361, |
|
"eval_loss": 0.7292491793632507, |
|
"eval_runtime": 148.2347, |
|
"eval_samples_per_second": 38.156, |
|
"eval_steps_per_second": 4.769, |
|
"eval_wer": 0.5609282470189854, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"grad_norm": 2.988316059112549, |
|
"learning_rate": 0.0002528210526315789, |
|
"loss": 0.7574, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"eval_loss": 0.7304644584655762, |
|
"eval_runtime": 148.3775, |
|
"eval_samples_per_second": 38.119, |
|
"eval_steps_per_second": 4.765, |
|
"eval_wer": 0.5534014860939481, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"eval_loss": 0.7095713019371033, |
|
"eval_runtime": 148.1439, |
|
"eval_samples_per_second": 38.179, |
|
"eval_steps_per_second": 4.772, |
|
"eval_wer": 0.5363418978992474, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4258614014711576, |
|
"eval_loss": 0.7107743620872498, |
|
"eval_runtime": 147.443, |
|
"eval_samples_per_second": 38.361, |
|
"eval_steps_per_second": 4.795, |
|
"eval_wer": 0.5572370849448733, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.4452187379016647, |
|
"eval_loss": 0.6702781319618225, |
|
"eval_runtime": 147.3568, |
|
"eval_samples_per_second": 38.383, |
|
"eval_steps_per_second": 4.798, |
|
"eval_wer": 0.5175330198520326, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4645760743321719, |
|
"eval_loss": 0.6596451997756958, |
|
"eval_runtime": 148.5753, |
|
"eval_samples_per_second": 38.068, |
|
"eval_steps_per_second": 4.759, |
|
"eval_wer": 0.514885012277126, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"grad_norm": 3.3213086128234863, |
|
"learning_rate": 0.0002370315789473684, |
|
"loss": 0.6864, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"eval_loss": 0.6845841407775879, |
|
"eval_runtime": 149.4982, |
|
"eval_samples_per_second": 37.833, |
|
"eval_steps_per_second": 4.729, |
|
"eval_wer": 0.5336457447320698, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5032907471931862, |
|
"eval_loss": 0.6666129231452942, |
|
"eval_runtime": 148.0482, |
|
"eval_samples_per_second": 38.204, |
|
"eval_steps_per_second": 4.775, |
|
"eval_wer": 0.5285744090128549, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5226480836236934, |
|
"eval_loss": 0.6390946507453918, |
|
"eval_runtime": 148.4402, |
|
"eval_samples_per_second": 38.103, |
|
"eval_steps_per_second": 4.763, |
|
"eval_wer": 0.4949366885461636, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5420054200542005, |
|
"eval_loss": 0.6295592188835144, |
|
"eval_runtime": 147.8141, |
|
"eval_samples_per_second": 38.264, |
|
"eval_steps_per_second": 4.783, |
|
"eval_wer": 0.4989648697661729, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5613627564847077, |
|
"eval_loss": 0.6291782855987549, |
|
"eval_runtime": 148.1212, |
|
"eval_samples_per_second": 38.185, |
|
"eval_steps_per_second": 4.773, |
|
"eval_wer": 0.4957391150840141, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"grad_norm": 5.012236595153809, |
|
"learning_rate": 0.00022124210526315786, |
|
"loss": 0.6734, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"eval_loss": 0.6164219975471497, |
|
"eval_runtime": 148.0479, |
|
"eval_samples_per_second": 38.204, |
|
"eval_steps_per_second": 4.775, |
|
"eval_wer": 0.47652902376787404, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6000774293457221, |
|
"eval_loss": 0.6179572343826294, |
|
"eval_runtime": 148.2452, |
|
"eval_samples_per_second": 38.153, |
|
"eval_steps_per_second": 4.769, |
|
"eval_wer": 0.4777808091669208, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6194347657762292, |
|
"eval_loss": 0.6132367849349976, |
|
"eval_runtime": 148.4317, |
|
"eval_samples_per_second": 38.105, |
|
"eval_steps_per_second": 4.763, |
|
"eval_wer": 0.49086036173388325, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6387921022067363, |
|
"eval_loss": 0.6107444763183594, |
|
"eval_runtime": 148.2189, |
|
"eval_samples_per_second": 38.16, |
|
"eval_steps_per_second": 4.77, |
|
"eval_wer": 0.4683442730817994, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6581494386372435, |
|
"eval_loss": 0.6068131327629089, |
|
"eval_runtime": 147.7251, |
|
"eval_samples_per_second": 38.287, |
|
"eval_steps_per_second": 4.786, |
|
"eval_wer": 0.4748760250999021, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"grad_norm": 3.184985399246216, |
|
"learning_rate": 0.00020545263157894736, |
|
"loss": 0.6433, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"eval_loss": 0.6008120775222778, |
|
"eval_runtime": 147.947, |
|
"eval_samples_per_second": 38.23, |
|
"eval_steps_per_second": 4.779, |
|
"eval_wer": 0.47725120765193946, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"eval_loss": 0.5916668772697449, |
|
"eval_runtime": 147.0363, |
|
"eval_samples_per_second": 38.467, |
|
"eval_steps_per_second": 4.808, |
|
"eval_wer": 0.4656320713838648, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.716221447928765, |
|
"eval_loss": 0.5885007381439209, |
|
"eval_runtime": 148.9484, |
|
"eval_samples_per_second": 37.973, |
|
"eval_steps_per_second": 4.747, |
|
"eval_wer": 0.4600953282726966, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7355787843592722, |
|
"eval_loss": 0.5848101377487183, |
|
"eval_runtime": 148.7388, |
|
"eval_samples_per_second": 38.026, |
|
"eval_steps_per_second": 4.753, |
|
"eval_wer": 0.44823546404326686, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7549361207897793, |
|
"eval_loss": 0.5852195620536804, |
|
"eval_runtime": 148.3227, |
|
"eval_samples_per_second": 38.133, |
|
"eval_steps_per_second": 4.767, |
|
"eval_wer": 0.44963168621912664, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"grad_norm": 4.9515814781188965, |
|
"learning_rate": 0.00018966315789473683, |
|
"loss": 0.6217, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"eval_loss": 0.577220618724823, |
|
"eval_runtime": 147.6504, |
|
"eval_samples_per_second": 38.307, |
|
"eval_steps_per_second": 4.788, |
|
"eval_wer": 0.44163951790213607, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"eval_loss": 0.56705242395401, |
|
"eval_runtime": 152.2357, |
|
"eval_samples_per_second": 37.153, |
|
"eval_steps_per_second": 4.644, |
|
"eval_wer": 0.44691948452119207, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"eval_loss": 0.5668296813964844, |
|
"eval_runtime": 148.0111, |
|
"eval_samples_per_second": 38.213, |
|
"eval_steps_per_second": 4.777, |
|
"eval_wer": 0.4462614947601547, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.832365466511808, |
|
"eval_loss": 0.5557947754859924, |
|
"eval_runtime": 149.4281, |
|
"eval_samples_per_second": 37.851, |
|
"eval_steps_per_second": 4.731, |
|
"eval_wer": 0.44006676188794913, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8517228029423152, |
|
"eval_loss": 0.5651959776878357, |
|
"eval_runtime": 149.3956, |
|
"eval_samples_per_second": 37.859, |
|
"eval_steps_per_second": 4.732, |
|
"eval_wer": 0.4306783713950988, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 3.5483193397521973, |
|
"learning_rate": 0.0001738736842105263, |
|
"loss": 0.5954, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"eval_loss": 0.5561267733573914, |
|
"eval_runtime": 149.9212, |
|
"eval_samples_per_second": 37.726, |
|
"eval_steps_per_second": 4.716, |
|
"eval_wer": 0.4307265169873698, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8904374758033294, |
|
"eval_loss": 0.5431749820709229, |
|
"eval_runtime": 149.9454, |
|
"eval_samples_per_second": 37.72, |
|
"eval_steps_per_second": 4.715, |
|
"eval_wer": 0.420648039671968, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9097948122338366, |
|
"eval_loss": 0.5294374823570251, |
|
"eval_runtime": 148.9794, |
|
"eval_samples_per_second": 37.965, |
|
"eval_steps_per_second": 4.746, |
|
"eval_wer": 0.41371507438494004, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"eval_loss": 0.5444126725196838, |
|
"eval_runtime": 148.5962, |
|
"eval_samples_per_second": 38.063, |
|
"eval_steps_per_second": 4.758, |
|
"eval_wer": 0.4209529617563512, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.948509485094851, |
|
"eval_loss": 0.5291473269462585, |
|
"eval_runtime": 150.1832, |
|
"eval_samples_per_second": 37.661, |
|
"eval_steps_per_second": 4.708, |
|
"eval_wer": 0.4156569466065382, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"grad_norm": 3.1595053672790527, |
|
"learning_rate": 0.0001581157894736842, |
|
"loss": 0.5663, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"eval_loss": 0.5428867340087891, |
|
"eval_runtime": 149.6435, |
|
"eval_samples_per_second": 37.797, |
|
"eval_steps_per_second": 4.725, |
|
"eval_wer": 0.4139558023462952, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9872241579558653, |
|
"eval_loss": 0.5208781361579895, |
|
"eval_runtime": 149.0703, |
|
"eval_samples_per_second": 37.942, |
|
"eval_steps_per_second": 4.743, |
|
"eval_wer": 0.41159666832501485, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.0065814943863725, |
|
"eval_loss": 0.5281690359115601, |
|
"eval_runtime": 148.6703, |
|
"eval_samples_per_second": 38.044, |
|
"eval_steps_per_second": 4.755, |
|
"eval_wer": 0.40421434417679064, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.0259388308168795, |
|
"eval_loss": 0.5118032693862915, |
|
"eval_runtime": 148.0473, |
|
"eval_samples_per_second": 38.204, |
|
"eval_steps_per_second": 4.776, |
|
"eval_wer": 0.39184092696313655, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"eval_loss": 0.5089045166969299, |
|
"eval_runtime": 147.9634, |
|
"eval_samples_per_second": 38.226, |
|
"eval_steps_per_second": 4.778, |
|
"eval_wer": 0.39927139670363176, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"grad_norm": 2.1315221786499023, |
|
"learning_rate": 0.0001423578947368421, |
|
"loss": 0.4941, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"eval_loss": 0.5010989308357239, |
|
"eval_runtime": 147.8753, |
|
"eval_samples_per_second": 38.248, |
|
"eval_steps_per_second": 4.781, |
|
"eval_wer": 0.3921458490475197, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.084010840108401, |
|
"eval_loss": 0.5022321343421936, |
|
"eval_runtime": 148.3164, |
|
"eval_samples_per_second": 38.135, |
|
"eval_steps_per_second": 4.767, |
|
"eval_wer": 0.38869541493476273, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.1033681765389083, |
|
"eval_loss": 0.5066320896148682, |
|
"eval_runtime": 148.554, |
|
"eval_samples_per_second": 38.074, |
|
"eval_steps_per_second": 4.759, |
|
"eval_wer": 0.38526102935276274, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.1227255129694154, |
|
"eval_loss": 0.49068546295166016, |
|
"eval_runtime": 148.2455, |
|
"eval_samples_per_second": 38.153, |
|
"eval_steps_per_second": 4.769, |
|
"eval_wer": 0.3815217216863796, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.1420828493999227, |
|
"eval_loss": 0.4982084035873413, |
|
"eval_runtime": 148.9817, |
|
"eval_samples_per_second": 37.964, |
|
"eval_steps_per_second": 4.746, |
|
"eval_wer": 0.38086373192534223, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"grad_norm": 0.8627763390541077, |
|
"learning_rate": 0.00012656842105263156, |
|
"loss": 0.4628, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"eval_loss": 0.49128398299217224, |
|
"eval_runtime": 149.7714, |
|
"eval_samples_per_second": 37.764, |
|
"eval_steps_per_second": 4.721, |
|
"eval_wer": 0.38956203559564123, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1807975222609368, |
|
"eval_loss": 0.48260679841041565, |
|
"eval_runtime": 149.8626, |
|
"eval_samples_per_second": 37.741, |
|
"eval_steps_per_second": 4.718, |
|
"eval_wer": 0.373449310715604, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.2001548586914441, |
|
"eval_loss": 0.4883708655834198, |
|
"eval_runtime": 149.0462, |
|
"eval_samples_per_second": 37.948, |
|
"eval_steps_per_second": 4.743, |
|
"eval_wer": 0.3739949607613423, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"eval_loss": 0.4841243028640747, |
|
"eval_runtime": 148.8948, |
|
"eval_samples_per_second": 37.987, |
|
"eval_steps_per_second": 4.748, |
|
"eval_wer": 0.37004702219511804, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.2388695315524583, |
|
"eval_loss": 0.4828014671802521, |
|
"eval_runtime": 149.5102, |
|
"eval_samples_per_second": 37.83, |
|
"eval_steps_per_second": 4.729, |
|
"eval_wer": 0.36971000304922086, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"grad_norm": 1.5625278949737549, |
|
"learning_rate": 0.00011077894736842105, |
|
"loss": 0.4435, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"eval_loss": 0.48161521553993225, |
|
"eval_runtime": 148.9005, |
|
"eval_samples_per_second": 37.985, |
|
"eval_steps_per_second": 4.748, |
|
"eval_wer": 0.37389866957680024, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2775842044134726, |
|
"eval_loss": 0.47928386926651, |
|
"eval_runtime": 149.5106, |
|
"eval_samples_per_second": 37.83, |
|
"eval_steps_per_second": 4.729, |
|
"eval_wer": 0.3673990146202115, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.29694154084398, |
|
"eval_loss": 0.4744218587875366, |
|
"eval_runtime": 148.9048, |
|
"eval_samples_per_second": 37.984, |
|
"eval_steps_per_second": 4.748, |
|
"eval_wer": 0.36688546163598723, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.316298877274487, |
|
"eval_loss": 0.46821942925453186, |
|
"eval_runtime": 148.7411, |
|
"eval_samples_per_second": 38.026, |
|
"eval_steps_per_second": 4.753, |
|
"eval_wer": 0.3608672626021088, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.3356562137049943, |
|
"eval_loss": 0.46276068687438965, |
|
"eval_runtime": 150.3036, |
|
"eval_samples_per_second": 37.63, |
|
"eval_steps_per_second": 4.704, |
|
"eval_wer": 0.359438943364735, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"grad_norm": 0.7794021964073181, |
|
"learning_rate": 9.498947368421052e-05, |
|
"loss": 0.4298, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"eval_loss": 0.4662827253341675, |
|
"eval_runtime": 149.5174, |
|
"eval_samples_per_second": 37.828, |
|
"eval_steps_per_second": 4.729, |
|
"eval_wer": 0.3554428592062397, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3743708865660085, |
|
"eval_loss": 0.4656233489513397, |
|
"eval_runtime": 148.8165, |
|
"eval_samples_per_second": 38.007, |
|
"eval_steps_per_second": 4.751, |
|
"eval_wer": 0.3583797403347724, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"eval_loss": 0.45931774377822876, |
|
"eval_runtime": 150.2338, |
|
"eval_samples_per_second": 37.648, |
|
"eval_steps_per_second": 4.706, |
|
"eval_wer": 0.35648601370544525, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.4130855594270229, |
|
"eval_loss": 0.45989105105400085, |
|
"eval_runtime": 150.9977, |
|
"eval_samples_per_second": 37.458, |
|
"eval_steps_per_second": 4.682, |
|
"eval_wer": 0.3565823048899873, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.43244289585753, |
|
"eval_loss": 0.46128061413764954, |
|
"eval_runtime": 150.0246, |
|
"eval_samples_per_second": 37.7, |
|
"eval_steps_per_second": 4.713, |
|
"eval_wer": 0.35208871627802474, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"grad_norm": 0.7098228931427002, |
|
"learning_rate": 7.92e-05, |
|
"loss": 0.4292, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"eval_loss": 0.4520701467990875, |
|
"eval_runtime": 149.5493, |
|
"eval_samples_per_second": 37.82, |
|
"eval_steps_per_second": 4.728, |
|
"eval_wer": 0.34745069088924907, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4711575687185443, |
|
"eval_loss": 0.4512416422367096, |
|
"eval_runtime": 149.5055, |
|
"eval_samples_per_second": 37.831, |
|
"eval_steps_per_second": 4.729, |
|
"eval_wer": 0.349071592495707, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.4905149051490514, |
|
"eval_loss": 0.4478435218334198, |
|
"eval_runtime": 149.0622, |
|
"eval_samples_per_second": 37.944, |
|
"eval_steps_per_second": 4.743, |
|
"eval_wer": 0.35175169713212756, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.5098722415795587, |
|
"eval_loss": 0.4415859878063202, |
|
"eval_runtime": 148.899, |
|
"eval_samples_per_second": 37.985, |
|
"eval_steps_per_second": 4.748, |
|
"eval_wer": 0.34213862720867905, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.5292295780100658, |
|
"eval_loss": 0.4426974952220917, |
|
"eval_runtime": 149.2815, |
|
"eval_samples_per_second": 37.888, |
|
"eval_steps_per_second": 4.736, |
|
"eval_wer": 0.3458779348750622, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"grad_norm": 1.0578420162200928, |
|
"learning_rate": 6.344210526315788e-05, |
|
"loss": 0.4072, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"eval_loss": 0.43879374861717224, |
|
"eval_runtime": 148.7049, |
|
"eval_samples_per_second": 38.035, |
|
"eval_steps_per_second": 4.754, |
|
"eval_wer": 0.34565325544446407, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5679442508710801, |
|
"eval_loss": 0.44011563062667847, |
|
"eval_runtime": 150.4046, |
|
"eval_samples_per_second": 37.605, |
|
"eval_steps_per_second": 4.701, |
|
"eval_wer": 0.3453162362985669, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"eval_loss": 0.43649429082870483, |
|
"eval_runtime": 148.8759, |
|
"eval_samples_per_second": 37.991, |
|
"eval_steps_per_second": 4.749, |
|
"eval_wer": 0.3434385581999968, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.6066589237320945, |
|
"eval_loss": 0.4346481263637543, |
|
"eval_runtime": 149.1351, |
|
"eval_samples_per_second": 37.925, |
|
"eval_steps_per_second": 4.741, |
|
"eval_wer": 0.33974739612588467, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.43247029185295105, |
|
"eval_runtime": 149.5691, |
|
"eval_samples_per_second": 37.815, |
|
"eval_steps_per_second": 4.727, |
|
"eval_wer": 0.33604018552101556, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"grad_norm": 1.7964462041854858, |
|
"learning_rate": 4.765263157894736e-05, |
|
"loss": 0.3991, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"eval_loss": 0.43196219205856323, |
|
"eval_runtime": 150.109, |
|
"eval_samples_per_second": 37.679, |
|
"eval_steps_per_second": 4.71, |
|
"eval_wer": 0.3357834090289034, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.664730933023616, |
|
"eval_loss": 0.42872872948646545, |
|
"eval_runtime": 150.0401, |
|
"eval_samples_per_second": 37.697, |
|
"eval_steps_per_second": 4.712, |
|
"eval_wer": 0.3354624384137632, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.684088269454123, |
|
"eval_loss": 0.42928823828697205, |
|
"eval_runtime": 149.2284, |
|
"eval_samples_per_second": 37.902, |
|
"eval_steps_per_second": 4.738, |
|
"eval_wer": 0.33342427500762306, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.7034456058846303, |
|
"eval_loss": 0.4271656274795532, |
|
"eval_runtime": 149.5274, |
|
"eval_samples_per_second": 37.826, |
|
"eval_steps_per_second": 4.728, |
|
"eval_wer": 0.333327983823081, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.7228029423151374, |
|
"eval_loss": 0.4219857156276703, |
|
"eval_runtime": 149.1865, |
|
"eval_samples_per_second": 37.912, |
|
"eval_steps_per_second": 4.739, |
|
"eval_wer": 0.3302948115100063, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"grad_norm": 1.7460029125213623, |
|
"learning_rate": 3.189473684210526e-05, |
|
"loss": 0.3916, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"eval_loss": 0.4238153398036957, |
|
"eval_runtime": 149.4733, |
|
"eval_samples_per_second": 37.84, |
|
"eval_steps_per_second": 4.73, |
|
"eval_wer": 0.3291874628877726, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7615176151761518, |
|
"eval_loss": 0.42150619626045227, |
|
"eval_runtime": 148.8948, |
|
"eval_samples_per_second": 37.987, |
|
"eval_steps_per_second": 4.748, |
|
"eval_wer": 0.32812825985781, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.7808749516066589, |
|
"eval_loss": 0.4176540672779083, |
|
"eval_runtime": 150.0504, |
|
"eval_samples_per_second": 37.694, |
|
"eval_steps_per_second": 4.712, |
|
"eval_wer": 0.3265876009051371, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.800232288037166, |
|
"eval_loss": 0.41875413060188293, |
|
"eval_runtime": 150.5043, |
|
"eval_samples_per_second": 37.58, |
|
"eval_steps_per_second": 4.698, |
|
"eval_wer": 0.32573702877501565, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.8195896244676733, |
|
"eval_loss": 0.41637665033340454, |
|
"eval_runtime": 150.1757, |
|
"eval_samples_per_second": 37.663, |
|
"eval_steps_per_second": 4.708, |
|
"eval_wer": 0.32469387427581003, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"grad_norm": 0.8558129668235779, |
|
"learning_rate": 1.6105263157894736e-05, |
|
"loss": 0.3687, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"eval_loss": 0.41629916429519653, |
|
"eval_runtime": 149.3775, |
|
"eval_samples_per_second": 37.864, |
|
"eval_steps_per_second": 4.733, |
|
"eval_wer": 0.3242766124761278, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8583042973286876, |
|
"eval_loss": 0.4140332341194153, |
|
"eval_runtime": 149.5915, |
|
"eval_samples_per_second": 37.81, |
|
"eval_steps_per_second": 4.726, |
|
"eval_wer": 0.3238914477379596, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.8776616337591947, |
|
"eval_loss": 0.4132048189640045, |
|
"eval_runtime": 150.4642, |
|
"eval_samples_per_second": 37.59, |
|
"eval_steps_per_second": 4.699, |
|
"eval_wer": 0.324661777214296, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.897018970189702, |
|
"eval_loss": 0.4122065007686615, |
|
"eval_runtime": 150.0219, |
|
"eval_samples_per_second": 37.701, |
|
"eval_steps_per_second": 4.713, |
|
"eval_wer": 0.3223668373160437, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.916376306620209, |
|
"eval_loss": 0.41170838475227356, |
|
"eval_runtime": 149.8162, |
|
"eval_samples_per_second": 37.753, |
|
"eval_steps_per_second": 4.719, |
|
"eval_wer": 0.3218532843318194, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"grad_norm": 2.01002836227417, |
|
"learning_rate": 3.157894736842105e-07, |
|
"loss": 0.3707, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"eval_loss": 0.41177985072135925, |
|
"eval_runtime": 148.9604, |
|
"eval_samples_per_second": 37.97, |
|
"eval_steps_per_second": 4.746, |
|
"eval_wer": 0.32191747845484747, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"step": 10000, |
|
"total_flos": 1.1255918428180738e+19, |
|
"train_loss": 0.7339932418823242, |
|
"train_runtime": 18725.1494, |
|
"train_samples_per_second": 4.272, |
|
"train_steps_per_second": 0.534 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1255918428180738e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|