|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9357336430507162, |
|
"eval_steps": 100, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019357336430507164, |
|
"eval_loss": 3.554515838623047, |
|
"eval_runtime": 148.6327, |
|
"eval_samples_per_second": 38.054, |
|
"eval_steps_per_second": 4.757, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03871467286101433, |
|
"eval_loss": 3.02604603767395, |
|
"eval_runtime": 146.1639, |
|
"eval_samples_per_second": 38.696, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05807200929152149, |
|
"eval_loss": 2.906620979309082, |
|
"eval_runtime": 146.0171, |
|
"eval_samples_per_second": 38.735, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07742934572202866, |
|
"eval_loss": 2.013317823410034, |
|
"eval_runtime": 145.6964, |
|
"eval_samples_per_second": 38.82, |
|
"eval_steps_per_second": 4.853, |
|
"eval_wer": 0.9847217987193272, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"grad_norm": 4.750732421875, |
|
"learning_rate": 0.0002982, |
|
"loss": 4.0489, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"eval_loss": 1.459844708442688, |
|
"eval_runtime": 145.9454, |
|
"eval_samples_per_second": 38.754, |
|
"eval_steps_per_second": 4.844, |
|
"eval_wer": 0.9003546725297299, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11614401858304298, |
|
"eval_loss": 1.1772356033325195, |
|
"eval_runtime": 145.9941, |
|
"eval_samples_per_second": 38.741, |
|
"eval_steps_per_second": 4.843, |
|
"eval_wer": 0.8042079247644878, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13550135501355012, |
|
"eval_loss": 1.07865571975708, |
|
"eval_runtime": 146.4442, |
|
"eval_samples_per_second": 38.622, |
|
"eval_steps_per_second": 4.828, |
|
"eval_wer": 0.7589510680297219, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1548586914440573, |
|
"eval_loss": 1.0144398212432861, |
|
"eval_runtime": 146.6444, |
|
"eval_samples_per_second": 38.569, |
|
"eval_steps_per_second": 4.821, |
|
"eval_wer": 0.7211567780969652, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"eval_loss": 0.9338767528533936, |
|
"eval_runtime": 147.2439, |
|
"eval_samples_per_second": 38.412, |
|
"eval_steps_per_second": 4.802, |
|
"eval_wer": 0.6931520919259841, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"grad_norm": 3.122023105621338, |
|
"learning_rate": 0.0002843684210526315, |
|
"loss": 1.0454, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"eval_loss": 0.8805950284004211, |
|
"eval_runtime": 146.7917, |
|
"eval_samples_per_second": 38.531, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.659739050889891, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2129307007355788, |
|
"eval_loss": 0.864371657371521, |
|
"eval_runtime": 146.75, |
|
"eval_samples_per_second": 38.542, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.6553738505239846, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"eval_loss": 0.845355749130249, |
|
"eval_runtime": 147.3826, |
|
"eval_samples_per_second": 38.376, |
|
"eval_steps_per_second": 4.797, |
|
"eval_wer": 0.6313652485114988, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2516453735965931, |
|
"eval_loss": 0.8092750906944275, |
|
"eval_runtime": 147.8588, |
|
"eval_samples_per_second": 38.253, |
|
"eval_steps_per_second": 4.782, |
|
"eval_wer": 0.5919179599107701, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27100271002710025, |
|
"eval_loss": 0.8075847029685974, |
|
"eval_runtime": 147.3238, |
|
"eval_samples_per_second": 38.392, |
|
"eval_steps_per_second": 4.799, |
|
"eval_wer": 0.6072282582529569, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"grad_norm": 3.26877498626709, |
|
"learning_rate": 0.000268578947368421, |
|
"loss": 0.842, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"eval_loss": 0.7783301472663879, |
|
"eval_runtime": 151.3584, |
|
"eval_samples_per_second": 37.368, |
|
"eval_steps_per_second": 4.671, |
|
"eval_wer": 0.5857232270385646, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3097173828881146, |
|
"eval_loss": 0.7964575290679932, |
|
"eval_runtime": 147.3418, |
|
"eval_samples_per_second": 38.387, |
|
"eval_steps_per_second": 4.798, |
|
"eval_wer": 0.5940524145014524, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.32907471931862176, |
|
"eval_loss": 0.7414730191230774, |
|
"eval_runtime": 145.4997, |
|
"eval_samples_per_second": 38.873, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.5505127505576864, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"eval_loss": 0.7440277934074402, |
|
"eval_runtime": 144.5466, |
|
"eval_samples_per_second": 39.129, |
|
"eval_steps_per_second": 4.891, |
|
"eval_wer": 0.563656497247677, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3677893921796361, |
|
"eval_loss": 0.7360929846763611, |
|
"eval_runtime": 144.6106, |
|
"eval_samples_per_second": 39.112, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.5865256535764151, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"grad_norm": 1.8907363414764404, |
|
"learning_rate": 0.0002527894736842105, |
|
"loss": 0.755, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"eval_loss": 0.7313781380653381, |
|
"eval_runtime": 144.7461, |
|
"eval_samples_per_second": 39.075, |
|
"eval_steps_per_second": 4.884, |
|
"eval_wer": 0.542729213140537, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"eval_loss": 0.6866404414176941, |
|
"eval_runtime": 144.8178, |
|
"eval_samples_per_second": 39.056, |
|
"eval_steps_per_second": 4.882, |
|
"eval_wer": 0.5181428640207989, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4258614014711576, |
|
"eval_loss": 0.6947888731956482, |
|
"eval_runtime": 144.3916, |
|
"eval_samples_per_second": 39.171, |
|
"eval_steps_per_second": 4.896, |
|
"eval_wer": 0.5425847763637239, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.4452187379016647, |
|
"eval_loss": 0.6796152591705322, |
|
"eval_runtime": 144.4575, |
|
"eval_samples_per_second": 39.153, |
|
"eval_steps_per_second": 4.894, |
|
"eval_wer": 0.5158960697148176, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4645760743321719, |
|
"eval_loss": 0.6898629069328308, |
|
"eval_runtime": 144.5493, |
|
"eval_samples_per_second": 39.129, |
|
"eval_steps_per_second": 4.891, |
|
"eval_wer": 0.5305162812344529, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"grad_norm": 2.9580163955688477, |
|
"learning_rate": 0.000237, |
|
"loss": 0.6884, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"eval_loss": 0.6736070513725281, |
|
"eval_runtime": 144.9197, |
|
"eval_samples_per_second": 39.029, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.5103272295421354, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5032907471931862, |
|
"eval_loss": 0.6728157997131348, |
|
"eval_runtime": 145.4712, |
|
"eval_samples_per_second": 38.881, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.5257338190688643, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5226480836236934, |
|
"eval_loss": 0.6537250876426697, |
|
"eval_runtime": 145.152, |
|
"eval_samples_per_second": 38.966, |
|
"eval_steps_per_second": 4.871, |
|
"eval_wer": 0.5026560318402851, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5420054200542005, |
|
"eval_loss": 0.631415843963623, |
|
"eval_runtime": 145.09, |
|
"eval_samples_per_second": 38.983, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.48227439777888337, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5613627564847077, |
|
"eval_loss": 0.6316519379615784, |
|
"eval_runtime": 144.7345, |
|
"eval_samples_per_second": 39.078, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.48296448460143476, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"grad_norm": 8.703415870666504, |
|
"learning_rate": 0.00022121052631578946, |
|
"loss": 0.6756, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"eval_loss": 0.6204401254653931, |
|
"eval_runtime": 144.9451, |
|
"eval_samples_per_second": 39.022, |
|
"eval_steps_per_second": 4.878, |
|
"eval_wer": 0.4761117619681918, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6000774293457221, |
|
"eval_loss": 0.6310548782348633, |
|
"eval_runtime": 144.6818, |
|
"eval_samples_per_second": 39.093, |
|
"eval_steps_per_second": 4.887, |
|
"eval_wer": 0.4810868065028647, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6194347657762292, |
|
"eval_loss": 0.623622477054596, |
|
"eval_runtime": 145.2962, |
|
"eval_samples_per_second": 38.927, |
|
"eval_steps_per_second": 4.866, |
|
"eval_wer": 0.48631862752964966, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6387921022067363, |
|
"eval_loss": 0.6224333047866821, |
|
"eval_runtime": 144.731, |
|
"eval_samples_per_second": 39.079, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.46287172409365923, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6581494386372435, |
|
"eval_loss": 0.597332239151001, |
|
"eval_runtime": 145.3577, |
|
"eval_samples_per_second": 38.911, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.4622939769864069, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"grad_norm": 4.671802997589111, |
|
"learning_rate": 0.00020542105263157893, |
|
"loss": 0.6435, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"eval_loss": 0.5913041830062866, |
|
"eval_runtime": 145.2068, |
|
"eval_samples_per_second": 38.951, |
|
"eval_steps_per_second": 4.869, |
|
"eval_wer": 0.47079969828762175, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"eval_loss": 0.6086587905883789, |
|
"eval_runtime": 145.0548, |
|
"eval_samples_per_second": 38.992, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.4743945691771918, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.716221447928765, |
|
"eval_loss": 0.5827310681343079, |
|
"eval_runtime": 145.6494, |
|
"eval_samples_per_second": 38.833, |
|
"eval_steps_per_second": 4.854, |
|
"eval_wer": 0.45213525701722007, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7355787843592722, |
|
"eval_loss": 0.5875205397605896, |
|
"eval_runtime": 145.7866, |
|
"eval_samples_per_second": 38.796, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.460785415095248, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7549361207897793, |
|
"eval_loss": 0.592467725276947, |
|
"eval_runtime": 145.4885, |
|
"eval_samples_per_second": 38.876, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.45573012790679013, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"grad_norm": 3.854473114013672, |
|
"learning_rate": 0.0001896315789473684, |
|
"loss": 0.6282, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"eval_loss": 0.5799296498298645, |
|
"eval_runtime": 145.6866, |
|
"eval_samples_per_second": 38.823, |
|
"eval_steps_per_second": 4.853, |
|
"eval_wer": 0.4494230553192855, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"eval_loss": 0.567882239818573, |
|
"eval_runtime": 146.7561, |
|
"eval_samples_per_second": 38.54, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.45260066440917335, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"eval_loss": 0.5699547529220581, |
|
"eval_runtime": 145.6353, |
|
"eval_samples_per_second": 38.837, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.4549597984304537, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.832365466511808, |
|
"eval_loss": 0.5610417127609253, |
|
"eval_runtime": 145.0921, |
|
"eval_samples_per_second": 38.982, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.43432138787693986, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8517228029423152, |
|
"eval_loss": 0.5616321563720703, |
|
"eval_runtime": 145.7565, |
|
"eval_samples_per_second": 38.804, |
|
"eval_steps_per_second": 4.851, |
|
"eval_wer": 0.42727608287461283, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 3.4542479515075684, |
|
"learning_rate": 0.0001738421052631579, |
|
"loss": 0.5937, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"eval_loss": 0.5463823080062866, |
|
"eval_runtime": 145.3415, |
|
"eval_samples_per_second": 38.915, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.42207635890934186, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8904374758033294, |
|
"eval_loss": 0.5485692620277405, |
|
"eval_runtime": 145.5247, |
|
"eval_samples_per_second": 38.866, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.4287685962350147, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9097948122338366, |
|
"eval_loss": 0.5307685136795044, |
|
"eval_runtime": 145.423, |
|
"eval_samples_per_second": 38.893, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.41673219816725776, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"eval_loss": 0.5520421862602234, |
|
"eval_runtime": 145.5542, |
|
"eval_samples_per_second": 38.858, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.41997400138017366, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.948509485094851, |
|
"eval_loss": 0.5321463346481323, |
|
"eval_runtime": 146.3191, |
|
"eval_samples_per_second": 38.655, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.41804817768933256, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"grad_norm": 5.257072925567627, |
|
"learning_rate": 0.00015808421052631577, |
|
"loss": 0.5659, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"eval_loss": 0.5333205461502075, |
|
"eval_runtime": 145.4188, |
|
"eval_samples_per_second": 38.895, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.4176148673588933, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9872241579558653, |
|
"eval_loss": 0.5260410904884338, |
|
"eval_runtime": 146.9543, |
|
"eval_samples_per_second": 38.488, |
|
"eval_steps_per_second": 4.811, |
|
"eval_wer": 0.41113126093306157, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.0065814943863725, |
|
"eval_loss": 0.5185408592224121, |
|
"eval_runtime": 145.8818, |
|
"eval_samples_per_second": 38.771, |
|
"eval_steps_per_second": 4.846, |
|
"eval_wer": 0.3973616215435477, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.0259388308168795, |
|
"eval_loss": 0.5147408843040466, |
|
"eval_runtime": 150.3669, |
|
"eval_samples_per_second": 37.615, |
|
"eval_steps_per_second": 4.702, |
|
"eval_wer": 0.3917927813708655, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"eval_loss": 0.5154542326927185, |
|
"eval_runtime": 145.571, |
|
"eval_samples_per_second": 38.854, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.3975863009741458, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"grad_norm": 1.1321543455123901, |
|
"learning_rate": 0.00014232631578947366, |
|
"loss": 0.4928, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"eval_loss": 0.5057936906814575, |
|
"eval_runtime": 144.9813, |
|
"eval_samples_per_second": 39.012, |
|
"eval_steps_per_second": 4.876, |
|
"eval_wer": 0.3936062653464075, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.084010840108401, |
|
"eval_loss": 0.504751443862915, |
|
"eval_runtime": 145.1866, |
|
"eval_samples_per_second": 38.957, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.3964629038211552, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.1033681765389083, |
|
"eval_loss": 0.5011361241340637, |
|
"eval_runtime": 146.2522, |
|
"eval_samples_per_second": 38.673, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 0.3818266437707628, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.1227255129694154, |
|
"eval_loss": 0.4964805543422699, |
|
"eval_runtime": 146.0597, |
|
"eval_samples_per_second": 38.724, |
|
"eval_steps_per_second": 4.84, |
|
"eval_wer": 0.3830463321082955, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.1420828493999227, |
|
"eval_loss": 0.4969277083873749, |
|
"eval_runtime": 145.1404, |
|
"eval_samples_per_second": 38.969, |
|
"eval_steps_per_second": 4.871, |
|
"eval_wer": 0.383993195422959, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"grad_norm": 1.1488393545150757, |
|
"learning_rate": 0.00012653684210526316, |
|
"loss": 0.4619, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"eval_loss": 0.4863020181655884, |
|
"eval_runtime": 146.181, |
|
"eval_samples_per_second": 38.692, |
|
"eval_steps_per_second": 4.836, |
|
"eval_wer": 0.3799650142029497, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1807975222609368, |
|
"eval_loss": 0.49075642228126526, |
|
"eval_runtime": 145.2768, |
|
"eval_samples_per_second": 38.933, |
|
"eval_steps_per_second": 4.867, |
|
"eval_wer": 0.37998106273370674, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.2001548586914441, |
|
"eval_loss": 0.4835449457168579, |
|
"eval_runtime": 145.0604, |
|
"eval_samples_per_second": 38.991, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.3712185649403797, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"eval_loss": 0.4926937520503998, |
|
"eval_runtime": 145.5282, |
|
"eval_samples_per_second": 38.865, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.3766590168670058, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.2388695315524583, |
|
"eval_loss": 0.4942048490047455, |
|
"eval_runtime": 146.7525, |
|
"eval_samples_per_second": 38.541, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.368281683811847, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"grad_norm": 1.8319953680038452, |
|
"learning_rate": 0.00011074736842105263, |
|
"loss": 0.4421, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"eval_loss": 0.4833586513996124, |
|
"eval_runtime": 147.4272, |
|
"eval_samples_per_second": 38.365, |
|
"eval_steps_per_second": 4.796, |
|
"eval_wer": 0.37393076663831426, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2775842044134726, |
|
"eval_loss": 0.47513511776924133, |
|
"eval_runtime": 146.0617, |
|
"eval_samples_per_second": 38.723, |
|
"eval_steps_per_second": 4.84, |
|
"eval_wer": 0.3633868819309592, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.29694154084398, |
|
"eval_loss": 0.4733775854110718, |
|
"eval_runtime": 145.526, |
|
"eval_samples_per_second": 38.866, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.36327454221566013, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.316298877274487, |
|
"eval_loss": 0.4685443937778473, |
|
"eval_runtime": 145.3437, |
|
"eval_samples_per_second": 38.915, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.36447818202243587, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.3356562137049943, |
|
"eval_loss": 0.4654460847377777, |
|
"eval_runtime": 145.3646, |
|
"eval_samples_per_second": 38.909, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.3624560671470527, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"grad_norm": 0.9300881624221802, |
|
"learning_rate": 9.49578947368421e-05, |
|
"loss": 0.4304, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"eval_loss": 0.47420966625213623, |
|
"eval_runtime": 145.5058, |
|
"eval_samples_per_second": 38.871, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.3615413008939032, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3743708865660085, |
|
"eval_loss": 0.46446511149406433, |
|
"eval_runtime": 145.4472, |
|
"eval_samples_per_second": 38.887, |
|
"eval_steps_per_second": 4.861, |
|
"eval_wer": 0.359567331610791, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"eval_loss": 0.45991310477256775, |
|
"eval_runtime": 145.7318, |
|
"eval_samples_per_second": 38.811, |
|
"eval_steps_per_second": 4.851, |
|
"eval_wer": 0.3593587007109499, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.4130855594270229, |
|
"eval_loss": 0.4554171562194824, |
|
"eval_runtime": 145.4195, |
|
"eval_samples_per_second": 38.894, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.3555391503907817, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.43244289585753, |
|
"eval_loss": 0.457757830619812, |
|
"eval_runtime": 145.3536, |
|
"eval_samples_per_second": 38.912, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.35780199322752004, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"grad_norm": 1.012635588645935, |
|
"learning_rate": 7.916842105263156e-05, |
|
"loss": 0.4275, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"eval_loss": 0.45184171199798584, |
|
"eval_runtime": 149.8003, |
|
"eval_samples_per_second": 37.757, |
|
"eval_steps_per_second": 4.72, |
|
"eval_wer": 0.3521850074625668, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4711575687185443, |
|
"eval_loss": 0.44799017906188965, |
|
"eval_runtime": 145.3878, |
|
"eval_samples_per_second": 38.903, |
|
"eval_steps_per_second": 4.863, |
|
"eval_wer": 0.35107765884033315, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.4905149051490514, |
|
"eval_loss": 0.4465474486351013, |
|
"eval_runtime": 145.7643, |
|
"eval_samples_per_second": 38.802, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.3500986984641556, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.5098722415795587, |
|
"eval_loss": 0.44539782404899597, |
|
"eval_runtime": 145.5321, |
|
"eval_samples_per_second": 38.864, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.34281266550047346, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.5292295780100658, |
|
"eval_loss": 0.44271060824394226, |
|
"eval_runtime": 145.0706, |
|
"eval_samples_per_second": 38.988, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.34387186853043605, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"grad_norm": 1.030893087387085, |
|
"learning_rate": 6.341052631578946e-05, |
|
"loss": 0.4089, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"eval_loss": 0.437569797039032, |
|
"eval_runtime": 144.8474, |
|
"eval_samples_per_second": 39.048, |
|
"eval_steps_per_second": 4.881, |
|
"eval_wer": 0.3406942594405482, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5679442508710801, |
|
"eval_loss": 0.4396042823791504, |
|
"eval_runtime": 145.4051, |
|
"eval_samples_per_second": 38.898, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.3415127345091557, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"eval_loss": 0.4342670738697052, |
|
"eval_runtime": 144.5678, |
|
"eval_samples_per_second": 39.124, |
|
"eval_steps_per_second": 4.89, |
|
"eval_wer": 0.34217072427019307, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.6066589237320945, |
|
"eval_loss": 0.4358905851840973, |
|
"eval_runtime": 144.9749, |
|
"eval_samples_per_second": 39.014, |
|
"eval_steps_per_second": 4.877, |
|
"eval_wer": 0.3406461138482772, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.43577995896339417, |
|
"eval_runtime": 144.8242, |
|
"eval_samples_per_second": 39.054, |
|
"eval_steps_per_second": 4.882, |
|
"eval_wer": 0.3373401165123333, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"grad_norm": 0.9704302549362183, |
|
"learning_rate": 4.762105263157894e-05, |
|
"loss": 0.4005, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"eval_loss": 0.4330734312534332, |
|
"eval_runtime": 145.4279, |
|
"eval_samples_per_second": 38.892, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.3364895443822118, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.664730933023616, |
|
"eval_loss": 0.4302414059638977, |
|
"eval_runtime": 144.5881, |
|
"eval_samples_per_second": 39.118, |
|
"eval_steps_per_second": 4.89, |
|
"eval_wer": 0.3352859045754361, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.684088269454123, |
|
"eval_loss": 0.43084925413131714, |
|
"eval_runtime": 145.3335, |
|
"eval_samples_per_second": 38.917, |
|
"eval_steps_per_second": 4.865, |
|
"eval_wer": 0.33549453547527724, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.7034456058846303, |
|
"eval_loss": 0.4258446991443634, |
|
"eval_runtime": 145.2324, |
|
"eval_samples_per_second": 38.944, |
|
"eval_steps_per_second": 4.868, |
|
"eval_wer": 0.335109370737109, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.7228029423151374, |
|
"eval_loss": 0.42223912477493286, |
|
"eval_runtime": 145.1268, |
|
"eval_samples_per_second": 38.973, |
|
"eval_steps_per_second": 4.872, |
|
"eval_wer": 0.33530195310619315, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"grad_norm": 1.2587796449661255, |
|
"learning_rate": 3.186315789473684e-05, |
|
"loss": 0.3879, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"eval_loss": 0.4238055944442749, |
|
"eval_runtime": 145.2053, |
|
"eval_samples_per_second": 38.952, |
|
"eval_steps_per_second": 4.869, |
|
"eval_wer": 0.33119352923239875, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7615176151761518, |
|
"eval_loss": 0.4244863986968994, |
|
"eval_runtime": 145.3408, |
|
"eval_samples_per_second": 38.915, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.3288022981496044, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.7808749516066589, |
|
"eval_loss": 0.42055588960647583, |
|
"eval_runtime": 145.738, |
|
"eval_samples_per_second": 38.809, |
|
"eval_steps_per_second": 4.851, |
|
"eval_wer": 0.32641106706681006, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.800232288037166, |
|
"eval_loss": 0.4201093018054962, |
|
"eval_runtime": 145.1603, |
|
"eval_samples_per_second": 38.964, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.3284492304729502, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.8195896244676733, |
|
"eval_loss": 0.41892749071121216, |
|
"eval_runtime": 150.4269, |
|
"eval_samples_per_second": 37.6, |
|
"eval_steps_per_second": 4.7, |
|
"eval_wer": 0.324597583091268, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"grad_norm": 1.2362818717956543, |
|
"learning_rate": 1.6073684210526313e-05, |
|
"loss": 0.369, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"eval_loss": 0.4160093665122986, |
|
"eval_runtime": 145.5064, |
|
"eval_samples_per_second": 38.871, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.32576912583652967, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8583042973286876, |
|
"eval_loss": 0.4141674041748047, |
|
"eval_runtime": 145.8993, |
|
"eval_samples_per_second": 38.766, |
|
"eval_steps_per_second": 4.846, |
|
"eval_wer": 0.3248062139911091, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.8776616337591947, |
|
"eval_loss": 0.41305750608444214, |
|
"eval_runtime": 146.4678, |
|
"eval_samples_per_second": 38.616, |
|
"eval_steps_per_second": 4.827, |
|
"eval_wer": 0.32517533019852035, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.897018970189702, |
|
"eval_loss": 0.4127916693687439, |
|
"eval_runtime": 145.2897, |
|
"eval_samples_per_second": 38.929, |
|
"eval_steps_per_second": 4.866, |
|
"eval_wer": 0.32276805058496894, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.916376306620209, |
|
"eval_loss": 0.41222211718559265, |
|
"eval_runtime": 145.7491, |
|
"eval_samples_per_second": 38.806, |
|
"eval_steps_per_second": 4.851, |
|
"eval_wer": 0.3221421578854456, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"grad_norm": 1.1011325120925903, |
|
"learning_rate": 2.842105263157894e-07, |
|
"loss": 0.3738, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"eval_loss": 0.4121682941913605, |
|
"eval_runtime": 145.6809, |
|
"eval_samples_per_second": 38.825, |
|
"eval_steps_per_second": 4.853, |
|
"eval_wer": 0.3222865946622587, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"step": 10000, |
|
"total_flos": 1.1255918428180738e+19, |
|
"train_loss": 0.7340839981079101, |
|
"train_runtime": 18566.2158, |
|
"train_samples_per_second": 4.309, |
|
"train_steps_per_second": 0.539 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1255918428180738e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|