{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7858854964831624,
  "eval_steps": 200,
  "global_step": 20000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007858854964831625,
      "eval_loss": 3.1856138706207275,
      "eval_runtime": 145.9906,
      "eval_samples_per_second": 38.742,
      "eval_steps_per_second": 4.843,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 0.01571770992966325,
      "eval_loss": 2.649242877960205,
      "eval_runtime": 145.4142,
      "eval_samples_per_second": 38.896,
      "eval_steps_per_second": 4.862,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 0.01964713741207906,
      "grad_norm": 2.1910934448242188,
      "learning_rate": 0.0002982,
      "loss": 4.6997,
      "step": 500
    },
    {
      "epoch": 0.023576564894494872,
      "eval_loss": 1.3868569135665894,
      "eval_runtime": 144.4791,
      "eval_samples_per_second": 39.148,
      "eval_steps_per_second": 4.893,
      "eval_wer": 0.8722215981126927,
      "step": 600
    },
    {
      "epoch": 0.0314354198593265,
      "eval_loss": 1.230230689048767,
      "eval_runtime": 145.2382,
      "eval_samples_per_second": 38.943,
      "eval_steps_per_second": 4.868,
      "eval_wer": 0.830768243167338,
      "step": 800
    },
    {
      "epoch": 0.03929427482415812,
      "grad_norm": 4.400168418884277,
      "learning_rate": 0.0002923538461538461,
      "loss": 1.0569,
      "step": 1000
    },
    {
      "epoch": 0.03929427482415812,
      "eval_loss": 1.1379698514938354,
      "eval_runtime": 146.1828,
      "eval_samples_per_second": 38.691,
      "eval_steps_per_second": 4.836,
      "eval_wer": 0.795846640240086,
      "step": 1000
    },
    {
      "epoch": 0.047153129788989744,
      "eval_loss": 1.066832184791565,
      "eval_runtime": 145.7914,
      "eval_samples_per_second": 38.795,
      "eval_steps_per_second": 4.849,
      "eval_wer": 0.7697998748214601,
      "step": 1200
    },
    {
      "epoch": 0.055011984753821366,
      "eval_loss": 1.0207505226135254,
      "eval_runtime": 146.2293,
      "eval_samples_per_second": 38.679,
      "eval_steps_per_second": 4.835,
      "eval_wer": 0.7310426730432829,
      "step": 1400
    },
    {
      "epoch": 0.05894141223623718,
      "grad_norm": 3.929137945175171,
      "learning_rate": 0.00028466153846153845,
      "loss": 0.8131,
      "step": 1500
    },
    {
      "epoch": 0.062870839718653,
      "eval_loss": 0.970230758190155,
      "eval_runtime": 145.9066,
      "eval_samples_per_second": 38.765,
      "eval_steps_per_second": 4.846,
      "eval_wer": 0.7151385790630868,
      "step": 1600
    },
    {
      "epoch": 0.07072969468348461,
      "eval_loss": 0.9408352375030518,
      "eval_runtime": 146.5767,
      "eval_samples_per_second": 38.587,
      "eval_steps_per_second": 4.823,
      "eval_wer": 0.6882091444528253,
      "step": 1800
    },
    {
      "epoch": 0.07858854964831624,
      "grad_norm": 2.3885908126831055,
      "learning_rate": 0.0002769692307692307,
      "loss": 0.7194,
      "step": 2000
    },
    {
      "epoch": 0.07858854964831624,
      "eval_loss": 0.9249575138092041,
      "eval_runtime": 145.9151,
      "eval_samples_per_second": 38.762,
      "eval_steps_per_second": 4.845,
      "eval_wer": 0.6804095585049189,
      "step": 2000
    },
    {
      "epoch": 0.08644740461314787,
      "eval_loss": 0.9052397608757019,
      "eval_runtime": 146.242,
      "eval_samples_per_second": 38.676,
      "eval_steps_per_second": 4.834,
      "eval_wer": 0.6726099725570124,
      "step": 2200
    },
    {
      "epoch": 0.09430625957797949,
      "eval_loss": 0.8985734581947327,
      "eval_runtime": 146.806,
      "eval_samples_per_second": 38.527,
      "eval_steps_per_second": 4.816,
      "eval_wer": 0.6573478198070967,
      "step": 2400
    },
    {
      "epoch": 0.0982356870603953,
      "grad_norm": 1.7855921983718872,
      "learning_rate": 0.00026927692307692305,
      "loss": 0.6688,
      "step": 2500
    },
    {
      "epoch": 0.10216511454281112,
      "eval_loss": 0.8814770579338074,
      "eval_runtime": 147.0906,
      "eval_samples_per_second": 38.452,
      "eval_steps_per_second": 4.807,
      "eval_wer": 0.6473495851454799,
      "step": 2600
    },
    {
      "epoch": 0.11002396950764273,
      "eval_loss": 0.858833372592926,
      "eval_runtime": 146.5037,
      "eval_samples_per_second": 38.607,
      "eval_steps_per_second": 4.826,
      "eval_wer": 0.6444608496092182,
      "step": 2800
    },
    {
      "epoch": 0.11788282447247436,
      "grad_norm": 2.42130184173584,
      "learning_rate": 0.00026158461538461537,
      "loss": 0.645,
      "step": 3000
    },
    {
      "epoch": 0.11788282447247436,
      "eval_loss": 0.875824511051178,
      "eval_runtime": 147.7065,
      "eval_samples_per_second": 38.292,
      "eval_steps_per_second": 4.787,
      "eval_wer": 0.6487458073213397,
      "step": 3000
    },
    {
      "epoch": 0.125741679437306,
      "eval_loss": 0.8724836707115173,
      "eval_runtime": 146.8488,
      "eval_samples_per_second": 38.516,
      "eval_steps_per_second": 4.814,
      "eval_wer": 0.6690953443212274,
      "step": 3200
    },
    {
      "epoch": 0.13360053440213762,
      "eval_loss": 0.8295639157295227,
      "eval_runtime": 147.6357,
      "eval_samples_per_second": 38.311,
      "eval_steps_per_second": 4.789,
      "eval_wer": 0.6298085410280689,
      "step": 3400
    },
    {
      "epoch": 0.1375299618845534,
      "grad_norm": 3.927525520324707,
      "learning_rate": 0.0002538923076923077,
      "loss": 0.6077,
      "step": 3500
    },
    {
      "epoch": 0.14145938936696922,
      "eval_loss": 0.8355618715286255,
      "eval_runtime": 147.0713,
      "eval_samples_per_second": 38.458,
      "eval_steps_per_second": 4.807,
      "eval_wer": 0.6551652196241434,
      "step": 3600
    },
    {
      "epoch": 0.14931824433180085,
      "eval_loss": 0.8262892961502075,
      "eval_runtime": 147.1539,
      "eval_samples_per_second": 38.436,
      "eval_steps_per_second": 4.804,
      "eval_wer": 0.6228595272102839,
      "step": 3800
    },
    {
      "epoch": 0.15717709929663248,
      "grad_norm": 2.224973201751709,
      "learning_rate": 0.00024619999999999997,
      "loss": 0.5983,
      "step": 4000
    },
    {
      "epoch": 0.15717709929663248,
      "eval_loss": 0.8710989356040955,
      "eval_runtime": 147.2188,
      "eval_samples_per_second": 38.419,
      "eval_steps_per_second": 4.802,
      "eval_wer": 0.6884659209449375,
      "step": 4000
    },
    {
      "epoch": 0.1650359542614641,
      "eval_loss": 0.7836620807647705,
      "eval_runtime": 148.3617,
      "eval_samples_per_second": 38.123,
      "eval_steps_per_second": 4.765,
      "eval_wer": 0.5918216687262281,
      "step": 4200
    },
    {
      "epoch": 0.17289480922629574,
      "eval_loss": 0.8097087144851685,
      "eval_runtime": 147.4155,
      "eval_samples_per_second": 38.368,
      "eval_steps_per_second": 4.796,
      "eval_wer": 0.659755099420648,
      "step": 4400
    },
    {
      "epoch": 0.17682423670871153,
      "grad_norm": 1.8362923860549927,
      "learning_rate": 0.0002385230769230769,
      "loss": 0.5788,
      "step": 4500
    },
    {
      "epoch": 0.18075366419112734,
      "eval_loss": 0.77768874168396,
      "eval_runtime": 146.8579,
      "eval_samples_per_second": 38.513,
      "eval_steps_per_second": 4.814,
      "eval_wer": 0.5869268668453403,
      "step": 4600
    },
    {
      "epoch": 0.18861251915595897,
      "eval_loss": 0.7912825345993042,
      "eval_runtime": 146.176,
      "eval_samples_per_second": 38.693,
      "eval_steps_per_second": 4.837,
      "eval_wer": 0.5895588258894898,
      "step": 4800
    },
    {
      "epoch": 0.1964713741207906,
      "grad_norm": 3.315845489501953,
      "learning_rate": 0.0002308307692307692,
      "loss": 0.5501,
      "step": 5000
    },
    {
      "epoch": 0.1964713741207906,
      "eval_loss": 0.7924312353134155,
      "eval_runtime": 146.7719,
      "eval_samples_per_second": 38.536,
      "eval_steps_per_second": 4.817,
      "eval_wer": 0.5899760876891721,
      "step": 5000
    },
    {
      "epoch": 0.20433022908562223,
      "eval_loss": 0.7602530717849731,
      "eval_runtime": 146.9845,
      "eval_samples_per_second": 38.48,
      "eval_steps_per_second": 4.81,
      "eval_wer": 0.5737189260323218,
      "step": 5200
    },
    {
      "epoch": 0.21218908405045384,
      "eval_loss": 0.7750186920166016,
      "eval_runtime": 146.5887,
      "eval_samples_per_second": 38.584,
      "eval_steps_per_second": 4.823,
      "eval_wer": 0.5931697453098169,
      "step": 5400
    },
    {
      "epoch": 0.21611851153286965,
      "grad_norm": 9.320504188537598,
      "learning_rate": 0.00022313846153846153,
      "loss": 0.5694,
      "step": 5500
    },
    {
      "epoch": 0.22004793901528547,
      "eval_loss": 0.7516711950302124,
      "eval_runtime": 146.9947,
      "eval_samples_per_second": 38.478,
      "eval_steps_per_second": 4.81,
      "eval_wer": 0.5711190640496863,
      "step": 5600
    },
    {
      "epoch": 0.2279067939801171,
      "eval_loss": 0.7651358842849731,
      "eval_runtime": 146.6177,
      "eval_samples_per_second": 38.577,
      "eval_steps_per_second": 4.822,
      "eval_wer": 0.5698191330583685,
      "step": 5800
    },
    {
      "epoch": 0.23576564894494872,
      "grad_norm": 2.727358102798462,
      "learning_rate": 0.00021544615384615383,
      "loss": 0.5424,
      "step": 6000
    },
    {
      "epoch": 0.23576564894494872,
      "eval_loss": 0.7547870874404907,
      "eval_runtime": 146.6389,
      "eval_samples_per_second": 38.571,
      "eval_steps_per_second": 4.821,
      "eval_wer": 0.5820481134952095,
      "step": 6000
    },
    {
      "epoch": 0.24362450390978035,
      "eval_loss": 0.730515718460083,
      "eval_runtime": 146.763,
      "eval_samples_per_second": 38.538,
      "eval_steps_per_second": 4.817,
      "eval_wer": 0.5681019402673685,
      "step": 6200
    },
    {
      "epoch": 0.251483358874612,
      "eval_loss": 0.7314247488975525,
      "eval_runtime": 147.0063,
      "eval_samples_per_second": 38.475,
      "eval_steps_per_second": 4.809,
      "eval_wer": 0.5589221806743593,
      "step": 6400
    },
    {
      "epoch": 0.2554127863570278,
      "grad_norm": 3.2329583168029785,
      "learning_rate": 0.00020775384615384613,
      "loss": 0.521,
      "step": 6500
    },
    {
      "epoch": 0.2593422138394436,
      "eval_loss": 0.7227704524993896,
      "eval_runtime": 147.7519,
      "eval_samples_per_second": 38.28,
      "eval_steps_per_second": 4.785,
      "eval_wer": 0.565437884161705,
      "step": 6600
    },
    {
      "epoch": 0.26720106880427524,
      "eval_loss": 0.7350090146064758,
      "eval_runtime": 147.7712,
      "eval_samples_per_second": 38.275,
      "eval_steps_per_second": 4.784,
      "eval_wer": 0.5633194781017797,
      "step": 6800
    },
    {
      "epoch": 0.2750599237691068,
      "grad_norm": 3.2193281650543213,
      "learning_rate": 0.00020006153846153843,
      "loss": 0.5119,
      "step": 7000
    },
    {
      "epoch": 0.2750599237691068,
      "eval_loss": 0.7079117298126221,
      "eval_runtime": 146.6317,
      "eval_samples_per_second": 38.573,
      "eval_steps_per_second": 4.822,
      "eval_wer": 0.5346888992312754,
      "step": 7000
    },
    {
      "epoch": 0.28291877873393845,
      "eval_loss": 0.7105109691619873,
      "eval_runtime": 147.7789,
      "eval_samples_per_second": 38.273,
      "eval_steps_per_second": 4.784,
      "eval_wer": 0.5601097719503779,
      "step": 7200
    },
    {
      "epoch": 0.2907776336987701,
      "eval_loss": 0.6876121163368225,
      "eval_runtime": 147.4709,
      "eval_samples_per_second": 38.353,
      "eval_steps_per_second": 4.794,
      "eval_wer": 0.5378344112596491,
      "step": 7400
    },
    {
      "epoch": 0.2947070611811859,
      "grad_norm": 2.7452991008758545,
      "learning_rate": 0.00019236923076923075,
      "loss": 0.5007,
      "step": 7500
    },
    {
      "epoch": 0.2986364886636017,
      "eval_loss": 0.6834765076637268,
      "eval_runtime": 147.74,
      "eval_samples_per_second": 38.283,
      "eval_steps_per_second": 4.785,
      "eval_wer": 0.5303397473961259,
      "step": 7600
    },
    {
      "epoch": 0.30649534362843334,
      "eval_loss": 0.7131712436676025,
      "eval_runtime": 147.6824,
      "eval_samples_per_second": 38.298,
      "eval_steps_per_second": 4.787,
      "eval_wer": 0.5350740639694436,
      "step": 7800
    },
    {
      "epoch": 0.31435419859326497,
      "grad_norm": 2.4165494441986084,
      "learning_rate": 0.00018467692307692308,
      "loss": 0.4934,
      "step": 8000
    },
    {
      "epoch": 0.31435419859326497,
      "eval_loss": 0.697209358215332,
      "eval_runtime": 146.9527,
      "eval_samples_per_second": 38.489,
      "eval_steps_per_second": 4.811,
      "eval_wer": 0.5241771115854343,
      "step": 8000
    },
    {
      "epoch": 0.3222130535580966,
      "eval_loss": 0.680029571056366,
      "eval_runtime": 147.261,
      "eval_samples_per_second": 38.408,
      "eval_steps_per_second": 4.801,
      "eval_wer": 0.5226685496942755,
      "step": 8200
    },
    {
      "epoch": 0.3300719085229282,
      "eval_loss": 0.6915732026100159,
      "eval_runtime": 146.7546,
      "eval_samples_per_second": 38.541,
      "eval_steps_per_second": 4.818,
      "eval_wer": 0.5364702861453033,
      "step": 8400
    },
    {
      "epoch": 0.33400133600534404,
      "grad_norm": 2.036782741546631,
      "learning_rate": 0.00017698461538461537,
      "loss": 0.4762,
      "step": 8500
    },
    {
      "epoch": 0.33793076348775986,
      "eval_loss": 0.6801823377609253,
      "eval_runtime": 147.6195,
      "eval_samples_per_second": 38.315,
      "eval_steps_per_second": 4.789,
      "eval_wer": 0.5255251881690232,
      "step": 8600
    },
    {
      "epoch": 0.3457896184525915,
      "eval_loss": 0.6977699398994446,
      "eval_runtime": 148.3269,
      "eval_samples_per_second": 38.132,
      "eval_steps_per_second": 4.766,
      "eval_wer": 0.5336938903243408,
      "step": 8800
    },
    {
      "epoch": 0.35364847341742306,
      "grad_norm": 3.6703684329986572,
      "learning_rate": 0.0001693076923076923,
      "loss": 0.4774,
      "step": 9000
    },
    {
      "epoch": 0.35364847341742306,
      "eval_loss": 0.6566863059997559,
      "eval_runtime": 147.4913,
      "eval_samples_per_second": 38.348,
      "eval_steps_per_second": 4.794,
      "eval_wer": 0.5210957936800886,
      "step": 9000
    },
    {
      "epoch": 0.3615073283822547,
      "eval_loss": 0.6478887796401978,
      "eval_runtime": 146.7975,
      "eval_samples_per_second": 38.529,
      "eval_steps_per_second": 4.816,
      "eval_wer": 0.5152380799537802,
      "step": 9200
    },
    {
      "epoch": 0.3693661833470863,
      "eval_loss": 0.6551229953765869,
      "eval_runtime": 147.2799,
      "eval_samples_per_second": 38.403,
      "eval_steps_per_second": 4.8,
      "eval_wer": 0.5147405755003129,
      "step": 9400
    },
    {
      "epoch": 0.37329561082950213,
      "grad_norm": 2.4989895820617676,
      "learning_rate": 0.00016161538461538462,
      "loss": 0.4632,
      "step": 9500
    },
    {
      "epoch": 0.37722503831191795,
      "eval_loss": 0.6358110308647156,
      "eval_runtime": 148.0942,
      "eval_samples_per_second": 38.192,
      "eval_steps_per_second": 4.774,
      "eval_wer": 0.4954502415303879,
      "step": 9600
    },
    {
      "epoch": 0.3850838932767496,
      "eval_loss": 0.6466320157051086,
      "eval_runtime": 147.5131,
      "eval_samples_per_second": 38.342,
      "eval_steps_per_second": 4.793,
      "eval_wer": 0.5109049766493877,
      "step": 9800
    },
    {
      "epoch": 0.3929427482415812,
      "grad_norm": 1.681718349456787,
      "learning_rate": 0.00015392307692307691,
      "loss": 0.4483,
      "step": 10000
    },
    {
      "epoch": 0.3929427482415812,
      "eval_loss": 0.6306164264678955,
      "eval_runtime": 147.9882,
      "eval_samples_per_second": 38.219,
      "eval_steps_per_second": 4.777,
      "eval_wer": 0.504421370223556,
      "step": 10000
    },
    {
      "epoch": 0.40080160320641284,
      "eval_loss": 0.6359797716140747,
      "eval_runtime": 147.568,
      "eval_samples_per_second": 38.328,
      "eval_steps_per_second": 4.791,
      "eval_wer": 0.5003771404727897,
      "step": 10200
    },
    {
      "epoch": 0.40866045817124447,
      "eval_loss": 0.6301611661911011,
      "eval_runtime": 146.6632,
      "eval_samples_per_second": 38.565,
      "eval_steps_per_second": 4.821,
      "eval_wer": 0.49135786618735056,
      "step": 10400
    },
    {
      "epoch": 0.4125898856536603,
      "grad_norm": 2.8097307682037354,
      "learning_rate": 0.0001462307692307692,
      "loss": 0.4454,
      "step": 10500
    },
    {
      "epoch": 0.4165193131360761,
      "eval_loss": 0.616253674030304,
      "eval_runtime": 147.6301,
      "eval_samples_per_second": 38.312,
      "eval_steps_per_second": 4.789,
      "eval_wer": 0.48505079359984593,
      "step": 10600
    },
    {
      "epoch": 0.42437816810090767,
      "eval_loss": 0.6221349239349365,
      "eval_runtime": 147.7897,
      "eval_samples_per_second": 38.271,
      "eval_steps_per_second": 4.784,
      "eval_wer": 0.49105294410296735,
      "step": 10800
    },
    {
      "epoch": 0.4322370230657393,
      "grad_norm": 4.2350687980651855,
      "learning_rate": 0.0001385384615384615,
      "loss": 0.4302,
      "step": 11000
    },
    {
      "epoch": 0.4322370230657393,
      "eval_loss": 0.6395624279975891,
      "eval_runtime": 147.7882,
      "eval_samples_per_second": 38.271,
      "eval_steps_per_second": 4.784,
      "eval_wer": 0.5000561698576496,
      "step": 11000
    },
    {
      "epoch": 0.44009587803057093,
      "eval_loss": 0.6212363839149475,
      "eval_runtime": 148.3281,
      "eval_samples_per_second": 38.132,
      "eval_steps_per_second": 4.766,
      "eval_wer": 0.4840557846929114,
      "step": 11200
    },
    {
      "epoch": 0.44795473299540256,
      "eval_loss": 0.6267797946929932,
      "eval_runtime": 147.5749,
      "eval_samples_per_second": 38.326,
      "eval_steps_per_second": 4.791,
      "eval_wer": 0.49379724286241594,
      "step": 11400
    },
    {
      "epoch": 0.4518841604778184,
      "grad_norm": 2.2473807334899902,
      "learning_rate": 0.00013086153846153845,
      "loss": 0.4261,
      "step": 11500
    },
    {
      "epoch": 0.4558135879602342,
      "eval_loss": 0.6097697019577026,
      "eval_runtime": 148.2001,
      "eval_samples_per_second": 38.165,
      "eval_steps_per_second": 4.771,
      "eval_wer": 0.48201762128677117,
      "step": 11600
    },
    {
      "epoch": 0.4636724429250658,
      "eval_loss": 0.6009463667869568,
      "eval_runtime": 147.4302,
      "eval_samples_per_second": 38.364,
      "eval_steps_per_second": 4.795,
      "eval_wer": 0.4689220201890517,
      "step": 11800
    },
    {
      "epoch": 0.47153129788989745,
      "grad_norm": 2.0571179389953613,
      "learning_rate": 0.00012316923076923078,
      "loss": 0.4026,
      "step": 12000
    },
    {
      "epoch": 0.47153129788989745,
      "eval_loss": 0.609122633934021,
      "eval_runtime": 147.8463,
      "eval_samples_per_second": 38.256,
      "eval_steps_per_second": 4.782,
      "eval_wer": 0.4809584182568086,
      "step": 12000
    },
    {
      "epoch": 0.4793901528547291,
      "eval_loss": 0.6019255518913269,
      "eval_runtime": 148.5912,
      "eval_samples_per_second": 38.064,
      "eval_steps_per_second": 4.758,
      "eval_wer": 0.4805732535186404,
      "step": 12200
    },
    {
      "epoch": 0.4872490078195607,
      "eval_loss": 0.5946715474128723,
      "eval_runtime": 147.4021,
      "eval_samples_per_second": 38.371,
      "eval_steps_per_second": 4.796,
      "eval_wer": 0.4671085362135097,
      "step": 12400
    },
    {
      "epoch": 0.4911784353019765,
      "grad_norm": 2.5033822059631348,
      "learning_rate": 0.00011547692307692306,
      "loss": 0.4027,
      "step": 12500
    },
    {
      "epoch": 0.49510786278439234,
      "eval_loss": 0.5993836522102356,
      "eval_runtime": 147.7878,
      "eval_samples_per_second": 38.271,
      "eval_steps_per_second": 4.784,
      "eval_wer": 0.47092808653367785,
      "step": 12600
    },
    {
      "epoch": 0.502966717749224,
      "eval_loss": 0.5981957912445068,
      "eval_runtime": 147.5137,
      "eval_samples_per_second": 38.342,
      "eval_steps_per_second": 4.793,
      "eval_wer": 0.4760796649066778,
      "step": 12800
    },
    {
      "epoch": 0.5108255727140556,
      "grad_norm": 3.0013859272003174,
      "learning_rate": 0.00010778461538461537,
      "loss": 0.3978,
      "step": 13000
    },
    {
      "epoch": 0.5108255727140556,
      "eval_loss": 0.5889731645584106,
      "eval_runtime": 148.3841,
      "eval_samples_per_second": 38.117,
      "eval_steps_per_second": 4.765,
      "eval_wer": 0.4632087432395564,
      "step": 13000
    },
    {
      "epoch": 0.5186844276788872,
      "eval_loss": 0.5871375799179077,
      "eval_runtime": 147.6289,
      "eval_samples_per_second": 38.312,
      "eval_steps_per_second": 4.789,
      "eval_wer": 0.4567090882829677,
      "step": 13200
    },
    {
      "epoch": 0.5265432826437189,
      "eval_loss": 0.5873442888259888,
      "eval_runtime": 148.2022,
      "eval_samples_per_second": 38.164,
      "eval_steps_per_second": 4.771,
      "eval_wer": 0.4634815682624256,
      "step": 13400
    },
    {
      "epoch": 0.5304727101261346,
      "grad_norm": 3.4298863410949707,
      "learning_rate": 0.00010009230769230768,
      "loss": 0.3875,
      "step": 13500
    },
    {
      "epoch": 0.5344021376085505,
      "eval_loss": 0.5772218704223633,
      "eval_runtime": 148.5655,
      "eval_samples_per_second": 38.071,
      "eval_steps_per_second": 4.759,
      "eval_wer": 0.4538684983389771,
      "step": 13600
    },
    {
      "epoch": 0.542260992573382,
      "eval_loss": 0.5603720545768738,
      "eval_runtime": 147.4995,
      "eval_samples_per_second": 38.346,
      "eval_steps_per_second": 4.793,
      "eval_wer": 0.4419123429250052,
      "step": 13800
    },
    {
      "epoch": 0.5501198475382136,
      "grad_norm": 2.6594979763031006,
      "learning_rate": 9.24e-05,
      "loss": 0.404,
      "step": 14000
    },
    {
      "epoch": 0.5501198475382136,
      "eval_loss": 0.5688683986663818,
      "eval_runtime": 147.0269,
      "eval_samples_per_second": 38.469,
      "eval_steps_per_second": 4.809,
      "eval_wer": 0.4454269711607902,
      "step": 14000
    },
    {
      "epoch": 0.5579787025030453,
      "eval_loss": 0.5594531893730164,
      "eval_runtime": 147.2136,
      "eval_samples_per_second": 38.42,
      "eval_steps_per_second": 4.803,
      "eval_wer": 0.443292516570108,
      "step": 14200
    },
    {
      "epoch": 0.5658375574678769,
      "eval_loss": 0.5574955940246582,
      "eval_runtime": 147.8377,
      "eval_samples_per_second": 38.258,
      "eval_steps_per_second": 4.782,
      "eval_wer": 0.4405803148721735,
      "step": 14400
    },
    {
      "epoch": 0.5697669849502928,
      "grad_norm": 1.834619402885437,
      "learning_rate": 8.472307692307691e-05,
      "loss": 0.3878,
      "step": 14500
    },
    {
      "epoch": 0.5736964124327085,
      "eval_loss": 0.5521669983863831,
      "eval_runtime": 147.831,
      "eval_samples_per_second": 38.26,
      "eval_steps_per_second": 4.782,
      "eval_wer": 0.43528429972236043,
      "step": 14600
    },
    {
      "epoch": 0.5815552673975402,
      "eval_loss": 0.5521777272224426,
      "eval_runtime": 148.1309,
      "eval_samples_per_second": 38.182,
      "eval_steps_per_second": 4.773,
      "eval_wer": 0.4351719600070614,
      "step": 14800
    },
    {
      "epoch": 0.5894141223623718,
      "grad_norm": 2.739065408706665,
      "learning_rate": 7.703076923076922e-05,
      "loss": 0.3622,
      "step": 15000
    },
    {
      "epoch": 0.5894141223623718,
      "eval_loss": 0.5570353865623474,
      "eval_runtime": 147.6686,
      "eval_samples_per_second": 38.302,
      "eval_steps_per_second": 4.788,
      "eval_wer": 0.4401309560109772,
      "step": 15000
    },
    {
      "epoch": 0.5972729773272034,
      "eval_loss": 0.5467315912246704,
      "eval_runtime": 147.8113,
      "eval_samples_per_second": 38.265,
      "eval_steps_per_second": 4.783,
      "eval_wer": 0.42803036382019227,
      "step": 15200
    },
    {
      "epoch": 0.605131832292035,
      "eval_loss": 0.5510929822921753,
      "eval_runtime": 148.0412,
      "eval_samples_per_second": 38.206,
      "eval_steps_per_second": 4.776,
      "eval_wer": 0.43404856285407073,
      "step": 15400
    },
    {
      "epoch": 0.6090612597744509,
      "grad_norm": 3.7551369667053223,
      "learning_rate": 6.933846153846154e-05,
      "loss": 0.3545,
      "step": 15500
    },
    {
      "epoch": 0.6129906872568667,
      "eval_loss": 0.5437116026878357,
      "eval_runtime": 147.4367,
      "eval_samples_per_second": 38.362,
      "eval_steps_per_second": 4.795,
      "eval_wer": 0.4245478326459213,
      "step": 15600
    },
    {
      "epoch": 0.6208495422216983,
      "eval_loss": 0.5488719940185547,
      "eval_runtime": 147.4687,
      "eval_samples_per_second": 38.354,
      "eval_steps_per_second": 4.794,
      "eval_wer": 0.4296352168958932,
      "step": 15800
    },
    {
      "epoch": 0.6287083971865299,
      "grad_norm": 1.4097563028335571,
      "learning_rate": 6.164615384615383e-05,
      "loss": 0.3486,
      "step": 16000
    },
    {
      "epoch": 0.6287083971865299,
      "eval_loss": 0.5420017242431641,
      "eval_runtime": 147.3988,
      "eval_samples_per_second": 38.372,
      "eval_steps_per_second": 4.797,
      "eval_wer": 0.4277896358588371,
      "step": 16000
    },
    {
      "epoch": 0.6365672521513616,
      "eval_loss": 0.5352106094360352,
      "eval_runtime": 148.3938,
      "eval_samples_per_second": 38.115,
      "eval_steps_per_second": 4.764,
      "eval_wer": 0.4212739323714914,
      "step": 16200
    },
    {
      "epoch": 0.6444261071161932,
      "eval_loss": 0.5376533269882202,
      "eval_runtime": 147.9443,
      "eval_samples_per_second": 38.231,
      "eval_steps_per_second": 4.779,
      "eval_wer": 0.42592800629102406,
      "step": 16400
    },
    {
      "epoch": 0.648355534598609,
      "grad_norm": 2.8873980045318604,
      "learning_rate": 5.395384615384615e-05,
      "loss": 0.3374,
      "step": 16500
    },
    {
      "epoch": 0.6522849620810248,
      "eval_loss": 0.5336429476737976,
      "eval_runtime": 149.7227,
      "eval_samples_per_second": 37.776,
      "eval_steps_per_second": 4.722,
      "eval_wer": 0.4305018375567717,
      "step": 16600
    },
    {
      "epoch": 0.6601438170458565,
      "eval_loss": 0.5293694734573364,
      "eval_runtime": 148.8049,
      "eval_samples_per_second": 38.01,
      "eval_steps_per_second": 4.751,
      "eval_wer": 0.41875431304264094,
      "step": 16800
    },
    {
      "epoch": 0.6680026720106881,
      "grad_norm": 3.3759422302246094,
      "learning_rate": 4.6276923076923074e-05,
      "loss": 0.3389,
      "step": 17000
    },
    {
      "epoch": 0.6680026720106881,
      "eval_loss": 0.5252575278282166,
      "eval_runtime": 149.4186,
      "eval_samples_per_second": 37.853,
      "eval_steps_per_second": 4.732,
      "eval_wer": 0.4169408290670989,
      "step": 17000
    },
    {
      "epoch": 0.6758615269755197,
      "eval_loss": 0.5194066166877747,
      "eval_runtime": 147.8703,
      "eval_samples_per_second": 38.25,
      "eval_steps_per_second": 4.781,
      "eval_wer": 0.41443725826900546,
      "step": 17200
    },
    {
      "epoch": 0.6837203819403513,
      "eval_loss": 0.5231760740280151,
      "eval_runtime": 147.3205,
      "eval_samples_per_second": 38.392,
      "eval_steps_per_second": 4.799,
      "eval_wer": 0.417101314374669,
      "step": 17400
    },
    {
      "epoch": 0.6876498094227671,
      "grad_norm": 3.4031200408935547,
      "learning_rate": 3.858461538461538e-05,
      "loss": 0.3258,
      "step": 17500
    },
    {
      "epoch": 0.691579236905183,
      "eval_loss": 0.5179495811462402,
      "eval_runtime": 148.0293,
      "eval_samples_per_second": 38.209,
      "eval_steps_per_second": 4.776,
      "eval_wer": 0.41653961579817367,
      "step": 17600
    },
    {
      "epoch": 0.6994380918700145,
      "eval_loss": 0.5132375359535217,
      "eval_runtime": 149.1897,
      "eval_samples_per_second": 37.911,
      "eval_steps_per_second": 4.739,
      "eval_wer": 0.4103769799874821,
      "step": 17800
    },
    {
      "epoch": 0.7072969468348461,
      "grad_norm": 4.0969133377075195,
      "learning_rate": 3.090769230769231e-05,
      "loss": 0.327,
      "step": 18000
    },
    {
      "epoch": 0.7072969468348461,
      "eval_loss": 0.5096033215522766,
      "eval_runtime": 148.384,
      "eval_samples_per_second": 38.117,
      "eval_steps_per_second": 4.765,
      "eval_wer": 0.40440692654587473,
      "step": 18000
    },
    {
      "epoch": 0.7151558017996777,
      "eval_loss": 0.5040988922119141,
      "eval_runtime": 148.3441,
      "eval_samples_per_second": 38.128,
      "eval_steps_per_second": 4.766,
      "eval_wer": 0.40337982057742616,
      "step": 18200
    },
    {
      "epoch": 0.7230146567645094,
      "eval_loss": 0.5013387203216553,
      "eval_runtime": 147.8316,
      "eval_samples_per_second": 38.26,
      "eval_steps_per_second": 4.782,
      "eval_wer": 0.3980677568968561,
      "step": 18400
    },
    {
      "epoch": 0.7269440842469252,
      "grad_norm": 2.58701491355896,
      "learning_rate": 2.3215384615384613e-05,
      "loss": 0.316,
      "step": 18500
    },
    {
      "epoch": 0.730873511729341,
      "eval_loss": 0.5074244737625122,
      "eval_runtime": 148.936,
      "eval_samples_per_second": 37.976,
      "eval_steps_per_second": 4.747,
      "eval_wer": 0.4064611384827719,
      "step": 18600
    },
    {
      "epoch": 0.7387323666941726,
      "eval_loss": 0.5014389753341675,
      "eval_runtime": 148.0107,
      "eval_samples_per_second": 38.213,
      "eval_steps_per_second": 4.777,
      "eval_wer": 0.40545008104508035,
      "step": 18800
    },
    {
      "epoch": 0.7465912216590043,
      "grad_norm": 1.975710391998291,
      "learning_rate": 1.552307692307692e-05,
      "loss": 0.3162,
      "step": 19000
    },
    {
      "epoch": 0.7465912216590043,
      "eval_loss": 0.4958648383617401,
      "eval_runtime": 148.7492,
      "eval_samples_per_second": 38.024,
      "eval_steps_per_second": 4.753,
      "eval_wer": 0.3998170467493701,
      "step": 19000
    },
    {
      "epoch": 0.7544500766238359,
      "eval_loss": 0.4930271506309509,
      "eval_runtime": 147.4379,
      "eval_samples_per_second": 38.362,
      "eval_steps_per_second": 4.795,
      "eval_wer": 0.39818009661215514,
      "step": 19200
    },
    {
      "epoch": 0.7623089315886675,
      "eval_loss": 0.4924609363079071,
      "eval_runtime": 149.2355,
      "eval_samples_per_second": 37.9,
      "eval_steps_per_second": 4.737,
      "eval_wer": 0.39821219367366917,
      "step": 19400
    },
    {
      "epoch": 0.7662383590710834,
      "grad_norm": 2.332016944885254,
      "learning_rate": 7.83076923076923e-06,
      "loss": 0.3145,
      "step": 19500
    },
    {
      "epoch": 0.7701677865534992,
      "eval_loss": 0.4922255575656891,
      "eval_runtime": 148.1982,
      "eval_samples_per_second": 38.165,
      "eval_steps_per_second": 4.771,
      "eval_wer": 0.3970246023976505,
      "step": 19600
    },
    {
      "epoch": 0.7780266415183308,
      "eval_loss": 0.49084585905075073,
      "eval_runtime": 148.8021,
      "eval_samples_per_second": 38.01,
      "eval_steps_per_second": 4.751,
      "eval_wer": 0.39692831121310845,
      "step": 19800
    },
    {
      "epoch": 0.7858854964831624,
      "grad_norm": 5.262033462524414,
      "learning_rate": 1.5384615384615385e-07,
      "loss": 0.3095,
      "step": 20000
    },
    {
      "epoch": 0.7858854964831624,
      "eval_loss": 0.4908364713191986,
      "eval_runtime": 148.1385,
      "eval_samples_per_second": 38.18,
      "eval_steps_per_second": 4.773,
      "eval_wer": 0.39638266116737014,
      "step": 20000
    },
    {
      "epoch": 0.7858854964831624,
      "step": 20000,
      "total_flos": 2.4863424513490096e+19,
      "train_loss": 0.5819183097839356,
      "train_runtime": 23325.1985,
      "train_samples_per_second": 6.86,
      "train_steps_per_second": 0.857
    }
  ],
  "logging_steps": 500,
  "max_steps": 20000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4863424513490096e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}