{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7858854964831624, "eval_steps": 200, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007858854964831625, "eval_loss": 3.1856138706207275, "eval_runtime": 145.9906, "eval_samples_per_second": 38.742, "eval_steps_per_second": 4.843, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.01571770992966325, "eval_loss": 2.649242877960205, "eval_runtime": 145.4142, "eval_samples_per_second": 38.896, "eval_steps_per_second": 4.862, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.01964713741207906, "grad_norm": 2.1910934448242188, "learning_rate": 0.0002982, "loss": 4.6997, "step": 500 }, { "epoch": 0.023576564894494872, "eval_loss": 1.3868569135665894, "eval_runtime": 144.4791, "eval_samples_per_second": 39.148, "eval_steps_per_second": 4.893, "eval_wer": 0.8722215981126927, "step": 600 }, { "epoch": 0.0314354198593265, "eval_loss": 1.230230689048767, "eval_runtime": 145.2382, "eval_samples_per_second": 38.943, "eval_steps_per_second": 4.868, "eval_wer": 0.830768243167338, "step": 800 }, { "epoch": 0.03929427482415812, "grad_norm": 4.400168418884277, "learning_rate": 0.0002923538461538461, "loss": 1.0569, "step": 1000 }, { "epoch": 0.03929427482415812, "eval_loss": 1.1379698514938354, "eval_runtime": 146.1828, "eval_samples_per_second": 38.691, "eval_steps_per_second": 4.836, "eval_wer": 0.795846640240086, "step": 1000 }, { "epoch": 0.047153129788989744, "eval_loss": 1.066832184791565, "eval_runtime": 145.7914, "eval_samples_per_second": 38.795, "eval_steps_per_second": 4.849, "eval_wer": 0.7697998748214601, "step": 1200 }, { "epoch": 0.055011984753821366, "eval_loss": 1.0207505226135254, "eval_runtime": 146.2293, "eval_samples_per_second": 38.679, "eval_steps_per_second": 4.835, "eval_wer": 0.7310426730432829, "step": 1400 }, { "epoch": 0.05894141223623718, "grad_norm": 3.929137945175171, "learning_rate": 0.00028466153846153845, "loss": 0.8131, "step": 1500 }, { "epoch": 0.062870839718653, "eval_loss": 0.970230758190155, "eval_runtime": 145.9066, "eval_samples_per_second": 38.765, "eval_steps_per_second": 4.846, "eval_wer": 0.7151385790630868, "step": 1600 }, { "epoch": 0.07072969468348461, "eval_loss": 0.9408352375030518, "eval_runtime": 146.5767, "eval_samples_per_second": 38.587, "eval_steps_per_second": 4.823, "eval_wer": 0.6882091444528253, "step": 1800 }, { "epoch": 0.07858854964831624, "grad_norm": 2.3885908126831055, "learning_rate": 0.0002769692307692307, "loss": 0.7194, "step": 2000 }, { "epoch": 0.07858854964831624, "eval_loss": 0.9249575138092041, "eval_runtime": 145.9151, "eval_samples_per_second": 38.762, "eval_steps_per_second": 4.845, "eval_wer": 0.6804095585049189, "step": 2000 }, { "epoch": 0.08644740461314787, "eval_loss": 0.9052397608757019, "eval_runtime": 146.242, "eval_samples_per_second": 38.676, "eval_steps_per_second": 4.834, "eval_wer": 0.6726099725570124, "step": 2200 }, { "epoch": 0.09430625957797949, "eval_loss": 0.8985734581947327, "eval_runtime": 146.806, "eval_samples_per_second": 38.527, "eval_steps_per_second": 4.816, "eval_wer": 0.6573478198070967, "step": 2400 }, { "epoch": 0.0982356870603953, "grad_norm": 1.7855921983718872, "learning_rate": 0.00026927692307692305, "loss": 0.6688, "step": 2500 }, { "epoch": 0.10216511454281112, "eval_loss": 0.8814770579338074, "eval_runtime": 147.0906, "eval_samples_per_second": 38.452, "eval_steps_per_second": 4.807, "eval_wer": 0.6473495851454799, "step": 2600 }, { "epoch": 0.11002396950764273, "eval_loss": 0.858833372592926, "eval_runtime": 146.5037, "eval_samples_per_second": 38.607, "eval_steps_per_second": 4.826, "eval_wer": 0.6444608496092182, "step": 2800 }, { "epoch": 0.11788282447247436, "grad_norm": 2.42130184173584, "learning_rate": 0.00026158461538461537, "loss": 0.645, "step": 3000 }, { "epoch": 0.11788282447247436, "eval_loss": 0.875824511051178, "eval_runtime": 147.7065, "eval_samples_per_second": 38.292, "eval_steps_per_second": 4.787, "eval_wer": 0.6487458073213397, "step": 3000 }, { "epoch": 0.125741679437306, "eval_loss": 0.8724836707115173, "eval_runtime": 146.8488, "eval_samples_per_second": 38.516, "eval_steps_per_second": 4.814, "eval_wer": 0.6690953443212274, "step": 3200 }, { "epoch": 0.13360053440213762, "eval_loss": 0.8295639157295227, "eval_runtime": 147.6357, "eval_samples_per_second": 38.311, "eval_steps_per_second": 4.789, "eval_wer": 0.6298085410280689, "step": 3400 }, { "epoch": 0.1375299618845534, "grad_norm": 3.927525520324707, "learning_rate": 0.0002538923076923077, "loss": 0.6077, "step": 3500 }, { "epoch": 0.14145938936696922, "eval_loss": 0.8355618715286255, "eval_runtime": 147.0713, "eval_samples_per_second": 38.458, "eval_steps_per_second": 4.807, "eval_wer": 0.6551652196241434, "step": 3600 }, { "epoch": 0.14931824433180085, "eval_loss": 0.8262892961502075, "eval_runtime": 147.1539, "eval_samples_per_second": 38.436, "eval_steps_per_second": 4.804, "eval_wer": 0.6228595272102839, "step": 3800 }, { "epoch": 0.15717709929663248, "grad_norm": 2.224973201751709, "learning_rate": 0.00024619999999999997, "loss": 0.5983, "step": 4000 }, { "epoch": 0.15717709929663248, "eval_loss": 0.8710989356040955, "eval_runtime": 147.2188, "eval_samples_per_second": 38.419, "eval_steps_per_second": 4.802, "eval_wer": 0.6884659209449375, "step": 4000 }, { "epoch": 0.1650359542614641, "eval_loss": 0.7836620807647705, "eval_runtime": 148.3617, "eval_samples_per_second": 38.123, "eval_steps_per_second": 4.765, "eval_wer": 0.5918216687262281, "step": 4200 }, { "epoch": 0.17289480922629574, "eval_loss": 0.8097087144851685, "eval_runtime": 147.4155, "eval_samples_per_second": 38.368, "eval_steps_per_second": 4.796, "eval_wer": 0.659755099420648, "step": 4400 }, { "epoch": 0.17682423670871153, "grad_norm": 1.8362923860549927, "learning_rate": 0.0002385230769230769, "loss": 0.5788, "step": 4500 }, { "epoch": 0.18075366419112734, "eval_loss": 0.77768874168396, "eval_runtime": 146.8579, "eval_samples_per_second": 38.513, "eval_steps_per_second": 4.814, "eval_wer": 0.5869268668453403, "step": 4600 }, { "epoch": 0.18861251915595897, "eval_loss": 0.7912825345993042, "eval_runtime": 146.176, "eval_samples_per_second": 38.693, "eval_steps_per_second": 4.837, "eval_wer": 0.5895588258894898, "step": 4800 }, { "epoch": 0.1964713741207906, "grad_norm": 3.315845489501953, "learning_rate": 0.0002308307692307692, "loss": 0.5501, "step": 5000 }, { "epoch": 0.1964713741207906, "eval_loss": 0.7924312353134155, "eval_runtime": 146.7719, "eval_samples_per_second": 38.536, "eval_steps_per_second": 4.817, "eval_wer": 0.5899760876891721, "step": 5000 }, { "epoch": 0.20433022908562223, "eval_loss": 0.7602530717849731, "eval_runtime": 146.9845, "eval_samples_per_second": 38.48, "eval_steps_per_second": 4.81, "eval_wer": 0.5737189260323218, "step": 5200 }, { "epoch": 0.21218908405045384, "eval_loss": 0.7750186920166016, "eval_runtime": 146.5887, "eval_samples_per_second": 38.584, "eval_steps_per_second": 4.823, "eval_wer": 0.5931697453098169, "step": 5400 }, { "epoch": 0.21611851153286965, "grad_norm": 9.320504188537598, "learning_rate": 0.00022313846153846153, "loss": 0.5694, "step": 5500 }, { "epoch": 0.22004793901528547, "eval_loss": 0.7516711950302124, "eval_runtime": 146.9947, "eval_samples_per_second": 38.478, "eval_steps_per_second": 4.81, "eval_wer": 0.5711190640496863, "step": 5600 }, { "epoch": 0.2279067939801171, "eval_loss": 0.7651358842849731, "eval_runtime": 146.6177, "eval_samples_per_second": 38.577, "eval_steps_per_second": 4.822, "eval_wer": 0.5698191330583685, "step": 5800 }, { "epoch": 0.23576564894494872, "grad_norm": 2.727358102798462, "learning_rate": 0.00021544615384615383, "loss": 0.5424, "step": 6000 }, { "epoch": 0.23576564894494872, "eval_loss": 0.7547870874404907, "eval_runtime": 146.6389, "eval_samples_per_second": 38.571, "eval_steps_per_second": 4.821, "eval_wer": 0.5820481134952095, "step": 6000 }, { "epoch": 0.24362450390978035, "eval_loss": 0.730515718460083, "eval_runtime": 146.763, "eval_samples_per_second": 38.538, "eval_steps_per_second": 4.817, "eval_wer": 0.5681019402673685, "step": 6200 }, { "epoch": 0.251483358874612, "eval_loss": 0.7314247488975525, "eval_runtime": 147.0063, "eval_samples_per_second": 38.475, "eval_steps_per_second": 4.809, "eval_wer": 0.5589221806743593, "step": 6400 }, { "epoch": 0.2554127863570278, "grad_norm": 3.2329583168029785, "learning_rate": 0.00020775384615384613, "loss": 0.521, "step": 6500 }, { "epoch": 0.2593422138394436, "eval_loss": 0.7227704524993896, "eval_runtime": 147.7519, "eval_samples_per_second": 38.28, "eval_steps_per_second": 4.785, "eval_wer": 0.565437884161705, "step": 6600 }, { "epoch": 0.26720106880427524, "eval_loss": 0.7350090146064758, "eval_runtime": 147.7712, "eval_samples_per_second": 38.275, "eval_steps_per_second": 4.784, "eval_wer": 0.5633194781017797, "step": 6800 }, { "epoch": 0.2750599237691068, "grad_norm": 3.2193281650543213, "learning_rate": 0.00020006153846153843, "loss": 0.5119, "step": 7000 }, { "epoch": 0.2750599237691068, "eval_loss": 0.7079117298126221, "eval_runtime": 146.6317, "eval_samples_per_second": 38.573, "eval_steps_per_second": 4.822, "eval_wer": 0.5346888992312754, "step": 7000 }, { "epoch": 0.28291877873393845, "eval_loss": 0.7105109691619873, "eval_runtime": 147.7789, "eval_samples_per_second": 38.273, "eval_steps_per_second": 4.784, "eval_wer": 0.5601097719503779, "step": 7200 }, { "epoch": 0.2907776336987701, "eval_loss": 0.6876121163368225, "eval_runtime": 147.4709, "eval_samples_per_second": 38.353, "eval_steps_per_second": 4.794, "eval_wer": 0.5378344112596491, "step": 7400 }, { "epoch": 0.2947070611811859, "grad_norm": 2.7452991008758545, "learning_rate": 0.00019236923076923075, "loss": 0.5007, "step": 7500 }, { "epoch": 0.2986364886636017, "eval_loss": 0.6834765076637268, "eval_runtime": 147.74, "eval_samples_per_second": 38.283, "eval_steps_per_second": 4.785, "eval_wer": 0.5303397473961259, "step": 7600 }, { "epoch": 0.30649534362843334, "eval_loss": 0.7131712436676025, "eval_runtime": 147.6824, "eval_samples_per_second": 38.298, "eval_steps_per_second": 4.787, "eval_wer": 0.5350740639694436, "step": 7800 }, { "epoch": 0.31435419859326497, "grad_norm": 2.4165494441986084, "learning_rate": 0.00018467692307692308, "loss": 0.4934, "step": 8000 }, { "epoch": 0.31435419859326497, "eval_loss": 0.697209358215332, "eval_runtime": 146.9527, "eval_samples_per_second": 38.489, "eval_steps_per_second": 4.811, "eval_wer": 0.5241771115854343, "step": 8000 }, { "epoch": 0.3222130535580966, "eval_loss": 0.680029571056366, "eval_runtime": 147.261, "eval_samples_per_second": 38.408, "eval_steps_per_second": 4.801, "eval_wer": 0.5226685496942755, "step": 8200 }, { "epoch": 0.3300719085229282, "eval_loss": 0.6915732026100159, "eval_runtime": 146.7546, "eval_samples_per_second": 38.541, "eval_steps_per_second": 4.818, "eval_wer": 0.5364702861453033, "step": 8400 }, { "epoch": 0.33400133600534404, "grad_norm": 2.036782741546631, "learning_rate": 0.00017698461538461537, "loss": 0.4762, "step": 8500 }, { "epoch": 0.33793076348775986, "eval_loss": 0.6801823377609253, "eval_runtime": 147.6195, "eval_samples_per_second": 38.315, "eval_steps_per_second": 4.789, "eval_wer": 0.5255251881690232, "step": 8600 }, { "epoch": 0.3457896184525915, "eval_loss": 0.6977699398994446, "eval_runtime": 148.3269, "eval_samples_per_second": 38.132, "eval_steps_per_second": 4.766, "eval_wer": 0.5336938903243408, "step": 8800 }, { "epoch": 0.35364847341742306, "grad_norm": 3.6703684329986572, "learning_rate": 0.0001693076923076923, "loss": 0.4774, "step": 9000 }, { "epoch": 0.35364847341742306, "eval_loss": 0.6566863059997559, "eval_runtime": 147.4913, "eval_samples_per_second": 38.348, "eval_steps_per_second": 4.794, "eval_wer": 0.5210957936800886, "step": 9000 }, { "epoch": 0.3615073283822547, "eval_loss": 0.6478887796401978, "eval_runtime": 146.7975, "eval_samples_per_second": 38.529, "eval_steps_per_second": 4.816, "eval_wer": 0.5152380799537802, "step": 9200 }, { "epoch": 0.3693661833470863, "eval_loss": 0.6551229953765869, "eval_runtime": 147.2799, "eval_samples_per_second": 38.403, "eval_steps_per_second": 4.8, "eval_wer": 0.5147405755003129, "step": 9400 }, { "epoch": 0.37329561082950213, "grad_norm": 2.4989895820617676, "learning_rate": 0.00016161538461538462, "loss": 0.4632, "step": 9500 }, { "epoch": 0.37722503831191795, "eval_loss": 0.6358110308647156, "eval_runtime": 148.0942, "eval_samples_per_second": 38.192, "eval_steps_per_second": 4.774, "eval_wer": 0.4954502415303879, "step": 9600 }, { "epoch": 0.3850838932767496, "eval_loss": 0.6466320157051086, "eval_runtime": 147.5131, "eval_samples_per_second": 38.342, "eval_steps_per_second": 4.793, "eval_wer": 0.5109049766493877, "step": 9800 }, { "epoch": 0.3929427482415812, "grad_norm": 1.681718349456787, "learning_rate": 0.00015392307692307691, "loss": 0.4483, "step": 10000 }, { "epoch": 0.3929427482415812, "eval_loss": 0.6306164264678955, "eval_runtime": 147.9882, "eval_samples_per_second": 38.219, "eval_steps_per_second": 4.777, "eval_wer": 0.504421370223556, "step": 10000 }, { "epoch": 0.40080160320641284, "eval_loss": 0.6359797716140747, "eval_runtime": 147.568, "eval_samples_per_second": 38.328, "eval_steps_per_second": 4.791, "eval_wer": 0.5003771404727897, "step": 10200 }, { "epoch": 0.40866045817124447, "eval_loss": 0.6301611661911011, "eval_runtime": 146.6632, "eval_samples_per_second": 38.565, "eval_steps_per_second": 4.821, "eval_wer": 0.49135786618735056, "step": 10400 }, { "epoch": 0.4125898856536603, "grad_norm": 2.8097307682037354, "learning_rate": 0.0001462307692307692, "loss": 0.4454, "step": 10500 }, { "epoch": 0.4165193131360761, "eval_loss": 0.616253674030304, "eval_runtime": 147.6301, "eval_samples_per_second": 38.312, "eval_steps_per_second": 4.789, "eval_wer": 0.48505079359984593, "step": 10600 }, { "epoch": 0.42437816810090767, "eval_loss": 0.6221349239349365, "eval_runtime": 147.7897, "eval_samples_per_second": 38.271, "eval_steps_per_second": 4.784, "eval_wer": 0.49105294410296735, "step": 10800 }, { "epoch": 0.4322370230657393, "grad_norm": 4.2350687980651855, "learning_rate": 0.0001385384615384615, "loss": 0.4302, "step": 11000 }, { "epoch": 0.4322370230657393, "eval_loss": 0.6395624279975891, "eval_runtime": 147.7882, "eval_samples_per_second": 38.271, "eval_steps_per_second": 4.784, "eval_wer": 0.5000561698576496, "step": 11000 }, { "epoch": 0.44009587803057093, "eval_loss": 0.6212363839149475, "eval_runtime": 148.3281, "eval_samples_per_second": 38.132, "eval_steps_per_second": 4.766, "eval_wer": 0.4840557846929114, "step": 11200 }, { "epoch": 0.44795473299540256, "eval_loss": 0.6267797946929932, "eval_runtime": 147.5749, "eval_samples_per_second": 38.326, "eval_steps_per_second": 4.791, "eval_wer": 0.49379724286241594, "step": 11400 }, { "epoch": 0.4518841604778184, "grad_norm": 2.2473807334899902, "learning_rate": 0.00013086153846153845, "loss": 0.4261, "step": 11500 }, { "epoch": 0.4558135879602342, "eval_loss": 0.6097697019577026, "eval_runtime": 148.2001, "eval_samples_per_second": 38.165, "eval_steps_per_second": 4.771, "eval_wer": 0.48201762128677117, "step": 11600 }, { "epoch": 0.4636724429250658, "eval_loss": 0.6009463667869568, "eval_runtime": 147.4302, "eval_samples_per_second": 38.364, "eval_steps_per_second": 4.795, "eval_wer": 0.4689220201890517, "step": 11800 }, { "epoch": 0.47153129788989745, "grad_norm": 2.0571179389953613, "learning_rate": 0.00012316923076923078, "loss": 0.4026, "step": 12000 }, { "epoch": 0.47153129788989745, "eval_loss": 0.609122633934021, "eval_runtime": 147.8463, "eval_samples_per_second": 38.256, "eval_steps_per_second": 4.782, "eval_wer": 0.4809584182568086, "step": 12000 }, { "epoch": 0.4793901528547291, "eval_loss": 0.6019255518913269, "eval_runtime": 148.5912, "eval_samples_per_second": 38.064, "eval_steps_per_second": 4.758, "eval_wer": 0.4805732535186404, "step": 12200 }, { "epoch": 0.4872490078195607, "eval_loss": 0.5946715474128723, "eval_runtime": 147.4021, "eval_samples_per_second": 38.371, "eval_steps_per_second": 4.796, "eval_wer": 0.4671085362135097, "step": 12400 }, { "epoch": 0.4911784353019765, "grad_norm": 2.5033822059631348, "learning_rate": 0.00011547692307692306, "loss": 0.4027, "step": 12500 }, { "epoch": 0.49510786278439234, "eval_loss": 0.5993836522102356, "eval_runtime": 147.7878, "eval_samples_per_second": 38.271, "eval_steps_per_second": 4.784, "eval_wer": 0.47092808653367785, "step": 12600 }, { "epoch": 0.502966717749224, "eval_loss": 0.5981957912445068, "eval_runtime": 147.5137, "eval_samples_per_second": 38.342, "eval_steps_per_second": 4.793, "eval_wer": 0.4760796649066778, "step": 12800 }, { "epoch": 0.5108255727140556, "grad_norm": 3.0013859272003174, "learning_rate": 0.00010778461538461537, "loss": 0.3978, "step": 13000 }, { "epoch": 0.5108255727140556, "eval_loss": 0.5889731645584106, "eval_runtime": 148.3841, "eval_samples_per_second": 38.117, "eval_steps_per_second": 4.765, "eval_wer": 0.4632087432395564, "step": 13000 }, { "epoch": 0.5186844276788872, "eval_loss": 0.5871375799179077, "eval_runtime": 147.6289, "eval_samples_per_second": 38.312, "eval_steps_per_second": 4.789, "eval_wer": 0.4567090882829677, "step": 13200 }, { "epoch": 0.5265432826437189, "eval_loss": 0.5873442888259888, "eval_runtime": 148.2022, "eval_samples_per_second": 38.164, "eval_steps_per_second": 4.771, "eval_wer": 0.4634815682624256, "step": 13400 }, { "epoch": 0.5304727101261346, "grad_norm": 3.4298863410949707, "learning_rate": 0.00010009230769230768, "loss": 0.3875, "step": 13500 }, { "epoch": 0.5344021376085505, "eval_loss": 0.5772218704223633, "eval_runtime": 148.5655, "eval_samples_per_second": 38.071, "eval_steps_per_second": 4.759, "eval_wer": 0.4538684983389771, "step": 13600 }, { "epoch": 0.542260992573382, "eval_loss": 0.5603720545768738, "eval_runtime": 147.4995, "eval_samples_per_second": 38.346, "eval_steps_per_second": 4.793, "eval_wer": 0.4419123429250052, "step": 13800 }, { "epoch": 0.5501198475382136, "grad_norm": 2.6594979763031006, "learning_rate": 9.24e-05, "loss": 0.404, "step": 14000 }, { "epoch": 0.5501198475382136, "eval_loss": 0.5688683986663818, "eval_runtime": 147.0269, "eval_samples_per_second": 38.469, "eval_steps_per_second": 4.809, "eval_wer": 0.4454269711607902, "step": 14000 }, { "epoch": 0.5579787025030453, "eval_loss": 0.5594531893730164, "eval_runtime": 147.2136, "eval_samples_per_second": 38.42, "eval_steps_per_second": 4.803, "eval_wer": 0.443292516570108, "step": 14200 }, { "epoch": 0.5658375574678769, "eval_loss": 0.5574955940246582, "eval_runtime": 147.8377, "eval_samples_per_second": 38.258, "eval_steps_per_second": 4.782, "eval_wer": 0.4405803148721735, "step": 14400 }, { "epoch": 0.5697669849502928, "grad_norm": 1.834619402885437, "learning_rate": 8.472307692307691e-05, "loss": 0.3878, "step": 14500 }, { "epoch": 0.5736964124327085, "eval_loss": 0.5521669983863831, "eval_runtime": 147.831, "eval_samples_per_second": 38.26, "eval_steps_per_second": 4.782, "eval_wer": 0.43528429972236043, "step": 14600 }, { "epoch": 0.5815552673975402, "eval_loss": 0.5521777272224426, "eval_runtime": 148.1309, "eval_samples_per_second": 38.182, "eval_steps_per_second": 4.773, "eval_wer": 0.4351719600070614, "step": 14800 }, { "epoch": 0.5894141223623718, "grad_norm": 2.739065408706665, "learning_rate": 7.703076923076922e-05, "loss": 0.3622, "step": 15000 }, { "epoch": 0.5894141223623718, "eval_loss": 0.5570353865623474, "eval_runtime": 147.6686, "eval_samples_per_second": 38.302, "eval_steps_per_second": 4.788, "eval_wer": 0.4401309560109772, "step": 15000 }, { "epoch": 0.5972729773272034, "eval_loss": 0.5467315912246704, "eval_runtime": 147.8113, "eval_samples_per_second": 38.265, "eval_steps_per_second": 4.783, "eval_wer": 0.42803036382019227, "step": 15200 }, { "epoch": 0.605131832292035, "eval_loss": 0.5510929822921753, "eval_runtime": 148.0412, "eval_samples_per_second": 38.206, "eval_steps_per_second": 4.776, "eval_wer": 0.43404856285407073, "step": 15400 }, { "epoch": 0.6090612597744509, "grad_norm": 3.7551369667053223, "learning_rate": 6.933846153846154e-05, "loss": 0.3545, "step": 15500 }, { "epoch": 0.6129906872568667, "eval_loss": 0.5437116026878357, "eval_runtime": 147.4367, "eval_samples_per_second": 38.362, "eval_steps_per_second": 4.795, "eval_wer": 0.4245478326459213, "step": 15600 }, { "epoch": 0.6208495422216983, "eval_loss": 0.5488719940185547, "eval_runtime": 147.4687, "eval_samples_per_second": 38.354, "eval_steps_per_second": 4.794, "eval_wer": 0.4296352168958932, "step": 15800 }, { "epoch": 0.6287083971865299, "grad_norm": 1.4097563028335571, "learning_rate": 6.164615384615383e-05, "loss": 0.3486, "step": 16000 }, { "epoch": 0.6287083971865299, "eval_loss": 0.5420017242431641, "eval_runtime": 147.3988, "eval_samples_per_second": 38.372, "eval_steps_per_second": 4.797, "eval_wer": 0.4277896358588371, "step": 16000 }, { "epoch": 0.6365672521513616, "eval_loss": 0.5352106094360352, "eval_runtime": 148.3938, "eval_samples_per_second": 38.115, "eval_steps_per_second": 4.764, "eval_wer": 0.4212739323714914, "step": 16200 }, { "epoch": 0.6444261071161932, "eval_loss": 0.5376533269882202, "eval_runtime": 147.9443, "eval_samples_per_second": 38.231, "eval_steps_per_second": 4.779, "eval_wer": 0.42592800629102406, "step": 16400 }, { "epoch": 0.648355534598609, "grad_norm": 2.8873980045318604, "learning_rate": 5.395384615384615e-05, "loss": 0.3374, "step": 16500 }, { "epoch": 0.6522849620810248, "eval_loss": 0.5336429476737976, "eval_runtime": 149.7227, "eval_samples_per_second": 37.776, "eval_steps_per_second": 4.722, "eval_wer": 0.4305018375567717, "step": 16600 }, { "epoch": 0.6601438170458565, "eval_loss": 0.5293694734573364, "eval_runtime": 148.8049, "eval_samples_per_second": 38.01, "eval_steps_per_second": 4.751, "eval_wer": 0.41875431304264094, "step": 16800 }, { "epoch": 0.6680026720106881, "grad_norm": 3.3759422302246094, "learning_rate": 4.6276923076923074e-05, "loss": 0.3389, "step": 17000 }, { "epoch": 0.6680026720106881, "eval_loss": 0.5252575278282166, "eval_runtime": 149.4186, "eval_samples_per_second": 37.853, "eval_steps_per_second": 4.732, "eval_wer": 0.4169408290670989, "step": 17000 }, { "epoch": 0.6758615269755197, "eval_loss": 0.5194066166877747, "eval_runtime": 147.8703, "eval_samples_per_second": 38.25, "eval_steps_per_second": 4.781, "eval_wer": 0.41443725826900546, "step": 17200 }, { "epoch": 0.6837203819403513, "eval_loss": 0.5231760740280151, "eval_runtime": 147.3205, "eval_samples_per_second": 38.392, "eval_steps_per_second": 4.799, "eval_wer": 0.417101314374669, "step": 17400 }, { "epoch": 0.6876498094227671, "grad_norm": 3.4031200408935547, "learning_rate": 3.858461538461538e-05, "loss": 0.3258, "step": 17500 }, { "epoch": 0.691579236905183, "eval_loss": 0.5179495811462402, "eval_runtime": 148.0293, "eval_samples_per_second": 38.209, "eval_steps_per_second": 4.776, "eval_wer": 0.41653961579817367, "step": 17600 }, { "epoch": 0.6994380918700145, "eval_loss": 0.5132375359535217, "eval_runtime": 149.1897, "eval_samples_per_second": 37.911, "eval_steps_per_second": 4.739, "eval_wer": 0.4103769799874821, "step": 17800 }, { "epoch": 0.7072969468348461, "grad_norm": 4.0969133377075195, "learning_rate": 3.090769230769231e-05, "loss": 0.327, "step": 18000 }, { "epoch": 0.7072969468348461, "eval_loss": 0.5096033215522766, "eval_runtime": 148.384, "eval_samples_per_second": 38.117, "eval_steps_per_second": 4.765, "eval_wer": 0.40440692654587473, "step": 18000 }, { "epoch": 0.7151558017996777, "eval_loss": 0.5040988922119141, "eval_runtime": 148.3441, "eval_samples_per_second": 38.128, "eval_steps_per_second": 4.766, "eval_wer": 0.40337982057742616, "step": 18200 }, { "epoch": 0.7230146567645094, "eval_loss": 0.5013387203216553, "eval_runtime": 147.8316, "eval_samples_per_second": 38.26, "eval_steps_per_second": 4.782, "eval_wer": 0.3980677568968561, "step": 18400 }, { "epoch": 0.7269440842469252, "grad_norm": 2.58701491355896, "learning_rate": 2.3215384615384613e-05, "loss": 0.316, "step": 18500 }, { "epoch": 0.730873511729341, "eval_loss": 0.5074244737625122, "eval_runtime": 148.936, "eval_samples_per_second": 37.976, "eval_steps_per_second": 4.747, "eval_wer": 0.4064611384827719, "step": 18600 }, { "epoch": 0.7387323666941726, "eval_loss": 0.5014389753341675, "eval_runtime": 148.0107, "eval_samples_per_second": 38.213, "eval_steps_per_second": 4.777, "eval_wer": 0.40545008104508035, "step": 18800 }, { "epoch": 0.7465912216590043, "grad_norm": 1.975710391998291, "learning_rate": 1.552307692307692e-05, "loss": 0.3162, "step": 19000 }, { "epoch": 0.7465912216590043, "eval_loss": 0.4958648383617401, "eval_runtime": 148.7492, "eval_samples_per_second": 38.024, "eval_steps_per_second": 4.753, "eval_wer": 0.3998170467493701, "step": 19000 }, { "epoch": 0.7544500766238359, "eval_loss": 0.4930271506309509, "eval_runtime": 147.4379, "eval_samples_per_second": 38.362, "eval_steps_per_second": 4.795, "eval_wer": 0.39818009661215514, "step": 19200 }, { "epoch": 0.7623089315886675, "eval_loss": 0.4924609363079071, "eval_runtime": 149.2355, "eval_samples_per_second": 37.9, "eval_steps_per_second": 4.737, "eval_wer": 0.39821219367366917, "step": 19400 }, { "epoch": 0.7662383590710834, "grad_norm": 2.332016944885254, "learning_rate": 7.83076923076923e-06, "loss": 0.3145, "step": 19500 }, { "epoch": 0.7701677865534992, "eval_loss": 0.4922255575656891, "eval_runtime": 148.1982, "eval_samples_per_second": 38.165, "eval_steps_per_second": 4.771, "eval_wer": 0.3970246023976505, "step": 19600 }, { "epoch": 0.7780266415183308, "eval_loss": 0.49084585905075073, "eval_runtime": 148.8021, "eval_samples_per_second": 38.01, "eval_steps_per_second": 4.751, "eval_wer": 0.39692831121310845, "step": 19800 }, { "epoch": 0.7858854964831624, "grad_norm": 5.262033462524414, "learning_rate": 1.5384615384615385e-07, "loss": 0.3095, "step": 20000 }, { "epoch": 0.7858854964831624, "eval_loss": 0.4908364713191986, "eval_runtime": 148.1385, "eval_samples_per_second": 38.18, "eval_steps_per_second": 4.773, "eval_wer": 0.39638266116737014, "step": 20000 }, { "epoch": 0.7858854964831624, "step": 20000, "total_flos": 2.4863424513490096e+19, "train_loss": 0.5819183097839356, "train_runtime": 23325.1985, "train_samples_per_second": 6.86, "train_steps_per_second": 0.857 } ], "logging_steps": 500, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4863424513490096e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }