{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9647137412079059, "eval_steps": 200, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007858854964831625, "eval_loss": 3.175461769104004, "eval_runtime": 144.4533, "eval_samples_per_second": 39.155, "eval_steps_per_second": 4.894, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.01571770992966325, "eval_loss": 2.8796634674072266, "eval_runtime": 143.0854, "eval_samples_per_second": 39.529, "eval_steps_per_second": 4.941, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.01964713741207906, "grad_norm": 1.9831087589263916, "learning_rate": 0.00029759999999999997, "loss": 4.8076, "step": 500 }, { "epoch": 0.023576564894494872, "eval_loss": 1.4753953218460083, "eval_runtime": 143.2096, "eval_samples_per_second": 39.495, "eval_steps_per_second": 4.937, "eval_wer": 0.903997689011571, "step": 600 }, { "epoch": 0.0314354198593265, "eval_loss": 1.25261652469635, "eval_runtime": 144.6031, "eval_samples_per_second": 39.114, "eval_steps_per_second": 4.889, "eval_wer": 0.8548410393028518, "step": 800 }, { "epoch": 0.03929427482415812, "grad_norm": 2.427387237548828, "learning_rate": 0.0002969939393939394, "loss": 1.1153, "step": 1000 }, { "epoch": 0.03929427482415812, "eval_loss": 1.1311910152435303, "eval_runtime": 144.1542, "eval_samples_per_second": 39.236, "eval_steps_per_second": 4.904, "eval_wer": 0.788769238176245, "step": 1000 }, { "epoch": 0.047153129788989744, "eval_loss": 1.0895923376083374, "eval_runtime": 144.7009, "eval_samples_per_second": 39.088, "eval_steps_per_second": 4.886, "eval_wer": 0.7734749883648152, "step": 1200 }, { "epoch": 0.055011984753821366, "eval_loss": 1.0287705659866333, "eval_runtime": 143.6225, "eval_samples_per_second": 39.381, "eval_steps_per_second": 4.923, "eval_wer": 0.7571054869926658, "step": 1400 }, { "epoch": 0.05894141223623718, "grad_norm": 2.3919336795806885, "learning_rate": 0.0002939636363636363, "loss": 0.8282, "step": 1500 }, { "epoch": 0.062870839718653, "eval_loss": 0.9747628569602966, "eval_runtime": 144.5139, "eval_samples_per_second": 39.138, "eval_steps_per_second": 4.892, "eval_wer": 0.7254096387475727, "step": 1600 }, { "epoch": 0.07072969468348461, "eval_loss": 0.9748485088348389, "eval_runtime": 144.3418, "eval_samples_per_second": 39.185, "eval_steps_per_second": 4.898, "eval_wer": 0.7194556338367223, "step": 1800 }, { "epoch": 0.07858854964831624, "grad_norm": 2.169008255004883, "learning_rate": 0.0002909333333333333, "loss": 0.7335, "step": 2000 }, { "epoch": 0.07858854964831624, "eval_loss": 0.9882574081420898, "eval_runtime": 145.192, "eval_samples_per_second": 38.955, "eval_steps_per_second": 4.869, "eval_wer": 0.7143682495867504, "step": 2000 }, { "epoch": 0.08644740461314787, "eval_loss": 0.9364911317825317, "eval_runtime": 145.4626, "eval_samples_per_second": 38.883, "eval_steps_per_second": 4.86, "eval_wer": 0.7061834989006757, "step": 2200 }, { "epoch": 0.09430625957797949, "eval_loss": 0.9164892435073853, "eval_runtime": 145.9321, "eval_samples_per_second": 38.758, "eval_steps_per_second": 4.845, "eval_wer": 0.6801688305435637, "step": 2400 }, { "epoch": 0.0982356870603953, "grad_norm": 5.276973247528076, "learning_rate": 0.00028790303030303027, "loss": 0.6931, "step": 2500 }, { "epoch": 0.10216511454281112, "eval_loss": 0.9169939756393433, "eval_runtime": 145.3478, "eval_samples_per_second": 38.914, "eval_steps_per_second": 4.864, "eval_wer": 0.6773603376610872, "step": 2600 }, { "epoch": 0.11002396950764273, "eval_loss": 0.9080427289009094, "eval_runtime": 144.7759, "eval_samples_per_second": 39.067, "eval_steps_per_second": 4.883, "eval_wer": 0.6692237325672835, "step": 2800 }, { "epoch": 0.11788282447247436, "grad_norm": 2.9965720176696777, "learning_rate": 0.00028487272727272726, "loss": 0.67, "step": 3000 }, { "epoch": 0.11788282447247436, "eval_loss": 0.8609287738800049, "eval_runtime": 145.381, "eval_samples_per_second": 38.905, "eval_steps_per_second": 4.863, "eval_wer": 0.6621784275649564, "step": 3000 }, { "epoch": 0.125741679437306, "eval_loss": 0.8863000273704529, "eval_runtime": 144.6247, "eval_samples_per_second": 39.108, "eval_steps_per_second": 4.889, "eval_wer": 0.6659177352313396, "step": 3200 }, { "epoch": 0.13360053440213762, "eval_loss": 0.8669990301132202, "eval_runtime": 145.3885, "eval_samples_per_second": 38.903, "eval_steps_per_second": 4.863, "eval_wer": 0.6610710789427228, "step": 3400 }, { "epoch": 0.1375299618845534, "grad_norm": 3.541180372238159, "learning_rate": 0.0002818424242424242, "loss": 0.6282, "step": 3500 }, { "epoch": 0.14145938936696922, "eval_loss": 0.8718289136886597, "eval_runtime": 147.388, "eval_samples_per_second": 38.375, "eval_steps_per_second": 4.797, "eval_wer": 0.6819983630498628, "step": 3600 }, { "epoch": 0.14931824433180085, "eval_loss": 0.861672580242157, "eval_runtime": 145.355, "eval_samples_per_second": 38.912, "eval_steps_per_second": 4.864, "eval_wer": 0.6481520116833304, "step": 3800 }, { "epoch": 0.15717709929663248, "grad_norm": 1.9885746240615845, "learning_rate": 0.0002788121212121212, "loss": 0.6311, "step": 4000 }, { "epoch": 0.15717709929663248, "eval_loss": 0.8504879474639893, "eval_runtime": 145.0997, "eval_samples_per_second": 38.98, "eval_steps_per_second": 4.873, "eval_wer": 0.6597230023591341, "step": 4000 }, { "epoch": 0.1650359542614641, "eval_loss": 0.8290337324142456, "eval_runtime": 144.8192, "eval_samples_per_second": 39.056, "eval_steps_per_second": 4.882, "eval_wer": 0.6292307939208166, "step": 4200 }, { "epoch": 0.17289480922629574, "eval_loss": 0.8300275206565857, "eval_runtime": 144.9963, "eval_samples_per_second": 39.008, "eval_steps_per_second": 4.876, "eval_wer": 0.6567540241690873, "step": 4400 }, { "epoch": 0.17682423670871153, "grad_norm": 3.603195905685425, "learning_rate": 0.0002757878787878788, "loss": 0.615, "step": 4500 }, { "epoch": 0.18075366419112734, "eval_loss": 0.8007863163948059, "eval_runtime": 144.795, "eval_samples_per_second": 39.062, "eval_steps_per_second": 4.883, "eval_wer": 0.610855226204041, "step": 4600 }, { "epoch": 0.18861251915595897, "eval_loss": 0.8038597702980042, "eval_runtime": 144.8128, "eval_samples_per_second": 39.057, "eval_steps_per_second": 4.882, "eval_wer": 0.6045160565550224, "step": 4800 }, { "epoch": 0.1964713741207906, "grad_norm": 3.389535665512085, "learning_rate": 0.0002727575757575757, "loss": 0.5785, "step": 5000 }, { "epoch": 0.1964713741207906, "eval_loss": 0.7907959818840027, "eval_runtime": 144.5449, "eval_samples_per_second": 39.13, "eval_steps_per_second": 4.891, "eval_wer": 0.6071801126606859, "step": 5000 }, { "epoch": 0.20433022908562223, "eval_loss": 0.7867733836174011, "eval_runtime": 144.7418, "eval_samples_per_second": 39.076, "eval_steps_per_second": 4.885, "eval_wer": 0.6037457270786859, "step": 5200 }, { "epoch": 0.21218908405045384, "eval_loss": 0.7709878087043762, "eval_runtime": 146.1138, "eval_samples_per_second": 38.71, "eval_steps_per_second": 4.839, "eval_wer": 0.5988348766670412, "step": 5400 }, { "epoch": 0.21611851153286965, "grad_norm": 2.476861000061035, "learning_rate": 0.00026972727272727266, "loss": 0.5928, "step": 5500 }, { "epoch": 0.22004793901528547, "eval_loss": 0.766153872013092, "eval_runtime": 144.8164, "eval_samples_per_second": 39.056, "eval_steps_per_second": 4.882, "eval_wer": 0.5747139349392563, "step": 5600 }, { "epoch": 0.2279067939801171, "eval_loss": 0.767308235168457, "eval_runtime": 145.7638, "eval_samples_per_second": 38.803, "eval_steps_per_second": 4.85, "eval_wer": 0.5945820160164337, "step": 5800 }, { "epoch": 0.23576564894494872, "grad_norm": 2.2588391304016113, "learning_rate": 0.00026669696969696966, "loss": 0.5799, "step": 6000 }, { "epoch": 0.23576564894494872, "eval_loss": 0.7804461121559143, "eval_runtime": 145.0414, "eval_samples_per_second": 38.996, "eval_steps_per_second": 4.874, "eval_wer": 0.5990114105053682, "step": 6000 }, { "epoch": 0.24362450390978035, "eval_loss": 0.7586621642112732, "eval_runtime": 145.864, "eval_samples_per_second": 38.776, "eval_steps_per_second": 4.847, "eval_wer": 0.5780520293367142, "step": 6200 }, { "epoch": 0.251483358874612, "eval_loss": 0.749543309211731, "eval_runtime": 145.8617, "eval_samples_per_second": 38.776, "eval_steps_per_second": 4.847, "eval_wer": 0.5728683539022003, "step": 6400 }, { "epoch": 0.2554127863570278, "grad_norm": 1.971763253211975, "learning_rate": 0.00026366666666666666, "loss": 0.5534, "step": 6500 }, { "epoch": 0.2593422138394436, "eval_loss": 0.7536802291870117, "eval_runtime": 147.4299, "eval_samples_per_second": 38.364, "eval_steps_per_second": 4.795, "eval_wer": 0.5768804865914525, "step": 6600 }, { "epoch": 0.26720106880427524, "eval_loss": 0.7661583423614502, "eval_runtime": 149.947, "eval_samples_per_second": 37.72, "eval_steps_per_second": 4.715, "eval_wer": 0.581245686957359, "step": 6800 }, { "epoch": 0.2750599237691068, "grad_norm": 2.5072972774505615, "learning_rate": 0.0002606363636363636, "loss": 0.5592, "step": 7000 }, { "epoch": 0.2750599237691068, "eval_loss": 0.7571460604667664, "eval_runtime": 145.6344, "eval_samples_per_second": 38.837, "eval_steps_per_second": 4.855, "eval_wer": 0.5607998587729294, "step": 7000 }, { "epoch": 0.28291877873393845, "eval_loss": 0.7475385665893555, "eval_runtime": 145.4869, "eval_samples_per_second": 38.876, "eval_steps_per_second": 4.86, "eval_wer": 0.5635120604708639, "step": 7200 }, { "epoch": 0.2907776336987701, "eval_loss": 0.7267230749130249, "eval_runtime": 145.614, "eval_samples_per_second": 38.842, "eval_steps_per_second": 4.855, "eval_wer": 0.5591950056972285, "step": 7400 }, { "epoch": 0.2947070611811859, "grad_norm": 3.4168338775634766, "learning_rate": 0.0002576060606060606, "loss": 0.5512, "step": 7500 }, { "epoch": 0.2986364886636017, "eval_loss": 0.7362108826637268, "eval_runtime": 145.7986, "eval_samples_per_second": 38.793, "eval_steps_per_second": 4.849, "eval_wer": 0.5588098409590602, "step": 7600 }, { "epoch": 0.30649534362843334, "eval_loss": 0.7624097466468811, "eval_runtime": 145.4148, "eval_samples_per_second": 38.896, "eval_steps_per_second": 4.862, "eval_wer": 0.581117298711303, "step": 7800 }, { "epoch": 0.31435419859326497, "grad_norm": 2.9330873489379883, "learning_rate": 0.00025457575757575755, "loss": 0.54, "step": 8000 }, { "epoch": 0.31435419859326497, "eval_loss": 0.7657227516174316, "eval_runtime": 146.1604, "eval_samples_per_second": 38.697, "eval_steps_per_second": 4.837, "eval_wer": 0.5622442265410602, "step": 8000 }, { "epoch": 0.3222130535580966, "eval_loss": 0.7300673127174377, "eval_runtime": 146.8709, "eval_samples_per_second": 38.51, "eval_steps_per_second": 4.814, "eval_wer": 0.5453611721846865, "step": 8200 }, { "epoch": 0.3300719085229282, "eval_loss": 0.7118472456932068, "eval_runtime": 146.4543, "eval_samples_per_second": 38.62, "eval_steps_per_second": 4.827, "eval_wer": 0.5381553818747894, "step": 8400 }, { "epoch": 0.33400133600534404, "grad_norm": 2.0070419311523438, "learning_rate": 0.00025154545454545454, "loss": 0.531, "step": 8500 }, { "epoch": 0.33793076348775986, "eval_loss": 0.7252832055091858, "eval_runtime": 145.6223, "eval_samples_per_second": 38.84, "eval_steps_per_second": 4.855, "eval_wer": 0.548153616536406, "step": 8600 }, { "epoch": 0.3457896184525915, "eval_loss": 0.7304599285125732, "eval_runtime": 145.89, "eval_samples_per_second": 38.769, "eval_steps_per_second": 4.846, "eval_wer": 0.5582962879748359, "step": 8800 }, { "epoch": 0.35364847341742306, "grad_norm": 2.5275588035583496, "learning_rate": 0.00024852121212121206, "loss": 0.5406, "step": 9000 }, { "epoch": 0.35364847341742306, "eval_loss": 0.7097567915916443, "eval_runtime": 145.7013, "eval_samples_per_second": 38.819, "eval_steps_per_second": 4.852, "eval_wer": 0.5520213124488453, "step": 9000 }, { "epoch": 0.3615073283822547, "eval_loss": 0.698684573173523, "eval_runtime": 146.3052, "eval_samples_per_second": 38.659, "eval_steps_per_second": 4.832, "eval_wer": 0.5372245670908828, "step": 9200 }, { "epoch": 0.3693661833470863, "eval_loss": 0.7044981718063354, "eval_runtime": 145.8062, "eval_samples_per_second": 38.791, "eval_steps_per_second": 4.849, "eval_wer": 0.5472548988140136, "step": 9400 }, { "epoch": 0.37329561082950213, "grad_norm": 6.208221435546875, "learning_rate": 0.00024549090909090906, "loss": 0.5252, "step": 9500 }, { "epoch": 0.37722503831191795, "eval_loss": 0.7025354504585266, "eval_runtime": 146.2272, "eval_samples_per_second": 38.68, "eval_steps_per_second": 4.835, "eval_wer": 0.5332766285246585, "step": 9600 }, { "epoch": 0.3850838932767496, "eval_loss": 0.7077142000198364, "eval_runtime": 145.5575, "eval_samples_per_second": 38.857, "eval_steps_per_second": 4.857, "eval_wer": 0.5461796472532939, "step": 9800 }, { "epoch": 0.3929427482415812, "grad_norm": 4.407375812530518, "learning_rate": 0.00024246060606060606, "loss": 0.5156, "step": 10000 }, { "epoch": 0.3929427482415812, "eval_loss": 0.7006597518920898, "eval_runtime": 146.3123, "eval_samples_per_second": 38.657, "eval_steps_per_second": 4.832, "eval_wer": 0.5382516730593314, "step": 10000 }, { "epoch": 0.40080160320641284, "eval_loss": 0.6947250962257385, "eval_runtime": 145.4545, "eval_samples_per_second": 38.885, "eval_steps_per_second": 4.861, "eval_wer": 0.5425847763637239, "step": 10200 }, { "epoch": 0.40866045817124447, "eval_loss": 0.7127708196640015, "eval_runtime": 145.5874, "eval_samples_per_second": 38.85, "eval_steps_per_second": 4.856, "eval_wer": 0.5361332669994062, "step": 10400 }, { "epoch": 0.4125898856536603, "grad_norm": 2.721827983856201, "learning_rate": 0.000239430303030303, "loss": 0.5181, "step": 10500 }, { "epoch": 0.4165193131360761, "eval_loss": 0.6945223212242126, "eval_runtime": 146.0143, "eval_samples_per_second": 38.736, "eval_steps_per_second": 4.842, "eval_wer": 0.5276114971674343, "step": 10600 }, { "epoch": 0.42437816810090767, "eval_loss": 0.6985763311386108, "eval_runtime": 146.5657, "eval_samples_per_second": 38.59, "eval_steps_per_second": 4.824, "eval_wer": 0.5310619312801913, "step": 10800 }, { "epoch": 0.4322370230657393, "grad_norm": 5.38914680480957, "learning_rate": 0.0002364, "loss": 0.5096, "step": 11000 }, { "epoch": 0.4322370230657393, "eval_loss": 0.6909800171852112, "eval_runtime": 146.0039, "eval_samples_per_second": 38.739, "eval_steps_per_second": 4.842, "eval_wer": 0.5293126414276773, "step": 11000 }, { "epoch": 0.44009587803057093, "eval_loss": 0.6855354905128479, "eval_runtime": 146.6844, "eval_samples_per_second": 38.559, "eval_steps_per_second": 4.82, "eval_wer": 0.5280608560286306, "step": 11200 }, { "epoch": 0.44795473299540256, "eval_loss": 0.6889775395393372, "eval_runtime": 146.3731, "eval_samples_per_second": 38.641, "eval_steps_per_second": 4.83, "eval_wer": 0.5262313235223315, "step": 11400 }, { "epoch": 0.4518841604778184, "grad_norm": 3.3484437465667725, "learning_rate": 0.00023336969696969694, "loss": 0.5099, "step": 11500 }, { "epoch": 0.4558135879602342, "eval_loss": 0.677577018737793, "eval_runtime": 146.1848, "eval_samples_per_second": 38.691, "eval_steps_per_second": 4.836, "eval_wer": 0.5298101458811446, "step": 11600 }, { "epoch": 0.4636724429250658, "eval_loss": 0.6817450523376465, "eval_runtime": 146.2301, "eval_samples_per_second": 38.679, "eval_steps_per_second": 4.835, "eval_wer": 0.5141949254545747, "step": 11800 }, { "epoch": 0.47153129788989745, "grad_norm": 4.75791597366333, "learning_rate": 0.00023033939393939391, "loss": 0.481, "step": 12000 }, { "epoch": 0.47153129788989745, "eval_loss": 0.6749030351638794, "eval_runtime": 144.9955, "eval_samples_per_second": 39.008, "eval_steps_per_second": 4.876, "eval_wer": 0.5318483092872848, "step": 12000 }, { "epoch": 0.4793901528547291, "eval_loss": 0.6648340225219727, "eval_runtime": 146.7705, "eval_samples_per_second": 38.536, "eval_steps_per_second": 4.817, "eval_wer": 0.513167819486126, "step": 12200 }, { "epoch": 0.4872490078195607, "eval_loss": 0.6659471392631531, "eval_runtime": 145.9108, "eval_samples_per_second": 38.763, "eval_steps_per_second": 4.845, "eval_wer": 0.5151096917077241, "step": 12400 }, { "epoch": 0.4911784353019765, "grad_norm": 3.3849971294403076, "learning_rate": 0.00022730909090909089, "loss": 0.4899, "step": 12500 }, { "epoch": 0.49510786278439234, "eval_loss": 0.6744287014007568, "eval_runtime": 146.3152, "eval_samples_per_second": 38.656, "eval_steps_per_second": 4.832, "eval_wer": 0.5207266774726774, "step": 12600 }, { "epoch": 0.502966717749224, "eval_loss": 0.6732743978500366, "eval_runtime": 146.1337, "eval_samples_per_second": 38.704, "eval_steps_per_second": 4.838, "eval_wer": 0.5228771805941166, "step": 12800 }, { "epoch": 0.5108255727140556, "grad_norm": 3.489818811416626, "learning_rate": 0.00022427878787878786, "loss": 0.492, "step": 13000 }, { "epoch": 0.5108255727140556, "eval_loss": 0.6456639170646667, "eval_runtime": 146.9518, "eval_samples_per_second": 38.489, "eval_steps_per_second": 4.811, "eval_wer": 0.5041645937314438, "step": 13000 }, { "epoch": 0.5186844276788872, "eval_loss": 0.6671249866485596, "eval_runtime": 145.8641, "eval_samples_per_second": 38.776, "eval_steps_per_second": 4.847, "eval_wer": 0.5259103529071913, "step": 13200 }, { "epoch": 0.5265432826437189, "eval_loss": 0.6544414162635803, "eval_runtime": 146.5937, "eval_samples_per_second": 38.583, "eval_steps_per_second": 4.823, "eval_wer": 0.5179181845902008, "step": 13400 }, { "epoch": 0.5304727101261346, "grad_norm": 1.4167377948760986, "learning_rate": 0.00022125454545454546, "loss": 0.4782, "step": 13500 }, { "epoch": 0.5344021376085505, "eval_loss": 0.6560591459274292, "eval_runtime": 146.1188, "eval_samples_per_second": 38.708, "eval_steps_per_second": 4.839, "eval_wer": 0.5054484761920046, "step": 13600 }, { "epoch": 0.542260992573382, "eval_loss": 0.6381711363792419, "eval_runtime": 145.8554, "eval_samples_per_second": 38.778, "eval_steps_per_second": 4.847, "eval_wer": 0.49918954919677105, "step": 13800 }, { "epoch": 0.5501198475382136, "grad_norm": 3.582862615585327, "learning_rate": 0.0002182242424242424, "loss": 0.507, "step": 14000 }, { "epoch": 0.5501198475382136, "eval_loss": 0.6555091738700867, "eval_runtime": 148.0584, "eval_samples_per_second": 38.201, "eval_steps_per_second": 4.775, "eval_wer": 0.504437418754313, "step": 14000 }, { "epoch": 0.5579787025030453, "eval_loss": 0.6399552822113037, "eval_runtime": 146.0824, "eval_samples_per_second": 38.718, "eval_steps_per_second": 4.84, "eval_wer": 0.49548233859190194, "step": 14200 }, { "epoch": 0.5658375574678769, "eval_loss": 0.6467686891555786, "eval_runtime": 146.3336, "eval_samples_per_second": 38.651, "eval_steps_per_second": 4.831, "eval_wer": 0.5014202949719954, "step": 14400 }, { "epoch": 0.5697669849502928, "grad_norm": 2.1453781127929688, "learning_rate": 0.0002151939393939394, "loss": 0.4899, "step": 14500 }, { "epoch": 0.5736964124327085, "eval_loss": 0.6370707750320435, "eval_runtime": 146.4635, "eval_samples_per_second": 38.617, "eval_steps_per_second": 4.827, "eval_wer": 0.49723162844441593, "step": 14600 }, { "epoch": 0.5815552673975402, "eval_loss": 0.6356329917907715, "eval_runtime": 145.6834, "eval_samples_per_second": 38.824, "eval_steps_per_second": 4.853, "eval_wer": 0.5025597406557429, "step": 14800 }, { "epoch": 0.5894141223623718, "grad_norm": 2.615446090698242, "learning_rate": 0.00021216363636363634, "loss": 0.4677, "step": 15000 }, { "epoch": 0.5894141223623718, "eval_loss": 0.638607919216156, "eval_runtime": 145.7689, "eval_samples_per_second": 38.801, "eval_steps_per_second": 4.85, "eval_wer": 0.5021424788560608, "step": 15000 }, { "epoch": 0.5972729773272034, "eval_loss": 0.6653130650520325, "eval_runtime": 146.0092, "eval_samples_per_second": 38.737, "eval_steps_per_second": 4.842, "eval_wer": 0.5190255332124344, "step": 15200 }, { "epoch": 0.605131832292035, "eval_loss": 0.6442501544952393, "eval_runtime": 146.2404, "eval_samples_per_second": 38.676, "eval_steps_per_second": 4.835, "eval_wer": 0.4998154418962944, "step": 15400 }, { "epoch": 0.6090612597744509, "grad_norm": 2.680966854095459, "learning_rate": 0.0002091333333333333, "loss": 0.461, "step": 15500 }, { "epoch": 0.6129906872568667, "eval_loss": 0.6210175156593323, "eval_runtime": 146.9594, "eval_samples_per_second": 38.487, "eval_steps_per_second": 4.811, "eval_wer": 0.4896567219271076, "step": 15600 }, { "epoch": 0.6208495422216983, "eval_loss": 0.6395752429962158, "eval_runtime": 146.5911, "eval_samples_per_second": 38.584, "eval_steps_per_second": 4.823, "eval_wer": 0.5011635184798832, "step": 15800 }, { "epoch": 0.6287083971865299, "grad_norm": 2.2297749519348145, "learning_rate": 0.00020610303030303028, "loss": 0.4528, "step": 16000 }, { "epoch": 0.6287083971865299, "eval_loss": 0.6226186752319336, "eval_runtime": 147.1935, "eval_samples_per_second": 38.426, "eval_steps_per_second": 4.803, "eval_wer": 0.49333183547046267, "step": 16000 }, { "epoch": 0.6365672521513616, "eval_loss": 0.6253554224967957, "eval_runtime": 147.0403, "eval_samples_per_second": 38.466, "eval_steps_per_second": 4.808, "eval_wer": 0.49365280608560286, "step": 16200 }, { "epoch": 0.6444261071161932, "eval_loss": 0.6289177536964417, "eval_runtime": 146.8167, "eval_samples_per_second": 38.524, "eval_steps_per_second": 4.816, "eval_wer": 0.5013240037874532, "step": 16400 }, { "epoch": 0.648355534598609, "grad_norm": 1.9119956493377686, "learning_rate": 0.00020307272727272725, "loss": 0.451, "step": 16500 }, { "epoch": 0.6522849620810248, "eval_loss": 0.6229738593101501, "eval_runtime": 146.4262, "eval_samples_per_second": 38.627, "eval_steps_per_second": 4.828, "eval_wer": 0.49723162844441593, "step": 16600 }, { "epoch": 0.6601438170458565, "eval_loss": 0.6153121590614319, "eval_runtime": 146.6615, "eval_samples_per_second": 38.565, "eval_steps_per_second": 4.821, "eval_wer": 0.4957391150840141, "step": 16800 }, { "epoch": 0.6680026720106881, "grad_norm": 3.115481376647949, "learning_rate": 0.00020004848484848485, "loss": 0.4444, "step": 17000 }, { "epoch": 0.6680026720106881, "eval_loss": 0.6032531261444092, "eval_runtime": 146.667, "eval_samples_per_second": 38.564, "eval_steps_per_second": 4.82, "eval_wer": 0.47476368538460306, "step": 17000 }, { "epoch": 0.6758615269755197, "eval_loss": 0.6153914332389832, "eval_runtime": 146.5404, "eval_samples_per_second": 38.597, "eval_steps_per_second": 4.825, "eval_wer": 0.4771388679366404, "step": 17200 }, { "epoch": 0.6837203819403513, "eval_loss": 0.6169700622558594, "eval_runtime": 146.4739, "eval_samples_per_second": 38.614, "eval_steps_per_second": 4.827, "eval_wer": 0.48591741426072443, "step": 17400 }, { "epoch": 0.6876498094227671, "grad_norm": 3.35622501373291, "learning_rate": 0.0001970181818181818, "loss": 0.4357, "step": 17500 }, { "epoch": 0.691579236905183, "eval_loss": 0.6020850539207458, "eval_runtime": 146.4462, "eval_samples_per_second": 38.622, "eval_steps_per_second": 4.828, "eval_wer": 0.4814559227102759, "step": 17600 }, { "epoch": 0.6994380918700145, "eval_loss": 0.6071408987045288, "eval_runtime": 147.1123, "eval_samples_per_second": 38.447, "eval_steps_per_second": 4.806, "eval_wer": 0.47303044406284606, "step": 17800 }, { "epoch": 0.7072969468348461, "grad_norm": 2.2534916400909424, "learning_rate": 0.0001939939393939394, "loss": 0.4413, "step": 18000 }, { "epoch": 0.7072969468348461, "eval_loss": 0.6042246222496033, "eval_runtime": 146.518, "eval_samples_per_second": 38.603, "eval_steps_per_second": 4.825, "eval_wer": 0.47656112082938806, "step": 18000 }, { "epoch": 0.7151558017996777, "eval_loss": 0.6118656396865845, "eval_runtime": 147.1712, "eval_samples_per_second": 38.431, "eval_steps_per_second": 4.804, "eval_wer": 0.4837508626085282, "step": 18200 }, { "epoch": 0.7230146567645094, "eval_loss": 0.6045942902565002, "eval_runtime": 146.4829, "eval_samples_per_second": 38.612, "eval_steps_per_second": 4.827, "eval_wer": 0.47569450016850956, "step": 18400 }, { "epoch": 0.7269440842469252, "grad_norm": 3.591475248336792, "learning_rate": 0.00019096363636363634, "loss": 0.4375, "step": 18500 }, { "epoch": 0.730873511729341, "eval_loss": 0.6081308722496033, "eval_runtime": 147.4627, "eval_samples_per_second": 38.355, "eval_steps_per_second": 4.794, "eval_wer": 0.4832854552165749, "step": 18600 }, { "epoch": 0.7387323666941726, "eval_loss": 0.6007533073425293, "eval_runtime": 146.3827, "eval_samples_per_second": 38.638, "eval_steps_per_second": 4.83, "eval_wer": 0.4727897161014909, "step": 18800 }, { "epoch": 0.7465912216590043, "grad_norm": 1.425370693206787, "learning_rate": 0.0001879333333333333, "loss": 0.4329, "step": 19000 }, { "epoch": 0.7465912216590043, "eval_loss": 0.6008017063140869, "eval_runtime": 147.3011, "eval_samples_per_second": 38.398, "eval_steps_per_second": 4.8, "eval_wer": 0.46924299080419185, "step": 19000 }, { "epoch": 0.7544500766238359, "eval_loss": 0.6007276177406311, "eval_runtime": 146.7759, "eval_samples_per_second": 38.535, "eval_steps_per_second": 4.817, "eval_wer": 0.4822262521866123, "step": 19200 }, { "epoch": 0.7623089315886675, "eval_loss": 0.5838043093681335, "eval_runtime": 146.9473, "eval_samples_per_second": 38.49, "eval_steps_per_second": 4.811, "eval_wer": 0.4657925566914349, "step": 19400 }, { "epoch": 0.7662383590710834, "grad_norm": 2.780203342437744, "learning_rate": 0.00018490303030303028, "loss": 0.4318, "step": 19500 }, { "epoch": 0.7701677865534992, "eval_loss": 0.6007500290870667, "eval_runtime": 146.6721, "eval_samples_per_second": 38.562, "eval_steps_per_second": 4.82, "eval_wer": 0.46519876105342556, "step": 19600 }, { "epoch": 0.7780266415183308, "eval_loss": 0.5918843746185303, "eval_runtime": 147.2498, "eval_samples_per_second": 38.411, "eval_steps_per_second": 4.801, "eval_wer": 0.4664826435139863, "step": 19800 }, { "epoch": 0.7858854964831624, "grad_norm": 3.501138687133789, "learning_rate": 0.00018187272727272725, "loss": 0.4265, "step": 20000 }, { "epoch": 0.7858854964831624, "eval_loss": 0.59038907289505, "eval_runtime": 147.6976, "eval_samples_per_second": 38.294, "eval_steps_per_second": 4.787, "eval_wer": 0.4721959204634816, "step": 20000 }, { "epoch": 0.793744351447994, "eval_loss": 0.5922533273696899, "eval_runtime": 146.8772, "eval_samples_per_second": 38.508, "eval_steps_per_second": 4.814, "eval_wer": 0.4815201168333039, "step": 20200 }, { "epoch": 0.8016032064128257, "eval_loss": 0.5979217886924744, "eval_runtime": 146.9133, "eval_samples_per_second": 38.499, "eval_steps_per_second": 4.812, "eval_wer": 0.4661295758373321, "step": 20400 }, { "epoch": 0.8055326338952414, "grad_norm": 2.374830484390259, "learning_rate": 0.00017884242424242425, "loss": 0.4321, "step": 20500 }, { "epoch": 0.8094620613776573, "eval_loss": 0.5837874412536621, "eval_runtime": 146.6078, "eval_samples_per_second": 38.579, "eval_steps_per_second": 4.822, "eval_wer": 0.45608319558344435, "step": 20600 }, { "epoch": 0.8173209163424889, "eval_loss": 0.5824867486953735, "eval_runtime": 147.7105, "eval_samples_per_second": 38.291, "eval_steps_per_second": 4.786, "eval_wer": 0.4523920335093322, "step": 20800 }, { "epoch": 0.8251797713073206, "grad_norm": 1.430405616760254, "learning_rate": 0.0001758121212121212, "loss": 0.4192, "step": 21000 }, { "epoch": 0.8251797713073206, "eval_loss": 0.5838850140571594, "eval_runtime": 146.699, "eval_samples_per_second": 38.555, "eval_steps_per_second": 4.819, "eval_wer": 0.4551523807995378, "step": 21000 }, { "epoch": 0.8330386262721522, "eval_loss": 0.5804269909858704, "eval_runtime": 147.0076, "eval_samples_per_second": 38.474, "eval_steps_per_second": 4.809, "eval_wer": 0.4593731443886312, "step": 21200 }, { "epoch": 0.8408974812369838, "eval_loss": 0.5890819430351257, "eval_runtime": 146.6585, "eval_samples_per_second": 38.566, "eval_steps_per_second": 4.821, "eval_wer": 0.4722280175249956, "step": 21400 }, { "epoch": 0.8448269087193996, "grad_norm": 2.7897725105285645, "learning_rate": 0.00017278181818181817, "loss": 0.4151, "step": 21500 }, { "epoch": 0.8487563362018153, "eval_loss": 0.5830910205841064, "eval_runtime": 147.6653, "eval_samples_per_second": 38.303, "eval_steps_per_second": 4.788, "eval_wer": 0.4525204217553883, "step": 21600 }, { "epoch": 0.856615191166647, "eval_loss": 0.5677404403686523, "eval_runtime": 146.5378, "eval_samples_per_second": 38.598, "eval_steps_per_second": 4.825, "eval_wer": 0.45430180866941633, "step": 21800 }, { "epoch": 0.8644740461314786, "grad_norm": 2.938485622406006, "learning_rate": 0.00016975757575757574, "loss": 0.417, "step": 22000 }, { "epoch": 0.8644740461314786, "eval_loss": 0.5605286359786987, "eval_runtime": 147.3751, "eval_samples_per_second": 38.378, "eval_steps_per_second": 4.797, "eval_wer": 0.446807144805893, "step": 22000 }, { "epoch": 0.8723329010963102, "eval_loss": 0.570513129234314, "eval_runtime": 146.7648, "eval_samples_per_second": 38.538, "eval_steps_per_second": 4.817, "eval_wer": 0.44422333135401454, "step": 22200 }, { "epoch": 0.8801917560611419, "eval_loss": 0.5685856938362122, "eval_runtime": 147.3241, "eval_samples_per_second": 38.392, "eval_steps_per_second": 4.799, "eval_wer": 0.4551363322687808, "step": 22400 }, { "epoch": 0.8841211835435577, "grad_norm": 5.145638942718506, "learning_rate": 0.0001667272727272727, "loss": 0.4014, "step": 22500 }, { "epoch": 0.8880506110259735, "eval_loss": 0.5751659870147705, "eval_runtime": 146.2417, "eval_samples_per_second": 38.676, "eval_steps_per_second": 4.834, "eval_wer": 0.4602397650495097, "step": 22600 }, { "epoch": 0.8959094659908051, "eval_loss": 0.5623380541801453, "eval_runtime": 146.6371, "eval_samples_per_second": 38.571, "eval_steps_per_second": 4.821, "eval_wer": 0.4452985829147342, "step": 22800 }, { "epoch": 0.9037683209556368, "grad_norm": 1.9630001783370972, "learning_rate": 0.00016369696969696968, "loss": 0.4024, "step": 23000 }, { "epoch": 0.9037683209556368, "eval_loss": 0.5631678700447083, "eval_runtime": 146.9977, "eval_samples_per_second": 38.477, "eval_steps_per_second": 4.81, "eval_wer": 0.4423777503169585, "step": 23000 }, { "epoch": 0.9116271759204684, "eval_loss": 0.568145751953125, "eval_runtime": 146.7017, "eval_samples_per_second": 38.554, "eval_steps_per_second": 4.819, "eval_wer": 0.4471120668902762, "step": 23200 }, { "epoch": 0.9194860308853, "eval_loss": 0.5659225583076477, "eval_runtime": 147.422, "eval_samples_per_second": 38.366, "eval_steps_per_second": 4.796, "eval_wer": 0.4510760539872575, "step": 23400 }, { "epoch": 0.9234154583677158, "grad_norm": 2.880105972290039, "learning_rate": 0.00016066666666666665, "loss": 0.3899, "step": 23500 }, { "epoch": 0.9273448858501316, "eval_loss": 0.5653769969940186, "eval_runtime": 147.0508, "eval_samples_per_second": 38.463, "eval_steps_per_second": 4.808, "eval_wer": 0.4417197605559211, "step": 23600 }, { "epoch": 0.9352037408149633, "eval_loss": 0.5691047310829163, "eval_runtime": 147.3319, "eval_samples_per_second": 38.39, "eval_steps_per_second": 4.799, "eval_wer": 0.45418946895411727, "step": 23800 }, { "epoch": 0.9430625957797949, "grad_norm": 1.747075080871582, "learning_rate": 0.00015763636363636365, "loss": 0.3977, "step": 24000 }, { "epoch": 0.9430625957797949, "eval_loss": 0.5613217949867249, "eval_runtime": 146.5842, "eval_samples_per_second": 38.585, "eval_steps_per_second": 4.823, "eval_wer": 0.4434209048161641, "step": 24000 }, { "epoch": 0.9509214507446265, "eval_loss": 0.5688283443450928, "eval_runtime": 147.1422, "eval_samples_per_second": 38.439, "eval_steps_per_second": 4.805, "eval_wer": 0.44326041950859396, "step": 24200 }, { "epoch": 0.9587803057094582, "eval_loss": 0.57487553358078, "eval_runtime": 146.7792, "eval_samples_per_second": 38.534, "eval_steps_per_second": 4.817, "eval_wer": 0.4454751167530613, "step": 24400 }, { "epoch": 0.9627097331918739, "grad_norm": NaN, "learning_rate": 0.0001546121212121212, "loss": 0.3889, "step": 24500 }, { "epoch": 0.9666391606742898, "eval_loss": 0.5499551892280579, "eval_runtime": 147.156, "eval_samples_per_second": 38.435, "eval_steps_per_second": 4.804, "eval_wer": 0.43180176854808944, "step": 24600 }, { "epoch": 0.9744980156391214, "eval_loss": 0.5436142086982727, "eval_runtime": 147.2848, "eval_samples_per_second": 38.402, "eval_steps_per_second": 4.8, "eval_wer": 0.4371780263516875, "step": 24800 }, { "epoch": 0.982356870603953, "grad_norm": 4.918150424957275, "learning_rate": 0.0001515818181818182, "loss": 0.39, "step": 25000 }, { "epoch": 0.982356870603953, "eval_loss": 0.547515332698822, "eval_runtime": 147.2374, "eval_samples_per_second": 38.414, "eval_steps_per_second": 4.802, "eval_wer": 0.4388310250196594, "step": 25000 }, { "epoch": 0.9902157255687847, "eval_loss": 0.5531713366508484, "eval_runtime": 146.8558, "eval_samples_per_second": 38.514, "eval_steps_per_second": 4.814, "eval_wer": 0.4423777503169585, "step": 25200 }, { "epoch": 0.9980745805336163, "eval_loss": 0.5450366139411926, "eval_runtime": 147.6783, "eval_samples_per_second": 38.299, "eval_steps_per_second": 4.787, "eval_wer": 0.4280945579432203, "step": 25400 }, { "epoch": 1.002004008016032, "grad_norm": 1.2219481468200684, "learning_rate": 0.00014855151515151514, "loss": 0.3853, "step": 25500 }, { "epoch": 1.005933435498448, "eval_loss": 0.5462915897369385, "eval_runtime": 145.543, "eval_samples_per_second": 38.861, "eval_steps_per_second": 4.858, "eval_wer": 0.43079071111039785, "step": 25600 }, { "epoch": 1.0137922904632795, "eval_loss": 0.5457944869995117, "eval_runtime": 145.2381, "eval_samples_per_second": 38.943, "eval_steps_per_second": 4.868, "eval_wer": 0.4277896358588371, "step": 25800 }, { "epoch": 1.0216511454281112, "grad_norm": 4.69161319732666, "learning_rate": 0.0001455212121212121, "loss": 0.3413, "step": 26000 }, { "epoch": 1.0216511454281112, "eval_loss": 0.5470069646835327, "eval_runtime": 145.5418, "eval_samples_per_second": 38.862, "eval_steps_per_second": 4.858, "eval_wer": 0.43441767906148193, "step": 26000 }, { "epoch": 1.0295100003929427, "eval_loss": 0.5358372330665588, "eval_runtime": 145.609, "eval_samples_per_second": 38.844, "eval_steps_per_second": 4.855, "eval_wer": 0.42258991189356615, "step": 26200 }, { "epoch": 1.0373688553577745, "eval_loss": 0.5403576493263245, "eval_runtime": 146.3753, "eval_samples_per_second": 38.64, "eval_steps_per_second": 4.83, "eval_wer": 0.42308741634703345, "step": 26400 }, { "epoch": 1.0412982828401902, "grad_norm": 1.2460460662841797, "learning_rate": 0.00014249090909090908, "loss": 0.339, "step": 26500 }, { "epoch": 1.045227710322606, "eval_loss": 0.5345466732978821, "eval_runtime": 145.3146, "eval_samples_per_second": 38.922, "eval_steps_per_second": 4.865, "eval_wer": 0.42433920174608014, "step": 26600 }, { "epoch": 1.0530865652874377, "eval_loss": 0.5396625995635986, "eval_runtime": 145.9713, "eval_samples_per_second": 38.747, "eval_steps_per_second": 4.843, "eval_wer": 0.4199579528494166, "step": 26800 }, { "epoch": 1.0609454202522692, "grad_norm": 1.021347165107727, "learning_rate": 0.00013946060606060605, "loss": 0.3235, "step": 27000 }, { "epoch": 1.0609454202522692, "eval_loss": 0.5378654599189758, "eval_runtime": 145.6291, "eval_samples_per_second": 38.838, "eval_steps_per_second": 4.855, "eval_wer": 0.4183049541814447, "step": 27000 }, { "epoch": 1.0688042752171008, "eval_loss": 0.5305435657501221, "eval_runtime": 145.36, "eval_samples_per_second": 38.91, "eval_steps_per_second": 4.864, "eval_wer": 0.42753285936672497, "step": 27200 }, { "epoch": 1.0766631301819325, "eval_loss": 0.5440751910209656, "eval_runtime": 145.3458, "eval_samples_per_second": 38.914, "eval_steps_per_second": 4.864, "eval_wer": 0.4247564635457624, "step": 27400 }, { "epoch": 1.0805925576643483, "grad_norm": 0.5985044836997986, "learning_rate": 0.00013643636363636362, "loss": 0.3252, "step": 27500 }, { "epoch": 1.0845219851467642, "eval_loss": 0.5361995697021484, "eval_runtime": 146.0428, "eval_samples_per_second": 38.728, "eval_steps_per_second": 4.841, "eval_wer": 0.4177753526664634, "step": 27600 }, { "epoch": 1.0923808401115958, "eval_loss": 0.5305026173591614, "eval_runtime": 145.9537, "eval_samples_per_second": 38.752, "eval_steps_per_second": 4.844, "eval_wer": 0.42015053521850076, "step": 27800 }, { "epoch": 1.1002396950764273, "grad_norm": 1.615342378616333, "learning_rate": 0.0001334060606060606, "loss": 0.3301, "step": 28000 }, { "epoch": 1.1002396950764273, "eval_loss": 0.5307178497314453, "eval_runtime": 146.253, "eval_samples_per_second": 38.673, "eval_steps_per_second": 4.834, "eval_wer": 0.41851358508128583, "step": 28000 }, { "epoch": 1.108098550041259, "eval_loss": 0.5402148365974426, "eval_runtime": 145.7202, "eval_samples_per_second": 38.814, "eval_steps_per_second": 4.852, "eval_wer": 0.431127730256295, "step": 28200 }, { "epoch": 1.1159574050060905, "eval_loss": 0.5308640003204346, "eval_runtime": 145.81, "eval_samples_per_second": 38.79, "eval_steps_per_second": 4.849, "eval_wer": 0.41788769238176243, "step": 28400 }, { "epoch": 1.1198868324885065, "grad_norm": 1.1408910751342773, "learning_rate": 0.00013037575757575756, "loss": 0.3087, "step": 28500 }, { "epoch": 1.1238162599709223, "eval_loss": 0.5298367738723755, "eval_runtime": 145.4349, "eval_samples_per_second": 38.89, "eval_steps_per_second": 4.861, "eval_wer": 0.42137022355603343, "step": 28600 }, { "epoch": 1.1316751149357538, "eval_loss": 0.5330610275268555, "eval_runtime": 145.6355, "eval_samples_per_second": 38.837, "eval_steps_per_second": 4.855, "eval_wer": 0.4214665147405755, "step": 28800 }, { "epoch": 1.1395339699005855, "grad_norm": 0.8552046418190002, "learning_rate": 0.00012734545454545453, "loss": 0.3222, "step": 29000 }, { "epoch": 1.1395339699005855, "eval_loss": 0.5273275971412659, "eval_runtime": 145.8763, "eval_samples_per_second": 38.773, "eval_steps_per_second": 4.847, "eval_wer": 0.4145495979843045, "step": 29000 }, { "epoch": 1.147392824865417, "eval_loss": 0.5282542705535889, "eval_runtime": 145.6375, "eval_samples_per_second": 38.836, "eval_steps_per_second": 4.855, "eval_wer": 0.4130731331546597, "step": 29200 }, { "epoch": 1.1552516798302488, "eval_loss": 0.5256520509719849, "eval_runtime": 145.9987, "eval_samples_per_second": 38.74, "eval_steps_per_second": 4.843, "eval_wer": 0.41159666832501485, "step": 29400 }, { "epoch": 1.1591811073126645, "grad_norm": 3.544210195541382, "learning_rate": 0.0001243151515151515, "loss": 0.3227, "step": 29500 }, { "epoch": 1.1631105347950803, "eval_loss": 0.5168554186820984, "eval_runtime": 145.3157, "eval_samples_per_second": 38.922, "eval_steps_per_second": 4.865, "eval_wer": 0.408419059235127, "step": 29600 }, { "epoch": 1.170969389759912, "eval_loss": 0.5184837579727173, "eval_runtime": 145.4598, "eval_samples_per_second": 38.884, "eval_steps_per_second": 4.86, "eval_wer": 0.41068190207186533, "step": 29800 }, { "epoch": 1.1788282447247436, "grad_norm": 0.8857652544975281, "learning_rate": 0.00012128484848484848, "loss": 0.309, "step": 30000 }, { "epoch": 1.1788282447247436, "eval_loss": 0.5076336860656738, "eval_runtime": 145.8517, "eval_samples_per_second": 38.779, "eval_steps_per_second": 4.847, "eval_wer": 0.40275392787790276, "step": 30000 }, { "epoch": 1.1866870996895753, "eval_loss": 0.5178284049034119, "eval_runtime": 146.4004, "eval_samples_per_second": 38.634, "eval_steps_per_second": 4.829, "eval_wer": 0.40535378986053827, "step": 30200 }, { "epoch": 1.1945459546544068, "eval_loss": 0.5225840210914612, "eval_runtime": 149.501, "eval_samples_per_second": 37.833, "eval_steps_per_second": 4.729, "eval_wer": 0.4122065124937812, "step": 30400 }, { "epoch": 1.1984753821368226, "grad_norm": 1.1116445064544678, "learning_rate": 0.00011826060606060606, "loss": 0.3138, "step": 30500 }, { "epoch": 1.2024048096192386, "eval_loss": 0.5226925015449524, "eval_runtime": 145.5048, "eval_samples_per_second": 38.872, "eval_steps_per_second": 4.859, "eval_wer": 0.4072635650206224, "step": 30600 }, { "epoch": 1.21026366458407, "eval_loss": 0.5130230784416199, "eval_runtime": 144.8014, "eval_samples_per_second": 39.06, "eval_steps_per_second": 4.883, "eval_wer": 0.40498467365312707, "step": 30800 }, { "epoch": 1.2181225195489018, "grad_norm": 1.0480467081069946, "learning_rate": 0.00011523030303030302, "loss": 0.3083, "step": 31000 }, { "epoch": 1.2181225195489018, "eval_loss": 0.516806423664093, "eval_runtime": 145.4982, "eval_samples_per_second": 38.873, "eval_steps_per_second": 4.859, "eval_wer": 0.4113077947713887, "step": 31000 }, { "epoch": 1.2259813745137333, "eval_loss": 0.505409836769104, "eval_runtime": 145.5358, "eval_samples_per_second": 38.863, "eval_steps_per_second": 4.858, "eval_wer": 0.4003947938566224, "step": 31200 }, { "epoch": 1.2338402294785649, "eval_loss": 0.5144046545028687, "eval_runtime": 145.0631, "eval_samples_per_second": 38.99, "eval_steps_per_second": 4.874, "eval_wer": 0.406653720851856, "step": 31400 }, { "epoch": 1.2377696569609808, "grad_norm": 1.0551427602767944, "learning_rate": 0.00011219999999999999, "loss": 0.2981, "step": 31500 }, { "epoch": 1.2416990844433966, "eval_loss": 0.5082244277000427, "eval_runtime": 145.8395, "eval_samples_per_second": 38.782, "eval_steps_per_second": 4.848, "eval_wer": 0.39923929964211774, "step": 31600 }, { "epoch": 1.2495579394082281, "eval_loss": 0.5134223103523254, "eval_runtime": 145.7659, "eval_samples_per_second": 38.802, "eval_steps_per_second": 4.85, "eval_wer": 0.396125884675258, "step": 31800 }, { "epoch": 1.2574167943730599, "grad_norm": 2.2508976459503174, "learning_rate": 0.00010916969696969696, "loss": 0.2952, "step": 32000 }, { "epoch": 1.2574167943730599, "eval_loss": 0.49696260690689087, "eval_runtime": 145.5612, "eval_samples_per_second": 38.857, "eval_steps_per_second": 4.857, "eval_wer": 0.3999454349954262, "step": 32000 }, { "epoch": 1.2652756493378914, "eval_loss": 0.50291907787323, "eval_runtime": 145.2238, "eval_samples_per_second": 38.947, "eval_steps_per_second": 4.868, "eval_wer": 0.4005713276949495, "step": 32200 }, { "epoch": 1.2731345043027231, "eval_loss": 0.4979938268661499, "eval_runtime": 146.0479, "eval_samples_per_second": 38.727, "eval_steps_per_second": 4.841, "eval_wer": 0.4001540658952673, "step": 32400 }, { "epoch": 1.2770639317851389, "grad_norm": 0.7384321689605713, "learning_rate": 0.00010614545454545453, "loss": 0.2995, "step": 32500 }, { "epoch": 1.2809933592675546, "eval_loss": 0.49917110800743103, "eval_runtime": 145.9484, "eval_samples_per_second": 38.753, "eval_steps_per_second": 4.844, "eval_wer": 0.40463160597647285, "step": 32600 }, { "epoch": 1.2888522142323864, "eval_loss": 0.49689990282058716, "eval_runtime": 146.3024, "eval_samples_per_second": 38.66, "eval_steps_per_second": 4.832, "eval_wer": 0.3911829372020991, "step": 32800 }, { "epoch": 1.296711069197218, "grad_norm": 0.6462344527244568, "learning_rate": 0.0001031151515151515, "loss": 0.3046, "step": 33000 }, { "epoch": 1.296711069197218, "eval_loss": 0.49431467056274414, "eval_runtime": 145.566, "eval_samples_per_second": 38.855, "eval_steps_per_second": 4.857, "eval_wer": 0.3933334403235384, "step": 33000 }, { "epoch": 1.3045699241620496, "eval_loss": 0.4882897138595581, "eval_runtime": 146.7921, "eval_samples_per_second": 38.531, "eval_steps_per_second": 4.816, "eval_wer": 0.3932050520774823, "step": 33200 }, { "epoch": 1.3124287791268812, "eval_loss": 0.49653205275535583, "eval_runtime": 146.2261, "eval_samples_per_second": 38.68, "eval_steps_per_second": 4.835, "eval_wer": 0.3935099741618655, "step": 33400 }, { "epoch": 1.316358206609297, "grad_norm": 4.335805416107178, "learning_rate": 0.00010009090909090908, "loss": 0.2972, "step": 33500 }, { "epoch": 1.320287634091713, "eval_loss": 0.49103957414627075, "eval_runtime": 146.0953, "eval_samples_per_second": 38.714, "eval_steps_per_second": 4.839, "eval_wer": 0.3942000609844169, "step": 33600 }, { "epoch": 1.3281464890565444, "eval_loss": 0.5007916688919067, "eval_runtime": 145.7572, "eval_samples_per_second": 38.804, "eval_steps_per_second": 4.851, "eval_wer": 0.4097029416956878, "step": 33800 }, { "epoch": 1.3360053440213762, "grad_norm": 0.6741358637809753, "learning_rate": 9.706060606060605e-05, "loss": 0.3093, "step": 34000 }, { "epoch": 1.3360053440213762, "eval_loss": 0.4958365857601166, "eval_runtime": 146.2684, "eval_samples_per_second": 38.669, "eval_steps_per_second": 4.834, "eval_wer": 0.39574071993708976, "step": 34000 }, { "epoch": 1.3438641989862077, "eval_loss": 0.5045068264007568, "eval_runtime": 146.1991, "eval_samples_per_second": 38.687, "eval_steps_per_second": 4.836, "eval_wer": 0.40179101603248224, "step": 34200 }, { "epoch": 1.3517230539510394, "eval_loss": 0.492519348859787, "eval_runtime": 146.1528, "eval_samples_per_second": 38.699, "eval_steps_per_second": 4.837, "eval_wer": 0.3969925053361365, "step": 34400 }, { "epoch": 1.3556524814334552, "grad_norm": 0.9136665463447571, "learning_rate": 9.403030303030303e-05, "loss": 0.2947, "step": 34500 }, { "epoch": 1.359581908915871, "eval_loss": 0.4828738868236542, "eval_runtime": 145.0639, "eval_samples_per_second": 38.99, "eval_steps_per_second": 4.874, "eval_wer": 0.3905409959718188, "step": 34600 }, { "epoch": 1.3674407638807025, "eval_loss": 0.4869907796382904, "eval_runtime": 145.4878, "eval_samples_per_second": 38.876, "eval_steps_per_second": 4.86, "eval_wer": 0.39522716695286547, "step": 34800 }, { "epoch": 1.3752996188455342, "grad_norm": 1.0685299634933472, "learning_rate": 9.099999999999999e-05, "loss": 0.2801, "step": 35000 }, { "epoch": 1.3752996188455342, "eval_loss": 0.4897337555885315, "eval_runtime": 145.9513, "eval_samples_per_second": 38.753, "eval_steps_per_second": 4.844, "eval_wer": 0.3936704594694356, "step": 35000 }, { "epoch": 1.383158473810366, "eval_loss": 0.5006551146507263, "eval_runtime": 145.7634, "eval_samples_per_second": 38.803, "eval_steps_per_second": 4.85, "eval_wer": 0.39972075556482806, "step": 35200 }, { "epoch": 1.3910173287751975, "eval_loss": 0.48228171467781067, "eval_runtime": 145.956, "eval_samples_per_second": 38.751, "eval_steps_per_second": 4.844, "eval_wer": 0.38492401020686556, "step": 35400 }, { "epoch": 1.3949467562576132, "grad_norm": 0.6772143244743347, "learning_rate": 8.796969696969696e-05, "loss": 0.2772, "step": 35500 }, { "epoch": 1.398876183740029, "eval_loss": 0.4848904013633728, "eval_runtime": 145.8656, "eval_samples_per_second": 38.775, "eval_steps_per_second": 4.847, "eval_wer": 0.39121503426361315, "step": 35600 }, { "epoch": 1.4067350387048607, "eval_loss": 0.4844968020915985, "eval_runtime": 146.3634, "eval_samples_per_second": 38.644, "eval_steps_per_second": 4.83, "eval_wer": 0.3881658134197814, "step": 35800 }, { "epoch": 1.4145938936696925, "grad_norm": 1.0455658435821533, "learning_rate": 8.493939393939393e-05, "loss": 0.281, "step": 36000 }, { "epoch": 1.4145938936696925, "eval_loss": 0.482947438955307, "eval_runtime": 145.7025, "eval_samples_per_second": 38.819, "eval_steps_per_second": 4.852, "eval_wer": 0.38418577779204316, "step": 36000 }, { "epoch": 1.422452748634524, "eval_loss": 0.48147863149642944, "eval_runtime": 146.3811, "eval_samples_per_second": 38.639, "eval_steps_per_second": 4.83, "eval_wer": 0.3859190191138001, "step": 36200 }, { "epoch": 1.4303116035993555, "eval_loss": 0.4771769642829895, "eval_runtime": 145.8053, "eval_samples_per_second": 38.791, "eval_steps_per_second": 4.849, "eval_wer": 0.38075139221004317, "step": 36400 }, { "epoch": 1.4342410310817715, "grad_norm": 0.6518095135688782, "learning_rate": 8.19090909090909e-05, "loss": 0.2697, "step": 36500 }, { "epoch": 1.4381704585641872, "eval_loss": 0.48701608180999756, "eval_runtime": 145.4126, "eval_samples_per_second": 38.896, "eval_steps_per_second": 4.862, "eval_wer": 0.3914236651634543, "step": 36600 }, { "epoch": 1.4460293135290188, "eval_loss": 0.47700512409210205, "eval_runtime": 145.4281, "eval_samples_per_second": 38.892, "eval_steps_per_second": 4.862, "eval_wer": 0.38662515446710854, "step": 36800 }, { "epoch": 1.4538881684938505, "grad_norm": 2.1603991985321045, "learning_rate": 7.887878787878789e-05, "loss": 0.2766, "step": 37000 }, { "epoch": 1.4538881684938505, "eval_loss": 0.4786865711212158, "eval_runtime": 145.7912, "eval_samples_per_second": 38.795, "eval_steps_per_second": 4.849, "eval_wer": 0.38209946879363194, "step": 37000 }, { "epoch": 1.461747023458682, "eval_loss": 0.4793393015861511, "eval_runtime": 145.5675, "eval_samples_per_second": 38.855, "eval_steps_per_second": 4.857, "eval_wer": 0.38099212017139833, "step": 37200 }, { "epoch": 1.4696058784235138, "eval_loss": 0.4738729000091553, "eval_runtime": 145.8624, "eval_samples_per_second": 38.776, "eval_steps_per_second": 4.847, "eval_wer": 0.3803341304103609, "step": 37400 }, { "epoch": 1.4735353059059295, "grad_norm": 1.9566117525100708, "learning_rate": 7.585454545454545e-05, "loss": 0.2905, "step": 37500 }, { "epoch": 1.4774647333883453, "eval_loss": 0.47245293855667114, "eval_runtime": 145.8323, "eval_samples_per_second": 38.784, "eval_steps_per_second": 4.848, "eval_wer": 0.3811205084174544, "step": 37600 }, { "epoch": 1.485323588353177, "eval_loss": 0.47267088294029236, "eval_runtime": 145.9296, "eval_samples_per_second": 38.758, "eval_steps_per_second": 4.845, "eval_wer": 0.37827991847346376, "step": 37800 }, { "epoch": 1.4931824433180085, "grad_norm": 2.518251895904541, "learning_rate": 7.282424242424242e-05, "loss": 0.2799, "step": 38000 }, { "epoch": 1.4931824433180085, "eval_loss": 0.47050511837005615, "eval_runtime": 146.8142, "eval_samples_per_second": 38.525, "eval_steps_per_second": 4.816, "eval_wer": 0.3776700743046974, "step": 38000 }, { "epoch": 1.50104129828284, "eval_loss": 0.4659024178981781, "eval_runtime": 145.787, "eval_samples_per_second": 38.796, "eval_steps_per_second": 4.85, "eval_wer": 0.37508626085281893, "step": 38200 }, { "epoch": 1.5089001532476718, "eval_loss": 0.46910360455513, "eval_runtime": 146.8808, "eval_samples_per_second": 38.507, "eval_steps_per_second": 4.813, "eval_wer": 0.37429988284572546, "step": 38400 }, { "epoch": 1.5128295807300876, "grad_norm": 1.3675510883331299, "learning_rate": 6.979393939393939e-05, "loss": 0.267, "step": 38500 }, { "epoch": 1.5167590082125035, "eval_loss": 0.4690033495426178, "eval_runtime": 145.2501, "eval_samples_per_second": 38.94, "eval_steps_per_second": 4.867, "eval_wer": 0.3663558601210059, "step": 38600 }, { "epoch": 1.524617863177335, "eval_loss": 0.4632550776004791, "eval_runtime": 146.3252, "eval_samples_per_second": 38.654, "eval_steps_per_second": 4.832, "eval_wer": 0.36810514997351995, "step": 38800 }, { "epoch": 1.5324767181421666, "grad_norm": 1.2868680953979492, "learning_rate": 6.676969696969697e-05, "loss": 0.2632, "step": 39000 }, { "epoch": 1.5324767181421666, "eval_loss": 0.4650620222091675, "eval_runtime": 146.2691, "eval_samples_per_second": 38.668, "eval_steps_per_second": 4.834, "eval_wer": 0.37255059299321147, "step": 39000 }, { "epoch": 1.5403355731069983, "eval_loss": 0.46896418929100037, "eval_runtime": 145.9823, "eval_samples_per_second": 38.744, "eval_steps_per_second": 4.843, "eval_wer": 0.3673990146202115, "step": 39200 }, { "epoch": 1.54819442807183, "eval_loss": 0.4612589180469513, "eval_runtime": 145.4614, "eval_samples_per_second": 38.883, "eval_steps_per_second": 4.86, "eval_wer": 0.3714913899632489, "step": 39400 }, { "epoch": 1.5521238555542456, "grad_norm": 2.942875623703003, "learning_rate": 6.373939393939393e-05, "loss": 0.2716, "step": 39500 }, { "epoch": 1.5560532830366616, "eval_loss": 0.4654790461063385, "eval_runtime": 146.1694, "eval_samples_per_second": 38.695, "eval_steps_per_second": 4.837, "eval_wer": 0.36967790598770683, "step": 39600 }, { "epoch": 1.563912138001493, "eval_loss": 0.4596673846244812, "eval_runtime": 145.7967, "eval_samples_per_second": 38.794, "eval_steps_per_second": 4.849, "eval_wer": 0.364799152637576, "step": 39800 }, { "epoch": 1.5717709929663248, "grad_norm": 0.4809035658836365, "learning_rate": 6.07090909090909e-05, "loss": 0.2651, "step": 40000 }, { "epoch": 1.5717709929663248, "eval_loss": 0.4549534320831299, "eval_runtime": 146.3998, "eval_samples_per_second": 38.634, "eval_steps_per_second": 4.829, "eval_wer": 0.36619537481343584, "step": 40000 }, { "epoch": 1.5796298479311566, "eval_loss": 0.4538833498954773, "eval_runtime": 146.0948, "eval_samples_per_second": 38.715, "eval_steps_per_second": 4.839, "eval_wer": 0.3676397425815667, "step": 40200 }, { "epoch": 1.587488702895988, "eval_loss": 0.4542824625968933, "eval_runtime": 146.3082, "eval_samples_per_second": 38.658, "eval_steps_per_second": 4.832, "eval_wer": 0.36746320874323957, "step": 40400 }, { "epoch": 1.5914181303784039, "grad_norm": 1.2710328102111816, "learning_rate": 5.767878787878788e-05, "loss": 0.2659, "step": 40500 }, { "epoch": 1.5953475578608196, "eval_loss": 0.45555397868156433, "eval_runtime": 146.1729, "eval_samples_per_second": 38.694, "eval_steps_per_second": 4.837, "eval_wer": 0.3622795333087256, "step": 40600 }, { "epoch": 1.6032064128256514, "eval_loss": 0.463294118642807, "eval_runtime": 146.3048, "eval_samples_per_second": 38.659, "eval_steps_per_second": 4.832, "eval_wer": 0.36849031471168814, "step": 40800 }, { "epoch": 1.611065267790483, "grad_norm": 1.9250500202178955, "learning_rate": 5.4660606060606054e-05, "loss": 0.2559, "step": 41000 }, { "epoch": 1.611065267790483, "eval_loss": 0.4529285132884979, "eval_runtime": 146.9183, "eval_samples_per_second": 38.498, "eval_steps_per_second": 4.812, "eval_wer": 0.36083516554059475, "step": 41000 }, { "epoch": 1.6189241227553146, "eval_loss": 0.45345816016197205, "eval_runtime": 145.5972, "eval_samples_per_second": 38.847, "eval_steps_per_second": 4.856, "eval_wer": 0.36385228932291247, "step": 41200 }, { "epoch": 1.6267829777201461, "eval_loss": 0.4511209726333618, "eval_runtime": 146.7532, "eval_samples_per_second": 38.541, "eval_steps_per_second": 4.818, "eval_wer": 0.3637078525460994, "step": 41400 }, { "epoch": 1.630712405202562, "grad_norm": 0.9593771696090698, "learning_rate": 5.1630303030303025e-05, "loss": 0.2629, "step": 41500 }, { "epoch": 1.6346418326849776, "eval_loss": 0.45563140511512756, "eval_runtime": 146.0124, "eval_samples_per_second": 38.736, "eval_steps_per_second": 4.842, "eval_wer": 0.36049814639469757, "step": 41600 }, { "epoch": 1.6425006876498094, "eval_loss": 0.457055002450943, "eval_runtime": 147.3584, "eval_samples_per_second": 38.383, "eval_steps_per_second": 4.798, "eval_wer": 0.36390043491518353, "step": 41800 }, { "epoch": 1.6503595426146411, "grad_norm": 0.9599024653434753, "learning_rate": 4.8599999999999995e-05, "loss": 0.259, "step": 42000 }, { "epoch": 1.6503595426146411, "eval_loss": 0.46201661229133606, "eval_runtime": 146.8464, "eval_samples_per_second": 38.516, "eval_steps_per_second": 4.815, "eval_wer": 0.36903596475742645, "step": 42000 }, { "epoch": 1.6582183975794726, "eval_loss": 0.45499464869499207, "eval_runtime": 146.9092, "eval_samples_per_second": 38.5, "eval_steps_per_second": 4.812, "eval_wer": 0.36348317311550127, "step": 42200 }, { "epoch": 1.6660772525443042, "eval_loss": 0.45219454169273376, "eval_runtime": 146.8863, "eval_samples_per_second": 38.506, "eval_steps_per_second": 4.813, "eval_wer": 0.3584278859270434, "step": 42400 }, { "epoch": 1.6700066800267201, "grad_norm": 1.0676679611206055, "learning_rate": 4.5569696969696966e-05, "loss": 0.2594, "step": 42500 }, { "epoch": 1.673936107509136, "eval_loss": 0.4494900703430176, "eval_runtime": 147.3169, "eval_samples_per_second": 38.393, "eval_steps_per_second": 4.799, "eval_wer": 0.3589253903805107, "step": 42600 }, { "epoch": 1.6817949624739676, "eval_loss": 0.4453260898590088, "eval_runtime": 146.8159, "eval_samples_per_second": 38.524, "eval_steps_per_second": 4.816, "eval_wer": 0.3562131886825761, "step": 42800 }, { "epoch": 1.6896538174387992, "grad_norm": 0.4820586144924164, "learning_rate": 4.253939393939394e-05, "loss": 0.2538, "step": 43000 }, { "epoch": 1.6896538174387992, "eval_loss": 0.4438420832157135, "eval_runtime": 147.9055, "eval_samples_per_second": 38.241, "eval_steps_per_second": 4.78, "eval_wer": 0.3555391503907817, "step": 43000 }, { "epoch": 1.6975126724036307, "eval_loss": 0.4494447708129883, "eval_runtime": 146.855, "eval_samples_per_second": 38.514, "eval_steps_per_second": 4.814, "eval_wer": 0.3566946446052864, "step": 43200 }, { "epoch": 1.7053715273684624, "eval_loss": 0.4443654716014862, "eval_runtime": 146.8467, "eval_samples_per_second": 38.516, "eval_steps_per_second": 4.815, "eval_wer": 0.3537898605382677, "step": 43400 }, { "epoch": 1.7093009548508782, "grad_norm": 0.7214144468307495, "learning_rate": 3.950909090909091e-05, "loss": 0.2512, "step": 43500 }, { "epoch": 1.7132303823332942, "eval_loss": 0.4454784691333771, "eval_runtime": 147.1352, "eval_samples_per_second": 38.441, "eval_steps_per_second": 4.805, "eval_wer": 0.3529713854696602, "step": 43600 }, { "epoch": 1.7210892372981257, "eval_loss": 0.4453714191913605, "eval_runtime": 147.5374, "eval_samples_per_second": 38.336, "eval_steps_per_second": 4.792, "eval_wer": 0.3522010559933238, "step": 43800 }, { "epoch": 1.7289480922629572, "grad_norm": 1.9711872339248657, "learning_rate": 3.647878787878787e-05, "loss": 0.2358, "step": 44000 }, { "epoch": 1.7289480922629572, "eval_loss": 0.44450756907463074, "eval_runtime": 146.8893, "eval_samples_per_second": 38.505, "eval_steps_per_second": 4.813, "eval_wer": 0.3519763765627257, "step": 44000 }, { "epoch": 1.736806947227789, "eval_loss": 0.44162794947624207, "eval_runtime": 147.6037, "eval_samples_per_second": 38.319, "eval_steps_per_second": 4.79, "eval_wer": 0.34998635874885653, "step": 44200 }, { "epoch": 1.7446658021926207, "eval_loss": 0.44202086329460144, "eval_runtime": 148.0767, "eval_samples_per_second": 38.196, "eval_steps_per_second": 4.775, "eval_wer": 0.34897530131116494, "step": 44400 }, { "epoch": 1.7485952296750362, "grad_norm": 1.1429784297943115, "learning_rate": 3.344848484848484e-05, "loss": 0.2418, "step": 44500 }, { "epoch": 1.7525246571574522, "eval_loss": 0.43861278891563416, "eval_runtime": 147.9549, "eval_samples_per_second": 38.228, "eval_steps_per_second": 4.778, "eval_wer": 0.34790004975044536, "step": 44600 }, { "epoch": 1.7603835121222837, "eval_loss": 0.4354783296585083, "eval_runtime": 149.8154, "eval_samples_per_second": 37.753, "eval_steps_per_second": 4.719, "eval_wer": 0.3460705172441463, "step": 44800 }, { "epoch": 1.7682423670871152, "grad_norm": 1.684985637664795, "learning_rate": 3.0418181818181817e-05, "loss": 0.2421, "step": 45000 }, { "epoch": 1.7682423670871152, "eval_loss": 0.43855908513069153, "eval_runtime": 148.5791, "eval_samples_per_second": 38.067, "eval_steps_per_second": 4.758, "eval_wer": 0.34372743175362297, "step": 45000 }, { "epoch": 1.776101222051947, "eval_loss": 0.4347515106201172, "eval_runtime": 147.9309, "eval_samples_per_second": 38.234, "eval_steps_per_second": 4.779, "eval_wer": 0.3458297892827912, "step": 45200 }, { "epoch": 1.7839600770167787, "eval_loss": 0.43350183963775635, "eval_runtime": 148.2161, "eval_samples_per_second": 38.16, "eval_steps_per_second": 4.77, "eval_wer": 0.3435348493845388, "step": 45400 }, { "epoch": 1.7878895044991945, "grad_norm": 2.4373562335968018, "learning_rate": 2.7387878787878784e-05, "loss": 0.2418, "step": 45500 }, { "epoch": 1.7918189319816102, "eval_loss": 0.43087294697761536, "eval_runtime": 146.7738, "eval_samples_per_second": 38.535, "eval_steps_per_second": 4.817, "eval_wer": 0.3443693729839033, "step": 45600 }, { "epoch": 1.7996777869464418, "eval_loss": 0.43208202719688416, "eval_runtime": 147.2129, "eval_samples_per_second": 38.421, "eval_steps_per_second": 4.803, "eval_wer": 0.34249169488533326, "step": 45800 }, { "epoch": 1.8075366419112735, "grad_norm": 1.2847892045974731, "learning_rate": 2.4357575757575755e-05, "loss": 0.2424, "step": 46000 }, { "epoch": 1.8075366419112735, "eval_loss": 0.42999544739723206, "eval_runtime": 147.0735, "eval_samples_per_second": 38.457, "eval_steps_per_second": 4.807, "eval_wer": 0.34075845356357626, "step": 46000 }, { "epoch": 1.8153954968761052, "eval_loss": 0.4301421046257019, "eval_runtime": 146.951, "eval_samples_per_second": 38.489, "eval_steps_per_second": 4.811, "eval_wer": 0.34231516104700616, "step": 46200 }, { "epoch": 1.8232543518409368, "eval_loss": 0.4339451491832733, "eval_runtime": 146.5189, "eval_samples_per_second": 38.603, "eval_steps_per_second": 4.825, "eval_wer": 0.3407424050328192, "step": 46400 }, { "epoch": 1.8271837793233527, "grad_norm": 7.262228965759277, "learning_rate": 2.133333333333333e-05, "loss": 0.228, "step": 46500 }, { "epoch": 1.8311132068057683, "eval_loss": 0.43165403604507446, "eval_runtime": 146.7443, "eval_samples_per_second": 38.543, "eval_steps_per_second": 4.818, "eval_wer": 0.3428929081542585, "step": 46600 }, { "epoch": 1.8389720617706, "eval_loss": 0.43002423644065857, "eval_runtime": 146.6705, "eval_samples_per_second": 38.563, "eval_steps_per_second": 4.82, "eval_wer": 0.34332621848469774, "step": 46800 }, { "epoch": 1.8468309167354318, "grad_norm": 0.922248125076294, "learning_rate": 1.8303030303030302e-05, "loss": 0.2532, "step": 47000 }, { "epoch": 1.8468309167354318, "eval_loss": 0.42492908239364624, "eval_runtime": 147.1617, "eval_samples_per_second": 38.434, "eval_steps_per_second": 4.804, "eval_wer": 0.3439360626534641, "step": 47000 }, { "epoch": 1.8546897717002633, "eval_loss": 0.42566677927970886, "eval_runtime": 147.1363, "eval_samples_per_second": 38.441, "eval_steps_per_second": 4.805, "eval_wer": 0.3430373449310716, "step": 47200 }, { "epoch": 1.8625486266650948, "eval_loss": 0.42639264464378357, "eval_runtime": 147.0021, "eval_samples_per_second": 38.476, "eval_steps_per_second": 4.809, "eval_wer": 0.3408226476866043, "step": 47400 }, { "epoch": 1.8664780541475108, "grad_norm": 0.7899935841560364, "learning_rate": 1.5272727272727273e-05, "loss": 0.2347, "step": 47500 }, { "epoch": 1.8704074816299265, "eval_loss": 0.4254419207572937, "eval_runtime": 146.4448, "eval_samples_per_second": 38.622, "eval_steps_per_second": 4.828, "eval_wer": 0.3408868418096323, "step": 47600 }, { "epoch": 1.8782663365947583, "eval_loss": 0.423650860786438, "eval_runtime": 147.0702, "eval_samples_per_second": 38.458, "eval_steps_per_second": 4.807, "eval_wer": 0.3391215034263613, "step": 47800 }, { "epoch": 1.8861251915595898, "grad_norm": 1.1323833465576172, "learning_rate": 1.2242424242424242e-05, "loss": 0.2265, "step": 48000 }, { "epoch": 1.8861251915595898, "eval_loss": 0.4246509373188019, "eval_runtime": 147.0222, "eval_samples_per_second": 38.47, "eval_steps_per_second": 4.809, "eval_wer": 0.33952271669528655, "step": 48000 }, { "epoch": 1.8939840465244213, "eval_loss": 0.42534753680229187, "eval_runtime": 146.8715, "eval_samples_per_second": 38.51, "eval_steps_per_second": 4.814, "eval_wer": 0.3389128725265202, "step": 48200 }, { "epoch": 1.901842901489253, "eval_loss": 0.4245891273021698, "eval_runtime": 146.4129, "eval_samples_per_second": 38.63, "eval_steps_per_second": 4.829, "eval_wer": 0.33902521224181925, "step": 48400 }, { "epoch": 1.9057723289716688, "grad_norm": 2.10141658782959, "learning_rate": 9.212121212121211e-06, "loss": 0.2262, "step": 48500 }, { "epoch": 1.9097017564540848, "eval_loss": 0.4226687252521515, "eval_runtime": 147.045, "eval_samples_per_second": 38.464, "eval_steps_per_second": 4.808, "eval_wer": 0.3378536694965576, "step": 48600 }, { "epoch": 1.9175606114189163, "eval_loss": 0.4228062033653259, "eval_runtime": 147.4189, "eval_samples_per_second": 38.367, "eval_steps_per_second": 4.796, "eval_wer": 0.33892892105727723, "step": 48800 }, { "epoch": 1.9254194663837478, "grad_norm": 0.8046126365661621, "learning_rate": 6.181818181818182e-06, "loss": 0.2358, "step": 49000 }, { "epoch": 1.9254194663837478, "eval_loss": 0.4225420653820038, "eval_runtime": 147.7497, "eval_samples_per_second": 38.281, "eval_steps_per_second": 4.785, "eval_wer": 0.3391054548956043, "step": 49000 }, { "epoch": 1.9332783213485794, "eval_loss": 0.4224160313606262, "eval_runtime": 147.1221, "eval_samples_per_second": 38.444, "eval_steps_per_second": 4.806, "eval_wer": 0.33902521224181925, "step": 49200 }, { "epoch": 1.941137176313411, "eval_loss": 0.4214831590652466, "eval_runtime": 147.8229, "eval_samples_per_second": 38.262, "eval_steps_per_second": 4.783, "eval_wer": 0.3389931151803052, "step": 49400 }, { "epoch": 1.9450666037958269, "grad_norm": 1.517034888267517, "learning_rate": 3.1575757575757576e-06, "loss": 0.231, "step": 49500 }, { "epoch": 1.9489960312782428, "eval_loss": 0.4215412437915802, "eval_runtime": 147.4583, "eval_samples_per_second": 38.357, "eval_steps_per_second": 4.795, "eval_wer": 0.3399560270257258, "step": 49600 }, { "epoch": 1.9568548862430744, "eval_loss": 0.4211778938770294, "eval_runtime": 146.928, "eval_samples_per_second": 38.495, "eval_steps_per_second": 4.812, "eval_wer": 0.33933013432620246, "step": 49800 }, { "epoch": 1.9647137412079059, "grad_norm": 2.9327681064605713, "learning_rate": 1.2727272727272726e-07, "loss": 0.2331, "step": 50000 }, { "epoch": 1.9647137412079059, "eval_loss": 0.4211583733558655, "eval_runtime": 147.1945, "eval_samples_per_second": 38.425, "eval_steps_per_second": 4.803, "eval_wer": 0.33939432844923045, "step": 50000 }, { "epoch": 1.9647137412079059, "step": 50000, "total_flos": 6.219831968409632e+19, "train_loss": 0.4413083312988281, "train_runtime": 56545.9703, "train_samples_per_second": 7.074, "train_steps_per_second": 0.884 } ], "logging_steps": 500, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.219831968409632e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }