{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1788282447247436, "eval_steps": 200, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007858854964831625, "eval_loss": 3.1892831325531006, "eval_runtime": 159.5257, "eval_samples_per_second": 35.455, "eval_steps_per_second": 4.432, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.01571770992966325, "eval_loss": 2.780208110809326, "eval_runtime": 157.5706, "eval_samples_per_second": 35.895, "eval_steps_per_second": 4.487, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.01964713741207906, "grad_norm": 4.9997968673706055, "learning_rate": 0.0002982, "loss": 4.719, "step": 500 }, { "epoch": 0.023576564894494872, "eval_loss": 1.4220576286315918, "eval_runtime": 158.6041, "eval_samples_per_second": 35.661, "eval_steps_per_second": 4.458, "eval_wer": 0.8876923817624497, "step": 600 }, { "epoch": 0.0314354198593265, "eval_loss": 1.227359414100647, "eval_runtime": 158.5238, "eval_samples_per_second": 35.679, "eval_steps_per_second": 4.46, "eval_wer": 0.8224390557044503, "step": 800 }, { "epoch": 0.03929427482415812, "grad_norm": 2.6001393795013428, "learning_rate": 0.0002949457627118644, "loss": 1.0441, "step": 1000 }, { "epoch": 0.03929427482415812, "eval_loss": 1.1094719171524048, "eval_runtime": 161.2687, "eval_samples_per_second": 35.072, "eval_steps_per_second": 4.384, "eval_wer": 0.7886729469917029, "step": 1000 }, { "epoch": 0.047153129788989744, "eval_loss": 1.091428279876709, "eval_runtime": 158.6043, "eval_samples_per_second": 35.661, "eval_steps_per_second": 4.458, "eval_wer": 0.7549228868097125, "step": 1200 }, { "epoch": 0.055011984753821366, "eval_loss": 1.0177329778671265, "eval_runtime": 159.8661, "eval_samples_per_second": 35.38, "eval_steps_per_second": 4.422, "eval_wer": 0.7354881160629745, "step": 1400 }, { "epoch": 0.05894141223623718, "grad_norm": 2.7494542598724365, "learning_rate": 0.0002898610169491525, "loss": 0.8033, "step": 1500 }, { "epoch": 0.062870839718653, "eval_loss": 0.9907466769218445, "eval_runtime": 159.6438, "eval_samples_per_second": 35.429, "eval_steps_per_second": 4.429, "eval_wer": 0.7232912326876475, "step": 1600 }, { "epoch": 0.07072969468348461, "eval_loss": 0.9761303067207336, "eval_runtime": 159.1571, "eval_samples_per_second": 35.537, "eval_steps_per_second": 4.442, "eval_wer": 0.7145287348943204, "step": 1800 }, { "epoch": 0.07858854964831624, "grad_norm": 2.753251314163208, "learning_rate": 0.00028477627118644064, "loss": 0.7227, "step": 2000 }, { "epoch": 0.07858854964831624, "eval_loss": 0.9555273056030273, "eval_runtime": 159.3414, "eval_samples_per_second": 35.496, "eval_steps_per_second": 4.437, "eval_wer": 0.6902794049204796, "step": 2000 }, { "epoch": 0.08644740461314787, "eval_loss": 0.8994919061660767, "eval_runtime": 159.0426, "eval_samples_per_second": 35.563, "eval_steps_per_second": 4.445, "eval_wer": 0.6747765242092086, "step": 2200 }, { "epoch": 0.09430625957797949, "eval_loss": 0.8897404670715332, "eval_runtime": 158.5492, "eval_samples_per_second": 35.673, "eval_steps_per_second": 4.459, "eval_wer": 0.66655967646162, "step": 2400 }, { "epoch": 0.0982356870603953, "grad_norm": 2.404499053955078, "learning_rate": 0.00027969152542372877, "loss": 0.6794, "step": 2500 }, { "epoch": 0.10216511454281112, "eval_loss": 0.8826168775558472, "eval_runtime": 159.8456, "eval_samples_per_second": 35.384, "eval_steps_per_second": 4.423, "eval_wer": 0.6559676461619939, "step": 2600 }, { "epoch": 0.11002396950764273, "eval_loss": 0.8744593858718872, "eval_runtime": 159.6838, "eval_samples_per_second": 35.42, "eval_steps_per_second": 4.427, "eval_wer": 0.6445571407937604, "step": 2800 }, { "epoch": 0.11788282447247436, "grad_norm": 2.406255006790161, "learning_rate": 0.00027460677966101695, "loss": 0.6513, "step": 3000 }, { "epoch": 0.11788282447247436, "eval_loss": 0.8450209498405457, "eval_runtime": 159.2776, "eval_samples_per_second": 35.51, "eval_steps_per_second": 4.439, "eval_wer": 0.6436905201328819, "step": 3000 }, { "epoch": 0.125741679437306, "eval_loss": 0.8596389293670654, "eval_runtime": 159.3028, "eval_samples_per_second": 35.505, "eval_steps_per_second": 4.438, "eval_wer": 0.6510888928118631, "step": 3200 }, { "epoch": 0.13360053440213762, "eval_loss": 0.8597909212112427, "eval_runtime": 159.9064, "eval_samples_per_second": 35.371, "eval_steps_per_second": 4.421, "eval_wer": 0.6376402240374893, "step": 3400 }, { "epoch": 0.1375299618845534, "grad_norm": 2.2046961784362793, "learning_rate": 0.000269522033898305, "loss": 0.6147, "step": 3500 }, { "epoch": 0.14145938936696922, "eval_loss": 0.8516111969947815, "eval_runtime": 160.4338, "eval_samples_per_second": 35.254, "eval_steps_per_second": 4.407, "eval_wer": 0.6375439328529473, "step": 3600 }, { "epoch": 0.14931824433180085, "eval_loss": 0.8251617550849915, "eval_runtime": 160.6004, "eval_samples_per_second": 35.218, "eval_steps_per_second": 4.402, "eval_wer": 0.6100367511354335, "step": 3800 }, { "epoch": 0.15717709929663248, "grad_norm": 1.520897388458252, "learning_rate": 0.0002644372881355932, "loss": 0.6092, "step": 4000 }, { "epoch": 0.15717709929663248, "eval_loss": 0.8579581379890442, "eval_runtime": 159.0993, "eval_samples_per_second": 35.55, "eval_steps_per_second": 4.444, "eval_wer": 0.6822551395419749, "step": 4000 }, { "epoch": 0.1650359542614641, "eval_loss": 0.8204948306083679, "eval_runtime": 159.818, "eval_samples_per_second": 35.39, "eval_steps_per_second": 4.424, "eval_wer": 0.6135674279019756, "step": 4200 }, { "epoch": 0.17289480922629574, "eval_loss": 0.8033376336097717, "eval_runtime": 159.2754, "eval_samples_per_second": 35.511, "eval_steps_per_second": 4.439, "eval_wer": 0.6385068446983678, "step": 4400 }, { "epoch": 0.17682423670871153, "grad_norm": 2.3011837005615234, "learning_rate": 0.00025936271186440674, "loss": 0.5928, "step": 4500 }, { "epoch": 0.18075366419112734, "eval_loss": 0.7927771210670471, "eval_runtime": 160.1079, "eval_samples_per_second": 35.326, "eval_steps_per_second": 4.416, "eval_wer": 0.6005039238657701, "step": 4600 }, { "epoch": 0.18861251915595897, "eval_loss": 0.7911030054092407, "eval_runtime": 160.2559, "eval_samples_per_second": 35.294, "eval_steps_per_second": 4.412, "eval_wer": 0.5923512702412094, "step": 4800 }, { "epoch": 0.1964713741207906, "grad_norm": 6.133739948272705, "learning_rate": 0.0002542779661016949, "loss": 0.5681, "step": 5000 }, { "epoch": 0.1964713741207906, "eval_loss": 0.7968648076057434, "eval_runtime": 160.0012, "eval_samples_per_second": 35.35, "eval_steps_per_second": 4.419, "eval_wer": 0.5944375792396206, "step": 5000 }, { "epoch": 0.20433022908562223, "eval_loss": 0.7932958602905273, "eval_runtime": 159.7818, "eval_samples_per_second": 35.398, "eval_steps_per_second": 4.425, "eval_wer": 0.5898958450353871, "step": 5200 }, { "epoch": 0.21218908405045384, "eval_loss": 0.7830468416213989, "eval_runtime": 160.2841, "eval_samples_per_second": 35.287, "eval_steps_per_second": 4.411, "eval_wer": 0.6012742533421065, "step": 5400 }, { "epoch": 0.21611851153286965, "grad_norm": 2.9641568660736084, "learning_rate": 0.00024919322033898305, "loss": 0.5806, "step": 5500 }, { "epoch": 0.22004793901528547, "eval_loss": 0.7702626585960388, "eval_runtime": 160.806, "eval_samples_per_second": 35.173, "eval_steps_per_second": 4.397, "eval_wer": 0.5789026014668357, "step": 5600 }, { "epoch": 0.2279067939801171, "eval_loss": 0.7665734887123108, "eval_runtime": 160.6796, "eval_samples_per_second": 35.2, "eval_steps_per_second": 4.4, "eval_wer": 0.589831650912359, "step": 5800 }, { "epoch": 0.23576564894494872, "grad_norm": 2.6571083068847656, "learning_rate": 0.00024410847457627117, "loss": 0.5608, "step": 6000 }, { "epoch": 0.23576564894494872, "eval_loss": 0.7580233216285706, "eval_runtime": 160.371, "eval_samples_per_second": 35.268, "eval_steps_per_second": 4.409, "eval_wer": 0.5694500168509573, "step": 6000 }, { "epoch": 0.24362450390978035, "eval_loss": 0.7478851675987244, "eval_runtime": 162.164, "eval_samples_per_second": 34.878, "eval_steps_per_second": 4.36, "eval_wer": 0.5650848164850508, "step": 6200 }, { "epoch": 0.251483358874612, "eval_loss": 0.7638738751411438, "eval_runtime": 160.257, "eval_samples_per_second": 35.293, "eval_steps_per_second": 4.412, "eval_wer": 0.5846640240086021, "step": 6400 }, { "epoch": 0.2554127863570278, "grad_norm": 1.5677289962768555, "learning_rate": 0.0002390237288135593, "loss": 0.5333, "step": 6500 }, { "epoch": 0.2593422138394436, "eval_loss": 0.7297228574752808, "eval_runtime": 160.7223, "eval_samples_per_second": 35.191, "eval_steps_per_second": 4.399, "eval_wer": 0.5676044358139012, "step": 6600 }, { "epoch": 0.26720106880427524, "eval_loss": 0.7441245913505554, "eval_runtime": 160.37, "eval_samples_per_second": 35.268, "eval_steps_per_second": 4.409, "eval_wer": 0.5590345203896583, "step": 6800 }, { "epoch": 0.2750599237691068, "grad_norm": 3.644160032272339, "learning_rate": 0.00023393898305084743, "loss": 0.5406, "step": 7000 }, { "epoch": 0.2750599237691068, "eval_loss": 0.7404661774635315, "eval_runtime": 160.5995, "eval_samples_per_second": 35.218, "eval_steps_per_second": 4.402, "eval_wer": 0.5491165283818267, "step": 7000 }, { "epoch": 0.28291877873393845, "eval_loss": 0.7237815856933594, "eval_runtime": 160.4373, "eval_samples_per_second": 35.254, "eval_steps_per_second": 4.407, "eval_wer": 0.5529039816404808, "step": 7200 }, { "epoch": 0.2907776336987701, "eval_loss": 0.7328305840492249, "eval_runtime": 161.925, "eval_samples_per_second": 34.93, "eval_steps_per_second": 4.366, "eval_wer": 0.5543964950008826, "step": 7400 }, { "epoch": 0.2947070611811859, "grad_norm": 3.6030795574188232, "learning_rate": 0.00022885423728813558, "loss": 0.535, "step": 7500 }, { "epoch": 0.2986364886636017, "eval_loss": 0.7263395190238953, "eval_runtime": 160.6865, "eval_samples_per_second": 35.199, "eval_steps_per_second": 4.4, "eval_wer": 0.5598690439890228, "step": 7600 }, { "epoch": 0.30649534362843334, "eval_loss": 0.7421374320983887, "eval_runtime": 160.2249, "eval_samples_per_second": 35.3, "eval_steps_per_second": 4.413, "eval_wer": 0.5594357336585836, "step": 7800 }, { "epoch": 0.31435419859326497, "grad_norm": 3.376089096069336, "learning_rate": 0.0002237694915254237, "loss": 0.5195, "step": 8000 }, { "epoch": 0.31435419859326497, "eval_loss": 0.7434934377670288, "eval_runtime": 161.1972, "eval_samples_per_second": 35.087, "eval_steps_per_second": 4.386, "eval_wer": 0.5543804464701256, "step": 8000 }, { "epoch": 0.3222130535580966, "eval_loss": 0.7186952233314514, "eval_runtime": 162.677, "eval_samples_per_second": 34.768, "eval_steps_per_second": 4.346, "eval_wer": 0.5423921939946398, "step": 8200 }, { "epoch": 0.3300719085229282, "eval_loss": 0.6976691484451294, "eval_runtime": 159.5716, "eval_samples_per_second": 35.445, "eval_steps_per_second": 4.431, "eval_wer": 0.5353308404615558, "step": 8400 }, { "epoch": 0.33400133600534404, "grad_norm": 1.9758217334747314, "learning_rate": 0.00021868474576271186, "loss": 0.5023, "step": 8500 }, { "epoch": 0.33793076348775986, "eval_loss": 0.6949788928031921, "eval_runtime": 160.6972, "eval_samples_per_second": 35.197, "eval_steps_per_second": 4.4, "eval_wer": 0.5385565951437146, "step": 8600 }, { "epoch": 0.3457896184525915, "eval_loss": 0.7155033946037292, "eval_runtime": 159.9521, "eval_samples_per_second": 35.361, "eval_steps_per_second": 4.42, "eval_wer": 0.5450883471618173, "step": 8800 }, { "epoch": 0.35364847341742306, "grad_norm": 3.3146464824676514, "learning_rate": 0.00021361016949152543, "loss": 0.5106, "step": 9000 }, { "epoch": 0.35364847341742306, "eval_loss": 0.6857195496559143, "eval_runtime": 160.5474, "eval_samples_per_second": 35.229, "eval_steps_per_second": 4.404, "eval_wer": 0.5379467509749483, "step": 9000 }, { "epoch": 0.3615073283822547, "eval_loss": 0.68482905626297, "eval_runtime": 161.0662, "eval_samples_per_second": 35.116, "eval_steps_per_second": 4.389, "eval_wer": 0.5329075123172473, "step": 9200 }, { "epoch": 0.3693661833470863, "eval_loss": 0.6732301712036133, "eval_runtime": 160.6243, "eval_samples_per_second": 35.213, "eval_steps_per_second": 4.402, "eval_wer": 0.5202291730192101, "step": 9400 }, { "epoch": 0.37329561082950213, "grad_norm": 4.61689567565918, "learning_rate": 0.00020852542372881352, "loss": 0.4968, "step": 9500 }, { "epoch": 0.37722503831191795, "eval_loss": 0.6839133501052856, "eval_runtime": 161.2367, "eval_samples_per_second": 35.079, "eval_steps_per_second": 4.385, "eval_wer": 0.5274510118598642, "step": 9600 }, { "epoch": 0.3850838932767496, "eval_loss": 0.6766842603683472, "eval_runtime": 160.827, "eval_samples_per_second": 35.168, "eval_steps_per_second": 4.396, "eval_wer": 0.5198279597502848, "step": 9800 }, { "epoch": 0.3929427482415812, "grad_norm": 3.5624563694000244, "learning_rate": 0.0002034508474576271, "loss": 0.4824, "step": 10000 }, { "epoch": 0.3929427482415812, "eval_loss": 0.6718243956565857, "eval_runtime": 161.1794, "eval_samples_per_second": 35.091, "eval_steps_per_second": 4.386, "eval_wer": 0.5334531623629857, "step": 10000 }, { "epoch": 0.40080160320641284, "eval_loss": 0.6593254804611206, "eval_runtime": 160.9535, "eval_samples_per_second": 35.141, "eval_steps_per_second": 4.393, "eval_wer": 0.5175169713212755, "step": 10200 }, { "epoch": 0.40866045817124447, "eval_loss": 0.6799437403678894, "eval_runtime": 159.6664, "eval_samples_per_second": 35.424, "eval_steps_per_second": 4.428, "eval_wer": 0.5173885830752195, "step": 10400 }, { "epoch": 0.4125898856536603, "grad_norm": 2.189781427383423, "learning_rate": 0.00019836610169491524, "loss": 0.48, "step": 10500 }, { "epoch": 0.4165193131360761, "eval_loss": 0.6662308573722839, "eval_runtime": 160.8779, "eval_samples_per_second": 35.157, "eval_steps_per_second": 4.395, "eval_wer": 0.5128949944632569, "step": 10600 }, { "epoch": 0.42437816810090767, "eval_loss": 0.6619213223457336, "eval_runtime": 160.6185, "eval_samples_per_second": 35.214, "eval_steps_per_second": 4.402, "eval_wer": 0.5005536743111169, "step": 10800 }, { "epoch": 0.4322370230657393, "grad_norm": 10.41739559173584, "learning_rate": 0.00019328135593220337, "loss": 0.4693, "step": 11000 }, { "epoch": 0.4322370230657393, "eval_loss": 0.6576216220855713, "eval_runtime": 160.9844, "eval_samples_per_second": 35.134, "eval_steps_per_second": 4.392, "eval_wer": 0.519940299465584, "step": 11000 }, { "epoch": 0.44009587803057093, "eval_loss": 0.6406122446060181, "eval_runtime": 160.4456, "eval_samples_per_second": 35.252, "eval_steps_per_second": 4.406, "eval_wer": 0.5018696538331916, "step": 11200 }, { "epoch": 0.44795473299540256, "eval_loss": 0.6408420205116272, "eval_runtime": 161.6075, "eval_samples_per_second": 34.998, "eval_steps_per_second": 4.375, "eval_wer": 0.5066039704065093, "step": 11400 }, { "epoch": 0.4518841604778184, "grad_norm": 3.5733156204223633, "learning_rate": 0.00018819661016949152, "loss": 0.4691, "step": 11500 }, { "epoch": 0.4558135879602342, "eval_loss": 0.6476473212242126, "eval_runtime": 161.2518, "eval_samples_per_second": 35.076, "eval_steps_per_second": 4.384, "eval_wer": 0.5019498964869766, "step": 11600 }, { "epoch": 0.4636724429250658, "eval_loss": 0.6423429846763611, "eval_runtime": 161.3676, "eval_samples_per_second": 35.05, "eval_steps_per_second": 4.381, "eval_wer": 0.4945996694002664, "step": 11800 }, { "epoch": 0.47153129788989745, "grad_norm": 2.3962831497192383, "learning_rate": 0.00018311186440677962, "loss": 0.4444, "step": 12000 }, { "epoch": 0.47153129788989745, "eval_loss": 0.6374172568321228, "eval_runtime": 162.3359, "eval_samples_per_second": 34.841, "eval_steps_per_second": 4.355, "eval_wer": 0.4975846961210701, "step": 12000 }, { "epoch": 0.4793901528547291, "eval_loss": 0.6312358379364014, "eval_runtime": 162.5747, "eval_samples_per_second": 34.79, "eval_steps_per_second": 4.349, "eval_wer": 0.4961403283529393, "step": 12200 }, { "epoch": 0.4872490078195607, "eval_loss": 0.6170411109924316, "eval_runtime": 161.58, "eval_samples_per_second": 35.004, "eval_steps_per_second": 4.376, "eval_wer": 0.4818571359792011, "step": 12400 }, { "epoch": 0.4911784353019765, "grad_norm": 2.623764753341675, "learning_rate": 0.0001780372881355932, "loss": 0.4474, "step": 12500 }, { "epoch": 0.49510786278439234, "eval_loss": 0.6300910115242004, "eval_runtime": 164.417, "eval_samples_per_second": 34.4, "eval_steps_per_second": 4.3, "eval_wer": 0.49325159281667763, "step": 12600 }, { "epoch": 0.502966717749224, "eval_loss": 0.6253496408462524, "eval_runtime": 161.3418, "eval_samples_per_second": 35.056, "eval_steps_per_second": 4.382, "eval_wer": 0.4862383848758646, "step": 12800 }, { "epoch": 0.5108255727140556, "grad_norm": 2.9566869735717773, "learning_rate": 0.00017295254237288134, "loss": 0.4471, "step": 13000 }, { "epoch": 0.5108255727140556, "eval_loss": 0.622020959854126, "eval_runtime": 161.5861, "eval_samples_per_second": 35.003, "eval_steps_per_second": 4.375, "eval_wer": 0.4849224053537899, "step": 13000 }, { "epoch": 0.5186844276788872, "eval_loss": 0.6201028823852539, "eval_runtime": 160.9515, "eval_samples_per_second": 35.141, "eval_steps_per_second": 4.393, "eval_wer": 0.48527547303044405, "step": 13200 }, { "epoch": 0.5265432826437189, "eval_loss": 0.6168439984321594, "eval_runtime": 162.0987, "eval_samples_per_second": 34.892, "eval_steps_per_second": 4.362, "eval_wer": 0.4848261141692478, "step": 13400 }, { "epoch": 0.5304727101261346, "grad_norm": 1.5596935749053955, "learning_rate": 0.0001678677966101695, "loss": 0.4323, "step": 13500 }, { "epoch": 0.5344021376085505, "eval_loss": 0.6172667741775513, "eval_runtime": 162.3681, "eval_samples_per_second": 34.834, "eval_steps_per_second": 4.354, "eval_wer": 0.47707467381361235, "step": 13600 }, { "epoch": 0.542260992573382, "eval_loss": 0.603190004825592, "eval_runtime": 161.2926, "eval_samples_per_second": 35.067, "eval_steps_per_second": 4.383, "eval_wer": 0.4656160228531078, "step": 13800 }, { "epoch": 0.5501198475382136, "grad_norm": 2.978868246078491, "learning_rate": 0.0001627830508474576, "loss": 0.4575, "step": 14000 }, { "epoch": 0.5501198475382136, "eval_loss": 0.6097469925880432, "eval_runtime": 161.1042, "eval_samples_per_second": 35.108, "eval_steps_per_second": 4.388, "eval_wer": 0.4678307200975751, "step": 14000 }, { "epoch": 0.5579787025030453, "eval_loss": 0.5970696806907654, "eval_runtime": 161.5846, "eval_samples_per_second": 35.003, "eval_steps_per_second": 4.375, "eval_wer": 0.4673653127056218, "step": 14200 }, { "epoch": 0.5658375574678769, "eval_loss": 0.5976916551589966, "eval_runtime": 161.7136, "eval_samples_per_second": 34.975, "eval_steps_per_second": 4.372, "eval_wer": 0.4697565437884162, "step": 14400 }, { "epoch": 0.5697669849502928, "grad_norm": 3.0501327514648438, "learning_rate": 0.00015769830508474575, "loss": 0.4395, "step": 14500 }, { "epoch": 0.5736964124327085, "eval_loss": 0.6056780815124512, "eval_runtime": 162.5963, "eval_samples_per_second": 34.786, "eval_steps_per_second": 4.348, "eval_wer": 0.4734316573317713, "step": 14600 }, { "epoch": 0.5815552673975402, "eval_loss": 0.582733690738678, "eval_runtime": 162.9467, "eval_samples_per_second": 34.711, "eval_steps_per_second": 4.339, "eval_wer": 0.4574152236362761, "step": 14800 }, { "epoch": 0.5894141223623718, "grad_norm": 4.3484697341918945, "learning_rate": 0.00015261355932203388, "loss": 0.4119, "step": 15000 }, { "epoch": 0.5894141223623718, "eval_loss": 0.5946210622787476, "eval_runtime": 162.2892, "eval_samples_per_second": 34.851, "eval_steps_per_second": 4.356, "eval_wer": 0.4640432668389209, "step": 15000 }, { "epoch": 0.5972729773272034, "eval_loss": 0.602292001247406, "eval_runtime": 161.4334, "eval_samples_per_second": 35.036, "eval_steps_per_second": 4.38, "eval_wer": 0.47707467381361235, "step": 15200 }, { "epoch": 0.605131832292035, "eval_loss": 0.6129310727119446, "eval_runtime": 161.8649, "eval_samples_per_second": 34.943, "eval_steps_per_second": 4.368, "eval_wer": 0.47266132785543485, "step": 15400 }, { "epoch": 0.6090612597744509, "grad_norm": 4.229031085968018, "learning_rate": 0.00014752881355932203, "loss": 0.4125, "step": 15500 }, { "epoch": 0.6129906872568667, "eval_loss": 0.590186595916748, "eval_runtime": 162.4898, "eval_samples_per_second": 34.808, "eval_steps_per_second": 4.351, "eval_wer": 0.45837813548169665, "step": 15600 }, { "epoch": 0.6208495422216983, "eval_loss": 0.5955421328544617, "eval_runtime": 161.8228, "eval_samples_per_second": 34.952, "eval_steps_per_second": 4.369, "eval_wer": 0.46537529489175267, "step": 15800 }, { "epoch": 0.6287083971865299, "grad_norm": 1.4181621074676514, "learning_rate": 0.00014244406779661016, "loss": 0.4039, "step": 16000 }, { "epoch": 0.6287083971865299, "eval_loss": 0.5955237150192261, "eval_runtime": 161.3699, "eval_samples_per_second": 35.05, "eval_steps_per_second": 4.381, "eval_wer": 0.45946943557317327, "step": 16000 }, { "epoch": 0.6365672521513616, "eval_loss": 0.578912079334259, "eval_runtime": 163.2091, "eval_samples_per_second": 34.655, "eval_steps_per_second": 4.332, "eval_wer": 0.4497279774036687, "step": 16200 }, { "epoch": 0.6444261071161932, "eval_loss": 0.5779294371604919, "eval_runtime": 164.0491, "eval_samples_per_second": 34.477, "eval_steps_per_second": 4.31, "eval_wer": 0.4630322094012293, "step": 16400 }, { "epoch": 0.648355534598609, "grad_norm": 2.0229876041412354, "learning_rate": 0.00013736949152542372, "loss": 0.3969, "step": 16500 }, { "epoch": 0.6522849620810248, "eval_loss": 0.5677434802055359, "eval_runtime": 161.201, "eval_samples_per_second": 35.087, "eval_steps_per_second": 4.386, "eval_wer": 0.45507213814575276, "step": 16600 }, { "epoch": 0.6601438170458565, "eval_loss": 0.586939811706543, "eval_runtime": 161.4539, "eval_samples_per_second": 35.032, "eval_steps_per_second": 4.379, "eval_wer": 0.46062492978767794, "step": 16800 }, { "epoch": 0.6680026720106881, "grad_norm": 4.166793346405029, "learning_rate": 0.00013229491525423729, "loss": 0.3923, "step": 17000 }, { "epoch": 0.6680026720106881, "eval_loss": 0.5710186958312988, "eval_runtime": 160.5637, "eval_samples_per_second": 35.226, "eval_steps_per_second": 4.403, "eval_wer": 0.45017733626486495, "step": 17000 }, { "epoch": 0.6758615269755197, "eval_loss": 0.5639811158180237, "eval_runtime": 161.7944, "eval_samples_per_second": 34.958, "eval_steps_per_second": 4.37, "eval_wer": 0.44741698897465937, "step": 17200 }, { "epoch": 0.6837203819403513, "eval_loss": 0.5841760039329529, "eval_runtime": 161.0184, "eval_samples_per_second": 35.126, "eval_steps_per_second": 4.391, "eval_wer": 0.4497921715266967, "step": 17400 }, { "epoch": 0.6876498094227671, "grad_norm": 3.127680778503418, "learning_rate": 0.0001272101694915254, "loss": 0.386, "step": 17500 }, { "epoch": 0.691579236905183, "eval_loss": 0.5596618056297302, "eval_runtime": 160.919, "eval_samples_per_second": 35.148, "eval_steps_per_second": 4.394, "eval_wer": 0.44403074898493045, "step": 17600 }, { "epoch": 0.6994380918700145, "eval_loss": 0.5620830059051514, "eval_runtime": 160.6614, "eval_samples_per_second": 35.204, "eval_steps_per_second": 4.401, "eval_wer": 0.43812488966635105, "step": 17800 }, { "epoch": 0.7072969468348461, "grad_norm": 17.387800216674805, "learning_rate": 0.00012213559322033898, "loss": 0.3851, "step": 18000 }, { "epoch": 0.7072969468348461, "eval_loss": 0.566453218460083, "eval_runtime": 161.6574, "eval_samples_per_second": 34.988, "eval_steps_per_second": 4.373, "eval_wer": 0.434562115838295, "step": 18000 }, { "epoch": 0.7151558017996777, "eval_loss": 0.5572646260261536, "eval_runtime": 162.4898, "eval_samples_per_second": 34.808, "eval_steps_per_second": 4.351, "eval_wer": 0.4356213188682576, "step": 18200 }, { "epoch": 0.7230146567645094, "eval_loss": 0.5548349022865295, "eval_runtime": 161.0153, "eval_samples_per_second": 35.127, "eval_steps_per_second": 4.391, "eval_wer": 0.4344337275922389, "step": 18400 }, { "epoch": 0.7269440842469252, "grad_norm": 9.4507417678833, "learning_rate": 0.00011705084745762712, "loss": 0.369, "step": 18500 }, { "epoch": 0.730873511729341, "eval_loss": 0.5616690516471863, "eval_runtime": 161.4318, "eval_samples_per_second": 35.036, "eval_steps_per_second": 4.38, "eval_wer": 0.43637559981383706, "step": 18600 }, { "epoch": 0.7387323666941726, "eval_loss": 0.5595532655715942, "eval_runtime": 160.8301, "eval_samples_per_second": 35.168, "eval_steps_per_second": 4.396, "eval_wer": 0.4393927235961548, "step": 18800 }, { "epoch": 0.7465912216590043, "grad_norm": 1.8793506622314453, "learning_rate": 0.00011196610169491524, "loss": 0.3738, "step": 19000 }, { "epoch": 0.7465912216590043, "eval_loss": 0.549248218536377, "eval_runtime": 161.3194, "eval_samples_per_second": 35.061, "eval_steps_per_second": 4.383, "eval_wer": 0.42923400362696795, "step": 19000 }, { "epoch": 0.7544500766238359, "eval_loss": 0.5478147268295288, "eval_runtime": 162.2231, "eval_samples_per_second": 34.866, "eval_steps_per_second": 4.358, "eval_wer": 0.4372261719439585, "step": 19200 }, { "epoch": 0.7623089315886675, "eval_loss": 0.5375632047653198, "eval_runtime": 161.0297, "eval_samples_per_second": 35.124, "eval_steps_per_second": 4.39, "eval_wer": 0.42873649917350065, "step": 19400 }, { "epoch": 0.7662383590710834, "grad_norm": 2.159616708755493, "learning_rate": 0.00010688135593220338, "loss": 0.368, "step": 19500 }, { "epoch": 0.7701677865534992, "eval_loss": 0.5282244086265564, "eval_runtime": 163.0357, "eval_samples_per_second": 34.692, "eval_steps_per_second": 4.336, "eval_wer": 0.4193481086806503, "step": 19600 }, { "epoch": 0.7780266415183308, "eval_loss": 0.5348193049430847, "eval_runtime": 162.5531, "eval_samples_per_second": 34.795, "eval_steps_per_second": 4.349, "eval_wer": 0.42507743416090255, "step": 19800 }, { "epoch": 0.7858854964831624, "grad_norm": 2.2020351886749268, "learning_rate": 0.00010179661016949151, "loss": 0.3629, "step": 20000 }, { "epoch": 0.7858854964831624, "eval_loss": 0.5367931723594666, "eval_runtime": 162.0053, "eval_samples_per_second": 34.912, "eval_steps_per_second": 4.364, "eval_wer": 0.43130426409462214, "step": 20000 }, { "epoch": 0.793744351447994, "eval_loss": 0.5550614595413208, "eval_runtime": 161.9948, "eval_samples_per_second": 34.915, "eval_steps_per_second": 4.364, "eval_wer": 0.44123830463321084, "step": 20200 }, { "epoch": 0.8016032064128257, "eval_loss": 0.5251778364181519, "eval_runtime": 162.6214, "eval_samples_per_second": 34.78, "eval_steps_per_second": 4.348, "eval_wer": 0.4105214167642952, "step": 20400 }, { "epoch": 0.8055326338952414, "grad_norm": 2.7725887298583984, "learning_rate": 9.671186440677966e-05, "loss": 0.3638, "step": 20500 }, { "epoch": 0.8094620613776573, "eval_loss": 0.5242481827735901, "eval_runtime": 162.5731, "eval_samples_per_second": 34.791, "eval_steps_per_second": 4.349, "eval_wer": 0.41174110510182793, "step": 20600 }, { "epoch": 0.8173209163424889, "eval_loss": 0.5233432054519653, "eval_runtime": 161.9438, "eval_samples_per_second": 34.926, "eval_steps_per_second": 4.366, "eval_wer": 0.4165877613904447, "step": 20800 }, { "epoch": 0.8251797713073206, "grad_norm": 2.733196496963501, "learning_rate": 9.162711864406779e-05, "loss": 0.3512, "step": 21000 }, { "epoch": 0.8251797713073206, "eval_loss": 0.524342954158783, "eval_runtime": 161.947, "eval_samples_per_second": 34.925, "eval_steps_per_second": 4.366, "eval_wer": 0.4160581598754634, "step": 21000 }, { "epoch": 0.8330386262721522, "eval_loss": 0.5150259733200073, "eval_runtime": 162.0793, "eval_samples_per_second": 34.896, "eval_steps_per_second": 4.362, "eval_wer": 0.4123028036783232, "step": 21200 }, { "epoch": 0.8408974812369838, "eval_loss": 0.5088914632797241, "eval_runtime": 161.2392, "eval_samples_per_second": 35.078, "eval_steps_per_second": 4.385, "eval_wer": 0.4079536518431738, "step": 21400 }, { "epoch": 0.8448269087193996, "grad_norm": 4.562708377838135, "learning_rate": 8.654237288135593e-05, "loss": 0.3536, "step": 21500 }, { "epoch": 0.8487563362018153, "eval_loss": 0.515373170375824, "eval_runtime": 162.8063, "eval_samples_per_second": 34.741, "eval_steps_per_second": 4.343, "eval_wer": 0.40899680634237934, "step": 21600 }, { "epoch": 0.856615191166647, "eval_loss": 0.5161571502685547, "eval_runtime": 162.7678, "eval_samples_per_second": 34.749, "eval_steps_per_second": 4.344, "eval_wer": 0.4091893887114635, "step": 21800 }, { "epoch": 0.8644740461314786, "grad_norm": 2.272256374359131, "learning_rate": 8.146779661016948e-05, "loss": 0.3464, "step": 22000 }, { "epoch": 0.8644740461314786, "eval_loss": 0.5097736716270447, "eval_runtime": 162.1935, "eval_samples_per_second": 34.872, "eval_steps_per_second": 4.359, "eval_wer": 0.40527354720675324, "step": 22000 }, { "epoch": 0.8723329010963102, "eval_loss": 0.5069981813430786, "eval_runtime": 162.5966, "eval_samples_per_second": 34.785, "eval_steps_per_second": 4.348, "eval_wer": 0.4022724719551925, "step": 22200 }, { "epoch": 0.8801917560611419, "eval_loss": 0.5070444345474243, "eval_runtime": 162.5617, "eval_samples_per_second": 34.793, "eval_steps_per_second": 4.349, "eval_wer": 0.40707098265153824, "step": 22400 }, { "epoch": 0.8841211835435577, "grad_norm": 2.9740068912506104, "learning_rate": 7.638305084745762e-05, "loss": 0.3377, "step": 22500 }, { "epoch": 0.8880506110259735, "eval_loss": 0.5028176307678223, "eval_runtime": 162.4451, "eval_samples_per_second": 34.818, "eval_steps_per_second": 4.352, "eval_wer": 0.39670363178251034, "step": 22600 }, { "epoch": 0.8959094659908051, "eval_loss": 0.5036062002182007, "eval_runtime": 162.5763, "eval_samples_per_second": 34.79, "eval_steps_per_second": 4.349, "eval_wer": 0.39784307746625797, "step": 22800 }, { "epoch": 0.9037683209556368, "grad_norm": 1.9388916492462158, "learning_rate": 7.129830508474575e-05, "loss": 0.3272, "step": 23000 }, { "epoch": 0.9037683209556368, "eval_loss": 0.5020586848258972, "eval_runtime": 161.6894, "eval_samples_per_second": 34.981, "eval_steps_per_second": 4.373, "eval_wer": 0.39538765226043554, "step": 23000 }, { "epoch": 0.9116271759204684, "eval_loss": 0.5032612085342407, "eval_runtime": 163.6786, "eval_samples_per_second": 34.556, "eval_steps_per_second": 4.319, "eval_wer": 0.3984529216350243, "step": 23200 }, { "epoch": 0.9194860308853, "eval_loss": 0.49842530488967896, "eval_runtime": 162.0701, "eval_samples_per_second": 34.898, "eval_steps_per_second": 4.362, "eval_wer": 0.3971850877052206, "step": 23400 }, { "epoch": 0.9234154583677158, "grad_norm": 3.9436373710632324, "learning_rate": 6.621355932203389e-05, "loss": 0.319, "step": 23500 }, { "epoch": 0.9273448858501316, "eval_loss": 0.4928737282752991, "eval_runtime": 163.9597, "eval_samples_per_second": 34.496, "eval_steps_per_second": 4.312, "eval_wer": 0.39243472260114587, "step": 23600 }, { "epoch": 0.9352037408149633, "eval_loss": 0.49405232071876526, "eval_runtime": 161.8803, "eval_samples_per_second": 34.939, "eval_steps_per_second": 4.367, "eval_wer": 0.4013095601097719, "step": 23800 }, { "epoch": 0.9430625957797949, "grad_norm": 3.4186201095581055, "learning_rate": 6.112881355932203e-05, "loss": 0.3184, "step": 24000 }, { "epoch": 0.9430625957797949, "eval_loss": 0.4856198728084564, "eval_runtime": 163.6122, "eval_samples_per_second": 34.57, "eval_steps_per_second": 4.321, "eval_wer": 0.387411532474202, "step": 24000 }, { "epoch": 0.9509214507446265, "eval_loss": 0.48915818333625793, "eval_runtime": 162.8317, "eval_samples_per_second": 34.735, "eval_steps_per_second": 4.342, "eval_wer": 0.3913755195711833, "step": 24200 }, { "epoch": 0.9587803057094582, "eval_loss": 0.48598504066467285, "eval_runtime": 160.6269, "eval_samples_per_second": 35.212, "eval_steps_per_second": 4.402, "eval_wer": 0.3813772849095665, "step": 24400 }, { "epoch": 0.9627097331918739, "grad_norm": 2.70164155960083, "learning_rate": 5.6044067796610164e-05, "loss": 0.3091, "step": 24500 }, { "epoch": 0.9666391606742898, "eval_loss": 0.4825168251991272, "eval_runtime": 162.6242, "eval_samples_per_second": 34.78, "eval_steps_per_second": 4.347, "eval_wer": 0.38336730272343567, "step": 24600 }, { "epoch": 0.9744980156391214, "eval_loss": 0.4784228205680847, "eval_runtime": 162.0189, "eval_samples_per_second": 34.91, "eval_steps_per_second": 4.364, "eval_wer": 0.3866893485901366, "step": 24800 }, { "epoch": 0.982356870603953, "grad_norm": 9.408166885375977, "learning_rate": 5.096949152542373e-05, "loss": 0.3154, "step": 25000 }, { "epoch": 0.982356870603953, "eval_loss": 0.47507792711257935, "eval_runtime": 161.9422, "eval_samples_per_second": 34.926, "eval_steps_per_second": 4.366, "eval_wer": 0.3807834892715572, "step": 25000 }, { "epoch": 0.9902157255687847, "eval_loss": 0.4778765141963959, "eval_runtime": 162.3405, "eval_samples_per_second": 34.84, "eval_steps_per_second": 4.355, "eval_wer": 0.38492401020686556, "step": 25200 }, { "epoch": 0.9980745805336163, "eval_loss": 0.477267324924469, "eval_runtime": 161.2107, "eval_samples_per_second": 35.085, "eval_steps_per_second": 4.386, "eval_wer": 0.38084768339458525, "step": 25400 }, { "epoch": 1.002004008016032, "grad_norm": 0.7003775835037231, "learning_rate": 4.589491525423728e-05, "loss": 0.312, "step": 25500 }, { "epoch": 1.005933435498448, "eval_loss": 0.47774726152420044, "eval_runtime": 160.8535, "eval_samples_per_second": 35.162, "eval_steps_per_second": 4.395, "eval_wer": 0.3757923962061273, "step": 25600 }, { "epoch": 1.0137922904632795, "eval_loss": 0.4752050042152405, "eval_runtime": 159.7765, "eval_samples_per_second": 35.399, "eval_steps_per_second": 4.425, "eval_wer": 0.3820513232013609, "step": 25800 }, { "epoch": 1.0216511454281112, "grad_norm": 0.702942430973053, "learning_rate": 4.081016949152542e-05, "loss": 0.2651, "step": 26000 }, { "epoch": 1.0216511454281112, "eval_loss": 0.4700838327407837, "eval_runtime": 163.2858, "eval_samples_per_second": 34.639, "eval_steps_per_second": 4.33, "eval_wer": 0.37750958899712733, "step": 26000 }, { "epoch": 1.0295100003929427, "eval_loss": 0.47011885046958923, "eval_runtime": 160.7741, "eval_samples_per_second": 35.18, "eval_steps_per_second": 4.397, "eval_wer": 0.3760652212289965, "step": 26200 }, { "epoch": 1.0373688553577745, "eval_loss": 0.471804678440094, "eval_runtime": 160.2455, "eval_samples_per_second": 35.296, "eval_steps_per_second": 4.412, "eval_wer": 0.37755773458939834, "step": 26400 }, { "epoch": 1.0412982828401902, "grad_norm": 0.98069828748703, "learning_rate": 3.572542372881355e-05, "loss": 0.2627, "step": 26500 }, { "epoch": 1.045227710322606, "eval_loss": 0.4638473391532898, "eval_runtime": 160.1121, "eval_samples_per_second": 35.325, "eval_steps_per_second": 4.416, "eval_wer": 0.37296785479289374, "step": 26600 }, { "epoch": 1.0530865652874377, "eval_loss": 0.4677112400531769, "eval_runtime": 159.9389, "eval_samples_per_second": 35.364, "eval_steps_per_second": 4.42, "eval_wer": 0.3720370400089872, "step": 26800 }, { "epoch": 1.0609454202522692, "grad_norm": 0.8780287504196167, "learning_rate": 3.0640677966101693e-05, "loss": 0.2427, "step": 27000 }, { "epoch": 1.0609454202522692, "eval_loss": 0.4642546474933624, "eval_runtime": 160.0541, "eval_samples_per_second": 35.338, "eval_steps_per_second": 4.417, "eval_wer": 0.36985443982603394, "step": 27000 }, { "epoch": 1.0688042752171008, "eval_loss": 0.46017909049987793, "eval_runtime": 159.9066, "eval_samples_per_second": 35.371, "eval_steps_per_second": 4.421, "eval_wer": 0.3713469531864358, "step": 27200 }, { "epoch": 1.0766631301819325, "eval_loss": 0.46644654870033264, "eval_runtime": 160.7516, "eval_samples_per_second": 35.185, "eval_steps_per_second": 4.398, "eval_wer": 0.3703037986872302, "step": 27400 }, { "epoch": 1.0805925576643483, "grad_norm": 0.8659859895706177, "learning_rate": 2.556610169491525e-05, "loss": 0.2464, "step": 27500 }, { "epoch": 1.0845219851467642, "eval_loss": 0.4609028100967407, "eval_runtime": 161.4502, "eval_samples_per_second": 35.032, "eval_steps_per_second": 4.379, "eval_wer": 0.36770393670459467, "step": 27600 }, { "epoch": 1.0923808401115958, "eval_loss": 0.4613707363605499, "eval_runtime": 160.5963, "eval_samples_per_second": 35.219, "eval_steps_per_second": 4.402, "eval_wer": 0.3687310426730433, "step": 27800 }, { "epoch": 1.1002396950764273, "grad_norm": 1.6944918632507324, "learning_rate": 2.0481355932203388e-05, "loss": 0.2537, "step": 28000 }, { "epoch": 1.1002396950764273, "eval_loss": 0.45553678274154663, "eval_runtime": 160.1154, "eval_samples_per_second": 35.325, "eval_steps_per_second": 4.416, "eval_wer": 0.36545714239861343, "step": 28000 }, { "epoch": 1.108098550041259, "eval_loss": 0.456032931804657, "eval_runtime": 160.97, "eval_samples_per_second": 35.137, "eval_steps_per_second": 4.392, "eval_wer": 0.36447818202243587, "step": 28200 }, { "epoch": 1.1159574050060905, "eval_loss": 0.45427000522613525, "eval_runtime": 160.1348, "eval_samples_per_second": 35.32, "eval_steps_per_second": 4.415, "eval_wer": 0.36261655245462276, "step": 28400 }, { "epoch": 1.1198868324885065, "grad_norm": 0.8318812251091003, "learning_rate": 1.5396610169491525e-05, "loss": 0.2313, "step": 28500 }, { "epoch": 1.1238162599709223, "eval_loss": 0.45402956008911133, "eval_runtime": 160.7545, "eval_samples_per_second": 35.184, "eval_steps_per_second": 4.398, "eval_wer": 0.3631461539696041, "step": 28600 }, { "epoch": 1.1316751149357538, "eval_loss": 0.4536111354827881, "eval_runtime": 165.4654, "eval_samples_per_second": 34.182, "eval_steps_per_second": 4.273, "eval_wer": 0.3626326009853798, "step": 28800 }, { "epoch": 1.1395339699005855, "grad_norm": 0.7866860032081604, "learning_rate": 1.031186440677966e-05, "loss": 0.2451, "step": 29000 }, { "epoch": 1.1395339699005855, "eval_loss": 0.45293620228767395, "eval_runtime": 160.3649, "eval_samples_per_second": 35.27, "eval_steps_per_second": 4.409, "eval_wer": 0.3617338832629873, "step": 29000 }, { "epoch": 1.147392824865417, "eval_loss": 0.4530145823955536, "eval_runtime": 160.576, "eval_samples_per_second": 35.223, "eval_steps_per_second": 4.403, "eval_wer": 0.3598401566336602, "step": 29200 }, { "epoch": 1.1552516798302488, "eval_loss": 0.4515323042869568, "eval_runtime": 160.1136, "eval_samples_per_second": 35.325, "eval_steps_per_second": 4.416, "eval_wer": 0.3591500698111088, "step": 29400 }, { "epoch": 1.1591811073126645, "grad_norm": 3.2193210124969482, "learning_rate": 5.227118644067796e-06, "loss": 0.2445, "step": 29500 }, { "epoch": 1.1631105347950803, "eval_loss": 0.451358437538147, "eval_runtime": 160.6595, "eval_samples_per_second": 35.205, "eval_steps_per_second": 4.401, "eval_wer": 0.3590056330342957, "step": 29600 }, { "epoch": 1.170969389759912, "eval_loss": 0.4514302611351013, "eval_runtime": 160.1434, "eval_samples_per_second": 35.318, "eval_steps_per_second": 4.415, "eval_wer": 0.3588772447882396, "step": 29800 }, { "epoch": 1.1788282447247436, "grad_norm": 0.5669330358505249, "learning_rate": 1.423728813559322e-07, "loss": 0.2364, "step": 30000 }, { "epoch": 1.1788282447247436, "eval_loss": 0.4510672390460968, "eval_runtime": 160.6855, "eval_samples_per_second": 35.199, "eval_steps_per_second": 4.4, "eval_wer": 0.3591179727495948, "step": 30000 }, { "epoch": 1.1788282447247436, "step": 30000, "total_flos": 3.731985674211105e+19, "train_loss": 0.5082863594055176, "train_runtime": 37313.8627, "train_samples_per_second": 6.432, "train_steps_per_second": 0.804 } ], "logging_steps": 500, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.731985674211105e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }