diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 7.858854964831624, + "epoch": 7.65345170671973, "eval_steps": 1000, "global_step": 200000, "is_hyper_param_search": false, @@ -9,4613 +9,4613 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.01964713741207906, - "grad_norm": 3.3487088680267334, + "epoch": 0.019133629266799325, + "grad_norm": 2.1359012126922607, "learning_rate": 0.0002982, - "loss": 4.7396, + "loss": 4.6329, "step": 500 }, { - "epoch": 0.03929427482415812, - "grad_norm": 4.3209547996521, - "learning_rate": 0.00029925263157894736, - "loss": 1.092, + "epoch": 0.03826725853359865, + "grad_norm": 2.5346407890319824, + "learning_rate": 0.00029925413533834583, + "loss": 1.059, "step": 1000 }, { - "epoch": 0.03929427482415812, - "eval_loss": 1.1751947402954102, - "eval_runtime": 144.9265, - "eval_samples_per_second": 39.027, - "eval_steps_per_second": 4.878, - "eval_wer": 0.8087496589687214, + "epoch": 0.03826725853359865, + "eval_loss": Infinity, + "eval_runtime": 176.6488, + "eval_samples_per_second": 39.751, + "eval_steps_per_second": 4.97, + "eval_wer": 0.7912656018616459, "step": 1000 }, { - "epoch": 0.05894141223623718, - "grad_norm": 2.453352451324463, - "learning_rate": 0.0002985007518796992, - "loss": 0.8198, + "epoch": 0.05740088780039798, + "grad_norm": 2.2379095554351807, + "learning_rate": 0.0002985022556390977, + "loss": 0.8241, "step": 1500 }, { - "epoch": 0.07858854964831624, - "grad_norm": 4.910336494445801, - "learning_rate": 0.0002977488721804511, - "loss": 0.7307, + "epoch": 0.0765345170671973, + "grad_norm": 3.605928659439087, + "learning_rate": 0.0002977503759398496, + "loss": 0.7527, "step": 2000 }, { - "epoch": 0.07858854964831624, - "eval_loss": 0.9363270998001099, - "eval_runtime": 143.2813, - "eval_samples_per_second": 39.475, - "eval_steps_per_second": 4.934, - "eval_wer": 0.6985123011988252, + "epoch": 0.0765345170671973, + "eval_loss": Infinity, + "eval_runtime": 174.864, + "eval_samples_per_second": 40.157, + "eval_steps_per_second": 5.021, + "eval_wer": 0.7347419081870108, "step": 2000 }, { - "epoch": 0.0982356870603953, - "grad_norm": 2.127075672149658, - "learning_rate": 0.00029699699248120296, - "loss": 0.6921, + "epoch": 0.09566814633399663, + "grad_norm": 3.9373598098754883, + "learning_rate": 0.0002969984962406015, + "loss": 0.6967, "step": 2500 }, { - "epoch": 0.11788282447247436, - "grad_norm": 2.57724928855896, - "learning_rate": 0.0002962451127819549, - "loss": 0.6633, + "epoch": 0.11480177560079596, + "grad_norm": 5.641207218170166, + "learning_rate": 0.00029624661654135335, + "loss": 0.6861, "step": 3000 }, { - "epoch": 0.11788282447247436, - "eval_loss": 0.8888041377067566, - "eval_runtime": 144.2307, - "eval_samples_per_second": 39.215, - "eval_steps_per_second": 4.902, - "eval_wer": 0.6624352040570686, + "epoch": 0.11480177560079596, + "eval_loss": Infinity, + "eval_runtime": 174.8654, + "eval_samples_per_second": 40.157, + "eval_steps_per_second": 5.021, + "eval_wer": 0.6766976940977364, "step": 3000 }, { - "epoch": 0.1375299618845534, - "grad_norm": 3.0028977394104004, - "learning_rate": 0.00029549323308270675, - "loss": 0.6395, + "epoch": 0.13393540486759528, + "grad_norm": 4.324626922607422, + "learning_rate": 0.0002954947368421052, + "loss": 0.6736, "step": 3500 }, { - "epoch": 0.15717709929663248, - "grad_norm": 1.864983320236206, - "learning_rate": 0.0002947413533834586, - "loss": 0.6273, + "epoch": 0.1530690341343946, + "grad_norm": 5.716803550720215, + "learning_rate": 0.0002947443609022556, + "loss": 0.651, "step": 4000 }, { - "epoch": 0.15717709929663248, - "eval_loss": 0.8302789330482483, - "eval_runtime": 143.3729, - "eval_samples_per_second": 39.45, - "eval_steps_per_second": 4.931, - "eval_wer": 0.640432668389209, + "epoch": 0.1530690341343946, + "eval_loss": Infinity, + "eval_runtime": 175.6143, + "eval_samples_per_second": 39.985, + "eval_steps_per_second": 5.0, + "eval_wer": 0.646181510471758, "step": 4000 }, { - "epoch": 0.17682423670871153, - "grad_norm": 1.865403175354004, - "learning_rate": 0.000293990977443609, - "loss": 0.6196, + "epoch": 0.17220266340119394, + "grad_norm": 6.054178714752197, + "learning_rate": 0.000293993984962406, + "loss": 0.6308, "step": 4500 }, { - "epoch": 0.1964713741207906, - "grad_norm": 2.015836000442505, - "learning_rate": 0.0002932390977443609, - "loss": 0.6031, + "epoch": 0.19133629266799326, + "grad_norm": 3.1400604248046875, + "learning_rate": 0.00029324210526315787, + "loss": 0.6372, "step": 5000 }, { - "epoch": 0.1964713741207906, - "eval_loss": 0.7927883267402649, - "eval_runtime": 142.89, - "eval_samples_per_second": 39.583, - "eval_steps_per_second": 4.948, - "eval_wer": 0.6134229911251625, + "epoch": 0.19133629266799326, + "eval_loss": Infinity, + "eval_runtime": 176.1091, + "eval_samples_per_second": 39.873, + "eval_steps_per_second": 4.986, + "eval_wer": 0.6245107890839856, "step": 5000 }, { - "epoch": 0.21611851153286965, - "grad_norm": 2.023449420928955, - "learning_rate": 0.00029248721804511275, - "loss": 0.6089, + "epoch": 0.21046992193479258, + "grad_norm": 3.961786985397339, + "learning_rate": 0.00029249022556390974, + "loss": 0.6101, "step": 5500 }, { - "epoch": 0.23576564894494872, - "grad_norm": 2.1012802124023438, - "learning_rate": 0.00029173533834586467, - "loss": 0.6007, + "epoch": 0.22960355120159193, + "grad_norm": 2.590359687805176, + "learning_rate": 0.0002917383458646616, + "loss": 0.6078, "step": 6000 }, { - "epoch": 0.23576564894494872, - "eval_loss": 0.771953821182251, - "eval_runtime": 144.3238, - "eval_samples_per_second": 39.19, - "eval_steps_per_second": 4.899, - "eval_wer": 0.5832357047712282, + "epoch": 0.22960355120159193, + "eval_loss": Infinity, + "eval_runtime": 176.6935, + "eval_samples_per_second": 39.741, + "eval_steps_per_second": 4.969, + "eval_wer": 0.5931880685424159, "step": 6000 }, { - "epoch": 0.2554127863570278, - "grad_norm": 1.6933408975601196, - "learning_rate": 0.0002909834586466165, - "loss": 0.5693, + "epoch": 0.24873718046839124, + "grad_norm": 3.1479835510253906, + "learning_rate": 0.00029098646616541353, + "loss": 0.601, "step": 6500 }, { - "epoch": 0.2750599237691068, - "grad_norm": 1.8497469425201416, - "learning_rate": 0.0002902315789473684, - "loss": 0.5739, + "epoch": 0.26787080973519056, + "grad_norm": 4.320711135864258, + "learning_rate": 0.0002902345864661654, + "loss": 0.6006, "step": 7000 }, { - "epoch": 0.2750599237691068, - "eval_loss": 0.7533236145973206, - "eval_runtime": 144.3945, - "eval_samples_per_second": 39.17, - "eval_steps_per_second": 4.896, - "eval_wer": 0.5685673476593218, + "epoch": 0.26787080973519056, + "eval_loss": Infinity, + "eval_runtime": 176.7371, + "eval_samples_per_second": 39.731, + "eval_steps_per_second": 4.968, + "eval_wer": 0.6101253437698329, "step": 7000 }, { - "epoch": 0.2947070611811859, - "grad_norm": 2.9493396282196045, - "learning_rate": 0.0002894796992481203, - "loss": 0.57, + "epoch": 0.2870044390019899, + "grad_norm": 3.0556020736694336, + "learning_rate": 0.00028948270676691727, + "loss": 0.5943, "step": 7500 }, { - "epoch": 0.31435419859326497, - "grad_norm": 3.0881130695343018, - "learning_rate": 0.00028872781954887214, - "loss": 0.5655, + "epoch": 0.3061380682687892, + "grad_norm": 1.8571085929870605, + "learning_rate": 0.00028873082706766913, + "loss": 0.6008, "step": 8000 }, { - "epoch": 0.31435419859326497, - "eval_loss": 0.7523130178451538, - "eval_runtime": 143.9033, - "eval_samples_per_second": 39.304, - "eval_steps_per_second": 4.913, - "eval_wer": 0.5594517821893406, + "epoch": 0.3061380682687892, + "eval_loss": Infinity, + "eval_runtime": 176.2913, + "eval_samples_per_second": 39.832, + "eval_steps_per_second": 4.98, + "eval_wer": 0.5832055214723927, "step": 8000 }, { - "epoch": 0.33400133600534404, - "grad_norm": 2.8726112842559814, - "learning_rate": 0.000287975939849624, - "loss": 0.5541, + "epoch": 0.32527169753558854, + "grad_norm": 2.2670278549194336, + "learning_rate": 0.00028797894736842106, + "loss": 0.5828, "step": 8500 }, { - "epoch": 0.35364847341742306, - "grad_norm": 2.6348774433135986, - "learning_rate": 0.0002872255639097744, - "loss": 0.5584, + "epoch": 0.3444053268023879, + "grad_norm": 3.5902249813079834, + "learning_rate": 0.00028722706766917287, + "loss": 0.592, "step": 9000 }, { - "epoch": 0.35364847341742306, - "eval_loss": 0.7173847556114197, - "eval_runtime": 144.396, - "eval_samples_per_second": 39.17, - "eval_steps_per_second": 4.896, - "eval_wer": 0.5668020092760507, + "epoch": 0.3444053268023879, + "eval_loss": Infinity, + "eval_runtime": 177.2597, + "eval_samples_per_second": 39.614, + "eval_steps_per_second": 4.953, + "eval_wer": 0.5834302940554262, "step": 9000 }, { - "epoch": 0.37329561082950213, - "grad_norm": 3.332054853439331, - "learning_rate": 0.00028647368421052627, - "loss": 0.5485, + "epoch": 0.3635389560691872, + "grad_norm": 1.5114198923110962, + "learning_rate": 0.0002864751879699248, + "loss": 0.5702, "step": 9500 }, { - "epoch": 0.3929427482415812, - "grad_norm": 2.823880434036255, - "learning_rate": 0.0002857218045112782, - "loss": 0.5454, + "epoch": 0.3826725853359865, + "grad_norm": 1.9661872386932373, + "learning_rate": 0.00028572330827067666, + "loss": 0.5638, "step": 10000 }, { - "epoch": 0.3929427482415812, - "eval_loss": 0.7537470459938049, - "eval_runtime": 144.0918, - "eval_samples_per_second": 39.253, - "eval_steps_per_second": 4.907, - "eval_wer": 0.5798815618430133, + "epoch": 0.3826725853359865, + "eval_loss": Infinity, + "eval_runtime": 176.3135, + "eval_samples_per_second": 39.827, + "eval_steps_per_second": 4.98, + "eval_wer": 0.5573434525068754, "step": 10000 }, { - "epoch": 0.4125898856536603, - "grad_norm": 2.8141980171203613, - "learning_rate": 0.00028496992481203006, - "loss": 0.5527, + "epoch": 0.40180621460278587, + "grad_norm": 3.7164504528045654, + "learning_rate": 0.00028497142857142853, + "loss": 0.5599, "step": 10500 }, { - "epoch": 0.4322370230657393, - "grad_norm": 4.174062728881836, - "learning_rate": 0.0002842180451127819, - "loss": 0.5322, + "epoch": 0.42093984386958516, + "grad_norm": 2.0946409702301025, + "learning_rate": 0.0002842195488721804, + "loss": 0.5585, "step": 11000 }, { - "epoch": 0.4322370230657393, - "eval_loss": 0.7155322432518005, - "eval_runtime": 144.2272, - "eval_samples_per_second": 39.216, - "eval_steps_per_second": 4.902, - "eval_wer": 0.5613615573494246, + "epoch": 0.42093984386958516, + "eval_loss": Infinity, + "eval_runtime": 177.3363, + "eval_samples_per_second": 39.597, + "eval_steps_per_second": 4.951, + "eval_wer": 0.5664269092447641, "step": 11000 }, { - "epoch": 0.4518841604778184, - "grad_norm": 2.2155849933624268, + "epoch": 0.4400734731363845, + "grad_norm": 3.3412957191467285, "learning_rate": 0.0002834676691729323, - "loss": 0.5373, + "loss": 0.5436, "step": 11500 }, { - "epoch": 0.47153129788989745, - "grad_norm": 2.1979432106018066, + "epoch": 0.45920710240318385, + "grad_norm": 3.093538999557495, "learning_rate": 0.0002827157894736842, - "loss": 0.5206, + "loss": 0.5569, "step": 12000 }, { - "epoch": 0.47153129788989745, - "eval_loss": 0.7130174040794373, - "eval_runtime": 144.2801, - "eval_samples_per_second": 39.202, - "eval_steps_per_second": 4.9, - "eval_wer": 0.5746336922854712, + "epoch": 0.45920710240318385, + "eval_loss": Infinity, + "eval_runtime": 177.5626, + "eval_samples_per_second": 39.547, + "eval_steps_per_second": 4.945, + "eval_wer": 0.5487624286016501, "step": 12000 }, { - "epoch": 0.4911784353019765, - "grad_norm": 3.059553861618042, - "learning_rate": 0.00028196390977443605, - "loss": 0.5303, + "epoch": 0.47834073166998314, + "grad_norm": 2.192824125289917, + "learning_rate": 0.0002819654135338346, + "loss": 0.5418, "step": 12500 }, { - "epoch": 0.5108255727140556, - "grad_norm": 2.1422946453094482, - "learning_rate": 0.0002812120300751879, - "loss": 0.5304, + "epoch": 0.4974743609367825, + "grad_norm": 2.0859923362731934, + "learning_rate": 0.00028121353383458645, + "loss": 0.5293, "step": 13000 }, { - "epoch": 0.5108255727140556, - "eval_loss": 0.6817054748535156, - "eval_runtime": 144.6048, - "eval_samples_per_second": 39.114, - "eval_steps_per_second": 4.889, - "eval_wer": 0.5390220025356679, + "epoch": 0.4974743609367825, + "eval_loss": Infinity, + "eval_runtime": 177.8833, + "eval_samples_per_second": 39.475, + "eval_steps_per_second": 4.936, + "eval_wer": 0.5434736619420352, "step": 13000 }, { - "epoch": 0.5304727101261346, - "grad_norm": 3.0140066146850586, - "learning_rate": 0.00028046015037593984, - "loss": 0.5156, + "epoch": 0.5166079902035818, + "grad_norm": 3.662593364715576, + "learning_rate": 0.0002804616541353383, + "loss": 0.5316, "step": 13500 }, { - "epoch": 0.5501198475382136, - "grad_norm": 2.334425449371338, - "learning_rate": 0.0002797082706766917, - "loss": 0.55, + "epoch": 0.5357416194703811, + "grad_norm": 3.3237922191619873, + "learning_rate": 0.0002797097744360902, + "loss": 0.5388, "step": 14000 }, { - "epoch": 0.5501198475382136, - "eval_loss": 0.6902604699134827, - "eval_runtime": 144.3591, - "eval_samples_per_second": 39.18, - "eval_steps_per_second": 4.898, - "eval_wer": 0.534014860939481, + "epoch": 0.5357416194703811, + "eval_loss": Infinity, + "eval_runtime": 178.3166, + "eval_samples_per_second": 39.379, + "eval_steps_per_second": 4.924, + "eval_wer": 0.5418209223609054, "step": 14000 }, { - "epoch": 0.5697669849502928, - "grad_norm": 1.8595026731491089, - "learning_rate": 0.0002789563909774436, - "loss": 0.5296, + "epoch": 0.5548752487371804, + "grad_norm": 1.9418795108795166, + "learning_rate": 0.0002789578947368421, + "loss": 0.5336, "step": 14500 }, { - "epoch": 0.5894141223623718, - "grad_norm": 1.83585524559021, - "learning_rate": 0.00027820451127819545, - "loss": 0.5115, + "epoch": 0.5740088780039798, + "grad_norm": 2.3504509925842285, + "learning_rate": 0.00027820601503759397, + "loss": 0.5163, "step": 15000 }, { - "epoch": 0.5894141223623718, - "eval_loss": 0.6973890662193298, - "eval_runtime": 143.7301, - "eval_samples_per_second": 39.352, - "eval_steps_per_second": 4.919, - "eval_wer": 0.5437081735167145, + "epoch": 0.5740088780039798, + "eval_loss": Infinity, + "eval_runtime": 178.0416, + "eval_samples_per_second": 39.44, + "eval_steps_per_second": 4.931, + "eval_wer": 0.5408292786122276, "step": 15000 }, { - "epoch": 0.6090612597744509, - "grad_norm": 3.633268117904663, + "epoch": 0.5931425072707791, + "grad_norm": 1.8465300798416138, "learning_rate": 0.00027745413533834584, - "loss": 0.5083, + "loss": 0.5172, "step": 15500 }, { - "epoch": 0.6287083971865299, - "grad_norm": 1.6785953044891357, + "epoch": 0.6122761365375784, + "grad_norm": 2.249298572540283, "learning_rate": 0.0002767022556390977, - "loss": 0.5097, + "loss": 0.5226, "step": 16000 }, { - "epoch": 0.6287083971865299, - "eval_loss": 0.6785907745361328, - "eval_runtime": 144.7589, - "eval_samples_per_second": 39.072, - "eval_steps_per_second": 4.884, - "eval_wer": 0.5198119112195279, + "epoch": 0.6122761365375784, + "eval_loss": Infinity, + "eval_runtime": 178.435, + "eval_samples_per_second": 39.353, + "eval_steps_per_second": 4.921, + "eval_wer": 0.5311376137084832, "step": 16000 }, { - "epoch": 0.648355534598609, - "grad_norm": 3.022169589996338, + "epoch": 0.6314097658043778, + "grad_norm": 1.6280571222305298, "learning_rate": 0.00027595037593984963, - "loss": 0.5025, + "loss": 0.5161, "step": 16500 }, { - "epoch": 0.6680026720106881, - "grad_norm": 3.9664957523345947, - "learning_rate": 0.00027519999999999997, - "loss": 0.504, + "epoch": 0.6505433950711771, + "grad_norm": 4.344738006591797, + "learning_rate": 0.0002751984962406015, + "loss": 0.4952, "step": 17000 }, { - "epoch": 0.6680026720106881, - "eval_loss": 0.6679931282997131, - "eval_runtime": 144.4325, - "eval_samples_per_second": 39.16, - "eval_steps_per_second": 4.895, - "eval_wer": 0.5067163101218084, + "epoch": 0.6505433950711771, + "eval_loss": Infinity, + "eval_runtime": 178.8108, + "eval_samples_per_second": 39.271, + "eval_steps_per_second": 4.91, + "eval_wer": 0.5288502221281997, "step": 17000 }, { - "epoch": 0.6876498094227671, - "grad_norm": 1.8392106294631958, + "epoch": 0.6696770243379764, + "grad_norm": 2.9461894035339355, "learning_rate": 0.00027444812030075184, - "loss": 0.4996, + "loss": 0.5177, "step": 17500 }, { - "epoch": 0.7072969468348461, - "grad_norm": 22.285568237304688, - "learning_rate": 0.0002736977443609022, - "loss": 0.4951, + "epoch": 0.6888106536047758, + "grad_norm": 3.190275192260742, + "learning_rate": 0.00027369624060150376, + "loss": 0.524, "step": 18000 }, { - "epoch": 0.7072969468348461, - "eval_loss": 0.6599805951118469, - "eval_runtime": 144.0873, - "eval_samples_per_second": 39.254, - "eval_steps_per_second": 4.907, - "eval_wer": 0.5222191908330792, + "epoch": 0.6888106536047758, + "eval_loss": Infinity, + "eval_runtime": 178.3332, + "eval_samples_per_second": 39.376, + "eval_steps_per_second": 4.923, + "eval_wer": 0.5214591707213878, "step": 18000 }, { - "epoch": 0.7269440842469252, - "grad_norm": 1.8236407041549683, - "learning_rate": 0.0002729458646616541, - "loss": 0.5008, + "epoch": 0.7079442828715751, + "grad_norm": 3.2808961868286133, + "learning_rate": 0.0002729443609022556, + "loss": 0.4997, "step": 18500 }, { - "epoch": 0.7465912216590043, - "grad_norm": 3.337568998336792, - "learning_rate": 0.000272193984962406, - "loss": 0.4982, + "epoch": 0.7270779121383744, + "grad_norm": 2.6754653453826904, + "learning_rate": 0.0002721924812030075, + "loss": 0.5076, "step": 19000 }, { - "epoch": 0.7465912216590043, - "eval_loss": 0.6371914744377136, - "eval_runtime": 144.7615, - "eval_samples_per_second": 39.071, - "eval_steps_per_second": 4.884, - "eval_wer": 0.5010993243568551, + "epoch": 0.7270779121383744, + "eval_loss": Infinity, + "eval_runtime": 178.4608, + "eval_samples_per_second": 39.348, + "eval_steps_per_second": 4.92, + "eval_wer": 0.5186957901417389, "step": 19000 }, { - "epoch": 0.7662383590710834, - "grad_norm": 5.579843997955322, - "learning_rate": 0.0002714421052631579, - "loss": 0.4966, + "epoch": 0.7462115414051738, + "grad_norm": 3.872616767883301, + "learning_rate": 0.00027144060150375936, + "loss": 0.4961, "step": 19500 }, { - "epoch": 0.7858854964831624, - "grad_norm": 1.800836443901062, - "learning_rate": 0.00027069022556390975, - "loss": 0.493, + "epoch": 0.765345170671973, + "grad_norm": 4.666406631469727, + "learning_rate": 0.00027068872180451123, + "loss": 0.492, "step": 20000 }, { - "epoch": 0.7858854964831624, - "eval_loss": 0.6563202738761902, - "eval_runtime": 145.0143, - "eval_samples_per_second": 39.003, - "eval_steps_per_second": 4.875, - "eval_wer": 0.523583315947425, + "epoch": 0.765345170671973, + "eval_loss": Infinity, + "eval_runtime": 177.9854, + "eval_samples_per_second": 39.453, + "eval_steps_per_second": 4.933, + "eval_wer": 0.5094933361540089, "step": 20000 }, { - "epoch": 0.8055326338952414, - "grad_norm": 2.785470724105835, - "learning_rate": 0.0002699383458646616, - "loss": 0.5046, + "epoch": 0.7844787999387723, + "grad_norm": NaN, + "learning_rate": 0.00026993984962406014, + "loss": 0.4846, "step": 20500 }, { - "epoch": 0.8251797713073206, - "grad_norm": 2.336695909500122, - "learning_rate": 0.00026918646616541354, - "loss": 0.4928, + "epoch": 0.8036124292055717, + "grad_norm": 3.923118829727173, + "learning_rate": 0.000269187969924812, + "loss": 0.4934, "step": 21000 }, { - "epoch": 0.8251797713073206, - "eval_loss": 0.6477507948875427, - "eval_runtime": 146.5574, - "eval_samples_per_second": 38.592, - "eval_steps_per_second": 4.824, - "eval_wer": 0.5030090995169392, + "epoch": 0.8036124292055717, + "eval_loss": Infinity, + "eval_runtime": 178.6067, + "eval_samples_per_second": 39.315, + "eval_steps_per_second": 4.916, + "eval_wer": 0.5061481912418024, "step": 21000 }, { - "epoch": 0.8448269087193996, - "grad_norm": 1.3052055835723877, + "epoch": 0.822746058472371, + "grad_norm": 3.902585744857788, "learning_rate": 0.0002684360902255639, - "loss": 0.4878, + "loss": 0.5006, "step": 21500 }, { - "epoch": 0.8644740461314786, - "grad_norm": 2.2889890670776367, - "learning_rate": 0.00026768571428571427, - "loss": 0.4964, + "epoch": 0.8418796877391703, + "grad_norm": 6.406003952026367, + "learning_rate": 0.0002676842105263158, + "loss": 0.4985, "step": 22000 }, { - "epoch": 0.8644740461314786, - "eval_loss": 0.6431675553321838, - "eval_runtime": 146.4682, - "eval_samples_per_second": 38.616, - "eval_steps_per_second": 4.827, - "eval_wer": 0.5103272295421354, + "epoch": 0.8418796877391703, + "eval_loss": Infinity, + "eval_runtime": 177.9171, + "eval_samples_per_second": 39.468, + "eval_steps_per_second": 4.935, + "eval_wer": 0.5129310344827587, "step": 22000 }, { - "epoch": 0.8841211835435577, - "grad_norm": 3.50435733795166, - "learning_rate": 0.00026693383458646614, - "loss": 0.4796, + "epoch": 0.8610133170059697, + "grad_norm": 3.3762645721435547, + "learning_rate": 0.0002669323308270676, + "loss": 0.5001, "step": 22500 }, { - "epoch": 0.9037683209556368, - "grad_norm": 1.364182472229004, - "learning_rate": 0.000266181954887218, - "loss": 0.4818, + "epoch": 0.880146946272769, + "grad_norm": 5.018126010894775, + "learning_rate": 0.00026618045112781954, + "loss": 0.4887, "step": 23000 }, { - "epoch": 0.9037683209556368, - "eval_loss": 0.6235994100570679, - "eval_runtime": 146.6113, - "eval_samples_per_second": 38.578, - "eval_steps_per_second": 4.822, - "eval_wer": 0.48960857633483656, + "epoch": 0.880146946272769, + "eval_loss": Infinity, + "eval_runtime": 176.8724, + "eval_samples_per_second": 39.701, + "eval_steps_per_second": 4.964, + "eval_wer": 0.49284694309287075, "step": 23000 }, { - "epoch": 0.9234154583677158, - "grad_norm": 2.1214494705200195, - "learning_rate": 0.00026543007518796993, - "loss": 0.4688, + "epoch": 0.8992805755395683, + "grad_norm": 2.6644840240478516, + "learning_rate": 0.0002654285714285714, + "loss": 0.4879, "step": 23500 }, { - "epoch": 0.9430625957797949, - "grad_norm": 2.6261446475982666, - "learning_rate": 0.00026467819548872174, - "loss": 0.4752, + "epoch": 0.9184142048063677, + "grad_norm": 1.760593056678772, + "learning_rate": 0.0002646766917293233, + "loss": 0.484, "step": 24000 }, { - "epoch": 0.9430625957797949, - "eval_loss": 0.6326233744621277, - "eval_runtime": 145.9082, - "eval_samples_per_second": 38.764, - "eval_steps_per_second": 4.846, - "eval_wer": 0.500810450803229, + "epoch": 0.9184142048063677, + "eval_loss": Infinity, + "eval_runtime": 178.117, + "eval_samples_per_second": 39.424, + "eval_steps_per_second": 4.929, + "eval_wer": 0.49492278400676964, "step": 24000 }, { - "epoch": 0.9627097331918739, - "grad_norm": 4.142037391662598, - "learning_rate": 0.00026392631578947367, - "loss": 0.4698, + "epoch": 0.937547834073167, + "grad_norm": 4.163125514984131, + "learning_rate": 0.00026392481203007514, + "loss": 0.4825, "step": 24500 }, { - "epoch": 0.982356870603953, - "grad_norm": 2.097465991973877, - "learning_rate": 0.00026317443609022553, - "loss": 0.4736, + "epoch": 0.9566814633399663, + "grad_norm": 5.920992374420166, + "learning_rate": 0.00026317293233082706, + "loss": 0.4741, "step": 25000 }, { - "epoch": 0.982356870603953, - "eval_loss": 0.6309667229652405, - "eval_runtime": 145.598, - "eval_samples_per_second": 38.847, - "eval_steps_per_second": 4.856, - "eval_wer": 0.5081446293591821, + "epoch": 0.9566814633399663, + "eval_loss": Infinity, + "eval_runtime": 177.7742, + "eval_samples_per_second": 39.5, + "eval_steps_per_second": 4.939, + "eval_wer": 0.48648720118468375, "step": 25000 }, { - "epoch": 1.002004008016032, - "grad_norm": 1.872559666633606, - "learning_rate": 0.0002624225563909774, - "loss": 0.461, + "epoch": 0.9758150926067657, + "grad_norm": 1.6138001680374146, + "learning_rate": 0.00026242105263157893, + "loss": 0.4792, "step": 25500 }, { - "epoch": 1.0216511454281112, - "grad_norm": 0.9968547821044922, - "learning_rate": 0.00026167067669172927, - "loss": 0.4241, + "epoch": 0.994948721873565, + "grad_norm": 4.549112796783447, + "learning_rate": 0.0002616691729323308, + "loss": 0.4816, "step": 26000 }, { - "epoch": 1.0216511454281112, - "eval_loss": 0.6126999258995056, - "eval_runtime": 145.6037, - "eval_samples_per_second": 38.845, - "eval_steps_per_second": 4.856, - "eval_wer": 0.47134534833336006, + "epoch": 0.994948721873565, + "eval_loss": Infinity, + "eval_runtime": 178.2909, + "eval_samples_per_second": 39.385, + "eval_steps_per_second": 4.925, + "eval_wer": 0.5054606515760525, "step": 26000 }, { - "epoch": 1.0412982828401902, - "grad_norm": 0.840844988822937, - "learning_rate": 0.00026092030075187966, - "loss": 0.4146, + "epoch": 1.0140823511403643, + "grad_norm": 1.6239593029022217, + "learning_rate": 0.00026091729323308267, + "loss": 0.4551, "step": 26500 }, { - "epoch": 1.0609454202522692, - "grad_norm": 0.8603857159614563, - "learning_rate": 0.00026016842105263153, - "loss": 0.4196, + "epoch": 1.0332159804071637, + "grad_norm": 0.7770557999610901, + "learning_rate": 0.0002601654135338346, + "loss": 0.44, "step": 27000 }, { - "epoch": 1.0609454202522692, - "eval_loss": 0.6066301465034485, - "eval_runtime": 144.6102, - "eval_samples_per_second": 39.112, - "eval_steps_per_second": 4.889, - "eval_wer": 0.46828007895877133, + "epoch": 1.0332159804071637, + "eval_loss": Infinity, + "eval_runtime": 178.2997, + "eval_samples_per_second": 39.383, + "eval_steps_per_second": 4.924, + "eval_wer": 0.47974402369367464, "step": 27000 }, { - "epoch": 1.0805925576643483, - "grad_norm": 0.8338613510131836, - "learning_rate": 0.00025941654135338345, - "loss": 0.4213, + "epoch": 1.0523496096739628, + "grad_norm": 2.1102957725524902, + "learning_rate": 0.00025941503759398493, + "loss": 0.4367, "step": 27500 }, { - "epoch": 1.1002396950764273, - "grad_norm": 1.0736676454544067, - "learning_rate": 0.0002586646616541353, - "loss": 0.4177, + "epoch": 1.0714832389407623, + "grad_norm": 3.1549980640411377, + "learning_rate": 0.0002586631578947368, + "loss": 0.4359, "step": 28000 }, { - "epoch": 1.1002396950764273, - "eval_loss": 0.5958611965179443, - "eval_runtime": 145.1774, - "eval_samples_per_second": 38.959, - "eval_steps_per_second": 4.87, - "eval_wer": 0.47737959589799556, + "epoch": 1.0714832389407623, + "eval_loss": Infinity, + "eval_runtime": 177.6407, + "eval_samples_per_second": 39.529, + "eval_steps_per_second": 4.943, + "eval_wer": 0.4912074254283901, "step": 28000 }, { - "epoch": 1.1198868324885065, - "grad_norm": 0.7483401894569397, - "learning_rate": 0.0002579127819548872, - "loss": 0.4204, + "epoch": 1.0906168682075617, + "grad_norm": 0.7373610138893127, + "learning_rate": 0.0002579112781954887, + "loss": 0.429, "step": 28500 }, { - "epoch": 1.1395339699005855, - "grad_norm": 1.1266822814941406, - "learning_rate": 0.0002571624060150376, - "loss": 0.4204, + "epoch": 1.1097504974743608, + "grad_norm": 2.5423829555511475, + "learning_rate": 0.0002571593984962406, + "loss": 0.411, "step": 29000 }, { - "epoch": 1.1395339699005855, - "eval_loss": 0.607071042060852, - "eval_runtime": 145.6021, - "eval_samples_per_second": 38.846, - "eval_steps_per_second": 4.856, - "eval_wer": 0.4893517998427244, + "epoch": 1.1097504974743608, + "eval_loss": Infinity, + "eval_runtime": 177.8711, + "eval_samples_per_second": 39.478, + "eval_steps_per_second": 4.936, + "eval_wer": 0.4773640786968479, "step": 29000 }, { - "epoch": 1.1591811073126645, - "grad_norm": 2.852926731109619, - "learning_rate": 0.00025641052631578945, - "loss": 0.4255, + "epoch": 1.1288841267411602, + "grad_norm": 1.5159779787063599, + "learning_rate": 0.000256409022556391, + "loss": 0.4162, "step": 29500 }, { - "epoch": 1.1788282447247436, - "grad_norm": 2.134554862976074, - "learning_rate": 0.00025566015037593984, - "loss": 0.4238, + "epoch": 1.1480177560079596, + "grad_norm": 0.6960669159889221, + "learning_rate": 0.00025565714285714284, + "loss": 0.4298, "step": 30000 }, { - "epoch": 1.1788282447247436, - "eval_loss": 0.600638747215271, - "eval_runtime": 144.9068, - "eval_samples_per_second": 39.032, - "eval_steps_per_second": 4.879, - "eval_wer": 0.476432732583332, + "epoch": 1.1480177560079596, + "eval_loss": Infinity, + "eval_runtime": 176.6665, + "eval_samples_per_second": 39.747, + "eval_steps_per_second": 4.97, + "eval_wer": 0.4772847471969537, "step": 30000 }, { - "epoch": 1.1984753821368226, - "grad_norm": 1.6866382360458374, - "learning_rate": 0.0002549082706766917, - "loss": 0.4156, + "epoch": 1.1671513852747588, + "grad_norm": 0.788869321346283, + "learning_rate": 0.0002549052631578947, + "loss": 0.4405, "step": 30500 }, { - "epoch": 1.2181225195489018, - "grad_norm": 0.7769395709037781, - "learning_rate": 0.0002541563909774436, - "loss": 0.4253, + "epoch": 1.1862850145415582, + "grad_norm": 0.8728181719779968, + "learning_rate": 0.0002541533834586466, + "loss": 0.4305, "step": 31000 }, { - "epoch": 1.2181225195489018, - "eval_loss": 0.5803025960922241, - "eval_runtime": 144.9773, - "eval_samples_per_second": 39.013, - "eval_steps_per_second": 4.877, - "eval_wer": 0.4623421225786779, + "epoch": 1.1862850145415582, + "eval_loss": Infinity, + "eval_runtime": 177.2604, + "eval_samples_per_second": 39.614, + "eval_steps_per_second": 4.953, + "eval_wer": 0.489753014596996, "step": 31000 }, { - "epoch": 1.2377696569609808, - "grad_norm": 1.305306077003479, - "learning_rate": 0.00025340451127819544, - "loss": 0.4257, + "epoch": 1.2054186438083576, + "grad_norm": 1.0415312051773071, + "learning_rate": 0.00025340300751879697, + "loss": 0.423, "step": 31500 }, { - "epoch": 1.2574167943730599, - "grad_norm": 0.9590096473693848, - "learning_rate": 0.00025265263157894736, - "loss": 0.4156, + "epoch": 1.2245522730751568, + "grad_norm": 1.1658622026443481, + "learning_rate": 0.00025265112781954884, + "loss": 0.4126, "step": 32000 }, { - "epoch": 1.2574167943730599, - "eval_loss": 0.5940248966217041, - "eval_runtime": 145.3704, - "eval_samples_per_second": 38.908, - "eval_steps_per_second": 4.863, - "eval_wer": 0.45728683539022, + "epoch": 1.2245522730751568, + "eval_loss": Infinity, + "eval_runtime": 177.0921, + "eval_samples_per_second": 39.652, + "eval_steps_per_second": 4.958, + "eval_wer": 0.4738867146181511, "step": 32000 }, { - "epoch": 1.2770639317851389, - "grad_norm": 1.598183274269104, - "learning_rate": 0.00025190075187969923, - "loss": 0.4103, + "epoch": 1.2436859023419562, + "grad_norm": 1.2818922996520996, + "learning_rate": 0.00025189924812030076, + "loss": 0.428, "step": 32500 }, { - "epoch": 1.296711069197218, - "grad_norm": 1.9396251440048218, - "learning_rate": 0.0002511488721804511, - "loss": 0.4058, + "epoch": 1.2628195316087556, + "grad_norm": 1.18551504611969, + "learning_rate": 0.00025114736842105263, + "loss": 0.4234, "step": 33000 }, { - "epoch": 1.296711069197218, - "eval_loss": 0.5802159905433655, - "eval_runtime": 146.1677, - "eval_samples_per_second": 38.695, - "eval_steps_per_second": 4.837, - "eval_wer": 0.4614594533870424, + "epoch": 1.2628195316087556, + "eval_loss": Infinity, + "eval_runtime": 177.167, + "eval_samples_per_second": 39.635, + "eval_steps_per_second": 4.956, + "eval_wer": 0.48441136027078485, "step": 33000 }, { - "epoch": 1.316358206609297, - "grad_norm": 1.7467131614685059, - "learning_rate": 0.00025039699248120297, - "loss": 0.417, + "epoch": 1.2819531608755548, + "grad_norm": 1.017686367034912, + "learning_rate": 0.0002503954887218045, + "loss": 0.4334, "step": 33500 }, { - "epoch": 1.3360053440213762, - "grad_norm": 0.9871892333030701, - "learning_rate": 0.0002496451127819549, - "loss": 0.404, + "epoch": 1.3010867901423542, + "grad_norm": 0.9811512231826782, + "learning_rate": 0.00024964360902255637, + "loss": 0.4252, "step": 34000 }, { - "epoch": 1.3360053440213762, - "eval_loss": 0.5882492065429688, - "eval_runtime": 145.2339, - "eval_samples_per_second": 38.944, - "eval_steps_per_second": 4.868, - "eval_wer": 0.46015952239572466, + "epoch": 1.3010867901423542, + "eval_loss": Infinity, + "eval_runtime": 183.3489, + "eval_samples_per_second": 38.299, + "eval_steps_per_second": 4.789, + "eval_wer": 0.47629310344827586, "step": 34000 }, { - "epoch": 1.3556524814334552, - "grad_norm": 1.0927810668945312, - "learning_rate": 0.00024889323308270676, - "loss": 0.3955, + "epoch": 1.3202204194091536, + "grad_norm": 1.2876335382461548, + "learning_rate": 0.0002488917293233083, + "loss": 0.4094, "step": 34500 }, { - "epoch": 1.3752996188455342, - "grad_norm": 1.8038376569747925, + "epoch": 1.3393540486759528, + "grad_norm": 0.9524905681610107, "learning_rate": 0.0002481413533834586, - "loss": 0.3995, + "loss": 0.4106, "step": 35000 }, { - "epoch": 1.3752996188455342, - "eval_loss": 0.5840802788734436, - "eval_runtime": 144.2215, - "eval_samples_per_second": 39.217, - "eval_steps_per_second": 4.902, - "eval_wer": 0.46152364751007047, + "epoch": 1.3393540486759528, + "eval_loss": Infinity, + "eval_runtime": 178.3382, + "eval_samples_per_second": 39.375, + "eval_steps_per_second": 4.923, + "eval_wer": 0.47093822720541567, "step": 35000 }, { - "epoch": 1.3949467562576132, - "grad_norm": 1.3720190525054932, + "epoch": 1.3584876779427522, + "grad_norm": 1.4819426536560059, "learning_rate": 0.0002473894736842105, - "loss": 0.4077, + "loss": 0.4082, "step": 35500 }, { - "epoch": 1.4145938936696925, - "grad_norm": 1.4488073587417603, + "epoch": 1.3776213072095516, + "grad_norm": 2.091412305831909, "learning_rate": 0.0002466375939849624, - "loss": 0.4049, + "loss": 0.4254, "step": 36000 }, { - "epoch": 1.4145938936696925, - "eval_loss": 0.5853234529495239, - "eval_runtime": 144.437, - "eval_samples_per_second": 39.159, - "eval_steps_per_second": 4.895, - "eval_wer": 0.4635618109162106, + "epoch": 1.3776213072095516, + "eval_loss": Infinity, + "eval_runtime": 174.5569, + "eval_samples_per_second": 40.228, + "eval_steps_per_second": 5.03, + "eval_wer": 0.47370160778506454, "step": 36000 }, { - "epoch": 1.4342410310817715, - "grad_norm": 2.5115835666656494, + "epoch": 1.3967549364763507, + "grad_norm": 0.9154180288314819, "learning_rate": 0.00024588571428571423, - "loss": 0.4202, + "loss": 0.4101, "step": 36500 }, { - "epoch": 1.4538881684938505, - "grad_norm": 0.7861095070838928, - "learning_rate": 0.0002451353383458647, - "loss": 0.4018, + "epoch": 1.4158885657431501, + "grad_norm": 0.6072717308998108, + "learning_rate": 0.00024513383458646615, + "loss": 0.4245, "step": 37000 }, { - "epoch": 1.4538881684938505, - "eval_loss": 0.5737255215644836, - "eval_runtime": 143.969, - "eval_samples_per_second": 39.286, - "eval_steps_per_second": 4.911, - "eval_wer": 0.4532747027009677, + "epoch": 1.4158885657431501, + "eval_loss": Infinity, + "eval_runtime": 176.0739, + "eval_samples_per_second": 39.881, + "eval_steps_per_second": 4.987, + "eval_wer": 0.4533530780621959, "step": 37000 }, { - "epoch": 1.4735353059059295, - "grad_norm": 1.29477858543396, - "learning_rate": 0.0002443834586466165, - "loss": 0.4028, + "epoch": 1.4350221950099495, + "grad_norm": 0.8788403868675232, + "learning_rate": 0.000244381954887218, + "loss": 0.4139, "step": 37500 }, { - "epoch": 1.4931824433180085, - "grad_norm": 1.5367540121078491, - "learning_rate": 0.0002436315789473684, - "loss": 0.3906, + "epoch": 1.4541558242767487, + "grad_norm": 1.6312676668167114, + "learning_rate": 0.00024363007518796991, + "loss": 0.4154, "step": 38000 }, { - "epoch": 1.4931824433180085, - "eval_loss": 0.5848459005355835, - "eval_runtime": 144.7268, - "eval_samples_per_second": 39.081, - "eval_steps_per_second": 4.885, - "eval_wer": 0.4637222962237807, + "epoch": 1.4541558242767487, + "eval_loss": Infinity, + "eval_runtime": 175.6948, + "eval_samples_per_second": 39.967, + "eval_steps_per_second": 4.997, + "eval_wer": 0.4566056695578591, "step": 38000 }, { - "epoch": 1.5128295807300876, - "grad_norm": 1.7903566360473633, - "learning_rate": 0.00024288120300751878, - "loss": 0.4147, + "epoch": 1.4732894535435481, + "grad_norm": 1.2530635595321655, + "learning_rate": 0.00024287819548872178, + "loss": 0.403, "step": 38500 }, { - "epoch": 1.5324767181421666, - "grad_norm": 0.7342734336853027, - "learning_rate": 0.00024212932330827064, - "loss": 0.3932, + "epoch": 1.4924230828103475, + "grad_norm": 0.8397653102874756, + "learning_rate": 0.00024212631578947368, + "loss": 0.4071, "step": 39000 }, { - "epoch": 1.5324767181421666, - "eval_loss": 0.551567792892456, - "eval_runtime": 144.7104, - "eval_samples_per_second": 39.085, - "eval_steps_per_second": 4.886, - "eval_wer": 0.44000256776492114, + "epoch": 1.4924230828103475, + "eval_loss": Infinity, + "eval_runtime": 175.5585, + "eval_samples_per_second": 39.998, + "eval_steps_per_second": 5.001, + "eval_wer": 0.4634546223820605, "step": 39000 }, { - "epoch": 1.5521238555542456, - "grad_norm": 1.3719693422317505, - "learning_rate": 0.00024137894736842104, - "loss": 0.3984, + "epoch": 1.5115567120771467, + "grad_norm": 1.685242772102356, + "learning_rate": 0.00024137443609022552, + "loss": 0.3912, "step": 39500 }, { - "epoch": 1.5717709929663248, - "grad_norm": 3.398484706878662, - "learning_rate": 0.0002406270676691729, - "loss": 0.4026, + "epoch": 1.530690341343946, + "grad_norm": 1.5319820642471313, + "learning_rate": 0.00024062255639097744, + "loss": 0.4065, "step": 40000 }, { - "epoch": 1.5717709929663248, - "eval_loss": 0.5641522407531738, - "eval_runtime": 145.5525, - "eval_samples_per_second": 38.859, - "eval_steps_per_second": 4.857, - "eval_wer": 0.44844409494310794, + "epoch": 1.530690341343946, + "eval_loss": Infinity, + "eval_runtime": 176.3036, + "eval_samples_per_second": 39.829, + "eval_steps_per_second": 4.98, + "eval_wer": 0.4668394330442141, "step": 40000 }, { - "epoch": 1.5914181303784039, - "grad_norm": 1.1821295022964478, - "learning_rate": 0.0002398751879699248, - "loss": 0.4086, + "epoch": 1.5498239706107455, + "grad_norm": 7.0581955909729, + "learning_rate": 0.00023987067669172928, + "loss": 0.4006, "step": 40500 }, { - "epoch": 1.611065267790483, - "grad_norm": 1.3344157934188843, - "learning_rate": 0.00023912330827067667, - "loss": 0.396, + "epoch": 1.5689575998775447, + "grad_norm": 0.6705722808837891, + "learning_rate": 0.0002391203007518797, + "loss": 0.4086, "step": 41000 }, { - "epoch": 1.611065267790483, - "eval_loss": 0.5584043264389038, - "eval_runtime": 145.5026, - "eval_samples_per_second": 38.872, - "eval_steps_per_second": 4.859, - "eval_wer": 0.4512044422333135, + "epoch": 1.5689575998775447, + "eval_loss": Infinity, + "eval_runtime": 176.1394, + "eval_samples_per_second": 39.866, + "eval_steps_per_second": 4.985, + "eval_wer": 0.46069124180241167, "step": 41000 }, { - "epoch": 1.630712405202562, - "grad_norm": 0.6132605671882629, - "learning_rate": 0.00023837142857142856, - "loss": 0.3863, + "epoch": 1.588091229144344, + "grad_norm": 0.794866144657135, + "learning_rate": 0.00023836842105263154, + "loss": 0.4026, "step": 41500 }, { - "epoch": 1.6503595426146411, - "grad_norm": 2.896801710128784, - "learning_rate": 0.00023761954887218043, - "loss": 0.3976, + "epoch": 1.6072248584111435, + "grad_norm": 0.7202442288398743, + "learning_rate": 0.00023761654135338344, + "loss": 0.4037, "step": 42000 }, { - "epoch": 1.6503595426146411, - "eval_loss": 0.5537524819374084, - "eval_runtime": 145.3562, - "eval_samples_per_second": 38.911, - "eval_steps_per_second": 4.864, - "eval_wer": 0.4436455842467622, + "epoch": 1.6072248584111435, + "eval_loss": Infinity, + "eval_runtime": 175.2975, + "eval_samples_per_second": 40.058, + "eval_steps_per_second": 5.009, + "eval_wer": 0.46155066638459913, "step": 42000 }, { - "epoch": 1.6700066800267201, - "grad_norm": 0.4839102029800415, - "learning_rate": 0.00023686766917293232, - "loss": 0.3977, + "epoch": 1.6263584876779427, + "grad_norm": 1.2002875804901123, + "learning_rate": 0.00023686616541353383, + "loss": 0.4013, "step": 42500 }, { - "epoch": 1.6896538174387992, - "grad_norm": 0.7648475170135498, - "learning_rate": 0.0002361157894736842, - "loss": 0.3936, + "epoch": 1.645492116944742, + "grad_norm": 1.2249990701675415, + "learning_rate": 0.00023611428571428567, + "loss": 0.4071, "step": 43000 }, { - "epoch": 1.6896538174387992, - "eval_loss": 0.551811158657074, - "eval_runtime": 144.7074, - "eval_samples_per_second": 39.086, - "eval_steps_per_second": 4.886, - "eval_wer": 0.4412222561024538, + "epoch": 1.645492116944742, + "eval_loss": Infinity, + "eval_runtime": 177.5513, + "eval_samples_per_second": 39.549, + "eval_steps_per_second": 4.945, + "eval_wer": 0.46074412946900783, "step": 43000 }, { - "epoch": 1.7093009548508782, - "grad_norm": 1.953736662864685, - "learning_rate": 0.00023536541353383458, - "loss": 0.3865, + "epoch": 1.6646257462115415, + "grad_norm": 0.7734994292259216, + "learning_rate": 0.00023536240601503756, + "loss": 0.3834, "step": 43500 }, { - "epoch": 1.7289480922629572, - "grad_norm": 1.4531214237213135, - "learning_rate": 0.00023461353383458645, - "loss": 0.3879, + "epoch": 1.6837593754783406, + "grad_norm": 1.9896817207336426, + "learning_rate": 0.00023461052631578943, + "loss": 0.394, "step": 44000 }, { - "epoch": 1.7289480922629572, - "eval_loss": 0.5469211935997009, - "eval_runtime": 145.1619, - "eval_samples_per_second": 38.963, - "eval_steps_per_second": 4.87, - "eval_wer": 0.42974755661119224, + "epoch": 1.6837593754783406, + "eval_loss": Infinity, + "eval_runtime": 176.2037, + "eval_samples_per_second": 39.852, + "eval_steps_per_second": 4.983, + "eval_wer": 0.4430532049925957, "step": 44000 }, { - "epoch": 1.7485952296750362, - "grad_norm": 1.0637991428375244, - "learning_rate": 0.00023386165413533835, - "loss": 0.3942, + "epoch": 1.70289300474514, + "grad_norm": 1.6369848251342773, + "learning_rate": 0.00023385864661654133, + "loss": 0.4006, "step": 44500 }, { - "epoch": 1.7682423670871152, - "grad_norm": 1.0606558322906494, - "learning_rate": 0.00023310977443609021, - "loss": 0.3939, + "epoch": 1.7220266340119394, + "grad_norm": 0.7955853343009949, + "learning_rate": 0.0002331067669172932, + "loss": 0.4103, "step": 45000 }, { - "epoch": 1.7682423670871152, - "eval_loss": 0.5502393245697021, - "eval_runtime": 144.8654, - "eval_samples_per_second": 39.043, - "eval_steps_per_second": 4.88, - "eval_wer": 0.44024329572627624, + "epoch": 1.7220266340119394, + "eval_loss": Infinity, + "eval_runtime": 178.3273, + "eval_samples_per_second": 39.377, + "eval_steps_per_second": 4.924, + "eval_wer": 0.43978739158028346, "step": 45000 }, { - "epoch": 1.7878895044991945, - "grad_norm": 0.7499143481254578, - "learning_rate": 0.0002323578947368421, - "loss": 0.3926, + "epoch": 1.7411602632787386, + "grad_norm": 1.0250253677368164, + "learning_rate": 0.00023235639097744359, + "loss": 0.3876, "step": 45500 }, { - "epoch": 1.8075366419112735, - "grad_norm": 1.3015657663345337, - "learning_rate": 0.00023160601503759395, - "loss": 0.386, + "epoch": 1.760293892545538, + "grad_norm": 1.1575045585632324, + "learning_rate": 0.00023160451127819545, + "loss": 0.3909, "step": 46000 }, { - "epoch": 1.8075366419112735, - "eval_loss": 0.5626779198646545, - "eval_runtime": 145.6466, - "eval_samples_per_second": 38.834, - "eval_steps_per_second": 4.854, - "eval_wer": 0.4409012854873136, + "epoch": 1.760293892545538, + "eval_loss": Infinity, + "eval_runtime": 177.8973, + "eval_samples_per_second": 39.472, + "eval_steps_per_second": 4.935, + "eval_wer": 0.4454728157393696, "step": 46000 }, { - "epoch": 1.8271837793233527, - "grad_norm": 1.1235235929489136, - "learning_rate": 0.00023085413533834585, - "loss": 0.3833, + "epoch": 1.7794275218123374, + "grad_norm": 0.6429355144500732, + "learning_rate": 0.00023085263157894735, + "loss": 0.3875, "step": 46500 }, { - "epoch": 1.8468309167354318, - "grad_norm": 0.8004291653633118, - "learning_rate": 0.0002301022556390977, - "loss": 0.3823, + "epoch": 1.7985611510791366, + "grad_norm": 0.8371389508247375, + "learning_rate": 0.00023010075187969922, + "loss": 0.3909, "step": 47000 }, { - "epoch": 1.8468309167354318, - "eval_loss": 0.5602549910545349, - "eval_runtime": 145.4825, - "eval_samples_per_second": 38.878, - "eval_steps_per_second": 4.86, - "eval_wer": 0.43724222047471556, + "epoch": 1.7985611510791366, + "eval_loss": Infinity, + "eval_runtime": 178.4648, + "eval_samples_per_second": 39.347, + "eval_steps_per_second": 4.92, + "eval_wer": 0.442074783160567, "step": 47000 }, { - "epoch": 1.8664780541475108, - "grad_norm": 2.6733715534210205, - "learning_rate": 0.0002293503759398496, - "loss": 0.3868, + "epoch": 1.817694780345936, + "grad_norm": 1.2913625240325928, + "learning_rate": 0.0002293488721804511, + "loss": 0.3979, "step": 47500 }, { - "epoch": 1.8861251915595898, - "grad_norm": 1.501878261566162, - "learning_rate": 0.00022859849624060148, - "loss": 0.3955, + "epoch": 1.8368284096127354, + "grad_norm": 1.1963073015213013, + "learning_rate": 0.00022859699248120298, + "loss": 0.3982, "step": 48000 }, { - "epoch": 1.8861251915595898, - "eval_loss": 0.534982442855835, - "eval_runtime": 145.013, - "eval_samples_per_second": 39.003, - "eval_steps_per_second": 4.875, - "eval_wer": 0.4308228081719119, + "epoch": 1.8368284096127354, + "eval_loss": Infinity, + "eval_runtime": 178.3863, + "eval_samples_per_second": 39.364, + "eval_steps_per_second": 4.922, + "eval_wer": 0.4370636767505818, "step": 48000 }, { - "epoch": 1.9057723289716688, - "grad_norm": 1.785569667816162, + "epoch": 1.8559620388795346, + "grad_norm": 1.5472427606582642, "learning_rate": 0.00022784661654135337, - "loss": 0.3766, + "loss": 0.3905, "step": 48500 }, { - "epoch": 1.9254194663837478, - "grad_norm": 0.956910252571106, + "epoch": 1.875095668146334, + "grad_norm": 0.7495508790016174, "learning_rate": 0.00022709473684210524, - "loss": 0.3808, + "loss": 0.3896, "step": 49000 }, { - "epoch": 1.9254194663837478, - "eval_loss": 0.550835132598877, - "eval_runtime": 145.4747, - "eval_samples_per_second": 38.88, - "eval_steps_per_second": 4.86, - "eval_wer": 0.44476898139975285, + "epoch": 1.875095668146334, + "eval_loss": Infinity, + "eval_runtime": 177.5084, + "eval_samples_per_second": 39.559, + "eval_steps_per_second": 4.946, + "eval_wer": 0.4417706790776391, "step": 49000 }, { - "epoch": 1.9450666037958269, - "grad_norm": 1.0630252361297607, + "epoch": 1.8942292974131334, + "grad_norm": 1.171025037765503, "learning_rate": 0.00022634285714285713, - "loss": 0.3794, + "loss": 0.3883, "step": 49500 }, { - "epoch": 1.9647137412079059, - "grad_norm": 0.9397912621498108, - "learning_rate": 0.000225593984962406, - "loss": 0.3871, + "epoch": 1.9133629266799326, + "grad_norm": 1.1951338052749634, + "learning_rate": 0.000225590977443609, + "loss": 0.3986, "step": 50000 }, { - "epoch": 1.9647137412079059, - "eval_loss": 0.5386993885040283, - "eval_runtime": 144.7105, - "eval_samples_per_second": 39.085, - "eval_steps_per_second": 4.886, - "eval_wer": 0.43197830238641655, + "epoch": 1.9133629266799326, + "eval_loss": Infinity, + "eval_runtime": 178.1273, + "eval_samples_per_second": 39.421, + "eval_steps_per_second": 4.929, + "eval_wer": 0.4381610958324519, "step": 50000 }, { - "epoch": 1.9843608786199851, - "grad_norm": 1.227219581604004, - "learning_rate": 0.00022484360902255636, - "loss": 0.371, + "epoch": 1.932496555946732, + "grad_norm": 1.0499491691589355, + "learning_rate": 0.0002248390977443609, + "loss": 0.3842, "step": 50500 }, { - "epoch": 2.004008016032064, - "grad_norm": 1.0407652854919434, - "learning_rate": 0.00022409172932330825, - "loss": 0.3668, + "epoch": 1.9516301852135314, + "grad_norm": 0.6040648221969604, + "learning_rate": 0.00022408721804511277, + "loss": 0.3968, "step": 51000 }, { - "epoch": 2.004008016032064, - "eval_loss": 0.5476531982421875, - "eval_runtime": 144.735, - "eval_samples_per_second": 39.078, - "eval_steps_per_second": 4.885, - "eval_wer": 0.4207443308565101, + "epoch": 1.9516301852135314, + "eval_loss": Infinity, + "eval_runtime": 178.1104, + "eval_samples_per_second": 39.425, + "eval_steps_per_second": 4.93, + "eval_wer": 0.42753067484662577, "step": 51000 }, { - "epoch": 2.0236551534441434, - "grad_norm": 3.713465929031372, - "learning_rate": 0.00022333984962406012, - "loss": 0.3303, + "epoch": 1.9707638144803306, + "grad_norm": 0.7306642532348633, + "learning_rate": 0.00022333533834586466, + "loss": 0.3795, "step": 51500 }, { - "epoch": 2.0433022908562224, - "grad_norm": 1.074621319770813, - "learning_rate": 0.00022258796992481202, - "loss": 0.3324, + "epoch": 1.98989744374713, + "grad_norm": 0.781111478805542, + "learning_rate": 0.00022258345864661653, + "loss": 0.4025, "step": 52000 }, { - "epoch": 2.0433022908562224, - "eval_loss": 0.5283042788505554, - "eval_runtime": 144.7457, - "eval_samples_per_second": 39.075, - "eval_steps_per_second": 4.884, - "eval_wer": 0.4227985427934073, + "epoch": 1.98989744374713, + "eval_loss": Infinity, + "eval_runtime": 178.6685, + "eval_samples_per_second": 39.302, + "eval_steps_per_second": 4.914, + "eval_wer": 0.4204040617727946, "step": 52000 }, { - "epoch": 2.0629494282683014, - "grad_norm": 1.2761338949203491, - "learning_rate": 0.00022183759398496238, - "loss": 0.3299, + "epoch": 2.0090310730139294, + "grad_norm": 0.8605564832687378, + "learning_rate": 0.00022183157894736842, + "loss": 0.3645, "step": 52500 }, { - "epoch": 2.0825965656803804, - "grad_norm": 0.9065299034118652, - "learning_rate": 0.00022108571428571425, - "loss": 0.3327, + "epoch": 2.0281647022807285, + "grad_norm": 0.8945469260215759, + "learning_rate": 0.0002210796992481203, + "loss": 0.3404, "step": 53000 }, { - "epoch": 2.0825965656803804, - "eval_loss": 0.5217949151992798, - "eval_runtime": 145.0885, - "eval_samples_per_second": 38.983, - "eval_steps_per_second": 4.873, - "eval_wer": 0.41557670395275315, + "epoch": 2.0281647022807285, + "eval_loss": Infinity, + "eval_runtime": 178.4963, + "eval_samples_per_second": 39.34, + "eval_steps_per_second": 4.919, + "eval_wer": 0.42721334884704887, "step": 53000 }, { - "epoch": 2.1022437030924594, - "grad_norm": 1.1222054958343506, - "learning_rate": 0.00022033383458646615, - "loss": 0.3347, + "epoch": 2.0472983315475277, + "grad_norm": 0.7925958633422852, + "learning_rate": 0.0002203278195488722, + "loss": 0.3444, "step": 53500 }, { - "epoch": 2.1218908405045385, - "grad_norm": 3.639472484588623, - "learning_rate": 0.000219581954887218, - "loss": 0.3251, + "epoch": 2.0664319608143273, + "grad_norm": 1.2213307619094849, + "learning_rate": 0.00021957744360902255, + "loss": 0.3354, "step": 54000 }, { - "epoch": 2.1218908405045385, - "eval_loss": 0.5330758094787598, - "eval_runtime": 144.669, - "eval_samples_per_second": 39.096, - "eval_steps_per_second": 4.887, - "eval_wer": 0.41357063760812696, + "epoch": 2.0664319608143273, + "eval_loss": Infinity, + "eval_runtime": 177.2885, + "eval_samples_per_second": 39.608, + "eval_steps_per_second": 4.952, + "eval_wer": 0.4297519568436641, "step": 54000 }, { - "epoch": 2.1415379779166175, - "grad_norm": 2.337876558303833, - "learning_rate": 0.0002188300751879699, - "loss": 0.3368, + "epoch": 2.0855655900811265, + "grad_norm": 1.751560091972351, + "learning_rate": 0.00021882706766917292, + "loss": 0.3468, "step": 54500 }, { - "epoch": 2.1611851153286965, - "grad_norm": 0.8467469811439514, - "learning_rate": 0.00021807819548872178, - "loss": 0.3466, + "epoch": 2.1046992193479257, + "grad_norm": 0.7927623391151428, + "learning_rate": 0.0002180766917293233, + "loss": 0.3352, "step": 55000 }, { - "epoch": 2.1611851153286965, - "eval_loss": 0.5276508927345276, - "eval_runtime": 145.3583, - "eval_samples_per_second": 38.911, - "eval_steps_per_second": 4.864, - "eval_wer": 0.4141002391231083, + "epoch": 2.1046992193479257, + "eval_loss": Infinity, + "eval_runtime": 180.1438, + "eval_samples_per_second": 38.98, + "eval_steps_per_second": 4.874, + "eval_wer": 0.42244023693674637, "step": 55000 }, { - "epoch": 2.1808322527407755, - "grad_norm": 0.7859643697738647, - "learning_rate": 0.00021732781954887217, - "loss": 0.3337, + "epoch": 2.1238328486147253, + "grad_norm": 1.0091408491134644, + "learning_rate": 0.00021732481203007517, + "loss": 0.3579, "step": 55500 }, { - "epoch": 2.2004793901528545, - "grad_norm": 0.8069686889648438, - "learning_rate": 0.00021657593984962404, - "loss": 0.3259, + "epoch": 2.1429664778815245, + "grad_norm": 1.7537100315093994, + "learning_rate": 0.00021657293233082707, + "loss": 0.3384, "step": 56000 }, { - "epoch": 2.2004793901528545, - "eval_loss": 0.522844672203064, - "eval_runtime": 145.284, - "eval_samples_per_second": 38.931, - "eval_steps_per_second": 4.866, - "eval_wer": 0.40875607838102423, + "epoch": 2.1429664778815245, + "eval_loss": Infinity, + "eval_runtime": 179.0104, + "eval_samples_per_second": 39.227, + "eval_steps_per_second": 4.905, + "eval_wer": 0.4267241379310345, "step": 56000 }, { - "epoch": 2.2201265275649336, - "grad_norm": 0.5765830278396606, - "learning_rate": 0.00021582406015037593, - "loss": 0.337, + "epoch": 2.1621001071483237, + "grad_norm": 1.3135699033737183, + "learning_rate": 0.00021582105263157894, + "loss": 0.3408, "step": 56500 }, { - "epoch": 2.239773664977013, - "grad_norm": 0.9564582109451294, - "learning_rate": 0.0002150736842105263, - "loss": 0.3292, + "epoch": 2.1812337364151233, + "grad_norm": 0.9029154181480408, + "learning_rate": 0.00021506917293233083, + "loss": 0.3342, "step": 57000 }, { - "epoch": 2.239773664977013, - "eval_loss": 0.5119462013244629, - "eval_runtime": 145.6008, - "eval_samples_per_second": 38.846, - "eval_steps_per_second": 4.856, - "eval_wer": 0.4132657155237438, + "epoch": 2.1812337364151233, + "eval_loss": Infinity, + "eval_runtime": 178.7816, + "eval_samples_per_second": 39.277, + "eval_steps_per_second": 4.911, + "eval_wer": 0.4187248783583668, "step": 57000 }, { - "epoch": 2.259420802389092, - "grad_norm": 0.7495951056480408, - "learning_rate": 0.00021432330827067666, - "loss": 0.3259, + "epoch": 2.2003673656819225, + "grad_norm": 0.6189078688621521, + "learning_rate": 0.0002143172932330827, + "loss": 0.339, "step": 57500 }, { - "epoch": 2.279067939801171, - "grad_norm": 0.825587272644043, - "learning_rate": 0.00021357142857142855, - "loss": 0.3323, + "epoch": 2.2195009949487217, + "grad_norm": 1.0600789785385132, + "learning_rate": 0.0002135654135338346, + "loss": 0.3425, "step": 58000 }, { - "epoch": 2.279067939801171, - "eval_loss": 0.5191282033920288, - "eval_runtime": 145.5654, - "eval_samples_per_second": 38.855, - "eval_steps_per_second": 4.857, - "eval_wer": 0.40739195326667843, + "epoch": 2.2195009949487217, + "eval_loss": Infinity, + "eval_runtime": 178.4307, + "eval_samples_per_second": 39.354, + "eval_steps_per_second": 4.921, + "eval_wer": 0.4198884070234821, "step": 58000 }, { - "epoch": 2.29871507721325, - "grad_norm": 0.9213058948516846, - "learning_rate": 0.00021281954887218042, - "loss": 0.3292, + "epoch": 2.2386346242155213, + "grad_norm": 1.0213968753814697, + "learning_rate": 0.00021281503759398496, + "loss": 0.3439, "step": 58500 }, { - "epoch": 2.318362214625329, - "grad_norm": 6.7399773597717285, - "learning_rate": 0.00021206766917293232, - "loss": 0.3228, + "epoch": 2.2577682534823205, + "grad_norm": 1.2084991931915283, + "learning_rate": 0.0002120631578947368, + "loss": 0.3417, "step": 59000 }, { - "epoch": 2.318362214625329, - "eval_loss": 0.5073339939117432, - "eval_runtime": 145.4974, - "eval_samples_per_second": 38.874, - "eval_steps_per_second": 4.859, - "eval_wer": 0.3955802346295197, + "epoch": 2.2577682534823205, + "eval_loss": Infinity, + "eval_runtime": 177.6471, + "eval_samples_per_second": 39.528, + "eval_steps_per_second": 4.942, + "eval_wer": 0.4174423524434102, "step": 59000 }, { - "epoch": 2.338009352037408, - "grad_norm": 0.9246654510498047, - "learning_rate": 0.00021131578947368419, - "loss": 0.3311, + "epoch": 2.2769018827491196, + "grad_norm": 1.1838810443878174, + "learning_rate": 0.0002113112781954887, + "loss": 0.352, "step": 59500 }, { - "epoch": 2.357656489449487, - "grad_norm": 0.8129465579986572, - "learning_rate": 0.00021056390977443608, - "loss": 0.3172, + "epoch": 2.2960355120159193, + "grad_norm": 1.033828854560852, + "learning_rate": 0.00021055939849624056, + "loss": 0.3355, "step": 60000 }, { - "epoch": 2.357656489449487, - "eval_loss": 0.5084324479103088, - "eval_runtime": 145.2717, - "eval_samples_per_second": 38.934, - "eval_steps_per_second": 4.867, - "eval_wer": 0.4045353147919308, + "epoch": 2.2960355120159193, + "eval_loss": Infinity, + "eval_runtime": 178.3447, + "eval_samples_per_second": 39.373, + "eval_steps_per_second": 4.923, + "eval_wer": 0.4157896128622805, "step": 60000 }, { - "epoch": 2.377303626861566, - "grad_norm": 0.6280909776687622, - "learning_rate": 0.00020981203007518795, - "loss": 0.3235, + "epoch": 2.3151691412827184, + "grad_norm": 1.062028408050537, + "learning_rate": 0.00020980751879699246, + "loss": 0.3397, "step": 60500 }, { - "epoch": 2.396950764273645, - "grad_norm": 3.0157957077026367, - "learning_rate": 0.00020906015037593984, - "loss": 0.332, + "epoch": 2.3343027705495176, + "grad_norm": 0.8648023009300232, + "learning_rate": 0.00020905563909774433, + "loss": 0.3501, "step": 61000 }, { - "epoch": 2.396950764273645, - "eval_loss": 0.512955367565155, - "eval_runtime": 145.4819, - "eval_samples_per_second": 38.878, - "eval_steps_per_second": 4.86, - "eval_wer": 0.40151819100961306, + "epoch": 2.3343027705495176, + "eval_loss": Infinity, + "eval_runtime": 178.0407, + "eval_samples_per_second": 39.44, + "eval_steps_per_second": 4.931, + "eval_wer": 0.41284112544954515, "step": 61000 }, { - "epoch": 2.4165979016857246, - "grad_norm": 1.2728731632232666, - "learning_rate": 0.0002083082706766917, - "loss": 0.3298, + "epoch": 2.3534363998163172, + "grad_norm": 0.7826744318008423, + "learning_rate": 0.00020830375939849622, + "loss": 0.3276, "step": 61500 }, { - "epoch": 2.4362450390978037, - "grad_norm": 6.008030891418457, - "learning_rate": 0.0002075563909774436, - "loss": 0.3218, + "epoch": 2.3725700290831164, + "grad_norm": 0.8527004718780518, + "learning_rate": 0.0002075518796992481, + "loss": 0.3358, "step": 62000 }, { - "epoch": 2.4362450390978037, - "eval_loss": 0.5102687478065491, - "eval_runtime": 145.1668, - "eval_samples_per_second": 38.962, - "eval_steps_per_second": 4.87, - "eval_wer": 0.39972075556482806, + "epoch": 2.3725700290831164, + "eval_loss": Infinity, + "eval_runtime": 178.3539, + "eval_samples_per_second": 39.371, + "eval_steps_per_second": 4.923, + "eval_wer": 0.4115585995345885, "step": 62000 }, { - "epoch": 2.4558921765098827, - "grad_norm": 1.1027765274047852, - "learning_rate": 0.00020680451127819547, - "loss": 0.3207, + "epoch": 2.3917036583499156, + "grad_norm": 1.9309333562850952, + "learning_rate": 0.00020679999999999999, + "loss": 0.3294, "step": 62500 }, { - "epoch": 2.4755393139219617, - "grad_norm": 0.9439337849617004, - "learning_rate": 0.00020605263157894737, - "loss": 0.3317, + "epoch": 2.4108372876167152, + "grad_norm": 1.1275138854980469, + "learning_rate": 0.00020604812030075185, + "loss": 0.3343, "step": 63000 }, { - "epoch": 2.4755393139219617, - "eval_loss": 0.5019811391830444, - "eval_runtime": 145.2988, - "eval_samples_per_second": 38.927, - "eval_steps_per_second": 4.866, - "eval_wer": 0.40500072218388405, + "epoch": 2.4108372876167152, + "eval_loss": Infinity, + "eval_runtime": 178.4409, + "eval_samples_per_second": 39.352, + "eval_steps_per_second": 4.92, + "eval_wer": 0.4163978210281362, "step": 63000 }, { - "epoch": 2.4951864513340407, - "grad_norm": 1.4857794046401978, - "learning_rate": 0.0002053007518796992, - "loss": 0.3272, + "epoch": 2.4299709168835144, + "grad_norm": 3.78141450881958, + "learning_rate": 0.00020529624060150375, + "loss": 0.3269, "step": 63500 }, { - "epoch": 2.5148335887461197, - "grad_norm": 0.9404523968696594, - "learning_rate": 0.0002045488721804511, - "loss": 0.3222, + "epoch": 2.4491045461503136, + "grad_norm": 1.6814829111099243, + "learning_rate": 0.00020454436090225562, + "loss": 0.3343, "step": 64000 }, { - "epoch": 2.5148335887461197, - "eval_loss": 0.5072047114372253, - "eval_runtime": 145.1717, - "eval_samples_per_second": 38.961, - "eval_steps_per_second": 4.87, - "eval_wer": 0.39964051291104297, + "epoch": 2.4491045461503136, + "eval_loss": Infinity, + "eval_runtime": 177.6927, + "eval_samples_per_second": 39.518, + "eval_steps_per_second": 4.941, + "eval_wer": 0.41787867569282844, "step": 64000 }, { - "epoch": 2.5344807261581987, - "grad_norm": 1.1841472387313843, - "learning_rate": 0.00020379699248120297, - "loss": 0.3261, + "epoch": 2.468238175417113, + "grad_norm": 1.0685131549835205, + "learning_rate": 0.000203793984962406, + "loss": 0.3346, "step": 64500 }, { - "epoch": 2.5541278635702778, - "grad_norm": 0.7141321301460266, - "learning_rate": 0.0002030466165413534, - "loss": 0.3138, + "epoch": 2.4873718046839124, + "grad_norm": 0.6016332507133484, + "learning_rate": 0.00020304210526315788, + "loss": 0.3367, "step": 65000 }, { - "epoch": 2.5541278635702778, - "eval_loss": 0.5098404884338379, - "eval_runtime": 144.9074, - "eval_samples_per_second": 39.032, - "eval_steps_per_second": 4.879, - "eval_wer": 0.40357240294651026, + "epoch": 2.4873718046839124, + "eval_loss": Infinity, + "eval_runtime": 178.6952, + "eval_samples_per_second": 39.296, + "eval_steps_per_second": 4.913, + "eval_wer": 0.411730484451026, "step": 65000 }, { - "epoch": 2.573775000982357, - "grad_norm": 0.8695092797279358, - "learning_rate": 0.00020229473684210523, - "loss": 0.3198, + "epoch": 2.5065054339507116, + "grad_norm": 0.9282209277153015, + "learning_rate": 0.00020229022556390977, + "loss": 0.3307, "step": 65500 }, { - "epoch": 2.593422138394436, - "grad_norm": 1.1630802154541016, - "learning_rate": 0.00020154285714285713, - "loss": 0.3074, + "epoch": 2.525639063217511, + "grad_norm": 0.604811429977417, + "learning_rate": 0.00020153984962406014, + "loss": 0.3237, "step": 66000 }, { - "epoch": 2.593422138394436, - "eval_loss": 0.5026105046272278, - "eval_runtime": 145.0532, - "eval_samples_per_second": 38.993, - "eval_steps_per_second": 4.874, - "eval_wer": 0.3981159024891271, + "epoch": 2.525639063217511, + "eval_loss": Infinity, + "eval_runtime": 177.9582, + "eval_samples_per_second": 39.459, + "eval_steps_per_second": 4.934, + "eval_wer": 0.40677226570763697, "step": 66000 }, { - "epoch": 2.613069275806515, - "grad_norm": 1.2439523935317993, - "learning_rate": 0.000200790977443609, - "loss": 0.3234, + "epoch": 2.5447726924843104, + "grad_norm": 1.4802906513214111, + "learning_rate": 0.000200787969924812, + "loss": 0.3243, "step": 66500 }, { - "epoch": 2.632716413218594, - "grad_norm": 0.7216903567314148, - "learning_rate": 0.0002000390977443609, - "loss": 0.3261, + "epoch": 2.5639063217511096, + "grad_norm": 1.0026686191558838, + "learning_rate": 0.0002000360902255639, + "loss": 0.3335, "step": 67000 }, { - "epoch": 2.632716413218594, - "eval_loss": 0.5030384063720703, - "eval_runtime": 145.8235, - "eval_samples_per_second": 38.787, - "eval_steps_per_second": 4.848, - "eval_wer": 0.39349392563110847, + "epoch": 2.5639063217511096, + "eval_loss": Infinity, + "eval_runtime": 178.1808, + "eval_samples_per_second": 39.409, + "eval_steps_per_second": 4.928, + "eval_wer": 0.40802834778929553, "step": 67000 }, { - "epoch": 2.652363550630673, - "grad_norm": 1.0616713762283325, - "learning_rate": 0.00019928721804511276, - "loss": 0.3147, + "epoch": 2.583039951017909, + "grad_norm": 0.7298141717910767, + "learning_rate": 0.00019928421052631577, + "loss": 0.3326, "step": 67500 }, { - "epoch": 2.6720106880427523, - "grad_norm": 10.39274787902832, - "learning_rate": 0.00019853533834586465, - "loss": 0.3257, + "epoch": 2.6021735802847084, + "grad_norm": 0.5921869874000549, + "learning_rate": 0.00019853383458646616, + "loss": 0.3254, "step": 68000 }, { - "epoch": 2.6720106880427523, - "eval_loss": 0.500296413898468, - "eval_runtime": 144.7526, - "eval_samples_per_second": 39.074, - "eval_steps_per_second": 4.884, - "eval_wer": 0.39028421947970665, + "epoch": 2.6021735802847084, + "eval_loss": Infinity, + "eval_runtime": 177.3757, + "eval_samples_per_second": 39.588, + "eval_steps_per_second": 4.95, + "eval_wer": 0.3981780198857626, "step": 68000 }, { - "epoch": 2.6916578254548313, - "grad_norm": 0.7911710739135742, - "learning_rate": 0.00019778345864661652, - "loss": 0.3274, + "epoch": 2.6213072095515075, + "grad_norm": 0.7208895683288574, + "learning_rate": 0.00019778195488721803, + "loss": 0.3235, "step": 68500 }, { - "epoch": 2.7113049628669104, - "grad_norm": 0.6936825513839722, - "learning_rate": 0.00019703157894736842, - "loss": 0.3179, + "epoch": 2.640440838818307, + "grad_norm": 1.1068922281265259, + "learning_rate": 0.00019703007518796992, + "loss": 0.3295, "step": 69000 }, { - "epoch": 2.7113049628669104, - "eval_loss": 0.5139185786247253, - "eval_runtime": 145.2074, - "eval_samples_per_second": 38.951, - "eval_steps_per_second": 4.869, - "eval_wer": 0.4003947938566224, + "epoch": 2.640440838818307, + "eval_loss": Infinity, + "eval_runtime": 176.2384, + "eval_samples_per_second": 39.844, + "eval_steps_per_second": 4.982, + "eval_wer": 0.41358155278189124, "step": 69000 }, { - "epoch": 2.7309521002789894, - "grad_norm": 0.5706244111061096, - "learning_rate": 0.00019627969924812028, - "loss": 0.3147, + "epoch": 2.6595744680851063, + "grad_norm": 0.8548173308372498, + "learning_rate": 0.0001962781954887218, + "loss": 0.3256, "step": 69500 }, { - "epoch": 2.7505992376910684, - "grad_norm": 0.9982422590255737, - "learning_rate": 0.00019552932330827065, - "loss": 0.3154, + "epoch": 2.6787080973519055, + "grad_norm": 8.747632026672363, + "learning_rate": 0.00019552631578947368, + "loss": 0.3326, "step": 70000 }, { - "epoch": 2.7505992376910684, - "eval_loss": 0.5041365027427673, - "eval_runtime": 144.6141, - "eval_samples_per_second": 39.111, - "eval_steps_per_second": 4.889, - "eval_wer": 0.39455312866107106, + "epoch": 2.6787080973519055, + "eval_loss": Infinity, + "eval_runtime": 177.118, + "eval_samples_per_second": 39.646, + "eval_steps_per_second": 4.957, + "eval_wer": 0.4045113179606516, "step": 70000 }, { - "epoch": 2.7702463751031474, - "grad_norm": 0.6709697842597961, - "learning_rate": 0.00019477894736842104, - "loss": 0.3116, + "epoch": 2.697841726618705, + "grad_norm": 1.7842276096343994, + "learning_rate": 0.00019477443609022555, + "loss": 0.3192, "step": 70500 }, { - "epoch": 2.7898935125152264, - "grad_norm": 1.2155810594558716, - "learning_rate": 0.0001940270676691729, - "loss": 0.3119, + "epoch": 2.7169753558855043, + "grad_norm": 2.2976646423339844, + "learning_rate": 0.00019402255639097745, + "loss": 0.3167, "step": 71000 }, { - "epoch": 2.7898935125152264, - "eval_loss": 0.49135103821754456, - "eval_runtime": 144.8705, - "eval_samples_per_second": 39.042, - "eval_steps_per_second": 4.88, - "eval_wer": 0.3940877212691178, + "epoch": 2.7169753558855043, + "eval_loss": Infinity, + "eval_runtime": 176.2794, + "eval_samples_per_second": 39.834, + "eval_steps_per_second": 4.981, + "eval_wer": 0.4043658768775122, "step": 71000 }, { - "epoch": 2.8095406499273055, - "grad_norm": 9.424253463745117, - "learning_rate": 0.0001932766917293233, - "loss": 0.3177, + "epoch": 2.7361089851523035, + "grad_norm": 1.3297739028930664, + "learning_rate": 0.0001932706766917293, + "loss": 0.3179, "step": 71500 }, { - "epoch": 2.829187787339385, - "grad_norm": 0.6820365786552429, - "learning_rate": 0.00019252481203007517, - "loss": 0.3128, + "epoch": 2.755242614419103, + "grad_norm": 1.6274834871292114, + "learning_rate": 0.0001925203007518797, + "loss": 0.3376, "step": 72000 }, { - "epoch": 2.829187787339385, - "eval_loss": 0.4867289066314697, - "eval_runtime": 144.797, - "eval_samples_per_second": 39.062, - "eval_steps_per_second": 4.883, - "eval_wer": 0.38309447770056654, + "epoch": 2.755242614419103, + "eval_loss": Infinity, + "eval_runtime": 175.1612, + "eval_samples_per_second": 40.089, + "eval_steps_per_second": 5.013, + "eval_wer": 0.3942114448910514, "step": 72000 }, { - "epoch": 2.848834924751464, - "grad_norm": 0.6361156702041626, - "learning_rate": 0.00019177443609022553, - "loss": 0.3127, + "epoch": 2.7743762436859023, + "grad_norm": 0.5883073210716248, + "learning_rate": 0.00019176842105263155, + "loss": 0.3109, "step": 72500 }, { - "epoch": 2.868482062163543, - "grad_norm": 1.142830491065979, - "learning_rate": 0.00019102255639097743, - "loss": 0.3105, + "epoch": 2.7935098729527015, + "grad_norm": 0.9327465891838074, + "learning_rate": 0.00019101804511278194, + "loss": 0.3245, "step": 73000 }, { - "epoch": 2.868482062163543, - "eval_loss": 0.4870510995388031, - "eval_runtime": 145.1289, - "eval_samples_per_second": 38.972, - "eval_steps_per_second": 4.872, - "eval_wer": 0.3817945467092488, + "epoch": 2.7935098729527015, + "eval_loss": Infinity, + "eval_runtime": 175.6566, + "eval_samples_per_second": 39.976, + "eval_steps_per_second": 4.998, + "eval_wer": 0.3957848529722869, "step": 73000 }, { - "epoch": 2.888129199575622, - "grad_norm": 0.8262931704521179, - "learning_rate": 0.0001902706766917293, - "loss": 0.3234, + "epoch": 2.812643502219501, + "grad_norm": 0.9724407196044922, + "learning_rate": 0.00019026616541353383, + "loss": 0.3128, "step": 73500 }, { - "epoch": 2.907776336987701, - "grad_norm": 1.1251935958862305, - "learning_rate": 0.0001895187969924812, - "loss": 0.309, + "epoch": 2.8317771314863003, + "grad_norm": 0.7509967684745789, + "learning_rate": 0.00018951428571428567, + "loss": 0.315, "step": 74000 }, { - "epoch": 2.907776336987701, - "eval_loss": 0.48873621225357056, - "eval_runtime": 144.4047, - "eval_samples_per_second": 39.168, - "eval_steps_per_second": 4.896, - "eval_wer": 0.39885413490394955, + "epoch": 2.8317771314863003, + "eval_loss": Infinity, + "eval_runtime": 175.6684, + "eval_samples_per_second": 39.973, + "eval_steps_per_second": 4.998, + "eval_wer": 0.40650782737465624, "step": 74000 }, { - "epoch": 2.92742347439978, - "grad_norm": 0.825520396232605, - "learning_rate": 0.00018876691729323306, - "loss": 0.3204, + "epoch": 2.8509107607530995, + "grad_norm": 0.7796798944473267, + "learning_rate": 0.00018876240601503757, + "loss": 0.3236, "step": 74500 }, { - "epoch": 2.947070611811859, - "grad_norm": 0.5379465818405151, - "learning_rate": 0.00018801654135338345, - "loss": 0.3073, + "epoch": 2.870044390019899, + "grad_norm": 1.8985257148742676, + "learning_rate": 0.00018801052631578944, + "loss": 0.327, "step": 75000 }, { - "epoch": 2.947070611811859, - "eval_loss": 0.48394420742988586, - "eval_runtime": 145.2237, - "eval_samples_per_second": 38.947, - "eval_steps_per_second": 4.868, - "eval_wer": 0.3903644621334917, + "epoch": 2.870044390019899, + "eval_loss": Infinity, + "eval_runtime": 176.0416, + "eval_samples_per_second": 39.888, + "eval_steps_per_second": 4.987, + "eval_wer": 0.40103395388195473, "step": 75000 }, { - "epoch": 2.966717749223938, - "grad_norm": 0.4377336800098419, - "learning_rate": 0.00018726466165413532, - "loss": 0.3044, + "epoch": 2.8891780192866983, + "grad_norm": 0.909794807434082, + "learning_rate": 0.00018725864661654133, + "loss": 0.3235, "step": 75500 }, { - "epoch": 2.986364886636017, - "grad_norm": 0.8088381290435791, - "learning_rate": 0.0001865127819548872, - "loss": 0.3023, + "epoch": 2.9083116485534974, + "grad_norm": 1.1613683700561523, + "learning_rate": 0.0001865082706766917, + "loss": 0.3211, "step": 76000 }, { - "epoch": 2.986364886636017, - "eval_loss": 0.48391589522361755, - "eval_runtime": 145.3642, - "eval_samples_per_second": 38.909, - "eval_steps_per_second": 4.864, - "eval_wer": 0.38049461571793103, + "epoch": 2.9083116485534974, + "eval_loss": Infinity, + "eval_runtime": 176.4137, + "eval_samples_per_second": 39.804, + "eval_steps_per_second": 4.977, + "eval_wer": 0.39259837105986883, "step": 76000 }, { - "epoch": 3.006012024048096, - "grad_norm": 0.6517421007156372, - "learning_rate": 0.00018576090225563908, - "loss": 0.3049, + "epoch": 2.927445277820297, + "grad_norm": 0.77381831407547, + "learning_rate": 0.0001857563909774436, + "loss": 0.3183, "step": 76500 }, { - "epoch": 3.025659161460175, - "grad_norm": 1.4399667978286743, - "learning_rate": 0.00018500902255639098, - "loss": 0.2715, + "epoch": 2.9465789070870962, + "grad_norm": 7.940882205963135, + "learning_rate": 0.00018500451127819546, + "loss": 0.323, "step": 77000 }, { - "epoch": 3.025659161460175, - "eval_loss": 0.48158180713653564, - "eval_runtime": 147.38, - "eval_samples_per_second": 38.377, - "eval_steps_per_second": 4.797, - "eval_wer": 0.38766830896631416, + "epoch": 2.9465789070870962, + "eval_loss": Infinity, + "eval_runtime": 178.2912, + "eval_samples_per_second": 39.385, + "eval_steps_per_second": 4.925, + "eval_wer": 0.4005315210492913, "step": 77000 }, { - "epoch": 3.045306298872254, - "grad_norm": 1.0456621646881104, - "learning_rate": 0.00018425714285714284, - "loss": 0.2762, + "epoch": 2.9657125363538954, + "grad_norm": 0.49370139837265015, + "learning_rate": 0.00018425263157894735, + "loss": 0.3125, "step": 77500 }, { - "epoch": 3.064953436284333, - "grad_norm": 0.6409999132156372, - "learning_rate": 0.00018350526315789474, - "loss": 0.2565, + "epoch": 2.984846165620695, + "grad_norm": 1.297203540802002, + "learning_rate": 0.00018350075187969922, + "loss": 0.323, "step": 78000 }, { - "epoch": 3.064953436284333, - "eval_loss": 0.49935096502304077, - "eval_runtime": 144.0075, - "eval_samples_per_second": 39.276, - "eval_steps_per_second": 4.909, - "eval_wer": 0.3811044598866974, + "epoch": 2.984846165620695, + "eval_loss": Infinity, + "eval_runtime": 177.4201, + "eval_samples_per_second": 39.578, + "eval_steps_per_second": 4.949, + "eval_wer": 0.38635762640152316, "step": 78000 }, { - "epoch": 3.0846005736964126, - "grad_norm": 0.8721100687980652, - "learning_rate": 0.0001827548872180451, - "loss": 0.2681, + "epoch": 3.0039797948874942, + "grad_norm": 0.7693071365356445, + "learning_rate": 0.0001827503759398496, + "loss": 0.3041, "step": 78500 }, { - "epoch": 3.1042477111084916, - "grad_norm": 1.0572487115859985, - "learning_rate": 0.00018200300751879697, - "loss": 0.2697, + "epoch": 3.0231134241542934, + "grad_norm": 0.9858660697937012, + "learning_rate": 0.00018199849624060148, + "loss": 0.2747, "step": 79000 }, { - "epoch": 3.1042477111084916, - "eval_loss": 0.48027363419532776, - "eval_runtime": 143.6893, - "eval_samples_per_second": 39.363, - "eval_steps_per_second": 4.92, - "eval_wer": 0.3813291393172955, + "epoch": 3.0231134241542934, + "eval_loss": Infinity, + "eval_runtime": 179.3853, + "eval_samples_per_second": 39.145, + "eval_steps_per_second": 4.894, + "eval_wer": 0.39882589380156547, "step": 79000 }, { - "epoch": 3.1238948485205706, - "grad_norm": 0.5640320777893066, - "learning_rate": 0.00018125112781954887, - "loss": 0.274, + "epoch": 3.042247053421093, + "grad_norm": 0.7790058851242065, + "learning_rate": 0.00018124661654135335, + "loss": 0.2799, "step": 79500 }, { - "epoch": 3.1435419859326497, - "grad_norm": 0.740835964679718, - "learning_rate": 0.00018049924812030073, - "loss": 0.2717, + "epoch": 3.061380682687892, + "grad_norm": 0.9347246885299683, + "learning_rate": 0.00018049473684210525, + "loss": 0.2706, "step": 80000 }, { - "epoch": 3.1435419859326497, - "eval_loss": 0.48425012826919556, - "eval_runtime": 144.7996, - "eval_samples_per_second": 39.061, - "eval_steps_per_second": 4.883, - "eval_wer": 0.37988477154916467, + "epoch": 3.061380682687892, + "eval_loss": Infinity, + "eval_runtime": 176.71, + "eval_samples_per_second": 39.737, + "eval_steps_per_second": 4.969, + "eval_wer": 0.3860799661518934, "step": 80000 }, { - "epoch": 3.1631891233447287, - "grad_norm": 0.4206051528453827, - "learning_rate": 0.00017974736842105263, - "loss": 0.2751, + "epoch": 3.0805143119546914, + "grad_norm": 0.478522926568985, + "learning_rate": 0.0001797428571428571, + "loss": 0.2807, "step": 80500 }, { - "epoch": 3.1828362607568077, - "grad_norm": 1.7560110092163086, - "learning_rate": 0.00017899548872180447, - "loss": 0.2738, + "epoch": 3.099647941221491, + "grad_norm": 0.6804964542388916, + "learning_rate": 0.000178990977443609, + "loss": 0.2696, "step": 81000 }, { - "epoch": 3.1828362607568077, - "eval_loss": 0.4904831647872925, - "eval_runtime": 145.8617, - "eval_samples_per_second": 38.776, - "eval_steps_per_second": 4.847, - "eval_wer": 0.37967614064932353, + "epoch": 3.099647941221491, + "eval_loss": Infinity, + "eval_runtime": 177.3621, + "eval_samples_per_second": 39.591, + "eval_steps_per_second": 4.95, + "eval_wer": 0.38777237148297017, "step": 81000 }, { - "epoch": 3.2024833981688867, - "grad_norm": 0.5435498952865601, - "learning_rate": 0.00017824360902255637, - "loss": 0.2671, + "epoch": 3.11878157048829, + "grad_norm": 0.6777291893959045, + "learning_rate": 0.00017823909774436088, + "loss": 0.2783, "step": 81500 }, { - "epoch": 3.2221305355809657, - "grad_norm": 0.8769587278366089, - "learning_rate": 0.00017749323308270673, - "loss": 0.2617, + "epoch": 3.1379151997550894, + "grad_norm": 0.9108553528785706, + "learning_rate": 0.00017748721804511277, + "loss": 0.2792, "step": 82000 }, { - "epoch": 3.2221305355809657, - "eval_loss": 0.4753645956516266, - "eval_runtime": 144.9825, - "eval_samples_per_second": 39.012, - "eval_steps_per_second": 4.876, - "eval_wer": 0.37282341801608065, + "epoch": 3.1379151997550894, + "eval_loss": Infinity, + "eval_runtime": 177.9123, + "eval_samples_per_second": 39.469, + "eval_steps_per_second": 4.935, + "eval_wer": 0.39447588322403215, "step": 82000 }, { - "epoch": 3.2417776729930448, - "grad_norm": 0.5920813083648682, - "learning_rate": 0.00017674135338345865, - "loss": 0.27, + "epoch": 3.157048829021889, + "grad_norm": 0.7335214018821716, + "learning_rate": 0.00017673533834586464, + "loss": 0.2818, "step": 82500 }, { - "epoch": 3.261424810405124, - "grad_norm": 0.4276420474052429, - "learning_rate": 0.0001759894736842105, - "loss": 0.2634, + "epoch": 3.176182458288688, + "grad_norm": 1.0339977741241455, + "learning_rate": 0.00017598496240601503, + "loss": 0.2809, "step": 83000 }, { - "epoch": 3.261424810405124, - "eval_loss": 0.4729759693145752, - "eval_runtime": 144.8389, - "eval_samples_per_second": 39.05, - "eval_steps_per_second": 4.881, - "eval_wer": 0.3668052189822022, + "epoch": 3.176182458288688, + "eval_loss": Infinity, + "eval_runtime": 177.3969, + "eval_samples_per_second": 39.584, + "eval_steps_per_second": 4.949, + "eval_wer": 0.39493865030674846, "step": 83000 }, { - "epoch": 3.2810719478172032, - "grad_norm": 2.3023736476898193, - "learning_rate": 0.0001752375939849624, - "loss": 0.2771, + "epoch": 3.1953160875554873, + "grad_norm": 1.3720539808273315, + "learning_rate": 0.0001752330827067669, + "loss": 0.2767, "step": 83500 }, { - "epoch": 3.3007190852292823, - "grad_norm": 1.1624869108200073, - "learning_rate": 0.00017448571428571426, - "loss": 0.2648, + "epoch": 3.214449716822287, + "grad_norm": 1.6575071811676025, + "learning_rate": 0.0001744827067669173, + "loss": 0.2709, "step": 84000 }, { - "epoch": 3.3007190852292823, - "eval_loss": 0.4768010377883911, - "eval_runtime": 144.7404, - "eval_samples_per_second": 39.077, - "eval_steps_per_second": 4.885, - "eval_wer": 0.3690520132881835, + "epoch": 3.214449716822287, + "eval_loss": Infinity, + "eval_runtime": 177.9587, + "eval_samples_per_second": 39.459, + "eval_steps_per_second": 4.934, + "eval_wer": 0.3852205415697059, "step": 84000 }, { - "epoch": 3.3203662226413613, - "grad_norm": 0.64561527967453, - "learning_rate": 0.00017373383458646615, - "loss": 0.2745, + "epoch": 3.233583346089086, + "grad_norm": 0.9274744987487793, + "learning_rate": 0.00017373082706766916, + "loss": 0.2692, "step": 84500 }, { - "epoch": 3.3400133600534403, - "grad_norm": 0.5857324600219727, - "learning_rate": 0.00017298195488721802, - "loss": 0.2567, + "epoch": 3.2527169753558853, + "grad_norm": 0.6898565292358398, + "learning_rate": 0.00017297894736842105, + "loss": 0.2808, "step": 85000 }, { - "epoch": 3.3400133600534403, - "eval_loss": 0.4812460243701935, - "eval_runtime": 145.4739, - "eval_samples_per_second": 38.88, - "eval_steps_per_second": 4.86, - "eval_wer": 0.37410730047664137, + "epoch": 3.2527169753558853, + "eval_loss": Infinity, + "eval_runtime": 177.9702, + "eval_samples_per_second": 39.456, + "eval_steps_per_second": 4.933, + "eval_wer": 0.3912629574783161, "step": 85000 }, { - "epoch": 3.3596604974655193, - "grad_norm": 0.97500079870224, - "learning_rate": 0.00017223007518796991, - "loss": 0.2686, + "epoch": 3.271850604622685, + "grad_norm": 0.44578102231025696, + "learning_rate": 0.00017222706766917292, + "loss": 0.2747, "step": 85500 }, { - "epoch": 3.3793076348775983, - "grad_norm": 0.6413397789001465, - "learning_rate": 0.00017147969924812028, - "loss": 0.2687, + "epoch": 3.290984233889484, + "grad_norm": 0.3729807138442993, + "learning_rate": 0.00017147518796992482, + "loss": 0.2746, "step": 86000 }, { - "epoch": 3.3793076348775983, - "eval_loss": 0.46830272674560547, - "eval_runtime": 144.9613, - "eval_samples_per_second": 39.017, - "eval_steps_per_second": 4.877, - "eval_wer": 0.37160372967854793, + "epoch": 3.290984233889484, + "eval_loss": Infinity, + "eval_runtime": 177.7245, + "eval_samples_per_second": 39.511, + "eval_steps_per_second": 4.94, + "eval_wer": 0.38564364290247516, "step": 86000 }, { - "epoch": 3.3989547722896774, - "grad_norm": 1.0018800497055054, - "learning_rate": 0.00017072781954887217, - "loss": 0.2595, + "epoch": 3.3101178631562833, + "grad_norm": 0.7046172618865967, + "learning_rate": 0.00017072481203007518, + "loss": 0.2722, "step": 86500 }, { - "epoch": 3.4186019097017564, - "grad_norm": 1.0536398887634277, - "learning_rate": 0.00016997593984962404, - "loss": 0.2757, + "epoch": 3.329251492423083, + "grad_norm": 1.7671455144882202, + "learning_rate": 0.00016997293233082705, + "loss": 0.2633, "step": 87000 }, { - "epoch": 3.4186019097017564, - "eval_loss": 0.46901389956474304, - "eval_runtime": 144.4366, - "eval_samples_per_second": 39.159, - "eval_steps_per_second": 4.895, - "eval_wer": 0.37320858275424884, + "epoch": 3.329251492423083, + "eval_loss": Infinity, + "eval_runtime": 178.1063, + "eval_samples_per_second": 39.426, + "eval_steps_per_second": 4.93, + "eval_wer": 0.38845991114872014, "step": 87000 }, { - "epoch": 3.4382490471138354, - "grad_norm": 0.4486633837223053, - "learning_rate": 0.00016922406015037594, - "loss": 0.2655, + "epoch": 3.348385121689882, + "grad_norm": 0.8641050457954407, + "learning_rate": 0.00016922105263157894, + "loss": 0.2694, "step": 87500 }, { - "epoch": 3.4578961845259144, - "grad_norm": 0.6999643445014954, - "learning_rate": 0.0001684721804511278, - "loss": 0.2596, + "epoch": 3.3675187509566813, + "grad_norm": 0.6219012141227722, + "learning_rate": 0.0001684706766917293, + "loss": 0.2745, "step": 88000 }, { - "epoch": 3.4578961845259144, - "eval_loss": 0.47534072399139404, - "eval_runtime": 145.0031, - "eval_samples_per_second": 39.006, - "eval_steps_per_second": 4.876, - "eval_wer": 0.37824782141194974, + "epoch": 3.3675187509566813, + "eval_loss": Infinity, + "eval_runtime": 178.0455, + "eval_samples_per_second": 39.439, + "eval_steps_per_second": 4.931, + "eval_wer": 0.38491643748677806, "step": 88000 }, { - "epoch": 3.4775433219379934, - "grad_norm": 0.9858837723731995, - "learning_rate": 0.0001677203007518797, - "loss": 0.2614, + "epoch": 3.386652380223481, + "grad_norm": 0.6935294270515442, + "learning_rate": 0.0001677187969924812, + "loss": 0.277, "step": 88500 }, { - "epoch": 3.497190459350073, - "grad_norm": 0.5992431640625, - "learning_rate": 0.00016696842105263157, - "loss": 0.2589, + "epoch": 3.40578600949028, + "grad_norm": 0.6084161400794983, + "learning_rate": 0.00016696691729323307, + "loss": 0.2832, "step": 89000 }, { - "epoch": 3.497190459350073, - "eval_loss": 0.4645041823387146, - "eval_runtime": 146.1245, - "eval_samples_per_second": 38.707, - "eval_steps_per_second": 4.838, - "eval_wer": 0.3691483044727255, + "epoch": 3.40578600949028, + "eval_loss": Infinity, + "eval_runtime": 178.6527, + "eval_samples_per_second": 39.305, + "eval_steps_per_second": 4.915, + "eval_wer": 0.3820869473238841, "step": 89000 }, { - "epoch": 3.516837596762152, - "grad_norm": 2.5524730682373047, - "learning_rate": 0.00016621804511278193, - "loss": 0.2724, + "epoch": 3.4249196387570793, + "grad_norm": 1.1567957401275635, + "learning_rate": 0.00016621503759398497, + "loss": 0.2898, "step": 89500 }, { - "epoch": 3.536484734174231, - "grad_norm": 0.42577388882637024, - "learning_rate": 0.00016546616541353383, - "loss": 0.2627, + "epoch": 3.444053268023879, + "grad_norm": 0.7702553868293762, + "learning_rate": 0.0001654631578947368, + "loss": 0.2806, "step": 90000 }, { - "epoch": 3.536484734174231, - "eval_loss": 0.4689880907535553, - "eval_runtime": 146.114, - "eval_samples_per_second": 38.71, - "eval_steps_per_second": 4.839, - "eval_wer": 0.3675274028662676, + "epoch": 3.444053268023879, + "eval_loss": Infinity, + "eval_runtime": 177.9228, + "eval_samples_per_second": 39.467, + "eval_steps_per_second": 4.935, + "eval_wer": 0.38568330865242223, "step": 90000 }, { - "epoch": 3.55613187158631, - "grad_norm": 0.530576765537262, - "learning_rate": 0.0001647142857142857, - "loss": 0.2692, + "epoch": 3.463186897290678, + "grad_norm": 0.6563850045204163, + "learning_rate": 0.00016471127819548873, + "loss": 0.2658, "step": 90500 }, { - "epoch": 3.575779008998389, - "grad_norm": 1.5638034343719482, - "learning_rate": 0.0001639624060150376, - "loss": 0.2804, + "epoch": 3.4823205265574773, + "grad_norm": 0.33683669567108154, + "learning_rate": 0.00016395939849624057, + "loss": 0.2756, "step": 91000 }, { - "epoch": 3.575779008998389, - "eval_loss": 0.46749356389045715, - "eval_runtime": 146.7363, - "eval_samples_per_second": 38.545, - "eval_steps_per_second": 4.818, - "eval_wer": 0.37420359166118344, + "epoch": 3.4823205265574773, + "eval_loss": Infinity, + "eval_runtime": 178.2575, + "eval_samples_per_second": 39.392, + "eval_steps_per_second": 4.925, + "eval_wer": 0.381015972075312, "step": 91000 }, { - "epoch": 3.595426146410468, - "grad_norm": 0.7981226444244385, - "learning_rate": 0.00016321052631578946, - "loss": 0.2658, + "epoch": 3.501454155824277, + "grad_norm": 0.5872700214385986, + "learning_rate": 0.00016320751879699246, + "loss": 0.2696, "step": 91500 }, { - "epoch": 3.615073283822547, - "grad_norm": 0.4092627167701721, - "learning_rate": 0.00016245864661654135, - "loss": 0.2587, + "epoch": 3.520587785091076, + "grad_norm": 7.343397617340088, + "learning_rate": 0.00016245563909774433, + "loss": 0.2733, "step": 92000 }, { - "epoch": 3.615073283822547, - "eval_loss": 0.46739462018013, - "eval_runtime": 145.0739, - "eval_samples_per_second": 38.987, - "eval_steps_per_second": 4.873, - "eval_wer": 0.3593747492417069, + "epoch": 3.520587785091076, + "eval_loss": Infinity, + "eval_runtime": 178.508, + "eval_samples_per_second": 39.337, + "eval_steps_per_second": 4.919, + "eval_wer": 0.37381002750158665, "step": 92000 }, { - "epoch": 3.634720421234626, - "grad_norm": 0.4350492060184479, - "learning_rate": 0.0001617067669172932, - "loss": 0.2664, + "epoch": 3.5397214143578752, + "grad_norm": 0.6450570821762085, + "learning_rate": 0.00016170526315789472, + "loss": 0.2721, "step": 92500 }, { - "epoch": 3.654367558646705, - "grad_norm": 0.7081959247589111, - "learning_rate": 0.00016095488721804512, - "loss": 0.2615, + "epoch": 3.558855043624675, + "grad_norm": 0.5071462988853455, + "learning_rate": 0.0001609533834586466, + "loss": 0.2807, "step": 93000 }, { - "epoch": 3.654367558646705, - "eval_loss": 0.46574193239212036, - "eval_runtime": 145.5779, - "eval_samples_per_second": 38.852, - "eval_steps_per_second": 4.857, - "eval_wer": 0.36321034809263214, + "epoch": 3.558855043624675, + "eval_loss": Infinity, + "eval_runtime": 177.146, + "eval_samples_per_second": 39.64, + "eval_steps_per_second": 4.956, + "eval_wer": 0.3857097524857203, "step": 93000 }, { - "epoch": 3.6740146960587845, - "grad_norm": 1.1376652717590332, - "learning_rate": 0.00016020300751879696, - "loss": 0.2664, + "epoch": 3.577988672891474, + "grad_norm": 0.909946084022522, + "learning_rate": 0.0001602015037593985, + "loss": 0.2676, "step": 93500 }, { - "epoch": 3.6936618334708635, - "grad_norm": 0.8354430794715881, - "learning_rate": 0.00015945263157894738, - "loss": 0.2531, + "epoch": 3.597122302158273, + "grad_norm": 0.7381096482276917, + "learning_rate": 0.00015944962406015036, + "loss": 0.2773, "step": 94000 }, { - "epoch": 3.6936618334708635, - "eval_loss": 0.45889467000961304, - "eval_runtime": 145.207, - "eval_samples_per_second": 38.951, - "eval_steps_per_second": 4.869, - "eval_wer": 0.3668373160437162, + "epoch": 3.597122302158273, + "eval_loss": Infinity, + "eval_runtime": 178.6056, + "eval_samples_per_second": 39.316, + "eval_steps_per_second": 4.916, + "eval_wer": 0.37201184683731753, "step": 94000 }, { - "epoch": 3.7133089708829425, - "grad_norm": 0.7989226579666138, - "learning_rate": 0.00015870075187969922, - "loss": 0.2621, + "epoch": 3.616255931425073, + "grad_norm": 0.5159269571304321, + "learning_rate": 0.00015869774436090225, + "loss": 0.2801, "step": 94500 }, { - "epoch": 3.7329561082950216, - "grad_norm": 1.2648522853851318, - "learning_rate": 0.00015794887218045114, - "loss": 0.2466, + "epoch": 3.635389560691872, + "grad_norm": 1.289354920387268, + "learning_rate": 0.00015794586466165412, + "loss": 0.2725, "step": 95000 }, { - "epoch": 3.7329561082950216, - "eval_loss": 0.46178776025772095, - "eval_runtime": 145.1044, - "eval_samples_per_second": 38.979, - "eval_steps_per_second": 4.872, - "eval_wer": 0.3691001588804545, + "epoch": 3.635389560691872, + "eval_loss": Infinity, + "eval_runtime": 179.0499, + "eval_samples_per_second": 39.218, + "eval_steps_per_second": 4.904, + "eval_wer": 0.36897080600803894, "step": 95000 }, { - "epoch": 3.7526032457071006, - "grad_norm": 0.6409050226211548, - "learning_rate": 0.00015719699248120298, - "loss": 0.2732, + "epoch": 3.654523189958671, + "grad_norm": 0.7305335998535156, + "learning_rate": 0.000157193984962406, + "loss": 0.2618, "step": 95500 }, { - "epoch": 3.7722503831191796, - "grad_norm": 0.8056377172470093, - "learning_rate": 0.00015644511278195487, - "loss": 0.2653, + "epoch": 3.673656819225471, + "grad_norm": 0.5354152917861938, + "learning_rate": 0.00015644210526315788, + "loss": 0.2614, "step": 96000 }, { - "epoch": 3.7722503831191796, - "eval_loss": 0.46144935488700867, - "eval_runtime": 145.0964, - "eval_samples_per_second": 38.981, - "eval_steps_per_second": 4.873, - "eval_wer": 0.3774774919356133, + "epoch": 3.673656819225471, + "eval_loss": Infinity, + "eval_runtime": 179.0067, + "eval_samples_per_second": 39.228, + "eval_steps_per_second": 4.905, + "eval_wer": 0.375343769832875, "step": 96000 }, { - "epoch": 3.7918975205312586, - "grad_norm": 0.6420221924781799, - "learning_rate": 0.00015569473684210524, - "loss": 0.267, + "epoch": 3.69279044849227, + "grad_norm": 0.4864795506000519, + "learning_rate": 0.00015569022556390978, + "loss": 0.261, "step": 96500 }, { - "epoch": 3.8115446579433376, - "grad_norm": 1.7600951194763184, - "learning_rate": 0.0001549428571428571, - "loss": 0.2542, + "epoch": 3.711924077759069, + "grad_norm": 0.6722401976585388, + "learning_rate": 0.00015493834586466164, + "loss": 0.2674, "step": 97000 }, { - "epoch": 3.8115446579433376, - "eval_loss": 0.4600285291671753, - "eval_runtime": 145.9311, - "eval_samples_per_second": 38.758, - "eval_steps_per_second": 4.845, - "eval_wer": 0.3726308356469965, + "epoch": 3.711924077759069, + "eval_loss": Infinity, + "eval_runtime": 177.9604, + "eval_samples_per_second": 39.458, + "eval_steps_per_second": 4.934, + "eval_wer": 0.38257615823989843, "step": 97000 }, { - "epoch": 3.8311917953554167, - "grad_norm": 7.725172519683838, - "learning_rate": 0.000154190977443609, - "loss": 0.2648, + "epoch": 3.731057707025869, + "grad_norm": 0.6855655312538147, + "learning_rate": 0.00015418646616541354, + "loss": 0.2713, "step": 97500 }, { - "epoch": 3.8508389327674957, - "grad_norm": 0.9012848734855652, - "learning_rate": 0.00015343909774436087, - "loss": 0.2616, + "epoch": 3.750191336292668, + "grad_norm": 1.3021297454833984, + "learning_rate": 0.0001534345864661654, + "loss": 0.2605, "step": 98000 }, { - "epoch": 3.8508389327674957, - "eval_loss": 0.4511352777481079, - "eval_runtime": 146.3391, - "eval_samples_per_second": 38.65, - "eval_steps_per_second": 4.831, - "eval_wer": 0.3660348895058657, + "epoch": 3.750191336292668, + "eval_loss": Infinity, + "eval_runtime": 177.5188, + "eval_samples_per_second": 39.556, + "eval_steps_per_second": 4.946, + "eval_wer": 0.3733075946689232, "step": 98000 }, { - "epoch": 3.8704860701795747, - "grad_norm": 0.4818692207336426, - "learning_rate": 0.00015268721804511276, - "loss": 0.2429, + "epoch": 3.769324965559467, + "grad_norm": 0.70773845911026, + "learning_rate": 0.0001526827067669173, + "loss": 0.2601, "step": 98500 }, { - "epoch": 3.8901332075916537, - "grad_norm": 1.495732307434082, - "learning_rate": 0.00015193533834586463, - "loss": 0.2625, + "epoch": 3.788458594826267, + "grad_norm": 0.58240807056427, + "learning_rate": 0.00015193082706766917, + "loss": 0.2649, "step": 99000 }, { - "epoch": 3.8901332075916537, - "eval_loss": 0.4607318639755249, - "eval_runtime": 145.4992, - "eval_samples_per_second": 38.873, - "eval_steps_per_second": 4.859, - "eval_wer": 0.36436584230713676, + "epoch": 3.788458594826267, + "eval_loss": Infinity, + "eval_runtime": 176.98, + "eval_samples_per_second": 39.677, + "eval_steps_per_second": 4.961, + "eval_wer": 0.3690633594245822, "step": 99000 }, { - "epoch": 3.9097803450037327, - "grad_norm": 0.5369844436645508, - "learning_rate": 0.00015118345864661653, - "loss": 0.2693, + "epoch": 3.807592224093066, + "grad_norm": 0.624595582485199, + "learning_rate": 0.00015118045112781953, + "loss": 0.2678, "step": 99500 }, { - "epoch": 3.9294274824158117, - "grad_norm": 2.374652862548828, - "learning_rate": 0.0001504315789473684, - "loss": 0.2627, + "epoch": 3.826725853359865, + "grad_norm": 1.283463954925537, + "learning_rate": 0.00015042857142857143, + "loss": 0.2638, "step": 100000 }, { - "epoch": 3.9294274824158117, - "eval_loss": 0.4455793499946594, - "eval_runtime": 146.0128, - "eval_samples_per_second": 38.736, - "eval_steps_per_second": 4.842, - "eval_wer": 0.36256840686235176, + "epoch": 3.826725853359865, + "eval_loss": Infinity, + "eval_runtime": 176.8803, + "eval_samples_per_second": 39.699, + "eval_steps_per_second": 4.964, + "eval_wer": 0.37530410408292786, "step": 100000 }, { - "epoch": 3.949074619827891, - "grad_norm": 1.0198256969451904, - "learning_rate": 0.0001496812030075188, - "loss": 0.2569, + "epoch": 3.8458594826266648, + "grad_norm": 0.7322863936424255, + "learning_rate": 0.0001496766917293233, + "loss": 0.2743, "step": 100500 }, { - "epoch": 3.9687217572399702, - "grad_norm": 0.7093910574913025, - "learning_rate": 0.00014892932330827068, - "loss": 0.252, + "epoch": 3.864993111893464, + "grad_norm": 0.49134284257888794, + "learning_rate": 0.0001489248120300752, + "loss": 0.2749, "step": 101000 }, { - "epoch": 3.9687217572399702, - "eval_loss": 0.4579247534275055, - "eval_runtime": 145.1833, - "eval_samples_per_second": 38.958, - "eval_steps_per_second": 4.87, - "eval_wer": 0.36651634542857603, + "epoch": 3.864993111893464, + "eval_loss": Infinity, + "eval_runtime": 177.6568, + "eval_samples_per_second": 39.526, + "eval_steps_per_second": 4.942, + "eval_wer": 0.3675163951766448, "step": 101000 }, { - "epoch": 3.9883688946520492, - "grad_norm": 0.7897918820381165, - "learning_rate": 0.00014817744360902255, - "loss": 0.2528, + "epoch": 3.884126741160263, + "grad_norm": 1.705079436302185, + "learning_rate": 0.00014817293233082706, + "loss": 0.2576, "step": 101500 }, { - "epoch": 4.008016032064128, - "grad_norm": 0.773681640625, - "learning_rate": 0.00014742556390977442, - "loss": 0.2489, + "epoch": 3.9032603704270628, + "grad_norm": 1.390942931175232, + "learning_rate": 0.00014742105263157893, + "loss": 0.2635, "step": 102000 }, { - "epoch": 4.008016032064128, - "eval_loss": 0.45104366540908813, - "eval_runtime": 145.9991, - "eval_samples_per_second": 38.74, - "eval_steps_per_second": 4.842, - "eval_wer": 0.36203880534737043, + "epoch": 3.9032603704270628, + "eval_loss": Infinity, + "eval_runtime": 177.7395, + "eval_samples_per_second": 39.507, + "eval_steps_per_second": 4.94, + "eval_wer": 0.3666701925111064, "step": 102000 }, { - "epoch": 4.027663169476208, - "grad_norm": 0.6559247970581055, - "learning_rate": 0.0001466736842105263, - "loss": 0.222, + "epoch": 3.922393999693862, + "grad_norm": 0.5910842418670654, + "learning_rate": 0.00014666917293233082, + "loss": 0.2654, "step": 102500 }, { - "epoch": 4.047310306888287, - "grad_norm": 1.860120415687561, - "learning_rate": 0.00014592481203007517, - "loss": 0.2218, + "epoch": 3.941527628960661, + "grad_norm": 1.1575956344604492, + "learning_rate": 0.0001459172932330827, + "loss": 0.2639, "step": 103000 }, { - "epoch": 4.047310306888287, - "eval_loss": 0.4418700039386749, - "eval_runtime": 149.8176, - "eval_samples_per_second": 37.753, - "eval_steps_per_second": 4.719, - "eval_wer": 0.35350098698464155, + "epoch": 3.941527628960661, + "eval_loss": Infinity, + "eval_runtime": 177.3808, + "eval_samples_per_second": 39.587, + "eval_steps_per_second": 4.95, + "eval_wer": 0.36727840067696216, "step": 103000 }, { - "epoch": 4.066957444300366, - "grad_norm": 0.8769797682762146, - "learning_rate": 0.00014517293233082707, - "loss": 0.2218, + "epoch": 3.9606612582274607, + "grad_norm": 9.209879875183105, + "learning_rate": 0.00014516691729323306, + "loss": 0.2671, "step": 103500 }, { - "epoch": 4.086604581712445, - "grad_norm": 1.1328709125518799, - "learning_rate": 0.00014442105263157894, - "loss": 0.2211, + "epoch": 3.97979488749426, + "grad_norm": 0.8380705714225769, + "learning_rate": 0.00014441654135338345, + "loss": 0.2602, "step": 104000 }, { - "epoch": 4.086604581712445, - "eval_loss": 0.449856162071228, - "eval_runtime": 144.4159, - "eval_samples_per_second": 39.165, - "eval_steps_per_second": 4.896, - "eval_wer": 0.3771404727897161, + "epoch": 3.97979488749426, + "eval_loss": Infinity, + "eval_runtime": 177.3769, + "eval_samples_per_second": 39.588, + "eval_steps_per_second": 4.95, + "eval_wer": 0.36286228051618363, "step": 104000 }, { - "epoch": 4.106251719124524, - "grad_norm": 0.8746039271354675, - "learning_rate": 0.0001436706766917293, - "loss": 0.2188, + "epoch": 3.998928516761059, + "grad_norm": 0.6727402210235596, + "learning_rate": 0.00014366616541353384, + "loss": 0.2579, "step": 104500 }, { - "epoch": 4.125898856536603, - "grad_norm": 0.55832839012146, - "learning_rate": 0.0001429203007518797, - "loss": 0.2186, + "epoch": 4.018062146027859, + "grad_norm": 0.38106000423431396, + "learning_rate": 0.0001429142857142857, + "loss": 0.2217, "step": 105000 }, { - "epoch": 4.125898856536603, - "eval_loss": 0.4546278417110443, - "eval_runtime": 144.5427, - "eval_samples_per_second": 39.13, - "eval_steps_per_second": 4.891, - "eval_wer": 0.36601884097510873, + "epoch": 4.018062146027859, + "eval_loss": Infinity, + "eval_runtime": 178.3027, + "eval_samples_per_second": 39.382, + "eval_steps_per_second": 4.924, + "eval_wer": 0.3644621324307172, "step": 105000 }, { - "epoch": 4.145545993948682, - "grad_norm": 0.7782666087150574, - "learning_rate": 0.00014216842105263156, - "loss": 0.2184, + "epoch": 4.0371957752946575, + "grad_norm": 3.7064096927642822, + "learning_rate": 0.0001421624060150376, + "loss": 0.2202, "step": 105500 }, { - "epoch": 4.165193131360761, - "grad_norm": 0.768484890460968, - "learning_rate": 0.00014141654135338346, - "loss": 0.2199, + "epoch": 4.056329404561457, + "grad_norm": 0.49550744891166687, + "learning_rate": 0.00014141203007518797, + "loss": 0.2226, "step": 106000 }, { - "epoch": 4.165193131360761, - "eval_loss": 0.4395730495452881, - "eval_runtime": 144.834, - "eval_samples_per_second": 39.052, - "eval_steps_per_second": 4.881, - "eval_wer": 0.35423921939946396, + "epoch": 4.056329404561457, + "eval_loss": Infinity, + "eval_runtime": 177.9052, + "eval_samples_per_second": 39.47, + "eval_steps_per_second": 4.935, + "eval_wer": 0.3568859741908187, "step": 106000 }, { - "epoch": 4.18484026877284, - "grad_norm": 0.5472589135169983, - "learning_rate": 0.00014066466165413532, - "loss": 0.2206, + "epoch": 4.075463033828257, + "grad_norm": 0.628818690776825, + "learning_rate": 0.00014066015037593983, + "loss": 0.2226, "step": 106500 }, { - "epoch": 4.204487406184919, - "grad_norm": 0.4021967947483063, - "learning_rate": 0.0001399127819548872, - "loss": 0.2227, + "epoch": 4.094596663095055, + "grad_norm": 0.48665696382522583, + "learning_rate": 0.0001399082706766917, + "loss": 0.2209, "step": 107000 }, { - "epoch": 4.204487406184919, - "eval_loss": 0.4468631446361542, - "eval_runtime": 144.5271, - "eval_samples_per_second": 39.135, - "eval_steps_per_second": 4.892, - "eval_wer": 0.35748102261237985, + "epoch": 4.094596663095055, + "eval_loss": Infinity, + "eval_runtime": 177.9704, + "eval_samples_per_second": 39.456, + "eval_steps_per_second": 4.933, + "eval_wer": 0.35495557436005926, "step": 107000 }, { - "epoch": 4.224134543596998, - "grad_norm": 1.0301532745361328, - "learning_rate": 0.0001391609022556391, - "loss": 0.2292, + "epoch": 4.113730292361855, + "grad_norm": 1.1963294744491577, + "learning_rate": 0.0001391563909774436, + "loss": 0.2197, "step": 107500 }, { - "epoch": 4.243781681009077, - "grad_norm": 0.6561172008514404, - "learning_rate": 0.00013840902255639095, - "loss": 0.2212, + "epoch": 4.132863921628655, + "grad_norm": 0.4918075203895569, + "learning_rate": 0.00013840451127819547, + "loss": 0.2326, "step": 108000 }, { - "epoch": 4.243781681009077, - "eval_loss": 0.44032466411590576, - "eval_runtime": 144.5017, - "eval_samples_per_second": 39.141, - "eval_steps_per_second": 4.893, - "eval_wer": 0.3500826499333986, + "epoch": 4.132863921628655, + "eval_loss": Infinity, + "eval_runtime": 177.4603, + "eval_samples_per_second": 39.569, + "eval_steps_per_second": 4.948, + "eval_wer": 0.3595303575206262, "step": 108000 }, { - "epoch": 4.263428818421156, - "grad_norm": 0.7782973647117615, - "learning_rate": 0.00013765714285714285, - "loss": 0.218, + "epoch": 4.151997550895453, + "grad_norm": 0.4551312029361725, + "learning_rate": 0.00013765263157894736, + "loss": 0.2176, "step": 108500 }, { - "epoch": 4.283075955833235, - "grad_norm": 0.5677826404571533, - "learning_rate": 0.00013690526315789472, - "loss": 0.2182, + "epoch": 4.171131180162253, + "grad_norm": 0.5786845088005066, + "learning_rate": 0.00013690075187969923, + "loss": 0.2203, "step": 109000 }, { - "epoch": 4.283075955833235, - "eval_loss": 0.4507006108760834, - "eval_runtime": 144.3123, - "eval_samples_per_second": 39.193, - "eval_steps_per_second": 4.899, - "eval_wer": 0.3599364478182022, + "epoch": 4.171131180162253, + "eval_loss": Infinity, + "eval_runtime": 178.5842, + "eval_samples_per_second": 39.32, + "eval_steps_per_second": 4.916, + "eval_wer": 0.3556166701925111, "step": 109000 }, { - "epoch": 4.302723093245314, - "grad_norm": 0.48135581612586975, - "learning_rate": 0.0001361533834586466, - "loss": 0.2191, + "epoch": 4.190264809429053, + "grad_norm": 0.9616146087646484, + "learning_rate": 0.00013614887218045112, + "loss": 0.2292, "step": 109500 }, { - "epoch": 4.322370230657393, - "grad_norm": 0.686140775680542, - "learning_rate": 0.00013540150375939848, - "loss": 0.2212, + "epoch": 4.209398438695851, + "grad_norm": 0.5730082392692566, + "learning_rate": 0.000135396992481203, + "loss": 0.2267, "step": 110000 }, { - "epoch": 4.322370230657393, - "eval_loss": 0.4435155391693115, - "eval_runtime": 144.5051, - "eval_samples_per_second": 39.14, - "eval_steps_per_second": 4.893, - "eval_wer": 0.3575612652661649, + "epoch": 4.209398438695851, + "eval_loss": Infinity, + "eval_runtime": 179.0437, + "eval_samples_per_second": 39.219, + "eval_steps_per_second": 4.904, + "eval_wer": 0.35085678019885763, "step": 110000 }, { - "epoch": 4.342017368069472, - "grad_norm": 2.3186769485473633, - "learning_rate": 0.00013464962406015038, - "loss": 0.2213, + "epoch": 4.228532067962651, + "grad_norm": 1.2517842054367065, + "learning_rate": 0.00013464661654135338, + "loss": 0.2262, "step": 110500 }, { - "epoch": 4.361664505481551, - "grad_norm": 2.254951238632202, - "learning_rate": 0.00013389774436090224, - "loss": 0.2211, + "epoch": 4.247665697229451, + "grad_norm": 0.894205629825592, + "learning_rate": 0.00013389473684210525, + "loss": 0.223, "step": 111000 }, { - "epoch": 4.361664505481551, - "eval_loss": 0.45138731598854065, - "eval_runtime": 144.5221, - "eval_samples_per_second": 39.136, - "eval_steps_per_second": 4.892, - "eval_wer": 0.36893967357288443, + "epoch": 4.247665697229451, + "eval_loss": Infinity, + "eval_runtime": 178.4171, + "eval_samples_per_second": 39.357, + "eval_steps_per_second": 4.921, + "eval_wer": 0.3580891686058811, "step": 111000 }, { - "epoch": 4.38131164289363, - "grad_norm": 0.5208560228347778, - "learning_rate": 0.0001331458646616541, - "loss": 0.2042, + "epoch": 4.266799326496249, + "grad_norm": 0.6541041731834412, + "learning_rate": 0.00013314285714285715, + "loss": 0.2256, "step": 111500 }, { - "epoch": 4.400958780305709, - "grad_norm": 0.7651325464248657, - "learning_rate": 0.0001323954887218045, - "loss": 0.2116, + "epoch": 4.285932955763049, + "grad_norm": 1.8858749866485596, + "learning_rate": 0.0001323924812030075, + "loss": 0.2273, "step": 112000 }, { - "epoch": 4.400958780305709, - "eval_loss": 0.44426095485687256, - "eval_runtime": 144.753, - "eval_samples_per_second": 39.073, - "eval_steps_per_second": 4.884, - "eval_wer": 0.35077273675595, + "epoch": 4.285932955763049, + "eval_loss": Infinity, + "eval_runtime": 178.7774, + "eval_samples_per_second": 39.278, + "eval_steps_per_second": 4.911, + "eval_wer": 0.35478368944362176, "step": 112000 }, { - "epoch": 4.420605917717788, - "grad_norm": 0.7976289987564087, - "learning_rate": 0.00013164360902255637, - "loss": 0.2213, + "epoch": 4.305066585029849, + "grad_norm": 0.36083123087882996, + "learning_rate": 0.00013164060150375938, + "loss": 0.2186, "step": 112500 }, { - "epoch": 4.440253055129867, - "grad_norm": 0.7153854966163635, - "learning_rate": 0.00013089172932330827, - "loss": 0.2218, + "epoch": 4.324200214296647, + "grad_norm": 0.8509350419044495, + "learning_rate": 0.00013088872180451125, + "loss": 0.2278, "step": 113000 }, { - "epoch": 4.440253055129867, - "eval_loss": 0.44099488854408264, - "eval_runtime": 145.5781, - "eval_samples_per_second": 38.852, - "eval_steps_per_second": 4.856, - "eval_wer": 0.3471618173356229, + "epoch": 4.324200214296647, + "eval_loss": Infinity, + "eval_runtime": 179.5764, + "eval_samples_per_second": 39.103, + "eval_steps_per_second": 4.889, + "eval_wer": 0.34927015020097313, "step": 113000 }, { - "epoch": 4.459900192541947, - "grad_norm": 0.8848706483840942, - "learning_rate": 0.00013014135338345863, - "loss": 0.2213, + "epoch": 4.343333843563447, + "grad_norm": 2.493048667907715, + "learning_rate": 0.00013013684210526314, + "loss": 0.2283, "step": 113500 }, { - "epoch": 4.479547329954026, - "grad_norm": 0.590100109577179, - "learning_rate": 0.0001293894736842105, - "loss": 0.2152, + "epoch": 4.362467472830247, + "grad_norm": 0.5552091598510742, + "learning_rate": 0.000129384962406015, + "loss": 0.2372, "step": 114000 }, { - "epoch": 4.479547329954026, - "eval_loss": 0.446841299533844, - "eval_runtime": 146.3889, - "eval_samples_per_second": 38.637, - "eval_steps_per_second": 4.83, - "eval_wer": 0.35348493845388457, + "epoch": 4.362467472830247, + "eval_loss": Infinity, + "eval_runtime": 179.2629, + "eval_samples_per_second": 39.172, + "eval_steps_per_second": 4.898, + "eval_wer": 0.3600592341865877, "step": 114000 }, { - "epoch": 4.499194467366105, - "grad_norm": 2.4068264961242676, - "learning_rate": 0.0001286375939849624, - "loss": 0.2149, + "epoch": 4.381601102097045, + "grad_norm": 2.732189178466797, + "learning_rate": 0.0001286330827067669, + "loss": 0.2209, "step": 114500 }, { - "epoch": 4.518841604778184, - "grad_norm": 0.6972984671592712, - "learning_rate": 0.00012788571428571426, - "loss": 0.2174, + "epoch": 4.400734731363845, + "grad_norm": 0.6724231839179993, + "learning_rate": 0.00012788120300751877, + "loss": 0.22, "step": 115000 }, { - "epoch": 4.518841604778184, - "eval_loss": 0.4498594105243683, - "eval_runtime": 145.1426, - "eval_samples_per_second": 38.969, - "eval_steps_per_second": 4.871, - "eval_wer": 0.3469692349665388, + "epoch": 4.400734731363845, + "eval_loss": Infinity, + "eval_runtime": 178.4303, + "eval_samples_per_second": 39.354, + "eval_steps_per_second": 4.921, + "eval_wer": 0.3549026866934631, "step": 115000 }, { - "epoch": 4.538488742190263, - "grad_norm": 0.6739790439605713, - "learning_rate": 0.00012713383458646616, - "loss": 0.2148, + "epoch": 4.419868360630645, + "grad_norm": 0.8096573948860168, + "learning_rate": 0.00012712932330827067, + "loss": 0.2166, "step": 115500 }, { - "epoch": 4.558135879602342, - "grad_norm": 4.946841716766357, - "learning_rate": 0.00012638195488721802, - "loss": 0.212, + "epoch": 4.439001989897443, + "grad_norm": 0.32577428221702576, + "learning_rate": 0.00012637744360902254, + "loss": 0.228, "step": 116000 }, { - "epoch": 4.558135879602342, - "eval_loss": 0.4453933835029602, - "eval_runtime": 145.1072, - "eval_samples_per_second": 38.978, - "eval_steps_per_second": 4.872, - "eval_wer": 0.34401630530724914, + "epoch": 4.439001989897443, + "eval_loss": Infinity, + "eval_runtime": 178.1414, + "eval_samples_per_second": 39.418, + "eval_steps_per_second": 4.929, + "eval_wer": 0.34994446795007406, "step": 116000 }, { - "epoch": 4.577783017014421, - "grad_norm": 0.5079777240753174, - "learning_rate": 0.00012563007518796992, - "loss": 0.2097, + "epoch": 4.458135619164243, + "grad_norm": 0.36173292994499207, + "learning_rate": 0.00012562556390977443, + "loss": 0.225, "step": 116500 }, { - "epoch": 4.5974301544265, - "grad_norm": 1.189431071281433, - "learning_rate": 0.0001248781954887218, - "loss": 0.2039, + "epoch": 4.477269248431043, + "grad_norm": 0.7031286358833313, + "learning_rate": 0.0001248736842105263, + "loss": 0.2291, "step": 117000 }, { - "epoch": 4.5974301544265, - "eval_loss": 0.4423506259918213, - "eval_runtime": 144.2129, - "eval_samples_per_second": 39.22, - "eval_steps_per_second": 4.902, - "eval_wer": 0.34892715571889393, + "epoch": 4.477269248431043, + "eval_loss": Infinity, + "eval_runtime": 179.2214, + "eval_samples_per_second": 39.181, + "eval_steps_per_second": 4.899, + "eval_wer": 0.3485429447852761, "step": 117000 }, { - "epoch": 4.617077291838579, - "grad_norm": 0.5739697813987732, - "learning_rate": 0.00012412781954887218, - "loss": 0.2137, + "epoch": 4.496402877697841, + "grad_norm": 0.8883704543113708, + "learning_rate": 0.00012412180451127817, + "loss": 0.2254, "step": 117500 }, { - "epoch": 4.636724429250658, - "grad_norm": 1.8628792762756348, - "learning_rate": 0.00012337593984962405, - "loss": 0.2073, + "epoch": 4.515536506964641, + "grad_norm": 0.7868921160697937, + "learning_rate": 0.00012336992481203006, + "loss": 0.2301, "step": 118000 }, { - "epoch": 4.636724429250658, - "eval_loss": 0.44371461868286133, - "eval_runtime": 144.8897, - "eval_samples_per_second": 39.037, - "eval_steps_per_second": 4.88, - "eval_wer": 0.3466161672898846, + "epoch": 4.515536506964641, + "eval_loss": Infinity, + "eval_runtime": 180.1835, + "eval_samples_per_second": 38.971, + "eval_steps_per_second": 4.873, + "eval_wer": 0.3487941612016078, "step": 118000 }, { - "epoch": 4.656371566662737, - "grad_norm": 0.6919093728065491, - "learning_rate": 0.00012262406015037594, - "loss": 0.2111, + "epoch": 4.5346701362314406, + "grad_norm": 0.8620243072509766, + "learning_rate": 0.00012261804511278193, + "loss": 0.2207, "step": 118500 }, { - "epoch": 4.676018704074816, - "grad_norm": 0.6628223061561584, - "learning_rate": 0.00012187218045112781, - "loss": 0.2177, + "epoch": 4.553803765498239, + "grad_norm": 1.267608642578125, + "learning_rate": 0.00012186616541353381, + "loss": 0.2084, "step": 119000 }, { - "epoch": 4.676018704074816, - "eval_loss": 0.43920648097991943, - "eval_runtime": 144.6466, - "eval_samples_per_second": 39.102, - "eval_steps_per_second": 4.888, - "eval_wer": 0.34218677280095006, + "epoch": 4.553803765498239, + "eval_loss": Infinity, + "eval_runtime": 178.8973, + "eval_samples_per_second": 39.252, + "eval_steps_per_second": 4.908, + "eval_wer": 0.3515046541146605, "step": 119000 }, { - "epoch": 4.695665841486895, - "grad_norm": 0.7294492721557617, - "learning_rate": 0.00012112030075187969, - "loss": 0.2154, + "epoch": 4.572937394765039, + "grad_norm": 0.8290882706642151, + "learning_rate": 0.00012111428571428569, + "loss": 0.2121, "step": 119500 }, { - "epoch": 4.715312978898974, - "grad_norm": 1.2088764905929565, - "learning_rate": 0.00012036842105263157, - "loss": 0.2121, + "epoch": 4.5920710240318385, + "grad_norm": 0.5240318775177002, + "learning_rate": 0.00012036541353383458, + "loss": 0.2251, "step": 120000 }, { - "epoch": 4.715312978898974, - "eval_loss": 0.44427990913391113, - "eval_runtime": 144.8984, - "eval_samples_per_second": 39.034, - "eval_steps_per_second": 4.879, - "eval_wer": 0.34372743175362297, + "epoch": 4.5920710240318385, + "eval_loss": Infinity, + "eval_runtime": 179.3697, + "eval_samples_per_second": 39.148, + "eval_steps_per_second": 4.895, + "eval_wer": 0.3509228897821028, "step": 120000 }, { - "epoch": 4.734960116311053, - "grad_norm": 0.3588174283504486, - "learning_rate": 0.00011961654135338345, - "loss": 0.2103, + "epoch": 4.611204653298637, + "grad_norm": 0.79433274269104, + "learning_rate": 0.00011961353383458646, + "loss": 0.2152, "step": 120500 }, { - "epoch": 4.754607253723132, - "grad_norm": 0.5091924667358398, - "learning_rate": 0.00011886466165413532, - "loss": 0.2072, + "epoch": 4.630338282565437, + "grad_norm": 0.5738509893417358, + "learning_rate": 0.00011886165413533834, + "loss": 0.2205, "step": 121000 }, { - "epoch": 4.754607253723132, - "eval_loss": 0.42684319615364075, - "eval_runtime": 143.2476, - "eval_samples_per_second": 39.484, - "eval_steps_per_second": 4.936, - "eval_wer": 0.34615075989793137, + "epoch": 4.630338282565437, + "eval_loss": Infinity, + "eval_runtime": 180.0051, + "eval_samples_per_second": 39.01, + "eval_steps_per_second": 4.878, + "eval_wer": 0.34464247937381004, "step": 121000 }, { - "epoch": 4.774254391135211, - "grad_norm": 0.49059540033340454, - "learning_rate": 0.0001181127819548872, - "loss": 0.204, + "epoch": 4.6494719118322365, + "grad_norm": 0.6107327938079834, + "learning_rate": 0.00011810977443609022, + "loss": 0.2153, "step": 121500 }, { - "epoch": 4.79390152854729, - "grad_norm": 0.3562159836292267, - "learning_rate": 0.00011736090225563909, - "loss": 0.2138, + "epoch": 4.668605541099035, + "grad_norm": 0.5332146286964417, + "learning_rate": 0.0001173578947368421, + "loss": 0.2174, "step": 122000 }, { - "epoch": 4.79390152854729, - "eval_loss": 0.4271770417690277, - "eval_runtime": 142.8263, - "eval_samples_per_second": 39.601, - "eval_steps_per_second": 4.95, - "eval_wer": 0.34318178170788466, + "epoch": 4.668605541099035, + "eval_loss": Infinity, + "eval_runtime": 179.5227, + "eval_samples_per_second": 39.115, + "eval_steps_per_second": 4.891, + "eval_wer": 0.3458985614554686, "step": 122000 }, { - "epoch": 4.813548665959369, - "grad_norm": 1.027219295501709, - "learning_rate": 0.00011660902255639097, - "loss": 0.1947, + "epoch": 4.687739170365835, + "grad_norm": 1.588100790977478, + "learning_rate": 0.00011660601503759397, + "loss": 0.2132, "step": 122500 }, { - "epoch": 4.833195803371449, - "grad_norm": 0.5677986145019531, - "learning_rate": 0.00011585714285714285, - "loss": 0.2145, + "epoch": 4.7068727996326345, + "grad_norm": 2.00449275970459, + "learning_rate": 0.00011585413533834586, + "loss": 0.2136, "step": 123000 }, { - "epoch": 4.833195803371449, - "eval_loss": 0.43315112590789795, - "eval_runtime": 143.3445, - "eval_samples_per_second": 39.457, - "eval_steps_per_second": 4.932, - "eval_wer": 0.3453964789523519, + "epoch": 4.7068727996326345, + "eval_loss": Infinity, + "eval_runtime": 179.5882, + "eval_samples_per_second": 39.101, + "eval_steps_per_second": 4.889, + "eval_wer": 0.3498651364501798, "step": 123000 }, { - "epoch": 4.852842940783528, - "grad_norm": 0.7301272749900818, - "learning_rate": 0.00011510676691729323, - "loss": 0.2019, + "epoch": 4.726006428899433, + "grad_norm": 0.7837647795677185, + "learning_rate": 0.00011510225563909774, + "loss": 0.225, "step": 123500 }, { - "epoch": 4.872490078195607, - "grad_norm": 16.804716110229492, - "learning_rate": 0.0001143578947368421, - "loss": 0.2217, + "epoch": 4.745140058166233, + "grad_norm": 1.3031939268112183, + "learning_rate": 0.00011435037593984962, + "loss": 0.2142, "step": 124000 }, { - "epoch": 4.872490078195607, - "eval_loss": 0.42095693945884705, - "eval_runtime": 143.7819, - "eval_samples_per_second": 39.337, - "eval_steps_per_second": 4.917, - "eval_wer": 0.3391215034263613, + "epoch": 4.745140058166233, + "eval_loss": Infinity, + "eval_runtime": 179.8141, + "eval_samples_per_second": 39.051, + "eval_steps_per_second": 4.883, + "eval_wer": 0.3449201396234398, "step": 124000 }, { - "epoch": 4.892137215607686, - "grad_norm": 0.4827280640602112, - "learning_rate": 0.00011360601503759398, - "loss": 0.1994, + "epoch": 4.7642736874330325, + "grad_norm": 0.5795506834983826, + "learning_rate": 0.00011359999999999998, + "loss": 0.2155, "step": 124500 }, { - "epoch": 4.911784353019765, - "grad_norm": 0.6648825407028198, - "learning_rate": 0.00011285413533834586, - "loss": 0.2069, + "epoch": 4.783407316699831, + "grad_norm": 0.7235686182975769, + "learning_rate": 0.00011284812030075186, + "loss": 0.2152, "step": 125000 }, { - "epoch": 4.911784353019765, - "eval_loss": 0.427772581577301, - "eval_runtime": 144.7524, - "eval_samples_per_second": 39.074, - "eval_steps_per_second": 4.884, - "eval_wer": 0.3376289900659595, + "epoch": 4.783407316699831, + "eval_loss": Infinity, + "eval_runtime": 179.9818, + "eval_samples_per_second": 39.015, + "eval_steps_per_second": 4.878, + "eval_wer": 0.34659932303786756, "step": 125000 }, { - "epoch": 4.931431490431844, - "grad_norm": 0.3194764256477356, - "learning_rate": 0.00011210225563909773, - "loss": 0.1946, + "epoch": 4.802540945966631, + "grad_norm": 0.4587650001049042, + "learning_rate": 0.00011209774436090224, + "loss": 0.2081, "step": 125500 }, { - "epoch": 4.951078627843923, - "grad_norm": 0.9185254573822021, - "learning_rate": 0.00011135187969924811, - "loss": 0.2068, + "epoch": 4.8216745752334305, + "grad_norm": 1.3301700353622437, + "learning_rate": 0.00011134736842105263, + "loss": 0.2216, "step": 126000 }, { - "epoch": 4.951078627843923, - "eval_loss": 0.4216279685497284, - "eval_runtime": 143.9237, - "eval_samples_per_second": 39.299, - "eval_steps_per_second": 4.912, - "eval_wer": 0.33867214456516503, + "epoch": 4.8216745752334305, + "eval_loss": Infinity, + "eval_runtime": 179.2673, + "eval_samples_per_second": 39.171, + "eval_steps_per_second": 4.898, + "eval_wer": 0.34429870954093506, "step": 126000 }, { - "epoch": 4.970725765256002, - "grad_norm": 0.4608317017555237, - "learning_rate": 0.00011059999999999998, - "loss": 0.2098, + "epoch": 4.840808204500229, + "grad_norm": 1.0340607166290283, + "learning_rate": 0.00011059548872180452, + "loss": 0.2152, "step": 126500 }, { - "epoch": 4.990372902668081, - "grad_norm": 0.7766122221946716, - "learning_rate": 0.00010984812030075186, - "loss": 0.2129, + "epoch": 4.859941833767029, + "grad_norm": 0.7466903328895569, + "learning_rate": 0.00010984360902255638, + "loss": 0.2209, "step": 127000 }, { - "epoch": 4.990372902668081, - "eval_loss": 0.42103302478790283, - "eval_runtime": 144.3261, - "eval_samples_per_second": 39.189, - "eval_steps_per_second": 4.899, - "eval_wer": 0.3361525252363146, + "epoch": 4.859941833767029, + "eval_loss": Infinity, + "eval_runtime": 179.7521, + "eval_samples_per_second": 39.065, + "eval_steps_per_second": 4.885, + "eval_wer": 0.3455415697059446, "step": 127000 }, { - "epoch": 5.01002004008016, - "grad_norm": 0.7110891342163086, - "learning_rate": 0.00010909624060150374, - "loss": 0.1932, + "epoch": 4.879075463033828, + "grad_norm": 0.451224148273468, + "learning_rate": 0.00010909323308270676, + "loss": 0.2068, "step": 127500 }, { - "epoch": 5.0296671774922395, - "grad_norm": 0.5839011073112488, - "learning_rate": 0.00010834436090225562, - "loss": 0.1774, + "epoch": 4.898209092300627, + "grad_norm": 0.9599905610084534, + "learning_rate": 0.00010834135338345863, + "loss": 0.2183, "step": 128000 }, { - "epoch": 5.0296671774922395, - "eval_loss": 0.4340197741985321, - "eval_runtime": 144.3949, - "eval_samples_per_second": 39.17, - "eval_steps_per_second": 4.896, - "eval_wer": 0.3303590056330343, + "epoch": 4.898209092300627, + "eval_loss": Infinity, + "eval_runtime": 175.3325, + "eval_samples_per_second": 40.05, + "eval_steps_per_second": 5.008, + "eval_wer": 0.340398244129469, "step": 128000 }, { - "epoch": 5.0493143149043185, - "grad_norm": 0.4871758222579956, - "learning_rate": 0.000107593984962406, - "loss": 0.1764, + "epoch": 4.917342721567427, + "grad_norm": 1.071007251739502, + "learning_rate": 0.00010758947368421051, + "loss": 0.2115, "step": 128500 }, { - "epoch": 5.0689614523163975, - "grad_norm": 1.1092002391815186, - "learning_rate": 0.00010684210526315788, - "loss": 0.1705, + "epoch": 4.936476350834226, + "grad_norm": 0.9002227187156677, + "learning_rate": 0.00010683759398496239, + "loss": 0.2174, "step": 129000 }, { - "epoch": 5.0689614523163975, - "eval_loss": 0.44219356775283813, - "eval_runtime": 144.6388, - "eval_samples_per_second": 39.104, - "eval_steps_per_second": 4.888, - "eval_wer": 0.329877549710324, + "epoch": 4.936476350834226, + "eval_loss": Infinity, + "eval_runtime": 175.3184, + "eval_samples_per_second": 40.053, + "eval_steps_per_second": 5.008, + "eval_wer": 0.3402924687962767, "step": 129000 }, { - "epoch": 5.0886085897284765, - "grad_norm": 1.4170928001403809, - "learning_rate": 0.00010609022556390976, - "loss": 0.1799, + "epoch": 4.955609980101025, + "grad_norm": 3.2121989727020264, + "learning_rate": 0.00010608571428571427, + "loss": 0.2139, "step": 129500 }, { - "epoch": 5.1082557271405555, - "grad_norm": 0.5609749555587769, - "learning_rate": 0.00010533834586466164, - "loss": 0.1746, + "epoch": 4.974743609367825, + "grad_norm": 0.6666644811630249, + "learning_rate": 0.00010533383458646616, + "loss": 0.2165, "step": 130000 }, { - "epoch": 5.1082557271405555, - "eval_loss": 0.43062400817871094, - "eval_runtime": 144.8052, - "eval_samples_per_second": 39.059, - "eval_steps_per_second": 4.882, - "eval_wer": 0.3363451076053987, + "epoch": 4.974743609367825, + "eval_loss": Infinity, + "eval_runtime": 176.6287, + "eval_samples_per_second": 39.756, + "eval_steps_per_second": 4.971, + "eval_wer": 0.3419848741273535, "step": 130000 }, { - "epoch": 5.1279028645526346, - "grad_norm": 0.7241942882537842, - "learning_rate": 0.00010458646616541353, - "loss": 0.1719, + "epoch": 4.993877238634624, + "grad_norm": 1.018226981163025, + "learning_rate": 0.00010458195488721804, + "loss": 0.2075, "step": 130500 }, { - "epoch": 5.147550001964714, - "grad_norm": 7.793860912322998, - "learning_rate": 0.00010383458646616541, - "loss": 0.1813, + "epoch": 5.013010867901423, + "grad_norm": 0.9753539562225342, + "learning_rate": 0.00010383007518796992, + "loss": 0.1806, "step": 131000 }, { - "epoch": 5.147550001964714, - "eval_loss": 0.41806095838546753, - "eval_runtime": 144.8895, - "eval_samples_per_second": 39.037, - "eval_steps_per_second": 4.88, - "eval_wer": 0.33342427500762306, + "epoch": 5.013010867901423, + "eval_loss": Infinity, + "eval_runtime": 177.4567, + "eval_samples_per_second": 39.57, + "eval_steps_per_second": 4.948, + "eval_wer": 0.3380579648825894, "step": 131000 }, { - "epoch": 5.167197139376793, - "grad_norm": 0.5914771556854248, - "learning_rate": 0.00010308270676691729, - "loss": 0.1799, + "epoch": 5.032144497168223, + "grad_norm": 0.6430408954620361, + "learning_rate": 0.0001030781954887218, + "loss": 0.1788, "step": 131500 }, { - "epoch": 5.186844276788872, - "grad_norm": 1.6738320589065552, - "learning_rate": 0.00010233082706766916, - "loss": 0.1729, + "epoch": 5.051278126435022, + "grad_norm": 0.5756456255912781, + "learning_rate": 0.00010232631578947367, + "loss": 0.1821, "step": 132000 }, { - "epoch": 5.186844276788872, - "eval_loss": 0.4319230020046234, - "eval_runtime": 144.7317, - "eval_samples_per_second": 39.079, - "eval_steps_per_second": 4.885, - "eval_wer": 0.336858660589623, + "epoch": 5.051278126435022, + "eval_loss": Infinity, + "eval_runtime": 178.4262, + "eval_samples_per_second": 39.355, + "eval_steps_per_second": 4.921, + "eval_wer": 0.3426459699598054, "step": 132000 }, { - "epoch": 5.206491414200951, - "grad_norm": 0.6387330889701843, - "learning_rate": 0.00010157894736842104, - "loss": 0.1682, + "epoch": 5.070411755701821, + "grad_norm": 0.7271620035171509, + "learning_rate": 0.00010157443609022555, + "loss": 0.1915, "step": 132500 }, { - "epoch": 5.22613855161303, - "grad_norm": 0.5514143705368042, - "learning_rate": 0.00010082706766917292, - "loss": 0.1777, + "epoch": 5.089545384968621, + "grad_norm": 0.8460062146186829, + "learning_rate": 0.00010082255639097743, + "loss": 0.1825, "step": 133000 }, { - "epoch": 5.22613855161303, - "eval_loss": 0.4189823567867279, - "eval_runtime": 145.1159, - "eval_samples_per_second": 38.976, - "eval_steps_per_second": 4.872, - "eval_wer": 0.33265394553128663, + "epoch": 5.089545384968621, + "eval_loss": Infinity, + "eval_runtime": 178.6095, + "eval_samples_per_second": 39.315, + "eval_steps_per_second": 4.916, + "eval_wer": 0.3399619208800508, "step": 133000 }, { - "epoch": 5.245785689025109, - "grad_norm": 0.49433717131614685, - "learning_rate": 0.00010007669172932331, - "loss": 0.1757, + "epoch": 5.10867901423542, + "grad_norm": 0.43308231234550476, + "learning_rate": 0.00010007067669172931, + "loss": 0.179, "step": 133500 }, { - "epoch": 5.265432826437188, - "grad_norm": 14.663381576538086, - "learning_rate": 9.932481203007518e-05, - "loss": 0.18, + "epoch": 5.127812643502219, + "grad_norm": 0.5365935564041138, + "learning_rate": 9.932030075187969e-05, + "loss": 0.1876, "step": 134000 }, { - "epoch": 5.265432826437188, - "eval_loss": 0.42281797528266907, - "eval_runtime": 145.0781, - "eval_samples_per_second": 38.986, - "eval_steps_per_second": 4.873, - "eval_wer": 0.33376129415352024, + "epoch": 5.127812643502219, + "eval_loss": Infinity, + "eval_runtime": 178.192, + "eval_samples_per_second": 39.407, + "eval_steps_per_second": 4.927, + "eval_wer": 0.3381240744658346, "step": 134000 }, { - "epoch": 5.285079963849267, - "grad_norm": 0.3960479497909546, - "learning_rate": 9.857293233082706e-05, - "loss": 0.1773, + "epoch": 5.146946272769019, + "grad_norm": 0.35783401131629944, + "learning_rate": 9.856842105263157e-05, + "loss": 0.1794, "step": 134500 }, { - "epoch": 5.304727101261347, - "grad_norm": 0.48836782574653625, - "learning_rate": 9.782105263157894e-05, - "loss": 0.1747, + "epoch": 5.166079902035818, + "grad_norm": 1.0039490461349487, + "learning_rate": 9.781654135338345e-05, + "loss": 0.1858, "step": 135000 }, { - "epoch": 5.304727101261347, - "eval_loss": 0.4267714023590088, - "eval_runtime": 144.4858, - "eval_samples_per_second": 39.146, - "eval_steps_per_second": 4.893, - "eval_wer": 0.3322687807931184, + "epoch": 5.166079902035818, + "eval_loss": Infinity, + "eval_runtime": 181.1178, + "eval_samples_per_second": 38.77, + "eval_steps_per_second": 4.848, + "eval_wer": 0.3341707213877724, "step": 135000 }, { - "epoch": 5.324374238673426, - "grad_norm": 0.7414509654045105, - "learning_rate": 9.706917293233082e-05, - "loss": 0.1804, + "epoch": 5.185213531302617, + "grad_norm": 0.5719444751739502, + "learning_rate": 9.706466165413533e-05, + "loss": 0.1733, "step": 135500 }, { - "epoch": 5.344021376085505, - "grad_norm": 0.3100612461566925, - "learning_rate": 9.63172932330827e-05, - "loss": 0.1737, + "epoch": 5.204347160569417, + "grad_norm": 1.0103236436843872, + "learning_rate": 9.631278195488722e-05, + "loss": 0.1729, "step": 136000 }, { - "epoch": 5.344021376085505, - "eval_loss": 0.41930150985717773, - "eval_runtime": 145.0977, - "eval_samples_per_second": 38.981, - "eval_steps_per_second": 4.873, - "eval_wer": 0.3324774116929595, + "epoch": 5.204347160569417, + "eval_loss": Infinity, + "eval_runtime": 180.3304, + "eval_samples_per_second": 38.94, + "eval_steps_per_second": 4.869, + "eval_wer": 0.3325179818066427, "step": 136000 }, { - "epoch": 5.363668513497584, - "grad_norm": 2.2844786643981934, - "learning_rate": 9.556541353383459e-05, - "loss": 0.1779, + "epoch": 5.223480789836216, + "grad_norm": 0.34207335114479065, + "learning_rate": 9.55609022556391e-05, + "loss": 0.1722, "step": 136500 }, { - "epoch": 5.383315650909663, - "grad_norm": 0.7908081412315369, - "learning_rate": 9.481503759398495e-05, - "loss": 0.1709, + "epoch": 5.242614419103015, + "grad_norm": 1.3274930715560913, + "learning_rate": 9.480902255639098e-05, + "loss": 0.1843, "step": 137000 }, { - "epoch": 5.383315650909663, - "eval_loss": 0.4228932559490204, - "eval_runtime": 145.0454, - "eval_samples_per_second": 38.995, - "eval_steps_per_second": 4.874, - "eval_wer": 0.3278714833656979, + "epoch": 5.242614419103015, + "eval_loss": Infinity, + "eval_runtime": 180.6878, + "eval_samples_per_second": 38.863, + "eval_steps_per_second": 4.859, + "eval_wer": 0.3313808969748255, "step": 137000 }, { - "epoch": 5.402962788321742, - "grad_norm": 1.6749204397201538, - "learning_rate": 9.406315789473683e-05, - "loss": 0.1745, + "epoch": 5.261748048369815, + "grad_norm": 2.946866989135742, + "learning_rate": 9.405714285714285e-05, + "loss": 0.1733, "step": 137500 }, { - "epoch": 5.422609925733821, - "grad_norm": 0.25723955035209656, - "learning_rate": 9.331127819548871e-05, - "loss": 0.1726, + "epoch": 5.280881677636614, + "grad_norm": 1.175057291984558, + "learning_rate": 9.330526315789473e-05, + "loss": 0.1828, "step": 138000 }, { - "epoch": 5.422609925733821, - "eval_loss": 0.4178549647331238, - "eval_runtime": 145.1876, - "eval_samples_per_second": 38.957, - "eval_steps_per_second": 4.87, - "eval_wer": 0.32714929948163246, + "epoch": 5.280881677636614, + "eval_loss": Infinity, + "eval_runtime": 179.9842, + "eval_samples_per_second": 39.015, + "eval_steps_per_second": 4.878, + "eval_wer": 0.33381372963824835, "step": 138000 }, { - "epoch": 5.4422570631459, - "grad_norm": 0.43192166090011597, - "learning_rate": 9.255939849624058e-05, - "loss": 0.1699, + "epoch": 5.300015306903413, + "grad_norm": 0.5658883452415466, + "learning_rate": 9.255338345864661e-05, + "loss": 0.1905, "step": 138500 }, { - "epoch": 5.461904200557979, - "grad_norm": 0.4252433776855469, - "learning_rate": 9.180751879699246e-05, - "loss": 0.1741, + "epoch": 5.319148936170213, + "grad_norm": 2.0087709426879883, + "learning_rate": 9.180150375939849e-05, + "loss": 0.1878, "step": 139000 }, { - "epoch": 5.461904200557979, - "eval_loss": 0.42049652338027954, - "eval_runtime": 145.3425, - "eval_samples_per_second": 38.915, - "eval_steps_per_second": 4.864, - "eval_wer": 0.3254963008136605, + "epoch": 5.319148936170213, + "eval_loss": Infinity, + "eval_runtime": 179.94, + "eval_samples_per_second": 39.024, + "eval_steps_per_second": 4.879, + "eval_wer": 0.3299397080600804, "step": 139000 }, { - "epoch": 5.481551337970058, - "grad_norm": 0.6398211717605591, - "learning_rate": 9.105563909774435e-05, - "loss": 0.1675, + "epoch": 5.338282565437012, + "grad_norm": 0.7439378499984741, + "learning_rate": 9.104962406015037e-05, + "loss": 0.1756, "step": 139500 }, { - "epoch": 5.501198475382137, - "grad_norm": 2.678009510040283, - "learning_rate": 9.030375939849623e-05, - "loss": 0.1723, + "epoch": 5.357416194703811, + "grad_norm": 0.6208277344703674, + "learning_rate": 9.030075187969923e-05, + "loss": 0.1784, "step": 140000 }, { - "epoch": 5.501198475382137, - "eval_loss": 0.4140247702598572, - "eval_runtime": 145.8316, - "eval_samples_per_second": 38.784, - "eval_steps_per_second": 4.848, - "eval_wer": 0.32944423937988476, + "epoch": 5.357416194703811, + "eval_loss": Infinity, + "eval_runtime": 178.2886, + "eval_samples_per_second": 39.386, + "eval_steps_per_second": 4.925, + "eval_wer": 0.33048180664269095, "step": 140000 }, { - "epoch": 5.520845612794216, - "grad_norm": 0.42189884185791016, - "learning_rate": 8.955187969924811e-05, - "loss": 0.167, + "epoch": 5.376549823970611, + "grad_norm": 1.9707947969436646, + "learning_rate": 8.954887218045112e-05, + "loss": 0.1907, "step": 140500 }, { - "epoch": 5.540492750206295, - "grad_norm": 0.6850213408470154, - "learning_rate": 8.8803007518797e-05, - "loss": 0.1676, + "epoch": 5.39568345323741, + "grad_norm": 0.6385311484336853, + "learning_rate": 8.8796992481203e-05, + "loss": 0.1791, "step": 141000 }, { - "epoch": 5.540492750206295, - "eval_loss": 0.42560333013534546, - "eval_runtime": 145.0938, - "eval_samples_per_second": 38.982, - "eval_steps_per_second": 4.873, - "eval_wer": 0.32540000962911847, + "epoch": 5.39568345323741, + "eval_loss": Infinity, + "eval_runtime": 177.8472, + "eval_samples_per_second": 39.483, + "eval_steps_per_second": 4.937, + "eval_wer": 0.3262904590649461, "step": 141000 }, { - "epoch": 5.560139887618374, - "grad_norm": 0.46668741106987, - "learning_rate": 8.805112781954888e-05, - "loss": 0.1674, + "epoch": 5.414817082504209, + "grad_norm": 0.44527003169059753, + "learning_rate": 8.804511278195488e-05, + "loss": 0.1785, "step": 141500 }, { - "epoch": 5.579787025030453, - "grad_norm": 0.38750043511390686, - "learning_rate": 8.729924812030075e-05, - "loss": 0.1769, + "epoch": 5.433950711771009, + "grad_norm": 0.820563793182373, + "learning_rate": 8.729323308270676e-05, + "loss": 0.1861, "step": 142000 }, { - "epoch": 5.579787025030453, - "eval_loss": 0.41800424456596375, - "eval_runtime": 144.6116, - "eval_samples_per_second": 39.112, - "eval_steps_per_second": 4.889, - "eval_wer": 0.3279196289579689, + "epoch": 5.433950711771009, + "eval_loss": Infinity, + "eval_runtime": 182.6414, + "eval_samples_per_second": 38.447, + "eval_steps_per_second": 4.807, + "eval_wer": 0.3237518510683309, "step": 142000 }, { - "epoch": 5.599434162442532, - "grad_norm": 0.47452759742736816, - "learning_rate": 8.654887218045112e-05, - "loss": 0.1704, + "epoch": 5.453084341037808, + "grad_norm": 1.075088381767273, + "learning_rate": 8.654285714285714e-05, + "loss": 0.1878, "step": 142500 }, { - "epoch": 5.619081299854611, - "grad_norm": 1.3760634660720825, - "learning_rate": 8.579699248120299e-05, - "loss": 0.1718, + "epoch": 5.472217970304607, + "grad_norm": 1.048279047012329, + "learning_rate": 8.579097744360902e-05, + "loss": 0.176, "step": 143000 }, { - "epoch": 5.619081299854611, - "eval_loss": 0.4158097207546234, - "eval_runtime": 144.8323, - "eval_samples_per_second": 39.052, - "eval_steps_per_second": 4.882, - "eval_wer": 0.3203928680329316, + "epoch": 5.472217970304607, + "eval_loss": Infinity, + "eval_runtime": 180.7657, + "eval_samples_per_second": 38.846, + "eval_steps_per_second": 4.857, + "eval_wer": 0.3245319441506241, "step": 143000 }, { - "epoch": 5.63872843726669, - "grad_norm": 1.2168941497802734, - "learning_rate": 8.504511278195487e-05, - "loss": 0.1763, + "epoch": 5.491351599571407, + "grad_norm": 0.9499515295028687, + "learning_rate": 8.504060150375938e-05, + "loss": 0.1724, "step": 143500 }, { - "epoch": 5.658375574678769, - "grad_norm": 0.6660623550415039, - "learning_rate": 8.429323308270675e-05, - "loss": 0.1735, + "epoch": 5.510485228838206, + "grad_norm": 0.6625120639801025, + "learning_rate": 8.428872180451127e-05, + "loss": 0.1821, "step": 144000 }, { - "epoch": 5.658375574678769, - "eval_loss": 0.41737955808639526, - "eval_runtime": 145.161, - "eval_samples_per_second": 38.964, - "eval_steps_per_second": 4.87, - "eval_wer": 0.3209385180786699, + "epoch": 5.510485228838206, + "eval_loss": Infinity, + "eval_runtime": 180.84, + "eval_samples_per_second": 38.83, + "eval_steps_per_second": 4.855, + "eval_wer": 0.32155701290459066, "step": 144000 }, { - "epoch": 5.678022712090849, - "grad_norm": 0.7844908237457275, - "learning_rate": 8.354135338345864e-05, - "loss": 0.1696, + "epoch": 5.529618858105005, + "grad_norm": 1.221817135810852, + "learning_rate": 8.353834586466164e-05, + "loss": 0.1787, "step": 144500 }, { - "epoch": 5.697669849502928, - "grad_norm": 1.7285536527633667, - "learning_rate": 8.278947368421052e-05, - "loss": 0.1693, + "epoch": 5.548752487371805, + "grad_norm": 1.4700016975402832, + "learning_rate": 8.278646616541352e-05, + "loss": 0.176, "step": 145000 }, { - "epoch": 5.697669849502928, - "eval_loss": 0.416604220867157, - "eval_runtime": 143.979, - "eval_samples_per_second": 39.284, - "eval_steps_per_second": 4.91, - "eval_wer": 0.3197669753334082, + "epoch": 5.548752487371805, + "eval_loss": Infinity, + "eval_runtime": 181.0479, + "eval_samples_per_second": 38.785, + "eval_steps_per_second": 4.85, + "eval_wer": 0.324505500317326, "step": 145000 }, { - "epoch": 5.717316986915007, - "grad_norm": 0.3506734073162079, - "learning_rate": 8.20375939849624e-05, - "loss": 0.1811, + "epoch": 5.567886116638604, + "grad_norm": 0.4790880084037781, + "learning_rate": 8.20345864661654e-05, + "loss": 0.1681, "step": 145500 }, { - "epoch": 5.736964124327086, - "grad_norm": 0.9915302395820618, - "learning_rate": 8.128571428571428e-05, - "loss": 0.1745, + "epoch": 5.587019745905403, + "grad_norm": 0.749213457107544, + "learning_rate": 8.128270676691729e-05, + "loss": 0.1799, "step": 146000 }, { - "epoch": 5.736964124327086, - "eval_loss": 0.41646912693977356, - "eval_runtime": 143.6976, - "eval_samples_per_second": 39.36, - "eval_steps_per_second": 4.92, - "eval_wer": 0.32445314631445493, + "epoch": 5.587019745905403, + "eval_loss": Infinity, + "eval_runtime": 180.4888, + "eval_samples_per_second": 38.905, + "eval_steps_per_second": 4.865, + "eval_wer": 0.32506082081658555, "step": 146000 }, { - "epoch": 5.756611261739165, - "grad_norm": 0.4368499219417572, - "learning_rate": 8.053383458646616e-05, - "loss": 0.1757, + "epoch": 5.606153375172203, + "grad_norm": 0.5427069067955017, + "learning_rate": 8.053082706766917e-05, + "loss": 0.1714, "step": 146500 }, { - "epoch": 5.776258399151244, - "grad_norm": 0.8709374070167542, - "learning_rate": 7.978195488721803e-05, - "loss": 0.1692, + "epoch": 5.625287004439002, + "grad_norm": 0.53640216588974, + "learning_rate": 7.977894736842105e-05, + "loss": 0.1696, "step": 147000 }, { - "epoch": 5.776258399151244, - "eval_loss": 0.4147648215293884, - "eval_runtime": 144.5484, - "eval_samples_per_second": 39.129, - "eval_steps_per_second": 4.891, - "eval_wer": 0.3230408756078381, + "epoch": 5.625287004439002, + "eval_loss": Infinity, + "eval_runtime": 180.603, + "eval_samples_per_second": 38.881, + "eval_steps_per_second": 4.861, + "eval_wer": 0.32224455257034057, "step": 147000 }, { - "epoch": 5.795905536563323, - "grad_norm": 16.672887802124023, - "learning_rate": 7.903007518796991e-05, - "loss": 0.1633, + "epoch": 5.644420633705801, + "grad_norm": 2.7623894214630127, + "learning_rate": 7.902706766917293e-05, + "loss": 0.1766, "step": 147500 }, { - "epoch": 5.815552673975402, - "grad_norm": 0.7690948247909546, - "learning_rate": 7.82781954887218e-05, - "loss": 0.1641, + "epoch": 5.663554262972601, + "grad_norm": 0.9681125283241272, + "learning_rate": 7.827518796992481e-05, + "loss": 0.1711, "step": 148000 }, { - "epoch": 5.815552673975402, - "eval_loss": 0.4115670621395111, - "eval_runtime": 145.0143, - "eval_samples_per_second": 39.003, - "eval_steps_per_second": 4.875, - "eval_wer": 0.3216446534319783, + "epoch": 5.663554262972601, + "eval_loss": Infinity, + "eval_runtime": 180.9103, + "eval_samples_per_second": 38.815, + "eval_steps_per_second": 4.853, + "eval_wer": 0.3242939496509414, "step": 148000 }, { - "epoch": 5.835199811387481, - "grad_norm": 1.9833319187164307, - "learning_rate": 7.752781954887217e-05, - "loss": 0.1646, + "epoch": 5.6826878922394, + "grad_norm": 0.8594058752059937, + "learning_rate": 7.752330827067668e-05, + "loss": 0.1798, "step": 148500 }, { - "epoch": 5.85484694879956, - "grad_norm": 0.38222184777259827, - "learning_rate": 7.677593984962405e-05, - "loss": 0.173, + "epoch": 5.701821521506199, + "grad_norm": 0.9855976104736328, + "learning_rate": 7.677293233082705e-05, + "loss": 0.1794, "step": 149000 }, { - "epoch": 5.85484694879956, - "eval_loss": 0.40414321422576904, - "eval_runtime": 148.2393, - "eval_samples_per_second": 38.155, - "eval_steps_per_second": 4.769, - "eval_wer": 0.32366676830736146, + "epoch": 5.701821521506199, + "eval_loss": Infinity, + "eval_runtime": 181.7415, + "eval_samples_per_second": 38.637, + "eval_steps_per_second": 4.831, + "eval_wer": 0.3212264649883647, "step": 149000 }, { - "epoch": 5.874494086211639, - "grad_norm": 2.3978090286254883, - "learning_rate": 7.602556390977442e-05, - "loss": 0.1669, + "epoch": 5.7209551507729985, + "grad_norm": 0.5901813507080078, + "learning_rate": 7.602255639097744e-05, + "loss": 0.1691, "step": 149500 }, { - "epoch": 5.894141223623718, - "grad_norm": 0.7286165952682495, - "learning_rate": 7.52736842105263e-05, - "loss": 0.1664, + "epoch": 5.740088780039798, + "grad_norm": 1.9479256868362427, + "learning_rate": 7.527067669172932e-05, + "loss": 0.1806, "step": 150000 }, { - "epoch": 5.894141223623718, - "eval_loss": 0.4038516581058502, - "eval_runtime": 145.7264, - "eval_samples_per_second": 38.812, - "eval_steps_per_second": 4.852, - "eval_wer": 0.3184349472805765, + "epoch": 5.740088780039798, + "eval_loss": Infinity, + "eval_runtime": 180.0383, + "eval_samples_per_second": 39.003, + "eval_steps_per_second": 4.877, + "eval_wer": 0.32014226782314364, "step": 150000 }, { - "epoch": 5.913788361035797, - "grad_norm": 0.6666128635406494, - "learning_rate": 7.45218045112782e-05, - "loss": 0.1631, + "epoch": 5.759222409306597, + "grad_norm": 5.095980167388916, + "learning_rate": 7.45187969924812e-05, + "loss": 0.1802, "step": 150500 }, { - "epoch": 5.933435498447876, - "grad_norm": 3.139840841293335, - "learning_rate": 7.376992481203008e-05, - "loss": 0.1648, + "epoch": 5.7783560385733965, + "grad_norm": 1.2752444744110107, + "learning_rate": 7.376691729323307e-05, + "loss": 0.1736, "step": 151000 }, { - "epoch": 5.933435498447876, - "eval_loss": 0.4072332978248596, - "eval_runtime": 144.1568, - "eval_samples_per_second": 39.235, - "eval_steps_per_second": 4.904, - "eval_wer": 0.31657331771276337, + "epoch": 5.7783560385733965, + "eval_loss": Infinity, + "eval_runtime": 178.7009, + "eval_samples_per_second": 39.295, + "eval_steps_per_second": 4.913, + "eval_wer": 0.3235799661518934, "step": 151000 }, { - "epoch": 5.953082635859955, - "grad_norm": 0.2758707106113434, - "learning_rate": 7.301804511278196e-05, - "loss": 0.1616, + "epoch": 5.797489667840196, + "grad_norm": 0.6796151995658875, + "learning_rate": 7.301503759398495e-05, + "loss": 0.1707, "step": 151500 }, { - "epoch": 5.972729773272034, - "grad_norm": 0.5328942537307739, - "learning_rate": 7.226616541353382e-05, - "loss": 0.1709, + "epoch": 5.816623297106995, + "grad_norm": 0.8652954697608948, + "learning_rate": 7.226466165413533e-05, + "loss": 0.1664, "step": 152000 }, { - "epoch": 5.972729773272034, - "eval_loss": 0.40219077467918396, - "eval_runtime": 144.786, - "eval_samples_per_second": 39.065, - "eval_steps_per_second": 4.883, - "eval_wer": 0.3205854504020157, + "epoch": 5.816623297106995, + "eval_loss": Infinity, + "eval_runtime": 180.4487, + "eval_samples_per_second": 38.914, + "eval_steps_per_second": 4.866, + "eval_wer": 0.3222313306536916, "step": 152000 }, { - "epoch": 5.992376910684113, - "grad_norm": 0.5073242783546448, - "learning_rate": 7.15142857142857e-05, - "loss": 0.1651, + "epoch": 5.8357569263737945, + "grad_norm": 0.5811170935630798, + "learning_rate": 7.151278195488721e-05, + "loss": 0.1712, "step": 152500 }, { - "epoch": 6.012024048096192, - "grad_norm": 0.4045845866203308, - "learning_rate": 7.076390977443608e-05, - "loss": 0.151, + "epoch": 5.854890555640594, + "grad_norm": 0.414420485496521, + "learning_rate": 7.076090225563909e-05, + "loss": 0.1704, "step": 153000 }, { - "epoch": 6.012024048096192, - "eval_loss": 0.4034076929092407, - "eval_runtime": 144.9751, - "eval_samples_per_second": 39.014, - "eval_steps_per_second": 4.877, - "eval_wer": 0.31882011201874466, + "epoch": 5.854890555640594, + "eval_loss": Infinity, + "eval_runtime": 181.9454, + "eval_samples_per_second": 38.594, + "eval_steps_per_second": 4.826, + "eval_wer": 0.3200232705733023, "step": 153000 }, { - "epoch": 6.031671185508271, - "grad_norm": 1.1703969240188599, - "learning_rate": 7.001203007518797e-05, - "loss": 0.1397, + "epoch": 5.874024184907393, + "grad_norm": 0.7044617533683777, + "learning_rate": 7.000902255639097e-05, + "loss": 0.1797, "step": 153500 }, { - "epoch": 6.05131832292035, - "grad_norm": 0.3152583837509155, - "learning_rate": 6.926015037593985e-05, - "loss": 0.1353, + "epoch": 5.8931578141741925, + "grad_norm": 0.6984072327613831, + "learning_rate": 6.925714285714284e-05, + "loss": 0.1713, "step": 154000 }, { - "epoch": 6.05131832292035, - "eval_loss": 0.41277533769607544, - "eval_runtime": 144.9149, - "eval_samples_per_second": 39.03, - "eval_steps_per_second": 4.879, - "eval_wer": 0.32572098024425866, + "epoch": 5.8931578141741925, + "eval_loss": Infinity, + "eval_runtime": 181.4266, + "eval_samples_per_second": 38.704, + "eval_steps_per_second": 4.839, + "eval_wer": 0.33001903955997464, "step": 154000 }, { - "epoch": 6.070965460332429, - "grad_norm": 0.5021807551383972, - "learning_rate": 6.850827067669173e-05, - "loss": 0.1429, + "epoch": 5.912291443440992, + "grad_norm": 1.7558343410491943, + "learning_rate": 6.850526315789472e-05, + "loss": 0.1718, "step": 154500 }, { - "epoch": 6.090612597744508, - "grad_norm": 0.4375011622905731, - "learning_rate": 6.77563909774436e-05, - "loss": 0.1476, + "epoch": 5.931425072707791, + "grad_norm": 0.5357454419136047, + "learning_rate": 6.77533834586466e-05, + "loss": 0.1701, "step": 155000 }, { - "epoch": 6.090612597744508, - "eval_loss": 0.4197489619255066, - "eval_runtime": 145.056, - "eval_samples_per_second": 38.992, - "eval_steps_per_second": 4.874, - "eval_wer": 0.3200398003562774, + "epoch": 5.931425072707791, + "eval_loss": Infinity, + "eval_runtime": 184.6223, + "eval_samples_per_second": 38.034, + "eval_steps_per_second": 4.756, + "eval_wer": 0.3172202242437064, "step": 155000 }, { - "epoch": 6.110259735156587, - "grad_norm": 0.4859500527381897, - "learning_rate": 6.700451127819548e-05, - "loss": 0.1456, + "epoch": 5.9505587019745905, + "grad_norm": 0.6187770962715149, + "learning_rate": 6.700150375939849e-05, + "loss": 0.1684, "step": 155500 }, { - "epoch": 6.129906872568666, - "grad_norm": 0.4906657636165619, - "learning_rate": 6.625263157894736e-05, - "loss": 0.1465, + "epoch": 5.96969233124139, + "grad_norm": 0.4420112669467926, + "learning_rate": 6.624962406015037e-05, + "loss": 0.1687, "step": 156000 }, { - "epoch": 6.129906872568666, - "eval_loss": 0.40734121203422546, - "eval_runtime": 144.5712, - "eval_samples_per_second": 39.123, - "eval_steps_per_second": 4.89, - "eval_wer": 0.3167338030203335, + "epoch": 5.96969233124139, + "eval_loss": Infinity, + "eval_runtime": 182.0611, + "eval_samples_per_second": 38.569, + "eval_steps_per_second": 4.823, + "eval_wer": 0.31862174740850435, "step": 156000 }, { - "epoch": 6.149554009980746, - "grad_norm": 0.7306200861930847, + "epoch": 5.988825960508189, + "grad_norm": 2.3220465183258057, "learning_rate": 6.550075187969924e-05, - "loss": 0.1414, + "loss": 0.1657, "step": 156500 }, { - "epoch": 6.169201147392825, - "grad_norm": 0.35837283730506897, - "learning_rate": 6.474887218045112e-05, - "loss": 0.139, + "epoch": 6.0079595897749885, + "grad_norm": 1.0167362689971924, + "learning_rate": 6.475037593984962e-05, + "loss": 0.1543, "step": 157000 }, { - "epoch": 6.169201147392825, - "eval_loss": 0.42275404930114746, - "eval_runtime": 144.9153, - "eval_samples_per_second": 39.03, - "eval_steps_per_second": 4.879, - "eval_wer": 0.321179246040025, + "epoch": 6.0079595897749885, + "eval_loss": Infinity, + "eval_runtime": 181.8294, + "eval_samples_per_second": 38.619, + "eval_steps_per_second": 4.829, + "eval_wer": 0.31407340808123546, "step": 157000 }, { - "epoch": 6.188848284804904, - "grad_norm": 0.5820499658584595, - "learning_rate": 6.39984962406015e-05, - "loss": 0.1408, + "epoch": 6.027093219041788, + "grad_norm": 0.2879861295223236, + "learning_rate": 6.4e-05, + "loss": 0.1419, "step": 157500 }, { - "epoch": 6.208495422216983, - "grad_norm": 0.2785002291202545, + "epoch": 6.046226848308587, + "grad_norm": 0.5147427916526794, "learning_rate": 6.324812030075188e-05, - "loss": 0.1404, + "loss": 0.142, "step": 158000 }, { - "epoch": 6.208495422216983, - "eval_loss": 0.4117072522640228, - "eval_runtime": 144.7738, - "eval_samples_per_second": 39.068, - "eval_steps_per_second": 4.883, - "eval_wer": 0.3244691948452119, + "epoch": 6.046226848308587, + "eval_loss": Infinity, + "eval_runtime": 181.1027, + "eval_samples_per_second": 38.774, + "eval_steps_per_second": 4.848, + "eval_wer": 0.3165591284112545, "step": 158000 }, { - "epoch": 6.228142559629062, - "grad_norm": 0.9491069912910461, + "epoch": 6.065360477575386, + "grad_norm": 0.43559426069259644, "learning_rate": 6.249624060150375e-05, - "loss": 0.1443, + "loss": 0.1399, "step": 158500 }, { - "epoch": 6.247789697041141, - "grad_norm": 0.6151573657989502, + "epoch": 6.084494106842186, + "grad_norm": 0.38178640604019165, "learning_rate": 6.174436090225563e-05, - "loss": 0.1338, + "loss": 0.1438, "step": 159000 }, { - "epoch": 6.247789697041141, - "eval_loss": 0.41795113682746887, - "eval_runtime": 144.7948, - "eval_samples_per_second": 39.062, - "eval_steps_per_second": 4.883, - "eval_wer": 0.3153054837829597, + "epoch": 6.084494106842186, + "eval_loss": Infinity, + "eval_runtime": 181.0147, + "eval_samples_per_second": 38.792, + "eval_steps_per_second": 4.85, + "eval_wer": 0.31562037232917284, "step": 159000 }, { - "epoch": 6.26743683445322, - "grad_norm": 1.4104067087173462, + "epoch": 6.103627736108985, + "grad_norm": 0.42758727073669434, "learning_rate": 6.099248120300751e-05, - "loss": 0.1458, + "loss": 0.144, "step": 159500 }, { - "epoch": 6.287083971865299, - "grad_norm": 0.4986151158809662, + "epoch": 6.122761365375784, + "grad_norm": 0.5155762434005737, "learning_rate": 6.024060150375939e-05, - "loss": 0.1436, + "loss": 0.1433, "step": 160000 }, { - "epoch": 6.287083971865299, - "eval_loss": 0.42644599080085754, - "eval_runtime": 145.4284, - "eval_samples_per_second": 38.892, - "eval_steps_per_second": 4.861, - "eval_wer": 0.31670170595881947, + "epoch": 6.122761365375784, + "eval_loss": Infinity, + "eval_runtime": 183.6308, + "eval_samples_per_second": 38.24, + "eval_steps_per_second": 4.781, + "eval_wer": 0.31587158874550453, "step": 160000 }, { - "epoch": 6.306731109277378, - "grad_norm": 1.0388261079788208, - "learning_rate": 5.9488721804511266e-05, - "loss": 0.1382, + "epoch": 6.141894994642584, + "grad_norm": 0.6651669144630432, + "learning_rate": 5.949022556390977e-05, + "loss": 0.1426, "step": 160500 }, { - "epoch": 6.326378246689457, - "grad_norm": 1.0425645112991333, + "epoch": 6.161028623909383, + "grad_norm": 0.42425113916397095, "learning_rate": 5.873834586466165e-05, - "loss": 0.1317, + "loss": 0.1442, "step": 161000 }, { - "epoch": 6.326378246689457, - "eval_loss": 0.4117776155471802, - "eval_runtime": 144.9416, - "eval_samples_per_second": 39.023, - "eval_steps_per_second": 4.878, - "eval_wer": 0.31524128965993164, + "epoch": 6.161028623909383, + "eval_loss": Infinity, + "eval_runtime": 181.9638, + "eval_samples_per_second": 38.59, + "eval_steps_per_second": 4.825, + "eval_wer": 0.3142849587476201, "step": 161000 }, { - "epoch": 6.346025384101536, - "grad_norm": 0.47523021697998047, + "epoch": 6.180162253176182, + "grad_norm": 1.366357684135437, "learning_rate": 5.798646616541353e-05, - "loss": 0.1386, + "loss": 0.1342, "step": 161500 }, { - "epoch": 6.365672521513615, - "grad_norm": 0.27745115756988525, + "epoch": 6.199295882442982, + "grad_norm": 0.7355407476425171, "learning_rate": 5.7234586466165414e-05, - "loss": 0.1395, + "loss": 0.1494, "step": 162000 }, { - "epoch": 6.365672521513615, - "eval_loss": 0.42685896158218384, - "eval_runtime": 145.206, - "eval_samples_per_second": 38.952, - "eval_steps_per_second": 4.869, - "eval_wer": 0.3118390011394457, + "epoch": 6.199295882442982, + "eval_loss": Infinity, + "eval_runtime": 182.1034, + "eval_samples_per_second": 38.561, + "eval_steps_per_second": 4.821, + "eval_wer": 0.3106621535857838, "step": 162000 }, { - "epoch": 6.385319658925694, - "grad_norm": 0.3224891126155853, - "learning_rate": 5.6484210526315785e-05, - "loss": 0.1335, + "epoch": 6.218429511709781, + "grad_norm": 0.8021041750907898, + "learning_rate": 5.648270676691729e-05, + "loss": 0.1449, "step": 162500 }, { - "epoch": 6.404966796337773, - "grad_norm": 0.2714509665966034, - "learning_rate": 5.5732330827067666e-05, - "loss": 0.1267, + "epoch": 6.23756314097658, + "grad_norm": 0.3070674240589142, + "learning_rate": 5.573082706766917e-05, + "loss": 0.1355, "step": 163000 }, { - "epoch": 6.404966796337773, - "eval_loss": 0.4240754544734955, - "eval_runtime": 144.9066, - "eval_samples_per_second": 39.032, - "eval_steps_per_second": 4.879, - "eval_wer": 0.31345990274590363, + "epoch": 6.23756314097658, + "eval_loss": Infinity, + "eval_runtime": 181.6272, + "eval_samples_per_second": 38.662, + "eval_steps_per_second": 4.834, + "eval_wer": 0.31661201607785067, "step": 163000 }, { - "epoch": 6.4246139337498525, - "grad_norm": 0.3742597997188568, + "epoch": 6.25669677024338, + "grad_norm": 0.3594122529029846, "learning_rate": 5.498045112781954e-05, - "loss": 0.1438, + "loss": 0.1399, "step": 163500 }, { - "epoch": 6.4442610711619315, - "grad_norm": 1.6135519742965698, + "epoch": 6.275830399510179, + "grad_norm": 0.7340966463088989, "learning_rate": 5.422857142857142e-05, - "loss": 0.1334, + "loss": 0.1403, "step": 164000 }, { - "epoch": 6.4442610711619315, - "eval_loss": 0.40579110383987427, - "eval_runtime": 144.6174, - "eval_samples_per_second": 39.11, - "eval_steps_per_second": 4.889, - "eval_wer": 0.31686219126638954, + "epoch": 6.275830399510179, + "eval_loss": Infinity, + "eval_runtime": 182.6513, + "eval_samples_per_second": 38.445, + "eval_steps_per_second": 4.807, + "eval_wer": 0.31170668500105775, "step": 164000 }, { - "epoch": 6.4639082085740105, - "grad_norm": 0.7605300545692444, + "epoch": 6.294964028776978, + "grad_norm": 0.49476948380470276, "learning_rate": 5.3476691729323304e-05, - "loss": 0.1371, + "loss": 0.1391, "step": 164500 }, { - "epoch": 6.4835553459860895, - "grad_norm": 0.44126906991004944, - "learning_rate": 5.2724812030075185e-05, - "loss": 0.1369, + "epoch": 6.314097658043778, + "grad_norm": 0.7009222507476807, + "learning_rate": 5.2726315789473675e-05, + "loss": 0.1435, "step": 165000 }, { - "epoch": 6.4835553459860895, - "eval_loss": 0.40502265095710754, - "eval_runtime": 145.4039, - "eval_samples_per_second": 38.899, - "eval_steps_per_second": 4.862, - "eval_wer": 0.31296239829243633, + "epoch": 6.314097658043778, + "eval_loss": Infinity, + "eval_runtime": 182.5712, + "eval_samples_per_second": 38.462, + "eval_steps_per_second": 4.809, + "eval_wer": 0.3124206685001058, "step": 165000 }, { - "epoch": 6.5032024833981685, - "grad_norm": 0.32450059056282043, - "learning_rate": 5.197293233082706e-05, - "loss": 0.1352, + "epoch": 6.333231287310577, + "grad_norm": 1.6074929237365723, + "learning_rate": 5.197443609022556e-05, + "loss": 0.1369, "step": 165500 }, { - "epoch": 6.522849620810248, - "grad_norm": 1.38713538646698, - "learning_rate": 5.122105263157894e-05, - "loss": 0.1322, + "epoch": 6.352364916577376, + "grad_norm": 0.4530220031738281, + "learning_rate": 5.122255639097744e-05, + "loss": 0.1446, "step": 166000 }, { - "epoch": 6.522849620810248, - "eval_loss": 0.40965744853019714, - "eval_runtime": 144.8647, - "eval_samples_per_second": 39.043, - "eval_steps_per_second": 4.88, - "eval_wer": 0.31403764985315596, + "epoch": 6.352364916577376, + "eval_loss": Infinity, + "eval_runtime": 182.3168, + "eval_samples_per_second": 38.515, + "eval_steps_per_second": 4.816, + "eval_wer": 0.31234133700021155, "step": 166000 }, { - "epoch": 6.5424967582223275, - "grad_norm": 0.7151561379432678, - "learning_rate": 5.046917293233082e-05, - "loss": 0.1385, + "epoch": 6.371498545844176, + "grad_norm": 0.5443539023399353, + "learning_rate": 5.047067669172932e-05, + "loss": 0.1481, "step": 166500 }, { - "epoch": 6.5621438956344065, - "grad_norm": 0.46481749415397644, - "learning_rate": 4.9717293233082705e-05, - "loss": 0.1358, + "epoch": 6.390632175110975, + "grad_norm": 0.604567289352417, + "learning_rate": 4.9718796992481194e-05, + "loss": 0.1385, "step": 167000 }, { - "epoch": 6.5621438956344065, - "eval_loss": 0.41421449184417725, - "eval_runtime": 144.9831, - "eval_samples_per_second": 39.011, - "eval_steps_per_second": 4.876, - "eval_wer": 0.3129142527001653, + "epoch": 6.390632175110975, + "eval_loss": Infinity, + "eval_runtime": 180.452, + "eval_samples_per_second": 38.913, + "eval_steps_per_second": 4.866, + "eval_wer": 0.31403374233128833, "step": 167000 }, { - "epoch": 6.5817910330464855, - "grad_norm": 0.4189301133155823, - "learning_rate": 4.896541353383458e-05, - "loss": 0.1359, + "epoch": 6.409765804377774, + "grad_norm": 0.5584743022918701, + "learning_rate": 4.8966917293233076e-05, + "loss": 0.1451, "step": 167500 }, { - "epoch": 6.6014381704585645, - "grad_norm": 0.7608076333999634, - "learning_rate": 4.821353383458646e-05, - "loss": 0.1345, + "epoch": 6.428899433644574, + "grad_norm": 0.34049585461616516, + "learning_rate": 4.821503759398496e-05, + "loss": 0.1437, "step": 168000 }, { - "epoch": 6.6014381704585645, - "eval_loss": 0.40090152621269226, - "eval_runtime": 144.6628, - "eval_samples_per_second": 39.098, - "eval_steps_per_second": 4.887, - "eval_wer": 0.31230440853139896, + "epoch": 6.428899433644574, + "eval_loss": Infinity, + "eval_runtime": 180.8859, + "eval_samples_per_second": 38.82, + "eval_steps_per_second": 4.854, + "eval_wer": 0.31029193991961074, "step": 168000 }, { - "epoch": 6.6210853078706435, - "grad_norm": 0.23644275963306427, - "learning_rate": 4.746165413533834e-05, - "loss": 0.1329, + "epoch": 6.448033062911373, + "grad_norm": 1.0056949853897095, + "learning_rate": 4.746315789473684e-05, + "loss": 0.1453, "step": 168500 }, { - "epoch": 6.6407324452827226, - "grad_norm": 0.5338233709335327, - "learning_rate": 4.6711278195488714e-05, - "loss": 0.1321, + "epoch": 6.467166692178172, + "grad_norm": 0.4812434911727905, + "learning_rate": 4.671278195488721e-05, + "loss": 0.1328, "step": 169000 }, { - "epoch": 6.6407324452827226, - "eval_loss": 0.4004514813423157, - "eval_runtime": 144.9848, - "eval_samples_per_second": 39.011, - "eval_steps_per_second": 4.876, - "eval_wer": 0.3092712362183242, + "epoch": 6.467166692178172, + "eval_loss": Infinity, + "eval_runtime": 181.778, + "eval_samples_per_second": 38.63, + "eval_steps_per_second": 4.83, + "eval_wer": 0.31021260841971654, "step": 169000 }, { - "epoch": 6.660379582694802, - "grad_norm": 0.5386209487915039, - "learning_rate": 4.5959398496240595e-05, - "loss": 0.1324, + "epoch": 6.486300321444972, + "grad_norm": 0.5090984106063843, + "learning_rate": 4.596090225563909e-05, + "loss": 0.1369, "step": 169500 }, { - "epoch": 6.680026720106881, - "grad_norm": 0.7969732880592346, - "learning_rate": 4.5207518796992477e-05, - "loss": 0.1299, + "epoch": 6.505433950711771, + "grad_norm": 3.274346113204956, + "learning_rate": 4.520902255639097e-05, + "loss": 0.1354, "step": 170000 }, { - "epoch": 6.680026720106881, - "eval_loss": 0.39957067370414734, - "eval_runtime": 144.9466, - "eval_samples_per_second": 39.021, - "eval_steps_per_second": 4.878, - "eval_wer": 0.305387491775128, + "epoch": 6.505433950711771, + "eval_loss": Infinity, + "eval_runtime": 182.3019, + "eval_samples_per_second": 38.519, + "eval_steps_per_second": 4.816, + "eval_wer": 0.31116458641844724, "step": 170000 }, { - "epoch": 6.69967385751896, - "grad_norm": 0.7069671154022217, - "learning_rate": 4.445563909774436e-05, - "loss": 0.1381, + "epoch": 6.52456757997857, + "grad_norm": 0.6519914269447327, + "learning_rate": 4.445714285714285e-05, + "loss": 0.1405, "step": 170500 }, { - "epoch": 6.719320994931039, - "grad_norm": 0.8022767305374146, - "learning_rate": 4.370375939849623e-05, - "loss": 0.1345, + "epoch": 6.54370120924537, + "grad_norm": 0.5463857650756836, + "learning_rate": 4.370526315789473e-05, + "loss": 0.1394, "step": 171000 }, { - "epoch": 6.719320994931039, - "eval_loss": 0.40409377217292786, - "eval_runtime": 145.3133, - "eval_samples_per_second": 38.923, - "eval_steps_per_second": 4.865, - "eval_wer": 0.30705653897385693, + "epoch": 6.54370120924537, + "eval_loss": Infinity, + "eval_runtime": 181.4757, + "eval_samples_per_second": 38.694, + "eval_steps_per_second": 4.838, + "eval_wer": 0.3094192934207743, "step": 171000 }, { - "epoch": 6.738968132343118, - "grad_norm": 0.9058027863502502, - "learning_rate": 4.2951879699248114e-05, - "loss": 0.1314, + "epoch": 6.562834838512169, + "grad_norm": 0.43961018323898315, + "learning_rate": 4.295338345864661e-05, + "loss": 0.1424, "step": 171500 }, { - "epoch": 6.758615269755197, - "grad_norm": 0.4458518326282501, - "learning_rate": 4.2199999999999996e-05, - "loss": 0.1328, + "epoch": 6.581968467778968, + "grad_norm": 0.2494196593761444, + "learning_rate": 4.220150375939849e-05, + "loss": 0.1385, "step": 172000 }, { - "epoch": 6.758615269755197, - "eval_loss": 0.3997325003147125, - "eval_runtime": 145.3079, - "eval_samples_per_second": 38.924, - "eval_steps_per_second": 4.866, - "eval_wer": 0.3069762963200719, + "epoch": 6.581968467778968, + "eval_loss": Infinity, + "eval_runtime": 181.1999, + "eval_samples_per_second": 38.753, + "eval_steps_per_second": 4.845, + "eval_wer": 0.30549238417601016, "step": 172000 }, { - "epoch": 6.778262407167276, - "grad_norm": 0.5749480128288269, - "learning_rate": 4.144812030075188e-05, - "loss": 0.135, + "epoch": 6.601102097045768, + "grad_norm": 3.2341201305389404, + "learning_rate": 4.144962406015037e-05, + "loss": 0.1444, "step": 172500 }, { - "epoch": 6.797909544579355, - "grad_norm": 0.3367716073989868, - "learning_rate": 4.069624060150375e-05, - "loss": 0.1245, + "epoch": 6.620235726312567, + "grad_norm": 0.6074426770210266, + "learning_rate": 4.069774436090225e-05, + "loss": 0.138, "step": 173000 }, { - "epoch": 6.797909544579355, - "eval_loss": 0.3974212110042572, - "eval_runtime": 145.9176, - "eval_samples_per_second": 38.762, - "eval_steps_per_second": 4.845, - "eval_wer": 0.3044566769912215, + "epoch": 6.620235726312567, + "eval_loss": Infinity, + "eval_runtime": 181.8045, + "eval_samples_per_second": 38.624, + "eval_steps_per_second": 4.829, + "eval_wer": 0.3054659403427121, "step": 173000 }, { - "epoch": 6.817556681991434, - "grad_norm": 0.546405553817749, - "learning_rate": 3.9944360902255633e-05, - "loss": 0.1312, + "epoch": 6.639369355579366, + "grad_norm": 0.48304858803749084, + "learning_rate": 3.994736842105263e-05, + "loss": 0.1356, "step": 173500 }, { - "epoch": 6.837203819403513, - "grad_norm": 0.38214609026908875, - "learning_rate": 3.9192481203007515e-05, - "loss": 0.1356, + "epoch": 6.658502984846166, + "grad_norm": 0.3982817530632019, + "learning_rate": 3.919548872180451e-05, + "loss": 0.138, "step": 174000 }, { - "epoch": 6.837203819403513, - "eval_loss": 0.39992156624794006, - "eval_runtime": 144.9546, - "eval_samples_per_second": 39.019, - "eval_steps_per_second": 4.877, - "eval_wer": 0.3008939031631654, + "epoch": 6.658502984846166, + "eval_loss": Infinity, + "eval_runtime": 183.7344, + "eval_samples_per_second": 38.218, + "eval_steps_per_second": 4.779, + "eval_wer": 0.3061138142585149, "step": 174000 }, { - "epoch": 6.856850956815592, - "grad_norm": 0.21237680315971375, - "learning_rate": 3.8442105263157886e-05, - "loss": 0.1335, + "epoch": 6.677636614112965, + "grad_norm": 0.46521154046058655, + "learning_rate": 3.844360902255639e-05, + "loss": 0.1293, "step": 174500 }, { - "epoch": 6.876498094227671, - "grad_norm": 0.4656332731246948, - "learning_rate": 3.769022556390977e-05, - "loss": 0.1208, + "epoch": 6.696770243379764, + "grad_norm": 0.33037710189819336, + "learning_rate": 3.769172932330827e-05, + "loss": 0.1313, "step": 175000 }, { - "epoch": 6.876498094227671, - "eval_loss": 0.39532560110092163, - "eval_runtime": 145.4346, - "eval_samples_per_second": 38.89, - "eval_steps_per_second": 4.861, - "eval_wer": 0.301921009131614, + "epoch": 6.696770243379764, + "eval_loss": Infinity, + "eval_runtime": 181.4617, + "eval_samples_per_second": 38.697, + "eval_steps_per_second": 4.838, + "eval_wer": 0.3061005923418659, "step": 175000 }, { - "epoch": 6.89614523163975, - "grad_norm": 0.6751464605331421, - "learning_rate": 3.693834586466165e-05, - "loss": 0.1282, + "epoch": 6.715903872646564, + "grad_norm": 0.47027432918548584, + "learning_rate": 3.6939849624060146e-05, + "loss": 0.1363, "step": 175500 }, { - "epoch": 6.915792369051829, - "grad_norm": 1.1535145044326782, - "learning_rate": 3.618646616541353e-05, - "loss": 0.1316, + "epoch": 6.735037501913363, + "grad_norm": 0.7823716998100281, + "learning_rate": 3.618796992481203e-05, + "loss": 0.1427, "step": 176000 }, { - "epoch": 6.915792369051829, - "eval_loss": 0.39738306403160095, - "eval_runtime": 146.048, - "eval_samples_per_second": 38.727, - "eval_steps_per_second": 4.841, - "eval_wer": 0.3056442682672401, + "epoch": 6.735037501913363, + "eval_loss": Infinity, + "eval_runtime": 180.6528, + "eval_samples_per_second": 38.87, + "eval_steps_per_second": 4.86, + "eval_wer": 0.30834831817220226, "step": 176000 }, { - "epoch": 6.935439506463908, - "grad_norm": 0.8314586877822876, - "learning_rate": 3.543458646616541e-05, - "loss": 0.1271, + "epoch": 6.754171131180162, + "grad_norm": 0.5896081924438477, + "learning_rate": 3.543609022556391e-05, + "loss": 0.1347, "step": 176500 }, { - "epoch": 6.955086643875987, - "grad_norm": 0.7973750233650208, - "learning_rate": 3.4682706766917294e-05, - "loss": 0.1232, + "epoch": 6.773304760446962, + "grad_norm": 0.7625430822372437, + "learning_rate": 3.468571428571429e-05, + "loss": 0.1432, "step": 177000 }, { - "epoch": 6.955086643875987, - "eval_loss": 0.39205384254455566, - "eval_runtime": 146.1083, - "eval_samples_per_second": 38.711, - "eval_steps_per_second": 4.839, - "eval_wer": 0.30333327983823083, + "epoch": 6.773304760446962, + "eval_loss": Infinity, + "eval_runtime": 181.5521, + "eval_samples_per_second": 38.678, + "eval_steps_per_second": 4.836, + "eval_wer": 0.3047519568436641, "step": 177000 }, { - "epoch": 6.974733781288066, - "grad_norm": 0.3950521647930145, - "learning_rate": 3.393082706766917e-05, - "loss": 0.1344, + "epoch": 6.792438389713761, + "grad_norm": 0.8567324280738831, + "learning_rate": 3.393383458646616e-05, + "loss": 0.1348, "step": 177500 }, { - "epoch": 6.994380918700146, - "grad_norm": 0.4100574851036072, - "learning_rate": 3.317894736842105e-05, - "loss": 0.1261, + "epoch": 6.81157201898056, + "grad_norm": 1.5647565126419067, + "learning_rate": 3.318195488721804e-05, + "loss": 0.136, "step": 178000 }, { - "epoch": 6.994380918700146, - "eval_loss": 0.39850306510925293, - "eval_runtime": 145.9873, - "eval_samples_per_second": 38.743, - "eval_steps_per_second": 4.843, - "eval_wer": 0.3034616680842869, + "epoch": 6.81157201898056, + "eval_loss": Infinity, + "eval_runtime": 181.3872, + "eval_samples_per_second": 38.713, + "eval_steps_per_second": 4.84, + "eval_wer": 0.3039454199280728, "step": 178000 }, { - "epoch": 7.014028056112225, - "grad_norm": 0.5746680498123169, - "learning_rate": 3.242857142857143e-05, - "loss": 0.1105, + "epoch": 6.83070564824736, + "grad_norm": 0.3368758261203766, + "learning_rate": 3.2430075187969924e-05, + "loss": 0.1383, "step": 178500 }, { - "epoch": 7.033675193524304, - "grad_norm": 0.5409959554672241, - "learning_rate": 3.16766917293233e-05, - "loss": 0.1184, + "epoch": 6.8498392775141586, + "grad_norm": 0.5614475011825562, + "learning_rate": 3.1678195488721806e-05, + "loss": 0.1424, "step": 179000 }, { - "epoch": 7.033675193524304, - "eval_loss": 0.40056413412094116, - "eval_runtime": 145.9106, - "eval_samples_per_second": 38.763, - "eval_steps_per_second": 4.845, - "eval_wer": 0.3061096756591934, + "epoch": 6.8498392775141586, + "eval_loss": Infinity, + "eval_runtime": 181.4861, + "eval_samples_per_second": 38.692, + "eval_steps_per_second": 4.838, + "eval_wer": 0.3016448064311403, "step": 179000 }, { - "epoch": 7.053322330936383, - "grad_norm": 0.7439139485359192, + "epoch": 6.868972906780958, + "grad_norm": 0.6072395443916321, "learning_rate": 3.092631578947368e-05, - "loss": 0.1132, + "loss": 0.1284, "step": 179500 }, { - "epoch": 7.072969468348462, - "grad_norm": 1.1852874755859375, + "epoch": 6.888106536047758, + "grad_norm": 0.6235467195510864, "learning_rate": 3.0174436090225562e-05, - "loss": 0.1115, + "loss": 0.1347, "step": 180000 }, { - "epoch": 7.072969468348462, - "eval_loss": 0.4096328318119049, - "eval_runtime": 145.6721, - "eval_samples_per_second": 38.827, - "eval_steps_per_second": 4.853, - "eval_wer": 0.3049541814446887, + "epoch": 6.888106536047758, + "eval_loss": Infinity, + "eval_runtime": 181.9343, + "eval_samples_per_second": 38.596, + "eval_steps_per_second": 4.826, + "eval_wer": 0.3038925322614766, "step": 180000 }, { - "epoch": 7.092616605760541, - "grad_norm": 0.6158276796340942, + "epoch": 6.9072401653145565, + "grad_norm": 4.727964401245117, "learning_rate": 2.9422556390977444e-05, - "loss": 0.1032, + "loss": 0.1327, "step": 180500 }, { - "epoch": 7.11226374317262, - "grad_norm": 1.272557258605957, + "epoch": 6.926373794581356, + "grad_norm": 0.6982028484344482, "learning_rate": 2.867067669172932e-05, - "loss": 0.1109, + "loss": 0.1307, "step": 181000 }, { - "epoch": 7.11226374317262, - "eval_loss": 0.41377753019332886, - "eval_runtime": 146.2393, - "eval_samples_per_second": 38.676, - "eval_steps_per_second": 4.835, - "eval_wer": 0.3038147357609411, + "epoch": 6.926373794581356, + "eval_loss": Infinity, + "eval_runtime": 180.8488, + "eval_samples_per_second": 38.828, + "eval_steps_per_second": 4.855, + "eval_wer": 0.3028876665961498, "step": 181000 }, { - "epoch": 7.131910880584699, - "grad_norm": 0.4577464163303375, + "epoch": 6.945507423848156, + "grad_norm": 0.2889564633369446, "learning_rate": 2.7918796992481203e-05, - "loss": 0.1157, + "loss": 0.1349, "step": 181500 }, { - "epoch": 7.151558017996778, - "grad_norm": 1.748535394668579, + "epoch": 6.9646410531149545, + "grad_norm": 0.3712177872657776, "learning_rate": 2.716691729323308e-05, - "loss": 0.1113, + "loss": 0.1293, "step": 182000 }, { - "epoch": 7.151558017996778, - "eval_loss": 0.41194456815719604, - "eval_runtime": 146.2502, - "eval_samples_per_second": 38.673, - "eval_steps_per_second": 4.834, - "eval_wer": 0.3052270064675579, + "epoch": 6.9646410531149545, + "eval_loss": Infinity, + "eval_runtime": 180.868, + "eval_samples_per_second": 38.824, + "eval_steps_per_second": 4.854, + "eval_wer": 0.30258356251322194, "step": 182000 }, { - "epoch": 7.171205155408857, - "grad_norm": 0.8288829326629639, - "learning_rate": 2.6416541353383456e-05, - "loss": 0.1114, + "epoch": 6.983774682381754, + "grad_norm": 0.7685525417327881, + "learning_rate": 2.6415037593984963e-05, + "loss": 0.1339, "step": 182500 }, { - "epoch": 7.190852292820936, - "grad_norm": 0.5038288235664368, - "learning_rate": 2.5664661654135334e-05, - "loss": 0.1075, + "epoch": 7.002908311648554, + "grad_norm": 0.19924980401992798, + "learning_rate": 2.566315789473684e-05, + "loss": 0.1259, "step": 183000 }, { - "epoch": 7.190852292820936, - "eval_loss": 0.41699934005737305, - "eval_runtime": 145.6145, - "eval_samples_per_second": 38.842, - "eval_steps_per_second": 4.855, - "eval_wer": 0.30066922373256727, + "epoch": 7.002908311648554, + "eval_loss": Infinity, + "eval_runtime": 180.6261, + "eval_samples_per_second": 38.876, + "eval_steps_per_second": 4.861, + "eval_wer": 0.3025174529299767, "step": 183000 }, { - "epoch": 7.210499430233015, - "grad_norm": 0.41699087619781494, - "learning_rate": 2.4912781954887215e-05, - "loss": 0.1155, + "epoch": 7.0220419409153525, + "grad_norm": 0.3780413568019867, + "learning_rate": 2.4911278195488722e-05, + "loss": 0.1163, "step": 183500 }, { - "epoch": 7.230146567645094, - "grad_norm": 0.9346128702163696, - "learning_rate": 2.4162406015037593e-05, - "loss": 0.1081, + "epoch": 7.041175570182152, + "grad_norm": 0.5037872195243835, + "learning_rate": 2.4160902255639094e-05, + "loss": 0.1151, "step": 184000 }, { - "epoch": 7.230146567645094, - "eval_loss": 0.4134830832481384, - "eval_runtime": 145.6714, - "eval_samples_per_second": 38.827, - "eval_steps_per_second": 4.853, - "eval_wer": 0.3031246489383897, + "epoch": 7.041175570182152, + "eval_loss": Infinity, + "eval_runtime": 180.4745, + "eval_samples_per_second": 38.909, + "eval_steps_per_second": 4.865, + "eval_wer": 0.3033900994288132, "step": 184000 }, { - "epoch": 7.249793705057173, - "grad_norm": 1.0166319608688354, - "learning_rate": 2.341052631578947e-05, - "loss": 0.1173, + "epoch": 7.060309199448952, + "grad_norm": 0.2655356824398041, + "learning_rate": 2.3409022556390975e-05, + "loss": 0.1104, "step": 184500 }, { - "epoch": 7.269440842469252, - "grad_norm": 0.8515588045120239, - "learning_rate": 2.2658646616541353e-05, - "loss": 0.1108, + "epoch": 7.0794428287157505, + "grad_norm": 0.6396870613098145, + "learning_rate": 2.2657142857142853e-05, + "loss": 0.1143, "step": 185000 }, { - "epoch": 7.269440842469252, - "eval_loss": 0.41293400526046753, - "eval_runtime": 146.3235, - "eval_samples_per_second": 38.654, - "eval_steps_per_second": 4.832, - "eval_wer": 0.3003161560559131, + "epoch": 7.0794428287157505, + "eval_loss": Infinity, + "eval_runtime": 180.1648, + "eval_samples_per_second": 38.975, + "eval_steps_per_second": 4.873, + "eval_wer": 0.30249100909667864, "step": 185000 }, { - "epoch": 7.289087979881331, - "grad_norm": 0.5291551351547241, - "learning_rate": 2.190676691729323e-05, - "loss": 0.1064, + "epoch": 7.09857645798255, + "grad_norm": 1.1950030326843262, + "learning_rate": 2.1905263157894735e-05, + "loss": 0.1217, "step": 185500 }, { - "epoch": 7.30873511729341, - "grad_norm": 1.0743286609649658, + "epoch": 7.11771008724935, + "grad_norm": 0.6003520488739014, "learning_rate": 2.1154887218045113e-05, - "loss": 0.1044, + "loss": 0.1105, "step": 186000 }, { - "epoch": 7.30873511729341, - "eval_loss": 0.41300591826438904, - "eval_runtime": 145.5862, - "eval_samples_per_second": 38.85, - "eval_steps_per_second": 4.856, - "eval_wer": 0.3022740768082682, + "epoch": 7.11771008724935, + "eval_loss": Infinity, + "eval_runtime": 179.583, + "eval_samples_per_second": 39.102, + "eval_steps_per_second": 4.889, + "eval_wer": 0.30058705309921724, "step": 186000 }, { - "epoch": 7.328382254705489, - "grad_norm": 0.4959530532360077, + "epoch": 7.1368437165161485, + "grad_norm": 0.5612542033195496, "learning_rate": 2.040300751879699e-05, - "loss": 0.1063, + "loss": 0.1124, "step": 186500 }, { - "epoch": 7.348029392117568, - "grad_norm": 0.6196532845497131, + "epoch": 7.155977345782948, + "grad_norm": 0.47781071066856384, "learning_rate": 1.9651127819548872e-05, - "loss": 0.1121, + "loss": 0.1126, "step": 187000 }, { - "epoch": 7.348029392117568, - "eval_loss": 0.40789899230003357, - "eval_runtime": 145.8295, - "eval_samples_per_second": 38.785, - "eval_steps_per_second": 4.848, - "eval_wer": 0.2992890500874645, + "epoch": 7.155977345782948, + "eval_loss": Infinity, + "eval_runtime": 179.6562, + "eval_samples_per_second": 39.086, + "eval_steps_per_second": 4.887, + "eval_wer": 0.3006134969325153, "step": 187000 }, { - "epoch": 7.367676529529647, - "grad_norm": 1.7419555187225342, + "epoch": 7.175110975049748, + "grad_norm": 0.5884853601455688, "learning_rate": 1.889924812030075e-05, - "loss": 0.1092, + "loss": 0.1147, "step": 187500 }, { - "epoch": 7.387323666941727, - "grad_norm": 0.931948721408844, + "epoch": 7.194244604316546, + "grad_norm": 0.269551157951355, "learning_rate": 1.814736842105263e-05, - "loss": 0.1052, + "loss": 0.1139, "step": 188000 }, { - "epoch": 7.387323666941727, - "eval_loss": 0.40476053953170776, - "eval_runtime": 145.5337, - "eval_samples_per_second": 38.864, - "eval_steps_per_second": 4.858, - "eval_wer": 0.301904960600857, + "epoch": 7.194244604316546, + "eval_loss": Infinity, + "eval_runtime": 179.5976, + "eval_samples_per_second": 39.099, + "eval_steps_per_second": 4.889, + "eval_wer": 0.2996482970171356, "step": 188000 }, { - "epoch": 7.406970804353806, - "grad_norm": 0.3558327853679657, + "epoch": 7.213378233583346, + "grad_norm": 0.7385743260383606, "learning_rate": 1.739548872180451e-05, - "loss": 0.112, + "loss": 0.1088, "step": 188500 }, { - "epoch": 7.426617941765885, - "grad_norm": 0.48914971947669983, + "epoch": 7.232511862850146, + "grad_norm": 0.7600038647651672, "learning_rate": 1.6643609022556388e-05, - "loss": 0.103, + "loss": 0.1101, "step": 189000 }, { - "epoch": 7.426617941765885, - "eval_loss": 0.415385365486145, - "eval_runtime": 145.9476, - "eval_samples_per_second": 38.754, - "eval_steps_per_second": 4.844, - "eval_wer": 0.3015197958626888, + "epoch": 7.232511862850146, + "eval_loss": Infinity, + "eval_runtime": 180.6841, + "eval_samples_per_second": 38.863, + "eval_steps_per_second": 4.859, + "eval_wer": 0.29820710810239054, "step": 189000 }, { - "epoch": 7.446265079177964, - "grad_norm": 0.5291373133659363, - "learning_rate": 1.5893233082706766e-05, - "loss": 0.1073, + "epoch": 7.251645492116944, + "grad_norm": 0.4615612328052521, + "learning_rate": 1.589172932330827e-05, + "loss": 0.12, "step": 189500 }, { - "epoch": 7.465912216590043, - "grad_norm": 0.6397764086723328, - "learning_rate": 1.514285714285714e-05, - "loss": 0.1105, + "epoch": 7.270779121383744, + "grad_norm": 17.22515296936035, + "learning_rate": 1.5139849624060148e-05, + "loss": 0.1187, "step": 190000 }, { - "epoch": 7.465912216590043, - "eval_loss": 0.4119686484336853, - "eval_runtime": 145.6307, - "eval_samples_per_second": 38.838, - "eval_steps_per_second": 4.855, - "eval_wer": 0.30187286353934295, + "epoch": 7.270779121383744, + "eval_loss": Infinity, + "eval_runtime": 180.7132, + "eval_samples_per_second": 38.857, + "eval_steps_per_second": 4.859, + "eval_wer": 0.2988285381848953, "step": 190000 }, { - "epoch": 7.485559354002122, - "grad_norm": 0.45867177844047546, - "learning_rate": 1.439097744360902e-05, - "loss": 0.1079, + "epoch": 7.289912750650544, + "grad_norm": 0.272981196641922, + "learning_rate": 1.4387969924812028e-05, + "loss": 0.1106, "step": 190500 }, { - "epoch": 7.505206491414201, - "grad_norm": 1.0139355659484863, - "learning_rate": 1.36390977443609e-05, - "loss": 0.1093, + "epoch": 7.309046379917342, + "grad_norm": 0.7808548212051392, + "learning_rate": 1.3636090225563907e-05, + "loss": 0.1174, "step": 191000 }, { - "epoch": 7.505206491414201, - "eval_loss": 0.4104667901992798, - "eval_runtime": 146.3292, - "eval_samples_per_second": 38.653, - "eval_steps_per_second": 4.832, - "eval_wer": 0.3007494663863523, + "epoch": 7.309046379917342, + "eval_loss": Infinity, + "eval_runtime": 180.5814, + "eval_samples_per_second": 38.886, + "eval_steps_per_second": 4.862, + "eval_wer": 0.2993441929342077, "step": 191000 }, { - "epoch": 7.52485362882628, - "grad_norm": 0.35021767020225525, - "learning_rate": 1.288721804511278e-05, - "loss": 0.1108, + "epoch": 7.328180009184142, + "grad_norm": 0.32894080877304077, + "learning_rate": 1.2885714285714284e-05, + "loss": 0.1129, "step": 191500 }, { - "epoch": 7.544500766238359, - "grad_norm": 0.7307072281837463, - "learning_rate": 1.2136842105263156e-05, - "loss": 0.1058, + "epoch": 7.347313638450942, + "grad_norm": 0.6160246729850769, + "learning_rate": 1.2133834586466163e-05, + "loss": 0.1132, "step": 192000 }, { - "epoch": 7.544500766238359, - "eval_loss": 0.41022607684135437, - "eval_runtime": 146.2108, - "eval_samples_per_second": 38.684, - "eval_steps_per_second": 4.835, - "eval_wer": 0.3011025340630065, + "epoch": 7.347313638450942, + "eval_loss": Infinity, + "eval_runtime": 180.7946, + "eval_samples_per_second": 38.84, + "eval_steps_per_second": 4.856, + "eval_wer": 0.2995689655172414, "step": 192000 }, { - "epoch": 7.564147903650438, - "grad_norm": 0.46207743883132935, - "learning_rate": 1.1384962406015036e-05, - "loss": 0.1053, + "epoch": 7.36644726771774, + "grad_norm": 0.3549739718437195, + "learning_rate": 1.1381954887218043e-05, + "loss": 0.1214, "step": 192500 }, { - "epoch": 7.583795041062517, - "grad_norm": 0.47636836767196655, - "learning_rate": 1.0633082706766916e-05, - "loss": 0.1043, + "epoch": 7.38558089698454, + "grad_norm": 0.5132611393928528, + "learning_rate": 1.0630075187969923e-05, + "loss": 0.1108, "step": 193000 }, { - "epoch": 7.583795041062517, - "eval_loss": 0.41014641523361206, - "eval_runtime": 145.8628, - "eval_samples_per_second": 38.776, - "eval_steps_per_second": 4.847, - "eval_wer": 0.2994495353950346, + "epoch": 7.38558089698454, + "eval_loss": Infinity, + "eval_runtime": 181.6179, + "eval_samples_per_second": 38.664, + "eval_steps_per_second": 4.834, + "eval_wer": 0.2995160778506452, "step": 193000 }, { - "epoch": 7.603442178474596, - "grad_norm": 1.0540902614593506, - "learning_rate": 9.881203007518796e-06, - "loss": 0.1072, + "epoch": 7.40471452625134, + "grad_norm": 0.1533355563879013, + "learning_rate": 9.878195488721803e-06, + "loss": 0.117, "step": 193500 }, { - "epoch": 7.623089315886675, - "grad_norm": 0.8974863290786743, - "learning_rate": 9.129323308270676e-06, - "loss": 0.1098, + "epoch": 7.423848155518138, + "grad_norm": 0.542405903339386, + "learning_rate": 9.126315789473683e-06, + "loss": 0.1119, "step": 194000 }, { - "epoch": 7.623089315886675, - "eval_loss": 0.408490389585495, - "eval_runtime": 146.1703, - "eval_samples_per_second": 38.695, - "eval_steps_per_second": 4.837, - "eval_wer": 0.29980260307168877, + "epoch": 7.423848155518138, + "eval_loss": Infinity, + "eval_runtime": 180.7251, + "eval_samples_per_second": 38.855, + "eval_steps_per_second": 4.858, + "eval_wer": 0.2991194203511741, "step": 194000 }, { - "epoch": 7.642736453298754, - "grad_norm": 0.49042123556137085, - "learning_rate": 8.377443609022555e-06, - "loss": 0.1035, + "epoch": 7.442981784784938, + "grad_norm": 0.4688265919685364, + "learning_rate": 8.37593984962406e-06, + "loss": 0.1116, "step": 194500 }, { - "epoch": 7.662383590710833, - "grad_norm": 0.7251204252243042, - "learning_rate": 7.625563909774436e-06, - "loss": 0.1057, + "epoch": 7.462115414051738, + "grad_norm": 0.7588228583335876, + "learning_rate": 7.624060150375939e-06, + "loss": 0.1098, "step": 195000 }, { - "epoch": 7.662383590710833, - "eval_loss": 0.40715456008911133, - "eval_runtime": 146.2248, - "eval_samples_per_second": 38.68, - "eval_steps_per_second": 4.835, - "eval_wer": 0.2982137985267449, + "epoch": 7.462115414051738, + "eval_loss": Infinity, + "eval_runtime": 181.1196, + "eval_samples_per_second": 38.77, + "eval_steps_per_second": 4.848, + "eval_wer": 0.29845832451872223, "step": 195000 }, { - "epoch": 7.682030728122912, - "grad_norm": 0.9783725142478943, - "learning_rate": 6.8751879699248115e-06, - "loss": 0.1078, + "epoch": 7.481249043318536, + "grad_norm": 0.9082927703857422, + "learning_rate": 6.8721804511278185e-06, + "loss": 0.1049, "step": 195500 }, { - "epoch": 7.701677865534991, - "grad_norm": 0.66826331615448, - "learning_rate": 6.123308270676691e-06, - "loss": 0.1021, + "epoch": 7.500382672585336, + "grad_norm": 0.5702168345451355, + "learning_rate": 6.120300751879698e-06, + "loss": 0.1053, "step": 196000 }, { - "epoch": 7.701677865534991, - "eval_loss": 0.4079470634460449, - "eval_runtime": 146.5661, - "eval_samples_per_second": 38.59, - "eval_steps_per_second": 4.824, - "eval_wer": 0.2973792749273804, + "epoch": 7.500382672585336, + "eval_loss": Infinity, + "eval_runtime": 180.5918, + "eval_samples_per_second": 38.883, + "eval_steps_per_second": 4.862, + "eval_wer": 0.29765178760313094, "step": 196000 }, { - "epoch": 7.72132500294707, - "grad_norm": 0.34865960478782654, - "learning_rate": 5.371428571428571e-06, - "loss": 0.108, + "epoch": 7.519516301852136, + "grad_norm": 1.2016927003860474, + "learning_rate": 5.368421052631578e-06, + "loss": 0.1011, "step": 196500 }, { - "epoch": 7.740972140359149, - "grad_norm": 0.6881831884384155, - "learning_rate": 4.619548872180451e-06, - "loss": 0.0994, + "epoch": 7.538649931118934, + "grad_norm": 0.6198543906211853, + "learning_rate": 4.616541353383459e-06, + "loss": 0.11, "step": 197000 }, { - "epoch": 7.740972140359149, - "eval_loss": 0.4088830053806305, - "eval_runtime": 145.6213, - "eval_samples_per_second": 38.84, - "eval_steps_per_second": 4.855, - "eval_wer": 0.29871130298021215, + "epoch": 7.538649931118934, + "eval_loss": Infinity, + "eval_runtime": 181.146, + "eval_samples_per_second": 38.764, + "eval_steps_per_second": 4.847, + "eval_wer": 0.2975327903532896, "step": 197000 }, { - "epoch": 7.760619277771228, - "grad_norm": 0.7812435030937195, - "learning_rate": 3.867669172932331e-06, - "loss": 0.1017, + "epoch": 7.557783560385734, + "grad_norm": 0.6191059947013855, + "learning_rate": 3.8646616541353386e-06, + "loss": 0.1118, "step": 197500 }, { - "epoch": 7.780266415183307, - "grad_norm": 0.23445354402065277, - "learning_rate": 3.118796992481203e-06, - "loss": 0.1065, + "epoch": 7.576917189652534, + "grad_norm": 0.5622895956039429, + "learning_rate": 3.1127819548872175e-06, + "loss": 0.1091, "step": 198000 }, { - "epoch": 7.780266415183307, - "eval_loss": 0.4065949022769928, - "eval_runtime": 146.17, - "eval_samples_per_second": 38.695, - "eval_steps_per_second": 4.837, - "eval_wer": 0.2973792749273804, + "epoch": 7.576917189652534, + "eval_loss": Infinity, + "eval_runtime": 182.2479, + "eval_samples_per_second": 38.53, + "eval_steps_per_second": 4.818, + "eval_wer": 0.295893272688809, "step": 198000 }, { - "epoch": 7.799913552595386, - "grad_norm": 0.4873931407928467, - "learning_rate": 2.366917293233083e-06, - "loss": 0.1052, + "epoch": 7.596050818919332, + "grad_norm": 1.4227417707443237, + "learning_rate": 2.362406015037594e-06, + "loss": 0.1126, "step": 198500 }, { - "epoch": 7.8195606900074655, - "grad_norm": 0.24652531743049622, - "learning_rate": 1.6150375939849622e-06, - "loss": 0.1111, + "epoch": 7.615184448186132, + "grad_norm": 0.5790704488754272, + "learning_rate": 1.6105263157894734e-06, + "loss": 0.108, "step": 199000 }, { - "epoch": 7.8195606900074655, - "eval_loss": 0.40712785720825195, - "eval_runtime": 145.6053, - "eval_samples_per_second": 38.845, - "eval_steps_per_second": 4.856, - "eval_wer": 0.2981817014652309, + "epoch": 7.615184448186132, + "eval_loss": Infinity, + "eval_runtime": 182.5001, + "eval_samples_per_second": 38.477, + "eval_steps_per_second": 4.811, + "eval_wer": 0.2963295959382272, "step": 199000 }, { - "epoch": 7.839207827419545, - "grad_norm": 0.5532709956169128, - "learning_rate": 8.631578947368421e-07, - "loss": 0.106, + "epoch": 7.634318077452932, + "grad_norm": 0.6536182165145874, + "learning_rate": 8.601503759398495e-07, + "loss": 0.113, "step": 199500 }, { - "epoch": 7.858854964831624, - "grad_norm": 0.4496346116065979, - "learning_rate": 1.1127819548872179e-07, - "loss": 0.1065, + "epoch": 7.65345170671973, + "grad_norm": 0.41451194882392883, + "learning_rate": 1.0827067669172932e-07, + "loss": 0.1077, "step": 200000 }, { - "epoch": 7.858854964831624, - "eval_loss": 0.4064280092716217, - "eval_runtime": 146.2071, - "eval_samples_per_second": 38.685, - "eval_steps_per_second": 4.836, - "eval_wer": 0.298422429426586, + "epoch": 7.65345170671973, + "eval_loss": Infinity, + "eval_runtime": 181.1994, + "eval_samples_per_second": 38.753, + "eval_steps_per_second": 4.845, + "eval_wer": 0.2962370425216839, "step": 200000 }, { - "epoch": 7.858854964831624, + "epoch": 7.65345170671973, "step": 200000, - "total_flos": 2.4880981924796708e+20, - "train_loss": 0.2853552089881897, - "train_runtime": 103513.8909, - "train_samples_per_second": 15.457, - "train_steps_per_second": 1.932 + "total_flos": 2.464157327536675e+20, + "train_loss": 0.29584050216674806, + "train_runtime": 112002.2186, + "train_samples_per_second": 14.285, + "train_steps_per_second": 1.786 } ], "logging_steps": 500, @@ -4635,7 +4635,7 @@ "attributes": {} } }, - "total_flos": 2.4880981924796708e+20, + "total_flos": 2.464157327536675e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null