|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.858854964831624, |
|
"eval_steps": 1000, |
|
"global_step": 200000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01964713741207906, |
|
"grad_norm": 3.3487088680267334, |
|
"learning_rate": 0.0002982, |
|
"loss": 4.7396, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03929427482415812, |
|
"grad_norm": 4.3209547996521, |
|
"learning_rate": 0.00029925263157894736, |
|
"loss": 1.092, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03929427482415812, |
|
"eval_loss": 1.1751947402954102, |
|
"eval_runtime": 144.9265, |
|
"eval_samples_per_second": 39.027, |
|
"eval_steps_per_second": 4.878, |
|
"eval_wer": 0.8087496589687214, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05894141223623718, |
|
"grad_norm": 2.453352451324463, |
|
"learning_rate": 0.0002985007518796992, |
|
"loss": 0.8198, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07858854964831624, |
|
"grad_norm": 4.910336494445801, |
|
"learning_rate": 0.0002977488721804511, |
|
"loss": 0.7307, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07858854964831624, |
|
"eval_loss": 0.9363270998001099, |
|
"eval_runtime": 143.2813, |
|
"eval_samples_per_second": 39.475, |
|
"eval_steps_per_second": 4.934, |
|
"eval_wer": 0.6985123011988252, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0982356870603953, |
|
"grad_norm": 2.127075672149658, |
|
"learning_rate": 0.00029699699248120296, |
|
"loss": 0.6921, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.11788282447247436, |
|
"grad_norm": 2.57724928855896, |
|
"learning_rate": 0.0002962451127819549, |
|
"loss": 0.6633, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11788282447247436, |
|
"eval_loss": 0.8888041377067566, |
|
"eval_runtime": 144.2307, |
|
"eval_samples_per_second": 39.215, |
|
"eval_steps_per_second": 4.902, |
|
"eval_wer": 0.6624352040570686, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1375299618845534, |
|
"grad_norm": 3.0028977394104004, |
|
"learning_rate": 0.00029549323308270675, |
|
"loss": 0.6395, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.15717709929663248, |
|
"grad_norm": 1.864983320236206, |
|
"learning_rate": 0.0002947413533834586, |
|
"loss": 0.6273, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.15717709929663248, |
|
"eval_loss": 0.8302789330482483, |
|
"eval_runtime": 143.3729, |
|
"eval_samples_per_second": 39.45, |
|
"eval_steps_per_second": 4.931, |
|
"eval_wer": 0.640432668389209, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17682423670871153, |
|
"grad_norm": 1.865403175354004, |
|
"learning_rate": 0.000293990977443609, |
|
"loss": 0.6196, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.1964713741207906, |
|
"grad_norm": 2.015836000442505, |
|
"learning_rate": 0.0002932390977443609, |
|
"loss": 0.6031, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.1964713741207906, |
|
"eval_loss": 0.7927883267402649, |
|
"eval_runtime": 142.89, |
|
"eval_samples_per_second": 39.583, |
|
"eval_steps_per_second": 4.948, |
|
"eval_wer": 0.6134229911251625, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.21611851153286965, |
|
"grad_norm": 2.023449420928955, |
|
"learning_rate": 0.00029248721804511275, |
|
"loss": 0.6089, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.23576564894494872, |
|
"grad_norm": 2.1012802124023438, |
|
"learning_rate": 0.00029173533834586467, |
|
"loss": 0.6007, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.23576564894494872, |
|
"eval_loss": 0.771953821182251, |
|
"eval_runtime": 144.3238, |
|
"eval_samples_per_second": 39.19, |
|
"eval_steps_per_second": 4.899, |
|
"eval_wer": 0.5832357047712282, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2554127863570278, |
|
"grad_norm": 1.6933408975601196, |
|
"learning_rate": 0.0002909834586466165, |
|
"loss": 0.5693, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2750599237691068, |
|
"grad_norm": 1.8497469425201416, |
|
"learning_rate": 0.0002902315789473684, |
|
"loss": 0.5739, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2750599237691068, |
|
"eval_loss": 0.7533236145973206, |
|
"eval_runtime": 144.3945, |
|
"eval_samples_per_second": 39.17, |
|
"eval_steps_per_second": 4.896, |
|
"eval_wer": 0.5685673476593218, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2947070611811859, |
|
"grad_norm": 2.9493396282196045, |
|
"learning_rate": 0.0002894796992481203, |
|
"loss": 0.57, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.31435419859326497, |
|
"grad_norm": 3.0881130695343018, |
|
"learning_rate": 0.00028872781954887214, |
|
"loss": 0.5655, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.31435419859326497, |
|
"eval_loss": 0.7523130178451538, |
|
"eval_runtime": 143.9033, |
|
"eval_samples_per_second": 39.304, |
|
"eval_steps_per_second": 4.913, |
|
"eval_wer": 0.5594517821893406, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33400133600534404, |
|
"grad_norm": 2.8726112842559814, |
|
"learning_rate": 0.000287975939849624, |
|
"loss": 0.5541, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.35364847341742306, |
|
"grad_norm": 2.6348774433135986, |
|
"learning_rate": 0.0002872255639097744, |
|
"loss": 0.5584, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.35364847341742306, |
|
"eval_loss": 0.7173847556114197, |
|
"eval_runtime": 144.396, |
|
"eval_samples_per_second": 39.17, |
|
"eval_steps_per_second": 4.896, |
|
"eval_wer": 0.5668020092760507, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.37329561082950213, |
|
"grad_norm": 3.332054853439331, |
|
"learning_rate": 0.00028647368421052627, |
|
"loss": 0.5485, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.3929427482415812, |
|
"grad_norm": 2.823880434036255, |
|
"learning_rate": 0.0002857218045112782, |
|
"loss": 0.5454, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.3929427482415812, |
|
"eval_loss": 0.7537470459938049, |
|
"eval_runtime": 144.0918, |
|
"eval_samples_per_second": 39.253, |
|
"eval_steps_per_second": 4.907, |
|
"eval_wer": 0.5798815618430133, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.4125898856536603, |
|
"grad_norm": 2.8141980171203613, |
|
"learning_rate": 0.00028496992481203006, |
|
"loss": 0.5527, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.4322370230657393, |
|
"grad_norm": 4.174062728881836, |
|
"learning_rate": 0.0002842180451127819, |
|
"loss": 0.5322, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4322370230657393, |
|
"eval_loss": 0.7155322432518005, |
|
"eval_runtime": 144.2272, |
|
"eval_samples_per_second": 39.216, |
|
"eval_steps_per_second": 4.902, |
|
"eval_wer": 0.5613615573494246, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4518841604778184, |
|
"grad_norm": 2.2155849933624268, |
|
"learning_rate": 0.0002834676691729323, |
|
"loss": 0.5373, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.47153129788989745, |
|
"grad_norm": 2.1979432106018066, |
|
"learning_rate": 0.0002827157894736842, |
|
"loss": 0.5206, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.47153129788989745, |
|
"eval_loss": 0.7130174040794373, |
|
"eval_runtime": 144.2801, |
|
"eval_samples_per_second": 39.202, |
|
"eval_steps_per_second": 4.9, |
|
"eval_wer": 0.5746336922854712, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4911784353019765, |
|
"grad_norm": 3.059553861618042, |
|
"learning_rate": 0.00028196390977443605, |
|
"loss": 0.5303, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.5108255727140556, |
|
"grad_norm": 2.1422946453094482, |
|
"learning_rate": 0.0002812120300751879, |
|
"loss": 0.5304, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5108255727140556, |
|
"eval_loss": 0.6817054748535156, |
|
"eval_runtime": 144.6048, |
|
"eval_samples_per_second": 39.114, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.5390220025356679, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5304727101261346, |
|
"grad_norm": 3.0140066146850586, |
|
"learning_rate": 0.00028046015037593984, |
|
"loss": 0.5156, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5501198475382136, |
|
"grad_norm": 2.334425449371338, |
|
"learning_rate": 0.0002797082706766917, |
|
"loss": 0.55, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5501198475382136, |
|
"eval_loss": 0.6902604699134827, |
|
"eval_runtime": 144.3591, |
|
"eval_samples_per_second": 39.18, |
|
"eval_steps_per_second": 4.898, |
|
"eval_wer": 0.534014860939481, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5697669849502928, |
|
"grad_norm": 1.8595026731491089, |
|
"learning_rate": 0.0002789563909774436, |
|
"loss": 0.5296, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.5894141223623718, |
|
"grad_norm": 1.83585524559021, |
|
"learning_rate": 0.00027820451127819545, |
|
"loss": 0.5115, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5894141223623718, |
|
"eval_loss": 0.6973890662193298, |
|
"eval_runtime": 143.7301, |
|
"eval_samples_per_second": 39.352, |
|
"eval_steps_per_second": 4.919, |
|
"eval_wer": 0.5437081735167145, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.6090612597744509, |
|
"grad_norm": 3.633268117904663, |
|
"learning_rate": 0.00027745413533834584, |
|
"loss": 0.5083, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.6287083971865299, |
|
"grad_norm": 1.6785953044891357, |
|
"learning_rate": 0.0002767022556390977, |
|
"loss": 0.5097, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6287083971865299, |
|
"eval_loss": 0.6785907745361328, |
|
"eval_runtime": 144.7589, |
|
"eval_samples_per_second": 39.072, |
|
"eval_steps_per_second": 4.884, |
|
"eval_wer": 0.5198119112195279, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.648355534598609, |
|
"grad_norm": 3.022169589996338, |
|
"learning_rate": 0.00027595037593984963, |
|
"loss": 0.5025, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6680026720106881, |
|
"grad_norm": 3.9664957523345947, |
|
"learning_rate": 0.00027519999999999997, |
|
"loss": 0.504, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6680026720106881, |
|
"eval_loss": 0.6679931282997131, |
|
"eval_runtime": 144.4325, |
|
"eval_samples_per_second": 39.16, |
|
"eval_steps_per_second": 4.895, |
|
"eval_wer": 0.5067163101218084, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6876498094227671, |
|
"grad_norm": 1.8392106294631958, |
|
"learning_rate": 0.00027444812030075184, |
|
"loss": 0.4996, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.7072969468348461, |
|
"grad_norm": 22.285568237304688, |
|
"learning_rate": 0.0002736977443609022, |
|
"loss": 0.4951, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7072969468348461, |
|
"eval_loss": 0.6599805951118469, |
|
"eval_runtime": 144.0873, |
|
"eval_samples_per_second": 39.254, |
|
"eval_steps_per_second": 4.907, |
|
"eval_wer": 0.5222191908330792, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7269440842469252, |
|
"grad_norm": 1.8236407041549683, |
|
"learning_rate": 0.0002729458646616541, |
|
"loss": 0.5008, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7465912216590043, |
|
"grad_norm": 3.337568998336792, |
|
"learning_rate": 0.000272193984962406, |
|
"loss": 0.4982, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7465912216590043, |
|
"eval_loss": 0.6371914744377136, |
|
"eval_runtime": 144.7615, |
|
"eval_samples_per_second": 39.071, |
|
"eval_steps_per_second": 4.884, |
|
"eval_wer": 0.5010993243568551, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7662383590710834, |
|
"grad_norm": 5.579843997955322, |
|
"learning_rate": 0.0002714421052631579, |
|
"loss": 0.4966, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.7858854964831624, |
|
"grad_norm": 1.800836443901062, |
|
"learning_rate": 0.00027069022556390975, |
|
"loss": 0.493, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.7858854964831624, |
|
"eval_loss": 0.6563202738761902, |
|
"eval_runtime": 145.0143, |
|
"eval_samples_per_second": 39.003, |
|
"eval_steps_per_second": 4.875, |
|
"eval_wer": 0.523583315947425, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.8055326338952414, |
|
"grad_norm": 2.785470724105835, |
|
"learning_rate": 0.0002699383458646616, |
|
"loss": 0.5046, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.8251797713073206, |
|
"grad_norm": 2.336695909500122, |
|
"learning_rate": 0.00026918646616541354, |
|
"loss": 0.4928, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8251797713073206, |
|
"eval_loss": 0.6477507948875427, |
|
"eval_runtime": 146.5574, |
|
"eval_samples_per_second": 38.592, |
|
"eval_steps_per_second": 4.824, |
|
"eval_wer": 0.5030090995169392, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8448269087193996, |
|
"grad_norm": 1.3052055835723877, |
|
"learning_rate": 0.0002684360902255639, |
|
"loss": 0.4878, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8644740461314786, |
|
"grad_norm": 2.2889890670776367, |
|
"learning_rate": 0.00026768571428571427, |
|
"loss": 0.4964, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8644740461314786, |
|
"eval_loss": 0.6431675553321838, |
|
"eval_runtime": 146.4682, |
|
"eval_samples_per_second": 38.616, |
|
"eval_steps_per_second": 4.827, |
|
"eval_wer": 0.5103272295421354, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8841211835435577, |
|
"grad_norm": 3.50435733795166, |
|
"learning_rate": 0.00026693383458646614, |
|
"loss": 0.4796, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.9037683209556368, |
|
"grad_norm": 1.364182472229004, |
|
"learning_rate": 0.000266181954887218, |
|
"loss": 0.4818, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9037683209556368, |
|
"eval_loss": 0.6235994100570679, |
|
"eval_runtime": 146.6113, |
|
"eval_samples_per_second": 38.578, |
|
"eval_steps_per_second": 4.822, |
|
"eval_wer": 0.48960857633483656, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9234154583677158, |
|
"grad_norm": 2.1214494705200195, |
|
"learning_rate": 0.00026543007518796993, |
|
"loss": 0.4688, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9430625957797949, |
|
"grad_norm": 2.6261446475982666, |
|
"learning_rate": 0.00026467819548872174, |
|
"loss": 0.4752, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9430625957797949, |
|
"eval_loss": 0.6326233744621277, |
|
"eval_runtime": 145.9082, |
|
"eval_samples_per_second": 38.764, |
|
"eval_steps_per_second": 4.846, |
|
"eval_wer": 0.500810450803229, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9627097331918739, |
|
"grad_norm": 4.142037391662598, |
|
"learning_rate": 0.00026392631578947367, |
|
"loss": 0.4698, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.982356870603953, |
|
"grad_norm": 2.097465991973877, |
|
"learning_rate": 0.00026317443609022553, |
|
"loss": 0.4736, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.982356870603953, |
|
"eval_loss": 0.6309667229652405, |
|
"eval_runtime": 145.598, |
|
"eval_samples_per_second": 38.847, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.5081446293591821, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.002004008016032, |
|
"grad_norm": 1.872559666633606, |
|
"learning_rate": 0.0002624225563909774, |
|
"loss": 0.461, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.0216511454281112, |
|
"grad_norm": 0.9968547821044922, |
|
"learning_rate": 0.00026167067669172927, |
|
"loss": 0.4241, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0216511454281112, |
|
"eval_loss": 0.6126999258995056, |
|
"eval_runtime": 145.6037, |
|
"eval_samples_per_second": 38.845, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.47134534833336006, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0412982828401902, |
|
"grad_norm": 0.840844988822937, |
|
"learning_rate": 0.00026092030075187966, |
|
"loss": 0.4146, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.0609454202522692, |
|
"grad_norm": 0.8603857159614563, |
|
"learning_rate": 0.00026016842105263153, |
|
"loss": 0.4196, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0609454202522692, |
|
"eval_loss": 0.6066301465034485, |
|
"eval_runtime": 144.6102, |
|
"eval_samples_per_second": 39.112, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.46828007895877133, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0805925576643483, |
|
"grad_norm": 0.8338613510131836, |
|
"learning_rate": 0.00025941654135338345, |
|
"loss": 0.4213, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.1002396950764273, |
|
"grad_norm": 1.0736676454544067, |
|
"learning_rate": 0.0002586646616541353, |
|
"loss": 0.4177, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.1002396950764273, |
|
"eval_loss": 0.5958611965179443, |
|
"eval_runtime": 145.1774, |
|
"eval_samples_per_second": 38.959, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.47737959589799556, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.1198868324885065, |
|
"grad_norm": 0.7483401894569397, |
|
"learning_rate": 0.0002579127819548872, |
|
"loss": 0.4204, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.1395339699005855, |
|
"grad_norm": 1.1266822814941406, |
|
"learning_rate": 0.0002571624060150376, |
|
"loss": 0.4204, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1395339699005855, |
|
"eval_loss": 0.607071042060852, |
|
"eval_runtime": 145.6021, |
|
"eval_samples_per_second": 38.846, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.4893517998427244, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1591811073126645, |
|
"grad_norm": 2.852926731109619, |
|
"learning_rate": 0.00025641052631578945, |
|
"loss": 0.4255, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.1788282447247436, |
|
"grad_norm": 2.134554862976074, |
|
"learning_rate": 0.00025566015037593984, |
|
"loss": 0.4238, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1788282447247436, |
|
"eval_loss": 0.600638747215271, |
|
"eval_runtime": 144.9068, |
|
"eval_samples_per_second": 39.032, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.476432732583332, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1984753821368226, |
|
"grad_norm": 1.6866382360458374, |
|
"learning_rate": 0.0002549082706766917, |
|
"loss": 0.4156, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.2181225195489018, |
|
"grad_norm": 0.7769395709037781, |
|
"learning_rate": 0.0002541563909774436, |
|
"loss": 0.4253, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2181225195489018, |
|
"eval_loss": 0.5803025960922241, |
|
"eval_runtime": 144.9773, |
|
"eval_samples_per_second": 39.013, |
|
"eval_steps_per_second": 4.877, |
|
"eval_wer": 0.4623421225786779, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2377696569609808, |
|
"grad_norm": 1.305306077003479, |
|
"learning_rate": 0.00025340451127819544, |
|
"loss": 0.4257, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.2574167943730599, |
|
"grad_norm": 0.9590096473693848, |
|
"learning_rate": 0.00025265263157894736, |
|
"loss": 0.4156, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2574167943730599, |
|
"eval_loss": 0.5940248966217041, |
|
"eval_runtime": 145.3704, |
|
"eval_samples_per_second": 38.908, |
|
"eval_steps_per_second": 4.863, |
|
"eval_wer": 0.45728683539022, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2770639317851389, |
|
"grad_norm": 1.598183274269104, |
|
"learning_rate": 0.00025190075187969923, |
|
"loss": 0.4103, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.296711069197218, |
|
"grad_norm": 1.9396251440048218, |
|
"learning_rate": 0.0002511488721804511, |
|
"loss": 0.4058, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.296711069197218, |
|
"eval_loss": 0.5802159905433655, |
|
"eval_runtime": 146.1677, |
|
"eval_samples_per_second": 38.695, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 0.4614594533870424, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.316358206609297, |
|
"grad_norm": 1.7467131614685059, |
|
"learning_rate": 0.00025039699248120297, |
|
"loss": 0.417, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.3360053440213762, |
|
"grad_norm": 0.9871892333030701, |
|
"learning_rate": 0.0002496451127819549, |
|
"loss": 0.404, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3360053440213762, |
|
"eval_loss": 0.5882492065429688, |
|
"eval_runtime": 145.2339, |
|
"eval_samples_per_second": 38.944, |
|
"eval_steps_per_second": 4.868, |
|
"eval_wer": 0.46015952239572466, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3556524814334552, |
|
"grad_norm": 1.0927810668945312, |
|
"learning_rate": 0.00024889323308270676, |
|
"loss": 0.3955, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.3752996188455342, |
|
"grad_norm": 1.8038376569747925, |
|
"learning_rate": 0.0002481413533834586, |
|
"loss": 0.3995, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3752996188455342, |
|
"eval_loss": 0.5840802788734436, |
|
"eval_runtime": 144.2215, |
|
"eval_samples_per_second": 39.217, |
|
"eval_steps_per_second": 4.902, |
|
"eval_wer": 0.46152364751007047, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3949467562576132, |
|
"grad_norm": 1.3720190525054932, |
|
"learning_rate": 0.0002473894736842105, |
|
"loss": 0.4077, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.4145938936696925, |
|
"grad_norm": 1.4488073587417603, |
|
"learning_rate": 0.0002466375939849624, |
|
"loss": 0.4049, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.4145938936696925, |
|
"eval_loss": 0.5853234529495239, |
|
"eval_runtime": 144.437, |
|
"eval_samples_per_second": 39.159, |
|
"eval_steps_per_second": 4.895, |
|
"eval_wer": 0.4635618109162106, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.4342410310817715, |
|
"grad_norm": 2.5115835666656494, |
|
"learning_rate": 0.00024588571428571423, |
|
"loss": 0.4202, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.4538881684938505, |
|
"grad_norm": 0.7861095070838928, |
|
"learning_rate": 0.0002451353383458647, |
|
"loss": 0.4018, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.4538881684938505, |
|
"eval_loss": 0.5737255215644836, |
|
"eval_runtime": 143.969, |
|
"eval_samples_per_second": 39.286, |
|
"eval_steps_per_second": 4.911, |
|
"eval_wer": 0.4532747027009677, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.4735353059059295, |
|
"grad_norm": 1.29477858543396, |
|
"learning_rate": 0.0002443834586466165, |
|
"loss": 0.4028, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.4931824433180085, |
|
"grad_norm": 1.5367540121078491, |
|
"learning_rate": 0.0002436315789473684, |
|
"loss": 0.3906, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.4931824433180085, |
|
"eval_loss": 0.5848459005355835, |
|
"eval_runtime": 144.7268, |
|
"eval_samples_per_second": 39.081, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.4637222962237807, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.5128295807300876, |
|
"grad_norm": 1.7903566360473633, |
|
"learning_rate": 0.00024288120300751878, |
|
"loss": 0.4147, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.5324767181421666, |
|
"grad_norm": 0.7342734336853027, |
|
"learning_rate": 0.00024212932330827064, |
|
"loss": 0.3932, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.5324767181421666, |
|
"eval_loss": 0.551567792892456, |
|
"eval_runtime": 144.7104, |
|
"eval_samples_per_second": 39.085, |
|
"eval_steps_per_second": 4.886, |
|
"eval_wer": 0.44000256776492114, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.5521238555542456, |
|
"grad_norm": 1.3719693422317505, |
|
"learning_rate": 0.00024137894736842104, |
|
"loss": 0.3984, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.5717709929663248, |
|
"grad_norm": 3.398484706878662, |
|
"learning_rate": 0.0002406270676691729, |
|
"loss": 0.4026, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5717709929663248, |
|
"eval_loss": 0.5641522407531738, |
|
"eval_runtime": 145.5525, |
|
"eval_samples_per_second": 38.859, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.44844409494310794, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5914181303784039, |
|
"grad_norm": 1.1821295022964478, |
|
"learning_rate": 0.0002398751879699248, |
|
"loss": 0.4086, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.611065267790483, |
|
"grad_norm": 1.3344157934188843, |
|
"learning_rate": 0.00023912330827067667, |
|
"loss": 0.396, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.611065267790483, |
|
"eval_loss": 0.5584043264389038, |
|
"eval_runtime": 145.5026, |
|
"eval_samples_per_second": 38.872, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.4512044422333135, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.630712405202562, |
|
"grad_norm": 0.6132605671882629, |
|
"learning_rate": 0.00023837142857142856, |
|
"loss": 0.3863, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.6503595426146411, |
|
"grad_norm": 2.896801710128784, |
|
"learning_rate": 0.00023761954887218043, |
|
"loss": 0.3976, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6503595426146411, |
|
"eval_loss": 0.5537524819374084, |
|
"eval_runtime": 145.3562, |
|
"eval_samples_per_second": 38.911, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.4436455842467622, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6700066800267201, |
|
"grad_norm": 0.4839102029800415, |
|
"learning_rate": 0.00023686766917293232, |
|
"loss": 0.3977, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.6896538174387992, |
|
"grad_norm": 0.7648475170135498, |
|
"learning_rate": 0.0002361157894736842, |
|
"loss": 0.3936, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.6896538174387992, |
|
"eval_loss": 0.551811158657074, |
|
"eval_runtime": 144.7074, |
|
"eval_samples_per_second": 39.086, |
|
"eval_steps_per_second": 4.886, |
|
"eval_wer": 0.4412222561024538, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.7093009548508782, |
|
"grad_norm": 1.953736662864685, |
|
"learning_rate": 0.00023536541353383458, |
|
"loss": 0.3865, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.7289480922629572, |
|
"grad_norm": 1.4531214237213135, |
|
"learning_rate": 0.00023461353383458645, |
|
"loss": 0.3879, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.7289480922629572, |
|
"eval_loss": 0.5469211935997009, |
|
"eval_runtime": 145.1619, |
|
"eval_samples_per_second": 38.963, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.42974755661119224, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.7485952296750362, |
|
"grad_norm": 1.0637991428375244, |
|
"learning_rate": 0.00023386165413533835, |
|
"loss": 0.3942, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.7682423670871152, |
|
"grad_norm": 1.0606558322906494, |
|
"learning_rate": 0.00023310977443609021, |
|
"loss": 0.3939, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7682423670871152, |
|
"eval_loss": 0.5502393245697021, |
|
"eval_runtime": 144.8654, |
|
"eval_samples_per_second": 39.043, |
|
"eval_steps_per_second": 4.88, |
|
"eval_wer": 0.44024329572627624, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7878895044991945, |
|
"grad_norm": 0.7499143481254578, |
|
"learning_rate": 0.0002323578947368421, |
|
"loss": 0.3926, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.8075366419112735, |
|
"grad_norm": 1.3015657663345337, |
|
"learning_rate": 0.00023160601503759395, |
|
"loss": 0.386, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.8075366419112735, |
|
"eval_loss": 0.5626779198646545, |
|
"eval_runtime": 145.6466, |
|
"eval_samples_per_second": 38.834, |
|
"eval_steps_per_second": 4.854, |
|
"eval_wer": 0.4409012854873136, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.8271837793233527, |
|
"grad_norm": 1.1235235929489136, |
|
"learning_rate": 0.00023085413533834585, |
|
"loss": 0.3833, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.8468309167354318, |
|
"grad_norm": 0.8004291653633118, |
|
"learning_rate": 0.0002301022556390977, |
|
"loss": 0.3823, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.8468309167354318, |
|
"eval_loss": 0.5602549910545349, |
|
"eval_runtime": 145.4825, |
|
"eval_samples_per_second": 38.878, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.43724222047471556, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.8664780541475108, |
|
"grad_norm": 2.6733715534210205, |
|
"learning_rate": 0.0002293503759398496, |
|
"loss": 0.3868, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.8861251915595898, |
|
"grad_norm": 1.501878261566162, |
|
"learning_rate": 0.00022859849624060148, |
|
"loss": 0.3955, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.8861251915595898, |
|
"eval_loss": 0.534982442855835, |
|
"eval_runtime": 145.013, |
|
"eval_samples_per_second": 39.003, |
|
"eval_steps_per_second": 4.875, |
|
"eval_wer": 0.4308228081719119, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.9057723289716688, |
|
"grad_norm": 1.785569667816162, |
|
"learning_rate": 0.00022784661654135337, |
|
"loss": 0.3766, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.9254194663837478, |
|
"grad_norm": 0.956910252571106, |
|
"learning_rate": 0.00022709473684210524, |
|
"loss": 0.3808, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.9254194663837478, |
|
"eval_loss": 0.550835132598877, |
|
"eval_runtime": 145.4747, |
|
"eval_samples_per_second": 38.88, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.44476898139975285, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.9450666037958269, |
|
"grad_norm": 1.0630252361297607, |
|
"learning_rate": 0.00022634285714285713, |
|
"loss": 0.3794, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.9647137412079059, |
|
"grad_norm": 0.9397912621498108, |
|
"learning_rate": 0.000225593984962406, |
|
"loss": 0.3871, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.9647137412079059, |
|
"eval_loss": 0.5386993885040283, |
|
"eval_runtime": 144.7105, |
|
"eval_samples_per_second": 39.085, |
|
"eval_steps_per_second": 4.886, |
|
"eval_wer": 0.43197830238641655, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.9843608786199851, |
|
"grad_norm": 1.227219581604004, |
|
"learning_rate": 0.00022484360902255636, |
|
"loss": 0.371, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.004008016032064, |
|
"grad_norm": 1.0407652854919434, |
|
"learning_rate": 0.00022409172932330825, |
|
"loss": 0.3668, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.004008016032064, |
|
"eval_loss": 0.5476531982421875, |
|
"eval_runtime": 144.735, |
|
"eval_samples_per_second": 39.078, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.4207443308565101, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.0236551534441434, |
|
"grad_norm": 3.713465929031372, |
|
"learning_rate": 0.00022333984962406012, |
|
"loss": 0.3303, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.0433022908562224, |
|
"grad_norm": 1.074621319770813, |
|
"learning_rate": 0.00022258796992481202, |
|
"loss": 0.3324, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.0433022908562224, |
|
"eval_loss": 0.5283042788505554, |
|
"eval_runtime": 144.7457, |
|
"eval_samples_per_second": 39.075, |
|
"eval_steps_per_second": 4.884, |
|
"eval_wer": 0.4227985427934073, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.0629494282683014, |
|
"grad_norm": 1.2761338949203491, |
|
"learning_rate": 0.00022183759398496238, |
|
"loss": 0.3299, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.0825965656803804, |
|
"grad_norm": 0.9065299034118652, |
|
"learning_rate": 0.00022108571428571425, |
|
"loss": 0.3327, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.0825965656803804, |
|
"eval_loss": 0.5217949151992798, |
|
"eval_runtime": 145.0885, |
|
"eval_samples_per_second": 38.983, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.41557670395275315, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.1022437030924594, |
|
"grad_norm": 1.1222054958343506, |
|
"learning_rate": 0.00022033383458646615, |
|
"loss": 0.3347, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.1218908405045385, |
|
"grad_norm": 3.639472484588623, |
|
"learning_rate": 0.000219581954887218, |
|
"loss": 0.3251, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.1218908405045385, |
|
"eval_loss": 0.5330758094787598, |
|
"eval_runtime": 144.669, |
|
"eval_samples_per_second": 39.096, |
|
"eval_steps_per_second": 4.887, |
|
"eval_wer": 0.41357063760812696, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.1415379779166175, |
|
"grad_norm": 2.337876558303833, |
|
"learning_rate": 0.0002188300751879699, |
|
"loss": 0.3368, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.1611851153286965, |
|
"grad_norm": 0.8467469811439514, |
|
"learning_rate": 0.00021807819548872178, |
|
"loss": 0.3466, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.1611851153286965, |
|
"eval_loss": 0.5276508927345276, |
|
"eval_runtime": 145.3583, |
|
"eval_samples_per_second": 38.911, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.4141002391231083, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.1808322527407755, |
|
"grad_norm": 0.7859643697738647, |
|
"learning_rate": 0.00021732781954887217, |
|
"loss": 0.3337, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.2004793901528545, |
|
"grad_norm": 0.8069686889648438, |
|
"learning_rate": 0.00021657593984962404, |
|
"loss": 0.3259, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.2004793901528545, |
|
"eval_loss": 0.522844672203064, |
|
"eval_runtime": 145.284, |
|
"eval_samples_per_second": 38.931, |
|
"eval_steps_per_second": 4.866, |
|
"eval_wer": 0.40875607838102423, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.2201265275649336, |
|
"grad_norm": 0.5765830278396606, |
|
"learning_rate": 0.00021582406015037593, |
|
"loss": 0.337, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.239773664977013, |
|
"grad_norm": 0.9564582109451294, |
|
"learning_rate": 0.0002150736842105263, |
|
"loss": 0.3292, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.239773664977013, |
|
"eval_loss": 0.5119462013244629, |
|
"eval_runtime": 145.6008, |
|
"eval_samples_per_second": 38.846, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.4132657155237438, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.259420802389092, |
|
"grad_norm": 0.7495951056480408, |
|
"learning_rate": 0.00021432330827067666, |
|
"loss": 0.3259, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.279067939801171, |
|
"grad_norm": 0.825587272644043, |
|
"learning_rate": 0.00021357142857142855, |
|
"loss": 0.3323, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.279067939801171, |
|
"eval_loss": 0.5191282033920288, |
|
"eval_runtime": 145.5654, |
|
"eval_samples_per_second": 38.855, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.40739195326667843, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.29871507721325, |
|
"grad_norm": 0.9213058948516846, |
|
"learning_rate": 0.00021281954887218042, |
|
"loss": 0.3292, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.318362214625329, |
|
"grad_norm": 6.7399773597717285, |
|
"learning_rate": 0.00021206766917293232, |
|
"loss": 0.3228, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.318362214625329, |
|
"eval_loss": 0.5073339939117432, |
|
"eval_runtime": 145.4974, |
|
"eval_samples_per_second": 38.874, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.3955802346295197, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.338009352037408, |
|
"grad_norm": 0.9246654510498047, |
|
"learning_rate": 0.00021131578947368419, |
|
"loss": 0.3311, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.357656489449487, |
|
"grad_norm": 0.8129465579986572, |
|
"learning_rate": 0.00021056390977443608, |
|
"loss": 0.3172, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.357656489449487, |
|
"eval_loss": 0.5084324479103088, |
|
"eval_runtime": 145.2717, |
|
"eval_samples_per_second": 38.934, |
|
"eval_steps_per_second": 4.867, |
|
"eval_wer": 0.4045353147919308, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.377303626861566, |
|
"grad_norm": 0.6280909776687622, |
|
"learning_rate": 0.00020981203007518795, |
|
"loss": 0.3235, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.396950764273645, |
|
"grad_norm": 3.0157957077026367, |
|
"learning_rate": 0.00020906015037593984, |
|
"loss": 0.332, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.396950764273645, |
|
"eval_loss": 0.512955367565155, |
|
"eval_runtime": 145.4819, |
|
"eval_samples_per_second": 38.878, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.40151819100961306, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.4165979016857246, |
|
"grad_norm": 1.2728731632232666, |
|
"learning_rate": 0.0002083082706766917, |
|
"loss": 0.3298, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.4362450390978037, |
|
"grad_norm": 6.008030891418457, |
|
"learning_rate": 0.0002075563909774436, |
|
"loss": 0.3218, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.4362450390978037, |
|
"eval_loss": 0.5102687478065491, |
|
"eval_runtime": 145.1668, |
|
"eval_samples_per_second": 38.962, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.39972075556482806, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.4558921765098827, |
|
"grad_norm": 1.1027765274047852, |
|
"learning_rate": 0.00020680451127819547, |
|
"loss": 0.3207, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.4755393139219617, |
|
"grad_norm": 0.9439337849617004, |
|
"learning_rate": 0.00020605263157894737, |
|
"loss": 0.3317, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.4755393139219617, |
|
"eval_loss": 0.5019811391830444, |
|
"eval_runtime": 145.2988, |
|
"eval_samples_per_second": 38.927, |
|
"eval_steps_per_second": 4.866, |
|
"eval_wer": 0.40500072218388405, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.4951864513340407, |
|
"grad_norm": 1.4857794046401978, |
|
"learning_rate": 0.0002053007518796992, |
|
"loss": 0.3272, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.5148335887461197, |
|
"grad_norm": 0.9404523968696594, |
|
"learning_rate": 0.0002045488721804511, |
|
"loss": 0.3222, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.5148335887461197, |
|
"eval_loss": 0.5072047114372253, |
|
"eval_runtime": 145.1717, |
|
"eval_samples_per_second": 38.961, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.39964051291104297, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.5344807261581987, |
|
"grad_norm": 1.1841472387313843, |
|
"learning_rate": 0.00020379699248120297, |
|
"loss": 0.3261, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.5541278635702778, |
|
"grad_norm": 0.7141321301460266, |
|
"learning_rate": 0.0002030466165413534, |
|
"loss": 0.3138, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.5541278635702778, |
|
"eval_loss": 0.5098404884338379, |
|
"eval_runtime": 144.9074, |
|
"eval_samples_per_second": 39.032, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.40357240294651026, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.573775000982357, |
|
"grad_norm": 0.8695092797279358, |
|
"learning_rate": 0.00020229473684210523, |
|
"loss": 0.3198, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.593422138394436, |
|
"grad_norm": 1.1630802154541016, |
|
"learning_rate": 0.00020154285714285713, |
|
"loss": 0.3074, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.593422138394436, |
|
"eval_loss": 0.5026105046272278, |
|
"eval_runtime": 145.0532, |
|
"eval_samples_per_second": 38.993, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.3981159024891271, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.613069275806515, |
|
"grad_norm": 1.2439523935317993, |
|
"learning_rate": 0.000200790977443609, |
|
"loss": 0.3234, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.632716413218594, |
|
"grad_norm": 0.7216903567314148, |
|
"learning_rate": 0.0002000390977443609, |
|
"loss": 0.3261, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.632716413218594, |
|
"eval_loss": 0.5030384063720703, |
|
"eval_runtime": 145.8235, |
|
"eval_samples_per_second": 38.787, |
|
"eval_steps_per_second": 4.848, |
|
"eval_wer": 0.39349392563110847, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.652363550630673, |
|
"grad_norm": 1.0616713762283325, |
|
"learning_rate": 0.00019928721804511276, |
|
"loss": 0.3147, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.6720106880427523, |
|
"grad_norm": 10.39274787902832, |
|
"learning_rate": 0.00019853533834586465, |
|
"loss": 0.3257, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6720106880427523, |
|
"eval_loss": 0.500296413898468, |
|
"eval_runtime": 144.7526, |
|
"eval_samples_per_second": 39.074, |
|
"eval_steps_per_second": 4.884, |
|
"eval_wer": 0.39028421947970665, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6916578254548313, |
|
"grad_norm": 0.7911710739135742, |
|
"learning_rate": 0.00019778345864661652, |
|
"loss": 0.3274, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.7113049628669104, |
|
"grad_norm": 0.6936825513839722, |
|
"learning_rate": 0.00019703157894736842, |
|
"loss": 0.3179, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.7113049628669104, |
|
"eval_loss": 0.5139185786247253, |
|
"eval_runtime": 145.2074, |
|
"eval_samples_per_second": 38.951, |
|
"eval_steps_per_second": 4.869, |
|
"eval_wer": 0.4003947938566224, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.7309521002789894, |
|
"grad_norm": 0.5706244111061096, |
|
"learning_rate": 0.00019627969924812028, |
|
"loss": 0.3147, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.7505992376910684, |
|
"grad_norm": 0.9982422590255737, |
|
"learning_rate": 0.00019552932330827065, |
|
"loss": 0.3154, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.7505992376910684, |
|
"eval_loss": 0.5041365027427673, |
|
"eval_runtime": 144.6141, |
|
"eval_samples_per_second": 39.111, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.39455312866107106, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.7702463751031474, |
|
"grad_norm": 0.6709697842597961, |
|
"learning_rate": 0.00019477894736842104, |
|
"loss": 0.3116, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.7898935125152264, |
|
"grad_norm": 1.2155810594558716, |
|
"learning_rate": 0.0001940270676691729, |
|
"loss": 0.3119, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.7898935125152264, |
|
"eval_loss": 0.49135103821754456, |
|
"eval_runtime": 144.8705, |
|
"eval_samples_per_second": 39.042, |
|
"eval_steps_per_second": 4.88, |
|
"eval_wer": 0.3940877212691178, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.8095406499273055, |
|
"grad_norm": 9.424253463745117, |
|
"learning_rate": 0.0001932766917293233, |
|
"loss": 0.3177, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.829187787339385, |
|
"grad_norm": 0.6820365786552429, |
|
"learning_rate": 0.00019252481203007517, |
|
"loss": 0.3128, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.829187787339385, |
|
"eval_loss": 0.4867289066314697, |
|
"eval_runtime": 144.797, |
|
"eval_samples_per_second": 39.062, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.38309447770056654, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.848834924751464, |
|
"grad_norm": 0.6361156702041626, |
|
"learning_rate": 0.00019177443609022553, |
|
"loss": 0.3127, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.868482062163543, |
|
"grad_norm": 1.142830491065979, |
|
"learning_rate": 0.00019102255639097743, |
|
"loss": 0.3105, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.868482062163543, |
|
"eval_loss": 0.4870510995388031, |
|
"eval_runtime": 145.1289, |
|
"eval_samples_per_second": 38.972, |
|
"eval_steps_per_second": 4.872, |
|
"eval_wer": 0.3817945467092488, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.888129199575622, |
|
"grad_norm": 0.8262931704521179, |
|
"learning_rate": 0.0001902706766917293, |
|
"loss": 0.3234, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.907776336987701, |
|
"grad_norm": 1.1251935958862305, |
|
"learning_rate": 0.0001895187969924812, |
|
"loss": 0.309, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.907776336987701, |
|
"eval_loss": 0.48873621225357056, |
|
"eval_runtime": 144.4047, |
|
"eval_samples_per_second": 39.168, |
|
"eval_steps_per_second": 4.896, |
|
"eval_wer": 0.39885413490394955, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.92742347439978, |
|
"grad_norm": 0.825520396232605, |
|
"learning_rate": 0.00018876691729323306, |
|
"loss": 0.3204, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.947070611811859, |
|
"grad_norm": 0.5379465818405151, |
|
"learning_rate": 0.00018801654135338345, |
|
"loss": 0.3073, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.947070611811859, |
|
"eval_loss": 0.48394420742988586, |
|
"eval_runtime": 145.2237, |
|
"eval_samples_per_second": 38.947, |
|
"eval_steps_per_second": 4.868, |
|
"eval_wer": 0.3903644621334917, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.966717749223938, |
|
"grad_norm": 0.4377336800098419, |
|
"learning_rate": 0.00018726466165413532, |
|
"loss": 0.3044, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.986364886636017, |
|
"grad_norm": 0.8088381290435791, |
|
"learning_rate": 0.0001865127819548872, |
|
"loss": 0.3023, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.986364886636017, |
|
"eval_loss": 0.48391589522361755, |
|
"eval_runtime": 145.3642, |
|
"eval_samples_per_second": 38.909, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.38049461571793103, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.006012024048096, |
|
"grad_norm": 0.6517421007156372, |
|
"learning_rate": 0.00018576090225563908, |
|
"loss": 0.3049, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 3.025659161460175, |
|
"grad_norm": 1.4399667978286743, |
|
"learning_rate": 0.00018500902255639098, |
|
"loss": 0.2715, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.025659161460175, |
|
"eval_loss": 0.48158180713653564, |
|
"eval_runtime": 147.38, |
|
"eval_samples_per_second": 38.377, |
|
"eval_steps_per_second": 4.797, |
|
"eval_wer": 0.38766830896631416, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.045306298872254, |
|
"grad_norm": 1.0456621646881104, |
|
"learning_rate": 0.00018425714285714284, |
|
"loss": 0.2762, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 3.064953436284333, |
|
"grad_norm": 0.6409999132156372, |
|
"learning_rate": 0.00018350526315789474, |
|
"loss": 0.2565, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.064953436284333, |
|
"eval_loss": 0.49935096502304077, |
|
"eval_runtime": 144.0075, |
|
"eval_samples_per_second": 39.276, |
|
"eval_steps_per_second": 4.909, |
|
"eval_wer": 0.3811044598866974, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.0846005736964126, |
|
"grad_norm": 0.8721100687980652, |
|
"learning_rate": 0.0001827548872180451, |
|
"loss": 0.2681, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 3.1042477111084916, |
|
"grad_norm": 1.0572487115859985, |
|
"learning_rate": 0.00018200300751879697, |
|
"loss": 0.2697, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.1042477111084916, |
|
"eval_loss": 0.48027363419532776, |
|
"eval_runtime": 143.6893, |
|
"eval_samples_per_second": 39.363, |
|
"eval_steps_per_second": 4.92, |
|
"eval_wer": 0.3813291393172955, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.1238948485205706, |
|
"grad_norm": 0.5640320777893066, |
|
"learning_rate": 0.00018125112781954887, |
|
"loss": 0.274, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 3.1435419859326497, |
|
"grad_norm": 0.740835964679718, |
|
"learning_rate": 0.00018049924812030073, |
|
"loss": 0.2717, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.1435419859326497, |
|
"eval_loss": 0.48425012826919556, |
|
"eval_runtime": 144.7996, |
|
"eval_samples_per_second": 39.061, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.37988477154916467, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.1631891233447287, |
|
"grad_norm": 0.4206051528453827, |
|
"learning_rate": 0.00017974736842105263, |
|
"loss": 0.2751, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.1828362607568077, |
|
"grad_norm": 1.7560110092163086, |
|
"learning_rate": 0.00017899548872180447, |
|
"loss": 0.2738, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.1828362607568077, |
|
"eval_loss": 0.4904831647872925, |
|
"eval_runtime": 145.8617, |
|
"eval_samples_per_second": 38.776, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.37967614064932353, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.2024833981688867, |
|
"grad_norm": 0.5435498952865601, |
|
"learning_rate": 0.00017824360902255637, |
|
"loss": 0.2671, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 3.2221305355809657, |
|
"grad_norm": 0.8769587278366089, |
|
"learning_rate": 0.00017749323308270673, |
|
"loss": 0.2617, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.2221305355809657, |
|
"eval_loss": 0.4753645956516266, |
|
"eval_runtime": 144.9825, |
|
"eval_samples_per_second": 39.012, |
|
"eval_steps_per_second": 4.876, |
|
"eval_wer": 0.37282341801608065, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.2417776729930448, |
|
"grad_norm": 0.5920813083648682, |
|
"learning_rate": 0.00017674135338345865, |
|
"loss": 0.27, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 3.261424810405124, |
|
"grad_norm": 0.4276420474052429, |
|
"learning_rate": 0.0001759894736842105, |
|
"loss": 0.2634, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.261424810405124, |
|
"eval_loss": 0.4729759693145752, |
|
"eval_runtime": 144.8389, |
|
"eval_samples_per_second": 39.05, |
|
"eval_steps_per_second": 4.881, |
|
"eval_wer": 0.3668052189822022, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.2810719478172032, |
|
"grad_norm": 2.3023736476898193, |
|
"learning_rate": 0.0001752375939849624, |
|
"loss": 0.2771, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 3.3007190852292823, |
|
"grad_norm": 1.1624869108200073, |
|
"learning_rate": 0.00017448571428571426, |
|
"loss": 0.2648, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.3007190852292823, |
|
"eval_loss": 0.4768010377883911, |
|
"eval_runtime": 144.7404, |
|
"eval_samples_per_second": 39.077, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.3690520132881835, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.3203662226413613, |
|
"grad_norm": 0.64561527967453, |
|
"learning_rate": 0.00017373383458646615, |
|
"loss": 0.2745, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 3.3400133600534403, |
|
"grad_norm": 0.5857324600219727, |
|
"learning_rate": 0.00017298195488721802, |
|
"loss": 0.2567, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.3400133600534403, |
|
"eval_loss": 0.4812460243701935, |
|
"eval_runtime": 145.4739, |
|
"eval_samples_per_second": 38.88, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.37410730047664137, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.3596604974655193, |
|
"grad_norm": 0.97500079870224, |
|
"learning_rate": 0.00017223007518796991, |
|
"loss": 0.2686, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 3.3793076348775983, |
|
"grad_norm": 0.6413397789001465, |
|
"learning_rate": 0.00017147969924812028, |
|
"loss": 0.2687, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.3793076348775983, |
|
"eval_loss": 0.46830272674560547, |
|
"eval_runtime": 144.9613, |
|
"eval_samples_per_second": 39.017, |
|
"eval_steps_per_second": 4.877, |
|
"eval_wer": 0.37160372967854793, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.3989547722896774, |
|
"grad_norm": 1.0018800497055054, |
|
"learning_rate": 0.00017072781954887217, |
|
"loss": 0.2595, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 3.4186019097017564, |
|
"grad_norm": 1.0536398887634277, |
|
"learning_rate": 0.00016997593984962404, |
|
"loss": 0.2757, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.4186019097017564, |
|
"eval_loss": 0.46901389956474304, |
|
"eval_runtime": 144.4366, |
|
"eval_samples_per_second": 39.159, |
|
"eval_steps_per_second": 4.895, |
|
"eval_wer": 0.37320858275424884, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.4382490471138354, |
|
"grad_norm": 0.4486633837223053, |
|
"learning_rate": 0.00016922406015037594, |
|
"loss": 0.2655, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 3.4578961845259144, |
|
"grad_norm": 0.6999643445014954, |
|
"learning_rate": 0.0001684721804511278, |
|
"loss": 0.2596, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.4578961845259144, |
|
"eval_loss": 0.47534072399139404, |
|
"eval_runtime": 145.0031, |
|
"eval_samples_per_second": 39.006, |
|
"eval_steps_per_second": 4.876, |
|
"eval_wer": 0.37824782141194974, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.4775433219379934, |
|
"grad_norm": 0.9858837723731995, |
|
"learning_rate": 0.0001677203007518797, |
|
"loss": 0.2614, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 3.497190459350073, |
|
"grad_norm": 0.5992431640625, |
|
"learning_rate": 0.00016696842105263157, |
|
"loss": 0.2589, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.497190459350073, |
|
"eval_loss": 0.4645041823387146, |
|
"eval_runtime": 146.1245, |
|
"eval_samples_per_second": 38.707, |
|
"eval_steps_per_second": 4.838, |
|
"eval_wer": 0.3691483044727255, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.516837596762152, |
|
"grad_norm": 2.5524730682373047, |
|
"learning_rate": 0.00016621804511278193, |
|
"loss": 0.2724, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 3.536484734174231, |
|
"grad_norm": 0.42577388882637024, |
|
"learning_rate": 0.00016546616541353383, |
|
"loss": 0.2627, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.536484734174231, |
|
"eval_loss": 0.4689880907535553, |
|
"eval_runtime": 146.114, |
|
"eval_samples_per_second": 38.71, |
|
"eval_steps_per_second": 4.839, |
|
"eval_wer": 0.3675274028662676, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.55613187158631, |
|
"grad_norm": 0.530576765537262, |
|
"learning_rate": 0.0001647142857142857, |
|
"loss": 0.2692, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.575779008998389, |
|
"grad_norm": 1.5638034343719482, |
|
"learning_rate": 0.0001639624060150376, |
|
"loss": 0.2804, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.575779008998389, |
|
"eval_loss": 0.46749356389045715, |
|
"eval_runtime": 146.7363, |
|
"eval_samples_per_second": 38.545, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.37420359166118344, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.595426146410468, |
|
"grad_norm": 0.7981226444244385, |
|
"learning_rate": 0.00016321052631578946, |
|
"loss": 0.2658, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 3.615073283822547, |
|
"grad_norm": 0.4092627167701721, |
|
"learning_rate": 0.00016245864661654135, |
|
"loss": 0.2587, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.615073283822547, |
|
"eval_loss": 0.46739462018013, |
|
"eval_runtime": 145.0739, |
|
"eval_samples_per_second": 38.987, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.3593747492417069, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.634720421234626, |
|
"grad_norm": 0.4350492060184479, |
|
"learning_rate": 0.0001617067669172932, |
|
"loss": 0.2664, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 3.654367558646705, |
|
"grad_norm": 0.7081959247589111, |
|
"learning_rate": 0.00016095488721804512, |
|
"loss": 0.2615, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.654367558646705, |
|
"eval_loss": 0.46574193239212036, |
|
"eval_runtime": 145.5779, |
|
"eval_samples_per_second": 38.852, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.36321034809263214, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.6740146960587845, |
|
"grad_norm": 1.1376652717590332, |
|
"learning_rate": 0.00016020300751879696, |
|
"loss": 0.2664, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 3.6936618334708635, |
|
"grad_norm": 0.8354430794715881, |
|
"learning_rate": 0.00015945263157894738, |
|
"loss": 0.2531, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.6936618334708635, |
|
"eval_loss": 0.45889467000961304, |
|
"eval_runtime": 145.207, |
|
"eval_samples_per_second": 38.951, |
|
"eval_steps_per_second": 4.869, |
|
"eval_wer": 0.3668373160437162, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.7133089708829425, |
|
"grad_norm": 0.7989226579666138, |
|
"learning_rate": 0.00015870075187969922, |
|
"loss": 0.2621, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 3.7329561082950216, |
|
"grad_norm": 1.2648522853851318, |
|
"learning_rate": 0.00015794887218045114, |
|
"loss": 0.2466, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.7329561082950216, |
|
"eval_loss": 0.46178776025772095, |
|
"eval_runtime": 145.1044, |
|
"eval_samples_per_second": 38.979, |
|
"eval_steps_per_second": 4.872, |
|
"eval_wer": 0.3691001588804545, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.7526032457071006, |
|
"grad_norm": 0.6409050226211548, |
|
"learning_rate": 0.00015719699248120298, |
|
"loss": 0.2732, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 3.7722503831191796, |
|
"grad_norm": 0.8056377172470093, |
|
"learning_rate": 0.00015644511278195487, |
|
"loss": 0.2653, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.7722503831191796, |
|
"eval_loss": 0.46144935488700867, |
|
"eval_runtime": 145.0964, |
|
"eval_samples_per_second": 38.981, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.3774774919356133, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.7918975205312586, |
|
"grad_norm": 0.6420221924781799, |
|
"learning_rate": 0.00015569473684210524, |
|
"loss": 0.267, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 3.8115446579433376, |
|
"grad_norm": 1.7600951194763184, |
|
"learning_rate": 0.0001549428571428571, |
|
"loss": 0.2542, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.8115446579433376, |
|
"eval_loss": 0.4600285291671753, |
|
"eval_runtime": 145.9311, |
|
"eval_samples_per_second": 38.758, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.3726308356469965, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.8311917953554167, |
|
"grad_norm": 7.725172519683838, |
|
"learning_rate": 0.000154190977443609, |
|
"loss": 0.2648, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 3.8508389327674957, |
|
"grad_norm": 0.9012848734855652, |
|
"learning_rate": 0.00015343909774436087, |
|
"loss": 0.2616, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.8508389327674957, |
|
"eval_loss": 0.4511352777481079, |
|
"eval_runtime": 146.3391, |
|
"eval_samples_per_second": 38.65, |
|
"eval_steps_per_second": 4.831, |
|
"eval_wer": 0.3660348895058657, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.8704860701795747, |
|
"grad_norm": 0.4818692207336426, |
|
"learning_rate": 0.00015268721804511276, |
|
"loss": 0.2429, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 3.8901332075916537, |
|
"grad_norm": 1.495732307434082, |
|
"learning_rate": 0.00015193533834586463, |
|
"loss": 0.2625, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.8901332075916537, |
|
"eval_loss": 0.4607318639755249, |
|
"eval_runtime": 145.4992, |
|
"eval_samples_per_second": 38.873, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.36436584230713676, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.9097803450037327, |
|
"grad_norm": 0.5369844436645508, |
|
"learning_rate": 0.00015118345864661653, |
|
"loss": 0.2693, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 3.9294274824158117, |
|
"grad_norm": 2.374652862548828, |
|
"learning_rate": 0.0001504315789473684, |
|
"loss": 0.2627, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.9294274824158117, |
|
"eval_loss": 0.4455793499946594, |
|
"eval_runtime": 146.0128, |
|
"eval_samples_per_second": 38.736, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.36256840686235176, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.949074619827891, |
|
"grad_norm": 1.0198256969451904, |
|
"learning_rate": 0.0001496812030075188, |
|
"loss": 0.2569, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 3.9687217572399702, |
|
"grad_norm": 0.7093910574913025, |
|
"learning_rate": 0.00014892932330827068, |
|
"loss": 0.252, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.9687217572399702, |
|
"eval_loss": 0.4579247534275055, |
|
"eval_runtime": 145.1833, |
|
"eval_samples_per_second": 38.958, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.36651634542857603, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.9883688946520492, |
|
"grad_norm": 0.7897918820381165, |
|
"learning_rate": 0.00014817744360902255, |
|
"loss": 0.2528, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 4.008016032064128, |
|
"grad_norm": 0.773681640625, |
|
"learning_rate": 0.00014742556390977442, |
|
"loss": 0.2489, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 4.008016032064128, |
|
"eval_loss": 0.45104366540908813, |
|
"eval_runtime": 145.9991, |
|
"eval_samples_per_second": 38.74, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.36203880534737043, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 4.027663169476208, |
|
"grad_norm": 0.6559247970581055, |
|
"learning_rate": 0.0001466736842105263, |
|
"loss": 0.222, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 4.047310306888287, |
|
"grad_norm": 1.860120415687561, |
|
"learning_rate": 0.00014592481203007517, |
|
"loss": 0.2218, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 4.047310306888287, |
|
"eval_loss": 0.4418700039386749, |
|
"eval_runtime": 149.8176, |
|
"eval_samples_per_second": 37.753, |
|
"eval_steps_per_second": 4.719, |
|
"eval_wer": 0.35350098698464155, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 4.066957444300366, |
|
"grad_norm": 0.8769797682762146, |
|
"learning_rate": 0.00014517293233082707, |
|
"loss": 0.2218, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 4.086604581712445, |
|
"grad_norm": 1.1328709125518799, |
|
"learning_rate": 0.00014442105263157894, |
|
"loss": 0.2211, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 4.086604581712445, |
|
"eval_loss": 0.449856162071228, |
|
"eval_runtime": 144.4159, |
|
"eval_samples_per_second": 39.165, |
|
"eval_steps_per_second": 4.896, |
|
"eval_wer": 0.3771404727897161, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 4.106251719124524, |
|
"grad_norm": 0.8746039271354675, |
|
"learning_rate": 0.0001436706766917293, |
|
"loss": 0.2188, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 4.125898856536603, |
|
"grad_norm": 0.55832839012146, |
|
"learning_rate": 0.0001429203007518797, |
|
"loss": 0.2186, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.125898856536603, |
|
"eval_loss": 0.4546278417110443, |
|
"eval_runtime": 144.5427, |
|
"eval_samples_per_second": 39.13, |
|
"eval_steps_per_second": 4.891, |
|
"eval_wer": 0.36601884097510873, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.145545993948682, |
|
"grad_norm": 0.7782666087150574, |
|
"learning_rate": 0.00014216842105263156, |
|
"loss": 0.2184, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 4.165193131360761, |
|
"grad_norm": 0.768484890460968, |
|
"learning_rate": 0.00014141654135338346, |
|
"loss": 0.2199, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.165193131360761, |
|
"eval_loss": 0.4395730495452881, |
|
"eval_runtime": 144.834, |
|
"eval_samples_per_second": 39.052, |
|
"eval_steps_per_second": 4.881, |
|
"eval_wer": 0.35423921939946396, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.18484026877284, |
|
"grad_norm": 0.5472589135169983, |
|
"learning_rate": 0.00014066466165413532, |
|
"loss": 0.2206, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 4.204487406184919, |
|
"grad_norm": 0.4021967947483063, |
|
"learning_rate": 0.0001399127819548872, |
|
"loss": 0.2227, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.204487406184919, |
|
"eval_loss": 0.4468631446361542, |
|
"eval_runtime": 144.5271, |
|
"eval_samples_per_second": 39.135, |
|
"eval_steps_per_second": 4.892, |
|
"eval_wer": 0.35748102261237985, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.224134543596998, |
|
"grad_norm": 1.0301532745361328, |
|
"learning_rate": 0.0001391609022556391, |
|
"loss": 0.2292, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 4.243781681009077, |
|
"grad_norm": 0.6561172008514404, |
|
"learning_rate": 0.00013840902255639095, |
|
"loss": 0.2212, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.243781681009077, |
|
"eval_loss": 0.44032466411590576, |
|
"eval_runtime": 144.5017, |
|
"eval_samples_per_second": 39.141, |
|
"eval_steps_per_second": 4.893, |
|
"eval_wer": 0.3500826499333986, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.263428818421156, |
|
"grad_norm": 0.7782973647117615, |
|
"learning_rate": 0.00013765714285714285, |
|
"loss": 0.218, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 4.283075955833235, |
|
"grad_norm": 0.5677826404571533, |
|
"learning_rate": 0.00013690526315789472, |
|
"loss": 0.2182, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.283075955833235, |
|
"eval_loss": 0.4507006108760834, |
|
"eval_runtime": 144.3123, |
|
"eval_samples_per_second": 39.193, |
|
"eval_steps_per_second": 4.899, |
|
"eval_wer": 0.3599364478182022, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.302723093245314, |
|
"grad_norm": 0.48135581612586975, |
|
"learning_rate": 0.0001361533834586466, |
|
"loss": 0.2191, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 4.322370230657393, |
|
"grad_norm": 0.686140775680542, |
|
"learning_rate": 0.00013540150375939848, |
|
"loss": 0.2212, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.322370230657393, |
|
"eval_loss": 0.4435155391693115, |
|
"eval_runtime": 144.5051, |
|
"eval_samples_per_second": 39.14, |
|
"eval_steps_per_second": 4.893, |
|
"eval_wer": 0.3575612652661649, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.342017368069472, |
|
"grad_norm": 2.3186769485473633, |
|
"learning_rate": 0.00013464962406015038, |
|
"loss": 0.2213, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 4.361664505481551, |
|
"grad_norm": 2.254951238632202, |
|
"learning_rate": 0.00013389774436090224, |
|
"loss": 0.2211, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.361664505481551, |
|
"eval_loss": 0.45138731598854065, |
|
"eval_runtime": 144.5221, |
|
"eval_samples_per_second": 39.136, |
|
"eval_steps_per_second": 4.892, |
|
"eval_wer": 0.36893967357288443, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.38131164289363, |
|
"grad_norm": 0.5208560228347778, |
|
"learning_rate": 0.0001331458646616541, |
|
"loss": 0.2042, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 4.400958780305709, |
|
"grad_norm": 0.7651325464248657, |
|
"learning_rate": 0.0001323954887218045, |
|
"loss": 0.2116, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.400958780305709, |
|
"eval_loss": 0.44426095485687256, |
|
"eval_runtime": 144.753, |
|
"eval_samples_per_second": 39.073, |
|
"eval_steps_per_second": 4.884, |
|
"eval_wer": 0.35077273675595, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.420605917717788, |
|
"grad_norm": 0.7976289987564087, |
|
"learning_rate": 0.00013164360902255637, |
|
"loss": 0.2213, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 4.440253055129867, |
|
"grad_norm": 0.7153854966163635, |
|
"learning_rate": 0.00013089172932330827, |
|
"loss": 0.2218, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.440253055129867, |
|
"eval_loss": 0.44099488854408264, |
|
"eval_runtime": 145.5781, |
|
"eval_samples_per_second": 38.852, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.3471618173356229, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.459900192541947, |
|
"grad_norm": 0.8848706483840942, |
|
"learning_rate": 0.00013014135338345863, |
|
"loss": 0.2213, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 4.479547329954026, |
|
"grad_norm": 0.590100109577179, |
|
"learning_rate": 0.0001293894736842105, |
|
"loss": 0.2152, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.479547329954026, |
|
"eval_loss": 0.446841299533844, |
|
"eval_runtime": 146.3889, |
|
"eval_samples_per_second": 38.637, |
|
"eval_steps_per_second": 4.83, |
|
"eval_wer": 0.35348493845388457, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.499194467366105, |
|
"grad_norm": 2.4068264961242676, |
|
"learning_rate": 0.0001286375939849624, |
|
"loss": 0.2149, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 4.518841604778184, |
|
"grad_norm": 0.6972984671592712, |
|
"learning_rate": 0.00012788571428571426, |
|
"loss": 0.2174, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.518841604778184, |
|
"eval_loss": 0.4498594105243683, |
|
"eval_runtime": 145.1426, |
|
"eval_samples_per_second": 38.969, |
|
"eval_steps_per_second": 4.871, |
|
"eval_wer": 0.3469692349665388, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.538488742190263, |
|
"grad_norm": 0.6739790439605713, |
|
"learning_rate": 0.00012713383458646616, |
|
"loss": 0.2148, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 4.558135879602342, |
|
"grad_norm": 4.946841716766357, |
|
"learning_rate": 0.00012638195488721802, |
|
"loss": 0.212, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.558135879602342, |
|
"eval_loss": 0.4453933835029602, |
|
"eval_runtime": 145.1072, |
|
"eval_samples_per_second": 38.978, |
|
"eval_steps_per_second": 4.872, |
|
"eval_wer": 0.34401630530724914, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.577783017014421, |
|
"grad_norm": 0.5079777240753174, |
|
"learning_rate": 0.00012563007518796992, |
|
"loss": 0.2097, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 4.5974301544265, |
|
"grad_norm": 1.189431071281433, |
|
"learning_rate": 0.0001248781954887218, |
|
"loss": 0.2039, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.5974301544265, |
|
"eval_loss": 0.4423506259918213, |
|
"eval_runtime": 144.2129, |
|
"eval_samples_per_second": 39.22, |
|
"eval_steps_per_second": 4.902, |
|
"eval_wer": 0.34892715571889393, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.617077291838579, |
|
"grad_norm": 0.5739697813987732, |
|
"learning_rate": 0.00012412781954887218, |
|
"loss": 0.2137, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 4.636724429250658, |
|
"grad_norm": 1.8628792762756348, |
|
"learning_rate": 0.00012337593984962405, |
|
"loss": 0.2073, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.636724429250658, |
|
"eval_loss": 0.44371461868286133, |
|
"eval_runtime": 144.8897, |
|
"eval_samples_per_second": 39.037, |
|
"eval_steps_per_second": 4.88, |
|
"eval_wer": 0.3466161672898846, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.656371566662737, |
|
"grad_norm": 0.6919093728065491, |
|
"learning_rate": 0.00012262406015037594, |
|
"loss": 0.2111, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 4.676018704074816, |
|
"grad_norm": 0.6628223061561584, |
|
"learning_rate": 0.00012187218045112781, |
|
"loss": 0.2177, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.676018704074816, |
|
"eval_loss": 0.43920648097991943, |
|
"eval_runtime": 144.6466, |
|
"eval_samples_per_second": 39.102, |
|
"eval_steps_per_second": 4.888, |
|
"eval_wer": 0.34218677280095006, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.695665841486895, |
|
"grad_norm": 0.7294492721557617, |
|
"learning_rate": 0.00012112030075187969, |
|
"loss": 0.2154, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 4.715312978898974, |
|
"grad_norm": 1.2088764905929565, |
|
"learning_rate": 0.00012036842105263157, |
|
"loss": 0.2121, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.715312978898974, |
|
"eval_loss": 0.44427990913391113, |
|
"eval_runtime": 144.8984, |
|
"eval_samples_per_second": 39.034, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.34372743175362297, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.734960116311053, |
|
"grad_norm": 0.3588174283504486, |
|
"learning_rate": 0.00011961654135338345, |
|
"loss": 0.2103, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 4.754607253723132, |
|
"grad_norm": 0.5091924667358398, |
|
"learning_rate": 0.00011886466165413532, |
|
"loss": 0.2072, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.754607253723132, |
|
"eval_loss": 0.42684319615364075, |
|
"eval_runtime": 143.2476, |
|
"eval_samples_per_second": 39.484, |
|
"eval_steps_per_second": 4.936, |
|
"eval_wer": 0.34615075989793137, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.774254391135211, |
|
"grad_norm": 0.49059540033340454, |
|
"learning_rate": 0.0001181127819548872, |
|
"loss": 0.204, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 4.79390152854729, |
|
"grad_norm": 0.3562159836292267, |
|
"learning_rate": 0.00011736090225563909, |
|
"loss": 0.2138, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.79390152854729, |
|
"eval_loss": 0.4271770417690277, |
|
"eval_runtime": 142.8263, |
|
"eval_samples_per_second": 39.601, |
|
"eval_steps_per_second": 4.95, |
|
"eval_wer": 0.34318178170788466, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.813548665959369, |
|
"grad_norm": 1.027219295501709, |
|
"learning_rate": 0.00011660902255639097, |
|
"loss": 0.1947, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 4.833195803371449, |
|
"grad_norm": 0.5677986145019531, |
|
"learning_rate": 0.00011585714285714285, |
|
"loss": 0.2145, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 4.833195803371449, |
|
"eval_loss": 0.43315112590789795, |
|
"eval_runtime": 143.3445, |
|
"eval_samples_per_second": 39.457, |
|
"eval_steps_per_second": 4.932, |
|
"eval_wer": 0.3453964789523519, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 4.852842940783528, |
|
"grad_norm": 0.7301272749900818, |
|
"learning_rate": 0.00011510676691729323, |
|
"loss": 0.2019, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 4.872490078195607, |
|
"grad_norm": 16.804716110229492, |
|
"learning_rate": 0.0001143578947368421, |
|
"loss": 0.2217, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 4.872490078195607, |
|
"eval_loss": 0.42095693945884705, |
|
"eval_runtime": 143.7819, |
|
"eval_samples_per_second": 39.337, |
|
"eval_steps_per_second": 4.917, |
|
"eval_wer": 0.3391215034263613, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 4.892137215607686, |
|
"grad_norm": 0.4827280640602112, |
|
"learning_rate": 0.00011360601503759398, |
|
"loss": 0.1994, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 4.911784353019765, |
|
"grad_norm": 0.6648825407028198, |
|
"learning_rate": 0.00011285413533834586, |
|
"loss": 0.2069, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 4.911784353019765, |
|
"eval_loss": 0.427772581577301, |
|
"eval_runtime": 144.7524, |
|
"eval_samples_per_second": 39.074, |
|
"eval_steps_per_second": 4.884, |
|
"eval_wer": 0.3376289900659595, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 4.931431490431844, |
|
"grad_norm": 0.3194764256477356, |
|
"learning_rate": 0.00011210225563909773, |
|
"loss": 0.1946, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 4.951078627843923, |
|
"grad_norm": 0.9185254573822021, |
|
"learning_rate": 0.00011135187969924811, |
|
"loss": 0.2068, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 4.951078627843923, |
|
"eval_loss": 0.4216279685497284, |
|
"eval_runtime": 143.9237, |
|
"eval_samples_per_second": 39.299, |
|
"eval_steps_per_second": 4.912, |
|
"eval_wer": 0.33867214456516503, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 4.970725765256002, |
|
"grad_norm": 0.4608317017555237, |
|
"learning_rate": 0.00011059999999999998, |
|
"loss": 0.2098, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 4.990372902668081, |
|
"grad_norm": 0.7766122221946716, |
|
"learning_rate": 0.00010984812030075186, |
|
"loss": 0.2129, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 4.990372902668081, |
|
"eval_loss": 0.42103302478790283, |
|
"eval_runtime": 144.3261, |
|
"eval_samples_per_second": 39.189, |
|
"eval_steps_per_second": 4.899, |
|
"eval_wer": 0.3361525252363146, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 5.01002004008016, |
|
"grad_norm": 0.7110891342163086, |
|
"learning_rate": 0.00010909624060150374, |
|
"loss": 0.1932, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 5.0296671774922395, |
|
"grad_norm": 0.5839011073112488, |
|
"learning_rate": 0.00010834436090225562, |
|
"loss": 0.1774, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 5.0296671774922395, |
|
"eval_loss": 0.4340197741985321, |
|
"eval_runtime": 144.3949, |
|
"eval_samples_per_second": 39.17, |
|
"eval_steps_per_second": 4.896, |
|
"eval_wer": 0.3303590056330343, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 5.0493143149043185, |
|
"grad_norm": 0.4871758222579956, |
|
"learning_rate": 0.000107593984962406, |
|
"loss": 0.1764, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 5.0689614523163975, |
|
"grad_norm": 1.1092002391815186, |
|
"learning_rate": 0.00010684210526315788, |
|
"loss": 0.1705, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 5.0689614523163975, |
|
"eval_loss": 0.44219356775283813, |
|
"eval_runtime": 144.6388, |
|
"eval_samples_per_second": 39.104, |
|
"eval_steps_per_second": 4.888, |
|
"eval_wer": 0.329877549710324, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 5.0886085897284765, |
|
"grad_norm": 1.4170928001403809, |
|
"learning_rate": 0.00010609022556390976, |
|
"loss": 0.1799, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 5.1082557271405555, |
|
"grad_norm": 0.5609749555587769, |
|
"learning_rate": 0.00010533834586466164, |
|
"loss": 0.1746, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 5.1082557271405555, |
|
"eval_loss": 0.43062400817871094, |
|
"eval_runtime": 144.8052, |
|
"eval_samples_per_second": 39.059, |
|
"eval_steps_per_second": 4.882, |
|
"eval_wer": 0.3363451076053987, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 5.1279028645526346, |
|
"grad_norm": 0.7241942882537842, |
|
"learning_rate": 0.00010458646616541353, |
|
"loss": 0.1719, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 5.147550001964714, |
|
"grad_norm": 7.793860912322998, |
|
"learning_rate": 0.00010383458646616541, |
|
"loss": 0.1813, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.147550001964714, |
|
"eval_loss": 0.41806095838546753, |
|
"eval_runtime": 144.8895, |
|
"eval_samples_per_second": 39.037, |
|
"eval_steps_per_second": 4.88, |
|
"eval_wer": 0.33342427500762306, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.167197139376793, |
|
"grad_norm": 0.5914771556854248, |
|
"learning_rate": 0.00010308270676691729, |
|
"loss": 0.1799, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 5.186844276788872, |
|
"grad_norm": 1.6738320589065552, |
|
"learning_rate": 0.00010233082706766916, |
|
"loss": 0.1729, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.186844276788872, |
|
"eval_loss": 0.4319230020046234, |
|
"eval_runtime": 144.7317, |
|
"eval_samples_per_second": 39.079, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.336858660589623, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.206491414200951, |
|
"grad_norm": 0.6387330889701843, |
|
"learning_rate": 0.00010157894736842104, |
|
"loss": 0.1682, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 5.22613855161303, |
|
"grad_norm": 0.5514143705368042, |
|
"learning_rate": 0.00010082706766917292, |
|
"loss": 0.1777, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.22613855161303, |
|
"eval_loss": 0.4189823567867279, |
|
"eval_runtime": 145.1159, |
|
"eval_samples_per_second": 38.976, |
|
"eval_steps_per_second": 4.872, |
|
"eval_wer": 0.33265394553128663, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.245785689025109, |
|
"grad_norm": 0.49433717131614685, |
|
"learning_rate": 0.00010007669172932331, |
|
"loss": 0.1757, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 5.265432826437188, |
|
"grad_norm": 14.663381576538086, |
|
"learning_rate": 9.932481203007518e-05, |
|
"loss": 0.18, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.265432826437188, |
|
"eval_loss": 0.42281797528266907, |
|
"eval_runtime": 145.0781, |
|
"eval_samples_per_second": 38.986, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.33376129415352024, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.285079963849267, |
|
"grad_norm": 0.3960479497909546, |
|
"learning_rate": 9.857293233082706e-05, |
|
"loss": 0.1773, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 5.304727101261347, |
|
"grad_norm": 0.48836782574653625, |
|
"learning_rate": 9.782105263157894e-05, |
|
"loss": 0.1747, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.304727101261347, |
|
"eval_loss": 0.4267714023590088, |
|
"eval_runtime": 144.4858, |
|
"eval_samples_per_second": 39.146, |
|
"eval_steps_per_second": 4.893, |
|
"eval_wer": 0.3322687807931184, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.324374238673426, |
|
"grad_norm": 0.7414509654045105, |
|
"learning_rate": 9.706917293233082e-05, |
|
"loss": 0.1804, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 5.344021376085505, |
|
"grad_norm": 0.3100612461566925, |
|
"learning_rate": 9.63172932330827e-05, |
|
"loss": 0.1737, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.344021376085505, |
|
"eval_loss": 0.41930150985717773, |
|
"eval_runtime": 145.0977, |
|
"eval_samples_per_second": 38.981, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.3324774116929595, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.363668513497584, |
|
"grad_norm": 2.2844786643981934, |
|
"learning_rate": 9.556541353383459e-05, |
|
"loss": 0.1779, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 5.383315650909663, |
|
"grad_norm": 0.7908081412315369, |
|
"learning_rate": 9.481503759398495e-05, |
|
"loss": 0.1709, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.383315650909663, |
|
"eval_loss": 0.4228932559490204, |
|
"eval_runtime": 145.0454, |
|
"eval_samples_per_second": 38.995, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.3278714833656979, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.402962788321742, |
|
"grad_norm": 1.6749204397201538, |
|
"learning_rate": 9.406315789473683e-05, |
|
"loss": 0.1745, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 5.422609925733821, |
|
"grad_norm": 0.25723955035209656, |
|
"learning_rate": 9.331127819548871e-05, |
|
"loss": 0.1726, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.422609925733821, |
|
"eval_loss": 0.4178549647331238, |
|
"eval_runtime": 145.1876, |
|
"eval_samples_per_second": 38.957, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.32714929948163246, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.4422570631459, |
|
"grad_norm": 0.43192166090011597, |
|
"learning_rate": 9.255939849624058e-05, |
|
"loss": 0.1699, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 5.461904200557979, |
|
"grad_norm": 0.4252433776855469, |
|
"learning_rate": 9.180751879699246e-05, |
|
"loss": 0.1741, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.461904200557979, |
|
"eval_loss": 0.42049652338027954, |
|
"eval_runtime": 145.3425, |
|
"eval_samples_per_second": 38.915, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.3254963008136605, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.481551337970058, |
|
"grad_norm": 0.6398211717605591, |
|
"learning_rate": 9.105563909774435e-05, |
|
"loss": 0.1675, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 5.501198475382137, |
|
"grad_norm": 2.678009510040283, |
|
"learning_rate": 9.030375939849623e-05, |
|
"loss": 0.1723, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.501198475382137, |
|
"eval_loss": 0.4140247702598572, |
|
"eval_runtime": 145.8316, |
|
"eval_samples_per_second": 38.784, |
|
"eval_steps_per_second": 4.848, |
|
"eval_wer": 0.32944423937988476, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.520845612794216, |
|
"grad_norm": 0.42189884185791016, |
|
"learning_rate": 8.955187969924811e-05, |
|
"loss": 0.167, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 5.540492750206295, |
|
"grad_norm": 0.6850213408470154, |
|
"learning_rate": 8.8803007518797e-05, |
|
"loss": 0.1676, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.540492750206295, |
|
"eval_loss": 0.42560333013534546, |
|
"eval_runtime": 145.0938, |
|
"eval_samples_per_second": 38.982, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.32540000962911847, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.560139887618374, |
|
"grad_norm": 0.46668741106987, |
|
"learning_rate": 8.805112781954888e-05, |
|
"loss": 0.1674, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 5.579787025030453, |
|
"grad_norm": 0.38750043511390686, |
|
"learning_rate": 8.729924812030075e-05, |
|
"loss": 0.1769, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.579787025030453, |
|
"eval_loss": 0.41800424456596375, |
|
"eval_runtime": 144.6116, |
|
"eval_samples_per_second": 39.112, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.3279196289579689, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.599434162442532, |
|
"grad_norm": 0.47452759742736816, |
|
"learning_rate": 8.654887218045112e-05, |
|
"loss": 0.1704, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 5.619081299854611, |
|
"grad_norm": 1.3760634660720825, |
|
"learning_rate": 8.579699248120299e-05, |
|
"loss": 0.1718, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 5.619081299854611, |
|
"eval_loss": 0.4158097207546234, |
|
"eval_runtime": 144.8323, |
|
"eval_samples_per_second": 39.052, |
|
"eval_steps_per_second": 4.882, |
|
"eval_wer": 0.3203928680329316, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 5.63872843726669, |
|
"grad_norm": 1.2168941497802734, |
|
"learning_rate": 8.504511278195487e-05, |
|
"loss": 0.1763, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 5.658375574678769, |
|
"grad_norm": 0.6660623550415039, |
|
"learning_rate": 8.429323308270675e-05, |
|
"loss": 0.1735, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 5.658375574678769, |
|
"eval_loss": 0.41737955808639526, |
|
"eval_runtime": 145.161, |
|
"eval_samples_per_second": 38.964, |
|
"eval_steps_per_second": 4.87, |
|
"eval_wer": 0.3209385180786699, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 5.678022712090849, |
|
"grad_norm": 0.7844908237457275, |
|
"learning_rate": 8.354135338345864e-05, |
|
"loss": 0.1696, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 5.697669849502928, |
|
"grad_norm": 1.7285536527633667, |
|
"learning_rate": 8.278947368421052e-05, |
|
"loss": 0.1693, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 5.697669849502928, |
|
"eval_loss": 0.416604220867157, |
|
"eval_runtime": 143.979, |
|
"eval_samples_per_second": 39.284, |
|
"eval_steps_per_second": 4.91, |
|
"eval_wer": 0.3197669753334082, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 5.717316986915007, |
|
"grad_norm": 0.3506734073162079, |
|
"learning_rate": 8.20375939849624e-05, |
|
"loss": 0.1811, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 5.736964124327086, |
|
"grad_norm": 0.9915302395820618, |
|
"learning_rate": 8.128571428571428e-05, |
|
"loss": 0.1745, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 5.736964124327086, |
|
"eval_loss": 0.41646912693977356, |
|
"eval_runtime": 143.6976, |
|
"eval_samples_per_second": 39.36, |
|
"eval_steps_per_second": 4.92, |
|
"eval_wer": 0.32445314631445493, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 5.756611261739165, |
|
"grad_norm": 0.4368499219417572, |
|
"learning_rate": 8.053383458646616e-05, |
|
"loss": 0.1757, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 5.776258399151244, |
|
"grad_norm": 0.8709374070167542, |
|
"learning_rate": 7.978195488721803e-05, |
|
"loss": 0.1692, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 5.776258399151244, |
|
"eval_loss": 0.4147648215293884, |
|
"eval_runtime": 144.5484, |
|
"eval_samples_per_second": 39.129, |
|
"eval_steps_per_second": 4.891, |
|
"eval_wer": 0.3230408756078381, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 5.795905536563323, |
|
"grad_norm": 16.672887802124023, |
|
"learning_rate": 7.903007518796991e-05, |
|
"loss": 0.1633, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 5.815552673975402, |
|
"grad_norm": 0.7690948247909546, |
|
"learning_rate": 7.82781954887218e-05, |
|
"loss": 0.1641, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 5.815552673975402, |
|
"eval_loss": 0.4115670621395111, |
|
"eval_runtime": 145.0143, |
|
"eval_samples_per_second": 39.003, |
|
"eval_steps_per_second": 4.875, |
|
"eval_wer": 0.3216446534319783, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 5.835199811387481, |
|
"grad_norm": 1.9833319187164307, |
|
"learning_rate": 7.752781954887217e-05, |
|
"loss": 0.1646, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 5.85484694879956, |
|
"grad_norm": 0.38222184777259827, |
|
"learning_rate": 7.677593984962405e-05, |
|
"loss": 0.173, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 5.85484694879956, |
|
"eval_loss": 0.40414321422576904, |
|
"eval_runtime": 148.2393, |
|
"eval_samples_per_second": 38.155, |
|
"eval_steps_per_second": 4.769, |
|
"eval_wer": 0.32366676830736146, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 5.874494086211639, |
|
"grad_norm": 2.3978090286254883, |
|
"learning_rate": 7.602556390977442e-05, |
|
"loss": 0.1669, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 5.894141223623718, |
|
"grad_norm": 0.7286165952682495, |
|
"learning_rate": 7.52736842105263e-05, |
|
"loss": 0.1664, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 5.894141223623718, |
|
"eval_loss": 0.4038516581058502, |
|
"eval_runtime": 145.7264, |
|
"eval_samples_per_second": 38.812, |
|
"eval_steps_per_second": 4.852, |
|
"eval_wer": 0.3184349472805765, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 5.913788361035797, |
|
"grad_norm": 0.6666128635406494, |
|
"learning_rate": 7.45218045112782e-05, |
|
"loss": 0.1631, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 5.933435498447876, |
|
"grad_norm": 3.139840841293335, |
|
"learning_rate": 7.376992481203008e-05, |
|
"loss": 0.1648, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 5.933435498447876, |
|
"eval_loss": 0.4072332978248596, |
|
"eval_runtime": 144.1568, |
|
"eval_samples_per_second": 39.235, |
|
"eval_steps_per_second": 4.904, |
|
"eval_wer": 0.31657331771276337, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 5.953082635859955, |
|
"grad_norm": 0.2758707106113434, |
|
"learning_rate": 7.301804511278196e-05, |
|
"loss": 0.1616, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 5.972729773272034, |
|
"grad_norm": 0.5328942537307739, |
|
"learning_rate": 7.226616541353382e-05, |
|
"loss": 0.1709, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 5.972729773272034, |
|
"eval_loss": 0.40219077467918396, |
|
"eval_runtime": 144.786, |
|
"eval_samples_per_second": 39.065, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.3205854504020157, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 5.992376910684113, |
|
"grad_norm": 0.5073242783546448, |
|
"learning_rate": 7.15142857142857e-05, |
|
"loss": 0.1651, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 6.012024048096192, |
|
"grad_norm": 0.4045845866203308, |
|
"learning_rate": 7.076390977443608e-05, |
|
"loss": 0.151, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 6.012024048096192, |
|
"eval_loss": 0.4034076929092407, |
|
"eval_runtime": 144.9751, |
|
"eval_samples_per_second": 39.014, |
|
"eval_steps_per_second": 4.877, |
|
"eval_wer": 0.31882011201874466, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 6.031671185508271, |
|
"grad_norm": 1.1703969240188599, |
|
"learning_rate": 7.001203007518797e-05, |
|
"loss": 0.1397, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 6.05131832292035, |
|
"grad_norm": 0.3152583837509155, |
|
"learning_rate": 6.926015037593985e-05, |
|
"loss": 0.1353, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 6.05131832292035, |
|
"eval_loss": 0.41277533769607544, |
|
"eval_runtime": 144.9149, |
|
"eval_samples_per_second": 39.03, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.32572098024425866, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 6.070965460332429, |
|
"grad_norm": 0.5021807551383972, |
|
"learning_rate": 6.850827067669173e-05, |
|
"loss": 0.1429, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 6.090612597744508, |
|
"grad_norm": 0.4375011622905731, |
|
"learning_rate": 6.77563909774436e-05, |
|
"loss": 0.1476, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 6.090612597744508, |
|
"eval_loss": 0.4197489619255066, |
|
"eval_runtime": 145.056, |
|
"eval_samples_per_second": 38.992, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.3200398003562774, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 6.110259735156587, |
|
"grad_norm": 0.4859500527381897, |
|
"learning_rate": 6.700451127819548e-05, |
|
"loss": 0.1456, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 6.129906872568666, |
|
"grad_norm": 0.4906657636165619, |
|
"learning_rate": 6.625263157894736e-05, |
|
"loss": 0.1465, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 6.129906872568666, |
|
"eval_loss": 0.40734121203422546, |
|
"eval_runtime": 144.5712, |
|
"eval_samples_per_second": 39.123, |
|
"eval_steps_per_second": 4.89, |
|
"eval_wer": 0.3167338030203335, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 6.149554009980746, |
|
"grad_norm": 0.7306200861930847, |
|
"learning_rate": 6.550075187969924e-05, |
|
"loss": 0.1414, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 6.169201147392825, |
|
"grad_norm": 0.35837283730506897, |
|
"learning_rate": 6.474887218045112e-05, |
|
"loss": 0.139, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 6.169201147392825, |
|
"eval_loss": 0.42275404930114746, |
|
"eval_runtime": 144.9153, |
|
"eval_samples_per_second": 39.03, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.321179246040025, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 6.188848284804904, |
|
"grad_norm": 0.5820499658584595, |
|
"learning_rate": 6.39984962406015e-05, |
|
"loss": 0.1408, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 6.208495422216983, |
|
"grad_norm": 0.2785002291202545, |
|
"learning_rate": 6.324812030075188e-05, |
|
"loss": 0.1404, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 6.208495422216983, |
|
"eval_loss": 0.4117072522640228, |
|
"eval_runtime": 144.7738, |
|
"eval_samples_per_second": 39.068, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.3244691948452119, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 6.228142559629062, |
|
"grad_norm": 0.9491069912910461, |
|
"learning_rate": 6.249624060150375e-05, |
|
"loss": 0.1443, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 6.247789697041141, |
|
"grad_norm": 0.6151573657989502, |
|
"learning_rate": 6.174436090225563e-05, |
|
"loss": 0.1338, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 6.247789697041141, |
|
"eval_loss": 0.41795113682746887, |
|
"eval_runtime": 144.7948, |
|
"eval_samples_per_second": 39.062, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.3153054837829597, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 6.26743683445322, |
|
"grad_norm": 1.4104067087173462, |
|
"learning_rate": 6.099248120300751e-05, |
|
"loss": 0.1458, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 6.287083971865299, |
|
"grad_norm": 0.4986151158809662, |
|
"learning_rate": 6.024060150375939e-05, |
|
"loss": 0.1436, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 6.287083971865299, |
|
"eval_loss": 0.42644599080085754, |
|
"eval_runtime": 145.4284, |
|
"eval_samples_per_second": 38.892, |
|
"eval_steps_per_second": 4.861, |
|
"eval_wer": 0.31670170595881947, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 6.306731109277378, |
|
"grad_norm": 1.0388261079788208, |
|
"learning_rate": 5.9488721804511266e-05, |
|
"loss": 0.1382, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 6.326378246689457, |
|
"grad_norm": 1.0425645112991333, |
|
"learning_rate": 5.873834586466165e-05, |
|
"loss": 0.1317, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 6.326378246689457, |
|
"eval_loss": 0.4117776155471802, |
|
"eval_runtime": 144.9416, |
|
"eval_samples_per_second": 39.023, |
|
"eval_steps_per_second": 4.878, |
|
"eval_wer": 0.31524128965993164, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 6.346025384101536, |
|
"grad_norm": 0.47523021697998047, |
|
"learning_rate": 5.798646616541353e-05, |
|
"loss": 0.1386, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 6.365672521513615, |
|
"grad_norm": 0.27745115756988525, |
|
"learning_rate": 5.7234586466165414e-05, |
|
"loss": 0.1395, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 6.365672521513615, |
|
"eval_loss": 0.42685896158218384, |
|
"eval_runtime": 145.206, |
|
"eval_samples_per_second": 38.952, |
|
"eval_steps_per_second": 4.869, |
|
"eval_wer": 0.3118390011394457, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 6.385319658925694, |
|
"grad_norm": 0.3224891126155853, |
|
"learning_rate": 5.6484210526315785e-05, |
|
"loss": 0.1335, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 6.404966796337773, |
|
"grad_norm": 0.2714509665966034, |
|
"learning_rate": 5.5732330827067666e-05, |
|
"loss": 0.1267, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 6.404966796337773, |
|
"eval_loss": 0.4240754544734955, |
|
"eval_runtime": 144.9066, |
|
"eval_samples_per_second": 39.032, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.31345990274590363, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 6.4246139337498525, |
|
"grad_norm": 0.3742597997188568, |
|
"learning_rate": 5.498045112781954e-05, |
|
"loss": 0.1438, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 6.4442610711619315, |
|
"grad_norm": 1.6135519742965698, |
|
"learning_rate": 5.422857142857142e-05, |
|
"loss": 0.1334, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 6.4442610711619315, |
|
"eval_loss": 0.40579110383987427, |
|
"eval_runtime": 144.6174, |
|
"eval_samples_per_second": 39.11, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.31686219126638954, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 6.4639082085740105, |
|
"grad_norm": 0.7605300545692444, |
|
"learning_rate": 5.3476691729323304e-05, |
|
"loss": 0.1371, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 6.4835553459860895, |
|
"grad_norm": 0.44126906991004944, |
|
"learning_rate": 5.2724812030075185e-05, |
|
"loss": 0.1369, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 6.4835553459860895, |
|
"eval_loss": 0.40502265095710754, |
|
"eval_runtime": 145.4039, |
|
"eval_samples_per_second": 38.899, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.31296239829243633, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 6.5032024833981685, |
|
"grad_norm": 0.32450059056282043, |
|
"learning_rate": 5.197293233082706e-05, |
|
"loss": 0.1352, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 6.522849620810248, |
|
"grad_norm": 1.38713538646698, |
|
"learning_rate": 5.122105263157894e-05, |
|
"loss": 0.1322, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 6.522849620810248, |
|
"eval_loss": 0.40965744853019714, |
|
"eval_runtime": 144.8647, |
|
"eval_samples_per_second": 39.043, |
|
"eval_steps_per_second": 4.88, |
|
"eval_wer": 0.31403764985315596, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 6.5424967582223275, |
|
"grad_norm": 0.7151561379432678, |
|
"learning_rate": 5.046917293233082e-05, |
|
"loss": 0.1385, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 6.5621438956344065, |
|
"grad_norm": 0.46481749415397644, |
|
"learning_rate": 4.9717293233082705e-05, |
|
"loss": 0.1358, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 6.5621438956344065, |
|
"eval_loss": 0.41421449184417725, |
|
"eval_runtime": 144.9831, |
|
"eval_samples_per_second": 39.011, |
|
"eval_steps_per_second": 4.876, |
|
"eval_wer": 0.3129142527001653, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 6.5817910330464855, |
|
"grad_norm": 0.4189301133155823, |
|
"learning_rate": 4.896541353383458e-05, |
|
"loss": 0.1359, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 6.6014381704585645, |
|
"grad_norm": 0.7608076333999634, |
|
"learning_rate": 4.821353383458646e-05, |
|
"loss": 0.1345, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 6.6014381704585645, |
|
"eval_loss": 0.40090152621269226, |
|
"eval_runtime": 144.6628, |
|
"eval_samples_per_second": 39.098, |
|
"eval_steps_per_second": 4.887, |
|
"eval_wer": 0.31230440853139896, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 6.6210853078706435, |
|
"grad_norm": 0.23644275963306427, |
|
"learning_rate": 4.746165413533834e-05, |
|
"loss": 0.1329, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 6.6407324452827226, |
|
"grad_norm": 0.5338233709335327, |
|
"learning_rate": 4.6711278195488714e-05, |
|
"loss": 0.1321, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 6.6407324452827226, |
|
"eval_loss": 0.4004514813423157, |
|
"eval_runtime": 144.9848, |
|
"eval_samples_per_second": 39.011, |
|
"eval_steps_per_second": 4.876, |
|
"eval_wer": 0.3092712362183242, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 6.660379582694802, |
|
"grad_norm": 0.5386209487915039, |
|
"learning_rate": 4.5959398496240595e-05, |
|
"loss": 0.1324, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 6.680026720106881, |
|
"grad_norm": 0.7969732880592346, |
|
"learning_rate": 4.5207518796992477e-05, |
|
"loss": 0.1299, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 6.680026720106881, |
|
"eval_loss": 0.39957067370414734, |
|
"eval_runtime": 144.9466, |
|
"eval_samples_per_second": 39.021, |
|
"eval_steps_per_second": 4.878, |
|
"eval_wer": 0.305387491775128, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 6.69967385751896, |
|
"grad_norm": 0.7069671154022217, |
|
"learning_rate": 4.445563909774436e-05, |
|
"loss": 0.1381, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 6.719320994931039, |
|
"grad_norm": 0.8022767305374146, |
|
"learning_rate": 4.370375939849623e-05, |
|
"loss": 0.1345, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 6.719320994931039, |
|
"eval_loss": 0.40409377217292786, |
|
"eval_runtime": 145.3133, |
|
"eval_samples_per_second": 38.923, |
|
"eval_steps_per_second": 4.865, |
|
"eval_wer": 0.30705653897385693, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 6.738968132343118, |
|
"grad_norm": 0.9058027863502502, |
|
"learning_rate": 4.2951879699248114e-05, |
|
"loss": 0.1314, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 6.758615269755197, |
|
"grad_norm": 0.4458518326282501, |
|
"learning_rate": 4.2199999999999996e-05, |
|
"loss": 0.1328, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 6.758615269755197, |
|
"eval_loss": 0.3997325003147125, |
|
"eval_runtime": 145.3079, |
|
"eval_samples_per_second": 38.924, |
|
"eval_steps_per_second": 4.866, |
|
"eval_wer": 0.3069762963200719, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 6.778262407167276, |
|
"grad_norm": 0.5749480128288269, |
|
"learning_rate": 4.144812030075188e-05, |
|
"loss": 0.135, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 6.797909544579355, |
|
"grad_norm": 0.3367716073989868, |
|
"learning_rate": 4.069624060150375e-05, |
|
"loss": 0.1245, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 6.797909544579355, |
|
"eval_loss": 0.3974212110042572, |
|
"eval_runtime": 145.9176, |
|
"eval_samples_per_second": 38.762, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.3044566769912215, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 6.817556681991434, |
|
"grad_norm": 0.546405553817749, |
|
"learning_rate": 3.9944360902255633e-05, |
|
"loss": 0.1312, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 6.837203819403513, |
|
"grad_norm": 0.38214609026908875, |
|
"learning_rate": 3.9192481203007515e-05, |
|
"loss": 0.1356, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 6.837203819403513, |
|
"eval_loss": 0.39992156624794006, |
|
"eval_runtime": 144.9546, |
|
"eval_samples_per_second": 39.019, |
|
"eval_steps_per_second": 4.877, |
|
"eval_wer": 0.3008939031631654, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 6.856850956815592, |
|
"grad_norm": 0.21237680315971375, |
|
"learning_rate": 3.8442105263157886e-05, |
|
"loss": 0.1335, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 6.876498094227671, |
|
"grad_norm": 0.4656332731246948, |
|
"learning_rate": 3.769022556390977e-05, |
|
"loss": 0.1208, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 6.876498094227671, |
|
"eval_loss": 0.39532560110092163, |
|
"eval_runtime": 145.4346, |
|
"eval_samples_per_second": 38.89, |
|
"eval_steps_per_second": 4.861, |
|
"eval_wer": 0.301921009131614, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 6.89614523163975, |
|
"grad_norm": 0.6751464605331421, |
|
"learning_rate": 3.693834586466165e-05, |
|
"loss": 0.1282, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 6.915792369051829, |
|
"grad_norm": 1.1535145044326782, |
|
"learning_rate": 3.618646616541353e-05, |
|
"loss": 0.1316, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 6.915792369051829, |
|
"eval_loss": 0.39738306403160095, |
|
"eval_runtime": 146.048, |
|
"eval_samples_per_second": 38.727, |
|
"eval_steps_per_second": 4.841, |
|
"eval_wer": 0.3056442682672401, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 6.935439506463908, |
|
"grad_norm": 0.8314586877822876, |
|
"learning_rate": 3.543458646616541e-05, |
|
"loss": 0.1271, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 6.955086643875987, |
|
"grad_norm": 0.7973750233650208, |
|
"learning_rate": 3.4682706766917294e-05, |
|
"loss": 0.1232, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 6.955086643875987, |
|
"eval_loss": 0.39205384254455566, |
|
"eval_runtime": 146.1083, |
|
"eval_samples_per_second": 38.711, |
|
"eval_steps_per_second": 4.839, |
|
"eval_wer": 0.30333327983823083, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 6.974733781288066, |
|
"grad_norm": 0.3950521647930145, |
|
"learning_rate": 3.393082706766917e-05, |
|
"loss": 0.1344, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 6.994380918700146, |
|
"grad_norm": 0.4100574851036072, |
|
"learning_rate": 3.317894736842105e-05, |
|
"loss": 0.1261, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 6.994380918700146, |
|
"eval_loss": 0.39850306510925293, |
|
"eval_runtime": 145.9873, |
|
"eval_samples_per_second": 38.743, |
|
"eval_steps_per_second": 4.843, |
|
"eval_wer": 0.3034616680842869, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 7.014028056112225, |
|
"grad_norm": 0.5746680498123169, |
|
"learning_rate": 3.242857142857143e-05, |
|
"loss": 0.1105, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 7.033675193524304, |
|
"grad_norm": 0.5409959554672241, |
|
"learning_rate": 3.16766917293233e-05, |
|
"loss": 0.1184, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 7.033675193524304, |
|
"eval_loss": 0.40056413412094116, |
|
"eval_runtime": 145.9106, |
|
"eval_samples_per_second": 38.763, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.3061096756591934, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 7.053322330936383, |
|
"grad_norm": 0.7439139485359192, |
|
"learning_rate": 3.092631578947368e-05, |
|
"loss": 0.1132, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 7.072969468348462, |
|
"grad_norm": 1.1852874755859375, |
|
"learning_rate": 3.0174436090225562e-05, |
|
"loss": 0.1115, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 7.072969468348462, |
|
"eval_loss": 0.4096328318119049, |
|
"eval_runtime": 145.6721, |
|
"eval_samples_per_second": 38.827, |
|
"eval_steps_per_second": 4.853, |
|
"eval_wer": 0.3049541814446887, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 7.092616605760541, |
|
"grad_norm": 0.6158276796340942, |
|
"learning_rate": 2.9422556390977444e-05, |
|
"loss": 0.1032, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 7.11226374317262, |
|
"grad_norm": 1.272557258605957, |
|
"learning_rate": 2.867067669172932e-05, |
|
"loss": 0.1109, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 7.11226374317262, |
|
"eval_loss": 0.41377753019332886, |
|
"eval_runtime": 146.2393, |
|
"eval_samples_per_second": 38.676, |
|
"eval_steps_per_second": 4.835, |
|
"eval_wer": 0.3038147357609411, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 7.131910880584699, |
|
"grad_norm": 0.4577464163303375, |
|
"learning_rate": 2.7918796992481203e-05, |
|
"loss": 0.1157, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 7.151558017996778, |
|
"grad_norm": 1.748535394668579, |
|
"learning_rate": 2.716691729323308e-05, |
|
"loss": 0.1113, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 7.151558017996778, |
|
"eval_loss": 0.41194456815719604, |
|
"eval_runtime": 146.2502, |
|
"eval_samples_per_second": 38.673, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 0.3052270064675579, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 7.171205155408857, |
|
"grad_norm": 0.8288829326629639, |
|
"learning_rate": 2.6416541353383456e-05, |
|
"loss": 0.1114, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 7.190852292820936, |
|
"grad_norm": 0.5038288235664368, |
|
"learning_rate": 2.5664661654135334e-05, |
|
"loss": 0.1075, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 7.190852292820936, |
|
"eval_loss": 0.41699934005737305, |
|
"eval_runtime": 145.6145, |
|
"eval_samples_per_second": 38.842, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.30066922373256727, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 7.210499430233015, |
|
"grad_norm": 0.41699087619781494, |
|
"learning_rate": 2.4912781954887215e-05, |
|
"loss": 0.1155, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 7.230146567645094, |
|
"grad_norm": 0.9346128702163696, |
|
"learning_rate": 2.4162406015037593e-05, |
|
"loss": 0.1081, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 7.230146567645094, |
|
"eval_loss": 0.4134830832481384, |
|
"eval_runtime": 145.6714, |
|
"eval_samples_per_second": 38.827, |
|
"eval_steps_per_second": 4.853, |
|
"eval_wer": 0.3031246489383897, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 7.249793705057173, |
|
"grad_norm": 1.0166319608688354, |
|
"learning_rate": 2.341052631578947e-05, |
|
"loss": 0.1173, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 7.269440842469252, |
|
"grad_norm": 0.8515588045120239, |
|
"learning_rate": 2.2658646616541353e-05, |
|
"loss": 0.1108, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 7.269440842469252, |
|
"eval_loss": 0.41293400526046753, |
|
"eval_runtime": 146.3235, |
|
"eval_samples_per_second": 38.654, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.3003161560559131, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 7.289087979881331, |
|
"grad_norm": 0.5291551351547241, |
|
"learning_rate": 2.190676691729323e-05, |
|
"loss": 0.1064, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 7.30873511729341, |
|
"grad_norm": 1.0743286609649658, |
|
"learning_rate": 2.1154887218045113e-05, |
|
"loss": 0.1044, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 7.30873511729341, |
|
"eval_loss": 0.41300591826438904, |
|
"eval_runtime": 145.5862, |
|
"eval_samples_per_second": 38.85, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.3022740768082682, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 7.328382254705489, |
|
"grad_norm": 0.4959530532360077, |
|
"learning_rate": 2.040300751879699e-05, |
|
"loss": 0.1063, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 7.348029392117568, |
|
"grad_norm": 0.6196532845497131, |
|
"learning_rate": 1.9651127819548872e-05, |
|
"loss": 0.1121, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 7.348029392117568, |
|
"eval_loss": 0.40789899230003357, |
|
"eval_runtime": 145.8295, |
|
"eval_samples_per_second": 38.785, |
|
"eval_steps_per_second": 4.848, |
|
"eval_wer": 0.2992890500874645, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 7.367676529529647, |
|
"grad_norm": 1.7419555187225342, |
|
"learning_rate": 1.889924812030075e-05, |
|
"loss": 0.1092, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 7.387323666941727, |
|
"grad_norm": 0.931948721408844, |
|
"learning_rate": 1.814736842105263e-05, |
|
"loss": 0.1052, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 7.387323666941727, |
|
"eval_loss": 0.40476053953170776, |
|
"eval_runtime": 145.5337, |
|
"eval_samples_per_second": 38.864, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.301904960600857, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 7.406970804353806, |
|
"grad_norm": 0.3558327853679657, |
|
"learning_rate": 1.739548872180451e-05, |
|
"loss": 0.112, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 7.426617941765885, |
|
"grad_norm": 0.48914971947669983, |
|
"learning_rate": 1.6643609022556388e-05, |
|
"loss": 0.103, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 7.426617941765885, |
|
"eval_loss": 0.415385365486145, |
|
"eval_runtime": 145.9476, |
|
"eval_samples_per_second": 38.754, |
|
"eval_steps_per_second": 4.844, |
|
"eval_wer": 0.3015197958626888, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 7.446265079177964, |
|
"grad_norm": 0.5291373133659363, |
|
"learning_rate": 1.5893233082706766e-05, |
|
"loss": 0.1073, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 7.465912216590043, |
|
"grad_norm": 0.6397764086723328, |
|
"learning_rate": 1.514285714285714e-05, |
|
"loss": 0.1105, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 7.465912216590043, |
|
"eval_loss": 0.4119686484336853, |
|
"eval_runtime": 145.6307, |
|
"eval_samples_per_second": 38.838, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.30187286353934295, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 7.485559354002122, |
|
"grad_norm": 0.45867177844047546, |
|
"learning_rate": 1.439097744360902e-05, |
|
"loss": 0.1079, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 7.505206491414201, |
|
"grad_norm": 1.0139355659484863, |
|
"learning_rate": 1.36390977443609e-05, |
|
"loss": 0.1093, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 7.505206491414201, |
|
"eval_loss": 0.4104667901992798, |
|
"eval_runtime": 146.3292, |
|
"eval_samples_per_second": 38.653, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.3007494663863523, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 7.52485362882628, |
|
"grad_norm": 0.35021767020225525, |
|
"learning_rate": 1.288721804511278e-05, |
|
"loss": 0.1108, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 7.544500766238359, |
|
"grad_norm": 0.7307072281837463, |
|
"learning_rate": 1.2136842105263156e-05, |
|
"loss": 0.1058, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 7.544500766238359, |
|
"eval_loss": 0.41022607684135437, |
|
"eval_runtime": 146.2108, |
|
"eval_samples_per_second": 38.684, |
|
"eval_steps_per_second": 4.835, |
|
"eval_wer": 0.3011025340630065, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 7.564147903650438, |
|
"grad_norm": 0.46207743883132935, |
|
"learning_rate": 1.1384962406015036e-05, |
|
"loss": 0.1053, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 7.583795041062517, |
|
"grad_norm": 0.47636836767196655, |
|
"learning_rate": 1.0633082706766916e-05, |
|
"loss": 0.1043, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 7.583795041062517, |
|
"eval_loss": 0.41014641523361206, |
|
"eval_runtime": 145.8628, |
|
"eval_samples_per_second": 38.776, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.2994495353950346, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 7.603442178474596, |
|
"grad_norm": 1.0540902614593506, |
|
"learning_rate": 9.881203007518796e-06, |
|
"loss": 0.1072, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 7.623089315886675, |
|
"grad_norm": 0.8974863290786743, |
|
"learning_rate": 9.129323308270676e-06, |
|
"loss": 0.1098, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 7.623089315886675, |
|
"eval_loss": 0.408490389585495, |
|
"eval_runtime": 146.1703, |
|
"eval_samples_per_second": 38.695, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 0.29980260307168877, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 7.642736453298754, |
|
"grad_norm": 0.49042123556137085, |
|
"learning_rate": 8.377443609022555e-06, |
|
"loss": 0.1035, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 7.662383590710833, |
|
"grad_norm": 0.7251204252243042, |
|
"learning_rate": 7.625563909774436e-06, |
|
"loss": 0.1057, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 7.662383590710833, |
|
"eval_loss": 0.40715456008911133, |
|
"eval_runtime": 146.2248, |
|
"eval_samples_per_second": 38.68, |
|
"eval_steps_per_second": 4.835, |
|
"eval_wer": 0.2982137985267449, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 7.682030728122912, |
|
"grad_norm": 0.9783725142478943, |
|
"learning_rate": 6.8751879699248115e-06, |
|
"loss": 0.1078, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 7.701677865534991, |
|
"grad_norm": 0.66826331615448, |
|
"learning_rate": 6.123308270676691e-06, |
|
"loss": 0.1021, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 7.701677865534991, |
|
"eval_loss": 0.4079470634460449, |
|
"eval_runtime": 146.5661, |
|
"eval_samples_per_second": 38.59, |
|
"eval_steps_per_second": 4.824, |
|
"eval_wer": 0.2973792749273804, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 7.72132500294707, |
|
"grad_norm": 0.34865960478782654, |
|
"learning_rate": 5.371428571428571e-06, |
|
"loss": 0.108, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 7.740972140359149, |
|
"grad_norm": 0.6881831884384155, |
|
"learning_rate": 4.619548872180451e-06, |
|
"loss": 0.0994, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 7.740972140359149, |
|
"eval_loss": 0.4088830053806305, |
|
"eval_runtime": 145.6213, |
|
"eval_samples_per_second": 38.84, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.29871130298021215, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 7.760619277771228, |
|
"grad_norm": 0.7812435030937195, |
|
"learning_rate": 3.867669172932331e-06, |
|
"loss": 0.1017, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 7.780266415183307, |
|
"grad_norm": 0.23445354402065277, |
|
"learning_rate": 3.118796992481203e-06, |
|
"loss": 0.1065, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 7.780266415183307, |
|
"eval_loss": 0.4065949022769928, |
|
"eval_runtime": 146.17, |
|
"eval_samples_per_second": 38.695, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 0.2973792749273804, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 7.799913552595386, |
|
"grad_norm": 0.4873931407928467, |
|
"learning_rate": 2.366917293233083e-06, |
|
"loss": 0.1052, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 7.8195606900074655, |
|
"grad_norm": 0.24652531743049622, |
|
"learning_rate": 1.6150375939849622e-06, |
|
"loss": 0.1111, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 7.8195606900074655, |
|
"eval_loss": 0.40712785720825195, |
|
"eval_runtime": 145.6053, |
|
"eval_samples_per_second": 38.845, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.2981817014652309, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 7.839207827419545, |
|
"grad_norm": 0.5532709956169128, |
|
"learning_rate": 8.631578947368421e-07, |
|
"loss": 0.106, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 7.858854964831624, |
|
"grad_norm": 0.4496346116065979, |
|
"learning_rate": 1.1127819548872179e-07, |
|
"loss": 0.1065, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 7.858854964831624, |
|
"eval_loss": 0.4064280092716217, |
|
"eval_runtime": 146.2071, |
|
"eval_samples_per_second": 38.685, |
|
"eval_steps_per_second": 4.836, |
|
"eval_wer": 0.298422429426586, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 7.858854964831624, |
|
"step": 200000, |
|
"total_flos": 2.4880981924796708e+20, |
|
"train_loss": 0.2853552089881897, |
|
"train_runtime": 103513.8909, |
|
"train_samples_per_second": 15.457, |
|
"train_steps_per_second": 1.932 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 200000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 4000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.4880981924796708e+20, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|