diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,4628 +1,146 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 7.65345170671973, - "eval_steps": 1000, - "global_step": 200000, + "epoch": 3.0864197530864197, + "eval_steps": 200, + "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.019133629266799325, - "grad_norm": 2.1359012126922607, - "learning_rate": 0.0002982, - "loss": 4.6329, - "step": 500 - }, - { - "epoch": 0.03826725853359865, - "grad_norm": 2.5346407890319824, - "learning_rate": 0.00029925413533834583, - "loss": 1.059, - "step": 1000 - }, - { - "epoch": 0.03826725853359865, - "eval_loss": Infinity, - "eval_runtime": 176.6488, - "eval_samples_per_second": 39.751, - "eval_steps_per_second": 4.97, - "eval_wer": 0.7912656018616459, - "step": 1000 - }, - { - "epoch": 0.05740088780039798, - "grad_norm": 2.2379095554351807, - "learning_rate": 0.0002985022556390977, - "loss": 0.8241, - "step": 1500 - }, - { - "epoch": 0.0765345170671973, - "grad_norm": 3.605928659439087, - "learning_rate": 0.0002977503759398496, - "loss": 0.7527, - "step": 2000 - }, - { - "epoch": 0.0765345170671973, - "eval_loss": Infinity, - "eval_runtime": 174.864, - "eval_samples_per_second": 40.157, - "eval_steps_per_second": 5.021, - "eval_wer": 0.7347419081870108, - "step": 2000 - }, - { - "epoch": 0.09566814633399663, - "grad_norm": 3.9373598098754883, - "learning_rate": 0.0002969984962406015, - "loss": 0.6967, - "step": 2500 - }, - { - "epoch": 0.11480177560079596, - "grad_norm": 5.641207218170166, - "learning_rate": 0.00029624661654135335, - "loss": 0.6861, - "step": 3000 - }, - { - "epoch": 0.11480177560079596, - "eval_loss": Infinity, - "eval_runtime": 174.8654, - "eval_samples_per_second": 40.157, - "eval_steps_per_second": 5.021, - "eval_wer": 0.6766976940977364, - "step": 3000 - }, - { - "epoch": 0.13393540486759528, - "grad_norm": 4.324626922607422, - "learning_rate": 0.0002954947368421052, - "loss": 0.6736, - "step": 3500 - }, - { - "epoch": 0.1530690341343946, - "grad_norm": 5.716803550720215, - "learning_rate": 0.0002947443609022556, - "loss": 0.651, - "step": 4000 - }, - { - "epoch": 0.1530690341343946, - "eval_loss": Infinity, - "eval_runtime": 175.6143, - "eval_samples_per_second": 39.985, - "eval_steps_per_second": 5.0, - "eval_wer": 0.646181510471758, - "step": 4000 - }, - { - "epoch": 0.17220266340119394, - "grad_norm": 6.054178714752197, - "learning_rate": 0.000293993984962406, - "loss": 0.6308, - "step": 4500 - }, - { - "epoch": 0.19133629266799326, - "grad_norm": 3.1400604248046875, - "learning_rate": 0.00029324210526315787, - "loss": 0.6372, - "step": 5000 - }, - { - "epoch": 0.19133629266799326, - "eval_loss": Infinity, - "eval_runtime": 176.1091, - "eval_samples_per_second": 39.873, - "eval_steps_per_second": 4.986, - "eval_wer": 0.6245107890839856, - "step": 5000 - }, - { - "epoch": 0.21046992193479258, - "grad_norm": 3.961786985397339, - "learning_rate": 0.00029249022556390974, - "loss": 0.6101, - "step": 5500 - }, - { - "epoch": 0.22960355120159193, - "grad_norm": 2.590359687805176, - "learning_rate": 0.0002917383458646616, - "loss": 0.6078, - "step": 6000 - }, - { - "epoch": 0.22960355120159193, - "eval_loss": Infinity, - "eval_runtime": 176.6935, - "eval_samples_per_second": 39.741, - "eval_steps_per_second": 4.969, - "eval_wer": 0.5931880685424159, - "step": 6000 - }, - { - "epoch": 0.24873718046839124, - "grad_norm": 3.1479835510253906, - "learning_rate": 0.00029098646616541353, - "loss": 0.601, - "step": 6500 - }, - { - "epoch": 0.26787080973519056, - "grad_norm": 4.320711135864258, - "learning_rate": 0.0002902345864661654, - "loss": 0.6006, - "step": 7000 - }, - { - "epoch": 0.26787080973519056, - "eval_loss": Infinity, - "eval_runtime": 176.7371, - "eval_samples_per_second": 39.731, - "eval_steps_per_second": 4.968, - "eval_wer": 0.6101253437698329, - "step": 7000 - }, - { - "epoch": 0.2870044390019899, - "grad_norm": 3.0556020736694336, - "learning_rate": 0.00028948270676691727, - "loss": 0.5943, - "step": 7500 - }, - { - "epoch": 0.3061380682687892, - "grad_norm": 1.8571085929870605, - "learning_rate": 0.00028873082706766913, - "loss": 0.6008, - "step": 8000 - }, - { - "epoch": 0.3061380682687892, - "eval_loss": Infinity, - "eval_runtime": 176.2913, - "eval_samples_per_second": 39.832, - "eval_steps_per_second": 4.98, - "eval_wer": 0.5832055214723927, - "step": 8000 - }, - { - "epoch": 0.32527169753558854, - "grad_norm": 2.2670278549194336, - "learning_rate": 0.00028797894736842106, - "loss": 0.5828, - "step": 8500 - }, - { - "epoch": 0.3444053268023879, - "grad_norm": 3.5902249813079834, - "learning_rate": 0.00028722706766917287, - "loss": 0.592, - "step": 9000 - }, - { - "epoch": 0.3444053268023879, - "eval_loss": Infinity, - "eval_runtime": 177.2597, - "eval_samples_per_second": 39.614, - "eval_steps_per_second": 4.953, - "eval_wer": 0.5834302940554262, - "step": 9000 - }, - { - "epoch": 0.3635389560691872, - "grad_norm": 1.5114198923110962, - "learning_rate": 0.0002864751879699248, - "loss": 0.5702, - "step": 9500 - }, - { - "epoch": 0.3826725853359865, - "grad_norm": 1.9661872386932373, - "learning_rate": 0.00028572330827067666, - "loss": 0.5638, - "step": 10000 - }, - { - "epoch": 0.3826725853359865, - "eval_loss": Infinity, - "eval_runtime": 176.3135, - "eval_samples_per_second": 39.827, - "eval_steps_per_second": 4.98, - "eval_wer": 0.5573434525068754, - "step": 10000 - }, - { - "epoch": 0.40180621460278587, - "grad_norm": 3.7164504528045654, - "learning_rate": 0.00028497142857142853, - "loss": 0.5599, - "step": 10500 - }, - { - "epoch": 0.42093984386958516, - "grad_norm": 2.0946409702301025, - "learning_rate": 0.0002842195488721804, - "loss": 0.5585, - "step": 11000 - }, - { - "epoch": 0.42093984386958516, - "eval_loss": Infinity, - "eval_runtime": 177.3363, - "eval_samples_per_second": 39.597, - "eval_steps_per_second": 4.951, - "eval_wer": 0.5664269092447641, - "step": 11000 - }, - { - "epoch": 0.4400734731363845, - "grad_norm": 3.3412957191467285, - "learning_rate": 0.0002834676691729323, - "loss": 0.5436, - "step": 11500 - }, - { - "epoch": 0.45920710240318385, - "grad_norm": 3.093538999557495, - "learning_rate": 0.0002827157894736842, - "loss": 0.5569, - "step": 12000 - }, - { - "epoch": 0.45920710240318385, - "eval_loss": Infinity, - "eval_runtime": 177.5626, - "eval_samples_per_second": 39.547, - "eval_steps_per_second": 4.945, - "eval_wer": 0.5487624286016501, - "step": 12000 - }, - { - "epoch": 0.47834073166998314, - "grad_norm": 2.192824125289917, - "learning_rate": 0.0002819654135338346, - "loss": 0.5418, - "step": 12500 - }, - { - "epoch": 0.4974743609367825, - "grad_norm": 2.0859923362731934, - "learning_rate": 0.00028121353383458645, - "loss": 0.5293, - "step": 13000 - }, - { - "epoch": 0.4974743609367825, - "eval_loss": Infinity, - "eval_runtime": 177.8833, - "eval_samples_per_second": 39.475, - "eval_steps_per_second": 4.936, - "eval_wer": 0.5434736619420352, - "step": 13000 - }, - { - "epoch": 0.5166079902035818, - "grad_norm": 3.662593364715576, - "learning_rate": 0.0002804616541353383, - "loss": 0.5316, - "step": 13500 - }, - { - "epoch": 0.5357416194703811, - "grad_norm": 3.3237922191619873, - "learning_rate": 0.0002797097744360902, - "loss": 0.5388, - "step": 14000 - }, - { - "epoch": 0.5357416194703811, - "eval_loss": Infinity, - "eval_runtime": 178.3166, - "eval_samples_per_second": 39.379, - "eval_steps_per_second": 4.924, - "eval_wer": 0.5418209223609054, - "step": 14000 - }, - { - "epoch": 0.5548752487371804, - "grad_norm": 1.9418795108795166, - "learning_rate": 0.0002789578947368421, - "loss": 0.5336, - "step": 14500 - }, - { - "epoch": 0.5740088780039798, - "grad_norm": 2.3504509925842285, - "learning_rate": 0.00027820601503759397, - "loss": 0.5163, - "step": 15000 - }, - { - "epoch": 0.5740088780039798, - "eval_loss": Infinity, - "eval_runtime": 178.0416, - "eval_samples_per_second": 39.44, - "eval_steps_per_second": 4.931, - "eval_wer": 0.5408292786122276, - "step": 15000 - }, - { - "epoch": 0.5931425072707791, - "grad_norm": 1.8465300798416138, - "learning_rate": 0.00027745413533834584, - "loss": 0.5172, - "step": 15500 - }, - { - "epoch": 0.6122761365375784, - "grad_norm": 2.249298572540283, - "learning_rate": 0.0002767022556390977, - "loss": 0.5226, - "step": 16000 - }, - { - "epoch": 0.6122761365375784, - "eval_loss": Infinity, - "eval_runtime": 178.435, - "eval_samples_per_second": 39.353, - "eval_steps_per_second": 4.921, - "eval_wer": 0.5311376137084832, - "step": 16000 - }, - { - "epoch": 0.6314097658043778, - "grad_norm": 1.6280571222305298, - "learning_rate": 0.00027595037593984963, - "loss": 0.5161, - "step": 16500 - }, - { - "epoch": 0.6505433950711771, - "grad_norm": 4.344738006591797, - "learning_rate": 0.0002751984962406015, - "loss": 0.4952, - "step": 17000 - }, - { - "epoch": 0.6505433950711771, - "eval_loss": Infinity, - "eval_runtime": 178.8108, - "eval_samples_per_second": 39.271, - "eval_steps_per_second": 4.91, - "eval_wer": 0.5288502221281997, - "step": 17000 - }, - { - "epoch": 0.6696770243379764, - "grad_norm": 2.9461894035339355, - "learning_rate": 0.00027444812030075184, - "loss": 0.5177, - "step": 17500 - }, - { - "epoch": 0.6888106536047758, - "grad_norm": 3.190275192260742, - "learning_rate": 0.00027369624060150376, - "loss": 0.524, - "step": 18000 - }, - { - "epoch": 0.6888106536047758, - "eval_loss": Infinity, - "eval_runtime": 178.3332, - "eval_samples_per_second": 39.376, - "eval_steps_per_second": 4.923, - "eval_wer": 0.5214591707213878, - "step": 18000 - }, - { - "epoch": 0.7079442828715751, - "grad_norm": 3.2808961868286133, - "learning_rate": 0.0002729443609022556, - "loss": 0.4997, - "step": 18500 - }, - { - "epoch": 0.7270779121383744, - "grad_norm": 2.6754653453826904, - "learning_rate": 0.0002721924812030075, - "loss": 0.5076, - "step": 19000 - }, - { - "epoch": 0.7270779121383744, - "eval_loss": Infinity, - "eval_runtime": 178.4608, - "eval_samples_per_second": 39.348, - "eval_steps_per_second": 4.92, - "eval_wer": 0.5186957901417389, - "step": 19000 - }, - { - "epoch": 0.7462115414051738, - "grad_norm": 3.872616767883301, - "learning_rate": 0.00027144060150375936, - "loss": 0.4961, - "step": 19500 - }, - { - "epoch": 0.765345170671973, - "grad_norm": 4.666406631469727, - "learning_rate": 0.00027068872180451123, - "loss": 0.492, - "step": 20000 - }, - { - "epoch": 0.765345170671973, - "eval_loss": Infinity, - "eval_runtime": 177.9854, - "eval_samples_per_second": 39.453, - "eval_steps_per_second": 4.933, - "eval_wer": 0.5094933361540089, - "step": 20000 - }, - { - "epoch": 0.7844787999387723, - "grad_norm": NaN, - "learning_rate": 0.00026993984962406014, - "loss": 0.4846, - "step": 20500 - }, - { - "epoch": 0.8036124292055717, - "grad_norm": 3.923118829727173, - "learning_rate": 0.000269187969924812, - "loss": 0.4934, - "step": 21000 - }, - { - "epoch": 0.8036124292055717, - "eval_loss": Infinity, - "eval_runtime": 178.6067, - "eval_samples_per_second": 39.315, - "eval_steps_per_second": 4.916, - "eval_wer": 0.5061481912418024, - "step": 21000 - }, - { - "epoch": 0.822746058472371, - "grad_norm": 3.902585744857788, - "learning_rate": 0.0002684360902255639, - "loss": 0.5006, - "step": 21500 - }, - { - "epoch": 0.8418796877391703, - "grad_norm": 6.406003952026367, - "learning_rate": 0.0002676842105263158, - "loss": 0.4985, - "step": 22000 - }, - { - "epoch": 0.8418796877391703, - "eval_loss": Infinity, - "eval_runtime": 177.9171, - "eval_samples_per_second": 39.468, - "eval_steps_per_second": 4.935, - "eval_wer": 0.5129310344827587, - "step": 22000 - }, - { - "epoch": 0.8610133170059697, - "grad_norm": 3.3762645721435547, - "learning_rate": 0.0002669323308270676, - "loss": 0.5001, - "step": 22500 - }, - { - "epoch": 0.880146946272769, - "grad_norm": 5.018126010894775, - "learning_rate": 0.00026618045112781954, - "loss": 0.4887, - "step": 23000 - }, - { - "epoch": 0.880146946272769, - "eval_loss": Infinity, - "eval_runtime": 176.8724, - "eval_samples_per_second": 39.701, - "eval_steps_per_second": 4.964, - "eval_wer": 0.49284694309287075, - "step": 23000 - }, - { - "epoch": 0.8992805755395683, - "grad_norm": 2.6644840240478516, - "learning_rate": 0.0002654285714285714, - "loss": 0.4879, - "step": 23500 - }, - { - "epoch": 0.9184142048063677, - "grad_norm": 1.760593056678772, - "learning_rate": 0.0002646766917293233, - "loss": 0.484, - "step": 24000 - }, - { - "epoch": 0.9184142048063677, - "eval_loss": Infinity, - "eval_runtime": 178.117, - "eval_samples_per_second": 39.424, - "eval_steps_per_second": 4.929, - "eval_wer": 0.49492278400676964, - "step": 24000 - }, - { - "epoch": 0.937547834073167, - "grad_norm": 4.163125514984131, - "learning_rate": 0.00026392481203007514, - "loss": 0.4825, - "step": 24500 - }, - { - "epoch": 0.9566814633399663, - "grad_norm": 5.920992374420166, - "learning_rate": 0.00026317293233082706, - "loss": 0.4741, - "step": 25000 - }, - { - "epoch": 0.9566814633399663, - "eval_loss": Infinity, - "eval_runtime": 177.7742, - "eval_samples_per_second": 39.5, - "eval_steps_per_second": 4.939, - "eval_wer": 0.48648720118468375, - "step": 25000 - }, - { - "epoch": 0.9758150926067657, - "grad_norm": 1.6138001680374146, - "learning_rate": 0.00026242105263157893, - "loss": 0.4792, - "step": 25500 - }, - { - "epoch": 0.994948721873565, - "grad_norm": 4.549112796783447, - "learning_rate": 0.0002616691729323308, - "loss": 0.4816, - "step": 26000 - }, - { - "epoch": 0.994948721873565, - "eval_loss": Infinity, - "eval_runtime": 178.2909, - "eval_samples_per_second": 39.385, - "eval_steps_per_second": 4.925, - "eval_wer": 0.5054606515760525, - "step": 26000 - }, - { - "epoch": 1.0140823511403643, - "grad_norm": 1.6239593029022217, - "learning_rate": 0.00026091729323308267, - "loss": 0.4551, - "step": 26500 - }, - { - "epoch": 1.0332159804071637, - "grad_norm": 0.7770557999610901, - "learning_rate": 0.0002601654135338346, - "loss": 0.44, - "step": 27000 - }, - { - "epoch": 1.0332159804071637, - "eval_loss": Infinity, - "eval_runtime": 178.2997, - "eval_samples_per_second": 39.383, - "eval_steps_per_second": 4.924, - "eval_wer": 0.47974402369367464, - "step": 27000 - }, - { - "epoch": 1.0523496096739628, - "grad_norm": 2.1102957725524902, - "learning_rate": 0.00025941503759398493, - "loss": 0.4367, - "step": 27500 - }, - { - "epoch": 1.0714832389407623, - "grad_norm": 3.1549980640411377, - "learning_rate": 0.0002586631578947368, - "loss": 0.4359, - "step": 28000 - }, - { - "epoch": 1.0714832389407623, - "eval_loss": Infinity, - "eval_runtime": 177.6407, - "eval_samples_per_second": 39.529, - "eval_steps_per_second": 4.943, - "eval_wer": 0.4912074254283901, - "step": 28000 - }, - { - "epoch": 1.0906168682075617, - "grad_norm": 0.7373610138893127, - "learning_rate": 0.0002579112781954887, - "loss": 0.429, - "step": 28500 - }, - { - "epoch": 1.1097504974743608, - "grad_norm": 2.5423829555511475, - "learning_rate": 0.0002571593984962406, - "loss": 0.411, - "step": 29000 - }, - { - "epoch": 1.1097504974743608, - "eval_loss": Infinity, - "eval_runtime": 177.8711, - "eval_samples_per_second": 39.478, - "eval_steps_per_second": 4.936, - "eval_wer": 0.4773640786968479, - "step": 29000 - }, - { - "epoch": 1.1288841267411602, - "grad_norm": 1.5159779787063599, - "learning_rate": 0.000256409022556391, - "loss": 0.4162, - "step": 29500 - }, - { - "epoch": 1.1480177560079596, - "grad_norm": 0.6960669159889221, - "learning_rate": 0.00025565714285714284, - "loss": 0.4298, - "step": 30000 - }, - { - "epoch": 1.1480177560079596, - "eval_loss": Infinity, - "eval_runtime": 176.6665, - "eval_samples_per_second": 39.747, - "eval_steps_per_second": 4.97, - "eval_wer": 0.4772847471969537, - "step": 30000 - }, - { - "epoch": 1.1671513852747588, - "grad_norm": 0.788869321346283, - "learning_rate": 0.0002549052631578947, - "loss": 0.4405, - "step": 30500 - }, - { - "epoch": 1.1862850145415582, - "grad_norm": 0.8728181719779968, - "learning_rate": 0.0002541533834586466, - "loss": 0.4305, - "step": 31000 - }, - { - "epoch": 1.1862850145415582, - "eval_loss": Infinity, - "eval_runtime": 177.2604, - "eval_samples_per_second": 39.614, - "eval_steps_per_second": 4.953, - "eval_wer": 0.489753014596996, - "step": 31000 - }, - { - "epoch": 1.2054186438083576, - "grad_norm": 1.0415312051773071, - "learning_rate": 0.00025340300751879697, - "loss": 0.423, - "step": 31500 - }, - { - "epoch": 1.2245522730751568, - "grad_norm": 1.1658622026443481, - "learning_rate": 0.00025265112781954884, - "loss": 0.4126, - "step": 32000 - }, - { - "epoch": 1.2245522730751568, - "eval_loss": Infinity, - "eval_runtime": 177.0921, - "eval_samples_per_second": 39.652, - "eval_steps_per_second": 4.958, - "eval_wer": 0.4738867146181511, - "step": 32000 - }, - { - "epoch": 1.2436859023419562, - "grad_norm": 1.2818922996520996, - "learning_rate": 0.00025189924812030076, - "loss": 0.428, - "step": 32500 - }, - { - "epoch": 1.2628195316087556, - "grad_norm": 1.18551504611969, - "learning_rate": 0.00025114736842105263, - "loss": 0.4234, - "step": 33000 - }, - { - "epoch": 1.2628195316087556, - "eval_loss": Infinity, - "eval_runtime": 177.167, - "eval_samples_per_second": 39.635, - "eval_steps_per_second": 4.956, - "eval_wer": 0.48441136027078485, - "step": 33000 - }, - { - "epoch": 1.2819531608755548, - "grad_norm": 1.017686367034912, - "learning_rate": 0.0002503954887218045, - "loss": 0.4334, - "step": 33500 - }, - { - "epoch": 1.3010867901423542, - "grad_norm": 0.9811512231826782, - "learning_rate": 0.00024964360902255637, - "loss": 0.4252, - "step": 34000 - }, - { - "epoch": 1.3010867901423542, - "eval_loss": Infinity, - "eval_runtime": 183.3489, - "eval_samples_per_second": 38.299, - "eval_steps_per_second": 4.789, - "eval_wer": 0.47629310344827586, - "step": 34000 - }, - { - "epoch": 1.3202204194091536, - "grad_norm": 1.2876335382461548, - "learning_rate": 0.0002488917293233083, - "loss": 0.4094, - "step": 34500 - }, - { - "epoch": 1.3393540486759528, - "grad_norm": 0.9524905681610107, - "learning_rate": 0.0002481413533834586, - "loss": 0.4106, - "step": 35000 - }, - { - "epoch": 1.3393540486759528, - "eval_loss": Infinity, - "eval_runtime": 178.3382, - "eval_samples_per_second": 39.375, - "eval_steps_per_second": 4.923, - "eval_wer": 0.47093822720541567, - "step": 35000 - }, - { - "epoch": 1.3584876779427522, - "grad_norm": 1.4819426536560059, - "learning_rate": 0.0002473894736842105, - "loss": 0.4082, - "step": 35500 - }, - { - "epoch": 1.3776213072095516, - "grad_norm": 2.091412305831909, - "learning_rate": 0.0002466375939849624, - "loss": 0.4254, - "step": 36000 - }, - { - "epoch": 1.3776213072095516, - "eval_loss": Infinity, - "eval_runtime": 174.5569, - "eval_samples_per_second": 40.228, - "eval_steps_per_second": 5.03, - "eval_wer": 0.47370160778506454, - "step": 36000 - }, - { - "epoch": 1.3967549364763507, - "grad_norm": 0.9154180288314819, - "learning_rate": 0.00024588571428571423, - "loss": 0.4101, - "step": 36500 - }, - { - "epoch": 1.4158885657431501, - "grad_norm": 0.6072717308998108, - "learning_rate": 0.00024513383458646615, - "loss": 0.4245, - "step": 37000 - }, - { - "epoch": 1.4158885657431501, - "eval_loss": Infinity, - "eval_runtime": 176.0739, - "eval_samples_per_second": 39.881, - "eval_steps_per_second": 4.987, - "eval_wer": 0.4533530780621959, - "step": 37000 - }, - { - "epoch": 1.4350221950099495, - "grad_norm": 0.8788403868675232, - "learning_rate": 0.000244381954887218, - "loss": 0.4139, - "step": 37500 - }, - { - "epoch": 1.4541558242767487, - "grad_norm": 1.6312676668167114, - "learning_rate": 0.00024363007518796991, - "loss": 0.4154, - "step": 38000 - }, - { - "epoch": 1.4541558242767487, - "eval_loss": Infinity, - "eval_runtime": 175.6948, - "eval_samples_per_second": 39.967, - "eval_steps_per_second": 4.997, - "eval_wer": 0.4566056695578591, - "step": 38000 - }, - { - "epoch": 1.4732894535435481, - "grad_norm": 1.2530635595321655, - "learning_rate": 0.00024287819548872178, - "loss": 0.403, - "step": 38500 - }, - { - "epoch": 1.4924230828103475, - "grad_norm": 0.8397653102874756, - "learning_rate": 0.00024212631578947368, - "loss": 0.4071, - "step": 39000 - }, - { - "epoch": 1.4924230828103475, - "eval_loss": Infinity, - "eval_runtime": 175.5585, - "eval_samples_per_second": 39.998, - "eval_steps_per_second": 5.001, - "eval_wer": 0.4634546223820605, - "step": 39000 - }, - { - "epoch": 1.5115567120771467, - "grad_norm": 1.685242772102356, - "learning_rate": 0.00024137443609022552, - "loss": 0.3912, - "step": 39500 - }, - { - "epoch": 1.530690341343946, - "grad_norm": 1.5319820642471313, - "learning_rate": 0.00024062255639097744, - "loss": 0.4065, - "step": 40000 - }, - { - "epoch": 1.530690341343946, - "eval_loss": Infinity, - "eval_runtime": 176.3036, - "eval_samples_per_second": 39.829, - "eval_steps_per_second": 4.98, - "eval_wer": 0.4668394330442141, - "step": 40000 - }, - { - "epoch": 1.5498239706107455, - "grad_norm": 7.0581955909729, - "learning_rate": 0.00023987067669172928, - "loss": 0.4006, - "step": 40500 - }, - { - "epoch": 1.5689575998775447, - "grad_norm": 0.6705722808837891, - "learning_rate": 0.0002391203007518797, - "loss": 0.4086, - "step": 41000 - }, - { - "epoch": 1.5689575998775447, - "eval_loss": Infinity, - "eval_runtime": 176.1394, - "eval_samples_per_second": 39.866, - "eval_steps_per_second": 4.985, - "eval_wer": 0.46069124180241167, - "step": 41000 - }, - { - "epoch": 1.588091229144344, - "grad_norm": 0.794866144657135, - "learning_rate": 0.00023836842105263154, - "loss": 0.4026, - "step": 41500 - }, - { - "epoch": 1.6072248584111435, - "grad_norm": 0.7202442288398743, - "learning_rate": 0.00023761654135338344, - "loss": 0.4037, - "step": 42000 - }, - { - "epoch": 1.6072248584111435, - "eval_loss": Infinity, - "eval_runtime": 175.2975, - "eval_samples_per_second": 40.058, - "eval_steps_per_second": 5.009, - "eval_wer": 0.46155066638459913, - "step": 42000 - }, - { - "epoch": 1.6263584876779427, - "grad_norm": 1.2002875804901123, - "learning_rate": 0.00023686616541353383, - "loss": 0.4013, - "step": 42500 - }, - { - "epoch": 1.645492116944742, - "grad_norm": 1.2249990701675415, - "learning_rate": 0.00023611428571428567, - "loss": 0.4071, - "step": 43000 - }, - { - "epoch": 1.645492116944742, - "eval_loss": Infinity, - "eval_runtime": 177.5513, - "eval_samples_per_second": 39.549, - "eval_steps_per_second": 4.945, - "eval_wer": 0.46074412946900783, - "step": 43000 - }, - { - "epoch": 1.6646257462115415, - "grad_norm": 0.7734994292259216, - "learning_rate": 0.00023536240601503756, - "loss": 0.3834, - "step": 43500 - }, - { - "epoch": 1.6837593754783406, - "grad_norm": 1.9896817207336426, - "learning_rate": 0.00023461052631578943, - "loss": 0.394, - "step": 44000 - }, - { - "epoch": 1.6837593754783406, - "eval_loss": Infinity, - "eval_runtime": 176.2037, - "eval_samples_per_second": 39.852, - "eval_steps_per_second": 4.983, - "eval_wer": 0.4430532049925957, - "step": 44000 - }, - { - "epoch": 1.70289300474514, - "grad_norm": 1.6369848251342773, - "learning_rate": 0.00023385864661654133, - "loss": 0.4006, - "step": 44500 - }, - { - "epoch": 1.7220266340119394, - "grad_norm": 0.7955853343009949, - "learning_rate": 0.0002331067669172932, - "loss": 0.4103, - "step": 45000 - }, - { - "epoch": 1.7220266340119394, - "eval_loss": Infinity, - "eval_runtime": 178.3273, - "eval_samples_per_second": 39.377, - "eval_steps_per_second": 4.924, - "eval_wer": 0.43978739158028346, - "step": 45000 - }, - { - "epoch": 1.7411602632787386, - "grad_norm": 1.0250253677368164, - "learning_rate": 0.00023235639097744359, - "loss": 0.3876, - "step": 45500 - }, - { - "epoch": 1.760293892545538, - "grad_norm": 1.1575045585632324, - "learning_rate": 0.00023160451127819545, - "loss": 0.3909, - "step": 46000 - }, - { - "epoch": 1.760293892545538, - "eval_loss": Infinity, - "eval_runtime": 177.8973, - "eval_samples_per_second": 39.472, - "eval_steps_per_second": 4.935, - "eval_wer": 0.4454728157393696, - "step": 46000 - }, - { - "epoch": 1.7794275218123374, - "grad_norm": 0.6429355144500732, - "learning_rate": 0.00023085263157894735, - "loss": 0.3875, - "step": 46500 - }, - { - "epoch": 1.7985611510791366, - "grad_norm": 0.8371389508247375, - "learning_rate": 0.00023010075187969922, - "loss": 0.3909, - "step": 47000 - }, - { - "epoch": 1.7985611510791366, - "eval_loss": Infinity, - "eval_runtime": 178.4648, - "eval_samples_per_second": 39.347, - "eval_steps_per_second": 4.92, - "eval_wer": 0.442074783160567, - "step": 47000 - }, - { - "epoch": 1.817694780345936, - "grad_norm": 1.2913625240325928, - "learning_rate": 0.0002293488721804511, - "loss": 0.3979, - "step": 47500 - }, - { - "epoch": 1.8368284096127354, - "grad_norm": 1.1963073015213013, - "learning_rate": 0.00022859699248120298, - "loss": 0.3982, - "step": 48000 - }, - { - "epoch": 1.8368284096127354, - "eval_loss": Infinity, - "eval_runtime": 178.3863, - "eval_samples_per_second": 39.364, - "eval_steps_per_second": 4.922, - "eval_wer": 0.4370636767505818, - "step": 48000 - }, - { - "epoch": 1.8559620388795346, - "grad_norm": 1.5472427606582642, - "learning_rate": 0.00022784661654135337, - "loss": 0.3905, - "step": 48500 - }, - { - "epoch": 1.875095668146334, - "grad_norm": 0.7495508790016174, - "learning_rate": 0.00022709473684210524, - "loss": 0.3896, - "step": 49000 - }, - { - "epoch": 1.875095668146334, - "eval_loss": Infinity, - "eval_runtime": 177.5084, - "eval_samples_per_second": 39.559, - "eval_steps_per_second": 4.946, - "eval_wer": 0.4417706790776391, - "step": 49000 - }, - { - "epoch": 1.8942292974131334, - "grad_norm": 1.171025037765503, - "learning_rate": 0.00022634285714285713, - "loss": 0.3883, - "step": 49500 - }, - { - "epoch": 1.9133629266799326, - "grad_norm": 1.1951338052749634, - "learning_rate": 0.000225590977443609, - "loss": 0.3986, - "step": 50000 - }, - { - "epoch": 1.9133629266799326, - "eval_loss": Infinity, - "eval_runtime": 178.1273, - "eval_samples_per_second": 39.421, - "eval_steps_per_second": 4.929, - "eval_wer": 0.4381610958324519, - "step": 50000 - }, - { - "epoch": 1.932496555946732, - "grad_norm": 1.0499491691589355, - "learning_rate": 0.0002248390977443609, - "loss": 0.3842, - "step": 50500 - }, - { - "epoch": 1.9516301852135314, - "grad_norm": 0.6040648221969604, - "learning_rate": 0.00022408721804511277, - "loss": 0.3968, - "step": 51000 - }, - { - "epoch": 1.9516301852135314, - "eval_loss": Infinity, - "eval_runtime": 178.1104, - "eval_samples_per_second": 39.425, - "eval_steps_per_second": 4.93, - "eval_wer": 0.42753067484662577, - "step": 51000 - }, - { - "epoch": 1.9707638144803306, - "grad_norm": 0.7306642532348633, - "learning_rate": 0.00022333533834586466, - "loss": 0.3795, - "step": 51500 - }, - { - "epoch": 1.98989744374713, - "grad_norm": 0.781111478805542, - "learning_rate": 0.00022258345864661653, - "loss": 0.4025, - "step": 52000 - }, - { - "epoch": 1.98989744374713, - "eval_loss": Infinity, - "eval_runtime": 178.6685, - "eval_samples_per_second": 39.302, - "eval_steps_per_second": 4.914, - "eval_wer": 0.4204040617727946, - "step": 52000 - }, - { - "epoch": 2.0090310730139294, - "grad_norm": 0.8605564832687378, - "learning_rate": 0.00022183157894736842, - "loss": 0.3645, - "step": 52500 - }, - { - "epoch": 2.0281647022807285, - "grad_norm": 0.8945469260215759, - "learning_rate": 0.0002210796992481203, - "loss": 0.3404, - "step": 53000 - }, - { - "epoch": 2.0281647022807285, - "eval_loss": Infinity, - "eval_runtime": 178.4963, - "eval_samples_per_second": 39.34, - "eval_steps_per_second": 4.919, - "eval_wer": 0.42721334884704887, - "step": 53000 - }, - { - "epoch": 2.0472983315475277, - "grad_norm": 0.7925958633422852, - "learning_rate": 0.0002203278195488722, - "loss": 0.3444, - "step": 53500 - }, - { - "epoch": 2.0664319608143273, - "grad_norm": 1.2213307619094849, - "learning_rate": 0.00021957744360902255, - "loss": 0.3354, - "step": 54000 - }, - { - "epoch": 2.0664319608143273, - "eval_loss": Infinity, - "eval_runtime": 177.2885, - "eval_samples_per_second": 39.608, - "eval_steps_per_second": 4.952, - "eval_wer": 0.4297519568436641, - "step": 54000 - }, - { - "epoch": 2.0855655900811265, - "grad_norm": 1.751560091972351, - "learning_rate": 0.00021882706766917292, - "loss": 0.3468, - "step": 54500 - }, - { - "epoch": 2.1046992193479257, - "grad_norm": 0.7927623391151428, - "learning_rate": 0.0002180766917293233, - "loss": 0.3352, - "step": 55000 - }, - { - "epoch": 2.1046992193479257, - "eval_loss": Infinity, - "eval_runtime": 180.1438, - "eval_samples_per_second": 38.98, - "eval_steps_per_second": 4.874, - "eval_wer": 0.42244023693674637, - "step": 55000 - }, - { - "epoch": 2.1238328486147253, - "grad_norm": 1.0091408491134644, - "learning_rate": 0.00021732481203007517, - "loss": 0.3579, - "step": 55500 - }, - { - "epoch": 2.1429664778815245, - "grad_norm": 1.7537100315093994, - "learning_rate": 0.00021657293233082707, - "loss": 0.3384, - "step": 56000 - }, - { - "epoch": 2.1429664778815245, - "eval_loss": Infinity, - "eval_runtime": 179.0104, - "eval_samples_per_second": 39.227, - "eval_steps_per_second": 4.905, - "eval_wer": 0.4267241379310345, - "step": 56000 - }, - { - "epoch": 2.1621001071483237, - "grad_norm": 1.3135699033737183, - "learning_rate": 0.00021582105263157894, - "loss": 0.3408, - "step": 56500 - }, - { - "epoch": 2.1812337364151233, - "grad_norm": 0.9029154181480408, - "learning_rate": 0.00021506917293233083, - "loss": 0.3342, - "step": 57000 - }, - { - "epoch": 2.1812337364151233, - "eval_loss": Infinity, - "eval_runtime": 178.7816, - "eval_samples_per_second": 39.277, - "eval_steps_per_second": 4.911, - "eval_wer": 0.4187248783583668, - "step": 57000 - }, - { - "epoch": 2.2003673656819225, - "grad_norm": 0.6189078688621521, - "learning_rate": 0.0002143172932330827, - "loss": 0.339, - "step": 57500 - }, - { - "epoch": 2.2195009949487217, - "grad_norm": 1.0600789785385132, - "learning_rate": 0.0002135654135338346, - "loss": 0.3425, - "step": 58000 - }, - { - "epoch": 2.2195009949487217, - "eval_loss": Infinity, - "eval_runtime": 178.4307, - "eval_samples_per_second": 39.354, - "eval_steps_per_second": 4.921, - "eval_wer": 0.4198884070234821, - "step": 58000 - }, - { - "epoch": 2.2386346242155213, - "grad_norm": 1.0213968753814697, - "learning_rate": 0.00021281503759398496, - "loss": 0.3439, - "step": 58500 - }, - { - "epoch": 2.2577682534823205, - "grad_norm": 1.2084991931915283, - "learning_rate": 0.0002120631578947368, - "loss": 0.3417, - "step": 59000 - }, - { - "epoch": 2.2577682534823205, - "eval_loss": Infinity, - "eval_runtime": 177.6471, - "eval_samples_per_second": 39.528, - "eval_steps_per_second": 4.942, - "eval_wer": 0.4174423524434102, - "step": 59000 - }, - { - "epoch": 2.2769018827491196, - "grad_norm": 1.1838810443878174, - "learning_rate": 0.0002113112781954887, - "loss": 0.352, - "step": 59500 - }, - { - "epoch": 2.2960355120159193, - "grad_norm": 1.033828854560852, - "learning_rate": 0.00021055939849624056, - "loss": 0.3355, - "step": 60000 - }, - { - "epoch": 2.2960355120159193, - "eval_loss": Infinity, - "eval_runtime": 178.3447, - "eval_samples_per_second": 39.373, - "eval_steps_per_second": 4.923, - "eval_wer": 0.4157896128622805, - "step": 60000 - }, - { - "epoch": 2.3151691412827184, - "grad_norm": 1.062028408050537, - "learning_rate": 0.00020980751879699246, - "loss": 0.3397, - "step": 60500 - }, - { - "epoch": 2.3343027705495176, - "grad_norm": 0.8648023009300232, - "learning_rate": 0.00020905563909774433, - "loss": 0.3501, - "step": 61000 - }, - { - "epoch": 2.3343027705495176, - "eval_loss": Infinity, - "eval_runtime": 178.0407, - "eval_samples_per_second": 39.44, - "eval_steps_per_second": 4.931, - "eval_wer": 0.41284112544954515, - "step": 61000 - }, - { - "epoch": 2.3534363998163172, - "grad_norm": 0.7826744318008423, - "learning_rate": 0.00020830375939849622, - "loss": 0.3276, - "step": 61500 - }, - { - "epoch": 2.3725700290831164, - "grad_norm": 0.8527004718780518, - "learning_rate": 0.0002075518796992481, - "loss": 0.3358, - "step": 62000 - }, - { - "epoch": 2.3725700290831164, - "eval_loss": Infinity, - "eval_runtime": 178.3539, - "eval_samples_per_second": 39.371, - "eval_steps_per_second": 4.923, - "eval_wer": 0.4115585995345885, - "step": 62000 - }, - { - "epoch": 2.3917036583499156, - "grad_norm": 1.9309333562850952, - "learning_rate": 0.00020679999999999999, - "loss": 0.3294, - "step": 62500 - }, - { - "epoch": 2.4108372876167152, - "grad_norm": 1.1275138854980469, - "learning_rate": 0.00020604812030075185, - "loss": 0.3343, - "step": 63000 - }, - { - "epoch": 2.4108372876167152, - "eval_loss": Infinity, - "eval_runtime": 178.4409, - "eval_samples_per_second": 39.352, - "eval_steps_per_second": 4.92, - "eval_wer": 0.4163978210281362, - "step": 63000 - }, - { - "epoch": 2.4299709168835144, - "grad_norm": 3.78141450881958, - "learning_rate": 0.00020529624060150375, - "loss": 0.3269, - "step": 63500 - }, - { - "epoch": 2.4491045461503136, - "grad_norm": 1.6814829111099243, - "learning_rate": 0.00020454436090225562, - "loss": 0.3343, - "step": 64000 - }, - { - "epoch": 2.4491045461503136, - "eval_loss": Infinity, - "eval_runtime": 177.6927, - "eval_samples_per_second": 39.518, - "eval_steps_per_second": 4.941, - "eval_wer": 0.41787867569282844, - "step": 64000 - }, - { - "epoch": 2.468238175417113, - "grad_norm": 1.0685131549835205, - "learning_rate": 0.000203793984962406, - "loss": 0.3346, - "step": 64500 - }, - { - "epoch": 2.4873718046839124, - "grad_norm": 0.6016332507133484, - "learning_rate": 0.00020304210526315788, - "loss": 0.3367, - "step": 65000 - }, - { - "epoch": 2.4873718046839124, - "eval_loss": Infinity, - "eval_runtime": 178.6952, - "eval_samples_per_second": 39.296, - "eval_steps_per_second": 4.913, - "eval_wer": 0.411730484451026, - "step": 65000 - }, - { - "epoch": 2.5065054339507116, - "grad_norm": 0.9282209277153015, - "learning_rate": 0.00020229022556390977, - "loss": 0.3307, - "step": 65500 - }, - { - "epoch": 2.525639063217511, - "grad_norm": 0.604811429977417, - "learning_rate": 0.00020153984962406014, - "loss": 0.3237, - "step": 66000 - }, - { - "epoch": 2.525639063217511, - "eval_loss": Infinity, - "eval_runtime": 177.9582, - "eval_samples_per_second": 39.459, - "eval_steps_per_second": 4.934, - "eval_wer": 0.40677226570763697, - "step": 66000 - }, - { - "epoch": 2.5447726924843104, - "grad_norm": 1.4802906513214111, - "learning_rate": 0.000200787969924812, - "loss": 0.3243, - "step": 66500 - }, - { - "epoch": 2.5639063217511096, - "grad_norm": 1.0026686191558838, - "learning_rate": 0.0002000360902255639, - "loss": 0.3335, - "step": 67000 - }, - { - "epoch": 2.5639063217511096, - "eval_loss": Infinity, - "eval_runtime": 178.1808, - "eval_samples_per_second": 39.409, - "eval_steps_per_second": 4.928, - "eval_wer": 0.40802834778929553, - "step": 67000 - }, - { - "epoch": 2.583039951017909, - "grad_norm": 0.7298141717910767, - "learning_rate": 0.00019928421052631577, - "loss": 0.3326, - "step": 67500 - }, - { - "epoch": 2.6021735802847084, - "grad_norm": 0.5921869874000549, - "learning_rate": 0.00019853383458646616, - "loss": 0.3254, - "step": 68000 - }, - { - "epoch": 2.6021735802847084, - "eval_loss": Infinity, - "eval_runtime": 177.3757, - "eval_samples_per_second": 39.588, - "eval_steps_per_second": 4.95, - "eval_wer": 0.3981780198857626, - "step": 68000 - }, - { - "epoch": 2.6213072095515075, - "grad_norm": 0.7208895683288574, - "learning_rate": 0.00019778195488721803, - "loss": 0.3235, - "step": 68500 - }, - { - "epoch": 2.640440838818307, - "grad_norm": 1.1068922281265259, - "learning_rate": 0.00019703007518796992, - "loss": 0.3295, - "step": 69000 - }, - { - "epoch": 2.640440838818307, - "eval_loss": Infinity, - "eval_runtime": 176.2384, - "eval_samples_per_second": 39.844, - "eval_steps_per_second": 4.982, - "eval_wer": 0.41358155278189124, - "step": 69000 - }, - { - "epoch": 2.6595744680851063, - "grad_norm": 0.8548173308372498, - "learning_rate": 0.0001962781954887218, - "loss": 0.3256, - "step": 69500 - }, - { - "epoch": 2.6787080973519055, - "grad_norm": 8.747632026672363, - "learning_rate": 0.00019552631578947368, - "loss": 0.3326, - "step": 70000 - }, - { - "epoch": 2.6787080973519055, - "eval_loss": Infinity, - "eval_runtime": 177.118, - "eval_samples_per_second": 39.646, - "eval_steps_per_second": 4.957, - "eval_wer": 0.4045113179606516, - "step": 70000 - }, - { - "epoch": 2.697841726618705, - "grad_norm": 1.7842276096343994, - "learning_rate": 0.00019477443609022555, - "loss": 0.3192, - "step": 70500 - }, - { - "epoch": 2.7169753558855043, - "grad_norm": 2.2976646423339844, - "learning_rate": 0.00019402255639097745, - "loss": 0.3167, - "step": 71000 - }, - { - "epoch": 2.7169753558855043, - "eval_loss": Infinity, - "eval_runtime": 176.2794, - "eval_samples_per_second": 39.834, - "eval_steps_per_second": 4.981, - "eval_wer": 0.4043658768775122, - "step": 71000 - }, - { - "epoch": 2.7361089851523035, - "grad_norm": 1.3297739028930664, - "learning_rate": 0.0001932706766917293, - "loss": 0.3179, - "step": 71500 - }, - { - "epoch": 2.755242614419103, - "grad_norm": 1.6274834871292114, - "learning_rate": 0.0001925203007518797, - "loss": 0.3376, - "step": 72000 - }, - { - "epoch": 2.755242614419103, - "eval_loss": Infinity, - "eval_runtime": 175.1612, - "eval_samples_per_second": 40.089, - "eval_steps_per_second": 5.013, - "eval_wer": 0.3942114448910514, - "step": 72000 - }, - { - "epoch": 2.7743762436859023, - "grad_norm": 0.5883073210716248, - "learning_rate": 0.00019176842105263155, - "loss": 0.3109, - "step": 72500 - }, - { - "epoch": 2.7935098729527015, - "grad_norm": 0.9327465891838074, - "learning_rate": 0.00019101804511278194, - "loss": 0.3245, - "step": 73000 - }, - { - "epoch": 2.7935098729527015, - "eval_loss": Infinity, - "eval_runtime": 175.6566, - "eval_samples_per_second": 39.976, - "eval_steps_per_second": 4.998, - "eval_wer": 0.3957848529722869, - "step": 73000 - }, - { - "epoch": 2.812643502219501, - "grad_norm": 0.9724407196044922, - "learning_rate": 0.00019026616541353383, - "loss": 0.3128, - "step": 73500 - }, - { - "epoch": 2.8317771314863003, - "grad_norm": 0.7509967684745789, - "learning_rate": 0.00018951428571428567, - "loss": 0.315, - "step": 74000 - }, - { - "epoch": 2.8317771314863003, - "eval_loss": Infinity, - "eval_runtime": 175.6684, - "eval_samples_per_second": 39.973, - "eval_steps_per_second": 4.998, - "eval_wer": 0.40650782737465624, - "step": 74000 - }, - { - "epoch": 2.8509107607530995, - "grad_norm": 0.7796798944473267, - "learning_rate": 0.00018876240601503757, - "loss": 0.3236, - "step": 74500 - }, - { - "epoch": 2.870044390019899, - "grad_norm": 1.8985257148742676, - "learning_rate": 0.00018801052631578944, - "loss": 0.327, - "step": 75000 - }, - { - "epoch": 2.870044390019899, - "eval_loss": Infinity, - "eval_runtime": 176.0416, - "eval_samples_per_second": 39.888, - "eval_steps_per_second": 4.987, - "eval_wer": 0.40103395388195473, - "step": 75000 - }, - { - "epoch": 2.8891780192866983, - "grad_norm": 0.909794807434082, - "learning_rate": 0.00018725864661654133, - "loss": 0.3235, - "step": 75500 - }, - { - "epoch": 2.9083116485534974, - "grad_norm": 1.1613683700561523, - "learning_rate": 0.0001865082706766917, - "loss": 0.3211, - "step": 76000 - }, - { - "epoch": 2.9083116485534974, - "eval_loss": Infinity, - "eval_runtime": 176.4137, - "eval_samples_per_second": 39.804, - "eval_steps_per_second": 4.977, - "eval_wer": 0.39259837105986883, - "step": 76000 - }, - { - "epoch": 2.927445277820297, - "grad_norm": 0.77381831407547, - "learning_rate": 0.0001857563909774436, - "loss": 0.3183, - "step": 76500 - }, - { - "epoch": 2.9465789070870962, - "grad_norm": 7.940882205963135, - "learning_rate": 0.00018500451127819546, - "loss": 0.323, - "step": 77000 - }, - { - "epoch": 2.9465789070870962, - "eval_loss": Infinity, - "eval_runtime": 178.2912, - "eval_samples_per_second": 39.385, - "eval_steps_per_second": 4.925, - "eval_wer": 0.4005315210492913, - "step": 77000 - }, - { - "epoch": 2.9657125363538954, - "grad_norm": 0.49370139837265015, - "learning_rate": 0.00018425263157894735, - "loss": 0.3125, - "step": 77500 - }, - { - "epoch": 2.984846165620695, - "grad_norm": 1.297203540802002, - "learning_rate": 0.00018350075187969922, - "loss": 0.323, - "step": 78000 - }, - { - "epoch": 2.984846165620695, - "eval_loss": Infinity, - "eval_runtime": 177.4201, - "eval_samples_per_second": 39.578, - "eval_steps_per_second": 4.949, - "eval_wer": 0.38635762640152316, - "step": 78000 - }, - { - "epoch": 3.0039797948874942, - "grad_norm": 0.7693071365356445, - "learning_rate": 0.0001827503759398496, - "loss": 0.3041, - "step": 78500 - }, - { - "epoch": 3.0231134241542934, - "grad_norm": 0.9858660697937012, - "learning_rate": 0.00018199849624060148, - "loss": 0.2747, - "step": 79000 - }, - { - "epoch": 3.0231134241542934, - "eval_loss": Infinity, - "eval_runtime": 179.3853, - "eval_samples_per_second": 39.145, - "eval_steps_per_second": 4.894, - "eval_wer": 0.39882589380156547, - "step": 79000 - }, - { - "epoch": 3.042247053421093, - "grad_norm": 0.7790058851242065, - "learning_rate": 0.00018124661654135335, - "loss": 0.2799, - "step": 79500 - }, - { - "epoch": 3.061380682687892, - "grad_norm": 0.9347246885299683, - "learning_rate": 0.00018049473684210525, - "loss": 0.2706, - "step": 80000 - }, - { - "epoch": 3.061380682687892, - "eval_loss": Infinity, - "eval_runtime": 176.71, - "eval_samples_per_second": 39.737, - "eval_steps_per_second": 4.969, - "eval_wer": 0.3860799661518934, - "step": 80000 - }, - { - "epoch": 3.0805143119546914, - "grad_norm": 0.478522926568985, - "learning_rate": 0.0001797428571428571, - "loss": 0.2807, - "step": 80500 - }, - { - "epoch": 3.099647941221491, - "grad_norm": 0.6804964542388916, - "learning_rate": 0.000178990977443609, - "loss": 0.2696, - "step": 81000 - }, - { - "epoch": 3.099647941221491, - "eval_loss": Infinity, - "eval_runtime": 177.3621, - "eval_samples_per_second": 39.591, - "eval_steps_per_second": 4.95, - "eval_wer": 0.38777237148297017, - "step": 81000 - }, - { - "epoch": 3.11878157048829, - "grad_norm": 0.6777291893959045, - "learning_rate": 0.00017823909774436088, - "loss": 0.2783, - "step": 81500 - }, - { - "epoch": 3.1379151997550894, - "grad_norm": 0.9108553528785706, - "learning_rate": 0.00017748721804511277, - "loss": 0.2792, - "step": 82000 - }, - { - "epoch": 3.1379151997550894, - "eval_loss": Infinity, - "eval_runtime": 177.9123, - "eval_samples_per_second": 39.469, - "eval_steps_per_second": 4.935, - "eval_wer": 0.39447588322403215, - "step": 82000 - }, - { - "epoch": 3.157048829021889, - "grad_norm": 0.7335214018821716, - "learning_rate": 0.00017673533834586464, - "loss": 0.2818, - "step": 82500 - }, - { - "epoch": 3.176182458288688, - "grad_norm": 1.0339977741241455, - "learning_rate": 0.00017598496240601503, - "loss": 0.2809, - "step": 83000 - }, - { - "epoch": 3.176182458288688, - "eval_loss": Infinity, - "eval_runtime": 177.3969, - "eval_samples_per_second": 39.584, - "eval_steps_per_second": 4.949, - "eval_wer": 0.39493865030674846, - "step": 83000 - }, - { - "epoch": 3.1953160875554873, - "grad_norm": 1.3720539808273315, - "learning_rate": 0.0001752330827067669, - "loss": 0.2767, - "step": 83500 - }, - { - "epoch": 3.214449716822287, - "grad_norm": 1.6575071811676025, - "learning_rate": 0.0001744827067669173, - "loss": 0.2709, - "step": 84000 - }, - { - "epoch": 3.214449716822287, - "eval_loss": Infinity, - "eval_runtime": 177.9587, - "eval_samples_per_second": 39.459, - "eval_steps_per_second": 4.934, - "eval_wer": 0.3852205415697059, - "step": 84000 - }, - { - "epoch": 3.233583346089086, - "grad_norm": 0.9274744987487793, - "learning_rate": 0.00017373082706766916, - "loss": 0.2692, - "step": 84500 - }, - { - "epoch": 3.2527169753558853, - "grad_norm": 0.6898565292358398, - "learning_rate": 0.00017297894736842105, - "loss": 0.2808, - "step": 85000 - }, - { - "epoch": 3.2527169753558853, - "eval_loss": Infinity, - "eval_runtime": 177.9702, - "eval_samples_per_second": 39.456, - "eval_steps_per_second": 4.933, - "eval_wer": 0.3912629574783161, - "step": 85000 - }, - { - "epoch": 3.271850604622685, - "grad_norm": 0.44578102231025696, - "learning_rate": 0.00017222706766917292, - "loss": 0.2747, - "step": 85500 - }, - { - "epoch": 3.290984233889484, - "grad_norm": 0.3729807138442993, - "learning_rate": 0.00017147518796992482, - "loss": 0.2746, - "step": 86000 - }, - { - "epoch": 3.290984233889484, - "eval_loss": Infinity, - "eval_runtime": 177.7245, - "eval_samples_per_second": 39.511, - "eval_steps_per_second": 4.94, - "eval_wer": 0.38564364290247516, - "step": 86000 - }, - { - "epoch": 3.3101178631562833, - "grad_norm": 0.7046172618865967, - "learning_rate": 0.00017072481203007518, - "loss": 0.2722, - "step": 86500 - }, - { - "epoch": 3.329251492423083, - "grad_norm": 1.7671455144882202, - "learning_rate": 0.00016997293233082705, - "loss": 0.2633, - "step": 87000 - }, - { - "epoch": 3.329251492423083, - "eval_loss": Infinity, - "eval_runtime": 178.1063, - "eval_samples_per_second": 39.426, - "eval_steps_per_second": 4.93, - "eval_wer": 0.38845991114872014, - "step": 87000 - }, - { - "epoch": 3.348385121689882, - "grad_norm": 0.8641050457954407, - "learning_rate": 0.00016922105263157894, - "loss": 0.2694, - "step": 87500 - }, - { - "epoch": 3.3675187509566813, - "grad_norm": 0.6219012141227722, - "learning_rate": 0.0001684706766917293, - "loss": 0.2745, - "step": 88000 - }, - { - "epoch": 3.3675187509566813, - "eval_loss": Infinity, - "eval_runtime": 178.0455, - "eval_samples_per_second": 39.439, - "eval_steps_per_second": 4.931, - "eval_wer": 0.38491643748677806, - "step": 88000 - }, - { - "epoch": 3.386652380223481, - "grad_norm": 0.6935294270515442, - "learning_rate": 0.0001677187969924812, - "loss": 0.277, - "step": 88500 - }, - { - "epoch": 3.40578600949028, - "grad_norm": 0.6084161400794983, - "learning_rate": 0.00016696691729323307, - "loss": 0.2832, - "step": 89000 - }, - { - "epoch": 3.40578600949028, - "eval_loss": Infinity, - "eval_runtime": 178.6527, - "eval_samples_per_second": 39.305, - "eval_steps_per_second": 4.915, - "eval_wer": 0.3820869473238841, - "step": 89000 - }, - { - "epoch": 3.4249196387570793, - "grad_norm": 1.1567957401275635, - "learning_rate": 0.00016621503759398497, - "loss": 0.2898, - "step": 89500 - }, - { - "epoch": 3.444053268023879, - "grad_norm": 0.7702553868293762, - "learning_rate": 0.0001654631578947368, - "loss": 0.2806, - "step": 90000 - }, - { - "epoch": 3.444053268023879, - "eval_loss": Infinity, - "eval_runtime": 177.9228, - "eval_samples_per_second": 39.467, - "eval_steps_per_second": 4.935, - "eval_wer": 0.38568330865242223, - "step": 90000 - }, - { - "epoch": 3.463186897290678, - "grad_norm": 0.6563850045204163, - "learning_rate": 0.00016471127819548873, - "loss": 0.2658, - "step": 90500 - }, - { - "epoch": 3.4823205265574773, - "grad_norm": 0.33683669567108154, - "learning_rate": 0.00016395939849624057, - "loss": 0.2756, - "step": 91000 - }, - { - "epoch": 3.4823205265574773, - "eval_loss": Infinity, - "eval_runtime": 178.2575, - "eval_samples_per_second": 39.392, - "eval_steps_per_second": 4.925, - "eval_wer": 0.381015972075312, - "step": 91000 - }, - { - "epoch": 3.501454155824277, - "grad_norm": 0.5872700214385986, - "learning_rate": 0.00016320751879699246, - "loss": 0.2696, - "step": 91500 - }, - { - "epoch": 3.520587785091076, - "grad_norm": 7.343397617340088, - "learning_rate": 0.00016245563909774433, - "loss": 0.2733, - "step": 92000 - }, - { - "epoch": 3.520587785091076, - "eval_loss": Infinity, - "eval_runtime": 178.508, - "eval_samples_per_second": 39.337, - "eval_steps_per_second": 4.919, - "eval_wer": 0.37381002750158665, - "step": 92000 - }, - { - "epoch": 3.5397214143578752, - "grad_norm": 0.6450570821762085, - "learning_rate": 0.00016170526315789472, - "loss": 0.2721, - "step": 92500 - }, - { - "epoch": 3.558855043624675, - "grad_norm": 0.5071462988853455, - "learning_rate": 0.0001609533834586466, - "loss": 0.2807, - "step": 93000 - }, - { - "epoch": 3.558855043624675, - "eval_loss": Infinity, - "eval_runtime": 177.146, - "eval_samples_per_second": 39.64, - "eval_steps_per_second": 4.956, - "eval_wer": 0.3857097524857203, - "step": 93000 - }, - { - "epoch": 3.577988672891474, - "grad_norm": 0.909946084022522, - "learning_rate": 0.0001602015037593985, - "loss": 0.2676, - "step": 93500 - }, - { - "epoch": 3.597122302158273, - "grad_norm": 0.7381096482276917, - "learning_rate": 0.00015944962406015036, - "loss": 0.2773, - "step": 94000 - }, - { - "epoch": 3.597122302158273, - "eval_loss": Infinity, - "eval_runtime": 178.6056, - "eval_samples_per_second": 39.316, - "eval_steps_per_second": 4.916, - "eval_wer": 0.37201184683731753, - "step": 94000 - }, - { - "epoch": 3.616255931425073, - "grad_norm": 0.5159269571304321, - "learning_rate": 0.00015869774436090225, - "loss": 0.2801, - "step": 94500 - }, - { - "epoch": 3.635389560691872, - "grad_norm": 1.289354920387268, - "learning_rate": 0.00015794586466165412, - "loss": 0.2725, - "step": 95000 - }, - { - "epoch": 3.635389560691872, - "eval_loss": Infinity, - "eval_runtime": 179.0499, - "eval_samples_per_second": 39.218, - "eval_steps_per_second": 4.904, - "eval_wer": 0.36897080600803894, - "step": 95000 - }, - { - "epoch": 3.654523189958671, - "grad_norm": 0.7305335998535156, - "learning_rate": 0.000157193984962406, - "loss": 0.2618, - "step": 95500 - }, - { - "epoch": 3.673656819225471, - "grad_norm": 0.5354152917861938, - "learning_rate": 0.00015644210526315788, - "loss": 0.2614, - "step": 96000 - }, - { - "epoch": 3.673656819225471, - "eval_loss": Infinity, - "eval_runtime": 179.0067, - "eval_samples_per_second": 39.228, - "eval_steps_per_second": 4.905, - "eval_wer": 0.375343769832875, - "step": 96000 - }, - { - "epoch": 3.69279044849227, - "grad_norm": 0.4864795506000519, - "learning_rate": 0.00015569022556390978, - "loss": 0.261, - "step": 96500 - }, - { - "epoch": 3.711924077759069, - "grad_norm": 0.6722401976585388, - "learning_rate": 0.00015493834586466164, - "loss": 0.2674, - "step": 97000 - }, - { - "epoch": 3.711924077759069, - "eval_loss": Infinity, - "eval_runtime": 177.9604, - "eval_samples_per_second": 39.458, - "eval_steps_per_second": 4.934, - "eval_wer": 0.38257615823989843, - "step": 97000 - }, - { - "epoch": 3.731057707025869, - "grad_norm": 0.6855655312538147, - "learning_rate": 0.00015418646616541354, - "loss": 0.2713, - "step": 97500 - }, - { - "epoch": 3.750191336292668, - "grad_norm": 1.3021297454833984, - "learning_rate": 0.0001534345864661654, - "loss": 0.2605, - "step": 98000 - }, - { - "epoch": 3.750191336292668, - "eval_loss": Infinity, - "eval_runtime": 177.5188, - "eval_samples_per_second": 39.556, - "eval_steps_per_second": 4.946, - "eval_wer": 0.3733075946689232, - "step": 98000 - }, - { - "epoch": 3.769324965559467, - "grad_norm": 0.70773845911026, - "learning_rate": 0.0001526827067669173, - "loss": 0.2601, - "step": 98500 - }, - { - "epoch": 3.788458594826267, - "grad_norm": 0.58240807056427, - "learning_rate": 0.00015193082706766917, - "loss": 0.2649, - "step": 99000 - }, - { - "epoch": 3.788458594826267, - "eval_loss": Infinity, - "eval_runtime": 176.98, - "eval_samples_per_second": 39.677, - "eval_steps_per_second": 4.961, - "eval_wer": 0.3690633594245822, - "step": 99000 - }, - { - "epoch": 3.807592224093066, - "grad_norm": 0.624595582485199, - "learning_rate": 0.00015118045112781953, - "loss": 0.2678, - "step": 99500 - }, - { - "epoch": 3.826725853359865, - "grad_norm": 1.283463954925537, - "learning_rate": 0.00015042857142857143, - "loss": 0.2638, - "step": 100000 - }, - { - "epoch": 3.826725853359865, - "eval_loss": Infinity, - "eval_runtime": 176.8803, - "eval_samples_per_second": 39.699, - "eval_steps_per_second": 4.964, - "eval_wer": 0.37530410408292786, - "step": 100000 - }, - { - "epoch": 3.8458594826266648, - "grad_norm": 0.7322863936424255, - "learning_rate": 0.0001496766917293233, - "loss": 0.2743, - "step": 100500 - }, - { - "epoch": 3.864993111893464, - "grad_norm": 0.49134284257888794, - "learning_rate": 0.0001489248120300752, - "loss": 0.2749, - "step": 101000 - }, - { - "epoch": 3.864993111893464, - "eval_loss": Infinity, - "eval_runtime": 177.6568, - "eval_samples_per_second": 39.526, - "eval_steps_per_second": 4.942, - "eval_wer": 0.3675163951766448, - "step": 101000 - }, - { - "epoch": 3.884126741160263, - "grad_norm": 1.705079436302185, - "learning_rate": 0.00014817293233082706, - "loss": 0.2576, - "step": 101500 - }, - { - "epoch": 3.9032603704270628, - "grad_norm": 1.390942931175232, - "learning_rate": 0.00014742105263157893, - "loss": 0.2635, - "step": 102000 - }, - { - "epoch": 3.9032603704270628, - "eval_loss": Infinity, - "eval_runtime": 177.7395, - "eval_samples_per_second": 39.507, - "eval_steps_per_second": 4.94, - "eval_wer": 0.3666701925111064, - "step": 102000 - }, - { - "epoch": 3.922393999693862, - "grad_norm": 0.5910842418670654, - "learning_rate": 0.00014666917293233082, - "loss": 0.2654, - "step": 102500 - }, - { - "epoch": 3.941527628960661, - "grad_norm": 1.1575956344604492, - "learning_rate": 0.0001459172932330827, - "loss": 0.2639, - "step": 103000 - }, - { - "epoch": 3.941527628960661, - "eval_loss": Infinity, - "eval_runtime": 177.3808, - "eval_samples_per_second": 39.587, - "eval_steps_per_second": 4.95, - "eval_wer": 0.36727840067696216, - "step": 103000 - }, - { - "epoch": 3.9606612582274607, - "grad_norm": 9.209879875183105, - "learning_rate": 0.00014516691729323306, - "loss": 0.2671, - "step": 103500 - }, - { - "epoch": 3.97979488749426, - "grad_norm": 0.8380705714225769, - "learning_rate": 0.00014441654135338345, - "loss": 0.2602, - "step": 104000 - }, - { - "epoch": 3.97979488749426, - "eval_loss": Infinity, - "eval_runtime": 177.3769, - "eval_samples_per_second": 39.588, - "eval_steps_per_second": 4.95, - "eval_wer": 0.36286228051618363, - "step": 104000 - }, - { - "epoch": 3.998928516761059, - "grad_norm": 0.6727402210235596, - "learning_rate": 0.00014366616541353384, - "loss": 0.2579, - "step": 104500 - }, - { - "epoch": 4.018062146027859, - "grad_norm": 0.38106000423431396, - "learning_rate": 0.0001429142857142857, - "loss": 0.2217, - "step": 105000 - }, - { - "epoch": 4.018062146027859, - "eval_loss": Infinity, - "eval_runtime": 178.3027, - "eval_samples_per_second": 39.382, - "eval_steps_per_second": 4.924, - "eval_wer": 0.3644621324307172, - "step": 105000 - }, - { - "epoch": 4.0371957752946575, - "grad_norm": 3.7064096927642822, - "learning_rate": 0.0001421624060150376, - "loss": 0.2202, - "step": 105500 - }, - { - "epoch": 4.056329404561457, - "grad_norm": 0.49550744891166687, - "learning_rate": 0.00014141203007518797, - "loss": 0.2226, - "step": 106000 - }, - { - "epoch": 4.056329404561457, - "eval_loss": Infinity, - "eval_runtime": 177.9052, - "eval_samples_per_second": 39.47, - "eval_steps_per_second": 4.935, - "eval_wer": 0.3568859741908187, - "step": 106000 - }, - { - "epoch": 4.075463033828257, - "grad_norm": 0.628818690776825, - "learning_rate": 0.00014066015037593983, - "loss": 0.2226, - "step": 106500 - }, - { - "epoch": 4.094596663095055, - "grad_norm": 0.48665696382522583, - "learning_rate": 0.0001399082706766917, - "loss": 0.2209, - "step": 107000 - }, - { - "epoch": 4.094596663095055, - "eval_loss": Infinity, - "eval_runtime": 177.9704, - "eval_samples_per_second": 39.456, - "eval_steps_per_second": 4.933, - "eval_wer": 0.35495557436005926, - "step": 107000 - }, - { - "epoch": 4.113730292361855, - "grad_norm": 1.1963294744491577, - "learning_rate": 0.0001391563909774436, - "loss": 0.2197, - "step": 107500 - }, - { - "epoch": 4.132863921628655, - "grad_norm": 0.4918075203895569, - "learning_rate": 0.00013840451127819547, - "loss": 0.2326, - "step": 108000 - }, - { - "epoch": 4.132863921628655, - "eval_loss": Infinity, - "eval_runtime": 177.4603, - "eval_samples_per_second": 39.569, - "eval_steps_per_second": 4.948, - "eval_wer": 0.3595303575206262, - "step": 108000 - }, - { - "epoch": 4.151997550895453, - "grad_norm": 0.4551312029361725, - "learning_rate": 0.00013765263157894736, - "loss": 0.2176, - "step": 108500 - }, - { - "epoch": 4.171131180162253, - "grad_norm": 0.5786845088005066, - "learning_rate": 0.00013690075187969923, - "loss": 0.2203, - "step": 109000 - }, - { - "epoch": 4.171131180162253, - "eval_loss": Infinity, - "eval_runtime": 178.5842, - "eval_samples_per_second": 39.32, - "eval_steps_per_second": 4.916, - "eval_wer": 0.3556166701925111, - "step": 109000 - }, - { - "epoch": 4.190264809429053, - "grad_norm": 0.9616146087646484, - "learning_rate": 0.00013614887218045112, - "loss": 0.2292, - "step": 109500 - }, - { - "epoch": 4.209398438695851, - "grad_norm": 0.5730082392692566, - "learning_rate": 0.000135396992481203, - "loss": 0.2267, - "step": 110000 - }, - { - "epoch": 4.209398438695851, - "eval_loss": Infinity, - "eval_runtime": 179.0437, - "eval_samples_per_second": 39.219, - "eval_steps_per_second": 4.904, - "eval_wer": 0.35085678019885763, - "step": 110000 - }, - { - "epoch": 4.228532067962651, - "grad_norm": 1.2517842054367065, - "learning_rate": 0.00013464661654135338, - "loss": 0.2262, - "step": 110500 - }, - { - "epoch": 4.247665697229451, - "grad_norm": 0.894205629825592, - "learning_rate": 0.00013389473684210525, - "loss": 0.223, - "step": 111000 - }, - { - "epoch": 4.247665697229451, - "eval_loss": Infinity, - "eval_runtime": 178.4171, - "eval_samples_per_second": 39.357, - "eval_steps_per_second": 4.921, - "eval_wer": 0.3580891686058811, - "step": 111000 - }, - { - "epoch": 4.266799326496249, - "grad_norm": 0.6541041731834412, - "learning_rate": 0.00013314285714285715, - "loss": 0.2256, - "step": 111500 - }, - { - "epoch": 4.285932955763049, - "grad_norm": 1.8858749866485596, - "learning_rate": 0.0001323924812030075, - "loss": 0.2273, - "step": 112000 - }, - { - "epoch": 4.285932955763049, - "eval_loss": Infinity, - "eval_runtime": 178.7774, - "eval_samples_per_second": 39.278, - "eval_steps_per_second": 4.911, - "eval_wer": 0.35478368944362176, - "step": 112000 - }, - { - "epoch": 4.305066585029849, - "grad_norm": 0.36083123087882996, - "learning_rate": 0.00013164060150375938, - "loss": 0.2186, - "step": 112500 - }, - { - "epoch": 4.324200214296647, - "grad_norm": 0.8509350419044495, - "learning_rate": 0.00013088872180451125, - "loss": 0.2278, - "step": 113000 - }, - { - "epoch": 4.324200214296647, - "eval_loss": Infinity, - "eval_runtime": 179.5764, - "eval_samples_per_second": 39.103, - "eval_steps_per_second": 4.889, - "eval_wer": 0.34927015020097313, - "step": 113000 - }, - { - "epoch": 4.343333843563447, - "grad_norm": 2.493048667907715, - "learning_rate": 0.00013013684210526314, - "loss": 0.2283, - "step": 113500 - }, - { - "epoch": 4.362467472830247, - "grad_norm": 0.5552091598510742, - "learning_rate": 0.000129384962406015, - "loss": 0.2372, - "step": 114000 - }, - { - "epoch": 4.362467472830247, - "eval_loss": Infinity, - "eval_runtime": 179.2629, - "eval_samples_per_second": 39.172, - "eval_steps_per_second": 4.898, - "eval_wer": 0.3600592341865877, - "step": 114000 - }, - { - "epoch": 4.381601102097045, - "grad_norm": 2.732189178466797, - "learning_rate": 0.0001286330827067669, - "loss": 0.2209, - "step": 114500 - }, - { - "epoch": 4.400734731363845, - "grad_norm": 0.6724231839179993, - "learning_rate": 0.00012788120300751877, - "loss": 0.22, - "step": 115000 - }, - { - "epoch": 4.400734731363845, - "eval_loss": Infinity, - "eval_runtime": 178.4303, - "eval_samples_per_second": 39.354, - "eval_steps_per_second": 4.921, - "eval_wer": 0.3549026866934631, - "step": 115000 - }, - { - "epoch": 4.419868360630645, - "grad_norm": 0.8096573948860168, - "learning_rate": 0.00012712932330827067, - "loss": 0.2166, - "step": 115500 - }, - { - "epoch": 4.439001989897443, - "grad_norm": 0.32577428221702576, - "learning_rate": 0.00012637744360902254, - "loss": 0.228, - "step": 116000 - }, - { - "epoch": 4.439001989897443, - "eval_loss": Infinity, - "eval_runtime": 178.1414, - "eval_samples_per_second": 39.418, - "eval_steps_per_second": 4.929, - "eval_wer": 0.34994446795007406, - "step": 116000 - }, - { - "epoch": 4.458135619164243, - "grad_norm": 0.36173292994499207, - "learning_rate": 0.00012562556390977443, - "loss": 0.225, - "step": 116500 - }, - { - "epoch": 4.477269248431043, - "grad_norm": 0.7031286358833313, - "learning_rate": 0.0001248736842105263, - "loss": 0.2291, - "step": 117000 - }, - { - "epoch": 4.477269248431043, - "eval_loss": Infinity, - "eval_runtime": 179.2214, - "eval_samples_per_second": 39.181, - "eval_steps_per_second": 4.899, - "eval_wer": 0.3485429447852761, - "step": 117000 - }, - { - "epoch": 4.496402877697841, - "grad_norm": 0.8883704543113708, - "learning_rate": 0.00012412180451127817, - "loss": 0.2254, - "step": 117500 - }, - { - "epoch": 4.515536506964641, - "grad_norm": 0.7868921160697937, - "learning_rate": 0.00012336992481203006, - "loss": 0.2301, - "step": 118000 - }, - { - "epoch": 4.515536506964641, - "eval_loss": Infinity, - "eval_runtime": 180.1835, - "eval_samples_per_second": 38.971, - "eval_steps_per_second": 4.873, - "eval_wer": 0.3487941612016078, - "step": 118000 - }, - { - "epoch": 4.5346701362314406, - "grad_norm": 0.8620243072509766, - "learning_rate": 0.00012261804511278193, - "loss": 0.2207, - "step": 118500 - }, - { - "epoch": 4.553803765498239, - "grad_norm": 1.267608642578125, - "learning_rate": 0.00012186616541353381, - "loss": 0.2084, - "step": 119000 - }, - { - "epoch": 4.553803765498239, - "eval_loss": Infinity, - "eval_runtime": 178.8973, - "eval_samples_per_second": 39.252, - "eval_steps_per_second": 4.908, - "eval_wer": 0.3515046541146605, - "step": 119000 - }, - { - "epoch": 4.572937394765039, - "grad_norm": 0.8290882706642151, - "learning_rate": 0.00012111428571428569, - "loss": 0.2121, - "step": 119500 - }, - { - "epoch": 4.5920710240318385, - "grad_norm": 0.5240318775177002, - "learning_rate": 0.00012036541353383458, - "loss": 0.2251, - "step": 120000 - }, - { - "epoch": 4.5920710240318385, - "eval_loss": Infinity, - "eval_runtime": 179.3697, - "eval_samples_per_second": 39.148, - "eval_steps_per_second": 4.895, - "eval_wer": 0.3509228897821028, - "step": 120000 - }, - { - "epoch": 4.611204653298637, - "grad_norm": 0.79433274269104, - "learning_rate": 0.00011961353383458646, - "loss": 0.2152, - "step": 120500 - }, - { - "epoch": 4.630338282565437, - "grad_norm": 0.5738509893417358, - "learning_rate": 0.00011886165413533834, - "loss": 0.2205, - "step": 121000 - }, - { - "epoch": 4.630338282565437, - "eval_loss": Infinity, - "eval_runtime": 180.0051, - "eval_samples_per_second": 39.01, - "eval_steps_per_second": 4.878, - "eval_wer": 0.34464247937381004, - "step": 121000 - }, - { - "epoch": 4.6494719118322365, - "grad_norm": 0.6107327938079834, - "learning_rate": 0.00011810977443609022, - "loss": 0.2153, - "step": 121500 - }, - { - "epoch": 4.668605541099035, - "grad_norm": 0.5332146286964417, - "learning_rate": 0.0001173578947368421, - "loss": 0.2174, - "step": 122000 - }, - { - "epoch": 4.668605541099035, - "eval_loss": Infinity, - "eval_runtime": 179.5227, - "eval_samples_per_second": 39.115, - "eval_steps_per_second": 4.891, - "eval_wer": 0.3458985614554686, - "step": 122000 - }, - { - "epoch": 4.687739170365835, - "grad_norm": 1.588100790977478, - "learning_rate": 0.00011660601503759397, - "loss": 0.2132, - "step": 122500 - }, - { - "epoch": 4.7068727996326345, - "grad_norm": 2.00449275970459, - "learning_rate": 0.00011585413533834586, - "loss": 0.2136, - "step": 123000 - }, - { - "epoch": 4.7068727996326345, - "eval_loss": Infinity, - "eval_runtime": 179.5882, - "eval_samples_per_second": 39.101, - "eval_steps_per_second": 4.889, - "eval_wer": 0.3498651364501798, - "step": 123000 - }, - { - "epoch": 4.726006428899433, - "grad_norm": 0.7837647795677185, - "learning_rate": 0.00011510225563909774, - "loss": 0.225, - "step": 123500 - }, - { - "epoch": 4.745140058166233, - "grad_norm": 1.3031939268112183, - "learning_rate": 0.00011435037593984962, - "loss": 0.2142, - "step": 124000 - }, - { - "epoch": 4.745140058166233, - "eval_loss": Infinity, - "eval_runtime": 179.8141, - "eval_samples_per_second": 39.051, - "eval_steps_per_second": 4.883, - "eval_wer": 0.3449201396234398, - "step": 124000 - }, - { - "epoch": 4.7642736874330325, - "grad_norm": 0.5795506834983826, - "learning_rate": 0.00011359999999999998, - "loss": 0.2155, - "step": 124500 - }, - { - "epoch": 4.783407316699831, - "grad_norm": 0.7235686182975769, - "learning_rate": 0.00011284812030075186, - "loss": 0.2152, - "step": 125000 - }, - { - "epoch": 4.783407316699831, - "eval_loss": Infinity, - "eval_runtime": 179.9818, - "eval_samples_per_second": 39.015, - "eval_steps_per_second": 4.878, - "eval_wer": 0.34659932303786756, - "step": 125000 - }, - { - "epoch": 4.802540945966631, - "grad_norm": 0.4587650001049042, - "learning_rate": 0.00011209774436090224, - "loss": 0.2081, - "step": 125500 - }, - { - "epoch": 4.8216745752334305, - "grad_norm": 1.3301700353622437, - "learning_rate": 0.00011134736842105263, - "loss": 0.2216, - "step": 126000 - }, - { - "epoch": 4.8216745752334305, - "eval_loss": Infinity, - "eval_runtime": 179.2673, - "eval_samples_per_second": 39.171, - "eval_steps_per_second": 4.898, - "eval_wer": 0.34429870954093506, - "step": 126000 - }, - { - "epoch": 4.840808204500229, - "grad_norm": 1.0340607166290283, - "learning_rate": 0.00011059548872180452, - "loss": 0.2152, - "step": 126500 - }, - { - "epoch": 4.859941833767029, - "grad_norm": 0.7466903328895569, - "learning_rate": 0.00010984360902255638, - "loss": 0.2209, - "step": 127000 - }, - { - "epoch": 4.859941833767029, - "eval_loss": Infinity, - "eval_runtime": 179.7521, - "eval_samples_per_second": 39.065, - "eval_steps_per_second": 4.885, - "eval_wer": 0.3455415697059446, - "step": 127000 - }, - { - "epoch": 4.879075463033828, - "grad_norm": 0.451224148273468, - "learning_rate": 0.00010909323308270676, - "loss": 0.2068, - "step": 127500 - }, - { - "epoch": 4.898209092300627, - "grad_norm": 0.9599905610084534, - "learning_rate": 0.00010834135338345863, - "loss": 0.2183, - "step": 128000 - }, - { - "epoch": 4.898209092300627, - "eval_loss": Infinity, - "eval_runtime": 175.3325, - "eval_samples_per_second": 40.05, - "eval_steps_per_second": 5.008, - "eval_wer": 0.340398244129469, - "step": 128000 - }, - { - "epoch": 4.917342721567427, - "grad_norm": 1.071007251739502, - "learning_rate": 0.00010758947368421051, - "loss": 0.2115, - "step": 128500 - }, - { - "epoch": 4.936476350834226, - "grad_norm": 0.9002227187156677, - "learning_rate": 0.00010683759398496239, - "loss": 0.2174, - "step": 129000 - }, - { - "epoch": 4.936476350834226, - "eval_loss": Infinity, - "eval_runtime": 175.3184, - "eval_samples_per_second": 40.053, - "eval_steps_per_second": 5.008, - "eval_wer": 0.3402924687962767, - "step": 129000 - }, - { - "epoch": 4.955609980101025, - "grad_norm": 3.2121989727020264, - "learning_rate": 0.00010608571428571427, - "loss": 0.2139, - "step": 129500 - }, - { - "epoch": 4.974743609367825, - "grad_norm": 0.6666644811630249, - "learning_rate": 0.00010533383458646616, - "loss": 0.2165, - "step": 130000 - }, - { - "epoch": 4.974743609367825, - "eval_loss": Infinity, - "eval_runtime": 176.6287, - "eval_samples_per_second": 39.756, - "eval_steps_per_second": 4.971, - "eval_wer": 0.3419848741273535, - "step": 130000 - }, - { - "epoch": 4.993877238634624, - "grad_norm": 1.018226981163025, - "learning_rate": 0.00010458195488721804, - "loss": 0.2075, - "step": 130500 - }, - { - "epoch": 5.013010867901423, - "grad_norm": 0.9753539562225342, - "learning_rate": 0.00010383007518796992, - "loss": 0.1806, - "step": 131000 - }, - { - "epoch": 5.013010867901423, - "eval_loss": Infinity, - "eval_runtime": 177.4567, - "eval_samples_per_second": 39.57, - "eval_steps_per_second": 4.948, - "eval_wer": 0.3380579648825894, - "step": 131000 - }, - { - "epoch": 5.032144497168223, - "grad_norm": 0.6430408954620361, - "learning_rate": 0.0001030781954887218, - "loss": 0.1788, - "step": 131500 - }, - { - "epoch": 5.051278126435022, - "grad_norm": 0.5756456255912781, - "learning_rate": 0.00010232631578947367, - "loss": 0.1821, - "step": 132000 - }, - { - "epoch": 5.051278126435022, - "eval_loss": Infinity, - "eval_runtime": 178.4262, - "eval_samples_per_second": 39.355, - "eval_steps_per_second": 4.921, - "eval_wer": 0.3426459699598054, - "step": 132000 - }, - { - "epoch": 5.070411755701821, - "grad_norm": 0.7271620035171509, - "learning_rate": 0.00010157443609022555, - "loss": 0.1915, - "step": 132500 - }, - { - "epoch": 5.089545384968621, - "grad_norm": 0.8460062146186829, - "learning_rate": 0.00010082255639097743, - "loss": 0.1825, - "step": 133000 - }, - { - "epoch": 5.089545384968621, - "eval_loss": Infinity, - "eval_runtime": 178.6095, - "eval_samples_per_second": 39.315, - "eval_steps_per_second": 4.916, - "eval_wer": 0.3399619208800508, - "step": 133000 - }, - { - "epoch": 5.10867901423542, - "grad_norm": 0.43308231234550476, - "learning_rate": 0.00010007067669172931, - "loss": 0.179, - "step": 133500 - }, - { - "epoch": 5.127812643502219, - "grad_norm": 0.5365935564041138, - "learning_rate": 9.932030075187969e-05, - "loss": 0.1876, - "step": 134000 - }, - { - "epoch": 5.127812643502219, - "eval_loss": Infinity, - "eval_runtime": 178.192, - "eval_samples_per_second": 39.407, - "eval_steps_per_second": 4.927, - "eval_wer": 0.3381240744658346, - "step": 134000 - }, - { - "epoch": 5.146946272769019, - "grad_norm": 0.35783401131629944, - "learning_rate": 9.856842105263157e-05, - "loss": 0.1794, - "step": 134500 - }, - { - "epoch": 5.166079902035818, - "grad_norm": 1.0039490461349487, - "learning_rate": 9.781654135338345e-05, - "loss": 0.1858, - "step": 135000 - }, - { - "epoch": 5.166079902035818, - "eval_loss": Infinity, - "eval_runtime": 181.1178, - "eval_samples_per_second": 38.77, - "eval_steps_per_second": 4.848, - "eval_wer": 0.3341707213877724, - "step": 135000 - }, - { - "epoch": 5.185213531302617, - "grad_norm": 0.5719444751739502, - "learning_rate": 9.706466165413533e-05, - "loss": 0.1733, - "step": 135500 - }, - { - "epoch": 5.204347160569417, - "grad_norm": 1.0103236436843872, - "learning_rate": 9.631278195488722e-05, - "loss": 0.1729, - "step": 136000 - }, - { - "epoch": 5.204347160569417, - "eval_loss": Infinity, - "eval_runtime": 180.3304, - "eval_samples_per_second": 38.94, - "eval_steps_per_second": 4.869, - "eval_wer": 0.3325179818066427, - "step": 136000 - }, - { - "epoch": 5.223480789836216, - "grad_norm": 0.34207335114479065, - "learning_rate": 9.55609022556391e-05, - "loss": 0.1722, - "step": 136500 - }, - { - "epoch": 5.242614419103015, - "grad_norm": 1.3274930715560913, - "learning_rate": 9.480902255639098e-05, - "loss": 0.1843, - "step": 137000 - }, - { - "epoch": 5.242614419103015, - "eval_loss": Infinity, - "eval_runtime": 180.6878, - "eval_samples_per_second": 38.863, - "eval_steps_per_second": 4.859, - "eval_wer": 0.3313808969748255, - "step": 137000 - }, - { - "epoch": 5.261748048369815, - "grad_norm": 2.946866989135742, - "learning_rate": 9.405714285714285e-05, - "loss": 0.1733, - "step": 137500 - }, - { - "epoch": 5.280881677636614, - "grad_norm": 1.175057291984558, - "learning_rate": 9.330526315789473e-05, - "loss": 0.1828, - "step": 138000 - }, - { - "epoch": 5.280881677636614, - "eval_loss": Infinity, - "eval_runtime": 179.9842, - "eval_samples_per_second": 39.015, - "eval_steps_per_second": 4.878, - "eval_wer": 0.33381372963824835, - "step": 138000 - }, - { - "epoch": 5.300015306903413, - "grad_norm": 0.5658883452415466, - "learning_rate": 9.255338345864661e-05, - "loss": 0.1905, - "step": 138500 - }, - { - "epoch": 5.319148936170213, - "grad_norm": 2.0087709426879883, - "learning_rate": 9.180150375939849e-05, - "loss": 0.1878, - "step": 139000 - }, - { - "epoch": 5.319148936170213, - "eval_loss": Infinity, - "eval_runtime": 179.94, - "eval_samples_per_second": 39.024, - "eval_steps_per_second": 4.879, - "eval_wer": 0.3299397080600804, - "step": 139000 - }, - { - "epoch": 5.338282565437012, - "grad_norm": 0.7439378499984741, - "learning_rate": 9.104962406015037e-05, - "loss": 0.1756, - "step": 139500 - }, - { - "epoch": 5.357416194703811, - "grad_norm": 0.6208277344703674, - "learning_rate": 9.030075187969923e-05, - "loss": 0.1784, - "step": 140000 - }, - { - "epoch": 5.357416194703811, - "eval_loss": Infinity, - "eval_runtime": 178.2886, - "eval_samples_per_second": 39.386, - "eval_steps_per_second": 4.925, - "eval_wer": 0.33048180664269095, - "step": 140000 - }, - { - "epoch": 5.376549823970611, - "grad_norm": 1.9707947969436646, - "learning_rate": 8.954887218045112e-05, - "loss": 0.1907, - "step": 140500 - }, - { - "epoch": 5.39568345323741, - "grad_norm": 0.6385311484336853, - "learning_rate": 8.8796992481203e-05, - "loss": 0.1791, - "step": 141000 - }, - { - "epoch": 5.39568345323741, - "eval_loss": Infinity, - "eval_runtime": 177.8472, - "eval_samples_per_second": 39.483, - "eval_steps_per_second": 4.937, - "eval_wer": 0.3262904590649461, - "step": 141000 - }, - { - "epoch": 5.414817082504209, - "grad_norm": 0.44527003169059753, - "learning_rate": 8.804511278195488e-05, - "loss": 0.1785, - "step": 141500 - }, - { - "epoch": 5.433950711771009, - "grad_norm": 0.820563793182373, - "learning_rate": 8.729323308270676e-05, - "loss": 0.1861, - "step": 142000 - }, - { - "epoch": 5.433950711771009, - "eval_loss": Infinity, - "eval_runtime": 182.6414, - "eval_samples_per_second": 38.447, - "eval_steps_per_second": 4.807, - "eval_wer": 0.3237518510683309, - "step": 142000 - }, - { - "epoch": 5.453084341037808, - "grad_norm": 1.075088381767273, - "learning_rate": 8.654285714285714e-05, - "loss": 0.1878, - "step": 142500 - }, - { - "epoch": 5.472217970304607, - "grad_norm": 1.048279047012329, - "learning_rate": 8.579097744360902e-05, - "loss": 0.176, - "step": 143000 - }, - { - "epoch": 5.472217970304607, - "eval_loss": Infinity, - "eval_runtime": 180.7657, - "eval_samples_per_second": 38.846, - "eval_steps_per_second": 4.857, - "eval_wer": 0.3245319441506241, - "step": 143000 - }, - { - "epoch": 5.491351599571407, - "grad_norm": 0.9499515295028687, - "learning_rate": 8.504060150375938e-05, - "loss": 0.1724, - "step": 143500 - }, - { - "epoch": 5.510485228838206, - "grad_norm": 0.6625120639801025, - "learning_rate": 8.428872180451127e-05, - "loss": 0.1821, - "step": 144000 - }, - { - "epoch": 5.510485228838206, - "eval_loss": Infinity, - "eval_runtime": 180.84, - "eval_samples_per_second": 38.83, - "eval_steps_per_second": 4.855, - "eval_wer": 0.32155701290459066, - "step": 144000 - }, - { - "epoch": 5.529618858105005, - "grad_norm": 1.221817135810852, - "learning_rate": 8.353834586466164e-05, - "loss": 0.1787, - "step": 144500 - }, - { - "epoch": 5.548752487371805, - "grad_norm": 1.4700016975402832, - "learning_rate": 8.278646616541352e-05, - "loss": 0.176, - "step": 145000 - }, - { - "epoch": 5.548752487371805, - "eval_loss": Infinity, - "eval_runtime": 181.0479, - "eval_samples_per_second": 38.785, - "eval_steps_per_second": 4.85, - "eval_wer": 0.324505500317326, - "step": 145000 - }, - { - "epoch": 5.567886116638604, - "grad_norm": 0.4790880084037781, - "learning_rate": 8.20345864661654e-05, - "loss": 0.1681, - "step": 145500 - }, - { - "epoch": 5.587019745905403, - "grad_norm": 0.749213457107544, - "learning_rate": 8.128270676691729e-05, - "loss": 0.1799, - "step": 146000 - }, - { - "epoch": 5.587019745905403, - "eval_loss": Infinity, - "eval_runtime": 180.4888, - "eval_samples_per_second": 38.905, - "eval_steps_per_second": 4.865, - "eval_wer": 0.32506082081658555, - "step": 146000 - }, - { - "epoch": 5.606153375172203, - "grad_norm": 0.5427069067955017, - "learning_rate": 8.053082706766917e-05, - "loss": 0.1714, - "step": 146500 - }, - { - "epoch": 5.625287004439002, - "grad_norm": 0.53640216588974, - "learning_rate": 7.977894736842105e-05, - "loss": 0.1696, - "step": 147000 - }, - { - "epoch": 5.625287004439002, - "eval_loss": Infinity, - "eval_runtime": 180.603, - "eval_samples_per_second": 38.881, - "eval_steps_per_second": 4.861, - "eval_wer": 0.32224455257034057, - "step": 147000 - }, - { - "epoch": 5.644420633705801, - "grad_norm": 2.7623894214630127, - "learning_rate": 7.902706766917293e-05, - "loss": 0.1766, - "step": 147500 - }, - { - "epoch": 5.663554262972601, - "grad_norm": 0.9681125283241272, - "learning_rate": 7.827518796992481e-05, - "loss": 0.1711, - "step": 148000 - }, - { - "epoch": 5.663554262972601, - "eval_loss": Infinity, - "eval_runtime": 180.9103, - "eval_samples_per_second": 38.815, - "eval_steps_per_second": 4.853, - "eval_wer": 0.3242939496509414, - "step": 148000 - }, - { - "epoch": 5.6826878922394, - "grad_norm": 0.8594058752059937, - "learning_rate": 7.752330827067668e-05, - "loss": 0.1798, - "step": 148500 - }, - { - "epoch": 5.701821521506199, - "grad_norm": 0.9855976104736328, - "learning_rate": 7.677293233082705e-05, - "loss": 0.1794, - "step": 149000 - }, - { - "epoch": 5.701821521506199, - "eval_loss": Infinity, - "eval_runtime": 181.7415, - "eval_samples_per_second": 38.637, - "eval_steps_per_second": 4.831, - "eval_wer": 0.3212264649883647, - "step": 149000 - }, - { - "epoch": 5.7209551507729985, - "grad_norm": 0.5901813507080078, - "learning_rate": 7.602255639097744e-05, - "loss": 0.1691, - "step": 149500 - }, - { - "epoch": 5.740088780039798, - "grad_norm": 1.9479256868362427, - "learning_rate": 7.527067669172932e-05, - "loss": 0.1806, - "step": 150000 - }, - { - "epoch": 5.740088780039798, - "eval_loss": Infinity, - "eval_runtime": 180.0383, - "eval_samples_per_second": 39.003, - "eval_steps_per_second": 4.877, - "eval_wer": 0.32014226782314364, - "step": 150000 - }, - { - "epoch": 5.759222409306597, - "grad_norm": 5.095980167388916, - "learning_rate": 7.45187969924812e-05, - "loss": 0.1802, - "step": 150500 - }, - { - "epoch": 5.7783560385733965, - "grad_norm": 1.2752444744110107, - "learning_rate": 7.376691729323307e-05, - "loss": 0.1736, - "step": 151000 - }, - { - "epoch": 5.7783560385733965, - "eval_loss": Infinity, - "eval_runtime": 178.7009, - "eval_samples_per_second": 39.295, - "eval_steps_per_second": 4.913, - "eval_wer": 0.3235799661518934, - "step": 151000 - }, - { - "epoch": 5.797489667840196, - "grad_norm": 0.6796151995658875, - "learning_rate": 7.301503759398495e-05, - "loss": 0.1707, - "step": 151500 - }, - { - "epoch": 5.816623297106995, - "grad_norm": 0.8652954697608948, - "learning_rate": 7.226466165413533e-05, - "loss": 0.1664, - "step": 152000 - }, - { - "epoch": 5.816623297106995, - "eval_loss": Infinity, - "eval_runtime": 180.4487, - "eval_samples_per_second": 38.914, - "eval_steps_per_second": 4.866, - "eval_wer": 0.3222313306536916, - "step": 152000 - }, - { - "epoch": 5.8357569263737945, - "grad_norm": 0.5811170935630798, - "learning_rate": 7.151278195488721e-05, - "loss": 0.1712, - "step": 152500 - }, - { - "epoch": 5.854890555640594, - "grad_norm": 0.414420485496521, - "learning_rate": 7.076090225563909e-05, - "loss": 0.1704, - "step": 153000 - }, - { - "epoch": 5.854890555640594, - "eval_loss": Infinity, - "eval_runtime": 181.9454, - "eval_samples_per_second": 38.594, - "eval_steps_per_second": 4.826, - "eval_wer": 0.3200232705733023, - "step": 153000 - }, - { - "epoch": 5.874024184907393, - "grad_norm": 0.7044617533683777, - "learning_rate": 7.000902255639097e-05, - "loss": 0.1797, - "step": 153500 - }, - { - "epoch": 5.8931578141741925, - "grad_norm": 0.6984072327613831, - "learning_rate": 6.925714285714284e-05, - "loss": 0.1713, - "step": 154000 - }, - { - "epoch": 5.8931578141741925, - "eval_loss": Infinity, - "eval_runtime": 181.4266, - "eval_samples_per_second": 38.704, - "eval_steps_per_second": 4.839, - "eval_wer": 0.33001903955997464, - "step": 154000 - }, - { - "epoch": 5.912291443440992, - "grad_norm": 1.7558343410491943, - "learning_rate": 6.850526315789472e-05, - "loss": 0.1718, - "step": 154500 - }, - { - "epoch": 5.931425072707791, - "grad_norm": 0.5357454419136047, - "learning_rate": 6.77533834586466e-05, - "loss": 0.1701, - "step": 155000 - }, - { - "epoch": 5.931425072707791, - "eval_loss": Infinity, - "eval_runtime": 184.6223, - "eval_samples_per_second": 38.034, - "eval_steps_per_second": 4.756, - "eval_wer": 0.3172202242437064, - "step": 155000 - }, - { - "epoch": 5.9505587019745905, - "grad_norm": 0.6187770962715149, - "learning_rate": 6.700150375939849e-05, - "loss": 0.1684, - "step": 155500 - }, - { - "epoch": 5.96969233124139, - "grad_norm": 0.4420112669467926, - "learning_rate": 6.624962406015037e-05, - "loss": 0.1687, - "step": 156000 - }, - { - "epoch": 5.96969233124139, - "eval_loss": Infinity, - "eval_runtime": 182.0611, - "eval_samples_per_second": 38.569, - "eval_steps_per_second": 4.823, - "eval_wer": 0.31862174740850435, - "step": 156000 - }, - { - "epoch": 5.988825960508189, - "grad_norm": 2.3220465183258057, - "learning_rate": 6.550075187969924e-05, - "loss": 0.1657, - "step": 156500 - }, - { - "epoch": 6.0079595897749885, - "grad_norm": 1.0167362689971924, - "learning_rate": 6.475037593984962e-05, - "loss": 0.1543, - "step": 157000 - }, - { - "epoch": 6.0079595897749885, - "eval_loss": Infinity, - "eval_runtime": 181.8294, - "eval_samples_per_second": 38.619, - "eval_steps_per_second": 4.829, - "eval_wer": 0.31407340808123546, - "step": 157000 - }, - { - "epoch": 6.027093219041788, - "grad_norm": 0.2879861295223236, - "learning_rate": 6.4e-05, - "loss": 0.1419, - "step": 157500 - }, - { - "epoch": 6.046226848308587, - "grad_norm": 0.5147427916526794, - "learning_rate": 6.324812030075188e-05, - "loss": 0.142, - "step": 158000 - }, - { - "epoch": 6.046226848308587, - "eval_loss": Infinity, - "eval_runtime": 181.1027, - "eval_samples_per_second": 38.774, - "eval_steps_per_second": 4.848, - "eval_wer": 0.3165591284112545, - "step": 158000 - }, - { - "epoch": 6.065360477575386, - "grad_norm": 0.43559426069259644, - "learning_rate": 6.249624060150375e-05, - "loss": 0.1399, - "step": 158500 - }, - { - "epoch": 6.084494106842186, - "grad_norm": 0.38178640604019165, - "learning_rate": 6.174436090225563e-05, - "loss": 0.1438, - "step": 159000 - }, - { - "epoch": 6.084494106842186, - "eval_loss": Infinity, - "eval_runtime": 181.0147, - "eval_samples_per_second": 38.792, - "eval_steps_per_second": 4.85, - "eval_wer": 0.31562037232917284, - "step": 159000 - }, - { - "epoch": 6.103627736108985, - "grad_norm": 0.42758727073669434, - "learning_rate": 6.099248120300751e-05, - "loss": 0.144, - "step": 159500 - }, - { - "epoch": 6.122761365375784, - "grad_norm": 0.5155762434005737, - "learning_rate": 6.024060150375939e-05, - "loss": 0.1433, - "step": 160000 - }, - { - "epoch": 6.122761365375784, - "eval_loss": Infinity, - "eval_runtime": 183.6308, - "eval_samples_per_second": 38.24, - "eval_steps_per_second": 4.781, - "eval_wer": 0.31587158874550453, - "step": 160000 - }, - { - "epoch": 6.141894994642584, - "grad_norm": 0.6651669144630432, - "learning_rate": 5.949022556390977e-05, - "loss": 0.1426, - "step": 160500 - }, - { - "epoch": 6.161028623909383, - "grad_norm": 0.42425113916397095, - "learning_rate": 5.873834586466165e-05, - "loss": 0.1442, - "step": 161000 - }, - { - "epoch": 6.161028623909383, - "eval_loss": Infinity, - "eval_runtime": 181.9638, - "eval_samples_per_second": 38.59, - "eval_steps_per_second": 4.825, - "eval_wer": 0.3142849587476201, - "step": 161000 - }, - { - "epoch": 6.180162253176182, - "grad_norm": 1.366357684135437, - "learning_rate": 5.798646616541353e-05, - "loss": 0.1342, - "step": 161500 - }, - { - "epoch": 6.199295882442982, - "grad_norm": 0.7355407476425171, - "learning_rate": 5.7234586466165414e-05, - "loss": 0.1494, - "step": 162000 - }, - { - "epoch": 6.199295882442982, - "eval_loss": Infinity, - "eval_runtime": 182.1034, - "eval_samples_per_second": 38.561, - "eval_steps_per_second": 4.821, - "eval_wer": 0.3106621535857838, - "step": 162000 - }, - { - "epoch": 6.218429511709781, - "grad_norm": 0.8021041750907898, - "learning_rate": 5.648270676691729e-05, - "loss": 0.1449, - "step": 162500 - }, - { - "epoch": 6.23756314097658, - "grad_norm": 0.3070674240589142, - "learning_rate": 5.573082706766917e-05, - "loss": 0.1355, - "step": 163000 - }, - { - "epoch": 6.23756314097658, - "eval_loss": Infinity, - "eval_runtime": 181.6272, - "eval_samples_per_second": 38.662, - "eval_steps_per_second": 4.834, - "eval_wer": 0.31661201607785067, - "step": 163000 - }, - { - "epoch": 6.25669677024338, - "grad_norm": 0.3594122529029846, - "learning_rate": 5.498045112781954e-05, - "loss": 0.1399, - "step": 163500 - }, - { - "epoch": 6.275830399510179, - "grad_norm": 0.7340966463088989, - "learning_rate": 5.422857142857142e-05, - "loss": 0.1403, - "step": 164000 - }, - { - "epoch": 6.275830399510179, - "eval_loss": Infinity, - "eval_runtime": 182.6513, - "eval_samples_per_second": 38.445, - "eval_steps_per_second": 4.807, - "eval_wer": 0.31170668500105775, - "step": 164000 - }, - { - "epoch": 6.294964028776978, - "grad_norm": 0.49476948380470276, - "learning_rate": 5.3476691729323304e-05, - "loss": 0.1391, - "step": 164500 - }, - { - "epoch": 6.314097658043778, - "grad_norm": 0.7009222507476807, - "learning_rate": 5.2726315789473675e-05, - "loss": 0.1435, - "step": 165000 - }, - { - "epoch": 6.314097658043778, - "eval_loss": Infinity, - "eval_runtime": 182.5712, - "eval_samples_per_second": 38.462, - "eval_steps_per_second": 4.809, - "eval_wer": 0.3124206685001058, - "step": 165000 - }, - { - "epoch": 6.333231287310577, - "grad_norm": 1.6074929237365723, - "learning_rate": 5.197443609022556e-05, - "loss": 0.1369, - "step": 165500 - }, - { - "epoch": 6.352364916577376, - "grad_norm": 0.4530220031738281, - "learning_rate": 5.122255639097744e-05, - "loss": 0.1446, - "step": 166000 - }, - { - "epoch": 6.352364916577376, - "eval_loss": Infinity, - "eval_runtime": 182.3168, - "eval_samples_per_second": 38.515, - "eval_steps_per_second": 4.816, - "eval_wer": 0.31234133700021155, - "step": 166000 - }, - { - "epoch": 6.371498545844176, - "grad_norm": 0.5443539023399353, - "learning_rate": 5.047067669172932e-05, - "loss": 0.1481, - "step": 166500 - }, - { - "epoch": 6.390632175110975, - "grad_norm": 0.604567289352417, - "learning_rate": 4.9718796992481194e-05, - "loss": 0.1385, - "step": 167000 - }, - { - "epoch": 6.390632175110975, - "eval_loss": Infinity, - "eval_runtime": 180.452, - "eval_samples_per_second": 38.913, - "eval_steps_per_second": 4.866, - "eval_wer": 0.31403374233128833, - "step": 167000 - }, - { - "epoch": 6.409765804377774, - "grad_norm": 0.5584743022918701, - "learning_rate": 4.8966917293233076e-05, - "loss": 0.1451, - "step": 167500 - }, - { - "epoch": 6.428899433644574, - "grad_norm": 0.34049585461616516, - "learning_rate": 4.821503759398496e-05, - "loss": 0.1437, - "step": 168000 - }, - { - "epoch": 6.428899433644574, - "eval_loss": Infinity, - "eval_runtime": 180.8859, - "eval_samples_per_second": 38.82, - "eval_steps_per_second": 4.854, - "eval_wer": 0.31029193991961074, - "step": 168000 - }, - { - "epoch": 6.448033062911373, - "grad_norm": 1.0056949853897095, - "learning_rate": 4.746315789473684e-05, - "loss": 0.1453, - "step": 168500 - }, - { - "epoch": 6.467166692178172, - "grad_norm": 0.4812434911727905, - "learning_rate": 4.671278195488721e-05, - "loss": 0.1328, - "step": 169000 - }, - { - "epoch": 6.467166692178172, - "eval_loss": Infinity, - "eval_runtime": 181.778, - "eval_samples_per_second": 38.63, - "eval_steps_per_second": 4.83, - "eval_wer": 0.31021260841971654, - "step": 169000 - }, - { - "epoch": 6.486300321444972, - "grad_norm": 0.5090984106063843, - "learning_rate": 4.596090225563909e-05, - "loss": 0.1369, - "step": 169500 - }, - { - "epoch": 6.505433950711771, - "grad_norm": 3.274346113204956, - "learning_rate": 4.520902255639097e-05, - "loss": 0.1354, - "step": 170000 - }, - { - "epoch": 6.505433950711771, - "eval_loss": Infinity, - "eval_runtime": 182.3019, - "eval_samples_per_second": 38.519, - "eval_steps_per_second": 4.816, - "eval_wer": 0.31116458641844724, - "step": 170000 - }, - { - "epoch": 6.52456757997857, - "grad_norm": 0.6519914269447327, - "learning_rate": 4.445714285714285e-05, - "loss": 0.1405, - "step": 170500 - }, - { - "epoch": 6.54370120924537, - "grad_norm": 0.5463857650756836, - "learning_rate": 4.370526315789473e-05, - "loss": 0.1394, - "step": 171000 - }, - { - "epoch": 6.54370120924537, - "eval_loss": Infinity, - "eval_runtime": 181.4757, - "eval_samples_per_second": 38.694, - "eval_steps_per_second": 4.838, - "eval_wer": 0.3094192934207743, - "step": 171000 - }, - { - "epoch": 6.562834838512169, - "grad_norm": 0.43961018323898315, - "learning_rate": 4.295338345864661e-05, - "loss": 0.1424, - "step": 171500 - }, - { - "epoch": 6.581968467778968, - "grad_norm": 0.2494196593761444, - "learning_rate": 4.220150375939849e-05, - "loss": 0.1385, - "step": 172000 - }, - { - "epoch": 6.581968467778968, - "eval_loss": Infinity, - "eval_runtime": 181.1999, - "eval_samples_per_second": 38.753, - "eval_steps_per_second": 4.845, - "eval_wer": 0.30549238417601016, - "step": 172000 - }, - { - "epoch": 6.601102097045768, - "grad_norm": 3.2341201305389404, - "learning_rate": 4.144962406015037e-05, - "loss": 0.1444, - "step": 172500 - }, - { - "epoch": 6.620235726312567, - "grad_norm": 0.6074426770210266, - "learning_rate": 4.069774436090225e-05, - "loss": 0.138, - "step": 173000 - }, - { - "epoch": 6.620235726312567, - "eval_loss": Infinity, - "eval_runtime": 181.8045, - "eval_samples_per_second": 38.624, - "eval_steps_per_second": 4.829, - "eval_wer": 0.3054659403427121, - "step": 173000 - }, - { - "epoch": 6.639369355579366, - "grad_norm": 0.48304858803749084, - "learning_rate": 3.994736842105263e-05, - "loss": 0.1356, - "step": 173500 - }, - { - "epoch": 6.658502984846166, - "grad_norm": 0.3982817530632019, - "learning_rate": 3.919548872180451e-05, - "loss": 0.138, - "step": 174000 - }, - { - "epoch": 6.658502984846166, - "eval_loss": Infinity, - "eval_runtime": 183.7344, - "eval_samples_per_second": 38.218, - "eval_steps_per_second": 4.779, - "eval_wer": 0.3061138142585149, - "step": 174000 - }, - { - "epoch": 6.677636614112965, - "grad_norm": 0.46521154046058655, - "learning_rate": 3.844360902255639e-05, - "loss": 0.1293, - "step": 174500 - }, - { - "epoch": 6.696770243379764, - "grad_norm": 0.33037710189819336, - "learning_rate": 3.769172932330827e-05, - "loss": 0.1313, - "step": 175000 - }, - { - "epoch": 6.696770243379764, - "eval_loss": Infinity, - "eval_runtime": 181.4617, - "eval_samples_per_second": 38.697, - "eval_steps_per_second": 4.838, - "eval_wer": 0.3061005923418659, - "step": 175000 - }, - { - "epoch": 6.715903872646564, - "grad_norm": 0.47027432918548584, - "learning_rate": 3.6939849624060146e-05, - "loss": 0.1363, - "step": 175500 - }, - { - "epoch": 6.735037501913363, - "grad_norm": 0.7823716998100281, - "learning_rate": 3.618796992481203e-05, - "loss": 0.1427, - "step": 176000 - }, - { - "epoch": 6.735037501913363, - "eval_loss": Infinity, - "eval_runtime": 180.6528, - "eval_samples_per_second": 38.87, - "eval_steps_per_second": 4.86, - "eval_wer": 0.30834831817220226, - "step": 176000 - }, - { - "epoch": 6.754171131180162, - "grad_norm": 0.5896081924438477, - "learning_rate": 3.543609022556391e-05, - "loss": 0.1347, - "step": 176500 - }, - { - "epoch": 6.773304760446962, - "grad_norm": 0.7625430822372437, - "learning_rate": 3.468571428571429e-05, - "loss": 0.1432, - "step": 177000 - }, - { - "epoch": 6.773304760446962, - "eval_loss": Infinity, - "eval_runtime": 181.5521, - "eval_samples_per_second": 38.678, - "eval_steps_per_second": 4.836, - "eval_wer": 0.3047519568436641, - "step": 177000 - }, - { - "epoch": 6.792438389713761, - "grad_norm": 0.8567324280738831, - "learning_rate": 3.393383458646616e-05, - "loss": 0.1348, - "step": 177500 - }, - { - "epoch": 6.81157201898056, - "grad_norm": 1.5647565126419067, - "learning_rate": 3.318195488721804e-05, - "loss": 0.136, - "step": 178000 - }, - { - "epoch": 6.81157201898056, - "eval_loss": Infinity, - "eval_runtime": 181.3872, - "eval_samples_per_second": 38.713, - "eval_steps_per_second": 4.84, - "eval_wer": 0.3039454199280728, - "step": 178000 - }, - { - "epoch": 6.83070564824736, - "grad_norm": 0.3368758261203766, - "learning_rate": 3.2430075187969924e-05, - "loss": 0.1383, - "step": 178500 - }, - { - "epoch": 6.8498392775141586, - "grad_norm": 0.5614475011825562, - "learning_rate": 3.1678195488721806e-05, - "loss": 0.1424, - "step": 179000 - }, - { - "epoch": 6.8498392775141586, - "eval_loss": Infinity, - "eval_runtime": 181.4861, - "eval_samples_per_second": 38.692, - "eval_steps_per_second": 4.838, - "eval_wer": 0.3016448064311403, - "step": 179000 - }, - { - "epoch": 6.868972906780958, - "grad_norm": 0.6072395443916321, - "learning_rate": 3.092631578947368e-05, - "loss": 0.1284, - "step": 179500 - }, - { - "epoch": 6.888106536047758, - "grad_norm": 0.6235467195510864, - "learning_rate": 3.0174436090225562e-05, - "loss": 0.1347, - "step": 180000 - }, - { - "epoch": 6.888106536047758, - "eval_loss": Infinity, - "eval_runtime": 181.9343, - "eval_samples_per_second": 38.596, - "eval_steps_per_second": 4.826, - "eval_wer": 0.3038925322614766, - "step": 180000 - }, - { - "epoch": 6.9072401653145565, - "grad_norm": 4.727964401245117, - "learning_rate": 2.9422556390977444e-05, - "loss": 0.1327, - "step": 180500 - }, - { - "epoch": 6.926373794581356, - "grad_norm": 0.6982028484344482, - "learning_rate": 2.867067669172932e-05, - "loss": 0.1307, - "step": 181000 - }, - { - "epoch": 6.926373794581356, - "eval_loss": Infinity, - "eval_runtime": 180.8488, - "eval_samples_per_second": 38.828, - "eval_steps_per_second": 4.855, - "eval_wer": 0.3028876665961498, - "step": 181000 - }, - { - "epoch": 6.945507423848156, - "grad_norm": 0.2889564633369446, - "learning_rate": 2.7918796992481203e-05, - "loss": 0.1349, - "step": 181500 - }, - { - "epoch": 6.9646410531149545, - "grad_norm": 0.3712177872657776, - "learning_rate": 2.716691729323308e-05, - "loss": 0.1293, - "step": 182000 - }, - { - "epoch": 6.9646410531149545, - "eval_loss": Infinity, - "eval_runtime": 180.868, - "eval_samples_per_second": 38.824, - "eval_steps_per_second": 4.854, - "eval_wer": 0.30258356251322194, - "step": 182000 - }, - { - "epoch": 6.983774682381754, - "grad_norm": 0.7685525417327881, - "learning_rate": 2.6415037593984963e-05, - "loss": 0.1339, - "step": 182500 - }, - { - "epoch": 7.002908311648554, - "grad_norm": 0.19924980401992798, - "learning_rate": 2.566315789473684e-05, - "loss": 0.1259, - "step": 183000 - }, - { - "epoch": 7.002908311648554, - "eval_loss": Infinity, - "eval_runtime": 180.6261, - "eval_samples_per_second": 38.876, - "eval_steps_per_second": 4.861, - "eval_wer": 0.3025174529299767, - "step": 183000 - }, - { - "epoch": 7.0220419409153525, - "grad_norm": 0.3780413568019867, - "learning_rate": 2.4911278195488722e-05, - "loss": 0.1163, - "step": 183500 - }, - { - "epoch": 7.041175570182152, - "grad_norm": 0.5037872195243835, - "learning_rate": 2.4160902255639094e-05, - "loss": 0.1151, - "step": 184000 - }, - { - "epoch": 7.041175570182152, - "eval_loss": Infinity, - "eval_runtime": 180.4745, - "eval_samples_per_second": 38.909, - "eval_steps_per_second": 4.865, - "eval_wer": 0.3033900994288132, - "step": 184000 - }, - { - "epoch": 7.060309199448952, - "grad_norm": 0.2655356824398041, - "learning_rate": 2.3409022556390975e-05, - "loss": 0.1104, - "step": 184500 - }, - { - "epoch": 7.0794428287157505, - "grad_norm": 0.6396870613098145, - "learning_rate": 2.2657142857142853e-05, - "loss": 0.1143, - "step": 185000 - }, - { - "epoch": 7.0794428287157505, - "eval_loss": Infinity, - "eval_runtime": 180.1648, - "eval_samples_per_second": 38.975, - "eval_steps_per_second": 4.873, - "eval_wer": 0.30249100909667864, - "step": 185000 - }, - { - "epoch": 7.09857645798255, - "grad_norm": 1.1950030326843262, - "learning_rate": 2.1905263157894735e-05, - "loss": 0.1217, - "step": 185500 - }, - { - "epoch": 7.11771008724935, - "grad_norm": 0.6003520488739014, - "learning_rate": 2.1154887218045113e-05, - "loss": 0.1105, - "step": 186000 - }, - { - "epoch": 7.11771008724935, - "eval_loss": Infinity, - "eval_runtime": 179.583, - "eval_samples_per_second": 39.102, - "eval_steps_per_second": 4.889, - "eval_wer": 0.30058705309921724, - "step": 186000 - }, - { - "epoch": 7.1368437165161485, - "grad_norm": 0.5612542033195496, - "learning_rate": 2.040300751879699e-05, - "loss": 0.1124, - "step": 186500 - }, - { - "epoch": 7.155977345782948, - "grad_norm": 0.47781071066856384, - "learning_rate": 1.9651127819548872e-05, - "loss": 0.1126, - "step": 187000 - }, - { - "epoch": 7.155977345782948, - "eval_loss": Infinity, - "eval_runtime": 179.6562, - "eval_samples_per_second": 39.086, - "eval_steps_per_second": 4.887, - "eval_wer": 0.3006134969325153, - "step": 187000 - }, - { - "epoch": 7.175110975049748, - "grad_norm": 0.5884853601455688, - "learning_rate": 1.889924812030075e-05, - "loss": 0.1147, - "step": 187500 - }, - { - "epoch": 7.194244604316546, - "grad_norm": 0.269551157951355, - "learning_rate": 1.814736842105263e-05, - "loss": 0.1139, - "step": 188000 - }, - { - "epoch": 7.194244604316546, - "eval_loss": Infinity, - "eval_runtime": 179.5976, - "eval_samples_per_second": 39.099, - "eval_steps_per_second": 4.889, - "eval_wer": 0.2996482970171356, - "step": 188000 - }, - { - "epoch": 7.213378233583346, - "grad_norm": 0.7385743260383606, - "learning_rate": 1.739548872180451e-05, - "loss": 0.1088, - "step": 188500 - }, - { - "epoch": 7.232511862850146, - "grad_norm": 0.7600038647651672, - "learning_rate": 1.6643609022556388e-05, - "loss": 0.1101, - "step": 189000 - }, - { - "epoch": 7.232511862850146, + "epoch": 0.30864197530864196, "eval_loss": Infinity, - "eval_runtime": 180.6841, - "eval_samples_per_second": 38.863, - "eval_steps_per_second": 4.859, - "eval_wer": 0.29820710810239054, - "step": 189000 - }, - { - "epoch": 7.251645492116944, - "grad_norm": 0.4615612328052521, - "learning_rate": 1.589172932330827e-05, - "loss": 0.12, - "step": 189500 - }, - { - "epoch": 7.270779121383744, - "grad_norm": 17.22515296936035, - "learning_rate": 1.5139849624060148e-05, - "loss": 0.1187, - "step": 190000 - }, - { - "epoch": 7.270779121383744, - "eval_loss": Infinity, - "eval_runtime": 180.7132, - "eval_samples_per_second": 38.857, - "eval_steps_per_second": 4.859, - "eval_wer": 0.2988285381848953, - "step": 190000 - }, - { - "epoch": 7.289912750650544, - "grad_norm": 0.272981196641922, - "learning_rate": 1.4387969924812028e-05, - "loss": 0.1106, - "step": 190500 - }, - { - "epoch": 7.309046379917342, - "grad_norm": 0.7808548212051392, - "learning_rate": 1.3636090225563907e-05, - "loss": 0.1174, - "step": 191000 - }, - { - "epoch": 7.309046379917342, - "eval_loss": Infinity, - "eval_runtime": 180.5814, - "eval_samples_per_second": 38.886, - "eval_steps_per_second": 4.862, - "eval_wer": 0.2993441929342077, - "step": 191000 - }, - { - "epoch": 7.328180009184142, - "grad_norm": 0.32894080877304077, - "learning_rate": 1.2885714285714284e-05, - "loss": 0.1129, - "step": 191500 + "eval_runtime": 198.0112, + "eval_samples_per_second": 35.463, + "eval_steps_per_second": 4.434, + "eval_wer": 1.0, + "step": 200 }, { - "epoch": 7.347313638450942, - "grad_norm": 0.6160246729850769, - "learning_rate": 1.2133834586466163e-05, - "loss": 0.1132, - "step": 192000 - }, - { - "epoch": 7.347313638450942, + "epoch": 0.6172839506172839, "eval_loss": Infinity, - "eval_runtime": 180.7946, - "eval_samples_per_second": 38.84, - "eval_steps_per_second": 4.856, - "eval_wer": 0.2995689655172414, - "step": 192000 - }, - { - "epoch": 7.36644726771774, - "grad_norm": 0.3549739718437195, - "learning_rate": 1.1381954887218043e-05, - "loss": 0.1214, - "step": 192500 + "eval_runtime": 196.4137, + "eval_samples_per_second": 35.751, + "eval_steps_per_second": 4.47, + "eval_wer": 0.9468401438718138, + "step": 400 }, { - "epoch": 7.38558089698454, - "grad_norm": 0.5132611393928528, - "learning_rate": 1.0630075187969923e-05, - "loss": 0.1108, - "step": 193000 + "epoch": 0.7716049382716049, + "grad_norm": 2.496718645095825, + "learning_rate": 0.0002958, + "loss": 3.8579, + "step": 500 }, { - "epoch": 7.38558089698454, + "epoch": 0.9259259259259259, "eval_loss": Infinity, - "eval_runtime": 181.6179, - "eval_samples_per_second": 38.664, - "eval_steps_per_second": 4.834, - "eval_wer": 0.2995160778506452, - "step": 193000 - }, - { - "epoch": 7.40471452625134, - "grad_norm": 0.1533355563879013, - "learning_rate": 9.878195488721803e-06, - "loss": 0.117, - "step": 193500 - }, - { - "epoch": 7.423848155518138, - "grad_norm": 0.542405903339386, - "learning_rate": 9.126315789473683e-06, - "loss": 0.1119, - "step": 194000 + "eval_runtime": 194.7508, + "eval_samples_per_second": 36.056, + "eval_steps_per_second": 4.508, + "eval_wer": 0.682339345305338, + "step": 600 }, { - "epoch": 7.423848155518138, + "epoch": 1.2345679012345678, "eval_loss": Infinity, - "eval_runtime": 180.7251, - "eval_samples_per_second": 38.855, - "eval_steps_per_second": 4.858, - "eval_wer": 0.2991194203511741, - "step": 194000 - }, - { - "epoch": 7.442981784784938, - "grad_norm": 0.4688265919685364, - "learning_rate": 8.37593984962406e-06, - "loss": 0.1116, - "step": 194500 + "eval_runtime": 195.8415, + "eval_samples_per_second": 35.856, + "eval_steps_per_second": 4.483, + "eval_wer": 0.5246257125420384, + "step": 800 }, { - "epoch": 7.462115414051738, - "grad_norm": 0.7588228583335876, - "learning_rate": 7.624060150375939e-06, - "loss": 0.1098, - "step": 195000 + "epoch": 1.5432098765432098, + "grad_norm": 0.836155354976654, + "learning_rate": 0.0002016, + "loss": 0.7662, + "step": 1000 }, { - "epoch": 7.462115414051738, + "epoch": 1.5432098765432098, "eval_loss": Infinity, - "eval_runtime": 181.1196, - "eval_samples_per_second": 38.77, - "eval_steps_per_second": 4.848, - "eval_wer": 0.29845832451872223, - "step": 195000 - }, - { - "epoch": 7.481249043318536, - "grad_norm": 0.9082927703857422, - "learning_rate": 6.8721804511278185e-06, - "loss": 0.1049, - "step": 195500 - }, - { - "epoch": 7.500382672585336, - "grad_norm": 0.5702168345451355, - "learning_rate": 6.120300751879698e-06, - "loss": 0.1053, - "step": 196000 + "eval_runtime": 194.2625, + "eval_samples_per_second": 36.147, + "eval_steps_per_second": 4.52, + "eval_wer": 0.45603988936932727, + "step": 1000 }, { - "epoch": 7.500382672585336, + "epoch": 1.8518518518518519, "eval_loss": Infinity, - "eval_runtime": 180.5918, - "eval_samples_per_second": 38.883, - "eval_steps_per_second": 4.862, - "eval_wer": 0.29765178760313094, - "step": 196000 - }, - { - "epoch": 7.519516301852136, - "grad_norm": 1.2016927003860474, - "learning_rate": 5.368421052631578e-06, - "loss": 0.1011, - "step": 196500 - }, - { - "epoch": 7.538649931118934, - "grad_norm": 0.6198543906211853, - "learning_rate": 4.616541353383459e-06, - "loss": 0.11, - "step": 197000 + "eval_runtime": 195.3379, + "eval_samples_per_second": 35.948, + "eval_steps_per_second": 4.495, + "eval_wer": 0.4314076844169166, + "step": 1200 }, { - "epoch": 7.538649931118934, + "epoch": 2.1604938271604937, "eval_loss": Infinity, - "eval_runtime": 181.146, - "eval_samples_per_second": 38.764, - "eval_steps_per_second": 4.847, - "eval_wer": 0.2975327903532896, - "step": 197000 - }, - { - "epoch": 7.557783560385734, - "grad_norm": 0.6191059947013855, - "learning_rate": 3.8646616541353386e-06, - "loss": 0.1118, - "step": 197500 + "eval_runtime": 194.6062, + "eval_samples_per_second": 36.083, + "eval_steps_per_second": 4.512, + "eval_wer": 0.43706906626154024, + "step": 1400 }, { - "epoch": 7.576917189652534, - "grad_norm": 0.5622895956039429, - "learning_rate": 3.1127819548872175e-06, - "loss": 0.1091, - "step": 198000 + "epoch": 2.314814814814815, + "grad_norm": 0.4859907329082489, + "learning_rate": 0.00010239999999999998, + "loss": 0.5916, + "step": 1500 }, { - "epoch": 7.576917189652534, + "epoch": 2.4691358024691357, "eval_loss": Infinity, - "eval_runtime": 182.2479, - "eval_samples_per_second": 38.53, - "eval_steps_per_second": 4.818, - "eval_wer": 0.295893272688809, - "step": 198000 + "eval_runtime": 194.4662, + "eval_samples_per_second": 36.109, + "eval_steps_per_second": 4.515, + "eval_wer": 0.38610364483918297, + "step": 1600 }, { - "epoch": 7.596050818919332, - "grad_norm": 1.4227417707443237, - "learning_rate": 2.362406015037594e-06, - "loss": 0.1126, - "step": 198500 - }, - { - "epoch": 7.615184448186132, - "grad_norm": 0.5790704488754272, - "learning_rate": 1.6105263157894734e-06, - "loss": 0.108, - "step": 199000 - }, - { - "epoch": 7.615184448186132, + "epoch": 2.7777777777777777, "eval_loss": Infinity, - "eval_runtime": 182.5001, - "eval_samples_per_second": 38.477, - "eval_steps_per_second": 4.811, - "eval_wer": 0.2963295959382272, - "step": 199000 + "eval_runtime": 194.3135, + "eval_samples_per_second": 36.137, + "eval_steps_per_second": 4.518, + "eval_wer": 0.36491241738407804, + "step": 1800 }, { - "epoch": 7.634318077452932, - "grad_norm": 0.6536182165145874, - "learning_rate": 8.601503759398495e-07, - "loss": 0.113, - "step": 199500 - }, - { - "epoch": 7.65345170671973, - "grad_norm": 0.41451194882392883, - "learning_rate": 1.0827067669172932e-07, - "loss": 0.1077, - "step": 200000 + "epoch": 3.0864197530864197, + "grad_norm": 0.7149534821510315, + "learning_rate": 2.9999999999999997e-06, + "loss": 0.4977, + "step": 2000 }, { - "epoch": 7.65345170671973, + "epoch": 3.0864197530864197, "eval_loss": Infinity, - "eval_runtime": 181.1994, - "eval_samples_per_second": 38.753, - "eval_steps_per_second": 4.845, - "eval_wer": 0.2962370425216839, - "step": 200000 + "eval_runtime": 195.0164, + "eval_samples_per_second": 36.007, + "eval_steps_per_second": 4.502, + "eval_wer": 0.36645761105267943, + "step": 2000 }, { - "epoch": 7.65345170671973, - "step": 200000, - "total_flos": 2.464157327536675e+20, - "train_loss": 0.29584050216674806, - "train_runtime": 112002.2186, - "train_samples_per_second": 14.285, - "train_steps_per_second": 1.786 + "epoch": 3.0864197530864197, + "step": 2000, + "total_flos": 1.6964840215738495e+19, + "train_loss": 1.4283542098999022, + "train_runtime": 6370.4271, + "train_samples_per_second": 20.093, + "train_steps_per_second": 0.314 } ], "logging_steps": 500, - "max_steps": 200000, + "max_steps": 2000, "num_input_tokens_seen": 0, - "num_train_epochs": 8, - "save_steps": 4000, + "num_train_epochs": 4, + "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { @@ -4635,8 +153,8 @@ "attributes": {} } }, - "total_flos": 2.464157327536675e+20, - "train_batch_size": 8, + "total_flos": 1.6964840215738495e+19, + "train_batch_size": 64, "trial_name": null, "trial_params": null }