|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.65345170671973, |
|
"eval_steps": 1000, |
|
"global_step": 200000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019133629266799325, |
|
"grad_norm": 2.1359012126922607, |
|
"learning_rate": 0.0002982, |
|
"loss": 4.6329, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03826725853359865, |
|
"grad_norm": 2.5346407890319824, |
|
"learning_rate": 0.00029925413533834583, |
|
"loss": 1.059, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03826725853359865, |
|
"eval_loss": null,
|
"eval_runtime": 176.6488, |
|
"eval_samples_per_second": 39.751, |
|
"eval_steps_per_second": 4.97, |
|
"eval_wer": 0.7912656018616459, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05740088780039798, |
|
"grad_norm": 2.2379095554351807, |
|
"learning_rate": 0.0002985022556390977, |
|
"loss": 0.8241, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0765345170671973, |
|
"grad_norm": 3.605928659439087, |
|
"learning_rate": 0.0002977503759398496, |
|
"loss": 0.7527, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0765345170671973, |
|
"eval_loss": null,
|
"eval_runtime": 174.864, |
|
"eval_samples_per_second": 40.157, |
|
"eval_steps_per_second": 5.021, |
|
"eval_wer": 0.7347419081870108, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09566814633399663, |
|
"grad_norm": 3.9373598098754883, |
|
"learning_rate": 0.0002969984962406015, |
|
"loss": 0.6967, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.11480177560079596, |
|
"grad_norm": 5.641207218170166, |
|
"learning_rate": 0.00029624661654135335, |
|
"loss": 0.6861, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11480177560079596, |
|
"eval_loss": null,
|
"eval_runtime": 174.8654, |
|
"eval_samples_per_second": 40.157, |
|
"eval_steps_per_second": 5.021, |
|
"eval_wer": 0.6766976940977364, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13393540486759528, |
|
"grad_norm": 4.324626922607422, |
|
"learning_rate": 0.0002954947368421052, |
|
"loss": 0.6736, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.1530690341343946, |
|
"grad_norm": 5.716803550720215, |
|
"learning_rate": 0.0002947443609022556, |
|
"loss": 0.651, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1530690341343946, |
|
"eval_loss": null,
|
"eval_runtime": 175.6143, |
|
"eval_samples_per_second": 39.985, |
|
"eval_steps_per_second": 5.0, |
|
"eval_wer": 0.646181510471758, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17220266340119394, |
|
"grad_norm": 6.054178714752197, |
|
"learning_rate": 0.000293993984962406, |
|
"loss": 0.6308, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.19133629266799326, |
|
"grad_norm": 3.1400604248046875, |
|
"learning_rate": 0.00029324210526315787, |
|
"loss": 0.6372, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.19133629266799326, |
|
"eval_loss": null,
|
"eval_runtime": 176.1091, |
|
"eval_samples_per_second": 39.873, |
|
"eval_steps_per_second": 4.986, |
|
"eval_wer": 0.6245107890839856, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.21046992193479258, |
|
"grad_norm": 3.961786985397339, |
|
"learning_rate": 0.00029249022556390974, |
|
"loss": 0.6101, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.22960355120159193, |
|
"grad_norm": 2.590359687805176, |
|
"learning_rate": 0.0002917383458646616, |
|
"loss": 0.6078, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.22960355120159193, |
|
"eval_loss": null,
|
"eval_runtime": 176.6935, |
|
"eval_samples_per_second": 39.741, |
|
"eval_steps_per_second": 4.969, |
|
"eval_wer": 0.5931880685424159, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24873718046839124, |
|
"grad_norm": 3.1479835510253906, |
|
"learning_rate": 0.00029098646616541353, |
|
"loss": 0.601, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.26787080973519056, |
|
"grad_norm": 4.320711135864258, |
|
"learning_rate": 0.0002902345864661654, |
|
"loss": 0.6006, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.26787080973519056, |
|
"eval_loss": null,
|
"eval_runtime": 176.7371, |
|
"eval_samples_per_second": 39.731, |
|
"eval_steps_per_second": 4.968, |
|
"eval_wer": 0.6101253437698329, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2870044390019899, |
|
"grad_norm": 3.0556020736694336, |
|
"learning_rate": 0.00028948270676691727, |
|
"loss": 0.5943, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3061380682687892, |
|
"grad_norm": 1.8571085929870605, |
|
"learning_rate": 0.00028873082706766913, |
|
"loss": 0.6008, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3061380682687892, |
|
"eval_loss": null,
|
"eval_runtime": 176.2913, |
|
"eval_samples_per_second": 39.832, |
|
"eval_steps_per_second": 4.98, |
|
"eval_wer": 0.5832055214723927, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.32527169753558854, |
|
"grad_norm": 2.2670278549194336, |
|
"learning_rate": 0.00028797894736842106, |
|
"loss": 0.5828, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.3444053268023879, |
|
"grad_norm": 3.5902249813079834, |
|
"learning_rate": 0.00028722706766917287, |
|
"loss": 0.592, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3444053268023879, |
|
"eval_loss": null,
|
"eval_runtime": 177.2597, |
|
"eval_samples_per_second": 39.614, |
|
"eval_steps_per_second": 4.953, |
|
"eval_wer": 0.5834302940554262, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3635389560691872, |
|
"grad_norm": 1.5114198923110962, |
|
"learning_rate": 0.0002864751879699248, |
|
"loss": 0.5702, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.3826725853359865, |
|
"grad_norm": 1.9661872386932373, |
|
"learning_rate": 0.00028572330827067666, |
|
"loss": 0.5638, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.3826725853359865, |
|
"eval_loss": null,
|
"eval_runtime": 176.3135, |
|
"eval_samples_per_second": 39.827, |
|
"eval_steps_per_second": 4.98, |
|
"eval_wer": 0.5573434525068754, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.40180621460278587, |
|
"grad_norm": 3.7164504528045654, |
|
"learning_rate": 0.00028497142857142853, |
|
"loss": 0.5599, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.42093984386958516, |
|
"grad_norm": 2.0946409702301025, |
|
"learning_rate": 0.0002842195488721804, |
|
"loss": 0.5585, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.42093984386958516, |
|
"eval_loss": null,
|
"eval_runtime": 177.3363, |
|
"eval_samples_per_second": 39.597, |
|
"eval_steps_per_second": 4.951, |
|
"eval_wer": 0.5664269092447641, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4400734731363845, |
|
"grad_norm": 3.3412957191467285, |
|
"learning_rate": 0.0002834676691729323, |
|
"loss": 0.5436, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.45920710240318385, |
|
"grad_norm": 3.093538999557495, |
|
"learning_rate": 0.0002827157894736842, |
|
"loss": 0.5569, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.45920710240318385, |
|
"eval_loss": null,
|
"eval_runtime": 177.5626, |
|
"eval_samples_per_second": 39.547, |
|
"eval_steps_per_second": 4.945, |
|
"eval_wer": 0.5487624286016501, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.47834073166998314, |
|
"grad_norm": 2.192824125289917, |
|
"learning_rate": 0.0002819654135338346, |
|
"loss": 0.5418, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.4974743609367825, |
|
"grad_norm": 2.0859923362731934, |
|
"learning_rate": 0.00028121353383458645, |
|
"loss": 0.5293, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.4974743609367825, |
|
"eval_loss": null,
|
"eval_runtime": 177.8833, |
|
"eval_samples_per_second": 39.475, |
|
"eval_steps_per_second": 4.936, |
|
"eval_wer": 0.5434736619420352, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5166079902035818, |
|
"grad_norm": 3.662593364715576, |
|
"learning_rate": 0.0002804616541353383, |
|
"loss": 0.5316, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5357416194703811, |
|
"grad_norm": 3.3237922191619873, |
|
"learning_rate": 0.0002797097744360902, |
|
"loss": 0.5388, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5357416194703811, |
|
"eval_loss": null,
|
"eval_runtime": 178.3166, |
|
"eval_samples_per_second": 39.379, |
|
"eval_steps_per_second": 4.924, |
|
"eval_wer": 0.5418209223609054, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5548752487371804, |
|
"grad_norm": 1.9418795108795166, |
|
"learning_rate": 0.0002789578947368421, |
|
"loss": 0.5336, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.5740088780039798, |
|
"grad_norm": 2.3504509925842285, |
|
"learning_rate": 0.00027820601503759397, |
|
"loss": 0.5163, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5740088780039798, |
|
"eval_loss": null,
|
"eval_runtime": 178.0416, |
|
"eval_samples_per_second": 39.44, |
|
"eval_steps_per_second": 4.931, |
|
"eval_wer": 0.5408292786122276, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5931425072707791, |
|
"grad_norm": 1.8465300798416138, |
|
"learning_rate": 0.00027745413533834584, |
|
"loss": 0.5172, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.6122761365375784, |
|
"grad_norm": 2.249298572540283, |
|
"learning_rate": 0.0002767022556390977, |
|
"loss": 0.5226, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6122761365375784, |
|
"eval_loss": null,
|
"eval_runtime": 178.435, |
|
"eval_samples_per_second": 39.353, |
|
"eval_steps_per_second": 4.921, |
|
"eval_wer": 0.5311376137084832, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6314097658043778, |
|
"grad_norm": 1.6280571222305298, |
|
"learning_rate": 0.00027595037593984963, |
|
"loss": 0.5161, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6505433950711771, |
|
"grad_norm": 4.344738006591797, |
|
"learning_rate": 0.0002751984962406015, |
|
"loss": 0.4952, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6505433950711771, |
|
"eval_loss": null,
|
"eval_runtime": 178.8108, |
|
"eval_samples_per_second": 39.271, |
|
"eval_steps_per_second": 4.91, |
|
"eval_wer": 0.5288502221281997, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6696770243379764, |
|
"grad_norm": 2.9461894035339355, |
|
"learning_rate": 0.00027444812030075184, |
|
"loss": 0.5177, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.6888106536047758, |
|
"grad_norm": 3.190275192260742, |
|
"learning_rate": 0.00027369624060150376, |
|
"loss": 0.524, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.6888106536047758, |
|
"eval_loss": null,
|
"eval_runtime": 178.3332, |
|
"eval_samples_per_second": 39.376, |
|
"eval_steps_per_second": 4.923, |
|
"eval_wer": 0.5214591707213878, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7079442828715751, |
|
"grad_norm": 3.2808961868286133, |
|
"learning_rate": 0.0002729443609022556, |
|
"loss": 0.4997, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7270779121383744, |
|
"grad_norm": 2.6754653453826904, |
|
"learning_rate": 0.0002721924812030075, |
|
"loss": 0.5076, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7270779121383744, |
|
"eval_loss": null,
|
"eval_runtime": 178.4608, |
|
"eval_samples_per_second": 39.348, |
|
"eval_steps_per_second": 4.92, |
|
"eval_wer": 0.5186957901417389, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7462115414051738, |
|
"grad_norm": 3.872616767883301, |
|
"learning_rate": 0.00027144060150375936, |
|
"loss": 0.4961, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.765345170671973, |
|
"grad_norm": 4.666406631469727, |
|
"learning_rate": 0.00027068872180451123, |
|
"loss": 0.492, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.765345170671973, |
|
"eval_loss": null,
|
"eval_runtime": 177.9854, |
|
"eval_samples_per_second": 39.453, |
|
"eval_steps_per_second": 4.933, |
|
"eval_wer": 0.5094933361540089, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.7844787999387723, |
|
"grad_norm": null,
|
"learning_rate": 0.00026993984962406014, |
|
"loss": 0.4846, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.8036124292055717, |
|
"grad_norm": 3.923118829727173, |
|
"learning_rate": 0.000269187969924812, |
|
"loss": 0.4934, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8036124292055717, |
|
"eval_loss": null,
|
"eval_runtime": 178.6067, |
|
"eval_samples_per_second": 39.315, |
|
"eval_steps_per_second": 4.916, |
|
"eval_wer": 0.5061481912418024, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.822746058472371, |
|
"grad_norm": 3.902585744857788, |
|
"learning_rate": 0.0002684360902255639, |
|
"loss": 0.5006, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8418796877391703, |
|
"grad_norm": 6.406003952026367, |
|
"learning_rate": 0.0002676842105263158, |
|
"loss": 0.4985, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8418796877391703, |
|
"eval_loss": null,
|
"eval_runtime": 177.9171, |
|
"eval_samples_per_second": 39.468, |
|
"eval_steps_per_second": 4.935, |
|
"eval_wer": 0.5129310344827587, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8610133170059697, |
|
"grad_norm": 3.3762645721435547, |
|
"learning_rate": 0.0002669323308270676, |
|
"loss": 0.5001, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.880146946272769, |
|
"grad_norm": 5.018126010894775, |
|
"learning_rate": 0.00026618045112781954, |
|
"loss": 0.4887, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.880146946272769, |
|
"eval_loss": null,
|
"eval_runtime": 176.8724, |
|
"eval_samples_per_second": 39.701, |
|
"eval_steps_per_second": 4.964, |
|
"eval_wer": 0.49284694309287075, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.8992805755395683, |
|
"grad_norm": 2.6644840240478516, |
|
"learning_rate": 0.0002654285714285714, |
|
"loss": 0.4879, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9184142048063677, |
|
"grad_norm": 1.760593056678772, |
|
"learning_rate": 0.0002646766917293233, |
|
"loss": 0.484, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9184142048063677, |
|
"eval_loss": null,
|
"eval_runtime": 178.117, |
|
"eval_samples_per_second": 39.424, |
|
"eval_steps_per_second": 4.929, |
|
"eval_wer": 0.49492278400676964, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.937547834073167, |
|
"grad_norm": 4.163125514984131, |
|
"learning_rate": 0.00026392481203007514, |
|
"loss": 0.4825, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.9566814633399663, |
|
"grad_norm": 5.920992374420166, |
|
"learning_rate": 0.00026317293233082706, |
|
"loss": 0.4741, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9566814633399663, |
|
"eval_loss": null,
|
"eval_runtime": 177.7742, |
|
"eval_samples_per_second": 39.5, |
|
"eval_steps_per_second": 4.939, |
|
"eval_wer": 0.48648720118468375, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9758150926067657, |
|
"grad_norm": 1.6138001680374146, |
|
"learning_rate": 0.00026242105263157893, |
|
"loss": 0.4792, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.994948721873565, |
|
"grad_norm": 4.549112796783447, |
|
"learning_rate": 0.0002616691729323308, |
|
"loss": 0.4816, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.994948721873565, |
|
"eval_loss": null,
|
"eval_runtime": 178.2909, |
|
"eval_samples_per_second": 39.385, |
|
"eval_steps_per_second": 4.925, |
|
"eval_wer": 0.5054606515760525, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0140823511403643, |
|
"grad_norm": 1.6239593029022217, |
|
"learning_rate": 0.00026091729323308267, |
|
"loss": 0.4551, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.0332159804071637, |
|
"grad_norm": 0.7770557999610901, |
|
"learning_rate": 0.0002601654135338346, |
|
"loss": 0.44, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0332159804071637, |
|
"eval_loss": null,
|
"eval_runtime": 178.2997, |
|
"eval_samples_per_second": 39.383, |
|
"eval_steps_per_second": 4.924, |
|
"eval_wer": 0.47974402369367464, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0523496096739628, |
|
"grad_norm": 2.1102957725524902, |
|
"learning_rate": 0.00025941503759398493, |
|
"loss": 0.4367, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.0714832389407623, |
|
"grad_norm": 3.1549980640411377, |
|
"learning_rate": 0.0002586631578947368, |
|
"loss": 0.4359, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.0714832389407623, |
|
"eval_loss": null,
|
"eval_runtime": 177.6407, |
|
"eval_samples_per_second": 39.529, |
|
"eval_steps_per_second": 4.943, |
|
"eval_wer": 0.4912074254283901, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.0906168682075617, |
|
"grad_norm": 0.7373610138893127, |
|
"learning_rate": 0.0002579112781954887, |
|
"loss": 0.429, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.1097504974743608, |
|
"grad_norm": 2.5423829555511475, |
|
"learning_rate": 0.0002571593984962406, |
|
"loss": 0.411, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1097504974743608, |
|
"eval_loss": null,
|
"eval_runtime": 177.8711, |
|
"eval_samples_per_second": 39.478, |
|
"eval_steps_per_second": 4.936, |
|
"eval_wer": 0.4773640786968479, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1288841267411602, |
|
"grad_norm": 1.5159779787063599, |
|
"learning_rate": 0.000256409022556391, |
|
"loss": 0.4162, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.1480177560079596, |
|
"grad_norm": 0.6960669159889221, |
|
"learning_rate": 0.00025565714285714284, |
|
"loss": 0.4298, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1480177560079596, |
|
"eval_loss": null,
|
"eval_runtime": 176.6665, |
|
"eval_samples_per_second": 39.747, |
|
"eval_steps_per_second": 4.97, |
|
"eval_wer": 0.4772847471969537, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1671513852747588, |
|
"grad_norm": 0.788869321346283, |
|
"learning_rate": 0.0002549052631578947, |
|
"loss": 0.4405, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.1862850145415582, |
|
"grad_norm": 0.8728181719779968, |
|
"learning_rate": 0.0002541533834586466, |
|
"loss": 0.4305, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.1862850145415582, |
|
"eval_loss": null,
|
"eval_runtime": 177.2604, |
|
"eval_samples_per_second": 39.614, |
|
"eval_steps_per_second": 4.953, |
|
"eval_wer": 0.489753014596996, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2054186438083576, |
|
"grad_norm": 1.0415312051773071, |
|
"learning_rate": 0.00025340300751879697, |
|
"loss": 0.423, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.2245522730751568, |
|
"grad_norm": 1.1658622026443481, |
|
"learning_rate": 0.00025265112781954884, |
|
"loss": 0.4126, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2245522730751568, |
|
"eval_loss": null,
|
"eval_runtime": 177.0921, |
|
"eval_samples_per_second": 39.652, |
|
"eval_steps_per_second": 4.958, |
|
"eval_wer": 0.4738867146181511, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2436859023419562, |
|
"grad_norm": 1.2818922996520996, |
|
"learning_rate": 0.00025189924812030076, |
|
"loss": 0.428, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.2628195316087556, |
|
"grad_norm": 1.18551504611969, |
|
"learning_rate": 0.00025114736842105263, |
|
"loss": 0.4234, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.2628195316087556, |
|
"eval_loss": null,
|
"eval_runtime": 177.167, |
|
"eval_samples_per_second": 39.635, |
|
"eval_steps_per_second": 4.956, |
|
"eval_wer": 0.48441136027078485, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.2819531608755548, |
|
"grad_norm": 1.017686367034912, |
|
"learning_rate": 0.0002503954887218045, |
|
"loss": 0.4334, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.3010867901423542, |
|
"grad_norm": 0.9811512231826782, |
|
"learning_rate": 0.00024964360902255637, |
|
"loss": 0.4252, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3010867901423542, |
|
"eval_loss": null,
|
"eval_runtime": 183.3489, |
|
"eval_samples_per_second": 38.299, |
|
"eval_steps_per_second": 4.789, |
|
"eval_wer": 0.47629310344827586, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3202204194091536, |
|
"grad_norm": 1.2876335382461548, |
|
"learning_rate": 0.0002488917293233083, |
|
"loss": 0.4094, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.3393540486759528, |
|
"grad_norm": 0.9524905681610107, |
|
"learning_rate": 0.0002481413533834586, |
|
"loss": 0.4106, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3393540486759528, |
|
"eval_loss": null,
|
"eval_runtime": 178.3382, |
|
"eval_samples_per_second": 39.375, |
|
"eval_steps_per_second": 4.923, |
|
"eval_wer": 0.47093822720541567, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3584876779427522, |
|
"grad_norm": 1.4819426536560059, |
|
"learning_rate": 0.0002473894736842105, |
|
"loss": 0.4082, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.3776213072095516, |
|
"grad_norm": 2.091412305831909, |
|
"learning_rate": 0.0002466375939849624, |
|
"loss": 0.4254, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.3776213072095516, |
|
"eval_loss": null,
|
"eval_runtime": 174.5569, |
|
"eval_samples_per_second": 40.228, |
|
"eval_steps_per_second": 5.03, |
|
"eval_wer": 0.47370160778506454, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.3967549364763507, |
|
"grad_norm": 0.9154180288314819, |
|
"learning_rate": 0.00024588571428571423, |
|
"loss": 0.4101, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.4158885657431501, |
|
"grad_norm": 0.6072717308998108, |
|
"learning_rate": 0.00024513383458646615, |
|
"loss": 0.4245, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.4158885657431501, |
|
"eval_loss": null,
|
"eval_runtime": 176.0739, |
|
"eval_samples_per_second": 39.881, |
|
"eval_steps_per_second": 4.987, |
|
"eval_wer": 0.4533530780621959, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.4350221950099495, |
|
"grad_norm": 0.8788403868675232, |
|
"learning_rate": 0.000244381954887218, |
|
"loss": 0.4139, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.4541558242767487, |
|
"grad_norm": 1.6312676668167114, |
|
"learning_rate": 0.00024363007518796991, |
|
"loss": 0.4154, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.4541558242767487, |
|
"eval_loss": null,
|
"eval_runtime": 175.6948, |
|
"eval_samples_per_second": 39.967, |
|
"eval_steps_per_second": 4.997, |
|
"eval_wer": 0.4566056695578591, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.4732894535435481, |
|
"grad_norm": 1.2530635595321655, |
|
"learning_rate": 0.00024287819548872178, |
|
"loss": 0.403, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.4924230828103475, |
|
"grad_norm": 0.8397653102874756, |
|
"learning_rate": 0.00024212631578947368, |
|
"loss": 0.4071, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.4924230828103475, |
|
"eval_loss": null,
|
"eval_runtime": 175.5585, |
|
"eval_samples_per_second": 39.998, |
|
"eval_steps_per_second": 5.001, |
|
"eval_wer": 0.4634546223820605, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.5115567120771467, |
|
"grad_norm": 1.685242772102356, |
|
"learning_rate": 0.00024137443609022552, |
|
"loss": 0.3912, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.530690341343946, |
|
"grad_norm": 1.5319820642471313, |
|
"learning_rate": 0.00024062255639097744, |
|
"loss": 0.4065, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.530690341343946, |
|
"eval_loss": null,
|
"eval_runtime": 176.3036, |
|
"eval_samples_per_second": 39.829, |
|
"eval_steps_per_second": 4.98, |
|
"eval_wer": 0.4668394330442141, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5498239706107455, |
|
"grad_norm": 7.0581955909729, |
|
"learning_rate": 0.00023987067669172928, |
|
"loss": 0.4006, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.5689575998775447, |
|
"grad_norm": 0.6705722808837891, |
|
"learning_rate": 0.0002391203007518797, |
|
"loss": 0.4086, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.5689575998775447, |
|
"eval_loss": null,
|
"eval_runtime": 176.1394, |
|
"eval_samples_per_second": 39.866, |
|
"eval_steps_per_second": 4.985, |
|
"eval_wer": 0.46069124180241167, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.588091229144344, |
|
"grad_norm": 0.794866144657135, |
|
"learning_rate": 0.00023836842105263154, |
|
"loss": 0.4026, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.6072248584111435, |
|
"grad_norm": 0.7202442288398743, |
|
"learning_rate": 0.00023761654135338344, |
|
"loss": 0.4037, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6072248584111435, |
|
"eval_loss": null,
|
"eval_runtime": 175.2975, |
|
"eval_samples_per_second": 40.058, |
|
"eval_steps_per_second": 5.009, |
|
"eval_wer": 0.46155066638459913, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6263584876779427, |
|
"grad_norm": 1.2002875804901123, |
|
"learning_rate": 0.00023686616541353383, |
|
"loss": 0.4013, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.645492116944742, |
|
"grad_norm": 1.2249990701675415, |
|
"learning_rate": 0.00023611428571428567, |
|
"loss": 0.4071, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.645492116944742, |
|
"eval_loss": null,
|
"eval_runtime": 177.5513, |
|
"eval_samples_per_second": 39.549, |
|
"eval_steps_per_second": 4.945, |
|
"eval_wer": 0.46074412946900783, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.6646257462115415, |
|
"grad_norm": 0.7734994292259216, |
|
"learning_rate": 0.00023536240601503756, |
|
"loss": 0.3834, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.6837593754783406, |
|
"grad_norm": 1.9896817207336426, |
|
"learning_rate": 0.00023461052631578943, |
|
"loss": 0.394, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.6837593754783406, |
|
"eval_loss": null,
|
"eval_runtime": 176.2037, |
|
"eval_samples_per_second": 39.852, |
|
"eval_steps_per_second": 4.983, |
|
"eval_wer": 0.4430532049925957, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.70289300474514, |
|
"grad_norm": 1.6369848251342773, |
|
"learning_rate": 0.00023385864661654133, |
|
"loss": 0.4006, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.7220266340119394, |
|
"grad_norm": 0.7955853343009949, |
|
"learning_rate": 0.0002331067669172932, |
|
"loss": 0.4103, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7220266340119394, |
|
"eval_loss": null,
|
"eval_runtime": 178.3273, |
|
"eval_samples_per_second": 39.377, |
|
"eval_steps_per_second": 4.924, |
|
"eval_wer": 0.43978739158028346, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7411602632787386, |
|
"grad_norm": 1.0250253677368164, |
|
"learning_rate": 0.00023235639097744359, |
|
"loss": 0.3876, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.760293892545538, |
|
"grad_norm": 1.1575045585632324, |
|
"learning_rate": 0.00023160451127819545, |
|
"loss": 0.3909, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.760293892545538, |
|
"eval_loss": null,
|
"eval_runtime": 177.8973, |
|
"eval_samples_per_second": 39.472, |
|
"eval_steps_per_second": 4.935, |
|
"eval_wer": 0.4454728157393696, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.7794275218123374, |
|
"grad_norm": 0.6429355144500732, |
|
"learning_rate": 0.00023085263157894735, |
|
"loss": 0.3875, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.7985611510791366, |
|
"grad_norm": 0.8371389508247375, |
|
"learning_rate": 0.00023010075187969922, |
|
"loss": 0.3909, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.7985611510791366, |
|
"eval_loss": null,
|
"eval_runtime": 178.4648, |
|
"eval_samples_per_second": 39.347, |
|
"eval_steps_per_second": 4.92, |
|
"eval_wer": 0.442074783160567, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.817694780345936, |
|
"grad_norm": 1.2913625240325928, |
|
"learning_rate": 0.0002293488721804511, |
|
"loss": 0.3979, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.8368284096127354, |
|
"grad_norm": 1.1963073015213013, |
|
"learning_rate": 0.00022859699248120298, |
|
"loss": 0.3982, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.8368284096127354, |
|
"eval_loss": null,
|
"eval_runtime": 178.3863, |
|
"eval_samples_per_second": 39.364, |
|
"eval_steps_per_second": 4.922, |
|
"eval_wer": 0.4370636767505818, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.8559620388795346, |
|
"grad_norm": 1.5472427606582642, |
|
"learning_rate": 0.00022784661654135337, |
|
"loss": 0.3905, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.875095668146334, |
|
"grad_norm": 0.7495508790016174, |
|
"learning_rate": 0.00022709473684210524, |
|
"loss": 0.3896, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.875095668146334, |
|
"eval_loss": null,
|
"eval_runtime": 177.5084, |
|
"eval_samples_per_second": 39.559, |
|
"eval_steps_per_second": 4.946, |
|
"eval_wer": 0.4417706790776391, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.8942292974131334, |
|
"grad_norm": 1.171025037765503, |
|
"learning_rate": 0.00022634285714285713, |
|
"loss": 0.3883, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.9133629266799326, |
|
"grad_norm": 1.1951338052749634, |
|
"learning_rate": 0.000225590977443609, |
|
"loss": 0.3986, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.9133629266799326, |
|
"eval_loss": null,
|
"eval_runtime": 178.1273, |
|
"eval_samples_per_second": 39.421, |
|
"eval_steps_per_second": 4.929, |
|
"eval_wer": 0.4381610958324519, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.932496555946732, |
|
"grad_norm": 1.0499491691589355, |
|
"learning_rate": 0.0002248390977443609, |
|
"loss": 0.3842, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.9516301852135314, |
|
"grad_norm": 0.6040648221969604, |
|
"learning_rate": 0.00022408721804511277, |
|
"loss": 0.3968, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.9516301852135314, |
|
"eval_loss": null,
|
"eval_runtime": 178.1104, |
|
"eval_samples_per_second": 39.425, |
|
"eval_steps_per_second": 4.93, |
|
"eval_wer": 0.42753067484662577, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.9707638144803306, |
|
"grad_norm": 0.7306642532348633, |
|
"learning_rate": 0.00022333533834586466, |
|
"loss": 0.3795, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.98989744374713, |
|
"grad_norm": 0.781111478805542, |
|
"learning_rate": 0.00022258345864661653, |
|
"loss": 0.4025, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.98989744374713, |
|
"eval_loss": null,
|
"eval_runtime": 178.6685, |
|
"eval_samples_per_second": 39.302, |
|
"eval_steps_per_second": 4.914, |
|
"eval_wer": 0.4204040617727946, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.0090310730139294, |
|
"grad_norm": 0.8605564832687378, |
|
"learning_rate": 0.00022183157894736842, |
|
"loss": 0.3645, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.0281647022807285, |
|
"grad_norm": 0.8945469260215759, |
|
"learning_rate": 0.0002210796992481203, |
|
"loss": 0.3404, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.0281647022807285, |
|
"eval_loss": null,
|
"eval_runtime": 178.4963, |
|
"eval_samples_per_second": 39.34, |
|
"eval_steps_per_second": 4.919, |
|
"eval_wer": 0.42721334884704887, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.0472983315475277, |
|
"grad_norm": 0.7925958633422852, |
|
"learning_rate": 0.0002203278195488722, |
|
"loss": 0.3444, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.0664319608143273, |
|
"grad_norm": 1.2213307619094849, |
|
"learning_rate": 0.00021957744360902255, |
|
"loss": 0.3354, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.0664319608143273, |
|
"eval_loss": null,
|
"eval_runtime": 177.2885, |
|
"eval_samples_per_second": 39.608, |
|
"eval_steps_per_second": 4.952, |
|
"eval_wer": 0.4297519568436641, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.0855655900811265, |
|
"grad_norm": 1.751560091972351, |
|
"learning_rate": 0.00021882706766917292, |
|
"loss": 0.3468, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.1046992193479257, |
|
"grad_norm": 0.7927623391151428, |
|
"learning_rate": 0.0002180766917293233, |
|
"loss": 0.3352, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.1046992193479257, |
|
"eval_loss": null,
|
"eval_runtime": 180.1438, |
|
"eval_samples_per_second": 38.98, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.42244023693674637, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.1238328486147253, |
|
"grad_norm": 1.0091408491134644, |
|
"learning_rate": 0.00021732481203007517, |
|
"loss": 0.3579, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.1429664778815245, |
|
"grad_norm": 1.7537100315093994, |
|
"learning_rate": 0.00021657293233082707, |
|
"loss": 0.3384, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.1429664778815245, |
|
"eval_loss": null,
|
"eval_runtime": 179.0104, |
|
"eval_samples_per_second": 39.227, |
|
"eval_steps_per_second": 4.905, |
|
"eval_wer": 0.4267241379310345, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.1621001071483237, |
|
"grad_norm": 1.3135699033737183, |
|
"learning_rate": 0.00021582105263157894, |
|
"loss": 0.3408, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.1812337364151233, |
|
"grad_norm": 0.9029154181480408, |
|
"learning_rate": 0.00021506917293233083, |
|
"loss": 0.3342, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.1812337364151233, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.7816, |
|
"eval_samples_per_second": 39.277, |
|
"eval_steps_per_second": 4.911, |
|
"eval_wer": 0.4187248783583668, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.2003673656819225, |
|
"grad_norm": 0.6189078688621521, |
|
"learning_rate": 0.0002143172932330827, |
|
"loss": 0.339, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.2195009949487217, |
|
"grad_norm": 1.0600789785385132, |
|
"learning_rate": 0.0002135654135338346, |
|
"loss": 0.3425, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.2195009949487217, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.4307, |
|
"eval_samples_per_second": 39.354, |
|
"eval_steps_per_second": 4.921, |
|
"eval_wer": 0.4198884070234821, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.2386346242155213, |
|
"grad_norm": 1.0213968753814697, |
|
"learning_rate": 0.00021281503759398496, |
|
"loss": 0.3439, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.2577682534823205, |
|
"grad_norm": 1.2084991931915283, |
|
"learning_rate": 0.0002120631578947368, |
|
"loss": 0.3417, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.2577682534823205, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.6471, |
|
"eval_samples_per_second": 39.528, |
|
"eval_steps_per_second": 4.942, |
|
"eval_wer": 0.4174423524434102, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.2769018827491196, |
|
"grad_norm": 1.1838810443878174, |
|
"learning_rate": 0.0002113112781954887, |
|
"loss": 0.352, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.2960355120159193, |
|
"grad_norm": 1.033828854560852, |
|
"learning_rate": 0.00021055939849624056, |
|
"loss": 0.3355, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.2960355120159193, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.3447, |
|
"eval_samples_per_second": 39.373, |
|
"eval_steps_per_second": 4.923, |
|
"eval_wer": 0.4157896128622805, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.3151691412827184, |
|
"grad_norm": 1.062028408050537, |
|
"learning_rate": 0.00020980751879699246, |
|
"loss": 0.3397, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.3343027705495176, |
|
"grad_norm": 0.8648023009300232, |
|
"learning_rate": 0.00020905563909774433, |
|
"loss": 0.3501, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.3343027705495176, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.0407, |
|
"eval_samples_per_second": 39.44, |
|
"eval_steps_per_second": 4.931, |
|
"eval_wer": 0.41284112544954515, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.3534363998163172, |
|
"grad_norm": 0.7826744318008423, |
|
"learning_rate": 0.00020830375939849622, |
|
"loss": 0.3276, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.3725700290831164, |
|
"grad_norm": 0.8527004718780518, |
|
"learning_rate": 0.0002075518796992481, |
|
"loss": 0.3358, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.3725700290831164, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.3539, |
|
"eval_samples_per_second": 39.371, |
|
"eval_steps_per_second": 4.923, |
|
"eval_wer": 0.4115585995345885, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.3917036583499156, |
|
"grad_norm": 1.9309333562850952, |
|
"learning_rate": 0.00020679999999999999, |
|
"loss": 0.3294, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.4108372876167152, |
|
"grad_norm": 1.1275138854980469, |
|
"learning_rate": 0.00020604812030075185, |
|
"loss": 0.3343, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.4108372876167152, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.4409, |
|
"eval_samples_per_second": 39.352, |
|
"eval_steps_per_second": 4.92, |
|
"eval_wer": 0.4163978210281362, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.4299709168835144, |
|
"grad_norm": 3.78141450881958, |
|
"learning_rate": 0.00020529624060150375, |
|
"loss": 0.3269, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.4491045461503136, |
|
"grad_norm": 1.6814829111099243, |
|
"learning_rate": 0.00020454436090225562, |
|
"loss": 0.3343, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.4491045461503136, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.6927, |
|
"eval_samples_per_second": 39.518, |
|
"eval_steps_per_second": 4.941, |
|
"eval_wer": 0.41787867569282844, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.468238175417113, |
|
"grad_norm": 1.0685131549835205, |
|
"learning_rate": 0.000203793984962406, |
|
"loss": 0.3346, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.4873718046839124, |
|
"grad_norm": 0.6016332507133484, |
|
"learning_rate": 0.00020304210526315788, |
|
"loss": 0.3367, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.4873718046839124, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.6952, |
|
"eval_samples_per_second": 39.296, |
|
"eval_steps_per_second": 4.913, |
|
"eval_wer": 0.411730484451026, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.5065054339507116, |
|
"grad_norm": 0.9282209277153015, |
|
"learning_rate": 0.00020229022556390977, |
|
"loss": 0.3307, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.525639063217511, |
|
"grad_norm": 0.604811429977417, |
|
"learning_rate": 0.00020153984962406014, |
|
"loss": 0.3237, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.525639063217511, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9582, |
|
"eval_samples_per_second": 39.459, |
|
"eval_steps_per_second": 4.934, |
|
"eval_wer": 0.40677226570763697, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.5447726924843104, |
|
"grad_norm": 1.4802906513214111, |
|
"learning_rate": 0.000200787969924812, |
|
"loss": 0.3243, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.5639063217511096, |
|
"grad_norm": 1.0026686191558838, |
|
"learning_rate": 0.0002000360902255639, |
|
"loss": 0.3335, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.5639063217511096, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.1808, |
|
"eval_samples_per_second": 39.409, |
|
"eval_steps_per_second": 4.928, |
|
"eval_wer": 0.40802834778929553, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.583039951017909, |
|
"grad_norm": 0.7298141717910767, |
|
"learning_rate": 0.00019928421052631577, |
|
"loss": 0.3326, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.6021735802847084, |
|
"grad_norm": 0.5921869874000549, |
|
"learning_rate": 0.00019853383458646616, |
|
"loss": 0.3254, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6021735802847084, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.3757, |
|
"eval_samples_per_second": 39.588, |
|
"eval_steps_per_second": 4.95, |
|
"eval_wer": 0.3981780198857626, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6213072095515075, |
|
"grad_norm": 0.7208895683288574, |
|
"learning_rate": 0.00019778195488721803, |
|
"loss": 0.3235, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.640440838818307, |
|
"grad_norm": 1.1068922281265259, |
|
"learning_rate": 0.00019703007518796992, |
|
"loss": 0.3295, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.640440838818307, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.2384, |
|
"eval_samples_per_second": 39.844, |
|
"eval_steps_per_second": 4.982, |
|
"eval_wer": 0.41358155278189124, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 0.8548173308372498, |
|
"learning_rate": 0.0001962781954887218, |
|
"loss": 0.3256, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.6787080973519055, |
|
"grad_norm": 8.747632026672363, |
|
"learning_rate": 0.00019552631578947368, |
|
"loss": 0.3326, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.6787080973519055, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.118, |
|
"eval_samples_per_second": 39.646, |
|
"eval_steps_per_second": 4.957, |
|
"eval_wer": 0.4045113179606516, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.697841726618705, |
|
"grad_norm": 1.7842276096343994, |
|
"learning_rate": 0.00019477443609022555, |
|
"loss": 0.3192, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.7169753558855043, |
|
"grad_norm": 2.2976646423339844, |
|
"learning_rate": 0.00019402255639097745, |
|
"loss": 0.3167, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.7169753558855043, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.2794, |
|
"eval_samples_per_second": 39.834, |
|
"eval_steps_per_second": 4.981, |
|
"eval_wer": 0.4043658768775122, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.7361089851523035, |
|
"grad_norm": 1.3297739028930664, |
|
"learning_rate": 0.0001932706766917293, |
|
"loss": 0.3179, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.755242614419103, |
|
"grad_norm": 1.6274834871292114, |
|
"learning_rate": 0.0001925203007518797, |
|
"loss": 0.3376, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.755242614419103, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.1612, |
|
"eval_samples_per_second": 40.089, |
|
"eval_steps_per_second": 5.013, |
|
"eval_wer": 0.3942114448910514, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.7743762436859023, |
|
"grad_norm": 0.5883073210716248, |
|
"learning_rate": 0.00019176842105263155, |
|
"loss": 0.3109, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.7935098729527015, |
|
"grad_norm": 0.9327465891838074, |
|
"learning_rate": 0.00019101804511278194, |
|
"loss": 0.3245, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.7935098729527015, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.6566, |
|
"eval_samples_per_second": 39.976, |
|
"eval_steps_per_second": 4.998, |
|
"eval_wer": 0.3957848529722869, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.812643502219501, |
|
"grad_norm": 0.9724407196044922, |
|
"learning_rate": 0.00019026616541353383, |
|
"loss": 0.3128, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.8317771314863003, |
|
"grad_norm": 0.7509967684745789, |
|
"learning_rate": 0.00018951428571428567, |
|
"loss": 0.315, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.8317771314863003, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.6684, |
|
"eval_samples_per_second": 39.973, |
|
"eval_steps_per_second": 4.998, |
|
"eval_wer": 0.40650782737465624, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.8509107607530995, |
|
"grad_norm": 0.7796798944473267, |
|
"learning_rate": 0.00018876240601503757, |
|
"loss": 0.3236, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.870044390019899, |
|
"grad_norm": 1.8985257148742676, |
|
"learning_rate": 0.00018801052631578944, |
|
"loss": 0.327, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.870044390019899, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.0416, |
|
"eval_samples_per_second": 39.888, |
|
"eval_steps_per_second": 4.987, |
|
"eval_wer": 0.40103395388195473, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.8891780192866983, |
|
"grad_norm": 0.909794807434082, |
|
"learning_rate": 0.00018725864661654133, |
|
"loss": 0.3235, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.9083116485534974, |
|
"grad_norm": 1.1613683700561523, |
|
"learning_rate": 0.0001865082706766917, |
|
"loss": 0.3211, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.9083116485534974, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.4137, |
|
"eval_samples_per_second": 39.804, |
|
"eval_steps_per_second": 4.977, |
|
"eval_wer": 0.39259837105986883, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.927445277820297, |
|
"grad_norm": 0.77381831407547, |
|
"learning_rate": 0.0001857563909774436, |
|
"loss": 0.3183, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.9465789070870962, |
|
"grad_norm": 7.940882205963135, |
|
"learning_rate": 0.00018500451127819546, |
|
"loss": 0.323, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.9465789070870962, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.2912, |
|
"eval_samples_per_second": 39.385, |
|
"eval_steps_per_second": 4.925, |
|
"eval_wer": 0.4005315210492913, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.9657125363538954, |
|
"grad_norm": 0.49370139837265015, |
|
"learning_rate": 0.00018425263157894735, |
|
"loss": 0.3125, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.984846165620695, |
|
"grad_norm": 1.297203540802002, |
|
"learning_rate": 0.00018350075187969922, |
|
"loss": 0.323, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.984846165620695, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.4201, |
|
"eval_samples_per_second": 39.578, |
|
"eval_steps_per_second": 4.949, |
|
"eval_wer": 0.38635762640152316, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.0039797948874942, |
|
"grad_norm": 0.7693071365356445, |
|
"learning_rate": 0.0001827503759398496, |
|
"loss": 0.3041, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 3.0231134241542934, |
|
"grad_norm": 0.9858660697937012, |
|
"learning_rate": 0.00018199849624060148, |
|
"loss": 0.2747, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.0231134241542934, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.3853, |
|
"eval_samples_per_second": 39.145, |
|
"eval_steps_per_second": 4.894, |
|
"eval_wer": 0.39882589380156547, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.042247053421093, |
|
"grad_norm": 0.7790058851242065, |
|
"learning_rate": 0.00018124661654135335, |
|
"loss": 0.2799, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 3.061380682687892, |
|
"grad_norm": 0.9347246885299683, |
|
"learning_rate": 0.00018049473684210525, |
|
"loss": 0.2706, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.061380682687892, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.71, |
|
"eval_samples_per_second": 39.737, |
|
"eval_steps_per_second": 4.969, |
|
"eval_wer": 0.3860799661518934, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.0805143119546914, |
|
"grad_norm": 0.478522926568985, |
|
"learning_rate": 0.0001797428571428571, |
|
"loss": 0.2807, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.099647941221491, |
|
"grad_norm": 0.6804964542388916, |
|
"learning_rate": 0.000178990977443609, |
|
"loss": 0.2696, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.099647941221491, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.3621, |
|
"eval_samples_per_second": 39.591, |
|
"eval_steps_per_second": 4.95, |
|
"eval_wer": 0.38777237148297017, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.11878157048829, |
|
"grad_norm": 0.6777291893959045, |
|
"learning_rate": 0.00017823909774436088, |
|
"loss": 0.2783, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 3.1379151997550894, |
|
"grad_norm": 0.9108553528785706, |
|
"learning_rate": 0.00017748721804511277, |
|
"loss": 0.2792, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.1379151997550894, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9123, |
|
"eval_samples_per_second": 39.469, |
|
"eval_steps_per_second": 4.935, |
|
"eval_wer": 0.39447588322403215, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.157048829021889, |
|
"grad_norm": 0.7335214018821716, |
|
"learning_rate": 0.00017673533834586464, |
|
"loss": 0.2818, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 3.176182458288688, |
|
"grad_norm": 1.0339977741241455, |
|
"learning_rate": 0.00017598496240601503, |
|
"loss": 0.2809, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.176182458288688, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.3969, |
|
"eval_samples_per_second": 39.584, |
|
"eval_steps_per_second": 4.949, |
|
"eval_wer": 0.39493865030674846, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.1953160875554873, |
|
"grad_norm": 1.3720539808273315, |
|
"learning_rate": 0.0001752330827067669, |
|
"loss": 0.2767, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 3.214449716822287, |
|
"grad_norm": 1.6575071811676025, |
|
"learning_rate": 0.0001744827067669173, |
|
"loss": 0.2709, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.214449716822287, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9587, |
|
"eval_samples_per_second": 39.459, |
|
"eval_steps_per_second": 4.934, |
|
"eval_wer": 0.3852205415697059, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.233583346089086, |
|
"grad_norm": 0.9274744987487793, |
|
"learning_rate": 0.00017373082706766916, |
|
"loss": 0.2692, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 3.2527169753558853, |
|
"grad_norm": 0.6898565292358398, |
|
"learning_rate": 0.00017297894736842105, |
|
"loss": 0.2808, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.2527169753558853, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9702, |
|
"eval_samples_per_second": 39.456, |
|
"eval_steps_per_second": 4.933, |
|
"eval_wer": 0.3912629574783161, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.271850604622685, |
|
"grad_norm": 0.44578102231025696, |
|
"learning_rate": 0.00017222706766917292, |
|
"loss": 0.2747, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 3.290984233889484, |
|
"grad_norm": 0.3729807138442993, |
|
"learning_rate": 0.00017147518796992482, |
|
"loss": 0.2746, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.290984233889484, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.7245, |
|
"eval_samples_per_second": 39.511, |
|
"eval_steps_per_second": 4.94, |
|
"eval_wer": 0.38564364290247516, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.3101178631562833, |
|
"grad_norm": 0.7046172618865967, |
|
"learning_rate": 0.00017072481203007518, |
|
"loss": 0.2722, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 3.329251492423083, |
|
"grad_norm": 1.7671455144882202, |
|
"learning_rate": 0.00016997293233082705, |
|
"loss": 0.2633, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.329251492423083, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.1063, |
|
"eval_samples_per_second": 39.426, |
|
"eval_steps_per_second": 4.93, |
|
"eval_wer": 0.38845991114872014, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.348385121689882, |
|
"grad_norm": 0.8641050457954407, |
|
"learning_rate": 0.00016922105263157894, |
|
"loss": 0.2694, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 3.3675187509566813, |
|
"grad_norm": 0.6219012141227722, |
|
"learning_rate": 0.0001684706766917293, |
|
"loss": 0.2745, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.3675187509566813, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.0455, |
|
"eval_samples_per_second": 39.439, |
|
"eval_steps_per_second": 4.931, |
|
"eval_wer": 0.38491643748677806, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.386652380223481, |
|
"grad_norm": 0.6935294270515442, |
|
"learning_rate": 0.0001677187969924812, |
|
"loss": 0.277, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 3.40578600949028, |
|
"grad_norm": 0.6084161400794983, |
|
"learning_rate": 0.00016696691729323307, |
|
"loss": 0.2832, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.40578600949028, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.6527, |
|
"eval_samples_per_second": 39.305, |
|
"eval_steps_per_second": 4.915, |
|
"eval_wer": 0.3820869473238841, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.4249196387570793, |
|
"grad_norm": 1.1567957401275635, |
|
"learning_rate": 0.00016621503759398497, |
|
"loss": 0.2898, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 3.444053268023879, |
|
"grad_norm": 0.7702553868293762, |
|
"learning_rate": 0.0001654631578947368, |
|
"loss": 0.2806, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.444053268023879, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9228, |
|
"eval_samples_per_second": 39.467, |
|
"eval_steps_per_second": 4.935, |
|
"eval_wer": 0.38568330865242223, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.463186897290678, |
|
"grad_norm": 0.6563850045204163, |
|
"learning_rate": 0.00016471127819548873, |
|
"loss": 0.2658, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.4823205265574773, |
|
"grad_norm": 0.33683669567108154, |
|
"learning_rate": 0.00016395939849624057, |
|
"loss": 0.2756, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.4823205265574773, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.2575, |
|
"eval_samples_per_second": 39.392, |
|
"eval_steps_per_second": 4.925, |
|
"eval_wer": 0.381015972075312, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.501454155824277, |
|
"grad_norm": 0.5872700214385986, |
|
"learning_rate": 0.00016320751879699246, |
|
"loss": 0.2696, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 3.520587785091076, |
|
"grad_norm": 7.343397617340088, |
|
"learning_rate": 0.00016245563909774433, |
|
"loss": 0.2733, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.520587785091076, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.508, |
|
"eval_samples_per_second": 39.337, |
|
"eval_steps_per_second": 4.919, |
|
"eval_wer": 0.37381002750158665, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.5397214143578752, |
|
"grad_norm": 0.6450570821762085, |
|
"learning_rate": 0.00016170526315789472, |
|
"loss": 0.2721, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 3.558855043624675, |
|
"grad_norm": 0.5071462988853455, |
|
"learning_rate": 0.0001609533834586466, |
|
"loss": 0.2807, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.558855043624675, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.146, |
|
"eval_samples_per_second": 39.64, |
|
"eval_steps_per_second": 4.956, |
|
"eval_wer": 0.3857097524857203, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.577988672891474, |
|
"grad_norm": 0.909946084022522, |
|
"learning_rate": 0.0001602015037593985, |
|
"loss": 0.2676, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 3.597122302158273, |
|
"grad_norm": 0.7381096482276917, |
|
"learning_rate": 0.00015944962406015036, |
|
"loss": 0.2773, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.597122302158273, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.6056, |
|
"eval_samples_per_second": 39.316, |
|
"eval_steps_per_second": 4.916, |
|
"eval_wer": 0.37201184683731753, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.616255931425073, |
|
"grad_norm": 0.5159269571304321, |
|
"learning_rate": 0.00015869774436090225, |
|
"loss": 0.2801, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 3.635389560691872, |
|
"grad_norm": 1.289354920387268, |
|
"learning_rate": 0.00015794586466165412, |
|
"loss": 0.2725, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.635389560691872, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.0499, |
|
"eval_samples_per_second": 39.218, |
|
"eval_steps_per_second": 4.904, |
|
"eval_wer": 0.36897080600803894, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.654523189958671, |
|
"grad_norm": 0.7305335998535156, |
|
"learning_rate": 0.000157193984962406, |
|
"loss": 0.2618, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 3.673656819225471, |
|
"grad_norm": 0.5354152917861938, |
|
"learning_rate": 0.00015644210526315788, |
|
"loss": 0.2614, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.673656819225471, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.0067, |
|
"eval_samples_per_second": 39.228, |
|
"eval_steps_per_second": 4.905, |
|
"eval_wer": 0.375343769832875, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.69279044849227, |
|
"grad_norm": 0.4864795506000519, |
|
"learning_rate": 0.00015569022556390978, |
|
"loss": 0.261, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 3.711924077759069, |
|
"grad_norm": 0.6722401976585388, |
|
"learning_rate": 0.00015493834586466164, |
|
"loss": 0.2674, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.711924077759069, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9604, |
|
"eval_samples_per_second": 39.458, |
|
"eval_steps_per_second": 4.934, |
|
"eval_wer": 0.38257615823989843, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.731057707025869, |
|
"grad_norm": 0.6855655312538147, |
|
"learning_rate": 0.00015418646616541354, |
|
"loss": 0.2713, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 3.750191336292668, |
|
"grad_norm": 1.3021297454833984, |
|
"learning_rate": 0.0001534345864661654, |
|
"loss": 0.2605, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.750191336292668, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.5188, |
|
"eval_samples_per_second": 39.556, |
|
"eval_steps_per_second": 4.946, |
|
"eval_wer": 0.3733075946689232, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.769324965559467, |
|
"grad_norm": 0.70773845911026, |
|
"learning_rate": 0.0001526827067669173, |
|
"loss": 0.2601, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 3.788458594826267, |
|
"grad_norm": 0.58240807056427, |
|
"learning_rate": 0.00015193082706766917, |
|
"loss": 0.2649, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.788458594826267, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.98, |
|
"eval_samples_per_second": 39.677, |
|
"eval_steps_per_second": 4.961, |
|
"eval_wer": 0.3690633594245822, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.807592224093066, |
|
"grad_norm": 0.624595582485199, |
|
"learning_rate": 0.00015118045112781953, |
|
"loss": 0.2678, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 3.826725853359865, |
|
"grad_norm": 1.283463954925537, |
|
"learning_rate": 0.00015042857142857143, |
|
"loss": 0.2638, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.826725853359865, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.8803, |
|
"eval_samples_per_second": 39.699, |
|
"eval_steps_per_second": 4.964, |
|
"eval_wer": 0.37530410408292786, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.8458594826266648, |
|
"grad_norm": 0.7322863936424255, |
|
"learning_rate": 0.0001496766917293233, |
|
"loss": 0.2743, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 3.864993111893464, |
|
"grad_norm": 0.49134284257888794, |
|
"learning_rate": 0.0001489248120300752, |
|
"loss": 0.2749, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.864993111893464, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.6568, |
|
"eval_samples_per_second": 39.526, |
|
"eval_steps_per_second": 4.942, |
|
"eval_wer": 0.3675163951766448, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.884126741160263, |
|
"grad_norm": 1.705079436302185, |
|
"learning_rate": 0.00014817293233082706, |
|
"loss": 0.2576, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 3.9032603704270628, |
|
"grad_norm": 1.390942931175232, |
|
"learning_rate": 0.00014742105263157893, |
|
"loss": 0.2635, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.9032603704270628, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.7395, |
|
"eval_samples_per_second": 39.507, |
|
"eval_steps_per_second": 4.94, |
|
"eval_wer": 0.3666701925111064, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.922393999693862, |
|
"grad_norm": 0.5910842418670654, |
|
"learning_rate": 0.00014666917293233082, |
|
"loss": 0.2654, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 3.941527628960661, |
|
"grad_norm": 1.1575956344604492, |
|
"learning_rate": 0.0001459172932330827, |
|
"loss": 0.2639, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 3.941527628960661, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.3808, |
|
"eval_samples_per_second": 39.587, |
|
"eval_steps_per_second": 4.95, |
|
"eval_wer": 0.36727840067696216, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 3.9606612582274607, |
|
"grad_norm": 9.209879875183105, |
|
"learning_rate": 0.00014516691729323306, |
|
"loss": 0.2671, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 3.97979488749426, |
|
"grad_norm": 0.8380705714225769, |
|
"learning_rate": 0.00014441654135338345, |
|
"loss": 0.2602, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 3.97979488749426, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.3769, |
|
"eval_samples_per_second": 39.588, |
|
"eval_steps_per_second": 4.95, |
|
"eval_wer": 0.36286228051618363, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 3.998928516761059, |
|
"grad_norm": 0.6727402210235596, |
|
"learning_rate": 0.00014366616541353384, |
|
"loss": 0.2579, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 4.018062146027859, |
|
"grad_norm": 0.38106000423431396, |
|
"learning_rate": 0.0001429142857142857, |
|
"loss": 0.2217, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.018062146027859, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.3027, |
|
"eval_samples_per_second": 39.382, |
|
"eval_steps_per_second": 4.924, |
|
"eval_wer": 0.3644621324307172, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.0371957752946575, |
|
"grad_norm": 3.7064096927642822, |
|
"learning_rate": 0.0001421624060150376, |
|
"loss": 0.2202, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 4.056329404561457, |
|
"grad_norm": 0.49550744891166687, |
|
"learning_rate": 0.00014141203007518797, |
|
"loss": 0.2226, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.056329404561457, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9052, |
|
"eval_samples_per_second": 39.47, |
|
"eval_steps_per_second": 4.935, |
|
"eval_wer": 0.3568859741908187, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.075463033828257, |
|
"grad_norm": 0.628818690776825, |
|
"learning_rate": 0.00014066015037593983, |
|
"loss": 0.2226, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 4.094596663095055, |
|
"grad_norm": 0.48665696382522583, |
|
"learning_rate": 0.0001399082706766917, |
|
"loss": 0.2209, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.094596663095055, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.9704, |
|
"eval_samples_per_second": 39.456, |
|
"eval_steps_per_second": 4.933, |
|
"eval_wer": 0.35495557436005926, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.113730292361855, |
|
"grad_norm": 1.1963294744491577, |
|
"learning_rate": 0.0001391563909774436, |
|
"loss": 0.2197, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 4.132863921628655, |
|
"grad_norm": 0.4918075203895569, |
|
"learning_rate": 0.00013840451127819547, |
|
"loss": 0.2326, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.132863921628655, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.4603, |
|
"eval_samples_per_second": 39.569, |
|
"eval_steps_per_second": 4.948, |
|
"eval_wer": 0.3595303575206262, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.151997550895453, |
|
"grad_norm": 0.4551312029361725, |
|
"learning_rate": 0.00013765263157894736, |
|
"loss": 0.2176, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 4.171131180162253, |
|
"grad_norm": 0.5786845088005066, |
|
"learning_rate": 0.00013690075187969923, |
|
"loss": 0.2203, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.171131180162253, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.5842, |
|
"eval_samples_per_second": 39.32, |
|
"eval_steps_per_second": 4.916, |
|
"eval_wer": 0.3556166701925111, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.190264809429053, |
|
"grad_norm": 0.9616146087646484, |
|
"learning_rate": 0.00013614887218045112, |
|
"loss": 0.2292, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 4.209398438695851, |
|
"grad_norm": 0.5730082392692566, |
|
"learning_rate": 0.000135396992481203, |
|
"loss": 0.2267, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.209398438695851, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.0437, |
|
"eval_samples_per_second": 39.219, |
|
"eval_steps_per_second": 4.904, |
|
"eval_wer": 0.35085678019885763, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.228532067962651, |
|
"grad_norm": 1.2517842054367065, |
|
"learning_rate": 0.00013464661654135338, |
|
"loss": 0.2262, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 4.247665697229451, |
|
"grad_norm": 0.894205629825592, |
|
"learning_rate": 0.00013389473684210525, |
|
"loss": 0.223, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.247665697229451, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.4171, |
|
"eval_samples_per_second": 39.357, |
|
"eval_steps_per_second": 4.921, |
|
"eval_wer": 0.3580891686058811, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.266799326496249, |
|
"grad_norm": 0.6541041731834412, |
|
"learning_rate": 0.00013314285714285715, |
|
"loss": 0.2256, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 4.285932955763049, |
|
"grad_norm": 1.8858749866485596, |
|
"learning_rate": 0.0001323924812030075, |
|
"loss": 0.2273, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.285932955763049, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.7774, |
|
"eval_samples_per_second": 39.278, |
|
"eval_steps_per_second": 4.911, |
|
"eval_wer": 0.35478368944362176, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.305066585029849, |
|
"grad_norm": 0.36083123087882996, |
|
"learning_rate": 0.00013164060150375938, |
|
"loss": 0.2186, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 4.324200214296647, |
|
"grad_norm": 0.8509350419044495, |
|
"learning_rate": 0.00013088872180451125, |
|
"loss": 0.2278, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.324200214296647, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.5764, |
|
"eval_samples_per_second": 39.103, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.34927015020097313, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.343333843563447, |
|
"grad_norm": 2.493048667907715, |
|
"learning_rate": 0.00013013684210526314, |
|
"loss": 0.2283, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 4.362467472830247, |
|
"grad_norm": 0.5552091598510742, |
|
"learning_rate": 0.000129384962406015, |
|
"loss": 0.2372, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.362467472830247, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.2629, |
|
"eval_samples_per_second": 39.172, |
|
"eval_steps_per_second": 4.898, |
|
"eval_wer": 0.3600592341865877, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.381601102097045, |
|
"grad_norm": 2.732189178466797, |
|
"learning_rate": 0.0001286330827067669, |
|
"loss": 0.2209, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 4.400734731363845, |
|
"grad_norm": 0.6724231839179993, |
|
"learning_rate": 0.00012788120300751877, |
|
"loss": 0.22, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.400734731363845, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.4303, |
|
"eval_samples_per_second": 39.354, |
|
"eval_steps_per_second": 4.921, |
|
"eval_wer": 0.3549026866934631, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.419868360630645, |
|
"grad_norm": 0.8096573948860168, |
|
"learning_rate": 0.00012712932330827067, |
|
"loss": 0.2166, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 4.439001989897443, |
|
"grad_norm": 0.32577428221702576, |
|
"learning_rate": 0.00012637744360902254, |
|
"loss": 0.228, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.439001989897443, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.1414, |
|
"eval_samples_per_second": 39.418, |
|
"eval_steps_per_second": 4.929, |
|
"eval_wer": 0.34994446795007406, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.458135619164243, |
|
"grad_norm": 0.36173292994499207, |
|
"learning_rate": 0.00012562556390977443, |
|
"loss": 0.225, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 4.477269248431043, |
|
"grad_norm": 0.7031286358833313, |
|
"learning_rate": 0.0001248736842105263, |
|
"loss": 0.2291, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.477269248431043, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.2214, |
|
"eval_samples_per_second": 39.181, |
|
"eval_steps_per_second": 4.899, |
|
"eval_wer": 0.3485429447852761, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.496402877697841, |
|
"grad_norm": 0.8883704543113708, |
|
"learning_rate": 0.00012412180451127817, |
|
"loss": 0.2254, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 4.515536506964641, |
|
"grad_norm": 0.7868921160697937, |
|
"learning_rate": 0.00012336992481203006, |
|
"loss": 0.2301, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.515536506964641, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.1835, |
|
"eval_samples_per_second": 38.971, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.3487941612016078, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.5346701362314406, |
|
"grad_norm": 0.8620243072509766, |
|
"learning_rate": 0.00012261804511278193, |
|
"loss": 0.2207, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 4.553803765498239, |
|
"grad_norm": 1.267608642578125, |
|
"learning_rate": 0.00012186616541353381, |
|
"loss": 0.2084, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.553803765498239, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.8973, |
|
"eval_samples_per_second": 39.252, |
|
"eval_steps_per_second": 4.908, |
|
"eval_wer": 0.3515046541146605, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.572937394765039, |
|
"grad_norm": 0.8290882706642151, |
|
"learning_rate": 0.00012111428571428569, |
|
"loss": 0.2121, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 4.5920710240318385, |
|
"grad_norm": 0.5240318775177002, |
|
"learning_rate": 0.00012036541353383458, |
|
"loss": 0.2251, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.5920710240318385, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.3697, |
|
"eval_samples_per_second": 39.148, |
|
"eval_steps_per_second": 4.895, |
|
"eval_wer": 0.3509228897821028, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.611204653298637, |
|
"grad_norm": 0.79433274269104, |
|
"learning_rate": 0.00011961353383458646, |
|
"loss": 0.2152, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 4.630338282565437, |
|
"grad_norm": 0.5738509893417358, |
|
"learning_rate": 0.00011886165413533834, |
|
"loss": 0.2205, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.630338282565437, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.0051, |
|
"eval_samples_per_second": 39.01, |
|
"eval_steps_per_second": 4.878, |
|
"eval_wer": 0.34464247937381004, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.6494719118322365, |
|
"grad_norm": 0.6107327938079834, |
|
"learning_rate": 0.00011810977443609022, |
|
"loss": 0.2153, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 4.668605541099035, |
|
"grad_norm": 0.5332146286964417, |
|
"learning_rate": 0.0001173578947368421, |
|
"loss": 0.2174, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.668605541099035, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.5227, |
|
"eval_samples_per_second": 39.115, |
|
"eval_steps_per_second": 4.891, |
|
"eval_wer": 0.3458985614554686, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.687739170365835, |
|
"grad_norm": 1.588100790977478, |
|
"learning_rate": 0.00011660601503759397, |
|
"loss": 0.2132, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 4.7068727996326345, |
|
"grad_norm": 2.00449275970459, |
|
"learning_rate": 0.00011585413533834586, |
|
"loss": 0.2136, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 4.7068727996326345, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.5882, |
|
"eval_samples_per_second": 39.101, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.3498651364501798, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 4.726006428899433, |
|
"grad_norm": 0.7837647795677185, |
|
"learning_rate": 0.00011510225563909774, |
|
"loss": 0.225, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 4.745140058166233, |
|
"grad_norm": 1.3031939268112183, |
|
"learning_rate": 0.00011435037593984962, |
|
"loss": 0.2142, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 4.745140058166233, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.8141, |
|
"eval_samples_per_second": 39.051, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.3449201396234398, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 4.7642736874330325, |
|
"grad_norm": 0.5795506834983826, |
|
"learning_rate": 0.00011359999999999998, |
|
"loss": 0.2155, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 4.783407316699831, |
|
"grad_norm": 0.7235686182975769, |
|
"learning_rate": 0.00011284812030075186, |
|
"loss": 0.2152, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 4.783407316699831, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.9818, |
|
"eval_samples_per_second": 39.015, |
|
"eval_steps_per_second": 4.878, |
|
"eval_wer": 0.34659932303786756, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 4.802540945966631, |
|
"grad_norm": 0.4587650001049042, |
|
"learning_rate": 0.00011209774436090224, |
|
"loss": 0.2081, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 4.8216745752334305, |
|
"grad_norm": 1.3301700353622437, |
|
"learning_rate": 0.00011134736842105263, |
|
"loss": 0.2216, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 4.8216745752334305, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.2673, |
|
"eval_samples_per_second": 39.171, |
|
"eval_steps_per_second": 4.898, |
|
"eval_wer": 0.34429870954093506, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 4.840808204500229, |
|
"grad_norm": 1.0340607166290283, |
|
"learning_rate": 0.00011059548872180452, |
|
"loss": 0.2152, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 4.859941833767029, |
|
"grad_norm": 0.7466903328895569, |
|
"learning_rate": 0.00010984360902255638, |
|
"loss": 0.2209, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 4.859941833767029, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.7521, |
|
"eval_samples_per_second": 39.065, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.3455415697059446, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 4.879075463033828, |
|
"grad_norm": 0.451224148273468, |
|
"learning_rate": 0.00010909323308270676, |
|
"loss": 0.2068, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 4.898209092300627, |
|
"grad_norm": 0.9599905610084534, |
|
"learning_rate": 0.00010834135338345863, |
|
"loss": 0.2183, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 4.898209092300627, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.3325, |
|
"eval_samples_per_second": 40.05, |
|
"eval_steps_per_second": 5.008, |
|
"eval_wer": 0.340398244129469, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 4.917342721567427, |
|
"grad_norm": 1.071007251739502, |
|
"learning_rate": 0.00010758947368421051, |
|
"loss": 0.2115, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 4.936476350834226, |
|
"grad_norm": 0.9002227187156677, |
|
"learning_rate": 0.00010683759398496239, |
|
"loss": 0.2174, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 4.936476350834226, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 175.3184, |
|
"eval_samples_per_second": 40.053, |
|
"eval_steps_per_second": 5.008, |
|
"eval_wer": 0.3402924687962767, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 4.955609980101025, |
|
"grad_norm": 3.2121989727020264, |
|
"learning_rate": 0.00010608571428571427, |
|
"loss": 0.2139, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 4.974743609367825, |
|
"grad_norm": 0.6666644811630249, |
|
"learning_rate": 0.00010533383458646616, |
|
"loss": 0.2165, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 4.974743609367825, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 176.6287, |
|
"eval_samples_per_second": 39.756, |
|
"eval_steps_per_second": 4.971, |
|
"eval_wer": 0.3419848741273535, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 4.993877238634624, |
|
"grad_norm": 1.018226981163025, |
|
"learning_rate": 0.00010458195488721804, |
|
"loss": 0.2075, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 5.013010867901423, |
|
"grad_norm": 0.9753539562225342, |
|
"learning_rate": 0.00010383007518796992, |
|
"loss": 0.1806, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.013010867901423, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.4567, |
|
"eval_samples_per_second": 39.57, |
|
"eval_steps_per_second": 4.948, |
|
"eval_wer": 0.3380579648825894, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.032144497168223, |
|
"grad_norm": 0.6430408954620361, |
|
"learning_rate": 0.0001030781954887218, |
|
"loss": 0.1788, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 5.051278126435022, |
|
"grad_norm": 0.5756456255912781, |
|
"learning_rate": 0.00010232631578947367, |
|
"loss": 0.1821, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.051278126435022, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.4262, |
|
"eval_samples_per_second": 39.355, |
|
"eval_steps_per_second": 4.921, |
|
"eval_wer": 0.3426459699598054, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.070411755701821, |
|
"grad_norm": 0.7271620035171509, |
|
"learning_rate": 0.00010157443609022555, |
|
"loss": 0.1915, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 5.089545384968621, |
|
"grad_norm": 0.8460062146186829, |
|
"learning_rate": 0.00010082255639097743, |
|
"loss": 0.1825, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.089545384968621, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.6095, |
|
"eval_samples_per_second": 39.315, |
|
"eval_steps_per_second": 4.916, |
|
"eval_wer": 0.3399619208800508, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.10867901423542, |
|
"grad_norm": 0.43308231234550476, |
|
"learning_rate": 0.00010007067669172931, |
|
"loss": 0.179, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 5.127812643502219, |
|
"grad_norm": 0.5365935564041138, |
|
"learning_rate": 9.932030075187969e-05, |
|
"loss": 0.1876, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.127812643502219, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.192, |
|
"eval_samples_per_second": 39.407, |
|
"eval_steps_per_second": 4.927, |
|
"eval_wer": 0.3381240744658346, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.146946272769019, |
|
"grad_norm": 0.35783401131629944, |
|
"learning_rate": 9.856842105263157e-05, |
|
"loss": 0.1794, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 5.166079902035818, |
|
"grad_norm": 1.0039490461349487, |
|
"learning_rate": 9.781654135338345e-05, |
|
"loss": 0.1858, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.166079902035818, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.1178, |
|
"eval_samples_per_second": 38.77, |
|
"eval_steps_per_second": 4.848, |
|
"eval_wer": 0.3341707213877724, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.185213531302617, |
|
"grad_norm": 0.5719444751739502, |
|
"learning_rate": 9.706466165413533e-05, |
|
"loss": 0.1733, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 5.204347160569417, |
|
"grad_norm": 1.0103236436843872, |
|
"learning_rate": 9.631278195488722e-05, |
|
"loss": 0.1729, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.204347160569417, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.3304, |
|
"eval_samples_per_second": 38.94, |
|
"eval_steps_per_second": 4.869, |
|
"eval_wer": 0.3325179818066427, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.223480789836216, |
|
"grad_norm": 0.34207335114479065, |
|
"learning_rate": 9.55609022556391e-05, |
|
"loss": 0.1722, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 5.242614419103015, |
|
"grad_norm": 1.3274930715560913, |
|
"learning_rate": 9.480902255639098e-05, |
|
"loss": 0.1843, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.242614419103015, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.6878, |
|
"eval_samples_per_second": 38.863, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.3313808969748255, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.261748048369815, |
|
"grad_norm": 2.946866989135742, |
|
"learning_rate": 9.405714285714285e-05, |
|
"loss": 0.1733, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 5.280881677636614, |
|
"grad_norm": 1.175057291984558, |
|
"learning_rate": 9.330526315789473e-05, |
|
"loss": 0.1828, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.280881677636614, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.9842, |
|
"eval_samples_per_second": 39.015, |
|
"eval_steps_per_second": 4.878, |
|
"eval_wer": 0.33381372963824835, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.300015306903413, |
|
"grad_norm": 0.5658883452415466, |
|
"learning_rate": 9.255338345864661e-05, |
|
"loss": 0.1905, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 2.0087709426879883, |
|
"learning_rate": 9.180150375939849e-05, |
|
"loss": 0.1878, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.94, |
|
"eval_samples_per_second": 39.024, |
|
"eval_steps_per_second": 4.879, |
|
"eval_wer": 0.3299397080600804, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.338282565437012, |
|
"grad_norm": 0.7439378499984741, |
|
"learning_rate": 9.104962406015037e-05, |
|
"loss": 0.1756, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 5.357416194703811, |
|
"grad_norm": 0.6208277344703674, |
|
"learning_rate": 9.030075187969923e-05, |
|
"loss": 0.1784, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.357416194703811, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.2886, |
|
"eval_samples_per_second": 39.386, |
|
"eval_steps_per_second": 4.925, |
|
"eval_wer": 0.33048180664269095, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.376549823970611, |
|
"grad_norm": 1.9707947969436646, |
|
"learning_rate": 8.954887218045112e-05, |
|
"loss": 0.1907, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 5.39568345323741, |
|
"grad_norm": 0.6385311484336853, |
|
"learning_rate": 8.8796992481203e-05, |
|
"loss": 0.1791, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.39568345323741, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 177.8472, |
|
"eval_samples_per_second": 39.483, |
|
"eval_steps_per_second": 4.937, |
|
"eval_wer": 0.3262904590649461, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.414817082504209, |
|
"grad_norm": 0.44527003169059753, |
|
"learning_rate": 8.804511278195488e-05, |
|
"loss": 0.1785, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 5.433950711771009, |
|
"grad_norm": 0.820563793182373, |
|
"learning_rate": 8.729323308270676e-05, |
|
"loss": 0.1861, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.433950711771009, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.6414, |
|
"eval_samples_per_second": 38.447, |
|
"eval_steps_per_second": 4.807, |
|
"eval_wer": 0.3237518510683309, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.453084341037808, |
|
"grad_norm": 1.075088381767273, |
|
"learning_rate": 8.654285714285714e-05, |
|
"loss": 0.1878, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 5.472217970304607, |
|
"grad_norm": 1.048279047012329, |
|
"learning_rate": 8.579097744360902e-05, |
|
"loss": 0.176, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 5.472217970304607, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.7657, |
|
"eval_samples_per_second": 38.846, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.3245319441506241, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 5.491351599571407, |
|
"grad_norm": 0.9499515295028687, |
|
"learning_rate": 8.504060150375938e-05, |
|
"loss": 0.1724, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 5.510485228838206, |
|
"grad_norm": 0.6625120639801025, |
|
"learning_rate": 8.428872180451127e-05, |
|
"loss": 0.1821, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 5.510485228838206, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.84, |
|
"eval_samples_per_second": 38.83, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.32155701290459066, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 5.529618858105005, |
|
"grad_norm": 1.221817135810852, |
|
"learning_rate": 8.353834586466164e-05, |
|
"loss": 0.1787, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 5.548752487371805, |
|
"grad_norm": 1.4700016975402832, |
|
"learning_rate": 8.278646616541352e-05, |
|
"loss": 0.176, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 5.548752487371805, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.0479, |
|
"eval_samples_per_second": 38.785, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.324505500317326, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 5.567886116638604, |
|
"grad_norm": 0.4790880084037781, |
|
"learning_rate": 8.20345864661654e-05, |
|
"loss": 0.1681, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 5.587019745905403, |
|
"grad_norm": 0.749213457107544, |
|
"learning_rate": 8.128270676691729e-05, |
|
"loss": 0.1799, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 5.587019745905403, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.4888, |
|
"eval_samples_per_second": 38.905, |
|
"eval_steps_per_second": 4.865, |
|
"eval_wer": 0.32506082081658555, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 5.606153375172203, |
|
"grad_norm": 0.5427069067955017, |
|
"learning_rate": 8.053082706766917e-05, |
|
"loss": 0.1714, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 5.625287004439002, |
|
"grad_norm": 0.53640216588974, |
|
"learning_rate": 7.977894736842105e-05, |
|
"loss": 0.1696, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 5.625287004439002, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.603, |
|
"eval_samples_per_second": 38.881, |
|
"eval_steps_per_second": 4.861, |
|
"eval_wer": 0.32224455257034057, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 5.644420633705801, |
|
"grad_norm": 2.7623894214630127, |
|
"learning_rate": 7.902706766917293e-05, |
|
"loss": 0.1766, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 5.663554262972601, |
|
"grad_norm": 0.9681125283241272, |
|
"learning_rate": 7.827518796992481e-05, |
|
"loss": 0.1711, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 5.663554262972601, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.9103, |
|
"eval_samples_per_second": 38.815, |
|
"eval_steps_per_second": 4.853, |
|
"eval_wer": 0.3242939496509414, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 5.6826878922394, |
|
"grad_norm": 0.8594058752059937, |
|
"learning_rate": 7.752330827067668e-05, |
|
"loss": 0.1798, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 5.701821521506199, |
|
"grad_norm": 0.9855976104736328, |
|
"learning_rate": 7.677293233082705e-05, |
|
"loss": 0.1794, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 5.701821521506199, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.7415, |
|
"eval_samples_per_second": 38.637, |
|
"eval_steps_per_second": 4.831, |
|
"eval_wer": 0.3212264649883647, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 5.7209551507729985, |
|
"grad_norm": 0.5901813507080078, |
|
"learning_rate": 7.602255639097744e-05, |
|
"loss": 0.1691, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 5.740088780039798, |
|
"grad_norm": 1.9479256868362427, |
|
"learning_rate": 7.527067669172932e-05, |
|
"loss": 0.1806, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 5.740088780039798, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.0383, |
|
"eval_samples_per_second": 39.003, |
|
"eval_steps_per_second": 4.877, |
|
"eval_wer": 0.32014226782314364, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 5.759222409306597, |
|
"grad_norm": 5.095980167388916, |
|
"learning_rate": 7.45187969924812e-05, |
|
"loss": 0.1802, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 5.7783560385733965, |
|
"grad_norm": 1.2752444744110107, |
|
"learning_rate": 7.376691729323307e-05, |
|
"loss": 0.1736, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 5.7783560385733965, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 178.7009, |
|
"eval_samples_per_second": 39.295, |
|
"eval_steps_per_second": 4.913, |
|
"eval_wer": 0.3235799661518934, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 5.797489667840196, |
|
"grad_norm": 0.6796151995658875, |
|
"learning_rate": 7.301503759398495e-05, |
|
"loss": 0.1707, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 5.816623297106995, |
|
"grad_norm": 0.8652954697608948, |
|
"learning_rate": 7.226466165413533e-05, |
|
"loss": 0.1664, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 5.816623297106995, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.4487, |
|
"eval_samples_per_second": 38.914, |
|
"eval_steps_per_second": 4.866, |
|
"eval_wer": 0.3222313306536916, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 5.8357569263737945, |
|
"grad_norm": 0.5811170935630798, |
|
"learning_rate": 7.151278195488721e-05, |
|
"loss": 0.1712, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 5.854890555640594, |
|
"grad_norm": 0.414420485496521, |
|
"learning_rate": 7.076090225563909e-05, |
|
"loss": 0.1704, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 5.854890555640594, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.9454, |
|
"eval_samples_per_second": 38.594, |
|
"eval_steps_per_second": 4.826, |
|
"eval_wer": 0.3200232705733023, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 5.874024184907393, |
|
"grad_norm": 0.7044617533683777, |
|
"learning_rate": 7.000902255639097e-05, |
|
"loss": 0.1797, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 5.8931578141741925, |
|
"grad_norm": 0.6984072327613831, |
|
"learning_rate": 6.925714285714284e-05, |
|
"loss": 0.1713, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 5.8931578141741925, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.4266, |
|
"eval_samples_per_second": 38.704, |
|
"eval_steps_per_second": 4.839, |
|
"eval_wer": 0.33001903955997464, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 5.912291443440992, |
|
"grad_norm": 1.7558343410491943, |
|
"learning_rate": 6.850526315789472e-05, |
|
"loss": 0.1718, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 5.931425072707791, |
|
"grad_norm": 0.5357454419136047, |
|
"learning_rate": 6.77533834586466e-05, |
|
"loss": 0.1701, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 5.931425072707791, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 184.6223, |
|
"eval_samples_per_second": 38.034, |
|
"eval_steps_per_second": 4.756, |
|
"eval_wer": 0.3172202242437064, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 5.9505587019745905, |
|
"grad_norm": 0.6187770962715149, |
|
"learning_rate": 6.700150375939849e-05, |
|
"loss": 0.1684, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 5.96969233124139, |
|
"grad_norm": 0.4420112669467926, |
|
"learning_rate": 6.624962406015037e-05, |
|
"loss": 0.1687, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 5.96969233124139, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.0611, |
|
"eval_samples_per_second": 38.569, |
|
"eval_steps_per_second": 4.823, |
|
"eval_wer": 0.31862174740850435, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 5.988825960508189, |
|
"grad_norm": 2.3220465183258057, |
|
"learning_rate": 6.550075187969924e-05, |
|
"loss": 0.1657, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 6.0079595897749885, |
|
"grad_norm": 1.0167362689971924, |
|
"learning_rate": 6.475037593984962e-05, |
|
"loss": 0.1543, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 6.0079595897749885, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.8294, |
|
"eval_samples_per_second": 38.619, |
|
"eval_steps_per_second": 4.829, |
|
"eval_wer": 0.31407340808123546, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 6.027093219041788, |
|
"grad_norm": 0.2879861295223236, |
|
"learning_rate": 6.4e-05, |
|
"loss": 0.1419, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 6.046226848308587, |
|
"grad_norm": 0.5147427916526794, |
|
"learning_rate": 6.324812030075188e-05, |
|
"loss": 0.142, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 6.046226848308587, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.1027, |
|
"eval_samples_per_second": 38.774, |
|
"eval_steps_per_second": 4.848, |
|
"eval_wer": 0.3165591284112545, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 6.065360477575386, |
|
"grad_norm": 0.43559426069259644, |
|
"learning_rate": 6.249624060150375e-05, |
|
"loss": 0.1399, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 6.084494106842186, |
|
"grad_norm": 0.38178640604019165, |
|
"learning_rate": 6.174436090225563e-05, |
|
"loss": 0.1438, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 6.084494106842186, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.0147, |
|
"eval_samples_per_second": 38.792, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.31562037232917284, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 6.103627736108985, |
|
"grad_norm": 0.42758727073669434, |
|
"learning_rate": 6.099248120300751e-05, |
|
"loss": 0.144, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 6.122761365375784, |
|
"grad_norm": 0.5155762434005737, |
|
"learning_rate": 6.024060150375939e-05, |
|
"loss": 0.1433, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 6.122761365375784, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 183.6308, |
|
"eval_samples_per_second": 38.24, |
|
"eval_steps_per_second": 4.781, |
|
"eval_wer": 0.31587158874550453, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 6.141894994642584, |
|
"grad_norm": 0.6651669144630432, |
|
"learning_rate": 5.949022556390977e-05, |
|
"loss": 0.1426, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 6.161028623909383, |
|
"grad_norm": 0.42425113916397095, |
|
"learning_rate": 5.873834586466165e-05, |
|
"loss": 0.1442, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 6.161028623909383, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.9638, |
|
"eval_samples_per_second": 38.59, |
|
"eval_steps_per_second": 4.825, |
|
"eval_wer": 0.3142849587476201, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 6.180162253176182, |
|
"grad_norm": 1.366357684135437, |
|
"learning_rate": 5.798646616541353e-05, |
|
"loss": 0.1342, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 6.199295882442982, |
|
"grad_norm": 0.7355407476425171, |
|
"learning_rate": 5.7234586466165414e-05, |
|
"loss": 0.1494, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 6.199295882442982, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.1034, |
|
"eval_samples_per_second": 38.561, |
|
"eval_steps_per_second": 4.821, |
|
"eval_wer": 0.3106621535857838, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 6.218429511709781, |
|
"grad_norm": 0.8021041750907898, |
|
"learning_rate": 5.648270676691729e-05, |
|
"loss": 0.1449, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 6.23756314097658, |
|
"grad_norm": 0.3070674240589142, |
|
"learning_rate": 5.573082706766917e-05, |
|
"loss": 0.1355, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 6.23756314097658, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.6272, |
|
"eval_samples_per_second": 38.662, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 0.31661201607785067, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 6.25669677024338, |
|
"grad_norm": 0.3594122529029846, |
|
"learning_rate": 5.498045112781954e-05, |
|
"loss": 0.1399, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 6.275830399510179, |
|
"grad_norm": 0.7340966463088989, |
|
"learning_rate": 5.422857142857142e-05, |
|
"loss": 0.1403, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 6.275830399510179, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.6513, |
|
"eval_samples_per_second": 38.445, |
|
"eval_steps_per_second": 4.807, |
|
"eval_wer": 0.31170668500105775, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 6.294964028776978, |
|
"grad_norm": 0.49476948380470276, |
|
"learning_rate": 5.3476691729323304e-05, |
|
"loss": 0.1391, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 6.314097658043778, |
|
"grad_norm": 0.7009222507476807, |
|
"learning_rate": 5.2726315789473675e-05, |
|
"loss": 0.1435, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 6.314097658043778, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.5712, |
|
"eval_samples_per_second": 38.462, |
|
"eval_steps_per_second": 4.809, |
|
"eval_wer": 0.3124206685001058, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 6.333231287310577, |
|
"grad_norm": 1.6074929237365723, |
|
"learning_rate": 5.197443609022556e-05, |
|
"loss": 0.1369, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 6.352364916577376, |
|
"grad_norm": 0.4530220031738281, |
|
"learning_rate": 5.122255639097744e-05, |
|
"loss": 0.1446, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 6.352364916577376, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.3168, |
|
"eval_samples_per_second": 38.515, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.31234133700021155, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 6.371498545844176, |
|
"grad_norm": 0.5443539023399353, |
|
"learning_rate": 5.047067669172932e-05, |
|
"loss": 0.1481, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 6.390632175110975, |
|
"grad_norm": 0.604567289352417, |
|
"learning_rate": 4.9718796992481194e-05, |
|
"loss": 0.1385, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 6.390632175110975, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.452, |
|
"eval_samples_per_second": 38.913, |
|
"eval_steps_per_second": 4.866, |
|
"eval_wer": 0.31403374233128833, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 6.409765804377774, |
|
"grad_norm": 0.5584743022918701, |
|
"learning_rate": 4.8966917293233076e-05, |
|
"loss": 0.1451, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 6.428899433644574, |
|
"grad_norm": 0.34049585461616516, |
|
"learning_rate": 4.821503759398496e-05, |
|
"loss": 0.1437, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 6.428899433644574, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.8859, |
|
"eval_samples_per_second": 38.82, |
|
"eval_steps_per_second": 4.854, |
|
"eval_wer": 0.31029193991961074, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 6.448033062911373, |
|
"grad_norm": 1.0056949853897095, |
|
"learning_rate": 4.746315789473684e-05, |
|
"loss": 0.1453, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 6.467166692178172, |
|
"grad_norm": 0.4812434911727905, |
|
"learning_rate": 4.671278195488721e-05, |
|
"loss": 0.1328, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 6.467166692178172, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.778, |
|
"eval_samples_per_second": 38.63, |
|
"eval_steps_per_second": 4.83, |
|
"eval_wer": 0.31021260841971654, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 6.486300321444972, |
|
"grad_norm": 0.5090984106063843, |
|
"learning_rate": 4.596090225563909e-05, |
|
"loss": 0.1369, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 6.505433950711771, |
|
"grad_norm": 3.274346113204956, |
|
"learning_rate": 4.520902255639097e-05, |
|
"loss": 0.1354, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 6.505433950711771, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.3019, |
|
"eval_samples_per_second": 38.519, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.31116458641844724, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 6.52456757997857, |
|
"grad_norm": 0.6519914269447327, |
|
"learning_rate": 4.445714285714285e-05, |
|
"loss": 0.1405, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 6.54370120924537, |
|
"grad_norm": 0.5463857650756836, |
|
"learning_rate": 4.370526315789473e-05, |
|
"loss": 0.1394, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 6.54370120924537, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.4757, |
|
"eval_samples_per_second": 38.694, |
|
"eval_steps_per_second": 4.838, |
|
"eval_wer": 0.3094192934207743, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 6.562834838512169, |
|
"grad_norm": 0.43961018323898315, |
|
"learning_rate": 4.295338345864661e-05, |
|
"loss": 0.1424, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 6.581968467778968, |
|
"grad_norm": 0.2494196593761444, |
|
"learning_rate": 4.220150375939849e-05, |
|
"loss": 0.1385, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 6.581968467778968, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.1999, |
|
"eval_samples_per_second": 38.753, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.30549238417601016, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 6.601102097045768, |
|
"grad_norm": 3.2341201305389404, |
|
"learning_rate": 4.144962406015037e-05, |
|
"loss": 0.1444, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 6.620235726312567, |
|
"grad_norm": 0.6074426770210266, |
|
"learning_rate": 4.069774436090225e-05, |
|
"loss": 0.138, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 6.620235726312567, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.8045, |
|
"eval_samples_per_second": 38.624, |
|
"eval_steps_per_second": 4.829, |
|
"eval_wer": 0.3054659403427121, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 6.639369355579366, |
|
"grad_norm": 0.48304858803749084, |
|
"learning_rate": 3.994736842105263e-05, |
|
"loss": 0.1356, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 6.658502984846166, |
|
"grad_norm": 0.3982817530632019, |
|
"learning_rate": 3.919548872180451e-05, |
|
"loss": 0.138, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 6.658502984846166, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 183.7344, |
|
"eval_samples_per_second": 38.218, |
|
"eval_steps_per_second": 4.779, |
|
"eval_wer": 0.3061138142585149, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 6.677636614112965, |
|
"grad_norm": 0.46521154046058655, |
|
"learning_rate": 3.844360902255639e-05, |
|
"loss": 0.1293, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 6.696770243379764, |
|
"grad_norm": 0.33037710189819336, |
|
"learning_rate": 3.769172932330827e-05, |
|
"loss": 0.1313, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 6.696770243379764, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.4617, |
|
"eval_samples_per_second": 38.697, |
|
"eval_steps_per_second": 4.838, |
|
"eval_wer": 0.3061005923418659, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 6.715903872646564, |
|
"grad_norm": 0.47027432918548584, |
|
"learning_rate": 3.6939849624060146e-05, |
|
"loss": 0.1363, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 6.735037501913363, |
|
"grad_norm": 0.7823716998100281, |
|
"learning_rate": 3.618796992481203e-05, |
|
"loss": 0.1427, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 6.735037501913363, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.6528, |
|
"eval_samples_per_second": 38.87, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.30834831817220226, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 6.754171131180162, |
|
"grad_norm": 0.5896081924438477, |
|
"learning_rate": 3.543609022556391e-05, |
|
"loss": 0.1347, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 6.773304760446962, |
|
"grad_norm": 0.7625430822372437, |
|
"learning_rate": 3.468571428571429e-05, |
|
"loss": 0.1432, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 6.773304760446962, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.5521, |
|
"eval_samples_per_second": 38.678, |
|
"eval_steps_per_second": 4.836, |
|
"eval_wer": 0.3047519568436641, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 6.792438389713761, |
|
"grad_norm": 0.8567324280738831, |
|
"learning_rate": 3.393383458646616e-05, |
|
"loss": 0.1348, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 6.81157201898056, |
|
"grad_norm": 1.5647565126419067, |
|
"learning_rate": 3.318195488721804e-05, |
|
"loss": 0.136, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 6.81157201898056, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.3872, |
|
"eval_samples_per_second": 38.713, |
|
"eval_steps_per_second": 4.84, |
|
"eval_wer": 0.3039454199280728, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 6.83070564824736, |
|
"grad_norm": 0.3368758261203766, |
|
"learning_rate": 3.2430075187969924e-05, |
|
"loss": 0.1383, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 6.8498392775141586, |
|
"grad_norm": 0.5614475011825562, |
|
"learning_rate": 3.1678195488721806e-05, |
|
"loss": 0.1424, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 6.8498392775141586, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.4861, |
|
"eval_samples_per_second": 38.692, |
|
"eval_steps_per_second": 4.838, |
|
"eval_wer": 0.3016448064311403, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 6.868972906780958, |
|
"grad_norm": 0.6072395443916321, |
|
"learning_rate": 3.092631578947368e-05, |
|
"loss": 0.1284, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 6.888106536047758, |
|
"grad_norm": 0.6235467195510864, |
|
"learning_rate": 3.0174436090225562e-05, |
|
"loss": 0.1347, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 6.888106536047758, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.9343, |
|
"eval_samples_per_second": 38.596, |
|
"eval_steps_per_second": 4.826, |
|
"eval_wer": 0.3038925322614766, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 6.9072401653145565, |
|
"grad_norm": 4.727964401245117, |
|
"learning_rate": 2.9422556390977444e-05, |
|
"loss": 0.1327, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 6.926373794581356, |
|
"grad_norm": 0.6982028484344482, |
|
"learning_rate": 2.867067669172932e-05, |
|
"loss": 0.1307, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 6.926373794581356, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.8488, |
|
"eval_samples_per_second": 38.828, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.3028876665961498, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 6.945507423848156, |
|
"grad_norm": 0.2889564633369446, |
|
"learning_rate": 2.7918796992481203e-05, |
|
"loss": 0.1349, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 6.9646410531149545, |
|
"grad_norm": 0.3712177872657776, |
|
"learning_rate": 2.716691729323308e-05, |
|
"loss": 0.1293, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 6.9646410531149545, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.868, |
|
"eval_samples_per_second": 38.824, |
|
"eval_steps_per_second": 4.854, |
|
"eval_wer": 0.30258356251322194, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 6.983774682381754, |
|
"grad_norm": 0.7685525417327881, |
|
"learning_rate": 2.6415037593984963e-05, |
|
"loss": 0.1339, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 7.002908311648554, |
|
"grad_norm": 0.19924980401992798, |
|
"learning_rate": 2.566315789473684e-05, |
|
"loss": 0.1259, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 7.002908311648554, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.6261, |
|
"eval_samples_per_second": 38.876, |
|
"eval_steps_per_second": 4.861, |
|
"eval_wer": 0.3025174529299767, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 7.0220419409153525, |
|
"grad_norm": 0.3780413568019867, |
|
"learning_rate": 2.4911278195488722e-05, |
|
"loss": 0.1163, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 7.041175570182152, |
|
"grad_norm": 0.5037872195243835, |
|
"learning_rate": 2.4160902255639094e-05, |
|
"loss": 0.1151, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 7.041175570182152, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.4745, |
|
"eval_samples_per_second": 38.909, |
|
"eval_steps_per_second": 4.865, |
|
"eval_wer": 0.3033900994288132, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 7.060309199448952, |
|
"grad_norm": 0.2655356824398041, |
|
"learning_rate": 2.3409022556390975e-05, |
|
"loss": 0.1104, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 7.0794428287157505, |
|
"grad_norm": 0.6396870613098145, |
|
"learning_rate": 2.2657142857142853e-05, |
|
"loss": 0.1143, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 7.0794428287157505, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.1648, |
|
"eval_samples_per_second": 38.975, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.30249100909667864, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 7.09857645798255, |
|
"grad_norm": 1.1950030326843262, |
|
"learning_rate": 2.1905263157894735e-05, |
|
"loss": 0.1217, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 7.11771008724935, |
|
"grad_norm": 0.6003520488739014, |
|
"learning_rate": 2.1154887218045113e-05, |
|
"loss": 0.1105, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 7.11771008724935, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.583, |
|
"eval_samples_per_second": 39.102, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.30058705309921724, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 7.1368437165161485, |
|
"grad_norm": 0.5612542033195496, |
|
"learning_rate": 2.040300751879699e-05, |
|
"loss": 0.1124, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 7.155977345782948, |
|
"grad_norm": 0.47781071066856384, |
|
"learning_rate": 1.9651127819548872e-05, |
|
"loss": 0.1126, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 7.155977345782948, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.6562, |
|
"eval_samples_per_second": 39.086, |
|
"eval_steps_per_second": 4.887, |
|
"eval_wer": 0.3006134969325153, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 7.175110975049748, |
|
"grad_norm": 0.5884853601455688, |
|
"learning_rate": 1.889924812030075e-05, |
|
"loss": 0.1147, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 7.194244604316546, |
|
"grad_norm": 0.269551157951355, |
|
"learning_rate": 1.814736842105263e-05, |
|
"loss": 0.1139, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 7.194244604316546, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 179.5976, |
|
"eval_samples_per_second": 39.099, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.2996482970171356, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 7.213378233583346, |
|
"grad_norm": 0.7385743260383606, |
|
"learning_rate": 1.739548872180451e-05, |
|
"loss": 0.1088, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 7.232511862850146, |
|
"grad_norm": 0.7600038647651672, |
|
"learning_rate": 1.6643609022556388e-05, |
|
"loss": 0.1101, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 7.232511862850146, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.6841, |
|
"eval_samples_per_second": 38.863, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.29820710810239054, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 7.251645492116944, |
|
"grad_norm": 0.4615612328052521, |
|
"learning_rate": 1.589172932330827e-05, |
|
"loss": 0.12, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 7.270779121383744, |
|
"grad_norm": 17.22515296936035, |
|
"learning_rate": 1.5139849624060148e-05, |
|
"loss": 0.1187, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 7.270779121383744, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.7132, |
|
"eval_samples_per_second": 38.857, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.2988285381848953, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 7.289912750650544, |
|
"grad_norm": 0.272981196641922, |
|
"learning_rate": 1.4387969924812028e-05, |
|
"loss": 0.1106, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 7.309046379917342, |
|
"grad_norm": 0.7808548212051392, |
|
"learning_rate": 1.3636090225563907e-05, |
|
"loss": 0.1174, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 7.309046379917342, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.5814, |
|
"eval_samples_per_second": 38.886, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.2993441929342077, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 7.328180009184142, |
|
"grad_norm": 0.32894080877304077, |
|
"learning_rate": 1.2885714285714284e-05, |
|
"loss": 0.1129, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 7.347313638450942, |
|
"grad_norm": 0.6160246729850769, |
|
"learning_rate": 1.2133834586466163e-05, |
|
"loss": 0.1132, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 7.347313638450942, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.7946, |
|
"eval_samples_per_second": 38.84, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.2995689655172414, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 7.36644726771774, |
|
"grad_norm": 0.3549739718437195, |
|
"learning_rate": 1.1381954887218043e-05, |
|
"loss": 0.1214, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 7.38558089698454, |
|
"grad_norm": 0.5132611393928528, |
|
"learning_rate": 1.0630075187969923e-05, |
|
"loss": 0.1108, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 7.38558089698454, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.6179, |
|
"eval_samples_per_second": 38.664, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 0.2995160778506452, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 7.40471452625134, |
|
"grad_norm": 0.1533355563879013, |
|
"learning_rate": 9.878195488721803e-06, |
|
"loss": 0.117, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 7.423848155518138, |
|
"grad_norm": 0.542405903339386, |
|
"learning_rate": 9.126315789473683e-06, |
|
"loss": 0.1119, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 7.423848155518138, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.7251, |
|
"eval_samples_per_second": 38.855, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.2991194203511741, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 7.442981784784938, |
|
"grad_norm": 0.4688265919685364, |
|
"learning_rate": 8.37593984962406e-06, |
|
"loss": 0.1116, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 7.462115414051738, |
|
"grad_norm": 0.7588228583335876, |
|
"learning_rate": 7.624060150375939e-06, |
|
"loss": 0.1098, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 7.462115414051738, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.1196, |
|
"eval_samples_per_second": 38.77, |
|
"eval_steps_per_second": 4.848, |
|
"eval_wer": 0.29845832451872223, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 7.481249043318536, |
|
"grad_norm": 0.9082927703857422, |
|
"learning_rate": 6.8721804511278185e-06, |
|
"loss": 0.1049, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 7.500382672585336, |
|
"grad_norm": 0.5702168345451355, |
|
"learning_rate": 6.120300751879698e-06, |
|
"loss": 0.1053, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 7.500382672585336, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 180.5918, |
|
"eval_samples_per_second": 38.883, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.29765178760313094, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 7.519516301852136, |
|
"grad_norm": 1.2016927003860474, |
|
"learning_rate": 5.368421052631578e-06, |
|
"loss": 0.1011, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 7.538649931118934, |
|
"grad_norm": 0.6198543906211853, |
|
"learning_rate": 4.616541353383459e-06, |
|
"loss": 0.11, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 7.538649931118934, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.146, |
|
"eval_samples_per_second": 38.764, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.2975327903532896, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 7.557783560385734, |
|
"grad_norm": 0.6191059947013855, |
|
"learning_rate": 3.8646616541353386e-06, |
|
"loss": 0.1118, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 7.576917189652534, |
|
"grad_norm": 0.5622895956039429, |
|
"learning_rate": 3.1127819548872175e-06, |
|
"loss": 0.1091, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 7.576917189652534, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.2479, |
|
"eval_samples_per_second": 38.53, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.295893272688809, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 7.596050818919332, |
|
"grad_norm": 1.4227417707443237, |
|
"learning_rate": 2.362406015037594e-06, |
|
"loss": 0.1126, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 7.615184448186132, |
|
"grad_norm": 0.5790704488754272, |
|
"learning_rate": 1.6105263157894734e-06, |
|
"loss": 0.108, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 7.615184448186132, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 182.5001, |
|
"eval_samples_per_second": 38.477, |
|
"eval_steps_per_second": 4.811, |
|
"eval_wer": 0.2963295959382272, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 7.634318077452932, |
|
"grad_norm": 0.6536182165145874, |
|
"learning_rate": 8.601503759398495e-07, |
|
"loss": 0.113, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 7.65345170671973, |
|
"grad_norm": 0.41451194882392883, |
|
"learning_rate": 1.0827067669172932e-07, |
|
"loss": 0.1077, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 7.65345170671973, |
|
"eval_loss": Infinity, |
|
"eval_runtime": 181.1994, |
|
"eval_samples_per_second": 38.753, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.2962370425216839, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 7.65345170671973, |
|
"step": 200000, |
|
"total_flos": 2.464157327536675e+20, |
|
"train_loss": 0.29584050216674806, |
|
"train_runtime": 112002.2186, |
|
"train_samples_per_second": 14.285, |
|
"train_steps_per_second": 1.786 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 200000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 4000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.464157327536675e+20, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|