|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9647137412079059, |
|
"eval_steps": 200, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007858854964831625, |
|
"eval_loss": 3.175461769104004, |
|
"eval_runtime": 144.4533, |
|
"eval_samples_per_second": 39.155, |
|
"eval_steps_per_second": 4.894, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01571770992966325, |
|
"eval_loss": 2.8796634674072266, |
|
"eval_runtime": 143.0854, |
|
"eval_samples_per_second": 39.529, |
|
"eval_steps_per_second": 4.941, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01964713741207906, |
|
"grad_norm": 1.9831087589263916, |
|
"learning_rate": 0.00029759999999999997, |
|
"loss": 4.8076, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.023576564894494872, |
|
"eval_loss": 1.4753953218460083, |
|
"eval_runtime": 143.2096, |
|
"eval_samples_per_second": 39.495, |
|
"eval_steps_per_second": 4.937, |
|
"eval_wer": 0.903997689011571, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0314354198593265, |
|
"eval_loss": 1.25261652469635, |
|
"eval_runtime": 144.6031, |
|
"eval_samples_per_second": 39.114, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.8548410393028518, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03929427482415812, |
|
"grad_norm": 2.427387237548828, |
|
"learning_rate": 0.0002969939393939394, |
|
"loss": 1.1153, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03929427482415812, |
|
"eval_loss": 1.1311910152435303, |
|
"eval_runtime": 144.1542, |
|
"eval_samples_per_second": 39.236, |
|
"eval_steps_per_second": 4.904, |
|
"eval_wer": 0.788769238176245, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.047153129788989744, |
|
"eval_loss": 1.0895923376083374, |
|
"eval_runtime": 144.7009, |
|
"eval_samples_per_second": 39.088, |
|
"eval_steps_per_second": 4.886, |
|
"eval_wer": 0.7734749883648152, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.055011984753821366, |
|
"eval_loss": 1.0287705659866333, |
|
"eval_runtime": 143.6225, |
|
"eval_samples_per_second": 39.381, |
|
"eval_steps_per_second": 4.923, |
|
"eval_wer": 0.7571054869926658, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.05894141223623718, |
|
"grad_norm": 2.3919336795806885, |
|
"learning_rate": 0.0002939636363636363, |
|
"loss": 0.8282, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.062870839718653, |
|
"eval_loss": 0.9747628569602966, |
|
"eval_runtime": 144.5139, |
|
"eval_samples_per_second": 39.138, |
|
"eval_steps_per_second": 4.892, |
|
"eval_wer": 0.7254096387475727, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07072969468348461, |
|
"eval_loss": 0.9748485088348389, |
|
"eval_runtime": 144.3418, |
|
"eval_samples_per_second": 39.185, |
|
"eval_steps_per_second": 4.898, |
|
"eval_wer": 0.7194556338367223, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.07858854964831624, |
|
"grad_norm": 2.169008255004883, |
|
"learning_rate": 0.0002909333333333333, |
|
"loss": 0.7335, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07858854964831624, |
|
"eval_loss": 0.9882574081420898, |
|
"eval_runtime": 145.192, |
|
"eval_samples_per_second": 38.955, |
|
"eval_steps_per_second": 4.869, |
|
"eval_wer": 0.7143682495867504, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08644740461314787, |
|
"eval_loss": 0.9364911317825317, |
|
"eval_runtime": 145.4626, |
|
"eval_samples_per_second": 38.883, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.7061834989006757, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.09430625957797949, |
|
"eval_loss": 0.9164892435073853, |
|
"eval_runtime": 145.9321, |
|
"eval_samples_per_second": 38.758, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.6801688305435637, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.0982356870603953, |
|
"grad_norm": 5.276973247528076, |
|
"learning_rate": 0.00028790303030303027, |
|
"loss": 0.6931, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.10216511454281112, |
|
"eval_loss": 0.9169939756393433, |
|
"eval_runtime": 145.3478, |
|
"eval_samples_per_second": 38.914, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.6773603376610872, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.11002396950764273, |
|
"eval_loss": 0.9080427289009094, |
|
"eval_runtime": 144.7759, |
|
"eval_samples_per_second": 39.067, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.6692237325672835, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.11788282447247436, |
|
"grad_norm": 2.9965720176696777, |
|
"learning_rate": 0.00028487272727272726, |
|
"loss": 0.67, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11788282447247436, |
|
"eval_loss": 0.8609287738800049, |
|
"eval_runtime": 145.381, |
|
"eval_samples_per_second": 38.905, |
|
"eval_steps_per_second": 4.863, |
|
"eval_wer": 0.6621784275649564, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.125741679437306, |
|
"eval_loss": 0.8863000273704529, |
|
"eval_runtime": 144.6247, |
|
"eval_samples_per_second": 39.108, |
|
"eval_steps_per_second": 4.889, |
|
"eval_wer": 0.6659177352313396, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.13360053440213762, |
|
"eval_loss": 0.8669990301132202, |
|
"eval_runtime": 145.3885, |
|
"eval_samples_per_second": 38.903, |
|
"eval_steps_per_second": 4.863, |
|
"eval_wer": 0.6610710789427228, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.1375299618845534, |
|
"grad_norm": 3.541180372238159, |
|
"learning_rate": 0.0002818424242424242, |
|
"loss": 0.6282, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.14145938936696922, |
|
"eval_loss": 0.8718289136886597, |
|
"eval_runtime": 147.388, |
|
"eval_samples_per_second": 38.375, |
|
"eval_steps_per_second": 4.797, |
|
"eval_wer": 0.6819983630498628, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.14931824433180085, |
|
"eval_loss": 0.861672580242157, |
|
"eval_runtime": 145.355, |
|
"eval_samples_per_second": 38.912, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.6481520116833304, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.15717709929663248, |
|
"grad_norm": 1.9885746240615845, |
|
"learning_rate": 0.0002788121212121212, |
|
"loss": 0.6311, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.15717709929663248, |
|
"eval_loss": 0.8504879474639893, |
|
"eval_runtime": 145.0997, |
|
"eval_samples_per_second": 38.98, |
|
"eval_steps_per_second": 4.873, |
|
"eval_wer": 0.6597230023591341, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1650359542614641, |
|
"eval_loss": 0.8290337324142456, |
|
"eval_runtime": 144.8192, |
|
"eval_samples_per_second": 39.056, |
|
"eval_steps_per_second": 4.882, |
|
"eval_wer": 0.6292307939208166, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.17289480922629574, |
|
"eval_loss": 0.8300275206565857, |
|
"eval_runtime": 144.9963, |
|
"eval_samples_per_second": 39.008, |
|
"eval_steps_per_second": 4.876, |
|
"eval_wer": 0.6567540241690873, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.17682423670871153, |
|
"grad_norm": 3.603195905685425, |
|
"learning_rate": 0.0002757878787878788, |
|
"loss": 0.615, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.18075366419112734, |
|
"eval_loss": 0.8007863163948059, |
|
"eval_runtime": 144.795, |
|
"eval_samples_per_second": 39.062, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.610855226204041, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.18861251915595897, |
|
"eval_loss": 0.8038597702980042, |
|
"eval_runtime": 144.8128, |
|
"eval_samples_per_second": 39.057, |
|
"eval_steps_per_second": 4.882, |
|
"eval_wer": 0.6045160565550224, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1964713741207906, |
|
"grad_norm": 3.389535665512085, |
|
"learning_rate": 0.0002727575757575757, |
|
"loss": 0.5785, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.1964713741207906, |
|
"eval_loss": 0.7907959818840027, |
|
"eval_runtime": 144.5449, |
|
"eval_samples_per_second": 39.13, |
|
"eval_steps_per_second": 4.891, |
|
"eval_wer": 0.6071801126606859, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.20433022908562223, |
|
"eval_loss": 0.7867733836174011, |
|
"eval_runtime": 144.7418, |
|
"eval_samples_per_second": 39.076, |
|
"eval_steps_per_second": 4.885, |
|
"eval_wer": 0.6037457270786859, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.21218908405045384, |
|
"eval_loss": 0.7709878087043762, |
|
"eval_runtime": 146.1138, |
|
"eval_samples_per_second": 38.71, |
|
"eval_steps_per_second": 4.839, |
|
"eval_wer": 0.5988348766670412, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.21611851153286965, |
|
"grad_norm": 2.476861000061035, |
|
"learning_rate": 0.00026972727272727266, |
|
"loss": 0.5928, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.22004793901528547, |
|
"eval_loss": 0.766153872013092, |
|
"eval_runtime": 144.8164, |
|
"eval_samples_per_second": 39.056, |
|
"eval_steps_per_second": 4.882, |
|
"eval_wer": 0.5747139349392563, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.2279067939801171, |
|
"eval_loss": 0.767308235168457, |
|
"eval_runtime": 145.7638, |
|
"eval_samples_per_second": 38.803, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.5945820160164337, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.23576564894494872, |
|
"grad_norm": 2.2588391304016113, |
|
"learning_rate": 0.00026669696969696966, |
|
"loss": 0.5799, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.23576564894494872, |
|
"eval_loss": 0.7804461121559143, |
|
"eval_runtime": 145.0414, |
|
"eval_samples_per_second": 38.996, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.5990114105053682, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24362450390978035, |
|
"eval_loss": 0.7586621642112732, |
|
"eval_runtime": 145.864, |
|
"eval_samples_per_second": 38.776, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.5780520293367142, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.251483358874612, |
|
"eval_loss": 0.749543309211731, |
|
"eval_runtime": 145.8617, |
|
"eval_samples_per_second": 38.776, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.5728683539022003, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.2554127863570278, |
|
"grad_norm": 1.971763253211975, |
|
"learning_rate": 0.00026366666666666666, |
|
"loss": 0.5534, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2593422138394436, |
|
"eval_loss": 0.7536802291870117, |
|
"eval_runtime": 147.4299, |
|
"eval_samples_per_second": 38.364, |
|
"eval_steps_per_second": 4.795, |
|
"eval_wer": 0.5768804865914525, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"eval_loss": 0.7661583423614502, |
|
"eval_runtime": 149.947, |
|
"eval_samples_per_second": 37.72, |
|
"eval_steps_per_second": 4.715, |
|
"eval_wer": 0.581245686957359, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.2750599237691068, |
|
"grad_norm": 2.5072972774505615, |
|
"learning_rate": 0.0002606363636363636, |
|
"loss": 0.5592, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2750599237691068, |
|
"eval_loss": 0.7571460604667664, |
|
"eval_runtime": 145.6344, |
|
"eval_samples_per_second": 38.837, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.5607998587729294, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.28291877873393845, |
|
"eval_loss": 0.7475385665893555, |
|
"eval_runtime": 145.4869, |
|
"eval_samples_per_second": 38.876, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.5635120604708639, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.2907776336987701, |
|
"eval_loss": 0.7267230749130249, |
|
"eval_runtime": 145.614, |
|
"eval_samples_per_second": 38.842, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.5591950056972285, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.2947070611811859, |
|
"grad_norm": 3.4168338775634766, |
|
"learning_rate": 0.0002576060606060606, |
|
"loss": 0.5512, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.2986364886636017, |
|
"eval_loss": 0.7362108826637268, |
|
"eval_runtime": 145.7986, |
|
"eval_samples_per_second": 38.793, |
|
"eval_steps_per_second": 4.849, |
|
"eval_wer": 0.5588098409590602, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.30649534362843334, |
|
"eval_loss": 0.7624097466468811, |
|
"eval_runtime": 145.4148, |
|
"eval_samples_per_second": 38.896, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.581117298711303, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.31435419859326497, |
|
"grad_norm": 2.9330873489379883, |
|
"learning_rate": 0.00025457575757575755, |
|
"loss": 0.54, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.31435419859326497, |
|
"eval_loss": 0.7657227516174316, |
|
"eval_runtime": 146.1604, |
|
"eval_samples_per_second": 38.697, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 0.5622442265410602, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3222130535580966, |
|
"eval_loss": 0.7300673127174377, |
|
"eval_runtime": 146.8709, |
|
"eval_samples_per_second": 38.51, |
|
"eval_steps_per_second": 4.814, |
|
"eval_wer": 0.5453611721846865, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.3300719085229282, |
|
"eval_loss": 0.7118472456932068, |
|
"eval_runtime": 146.4543, |
|
"eval_samples_per_second": 38.62, |
|
"eval_steps_per_second": 4.827, |
|
"eval_wer": 0.5381553818747894, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.33400133600534404, |
|
"grad_norm": 2.0070419311523438, |
|
"learning_rate": 0.00025154545454545454, |
|
"loss": 0.531, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.33793076348775986, |
|
"eval_loss": 0.7252832055091858, |
|
"eval_runtime": 145.6223, |
|
"eval_samples_per_second": 38.84, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.548153616536406, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.3457896184525915, |
|
"eval_loss": 0.7304599285125732, |
|
"eval_runtime": 145.89, |
|
"eval_samples_per_second": 38.769, |
|
"eval_steps_per_second": 4.846, |
|
"eval_wer": 0.5582962879748359, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.35364847341742306, |
|
"grad_norm": 2.5275588035583496, |
|
"learning_rate": 0.00024852121212121206, |
|
"loss": 0.5406, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.35364847341742306, |
|
"eval_loss": 0.7097567915916443, |
|
"eval_runtime": 145.7013, |
|
"eval_samples_per_second": 38.819, |
|
"eval_steps_per_second": 4.852, |
|
"eval_wer": 0.5520213124488453, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3615073283822547, |
|
"eval_loss": 0.698684573173523, |
|
"eval_runtime": 146.3052, |
|
"eval_samples_per_second": 38.659, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.5372245670908828, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.3693661833470863, |
|
"eval_loss": 0.7044981718063354, |
|
"eval_runtime": 145.8062, |
|
"eval_samples_per_second": 38.791, |
|
"eval_steps_per_second": 4.849, |
|
"eval_wer": 0.5472548988140136, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.37329561082950213, |
|
"grad_norm": 6.208221435546875, |
|
"learning_rate": 0.00024549090909090906, |
|
"loss": 0.5252, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.37722503831191795, |
|
"eval_loss": 0.7025354504585266, |
|
"eval_runtime": 146.2272, |
|
"eval_samples_per_second": 38.68, |
|
"eval_steps_per_second": 4.835, |
|
"eval_wer": 0.5332766285246585, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.3850838932767496, |
|
"eval_loss": 0.7077142000198364, |
|
"eval_runtime": 145.5575, |
|
"eval_samples_per_second": 38.857, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.5461796472532939, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.3929427482415812, |
|
"grad_norm": 4.407375812530518, |
|
"learning_rate": 0.00024246060606060606, |
|
"loss": 0.5156, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.3929427482415812, |
|
"eval_loss": 0.7006597518920898, |
|
"eval_runtime": 146.3123, |
|
"eval_samples_per_second": 38.657, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.5382516730593314, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.40080160320641284, |
|
"eval_loss": 0.6947250962257385, |
|
"eval_runtime": 145.4545, |
|
"eval_samples_per_second": 38.885, |
|
"eval_steps_per_second": 4.861, |
|
"eval_wer": 0.5425847763637239, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.40866045817124447, |
|
"eval_loss": 0.7127708196640015, |
|
"eval_runtime": 145.5874, |
|
"eval_samples_per_second": 38.85, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.5361332669994062, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.4125898856536603, |
|
"grad_norm": 2.721827983856201, |
|
"learning_rate": 0.000239430303030303, |
|
"loss": 0.5181, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.4165193131360761, |
|
"eval_loss": 0.6945223212242126, |
|
"eval_runtime": 146.0143, |
|
"eval_samples_per_second": 38.736, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.5276114971674343, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.42437816810090767, |
|
"eval_loss": 0.6985763311386108, |
|
"eval_runtime": 146.5657, |
|
"eval_samples_per_second": 38.59, |
|
"eval_steps_per_second": 4.824, |
|
"eval_wer": 0.5310619312801913, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.4322370230657393, |
|
"grad_norm": 5.38914680480957, |
|
"learning_rate": 0.0002364, |
|
"loss": 0.5096, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4322370230657393, |
|
"eval_loss": 0.6909800171852112, |
|
"eval_runtime": 146.0039, |
|
"eval_samples_per_second": 38.739, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.5293126414276773, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.44009587803057093, |
|
"eval_loss": 0.6855354905128479, |
|
"eval_runtime": 146.6844, |
|
"eval_samples_per_second": 38.559, |
|
"eval_steps_per_second": 4.82, |
|
"eval_wer": 0.5280608560286306, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.44795473299540256, |
|
"eval_loss": 0.6889775395393372, |
|
"eval_runtime": 146.3731, |
|
"eval_samples_per_second": 38.641, |
|
"eval_steps_per_second": 4.83, |
|
"eval_wer": 0.5262313235223315, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.4518841604778184, |
|
"grad_norm": 3.3484437465667725, |
|
"learning_rate": 0.00023336969696969694, |
|
"loss": 0.5099, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.4558135879602342, |
|
"eval_loss": 0.677577018737793, |
|
"eval_runtime": 146.1848, |
|
"eval_samples_per_second": 38.691, |
|
"eval_steps_per_second": 4.836, |
|
"eval_wer": 0.5298101458811446, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.4636724429250658, |
|
"eval_loss": 0.6817450523376465, |
|
"eval_runtime": 146.2301, |
|
"eval_samples_per_second": 38.679, |
|
"eval_steps_per_second": 4.835, |
|
"eval_wer": 0.5141949254545747, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.47153129788989745, |
|
"grad_norm": 4.75791597366333, |
|
"learning_rate": 0.00023033939393939391, |
|
"loss": 0.481, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.47153129788989745, |
|
"eval_loss": 0.6749030351638794, |
|
"eval_runtime": 144.9955, |
|
"eval_samples_per_second": 39.008, |
|
"eval_steps_per_second": 4.876, |
|
"eval_wer": 0.5318483092872848, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4793901528547291, |
|
"eval_loss": 0.6648340225219727, |
|
"eval_runtime": 146.7705, |
|
"eval_samples_per_second": 38.536, |
|
"eval_steps_per_second": 4.817, |
|
"eval_wer": 0.513167819486126, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.4872490078195607, |
|
"eval_loss": 0.6659471392631531, |
|
"eval_runtime": 145.9108, |
|
"eval_samples_per_second": 38.763, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.5151096917077241, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.4911784353019765, |
|
"grad_norm": 3.3849971294403076, |
|
"learning_rate": 0.00022730909090909089, |
|
"loss": 0.4899, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.49510786278439234, |
|
"eval_loss": 0.6744287014007568, |
|
"eval_runtime": 146.3152, |
|
"eval_samples_per_second": 38.656, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.5207266774726774, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.502966717749224, |
|
"eval_loss": 0.6732743978500366, |
|
"eval_runtime": 146.1337, |
|
"eval_samples_per_second": 38.704, |
|
"eval_steps_per_second": 4.838, |
|
"eval_wer": 0.5228771805941166, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.5108255727140556, |
|
"grad_norm": 3.489818811416626, |
|
"learning_rate": 0.00022427878787878786, |
|
"loss": 0.492, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5108255727140556, |
|
"eval_loss": 0.6456639170646667, |
|
"eval_runtime": 146.9518, |
|
"eval_samples_per_second": 38.489, |
|
"eval_steps_per_second": 4.811, |
|
"eval_wer": 0.5041645937314438, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5186844276788872, |
|
"eval_loss": 0.6671249866485596, |
|
"eval_runtime": 145.8641, |
|
"eval_samples_per_second": 38.776, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.5259103529071913, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.5265432826437189, |
|
"eval_loss": 0.6544414162635803, |
|
"eval_runtime": 146.5937, |
|
"eval_samples_per_second": 38.583, |
|
"eval_steps_per_second": 4.823, |
|
"eval_wer": 0.5179181845902008, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.5304727101261346, |
|
"grad_norm": 1.4167377948760986, |
|
"learning_rate": 0.00022125454545454546, |
|
"loss": 0.4782, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"eval_loss": 0.6560591459274292, |
|
"eval_runtime": 146.1188, |
|
"eval_samples_per_second": 38.708, |
|
"eval_steps_per_second": 4.839, |
|
"eval_wer": 0.5054484761920046, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.542260992573382, |
|
"eval_loss": 0.6381711363792419, |
|
"eval_runtime": 145.8554, |
|
"eval_samples_per_second": 38.778, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.49918954919677105, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.5501198475382136, |
|
"grad_norm": 3.582862615585327, |
|
"learning_rate": 0.0002182242424242424, |
|
"loss": 0.507, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5501198475382136, |
|
"eval_loss": 0.6555091738700867, |
|
"eval_runtime": 148.0584, |
|
"eval_samples_per_second": 38.201, |
|
"eval_steps_per_second": 4.775, |
|
"eval_wer": 0.504437418754313, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5579787025030453, |
|
"eval_loss": 0.6399552822113037, |
|
"eval_runtime": 146.0824, |
|
"eval_samples_per_second": 38.718, |
|
"eval_steps_per_second": 4.84, |
|
"eval_wer": 0.49548233859190194, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.5658375574678769, |
|
"eval_loss": 0.6467686891555786, |
|
"eval_runtime": 146.3336, |
|
"eval_samples_per_second": 38.651, |
|
"eval_steps_per_second": 4.831, |
|
"eval_wer": 0.5014202949719954, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.5697669849502928, |
|
"grad_norm": 2.1453781127929688, |
|
"learning_rate": 0.0002151939393939394, |
|
"loss": 0.4899, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.5736964124327085, |
|
"eval_loss": 0.6370707750320435, |
|
"eval_runtime": 146.4635, |
|
"eval_samples_per_second": 38.617, |
|
"eval_steps_per_second": 4.827, |
|
"eval_wer": 0.49723162844441593, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.5815552673975402, |
|
"eval_loss": 0.6356329917907715, |
|
"eval_runtime": 145.6834, |
|
"eval_samples_per_second": 38.824, |
|
"eval_steps_per_second": 4.853, |
|
"eval_wer": 0.5025597406557429, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.5894141223623718, |
|
"grad_norm": 2.615446090698242, |
|
"learning_rate": 0.00021216363636363634, |
|
"loss": 0.4677, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5894141223623718, |
|
"eval_loss": 0.638607919216156, |
|
"eval_runtime": 145.7689, |
|
"eval_samples_per_second": 38.801, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.5021424788560608, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5972729773272034, |
|
"eval_loss": 0.6653130650520325, |
|
"eval_runtime": 146.0092, |
|
"eval_samples_per_second": 38.737, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.5190255332124344, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.605131832292035, |
|
"eval_loss": 0.6442501544952393, |
|
"eval_runtime": 146.2404, |
|
"eval_samples_per_second": 38.676, |
|
"eval_steps_per_second": 4.835, |
|
"eval_wer": 0.4998154418962944, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.6090612597744509, |
|
"grad_norm": 2.680966854095459, |
|
"learning_rate": 0.0002091333333333333, |
|
"loss": 0.461, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.6129906872568667, |
|
"eval_loss": 0.6210175156593323, |
|
"eval_runtime": 146.9594, |
|
"eval_samples_per_second": 38.487, |
|
"eval_steps_per_second": 4.811, |
|
"eval_wer": 0.4896567219271076, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.6208495422216983, |
|
"eval_loss": 0.6395752429962158, |
|
"eval_runtime": 146.5911, |
|
"eval_samples_per_second": 38.584, |
|
"eval_steps_per_second": 4.823, |
|
"eval_wer": 0.5011635184798832, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.6287083971865299, |
|
"grad_norm": 2.2297749519348145, |
|
"learning_rate": 0.00020610303030303028, |
|
"loss": 0.4528, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6287083971865299, |
|
"eval_loss": 0.6226186752319336, |
|
"eval_runtime": 147.1935, |
|
"eval_samples_per_second": 38.426, |
|
"eval_steps_per_second": 4.803, |
|
"eval_wer": 0.49333183547046267, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6365672521513616, |
|
"eval_loss": 0.6253554224967957, |
|
"eval_runtime": 147.0403, |
|
"eval_samples_per_second": 38.466, |
|
"eval_steps_per_second": 4.808, |
|
"eval_wer": 0.49365280608560286, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.6444261071161932, |
|
"eval_loss": 0.6289177536964417, |
|
"eval_runtime": 146.8167, |
|
"eval_samples_per_second": 38.524, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.5013240037874532, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.648355534598609, |
|
"grad_norm": 1.9119956493377686, |
|
"learning_rate": 0.00020307272727272725, |
|
"loss": 0.451, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6522849620810248, |
|
"eval_loss": 0.6229738593101501, |
|
"eval_runtime": 146.4262, |
|
"eval_samples_per_second": 38.627, |
|
"eval_steps_per_second": 4.828, |
|
"eval_wer": 0.49723162844441593, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.6601438170458565, |
|
"eval_loss": 0.6153121590614319, |
|
"eval_runtime": 146.6615, |
|
"eval_samples_per_second": 38.565, |
|
"eval_steps_per_second": 4.821, |
|
"eval_wer": 0.4957391150840141, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.6680026720106881, |
|
"grad_norm": 3.115481376647949, |
|
"learning_rate": 0.00020004848484848485, |
|
"loss": 0.4444, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6680026720106881, |
|
"eval_loss": 0.6032531261444092, |
|
"eval_runtime": 146.667, |
|
"eval_samples_per_second": 38.564, |
|
"eval_steps_per_second": 4.82, |
|
"eval_wer": 0.47476368538460306, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6758615269755197, |
|
"eval_loss": 0.6153914332389832, |
|
"eval_runtime": 146.5404, |
|
"eval_samples_per_second": 38.597, |
|
"eval_steps_per_second": 4.825, |
|
"eval_wer": 0.4771388679366404, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.6837203819403513, |
|
"eval_loss": 0.6169700622558594, |
|
"eval_runtime": 146.4739, |
|
"eval_samples_per_second": 38.614, |
|
"eval_steps_per_second": 4.827, |
|
"eval_wer": 0.48591741426072443, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.6876498094227671, |
|
"grad_norm": 3.35622501373291, |
|
"learning_rate": 0.0001970181818181818, |
|
"loss": 0.4357, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.691579236905183, |
|
"eval_loss": 0.6020850539207458, |
|
"eval_runtime": 146.4462, |
|
"eval_samples_per_second": 38.622, |
|
"eval_steps_per_second": 4.828, |
|
"eval_wer": 0.4814559227102759, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.6994380918700145, |
|
"eval_loss": 0.6071408987045288, |
|
"eval_runtime": 147.1123, |
|
"eval_samples_per_second": 38.447, |
|
"eval_steps_per_second": 4.806, |
|
"eval_wer": 0.47303044406284606, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.7072969468348461, |
|
"grad_norm": 2.2534916400909424, |
|
"learning_rate": 0.0001939939393939394, |
|
"loss": 0.4413, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7072969468348461, |
|
"eval_loss": 0.6042246222496033, |
|
"eval_runtime": 146.518, |
|
"eval_samples_per_second": 38.603, |
|
"eval_steps_per_second": 4.825, |
|
"eval_wer": 0.47656112082938806, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7151558017996777, |
|
"eval_loss": 0.6118656396865845, |
|
"eval_runtime": 147.1712, |
|
"eval_samples_per_second": 38.431, |
|
"eval_steps_per_second": 4.804, |
|
"eval_wer": 0.4837508626085282, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.7230146567645094, |
|
"eval_loss": 0.6045942902565002, |
|
"eval_runtime": 146.4829, |
|
"eval_samples_per_second": 38.612, |
|
"eval_steps_per_second": 4.827, |
|
"eval_wer": 0.47569450016850956, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.7269440842469252, |
|
"grad_norm": 3.591475248336792, |
|
"learning_rate": 0.00019096363636363634, |
|
"loss": 0.4375, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.730873511729341, |
|
"eval_loss": 0.6081308722496033, |
|
"eval_runtime": 147.4627, |
|
"eval_samples_per_second": 38.355, |
|
"eval_steps_per_second": 4.794, |
|
"eval_wer": 0.4832854552165749, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.7387323666941726, |
|
"eval_loss": 0.6007533073425293, |
|
"eval_runtime": 146.3827, |
|
"eval_samples_per_second": 38.638, |
|
"eval_steps_per_second": 4.83, |
|
"eval_wer": 0.4727897161014909, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.7465912216590043, |
|
"grad_norm": 1.425370693206787, |
|
"learning_rate": 0.0001879333333333333, |
|
"loss": 0.4329, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7465912216590043, |
|
"eval_loss": 0.6008017063140869, |
|
"eval_runtime": 147.3011, |
|
"eval_samples_per_second": 38.398, |
|
"eval_steps_per_second": 4.8, |
|
"eval_wer": 0.46924299080419185, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7544500766238359, |
|
"eval_loss": 0.6007276177406311, |
|
"eval_runtime": 146.7759, |
|
"eval_samples_per_second": 38.535, |
|
"eval_steps_per_second": 4.817, |
|
"eval_wer": 0.4822262521866123, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.7623089315886675, |
|
"eval_loss": 0.5838043093681335, |
|
"eval_runtime": 146.9473, |
|
"eval_samples_per_second": 38.49, |
|
"eval_steps_per_second": 4.811, |
|
"eval_wer": 0.4657925566914349, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.7662383590710834, |
|
"grad_norm": 2.780203342437744, |
|
"learning_rate": 0.00018490303030303028, |
|
"loss": 0.4318, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.7701677865534992, |
|
"eval_loss": 0.6007500290870667, |
|
"eval_runtime": 146.6721, |
|
"eval_samples_per_second": 38.562, |
|
"eval_steps_per_second": 4.82, |
|
"eval_wer": 0.46519876105342556, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.7780266415183308, |
|
"eval_loss": 0.5918843746185303, |
|
"eval_runtime": 147.2498, |
|
"eval_samples_per_second": 38.411, |
|
"eval_steps_per_second": 4.801, |
|
"eval_wer": 0.4664826435139863, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.7858854964831624, |
|
"grad_norm": 3.501138687133789, |
|
"learning_rate": 0.00018187272727272725, |
|
"loss": 0.4265, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.7858854964831624, |
|
"eval_loss": 0.59038907289505, |
|
"eval_runtime": 147.6976, |
|
"eval_samples_per_second": 38.294, |
|
"eval_steps_per_second": 4.787, |
|
"eval_wer": 0.4721959204634816, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.793744351447994, |
|
"eval_loss": 0.5922533273696899, |
|
"eval_runtime": 146.8772, |
|
"eval_samples_per_second": 38.508, |
|
"eval_steps_per_second": 4.814, |
|
"eval_wer": 0.4815201168333039, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"eval_loss": 0.5979217886924744, |
|
"eval_runtime": 146.9133, |
|
"eval_samples_per_second": 38.499, |
|
"eval_steps_per_second": 4.812, |
|
"eval_wer": 0.4661295758373321, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.8055326338952414, |
|
"grad_norm": 2.374830484390259, |
|
"learning_rate": 0.00017884242424242425, |
|
"loss": 0.4321, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.8094620613776573, |
|
"eval_loss": 0.5837874412536621, |
|
"eval_runtime": 146.6078, |
|
"eval_samples_per_second": 38.579, |
|
"eval_steps_per_second": 4.822, |
|
"eval_wer": 0.45608319558344435, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.8173209163424889, |
|
"eval_loss": 0.5824867486953735, |
|
"eval_runtime": 147.7105, |
|
"eval_samples_per_second": 38.291, |
|
"eval_steps_per_second": 4.786, |
|
"eval_wer": 0.4523920335093322, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.8251797713073206, |
|
"grad_norm": 1.430405616760254, |
|
"learning_rate": 0.0001758121212121212, |
|
"loss": 0.4192, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8251797713073206, |
|
"eval_loss": 0.5838850140571594, |
|
"eval_runtime": 146.699, |
|
"eval_samples_per_second": 38.555, |
|
"eval_steps_per_second": 4.819, |
|
"eval_wer": 0.4551523807995378, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8330386262721522, |
|
"eval_loss": 0.5804269909858704, |
|
"eval_runtime": 147.0076, |
|
"eval_samples_per_second": 38.474, |
|
"eval_steps_per_second": 4.809, |
|
"eval_wer": 0.4593731443886312, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.8408974812369838, |
|
"eval_loss": 0.5890819430351257, |
|
"eval_runtime": 146.6585, |
|
"eval_samples_per_second": 38.566, |
|
"eval_steps_per_second": 4.821, |
|
"eval_wer": 0.4722280175249956, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.8448269087193996, |
|
"grad_norm": 2.7897725105285645, |
|
"learning_rate": 0.00017278181818181817, |
|
"loss": 0.4151, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8487563362018153, |
|
"eval_loss": 0.5830910205841064, |
|
"eval_runtime": 147.6653, |
|
"eval_samples_per_second": 38.303, |
|
"eval_steps_per_second": 4.788, |
|
"eval_wer": 0.4525204217553883, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.856615191166647, |
|
"eval_loss": 0.5677404403686523, |
|
"eval_runtime": 146.5378, |
|
"eval_samples_per_second": 38.598, |
|
"eval_steps_per_second": 4.825, |
|
"eval_wer": 0.45430180866941633, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.8644740461314786, |
|
"grad_norm": 2.938485622406006, |
|
"learning_rate": 0.00016975757575757574, |
|
"loss": 0.417, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8644740461314786, |
|
"eval_loss": 0.5605286359786987, |
|
"eval_runtime": 147.3751, |
|
"eval_samples_per_second": 38.378, |
|
"eval_steps_per_second": 4.797, |
|
"eval_wer": 0.446807144805893, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8723329010963102, |
|
"eval_loss": 0.570513129234314, |
|
"eval_runtime": 146.7648, |
|
"eval_samples_per_second": 38.538, |
|
"eval_steps_per_second": 4.817, |
|
"eval_wer": 0.44422333135401454, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.8801917560611419, |
|
"eval_loss": 0.5685856938362122, |
|
"eval_runtime": 147.3241, |
|
"eval_samples_per_second": 38.392, |
|
"eval_steps_per_second": 4.799, |
|
"eval_wer": 0.4551363322687808, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.8841211835435577, |
|
"grad_norm": 5.145638942718506, |
|
"learning_rate": 0.0001667272727272727, |
|
"loss": 0.4014, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.8880506110259735, |
|
"eval_loss": 0.5751659870147705, |
|
"eval_runtime": 146.2417, |
|
"eval_samples_per_second": 38.676, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 0.4602397650495097, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.8959094659908051, |
|
"eval_loss": 0.5623380541801453, |
|
"eval_runtime": 146.6371, |
|
"eval_samples_per_second": 38.571, |
|
"eval_steps_per_second": 4.821, |
|
"eval_wer": 0.4452985829147342, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.9037683209556368, |
|
"grad_norm": 1.9630001783370972, |
|
"learning_rate": 0.00016369696969696968, |
|
"loss": 0.4024, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9037683209556368, |
|
"eval_loss": 0.5631678700447083, |
|
"eval_runtime": 146.9977, |
|
"eval_samples_per_second": 38.477, |
|
"eval_steps_per_second": 4.81, |
|
"eval_wer": 0.4423777503169585, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9116271759204684, |
|
"eval_loss": 0.568145751953125, |
|
"eval_runtime": 146.7017, |
|
"eval_samples_per_second": 38.554, |
|
"eval_steps_per_second": 4.819, |
|
"eval_wer": 0.4471120668902762, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.9194860308853, |
|
"eval_loss": 0.5659225583076477, |
|
"eval_runtime": 147.422, |
|
"eval_samples_per_second": 38.366, |
|
"eval_steps_per_second": 4.796, |
|
"eval_wer": 0.4510760539872575, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.9234154583677158, |
|
"grad_norm": 2.880105972290039, |
|
"learning_rate": 0.00016066666666666665, |
|
"loss": 0.3899, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9273448858501316, |
|
"eval_loss": 0.5653769969940186, |
|
"eval_runtime": 147.0508, |
|
"eval_samples_per_second": 38.463, |
|
"eval_steps_per_second": 4.808, |
|
"eval_wer": 0.4417197605559211, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.9352037408149633, |
|
"eval_loss": 0.5691047310829163, |
|
"eval_runtime": 147.3319, |
|
"eval_samples_per_second": 38.39, |
|
"eval_steps_per_second": 4.799, |
|
"eval_wer": 0.45418946895411727, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.9430625957797949, |
|
"grad_norm": 1.747075080871582, |
|
"learning_rate": 0.00015763636363636365, |
|
"loss": 0.3977, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9430625957797949, |
|
"eval_loss": 0.5613217949867249, |
|
"eval_runtime": 146.5842, |
|
"eval_samples_per_second": 38.585, |
|
"eval_steps_per_second": 4.823, |
|
"eval_wer": 0.4434209048161641, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9509214507446265, |
|
"eval_loss": 0.5688283443450928, |
|
"eval_runtime": 147.1422, |
|
"eval_samples_per_second": 38.439, |
|
"eval_steps_per_second": 4.805, |
|
"eval_wer": 0.44326041950859396, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.9587803057094582, |
|
"eval_loss": 0.57487553358078, |
|
"eval_runtime": 146.7792, |
|
"eval_samples_per_second": 38.534, |
|
"eval_steps_per_second": 4.817, |
|
"eval_wer": 0.4454751167530613, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.9627097331918739, |
|
"grad_norm": null,
|
"learning_rate": 0.0001546121212121212, |
|
"loss": 0.3889, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.9666391606742898, |
|
"eval_loss": 0.5499551892280579, |
|
"eval_runtime": 147.156, |
|
"eval_samples_per_second": 38.435, |
|
"eval_steps_per_second": 4.804, |
|
"eval_wer": 0.43180176854808944, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.9744980156391214, |
|
"eval_loss": 0.5436142086982727, |
|
"eval_runtime": 147.2848, |
|
"eval_samples_per_second": 38.402, |
|
"eval_steps_per_second": 4.8, |
|
"eval_wer": 0.4371780263516875, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.982356870603953, |
|
"grad_norm": 4.918150424957275, |
|
"learning_rate": 0.0001515818181818182, |
|
"loss": 0.39, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.982356870603953, |
|
"eval_loss": 0.547515332698822, |
|
"eval_runtime": 147.2374, |
|
"eval_samples_per_second": 38.414, |
|
"eval_steps_per_second": 4.802, |
|
"eval_wer": 0.4388310250196594, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9902157255687847, |
|
"eval_loss": 0.5531713366508484, |
|
"eval_runtime": 146.8558, |
|
"eval_samples_per_second": 38.514, |
|
"eval_steps_per_second": 4.814, |
|
"eval_wer": 0.4423777503169585, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.9980745805336163, |
|
"eval_loss": 0.5450366139411926, |
|
"eval_runtime": 147.6783, |
|
"eval_samples_per_second": 38.299, |
|
"eval_steps_per_second": 4.787, |
|
"eval_wer": 0.4280945579432203, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.002004008016032, |
|
"grad_norm": 1.2219481468200684, |
|
"learning_rate": 0.00014855151515151514, |
|
"loss": 0.3853, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.005933435498448, |
|
"eval_loss": 0.5462915897369385, |
|
"eval_runtime": 145.543, |
|
"eval_samples_per_second": 38.861, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.43079071111039785, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.0137922904632795, |
|
"eval_loss": 0.5457944869995117, |
|
"eval_runtime": 145.2381, |
|
"eval_samples_per_second": 38.943, |
|
"eval_steps_per_second": 4.868, |
|
"eval_wer": 0.4277896358588371, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.0216511454281112, |
|
"grad_norm": 4.69161319732666, |
|
"learning_rate": 0.0001455212121212121, |
|
"loss": 0.3413, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0216511454281112, |
|
"eval_loss": 0.5470069646835327, |
|
"eval_runtime": 145.5418, |
|
"eval_samples_per_second": 38.862, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.43441767906148193, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0295100003929427, |
|
"eval_loss": 0.5358372330665588, |
|
"eval_runtime": 145.609, |
|
"eval_samples_per_second": 38.844, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.42258991189356615, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.0373688553577745, |
|
"eval_loss": 0.5403576493263245, |
|
"eval_runtime": 146.3753, |
|
"eval_samples_per_second": 38.64, |
|
"eval_steps_per_second": 4.83, |
|
"eval_wer": 0.42308741634703345, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.0412982828401902, |
|
"grad_norm": 1.2460460662841797, |
|
"learning_rate": 0.00014249090909090908, |
|
"loss": 0.339, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.045227710322606, |
|
"eval_loss": 0.5345466732978821, |
|
"eval_runtime": 145.3146, |
|
"eval_samples_per_second": 38.922, |
|
"eval_steps_per_second": 4.865, |
|
"eval_wer": 0.42433920174608014, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.0530865652874377, |
|
"eval_loss": 0.5396625995635986, |
|
"eval_runtime": 145.9713, |
|
"eval_samples_per_second": 38.747, |
|
"eval_steps_per_second": 4.843, |
|
"eval_wer": 0.4199579528494166, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.0609454202522692, |
|
"grad_norm": 1.021347165107727, |
|
"learning_rate": 0.00013946060606060605, |
|
"loss": 0.3235, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0609454202522692, |
|
"eval_loss": 0.5378654599189758, |
|
"eval_runtime": 145.6291, |
|
"eval_samples_per_second": 38.838, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.4183049541814447, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0688042752171008, |
|
"eval_loss": 0.5305435657501221, |
|
"eval_runtime": 145.36, |
|
"eval_samples_per_second": 38.91, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.42753285936672497, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.0766631301819325, |
|
"eval_loss": 0.5440751910209656, |
|
"eval_runtime": 145.3458, |
|
"eval_samples_per_second": 38.914, |
|
"eval_steps_per_second": 4.864, |
|
"eval_wer": 0.4247564635457624, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.0805925576643483, |
|
"grad_norm": 0.5985044836997986, |
|
"learning_rate": 0.00013643636363636362, |
|
"loss": 0.3252, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.0845219851467642, |
|
"eval_loss": 0.5361995697021484, |
|
"eval_runtime": 146.0428, |
|
"eval_samples_per_second": 38.728, |
|
"eval_steps_per_second": 4.841, |
|
"eval_wer": 0.4177753526664634, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.0923808401115958, |
|
"eval_loss": 0.5305026173591614, |
|
"eval_runtime": 145.9537, |
|
"eval_samples_per_second": 38.752, |
|
"eval_steps_per_second": 4.844, |
|
"eval_wer": 0.42015053521850076, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.1002396950764273, |
|
"grad_norm": 1.615342378616333, |
|
"learning_rate": 0.0001334060606060606, |
|
"loss": 0.3301, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.1002396950764273, |
|
"eval_loss": 0.5307178497314453, |
|
"eval_runtime": 146.253, |
|
"eval_samples_per_second": 38.673, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 0.41851358508128583, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.108098550041259, |
|
"eval_loss": 0.5402148365974426, |
|
"eval_runtime": 145.7202, |
|
"eval_samples_per_second": 38.814, |
|
"eval_steps_per_second": 4.852, |
|
"eval_wer": 0.431127730256295, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.1159574050060905, |
|
"eval_loss": 0.5308640003204346, |
|
"eval_runtime": 145.81, |
|
"eval_samples_per_second": 38.79, |
|
"eval_steps_per_second": 4.849, |
|
"eval_wer": 0.41788769238176243, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.1198868324885065, |
|
"grad_norm": 1.1408910751342773, |
|
"learning_rate": 0.00013037575757575756, |
|
"loss": 0.3087, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.1238162599709223, |
|
"eval_loss": 0.5298367738723755, |
|
"eval_runtime": 145.4349, |
|
"eval_samples_per_second": 38.89, |
|
"eval_steps_per_second": 4.861, |
|
"eval_wer": 0.42137022355603343, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.1316751149357538, |
|
"eval_loss": 0.5330610275268555, |
|
"eval_runtime": 145.6355, |
|
"eval_samples_per_second": 38.837, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.4214665147405755, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.1395339699005855, |
|
"grad_norm": 0.8552046418190002, |
|
"learning_rate": 0.00012734545454545453, |
|
"loss": 0.3222, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1395339699005855, |
|
"eval_loss": 0.5273275971412659, |
|
"eval_runtime": 145.8763, |
|
"eval_samples_per_second": 38.773, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.4145495979843045, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.147392824865417, |
|
"eval_loss": 0.5282542705535889, |
|
"eval_runtime": 145.6375, |
|
"eval_samples_per_second": 38.836, |
|
"eval_steps_per_second": 4.855, |
|
"eval_wer": 0.4130731331546597, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.1552516798302488, |
|
"eval_loss": 0.5256520509719849, |
|
"eval_runtime": 145.9987, |
|
"eval_samples_per_second": 38.74, |
|
"eval_steps_per_second": 4.843, |
|
"eval_wer": 0.41159666832501485, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.1591811073126645, |
|
"grad_norm": 3.544210195541382, |
|
"learning_rate": 0.0001243151515151515, |
|
"loss": 0.3227, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.1631105347950803, |
|
"eval_loss": 0.5168554186820984, |
|
"eval_runtime": 145.3157, |
|
"eval_samples_per_second": 38.922, |
|
"eval_steps_per_second": 4.865, |
|
"eval_wer": 0.408419059235127, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.170969389759912, |
|
"eval_loss": 0.5184837579727173, |
|
"eval_runtime": 145.4598, |
|
"eval_samples_per_second": 38.884, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.41068190207186533, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.1788282447247436, |
|
"grad_norm": 0.8857652544975281, |
|
"learning_rate": 0.00012128484848484848, |
|
"loss": 0.309, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1788282447247436, |
|
"eval_loss": 0.5076336860656738, |
|
"eval_runtime": 145.8517, |
|
"eval_samples_per_second": 38.779, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.40275392787790276, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1866870996895753, |
|
"eval_loss": 0.5178284049034119, |
|
"eval_runtime": 146.4004, |
|
"eval_samples_per_second": 38.634, |
|
"eval_steps_per_second": 4.829, |
|
"eval_wer": 0.40535378986053827, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.1945459546544068, |
|
"eval_loss": 0.5225840210914612, |
|
"eval_runtime": 149.501, |
|
"eval_samples_per_second": 37.833, |
|
"eval_steps_per_second": 4.729, |
|
"eval_wer": 0.4122065124937812, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.1984753821368226, |
|
"grad_norm": 1.1116445064544678, |
|
"learning_rate": 0.00011826060606060606, |
|
"loss": 0.3138, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.2024048096192386, |
|
"eval_loss": 0.5226925015449524, |
|
"eval_runtime": 145.5048, |
|
"eval_samples_per_second": 38.872, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.4072635650206224, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.21026366458407, |
|
"eval_loss": 0.5130230784416199, |
|
"eval_runtime": 144.8014, |
|
"eval_samples_per_second": 39.06, |
|
"eval_steps_per_second": 4.883, |
|
"eval_wer": 0.40498467365312707, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.2181225195489018, |
|
"grad_norm": 1.0480467081069946, |
|
"learning_rate": 0.00011523030303030302, |
|
"loss": 0.3083, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2181225195489018, |
|
"eval_loss": 0.516806423664093, |
|
"eval_runtime": 145.4982, |
|
"eval_samples_per_second": 38.873, |
|
"eval_steps_per_second": 4.859, |
|
"eval_wer": 0.4113077947713887, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2259813745137333, |
|
"eval_loss": 0.505409836769104, |
|
"eval_runtime": 145.5358, |
|
"eval_samples_per_second": 38.863, |
|
"eval_steps_per_second": 4.858, |
|
"eval_wer": 0.4003947938566224, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.2338402294785649, |
|
"eval_loss": 0.5144046545028687, |
|
"eval_runtime": 145.0631, |
|
"eval_samples_per_second": 38.99, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.406653720851856, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.2377696569609808, |
|
"grad_norm": 1.0551427602767944, |
|
"learning_rate": 0.00011219999999999999, |
|
"loss": 0.2981, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.2416990844433966, |
|
"eval_loss": 0.5082244277000427, |
|
"eval_runtime": 145.8395, |
|
"eval_samples_per_second": 38.782, |
|
"eval_steps_per_second": 4.848, |
|
"eval_wer": 0.39923929964211774, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.2495579394082281, |
|
"eval_loss": 0.5134223103523254, |
|
"eval_runtime": 145.7659, |
|
"eval_samples_per_second": 38.802, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.396125884675258, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.2574167943730599, |
|
"grad_norm": 2.2508976459503174, |
|
"learning_rate": 0.00010916969696969696, |
|
"loss": 0.2952, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2574167943730599, |
|
"eval_loss": 0.49696260690689087, |
|
"eval_runtime": 145.5612, |
|
"eval_samples_per_second": 38.857, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.3999454349954262, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2652756493378914, |
|
"eval_loss": 0.50291907787323, |
|
"eval_runtime": 145.2238, |
|
"eval_samples_per_second": 38.947, |
|
"eval_steps_per_second": 4.868, |
|
"eval_wer": 0.4005713276949495, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.2731345043027231, |
|
"eval_loss": 0.4979938268661499, |
|
"eval_runtime": 146.0479, |
|
"eval_samples_per_second": 38.727, |
|
"eval_steps_per_second": 4.841, |
|
"eval_wer": 0.4001540658952673, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.2770639317851389, |
|
"grad_norm": 0.7384321689605713, |
|
"learning_rate": 0.00010614545454545453, |
|
"loss": 0.2995, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.2809933592675546, |
|
"eval_loss": 0.49917110800743103, |
|
"eval_runtime": 145.9484, |
|
"eval_samples_per_second": 38.753, |
|
"eval_steps_per_second": 4.844, |
|
"eval_wer": 0.40463160597647285, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 1.2888522142323864, |
|
"eval_loss": 0.49689990282058716, |
|
"eval_runtime": 146.3024, |
|
"eval_samples_per_second": 38.66, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.3911829372020991, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 1.296711069197218, |
|
"grad_norm": 0.6462344527244568, |
|
"learning_rate": 0.0001031151515151515, |
|
"loss": 0.3046, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.296711069197218, |
|
"eval_loss": 0.49431467056274414, |
|
"eval_runtime": 145.566, |
|
"eval_samples_per_second": 38.855, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.3933334403235384, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.3045699241620496, |
|
"eval_loss": 0.4882897138595581, |
|
"eval_runtime": 146.7921, |
|
"eval_samples_per_second": 38.531, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.3932050520774823, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 1.3124287791268812, |
|
"eval_loss": 0.49653205275535583, |
|
"eval_runtime": 146.2261, |
|
"eval_samples_per_second": 38.68, |
|
"eval_steps_per_second": 4.835, |
|
"eval_wer": 0.3935099741618655, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 1.316358206609297, |
|
"grad_norm": 4.335805416107178, |
|
"learning_rate": 0.00010009090909090908, |
|
"loss": 0.2972, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.320287634091713, |
|
"eval_loss": 0.49103957414627075, |
|
"eval_runtime": 146.0953, |
|
"eval_samples_per_second": 38.714, |
|
"eval_steps_per_second": 4.839, |
|
"eval_wer": 0.3942000609844169, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.3281464890565444, |
|
"eval_loss": 0.5007916688919067, |
|
"eval_runtime": 145.7572, |
|
"eval_samples_per_second": 38.804, |
|
"eval_steps_per_second": 4.851, |
|
"eval_wer": 0.4097029416956878, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.3360053440213762, |
|
"grad_norm": 0.6741358637809753, |
|
"learning_rate": 9.706060606060605e-05, |
|
"loss": 0.3093, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3360053440213762, |
|
"eval_loss": 0.4958365857601166, |
|
"eval_runtime": 146.2684, |
|
"eval_samples_per_second": 38.669, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 0.39574071993708976, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3438641989862077, |
|
"eval_loss": 0.5045068264007568, |
|
"eval_runtime": 146.1991, |
|
"eval_samples_per_second": 38.687, |
|
"eval_steps_per_second": 4.836, |
|
"eval_wer": 0.40179101603248224, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.3517230539510394, |
|
"eval_loss": 0.492519348859787, |
|
"eval_runtime": 146.1528, |
|
"eval_samples_per_second": 38.699, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 0.3969925053361365, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.3556524814334552, |
|
"grad_norm": 0.9136665463447571, |
|
"learning_rate": 9.403030303030303e-05, |
|
"loss": 0.2947, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.359581908915871, |
|
"eval_loss": 0.4828738868236542, |
|
"eval_runtime": 145.0639, |
|
"eval_samples_per_second": 38.99, |
|
"eval_steps_per_second": 4.874, |
|
"eval_wer": 0.3905409959718188, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.3674407638807025, |
|
"eval_loss": 0.4869907796382904, |
|
"eval_runtime": 145.4878, |
|
"eval_samples_per_second": 38.876, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.39522716695286547, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.3752996188455342, |
|
"grad_norm": 1.0685299634933472, |
|
"learning_rate": 9.099999999999999e-05, |
|
"loss": 0.2801, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3752996188455342, |
|
"eval_loss": 0.4897337555885315, |
|
"eval_runtime": 145.9513, |
|
"eval_samples_per_second": 38.753, |
|
"eval_steps_per_second": 4.844, |
|
"eval_wer": 0.3936704594694356, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.383158473810366, |
|
"eval_loss": 0.5006551146507263, |
|
"eval_runtime": 145.7634, |
|
"eval_samples_per_second": 38.803, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.39972075556482806, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.3910173287751975, |
|
"eval_loss": 0.48228171467781067, |
|
"eval_runtime": 145.956, |
|
"eval_samples_per_second": 38.751, |
|
"eval_steps_per_second": 4.844, |
|
"eval_wer": 0.38492401020686556, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.3949467562576132, |
|
"grad_norm": 0.6772143244743347, |
|
"learning_rate": 8.796969696969696e-05, |
|
"loss": 0.2772, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.398876183740029, |
|
"eval_loss": 0.4848904013633728, |
|
"eval_runtime": 145.8656, |
|
"eval_samples_per_second": 38.775, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.39121503426361315, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.4067350387048607, |
|
"eval_loss": 0.4844968020915985, |
|
"eval_runtime": 146.3634, |
|
"eval_samples_per_second": 38.644, |
|
"eval_steps_per_second": 4.83, |
|
"eval_wer": 0.3881658134197814, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.4145938936696925, |
|
"grad_norm": 1.0455658435821533, |
|
"learning_rate": 8.493939393939393e-05, |
|
"loss": 0.281, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.4145938936696925, |
|
"eval_loss": 0.482947438955307, |
|
"eval_runtime": 145.7025, |
|
"eval_samples_per_second": 38.819, |
|
"eval_steps_per_second": 4.852, |
|
"eval_wer": 0.38418577779204316, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.422452748634524, |
|
"eval_loss": 0.48147863149642944, |
|
"eval_runtime": 146.3811, |
|
"eval_samples_per_second": 38.639, |
|
"eval_steps_per_second": 4.83, |
|
"eval_wer": 0.3859190191138001, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.4303116035993555, |
|
"eval_loss": 0.4771769642829895, |
|
"eval_runtime": 145.8053, |
|
"eval_samples_per_second": 38.791, |
|
"eval_steps_per_second": 4.849, |
|
"eval_wer": 0.38075139221004317, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.4342410310817715, |
|
"grad_norm": 0.6518095135688782, |
|
"learning_rate": 8.19090909090909e-05, |
|
"loss": 0.2697, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.4381704585641872, |
|
"eval_loss": 0.48701608180999756, |
|
"eval_runtime": 145.4126, |
|
"eval_samples_per_second": 38.896, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.3914236651634543, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.4460293135290188, |
|
"eval_loss": 0.47700512409210205, |
|
"eval_runtime": 145.4281, |
|
"eval_samples_per_second": 38.892, |
|
"eval_steps_per_second": 4.862, |
|
"eval_wer": 0.38662515446710854, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.4538881684938505, |
|
"grad_norm": 2.1603991985321045, |
|
"learning_rate": 7.887878787878789e-05, |
|
"loss": 0.2766, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.4538881684938505, |
|
"eval_loss": 0.4786865711212158, |
|
"eval_runtime": 145.7912, |
|
"eval_samples_per_second": 38.795, |
|
"eval_steps_per_second": 4.849, |
|
"eval_wer": 0.38209946879363194, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.461747023458682, |
|
"eval_loss": 0.4793393015861511, |
|
"eval_runtime": 145.5675, |
|
"eval_samples_per_second": 38.855, |
|
"eval_steps_per_second": 4.857, |
|
"eval_wer": 0.38099212017139833, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.4696058784235138, |
|
"eval_loss": 0.4738729000091553, |
|
"eval_runtime": 145.8624, |
|
"eval_samples_per_second": 38.776, |
|
"eval_steps_per_second": 4.847, |
|
"eval_wer": 0.3803341304103609, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.4735353059059295, |
|
"grad_norm": 1.9566117525100708, |
|
"learning_rate": 7.585454545454545e-05, |
|
"loss": 0.2905, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.4774647333883453, |
|
"eval_loss": 0.47245293855667114, |
|
"eval_runtime": 145.8323, |
|
"eval_samples_per_second": 38.784, |
|
"eval_steps_per_second": 4.848, |
|
"eval_wer": 0.3811205084174544, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.485323588353177, |
|
"eval_loss": 0.47267088294029236, |
|
"eval_runtime": 145.9296, |
|
"eval_samples_per_second": 38.758, |
|
"eval_steps_per_second": 4.845, |
|
"eval_wer": 0.37827991847346376, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.4931824433180085, |
|
"grad_norm": 2.518251895904541, |
|
"learning_rate": 7.282424242424242e-05, |
|
"loss": 0.2799, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.4931824433180085, |
|
"eval_loss": 0.47050511837005615, |
|
"eval_runtime": 146.8142, |
|
"eval_samples_per_second": 38.525, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.3776700743046974, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.50104129828284, |
|
"eval_loss": 0.4659024178981781, |
|
"eval_runtime": 145.787, |
|
"eval_samples_per_second": 38.796, |
|
"eval_steps_per_second": 4.85, |
|
"eval_wer": 0.37508626085281893, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.5089001532476718, |
|
"eval_loss": 0.46910360455513, |
|
"eval_runtime": 146.8808, |
|
"eval_samples_per_second": 38.507, |
|
"eval_steps_per_second": 4.813, |
|
"eval_wer": 0.37429988284572546, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.5128295807300876, |
|
"grad_norm": 1.3675510883331299, |
|
"learning_rate": 6.979393939393939e-05, |
|
"loss": 0.267, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.5167590082125035, |
|
"eval_loss": 0.4690033495426178, |
|
"eval_runtime": 145.2501, |
|
"eval_samples_per_second": 38.94, |
|
"eval_steps_per_second": 4.867, |
|
"eval_wer": 0.3663558601210059, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.524617863177335, |
|
"eval_loss": 0.4632550776004791, |
|
"eval_runtime": 146.3252, |
|
"eval_samples_per_second": 38.654, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.36810514997351995, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.5324767181421666, |
|
"grad_norm": 1.2868680953979492, |
|
"learning_rate": 6.676969696969697e-05, |
|
"loss": 0.2632, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.5324767181421666, |
|
"eval_loss": 0.4650620222091675, |
|
"eval_runtime": 146.2691, |
|
"eval_samples_per_second": 38.668, |
|
"eval_steps_per_second": 4.834, |
|
"eval_wer": 0.37255059299321147, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.5403355731069983, |
|
"eval_loss": 0.46896418929100037, |
|
"eval_runtime": 145.9823, |
|
"eval_samples_per_second": 38.744, |
|
"eval_steps_per_second": 4.843, |
|
"eval_wer": 0.3673990146202115, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.54819442807183, |
|
"eval_loss": 0.4612589180469513, |
|
"eval_runtime": 145.4614, |
|
"eval_samples_per_second": 38.883, |
|
"eval_steps_per_second": 4.86, |
|
"eval_wer": 0.3714913899632489, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.5521238555542456, |
|
"grad_norm": 2.942875623703003, |
|
"learning_rate": 6.373939393939393e-05, |
|
"loss": 0.2716, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.5560532830366616, |
|
"eval_loss": 0.4654790461063385, |
|
"eval_runtime": 146.1694, |
|
"eval_samples_per_second": 38.695, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 0.36967790598770683, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.563912138001493, |
|
"eval_loss": 0.4596673846244812, |
|
"eval_runtime": 145.7967, |
|
"eval_samples_per_second": 38.794, |
|
"eval_steps_per_second": 4.849, |
|
"eval_wer": 0.364799152637576, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.5717709929663248, |
|
"grad_norm": 0.4809035658836365, |
|
"learning_rate": 6.07090909090909e-05, |
|
"loss": 0.2651, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5717709929663248, |
|
"eval_loss": 0.4549534320831299, |
|
"eval_runtime": 146.3998, |
|
"eval_samples_per_second": 38.634, |
|
"eval_steps_per_second": 4.829, |
|
"eval_wer": 0.36619537481343584, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5796298479311566, |
|
"eval_loss": 0.4538833498954773, |
|
"eval_runtime": 146.0948, |
|
"eval_samples_per_second": 38.715, |
|
"eval_steps_per_second": 4.839, |
|
"eval_wer": 0.3676397425815667, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.587488702895988, |
|
"eval_loss": 0.4542824625968933, |
|
"eval_runtime": 146.3082, |
|
"eval_samples_per_second": 38.658, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.36746320874323957, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 1.5914181303784039, |
|
"grad_norm": 1.2710328102111816, |
|
"learning_rate": 5.767878787878788e-05, |
|
"loss": 0.2659, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.5953475578608196, |
|
"eval_loss": 0.45555397868156433, |
|
"eval_runtime": 146.1729, |
|
"eval_samples_per_second": 38.694, |
|
"eval_steps_per_second": 4.837, |
|
"eval_wer": 0.3622795333087256, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 1.6032064128256514, |
|
"eval_loss": 0.463294118642807, |
|
"eval_runtime": 146.3048, |
|
"eval_samples_per_second": 38.659, |
|
"eval_steps_per_second": 4.832, |
|
"eval_wer": 0.36849031471168814, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.611065267790483, |
|
"grad_norm": 1.9250500202178955, |
|
"learning_rate": 5.4660606060606054e-05, |
|
"loss": 0.2559, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.611065267790483, |
|
"eval_loss": 0.4529285132884979, |
|
"eval_runtime": 146.9183, |
|
"eval_samples_per_second": 38.498, |
|
"eval_steps_per_second": 4.812, |
|
"eval_wer": 0.36083516554059475, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.6189241227553146, |
|
"eval_loss": 0.45345816016197205, |
|
"eval_runtime": 145.5972, |
|
"eval_samples_per_second": 38.847, |
|
"eval_steps_per_second": 4.856, |
|
"eval_wer": 0.36385228932291247, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 1.6267829777201461, |
|
"eval_loss": 0.4511209726333618, |
|
"eval_runtime": 146.7532, |
|
"eval_samples_per_second": 38.541, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.3637078525460994, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.630712405202562, |
|
"grad_norm": 0.9593771696090698, |
|
"learning_rate": 5.1630303030303025e-05, |
|
"loss": 0.2629, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.6346418326849776, |
|
"eval_loss": 0.45563140511512756, |
|
"eval_runtime": 146.0124, |
|
"eval_samples_per_second": 38.736, |
|
"eval_steps_per_second": 4.842, |
|
"eval_wer": 0.36049814639469757, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.6425006876498094, |
|
"eval_loss": 0.457055002450943, |
|
"eval_runtime": 147.3584, |
|
"eval_samples_per_second": 38.383, |
|
"eval_steps_per_second": 4.798, |
|
"eval_wer": 0.36390043491518353, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.6503595426146411, |
|
"grad_norm": 0.9599024653434753, |
|
"learning_rate": 4.8599999999999995e-05, |
|
"loss": 0.259, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6503595426146411, |
|
"eval_loss": 0.46201661229133606, |
|
"eval_runtime": 146.8464, |
|
"eval_samples_per_second": 38.516, |
|
"eval_steps_per_second": 4.815, |
|
"eval_wer": 0.36903596475742645, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6582183975794726, |
|
"eval_loss": 0.45499464869499207, |
|
"eval_runtime": 146.9092, |
|
"eval_samples_per_second": 38.5, |
|
"eval_steps_per_second": 4.812, |
|
"eval_wer": 0.36348317311550127, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.6660772525443042, |
|
"eval_loss": 0.45219454169273376, |
|
"eval_runtime": 146.8863, |
|
"eval_samples_per_second": 38.506, |
|
"eval_steps_per_second": 4.813, |
|
"eval_wer": 0.3584278859270434, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 1.6700066800267201, |
|
"grad_norm": 1.0676679611206055, |
|
"learning_rate": 4.5569696969696966e-05, |
|
"loss": 0.2594, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.673936107509136, |
|
"eval_loss": 0.4494900703430176, |
|
"eval_runtime": 147.3169, |
|
"eval_samples_per_second": 38.393, |
|
"eval_steps_per_second": 4.799, |
|
"eval_wer": 0.3589253903805107, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.6817949624739676, |
|
"eval_loss": 0.4453260898590088, |
|
"eval_runtime": 146.8159, |
|
"eval_samples_per_second": 38.524, |
|
"eval_steps_per_second": 4.816, |
|
"eval_wer": 0.3562131886825761, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 1.6896538174387992, |
|
"grad_norm": 0.4820586144924164, |
|
"learning_rate": 4.253939393939394e-05, |
|
"loss": 0.2538, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.6896538174387992, |
|
"eval_loss": 0.4438420832157135, |
|
"eval_runtime": 147.9055, |
|
"eval_samples_per_second": 38.241, |
|
"eval_steps_per_second": 4.78, |
|
"eval_wer": 0.3555391503907817, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.6975126724036307, |
|
"eval_loss": 0.4494447708129883, |
|
"eval_runtime": 146.855, |
|
"eval_samples_per_second": 38.514, |
|
"eval_steps_per_second": 4.814, |
|
"eval_wer": 0.3566946446052864, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.7053715273684624, |
|
"eval_loss": 0.4443654716014862, |
|
"eval_runtime": 146.8467, |
|
"eval_samples_per_second": 38.516, |
|
"eval_steps_per_second": 4.815, |
|
"eval_wer": 0.3537898605382677, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 1.7093009548508782, |
|
"grad_norm": 0.7214144468307495, |
|
"learning_rate": 3.950909090909091e-05, |
|
"loss": 0.2512, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.7132303823332942, |
|
"eval_loss": 0.4454784691333771, |
|
"eval_runtime": 147.1352, |
|
"eval_samples_per_second": 38.441, |
|
"eval_steps_per_second": 4.805, |
|
"eval_wer": 0.3529713854696602, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 1.7210892372981257, |
|
"eval_loss": 0.4453714191913605, |
|
"eval_runtime": 147.5374, |
|
"eval_samples_per_second": 38.336, |
|
"eval_steps_per_second": 4.792, |
|
"eval_wer": 0.3522010559933238, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.7289480922629572, |
|
"grad_norm": 1.9711872339248657, |
|
"learning_rate": 3.647878787878787e-05, |
|
"loss": 0.2358, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.7289480922629572, |
|
"eval_loss": 0.44450756907463074, |
|
"eval_runtime": 146.8893, |
|
"eval_samples_per_second": 38.505, |
|
"eval_steps_per_second": 4.813, |
|
"eval_wer": 0.3519763765627257, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.736806947227789, |
|
"eval_loss": 0.44162794947624207, |
|
"eval_runtime": 147.6037, |
|
"eval_samples_per_second": 38.319, |
|
"eval_steps_per_second": 4.79, |
|
"eval_wer": 0.34998635874885653, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 1.7446658021926207, |
|
"eval_loss": 0.44202086329460144, |
|
"eval_runtime": 148.0767, |
|
"eval_samples_per_second": 38.196, |
|
"eval_steps_per_second": 4.775, |
|
"eval_wer": 0.34897530131116494, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.7485952296750362, |
|
"grad_norm": 1.1429784297943115, |
|
"learning_rate": 3.344848484848484e-05, |
|
"loss": 0.2418, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.7525246571574522, |
|
"eval_loss": 0.43861278891563416, |
|
"eval_runtime": 147.9549, |
|
"eval_samples_per_second": 38.228, |
|
"eval_steps_per_second": 4.778, |
|
"eval_wer": 0.34790004975044536, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.7603835121222837, |
|
"eval_loss": 0.4354783296585083, |
|
"eval_runtime": 149.8154, |
|
"eval_samples_per_second": 37.753, |
|
"eval_steps_per_second": 4.719, |
|
"eval_wer": 0.3460705172441463, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.7682423670871152, |
|
"grad_norm": 1.684985637664795, |
|
"learning_rate": 3.0418181818181817e-05, |
|
"loss": 0.2421, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7682423670871152, |
|
"eval_loss": 0.43855908513069153, |
|
"eval_runtime": 148.5791, |
|
"eval_samples_per_second": 38.067, |
|
"eval_steps_per_second": 4.758, |
|
"eval_wer": 0.34372743175362297, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.776101222051947, |
|
"eval_loss": 0.4347515106201172, |
|
"eval_runtime": 147.9309, |
|
"eval_samples_per_second": 38.234, |
|
"eval_steps_per_second": 4.779, |
|
"eval_wer": 0.3458297892827912, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.7839600770167787, |
|
"eval_loss": 0.43350183963775635, |
|
"eval_runtime": 148.2161, |
|
"eval_samples_per_second": 38.16, |
|
"eval_steps_per_second": 4.77, |
|
"eval_wer": 0.3435348493845388, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.7878895044991945, |
|
"grad_norm": 2.4373562335968018, |
|
"learning_rate": 2.7387878787878784e-05, |
|
"loss": 0.2418, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.7918189319816102, |
|
"eval_loss": 0.43087294697761536, |
|
"eval_runtime": 146.7738, |
|
"eval_samples_per_second": 38.535, |
|
"eval_steps_per_second": 4.817, |
|
"eval_wer": 0.3443693729839033, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.7996777869464418, |
|
"eval_loss": 0.43208202719688416, |
|
"eval_runtime": 147.2129, |
|
"eval_samples_per_second": 38.421, |
|
"eval_steps_per_second": 4.803, |
|
"eval_wer": 0.34249169488533326, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.8075366419112735, |
|
"grad_norm": 1.2847892045974731, |
|
"learning_rate": 2.4357575757575755e-05, |
|
"loss": 0.2424, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.8075366419112735, |
|
"eval_loss": 0.42999544739723206, |
|
"eval_runtime": 147.0735, |
|
"eval_samples_per_second": 38.457, |
|
"eval_steps_per_second": 4.807, |
|
"eval_wer": 0.34075845356357626, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.8153954968761052, |
|
"eval_loss": 0.4301421046257019, |
|
"eval_runtime": 146.951, |
|
"eval_samples_per_second": 38.489, |
|
"eval_steps_per_second": 4.811, |
|
"eval_wer": 0.34231516104700616, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.8232543518409368, |
|
"eval_loss": 0.4339451491832733, |
|
"eval_runtime": 146.5189, |
|
"eval_samples_per_second": 38.603, |
|
"eval_steps_per_second": 4.825, |
|
"eval_wer": 0.3407424050328192, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.8271837793233527, |
|
"grad_norm": 7.262228965759277, |
|
"learning_rate": 2.133333333333333e-05, |
|
"loss": 0.228, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.8311132068057683, |
|
"eval_loss": 0.43165403604507446, |
|
"eval_runtime": 146.7443, |
|
"eval_samples_per_second": 38.543, |
|
"eval_steps_per_second": 4.818, |
|
"eval_wer": 0.3428929081542585, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.8389720617706, |
|
"eval_loss": 0.43002423644065857, |
|
"eval_runtime": 146.6705, |
|
"eval_samples_per_second": 38.563, |
|
"eval_steps_per_second": 4.82, |
|
"eval_wer": 0.34332621848469774, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.8468309167354318, |
|
"grad_norm": 0.922248125076294, |
|
"learning_rate": 1.8303030303030302e-05, |
|
"loss": 0.2532, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.8468309167354318, |
|
"eval_loss": 0.42492908239364624, |
|
"eval_runtime": 147.1617, |
|
"eval_samples_per_second": 38.434, |
|
"eval_steps_per_second": 4.804, |
|
"eval_wer": 0.3439360626534641, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.8546897717002633, |
|
"eval_loss": 0.42566677927970886, |
|
"eval_runtime": 147.1363, |
|
"eval_samples_per_second": 38.441, |
|
"eval_steps_per_second": 4.805, |
|
"eval_wer": 0.3430373449310716, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.8625486266650948, |
|
"eval_loss": 0.42639264464378357, |
|
"eval_runtime": 147.0021, |
|
"eval_samples_per_second": 38.476, |
|
"eval_steps_per_second": 4.809, |
|
"eval_wer": 0.3408226476866043, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.8664780541475108, |
|
"grad_norm": 0.7899935841560364, |
|
"learning_rate": 1.5272727272727273e-05, |
|
"loss": 0.2347, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.8704074816299265, |
|
"eval_loss": 0.4254419207572937, |
|
"eval_runtime": 146.4448, |
|
"eval_samples_per_second": 38.622, |
|
"eval_steps_per_second": 4.828, |
|
"eval_wer": 0.3408868418096323, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.8782663365947583, |
|
"eval_loss": 0.423650860786438, |
|
"eval_runtime": 147.0702, |
|
"eval_samples_per_second": 38.458, |
|
"eval_steps_per_second": 4.807, |
|
"eval_wer": 0.3391215034263613, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.8861251915595898, |
|
"grad_norm": 1.1323833465576172, |
|
"learning_rate": 1.2242424242424242e-05, |
|
"loss": 0.2265, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.8861251915595898, |
|
"eval_loss": 0.4246509373188019, |
|
"eval_runtime": 147.0222, |
|
"eval_samples_per_second": 38.47, |
|
"eval_steps_per_second": 4.809, |
|
"eval_wer": 0.33952271669528655, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.8939840465244213, |
|
"eval_loss": 0.42534753680229187, |
|
"eval_runtime": 146.8715, |
|
"eval_samples_per_second": 38.51, |
|
"eval_steps_per_second": 4.814, |
|
"eval_wer": 0.3389128725265202, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.901842901489253, |
|
"eval_loss": 0.4245891273021698, |
|
"eval_runtime": 146.4129, |
|
"eval_samples_per_second": 38.63, |
|
"eval_steps_per_second": 4.829, |
|
"eval_wer": 0.33902521224181925, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.9057723289716688, |
|
"grad_norm": 2.10141658782959, |
|
"learning_rate": 9.212121212121211e-06, |
|
"loss": 0.2262, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.9097017564540848, |
|
"eval_loss": 0.4226687252521515, |
|
"eval_runtime": 147.045, |
|
"eval_samples_per_second": 38.464, |
|
"eval_steps_per_second": 4.808, |
|
"eval_wer": 0.3378536694965576, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.9175606114189163, |
|
"eval_loss": 0.4228062033653259, |
|
"eval_runtime": 147.4189, |
|
"eval_samples_per_second": 38.367, |
|
"eval_steps_per_second": 4.796, |
|
"eval_wer": 0.33892892105727723, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.9254194663837478, |
|
"grad_norm": 0.8046126365661621, |
|
"learning_rate": 6.181818181818182e-06, |
|
"loss": 0.2358, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.9254194663837478, |
|
"eval_loss": 0.4225420653820038, |
|
"eval_runtime": 147.7497, |
|
"eval_samples_per_second": 38.281, |
|
"eval_steps_per_second": 4.785, |
|
"eval_wer": 0.3391054548956043, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.9332783213485794, |
|
"eval_loss": 0.4224160313606262, |
|
"eval_runtime": 147.1221, |
|
"eval_samples_per_second": 38.444, |
|
"eval_steps_per_second": 4.806, |
|
"eval_wer": 0.33902521224181925, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.941137176313411, |
|
"eval_loss": 0.4214831590652466, |
|
"eval_runtime": 147.8229, |
|
"eval_samples_per_second": 38.262, |
|
"eval_steps_per_second": 4.783, |
|
"eval_wer": 0.3389931151803052, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.9450666037958269, |
|
"grad_norm": 1.517034888267517, |
|
"learning_rate": 3.1575757575757576e-06, |
|
"loss": 0.231, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.9489960312782428, |
|
"eval_loss": 0.4215412437915802, |
|
"eval_runtime": 147.4583, |
|
"eval_samples_per_second": 38.357, |
|
"eval_steps_per_second": 4.795, |
|
"eval_wer": 0.3399560270257258, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.9568548862430744, |
|
"eval_loss": 0.4211778938770294, |
|
"eval_runtime": 146.928, |
|
"eval_samples_per_second": 38.495, |
|
"eval_steps_per_second": 4.812, |
|
"eval_wer": 0.33933013432620246, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.9647137412079059, |
|
"grad_norm": 2.9327681064605713, |
|
"learning_rate": 1.2727272727272726e-07, |
|
"loss": 0.2331, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.9647137412079059, |
|
"eval_loss": 0.4211583733558655, |
|
"eval_runtime": 147.1945, |
|
"eval_samples_per_second": 38.425, |
|
"eval_steps_per_second": 4.803, |
|
"eval_wer": 0.33939432844923045, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.9647137412079059, |
|
"step": 50000, |
|
"total_flos": 6.219831968409632e+19, |
|
"train_loss": 0.4413083312988281, |
|
"train_runtime": 56545.9703, |
|
"train_samples_per_second": 7.074, |
|
"train_steps_per_second": 0.884 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 50000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.219831968409632e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|