|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.1788282447247436, |
|
"eval_steps": 200, |
|
"global_step": 30000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007858854964831625, |
|
"eval_loss": 3.1892831325531006, |
|
"eval_runtime": 159.5257, |
|
"eval_samples_per_second": 35.455, |
|
"eval_steps_per_second": 4.432, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01571770992966325, |
|
"eval_loss": 2.780208110809326, |
|
"eval_runtime": 157.5706, |
|
"eval_samples_per_second": 35.895, |
|
"eval_steps_per_second": 4.487, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01964713741207906, |
|
"grad_norm": 4.9997968673706055, |
|
"learning_rate": 0.0002982, |
|
"loss": 4.719, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.023576564894494872, |
|
"eval_loss": 1.4220576286315918, |
|
"eval_runtime": 158.6041, |
|
"eval_samples_per_second": 35.661, |
|
"eval_steps_per_second": 4.458, |
|
"eval_wer": 0.8876923817624497, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0314354198593265, |
|
"eval_loss": 1.227359414100647, |
|
"eval_runtime": 158.5238, |
|
"eval_samples_per_second": 35.679, |
|
"eval_steps_per_second": 4.46, |
|
"eval_wer": 0.8224390557044503, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03929427482415812, |
|
"grad_norm": 2.6001393795013428, |
|
"learning_rate": 0.0002949457627118644, |
|
"loss": 1.0441, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03929427482415812, |
|
"eval_loss": 1.1094719171524048, |
|
"eval_runtime": 161.2687, |
|
"eval_samples_per_second": 35.072, |
|
"eval_steps_per_second": 4.384, |
|
"eval_wer": 0.7886729469917029, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.047153129788989744, |
|
"eval_loss": 1.091428279876709, |
|
"eval_runtime": 158.6043, |
|
"eval_samples_per_second": 35.661, |
|
"eval_steps_per_second": 4.458, |
|
"eval_wer": 0.7549228868097125, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.055011984753821366, |
|
"eval_loss": 1.0177329778671265, |
|
"eval_runtime": 159.8661, |
|
"eval_samples_per_second": 35.38, |
|
"eval_steps_per_second": 4.422, |
|
"eval_wer": 0.7354881160629745, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.05894141223623718, |
|
"grad_norm": 2.7494542598724365, |
|
"learning_rate": 0.0002898610169491525, |
|
"loss": 0.8033, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.062870839718653, |
|
"eval_loss": 0.9907466769218445, |
|
"eval_runtime": 159.6438, |
|
"eval_samples_per_second": 35.429, |
|
"eval_steps_per_second": 4.429, |
|
"eval_wer": 0.7232912326876475, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07072969468348461, |
|
"eval_loss": 0.9761303067207336, |
|
"eval_runtime": 159.1571, |
|
"eval_samples_per_second": 35.537, |
|
"eval_steps_per_second": 4.442, |
|
"eval_wer": 0.7145287348943204, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.07858854964831624, |
|
"grad_norm": 2.753251314163208, |
|
"learning_rate": 0.00028477627118644064, |
|
"loss": 0.7227, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07858854964831624, |
|
"eval_loss": 0.9555273056030273, |
|
"eval_runtime": 159.3414, |
|
"eval_samples_per_second": 35.496, |
|
"eval_steps_per_second": 4.437, |
|
"eval_wer": 0.6902794049204796, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08644740461314787, |
|
"eval_loss": 0.8994919061660767, |
|
"eval_runtime": 159.0426, |
|
"eval_samples_per_second": 35.563, |
|
"eval_steps_per_second": 4.445, |
|
"eval_wer": 0.6747765242092086, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.09430625957797949, |
|
"eval_loss": 0.8897404670715332, |
|
"eval_runtime": 158.5492, |
|
"eval_samples_per_second": 35.673, |
|
"eval_steps_per_second": 4.459, |
|
"eval_wer": 0.66655967646162, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.0982356870603953, |
|
"grad_norm": 2.404499053955078, |
|
"learning_rate": 0.00027969152542372877, |
|
"loss": 0.6794, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.10216511454281112, |
|
"eval_loss": 0.8826168775558472, |
|
"eval_runtime": 159.8456, |
|
"eval_samples_per_second": 35.384, |
|
"eval_steps_per_second": 4.423, |
|
"eval_wer": 0.6559676461619939, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.11002396950764273, |
|
"eval_loss": 0.8744593858718872, |
|
"eval_runtime": 159.6838, |
|
"eval_samples_per_second": 35.42, |
|
"eval_steps_per_second": 4.427, |
|
"eval_wer": 0.6445571407937604, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.11788282447247436, |
|
"grad_norm": 2.406255006790161, |
|
"learning_rate": 0.00027460677966101695, |
|
"loss": 0.6513, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11788282447247436, |
|
"eval_loss": 0.8450209498405457, |
|
"eval_runtime": 159.2776, |
|
"eval_samples_per_second": 35.51, |
|
"eval_steps_per_second": 4.439, |
|
"eval_wer": 0.6436905201328819, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.125741679437306, |
|
"eval_loss": 0.8596389293670654, |
|
"eval_runtime": 159.3028, |
|
"eval_samples_per_second": 35.505, |
|
"eval_steps_per_second": 4.438, |
|
"eval_wer": 0.6510888928118631, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.13360053440213762, |
|
"eval_loss": 0.8597909212112427, |
|
"eval_runtime": 159.9064, |
|
"eval_samples_per_second": 35.371, |
|
"eval_steps_per_second": 4.421, |
|
"eval_wer": 0.6376402240374893, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.1375299618845534, |
|
"grad_norm": 2.2046961784362793, |
|
"learning_rate": 0.000269522033898305, |
|
"loss": 0.6147, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.14145938936696922, |
|
"eval_loss": 0.8516111969947815, |
|
"eval_runtime": 160.4338, |
|
"eval_samples_per_second": 35.254, |
|
"eval_steps_per_second": 4.407, |
|
"eval_wer": 0.6375439328529473, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.14931824433180085, |
|
"eval_loss": 0.8251617550849915, |
|
"eval_runtime": 160.6004, |
|
"eval_samples_per_second": 35.218, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.6100367511354335, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.15717709929663248, |
|
"grad_norm": 1.520897388458252, |
|
"learning_rate": 0.0002644372881355932, |
|
"loss": 0.6092, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.15717709929663248, |
|
"eval_loss": 0.8579581379890442, |
|
"eval_runtime": 159.0993, |
|
"eval_samples_per_second": 35.55, |
|
"eval_steps_per_second": 4.444, |
|
"eval_wer": 0.6822551395419749, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1650359542614641, |
|
"eval_loss": 0.8204948306083679, |
|
"eval_runtime": 159.818, |
|
"eval_samples_per_second": 35.39, |
|
"eval_steps_per_second": 4.424, |
|
"eval_wer": 0.6135674279019756, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.17289480922629574, |
|
"eval_loss": 0.8033376336097717, |
|
"eval_runtime": 159.2754, |
|
"eval_samples_per_second": 35.511, |
|
"eval_steps_per_second": 4.439, |
|
"eval_wer": 0.6385068446983678, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.17682423670871153, |
|
"grad_norm": 2.3011837005615234, |
|
"learning_rate": 0.00025936271186440674, |
|
"loss": 0.5928, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.18075366419112734, |
|
"eval_loss": 0.7927771210670471, |
|
"eval_runtime": 160.1079, |
|
"eval_samples_per_second": 35.326, |
|
"eval_steps_per_second": 4.416, |
|
"eval_wer": 0.6005039238657701, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.18861251915595897, |
|
"eval_loss": 0.7911030054092407, |
|
"eval_runtime": 160.2559, |
|
"eval_samples_per_second": 35.294, |
|
"eval_steps_per_second": 4.412, |
|
"eval_wer": 0.5923512702412094, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1964713741207906, |
|
"grad_norm": 6.133739948272705, |
|
"learning_rate": 0.0002542779661016949, |
|
"loss": 0.5681, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.1964713741207906, |
|
"eval_loss": 0.7968648076057434, |
|
"eval_runtime": 160.0012, |
|
"eval_samples_per_second": 35.35, |
|
"eval_steps_per_second": 4.419, |
|
"eval_wer": 0.5944375792396206, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.20433022908562223, |
|
"eval_loss": 0.7932958602905273, |
|
"eval_runtime": 159.7818, |
|
"eval_samples_per_second": 35.398, |
|
"eval_steps_per_second": 4.425, |
|
"eval_wer": 0.5898958450353871, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.21218908405045384, |
|
"eval_loss": 0.7830468416213989, |
|
"eval_runtime": 160.2841, |
|
"eval_samples_per_second": 35.287, |
|
"eval_steps_per_second": 4.411, |
|
"eval_wer": 0.6012742533421065, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.21611851153286965, |
|
"grad_norm": 2.9641568660736084, |
|
"learning_rate": 0.00024919322033898305, |
|
"loss": 0.5806, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.22004793901528547, |
|
"eval_loss": 0.7702626585960388, |
|
"eval_runtime": 160.806, |
|
"eval_samples_per_second": 35.173, |
|
"eval_steps_per_second": 4.397, |
|
"eval_wer": 0.5789026014668357, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.2279067939801171, |
|
"eval_loss": 0.7665734887123108, |
|
"eval_runtime": 160.6796, |
|
"eval_samples_per_second": 35.2, |
|
"eval_steps_per_second": 4.4, |
|
"eval_wer": 0.589831650912359, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.23576564894494872, |
|
"grad_norm": 2.6571083068847656, |
|
"learning_rate": 0.00024410847457627117, |
|
"loss": 0.5608, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.23576564894494872, |
|
"eval_loss": 0.7580233216285706, |
|
"eval_runtime": 160.371, |
|
"eval_samples_per_second": 35.268, |
|
"eval_steps_per_second": 4.409, |
|
"eval_wer": 0.5694500168509573, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24362450390978035, |
|
"eval_loss": 0.7478851675987244, |
|
"eval_runtime": 162.164, |
|
"eval_samples_per_second": 34.878, |
|
"eval_steps_per_second": 4.36, |
|
"eval_wer": 0.5650848164850508, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.251483358874612, |
|
"eval_loss": 0.7638738751411438, |
|
"eval_runtime": 160.257, |
|
"eval_samples_per_second": 35.293, |
|
"eval_steps_per_second": 4.412, |
|
"eval_wer": 0.5846640240086021, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.2554127863570278, |
|
"grad_norm": 1.5677289962768555, |
|
"learning_rate": 0.0002390237288135593, |
|
"loss": 0.5333, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2593422138394436, |
|
"eval_loss": 0.7297228574752808, |
|
"eval_runtime": 160.7223, |
|
"eval_samples_per_second": 35.191, |
|
"eval_steps_per_second": 4.399, |
|
"eval_wer": 0.5676044358139012, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"eval_loss": 0.7441245913505554, |
|
"eval_runtime": 160.37, |
|
"eval_samples_per_second": 35.268, |
|
"eval_steps_per_second": 4.409, |
|
"eval_wer": 0.5590345203896583, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.2750599237691068, |
|
"grad_norm": 3.644160032272339, |
|
"learning_rate": 0.00023393898305084743, |
|
"loss": 0.5406, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2750599237691068, |
|
"eval_loss": 0.7404661774635315, |
|
"eval_runtime": 160.5995, |
|
"eval_samples_per_second": 35.218, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.5491165283818267, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.28291877873393845, |
|
"eval_loss": 0.7237815856933594, |
|
"eval_runtime": 160.4373, |
|
"eval_samples_per_second": 35.254, |
|
"eval_steps_per_second": 4.407, |
|
"eval_wer": 0.5529039816404808, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.2907776336987701, |
|
"eval_loss": 0.7328305840492249, |
|
"eval_runtime": 161.925, |
|
"eval_samples_per_second": 34.93, |
|
"eval_steps_per_second": 4.366, |
|
"eval_wer": 0.5543964950008826, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.2947070611811859, |
|
"grad_norm": 3.6030795574188232, |
|
"learning_rate": 0.00022885423728813558, |
|
"loss": 0.535, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.2986364886636017, |
|
"eval_loss": 0.7263395190238953, |
|
"eval_runtime": 160.6865, |
|
"eval_samples_per_second": 35.199, |
|
"eval_steps_per_second": 4.4, |
|
"eval_wer": 0.5598690439890228, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.30649534362843334, |
|
"eval_loss": 0.7421374320983887, |
|
"eval_runtime": 160.2249, |
|
"eval_samples_per_second": 35.3, |
|
"eval_steps_per_second": 4.413, |
|
"eval_wer": 0.5594357336585836, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.31435419859326497, |
|
"grad_norm": 3.376089096069336, |
|
"learning_rate": 0.0002237694915254237, |
|
"loss": 0.5195, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.31435419859326497, |
|
"eval_loss": 0.7434934377670288, |
|
"eval_runtime": 161.1972, |
|
"eval_samples_per_second": 35.087, |
|
"eval_steps_per_second": 4.386, |
|
"eval_wer": 0.5543804464701256, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3222130535580966, |
|
"eval_loss": 0.7186952233314514, |
|
"eval_runtime": 162.677, |
|
"eval_samples_per_second": 34.768, |
|
"eval_steps_per_second": 4.346, |
|
"eval_wer": 0.5423921939946398, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.3300719085229282, |
|
"eval_loss": 0.6976691484451294, |
|
"eval_runtime": 159.5716, |
|
"eval_samples_per_second": 35.445, |
|
"eval_steps_per_second": 4.431, |
|
"eval_wer": 0.5353308404615558, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.33400133600534404, |
|
"grad_norm": 1.9758217334747314, |
|
"learning_rate": 0.00021868474576271186, |
|
"loss": 0.5023, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.33793076348775986, |
|
"eval_loss": 0.6949788928031921, |
|
"eval_runtime": 160.6972, |
|
"eval_samples_per_second": 35.197, |
|
"eval_steps_per_second": 4.4, |
|
"eval_wer": 0.5385565951437146, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.3457896184525915, |
|
"eval_loss": 0.7155033946037292, |
|
"eval_runtime": 159.9521, |
|
"eval_samples_per_second": 35.361, |
|
"eval_steps_per_second": 4.42, |
|
"eval_wer": 0.5450883471618173, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.35364847341742306, |
|
"grad_norm": 3.3146464824676514, |
|
"learning_rate": 0.00021361016949152543, |
|
"loss": 0.5106, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.35364847341742306, |
|
"eval_loss": 0.6857195496559143, |
|
"eval_runtime": 160.5474, |
|
"eval_samples_per_second": 35.229, |
|
"eval_steps_per_second": 4.404, |
|
"eval_wer": 0.5379467509749483, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3615073283822547, |
|
"eval_loss": 0.68482905626297, |
|
"eval_runtime": 161.0662, |
|
"eval_samples_per_second": 35.116, |
|
"eval_steps_per_second": 4.389, |
|
"eval_wer": 0.5329075123172473, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.3693661833470863, |
|
"eval_loss": 0.6732301712036133, |
|
"eval_runtime": 160.6243, |
|
"eval_samples_per_second": 35.213, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.5202291730192101, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.37329561082950213, |
|
"grad_norm": 4.61689567565918, |
|
"learning_rate": 0.00020852542372881352, |
|
"loss": 0.4968, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.37722503831191795, |
|
"eval_loss": 0.6839133501052856, |
|
"eval_runtime": 161.2367, |
|
"eval_samples_per_second": 35.079, |
|
"eval_steps_per_second": 4.385, |
|
"eval_wer": 0.5274510118598642, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.3850838932767496, |
|
"eval_loss": 0.6766842603683472, |
|
"eval_runtime": 160.827, |
|
"eval_samples_per_second": 35.168, |
|
"eval_steps_per_second": 4.396, |
|
"eval_wer": 0.5198279597502848, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.3929427482415812, |
|
"grad_norm": 3.5624563694000244, |
|
"learning_rate": 0.0002034508474576271, |
|
"loss": 0.4824, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.3929427482415812, |
|
"eval_loss": 0.6718243956565857, |
|
"eval_runtime": 161.1794, |
|
"eval_samples_per_second": 35.091, |
|
"eval_steps_per_second": 4.386, |
|
"eval_wer": 0.5334531623629857, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.40080160320641284, |
|
"eval_loss": 0.6593254804611206, |
|
"eval_runtime": 160.9535, |
|
"eval_samples_per_second": 35.141, |
|
"eval_steps_per_second": 4.393, |
|
"eval_wer": 0.5175169713212755, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.40866045817124447, |
|
"eval_loss": 0.6799437403678894, |
|
"eval_runtime": 159.6664, |
|
"eval_samples_per_second": 35.424, |
|
"eval_steps_per_second": 4.428, |
|
"eval_wer": 0.5173885830752195, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.4125898856536603, |
|
"grad_norm": 2.189781427383423, |
|
"learning_rate": 0.00019836610169491524, |
|
"loss": 0.48, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.4165193131360761, |
|
"eval_loss": 0.6662308573722839, |
|
"eval_runtime": 160.8779, |
|
"eval_samples_per_second": 35.157, |
|
"eval_steps_per_second": 4.395, |
|
"eval_wer": 0.5128949944632569, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.42437816810090767, |
|
"eval_loss": 0.6619213223457336, |
|
"eval_runtime": 160.6185, |
|
"eval_samples_per_second": 35.214, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.5005536743111169, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.4322370230657393, |
|
"grad_norm": 10.41739559173584, |
|
"learning_rate": 0.00019328135593220337, |
|
"loss": 0.4693, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4322370230657393, |
|
"eval_loss": 0.6576216220855713, |
|
"eval_runtime": 160.9844, |
|
"eval_samples_per_second": 35.134, |
|
"eval_steps_per_second": 4.392, |
|
"eval_wer": 0.519940299465584, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.44009587803057093, |
|
"eval_loss": 0.6406122446060181, |
|
"eval_runtime": 160.4456, |
|
"eval_samples_per_second": 35.252, |
|
"eval_steps_per_second": 4.406, |
|
"eval_wer": 0.5018696538331916, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.44795473299540256, |
|
"eval_loss": 0.6408420205116272, |
|
"eval_runtime": 161.6075, |
|
"eval_samples_per_second": 34.998, |
|
"eval_steps_per_second": 4.375, |
|
"eval_wer": 0.5066039704065093, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.4518841604778184, |
|
"grad_norm": 3.5733156204223633, |
|
"learning_rate": 0.00018819661016949152, |
|
"loss": 0.4691, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.4558135879602342, |
|
"eval_loss": 0.6476473212242126, |
|
"eval_runtime": 161.2518, |
|
"eval_samples_per_second": 35.076, |
|
"eval_steps_per_second": 4.384, |
|
"eval_wer": 0.5019498964869766, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.4636724429250658, |
|
"eval_loss": 0.6423429846763611, |
|
"eval_runtime": 161.3676, |
|
"eval_samples_per_second": 35.05, |
|
"eval_steps_per_second": 4.381, |
|
"eval_wer": 0.4945996694002664, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.47153129788989745, |
|
"grad_norm": 2.3962831497192383, |
|
"learning_rate": 0.00018311186440677962, |
|
"loss": 0.4444, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.47153129788989745, |
|
"eval_loss": 0.6374172568321228, |
|
"eval_runtime": 162.3359, |
|
"eval_samples_per_second": 34.841, |
|
"eval_steps_per_second": 4.355, |
|
"eval_wer": 0.4975846961210701, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4793901528547291, |
|
"eval_loss": 0.6312358379364014, |
|
"eval_runtime": 162.5747, |
|
"eval_samples_per_second": 34.79, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.4961403283529393, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.4872490078195607, |
|
"eval_loss": 0.6170411109924316, |
|
"eval_runtime": 161.58, |
|
"eval_samples_per_second": 35.004, |
|
"eval_steps_per_second": 4.376, |
|
"eval_wer": 0.4818571359792011, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.4911784353019765, |
|
"grad_norm": 2.623764753341675, |
|
"learning_rate": 0.0001780372881355932, |
|
"loss": 0.4474, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.49510786278439234, |
|
"eval_loss": 0.6300910115242004, |
|
"eval_runtime": 164.417, |
|
"eval_samples_per_second": 34.4, |
|
"eval_steps_per_second": 4.3, |
|
"eval_wer": 0.49325159281667763, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.502966717749224, |
|
"eval_loss": 0.6253496408462524, |
|
"eval_runtime": 161.3418, |
|
"eval_samples_per_second": 35.056, |
|
"eval_steps_per_second": 4.382, |
|
"eval_wer": 0.4862383848758646, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.5108255727140556, |
|
"grad_norm": 2.9566869735717773, |
|
"learning_rate": 0.00017295254237288134, |
|
"loss": 0.4471, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5108255727140556, |
|
"eval_loss": 0.622020959854126, |
|
"eval_runtime": 161.5861, |
|
"eval_samples_per_second": 35.003, |
|
"eval_steps_per_second": 4.375, |
|
"eval_wer": 0.4849224053537899, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5186844276788872, |
|
"eval_loss": 0.6201028823852539, |
|
"eval_runtime": 160.9515, |
|
"eval_samples_per_second": 35.141, |
|
"eval_steps_per_second": 4.393, |
|
"eval_wer": 0.48527547303044405, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.5265432826437189, |
|
"eval_loss": 0.6168439984321594, |
|
"eval_runtime": 162.0987, |
|
"eval_samples_per_second": 34.892, |
|
"eval_steps_per_second": 4.362, |
|
"eval_wer": 0.4848261141692478, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.5304727101261346, |
|
"grad_norm": 1.5596935749053955, |
|
"learning_rate": 0.0001678677966101695, |
|
"loss": 0.4323, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"eval_loss": 0.6172667741775513, |
|
"eval_runtime": 162.3681, |
|
"eval_samples_per_second": 34.834, |
|
"eval_steps_per_second": 4.354, |
|
"eval_wer": 0.47707467381361235, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.542260992573382, |
|
"eval_loss": 0.603190004825592, |
|
"eval_runtime": 161.2926, |
|
"eval_samples_per_second": 35.067, |
|
"eval_steps_per_second": 4.383, |
|
"eval_wer": 0.4656160228531078, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.5501198475382136, |
|
"grad_norm": 2.978868246078491, |
|
"learning_rate": 0.0001627830508474576, |
|
"loss": 0.4575, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5501198475382136, |
|
"eval_loss": 0.6097469925880432, |
|
"eval_runtime": 161.1042, |
|
"eval_samples_per_second": 35.108, |
|
"eval_steps_per_second": 4.388, |
|
"eval_wer": 0.4678307200975751, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5579787025030453, |
|
"eval_loss": 0.5970696806907654, |
|
"eval_runtime": 161.5846, |
|
"eval_samples_per_second": 35.003, |
|
"eval_steps_per_second": 4.375, |
|
"eval_wer": 0.4673653127056218, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.5658375574678769, |
|
"eval_loss": 0.5976916551589966, |
|
"eval_runtime": 161.7136, |
|
"eval_samples_per_second": 34.975, |
|
"eval_steps_per_second": 4.372, |
|
"eval_wer": 0.4697565437884162, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.5697669849502928, |
|
"grad_norm": 3.0501327514648438, |
|
"learning_rate": 0.00015769830508474575, |
|
"loss": 0.4395, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.5736964124327085, |
|
"eval_loss": 0.6056780815124512, |
|
"eval_runtime": 162.5963, |
|
"eval_samples_per_second": 34.786, |
|
"eval_steps_per_second": 4.348, |
|
"eval_wer": 0.4734316573317713, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.5815552673975402, |
|
"eval_loss": 0.582733690738678, |
|
"eval_runtime": 162.9467, |
|
"eval_samples_per_second": 34.711, |
|
"eval_steps_per_second": 4.339, |
|
"eval_wer": 0.4574152236362761, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.5894141223623718, |
|
"grad_norm": 4.3484697341918945, |
|
"learning_rate": 0.00015261355932203388, |
|
"loss": 0.4119, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5894141223623718, |
|
"eval_loss": 0.5946210622787476, |
|
"eval_runtime": 162.2892, |
|
"eval_samples_per_second": 34.851, |
|
"eval_steps_per_second": 4.356, |
|
"eval_wer": 0.4640432668389209, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5972729773272034, |
|
"eval_loss": 0.602292001247406, |
|
"eval_runtime": 161.4334, |
|
"eval_samples_per_second": 35.036, |
|
"eval_steps_per_second": 4.38, |
|
"eval_wer": 0.47707467381361235, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.605131832292035, |
|
"eval_loss": 0.6129310727119446, |
|
"eval_runtime": 161.8649, |
|
"eval_samples_per_second": 34.943, |
|
"eval_steps_per_second": 4.368, |
|
"eval_wer": 0.47266132785543485, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.6090612597744509, |
|
"grad_norm": 4.229031085968018, |
|
"learning_rate": 0.00014752881355932203, |
|
"loss": 0.4125, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.6129906872568667, |
|
"eval_loss": 0.590186595916748, |
|
"eval_runtime": 162.4898, |
|
"eval_samples_per_second": 34.808, |
|
"eval_steps_per_second": 4.351, |
|
"eval_wer": 0.45837813548169665, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.6208495422216983, |
|
"eval_loss": 0.5955421328544617, |
|
"eval_runtime": 161.8228, |
|
"eval_samples_per_second": 34.952, |
|
"eval_steps_per_second": 4.369, |
|
"eval_wer": 0.46537529489175267, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.6287083971865299, |
|
"grad_norm": 1.4181621074676514, |
|
"learning_rate": 0.00014244406779661016, |
|
"loss": 0.4039, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6287083971865299, |
|
"eval_loss": 0.5955237150192261, |
|
"eval_runtime": 161.3699, |
|
"eval_samples_per_second": 35.05, |
|
"eval_steps_per_second": 4.381, |
|
"eval_wer": 0.45946943557317327, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6365672521513616, |
|
"eval_loss": 0.578912079334259, |
|
"eval_runtime": 163.2091, |
|
"eval_samples_per_second": 34.655, |
|
"eval_steps_per_second": 4.332, |
|
"eval_wer": 0.4497279774036687, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.6444261071161932, |
|
"eval_loss": 0.5779294371604919, |
|
"eval_runtime": 164.0491, |
|
"eval_samples_per_second": 34.477, |
|
"eval_steps_per_second": 4.31, |
|
"eval_wer": 0.4630322094012293, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.648355534598609, |
|
"grad_norm": 2.0229876041412354, |
|
"learning_rate": 0.00013736949152542372, |
|
"loss": 0.3969, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6522849620810248, |
|
"eval_loss": 0.5677434802055359, |
|
"eval_runtime": 161.201, |
|
"eval_samples_per_second": 35.087, |
|
"eval_steps_per_second": 4.386, |
|
"eval_wer": 0.45507213814575276, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.6601438170458565, |
|
"eval_loss": 0.586939811706543, |
|
"eval_runtime": 161.4539, |
|
"eval_samples_per_second": 35.032, |
|
"eval_steps_per_second": 4.379, |
|
"eval_wer": 0.46062492978767794, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.6680026720106881, |
|
"grad_norm": 4.166793346405029, |
|
"learning_rate": 0.00013229491525423729, |
|
"loss": 0.3923, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6680026720106881, |
|
"eval_loss": 0.5710186958312988, |
|
"eval_runtime": 160.5637, |
|
"eval_samples_per_second": 35.226, |
|
"eval_steps_per_second": 4.403, |
|
"eval_wer": 0.45017733626486495, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6758615269755197, |
|
"eval_loss": 0.5639811158180237, |
|
"eval_runtime": 161.7944, |
|
"eval_samples_per_second": 34.958, |
|
"eval_steps_per_second": 4.37, |
|
"eval_wer": 0.44741698897465937, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.6837203819403513, |
|
"eval_loss": 0.5841760039329529, |
|
"eval_runtime": 161.0184, |
|
"eval_samples_per_second": 35.126, |
|
"eval_steps_per_second": 4.391, |
|
"eval_wer": 0.4497921715266967, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.6876498094227671, |
|
"grad_norm": 3.127680778503418, |
|
"learning_rate": 0.0001272101694915254, |
|
"loss": 0.386, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.691579236905183, |
|
"eval_loss": 0.5596618056297302, |
|
"eval_runtime": 160.919, |
|
"eval_samples_per_second": 35.148, |
|
"eval_steps_per_second": 4.394, |
|
"eval_wer": 0.44403074898493045, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.6994380918700145, |
|
"eval_loss": 0.5620830059051514, |
|
"eval_runtime": 160.6614, |
|
"eval_samples_per_second": 35.204, |
|
"eval_steps_per_second": 4.401, |
|
"eval_wer": 0.43812488966635105, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.7072969468348461, |
|
"grad_norm": 17.387800216674805, |
|
"learning_rate": 0.00012213559322033898, |
|
"loss": 0.3851, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7072969468348461, |
|
"eval_loss": 0.566453218460083, |
|
"eval_runtime": 161.6574, |
|
"eval_samples_per_second": 34.988, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.434562115838295, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7151558017996777, |
|
"eval_loss": 0.5572646260261536, |
|
"eval_runtime": 162.4898, |
|
"eval_samples_per_second": 34.808, |
|
"eval_steps_per_second": 4.351, |
|
"eval_wer": 0.4356213188682576, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.7230146567645094, |
|
"eval_loss": 0.5548349022865295, |
|
"eval_runtime": 161.0153, |
|
"eval_samples_per_second": 35.127, |
|
"eval_steps_per_second": 4.391, |
|
"eval_wer": 0.4344337275922389, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.7269440842469252, |
|
"grad_norm": 9.4507417678833, |
|
"learning_rate": 0.00011705084745762712, |
|
"loss": 0.369, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.730873511729341, |
|
"eval_loss": 0.5616690516471863, |
|
"eval_runtime": 161.4318, |
|
"eval_samples_per_second": 35.036, |
|
"eval_steps_per_second": 4.38, |
|
"eval_wer": 0.43637559981383706, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.7387323666941726, |
|
"eval_loss": 0.5595532655715942, |
|
"eval_runtime": 160.8301, |
|
"eval_samples_per_second": 35.168, |
|
"eval_steps_per_second": 4.396, |
|
"eval_wer": 0.4393927235961548, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.7465912216590043, |
|
"grad_norm": 1.8793506622314453, |
|
"learning_rate": 0.00011196610169491524, |
|
"loss": 0.3738, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7465912216590043, |
|
"eval_loss": 0.549248218536377, |
|
"eval_runtime": 161.3194, |
|
"eval_samples_per_second": 35.061, |
|
"eval_steps_per_second": 4.383, |
|
"eval_wer": 0.42923400362696795, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7544500766238359, |
|
"eval_loss": 0.5478147268295288, |
|
"eval_runtime": 162.2231, |
|
"eval_samples_per_second": 34.866, |
|
"eval_steps_per_second": 4.358, |
|
"eval_wer": 0.4372261719439585, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.7623089315886675, |
|
"eval_loss": 0.5375632047653198, |
|
"eval_runtime": 161.0297, |
|
"eval_samples_per_second": 35.124, |
|
"eval_steps_per_second": 4.39, |
|
"eval_wer": 0.42873649917350065, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.7662383590710834, |
|
"grad_norm": 2.159616708755493, |
|
"learning_rate": 0.00010688135593220338, |
|
"loss": 0.368, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.7701677865534992, |
|
"eval_loss": 0.5282244086265564, |
|
"eval_runtime": 163.0357, |
|
"eval_samples_per_second": 34.692, |
|
"eval_steps_per_second": 4.336, |
|
"eval_wer": 0.4193481086806503, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.7780266415183308, |
|
"eval_loss": 0.5348193049430847, |
|
"eval_runtime": 162.5531, |
|
"eval_samples_per_second": 34.795, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.42507743416090255, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.7858854964831624, |
|
"grad_norm": 2.2020351886749268, |
|
"learning_rate": 0.00010179661016949151, |
|
"loss": 0.3629, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.7858854964831624, |
|
"eval_loss": 0.5367931723594666, |
|
"eval_runtime": 162.0053, |
|
"eval_samples_per_second": 34.912, |
|
"eval_steps_per_second": 4.364, |
|
"eval_wer": 0.43130426409462214, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.793744351447994, |
|
"eval_loss": 0.5550614595413208, |
|
"eval_runtime": 161.9948, |
|
"eval_samples_per_second": 34.915, |
|
"eval_steps_per_second": 4.364, |
|
"eval_wer": 0.44123830463321084, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"eval_loss": 0.5251778364181519, |
|
"eval_runtime": 162.6214, |
|
"eval_samples_per_second": 34.78, |
|
"eval_steps_per_second": 4.348, |
|
"eval_wer": 0.4105214167642952, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.8055326338952414, |
|
"grad_norm": 2.7725887298583984, |
|
"learning_rate": 9.671186440677966e-05, |
|
"loss": 0.3638, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.8094620613776573, |
|
"eval_loss": 0.5242481827735901, |
|
"eval_runtime": 162.5731, |
|
"eval_samples_per_second": 34.791, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.41174110510182793, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.8173209163424889, |
|
"eval_loss": 0.5233432054519653, |
|
"eval_runtime": 161.9438, |
|
"eval_samples_per_second": 34.926, |
|
"eval_steps_per_second": 4.366, |
|
"eval_wer": 0.4165877613904447, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.8251797713073206, |
|
"grad_norm": 2.733196496963501, |
|
"learning_rate": 9.162711864406779e-05, |
|
"loss": 0.3512, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8251797713073206, |
|
"eval_loss": 0.524342954158783, |
|
"eval_runtime": 161.947, |
|
"eval_samples_per_second": 34.925, |
|
"eval_steps_per_second": 4.366, |
|
"eval_wer": 0.4160581598754634, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8330386262721522, |
|
"eval_loss": 0.5150259733200073, |
|
"eval_runtime": 162.0793, |
|
"eval_samples_per_second": 34.896, |
|
"eval_steps_per_second": 4.362, |
|
"eval_wer": 0.4123028036783232, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.8408974812369838, |
|
"eval_loss": 0.5088914632797241, |
|
"eval_runtime": 161.2392, |
|
"eval_samples_per_second": 35.078, |
|
"eval_steps_per_second": 4.385, |
|
"eval_wer": 0.4079536518431738, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.8448269087193996, |
|
"grad_norm": 4.562708377838135, |
|
"learning_rate": 8.654237288135593e-05, |
|
"loss": 0.3536, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8487563362018153, |
|
"eval_loss": 0.515373170375824, |
|
"eval_runtime": 162.8063, |
|
"eval_samples_per_second": 34.741, |
|
"eval_steps_per_second": 4.343, |
|
"eval_wer": 0.40899680634237934, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.856615191166647, |
|
"eval_loss": 0.5161571502685547, |
|
"eval_runtime": 162.7678, |
|
"eval_samples_per_second": 34.749, |
|
"eval_steps_per_second": 4.344, |
|
"eval_wer": 0.4091893887114635, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.8644740461314786, |
|
"grad_norm": 2.272256374359131, |
|
"learning_rate": 8.146779661016948e-05, |
|
"loss": 0.3464, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8644740461314786, |
|
"eval_loss": 0.5097736716270447, |
|
"eval_runtime": 162.1935, |
|
"eval_samples_per_second": 34.872, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.40527354720675324, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8723329010963102, |
|
"eval_loss": 0.5069981813430786, |
|
"eval_runtime": 162.5966, |
|
"eval_samples_per_second": 34.785, |
|
"eval_steps_per_second": 4.348, |
|
"eval_wer": 0.4022724719551925, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.8801917560611419, |
|
"eval_loss": 0.5070444345474243, |
|
"eval_runtime": 162.5617, |
|
"eval_samples_per_second": 34.793, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.40707098265153824, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.8841211835435577, |
|
"grad_norm": 2.9740068912506104, |
|
"learning_rate": 7.638305084745762e-05, |
|
"loss": 0.3377, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.8880506110259735, |
|
"eval_loss": 0.5028176307678223, |
|
"eval_runtime": 162.4451, |
|
"eval_samples_per_second": 34.818, |
|
"eval_steps_per_second": 4.352, |
|
"eval_wer": 0.39670363178251034, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.8959094659908051, |
|
"eval_loss": 0.5036062002182007, |
|
"eval_runtime": 162.5763, |
|
"eval_samples_per_second": 34.79, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.39784307746625797, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.9037683209556368, |
|
"grad_norm": 1.9388916492462158, |
|
"learning_rate": 7.129830508474575e-05, |
|
"loss": 0.3272, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9037683209556368, |
|
"eval_loss": 0.5020586848258972, |
|
"eval_runtime": 161.6894, |
|
"eval_samples_per_second": 34.981, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.39538765226043554, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9116271759204684, |
|
"eval_loss": 0.5032612085342407, |
|
"eval_runtime": 163.6786, |
|
"eval_samples_per_second": 34.556, |
|
"eval_steps_per_second": 4.319, |
|
"eval_wer": 0.3984529216350243, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.9194860308853, |
|
"eval_loss": 0.49842530488967896, |
|
"eval_runtime": 162.0701, |
|
"eval_samples_per_second": 34.898, |
|
"eval_steps_per_second": 4.362, |
|
"eval_wer": 0.3971850877052206, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.9234154583677158, |
|
"grad_norm": 3.9436373710632324, |
|
"learning_rate": 6.621355932203389e-05, |
|
"loss": 0.319, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9273448858501316, |
|
"eval_loss": 0.4928737282752991, |
|
"eval_runtime": 163.9597, |
|
"eval_samples_per_second": 34.496, |
|
"eval_steps_per_second": 4.312, |
|
"eval_wer": 0.39243472260114587, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.9352037408149633, |
|
"eval_loss": 0.49405232071876526, |
|
"eval_runtime": 161.8803, |
|
"eval_samples_per_second": 34.939, |
|
"eval_steps_per_second": 4.367, |
|
"eval_wer": 0.4013095601097719, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.9430625957797949, |
|
"grad_norm": 3.4186201095581055, |
|
"learning_rate": 6.112881355932203e-05, |
|
"loss": 0.3184, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9430625957797949, |
|
"eval_loss": 0.4856198728084564, |
|
"eval_runtime": 163.6122, |
|
"eval_samples_per_second": 34.57, |
|
"eval_steps_per_second": 4.321, |
|
"eval_wer": 0.387411532474202, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9509214507446265, |
|
"eval_loss": 0.48915818333625793, |
|
"eval_runtime": 162.8317, |
|
"eval_samples_per_second": 34.735, |
|
"eval_steps_per_second": 4.342, |
|
"eval_wer": 0.3913755195711833, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.9587803057094582, |
|
"eval_loss": 0.48598504066467285, |
|
"eval_runtime": 160.6269, |
|
"eval_samples_per_second": 35.212, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.3813772849095665, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.9627097331918739, |
|
"grad_norm": 2.70164155960083, |
|
"learning_rate": 5.6044067796610164e-05, |
|
"loss": 0.3091, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.9666391606742898, |
|
"eval_loss": 0.4825168251991272, |
|
"eval_runtime": 162.6242, |
|
"eval_samples_per_second": 34.78, |
|
"eval_steps_per_second": 4.347, |
|
"eval_wer": 0.38336730272343567, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.9744980156391214, |
|
"eval_loss": 0.4784228205680847, |
|
"eval_runtime": 162.0189, |
|
"eval_samples_per_second": 34.91, |
|
"eval_steps_per_second": 4.364, |
|
"eval_wer": 0.3866893485901366, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.982356870603953, |
|
"grad_norm": 9.408166885375977, |
|
"learning_rate": 5.096949152542373e-05, |
|
"loss": 0.3154, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.982356870603953, |
|
"eval_loss": 0.47507792711257935, |
|
"eval_runtime": 161.9422, |
|
"eval_samples_per_second": 34.926, |
|
"eval_steps_per_second": 4.366, |
|
"eval_wer": 0.3807834892715572, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9902157255687847, |
|
"eval_loss": 0.4778765141963959, |
|
"eval_runtime": 162.3405, |
|
"eval_samples_per_second": 34.84, |
|
"eval_steps_per_second": 4.355, |
|
"eval_wer": 0.38492401020686556, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.9980745805336163, |
|
"eval_loss": 0.477267324924469, |
|
"eval_runtime": 161.2107, |
|
"eval_samples_per_second": 35.085, |
|
"eval_steps_per_second": 4.386, |
|
"eval_wer": 0.38084768339458525, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.002004008016032, |
|
"grad_norm": 0.7003775835037231, |
|
"learning_rate": 4.589491525423728e-05, |
|
"loss": 0.312, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.005933435498448, |
|
"eval_loss": 0.47774726152420044, |
|
"eval_runtime": 160.8535, |
|
"eval_samples_per_second": 35.162, |
|
"eval_steps_per_second": 4.395, |
|
"eval_wer": 0.3757923962061273, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.0137922904632795, |
|
"eval_loss": 0.4752050042152405, |
|
"eval_runtime": 159.7765, |
|
"eval_samples_per_second": 35.399, |
|
"eval_steps_per_second": 4.425, |
|
"eval_wer": 0.3820513232013609, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.0216511454281112, |
|
"grad_norm": 0.702942430973053, |
|
"learning_rate": 4.081016949152542e-05, |
|
"loss": 0.2651, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0216511454281112, |
|
"eval_loss": 0.4700838327407837, |
|
"eval_runtime": 163.2858, |
|
"eval_samples_per_second": 34.639, |
|
"eval_steps_per_second": 4.33, |
|
"eval_wer": 0.37750958899712733, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0295100003929427, |
|
"eval_loss": 0.47011885046958923, |
|
"eval_runtime": 160.7741, |
|
"eval_samples_per_second": 35.18, |
|
"eval_steps_per_second": 4.397, |
|
"eval_wer": 0.3760652212289965, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.0373688553577745, |
|
"eval_loss": 0.471804678440094, |
|
"eval_runtime": 160.2455, |
|
"eval_samples_per_second": 35.296, |
|
"eval_steps_per_second": 4.412, |
|
"eval_wer": 0.37755773458939834, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.0412982828401902, |
|
"grad_norm": 0.98069828748703, |
|
"learning_rate": 3.572542372881355e-05, |
|
"loss": 0.2627, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.045227710322606, |
|
"eval_loss": 0.4638473391532898, |
|
"eval_runtime": 160.1121, |
|
"eval_samples_per_second": 35.325, |
|
"eval_steps_per_second": 4.416, |
|
"eval_wer": 0.37296785479289374, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.0530865652874377, |
|
"eval_loss": 0.4677112400531769, |
|
"eval_runtime": 159.9389, |
|
"eval_samples_per_second": 35.364, |
|
"eval_steps_per_second": 4.42, |
|
"eval_wer": 0.3720370400089872, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.0609454202522692, |
|
"grad_norm": 0.8780287504196167, |
|
"learning_rate": 3.0640677966101693e-05, |
|
"loss": 0.2427, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0609454202522692, |
|
"eval_loss": 0.4642546474933624, |
|
"eval_runtime": 160.0541, |
|
"eval_samples_per_second": 35.338, |
|
"eval_steps_per_second": 4.417, |
|
"eval_wer": 0.36985443982603394, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0688042752171008, |
|
"eval_loss": 0.46017909049987793, |
|
"eval_runtime": 159.9066, |
|
"eval_samples_per_second": 35.371, |
|
"eval_steps_per_second": 4.421, |
|
"eval_wer": 0.3713469531864358, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.0766631301819325, |
|
"eval_loss": 0.46644654870033264, |
|
"eval_runtime": 160.7516, |
|
"eval_samples_per_second": 35.185, |
|
"eval_steps_per_second": 4.398, |
|
"eval_wer": 0.3703037986872302, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.0805925576643483, |
|
"grad_norm": 0.8659859895706177, |
|
"learning_rate": 2.556610169491525e-05, |
|
"loss": 0.2464, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.0845219851467642, |
|
"eval_loss": 0.4609028100967407, |
|
"eval_runtime": 161.4502, |
|
"eval_samples_per_second": 35.032, |
|
"eval_steps_per_second": 4.379, |
|
"eval_wer": 0.36770393670459467, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.0923808401115958, |
|
"eval_loss": 0.4613707363605499, |
|
"eval_runtime": 160.5963, |
|
"eval_samples_per_second": 35.219, |
|
"eval_steps_per_second": 4.402, |
|
"eval_wer": 0.3687310426730433, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.1002396950764273, |
|
"grad_norm": 1.6944918632507324, |
|
"learning_rate": 2.0481355932203388e-05, |
|
"loss": 0.2537, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.1002396950764273, |
|
"eval_loss": 0.45553678274154663, |
|
"eval_runtime": 160.1154, |
|
"eval_samples_per_second": 35.325, |
|
"eval_steps_per_second": 4.416, |
|
"eval_wer": 0.36545714239861343, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.108098550041259, |
|
"eval_loss": 0.456032931804657, |
|
"eval_runtime": 160.97, |
|
"eval_samples_per_second": 35.137, |
|
"eval_steps_per_second": 4.392, |
|
"eval_wer": 0.36447818202243587, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.1159574050060905, |
|
"eval_loss": 0.45427000522613525, |
|
"eval_runtime": 160.1348, |
|
"eval_samples_per_second": 35.32, |
|
"eval_steps_per_second": 4.415, |
|
"eval_wer": 0.36261655245462276, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.1198868324885065, |
|
"grad_norm": 0.8318812251091003, |
|
"learning_rate": 1.5396610169491525e-05, |
|
"loss": 0.2313, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.1238162599709223, |
|
"eval_loss": 0.45402956008911133, |
|
"eval_runtime": 160.7545, |
|
"eval_samples_per_second": 35.184, |
|
"eval_steps_per_second": 4.398, |
|
"eval_wer": 0.3631461539696041, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.1316751149357538, |
|
"eval_loss": 0.4536111354827881, |
|
"eval_runtime": 165.4654, |
|
"eval_samples_per_second": 34.182, |
|
"eval_steps_per_second": 4.273, |
|
"eval_wer": 0.3626326009853798, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.1395339699005855, |
|
"grad_norm": 0.7866860032081604, |
|
"learning_rate": 1.031186440677966e-05, |
|
"loss": 0.2451, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1395339699005855, |
|
"eval_loss": 0.45293620228767395, |
|
"eval_runtime": 160.3649, |
|
"eval_samples_per_second": 35.27, |
|
"eval_steps_per_second": 4.409, |
|
"eval_wer": 0.3617338832629873, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.147392824865417, |
|
"eval_loss": 0.4530145823955536, |
|
"eval_runtime": 160.576, |
|
"eval_samples_per_second": 35.223, |
|
"eval_steps_per_second": 4.403, |
|
"eval_wer": 0.3598401566336602, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.1552516798302488, |
|
"eval_loss": 0.4515323042869568, |
|
"eval_runtime": 160.1136, |
|
"eval_samples_per_second": 35.325, |
|
"eval_steps_per_second": 4.416, |
|
"eval_wer": 0.3591500698111088, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.1591811073126645, |
|
"grad_norm": 3.2193210124969482, |
|
"learning_rate": 5.227118644067796e-06, |
|
"loss": 0.2445, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.1631105347950803, |
|
"eval_loss": 0.451358437538147, |
|
"eval_runtime": 160.6595, |
|
"eval_samples_per_second": 35.205, |
|
"eval_steps_per_second": 4.401, |
|
"eval_wer": 0.3590056330342957, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.170969389759912, |
|
"eval_loss": 0.4514302611351013, |
|
"eval_runtime": 160.1434, |
|
"eval_samples_per_second": 35.318, |
|
"eval_steps_per_second": 4.415, |
|
"eval_wer": 0.3588772447882396, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.1788282447247436, |
|
"grad_norm": 0.5669330358505249, |
|
"learning_rate": 1.423728813559322e-07, |
|
"loss": 0.2364, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1788282447247436, |
|
"eval_loss": 0.4510672390460968, |
|
"eval_runtime": 160.6855, |
|
"eval_samples_per_second": 35.199, |
|
"eval_steps_per_second": 4.4, |
|
"eval_wer": 0.3591179727495948, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1788282447247436, |
|
"step": 30000, |
|
"total_flos": 3.731985674211105e+19, |
|
"train_loss": 0.5082863594055176, |
|
"train_runtime": 37313.8627, |
|
"train_samples_per_second": 6.432, |
|
"train_steps_per_second": 0.804 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 30000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.731985674211105e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|