{
  "best_metric": 16.21523264881726,
  "best_model_checkpoint": "./whisper-large-v3-turbo/checkpoint-10000",
  "epoch": 3.461405330564209,
  "eval_steps": 500,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.008653513326410523, "grad_norm": 9.31276798248291, "learning_rate": 5.000000000000001e-07, "loss": 0.6314, "step": 25 },
    { "epoch": 0.017307026652821047, "grad_norm": 6.611477851867676, "learning_rate": 1.0000000000000002e-06, "loss": 0.4058, "step": 50 },
    { "epoch": 0.02596053997923157, "grad_norm": 5.953363418579102, "learning_rate": 1.5e-06, "loss": 0.2556, "step": 75 },
    { "epoch": 0.034614053305642094, "grad_norm": 4.594871520996094, "learning_rate": 2.0000000000000003e-06, "loss": 0.2411, "step": 100 },
    { "epoch": 0.04326756663205261, "grad_norm": 5.638365268707275, "learning_rate": 2.5e-06, "loss": 0.2421, "step": 125 },
    { "epoch": 0.05192107995846314, "grad_norm": 6.280882835388184, "learning_rate": 3e-06, "loss": 0.245, "step": 150 },
    { "epoch": 0.060574593284873655, "grad_norm": 4.423807144165039, "learning_rate": 3.5e-06, "loss": 0.2556, "step": 175 },
    { "epoch": 0.06922810661128419, "grad_norm": 5.257762908935547, "learning_rate": 4.000000000000001e-06, "loss": 0.243, "step": 200 },
    { "epoch": 0.0778816199376947, "grad_norm": 4.895700931549072, "learning_rate": 4.5e-06, "loss": 0.2607, "step": 225 },
    { "epoch": 0.08653513326410522, "grad_norm": 5.383410453796387, "learning_rate": 5e-06, "loss": 0.2451, "step": 250 },
    { "epoch": 0.09518864659051575, "grad_norm": 6.303346157073975, "learning_rate": 5.500000000000001e-06, "loss": 0.2316, "step": 275 },
    { "epoch": 0.10384215991692627, "grad_norm": 3.834745168685913, "learning_rate": 6e-06, "loss": 0.2511, "step": 300 },
    { "epoch": 0.1124956732433368, "grad_norm": 4.793943405151367, "learning_rate": 6.5000000000000004e-06, "loss": 0.258, "step": 325 },
    { "epoch": 0.12114918656974731, "grad_norm": 4.196424961090088, "learning_rate": 7e-06, "loss": 0.2635, "step": 350 },
    { "epoch": 0.12980269989615784, "grad_norm": 5.759880065917969, "learning_rate": 7.500000000000001e-06, "loss": 0.2644, "step": 375 },
    { "epoch": 0.13845621322256838, "grad_norm": 4.871682167053223, "learning_rate": 8.000000000000001e-06, "loss": 0.2513, "step": 400 },
    { "epoch": 0.1471097265489789, "grad_norm": 4.624505996704102, "learning_rate": 8.5e-06, "loss": 0.2601, "step": 425 },
    { "epoch": 0.1557632398753894, "grad_norm": 5.247982501983643, "learning_rate": 9e-06, "loss": 0.254, "step": 450 },
    { "epoch": 0.16441675320179994, "grad_norm": 5.218228816986084, "learning_rate": 9.5e-06, "loss": 0.2717, "step": 475 },
    { "epoch": 0.17307026652821045, "grad_norm": 5.001543998718262, "learning_rate": 1e-05, "loss": 0.2829, "step": 500 },
    { "epoch": 0.17307026652821045, "eval_loss": 0.27819713950157166, "eval_runtime": 8630.7687, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 23.738844120960056, "step": 500 },
    { "epoch": 0.181723779854621, "grad_norm": 5.3592023849487305, "learning_rate": 9.973684210526316e-06, "loss": 0.2837, "step": 525 },
    { "epoch": 0.1903772931810315, "grad_norm": 5.0274658203125, "learning_rate": 9.947368421052632e-06, "loss": 0.2873, "step": 550 },
    { "epoch": 0.199030806507442, "grad_norm": 6.059903144836426, "learning_rate": 9.921052631578947e-06, "loss": 0.2927, "step": 575 },
    { "epoch": 0.20768431983385255, "grad_norm": 7.239508152008057, "learning_rate": 9.894736842105264e-06, "loss": 0.2662, "step": 600 },
    { "epoch": 0.21633783316026306, "grad_norm": 5.860602855682373, "learning_rate": 9.868421052631579e-06, "loss": 0.2847, "step": 625 },
    { "epoch": 0.2249913464866736, "grad_norm": 5.402172565460205, "learning_rate": 9.842105263157896e-06, "loss": 0.2653, "step": 650 },
    { "epoch": 0.2336448598130841, "grad_norm": 5.541703224182129, "learning_rate": 9.815789473684212e-06, "loss": 0.2994, "step": 675 },
    { "epoch": 0.24229837313949462, "grad_norm": 4.814186096191406, "learning_rate": 9.789473684210527e-06, "loss": 0.2576, "step": 700 },
    { "epoch": 0.25095188646590516, "grad_norm": 4.134284496307373, "learning_rate": 9.763157894736844e-06, "loss": 0.2788, "step": 725 },
    { "epoch": 0.25960539979231567, "grad_norm": 5.382356643676758, "learning_rate": 9.736842105263159e-06, "loss": 0.2902, "step": 750 },
    { "epoch": 0.2682589131187262, "grad_norm": 4.981515884399414, "learning_rate": 9.710526315789474e-06, "loss": 0.271, "step": 775 },
    { "epoch": 0.27691242644513675, "grad_norm": 4.840052127838135, "learning_rate": 9.68421052631579e-06, "loss": 0.2717, "step": 800 },
    { "epoch": 0.28556593977154726, "grad_norm": 4.619823932647705, "learning_rate": 9.657894736842106e-06, "loss": 0.2763, "step": 825 },
    { "epoch": 0.2942194530979578, "grad_norm": 5.049735069274902, "learning_rate": 9.631578947368422e-06, "loss": 0.2709, "step": 850 },
    { "epoch": 0.3028729664243683, "grad_norm": 4.263411045074463, "learning_rate": 9.605263157894737e-06, "loss": 0.2575, "step": 875 },
    { "epoch": 0.3115264797507788, "grad_norm": 5.51076078414917, "learning_rate": 9.578947368421054e-06, "loss": 0.2775, "step": 900 },
    { "epoch": 0.32017999307718936, "grad_norm": 3.7715821266174316, "learning_rate": 9.552631578947369e-06, "loss": 0.2767, "step": 925 },
    { "epoch": 0.3288335064035999, "grad_norm": 3.964357852935791, "learning_rate": 9.526315789473684e-06, "loss": 0.2593, "step": 950 },
    { "epoch": 0.3374870197300104, "grad_norm": 4.967723369598389, "learning_rate": 9.5e-06, "loss": 0.2445, "step": 975 },
    { "epoch": 0.3461405330564209, "grad_norm": 6.19343376159668, "learning_rate": 9.473684210526315e-06, "loss": 0.2671, "step": 1000 },
    { "epoch": 0.3461405330564209, "eval_loss": 0.26496633887290955, "eval_runtime": 8635.3808, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 22.229442855905035, "step": 1000 },
    { "epoch": 0.3547940463828314, "grad_norm": 4.938564777374268, "learning_rate": 9.447368421052632e-06, "loss": 0.261, "step": 1025 },
    { "epoch": 0.363447559709242, "grad_norm": 4.535635948181152, "learning_rate": 9.421052631578949e-06, "loss": 0.2471, "step": 1050 },
    { "epoch": 0.3721010730356525, "grad_norm": 4.910510540008545, "learning_rate": 9.394736842105264e-06, "loss": 0.2701, "step": 1075 },
    { "epoch": 0.380754586362063, "grad_norm": 4.105949878692627, "learning_rate": 9.36842105263158e-06, "loss": 0.2342, "step": 1100 },
    { "epoch": 0.3894080996884735, "grad_norm": 4.819608211517334, "learning_rate": 9.342105263157895e-06, "loss": 0.2704, "step": 1125 },
    { "epoch": 0.398061613014884, "grad_norm": 6.137063503265381, "learning_rate": 9.315789473684212e-06, "loss": 0.258, "step": 1150 },
    { "epoch": 0.4067151263412946, "grad_norm": 4.703615665435791, "learning_rate": 9.289473684210527e-06, "loss": 0.2602, "step": 1175 },
    { "epoch": 0.4153686396677051, "grad_norm": 4.942866325378418, "learning_rate": 9.263157894736842e-06, "loss": 0.2562, "step": 1200 },
    { "epoch": 0.4240221529941156, "grad_norm": 4.163381099700928, "learning_rate": 9.236842105263159e-06, "loss": 0.2398, "step": 1225 },
    { "epoch": 0.4326756663205261, "grad_norm": 4.933504104614258, "learning_rate": 9.210526315789474e-06, "loss": 0.2423, "step": 1250 },
    { "epoch": 0.44132917964693663, "grad_norm": 4.699647426605225, "learning_rate": 9.18421052631579e-06, "loss": 0.2659, "step": 1275 },
    { "epoch": 0.4499826929733472, "grad_norm": 5.076835632324219, "learning_rate": 9.157894736842105e-06, "loss": 0.2679, "step": 1300 },
    { "epoch": 0.4586362062997577, "grad_norm": 4.333568572998047, "learning_rate": 9.131578947368422e-06, "loss": 0.2475, "step": 1325 },
    { "epoch": 0.4672897196261682, "grad_norm": 4.654094219207764, "learning_rate": 9.105263157894739e-06, "loss": 0.2353, "step": 1350 },
    { "epoch": 0.47594323295257873, "grad_norm": 3.9147582054138184, "learning_rate": 9.078947368421054e-06, "loss": 0.232, "step": 1375 },
    { "epoch": 0.48459674627898924, "grad_norm": 3.9528894424438477, "learning_rate": 9.05263157894737e-06, "loss": 0.2312, "step": 1400 },
    { "epoch": 0.4932502596053998, "grad_norm": 5.073605060577393, "learning_rate": 9.026315789473685e-06, "loss": 0.2529, "step": 1425 },
    { "epoch": 0.5019037729318103, "grad_norm": 4.176553249359131, "learning_rate": 9e-06, "loss": 0.2459, "step": 1450 },
    { "epoch": 0.5105572862582208, "grad_norm": 3.9072072505950928, "learning_rate": 8.973684210526317e-06, "loss": 0.2647, "step": 1475 },
    { "epoch": 0.5192107995846313, "grad_norm": 5.062324523925781, "learning_rate": 8.947368421052632e-06, "loss": 0.2549, "step": 1500 },
    { "epoch": 0.5192107995846313, "eval_loss": 0.24746711552143097, "eval_runtime": 8673.5525, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.074, "eval_wer": 21.05710077116368, "step": 1500 },
    { "epoch": 0.5278643129110419, "grad_norm": 3.6110143661499023, "learning_rate": 8.921052631578949e-06, "loss": 0.2335, "step": 1525 },
    { "epoch": 0.5365178262374524, "grad_norm": 5.8853607177734375, "learning_rate": 8.894736842105264e-06, "loss": 0.2516, "step": 1550 },
    { "epoch": 0.5451713395638629, "grad_norm": 5.245302200317383, "learning_rate": 8.86842105263158e-06, "loss": 0.2456, "step": 1575 },
    { "epoch": 0.5538248528902735, "grad_norm": 3.9259748458862305, "learning_rate": 8.842105263157895e-06, "loss": 0.2426, "step": 1600 },
    { "epoch": 0.562478366216684, "grad_norm": 5.401766300201416, "learning_rate": 8.81578947368421e-06, "loss": 0.2489, "step": 1625 },
    { "epoch": 0.5711318795430945, "grad_norm": 3.4733078479766846, "learning_rate": 8.789473684210527e-06, "loss": 0.237, "step": 1650 },
    { "epoch": 0.579785392869505, "grad_norm": 5.746425151824951, "learning_rate": 8.763157894736842e-06, "loss": 0.262, "step": 1675 },
    { "epoch": 0.5884389061959155, "grad_norm": 4.111097812652588, "learning_rate": 8.736842105263158e-06, "loss": 0.2559, "step": 1700 },
    { "epoch": 0.5970924195223261, "grad_norm": 3.773117780685425, "learning_rate": 8.710526315789475e-06, "loss": 0.2567, "step": 1725 },
    { "epoch": 0.6057459328487366, "grad_norm": 3.213146209716797, "learning_rate": 8.68421052631579e-06, "loss": 0.2361, "step": 1750 },
    { "epoch": 0.6143994461751471, "grad_norm": 3.5634965896606445, "learning_rate": 8.657894736842107e-06, "loss": 0.2632, "step": 1775 },
    { "epoch": 0.6230529595015576, "grad_norm": 3.3568804264068604, "learning_rate": 8.631578947368422e-06, "loss": 0.2278, "step": 1800 },
    { "epoch": 0.6317064728279681, "grad_norm": 3.8863000869750977, "learning_rate": 8.605263157894738e-06, "loss": 0.2336, "step": 1825 },
    { "epoch": 0.6403599861543787, "grad_norm": 4.37355899810791, "learning_rate": 8.578947368421053e-06, "loss": 0.2435, "step": 1850 },
    { "epoch": 0.6490134994807892, "grad_norm": 5.477795600891113, "learning_rate": 8.552631578947368e-06, "loss": 0.248, "step": 1875 },
    { "epoch": 0.6576670128071997, "grad_norm": 5.682942867279053, "learning_rate": 8.526315789473685e-06, "loss": 0.2478, "step": 1900 },
    { "epoch": 0.6663205261336103, "grad_norm": 4.837137222290039, "learning_rate": 8.5e-06, "loss": 0.226, "step": 1925 },
    { "epoch": 0.6749740394600208, "grad_norm": 5.188834190368652, "learning_rate": 8.473684210526317e-06, "loss": 0.2294, "step": 1950 },
    { "epoch": 0.6836275527864313, "grad_norm": 3.51971173286438, "learning_rate": 8.447368421052632e-06, "loss": 0.2357, "step": 1975 },
    { "epoch": 0.6922810661128418, "grad_norm": 6.168539047241211, "learning_rate": 8.421052631578948e-06, "loss": 0.243, "step": 2000 },
    { "epoch": 0.6922810661128418, "eval_loss": 0.23871001601219177, "eval_runtime": 8675.6286, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.074, "eval_wer": 20.804956242959882, "step": 2000 },
    { "epoch": 0.7009345794392523, "grad_norm": 4.645936965942383, "learning_rate": 8.394736842105263e-06, "loss": 0.2265, "step": 2025 },
    { "epoch": 0.7095880927656628, "grad_norm": 5.751936435699463, "learning_rate": 8.36842105263158e-06, "loss": 0.2491, "step": 2050 },
    { "epoch": 0.7182416060920734, "grad_norm": 3.7281875610351562, "learning_rate": 8.342105263157897e-06, "loss": 0.2671, "step": 2075 },
    { "epoch": 0.726895119418484, "grad_norm": 3.756186008453369, "learning_rate": 8.315789473684212e-06, "loss": 0.214, "step": 2100 },
    { "epoch": 0.7355486327448945, "grad_norm": 4.607492923736572, "learning_rate": 8.289473684210526e-06, "loss": 0.251, "step": 2125 },
    { "epoch": 0.744202146071305, "grad_norm": 6.176618576049805, "learning_rate": 8.263157894736843e-06, "loss": 0.2532, "step": 2150 },
    { "epoch": 0.7528556593977155, "grad_norm": 5.2198166847229, "learning_rate": 8.236842105263158e-06, "loss": 0.2405, "step": 2175 },
    { "epoch": 0.761509172724126, "grad_norm": 4.314031600952148, "learning_rate": 8.210526315789475e-06, "loss": 0.2287, "step": 2200 },
    { "epoch": 0.7701626860505365, "grad_norm": 5.143173694610596, "learning_rate": 8.18421052631579e-06, "loss": 0.2285, "step": 2225 },
    { "epoch": 0.778816199376947, "grad_norm": 7.833088397979736, "learning_rate": 8.157894736842106e-06, "loss": 0.2359, "step": 2250 },
    { "epoch": 0.7874697127033575, "grad_norm": 4.4802703857421875, "learning_rate": 8.131578947368421e-06, "loss": 0.2377, "step": 2275 },
    { "epoch": 0.796123226029768, "grad_norm": 4.503852367401123, "learning_rate": 8.105263157894736e-06, "loss": 0.2325, "step": 2300 },
    { "epoch": 0.8047767393561787, "grad_norm": 4.415956020355225, "learning_rate": 8.078947368421053e-06, "loss": 0.2438, "step": 2325 },
    { "epoch": 0.8134302526825892, "grad_norm": 6.339819431304932, "learning_rate": 8.052631578947368e-06, "loss": 0.2479, "step": 2350 },
    { "epoch": 0.8220837660089997, "grad_norm": 4.9156813621521, "learning_rate": 8.026315789473685e-06, "loss": 0.2195, "step": 2375 },
    { "epoch": 0.8307372793354102, "grad_norm": 5.688671112060547, "learning_rate": 8.000000000000001e-06, "loss": 0.2199, "step": 2400 },
    { "epoch": 0.8393907926618207, "grad_norm": 4.447849750518799, "learning_rate": 7.973684210526316e-06, "loss": 0.2429, "step": 2425 },
    { "epoch": 0.8480443059882312, "grad_norm": 3.792633295059204, "learning_rate": 7.947368421052633e-06, "loss": 0.2189, "step": 2450 },
    { "epoch": 0.8566978193146417, "grad_norm": 4.0045247077941895, "learning_rate": 7.921052631578948e-06, "loss": 0.2296, "step": 2475 },
    { "epoch": 0.8653513326410522, "grad_norm": 4.449003219604492, "learning_rate": 7.894736842105265e-06, "loss": 0.2136, "step": 2500 },
    { "epoch": 0.8653513326410522, "eval_loss": 0.23298430442810059, "eval_runtime": 8676.1006, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.074, "eval_wer": 20.03725846980331, "step": 2500 },
    { "epoch": 0.8740048459674628, "grad_norm": 4.327373027801514, "learning_rate": 7.86842105263158e-06, "loss": 0.227, "step": 2525 },
    { "epoch": 0.8826583592938733, "grad_norm": 4.755936145782471, "learning_rate": 7.842105263157895e-06, "loss": 0.2291, "step": 2550 },
    { "epoch": 0.8913118726202839, "grad_norm": 4.75525426864624, "learning_rate": 7.815789473684211e-06, "loss": 0.2418, "step": 2575 },
    { "epoch": 0.8999653859466944, "grad_norm": 4.342800140380859, "learning_rate": 7.789473684210526e-06, "loss": 0.2316, "step": 2600 },
    { "epoch": 0.9086188992731049, "grad_norm": 4.322353363037109, "learning_rate": 7.763157894736843e-06, "loss": 0.2242, "step": 2625 },
    { "epoch": 0.9172724125995154, "grad_norm": 4.406942367553711, "learning_rate": 7.736842105263158e-06, "loss": 0.2178, "step": 2650 },
    { "epoch": 0.9259259259259259, "grad_norm": 5.0642266273498535, "learning_rate": 7.710526315789474e-06, "loss": 0.2335, "step": 2675 },
    { "epoch": 0.9345794392523364, "grad_norm": 4.1676483154296875, "learning_rate": 7.68421052631579e-06, "loss": 0.226, "step": 2700 },
    { "epoch": 0.943232952578747, "grad_norm": 4.0350022315979, "learning_rate": 7.657894736842106e-06, "loss": 0.2388, "step": 2725 },
    { "epoch": 0.9518864659051575, "grad_norm": 4.125761032104492, "learning_rate": 7.631578947368423e-06, "loss": 0.2356, "step": 2750 },
    { "epoch": 0.960539979231568, "grad_norm": 3.9152023792266846, "learning_rate": 7.605263157894738e-06, "loss": 0.2089, "step": 2775 },
    { "epoch": 0.9691934925579785, "grad_norm": 4.8811821937561035, "learning_rate": 7.578947368421054e-06, "loss": 0.2059, "step": 2800 },
    { "epoch": 0.9778470058843891, "grad_norm": 4.5911712646484375, "learning_rate": 7.552631578947369e-06, "loss": 0.2155, "step": 2825 },
    { "epoch": 0.9865005192107996, "grad_norm": 4.353863716125488, "learning_rate": 7.526315789473685e-06, "loss": 0.2145, "step": 2850 },
    { "epoch": 0.9951540325372101, "grad_norm": 5.159242153167725, "learning_rate": 7.500000000000001e-06, "loss": 0.2338, "step": 2875 },
    { "epoch": 1.0038075458636206, "grad_norm": 3.813417673110962, "learning_rate": 7.473684210526316e-06, "loss": 0.1849, "step": 2900 },
    { "epoch": 1.0124610591900312, "grad_norm": 3.838930368423462, "learning_rate": 7.447368421052632e-06, "loss": 0.1596, "step": 2925 },
    { "epoch": 1.0211145725164417, "grad_norm": 3.80027174949646, "learning_rate": 7.421052631578948e-06, "loss": 0.184, "step": 2950 },
    { "epoch": 1.0297680858428522, "grad_norm": 3.2930946350097656, "learning_rate": 7.3947368421052635e-06, "loss": 0.169, "step": 2975 },
    { "epoch": 1.0384215991692627, "grad_norm": 3.8618459701538086, "learning_rate": 7.368421052631579e-06, "loss": 0.1664, "step": 3000 },
    { "epoch": 1.0384215991692627, "eval_loss": 0.22383837401866913, "eval_runtime": 8630.3289, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 18.63530023394853, "step": 3000 },
    { "epoch": 1.0470751124956732, "grad_norm": 4.719282627105713, "learning_rate": 7.342105263157895e-06, "loss": 0.1675, "step": 3025 },
    { "epoch": 1.0557286258220837, "grad_norm": 2.7439825534820557, "learning_rate": 7.315789473684212e-06, "loss": 0.1656, "step": 3050 },
    { "epoch": 1.0643821391484942, "grad_norm": 4.707197189331055, "learning_rate": 7.289473684210528e-06, "loss": 0.1743, "step": 3075 },
    { "epoch": 1.0730356524749047, "grad_norm": 3.8877105712890625, "learning_rate": 7.263157894736843e-06, "loss": 0.1706, "step": 3100 },
    { "epoch": 1.0816891658013152, "grad_norm": 3.034952402114868, "learning_rate": 7.236842105263158e-06, "loss": 0.1575, "step": 3125 },
    { "epoch": 1.0903426791277258, "grad_norm": 3.1362013816833496, "learning_rate": 7.210526315789474e-06, "loss": 0.1624, "step": 3150 },
    { "epoch": 1.0989961924541363, "grad_norm": 3.822435140609741, "learning_rate": 7.18421052631579e-06, "loss": 0.1575, "step": 3175 },
    { "epoch": 1.107649705780547, "grad_norm": 3.342021942138672, "learning_rate": 7.157894736842106e-06, "loss": 0.1456, "step": 3200 },
    { "epoch": 1.1163032191069575, "grad_norm": 2.8061094284057617, "learning_rate": 7.131578947368422e-06, "loss": 0.1573, "step": 3225 },
    { "epoch": 1.124956732433368, "grad_norm": 4.738641738891602, "learning_rate": 7.1052631578947375e-06, "loss": 0.1753, "step": 3250 },
    { "epoch": 1.1336102457597785, "grad_norm": 2.7924444675445557, "learning_rate": 7.078947368421053e-06, "loss": 0.1542, "step": 3275 },
    { "epoch": 1.142263759086189, "grad_norm": 3.8055057525634766, "learning_rate": 7.052631578947369e-06, "loss": 0.1683, "step": 3300 },
    { "epoch": 1.1509172724125996, "grad_norm": 2.7615177631378174, "learning_rate": 7.026315789473684e-06, "loss": 0.1607, "step": 3325 },
    { "epoch": 1.15957078573901, "grad_norm": 3.5338289737701416, "learning_rate": 7e-06, "loss": 0.1818, "step": 3350 },
    { "epoch": 1.1682242990654206, "grad_norm": 4.972025394439697, "learning_rate": 6.973684210526316e-06, "loss": 0.1683, "step": 3375 },
    { "epoch": 1.176877812391831, "grad_norm": 2.7351698875427246, "learning_rate": 6.947368421052632e-06, "loss": 0.163, "step": 3400 },
    { "epoch": 1.1855313257182416, "grad_norm": 2.600933074951172, "learning_rate": 6.921052631578948e-06, "loss": 0.1639, "step": 3425 },
    { "epoch": 1.1941848390446521, "grad_norm": 3.196901798248291, "learning_rate": 6.894736842105264e-06, "loss": 0.1689, "step": 3450 },
    { "epoch": 1.2028383523710626, "grad_norm": 4.408321380615234, "learning_rate": 6.86842105263158e-06, "loss": 0.1853, "step": 3475 },
    { "epoch": 1.2114918656974731, "grad_norm": 3.1869866847991943, "learning_rate": 6.842105263157896e-06, "loss": 0.1781, "step": 3500 },
    { "epoch": 1.2114918656974731, "eval_loss": 0.22067983448505402, "eval_runtime": 8630.1006, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 18.572913958929036, "step": 3500 },
    { "epoch": 1.2201453790238836, "grad_norm": 3.745699882507324, "learning_rate": 6.8157894736842115e-06, "loss": 0.1685, "step": 3525 },
    { "epoch": 1.2287988923502942, "grad_norm": 4.755461692810059, "learning_rate": 6.789473684210527e-06, "loss": 0.1653, "step": 3550 },
    { "epoch": 1.2374524056767047, "grad_norm": 2.958872079849243, "learning_rate": 6.763157894736842e-06, "loss": 0.1657, "step": 3575 },
    { "epoch": 1.2461059190031152, "grad_norm": 3.780946969985962, "learning_rate": 6.736842105263158e-06, "loss": 0.1818, "step": 3600 },
    { "epoch": 1.254759432329526, "grad_norm": 3.9823403358459473, "learning_rate": 6.710526315789474e-06, "loss": 0.1705, "step": 3625 },
    { "epoch": 1.2634129456559364, "grad_norm": 3.881185531616211, "learning_rate": 6.68421052631579e-06, "loss": 0.1688, "step": 3650 },
    { "epoch": 1.272066458982347, "grad_norm": 3.2562785148620605, "learning_rate": 6.6578947368421055e-06, "loss": 0.1597, "step": 3675 },
    { "epoch": 1.2807199723087574, "grad_norm": 4.002935886383057, "learning_rate": 6.631578947368421e-06, "loss": 0.1653, "step": 3700 },
    { "epoch": 1.289373485635168, "grad_norm": 3.866936206817627, "learning_rate": 6.605263157894738e-06, "loss": 0.1687, "step": 3725 },
    { "epoch": 1.2980269989615785, "grad_norm": 4.491256237030029, "learning_rate": 6.578947368421054e-06, "loss": 0.184, "step": 3750 },
    { "epoch": 1.306680512287989, "grad_norm": 2.8679704666137695, "learning_rate": 6.55263157894737e-06, "loss": 0.1761, "step": 3775 },
    { "epoch": 1.3153340256143995, "grad_norm": 3.8533244132995605, "learning_rate": 6.526315789473685e-06, "loss": 0.1612, "step": 3800 },
    { "epoch": 1.32398753894081, "grad_norm": 3.4180614948272705, "learning_rate": 6.5000000000000004e-06, "loss": 0.1668, "step": 3825 },
    { "epoch": 1.3326410522672205, "grad_norm": 3.1745965480804443, "learning_rate": 6.473684210526316e-06, "loss": 0.1571, "step": 3850 },
    { "epoch": 1.341294565593631, "grad_norm": 3.310295343399048, "learning_rate": 6.447368421052632e-06, "loss": 0.1625, "step": 3875 },
    { "epoch": 1.3499480789200415, "grad_norm": 3.5954184532165527, "learning_rate": 6.421052631578948e-06, "loss": 0.158, "step": 3900 },
    { "epoch": 1.358601592246452, "grad_norm": 2.868551731109619, "learning_rate": 6.394736842105264e-06, "loss": 0.17, "step": 3925 },
    { "epoch": 1.3672551055728626, "grad_norm": 2.9729490280151367, "learning_rate": 6.3684210526315795e-06, "loss": 0.1511, "step": 3950 },
    { "epoch": 1.375908618899273, "grad_norm": 2.286844253540039, "learning_rate": 6.342105263157895e-06, "loss": 0.1648, "step": 3975 },
    { "epoch": 1.3845621322256836, "grad_norm": 3.9818239212036133, "learning_rate": 6.31578947368421e-06, "loss": 0.1664, "step": 4000 },
    { "epoch": 1.3845621322256836, "eval_loss": 0.21563765406608582, "eval_runtime": 8678.9976, "eval_samples_per_second": 1.184, "eval_steps_per_second": 0.074, "eval_wer": 18.037431765011696, "step": 4000 },
    { "epoch": 1.393215645552094, "grad_norm": 3.8452024459838867, "learning_rate": 6.289473684210526e-06, "loss": 0.1642, "step": 4025 },
    { "epoch": 1.4018691588785046, "grad_norm": 3.381753444671631, "learning_rate": 6.263157894736842e-06, "loss": 0.1671, "step": 4050 },
    { "epoch": 1.4105226722049151, "grad_norm": 3.9922471046447754, "learning_rate": 6.236842105263159e-06, "loss": 0.1721, "step": 4075 },
    { "epoch": 1.4191761855313256, "grad_norm": 3.2609457969665527, "learning_rate": 6.2105263157894745e-06, "loss": 0.1832, "step": 4100 },
    { "epoch": 1.4278296988577361, "grad_norm": 3.5233139991760254, "learning_rate": 6.18421052631579e-06, "loss": 0.1734, "step": 4125 },
    { "epoch": 1.4364832121841467, "grad_norm": 4.901401519775391, "learning_rate": 6.157894736842106e-06, "loss": 0.181, "step": 4150 },
    { "epoch": 1.4451367255105572, "grad_norm": 2.4299676418304443, "learning_rate": 6.131578947368422e-06, "loss": 0.1538, "step": 4175 },
    { "epoch": 1.4537902388369677, "grad_norm": 4.308781623840332, "learning_rate": 6.105263157894738e-06, "loss": 0.1501, "step": 4200 },
    { "epoch": 1.4624437521633784, "grad_norm": 4.0135498046875, "learning_rate": 6.0789473684210535e-06, "loss": 0.1735, "step": 4225 },
    { "epoch": 1.471097265489789, "grad_norm": 3.9877755641937256, "learning_rate": 6.0526315789473685e-06, "loss": 0.1717, "step": 4250 },
    { "epoch": 1.4797507788161994, "grad_norm": 3.184150218963623, "learning_rate": 6.026315789473684e-06, "loss": 0.1571, "step": 4275 },
    { "epoch": 1.48840429214261, "grad_norm": 3.2754974365234375, "learning_rate": 6e-06, "loss": 0.1618, "step": 4300 },
    { "epoch": 1.4970578054690205, "grad_norm": 3.145984411239624, "learning_rate": 5.973684210526316e-06, "loss": 0.1637, "step": 4325 },
    { "epoch": 1.505711318795431, "grad_norm": 4.307953834533691, "learning_rate": 5.947368421052632e-06, "loss": 0.1568, "step": 4350 },
    { "epoch": 1.5143648321218415, "grad_norm": 2.7052788734436035, "learning_rate": 5.921052631578948e-06, "loss": 0.1573, "step": 4375 },
    { "epoch": 1.523018345448252, "grad_norm": 4.613982677459717, "learning_rate": 5.8947368421052634e-06, "loss": 0.1531, "step": 4400 },
    { "epoch": 1.5316718587746625, "grad_norm": 3.401477813720703, "learning_rate": 5.86842105263158e-06, "loss": 0.167, "step": 4425 },
    { "epoch": 1.540325372101073, "grad_norm": 4.301424503326416, "learning_rate": 5.842105263157896e-06, "loss": 0.168, "step": 4450 },
    { "epoch": 1.5489788854274835, "grad_norm": 4.266972541809082, "learning_rate": 5.815789473684212e-06, "loss": 0.1589, "step": 4475 },
    { "epoch": 1.557632398753894, "grad_norm": 3.3040754795074463, "learning_rate": 5.789473684210527e-06, "loss": 0.1659, "step": 4500 },
    { "epoch": 1.557632398753894, "eval_loss": 0.21191351115703583, "eval_runtime": 8655.7858, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.074, "eval_wer": 18.74360973919071, "step": 4500 },
    { "epoch": 1.5662859120803048, "grad_norm": 4.775163173675537, "learning_rate": 5.7631578947368425e-06, "loss": 0.1749, "step": 4525 },
    { "epoch": 1.5749394254067153, "grad_norm": 3.8686747550964355, "learning_rate": 5.736842105263158e-06, "loss": 0.1656, "step": 4550 },
    { "epoch": 1.5835929387331258, "grad_norm": 3.2979884147644043, "learning_rate": 5.710526315789474e-06, "loss": 0.1652, "step": 4575 },
    { "epoch": 1.5922464520595363, "grad_norm": 3.271785259246826, "learning_rate": 5.68421052631579e-06, "loss": 0.1611, "step": 4600 },
    { "epoch": 1.6008999653859468, "grad_norm": 4.323774814605713, "learning_rate": 5.657894736842106e-06, "loss": 0.1713, "step": 4625 },
    { "epoch": 1.6095534787123573, "grad_norm": 2.925485134124756, "learning_rate": 5.631578947368422e-06, "loss": 0.1634, "step": 4650 },
    { "epoch": 1.6182069920387678, "grad_norm": 3.3976783752441406, "learning_rate": 5.605263157894737e-06, "loss": 0.1761, "step": 4675 },
    { "epoch": 1.6268605053651783, "grad_norm": 3.1800551414489746, "learning_rate": 5.578947368421052e-06, "loss": 0.1522, "step": 4700 },
    { "epoch": 1.6355140186915889, "grad_norm": 3.392937660217285, "learning_rate": 5.552631578947368e-06, "loss": 0.1604, "step": 4725 },
    { "epoch": 1.6441675320179994, "grad_norm": 3.9035747051239014, "learning_rate": 5.526315789473685e-06, "loss": 0.1922, "step": 4750 },
    { "epoch": 1.6528210453444099, "grad_norm": 3.81205415725708, "learning_rate": 5.500000000000001e-06, "loss": 0.1786, "step": 4775 },
    { "epoch": 1.6614745586708204, "grad_norm": 3.3572874069213867, "learning_rate": 5.4736842105263165e-06, "loss": 0.169, "step": 4800 },
    { "epoch": 1.670128071997231, "grad_norm": 3.0381922721862793, "learning_rate": 5.447368421052632e-06, "loss": 0.1753, "step": 4825 },
    { "epoch": 1.6787815853236414, "grad_norm": 3.7208361625671387, "learning_rate": 5.421052631578948e-06, "loss": 0.1587, "step": 4850 },
    { "epoch": 1.687435098650052, "grad_norm": 6.452873229980469, "learning_rate": 5.394736842105264e-06, "loss": 0.1559, "step": 4875 },
    { "epoch": 1.6960886119764624, "grad_norm": 3.532186269760132, "learning_rate": 5.36842105263158e-06, "loss": 0.1587, "step": 4900 },
    { "epoch": 1.704742125302873, "grad_norm": 3.6204092502593994, "learning_rate": 5.342105263157895e-06, "loss": 0.1638, "step": 4925 },
    { "epoch": 1.7133956386292835, "grad_norm": 3.3600478172302246, "learning_rate": 5.315789473684211e-06, "loss": 0.1657, "step": 4950 },
    { "epoch": 1.722049151955694, "grad_norm": 3.8117873668670654, "learning_rate": 5.289473684210526e-06, "loss": 0.1533, "step": 4975 },
    { "epoch": 1.7307026652821045, "grad_norm": 4.345729827880859, "learning_rate": 5.263157894736842e-06, "loss": 0.1611, "step": 5000 },
    { "epoch": 1.7307026652821045, "eval_loss": 0.20883877575397491, "eval_runtime": 8644.4172, "eval_samples_per_second": 1.188, "eval_steps_per_second": 0.074, "eval_wer": 17.726366865956155, "step": 5000 },
    { "epoch": 1.739356178608515, "grad_norm": 2.709228515625, "learning_rate": 5.236842105263158e-06, "loss": 0.1723, "step": 5025 },
    { "epoch": 1.7480096919349255, "grad_norm": 4.446653366088867, "learning_rate": 5.210526315789474e-06, "loss": 0.1606, "step": 5050 },
    { "epoch": 1.756663205261336, "grad_norm": 4.571587562561035, "learning_rate": 5.18421052631579e-06, "loss": 0.1628, "step": 5075 },
    { "epoch": 1.7653167185877465, "grad_norm": 3.951996088027954, "learning_rate": 5.157894736842106e-06, "loss": 0.1532, "step": 5100 },
    { "epoch": 1.773970231914157, "grad_norm": 3.2565793991088867, "learning_rate": 5.131578947368422e-06, "loss": 0.1599, "step": 5125 },
    { "epoch": 1.7826237452405675, "grad_norm": 2.625930070877075, "learning_rate": 5.105263157894738e-06, "loss": 0.1606, "step": 5150 },
    { "epoch": 1.791277258566978, "grad_norm": 3.5779178142547607, "learning_rate": 5.078947368421053e-06, "loss": 0.1683, "step": 5175 },
    { "epoch": 1.7999307718933886, "grad_norm": 3.518836736679077, "learning_rate": 5.052631578947369e-06, "loss": 0.1575, "step": 5200 },
    { "epoch": 1.808584285219799, "grad_norm": 2.62227725982666, "learning_rate": 5.026315789473685e-06, "loss": 0.1549, "step": 5225 },
    { "epoch": 1.8172377985462098, "grad_norm": 3.5382871627807617, "learning_rate": 5e-06, "loss": 0.1566, "step": 5250 },
    { "epoch": 1.8258913118726203, "grad_norm": 4.410214900970459, "learning_rate": 4.973684210526316e-06, "loss": 0.1529, "step": 5275 },
    { "epoch": 1.8345448251990308, "grad_norm": 3.1463205814361572, "learning_rate": 4.947368421052632e-06, "loss": 0.1551, "step": 5300 },
    { "epoch": 1.8431983385254413, "grad_norm": 2.4352145195007324, "learning_rate": 4.921052631578948e-06, "loss": 0.1624, "step": 5325 },
    { "epoch": 1.8518518518518519, "grad_norm": 3.8748574256896973, "learning_rate": 4.894736842105264e-06, "loss": 0.1619, "step": 5350 },
    { "epoch": 1.8605053651782624, "grad_norm": 2.8592870235443115, "learning_rate": 4.8684210526315795e-06, "loss": 0.1709, "step": 5375 },
    { "epoch": 1.8691588785046729, "grad_norm": 3.5654568672180176, "learning_rate": 4.842105263157895e-06, "loss": 0.1568, "step": 5400 },
    { "epoch": 1.8778123918310834, "grad_norm": 3.1443722248077393, "learning_rate": 4.815789473684211e-06, "loss": 0.1546, "step": 5425 },
    { "epoch": 1.886465905157494, "grad_norm": 2.727612018585205, "learning_rate": 4.789473684210527e-06, "loss": 0.1502, "step": 5450 },
    { "epoch": 1.8951194184839044, "grad_norm": 3.5027356147766113, "learning_rate": 4.763157894736842e-06, "loss": 0.1545, "step": 5475 },
    { "epoch": 1.9037729318103151, "grad_norm": 3.154855966567993, "learning_rate": 4.736842105263158e-06, "loss": 0.1424, "step": 5500 },
    { "epoch": 1.9037729318103151, "eval_loss": 0.20275835692882538, "eval_runtime": 8641.5231, "eval_samples_per_second": 1.189, "eval_steps_per_second": 0.074, "eval_wer": 17.243739710597, "step": 5500 },
    { "epoch": 1.9124264451367257, "grad_norm": 2.7067971229553223, "learning_rate": 4.710526315789474e-06, "loss": 0.1599, "step": 5525 },
    { "epoch": 1.9210799584631362, "grad_norm": 3.4274163246154785, "learning_rate": 4.68421052631579e-06, "loss": 0.1596, "step": 5550 },
    { "epoch": 1.9297334717895467, "grad_norm": 3.3891353607177734, "learning_rate": 4.657894736842106e-06, "loss": 0.1836, "step": 5575 },
    { "epoch": 1.9383869851159572, "grad_norm": 3.259261131286621, "learning_rate": 4.631578947368421e-06, "loss": 0.1574, "step": 5600 },
    { "epoch": 1.9470404984423677, "grad_norm": 4.355072021484375, "learning_rate": 4.605263157894737e-06, "loss": 0.151, "step": 5625 },
    { "epoch": 1.9556940117687782, "grad_norm": 4.160757064819336, "learning_rate": 4.578947368421053e-06, "loss": 0.153, "step": 5650 },
    { "epoch": 1.9643475250951887, "grad_norm": 2.7162065505981445, "learning_rate": 4.552631578947369e-06, "loss": 0.1504, "step": 5675 },
    { "epoch": 1.9730010384215992, "grad_norm": 3.1264755725860596, "learning_rate": 4.526315789473685e-06, "loss": 0.1503, "step": 5700 },
    { "epoch": 1.9816545517480098, "grad_norm": 3.2158703804016113, "learning_rate": 4.5e-06, "loss": 0.1629, "step": 5725 },
    { "epoch": 1.9903080650744203, "grad_norm": 3.41349196434021, "learning_rate": 4.473684210526316e-06, "loss": 0.1628, "step": 5750 },
    { "epoch": 1.9989615784008308, "grad_norm": 2.600003957748413, "learning_rate": 4.447368421052632e-06, "loss": 0.153, "step": 5775 },
    { "epoch": 2.0076150917272413, "grad_norm": 2.955773115158081, "learning_rate": 4.4210526315789476e-06, "loss": 0.1056, "step": 5800 },
    { "epoch": 2.016268605053652, "grad_norm": 3.6034035682678223, "learning_rate": 4.394736842105263e-06, "loss": 0.1, "step": 5825 },
    { "epoch": 2.0249221183800623, "grad_norm": 2.37636137008667, "learning_rate": 4.368421052631579e-06, "loss": 0.1042, "step": 5850 },
    { "epoch": 2.033575631706473, "grad_norm": 2.6915884017944336, "learning_rate": 4.342105263157895e-06, "loss": 0.1162, "step": 5875 },
    { "epoch": 2.0422291450328833, "grad_norm": 2.495497226715088, "learning_rate": 4.315789473684211e-06, "loss": 0.1097, "step": 5900 },
    { "epoch": 2.050882658359294, "grad_norm": 3.1484713554382324, "learning_rate": 4.289473684210527e-06, "loss": 0.1183, "step": 5925 },
    { "epoch": 2.0595361716857044, "grad_norm": 2.547849416732788, "learning_rate": 4.2631578947368425e-06, "loss": 0.1205, "step": 5950 },
    { "epoch": 2.068189685012115, "grad_norm": 2.342745304107666, "learning_rate": 4.236842105263158e-06, "loss": 0.1117, "step": 5975 },
    { "epoch": 2.0768431983385254, "grad_norm": 2.926923990249634, "learning_rate": 4.210526315789474e-06, "loss": 0.1101, "step": 6000 },
    { "epoch": 2.0768431983385254, "eval_loss": 0.20616546273231506, "eval_runtime": 8634.4104, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 16.82523178234122, "step": 6000 },
    { "epoch": 2.085496711664936, "grad_norm": 2.461634635925293, "learning_rate": 4.18421052631579e-06, "loss": 0.1108, "step": 6025 },
    { "epoch": 2.0941502249913464, "grad_norm": 1.6099869012832642, "learning_rate": 4.157894736842106e-06, "loss": 0.1091, "step": 6050 },
    { "epoch": 2.102803738317757, "grad_norm": 2.497805595397949, "learning_rate": 4.1315789473684216e-06, "loss": 0.1054, "step": 6075 },
    { "epoch": 2.1114572516441674, "grad_norm": 2.440737009048462, "learning_rate": 4.105263157894737e-06, "loss": 0.1143, "step": 6100 },
    { "epoch": 2.120110764970578, "grad_norm": 2.547050714492798, "learning_rate": 4.078947368421053e-06, "loss": 0.1051, "step": 6125 },
    { "epoch": 2.1287642782969884, "grad_norm": 2.2565364837646484, "learning_rate": 4.052631578947368e-06, "loss": 0.1079, "step": 6150 },
    { "epoch": 2.137417791623399, "grad_norm": 3.4482452869415283, "learning_rate": 4.026315789473684e-06, "loss": 0.107, "step": 6175 },
    { "epoch": 2.1460713049498095, "grad_norm": 1.6255193948745728, "learning_rate": 4.000000000000001e-06, "loss": 0.1124, "step": 6200 },
    { "epoch": 2.15472481827622, "grad_norm": 2.6273090839385986, "learning_rate": 3.9736842105263165e-06, "loss": 0.1012, "step": 6225 },
    { "epoch": 2.1633783316026305, "grad_norm": 4.822213649749756, "learning_rate": 3.947368421052632e-06, "loss": 0.106, "step": 6250 },
    { "epoch": 2.172031844929041, "grad_norm": 3.0468506813049316, "learning_rate": 3.921052631578947e-06, "loss": 0.1343, "step": 6275 },
    { "epoch": 2.1806853582554515, "grad_norm": 3.5357604026794434, "learning_rate": 3.894736842105263e-06, "loss": 0.1066, "step": 6300 },
    { "epoch": 2.189338871581862, "grad_norm": 2.8175506591796875, "learning_rate": 3.868421052631579e-06, "loss": 0.1102, "step": 6325 },
    { "epoch": 2.1979923849082725, "grad_norm": 3.171792984008789, "learning_rate": 3.842105263157895e-06, "loss": 0.1081, "step": 6350 },
    { "epoch": 2.2066458982346835, "grad_norm": 2.2714669704437256, "learning_rate": 3.815789473684211e-06, "loss": 0.1077, "step": 6375 },
    { "epoch": 2.215299411561094, "grad_norm": 4.731479644775391, "learning_rate": 3.789473684210527e-06, "loss": 0.1055, "step": 6400 },
    { "epoch": 2.2239529248875045, "grad_norm": 2.8998143672943115, "learning_rate": 3.7631578947368426e-06, "loss": 0.1189, "step": 6425 },
    { "epoch": 2.232606438213915, "grad_norm": 2.2706921100616455, "learning_rate": 3.736842105263158e-06, "loss": 0.1134, "step": 6450 },
    { "epoch": 2.2412599515403255, "grad_norm": 3.229358196258545, "learning_rate": 3.710526315789474e-06, "loss": 0.1154, "step": 6475 },
    { "epoch": 2.249913464866736, "grad_norm": 2.179197072982788, "learning_rate": 3.6842105263157896e-06, "loss": 0.0966, "step": 6500 },
    { "epoch": 2.249913464866736, "eval_loss": 0.20438149571418762, "eval_runtime": 8615.3588, "eval_samples_per_second": 1.192, "eval_steps_per_second": 0.075, "eval_wer": 16.619876960402046, "step": 6500 },
    { "epoch": 2.2585669781931466, "grad_norm": 2.860914707183838, "learning_rate": 3.657894736842106e-06, "loss": 0.1083, "step": 6525 },
    { "epoch": 2.267220491519557, "grad_norm": 3.0490429401397705, "learning_rate": 3.6315789473684217e-06, "loss": 0.1068, "step": 6550 },
    { "epoch": 2.2758740048459676, "grad_norm": 3.8441545963287354, "learning_rate": 3.605263157894737e-06, "loss": 0.1125, "step": 6575 },
    { "epoch": 2.284527518172378, "grad_norm": 2.9149553775787354, "learning_rate": 3.578947368421053e-06, "loss": 0.1081, "step": 6600 },
    { "epoch": 2.2931810314987886, "grad_norm": 3.6281797885894775, "learning_rate": 3.5526315789473687e-06, "loss": 0.1116, "step": 6625 },
    { "epoch": 2.301834544825199, "grad_norm": 2.624938488006592, "learning_rate": 3.5263157894736846e-06, "loss": 0.1062, "step": 6650 },
    { "epoch": 2.3104880581516096, "grad_norm": 3.465491533279419, "learning_rate": 3.5e-06, "loss": 0.128, "step": 6675 },
    { "epoch": 2.31914157147802, "grad_norm": 3.024850606918335, "learning_rate": 3.473684210526316e-06, "loss": 0.1062, "step": 6700 },
    { "epoch": 2.3277950848044306, "grad_norm": 3.088701009750366, "learning_rate": 3.447368421052632e-06, "loss": 0.1155, "step": 6725 },
    { "epoch": 2.336448598130841, "grad_norm": 2.8708367347717285, "learning_rate": 3.421052631578948e-06, "loss": 0.1079, "step": 6750 },
    { "epoch": 2.3451021114572517, "grad_norm": 1.946626901626587, "learning_rate": 3.3947368421052636e-06, "loss": 0.1072, "step": 6775 },
    { "epoch": 2.353755624783662, "grad_norm": 3.3041462898254395, "learning_rate": 3.368421052631579e-06, "loss": 0.1104, "step": 6800 },
    { "epoch": 2.3624091381100727, "grad_norm": 2.6233861446380615, "learning_rate": 3.342105263157895e-06, "loss": 0.1075, "step": 6825 },
    { "epoch": 2.371062651436483, "grad_norm": 2.8356857299804688, "learning_rate": 3.3157894736842107e-06, "loss": 0.1058, "step": 6850 },
    { "epoch": 2.3797161647628937, "grad_norm": 2.9162681102752686, "learning_rate": 3.289473684210527e-06, "loss": 0.122, "step": 6875 },
    { "epoch": 2.3883696780893042, "grad_norm": 3.749187707901001, "learning_rate": 3.2631578947368423e-06, "loss": 0.1136, "step": 6900 },
    { "epoch": 2.3970231914157147, "grad_norm": 2.6137099266052246, "learning_rate": 3.236842105263158e-06, "loss": 0.1087, "step": 6925 },
    { "epoch": 2.4056767047421252, "grad_norm": 2.7214744091033936, "learning_rate": 3.210526315789474e-06, "loss": 0.103, "step": 6950 },
    { "epoch": 2.4143302180685358, "grad_norm": 2.98718523979187, "learning_rate": 3.1842105263157898e-06, "loss": 0.1125, "step": 6975 },
    { "epoch": 2.4229837313949463, "grad_norm": 2.834648609161377, "learning_rate": 3.157894736842105e-06, "loss": 0.1129, "step": 7000 },
    { "epoch": 2.4229837313949463, "eval_loss": 0.20139536261558533, "eval_runtime": 8648.9738, "eval_samples_per_second": 1.188, "eval_steps_per_second": 0.074, "eval_wer": 17.110302400138636, "step": 7000 },
    { "epoch": 2.431637244721357, "grad_norm": 3.4440181255340576, "learning_rate": 3.131578947368421e-06, "loss": 0.1141, "step": 7025 },
    { "epoch": 2.4402907580477673, "grad_norm": 3.478074550628662, "learning_rate": 3.1052631578947372e-06, "loss": 0.1031, "step": 7050 },
    { "epoch": 2.448944271374178, "grad_norm": 2.797724485397339, "learning_rate": 3.078947368421053e-06, "loss": 0.1094, "step": 7075 },
    { "epoch": 2.4575977847005883, "grad_norm": 1.8929002285003662, "learning_rate": 3.052631578947369e-06, "loss": 0.1097, "step": 7100 },
    { "epoch": 2.466251298026999, "grad_norm": 3.516230583190918, "learning_rate": 3.0263157894736843e-06, "loss": 0.1167, "step": 7125 },
    { "epoch": 2.4749048113534093, "grad_norm": 3.7443478107452393, "learning_rate": 3e-06, "loss": 0.1037, "step": 7150 },
    { "epoch": 2.48355832467982, "grad_norm": 2.543609142303467, "learning_rate": 2.973684210526316e-06, "loss": 0.1074, "step": 7175 },
    { "epoch": 2.4922118380062304, "grad_norm": 3.233546495437622, "learning_rate": 2.9473684210526317e-06, "loss": 0.1097, "step": 7200 },
    { "epoch": 2.5008653513326413, "grad_norm": 2.6485321521759033, "learning_rate": 2.921052631578948e-06, "loss": 0.1166, "step": 7225 },
    { "epoch": 2.509518864659052, "grad_norm": 2.249458074569702, "learning_rate": 2.8947368421052634e-06, "loss": 0.1122, "step": 7250 },
    { "epoch": 2.5181723779854623, "grad_norm": 3.3715906143188477, "learning_rate": 2.868421052631579e-06, "loss": 0.1171, "step": 7275 },
    { "epoch": 2.526825891311873, "grad_norm": 2.5565547943115234, "learning_rate": 2.842105263157895e-06, "loss": 0.1081, "step": 7300 },
    { "epoch": 2.5354794046382834, "grad_norm": 3.1583316326141357, "learning_rate": 2.815789473684211e-06, "loss": 0.1048, "step": 7325 },
    { "epoch": 2.544132917964694, "grad_norm": 3.302534580230713, "learning_rate": 2.789473684210526e-06, "loss": 0.1184, "step": 7350 },
    { "epoch": 2.5527864312911044, "grad_norm": 3.553318738937378, "learning_rate": 2.7631578947368424e-06, "loss": 0.1151, "step": 7375 },
    { "epoch": 2.561439944617515, "grad_norm": 2.6962010860443115, "learning_rate": 2.7368421052631583e-06, "loss": 0.1201, "step": 7400 },
    { "epoch": 2.5700934579439254, "grad_norm": 2.545358657836914, "learning_rate": 2.710526315789474e-06, "loss": 0.1273, "step": 7425 },
    { "epoch": 2.578746971270336, "grad_norm": 2.1197948455810547, "learning_rate": 2.68421052631579e-06, "loss": 0.0947, "step": 7450 },
    { "epoch": 2.5874004845967464, "grad_norm": 1.732006311416626, "learning_rate": 2.6578947368421053e-06, "loss": 0.1079, "step": 7475 },
    { "epoch": 2.596053997923157, "grad_norm": 2.9386560916900635, "learning_rate": 2.631578947368421e-06, "loss": 0.1065, "step": 7500 },
    { "epoch": 2.596053997923157, "eval_loss": 0.1983751654624939, "eval_runtime": 8607.5634, "eval_samples_per_second": 1.193, "eval_steps_per_second": 0.075, "eval_wer": 16.488172602027554, "step": 7500 },
    { "epoch": 2.6047075112495675, "grad_norm": 3.1988844871520996, "learning_rate": 2.605263157894737e-06, "loss": 0.1065, "step": 7525 },
    { "epoch": 2.613361024575978, "grad_norm": 2.8446412086486816, "learning_rate": 2.578947368421053e-06, "loss": 0.1175, "step": 7550 },
    { "epoch": 2.6220145379023885, "grad_norm": 3.071406364440918, "learning_rate": 2.552631578947369e-06, "loss": 0.0999, "step": 7575 },
    { "epoch": 2.630668051228799, "grad_norm": 2.666354179382324, "learning_rate": 2.5263157894736844e-06, "loss": 0.1036, "step": 7600 },
    { "epoch": 2.6393215645552095, "grad_norm": 2.845916271209717, "learning_rate": 2.5e-06, "loss": 0.1033, "step": 7625 },
    { "epoch": 2.64797507788162, "grad_norm": 2.97814679145813, "learning_rate": 2.473684210526316e-06, "loss": 0.1025, "step": 7650 },
    { "epoch": 2.6566285912080305, "grad_norm": 2.5824403762817383, "learning_rate": 2.447368421052632e-06, "loss": 0.1048, "step": 7675 },
    { "epoch": 2.665282104534441, "grad_norm": 2.1139883995056152, "learning_rate": 2.4210526315789477e-06, "loss": 0.1047, "step": 7700 },
    { "epoch": 2.6739356178608515, "grad_norm": 2.800978183746338, "learning_rate": 2.3947368421052635e-06, "loss": 0.1184, "step": 7725 },
    { "epoch": 2.682589131187262, "grad_norm": 3.0786638259887695, "learning_rate": 2.368421052631579e-06, "loss": 0.1286, "step": 7750 },
    { "epoch": 2.6912426445136726, "grad_norm": 2.917689085006714, "learning_rate": 2.342105263157895e-06, "loss": 0.0988, "step": 7775 },
    { "epoch": 2.699896157840083, "grad_norm": 2.986503839492798, "learning_rate": 2.3157894736842105e-06, "loss": 0.1074, "step": 7800 },
    { "epoch": 2.7085496711664936, "grad_norm": 2.5001847743988037, "learning_rate": 2.2894736842105263e-06, "loss": 0.1058, "step": 7825 },
    { "epoch": 2.717203184492904, "grad_norm": 3.5014684200286865, "learning_rate": 2.2631578947368426e-06, "loss": 0.1094, "step": 7850 },
    { "epoch": 2.7258566978193146, "grad_norm": 3.0983262062072754, "learning_rate": 2.236842105263158e-06, "loss": 0.1079, "step": 7875 },
    { "epoch": 2.734510211145725, "grad_norm": 2.922757625579834, "learning_rate": 2.2105263157894738e-06, "loss": 0.1135, "step": 7900 },
    { "epoch": 2.7431637244721356, "grad_norm": 2.354649305343628, "learning_rate": 2.1842105263157896e-06, "loss": 0.1145, "step": 7925 },
    { "epoch": 2.751817237798546, "grad_norm": 3.7237930297851562, "learning_rate": 2.1578947368421054e-06, "loss": 0.098, "step": 7950 },
    { "epoch": 2.7604707511249567, "grad_norm": 3.361809492111206, "learning_rate": 2.1315789473684212e-06, "loss": 0.1108, "step": 7975 },
    { "epoch": 2.769124264451367, "grad_norm": 2.6860949993133545, "learning_rate": 2.105263157894737e-06, "loss": 0.1075, "step": 8000 },
    { "epoch": 2.769124264451367, "eval_loss": 0.19574593007564545, "eval_runtime": 8625.7946, "eval_samples_per_second": 1.191, "eval_steps_per_second": 0.074, "eval_wer": 16.583484966640672, "step": 8000 },
    { "epoch": 2.7777777777777777, "grad_norm": 2.503368616104126, "learning_rate": 2.078947368421053e-06, "loss": 0.0999, "step": 8025 },
    { "epoch": 2.786431291104188, "grad_norm": 2.762155055999756, "learning_rate": 2.0526315789473687e-06, "loss": 0.1133, "step": 8050 },
    { "epoch": 2.7950848044305987, "grad_norm": 3.162900686264038, "learning_rate": 2.026315789473684e-06, "loss": 0.1208, "step": 8075 },
    { "epoch": 2.803738317757009, "grad_norm": 2.3575284481048584, "learning_rate": 2.0000000000000003e-06, "loss": 0.1011, "step": 8100 },
    { "epoch": 2.8123918310834197, "grad_norm": 3.4756760597229004, "learning_rate": 1.973684210526316e-06, "loss": 0.1095, "step": 8125 },
    { "epoch": 2.8210453444098302, "grad_norm": 2.538372039794922, "learning_rate": 1.9473684210526315e-06, "loss": 0.1069, "step": 8150 },
    { "epoch": 2.8296988577362407, "grad_norm": 2.2625138759613037, "learning_rate": 1.9210526315789474e-06, "loss": 0.1065, "step": 8175 },
    { "epoch": 2.8383523710626513, "grad_norm": 2.7284586429595947, "learning_rate": 1.8947368421052634e-06, "loss": 0.1105, "step": 8200 },
    { "epoch": 2.8470058843890618, "grad_norm": 2.6115376949310303, "learning_rate": 1.868421052631579e-06, "loss": 0.1035, "step": 8225 },
    { "epoch": 2.8556593977154723, "grad_norm": 2.6199817657470703, "learning_rate": 1.8421052631578948e-06, "loss": 0.1224, "step": 8250 },
    { "epoch": 2.864312911041883, "grad_norm": 3.060654640197754, "learning_rate": 1.8157894736842109e-06, "loss": 0.1027, "step": 8275 },
    { "epoch": 2.8729664243682933, "grad_norm": 3.6875500679016113, "learning_rate": 1.7894736842105265e-06, "loss": 0.0934, "step": 8300 },
    { "epoch": 2.881619937694704, "grad_norm": 2.232487440109253, "learning_rate": 1.7631578947368423e-06, "loss": 0.0972, "step": 8325 },
    { "epoch": 2.8902734510211143, "grad_norm": 3.0473804473876953, "learning_rate": 1.736842105263158e-06, "loss": 0.1013, "step": 8350 },
    { "epoch": 2.898926964347525, "grad_norm": 3.049717903137207, "learning_rate": 1.710526315789474e-06, "loss": 0.1024, "step": 8375 },
    { "epoch": 2.9075804776739353, "grad_norm": 3.389495849609375, "learning_rate": 1.6842105263157895e-06, "loss": 0.1114, "step": 8400 },
    { "epoch": 2.9162339910003463, "grad_norm": 2.7483088970184326, "learning_rate": 1.6578947368421053e-06, "loss": 0.1103, "step": 8425 },
    { "epoch": 2.924887504326757, "grad_norm": 2.98256778717041, "learning_rate": 1.6315789473684212e-06, "loss": 0.1131, "step": 8450 },
    { "epoch": 2.9335410176531673, "grad_norm": 3.0447702407836914, "learning_rate": 1.605263157894737e-06, "loss": 0.1031, "step": 8475 },
    { "epoch": 2.942194530979578, "grad_norm": 2.4080259799957275, "learning_rate": 1.5789473684210526e-06, "loss": 0.0992, "step": 8500 },
    { "epoch": 2.942194530979578, "eval_loss": 0.19420863687992096, "eval_runtime": 8629.0493, "eval_samples_per_second": 1.19, "eval_steps_per_second": 0.074, "eval_wer": 16.375530716575685, "step": 8500 },
{ |
|
"epoch": 2.9508480443059883, |
|
"grad_norm": 2.4957826137542725, |
|
"learning_rate": 1.5526315789473686e-06, |
|
"loss": 0.0983, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 2.959501557632399, |
|
"grad_norm": 2.019061326980591, |
|
"learning_rate": 1.5263157894736844e-06, |
|
"loss": 0.099, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.9681550709588094, |
|
"grad_norm": 3.2875280380249023, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.1051, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 2.97680858428522, |
|
"grad_norm": 2.705897092819214, |
|
"learning_rate": 1.4736842105263159e-06, |
|
"loss": 0.1033, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.9854620976116304, |
|
"grad_norm": 2.27734375, |
|
"learning_rate": 1.4473684210526317e-06, |
|
"loss": 0.1075, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 2.994115610938041, |
|
"grad_norm": 3.100257635116577, |
|
"learning_rate": 1.4210526315789475e-06, |
|
"loss": 0.11, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 3.0027691242644514, |
|
"grad_norm": 2.2938201427459717, |
|
"learning_rate": 1.394736842105263e-06, |
|
"loss": 0.0875, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 3.011422637590862, |
|
"grad_norm": 1.6862682104110718, |
|
"learning_rate": 1.3684210526315791e-06, |
|
"loss": 0.0785, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.0200761509172724, |
|
"grad_norm": 2.7323806285858154, |
|
"learning_rate": 1.342105263157895e-06, |
|
"loss": 0.0795, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 3.028729664243683, |
|
"grad_norm": 2.4621291160583496, |
|
"learning_rate": 1.3157894736842106e-06, |
|
"loss": 0.0693, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 3.0373831775700935, |
|
"grad_norm": 2.2543725967407227, |
|
"learning_rate": 1.2894736842105266e-06, |
|
"loss": 0.0707, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 3.046036690896504, |
|
"grad_norm": 2.0178897380828857, |
|
"learning_rate": 1.2631578947368422e-06, |
|
"loss": 0.0787, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.0546902042229145, |
|
"grad_norm": 1.9907864332199097, |
|
"learning_rate": 1.236842105263158e-06, |
|
"loss": 0.075, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 3.063343717549325, |
|
"grad_norm": 2.3367834091186523, |
|
"learning_rate": 1.2105263157894738e-06, |
|
"loss": 0.0789, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 3.0719972308757355, |
|
"grad_norm": 2.4846036434173584, |
|
"learning_rate": 1.1842105263157894e-06, |
|
"loss": 0.0722, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 3.080650744202146, |
|
"grad_norm": 2.7100768089294434, |
|
"learning_rate": 1.1578947368421053e-06, |
|
"loss": 0.0724, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 3.0893042575285565, |
|
"grad_norm": 2.0488345623016357, |
|
"learning_rate": 1.1315789473684213e-06, |
|
"loss": 0.0818, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 3.097957770854967, |
|
"grad_norm": 2.2149784564971924, |
|
"learning_rate": 1.1052631578947369e-06, |
|
"loss": 0.0753, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 3.1066112841813776, |
|
"grad_norm": 1.7441498041152954, |
|
"learning_rate": 1.0789473684210527e-06, |
|
"loss": 0.0658, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 3.115264797507788, |
|
"grad_norm": 2.315944194793701, |
|
"learning_rate": 1.0526315789473685e-06, |
|
"loss": 0.0687, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.115264797507788, |
|
"eval_loss": 0.2007349729537964, |
|
"eval_runtime": 8593.9573, |
|
"eval_samples_per_second": 1.195, |
|
"eval_steps_per_second": 0.075, |
|
"eval_wer": 16.449181180140368, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.1239183108341986, |
|
"grad_norm": 2.1374213695526123, |
|
"learning_rate": 1.0263157894736843e-06, |
|
"loss": 0.0678, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 3.132571824160609, |
|
"grad_norm": 2.6714038848876953, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0726, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 3.1412253374870196, |
|
"grad_norm": 2.326164484024048, |
|
"learning_rate": 9.736842105263158e-07, |
|
"loss": 0.0737, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 3.14987885081343, |
|
"grad_norm": 1.5465072393417358, |
|
"learning_rate": 9.473684210526317e-07, |
|
"loss": 0.0699, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 3.1585323641398406, |
|
"grad_norm": 1.9387298822402954, |
|
"learning_rate": 9.210526315789474e-07, |
|
"loss": 0.0707, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 3.167185877466251, |
|
"grad_norm": 2.333085775375366, |
|
"learning_rate": 8.947368421052632e-07, |
|
"loss": 0.0679, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 3.1758393907926616, |
|
"grad_norm": 1.9540473222732544, |
|
"learning_rate": 8.68421052631579e-07, |
|
"loss": 0.0683, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 3.184492904119072, |
|
"grad_norm": 2.5576722621917725, |
|
"learning_rate": 8.421052631578948e-07, |
|
"loss": 0.0719, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.1931464174454827, |
|
"grad_norm": 2.0068089962005615, |
|
"learning_rate": 8.157894736842106e-07, |
|
"loss": 0.0853, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 3.2017999307718936, |
|
"grad_norm": 2.2162768840789795, |
|
"learning_rate": 7.894736842105263e-07, |
|
"loss": 0.0683, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 3.210453444098304, |
|
"grad_norm": 1.776559829711914, |
|
"learning_rate": 7.631578947368422e-07, |
|
"loss": 0.0798, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 3.2191069574247146, |
|
"grad_norm": 1.4732505083084106, |
|
"learning_rate": 7.368421052631579e-07, |
|
"loss": 0.0726, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 3.227760470751125, |
|
"grad_norm": 2.921454906463623, |
|
"learning_rate": 7.105263157894737e-07, |
|
"loss": 0.0717, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 3.2364139840775357, |
|
"grad_norm": 2.061314344406128, |
|
"learning_rate": 6.842105263157896e-07, |
|
"loss": 0.0694, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 3.245067497403946, |
|
"grad_norm": 2.4505109786987305, |
|
"learning_rate": 6.578947368421053e-07, |
|
"loss": 0.0718, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 3.2537210107303567, |
|
"grad_norm": 2.636258840560913, |
|
"learning_rate": 6.315789473684211e-07, |
|
"loss": 0.0714, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.262374524056767, |
|
"grad_norm": 2.4016501903533936, |
|
"learning_rate": 6.052631578947369e-07, |
|
"loss": 0.0821, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 3.2710280373831777, |
|
"grad_norm": 2.0783393383026123, |
|
"learning_rate": 5.789473684210526e-07, |
|
"loss": 0.0748, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 3.2796815507095882, |
|
"grad_norm": 3.0884315967559814, |
|
"learning_rate": 5.526315789473684e-07, |
|
"loss": 0.0833, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 3.2883350640359987, |
|
"grad_norm": 2.3851513862609863, |
|
"learning_rate": 5.263157894736843e-07, |
|
"loss": 0.0722, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.2883350640359987, |
|
"eval_loss": 0.20027859508991241, |
|
"eval_runtime": 8622.3221, |
|
"eval_samples_per_second": 1.191, |
|
"eval_steps_per_second": 0.074, |
|
"eval_wer": 16.265488259249633, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.2969885773624092, |
|
"grad_norm": 2.619279146194458, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.0707, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 3.3056420906888198, |
|
"grad_norm": 1.975462794303894, |
|
"learning_rate": 4.7368421052631585e-07, |
|
"loss": 0.0696, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 3.3142956040152303, |
|
"grad_norm": 2.281332015991211, |
|
"learning_rate": 4.473684210526316e-07, |
|
"loss": 0.0698, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 3.322949117341641, |
|
"grad_norm": 2.048888683319092, |
|
"learning_rate": 4.210526315789474e-07, |
|
"loss": 0.0712, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 3.3316026306680513, |
|
"grad_norm": 2.216397762298584, |
|
"learning_rate": 3.9473684210526315e-07, |
|
"loss": 0.0756, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 3.340256143994462, |
|
"grad_norm": 3.0520379543304443, |
|
"learning_rate": 3.6842105263157896e-07, |
|
"loss": 0.0682, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 3.3489096573208723, |
|
"grad_norm": 3.0853352546691895, |
|
"learning_rate": 3.421052631578948e-07, |
|
"loss": 0.0803, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 3.357563170647283, |
|
"grad_norm": 2.6923489570617676, |
|
"learning_rate": 3.1578947368421055e-07, |
|
"loss": 0.0699, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 3.3662166839736933, |
|
"grad_norm": 1.5350950956344604, |
|
"learning_rate": 2.894736842105263e-07, |
|
"loss": 0.0641, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 3.374870197300104, |
|
"grad_norm": 1.8158336877822876, |
|
"learning_rate": 2.6315789473684213e-07, |
|
"loss": 0.0742, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 3.3835237106265144, |
|
"grad_norm": 2.268543243408203, |
|
"learning_rate": 2.3684210526315792e-07, |
|
"loss": 0.0812, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 3.392177223952925, |
|
"grad_norm": 2.02999210357666, |
|
"learning_rate": 2.105263157894737e-07, |
|
"loss": 0.0745, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.4008307372793354, |
|
"grad_norm": 2.2966854572296143, |
|
"learning_rate": 1.8421052631578948e-07, |
|
"loss": 0.0685, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 3.409484250605746, |
|
"grad_norm": 2.4790639877319336, |
|
"learning_rate": 1.5789473684210527e-07, |
|
"loss": 0.0695, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 3.4181377639321564, |
|
"grad_norm": 2.1657919883728027, |
|
"learning_rate": 1.3157894736842107e-07, |
|
"loss": 0.0742, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 3.426791277258567, |
|
"grad_norm": 1.6919013261795044, |
|
"learning_rate": 1.0526315789473685e-07, |
|
"loss": 0.0641, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.4354447905849774, |
|
"grad_norm": 2.441950798034668, |
|
"learning_rate": 7.894736842105264e-08, |
|
"loss": 0.0701, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 3.444098303911388, |
|
"grad_norm": 1.9817427396774292, |
|
"learning_rate": 5.263157894736842e-08, |
|
"loss": 0.0677, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 3.4527518172377984, |
|
"grad_norm": 1.978274941444397, |
|
"learning_rate": 2.631578947368421e-08, |
|
"loss": 0.073, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 3.461405330564209, |
|
"grad_norm": 2.204577684402466, |
|
"learning_rate": 0.0, |
|
"loss": 0.0713, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.461405330564209, |
|
"eval_loss": 0.19991621375083923, |
|
"eval_runtime": 8599.1571, |
|
"eval_samples_per_second": 1.195, |
|
"eval_steps_per_second": 0.075, |
|
"eval_wer": 16.21523264881726, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.455843688448e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|