{
"best_metric": 16.21523264881726,
"best_model_checkpoint": "./whisper-large-v3-turbo/checkpoint-10000",
"epoch": 3.461405330564209,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008653513326410523,
"grad_norm": 9.31276798248291,
"learning_rate": 5.000000000000001e-07,
"loss": 0.6314,
"step": 25
},
{
"epoch": 0.017307026652821047,
"grad_norm": 6.611477851867676,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.4058,
"step": 50
},
{
"epoch": 0.02596053997923157,
"grad_norm": 5.953363418579102,
"learning_rate": 1.5e-06,
"loss": 0.2556,
"step": 75
},
{
"epoch": 0.034614053305642094,
"grad_norm": 4.594871520996094,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.2411,
"step": 100
},
{
"epoch": 0.04326756663205261,
"grad_norm": 5.638365268707275,
"learning_rate": 2.5e-06,
"loss": 0.2421,
"step": 125
},
{
"epoch": 0.05192107995846314,
"grad_norm": 6.280882835388184,
"learning_rate": 3e-06,
"loss": 0.245,
"step": 150
},
{
"epoch": 0.060574593284873655,
"grad_norm": 4.423807144165039,
"learning_rate": 3.5e-06,
"loss": 0.2556,
"step": 175
},
{
"epoch": 0.06922810661128419,
"grad_norm": 5.257762908935547,
"learning_rate": 4.000000000000001e-06,
"loss": 0.243,
"step": 200
},
{
"epoch": 0.0778816199376947,
"grad_norm": 4.895700931549072,
"learning_rate": 4.5e-06,
"loss": 0.2607,
"step": 225
},
{
"epoch": 0.08653513326410522,
"grad_norm": 5.383410453796387,
"learning_rate": 5e-06,
"loss": 0.2451,
"step": 250
},
{
"epoch": 0.09518864659051575,
"grad_norm": 6.303346157073975,
"learning_rate": 5.500000000000001e-06,
"loss": 0.2316,
"step": 275
},
{
"epoch": 0.10384215991692627,
"grad_norm": 3.834745168685913,
"learning_rate": 6e-06,
"loss": 0.2511,
"step": 300
},
{
"epoch": 0.1124956732433368,
"grad_norm": 4.793943405151367,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.258,
"step": 325
},
{
"epoch": 0.12114918656974731,
"grad_norm": 4.196424961090088,
"learning_rate": 7e-06,
"loss": 0.2635,
"step": 350
},
{
"epoch": 0.12980269989615784,
"grad_norm": 5.759880065917969,
"learning_rate": 7.500000000000001e-06,
"loss": 0.2644,
"step": 375
},
{
"epoch": 0.13845621322256838,
"grad_norm": 4.871682167053223,
"learning_rate": 8.000000000000001e-06,
"loss": 0.2513,
"step": 400
},
{
"epoch": 0.1471097265489789,
"grad_norm": 4.624505996704102,
"learning_rate": 8.5e-06,
"loss": 0.2601,
"step": 425
},
{
"epoch": 0.1557632398753894,
"grad_norm": 5.247982501983643,
"learning_rate": 9e-06,
"loss": 0.254,
"step": 450
},
{
"epoch": 0.16441675320179994,
"grad_norm": 5.218228816986084,
"learning_rate": 9.5e-06,
"loss": 0.2717,
"step": 475
},
{
"epoch": 0.17307026652821045,
"grad_norm": 5.001543998718262,
"learning_rate": 1e-05,
"loss": 0.2829,
"step": 500
},
{
"epoch": 0.17307026652821045,
"eval_loss": 0.27819713950157166,
"eval_runtime": 8630.7687,
"eval_samples_per_second": 1.19,
"eval_steps_per_second": 0.074,
"eval_wer": 23.738844120960056,
"step": 500
},
{
"epoch": 0.181723779854621,
"grad_norm": 5.3592023849487305,
"learning_rate": 9.973684210526316e-06,
"loss": 0.2837,
"step": 525
},
{
"epoch": 0.1903772931810315,
"grad_norm": 5.0274658203125,
"learning_rate": 9.947368421052632e-06,
"loss": 0.2873,
"step": 550
},
{
"epoch": 0.199030806507442,
"grad_norm": 6.059903144836426,
"learning_rate": 9.921052631578947e-06,
"loss": 0.2927,
"step": 575
},
{
"epoch": 0.20768431983385255,
"grad_norm": 7.239508152008057,
"learning_rate": 9.894736842105264e-06,
"loss": 0.2662,
"step": 600
},
{
"epoch": 0.21633783316026306,
"grad_norm": 5.860602855682373,
"learning_rate": 9.868421052631579e-06,
"loss": 0.2847,
"step": 625
},
{
"epoch": 0.2249913464866736,
"grad_norm": 5.402172565460205,
"learning_rate": 9.842105263157896e-06,
"loss": 0.2653,
"step": 650
},
{
"epoch": 0.2336448598130841,
"grad_norm": 5.541703224182129,
"learning_rate": 9.815789473684212e-06,
"loss": 0.2994,
"step": 675
},
{
"epoch": 0.24229837313949462,
"grad_norm": 4.814186096191406,
"learning_rate": 9.789473684210527e-06,
"loss": 0.2576,
"step": 700
},
{
"epoch": 0.25095188646590516,
"grad_norm": 4.134284496307373,
"learning_rate": 9.763157894736844e-06,
"loss": 0.2788,
"step": 725
},
{
"epoch": 0.25960539979231567,
"grad_norm": 5.382356643676758,
"learning_rate": 9.736842105263159e-06,
"loss": 0.2902,
"step": 750
},
{
"epoch": 0.2682589131187262,
"grad_norm": 4.981515884399414,
"learning_rate": 9.710526315789474e-06,
"loss": 0.271,
"step": 775
},
{
"epoch": 0.27691242644513675,
"grad_norm": 4.840052127838135,
"learning_rate": 9.68421052631579e-06,
"loss": 0.2717,
"step": 800
},
{
"epoch": 0.28556593977154726,
"grad_norm": 4.619823932647705,
"learning_rate": 9.657894736842106e-06,
"loss": 0.2763,
"step": 825
},
{
"epoch": 0.2942194530979578,
"grad_norm": 5.049735069274902,
"learning_rate": 9.631578947368422e-06,
"loss": 0.2709,
"step": 850
},
{
"epoch": 0.3028729664243683,
"grad_norm": 4.263411045074463,
"learning_rate": 9.605263157894737e-06,
"loss": 0.2575,
"step": 875
},
{
"epoch": 0.3115264797507788,
"grad_norm": 5.51076078414917,
"learning_rate": 9.578947368421054e-06,
"loss": 0.2775,
"step": 900
},
{
"epoch": 0.32017999307718936,
"grad_norm": 3.7715821266174316,
"learning_rate": 9.552631578947369e-06,
"loss": 0.2767,
"step": 925
},
{
"epoch": 0.3288335064035999,
"grad_norm": 3.964357852935791,
"learning_rate": 9.526315789473684e-06,
"loss": 0.2593,
"step": 950
},
{
"epoch": 0.3374870197300104,
"grad_norm": 4.967723369598389,
"learning_rate": 9.5e-06,
"loss": 0.2445,
"step": 975
},
{
"epoch": 0.3461405330564209,
"grad_norm": 6.19343376159668,
"learning_rate": 9.473684210526315e-06,
"loss": 0.2671,
"step": 1000
},
{
"epoch": 0.3461405330564209,
"eval_loss": 0.26496633887290955,
"eval_runtime": 8635.3808,
"eval_samples_per_second": 1.19,
"eval_steps_per_second": 0.074,
"eval_wer": 22.229442855905035,
"step": 1000
},
{
"epoch": 0.3547940463828314,
"grad_norm": 4.938564777374268,
"learning_rate": 9.447368421052632e-06,
"loss": 0.261,
"step": 1025
},
{
"epoch": 0.363447559709242,
"grad_norm": 4.535635948181152,
"learning_rate": 9.421052631578949e-06,
"loss": 0.2471,
"step": 1050
},
{
"epoch": 0.3721010730356525,
"grad_norm": 4.910510540008545,
"learning_rate": 9.394736842105264e-06,
"loss": 0.2701,
"step": 1075
},
{
"epoch": 0.380754586362063,
"grad_norm": 4.105949878692627,
"learning_rate": 9.36842105263158e-06,
"loss": 0.2342,
"step": 1100
},
{
"epoch": 0.3894080996884735,
"grad_norm": 4.819608211517334,
"learning_rate": 9.342105263157895e-06,
"loss": 0.2704,
"step": 1125
},
{
"epoch": 0.398061613014884,
"grad_norm": 6.137063503265381,
"learning_rate": 9.315789473684212e-06,
"loss": 0.258,
"step": 1150
},
{
"epoch": 0.4067151263412946,
"grad_norm": 4.703615665435791,
"learning_rate": 9.289473684210527e-06,
"loss": 0.2602,
"step": 1175
},
{
"epoch": 0.4153686396677051,
"grad_norm": 4.942866325378418,
"learning_rate": 9.263157894736842e-06,
"loss": 0.2562,
"step": 1200
},
{
"epoch": 0.4240221529941156,
"grad_norm": 4.163381099700928,
"learning_rate": 9.236842105263159e-06,
"loss": 0.2398,
"step": 1225
},
{
"epoch": 0.4326756663205261,
"grad_norm": 4.933504104614258,
"learning_rate": 9.210526315789474e-06,
"loss": 0.2423,
"step": 1250
},
{
"epoch": 0.44132917964693663,
"grad_norm": 4.699647426605225,
"learning_rate": 9.18421052631579e-06,
"loss": 0.2659,
"step": 1275
},
{
"epoch": 0.4499826929733472,
"grad_norm": 5.076835632324219,
"learning_rate": 9.157894736842105e-06,
"loss": 0.2679,
"step": 1300
},
{
"epoch": 0.4586362062997577,
"grad_norm": 4.333568572998047,
"learning_rate": 9.131578947368422e-06,
"loss": 0.2475,
"step": 1325
},
{
"epoch": 0.4672897196261682,
"grad_norm": 4.654094219207764,
"learning_rate": 9.105263157894739e-06,
"loss": 0.2353,
"step": 1350
},
{
"epoch": 0.47594323295257873,
"grad_norm": 3.9147582054138184,
"learning_rate": 9.078947368421054e-06,
"loss": 0.232,
"step": 1375
},
{
"epoch": 0.48459674627898924,
"grad_norm": 3.9528894424438477,
"learning_rate": 9.05263157894737e-06,
"loss": 0.2312,
"step": 1400
},
{
"epoch": 0.4932502596053998,
"grad_norm": 5.073605060577393,
"learning_rate": 9.026315789473685e-06,
"loss": 0.2529,
"step": 1425
},
{
"epoch": 0.5019037729318103,
"grad_norm": 4.176553249359131,
"learning_rate": 9e-06,
"loss": 0.2459,
"step": 1450
},
{
"epoch": 0.5105572862582208,
"grad_norm": 3.9072072505950928,
"learning_rate": 8.973684210526317e-06,
"loss": 0.2647,
"step": 1475
},
{
"epoch": 0.5192107995846313,
"grad_norm": 5.062324523925781,
"learning_rate": 8.947368421052632e-06,
"loss": 0.2549,
"step": 1500
},
{
"epoch": 0.5192107995846313,
"eval_loss": 0.24746711552143097,
"eval_runtime": 8673.5525,
"eval_samples_per_second": 1.184,
"eval_steps_per_second": 0.074,
"eval_wer": 21.05710077116368,
"step": 1500
},
{
"epoch": 0.5278643129110419,
"grad_norm": 3.6110143661499023,
"learning_rate": 8.921052631578949e-06,
"loss": 0.2335,
"step": 1525
},
{
"epoch": 0.5365178262374524,
"grad_norm": 5.8853607177734375,
"learning_rate": 8.894736842105264e-06,
"loss": 0.2516,
"step": 1550
},
{
"epoch": 0.5451713395638629,
"grad_norm": 5.245302200317383,
"learning_rate": 8.86842105263158e-06,
"loss": 0.2456,
"step": 1575
},
{
"epoch": 0.5538248528902735,
"grad_norm": 3.9259748458862305,
"learning_rate": 8.842105263157895e-06,
"loss": 0.2426,
"step": 1600
},
{
"epoch": 0.562478366216684,
"grad_norm": 5.401766300201416,
"learning_rate": 8.81578947368421e-06,
"loss": 0.2489,
"step": 1625
},
{
"epoch": 0.5711318795430945,
"grad_norm": 3.4733078479766846,
"learning_rate": 8.789473684210527e-06,
"loss": 0.237,
"step": 1650
},
{
"epoch": 0.579785392869505,
"grad_norm": 5.746425151824951,
"learning_rate": 8.763157894736842e-06,
"loss": 0.262,
"step": 1675
},
{
"epoch": 0.5884389061959155,
"grad_norm": 4.111097812652588,
"learning_rate": 8.736842105263158e-06,
"loss": 0.2559,
"step": 1700
},
{
"epoch": 0.5970924195223261,
"grad_norm": 3.773117780685425,
"learning_rate": 8.710526315789475e-06,
"loss": 0.2567,
"step": 1725
},
{
"epoch": 0.6057459328487366,
"grad_norm": 3.213146209716797,
"learning_rate": 8.68421052631579e-06,
"loss": 0.2361,
"step": 1750
},
{
"epoch": 0.6143994461751471,
"grad_norm": 3.5634965896606445,
"learning_rate": 8.657894736842107e-06,
"loss": 0.2632,
"step": 1775
},
{
"epoch": 0.6230529595015576,
"grad_norm": 3.3568804264068604,
"learning_rate": 8.631578947368422e-06,
"loss": 0.2278,
"step": 1800
},
{
"epoch": 0.6317064728279681,
"grad_norm": 3.8863000869750977,
"learning_rate": 8.605263157894738e-06,
"loss": 0.2336,
"step": 1825
},
{
"epoch": 0.6403599861543787,
"grad_norm": 4.37355899810791,
"learning_rate": 8.578947368421053e-06,
"loss": 0.2435,
"step": 1850
},
{
"epoch": 0.6490134994807892,
"grad_norm": 5.477795600891113,
"learning_rate": 8.552631578947368e-06,
"loss": 0.248,
"step": 1875
},
{
"epoch": 0.6576670128071997,
"grad_norm": 5.682942867279053,
"learning_rate": 8.526315789473685e-06,
"loss": 0.2478,
"step": 1900
},
{
"epoch": 0.6663205261336103,
"grad_norm": 4.837137222290039,
"learning_rate": 8.5e-06,
"loss": 0.226,
"step": 1925
},
{
"epoch": 0.6749740394600208,
"grad_norm": 5.188834190368652,
"learning_rate": 8.473684210526317e-06,
"loss": 0.2294,
"step": 1950
},
{
"epoch": 0.6836275527864313,
"grad_norm": 3.51971173286438,
"learning_rate": 8.447368421052632e-06,
"loss": 0.2357,
"step": 1975
},
{
"epoch": 0.6922810661128418,
"grad_norm": 6.168539047241211,
"learning_rate": 8.421052631578948e-06,
"loss": 0.243,
"step": 2000
},
{
"epoch": 0.6922810661128418,
"eval_loss": 0.23871001601219177,
"eval_runtime": 8675.6286,
"eval_samples_per_second": 1.184,
"eval_steps_per_second": 0.074,
"eval_wer": 20.804956242959882,
"step": 2000
},
{
"epoch": 0.7009345794392523,
"grad_norm": 4.645936965942383,
"learning_rate": 8.394736842105263e-06,
"loss": 0.2265,
"step": 2025
},
{
"epoch": 0.7095880927656628,
"grad_norm": 5.751936435699463,
"learning_rate": 8.36842105263158e-06,
"loss": 0.2491,
"step": 2050
},
{
"epoch": 0.7182416060920734,
"grad_norm": 3.7281875610351562,
"learning_rate": 8.342105263157897e-06,
"loss": 0.2671,
"step": 2075
},
{
"epoch": 0.726895119418484,
"grad_norm": 3.756186008453369,
"learning_rate": 8.315789473684212e-06,
"loss": 0.214,
"step": 2100
},
{
"epoch": 0.7355486327448945,
"grad_norm": 4.607492923736572,
"learning_rate": 8.289473684210526e-06,
"loss": 0.251,
"step": 2125
},
{
"epoch": 0.744202146071305,
"grad_norm": 6.176618576049805,
"learning_rate": 8.263157894736843e-06,
"loss": 0.2532,
"step": 2150
},
{
"epoch": 0.7528556593977155,
"grad_norm": 5.2198166847229,
"learning_rate": 8.236842105263158e-06,
"loss": 0.2405,
"step": 2175
},
{
"epoch": 0.761509172724126,
"grad_norm": 4.314031600952148,
"learning_rate": 8.210526315789475e-06,
"loss": 0.2287,
"step": 2200
},
{
"epoch": 0.7701626860505365,
"grad_norm": 5.143173694610596,
"learning_rate": 8.18421052631579e-06,
"loss": 0.2285,
"step": 2225
},
{
"epoch": 0.778816199376947,
"grad_norm": 7.833088397979736,
"learning_rate": 8.157894736842106e-06,
"loss": 0.2359,
"step": 2250
},
{
"epoch": 0.7874697127033575,
"grad_norm": 4.4802703857421875,
"learning_rate": 8.131578947368421e-06,
"loss": 0.2377,
"step": 2275
},
{
"epoch": 0.796123226029768,
"grad_norm": 4.503852367401123,
"learning_rate": 8.105263157894736e-06,
"loss": 0.2325,
"step": 2300
},
{
"epoch": 0.8047767393561787,
"grad_norm": 4.415956020355225,
"learning_rate": 8.078947368421053e-06,
"loss": 0.2438,
"step": 2325
},
{
"epoch": 0.8134302526825892,
"grad_norm": 6.339819431304932,
"learning_rate": 8.052631578947368e-06,
"loss": 0.2479,
"step": 2350
},
{
"epoch": 0.8220837660089997,
"grad_norm": 4.9156813621521,
"learning_rate": 8.026315789473685e-06,
"loss": 0.2195,
"step": 2375
},
{
"epoch": 0.8307372793354102,
"grad_norm": 5.688671112060547,
"learning_rate": 8.000000000000001e-06,
"loss": 0.2199,
"step": 2400
},
{
"epoch": 0.8393907926618207,
"grad_norm": 4.447849750518799,
"learning_rate": 7.973684210526316e-06,
"loss": 0.2429,
"step": 2425
},
{
"epoch": 0.8480443059882312,
"grad_norm": 3.792633295059204,
"learning_rate": 7.947368421052633e-06,
"loss": 0.2189,
"step": 2450
},
{
"epoch": 0.8566978193146417,
"grad_norm": 4.0045247077941895,
"learning_rate": 7.921052631578948e-06,
"loss": 0.2296,
"step": 2475
},
{
"epoch": 0.8653513326410522,
"grad_norm": 4.449003219604492,
"learning_rate": 7.894736842105265e-06,
"loss": 0.2136,
"step": 2500
},
{
"epoch": 0.8653513326410522,
"eval_loss": 0.23298430442810059,
"eval_runtime": 8676.1006,
"eval_samples_per_second": 1.184,
"eval_steps_per_second": 0.074,
"eval_wer": 20.03725846980331,
"step": 2500
},
{
"epoch": 0.8740048459674628,
"grad_norm": 4.327373027801514,
"learning_rate": 7.86842105263158e-06,
"loss": 0.227,
"step": 2525
},
{
"epoch": 0.8826583592938733,
"grad_norm": 4.755936145782471,
"learning_rate": 7.842105263157895e-06,
"loss": 0.2291,
"step": 2550
},
{
"epoch": 0.8913118726202839,
"grad_norm": 4.75525426864624,
"learning_rate": 7.815789473684211e-06,
"loss": 0.2418,
"step": 2575
},
{
"epoch": 0.8999653859466944,
"grad_norm": 4.342800140380859,
"learning_rate": 7.789473684210526e-06,
"loss": 0.2316,
"step": 2600
},
{
"epoch": 0.9086188992731049,
"grad_norm": 4.322353363037109,
"learning_rate": 7.763157894736843e-06,
"loss": 0.2242,
"step": 2625
},
{
"epoch": 0.9172724125995154,
"grad_norm": 4.406942367553711,
"learning_rate": 7.736842105263158e-06,
"loss": 0.2178,
"step": 2650
},
{
"epoch": 0.9259259259259259,
"grad_norm": 5.0642266273498535,
"learning_rate": 7.710526315789474e-06,
"loss": 0.2335,
"step": 2675
},
{
"epoch": 0.9345794392523364,
"grad_norm": 4.1676483154296875,
"learning_rate": 7.68421052631579e-06,
"loss": 0.226,
"step": 2700
},
{
"epoch": 0.943232952578747,
"grad_norm": 4.0350022315979,
"learning_rate": 7.657894736842106e-06,
"loss": 0.2388,
"step": 2725
},
{
"epoch": 0.9518864659051575,
"grad_norm": 4.125761032104492,
"learning_rate": 7.631578947368423e-06,
"loss": 0.2356,
"step": 2750
},
{
"epoch": 0.960539979231568,
"grad_norm": 3.9152023792266846,
"learning_rate": 7.605263157894738e-06,
"loss": 0.2089,
"step": 2775
},
{
"epoch": 0.9691934925579785,
"grad_norm": 4.8811821937561035,
"learning_rate": 7.578947368421054e-06,
"loss": 0.2059,
"step": 2800
},
{
"epoch": 0.9778470058843891,
"grad_norm": 4.5911712646484375,
"learning_rate": 7.552631578947369e-06,
"loss": 0.2155,
"step": 2825
},
{
"epoch": 0.9865005192107996,
"grad_norm": 4.353863716125488,
"learning_rate": 7.526315789473685e-06,
"loss": 0.2145,
"step": 2850
},
{
"epoch": 0.9951540325372101,
"grad_norm": 5.159242153167725,
"learning_rate": 7.500000000000001e-06,
"loss": 0.2338,
"step": 2875
},
{
"epoch": 1.0038075458636206,
"grad_norm": 3.813417673110962,
"learning_rate": 7.473684210526316e-06,
"loss": 0.1849,
"step": 2900
},
{
"epoch": 1.0124610591900312,
"grad_norm": 3.838930368423462,
"learning_rate": 7.447368421052632e-06,
"loss": 0.1596,
"step": 2925
},
{
"epoch": 1.0211145725164417,
"grad_norm": 3.80027174949646,
"learning_rate": 7.421052631578948e-06,
"loss": 0.184,
"step": 2950
},
{
"epoch": 1.0297680858428522,
"grad_norm": 3.2930946350097656,
"learning_rate": 7.3947368421052635e-06,
"loss": 0.169,
"step": 2975
},
{
"epoch": 1.0384215991692627,
"grad_norm": 3.8618459701538086,
"learning_rate": 7.368421052631579e-06,
"loss": 0.1664,
"step": 3000
},
{
"epoch": 1.0384215991692627,
"eval_loss": 0.22383837401866913,
"eval_runtime": 8630.3289,
"eval_samples_per_second": 1.19,
"eval_steps_per_second": 0.074,
"eval_wer": 18.63530023394853,
"step": 3000
},
{
"epoch": 1.0470751124956732,
"grad_norm": 4.719282627105713,
"learning_rate": 7.342105263157895e-06,
"loss": 0.1675,
"step": 3025
},
{
"epoch": 1.0557286258220837,
"grad_norm": 2.7439825534820557,
"learning_rate": 7.315789473684212e-06,
"loss": 0.1656,
"step": 3050
},
{
"epoch": 1.0643821391484942,
"grad_norm": 4.707197189331055,
"learning_rate": 7.289473684210528e-06,
"loss": 0.1743,
"step": 3075
},
{
"epoch": 1.0730356524749047,
"grad_norm": 3.8877105712890625,
"learning_rate": 7.263157894736843e-06,
"loss": 0.1706,
"step": 3100
},
{
"epoch": 1.0816891658013152,
"grad_norm": 3.034952402114868,
"learning_rate": 7.236842105263158e-06,
"loss": 0.1575,
"step": 3125
},
{
"epoch": 1.0903426791277258,
"grad_norm": 3.1362013816833496,
"learning_rate": 7.210526315789474e-06,
"loss": 0.1624,
"step": 3150
},
{
"epoch": 1.0989961924541363,
"grad_norm": 3.822435140609741,
"learning_rate": 7.18421052631579e-06,
"loss": 0.1575,
"step": 3175
},
{
"epoch": 1.107649705780547,
"grad_norm": 3.342021942138672,
"learning_rate": 7.157894736842106e-06,
"loss": 0.1456,
"step": 3200
},
{
"epoch": 1.1163032191069575,
"grad_norm": 2.8061094284057617,
"learning_rate": 7.131578947368422e-06,
"loss": 0.1573,
"step": 3225
},
{
"epoch": 1.124956732433368,
"grad_norm": 4.738641738891602,
"learning_rate": 7.1052631578947375e-06,
"loss": 0.1753,
"step": 3250
},
{
"epoch": 1.1336102457597785,
"grad_norm": 2.7924444675445557,
"learning_rate": 7.078947368421053e-06,
"loss": 0.1542,
"step": 3275
},
{
"epoch": 1.142263759086189,
"grad_norm": 3.8055057525634766,
"learning_rate": 7.052631578947369e-06,
"loss": 0.1683,
"step": 3300
},
{
"epoch": 1.1509172724125996,
"grad_norm": 2.7615177631378174,
"learning_rate": 7.026315789473684e-06,
"loss": 0.1607,
"step": 3325
},
{
"epoch": 1.15957078573901,
"grad_norm": 3.5338289737701416,
"learning_rate": 7e-06,
"loss": 0.1818,
"step": 3350
},
{
"epoch": 1.1682242990654206,
"grad_norm": 4.972025394439697,
"learning_rate": 6.973684210526316e-06,
"loss": 0.1683,
"step": 3375
},
{
"epoch": 1.176877812391831,
"grad_norm": 2.7351698875427246,
"learning_rate": 6.947368421052632e-06,
"loss": 0.163,
"step": 3400
},
{
"epoch": 1.1855313257182416,
"grad_norm": 2.600933074951172,
"learning_rate": 6.921052631578948e-06,
"loss": 0.1639,
"step": 3425
},
{
"epoch": 1.1941848390446521,
"grad_norm": 3.196901798248291,
"learning_rate": 6.894736842105264e-06,
"loss": 0.1689,
"step": 3450
},
{
"epoch": 1.2028383523710626,
"grad_norm": 4.408321380615234,
"learning_rate": 6.86842105263158e-06,
"loss": 0.1853,
"step": 3475
},
{
"epoch": 1.2114918656974731,
"grad_norm": 3.1869866847991943,
"learning_rate": 6.842105263157896e-06,
"loss": 0.1781,
"step": 3500
},
{
"epoch": 1.2114918656974731,
"eval_loss": 0.22067983448505402,
"eval_runtime": 8630.1006,
"eval_samples_per_second": 1.19,
"eval_steps_per_second": 0.074,
"eval_wer": 18.572913958929036,
"step": 3500
},
{
"epoch": 1.2201453790238836,
"grad_norm": 3.745699882507324,
"learning_rate": 6.8157894736842115e-06,
"loss": 0.1685,
"step": 3525
},
{
"epoch": 1.2287988923502942,
"grad_norm": 4.755461692810059,
"learning_rate": 6.789473684210527e-06,
"loss": 0.1653,
"step": 3550
},
{
"epoch": 1.2374524056767047,
"grad_norm": 2.958872079849243,
"learning_rate": 6.763157894736842e-06,
"loss": 0.1657,
"step": 3575
},
{
"epoch": 1.2461059190031152,
"grad_norm": 3.780946969985962,
"learning_rate": 6.736842105263158e-06,
"loss": 0.1818,
"step": 3600
},
{
"epoch": 1.254759432329526,
"grad_norm": 3.9823403358459473,
"learning_rate": 6.710526315789474e-06,
"loss": 0.1705,
"step": 3625
},
{
"epoch": 1.2634129456559364,
"grad_norm": 3.881185531616211,
"learning_rate": 6.68421052631579e-06,
"loss": 0.1688,
"step": 3650
},
{
"epoch": 1.272066458982347,
"grad_norm": 3.2562785148620605,
"learning_rate": 6.6578947368421055e-06,
"loss": 0.1597,
"step": 3675
},
{
"epoch": 1.2807199723087574,
"grad_norm": 4.002935886383057,
"learning_rate": 6.631578947368421e-06,
"loss": 0.1653,
"step": 3700
},
{
"epoch": 1.289373485635168,
"grad_norm": 3.866936206817627,
"learning_rate": 6.605263157894738e-06,
"loss": 0.1687,
"step": 3725
},
{
"epoch": 1.2980269989615785,
"grad_norm": 4.491256237030029,
"learning_rate": 6.578947368421054e-06,
"loss": 0.184,
"step": 3750
},
{
"epoch": 1.306680512287989,
"grad_norm": 2.8679704666137695,
"learning_rate": 6.55263157894737e-06,
"loss": 0.1761,
"step": 3775
},
{
"epoch": 1.3153340256143995,
"grad_norm": 3.8533244132995605,
"learning_rate": 6.526315789473685e-06,
"loss": 0.1612,
"step": 3800
},
{
"epoch": 1.32398753894081,
"grad_norm": 3.4180614948272705,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.1668,
"step": 3825
},
{
"epoch": 1.3326410522672205,
"grad_norm": 3.1745965480804443,
"learning_rate": 6.473684210526316e-06,
"loss": 0.1571,
"step": 3850
},
{
"epoch": 1.341294565593631,
"grad_norm": 3.310295343399048,
"learning_rate": 6.447368421052632e-06,
"loss": 0.1625,
"step": 3875
},
{
"epoch": 1.3499480789200415,
"grad_norm": 3.5954184532165527,
"learning_rate": 6.421052631578948e-06,
"loss": 0.158,
"step": 3900
},
{
"epoch": 1.358601592246452,
"grad_norm": 2.868551731109619,
"learning_rate": 6.394736842105264e-06,
"loss": 0.17,
"step": 3925
},
{
"epoch": 1.3672551055728626,
"grad_norm": 2.9729490280151367,
"learning_rate": 6.3684210526315795e-06,
"loss": 0.1511,
"step": 3950
},
{
"epoch": 1.375908618899273,
"grad_norm": 2.286844253540039,
"learning_rate": 6.342105263157895e-06,
"loss": 0.1648,
"step": 3975
},
{
"epoch": 1.3845621322256836,
"grad_norm": 3.9818239212036133,
"learning_rate": 6.31578947368421e-06,
"loss": 0.1664,
"step": 4000
},
{
"epoch": 1.3845621322256836,
"eval_loss": 0.21563765406608582,
"eval_runtime": 8678.9976,
"eval_samples_per_second": 1.184,
"eval_steps_per_second": 0.074,
"eval_wer": 18.037431765011696,
"step": 4000
},
{
"epoch": 1.393215645552094,
"grad_norm": 3.8452024459838867,
"learning_rate": 6.289473684210526e-06,
"loss": 0.1642,
"step": 4025
},
{
"epoch": 1.4018691588785046,
"grad_norm": 3.381753444671631,
"learning_rate": 6.263157894736842e-06,
"loss": 0.1671,
"step": 4050
},
{
"epoch": 1.4105226722049151,
"grad_norm": 3.9922471046447754,
"learning_rate": 6.236842105263159e-06,
"loss": 0.1721,
"step": 4075
},
{
"epoch": 1.4191761855313256,
"grad_norm": 3.2609457969665527,
"learning_rate": 6.2105263157894745e-06,
"loss": 0.1832,
"step": 4100
},
{
"epoch": 1.4278296988577361,
"grad_norm": 3.5233139991760254,
"learning_rate": 6.18421052631579e-06,
"loss": 0.1734,
"step": 4125
},
{
"epoch": 1.4364832121841467,
"grad_norm": 4.901401519775391,
"learning_rate": 6.157894736842106e-06,
"loss": 0.181,
"step": 4150
},
{
"epoch": 1.4451367255105572,
"grad_norm": 2.4299676418304443,
"learning_rate": 6.131578947368422e-06,
"loss": 0.1538,
"step": 4175
},
{
"epoch": 1.4537902388369677,
"grad_norm": 4.308781623840332,
"learning_rate": 6.105263157894738e-06,
"loss": 0.1501,
"step": 4200
},
{
"epoch": 1.4624437521633784,
"grad_norm": 4.0135498046875,
"learning_rate": 6.0789473684210535e-06,
"loss": 0.1735,
"step": 4225
},
{
"epoch": 1.471097265489789,
"grad_norm": 3.9877755641937256,
"learning_rate": 6.0526315789473685e-06,
"loss": 0.1717,
"step": 4250
},
{
"epoch": 1.4797507788161994,
"grad_norm": 3.184150218963623,
"learning_rate": 6.026315789473684e-06,
"loss": 0.1571,
"step": 4275
},
{
"epoch": 1.48840429214261,
"grad_norm": 3.2754974365234375,
"learning_rate": 6e-06,
"loss": 0.1618,
"step": 4300
},
{
"epoch": 1.4970578054690205,
"grad_norm": 3.145984411239624,
"learning_rate": 5.973684210526316e-06,
"loss": 0.1637,
"step": 4325
},
{
"epoch": 1.505711318795431,
"grad_norm": 4.307953834533691,
"learning_rate": 5.947368421052632e-06,
"loss": 0.1568,
"step": 4350
},
{
"epoch": 1.5143648321218415,
"grad_norm": 2.7052788734436035,
"learning_rate": 5.921052631578948e-06,
"loss": 0.1573,
"step": 4375
},
{
"epoch": 1.523018345448252,
"grad_norm": 4.613982677459717,
"learning_rate": 5.8947368421052634e-06,
"loss": 0.1531,
"step": 4400
},
{
"epoch": 1.5316718587746625,
"grad_norm": 3.401477813720703,
"learning_rate": 5.86842105263158e-06,
"loss": 0.167,
"step": 4425
},
{
"epoch": 1.540325372101073,
"grad_norm": 4.301424503326416,
"learning_rate": 5.842105263157896e-06,
"loss": 0.168,
"step": 4450
},
{
"epoch": 1.5489788854274835,
"grad_norm": 4.266972541809082,
"learning_rate": 5.815789473684212e-06,
"loss": 0.1589,
"step": 4475
},
{
"epoch": 1.557632398753894,
"grad_norm": 3.3040754795074463,
"learning_rate": 5.789473684210527e-06,
"loss": 0.1659,
"step": 4500
},
{
"epoch": 1.557632398753894,
"eval_loss": 0.21191351115703583,
"eval_runtime": 8655.7858,
"eval_samples_per_second": 1.187,
"eval_steps_per_second": 0.074,
"eval_wer": 18.74360973919071,
"step": 4500
},
{
"epoch": 1.5662859120803048,
"grad_norm": 4.775163173675537,
"learning_rate": 5.7631578947368425e-06,
"loss": 0.1749,
"step": 4525
},
{
"epoch": 1.5749394254067153,
"grad_norm": 3.8686747550964355,
"learning_rate": 5.736842105263158e-06,
"loss": 0.1656,
"step": 4550
},
{
"epoch": 1.5835929387331258,
"grad_norm": 3.2979884147644043,
"learning_rate": 5.710526315789474e-06,
"loss": 0.1652,
"step": 4575
},
{
"epoch": 1.5922464520595363,
"grad_norm": 3.271785259246826,
"learning_rate": 5.68421052631579e-06,
"loss": 0.1611,
"step": 4600
},
{
"epoch": 1.6008999653859468,
"grad_norm": 4.323774814605713,
"learning_rate": 5.657894736842106e-06,
"loss": 0.1713,
"step": 4625
},
{
"epoch": 1.6095534787123573,
"grad_norm": 2.925485134124756,
"learning_rate": 5.631578947368422e-06,
"loss": 0.1634,
"step": 4650
},
{
"epoch": 1.6182069920387678,
"grad_norm": 3.3976783752441406,
"learning_rate": 5.605263157894737e-06,
"loss": 0.1761,
"step": 4675
},
{
"epoch": 1.6268605053651783,
"grad_norm": 3.1800551414489746,
"learning_rate": 5.578947368421052e-06,
"loss": 0.1522,
"step": 4700
},
{
"epoch": 1.6355140186915889,
"grad_norm": 3.392937660217285,
"learning_rate": 5.552631578947368e-06,
"loss": 0.1604,
"step": 4725
},
{
"epoch": 1.6441675320179994,
"grad_norm": 3.9035747051239014,
"learning_rate": 5.526315789473685e-06,
"loss": 0.1922,
"step": 4750
},
{
"epoch": 1.6528210453444099,
"grad_norm": 3.81205415725708,
"learning_rate": 5.500000000000001e-06,
"loss": 0.1786,
"step": 4775
},
{
"epoch": 1.6614745586708204,
"grad_norm": 3.3572874069213867,
"learning_rate": 5.4736842105263165e-06,
"loss": 0.169,
"step": 4800
},
{
"epoch": 1.670128071997231,
"grad_norm": 3.0381922721862793,
"learning_rate": 5.447368421052632e-06,
"loss": 0.1753,
"step": 4825
},
{
"epoch": 1.6787815853236414,
"grad_norm": 3.7208361625671387,
"learning_rate": 5.421052631578948e-06,
"loss": 0.1587,
"step": 4850
},
{
"epoch": 1.687435098650052,
"grad_norm": 6.452873229980469,
"learning_rate": 5.394736842105264e-06,
"loss": 0.1559,
"step": 4875
},
{
"epoch": 1.6960886119764624,
"grad_norm": 3.532186269760132,
"learning_rate": 5.36842105263158e-06,
"loss": 0.1587,
"step": 4900
},
{
"epoch": 1.704742125302873,
"grad_norm": 3.6204092502593994,
"learning_rate": 5.342105263157895e-06,
"loss": 0.1638,
"step": 4925
},
{
"epoch": 1.7133956386292835,
"grad_norm": 3.3600478172302246,
"learning_rate": 5.315789473684211e-06,
"loss": 0.1657,
"step": 4950
},
{
"epoch": 1.722049151955694,
"grad_norm": 3.8117873668670654,
"learning_rate": 5.289473684210526e-06,
"loss": 0.1533,
"step": 4975
},
{
"epoch": 1.7307026652821045,
"grad_norm": 4.345729827880859,
"learning_rate": 5.263157894736842e-06,
"loss": 0.1611,
"step": 5000
},
{
"epoch": 1.7307026652821045,
"eval_loss": 0.20883877575397491,
"eval_runtime": 8644.4172,
"eval_samples_per_second": 1.188,
"eval_steps_per_second": 0.074,
"eval_wer": 17.726366865956155,
"step": 5000
},
{
"epoch": 1.739356178608515,
"grad_norm": 2.709228515625,
"learning_rate": 5.236842105263158e-06,
"loss": 0.1723,
"step": 5025
},
{
"epoch": 1.7480096919349255,
"grad_norm": 4.446653366088867,
"learning_rate": 5.210526315789474e-06,
"loss": 0.1606,
"step": 5050
},
{
"epoch": 1.756663205261336,
"grad_norm": 4.571587562561035,
"learning_rate": 5.18421052631579e-06,
"loss": 0.1628,
"step": 5075
},
{
"epoch": 1.7653167185877465,
"grad_norm": 3.951996088027954,
"learning_rate": 5.157894736842106e-06,
"loss": 0.1532,
"step": 5100
},
{
"epoch": 1.773970231914157,
"grad_norm": 3.2565793991088867,
"learning_rate": 5.131578947368422e-06,
"loss": 0.1599,
"step": 5125
},
{
"epoch": 1.7826237452405675,
"grad_norm": 2.625930070877075,
"learning_rate": 5.105263157894738e-06,
"loss": 0.1606,
"step": 5150
},
{
"epoch": 1.791277258566978,
"grad_norm": 3.5779178142547607,
"learning_rate": 5.078947368421053e-06,
"loss": 0.1683,
"step": 5175
},
{
"epoch": 1.7999307718933886,
"grad_norm": 3.518836736679077,
"learning_rate": 5.052631578947369e-06,
"loss": 0.1575,
"step": 5200
},
{
"epoch": 1.808584285219799,
"grad_norm": 2.62227725982666,
"learning_rate": 5.026315789473685e-06,
"loss": 0.1549,
"step": 5225
},
{
"epoch": 1.8172377985462098,
"grad_norm": 3.5382871627807617,
"learning_rate": 5e-06,
"loss": 0.1566,
"step": 5250
},
{
"epoch": 1.8258913118726203,
"grad_norm": 4.410214900970459,
"learning_rate": 4.973684210526316e-06,
"loss": 0.1529,
"step": 5275
},
{
"epoch": 1.8345448251990308,
"grad_norm": 3.1463205814361572,
"learning_rate": 4.947368421052632e-06,
"loss": 0.1551,
"step": 5300
},
{
"epoch": 1.8431983385254413,
"grad_norm": 2.4352145195007324,
"learning_rate": 4.921052631578948e-06,
"loss": 0.1624,
"step": 5325
},
{
"epoch": 1.8518518518518519,
"grad_norm": 3.8748574256896973,
"learning_rate": 4.894736842105264e-06,
"loss": 0.1619,
"step": 5350
},
{
"epoch": 1.8605053651782624,
"grad_norm": 2.8592870235443115,
"learning_rate": 4.8684210526315795e-06,
"loss": 0.1709,
"step": 5375
},
{
"epoch": 1.8691588785046729,
"grad_norm": 3.5654568672180176,
"learning_rate": 4.842105263157895e-06,
"loss": 0.1568,
"step": 5400
},
{
"epoch": 1.8778123918310834,
"grad_norm": 3.1443722248077393,
"learning_rate": 4.815789473684211e-06,
"loss": 0.1546,
"step": 5425
},
{
"epoch": 1.886465905157494,
"grad_norm": 2.727612018585205,
"learning_rate": 4.789473684210527e-06,
"loss": 0.1502,
"step": 5450
},
{
"epoch": 1.8951194184839044,
"grad_norm": 3.5027356147766113,
"learning_rate": 4.763157894736842e-06,
"loss": 0.1545,
"step": 5475
},
{
"epoch": 1.9037729318103151,
"grad_norm": 3.154855966567993,
"learning_rate": 4.736842105263158e-06,
"loss": 0.1424,
"step": 5500
},
{
"epoch": 1.9037729318103151,
"eval_loss": 0.20275835692882538,
"eval_runtime": 8641.5231,
"eval_samples_per_second": 1.189,
"eval_steps_per_second": 0.074,
"eval_wer": 17.243739710597,
"step": 5500
},
{
"epoch": 1.9124264451367257,
"grad_norm": 2.7067971229553223,
"learning_rate": 4.710526315789474e-06,
"loss": 0.1599,
"step": 5525
},
{
"epoch": 1.9210799584631362,
"grad_norm": 3.4274163246154785,
"learning_rate": 4.68421052631579e-06,
"loss": 0.1596,
"step": 5550
},
{
"epoch": 1.9297334717895467,
"grad_norm": 3.3891353607177734,
"learning_rate": 4.657894736842106e-06,
"loss": 0.1836,
"step": 5575
},
{
"epoch": 1.9383869851159572,
"grad_norm": 3.259261131286621,
"learning_rate": 4.631578947368421e-06,
"loss": 0.1574,
"step": 5600
},
{
"epoch": 1.9470404984423677,
"grad_norm": 4.355072021484375,
"learning_rate": 4.605263157894737e-06,
"loss": 0.151,
"step": 5625
},
{
"epoch": 1.9556940117687782,
"grad_norm": 4.160757064819336,
"learning_rate": 4.578947368421053e-06,
"loss": 0.153,
"step": 5650
},
{
"epoch": 1.9643475250951887,
"grad_norm": 2.7162065505981445,
"learning_rate": 4.552631578947369e-06,
"loss": 0.1504,
"step": 5675
},
{
"epoch": 1.9730010384215992,
"grad_norm": 3.1264755725860596,
"learning_rate": 4.526315789473685e-06,
"loss": 0.1503,
"step": 5700
},
{
"epoch": 1.9816545517480098,
"grad_norm": 3.2158703804016113,
"learning_rate": 4.5e-06,
"loss": 0.1629,
"step": 5725
},
{
"epoch": 1.9903080650744203,
"grad_norm": 3.41349196434021,
"learning_rate": 4.473684210526316e-06,
"loss": 0.1628,
"step": 5750
},
{
"epoch": 1.9989615784008308,
"grad_norm": 2.600003957748413,
"learning_rate": 4.447368421052632e-06,
"loss": 0.153,
"step": 5775
},
{
"epoch": 2.0076150917272413,
"grad_norm": 2.955773115158081,
"learning_rate": 4.4210526315789476e-06,
"loss": 0.1056,
"step": 5800
},
{
"epoch": 2.016268605053652,
"grad_norm": 3.6034035682678223,
"learning_rate": 4.394736842105263e-06,
"loss": 0.1,
"step": 5825
},
{
"epoch": 2.0249221183800623,
"grad_norm": 2.37636137008667,
"learning_rate": 4.368421052631579e-06,
"loss": 0.1042,
"step": 5850
},
{
"epoch": 2.033575631706473,
"grad_norm": 2.6915884017944336,
"learning_rate": 4.342105263157895e-06,
"loss": 0.1162,
"step": 5875
},
{
"epoch": 2.0422291450328833,
"grad_norm": 2.495497226715088,
"learning_rate": 4.315789473684211e-06,
"loss": 0.1097,
"step": 5900
},
{
"epoch": 2.050882658359294,
"grad_norm": 3.1484713554382324,
"learning_rate": 4.289473684210527e-06,
"loss": 0.1183,
"step": 5925
},
{
"epoch": 2.0595361716857044,
"grad_norm": 2.547849416732788,
"learning_rate": 4.2631578947368425e-06,
"loss": 0.1205,
"step": 5950
},
{
"epoch": 2.068189685012115,
"grad_norm": 2.342745304107666,
"learning_rate": 4.236842105263158e-06,
"loss": 0.1117,
"step": 5975
},
{
"epoch": 2.0768431983385254,
"grad_norm": 2.926923990249634,
"learning_rate": 4.210526315789474e-06,
"loss": 0.1101,
"step": 6000
},
{
"epoch": 2.0768431983385254,
"eval_loss": 0.20616546273231506,
"eval_runtime": 8634.4104,
"eval_samples_per_second": 1.19,
"eval_steps_per_second": 0.074,
"eval_wer": 16.82523178234122,
"step": 6000
},
{
"epoch": 2.085496711664936,
"grad_norm": 2.461634635925293,
"learning_rate": 4.18421052631579e-06,
"loss": 0.1108,
"step": 6025
},
{
"epoch": 2.0941502249913464,
"grad_norm": 1.6099869012832642,
"learning_rate": 4.157894736842106e-06,
"loss": 0.1091,
"step": 6050
},
{
"epoch": 2.102803738317757,
"grad_norm": 2.497805595397949,
"learning_rate": 4.1315789473684216e-06,
"loss": 0.1054,
"step": 6075
},
{
"epoch": 2.1114572516441674,
"grad_norm": 2.440737009048462,
"learning_rate": 4.105263157894737e-06,
"loss": 0.1143,
"step": 6100
},
{
"epoch": 2.120110764970578,
"grad_norm": 2.547050714492798,
"learning_rate": 4.078947368421053e-06,
"loss": 0.1051,
"step": 6125
},
{
"epoch": 2.1287642782969884,
"grad_norm": 2.2565364837646484,
"learning_rate": 4.052631578947368e-06,
"loss": 0.1079,
"step": 6150
},
{
"epoch": 2.137417791623399,
"grad_norm": 3.4482452869415283,
"learning_rate": 4.026315789473684e-06,
"loss": 0.107,
"step": 6175
},
{
"epoch": 2.1460713049498095,
"grad_norm": 1.6255193948745728,
"learning_rate": 4.000000000000001e-06,
"loss": 0.1124,
"step": 6200
},
{
"epoch": 2.15472481827622,
"grad_norm": 2.6273090839385986,
"learning_rate": 3.9736842105263165e-06,
"loss": 0.1012,
"step": 6225
},
{
"epoch": 2.1633783316026305,
"grad_norm": 4.822213649749756,
"learning_rate": 3.947368421052632e-06,
"loss": 0.106,
"step": 6250
},
{
"epoch": 2.172031844929041,
"grad_norm": 3.0468506813049316,
"learning_rate": 3.921052631578947e-06,
"loss": 0.1343,
"step": 6275
},
{
"epoch": 2.1806853582554515,
"grad_norm": 3.5357604026794434,
"learning_rate": 3.894736842105263e-06,
"loss": 0.1066,
"step": 6300
},
{
"epoch": 2.189338871581862,
"grad_norm": 2.8175506591796875,
"learning_rate": 3.868421052631579e-06,
"loss": 0.1102,
"step": 6325
},
{
"epoch": 2.1979923849082725,
"grad_norm": 3.171792984008789,
"learning_rate": 3.842105263157895e-06,
"loss": 0.1081,
"step": 6350
},
{
"epoch": 2.2066458982346835,
"grad_norm": 2.2714669704437256,
"learning_rate": 3.815789473684211e-06,
"loss": 0.1077,
"step": 6375
},
{
"epoch": 2.215299411561094,
"grad_norm": 4.731479644775391,
"learning_rate": 3.789473684210527e-06,
"loss": 0.1055,
"step": 6400
},
{
"epoch": 2.2239529248875045,
"grad_norm": 2.8998143672943115,
"learning_rate": 3.7631578947368426e-06,
"loss": 0.1189,
"step": 6425
},
{
"epoch": 2.232606438213915,
"grad_norm": 2.2706921100616455,
"learning_rate": 3.736842105263158e-06,
"loss": 0.1134,
"step": 6450
},
{
"epoch": 2.2412599515403255,
"grad_norm": 3.229358196258545,
"learning_rate": 3.710526315789474e-06,
"loss": 0.1154,
"step": 6475
},
{
"epoch": 2.249913464866736,
"grad_norm": 2.179197072982788,
"learning_rate": 3.6842105263157896e-06,
"loss": 0.0966,
"step": 6500
},
{
"epoch": 2.249913464866736,
"eval_loss": 0.20438149571418762,
"eval_runtime": 8615.3588,
"eval_samples_per_second": 1.192,
"eval_steps_per_second": 0.075,
"eval_wer": 16.619876960402046,
"step": 6500
},
{
"epoch": 2.2585669781931466,
"grad_norm": 2.860914707183838,
"learning_rate": 3.657894736842106e-06,
"loss": 0.1083,
"step": 6525
},
{
"epoch": 2.267220491519557,
"grad_norm": 3.0490429401397705,
"learning_rate": 3.6315789473684217e-06,
"loss": 0.1068,
"step": 6550
},
{
"epoch": 2.2758740048459676,
"grad_norm": 3.8441545963287354,
"learning_rate": 3.605263157894737e-06,
"loss": 0.1125,
"step": 6575
},
{
"epoch": 2.284527518172378,
"grad_norm": 2.9149553775787354,
"learning_rate": 3.578947368421053e-06,
"loss": 0.1081,
"step": 6600
},
{
"epoch": 2.2931810314987886,
"grad_norm": 3.6281797885894775,
"learning_rate": 3.5526315789473687e-06,
"loss": 0.1116,
"step": 6625
},
{
"epoch": 2.301834544825199,
"grad_norm": 2.624938488006592,
"learning_rate": 3.5263157894736846e-06,
"loss": 0.1062,
"step": 6650
},
{
"epoch": 2.3104880581516096,
"grad_norm": 3.465491533279419,
"learning_rate": 3.5e-06,
"loss": 0.128,
"step": 6675
},
{
"epoch": 2.31914157147802,
"grad_norm": 3.024850606918335,
"learning_rate": 3.473684210526316e-06,
"loss": 0.1062,
"step": 6700
},
{
"epoch": 2.3277950848044306,
"grad_norm": 3.088701009750366,
"learning_rate": 3.447368421052632e-06,
"loss": 0.1155,
"step": 6725
},
{
"epoch": 2.336448598130841,
"grad_norm": 2.8708367347717285,
"learning_rate": 3.421052631578948e-06,
"loss": 0.1079,
"step": 6750
},
{
"epoch": 2.3451021114572517,
"grad_norm": 1.946626901626587,
"learning_rate": 3.3947368421052636e-06,
"loss": 0.1072,
"step": 6775
},
{
"epoch": 2.353755624783662,
"grad_norm": 3.3041462898254395,
"learning_rate": 3.368421052631579e-06,
"loss": 0.1104,
"step": 6800
},
{
"epoch": 2.3624091381100727,
"grad_norm": 2.6233861446380615,
"learning_rate": 3.342105263157895e-06,
"loss": 0.1075,
"step": 6825
},
{
"epoch": 2.371062651436483,
"grad_norm": 2.8356857299804688,
"learning_rate": 3.3157894736842107e-06,
"loss": 0.1058,
"step": 6850
},
{
"epoch": 2.3797161647628937,
"grad_norm": 2.9162681102752686,
"learning_rate": 3.289473684210527e-06,
"loss": 0.122,
"step": 6875
},
{
"epoch": 2.3883696780893042,
"grad_norm": 3.749187707901001,
"learning_rate": 3.2631578947368423e-06,
"loss": 0.1136,
"step": 6900
},
{
"epoch": 2.3970231914157147,
"grad_norm": 2.6137099266052246,
"learning_rate": 3.236842105263158e-06,
"loss": 0.1087,
"step": 6925
},
{
"epoch": 2.4056767047421252,
"grad_norm": 2.7214744091033936,
"learning_rate": 3.210526315789474e-06,
"loss": 0.103,
"step": 6950
},
{
"epoch": 2.4143302180685358,
"grad_norm": 2.98718523979187,
"learning_rate": 3.1842105263157898e-06,
"loss": 0.1125,
"step": 6975
},
{
"epoch": 2.4229837313949463,
"grad_norm": 2.834648609161377,
"learning_rate": 3.157894736842105e-06,
"loss": 0.1129,
"step": 7000
},
{
"epoch": 2.4229837313949463,
"eval_loss": 0.20139536261558533,
"eval_runtime": 8648.9738,
"eval_samples_per_second": 1.188,
"eval_steps_per_second": 0.074,
"eval_wer": 17.110302400138636,
"step": 7000
},
{
"epoch": 2.431637244721357,
"grad_norm": 3.4440181255340576,
"learning_rate": 3.131578947368421e-06,
"loss": 0.1141,
"step": 7025
},
{
"epoch": 2.4402907580477673,
"grad_norm": 3.478074550628662,
"learning_rate": 3.1052631578947372e-06,
"loss": 0.1031,
"step": 7050
},
{
"epoch": 2.448944271374178,
"grad_norm": 2.797724485397339,
"learning_rate": 3.078947368421053e-06,
"loss": 0.1094,
"step": 7075
},
{
"epoch": 2.4575977847005883,
"grad_norm": 1.8929002285003662,
"learning_rate": 3.052631578947369e-06,
"loss": 0.1097,
"step": 7100
},
{
"epoch": 2.466251298026999,
"grad_norm": 3.516230583190918,
"learning_rate": 3.0263157894736843e-06,
"loss": 0.1167,
"step": 7125
},
{
"epoch": 2.4749048113534093,
"grad_norm": 3.7443478107452393,
"learning_rate": 3e-06,
"loss": 0.1037,
"step": 7150
},
{
"epoch": 2.48355832467982,
"grad_norm": 2.543609142303467,
"learning_rate": 2.973684210526316e-06,
"loss": 0.1074,
"step": 7175
},
{
"epoch": 2.4922118380062304,
"grad_norm": 3.233546495437622,
"learning_rate": 2.9473684210526317e-06,
"loss": 0.1097,
"step": 7200
},
{
"epoch": 2.5008653513326413,
"grad_norm": 2.6485321521759033,
"learning_rate": 2.921052631578948e-06,
"loss": 0.1166,
"step": 7225
},
{
"epoch": 2.509518864659052,
"grad_norm": 2.249458074569702,
"learning_rate": 2.8947368421052634e-06,
"loss": 0.1122,
"step": 7250
},
{
"epoch": 2.5181723779854623,
"grad_norm": 3.3715906143188477,
"learning_rate": 2.868421052631579e-06,
"loss": 0.1171,
"step": 7275
},
{
"epoch": 2.526825891311873,
"grad_norm": 2.5565547943115234,
"learning_rate": 2.842105263157895e-06,
"loss": 0.1081,
"step": 7300
},
{
"epoch": 2.5354794046382834,
"grad_norm": 3.1583316326141357,
"learning_rate": 2.815789473684211e-06,
"loss": 0.1048,
"step": 7325
},
{
"epoch": 2.544132917964694,
"grad_norm": 3.302534580230713,
"learning_rate": 2.789473684210526e-06,
"loss": 0.1184,
"step": 7350
},
{
"epoch": 2.5527864312911044,
"grad_norm": 3.553318738937378,
"learning_rate": 2.7631578947368424e-06,
"loss": 0.1151,
"step": 7375
},
{
"epoch": 2.561439944617515,
"grad_norm": 2.6962010860443115,
"learning_rate": 2.7368421052631583e-06,
"loss": 0.1201,
"step": 7400
},
{
"epoch": 2.5700934579439254,
"grad_norm": 2.545358657836914,
"learning_rate": 2.710526315789474e-06,
"loss": 0.1273,
"step": 7425
},
{
"epoch": 2.578746971270336,
"grad_norm": 2.1197948455810547,
"learning_rate": 2.68421052631579e-06,
"loss": 0.0947,
"step": 7450
},
{
"epoch": 2.5874004845967464,
"grad_norm": 1.732006311416626,
"learning_rate": 2.6578947368421053e-06,
"loss": 0.1079,
"step": 7475
},
{
"epoch": 2.596053997923157,
"grad_norm": 2.9386560916900635,
"learning_rate": 2.631578947368421e-06,
"loss": 0.1065,
"step": 7500
},
{
"epoch": 2.596053997923157,
"eval_loss": 0.1983751654624939,
"eval_runtime": 8607.5634,
"eval_samples_per_second": 1.193,
"eval_steps_per_second": 0.075,
"eval_wer": 16.488172602027554,
"step": 7500
},
{
"epoch": 2.6047075112495675,
"grad_norm": 3.1988844871520996,
"learning_rate": 2.605263157894737e-06,
"loss": 0.1065,
"step": 7525
},
{
"epoch": 2.613361024575978,
"grad_norm": 2.8446412086486816,
"learning_rate": 2.578947368421053e-06,
"loss": 0.1175,
"step": 7550
},
{
"epoch": 2.6220145379023885,
"grad_norm": 3.071406364440918,
"learning_rate": 2.552631578947369e-06,
"loss": 0.0999,
"step": 7575
},
{
"epoch": 2.630668051228799,
"grad_norm": 2.666354179382324,
"learning_rate": 2.5263157894736844e-06,
"loss": 0.1036,
"step": 7600
},
{
"epoch": 2.6393215645552095,
"grad_norm": 2.845916271209717,
"learning_rate": 2.5e-06,
"loss": 0.1033,
"step": 7625
},
{
"epoch": 2.64797507788162,
"grad_norm": 2.97814679145813,
"learning_rate": 2.473684210526316e-06,
"loss": 0.1025,
"step": 7650
},
{
"epoch": 2.6566285912080305,
"grad_norm": 2.5824403762817383,
"learning_rate": 2.447368421052632e-06,
"loss": 0.1048,
"step": 7675
},
{
"epoch": 2.665282104534441,
"grad_norm": 2.1139883995056152,
"learning_rate": 2.4210526315789477e-06,
"loss": 0.1047,
"step": 7700
},
{
"epoch": 2.6739356178608515,
"grad_norm": 2.800978183746338,
"learning_rate": 2.3947368421052635e-06,
"loss": 0.1184,
"step": 7725
},
{
"epoch": 2.682589131187262,
"grad_norm": 3.0786638259887695,
"learning_rate": 2.368421052631579e-06,
"loss": 0.1286,
"step": 7750
},
{
"epoch": 2.6912426445136726,
"grad_norm": 2.917689085006714,
"learning_rate": 2.342105263157895e-06,
"loss": 0.0988,
"step": 7775
},
{
"epoch": 2.699896157840083,
"grad_norm": 2.986503839492798,
"learning_rate": 2.3157894736842105e-06,
"loss": 0.1074,
"step": 7800
},
{
"epoch": 2.7085496711664936,
"grad_norm": 2.5001847743988037,
"learning_rate": 2.2894736842105263e-06,
"loss": 0.1058,
"step": 7825
},
{
"epoch": 2.717203184492904,
"grad_norm": 3.5014684200286865,
"learning_rate": 2.2631578947368426e-06,
"loss": 0.1094,
"step": 7850
},
{
"epoch": 2.7258566978193146,
"grad_norm": 3.0983262062072754,
"learning_rate": 2.236842105263158e-06,
"loss": 0.1079,
"step": 7875
},
{
"epoch": 2.734510211145725,
"grad_norm": 2.922757625579834,
"learning_rate": 2.2105263157894738e-06,
"loss": 0.1135,
"step": 7900
},
{
"epoch": 2.7431637244721356,
"grad_norm": 2.354649305343628,
"learning_rate": 2.1842105263157896e-06,
"loss": 0.1145,
"step": 7925
},
{
"epoch": 2.751817237798546,
"grad_norm": 3.7237930297851562,
"learning_rate": 2.1578947368421054e-06,
"loss": 0.098,
"step": 7950
},
{
"epoch": 2.7604707511249567,
"grad_norm": 3.361809492111206,
"learning_rate": 2.1315789473684212e-06,
"loss": 0.1108,
"step": 7975
},
{
"epoch": 2.769124264451367,
"grad_norm": 2.6860949993133545,
"learning_rate": 2.105263157894737e-06,
"loss": 0.1075,
"step": 8000
},
{
"epoch": 2.769124264451367,
"eval_loss": 0.19574593007564545,
"eval_runtime": 8625.7946,
"eval_samples_per_second": 1.191,
"eval_steps_per_second": 0.074,
"eval_wer": 16.583484966640672,
"step": 8000
},
{
"epoch": 2.7777777777777777,
"grad_norm": 2.503368616104126,
"learning_rate": 2.078947368421053e-06,
"loss": 0.0999,
"step": 8025
},
{
"epoch": 2.786431291104188,
"grad_norm": 2.762155055999756,
"learning_rate": 2.0526315789473687e-06,
"loss": 0.1133,
"step": 8050
},
{
"epoch": 2.7950848044305987,
"grad_norm": 3.162900686264038,
"learning_rate": 2.026315789473684e-06,
"loss": 0.1208,
"step": 8075
},
{
"epoch": 2.803738317757009,
"grad_norm": 2.3575284481048584,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.1011,
"step": 8100
},
{
"epoch": 2.8123918310834197,
"grad_norm": 3.4756760597229004,
"learning_rate": 1.973684210526316e-06,
"loss": 0.1095,
"step": 8125
},
{
"epoch": 2.8210453444098302,
"grad_norm": 2.538372039794922,
"learning_rate": 1.9473684210526315e-06,
"loss": 0.1069,
"step": 8150
},
{
"epoch": 2.8296988577362407,
"grad_norm": 2.2625138759613037,
"learning_rate": 1.9210526315789474e-06,
"loss": 0.1065,
"step": 8175
},
{
"epoch": 2.8383523710626513,
"grad_norm": 2.7284586429595947,
"learning_rate": 1.8947368421052634e-06,
"loss": 0.1105,
"step": 8200
},
{
"epoch": 2.8470058843890618,
"grad_norm": 2.6115376949310303,
"learning_rate": 1.868421052631579e-06,
"loss": 0.1035,
"step": 8225
},
{
"epoch": 2.8556593977154723,
"grad_norm": 2.6199817657470703,
"learning_rate": 1.8421052631578948e-06,
"loss": 0.1224,
"step": 8250
},
{
"epoch": 2.864312911041883,
"grad_norm": 3.060654640197754,
"learning_rate": 1.8157894736842109e-06,
"loss": 0.1027,
"step": 8275
},
{
"epoch": 2.8729664243682933,
"grad_norm": 3.6875500679016113,
"learning_rate": 1.7894736842105265e-06,
"loss": 0.0934,
"step": 8300
},
{
"epoch": 2.881619937694704,
"grad_norm": 2.232487440109253,
"learning_rate": 1.7631578947368423e-06,
"loss": 0.0972,
"step": 8325
},
{
"epoch": 2.8902734510211143,
"grad_norm": 3.0473804473876953,
"learning_rate": 1.736842105263158e-06,
"loss": 0.1013,
"step": 8350
},
{
"epoch": 2.898926964347525,
"grad_norm": 3.049717903137207,
"learning_rate": 1.710526315789474e-06,
"loss": 0.1024,
"step": 8375
},
{
"epoch": 2.9075804776739353,
"grad_norm": 3.389495849609375,
"learning_rate": 1.6842105263157895e-06,
"loss": 0.1114,
"step": 8400
},
{
"epoch": 2.9162339910003463,
"grad_norm": 2.7483088970184326,
"learning_rate": 1.6578947368421053e-06,
"loss": 0.1103,
"step": 8425
},
{
"epoch": 2.924887504326757,
"grad_norm": 2.98256778717041,
"learning_rate": 1.6315789473684212e-06,
"loss": 0.1131,
"step": 8450
},
{
"epoch": 2.9335410176531673,
"grad_norm": 3.0447702407836914,
"learning_rate": 1.605263157894737e-06,
"loss": 0.1031,
"step": 8475
},
{
"epoch": 2.942194530979578,
"grad_norm": 2.4080259799957275,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.0992,
"step": 8500
},
{
"epoch": 2.942194530979578,
"eval_loss": 0.19420863687992096,
"eval_runtime": 8629.0493,
"eval_samples_per_second": 1.19,
"eval_steps_per_second": 0.074,
"eval_wer": 16.375530716575685,
"step": 8500
},
{
"epoch": 2.9508480443059883,
"grad_norm": 2.4957826137542725,
"learning_rate": 1.5526315789473686e-06,
"loss": 0.0983,
"step": 8525
},
{
"epoch": 2.959501557632399,
"grad_norm": 2.019061326980591,
"learning_rate": 1.5263157894736844e-06,
"loss": 0.099,
"step": 8550
},
{
"epoch": 2.9681550709588094,
"grad_norm": 3.2875280380249023,
"learning_rate": 1.5e-06,
"loss": 0.1051,
"step": 8575
},
{
"epoch": 2.97680858428522,
"grad_norm": 2.705897092819214,
"learning_rate": 1.4736842105263159e-06,
"loss": 0.1033,
"step": 8600
},
{
"epoch": 2.9854620976116304,
"grad_norm": 2.27734375,
"learning_rate": 1.4473684210526317e-06,
"loss": 0.1075,
"step": 8625
},
{
"epoch": 2.994115610938041,
"grad_norm": 3.100257635116577,
"learning_rate": 1.4210526315789475e-06,
"loss": 0.11,
"step": 8650
},
{
"epoch": 3.0027691242644514,
"grad_norm": 2.2938201427459717,
"learning_rate": 1.394736842105263e-06,
"loss": 0.0875,
"step": 8675
},
{
"epoch": 3.011422637590862,
"grad_norm": 1.6862682104110718,
"learning_rate": 1.3684210526315791e-06,
"loss": 0.0785,
"step": 8700
},
{
"epoch": 3.0200761509172724,
"grad_norm": 2.7323806285858154,
"learning_rate": 1.342105263157895e-06,
"loss": 0.0795,
"step": 8725
},
{
"epoch": 3.028729664243683,
"grad_norm": 2.4621291160583496,
"learning_rate": 1.3157894736842106e-06,
"loss": 0.0693,
"step": 8750
},
{
"epoch": 3.0373831775700935,
"grad_norm": 2.2543725967407227,
"learning_rate": 1.2894736842105266e-06,
"loss": 0.0707,
"step": 8775
},
{
"epoch": 3.046036690896504,
"grad_norm": 2.0178897380828857,
"learning_rate": 1.2631578947368422e-06,
"loss": 0.0787,
"step": 8800
},
{
"epoch": 3.0546902042229145,
"grad_norm": 1.9907864332199097,
"learning_rate": 1.236842105263158e-06,
"loss": 0.075,
"step": 8825
},
{
"epoch": 3.063343717549325,
"grad_norm": 2.3367834091186523,
"learning_rate": 1.2105263157894738e-06,
"loss": 0.0789,
"step": 8850
},
{
"epoch": 3.0719972308757355,
"grad_norm": 2.4846036434173584,
"learning_rate": 1.1842105263157894e-06,
"loss": 0.0722,
"step": 8875
},
{
"epoch": 3.080650744202146,
"grad_norm": 2.7100768089294434,
"learning_rate": 1.1578947368421053e-06,
"loss": 0.0724,
"step": 8900
},
{
"epoch": 3.0893042575285565,
"grad_norm": 2.0488345623016357,
"learning_rate": 1.1315789473684213e-06,
"loss": 0.0818,
"step": 8925
},
{
"epoch": 3.097957770854967,
"grad_norm": 2.2149784564971924,
"learning_rate": 1.1052631578947369e-06,
"loss": 0.0753,
"step": 8950
},
{
"epoch": 3.1066112841813776,
"grad_norm": 1.7441498041152954,
"learning_rate": 1.0789473684210527e-06,
"loss": 0.0658,
"step": 8975
},
{
"epoch": 3.115264797507788,
"grad_norm": 2.315944194793701,
"learning_rate": 1.0526315789473685e-06,
"loss": 0.0687,
"step": 9000
},
{
"epoch": 3.115264797507788,
"eval_loss": 0.2007349729537964,
"eval_runtime": 8593.9573,
"eval_samples_per_second": 1.195,
"eval_steps_per_second": 0.075,
"eval_wer": 16.449181180140368,
"step": 9000
},
{
"epoch": 3.1239183108341986,
"grad_norm": 2.1374213695526123,
"learning_rate": 1.0263157894736843e-06,
"loss": 0.0678,
"step": 9025
},
{
"epoch": 3.132571824160609,
"grad_norm": 2.6714038848876953,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0726,
"step": 9050
},
{
"epoch": 3.1412253374870196,
"grad_norm": 2.326164484024048,
"learning_rate": 9.736842105263158e-07,
"loss": 0.0737,
"step": 9075
},
{
"epoch": 3.14987885081343,
"grad_norm": 1.5465072393417358,
"learning_rate": 9.473684210526317e-07,
"loss": 0.0699,
"step": 9100
},
{
"epoch": 3.1585323641398406,
"grad_norm": 1.9387298822402954,
"learning_rate": 9.210526315789474e-07,
"loss": 0.0707,
"step": 9125
},
{
"epoch": 3.167185877466251,
"grad_norm": 2.333085775375366,
"learning_rate": 8.947368421052632e-07,
"loss": 0.0679,
"step": 9150
},
{
"epoch": 3.1758393907926616,
"grad_norm": 1.9540473222732544,
"learning_rate": 8.68421052631579e-07,
"loss": 0.0683,
"step": 9175
},
{
"epoch": 3.184492904119072,
"grad_norm": 2.5576722621917725,
"learning_rate": 8.421052631578948e-07,
"loss": 0.0719,
"step": 9200
},
{
"epoch": 3.1931464174454827,
"grad_norm": 2.0068089962005615,
"learning_rate": 8.157894736842106e-07,
"loss": 0.0853,
"step": 9225
},
{
"epoch": 3.2017999307718936,
"grad_norm": 2.2162768840789795,
"learning_rate": 7.894736842105263e-07,
"loss": 0.0683,
"step": 9250
},
{
"epoch": 3.210453444098304,
"grad_norm": 1.776559829711914,
"learning_rate": 7.631578947368422e-07,
"loss": 0.0798,
"step": 9275
},
{
"epoch": 3.2191069574247146,
"grad_norm": 1.4732505083084106,
"learning_rate": 7.368421052631579e-07,
"loss": 0.0726,
"step": 9300
},
{
"epoch": 3.227760470751125,
"grad_norm": 2.921454906463623,
"learning_rate": 7.105263157894737e-07,
"loss": 0.0717,
"step": 9325
},
{
"epoch": 3.2364139840775357,
"grad_norm": 2.061314344406128,
"learning_rate": 6.842105263157896e-07,
"loss": 0.0694,
"step": 9350
},
{
"epoch": 3.245067497403946,
"grad_norm": 2.4505109786987305,
"learning_rate": 6.578947368421053e-07,
"loss": 0.0718,
"step": 9375
},
{
"epoch": 3.2537210107303567,
"grad_norm": 2.636258840560913,
"learning_rate": 6.315789473684211e-07,
"loss": 0.0714,
"step": 9400
},
{
"epoch": 3.262374524056767,
"grad_norm": 2.4016501903533936,
"learning_rate": 6.052631578947369e-07,
"loss": 0.0821,
"step": 9425
},
{
"epoch": 3.2710280373831777,
"grad_norm": 2.0783393383026123,
"learning_rate": 5.789473684210526e-07,
"loss": 0.0748,
"step": 9450
},
{
"epoch": 3.2796815507095882,
"grad_norm": 3.0884315967559814,
"learning_rate": 5.526315789473684e-07,
"loss": 0.0833,
"step": 9475
},
{
"epoch": 3.2883350640359987,
"grad_norm": 2.3851513862609863,
"learning_rate": 5.263157894736843e-07,
"loss": 0.0722,
"step": 9500
},
{
"epoch": 3.2883350640359987,
"eval_loss": 0.20027859508991241,
"eval_runtime": 8622.3221,
"eval_samples_per_second": 1.191,
"eval_steps_per_second": 0.074,
"eval_wer": 16.265488259249633,
"step": 9500
},
{
"epoch": 3.2969885773624092,
"grad_norm": 2.619279146194458,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0707,
"step": 9525
},
{
"epoch": 3.3056420906888198,
"grad_norm": 1.975462794303894,
"learning_rate": 4.7368421052631585e-07,
"loss": 0.0696,
"step": 9550
},
{
"epoch": 3.3142956040152303,
"grad_norm": 2.281332015991211,
"learning_rate": 4.473684210526316e-07,
"loss": 0.0698,
"step": 9575
},
{
"epoch": 3.322949117341641,
"grad_norm": 2.048888683319092,
"learning_rate": 4.210526315789474e-07,
"loss": 0.0712,
"step": 9600
},
{
"epoch": 3.3316026306680513,
"grad_norm": 2.216397762298584,
"learning_rate": 3.9473684210526315e-07,
"loss": 0.0756,
"step": 9625
},
{
"epoch": 3.340256143994462,
"grad_norm": 3.0520379543304443,
"learning_rate": 3.6842105263157896e-07,
"loss": 0.0682,
"step": 9650
},
{
"epoch": 3.3489096573208723,
"grad_norm": 3.0853352546691895,
"learning_rate": 3.421052631578948e-07,
"loss": 0.0803,
"step": 9675
},
{
"epoch": 3.357563170647283,
"grad_norm": 2.6923489570617676,
"learning_rate": 3.1578947368421055e-07,
"loss": 0.0699,
"step": 9700
},
{
"epoch": 3.3662166839736933,
"grad_norm": 1.5350950956344604,
"learning_rate": 2.894736842105263e-07,
"loss": 0.0641,
"step": 9725
},
{
"epoch": 3.374870197300104,
"grad_norm": 1.8158336877822876,
"learning_rate": 2.6315789473684213e-07,
"loss": 0.0742,
"step": 9750
},
{
"epoch": 3.3835237106265144,
"grad_norm": 2.268543243408203,
"learning_rate": 2.3684210526315792e-07,
"loss": 0.0812,
"step": 9775
},
{
"epoch": 3.392177223952925,
"grad_norm": 2.02999210357666,
"learning_rate": 2.105263157894737e-07,
"loss": 0.0745,
"step": 9800
},
{
"epoch": 3.4008307372793354,
"grad_norm": 2.2966854572296143,
"learning_rate": 1.8421052631578948e-07,
"loss": 0.0685,
"step": 9825
},
{
"epoch": 3.409484250605746,
"grad_norm": 2.4790639877319336,
"learning_rate": 1.5789473684210527e-07,
"loss": 0.0695,
"step": 9850
},
{
"epoch": 3.4181377639321564,
"grad_norm": 2.1657919883728027,
"learning_rate": 1.3157894736842107e-07,
"loss": 0.0742,
"step": 9875
},
{
"epoch": 3.426791277258567,
"grad_norm": 1.6919013261795044,
"learning_rate": 1.0526315789473685e-07,
"loss": 0.0641,
"step": 9900
},
{
"epoch": 3.4354447905849774,
"grad_norm": 2.441950798034668,
"learning_rate": 7.894736842105264e-08,
"loss": 0.0701,
"step": 9925
},
{
"epoch": 3.444098303911388,
"grad_norm": 1.9817427396774292,
"learning_rate": 5.263157894736842e-08,
"loss": 0.0677,
"step": 9950
},
{
"epoch": 3.4527518172377984,
"grad_norm": 1.978274941444397,
"learning_rate": 2.631578947368421e-08,
"loss": 0.073,
"step": 9975
},
{
"epoch": 3.461405330564209,
"grad_norm": 2.204577684402466,
"learning_rate": 0.0,
"loss": 0.0713,
"step": 10000
},
{
"epoch": 3.461405330564209,
"eval_loss": 0.19991621375083923,
"eval_runtime": 8599.1571,
"eval_samples_per_second": 1.195,
"eval_steps_per_second": 0.075,
"eval_wer": 16.21523264881726,
"step": 10000
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.455843688448e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}