{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 23.64066193853428,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1182033096926714,
"grad_norm": 9.473404884338379,
"learning_rate": 4.800000000000001e-07,
"loss": 1.5126,
"step": 25
},
{
"epoch": 0.2364066193853428,
"grad_norm": 4.571593761444092,
"learning_rate": 9.800000000000001e-07,
"loss": 1.2149,
"step": 50
},
{
"epoch": 0.3546099290780142,
"grad_norm": 7.452127933502197,
"learning_rate": 1.48e-06,
"loss": 0.9173,
"step": 75
},
{
"epoch": 0.4728132387706856,
"grad_norm": 4.357412338256836,
"learning_rate": 1.98e-06,
"loss": 0.7886,
"step": 100
},
{
"epoch": 0.5910165484633569,
"grad_norm": 4.547176837921143,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.7374,
"step": 125
},
{
"epoch": 0.7092198581560284,
"grad_norm": 5.427709102630615,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.7146,
"step": 150
},
{
"epoch": 0.8274231678486997,
"grad_norm": 3.91564679145813,
"learning_rate": 3.48e-06,
"loss": 0.6503,
"step": 175
},
{
"epoch": 0.9456264775413712,
"grad_norm": 4.455580711364746,
"learning_rate": 3.980000000000001e-06,
"loss": 0.6658,
"step": 200
},
{
"epoch": 1.0638297872340425,
"grad_norm": 3.6917192935943604,
"learning_rate": 4.48e-06,
"loss": 0.5825,
"step": 225
},
{
"epoch": 1.1820330969267139,
"grad_norm": 4.471068382263184,
"learning_rate": 4.980000000000001e-06,
"loss": 0.5309,
"step": 250
},
{
"epoch": 1.3002364066193852,
"grad_norm": 3.2161197662353516,
"learning_rate": 5.480000000000001e-06,
"loss": 0.5326,
"step": 275
},
{
"epoch": 1.4184397163120568,
"grad_norm": 5.081483364105225,
"learning_rate": 5.98e-06,
"loss": 0.5245,
"step": 300
},
{
"epoch": 1.5366430260047281,
"grad_norm": 4.846170902252197,
"learning_rate": 6.460000000000001e-06,
"loss": 0.5414,
"step": 325
},
{
"epoch": 1.6548463356973995,
"grad_norm": 4.416396617889404,
"learning_rate": 6.96e-06,
"loss": 0.5286,
"step": 350
},
{
"epoch": 1.773049645390071,
"grad_norm": 4.048226356506348,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.5296,
"step": 375
},
{
"epoch": 1.8912529550827424,
"grad_norm": 3.8953003883361816,
"learning_rate": 7.960000000000002e-06,
"loss": 0.5342,
"step": 400
},
{
"epoch": 2.0094562647754137,
"grad_norm": 2.8048012256622314,
"learning_rate": 8.46e-06,
"loss": 0.5194,
"step": 425
},
{
"epoch": 2.127659574468085,
"grad_norm": 3.505836009979248,
"learning_rate": 8.96e-06,
"loss": 0.3554,
"step": 450
},
{
"epoch": 2.2458628841607564,
"grad_norm": 3.4311647415161133,
"learning_rate": 9.460000000000001e-06,
"loss": 0.364,
"step": 475
},
{
"epoch": 2.3640661938534278,
"grad_norm": 3.827054500579834,
"learning_rate": 9.960000000000001e-06,
"loss": 0.3648,
"step": 500
},
{
"epoch": 2.482269503546099,
"grad_norm": 4.961852550506592,
"learning_rate": 9.94888888888889e-06,
"loss": 0.3742,
"step": 525
},
{
"epoch": 2.6004728132387704,
"grad_norm": 3.1408016681671143,
"learning_rate": 9.893333333333334e-06,
"loss": 0.3726,
"step": 550
},
{
"epoch": 2.7186761229314422,
"grad_norm": 3.81847882270813,
"learning_rate": 9.837777777777778e-06,
"loss": 0.3769,
"step": 575
},
{
"epoch": 2.8368794326241136,
"grad_norm": 3.568127393722534,
"learning_rate": 9.782222222222222e-06,
"loss": 0.3835,
"step": 600
},
{
"epoch": 2.955082742316785,
"grad_norm": 8.084869384765625,
"learning_rate": 9.726666666666668e-06,
"loss": 0.389,
"step": 625
},
{
"epoch": 3.0732860520094563,
"grad_norm": 3.035639762878418,
"learning_rate": 9.671111111111112e-06,
"loss": 0.2804,
"step": 650
},
{
"epoch": 3.1914893617021276,
"grad_norm": 2.580465078353882,
"learning_rate": 9.615555555555558e-06,
"loss": 0.241,
"step": 675
},
{
"epoch": 3.309692671394799,
"grad_norm": 3.6414577960968018,
"learning_rate": 9.56e-06,
"loss": 0.2326,
"step": 700
},
{
"epoch": 3.4278959810874703,
"grad_norm": 2.9789085388183594,
"learning_rate": 9.504444444444446e-06,
"loss": 0.2302,
"step": 725
},
{
"epoch": 3.546099290780142,
"grad_norm": 6.875185966491699,
"learning_rate": 9.44888888888889e-06,
"loss": 0.2515,
"step": 750
},
{
"epoch": 3.664302600472813,
"grad_norm": 3.034479856491089,
"learning_rate": 9.393333333333334e-06,
"loss": 0.2381,
"step": 775
},
{
"epoch": 3.7825059101654848,
"grad_norm": 3.64566969871521,
"learning_rate": 9.33777777777778e-06,
"loss": 0.2339,
"step": 800
},
{
"epoch": 3.900709219858156,
"grad_norm": 4.0959696769714355,
"learning_rate": 9.282222222222222e-06,
"loss": 0.2456,
"step": 825
},
{
"epoch": 4.0189125295508275,
"grad_norm": 2.0153396129608154,
"learning_rate": 9.226666666666668e-06,
"loss": 0.2384,
"step": 850
},
{
"epoch": 4.137115839243499,
"grad_norm": 2.4814517498016357,
"learning_rate": 9.171111111111112e-06,
"loss": 0.1472,
"step": 875
},
{
"epoch": 4.25531914893617,
"grad_norm": 3.0185892581939697,
"learning_rate": 9.115555555555556e-06,
"loss": 0.1482,
"step": 900
},
{
"epoch": 4.373522458628842,
"grad_norm": 2.817070722579956,
"learning_rate": 9.060000000000001e-06,
"loss": 0.1496,
"step": 925
},
{
"epoch": 4.491725768321513,
"grad_norm": 2.3127124309539795,
"learning_rate": 9.004444444444445e-06,
"loss": 0.1441,
"step": 950
},
{
"epoch": 4.609929078014185,
"grad_norm": 2.191143274307251,
"learning_rate": 8.94888888888889e-06,
"loss": 0.1474,
"step": 975
},
{
"epoch": 4.7281323877068555,
"grad_norm": 2.4471395015716553,
"learning_rate": 8.893333333333333e-06,
"loss": 0.1503,
"step": 1000
},
{
"epoch": 4.7281323877068555,
"eval_loss": 0.6543964743614197,
"eval_runtime": 532.706,
"eval_samples_per_second": 2.405,
"eval_steps_per_second": 0.152,
"eval_wer": 0.4245213998292891,
"step": 1000
},
{
"epoch": 4.846335697399527,
"grad_norm": 2.7800285816192627,
"learning_rate": 8.83777777777778e-06,
"loss": 0.1549,
"step": 1025
},
{
"epoch": 4.964539007092198,
"grad_norm": 5.495487689971924,
"learning_rate": 8.782222222222223e-06,
"loss": 0.1529,
"step": 1050
},
{
"epoch": 5.08274231678487,
"grad_norm": 2.151169538497925,
"learning_rate": 8.726666666666667e-06,
"loss": 0.1009,
"step": 1075
},
{
"epoch": 5.200945626477542,
"grad_norm": 2.556049108505249,
"learning_rate": 8.671111111111113e-06,
"loss": 0.0838,
"step": 1100
},
{
"epoch": 5.319148936170213,
"grad_norm": 2.955305576324463,
"learning_rate": 8.615555555555555e-06,
"loss": 0.0866,
"step": 1125
},
{
"epoch": 5.4373522458628845,
"grad_norm": 2.1313858032226562,
"learning_rate": 8.560000000000001e-06,
"loss": 0.0876,
"step": 1150
},
{
"epoch": 5.555555555555555,
"grad_norm": 2.003467559814453,
"learning_rate": 8.504444444444445e-06,
"loss": 0.0867,
"step": 1175
},
{
"epoch": 5.673758865248227,
"grad_norm": 2.9585061073303223,
"learning_rate": 8.448888888888889e-06,
"loss": 0.0937,
"step": 1200
},
{
"epoch": 5.791962174940898,
"grad_norm": 2.8424105644226074,
"learning_rate": 8.393333333333335e-06,
"loss": 0.0888,
"step": 1225
},
{
"epoch": 5.91016548463357,
"grad_norm": 3.2550556659698486,
"learning_rate": 8.337777777777777e-06,
"loss": 0.0964,
"step": 1250
},
{
"epoch": 6.028368794326241,
"grad_norm": 2.1639039516448975,
"learning_rate": 8.282222222222223e-06,
"loss": 0.083,
"step": 1275
},
{
"epoch": 6.1465721040189125,
"grad_norm": 10.045706748962402,
"learning_rate": 8.226666666666667e-06,
"loss": 0.0489,
"step": 1300
},
{
"epoch": 6.264775413711584,
"grad_norm": 0.9968127012252808,
"learning_rate": 8.171111111111113e-06,
"loss": 0.0569,
"step": 1325
},
{
"epoch": 6.382978723404255,
"grad_norm": 2.098369836807251,
"learning_rate": 8.115555555555557e-06,
"loss": 0.0553,
"step": 1350
},
{
"epoch": 6.501182033096927,
"grad_norm": 2.182260036468506,
"learning_rate": 8.06e-06,
"loss": 0.0514,
"step": 1375
},
{
"epoch": 6.619385342789598,
"grad_norm": 2.040424108505249,
"learning_rate": 8.004444444444445e-06,
"loss": 0.0534,
"step": 1400
},
{
"epoch": 6.73758865248227,
"grad_norm": 2.8347978591918945,
"learning_rate": 7.948888888888889e-06,
"loss": 0.0604,
"step": 1425
},
{
"epoch": 6.855791962174941,
"grad_norm": 2.2770578861236572,
"learning_rate": 7.893333333333335e-06,
"loss": 0.058,
"step": 1450
},
{
"epoch": 6.973995271867612,
"grad_norm": 2.0066399574279785,
"learning_rate": 7.837777777777779e-06,
"loss": 0.0571,
"step": 1475
},
{
"epoch": 7.092198581560283,
"grad_norm": 1.8423205614089966,
"learning_rate": 7.782222222222223e-06,
"loss": 0.0375,
"step": 1500
},
{
"epoch": 7.210401891252955,
"grad_norm": 1.2331498861312866,
"learning_rate": 7.726666666666667e-06,
"loss": 0.0352,
"step": 1525
},
{
"epoch": 7.328605200945627,
"grad_norm": 1.8632996082305908,
"learning_rate": 7.67111111111111e-06,
"loss": 0.0344,
"step": 1550
},
{
"epoch": 7.446808510638298,
"grad_norm": 2.3140857219696045,
"learning_rate": 7.6155555555555564e-06,
"loss": 0.0351,
"step": 1575
},
{
"epoch": 7.5650118203309695,
"grad_norm": 1.7188278436660767,
"learning_rate": 7.5600000000000005e-06,
"loss": 0.0383,
"step": 1600
},
{
"epoch": 7.68321513002364,
"grad_norm": 1.9774043560028076,
"learning_rate": 7.504444444444445e-06,
"loss": 0.0374,
"step": 1625
},
{
"epoch": 7.801418439716312,
"grad_norm": 2.773897171020508,
"learning_rate": 7.44888888888889e-06,
"loss": 0.0573,
"step": 1650
},
{
"epoch": 7.919621749408983,
"grad_norm": 2.1994612216949463,
"learning_rate": 7.393333333333333e-06,
"loss": 0.0434,
"step": 1675
},
{
"epoch": 8.037825059101655,
"grad_norm": 1.4192453622817993,
"learning_rate": 7.337777777777778e-06,
"loss": 0.0313,
"step": 1700
},
{
"epoch": 8.156028368794326,
"grad_norm": 2.99482798576355,
"learning_rate": 7.282222222222222e-06,
"loss": 0.0233,
"step": 1725
},
{
"epoch": 8.274231678486998,
"grad_norm": 1.1068618297576904,
"learning_rate": 7.226666666666667e-06,
"loss": 0.0232,
"step": 1750
},
{
"epoch": 8.39243498817967,
"grad_norm": 1.4327691793441772,
"learning_rate": 7.171111111111112e-06,
"loss": 0.0242,
"step": 1775
},
{
"epoch": 8.51063829787234,
"grad_norm": 2.041482925415039,
"learning_rate": 7.115555555555557e-06,
"loss": 0.0268,
"step": 1800
},
{
"epoch": 8.628841607565011,
"grad_norm": 1.097105860710144,
"learning_rate": 7.062222222222223e-06,
"loss": 0.0258,
"step": 1825
},
{
"epoch": 8.747044917257684,
"grad_norm": 1.367948055267334,
"learning_rate": 7.006666666666667e-06,
"loss": 0.0275,
"step": 1850
},
{
"epoch": 8.865248226950355,
"grad_norm": 1.6671417951583862,
"learning_rate": 6.951111111111112e-06,
"loss": 0.0272,
"step": 1875
},
{
"epoch": 8.983451536643026,
"grad_norm": 2.5331335067749023,
"learning_rate": 6.8955555555555565e-06,
"loss": 0.0265,
"step": 1900
},
{
"epoch": 9.101654846335697,
"grad_norm": 0.5550170540809631,
"learning_rate": 6.8400000000000014e-06,
"loss": 0.016,
"step": 1925
},
{
"epoch": 9.21985815602837,
"grad_norm": 0.7913989424705505,
"learning_rate": 6.784444444444445e-06,
"loss": 0.0173,
"step": 1950
},
{
"epoch": 9.33806146572104,
"grad_norm": 1.1123679876327515,
"learning_rate": 6.7288888888888895e-06,
"loss": 0.0178,
"step": 1975
},
{
"epoch": 9.456264775413711,
"grad_norm": 1.4334158897399902,
"learning_rate": 6.6733333333333335e-06,
"loss": 0.018,
"step": 2000
},
{
"epoch": 9.456264775413711,
"eval_loss": 0.8408699631690979,
"eval_runtime": 518.7544,
"eval_samples_per_second": 2.469,
"eval_steps_per_second": 0.156,
"eval_wer": 0.3931227899036703,
"step": 2000
},
{
"epoch": 9.574468085106384,
"grad_norm": 1.5506229400634766,
"learning_rate": 6.617777777777778e-06,
"loss": 0.0209,
"step": 2025
},
{
"epoch": 9.692671394799055,
"grad_norm": 1.2576079368591309,
"learning_rate": 6.562222222222223e-06,
"loss": 0.0176,
"step": 2050
},
{
"epoch": 9.810874704491725,
"grad_norm": 1.5241338014602661,
"learning_rate": 6.5066666666666665e-06,
"loss": 0.0202,
"step": 2075
},
{
"epoch": 9.929078014184396,
"grad_norm": 1.9363715648651123,
"learning_rate": 6.451111111111111e-06,
"loss": 0.0184,
"step": 2100
},
{
"epoch": 10.047281323877069,
"grad_norm": 1.6909509897232056,
"learning_rate": 6.395555555555556e-06,
"loss": 0.0146,
"step": 2125
},
{
"epoch": 10.16548463356974,
"grad_norm": 0.99550461769104,
"learning_rate": 6.34e-06,
"loss": 0.0128,
"step": 2150
},
{
"epoch": 10.28368794326241,
"grad_norm": 3.685783863067627,
"learning_rate": 6.284444444444445e-06,
"loss": 0.0126,
"step": 2175
},
{
"epoch": 10.401891252955084,
"grad_norm": 1.2055600881576538,
"learning_rate": 6.22888888888889e-06,
"loss": 0.0158,
"step": 2200
},
{
"epoch": 10.520094562647754,
"grad_norm": 0.9606339931488037,
"learning_rate": 6.173333333333333e-06,
"loss": 0.0138,
"step": 2225
},
{
"epoch": 10.638297872340425,
"grad_norm": 1.592624306678772,
"learning_rate": 6.117777777777778e-06,
"loss": 0.0148,
"step": 2250
},
{
"epoch": 10.756501182033096,
"grad_norm": 1.4008781909942627,
"learning_rate": 6.062222222222223e-06,
"loss": 0.0151,
"step": 2275
},
{
"epoch": 10.874704491725769,
"grad_norm": 0.9487536549568176,
"learning_rate": 6.006666666666667e-06,
"loss": 0.0158,
"step": 2300
},
{
"epoch": 10.99290780141844,
"grad_norm": 1.0289764404296875,
"learning_rate": 5.951111111111112e-06,
"loss": 0.013,
"step": 2325
},
{
"epoch": 11.11111111111111,
"grad_norm": 0.4679219722747803,
"learning_rate": 5.895555555555557e-06,
"loss": 0.0085,
"step": 2350
},
{
"epoch": 11.229314420803782,
"grad_norm": 0.2578885555267334,
"learning_rate": 5.84e-06,
"loss": 0.0085,
"step": 2375
},
{
"epoch": 11.347517730496454,
"grad_norm": 1.0958369970321655,
"learning_rate": 5.784444444444445e-06,
"loss": 0.0083,
"step": 2400
},
{
"epoch": 11.465721040189125,
"grad_norm": 0.5425341129302979,
"learning_rate": 5.72888888888889e-06,
"loss": 0.0098,
"step": 2425
},
{
"epoch": 11.583924349881796,
"grad_norm": 1.3698186874389648,
"learning_rate": 5.673333333333334e-06,
"loss": 0.0087,
"step": 2450
},
{
"epoch": 11.702127659574469,
"grad_norm": 0.35784247517585754,
"learning_rate": 5.617777777777779e-06,
"loss": 0.0102,
"step": 2475
},
{
"epoch": 11.82033096926714,
"grad_norm": 0.8136564493179321,
"learning_rate": 5.562222222222222e-06,
"loss": 0.0091,
"step": 2500
},
{
"epoch": 11.93853427895981,
"grad_norm": 0.8210328221321106,
"learning_rate": 5.506666666666667e-06,
"loss": 0.0091,
"step": 2525
},
{
"epoch": 12.056737588652481,
"grad_norm": 19.055444717407227,
"learning_rate": 5.451111111111112e-06,
"loss": 0.0072,
"step": 2550
},
{
"epoch": 12.174940898345154,
"grad_norm": 0.7317586541175842,
"learning_rate": 5.3955555555555565e-06,
"loss": 0.0095,
"step": 2575
},
{
"epoch": 12.293144208037825,
"grad_norm": 1.2949217557907104,
"learning_rate": 5.3400000000000005e-06,
"loss": 0.008,
"step": 2600
},
{
"epoch": 12.411347517730496,
"grad_norm": 1.124780297279358,
"learning_rate": 5.2844444444444454e-06,
"loss": 0.0065,
"step": 2625
},
{
"epoch": 12.529550827423169,
"grad_norm": 0.5692467093467712,
"learning_rate": 5.228888888888889e-06,
"loss": 0.0075,
"step": 2650
},
{
"epoch": 12.64775413711584,
"grad_norm": 1.309572458267212,
"learning_rate": 5.1733333333333335e-06,
"loss": 0.0079,
"step": 2675
},
{
"epoch": 12.76595744680851,
"grad_norm": 0.8301370739936829,
"learning_rate": 5.117777777777778e-06,
"loss": 0.0085,
"step": 2700
},
{
"epoch": 12.884160756501181,
"grad_norm": 0.9089380502700806,
"learning_rate": 5.062222222222222e-06,
"loss": 0.0074,
"step": 2725
},
{
"epoch": 13.002364066193854,
"grad_norm": 1.952169418334961,
"learning_rate": 5.006666666666667e-06,
"loss": 0.007,
"step": 2750
},
{
"epoch": 13.120567375886525,
"grad_norm": 1.1801737546920776,
"learning_rate": 4.951111111111111e-06,
"loss": 0.0055,
"step": 2775
},
{
"epoch": 13.238770685579196,
"grad_norm": 0.2363986074924469,
"learning_rate": 4.895555555555556e-06,
"loss": 0.0042,
"step": 2800
},
{
"epoch": 13.356973995271868,
"grad_norm": 0.2633114457130432,
"learning_rate": 4.84e-06,
"loss": 0.0057,
"step": 2825
},
{
"epoch": 13.47517730496454,
"grad_norm": 0.5577982664108276,
"learning_rate": 4.784444444444445e-06,
"loss": 0.006,
"step": 2850
},
{
"epoch": 13.59338061465721,
"grad_norm": 0.785844087600708,
"learning_rate": 4.728888888888889e-06,
"loss": 0.0066,
"step": 2875
},
{
"epoch": 13.711583924349881,
"grad_norm": 0.2809258699417114,
"learning_rate": 4.673333333333333e-06,
"loss": 0.0054,
"step": 2900
},
{
"epoch": 13.829787234042554,
"grad_norm": 0.6670119166374207,
"learning_rate": 4.617777777777778e-06,
"loss": 0.0051,
"step": 2925
},
{
"epoch": 13.947990543735225,
"grad_norm": 0.3410409688949585,
"learning_rate": 4.562222222222222e-06,
"loss": 0.0076,
"step": 2950
},
{
"epoch": 14.066193853427896,
"grad_norm": 0.5578156113624573,
"learning_rate": 4.506666666666667e-06,
"loss": 0.0035,
"step": 2975
},
{
"epoch": 14.184397163120567,
"grad_norm": 0.16467081010341644,
"learning_rate": 4.451111111111112e-06,
"loss": 0.0041,
"step": 3000
},
{
"epoch": 14.184397163120567,
"eval_loss": 0.9080753922462463,
"eval_runtime": 523.9162,
"eval_samples_per_second": 2.445,
"eval_steps_per_second": 0.155,
"eval_wer": 0.3811120595049384,
"step": 3000
},
{
"epoch": 14.30260047281324,
"grad_norm": 1.2722281217575073,
"learning_rate": 4.395555555555556e-06,
"loss": 0.0038,
"step": 3025
},
{
"epoch": 14.42080378250591,
"grad_norm": 0.44640254974365234,
"learning_rate": 4.34e-06,
"loss": 0.0038,
"step": 3050
},
{
"epoch": 14.539007092198581,
"grad_norm": 0.32442691922187805,
"learning_rate": 4.284444444444445e-06,
"loss": 0.0039,
"step": 3075
},
{
"epoch": 14.657210401891254,
"grad_norm": 0.6110165119171143,
"learning_rate": 4.228888888888889e-06,
"loss": 0.0037,
"step": 3100
},
{
"epoch": 14.775413711583925,
"grad_norm": 0.1621726006269455,
"learning_rate": 4.173333333333334e-06,
"loss": 0.0044,
"step": 3125
},
{
"epoch": 14.893617021276595,
"grad_norm": 0.11372427642345428,
"learning_rate": 4.117777777777779e-06,
"loss": 0.0033,
"step": 3150
},
{
"epoch": 15.011820330969266,
"grad_norm": 1.2040516138076782,
"learning_rate": 4.062222222222223e-06,
"loss": 0.0036,
"step": 3175
},
{
"epoch": 15.130023640661939,
"grad_norm": 0.10250318050384521,
"learning_rate": 4.006666666666667e-06,
"loss": 0.0025,
"step": 3200
},
{
"epoch": 15.24822695035461,
"grad_norm": 0.06473812460899353,
"learning_rate": 3.951111111111112e-06,
"loss": 0.002,
"step": 3225
},
{
"epoch": 15.36643026004728,
"grad_norm": 0.07791823148727417,
"learning_rate": 3.895555555555556e-06,
"loss": 0.0014,
"step": 3250
},
{
"epoch": 15.484633569739954,
"grad_norm": 0.10379495471715927,
"learning_rate": 3.8400000000000005e-06,
"loss": 0.0019,
"step": 3275
},
{
"epoch": 15.602836879432624,
"grad_norm": 0.39994242787361145,
"learning_rate": 3.784444444444445e-06,
"loss": 0.0022,
"step": 3300
},
{
"epoch": 15.721040189125295,
"grad_norm": 0.858131468296051,
"learning_rate": 3.728888888888889e-06,
"loss": 0.0023,
"step": 3325
},
{
"epoch": 15.839243498817966,
"grad_norm": 2.11108136177063,
"learning_rate": 3.673333333333334e-06,
"loss": 0.0022,
"step": 3350
},
{
"epoch": 15.957446808510639,
"grad_norm": 0.06788184493780136,
"learning_rate": 3.617777777777778e-06,
"loss": 0.0024,
"step": 3375
},
{
"epoch": 16.07565011820331,
"grad_norm": 0.18670986592769623,
"learning_rate": 3.5622222222222224e-06,
"loss": 0.0032,
"step": 3400
},
{
"epoch": 16.19385342789598,
"grad_norm": 0.14811524748802185,
"learning_rate": 3.5066666666666673e-06,
"loss": 0.0024,
"step": 3425
},
{
"epoch": 16.31205673758865,
"grad_norm": 0.5788585543632507,
"learning_rate": 3.4511111111111113e-06,
"loss": 0.0015,
"step": 3450
},
{
"epoch": 16.430260047281322,
"grad_norm": 0.40605735778808594,
"learning_rate": 3.3955555555555558e-06,
"loss": 0.0015,
"step": 3475
},
{
"epoch": 16.548463356973997,
"grad_norm": 0.09270340204238892,
"learning_rate": 3.3400000000000006e-06,
"loss": 0.002,
"step": 3500
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.07288171350955963,
"learning_rate": 3.2844444444444447e-06,
"loss": 0.0027,
"step": 3525
},
{
"epoch": 16.78486997635934,
"grad_norm": 0.08400170505046844,
"learning_rate": 3.228888888888889e-06,
"loss": 0.0014,
"step": 3550
},
{
"epoch": 16.90307328605201,
"grad_norm": 0.6169310808181763,
"learning_rate": 3.173333333333334e-06,
"loss": 0.0015,
"step": 3575
},
{
"epoch": 17.02127659574468,
"grad_norm": 0.030366981402039528,
"learning_rate": 3.117777777777778e-06,
"loss": 0.0015,
"step": 3600
},
{
"epoch": 17.13947990543735,
"grad_norm": 0.029006587341427803,
"learning_rate": 3.0622222222222225e-06,
"loss": 0.0011,
"step": 3625
},
{
"epoch": 17.257683215130022,
"grad_norm": 0.018718773499131203,
"learning_rate": 3.0066666666666674e-06,
"loss": 0.001,
"step": 3650
},
{
"epoch": 17.375886524822697,
"grad_norm": 0.02593953162431717,
"learning_rate": 2.9511111111111114e-06,
"loss": 0.0007,
"step": 3675
},
{
"epoch": 17.494089834515368,
"grad_norm": 0.053433727473020554,
"learning_rate": 2.895555555555556e-06,
"loss": 0.0008,
"step": 3700
},
{
"epoch": 17.61229314420804,
"grad_norm": 0.02751092053949833,
"learning_rate": 2.84e-06,
"loss": 0.0012,
"step": 3725
},
{
"epoch": 17.73049645390071,
"grad_norm": 0.03605583682656288,
"learning_rate": 2.784444444444445e-06,
"loss": 0.0008,
"step": 3750
},
{
"epoch": 17.84869976359338,
"grad_norm": 0.09203966706991196,
"learning_rate": 2.7288888888888893e-06,
"loss": 0.0009,
"step": 3775
},
{
"epoch": 17.96690307328605,
"grad_norm": 0.02148735709488392,
"learning_rate": 2.6733333333333333e-06,
"loss": 0.0007,
"step": 3800
},
{
"epoch": 18.085106382978722,
"grad_norm": 0.01998170092701912,
"learning_rate": 2.617777777777778e-06,
"loss": 0.001,
"step": 3825
},
{
"epoch": 18.203309692671393,
"grad_norm": 0.018996959552168846,
"learning_rate": 2.5622222222222226e-06,
"loss": 0.0007,
"step": 3850
},
{
"epoch": 18.321513002364068,
"grad_norm": 0.02218470722436905,
"learning_rate": 2.5066666666666667e-06,
"loss": 0.0007,
"step": 3875
},
{
"epoch": 18.43971631205674,
"grad_norm": 0.0916813537478447,
"learning_rate": 2.451111111111111e-06,
"loss": 0.0007,
"step": 3900
},
{
"epoch": 18.55791962174941,
"grad_norm": 0.020167546346783638,
"learning_rate": 2.3955555555555556e-06,
"loss": 0.0006,
"step": 3925
},
{
"epoch": 18.67612293144208,
"grad_norm": 0.016945689916610718,
"learning_rate": 2.3400000000000005e-06,
"loss": 0.0007,
"step": 3950
},
{
"epoch": 18.79432624113475,
"grad_norm": 0.018247857689857483,
"learning_rate": 2.2844444444444445e-06,
"loss": 0.0007,
"step": 3975
},
{
"epoch": 18.912529550827422,
"grad_norm": 0.016460491344332695,
"learning_rate": 2.228888888888889e-06,
"loss": 0.0006,
"step": 4000
},
{
"epoch": 18.912529550827422,
"eval_loss": 1.0020800828933716,
"eval_runtime": 524.532,
"eval_samples_per_second": 2.442,
"eval_steps_per_second": 0.154,
"eval_wer": 0.38403853188635534,
"step": 4000
},
{
"epoch": 19.030732860520093,
"grad_norm": 0.014541847631335258,
"learning_rate": 2.1733333333333334e-06,
"loss": 0.0007,
"step": 4025
},
{
"epoch": 19.148936170212767,
"grad_norm": 0.017259875312447548,
"learning_rate": 2.117777777777778e-06,
"loss": 0.0006,
"step": 4050
},
{
"epoch": 19.26713947990544,
"grad_norm": 0.013374953530728817,
"learning_rate": 2.0622222222222223e-06,
"loss": 0.0006,
"step": 4075
},
{
"epoch": 19.38534278959811,
"grad_norm": 0.023194260895252228,
"learning_rate": 2.006666666666667e-06,
"loss": 0.0006,
"step": 4100
},
{
"epoch": 19.50354609929078,
"grad_norm": 0.013524125330150127,
"learning_rate": 1.9511111111111113e-06,
"loss": 0.0006,
"step": 4125
},
{
"epoch": 19.62174940898345,
"grad_norm": 0.01714450679719448,
"learning_rate": 1.8955555555555557e-06,
"loss": 0.0006,
"step": 4150
},
{
"epoch": 19.739952718676122,
"grad_norm": 0.017303649336099625,
"learning_rate": 1.8400000000000002e-06,
"loss": 0.0006,
"step": 4175
},
{
"epoch": 19.858156028368793,
"grad_norm": 0.025232350453734398,
"learning_rate": 1.7844444444444444e-06,
"loss": 0.0006,
"step": 4200
},
{
"epoch": 19.976359338061467,
"grad_norm": 0.019350698217749596,
"learning_rate": 1.728888888888889e-06,
"loss": 0.0006,
"step": 4225
},
{
"epoch": 20.094562647754138,
"grad_norm": 0.0166899636387825,
"learning_rate": 1.6733333333333335e-06,
"loss": 0.0005,
"step": 4250
},
{
"epoch": 20.21276595744681,
"grad_norm": 0.015743156895041466,
"learning_rate": 1.6177777777777778e-06,
"loss": 0.0005,
"step": 4275
},
{
"epoch": 20.33096926713948,
"grad_norm": 0.016623031347990036,
"learning_rate": 1.5622222222222225e-06,
"loss": 0.0005,
"step": 4300
},
{
"epoch": 20.44917257683215,
"grad_norm": 0.013974419794976711,
"learning_rate": 1.506666666666667e-06,
"loss": 0.0005,
"step": 4325
},
{
"epoch": 20.56737588652482,
"grad_norm": 0.014741248451173306,
"learning_rate": 1.4511111111111112e-06,
"loss": 0.0005,
"step": 4350
},
{
"epoch": 20.685579196217493,
"grad_norm": 0.016908541321754456,
"learning_rate": 1.3955555555555556e-06,
"loss": 0.0005,
"step": 4375
},
{
"epoch": 20.803782505910167,
"grad_norm": 0.01568152941763401,
"learning_rate": 1.34e-06,
"loss": 0.0005,
"step": 4400
},
{
"epoch": 20.921985815602838,
"grad_norm": 0.01495905127376318,
"learning_rate": 1.2844444444444445e-06,
"loss": 0.0005,
"step": 4425
},
{
"epoch": 21.04018912529551,
"grad_norm": 0.014800423756241798,
"learning_rate": 1.228888888888889e-06,
"loss": 0.0005,
"step": 4450
},
{
"epoch": 21.15839243498818,
"grad_norm": 0.015356684103608131,
"learning_rate": 1.1733333333333335e-06,
"loss": 0.0005,
"step": 4475
},
{
"epoch": 21.27659574468085,
"grad_norm": 0.014247337356209755,
"learning_rate": 1.117777777777778e-06,
"loss": 0.0005,
"step": 4500
},
{
"epoch": 21.39479905437352,
"grad_norm": 0.015071702189743519,
"learning_rate": 1.0622222222222222e-06,
"loss": 0.0005,
"step": 4525
},
{
"epoch": 21.513002364066192,
"grad_norm": 0.01471630111336708,
"learning_rate": 1.0066666666666668e-06,
"loss": 0.0005,
"step": 4550
},
{
"epoch": 21.631205673758867,
"grad_norm": 0.013918698765337467,
"learning_rate": 9.511111111111111e-07,
"loss": 0.0005,
"step": 4575
},
{
"epoch": 21.749408983451538,
"grad_norm": 0.015510810539126396,
"learning_rate": 8.955555555555557e-07,
"loss": 0.0005,
"step": 4600
},
{
"epoch": 21.86761229314421,
"grad_norm": 0.01677914895117283,
"learning_rate": 8.400000000000001e-07,
"loss": 0.0005,
"step": 4625
},
{
"epoch": 21.98581560283688,
"grad_norm": 0.013124167919158936,
"learning_rate": 7.844444444444445e-07,
"loss": 0.0005,
"step": 4650
},
{
"epoch": 22.10401891252955,
"grad_norm": 0.013821087777614594,
"learning_rate": 7.28888888888889e-07,
"loss": 0.0005,
"step": 4675
},
{
"epoch": 22.22222222222222,
"grad_norm": 0.010750818997621536,
"learning_rate": 6.733333333333334e-07,
"loss": 0.0005,
"step": 4700
},
{
"epoch": 22.340425531914892,
"grad_norm": 0.015222841873764992,
"learning_rate": 6.177777777777778e-07,
"loss": 0.0005,
"step": 4725
},
{
"epoch": 22.458628841607563,
"grad_norm": 0.01256669219583273,
"learning_rate": 5.622222222222223e-07,
"loss": 0.0005,
"step": 4750
},
{
"epoch": 22.576832151300238,
"grad_norm": 0.01457743626087904,
"learning_rate": 5.066666666666667e-07,
"loss": 0.0005,
"step": 4775
},
{
"epoch": 22.69503546099291,
"grad_norm": 0.014546710066497326,
"learning_rate": 4.511111111111111e-07,
"loss": 0.0005,
"step": 4800
},
{
"epoch": 22.81323877068558,
"grad_norm": 0.016056003049016,
"learning_rate": 3.9555555555555557e-07,
"loss": 0.0005,
"step": 4825
},
{
"epoch": 22.93144208037825,
"grad_norm": 0.016192374750971794,
"learning_rate": 3.4000000000000003e-07,
"loss": 0.0005,
"step": 4850
},
{
"epoch": 23.04964539007092,
"grad_norm": 0.01114520151168108,
"learning_rate": 2.844444444444445e-07,
"loss": 0.0005,
"step": 4875
},
{
"epoch": 23.167848699763592,
"grad_norm": 0.04461406543850899,
"learning_rate": 2.2888888888888892e-07,
"loss": 0.0005,
"step": 4900
},
{
"epoch": 23.286052009456263,
"grad_norm": 0.05798293650150299,
"learning_rate": 1.7333333333333335e-07,
"loss": 0.0005,
"step": 4925
},
{
"epoch": 23.404255319148938,
"grad_norm": 0.013462238945066929,
"learning_rate": 1.1777777777777778e-07,
"loss": 0.0004,
"step": 4950
},
{
"epoch": 23.52245862884161,
"grad_norm": 0.011377551592886448,
"learning_rate": 6.222222222222223e-08,
"loss": 0.0005,
"step": 4975
},
{
"epoch": 23.64066193853428,
"grad_norm": 0.010640958324074745,
"learning_rate": 6.666666666666667e-09,
"loss": 0.0005,
"step": 5000
},
{
"epoch": 23.64066193853428,
"eval_loss": 1.0314486026763916,
"eval_runtime": 519.5946,
"eval_samples_per_second": 2.465,
"eval_steps_per_second": 0.156,
"eval_wer": 0.3835507864894525,
"step": 5000
},
{
"epoch": 23.64066193853428,
"step": 5000,
"total_flos": 3.393166998601728e+20,
"train_loss": 0.10379596998989582,
"train_runtime": 41720.0473,
"train_samples_per_second": 3.835,
"train_steps_per_second": 0.12
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 24,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.393166998601728e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}