|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 23.64066193853428, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1182033096926714, |
|
"grad_norm": 9.473404884338379, |
|
"learning_rate": 4.800000000000001e-07, |
|
"loss": 1.5126, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2364066193853428, |
|
"grad_norm": 4.571593761444092, |
|
"learning_rate": 9.800000000000001e-07, |
|
"loss": 1.2149, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3546099290780142, |
|
"grad_norm": 7.452127933502197, |
|
"learning_rate": 1.48e-06, |
|
"loss": 0.9173, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4728132387706856, |
|
"grad_norm": 4.357412338256836, |
|
"learning_rate": 1.98e-06, |
|
"loss": 0.7886, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5910165484633569, |
|
"grad_norm": 4.547176837921143, |
|
"learning_rate": 2.4800000000000004e-06, |
|
"loss": 0.7374, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7092198581560284, |
|
"grad_norm": 5.427709102630615, |
|
"learning_rate": 2.9800000000000003e-06, |
|
"loss": 0.7146, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8274231678486997, |
|
"grad_norm": 3.91564679145813, |
|
"learning_rate": 3.48e-06, |
|
"loss": 0.6503, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.9456264775413712, |
|
"grad_norm": 4.455580711364746, |
|
"learning_rate": 3.980000000000001e-06, |
|
"loss": 0.6658, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"grad_norm": 3.6917192935943604, |
|
"learning_rate": 4.48e-06, |
|
"loss": 0.5825, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.1820330969267139, |
|
"grad_norm": 4.471068382263184, |
|
"learning_rate": 4.980000000000001e-06, |
|
"loss": 0.5309, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3002364066193852, |
|
"grad_norm": 3.2161197662353516, |
|
"learning_rate": 5.480000000000001e-06, |
|
"loss": 0.5326, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.4184397163120568, |
|
"grad_norm": 5.081483364105225, |
|
"learning_rate": 5.98e-06, |
|
"loss": 0.5245, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5366430260047281, |
|
"grad_norm": 4.846170902252197, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 0.5414, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.6548463356973995, |
|
"grad_norm": 4.416396617889404, |
|
"learning_rate": 6.96e-06, |
|
"loss": 0.5286, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.773049645390071, |
|
"grad_norm": 4.048226356506348, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 0.5296, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.8912529550827424, |
|
"grad_norm": 3.8953003883361816, |
|
"learning_rate": 7.960000000000002e-06, |
|
"loss": 0.5342, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0094562647754137, |
|
"grad_norm": 2.8048012256622314, |
|
"learning_rate": 8.46e-06, |
|
"loss": 0.5194, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"grad_norm": 3.505836009979248, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.3554, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.2458628841607564, |
|
"grad_norm": 3.4311647415161133, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.364, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.3640661938534278, |
|
"grad_norm": 3.827054500579834, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.3648, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.482269503546099, |
|
"grad_norm": 4.961852550506592, |
|
"learning_rate": 9.94888888888889e-06, |
|
"loss": 0.3742, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.6004728132387704, |
|
"grad_norm": 3.1408016681671143, |
|
"learning_rate": 9.893333333333334e-06, |
|
"loss": 0.3726, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.7186761229314422, |
|
"grad_norm": 3.81847882270813, |
|
"learning_rate": 9.837777777777778e-06, |
|
"loss": 0.3769, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.8368794326241136, |
|
"grad_norm": 3.568127393722534, |
|
"learning_rate": 9.782222222222222e-06, |
|
"loss": 0.3835, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.955082742316785, |
|
"grad_norm": 8.084869384765625, |
|
"learning_rate": 9.726666666666668e-06, |
|
"loss": 0.389, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 3.0732860520094563, |
|
"grad_norm": 3.035639762878418, |
|
"learning_rate": 9.671111111111112e-06, |
|
"loss": 0.2804, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"grad_norm": 2.580465078353882, |
|
"learning_rate": 9.615555555555558e-06, |
|
"loss": 0.241, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 3.309692671394799, |
|
"grad_norm": 3.6414577960968018, |
|
"learning_rate": 9.56e-06, |
|
"loss": 0.2326, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.4278959810874703, |
|
"grad_norm": 2.9789085388183594, |
|
"learning_rate": 9.504444444444446e-06, |
|
"loss": 0.2302, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 3.546099290780142, |
|
"grad_norm": 6.875185966491699, |
|
"learning_rate": 9.44888888888889e-06, |
|
"loss": 0.2515, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.664302600472813, |
|
"grad_norm": 3.034479856491089, |
|
"learning_rate": 9.393333333333334e-06, |
|
"loss": 0.2381, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 3.7825059101654848, |
|
"grad_norm": 3.64566969871521, |
|
"learning_rate": 9.33777777777778e-06, |
|
"loss": 0.2339, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.900709219858156, |
|
"grad_norm": 4.0959696769714355, |
|
"learning_rate": 9.282222222222222e-06, |
|
"loss": 0.2456, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 4.0189125295508275, |
|
"grad_norm": 2.0153396129608154, |
|
"learning_rate": 9.226666666666668e-06, |
|
"loss": 0.2384, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.137115839243499, |
|
"grad_norm": 2.4814517498016357, |
|
"learning_rate": 9.171111111111112e-06, |
|
"loss": 0.1472, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"grad_norm": 3.0185892581939697, |
|
"learning_rate": 9.115555555555556e-06, |
|
"loss": 0.1482, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.373522458628842, |
|
"grad_norm": 2.817070722579956, |
|
"learning_rate": 9.060000000000001e-06, |
|
"loss": 0.1496, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 4.491725768321513, |
|
"grad_norm": 2.3127124309539795, |
|
"learning_rate": 9.004444444444445e-06, |
|
"loss": 0.1441, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.609929078014185, |
|
"grad_norm": 2.191143274307251, |
|
"learning_rate": 8.94888888888889e-06, |
|
"loss": 0.1474, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 4.7281323877068555, |
|
"grad_norm": 2.4471395015716553, |
|
"learning_rate": 8.893333333333333e-06, |
|
"loss": 0.1503, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.7281323877068555, |
|
"eval_loss": 0.6543964743614197, |
|
"eval_runtime": 532.706, |
|
"eval_samples_per_second": 2.405, |
|
"eval_steps_per_second": 0.152, |
|
"eval_wer": 0.4245213998292891, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.846335697399527, |
|
"grad_norm": 2.7800285816192627, |
|
"learning_rate": 8.83777777777778e-06, |
|
"loss": 0.1549, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 4.964539007092198, |
|
"grad_norm": 5.495487689971924, |
|
"learning_rate": 8.782222222222223e-06, |
|
"loss": 0.1529, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.08274231678487, |
|
"grad_norm": 2.151169538497925, |
|
"learning_rate": 8.726666666666667e-06, |
|
"loss": 0.1009, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 5.200945626477542, |
|
"grad_norm": 2.556049108505249, |
|
"learning_rate": 8.671111111111113e-06, |
|
"loss": 0.0838, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 2.955305576324463, |
|
"learning_rate": 8.615555555555555e-06, |
|
"loss": 0.0866, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 5.4373522458628845, |
|
"grad_norm": 2.1313858032226562, |
|
"learning_rate": 8.560000000000001e-06, |
|
"loss": 0.0876, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 2.003467559814453, |
|
"learning_rate": 8.504444444444445e-06, |
|
"loss": 0.0867, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 5.673758865248227, |
|
"grad_norm": 2.9585061073303223, |
|
"learning_rate": 8.448888888888889e-06, |
|
"loss": 0.0937, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.791962174940898, |
|
"grad_norm": 2.8424105644226074, |
|
"learning_rate": 8.393333333333335e-06, |
|
"loss": 0.0888, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 5.91016548463357, |
|
"grad_norm": 3.2550556659698486, |
|
"learning_rate": 8.337777777777777e-06, |
|
"loss": 0.0964, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.028368794326241, |
|
"grad_norm": 2.1639039516448975, |
|
"learning_rate": 8.282222222222223e-06, |
|
"loss": 0.083, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 6.1465721040189125, |
|
"grad_norm": 10.045706748962402, |
|
"learning_rate": 8.226666666666667e-06, |
|
"loss": 0.0489, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.264775413711584, |
|
"grad_norm": 0.9968127012252808, |
|
"learning_rate": 8.171111111111113e-06, |
|
"loss": 0.0569, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"grad_norm": 2.098369836807251, |
|
"learning_rate": 8.115555555555557e-06, |
|
"loss": 0.0553, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.501182033096927, |
|
"grad_norm": 2.182260036468506, |
|
"learning_rate": 8.06e-06, |
|
"loss": 0.0514, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 6.619385342789598, |
|
"grad_norm": 2.040424108505249, |
|
"learning_rate": 8.004444444444445e-06, |
|
"loss": 0.0534, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.73758865248227, |
|
"grad_norm": 2.8347978591918945, |
|
"learning_rate": 7.948888888888889e-06, |
|
"loss": 0.0604, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 6.855791962174941, |
|
"grad_norm": 2.2770578861236572, |
|
"learning_rate": 7.893333333333335e-06, |
|
"loss": 0.058, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 6.973995271867612, |
|
"grad_norm": 2.0066399574279785, |
|
"learning_rate": 7.837777777777779e-06, |
|
"loss": 0.0571, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 7.092198581560283, |
|
"grad_norm": 1.8423205614089966, |
|
"learning_rate": 7.782222222222223e-06, |
|
"loss": 0.0375, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.210401891252955, |
|
"grad_norm": 1.2331498861312866, |
|
"learning_rate": 7.726666666666667e-06, |
|
"loss": 0.0352, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 7.328605200945627, |
|
"grad_norm": 1.8632996082305908, |
|
"learning_rate": 7.67111111111111e-06, |
|
"loss": 0.0344, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"grad_norm": 2.3140857219696045, |
|
"learning_rate": 7.6155555555555564e-06, |
|
"loss": 0.0351, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 7.5650118203309695, |
|
"grad_norm": 1.7188278436660767, |
|
"learning_rate": 7.5600000000000005e-06, |
|
"loss": 0.0383, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.68321513002364, |
|
"grad_norm": 1.9774043560028076, |
|
"learning_rate": 7.504444444444445e-06, |
|
"loss": 0.0374, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 7.801418439716312, |
|
"grad_norm": 2.773897171020508, |
|
"learning_rate": 7.44888888888889e-06, |
|
"loss": 0.0573, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 7.919621749408983, |
|
"grad_norm": 2.1994612216949463, |
|
"learning_rate": 7.393333333333333e-06, |
|
"loss": 0.0434, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 8.037825059101655, |
|
"grad_norm": 1.4192453622817993, |
|
"learning_rate": 7.337777777777778e-06, |
|
"loss": 0.0313, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.156028368794326, |
|
"grad_norm": 2.99482798576355, |
|
"learning_rate": 7.282222222222222e-06, |
|
"loss": 0.0233, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 8.274231678486998, |
|
"grad_norm": 1.1068618297576904, |
|
"learning_rate": 7.226666666666667e-06, |
|
"loss": 0.0232, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.39243498817967, |
|
"grad_norm": 1.4327691793441772, |
|
"learning_rate": 7.171111111111112e-06, |
|
"loss": 0.0242, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"grad_norm": 2.041482925415039, |
|
"learning_rate": 7.115555555555557e-06, |
|
"loss": 0.0268, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.628841607565011, |
|
"grad_norm": 1.097105860710144, |
|
"learning_rate": 7.062222222222223e-06, |
|
"loss": 0.0258, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 8.747044917257684, |
|
"grad_norm": 1.367948055267334, |
|
"learning_rate": 7.006666666666667e-06, |
|
"loss": 0.0275, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 8.865248226950355, |
|
"grad_norm": 1.6671417951583862, |
|
"learning_rate": 6.951111111111112e-06, |
|
"loss": 0.0272, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 8.983451536643026, |
|
"grad_norm": 2.5331335067749023, |
|
"learning_rate": 6.8955555555555565e-06, |
|
"loss": 0.0265, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.101654846335697, |
|
"grad_norm": 0.5550170540809631, |
|
"learning_rate": 6.8400000000000014e-06, |
|
"loss": 0.016, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 9.21985815602837, |
|
"grad_norm": 0.7913989424705505, |
|
"learning_rate": 6.784444444444445e-06, |
|
"loss": 0.0173, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 9.33806146572104, |
|
"grad_norm": 1.1123679876327515, |
|
"learning_rate": 6.7288888888888895e-06, |
|
"loss": 0.0178, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 9.456264775413711, |
|
"grad_norm": 1.4334158897399902, |
|
"learning_rate": 6.6733333333333335e-06, |
|
"loss": 0.018, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.456264775413711, |
|
"eval_loss": 0.8408699631690979, |
|
"eval_runtime": 518.7544, |
|
"eval_samples_per_second": 2.469, |
|
"eval_steps_per_second": 0.156, |
|
"eval_wer": 0.3931227899036703, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.574468085106384, |
|
"grad_norm": 1.5506229400634766, |
|
"learning_rate": 6.617777777777778e-06, |
|
"loss": 0.0209, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 9.692671394799055, |
|
"grad_norm": 1.2576079368591309, |
|
"learning_rate": 6.562222222222223e-06, |
|
"loss": 0.0176, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 9.810874704491725, |
|
"grad_norm": 1.5241338014602661, |
|
"learning_rate": 6.5066666666666665e-06, |
|
"loss": 0.0202, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 9.929078014184396, |
|
"grad_norm": 1.9363715648651123, |
|
"learning_rate": 6.451111111111111e-06, |
|
"loss": 0.0184, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 10.047281323877069, |
|
"grad_norm": 1.6909509897232056, |
|
"learning_rate": 6.395555555555556e-06, |
|
"loss": 0.0146, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 10.16548463356974, |
|
"grad_norm": 0.99550461769104, |
|
"learning_rate": 6.34e-06, |
|
"loss": 0.0128, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 10.28368794326241, |
|
"grad_norm": 3.685783863067627, |
|
"learning_rate": 6.284444444444445e-06, |
|
"loss": 0.0126, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 10.401891252955084, |
|
"grad_norm": 1.2055600881576538, |
|
"learning_rate": 6.22888888888889e-06, |
|
"loss": 0.0158, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 10.520094562647754, |
|
"grad_norm": 0.9606339931488037, |
|
"learning_rate": 6.173333333333333e-06, |
|
"loss": 0.0138, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"grad_norm": 1.592624306678772, |
|
"learning_rate": 6.117777777777778e-06, |
|
"loss": 0.0148, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 10.756501182033096, |
|
"grad_norm": 1.4008781909942627, |
|
"learning_rate": 6.062222222222223e-06, |
|
"loss": 0.0151, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 10.874704491725769, |
|
"grad_norm": 0.9487536549568176, |
|
"learning_rate": 6.006666666666667e-06, |
|
"loss": 0.0158, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 10.99290780141844, |
|
"grad_norm": 1.0289764404296875, |
|
"learning_rate": 5.951111111111112e-06, |
|
"loss": 0.013, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"grad_norm": 0.4679219722747803, |
|
"learning_rate": 5.895555555555557e-06, |
|
"loss": 0.0085, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 11.229314420803782, |
|
"grad_norm": 0.2578885555267334, |
|
"learning_rate": 5.84e-06, |
|
"loss": 0.0085, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 11.347517730496454, |
|
"grad_norm": 1.0958369970321655, |
|
"learning_rate": 5.784444444444445e-06, |
|
"loss": 0.0083, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 11.465721040189125, |
|
"grad_norm": 0.5425341129302979, |
|
"learning_rate": 5.72888888888889e-06, |
|
"loss": 0.0098, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 11.583924349881796, |
|
"grad_norm": 1.3698186874389648, |
|
"learning_rate": 5.673333333333334e-06, |
|
"loss": 0.0087, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 11.702127659574469, |
|
"grad_norm": 0.35784247517585754, |
|
"learning_rate": 5.617777777777779e-06, |
|
"loss": 0.0102, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 11.82033096926714, |
|
"grad_norm": 0.8136564493179321, |
|
"learning_rate": 5.562222222222222e-06, |
|
"loss": 0.0091, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.93853427895981, |
|
"grad_norm": 0.8210328221321106, |
|
"learning_rate": 5.506666666666667e-06, |
|
"loss": 0.0091, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 12.056737588652481, |
|
"grad_norm": 19.055444717407227, |
|
"learning_rate": 5.451111111111112e-06, |
|
"loss": 0.0072, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 12.174940898345154, |
|
"grad_norm": 0.7317586541175842, |
|
"learning_rate": 5.3955555555555565e-06, |
|
"loss": 0.0095, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 12.293144208037825, |
|
"grad_norm": 1.2949217557907104, |
|
"learning_rate": 5.3400000000000005e-06, |
|
"loss": 0.008, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 12.411347517730496, |
|
"grad_norm": 1.124780297279358, |
|
"learning_rate": 5.2844444444444454e-06, |
|
"loss": 0.0065, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 12.529550827423169, |
|
"grad_norm": 0.5692467093467712, |
|
"learning_rate": 5.228888888888889e-06, |
|
"loss": 0.0075, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 12.64775413711584, |
|
"grad_norm": 1.309572458267212, |
|
"learning_rate": 5.1733333333333335e-06, |
|
"loss": 0.0079, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 12.76595744680851, |
|
"grad_norm": 0.8301370739936829, |
|
"learning_rate": 5.117777777777778e-06, |
|
"loss": 0.0085, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 12.884160756501181, |
|
"grad_norm": 0.9089380502700806, |
|
"learning_rate": 5.062222222222222e-06, |
|
"loss": 0.0074, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 13.002364066193854, |
|
"grad_norm": 1.952169418334961, |
|
"learning_rate": 5.006666666666667e-06, |
|
"loss": 0.007, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 13.120567375886525, |
|
"grad_norm": 1.1801737546920776, |
|
"learning_rate": 4.951111111111111e-06, |
|
"loss": 0.0055, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 13.238770685579196, |
|
"grad_norm": 0.2363986074924469, |
|
"learning_rate": 4.895555555555556e-06, |
|
"loss": 0.0042, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 13.356973995271868, |
|
"grad_norm": 0.2633114457130432, |
|
"learning_rate": 4.84e-06, |
|
"loss": 0.0057, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 13.47517730496454, |
|
"grad_norm": 0.5577982664108276, |
|
"learning_rate": 4.784444444444445e-06, |
|
"loss": 0.006, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 13.59338061465721, |
|
"grad_norm": 0.785844087600708, |
|
"learning_rate": 4.728888888888889e-06, |
|
"loss": 0.0066, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 13.711583924349881, |
|
"grad_norm": 0.2809258699417114, |
|
"learning_rate": 4.673333333333333e-06, |
|
"loss": 0.0054, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 13.829787234042554, |
|
"grad_norm": 0.6670119166374207, |
|
"learning_rate": 4.617777777777778e-06, |
|
"loss": 0.0051, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 13.947990543735225, |
|
"grad_norm": 0.3410409688949585, |
|
"learning_rate": 4.562222222222222e-06, |
|
"loss": 0.0076, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 14.066193853427896, |
|
"grad_norm": 0.5578156113624573, |
|
"learning_rate": 4.506666666666667e-06, |
|
"loss": 0.0035, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 14.184397163120567, |
|
"grad_norm": 0.16467081010341644, |
|
"learning_rate": 4.451111111111112e-06, |
|
"loss": 0.0041, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.184397163120567, |
|
"eval_loss": 0.9080753922462463, |
|
"eval_runtime": 523.9162, |
|
"eval_samples_per_second": 2.445, |
|
"eval_steps_per_second": 0.155, |
|
"eval_wer": 0.3811120595049384, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.30260047281324, |
|
"grad_norm": 1.2722281217575073, |
|
"learning_rate": 4.395555555555556e-06, |
|
"loss": 0.0038, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 14.42080378250591, |
|
"grad_norm": 0.44640254974365234, |
|
"learning_rate": 4.34e-06, |
|
"loss": 0.0038, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 14.539007092198581, |
|
"grad_norm": 0.32442691922187805, |
|
"learning_rate": 4.284444444444445e-06, |
|
"loss": 0.0039, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 14.657210401891254, |
|
"grad_norm": 0.6110165119171143, |
|
"learning_rate": 4.228888888888889e-06, |
|
"loss": 0.0037, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 14.775413711583925, |
|
"grad_norm": 0.1621726006269455, |
|
"learning_rate": 4.173333333333334e-06, |
|
"loss": 0.0044, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 14.893617021276595, |
|
"grad_norm": 0.11372427642345428, |
|
"learning_rate": 4.117777777777779e-06, |
|
"loss": 0.0033, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 15.011820330969266, |
|
"grad_norm": 1.2040516138076782, |
|
"learning_rate": 4.062222222222223e-06, |
|
"loss": 0.0036, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 15.130023640661939, |
|
"grad_norm": 0.10250318050384521, |
|
"learning_rate": 4.006666666666667e-06, |
|
"loss": 0.0025, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 15.24822695035461, |
|
"grad_norm": 0.06473812460899353, |
|
"learning_rate": 3.951111111111112e-06, |
|
"loss": 0.002, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 15.36643026004728, |
|
"grad_norm": 0.07791823148727417, |
|
"learning_rate": 3.895555555555556e-06, |
|
"loss": 0.0014, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 15.484633569739954, |
|
"grad_norm": 0.10379495471715927, |
|
"learning_rate": 3.8400000000000005e-06, |
|
"loss": 0.0019, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 15.602836879432624, |
|
"grad_norm": 0.39994242787361145, |
|
"learning_rate": 3.784444444444445e-06, |
|
"loss": 0.0022, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 15.721040189125295, |
|
"grad_norm": 0.858131468296051, |
|
"learning_rate": 3.728888888888889e-06, |
|
"loss": 0.0023, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 15.839243498817966, |
|
"grad_norm": 2.11108136177063, |
|
"learning_rate": 3.673333333333334e-06, |
|
"loss": 0.0022, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"grad_norm": 0.06788184493780136, |
|
"learning_rate": 3.617777777777778e-06, |
|
"loss": 0.0024, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 16.07565011820331, |
|
"grad_norm": 0.18670986592769623, |
|
"learning_rate": 3.5622222222222224e-06, |
|
"loss": 0.0032, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 16.19385342789598, |
|
"grad_norm": 0.14811524748802185, |
|
"learning_rate": 3.5066666666666673e-06, |
|
"loss": 0.0024, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 16.31205673758865, |
|
"grad_norm": 0.5788585543632507, |
|
"learning_rate": 3.4511111111111113e-06, |
|
"loss": 0.0015, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 16.430260047281322, |
|
"grad_norm": 0.40605735778808594, |
|
"learning_rate": 3.3955555555555558e-06, |
|
"loss": 0.0015, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 16.548463356973997, |
|
"grad_norm": 0.09270340204238892, |
|
"learning_rate": 3.3400000000000006e-06, |
|
"loss": 0.002, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 0.07288171350955963, |
|
"learning_rate": 3.2844444444444447e-06, |
|
"loss": 0.0027, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 16.78486997635934, |
|
"grad_norm": 0.08400170505046844, |
|
"learning_rate": 3.228888888888889e-06, |
|
"loss": 0.0014, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 16.90307328605201, |
|
"grad_norm": 0.6169310808181763, |
|
"learning_rate": 3.173333333333334e-06, |
|
"loss": 0.0015, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 17.02127659574468, |
|
"grad_norm": 0.030366981402039528, |
|
"learning_rate": 3.117777777777778e-06, |
|
"loss": 0.0015, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 17.13947990543735, |
|
"grad_norm": 0.029006587341427803, |
|
"learning_rate": 3.0622222222222225e-06, |
|
"loss": 0.0011, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 17.257683215130022, |
|
"grad_norm": 0.018718773499131203, |
|
"learning_rate": 3.0066666666666674e-06, |
|
"loss": 0.001, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 17.375886524822697, |
|
"grad_norm": 0.02593953162431717, |
|
"learning_rate": 2.9511111111111114e-06, |
|
"loss": 0.0007, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 17.494089834515368, |
|
"grad_norm": 0.053433727473020554, |
|
"learning_rate": 2.895555555555556e-06, |
|
"loss": 0.0008, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 17.61229314420804, |
|
"grad_norm": 0.02751092053949833, |
|
"learning_rate": 2.84e-06, |
|
"loss": 0.0012, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 17.73049645390071, |
|
"grad_norm": 0.03605583682656288, |
|
"learning_rate": 2.784444444444445e-06, |
|
"loss": 0.0008, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 17.84869976359338, |
|
"grad_norm": 0.09203966706991196, |
|
"learning_rate": 2.7288888888888893e-06, |
|
"loss": 0.0009, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 17.96690307328605, |
|
"grad_norm": 0.02148735709488392, |
|
"learning_rate": 2.6733333333333333e-06, |
|
"loss": 0.0007, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 18.085106382978722, |
|
"grad_norm": 0.01998170092701912, |
|
"learning_rate": 2.617777777777778e-06, |
|
"loss": 0.001, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 18.203309692671393, |
|
"grad_norm": 0.018996959552168846, |
|
"learning_rate": 2.5622222222222226e-06, |
|
"loss": 0.0007, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 18.321513002364068, |
|
"grad_norm": 0.02218470722436905, |
|
"learning_rate": 2.5066666666666667e-06, |
|
"loss": 0.0007, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 18.43971631205674, |
|
"grad_norm": 0.0916813537478447, |
|
"learning_rate": 2.451111111111111e-06, |
|
"loss": 0.0007, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 18.55791962174941, |
|
"grad_norm": 0.020167546346783638, |
|
"learning_rate": 2.3955555555555556e-06, |
|
"loss": 0.0006, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 18.67612293144208, |
|
"grad_norm": 0.016945689916610718, |
|
"learning_rate": 2.3400000000000005e-06, |
|
"loss": 0.0007, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 18.79432624113475, |
|
"grad_norm": 0.018247857689857483, |
|
"learning_rate": 2.2844444444444445e-06, |
|
"loss": 0.0007, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 18.912529550827422, |
|
"grad_norm": 0.016460491344332695, |
|
"learning_rate": 2.228888888888889e-06, |
|
"loss": 0.0006, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 18.912529550827422, |
|
"eval_loss": 1.0020800828933716, |
|
"eval_runtime": 524.532, |
|
"eval_samples_per_second": 2.442, |
|
"eval_steps_per_second": 0.154, |
|
"eval_wer": 0.38403853188635534, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.030732860520093, |
|
"grad_norm": 0.014541847631335258, |
|
"learning_rate": 2.1733333333333334e-06, |
|
"loss": 0.0007, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 19.148936170212767, |
|
"grad_norm": 0.017259875312447548, |
|
"learning_rate": 2.117777777777778e-06, |
|
"loss": 0.0006, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 19.26713947990544, |
|
"grad_norm": 0.013374953530728817, |
|
"learning_rate": 2.0622222222222223e-06, |
|
"loss": 0.0006, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 19.38534278959811, |
|
"grad_norm": 0.023194260895252228, |
|
"learning_rate": 2.006666666666667e-06, |
|
"loss": 0.0006, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 19.50354609929078, |
|
"grad_norm": 0.013524125330150127, |
|
"learning_rate": 1.9511111111111113e-06, |
|
"loss": 0.0006, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 19.62174940898345, |
|
"grad_norm": 0.01714450679719448, |
|
"learning_rate": 1.8955555555555557e-06, |
|
"loss": 0.0006, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 19.739952718676122, |
|
"grad_norm": 0.017303649336099625, |
|
"learning_rate": 1.8400000000000002e-06, |
|
"loss": 0.0006, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 19.858156028368793, |
|
"grad_norm": 0.025232350453734398, |
|
"learning_rate": 1.7844444444444444e-06, |
|
"loss": 0.0006, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 19.976359338061467, |
|
"grad_norm": 0.019350698217749596, |
|
"learning_rate": 1.728888888888889e-06, |
|
"loss": 0.0006, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 20.094562647754138, |
|
"grad_norm": 0.0166899636387825, |
|
"learning_rate": 1.6733333333333335e-06, |
|
"loss": 0.0005, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 20.21276595744681, |
|
"grad_norm": 0.015743156895041466, |
|
"learning_rate": 1.6177777777777778e-06, |
|
"loss": 0.0005, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 20.33096926713948, |
|
"grad_norm": 0.016623031347990036, |
|
"learning_rate": 1.5622222222222225e-06, |
|
"loss": 0.0005, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 20.44917257683215, |
|
"grad_norm": 0.013974419794976711, |
|
"learning_rate": 1.506666666666667e-06, |
|
"loss": 0.0005, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 20.56737588652482, |
|
"grad_norm": 0.014741248451173306, |
|
"learning_rate": 1.4511111111111112e-06, |
|
"loss": 0.0005, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 20.685579196217493, |
|
"grad_norm": 0.016908541321754456, |
|
"learning_rate": 1.3955555555555556e-06, |
|
"loss": 0.0005, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 20.803782505910167, |
|
"grad_norm": 0.01568152941763401, |
|
"learning_rate": 1.34e-06, |
|
"loss": 0.0005, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 20.921985815602838, |
|
"grad_norm": 0.01495905127376318, |
|
"learning_rate": 1.2844444444444445e-06, |
|
"loss": 0.0005, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 21.04018912529551, |
|
"grad_norm": 0.014800423756241798, |
|
"learning_rate": 1.228888888888889e-06, |
|
"loss": 0.0005, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 21.15839243498818, |
|
"grad_norm": 0.015356684103608131, |
|
"learning_rate": 1.1733333333333335e-06, |
|
"loss": 0.0005, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"grad_norm": 0.014247337356209755, |
|
"learning_rate": 1.117777777777778e-06, |
|
"loss": 0.0005, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 21.39479905437352, |
|
"grad_norm": 0.015071702189743519, |
|
"learning_rate": 1.0622222222222222e-06, |
|
"loss": 0.0005, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 21.513002364066192, |
|
"grad_norm": 0.01471630111336708, |
|
"learning_rate": 1.0066666666666668e-06, |
|
"loss": 0.0005, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 21.631205673758867, |
|
"grad_norm": 0.013918698765337467, |
|
"learning_rate": 9.511111111111111e-07, |
|
"loss": 0.0005, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 21.749408983451538, |
|
"grad_norm": 0.015510810539126396, |
|
"learning_rate": 8.955555555555557e-07, |
|
"loss": 0.0005, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 21.86761229314421, |
|
"grad_norm": 0.01677914895117283, |
|
"learning_rate": 8.400000000000001e-07, |
|
"loss": 0.0005, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 21.98581560283688, |
|
"grad_norm": 0.013124167919158936, |
|
"learning_rate": 7.844444444444445e-07, |
|
"loss": 0.0005, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 22.10401891252955, |
|
"grad_norm": 0.013821087777614594, |
|
"learning_rate": 7.28888888888889e-07, |
|
"loss": 0.0005, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 22.22222222222222, |
|
"grad_norm": 0.010750818997621536, |
|
"learning_rate": 6.733333333333334e-07, |
|
"loss": 0.0005, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 22.340425531914892, |
|
"grad_norm": 0.015222841873764992, |
|
"learning_rate": 6.177777777777778e-07, |
|
"loss": 0.0005, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 22.458628841607563, |
|
"grad_norm": 0.01256669219583273, |
|
"learning_rate": 5.622222222222223e-07, |
|
"loss": 0.0005, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 22.576832151300238, |
|
"grad_norm": 0.01457743626087904, |
|
"learning_rate": 5.066666666666667e-07, |
|
"loss": 0.0005, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 22.69503546099291, |
|
"grad_norm": 0.014546710066497326, |
|
"learning_rate": 4.511111111111111e-07, |
|
"loss": 0.0005, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 22.81323877068558, |
|
"grad_norm": 0.016056003049016, |
|
"learning_rate": 3.9555555555555557e-07, |
|
"loss": 0.0005, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 22.93144208037825, |
|
"grad_norm": 0.016192374750971794, |
|
"learning_rate": 3.4000000000000003e-07, |
|
"loss": 0.0005, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 23.04964539007092, |
|
"grad_norm": 0.01114520151168108, |
|
"learning_rate": 2.844444444444445e-07, |
|
"loss": 0.0005, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 23.167848699763592, |
|
"grad_norm": 0.04461406543850899, |
|
"learning_rate": 2.2888888888888892e-07, |
|
"loss": 0.0005, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 23.286052009456263, |
|
"grad_norm": 0.05798293650150299, |
|
"learning_rate": 1.7333333333333335e-07, |
|
"loss": 0.0005, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 23.404255319148938, |
|
"grad_norm": 0.013462238945066929, |
|
"learning_rate": 1.1777777777777778e-07, |
|
"loss": 0.0004, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 23.52245862884161, |
|
"grad_norm": 0.011377551592886448, |
|
"learning_rate": 6.222222222222223e-08, |
|
"loss": 0.0005, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 23.64066193853428, |
|
"grad_norm": 0.010640958324074745, |
|
"learning_rate": 6.666666666666667e-09, |
|
"loss": 0.0005, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 23.64066193853428, |
|
"eval_loss": 1.0314486026763916, |
|
"eval_runtime": 519.5946, |
|
"eval_samples_per_second": 2.465, |
|
"eval_steps_per_second": 0.156, |
|
"eval_wer": 0.3835507864894525, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 23.64066193853428, |
|
"step": 5000, |
|
"total_flos": 3.393166998601728e+20, |
|
"train_loss": 0.10379596998989582, |
|
"train_runtime": 41720.0473, |
|
"train_samples_per_second": 3.835, |
|
"train_steps_per_second": 0.12 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 24, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.393166998601728e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|