{ "best_metric": null, "best_model_checkpoint": null, "epoch": 47.16981132075472, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2358490566037736, "grad_norm": 13.758152961730957, "learning_rate": 5.000000000000001e-07, "loss": 1.4804, "step": 25 }, { "epoch": 0.4716981132075472, "grad_norm": 4.994204998016357, "learning_rate": 9.800000000000001e-07, "loss": 1.2162, "step": 50 }, { "epoch": 0.7075471698113207, "grad_norm": 5.27117919921875, "learning_rate": 1.48e-06, "loss": 0.9151, "step": 75 }, { "epoch": 0.9433962264150944, "grad_norm": 3.913193464279175, "learning_rate": 1.98e-06, "loss": 0.7928, "step": 100 }, { "epoch": 1.179245283018868, "grad_norm": 5.311148643493652, "learning_rate": 2.4800000000000004e-06, "loss": 0.6818, "step": 125 }, { "epoch": 1.4150943396226414, "grad_norm": 4.302743911743164, "learning_rate": 2.9800000000000003e-06, "loss": 0.6201, "step": 150 }, { "epoch": 1.650943396226415, "grad_norm": 5.217283248901367, "learning_rate": 3.48e-06, "loss": 0.6393, "step": 175 }, { "epoch": 1.8867924528301887, "grad_norm": 4.888242244720459, "learning_rate": 3.980000000000001e-06, "loss": 0.6083, "step": 200 }, { "epoch": 2.1226415094339623, "grad_norm": 3.95123291015625, "learning_rate": 4.48e-06, "loss": 0.5218, "step": 225 }, { "epoch": 2.358490566037736, "grad_norm": 5.493602752685547, "learning_rate": 4.980000000000001e-06, "loss": 0.4469, "step": 250 }, { "epoch": 2.5943396226415096, "grad_norm": 4.4826130867004395, "learning_rate": 5.480000000000001e-06, "loss": 0.4469, "step": 275 }, { "epoch": 2.830188679245283, "grad_norm": 7.129200458526611, "learning_rate": 5.98e-06, "loss": 0.4576, "step": 300 }, { "epoch": 3.0660377358490565, "grad_norm": 3.837526798248291, "learning_rate": 6.480000000000001e-06, "loss": 0.4128, "step": 325 }, { "epoch": 3.30188679245283, "grad_norm": 6.080331802368164, "learning_rate": 6.98e-06, "loss": 0.3164, "step": 350 }, { "epoch": 3.5377358490566038, "grad_norm": 6.628820896148682, "learning_rate": 7.48e-06, "loss": 0.3221, "step": 375 }, { "epoch": 3.7735849056603774, "grad_norm": 3.538978338241577, "learning_rate": 7.980000000000002e-06, "loss": 0.3178, "step": 400 }, { "epoch": 4.009433962264151, "grad_norm": 2.7948927879333496, "learning_rate": 8.48e-06, "loss": 0.3305, "step": 425 }, { "epoch": 4.245283018867925, "grad_norm": 5.391767501831055, "learning_rate": 8.98e-06, "loss": 0.2011, "step": 450 }, { "epoch": 4.481132075471698, "grad_norm": 3.8972408771514893, "learning_rate": 9.48e-06, "loss": 0.2298, "step": 475 }, { "epoch": 4.716981132075472, "grad_norm": 3.7263717651367188, "learning_rate": 9.980000000000001e-06, "loss": 0.2354, "step": 500 }, { "epoch": 4.952830188679245, "grad_norm": 3.832674741744995, "learning_rate": 9.946666666666667e-06, "loss": 0.2294, "step": 525 }, { "epoch": 5.188679245283019, "grad_norm": 3.126432418823242, "learning_rate": 9.891111111111113e-06, "loss": 0.166, "step": 550 }, { "epoch": 5.4245283018867925, "grad_norm": 3.3991551399230957, "learning_rate": 9.835555555555556e-06, "loss": 0.143, "step": 575 }, { "epoch": 5.660377358490566, "grad_norm": 3.2350423336029053, "learning_rate": 9.780000000000001e-06, "loss": 0.1504, "step": 600 }, { "epoch": 5.89622641509434, "grad_norm": 3.207953453063965, "learning_rate": 9.724444444444445e-06, "loss": 0.1465, "step": 625 }, { "epoch": 6.132075471698113, "grad_norm": 2.3252346515655518, "learning_rate": 9.66888888888889e-06, "loss": 0.1242, "step": 650 }, { "epoch": 6.367924528301887, "grad_norm": 4.011571884155273, "learning_rate": 9.615555555555558e-06, "loss": 0.0924, "step": 675 }, { "epoch": 6.60377358490566, "grad_norm": 3.8918111324310303, "learning_rate": 9.56e-06, "loss": 0.0977, "step": 700 }, { "epoch": 6.839622641509434, "grad_norm": 3.7026844024658203, "learning_rate": 9.504444444444446e-06, "loss": 0.0986, "step": 725 }, { "epoch": 7.0754716981132075, "grad_norm": 1.8883856534957886, "learning_rate": 9.44888888888889e-06, "loss": 0.0843, "step": 750 }, { "epoch": 7.311320754716981, "grad_norm": 2.4607560634613037, "learning_rate": 9.393333333333334e-06, "loss": 0.0612, "step": 775 }, { "epoch": 7.547169811320755, "grad_norm": 3.2600181102752686, "learning_rate": 9.33777777777778e-06, "loss": 0.0633, "step": 800 }, { "epoch": 7.783018867924528, "grad_norm": 2.627089262008667, "learning_rate": 9.282222222222222e-06, "loss": 0.0707, "step": 825 }, { "epoch": 8.018867924528301, "grad_norm": 2.358673095703125, "learning_rate": 9.226666666666668e-06, "loss": 0.0595, "step": 850 }, { "epoch": 8.254716981132075, "grad_norm": 2.0430407524108887, "learning_rate": 9.171111111111112e-06, "loss": 0.041, "step": 875 }, { "epoch": 8.49056603773585, "grad_norm": 2.0657405853271484, "learning_rate": 9.115555555555556e-06, "loss": 0.0439, "step": 900 }, { "epoch": 8.726415094339622, "grad_norm": 2.7738757133483887, "learning_rate": 9.060000000000001e-06, "loss": 0.0463, "step": 925 }, { "epoch": 8.962264150943396, "grad_norm": 2.2637436389923096, "learning_rate": 9.004444444444445e-06, "loss": 0.0481, "step": 950 }, { "epoch": 9.19811320754717, "grad_norm": 3.1162376403808594, "learning_rate": 8.951111111111112e-06, "loss": 0.0343, "step": 975 }, { "epoch": 9.433962264150944, "grad_norm": 1.8580459356307983, "learning_rate": 8.895555555555556e-06, "loss": 0.0354, "step": 1000 }, { "epoch": 9.433962264150944, "eval_loss": 0.8618999719619751, "eval_runtime": 520.9471, "eval_samples_per_second": 2.459, "eval_steps_per_second": 0.155, "eval_wer": 0.42787464943299597, "step": 1000 }, { "epoch": 9.669811320754716, "grad_norm": 2.571624755859375, "learning_rate": 8.84e-06, "loss": 0.039, "step": 1025 }, { "epoch": 9.90566037735849, "grad_norm": 1.8429958820343018, "learning_rate": 8.784444444444446e-06, "loss": 0.0379, "step": 1050 }, { "epoch": 10.141509433962264, "grad_norm": 2.461296796798706, "learning_rate": 8.72888888888889e-06, "loss": 0.0311, "step": 1075 }, { "epoch": 10.377358490566039, "grad_norm": 1.5897456407546997, "learning_rate": 8.673333333333334e-06, "loss": 0.0264, "step": 1100 }, { "epoch": 10.61320754716981, "grad_norm": 1.7459256649017334, "learning_rate": 8.617777777777778e-06, "loss": 0.0296, "step": 1125 }, { "epoch": 10.849056603773585, "grad_norm": 1.7763397693634033, "learning_rate": 8.562222222222224e-06, "loss": 0.0317, "step": 1150 }, { "epoch": 11.084905660377359, "grad_norm": 1.4605960845947266, "learning_rate": 8.506666666666668e-06, "loss": 0.0277, "step": 1175 }, { "epoch": 11.320754716981131, "grad_norm": 6.408753395080566, "learning_rate": 8.451111111111112e-06, "loss": 0.0198, "step": 1200 }, { "epoch": 11.556603773584905, "grad_norm": 2.7092502117156982, "learning_rate": 8.395555555555557e-06, "loss": 0.0212, "step": 1225 }, { "epoch": 11.79245283018868, "grad_norm": 1.4564125537872314, "learning_rate": 8.34e-06, "loss": 0.0208, "step": 1250 }, { "epoch": 12.028301886792454, "grad_norm": 2.5351600646972656, "learning_rate": 8.284444444444446e-06, "loss": 0.0222, "step": 1275 }, { "epoch": 12.264150943396226, "grad_norm": 1.7902874946594238, "learning_rate": 8.22888888888889e-06, "loss": 0.0191, "step": 1300 }, { "epoch": 12.5, "grad_norm": 1.4129408597946167, "learning_rate": 8.173333333333334e-06, "loss": 0.0186, "step": 1325 }, { "epoch": 12.735849056603774, "grad_norm": 1.4118083715438843, "learning_rate": 8.11777777777778e-06, "loss": 0.0183, "step": 1350 }, { "epoch": 12.971698113207546, "grad_norm": 1.477591633796692, "learning_rate": 8.062222222222222e-06, "loss": 0.0184, "step": 1375 }, { "epoch": 13.20754716981132, "grad_norm": 1.960243821144104, "learning_rate": 8.006666666666667e-06, "loss": 0.0156, "step": 1400 }, { "epoch": 13.443396226415095, "grad_norm": 1.751186728477478, "learning_rate": 7.951111111111111e-06, "loss": 0.0164, "step": 1425 }, { "epoch": 13.679245283018869, "grad_norm": 1.4166957139968872, "learning_rate": 7.895555555555557e-06, "loss": 0.0173, "step": 1450 }, { "epoch": 13.915094339622641, "grad_norm": 1.3187193870544434, "learning_rate": 7.840000000000001e-06, "loss": 0.017, "step": 1475 }, { "epoch": 14.150943396226415, "grad_norm": 1.4669018983840942, "learning_rate": 7.784444444444445e-06, "loss": 0.0172, "step": 1500 }, { "epoch": 14.38679245283019, "grad_norm": 1.6228229999542236, "learning_rate": 7.72888888888889e-06, "loss": 0.0137, "step": 1525 }, { "epoch": 14.622641509433961, "grad_norm": 2.0534708499908447, "learning_rate": 7.673333333333333e-06, "loss": 0.013, "step": 1550 }, { "epoch": 14.858490566037736, "grad_norm": 1.6678171157836914, "learning_rate": 7.617777777777778e-06, "loss": 0.0148, "step": 1575 }, { "epoch": 15.09433962264151, "grad_norm": 1.418880820274353, "learning_rate": 7.562222222222223e-06, "loss": 0.0137, "step": 1600 }, { "epoch": 15.330188679245284, "grad_norm": 1.3093829154968262, "learning_rate": 7.506666666666668e-06, "loss": 0.0109, "step": 1625 }, { "epoch": 15.566037735849056, "grad_norm": 1.9692704677581787, "learning_rate": 7.451111111111111e-06, "loss": 0.0101, "step": 1650 }, { "epoch": 15.80188679245283, "grad_norm": 4.710831642150879, "learning_rate": 7.395555555555556e-06, "loss": 0.0127, "step": 1675 }, { "epoch": 16.037735849056602, "grad_norm": 1.7897729873657227, "learning_rate": 7.340000000000001e-06, "loss": 0.0139, "step": 1700 }, { "epoch": 16.27358490566038, "grad_norm": 1.4929847717285156, "learning_rate": 7.284444444444445e-06, "loss": 0.0109, "step": 1725 }, { "epoch": 16.50943396226415, "grad_norm": 0.8853992223739624, "learning_rate": 7.22888888888889e-06, "loss": 0.0123, "step": 1750 }, { "epoch": 16.745283018867923, "grad_norm": 1.6013476848602295, "learning_rate": 7.173333333333335e-06, "loss": 0.0135, "step": 1775 }, { "epoch": 16.9811320754717, "grad_norm": 1.9315084218978882, "learning_rate": 7.117777777777778e-06, "loss": 0.0118, "step": 1800 }, { "epoch": 17.21698113207547, "grad_norm": 1.5266776084899902, "learning_rate": 7.062222222222223e-06, "loss": 0.0095, "step": 1825 }, { "epoch": 17.452830188679247, "grad_norm": 0.8283048868179321, "learning_rate": 7.006666666666667e-06, "loss": 0.0103, "step": 1850 }, { "epoch": 17.68867924528302, "grad_norm": 1.3116430044174194, "learning_rate": 6.951111111111112e-06, "loss": 0.0096, "step": 1875 }, { "epoch": 17.92452830188679, "grad_norm": 0.4744425415992737, "learning_rate": 6.8955555555555565e-06, "loss": 0.0087, "step": 1900 }, { "epoch": 18.160377358490567, "grad_norm": 0.30036234855651855, "learning_rate": 6.8400000000000014e-06, "loss": 0.0073, "step": 1925 }, { "epoch": 18.39622641509434, "grad_norm": 1.509811282157898, "learning_rate": 6.784444444444445e-06, "loss": 0.0079, "step": 1950 }, { "epoch": 18.632075471698112, "grad_norm": 0.8169532418251038, "learning_rate": 6.7288888888888895e-06, "loss": 0.0077, "step": 1975 }, { "epoch": 18.867924528301888, "grad_norm": 0.8028237819671631, "learning_rate": 6.6733333333333335e-06, "loss": 0.0073, "step": 2000 }, { "epoch": 18.867924528301888, "eval_loss": 1.0044962167739868, "eval_runtime": 524.1581, "eval_samples_per_second": 2.444, "eval_steps_per_second": 0.155, "eval_wer": 0.42714303133764175, "step": 2000 }, { "epoch": 19.10377358490566, "grad_norm": 0.7409939765930176, "learning_rate": 6.617777777777778e-06, "loss": 0.0054, "step": 2025 }, { "epoch": 19.339622641509433, "grad_norm": 1.2007324695587158, "learning_rate": 6.562222222222223e-06, "loss": 0.0061, "step": 2050 }, { "epoch": 19.57547169811321, "grad_norm": 0.5483594536781311, "learning_rate": 6.5066666666666665e-06, "loss": 0.0055, "step": 2075 }, { "epoch": 19.81132075471698, "grad_norm": 1.1357067823410034, "learning_rate": 6.451111111111111e-06, "loss": 0.0065, "step": 2100 }, { "epoch": 20.047169811320753, "grad_norm": 0.13822057843208313, "learning_rate": 6.395555555555556e-06, "loss": 0.0058, "step": 2125 }, { "epoch": 20.28301886792453, "grad_norm": 1.297431468963623, "learning_rate": 6.34e-06, "loss": 0.0047, "step": 2150 }, { "epoch": 20.5188679245283, "grad_norm": 0.5493446588516235, "learning_rate": 6.284444444444445e-06, "loss": 0.0055, "step": 2175 }, { "epoch": 20.754716981132077, "grad_norm": 1.7465953826904297, "learning_rate": 6.22888888888889e-06, "loss": 0.0095, "step": 2200 }, { "epoch": 20.99056603773585, "grad_norm": 0.4188297390937805, "learning_rate": 6.173333333333333e-06, "loss": 0.0073, "step": 2225 }, { "epoch": 21.22641509433962, "grad_norm": 1.0976659059524536, "learning_rate": 6.117777777777778e-06, "loss": 0.0042, "step": 2250 }, { "epoch": 21.462264150943398, "grad_norm": 0.693856418132782, "learning_rate": 6.062222222222223e-06, "loss": 0.0076, "step": 2275 }, { "epoch": 21.69811320754717, "grad_norm": 0.21184979379177094, "learning_rate": 6.006666666666667e-06, "loss": 0.0046, "step": 2300 }, { "epoch": 21.933962264150942, "grad_norm": 1.1177539825439453, "learning_rate": 5.951111111111112e-06, "loss": 0.0053, "step": 2325 }, { "epoch": 22.169811320754718, "grad_norm": 0.8273558020591736, "learning_rate": 5.895555555555557e-06, "loss": 0.0049, "step": 2350 }, { "epoch": 22.40566037735849, "grad_norm": 0.3590599596500397, "learning_rate": 5.84e-06, "loss": 0.0036, "step": 2375 }, { "epoch": 22.641509433962263, "grad_norm": 0.23113173246383667, "learning_rate": 5.784444444444445e-06, "loss": 0.0044, "step": 2400 }, { "epoch": 22.87735849056604, "grad_norm": 0.5184438228607178, "learning_rate": 5.72888888888889e-06, "loss": 0.0039, "step": 2425 }, { "epoch": 23.11320754716981, "grad_norm": 0.22185905277729034, "learning_rate": 5.673333333333334e-06, "loss": 0.0041, "step": 2450 }, { "epoch": 23.349056603773583, "grad_norm": 0.436921626329422, "learning_rate": 5.617777777777779e-06, "loss": 0.0043, "step": 2475 }, { "epoch": 23.58490566037736, "grad_norm": 0.871998131275177, "learning_rate": 5.562222222222222e-06, "loss": 0.0033, "step": 2500 }, { "epoch": 23.82075471698113, "grad_norm": 0.31050390005111694, "learning_rate": 5.506666666666667e-06, "loss": 0.0033, "step": 2525 }, { "epoch": 24.056603773584907, "grad_norm": 3.2173962593078613, "learning_rate": 5.451111111111112e-06, "loss": 0.0036, "step": 2550 }, { "epoch": 24.29245283018868, "grad_norm": 0.5493348240852356, "learning_rate": 5.3955555555555565e-06, "loss": 0.0034, "step": 2575 }, { "epoch": 24.528301886792452, "grad_norm": 0.18540702760219574, "learning_rate": 5.3400000000000005e-06, "loss": 0.0025, "step": 2600 }, { "epoch": 24.764150943396228, "grad_norm": 0.4164523780345917, "learning_rate": 5.2844444444444454e-06, "loss": 0.0017, "step": 2625 }, { "epoch": 25.0, "grad_norm": 1.2777189016342163, "learning_rate": 5.228888888888889e-06, "loss": 0.0025, "step": 2650 }, { "epoch": 25.235849056603772, "grad_norm": 0.08694951981306076, "learning_rate": 5.1733333333333335e-06, "loss": 0.0013, "step": 2675 }, { "epoch": 25.471698113207548, "grad_norm": 1.1752252578735352, "learning_rate": 5.117777777777778e-06, "loss": 0.002, "step": 2700 }, { "epoch": 25.70754716981132, "grad_norm": 1.4497309923171997, "learning_rate": 5.062222222222222e-06, "loss": 0.0017, "step": 2725 }, { "epoch": 25.943396226415093, "grad_norm": 21.009708404541016, "learning_rate": 5.006666666666667e-06, "loss": 0.0021, "step": 2750 }, { "epoch": 26.17924528301887, "grad_norm": 0.5062585473060608, "learning_rate": 4.951111111111111e-06, "loss": 0.0021, "step": 2775 }, { "epoch": 26.41509433962264, "grad_norm": 0.49699580669403076, "learning_rate": 4.895555555555556e-06, "loss": 0.0014, "step": 2800 }, { "epoch": 26.650943396226417, "grad_norm": 1.0723743438720703, "learning_rate": 4.84e-06, "loss": 0.0024, "step": 2825 }, { "epoch": 26.88679245283019, "grad_norm": 0.04135267063975334, "learning_rate": 4.784444444444445e-06, "loss": 0.0015, "step": 2850 }, { "epoch": 27.12264150943396, "grad_norm": 0.5174709558486938, "learning_rate": 4.728888888888889e-06, "loss": 0.0017, "step": 2875 }, { "epoch": 27.358490566037737, "grad_norm": 0.3478304445743561, "learning_rate": 4.673333333333333e-06, "loss": 0.0019, "step": 2900 }, { "epoch": 27.59433962264151, "grad_norm": 0.11005357652902603, "learning_rate": 4.617777777777778e-06, "loss": 0.0013, "step": 2925 }, { "epoch": 27.830188679245282, "grad_norm": 0.6077697277069092, "learning_rate": 4.562222222222222e-06, "loss": 0.0012, "step": 2950 }, { "epoch": 28.066037735849058, "grad_norm": 0.13117651641368866, "learning_rate": 4.506666666666667e-06, "loss": 0.0016, "step": 2975 }, { "epoch": 28.30188679245283, "grad_norm": 0.14353643357753754, "learning_rate": 4.451111111111112e-06, "loss": 0.0008, "step": 3000 }, { "epoch": 28.30188679245283, "eval_loss": 1.073752760887146, "eval_runtime": 524.5768, "eval_samples_per_second": 2.442, "eval_steps_per_second": 0.154, "eval_wer": 0.40970613339836603, "step": 3000 }, { "epoch": 28.537735849056602, "grad_norm": 0.01842259056866169, "learning_rate": 4.395555555555556e-06, "loss": 0.0006, "step": 3025 }, { "epoch": 28.77358490566038, "grad_norm": 0.052475862205028534, "learning_rate": 4.34e-06, "loss": 0.0006, "step": 3050 }, { "epoch": 29.00943396226415, "grad_norm": 0.010285818949341774, "learning_rate": 4.284444444444445e-06, "loss": 0.0005, "step": 3075 }, { "epoch": 29.245283018867923, "grad_norm": 0.01011682953685522, "learning_rate": 4.228888888888889e-06, "loss": 0.0005, "step": 3100 }, { "epoch": 29.4811320754717, "grad_norm": 0.02395796962082386, "learning_rate": 4.173333333333334e-06, "loss": 0.0006, "step": 3125 }, { "epoch": 29.71698113207547, "grad_norm": 0.015434496104717255, "learning_rate": 4.117777777777779e-06, "loss": 0.0006, "step": 3150 }, { "epoch": 29.952830188679247, "grad_norm": 0.012848022393882275, "learning_rate": 4.062222222222223e-06, "loss": 0.0005, "step": 3175 }, { "epoch": 30.18867924528302, "grad_norm": 0.013436584733426571, "learning_rate": 4.006666666666667e-06, "loss": 0.0005, "step": 3200 }, { "epoch": 30.42452830188679, "grad_norm": 0.009209922514855862, "learning_rate": 3.951111111111112e-06, "loss": 0.0005, "step": 3225 }, { "epoch": 30.660377358490567, "grad_norm": 0.01910889334976673, "learning_rate": 3.895555555555556e-06, "loss": 0.0004, "step": 3250 }, { "epoch": 30.89622641509434, "grad_norm": 0.016347285360097885, "learning_rate": 3.8400000000000005e-06, "loss": 0.0005, "step": 3275 }, { "epoch": 31.132075471698112, "grad_norm": 0.01122160255908966, "learning_rate": 3.784444444444445e-06, "loss": 0.0004, "step": 3300 }, { "epoch": 31.367924528301888, "grad_norm": 0.009702642448246479, "learning_rate": 3.728888888888889e-06, "loss": 0.0004, "step": 3325 }, { "epoch": 31.60377358490566, "grad_norm": 0.009626483544707298, "learning_rate": 3.673333333333334e-06, "loss": 0.0004, "step": 3350 }, { "epoch": 31.839622641509433, "grad_norm": 0.07015370577573776, "learning_rate": 3.617777777777778e-06, "loss": 0.0005, "step": 3375 }, { "epoch": 32.075471698113205, "grad_norm": 0.007539735175669193, "learning_rate": 3.5622222222222224e-06, "loss": 0.0017, "step": 3400 }, { "epoch": 32.31132075471698, "grad_norm": 0.007529088761657476, "learning_rate": 3.5066666666666673e-06, "loss": 0.0021, "step": 3425 }, { "epoch": 32.54716981132076, "grad_norm": 0.009614026173949242, "learning_rate": 3.4511111111111113e-06, "loss": 0.0004, "step": 3450 }, { "epoch": 32.783018867924525, "grad_norm": 0.008071945048868656, "learning_rate": 3.3955555555555558e-06, "loss": 0.0004, "step": 3475 }, { "epoch": 33.0188679245283, "grad_norm": 0.00833821576088667, "learning_rate": 3.3400000000000006e-06, "loss": 0.0004, "step": 3500 }, { "epoch": 33.25471698113208, "grad_norm": 0.0064097195863723755, "learning_rate": 3.2844444444444447e-06, "loss": 0.0003, "step": 3525 }, { "epoch": 33.490566037735846, "grad_norm": 0.00794936902821064, "learning_rate": 3.228888888888889e-06, "loss": 0.0003, "step": 3550 }, { "epoch": 33.72641509433962, "grad_norm": 0.007341883610934019, "learning_rate": 3.173333333333334e-06, "loss": 0.0003, "step": 3575 }, { "epoch": 33.9622641509434, "grad_norm": 0.007305641658604145, "learning_rate": 3.117777777777778e-06, "loss": 0.0003, "step": 3600 }, { "epoch": 34.198113207547166, "grad_norm": 0.021206460893154144, "learning_rate": 3.0622222222222225e-06, "loss": 0.0005, "step": 3625 }, { "epoch": 34.43396226415094, "grad_norm": 0.012925918214023113, "learning_rate": 3.0066666666666674e-06, "loss": 0.0003, "step": 3650 }, { "epoch": 34.66981132075472, "grad_norm": 0.009873083792626858, "learning_rate": 2.9511111111111114e-06, "loss": 0.0003, "step": 3675 }, { "epoch": 34.905660377358494, "grad_norm": 0.009015405550599098, "learning_rate": 2.895555555555556e-06, "loss": 0.0003, "step": 3700 }, { "epoch": 35.14150943396226, "grad_norm": 0.0066409045830369, "learning_rate": 2.84e-06, "loss": 0.0004, "step": 3725 }, { "epoch": 35.37735849056604, "grad_norm": 0.0068579623475670815, "learning_rate": 2.784444444444445e-06, "loss": 0.0003, "step": 3750 }, { "epoch": 35.613207547169814, "grad_norm": 0.00707025034353137, "learning_rate": 2.7288888888888893e-06, "loss": 0.0007, "step": 3775 }, { "epoch": 35.84905660377358, "grad_norm": 0.006978702265769243, "learning_rate": 2.6733333333333333e-06, "loss": 0.0003, "step": 3800 }, { "epoch": 36.08490566037736, "grad_norm": 0.006747444160282612, "learning_rate": 2.617777777777778e-06, "loss": 0.0006, "step": 3825 }, { "epoch": 36.320754716981135, "grad_norm": 0.006311883684247732, "learning_rate": 2.5622222222222226e-06, "loss": 0.0005, "step": 3850 }, { "epoch": 36.556603773584904, "grad_norm": 0.006494670640677214, "learning_rate": 2.5066666666666667e-06, "loss": 0.0004, "step": 3875 }, { "epoch": 36.79245283018868, "grad_norm": 0.0073091923259198666, "learning_rate": 2.451111111111111e-06, "loss": 0.0003, "step": 3900 }, { "epoch": 37.028301886792455, "grad_norm": 0.006873810198158026, "learning_rate": 2.3955555555555556e-06, "loss": 0.0003, "step": 3925 }, { "epoch": 37.264150943396224, "grad_norm": 0.0058405776508152485, "learning_rate": 2.3400000000000005e-06, "loss": 0.0003, "step": 3950 }, { "epoch": 37.5, "grad_norm": 0.005798263009637594, "learning_rate": 2.2844444444444445e-06, "loss": 0.0003, "step": 3975 }, { "epoch": 37.735849056603776, "grad_norm": 0.006176309194415808, "learning_rate": 2.228888888888889e-06, "loss": 0.0003, "step": 4000 }, { "epoch": 37.735849056603776, "eval_loss": 1.164843201637268, "eval_runtime": 525.2212, "eval_samples_per_second": 2.439, "eval_steps_per_second": 0.154, "eval_wer": 0.4106206560175588, "step": 4000 }, { "epoch": 37.971698113207545, "grad_norm": 0.007258801721036434, "learning_rate": 2.1733333333333334e-06, "loss": 0.0003, "step": 4025 }, { "epoch": 38.20754716981132, "grad_norm": 0.006173884961754084, "learning_rate": 2.117777777777778e-06, "loss": 0.0005, "step": 4050 }, { "epoch": 38.443396226415096, "grad_norm": 0.006003965623676777, "learning_rate": 2.0622222222222223e-06, "loss": 0.0003, "step": 4075 }, { "epoch": 38.679245283018865, "grad_norm": 0.02320743352174759, "learning_rate": 2.006666666666667e-06, "loss": 0.0003, "step": 4100 }, { "epoch": 38.91509433962264, "grad_norm": 0.006195748224854469, "learning_rate": 1.9511111111111113e-06, "loss": 0.0002, "step": 4125 }, { "epoch": 39.15094339622642, "grad_norm": 0.0053167627193033695, "learning_rate": 1.8955555555555557e-06, "loss": 0.0002, "step": 4150 }, { "epoch": 39.386792452830186, "grad_norm": 0.005423345603048801, "learning_rate": 1.8400000000000002e-06, "loss": 0.0003, "step": 4175 }, { "epoch": 39.62264150943396, "grad_norm": 0.006450105924159288, "learning_rate": 1.7844444444444444e-06, "loss": 0.0003, "step": 4200 }, { "epoch": 39.85849056603774, "grad_norm": 0.006311897188425064, "learning_rate": 1.728888888888889e-06, "loss": 0.0003, "step": 4225 }, { "epoch": 40.094339622641506, "grad_norm": 0.006942650303244591, "learning_rate": 1.6733333333333335e-06, "loss": 0.0002, "step": 4250 }, { "epoch": 40.33018867924528, "grad_norm": 0.018180053681135178, "learning_rate": 1.6177777777777778e-06, "loss": 0.0002, "step": 4275 }, { "epoch": 40.56603773584906, "grad_norm": 0.004808349069207907, "learning_rate": 1.5622222222222225e-06, "loss": 0.0002, "step": 4300 }, { "epoch": 40.801886792452834, "grad_norm": 0.0054702372290194035, "learning_rate": 1.506666666666667e-06, "loss": 0.0002, "step": 4325 }, { "epoch": 41.0377358490566, "grad_norm": 0.01282643061131239, "learning_rate": 1.4511111111111112e-06, "loss": 0.0002, "step": 4350 }, { "epoch": 41.27358490566038, "grad_norm": 0.005753038916736841, "learning_rate": 1.3955555555555556e-06, "loss": 0.0002, "step": 4375 }, { "epoch": 41.509433962264154, "grad_norm": 0.006287108175456524, "learning_rate": 1.34e-06, "loss": 0.0002, "step": 4400 }, { "epoch": 41.74528301886792, "grad_norm": 0.008781949989497662, "learning_rate": 1.2844444444444445e-06, "loss": 0.0002, "step": 4425 }, { "epoch": 41.9811320754717, "grad_norm": 0.005769769195467234, "learning_rate": 1.228888888888889e-06, "loss": 0.0002, "step": 4450 }, { "epoch": 42.216981132075475, "grad_norm": 0.005320234224200249, "learning_rate": 1.1733333333333335e-06, "loss": 0.0002, "step": 4475 }, { "epoch": 42.45283018867924, "grad_norm": 0.005163566675037146, "learning_rate": 1.117777777777778e-06, "loss": 0.0002, "step": 4500 }, { "epoch": 42.68867924528302, "grad_norm": 0.006558675784617662, "learning_rate": 1.0622222222222222e-06, "loss": 0.0002, "step": 4525 }, { "epoch": 42.924528301886795, "grad_norm": 0.005227777641266584, "learning_rate": 1.0066666666666668e-06, "loss": 0.0002, "step": 4550 }, { "epoch": 43.160377358490564, "grad_norm": 0.004621260333806276, "learning_rate": 9.511111111111111e-07, "loss": 0.0002, "step": 4575 }, { "epoch": 43.39622641509434, "grad_norm": 0.005723527632653713, "learning_rate": 8.955555555555557e-07, "loss": 0.0002, "step": 4600 }, { "epoch": 43.632075471698116, "grad_norm": 0.004969904199242592, "learning_rate": 8.400000000000001e-07, "loss": 0.0002, "step": 4625 }, { "epoch": 43.867924528301884, "grad_norm": 0.004666621331125498, "learning_rate": 7.844444444444445e-07, "loss": 0.0002, "step": 4650 }, { "epoch": 44.10377358490566, "grad_norm": 0.005328204482793808, "learning_rate": 7.28888888888889e-07, "loss": 0.0002, "step": 4675 }, { "epoch": 44.339622641509436, "grad_norm": 0.0053401123732328415, "learning_rate": 6.733333333333334e-07, "loss": 0.0002, "step": 4700 }, { "epoch": 44.575471698113205, "grad_norm": 0.004799432121217251, "learning_rate": 6.177777777777778e-07, "loss": 0.0002, "step": 4725 }, { "epoch": 44.81132075471698, "grad_norm": 0.005230792332440615, "learning_rate": 5.622222222222223e-07, "loss": 0.0002, "step": 4750 }, { "epoch": 45.04716981132076, "grad_norm": 0.0048530870117247105, "learning_rate": 5.066666666666667e-07, "loss": 0.0002, "step": 4775 }, { "epoch": 45.283018867924525, "grad_norm": 0.00436022412031889, "learning_rate": 4.511111111111111e-07, "loss": 0.0002, "step": 4800 }, { "epoch": 45.5188679245283, "grad_norm": 0.004528869409114122, "learning_rate": 3.9555555555555557e-07, "loss": 0.0002, "step": 4825 }, { "epoch": 45.75471698113208, "grad_norm": 0.005039799492806196, "learning_rate": 3.4000000000000003e-07, "loss": 0.0002, "step": 4850 }, { "epoch": 45.990566037735846, "grad_norm": 0.006162489764392376, "learning_rate": 2.844444444444445e-07, "loss": 0.0002, "step": 4875 }, { "epoch": 46.22641509433962, "grad_norm": 0.004973458591848612, "learning_rate": 2.2888888888888892e-07, "loss": 0.0002, "step": 4900 }, { "epoch": 46.4622641509434, "grad_norm": 0.004708532243967056, "learning_rate": 1.7333333333333335e-07, "loss": 0.0002, "step": 4925 }, { "epoch": 46.698113207547166, "grad_norm": 0.005467870272696018, "learning_rate": 1.1777777777777778e-07, "loss": 0.0002, "step": 4950 }, { "epoch": 46.93396226415094, "grad_norm": 0.004453060682862997, "learning_rate": 6.222222222222223e-08, "loss": 0.0002, "step": 4975 }, { "epoch": 47.16981132075472, "grad_norm": 0.0044038849882781506, "learning_rate": 6.666666666666667e-09, "loss": 0.0002, "step": 5000 }, { "epoch": 47.16981132075472, "eval_loss": 1.1839975118637085, "eval_runtime": 524.3947, "eval_samples_per_second": 2.443, "eval_steps_per_second": 0.154, "eval_wer": 0.4096451652237532, "step": 5000 }, { "epoch": 47.16981132075472, "step": 5000, "total_flos": 3.385098948722688e+20, "train_loss": 0.069445922704041, "train_runtime": 41874.5966, "train_samples_per_second": 3.821, "train_steps_per_second": 0.119 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 48, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.385098948722688e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }